## ----setup2, message = FALSE, warning = FALSE, results = 'hide'--------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) library(baselinenowcast) library(ggplot2) library(dplyr) library(tidyr) ## ----------------------------------------------------------------------------- nowcast_date <- "2021-08-01" eval_date <- "2021-10-01" target_data <- filter( germany_covid19_hosp, location == "DE", age_group == "00+", report_date <= eval_date, reference_date <= nowcast_date ) ## ----------------------------------------------------------------------------- latest_data <- target_data |> group_by(reference_date) |> summarise(final_count = sum(count)) ## ----------------------------------------------------------------------------- observed_data <- filter( target_data, report_date <= nowcast_date ) head(observed_data) ## ----------------------------------------------------------------------------- initial_reports <- observed_data |> group_by(reference_date) |> summarise(initial_count = sum(count)) ## ----------------------------------------------------------------------------- max_delay <- 30 ## ----------------------------------------------------------------------------- rep_tri_full <- as_reporting_triangle(observed_data) ## ----------------------------------------------------------------------------- rep_tri_full ## ----------------------------------------------------------------------------- rep_tri <- truncate_to_delay(rep_tri_full, max_delay = max_delay) rep_tri ## ----------------------------------------------------------------------------- scale_factor <- 3 prop_delay <- 0.5 tv <- allocate_reference_times( reporting_triangle = rep_tri, scale_factor = scale_factor, prop_delay = prop_delay ) n_history_delay <- tv$n_history_delay n_retrospective_nowcasts <- tv$n_retrospective_nowcasts ## ----------------------------------------------------------------------------- delay_pmf <- estimate_delay( reporting_triangle = rep_tri, n = n_history_delay ) ## ----------------------------------------------------------------------------- delay_df <- data.frame( delay = 0:(length(delay_pmf) - 1), pmf = delay_pmf ) delay_cdf_plot <- ggplot(delay_df) + geom_line(aes(x = delay, y = cumsum(pmf))) + xlab("Delay") + ylab("Cumulative proportion reported") + ggtitle("Empirical point estimate of cumulative proportion reported by delay") + # nolint theme_bw() delay_pmf_plot <- ggplot(delay_df) + geom_line(aes(x = delay, y = pmf)) + xlab("Delay") + ylab("Proportion reported") + ggtitle("Empirical point estimate of proportion reported by delay") + theme_bw() ## ----------------------------------------------------------------------------- delay_cdf_plot delay_pmf_plot ## ----------------------------------------------------------------------------- point_nowcast_matrix <- apply_delay( reporting_triangle = rep_tri, delay_pmf = delay_pmf ) ## ----------------------------------------------------------------------------- initial_reports_labeled <- initial_reports |> mutate(type = "Initial real-time") |> rename(count = initial_count) point_nowcast_df <- latest_data |> rename(count = final_count) |> mutate(nowcast = rowSums(point_nowcast_matrix)) |> pivot_longer( cols = c(count, nowcast), names_to = "type", values_to = "count" ) |> mutate(type = case_when( type == "count" ~ "Final observed data", type == "nowcast" ~ "Point nowcast", TRUE ~ type )) |> bind_rows( initial_reports_labeled ) # Create plot with data type as a variable plot_pt_nowcast <- ggplot(point_nowcast_df, aes( x = reference_date, y = count, color = type )) + geom_line() + scale_color_manual(values = c( "Initial reports" = "darkred", "Final observed data" = "black", "Point nowcast" = "darkblue" )) + theme_bw() + xlab("Reference date") + ylab("Confirmed admissions") + scale_y_continuous(trans = "log10") + ggtitle("Comparing initial reports, nowcasted, and later observed cases") + theme(legend.position = "bottom") + labs(color = "Type") ## ----------------------------------------------------------------------------- plot_pt_nowcast ## ----------------------------------------------------------------------------- trunc_rep_tri_list <- truncate_to_rows( rep_tri, n = n_retrospective_nowcasts ) ## ----------------------------------------------------------------------------- retro_rep_tri_list <- apply_reporting_structures(trunc_rep_tri_list) ## ----------------------------------------------------------------------------- retro_pt_nowcast_mat_list <- estimate_and_apply_delays( retro_reporting_triangles = retro_rep_tri_list, n = n_history_delay ) ## ----------------------------------------------------------------------------- disp_params <- estimate_uncertainty( point_nowcast_matrices = retro_pt_nowcast_mat_list, truncated_reporting_triangles = trunc_rep_tri_list, retro_reporting_triangles = retro_rep_tri_list, n = n_retrospective_nowcasts ) ## ----------------------------------------------------------------------------- nowcast_draws_df <- sample_nowcasts( point_nowcast_matrix, rep_tri, uncertainty_params = disp_params, draws = 100 ) head(nowcast_draws_df) ## ----------------------------------------------------------------------------- obs_with_nowcast_draws_df <- nowcast_draws_df |> left_join(latest_data, by = "reference_date") |> left_join(initial_reports, by = "reference_date") head(obs_with_nowcast_draws_df) ## ----------------------------------------------------------------------------- combined_data <- obs_with_nowcast_draws_df |> select(reference_date, initial_count, final_count) |> distinct() |> pivot_longer( cols = c(initial_count, final_count), names_to = "type", values_to = "count" ) |> mutate(type = case_when( type == "initial_count" ~ "Initial reports", type == "final_count" ~ "Final observed data" )) ## ----------------------------------------------------------------------------- combined_data <- obs_with_nowcast_draws_df |> select(reference_date, initial_count, final_count) |> distinct() |> pivot_longer( cols = c(initial_count, final_count), names_to = "type", values_to = "count" ) |> mutate(type = case_when( type == "initial_count" ~ "Initial reports", type == "final_count" ~ "Final observed data" )) # Plot with draws for nowcast only plot_prob_nowcast <- ggplot() + # Add nowcast draws as thin gray lines geom_line( data = obs_with_nowcast_draws_df, aes( x = reference_date, y = pred_count, group = draw, color = "Nowcast draw", linewidth = "Nowcast draw" ) ) + # Add observed data and final data once geom_line( data = combined_data, aes( x = reference_date, y = count, color = type, linewidth = type ) ) + theme_bw() + scale_color_manual( values = c( "Nowcast draw" = "gray", "Initial reports" = "darkred", "Final observed data" = "black" ), name = "" ) + scale_linewidth_manual( values = c( "Nowcast draw" = 0.2, "Initial reports" = 1, "Final observed data" = 1 ), name = "" ) + scale_y_continuous(trans = "log10") + xlab("Reference date") + ylab("Hospital admissions") + theme(legend.position = "bottom") + ggtitle("Comparison of admissions as of the nowcast date, later observed counts, \n and probabilistic nowcasted counts") # nolint ## ----------------------------------------------------------------------------- plot_prob_nowcast