## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 10, fig.height = 5.5, warning = FALSE, message = FALSE ) ## ----load-packages------------------------------------------------------------ library(HHBayes) library(dplyr) library(rstan) library(ggpubr) ## ----setup-dates-------------------------------------------------------------- study_start <- "2024-07-01" study_end <- "2025-06-30" cat("Study duration:", as.integer(as.Date(study_end) - as.Date(study_start)) + 1, "days\n") ## ----setup-surveillance------------------------------------------------------- dates_weekly <- seq(from = as.Date(study_start), to = as.Date(study_end), by = "week") surveillance_data <- data.frame( date = dates_weekly, cases = 0.1 + 100 * exp(-0.0002 * (as.numeric(dates_weekly - mean(dates_weekly)))^2) + abs(rnorm(length(dates_weekly), mean = 0, sd = 10)) ) plot(surveillance_data$date, surveillance_data$cases, type = "l", lwd = 2, col = "steelblue", xlab = "Date", ylab = "Weekly cases", main = "Synthetic Surveillance Curve") ## ----setup-contact------------------------------------------------------------ role_mixing_weights <- matrix(c( # Target: Infant Toddler Adult Elderly 0.0, 0.5, 1.0, 0.5, # Source: Infant 0.5, 0.9, 0.7, 0.5, # Source: Toddler 1.0, 0.7, 0.6, 0.7, # Source: Adult 0.5, 0.5, 0.7, 0.0 # Source: Elderly ), nrow = 4, byrow = TRUE, dimnames = list( c("infant", "toddler", "adult", "elderly"), c("infant", "toddler", "adult", "elderly"))) role_mixing_weights ## ----setup-household---------------------------------------------------------- household_profile <- list( prob_adults = c(0, 0, 1), # P(0, 1, 2 adults) — always 2 parents prob_infant = 1.0, # P(infant present) — always 1 infant prob_siblings = c(0, 0.8, 0.2), # P(0, 1, 2 toddlers) — 80% one, 20% two prob_elderly = c(0.7, 0.1, 0.2) # P(0, 1, 2 elderly) — 70% none ) ## ----setup-household-examples, eval = FALSE----------------------------------- # # Nuclear Western family # nuclear <- list( # prob_adults = c(0.05, 0.15, 0.80), # prob_infant = 0.5, # prob_siblings = c(0.40, 0.45, 0.15), # prob_elderly = c(0.95, 0.04, 0.01) # ) # # # Multi-generational Asian family # multigenerational <- list( # prob_adults = c(0, 0, 1), # prob_infant = 1.0, # prob_siblings = c(0.05, 0.30, 0.65), # prob_elderly = c(0.20, 0.50, 0.30) # ) ## ----setup-intervention------------------------------------------------------- sim_config <- list( list( name = "vacc_status", efficacy = 0, # Set to 0 for baseline (no effect) effect_on = "both", coverage = list( infant = 0, toddler = 0, adult = 0, elderly = 0 ) ) ) ## ----setup-intervention-real, eval = FALSE------------------------------------ # # Real vaccination scenario # vacc_config <- list( # list( # name = "vacc_status", # efficacy = 0.5, # 50% reduction in susceptibility and infectivity # effect_on = "both", # coverage = list( # infant = 0.00, # Not eligible # toddler = 0.30, # adult = 0.80, # elderly = 0.90 # ) # ), # # You can add multiple interventions — they stack multiplicatively # list( # name = "masked", # efficacy = 0.3, # effect_on = "both", # coverage = list(infant = 0, toddler = 0.1, adult = 0.7, elderly = 0.6) # ) # ) ## ----simulate----------------------------------------------------------------- sim_res <- simulate_multiple_households_comm( n_households = 50, viral_testing = "viral load", model_type = "ODE", infectious_shape = 10, infectious_scale = 1, waning_shape = 6, waning_scale = 10, surveillance_interval = 4, start_date = study_start, end_date = study_end, surveillance_df = surveillance_data, covariates_config = sim_config, household_profile_list = household_profile, role_mixing_matrix = role_mixing_weights, seed = 123 ) ## ----inspect-hh--------------------------------------------------------------- head(sim_res$hh_df) ## ----inspect-hh-str----------------------------------------------------------- cat("Columns:", paste(names(sim_res$hh_df), collapse = ", "), "\n") cat("Total person-episodes:", nrow(sim_res$hh_df), "\n") cat("Unique people:", nrow(distinct(sim_res$hh_df, hh_id, person_id)), "\n") ## ----inspect-diag------------------------------------------------------------- head(sim_res$diagnostic_df) ## ----inspect-diag-str--------------------------------------------------------- cat("Columns:", paste(names(sim_res$diagnostic_df), collapse = ", "), "\n") cat("Total test records:", nrow(sim_res$diagnostic_df), "\n") cat("Positive tests:", sum(sim_res$diagnostic_df$test_result), "\n") ## ----attack-rates------------------------------------------------------------- rates <- summarize_attack_rates(sim_res) ## ----ar-overall--------------------------------------------------------------- print(rates$primary_overall) ## ----ar-by-role--------------------------------------------------------------- print(rates$primary_by_role) ## ----ar-reinf----------------------------------------------------------------- print(rates$reinf_overall) print(rates$reinf_by_role) ## ----epi-curve, fig.width=10, fig.height=6------------------------------------ my_plot <- plot_epidemic_curve( sim_res, surveillance_data, start_date_str = study_start, bin_width = 7 # Weekly bins ) print(my_plot) ## ----stan-join---------------------------------------------------------------- # Extract a 1-row-per-person covariate table person_covariates <- sim_res$hh_df %>% dplyr::select(hh_id, person_id, vacc_status) %>% dplyr::distinct() # Merge into diagnostic data df_for_stan <- sim_res$diagnostic_df %>% dplyr::left_join(person_covariates, by = c("hh_id", "person_id")) cat("Rows in df_for_stan:", nrow(df_for_stan), "\n") cat("Columns:", paste(names(df_for_stan), collapse = ", "), "\n") ## ----stan-priors-------------------------------------------------------------- my_priors <- list( beta1 = list(dist = "normal", params = c(-5, 1)), beta2 = list(dist = "normal", params = c(-5, 1)), alpha = list(dist = "normal", params = c(-4, 1)), covariates = list(dist = "normal", params = c(0, 2)), gen_shape = list(dist = "lognormal", params = c(1.5, 0.5)), gen_rate = list(dist = "lognormal", params = c(0.0, 0.5)), ct50 = list(dist = "normal", params = c(3, 1)), slope = list(dist = "lognormal", params = c(0.4, 0.5)) ) ## ----stan-vl-params----------------------------------------------------------- VL_params_list <- list( adult = list(v_p = 4.14, t_p = 5.09, lambda_g = 2.31, lambda_d = 2.71), infant = list(v_p = 5.84, t_p = 4.09, lambda_g = 2.82, lambda_d = 1.01), toddler = list(v_p = 5.84, t_p = 4.09, lambda_g = 2.82, lambda_d = 1.01), elderly = list(v_p = 2.95, t_p = 5.10, lambda_g = 3.15, lambda_d = 0.87) ) ## ----stan-prepare------------------------------------------------------------- stan_input <- prepare_stan_data( df_clean = df_for_stan, surveillance_df = surveillance_data, study_start_date = as.Date(study_start), study_end_date = as.Date(study_end), use_vl_data = 1, model_type = "ODE", ODE_params_list = NULL, covariates_susceptibility = NULL, # No covariates in this baseline covariates_infectivity = NULL, imputation_params = VL_params_list, priors = my_priors, role_mixing_matrix = role_mixing_weights ) cat("Stan data prepared successfully.\n") cat("Number of individuals (N):", stan_input$N, "\n") cat("Number of time steps (T):", stan_input$T, "\n") cat("Number of households (H):", stan_input$H, "\n") cat("Number of roles (R):", stan_input$R, "\n") ## ----fit-model, eval = FALSE-------------------------------------------------- # options(mc.cores = parallel::detectCores()) # # fit <- fit_household_model( # stan_input, # pars = c("log_phi_by_role_raw", # "log_kappa_by_role_raw", # "log_beta1", # "log_beta2", # "log_alpha_comm", # "g_curve_est", # "V_term_calc"), # include = FALSE, # Exclude these internal parameters from saved output # iter = 1000, # For testing; use 2000+ for real analysis # warmup = 500, # For testing; use 1000+ for real analysis # chains = 1 # For testing; use 4 for real analysis # ) ## ----fit-model-real, eval = FALSE--------------------------------------------- # fit <- fit_household_model( # stan_input, # pars = c("log_phi_by_role_raw", "log_kappa_by_role_raw", # "log_beta1", "log_beta2", "log_alpha_comm", # "g_curve_est", "V_term_calc"), # include = FALSE, # iter = 2000, # warmup = 1000, # chains = 4 # ) ## ----fit-summary, eval = FALSE------------------------------------------------ # print(fit, probs = c(0.025, 0.5, 0.975)) ## ----fit-diagnostics, eval = FALSE-------------------------------------------- # # Quick check # fit_summary <- rstan::summary(fit)$summary # cat("Max Rhat:", max(fit_summary[, "Rhat"], na.rm = TRUE), "\n") # cat("Min n_eff:", min(fit_summary[, "n_eff"], na.rm = TRUE), "\n") # # # Trace plots (requires bayesplot) # if (requireNamespace("bayesplot", quietly = TRUE)) { # bayesplot::mcmc_trace(fit, pars = c("beta1", "beta2", "alpha_comm")) # } ## ----plot-posteriors, eval = FALSE-------------------------------------------- # p_post <- plot_posterior_distributions(fit) # print(p_post) ## ----plot-covariates, eval = FALSE-------------------------------------------- # plot_covariate_effects(fit, stan_input) ## ----plot-chains, eval = FALSE------------------------------------------------ # chains <- reconstruct_transmission_chains( # fit, # stan_input, # min_prob_threshold = 0.001 # Keep links with >= 0.1% probability # ) # # head(chains) ## ----plot-timeline, eval = FALSE---------------------------------------------- # p_hh <- plot_household_timeline( # chains, # stan_input, # target_hh_id = 11 # Plot household #11 # ) # print(p_hh) ## ----plot-timeline-filtered, eval = FALSE------------------------------------- # p_hh_filtered <- plot_household_timeline( # chains, stan_input, # target_hh_id = 11, # prob_cutoff = 0.05, # plot_width = 11, # plot_height = 7 # ) # print(p_hh_filtered) ## ----full-script, eval = FALSE------------------------------------------------ # library(HHBayes) # library(dplyr) # library(rstan) # library(ggpubr) # # # ============================================================================== # # 1. SETUP # # ============================================================================== # # study_start <- "2024-07-01" # study_end <- "2025-06-30" # # # Surveillance data # dates_weekly <- seq(as.Date(study_start), as.Date(study_end), by = "week") # surveillance_data <- data.frame( # date = dates_weekly, # cases = 0.1 + 100 * exp(-0.0002 * (as.numeric(dates_weekly - # mean(dates_weekly)))^2) + abs(rnorm(length(dates_weekly), 0, 10)) # ) # # # Contact matrix # role_mixing_weights <- matrix(c( # 0.0, 0.5, 1.0, 0.5, # 0.5, 0.9, 0.7, 0.5, # 1.0, 0.7, 0.6, 0.7, # 0.5, 0.5, 0.7, 0.0 # ), nrow = 4, byrow = TRUE, # dimnames = list( # c("infant", "toddler", "adult", "elderly"), # c("infant", "toddler", "adult", "elderly"))) # # # Household profile # household_profile <- list( # prob_adults = c(0, 0, 1), # prob_infant = 1.0, # prob_siblings = c(0, 0.8, 0.2), # prob_elderly = c(0.7, 0.1, 0.2) # ) # # # Intervention (baseline: no effect) # sim_config <- list( # list(name = "vacc_status", efficacy = 0, effect_on = "both", # coverage = list(infant = 0, toddler = 0, adult = 0, elderly = 0)) # ) # # # ============================================================================== # # 2. SIMULATE # # ============================================================================== # # sim_res <- simulate_multiple_households_comm( # n_households = 50, viral_testing = "viral load", model_type = "ODE", # infectious_shape = 10, infectious_scale = 1, # waning_shape = 6, waning_scale = 10, # surveillance_interval = 4, # start_date = study_start, end_date = study_end, # surveillance_df = surveillance_data, # covariates_config = sim_config, # household_profile_list = household_profile, # role_mixing_matrix = role_mixing_weights, # seed = 123 # ) # # rates <- summarize_attack_rates(sim_res) # print(rates$primary_by_role) # # plot_epidemic_curve(sim_res, surveillance_data, # start_date_str = study_start, bin_width = 7) # # # ============================================================================== # # 3. PREPARE FOR STAN # # ============================================================================== # # person_covariates <- sim_res$hh_df %>% # select(hh_id, person_id, vacc_status) %>% distinct() # # df_for_stan <- sim_res$diagnostic_df %>% # left_join(person_covariates, by = c("hh_id", "person_id")) # # my_priors <- list( # beta1 = list(dist = "normal", params = c(-5, 1)), # beta2 = list(dist = "normal", params = c(-5, 1)), # alpha = list(dist = "normal", params = c(-4, 1)), # covariates = list(dist = "normal", params = c(0, 2)), # gen_shape = list(dist = "lognormal", params = c(1.5, 0.5)), # gen_rate = list(dist = "lognormal", params = c(0.0, 0.5)), # ct50 = list(dist = "normal", params = c(3, 1)), # slope = list(dist = "lognormal", params = c(0.4, 0.5)) # ) # # VL_params_list <- list( # adult = list(v_p = 4.14, t_p = 5.09, lambda_g = 2.31, lambda_d = 2.71), # infant = list(v_p = 5.84, t_p = 4.09, lambda_g = 2.82, lambda_d = 1.01), # toddler = list(v_p = 5.84, t_p = 4.09, lambda_g = 2.82, lambda_d = 1.01), # elderly = list(v_p = 2.95, t_p = 5.10, lambda_g = 3.15, lambda_d = 0.87) # ) # # stan_input <- prepare_stan_data( # df_clean = df_for_stan, surveillance_df = surveillance_data, # study_start_date = as.Date(study_start), # study_end_date = as.Date(study_end), # use_vl_data = 1, model_type = "ODE", # imputation_params = VL_params_list, priors = my_priors, # role_mixing_matrix = role_mixing_weights # ) # # # ============================================================================== # # 4. FIT MODEL # # ============================================================================== # # options(mc.cores = parallel::detectCores()) # # fit <- fit_household_model( # stan_input, # pars = c("log_phi_by_role_raw", "log_kappa_by_role_raw", # "log_beta1", "log_beta2", "log_alpha_comm", # "g_curve_est", "V_term_calc"), # include = FALSE, # iter = 1000, warmup = 500, chains = 1 # ) # # # ============================================================================== # # 5. RESULTS # # ============================================================================== # # print(fit, probs = c(0.025, 0.5, 0.975)) # plot_posterior_distributions(fit) # # chains <- reconstruct_transmission_chains(fit, stan_input, # min_prob_threshold = 0.001) # plot_household_timeline(chains, stan_input, target_hh_id = 11) ## ----session-info------------------------------------------------------------- sessionInfo()