## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5
)

## ----setup--------------------------------------------------------------------
library(bayesDiagnostics)
library(brms)
library(ggplot2)

## ----mcmc-diagnostics, eval=FALSE---------------------------------------------
# # Fit a simple model
# data <- data.frame(
#   y = rnorm(100, mean = 5, sd = 2),
#   x1 = rnorm(100),
#   x2 = rnorm(100)
# )
# 
# fit <- brm(y ~ x1 + x2, data = data, chains = 4, iter = 2000, refresh = 0)
# 
# # Run comprehensive diagnostics
# diag <- mcmc_diagnostics_summary(fit, rhat_threshold = 1.01, ess_threshold = 400)
# print(diag)
# 
# # Check results
# diag$converged      # TRUE/FALSE
# diag$summary_table  # Full diagnostic table
# diag$divergences    # Number of divergent transitions

## ----ess-diagnostics, eval=FALSE----------------------------------------------
# # Detailed ESS analysis
# ess_diag <- effective_sample_size_diagnostics(
#   model = fit,
#   parameters = c("b_x1", "b_x2", "sigma"),
#   min_ess = 400,
#   by_chain = TRUE,
#   plot = TRUE
# )
# 
# print(ess_diag)
# plot(ess_diag)
# 
# # Access specific components
# ess_diag$ess_summary         # Summary statistics
# ess_diag$bulk_ess            # Bulk ESS per parameter
# ess_diag$tail_ess            # Tail ESS per parameter
# ess_diag$by_chain_ess        # Per-chain ESS breakdown
# ess_diag$problematic_params  # Parameters with low ESS
# ess_diag$recommendations     # Actionable suggestions

## ----hierarchical-convergence, eval=FALSE-------------------------------------
# # Fit hierarchical model
# data_hier <- data.frame(
#   y = rnorm(200),
#   x = rnorm(200),
#   group = rep(1:10, each = 20)
# )
# 
# fit_hier <- brm(
#   y ~ x + (1 + x | group),
#   data = data_hier,
#   chains = 4,
#   refresh = 0
# )
# 
# # Check hierarchical convergence
# hier_conv <- hierarchical_convergence(
#   model = fit_hier,
#   group_vars = "group",
#   plot = TRUE
# )
# 
# print(hier_conv)
# plot(hier_conv)

## ----ppc-basic, eval=FALSE----------------------------------------------------
# # Basic PPC with multiple test statistics
# ppc_result <- posterior_predictive_check(
#   model = fit,
#   observed_data = data$y,
#   n_samples = 1000,
#   test_statistics = c("mean", "sd", "median", "min", "max", "skewness", "kurtosis"),
#   plot = TRUE
# )
# 
# print(ppc_result)
# plot(ppc_result)
# 
# # Access results
# ppc_result$observed_stats    # Observed test statistics
# ppc_result$replicated_stats  # Posterior predictive statistics
# ppc_result$p_values          # Bayesian p-values (should be ~0.5)

## ----automated-ppc, eval=FALSE------------------------------------------------
# # Automated checks with flagging
# auto_ppc <- automated_ppc(
#   model = fit,
#   observed_data = data$y,
#   n_samples = 1000,
#   p_value_threshold = 0.05
# )
# 
# print(auto_ppc)
# 
# # Check for issues
# auto_ppc$diagnostics  # Table with all statistics and flags
# auto_ppc$flags        # Text warnings for problematic statistics

## ----graphical-ppc, eval=FALSE------------------------------------------------
# # Density overlay
# p1 <- graphical_ppc(fit, data$y, type = "density", n_draws = 50)
# print(p1)
# 
# # Prediction intervals
# p2 <- graphical_ppc(fit, data$y, type = "intervals", n_draws = 50)
# print(p2)
# 
# # Ribbon plot (useful for ordered/time-series data)
# p3 <- graphical_ppc(fit, data$y, type = "ribbon", n_draws = 50)
# print(p3)
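
## ----graphical-ppc-bayesplot, eval=FALSE--------------------------------------
# # A minimal sketch of a comparable density overlay built directly with the
# # bayesplot package (installed alongside brms). The chunk name and code here
# # are illustrative and not part of the bayesDiagnostics API.
# library(bayesplot)
# 
# yrep <- posterior_predict(fit, ndraws = 50)  # 50 replicated data sets
# ppc_dens_overlay(y = data$y, yrep = yrep)    # Observed vs. replicated densities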
## ----ppc-loo, eval=FALSE------------------------------------------------------
# # LOO-PIT check
# loo_ppc <- ppc_crossvalidation(
#   model = fit,
#   observed_y = data$y,
#   n_draws = NULL  # Use all draws for accuracy
# )
# 
# print(loo_ppc)
# plot(loo_ppc)
# 
# # Inspect results
# loo_ppc$pit_values  # Should be ~Uniform(0,1) if well-calibrated
# loo_ppc$loo_result  # LOO object
# loo_ppc$weighted    # Whether weighted PIT was used

## ----custom-pvalues, eval=FALSE-----------------------------------------------
# # Generate posterior predictive samples
# yrep <- posterior_predict(fit, ndraws = 1000)
# 
# # Define custom test statistic
# custom_stat <- function(x) max(x) - min(x)  # Range
# 
# # Calculate Bayesian p-value
# result <- bayesian_p_values(
#   yrep = yrep,
#   y = data$y,
#   statistic = custom_stat
# )
# 
# result$observed_value     # Observed range
# result$replicated_values  # Posterior predictive ranges
# result$p_value            # P(replicated ≥ observed)

## ----prior-elicitation, eval=FALSE--------------------------------------------
# # Define expert beliefs
# expert_input <- list(
#   parameter_name = "treatment_effect",
#   plausible_range = c(-0.5, 1.5),  # Plausible values
#   most_likely_value = 0.3,         # Best guess
#   confidence = 0.8                 # How confident (0-1)
# )
# 
# # Get prior recommendations
# prior_rec <- prior_elicitation_helper(
#   expert_beliefs = expert_input,
#   parameter_type = "continuous",
#   method = "quantile",
#   data_sample = rnorm(100, 0.3, 0.5),  # Optional: existing data
#   visualize = TRUE
# )
# 
# print(prior_rec)
# 
# # Use recommended prior
# prior_rec$recommended_prior  # brms::prior object
# prior_rec$alternatives       # Alternative priors for sensitivity
# prior_rec$sensitivity_note   # Guidance

## ----prior-sensitivity, eval=FALSE--------------------------------------------
# # Define alternative priors
# prior_grid <- list(
#   weak = set_prior("normal(0, 10)", class = "b"),
#   moderate = set_prior("normal(0, 2)", class = "b"),
#   strong = set_prior("normal(0, 0.5)", class = "b")
# )
# 
# # Run sensitivity analysis
# sens_result <- prior_sensitivity(
#   model = fit,
#   parameters = c("b_x1", "b_x2"),
#   prior_grid = prior_grid,
#   comparison_metric = "KL",  # or "Wasserstein", "overlap"
#   plot = TRUE,
#   n_draws = 2000
# )
# 
# print(sens_result)
# plot(sens_result)
# 
# # Check sensitivity
# sens_result$sensitivity_metrics  # How much posteriors differ

## ----prior-robustness, eval=FALSE---------------------------------------------
# robust_result <- prior_robustness(
#   model = fit,
#   prior_specifications = prior_grid,
#   parameters = c("b_x1", "b_x2"),
#   perturbation_direction = "expand",  # or "contract", "shift"
#   dimensions = c(0.5, 1, 2, 4),       # Scaling factors
#   comparison_metric = "KL",
#   plot = TRUE
# )
# 
# print(robust_result)
# 
# # Check robustness
# robust_result$robustness_index       # Composite score (higher = more robust)
# robust_result$concerning_parameters  # Parameters with low robustness
# robust_result$recommendations        # What to do

## ----model-comparison, eval=FALSE---------------------------------------------
# # Fit competing models
# fit1 <- brm(y ~ x1, data = data, refresh = 0)
# fit2 <- brm(y ~ x1 + x2, data = data, refresh = 0)
# fit3 <- brm(y ~ x1 * x2, data = data, refresh = 0)
# 
# # Compare models
# comparison <- model_comparison_suite(
#   Model_1 = fit1,
#   Model_2 = fit2,
#   Model_3 = fit3,
#   criterion = "all",  # LOO, WAIC, Bayes R²
#   plot = TRUE,
#   detailed = TRUE
# )
# 
# print(comparison)
# plot(comparison)
# 
# # Results
# comparison$comparison_table  # All metrics
# comparison$ic_differences    # ΔLOO, ΔWAIC, model weights
# comparison$plots             # Visualization
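
## ----model-comparison-loo, eval=FALSE-----------------------------------------
# # For cross-checking the suite's LOO results, the comparison can also be run
# # directly with brms and the loo package. A minimal sketch using the standard
# # loo() and loo_compare() functions, independent of bayesDiagnostics:
# loo1 <- loo(fit1)
# loo2 <- loo(fit2)
# loo3 <- loo(fit3)
# 
# loo_compare(loo1, loo2, loo3)  # elpd differences and their standard errors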
## ----bayes-factor, eval=FALSE-------------------------------------------------
# # Compare two models
# bf_result <- bayes_factor_comparison(
#   Model_A = fit1,
#   Model_B = fit2,
#   method = "bridge_sampling",  # or "waic"
#   repetitions = 5,
#   silent = TRUE
# )
# 
# print(bf_result)
# 
# # Interpretation
# bf_result$bayes_factor    # BF_{A,B}
# bf_result$log_bf          # Log Bayes factor
# bf_result$interpretation  # Text interpretation
# 
# # For 3+ models: pairwise comparisons
# bf_multi <- bayes_factor_comparison(fit1, fit2, fit3, method = "bridge_sampling")
# bf_multi$pairwise_comparisons

## ----predictive-performance, eval=FALSE---------------------------------------
# # In-sample performance
# perf_in <- predictive_performance(
#   model = fit,
#   newdata = NULL,   # NULL = use training data
#   observed_y = data$y,
#   metrics = "all",  # RMSE, MAE, Coverage, CRPS
#   credible_level = 0.95,
#   n_draws = NULL
# )
# 
# print(perf_in)
# plot(perf_in)
# 
# # Out-of-sample performance
# test_data <- data.frame(x1 = rnorm(50), x2 = rnorm(50), y = rnorm(50))
# perf_out <- predictive_performance(
#   model = fit,
#   newdata = test_data,
#   observed_y = test_data$y,
#   metrics = "all"
# )
# 
# # Compare metrics
# perf_in$point_metrics       # RMSE, MAE, Correlation
# perf_in$interval_metrics    # Coverage, interval width
# perf_in$proper_scores       # CRPS (lower is better)
# perf_in$prediction_summary  # Per-observation predictions

## ----extract-posterior, eval=FALSE--------------------------------------------
# # Extract as data frame
# draws_df <- extract_posterior_unified(
#   model = fit,
#   parameters = c("b_x1", "b_x2", "sigma"),
#   format = "draws_df",
#   ndraws = 1000,
#   include_warmup = FALSE,
#   chains = NULL  # All chains
# )
# 
# # Extract as matrix
# draws_matrix <- extract_posterior_unified(fit, format = "draws_matrix")
# 
# # Extract as array (iterations × chains × parameters)
# draws_array <- extract_posterior_unified(fit, format = "draws_array")
# 
# # Extract as named list
# draws_list <- extract_posterior_unified(fit, format = "list")
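
## ----extract-posterior-pkg, eval=FALSE----------------------------------------
# # For comparison, the posterior package (installed alongside brms) provides
# # the same draw formats directly. A minimal sketch, independent of
# # bayesDiagnostics:
# library(posterior)
# 
# draws <- as_draws_df(fit)  # One row per posterior draw
# draws_sub <- subset_draws(draws, variable = c("b_x1", "b_x2", "sigma"))
# summarise_draws(draws_sub)  # Mean, sd, quantiles, rhat, ess_bulk, ess_tail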
## ----diagnostic-report, eval=FALSE--------------------------------------------
# # Generate comprehensive report
# diagnostic_report(
#   model = fit,
#   output_file = "bayesian_model_diagnostics.pdf",
#   output_format = "pdf",  # or "html", "docx"
#   include_sections = c(
#     "model_summary",
#     "convergence",
#     "posterior_summary",
#     "recommendations"
#   ),
#   rhat_threshold = 1.01,
#   ess_threshold = 0.1,
#   open_report = TRUE  # Open automatically
# )

## ----complete-workflow, eval=FALSE--------------------------------------------
# # ===== STEP 1: FIT MODEL =====
# library(bayesDiagnostics)
# library(brms)
# 
# # Simulate data: draw the predictors first so that y actually depends on them
# set.seed(123)
# N <- 200
# x1 <- rnorm(N)
# x2 <- rnorm(N)
# data <- data.frame(
#   y = 3 + 2 * x1 - 0.5 * x2 + rnorm(N, sd = 1.5),
#   x1 = x1,
#   x2 = x2
# )
# 
# # Fit Bayesian model
# fit <- brm(
#   y ~ x1 + x2,
#   data = data,
#   prior = c(
#     prior(normal(0, 5), class = "b"),
#     prior(student_t(3, 0, 2.5), class = "sigma")
#   ),
#   chains = 4,
#   iter = 2000,
#   warmup = 1000,
#   refresh = 0
# )
# 
# # ===== STEP 2: CONVERGENCE DIAGNOSTICS =====
# # Quick check
# diag <- mcmc_diagnostics_summary(fit)
# print(diag)
# 
# # Detailed ESS analysis
# ess_diag <- effective_sample_size_diagnostics(fit, plot = TRUE)
# plot(ess_diag)
# 
# # ===== STEP 3: POSTERIOR PREDICTIVE CHECKS =====
# # Comprehensive PPC
# ppc <- posterior_predictive_check(fit, observed_data = data$y, plot = TRUE)
# print(ppc)
# 
# # Automated screening
# auto_ppc <- automated_ppc(fit, data$y)
# print(auto_ppc)
# 
# # LOO cross-validation
# loo_ppc <- ppc_crossvalidation(fit, data$y)
# plot(loo_ppc)
# 
# # ===== STEP 4: PRIOR SENSITIVITY =====
# # Define alternative priors
# prior_grid <- list(
#   weak = set_prior("normal(0, 10)", class = "b"),
#   moderate = set_prior("normal(0, 5)", class = "b"),
#   strong = set_prior("normal(0, 1)", class = "b")
# )
# 
# # Test sensitivity
# sens <- prior_sensitivity(
#   fit,
#   parameters = c("b_x1", "b_x2"),
#   prior_grid = prior_grid,
#   plot = TRUE
# )
# print(sens)
# 
# # ===== STEP 5: MODEL COMPARISON =====
# # Fit alternative models
# fit_x1 <- brm(y ~ x1, data = data, refresh = 0)
# fit_x1x2 <- fit
# fit_interaction <- brm(y ~ x1 * x2, data = data, refresh = 0)
# 
# # Compare
# comp <- model_comparison_suite(
#   Linear = fit_x1,
#   Additive = fit_x1x2,
#   Interaction = fit_interaction,
#   criterion = "all",
#   plot = TRUE
# )
# print(comp)
# 
# # ===== STEP 6: PREDICTIVE PERFORMANCE =====
# # Hold-out validation
# test_idx <- sample(1:N, 40)
# test_data <- data[test_idx, ]
# train_data <- data[-test_idx, ]
# 
# fit_train <- update(fit, newdata = train_data, refresh = 0)
# 
# perf <- predictive_performance(
#   fit_train,
#   newdata = test_data,
#   observed_y = test_data$y,
#   metrics = "all"
# )
# print(perf)
# plot(perf)
# 
# # ===== STEP 7: GENERATE REPORT =====
# diagnostic_report(
#   fit,
#   output_file = "full_diagnostics.html",
#   output_format = "html",
#   include_sections = c("model_summary", "convergence",
#                        "posterior_summary", "recommendations")
# )

## ----help, eval=FALSE----------------------------------------------------------
# # Function documentation
# ?mcmc_diagnostics_summary
# ?posterior_predictive_check
# ?prior_sensitivity
# 
# # Package vignettes
# browseVignettes("bayesDiagnostics")
# 
# # Report issues
# # https://github.com/yourusername/bayesDiagnostics/issues
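
## ----session-info, eval=FALSE--------------------------------------------------
# # When reporting an issue, attaching version information helps reproduction.
# # A minimal sketch using base R only:
# packageVersion("bayesDiagnostics")
# sessionInfo()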