## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  echo = TRUE,
  comment = "#>"
)

# Set to TRUE to regenerate long-running simulation results
run_long <- FALSE

library(maskedcauses)
old_opts <- options(digits = 4)

## ----observe-demo-------------------------------------------------------------
# Demonstrate the four monitoring schemes: each observe_* constructor returns a
# function mapping a latent failure time to an observed (possibly censored) record.

# 1. Right-censoring: continuous monitoring, study ends at tau
obs_right <- observe_right_censor(tau = 10)
obs_right(7)   # failure before tau -> exact
obs_right(15)  # survival past tau -> right-censored

# 2. Left-censoring: single inspection at tau
obs_left <- observe_left_censor(tau = 10)
obs_left(7)   # failed before inspection -> left-censored
obs_left(15)  # surviving at inspection -> right-censored

# 3. Periodic inspection: inspections every delta, study ends at tau
obs_periodic <- observe_periodic(delta = 2, tau = 10)
obs_periodic(5.3)  # failure in (4, 6) -> interval-censored
obs_periodic(15)   # survival past tau -> right-censored

# 4. Mixture: compose arbitrary monitoring schemes
obs_mixed <- observe_mixture(
  observe_right_censor(tau = 10),
  observe_left_censor(tau = 5),
  observe_periodic(delta = 2, tau = 10),
  weights = c(0.5, 0.2, 0.3)
)

## ----observe-rdata------------------------------------------------------------
# Generate masked-cause data under periodic inspection and tabulate the
# resulting observation types (omega column).
model <- exp_series_md_c1_c2_c3()
gen <- rdata(model)
theta <- c(1, 1.1, 0.95)
set.seed(42)
df <- gen(theta, n = 200, p = 0.3,
          observe = observe_periodic(delta = 0.5, tau = 5))
cat("Observation type distribution:\n")
print(table(df$omega))

## ----hand-verify-data---------------------------------------------------------
# Construct one observation of each type
df_mixed <- data.frame(
  t = c(3.0, 8.0, 5.0, 2.0),
  t_upper = c(NA, NA, NA, 6.0),
  omega = c("exact", "right", "left", "interval"),
  x1 = c(TRUE, FALSE, TRUE, TRUE),
  x2 = c(TRUE, FALSE, FALSE, TRUE),
  x3 = c(FALSE, FALSE, TRUE, FALSE),
  stringsAsFactors = FALSE
)
rates <- c(0.5, 0.3, 0.2)
lambda_sys <- sum(rates)  # system rate of the series system

## ----hand-verify-loglik-------------------------------------------------------
# Check the package log-likelihood against a hand-derived value for the
# four-observation data set above.
exp_model <- exp_series_md_c1_c2_c3()
ll_fn <- loglik(exp_model)
scr_fn <- score(exp_model)

# Total log-likelihood
ll_val <- ll_fn(df_mixed, rates)
cat("Log-likelihood:", round(ll_val, 6), "\n")

# Expected from hand calculation
# (log(-expm1(-x)) computes log(1 - exp(-x)) stably for the censored terms)
ll_exact <- log(0.8) - lambda_sys * 3
ll_right <- -lambda_sys * 8
ll_left <- log(0.7) + log(-expm1(-lambda_sys * 5)) - log(lambda_sys)
ll_interval <- log(0.8) - lambda_sys * 2 +
  log(-expm1(-lambda_sys * 4)) - log(lambda_sys)
ll_expected <- ll_exact + ll_right + ll_left + ll_interval
cat("Expected: ", round(ll_expected, 6), "\n")
cat("Match:", all.equal(ll_val, ll_expected, tolerance = 1e-10), "\n")

## ----hand-verify-score--------------------------------------------------------
# Cross-check the analytical score against a numerical gradient of the
# log-likelihood (numDeriv::grad).
scr_analytical <- scr_fn(df_mixed, rates)
scr_numerical <- numDeriv::grad(
  func = function(th) ll_fn(df_mixed, th),
  x = rates
)
score_df <- data.frame(
  Component = paste0("lambda_", 1:3),
  Analytical = round(scr_analytical, 6),
  Numerical = round(scr_numerical, 6),
  Abs_Diff = formatC(abs(scr_analytical - scr_numerical),
                     format = "e", digits = 2)
)
knitr::kable(score_df, caption = "Score verification: analytical vs numerical")

## ----load-precomputed, include=FALSE, eval=!run_long--------------------------
# When run_long is FALSE, load previously saved simulation results so the
# tables/plots below can be rendered without re-running the simulation.
list2env(readRDS("precomputed_censoring_comparison.rds"), envir = environment())

## ----sim-setup, eval=run_long, cache=TRUE-------------------------------------
# set.seed(7231)
# theta <- c(1.0, 1.1, 0.95)
# m <- length(theta)
# n <- 500
# B <- 200
# alpha <- 0.05
#
# exp_model <- exp_series_md_c1_c2_c3()
# gen <- rdata(exp_model)
# solver <- fit(exp_model)
# theta0 <- rep(1, m)
#
# # tau for ~25% right-censoring: solve S(tau) = 0.25
# lambda_sys <- sum(theta)
# tau_25 <- -log(0.25) / lambda_sys
#
# # tau for left-censoring: F(tau) ~ 0.75 -> same tau
# # For left_censor(tau), Pr(left) = F(tau), Pr(right) = S(tau)
#
# configs <- list(
#   A = list(
#     name = "100% exact",
#     observe = observe_right_censor(tau = Inf)
#   ),
#   B = list(
#     name = "75% exact + 25% right",
#     observe = observe_right_censor(tau = tau_25)
#   ),
#   C = list(
#     name = "~75% left + ~25% right",
#     observe = observe_left_censor(tau = tau_25)
#   ),
#   D = list(
#     name = "mix: exact + left + interval",
#     observe = observe_mixture(
#       observe_right_censor(tau = Inf),
#       observe_left_censor(tau = tau_25),
#       observe_periodic(delta = 0.3, tau = tau_25),
#       weights = c(0.75, 0.125, 0.125)
#     )
#   ),
#   E = list(
#     name = "mix: all four types",
#     observe = observe_mixture(
#       observe_right_censor(tau = tau_25),
#       observe_left_censor(tau = tau_25),
#       observe_periodic(delta = 0.3, tau = tau_25),
#       weights = c(0.70, 0.15, 0.15)
#     )
#   )
# )

## ----sim-run, eval=run_long, cache=TRUE, warning=FALSE, message=FALSE---------
# sim_results <- list()
#
# for (cfg_name in names(configs)) {
#   cfg <- configs[[cfg_name]]
#   estimates <- matrix(NA, nrow = B, ncol = m)
#   se_estimates <- matrix(NA, nrow = B, ncol = m)
#   ci_lower <- matrix(NA, nrow = B, ncol = m)
#   ci_upper <- matrix(NA, nrow = B, ncol = m)
#   converged <- logical(B)
#   omega_counts <- list()
#
#   for (b in 1:B) {
#     df_b <- gen(theta, n = n, p = 0.3, observe = cfg$observe)
#
#     if (b == 1) {
#       omega_counts[[cfg_name]] <- table(df_b$omega)
#     }
#
#     tryCatch({
#       result_b <- solver(df_b, par = theta0, method = "Nelder-Mead")
#       estimates[b, ] <- result_b$par
#       se_estimates[b, ] <- sqrt(diag(result_b$vcov))
#       z <- qnorm(1 - alpha / 2)
#       ci_lower[b, ] <- result_b$par - z * se_estimates[b, ]
#       ci_upper[b, ] <- result_b$par + z * se_estimates[b, ]
#       converged[b] <- result_b$converged
#     }, error = function(e) {
#       converged[b] <<- FALSE
#     })
#   }
#
#   valid <- converged & !is.na(estimates[, 1])
#   est_valid <- estimates[valid, , drop = FALSE]
#
#   bias <- colMeans(est_valid) - theta
#   variance <- apply(est_valid, 2, var)
#   mse <- bias^2 + variance
#
#   coverage <- numeric(m)
#   for (j in 1:m) {
#     valid_j <- valid & !is.na(ci_lower[, j])
#     covered <- ci_lower[valid_j, j] <= theta[j] &
#       theta[j] <= ci_upper[valid_j, j]
#     coverage[j] <- mean(covered)
#   }
#
#   sim_results[[cfg_name]] <- list(
#     name = cfg$name,
#     bias = bias,
#     variance = variance,
#     mse = mse,
#     coverage = coverage,
#     mean_mse = mean(mse),
#     mean_coverage = mean(coverage),
#     convergence_rate = mean(converged),
#     omega_sample = if (length(omega_counts) > 0) omega_counts[[cfg_name]] else NULL
#   )
# }

## ----sim-save, eval=run_long, include=FALSE-----------------------------------
# saveRDS(list(
#   sim_results = sim_results,
#   configs = configs,
#   theta = theta,
#   m = m,
#   n = n,
#   B = B,
#   alpha = alpha,
#   cross_model_results = if (exists("cross_model_results")) cross_model_results else NULL
# ), "precomputed_censoring_comparison.rds")

## ----sim-table----------------------------------------------------------------
# Summarize per-configuration estimation quality (objects come either from the
# sim-run chunk above or from the precomputed RDS).
summary_df <- data.frame(
  Config = names(sim_results),
  Description = sapply(sim_results, function(x) x$name),
  Mean_Bias = sapply(sim_results, function(x) mean(abs(x$bias))),
  Mean_MSE = sapply(sim_results, function(x) x$mean_mse),
  Mean_RMSE = sapply(sim_results, function(x) sqrt(x$mean_mse)),
  Mean_Coverage = sapply(sim_results, function(x) x$mean_coverage),
  Conv_Rate = sapply(sim_results, function(x) x$convergence_rate),
  stringsAsFactors = FALSE,
  row.names = NULL
)
knitr::kable(summary_df, digits = 4,
             caption = "Estimation quality by monitoring configuration",
             col.names = c("Config", "Description", "Mean |Bias|", "Mean MSE",
                           "Mean RMSE", "Coverage", "Conv. Rate"))

## ----sim-plot, fig.width=8, fig.height=5--------------------------------------
cfg_labels <- sapply(sim_results, function(x) x$name)
mse_vals <- sapply(sim_results, function(x) x$mean_mse)
cov_vals <- sapply(sim_results, function(x) x$mean_coverage)

oldpar <- par(mfrow = c(1, 2), mar = c(7, 4, 3, 1))

# MSE comparison
bp <- barplot(mse_vals, names.arg = names(sim_results),
              col = c("steelblue", "coral", "goldenrod",
                      "mediumpurple", "seagreen"),
              main = "Mean MSE by Configuration",
              ylab = "Mean MSE", las = 1)
text(bp, mse_vals + max(mse_vals) * 0.03,
     labels = round(mse_vals, 4), cex = 0.8)

# Coverage comparison (capture this plot's own bar centers for label placement)
bp2 <- barplot(cov_vals, names.arg = names(sim_results),
               col = c("steelblue", "coral", "goldenrod",
                       "mediumpurple", "seagreen"),
               main = "Mean Coverage by Configuration",
               ylab = "Coverage Probability", las = 1,
               ylim = c(0.85, 1.0))
abline(h = 1 - alpha, lty = 2, col = "red", lwd = 2)
text(x = bp2, y = 0.86, labels = round(cov_vals, 3), cex = 0.8)
legend("bottomright", legend = "Nominal 95%",
       lty = 2, col = "red", lwd = 2, cex = 0.8)

# NOTE: on.exit() has no effect at the top level of a script (it only registers
# inside a function frame), so restore the graphics state explicitly instead.
par(oldpar)

## ----sim-component-table------------------------------------------------------
# Per-component (lambda_j) bias, MSE, and coverage for every configuration.
comp_rows <- list()
for (cfg_name in names(sim_results)) {
  res <- sim_results[[cfg_name]]
  for (j in seq_len(m)) {
    comp_rows[[length(comp_rows) + 1]] <- data.frame(
      Config = cfg_name,
      Component = j,
      True = theta[j],
      Bias = res$bias[j],
      MSE = res$mse[j],
      Coverage = res$coverage[j],
      stringsAsFactors = FALSE
    )
  }
}
comp_df <- do.call(rbind, comp_rows)
knitr::kable(comp_df, digits = 4, row.names = FALSE,
             caption = "Per-component estimation metrics by configuration",
             col.names = c("Config", "Comp.", "True", "Bias", "MSE", "Coverage"))

## ----cross-model-demo---------------------------------------------------------
# Generate mixed-censoring data from exponential DGP
set.seed(42)
theta_exp <- c(1.0, 1.1, 0.95)
exp_model <- exp_series_md_c1_c2_c3()
gen_exp <- rdata(exp_model)
df_cross <- gen_exp(theta_exp, n = 300, p = 0.3,
                    observe = observe_mixture(
                      observe_right_censor(tau = 5),
                      observe_left_censor(tau = 3),
                      observe_periodic(delta = 0.5, tau = 5),
                      weights = c(0.5, 0.2, 0.3)
                    ))
cat("Observation types:\n")
print(table(df_cross$omega))

## ----cross-model-loglik-------------------------------------------------------
# Exponential loglik at true parameters
ll_exp_fn <- loglik(exp_model)
ll_exp_val <- ll_exp_fn(df_cross, theta_exp)

# Homogeneous Weibull with k=1, scales = 1/rates
# (a Weibull with shape 1 is exponential, so the two should agree)
hom_model <- wei_series_homogeneous_md_c1_c2_c3()
ll_hom_fn <- loglik(hom_model)
scales_from_rates <- 1 / theta_exp
ll_hom_val <- ll_hom_fn(df_cross, c(1, scales_from_rates))

cat("Exponential log-likelihood: ", round(ll_exp_val, 4), "\n")
cat("Homogeneous Weibull (k=1) log-likelihood:", round(ll_hom_val, 4), "\n")
cat("Difference: ",
    formatC(ll_exp_val - ll_hom_val, format = "e", digits = 2), "\n")

## ----cross-model-fit, warning=FALSE-------------------------------------------
# Fit both models
solver_exp <- fit(exp_model)
solver_hom <- fit(hom_model)
mle_exp <- solver_exp(df_cross, par = rep(1, 3), method = "Nelder-Mead")
mle_hom <- solver_hom(df_cross, par = c(1, rep(1, 3)), method = "Nelder-Mead")

cat("Exponential MLE (rates):", round(mle_exp$par, 4), "\n")
cat("Homogeneous Weibull MLE (k, scales):", round(mle_hom$par, 4), "\n")
cat("\nExponential loglik at MLE:", round(mle_exp$loglik, 4), "\n")
cat("Weibull loglik at MLE: ", round(mle_hom$loglik, 4), "\n")

## ----cross-model-shape--------------------------------------------------------
cat("Estimated shape k:", round(mle_hom$par[1], 4), "\n")
cat("(Expected: 1.0 for exponential DGP)\n")

## ----timing-demo--------------------------------------------------------------
# Time loglik evaluation on mixed-censoring data
set.seed(42)
df_large <- gen_exp(theta_exp, n = 1000, p = 0.3,
                    observe = observe_mixture(
                      observe_right_censor(tau = 5),
                      observe_periodic(delta = 0.5, tau = 5),
                      weights = c(0.5, 0.5)
                    ))
wei_model <- wei_series_md_c1_c2_c3()
ll_wei_fn <- loglik(wei_model)
# Heterogeneous Weibull parameters: (shape, scale) pairs per component,
# shape fixed at 1 and scale = 1/rate so it matches the exponential DGP.
wei_par <- c(1, 1 / theta_exp[1], 1, 1 / theta_exp[2], 1, 1 / theta_exp[3])

t_exp <- system.time(replicate(10, ll_exp_fn(df_large, theta_exp)))
t_hom <- system.time(replicate(10, ll_hom_fn(df_large, c(1, 1 / theta_exp))))
t_wei <- system.time(replicate(10, ll_wei_fn(df_large, wei_par)))

timing_df <- data.frame(
  Model = c("Exponential", "Homogeneous Weibull", "Heterogeneous Weibull"),
  Time_10_evals = round(c(t_exp["elapsed"], t_hom["elapsed"],
                          t_wei["elapsed"]), 3),
  Method = c("Closed-form", "Closed-form", "Numerical integration"),
  stringsAsFactors = FALSE
)
knitr::kable(timing_df,
             caption = "Log-likelihood evaluation time (10 evaluations, n=1000)",
             col.names = c("Model", "Time (s)", "Left/Interval Method"))

## ----cleanup, include=FALSE---------------------------------------------------
options(old_opts)