## Purled vignette code for the `maskedcauses` package: Weibull series systems
## with homogeneous shape under masked component cause of failure.
## Long-running Monte Carlo chunks are gated by `run_long` and kept as
## commented code (purl of eval=run_long chunks); precomputed results are
## loaded from disk instead.

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  echo = TRUE,
  comment = "#>")

# Set to TRUE to regenerate long-running simulation results
run_long <- FALSE

library(maskedcauses)
old_opts <- options(digits = 4)

## ----worked-example-params----------------------------------------------------
# True parameter vector: common shape k plus one Weibull scale per component.
theta <- c(k = 1.5, beta1 = 100, beta2 = 150, beta3 = 200)
k <- theta[1]
scales <- theta[-1]
m <- length(scales)

# Series-system lifetime is Weibull(k, beta_sys) when shapes are homogeneous.
beta_sys <- wei_series_system_scale(k, scales)
cat("System scale (beta_sys):", round(beta_sys, 2), "\n")
cat("System mean lifetime:", round(beta_sys * gamma(1 + 1/k), 2), "\n")

## ----hazard-plot, fig.width=7, fig.height=4.5---------------------------------
model <- wei_series_homogeneous_md_c1_c2_c3()
t_grid <- seq(0.1, 200, length.out = 300)
cols <- c("#E41A1C", "#377EB8", "#4DAF4A")

plot(NULL, xlim = c(0, 200), ylim = c(0, 0.06),
     xlab = "Time t", ylab = "Hazard h_j(t)",
     main = "Component Hazard Functions")
for (j in seq_len(m)) {
  h_j <- component_hazard(model, j)
  lines(t_grid, h_j(t_grid, theta), col = cols[j], lwd = 2)
}
legend("topleft", paste0("Component ", seq_len(m), " (beta=", scales, ")"),
       col = cols, lwd = 2, cex = 0.9)

## ----cause-probs--------------------------------------------------------------
# Analytical cause weights: w_j = beta_j^{-k} / sum(beta_l^{-k})
w <- scales^(-k) / sum(scales^(-k))
names(w) <- paste0("Component ", seq_len(m))
cat("Cause weights (w_j):\n")
print(round(w, 4))

## ----cond-cause-plot, fig.width=7, fig.height=4-------------------------------
ccp_fn <- conditional_cause_probability(
  wei_series_homogeneous_md_c1_c2_c3(scales = scales)
)
probs <- ccp_fn(t_grid, theta)

plot(NULL, xlim = c(0, 200), ylim = c(0, 0.7),
     xlab = "Time t", ylab = "P(K = j | T = t)",
     main = "Conditional Cause Probability vs Time")
for (j in seq_len(m)) {
  lines(t_grid, probs[, j], col = cols[j], lwd = 2)
}
legend("right", paste0("Component ", seq_len(m)), col = cols, lwd = 2,
       cex = 0.9)
# With homogeneous shapes the conditional cause probabilities are constant
# in t and equal to the analytical weights w (dotted reference lines).
abline(h = w, col = cols, lty = 3)

## ----gen-periodic-data--------------------------------------------------------
gen <- rdata(model)
set.seed(2024)
df <- gen(theta, n = 500, p = 0.3,
          observe = observe_periodic(delta = 20, tau = 250))
cat("Observation types:\n")
print(table(df$omega))
cat("\nFirst 6 rows:\n")
print(head(df), row.names = FALSE)

## ----mle-fit, warning=FALSE---------------------------------------------------
solver <- fit(model)
theta0 <- c(1.2, 110, 140, 180)  # Initial guess near true values
estimate <- solver(df, par = theta0, method = "Nelder-Mead")
print(estimate)

## ----mle-recovery-------------------------------------------------------------
cat("True parameters: ", round(theta, 2), "\n")
cat("MLE estimates: ", round(estimate$par, 2), "\n")
cat("Std errors: ", round(sqrt(diag(estimate$vcov)), 2), "\n")
cat("Relative error: ", round(100 * abs(estimate$par - theta) / theta, 1),
    "%\n")

## ----score-hessian-verify-----------------------------------------------------
ll_fn <- loglik(model)
scr_fn <- score(model)
hess_fn <- hess_loglik(model)

# Score at MLE should be near zero
scr_mle <- scr_fn(df, estimate$par)
cat("Score at MLE:", round(scr_mle, 4), "\n")

# Verify score against numerical gradient
scr_num <- numDeriv::grad(function(th) ll_fn(df, th), estimate$par)
cat("Max |analytical - numerical| score:",
    formatC(max(abs(scr_mle - scr_num)), format = "e", digits = 2), "\n")

# Hessian eigenvalues (should be negative for concavity)
H <- hess_fn(df, estimate$par)
cat("Hessian eigenvalues:", round(eigen(H)$values, 2), "\n")

## ----load-precomputed, include=FALSE, eval=!run_long--------------------------
results <- readRDS("precomputed_weibull_homogeneous.rds")

## ----mc-setup, cache=TRUE, eval=run_long--------------------------------------
# set.seed(2024)
#
# B <- 100       # Monte Carlo replications
# n_mc <- 500    # Sample size per replication
# p_mc <- 0.3    # Masking probability
# alpha <- 0.05  # CI level
#
# # Three shape regimes
# scenarios <- list(
#   DFR = c(k = 0.7, beta1 = 100, beta2 = 150, beta3 = 200),
#   CFR = c(k = 1.0, beta1 = 100, beta2 = 150, beta3 = 200),
#   IFR = c(k = 1.5, beta1 = 100, beta2 = 150, beta3 = 200)
# )
#
# results <- list()

## ----mc-run, cache=TRUE, eval=run_long, warning=FALSE, message=FALSE----------
# model_mc <- wei_series_homogeneous_md_c1_c2_c3()
# gen_mc <- rdata(model_mc)
# solver_mc <- fit(model_mc)
#
# for (sc_name in names(scenarios)) {
#   th <- scenarios[[sc_name]]
#   k_sc <- th[1]
#   scales_sc <- th[-1]
#   m_sc <- length(scales_sc)
#   npars <- m_sc + 1
#
#   # Choose tau to give ~25% right-censoring
#   beta_sys_sc <- wei_series_system_scale(k_sc, scales_sc)
#   tau_sc <- qweibull(0.75, shape = k_sc, scale = beta_sys_sc)
#   delta_sc <- tau_sc / 10  # ~10 inspection intervals
#
#   estimates <- matrix(NA, nrow = B, ncol = npars)
#   se_est <- matrix(NA, nrow = B, ncol = npars)
#   ci_lower <- matrix(NA, nrow = B, ncol = npars)
#   ci_upper <- matrix(NA, nrow = B, ncol = npars)
#   converged <- logical(B)
#   cens_fracs <- numeric(B)
#
#   for (b in seq_len(B)) {
#     df_b <- gen_mc(th, n = n_mc, p = p_mc,
#                    observe = observe_periodic(delta = delta_sc,
#                                               tau = tau_sc))
#     cens_fracs[b] <- mean(df_b$omega == "right")
#
#     tryCatch({
#       est_b <- solver_mc(df_b, par = c(1, rep(120, m_sc)),
#                          method = "L-BFGS-B",
#                          lower = rep(1e-6, npars))
#       estimates[b, ] <- est_b$par
#       se_est[b, ] <- sqrt(diag(est_b$vcov))
#       z <- qnorm(1 - alpha / 2)
#       ci_lower[b, ] <- est_b$par - z * se_est[b, ]
#       ci_upper[b, ] <- est_b$par + z * se_est[b, ]
#       converged[b] <- est_b$converged
#     }, error = function(e) {
#       # <<- reaches the script-level `converged` from the handler closure
#       converged[b] <<- FALSE
#     })
#   }
#
#   results[[sc_name]] <- list(
#     theta = th, estimates = estimates, se_est = se_est,
#     ci_lower = ci_lower, ci_upper = ci_upper,
#     converged = converged, cens_fracs = cens_fracs,
#     tau = tau_sc, delta = delta_sc
#   )
# }

## ----mc-save, include=FALSE, eval=run_long------------------------------------
# saveRDS(results, "precomputed_weibull_homogeneous.rds")

## ----mc-results-table---------------------------------------------------------
summary_rows <- list()
for (sc_name in names(results)) {
  res <- results[[sc_name]]
  th <- res$theta
  valid <- res$converged & !is.na(res$estimates[, 1])
  est_v <- res$estimates[valid, , drop = FALSE]

  bias <- colMeans(est_v) - th
  variance <- apply(est_v, 2, var)
  mse <- bias^2 + variance

  pnames <- c("k", paste0("beta", seq_along(th[-1])))
  for (j in seq_along(th)) {
    summary_rows[[length(summary_rows) + 1]] <- data.frame(
      Regime = sc_name,
      Parameter = pnames[j],
      True = th[j],
      Mean_Est = mean(est_v[, j]),
      Bias = bias[j],
      RMSE = sqrt(mse[j]),
      Rel_Bias_Pct = 100 * bias[j] / th[j],
      stringsAsFactors = FALSE
    )
  }
}
mc_table <- do.call(rbind, summary_rows)
knitr::kable(mc_table, digits = 3, row.names = FALSE,
             caption = "Monte Carlo Results by Shape Regime (B=100, n=500)",
             col.names = c("Regime", "Parameter", "True", "Mean Est.",
                           "Bias", "RMSE", "Rel. Bias %"))

## ----mc-coverage--------------------------------------------------------------
coverage_rows <- list()
for (sc_name in names(results)) {
  res <- results[[sc_name]]
  th <- res$theta
  valid <- res$converged & !is.na(res$ci_lower[, 1])
  pnames <- c("k", paste0("beta", seq_along(th[-1])))
  for (j in seq_along(th)) {
    # Drop replications whose CI for parameter j is NA before computing
    # empirical coverage and mean width.
    valid_j <- valid & !is.na(res$ci_lower[, j]) & !is.na(res$ci_upper[, j])
    covered <- (res$ci_lower[valid_j, j] <= th[j]) &
      (th[j] <= res$ci_upper[valid_j, j])
    width <- mean(res$ci_upper[valid_j, j] - res$ci_lower[valid_j, j])
    coverage_rows[[length(coverage_rows) + 1]] <- data.frame(
      Regime = sc_name,
      Parameter = pnames[j],
      Coverage = mean(covered),
      Mean_Width = width,
      stringsAsFactors = FALSE
    )
  }
}
cov_table <- do.call(rbind, coverage_rows)
knitr::kable(cov_table, digits = 3, row.names = FALSE,
             caption = "95% Wald CI Coverage by Shape Regime",
             col.names = c("Regime", "Parameter", "Coverage", "Mean Width"))

## ----mc-censoring-rates-------------------------------------------------------
for (sc_name in names(results)) {
  res <- results[[sc_name]]
  conv_rate <- mean(res$converged)
  cens_rate <- mean(res$cens_fracs[res$converged])
  cat(sprintf("%s (k=%.1f): convergence=%.1f%%, mean censoring=%.1f%%\n",
              sc_name, res$theta[1], 100 * conv_rate, 100 * cens_rate))
}

## ----mc-sampling-dist, fig.width=8, fig.height=7------------------------------
oldpar <- par(mfrow = c(3, 4), mar = c(4, 3, 2, 1), oma = c(0, 0, 2, 0))
pnames <- c("k", "beta1", "beta2", "beta3")
for (sc_name in names(results)) {
  res <- results[[sc_name]]
  th <- res$theta
  valid <- res$converged & !is.na(res$estimates[, 1])
  est_v <- res$estimates[valid, , drop = FALSE]
  for (j in seq_along(th)) {
    hist(est_v[, j], breaks = 20, probability = TRUE,
         main = paste0(sc_name, ": ", pnames[j]),
         xlab = pnames[j], col = "lightblue", border = "white",
         cex.main = 0.9)
    abline(v = th[j], col = "red", lwd = 2, lty = 2)     # true value
    abline(v = mean(est_v[, j]), col = "blue", lwd = 2)  # MC mean
  }
}
mtext("Sampling Distributions by Regime", outer = TRUE, cex = 1.2)
# on.exit() only registers cleanup on a function frame, so it is unreliable
# at the top level of a script/chunk; restore the graphics state explicitly.
par(oldpar)

## ----exp-identity-------------------------------------------------------------
# Parameters: k=1 with scales equivalent to rates (1/beta_j)
exp_rates <- c(0.01, 0.008, 0.005)
wei_scales <- 1 / exp_rates  # beta = 1/lambda
wei_theta <- c(1, wei_scales)

# Generate data under exponential model
exp_model <- exp_series_md_c1_c2_c3()
exp_gen <- rdata(exp_model)
set.seed(42)
df_test <- exp_gen(exp_rates, n = 200, tau = 300, p = 0.3)

# Evaluate both log-likelihoods; with k = 1 the Weibull series model
# reduces to the exponential series model, so the values should agree.
ll_exp <- loglik(exp_model)
ll_wei <- loglik(model)
val_exp <- ll_exp(df_test, exp_rates)
val_wei <- ll_wei(df_test, wei_theta)
cat("Exponential loglik:", round(val_exp, 6), "\n")
cat("Weibull(k=1) loglik:", round(val_wei, 6), "\n")
cat("Absolute difference:",
    formatC(abs(val_exp - val_wei), format = "e", digits = 2), "\n")

## ----exp-identity-score-------------------------------------------------------
# Score comparison
s_exp <- score(exp_model)(df_test, exp_rates)
s_wei <- score(model)(df_test, wei_theta)

# The exponential score is d/d(lambda_j), the Weibull score includes d/dk
# and d/d(beta_j). Since lambda_j = 1/beta_j, by the chain rule:
#   d(ell)/d(lambda_j) = d(ell)/d(beta_j) * d(beta_j)/d(lambda_j)
#                      = d(ell)/d(beta_j) * (-beta_j^2)
# So: d(ell)/d(lambda_j) = -beta_j^2 * d(ell)/d(beta_j)
s_wei_transformed <- -wei_scales^2 * s_wei[-1]  # drop the d/dk component
cat("Exponential score: ", round(s_exp, 4), "\n")
cat("Weibull(k=1) scale score: ", round(s_wei_transformed, 4), "\n")
cat("Max |difference|:",
    formatC(max(abs(s_exp - s_wei_transformed)), format = "e", digits = 2),
    "\n")

## ----cleanup, include=FALSE---------------------------------------------------
options(old_opts)