## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 4 ) ## ----define-model------------------------------------------------------------- library(serieshaz) # Hypothesize: system has two failure modes # - Wear-out (Weibull) # - Random shock (exponential) model <- dfr_dist_series(list( dfr_weibull(shape = 2, scale = 100), dfr_exponential(0.01) )) ## ----simulate-data------------------------------------------------------------ set.seed(42) true_par <- c(2, 100, 0.01) # shape, scale, rate samp <- sampler(model) n <- 300 times <- samp(n, par = true_par) train <- data.frame(t = times, delta = rep(1L, n)) summary(train$t) ## ----fit-model---------------------------------------------------------------- solver <- fit(model) # Provide initial parameter guesses init_par <- c(1.5, 80, 0.02) result <- solver(train, par = init_par) ## ----extract-results---------------------------------------------------------- # Fitted parameters cat("True parameters:", true_par, "\n") cat("Fitted parameters:", coef(result), "\n") # Log-likelihood at MLE cat("Log-likelihood:", logLik(result), "\n") # Variance-covariance matrix vcov(result) ## ----multi-start-------------------------------------------------------------- # Strategy: try a few initial guesses, keep the best init_guesses <- list( c(1.5, 80, 0.02), c(2.5, 120, 0.005), c(1.0, 50, 0.05) ) best_ll <- -Inf best_result <- NULL for (init in init_guesses) { res <- tryCatch(solver(train, par = init), error = function(e) NULL) if (!is.null(res) && logLik(res) > best_ll) { best_ll <- logLik(res) best_result <- res } } cat("Best log-likelihood:", best_ll, "\n") cat("Best parameters:", coef(best_result), "\n") ## ----identifiability-exp------------------------------------------------------ # True rates: 0.1, 0.2, 0.3 (sum = 0.6) true_rates <- c(0.1, 0.2, 0.3) exp_sys <- dfr_dist_series(lapply(true_rates, dfr_exponential)) set.seed(123) exp_samp <- sampler(exp_sys) exp_data <- data.frame(t = exp_samp(500), delta = rep(1L, 500)) exp_solver <- fit(exp_sys) exp_result <- exp_solver(exp_data, par = c(0.15, 0.25, 0.25)) cat("True sum of rates:", sum(true_rates), "\n") cat("Fitted sum of rates:", sum(coef(exp_result)), "\n") cat("Individual rates (unreliable):", coef(exp_result), "\n") ## ----identifiability-mixed---------------------------------------------------- # Weibull (increasing hazard) + Exponential (constant hazard) mixed <- dfr_dist_series(list( dfr_weibull(shape = 2, scale = 100), dfr_exponential(0.01) )) set.seed(42) mixed_samp <- sampler(mixed) mixed_data <- data.frame(t = mixed_samp(500), delta = rep(1L, 500)) mixed_solver <- fit(mixed) mixed_result <- mixed_solver(mixed_data, par = c(1.5, 80, 0.02)) cat("True: shape=2.00, scale=100.0, rate=0.010\n") cat(sprintf("Fitted: shape=%.2f, scale=%.1f, rate=%.3f\n", coef(mixed_result)[1], coef(mixed_result)[2], coef(mixed_result)[3])) ## ----censored-data------------------------------------------------------------ set.seed(42) model <- dfr_dist_series(list( dfr_weibull(shape = 2, scale = 100), dfr_exponential(0.01) )) true_par <- c(2, 100, 0.01) # Generate true failure times n <- 500 true_times <- sampler(model)(n, par = true_par) # Right-censor at a fixed time horizon censor_time <- 80 observed_t <- pmin(true_times, censor_time) delta <- as.integer(true_times <= censor_time) cens_data <- data.frame(t = observed_t, delta = delta) cat("Proportion censored:", round(1 - mean(delta), 3), "\n") # Fit with censored data solver <- fit(model) cens_result <- solver(cens_data, par = c(1.5, 80, 0.02)) cat("True parameters: ", true_par, "\n") cat("Fitted (censored): ", round(coef(cens_result), 4), "\n") cat("Fitted (uncensored):", round(coef(result), 4), "\n") ## ----model-comparison--------------------------------------------------------- # Model 1: Weibull + Exponential (3 parameters) m1 <- dfr_dist_series(list( dfr_weibull(shape = 2, scale = 100), dfr_exponential(0.01) )) # Model 2: Two Weibulls (4 parameters) m2 <- dfr_dist_series(list( dfr_weibull(shape = 2, scale = 100), dfr_weibull(shape = 1.5, scale = 200) )) # Generate data from Model 1 set.seed(42) data_m1 <- data.frame( t = sampler(m1)(300, par = c(2, 100, 0.01)), delta = rep(1L, 300) ) r1 <- fit(m1)(data_m1, par = c(1.5, 80, 0.02)) r2 <- fit(m2)(data_m1, par = c(1.5, 80, 1.2, 150)) k1 <- length(coef(r1)) k2 <- length(coef(r2)) ll1 <- logLik(r1) ll2 <- logLik(r2) aic1 <- -2 * ll1 + 2 * k1 aic2 <- -2 * ll2 + 2 * k2 cat(sprintf("Model 1 (Weibull+Exp): LL=%.2f, k=%d, AIC=%.2f\n", ll1, k1, aic1)) cat(sprintf("Model 2 (Weibull+Weibull): LL=%.2f, k=%d, AIC=%.2f\n", ll2, k2, aic2)) cat("Preferred model (lower AIC):", ifelse(aic1 < aic2, "Model 1", "Model 2"), "\n") ## ----assumptions-------------------------------------------------------------- assumptions(m1) ## ----wald-ci------------------------------------------------------------------ result <- fit(m1)(data_m1, par = c(1.5, 80, 0.02)) est <- coef(result) se <- sqrt(diag(vcov(result))) alpha <- 0.05 z <- qnorm(1 - alpha / 2) ci_lower <- est - z * se ci_upper <- est + z * se ci_table <- data.frame( Parameter = c("shape", "scale", "rate"), Estimate = est, SE = se, Lower = ci_lower, Upper = ci_upper ) print(ci_table, digits = 4) ## ----delta-method------------------------------------------------------------- # For this system, MTBF is not available in closed form, but we can # approximate it numerically and use the delta method par_hat <- coef(result) V <- vcov(result) # Numerical MTBF estimate via integration of survival function S_fn <- surv(m1) # Wrap S_fn for integrate() which may pass vectorized t values compute_mtbf <- function(p) { integrate(function(t) vapply(t, function(ti) S_fn(ti, par = p), numeric(1)), 0, Inf, subdivisions = 1000L)$value } mtbf <- compute_mtbf(par_hat) cat("Estimated MTBF:", round(mtbf, 2), "\n") # Delta method: gradient of MTBF w.r.t. parameters eps <- 1e-5 grad_mtbf <- numeric(length(par_hat)) for (k in seq_along(par_hat)) { par_plus <- par_minus <- par_hat par_plus[k] <- par_hat[k] + eps par_minus[k] <- par_hat[k] - eps grad_mtbf[k] <- (compute_mtbf(par_plus) - compute_mtbf(par_minus)) / (2 * eps) } se_mtbf <- sqrt(t(grad_mtbf) %*% V %*% grad_mtbf) cat(sprintf("MTBF = %.2f +/- %.2f (95%% CI: [%.2f, %.2f])\n", mtbf, 1.96 * se_mtbf, mtbf - 1.96 * se_mtbf, mtbf + 1.96 * se_mtbf))