## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 4 ) ## ----setup-exact-------------------------------------------------------------- library(maskedhaz) model <- dfr_series_md(components = list( dfr_weibull(shape = 2, scale = 6), dfr_exponential(0.08), dfr_exponential(0.12) )) set.seed(100) df <- rdata(model)(theta = c(2, 6, 0.08, 0.12), n = 400, tau = 5, p = 0.4) table(df$omega) ## ----fit-exact---------------------------------------------------------------- result <- fit(model)(df, par = c(1.5, 7, 0.05, 0.1)) rbind(truth = c(2, 6, 0.08, 0.12), estimate = round(coef(result), 3)) ## ----setup-left--------------------------------------------------------------- df_left <- data.frame( t = c(2, 3, 4, 3.5, 5), omega = "left", x1 = c(TRUE, TRUE, FALSE, TRUE, TRUE), x2 = c(FALSE, TRUE, TRUE, FALSE, TRUE), x3 = c(TRUE, FALSE, TRUE, TRUE, FALSE), stringsAsFactors = FALSE ) df_left ## ----loglik-left-------------------------------------------------------------- ll_fn <- loglik(model) ll_fn(df_left, par = c(2, 6, 0.08, 0.12)) ## ----setup-interval----------------------------------------------------------- df_interval <- data.frame( t = c(1.0, 2.0, 3.5), t_upper = c(2.5, 4.0, 5.0), omega = "interval", x1 = c(TRUE, FALSE, TRUE), x2 = c(TRUE, TRUE, FALSE), x3 = c(FALSE, TRUE, TRUE), stringsAsFactors = FALSE ) df_interval ll_fn(df_interval, par = c(2, 6, 0.08, 0.12)) ## ----mixed-------------------------------------------------------------------- df_mixed <- data.frame( t = c(3, 5, 2, 1.5, 4.5), t_upper = c(NA, NA, NA, 3.0, NA), omega = c("exact", "right", "left", "interval", "exact"), x1 = c(TRUE, FALSE, TRUE, TRUE, FALSE), x2 = c(FALSE, FALSE, TRUE, FALSE, TRUE), x3 = c(TRUE, FALSE, FALSE, TRUE, TRUE), stringsAsFactors = FALSE ) ll_fn(df_mixed, par = c(2, 6, 0.08, 0.12)) ## ----cross-validate, eval = requireNamespace("maskedcauses", quietly = TRUE)---- library(maskedcauses) # Same three-component exponential series in both packages rates <- c(0.1, 0.2, 0.3) exp_model_haz <- dfr_series_md( components = list(dfr_exponential(), dfr_exponential(), dfr_exponential()), n_par = c(1L, 1L, 1L) ) exp_model_mc <- exp_series_md_c1_c2_c3() # A single-row data frame of each observation type df_cv <- data.frame( t = c(3.0, 5.0, 2.0, 1.5), t_upper = c(NA, NA, NA, 3.0), omega = c("exact", "right", "left", "interval"), x1 = c(TRUE, FALSE, TRUE, TRUE), x2 = c(FALSE, FALSE, TRUE, FALSE), x3 = c(TRUE, FALSE, FALSE, TRUE), stringsAsFactors = FALSE ) # Evaluate both log-likelihoods at the same parameters ll_haz <- loglik(exp_model_haz)(df_cv, par = rates) ll_mc <- likelihood.model::loglik(exp_model_mc)(df_cv, par = rates) c(maskedhaz = ll_haz, maskedcauses = ll_mc, difference = abs(ll_haz - ll_mc)) ## ----identifiability-exp, eval = requireNamespace("maskedcauses", quietly = TRUE)---- # A realistic-sized dataset with partial masking set.seed(2024) df_id <- likelihood.model::rdata(exp_model_mc)( theta = rates, n = 300, tau = 5, p = 0.5 ) result_id <- suppressWarnings( fit(exp_model_haz)(df_id, par = c(0.5, 0.5, 0.5)) ) cat("individual MLE:", round(coef(result_id), 3), " (truth:", rates, ")\n") cat("sum of MLE: ", round(sum(coef(result_id)), 3), " (truth:", sum(rates), ")\n") ## ----identifiability-structure, eval = requireNamespace("maskedcauses", quietly = TRUE)---- ll_fn <- loglik(exp_model_haz) c(truth = ll_fn(df_id, par = c(0.10, 0.20, 0.30)), balanced = ll_fn(df_id, par = c(0.20, 0.20, 0.20)), skewed = ll_fn(df_id, par = c(0.05, 0.05, 0.50)))