## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup-------------------------------------------------------------------- library(ipeval) library(survival) ## ----------------------------------------------------------------------------- simulate_time_to_event <- function(n, constant_baseline_haz, LP) { u <- runif(n) -log(u) / (constant_baseline_haz * exp(LP)) } simulate_data <- function(n, seed) { set.seed(seed) data <- data.frame( L1 = rnorm(n), L2 = rbinom(n, 1, 0.5), P1 = rnorm(n), P2 = rbinom(n, 1, 0.5) ) data$A <- rbinom(n, 1, plogis(0.2 + 1.2*data$L1 - 0.3*data$L2)) LP <- 0.8*data$L1 + 0.3*data$L2 + 0.5*data$P1 + 0.7*data$P2 # time_0 is the uncensored untreated survival time, # time_1 is the uncensored treated survival time data$time_0 <- simulate_time_to_event(n, 0.04, LP) data$time_1 <- simulate_time_to_event(n, 0.04, LP - 0.5) # time_A is the uncensored survival time corresponding to assigned treatment data$time_A <- ifelse(data$A == 1, data$time_1, data$time_0) # uninformative censoring data$censor_time <- simulate_time_to_event(n, 0.05, 0) # in practice, we only observe (time, status): data$status <- ifelse(data$time_A <= data$censor_time, TRUE, FALSE) data$time <- ifelse(data$status == TRUE, data$time_A, data$censor_time) return(data) } df_dev <- simulate_data(n = 10000, seed = 123) summary(df_dev$time) summary(df_dev$status) ## ----------------------------------------------------------------------------- model_naive <- coxph( formula = Surv(time, status) ~ P1 + P2 + A, data = df_dev ) coefficients(model_naive) trt_model <- glm(A ~ L1 + L2, family = "binomial", data = df_dev) propensity_score <- predict(trt_model, type = "response") df_dev$iptw <- 1 / ifelse(df_dev$A == 1, propensity_score, 1 - propensity_score) model_causal <- coxph( formula = Surv(time, status) ~ P1 + P2 + A, data = df_dev, weights = iptw ) coefficients(model_causal) ## ----------------------------------------------------------------------------- df_val <- simulate_data(n = 10000, seed = 234) ## ----fig.width=6, fig.height=6------------------------------------------------ cfs <- ip_score( object = list("naive model" = model_naive, "causal model" = model_causal), data = df_val, outcome = Surv(time, status), treatment_formula = A ~ L1 + L2, treatment_of_interest = 1, time_horizon = 5, cens_model = "KM" ) cfs ## ----------------------------------------------------------------------------- df_val$censortime <- simulate_time_to_event(10000, 0.04, 0.1*df_val$L1 + 0.5*df_val$P2) summary(df_val$censortime) df_val$status <- ifelse(df_val$time_A <= df_val$censortime, TRUE, FALSE) df_val$time <- ifelse(df_val$status == TRUE, df_val$time_A, df_val$censortime) summary(df_val$time) summary(df_val$status) ## ----fig.width=6, fig.height=6------------------------------------------------ ip_score( object = list("naive model" = model_naive, "causal model" = model_causal), data = df_val, outcome = Surv(time, status), treatment_formula = A ~ L1 + L2, treatment_of_interest = 1, time_horizon = 5, cens_model = "cox", cens_formula = ~ L1 + P2, bootstrap = 100, bootstrap_progress = FALSE )