## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup-------------------------------------------------------------------- library(causalWins) ## ----eval=FALSE--------------------------------------------------------------- # install.packages("causalWins") ## ----message=FALSE, warning=FALSE, results='hide', cache=TRUE, eval = FALSE---- # # --------------------------------------------------------- # # Generate data with mixed covariates # # --------------------------------------------------------- # set.seed(123) # n <- 100 # base <- data.frame( # X1 = rnorm(n, mean = 5, sd = 2), # numeric covariate 1 # X2 = rnorm(n, mean = 10, sd = 3), # numeric covariate 2 # X3 = factor(sample(c("A", "B", "C"), n, replace = TRUE)), # factor covariate # Y1 = rnorm(n, mean = 50, sd = 10), # numeric outcome 1 # Y2 = rnorm(n, mean = 100, sd = 20), # numeric outcome 2 # arm = sample(c(0, 1), n, replace = TRUE) # binary treatment arm # ) # # --------------------------------------------------------- # # Compute win ratio with nearest neighbor pairing # # --------------------------------------------------------- # res_nn <- nn_win_ratio( # base = base, # treatment_name = "arm", # outcome_names = c("Y1", "Y2") # ) # # --------------------------------------------------------- # # Compute win ratio with Distributional Random Forests # # --------------------------------------------------------- # res_drf <- drf_win_ratio( # base = base, # treatment_name = "arm", # outcome_names = c("Y1", "Y2") # ) # # --------------------------------------------------------- # # Compute win ratio with Efficient Influence Functions # # --------------------------------------------------------- # res_eif <- eif_win_ratio( # base = base, # treatment_name = "arm", # outcome_names = c("Y1", "Y2") # ) ## ----message=FALSE, warning=FALSE, results='hide', cache=TRUE, eval = FALSE---- # library(WINS) # # # In this example, there are two subpopulations: A and B. The potential outcomes are # # y_0a <- 4 # potential outcome of individuals of subpopulation A without treatment # y_1b <- 3 # potential outcome of individuals of subpopulation B under treatment # y_0b <- 2 # potential outcome of individuals of subpopulation B without treatment # y_1a <- 1 # potential outcome of individuals of subpopulation A under treatment # # alpha <- 1 / 3 # proportion of women # # n_rounds <- 100 # n <- 300*1:5 # number of individuals 300, 600, 900, 1200, 1500 # # rct_prob <- .5 # probability of assignment to treatment/control group # # results_nn <- data.frame(row_id = 1:n_rounds) # results_comp <- data.frame(row_id = 1:n_rounds) # # for (n_individuals in n) { # current_result_nn <- c() # current_result_comp <- c() # # for (round in 1:n_rounds) { # set.seed(123 + n_individuals + round) # # # --------------------------------------------------- # # Generate Data # # --------------------------------------------------- # # X <- rbinom(n_individuals, 1, alpha) # Sample element in A with prob `alpha` # treatment <- rbinom(n_individuals, 1, rct_prob) # Assign to treatment with prob `rct_prob` # Y <- X * treatment * y_1a + X * (1 - treatment) * y_0a + # (1 - X) * treatment * y_1b + (1 - X) * (1 - treatment) * y_0b # Outcomes # base <- data.frame(X = X, Y = Y, arm = treatment) # # # --------------------------------------------------- # # Compute Win Proportion for Nearest Neighbor Pairing # # --------------------------------------------------- # res_nn <- nn_win_ratio( # base = base, # treatment_name = "arm", # outcome_names = "Y" # ) # res_nn <- res_nn$win_proportion$value # current_result_nn <- c(current_result_nn, res_nn) # # # --------------------------------------------------- # # Compute Win Proportion for Complete Pairing # # --------------------------------------------------- # data <- base # # colnames(data)[colnames(data) == "Y"] <- "Y_1" # Format for use with the WINS Package # data$id <- 1:n_individuals # Format for use with the WINS Package # res_comp <- WINS::win.stat( # data = data, # ep_type = "continuous", # arm.name = c(1, 0), # priority = c(1), # summary.print = FALSE # ) # res_comp <- res_comp$Win_prop[[2]] # current_result_comp <- c(current_result_comp, res_comp) # } # results_nn[[as.character(n_individuals)]] <- current_result_nn # results_comp[[as.character(n_individuals)]] <- current_result_comp # } ## ----myplot, fig.width=6, fig.height=4, dev='png', dpi=96, eval = FALSE------- # results_nn$row_id <- NULL # results_comp$row_id <- NULL # # # # Interleave columns for side-by-side plotting # combined <- unlist(Map(list, results_nn, results_comp), recursive = FALSE) # # # Set names for x-axis # names(combined) <- rep(names(results_nn), each = 2) # # # Plot # boxplot(combined, # col = rep(c("skyblue", "orange"), ncol(results_nn)), # las = 2, # main = "Comparison of the win proportions computed \nwith Nearest Neighbors and Complete Pairings", # ylab = "Win proportion", # xlab = "Number of patients (n)" # ) # # legend("bottomleft", # legend = c("NN", "Complete"), # fill = c("skyblue", "orange") # ) # # abline(h = 0.5, lty = 2, col = "red") ## ----echo=FALSE, out.width="70%"---------------------------------------------- knitr::include_graphics("figures/Example2.png") ## ----message=FALSE, warning=FALSE, results='hide', cache=TRUE, eval = FALSE---- # library(MASS) # library(WINS) # # n_rounds <- 100 # n <- 300* 1:5 # number of individuals 300, 600, 900, 1200, 1500 # # mu <- c(0, 0) # Covariates mean # sigma <- matrix(c(1, 0.6, 0.6, 1), 2, 2) # Covariates covariance # # tau_cont <- 2 # ATE of continuous outcome # tau_bin <- 0.8 # ATE on logit scale of binary outcome # # rct_prob <- .5 # probability of assignment to treatment/control group # # results_nn <- data.frame(row_id = 1:n_rounds) # results_comp <- data.frame(row_id = 1:n_rounds) # # for (n_individuals in n) { # current_result_nn <- c() # current_result_comp <- c() # # for (round in 1:n_rounds) { # set.seed(123 + n_individuals + round) # # # --------------------------------------------------- # # Generate Covariates and Treatment # # --------------------------------------------------- # # covariates <- as.data.frame(mvrnorm(n_individuals, mu = mu, Sigma = sigma)) # covariates # colnames(covariates) <- c("X1", "X2") # treatment <- rbinom(n_individuals, 1, rct_prob) # Assign to treatment with prob `rct_prob` # # # --------------------------------------------------- # # Generate Binary Outcomes # # --------------------------------------------------- # # Potential Outcomes # lin_pred0 <- -1 + 0.3 * covariates$X1 - 0.5 * covariates$X2 # lin_pred1 <- lin_pred0 + tau_bin # # # Sample binary potential outcomes # prob0 <- 1 / (1 + exp(-lin_pred0)) # prob1 <- 1 / (1 + exp(-lin_pred1)) # Y0_bin <- rbinom(n_individuals, size = 1, prob = prob0) # Y1_bin <- rbinom(n_individuals, size = 1, prob = prob1) # # # Observed binary outcome # Y_1 <- Y0_bin * (1 - treatment) + Y1_bin * treatment # # # --------------------------------------------------- # # Generate Continuous Outcomes # # --------------------------------------------------- # # Potential Outcomes # Y0_cont <- 1 + 0.5 * covariates$X1 - 0.7 * covariates$X2 + rnorm(n_individuals, 0, 1) # Y1_cont <- Y0_cont + tau_cont # # # Observed continuous outcome # Y_2 <- Y0_cont * (1 - treatment) + Y1_cont * treatment # # # --------------------------------------------------- # # Compute Win Proportion for Nearest Neighbor Pairing # # --------------------------------------------------- # base <- data.frame(covariates, arm = treatment, Y_1, Y_2) # # res_nn <- nn_win_ratio( # base = base, # treatment_name = "arm", # outcome_names = c("Y_1", "Y_2") # ) # res_nn <- res_nn$win_proportion$value # current_result_nn <- c(current_result_nn, res_nn) # # --------------------------------------------------- # # Compute Win Proportion for Complete Pairing # # --------------------------------------------------- # data <- base # # data$id <- 1:n_individuals # Format for use with the WINS Package # res_comp <- WINS::win.stat( # data = data, # ep_type = c("binary", "continuous"), # arm.name = c(1, 0), # priority = c(1, 2), # summary.print = FALSE # ) # res_comp <- res_comp$Win_prop[[2]] # current_result_comp <- c(current_result_comp, res_comp) # } # results_nn[[as.character(n_individuals)]] <- current_result_nn # results_comp[[as.character(n_individuals)]] <- current_result_comp # } ## ----myplot2, fig.width=6, fig.height=4, dev='png', dpi=96, eval = FALSE------ # results_nn$row_id <- NULL # results_comp$row_id <- NULL # # # # Interleave columns for side-by-side plotting # combined <- unlist(Map(list, results_nn, results_comp), recursive = FALSE) # # # Set names for x-axis # names(combined) <- rep(names(results_nn), each = 2) # # # Plot # boxplot(combined, # col = rep(c("skyblue", "orange"), ncol(results_nn)), # las = 2, # main = "Comparison of the win proportions computed \nwith Nearest Neighbors and Complete Pairings", # ylab = "Win proportion", # xlab = "Number of patients (n)" # ) # # legend("bottomleft", # legend = c("NN", "Complete"), # fill = c("skyblue", "orange") # ) # # abline(h = 0.5, lty = 2, col = "red") ## ----echo=FALSE, out.width="70%"---------------------------------------------- knitr::include_graphics("figures/Example3.png") ## ----message=FALSE, warning=FALSE, results='hide', cache=TRUE, eval = FALSE---- # library(doParallel) # library(foreach) # # n_rounds <- 100 # n <- 10000 # mu <- c(0, 0) # sigma <- matrix(c(1, 0.6, 0.6, 1), 2, 2) # tau_cont <- 2 # tau_bin <- 0.8 # # # Set up cluster # n_cores <- parallel::detectCores() - 1 # leave 1 core free # cl <- makeCluster(n_cores) # registerDoParallel(cl) # # results_eif <- foreach( # round = 1:n_rounds, # .combine = c, # .packages = c("MASS", "causalWins") # ) %dopar% { # set.seed(123 + n + round) # # # ----------------------------------------------------------- # # Generate Covariates and Treatment (observational) # # ----------------------------------------------------------- # # covariates <- mvrnorm(n, mu = mu, Sigma = sigma) # covariates # colnames(covariates) <- c("X1", "X2") # covariates <- as.data.frame(covariates) # # p_treatment <- 1 / (1 + exp(-(0.5 * covariates$X1 - 0.3 * covariates$X2))) # non-rct # treatment <- rbinom(n, 1, p_treatment) # # # ----------------------------------------------------------- # # Generate Binary Outcomes # # ----------------------------------------------------------- # # Potential Outcomes # lin_pred0 <- -1 + 0.3 * covariates$X1 - 0.5 * covariates$X2 # lin_pred1 <- lin_pred0 + tau_bin # # # Sample binary potential outcomes # prob0 <- 1 / (1 + exp(-lin_pred0)) # prob1 <- 1 / (1 + exp(-lin_pred1)) # Y0_bin <- rbinom(n, size = 1, prob = prob0) # Y1_bin <- rbinom(n, size = 1, prob = prob1) # # # Observed binary outcome # Y_1 <- Y0_bin * (1 - treatment) + Y1_bin * treatment # # # ----------------------------------------------------------- # # Generate Continuous Outcomes # # ----------------------------------------------------------- # # Potential Outcomes # Y0_cont <- 1 + 0.5 * covariates$X1 - 0.7 * covariates$X2 + rnorm(n, 0, 1) # Y1_cont <- Y0_cont + tau_cont # # # Observed continuous outcome # Y_2 <- Y0_cont * (1 - treatment) + Y1_cont * treatment # # # ----------------------------------------------------------- # # Compute Win Proportion for drf and eif # # ----------------------------------------------------------- # base <- data.frame(covariates, arm = treatment, Y_1, Y_2) # # res_eif <- eif_win_ratio( # base = base, # treatment_name = "arm", # outcome_names = c("Y_1", "Y_2"), # propensity_model = "glm" # ) # # res_eif <- res_eif$win_proportion$value # # res_eif # } # stopCluster(cl) ## ----message=FALSE, warning=FALSE, results='hide', cache=TRUE, eval = FALSE---- # library(MASS) # mu <- c(0, 0) # Covariates mean # sigma <- matrix(c(1, 0.6, 0.6, 1), 2, 2) # tau_cont <- 2 # ATE of continuous outcome # tau_bin <- 0.8 # ATE on logit scale of binary outcome # # # Default win function # win_function <- function(y, z) { # if (all(y == z)) { # return(0) # Tie # } # result <- (y > z)[which.max(y != z)] * 1 # return(result) # } # # mean_given <- function(x) { # n_samples <- 10000 # # Binary potential outcomes # lin_pred0 <- -1 + 0.3 * x[1] - 0.5 * x[2] # lin_pred1 <- lin_pred0 + tau_bin # prob0 <- 1 / (1 + exp(-lin_pred0)) # prob1 <- 1 / (1 + exp(-lin_pred1)) # Y0_bin <- rbinom(n_samples, size = 1, prob = prob0) # Y1_bin <- rbinom(n_samples, size = 1, prob = prob1) # # # Continuous Potential Outcomes # Y0_cont <- 1 + 0.5 * x[1] - 0.7 * x[2] + rnorm(n_samples, 0, 1) # Y1_cont <- 1 + 0.5 * x[1] - 0.7 * x[2] + rnorm(n_samples, 0, 1) + tau_cont # # wins_given_x <- mapply(function(yb, yc, zb, zc) { # win_function(c(yb, yc), c(zb, zc)) # }, Y1_bin, Y1_cont, Y0_bin, Y0_cont) # # return(mean(wins_given_x)) # } # # set.seed(123) # # covariates <- as.data.frame(mvrnorm(10000, mu = mu, Sigma = sigma)) # # tau_star <- mean(apply(covariates, 1, function(row) mean_given(row))) # ## ----myplot3, fig.width=6, fig.height=4, dev='png', dpi=96, eval = FALSE------ # boxplot(results_eif, # main = "Win proportion with EIF", # xlab = "Number of patients 10000", # ylab = "Win proportion" # ) # abline(h = tau_star, lty = 2, col = "red") ## ----echo=FALSE, out.width="70%"---------------------------------------------- knitr::include_graphics("figures/EIF_resPlot.png")