## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(causalWins)

## ----eval=FALSE---------------------------------------------------------------
# install.packages("causalWins")

## ----message=FALSE, warning=FALSE, results='hide', cache=TRUE, eval = FALSE----
# # ---------------------------------------------------------
#     # Generate data with mixed covariates
# # ---------------------------------------------------------
# set.seed(123)
# n <- 100
# base <- data.frame(
#   X1 = rnorm(n, mean = 5, sd = 2), # numeric covariate 1
#   X2 = rnorm(n, mean = 10, sd = 3), # numeric covariate 2
#   X3 = factor(sample(c("A", "B", "C"), n, replace = TRUE)), # factor covariate
#   Y1 = rnorm(n, mean = 50, sd = 10), # numeric outcome 1
#   Y2 = rnorm(n, mean = 100, sd = 20), # numeric outcome 2
#   arm = sample(c(0, 1), n, replace = TRUE) # binary treatment arm
# )
# # ---------------------------------------------------------
#     # Compute win ratio with nearest neighbor pairing
# # ---------------------------------------------------------
# res_nn <- nn_win_ratio(
#   base = base,
#   treatment_name = "arm",
#   outcome_names = c("Y1", "Y2")
# )
# # ---------------------------------------------------------
#     # Compute win ratio with Distributional Random Forests
# # ---------------------------------------------------------
# res_drf <- drf_win_ratio(
#   base = base,
#   treatment_name = "arm",
#   outcome_names = c("Y1", "Y2")
# )
# # ---------------------------------------------------------
#     # Compute win ratio with Efficient Influence Functions
# # ---------------------------------------------------------
# res_eif <- eif_win_ratio(
#   base = base,
#   treatment_name = "arm",
#   outcome_names = c("Y1", "Y2")
# )

## ----message=FALSE, warning=FALSE, results='hide', cache=TRUE, eval = FALSE----
# library(WINS)
# 
# # In this example, there are two subpopulations: A and B. The potential outcomes are
# 
# y_0a <- 4 # potential outcome of individuals of subpopulation A without treatment
# y_1b <- 3 # potential outcome of individuals of subpopulation B under treatment
# y_0b <- 2 # potential outcome of individuals of subpopulation B without treatment
# y_1a <- 1 # potential outcome of individuals of subpopulation A under treatment
# 
# alpha <- 1 / 3 # proportion of women
# 
# n_rounds <- 100
# n <- 300*1:5  # number of individuals 300, 600, 900, 1200, 1500
# 
# rct_prob <- .5 # probability of assignment to treatment/control group
# 
# results_nn <- data.frame(row_id = 1:n_rounds)
# results_comp <- data.frame(row_id = 1:n_rounds)
# 
# for (n_individuals in n) {
#   current_result_nn <- c()
#   current_result_comp <- c()
# 
#   for (round in 1:n_rounds) {
#     set.seed(123 + n_individuals + round)
# 
#     # ---------------------------------------------------
#     # Generate Data
#     # ---------------------------------------------------
# 
#     X <- rbinom(n_individuals, 1, alpha) # Sample element in A with prob `alpha`
#     treatment <- rbinom(n_individuals, 1, rct_prob) # Assign to treatment with prob `rct_prob`
#     Y <- X * treatment * y_1a + X * (1 - treatment) * y_0a +
#       (1 - X) * treatment * y_1b + (1 - X) * (1 - treatment) * y_0b # Outcomes
#     base <- data.frame(X = X, Y = Y, arm = treatment)
# 
#     # ---------------------------------------------------
#     # Compute Win Proportion for Nearest Neighbor Pairing
#     # ---------------------------------------------------
#     res_nn <- nn_win_ratio(
#       base = base,
#       treatment_name = "arm",
#       outcome_names = "Y"
#     )
#     res_nn <- res_nn$win_proportion$value
#     current_result_nn <- c(current_result_nn, res_nn)
# 
#     # ---------------------------------------------------
#     # Compute Win Proportion for Complete Pairing
#     # ---------------------------------------------------
#     data <- base
# 
#     colnames(data)[colnames(data) == "Y"] <- "Y_1" # Format for use with the WINS Package
#     data$id <- 1:n_individuals # Format for use with the WINS Package
#     res_comp <- WINS::win.stat(
#       data = data,
#       ep_type = "continuous",
#       arm.name = c(1, 0),
#       priority = c(1),
#       summary.print = FALSE
#     )
#     res_comp <- res_comp$Win_prop[[2]]
#     current_result_comp <- c(current_result_comp, res_comp)
#   }
#   results_nn[[as.character(n_individuals)]] <- current_result_nn
#   results_comp[[as.character(n_individuals)]] <- current_result_comp
# }

## ----myplot, fig.width=6, fig.height=4, dev='png', dpi=96, eval = FALSE-------
# results_nn$row_id <- NULL
# results_comp$row_id <- NULL
# 
# # # Interleave columns for side-by-side plotting
# combined <- unlist(Map(list, results_nn, results_comp), recursive = FALSE)
# 
# # Set names for x-axis
# names(combined) <- rep(names(results_nn), each = 2)
# 
# # Plot
# boxplot(combined,
#   col = rep(c("skyblue", "orange"), ncol(results_nn)),
#   las = 2,
#   main = "Comparison of the win proportions computed \nwith Nearest Neighbors and Complete Pairings",
#   ylab = "Win proportion",
#   xlab = "Number of patients (n)"
# )
# 
# legend("bottomleft",
#   legend = c("NN", "Complete"),
#   fill = c("skyblue", "orange")
# )
# 
# abline(h = 0.5, lty = 2, col = "red")

## ----echo=FALSE, out.width="70%"----------------------------------------------
knitr::include_graphics("figures/Example2.png")

## ----message=FALSE, warning=FALSE, results='hide', cache=TRUE, eval = FALSE----
# library(MASS)
# library(WINS)
# 
# n_rounds <- 100
# n <- 300* 1:5 # number of individuals 300, 600, 900, 1200, 1500
# 
# mu <- c(0, 0) # Covariates mean
# sigma <- matrix(c(1, 0.6, 0.6, 1), 2, 2) # Covariates covariance
# 
# tau_cont <- 2 # ATE of continuous outcome
# tau_bin <- 0.8 # ATE on logit scale of binary outcome
# 
# rct_prob <- .5 # probability of assignment to treatment/control group
# 
# results_nn <- data.frame(row_id = 1:n_rounds)
# results_comp <- data.frame(row_id = 1:n_rounds)
# 
# for (n_individuals in n) {
#   current_result_nn <- c()
#   current_result_comp <- c()
# 
#   for (round in 1:n_rounds) {
#     set.seed(123 + n_individuals + round)
# 
#     # ---------------------------------------------------
#     # Generate Covariates and Treatment
#     # ---------------------------------------------------
# 
#     covariates <- as.data.frame(mvrnorm(n_individuals, mu = mu, Sigma = sigma)) # covariates
#     colnames(covariates) <- c("X1", "X2")
#     treatment <- rbinom(n_individuals, 1, rct_prob) # Assign to treatment with prob `rct_prob`
# 
#     # ---------------------------------------------------
#     # Generate Binary Outcomes
#     # ---------------------------------------------------
#     # Potential Outcomes
#     lin_pred0 <- -1 + 0.3 * covariates$X1 - 0.5 * covariates$X2
#     lin_pred1 <- lin_pred0 + tau_bin
# 
#     # Sample binary potential outcomes
#     prob0 <- 1 / (1 + exp(-lin_pred0))
#     prob1 <- 1 / (1 + exp(-lin_pred1))
#     Y0_bin <- rbinom(n_individuals, size = 1, prob = prob0)
#     Y1_bin <- rbinom(n_individuals, size = 1, prob = prob1)
# 
#     # Observed binary outcome
#     Y_1 <- Y0_bin * (1 - treatment) + Y1_bin * treatment
# 
#     # ---------------------------------------------------
#     # Generate Continuous Outcomes
#     # ---------------------------------------------------
#     # Potential Outcomes
#     Y0_cont <- 1 + 0.5 * covariates$X1 - 0.7 * covariates$X2 + rnorm(n_individuals, 0, 1)
#     Y1_cont <- Y0_cont + tau_cont
# 
#     # Observed continuous outcome
#     Y_2 <- Y0_cont * (1 - treatment) + Y1_cont * treatment
# 
#     # ---------------------------------------------------
#     # Compute Win Proportion for Nearest Neighbor Pairing
#     # ---------------------------------------------------
#     base <- data.frame(covariates, arm = treatment, Y_1, Y_2)
# 
#     res_nn <- nn_win_ratio(
#       base = base,
#       treatment_name = "arm",
#       outcome_names = c("Y_1", "Y_2")
#     )
#     res_nn <- res_nn$win_proportion$value
#     current_result_nn <- c(current_result_nn, res_nn)
#     # ---------------------------------------------------
#     # Compute Win Proportion for Complete Pairing
#     # ---------------------------------------------------
#     data <- base
# 
#     data$id <- 1:n_individuals # Format for use with the WINS Package
#     res_comp <- WINS::win.stat(
#       data = data,
#       ep_type = c("binary", "continuous"),
#       arm.name = c(1, 0),
#       priority = c(1, 2),
#       summary.print = FALSE
#     )
#     res_comp <- res_comp$Win_prop[[2]]
#     current_result_comp <- c(current_result_comp, res_comp)
#   }
#   results_nn[[as.character(n_individuals)]] <- current_result_nn
#   results_comp[[as.character(n_individuals)]] <- current_result_comp
# }

## ----myplot2, fig.width=6, fig.height=4, dev='png', dpi=96, eval = FALSE------
# results_nn$row_id <- NULL
# results_comp$row_id <- NULL
# 
# # # Interleave columns for side-by-side plotting
# combined <- unlist(Map(list, results_nn, results_comp), recursive = FALSE)
# 
# # Set names for x-axis
# names(combined) <- rep(names(results_nn), each = 2)
# 
# # Plot
# boxplot(combined,
#   col = rep(c("skyblue", "orange"), ncol(results_nn)),
#   las = 2,
#   main = "Comparison of the win proportions computed \nwith Nearest Neighbors and Complete Pairings",
#   ylab = "Win proportion",
#   xlab = "Number of patients (n)"
# )
# 
# legend("bottomleft",
#   legend = c("NN", "Complete"),
#   fill = c("skyblue", "orange")
# )
# 
# abline(h = 0.5, lty = 2, col = "red")

## ----echo=FALSE, out.width="70%"----------------------------------------------
knitr::include_graphics("figures/Example3.png")

## ----message=FALSE, warning=FALSE, results='hide', cache=TRUE, eval = FALSE----
# library(doParallel)
# library(foreach)
# 
# n_rounds <- 100
# n <- 10000
# mu <- c(0, 0)
# sigma <- matrix(c(1, 0.6, 0.6, 1), 2, 2)
# tau_cont <- 2
# tau_bin <- 0.8
# 
# # Set up cluster
# n_cores <- parallel::detectCores() - 1  # leave 1 core free
# cl <- makeCluster(n_cores)
# registerDoParallel(cl)
# 
# results_eif <- foreach(
#   round = 1:n_rounds,
#   .combine = c,
#   .packages = c("MASS", "causalWins")
# ) %dopar% {
#   set.seed(123 + n + round)
# 
#   # -----------------------------------------------------------
#   # Generate Covariates and Treatment (observational)
#   # -----------------------------------------------------------
# 
#   covariates <- mvrnorm(n, mu = mu, Sigma = sigma) # covariates
#   colnames(covariates) <- c("X1", "X2")
#   covariates <- as.data.frame(covariates)
# 
#   p_treatment <- 1 / (1 + exp(-(0.5 * covariates$X1 - 0.3 * covariates$X2))) # non-rct
#   treatment <- rbinom(n, 1, p_treatment)
# 
#   # -----------------------------------------------------------
#   # Generate Binary Outcomes
#   # -----------------------------------------------------------
#   # Potential Outcomes
#   lin_pred0 <- -1 + 0.3 * covariates$X1 - 0.5 * covariates$X2
#   lin_pred1 <- lin_pred0 + tau_bin
# 
#   # Sample binary potential outcomes
#   prob0 <- 1 / (1 + exp(-lin_pred0))
#   prob1 <- 1 / (1 + exp(-lin_pred1))
#   Y0_bin <- rbinom(n, size = 1, prob = prob0)
#   Y1_bin <- rbinom(n, size = 1, prob = prob1)
# 
#   # Observed binary outcome
#   Y_1 <- Y0_bin * (1 - treatment) + Y1_bin * treatment
# 
#   # -----------------------------------------------------------
#   # Generate Continuous Outcomes
#   # -----------------------------------------------------------
#   # Potential Outcomes
#   Y0_cont <- 1 + 0.5 * covariates$X1 - 0.7 * covariates$X2 + rnorm(n, 0, 1)
#   Y1_cont <- Y0_cont + tau_cont
# 
#   # Observed continuous outcome
#   Y_2 <- Y0_cont * (1 - treatment) + Y1_cont * treatment
# 
#   # -----------------------------------------------------------
#   # Compute Win Proportion for drf and eif
#   # -----------------------------------------------------------
#   base <- data.frame(covariates, arm = treatment, Y_1, Y_2)
# 
#   res_eif <- eif_win_ratio(
#     base = base,
#     treatment_name = "arm",
#     outcome_names = c("Y_1", "Y_2"),
#     propensity_model = "glm"
#   )
# 
#   res_eif <- res_eif$win_proportion$value
# 
#   res_eif
# }
# stopCluster(cl)

## ----message=FALSE, warning=FALSE, results='hide', cache=TRUE, eval = FALSE----
# library(MASS)
# mu <- c(0, 0) # Covariates mean
# sigma <- matrix(c(1, 0.6, 0.6, 1), 2, 2)
# tau_cont <- 2 # ATE of continuous outcome
# tau_bin <- 0.8 # ATE on logit scale of binary outcome
# 
# # Default win function
# win_function <- function(y, z) {
#   if (all(y == z)) {
#     return(0) # Tie
#   }
#   result <- (y > z)[which.max(y != z)] * 1
#   return(result)
# }
# 
# mean_given <- function(x) {
#   n_samples <- 10000
#   # Binary potential outcomes
#   lin_pred0 <- -1 + 0.3 * x[1] - 0.5 * x[2]
#   lin_pred1 <- lin_pred0 + tau_bin
#   prob0 <- 1 / (1 + exp(-lin_pred0))
#   prob1 <- 1 / (1 + exp(-lin_pred1))
#   Y0_bin <- rbinom(n_samples, size = 1, prob = prob0)
#   Y1_bin <- rbinom(n_samples, size = 1, prob = prob1)
# 
#   # Continuous Potential Outcomes
#   Y0_cont <- 1 + 0.5 * x[1] - 0.7 * x[2] + rnorm(n_samples, 0, 1)
#   Y1_cont <- 1 + 0.5 * x[1] - 0.7 * x[2] + rnorm(n_samples, 0, 1) + tau_cont
# 
#   wins_given_x <- mapply(function(yb, yc, zb, zc) {
#     win_function(c(yb, yc), c(zb, zc))
#   }, Y1_bin, Y1_cont, Y0_bin, Y0_cont)
# 
#   return(mean(wins_given_x))
# }
# 
# set.seed(123)
# 
# covariates <- as.data.frame(mvrnorm(10000, mu = mu, Sigma = sigma))
# 
# tau_star <- mean(apply(covariates, 1, function(row) mean_given(row)))
# 

## ----myplot3, fig.width=6, fig.height=4, dev='png', dpi=96, eval = FALSE------
# boxplot(results_eif,
#   main = "Win proportion with EIF",
#   xlab = "Number of patients 10000",
#   ylab = "Win proportion"
# )
# abline(h = tau_star, lty = 2, col = "red")

## ----echo=FALSE, out.width="70%"----------------------------------------------
knitr::include_graphics("figures/EIF_resPlot.png")