## ----setup, echo=FALSE--------------------------------------------------------
suppressMessages({
library(palmerpenguins)
library(dplyr)
library(tidyhte)
library(magrittr)
library(SuperLearner)
library(ggplot2)

theme_set(theme_minimal())
})

set.seed(100)
n <- nrow(penguins)

## ----dgp----------------------------------------------------------------------
penguins <- within(penguins, {
  id <- 1:n
  propensity_score <- 0.5
  treatment <- rbinom(n, 1, propensity_score)
  tau <- 0.2 * (species == "Gentoo") - 0.2 * (species == "Adelie") + rnorm(n, sd = 0.05)
  food_consumed_g <- rnorm(n, 500, 5) * (1 + tau * treatment)
  body_mass_g <- body_mass_g * (1 + tau * treatment)
})

## ----cfg----------------------------------------------------------------------
cfg <- basic_config() %>%
    add_known_propensity_score("propensity_score") %>%
    add_outcome_model("SL.glmnet", alpha = c(0.0, 1.0)) %>%
    add_moderator("Stratified", species, island, sex, year) %>%
    add_moderator("KernelSmooth", bill_length_mm, bill_depth_mm, flipper_length_mm) %>%
    add_vimp(sample_splitting = FALSE)

## ----attach-------------------------------------------------------------------
penguins %<>% attach_config(cfg)

## ----split--------------------------------------------------------------------
penguins %<>% make_splits(id, species, sex, flipper_length_mm, .num_splits = 3)

## ----sl, eval = FALSE---------------------------------------------------------
#  learners <- create.Learner(
#      "SL.glmnet",
#      tune = list(
#          alpha = c(0.05, 0.15, 0.2, 0.25, 0.5, 0.75)
#      ),
#      detailed_names = TRUE,
#      name_prefix = paste0("SLglmnet")
#  )
#  
#  CV.SuperLearner(label, covariates, SL.library = learners$names)

## ----eval = FALSE-------------------------------------------------------------
#  add_outcome_model(cfg, "SL.glmnet", alpha = c(0.0, 0.25, 0.5, 0.75, 1.0))

## ----plugins, results='hide', message=FALSE, warning=FALSE--------------------
penguins %<>% produce_plugin_estimates(
  # outcome
  food_consumed_g,
  # treatment
  treatment,
  # covariates
  species, island, sex, year, bill_length_mm, bill_depth_mm, flipper_length_mm
)

## ----psi, results='hide', message=FALSE, warning=FALSE------------------------
penguins %<>% construct_pseudo_outcomes(food_consumed_g, treatment)

## ----qoi, results='hide', message=FALSE, warning=FALSE------------------------
penguins %>%
  estimate_QoI(species, island, sex, year, bill_length_mm, bill_depth_mm, flipper_length_mm) ->
  results

## ----show_results-------------------------------------------------------------
results

## ----discrete_mcate, fig.height = 4, fig.width = 8----------------------------
filter(results, estimand == "MCATE", is.na(value)) %>%
ggplot(aes(level, estimate)) +
geom_point() +
geom_linerange(aes(ymin = estimate - 1.96 * std_error, ymax = estimate + 1.96 * std_error)) +
geom_hline(yintercept = 0, linetype = "dashed") +
coord_flip() +
facet_wrap(~term, scales = "free_y")

## ----cts_mcate, fig.height = 6, fig.width = 8---------------------------------
filter(results, estimand == "MCATE", is.na(level)) %>%
ggplot(aes(value, estimate)) +
geom_line() +
geom_ribbon(
  aes(ymin = estimate - 1.96 * std_error, ymax = estimate + 1.96 * std_error),
  alpha = 0.5
) +
geom_hline(yintercept = 0, linetype = "dashed") +
scale_x_continuous("Covariate value") +
scale_y_continuous("CATE") +
coord_flip() +
facet_wrap(~term, scales = "free_y")

## ----risk, fig.height = 2, fig.width = 8--------------------------------------
filter(results, estimand == "SL risk") %>%
ggplot(aes(reorder(term, estimate), estimate)) +
geom_point() +
geom_linerange(
  aes(ymin = estimate - 1.96 * std_error, ymax = estimate + 1.96 * std_error),
  alpha = 0.5
) +
geom_hline(yintercept = 0, linetype = "dashed") +
scale_x_discrete("") +
scale_y_continuous("Risk") +
facet_wrap(~level) +
coord_flip()

## ----coef, fig.height = 2, fig.width = 8--------------------------------------
filter(results, estimand == "SL coefficient") %>%
ggplot(aes(reorder(term, estimate), estimate)) +
geom_point() +
geom_linerange(
  aes(ymin = estimate - 1.96 * std_error, ymax = estimate + 1.96 * std_error),
  alpha = 0.5
) +
geom_hline(yintercept = 0, linetype = "dashed") +
scale_x_discrete("") +
scale_y_continuous("Coefficient") +
facet_wrap(~level) +
coord_flip()

## ----vimp, fig.height = 4, fig.width = 8--------------------------------------
filter(results, estimand == "VIMP") %>%
ggplot(aes(reorder(term, estimate), estimate)) +
geom_point() +
geom_linerange(
  aes(ymin = estimate - 1.96 * std_error, ymax = estimate + 1.96 * std_error),
  alpha = 0.5
) +
geom_hline(yintercept = 0, linetype = "dashed") +
scale_x_discrete("") +
scale_y_continuous("Reduction in R²") +
coord_flip()

## ----repeat, message=FALSE,warning=FALSE--------------------------------------
penguins %<>% produce_plugin_estimates(
  # outcome
  body_mass_g,
  # treatment
  treatment,
  # covariates
  species, island, sex, year, bill_length_mm, bill_depth_mm, flipper_length_mm
) %>%
construct_pseudo_outcomes(body_mass_g, treatment) %>%
estimate_QoI(
  species, island, sex, year, bill_length_mm, bill_depth_mm, flipper_length_mm
) -> results_mass

## ----combine------------------------------------------------------------------
results_all <- bind_rows(
  results %>% mutate(outcome = "food_consumed_g"),
  results_mass %>% mutate(outcome = "body_mass_g")
)