## ---- message = F, warning = F------------------------------------------------
library(tidyverse)
library(specr)

## -----------------------------------------------------------------------------
# Define the specification grid: four predictors crossed with three outcomes,
# all estimated with `lm`
specs <- setup(
  data = example_data,
  y = c("y1", "y2", "y3"),
  x = c("x1", "x2", "x3", "x4"),
  model = "lm"
)

# Overview of the setup (printing 12 rows of the specification table)
summary(specs, rows = 12)

# Run the analysis and look at the first rows of the result table
results <- specr(specs)
results$data %>%
  head()

## -----------------------------------------------------------------------------
# Derive a binary outcome: 1 when y1 lies above its mean, 0 otherwise
data <- mutate(
  example_data,
  y_dich = ifelse(y1 > mean(y1), 1, 0)
)

# Custom model function: fits a GLM with a binomial family.
# Takes a model formula and a data frame and returns the fitted `glm` object.
log_glm <- function(formula, data) {
  glm(formula = formula, data = data, family = binomial())
}

# Cross both predictors with every outcome and both model functions
specs <- setup(
  data = data,
  model = c("lm", "log_glm"),
  x = c("x1", "x2"),
  y = c("y1", "y2", "y3", "y_dich")
)

# Inspect the resulting specification grid
as_tibble(specs)

## -----------------------------------------------------------------------------
# Drop specifications that are not meaningful: `log_glm` is kept only for the
# dichotomous outcome "y_dich", and `lm` is kept only for the other outcomes
specs$specs <- specs$specs %>%
  filter(
    model != "log_glm" | y == "y_dich",
    model != "lm" | y != "y_dich"
  )

# Verify that only the meaningful specifications are left
summary(specs, rows = 8)

# Run the analysis and look at the first rows of the result table
results <- specr(specs)
results$data %>%
  head()

## ---- warning = F, message = F------------------------------------------------
# Add mean (one choice)
# Note: `mean()` only averages its first argument; further positional arguments
# are matched to `trim`, `na.rm`, ... . The four items therefore have to be
# combined with `c()` to obtain the row-wise mean across x1-x4.
data <- data %>%
  rowwise() %>%
  mutate(x_mean = mean(c(x1, x2, x3, x4))) %>%
  ungroup()

# Add custom function with latent measurement models to pass to "models" (another choice)
#
# Builds a lavaan model syntax from the supplied formula plus the measurement
# model of every latent variable named in that formula, then fits it.
#
# Arguments:
#   formula: model formula; it is processed with stringr/paste, so it is
#            expected to be a single character string
#   data:    data passed on unchanged to `lavaan::sem()`
# Returns: the object returned by `lavaan::sem()`.
custom_sem <- function(formula, data) {

  # Make sure lavaan is available; unlike `require()`, this fails loudly
  if (!requireNamespace("lavaan", quietly = TRUE)) {
    stop("Package 'lavaan' is required for `custom_sem()`.", call. = FALSE)
  }

  # Add latent measurement as list (name = latent variable, value = syntax)
  latent <- list(latent_x12 = "latent_x12 =~ x1 + x2")

  # Remove "+ 1" from the formula before handing it to lavaan
  semformula <- stringr::str_remove_all(formula, "\\+ 1")

  # Keep only the latent variables that are actually mentioned in the formula
  valid <- purrr::keep(
    names(latent),
    function(nm) stringr::str_detect(formula, nm)
  )

  # Combine the cleaned formula with the required measurement models
  model_syntax <- paste(semformula, "\n", paste(latent[valid], collapse = " \n "))

  # Pass the model syntax to `sem()`
  lavaan::sem(model = model_syntax, data = data)
}

# Create custom tidy function that extracts the same parameters from different models!
#
# Arguments:
#   x: a fitted model object that `broom::tidy()` can handle
# Returns: a data frame with the columns term, estimate, conf.low, conf.high.
#   For lavaan fits only rows whose term contains " ~ " are kept, and the part
#   of the term before " ~ " is stripped off.
tidy_new <- function(x) {
  # Same parameters for every model type
  estimates <- broom::tidy(x, conf.int = TRUE) %>%
    select(term, estimate, conf.low, conf.high)

  # `inherits()` instead of `class(x) == ...`: models with more than one class
  # (e.g. glm) would otherwise yield a condition of length > 1 in `if`
  if (inherits(x, "lavaan")) {
    estimates <- estimates %>%
      filter(grepl(" ~ ", term)) %>%                    # term needs to be adjusted
      separate(term, c("dv", "term"), sep = " ~ ") %>%  # extract independent variable
      select(-dv)                                       # remove dependent variable
  }

  estimates
}

# Build the specification grid using the custom model and extract functions
specs <- setup(
  data = data,
  x = c("x1", "x2", "x3", "x4", "x_mean", "latent_x12"),
  y = c("y1", "y2"),
  model = c("lm", "custom_sem"),
  fun2 = NULL,        # "glance" is switched off: fit indices differ between models
  fun1 = tidy_new     # custom extract function defined above in this script
)

# First look (non-meaningful specifications are still included)
summary(specs, rows = 12)

# Keep `custom_sem` only for latent predictors and `lm` only for observed ones
specs$specs <- specs$specs %>%
  filter(
    model != "custom_sem" | grepl("latent", x),
    model != "lm" | !grepl("latent", x)
  )

# Second look after filtering
summary(specs, rows = 12)

# Run the analysis and plot the results
results <- specr(specs)
plot(results, choices = c("x", "y"))

## -----------------------------------------------------------------------------
# Specifications that include all combinations of the covariates
# (a plain vector of control variables is enough)
specs1 <- setup(
  data = example_data,
  model = "lm",
  x = c("x1", "x2"),
  y = c("y1", "y2"),
  controls = c("c1", "c2", "c3", "c4")
)

# Specifications that include only: no covariates, each covariate individually,
# and all covariates together
specs2 <- setup(
  data = example_data,
  model = "lm",
  x = c("x1", "x2"),
  y = c("y1", "y2"),
  controls = c("c1", "c2", "c3", "c4"),
  simplify = TRUE     # this is the difference to specs1
)

# Compare the control sets of both setups
specs1$specs %>%
  distinct(controls)
specs2$specs %>%
  distinct(controls)

## -----------------------------------------------------------------------------
# Covariates can be supplied as groups: each string is one control choice
specs3 <- setup(
  data = example_data,
  model = "lm",
  x = c("x1", "x2"),
  y = c("y1", "y2"),
  controls = c("c1 + c2", "c3 + c4")
)

# Inspect the resulting control sets
specs3$specs %>%
  distinct(controls)

## -----------------------------------------------------------------------------
# Use `add_to_formula` for terms that should appear in every model
specs4 <- setup(
  data = example_data,
  model = "lm",
  x = c("x1", "x2"),
  y = c("y1", "y2"),
  controls = c("c1", "c2"),
  add_to_formula = "c3"
)

# First six rows: `c3` is added to each formula, but is not part of controls
specs4$specs[seq_len(6), ]

## -----------------------------------------------------------------------------
# Using a covariate that is also an independent or dependent variable
specs5 <- setup(
  data = example_data,
  model = "lm",
  x = c("x1", "x2"),
  y = c("y1", "y2"),
  controls = c("x1", "y1")
)

# Inspect the grid (see how only 9 specifications are kept)
specs5$specs

## -----------------------------------------------------------------------------
# Specification grid with subset analyses based on the levels of two
# grouping variables
specs <- setup(
  data = example_data,
  x = c("x1", "x2"),
  y = c("y1", "y2"),
  model = c("lm"),
  subsets = list(
    group1 = unique(example_data$group1),
    group2 = unique(example_data$group2)
  ),
  controls = "c1"
)

# Overview of the setup
summary(specs)

# List the subsets (in this case, 12 different types of subset analysis,
# including using "all" subjects)
specs$specs %>%
  distinct(subsets)

# Run the analysis and plot the results
results <- specr(specs)
plot(results, choices = c("x", "y", "subsets"))

## -----------------------------------------------------------------------------
# Flag outliers on y1 for a range of arbitrary thresholds: a value inside
# mean +/- k*SD is labelled with the threshold, everything else is "outlier"
data <- mutate(
  data,
  outlier1 = ifelse(y1 >= mean(y1) - 2*sd(y1) & y1 <= mean(y1) + 2*sd(y1),
                    "2.0*SD", "outlier"),
  outlier2 = ifelse(y1 >= mean(y1) - 2.1*sd(y1) & y1 <= mean(y1) + 2.1*sd(y1),
                    "2.1*SD", "outlier"),
  outlier3 = ifelse(y1 >= mean(y1) - 2.2*sd(y1) & y1 <= mean(y1) + 2.2*sd(y1),
                    "2.2*SD", "outlier"),
  outlier4 = ifelse(y1 >= mean(y1) - 2.3*sd(y1) & y1 <= mean(y1) + 2.3*sd(y1),
                    "2.3*SD", "outlier"),
  outlier5 = ifelse(y1 >= mean(y1) - 2.4*sd(y1) & y1 <= mean(y1) + 2.4*sd(y1),
                    "2.4*SD", "outlier"),
  outlier6 = ifelse(y1 >= mean(y1) - 2.5*sd(y1) & y1 <= mean(y1) + 2.5*sd(y1),
                    "2.5*SD", "outlier")
)

# Specification grid where each outlier threshold defines one subset
specs <- setup(
  data = data,
  x = c("x1", "x2"),
  y = c("y1", "y2"),
  model = "lm",
  controls = c("c1", "c2"),
  subsets = list(
    outlier1 = c("2.0*SD"),
    outlier2 = c("2.1*SD"),
    outlier3 = c("2.2*SD"),
    outlier4 = c("2.3*SD"),
    outlier5 = c("2.4*SD"),
    outlier6 = c("2.5*SD")
  )
)

# Keep only the single-threshold subsets and the full sample ("all")
specs$specs <- specs$specs %>%
  filter(
    subsets %in% c("2.0*SD", "2.1*SD", "2.2*SD",
                   "2.3*SD", "2.4*SD", "2.5*SD", "all")
  )

# Inspect the setup (it should now contain only meaningful subsets)
summary(specs, rows = 7)

# Run the analysis and plot the results
results <- specr(specs)
plot(results, choices = c("x", "y", "subsets"))