## Example code for the formulaic package: create.formula(), add.backtick()
## and reduce.existing.formula() applied to the snack.dat data set that is
## loaded from the package below.
## Chunks are delimited by knitr-style `## ----label----` header comments.

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup, echo=FALSE--------------------------------------------------------
library(formulaic)
library(data.table)
library(knitr)
#library(DT)
data("snack.dat", package = "formulaic")

## ----constant,echo=FALSE------------------------------------------------------
# Column names of snack.dat that the later chunks refer to.
id.name <- "User ID"
awareness.name <- "Awareness"
satisfaction.name <- "Satisfaction"
age.name <- "Age"
gender.name <- "Gender"
income.name <- "Income"
region.name <- "Region"
persona.name <- "Persona"
bp.patterns <- "BP_"
consumption.name <- "Consumption"
consideration.name <- "Consideration"
advocacy.name <- "Advocacy"
age.group.name <- "Age Group"
income.group.name <- "Income Group"

# Values named after create.formula() arguments.
max.input.categories <- 20
max.outcome.categories.to.search <- 4
order.as <- "as.specified"
include.backtick <- "as.needed"
format.as <- "formula"
force.main.effects <- TRUE
outcome.name <- awareness.name
reduce <- TRUE
interactions <- list(c(gender.name, income.group.name))
input.patterns <- NULL
variables.to.exclude <- NULL
include.intercept <- TRUE
dat <- snack.dat

## ----example------------------------------------------------------------------
awareness.name <- "Awareness"
variable.names <- c("Age", "Gender", "Income Group", "Region", "Persona", "Typo")
ex.form <- create.formula(
  outcome.name = awareness.name,
  input.names = variable.names,
  dat = snack.dat
)
ex.form$formula

# The whole create.formula() result (not just ex.form$formula) is passed as
# `formula`. NOTE(review): this presumably works because lm() can extract the
# `formula` element from the object -- confirm before changing.
lm_example <- lm(formula = ex.form, data = snack.dat)
summary(lm_example)

## -----------------------------------------------------------------------------
user.outcome.name <- "Satisfaction"
user.input.names <- c("Age Group", "Gender", "Region")
create.formula(
  outcome.name = user.outcome.name,
  input.names = user.input.names
)$formula

## ----dataset------------------------------------------------------------------
list(dim(snack.dat), names(snack.dat))

## ----add.backtick example-----------------------------------------------------
# Variable names are kept as-is because cbind() derives its column names
# ("as.needed", "all") from them.
as.needed <- formulaic::add.backtick(
  x = names(snack.dat),
  include.backtick = "as.needed"
)
all <- formulaic::add.backtick(
  x = names(snack.dat),
  include.backtick = "all"
)
data <- cbind(as.needed, all)
list(data)

## ----add.backtick example 2---------------------------------------------------
create.formula(
  outcome.name = awareness.name,
  input.names = variable.names
)$formula

## ----add.backtick example 3---------------------------------------------------
create.formula(
  outcome.name = awareness.name,
  input.names = variable.names,
  format.as = "character",
  include.backtick = "all"
)$formula

## ----add.backtick example 4---------------------------------------------------
create.formula(
  outcome.name = awareness.name,
  input.names = variable.names,
  format.as = "character",
  include.backtick = "all"
)$formula

## ----add.backtick example 5---------------------------------------------------
create.formula(
  outcome.name = awareness.name,
  input.names = c(
    region.name,
    gender.name,
    sprintf("sqrt(%s^2)", age.name),
    income.group.name,
    "ldkao"
  ),
  format.as = "character",
  include.backtick = "as.needed"
)$formula

## ----create.formula examples basic--------------------------------------------
outcome.name.awareness <- "Awareness"
input.names <- c("Age", "Gender", "Income", "Region", "Persona", "Typo")
basic.form <- create.formula(
  outcome.name = outcome.name.awareness,
  input.names = input.names,
  dat = snack.dat
)
print(basic.form)

## ----create.formula example interactions--------------------------------------
interactions <- list(
  c("Age Group", "Gender"),
  c("Age Group", "Region"),
  c("Age Group", "Gender", "Region")
)
interaction.form <- create.formula(
  outcome.name = outcome.name.awareness,
  input.names = input.names,
  dat = snack.dat,
  interactions = interactions
)
print(interaction.form)

## ----create.formula example input.patterns------------------------------------
bp.pattern <- "BP_"
input.patterns <- c("Gend", bp.pattern)
pattern.form <- create.formula(
  outcome.name = outcome.name.awareness,
  input.names = input.names,
  dat = snack.dat,
  input.patterns = input.patterns
)
print(pattern.form)

## ----create.formula example dot.1---------------------------------------------
dot.form.1 <- create.formula(
  outcome.name = outcome.name.awareness,
  input.names = ".",
  dat = snack.dat
)
print(dot.form.1)

## ----create.formula example dot.2---------------------------------------------
input.names <- c("Gender", ".")
dot.form.2 <- create.formula(
  outcome.name = outcome.name.awareness,
  input.names = input.names,
  dat = snack.dat
)
print(dot.form.2)

## ----create.formula example dot.3---------------------------------------------
input.names <- c("Typo", ".")
dot.form.3 <- create.formula(
  outcome.name = outcome.name.awareness,
  input.names = input.names,
  dat = snack.dat
)
print(dot.form.3)

## ----create.formula example variables.to.exclude.form-------------------------
input.names <- c(
  "Age", "Gender", "Income", "Region", "Persona", "Typo", "Age Group"
)
# "Inco" and "Reg ion" are reproduced exactly as written in the original
# interaction list.
interactions <- list(
  c("Age", "Gender"),
  c("Age", "Income"),
  c("Age", "Gender", "Income"),
  c("Gender", "Inco"),
  c("Age", "Reg ion")
)
bp.pattern <- "BP_"
variables.to.exclude <- c("BP_Delicious_0_10", "Gender")
variables.to.exclude.form <- create.formula(
  outcome.name = outcome.name.awareness,
  input.names = input.names,
  interactions = interactions,
  input.patterns = bp.pattern,
  variables.to.exclude = variables.to.exclude,
  dat = snack.dat
)
print(variables.to.exclude.form)

## ----create.formula outcomes as inputs----------------------------------------
# The outcome ("Income") also appears, twice, among the inputs.
input.names <- c("Income", "Age", "Income")
income.name <- "Income"
outcomes.as.inputs.form <- create.formula(
  outcome.name = income.name,
  input.names = input.names,
  dat = snack.dat
)
print(outcomes.as.inputs.form)

## ----create.formula example duplicated.inputs and interactions----------------
duplicated.inputs <- c(rep.int(x = "Age", times = 2), "Income")
duplicated.interactions <- list(c("Age", "Income"), c("Age", "Income"))
duplicated.form <- create.formula(
  outcome.name = outcome.name.awareness,
  input.names = duplicated.inputs,
  interactions = duplicated.interactions,
  dat = snack.dat
)
print(duplicated.form)

## ----create.formula example with typo-----------------------------------------
# No `dat` is supplied in this call; the next chunk repeats it with `dat`.
input.names <- c("Age", "Typo")
income.name <- "Income"
formula.with.typo <- create.formula(
  outcome.name = income.name,
  input.names = input.names
)
print(formula.with.typo)

## ----create.formula example without typo--------------------------------------
formula.without.typo <- create.formula(
  outcome.name = income.name,
  input.names = input.names,
  dat = snack.dat
)
print(formula.without.typo)

## ----Numeric variable with no variation---------------------------------------
snack.dat[, .N, keyby = c("Awareness", "Consideration")]

## ----lack of contrast example numerical variables-----------------------------
formula.consideration <- create.formula(
  outcome.name = consideration.name,
  input.names = c(age.name, awareness.name)
)
print(formula.consideration$formula)
glm(
  formula = formula.consideration,
  data = snack.dat,
  family = "binomial"
)$coefficients

## ----create.formula with lack of contrast categorical 1-----------------------
formula.consideration <- create.formula(
  outcome.name = consideration.name,
  input.names = c(age.name, awareness.name),
  dat = snack.dat,
  reduce = TRUE
)
print(formula.consideration)

## ----lack of contrast example categorical variables 0-------------------------
formula.awareness <- create.formula(
  outcome.name = awareness.name,
  input.names = c(age.group.name, gender.name)
)
print(formula.awareness$formula)

## ----create.formula with lack of contrast 2, eval=FALSE, include=TRUE---------
#
#
# formula.awareness <-
#   create.formula(
#     outcome.name = awareness.name,
#     input.names = c(age.group.name, gender.name),
#     dat = snack.dat[get(age.group.name) == "[ 18, 35)", ],
#     reduce = TRUE
#   )
#
# print(formula.awareness)

## ----create.formula lack of contrast 3----------------------------------------
formula.consideration.1 <- create.formula(
  outcome.name = consideration.name,
  input.names = c(age.group.name, gender.name, awareness.name),
  dat = snack.dat,
  reduce = TRUE,
  max.outcome.categories.to.search = 1
)
print(formula.consideration.1)

## ----create.formula lack of contrast 4----------------------------------------
formula.consideration.2 <- create.formula(
  outcome.name = consideration.name,
  input.names = c(age.group.name, gender.name, awareness.name),
  dat = snack.dat,
  reduce = TRUE,
  max.outcome.categories.to.search = 2
)
print(formula.consideration.2)

## ----create.formula large volume of categorical variables 01------------------
create.formula(
  outcome.name = satisfaction.name,
  input.names = c(age.name, income.name, region.name, id.name),
  dat = snack.dat,
  reduce = TRUE,
  max.input.categories = 30
)$formula

## ----create.formula large volume of categorical variables---------------------
create.formula(
  outcome.name = income.name,
  input.names = ".",
  reduce = TRUE,
  dat = snack.dat,
  max.input.categories = 30
)$formula

## ----create.formula transformation 1------------------------------------------
create.formula(
  outcome.name = income.name,
  input.names = c(
    region.name,
    gender.name,
    sprintf("sqrt(%s^2) * log(%s)", age.name, income.name),
    income.group.name,
    "ldkao"
  ),
  reduce = TRUE,
  interactions = list(c(gender.name, income.group.name)),
  input.patterns = NULL,
  force.main.effects = TRUE,
  max.input.categories = 20,
  max.outcome.categories.to.search = 4,
  order.as = "as.specified",
  include.backtick = "as.needed",
  format.as = "formula",
  variables.to.exclude = NULL,
  include.intercept = TRUE,
  dat = snack.dat
)$formula

## ----create.formula transformation 2------------------------------------------
# input.names and interactions still hold the values assigned in earlier
# chunks.
res <- create.formula(
  outcome.name = awareness.name,
  input.names = input.names,
  interactions = interactions,
  dat = snack.dat,
  reduce = TRUE
)
glm(formula = res$formula, data = snack.dat, family = "binomial")
res

## ----reduce.existing.formula example------------------------------------------
the.initial.formula <- "Income ~ ."
reduce.existing.formula(
  the.initial.formula = the.initial.formula,
  dat = snack.dat,
  max.input.categories = 30
)$formula