# Walkthrough script for the 'modelgrid' package, organised in chunks marked
# by `## ----<options>----` headers. It builds a model grid on the
# GermanCredit data, adds several logistic regression configurations, shows
# how shared and model specific settings are consolidated, trains the grid,
# repeats the exercise with a 'recipes' based setup and finally edits and
# removes model configurations.
#
# Each statement sits on its own line: a `#` comments out the remainder of a
# physical line, so code must never share a line with a chunk header.

## ----include = FALSE----------------------------------------------------------
library(magrittr)
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----message = FALSE----------------------------------------------------------
library(modelgrid)
mg <- model_grid()

mg

## ----message = FALSE----------------------------------------------------------
library(magrittr)
library(caret)
library(dplyr)
library(purrr)

# Load data on German credit applications.
data(GermanCredit)

# Construct empty model grid and define shared settings.
mg <-
  model_grid() %>%
  share_settings(
    y = GermanCredit[["Class"]],
    x = GermanCredit %>% select(-Class),
    preProc = "nzv",
    metric = "ROC",
    trControl = trainControl(
      method = "cv",
      number = 5,
      summaryFunction = twoClassSummary,
      classProbs = TRUE
    )
  )

purrr::map_chr(mg$shared_settings, class)

## -----------------------------------------------------------------------------
mg <-
  mg %>%
  add_model(
    model_name = "Logistic Regression Baseline",
    method = "glm",
    family = binomial(link = "logit")
  )

mg$models

## -----------------------------------------------------------------------------
mg <-
  mg %>%
  add_model(
    model_name = "Logistic Regression PCA",
    method = "glm",
    family = binomial(link = "logit"),
    preProc = c("nzv", "center", "scale", "pca")
  ) %>%
  add_model(
    model_name = "Logistic Regression PCA 98e-2",
    method = "glm",
    family = binomial(link = "logit"),
    preProc = c("nzv", "center", "scale", "pca"),
    custom_control = list(preProcOptions = list(thresh = 0.98))
  )

mg$models

## -----------------------------------------------------------------------------
# there are no conflicts.
dplyr::intersect(
  names(mg$shared_settings),
  names(mg$models$`Logistic Regression Baseline`)
)

# consolidate model settings into one model.
consolidate_model(
  mg$shared_settings,
  mg$models$`Logistic Regression Baseline`
) %>%
  purrr::map_chr(class)

## -----------------------------------------------------------------------------
# the 'preProc' setting is defined both in the shared and model specific
# settings.
dplyr::intersect(
  names(mg$shared_settings),
  names(mg$models$`Logistic Regression PCA`)
)
mg$shared_settings$preProc
mg$models$`Logistic Regression PCA`$preProc

# consolidate model settings into one model.
consolidate_model(
  mg$shared_settings,
  mg$models$`Logistic Regression PCA`
) %>%
  magrittr::extract2("preProc")

## -----------------------------------------------------------------------------
# the 'trControl$preProcOptions$thresh' setting is defined in the shared
# settings but customized in the model specific settings.
mg$shared_settings$trControl$preProcOptions$thresh
mg$models$`Logistic Regression PCA 98e-2`$custom_control$preProcOptions$thresh

# consolidate model settings into one model.
# A character vector passed to extract2() indexes recursively, i.e. it reads
# trControl -> preProcOptions -> thresh from the consolidated settings.
consolidate_model(
  mg$shared_settings,
  mg$models$`Logistic Regression PCA 98e-2`
) %>%
  magrittr::extract2(c("trControl", "preProcOptions", "thresh"))

## ----message = FALSE, warning = FALSE-----------------------------------------
# train models from model grid.
mg <- train(mg)

# the fitted models now appear in the 'model_fits' component.
names(mg$model_fits)

# extract performance.
mg$model_fits %>%
  caret::resamples(.) %>%
  summary(.)

## ----warning = FALSE----------------------------------------------------------
# train models from model grid.
mg <-
  mg %>%
  add_model(model_name = "Funky Forest", method = "rf") %>%
  train(.)

names(mg$model_fits)

## ----message = FALSE----------------------------------------------------------
# create base recipe.
library(recipes)
rec <-
  recipe(GermanCredit, formula = Class ~ .) %>%
  step_nzv(all_predictors())

## ----warning = FALSE----------------------------------------------------------
mg_rec <-
  model_grid() %>%
  share_settings(
    metric = "ROC",
    data = GermanCredit,
    trControl = trainControl(
      method = "cv",
      number = 5,
      summaryFunction = twoClassSummary,
      classProbs = TRUE
    )
  ) %>%
  add_model(
    model_name = "Log Reg",
    x = rec,
    method = "glm",
    family = binomial(link = "logit")
  ) %>%
  add_model(
    model_name = "Log Reg PCA",
    x = rec %>%
      step_center(all_predictors()) %>%
      step_scale(all_predictors()) %>%
      step_pca(all_predictors()),
    method = "glm",
    family = binomial(link = "logit")
  ) %>%
  train(.)

mg_rec$model_fits %>%
  caret::resamples(.) %>%
  summary(.)

## -----------------------------------------------------------------------------
# existing model configuration.
mg$models$`Logistic Regression PCA`

# edit model configuration.
# NOTE(review): the edited configuration is not trained in this script. caret's
# preProcess() appears to spell this method in lower case ("ica") - confirm the
# "ICA" label before passing this configuration to train().
mg <-
  mg %>%
  edit_model(
    model_name = "Logistic Regression PCA",
    preProc = c("nzv", "center", "scale", "ICA")
  )

mg$models$`Logistic Regression PCA`

## -----------------------------------------------------------------------------
names(mg$models)

# remove model configuration.
mg <-
  mg %>%
  remove_model("Funky Forest")

names(mg$models)