## ----message=FALSE, warning=FALSE--------------------------------------------- library("caret") library("pre") library("pROC") library("ggplot2") ## ----------------------------------------------------------------------------- BigSummary <- function (data, lev = NULL, model = NULL) { brscore <- try(mean((data[, lev[2]] - ifelse(data$obs == lev[2], 1, 0)) ^ 2), silent = TRUE) rocObject <- try(pROC::roc(ifelse(data$obs == lev[2], 1, 0), data[, lev[2]], direction = "<", quiet = TRUE), silent = TRUE) if (inherits(brscore, "try-error")) brscore <- NA rocAUC <- if (inherits(rocObject, "try-error")) { NA } else { rocObject$auc } return(c(AUCROC = rocAUC, Brier = brscore)) } ## ----------------------------------------------------------------------------- fitControl <- trainControl(method = "repeatedcv", number = 10, repeats = 1, classProbs = TRUE, ## get probabilities, not class labels summaryFunction = BigSummary, verboseIter = TRUE) ## ----------------------------------------------------------------------------- preGrid <- getModelInfo("pre")[[1]]$grid( maxdepth = 3L:4L, learnrate = c(.01, .05, .1), penalty.par.val = c("lambda.1se", "lambda.min"), sampfrac = c(0.5, 0.75, 1.0)) head(preGrid) ## ----------------------------------------------------------------------------- carrillo$sexo <- factor(paste0("g", as.character(carrillo$sexo))) ## ----------------------------------------------------------------------------- set.seed(42) train_ids <- sample(1:nrow(carrillo), .75*nrow(carrillo)) ## ----eval=FALSE--------------------------------------------------------------- # set.seed(42) # pre_tune <- train(sexo ~ ., data = carrillo[train_ids, ], method = "pre", # ntrees = 500, family = "binomial", # trControl = fitControl, tuneGrid = preGrid, # metric = "Brier", ## Specify "AUCROC" for optimizing AUC # maximize = TRUE) ## ----eval=FALSE--------------------------------------------------------------- # set.seed(42) # pre_tune2 <- train(sexo ~ ., data = carrillo[train_ids, ], method = "pre", # ntrees = 1000, family = "binomial", # trControl = fitControl, tuneGrid = preGrid, # metric = "Brier", maximize = TRUE) ## ----echo=FALSE, eval=FALSE--------------------------------------------------- # save(pre_tune, pre_tune2, file = "Tuning_results.Rda") ## ----echo=FALSE--------------------------------------------------------------- load("Tuning_results.Rda") ## ----fig.width=5, fig.height=4------------------------------------------------ ids <- which(pre_tune$results$Brier == min(pre_tune$results$Brier)) pre_tune$results[ids, c(1:6, 10)] plot(pre_tune, xlab = list(cex = .7), ylab = list(cex = .7), scales = list(cex=.7), par.strip.text=list(cex=.7)) ## ----fig.width=5, fig.height=4------------------------------------------------ ids2 <- which(pre_tune2$results$Brier == min(pre_tune2$results$Brier)) pre_tune2$results[ids2, c(1:6, 10)] plot(pre_tune2, xlab = list(cex = .7), ylab = list(cex = .7), scales = list(cex=.7), par.strip.text=list(cex=.7)) ## ----------------------------------------------------------------------------- set.seed(42) opt_pre_mod <- pre(formula = sexo ~ ., data = carrillo[train_ids, ], sampfrac = 1, maxdepth = 4, ntrees = 1000, family = "binomial") ## ----------------------------------------------------------------------------- set.seed(42) def_pre_mod <- pre(formula = sexo ~ ., data = carrillo[train_ids, ], family = "binomial") ## ----fig.width=5, fig.height=3------------------------------------------------ print(opt_pre_mod, penalty.par.val = "lambda.1se") importance(opt_pre_mod, penalty.par.val = "lambda.1se", cex = .7, cex.main = .7, cex.lab = .7) ## ----warning=FALSE, message=FALSE--------------------------------------------- pre_preds_opt <- predict(opt_pre_mod, newdata = carrillo[-train_ids, ], type = "response", penalty.par.val = "lambda.1se") y_test <- as.numeric(carrillo[-train_ids, "sexo"])-1 mean((pre_preds_opt - y_test)^2) ## Brier score sd((pre_preds_opt - y_test)^2)/sqrt(length(y_test)) ## standard error of SEL auc(response = carrillo[-train_ids, "sexo"], predictor = pre_preds_opt) ## ----warning=FALSE, message=FALSE--------------------------------------------- summary(def_pre_mod) pre_preds_def <- predict(def_pre_mod, newdata = carrillo[-train_ids, ], type = "response") mean((pre_preds_def - y_test)^2) ## Brier score sd((pre_preds_def - y_test)^2)/sqrt(length(y_test)) ## standard error of SEL auc(response = carrillo[-train_ids, "sexo"], predictor = pre_preds_def)