# Code extracted (knitr::purl style) from a DataRobot "advanced model insights"
# vignette. Chunks marked `eval = FALSE` are the code shown to readers; they
# need a live DataRobot connection and are therefore kept commented out. The
# chunks that actually run read cached .rds / .png artifacts instead, so the
# script works offline as long as those files sit in the working directory.

## ----results = "asis", message = FALSE, warning = FALSE, eval = FALSE---------
# library(datarobot)
# ConnectToDataRobot(endpoint = "http:///api/v2", token = "")

## ----results = "asis", message = FALSE, warning = FALSE, eval = FALSE---------
# lendingClubURL <- "https://s3.amazonaws.com/datarobot_public_datasets/10K_Lending_Club_Loans.csv"
# project <- StartProject(dataSource = lendingClubURL,
#                         projectName = "AdvancedModelInsightsVignette",
#                         mode = "auto",
#                         target = "is_bad",
#                         workerCount = "max",
#                         wait = TRUE)

## ----results = "asis", message = FALSE, warning = FALSE, eval = FALSE---------
# results <- as.data.frame(ListModels(project))
# saveRDS(results, "resultsModelInsights.rds")
# library(knitr)
# kable(head(results), longtable = TRUE, booktabs = TRUE, row.names = TRUE)

## ----echo = FALSE, results = "asis", message = FALSE, warning = FALSE---------
# Offline counterpart of the chunk above: render the cached model table.
results <- readRDS("resultsModelInsights.rds")
library(knitr)
kable(head(results), longtable = TRUE, booktabs = TRUE, row.names = TRUE)

## ----results = "asis", message = FALSE, warning = FALSE, eval = FALSE---------
# project <- GetProject("5eed0d790ef80408ae212f09")
# allModels <- ListModels(project)
# saveRDS(allModels, "modelsModelInsights.rds")
# modelFrame <- as.data.frame(allModels)
# metric <- modelFrame$validationMetric
# if (project$metric %in% c('AUC', 'Gini Norm')) {
#   bestIndex <- which.max(metric)
# } else {
#   bestIndex <- which.min(metric)
# }
# bestModel <- allModels[[bestIndex]]
# bestModel$modelType

## ----echo = FALSE, results = "asis", message = FALSE, warning = FALSE---------
allModels <- readRDS("modelsModelInsights.rds")
# NOTE(review): takes the first cached model instead of repeating the
# metric-based selection shown above; presumably the cached list is already
# ordered best-first -- confirm against the saved fixture.
bestModel <- allModels[[1]]
bestModel$modelType

## ----results = "asis", message = FALSE, warning = FALSE, eval = FALSE---------
# lc <- GetLiftChart(bestModel)
# saveRDS(lc, "liftChartModelInsights.rds")
# head(lc)

## ----echo = FALSE, results = "asis", message = FALSE, warning = FALSE---------
lc <- readRDS("liftChartModelInsights.rds")
head(lc)

## ----results = "asis", message = FALSE, warning = FALSE, eval = FALSE---------
# ValidationLiftChart <- GetLiftChart(bestModel, source = "validation")
# dr_dark_blue <- "#08233F"
# dr_blue <- "#1F77B4"
# dr_orange <- "#FF7F0E"
#
# # Function to plot lift chart
# library(data.table)
# LiftChartPlot <- function(ValidationLiftChart, bins = 10) {
#   if (60 %% bins == 0) {
#     ValidationLiftChart$bins <- rep(seq(bins), each = 60 / bins)
#     ValidationLiftChart <- data.table(ValidationLiftChart)
#     ValidationLiftChart[, actual := mean(actual), by = bins]
#     ValidationLiftChart[, predicted := mean(predicted), by = bins]
#     unique(ValidationLiftChart[, -"binWeight"])
#   } else {
#     "Please provide bins less than 60 and divisor of 60"
#   }
# }
# LiftChartData <- LiftChartPlot(ValidationLiftChart)
# saveRDS(LiftChartData, "LiftChartDataVal.rds")
# par(bg = dr_dark_blue)
# plot(LiftChartData$Actual, col = dr_orange, pch = 20, type = "b",
#      main = "Lift Chart", xlab = "Bins", ylab = "Value")
# lines(LiftChartData$Predicted, col = dr_blue, pch = 20, type = "b")
#
# NOTE(review): LiftChartPlot() above aggregates the lowercase columns
# `actual` / `predicted`, so `$Actual` / `$Predicted` look like they would be
# NULL here -- confirm before enabling this chunk.

## ----echo = FALSE, results = "asis", message = FALSE, warning = FALSE---------
# dr_dark_blue <- "#08233F"
# dr_blue <- "#1F77B4"
# dr_orange <- "#FF7F0E"
# LiftChartData <- readRDS("LiftChartDataVal.rds")
# par(bg = dr_dark_blue)
# plot(LiftChartData$Actual, col = dr_orange, pch = 20, type = "b",
#      main = "Lift Chart", xlab = "Bins", ylab = "Value")
# lines(LiftChartData$Predicted, col = dr_blue, pch = 20, type = "b")
# The base-graphics version above is disabled; a pre-rendered image is
# embedded instead.
knitr::include_graphics("liftChartValidation.png")

## ----results = "asis", message = FALSE, warning = FALSE, eval = FALSE---------
# AllLiftChart <- ListLiftCharts(bestModel)
# LiftChartData <- LiftChartPlot(AllLiftChart[["crossValidation"]])
# saveRDS(LiftChartData, "LiftChartDataCV.rds")
# par(bg = dr_dark_blue)
# plot(LiftChartData$Actual, col = dr_orange, pch = 20, type = "b",
#      main = "Lift Chart", xlab = "Bins", ylab = "Value")
# lines(LiftChartData$Predicted, col = dr_blue, pch = 20, type = "b")

## ----echo = FALSE, results = "asis", message = FALSE, warning = FALSE---------
# LiftChartData <- readRDS("LiftChartDataCV.rds")
# par(bg = dr_dark_blue)
# plot(LiftChartData$Actual, col = dr_orange, pch = 20, type = "b",
#      main = "Lift Chart", xlab = "Bins", ylab = "Value")
# lines(LiftChartData$Predicted, col = dr_blue, pch = 20, type = "b")
knitr::include_graphics("liftChartCrossValidation.png")

## ---- eval = TRUE-------------------------------------------------------------
library(ggplot2)
# Turn the per-bin sums into per-bin values by dividing by the bin weight.
lc$actual <- lc$actual / lc$binWeight
lc$predicted <- lc$predicted / lc$binWeight
# Order bins by predicted value so the x axis runs from low to high prediction.
lc <- lc[order(lc$predicted), ]
lc$binWeight <- NULL
# Reshape to long form: one row per (bin, series) so ggplot can colour by series.
lc <- data.frame(value = c(lc$actual, lc$predicted),
                 variable = c(rep("Actual", length(lc$actual)),
                              rep("Predicted", length(lc$predicted))),
                 id = rep(seq_along(lc$actual), 2))
ggplot(lc) + geom_line(aes(x = id, y = value, color = variable))

## ----results = "asis", message = FALSE, warning = FALSE, eval = FALSE---------
# roc <- GetRocCurve(bestModel)
# saveRDS(roc, "ROCCurveModelInsights.rds")

## ----echo = FALSE, results = "asis", message = FALSE, warning = FALSE---------
# Load the cached ROC curve under the same name the displayed chunk uses
# (`roc`), rather than overwriting the lift-chart frame `lc`.
roc <- readRDS("ROCCurveModelInsights.rds")

## ----results = "asis", message = FALSE, warning = FALSE, eval = FALSE---------
# dr_dark_blue <- "#08233F"
# dr_roc_green <- "#03c75f"
# ValidationRocCurve <- GetRocCurve(bestModel)
# ValidationRocPoints <- ValidationRocCurve[["rocPoints"]]
# saveRDS(ValidationRocPoints, "ValidationRocPoints.rds")
# par(bg = dr_dark_blue, xaxs = "i", yaxs = "i")
# plot(ValidationRocPoints$falsePositiveRate, ValidationRocPoints$truePositiveRate,
#      main = "ROC Curve",
#      xlab = "False Positive Rate (Fallout)", ylab = "True Positive Rate (Sensitivity)",
#      col = dr_roc_green,
#      ylim = c(0,1), xlim = c(0,1),
#      pch = 20, type = "b")

## ----echo = FALSE, results = "asis", message = FALSE, warning = FALSE---------
dr_dark_blue <- "#08233F"
dr_roc_green <- "#03c75f"
ValidationRocPoints <- readRDS("ValidationRocPoints.rds")
# xaxs/yaxs = "i" removes the default axis padding so the [0, 1] limits are exact.
par(bg = dr_dark_blue, xaxs = "i", yaxs = "i")
plot(ValidationRocPoints$falsePositiveRate, ValidationRocPoints$truePositiveRate,
     main = "ROC Curve",
     xlab = "False Positive Rate (Fallout)", ylab = "True Positive Rate (Sensitivity)",
     col = dr_roc_green,
     ylim = c(0, 1), xlim = c(0, 1),
     pch = 20, type = "b")

## ----results = "asis", message = FALSE, warning = FALSE, eval = FALSE---------
# AllRocCurve <- ListRocCurves(bestModel)
# CrossValidationRocPoints <- AllRocCurve[['crossValidation']][['rocPoints']]
# saveRDS(CrossValidationRocPoints, 'CrossValidationRocPoints.rds')
# par(bg = dr_dark_blue, xaxs = "i", yaxs = "i")
# plot(CrossValidationRocPoints$falsePositiveRate, CrossValidationRocPoints$truePositiveRate,
#      main = "ROC Curve",
#      xlab = "False Positive Rate (Fallout)", ylab = "True Positive Rate (Sensitivity)",
#      col = dr_roc_green,
#      ylim = c(0, 1), xlim = c(0, 1),
#      pch = 20, type = "b")

## ----echo = FALSE, results = "asis", message = FALSE, warning = FALSE---------
# Reuses dr_dark_blue / dr_roc_green defined in the validation ROC chunk above.
CrossValidationRocPoints <- readRDS("CrossValidationRocPoints.rds")
par(bg = dr_dark_blue, xaxs = "i", yaxs = "i")
plot(CrossValidationRocPoints$falsePositiveRate, CrossValidationRocPoints$truePositiveRate,
     main = "ROC Curve",
     xlab = "False Positive Rate (Fallout)", ylab = "True Positive Rate (Sensitivity)",
     col = dr_roc_green,
     ylim = c(0, 1), xlim = c(0, 1),
     pch = 20, type = "b")

## ---- eval = TRUE-------------------------------------------------------------
ggplot(
  ValidationRocPoints,
  aes(x = falsePositiveRate, y = truePositiveRate)
) + geom_line()

## ---- eval = TRUE-------------------------------------------------------------
# Threshold at the ROC point with the highest F1 score.
threshold <- ValidationRocPoints$threshold[which.max(ValidationRocPoints$f1Score)]

## ---- eval = FALSE------------------------------------------------------------
# ValidationRocPoints[ValidationRocPoints$threshold == tail(Filter(function(x) x > threshold,
#                                                                  ValidationRocPoints$threshold),
#                                                           1), ]

## ---- results = "asis", message = FALSE, warning = FALSE, eval = FALSE--------
# # Find word-based models by looking for "word" modelType
# wordModels <- allModels[grep("Word", lapply(allModels, `[[`, "modelType"))]
# wordModel <- wordModels[[1]]
# # Get word cloud
# wordCloud <- GetWordCloud(project, wordModel$modelId)
# saveRDS(wordCloud, "wordCloudModelInsights.rds")

## ---- echo = FALSE, results = "asis", message = FALSE, warning = FALSE--------
library(modelwordcloud)
wordCloud <- readRDS("wordCloudModelInsights.rds")

## ----color-specs, include = FALSE, eval = FALSE-------------------------------
# colors <- c(
#   colormap::colormap(c("#255FEC", "#2DBEF9")),
#   colormap::colormap(
#     c("#FFAC9D", "#D80909"),
#     reverse = TRUE
#   )
# )
# saveRDS(colors, "colors.rds")

## ---- warning = FALSE, eval = TRUE--------------------------------------------
# Remove stop words
wordCloud <- wordCloud[!wordCloud$isStopword, ]

# Specify colors similar to what DataRobot produces for
# a wordcloud in Insights
colors <- readRDS("colors.rds")

# Make word cloud
suppressWarnings(
  wordcloud(words = wordCloud$ngram,
            freq = wordCloud$frequency,
            coefficients = wordCloud$coefficient,
            colors = colors,
            scale = c(3, 0.3))
)