# Code extracted from the dlookr data-transformation vignette.
# Each `## ----label, options----` line marks one vignette chunk; chunks whose
# code is commented out carry `eval=FALSE` in their header.

## ----environment, echo = FALSE, message = FALSE, warning=FALSE----------------
knitr::opts_chunk$set(collapse = TRUE, comment = "", out.width = "600px", dpi = 70)
options(tibble.print_min = 4L, tibble.print_max = 4L)

library(dlookr)
library(dplyr)
library(ggplot2)

## ----import_data--------------------------------------------------------------
str(Carseats)

## ----missing------------------------------------------------------------------
carseats <- Carseats

# Pin the RNG to the R 3.5.0 behaviour before each seed so the rows drawn by
# sample() are reproducible across R versions. RNGversion() warns when it
# selects the old sampler, hence the narrowly scoped suppressWarnings().
suppressWarnings(RNGversion("3.5.0"))
set.seed(123)
# seq_len() instead of seq(): safe even if the data had zero rows.
carseats[sample(seq_len(NROW(carseats)), 20), "Income"] <- NA

suppressWarnings(RNGversion("3.5.0"))
set.seed(456)
carseats[sample(seq_len(NROW(carseats)), 10), "Urban"] <- NA

## ----imputate_na, fig.align='center', fig.width = 6, fig.height = 4-----------
# The "rpart" method is only run when the rpart package is installed.
if (requireNamespace("rpart", quietly = TRUE)) {
  income <- imputate_na(carseats, Income, US, method = "rpart")

  # result of imputation
  # (explicit print(): a bare value inside `if { }` is not auto-printed)
  print(income)

  # summary of imputation
  summary(income)

  # viz of imputation
  plot(income)
} else {
  cat("If you want to use this feature, you need to install the rpart package.\n")
}

## ----imputate_na2, fig.align='center', fig.width = 6, fig.height = 4, eval=FALSE----
# library(mice)
#
# urban <- imputate_na(carseats, Urban, US, method = "mice")
#
# # result of imputation
# urban
#
# # summary of imputation
# summary(urban)
#
# # viz of imputation
# plot(urban)

## ----imputate_na3-------------------------------------------------------------
# The mean before and after the imputation of the Income variable
carseats %>%
  mutate(Income_imp = imputate_na(carseats, Income, US, method = "knn")) %>%
  group_by(US) %>%
  summarise(orig = mean(Income, na.rm = TRUE),
            imputation = mean(Income_imp))

## ----imputate_outlier, fig.align='center', fig.width = 6, fig.height = 4------
price <- imputate_outlier(carseats, Price, method = "capping")

# result of imputation
price

# summary of imputation
summary(price)

# viz of imputation
plot(price)

## ----imputate_outlier2--------------------------------------------------------
# The mean before and after the imputation of the Price variable
carseats %>%
  mutate(Price_imp = imputate_outlier(carseats, Price, method = "capping")) %>%
  group_by(US) %>%
  summarise(orig = mean(Price, na.rm = TRUE),
            imputation = mean(Price_imp, na.rm = TRUE))

## ----standardization, fig.align='center', fig.width = 6, fig.height = 4-------
carseats %>%
  mutate(Income_minmax = transform(carseats$Income, method = "minmax"),
         Sales_minmax = transform(carseats$Sales, method = "minmax")) %>%
  select(Income_minmax, Sales_minmax) %>%
  boxplot()

## ----resolving1---------------------------------------------------------------
# find index of skewed variables
find_skewness(carseats)

# find names of skewed variables
find_skewness(carseats, index = FALSE)

# compute the skewness
find_skewness(carseats, value = TRUE)

# compute the skewness & filtering with threshold
find_skewness(carseats, value = TRUE, thres = 0.1)

## ----resolving2, fig.align='center', fig.width = 6, fig.height = 4------------
Advertising_log <- transform(carseats$Advertising, method = "log")

# result of transformation
head(Advertising_log)

# summary of transformation
summary(Advertising_log)

# viz of transformation
plot(Advertising_log)

## ----resolving3, fig.align='center', fig.width = 6, fig.height = 4------------
Advertising_log <- transform(carseats$Advertising, method = "log+1")

# result of transformation
head(Advertising_log)

# summary of transformation
summary(Advertising_log)

# viz of transformation
# plot(Advertising_log)

## ----binning, fig.width = 6, fig.height = 4-----------------------------------
# Binning the Income variable; the default type argument is "quantile"
bin <- binning(carseats$Income)

# Print bins class object
bin

# Summarize bins class object
summary(bin)

# Plot bins class object
plot(bin)

# Using labels argument
bin <- binning(carseats$Income, nbins = 4,
               labels = c("LQ1", "UQ1", "LQ3", "UQ3"))
bin

# Using another type argument
binning(carseats$Income, nbins = 5, type = "equal")
binning(carseats$Income, nbins = 5, type = "pretty")

# The "kmeans" and "bclust" types are only run when classInt is installed.
if (requireNamespace("classInt", quietly = TRUE)) {
  # explicit print(): only the last value of the `if { }` block is auto-printed
  print(binning(carseats$Income, nbins = 5, type = "kmeans"))
  binning(carseats$Income, nbins = 5, type = "bclust")
} else {
  cat("If you want to use this feature, you need to install the classInt package.\n")
}

# Extract the binned results
extract(bin)

# -------------------------
# Using pipes & dplyr
# -------------------------
library(dplyr)

carseats %>%
  mutate(Income_bin = binning(carseats$Income) %>%
           extract()) %>%
  group_by(ShelveLoc, Income_bin) %>%
  summarise(freq = n()) %>%
  arrange(desc(freq)) %>%
  head(10)

## ----binning_by, fig.width = 6, fig.height = 4--------------------------------
library(dplyr)

# Optimal binning is only run when the partykit package is installed.
if (requireNamespace("partykit", quietly = TRUE)) {
  # optimal binning using character
  bin <- binning_by(carseats, "US", "Advertising")

  # optimal binning using name
  bin <- binning_by(carseats, US, Advertising)
  # explicit print(): a bare value inside `if { }` is not auto-printed
  print(bin)

  # summary optimal_bins class
  summary(bin)

  # performance table
  print(attr(bin, "performance"))

  # visualize optimal_bins class
  plot(bin)

  # extract binned results
  extract(bin) %>%
    head(20)
} else {
  cat("If you want to use this feature, you need to install the partykit package.\n")
}

## ----trans_web_report, eval=FALSE---------------------------------------------
# heartfailure %>%
#   transformation_web_report(target = "death_event", subtitle = "heartfailure",
#                             output_dir = "./", output_file = "transformation.html",
#                             theme = "blue")

## ----trans_web_title, echo=FALSE, out.width='80%', fig.align='center', fig.pos="!h", fig.cap="The part of the report"----
knitr::include_graphics('img/transformation_web_title.jpg')

## ----trans_paged_report, eval=FALSE-------------------------------------------
# heartfailure %>%
#   transformation_paged_report(target = "death_event", subtitle = "heartfailure",
#                               output_dir = "./", output_file = "transformation.pdf",
#                               theme = "blue")

## ----trans_paged_cover, echo=FALSE, out.width='80%', fig.align='center', fig.pos="!h", fig.cap="The part of the report"----
knitr::include_graphics('img/transformation_paged_cover.jpg')

## ----trans_paged_cntent, echo=FALSE, out.width='80%', fig.align='center', fig.pos="!h", fig.cap="The dynamic contents of the report"----
knitr::include_graphics('img/transformation_paged_content.jpg')