## ----include=FALSE------------------------------------------------------------ knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5, fig.align = "center" ) ## ----echo = FALSE, results='hide', message=FALSE, warning=FALSE--------------- library(VIM) library(data.table) ## ----setup_2, message = FALSE------------------------------------------------- data <- as.data.table(VIM::sleep) a <- aggr(sleep, plot = FALSE) plot(a, numbers = TRUE, prop = FALSE) ## ----message = FALSE---------------------------------------------------------- dataDS <- sleep[, c("Dream", "Sleep")] marginplot(dataDS, main = "Missing Values") ## ----include=TRUE, results='hide', message=FALSE, warning=FALSE--------------- result <- vimpute( data = data, pred_history = TRUE) ## ----------------------------------------------------------------------------- print(head(result$data, 3)) ## ----------------------------------------------------------------------------- dataDS <- as.data.frame(result$data[, c("Dream", "Sleep", "Dream_imp", "Sleep_imp")]) marginplot(dataDS, delimiter = "_imp", main = "Imputation with Default Model") ## ----include=TRUE, results='hide', message=FALSE, warning=FALSE--------------- result_mixed <- vimpute( data = data, method = list(NonD = "robust", Dream = "ranger", Sleep = "xgboost", Span = "ranger" , Gest = "regularized"), pred_history = TRUE ) ## ----------------------------------------------------------------------------- dataDS <- as.data.frame(result_mixed$data[, c("Dream", "Sleep", "Dream_imp", "Sleep_imp")]) marginplot(dataDS, delimiter = "_imp", main = "Imputation with different Models for each Variable") ## ----include=TRUE, results='hide', message=FALSE, warning=FALSE, echo=F------- result_xgboost <- vimpute( data = data, method = setNames(as.list(rep("xgboost", ncol(data))), names(data)), pred_history = TRUE, verbose = FALSE ) dataDS_xgboost <- as.data.frame(result_xgboost$data[, c("Dream", "Sleep", "Dream_imp", "Sleep_imp")]) result_regularized <- vimpute( data = data, method = setNames(as.list(rep("regularized", ncol(data))), names(data)), pred_history = TRUE ) dataDS_regularized <- as.data.frame(result_regularized$data[, c("Dream", "Sleep", "Dream_imp", "Sleep_imp")]) ## ----echo=F, warning=F-------------------------------------------------------- par(mfrow = c(1, 2)) marginplot(dataDS_xgboost, delimiter = "_imp", main = "Imputation with xgboost") marginplot(dataDS_regularized, delimiter = "_imp", main = "Imputation with Regularized") par(mfrow = c(1, 1)) ## ----eval = FALSE------------------------------------------------------------- # result <- vimpute( # data = data, # method = list(NonD = "robust", # Dream = "ranger", # Sleep = "xgboost", # Span = "ranger" , # Gest = "regularized"), # pmm = list(NonD = FALSE, Dream = TRUE, Sleep = FALSE, Span = FALSE , Gest = TRUE) # ) ## ----eval = FALSE------------------------------------------------------------- # result <- vimpute( # data = data, # method = setNames(as.list(rep("regularized", ncol(data))), names(data)) # formula = list( # NonD ~ Dream + Sleep, # Linear combination # Span ~ Dream:Sleep + Gest, # With interaction term # log(Gest) ~ Sleep + exp(Span) # With transformations # ) # ) ## ----eval = FALSE------------------------------------------------------------- # result <- vimpute( # data = data, # tune = TRUE # ) ## ----eval = FALSE------------------------------------------------------------- # result <- vimpute( # data = data, # nseq = 20, # eps = 0.01 # ) ## ----eval = FALSE------------------------------------------------------------- # result <- vimpute( # data = data, # imp_var = TRUE # ) ## ----------------------------------------------------------------------------- print(tail(result$pred_history, 9)) ## ----------------------------------------------------------------------------- library(reactable) data(iris) df <- as.data.table(iris) colnames(df) <- c("S.Length","S.Width","P.Length","P.Width","Species") # randomly produce some missing values in the data set.seed(1) nbr_missing <- 50 y <- data.frame(row=sample(nrow(iris),size = nbr_missing,replace = T), col=sample(ncol(iris)-1,size = nbr_missing,replace = T)) y<-y[!duplicated(y),] df[as.matrix(y)]<-NA aggr(df) ## ----------------------------------------------------------------------------- sapply(df, function(x)sum(is.na(x))) ## ----results='hide', message=FALSE, warning=FALSE,include=FALSE--------------- library(reactable) data(iris) df <- as.data.table(iris) colnames(df) <- c("S.Length","S.Width","P.Length","P.Width","Species") # Create complete copy before introducing NAs complete_data <- df # Randomly produce missing values set.seed(1) nbr_missing <- 50 y <- data.frame(row = sample(nrow(df), size = nbr_missing, replace = TRUE), col = sample(ncol(df), size = nbr_missing, replace = TRUE)) y <- y[!duplicated(y),] df[as.matrix(y)] <- NA # Perform imputation result <- vimpute(data = df, pred_history = TRUE) # Extracting the imputed columns from result$data imputed_columns <- grep("_imp$", names(result$data), value = TRUE) # Create a function to compare true and imputed values compare_values <- function(true_data, pred_data, imputed_data, col_name) { comparison <- data.frame( True_Value = true_data[[col_name]], Imputed_Value = ifelse(imputed_data, pred_data[[col_name]], NA) ) comparison <- comparison[!is.na(comparison$Imputed_Value), ] return(comparison) } # Initialize an empty list to store the comparison tables comparison_list <- list() # Loop through each imputed column and create a comparison table for (imputed_col in imputed_columns) { col_name <- sub("_imp$", "", imputed_col) comparison_list[[col_name]] <- compare_values(complete_data, result$data, result$data[[imputed_col]], col_name) } # Prepare the results in a combined wide format, ensuring equal row numbers results <- cbind( "TRUE1" = head(comparison_list[["S.Length"]][, "True_Value"], 5), "IMPUTED1" = head(comparison_list[["S.Length"]][, "Imputed_Value"], 5), "TRUE2" = head(comparison_list[["S.Width"]][, "True_Value"], 5), "IMPUTED2" = head(comparison_list[["S.Width"]][, "Imputed_Value"], 5), "TRUE3" = head(comparison_list[["P.Length"]][, "True_Value"], 5), "IMPUTED3" = head(comparison_list[["P.Length"]][, "Imputed_Value"], 5), "TRUE4" = head(comparison_list[["P.Width"]][, "True_Value"], 5), "IMPUTED4" = head(comparison_list[["P.Width"]][, "Imputed_Value"], 5) ) # Print the combined wide format table print(results) ## ----echo=F,warning=F--------------------------------------------------------- # Load the reactable library library(reactable) # Create the reactable reactable(results, columns = list( TRUE1 = colDef(name = "True"), IMPUTED1 = colDef(name = "Imputed"), TRUE2 = colDef(name = "True"), IMPUTED2 = colDef(name = "Imputed"), TRUE3 = colDef(name = "True"), IMPUTED3 = colDef(name = "Imputed"), TRUE4 = colDef(name = "True"), IMPUTED4 = colDef(name = "Imputed") ), columnGroups = list( colGroup(name = "S.Length", columns = c("TRUE1", "IMPUTED1")), colGroup(name = "S.Width", columns = c("TRUE2", "IMPUTED2")), colGroup(name = "P.Length", columns = c("TRUE3", "IMPUTED3")), colGroup(name = "P.Width", columns = c("TRUE4", "IMPUTED4")) ), striped = TRUE, highlight = TRUE, bordered = TRUE ) ## ----results='hide', message=FALSE, warning=FALSE,include=FALSE--------------- library(reactable) library(VIM) data(iris) # Create complete copy before introducing NAs complete_data <- iris colnames(complete_data) <- c("S.Length","S.Width","P.Length","P.Width","Species") df <- copy(complete_data) # Randomly produce missing values set.seed(1) nbr_missing <- 50 y <- data.frame(row = sample(nrow(df), size = nbr_missing, replace = TRUE), col = sample(ncol(df), size = nbr_missing, replace = TRUE)) y <- y[!duplicated(y),] df[as.matrix(y)] <- NA # Perform imputation with proper method specification result <- vimpute( data = df, method = setNames(lapply(names(df), function(x) "xgboost"),names(df)), pred_history = TRUE ) # Extracting the imputed columns from result$data imputed_columns <- grep("_imp$", names(result$data), value = TRUE) # Create a function to compare true and imputed values compare_values <- function(true_data, pred_data, imputed_data, col_name) { comparison <- data.frame( True_Value = true_data[[col_name]], Imputed_Value = ifelse(imputed_data, pred_data[[col_name]], NA) ) comparison <- comparison[!is.na(comparison$Imputed_Value), ] return(comparison) } # Initialize an empty list to store the comparison tables comparison_list <- list() # Loop through each imputed column and create a comparison table for (imputed_col in imputed_columns) { col_name <- sub("_imp$", "", imputed_col) comparison_list[[col_name]] <- compare_values(complete_data, result$data, result$data[[imputed_col]], col_name) } # Prepare the results in a combined wide format, ensuring equal row numbers results <- cbind( "TRUE1" = head(comparison_list[["S.Length"]][, "True_Value"], 5), "IMPUTED1" = head(comparison_list[["S.Length"]][, "Imputed_Value"], 5), "TRUE2" = head(comparison_list[["S.Width"]][, "True_Value"], 5), "IMPUTED2" = head(comparison_list[["S.Width"]][, "Imputed_Value"], 5), "TRUE3" = head(comparison_list[["P.Length"]][, "True_Value"], 5), "IMPUTED3" = head(comparison_list[["P.Length"]][, "Imputed_Value"], 5), "TRUE4" = head(comparison_list[["P.Width"]][, "True_Value"], 5), "IMPUTED4" = head(comparison_list[["P.Width"]][, "Imputed_Value"], 5) ) # Print the combined wide format table print(results) ## ----echo=F,warning=F--------------------------------------------------------- # Load the reactable library library(reactable) # Create the reactable reactable(results, columns = list( TRUE1 = colDef(name = "True"), IMPUTED1 = colDef(name = "Imputed"), TRUE2 = colDef(name = "True"), IMPUTED2 = colDef(name = "Imputed"), TRUE3 = colDef(name = "True"), IMPUTED3 = colDef(name = "Imputed"), TRUE4 = colDef(name = "True"), IMPUTED4 = colDef(name = "Imputed") ), columnGroups = list( colGroup(name = "S.Length", columns = c("TRUE1", "IMPUTED1")), colGroup(name = "S.Width", columns = c("TRUE2", "IMPUTED2")), colGroup(name = "P.Length", columns = c("TRUE3", "IMPUTED3")), colGroup(name = "P.Width", columns = c("TRUE4", "IMPUTED4")) ), striped = TRUE, highlight = TRUE, bordered = TRUE )