## ----setup, include=FALSE----------------------------------------------------- # ============================================================ # FILE: vignettes/plot-controls_scenario1_ensemble-runs_scenarioC-D.Rmd # FULL FIXED — Scenario 1 — Ensemble Runs: Scenario C & D # ============================================================ knitr::opts_chunk$set( echo = TRUE, message = FALSE, fig.width = 6.5, fig.height = 4, dpi = 96, fig.retina = 1, fig.align = "center", out.width = "85%", fig.path = "plot-controls_scenario1_ensemble-runs_scenarioC-D_files/figure-html/", cache.path = "cache/" ) if (!requireNamespace("DDESONN", quietly = TRUE)) { stop("DDESONN must be installed to build this vignette.", call. = FALSE) } library(DDESONN) .fig_root <- knitr::opts_chunk$get("fig.path") if (!nzchar(.fig_root)) .fig_root <- "figure/" # ============================================================ # SECTION: RUN FLAGS (Scenario 1 — Ensemble) # - Default: run both # ============================================================ RUN_SCENARIO_C_S1 <- RUN_SCENARIO_C_S1 <- identical(tolower(Sys.getenv("DDESONN_VIGNETTE_FULL", "false")), "true") RUN_SCENARIO_D_S1 <- FALSE # ============================================================ # FIX: DO NOT put DDESONN outputs under vignette _files/ # - Windows build copy fails due to very deep paths under *_files/figure-html/... # - Use a SHORT tempdir() root for artifacts/plots instead. # ============================================================ .dd_out_root <- file.path(tempdir(), "DDESONN_vignette_plot_controls_s1_ensemble_CD") outC <- file.path(.dd_out_root, "DDESONN_plots_scenarioC_s1") outD <- file.path(.dd_out_root, "DDESONN_plots_scenarioD_s1") dir.create(outC, recursive = TRUE, showWarnings = FALSE) dir.create(outD, recursive = TRUE, showWarnings = FALSE) include_saved_plots <- function(output_root, header) { plot_dir <- ddesonn_plots_dir(output_root) if (!dir.exists(plot_dir)) { return(knitr::asis_output(paste0("\n\nNo plots at: ", plot_dir, "\n\n"))) } pngs <- list.files(plot_dir, pattern = "\\.png$", recursive = TRUE, full.names = TRUE) if (!length(pngs)) { return(knitr::asis_output(paste0("\n\nNo PNGs under: ", plot_dir, "\n\n"))) } out <- c(paste0("\n\n## ", header, "\n\n")) for (p in sort(pngs)) { uri <- knitr::image_uri(p) # NOTE: this embeds base64 (your original behavior) out <- c(out, paste0('\n\n')) } knitr::asis_output(paste(out, collapse = "")) } # ======================= # DATA SPLIT (robust like @examples) #$$$$$$$$$$$$$ # ======================= set.seed(111) # ------------------------------------------------------------ # 1) Locate package extdata folder (robust across check/install) #$$$$$$$$$$$$$ # ------------------------------------------------------------ ext_dir <- system.file("extdata", package = "DDESONN") if (!nzchar(ext_dir)) { stop("Could not find DDESONN extdata folder. Is the package installed?", call. = FALSE) } # ------------------------------------------------------------ # 1b) Find CSVs (recursive + check-dir edge cases) #$$$$$$$$$$$$$ # ------------------------------------------------------------ csvs <- list.files( ext_dir, pattern = "\\.csv$", full.names = TRUE, recursive = TRUE ) # Defensive fallback for rare nested layouts #$$$$$$$$$$$$$ if (!length(csvs)) { #$$$$$$$$$$$$$ ext_dir2 <- file.path(ext_dir, "inst", "extdata") #$$$$$$$$$$$$$ if (dir.exists(ext_dir2)) { #$$$$$$$$$$$$$ csvs <- list.files( ext_dir2, pattern = "\\.csv$", full.names = TRUE, recursive = TRUE ) } } # If no data, skip vignette runs (do NOT error) #$$$$$$$$$$$$$ DATA_OK <- TRUE #$$$$$$$$$$$$$ if (!length(csvs)) { #$$$$$$$$$$$$$ DATA_OK <- FALSE #$$$$$$$$$$$$$ message(sprintf( "No .csv files found under: %s — vignette will skip runs.", ext_dir )) } #$$$$$$$$$$$$$ if (isTRUE(DATA_OK)) { #$$$$$$$$$$$$$ hf_path <- file.path(ext_dir, "heart_failure_clinical_records.csv") #$$$$$$$$$$$$$ data_path <- if (file.exists(hf_path)) hf_path else csvs[[1]] #$$$$$$$$$$$$$ cat("[extdata] using:", data_path, "\n") #$$$$$$$$$$$$$ # ------------------------------------------------------------ # 2) Load data # ------------------------------------------------------------ df <- read.csv(data_path) # Prefer DEATH_EVENT if present; otherwise infer a binary target target_col <- if ("DEATH_EVENT" %in% names(df)) { #$$$$$$$$$$$$$ "DEATH_EVENT" #$$$$$$$$$$$$$ } else { #$$$$$$$$$$$$$ cand <- names(df)[vapply(df, function(col) { v <- suppressWarnings(as.numeric(col)) if (all(is.na(v))) return(FALSE) u <- unique(v[is.finite(v)]) length(u) <= 2 && all(sort(u) %in% c(0, 1)) }, logical(1))] if (!length(cand)) { stop( "Could not infer a binary target column. ", "Provide a binary CSV in extdata or rename target to DEATH_EVENT.", call. = FALSE ) } cand[[1]] } cat("[data] target_col =", target_col, "\n") #$$$$$$$$$$$$$ # ------------------------------------------------------------ # 3) Build X and y # ------------------------------------------------------------ y_all <- matrix(as.integer(df[[target_col]]), ncol = 1) #$$$$$$$$$$$$$ x_df <- df[, setdiff(names(df), target_col), drop = FALSE] #$$$$$$$$$$$$$ x_all <- as.matrix(x_df) #$$$$$$$$$$$$$ storage.mode(x_all) <- "double" #$$$$$$$$$$$$$ # ------------------------------------------------------------ # 4) Split 70 / 15 / 15 # ------------------------------------------------------------ n <- nrow(x_all) #$$$$$$$$$$$$$ idx <- sample.int(n) #$$$$$$$$$$$$$ n_train <- floor(0.70 * n) #$$$$$$$$$$$$$ n_valid <- floor(0.15 * n) #$$$$$$$$$$$$$ i_tr <- idx[1:n_train] #$$$$$$$$$$$$$ i_va <- idx[(n_train + 1):(n_train + n_valid)] #$$$$$$$$$$$$$ i_te <- idx[(n_train + n_valid + 1):n] #$$$$$$$$$$$$$ x_train <- x_all[i_tr, , drop = FALSE] #$$$$$$$$$$$$$ y_train <- y_all[i_tr, , drop = FALSE] #$$$$$$$$$$$$$ x_valid <- x_all[i_va, , drop = FALSE] #$$$$$$$$$$$$$ y_valid <- y_all[i_va, , drop = FALSE] #$$$$$$$$$$$$$ x_test <- x_all[i_te, , drop = FALSE] #$$$$$$$$$$$$$ y_test <- y_all[i_te, , drop = FALSE] #$$$$$$$$$$$$$ cat(sprintf("[split] train=%d valid=%d test=%d\n", nrow(x_train), nrow(x_valid), nrow(x_test))) #$$$$$$$$$$$$$ # ------------------------------------------------------------ # 5) Scale using TRAIN stats only (no leakage) #$$$$$$$$$$$$$ # ------------------------------------------------------------ x_train_s <- scale(x_train) #$$$$$$$$$$$$$ ctr <- attr(x_train_s, "scaled:center") #$$$$$$$$$$$$$ scl <- attr(x_train_s, "scaled:scale") #$$$$$$$$$$$$$ scl[!is.finite(scl) | scl == 0] <- 1 #$$$$$$$$$$$$$ x_valid_s <- sweep(sweep(x_valid, 2, ctr, "-"), 2, scl, "/") #$$$$$$$$$$$$$ x_test_s <- sweep(sweep(x_test, 2, ctr, "-"), 2, scl, "/") #$$$$$$$$$$$$$ mx <- suppressWarnings(max(abs(x_train_s))) #$$$$$$$$$$$$$ if (!is.finite(mx) || mx == 0) mx <- 1 #$$$$$$$$$$$$$ x_train <- x_train_s / mx #$$$$$$$$$$$$$ x_valid <- x_valid_s / mx #$$$$$$$$$$$$$ x_test <- x_test_s / mx #$$$$$$$$$$$$$ } # end DATA_OK #$$$$$$$$$$$$$ ## ----scenarioC_s1, results="asis", echo=FALSE, eval=RUN_SCENARIO_C_S1--------- # options(DDESONN_OUTPUT_ROOT = outC) # Sys.setenv(DDESONN_ARTIFACTS_ROOT = outC) # # res_scenarioC_s1 <- ddesonn_run( # x = x_train, # y = y_train, # classification_mode = "binary", # hidden_sizes = c(64, 32), # # # ---------------------------------------------------------- # # ENSEMBLE CONFIG # # ---------------------------------------------------------- # do_ensemble = TRUE, # num_networks = 2L, # seeds = 1:1, # # validation = list(x = x_valid, y = y_valid), # test = list(x = x_test, y = y_test), # # # ---------------------------------------------------------- # # TRAINING OVERRIDES (Scenario 1 knobs) # # ---------------------------------------------------------- # training_overrides = list( # init_method = "he", # optimizer = "adagrad", # lr = 0.125, # lambda = 0.00028, # # activation_functions = list(relu, relu, sigmoid), # dropout_rates = list(0.10), # loss_type = "CrossEntropy", # # validation_metrics = TRUE, # num_epochs = 3, # final_summary_decimals = 6L, # # # ======================================================== # # SECTION: output_controls # # ======================================================== # viewTables = FALSE, # debug = FALSE, # verboseLow = FALSE, # browse = FALSE, # # # ======================================================== # # SECTION: per_epoch_plots # # ======================================================== # per_epoch_plots = list( # saveEnabled = TRUE, # loss_curve = TRUE, # probe_plots = TRUE, # verbose = FALSE # ), # # # ======================================================== # # SECTION: performance_relevance (Scenario 1) # # - bridged internally to plot_controls$performance_relevance # # ======================================================== # performance_relevance = list( # saveEnabled = TRUE, # viewAllPlots = FALSE, # # performance_high_mean_plots = TRUE, # performance_low_mean_plots = TRUE, # relevance_high_mean_plots = TRUE, # relevance_low_mean_plots = TRUE, # # verbose = FALSE # ), # # # ======================================================== # # SECTION: evaluate_predictions_report_plots # # ======================================================== # evaluate_predictions_report_plots = list( # accuracy_plot = TRUE, # accuracy_plot_mode = "both", # plot_roc = TRUE, # plot_pr = TRUE, # show_auprc = TRUE, # viewAllPlots = FALSE, # verbose = FALSE # ) # ) # ) # # # ============================================================ # # INCLUDE SAVED PLOTS (interactive / RStudio use) # # ============================================================ # if (exists("include_saved_plots")) { # include_saved_plots(outC, "Plot Controls: Scenario 1 — Ensemble Run — Scenario C") # } # ## ----scenarioD_s1, results="asis", echo=FALSE, eval=RUN_SCENARIO_D_S1--------- # options(DDESONN_OUTPUT_ROOT = outD) # Sys.setenv(DDESONN_ARTIFACTS_ROOT = outD) # # res_scenarioD_s1 <- ddesonn_run( # x = x_train, # y = y_train, # classification_mode = "binary", # hidden_sizes = c(64, 32), # # # ---------------------------------------------------------- # # ENSEMBLE CONFIG # # ---------------------------------------------------------- # do_ensemble = TRUE, # num_networks = 2L, # num_temp_iterations = 1L, # seeds = 1:1, # # validation = list(x = x_valid, y = y_valid), # test = list(x = x_test, y = y_test), # # # ---------------------------------------------------------- # # TRAINING OVERRIDES (Scenario 1 knobs) # # ---------------------------------------------------------- # training_overrides = list( # init_method = "he", # optimizer = "adagrad", # lr = 0.125, # lambda = 0.00028, # # activation_functions = list(relu, relu, sigmoid), # dropout_rates = list(0.10), # loss_type = "CrossEntropy", # # validation_metrics = TRUE, # num_epochs = 3, # final_summary_decimals = 6L, # # # ======================================================== # # SECTION: per_epoch_plots # # ======================================================== # per_epoch_plots = list( # saveEnabled = TRUE, # loss_curve = TRUE, # probe_plots = TRUE, # verbose = TRUE # ), # # # ======================================================== # # SECTION: performance_relevance (Scenario 1) # # - bridged internally to plot_controls$performance_relevance # # ======================================================== # performance_relevance = list( # saveEnabled = TRUE, # viewAllPlots = FALSE, # # performance_high_mean_plots = TRUE, # performance_low_mean_plots = TRUE, # relevance_high_mean_plots = TRUE, # relevance_low_mean_plots = TRUE, # # verbose = TRUE # ), # # # ======================================================== # # SECTION: evaluate_predictions_report_plots # # ======================================================== # evaluate_predictions_report_plots = list( # accuracy_plot = TRUE, # accuracy_plot_mode = "both", # plot_roc = TRUE, # plot_pr = TRUE, # show_auprc = TRUE, # viewAllPlots = FALSE, # verbose = TRUE # ) # ) # ) # # include_saved_plots( # outD, # "Plot Controls: Scenario 1 — Ensemble Runs — Scenario D" # ) #