## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5, warning = FALSE, message = FALSE ) ## ----install, eval = FALSE---------------------------------------------------- # # From CRAN # install.packages("hierNest") # # # From source (development version) # # install.packages("devtools") # devtools::install_github("ZirenJiang/hierNest") ## ----load--------------------------------------------------------------------- library(hierNest) ## ----hier_info_example, eval = FALSE------------------------------------------ # # Illustration: 3 MDCs with 2 DRGs each (6 DRGs total) # # # # MDC 1 -> DRG 1, DRG 2 # # MDC 2 -> DRG 3, DRG 4 # # MDC 3 -> DRG 5, DRG 6 # # # # For an observation in MDC 2 / DRG 4: # # hier_info[i, ] = c(2, 4) ## ----load_data---------------------------------------------------------------- data("example_data") # Dimensions cat("X dimensions:", dim(example_data$X), "\n") cat("Y length: ", length(example_data$Y), "\n") cat("hier_info dimensions:", dim(example_data$hier_info), "\n") # Peek at hier_info head(example_data$hier_info) # Group sizes table_mdc <- table(example_data$hier_info[, 1]) cat("\nObservations per MDC group:\n") print(table_mdc) table_drg <- table(example_data$hier_info[, 2]) cat("\nObservations per DRG subgroup (first 10):\n") print(head(table_drg, 10)) # Outcome distribution (binary) cat("\nOutcome distribution:\n") print(table(example_data$Y)) ## ----hierNest_fit, eval = FALSE----------------------------------------------- # library(hierNest) # fit <- hierNest( # x = example_data$X, # y = example_data$Y, # hier_info = example_data$hier_info, # family = "binomial", # asparse1 = 1, # weight for MDC-level group penalty # asparse2 = 1 # weight for DRG-level subgroup penalty # ) # # # The fit contains a lambda sequence and corresponding coefficient paths # length(fit$lambda) # dim(fit$beta) # rows = reparameterized coefficients, cols = lambda values ## ----cv_fit, eval = FALSE----------------------------------------------------- # cv.fit <- cv.hierNest( # x = example_data$X, # y = example_data$Y, # method = "overlapping", # hier_info = example_data$hier_info, # family = "binomial", # partition = "subgroup", # stratify CV folds within subgroups # cvmethod = "grid_search", # asparse1 = c(0.5, 20), # search alpha_1 over [0.5, 20] # asparse2 = c(0.05, 0.20), # search alpha_2 over [0.05, 0.20] # asparse1_num = 3, # 3 x 3 = 9 grid points # asparse2_num = 3, # nlambda = 50, # lambda values per grid point # pred.loss = "ROC" # maximize AUROC # ) ## ----cv_fit_general, eval = FALSE--------------------------------------------- # cv.fit.simple <- cv.hierNest( # x = example_data$X, # y = example_data$Y, # method = "overlapping", # hier_info = example_data$hier_info, # family = "binomial", # partition = "subgroup", # cvmethod = "general", # asparse1 = 1, # asparse2 = 0.1, # nlambda = 100, # pred.loss = "ROC" # ) ## ----cv_fit_user, eval = FALSE------------------------------------------------ # cv.fit.user <- cv.hierNest( # x = example_data$X, # y = example_data$Y, # method = "overlapping", # hier_info = example_data$hier_info, # family = "binomial", # partition = "subgroup", # cvmethod = "user_supply", # asparse1 = c(0.5, 1, 5, 10), # explicit (alpha1, alpha2) pairs # asparse2 = c(0.05, 0.1, 0.1, 0.2), # nlambda = 50 # ) ## ----inspect_cv, eval = FALSE------------------------------------------------- # # Optimal tuning parameters selected by cross-validation # cv.fit$lambda.min # lambda minimising CV loss # cv.fit$min_alpha1 # alpha_1 selected # cv.fit$min_alpha2 # alpha_2 selected # # # # Number of non-zero coefficients in the reparameterized model # # at the optimal lambda # nnz <- sum(cv.fit$hierNest.fit$beta[, # which(cv.fit$hierNest.fit$lambda == cv.fit$lambda.min)] != 0) # cat("Non-zero reparameterized coefficients:", nnz, "\n") ## ----plot_coef, eval = FALSE-------------------------------------------------- # # Returns a plotly interactive heatmap # p_coef <- plot(cv.fit, type = "coefficients") # p_coef ## ----plot_subgroup, eval = FALSE---------------------------------------------- # p_sub <- plot(cv.fit, type = "Subgroup effects") # p_sub ## ----session_info------------------------------------------------------------- sessionInfo()