## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse  = TRUE,
  comment   = "#>",
  fig.width = 7,
  fig.height = 5,
  warning   = FALSE,
  message   = FALSE
)

## ----install, eval = FALSE----------------------------------------------------
# # From CRAN
# install.packages("hierNest")
# 
# # From source (development version)
# # install.packages("devtools")
# devtools::install_github("ZirenJiang/hierNest")

## ----load---------------------------------------------------------------------
library(hierNest)

## ----hier_info_example, eval = FALSE------------------------------------------
# # Illustration: 3 MDCs with 2 DRGs each (6 DRGs total)
# #
# # MDC 1 -> DRG 1, DRG 2
# # MDC 2 -> DRG 3, DRG 4
# # MDC 3 -> DRG 5, DRG 6
# #
# # For an observation in MDC 2 / DRG 4:
# #   hier_info[i, ] = c(2, 4)

## ----load_data----------------------------------------------------------------
data("example_data")

# Dimensions
cat("X dimensions:", dim(example_data$X), "\n")
cat("Y length:     ", length(example_data$Y), "\n")
cat("hier_info dimensions:", dim(example_data$hier_info), "\n")

# Peek at hier_info
head(example_data$hier_info)

# Group sizes
table_mdc <- table(example_data$hier_info[, 1])
cat("\nObservations per MDC group:\n")
print(table_mdc)

table_drg <- table(example_data$hier_info[, 2])
cat("\nObservations per DRG subgroup (first 10):\n")
print(head(table_drg, 10))

# Outcome distribution (binary)
cat("\nOutcome distribution:\n")
print(table(example_data$Y))

## ----hierNest_fit, eval = FALSE-----------------------------------------------
# library(hierNest)
# fit <- hierNest(
#   x         = example_data$X,
#   y         = example_data$Y,
#   hier_info = example_data$hier_info,
#   family    = "binomial",
#   asparse1  = 1,    # weight for MDC-level group penalty
#   asparse2  = 1     # weight for DRG-level subgroup penalty
# )
# 
# # The fit contains a lambda sequence and corresponding coefficient paths
# length(fit$lambda)
# dim(fit$beta)   # rows = reparameterized coefficients, cols = lambda values

## ----cv_fit, eval = FALSE-----------------------------------------------------
# cv.fit <- cv.hierNest(
#   x           = example_data$X,
#   y           = example_data$Y,
#   method      = "overlapping",
#   hier_info   = example_data$hier_info,
#   family      = "binomial",
#   partition   = "subgroup",    # stratify CV folds within subgroups
#   cvmethod    = "grid_search",
#   asparse1    = c(0.5, 20),    # search alpha_1 over [0.5, 20]
#   asparse2    = c(0.05, 0.20), # search alpha_2 over [0.05, 0.20]
#   asparse1_num = 3,            # 3 x 3 = 9 grid points
#   asparse2_num = 3,
#   nlambda     = 50,            # lambda values per grid point
#   pred.loss   = "ROC"          # maximize AUROC
# )

## ----cv_fit_general, eval = FALSE---------------------------------------------
# cv.fit.simple <- cv.hierNest(
#   x         = example_data$X,
#   y         = example_data$Y,
#   method    = "overlapping",
#   hier_info = example_data$hier_info,
#   family    = "binomial",
#   partition = "subgroup",
#   cvmethod  = "general",
#   asparse1  = 1,
#   asparse2  = 0.1,
#   nlambda   = 100,
#   pred.loss = "ROC"
# )

## ----cv_fit_user, eval = FALSE------------------------------------------------
# cv.fit.user <- cv.hierNest(
#   x         = example_data$X,
#   y         = example_data$Y,
#   method    = "overlapping",
#   hier_info = example_data$hier_info,
#   family    = "binomial",
#   partition = "subgroup",
#   cvmethod  = "user_supply",
#   asparse1  = c(0.5, 1, 5, 10),   # explicit (alpha1, alpha2) pairs
#   asparse2  = c(0.05, 0.1, 0.1, 0.2),
#   nlambda   = 50
# )

## ----inspect_cv, eval = FALSE-------------------------------------------------
# # Optimal tuning parameters selected by cross-validation
# cv.fit$lambda.min   # lambda minimising CV loss
# cv.fit$min_alpha1   # alpha_1 selected
# cv.fit$min_alpha2   # alpha_2 selected
# 
# 
# # Number of non-zero coefficients in the reparameterized model
# # at the optimal lambda
# nnz <- sum(cv.fit$hierNest.fit$beta[,
#            which(cv.fit$hierNest.fit$lambda == cv.fit$lambda.min)] != 0)
# cat("Non-zero reparameterized coefficients:", nnz, "\n")

## ----plot_coef, eval = FALSE--------------------------------------------------
# # Returns a plotly interactive heatmap
# p_coef <- plot(cv.fit, type = "coefficients")
# p_coef

## ----plot_subgroup, eval = FALSE----------------------------------------------
# p_sub <- plot(cv.fit, type = "Subgroup effects")
# p_sub

## ----session_info-------------------------------------------------------------
sessionInfo()