## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE,eval = T, comment = "#>" ) ## ----eval = FALSE------------------------------------------------------------- # install.packages("penalizedclr") ## ----eval = FALSE------------------------------------------------------------- # library(devtools) # install_github("veradjordjilovic/penalizedclr") ## ----setup-------------------------------------------------------------------- library(penalizedclr) ## ----message=FALSE, warning=FALSE--------------------------------------------- set.seed(1234) library(tidyverse) ## ----------------------------------------------------------------------------- # two groups of predictors p <- c(12, 40) # percentage and number of non-null variables p_nz <- c(0.5, 0.2) m_nz <- round(p*p_nz, 0) # number of different strata (case-control pairs) K <- 100 # number of cases and controls in each stratum (not necessarily 1:1 matching, # other designs are also allowed) n_cases <- 1 n_ctrl <- 1 # generating covariates X = cbind(matrix(rnorm(p[1] * K * (n_cases + n_ctrl), 0, 1), ncol = p[1]), matrix(rnorm(p[2] * K * (n_cases + n_ctrl), 0, 4), ncol = p[2])) # coefficients beta <- as.matrix(c(rnorm(m_nz[1], 4, 1), rep(0, p[1] - m_nz[1]), rnorm(m_nz[2], 2, 0.8), rep(0, p[2] - m_nz[2])), ncol = 1) # stratum membership stratum <- rep(1:K, each= n_cases+n_ctrl) # linear predictor lin_pred <- X %*% beta prob_case <- exp(lin_pred) / (1 + exp(lin_pred)) # generate the response Y <- rep(0, length(stratum)) data_sim <- as_tibble(data.frame(stratum = stratum, probability = prob_case, obs_id = 1 : length(stratum))) data_sim_cases <- data_sim %>% group_by(stratum)%>% sample_n(n_cases, weight = probability) Y[data_sim_cases$obs_id] <- 1 ## ----results = 'hide'--------------------------------------------------------- fit1 <- penalized.clr(response = Y, penalized = X, stratum = stratum, lambda = c(1,4), p = p, standardize = TRUE) ## ----------------------------------------------------------------------------- str(fit1) nonzero_index <- (beta != 0) * 1 #index of nonzero coefficients table(fitted = (fit1$penalized != 0) * 1, nonzero_index) ## ----results = 'hide'--------------------------------------------------------- fit2 <- penalized.clr(response = Y, penalized = X, stratum = stratum, lambda = c(1,4),p = p, standardize = TRUE, alpha = 0.6) ## ----------------------------------------------------------------------------- stable1 <- stable.clr(response = Y, B = 50, penalized = X, stratum = stratum, lambda.seq = c(1, 4, 6), parallel = TRUE) ## ----------------------------------------------------------------------------- hist(stable1$Pistab, xlab = "Selection probability", ylab="Frequency") ## ----------------------------------------------------------------------------- (tab1 <- table(stable = (stable1$P>0.65) * 1, nonzero_index)) (tab1[1,1] + tab1[2,2])/(sum(tab1)) ## ----------------------------------------------------------------------------- # plotting selection probabilities for true non-zero (red) and zero (blue) coefficients s_prob_nonzero <- cut(stable1$P[nonzero_index == 1], breaks = seq(0,1, by = 0.1), ordered_result = T) s_prob_zero <- cut(stable1$P[nonzero_index == 0], breaks = seq(0,1, by = 0.1), ordered_result = T) barplot(table(s_prob_nonzero), col = 2) barplot(table(s_prob_zero), add =T, col = 4 ) ## ----eval= T------------------------------------------------------------------ stable2 <- penalizedclr::stable.clr.g(response = Y, p = p, standardize = TRUE, penalized = X, stratum = stratum, lambda.list = list(c(4,1), c(1,4))) ## ----echo = FALSE, results = 'hide'------------------------------------------- # histogram of selection probabilities #hist(stable2$P, xlab = "Selection probability", ylab="Frequency") # table of true status vs. selection status (tab2 <- table(stable = (stable2$P>0.65) * 1, nonzero_index)) # classification accuracy (tab2[1,1] + tab2[2,2])/(sum(tab2)) ## ---- echo = F---------------------------------------------------------------- # plotting selection probabilities for true non-zero (red) and zero (blue) coefficients s_prob_nonzero <- cut(stable2$P[nonzero_index == 1], breaks = seq(0,1, by = 0.1), ordered_result = T) s_prob_zero <- cut(stable2$P[nonzero_index == 0], breaks = seq(0,1, by = 0.1), ordered_result = T) barplot(table(s_prob_nonzero), col = 2) barplot(table(s_prob_zero), add = T, col = 4 ) ## ---- eval = T---------------------------------------------------------------- (pf <- default.pf(response = Y, stratum = stratum, penalized = X, nfolds = 10, alpha = 0.3, standardize = TRUE, p = p, type.step1 = "comb")) (lambda <- find.default.lambda(response = Y, penalized = X, standardize = TRUE, stratum = stratum, p = p, pf.list = pf)) ## ----eval= T------------------------------------------------------------------ stable3 <- stable.clr.g(response = Y, p = p, standardize = TRUE, penalized = X, stratum = stratum, lambda.list = list(c(pf[[1]][1]*as.numeric(lambda), pf[[1]][2]*as.numeric(lambda)))) ## ---- echo = F---------------------------------------------------------------- # plotting selection probabilities for true non-zero (red) and zero (blue) coefficients s_prob_nonzero <- cut(stable3$P[nonzero_index == 1], breaks = seq(0, 1, by = 0.1), ordered_result = T) s_prob_zero <- cut(stable3$P[nonzero_index == 0], breaks = seq(0, 1, by = 0.1), ordered_result = T) barplot(table(s_prob_nonzero), col = 2) barplot(table(s_prob_zero), add = T, col = 4 ) ## ----eval = T----------------------------------------------------------------- alt.pf.list<- list(c(1,4), c(2,4), c(4,1), c(4,2), pf$pf) alt.lambda <- find.default.lambda(response = Y, penalized = X, standardize = TRUE, stratum = stratum, p = p, pf.list = alt.pf.list) lambda.matrix <- mapply(function (x,y) x*y, lambda, alt.pf.list) lambda.list <- lapply(seq_len(ncol(lambda.matrix)), function(i) lambda.matrix[,i]) ## ----------------------------------------------------------------------------- stable4 <- stable.clr.g(response = Y, p = p, standardize = TRUE, penalized = X, stratum = stratum, lambda.list = lambda.list ) ## ---- echo = F---------------------------------------------------------------- # plotting selection probabilities for true non-zero (red) and zero (blue) coefficients s_prob_nonzero <- cut(stable4$P[nonzero_index == 1], breaks = seq(0, 1, by = 0.1), ordered_result = T) s_prob_zero <- cut(stable4$P[nonzero_index == 0], breaks = seq(0, 1, by = 0.1), ordered_result = T) barplot(table(s_prob_nonzero), col = 2) barplot(table(s_prob_zero), add = T, col = 4 )