## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(message=F, warning = F, cache=T, echo=T, eval=T,
                      fig.pos="H", fig.align="center", comment='#>')

## ---- echo=FALSE--------------------------------------------------------------
tab <- matrix(c("\u2713","",rep("\u2713",4)),2,3)
rownames(tab) = c("$\\beta$","$f_j$'s")
colnames(tab) = c("None","$\\ell_1$","$\\ell_2$")
knitr::kable(tab, align = "rccc")

## -----------------------------------------------------------------------------
library(plsmselect)
library(purrr)

data(simData)

## ---- echo=FALSE--------------------------------------------------------------
knitr::kable(head(simData), format = "html", digits=2,
             caption="Table 1. First 6 samples of the simulated data set: simData.") %>% 
  kableExtra::kable_styling(bootstrap_options = "striped", font_size = 11.5)

## ----normalfit----------------------------------------------------------------
## Create model matrix X corresponding to linear terms
## (necessary for the formula option of gamlasso below)
simData$X = model.matrix(~x1+x2+x3+x4+x5+x6+x7+x8+x9+x10, data=simData)[,-1]

## The formula approach
gfit = gamlasso(Yg ~ X +
                  s(z1, k=5, bs="ts") +
                  s(z2, k=5, bs="ts") +
                  s(z3, k=5, bs="ts") +
                  s(z4, k=5, bs="ts"),
                data = simData,
                seed = 1)

## ----normalfitalt, eval = F---------------------------------------------------
#  ## The term specification approach
#  gfit = gamlasso(response = "Yg",
#                  linear.terms = paste0("x",1:10),
#                  smooth.terms = paste0("z",1:4),
#                  data = simData,
#                  linear.penalty = "l1",
#                  smooth.penalty = "l1",
#                  num.knots = 5,
#                  seed = 1)

## -----------------------------------------------------------------------------
# mgcv::gam object:
class(gfit$gam)
# glmnet::cv.glmnet object
class(gfit$cv.glmnet)

## -----------------------------------------------------------------------------
summary(gfit)

## ----plotgam, fig.width=6, fig.height=6---------------------------------------
## Plot the estimates of the smooth effects:
plot(gfit$gam, pages=1)

## ----fittedvsobserved, fig.width=6, fig.height=6------------------------------
## Plot fitted versus observed values:
plot(simData$Yg, predict(gfit), xlab = "Observed values", ylab = "Fitted Values")

## ----poifit-------------------------------------------------------------------
## Create model matrix X corresponding to linear terms
## (necessary for the formula option of gamlasso below)
simData$X = model.matrix(~x1+x2+x3+x4+x5+x6+x7+x8+x9+x10, data=simData)[,-1]

## Poisson response. Formula approach.
pfit = gamlasso(Yp ~ X + 
                  s(z1, bs="ts", k=5) + 
                  s(z2, bs="ts", k=5) + 
                  s(z3, bs="ts", k=5) + 
                  s(z4, bs="ts", k=5),
                data = simData,
                family = "poisson",
                seed = 1)

## ----poifitalt, eval=FALSE----------------------------------------------------
#  ## Poisson response. Term-specification approach.
#  pfit = gamlasso(response = "Yp",
#                  linear.terms = paste0("x",1:10),
#                  smooth.terms = paste0("z",1:4),
#                  data = simData,
#                  linear.penalty = "l1",
#                  smooth.penalty = "l1",
#                  family = "poisson",
#                  num.knots = 5,
#                  seed = 1)

## -----------------------------------------------------------------------------
coef(pfit$cv.glmnet, s="lambda.min")

## ---- fig.width=6, fig.height=3-----------------------------------------------
par(mfrow=c(1,2))
plot(pfit$gam, select=1) # estimate of smooth term z1
plot(pfit$gam, select=2) # estimate of smooth term z2

## ---- fig.width=6, fig.height=6-----------------------------------------------
plot(predict(pfit, type="response"), exp(simData$lp), xlab="predicted count", ylab="true expected count")

## ----binfit, eval=FALSE-------------------------------------------------------
#  ## Create model matrix X corresponding to linear terms
#  ## (necessary for the formula option of gamlasso below)
#  simData$X = model.matrix(~x1+x2+x3+x4+x5+x6+x7+x8+x9+x10, data=simData)[,-1]
#  
#  ## Bernoulli trials response
#  bfit = gamlasso(Yb ~ X +
#                    s(z1, bs="ts", k=5) +
#                    s(z2, bs="ts", k=5) +
#                    s(z3, bs="ts", k=5) +
#                    s(z4, bs="ts", k=5),
#                  data = simData,
#                  family = "binomial",
#                  seed = 1)

## ----binfitalt, eval = F------------------------------------------------------
#  ## The term specification approach
#  bfit = gamlasso(response = "Yb",
#                  linear.terms = paste0("x",1:10),
#                  smooth.terms = paste0("z",1:4),
#                  data = simData,
#                  family="binomial",
#                  linear.penalty = "l1",
#                  smooth.penalty = "l1",
#                  num.knots = 5,
#                  seed = 1)

## ---- eval=FALSE--------------------------------------------------------------
#  summary(bfit)
#  plot(bfit$gam, pages=1)
#  pred.prob <- predict(bfit, type="response")
#  true.prob <- exp(simData$lp)/(1+exp(simData$lp))
#  plot(pred.prob, true.prob, xlab="predicted probability", ylab="true probability")

## ----binomfit, eval=FALSE-----------------------------------------------------
#  ## Create model matrix X corresponding to linear terms
#  ## (necessary for the formula option of gamlasso below)
#  simData$X = model.matrix(~x1+x2+x3+x4+x5+x6+x7+x8+x9+x10, data=simData)[,-1]
#  
#  ## Binomial counts response. Formula approach.
#  bfit2 = gamlasso(cbind(success,failure) ~ X +
#                     s(z1, bs="ts", k=5) +
#                     s(z2, bs="ts", k=5) +
#                     s(z3, bs="ts", k=5) +
#                     s(z4, bs="ts", k=5),
#                   data = simData,
#                   family = "binomial",
#                   seed = 1)

## ----binomfitalt, eval = F----------------------------------------------------
#  ## Binomial counts response. Term specification approach
#  bfit2 = gamlasso(c("success","failure"),
#                   linear.terms=paste0("x",1:10),
#                   smooth.terms=paste0("z",1:4),
#                   data=simData,
#                   family = "binomial",
#                   linear.penalty = "l1",
#                   smooth.penalty = "l1",
#                   num.knots = 5,
#                   seed=1)

## ---- eval=FALSE--------------------------------------------------------------
#  summary(bfit2)
#  plot(bfit2$gam, pages=1)
#  pred.prob <- predict(bfit2, type="response")
#  true.prob <- exp(simData$lp)/(1+exp(simData$lp))
#  plot(pred.prob, true.prob, xlab="predicted probability", ylab="true probability")

## ----coxfit-------------------------------------------------------------------
## Create model matrix X corresponding to linear terms
## (necessary for the formula option of gamlasso below)
simData$X = model.matrix(~x1+x2+x3+x4+x5+x6+x7+x8+x9+x10, data=simData)[,-1]

# Censored time-to-event response. Formula approach.
cfit = gamlasso(time ~ X +
                  s(z1, bs="ts", k=5) +
                  s(z2, bs="ts", k=5) +
                  s(z3, bs="ts", k=5) +
                  s(z4, bs="ts", k=5),
                data = simData,
                family = "cox",
                weights = "status",
                seed = 1)

## ----coxfitalt, eval=FALSE----------------------------------------------------
#  # Censored time-to-event response. Term specification approach.
#  cfit = gamlasso(response = "time",
#                  linear.terms = paste0("x",1:10),
#                  smooth.terms = paste0("z",1:4),
#                  data = simData,
#                  linear.penalty = "l1",
#                  smooth.penalty = "l1",
#                  family = "cox",
#                  weights="status",
#                  num.knots = 5,
#                  seed = 1)

## ----coxdiagnos, fig.width=6, fig.height=6------------------------------------
## Obtain and plot predicted cumulative baseline hazard:
H0.pred <- cumbasehaz(cfit)

time.seq <- seq(0, 60, by=1)
plot(time.seq, H0.pred(time.seq), type="l", xlab = "Time", ylab="",
     main = "Predicted Cumulative \nBaseline Hazard")

## ----coxpredict, fig.width=6, fig.height=6------------------------------------
## Obtain predicted survival at days 1,2,3,...,60:
S.pred <- predict(cfit, type="response", new.event.times=1:60)

## Plot the survival curve for sample (subject) 17:
plot(1:60, S.pred[17,], xlab="time (in days)", ylab="Survival probability", type="l")

## ----simdata, eval=FALSE------------------------------------------------------
#  generate.inputdata <- function(N) {
#  
#    id <- paste0("i",1:N)
#    ## Define linear predictors
#    linear.pred <- matrix(c(rbinom(N*3,size=1,prob=0.2),
#                            rbinom(N*5,size=1,prob=0.5),
#                            rbinom(N*2,size=1,prob=0.9)),
#                          nrow=N)
#    colnames(linear.pred) = paste0("x", 1:ncol(linear.pred))
#  
#    ## Define smooth predictors
#    smooth.pred = as.data.frame(matrix(runif(N*4),nrow=N))
#    colnames(smooth.pred) = paste0("z", 1:ncol(smooth.pred))
#  
#    ## Coalesce the predictors
#    dat = cbind.data.frame(id, linear.pred, smooth.pred)
#  
#    return(dat)
#  
#  }
#  
#  ## Simulate N input data observations:
#  N <- 100
#  simData2 <- generate.inputdata(N)

## ---- eval=FALSE--------------------------------------------------------------
#  ## "True" coefficients of linear terms (last 7 are zero):
#  beta <- c(1, -0.6, 0.5, rep(0,7))
#  
#  ## "True" nonlinear smooth terms:
#  f1 <- function(x) x^3
#  f2 <- function(x) sin(x*pi)
#  
#  ## Calculate "True" linear predictor (lp) based on above data (simData2)
#  simData2$lp <- as.numeric(as.matrix(simData2[,paste0("x",1:10)]) %*% beta + f1(simData2$z1) + f2(simData2$z2))

## ---- eval=FALSE--------------------------------------------------------------
#  ## Simulate gaussian response with mean lp:
#  simData2$Yg = simData2$lp + rnorm(N, sd = 0.1)
#  
#  ## Simulate bernoulli trials with success probability exp(lp)/(1+exp(lp))
#  simData2$Yb = map_int(exp(simData2$lp), ~ rbinom(1, 1, p = ( ./(1+.) ) ) )
#  
#  ## Simulate Poisson response with log(mean) = lp
#  simData2$Yp = map_int(exp(simData2$lp), ~rpois(1, .) )

## ---- eval=FALSE--------------------------------------------------------------
#  ## Simulate binomial counts with success probability exp(lp)/(1+exp(lp))
#  sizes = sample(10, nrow(simData2), replace = TRUE)
#  # Simulate success:
#  simData2$success = map2_int(exp(simData2$lp), sizes, ~rbinom(1, .y, p = ( .x/(1+.x) )))
#  # Calculate failure:
#  simData2$failure = sizes - simData2$success

## ---- eval=FALSE--------------------------------------------------------------
#  ## Function that simulates N samples of censored event times (time, status)
#  ## Event times ~ Weibull(lambda0, rho) with linear predictor lp
#  ## rho=1 corresponds to exponential distribution
#  simulWeib <- function(N, lambda0, rho, lp)
#  {
#  
#    # Censoring times ~ Exponential(lambdaC)
#    lambdaC=0.0005 # very mild censoring
#  
#    # Simulate Weibull latent event times
#    v <- runif(n=N)
#    Tlat <- (- log(v) / (lambda0 * exp(lp)))^(1 / rho)
#  
#    # Simulate censoring times
#    C <- rexp(n=N, rate=lambdaC)
#  
#    # Calculate follow-up times and event indicators
#    time <- pmin(Tlat, C)
#    status <- as.numeric(Tlat <= C)
#  
#    data.frame(time=time, status=status)
#  
#  }
#  
#  ## Set parameters of Weibull baseline hazard h0(t) = lambda*rho*t^(rho-1):
#  lambda0 <- 0.01; rho <- 1;
#  
#  ## Simulate (times, status) from above censored Weibull model and cbind to our data:
#  simData2 <- cbind.data.frame(simData2, simulWeib(N, lambda0, rho, simData2$lp))