## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 10, fig.height = 5, fig.align = "center", warning = FALSE, message = FALSE, digits = 4 ) kbl10 <- function(x, n = 10, digits = 4, ...) { knitr::kable(utils::head(as.data.frame(x), n), digits = digits, align = "c", ...) } ## ----library------------------------------------------------------------------ library(betaregscale) ## ----simulate-data------------------------------------------------------------ sim_brsmm_data <- function(seed = 3501L, g = 24L, ni = 12L, beta = c(0.20, 0.65), gamma = c(-0.15), sigma_b = 0.55) { set.seed(seed) id <- factor(rep(seq_len(g), each = ni)) n <- length(id) x1 <- rnorm(n) b_true <- rnorm(g, mean = 0, sd = sigma_b) eta_mu <- beta[1] + beta[2] * x1 + b_true[as.integer(id)] eta_phi <- rep(gamma[1], n) mu <- plogis(eta_mu) phi <- plogis(eta_phi) shp <- brs_repar(mu = mu, phi = phi, repar = 2) y <- round(stats::rbeta(n, shp$shape1, shp$shape2) * 100) list( data = data.frame(y = y, x1 = x1, id = id), truth = list(beta = beta, gamma = gamma, sigma_b = sigma_b, b = b_true) ) } sim <- sim_brsmm_data( g = 5, ni = 200, beta = c(0.20, 0.65), gamma = c(-0.15), sigma_b = 0.55 ) str(sim$data) ## ----fit-brsmm---------------------------------------------------------------- fit_mm <- brsmm( y ~ x1, random = ~ 1 | id, data = sim$data, repar = 2, int_method = "laplace", method = "BFGS", control = list(maxit = 1000) ) summary(fit_mm) ## ----fit-brsmm-random-slope--------------------------------------------------- fit_mm_rs <- brsmm( y ~ x1, random = ~ 1 + x1 | id, data = sim$data, repar = 2, int_method = "laplace", method = "BFGS", control = list(maxit = 1200) ) summary(fit_mm_rs) ## ----random-covariance-------------------------------------------------------- kbl10(fit_mm_rs$random$D) kbl10( data.frame(term = names(fit_mm_rs$random$sd_b), sd = as.numeric(fit_mm_rs$random$sd_b)), digits = 4 ) kbl10(head(ranef(fit_mm_rs), 10)) ## ----ranef-study-------------------------------------------------------------- re_study <- brsmm_re_study(fit_mm_rs) print(re_study) kbl10(re_study$summary) kbl10(re_study$D) kbl10(re_study$Corr) ## ----ranef-visuals------------------------------------------------------------ if (requireNamespace("ggplot2", quietly = TRUE)) { autoplot.brsmm(fit_mm_rs, type = "ranef_caterpillar") autoplot.brsmm(fit_mm_rs, type = "ranef_density") autoplot.brsmm(fit_mm_rs, type = "ranef_pairs") autoplot.brsmm(fit_mm_rs, type = "ranef_qq") } ## ----methods-coef------------------------------------------------------------- kbl10( data.frame( parameter = names(coef(fit_mm, model = "full")), estimate = as.numeric(coef(fit_mm, model = "full")) ), digits = 4 ) kbl10( data.frame( log_sigma_b = as.numeric(coef(fit_mm, model = "random")), sigma_b = as.numeric(exp(coef(fit_mm, model = "random"))) ), digits = 4 ) kbl10(head(ranef(fit_mm), 10)) ## ----methods-coef-rs---------------------------------------------------------- kbl10( data.frame( parameter = names(coef(fit_mm_rs, model = "random")), estimate = as.numeric(coef(fit_mm_rs, model = "random")) ), digits = 4 ) kbl10(fit_mm_rs$random$D) ## ----methods-summary---------------------------------------------------------- vc <- vcov(fit_mm) dim(vc) sm <- summary(fit_mm) kbl10(sm$coefficients) kbl10( data.frame( logLik = as.numeric(logLik(fit_mm)), AIC = AIC(fit_mm), BIC = BIC(fit_mm), nobs = nobs(fit_mm) ), digits = 4 ) ## ----methods-predict---------------------------------------------------------- kbl10( data.frame( mu_hat = head(fitted(fit_mm, type = "mu")), phi_hat = head(fitted(fit_mm, type = "phi")), pred_mu = head(predict(fit_mm, type = "response")), pred_eta = head(predict(fit_mm, type = "link")), pred_phi = head(predict(fit_mm, type = "precision")), pred_var = head(predict(fit_mm, type = "variance")) ), digits = 4 ) kbl10( data.frame( res_response = head(residuals(fit_mm, type = "response")), res_pearson = head(residuals(fit_mm, type = "pearson")) ), digits = 4 ) ## ----methods-plot------------------------------------------------------------- plot(fit_mm, which = 1:4, type = "pearson") if (requireNamespace("ggplot2", quietly = TRUE)) { plot(fit_mm, which = 1:2, gg = TRUE) } ## ----methods-autoplot--------------------------------------------------------- if (requireNamespace("ggplot2", quietly = TRUE)) { autoplot.brsmm(fit_mm, type = "calibration") autoplot.brsmm(fit_mm, type = "score_dist") autoplot.brsmm(fit_mm, type = "ranef_qq") autoplot.brsmm(fit_mm, type = "residuals_by_group") } ## ----methods-newdata---------------------------------------------------------- nd <- sim$data[1:8, c("x1", "id")] kbl10( data.frame(pred_seen = as.numeric(predict(fit_mm, newdata = nd, type = "response"))), digits = 4 ) nd_unseen <- nd nd_unseen$id <- factor(rep("new_cluster", nrow(nd_unseen))) kbl10( data.frame(pred_unseen = as.numeric(predict(fit_mm, newdata = nd_unseen, type = "response"))), digits = 4 ) ## ----methods-newdata-rs------------------------------------------------------- kbl10( data.frame(pred_rs_seen = as.numeric(predict(fit_mm_rs, newdata = nd, type = "response"))), digits = 4 ) kbl10( data.frame(pred_rs_unseen = as.numeric(predict(fit_mm_rs, newdata = nd_unseen, type = "response"))), digits = 4 ) ## ----wald-tests--------------------------------------------------------------- sm <- summary(fit_mm) kbl10(sm$coefficients) ## ----lr-evolution------------------------------------------------------------- # Base model without a random effect fit_brs <- brs( y ~ x1, data = sim$data, repar = 2 ) # Reuse the mixed models already fitted: # fit_mm : random = ~ 1 | id # fit_mm_rs : random = ~ 1 + x1 | id tab_lr <- anova(fit_brs, fit_mm, fit_mm_rs, test = "Chisq") kbl10( data.frame(model = rownames(tab_lr), tab_lr, row.names = NULL), digits = 4 ) ## ----quick-diagnostics-------------------------------------------------------- r <- residuals(fit_mm, type = "pearson") kbl10( data.frame( mean = mean(r), sd = stats::sd(r), q025 = as.numeric(stats::quantile(r, 0.025)), q975 = as.numeric(stats::quantile(r, 0.975)) ), digits = 4 ) ## ----recovery-single---------------------------------------------------------- est <- c( beta0 = unname(coef(fit_mm, model = "mean")[1]), beta1 = unname(coef(fit_mm, model = "mean")[2]), sigma_b = unname(exp(coef(fit_mm, model = "random"))) ) true <- c( beta0 = sim$truth$beta[1], beta1 = sim$truth$beta[2], sigma_b = sim$truth$sigma_b ) recovery_table <- data.frame( parameter = names(true), true = as.numeric(true), estimate = as.numeric(est[names(true)]), bias = as.numeric(est[names(true)] - true) ) kbl10(recovery_table) ## ----recovery-montecarlo, eval = FALSE---------------------------------------- # mc_recovery <- function(R = 50L, seed = 7001L) { # set.seed(seed) # out <- vector("list", R) # # for (r in seq_len(R)) { # sim_r <- sim_brsmm_data(seed = seed + r) # fit_r <- brsmm( # y ~ x1, # random = ~ 1 | id, # data = sim_r$data, # repar = 2, # int_method = "laplace", # method = "BFGS", # control = list(maxit = 1000) # ) # # out[[r]] <- c( # beta0 = unname(coef(fit_r, model = "mean")[1]), # beta1 = unname(coef(fit_r, model = "mean")[2]), # sigma_b = unname(exp(coef(fit_r, model = "random"))) # ) # } # # est <- do.call(rbind, out) # truth <- c(beta0 = 0.20, beta1 = 0.65, sigma_b = 0.55) # # data.frame( # parameter = colnames(est), # truth = as.numeric(truth[colnames(est)]), # mean_est = colMeans(est), # bias = colMeans(est) - truth[colnames(est)], # rmse = sqrt(colMeans((sweep(est, 2, truth[colnames(est)], "-"))^2)) # ) # } # # kbl10(mc_recovery(R = 50)) ## ----run-tests, eval = FALSE-------------------------------------------------- # devtools::test(filter = "brsmm")