## ---- SETTINGS-knitr, include=FALSE------------------------------------------- stopifnot(require(knitr)) opts_chunk$set( comment=NA, message = FALSE, warning = FALSE, eval = identical(Sys.getenv("NOT_CRAN"), "true"), dev = "png", dpi = 150, fig.asp = 0.618, fig.width = 5, out.width = "60%", fig.align = "center" ) ## ---- SETTINGS-gg, include=TRUE----------------------------------------------- library(ggplot2) library(bayesplot) theme_set(bayesplot::theme_default()) ## ----binom-arsenic-data------------------------------------------------------- library(rstanarm) data(wells) wells$dist100 <- wells$dist / 100 ## ---- binom-arsenic-plot-dist100, fig.height=3-------------------------------- ggplot(wells, aes(x = dist100, y = ..density.., fill = switch == 1)) + geom_histogram() + scale_fill_manual(values = c("gray30", "skyblue")) ## ---- binom-arsenic-mcmc, results="hide"-------------------------------------- t_prior <- student_t(df = 7, location = 0, scale = 2.5) fit1 <- stan_glm(switch ~ dist100, data = wells, family = binomial(link = "logit"), prior = t_prior, prior_intercept = t_prior, cores = 2, seed = 12345) ## ---- binom-arsenic-print, echo=FALSE----------------------------------------- (coef_fit1 <- round(coef(fit1), 3)) ## ---- binom-arsenic-ci-------------------------------------------------------- round(posterior_interval(fit1, prob = 0.5), 2) ## ---- binom-arsenic-plot-model------------------------------------------------ # Predicted probability as a function of x pr_switch <- function(x, ests) plogis(ests[1] + ests[2] * x) # A function to slightly jitter the binary data jitt <- function(...) { geom_point(aes_string(...), position = position_jitter(height = 0.05, width = 0.1), size = 2, shape = 21, stroke = 0.2) } ggplot(wells, aes(x = dist100, y = switch, color = switch)) + scale_y_continuous(breaks = c(0, 0.5, 1)) + jitt(x="dist100") + stat_function(fun = pr_switch, args = list(ests = coef(fit1)), size = 2, color = "gray35") ## ----binom-arsenic-mcmc2, results="hide"-------------------------------------- fit2 <- update(fit1, formula = switch ~ dist100 + arsenic) ## ----------------------------------------------------------------------------- (coef_fit2 <- round(coef(fit2), 3)) ## ----echo=FALSE--------------------------------------------------------------- theme_update(legend.position = "right") ## ---- binom-arsenic-plot-model2----------------------------------------------- pr_switch2 <- function(x, y, ests) plogis(ests[1] + ests[2] * x + ests[3] * y) grid <- expand.grid(dist100 = seq(0, 4, length.out = 100), arsenic = seq(0, 10, length.out = 100)) grid$prob <- with(grid, pr_switch2(dist100, arsenic, coef(fit2))) ggplot(grid, aes(x = dist100, y = arsenic)) + geom_tile(aes(fill = prob)) + geom_point(data = wells, aes(color = factor(switch)), size = 2, alpha = 0.85) + scale_fill_gradient() + scale_color_manual("switch", values = c("white", "black"), labels = c("No", "Yes")) ## ----echo=FALSE--------------------------------------------------------------- theme_update(legend.position = "none") ## ---- binom-arsenic-plot-model2-alt------------------------------------------- # Quantiles q_ars <- quantile(wells$dist100, seq(0, 1, 0.25)) q_dist <- quantile(wells$arsenic, seq(0, 1, 0.25)) base <- ggplot(wells) + xlim(c(0, NA)) + scale_y_continuous(breaks = c(0, 0.5, 1)) vary_arsenic <- base + jitt(x="arsenic", y="switch", color="switch") vary_dist <- base + jitt(x="dist100", y="switch", color="switch") for (i in 1:5) { vary_dist <- vary_dist + stat_function(fun = pr_switch2, color = "gray35", args = list(ests = coef(fit2), y = q_dist[i])) vary_arsenic <- vary_arsenic + stat_function(fun = pr_switch2, color = "gray35", args = list(ests = coef(fit2), x = q_ars[i])) } bayesplot_grid(vary_dist, vary_arsenic, grid_args = list(ncol = 2)) ## ---- binom-arsenic-loo------------------------------------------------------- (loo1 <- loo(fit1)) (loo2 <- loo(fit2)) loo_compare(loo1, loo2) ## ---- results = "hide"-------------------------------------------------------- post <- stan_clogit(case ~ spontaneous + induced + (1 | parity), data = infert[order(infert$stratum), ], # order necessary strata = stratum, QR = TRUE, cores = 2, seed = 12345) ## ----------------------------------------------------------------------------- post ## ----------------------------------------------------------------------------- PPD <- posterior_predict(post) stopifnot(rowSums(PPD) == max(infert$stratum)) PLP <- posterior_linpred(post, transform = TRUE) stopifnot(round(rowSums(PLP)) == max(infert$stratum))