## ----message=FALSE------------------------------------------------------------
library(dplyr)
library(ggplot2)
library(ciTools)
library(MASS)
library(arm)
set.seed(20171102)

## -----------------------------------------------------------------------------
x <- rnorm(100, mean = 5)
y <- rbinom(n = 100, size = 1, prob = invlogit(-20 + 4*x))
df <- data.frame(x = x, y = y)
fit <- glm(y ~ x, family = binomial)

## -----------------------------------------------------------------------------
df1 <- add_ci(df, fit, names = c("lwr", "upr"), alpha = 0.1) %>%
    mutate(type = "parametric")

df2 <- add_ci(df, fit, type = "boot", names = c("lwr", "upr"), alpha = 0.1, nSims = 500) %>%
    mutate(type = "bootstrap")

df <- bind_rows(df1, df2)

## ----fig.width = 6, fig.heither = 4, fig.align = "center"---------------------
ggplot(df, aes(x = x, y = y)) +
    ggtitle("Logistic Regression", subtitle = "Model fit (black line) and confidence intervals (gray)") +
    geom_jitter(height = 0.01) +
    geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.4) +
    geom_line(aes(x =x , y = pred), size = 2) +
    facet_grid(~type)

## ----echo = F-----------------------------------------------------------------
df3 <- filter(df, type == "parametric")

df4 <- filter(df, type == "bootstrap") %>%
    rename(lboot = lwr, uboot = upr) %>%
    bind_cols(df3)

## ----fig.width = 6, fig.heither = 4, fig.align = "center", echo = F-----------
ggplot(df4, aes(x = x, y = y)) +
  ggtitle("Logistic Regression") +
  geom_jitter(height = 0.01) +
  geom_ribbon(aes(ymin = lboot, ymax = uboot), alpha = 0.4, fill = "red") +
  geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.4, fill = "royalblue") +
  geom_line(aes(x = x , y = pred...3), size = 2)

## -----------------------------------------------------------------------------
x <- rnorm(100, mean = 0)
y <- rpois(n = 100, lambda = exp(1.5 + 0.5*x))
df <- data.frame(x = x, y = y)
fit <- glm(y ~ x , family = poisson(link = "log"))

## -----------------------------------------------------------------------------
df_ints <- df %>%
  add_ci(fit, names = c("lcb", "ucb"), alpha = 0.1) %>%
  add_pi(fit, names = c("lpb", "upb"), alpha = 0.1, nSims = 20000) %>%
  head()

## ----fig.width = 6, fig.heither = 4, fig.align = "center"---------------------
df_ints %>%
    ggplot(aes(x = x, y = y)) +
    geom_point(size = 2) +
    ggtitle("Poisson Regression", subtitle = "Model fit (black line), with prediction intervals (gray), confidence intervals (dark gray)") +
    geom_line(aes(x = x, y = pred), size = 1.2) +
    geom_ribbon(aes(ymin = lcb, ymax = ucb), alpha = 0.4) +
    geom_ribbon(aes(ymin = lpb, ymax = upb), alpha = 0.2)

## -----------------------------------------------------------------------------
df %>%
  add_probs(fit, q = 10) %>%
  add_quantile(fit, p = 0.4) %>%
  head()

## -----------------------------------------------------------------------------
x <- runif(n = 100, min = 0, max = 2)
mu <- exp(1 + x)
y <- rnegbin(n = 100, mu = mu, theta = mu/(5 - 1))

## -----------------------------------------------------------------------------
df <- data.frame(x = x, y = y)
fit <- glm(y ~ x, family = quasipoisson(link = "log"))
summary(fit)$dispersion

## -----------------------------------------------------------------------------
df_ints <- add_ci(df, fit, names = c("lcb", "ucb"), alpha = 0.05) %>%
    add_pi(fit, names = c("lpb", "upb"), alpha = 0.1, nSims = 20000)

## ----fig.width = 6, fig.heither = 4, fig.align="center"-----------------------
ggplot(df_ints, aes(x = x, y = y)) +
    ggtitle("Quasipoisson Regression", subtitle = "Model fit (black line), with Prediction intervals (gray), Confidence intervals (dark gray)") +
    geom_point(size = 2) +
    geom_line(aes(x = x, y = pred), size = 1.2) +
    geom_ribbon(aes(ymin = lcb, ymax = ucb), alpha = 0.4) +
    geom_ribbon(aes(ymin = lpb, ymax = upb), alpha = 0.2)

## ----message=FALSE------------------------------------------------------------
pois <- read.csv("pois_pi_results.csv") %>%
  dplyr::select(-total_time) %>%
  rename(nominal_level = level) %>%
  dplyr::select(sample_size, everything())
knitr::kable(pois)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(pois, aes(x=sample_size, y=cov_prob)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se, ymax = cov_prob + 2*cov_prob_se), width=.1) +
    ylim(0, 1) +
    xlab("Sample Size (log scale)") +
    ylab("Coverage Probs +/- 2 SEs") +
    ggtitle("Poisson Regression -- P.I. Simulation", subtitle = "Coverage Probabilities on [0,1] scale") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.95), colour="#BB0000", linetype="dashed", size = 1)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(pois, aes(x=sample_size, y=cov_prob)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se, ymax = cov_prob + 2*cov_prob_se), width=.1) +
    xlab("Sample Size (log scale)") +
    ylab("Coverage Probs +/- 2 SEs") +
    ggtitle("Poisson Regression -- P.I. Simulation", subtitle = "Coverage Probabilities (zoom in)") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.95), colour="#BB0000", linetype="dashed", size = 1)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(pois, aes(x=sample_size, y=int_width)) +
    geom_errorbar(aes(ymin=int_width - 2*int_width_se, ymax = int_width + 2*int_width_se), width=.1) +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    xlab("Sample Size (log scale)") +
    ylab("Interval Widths +/- 2 SEs") +
    ggtitle("Poisson Regression -- P.I. Simulation", subtitle = "Interval Widths") +
    geom_line(size = 1.5) +
    geom_point(size = 1.5)

## ----message=FALSE------------------------------------------------------------
neg_bin <- read.csv("negbin_pi_results.csv") %>%
  dplyr::select(-total_time) %>%
  rename(nominal_level = level) %>%
  dplyr::select(sample_size, everything())
knitr::kable(neg_bin)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(neg_bin, aes(x=sample_size, y=cov_prob)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se, ymax = cov_prob + 2*cov_prob_se), width=.1) +
    ylim(0, 1) +
    xlab("Sample Size (log scale)") +
    ylab("Coverage Probs +/- 2 SEs") +
    ggtitle("Negative Binomial Regression -- P.I. Simulation", subtitle = "Coverage Probabilities on [0,1] scale") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 150, 200, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.95), colour="#BB0000", linetype="dashed", size = 1)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(neg_bin, aes(x=sample_size, y=cov_prob)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se, ymax = cov_prob + 2*cov_prob_se), width=.1) +
    xlab("Sample Size (log scale)") +
    ylab("Coverage Probs +/- 2 SEs") +
    ggtitle("Negative Binomial Regression -- P.I. Simulation", subtitle = "Coverage Probabilities (zoom in)") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 150, 200, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.95), colour="#BB0000", linetype="dashed", size = 1)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(neg_bin, aes(x=sample_size, y=int_width)) +
    geom_errorbar(aes(ymin=int_width - 2*int_width_se, ymax = int_width + 2*int_width_se), width=.1) +
    scale_x_log10(breaks = c(20, 30, 50, 100, 150, 200, 250, 500, 1000, 2000)) +
    xlab("Sample Size (log scale)") +
    ylab("Interval Widths +/- 2 SEs") +
    ggtitle("Negative Binomial Regression -- P.I. Simulation", subtitle = "Interval Widths") +
    geom_line(size = 1.5) +
    geom_point(size = 1.5)

## ----message=FALSE------------------------------------------------------------
gam <- read.csv("gamma_pi_results.csv") %>%
  dplyr::select(-total_time) %>%
  rename(nominal_level = level) %>%
  dplyr::select(sample_size, everything())

knitr::kable(gam)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(gam, aes(x=sample_size, y=cov_prob)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se, ymax = cov_prob + 2*cov_prob_se), width=.1) +
    ylim(0, 1) +
    xlab("Sample Size (log scale)") +
    ylab("Coverage Probs +/- 2 SEs") +
    ggtitle("Gamma Regression -- P.I. Simulation", subtitle = "Coverage Probabilities on [0,1] scale") +
    scale_x_log10(breaks = c(100, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.95), colour="#BB0000", linetype="dashed", size = 1)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(gam, aes(x=sample_size, y=cov_prob)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se, ymax = cov_prob + 2*cov_prob_se), width=.1) +
    xlab("Sample Size (log scale)") +
    ylab("Coverage Probs +/- 2 SEs") +
    ggtitle("Gamma Regression -- P.I. Simulation", subtitle = "Coverage Probabilities (zoom in)") +
    scale_x_log10(breaks = c(100, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.95), colour="#BB0000", linetype="dashed", size = 1)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(gam, aes(x=sample_size, y=int_width)) +
    geom_errorbar(aes(ymin=int_width - 2*int_width_se, ymax = int_width + 2*int_width_se), width=.1) +
    scale_x_log10(breaks = c(100, 250, 500, 1000, 2000)) +
    xlab("Sample Size (log scale)") +
    ylab("Interval Widths +/- 2 SEs") +
    ggtitle("Gamma Regression -- P.I. Simulation", subtitle = "Interval Widths") +
    geom_line(size = 1.5) +
    geom_point(size = 1.5)

## ----message=FALSE------------------------------------------------------------
norm_log <- read.csv("gaussian_pi_loglink_results.csv") %>%
  dplyr::select(-total_time) %>%
  rename(nominal_level = level) %>%
  dplyr::select(sample_size, everything())
knitr::kable(norm_log)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(norm_log, aes(x=sample_size, y=cov_prob)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se, ymax = cov_prob + 2*cov_prob_se), width=.1) +
    ylim(0, 1) +
    xlab("Sample Size (log scale)") +
    ylab("Coverage Probs +/- 2 SEs") +
    ggtitle("Gaussian Regression -- P.I. Simulation", subtitle = "Coverage Probabilities on [0,1] scale") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.95), colour="#BB0000", linetype="dashed", size = 1)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(norm_log, aes(x=sample_size, y=cov_prob)) +
    geom_point(size = 2) +
    geom_line(size = 1.5) +
    geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se, ymax = cov_prob + 2*cov_prob_se), width=.1) +
    xlab("Sample Size (log scale)") +
    ylab("Coverage Probs +/- 2 SEs") +
    ggtitle("Gaussian Regression -- P.I. Simulation", subtitle = "Coverage Probabilities (zoom in)") +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    geom_hline(aes(yintercept = 0.95), colour="#BB0000", linetype="dashed", size = 1)

## ----fig.width = 6, fig.heither = 4, fig.align="center", echo = F-------------
ggplot(norm_log, aes(x=sample_size, y=int_width)) +
    geom_errorbar(aes(ymin=int_width - 2*int_width_se, ymax = int_width + 2*int_width_se), width=.1) +
    scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) +
    xlab("Sample Size (log scale)") +
    ylab("Interval Widths +/- 2 SEs") +
    ggtitle("Gaussian Regression -- P.I. Simulation", subtitle = "Interval Widths") +
    geom_line(size = 1.5) +
    geom_point(size = 1.5)

## -----------------------------------------------------------------------------
sessionInfo()