## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>", 
  message = FALSE, 
  warning=FALSE
)

## -----------------------------------------------------------------------------
library(emmeans)
library(VizTest)
library(dplyr)
data(esoph)
esoph$age <- factor(as.character(esoph$age), 
                        levels=c("25-34", "35-44", "45-54", "55-64", "65-74", "75+"))
model1 <- glm(cbind(ncases, ncontrols) ~ age,
              data = esoph, family = binomial())
model_sum <- glm(cbind(ncases, ncontrols) ~ 1,
              data = esoph, family = binomial())

## -----------------------------------------------------------------------------
em <- emmeans(model1, "age")
fit <- print(em)

## -----------------------------------------------------------------------------
vt <- viztest(em, test_level = .05)
vt
ic_level <- get_viztest_levels(vt, "easiest")

## -----------------------------------------------------------------------------
fit$or <- exp(fit$emmean)
fit$lower <- exp(fit$asymp.LCL)
fit$upper <- exp(fit$asymp.UCL)
ag_data <- aggregate(esoph[,c("ncases", "ncontrols")], list(age = esoph$age), \(x)as.integer(sum(x)))
fit$n_cases <- ag_data$ncases
fit$n_controls <- ag_data$ncontrols
fit_ci <- confint(em, level = ic_level)
fit$lower_ici <- exp(fit_ci$asymp.LCL)
fit$upper_ici <- exp(fit_ci$asymp.UCL)

## -----------------------------------------------------------------------------
pred_sum <- predict(model_sum, type="link", se.fit=TRUE)
ci_sum <- confint(model_sum)
fit_sum <- data.frame(
  age = "Summary", 
  emmean = pred_sum$fit[1],
  SE = pred_sum$se.fit[1],
  asymp.LCL = ci_sum[1], 
  asymp.UCL = ci_sum[2],
  or = exp(pred_sum$fit[1]),
  lower = exp(ci_sum[1]),
  upper = exp(ci_sum[2]), 
  n_cases =sum(ag_data$ncases), 
  n_controls = sum(ag_data$ncontrols), 
  lower_ici = NA, 
  upper_ici = NA
)

## -----------------------------------------------------------------------------
fit <- rbind(fit[,-4], #remove df column
      fit_sum) 

# add row_number indicator for later. 
fit$row_number <- 1:nrow(fit)

## -----------------------------------------------------------------------------
fit$is_summary <- fit$age == "Summary"
fit$point_size <- 1/fit$SE^2
fit$age_label <- factor(fit$age, levels=rev(fit$age))

## -----------------------------------------------------------------------------
out <- gg_forest(fit, 
                 y = "age_label", 
                 x = "or", 
                 xmin_std = "lower", 
                 xmax_std = "upper", 
                 xmin_ici = "lower_ici", 
                 xmax_ici = "upper_ici",
                 vline = 1, 
                 size_prop = "point_size", 
                 is_summary = "is_summary", 
                 use_log_scale = TRUE, 
                 data_cols = c("Age" = "age_label", "Controls" = "n_controls", "Cases"="n_cases", "OR" = "or"), 
                 max_size=5,
                 table_header_size = 16, 
                 table_text_size = 5, 
                 col_nudge=c(-.15, 0,0,0), 
                 diamond_aspect=15, 
                 diamond_row_frac = 2)

## ----out.width="100%", fig.height=6, fig.width=9, fig.align="center", fig.cap="Forest Plot with Standard (95%) and Inferential (75%) Confidence Intervals.  The standard confidence intervals are the thin lines and the inferential confidence intervals are the thick lines.  See Armstrong and Poirier (2025) for more details on inferential confidence intervals."----
plot(out)

## ----fig.height=6, fig.width=6, fig.align="center", out.width="65%", fig.cap="Forest Plot with Standard (95%) and Inferential (75%) Confidence Intervals.  The standard confidence intervals are the thin lines and the inferential confidence intervals are the thick lines.  See Armstrong and Poirier (2025) for more details on inferential confidence intervals."----
library(ggplot2)
g <- ggplot(fit, aes(y=age_label, x=or)) + 
  geom_foreststripe(n_cols=NULL, start = 3) + 
  geom_linerange(aes(xmin = lower, xmax=upper, color="Standard CI"), linewidth=1) + 
  geom_linerange(aes(xmin=lower_ici, xmax=upper_ici, color="Inferential CI"), linewidth=5) + 
  geom_forestpoint(aes(is_summary = is_summary, size=point_size, xmin = lower, xmax=upper), fill="black", 
                   diamond_aspect=10, diamond_row_frac = .9) + 
  theme_classic() + 
  scale_color_manual(values=c("gray60", "black")) + 
  scale_x_log10() + 
  geom_vline(xintercept = 1, linetype="dashed") + 
  labs(x = "Odds Ratio", y="", colour="") + 
  scale_size_area(max_size=5) + 
  guides(size = "none")
g

## -----------------------------------------------------------------------------
g <- g + 
  theme(axis.ticks.y = element_blank(), 
        axis.text.y = element_blank(), 
        axis.line.y=element_blank())

## ----fig.height=6, fig.width=6, fig.align="center", out.width="65%", fig.cap="Data Table for Forest Plot."----
data_cols <- c("Age" = "age_label", "Controls" = "n_controls", "Cases"="n_cases", "OR" = "or")
tab <- ggplot(fit, aes(y=age_label)) + 
  geom_foreststripe(n_cols=length(data_cols), start=3) +
  geom_foresttable(
    cols = data_cols,
    col_nudge = c(-.15, 0, 0, 0), 
    fmt = list(
      age_label = as.character, 
      n_controls = function(x)sprintf("%d",x), 
      n_cases = function(x)sprintf("%d",x), 
      or = function(x)sprintf("%.2f",x)
    ), 
    col_align = c("left", "center", "center", "center"),
    size = 4
  ) +
  scale_x_foresttable(data_cols, names(data_cols),
                      col_gap = 1) +   # returns list(scale, theme) — ggplot2 handles lists fine
  theme_void() +
  theme(
    axis.text.x.top   = element_text(face = "bold", size = 16, margin = margin(t = 8)),
    axis.line.x.top   = element_blank(),
    axis.ticks.x.top  = element_blank(),
    axis.ticks.length = unit(0, "pt")
  )
tab

## ----out.width="100%", fig.height=6, fig.width=9, fig.align="center", fig.cap="Forest Plot with Data Table.  The left panel is the data table and the right panel is the forest plot.  The striping matches across panels."----
library(patchwork)
(tab + g + plot_layout(widths = c(1.25, 1), guides = "collect")) & theme(legend.position="bottom")

## -----------------------------------------------------------------------------
data(esoph)
esoph <- subset(esoph, agegp != "25-34")
esoph$agegp <- factor(as.character(esoph$agegp), 
                        levels=c("25-34", "35-44", "45-54", "55-64", "65-74", "75+"))
esoph$high_tob <- ifelse(esoph$tobgp %in% c("20-29", "30+"), "High Tobacco", "Low Tobacco")
esoph$high_tob <- factor(esoph$high_tob, levels=c("Low Tobacco", "High Tobacco"))
model1 <- glm(cbind(ncases, ncontrols) ~ agegp*high_tob,
              data = esoph, family = binomial())
model_sum <- glm(cbind(ncases, ncontrols) ~ high_tob,
              data = esoph, family = binomial())
em <- emmeans(model1, c("agegp", "high_tob"))
em_sum <- emmeans(model_sum, "high_tob")
fit <- print(em)
fit_sum <- print(em_sum)
fit_sum$agegp <- "Summary"
fit_sum$lower_ici <- fit_sum$upper_ici <- NA
ag_data <- aggregate(esoph[,c("ncases", "ncontrols")], list(high_tob = esoph$high_tob), \(x)as.integer(sum(x)))
fit_sum$n_cases <- ag_data$ncases
fit_sum$n_controls <- ag_data$ncontrols

vt <- viztest(em, include_zero = FALSE, test_level = .05)
vt
ic_level <- get_viztest_levels(vt, "easiest")
fit_ci <- confint(em, level = ic_level)
fit$lower_ici <- exp(fit_ci$asymp.LCL)
fit$upper_ici <- exp(fit_ci$asymp.UCL)
ag_data <- aggregate(esoph[,c("ncases", "ncontrols")], list(age = esoph$age, high_tob = esoph$high_tob), \(x)as.integer(sum(x)))
fit$n_cases <- ag_data$ncases
fit$n_controls <- ag_data$ncontrols
fit <- rbind(fit, fit_sum)
fit$or <- exp(fit$emmean)
fit$lower <- exp(fit$asymp.LCL)
fit$upper <- exp(fit$asymp.UCL)
fit$is_summary  <- fit$agegp == "Summary"
fit$point_size <- 1/fit$SE^2
fit <- fit[order(fit$agegp, fit$high_tob), ]
fit$row <- 1:nrow(fit)
fit$row_fac <- as.factor(1:nrow(fit))
fit$row_fac <- reorder_forest(fit$row_fac, fit$row)

## ----fig.height=6, fig.width=6, fig.align="center", out.width="65%", fig.cap="Forest Plot with Standard (95%) and Inferential (83%) Confidence Intervals.  The standard confidence intervals are the thin lines and the inferential confidence intervals are the thick lines.  See Armstrong and Poirier (2025) for more details on inferential confidence intervals."----
g <- ggplot(fit, aes(y=row_fac, x=or)) + 
  geom_foreststripe(n_cols=NULL, start = 3) + 
  geom_linerange(aes(xmin = lower, xmax=upper, color = high_tob, alpha="Standard CI"), linewidth=1) + 
  geom_linerange(aes(xmin=lower_ici, xmax=upper_ici, color=high_tob, alpha= "Inferential CI"), linewidth=5) + 
  geom_forestpoint(aes(is_summary = is_summary, size=point_size, xmin = lower, xmax=upper, color=high_tob, fill=high_tob), 
                   diamond_aspect=10, diamond_row_frac = .9) + 
  theme_classic() + 
  scale_alpha_manual(values=c(.25, .75)) +
  scale_x_log10() + 
  geom_vline(xintercept = 1, linetype="dashed") + 
  labs(x = "Odds Ratio", y="", alpha="") + 
  scale_size_area(max_size=5) + 
  guides(size = "none", color="none", fill="none") + 
  theme(axis.ticks.y = element_blank(), 
        axis.text.y = element_blank(), 
        axis.line.y=element_blank())
g

## ----fig.height=6, fig.width=6, fig.align="center", out.width="65%", fig.cap="Data Table for Forest Plot with Subgroups."----
data_cols = c("Age" = "agegp", "Tobacco\nUsage" = "high_tob", "Controls" = "n_controls", "Cases"="n_cases", "OR" = "or")
tab <- ggplot(fit, aes(y=row_fac, x=or)) + 
  geom_foreststripe(n_cols=length(data_cols), start=3) +
  geom_foresttable(aes(color=high_tob), 
    cols = data_cols,
    col_nudge = c(-.15, -.35, 0, 0, 0), 
    fmt = list(
      agegp = as.character, 
      high_tob = as.character,
      n_controls = function(x)sprintf("%d",x), 
      n_cases = function(x)sprintf("%d",x), 
      or = function(x)sprintf("%.2f",x)
    ), 
    col_align = c("left", "left", "center", "center", "center"),
    size = 4
  ) +
  scale_x_foresttable(data_cols, names(data_cols),
                      col_gap = 1) +   # returns list(scale, theme) — ggplot2 handles lists fine
  theme_void() +
  theme(
    axis.text.x.top   = element_text(face = "bold", size = 12, margin = margin(t = 8)),
    axis.line.x.top   = element_blank(),
    axis.ticks.x.top  = element_blank(),
    axis.ticks.length = unit(0, "pt")
  )
tab

## ----out.width="100%", fig.height=6, fig.width=9, fig.align="center", fig.cap="Forest Plot with Data Table with Subgroups.  The left panel is the data table and the right panel is the forest plot.  The striping matches across panels."----
library(patchwork)
(tab + g + plot_layout(widths = c(1.25, 1), guides = "collect")) & theme(legend.position="bottom")