## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", message = FALSE, warning=FALSE ) ## ----------------------------------------------------------------------------- library(emmeans) library(VizTest) library(dplyr) data(esoph) esoph$age <- factor(as.character(esoph$age), levels=c("25-34", "35-44", "45-54", "55-64", "65-74", "75+")) model1 <- glm(cbind(ncases, ncontrols) ~ age, data = esoph, family = binomial()) model_sum <- glm(cbind(ncases, ncontrols) ~ 1, data = esoph, family = binomial()) ## ----------------------------------------------------------------------------- em <- emmeans(model1, "age") fit <- print(em) ## ----------------------------------------------------------------------------- vt <- viztest(em, test_level = .05) vt ic_level <- get_viztest_levels(vt, "easiest") ## ----------------------------------------------------------------------------- fit$or <- exp(fit$emmean) fit$lower <- exp(fit$asymp.LCL) fit$upper <- exp(fit$asymp.UCL) ag_data <- aggregate(esoph[,c("ncases", "ncontrols")], list(age = esoph$age), \(x)as.integer(sum(x))) fit$n_cases <- ag_data$ncases fit$n_controls <- ag_data$ncontrols fit_ci <- confint(em, level = ic_level) fit$lower_ici <- exp(fit_ci$asymp.LCL) fit$upper_ici <- exp(fit_ci$asymp.UCL) ## ----------------------------------------------------------------------------- pred_sum <- predict(model_sum, type="link", se.fit=TRUE) ci_sum <- confint(model_sum) fit_sum <- data.frame( age = "Summary", emmean = pred_sum$fit[1], SE = pred_sum$se.fit[1], asymp.LCL = ci_sum[1], asymp.UCL = ci_sum[2], or = exp(pred_sum$fit[1]), lower = exp(ci_sum[1]), upper = exp(ci_sum[2]), n_cases =sum(ag_data$ncases), n_controls = sum(ag_data$ncontrols), lower_ici = NA, upper_ici = NA ) ## ----------------------------------------------------------------------------- fit <- rbind(fit[,-4], #remove df column fit_sum) # add row_number indicator for later. fit$row_number <- 1:nrow(fit) ## ----------------------------------------------------------------------------- fit$is_summary <- fit$age == "Summary" fit$point_size <- 1/fit$SE^2 fit$age_label <- factor(fit$age, levels=rev(fit$age)) ## ----------------------------------------------------------------------------- out <- gg_forest(fit, y = "age_label", x = "or", xmin_std = "lower", xmax_std = "upper", xmin_ici = "lower_ici", xmax_ici = "upper_ici", vline = 1, size_prop = "point_size", is_summary = "is_summary", use_log_scale = TRUE, data_cols = c("Age" = "age_label", "Controls" = "n_controls", "Cases"="n_cases", "OR" = "or"), max_size=5, table_header_size = 16, table_text_size = 5, col_nudge=c(-.15, 0,0,0), diamond_aspect=15, diamond_row_frac = 2) ## ----out.width="100%", fig.height=6, fig.width=9, fig.align="center", fig.cap="Forest Plot with Standard (95%) and Inferential (75%) Confidence Intervals. The standard confidence intervals are the thin lines and the inferential confidence intervals are the thick lines. See Armstrong and Poirier (2025) for more details on inferential confidence intervals."---- plot(out) ## ----fig.height=6, fig.width=6, fig.align="center", out.width="65%", fig.cap="Forest Plot with Standard (95%) and Inferential (75%) Confidence Intervals. The standard confidence intervals are the thin lines and the inferential confidence intervals are the thick lines. See Armstrong and Poirier (2025) for more details on inferential confidence intervals."---- library(ggplot2) g <- ggplot(fit, aes(y=age_label, x=or)) + geom_foreststripe(n_cols=NULL, start = 3) + geom_linerange(aes(xmin = lower, xmax=upper, color="Standard CI"), linewidth=1) + geom_linerange(aes(xmin=lower_ici, xmax=upper_ici, color="Inferential CI"), linewidth=5) + geom_forestpoint(aes(is_summary = is_summary, size=point_size, xmin = lower, xmax=upper), fill="black", diamond_aspect=10, diamond_row_frac = .9) + theme_classic() + scale_color_manual(values=c("gray60", "black")) + scale_x_log10() + geom_vline(xintercept = 1, linetype="dashed") + labs(x = "Odds Ratio", y="", colour="") + scale_size_area(max_size=5) + guides(size = "none") g ## ----------------------------------------------------------------------------- g <- g + theme(axis.ticks.y = element_blank(), axis.text.y = element_blank(), axis.line.y=element_blank()) ## ----fig.height=6, fig.width=6, fig.align="center", out.width="65%", fig.cap="Data Table for Forest Plot."---- data_cols <- c("Age" = "age_label", "Controls" = "n_controls", "Cases"="n_cases", "OR" = "or") tab <- ggplot(fit, aes(y=age_label)) + geom_foreststripe(n_cols=length(data_cols), start=3) + geom_foresttable( cols = data_cols, col_nudge = c(-.15, 0, 0, 0), fmt = list( age_label = as.character, n_controls = function(x)sprintf("%d",x), n_cases = function(x)sprintf("%d",x), or = function(x)sprintf("%.2f",x) ), col_align = c("left", "center", "center", "center"), size = 4 ) + scale_x_foresttable(data_cols, names(data_cols), col_gap = 1) + # returns list(scale, theme) — ggplot2 handles lists fine theme_void() + theme( axis.text.x.top = element_text(face = "bold", size = 16, margin = margin(t = 8)), axis.line.x.top = element_blank(), axis.ticks.x.top = element_blank(), axis.ticks.length = unit(0, "pt") ) tab ## ----out.width="100%", fig.height=6, fig.width=9, fig.align="center", fig.cap="Forest Plot with Data Table. The left panel is the data table and the right panel is the forest plot. The striping matches across panels."---- library(patchwork) (tab + g + plot_layout(widths = c(1.25, 1), guides = "collect")) & theme(legend.position="bottom") ## ----------------------------------------------------------------------------- data(esoph) esoph <- subset(esoph, agegp != "25-34") esoph$agegp <- factor(as.character(esoph$agegp), levels=c("25-34", "35-44", "45-54", "55-64", "65-74", "75+")) esoph$high_tob <- ifelse(esoph$tobgp %in% c("20-29", "30+"), "High Tobacco", "Low Tobacco") esoph$high_tob <- factor(esoph$high_tob, levels=c("Low Tobacco", "High Tobacco")) model1 <- glm(cbind(ncases, ncontrols) ~ agegp*high_tob, data = esoph, family = binomial()) model_sum <- glm(cbind(ncases, ncontrols) ~ high_tob, data = esoph, family = binomial()) em <- emmeans(model1, c("agegp", "high_tob")) em_sum <- emmeans(model_sum, "high_tob") fit <- print(em) fit_sum <- print(em_sum) fit_sum$agegp <- "Summary" fit_sum$lower_ici <- fit_sum$upper_ici <- NA ag_data <- aggregate(esoph[,c("ncases", "ncontrols")], list(high_tob = esoph$high_tob), \(x)as.integer(sum(x))) fit_sum$n_cases <- ag_data$ncases fit_sum$n_controls <- ag_data$ncontrols vt <- viztest(em, include_zero = FALSE, test_level = .05) vt ic_level <- get_viztest_levels(vt, "easiest") fit_ci <- confint(em, level = ic_level) fit$lower_ici <- exp(fit_ci$asymp.LCL) fit$upper_ici <- exp(fit_ci$asymp.UCL) ag_data <- aggregate(esoph[,c("ncases", "ncontrols")], list(age = esoph$age, high_tob = esoph$high_tob), \(x)as.integer(sum(x))) fit$n_cases <- ag_data$ncases fit$n_controls <- ag_data$ncontrols fit <- rbind(fit, fit_sum) fit$or <- exp(fit$emmean) fit$lower <- exp(fit$asymp.LCL) fit$upper <- exp(fit$asymp.UCL) fit$is_summary <- fit$agegp == "Summary" fit$point_size <- 1/fit$SE^2 fit <- fit[order(fit$agegp, fit$high_tob), ] fit$row <- 1:nrow(fit) fit$row_fac <- as.factor(1:nrow(fit)) fit$row_fac <- reorder_forest(fit$row_fac, fit$row) ## ----fig.height=6, fig.width=6, fig.align="center", out.width="65%", fig.cap="Forest Plot with Standard (95%) and Inferential (83%) Confidence Intervals. The standard confidence intervals are the thin lines and the inferential confidence intervals are the thick lines. See Armstrong and Poirier (2025) for more details on inferential confidence intervals."---- g <- ggplot(fit, aes(y=row_fac, x=or)) + geom_foreststripe(n_cols=NULL, start = 3) + geom_linerange(aes(xmin = lower, xmax=upper, color = high_tob, alpha="Standard CI"), linewidth=1) + geom_linerange(aes(xmin=lower_ici, xmax=upper_ici, color=high_tob, alpha= "Inferential CI"), linewidth=5) + geom_forestpoint(aes(is_summary = is_summary, size=point_size, xmin = lower, xmax=upper, color=high_tob, fill=high_tob), diamond_aspect=10, diamond_row_frac = .9) + theme_classic() + scale_alpha_manual(values=c(.25, .75)) + scale_x_log10() + geom_vline(xintercept = 1, linetype="dashed") + labs(x = "Odds Ratio", y="", alpha="") + scale_size_area(max_size=5) + guides(size = "none", color="none", fill="none") + theme(axis.ticks.y = element_blank(), axis.text.y = element_blank(), axis.line.y=element_blank()) g ## ----fig.height=6, fig.width=6, fig.align="center", out.width="65%", fig.cap="Data Table for Forest Plot with Subgroups."---- data_cols = c("Age" = "agegp", "Tobacco\nUsage" = "high_tob", "Controls" = "n_controls", "Cases"="n_cases", "OR" = "or") tab <- ggplot(fit, aes(y=row_fac, x=or)) + geom_foreststripe(n_cols=length(data_cols), start=3) + geom_foresttable(aes(color=high_tob), cols = data_cols, col_nudge = c(-.15, -.35, 0, 0, 0), fmt = list( agegp = as.character, high_tob = as.character, n_controls = function(x)sprintf("%d",x), n_cases = function(x)sprintf("%d",x), or = function(x)sprintf("%.2f",x) ), col_align = c("left", "left", "center", "center", "center"), size = 4 ) + scale_x_foresttable(data_cols, names(data_cols), col_gap = 1) + # returns list(scale, theme) — ggplot2 handles lists fine theme_void() + theme( axis.text.x.top = element_text(face = "bold", size = 12, margin = margin(t = 8)), axis.line.x.top = element_blank(), axis.ticks.x.top = element_blank(), axis.ticks.length = unit(0, "pt") ) tab ## ----out.width="100%", fig.height=6, fig.width=9, fig.align="center", fig.cap="Forest Plot with Data Table with Subgroups. The left panel is the data table and the right panel is the forest plot. The striping matches across panels."---- library(patchwork) (tab + g + plot_layout(widths = c(1.25, 1), guides = "collect")) & theme(legend.position="bottom")