--- title: "Z-Curve Publication Bias Diagnostics" author: "František Bartoš" date: "`r Sys.Date()`" output: rmarkdown::html_vignette: self_contained: yes bibliography: ../inst/REFERENCES.bib csl: ../inst/apa.csl vignette: > %\VignetteIndexEntry{Z-Curve Publication Bias Diagnostics} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} %\VignetteEngine{knitr::rmarkdown_notangle} --- ```{r setup, include = FALSE} is_check <- ("CheckExEnv" %in% search()) || any(c("_R_CHECK_TIMINGS_", "_R_CHECK_LICENSE_") %in% names(Sys.getenv())) || !file.exists("../models/zcurve/zcurve_RE_Hoppen2025.RDS") knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = !is_check, dev = "png", fig.width = 7, fig.height = 5) if(.Platform$OS.type == "windows"){ knitr::opts_chunk$set(dev.args = list(type = "cairo")) } ``` ```{r include = FALSE, eval = FALSE} # R package version updating library(RoBMA) # Ease of retrieval - Hoppen2025 data("Weingarten2018", package = "RoBMA") Weingarten2018 <- Weingarten2018[Weingarten2018$standard_paradigm & Weingarten2018$proximal_dataset, ] fit_RE_Weingarten2018 <- NoBMA(r = Weingarten2018$r_xy, n = round(Weingarten2018$N), study_ids = Weingarten2018$paper_id, priors_effect_null = NULL, priors_heterogeneity_null = NULL, algorithm = "ss", sample = 10000, burnin = 10000, adapt = 10000, chains = 5, parallel = TRUE, seed = 1, save = "min") fit_RoBMA_Weingarten2018 <- RoBMA(r = Weingarten2018$r_xy, n = round(Weingarten2018$N), study_ids = Weingarten2018$paper_id, algorithm = "ss", sample = 10000, burnin = 10000, adapt = 10000, chains = 5, parallel = TRUE, seed = 1, save = "min") zcurve_RE_Weingarten2018 <- as_zcurve(fit_RE_Weingarten2018) zcurve_RoBMA_Weingarten2018 <- as_zcurve(fit_RoBMA_Weingarten2018) saveRDS(zcurve_RE_Weingarten2018, file = "../models/zcurve/zcurve_RE_Weingarten2018.RDS", compress = "xz") saveRDS(zcurve_RoBMA_Weingarten2018, file = "../models/zcurve/zcurve_RoBMA_Weingarten2018.RDS", compress = "xz") # Social comparison example - Hoppen2025 data("Hoppen2025", package = "RoBMA") fit_RE_Hoppen2025 <- NoBMA(d = Hoppen2025$d, se = sqrt(Hoppen2025$v), priors_effect_null = NULL, priors_heterogeneity_null = NULL, algorithm = "ss", sample = 10000, burnin = 5000, adapt = 5000, chains = 5, parallel = TRUE, seed = 1, save = "min") fit_RoBMA_Hoppen2025 <- RoBMA(d = Hoppen2025$d, se = sqrt(Hoppen2025$v), algorithm = "ss", sample = 10000, burnin = 5000, adapt = 5000, chains = 5, parallel = TRUE, seed = 1, save = "min") zcurve_RE_Hoppen2025 <- as_zcurve(fit_RE_Hoppen2025) zcurve_RoBMA_Hoppen2025 <- as_zcurve(fit_RoBMA_Hoppen2025) saveRDS(zcurve_RE_Hoppen2025, file = "../models/zcurve/zcurve_RE_Hoppen2025.RDS", compress = "xz") saveRDS(zcurve_RoBMA_Hoppen2025, file = "../models/zcurve/zcurve_RoBMA_Hoppen2025.RDS", compress = "xz") # ChatGPT example - Wang2025 data("Wang2025", package = "RoBMA") Wang2025 <- Wang2025[Wang2025$Learning_effect == "Learning performance", ] fit_RE_Wang2025 <- NoBMA(d = Wang2025$g, se = Wang2025$se, priors_effect_null = NULL, priors_heterogeneity_null = NULL, algorithm = "ss", sample = 10000, burnin = 5000, adapt = 5000, chains = 5, parallel = TRUE, seed = 1, save = "min") fit_RoBMA_Wang2025 <- RoBMA(d = Wang2025$g, se = Wang2025$se, algorithm = "ss", sample = 10000, burnin = 5000, adapt = 5000, chains = 5, parallel = TRUE, seed = 1, save = "min") zcurve_RE_Wang2025 <- as_zcurve(fit_RE_Wang2025) zcurve_RoBMA_Wang2025 <- as_zcurve(fit_RoBMA_Wang2025) saveRDS(zcurve_RE_Wang2025, file = "../models/zcurve/zcurve_RE_Wang2025.RDS", compress = "xz") 
saveRDS(zcurve_RoBMA_Wang2025, file = "../models/zcurve/zcurve_RoBMA_Wang2025.RDS", compress = "xz") # Many Labs 2 example - ManyLabs16 data("ManyLabs16", package = "RoBMA") fit_RE_ManyLabs16 <- NoBMA(d = ManyLabs16$y, se = ManyLabs16$se, priors_effect_null = NULL, priors_heterogeneity_null = NULL, algorithm = "ss", sample = 10000, burnin = 5000, adapt = 5000, chains = 5, parallel = TRUE, seed = 1, save = "min") fit_RoBMA_ManyLabs16 <- RoBMA(d = ManyLabs16$y, se = ManyLabs16$se, algorithm = "ss", sample = 10000, burnin = 5000, adapt = 5000, chains = 5, parallel = TRUE, seed = 1, save = "min") zcurve_RE_ManyLabs16 <- as_zcurve(fit_RE_ManyLabs16) zcurve_RoBMA_ManyLabs16 <- as_zcurve(fit_RoBMA_ManyLabs16) saveRDS(zcurve_RE_ManyLabs16, file = "../models/zcurve/zcurve_RE_ManyLabs16.RDS", compress = "xz") saveRDS(zcurve_RoBMA_ManyLabs16, file = "../models/zcurve/zcurve_RoBMA_ManyLabs16.RDS", compress = "xz") ``` ```{r include = FALSE} knitr::opts_chunk$set(echo = TRUE) # preload the fitted models zcurve_RE_Weingarten2018 <- readRDS(file = "../models/zcurve/zcurve_RE_Weingarten2018.RDS") zcurve_RoBMA_Weingarten2018 <- readRDS(file = "../models/zcurve/zcurve_RoBMA_Weingarten2018.RDS") zcurve_RE_Hoppen2025 <- readRDS(file = "../models/zcurve/zcurve_RE_Hoppen2025.RDS") zcurve_RoBMA_Hoppen2025 <- readRDS(file = "../models/zcurve/zcurve_RoBMA_Hoppen2025.RDS") zcurve_RE_Wang2025 <- readRDS(file = "../models/zcurve/zcurve_RE_Wang2025.RDS") zcurve_RoBMA_Wang2025 <- readRDS(file = "../models/zcurve/zcurve_RoBMA_Wang2025.RDS") zcurve_RE_ManyLabs16 <- readRDS(file = "../models/zcurve/zcurve_RE_ManyLabs16.RDS") zcurve_RoBMA_ManyLabs16 <- readRDS(file = "../models/zcurve/zcurve_RoBMA_ManyLabs16.RDS") fit_RE_Weingarten2018 <- zcurve_RE_Weingarten2018 fit_RoBMA_Weingarten2018 <- zcurve_RoBMA_Weingarten2018 fit_RE_Hoppen2025 <- zcurve_RE_Hoppen2025 fit_RoBMA_Hoppen2025 <- zcurve_RoBMA_Hoppen2025 fit_RE_Wang2025 <- zcurve_RE_Wang2025 fit_RoBMA_Wang2025 <- zcurve_RoBMA_Wang2025 fit_RE_ManyLabs16 <- zcurve_RE_ManyLabs16 fit_RoBMA_ManyLabs16 <- zcurve_RoBMA_ManyLabs16 class(fit_RE_Weingarten2018) <- class(fit_RE_Weingarten2018)[!class(fit_RE_Weingarten2018) %in% "zcurve_RoBMA"] class(fit_RoBMA_Weingarten2018) <- class(fit_RoBMA_Weingarten2018)[!class(fit_RoBMA_Weingarten2018) %in% "zcurve_RoBMA"] class(fit_RE_Hoppen2025) <- class(fit_RE_Hoppen2025)[!class(fit_RE_Hoppen2025) %in% "zcurve_RoBMA"] class(fit_RoBMA_Hoppen2025) <- class(fit_RoBMA_Hoppen2025)[!class(fit_RoBMA_Hoppen2025) %in% "zcurve_RoBMA"] class(fit_RE_Wang2025) <- class(fit_RE_Wang2025)[!class(fit_RE_Wang2025) %in% "zcurve_RoBMA"] class(fit_RoBMA_Wang2025) <- class(fit_RoBMA_Wang2025)[!class(fit_RoBMA_Wang2025) %in% "zcurve_RoBMA"] class(fit_RE_ManyLabs16) <- class(fit_RE_ManyLabs16)[!class(fit_RE_ManyLabs16) %in% "zcurve_RoBMA"] class(fit_RoBMA_ManyLabs16) <- class(fit_RoBMA_ManyLabs16)[!class(fit_RoBMA_ManyLabs16) %in% "zcurve_RoBMA"] ``` ## Introduction **This vignette accompanies the "Z-Curve Plot: A Visual Diagnostic for Publication Bias in Meta-Analysis" manuscript [@bartos2025zcurve]. The manuscript provides the theoretical foundation and detailed methodology for z-curve diagnostics, while this vignette demonstrates their practical implementation using the RoBMA R package [@RoBMA].** Z-curve plots provide a visual assessment of meta-analytic model fit specifically aimed at detecting misfit due to publication bias [@bartos2025zcurve]. 
The methodology overlays model-implied posterior predictive distributions of z-statistics on the observed distribution of z-statistics [@gabry2019visualization], allowing researchers to assess how well different meta-analytic models capture the patterns in their data. The approach builds on the z-curve methodology developed by @brunner2020estimating for assessing the quality of research.

The z-curve approach complements statistical tests of publication bias (such as inclusion Bayes factors) with intuitive visualizations that can highlight model misfit due to publication bias. Additionally, the method allows extrapolation to the pre-publication bias state, providing estimates of key metrics such as the expected discovery rate and the number of missing studies.

We illustrate the z-curve diagnostics using four empirical examples from recent meta-analyses that exhibit different degrees of publication bias. The examples demonstrate how to interpret z-curve plots and use them to guide model selection in meta-analytic practice. For details, refer to the accompanying manuscript [@bartos2025zcurve].

## Getting Started

Before we start, we load the RoBMA package:

```{r, message = FALSE}
library("RoBMA")
```

The z-curve diagnostic workflow consists of the following steps:

1. Fit meta-analytic models to your data using `NoBMA()` for random-effects models or `RoBMA()` for publication bias-adjusted models [@maier2020robust; @bartos2021no].
2. Create z-curve objects using the `as_zcurve()` function.
3. Generate histograms of observed z-statistics using `hist()`.
4. Overlay model fits using `lines()` to compare different models.
5. Use the visualization to supplement statistical model comparison.

Note that when using `NoBMA()` to fit a standard random-effects model, we need to set `priors_effect_null = NULL` and `priors_heterogeneity_null = NULL` to disable model averaging across null hypotheses. This ensures we fit a single random-effects model rather than a Bayesian model-averaged ensemble. The z-curve plot is implemented only for models fitted using the spike-and-slab algorithm specified via `algorithm = "ss"`.

## Applied Examples

We demonstrate the z-curve diagnostics on four empirical meta-analyses that represent different publication bias scenarios:

1. **Ease-of-retrieval effect in the few/many paradigm** [@weingarten2018does] - An example with extreme publication bias
2. **Social comparison as a behavior change technique** [@hoppen2025meta] - An example with strong publication bias
3. **ChatGPT effects on learning performance** [@wang2025effect] - An example with moderate publication bias
4. **Framing effects from Many Labs 2** [@klein2018many] - An example with no publication bias (registered replication reports)

For each example, we fit both a simple random-effects model using `NoBMA()` and a publication bias-adjusted ensemble using `RoBMA()` [@maier2020robust; @bartos2021no]. We then use z-curve plots to compare how well each model captures the observed distribution of z-statistics.

### Example 1: Ease-of-Retrieval Effect in the Few/Many Paradigm

This example examines the ease-of-retrieval effect, a well-established phenomenon in cognitive psychology where the subjective difficulty of recalling information influences judgments more than the actual number of items recalled [@schwarz1991ease]. We analyze 298 estimates from 111 studies examining the ease-of-retrieval effect in the few/many standard paradigm in the proximal dataset collected by Weingarten and colleagues [@weingarten2018does].
The original analysis reported a pooled effect size r = 0.25, 95% CI [0.22, 0.28]. When adjusted for publication bias using PET-PEESE, the effect was reduced but remained substantial at r = 0.19, 95% CI [0.15, 0.23].

#### Data and Model Fitting

We begin by loading the ease-of-retrieval dataset and examining its structure. We focus on studies using the standard paradigm in proximal dataset conditions:

```{r}
# Load the ease-of-retrieval dataset
data("Weingarten2018", package = "RoBMA")

# Filter to standard paradigm and proximal dataset as in the original analysis
Weingarten2018 <- Weingarten2018[Weingarten2018$standard_paradigm & Weingarten2018$proximal_dataset, ]

head(Weingarten2018)
```

The filtered dataset contains 298 effect size estimates (correlation coefficients) and sample sizes. Since the effect size estimates are nested within studies (indicated by the `paper_id` variable), we specify a multilevel random-effects model using `NoBMA()` and a multilevel publication bias-adjusted model using `RoBMA()` [@bartos2025robust]. (Note that RoBMA internally transforms the correlation coefficients to Fisher's z for analysis.)

```{r, eval = FALSE}
# Fit random-effects model (unadjusted for publication bias)
fit_RE_Weingarten2018 <- NoBMA(r = Weingarten2018$r_xy, n = round(Weingarten2018$N), study_ids = Weingarten2018$paper_id,
                               priors_effect_null = NULL, priors_heterogeneity_null = NULL,
                               algorithm = "ss", sample = 10000, burnin = 10000, adapt = 10000, chains = 5, parallel = TRUE, seed = 1)

# Fit RoBMA model (adjusted for publication bias)
fit_RoBMA_Weingarten2018 <- RoBMA(r = Weingarten2018$r_xy, n = round(Weingarten2018$N), study_ids = Weingarten2018$paper_id,
                                  algorithm = "ss", sample = 10000, burnin = 10000, adapt = 10000, chains = 5, parallel = TRUE, seed = 1)
```

#### Model Results Summary

We can examine the results from both models using the `summary()` function.

```{r}
# Random-effects model results
summary(fit_RE_Weingarten2018, output_scale = "r")

# RoBMA model results
summary(fit_RoBMA_Weingarten2018, output_scale = "r")
```

The Bayesian multilevel random-effects model finds results similar to those reported in the original publication-bias-unadjusted analysis. The RoBMA analysis reveals extreme evidence for publication bias. However, RoBMA still finds extreme evidence for the presence of an effect, with a model-averaged effect size estimate of r = 0.17, 95% CI [0.13, 0.21].

#### Creating Z-Curve Objects

The `as_zcurve()` function converts fitted RoBMA models into objects suitable for z-curve visualization:

```{r, eval = FALSE}
# Create z-curve objects
zcurve_RE_Weingarten2018 <- as_zcurve(fit_RE_Weingarten2018)
zcurve_RoBMA_Weingarten2018 <- as_zcurve(fit_RoBMA_Weingarten2018)
```

#### Z-Curve Diagnostic Plots

We now generate z-curve plots to assess how well each model captures the observed distribution of z-statistics.
The `hist()` function creates a histogram of the observed z-statistics, while `lines()` overlays the model-implied posterior predictive distributions [@gabry2019visualization]: ```{r, fig.cap="Ease-of-Retrieval Effect: Model Fit Assessment"} # Create histogram of observed z-statistics hist(zcurve_RoBMA_Weingarten2018, from = -3, to = 6, by = 0.25) # Add model-implied distributions lines(zcurve_RE_Weingarten2018, from = -3, to = 6, col = "black", lty = 2, lwd = 2) lines(zcurve_RoBMA_Weingarten2018, from = -3, to = 6, col = "blue", lty = 2, lwd = 2) # Add legend legend("topright", legend = c("Random-Effects", "RoBMA"), col = c("black", "blue"), lty = 2, lwd = 2) ``` The z-curve plot reveals clear evidence of extreme publication bias in the ease-of-retrieval literature. Two extreme discontinuities are visible in the observed distribution of z-statistics (gray bars): 1. **Marginal Significance Threshold (z ≈ 1.64)**: There is a sharp increase in the frequency of test statistics just above the threshold for marginal significance (α = 0.10). 2. **Zero Threshold (z = 0)**: A weaker discontinuity occurs at zero, with additional suppression of studies reporting negative effects (z < 0). The random-effects model (black dashed line) fails to capture these patterns. It systematically overestimates the number of negative results and non-significant positive results. RoBMA (blue dashed line) captures both discontinuities and approximates the observed data much better. These results provide extreme evidence for the presence of publication bias and highlight the need to interpret the publication bias-adjusted model. #### Extrapolation to Pre-Publication Bias The package also allows us to extrapolate what the distribution of z-statistics might have looked like in the absence of publication bias. This is achieved by calling the `plot()` function (with the default `plot_extrapolation = TRUE` argument). ```{r, fig.cap="Ease-of-Retrieval Effect: Extrapolation Analysis"} # Plot extrapolation to pre-publication bias plot(zcurve_RoBMA_Weingarten2018, from = -3, to = 6, by.hist = 0.25) ``` The extrapolated distribution (blue line) shows what we would expect to observe if studies were published regardless of their results. Comparing the fitted distribution (accounting for publication bias) with the extrapolated distribution reveals the extent of the bias. The large discrepancy between these distributions quantifies the substantial impact of publication bias in this literature. #### Z-Curve Summary Metrics This discrepancy can be summarized with the additional statistics provided by the `summary()` function. ```{r} # Extract z-curve summary metrics summary(zcurve_RoBMA_Weingarten2018) ``` The summary provides several key results: The observed discovery rate (ODR = 0.45) substantially exceeds the expected discovery rate (EDR = 0.18, 95% CI [0.14, 0.23]), indicating that roughly 2.5 times as many significant results appear in the published literature as we would expect. The estimated number of missing studies, 579 (95% CI [311, 1014]), suggests that a substantial number of non-significant or negative results may be absent from the published literature. The false discovery risk (FDR = 0.24, 95% CI [0.18, 0.31]) provides an upper bound on the proportion of statistically significant results that may be false positives, though this risk remains moderate due to evidence for a genuine underlying effect despite the extreme publication bias. 
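As a rough sanity check on these metrics, the observed discovery rate can also be approximated by hand from the raw data. The short sketch below is purely illustrative and is not how the package computes the ODR internally; it assumes Fisher's z transformation of the correlation coefficients and a two-sided α = .05 significance criterion (and the object name `z_obs` is ours), so the exact value may differ slightly from the summary output above.

```{r, eval = FALSE}
# Illustrative by-hand approximation of the observed discovery rate (ODR):
# the proportion of estimates that are individually statistically significant.
# Assumes Fisher's z transformation and a two-sided alpha = .05 criterion;
# the package's internal convention may differ.
z_obs <- atanh(Weingarten2018$r_xy) * sqrt(round(Weingarten2018$N) - 3)
mean(abs(z_obs) > qnorm(0.975))
```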
### Example 2: Social Comparison and Behavior Change

This example examines a meta-analysis of randomized controlled trials evaluating the efficacy of social comparison as a behavior change technique [@hoppen2025meta]. The analysis includes 37 trials comparing social comparison interventions to passive controls across domains including climate-change mitigation, health, performance, and service outcomes.

#### Data and Model Fitting

Again, we begin by loading the social comparison dataset and examining its structure:

```{r}
# Load the social comparison dataset
data("Hoppen2025", package = "RoBMA")

head(Hoppen2025)
```

The dataset contains effect sizes (`d`) and sampling variances (`v`) from individual studies. We fit both a random-effects model using `NoBMA()` and a publication bias-adjusted model using `RoBMA()`:

```{r, eval = FALSE}
# Fit random-effects model (unadjusted for publication bias)
fit_RE_Hoppen2025 <- NoBMA(d = Hoppen2025$d, se = sqrt(Hoppen2025$v),
                           priors_effect_null = NULL, priors_heterogeneity_null = NULL,
                           algorithm = "ss", sample = 10000, burnin = 5000, adapt = 5000, chains = 5, parallel = TRUE, seed = 1)

# Fit RoBMA model (adjusted for publication bias)
fit_RoBMA_Hoppen2025 <- RoBMA(d = Hoppen2025$d, se = sqrt(Hoppen2025$v),
                              algorithm = "ss", sample = 10000, burnin = 5000, adapt = 5000, chains = 5, parallel = TRUE, seed = 1)
```

#### Model Results Summary

We examine the key results from both models:

```{r}
# Random-effects model results
summary(fit_RE_Hoppen2025)
```

The random-effects model estimates a positive effect size of g = 0.17, 95% CI [0.11, 0.23], suggesting a positive effect of social comparison interventions. However, this analysis does not account for potential publication bias.

```{r}
# RoBMA model results
summary(fit_RoBMA_Hoppen2025)
```

When accounting for the possibility of publication bias with RoBMA, we find extreme evidence for the presence of publication bias (BF_bias = 597). Consequently, the effect size shrinks to g = -0.01, 95% CI [-0.15, 0.05], with moderate evidence against the presence of an effect (BF_effect = 0.13).

#### Z-Curve Diagnostic Plots

```{r, eval = FALSE}
# Create z-curve objects
zcurve_RE_Hoppen2025 <- as_zcurve(fit_RE_Hoppen2025)
zcurve_RoBMA_Hoppen2025 <- as_zcurve(fit_RoBMA_Hoppen2025)
```

```{r, fig.cap="Social Comparison: Model Fit Assessment"}
# Create histogram of observed z-statistics
hist(zcurve_RoBMA_Hoppen2025)

# Add model-implied distributions
lines(zcurve_RE_Hoppen2025, col = "black", lty = 2, lwd = 2)
lines(zcurve_RoBMA_Hoppen2025, col = "blue", lty = 2, lwd = 2)

# Add legend
legend("topright", legend = c("Random-Effects", "RoBMA"),
       col = c("black", "blue"), lty = 2, lwd = 2)
```

#### Results

The z-curve plot highlights clear evidence of publication bias in this dataset. We can observe pronounced discontinuities in the observed distribution (gray bars) at critical thresholds, particularly at the transition to marginal significance (z ≈ 1.64) and at zero (indicating selection against negative results). The random-effects model (black dashed line) fails to capture these patterns, systematically overestimating the number of negative and non-significant results. In contrast, RoBMA (blue dashed line) successfully models both discontinuities, providing a markedly better fit to the observed data. This visual assessment aligns with the statistical evidence: RoBMA yields extreme evidence for publication bias and suggests that the unadjusted pooled effect is misleading.
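Since essentially the same histogram-and-overlay recipe is used in the first example, here, and again in the remaining examples, it can be convenient to wrap it in a small helper. The function below is our own convenience sketch rather than part of the RoBMA package; it simply repackages the `hist()`, `lines()`, and `legend()` calls shown above, and the hard-coded legend position may need adjusting.

```{r}
# Convenience wrapper (not a RoBMA function): overlays the random-effects and
# RoBMA posterior predictive distributions on the histogram of observed
# z-statistics. Additional arguments (e.g., from, to, by) are passed on to the
# plotting methods.
plot_zcurve_comparison <- function(zcurve_RE, zcurve_RoBMA, ...) {
  hist(zcurve_RoBMA, ...)
  lines(zcurve_RE,    ..., col = "black", lty = 2, lwd = 2)
  lines(zcurve_RoBMA, ..., col = "blue",  lty = 2, lwd = 2)
  legend("topright", legend = c("Random-Effects", "RoBMA"),
         col = c("black", "blue"), lty = 2, lwd = 2)
}
```

For instance, `plot_zcurve_comparison(zcurve_RE_Hoppen2025, zcurve_RoBMA_Hoppen2025)` would produce a figure equivalent to the one above.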
#### Extrapolation to Pre-Publication Bias

The package also allows us to extrapolate what the distribution of z-statistics might have looked like in the absence of publication bias. This is achieved either by setting `extrapolate = TRUE` in the `lines()` function or by calling the `plot()` function (with the default `plot_extrapolation = TRUE` argument).

```{r, fig.cap="Social Comparison: Extrapolation Analysis"}
# Plot extrapolation to pre-publication bias
plot(zcurve_RoBMA_Hoppen2025)
```

The extrapolated distribution (blue line) shows what we would expect to observe if studies were published regardless of their results. Comparing the fitted distribution (accounting for publication bias) with the extrapolated distribution reveals the extent of the bias. The large discrepancy between these distributions quantifies the substantial impact of publication bias in this literature, with implications for the estimated effect size and number of missing studies.

#### Z-Curve Summary Metrics

This discrepancy can be summarized with the additional statistics provided by the `summary()` function.

```{r}
# Extract z-curve summary metrics
summary(zcurve_RoBMA_Hoppen2025)
```

The summary provides several results. The observed discovery rate (ODR, i.e., the observed proportion of statistically significant results) matches the expected discovery rate (EDR), a consequence of the one-sided selection for marginally significant results (rather than for statistically significant results). The estimated number of missing studies, 54, suggests that a considerable number of non-significant or negative results may be absent from the published literature. The false discovery risk (FDR), an upper bound on the proportion of significant results that are false positives, is not extremely inflated because the moderate heterogeneity allows for small positive and negative true effects.

### Example 3: ChatGPT and Learning Performance

Our third example examines the effectiveness of ChatGPT-based interventions on students' learning performance [@wang2025effect]. This meta-analysis includes 42 randomized controlled trials comparing experimental groups (using ChatGPT for tutoring or learning support) with control groups (without ChatGPT) on learning outcomes such as exam scores and final grades.
#### Data Preparation and Model Fitting

We follow the same procedure as in the previous example:

```{r}
# Load the ChatGPT dataset
data("Wang2025", package = "RoBMA")

# Select learning performance studies
Wang2025 <- Wang2025[Wang2025$Learning_effect == "Learning performance", ]

head(Wang2025)
```

```{r, eval = FALSE}
# Fit models
fit_RE_Wang2025 <- NoBMA(d = Wang2025$g, se = Wang2025$se,
                         priors_effect_null = NULL, priors_heterogeneity_null = NULL,
                         algorithm = "ss", sample = 10000, burnin = 5000, adapt = 5000, chains = 5, parallel = TRUE, seed = 1)
fit_RoBMA_Wang2025 <- RoBMA(d = Wang2025$g, se = Wang2025$se,
                            algorithm = "ss", sample = 10000, burnin = 5000, adapt = 5000, chains = 5, parallel = TRUE, seed = 1)

# Create z-curve objects
zcurve_RE_Wang2025 <- as_zcurve(fit_RE_Wang2025)
zcurve_RoBMA_Wang2025 <- as_zcurve(fit_RoBMA_Wang2025)
```

#### Z-Curve Analysis

```{r, fig.cap="ChatGPT: Model Fit Assessment"}
# Assess model fit
hist(zcurve_RoBMA_Wang2025, from = -2, to = 8)
lines(zcurve_RE_Wang2025, col = "black", lty = 2, lwd = 2, from = -2, to = 8)
lines(zcurve_RoBMA_Wang2025, col = "blue", lty = 2, lwd = 2, from = -2, to = 8)
legend("topright", legend = c("Random-Effects", "RoBMA"),
       col = c("black", "blue"), lty = 2, lwd = 2)
```

#### Results

The z-curve plot for the ChatGPT data shows a different pattern than the extreme publication bias observed in the social comparison example. While we do not see strong selection at conventional significance thresholds, there is a moderate discontinuity at the transition to non-conforming results (z = 0), suggesting some degree of selection against negative findings. The random-effects model (black dashed line) provides a better fit to the data than in the previous example; however, RoBMA (blue dashed line) captures the discontinuity at zero slightly better. This visual pattern corresponds to moderate statistical evidence for publication bias, highlighting a case where both models might be considered, though the RoBMA model incorporates the uncertainty about the best model and provides a more complete account of the data patterns.

#### Extrapolation to Pre-Publication Bias

We can examine the extrapolation to assess the impact of publication bias:

```{r, fig.cap="ChatGPT: Extrapolation Analysis"}
# Examine extrapolation
plot(zcurve_RoBMA_Wang2025, from = -2, to = 8)
```

The extrapolated distribution (blue line) shows a more modest difference between the fitted and extrapolated distributions compared to the extreme bias example, reflecting the moderate degree of publication bias in this literature.

#### Model Results Summary

To quantify these visual patterns, we examine the model summaries:

```{r}
summary(fit_RE_Wang2025)
summary(fit_RoBMA_Wang2025)
```

The random-effects model yields an effect size estimate of g = 0.79 [0.57, 1.01], while the RoBMA model accounting for selection produces a more conservative g = 0.38 [-0.04, 0.94]. Importantly, the results show an extreme degree of between-study heterogeneity, tau = 0.75 [0.51, 1.17], which greatly complicates drawing practical implications and recommendations. This demonstrates the moderate nature of the publication bias, where the adjusted estimate is meaningfully smaller but not reduced to zero. The Bayes factor for publication bias (BF_bias = 3.2) provides moderate evidence for publication bias, and the evidence for the effect becomes weak (BF_effect = 2).
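To see why such a large tau matters, a rough back-of-the-envelope calculation is helpful. The sketch below is only illustrative: it plugs in the random-effects point estimates reported above (the object names `mu` and `tau` are ours), assumes normally distributed true effects, and ignores the posterior uncertainty in both parameters.

```{r}
# Approximate 95% range of true study effects implied by the random-effects
# point estimates (illustrative only; assumes normal random effects and ignores
# estimation uncertainty in mu and tau).
mu  <- 0.79  # pooled effect size estimate reported above
tau <- 0.75  # between-study heterogeneity estimate reported above
round(mu + c(-1, 1) * qnorm(0.975) * tau, 2)
```

Even taking the unadjusted estimate at face value, true effects in individual settings may plausibly range from clearly negative to very large positive, which is why the heterogeneity complicates practical recommendations.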
#### Z-Curve Summary Metrics This moderate publication bias pattern is reflected in the summary statistics, ```{r} # Extract z-curve summary metrics for ChatGPT data summary(zcurve_RoBMA_Wang2025) ``` which show a moderate-to-high EDR of 0.61 and around 14 missing estimates. ### Example 4: Framing Effects from Many Labs 2 Our final example analyzes registered replication reports of the classic framing effect on decision making [@tversky1981framing] conducted as part of the Many Labs 2 project [@klein2018many]. This dataset provides an ideal test case for z-curve diagnostics because the pre-registered nature of these studies does not allow for publication bias. The analysis includes 55 effect size estimates that examine how framing influences decision-making preferences. #### Data Analysis and Model Fitting ```{r} # Load the Many Labs 2 framing effect data data("ManyLabs16", package = "RoBMA") head(ManyLabs16) ``` ```{r, eval = FALSE} # Fit models and create z-curve objects fit_RE_ManyLabs16 <- NoBMA(d = ManyLabs16$y, se = ManyLabs16$se, priors_effect_null = NULL, priors_heterogeneity_null = NULL, algorithm = "ss", sample = 10000, burnin = 5000, adapt = 5000, chains = 5, parallel = TRUE, seed = 1) fit_RoBMA_ManyLabs16 <- RoBMA(d = ManyLabs16$y, se = ManyLabs16$se, algorithm = "ss", sample = 10000, burnin = 5000, adapt = 5000, chains = 5, parallel = TRUE, seed = 1) zcurve_RE_ManyLabs16 <- as_zcurve(fit_RE_ManyLabs16) zcurve_RoBMA_ManyLabs16 <- as_zcurve(fit_RoBMA_ManyLabs16) ``` #### Z-Curve Assessment ```{r, fig.cap="Framing Effects: Model Fit Assessment"} # Assess model fit - should show good agreement hist(zcurve_RoBMA_ManyLabs16) lines(zcurve_RE_ManyLabs16, col = "black", lty = 2, lwd = 2) lines(zcurve_RoBMA_ManyLabs16, col = "blue", lty = 2, lwd = 2) legend("topleft", legend = c("Random-Effects", "RoBMA"), col = c("black", "blue"), lty = 2, lwd = 2) ``` #### Results The z-curve plot for the Many Labs 2 framing effects demonstrates what we expect to see in the absence of publication bias. The observed distribution of z-statistics (gray bars) appears smooth without sharp discontinuities at significance thresholds or at zero. Both the random-effects model (black dashed line) and RoBMA (blue dashed line) provide essentially identical fits to the data, with their posterior predictive distributions overlapping almost perfectly. This close agreement between models indicates that either approach would be appropriate for these data. The absence of publication bias is further confirmed by the statistical evidence: RoBMA provides moderate evidence against publication bias, demonstrating how the method appropriately penalizes unnecessary model complexity when simpler models explain the data equally well. #### Extrapolation to Pre-Publication Bias We can examine whether there would be any difference in the absence of publication bias: ```{r, fig.cap="Framing Effects: Extrapolation Analysis"} # Extrapolation should show minimal change plot(zcurve_RoBMA_ManyLabs16) ``` The extrapolated distribution (blue line) shows virtually no difference between the fitted and extrapolated distributions, confirming that publication bias has minimal impact in this well-designed replication project. This example illustrates the ideal scenario where traditional meta-analytic approaches are fully justified. 
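The same fitted-versus-extrapolated comparison can also be drawn manually with `lines()` instead of the `plot()` method, using the `extrapolate` argument mentioned in the social comparison example. The sketch below is a minimal, non-evaluated illustration; given the results above, the two curves should lie essentially on top of each other for this dataset.

```{r, eval = FALSE}
# Overlay the fitted and extrapolated model-implied distributions by hand
# (alternative to the plot() method used above).
hist(zcurve_RoBMA_ManyLabs16)
lines(zcurve_RoBMA_ManyLabs16, col = "blue", lty = 2, lwd = 2)                     # fitted
lines(zcurve_RoBMA_ManyLabs16, col = "blue", lty = 1, lwd = 2, extrapolate = TRUE) # extrapolated
```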
#### Model Results Summary

The quantitative results confirm the visual impression:

```{r}
summary(fit_RE_ManyLabs16)
summary(fit_RoBMA_ManyLabs16)
```

Both models yield virtually identical effect size estimates of d = 0.43 [0.35, 0.49]. The Bayes factor for publication bias (BF_bias = 0.21) provides moderate evidence against the presence of publication bias, appropriately penalizing the more complex model when it offers no advantage. This demonstrates the method's ability to distinguish between necessary and unnecessary model complexity.

#### Z-Curve Summary Metrics

The absence of publication bias is reflected in the publication bias assessment statistics: a moderate EDR matching the ODR and no missing studies.

```{r}
# Extract z-curve summary metrics for Many Labs 2 data
summary(zcurve_RoBMA_ManyLabs16)
```

## Conclusions

Z-curve plots are an intuitive diagnostic tool for assessing publication bias and model fit [@bartos2025zcurve]. By visualizing the distribution of test statistics and comparing observed patterns with model-implied expectations, researchers can make more informed decisions about their analytic approach using the RoBMA package [@RoBMA].

Z-curve diagnostics are particularly informative when applied to moderate to large meta-analyses (typically >20-30 studies), where histogram patterns become interpretable. Publication bias and questionable research practices (QRPs) can produce similar patterns of results. Z-curve diagnostics cannot distinguish between them; however, they can help in assessing whether the model approximates the observed data well. They are especially useful in model comparison scenarios, where they provide a visual supplement to statistical tests such as inclusion Bayes factors.

The following points are important for interpreting z-curve diagnostics:

- Sharp drops in the observed distribution at z = 0, z = ±1.64, or z = ±1.96 suggest publication bias.
- Models whose posterior predictive distributions closely match the observed pattern should be preferred.
- Large differences between fitted and extrapolated distributions indicate substantial publication bias.
- Visual assessment should complement formal statistical tests.

## References