## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) if (!requireNamespace("mgcv", quietly = TRUE)) { stop("Package 'mgcv' is required to run this vignette. Please install it with install.packages('mgcv').") } ## ----setup-------------------------------------------------------------------- library(RegCalReliab) ## ----------------------------------------------------------------------------- library(mgcv) set.seed(123) # Helper for measurement error add_err = function(v, sd = sqrt(0.4)) v + rnorm(length(v), 0, sd) # True coefficient (on log-odds scale) and OR beta = log(1.5) OR_true = 1.5 ## ----------------------------------------------------------------------------- simulate_once = function() { # ---- True covariates ---- x = mgcv::rmvn(3000, c(0,0), matrix(c(1,0.3,0.3,1), 2)) # Error-free covariates (W1 = continuous, W2 = binary) W1_main = rnorm(1500) W2_main = rbinom(1500, 1, 0.5) W1_rep = rnorm(1500) W2_rep = rbinom(1500, 1, 0.5) # ---- Main study error-prone Z ---- z.main = x[1:1500, 1:2] + matrix(rnorm(1500*2, 0, sqrt(0.4)), 1500, 2) colnames(z.main) = c("z1","z2") # ---- External replicates for Z ---- z1_rep = rbind( cbind(add_err(x[1501:2000, 1]), add_err(x[1501:2000, 1]), NA, NA), cbind(add_err(x[2001:2400, 1]), add_err(x[2001:2400, 1]), add_err(x[2001:2400, 1]), NA), cbind(add_err(x[2401:3000, 1]), add_err(x[2401:3000, 1]), add_err(x[2401:3000, 1]), add_err(x[2401:3000, 1])) ) colnames(z1_rep) = paste0("z1_", 1:4) z2_rep = rbind( cbind(add_err(x[1501:2000, 2]), add_err(x[1501:2000, 2]), NA, NA), cbind(add_err(x[2001:2400, 2]), add_err(x[2001:2400, 2]), add_err(x[2001:2400, 2]), NA), cbind(add_err(x[2401:3000, 2]), add_err(x[2401:3000, 2]), add_err(x[2401:3000, 2]), add_err(x[2401:3000, 2])) ) colnames(z2_rep) = paste0("z2_", 1:4) # ---- Outcome ---- p = plogis(-2.3 + beta*rowSums(x[1:1500, ]) + 0.5*W1_main - 0.3*W2_main) Y = rbinom(1500, 1, p) main_data = data.frame( Y = Y, z1 = z.main[, "z1"], z2 = z.main[, "z2"], W1 = W1_main, W2 = W2_main ) rep_data = data.frame(z1_rep, z2_rep, W1 = W1_rep, W2 = W2_rep, check.names = FALSE) # ---- Regression Calibration ---- res = RC_ExReliab( formula = Y ~ z1(z1_1, z1_2, z1_3, z1_4) + z2(z2_1, z2_2, z2_3, z2_4) + W1 + W2, main_data = main_data, rep_data = rep_data, link = "logistic" ) return(res) } ## ----------------------------------------------------------------------------- B = 100 results_list = replicate(B, simulate_once(), simplify = FALSE) # Extract naive + corrected tables naive_tabs = lapply(results_list, function(x) x$uncorrected) corrected_tabs = lapply(results_list, function(x) x$corrected) ## ----------------------------------------------------------------------------- avg_naive = Reduce("+", naive_tabs) / B avg_corrected = Reduce("+", corrected_tabs) / B cat("\nAverage Naive Logistic Estimates (B = ", B, "):\n", sep = "") print(round(avg_naive, 5)) cat("\nAverage Corrected Logistic Estimates (B = ", B, "):\n", sep = "") print(round(avg_corrected, 5)) ## ----------------------------------------------------------------------------- inside_ci = function(tab, i, truth = OR_true) { ci = tab[i , c("CI.low", "CI.high")] ci[1] <= truth && truth <= ci[2] } row_z1 = which(rownames(avg_naive) == "z1") row_z2 = which(rownames(avg_naive) == "z2") coverage = function(tab_list, row) { mean(sapply(tab_list, function(tab) inside_ci(tab, row))) * 100 } cov_z1_naive = coverage(naive_tabs, row_z1) cov_z1_corr = coverage(corrected_tabs, row_z1) cov_z2_naive = coverage(naive_tabs, row_z2) cov_z2_corr = coverage(corrected_tabs, row_z2) cat("\nCoverage of TRUE OR = 1.5 for error-prone exposure z1:\n") cat(sprintf(" • Naive : %5.1f %%\n", cov_z1_naive)) cat(sprintf(" • Regression Calibration: %5.1f %%\n", cov_z1_corr)) cat("\nCoverage of TRUE OR = 1.5 for error-prone exposure z2:\n") cat(sprintf(" • Naive : %5.1f %%\n", cov_z2_naive)) cat(sprintf(" • Regression Calibration: %5.1f %%\n", cov_z2_corr)) ## ----------------------------------------------------------------------------- library(mgcv) set.seed(123) # Helper for measurement error add_err = function(v, sd = sqrt(0.4)) v + rnorm(length(v), 0, sd) # True slope and OR beta = log(1.5) OR_true = 1.5 ## ----------------------------------------------------------------------------- simulate_once = function() { # ---- True covariates ---- x = mgcv::rmvn(3000, c(0,0,0), matrix(c(1,0.3,0.2, 0.3,1,0.5, 0.2,0.5,1), nrow = 3)) # Binary W2 depends on x1 w2 = sapply(x[,1], function(t) { if (t > median(x[,1])) rbinom(1,1,0.5) else rbinom(1,1,0.3) }) # Error-free covariates W = cbind(x[,3], w2) colnames(W) = c("W1", "W2") # ---- Replicate design ---- r = c(rep(1,1500), rep(2,500), rep(3,400), rep(4,600)) # Replicates for z1 z1 = rbind( cbind(add_err(x[1:1500, 1]), NA, NA, NA), cbind(add_err(x[1501:2000, 1]), add_err(x[1501:2000, 1]), NA, NA), cbind(add_err(x[2001:2400, 1]), add_err(x[2001:2400, 1]), add_err(x[2001:2400, 1]), NA), cbind(add_err(x[2401:3000, 1]), add_err(x[2401:3000, 1]), add_err(x[2401:3000, 1]), add_err(x[2401:3000, 1])) ) colnames(z1) = paste0("z1_",1:4) # Replicates for z2 z2 = rbind( cbind(add_err(x[1:1500, 2]), NA, NA, NA), cbind(add_err(x[1501:2000, 2]), add_err(x[1501:2000, 2]), NA, NA), cbind(add_err(x[2001:2400, 2]), add_err(x[2001:2400, 2]), add_err(x[2001:2400, 2]), NA), cbind(add_err(x[2401:3000, 2]), add_err(x[2401:3000, 2]), add_err(x[2401:3000, 2]), add_err(x[2401:3000, 2])) ) colnames(z2) = paste0("z2_",1:4) # ---- Outcome ---- p = plogis(-2.65 + beta*rowSums(x[,1:3]) + beta*w2) Y = rbinom(3000, 1, p) # ---- Main data with outcome, replicates, covariates ---- main_data = data.frame(Y, z1, z2, W) # ---- Regression calibration ---- res = RC_InReliab( formula = Y ~ myz1(z1_1, z1_2, z1_3, z1_4) + myz2(z2_1, z2_2, z2_3, z2_4) + W1 + W2, main_data = main_data, link = "logistic" ) return(res) } ## ----------------------------------------------------------------------------- B = 100 results_list = replicate(B, simulate_once(), simplify = FALSE) # Collect naïve + corrected tables naive_tabs = lapply(results_list, function(x) x$uncorrected) corrected_tabs = lapply(results_list, function(x) x$corrected) ## ----------------------------------------------------------------------------- avg_naive = Reduce("+", naive_tabs) / B avg_corrected = Reduce("+", corrected_tabs) / B cat("\nAverage Naive Logistic Estimates (B = ", B, "):\n", sep = "") print(round(avg_naive, 5)) cat("\nAverage Corrected Logistic Estimates (B = ", B, "):\n", sep = "") print(round(avg_corrected, 5)) ## ----------------------------------------------------------------------------- inside_ci = function(tab, i, truth = OR_true) { ci = tab[i , c("CI.low", "CI.high")] ci[1] <= truth && truth <= ci[2] } row_z1 = which(rownames(avg_naive) == "myz1") row_z2 = which(rownames(avg_naive) == "myz2") coverage = function(tab_list, row) { mean(sapply(tab_list, function(tab) inside_ci(tab, row))) * 100 } cov_z1_naive = coverage(naive_tabs, row_z1) cov_z1_corr = coverage(corrected_tabs, row_z1) cov_z2_naive = coverage(naive_tabs, row_z2) cov_z2_corr = coverage(corrected_tabs, row_z2) cat("\nCoverage of TRUE OR = 1.5 for error-prone exposure z1:\n") cat(sprintf(" • Naive : %5.1f %%\n", cov_z1_naive)) cat(sprintf(" • Regression Calibration: %5.1f %%\n", cov_z1_corr)) cat("\nCoverage of TRUE OR = 1.5 for error-prone exposure z2:\n") cat(sprintf(" • Naive : %5.1f %%\n", cov_z2_naive)) cat(sprintf(" • Regression Calibration: %5.1f %%\n", cov_z2_corr))