## ----echo=FALSE--------------------------------------------------------------- knitr::opts_chunk$set( message = FALSE, warning = FALSE, error = FALSE, tidy = FALSE, cache = FALSE ) ## ----------------------------------------------------------------------------- set.seed(238923) ## for reproducibility N <- 1000 Theta <- rchisq(N, df = 10) ## ----------------------------------------------------------------------------- nSIM <- 1000 data <- sapply(seq_len(nSIM), function(x) rpois(n = N, lambda = Theta)) ## ----------------------------------------------------------------------------- library(deconvolveR) tau <- seq(1, 32) results <- apply(data, 2, function(x) deconv(tau = tau, X = x, ignoreZero = FALSE, c0 = 1)) ## ----------------------------------------------------------------------------- g <- sapply(results, function(x) x$stats[, "g"]) mean <- apply(g, 1, mean) SE.g <- sapply(results, function(x) x$stats[, "SE.g"]) sd <- apply(SE.g, 1, mean) Bias.g <- sapply(results, function(x) x$stats[, "Bias.g"]) bias <- apply(Bias.g, 1, mean) gTheta <- pchisq(tau, df = 10) - pchisq(c(0, tau[-length(tau)]), df = 10) gTheta <- gTheta / sum(gTheta) simData <- data.frame(theta = tau, gTheta = gTheta, Mean = mean, StdDev = sd, Bias = bias, CoefVar = sd / mean) table1 <- transform(simData, gTheta = 100 * gTheta, Mean = 100 * Mean, StdDev = 100 * StdDev, Bias = 100 * Bias) ## ----------------------------------------------------------------------------- knitr::kable(table1[c(5, 10, 15, 20, 25), ], row.names=FALSE) ## ----------------------------------------------------------------------------- library(ggplot2) library(cowplot) theme_set(theme_get() + theme(panel.grid.major = element_line(colour = "gray90", size = 0.2), panel.grid.minor = element_line(colour = "gray98", size = 0.5))) p1 <- ggplot(data = as.data.frame(results[[1]]$stats)) + geom_line(mapping = aes(x = theta, y = SE.g), color = "black", linetype = "solid") + geom_line(mapping = aes(x = simData$theta, y = simData$StdDev), color = "red", linetype = "dashed") + labs(x = expression(theta), y = "Std. Dev") p2 <- ggplot(data = as.data.frame(results[[1]]$stats)) + geom_line(mapping = aes(x = theta, y = Bias.g), color = "black", linetype = "solid") + geom_line(mapping = aes(x = simData$theta, y = simData$Bias), color = "red", linetype = "dashed") + labs(x = expression(theta), y = "Bias") plot_grid(plotlist = list(p1, p2), ncol = 2) ## ----------------------------------------------------------------------------- data(bardWordCount) str(bardWordCount) ## ----------------------------------------------------------------------------- lambda <- seq(-4, 4.5, .025) tau <- exp(lambda) ## ----------------------------------------------------------------------------- result <- deconv(tau = tau, y = bardWordCount, n = 100, c0=2) stats <- result$stats ## ----------------------------------------------------------------------------- ggplot() + geom_line(mapping = aes(x = lambda, y = stats[, "g"])) + labs(x = expression(log(theta)), y = expression(g(theta))) ## ----------------------------------------------------------------------------- print(result$S) ## ----------------------------------------------------------------------------- d <- data.frame(lambda = lambda, g = stats[, "g"], tg = stats[, "tg"], SE.g = stats[, "SE.g"]) indices <- seq(1, length(lambda), 5) ggplot(data = d) + geom_line(mapping = aes(x = lambda, y = g)) + geom_errorbar(data = d[indices, ], mapping = aes(x = lambda, ymin = g - SE.g, ymax = g + SE.g), width = .01, color = "blue") + labs(x = expression(log(theta)), y = expression(g(theta))) + ylim(0, 0.006) + geom_line(mapping = aes(x = lambda, y = tg), linetype = "dashed", color = "red") ## ---- fig.keep='all', fig.width=7.5, fig.height=10---------------------------- gPost <- sapply(seq_len(100), function(i) local({tg <- d$tg * result$P[i, ]; tg / sum(tg)})) plots <- lapply(c(1, 2, 4, 8), function(i) { ggplot() + geom_line(mapping = aes(x = tau, y = gPost[, i])) + labs(x = expression(theta), y = expression(g(theta)), title = sprintf("x = %d", i)) }) plots <- Map(f = function(p, xlim) p + xlim(0, xlim), plots, list(6, 8, 14, 20)) plot_grid(plotlist = plots, ncol = 2) ## ----------------------------------------------------------------------------- set.seed(1783) B <- 200 fHat <- as.numeric(result$P %*% d$g) fHat <- fHat / sum(fHat) yStar <- rmultinom(n = B, size = sum(bardWordCount), prob = fHat) gBoot <- apply(yStar, 2, function(y) deconv(tau = tau, y = y, n = 100, c0 = 2)$stats[, "g"]) seG <- apply(gBoot, 1, sd) ggplot(data = d) + geom_line(mapping = aes(x = lambda, y = SE.g, color = "Theoretical", linetype = "Theoretical")) + geom_line(mapping = aes(x = lambda, y = seG, color = "Bootstrap", linetype = "Bootstrap")) + scale_color_manual(name = "Legend", values = c("Bootstrap" = "black", "Theoretical" = "red")) + scale_linetype_manual(name = "Legend", values = c("Bootstrap" = "solid", "Theoretical" = "dashed")) + theme(legend.title = element_blank()) + labs(x = expression(log(theta)), y = expression(sigma(hat(g)))) ## ----------------------------------------------------------------------------- gHat <- stats[, "g"] Rfn <- function(t) { sum( gHat * (1 - exp(-tau * t)) / (exp(tau) - 1) ) } r <- sapply(0:10, Rfn) ggplot() + geom_line(mapping = aes(x = 0:10, y = r)) + labs(x = "time multiple t", y = expression(R(t))) ## ----------------------------------------------------------------------------- print(uniroot(f = function(x) Rfn(x) - 1, interval = c(2, 4))$root) ## ----------------------------------------------------------------------------- set.seed(129023) N <- 10000 pi0 <- .90 data <- local({ nullCase <- (runif(N) <= pi0) muAndZ <- t(sapply(nullCase, function(isNull) { if (isNull) { mu <- 0 c(mu, rnorm(1)) } else { mu <- rnorm(1, mean = -3) c(mu, rnorm(1, mean = mu)) } })) data.frame(nullCase = nullCase, mu = muAndZ[, 1], z = muAndZ[, 2]) }) ## ----------------------------------------------------------------------------- p1 <- ggplot(mapping = aes(x = data$z)) + geom_histogram(mapping = aes(y = ..count.. / sum(..count..) ), color = "brown", bins = 60, alpha = 0.5) + labs(x = "z", y = "Density") p2 <- ggplot(mapping = aes(x = data$mu)) + geom_histogram(mapping = aes(y = ..count.. / sum(..count..) ), color = "brown", bins = 60, alpha = 0.5) + labs(x = expression(theta), y = "Density") plot_grid(plotlist = list(p1, p2), ncol = 2) ## ----------------------------------------------------------------------------- tau <- seq(from = -6, to = 3, by = 0.25) atomIndex <- which(tau == 0) result <- deconv(tau = tau, X = data$z, deltaAt = 0, family = "Normal", pDegree = 5) ## ----------------------------------------------------------------------------- knitr::kable(result$stats) ## ----------------------------------------------------------------------------- gData <- as.data.frame(result$stats[-atomIndex, c("theta", "g")]) gData$g <- gData$g / sum(gData$g) ggplot(data = gData) + geom_line(mapping = aes(x = theta, y = g)) + geom_line(mapping = aes(x = theta, y = dnorm(theta, mean = -3)), color = "red") + labs(x = expression(theta), y = expression(g(theta))) ## ----------------------------------------------------------------------------- p <- pnorm(data$z) orderP <- order(p) p <- p[orderP] ## ----------------------------------------------------------------------------- ## FCR q <- 0.05 R <- max(which(p <= seq_len(N) * q / N)) discIdx <- orderP[1:R] disc <- data[discIdx, ] cat("BY_q procedure discoveries", R, "cases,", sum(disc$nullCase), "actual nulls among them.\n") ## ----------------------------------------------------------------------------- alphaR <- 1 - R * q / N zAlpha <- qnorm(alphaR, lower.tail = FALSE) zMarker <- max(disc$z) xlim <- c(-7.6, 0.0) ylim <- c(-10, 0.0) BY.lo <- c(xlim[1] - zAlpha, xlim[2] - zAlpha) BY.up <- c(xlim[1] + zAlpha, xlim[2] + zAlpha) Bayes.lo <- c(0.5 * (xlim[1] - 3) - 1.96 / sqrt(2), 0.5 * (xlim[2] - 3) - 1.96 / sqrt(2)) Bayes.up <- c(0.5 * (xlim[1] - 3) + 1.96 / sqrt(2), 0.5 * (xlim[2] - 3) + 1.96 / sqrt(2)) ## ----------------------------------------------------------------------------- d <- data[order(data$mu), ] muVals <- unique(d$mu) s <- as.data.frame(result$stats) indices <- findInterval(muVals, s$theta) + 1 gMu <- s$g[indices] st <- seq(min(data$z), -2.0, length.out = 40) gMuPhi <- sapply(st, function(z) gMu * dnorm(z - muVals)) g2 <- apply(gMuPhi, 2, function(x) cumsum(x)/sum(x)) pct <- apply(g2, 2, function(dist) approx(y = muVals, x = dist, xout = c(0.025, 0.975))) qVals <- sapply(pct, function(item) item$y) ## ----------------------------------------------------------------------------- ggplot() + geom_line(mapping = aes(x = xlim, y = BY.lo), color = "blue") + geom_line(mapping = aes(x = xlim, y = BY.up), color = "blue") + geom_line(mapping = aes(x = xlim, y = Bayes.lo), color = "magenta", linetype = "dashed") + geom_line(mapping = aes(x = xlim, y = Bayes.up), color = "magenta", linetype = "dashed") + geom_point(mapping = aes(x = disc$z, y = disc$mu), color = "red") + geom_point(mapping = aes(x = disc$z[disc$nullCase], y = disc$mu[disc$nullCase]), color = "orange") + geom_line(mapping = aes(x = rep(zMarker, 2), y = c(-10, 1))) + geom_line(mapping = aes(x = st, y = qVals[1, ]), color = "brown") + geom_line(mapping = aes(x = st, y = qVals[2, ]), color = "brown") + labs(x = "Observed z", y = expression(mu)) + annotate("text", x = -1, y = -4.25, label = "BY.lo") + annotate("text", x = -1, y = 1.25, label = "BY.up") + annotate("text", x = -7.5, y = -6.1, label = "Bayes.lo") + annotate("text", x = -7.5, y = -3.4, label = "Bayes.up") + annotate("text", x = -2.0, y = -1.75, label = "EB.lo") + annotate("text", x = -2.0, y = -3.9, label = "EB.up") + annotate("text", x = zMarker, y = 1.25, label = as.character(round(zMarker, 2))) ## ----------------------------------------------------------------------------- p1 <- ggplot(mapping = aes(x = disjointTheta)) + geom_histogram(mapping = aes(y = ..count.. / sum(..count..) ), color = "brown", bins = 60, alpha = 0.5) + labs(x = expression(theta), y = "Density") set.seed (2332) z <- rnorm(n = length(disjointTheta), mean = disjointTheta) p2 <- ggplot(mapping = aes(x = z)) + geom_histogram(mapping = aes(y = ..count.. / sum(..count..) ), color = "brown", bins = 60, alpha = 0.5) + labs(x = "z", y = "Density") plot_grid(plotlist = list(p1, p2), ncol = 2) ## ---- fig.keep='all', fig.width=7.5, fig.height=10---------------------------- tau <- seq(from = -4, to = 6, by = 0.2) plots1 <- lapply(2:8, function(p) { result <- deconv(tau = tau, X = z, family = "Normal", pDegree = p) g <- result$stats[, "g"] ggplot(mapping = aes(x = disjointTheta)) + geom_histogram(mapping = aes(y = ..count.. / sum(..count..)), color = "brown", bins = 60, alpha = 0.2) + geom_line(mapping = aes(x = tau, y = g), color = "blue") + labs(x = expression(theta), y = "Density", title = sprintf("DF = %d", p)) }) ## ---- fig.keep='all', fig.width=7.5, fig.height=10---------------------------- plots2 <- lapply(c(0.5, 1, 2, 4, 8, 16, 32), function(c0) { result <- deconv(tau = tau, X = z, family = "Normal", pDegree = 6, c0 = c0) g <- result$stats[, "g"] ggplot(mapping = aes(x = disjointTheta)) + geom_histogram(mapping = aes(y = ..count.. / sum(..count..)), color = "brown", bins = 60, alpha = 0.2) + geom_line(mapping = aes(x = tau, y = g), color = "blue") + labs(x = expression(theta), y = "Density", title = sprintf("C0 = %.1f", c0)) }) plots <- mapply(function(x, y) list(x, y), plots1, plots2) plot_grid(plotlist = plots, ncol = 2) ## ----------------------------------------------------------------------------- c0_values <- c(.5, 1, 2, 4, 8, 16, 32) stable <- data.frame( c0 = c0_values, `DF 5` = sapply(c0_values, function(c0) deconv(tau = tau, X = z, family = "Normal", pDegree = 5, c0 = c0)$S), `DF 6` = sapply(c0_values, function(c0) deconv(tau = tau, X = z, family = "Normal", pDegree = 6, c0 = c0)$S), `DF 7` = sapply(c0_values, function(c0) deconv(tau = tau, X = z, family = "Normal", pDegree = 7, c0 = c0)$S) ) knitr::kable(stable) ## ----------------------------------------------------------------------------- tau <- seq(from = 0.01, to = 0.99, by = 0.01) result <- deconv(tau = tau, X = surg, family = "Binomial", c0 = 1) d <- data.frame(result$stats) indices <- seq(5, 99, 5) errorX <- tau[indices] ggplot() + geom_line(data = d, mapping = aes(x = tau, y = g)) + geom_errorbar(data = d[indices, ], mapping = aes(x = theta, ymin = g - SE.g, ymax = g + SE.g), width = .01, color = "blue") + labs(x = expression(theta), y = expression(paste(g(theta), " +/- SE"))) ## ----------------------------------------------------------------------------- knitr::kable(d[indices, ], row.names = FALSE) ## ----------------------------------------------------------------------------- cat(sprintf("Mass below .20 = %0.2f\n", sum(d[1:20, "g"]))) cat(sprintf("Mass above .80 = %0.2f\n", sum(d[80:99, "g"]))) ## ----------------------------------------------------------------------------- theta <- result$stats[, 'theta'] gTheta <- result$stats[, 'g'] f_alpha <- function(n_k, x_k) { ## .01 is the delta_theta in the Riemann sum sum(dbinom(x = x_k, size = n_k, prob = theta) * gTheta) * .01 } g_theta_hat <- function(n_k, x_k) { gTheta * dbinom(x = x_k, size = n_k, prob = theta) / f_alpha(n_k, x_k) } ## ----------------------------------------------------------------------------- g1 <- g_theta_hat(x_k = 7, n_k = 32) g2 <- g_theta_hat(x_k = 3, n_k = 6) g3 <- g_theta_hat(x_k = 17, n_k = 18) ggplot() + geom_line(mapping = aes(x = theta, y = g1), col = "magenta") + ylim(0, 10) + geom_line(mapping = aes(x = theta, y = g2), col = "red") + geom_line(mapping = aes(x = theta, y = g3), col = "blue") + labs(x = expression(theta), y = expression(g(paste(theta, "|(x, n)")))) + annotate("text", x = 0.15, y = 4.25, label = "x=7, n=32") + annotate("text", x = 0.425, y = 4.25, label = "x=3, n=6") + annotate("text", x = 0.85, y = 7.5, label = "x=17, n=18") ## ----------------------------------------------------------------------------- cat(sprintf("Empirical Bayes Estimate: %f\n", 0.01 * sum(theta * g2))) ## ----------------------------------------------------------------------------- set.seed(32776) B <- 1000 gHat <- d$g N <- nrow(surg) genBootSample <- function() { thetaStar <- sample(tau, size = N, replace = TRUE, prob = gHat) sStar <- sapply(seq_len(N), function(i) rbinom(n = 1 , size = surg$n[i], prob = thetaStar[i])) data.frame(n = surg$n, s = sStar) } bootResults <- lapply(seq_len(B), function(k) { surgBoot <- genBootSample() mat <- deconv(tau = tau, X = surgBoot, family = "Binomial", c0 = 1)$stats mat[, c("g", "Bias.g")] }) gBoot <- sapply(bootResults, function(x) x[, 1]) BiasBoot <- sapply(bootResults, function(x) x[, 2]) indices <- c(seq(1, 99, 11), 99) table2 <- data.frame(theta = tau, gTheta = round(gHat * 100, 3), sdFormula = round(d$SE.g * 100, 3), sdSimul = round(apply(gBoot, 1, sd) * 100, 3), BiasFormula = round(d$Bias.g * 100, 3), BiasSimul = round(apply(BiasBoot, 1, mean) * 100, 3))[ indices, ] ## ----------------------------------------------------------------------------- knitr::kable(table2, row.names = FALSE)