## ----setup, set.seed(2), include=FALSE----------------------------------- knitr::opts_chunk$set(echo = TRUE) set.seed(2) ## ------------------------------------------------------------------------ set.seed(2) library(corrcoverage) ## ----message = FALSE----------------------------------------------------- # library(simGWAS) data <- system.file("extdata", "", package="corrcoverage") ## ------------------------------------------------------------------------ # simulate reference haplotypes nsnps <- 200 nhaps <- 1000 lag <- 6 # genotypes are correlated between neighbouring variants maf <- runif(nsnps+lag,0.05,0.5) # common SNPs laghaps <- do.call("cbind", lapply(maf, function(f) rbinom(nhaps,1,f))) haps <- laghaps[,1:nsnps] for(j in 1:lag) haps <- haps + laghaps[,(1:nsnps)+j] haps <- round(haps/matrix(apply(haps,2,max),nhaps,nsnps,byrow=TRUE)) snps <- colnames(haps) <- paste0("s",1:nsnps) freq <- as.data.frame(haps+1) freq$Probability <- 1/nrow(freq) sum(freq$Probability) MAF <- colMeans(freq[,snps]-1) # SNP correlation matrix LD <- cor2(haps) ## ------------------------------------------------------------------------ CV <- sample(snps[which(colMeans(haps)>0.1)],1) iCV <- sub("s", "", CV) # index of cv OR <- 1.1 ## ------------------------------------------------------------------------ # z0 <- simulated_z_score(N0=5000, # number of controls # N1=5000, # number of cases # snps=snps, # column names in freq of SNPs # W=CV, # causal variants, subset of snps # gamma.W=log(OR), # log odds ratios # freq=freq # reference haplotypes # ) z0 <- readRDS(paste0(data,"/z0.RDS")) length(z0) z0[1:5] ## ----warning=FALSE, error=FALSE, fig.width=4----------------------------- N0 <- 5000 # number of controls N1 <- 5000 # number of cases varbeta <- Var.data.cc(f = MAF, N = N1+N0, s = N1/(N0+N1)) # variance of # estimated effect size postprobs <- ppfunc(z = z0, V = varbeta) plot(postprobs, xlab = "SNP index", ylab = "Posterior probability") abline(v = iCV, col = 2) ## ----warning=FALSE, error=FALSE------------------------------------------ thresh <- 0.9 credibleset <- credset(pp = postprobs, thr = thresh) credibleset ## ----warning=FALSE, error=FALSE------------------------------------------ # is the true CV contained within the credible set? thresh <- 0.9 credibleset <- credset(pp = postprobs, CV = iCV, thr = thresh) credibleset ## ------------------------------------------------------------------------ corrected_cov_estimate <- corrcov(z = z0, f = MAF, N0, N1, Sigma = LD, thr = thresh) ## ----echo = FALSE-------------------------------------------------------- df <- data.frame("Corrected Coverage" = corrected_cov_estimate, "Claimed Coverage" = credibleset$claimed.cov, "Threshold" = thresh) print(df, row.names = FALSE) ## ------------------------------------------------------------------------ # z0.tmp <- simulated_z_score(N0=5000, # number of controls # N1=5000, # number of cases # snps=snps, # column names in freq # W=CV, # causal variants, subset of snps # gamma.W=log(OR), # log odds ratios # freq=freq, # reference haplotypes # nrep = 200 # 200 simulations #) z0.tmp <- readRDS(paste0(data,"/z0.tmp.RDS")) pps <- ppfunc.mat(zstar = z0.tmp, V = varbeta) # find pps cs <- apply(pps, 1, function(x) credset(x, CV = iCV, thr = thresh)$cov) true.cov.est <- mean(cs) true.cov.est ## ----echo = FALSE-------------------------------------------------------- df.new <- data.frame("Empirical Coverage" = true.cov.est, "Corrected Coverage" = corrected_cov_estimate, "Claimed Coverage" = credibleset$claimed.cov, "Threshold" = thresh)[1,] print(df.new, row.names = FALSE)