## ----setup, set.seed = 18, include=FALSE---------------------------------
knitr::opts_chunk$set(echo = TRUE)
set.seed(18)
library(corrcoverage)

## ------------------------------------------------------------------------
set.seed(18)
library(corrcoverage)
# library(simGWAS)

data <- system.file("extdata", "", package="corrcoverage")

#  Simulate reference haplotypes
nsnps <- 200
nhaps <- 1000
lag <- 30  # genotypes are correlated between neighbouring variants
maf <- runif(nsnps + lag, 0.05, 0.5)  # common SNPs
laghaps <- do.call("cbind", lapply(maf, function(f) rbinom(nhaps, 1, f)))
haps <- laghaps[, 1:nsnps]
for (j in 1:lag) haps <- haps + laghaps[, (1:nsnps) + j]
haps <- round(haps/matrix(apply(haps, 2, max), nhaps, nsnps, byrow = TRUE))
snps <- colnames(haps) <- paste0("s", 1:nsnps)
freq <- as.data.frame(haps + 1)
freq$Probability <- 1/nrow(freq)
sum(freq$Probability)
MAF <- colMeans(freq[, snps] - 1)  # minor allele frequencies
CV <- sample(snps[which(colMeans(haps) > 0.1)], 1)
iCV <- sub("s", "", CV)  # index of cv
LD <- cor2(haps) # correlation between SNPs

## ------------------------------------------------------------------------
OR <- 1.1 # odds ratios
N0 <- 10000 # number of controls
N1 <- 10000 # number of cases
  
#z0 <- simulated_z_score(N0 = N0, # number of controls
#                        N1 = N1, # number of cases
#                        snps = snps, # column names in freq
#                        W = CV, # causal variants, subset of snps
#                        gamma.W = log(OR), # log odds ratios
#                        freq = freq) # reference haplotypes

z0 <- readRDS(paste0(data,"/z0_2.RDS"))

## ------------------------------------------------------------------------
varbeta <- Var.data.cc(f = MAF, N = N1+N0, s = N1/(N0+N1)) # variance of 
                                                       # estimated effect size

## ------------------------------------------------------------------------
postprobs <- ppfunc(z = z0, V = varbeta) 

## ------------------------------------------------------------------------
muhat <- est_mu(z0, MAF, N0, N1)
muhat

## ------------------------------------------------------------------------
thr = 0.9
corrcov <- corrected_cov(pp0 = postprobs, mu = muhat, V = varbeta, 
                         Sigma = LD, thr = thr, nrep = 1000)
cs <- credset(pp = postprobs, thr = thr)
data.frame(claimed.cov = cs$claimed.cov, corr.cov =  corrcov, nvar = cs$nvar)

## ------------------------------------------------------------------------
# z0.tmp <- simulated_z_score(N0 = N0, # number of controls
#                            N1 = N1, # number of cases
#                            snps = snps, # column names in freq
#                            W = CV, # causal variants, subset of snps
#                            gamma.W = log(OR), # log odds ratios
#                            freq = freq, # reference haplotypes
#                            nrep = 200) # increase nrep to increase accuracy of estimate

z0.tmp <- readRDS(paste0(data,"/z0.tmp_2.RDS"))

pps <- ppfunc.mat(zstar = z0.tmp, V = varbeta)  # find pps 
cs.cov <- apply(pps, 1, function(x) credset(x, CV = iCV, thr = thr)$cov)
true.cov.est <- mean(cs.cov)
data.frame(claimed.cov = cs$claimed.cov, corr.cov =  corrcov, 
           true.cov = true.cov.est, nvar = cs$nvar)

## ------------------------------------------------------------------------
res <- corrected_cs(z = z0, f = MAF, N0, N1, 
                    Sigma = LD, lower = 0.5, upper = 1, desired.cov = 0.9)
res

## ------------------------------------------------------------------------
new.cs.sims <- apply(pps, 1, function(x) credset(x, CV = iCV, thr = res$req.thr)$cov)
true.cov.est2 <- mean(new.cs.sims)

## ------------------------------------------------------------------------
df1 <- data.frame(claimed.cov = round(cs$claimed.cov, 3), corr.cov =  round(corrcov, 3), true.cov = round(true.cov.est, 3), nvar = cs$nvar)
print(df1, row.names = FALSE)

## ------------------------------------------------------------------------
df2 <- data.frame(claimed.cov = round(res$size, 3), corr.cov = round(res$corr.cov, 3), true.cov = round(true.cov.est2, 3), nvar = length(res$credset))
print(df2, row.names = FALSE)