## ----options, include = FALSE------------------------------------------------- knitr::opts_chunk$set( out.width = '80%', dpi = 300, collapse = TRUE, comment = "#>" ) ## ----setup-------------------------------------------------------------------- library(poolABC) ## ----example, tidy=TRUE------------------------------------------------------- data.frame(chr = c("NC297", "NC297"), pos =c(3530, 5450), rc = c("A", "T"), allele_count = 2, allele_states = c("A/G", "T/A"), deletion_sum = 0, snp_type = "pop", major_alleles = c("AA", "TT"), minor_alleles = c("GG", "AN"), maa_1 = c("54/55", "51/54"), maa_2 = c("76/78", "96/96"), mia_1 = c("1/55", "3/54"), mia_2 = c("2/78", "0/96")) ## ----import data, eval=FALSE, include=TRUE------------------------------------ # # load multiple files and organize information by contig # files <- importContigs(path = "/home/myawesomedata", pops = c(1, 4, 7, 10)) ## ----select blocks, eval=FALSE, include=TRUE, tidy=TRUE----------------------- # # randomly select blocks of a given size from several contigs # blocks <- pickWindows(freqs, positions, range, rMajor, rMinor, coverage, window = 1000, nLoci = 100) ## ----observed sumstats, eval=FALSE, include=TRUE------------------------------ # # compute a set of observed summary statitstics # obs <- statsContig(randomWindows = blocks, nPops = 4, stat.names = stat.names) ## ----two populations, tidy=TRUE----------------------------------------------- # set the mean and variance of the coverage sMean <- c(84.34, 66.76); sVars <- c(1437.22, 912.43) # create a list containing the information of the pool sizes by population size <- rep(list(rep(10, 10)), 2) # run simulation for a two-populations model sims <- poolSim(model="2pops", nDip=200, nPops=2, nLoci=100, nSites=2000, mutrate=2e-8, size=size, mean=sMean, variance=sVars, minimum=15, maximum=180, min.minor=1, Nref=c(25000, 25000), ratio=c(0.1, 3), pool=c(5, 180), seq=c(0.0001, 0.001), split=c(0, 3), CW=c(1e-13, 1e-3), WC=c(1e-13, 1e-3), bT=c(0, 0.5)) ## ----four populations, tidy=TRUE---------------------------------------------- # set the mean and variance of the coverage sMean <- c(84.34, 66.76, 65.69, 68.83); sVars <- c(1437.22, 912.43, 848.02, 1028.23) # create a list containing the information of the pool sizes by population size <- rep(list(rep(5, 20)), 4) # run simulation for a four-populations model sims <- poolSim(model="Single", nDip=400, nPops=4, nLoci=100, nSites=2000, mutrate=2e-8, size=size, mean=sMean, variance=sVars, minimum=15, maximum=180, min.minor=2, Nref=c(25000, 25000), ratio=c(0.1, 3), pool=c(5, 180), seq=c(0.0001, 0.001), split=c(0, 3), CW=c(1e-13, 1e-3), WC=c(1e-13, 1e-3), CC=c(1e-13, 1e-3), WW=c(1e-13, 1e-3), ANC=c(1e-13, 1e-3), bT=c(0, 0.5), bCW=c(0, 0.5), bWC=c(0, 0.5)) ## ----multiple simulations, tidy=TRUE------------------------------------------ # set the mean and variance of the coverage sMean <- c(84.34, 66.76); sVars <- c(1437.22, 912.43) # create a list containing the information of the pool sizes by population size <- rep(list(rep(5, 20)), 2) # define how many simulations to run nSims <- 10 # run one batch of simulations sims <- t(replicate(n = nSims, unlist(poolSim(model="2pops", nDip=200, nPops=2, nLoci=100, nSites=1000, mutrate=2e-8, size=size, mean=sMean, variance=sVars, minimum=20, maximum=185, min.minor=2, Nref=c(25000, 25000), ratio=c(0.1, 3), pool=c(5, 180), seq=c(0.0001, 0.001), split=c(0, 3), CW=c(1e-13, 1e-3), WC=c(1e-13, 1e-3), bT=c(0, 0.5))))) ## ----load data---------------------------------------------------------------- # load the data included in the package data("sumstats"); data("params"); data("limits") ## ----ABC, eval=FALSE, include=TRUE, tidy=TRUE--------------------------------- # # parameter estimation with ÃBC function # myabc <- ABC(nPops = 2, ntrials = 10, freqs = freqs, positions = positions, range = range, rMajor = rMajor, rMinor = rMinor, coverage = coverage, window = 1000, nLoci = 100, limits = limits, params = params, sumstats = sumstats, tol = 0.01, method = "regression") ## ----global sumstat, eval=FALSE, include=TRUE, tidy=TRUE---------------------- # # load multiple files and organize information by contig # blocks <- pickWindows(freqs = freqs, positions = positions, range = range, rMajor = rMajor, rMinor = rMinor, coverage = coverage, window = 1000, nLoci = 800) # # compute a set of summary statistics from the observed data # global <- statsContig(randomWindows = blocks, nPops = 2, stat.names = stat.names) ## ----merge posteriors, eval=FALSE, message=FALSE, include=TRUE, tidy=TRUE----- # # merge posterior distributions # myabc <- mergepost(target = myabc$target, global = global, post = myabc$adjusted, wtreg = myabc$weights) ## ----plot param, eval=FALSE, message=FALSE, include=TRUE---------------------- # # plot the density estimation of a parameter # plot_weighted(prior = params, merged_posterior = myabc, limits = limits, index = 2) ## ----model, message=FALSE, tidy=TRUE------------------------------------------ # create a vecto of model indices index <- c(rep("model1", nrow(sumstats)/2), rep("model2", nrow(sumstats)/2)) # select a random simulation to act as target target <- sumstats[10, ] # perform model selection with ABC mysel <- modelSelect(target = target, index = index, sumstats = sumstats, tol = 0.1, method = "regression") # display the structure of the mysel object str(mysel) ## ----summary model------------------------------------------------------------ # check results of model selection msel <- summary_modelSelect(object = mysel) ## ----Bayes factors------------------------------------------------------------ # print results of model selection msel ## ----compute error, warning=FALSE, tidy=TRUE---------------------------------- # perform an Approximate Bayesian Computation simulation study mycv <- simulationABC(params = params, sumstats = sumstats, limits = limits, nval = 100, tol = 0.01, method = "regression") # display the structure of the mycv object str(mycv, max.level = 2) ## ----plot error, fig.height=5, fig.width=6, message=FALSE--------------------- # plot the prediction errors plot_errorABC(x = mycv, method = "reg", statistic = "median", index = 1) ## ----simulate model selection------------------------------------------------- # perform a leave-one-out cross validation for model selection modelSim <- sim_modelSel(index = index, sumstats = sumstats, nval = 100, tol = 0.1) # display the structure of the modelSim object str(modelSim) ## ----model selection error---------------------------------------------------- # compute the mean misclassification probabilities mselError <- error_modelSel(object = modelSim) ## ----barplot, fig.height=3, fig.width=5, message=FALSE------------------------ # display a barplot of model misclassification plot_msel(object = mselError)