## ----setup, include=FALSE----------------------------------------------------- library(EpiPvr) knitr::opts_chunk$set(echo = TRUE) knitr::opts_chunk$set( comment = "#>", tidy = FALSE, width = 70 ) set.seed(123) # ensure reproducibility for CRAN checks ## ----include=FALSE------------------------------------------------------------ print("Now running: Chunk 1 setups...") flush.console() ## ----presets, cache=FALSE----------------------------------------------------- virus_type='SPT' lsEst_in=40; # upper bound on per insect survival in the experiment in days ## ----dataset, cache=FALSE----------------------------------------------------- data("ap_data_sim_SPT", package = "EpiPvr") print(ap_data_sim_SPT) ## ----sample, cache=FALSE, message=FALSE, warning=FALSE------------------------ if (identical(Sys.getenv("NOT_CRAN"), "true")) { EVSPT_sim=estimate_virus_parameters_SPT(ap_data_sim_SPT,lsEst_in,D_numPtsPdin=1, mcmcOptions=c(4500,6000),numChainsIn=4,mc.parallel=0) } else { # Load precomputed object for CRAN EVSPT_sim <- readRDS(system.file("extdata", "EVSPT_sim.rds", package = "EpiPvr")) } target=EVSPT_sim$array1 # fixing the first output array, contains the sample Markov chains ## ----print, cache=FALSE------------------------------------------------------- # viewing output content print(dim(EVSPT_sim$array0)) print(head(EVSPT_sim$array0)) print(dim(EVSPT_sim$array1)) print(head(EVSPT_sim$array1)) print(dim(EVSPT_sim$array2)) print(head(EVSPT_sim$array2)) print(length(EVSPT_sim$array3)) print(head(EVSPT_sim$array3)) print(length(EVSPT_sim$array4)) print(head(EVSPT_sim$array4)) print(length(EVSPT_sim$array5)) print(head(EVSPT_sim$array5)) print(length(EVSPT_sim$converge_results)) print(head(EVSPT_sim$converge_results)) runs=dim(target)[1] nChains=dim(target)[2] ## ----SPTprintw, cache=FALSE--------------------------------------------------- if (length(warnings()) > 0) { print("Warnings occurred during fitting; check convergence diagnostics.") } ## ----plotSPT, fig.width=5, fig.height=5, echo=FALSE--------------------------- #bayesplot::mcmc_pairs(EVSPT_sim$array0,diag_fun = "dens") ## ----plotSPT2, fig.width=5, fig.height=5, echo=FALSE-------------------------- #bayesplot::mcmc_dens(EVSPT_sim$array1,facet_args = list(ncol = 3))+ ggplot2::theme(aspect.ratio = 0.5) ## ----plotSPT3, echo=FALSE----------------------------------------------------- plot(EVSPT_sim$array3$lenA,EVSPT_sim$array3$propA, xlab = "AAP duration, hours", ylab = "Prop. test plants SPT infected",ylim=c(0,1)) lines(EVSPT_sim$array3$lenA,EVSPT_sim$array3$simulLA) lines(EVSPT_sim$array3$lenA,EVSPT_sim$array3$simulUA) legend(EVSPT_sim$array3$lenA[1], 0.5, legend=c("experiment", "95% Cr.I. estimates"), pch = c(1, NA), lty = c(NA, 1), col = "black", cex=0.8, bty = "n") plot(EVSPT_sim$array4$lenI,EVSPT_sim$array4$propI, xlab = "IAP duration, hours", ylab = "Prop. test plants SPT #infected",ylim=c(0,1)) lines(EVSPT_sim$array4$lenI,EVSPT_sim$array4$simulLI) lines(EVSPT_sim$array4$lenI,EVSPT_sim$array4$simulUI) legend(EVSPT_sim$array4$lenI[1], 0.5, legend=c("experiment", "95% Cr.I. estimates"), pch = c(1, NA), lty = c(NA, 1), col = "black", cex=0.8, bty = "n") ## ----unpack, cache=FALSE------------------------------------------------------ # P EPIDEMIC inferences # accessing the chains from estimate_virus_parameters # VIRUS PARAMETERS # convert from per hours to per day al_fits=target[,,dimnames(target)$variable=="al[1]", drop = TRUE]*24 be_fits=target[,,dimnames(target)$variable=="be[1]", drop = TRUE]*24 mu_fits=target[,,dimnames(target)$variable=="mu[1]", drop = TRUE]*24 #confirm there are no negative values print(sum((al_fits<0))) print(sum((be_fits<0))) print(sum((mu_fits<0))) # LOCAL PARAMETERS # set the local parameters rates (per day) thet_external <- 0.45 # dispersal r_external <- 1/28 # roguing bf_external <- 1/14 # vector mortality h_external <- 1/365 # harvesting rate nu_pl_external <- 1/14 # plant latent progression rate (1/plant latent period) localParams=c(thet_external, r_external, h_external, bf_external, nu_pl_external) # generate p Epdemic results for given insect burden (per plant) numInsects=3 ## ----infer, cache=FALSE------------------------------------------------------- runsPrag=10 # initial guess for expected time in hours until next event - the function will use this # to work out efficient time step (optional, default = 0.1) interval_ind=1/10 # # initialising vectors for storing chain samples result_vec_byPl <- array(rep(0, runsPrag*nChains), dim=c(runsPrag,nChains)) result_vec_byIns <- result_vec_byPl if (identical(Sys.getenv("NOT_CRAN"), "true")) { # loop through MCMC chains to sample posterior distributions of virus parameters and # calculate epidemic probability for a given vector abundance for (ggg in 1:nChains){ print(paste0('nChains',ggg)) for (ppp in 1:runsPrag) { al_estim=al_fits[nrow(al_fits)+1-ppp,ggg] be_estim=be_fits[nrow(al_fits)+1-ppp,ggg] mu_estim=mu_fits[nrow(al_fits)+1-ppp,ggg] virusParams=c(al_estim,be_estim,mu_estim) # returns vector of epidemic probabilities for different inoculum states (see ms) numVars <- ((numInsects+1)*3)-1 # insects per plant number no. inoculum states qm_out=calculate_epidemic_probability(numInsects,interval_ind,localParams,virusParams) result_vec_byPl[ppp,ggg]=qm_out[1] # storing epi prob for state: inoculum=single infected plant result_vec_byIns[ppp,ggg]=qm_out[(numVars-(numInsects-1))] # storing for state: single infected insect } } } else { # Load precomputed object for CRAN result_vec_byPl <- readRDS(system.file("extdata", "result_vec_byPl.rds", package = "EpiPvr")) result_vec_byIns <- readRDS(system.file("extdata", "result_vec_byIns.rds", package = "EpiPvr")) } ## ----distributions, cache=FALSE----------------------------------------------- # data wrangling the set of epidemic probabilities from the chains data_table_Pl=array(rep(0,3), dim=c(3, 1)) data_table_Ins=array(rep(0,3), dim=c(3, 1)) target_A1A2=array(rep(0,2*runsPrag*nChains), dim=c(2,runsPrag*nChains)) # for a given level of insect use 'summarise_draws' to calculate credible intervals target_A1=array(rep(0,runsPrag*nChains), dim=c(runsPrag, nChains)) target_A2=target_A1 for (gg in 1:nChains){ target_A1[,gg]=t(result_vec_byPl[,gg]) target_A2[,gg]=t(result_vec_byIns[,gg]) } target_A1A2=rbind(as.vector(target_A1),as.vector(target_A2)) # first the first few elements of epidemic probability from plant inocula print(head(target_A1A2)) # and from insect inocula print(head(target_A1A2)) ## ----tables1, cache=FALSE----------------------------------------------------- # ensure at least ~20 data points for each of 4 chains (ie cant summarise distribution if v small) ts=posterior::summarize_draws(posterior::as_draws(t(target_A1A2))) data_table_Pl=c(ts[1,]$median,ts[1,]$q5,ts[1,]$q95) data_table_Ins=c(ts[2,]$median,ts[2,]$q5,ts[2,]$q95) ## ----tables2, cache=FALSE----------------------------------------------------- names(data_table_Pl)=c('50','5','95') print(data_table_Pl) names(data_table_Ins)=c('50','5','95') print(data_table_Ins) ## ----PTpreandfit, cache=FALSE------------------------------------------------- virus_type='PT' data("ap_data_sim_PT", package = "EpiPvr") print(ap_data_sim_PT) ## ----PTsample, cache=TRUE----------------------------------------------------- if (identical(Sys.getenv("NOT_CRAN"), "true")) { EVPT_sim=estimate_virus_parameters_PT(ap_data_sim_PT,lsEst_in,D_numPtsPdin=1,mcmcOptions=c(1000,2000),numChainsIn=4,mc.parallel=0) } else { # Load precomputed object for CRAN EVPT_sim <- readRDS(system.file("extdata", "EVPT_sim.rds", package = "EpiPvr")) } target=EVPT_sim$array1 runs=dim(target)[1] nChains=dim(target)[2] ## ----PTprint, cache=FALSE----------------------------------------------------- print(dim(EVPT_sim$array0)) print(head(EVPT_sim$array0)) print(dim(EVPT_sim$array1)) print(head(EVPT_sim$array1)) print(dim(EVPT_sim$array2)) print(head(EVPT_sim$array2)) print(length(EVPT_sim$array3)) print(head(EVPT_sim$array3)) print(length(EVPT_sim$array4)) print(head(EVPT_sim$array4)) print(length(EVPT_sim$array5)) print(head(EVPT_sim$array5)) print(length(EVPT_sim$array6)) print(head(EVPT_sim$array6)) print(length(EVPT_sim$converge_results)) print(head(EVPT_sim$converge_results)) ## ----PTprintw, cache=FALSE---------------------------------------------------- if (length(warnings()) > 0) { print("Warnings occurred during fitting; check convergence diagnostics.") } ## ----PTplot, fig.width=5, fig.height=5, echo=FALSE---------------------------- # bayesplot::mcmc_pairs(EVPT_sim$array0,diag_fun = "dens") ## ----PTplot1, fig.width=5, fig.height=5, echo=FALSE--------------------------- #bayesplot::mcmc_dens(EVPT_sim$array1,facet_args = list(ncol = 4))+ ggplot2::theme(aspect.ratio = 0.5) ## ----PTplot2, echo=FALSE------------------------------------------------------ plot(EVPT_sim$array3$lenA,EVPT_sim$array3$propA, xlab = "AAP duration, hours", ylab = "Prop. test plants PT infected",ylim=c(0,1)) lines(EVPT_sim$array3$lenA,EVPT_sim$array3$simulLA) lines(EVPT_sim$array3$lenA,EVPT_sim$array3$simulUA) legend(EVPT_sim$array3$lenA[1], 0.5, legend=c("experiment", "95% Cr.I. estimates"), pch = c(1, NA), lty = c(NA, 1), col = "black", cex=0.8, bty = "n") plot(EVPT_sim$array4$lenL,EVPT_sim$array4$propL, xlab = "LAP duration, hours", ylab = "Prop. test plants PT #infected",ylim=c(0,1)) lines(EVPT_sim$array4$lenL,EVPT_sim$array4$simulLL) lines(EVPT_sim$array4$lenL,EVPT_sim$array4$simulUL) legend(EVPT_sim$array4$lenL[1], 0.5, legend=c("experiment", "95% Cr.I. estimates"), pch = c(1, NA), lty = c(NA, 1), col = "black", cex=0.8, bty = "n") plot(EVPT_sim$array5$lenI,EVPT_sim$array5$propI, xlab = "IAP duration, hours", ylab = "Prop. test plants PT #infected",ylim=c(0,1)) lines(EVPT_sim$array5$lenI,EVPT_sim$array5$simulLI) lines(EVPT_sim$array5$lenI,EVPT_sim$array5$simulUI) legend(EVPT_sim$array5$lenI[1], 0.5, legend=c("experiment", "95% Cr.I. estimates"), pch = c(1, NA), lty = c(NA, 1), col = "black", cex=0.8, bty = "n") ## ----PTinfer, cache=FALSE----------------------------------------------------- # P EPIDEMIC inferences # accessing the chains from estimate_virus_parameters # VIRUS PARAMETERS # convert from per min to per day al_fits=target[,,dimnames(target)$variable=="al[1]", drop = TRUE]*24 be_fits=target[,,dimnames(target)$variable=="be[1]", drop = TRUE]*24 mu_fits=target[,,dimnames(target)$variable=="mu[1]", drop = TRUE]*24 print(sum((al_fits<0))) print(sum((be_fits<0))) print(sum((mu_fits<0))) # LOCAL PARAMETERS # set the local parameters (per day) thet_external <- 0.45 # dispersal r_external <- 1/28 # roguing bf_external <- 1/14 # vector mortality h_external <- 1/365 # harvesting rate nu_pl_external <- 1/14 # plant latent progression rate (1/plant latent period) localParams=c(thet_external, r_external, h_external, bf_external, nu_pl_external) # generate p Epdemic results for various insect burdens (per plant) interval_ind=(1/10) # this is an optional start setting relating to efficiency of time-step result_vec_byPl_PT <- array(rep(0, runsPrag*nChains), dim=c(runsPrag,nChains)) result_vec_byIns_PT <- result_vec_byPl if (identical(Sys.getenv("NOT_CRAN"), "true")) { # loop through MCMC chains to sample posterior distributions of virus parameters # and calc epi prob for given vector abundance for (ggg in 1:nChains){ print(paste0('nChains',ggg)) for (ppp in 1:runsPrag) { al_estim=al_fits[nrow(al_fits)+1-ppp,ggg] be_estim=be_fits[nrow(al_fits)+1-ppp,ggg] mu_estim=mu_fits[nrow(al_fits)+1-ppp,ggg] virusParams=c(al_estim,be_estim,mu_estim) # returns vector of epidemic probabilities for different inoculum states (see ms) numVars <- ((numInsects+1)*3)-1 # insects per plant number no. inoculum states qm_out=calculate_epidemic_probability(numInsects,interval_ind,localParams,virusParams) result_vec_byPl_PT[ppp,ggg]=qm_out[1] # storing epi prob for state: inoculum=single infected plant result_vec_byIns_PT[ppp,ggg]=qm_out[(numVars-(numInsects-1))] # storingfor state: single infected insect } } } else { # Load precomputed object for CRAN result_vec_byPl_PT <- readRDS(system.file("extdata", "result_vec_byPl_PT.rds", package = "EpiPvr")) result_vec_byIns_PT <- readRDS(system.file("extdata", "result_vec_byIns_PT.rds", package = "EpiPvr")) } ## ----wrangleandout, cache=FALSE----------------------------------------------- # now run the epidemic probability calculator # runtime for a single parameter set is fast # runtime for a MCMC chains (potentially very many parameter sets) may take substantial time # data wrangling the set of epidemic probabilities from the chains data_table_Pl=array(rep(0,3), dim=c(3, 1)) data_table_Ins=array(rep(0,3), dim=c(3, 1)) target_A1A2=array(rep(0,2*runsPrag*nChains), dim=c(2,runsPrag*nChains)) target_A1=array(rep(0,runsPrag*nChains), dim=c(runsPrag, nChains)) target_A2=target_A1 for (gg in 1:nChains){ target_A1[,gg]=t(result_vec_byPl_PT[,gg]) target_A2[,gg]=t(result_vec_byIns_PT[,gg]) } target_A1A2=rbind(as.vector(target_A1),as.vector(target_A2)) # ensure at least ~20 data points for each of 4 chains ts=posterior::summarize_draws(posterior::as_draws(t(target_A1A2))) data_table_Pl=c(ts[1,]$median,ts[1,]$q5,ts[1,]$q95) data_table_Ins=c(ts[2,]$median,ts[2,]$q5,ts[2,]$q95) print(head(target_A1A2[1,])) print(head(target_A1A2[2,])) # nb default summarize_draws 90% interval - please adjust for 95% names(data_table_Pl)=c('50','5','95') print(data_table_Pl) names(data_table_Ins)=c('50','5','95') print(data_table_Ins)