## ----SETUP, include = FALSE-------------------------------------------------------- ## create directories for plots and cache dir.create("plots", showWarnings=FALSE) dir.create("monitoringCounts-cache", showWarnings=FALSE) ## load packages library('surveillance') library('gamlss') ## ----echo=FALSE-------------------------------------------------------------------- data("salmNewport") ## ----echo=FALSE-------------------------------------------------------------------- stopifnot( all.equal(observed(salmNewport), observed(as(as(salmNewport, "ts"), "sts"))) ) ## ----echo=FALSE-------------------------------------------------------------------- # This code is the one used for the Salmon et al. (2016) JSS article. # Using this code all examples from the article can be reproduced. # computeALL is FALSE to avoid the computationally intensive parts # of the code (simulations to find a threshold value for categoricalCUSUM, # INLA-driven BODA) but one can set it to TRUE to have it run. computeALL <- FALSE ## ----NewportPlot-simple, fig.keep = 'none'----------------------------------------- plot(salmNewport, type = observed ~ time, xaxis.tickFreq = list("%m" = atChange, "%G" = atChange), xaxis.labelFreq = list("%Y" = atMedian), xaxis.labelFormat = "%Y") ## ----unitPlot-simple, echo = FALSE, fig.keep = 'none'------------------------------ plot(salmNewport, units = 2:3) ## ----EARS, fig.keep='none'--------------------------------------------------------- in2011 <- which(isoWeekYear(epoch(salmNewport))$ISOYear == 2011) salmNewportGermany <- aggregate(salmNewport, by = "unit") control <- list(range = in2011, method = "C1", alpha = 0.05) surv <- earsC(salmNewportGermany, control = control) plot(surv) ## ----farHead----------------------------------------------------------------------- con.farrington <- list( range = in2011, noPeriods = 1, b = 4, w = 3, weightsThreshold = 1, pastWeeksNotIncluded = 3, pThresholdTrend = 0.05, thresholdMethod = "delta" ) con.noufaily <- list( range = in2011, noPeriods = 10, b = 4, w = 3, weightsThreshold = 2.58, pastWeeksNotIncluded = 26, pThresholdTrend = 1, thresholdMethod = "nbPlugin" ) ## ----echo=F------------------------------------------------------------------------ con.farrington$limit54 <- con.noufaily$limit54 <- c(0,50) # for the figure ## ---------------------------------------------------------------------------------- salm.farrington <- farringtonFlexible(salmNewportGermany, con.farrington) salm.noufaily <- farringtonFlexible(salmNewportGermany, con.noufaily) ## ----farPlot-simple, echo = FALSE, fig.keep = 'none'------------------------------- par(mfrow = c(1,2)) plot(salm.farrington) plot(salm.noufaily) ## ----campyDE-simple, fig.keep='none'----------------------------------------------- data("campyDE") cam.sts <- sts(epoch = campyDE$date, observed = campyDE$case, state = campyDE$state) plot(cam.sts, col = "mediumblue") lines(campyDE$hum * 50, col = "white", lwd = 2) axis(4, at = seq(0, 2500, by = 500), labels = seq(0, 50, by = 10)) ## ----boda-cache, echo = FALSE, results='hide'-------------------------------------- if (computeALL) { ##hoehle 2018-07-18: changed code to use NICELOOKINGboda, but that's iid. Reason: ##The option 'rw1' currently crashes INLA. library("INLA") rangeBoda <- which(epoch(cam.sts) >= as.Date("2007-01-01")) control.boda <- list(range = rangeBoda, X = NULL, trend = TRUE, season = TRUE, prior = "iid", alpha = 0.025, mc.munu = 10000, mc.y = 1000, samplingMethod = "marginals") boda <- boda(cam.sts, control = control.boda) save(list = c("boda", "control.boda", "rangeBoda"), file = "monitoringCounts-cache/boda.RData") } else { load("monitoringCounts-cache/boda.RData") } ## ----boda2-cache, echo = FALSE, results='hide'------------------------------------- if (computeALL) { covarNames <- c("l1.hum", "l2.hum", "l3.hum", "l4.hum", "newyears", "christmas", "O104period") control.boda2 <- modifyList(control.boda, list(X = campyDE[, covarNames], season = FALSE)) boda.covars <- boda(cam.sts, control = control.boda2) save(list = c("boda.covars", "covarNames", "control.boda2"), file = "monitoringCounts-cache/boda.covars.RData") } else { load("monitoringCounts-cache/boda.covars.RData") } ## ----bPlot-simple, echo = FALSE, fig.keep = 'none'--------------------------------- plot(boda.covars) ## ----boda3, echo = FALSE----------------------------------------------------------- control.far <- list(range=rangeBoda,b=4,w=5,alpha=0.025*2) far <- farrington(cam.sts,control=control.far) #Both farringtonFlexible and algo.bayes uses a one-sided interval just as boda. control.far2 <-modifyList(control.far,list(alpha=0.025)) farflex <- farringtonFlexible(cam.sts,control=control.far2) bayes <- suppressWarnings(bayes(cam.sts,control=control.far2)) ## ----boda4, echo = FALSE----------------------------------------------------------- # Small helper function to combine several equally long univariate sts objects combineSTS <- function(stsList) { epoch <- as.numeric(epoch(stsList[[1]])) observed <- NULL alarm <- NULL for (i in 1:length(stsList)) { observed <- cbind(observed,observed(stsList[[i]])) alarm <- cbind(alarm,alarms(stsList[[i]])) } colnames(observed) <- colnames(alarm) <- names(stsList) res <- sts(epoch=as.numeric(epoch), epochAsDate=TRUE, observed=observed, alarm=alarm) return(res) } ## ----alarmplot, fig.width=8, fig.height=4, out.width="\\linewidth", echo=FALSE----- # Make an artificial object containing two columns - one with the boda output # and one with the farrington output cam.surv <- combineSTS(list(boda.covars=boda.covars,boda=boda,bayes=bayes, farrington=far,farringtonFlexible=farflex)) par(mar=c(4,8,2.1,2),family="Times") plot(cam.surv,type = alarm ~ time,lvl=rep(1,ncol(cam.surv)), alarm.symbol=list(pch=17, col="red2", cex=1,lwd=3), cex.axis=1,xlab="Time (weeks)",cex.lab=1,xaxis.tickFreq=list("%m"=atChange,"%G"=atChange),xaxis.labelFreq=list("%G"=at2ndChange), xaxis.labelFormat="%G") ## ----glrnb, results='hide'--------------------------------------------------------- phase1 <- which(isoWeekYear(epoch(salmNewportGermany))$ISOYear < 2011) phase2 <- in2011 control <- list(range = phase2, c.ARL = 4, theta = log(2), ret = "cases", mu0 = list(S = 1, trend = TRUE, refit = FALSE)) salmGlrnb <- glrnb(salmNewportGermany, control = control) ## ----cat--------------------------------------------------------------------------- data("salmHospitalized") isoWeekYearData <- isoWeekYear(epoch(salmHospitalized)) dataBefore2013 <- which(isoWeekYearData$ISOYear < 2013) data2013 <- which(isoWeekYearData$ISOYear == 2013) dataEarly2014 <- which(isoWeekYearData$ISOYear == 2014 & isoWeekYearData$ISOWeek <= 4) phase1 <- dataBefore2013 phase2 <- c(data2013, dataEarly2014) salmHospitalized.df <- cbind(as.data.frame(salmHospitalized), weekNumber = isoWeekYearData$ISOWeek) names(salmHospitalized.df) <- c("y", "t", "state", "alarm", "upperbound", "n", "freq", "epochInPeriod", "weekNumber") ## ----catbis, results='hide'-------------------------------------------------------- vars <- c( "y", "n", "t", "epochInPeriod", "weekNumber") m.bbin <- gamlss(cbind(y, n-y) ~ 1 + t + sin(2 * pi * epochInPeriod) + cos(2 * pi * epochInPeriod) + sin(4 * pi * epochInPeriod) + cos(4 * pi * epochInPeriod) + I(weekNumber == 1) + I(weekNumber == 2), sigma.formula =~ 1, family = BB(sigma.link = "log"), data = salmHospitalized.df[phase1, vars]) ## ----cat2-------------------------------------------------------------------------- R <- 2 h <- 2 pi0 <- predict(m.bbin, newdata = salmHospitalized.df[phase2, vars], type = "response") pi1 <- plogis(qlogis(pi0) + log(R)) pi0m <- rbind(pi0, 1 - pi0) pi1m <- rbind(pi1, 1 - pi1) ## ----cat2bis----------------------------------------------------------------------- populationHosp <- unname(cbind( population(salmHospitalized), population(salmHospitalized))) observedHosp <- cbind( "Yes" = as.vector(observed(salmHospitalized)), "No" = as.vector(population(salmHospitalized) - observed(salmHospitalized))) salmHospitalized.multi <- sts( frequency = 52, start = c(2004, 1), epoch = epoch(salmHospitalized), observed = observedHosp, population = populationHosp, multinomialTS = TRUE) ## ----cat2ter----------------------------------------------------------------------- dBB.cusum <- function(y, mu, sigma, size, log = FALSE) { dBB(if (is.matrix(y)) y[1,] else y, if (is.matrix(y)) mu[1,] else mu, sigma = sigma, bd = size, log = log) } ## ----cat3-------------------------------------------------------------------------- controlCat <- list(range = phase2, h = 2, pi0 = pi0m, pi1 = pi1m, ret = "cases", dfun = dBB.cusum) salmHospitalizedCat <- categoricalCUSUM( salmHospitalized.multi, control = controlCat, sigma = exp(m.bbin$sigma.coefficients)) ## ---------------------------------------------------------------------------------- h.grid <- seq(1, 10, by = 0.5) ## ----cath-cache, echo = FALSE, results='hide'-------------------------------------- if (computeALL) { simone <- function(sts, h) { y <- rBB(length(phase2), mu = pi0m[1, , drop = FALSE], bd = population(sts)[phase2, ], sigma = exp(m.bbin$sigma.coefficients), fast = TRUE) observed(sts)[phase2, ] <- cbind(y, population(sts)[phase2, 1] - y) one.surv <- categoricalCUSUM( sts, control = modifyList(controlCat, list(h = h)), sigma = exp(m.bbin$sigma.coefficients)) return(any(alarms(one.surv)[, 1])) } set.seed(123) nSims <- 1000 pMC <- sapply(h.grid, function(h) { mean(replicate(nSims, simone(salmHospitalized.multi, h))) }) pMarkovChain <- sapply(h.grid, function(h) { TA <- LRCUSUM.runlength(mu = pi0m[1,,drop = FALSE], mu0 = pi0m[1,,drop = FALSE], mu1 = pi1m[1,,drop = FALSE], n = population(salmHospitalized.multi)[phase2, ], h = h, dfun = dBB.cusum, sigma = exp(m.bbin$sigma.coef)) return(tail(TA$cdf, n = 1)) }) save(pMC, file = "monitoringCounts-cache/pMC.RData") save(pMarkovChain, file = "monitoringCounts-cache/pMarkovChain.RData") } else { load("monitoringCounts-cache/pMC.RData") load("monitoringCounts-cache/pMarkovChain.RData") } ## ----catF-simple, echo = FALSE, fig.keep = 'none'---------------------------------- plot(salmHospitalizedCat[,1]) ## ----catARL-simple, echo = FALSE, fig.keep = 'none'-------------------------------- matplot(h.grid, cbind(pMC, pMarkovChain), type="l", lty=1:2, col=1) abline(h=0.1, lty=5) legend("center", c("Monte Carlo","Markov chain"), lty=1:2, bty="n") ## ----rotaPlot-simple, fig.keep='none'---------------------------------------------- data("rotaBB") plot(rotaBB) ## ---------------------------------------------------------------------------------- rotaBB.df <- as.data.frame(rotaBB) X <- with(rotaBB.df, cbind(intercept = 1, epoch, sin1 = sin(2 * pi * epochInPeriod), cos1 = cos(2 * pi * epochInPeriod))) phase1 <- epoch(rotaBB) < as.Date("2009-01-01") phase2 <- !phase1 library("MGLM") ## MGLMreg automatically takes the last class as ref so we reorder order <- c(2:5, 1); reorder <- c(5, 1:4) m0 <- MGLMreg(as.matrix(rotaBB.df[phase1, order]) ~ -1 + X[phase1, ], dist = "MN") ## ---------------------------------------------------------------------------------- m1 <- m0 m1@coefficients[1, ] <- m0@coefficients[1, ] + log(2) pi0 <- t(predict(m0, newdata = X[phase2, ])[, reorder]) pi1 <- t(predict(m1, newdata = X[phase2, ])[, reorder]) ## ----CATCUSUM---------------------------------------------------------------------- dfun <- function(y, size, mu, log = FALSE) { dmultinom(x = y, size = size, prob = mu, log = log) } h <- 2 # threshold for the CUSUM statistic control <- list(range = seq(nrow(rotaBB))[phase2], h = h, pi0 = pi0, pi1 = pi1, ret = "value", dfun = dfun) surv <- categoricalCUSUM(rotaBB,control=control) ## ----include = FALSE--------------------------------------------------------------- alarmDates <- epoch(surv)[which(alarms(surv)[,1])] format(alarmDates,"%b %Y") ## ----CATCUSUMMC,echo=FALSE,eval=FALSE---------------------------------------------- # #Number of MC samples # nSamples <- 1e4 # # #Do MC # simone.stop <- function(sts, control) { # phase2Times <- seq(nrow(sts))[phase2] # #Generate new phase2 data from the fitted in control model # y <- sapply(1:length(phase2Times), function(i) { # rmultinom(n=1, prob=pi0[,i],size=population(sts)[phase2Times[i],1]) # }) # observed(sts)[phase2Times,] <- t(y) # one.surv <- categoricalCUSUM(sts, control=control) # #compute P(S<=length(phase2)) # return(any(alarms(one.surv)[,1]>0)) # } # # set.seed(1233) # rlMN <- replicate(nSamples, simone.stop(rotaBB, control=control)) # mean(rlMN) # 0.5002 ## ---------------------------------------------------------------------------------- m0.dm <- MGLMreg(as.matrix(rotaBB.df[phase1, 1:5]) ~ -1 + X[phase1, ], dist = "DM") c(m0@AIC, m0.dm@AIC) ## ---------------------------------------------------------------------------------- ## Change intercept in the first class (for DM all 5 classes are modeled) delta <- 2 m1.dm <- m0.dm m1.dm@coefficients[1, ] <- m0.dm@coefficients[1, ] + c(-delta, rep(delta/4, 4)) alpha0 <- exp(X[phase2,] %*% m0.dm@coefficients) alpha1 <- exp(X[phase2,] %*% m1.dm@coefficients) dfun <- function(y, size, mu, log = FALSE) { dLog <- ddirmn(t(y), t(mu)) if (log) dLog else exp(dLog) } h <- 2 control <- list(range = seq(nrow(rotaBB))[phase2], h = h, pi0 = t(alpha0), pi1 = t(alpha1), ret = "value", dfun = dfun) surv.dm <- categoricalCUSUM(rotaBB, control = control) ## ----echo=FALSE,eval=FALSE--------------------------------------------------------- # matplot(alpha0/rowSums(alpha0),type="l",lwd=3,lty=1,ylim=c(0,1)) # matlines(alpha1/rowSums(alpha1),type="l",lwd=1,lty=2) ## ----ctPlot-simple, echo = FALSE, fig.keep = 'none'-------------------------------- par(mfrow = c(1,2)) surv@multinomialTS <- surv.dm@multinomialTS <- FALSE # trick plot method ... plot(surv[,1], col=c(NA,NA,4), ylab = expression(C[t]), ylim = c(0,33), xaxis.tickFreq=list("%Y"=atChange, "%m"=atChange), xaxis.labelFreq=list("%Y"=atMedian), xaxis.labelFormat="%Y") abline(h=h, lwd=2, col="darkgrey") plot(surv.dm[,1], col=c(NA,NA,4), ylab = expression(C[t]), ylim = c(0,33), xaxis.tickFreq=list("%Y"=atChange, "%m"=atChange), xaxis.labelFreq=list("%Y"=atMedian), xaxis.labelFormat="%Y") abline(h=h, lwd=2, col="darkgrey") ## ---------------------------------------------------------------------------------- today <- which(epoch(salmNewport) == as.Date("2013-12-23")) rangeAnalysis <- (today - 4):today in2013 <- which(isoWeekYear(epoch(salmNewport))$ISOYear == 2013) algoParameters <- list(range = rangeAnalysis, noPeriods = 10, populationBool = FALSE, b = 4, w = 3, weightsThreshold = 2.58, pastWeeksNotIncluded = 26, pThresholdTrend = 1, thresholdMethod = "nbPlugin", alpha = 0.05, limit54 = c(0, 50)) results <- farringtonFlexible(salmNewport[, c("Baden.Wuerttemberg", "North.Rhine.Westphalia")], control = algoParameters) ## ----results='asis'---------------------------------------------------------------- start <- isoWeekYear(epoch(salmNewport)[min(rangeAnalysis)]) end <- isoWeekYear(epoch(salmNewport)[max(rangeAnalysis)]) caption <- paste0("Results of the analysis of reported S. Newport ", "counts in two German federal states for the weeks ", start$ISOYear, "-W", start$ISOWeek, " to ", end$ISOYear, "-W", end$ISOWeek, ". Bold red counts indicate weeks with alarms.") toLatex(results, caption = caption, label = "tableResults", ubColumnLabel = "Threshold", include.rownames = FALSE, sanitize.text.function = identity)