## ----echo=FALSE-----------------------------------------------------
options(continue="  ", width=70, prompt="> ")
options(contrasts=c("contr.treatment", "contr.poly")) #ensure default
options(show.significant.stars = FALSE) #statistical intelligence
knitr::opts_chunk$set(
  collapse = TRUE, warning=FALSE, error=FALSE, tidy=FALSE,
  fig.asp=.75, fig.align="center",
  comment = "#>", highlight=TRUE, echo=TRUE, prompt=FALSE,
  fig.width=7, fig.height=5.5
)

library("survival")

## ----initial--------------------------------------------------------
data(aidssi, package="mstate")  # AIDS data set
data(ebmt3, package="mstate")   # transplant data set

## ----aids, fig.height=3, fig.width=3--------------------------------
oldpar <- par(mar=c(0,0,0,0))
states <- c("Event free", "AIDS", "SI")
smat <- matrix(0, 3, 3, dimnames=list(states, states))
smat[1,2] <- smat[1,3] <- 1
statefig(1:2, smat)
par(oldpar)

## ----fig-putter1, fig.cap='Correct Aalen-Johansen curves for the cumulative incidence of AIDS and SI'----
aidssi$event <- factor(aidssi$status, 0:2, c("censored", "AIDS", "SI"))

# The correct Aalen-Johansen curves
ajfit <- survfit(Surv(time, event) ~1, data = aidssi)
ajfit$transitions
plot(ajfit, xmax = 13, col = 1:2, lwd = 2,
     xlab = "Years from HIV infection", ylab = "Probability")
legend(8, .2, c("AIDS", "SI"), lty = 1, lwd = 2, col = 1:2, bty = 'n')

## ----fig-T2, fig.cap="This figure (T2) shows the estimated survival curve for AIDS and probability of SI appearance, based on the naive Kaplan-Meier estimate."----
# re-create figure T2
# KM curves that censor the other endpoint (a bad idea)
bad1 <- survfit(Surv(time, event=="AIDS") ~ 1, data = aidssi)
bad2 <- survfit(Surv(time, event=="SI") ~1, data = aidssi)

plot(bad1, conf.int = FALSE, xmax = 13, 
     xlab = "Years from HIV infection", ylab = "Probability")
lines(bad2, conf.int = FALSE, fun = 'event', xmax = 13)
text(c(8,8), c(.8, .22), c("AIDS", "SI"))

## ----pstack---------------------------------------------------------
pstack <- function(fit, top=FALSE, ...) {
    temp <- survfit0(fit)   # add the point at time 0
    if (is.matrix(temp$pstate))  # usual case
        temp$pstate <- t(apply(temp$pstate, 1, cumsum))
    else if (is.array(temp$pstate)) 
        temp$pstate <- aperm(apply(temp$pstate, 1:2, cumsum), c(2,3,1))
    # this works because we don't change any other aspect of the survfit
    #  object, but only modify the probabilities.
    if (top) plot(temp, noplot="", ...)
    else plot(temp, noplot=temp$states[length(temp$states)], ...)
}

## ----fig-T3, fig.cap="This figure (T3) shows estimates of probabilities of AIDS and SI appearance, based on the naive Kaplan-Meier (grey) and on cumulative incidence/Aalen-Johansen functions (black)"----
# re-create figure T3
pstack(ajfit[c(2,1,3)], col=1, xmax=13, lwd=2, 
     xlab="Years from HIV infection", ylab="Probability")
lines(bad1, conf.int=FALSE, col="lightgray")
lines(bad2, conf.int=FALSE, fun='event', col='lightgray')
text(c(4, 8,8), c(.5, .85, .15), c("Event free", "AIDS", "SI"), col=1)

## ----fig-T4, fig.cap="This figure (T4) shows cumulative incidence curves of AIDS and SI appearance.  The cumulative incidence functions are stacked; the distance between two curves represent the probability of the different events."----
pstack(ajfit[c(2,3,1)], xmax=13, lwd=2, col=1, ylim=c(0,1),
        xlab="Years from HIV infection", ylab="Probability")
text(c(11, 11, 11), c(.2, .55, .9), c("AIDS", "SI", "Event free"))

## ----fig-cuminc, fig.cap="Cumulative incidence fit using the cumlative hazard function"----
plot(ajfit, cumhaz=TRUE, xmax=13, col=1:2, lty=2,
     xlab="Years from HIV infection", ylab="Cumulative incidence")
lines(bad1, cumhaz=TRUE, conf.int=FALSE)
lines(bad2, cumhaz=TRUE, col=2, conf.int=FALSE)

## ----cfit-----------------------------------------------------------
cfit0 <- coxph(Surv(time, event) ~ ccr5, data = aidssi, id = patnr)
print(cfit0, digits=2)

cfit1 <- coxph(Surv(time, event=="AIDS") ~ ccr5, data = aidssi)
print(cfit1, digits=2)

cfit2 <- coxph(Surv(time, event=="SI") ~ ccr5, data = aidssi)
print(cfit2, digits=2)

## ----stack----------------------------------------------------------
temp <- subset(aidssi, select= c(patnr, time, ccr5))
temp1 <- data.frame(temp, status= 1*(aidssi$event=="AIDS"), cause="AIDS")
temp2 <- data.frame(temp, status= 1*(aidssi$event=="SI"),   cause="SI")
stack <- rbind(temp1, temp2)

cfit3 <- coxph(Surv(time, status) ~ ccr5 * strata(cause), data=stack)
print(cfit3, digits=2)

## ----stack2---------------------------------------------------------
stack$ccr5.1 <- (stack$ccr5=="WM") * (stack$cause == "AIDS")
stack$ccr5.2 <- (stack$ccr5=="WM") * (stack$cause == "SI")
cfit3b <- coxph(Surv(time, status) ~ ccr5.1 + ccr5.2 + strata(cause), data = stack)
cfit3b$coef

temp <- cbind(cfit0=cfit0$loglik, cfit3=cfit3$loglik, cfit3b=cfit3b$loglik)
rownames(temp) <- c("beta=0", "beta=final")
temp

## ----common---------------------------------------------------------
common1 <- coxph(Surv(time, status) ~ ccr5 + strata(cause), data=stack)
print(common1, digits=2)

common1b <- coxph(list( Surv(time, event) ~ 1, 
                        1:2 + 1:3 ~ ccr5/common ),
                  data=aidssi, id=patnr)

## ----common2--------------------------------------------------------
common2 <- coxph(Surv(time, status) ~ ccr5, data = stack)
all.equal(common2$coef, common1$coef)

## ----common3--------------------------------------------------------
# reprise common1 and common2, using the breslow option
test1 <- coxph(Surv(time, status) ~ ccr5 + strata(cause), stack,
               ties='breslow')
test2 <- coxph(Surv(time, status) ~ ccr5, stack, ties='breslow')
all.equal(test2$loglik + test2$nevent * log(2),  test1$loglik)
all.equal(test2$coef, test1$coef)

test3 <- coxph(Surv(time, status) ~ ccr5 + cause, stack, ties='breslow')
test3
all.equal(test3$coef[1], test1$coef)

## ----aidscurve------------------------------------------------------
# re-create figure T5 in a single panel
dummy <- data.frame(ccr5=c("WW", "WM"))
pred.aj <- survfit(cfit0, newdata=dummy)
dim(pred.aj)
pred.aj$states

## ----fig-T5, fig.cap="Figure T5 showing predicted curves for AIDS and SI stratified by subjects with CCR5 wild-type (WW) and mutant (WM)."----
oldpar <- par(mfrow=c(1,2))
plot(pred.aj[,,"AIDS"], lwd=2, col=c("black", "gray"), 
     xmax=13, ylim=c(0,.5),
     xlab="Years from HIV infection", ylab="Probability of AIDS")
text(c(9.5, 10), c(.3, .1), c("WW", "WM"))
plot(pred.aj[,,"SI"], lwd=2, col=c("black", "gray"), 
      xmax=13, ylim=c(0,.5),
    xlab="Years from HIV infection", ylab="Probability of SI")
text(c(8.5, 9), c(.33, .25), c("WW", "WM"))
par(oldpar)

## ----finegray-------------------------------------------------------
fdata1 <- finegray(Surv(time, event) ~ ., data = aidssi, etype = 'AIDS')
fgfit1 <- coxph(Surv(fgstart, fgstop, fgstatus) ~ ccr5, data = fdata1,
                weight = fgwt)
fgfit1

fdata2 <- finegray(Surv(time, event) ~., aidssi, etype="SI")
fgfit2 <- coxph(Surv(fgstart, fgstop, fgstatus) ~ ccr5, fdata2,
                weight = fgwt)
fgfit2

## ----fig-T8, fig.cap="Reproduction of Figure T8 showing cumulative incidence functions for AIDS (left) and SI appearance (right), for CCR5 wild-type (WW) and mutant (WM), based on the Fine and Gray model."----
# re-create figure T8: Fine-Gray curves
fgsurv1<-survfit(fgfit1, newdata=dummy)
fgsurv2<-survfit(fgfit2, newdata=dummy)

oldpar <- par(mfrow=c(1,2), mar=c(4.1, 3.1, 3.1, 1)) #leave room for title
plot(fgsurv1, col=1:2, lty=c(1,1,2,2), lwd=2, xmax=13,
     ylim=c(0, .5),fun='event',
     xlab="Years from HIV infection", ylab="Probability")
title("AIDS")
plot(fgsurv2, col=1:2, lty=c(1,1,2,2), lwd=2, xmax=13,
     ylim=c(0, .5), fun='event',
     xlab="Years from HIV infection", ylab="Probability")
title("SI appearance")    
par(oldpar)

## ----fig-T9, fig.cap="Reproduction of Figure T9 showing non-parametric cumulative incidence functions for AIDS (left) and SI appearance (right), for CCR5 wild-type (WW) and mutant (WM)"----
# re-create figure T9: curves by CCR type
aj2 <- survfit(Surv(time, event) ~ ccr5, data = aidssi)
oldpar <- par(mfrow=c(1,2))
plot(aj2[,"AIDS"], xmax=13, col=1:2, lwd=2, ylim=c(0, .5),
     xlab="Years from HIV infection", ylab="Probability of AIDS")
text(c(10, 10), c(.35, .07), c("WW", "WM"))

plot(aj2[,"SI"], xmax=13, col=1:2, lwd=2, ylim=c(0, .5), 
     xlab="Years from HIV infection", ylab="Probability of SI")
text(c(8, 8), c(.34, .18), c("WW", "WM"))
par(oldpar)

## ----fig-T13, fig.height=3, fig.width=4, fig.cap="Reproduce Figure T13"----
oldpar <- par(mar=c(0,0,0,0))
states <- c("Transplant", "Platelet recovery", 
            "Relapse or death")
tmat <- matrix(0, 3,3, dimnames=list(states, states))
tmat[1,2] <- tmat[1,3] <- tmat[2,3] <- 1 # arrows
statefig(cbind((1:3)/4, c(1,3,1)/4), tmat)
text(c(.3, .5, .7), c(.5, .3, .5), c(1169, 458, 383))
par(oldpar)

## ----tableT2--------------------------------------------------------
table(ebmt3$dissub)
table(ebmt3$drmatch)
table(ebmt3$tcd)
table(ebmt3$age)

## ----data1----------------------------------------------------------
temp <- subset(ebmt3, select = -c(prtime, prstat, rfstime, rfsstat))
edata <- tmerge(temp, ebmt3, id, 
                rstat = event(rfstime, rfsstat),
                pstat = event(prtime, prstat),
                priorpr = tdc(prtime))
print(edata[15:20,-(3:5)])

# Check that no one had recovery and death on the same day
with(edata, table(rstat, pstat))

# Create the factor outcome
edata$event <- with(edata, factor(pstat + 2*rstat, 0:2,
                           labels = c("censor", "PR", "RelDeath")))
levels(edata$drmatch) <- c("Match", "Mismatch")
survcheck(Surv(tstart, tstop, event) ~1, edata, id=id)

## ----fig-data1b-----------------------------------------------------
surv1 <- survfit(Surv(tstart, tstop, event) ~ 1, edata, id=id)
surv1$transitions   # matches the Frequencies on page C5
plot(surv1, col=1:2, xscale=365.25, lwd=2, 
     xlab="Years since transplant", ylab="Fraction in state")
legend(1000, .2, c("Platelet recovery", "Death or Relapse"), 
       lty=1, col=1:2, lwd=2, bty='n')

## ----efit1----------------------------------------------------------
efit1 <- coxph(Surv(tstart, tstop, event) ~ dissub + age + drmatch + tcd,
               id=id, data=edata, ties='breslow')
print(efit1, digits=2)

## ----fig-T14, fig.cap="Reproduce Figure T14"------------------------
# a data set containing the "reference" categories 
rdata <- data.frame(dissub="AML", age="<=20", drmatch="Match", tcd="No TCD")
esurv1 <- survfit(efit1, newdata=rdata)
plot(esurv1, cumhaz=TRUE, lty=1:3, xscale=365.25, xmax=7*365.35,
     xlab="Years since transplant", ylab="Cumulative hazard")
legend(365, .8, c("Transplant to platelet recovery (1:2)",
                "Transplant to death (1:3)",
                "Platelet recovery to death (2:3)"), lty=1:3, bty='n')

## ----efit2----------------------------------------------------------
efit2 <- coxph(list(Surv(tstart, tstop, event) ~ dissub + age + drmatch + tcd,
                    0:state("RelDeath") ~ 1 / shared),
                    id=id, data=edata, ties='breslow')
print(coef(efit2, type='matrix'), digits=2)

## ----efit3----------------------------------------------------------
prtime <- ifelse(edata$priorpr==1, edata$tstart, 0)/365.25
efit3 <-  coxph(list(Surv(tstart, tstop, event) ~ dissub + age + drmatch + tcd,
                    0:state("RelDeath") ~ 1/ shared,
                    "PR":"RelDeath" ~ prtime), 
                    id=id, data=edata, ties='breslow')
print(coef(efit3, type='matrix'), digits=2)

## -------------------------------------------------------------------
edummy <- expand.grid(age="<=20", dissub="AML", drmatch="Mismatch",
                      tcd=c("No TCD", "TCD"), priorpr=1)
ecurve2 <- survfit(efit2, newdata= edummy)
plot(ecurve2, col=c(1,1,2,2,3,3), lty=1:2, lwd=2, xscale=365.25,
     noplot=NULL, 
     xlab="Years since transplant", ylab="Predicted probabilities")
legend(700, .9, c("Currently alive in remission, no PR", "Currently in PR",
               "Relapse or death"), col=1:3, lwd=2, bty='n')
text(700, .95, "Solid= No TCD, dashed = TCD", adj=0)

## ----fourstate, fig.height=3, fig.width=5---------------------------
oldpar <- par(mar=c(0,0,0,0))
state4 <- c("Transplant", "Platelet recovery", "Relapse or death (1)",
            "Relapse or death (2)")
cmat <- matrix(0, 4, 4, dimnames = list(state4, state4))
cmat[1,2] <- cmat[1,3] <- cmat[2,4] <- 1
statefig(c(1,2,1), cmat)
par(oldpar)

## ----fourstate2-----------------------------------------------------
etemp <- as.numeric(edata$event)
etemp <- ifelse(etemp==3 & edata$priorpr==1, 4, etemp)
edata$event4 <- factor(etemp, 1:4, c("censor", "PR", "RelDeath1", 
                                     "RelDeath2")) 
survcheck(Surv(tstart, tstop, event4) ~ 1, edata, id=id)

efit4 <- coxph(list(Surv(tstart, tstop, event4) ~ dissub + age + drmatch + tcd,
                    1:3 + 2:4 ~ 1/ shared),
                    id=id, data=edata, ties='breslow')
efit4$cmap
# some of the coefficient names change, but not the values
all.equal(coef(efit4), coef(efit2), check.attributes= FALSE)

## ----fig-T15, fig.cap="Figure T15"----------------------------------
edummy <- expand.grid(dissub="AML", age= "<=20", drmatch="Match",
                      tcd=c("No TCD", "TCD"), priorpr=1)
ecurve4 <- survfit(efit4, newdata=edummy)

oldpar <- par(mfrow=c(1,2), mar=c(4.1, 3.1, 3.1, .1))

pstack(ecurve4[,1,c(2,4,3,1)],
       xscale=365.25, ylim=c(0,1),
       xlab="Years since transplant", ylab="Predicted probabilities")
text(rep(4*365, 4), c(.35, .54, .66, .9), cex=.7, 
     c("Alive in remission, PR", "Relapse or death after PR",
       "Relapse or death without PR", "Alive in remission, no PR"))
title("No TCD")

pstack(ecurve4[,2,c(2,4,3,1)],
       xscale=365.25, ylim=c(0,1),
       xlab="Years since transplant", ylab="Predicted probabilities")
text(rep(4*365, 4), c(.35, .65, .8, .95), cex=.7, 
     c("Alive in remission, PR", "Relapse or death after PR",
       "Relapse or death without PR", "Alive in remission, no PR"))
title("TCD")
par(oldpar)