\documentclass[a4paper,12pt]{article} \usepackage[utf8]{inputenc} \usepackage{amsmath} % \VignetteIndexEntry{Parametric duration models} \newcommand{\btheta}{{\ensuremath{\boldsymbol{\theta}}}} \newcommand{\bbeta}{{\ensuremath{\boldsymbol{\beta}}}} \newcommand{\bz}{\ensuremath{\mathbf{z}}} \newcommand{\bZ}{\ensuremath{\mathbf{Z}}} \newcommand{\bx}{\ensuremath{\mathbf{x}}} \newcommand{\bs}{\ensuremath{\mathbf{s}}} \newcommand{\bt}{\ensuremath{\mathbf{t}}} \newcommand{\bu}{\ensuremath{\mathbf{u}}} \newcommand{\bd}{\ensuremath{\mathbf{d}}} \newcommand{\bis}{\ensuremath{\mathbf{{\prime\prime}}}} \newcommand{\pe}[1]{{\ensuremath{\frac{\partial}{\partial #1}}}} \newcommand{\pt}[2]{{\ensuremath{\frac{\partial^2}{\partial #1\partial #2}}}} \newcommand{\ezb}{\ensuremath{{e^{\bz_i \bbeta}}}} \title{Parametric proportional hazards and accelerated failure time models} \author{G{\"o}ran Brostr{\"o}m} \date{February 16, 2009} \begin{document} \maketitle \begin{abstract} A unified implementation of parametric proportional hazards (PH) and accelerated failure time (AFT) models for right-censored or interval-censored and left-truncated data is described. The description here is vaĺid for time-constant covariates, but the necessary modifications for handling time-varying covariates are implemented in \emph{\tt eha}. Note that only piecewise constant time variation is handled. \end{abstract} \section{Introduction} There is a need for software for analyzing parametric proportional hazards (PH) and accelerated failure time (AFT) data, that are right or interval censored and left truncated. \section{The proportional hazards model}\label{sec:ph} We define proportional hazards models in terms of an expansion of a given survivor function $S_0$, \begin{equation}\label{eq:sur} s_\btheta(t; \bz) = \{S_0(g(t, \btheta))\}^{\exp(\bz\bbeta)}, \end{equation} where $\btheta$ is a parameter vector used in modeling the baseline distribution, $\bbeta$ is the vector of regression parameters, and $g$ is a positive function, which helps defining a parametric family of baseline survivor functions through \begin{equation}\label{eq:surv} S(t; \btheta) = S_0\bigl(g(t, \btheta)\bigr), \quad t > 0, \quad \btheta \in \boldsymbol{\varTheta}. \end{equation} With $f_0$ and $h_0$ defined as the density and hazard functions corresponding to $S_0$, respectively, the density function corresponding to $S$ is \begin{equation*} \begin{split} f(t; \btheta) &= -\frac{\partial}{\partial t} S(t, \btheta)\\ &= -\frac{\partial}{\partial t} S_0(g(t, \btheta)) \\ &= g_t(t, \btheta) f_0(g(t, \btheta)), \end{split} \end{equation*} where \begin{equation*} g_t(t, \btheta) = \frac{\partial}{\partial t} g(t, \btheta). \end{equation*} Correspondingly, the hazard function is \begin{equation}\label{eq:haz} \begin{split} h(t; \btheta) &= \frac{f(t; \btheta)}{S(t; \btheta)} \\ &= g_t(t, \btheta) h_0(g(t, \btheta)). \end{split} \end{equation} So, the proportional hazards model is \begin{equation}\label{eq:prop} \begin{split} \lambda_{\btheta}(t; \bz) &= h(t; \btheta) \exp(\bz\bbeta)\\ &= g_t(t, \btheta) h_0(g(t, \btheta)) \exp(\bz\bbeta), \end{split} \end{equation} corresponding to \eqref{eq:sur}. \subsection{Data and the likelihood function} Given left truncated and right or interval censored data $(s_i, t_i, u_i, d_i, \bz_i)$, $i = 1, \ldots, n$ and the model \eqref{eq:prop}, the likelihood function becomes \begin{equation}\label{eq:lik} \begin{split} L\bigl((\btheta, \bbeta); (\bs, \bt, \bu, \bd), \bZ\bigr) &= \prod_{i=1}^n \bigl\{\bigl(h(t_i; \btheta)\exp(\bz_i\bbeta)\bigr)^{I_{\{d_i=1\}}} \\ & \quad \times \bigl(S(t_i; \btheta)^{\exp(\bz_i\bbeta)} \bigr)^{I_{\{d_i \ne 2\}}}\\ & \quad \times \bigl(S(t_i; \btheta)^{\exp(\bz_i\bbeta)} - S(u_i; \btheta)^{\exp(\bz_i\bbeta)} \bigr)^{I_{\{d_i = 2\}}} \\ & \quad \times S(s_i;\btheta)^{-\exp(\bz_i\bbeta)} \bigr\} \end{split} \end{equation} %\end{document} Here, for $i = 1, \ldots, n$, $s_i < t_i \le u_i$ are the left truncation and exit intervals, respectively, $d_i = 0$ means that $t_i = u_i$ and right censoring at $u_i$, $d_i = 1$ means that $t_i = u_i$ and an event at $u_i$, and $d_i = 2$ means that $t_i < u_i$ and an event occurs in the interval $(t_i, u_i)$ (interval censoring) and $\bz_i = (z_{i1}, \ldots, z_{ip})$ is a vector of explanatory variables with corresponding parameter vector $\bbeta = (\beta_1, \ldots, \beta_p)$, $i = 1, \ldots, n$. %And %\begin{equation*} %P(t_i, u_i, \bz_i; \btheta, \bbeta) = S(t_i; \btheta)^{\exp(\bz_i\bbeta)} %- S(u_i; \btheta)^{\exp(\bz_i\bbeta)} %\end{equation*} From \eqref{eq:lik} we now get the log likelihood and the score vector in a straightforward manner. \begin{equation} \begin{split} \ell\bigl((\btheta, \bbeta); (\bs, \bt, \bu, \bd), \bZ\bigr) &= \sum_{i: d_i = 1} \bigl\{\log h(t_i; \btheta) + \bz_i\bbeta\bigr\} \\ &\quad + \sum_{i:d_i \ne 2} e^{\bz_i\bbeta} \log S(u_i; \btheta) \\ &\quad + \sum_{i:d_i = 2} \log\bigl\{S(t_i; \btheta)^{\ezb} - S(u_i; \btheta)^{e^{\bz_i\bbeta}} \bigr\} \\ & \quad - \sum_{i=1}^n \ezb \log S(s_i; \btheta) \end{split} \end{equation} and (in the following we drop the long argument list to $\ell$), for the regression parameters \bbeta, \begin{equation} \begin{split} \frac{\partial}{\partial \beta_j}\ell &= \sum_{i:d_i=1} z_{ij} \\ &\quad + \sum_{i:d_i \ne 2} z_{ij}\ezb \log S(t_i; \btheta) \\ &\quad + \sum_{i:d_i = 2}z_{ij}\ezb \frac{S(t_i;\btheta)^\ezb\log S(t_i;\btheta) - S(u_i;\btheta)^\ezb\log S(u_i;\btheta)} {S(t_i; \btheta)^\ezb - S(u_i; \btheta)^\ezb} \\ & \quad - \sum_{i=1}^n z_{ij}\ezb\log S(s_i; \btheta), \quad j = 1, \ldots, p, \end{split} \end{equation} and for the ``baseline'' parameters \btheta, in vector form, \begin{equation} \begin{split} \frac{\partial}{\partial \btheta}\ell &= \sum_{i:d_i = 1} \frac{h_\btheta(t_i, \btheta)}{h(t_i, \btheta)} \\ &\quad + \sum_{i:d_i \ne 2}e^{\bz_i\bbeta} \frac{S_\btheta(t_i; \btheta)}{S(t_i; \btheta)} \\ &\quad + \sum_{i:d_i = 2} \ezb \frac{S(t_i;\btheta)^{\ezb - 1} S_\btheta(t_i;\btheta) - S(u_i;\btheta)^{\ezb - 1} S_\btheta(u_i;\btheta)} {S(t_i; \btheta)^\ezb - S(u_i; \btheta)^\ezb} \\ & \quad - \sum_{i=1}^n\ezb\frac{S_\btheta(s_i; \btheta)}{S(s_i; \btheta)}. \end{split} \end{equation} From \eqref{eq:haz}, \begin{equation}\label{eq:hazprim} \begin{split} h_\btheta(t, \btheta) &= \pe{\btheta}h(t, \btheta) \\ &= g_{t\btheta}(t, \btheta)h_0(g(t, \btheta)) + g_t(t, \btheta) g_\theta(t, \btheta)h^\prime_0(g(t, \btheta)), \end{split} \end{equation} and, from \eqref{eq:surv}, \begin{equation}\label{eq:survprim} \begin{split} S_\btheta(t; \btheta) &= \pe{\btheta}S(t; \btheta) = \pe{\btheta}S_0\bigl(g(t, \btheta)\bigr)\\ &= -g_\btheta(t, \btheta) f_0\bigl(g(t, \btheta)\bigr). \end{split} \end{equation} For estimating standard errors, the observed information (the negative of the hessian) is useful. However, instead of the error-prone and tedious work of calculating analytic second-order derivatives, we will rely on approximations by numerical differentiation. \section{The shape--scale families} From \eqref{eq:sur} we get a \emph{shape--scale} family of distributions by choosing $\btheta = (p, \lambda)$ and \begin{equation*} g(t, (p, \lambda)) = \biggl(\frac{t}{\lambda}\biggr)^p, \quad t \ge 0; \quad p, \lambda > 0. \end{equation*} However, for reasons of efficient numerical optimization and normality of parameter estimates, we use the parametrisation $p = \exp(\gamma)$ and $\lambda = \exp(\alpha)$, thus redefining $g$ to \begin{equation} \label{eq:gshsc} g(t, (\gamma, \alpha)) = \biggl(\frac{t}{\exp(\alpha)}\biggr)^{\exp(\gamma)}, \quad t \ge 0; \quad -\infty < \gamma, \alpha < \infty. \end{equation} For the calculation of the score and hessian of the log likelihood function, we need some partial derivatives of $g$. They are found in an appendix. \subsection{The Weibull family of distributions} The Weibull family of distributions is obtained by \begin{equation*} S_0(t) = \exp(-t), \quad t \ge 0, \end{equation*} leading to \begin{equation*} f_0(t) = \exp(-t), \quad t \ge 0, \end{equation*} and \begin{equation*} h_0(t) = 1, \quad t \ge 0. \end{equation*} We need some first and second order derivatives of $f$ and $h$, and they are particularly simple in this case, for $h$ they are both zero, and for $f$ we get \begin{equation*} f_0^\prime(t) = -\exp(-t), \quad t \ge 0. \end{equation*} \subsection{The EV family of distributions} The EV (Extreme Value) family of distributions is obtained by setting \begin{equation*} h_0(t) = \exp(t), \quad t \ge 0, \end{equation*} leading to \begin{equation*} S_0(t) = \exp\{-(\exp(t) - 1)\}, \quad t \ge 0, \end{equation*} The rest of the necessary functions are easily derived from this. \subsection{The Gompertz family of distributions} The Gompertz family of distributions is given by \begin{equation*} h(t) = \tau\exp(t/\lambda), \quad t \ge 0; \quad \tau, \lambda > 0. \end{equation*} This family is not directly possible to generate from the described shape-scale models, so it is treated separately by direct maximum likelihood. \subsection{Other families of distributions} Included in the \emph{eha} package are the lognormal and the loglogistic distributions as well. \section{The accelerated failure time model} In the description of this family of models, we generate a scape-scale family of distributions as defined by the equations \eqref{eq:surv} and \eqref{eq:gshsc}. We get \begin{equation}\label{eq:survaft} \begin{split} S(t; (\gamma, \alpha)) &= S_0\bigl(g(t, (\gamma, \alpha))\bigr) \\ &= S_0\biggl(\biggl\{\frac{t}{\exp(\alpha)}\biggr\}^{\exp(\gamma)}\biggr) , \quad t > 0, \quad -\infty < \gamma, \alpha < \infty. \end{split} \end{equation} Given a vector $\bz = (z_1, \ldots, z_p)$ of explanatory variables and a vector of corresponding regression coefficients $\bbeta = (\beta_1, \ldots, \beta_p)$, the AFT regression model is defined by \begin{equation}\label{eq:aftreg} \begin{split} S(t; (\gamma, \alpha, \bbeta)) &= S_0\bigl(g(t\exp(\bz\bbeta), (\gamma, \alpha))\bigr) \\ &= S_0\biggl(\biggl\{\frac{t\exp(\bz\bbeta)}{\exp(\alpha)}\biggr\}^{\exp(\gamma)}\biggr) \\ &= S_0\biggl(\biggl\{\frac{t}{\exp(\alpha - \bz\bbeta)}\biggr\}^{\exp(\gamma)}\biggr) \\ &= S_0\bigl(g(t, (\gamma, \alpha - \bz\bbeta))\bigr), \quad t > 0. \end{split} \end{equation} So, by defining $\btheta = (\gamma, \alpha - \bz\bbeta)$, we are back in the framework of Section \ref{sec:ph}. We get \begin{equation*} f(t; \btheta) = g_t(t, \btheta) f_0(g(t, \btheta)) \end{equation*} and \begin{equation}\label{eq:afthaz} h(t; \btheta) = g_t(t, \btheta) h_0(g(t, \btheta)), \end{equation} defining the AFT model generated by the survivor function $S_0$ and corresponding density $f_0$ and hazard $h_0$. \subsection{Data and the likelihood function} Given left truncated and right or interval censored data $(s_i, t_i, u_i, d_i, \bz_i)$, $i = 1, \ldots, n$ and the model \eqref{eq:afthaz}, the likelihood function becomes \begin{equation}\label{eq:aftlik} \begin{split} L\bigl((\gamma, \alpha, \bbeta); (\bs, \bt, \bd), \bZ\bigr) &= \prod_{i=1}^n \bigl\{h(t_i; \btheta_i)^{I_{\{d_i=1\}}} \\ & \quad \times S(t_i; \btheta_i)^{I_{\{i\ne 2\}}} \\ & \quad \times \bigl(S(t_i; \btheta_i) - S(u_i; \btheta_i)\bigr)^{I_{\{d_i=2\}}} \\ & \quad \times S(s_i; \btheta_i)^{-1}\bigr\} \end{split} \end{equation} Here, for $i = 1, \ldots, n$, $s_i < t_i \le u_i$ are the left truncation and exit intervals, respectively, $d_i = 0$ means that $t_i = u_i$ and right censoring at $t_i$, $d_i = 1$ means that $t_i = u_i$ and an event at $t_i$, and $d_i = 2$ means that $t_i < u_i$ and an event uccurs in the interval $(t_i, u_i)$ (interval censoring), and $\bz_i = (z_{i1}, \ldots, z_{ip})$ is a vector of explanatory variables with corresponding parameter vector $\bbeta = (\beta_1, \ldots, \beta_p)$, $i = 1, \ldots, n$. From \eqref{eq:aftlik} we now get the log likelihood and the score vector in a straightforward manner. \begin{equation*} \begin{split} \ell\bigl((\gamma, \alpha, \bbeta); (\bs, \bt, \bu, \bd), \bZ\bigr) &= \sum_{i:d_i=1} \log h(t_i; \btheta_i) \\ &\quad + \sum_{i:d_i \ne 2} \log S(t_i; \btheta_i) \\ & \quad + \sum_{i:d_i = 2} \log\bigl(S(t_i; \btheta_i\bigr) - S(u_i; \btheta_i\bigr)\bigr) \\ &\quad - \sum_{i=1}^n \log S(s_i; \btheta_i) \end{split} \end{equation*} and (in the following we drop the long argument list to $\ell$), for the regression parameters \bbeta, \begin{equation*} \begin{split} \frac{\partial}{\partial \beta_j}\ell &= \sum_{d_i=1} \frac{h_j(t_i, \btheta_i)}{h(t_i, \btheta_i)} + \sum_{d_i \ne 2} \frac{S_j(t_i; \btheta_i)}{S(t_i; \btheta_i)} \\ & \quad + \sum_{d_i = 2} \frac{S_j(t_i; \btheta_i) - S_j(u_i; \btheta_i)}{S(t_i; \btheta_i) - S(u_i; \btheta_i)} - \sum_{i = 1}^n \frac{S_j(s_i; \btheta_i)}{S(s_i; \btheta_i)} \\ &= -\sum_{d_i=1} z_{ij} \frac{h_\alpha(t_i, \btheta_i)}{h(t_i, \btheta_i)} - \sum_{d_i \ne 2} z_{ij}\frac{S_\alpha(t_i; \btheta_i)}{S(t_i; \btheta_i)} \\ & \quad - \sum_{d_i = 2} z_{ij}\frac{S_\alpha(t_i; \btheta_i) - S_\alpha(u_i; \btheta_i)}{S(t_i; \btheta_i) - S(u_i; \btheta_i)} + \sum_{i = 1}^n z_{ij}\frac{S_\alpha(s_i; \btheta_i)}{S(s_i; \btheta_i)} \end{split} \end{equation*} and for the ``baseline'' parameters $\gamma$ and $\alpha$, \begin{equation*} \begin{split} \frac{\partial}{\partial \gamma}\ell &= \sum_{i:d_i=1} \frac{h_\gamma(t_i, \btheta_i)}{h(t_i, \btheta_i)} + \sum_{i:d_i \ne 2}\frac{S_\gamma(t_i; \btheta_i)}{S(t_i; \btheta_i)} \\ & \quad + \sum_{i:d_i = 2}\frac{S_\gamma(t_i; \btheta_i) - S_\gamma(u_i; \btheta_i)} {S(t_i; \btheta_i) - S(u_i; \btheta_i)} - \sum_{i=1}^n \frac{S_\gamma(s_i; \btheta_i)}{S(s_i; \btheta_i)}, \end{split} \end{equation*} and \begin{equation*} \begin{split} \frac{\partial}{\partial \alpha}\ell &= \sum_{i:d_i=1} \frac{h_\alpha(t_i, \btheta_i)}{h(t_i, \btheta_i)} + \sum_{i:d_i \ne 2}\frac{S_\alpha(t_i; \btheta_i)}{S(t_i; \btheta_i)} \\ & \quad + \sum_{i:d_i = 2}\frac{S_\alpha(t_i; \btheta_i) - S_\alpha(u_i; \btheta_i)} {S(t_i; \btheta_i) - S(u_i; \btheta_i)} - \sum_{i=1}^n \frac{S_\alpha(s_i; \btheta_i)}{S(s_i; \btheta_i)}. \end{split} \end{equation*} Here, from \eqref{eq:haz}, \begin{equation*}\label{eq:gammahazprim} \begin{split} h_\gamma(t, \btheta_i) &= \pe{\gamma}h(t, \btheta_i) \\ &= g_{t\gamma}(t, \btheta_i)h_0(g(t, \btheta_i)) + g_t(t, \btheta_i) g_\gamma(t, \btheta_i)h^\prime_0(g(t, \btheta_i)), \end{split} \end{equation*} \begin{equation*}\label{eq:alphahazprim} \begin{split} h_\alpha(t, \btheta_i) &= \pe{\alpha}h(t, \btheta_i) \\ &= g_{t\alpha}(t, \btheta_i)h_0(g(t, \btheta_i)) + g_t(t, \btheta_i) g_\alpha(t, \btheta_i)h^\prime_0(g(t, \btheta_i)), \end{split} \end{equation*} and \begin{equation*}\label{eq:betahazprim} \begin{split} h_j(t, \btheta_i) &= \pe{\beta_j}h(t, \btheta_i) = \pe{\alpha}h(t, \btheta_i)\pe{\beta_j}(\alpha - \bz_i\bbeta) \\ &= -z_{ij}h_\alpha(t, \btheta_i), \quad j = 1, \ldots, p. \end{split} \end{equation*} Similarly, from \eqref{eq:surv} we get \begin{equation*}\label{eq:gammasurvprim} \begin{split} S_\gamma(t; \btheta_i) &= \pe{\gamma}S(t; \btheta_i) = \pe{\gamma}S_0\bigl(g(t, \btheta_i)\bigr)\\ &= -g_\gamma(t, \btheta_i) f_0\bigl(g(t, \btheta_i)\bigr), \end{split} \end{equation*} \begin{equation*}\label{eq:alphasurvprim} \begin{split} S_\alpha(t; \btheta_i) &= \pe{\alpha}S(t; \btheta_i) = \pe{\alpha}S_0\bigl(g(t, \btheta_i)\bigr)\\ &= -g_\alpha(t, \btheta_i) f_0\bigl(g(t, \btheta_i)\bigr). \end{split} \end{equation*} and \begin{equation*}\label{eq:betasurvprim} \begin{split} S_j(t; \btheta_i) &= \pe{\beta_j}S(t; \btheta_i) = \pe{\alpha}S_0\bigl(g(t, \btheta_i)\bigr)\pe{\beta_j}(\alpha - \bz_i\bbeta) \\ &= -z_{ij} S_\alpha(t, \btheta_i), \quad j = 1, \ldots, p. \end{split} \end{equation*} For estimating standard errors, the observed information (the negative of the hessian) is useful, so \begin{multline*} -\pt{\beta_j}{\beta_m}\ell = -\sum_{i:d_i=1} z_{ij}z_{im}\biggl\{\frac{h_{\alpha\alpha}(t_i, \btheta_i)}{h(t_i, \btheta_i)} - \biggl(\frac{h_\alpha(t_i, \btheta_i)}{h(t_i, \btheta_i)}\biggr)^2\biggr\} \\ - \sum_{i:i \ne 2} z_{ij}z_{im} \biggl\{\frac{S_{\alpha\alpha}(t_i, \btheta_i)}{S(t_i, \btheta_i)} -\biggl(\frac{S_\alpha(t_i, \btheta_i)}{S(t_i, \btheta_i)}\biggr)^2\biggr\} \\ - \sum_{i:i = 2} z_{ij}z_{im} \biggl\{\frac{S_{\alpha\alpha}(t_i, \btheta_i) - S_{\alpha\alpha}(u_i, \btheta_i)}{S(t_i, \btheta_i) - S(u_i, \btheta_i)} -\biggl(\frac{S_\alpha(t_i, \btheta_i) - S_{\alpha}(u_i, \btheta_i)}{S(t_i, \btheta_i) - S(u_i, \btheta_i)}\biggr)^2\biggr\} \\ + \sum_{i=1}^nz_{ij}z_{im}\biggl\{\frac{S_{\alpha\alpha}(s_i, \btheta_i)}{S(s_i, \btheta_i)} -\biggl(\frac{S_\alpha(s_i, \btheta_i)}{S(s_i, \btheta_i)}\biggr)^2\biggr\}, \quad j, m = 1, \ldots, p, \end{multline*} and \begin{multline*} -\pt{\beta_j}{\tau}\ell = \sum_{i:d_i=1} z_{ij}\biggl\{\frac{h_{\alpha\tau}(t_i, \btheta_i)}{h(t_i, \btheta_i)} - \frac{h_\alpha(t_i, \btheta_i)h_{\tau}(t_i, \btheta_i)}{h^2(t_i, \btheta_i)}\biggr\} \\ + \sum_{i:i \ne 2} z_{ij} \biggl\{\frac{S_{\alpha\tau}(t_i, \btheta_i)}{S(t_i, \btheta_i)} -\frac{S_\alpha(t_i, \btheta_i) S_{\tau}(t_i, \btheta_i)}{S^2(t_i, \btheta_i)}\biggr\} \\ + \sum_{i:i = 2} z_{ij} \biggl\{\frac{S_{\alpha\tau}(t_i, \btheta_i) - S_{\alpha\tau}(u_i, \btheta_i)}{S(t_i, \btheta_i) - S(u_i, \btheta_i)} \\ -\frac{\bigl(S_\alpha(t_i, \btheta_i) - S_{\alpha}(u_i, \btheta_i)\bigr)\bigl(S_\tau(t_i, \btheta_i) - S_{\tau}(u_i, \btheta_i)\bigr)} {\bigl(S(t_i, \btheta_i) - S(u_i, \btheta_i)\bigr)^2}\biggr\} \\ - \sum_{i=1}^nz_{ij}\biggl\{\frac{S_{\alpha\tau}(s_i, \btheta_i)}{S(s_i, \btheta_i)} -\frac{S_\alpha(s_i, \btheta_i) S_\tau(s_i, \btheta_i)} {S^2(s_i, \btheta_i)}\biggr\} \\ \quad j = 1, \ldots, p; \; \tau = \gamma, \alpha, \end{multline*} and finally \begin{multline*} -\pt{\tau}{\tau^\prime}\ell = -\sum_{i:d_i=1} \biggl\{\frac{h_{\tau^\prime\tau}(t_i, \btheta_i)}{h(t_i, \btheta_i)} - \frac{h_{\tau^\prime}(t_i, \btheta_i)h_{\tau}(t_i, \btheta_i)}{h^2(t_i, \btheta_i)}\biggr\} \\ - \sum_{i:i \ne 2} \biggl\{\frac{S_{\tau^\prime\tau}(t_i, \btheta_i)}{S(t_i, \btheta_i)} -\frac{S_{\tau^\prime}(t_i, \btheta_i) S_{\tau}(t_i, \btheta_i)}{S^2(t_i, \btheta_i)}\biggr\} \\ - \sum_{i:i = 2} \biggl\{\frac{S_{\tau^\prime\tau}(t_i, \btheta_i) - S_{\tau^\prime\tau}(u_i, \btheta_i)}{S(t_i, \btheta_i) - S(u_i, \btheta_i)} \\ -\frac{\bigl(S_{\tau^\prime}(t_i, \btheta_i) - S_{\tau^\prime}(u_i, \btheta_i)\bigr)\bigl(S_\tau(t_i, \btheta_i) - S_{\tau}(u_i, \btheta_i)\bigr)} {\bigl(S(t_i, \btheta_i) - S(u_i, \btheta_i)\bigr)^2}\biggr\} \\ + \sum_{i=1}^n\biggl\{\frac{S_{\tau^\prime\tau}(s_i, \btheta_i)}{S(s_i, \btheta_i)} -\frac{S_{\tau^\prime}(s_i, \btheta_i) S_\tau(s_i, \btheta_i)} {S^2(s_i, \btheta_i)}\biggr\} \\ \quad (\tau, \tau^\prime) = (\gamma, \gamma), (\gamma, \alpha), (\alpha, \alpha). \end{multline*} The second order partial derivatives $h_{\tau\tau^\prime}$ and $S_{\tau\tau^\prime}$ are \begin{equation} \begin{split} h_{\tau\tau^\prime}(t, \btheta) &= \pe{\tau^\prime} h_\tau(t, \btheta) \\ &= g_{t\tau\tau^\prime}(t, \btheta) h_0(g(t, \btheta)) + g_{t\tau}(t, \btheta) g_{\tau^\prime}(t, \btheta) h_0^\prime(g(t, \btheta)) \\ &\quad + g_t(t, \btheta) g_\theta(t, \btheta)g_{\tau^\prime}(t, \btheta)h^{\bis}_0(g(t, \btheta)) \\ &\quad + g_t(t, \btheta) g_{\theta\theta^\prime}(t, \btheta)h^\prime_0(g(t, \btheta)) \\ &\quad + g_{t\tau^\prime}(t, \btheta) g_\theta(t, \btheta)h^\prime_0(g(t, \btheta)) \\ &= h_0(g(t, \btheta)) g_{t\tau\tau^\prime}(t, \btheta) \\ &\quad + h_0^\prime(g(t, \btheta)) \bigl\{g_t(t, \theta)g_{\theta\theta^\prime}(t, \btheta) \\ & \quad \quad \quad \quad \quad + g_{t\tau}(t, \btheta) g_{\tau^\prime}(t, \btheta) \\ &\quad \quad \quad \quad \quad + g_{t\tau^\prime}(t, \btheta) g_{\tau}(t, \btheta) \bigr\} \\ &\quad + h_0^\bis(g(t, \btheta))g_t(t, \btheta) g_\theta(t, \btheta)g_{\tau^\prime}(t, \btheta),, \\ & \quad (\tau, \tau^\prime) = (\gamma, \gamma), (\gamma, \lambda), (\lambda, \lambda), \end{split} \end{equation} and from \eqref{eq:survprim}, \begin{equation} \begin{split} S_{\tau\tau^\prime}(t, \btheta) &= \pe{\tau^\prime} S_\tau(t; \btheta) \\ &= -\bigl\{g_{\tau\tau^\prime}(t, \btheta)f_0\bigl(g(t, \btheta)\bigr) + g_\tau(t, \btheta) g_{\tau^\prime}(t, \btheta) f_0^\prime\bigl(g(t, \btheta)\bigr) \bigr\}, \\ & \quad (\tau, \tau^\prime) = (\gamma, \gamma), (\gamma, \lambda), (\lambda, \lambda). \end{split} \end{equation} \appendix \section{Some partial derivatives} The function (see \eqref{eq:gshsc}) \begin{equation} g(t, (\gamma, \alpha)) = \biggl(\frac{t}{\exp(\alpha)}\biggr)^{\exp(\gamma)}, \quad t \ge 0; \quad -\infty < \gamma, \alpha < \infty. \end{equation} has the following partial derivatives: \begin{align*} g_t\bigl(t, (\gamma, \alpha)\bigr) &= \frac{\exp(\gamma)}{t}g\bigl(t, (\gamma, \alpha)\bigr), \quad t > 0 \\ g_\gamma\bigl(t, (\gamma, \alpha)\bigr) &= g\bigl(t, (\gamma, \alpha)\bigr) \log\bigl\{g\bigl(t, (\gamma, \alpha)\bigr)\bigr\} \\ g_\alpha\bigl(t, (\gamma, \alpha)\bigr) &= -\exp(\gamma)g\bigl(t, (\gamma, \alpha)\bigr) \end{align*} \begin{align*} g_{t\gamma}\bigl(t, (\gamma, \alpha)\bigr) &= g_t\bigl(t, (\gamma, \alpha)\bigr) + \frac{\exp(\gamma)}{t} g_\gamma\bigl(t, (\gamma, \alpha)\bigr), \quad t > 0\\ g_{t\alpha}\bigl(t, (\gamma, \alpha)\bigr) &= -\exp(\gamma) g_t\bigl(t, (\gamma, \alpha)\bigr), \quad t > 0 \\ g_{\gamma^2}\bigl(t, (\gamma, \alpha)\bigr) &= g_\gamma\bigl(t, (\gamma, \alpha)\bigr)\bigl\{1 + \log g\bigl(t, (\gamma, \alpha)\bigr)\bigr\}\\ g_{\gamma\alpha}\bigl(t, (\gamma, \alpha)\bigr) &= g_\alpha\bigl(t, (\gamma, \alpha)\bigr)\bigl\{1 + \log g\bigl(t, (\gamma, \alpha)\bigr)\bigr\}\\ g_{\alpha^2}\bigl(t, (\gamma, \alpha)\bigr) &= -\exp(\gamma)g_\alpha\bigl(t, (\gamma, \alpha)\bigr) \end{align*} \begin{align*} g_{t\gamma^2}\bigl(t, (\gamma, \alpha)\bigr) &= g_{t\gamma}\bigl(t,(\gamma, \alpha)\bigr) \\ &\quad + \frac{\exp(\gamma)}{t} g_\gamma\bigl(t, (\gamma, \alpha)\bigr) \bigl\{2 + \log g\bigl(t, (\gamma, \alpha)\bigr)\bigr\} \\ g_{t\gamma\alpha}\bigl(t, (\gamma, \alpha)\bigr) &= -\exp(\gamma)\bigl\{ g_t\bigr(t, (\gamma, \alpha)\bigr) + g_{t\gamma}\bigr(t, (\gamma, \alpha)\bigr) \bigr\} \\ g_{t\alpha^2}\bigl(t, (\gamma, \alpha)\bigr) &= -\exp(\gamma) g_{t\alpha}\bigl(t, (\gamma, \alpha)\bigr) \end{align*} The formulas will be easier to read if we remove all function arguments, i.e., $(t, (\gamma, \alpha))$: \begin{align*} g_t &= \frac{\exp(\gamma)}{t}g, \quad t > 0 \\ g_\gamma &= g \log g \\ g_\alpha &= -\exp(\gamma)g \\ g_{t\gamma} &= g_t + \frac{\exp(\gamma)}{t} g_\gamma, \quad t > 0\\ g_{t\alpha} &= -\exp(\gamma) g_t, \quad t > 0 \\ g_{\gamma^2} &= g_\gamma\bigl\{1 + \log g\bigr\}\\ g_{\gamma\alpha} &= g_\alpha\bigl\{1 + \log g\bigr\}\\ g_{\alpha^2} &= -\exp(\gamma)g_\alpha \\ g_{t\gamma^2} &= g_{t\gamma} + \frac{\exp(\gamma)}{t} g_\gamma \bigl\{2 + \log g\bigr\}, \quad t > 0 \\ g_{t\gamma\alpha} &= -\exp(\gamma)\bigl\{g_t + g_{t\gamma}\bigr\}, \quad t > 0 \\ g_{t\alpha^2} &= -\exp(\gamma) g_{t\alpha}, \quad t > 0 \end{align*} \end{document}