% -*- mode: noweb; noweb-default-code-mode: R-mode; -*-
\documentclass[a4paper]{article}

% \VignetteIndexEntry{Comparative analysis of phylogenetics and evolution in R} 
% \VignetteDepends{ape} 
% \VignetteKeyword{stats} 


\usepackage{geometry}
\usepackage[utf8]{inputenc} % for fancy quotes
\usepackage[round]{natbib}
\usepackage{makeidx} 
\usepackage[font=small,format=plain,labelfont=bf,up,textfont=rm,up]{caption}
\usepackage{array}
\usepackage[svgnames]{xcolor}
\usepackage{colortbl} %% for markup of V matrices

\geometry{a4paper, textwidth=15cm, textheight=25cm}
\makeindex

% \SweaveOpts{keep.source=TRUE, echo=TRUE}

%%
%% Problems with comments in R code - want to keep them
%% but SweaveOpts keep.source=TRUE still ditches leading
%% and trailing comments, so currently need to duplicate the
%% code block inside verbatim if you want comments. 
%% R 2.14 may solve this
%%

% common representation of package name
\newcommand{\caper}{\texttt{caper}}
\newcommand{\fnidx}[1]{\texttt{#1}\index{Functions!#1}}
\newcommand{\dtidx}[1]{\texttt{#1}\index{Data!#1}}
\newcommand{\pkgidx}[1]{\texttt{#1}\index{Packages!#1}}


\title{The \caper{} package: comparative analysis of phylogenetics and evolution in R}
\author{David Orme}


\begin{document}

\maketitle

<<setup, print=FALSE, echo=FALSE>>=
	library(caper)
	## whilst code is being tested, load most recent versions from the pkg repository
	for(f in dir('../../R', full=TRUE)) (source(f))
@

This vignette documents the use of the \caper{} package for R
\citep{R-Development-Core-Team.2011.a} in carrying out a range of comparative
analysis methods for phylogenetic data. The \caper{} package, and the code in
this vignette, requires the \texttt{ape} package
\citep{Paradis.Claude.ea.2004.a} along with the packages \texttt{mvtnorm} and
\texttt{MASS}.

%% compressed ToC slightly to avoid spilling over page
\linespread{0.95}
\tableofcontents
\linespread{1}

%% add more space between paragraphs at this point
\addtolength{\parskip}{\baselineskip} \section{Background} Comparing the traits
of species (or of groups of species of higher taxonomic rank) can produce deep
insights into evolutionary processes. However, all such analyses should take
into account the degree to which species are related and hence do not provide
independent data on a hypothesis. This can lead both to situations in which
apparently strong relationships rest on relatively few truly independent events
(Fig. \ref{whycaper}a) or where strong relationships within groups are masked by
phylogenetic differences between groups (Fig. \ref{whycaper}b). 

\begin{figure}[htbp]
  \begin{center}
<<whycaper, echo= false, fig=true, width=6, height=6>>=
par(mfrow=c(2,2), mar=c(3,3,1,1), mgp=c(2,0.7,0))

plot(c(0,13),c(0,10), typ="n", bty="n", xaxt="n",yaxt="n", xlab="", ylab="", xaxs="i",yaxs="i")
lines(c(0,7), c(5,8.5))
lines(c(0,7), c(5,1.5))
lines(c(3.5,7), c(3.25,5))
polygon(c(7,10,10), y=c(8.5,10,7))
polygon(c(7,10,10), y=c(5,6.5,3.5))
polygon(c(7,10,10), y=c(1.5,3,0))


points( c(0.5,1.5) +0.5, c(5,5), cex=1, col=1:2)
points(c(7.5,8.5)+0.5, c(8.5,8.5), cex=1, col=1:2)
points(c(4,5)+0.5, c(3.25,3.25), cex=1.5, col=1:2)
points(c(7.5,8.5)+0.5, c(5,5), cex=1.5, col=1:2)
points(c(7.5,8.5)+0.5, c(1.5,1.5), cex=2, col=1:2)

rect(11 - c(.3,.2,.1), c(0,3.5,7), 11 + c(.3,.2,.1) , c(3,6.5,10))
rect(12 - c(.3,.2,.1), c(0,3.5,7), 12 + c(.3,.2,.1) , c(3,6.5,10), border="red")
par(usr=c(0,1,0,1)); text(0.1,0.9, cex=2,"a")

dat <- data.frame(x = rnorm(120, mean= rep(2:4, each=40), sd=0.5),
 				  y = rnorm(120, mean= rep(2:4, each=40), sd=0.5),
				  tx = gl(3,40))

plot(y~x, col=unclass(tx), data=dat, xaxt="n", xlab="", ylab="")
axis(1, col.axis="red")
abline(lm(y~x, data=dat))
for(sub in split(dat, dat$tx)) abline(lm(y~x,  data=sub), col=unclass(sub$tx)[1], lty=2) 


plot(c(0,13),c(0,10), typ="n", bty="n", xaxt="n",yaxt="n", xlab="", ylab="", xaxs="i",yaxs="i")
lines(c(0,7), c(5,8.5))
lines(c(0,7), c(5,1.5))
lines(c(3.5,7), c(3.25,5))
polygon(c(7,10,10), y=c(8.5,10,7))
polygon(c(7,10,10), y=c(5,6.5,3.5))
polygon(c(7,10,10), y=c(1.5,3,0))

points(c(0.5,1.5)+0.5, c(5,5), cex=1, col=1:2)
points(c(7.5,8.5)+0.5, c(8.5,8.5), cex=1, col=1:2)
points(c(4,5)+0.5, c(3.25,3.25), cex=c(1,1.5), col=1:2)
points(c(7.5,8.5)+0.5, c(5,5), cex=c(1,1.5), col=1:2)
points(c(7.5,8.5)+0.5, c(1.5,1.5), cex=c(1,2), col=1:2)

polygon(11 + c(-0,-0.1,0.1,0), c(3,0,0,3))
polygon(12 + c(-0.2,-0.3,0.3,0.2), c(3,0,0,3), border="red")
polygon(11 + c(-0,-0.1,0.1,0), c(6.5,3.5,3.5,6.5))
polygon(12 + c(-0.1,-0.2,0.2,0.1), c(6.5,3.5,3.5,6.5), border="red")
polygon(11 + c(-0,-0.1,0.1,0), c(10,7.5,7.5,10))
polygon(12 + c(-0.0,-0.1,0.1,0.), c(10,7.5,7.5,10), border="red")
par(usr=c(0,1,0,1)); text(0.1,0.9, cex=2, "b")


dat <- data.frame(x = rnorm(120, mean= rep(2:4, each=40), sd=0.5),
				  tx = gl(3,40))
dat$y <- dat$x - rnorm(120, mean= rep(0:2, each=40), sd=0.5)

plot(y~x, col=unclass(tx), data=dat, xaxt="n", xlab="", ylab="")
axis(1, col.axis="red")
abline(lm(y~x, data=dat))
for(sub in split(dat, dat$tx)) abline(lm(y~x,  data=sub), col=unclass(sub$tx)[1], lty=2) 

@ 
    \caption{Phylogenetic autocorrelation in action. a) Simple regression (solid
    line) suggests a strong relationship between two variables; the phylogeny
    shows that within the three main groups, there is no consistent relationship
    (dashed lines) and the apparent relationship stems from only two linked
    shifts in the means of the traits early in the evolution of the clade. b)
    Simple regression (solid line) suggests a weak positive relationship between
    the two variables; the phylogeny shows that there are strong positive
    relationships between the traits within each of the three main groups but
    this is masked by early shifts in the mean value of the red trait.}
    \label{whycaper}
  \end{center}
\end{figure}

The \caper{} package implements two methods that account for this phylogenetic
autocorrelation. The first, originally described by \citet{Felsenstein.1985.a},
is to recognize that the differences between taxa on either side of a
bifurcating node represent independent evolutionary trajectories and that these
differences (`independent contrasts') can be used to test hypotheses in a way
that accounts for the phylogenetic autocorrelation between the taxa.
\citet{Pagel.1992.a} extended this method to permit contrasts to be calculated
at polytomies. The second, described by [refs], is to include the phylogenetic
structure as a covariance matrix in a linear model. 

The \caper{} package also provides a number of other phylogenetic comparative
methods, along with a flexible method for simulating trait evolution on
phylogenies.

%% COMPARATIVE DATASETS

\section{Comparative datasets}

\subsection{The \fnidx{comparative.data} class and objects.}

The majority of functions in \caper{} require both a phylogeny and data set of
traits for the taxa at the tips of that phylogeny. Crucially, the rows of data
need to be matched carefully to the tips to ensure that the phylogenetic
structure in the variables is represented correctly: \caper{} provides the
function \fnidx{comparative.data} to facilitate this. The following simple
example shows the basic operation:

<<comparative.data>>=
phy <- read.tree(text='(((B:2,A:2):1,D:3):1,(C:1,E:1):3);')
dat <- data.frame(
  taxa=c("A","B","C","D","E"), 
  n.species=c(5,9,12,1,13), mass=c(4.1,4.5,5.9,3.0,6.0)
)
cdat <- comparative.data(data=dat, phy=phy, names.col="taxa")

print(cdat)
@

In order to make it easier to use contrast methods, where values are estimated
at internal nodes, the \fnidx{comparative.data} function and contrast methods
use internal node labels to identify values. Creating a comparative data object
will not replace existing labels but will insert simple numeric labels for
unlabelled nodes. The \fnidx{comparative.data} class has a number of generic
methods that allow users to extract a subset of taxa and the relevant linked
data from within a \fnidx{comparative.data} object.

\subsubsection{\texttt{na.omit}\index{Functions!comparative.data!na.omit}}

This method returns a subset of the object containing only those taxa for which
the data are complete (i.e no \texttt{NA} values). By default, the
\fnidx{comparative.data} function drops incomplete rows, but this can be
altered. The examples below show the function in use on the
\dtidx{perissodactyla} dataset.

<<na.omit>>=
data(perissodactyla)
(perisso <- comparative.data(perissodactyla.tree, perissodactyla.data, Binomial))
# but this can be turned off
(perissoFull <- comparative.data(perissodactyla.tree, perissodactyla.data, Binomial, na.omit=FALSE))
na.omit(perisso)
@

The \texttt{scope} argument uses a model formula to indicate which variables to
consider when reducing a comparative data object to complete rows and can be
used directly by \texttt{na.omit}\index{Functions!comparative.data!na.omit} or
indirectly through \fnidx{comparative.data}.

<<na.omit2>>=
comparative.data(perissodactyla.tree, perissodactyla.data, Binomial, scope= log.female.wt ~ log.gestation.length)
na.omit(perissoFull, scope=log.female.wt ~ log.gestation.length + Territoriality)
@

\subsubsection{\texttt{subset}\index{Functions!comparative.data!subset}}

 This method functions like the \texttt{subset} function for data frames: the
 \texttt{subset} argument is used to specify a logical subset of rows using one
 or more of the data columns; the \texttt{select} argument can be used to choose
 which variables to include in the subset.

<<subset>>=
subset(perissoFull, subset=! is.na(Territoriality))
subset(perissoFull, subset=log.female.wt > 5.5, select=c(log.female.wt, log.gestation.length))
@

\subsubsection{\texttt{\char91}\index{Functions!comparative.data!\texttt{\char91}}}

 This method treats the dataset as the rows (taxa) and columns (variables) of a
 matrix. The first index allows taxa to be specified by name or by their
 position in the sequence of tip labels for the phylogeny. The second index
 selects variables in the same way as in the extract method
 (\texttt{\char91}.data.frame) for a data frame.

<<squarebracket>>=
horses <- grep('Equus', perissoFull$phy$tip.label)
perissoFull[horses,]
perissoFull[horses, 2:3]
@

Far more functionality for creating and manipulating comparative datasets is
provided by the \texttt{phylobase} package. It is intended to incorporate this
functionality into \caper{}.

%%
%% EXAMPLE DATASETS
%%

\subsection{Example datasets}

The examples in this vignette make use of the following datasets:

\begin{description}
  \item[\dtidx{shorebird}] This dataset includes a phylogeny of 71 species of
  shorebirds along with data on male and female body mass, egg mass, clutch size
  and mating system \citep{Lislevand.Thomas.2006.a}.
  
  \item[\dtidx{syrphidae}] This dataset includes a phylogeny of 204 genera of
  hoverfly along with data on the species richness of each of those genera
  \citep{Katzourakis.Purvis.ea.2001.a}.
  
  \item[\dtidx{perissodactyla}] This dataset includes a phylogeny of 18
  perissodactyl species and a data frame of female and neonatal mass, gestation
  length and territoriality for 13 of those species. The data frame contains
  missing data and the dataset was provided as an example with the original CAIC
  program  \citep{Purvis.Rambaut.1995.a}.
  
  \item[\dtidx{IsaacEtAl}] The datafile contains  species level phylogenies and
  data sets of nine variables for each of four mammalian orders (Primates,
  Carnivora, Chiroptera and Marsupialia). The data were published in
  supplementary material for \citet{Isaac.Jones.ea.2005.a}. The variables are
  body mass, age at sexual maturity, gestation length, interbirth interval,
  litter size , population density, group size, mass dimorphism and
  length.dimorphism. The data sets all contain missing data and all the
  variables have been natural log transformed.
  
  \item[\dtidx{British Birds}] The datafile contains a species level phylogeny
  for 249 species of British birds (some represented by congeneric surrogates)
  and a data set on the conservation status of 181 of those species
  \citep{Thomas.2008.a}.
\end{description}

%%%%%%%%%%
%% METHODS
%%%%%%%%%%

\section{Methods and functions provided by \caper{}.}

%%%%%%%%%%
%% METHODS
%%%%%%%%%%

The \caper{} package provides two broad sets of methods of accounting for
phylogenetic autocorrelation in analyses. One is the use of phylogenetic
independent contrasts \citep{Felsenstein.1985.a}, the other is the use of a
phylogenetic variance-covariance matrix within a standard linear model
\citep{Freckleton.Harvey.ea.2002.a} [other refs]. These two approaches are
basically equivalent, although small differences may arise from the way in which
polytomies are handled.

<<pglsSetup, echo=FALSE>>=
## demo tree
z <- structure(list(edge = structure(c(11L, 15L, 18L, 18L, 15L, 16L, 
16L, 11L, 12L, 13L, 13L, 19L, 19L, 12L, 14L, 14L, 17L, 17L, 15L, 
18L, 1L, 2L, 16L, 3L, 4L, 12L, 13L, 5L, 19L, 6L, 7L, 14L, 8L, 
17L, 9L, 10L), .Dim = c(18L, 2L)), edge.length = c(5.723, 2.186, 
1.09, 1.09, 0.627, 2.649, 2.649, 1.049, 1.411, 6.54, 5.538, 1.001, 
1.001, 1.863, 6.088, 4.777, 1.312, 1.312), tip.label = c("t8", 
"t6", "t4", "t10", "t9", "t5", "t7", "t1", "t3", "t2"), Nnode = 9L), .Names = c("edge", 
"edge.length", "tip.label", "Nnode"), class = "phylo", order = "cladewise")
V <- VCV.array(z)
@

\subsubsection{Phylogenetic linear models}

In a basic linear regression, the response variable ($y$) is modelled as the
product of the estimated coefficients ($\beta$) and the explanatory variables
($\mathbf{X}$), plus residual variation ($\epsilon$): $y = \beta\mathbf{X} +
\epsilon$. If the cases in the model are related taxa, then the values in $y$
and $\mathbf{X}$ are no longer independent: each value will be more similar to
some values (closer relatives) than others (distant relatives). 

The \fnidx{pgls} function addresses this problem by incorporating the covariance
between taxa into the calculation of estimated coefficients: this is a
generalized least squares (GLS) model. The covariance matrix ($\mathbf{V}$),
showing the expected covariance between each pair of tips is calculated using
the branch lengths of a phylogeny showing how the taxa are related (Fig.
\ref{vcvFig}). 

\begin{figure}[htbp]
  \begin{center}
	%\framebox{  
	\begin{minipage}[c]{0.3\linewidth}
	\raggedleft

<<vcvFig, fig=TRUE, echo=FALSE, print=FALSE, height=2.13, width=1.5>>=
ec <- rep('grey50', 18)
ec[c(1,5,7)] <- 'red'
ec[c(8,14)] <- 'blue'
plot(z, no.margin=TRUE, cex=0.6, label.offset=0.3, edge.col=ec)
@

	\end{minipage}%}
	%\hfill
	%\framebox{
	\begin{minipage}[c]{0.5\linewidth}
	\newcolumntype{P}{>{\centering\arraybackslash$}m{4mm}<{$}}
	\newcommand{\bl}{\cellcolor{PowderBlue}}
	\newcommand{\pk}{\cellcolor{LightPink}}
	\renewcommand\arraystretch{1.2}
	{%\small              
	$\left[
	\begin{array}{*{10}{P}}
	%% V <- VCV.array(z)
	%% V <- V[10:1,10:1]
	%% xV <- xtable(unclass(V), digits=1)
	%% print(xV, only.contents=TRUE, include.rownames=FALSE, 
	%%       hline.after=NULL, include.colnames=FALSE, digits=1)
	% latex table generated in R 2.12.0 by xtable 1.5-6 package
	% Sat Apr 30 12:14:40 2011
	\mathbf{9.0} & 7.7 & \bl 2.9 & 1.0 & 1.0 & 1.0 & 0.0 & 0.0 & 0.0 & 0.0 \\ 
	7.7 & \mathbf{9.0} & \bl 2.9 & 1.0 & 1.0 & 1.0 & 0.0 & 0.0 & 0.0 & 0.0 \\ 
	\bl 2.9 & \bl 2.9 & \mathbf{9.0} & 1.0 & 1.0 & 1.0 & 0.0 & 0.0 & 0.0 & 0.0 \\ 
	1.0 & 1.0 & 1.0 & \mathbf{9.0} & 8.0 & 2.5 & 0.0 & 0.0 & 0.0 & 0.0 \\ 
	1.0 & 1.0 & 1.0 & 8.0 & \mathbf{9.0} & 2.5 & 0.0 & 0.0 & 0.0 & 0.0 \\ 
	1.0 & 1.0 & 1.0 & 2.5 & 2.5 & \mathbf{9.0} & 0.0 & 0.0 & 0.0 & 0.0 \\ 
	0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & \pk \mathbf{9.0} & 6.3 & 5.7 & 5.7 \\ 
	0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 6.3 & \mathbf{9.0} & 5.7 & 5.7 \\ 
	0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 5.7 & 5.7 & \mathbf{9.0} & 7.9 \\ 
	0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 0.0 & 5.7 & 5.7 & 7.9 & \mathbf{9.0} \\ 
	\end{array}
	\hspace{1mm}\right]$}
	\end{minipage}
    \caption{A phylogenetic tree of 10 taxa and the variance covariance matrix
    ($\mathbf{V}$) of that phylogeny. The diagonal of the matrix (bold values)
    shows the path length from each tip to the root (example in red). Off
    diagonal values show the shared path length for a given pair of tips
    (example in blue).} \label{vcvFig}
  \end{center}
\end{figure}

The initial covariance matrix ($\mathbf{V}$) taken from an ultrametric phylogeny
assumes a Brownian model of evolution: that variation between tips accumulates
along all branches of the tree at a rate proportional to the length of the
branches. However, not all variables necessarily correspond to this assumption
and likelihood methods can be used to find transformations of $\mathbf{V}$ that
improve the fit of the model to the data. Three transformations are implemented
in the \fnidx{pgls} function (Fig. \ref{bltrans}):

\begin{description}

  \item[lambda ($\lambda$)]{The internal branch lengths of the phylogeny are
  multiplied by a constant. When $\lambda = 0$, internal branch lengths are all
  zero and all the variation in the data is modelled as a function of the
  independent evolution along the branches leading to the tips. If $\lambda >
  1$, then the data shows more covariance than expected under a Brownian model.

  Note that a \fnidx{pgls} model with a lambda of 0 will not be the same as a
  standard linear model if the tree is not ultrametric: although the covariance
  terms will all be zero, the variances of the tips will differ.}

   \item[delta ($\delta$)]{In this case, all the values in the $\mathbf{V}$
   matrix are raised to the power of $\delta$. This is a transformation of the
   sum of the length of the shared branches between two tips {$l$}:
   $\sum{\left(l\right)}^\delta$.}

   \item[kappa ($\kappa$)]{All branch lengths in the phylogeny are raised to the
   power $\kappa$ --- the elements of the $\mathbf{V}$ are the sum of the
   individually transformed branch lengths $\sum{\left(l^\kappa\right)}$. As
   $\kappa$ decrease towards zero, all branches will become of equal length,
   representing an evolutionary model where variation accumulates only when two
   clades diverge.}
\end{description}

\begin{figure}[htbp]
  \begin{center}
<<bltrans, fig=TRUE, echo=FALSE, print=FALSE>>=
## internal branch lengths
internal <- z$edge[,2] > 10
vlines <- 9 * c(0, 0.5, 1, 1.5)
zz <- z

# layout
layout(matrix(c(1:9), ncol=3))
par(mar=c(0,0.5,2,0.5))
cx <- 0.8
ln <- 0.5
xl <- c(0,15)

# lambda
pvals <- c(1.4, 0.5)
ec <- ifelse(internal,'red', 'grey30')

## zz$edge.length <- ifelse(internal, z$edge.length*pvals[1], z$edge.length)
## plot(zz, edge.col=ec, x.lim=xl, label.offset=0.2)
## mtext(bquote(lambda == .(pvals[1])), side=3, cex=cx, line=ln)
## abline(v=vlines, lty=2, col='grey')
## 
## plot(z,  edge.col=ec, x.lim=xl, label.offset=0.2)
## abline(v=vlines, lty=2, col='grey')
## mtext(expression(lambda == 1), side=3, cex=cx, line=ln)
## 
## zz$edge.length <- ifelse(internal, z$edge.length*pvals[2], z$edge.length)
## plot(zz,  edge.col=ec, x.lim=xl, label.offset=0.2)
## mtext(bquote(lambda == .(pvals[2])), side=3, cex=cx, line=ln)
## abline(v=vlines, lty=2, col='grey')

# get the branching structure back from the matrix
dd <- ifelse(lower.tri(V) | upper.tri(V), V * pvals[1], V)
ddh <- as.phylo(hclust(dist(dd)))
plot(ddh, edge.col = 'red', x.lim=xl, label.offset=0.2)
mtext(bquote(lambda == .(pvals[1])), side=3, cex=cx, line=ln)
abline(v=vlines, lty=2, col='grey')

plot(z,  edge.col=ec, x.lim=xl, label.offset=0.2)
abline(v=vlines, lty=2, col='grey')
mtext(expression(lambda == 1), side=3, cex=cx, line=ln)

dd <- ifelse(lower.tri(V) | upper.tri(V), V * pvals[2], V)
ddh <- as.phylo(hclust(dist(dd)))
plot(ddh, edge.col = 'red', x.lim=xl, label.offset=0.2)
mtext(bquote(lambda == .(pvals[2])), side=3, cex=cx, line=ln)
abline(v=vlines, lty=2, col='grey')


# delta: 
pvals <- c(1.2, 0.5)

# get the branching structure back from the matrix
dd <- V ^ pvals[1]
ddh <- as.phylo(hclust(dist(dd)))
plot(ddh, edge.col = 'red', x.lim=xl, label.offset=0.2)
mtext(bquote(delta == .(pvals[1])), side=3, cex=cx, line=ln)
abline(v=vlines, lty=2, col='grey')

plot(z,  edge.col='red', x.lim=xl, label.offset=0.2)
abline(v=vlines, lty=2, col='grey')
mtext(expression(delta == 1), side=3, cex=cx, line=ln)

dd <- V ^ pvals[2]
ddh <- as.phylo(hclust(dist(dd)))
plot(ddh, edge.col='red', x.lim=xl, label.offset=0.2)
mtext(bquote(delta == .(pvals[2])),  cex=cx, line=ln)
abline(v=vlines, lty=2, col='grey')

# kappa
pvals <- c(1.2, 0.0)
ec <- ifelse(internal,'red', 'grey30')

zz$edge.length <- z$edge.length^pvals[1]
plot(zz, edge.col = 'red', x.lim=xl, label.offset=0.2)
mtext(bquote(kappa == .(pvals[1])), side=3, cex=cx, line=ln)
abline(v=vlines, lty=2, col='grey')

plot(z,  edge.col='red', x.lim=xl, label.offset=0.2)
abline(v=vlines, lty=2, col='grey')
mtext(expression(kappa == 1), side=3, cex=cx, line=ln)

zz$edge.length <- z$edge.length^pvals[2]
plot(zz, edge.col='red', x.lim=xl, label.offset=0.2)
mtext(bquote(kappa == .(pvals[1])),  cex=cx, line=ln)
abline(v=vlines, lty=2, col='grey')

@
    \caption{Examples of $\lambda$,  $\delta$ and  $\kappa$ branch length transformations. The branches affected by a given transformation are shown in red.}
    \label{bltrans}
  \end{center}
\end{figure}

\subsubsection{Fitting phylogenetic GLS models: \fnidx{pgls}}

The \fnidx{pgls} function implements GLS models accounting for phylogeny. It also includes the three branch length transformations ($\lambda, \kappa, \delta$) described above. The values of these  may be fixed at values provided by the user or optimised within bounds to find the maximum likelihood transformation. Multiple transformations can be optimised simultaneously, although the underlying evolutionary model may be hard to interpret.

The function uses a formula interface to describe the model and uses data provided as a comparative data object. Because the model fitting uses a covariance matrix, which can be computationally expensive to extract from a phylogeny, it is useful to include this when creating the comparative data object at the start of an analysis using \texttt{vcv=TRUE}. The resulting \texttt{pgls} model object has standard \texttt{print}\index{Functions!pgls!print} and \texttt{summary}\index{Functions!pgls!summary} methods.

<<basicPGLS>>=
data(shorebird)
shorebird <- comparative.data(shorebird.tree, shorebird.data, Species, vcv=TRUE)
mod <- pgls(log(Egg.Mass) ~ log(M.Mass), shorebird)
print(mod)
summary(mod)
@

\subsection{Optimising branch length transformations: \fnidx{profile.pgls}.}

The \fnidx{pgls} function has three arguments to specify parameter values for the $\lambda$, $\kappa$ and $\delta$ transformations. However, the value for these parameters can alternatively be specified as \texttt{"ML"}, in which case the maximum likelihood value for each ML parameter will be found within set bounds. The default bounds ($\lambda = [1\times10^{-6},1], \kappa = [1\times10^{-6},3], \delta = [1\times10^{-6},3]$) may be altered but cannot be negative. Optimising multiple parameters can lead to computational problems with lower bounds very close to zero, so it may be necessary to increase the lower bound slightly.

Because calculating likelihood under a $\kappa$ transformation requires a transformation of the individual branch lengths, it is not possible with a simple $\mathbf{V}$ matrix. This is because $\mathbf{V}$ only holds the sums of those branch lengths. Any variation of $\kappa$, whether for optimization or for profiling likelihood values, therefore needs a 3 dimensional $\mathbf{V}$ array, in which the branch lengths are held separately. The \texttt{vcv.dim} argument to the \fnidx{comparative.data} function creates this.

By default, \fnidx{pgls} will use likelihood ratio tests to establish confidence bounds on optimised parameters. The likelihood value at the ML estimate is tested against the values at the parameter bounds and confidence intervals of the ML estimate are also calculated. The \fnidx{profile.pgls} function allows the likelihood surface of a parameter for a model to be displayed. If the ML estimate of the parameter is known then then the profile will show this along with confidence limits (Fig. \ref{lambdaOptPlot}). The likelihood profile is taken for a single parameter at a time, with the other two parameters held at their fixed or optimum values.

<<lambdaOpt>>=
data(shorebird)
shorebird <- comparative.data(shorebird.tree, shorebird.data, Species, vcv=TRUE, vcv.dim=3)
mod <- pgls(log(Egg.Mass) ~ log(M.Mass), shorebird, lambda='ML')
summary(mod)
mod.l <- pgls.profile(mod, 'lambda')
mod.d <- pgls.profile(mod, 'delta')
mod.k <- pgls.profile(mod, 'kappa')
plot(mod.l); plot(mod.d); plot(mod.k)
@

\begin{figure}[htbp]
  \begin{center}
<<lambdaOptPlot, fig=TRUE, echo=FALSE, print=FALSE, height=3, width=8>>=
par(mar=c(4.5,3,1,1), mfrow=c(1,3), mgp=c(1.8,0.8,0), tcl=-0.2)
plot(mod.l); plot(mod.d); plot(mod.k)
@
    \caption[Likelihood surfaces]{Likelihood surfaces of \ensuremath{\lambda},
    \ensuremath{\kappa} and \ensuremath{\delta} for a simple model
    (\ensuremath{\log \textrm{Egg.Mass} ~ \log \textrm{M.Mass}}) 
    on the \dtidx{shorebird} dataset.
    The ML estimate has only been fitted for \ensuremath{\lambda} and so this is
    the only plot that shows confidence limits.
    } 
    \label{lambdaOptPlot}
  \end{center}
\end{figure}

\subsubsection{Criticism and simplification of\fnidx{pgls} models: \texttt{plot}\index{Functions!pgls!plot}, \texttt{anova}\index{Functions!pgls!anova} and \texttt{AIC}\index{Functions!pgls!AIC}.}

The \caper{} package provides standard generic methods for examining \texttt{pgls} models. The first is the \texttt{plot}\index{Functions!pgls!plot} methods, which produces four diagnostic plots. The first two show the fit of the residuals to a normal distribution: a density plot of the distribution of the residuals and a normal Q-Q plot the distribution of the residuals against their expected distribution under a normal distribution. The second two show the fitted values against both the residuals and the observed value to look for pattern in the residuals within the model. Figure \ref{pglsPlotCmd} shows these plots for the model above using the command \texttt{plot(mod)}.

\begin{figure}[htbp]
  \begin{center}
<<pglsPlotCmd, fig=TRUE, echo=FALSE, print=FALSE, height=6, width=6>>=
par(mfrow=c(2,2), mar=c(3,3,1,1), mgp=c(2,0.7,0), tcl=-0.2)
plot(mod)
@    
\caption[PGLS model diagnostics]{Model diagnostic plots for a simple model (\texttt{log(Egg.Mass) \~ log(M.Mass)}) on the \dtidx{shorebird} dataset.}
    \label{pglsPlotCmd}
  \end{center}
\end{figure}

The package also provides \texttt{anova}\index{Functions!pgls!anova} and \texttt{logLik}\index{Functions!pgls!logLik} methods that allow model comparison. The \texttt{logLik}\index{Functions!pgls!logLik} method allows the generic \texttt{AIC}\index{Functions!pgls!AIC} methods to be used with \fnidx{pgls} models. Analysis of variance tables for a \fnidx{pgls} model uses sequential sums of squares: each F value is the marginal improvement in the model given the combined effects of the terms above. The \texttt{anova}\index{Functions!pgls!anova} command can also be used to compare a set of models.

<<pglsCrit>>=
mod1 <- pgls(log(Egg.Mass) ~ log(M.Mass) * log(F.Mass), shorebird) 
anova(mod1)

mod2 <- pgls(log(Egg.Mass) ~ log(M.Mass) + log(F.Mass), shorebird)  
mod3 <- pgls(log(Egg.Mass) ~ log(M.Mass) , shorebird)
mod4 <- pgls(log(Egg.Mass) ~ 1, shorebird)

anova(mod1, mod2, mod3, mod4)
AIC(mod1, mod2, mod3, mod4)
@

%%%%%%%%%%
%% CONTRAST METHODS
%%%%%%%%%%

\subsection{Phylogenetic independent contrasts}

The \caper{} package implements the methods originally provided in the  programs CAIC \citep{Purvis.Rambaut.1995.a} and MacroCAIC \citep{Agapow.Isaac.2002.a}. Both programs calculate phylogenetically independent contrasts in a set of variables and then use linear models of those contrasts to test for evolutionary relationships. All of the functions in this section require a comparative dataset object and a description of a linear model as an R formula (see \texttt{formula()}).

In contrast model functions, a reference variable (\texttt{ref.var}) may also be provided. This is used to standardize the directions in which contrasts are calculated in multivariate models. If no reference variable is provided, the function defaults to using the first explanatory variable specified in the model formula. All contrast model functions enforce regression through the origin \citep{Garland.Harvey.ea.1992.a}.

\subsubsection{Variable names in contrast functions}
The three functions below (\fnidx{crunch}, \fnidx{brunch}, \fnidx{macrocaic}) all make use of existing methods within R to generate a model matrix. In normal linear models, the terms in the model formula allow transformations to be specified within models: a model can use \texttt{y ~ log(x)} and subsequent modifications of the model will expect to be able to use a data frame containing \texttt{x} that will be logged before use. These contrast  functions do support this in initial model fitting. However modifications cannot be made so simply to contrast models: the variable \texttt{log(x)} now represents \emph{contrasts} in \texttt{log(x)}, and reference back to the original variable \texttt{x} will not yield the correct results. Generating consistent behaviour using the R modelling framework in these cases is not easy.

It is therefore strongly recommended that any transformation of data should be carried out in the original data frame and that these transformed variables are used in model specification.

%%%%%%%%%%
%% CRUNCH
%%%%%%%%%%

\subsubsection{Continuous variables: \fnidx{crunch}}

The \fnidx{crunch} function provides calculation of independent contrasts \citep{Felsenstein.1985.a} for continuous variables, as implemented in the `crunch' option of the CAIC package \citep{Purvis.Rambaut.1995.a}, using the method of \citet{Pagel.1992.a} to calculate contrasts at polytomies. The following example shows an example analysis using the \dtidx{shorebird} dataset.

%% EXPAND? - contrast calculation example from CAIC manual

<<crunchExample>>=
data(shorebird)

shorebird.data$lgEgg.Mass <- log(shorebird.data$Egg.Mass)
shorebird.data$lgM.Mass <- log(shorebird.data$M.Mass)
shorebird.data$lgF.Mass <- log(shorebird.data$F.Mass)

shorebird <- comparative.data(shorebird.tree, shorebird.data, Species)
crunchMod <- crunch(lgEgg.Mass ~ lgF.Mass + lgM.Mass, data=shorebird)
summary(crunchMod)
@

%%%%%%%%%%
%% BRUNCH
%%%%%%%%%%

\subsubsection{Categorical variables: \texttt{brunch}\index{Functions!brunch|seealso{crunch}}}
\label{brunchSection}
The inclusion of categorical variables in phylogenetic independent contrast models is problematic. Continuous variables are assumed to evolve under a Brownian process, which permits estimates of nodal values and hence allows nested contrasts to be drawn.  There is no sensible model for reconstructing the nodal values of a categorical variable, except where all daughters share the same categorical value. However, identifying nodes with daughter variables that differ in the value of the categorical variable permit independent contrasts to be identified \citep{Burt.1989.a}. While nodal estimates of continuous variables can be made below at nodes below a \fnidx{brunch} contrast, to do so would assume independent evolution of these variables at the nodes of any deeper contrasts \citep{Burt.1989.a}. As a result, the \fnidx{brunch} algorithm only uses the data from any single tip in the calculation of a single contrast. 

The example below shows \fnidx{brunch} contrasts for the \dtidx{perissodactyla} dataset calculated using territoriality. Figure \ref{brunchPlot} shows how these contrasts are extracted from the available data. In each case, a contrast is drawn between groups of species showing differing territorial behaviour and no species is used in more than one contrast.

<<brunchExample>>=
perisso <- comparative.data(perissodactyla.tree, perissodactyla.data, Binomial)
brunchMod <- brunch(log.female.wt ~ Territoriality, data=perisso)
caic.table(brunchMod)
@

\begin{figure}[htbp]
  \begin{center}
<<brunchPlot, fig=TRUE, echo=FALSE, print=FALSE, height=3, width=6>>=
bwd <- rep(1,15)
bwd[c(5,6,11,12,9,13,14,15)] <- 3
plot(perisso$phy,  'cladogram', use.edge.length=FALSE, label.offset=0.4, show.node=TRUE, cex=0.7, show.tip=FALSE, x.lim=c(0,12), no.margin=TRUE, edge.width=bwd)
points(rep(8.25,9), 1:9, pch=ifelse(perisso$data$Terr=='Yes', 1,19))
text(rep(8.5,9), 1:9, perisso$phy$tip.label, font=3, adj=0)
@
    \caption[Brunch contrast locations]{Location of \fnidx{brunch} contrasts for
    territoriality in the \dtidx{perissodactyla} dataset. Branches in bold show
    the tips contributing to each contrast. See also the output of
    \fnidx{caic.table} in the main text.} \label{brunchPlot}
  \end{center}
\end{figure}

%%%%%%%%%%
%% MACROCAIC
%%%%%%%%%%

\subsubsection{Species richness contrasts: \texttt{macrocaic}\index{Functions!macrocaic|seealso{crunch}}}

One common approach in macroevolution is to investigate the links between the traits of taxa and their species richness. It is inappropriate to calculate \fnidx{crunch} contrasts in species richness, particularly for nested nodes, where \fnidx{crunch} contrasts would estimate nodal values as a weighted average of species richness for the daughter taxa, rather than tracking total species richness. For this reason, \citet{Agapow.Isaac.2002.a} developed the program MacroCAIC, which implemented the calculation of species richness contrasts.

The MacroCAIC program, and the \fnidx{macrocaic} function, take a statistical model of species richness as a function of continuous explanatory variables and calculate \fnidx{crunch} contrasts in the explanatory variables and one of two recommended species richness contrasts as a response \citep{Isaac.Agapow.ea.2003.a}. These two contrasts are the relative rate difference (RRD: $ln(N_1/N_2)$) and the proportion dominance index (PDI: $(N_1/(N_1 + N_2)) - 0.5$), where $N_1$ and $N_2$ are the number of species in the daughter clades and $N_1$ is the richness of the clade with the higher value in the focal explanatory variable \citep{Agapow.Isaac.2002.a}.

<<macrocaicExample>>=
data(IsaacEtAl)
primates <- comparative.data(primates.tree, primates.data, binomial, na.omit=FALSE)
primatesBodySize <- macrocaic(species.rich ~ body.mass, data=primates)
summary(primatesBodySize)

@

%% %%%%%%%%%%
%% %% PICLM
%% %%%%%%%%%%
%% %% STILL THINKING WHETHER OR NOT TO INCLUDE THIS
%% \subsubsection{\fnidx{piclm}}
%% 


%% %%%%%%%%%%
%% %% PHYLO D
%% %%%%%%%%%%


\subsubsection{Phylogenetic signal: \fnidx{phylo.d}}

\citet{Fritz.Purvis.2010.a} proposed the statistic $D$ as a measure of phylogenetic signal in binary traits. The $D$ statistic makes use of the way in which estimated nodal values calculated lusing \fnidx{crunch} change along the edges of a phylogeny. If a trait is highly conserved, with only a basal division between two clades expressing either trait value, then the only change will be along the edges  between the two daughter clades at the root. This will give a summed value of 1: the two differences between the root nodal value of 0.5 and the ancestors of the 1 and 0 clades. In contrast, if the trait is labile, more differences will be observed and the sum will be higher.

The sum of these changes across a phylogeny will be governed by the phylogenetic signal in the variable, but also by the size of the phylogeny and by the relative proportions of the two states (prevalence). In order to standardise the effects of phylogeny size and prevalence, \fnidx{phylo.d} uses two simulated null models:

\begin{description}
\item[Phylogenetic randomness]{Trait values are randomly shuffled relative to the tips of the phylogeny.}
\item[Brownian threshold model]{A continuous trait is evolved along the phylogeny under a Brownian process and then converted to a binary trait using a threshold that reproduces the relative prevalence of the observed trait.}
\end{description}

The mean sum of changes under the Brownian ($\bar{s_b}$) and random ($\bar{s_r}$) models are used to calibrate the observed sum of changes ($s_o$) values: $D = (s_o - \bar{s_b})/(\bar{s_r}-\bar{s_b})$ (Fig \ref{phylodScale}). On this standardised scale, a variable with random association will have $D \approx 1$ since $s_o \approx \bar{s_r}$ and a variable following the Brownian model will have $D \approx 0$ since $s_o = \bar{s_b}$. Values of $D$ smaller than 0 are phylogenetically more conserved than under the Brownian model and values of $D$ greater than 1 are phylogenetically overdispersed. The simulated values can also be used to assess whether the observed $D$ differs significantly from the simulated models.

\begin{figure}[htbp]
  \begin{center}
<<phylodScale, fig=TRUE, echo=FALSE, print=FALSE, height=3, width=6>>=
data(BritishBirds)
BritishBirds <- comparative.data(BritishBirds.tree, BritishBirds.data, binomial)
redPD <- phylo.d(BritishBirds, binvar=Red_list)

par(mar=c(3,3,2,1), mgp=c(1.8,0.8,0), tcl=-0.3)

plot(density(redPD$Permutations$random, bw=0.5), main='', xlim=c(10,55), col='blue', xlab='Sum of character change')
lines(density(redPD$Permutations$brownian,bw=0.5), col='red')
abline(v=redPD$Parameters$Observed)
abline(v=redPD$Parameters$Observed, col='grey')
abline(v=redPD$Parameters$MeanRandom, col='blue')
abline(v=redPD$Parameters$MeanBrownian, col='red')

with(redPD$Parameters, axis(3, at=c(MeanBrownian, Observed, MeanRandom), labels=c(0, round(redPD$DEstimate,2),1), mgp=c(1.5,0.7,0.25)))
mtext(expression(italic(D)==phantom(x)), side=3, at=25, line=1.2)

@
    \caption[D value scalings]{Scaling of $D$ values for the observed sum of
    character change from the distributions of simulated sums of character
    change under models of random association (blue line) and a Brownian process
    (red line). The distributions of the simulations are used to test the
    significance of departures from either model.} \label{phylodScale}
  \end{center}
\end{figure}

The example below uses the \dtidx{BritishBirds} dataset to calculate $D$ for the phylogenetic distribution of `Red' conservation status for 181 species of British birds \citep{Fritz.Purvis.2010.a,Thomas.2008.a}.

<<phylodDemo>>=
data(BritishBirds)
BritishBirds <- comparative.data(BritishBirds.tree, BritishBirds.data, binomial)
redPhyloD <- phylo.d(BritishBirds, binvar=Red_list)
print(redPhyloD)
@

%% %%%%%%%%%%
%% %% USING CONTRASTS
%% %%%%%%%%%%


\subsection{Checking and comparing contrast models.}

The three contrast model functions (\fnidx{crunch}, \fnidx{brunch} and \fnidx{macrocaic}) all return an object of class \texttt{caic} for which a number of common functions and methods are provided, including  \texttt{print}\index{Functions!crunch!print} and \texttt{summary}\index{Functions!crunch!summary} methods. These are useful for assessing the robustness of contrast models. The simplest function is \fnidx{caic.table}, which returns a table of contrasts and nodal values that is very like the original contrast files provided by CAIC and MacroCAIC. This data frame (see the example in the \fnidx{brunch} section, page \pageref{brunchSection}) provides all the information needed to assess model suitability but some convenience functions are provided for standard checks. 

\subsubsection{Testing evolutionary assumptions: \fnidx{caic.diagnostics}.}

This function implements three diagnostic regression tests for the robustness of a contrast model \citep{Garland.Harvey.ea.1992.a,Purvis.Rambaut.1995.b}.  The function performs regression for each test and optionally displays scatterplots of the diagnostic test data. The tests examine the behaviour of the absolute standardised contrasts with the scale of the following nodal values: 

\begin{description}
	\item[Estimated nodal value]{This tests an evolutionary assumption of the contrast model: that there is no relationship between the magnitude of estimated contrasts and the estimated nodal value. Put another way, the proportional difference between daughter clades should be independent of magnitude of their trait values. }
	\item[Estimated standard deviation at the node]{The evolutionary model underlying contrasts \citep{Felsenstein.1985.a} assumes that variance accumulates in continuous characters equally along all branches and that the amount of variation is proportional to branch length. The square root of the expected variance calculated at each node can therefore be used to standardize the absolute difference (`raw contrasts') between sister taxa. If this standardization is successful, there should be no relationship between absolute values of standardized contrasts and the estimated standard deviation.}
	\item[Age of the node]{If the phylogeny is ultrametric, then the node age, as time from the present, provides a further test of the effectiveness of the standardization of the contrasts.}
\end{description}

If any of these diagnostic regressions are significant, then the assumption of evolution under a Brownian walk has been violated and  a transformation of the data or branch lengths may be required. As in the example use below on the \dtidx{shorebird} dataset, log transformation of continuous variables is often an effective solution for problems with contrast diagnostics (Fig \ref{caicDiagExamplePlot}).

%% don't echo and fig - the code becomes part of the float
<<caicDiagExample>>=
par(mfrow=c(2,3))
shorebird <- comparative.data(shorebird.tree, shorebird.data, Species)
crunchMod <- crunch(Egg.Mass ~  M.Mass, data=shorebird)
caic.diagnostics(crunchMod)

shorebird.data$lgEgg.Mass <- log(shorebird.data$Egg.Mass)
shorebird.data$lgM.Mass <- log(shorebird.data$M.Mass)
shorebird <- comparative.data(shorebird.tree, shorebird.data, Species)

crunchMod2 <- crunch(lgEgg.Mass ~  lgM.Mass, data=shorebird)
caic.diagnostics(crunchMod2)
@


\begin{figure}[htbp]
  \begin{center}
<<caicDiagExamplePlot, fig=TRUE, echo=FALSE, print=FALSE, height=4, width=6>>=
par(mfrow=c(2,3), mar=c(3,3,1,1), mgp=c(2,0.7,0), tcl=-0.2)
shorebird <- comparative.data(shorebird.tree, shorebird.data, Species)
# need to store the output of caic diagnostics to stop it
# showing up in the float
crunchMod <- crunch(Egg.Mass ~  M.Mass, data=shorebird)
x <- caic.diagnostics(crunchMod)
crunchMod2 <- crunch(lgEgg.Mass ~  lgM.Mass, data=shorebird)
x <- caic.diagnostics(crunchMod2)
@
    \caption[Crunch diagnostics]{Diagnostic plots for contrasts in male
    shorebird mass for contrast models on unlogged (top row, poorly distributed
    with one serious outlier) and logged (bottom row) data.}
    \label{caicDiagExamplePlot}
  \end{center}
\end{figure}

\subsubsection{Robust contrasts: \fnidx{caic.robust}}

In Figure \ref{caicDiagExamplePlot}, there is a systematic problem with the unlogged model and, although one contrast has a high studentized residual, this point does not drive the relationship. In other cases, individual contrasts may exert undue influence over an otherwise well behaved set of contrasts. This is more commonly the case for models where there are known to be problems with branch length estimates or where all branch lengths are treated as equal length. In such cases, these nodes should be removed from the set of contrasts used in the linear model. A threshold of absolute studentized residuals greater than 3 is commonly applied \citep{Jones.Purvis.1997.a,Garland.Harvey.ea.1992.a}. 


The contrast model functions in \caper{} provide the \texttt{robust} argument to set the size of this threshold. By default, this is set to infinity: all contrasts are used to fit the linear model. This can be changed by altering the \texttt{robust} argument but can also be modified after initial inspection of contrast diagnostics using the \fnidx{caic.robust} function. This simple method alter the threshold for a \texttt{caic} object and returns a modified version with this threshold filter enforced. 

Note that  none of the contrasts or residuals are recalculated using the \texttt{robust} options: the \texttt{caic} object retains all of the original contrast data and the studentized residuals are \emph{always} those obtained from the full set of contrasts. The threshold filter is only applied in two contexts:
\begin{enumerate}
  \item The statistical model of the contrasts held in the \texttt{caic} object and displayed by the model summary.
  \item The points displayed by \fnidx{caic.diagnostics}. Note that the \texttt{outlier} argument to this function is independent of the value of \texttt{robust} for an object.
\end{enumerate}

Although clearly a bad model, the example below uses unlogged data from \dtidx{shorebird} to illustrate the use of the functions (Fig. \ref{caicRobustExamplePlot}).

<<caicRobustExample>>=
data(shorebird)
shorebird <- comparative.data(shorebird.tree, shorebird.data, Species)

crunchMod <- crunch(Egg.Mass ~ M.Mass, data=shorebird)
caic.diagnostics(crunchMod)

crunchModRobust <- caic.robust(crunchMod)
caic.diagnostics(crunchModRobust, outlier=2)
@

\begin{figure}[htbp]
  \begin{center}
<<caicRobustExamplePlot, fig=TRUE, echo=FALSE, print=FALSE, height=4, width=6>>=
par(mfrow=c(2,3), mar=c(3,3,1,1), mgp=c(2,0.7,0), tcl=-0.2)
data(shorebird)
shorebird <- comparative.data(shorebird.tree, shorebird.data, Species)
# need to store the output of caic diagnostics to stop ip showing up in the float
crunchMod <- crunch(Egg.Mass ~ M.Mass, data=shorebird)
x <-caic.diagnostics(crunchMod)

crunchModRobust <- caic.robust(crunchMod)
x <-caic.diagnostics(crunchModRobust, outlier=2)
@

    \caption[Crunch diagnostics and robust models]{Diagnostic plots and
    \fnidx{caic.robust}: the upper row shows diagnostic plots for the full
    model, highlighting a single point with an absolute studentized residual $>
    3$. The lower row shows the same plots after applying \fnidx{caic.robust}:
    the highlighted point has been excluded and the argument \texttt{outlier}
    has been used to reveal points with absolute residuals $> 2$.}
    \label{caicRobustExamplePlot}
  \end{center}
\end{figure}

\subsubsection{Model criticism: \texttt{plot}\index{Functions!crunch!plot}}

The preceding tools are broadly useful for assessing whether the evolutionary assumptions of contrast modelling have been met and for excluding problematic nodes. However it is still wise to check whether the statistical assumptions of the linear model on the contrasts are met. The  \caper{} package provides a simple wrapper to allow the standard model criticism plots to be generated for a \texttt{caic} object (Fig \ref{modelCritPlot}). 

<<modelCrit>>=

data(shorebird)

shorebird.data$lgEgg.Mass <- log(shorebird.data$Egg.Mass)
shorebird.data$lgM.Mass <- log(shorebird.data$M.Mass)
shorebird.data$lgF.Mass <- log(shorebird.data$F.Mass)

shorebird <- comparative.data(shorebird.tree, shorebird.data, Species)
crunchMod <- crunch(lgEgg.Mass ~ lgF.Mass + lgM.Mass, data=shorebird)
par(mfrow=c(2,2))
plot(crunchMod)

@

\begin{figure}[htbp]
  \begin{center}
<<modelCritPlot, fig=TRUE, echo=FALSE, print=FALSE, height=4, width=4.6>>=
par(mfrow=c(2,2), mar=c(3,3,2,1), mgp=c(2,0.7,0), tcl=-0.2)
data(shorebird)
crunchMod <- crunch(lgEgg.Mass ~ lgF.Mass + lgM.Mass, data=shorebird)
plot(crunchMod)
@
    \caption{Testing the assumptions of a linear model for a contrast plot}
    \label{modelCritPlot}
  \end{center}
\end{figure}

\subsubsection{Model comparison: \texttt{anova}\index{Functions!crunch!anova} \& \texttt{AIC}\index{Functions!crunch!AIC}}

In a similar fashion, the \caper{} package provides simple wrappers to standard \texttt{anova}\index{Functions!crunch!anova} and \texttt{AIC}\index{Functions!crunch!AIC} methods to allow contrast models to be compared.

<<modelComparison>>=
shorebird.data$lgEgg.Mass <- log(shorebird.data$Egg.Mass)
shorebird.data$lgM.Mass <- log(shorebird.data$M.Mass)
shorebird.data$lgF.Mass <- log(shorebird.data$F.Mass)
shorebird <- comparative.data(shorebird.tree, shorebird.data, Species)

cMod1 <- crunch(lgEgg.Mass ~ lgM.Mass * lgF.Mass, data=shorebird)
cMod2 <- crunch(lgEgg.Mass ~ lgM.Mass + lgF.Mass, data=shorebird)
cMod3 <- crunch(lgEgg.Mass ~ lgM.Mass , data=shorebird)

anova(cMod1, cMod2, cMod3)
AIC(cMod1, cMod2, cMod3)
@

%% Pointers for brunch and macrocaic
\texttt{AIC}\index{Functions!crunch!AIC}

%% %%%%%%%%%%
%% %% OTHER FUNCTIONS
%% %%%%%%%%%%

\subsection{Other comparative functions}

\subsubsection{Tree imbalance: \fnidx{fusco.test}}
The $I$ statistic \citep{Fusco.Cronk.1995.a} is a measure of phylogenetic imbalance. It has the useful properties that it can be calculated for trees containing polytomies and that the tips of trees can be associated with species richness values. An imbalance score, in the interval $[0,1]$ is calculated for all nodes in the phylogeny and the distribution of the nodal values is used to assess overall balance. Nodes leading to  fewer than 4 species are uninformative about balance and $I$ cannot be calculated for polytomous nodes and hence both are omitted from balance calculations.

The original $I$ statistic proposed by \citet{Fusco.Cronk.1995.a} has been shown to have a  bias at nodes with even numbers of species \citep{Purvis.Katzourakis.ea.2002.a}. This function therefore also implements the modified $I'$ statistic: a Wilcoxon test is used to assess whether the median of the observed $I'$ values differs from the value of 0.5 expected under a Markov process and a randomisation process is used to provide confidence intervals. 

The function can make use of species richness data but can also be set to treat the tips of the phylogeny as species. These two approaches contrast the balance of the distribution of species richness across a topology and the balance of the topology itself. 

<<fuscoDemo>>=
data(syrphidae)
syrphidae <- comparative.data(phy=syrphidaeTree, dat=syrphidaeRich, names.col=genus)
summary(fusco.test(syrphidae, rich=nSpp))
summary(fusco.test(syrphidae, tipsAsSpecies=TRUE))
@

\subsection{Phylogenetic diversity: \fnidx{pd.calc}, \fnidx{pd.bootstrap} and \fnidx{ed.calc}.}

There are two functions that provide simple ways to calculate phylogenetic diversity indices for a set of tips on a phylogeny (\fnidx{pd.calc}) and to bootstrap the distribution of those indices for a subset of a given size (\fnidx{pd.bootstrap}).

The following indices of phylogenetic diversity are supported (Fig. \ref{pdMeasurePlot}):
\begin{description}
	\item[Total Branch Length (TBL)]{The sum of all the edge lengths in the subtree given by the tip subset. This measure can be partitioned into the two next measures.}
  \item[Shared Branch Length (SBL)]{The sum of all edges in the subtree that are shared by more than one tip.}
  \item[Unique Evolutionary History (UEH)]{The sum of the edge lengths that give rise to only one tip in the subtree.}
  \item[Length of tip branch lengths (TIPS)]{Unlike UEH, this measure does not use the unique paths to each tips on the \emph{subtree} and instead gives the sum of the unique branches leading to the tips on the \emph{complete tree}.}
  \item[Minimum Spanning Tree (MST)]{The sum of the lengths of the edges for the smallest tree that links the subset tips, excluding any edges below the node of the most recent common ancestor.}
\end{description}

These two functions can be used together to compare observed phylogenetic diversity for a group of species to the distribution of phylogenetic diversity for a similar sized group. These functions use a phylogeny arranged as a \fnidx{clade.matrix}, as this greatly accelerates sampling of phylogenetic diversity measures. A \texttt{phylo} object can be used but will be immediately converted to a clade matrix.


<<pdExample>>=
data(BritishBirds)
BritishBirds.cm <- clade.matrix(BritishBirds.tree)
redListSpecies <- with(BritishBirds.data, binomial[Red_list==1])
obs <- pd.calc(BritishBirds.cm, redListSpecies, method="TBL")
rand <- pd.bootstrap(BritishBirds.cm, ntips=length(redListSpecies), rep=1000, method="TBL")

plot(density(rand), main='Total branch length for random sets of 32 species.')
abline(v=obs, col='red')
@

\setkeys{Gin}{width=\textwidth}
\begin{figure}[htbp]
  \begin{center}
<<pdMeasurePlot, fig=TRUE, echo=FALSE, print=FALSE, height=5, width=8>>=
tree <- read.tree(text="((((A:1,B:1):1.5,C:2.5):0.5,(D:0.6,E:0.6):2.4):0.5,((F:1.9,G:1.9):0.8,(H:1.6,I:1.6):1.1):0.8):0.2;")

clmat <- clade.matrix(tree)
tips <- c("A","C","D","E","G","H")

par(mfrow=c(2,3), mar=rep(1,4))

tip.col <- rep('grey', length=length(tree$tip.label))              
tip.col[c(1,3,4,5,7,8)] <- 'black'
                                         
# TBL
cl <- rep('black', length=length(tree$edge.length))
cl[c(1,2,3,4,6,7,8,9,10,11,13,14,15)] <- 'red'
plot(tree, label.offset=0.1, cex=1.2, tip.color=tip.col, x.lim=3.8, edge.col=cl)
text(0.2,8.5, "TBL", cex=1.2, adj=c(0,0))

# UEH
cl <- rep('black', length=length(tree$edge.length))
cl[c(3,4,6,8,9,11,14,13,15)] <- 'red'
plot(tree, label.offset=0.1, cex=1.2, tip.color=tip.col, x.lim=3.8, edge.col=cl)
text(0.2,8.5, "UEH", cex=1.2, adj=c(0,0))

# SBL
cl <- rep('black', length=length(tree$edge.length))
cl[c(1,2,7,10)] <- 'red'
plot(tree, label.offset=0.1, cex=1.2, tip.color=tip.col, x.lim=3.8, edge.col=cl)
text(0.2,8.5, "SBL", cex=1.2, adj=c(0,0))

# TIP
cl <- rep('black', length=length(tree$edge.length))
cl[c(4,6,8,9,13,15)] <- 'red'
plot(tree, label.offset=0.1, cex=1.2, tip.color=tip.col, x.lim=3.8, edge.col=cl)
text(0.2,8.5, "TIP", cex=1.2, adj=c(0,0))


# MST
tips <- c("A","C")
tip.col <- rep('grey', length=length(tree$tip.label))              
tip.col[c(1,3)] <- 'black'

cl <- rep('black', length=length(tree$edge.length))
cl[c(3,4,6)] <- 'red'

plot(tree, label.offset=0.1, cex=1.2, tip.color=tip.col, x.lim=3.8, edge.col=cl)
text(0.2,8.5, "MST", cex=1.2, adj=c(0,0))

par(mar=c(3,3,1,1))
plot(density(rand), main='', xlab= 'TBL values for red listed British birds.')
abline(v=obs, col='red')
@
    \caption{Illustration of the different measures of phylogenetic diversity
    available and a plot of observed (red) and randomized (black) TBL for red
    listed British birds} \label{pdMeasurePlot}
  \end{center}
\end{figure}

The \fnidx{ed.calc} function takes a phylogeny, again ideally converted using \fnidx{clade.matrix}, and calculates evolutionary distinctness (ED) scores for each tip \citep{Isaac.Turvey.ea.2007.a}. The length of each branch on the phylogeny is shared equally among all the tips descending from it and the sum of the per-tip contributions along the paths to each tip provides the ED measure of the distinctness of each tip (Fig \ref{edExamplePlots}a). In the example code below, two species (\textit{Allocebus trichotis} and \textit{Phaner furcifer}) share long branches and belong to species poor clades, resulting in high evolutionary distinctiveness (Fig \ref{edExamplePlots}b,c)

<<edExample>>=
data(IsaacEtAl)
primates.cm <- clade.matrix(primates.tree)
primateED <- ed.calc(primates.cm)
str(primateED)
plot(density(primateED$spp$ED))
with(primateED, spp[spp$ED == max(spp$ED),])
@

\setkeys{Gin}{width=\textwidth}
\begin{figure}[htbp]
  \begin{center}
<<edExamplePlots, fig=TRUE, echo=FALSE, print=FALSE, height=3, width=8>>=
par(mfrow=c(1,3), mar=c(0,0,0,0), mgp=c(2,0.7,0))
edDemo <- ed.calc(clmat)

edge.col <- rep('gray30', 16)
edge.col[c(1,2,6)] <- 'red'
edge.col[10:12] <- 'blue'

xx <-  plot(tree, show.tip.label=FALSE, x.lim=c(0,4),y.lim=c(0.6,9.4), edge.col=edge.col, edge.width=2, no.margin=TRUE)

edgeLab <- with(edDemo$branch, paste(len, '/' , nSp)) [tree$edge[,2]]
edgelabels(edgeLab, 1:16, adj=c(0.5,-0.3), frame='none', col='gray30', cex=0.7)

edgeLab <- edDemo$branch$ED[tree$edge[,2]]
edgelabels(sprintf("%0.2f", edgeLab), 1:16, adj=c(0.5,1.3), frame='none', col=edge.col, cex=0.9)

tipLab <- with(edDemo$spp, sprintf("%0.2f", ED))
tipCol <- rep('gray30', 9)
tipCol[c(3,6)] <- c('red','blue')
tiplabels(text=tipLab, adj=c(-0.25,0.5), frame='none', col=tipCol)

par(usr=c(0,1,0,1))
text(0.1, 0.9, "a)", cex=1.2)

par(mar=c(3,3,1,1))
plot(density(primateED$spp$ED), main='', xlab='Species ED score')

edMax <- max(primateED$spp$ED)
arrows(edMax, 0.04, edMax, 0.01, col='red', len=0.1)

par(usr=c(0,1,0,1), mar=c(0,0,0,0))
text(0.1, 0.9, "b)", cex=1.2)

maxSpp <- which(primateED$spp$ED == edMax)
xx <- which(rowSums(primates.cm$clade.matrix[,maxSpp])> 0)

edgeCol <- rep('grey30', length(primates.tree$edge.length))
edgeCol[match(xx, primates.tree$edge[,2])] <- 'red'

plot(primates.tree, show.tip=FALSE, no.margin=TRUE, edge.col=edgeCol)

par(usr=c(0,1,0,1))
text(0.1, 0.9, "c)", cex=1.2)

@
    \caption{a) Calculation of ED scores. Edge labels show the calculation of ED
    contributions (below branch) from the branch length and number of descendent
    species (above branch). Tip labels show species ED scores and the component
    contribution values are highlighted for two species. b) The distribution of
    ED scores for a phylogeny of primate species (c). The paths leading to the
    species with the highest ED values (red arrow) are shown on the phylogeny.}
    \label{edExamplePlots}
  \end{center}
\end{figure}

%% %%%%%%%%%%
%% %% GROWTREE
%% %%%%%%%%%%

\subsection{Phylogeny and trait simulation: \fnidx{growTree}}

A number of other packages provide fast methods for simulating phylogenies and traits along phylogenies under a range of models. The \fnidx{growTree} function is likely to be considerably slower for some common models but provides a flexible environment for simulating more complex scenarios. It bears many similarities to the simulations provided by the program MeSA written by Paul-Michael Agapow (http://www.agapow.net/software/mesa).

A basic \fnidx{growTree} simulation has three components: a speciation rate (\texttt{b}), an extinction rate (\texttt{d}) and a stopping rule  (\texttt{halt}). The default values for these (\texttt{b=1, d=0, halt=20}) therefore simply grow a phylogeny from a single ancestor until there are 20 extant species. The speciation and extinction rates both specify the rate of an exponential function which is used to draw waiting times until the next event. 

However, the power of \fnidx{growTree} is that any of these components can be R expressions using information about the scenario being simulated.  In simple simulations of a phylogeny, the following clade properties can be used: \texttt{clade.age},\texttt{nLin}, \texttt{nTip},\texttt{nExtantTip}, \texttt{nExtinctTip}\label{cladeProperties}. For example, the default \texttt{halt = 20} is actually just a shorthand for \texttt{expression(nExtantTip == 20)} and more examples are shown in the code below and in Fig. \ref{simpleGrowTreePlots}.

There are three important details of these simple simulations: 
\begin{enumerate}
	\item Expressions using \texttt{clade.age} \emph{cannot} test for an exact end time (\texttt{clade.age==3}) since the simulation may overstep this end point and run infinitely: \texttt{clade.age>=3} must be used instead. 
	\item Expressions using numbers of lineages or tips will halt the simulation exactly at a speciation or extinction event. The optional argument \texttt{extend.proportion} can be used to continue to run the simulation until a given proportion of the time to the next speciation has passed. 
	\item The \texttt{clade.age} includes the length of the root edge.
\end{enumerate}

The significance of the argument \texttt{grain=Inf} in these examples is discussed below (Section \ref{grainLabel}).
<<setSeed, echo=FALSE, print=FALSE>>=
# reasonable run time and reasonable results (on my Mac, 
# no idea if this will carry on to R-Forge.)
set.seed(2345)
@
<<simpleGrowTree>>=
basicTree <- growTree(b=1, d=0, halt=20, grain=Inf)
extendTree <- growTree(b=1, d=0, halt=20, extend.proportion=1, grain=Inf)
timeLimit <- growTree(b=1, d=0, halt=expression(clade.age>=3), grain=0.01)
extinctTree <- growTree(b=1, d=0.1, halt=expression(nExtinctTip>=3), extend.proportion=1, grain=Inf)
densityDependence <- growTree(b=expression(2/nExtantTip), d=0, halt=50, grain=Inf)
increasingExtinction <- growTree(b=1, d=expression(0.2 * clade.age), halt=50)
@

\begin{figure}[htbp]
  \begin{center}
<<simpleGrowTreePlots, fig=TRUE, echo=FALSE, print=FALSE, height=6, width=6>>=
par(mfrow=c(2,3), mar=c(1.5,0.5,0,0.5), tcl=-0.2, mgp=c(1,0.5,0), cex.axis=0.8)
plot(basicTree$phy  , root.edge=TRUE);   axis(1); par(usr=c(0,1,0,1)); text(0.1,0.06,"a)")
plot(extendTree$phy , root.edge=TRUE);  axis(1); par(usr=c(0,1,0,1)); text(0.1,0.06,"b)")
plot(timeLimit$phy  , root.edge=TRUE);   axis(1); par(usr=c(0,1,0,1)); text(0.1,0.06,"c)")
plot(extinctTree$phy, root.edge=TRUE); axis(1); par(usr=c(0,1,0,1)); text(0.1,0.06,"d)")
plot(densityDependence$phy, root.edge=TRUE); axis(1); par(usr=c(0,1,0,1)); text(0.1,0.06,"e)")
plot(increasingExtinction$phy, root.edge=TRUE); axis(1); par(usr=c(0,1,0,1)); text(0.1,0.06,"f)")

@
   \caption{growTree simulations}{Simple simulations using \fnidx{growTree}. See
   the code example for a) \texttt{basicTree}, b) \texttt{extendTree}, c)
   \texttt{timeLimit}, d) \texttt{extinctTree}, e) \texttt{densityDependence}
   and f) \texttt{increasingExtinction}.} \label{simpleGrowTreePlots}
   \end{center}
\end{figure}

\subsubsection{Simulating continuous characters.}

The \fnidx{growTree} function currently only supports Brownian evolution of continuous characters and requires the user to provide a vector containing of starting value for each trait (\texttt{ct.start}). If \texttt{ct.start} is a named vector, then these names will be used to identify each trait within the simulation. If no names are provided then the trait names default to the form \texttt{ct\#}. 

By default, the mean change per unit time in each trait is assumed to be zero --- there is no directional bias in the evolution of each trait. Similarly, the variance of each trait is assumed to be 1 and there is no covariance between traits. Both these defaults can be over-ridden by providing a  vector \texttt{ct.change} of mean changes and a vector \texttt{ct.var} of variances for each trait. Alternatively, \texttt{ct.var} can be a symmetrical square matrix giving the covariances between the traits.

During the simulation, the waiting times between events are used to randomly draw changes in trait values according to the specified model. The code below shows some examples using values for log body mass and log litter size taken from the PanTHERIA database of mammalian life-history traits \citep{Jones.Bielby.ea.2009.a}. The relationship between the two variables for the values at the tips of each simulation are shown in Fig. \ref{contTraitPlots}.

<<setSeed, echo=FALSE, print=FALSE>>=
# reasonable run time and reasonable results (on my Mac, 
# no idea if this will carry on to R-Forge.)
set.seed(421)
@
<<>>=
# > pantheria <- read.delim('PanTHERIA_1-0_WR05_Aug2008.txt', na.string='-999.00')
# > vars <- log(pantheria[, c(7,21)])
# > cov(vars, use='complete')
#                      X5.1_AdultBodyMass_g X15.1_LitterSize
# X5.1_AdultBodyMass_g           10.0038050       -0.6544125
# X15.1_LitterSize               -0.6544125        0.4427426
# > sum(complete.cases(vars))
# [1] 2325
# > mean(vars,na.rm=TRUE)
# X5.1_AdultBodyMass_g     X15.1_LitterSize 
#            5.4749827            0.6905053 
mammalMeans <- c(logBodyMass=5.48, logLitterSize=0.69)
simpleBrownian <- growTree(halt=100, ct.start=mammalMeans, extend.proportion=1, grain=Inf)
mammalVar <- c(10, 0.44)
varBrownian <- growTree(halt=100, ct.start=mammalMeans, ct.var=mammalVar, extend.proportion=1, grain=Inf)
mammalCovar <- matrix(c(10,-0.65, -0.65, 0.44), ncol=2)
covarBrownian <- growTree(halt=100, ct.start=mammalMeans, ct.var=mammalCovar, extend.proportion=1, grain=Inf)
@

\setkeys{Gin}{width=0.8\textwidth}
\begin{figure}[htbp]
  \begin{center}
<<contTraitPlots, fig=TRUE, echo=FALSE, print=FALSE, height=3, width=6>>=
## par(mfrow=c(1,3), mar=c(0,0.5,0,4), tcl=-0.2, mgp=c(1,0.5,0), cex.axis=0.8)
## 
## plot(simpleBrownian, show.tip=FALSE)
## 
## rightEdge <- rep(par('usr')[2], 25)
## bm <- with(simpleBrownian, ct.data$logBodyMass[ match(tip.label, ct.data$node)])
## points(rightEdge, 1:25, cex=bm / 3, xpd=NA)
## ls <- with(simpleBrownian, ct.data$logLitterSize[ match(tip.label, ct.data$node)])
## points(rightEdge*1.1, 1:25, cex=ls * 2, xpd=NA)
## 
## plot(varBrownian, show.tip=FALSE)
## 
## rightEdge <- rep(par('usr')[2], 25)
## bm <- with(varBrownian, ct.data$logBodyMass[ match(tip.label, ct.data$node)])
## points(rightEdge, 1:25, cex=bm / 3, xpd=NA)
## ls <- with(varBrownian, ct.data$logLitterSize[ match(tip.label, ct.data$node)])
## points(rightEdge*1.1, 1:25, cex=ls * 2, xpd=NA)
## 
## 
## 
## plot(covarBrownian, show.tip=FALSE)
## 
## rightEdge <- rep(par('usr')[2], 25)
## bm <- with(covarBrownian, ct.data$logBodyMass[ match(tip.label, ct.data$node)])
## points(rightEdge, 1:25, cex=bm / 3, xpd=NA)
## ls <- with(covarBrownian, ct.data$logLitterSize[ match(tip.label, ct.data$node)])
## points(rightEdge*1.1, 1:25, cex=ls *2 , xpd=NA)
par(mfrow=c(1,3), mar=c(2.5,2.5,0.5,0.5), tcl=-0.2, mgp=c(1.5,0.5,0), cex.axis=0.8)

stips <- simpleBrownian$data
vtips <- varBrownian$data
cvtips <- covarBrownian$data

lims <- sapply( rbind(stips, vtips,cvtips)[, c('logBodyMass','logLitterSize')], range)

plot(logBodyMass ~ logLitterSize, data=stips, xlim=lims[,2], ylim=lims[,1])
plot(logBodyMass ~ logLitterSize, data=vtips, xlim=lims[,2], ylim=lims[,1])
plot(logBodyMass ~ logLitterSize, data=cvtips, xlim=lims[,2], ylim=lims[,1])

@
   \caption[growTree continuous traits]{Simulated values for two continuous
   traits using a) only starting mean values (\texttt{simpleBrownian}), b) means
   and variances (\texttt{varBrownian}) and c) means and covariances
   (\texttt{covarBrownian}).} \label{contTraitPlots} \end{center}
\end{figure}


\subsubsection{Simulating discrete characters}

Simulation of discrete characters uses a transition matrix for each trait. This matrix defines the number of states and the names of those states and also the rates at which lineages move from one state to another. This matrix does not need to be symmetrical but the diagonal values (the rates at which traits change to the same state) should be zero. These discrete trait matrices need to be provided in a list and the names of the list items will be used as trait names. Again, if names are missing than default values are used: \texttt{dt\#} for trait names and \texttt{st\#} for state names. The simulation will start with the traits in the first state in the matrix.

<<discreteTraitExamples>>=
flight <- matrix(c(0,0.1,0.001,0), ncol=2)
flightNames <- c('ter','fly')
dimnames(flight) <- list(flightNames, flightNames)

trophic <- matrix(c(0,0.2,0.01,0.2,0,0.1,0,0.05,0), ncol=3)
trophNames <- c('herb','omni','carn')
dimnames(trophic) <- list(trophNames, trophNames)

discTraits <- list(flight=flight, trophic=trophic)
discreteTree <- growTree(halt=60, dt=discTraits, extend.proportion=1, grain=Inf)

@

Simulating correlated evolution between discrete traits is possible by combining traits into a single larger matrix that contains the rates of transitions between sets of combined states.

\setkeys{Gin}{width=0.8\textwidth}
\begin{figure}[htbp]
  \begin{center}
<<discreteTraitPlot,  fig=TRUE, echo=FALSE, print=FALSE, height=7, width=5>>=
par(mar=c(0,0,0,0), tcl=-0.2, mgp=c(1,0.5,0), cex.axis=0.8)
plot(discreteTree$phy, show.tip=FALSE)

lastPP <- get("last_plot.phylo", envir = .PlotPhyloEnv)
traitsByNode <- with(discreteTree,  rbind( cbind(node=phy$tip.label, data[, c('flight','trophic')]), node.data[, c('node','flight','trophic')]))
traitsByNode <- traitsByNode[match(1:199, traitsByNode$node), ]

with(lastPP, points(xx + 0.06, yy,  pch=21, col=unclass(traitsByNode$flight), xpd=NA, cex=0.6))
with(lastPP, points(xx + 0.12, yy,  pch=22, col=unclass(traitsByNode$trophic), xpd=NA, cex=0.6))

@
   \caption[growTree discrete traits]{Simulated values for two discrete traits:
   \texttt{flight} (circles) and \texttt{trophic} (squares).}
   \label{discreteTraitPlot} \end{center}
\end{figure}


\subsubsection{Lineage properties and simulation grain} \label{grainLabel}

In addition to the clade properties (page \pageref{cladeProperties}), expressions for speciation and extinction rates and halting the simulation can all use properties of the lineages. The basic lineage properties are less likely to be useful (\texttt{parent.id, id, lin.age, birth.time, death.time, extinct, tip}) but any simulated trait is also available using the trait name. Rates are calculated for every extant lineage and then the competing events (speciation, extinction and changes in discrete traits) draw waiting times given their rates and the event with the shortest waiting time occurs.

These events occur at the discrete points identified by the winning event. However, change in continuous traits is handled differently: once one of those events has occurred, that waiting time is used to scale simulated change in the continuous traits and all of this change is added on to the trait values at the event. Similarly, clade age advances in chunks: the waiting time is added on to the age of all extant tips as each event occurs.

If continuous trait values or clade age are used to control rates within the simulation then this can be problematic: smooth changes in rates may be badly simulated if there are relatively  large step changes in continuous character values or clade age at events. The solution in \fnidx{growTree} is to set the \texttt{grain} of the simulation: this is the maximum amount of time that can pass in a simulation without recalculating clade ages and continuous trait values. Essentially, this scales the relative size of step changes in these continuous values to more closely approximate a smooth function.

By default, \fnidx{growTree} uses a grain value of 0.1. If a particular simulation does not contain rates base on trait or age values then the grain can be set to infinity (\texttt{grain=Inf}). This may substantially speed up some simulations.

In the examples below, the age of each lineage is being used to control the rates of speciation and extinction (Fig. \ref{patencyPlot}). Speciation follows an asymptotic latency model: young species have lower rates of speciation but eventually reach a constant maximum speciation rate. Extinction follows a simple linear relationship of senescence: increasing extinction rate with lineage age. The second simulation incorporates an influence of body size: the asymptotic speciation rate for species is proportional to a simulated trait.
<<setseed, echo=FALSE, print=FALSE>>=
set.seed(95132)
@

<<patencyModel>>=
bLatency <- expression((0.2 *lin.age)/(0.1 + lin.age))
dSenesce <- expression(lin.age * 0.02)
halt <- expression(nExtantTip >= 60)
latencyTree <- growTree(b=bLatency, d=dSenesce, halt=halt, grain=0.01)

traitMean <- c(logBodyMass=5.48)
traitVar <- c(logBodyMass=10)
bLatTrait <- expression(((0.2 + ((5.48 - logBodyMass)/25))*lin.age)/(0.1 + lin.age))

latTraitTree <- growTree(b=bLatTrait, d=dSenesce, halt=halt, ct.start=traitMean, ct.var=traitVar, grain=0.01)

@

\setkeys{Gin}{width=\textwidth}
\begin{figure}[htbp]
  \begin{center}
<<patencyPlot,  fig=TRUE, echo=FALSE, print=FALSE, height=4, width=8>>=
par(mfrow=c(2,2), mar=c(2.5,2.5,1,1), tcl=-0.2, mgp=c(1.5,0.5,0), cex.axis=0.8)

curve(0.02*x, xlim=c(0,20), ylab='Rate', xlab="Lineage age")
curve(((0.2 + ((5.48 - 5.48)/25))*x)/(0.1 + x), add=TRUE, col='red')
curve(((0.2 + ((5.48 - 3.48)/25))*x)/(0.1 + x), add=TRUE, col='red', lty=2)
curve(((0.2 + ((5.48 - 7.48)/25))*x)/(0.1 + x), add=TRUE, col='red', lty=2)

plot(latencyTree$phy, no.margin=TRUE, cex=0.8)

plot(latTraitTree$phy, no.margin=TRUE, cex=0.8, show.tip=FALSE)

# rightEdge <- par('usr')[2] - 0.75
# tips <- seq_along(latTraitTree$tip.label)
# bm <- with(latTraitTree, ct.data$logBodyMass[ match(tips, ct.data$node)])
# 
# rect(rightEdge - 5.48/20, 0 , rightEdge + 5.48/20, max(tips)+1, col='grey80', border=NA)
# 
# arrows(rightEdge-bm/20, tips, rightEdge+bm/20, tips, code=0, col='red', xpd=NA, lwd=2, lend=2)

@
   \caption[growTree and expressions]{Simulations using expressions. a)
   Simulated extinction and speciation rates. Extinction (black) shows
   senescence and speciation shows latency. For the first simulation (b:
   \texttt{latencyTree}), speciation is determined entirely by lineage age and
   there are relatively few extant basal species and few rapid sequences of
   speciation along branches. For the second (c: \texttt{latTraitTree}), there
   are again few rapid sequence of speciation and simulated body mass (red bars)
   is predominantly smaller than the ancestral average (width of the grey bar).
   The dashed red lines in a) show the speciation rate for species with
   simulated log body mass 2 units higher and lower than the ancestral mean.}
   \label{patencyPlot} \end{center}
\end{figure}

\subsubsection{Inheritance rules.}

Another way of controlling simulations is through the use of inheritance rules. At each speciation event, two new lineages replace the parent lineage and inherit exact copies of any traits. Although the traits of these new species will then evolve independently, it is also possible to immediately alter the trait values using expressions. Each rule should generate two values, one for each daughter, and needs to be provided as a list with the name of the list showing the trait affected.

In the first example below, the two daughter lineages inherit divergent body size from their parent (Fig. \ref{inheritancePlots}). 

<<inheritanceExample1>>=
traitMean <- c(logBodyMass=5.48)
traitVar <- c(logBodyMass=10)
inherit <- list(logBodyMass=expression(logBodyMass * c(0.8, 1.2)))
inheritTree <- growTree(b=1, halt=40, ct.start=traitMean, ct.var=traitVar, inheritance=inherit, grain=Inf, extend.proportion=1)
@

The second example is a more complex biogeographic simulation (Fig. \ref{inheritancePlots}). The tree has a single discrete trait representing presence in two regions --- a species can be present in either or both regions and the matrix shows the rates of transition between these states. The extinction rule  gives different extinction rates for species occurring in single regions and species in both regions. Speciation occurs at different rates in the two regions and species present in both regions can speciate in either region. This needs two speciation rules, provided as a list, that compete with each other and among species. Finally the inheritance rules differentiate between speciation of a species in a single region (where both species stay in sympatry)  and speciation in a lineage that occurs in both regions (where the species diverge in allopatry). These inheritance rules make use of the simulation property \texttt{winnerName} which shows which event has happened at an event.

<<inheritanceExamples2>>=
rNames <- c("AB", "A", "B")
regions <- list(region = matrix(c(0, 0.05, 0.05, 0.1, 0,0,0.1,0,0), ncol=3,
	             dimnames=list(rNames,rNames)))
	
extinct <- expression(ifelse(region == "AB", 0.0001, 0.01))

spec <- list(regA_spec = expression(ifelse(region == "AB" | region == "A", 1, 0)),
             regB_spec = expression(ifelse(region == "AB" | region == "B", 0.5, 0)))

inherit <- list(region = expression(if(winnerName=="regB_spec" && region[1] == "AB") c("B","A") else region),
                region = expression(if(winnerName=="regA_spec" && region[1] == "AB") c("A","B") else region))

biogeogTree <- growTree(b=spec, d=extinct, halt=80, inherit=inherit, dt.rates=regions, inf.rates="quiet", grain=Inf)
@ 

\subsubsection{Epochs: linking simulations.}

The final way of controlling simulations is through the use of linked sets of simulations, each representing a different epoch. By default, the \fnidx{growTree} function returns a comparative data object but can also return a simulation as a raw table of lineages. These lineage tables can be used as a starting object for a simulation, allowing the output of one simulation to be passed into the next. Unless explicitly altered, all rules are inherited between epochs. The example below simulates a mass extinction event (Fig. \ref{inheritancePlots}).

<<epochExample>>=
epochTree <- growTree(halt=100, output.lineages=TRUE, grain=Inf)
epochTree2 <- growTree(d=5, halt=expression(nExtantTip==20), linObj=epochTree, output.lineages=TRUE, grain=Inf)
epochTree3 <- growTree(linObj=epochTree2, halt=100, grain=Inf)
@

\setkeys{Gin}{width=\textwidth}
\begin{figure}[htbp]
  \begin{center}
<<inheritancePlots,  fig=TRUE, echo=FALSE, print=FALSE, height=4, width=8>>=
par(mfrow=c(1,3), mar=c(1,1,1,1))

depth <- VCV.array(inheritTree$phy)[1,1]

plot(inheritTree$phy, no.margin=TRUE, cex=0.8, show.tip=FALSE, root.edge=TRUE, x.lim=c(0,depth*1.1))

rightEdge <- depth*1.05
tips <- seq_along(inheritTree$phy$tip.label)
bm <- inheritTree$data$logBodyMass
sc <- max(bm) / (depth*0.04)

rect(rightEdge - 5.48/sc, 0 , rightEdge + 5.48/sc, max(tips)+1, col='grey80', border=NA)

arrows(rightEdge-bm/sc, tips, rightEdge+bm/sc, tips, code=0, col='red', xpd=NA, lwd=2, lend=2)

plot(biogeogTree$phy, edge.col= unclass(biogeogTree$data$region), show.tip.label=FALSE, root.edge=TRUE)
plot(epochTree3$phy, show.tip.label=FALSE)
@
   \caption[growTree and inheritance rules]{Inheritance rules and epochs. a)
   Simulation showing divergent evolution of a continuous trait at speciation.
   b) A biogeographic simulation with sympatric speciation within single regions
   (red, green) and allopatric speciation of species occurring in both regions
   (black). Colours show the region at daughter node of an edge and do not show
   transitions along the edge. c) Use of epochs to simulate a mass extinction
   event.} \label{inheritancePlots} \end{center}
\end{figure}


\clearpage
\subsection{Utility functions}

\subsubsection{\fnidx{clade.matrix}}

The \fnidx{clade.matrix} function turns a phylogeny into a binary matrix showing the tips (columns) descending from each node (rows) in the tree, along with a vector of the edge lengths leading to that node. Although this is an extremely bulky way to represent a phylogeny, a clade matrix allows calculations on subsets of a phylogeny to be calculated much more rapidly.

<<cladeMatrixExample>>=
data(perissodactyla)
perisso <- comparative.data(perissodactyla.tree, perissodactyla.data, Binomial)
perissoCM <- clade.matrix(perisso$phy)
str(perissoCM)
perissoCM$clade.matrix
@

\subsubsection{\fnidx{clade.members} and \fnidx{clade.members.list}}

These functions return a vector of the tips that descend from a given node (\fnidx{clade.members}) or a list of vectors showing the tips descending from every node (\fnidx{clade.members.list}) in a phylogeny. Options to both functions control whether vectors of internal nodes are also returned (\texttt{include.nodes}) and whether tip labels are used instead of the index of those tip labels (\texttt{tip.labels}). 

<<cladeMembersExample>>=
data(perissodactyla)
perisso <- comparative.data(perissodactyla.tree, perissodactyla.data, Binomial)
clade.members(15, perisso$phy)
clade.members(15, perisso$phy, tip.labels=TRUE)
clade.members(15, perisso$phy, tip.labels=TRUE, include.nodes=TRUE)
str(clade.members.list(perisso$phy))
str(clade.members.list(perisso$phy, tip.labels=TRUE))
@

\subsubsection{\fnidx{VCV.array}}

This function replaces the \texttt{vcv.phylo} function provided by the ape package. This is primarily to provide covariance structures that retain separate branch lengths in order to support kappa transformations. For standard two dimensional covariance matrices, it is also faster than the \texttt{vcv.phylo} function for trees of more than about 100 tips.

<<vcvArrayExample>>=
data(perissodactyla)
perisso <- comparative.data(perissodactyla.tree, perissodactyla.data, Binomial)
str(VCV.array(perisso$phy))
str(VCV.array(perisso$phy, dim=3))
@

\bibliographystyle{plainnat}
\bibliography{caper_refs}

\printindex

\appendix

\section{The \texttt{CAIC} `package'}
The \texttt{CAIC} package was initially developed and used within the Section of Ecology and Evolution at Imperial College London as a replacement for the \texttt{CAIC} program \citep{Purvis.Rambaut.1995.a}. Over time, the \texttt{CAIC} package was circulated outside of the Section and accreted additional functions for comparative analysis and was subsequently made available through the R-Forge website. However, there was little integration of the different data formats across functions and no consistent provision of basic methods and documentation to support the main functions. 

The \caper{} package addresses these failings and replaces the package \texttt{CAIC}. Although the code and functions largely mirror the \texttt{CAIC} package, many functions are now used in different ways -- notably through the use of a common comparative data structure -- and function arguments have changed. Where possible these changes are backwards compatible but the two packages are not completely interchangeable. One of the more noticeable changes is that the function \texttt{pglm}\index{Functions!pglm|see{pgls}} has been renamed to \fnidx{pgls}: the function provides generalised least squares but does not provide the link functions of generalised linear models. The change in name of the package therefore reflects both the broader set of methods provided and this step change in the way the package is used.

\end{document}