>= Ys <- standards(mix) Ya <- analytes(mix) dim(Ys) dim(Ya) @ % These two functions must work as they should for your data too (if you want to use the ExpressionSet interface, otherwise see Section \ref{sec:matrix}) so make sure that they do. To proceed with normalization we first fit a normalization model. The complexity, i.e. the number of principal components, is decided by the cross-validation functionality of the \emph{pcaMethods} package. To use CRMN we also need access to the experimental design. This can be done automatically by specifying the relevant factors in the \texttt{pData} object of the data or by directly providing a design matrix. I.e.: <>= nfit <- normFit(mix, "crmn", factor="type", ncomp=2) @ % Is the same as doing: <>= G <- model.matrix(~-1+mix$type) nfit <- normFit(mix, "crmn", factor=G, ncomp=2) @ % We proceed by not specifying the complexity but letting the cross-validation take care of this step. <>= nfit <- normFit(mix, "crmn", factor="type") #complexty (number of PC's): sFit(nfit)$ncomp @ % The variance that CRMN identified as systematic error can be visualized using \texttt{slplot}, see Figure \ref{fig:tz}. \begin{figure}[hbt!] \centering <>= slplot(sFit(nfit)$fit$pc, scol=as.integer(mix$runorder)) @ \caption{PCA of the systematic error $T_Z$. Colors correspond to the known batches.} \label{fig:tz} \end{figure} The output from \texttt{normFit} is an object of class \texttt{nFit} and has simple plot and print/show functions which give basic statistics about the normalization model, see Figure \ref{fig:plot}. \begin{figure}[hbt!] \centering <>= nfit plot(nfit) @ \caption{Basic plot function.} \label{fig:plot} \end{figure} To normalize the data we predict the training data. Note that we could also have held some samples out from the training to obtain sample-independent normalization (potentially useful for quality control purposes). <>= normed.crmn <- normPred(nfit, mix, factor="type") @ % We can compare the result with other methods. 
Now we do this using the wrapper function \texttt{normalize} that combines \texttt{normFit} and \texttt{normPred}. See side-by-side PCA score plots of CRMN normalized data versus \emph{One} and NOMIS normalized data in Figure \ref{fig:compare}. <>= normed.one <- normalize(mix, "one", one="Hexadecanoate_13C4") normed.nomis <- normalize(mix, "nomis") @ % \begin{figure}[hbt!] \centering <>= pca.crmn <- pca(scale(log(t(exprs(normed.crmn))))) pca.one <- pca(scale(log(t(exprs(normed.one))))) pca.nomis <- pca(scale(log(t(exprs(normed.nomis))))) par(mfrow=c(1,3)) plot(scores(pca.one), col=as.integer(mix$type), pch=as.integer(mix$runorder), main="Single IS") plot(scores(pca.nomis), col=as.integer(mix$type), pch=as.integer(mix$runorder), main="NOMIS") plot(scores(pca.crmn), col=as.integer(mix$type), pch=as.integer(mix$runorder), main="CRMN") @ % \caption{\label{fig:compare} PCA of the \texttt{mix} data using three different normalizations. Colors indicate the true concentration groups and plot characters indicate the different batches (unwanted effect).} \end{figure} \clearpage{} \subsection{Normalization of a \texttt{matrix}} \label{sec:matrix} First we construct the required input parameters. This would of course normally be done by using \texttt{read.table} to read data as obtained by programs such as TargetSearch, HDA, metAlign etc. <>= Y <- exprs(mix) replicates <- factor(mix$type) G <- model.matrix(~-1+replicates) isIS <- fData(mix)$tag == 'IS' @ % Division of the dataset should now be possible as follows (results hidden). <>= standards(Y, isIS) analytes(Y, isIS) @ % The main business is the same as when normalizing an \texttt{ExpressionSet} except that we now have to remember to pass the vector specifying the standards. <>= nfit <- normFit(Y, "crmn", factors=G, ncomp=2, standards=isIS) @ % To normalize the data we predict the training data. 
<>= normed.crmn <- normPred(nfit, Y, factors=G, standards=isIS, ncomp=2) @ % This could also have been done directly by: <>= normed.crmn <- normalize(Y, "crmn", factors=G, standards=isIS, ncomp=2) @ % \clearpage{} \begin{thebibliography}{RedestigXXXX} \bibitem{RedestigXXXX} Redestig, H., Fukushima, A., Stenlund, H., Moritz, T., Arita, M., Saito, K. and Kusano, M. {\sl Compensation for systematic cross-contribution improves normalization of mass spectrometry based metabolomics data} Anal Chem, 2009, 81, 7974-7980 \bibitem{SysiAho2007} Sysi-Aho, M., Katajamaa, M., Yetukuri, L. and Oresic, M. {\sl Normalization method for metabolomics data using optimal selection of multiple internal standards} BMC Bioinformatics, 2007, 8, 93 \end{thebibliography} \end{document}