%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Do not modify this file since it was automatically generated from:
% 
%  normalizeCurveFit.matrix.R
% 
% by the Rdoc compiler part of the R.oo package.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


\name{normalizeCurveFit.matrix}
\alias{normalizeCurveFit.matrix}
\alias{normalizeCurveFit.matrix}

\alias{normalizeLoess.matrix}
\alias{normalizeLowess.matrix}
\alias{normalizeSpline.matrix}
\alias{normalizeRobustSpline.matrix}

\encoding{latin1}

\title{Weighted curve-fit normalization between a pair of channels}

\description{
  Weighted curve-fit normalization between a pair of channels.

  This method will estimate a smooth function of the dependency
  between the log-ratios and the log-intensity of the two channels and
  then correct the log-ratios (only) in order to remove the dependency.
  This is method is also known as \emph{intensity-dependent} or
  \emph{lowess normalization}.

  The curve-fit methods are by nature limited to paired-channel data.
  There exist at least one method trying to overcome this limitation,
  namely the cyclic-lowess [1], which applies the paired
  curve-fit method iteratively over all pairs of channels/arrays.
  Cyclic-lowess is not implented here.

  We recommend that affine normalization [2] is used instead of curve-fit
  normalization.
}

\usage{\method{normalizeCurveFit}{matrix}(X, weights=NULL, typeOfWeights=c("datapoint"), method=c("loess", "lowess", "spline", "robustSpline"), bandwidth=NULL, satSignal=2^16 - 1, ...)}

\arguments{
 \item{X}{An Nx2 \code{\link[base]{matrix}} where the columns represent the two channels
   to be normalized.}
 \item{weights}{If \code{\link[base]{NULL}}, non-weighted normalization is done.
   If data-point weights are used, this should be a \code{\link[base]{vector}} of length
   N of data point weights used when estimating the normalization
   function.
 }
 \item{typeOfWeights}{A \code{\link[base]{character}} string specifying the type of
   weights given in argument \code{weights}.
 }
 \item{method}{\code{\link[base]{character}} string specifying which method to use when
   fitting the intensity-dependent function.
   Supported methods:
    \code{"loess"} (better than lowess),
    \code{"lowess"} (classic; supports only zero-one weights),
    \code{"spline"} (more robust than lowess at lower and upper
                     intensities; supports only zero-one weights),
    \code{"robustSpline"} (better than spline).
 }
 \item{bandwidth}{A \code{\link[base]{double}} value specifying the bandwidth of the
  estimator used.
 }
 \item{satSignal}{Signals equal to or above this threshold will not
   be used in the fitting.
 }
 \item{...}{Not used.}
}

\value{
  A Nx2 \code{\link[base]{matrix}} of the normalized two channels.
  The fitted model is returned as attribute \code{modelFit}.
}

\details{
 A smooth function \eqn{c(A)} is fitted throught data in \eqn{(A,M)},
 where \eqn{M=log_2(y_2/y_1)} and \eqn{A=1/2*log_2(y_2*y_1)}. Data is
 normalized by \eqn{M <- M - c(A)}.

 Loess is by far the slowest method of the four, then lowess, and then
 robust spline, which iteratively calls the spline method.
}

\section{Negative, non-positive, and saturated values}{
 Non-positive values are set to not-a-number (\code{\link[base:is.finite]{NaN}}).
 Data points that are saturated in one or more channels are not used
 to estimate the normalization function, but they are normalized.
}

\section{Missing values}{
 The estimation of the affine normalization function will only be made
 based on complete non-saturated observations, i.e. observations that
 contains no \code{\link[base]{NA}} values nor saturated values as defined by \code{satSignal}.
}

\section{Weighted normalization}{
 Each data point, that is, each row in \code{X}, which is a
 vector of length 2, can be assigned a weight in [0,1] specifying how much
 it should \emph{affect the fitting of the affine normalization function}.
 Weights are given by argument \code{weights}, which should be a \code{\link[base]{numeric}}
 \code{\link[base]{vector}} of length N. Regardless of weights, all data points are
 \emph{normalized} based on the fitted normalization function.

 Note that the lowess and the spline method only support zero-one
 \{0,1\} weights.
 For such methods, all weights that are less than a half are set to zero.
}

\section{Details on loess}{
 For \code{\link[stats]{loess}}, the arguments \code{family="symmetric"},
 \code{degree=1}, \code{span=3/4},
 \code{control=loess.control(trace.hat="approximate"},
 \code{iterations=5}, \code{surface="direct")} are used.
}

\author{Henrik Bengtsson (\url{http://www.braju.com/R/})}

\references{
  [1] M. \enc{Åstrand}{Astrand},
      Contrast Normalization of Oligonucleotide Arrays,
      Journal Computational Biology, 2003, 10, 95-102. \cr
  [2] Henrik Bengtsson and Ola \enc{Hössjer}{Hossjer}, \emph{Methodological Study of Affine Transformations of Gene Expression Data}, Methodological study of affine transformations of gene expression data with proposed robust non-parametric multi-dimensional normalization method, BMC Bioinformatics, 2006, 7:100.
 \cr
}

\examples{
 pathname <- system.file("data-ex", "PMT-RGData.dat", package="aroma.light")
rg <- read.table(pathname, header=TRUE, sep="\t")
nbrOfScans <- max(rg$slide)

rg <- as.list(rg)
for (field in c("R", "G"))
  rg[[field]] <- matrix(as.double(rg[[field]]), ncol=nbrOfScans)
rg$slide <- rg$spot <- NULL
rg <- as.matrix(as.data.frame(rg))
colnames(rg) <- rep(c("R", "G"), each=nbrOfScans)

layout(matrix(c(1,2,0,3,4,0,5,6,7), ncol=3, byrow=TRUE))

rgC <- rg
for (channel in c("R", "G")) {
  sidx <- which(colnames(rg) == channel)
  channelColor <- switch(channel, R="red", G="green");

  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # The raw data
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  plotMvsAPairs(rg[,sidx])
  title(main=paste("Observed", channel))
  box(col=channelColor)
 
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  # The calibrated data
  # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  rgC[,sidx] <- calibrateMultiscan(rg[,sidx], average=NULL)

  plotMvsAPairs(rgC[,sidx])
  title(main=paste("Calibrated", channel))
  box(col=channelColor)
} # for (channel ...)


# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# The average calibrated data
#
# Note how the red signals are weaker than the green. The reason
# for this can be that the scale factor in the green channel is
# greater than in the red channel, but it can also be that there
# is a remaining relative difference in bias between the green
# and the red channel, a bias that precedes the scanning.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
rgCA <- rg
for (channel in c("R", "G")) {
  sidx <- which(colnames(rg) == channel)
  rgCA[,sidx] <- calibrateMultiscan(rg[,sidx])
}

rgCAavg <- matrix(NA, nrow=nrow(rgCA), ncol=2)
colnames(rgCAavg) <- c("R", "G");
for (channel in c("R", "G")) {
  sidx <- which(colnames(rg) == channel)
  rgCAavg[,channel] <- apply(rgCA[,sidx], MARGIN=1, FUN=median, na.rm=TRUE);
}

# Add some "fake" outliers
outliers <- 1:600
rgCAavg[outliers,"G"] <- 50000;

plotMvsA(rgCAavg)
title(main="Average calibrated (AC)")

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Normalize data
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Weight-down outliers when normalizing
weights <- rep(1, nrow(rgCAavg));
weights[outliers] <- 0.001;

# Affine normalization of channels
rgCANa <- normalizeAffine(rgCAavg, weights=weights)
# It is always ok to rescale the affine normalized data if its
# done on (R,G); not on (A,M)! However, this is only needed for
# esthetic purposes.
rgCANa <- rgCANa *2^1.4
plotMvsA(rgCANa)
title(main="Normalized AC")

# Curve-fit (lowess) normalization
rgCANlw <- normalizeLowess(rgCAavg, weights=weights)
plotMvsA(rgCANlw, col="orange", add=TRUE)

# Curve-fit (loess) normalization
rgCANl <- normalizeLoess(rgCAavg, weights=weights)
plotMvsA(rgCANl, col="red", add=TRUE)

# Curve-fit (robust spline) normalization
rgCANrs <- normalizeRobustSpline(rgCAavg, weights=weights)
plotMvsA(rgCANrs, col="blue", add=TRUE)

legend(x=0,y=16, legend=c("affine", "lowess", "loess", "r. spline"), pch=19,
       col=c("black", "orange", "red", "blue"), ncol=2, x.intersp=0.3, bty="n")


plotMvsMPairs(cbind(rgCANa, rgCANlw), col="orange", xlab=expression(M[affine]))
title(main="Normalized AC")
plotMvsMPairs(cbind(rgCANa, rgCANl), col="red", add=TRUE)
plotMvsMPairs(cbind(rgCANa, rgCANrs), col="blue", add=TRUE)
abline(a=0, b=1, lty=2)
legend(x=-6,y=6, legend=c("lowess", "loess", "r. spline"), pch=19,
       col=c("orange", "red", "blue"), ncol=2, x.intersp=0.3, bty="n")


}

\seealso{
  \code{\link[aroma.light:normalizeAffine.matrix]{*normalizeAffine}()}.
}
\keyword{methods}