\name{SIM-package}

\alias{SIM-package}

\alias{SIM}

\docType{package}

\title{Statistical Integration of Microarrays}

\description{
	SIM is a statistical model to identify associations between two genomic datasets, where one is assigned as the
dependent variable and the other as the independent variable, e.g. copy number measurements on several samples versus
expression measurements on the same samples.
The integrated analysis can be run on a region of interest, using either the same region for both the dependent and
independent datasets or a different region for each. For each dependent feature a P-value measures the association with the
independent data; the contribution of each independent feature is given as a Z-score. The integrated analysis
is based on the random-effect model for gene-sets as implemented in \link[globaltest:v4_globaltest]{globaltest}.

% TODO: maybe something about annotation?

By default we use \code{adjust.method = "BY"} (Benjamini-Yekutieli) for multiple testing correction.
This method accounts for dependence between measurements and is more conservative than "BH" (Benjamini-Hochberg).
For details on the multiple testing correction methods see \link[stats:p.adjust]{p.adjust}.
In our experience, a rather low-stringency cut-off of 20\% on the BY-adjusted P-values
allows the detection of associations in data with a low number of samples or a low
frequency of aberrations. False positives are rarely observed.

Make sure that the array probes are mapped to the same build of the genome, and that the
\link{chrom.table} used by \link{integrated.analysis} is from the same build as well.
See \link{sim.update.chrom.table}.
}

\details{
\tabular{ll}{
Package: \tab SIM\cr
Type: \tab Package\cr
Version: \tab 1.99.0\cr
Date: \tab 2009-08-13\cr
License: \tab Open\cr}
}
 
\author{Marten Boetzer, Melle Sieswerda, Renee X. de Menezes  \email{R.X.Menezes@lumc.nl}}

\references{
Menezes RX, Boetzer M, Sieswerda M, van Ommen GJ, Boer JM (2009). 
Integrated analysis of DNA copy number and gene expression microarray data using gene sets.
\emph{BMC Bioinformatics}, \bold{10}, 203.

Goeman JJ, van de Geer SA, de Kort F, van Houwelingen HC (2004). 
A global test for groups of genes: testing association with a clinical outcome. 
\emph{Bioinformatics}, \bold{20}, 93-99.
}

\keyword{package}

\seealso{
\link{assemble.data}, 
\link{integrated.analysis}, 
\link{sim.plot.zscore.heatmap}, 
\link{sim.plot.pvals.on.region}, 
\link{sim.plot.pvals.on.genome}, 
\link{tabulate.pvals}, 
\link{tabulate.top.dep.features}, 
\link{tabulate.top.indep.features}, 
\link{impute.nas.by.surrounding}, 
\link{sim.update.chrom.table}, 
\link{sim.plot.overlapping.indep.dep.features},
\link{getoverlappingregions}
}
\examples{
# Load the two example datasets and the sample selection used below.
data(expr.data)
data(acgh.data)
data(samples)

# Assemble the data under the run name "chr8q": the aCGH data is passed as the
# dependent dataset and the expression data as the independent dataset. The
# first four column names of each dataset are passed as annotation columns.
assemble.data(
    dep.data   = acgh.data,
    indep.data = expr.data,
    dep.ann    = colnames(acgh.data)[1:4],
    indep.ann  = colnames(expr.data)[1:4],
    dep.id     = "ID",
    dep.chr    = "CHROMOSOME",
    dep.pos    = "STARTPOS",
    dep.symb   = "Symbol",
    indep.id   = "ID",
    indep.chr  = "CHROMOSOME",
    indep.pos  = "STARTPOS",
    indep.symb = "Symbol",
    overwrite  = TRUE,
    run.name   = "chr8q"
)

# Run the integrated analysis on region 8q, requesting z-scores as well.
integrated.analysis(
    samples       = samples,
    input.regions = "8q",
    zscores       = TRUE,
    run.name      = "chr8q"
)

# The remaining calls plot and tabulate the results stored under run "chr8q".

# P-values along the genome
sim.plot.pvals.on.genome(
    input.regions = "8q",
    significance  = c(0.2, 0.05),
    adjust.method = "BY",
    pdf           = FALSE,
    run.name      = "chr8q"
)

# P-values along the selected region
sim.plot.pvals.on.region(
    input.regions = "8q",
    adjust.method = "BY",
    run.name      = "chr8q"
)

# Association heatmap of the z-scores, with add.plot = "smooth"
sim.plot.zscore.heatmap(
    input.regions    = "8q",
    method           = "full",
    significance     = 0.2,
    z.threshold      = 3,
    show.names.indep = TRUE,
    show.names.dep   = TRUE,
    adjust.method    = "BY",
    add.plot         = "smooth",
    smooth.lambda    = 2,
    pdf              = FALSE,
    run.name         = "chr8q"
)

# Same heatmap with stricter significance, a lower z-score threshold and
# add.plot = "heatmap"; names of the dependent features are not shown
sim.plot.zscore.heatmap(
    input.regions    = "8q",
    method           = "full",
    significance     = 0.05,
    z.threshold      = 1,
    show.names.indep = TRUE,
    show.names.dep   = FALSE,
    adjust.method    = "BY",
    add.plot         = "heatmap",
    smooth.lambda    = 2,
    pdf              = FALSE,
    run.name         = "chr8q"
)

# Heatmap only (add.plot = "none"), with both sets of feature names shown
sim.plot.zscore.heatmap(
    input.regions    = "8q",
    method           = "full",
    significance     = 0.05,
    z.threshold      = 1,
    show.names.indep = TRUE,
    show.names.dep   = TRUE,
    adjust.method    = "BY",
    add.plot         = "none",
    pdf              = FALSE,
    run.name         = "chr8q"
)

# Tabulate the P-values per region over the given bins (prints to screen)
tabulate.pvals(
    input.regions = "8q",
    adjust.method = "BY",
    bins          = c(0.001, 0.005, 0.01, 0.025, 0.05, 0.075, 0.10, 0.20, 1.0),
    run.name      = "chr8q"
)

# Tabulate the top dependent features and show the first rows for 8q
top.dep <- tabulate.top.dep.features(
    input.regions = "8q",
    adjust.method = "BY",
    method        = "full",
    significance  = 0.05,
    run.name      = "chr8q"
)
head(top.dep[["8q"]])

# Tabulate the top independent features and show the first rows for 8q
top.indep <- tabulate.top.indep.features(
    input.regions = "8q",
    adjust.method = "BY",
    method        = "full",
    significance  = 0.05,
    z.threshold   = c(-1, 1),
    run.name      = "chr8q"
)
head(top.indep[["8q"]])

# Plot the overlapping independent and dependent features
sim.plot.overlapping.indep.dep.features(
    input.regions = "8q",
    adjust.method = "BY",
    significance  = 0.1,
    z.threshold   = c(-1, 1),
    log           = TRUE,
    summarize     = "consecutive",
    pdf           = FALSE,
    method        = "full",
    run.name      = "chr8q"
)
						  
}