\name{semsim}
\alias{semsim}
\alias{pms}
\alias{subsumers}
\alias{conceptProbs}
\alias{usageCount}
\title{Compute semantic similarity measures for terms in an object-ontology complex}
\description{Compute semantic similarity measures for terms in an object-ontology complex.}
\usage{
semsim(c1, c2, ooc, acc=NULL, pc=NULL)
conceptProbs(ooc, acc=NULL, inds=NULL)
subsumers(c1, c2, ont, acc=NULL)
pms(c1, c2, ooc, acc=NULL, pc=NULL)
usageCount(map, acc, inds)
}
\arguments{
  \item{c1}{"character": first term to be compared}
  \item{c2}{"character": second term to be compared}
  \item{ooc}{an object of class "OOC": object-ontology complex}
  \item{ont}{an object of class "ontology": annotated rooted DAG}
  \item{acc}{optional (sparse) accessibility matrix for the ontology}
  \item{pc}{optional vector of concept probabilities, if pre-computed}
  \item{map}{the OOmap component of an object-ontology complex (OOC)}
  \item{inds}{vector of numeric indices: row indices of the object-ontology map to be processed}
}
\details{
For large ontologies, computation of the term accessibility relationships
and term probabilities can be costly.  Once these are computed to support
one semsim calculation, they should be saved.  The \code{acc} and \code{pc}
parameters allow use of this saved information.
}
\value{
semsim returns the measure of semantic similarity cited by Lord et al (2003).
}
\references{PW Lord et al, Bioinformatics, 19(10)2003:1275 }
\author{Vince Carey }
%\note{ }
%\seealso{ }
\examples{
#
# we are given a graph of GOMF and the OOmap between LL and GOMF
# derived from humanLLMappings and stored as data resources in
# ontoTools -- these will have to be updated regularly
#
data(goMFgraph.1.15)
data(LL2GOMFooMap.1.15)
#
# build the rooted DAG, the ontology, and the OOC objects
#
gomfrDAG <- new("rootedDAG", root="GO:0003674", DAG=goMFgraph.1.15)
GOMFonto <- new("ontology", name="GOMF", version="bioc GO 1.15", rDAG=gomfrDAG)
LLGOMFOOC <- makeOOC(GOMFonto, LL2GOMFooMap.1.15)
#
# we are given the accessibility matrix for the GO MF graph as a
# data resource, and we can compute some term probabilities
#
data(goMFamat.1.15)
pc <- conceptProbs(LLGOMFOOC, goMFamat.1.15, inds=1:20)
#
# now we will get a sample of GO MF terms and compute the
# semantic similarities of pairs of terms in the sample
#
data(LL2GOMFcp.1.15) # full set of precomputed concept probabilities
library(GO.db)
library(Biobase)
library(combinat)
library(annotate)
# NOTE(review): GO() and the env= argument below reflect older
# annotate/GO APIs -- confirm against the current GO.db interface
GO() # get the GO environments
GOtags <- ls(GOTERM) # all GO identifiers
GOlabs <- mget(GOtags, GOTERM, ifnotfound=NA) # term objects, NA where not found
GOMFtags <- GOtags[ sapply(GOlabs,Ontology)=="MF" ] # restrict to molecular function
GOMFtags <- GOMFtags[!is.na(GOMFtags)] # drop tags lost to unfound terms
GOMFtermObs <- mget(GOMFtags,env=GOTERM)
GOMFterms <- sapply( GOMFtermObs, Term ) # human-readable term names
ntags <- length(GOMFtags)
# disambiguate duplicated term names by appending ".2"
if (any(duplicated(GOMFterms)))
 {
 dups <- (1:ntags)[duplicated(GOMFterms)]
 GOMFterms[dups] <- paste(GOMFterms[dups],".2",sep="")
 }
#names(GOMFterms) <- GOMFtags
set.seed(1234) # does not lead to common samples across platforms...
st <- sample(names(GOMFterms),size=50) # take the sample
st <- intersect(st, names(LL2GOMFcp.1.15))[1:10]
# use only those terms available in bioc GO 1.15
# thus ...
st = c("GO:0004397", "GO:0030215", "GO:0042802", "GO:0008504",
 "GO:0008640", "GO:0008528", "GO:0008375", "GO:0005436",
 "GO:0004756", "GO:0003729" )
pst <- combn(st,2) # get a matrix with the pairs of terms in columns
# hard-coded pair indices to exclude for this GO release
bad = c(4L, 12L, 19L, 25L, 31L, 32L, 33L, 34L, 35L) # can't use 8640
pst = pst[,-bad]
npst <- ncol(pst)
ss <- rep(NA,npst)
for (i in 1:npst) # compute semantic similarities
 {
 cat(i)
 ss[i] <- semsim( pst[1,i], pst[2,i], ooc=LLGOMFOOC,
   acc=goMFamat.1.15, pc=LL2GOMFcp.1.15 )
 }
print(summary(ss))
top <- (1:npst)[ss==max(ss,na.rm=TRUE)][1] # index of the most similar pair
# note -- must come to an understanding of the NAs
print( GOMFterms[ as.character(pst[,top]) ] )
pen <- (1:npst)[ss==max(ss[-top],na.rm=TRUE)][1] # second most similar
print( GOMFterms[ as.character(pst[,pen]) ] )
}
\keyword{ models }