\name{GeneSet-class} \docType{class} \alias{GeneSet-class} \alias{[,GeneSet,character,ANY-method} \alias{[,GeneSet,numeric,ANY-method} \alias{[,ExpressionSet,GeneSet,ANY-method} \alias{[[,GeneSet,character-method} \alias{[[,GeneSet,numeric-method} \alias{Logic,character,GeneSet-method} \alias{|,GeneSet,GeneSet-method} \alias{|,GeneSet,character-method} \alias{$,GeneSet-method} \alias{&,GeneSet,GeneSet-method} \alias{&,GeneSet,character-method} \alias{collectionType<-,GeneSet,CollectionType-method} \alias{collectionType,GeneSet-method} \alias{contributor<-,GeneSet,character-method} \alias{contributor,GeneSet-method} \alias{creationDate<-,GeneSet,character-method} \alias{creationDate,GeneSet-method} \alias{description<-,GeneSet,character-method} \alias{description,GeneSet-method} \alias{geneIds<-,GeneSet,character-method} \alias{geneIds,GeneSet-method} \alias{initialize,GeneSet-method} \alias{intersect,GeneSet,GeneSet-method} \alias{longDescription<-,GeneSet,character-method} \alias{longDescription,GeneSet-method} \alias{organism<-,GeneSet,character-method} \alias{organism,GeneSet-method} \alias{organism,GeneIdentifierType-method} \alias{organism,GOAllFrameIdentifier-method} \alias{organism,KEGGFrameIdentifier-method} \alias{pubMedIds<-,GeneSet,character-method} \alias{pubMedIds,GeneSet-method} \alias{setdiff,GeneSet,GeneSet-method} \alias{setIdentifier<-,GeneSet,character-method} \alias{setIdentifier,GeneSet-method} \alias{setName<-,GeneSet,character-method} \alias{setName,GeneSet-method} \alias{geneIdType<-,GeneSet,character-method} \alias{geneIdType<-,GeneSet,GeneIdentifierType-method} \alias{geneIdType,GeneSet-method} \alias{setVersion<-,GeneSet,Versions-method} \alias{setVersion,GeneSet-method} \alias{show,GeneSet-method} \alias{toGmt,GeneSet-method} \alias{union,GeneSet,GeneSet-method} \alias{urls<-,GeneSet,character-method} \alias{urls,GeneSet-method} \alias{collectionType<-} \alias{collectionType} \alias{contributor<-} \alias{contributor} 
\alias{creationDate<-} \alias{creationDate} \alias{geneIds<-} \alias{geneIds} \alias{geneIdType} \alias{geneIdType<-} \alias{longDescription<-} \alias{longDescription} \alias{organism<-} \alias{organism} \alias{setIdentifier<-} \alias{setIdentifier} \alias{setName<-} \alias{setName} \alias{setVersion<-} \alias{setVersion} \alias{urls<-} \alias{urls} \title{Class "GeneSet"} \description{ A \code{GeneSet} contains a set of gene identifiers. Each gene set has a \code{geneIdType}, indicating how the gene identifiers should be interpreted (e.g., as Entrez identifiers), and a \code{collectionType}, indicating the origin of the gene set (perhaps including additional information about the set, as in the \code{\linkS4class{BroadCollection}} type). Conversion between identifiers, subsetting, and logical (set) operations can be performed. Relationships between genes and phenotype in a \code{GeneSet} can be summarized using \code{coloring} to create a \code{GeneColorSet}. A \code{GeneSet} can be exported to XML with \code{toBroadXML}. } \section{Objects from the Class}{ Construct a \code{GeneSet} with a \code{\link{GeneSet}} method (e.g., from a character vector of gene names, or an \code{\link[Biobase:class.ExpressionSet]{ExpressionSet}}), or from gene sets stored as XML (locally or on the internet; see \code{getBroadSets}) } \section{Slots}{ \describe{ \item{\code{setName}:}{Object of class \code{"ScalarCharacter"} containing a short name (single word is best) to identify the set.} \item{\code{setIdentifier}:}{Object of class \code{"ScalarCharacter"} containing a (unique) identifier for the set.} \item{\code{geneIdType}:}{Object of class \code{"GeneIdentifierType"} containing information about how the gene identifiers are encoded. 
See \code{\linkS4class{GeneIdentifierType}} and related classes.} \item{\code{geneIds}:}{Object of class \code{"character"} containing the gene identifiers.} \item{\code{collectionType}:}{Object of class \code{"CollectionType"} containing information about how the geneIds were collected, including perhaps additional information unique to the collection methodology. See \code{\linkS4class{CollectionType}} and related classes.} \item{\code{shortDescription}:}{Object of class \code{"ScalarCharacter"} representing a short description (1 line) of the gene set.} \item{\code{longDescription}:}{Object of class \code{"ScalarCharacter"} providing a longer description (e.g., like an abstract) of the gene set.} \item{\code{organism}:}{Object of class \code{"ScalarCharacter"} representing the organism the gene set is derived from.} \item{\code{pubMedIds}:}{Object of class \code{"character"} containing PubMed ids related to the gene set.} \item{\code{urls}:}{Object of class \code{"character"} containing urls used to construct or manipulate the gene set.} \item{\code{contributor}:}{Object of class \code{"character"} identifying who created the gene set.} \item{\code{version}:}{Object of class \code{"Versions"} containing a version number, manually curated (i.e., by the \code{contributor}) to provide a consistent way of tracking a gene set.} \item{\code{creationDate}:}{Object of class \code{"character"} containing the character string representation of the date on which the gene set was created.} } } \section{Methods}{ Gene set construction: \describe{ \item{GeneSet}{See \code{\link{GeneSet}} methods and \code{\link{getBroadSets}} for convenient construction.} } Slot access (e.g., \code{setName}) and assignment (e.g., \code{setName<-}): \describe{ \item{collectionType<-}{\code{signature(object = "GeneSet", value = "CollectionType")}} \item{collectionType}{\code{signature(object = "GeneSet")}} \item{contributor<-}{\code{signature(object = "GeneSet", value = "character")}} 
\item{contributor}{\code{signature(object = "GeneSet")}} \item{creationDate<-}{\code{signature(object = "GeneSet", value = "character")}} \item{creationDate}{\code{signature(object = "GeneSet")}} \item{description<-}{\code{signature(object = "GeneSet", value = "character")}} \item{description}{\code{signature(object = "GeneSet")}} \item{geneIds<-}{\code{signature(object = "GeneSet", value = "character")}} \item{geneIds}{\code{signature(object = "GeneSet")}} \item{longDescription<-}{\code{signature(object = "GeneSet", value = "character")}} \item{longDescription}{\code{signature(object = "GeneSet")}} \item{organism<-}{\code{signature(object = "GeneSet", value = "character")}} \item{organism}{\code{signature(object = "GeneSet")}} \item{pubMedIds<-}{\code{signature(object = "GeneSet", value = "character")}} \item{pubMedIds}{\code{signature(object = "GeneSet")}} \item{setdiff}{\code{signature(x = "GeneSet", y = "GeneSet")}} \item{setIdentifier<-}{\code{signature(object = "GeneSet", value = "character")}} \item{setIdentifier}{\code{signature(object = "GeneSet")}} \item{setName<-}{\code{signature(object = "GeneSet", value = "character")}} \item{setName}{\code{signature(object = "GeneSet")}} \item{geneIdType<-}{ \code{signature(object = "GeneSet", verbose=FALSE, value = "character")}, \code{signature(object = "GeneSet", verbose=FALSE, value = "GeneIdentifierType")}: These methods attempt to coerce geneIds from the current type to the type named by \code{value}. Successful coercion requires an appropriate method for \code{\link{mapIdentifiers}}.} \item{geneIdType}{\code{signature(object = "GeneSet")}} \item{setVersion<-}{\code{signature(object = "GeneSet", value = "Versions")}} \item{setVersion}{\code{signature(object = "GeneSet")}} \item{urls<-}{\code{signature(object = "GeneSet", value = "character")}} \item{urls}{\code{signature(object = "GeneSet")}} } Logical and subsetting operations: \describe{ \item{union}{\code{signature(x = "GeneSet", y = "GeneSet")}: ... 
} \item{|}{\code{signature(e1 = "GeneSet", e2 = "GeneSet")}: calculate the logical `or' (union) of two gene sets. The sets must contain elements of the same \code{geneIdType}.} \item{|}{ \code{signature(e1 = "GeneSet", e2 = "character")}, \code{signature(e1 = "character", e2 = "GeneSet")}: calculate the logical `or' (union) of a gene set and a character vector, i.e., add the geneIds named in the character vector to the gene set.} \item{intersect}{\code{signature(x = "GeneSet", y = "GeneSet")}:} \item{&}{\code{signature(e1 = "GeneSet", e2 = "GeneSet")}: calculate the logical `and' (intersection) of two gene sets.} \item{&}{ \code{signature(e1 = "GeneSet", e2 = "character")}, \code{signature(e1 = "character", e2 = "GeneSet")}: calculate the logical `and' (intersection) of a gene set and a character vector, creating a new gene set containing only those genes named in the character vector.} \item{setdiff}{ \code{signature(x = "GeneSet", y = "GeneSet")}, \code{signature(x = "GeneSet", y = "character")}, \code{signature(x = "character", y = "GeneSet")}: calculate the logical set difference between two gene sets, or between a gene set and a character vector.} \item{[}{ \code{signature(x = "GeneSet", i="character")}, \code{signature(x = "GeneSet", i="numeric")}: subset the gene set by index (\code{i="numeric"}) or value (\code{i="character"}). Genes are re-ordered as required.} \item{[}{ \code{signature(x = "ExpressionSet", i = "GeneSet")}: subset the expression set, using genes in the gene set to select features. 
Genes in the gene set are coerced to appropriate annotation type if necessary (by consulting the \code{annotation} slot of the expression set, and using \code{geneIdType<-}).} \item{[[}{\code{signature(x = "GeneSet")}: select a single gene from the gene set.} \item{\$}{\code{signature(x = "GeneSet")}: select a single gene from the gene set, allowing partial matching.} } Useful additional methods include: \describe{ \item{GeneColorSet}{\code{signature(type = "GeneSet")}: create a 'color' gene set from a \code{GeneSet}, containing information about phenotype. This method has a required argument \code{phenotype}, a character string describing the phenotype for which color is available. See \code{\linkS4class{GeneColorSet}}.} \item{mapIdentifiers}{Use the code in the examples to list available methods. These convert genes from one \code{GeneIdentifierType} to another. See \code{\link{mapIdentifiers}} and specific methods in \code{\linkS4class{GeneIdentifierType}} for additional detail.} \item{incidence}{Summarize shared membership in genes across gene sets. See \code{\link{incidence-methods}}.} \item{toGmt}{Export to 'GMT' format file. See \code{\link{toGmt}}.} \item{show}{\code{signature(object = "GeneSet")}: display a short summary of the gene set.} \item{details}{\code{signature(object = "GeneSet")}: display additional information about the gene set. 
See \code{\link{details}}.} \item{initialize}{\code{signature(.Object = "GeneSet")}: Used internally during gene set construction.} } } \author{Martin Morgan } \seealso{ \code{\linkS4class{GeneColorSet}} \code{\linkS4class{CollectionType}} \code{\linkS4class{GeneIdentifierType}} } \examples{ ## Empty gene set GeneSet() ## Gene set from ExpressionSet data(sample.ExpressionSet) gs1 <- GeneSet(sample.ExpressionSet[100:109]) ## GeneSet from Broad XML; 'fl' could be a url fl <- system.file("extdata", "Broad.xml", package="GSEABase") gs2 <- getBroadSets(fl)[[1]] # actually, a list of two gene sets ## GeneSet from list of geneIds geneIds <- geneIds(gs2) # any character vector would do gs3 <- GeneSet(geneIds=geneIds) ## unspecified set type, so... is(geneIdType(gs3), "NullIdentifier") == TRUE ## update set type to match encoding of identifiers geneIdType(gs2) geneIdType(gs3) <- SymbolIdentifier() ## Convert between set types; this consults the 'annotation' ## information encoded in the 'AnnotationIdentifier' set type and the ## corresponding annotation package. 
\dontrun{ gs4 <- gs1 geneIdType(gs4) <- EntrezIdentifier() } ## logical (set) operations gs5 <- GeneSet(sample.ExpressionSet[100:109], setName="subset1") gs6 <- GeneSet(sample.ExpressionSet[105:114], setName="subset2") ## intersection: 5 'genes'; note the set name '(subset1 & subset2)' gs5 & gs6 ## union: 15 'genes'; note the set name gs5 | gs6 ## an identity gs7 <- gs5 | gs6 gs8 <- setdiff(gs5, gs6) | (gs5 & gs6) | setdiff(gs6, gs5) identical(geneIds(gs7), geneIds(gs8)) identical(gs7, gs8) == FALSE # gs7 and gs8 setNames differ ## output tmp <- tempfile() toBroadXML(gs2, tmp) noquote(readLines(tmp)) ## must be BroadCollection() collectionType try(toBroadXML(gs1)) gs9 <- gs1 collectionType(gs9) <- BroadCollection() toBroadXML(gs9, tmp) unlink(tmp) toBroadXML(gs9) # no connection --> character vector ## list of geneIds --> vector of Broad GENESET XML gs10 <- getBroadSets(fl) # two sets entries <- sapply(gs10, function(x) toBroadXML(x)[[2]]) ## list mapIdentifiers available for GeneSet showMethods("mapIdentifiers", classes="GeneSet", inherit=FALSE) } \keyword{classes}