## ----setup, include=FALSE----------------------------------------------------- options(width=80) knitr::opts_chunk$set(collapse=TRUE, message=TRUE, warning=TRUE, comment="", fig.align="center", fig.wide=TRUE) ## ----message=FALSE------------------------------------------------------------ library(SummarizedExperiment) library(GSVA) library(GSVAdata) data(geneprotExpCostaEtAl2021) ls() protExpCostaEtAl2021 assayNames(protExpCostaEtAl2021) ## ----eval=FALSE--------------------------------------------------------------- # library(GSEABase) # # URL <- "https://data.broadinstitute.org/gsea-msigdb/msigdb/release/2024.1.Hs/c7.immunesigdb.v2024.1.Hs.symbols.gmt" # c7.genesets <- readGMT(URL) ## ----echo=FALSE, message=FALSE, warning=FALSE--------------------------------- library(GSEABase) gmtfname <- system.file("extdata", "c7.immunesigdb.v2024.1.Hs.symbols.gmt.gz", package="GSVAdata") c7.genesets <- readGMT(gmtfname) c7.genesets ## ----------------------------------------------------------------------------- innatepat <- c("NKCELL_VS_.+_UP", "MAST_CELL_VS_.+_UP", "EOSINOPHIL_VS_.+_UP", "BASOPHIL_VS_.+_UP", "MACROPHAGE_VS_.+_UP", "NEUTROPHIL_VS_.+_UP") innatepat <- paste(innatepat, collapse="|") innategsets <- names(c7.genesets)[grep(innatepat, names(c7.genesets))] length(innategsets) adaptivepat <- c("CD4_TCELL_VS_.+_UP", "CD8_TCELL_VS_.+_UP", "BCELL_VS_.+_UP") adaptivepat <- paste(adaptivepat, collapse="|") adaptivegsets <- names(c7.genesets)[grep(adaptivepat, names(c7.genesets))] excludepat <- c("NAIVE", "LUPUS", "MYELOID") excludepat <- paste(excludepat, collapse="|") adaptivegsets <- adaptivegsets[-grep(excludepat, adaptivegsets)] length(adaptivegsets) c7.genesets.filt <- c7.genesets[c(innategsets, adaptivegsets)] length(c7.genesets.filt) ## ----------------------------------------------------------------------------- logCPMsWithNAs <- assay(protExpCostaEtAl2021, "logCPM") logCPMsWithNAs[1:5, 1:5] logCPMsWithNAs[is.na(assay(protExpCostaEtAl2021, "log2protexp"))] <- NA logCPMsWithNAs[1:5, 1:5] ## ----------------------------------------------------------------------------- gsvaAnnotation(logCPMsWithNAs) <- EntrezIdentifier("org.Hs.eg.db") ## ----------------------------------------------------------------------------- gsvapar <- gsvaParam(logCPMsWithNAs, c7.genesets.filt, minSize=5) ## ----------------------------------------------------------------------------- es_gsva_everything <- gsva(gsvapar) es_gsva_everything[1:10, 1:4] all(is.na(es_gsva_everything)) ## ----error=TRUE--------------------------------------------------------------- try({ gsvapar <- gsvaParam(logCPMsWithNAs, c7.genesets.filt, minSize=5, use="all.obs") }) ## ----------------------------------------------------------------------------- gsvapar <- gsvaParam(logCPMsWithNAs, c7.genesets.filt, minSize=5, use="na.rm") es_gsva_narm <- gsva(gsvapar) round(es_gsva_narm[1:10, 1:4], digits=2) ## ----------------------------------------------------------------------------- ssgseapar <- ssgseaParam(logCPMsWithNAs, c7.genesets.filt, minSize=5, use="na.rm") es_ssgsea_narm <- gsva(ssgseapar) round(es_ssgsea_narm[1:10, 1:4], digits=2) ## ----------------------------------------------------------------------------- gsvaAnnotation(protExpCostaEtAl2021) <- EntrezIdentifier("org.Hs.eg.db") gsvapar <- gsvaParam(protExpCostaEtAl2021, c7.genesets.filt, assay="logCPM", minSize=5) es_gsva <- gsva(gsvapar) ssgseapar <- ssgseaParam(protExpCostaEtAl2021, c7.genesets.filt, assay="logCPM", minSize=5) es_ssgsea <- gsva(ssgseapar) ## ----missingdatacomp, echo=TRUE, fig.width=5, fig.height=5, out.width="600px", dpi=100, fig.cap="Correlation between enrichment scores calculated from RNA-seq expression profiles with missing values, and those calculated from the complete version of the same data."---- r_es_gsva <- r_es_ssgsea <- numeric(nrow(es_gsva)) for (i in 1:nrow(es_gsva)) { r_es_gsva[i] <- cor(assay(es_gsva)[i, ], es_gsva_narm[i, ], use="complete.obs") r_es_ssgsea[i] <- cor(assay(es_ssgsea)[i, ], es_ssgsea_narm[i, ], use="complete.obs") } boxplot(list(GSVA=r_es_gsva, ssGSEA=r_es_ssgsea), ylab="Correlation with complete data", las=1) ## ----session_info, cache=FALSE------------------------------------------------ sessionInfo()