## ----include=FALSE------------------------------------------------------------ knitr::opts_chunk$set( collapse=TRUE, comment="#>" ) ## ----message=FALSE, warning=FALSE, results=FALSE------------------------------ library(CSOA) library(ggplot2) library(patchwork) library(scRNAseq) library(scuttle) library(stringr) library(Seurat) sceObj <- BaronPancreasData('human') sceObj <- logNormCounts(sceObj) seuratObj <- as.Seurat(sceObj) acinarMarkers <- c('PRSS1', 'KLK1', 'CTRC', 'PNLIP', 'AKR1C3', 'CTRB1', 'DUOXA2', 'ALDOB', 'REG3A', 'SERPINA3', 'PRSS3', 'REG1B', 'CFB', 'GDF15', 'MUC1','ANPEP', 'ANGPTL4', 'OLFM4', 'GSTA1', 'LGALS2', 'PDZK1IP1', 'RARRES2', 'CXCL17', 'UBD', 'GSTA2', 'LYZ', 'RBPJL', 'PTF1A', 'CELA3A', 'SPINK1', 'ZG16', 'CEL', 'CELA2A', 'CPB1', 'CELA1', 'PNLIPRP1', 'RNASE1', 'AMY2B', 'CPA2','CPA1', 'CELA3B', 'CTRB2', 'PLA2G1B', 'PRSS2', 'CLPS', 'REG1A', 'SYCN') ## ----------------------------------------------------------------------------- seuratObj <- runCSOA(seuratObj, list(CSOA_acinar = acinarMarkers), pairFileTemplate='pairs') ## ----------------------------------------------------------------------------- acinarPairs <- qGrab('pairsCSOA_acinar.qs') head(acinarPairs) ## ----------------------------------------------------------------------------- genes <- overlapGenes(acinarPairs) nGenes <- length(genes) nGenes length(acinarMarkers) setdiff(acinarMarkers, genes) ## ----------------------------------------------------------------------------- seuratObj <- runCSOA(seuratObj, list(CSOA_acinar2 = genes)) ## ----------------------------------------------------------------------------- printStats <- function(seuratObj, col1, col2){ paste0('Pearson correlation: ', cor(seuratObj[[]][[col1]], seuratObj[[]][[col2]])) absDiffs <- abs(seuratObj[[]][[col1]] - seuratObj[[]][[col2]]) absDiffsStats <- sapply(list(mean, median, max, min), function(fun) fun(absDiffs)) names(absDiffsStats) <- paste0(c('mean', 'median', 'max', 'min'), 'AbsDiff') absDiffsStats } ## ----------------------------------------------------------------------------- printStats(seuratObj, 'CSOA_acinar', 'CSOA_acinar2') ## ----------------------------------------------------------------------------- acinarPairsSub <- subset(acinarPairs, revCumsum > 10) nrow(acinarPairsSub) ## ----------------------------------------------------------------------------- genes90 <- overlapGenes(acinarPairsSub) genes90 nGenes90 <- length(genes90) nGenes90 ## ----------------------------------------------------------------------------- seuratObj <- runCSOA(seuratObj, list(CSOA_acinar3 = genes90)) printStats(seuratObj, 'CSOA_acinar', 'CSOA_acinar3') ## ----------------------------------------------------------------------------- set.seed(123) ## ----------------------------------------------------------------------------- runCSOAReplace <- function(seuratObj, markers, nReplacedGenes, pairFileTemplate = 'pairs'){ genesComplement <- setdiff(rownames(seuratObj), markers) geneSet <- list(c(sample(markers, length(markers) - nReplacedGenes), sample(genesComplement, nReplacedGenes))) names(geneSet) <- paste0('CSOA_replace', nReplacedGenes) seuratObj <- runCSOA(seuratObj, geneSet, pairFileTemplate=pairFileTemplate) return(seuratObj) } ## ----warning=FALSE, out.height='80%', out.width='80%', fig.height=4, fig.width=5---- nReplacedGenes <- ceiling(nGenes90 / 2) seuratObj <- runCSOAReplace(seuratObj, genes90, nReplacedGenes) newCol <- paste0('CSOA_replace', nReplacedGenes) printStats(seuratObj, 'CSOA_acinar3', newCol) VlnPlot(seuratObj, newCol, group.by='label') + NoLegend() ## ----------------------------------------------------------------------------- length(setdiff(overlapGenes(qGrab('pairsCSOA_replace12.qs')), genes90)) ## ----out.height='80%', out.width='80%', fig.height=4, fig.width=5------------- nReplacedGenes <- nGenes90 - 2 seuratObj <- runCSOAReplace(seuratObj, genes90, nReplacedGenes) newCol <- paste0('CSOA_replace', nReplacedGenes) printStats(seuratObj, 'CSOA_acinar3', newCol) dfReplace <- qGrab('pairsCSOA_replace22.qs') dfReplace setdiff(overlapGenes(dfReplace), genes90) VlnPlot(seuratObj, newCol, group.by='label') + NoLegend() ## ----warning=FALSE------------------------------------------------------------ stellateMarkers <- c('COL6A1', 'RGS5', 'COL1A1', 'MMP11', 'THY1', 'COL6A3', 'SFRP2', 'COL1A2', 'TNFAIP6', 'TIMP3', 'SPARC', 'COL3A1', 'MGP', 'COL6A2', 'COL4A1', 'FN1', 'SPON2', 'TIMP1', 'TGFB1', 'INHBA', 'PDGFRA', 'NDUFA4L2', 'MMP14', 'CTGF', 'CYGB', 'KRT10', 'PDGFRB', 'DYNLT1', 'GEM') mixedMarkers <- union(acinarMarkers, stellateMarkers) ## ----warning=FALSE, out.height='80%', out.width='80%', fig.height=4, fig.width=5---- seuratObj <- runCSOA(seuratObj, list(CSOA_mixed = mixedMarkers), pairFileTemplate='pairs') VlnPlot(seuratObj, 'CSOA_mixed', group.by='label') + NoLegend() ## ----out.height='80%', out.width='80%', fig.height=5, fig.width=5------------- dfNetwork <- qGrab('pairsCSOA_mixed.qs') networkPlot(dfNetwork, rankCol='overlapRank') ## ----------------------------------------------------------------------------- dfNetwork <- connectedComponents(dfNetwork) dplyr::count(dfNetwork, component) ## ----warning=FALSE, out.height='80%', out.width='80%', fig.height=7, fig.width=6---- components <- c(1, 2) seuratObj <- scoreModules(seuratObj, dfNetwork, components) p1 <- VlnPlot(seuratObj, 'CSOA_component1', group.by='label') + NoLegend() + theme(axis.text.y = element_blank()) p2 <- VlnPlot(seuratObj, 'CSOA_component2', group.by='label') + NoLegend() + theme(axis.text.y = element_blank()) p1 / p2 ## ----warning=FALSE------------------------------------------------------------ genes1 <- overlapGenes(subset(dfNetwork, component == 1)) genes1 genes2 <- overlapGenes(subset(dfNetwork, component == 2)) genes2 ## ----warning=FALSE, out.height='80%', out.width='80%', fig.height=4, fig.width=5---- ductalMarkers <- c('CFTR', 'SERPINA5', 'SLPI', 'TFF1', 'CFB', 'LGALS4', 'CTSH', 'PERP', 'PDLIM3', 'WFDC2', 'SLC3A1', 'AQP1', 'ALDH1A3', 'VTCN1', 'KRT19', 'TFF2', 'KRT7', 'CLDN4', 'LAMB3', 'TACSTD2', 'CCL2', 'DCDC2','CXCL2', 'CLDN10', 'HNF1B', 'KRT20', 'MUC1', 'ONECUT1', 'AMBP', 'HHEX', 'ANXA4', 'SPP1', 'PDX1', 'SERPINA3', 'GDF15', 'AKR1C3', 'MMP7', 'DEFB1', 'SERPING1', 'TSPAN8', 'CLDN1', 'S100A10', 'PIGR') mixedMarkers <- union(acinarMarkers, ductalMarkers) seuratObj <- runCSOA(seuratObj, list(CSOA_mixed = mixedMarkers), pairFileTemplate='pairs') VlnPlot(seuratObj, 'CSOA_mixed', group.by='label') + NoLegend() dfNetwork <- qGrab('pairsCSOA_mixed.qs') dfNetwork <- connectedComponents(dfNetwork) dplyr::count(dfNetwork, component) ## ----------------------------------------------------------------------------- seuratObj <- runCSOA(seuratObj, list(CSOA_mixed = mixedMarkers), jaccardCutoff=1/3, pairFileTemplate='pairs') ## ----out.height='80%', out.width='80%', fig.height=5, fig.width=5------------- dfNetwork <- qGrab('pairsCSOA_mixed.qs') networkPlot(dfNetwork, rankCol='overlapRank') ## ----message=FALSE, warning=FALSE, out.height='80%', out.width='80%', fig.height=7, fig.width=6---- dfNetwork <- connectedComponents(dfNetwork) components <- unique(dfNetwork$component) seuratObj <- scoreModules(seuratObj, dfNetwork, components) p1 <- VlnPlot(seuratObj, 'CSOA_component1', group.by='label') + NoLegend() + theme(axis.text.y=element_blank()) p2 <- VlnPlot(seuratObj, 'CSOA_component2', group.by='label') + NoLegend() + theme(axis.text.y=element_blank()) p1 / p2 ## ----out.height='100%', out.width='100%', fig.height=6, fig.width=6----------- geneRadialPlot(acinarPairs, title='Top 100 overlap genes - Acinar markers') ## ----message=FALSE, out.height='100%', out.width='100%', fig.height=6, fig.width=6---- alphaMarkers <- c('GCG', 'TTR', 'PCSK2', 'FXYD5', 'LDB2', 'MAFB', 'CHGA', 'SCGB2A1', 'GLS', 'FAP', 'DPP4', 'GPR119', 'PAX6', 'NEUROD1', 'LOXL4', 'PLCE1', 'GC', 'KLHL41', 'FEV', 'PTGER3', 'RFX6', 'SMARCA1', 'PGR', 'IRX1', 'UCP2', 'RGS4', 'KCNK16', 'GLP1R', 'ARX', 'POU3F4', 'RESP18', 'PYY', 'SLC38A5', 'TM4SF4', 'CRYBA2', 'SH3GL2', 'PCSK1', 'PRRG2', 'IRX2', 'ALDH1A1','PEMT', 'SMIM24', 'F10', 'SCGN', 'SLC30A8') gammaMarkers <- c('PPY', 'ABCC9', 'FGB', 'ZNF503', 'MEIS1', 'LMO3', 'EGR3', 'CHN2', 'PTGFR', 'ENTPD2', 'AQP3', 'THSD7A', 'CARTPT', 'ISL1', 'PAX6', 'NEUROD1', 'APOBEC2', 'SEMA3E', 'SLITRK6', 'SERTM1', 'PXK', 'PPY2P', 'ETV1', 'ARX', 'CMTM8', 'SCGB2A1', 'FXYD2', 'SCGN') geneSets <- list(acinarMarkers, alphaMarkers, ductalMarkers, gammaMarkers) names(geneSets) <- c('CSOA_acinar', 'CSOA_alpha', 'CSOA_ductal', 'CSOA_gamma') seuratObj <- runCSOA(seuratObj, geneSets, pairFileTemplate='pairs', keepOverlapOrder=TRUE) geneRadialPlot(lapply(paste0('pairs', names(geneSets), '.qs'), qGrab), groupLegendName='Gene set', groupNames=str_remove(names(geneSets), 'CSOA_'), cutoff=15, title='Top 15 overlap genes', extraCircles=2) ## ----------------------------------------------------------------------------- sessionInfo()