## ----setup, echo=FALSE, results="hide", include=FALSE-------------------------
library(knitr)
opts_chunk$set(cache=TRUE, autodep=TRUE, tidy=FALSE, fig.width=5, warning=FALSE, fig.height=5, width=60)
opts_knit$set(width=60)
is.internet <- FALSE

## ----eval=TRUE,echo=FALSE-----------------------------------------------------
library(ggplot2)
theme_set(theme_minimal())

## ----qsg1---------------------------------------------------------------------
library(tmod)
data(EgambiaResults)
tt <- EgambiaResults
tt <- tt[ order(tt$adj.P.Val), ]
l  <- tt$GENE_SYMBOL

## ----qsg2---------------------------------------------------------------------
res <- tmodCERNOtest(l)
head(res)

## ----qsg3---------------------------------------------------------------------
ggPanelplot(res)

## ----qsg4,eval=FALSE----------------------------------------------------------
#  ggEvidencePlot(l, "LI.M37.0", gene.labels=FALSE)
#  ggEvidencePlot(l, "DC.M4.2")
#  ggEvidencePlot(l, "DC.M3.4")
#  ggEvidencePlot(l, "DC.M1.2")

## ----fig.width=10,fig.height=10,echo=FALSE------------------------------------
library(cowplot)
g1 <- ggEvidencePlot(l, "LI.M37.0", gene.labels=FALSE) +
  ggtitle("LI.M37.0")
g2 <- ggEvidencePlot(l, "DC.M4.2") +
  ggtitle("DC.M4.2")
g3 <- ggEvidencePlot(l, "DC.M3.4") +
  ggtitle("DC.M3.4")
g4 <- ggEvidencePlot(l, "DC.M1.2") +
  ggtitle("DC.M1.2")
plot_grid(g1, g2, g3, g4)

## -----------------------------------------------------------------------------
getModuleMembers("DC.M1.2")

## ----twoA,eval=TRUE-----------------------------------------------------------
library(tmod)
data(Egambia)
E <- as.matrix(Egambia[,-c(1:3)])

## ----limma,eval=FALSE---------------------------------------------------------
#  library(limma)
#  design <- cbind(Intercept=rep(1, 30), TB=rep(c(0,1), each= 15))
#  fit <- eBayes(lmFit(E, design))
#  tt <- topTable(fit, coef=2, number=Inf,
#    genelist=Egambia[,1:3])

## ----twoA.2-------------------------------------------------------------------
data(EgambiaResults)
tt <- EgambiaResults

## ----twoA2,echo=FALSE, results="asis"-----------------------------------------
library(pander)
options(digits=3)
tmp <- tt[,c("GENE_SYMBOL", "GENE_NAME", "logFC", "adj.P.Val")]
rownames(tmp) <- NULL
pandoc.table(head(tmp), split.tables=Inf, justify="llrr")

## ----basic2, fig.width=4, fig.height=4----------------------------------------
group <- rep( c("CTRL", "TB"), each=15)
showGene(E["20799",], group,
  main=Egambia["20799", "GENE_SYMBOL"])

## ----fourB--------------------------------------------------------------------
l    <- tt$GENE_SYMBOL
resC <- tmodCERNOtest(l)
head(resC, 15)

## ----panelplot00,fig.width=14,fig.height=7------------------------------------
library(cowplot)

g1 <- ggPanelplot(list(Gambia=resC))

## calculate the number of significant genes
## per module
sgenes <- tmodDecideTests(g=tt$GENE_SYMBOL,
  lfc=tt$logFC,
  pval=tt$adj.P.Val)
names(sgenes) <- "Gambia"
g2 <- ggPanelplot(list(Gambia=resC), sgenes = sgenes)
plot_grid(g1, g2, labels=c("A", "B"))

## ----mod1-------------------------------------------------
fg <- tt$GENE_SYMBOL[tt$adj.P.Val < 0.05 & abs( tt$logFC ) > 1]
resHG <- tmodHGtest(fg=fg, bg=tt$GENE_SYMBOL)
options(width=60)
resHG

## ----mod2-------------------------------------------------
l    <- tt$GENE_SYMBOL
resU <- tmodUtest(l)
head(resU)
nrow(resU)

## ----mod2cerno--------------------------------------------
l    <- tt$GENE_SYMBOL
resCERNO <- tmodCERNOtest(l)
head(resCERNO)
nrow(resCERNO)

## ----plage------------------------------------------------
tmodPLAGEtest(Egambia$GENE_SYMBOL, Egambia[,-c(1:3)], group=group)

## ----heatmap0---------------------------------------------
m <- "LI.M75"
## getModuleMembers returns a list – you can choose to 
## select multiple modules
genes <- getModuleMembers(m)[[1]]
sel <- Egambia$GENE_SYMBOL %in% genes
x <- data.matrix(Egambia)[sel, -c(1:3)] # expression matrix

## ----heatmap1,fig.width=12,fig.height=7,echo=FALSE--------
x0 <- t(scale(t(x)))

cols <- colorRampPalette(c("blue", "white", "red"))(17) 
library(gplots)
heatmap.2(x0, trace="n", 
  labRow=Egambia$GENE_SYMBOL[sel], 
  scale="n", 
  dendrogram="r", Colv=F, 
  col=cols,
  breaks=seq(-2.5, 2.5, length.out=18))

## ----heatmap2,fig.width=10,fig.height=7,echo=FALSE--------
## per group means and standard deviations
ms <- apply(x, 1, function(xx) tapply(xx, group, mean))
sd <- apply(x, 1, function(xx) tapply(xx, group, sd))

library(plotwidgets)
plot(NULL, bty="n", ylim=range(x), xlim=c(0.5, 2.5), 
  xaxt="n", xlab="group", ylab="Expression")
axis(1, at=c(1,2), labels=unique(group))
errbar <- function(x, y0, y1, w=0.1, ...) {
  w <- w/2
  segments(x, y0, x, y1, ...)
  segments(x-w, y0, x+w, y0, ...)
  segments(x-w, y1, x+w, y1, ...)
}

cols <- plotPals("default")
n <- ncol(ms)
segments(1, ms[1,], 2, ms[2,], col=cols, lwd=2)
points(rep(1, n), ms[1,], pch=19, col=cols)
points(rep(2, n), ms[2,], pch=19, col=cols)
errbar(1, ms[1,]-sd[1,], ms[1,]+sd[1,], col=cols)
errbar(2, ms[2,]-sd[2,], ms[2,]+sd[2,], col=cols)

## ----eigengene,fig.width=8,fig.height=5-------------------
par(mfrow=c(1,2))
eig <- eigengene(Egambia[,-c(1:3)], Egambia$GENE_SYMBOL)
showGene(eig["LI.M75", ], group, 
  ylab="Eigengene",
  main="antiviral Interferon signature")
showGene(eig["LI.M16", ], group, 
  ylab="Eigengene",
  main="TLR and inflammatory signaling")

## ----five,fig=TRUE,fig.width=7,fig.height=4---------------
l    <- tt$GENE_SYMBOL
theme_set(theme_minimal())
ggEvidencePlot(l, "LI.M75") 

## ----fig.width=12-----------------------------------------
library(purrr)
sel <- c("LI.M67", "LI.M37.0")
plots <- map(sel, ~ ggEvidencePlot(l, .x, gene.labels=FALSE))
plot_grid(plotlist = plots, labels=sel)

## ---------------------------------------------------------
foo <- tmodCERNOtest(l) %>% dplyr::filter(ID %in% sel) 
foo %>% knitr::kable()

## ----fourC,size="tiny"------------------------------------
resAll <- list(CERNO=resC, U=resU, HG=resHG)
#head(tmodSummary(resAll))

## ----fourC2,results="asis",echo=FALSE,size="tiny"---------
tmp <- tmodSummary(resAll)
rownames(tmp) <- NULL
pandoc.table(head(tmp), split.tables=Inf, justify="llrrrrrr")

## ----panelplots, fig.width=8, fig.height=6----------------
resAll$HG$AUC <- log10(resAll$HG$E) - 0.5
ggPanelplot(resAll)

## ----panelplots2, fig.width=8, fig.height=5---------------
ggPanelplot(resAll, q_thr=1e-3)

## ----pie, fig.width=10,fig.height=6-----------------------
degs <- tmodDecideTests(g=tt$GENE_SYMBOL, lfc=tt$logFC,
                       pval=tt$adj.P.Val)[[1]]
degs <- list(CERNO=degs, HG=degs, U=degs)
ggPanelplot(resAll, sgenes = degs)

## ----ankrd22----------------------------------------------
x <- E[ match("ANKRD22", Egambia$GENE_SYMBOL), ]
cors <- t(cor(x, t(E)))
ord <- order(abs(cors), decreasing=TRUE)
head(tmodCERNOtest(Egambia$GENE_SYMBOL[ ord ]))

## ----m75--------------------------------------------------
g <- getGenes("LI.M75", genes=Egambia$GENE_SYMBOL, 
              as.list=TRUE)
x <- E[ match(g[[1]], Egambia$GENE_SYMBOL), ]

## calculating the "eigengene" (PC1)
pca <- prcomp(t(x), scale.=T)
eigen <- pca$x[,1]
cors <- t(cor(eigen, t(E)))

## order all genes by the correlation between the gene and the PC1
ord <- order(abs(cors), decreasing=TRUE)
head(tmodCERNOtest(Egambia$GENE_SYMBOL[ ord ]))

## ----six,fig=TRUE,fig.width=8,fig.height=5----------------
mypal <- c("#E69F00", "#56B4E9")
pca <- prcomp(t(E), scale.=TRUE)

col <- mypal[ factor(group) ]
par(mfrow=c(1, 2))
l<-pcaplot(pca, group=group, col=col)
 
legend("topleft", as.character(l$groups),
       pch=l$pch,
       col=l$colors, bty="n")
l<-pcaplot(pca, group=group, col=col, components=3:4)
legend("topleft", as.character(l$groups),
       pch=l$pch,
       col=l$colors, bty="n")

## ----seven------------------------------------------------
o <- order(abs(pca$rotation[,4]), decreasing=TRUE)
l <- Egambia$GENE_SYMBOL[o]
res <- tmodUtest(l)
head(res)

## ----pcasum-----------------------------------------------
# Calculate enrichment for each component
gs   <- Egambia$GENE_SYMBOL
# function calculating the enrichment of a PC
gn.f <- function(r) {
    tmodCERNOtest(gs[order(abs(r), decreasing=T)],
                qval=0.01)
}
x <- apply(pca$rotation, 2, gn.f)
tmodSummary(x, filter.empty=TRUE)[1:5,]

## ----pcsum2,fig=TRUE,fig.width=8,fig.height=5,results="hide"----
ggPanelplot(x)

## ----pcsum3,fig=TRUE,fig.width=8,fig.height=5-------------
qfnc <- function(r) quantile(r, 0.75)
qqs <- apply(pca$rotation[,1:10], 2, qfnc)
gloadings <- tmodDecideTests(gs, lfc=pca$rotation[,1:10], lfc.thr=qqs)
ggPanelplot(x[1:10], sgenes = gloadings) 

## ----fiveB------------------------------------------------
l    <- tt$GENE_SYMBOL
res2 <- tmodUtest(l, mset="LI")
head( res2 )

## ----tmodobj----------------------------------------------
data(tmod)
tmod

## ----tmodobj2---------------------------------------------
length(tmod)
sel <- grep("Interferon", tmod$gs$Title, ignore.case=TRUE)
ifn <- tmod[sel]
ifn
length(ifn)

## ----exmset-----------------------------------------------
mymset <- makeTmodGS(
  gs=data.frame(ID=c("A", "B"),
             Title=c("A title", 
                      "B title")),
  gs2gene=list(
    A=c("G1", "G2"),
    B=c("G3", "G4"))
)
mymset

## ----eval=FALSE-------------------------------------------
#  library(msigdbr)
#  msig <- msigdbr()
#  msig <- makeTmodFromDataFrame(df=msig,
#    feature_col="gene_symbol",
#    module_col="gs_id", title_col="gs_name",
#    extra_module_cols=c("gs_cat", "gs_subcat", "gs_url",
#                        "gs_exact_source", "gs_description"))

## ----msigdbparse1, eval=FALSE-----------------------------
#  msig <- tmodImportMSigDB("msigdb_v2022.1.Hs.xml")

## ----msigdb6,eval=FALSE-----------------------------------
#  res <- tmodCERNOtest(tt$GENE_SYMBOL, mset=msig)

## ----msigdb7,eval=FALSE-----------------------------------
#  sel <- msig$gs$gs_cat == "H"
#  tmodCERNOtest(tt$GENE_SYMBOL, mset=msig[sel] )

## ----biomart,eval=FALSE-----------------------------------
#  library(biomaRt)
#  mart <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
#  bm <- getBM(filters="hgnc_symbol",
#              values = Egambia$GENE_SYMBOL,
#              attributes = c( "hgnc_symbol", "entrezgene", "reactome", "go_id", "name_1006", "go_linkage_type"),
#              mart=mart)

## ----biomart2,eval=FALSE----------------------------------
#  m2g_r <- with(bm[ bm$reactome != "", ], split(hgnc_symbol, reactome))
#  m2g_g <- with(bm[ bm$go_id != "", ], split(hgnc_symbol, go_id))
#  
#  ll <- lengths(m2g_r)
#  m2g_r <- m2g_r[ ll >= 5 & ll <= 250 ]
#  ll <- lengths(m2g_g)
#  m2g_g <- m2g_g[ ll >= 5 & ll <= 250 ]
#  
#  m_r <- data.frame(ID=names(m2g_r), Title=names(m2g_r))
#  m_g <- data.frame(ID=names(m2g_g),
#    Title=bm$name_1006[ match(names(m2g_g), bm$go_id)])
#  
#  ensemblR  <- makeTmod(modules=m_r, modules2genes=m2g_r)
#  ensemblGO <- makeTmod(modules=m_g, modules2genes=m2g_g)
#  
#  ## these objects are no longer necessary
#  rm(bm, m_g, m_r, m2g_r, m2g_g)

## ----orghs,eval=FALSE-------------------------------------
#  library(org.Hs.eg.db)
#  mtab <- toTable(org.Hs.egGO)

## ----orghs2,eval=FALSE------------------------------------
#  mtab <- mtab[ mtab$Ontology == "BP", ]
#  m2g <- split(mtab$gene_id, mtab$go_id)
#  ## remove the rather large object
#  rm(mtab)
#  ll <- lengths(m2g)
#  m2g <- m2g[ ll >= 10 & ll <= 100 ]
#  length(m2g)

## ----orghs3,eval=FALSE------------------------------------
#  library(GO.db)
#  gt <- toTable(GOTERM)
#  m <- data.frame(ID=names(m2g))
#  m$Title <- gt$Term[ match(m$ID, gt$go_id) ]
#  
#  goset <- makeTmod(modules=m, modules2genes=m2g)
#  rm(gt, m2g, m)

## ----kegg, eval=FALSE-------------------------------------
#  library(KEGGREST)
#  pathways <- keggLink("pathway", "hsa")
#  
#  ## get pathway Names in addition to IDs
#  paths    <- sapply(unique(pathways), function(p) keggGet(p)[[1]]$NAME)
#  m <- data.frame(ID=unique(pathways), Title=paths)
#  
#  ## m2g is the mapping from modules (pathways) to genes
#  m2g <- split(names(pathways), pathways)
#  
#  ## kegg object can now be used with tmod
#  kegg <- makeTmod(modules=m, modules2genes=m2g)

## ----kegg2, eval=FALSE------------------------------------
#  eg <- paste0("hsa:", tt$EG)
#  tmodCERNOtest(eg, mset="kegg")

## ----msigdb1,eval=FALSE-----------------------------------
#  library(XML)
#  foo  <- xmlParse( "msigdb_v2022.1.Hs.xml" )
#  foo2 <- xmlToList(foo)

## ----msigdb2,eval=FALSE-----------------------------------
#  path1 <- foo2[[1]]
#  class(path1)

## ----msigdb2b,eval=FALSE----------------------------------
#  names(path1)

## ----msigdb3,eval=FALSE-----------------------------------
#  orgs <- sapply(foo2, function(x) x["ORGANISM"])
#  unique(orgs)
#  
#  foo3 <- foo2[ orgs == "Homo sapiens" ]
#  foo3 <- foo3[ ! sapply(foo3, is.null) ]

## ----msigdb4,eval=FALSE-----------------------------------
#  modules <- t(sapply(foo3,
#    function(x)
#      x[ c("SYSTEMATIC_NAME", "STANDARD_NAME", "CATEGORY_CODE", "SUBCATEGORY_CODE") ]))
#  colnames(modules) <- c( "ID", "Title", "Category", "Subcategory" )
#  modules <- data.frame(modules, stringsAsFactors=FALSE)
#  nrow(modules)

## ----msigdb5,eval=FALSE-----------------------------------
#  m2g <- lapply(foo3,
#    function(x) strsplit( x["MEMBERS_SYMBOLIZED"], "," )[[1]])
#  names(m2g) <- modules$ID
#  
#  mymsig <- makeTmod(modules=modules, modules2genes=m2g)
#  mymsig