## ----style, echo=FALSE, results='asis', message=FALSE------------------------- BiocStyle::markdown() knitr::opts_chunk$set(tidy = FALSE, warning = FALSE, message = FALSE) # rmarkdown::render("Readme.Rmd", envir=.GlobalEnv) ## ----------------------------------------------------------------------------- library(EWCE) library(ewceData) library(ggplot2) library(cowplot) library(limma) library(readxl) theme_set(theme_cowplot()) ## ----------------------------------------------------------------------------- #example datasets held in ewceData package cortex_mrna <- cortex_mrna() gene="Necab1" cellExpDist = data.frame(e=cortex_mrna$exp[gene,], l1=cortex_mrna$annot[ colnames(cortex_mrna$exp),]$level1class) ggplot(cellExpDist) + geom_boxplot(aes(x=l1,y=e)) + xlab("Cell type") + ylab("Unique Molecule Count") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) ## ----------------------------------------------------------------------------- nKeep = 1000 must_keep = c("Apoe","Gfap","Gapdh") set.seed(123458) keep_genes = c(must_keep,sample(rownames(cortex_mrna$exp),997)) cortex_mrna$exp = cortex_mrna$exp[keep_genes,] ## ----------------------------------------------------------------------------- library(sctransform) scT = sctransform::vst(cortex_mrna$exp, return_cell_attr = TRUE) cortex_mrna$exp_scT = correct_counts(scT, cortex_mrna$exp) # umi_corrected cortex_mrna$exp_scT_normed = Matrix::t(Matrix::t(cortex_mrna$exp_scT)*(1/Matrix::colSums( cortex_mrna$exp_scT))) ## ----------------------------------------------------------------------------- # Generate celltype data for just the cortex/hippocampus data exp_CortexOnly_DROPPED = drop_uninformative_genes(exp=cortex_mrna$exp_scT_normed, level2annot = cortex_mrna$annot$level2class) annotLevels = list(level1class=cortex_mrna$annot$level1class, level2class=cortex_mrna$annot$level2class) fNames_CortexOnly = generate_celltype_data(exp=exp_CortexOnly_DROPPED, annotLevels=annotLevels, groupName="kiCortexOnly", savePath=tempdir()) print(fNames_CortexOnly) fNames_CortexOnly = filter_genes_without_1to1_homolog(fNames_CortexOnly) print(fNames_CortexOnly) load(fNames_CortexOnly[1]) ## ---- fig.width = 7, fig.height = 4------------------------------------------- #Load merged cortex and hypothalamus dataset generated by Karolinska institute ctd <- ctd() set.seed(1234) library(reshape2) genes = c("Apoe","Gfap","Gapdh") exp = melt(cbind(ctd[[1]]$mean_exp[genes,],genes),id.vars="genes") colnames(exp) = c("Gene","Cell","AvgExp") ggplot(exp)+geom_bar(aes(x=Cell,y=AvgExp),stat="identity")+facet_grid(Gene~.)+ theme(axis.text.x = element_text(angle = 90, hjust = 1)) ## ---- fig.width = 7, fig.height = 4------------------------------------------- exp = melt(cbind(data.frame(ctd[[1]]$specificity[genes,]),genes), id.vars="genes") colnames(exp) = c("Gene","Cell","Expression") ggplot(exp)+geom_bar(aes(x=Cell,y=Expression),stat="identity")+ facet_grid(Gene~.)+ theme(axis.text.x = element_text(angle = 90, hjust = 1)) ## ---- fig.width = 7, fig.height = 4------------------------------------------- exp = melt(cbind(data.frame(ctd[[2]]$specificity[genes,]),genes), id.vars="genes") colnames(exp) = c("Gene","Cell","Specificity") ggplot(exp)+geom_bar(aes(x=Cell,y=Specificity),stat="identity")+ facet_grid(Gene~.)+ theme(axis.text.x = element_text(angle = 90, hjust = 1)) ## ----------------------------------------------------------------------------- example_genelist <- example_genelist() print(example_genelist) ## ----------------------------------------------------------------------------- mouse_to_human_homologs <- mouse_to_human_homologs() m2h = unique(mouse_to_human_homologs[,c("HGNC.symbol","MGI.symbol")]) mouse.hits = unique(m2h[m2h$HGNC.symbol %in% example_genelist,"MGI.symbol"]) #mouse.bg = unique(setdiff(m2h$MGI.symbol,mouse.hits)) mouse.bg = unique(m2h$MGI.symbol) ## ----------------------------------------------------------------------------- print(mouse.hits) ## ----------------------------------------------------------------------------- # Use 100 bootstrap lists for speed, for publishable analysis use >10000 reps=100 level=1 # <- Use level 1 annotations (i.e. Interneurons) ## ----------------------------------------------------------------------------- # Bootstrap significance test, no control for transcript length and GC content set.seed(12345678) full_results = bootstrap_enrichment_test(sct_data=ctd,hits=mouse.hits, bg=mouse.bg, reps=reps,annotLevel=level) ## ----------------------------------------------------------------------------- print(full_results$results[order(full_results$results$p),3:5][1:6,]) ## ---- fig.width = 7, fig.height = 4------------------------------------------- print(ewce_plot(full_results$results,mtc_method="BH")$plain) ## ----------------------------------------------------------------------------- print(ewce_plot(full_results$results,mtc_method="BH",ctd=ctd)$withDendro) ## ----------------------------------------------------------------------------- #human.hits = unique(m2h[m2h$HGNC.symbol %in% example_genelist,"HGNC.symbol"]) #human.bg = unique(setdiff(m2h$HGNC.symbol,human.hits)) human.hits = example_genelist human.bg = unique(c(human.hits,m2h$HGNC.symbol)) ## ---- results="hide"---------------------------------------------------------- # Again, set seed for reproducibility set.seed(12345678) # Bootstrap significance test controlling for transcript length and GC content cont_results = bootstrap_enrichment_test(sct_data=ctd,hits=human.hits, bg=human.bg,reps=reps,annotLevel=1, geneSizeControl=TRUE,genelistSpecies="human", sctSpecies="mouse") ## ---- fig.width = 7, fig.height = 4------------------------------------------- print(ewce_plot(cont_results$results,mtc_method="BH")$plain) ## ---- fig.width = 8, fig.height = 4, results="hide"--------------------------- # Again, set seed for reproducibility set.seed(12345678) # Bootstrap significance test controlling for transcript length and GC content cont_results = bootstrap_enrichment_test(sct_data=ctd,hits=human.hits, bg=human.bg,reps=reps,annotLevel=2,geneSizeControl=TRUE, genelistSpecies="human",sctSpecies="mouse") print(ewce_plot(cont_results$results,mtc_method="BH")$plain) ## ----------------------------------------------------------------------------- # Again, set seed for reproducibility set.seed(12345678) gene.list.2 = mouse.bg[1:30] second_results = bootstrap_enrichment_test(sct_data=ctd,hits=gene.list.2, bg=mouse.bg,reps=reps,annotLevel=1) full_res2 = data.frame(full_results$results,list="Alzheimers") scnd_res2 = data.frame(second_results$results,list="Second") merged_results = rbind(full_res2,scnd_res2) ## ----fig.width = 7, fig.height = 4-------------------------------------------- print(ewce_plot(total_res=merged_results,mtc_method="BH")$plain) ## ---- results="hide"---------------------------------------------------------- # ewce_expression_data calls bootstrap_enrichment_test so # set the seed for reproducibility set.seed(12345678) tt_alzh <- tt_alzh() tt_results = ewce_expression_data(sct_data=ctd,tt=tt_alzh,annotLevel=1, ttSpecies="human",sctSpecies="mouse") ## ----fig.width = 7, fig.height = 4-------------------------------------------- ewce_plot(tt_results$joint_results)$plain ## ----------------------------------------------------------------------------- # Set seed for bootstrap reproducibility set.seed(12345678) full_result_path = generate_bootstrap_plots_for_transcriptome(sct_data=ctd,tt=tt_alzh, annotLevel=1, full_results=tt_results, listFileName="examples", reps=reps,ttSpecies="human", sctSpecies="mouse", onlySignif=FALSE, savePath=tempdir()) ## ----results="hide"----------------------------------------------------------- # Load the data tt_alzh_BA36 <- tt_alzh_BA36() tt_alzh_BA44 <- tt_alzh_BA44() # set seed for reproducibility set.seed(12345678) # Run EWCE analysis tt_results_36 = ewce_expression_data(sct_data=ctd,tt=tt_alzh_BA36,annotLevel=1, ttSpecies="human",sctSpecies="mouse") tt_results_44 = ewce_expression_data(sct_data=ctd,tt=tt_alzh_BA44,annotLevel=1, ttSpecies="human",sctSpecies="mouse") # Fill a list with the results results = add_res_to_merging_list(tt_results) results = add_res_to_merging_list(tt_results_36,results) results = add_res_to_merging_list(tt_results_44,results) # Perform the merged analysis # For publication reps should be higher merged_res = merged_ewce(results,reps=10) print(merged_res) ## ----fig.width = 7, fig.height = 4-------------------------------------------- print(ewce_plot(merged_res)$plain) ## ----------------------------------------------------------------------------- mouse_to_human_homologs <- mouse_to_human_homologs() m2h = unique(mouse_to_human_homologs[,c("HGNC.symbol","MGI.symbol")]) mouse.hits = unique(m2h[m2h$HGNC.symbol %in% example_genelist,"MGI.symbol"]) mouse.bg = unique(m2h$MGI.symbol) reps=reps # set seed for bootstrap reproducibility set.seed(12345678) unconditional_results = bootstrap_enrichment_test(sct_data=ctd,hits=mouse.hits, bg=mouse.bg,reps=reps,annotLevel=1,genelistSpecies="mouse", sctSpecies="mouse") conditional_results_micro = bootstrap_enrichment_test(sct_data=ctd,hits=mouse.hits, bg=mouse.bg,reps=reps,annotLevel=1, controlledCT="microglia",genelistSpecies="mouse", sctSpecies="mouse") conditional_results_astro = bootstrap_enrichment_test(sct_data=ctd,hits=mouse.hits,bg=mouse.bg, reps=reps,annotLevel=1, controlledCT="astrocytes_ependymal", genelistSpecies="mouse",sctSpecies="mouse") full_res1 = data.frame(unconditional_results$results, list="Unconditional Enrichment") full_res2 = data.frame(conditional_results_micro$results, list="Conditional Enrichment (Microglia controlled)") full_res3 = data.frame(conditional_results_astro$results, list="Conditional Enrichment (Astrocyte controlled)") merged_results = rbind(rbind(full_res1,full_res2),full_res3) print(ewce_plot(total_res=merged_results,mtc_method="BH")$plain)