Enrichment

library(TDbasedUFE)
library(TDbasedUFEadv)
library(RTCGA.rnaseq)
#> Loading required package: RTCGA
#> Warning in fun(libname, pkgname): Package 'RTCGA' is deprecated and will be removed from Bioconductor
#>   version 3.24
library(RTCGA.clinical)

Introduction

It might be helpful to demonstrate how to evaluate selected genes by enrichment analysis. Here, we show some of useful tools applied to the output from TDbasedUFEadv In order foe this, we reproduce one example in “How to use TDbasedUFEadv” as follows.

Multi <- list(
  BLCA.rnaseq[seq_len(100), 1 + seq_len(1000)],
  BRCA.rnaseq[seq_len(100), 1 + seq_len(1000)],
  CESC.rnaseq[seq_len(100), 1 + seq_len(1000)],
  COAD.rnaseq[seq_len(100), 1 + seq_len(1000)]
)
Z <- prepareTensorfromList(Multi, 10L)
Z <- aperm(Z, c(2, 1, 3))
Clinical <- list(BLCA.clinical, BRCA.clinical, CESC.clinical, COAD.clinical)
Multi_sample <- list(
  BLCA.rnaseq[seq_len(100), 1, drop = FALSE],
  BRCA.rnaseq[seq_len(100), 1, drop = FALSE],
  CESC.rnaseq[seq_len(100), 1, drop = FALSE],
  COAD.rnaseq[seq_len(100), 1, drop = FALSE]
)
# patient.stage_event.tnm_categories.pathologic_categories.pathologic_m
ID_column_of_Multi_sample <- c(770, 1482, 773, 791)
# patient.bcr_patient_barcode
ID_column_of_Clinical <- c(20, 20, 12, 14)
Z <- PrepareSummarizedExperimentTensor(
  feature = colnames(ACC.rnaseq)[1 + seq_len(1000)],
  sample = array("", 1), value = Z,
  sampleData = prepareCondTCGA(
    Multi_sample, Clinical,
    ID_column_of_Multi_sample, ID_column_of_Clinical
  )
)
HOSVD <- computeHosvd(Z)
#>   |                                                                              |                                                                      |   0%  |                                                                              |=======================                                               |  33%  |                                                                              |===============================================                       |  67%  |                                                                              |======================================================================| 100%
cond <- attr(Z, "sampleData")
index <- selectFeatureProj(HOSVD, Multi, cond, de = 1e-3, input_all = 3) # Batch mode

head(tableFeatures(Z, index))
#>       Feature       p value adjusted p value
#> 10    ACTB|60  0.000000e+00     0.000000e+00
#> 11   ACTG1|71  0.000000e+00     0.000000e+00
#> 37  ALDOA|226  0.000000e+00     0.000000e+00
#> 19 ADAM6|8755 5.698305e-299    1.424576e-296
#> 22  AEBP1|165 1.057392e-218    2.114785e-216
#> 9    ACTA2|59 7.862975e-198    1.310496e-195
genes <- unlist(lapply(strsplit(tableFeatures(Z, index)[, 1], "|",
  fixed = TRUE
), "[", 1))
entrez <- unlist(lapply(strsplit(tableFeatures(Z, index)[, 1], "|",
  fixed = TRUE
), "[", 2))

Enrichr

Enrichr(Kuleshov et al. 2016) is one of tools that often provides us significant results toward genes selected by TDbasedUFE and TDbasedUFEadv.

if (!requireNamespace("enrichR", quietly = TRUE)) {
    stop("The enrichR package is required to run this example.")
}

enrichR::setEnrichrSite("Enrichr")

dbs <- c(
    "GO_Molecular_Function_2015",
    "GO_Cellular_Component_2015",
    "GO_Biological_Process_2015"
)

enriched <- enrichR::enrichr(genes, dbs)

enrichR::plotEnrich(
    enriched$GO_Biological_Process_2015,
    showTerms = 20,
    numChar = 40,
    y = "Count",
    orderBy = "P.value"
)

Enrichr can provide you huge number of enrichment analyses, many of which have good compatibility with the genes selected by TDbasedUFE as well as TDbasedUFEadv by the experience. Please check Enrichr’s web site to see what kinds of enrichment analyses can be done.

STRING

STRING(Szklarczyk et al. 2018) is enrichment analyses based upon protein-protein interaction, which is known to provide often significant results toward genes selected by TDbasedUFE as well as TDbasedUFEadv.

if (!requireNamespace("STRINGdb", quietly = TRUE)) {
    stop("The STRINGdb package is required to run this example.")
}

string_cache <- tools::R_user_dir("TDbasedUFEadv", which = "cache")
dir.create(string_cache, recursive = TRUE, showWarnings = FALSE)

string_db <- STRINGdb::STRINGdb$new(
    version = "11.5",
    species = 9606,
    score_threshold = 200,
    network_type = "full",
    input_directory = string_cache
)

example1_mapped <- string_db$map(
    data.frame(genes = genes),
    "genes",
    removeUnmappedRows = TRUE
)

hits <- example1_mapped$STRING_id
string_db$plot_network(hits)

enrichplot

Although these above can provide us enough number of information to evaluate the genes selected by TDbasedUFE as well as TDbasedUFEadv, one might need all one package for which one does not how to decide which category must be evaluated in enrichment analysis.

In this case, we would recommend Metascape(Zhou et al. 2019) that unfortunately
does not have the ways approached from R. Thus, we recommend RITAN as an alternative. It can list significant ones among multiple categories.

if (
    requireNamespace("DOSE", quietly = TRUE) &&
    requireNamespace("enrichplot", quietly = TRUE) &&
    requireNamespace("gson", quietly = TRUE) &&
    requireNamespace("ggplot2", quietly = TRUE)
) {
    edo <- DOSE::enrichDGN(entrez)

    enrichplot::dotplot(edo, showCategory = 30) +
        ggplot2::ggtitle("dotplot for ORA")
} else {
    message(
        "Skipping DOSE/enrichplot example because one or more optional packages ",
        "are not installed: DOSE, enrichplot, gson, ggplot2."
    )
}
#> 
#> Warning in calculate_qvalue(ora_res$pvalue): qvalue package not installed.
#> qvalue column will be NA.

sessionInfo()
#> R version 4.6.0 (2026-04-24)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.4 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] RTCGA.clinical_20151101.41.0 RTCGA.rnaseq_20151101.41.0  
#> [3] RTCGA_1.42.0                 TDbasedUFEadv_1.12.1        
#> [5] TDbasedUFE_1.12.0            BiocStyle_2.40.0            
#> 
#> loaded via a namespace (and not attached):
#>   [1] RColorBrewer_1.1-3      sys_3.4.3               jsonlite_2.0.0         
#>   [4] tidydr_0.0.6            tximport_1.40.0         magrittr_2.0.5         
#>   [7] ggtangle_0.1.2          farver_2.1.2            rmarkdown_2.31         
#>  [10] fs_2.1.0                vctrs_0.7.3             memoise_2.0.1          
#>  [13] RCurl_1.98-1.18         ggtree_4.2.0            rstatix_0.7.3          
#>  [16] htmltools_0.5.9         broom_1.0.12            Formula_1.2-5          
#>  [19] gridGraphics_0.5-1      sass_0.4.10             bslib_0.10.0           
#>  [22] htmlwidgets_1.6.4       plyr_1.8.9              cachem_1.1.0           
#>  [25] tximportData_1.39.0     buildtools_1.0.0        igraph_2.3.0           
#>  [28] mime_0.13               lifecycle_1.0.5         pkgconfig_2.0.3        
#>  [31] gson_0.1.0              Matrix_1.7-5            R6_2.6.1               
#>  [34] fastmap_1.2.0           shiny_1.13.0            digest_0.6.39          
#>  [37] aplot_0.2.9             enrichplot_1.32.0       ggnewscale_0.5.2       
#>  [40] patchwork_1.3.2         AnnotationDbi_1.74.0    S4Vectors_0.50.0       
#>  [43] MOFAdata_1.27.0         GenomicRanges_1.64.0    RSQLite_2.4.6          
#>  [46] ggpubr_0.6.3            labeling_0.4.3          polyclip_1.10-7        
#>  [49] httr_1.4.8              abind_1.4-8             compiler_4.6.0         
#>  [52] withr_3.0.2             fontquiver_0.2.1        bit64_4.8.0            
#>  [55] S7_0.2.2                backports_1.5.1         carData_3.0-6          
#>  [58] viridis_0.6.5           DBI_1.3.0               ggforce_0.5.0          
#>  [61] ggsignif_0.6.4          MASS_7.3-65             rappdirs_0.3.4         
#>  [64] tools_4.6.0             otel_0.2.0              scatterpie_0.2.6       
#>  [67] ape_5.8-1               httpuv_1.6.17           glue_1.8.1             
#>  [70] nlme_3.1-169            GOSemSim_2.38.0         promises_1.5.0         
#>  [73] grid_4.6.0              cluster_2.1.8.2         reshape2_1.4.5         
#>  [76] generics_0.1.4          gtable_0.3.6            tzdb_0.5.0             
#>  [79] tidyr_1.3.2             survminer_0.5.2         data.table_1.18.2.1    
#>  [82] hms_1.1.4               xml2_1.5.2              car_3.1-5              
#>  [85] XVector_0.52.0          BiocGenerics_0.58.0     ggrepel_0.9.8          
#>  [88] pillar_1.11.1           stringr_1.6.0           yulab.utils_0.2.4      
#>  [91] later_1.4.8             splines_4.6.0           tweenr_2.0.3           
#>  [94] dplyr_1.2.1             treeio_1.36.1           lattice_0.22-9         
#>  [97] survival_3.8-6          bit_4.6.0               rTensor_1.4.9          
#> [100] tidyselect_1.2.1        fontLiberation_0.1.0    GO.db_3.23.1           
#> [103] maketools_1.3.2         Biostrings_2.80.0       knitr_1.51             
#> [106] fontBitstreamVera_0.1.1 gridExtra_2.3           IRanges_2.46.0         
#> [109] Seqinfo_1.2.0           stats4_4.6.0            xfun_0.57              
#> [112] Biobase_2.72.0          stringi_1.8.7           lazyeval_0.2.3         
#> [115] ggfun_0.2.0             yaml_2.3.12             evaluate_1.0.5         
#> [118] gdtools_0.5.0           tibble_3.3.1            hash_2.2.6.4           
#> [121] BiocManager_1.30.27     ggplotify_0.1.3         cli_3.6.6              
#> [124] systemfonts_1.3.2       xtable_1.8-8            jquerylib_0.1.4        
#> [127] Rcpp_1.1.1-1.1          png_0.1-9               parallel_4.6.0         
#> [130] XML_3.99-0.23           ggplot2_4.0.3           readr_2.2.0            
#> [133] assertthat_0.2.1        blob_1.3.0              DOSE_4.6.0             
#> [136] bitops_1.0-9            tidytree_0.4.7          viridisLite_0.4.3      
#> [139] ggthemes_5.2.0          ggiraph_0.9.6           enrichit_0.1.4         
#> [142] scales_1.4.0            purrr_1.2.2             crayon_1.5.3           
#> [145] rlang_1.2.0             KEGGREST_1.52.0         rvest_1.0.5
Kuleshov, Maxim V., Matthew R. Jones, Andrew D. Rouillard, et al. 2016. Enrichr: a comprehensive gene set enrichment analysis web server 2016 update.” Nucleic Acids Research 44 (W1): W90–97. https://doi.org/10.1093/nar/gkw377.
Szklarczyk, Damian, Annika L Gable, David Lyon, et al. 2018. STRING v11: protein窶菟rotein association networks with increased coverage, supporting functional discovery in genome-wide experimental datasets.” Nucleic Acids Research 47 (D1): D607–13. https://doi.org/10.1093/nar/gky1131.
Zhou, Yingyao, Bin Zhou, Lars Pache, et al. 2019. Metascape provides a biologist-oriented resource for the analysis of systems-level datasets.” Nature Communications 10 (1): 1523. https://doi.org/10.1038/s41467-019-09234-6.