# NOTE(review): this script looks like knitr::purl() output. Chunk headers are
# reproduced exactly as found; several labels do not describe the code beneath
# them (e.g. `session-info` holds install instructions). Rename them in the
# source .Rmd if that matters.

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  message = FALSE,
  warning = FALSE
)

## ----session-info, eval=FALSE-------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
# BiocManager::install("RFGeneRank")

## ----package-overview---------------------------------------------------------
suppressPackageStartupMessages({
  library(RFGeneRank)
  library(SummarizedExperiment)
  library(S4Vectors)
})

# Fixed seed so the simulated data are reproducible. The order of the random
# draws below (runif for `age`, then rnorm for `expr`) is part of that.
set.seed(42)

n_genes <- 300
n_samples <- 60
# rep(each = ) silently truncates a fractional count, which would leave the
# two groups short of n_samples; insist on an even number of samples.
stopifnot(n_samples %% 2 == 0)

genes <- paste0("Gene", seq_len(n_genes))
samples <- paste0("Sample", seq_len(n_samples))

# Sample metadata (rows must match sample names)
# First half of the samples is CTRL, second half CASE; batch and sex alternate
# sample by sample.
meta_df <- data.frame(
  state = factor(
    rep(c("CTRL", "CASE"), each = n_samples / 2),
    levels = c("CTRL", "CASE")
  ),
  batch = factor(rep(c("B1", "B2"), length.out = n_samples)),
  sex = factor(rep(c("M", "F"), length.out = n_samples)),
  age = round(stats::runif(n_samples, 25, 65)),
  stringsAsFactors = FALSE,
  check.names = TRUE
)

# Transcriptomics-like expression: strictly positive values (log-normal)
expr <- matrix(
  exp(rnorm(n_genes * n_samples, mean = 2.5, sd = 0.6)),
  nrow = n_genes,
  ncol = n_samples,
  dimnames = list(genes, samples)
)

# Inject signal in CASE for a subset of genes
# head() instead of genes[1:25] so a smaller n_genes cannot produce NA names.
signal_genes <- head(genes, 25L)
case_cols <- meta_df$state == "CASE"
expr[signal_genes, case_cols] <- expr[signal_genes, case_cols] * 1.8

# Critical alignment: metadata rownames must match expression colnames
rownames(meta_df) <- colnames(expr)
stopifnot(identical(colnames(expr), rownames(meta_df)))

# Build SummarizedExperiment
se <- SummarizedExperiment(
  assays = list(expr = expr),
  colData = DataFrame(meta_df)
)
se

## ----simulate-expression------------------------------------------------------
# Detect whether the matrix is count-like (integer); our simulated data are continuous.
# Return TRUE when `x` is numeric, has at least one finite value, and every
# finite value lies within 1e-8 of a whole number. Non-finite entries (NA,
# NaN, Inf) are ignored. Empty, all-missing, or non-numeric input returns
# FALSE, so such input is never mistaken for count data.
is_integerish <- function(x) {
  if (!is.numeric(x)) {
    return(FALSE)
  }
  finite_vals <- x[is.finite(x)]
  length(finite_vals) > 0 &&
    all(abs(finite_vals - round(finite_vals)) < 1e-8)
}

# The flag is passed on as `log1p`: the log1p option is requested only when
# the matrix looks like integer counts.
counts_flag <- is_integerish(expr)

# Single-dataset call: one matrix and one metadata table, each wrapped in a
# list. NOTE(review): "combat" presumably selects ComBat batch correction over
# all samples at once ("global") -- confirm against the RFGeneRank docs.
se_prep <- prepare_data(
  mats = list(SummarizedExperiment::assay(se, "expr")),
  metas = list(meta_df), # use data.frame for robustness in vignettes
  label_col = "state",
  batch_col = "batch",
  log1p = counts_flag,
  batch_method = "combat",
  batch_correction_scope = "global"
)
se_prep

## ----build-se-----------------------------------------------------------------
# Named per-class weights handed to rank_genes(); names match the levels of
# `state`. NOTE(review): presumably this up-weights CASE -- confirm.
cw <- c(CTRL = 1, CASE = 2)

fit <- rank_genes(
  se_prep,
  label_col = "state",
  cv = "kfold",
  k = 3,
  n_top = 100,
  trees = 300,
  fold_batch_correction = FALSE,
  batch_col = "batch",
  class_weights = cw,
  auto_confounds = FALSE,
  seed = 42
)
fit

## ----run-rank-genes-----------------------------------------------------------
top_genes(fit, n = 10)

## ----inspect-fit--------------------------------------------------------------
# `y` is the `state` column taken from the prepared object, so the labels
# come from the same object that is passed as data.
tab_signed <- sign_importance(
  fit,
  se_prep,
  y = SummarizedExperiment::colData(se_prep)[["state"]],
  method = "mean"
)
head(tab_signed, 10)

## ----plotting-example---------------------------------------------------------
plot_importance(
  fit,
  top = 20,
  map_to_symbol = FALSE
)

plot_roc(fit)

plot_sign_importance(
  fit,
  tab = tab_signed,
  top = 20
)

## ----session------------------------------------------------------------------
sessionInfo()