## ----setup, include=FALSE----------------------------------------------------- # Solution for the "magick package is required to crop" error: knitr::opts_chunk$set( echo = TRUE, # keep showing code chunks message = FALSE, warning = FALSE, # This is the line that fixes the cropping issue: crop = NULL ) ## ----Install the scECODA package, eval=FALSE---------------------------------- # if (!require("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("scECODA") ## ----Load scECODA------------------------------------------------------------- library(scECODA) ## ----Load example SingleCellExperiment dataset, message=FALSE, warning=FALSE---- # Loading an example SingleCellExperiment dataset library(scRNAseq) all.ds <- as.data.frame(surveyDatasets()) ds <- 1 sce_object <- fetchDataset( all.ds[["name"]][ds], all.ds[["version"]][ds] ) names(colData(sce_object)) ## ----Create ECODA object------------------------------------------------------ # Create ECODA object se <- ecoda( sce_object, sample_col = "sample", celltype_col = "cluster" ) ## ----Show example of CLR df--------------------------------------------------- assay(se, "clr")[1:5,1:5] ## ----Create ECODA object from Seurat object----------------------------------- # Commented out because Seurat depends on igraph which has not been updated # to work with R 4.6 yet. # library(Seurat) # # counts <- counts(sce_object) # metadata <- as.data.frame(SummarizedExperiment::colData(sce_object)) # # seurat_object <- CreateSeuratObject(counts = counts, meta.data = metadata) # # se <- ecoda( # seurat_object, # sample_col = "sample", # celltype_col = "cluster" # ) ## ----Load example datasets---------------------------------------------------- # Load example datasets data(example_data) Zhang <- example_data$Zhang counts <- Zhang$cell_counts_lowresolution freq <- calc_freq(counts) metadata <- Zhang$metadata se <- ecoda(data = counts, metadata = metadata) se <- ecoda(data = freq, metadata = metadata) ## ----Show PCA based on cell type composition, message=FALSE, warning=FALSE---- se <- ecoda( data = example_data$Zhang$cell_counts_lowresolution, metadata = example_data$Zhang$metadata, ) plot_pca( se, label_col = "Tissue", title = "PCA based on cell type composition", n_hv_feat_show = 5 # Shows the most highly variable features (cell types) ) ## ----PCA based on highly variable cell types---------------------------------- plot_pca( se, assay = "clr_hvc", label_col = "Tissue", title = "PCA based on highly variable cell types", n_hv_feat_show = nrow(metadata(se)$clr_hvc) ) ## ----3D PCA------------------------------------------------------------------- plot_pca3d(se, label_col = "Tissue") ## ----Quantifying group separation, message=FALSE, warning=FALSE--------------- # You can get the separation scores like this: dist_mat <- dist(assay(se, "clr")) # Or use the pre-calculated distance matrix based on the CLR abundances: dist_mat <- metadata(se)$sample_distances labels <- colData(se)$Tissue anosim_r_score <- calc_anosim(dist_mat, labels) ari_score <- calc_ari(dist_mat, labels) # modularity_score <- calc_modularity(dist_mat, labels) silhouette_score <- calc_sil(dist_mat, labels) print(paste("ANOSIM R:", anosim_r_score)) print(paste("ARI:", ari_score)) # print(paste("Modularity:", modularity_score)) print(paste("Silhouette:", silhouette_score)) ## ----Box plot----------------------------------------------------------------- plot_boxplot(se, title = "CLR Abundance by Cell Type") ## ----Grouped box plot--------------------------------------------------------- plot_boxplot( se, label_col = "Tissue", title = "CLR Abundance by Cell Type (with Wilcoxon Test)" ) ## ----Bar plot by group-------------------------------------------------------- # Plotting average cell type abundance by experimental group plot_barplot( se, label_col = "Tissue", plot_by = "group", title = "Mean Relative Abundance by Condition" ) ## ----Bar plot by sample------------------------------------------------------- # Plotting cell type abundance for each sample separately plot_barplot( se, label_col = "Tissue", plot_by = "sample", title = "Relative Abundance for Each Sample" ) ## ----Plot heatmap example 1--------------------------------------------------- plot_heatmap( se, label_col = c("Clinical.efficacy.", "Tissue"), cutree_rows = 3, cutree_cols = 3 ) ## ----Plot heatmap example 2, fig.height=16, fig.width=12---------------------- se <- ecoda( data = example_data$GongSharma_full$cell_counts_highresolution, metadata = example_data$GongSharma_full$metadata ) plot_heatmap( se, label_col = c("subject.cmv", "age_group"), # Additional arguments for pheatmap: cutree_rows = 3, cutree_cols = 5, show_colnames = FALSE ) ## ----Plot heatmap example 2 only HVCs----------------------------------------- plot_heatmap( se, assay = "clr_hvc", label_col = c("subject.cmv", "age_group"), # Additional arguments for pheatmap: cutree_rows = 3, cutree_cols = 4, show_colnames = FALSE ) ## ----Correlation plot, fig.height=16, fig.width=16---------------------------- plot_corr(se) ## ----Correlation plot only HVCs, fig.height=16, fig.width=16------------------ plot_corr(se, assay = "clr_hvc") ## ----Mean-variance plot 50 percent-------------------------------------------- library(ggplot2) plot_varmean(se, labels = "none") + ggtitle("Default - cell types explaining 50% variance") ## ----Mean-variance plot 80 percent-------------------------------------------- se_new <- find_hvcs(se, variance_explained = 0.8) plot_varmean(se_new) + ggtitle("Cell types explaining 80% variance") ## ----Mean-variance plot top 5 HVCs-------------------------------------------- se_new <- find_hvcs(se, top_n_hvcs = 5) plot_varmean(se_new) + ggtitle("Top 5 HVCs") ## ----Load dataset including pseudobulk calculation, message=FALSE, warning=FALSE---- # Loading an example SingleCellExperiment dataset library(scRNAseq) all.ds <- as.data.frame(surveyDatasets()) ds <- 1 sce_object <- fetchDataset( all.ds[["name"]][ds], all.ds[["version"]][ds] ) se <- ecoda( data = sce_object, sample_col = "sample", celltype_col = "cluster", get_pb = TRUE ) ## ----PCA plot showing top 5 features------------------------------------------ plot_pca( se, label_col = "Condition", title = "PCA based on cell type composition", n_hv_feat_show = 5 ) ## ----PCA plot based on pseudobulk--------------------------------------------- plot_pca( se, assay = "pb", label_col = "Condition", title = "PCA based on pseudobulk gene expression" #, n_hv_feat_show = 5 # optionally, show top n most highly variable genes ) ## ----Removing cell types, eval=FALSE------------------------------------------ # # Load example datasets # data(example_data) # Stephenson <- example_data$Stephenson # counts <- Stephenson$cell_counts_lowresolution # metadata <- Stephenson$metadata # se <- ecoda(data = counts, metadata = metadata) # # # Specify cell types to drop # cts_to_drop <- c("RBC", "Platelets", "Neutrophils") # keep <- !rownames(assay(se, "counts")) %in% cts_to_drop # counts_subset <- assay(se, "counts")[, keep] # colData_subset <- as.data.frame(colData(se))[keep, ] # # # Re-initialize a new subset object # # This ensures all transformations (CLR, HVCs, etc.) are recalculated correctly # se_subset <- ecoda(data = counts_subset, metadata = colData_subset) ## ----sessionInfo-------------------------------------------------------------- sessionInfo()