## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = TRUE, cache = FALSE, warning = FALSE, message = FALSE, fig.width = 7, fig.height = 5 ) ## ----load--------------------------------------------------------------------- library(BioUtils) library(plotly) library(msigdbr) library(ggplot2) geo <- extract.expression( load.geo.soft(accession = "GDS507", log.transform = TRUE) ) ## ----pca---------------------------------------------------------------------- pca.plot(geo$expression, geo$phenotype, color.by = "disease.state") ## ----de----------------------------------------------------------------------- de.results <- run.limma.de(geo, condition.col = "disease.state") sig <- de.results[de.results$adj.P.Val < 0.05 & abs(de.results$logFC) > 1, ] cat("Significant DE probes (FDR < 0.05, |logFC| > 1): ", nrow(sig), "\n") cat("Upregulated in RCC: ", sum(sig$logFC > 0), "\n") cat("Downregulated in RCC: ", sum(sig$logFC < 0), "\n") ## ----volcano_static----------------------------------------------------------- volcano.plot(de.results, fc.threshold = 1, fdr.threshold = 0.05) ## ----volcano_plotly----------------------------------------------------------- de.plot <- de.results de.plot$neg.log10.fdr <- -log10(de.plot$adj.P.Val) de.plot$status <- "Not Significant" de.plot$status[de.plot$adj.P.Val < 0.05 & de.plot$logFC > 1] <- "Upregulated" de.plot$status[de.plot$adj.P.Val < 0.05 & de.plot$logFC < -1] <- "Downregulated" de.plot$gene <- "" top.idx <- order(de.plot$adj.P.Val)[1:50] de.plot$gene[top.idx] <- get.gene.name( geo$gene, rownames(de.plot)[top.idx], use.symbols = TRUE ) plotly::plot_ly( data = de.plot, x = ~logFC, y = ~neg.log10.fdr, color = ~status, colors = c("Upregulated" = "firebrick", "Downregulated" = "steelblue", "Not Significant" = "grey70"), text = ~paste0("Gene: ", gene, "
logFC: ", round(logFC, 2), "
FDR: ", signif(adj.P.Val, 3)), hoverinfo = "text", type = "scatter", mode = "markers", marker = list(size = 4, opacity = 0.6) ) %>% plotly::layout( title = "Volcano Plot: RCC vs Normal Kidney (Interactive, hover for gene labels)", xaxis = list(title = "Log2 Fold Change", zeroline = TRUE), yaxis = list(title = "-Log10 Adjusted P-Value", zeroline = FALSE), shapes = list( list(type = "line", x0 = 1, x1 = 1, y0 = 0, y1 = max(de.plot$neg.log10.fdr, na.rm = TRUE), line = list(dash = "dot", color = "black")), list(type = "line", x0 = -1, x1 = -1, y0 = 0, y1 = max(de.plot$neg.log10.fdr, na.rm = TRUE), line = list(dash = "dot", color = "black")) ) ) ## ----top_candidates----------------------------------------------------------- top.probes <- rownames(head(de.results[order(de.results$adj.P.Val), ], 10)) top.symbols <- get.gene.name(geo$gene, top.probes, use.symbols = TRUE) valid <- which(top.symbols != "") top.probes <- top.probes[valid] top.symbols <- top.symbols[valid] expr.mat <- get.gene.expression(geo$expression, top.probes) df.multi <- build.analysis.df(expr.mat, geo$phenotype, geo$gene) gene.analysis.plot(df.multi) ## ----single------------------------------------------------------------------- df.single <- df.multi[df.multi$gene == top.symbols[1], ] gene.analysis.plot(df.single) result <- analyze.gene(df.single) cat(result$interpretation) ## ----coexp-------------------------------------------------------------------- cor.mat <- gene.correlation.matrix( geo$expression, top.probes, method = "pearson" ) correlation.heatmap.plot(cor.mat, gene.names = top.symbols) ## ----gsea--------------------------------------------------------------------- hallmark.df <- msigdbr(species = "Homo sapiens", category = "H") pathways <- split(hallmark.df$gene_symbol, hallmark.df$gs_name) gsea.results <- run.gsea(de.results, geo$gene, pathways) gsea.results <- gsea.results[order(gsea.results$padj), ] top.pathways <- head(gsea.results[!is.na(gsea.results$padj), ], 15) ## ----gsea_plot, fig.height=7-------------------------------------------------- top.pathways$short.name <- gsub("HALLMARK_", "", top.pathways$pathway) top.pathways$direction <- ifelse(top.pathways$NES > 0, "Upregulated in RCC", "Downregulated in RCC") ggplot2::ggplot( top.pathways, ggplot2::aes( x = reorder(short.name, NES), y = NES, fill = direction ) ) + ggplot2::geom_col() + ggplot2::coord_flip() + ggplot2::scale_fill_manual( values = c("Upregulated in RCC" = "firebrick", "Downregulated in RCC" = "steelblue") ) + ggplot2::geom_hline(yintercept = 0, linetype = "solid", color = "black") + ggplot2::labs( title = "Hallmark Pathway Enrichment in RCC vs Normal Kidney", subtitle = "Bars extend from zero; direction indicates enrichment in RCC (red) or normal (blue)", x = NULL, y = "Normalized Enrichment Score (NES)", fill = NULL ) + ggplot2::theme_minimal() + ggplot2::theme( plot.title = ggplot2::element_text(hjust = 0.5, size = 13), plot.subtitle = ggplot2::element_text(hjust = 0.5, size = 9, color = "grey40"), legend.position = "bottom" ) ## ----lasso-------------------------------------------------------------------- phenotype.binary <- ifelse(geo$phenotype$disease.state == "RCC", 1, 0) lasso.fit <- fit.lasso(geo$expression, phenotype.binary) coef.mat <- coef(lasso.fit, s = "lambda.1se") selected <- coef.mat[coef.mat[, 1] != 0, , drop = FALSE] n.selected <- nrow(selected) - 1 # subtract intercept cat("Number of genes selected by LASSO:", n.selected, "\n\n") print(round(selected, 4)) ## ----lasso_plot--------------------------------------------------------------- # Visualize the cross-validation curve to show model selection plot(lasso.fit, main = "LASSO Cross-Validation Curve")