## ----style, echo = FALSE, results = 'asis', message=FALSE--------------------- BiocStyle::markdown() ## ----echo = FALSE, message = FALSE-------------------------------------------- library(Chromatograms) library(BiocStyle) register(SerialParam()) ## ----------------------------------------------------------------------------- # A data.frame with chromatogram variables. cdata <- data.frame( msLevel = c(1L, 1L), mz = c(112.2, 123.3), chromIndex = c(1L, 2L) ) # Retention time and intensity values for each chromatogram. pdata <- list( data.frame( rtime = c(11, 12.4, 12.8, 13.2, 14.6, 15.1, 16.5), intensity = c(50.5, 123.3, 153.6, 2354.3, 243.4, 123.4, 83.2) ), data.frame( rtime = c(45.1, 46.2, 53, 54.2, 55.3, 56.4, 57.5), intensity = c(100, 180.1, 300.45, 1400, 1200.3, 300.2, 150.1) ) ) # Create and initialize the backend be <- backendInitialize(ChromBackendMemory(), chromData = cdata, peaksData = pdata ) # Create Chromatograms object chr <- Chromatograms(be) chr ## ----------------------------------------------------------------------------- MRM_file <- system.file("proteomics", "MRM-standmix-5.mzML.gz", package = "msdata" ) be <- backendInitialize(ChromBackendMzR(), files = MRM_file, BPPARAM = SerialParam() ) chr_mzr <- Chromatograms(be) ## ----------------------------------------------------------------------------- length(chr) length(chr_mzr) ## ----------------------------------------------------------------------------- peaksData(chr) ## ----------------------------------------------------------------------------- peaksData(chr, columns = c("rtime"), drop = TRUE) chr$rtime ## ----------------------------------------------------------------------------- peaksData(chr) <- list( data.frame( rtime = c(1, 2, 3, 4, 5, 6, 7), intensity = c(1, 2, 3, 4, 5, 6, 7) ), data.frame( rtime = c(1, 2, 3, 4, 5, 6, 7), intensity = c(1, 2, 3, 4, 5, 6, 7) ) ) ## ----------------------------------------------------------------------------- chr$rtime <- list( c(8, 9, 10, 11, 12, 13, 14), c(8, 9, 10, 11, 12, 13, 14) ) ## ----------------------------------------------------------------------------- chr_filt <- filterPeaksData(chr, variables = "rtime", ranges = c(12, 15)) length(chr_filt) length(rtime(chr_filt)) ## ----------------------------------------------------------------------------- chromData(chr) ## ----------------------------------------------------------------------------- chromData(chr, columns = c("msLevel")) chr$chromIndex ## ----------------------------------------------------------------------------- chr$msLevel <- c(2L, 2L) chromData(chr) ## ----------------------------------------------------------------------------- chr$extra <- c("extra1", "extra2") chromData(chr) ## ----------------------------------------------------------------------------- chr_filt <- filterChromData(chr, variables = "chromIndex", ranges = c(1, 2), keep = TRUE ) length(chr_filt) length(chr) ## ----define-function---------------------------------------------------------- ## Define a function that takes the backend as an input, divides the intensity ## by parameter y and returns it. Note that ... is required in ## the function's definition. divide_intensities <- function(x, y, ...) { intensity(x) <- lapply(intensity(x), `/`, y) x } ## Add the function to the procesing queue chr_2 <- addProcessing(chr, divide_intensities, y = 2) chr_2 ## ----custom-processing-------------------------------------------------------- intensity(chr_2) intensity(chr) ## ----applyProcessing---------------------------------------------------------- length(chr_2@processingQueue) chr_2 <- applyProcessing(chr_2) length(chr_2@processingQueue) chr_2 ## ----------------------------------------------------------------------------- processingChunkFactor(chr) ## ----------------------------------------------------------------------------- processingChunkFactor(chr_mzr) |> head() ## ----------------------------------------------------------------------------- processingChunkSize(chr_mzr) <- 10 processingChunkFactor(chr_mzr) |> table() ## ----------------------------------------------------------------------------- print(object.size(chr_mzr), units = "Mb") chr_mzr <- setBackend(chr_mzr, ChromBackendMemory(), BPPARAM = SerialParam()) chr_mzr chr_mzr@backend@peaksData[[1]] |> head() # data is now in memory ## ----------------------------------------------------------------------------- print(object.size(chr_mzr), units = "Mb") ## ----message=FALSE------------------------------------------------------------ library(Spectra) library(IRanges) sp <- Spectra( DataFrame( rtime = c(100, 110, 120, 130, 140), msLevel = c(1L, 1L, 1L, 1L, 1L), dataOrigin = rep("example", 5L), mz = NumericList( c(100, 101), c(100, 101), c(100, 101), c(100, 101), c(100, 101), compress = FALSE ), intensity = NumericList( c(10, 20), c(15, 25), c(30, 5), c(12, 18), c(40, 2), compress = FALSE ) ), source = MsBackendDataFrame() ) chr_s <- Chromatograms(sp) ## ----------------------------------------------------------------------------- chr_s ## ----------------------------------------------------------------------------- ## Create custom metadata for EIC extraction custom_cd <- data.frame( msLevel = c(1L, 1L), dataOrigin = rep(dataOrigin(sp)[1], 2), mzMin = c(100, 200), mzMax = c(100.5, 200.5) ) chr_custom <- Chromatograms(sp, chromData = custom_cd) chr_custom ## ----------------------------------------------------------------------------- ## Work on a copy so downstream examples are not affected chr_s_tmp <- chr_s ## Modify metadata chr_s_tmp$msLevel <- rep(2L, length(chr_s_tmp)) ## Re-factorize to update the groupings chr_s_tmp <- factorize(chr_s_tmp) chromData(chr_s_tmp) ## ----------------------------------------------------------------------------- chromData(chr_s)$rtmin <- 125 chromData(chr_s)$rtmax <- 180 chromData(chr_s)$mzmin <- 100 chromData(chr_s)$mzmax <- 100.5 ## ----------------------------------------------------------------------------- library(RColorBrewer) col3 <- brewer.pal(3, "Dark2") plotChromatograms(chr_s, col = col3) ## ----------------------------------------------------------------------------- plotChromatogramsOverlay(chr_s, col = col3) ## ----------------------------------------------------------------------------- ## Define peaks of interest with retention time windows peak_table <- data.frame( rtMin = c(8, 11), rtMax = c(10, 13), msLevel = c(2L, 2L), chromIndex = c(1L, 2L) ) ## Extract those regions chr_extracted <- chromExtract(chr, peak_table, by = c("msLevel", "chromIndex")) chr_extracted ## ----------------------------------------------------------------------------- chromData(chr_extracted) ## ----------------------------------------------------------------------------- ## Define peak table with both retention time and m/z windows peak_table_mz <- data.frame( rtMin = c(125, 125), rtMax = c(180, 180), mzMin = c(100, 140), mzMax = c(100.5, 140.5), msLevel = c(1L, 1L), dataOrigin = rep(dataOrigin(chr_s)[1], 2), featureID = c("feature_1", "feature_2") ) ## Extract EICs for these features chr_eics <- chromExtract(chr_s, peak_table_mz, by = c("msLevel", "dataOrigin")) chr_eics ## ----------------------------------------------------------------------------- chromData(chr_eics) ## ----------------------------------------------------------------------------- ## A small Spectra with some missing intensities at m/z 100 sp_gaps <- Spectra( DataFrame( rtime = c(100, 110, 120, 130, 140, 150, 160, 170, 180), msLevel = rep(1L, 9), dataOrigin = rep("impute_demo", 9), mz = NumericList(100, 100, 100, 100, 100, 100, 100, 100, 100, compress = FALSE), intensity = NumericList(50, NA, 120, 200, NA, NA, 80, 30, 10, compress = FALSE) ), source = MsBackendDataFrame() ) ## Derive a Chromatograms and extract the EIC for m/z ≈ 100 chr_gaps <- Chromatograms(sp_gaps) eic_table <- data.frame( rtMin = 100, rtMax = 180, mzMin = 99.5, mzMax = 100.5, msLevel = 1L, dataOrigin = "impute_demo" ) chr_eic <- chromExtract(chr_gaps, eic_table, by = c("msLevel", "dataOrigin")) chr_eic ## ----fig.height=10, fig.width=8----------------------------------------------- ## Create copies for comparison chr_linear <- imputePeaksData(chr_eic, method = "linear") chr_spline <- imputePeaksData(chr_eic, method = "spline") chr_gaussian <- imputePeaksData(chr_eic, method = "gaussian", window = 2, sd = 1) chr_loess <- imputePeaksData(chr_eic, method = "loess", span = 0.75) ## Plot all methods for comparison par(mfrow = c(3, 2), mar = c(4, 4, 2, 1)) ## Original data plotChromatograms(chr_eic, main = "Original EIC") ## Linear interpolation plotChromatograms(chr_linear, main = "Linear Imputation") ## Spline interpolation plotChromatograms(chr_spline, main = "Spline Imputation") ## Gaussian smoothing plotChromatograms(chr_gaussian, main = "Gaussian Smoothing (window=2, sd=1)") ## LOESS smoothing plotChromatograms(chr_loess, main = "LOESS Smoothing (span=0.75)") ## ----------------------------------------------------------------------------- ## For on-disk backends, add imputation to the lazy queue chr_mzr_imputed <- imputePeaksData( chr_mzr, method = "gaussian", window = 5, sd = 2 ) chr_mzr_imputed ## ----------------------------------------------------------------------------- ## This reads from disk and applies imputation in one step peak_data <- peaksData(chr_mzr_imputed[1]) ## ----------------------------------------------------------------------------- length(chr_mzr_imputed@processingQueue) ## ----------------------------------------------------------------------------- ## For in-memory backends, you can persist the imputation chr_in_memory <- setBackend(chr_mzr_imputed, ChromBackendMemory()) chr_in_memory <- applyProcessing(chr_in_memory) # Now imputation is permanently applied length(chr_in_memory@processingQueue) ## ----compare-chromatograms-mrm------------------------------------------------ ## Pick 8 MRM chromatograms, skipping the first (a TIC with no m/z info) chr_sub <- chr_mzr[2:9] cor_arr <- compareChromatograms(chr_sub) cor_arr[, , "score"] ## similarity scores cor_arr[, , "n_peaks"] ## number of overlapping RT points per pair ## Use a chromData column as row/column labels cor_arr_labeled <- compareChromatograms(chr_sub, labelsColumn = "chromIndex") cor_arr_labeled[, , "score"] ## ----compare-chromatograms-heatmap, fig.width=6, fig.height=5----------------- library(pheatmap) ## Label rows/columns with precursor → product m/z transitions mz_labels <- paste0(round(chromData(chr_sub)$precursorMz, 1), " → ", round(chromData(chr_sub)$productMz, 1)) score_mat <- cor_arr[, , "score"] rownames(score_mat) <- colnames(score_mat) <- mz_labels pheatmap(score_mat, main = "Pairwise Pearson correlation", color = hcl.colors(30, palette = "RdYlBu", rev = TRUE)) ## ----compare-chromatograms-custom--------------------------------------------- cosine <- function(x, y) { sum(x * y) / (sqrt(sum(x^2)) * sqrt(sum(y^2))) } cos_arr <- compareChromatograms(chr_sub, FUN = cosine) cos_arr[, , "score"] ## ----compare-chromatograms-minpeaks------------------------------------------- ## Require at least 10 overlapping RT points to compute a score cor_strict <- compareChromatograms(chr_sub, minPeaks = 10L) cor_strict[, , "score"] ## NAs for pairs with < 10 common RT points cor_strict[, , "n_peaks"] ## actual overlap counts are always recorded ## ----compare-chromatograms-xy------------------------------------------------- ## Compare the first 4 chromatograms against the last 4 res <- compareChromatograms(chr_sub[1:4], chr_sub[5:8]) res[, , "score"] ## ----compare-chromatograms-groups, eval=FALSE--------------------------------- # grp_list <- split(chr_sub, chromData(chr_sub)$dataOrigin) # lapply(grp_list, compareChromatograms) ## ----si----------------------------------------------------------------------- sessionInfo()