## ----style, echo = FALSE, results = 'asis'------------------------------------ BiocStyle::markdown() ## ----setup-------------------------------------------------------------------- library(MetaboDynamics) library(SummarizedExperiment) # storing and manipulating simulated metabolomics data library(ggplot2) # visualization library(dplyr) # data handling library(tidyr) # data handling ## ----------------------------------------------------------------------------- data("longitudinalMetabolomics_df", package = "MetaboDynamics") ## ----fig.wide=TRUE------------------------------------------------------------ # take a sample of five metabolites and subset data metabolites <- sample(x = unique(longitudinalMetabolomics_df$metabolite),size = 5) data <- longitudinalMetabolomics_df%>%filter(metabolite%in%metabolites) head(data) ## ----------------------------------------------------------------------------- # fit dynamics model fits <- # when using a data frame as input fits have to stored in a separate object fit_dynamics_model( data = data, metabolite = "metabolite", # in which column are metabolite names stored? time = "time", # in which column are categorical time points stored? condition = "condition", # in which are categorical experimental conditions stored?, scaled_measurement = "m_scaled", # in which column are scaled measurments stored? max_treedepth = 10, adapt_delta = 0.95, # default 0.95 iter = 2000, warmup = 2000/4, # default is 1/4 of iterations chains = 1, # only set to 1 in vignette, recommended default is 4! cores = 1 # only set to 1 in vignette, can be same as chains if machine allows for parallelization ) ## ----------------------------------------------------------------------------- # Extract diagnostics diagnostics <- # when using a data frame as input diagnostics have to be stored in a separate object diagnostics_dynamics( data = data, # data frame that was used to fit the dynamics model, fits = fits, # list of fits from dynamics model, result of fit_dynamics_mode function iter = 2000, # how many iterations were used to fit the dynamics model chains = 1, # how many chains were used to fit the dynamics model ) ## ----------------------------------------------------------------------------- plot_diagnostics( data = data, # data frame used to fit the dynamics model diagnostics = diagnostics[["model_diagnostics"]] # summary of diagnostics ) ## ----------------------------------------------------------------------------- estimates <- # estimates have to be stored in a separate object when using data frames estimates_dynamics( data = data, fits = fits, iter = 2000, warmup = 500, chains = 1, condition = "condition", metabolite = "metabolite", time = "time", samples = 1 # 1 sample from posterior taken ) ## ----------------------------------------------------------------------------- # only visualize differences between time points plot_estimates(data = data, estimates = estimates, delta_t = TRUE, # choose to visualize differences between time points dynamics = FALSE) # only visualize dynamics plot_estimates(data = data, estimates = estimates, delta_t = FALSE, dynamics = TRUE) # choose to visualize the dynamics ## ----------------------------------------------------------------------------- head(estimates) ## ----------------------------------------------------------------------------- cluster <- # clustering results have to be stored in separate object when using data frame as input cluster_dynamics( data = estimates, # data is now the estimates or a data frame ob similar structure distance = "euclidean", # which distance method should be used agglomeration = "ward.D2", # which agglomeration method for hierarchical clustering should be used deepSplit = 2, # sensitivity of cluster analysis, minClusterSize = 1 # minimum number of metabolites in one cluster ) ## ----------------------------------------------------------------------------- plots <- plot_cluster(cluster) plots[["lineplot"]] ## ----------------------------------------------------------------------------- # For condition A plots[["A"]] ## ----------------------------------------------------------------------------- # load background dataset data("modules_compounds") # get KEGGs from data KEGGs <- unique(data$KEGG) # construct "metabolite_modules" data set metabolite_modules <- modules_compounds%>%filter(KEGG%in%KEGGs) # convert clustering results from list to dataframe cluster_data <- rbind(cluster[["A"]][["data"]], cluster[["B"]][["data"]], cluster[["C"]][["data"]]) ## ----------------------------------------------------------------------------- ora <- # store ORA result to separate object when using data frames as input ORA_hypergeometric( background = modules_compounds, annotations = metabolite_modules, data = cluster_data, # dataframe format of clustering output tested_column = "middle_hierarchy" ) # Visualization plot_ORA( data = ora, tested_column = "middle_hierarchy" ) ## ----------------------------------------------------------------------------- comparison_dynamics <- # result needs to be stored in a separate object when using data frames compare_dynamics( data = cluster_data, # clustering result dynamics = c("1","2","3","4"), # which columns specify the time point specific mean metabolite abundances? cores = 1 # only set to 1 for vignette, can be increased to 4 for parallelization ) ## ----------------------------------------------------------------------------- # Visualize comparison results heatmap_dynamics(estimates = comparison_dynamics[["estimates"]], data = cluster_data) ## ----------------------------------------------------------------------------- # compare metabolite composition compare_metabolites <- compare_metabolites( data = cluster_data ) # Visualization heatmap_metabolites( distances = compare_metabolites, data = cluster_data ) ## ----------------------------------------------------------------------------- # combine comparison results temp <- left_join( comparison_dynamics[["estimates"]], # dynamics comparison compare_metabolites, join_by("cluster_a","cluster_b") # join by cluster comparisons ) # get unique clusters x <- unique(c(temp[,"cluster_a"], temp[,"cluster_b"])) # draw plot ggplot(temp, aes(x = cluster_b, y = cluster_a)) + geom_point(aes(size = Jaccard, col = mu_mean)) + theme_bw() + scale_color_viridis_c(option = "magma") + scale_x_discrete(limits = x) + xlab("") + ylab("") + scale_y_discrete(limits = x) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + labs(col = "dynamics distance", size = "metabolite similarity") + ggtitle("comparison of clusters", "label = condition + cluster ID") ## ----------------------------------------------------------------------------- sessionInfo()