## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.dim = c(6, 4) ) ## ----setup-------------------------------------------------------------------- library(EZbakR) library(dplyr) library(ggplot2) library(MASS) # For plotting # For coloring points by density: # Source: https://slowkow.com/notes/ggplot2-color-by-density/ get_density <- function(x, y, ...) { dens <- kde2d(x, y, ...) ix <- findInterval(x, dens$x) iy <- findInterval(y, dens$y) ii <- cbind(ix, iy) return(dens$z[ii]) } ## ----class.source = 'fold-show', eval=TRUE------------------------------------ ### SIMULATE DATA simdata <- EZSimulate(nfeatures = 200, ntreatments = 1, mode = "dynamics", label_time = c(1, 3), dynamics_preset = "nuc2cyto") ### EZBAKR ANALYSES ezbdo <- EZbakRData(simdata$cB, simdata$metadf) ezbdo <- EstimateFractions(ezbdo) # Lengths of features: # In simulation, all features are same length, so this is here for illustrative # purposes only feature_lengths <- tibble( GF = unique(simdata$cB$GF), length = 1000 ) # Averaging fractions rather than kinetics # Note the model, which specifies that we want to average data for a given # compartment AND label time (tl). ezbdo <- AverageAndRegularize(ezbdo, formula_mean = ~tl:compartment - 1, type = "fractions", feature_lengths = feature_lengths, parameter = "logit_fraction_highTC") ## ----class.source = 'fold-show', eval=TRUE------------------------------------ ### Input #2: the graph graph <- matrix(c(0, 1, 0, 0, 0, 2, 3, 0, 0), nrow = 3, ncol = 3, byrow = TRUE) colnames(graph) <- c("0", "N", "C") rownames(graph) <- colnames(graph) # Each row and column represents an RNA species being modeled # Here we are modeling the dynamics of nuclear (N) and cytoplasmic (C) # RNA. "0" represents no RNA, and must appear in the graph. N is synthesized # from 0 (i.e., its synthesis kinetics are 0-th order), and C is degraded to 0 # # Numbers in graph should range from 0 to the number of parameters in your model. # Order is arbitrary ## ----class.source = 'fold-show', eval=TRUE------------------------------------ modeled_to_measured <- list( nuclear = list(GF ~ N), cytoplasm = list(GF ~ C), total = list(GF ~ C + N) # total RNA is a combination of C and N ) ## ----class.source = 'fold-show', eval=TRUE------------------------------------ # See documentation (?EZDynamics()) for descriptions of all parameters # specified. Bit of trickery here with an object loaded with EZbakR # called ode_models. More on this later ezbdo <- EZDynamics(ezbdo, graph = graph, sub_features = "GF", grouping_features = "GF", sample_feature = "compartment", modeled_to_measured = ode_models$nuc2cyto$formulas) ## ----pressure1, fig.width=6, out.width="40%", fig.align="default", fig.show='hold', eval=TRUE---- gt <- simdata$ground_truth$parameter_truth dynfit <- ezbdo$dynamics$dynamics1 compare <- dplyr::inner_join(dynfit, gt %>% dplyr::rename(GF = feature), by = "GF") true_scale_factor <- mean(exp(compare$logk1[compare$logk1 < 9.9]) / compare$true_k1[compare$logk1 < 9.9]) gPk1 <- compare %>% dplyr::mutate(density = get_density( x = log(true_k1), y = log(exp(logk1)/true_scale_factor), n = 200 )) %>% ggplot(aes(x = log(true_k1), y = log(exp(logk1)/true_scale_factor), color = density)) + geom_point(size=0.9) + theme_classic() + scale_color_viridis_c() + xlab("log(true ksyn)") + ylab("log(estimated ksyn)") + geom_abline(slope =1, intercept = 0, color = 'darkred', linewidth = 0.75, linetype = 'dotted') + theme(axis.text=element_text(size=10), axis.title=element_text(size=12), legend.text=element_text(size=10), legend.title=element_text(size=12)) gPk2 <- compare %>% dplyr::mutate(density = get_density( x = log(true_k2), y = logk2, n = 200 )) %>% ggplot(aes(x = log(true_k2), y = logk2, color = density)) + geom_point(size=0.9) + theme_classic() + scale_color_viridis_c() + xlab("log(true kexp)") + ylab("log(estimated kexp)") + geom_abline(slope =1, intercept = 0, color = 'darkred', linewidth = 0.75, linetype = 'dotted') + theme(axis.text=element_text(size=10), axis.title=element_text(size=12), legend.text=element_text(size=10), legend.title=element_text(size=12)) gPk3 <- compare %>% dplyr::mutate(density = get_density( x = log(true_k3), y = logk3, n = 200 )) %>% ggplot(aes(x = log(true_k3), y = logk3, color = density)) + geom_point(size=0.9) + theme_classic() + scale_color_viridis_c() + xlab("log(true kdeg)") + ylab("log(estimated kdeg)") + geom_abline(slope =1, intercept = 0, color = 'darkred', linewidth = 0.75, linetype = 'dotted') + theme(axis.text=element_text(size=10), axis.title=element_text(size=12), legend.text=element_text(size=10), legend.title=element_text(size=12)) gPk1 gPk2 gPk3 ## ----class.source = 'fold-show', eval=TRUE------------------------------------ ### Input #2: the graph graph <- matrix(c(0, 1, 0, 3, 0, 2, 4, 0, 0), nrow = 3, ncol = 3, byrow = TRUE) colnames(graph) <- c("0", "N", "C") rownames(graph) <- colnames(graph) # Each row and column represents an RNA species being modeled # Here we are modeling the dynamics of nuclear (N) and cytoplasmic (C) # RNA. "0" represents no RNA, and must appear in the graph. N is synthesized # from 0 (i.e., its synthesis kinetics are 0-th order), and C is degraded to 0 # # Numbers in graph should range from 0 to the number of parameters in your model. # Order is arbitrary ## ----eval=TRUE---------------------------------------------------------------- ##### SIMULATE DATA simdata <- EZSimulate(nfeatures = 50, ntreatments = 1, mode = "dynamics", label_time = c(1, 3), dynamics_preset = "nuc2cytowithNdeg") ##### EZBAKR ANALYSES ezbdo <- EZbakRData(simdata$cB, simdata$metadf) ezbdo <- EstimateFractions(ezbdo) # Lengths of features: # In simulation, all features are same length, so this is here for illustrative # purposes only feature_lengths <- tibble( GF = unique(simdata$cB$GF), length = 1000 ) # Averaging fractions rather than kinetics # Note the model, which specifies that we want to average data for a given # compartment AND label time (tl). ezbdo <- AverageAndRegularize(ezbdo, formula_mean = ~tl:compartment - 1, type = "fractions", feature_lengths = feature_lengths, parameter = "logit_fraction_highTC") # Fit ODE model # Getting some help from ode_models object again ezbdo <- EZDynamics(ezbdo, graph = graph, sub_features = "GF", grouping_features = "GF", sample_feature = "compartment", modeled_to_measured = ode_models$nuc2cytowithNdeg$formulas) ## ----eval=TRUE---------------------------------------------------------------- graph <- matrix(c(0, 1, 0, 0, 0, 2, 3, 0, 0), nrow = 3, ncol = 3, byrow = TRUE) colnames(graph) <- c("0", "P", "M") rownames(graph) <- colnames(graph) ## ----eval=TRUE---------------------------------------------------------------- ##### SIMULATE DATA simdata <- EZSimulate(nfeatures = 50, ntreatments = 1, mode = "dynamics", label_time = c(1, 3), dynamics_preset = "preRNA") ##### EZBAKR ANALYSES ezbdo <- EZbakRData(simdata$cB, simdata$metadf) ezbdo <- EstimateFractions(ezbdo) # Lengths of features: # In simulation, all features are same length, so this is here for illustrative # purposes only features <- unique(simdata$cB$feature) feature_lengths <- tibble( GF = c(features, rep("__no_feature", times = length(features))), XF = c(rep("__no_feature", times = length(features)), features), length = 1000 ) # Averaging fractions rather than kinetics # Note the model, which specifies that we want to average data for a given # compartment AND label time (tl). ezbdo <- AverageAndRegularize(ezbdo, formula_mean = ~tl - 1, type = "fractions", feature_lengths = feature_lengths, parameter = "logit_fraction_highTC") # Fit ODE model # Getting some help from ode_models object again ezbdo <- EZDynamics(ezbdo, graph = graph, sub_features = c("GF", "XF"), grouping_features = "feature", modeled_to_measured = ode_models$preRNA$formulas$total) ## ----eval=TRUE---------------------------------------------------------------- graph <- matrix(c(0, 1, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5, 6, 0, 0, 0, 0), nrow = 5, ncol = 5, byrow = TRUE) colnames(graph) <- c("0", "NP", "NM", "CP","CM") rownames(graph) <- colnames(graph) ## ----eval=TRUE---------------------------------------------------------------- ##### SIMULATE DATA simdata <- EZSimulate(nfeatures = 50, ntreatments = 1, mode = "dynamics", label_time = c(1, 3), dynamics_preset = "nuc2cytowithpreRNA") ##### EZBAKR ANALYSES ezbdo <- EZbakRData(simdata$cB, simdata$metadf) ezbdo <- EstimateFractions(ezbdo) # Lengths of features: # In simulation, all features are same length, so this is here for illustrative # purposes only features <- unique(simdata$cB$feature) feature_lengths <- tibble( GF = c(features, rep("__no_feature", times = length(features))), XF = c(rep("__no_feature", times = length(features)), features), length = 1000 ) # Averaging fractions rather than kinetics # Note the model, which specifies that we want to average data for a given # compartment AND label time (tl). ezbdo <- AverageAndRegularize(ezbdo, formula_mean = ~tl:compartment - 1, type = "fractions", feature_lengths = feature_lengths, parameter = "logit_fraction_highTC") # Fit ODE model # Getting some help from ode_models object again ezbdo <- EZDynamics(ezbdo, graph = graph, sub_features = c("GF", "XF"), grouping_features = "feature", sample_feature = "compartment", modeled_to_measured = ode_models$nuc2cytowithpreRNA$formulas) ## ----------------------------------------------------------------------------- ### SIMULATE DATA # Now we are simulating two distinct biological conditions. # Default is for about 50% of all features to have differences in # all parameters. simdata <- EZSimulate(nfeatures = 200, ntreatments = 2, mode = "dynamics", label_time = c(1, 3), dynamics_preset = "nuc2cyto") ### EZBAKR ANALYSES ezbdo <- EZbakRData(simdata$cB, simdata$metadf) ezbdo <- EstimateFractions(ezbdo) # Lengths of features: # In simulation, all features are same length, so this is here for illustrative # purposes only feature_lengths <- tibble( GF = unique(simdata$cB$GF), length = 1000 ) # DIFFERENT FROM SINGLE CONDITION ANALYSIS: "treatment" is included in the formula. ezbdo <- AverageAndRegularize(ezbdo, formula_mean = ~tl:compartment:treatment - 1, type = "fractions", feature_lengths = feature_lengths, parameter = "logit_fraction_highTC") # Nothing changes here! ezbdo <- EZDynamics(ezbdo, graph = ode_models$nuc2cyto$graph, sub_features = "GF", grouping_features = "GF", sample_feature = "compartment", modeled_to_measured = ode_models$nuc2cyto$formulas) # Now we can run CompareParameters. Need to: # 1) Let it know you want to use EZDynamics output (type = "dynamics") # 2) Specify the "design_factor" (treatment in this case) # 3) Specify the "parameter" you want to compare # (let's compare log(k2), a.k.a logk2). ezbdo <- CompareParameters(ezbdo, type = "dynamics", parameter = "logk2", design_factor = "treatment", reference = "treatment1", experimental = "treatment2") # Can make a volcano plot # Here I have specified everything to be exhaustive, but # in this case we could just specify `parameter = "logk2"`, # since we only have one logk2 comparative analysis in our # EZbakRData object EZVolcanoPlot(ezbdo, parameter = "logk2", design_factor = "treatment", reference = "treatment1", experimental = "treatment2") ## ----------------------------------------------------------------------------- graph <- matrix( c(0, 1, 0, 0, 0, 2, 3, 0, 0), ncol = 3, nrow = 3, byrow = TRUE ) colnames(graph) <- c("0", "N", "C") rownames(graph) <- colnames(graph) ## ----------------------------------------------------------------------------- metadf <- tibble( sample = c("cyto_1", "cyto_2", "nuc_1", "nuc_2", "tot_1", "tot_3"), tl = 1, compartment = c("cytoplasmic", "cytoplasmic", "nuclear", "nuclear", "total", "total") ) ## ----------------------------------------------------------------------------- mtom <- list( total = list(XF ~ C + N), nuclear = list(XF ~ N), cytoplasmic = list(XF ~ C) ) ## ----------------------------------------------------------------------------- mtom <- list( XF ~ M, GF ~ P ) ## ----------------------------------------------------------------------------- mtom <- list( XF_sj ~ M, XF_nosj ~ P + M, GF ~ P ) ## ----------------------------------------------------------------------------- graph <- matrix(c(0, 1, 0, 0, 0, 0, 0, 2, 3, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 5, 6, 0, 0, 0, 0), nrow = 5, ncol = 5, byrow = TRUE) colnames(graph) <- c("0", "NP", "NM", "CP","CM") rownames(graph) <- colnames(graph) ## ----------------------------------------------------------------------------- mtom <- list( total = list( GF ~ NP + CP, XF ~ NM + CM ), nuclear = list( GF ~ NP, XF ~ NM ), cytoplasmic = list( GF ~ CP, XF ~ CM ) )