## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", #tidy.opts=list(width.cutoff=60), #tidy=TRUE, fig.align = 'center', warning=FALSE ) ## ----setup, message=FALSE----------------------------------------------------- # load packages library(boot) library(car) library(rTPC) library(nls.multstart) library(broom) library(tidyverse) library(patchwork) library(minpack.lm) ## ----load_data, fig.height=4, fig.width=6------------------------------------- # load in data data("bacteria_tpc") # keep just a single curve d <- filter(bacteria_tpc, phage == 'nophage') # show the data ggplot(d, aes(temp, rate)) + geom_point(size = 2, alpha = 0.5) + theme_bw(base_size = 12) + labs(x = 'Temperature (ºC)', y = 'Growth rate', title = 'Growth rate across temperatures') ## ----fit_and_plot, fig.height=4, fig.width=6---------------------------------- # fit Sharpe-Schoolfield model d_fit <- nest(d, data = c(temp, rate)) %>% mutate(sharpeschoolhigh = map(data, ~nls_multstart(rate~sharpeschoolhigh_1981(temp = temp, r_tref,e,eh,th, tref = 15), data = .x, iter = c(3,3,3,3), start_lower = get_start_vals(.x$temp, .x$rate, model_name = 'sharpeschoolhigh_1981') - 10, start_upper = get_start_vals(.x$temp, .x$rate, model_name = 'sharpeschoolhigh_1981') + 10, lower = get_lower_lims(.x$temp, .x$rate, model_name = 'sharpeschoolhigh_1981'), upper = get_upper_lims(.x$temp, .x$rate, model_name = 'sharpeschoolhigh_1981'), supp_errors = 'Y', convergence_count = FALSE)), # create new temperature data new_data = map(data, ~tibble(temp = seq(min(.x$temp), max(.x$temp), length.out = 100))), # predict over that data, preds = map2(sharpeschoolhigh, new_data, ~augment(.x, newdata = .y))) # unnest predictions d_preds <- select(d_fit, preds) %>% unnest(preds) # plot data and predictions ggplot() + geom_line(aes(temp, .fitted), d_preds, col = 'blue') + geom_point(aes(temp, rate), d, size = 2, alpha = 0.5) + theme_bw(base_size = 12) + labs(x = 'Temperature (ºC)', y = 'Growth rate', title = 'Growth rate across temperatures') ## ----bootstrap_models1-------------------------------------------------------- # refit model using nlsLM fit_nlsLM <- minpack.lm::nlsLM(rate~sharpeschoolhigh_1981(temp = temp, r_tref,e,eh,th, tref = 15), data = d, start = coef(d_fit$sharpeschoolhigh[[1]]), lower = get_lower_lims(d$temp, d$rate, model_name = 'sharpeschoolhigh_1981'), upper = get_upper_lims(d$temp, d$rate, model_name = 'sharpeschoolhigh_1981'), weights = rep(1, times = nrow(d))) # bootstrap using case resampling boot1 <- Boot(fit_nlsLM, method = 'case') # look at the data head(boot1$t) ## ----hist_boot, fig.height=7, fig.width=8------------------------------------- hist(boot1, layout = c(2,2)) ## ----plot_boots, fig.height=3, fig.width=8, message=FALSE--------------------- # create predictions of each bootstrapped model boot1_preds <- boot1$t %>% as.data.frame() %>% drop_na() %>% mutate(iter = 1:n()) %>% group_by_all() %>% do(data.frame(temp = seq(min(d$temp), max(d$temp), length.out = 100))) %>% ungroup() %>% mutate(pred = sharpeschoolhigh_1981(temp, r_tref, e, eh, th, tref = 15)) # calculate bootstrapped confidence intervals boot1_conf_preds <- group_by(boot1_preds, temp) %>% summarise(conf_lower = quantile(pred, 0.025), conf_upper = quantile(pred, 0.975)) %>% ungroup() # plot bootstrapped CIs p1 <- ggplot() + geom_line(aes(temp, .fitted), d_preds, col = 'blue') + geom_ribbon(aes(temp, ymin = conf_lower, ymax = conf_upper), boot1_conf_preds, fill = 'blue', alpha = 0.3) + geom_point(aes(temp, rate), d, size = 2, alpha = 0.5) + theme_bw(base_size = 12) + labs(x = 'Temperature (ºC)', y = 'Growth rate', title = 'Growth rate across temperatures') # plot bootstrapped predictions p2 <- ggplot() + geom_line(aes(temp, .fitted), d_preds, col = 'blue') + geom_line(aes(temp, pred, group = iter), boot1_preds, col = 'blue', alpha = 0.007) + geom_point(aes(temp, rate), d, size = 2, alpha = 0.5) + theme_bw(base_size = 12) + labs(x = 'Temperature (ºC)', y = 'Growth rate', title = 'Growth rate across temperatures') p1 + p2 ## ----bootstrap_case2, message=FALSE------------------------------------------- # load in chlorella data data('chlorella_tpc') d2 <- filter(chlorella_tpc, curve_id == 1) # fit Sharpe-Schoolfield model to raw data d_fit <- nest(d2, data = c(temp, rate)) %>% mutate(sharpeschoolhigh = map(data, ~nls_multstart(rate~sharpeschoolhigh_1981(temp = temp, r_tref,e,eh,th, tref = 15), data = .x, iter = c(3,3,3,3), start_lower = get_start_vals(.x$temp, .x$rate, model_name = 'sharpeschoolhigh_1981') - 10, start_upper = get_start_vals(.x$temp, .x$rate, model_name = 'sharpeschoolhigh_1981') + 10, lower = get_lower_lims(.x$temp, .x$rate, model_name = 'sharpeschoolhigh_1981'), upper = get_upper_lims(.x$temp, .x$rate, model_name = 'sharpeschoolhigh_1981'), supp_errors = 'Y', convergence_count = FALSE)), # create new temperature data new_data = map(data, ~tibble(temp = seq(min(.x$temp), max(.x$temp), length.out = 100))), # predict over that data, preds = map2(sharpeschoolhigh, new_data, ~augment(.x, newdata = .y))) # refit model using nlsLM fit_nlsLM2 <- nlsLM(rate~sharpeschoolhigh_1981(temp = temp, r_tref,e,eh,th, tref = 15), data = d2, start = coef(d_fit$sharpeschoolhigh[[1]]), lower = get_lower_lims(d2$temp, d2$rate, model_name = 'sharpeschoolhigh_1981'), upper = get_upper_lims(d2$temp, d2$rate, model_name = 'sharpeschoolhigh_1981'), control = nls.lm.control(maxiter=500), weights = rep(1, times = nrow(d2))) # bootstrap using case resampling boot2 <- Boot(fit_nlsLM2, method = 'case') ## ----bootstrap_case2_plot, fig.height=3,fig.width=8--------------------------- # unnest predictions of original model fit d_preds <- select(d_fit, preds) %>% unnest(preds) # predict over new data boot2_preds <- boot2$t %>% as.data.frame() %>% drop_na() %>% mutate(iter = 1:n()) %>% group_by_all() %>% do(data.frame(temp = seq(min(d2$temp), max(d2$temp), length.out = 100))) %>% ungroup() %>% mutate(pred = sharpeschoolhigh_1981(temp, r_tref, e, eh, th, tref = 15)) # calculate bootstrapped confidence intervals boot2_conf_preds <- group_by(boot2_preds, temp) %>% summarise(conf_lower = quantile(pred, 0.025), conf_upper = quantile(pred, 0.975)) %>% ungroup() # plot bootstrapped CIs p1 <- ggplot() + geom_line(aes(temp, .fitted), d_preds, col = 'blue') + geom_ribbon(aes(temp, ymin = conf_lower, ymax = conf_upper), boot2_conf_preds, fill = 'blue', alpha = 0.3) + geom_point(aes(temp, rate), d2, size = 2) + theme_bw(base_size = 12) + labs(x = 'Temperature (ºC)', y = 'Growth rate', title = 'Growth rate across temperatures') # plot bootstrapped predictions p2 <- ggplot() + geom_line(aes(temp, .fitted), d_preds, col = 'blue') + geom_line(aes(temp, pred, group = iter), boot2_preds, col = 'blue', alpha = 0.007) + geom_point(aes(temp, rate), d2, size = 2) + theme_bw(base_size = 12) + labs(x = 'Temperature (ºC)', y = 'Growth rate', title = 'Growth rate across temperatures') p1 + p2 ## ----residual_resample_data, fig.height=3,fig.width=8------------------------- # bootstrap using residual resampling boot3 <- Boot(fit_nlsLM2, method = 'residual') # predict over new data boot3_preds <- boot3$t %>% as.data.frame() %>% drop_na() %>% mutate(iter = 1:n()) %>% group_by_all() %>% do(data.frame(temp = seq(min(d2$temp), max(d2$temp), length.out = 100))) %>% ungroup() %>% mutate(pred = sharpeschoolhigh_1981(temp, r_tref, e, eh, th, tref = 15)) # calculate bootstrapped confidence intervals boot3_conf_preds <- group_by(boot3_preds, temp) %>% summarise(conf_lower = quantile(pred, 0.025), conf_upper = quantile(pred, 0.975)) %>% ungroup() # plot bootstrapped CIs p1 <- ggplot() + geom_line(aes(temp, .fitted), d_preds, col = 'blue') + geom_ribbon(aes(temp, ymin = conf_lower, ymax = conf_upper), boot3_conf_preds, fill = 'blue', alpha = 0.3) + geom_point(aes(temp, rate), d2, size = 2) + theme_bw(base_size = 12) + labs(x = 'Temperature (ºC)', y = 'Growth rate', title = 'Growth rate across temperatures') # plot bootstrapped predictions p2 <- ggplot() + geom_line(aes(temp, .fitted), d_preds, col = 'blue') + geom_line(aes(temp, pred, group = iter), boot3_preds, col = 'blue', alpha = 0.007) + geom_point(aes(temp, rate), d2, size = 2) + theme_bw(base_size = 12) + labs(x = 'Temperature (ºC)', y = 'Growth rate', title = 'Growth rate across temperatures') p1 + p2 ## ----confint_bact, error=TRUE, fig.height = 5, fig.width = 7------------------ # First for the bacteria # get parameters of fitted model param_bact <- broom::tidy(fit_nlsLM) %>% select(param = term, estimate) # calculate confidence intervals of models ci_bact1 <- nlstools::confint2(fit_nlsLM, method = 'asymptotic') %>% as.data.frame() %>% rename(conf_lower = 1, conf_upper = 2) %>% rownames_to_column(., var = 'param') %>% mutate(method = 'asymptotic') ci_bact2 <- confint(fit_nlsLM) %>% as.data.frame() %>% rename(conf_lower = 1, conf_upper = 2) %>% rownames_to_column(., var = 'param') %>% mutate(method = 'profile') # CIs from case resampling ci_bact3 <- confint(boot1, method = 'bca') %>% as.data.frame() %>% rename(conf_lower = 1, conf_upper = 2) %>% rownames_to_column(., var = 'param') %>% mutate(method = 'case bootstrap') # CIs from residual resampling ci_bact4 <- Boot(fit_nlsLM, method = 'residual') %>% confint(., method = 'bca') %>% as.data.frame() %>% rename(conf_lower = 1, conf_upper = 2) %>% rownames_to_column(., var = 'param') %>% mutate(method = 'residual bootstrap') ci_bact <- bind_rows(ci_bact1, ci_bact2, ci_bact3, ci_bact4) %>% left_join(., param_bact) # plot ggplot(ci_bact, aes(forcats::fct_relevel(method, c('profile', 'asymptotic')), estimate, col = method)) + geom_hline(aes(yintercept = conf_lower), linetype = 2, filter(ci_bact, method == 'profile')) + geom_hline(aes(yintercept = conf_upper), linetype = 2, filter(ci_bact, method == 'profile')) + geom_point(size = 4) + geom_linerange(aes(ymin = conf_lower, ymax = conf_upper)) + theme_bw() + facet_wrap(~param, scales = 'free') + scale_x_discrete('', labels = function(x) stringr::str_wrap(x, width = 10)) + labs(title = 'Calculation of confidence intervals for model parameters', subtitle = 'For the bacteria TPC; dashed lines are CI of profiling method') ## ----confint_chlor, error=TRUE, fig.height = 5, fig.width = 7, warning=FALSE---- # Second for Chlorella data # get parameters of fitted model param_chlor <- broom::tidy(fit_nlsLM2) %>% select(param = term, estimate) # calculate confidence intervals of models ci_chlor1 <- nlstools::confint2(fit_nlsLM2, method = 'asymptotic') %>% as.data.frame() %>% rename(conf_lower = 1, conf_upper = 2) %>% rownames_to_column(., var = 'param') %>% mutate(method = 'asymptotic') ci_chlor2 <- nlstools::confint2(fit_nlsLM2, method = 'profile') # profiling method fails ci_chlor2 <- mutate(ci_chlor1, method = 'profile', conf_lower = NA, conf_upper = NA) # CIs from case resampling ci_chlor3 <- confint(boot2, method = 'bca') %>% as.data.frame() %>% rename(conf_lower = 1, conf_upper = 2) %>% rownames_to_column(., var = 'param') %>% mutate(method = 'case bootstrap') # CIs from residual resampling ci_chlor4 <- confint(boot3, method = 'bca') %>% as.data.frame() %>% rename(conf_lower = 1, conf_upper = 2) %>% rownames_to_column(., var = 'param') %>% mutate(method = 'residual bootstrap') ci_chlor <- bind_rows(ci_chlor1, ci_chlor2, ci_chlor3, ci_chlor4) %>% full_join(., param_chlor) ggplot(ci_chlor, aes(forcats::fct_relevel(method, c('profile', 'asymptotic')), estimate, col = method)) + geom_point(size = 4) + geom_linerange(aes(ymin = conf_lower, ymax = conf_upper)) + theme_bw() + facet_wrap(~param, scales = 'free') + scale_x_discrete('', labels = function(x) stringr::str_wrap(x, width = 10)) + labs(title = 'Calculation of confidence intervals for model parameters', subtitle = 'For the chlorella TPC; profile method failes') ## ---- ci_calc_param, fig.width = 7, fig.height = 6---------------------------- extra_params <- calc_params(fit_nlsLM) %>% pivot_longer(everything(), names_to = 'param', values_to = 'estimate') ci_extra_params <- Boot(fit_nlsLM, f = function(x){unlist(calc_params(x))}, labels = names(calc_params(fit_nlsLM)), R = 200, method = 'case') %>% confint(., method = 'bca') %>% as.data.frame() %>% rename(conf_lower = 1, conf_upper = 2) %>% rownames_to_column(., var = 'param') %>% mutate(method = 'case bootstrap') ci_extra_params <- left_join(ci_extra_params, extra_params) ggplot(ci_extra_params, aes(param, estimate)) + geom_point(size = 4) + geom_linerange(aes(ymin = conf_lower, ymax = conf_upper)) + theme_bw() + facet_wrap(~param, scales = 'free') + scale_x_discrete('') + labs(title = 'Calculation of confidence intervals for extra parameters', subtitle = 'For the bacteria TPC; using case resampling')