## ---- message = FALSE--------------------------------------------------------- library(dplyr) library(ggplot2) library(knitr) library(ciTools) library(here) set.seed(20180925) ## ----include = FALSE, cache = TRUE-------------------------------------------- library(SPREDA) car <- rep(c("suv", "sedan"), 25) temp <- seq(40,100, length.out = 50) time <- exp(0.33 + 0.2 * ifelse(car == "sedan", 0, 1) + 0.08 * temp + rsev(50)) time <- ifelse(time < 2000, time, 2000) failure <- ifelse(time < 2000, 1, 0) dat <- data.frame(temp = temp, car = car, time = time, failure = failure) ## ----cache = TRUE------------------------------------------------------------- kable(head(dat)) ## ----cache = TRUE, fig.width = 5, fig.height= 5, warning = FALSE-------------- ggplot(dat, aes(x = temp, y = time)) + geom_point(aes(color = factor(failure)))+ ggtitle("Censored obs. in red") + theme_bw() ## ----cache = TRUE------------------------------------------------------------- (fit <- survreg(Surv(time, failure) ~ temp + car, data = dat)) ## weibull dist is default ## ----cache = TRUE, fig.width = 8, fig.height = 5, warning = FALSE------------- with_ints <- ciTools::add_ci(dat,fit, names = c("lcb", "ucb")) %>% ciTools::add_pi(fit, names = c("lpb", "upb")) ## ----cache = TRUE, fig.width = 8, fig.height = 5, warning = FALSE------------- kable(head(with_ints)) ## ----cache = TRUE, fig.width = 8, fig.height = 5, warning = FALSE------------- ggplot(with_ints, aes(x = temp, y = time)) + geom_point(aes(color = car)) + facet_wrap(~car)+ theme_bw() + ggtitle("Model fit with 95% CIs and PIs", "solid line = mean, dotted line = median") + geom_line(aes(y = mean_pred), linetype = 1) + geom_line(aes(y = median_pred), linetype = 2) + geom_ribbon(aes(ymin = lcb, ymax = ucb), alpha = 0.5) + geom_ribbon(aes(ymin = lpb, ymax = upb), alpha = 0.1) ## ----cache = TRUE, fig.width = 8, fig.height = 5, warning = FALSE------------- probs <- ciTools::add_probs(dat, fit, q = 500, name = c("prob", "lcb", "ucb"), comparison = ">") ## ----cache = TRUE, fig.width = 8, fig.height = 5, warning = FALSE------------- ggplot(probs, aes(x = temp, y = prob)) + ggtitle("Estimated prob. of avg. spring lasting longer than 500 hrs.") + ylim(c(0,1)) + facet_wrap(~car)+ theme_bw() + geom_line(aes(y = prob)) + geom_ribbon(aes(ymin = lcb, ymax = ucb), alpha = 0.5) ## ----cache = TRUE, fig.width = 8, fig.height = 5, warning = FALSE------------- quants <- ciTools::add_quantile(dat, fit, p = 0.90, name = c("quant", "lcb", "ucb")) ## ----cache = TRUE, fig.width = 8, fig.height = 5, warning = FALSE------------- ggplot(quants, aes(x = temp, y = time)) + geom_point(aes(color = car)) + ggtitle("Estimated 90th percentile of condtional failure distribution, with CI") + facet_wrap(~car)+ theme_bw() + geom_line(aes(y = quant)) + geom_ribbon(aes(ymin = lcb, ymax = ucb), alpha = 0.5) ## ----include=FALSE, cache= TRUE----------------------------------------------- path <- "./survreg_data" exp0 <- read.csv(paste0(path,"/","exponential_sim_expect_ci_censoredp00.csv")) exp1 <- read.csv(paste0(path,"/","exponential_sim_expect_ci_censoredp03.csv")) exp2 <- read.csv(paste0(path,"/","exponential_sim_expect_ci_censoredp05.csv")) exp_dat <- rbind(exp0, exp1, exp2) %>% mutate(distr = "Exponential") loglog0 <- read.csv(paste0(path,"/", "loglogistic_sim_expect_ci_scale025_censoredp00.csv")) loglog1 <- read.csv(paste0(path,"/", "loglogistic_sim_expect_ci_scale025_censoredp03.csv")) loglog2 <- read.csv(paste0(path,"/", "loglogistic_sim_expect_ci_scale025_censoredp05.csv")) loglog_dat <- rbind(loglog0, loglog1, loglog2) %>% mutate(distr = "Loglogistic") %>% dplyr::select(-c(scale)) lognorm0 <- read.csv(paste0(path,"/","lognormal_sim_expect_ci_scale2_censoredp00.csv")) lognorm1 <- read.csv(paste0(path,"/","lognormal_sim_expect_ci_scale2_censoredp03.csv")) lognorm2 <- read.csv(paste0(path,"/","lognormal_sim_expect_ci_scale2_censoredp05.csv")) lognorm_dat <- rbind(lognorm0, lognorm1, lognorm2) %>% mutate(distr = "Lognormal") %>% dplyr::select(-c(scale)) weibull0 <- read.csv(paste0(path,"/","weibull_sim_expect_ci_scale2_censoredp00.csv")) weibull1 <- read.csv(paste0(path,"/","weibull_sim_expect_ci_scale2_censoredp03.csv")) weibull2 <- read.csv(paste0(path,"/","weibull_sim_expect_ci_scale2_censoredp05.csv")) weibull_dat <- rbind(weibull0, weibull1, weibull2) %>% mutate(distr = "Weibull") %>% dplyr::select(-c(scale)) ci_dat <- rbind(exp_dat, loglog_dat, lognorm_dat, weibull_dat) ## ----fig.width=8, fig.height=8, echo = FALSE, cache= TRUE--------------------- ggplot(filter(ci_dat), aes(x=sample_size, y=cov_prob)) + geom_point(size = 2) + geom_line(size = 1.5) + geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se, ymax = cov_prob + 2*cov_prob_se), width=.1) + xlab("Sample Size (log scale)") + ylab("Coverage Probs +/- 2 SEs") + ggtitle("C.I. Simulation, by Distribution and Censoring", subtitle = "Coverage Probabilities (zoomed in)") + scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) + geom_hline(aes(yintercept = 0.90), colour="#BB0000", linetype="dashed", size = 1) + facet_grid(distr~censoredp) ## ----fig.width=8, fig.height=8, echo=FALSE, cache= TRUE----------------------- ggplot(filter(ci_dat), aes(x=sample_size, y=cov_prob)) + geom_point(size = 2) + geom_line(size = 1.5) + ylim(0,1) + geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se, ymax = cov_prob + 2*cov_prob_se), width=.1) + xlab("Sample Size (log scale)") + ylab("Coverage Probs +/- 2 SEs") + ggtitle("C.I. Simulation, by Distribution and Censoring", subtitle = "Coverage Probabilities (zoomed out)") + scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) + geom_hline(aes(yintercept = 0.90), colour="#BB0000", linetype="dashed", size = 1) + facet_grid(distr~censoredp) ## ----fig.width=8, fig.height=8, echo = FALSE, cache= TRUE--------------------- ggplot(filter(ci_dat), aes(x=sample_size, y=int_width)) + geom_point(size = 2) + geom_line(size = 1.5) + geom_errorbar(aes(ymin=int_width - 2*int_width_se, ymax = int_width + 2*int_width_se), width=.1) + xlab("Sample Size (log scale)") + ylab("Interval Widths +/- 2 SEs") + ggtitle("C.I Simulation, by Distribution and Censoring", subtitle = "Interval Widths") + scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) + geom_hline(aes(yintercept = 0.0), colour="blue", linetype="dashed", size = 1) + facet_grid(distr~censoredp) ## ---- include = FALSE, cache= TRUE, warning = FALSE--------------------------- expp0 <- read.csv(paste0(path,"/","exponential_sim_prob_ci_censoredp00.csv")) expp1 <- read.csv(paste0(path,"/","exponential_sim_prob_ci_censoredp03.csv")) expp2 <- read.csv(paste0(path,"/","exponential_sim_prob_ci_censoredp05.csv")) expp_dat <- rbind(expp0, expp1, expp2) %>% mutate(distr = "Exponential") loglogp0 <- read.csv(paste0(path,"/", "loglogistic_sim_prob_ci_scale025_censoredp00.csv")) loglogp1 <- read.csv(paste0(path,"/", "loglogistic_sim_prob_ci_scale025_censoredp03.csv")) loglogp2 <- read.csv(paste0(path,"/", "loglogistic_sim_prob_ci_scale025_censoredp05.csv")) loglogp_dat <- rbind(loglogp0, loglogp1, loglogp2) %>% mutate(distr = "Loglogisic") %>% dplyr::select(-c(scale)) lognormp0 <- read.csv(paste0(path,"/","lognormal_sim_prob_ci_scale2_censoredp00.csv")) lognormp1 <- read.csv(paste0(path,"/","lognormal_sim_prob_ci_scale2_censoredp03.csv")) lognormp2 <- read.csv(paste0(path,"/","lognormal_sim_prob_ci_scale2_censoredp05.csv")) lognormp_dat <- rbind(lognormp0, lognormp1, lognormp2) %>% mutate(distr = "Lognormal") %>% dplyr::select(-c(scale)) weibullp0 <- read.csv(paste0(path,"/","weibull_sim_prob_ci_scale2_censoredp00.csv")) weibullp1 <- read.csv(paste0(path,"/","weibull_sim_prob_ci_scale2_censoredp03.csv")) weibullp2 <- read.csv(paste0(path,"/","weibull_sim_prob_ci_scale2_censoredp05.csv")) weibullp_dat <- rbind(weibullp0, weibullp1, weibullp2) %>% mutate(distr = "Weibull") %>% dplyr::select(-c(scale)) prob_dat <- rbind(expp_dat, loglogp_dat, lognormp_dat, weibullp_dat) ## ---- fig.width=8, fig.height=8, echo = FALSE, cache= TRUE, warning = FALSE---- ggplot(filter(prob_dat), aes(x=sample_size, y=cov_prob)) + geom_point(size = 2) + geom_line(size = 1.5) + geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se, ymax = cov_prob + 2*cov_prob_se), width=.1) + xlab("Sample Size (log scale)") + ylab("Coverage Probs +/- 2 SEs") + ggtitle("C.I. for Survival Probability Simulation, by Distribution and Censoring", subtitle = "Coverage Probabilities (zoomed in)") + scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) + geom_hline(aes(yintercept = 0.90), colour="#BB0000", linetype="dashed", size = 1) + facet_grid(distr~censoredp) ## ---- fig.width=8, fig.height=8, echo = FALSE, cache= TRUE, warning = FALSE---- ggplot(filter(prob_dat), aes(x=sample_size, y=cov_prob)) + geom_point(size = 2) + geom_line(size = 1.5) + ylim(0,1) + geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se, ymax = cov_prob + 2*cov_prob_se), width=.1) + xlab("Sample Size (log scale)") + ylab("Coverage Probs +/- 2 SEs") + ggtitle("Survival Probability Simulation, by Distribution and Censoring", subtitle = "Coverage Probabilities (zoomed out)") + scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) + geom_hline(aes(yintercept = 0.90), colour="#BB0000", linetype="dashed", size = 1) + facet_grid(distr~censoredp) ## ----fig.width=8, fig.height=8, echo = FALSE, cache= TRUE, warning = FALSE---- ggplot(filter(prob_dat), aes(x=sample_size, y=int_width)) + geom_point(size = 2) + geom_line(size = 1.5) + geom_errorbar(aes(ymin=int_width - 2*int_width_se, ymax = int_width + 2*int_width_se), width=.1) + xlab("Sample Size (log scale)") + ylab("Interval Widths +/- 2 SEs") + ggtitle("Survival Probability Simulation, by Distribution and Censoring", subtitle = "Interval Widths") + scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) + geom_hline(aes(yintercept = 0.0), colour="blue", linetype="dashed", size = 1) + facet_grid(distr~censoredp) ## ----include=FALSE, cache= TRUE----------------------------------------------- exppi0 <- read.csv(paste0(path,"/","exponential_sim_pi_censoredp00.csv")) exppi1 <- read.csv(paste0(path,"/","exponential_sim_pi_censoredp03.csv")) exppi2 <- read.csv(paste0(path,"/","exponential_sim_pi_censoredp05.csv")) exppi_dat <- rbind(exppi0, exppi1, exppi2) %>% mutate(distr = "Exponential") loglogpi0 <- read.csv(paste0(path,"/", "loglogistic_sim_pi_scale025_censoredp00.csv")) loglogpi1 <- read.csv(paste0(path,"/", "loglogistic_sim_pi_scale025_censoredp03.csv")) loglogpi2 <- read.csv(paste0(path,"/", "loglogistic_sim_pi_scale025_censoredp05.csv")) loglogpi_dat <- rbind(loglogpi0, loglogpi1, loglogpi2) %>% mutate(distr = "Loglogistic") %>% dplyr::select(-c(scale)) lognormpi0 <- read.csv(paste0(path,"/","lognormal_sim_pi_scale2_censoredp00.csv")) lognormpi1 <- read.csv(paste0(path,"/","lognormal_sim_pi_scale2_censoredp03.csv")) lognormpi2 <- read.csv(paste0(path,"/","lognormal_sim_pi_scale2_censoredp05.csv")) lognormpi_dat <- rbind(lognormpi0, lognormpi1, lognormpi2) %>% mutate(distr = "Lognormal") %>% dplyr::select(-c(scale)) weibullpi0 <- read.csv(paste0(path,"/","weibull_sim_pi_scale2_censoredp00.csv")) weibullpi1 <- read.csv(paste0(path,"/","weibull_sim_pi_scale2_censoredp03.csv")) weibullpi2 <- read.csv(paste0(path,"/","weibull_sim_pi_scale2_censoredp05.csv")) weibullpi_dat <- rbind(weibullpi0, weibullpi1, weibullpi2) %>% mutate(distr = "Weibull") %>% dplyr::select(-c(scale)) pi_dat <- rbind(exppi_dat, loglogpi_dat, lognormpi_dat, weibullpi_dat) ## ----fig.width=8, fig.height=8, echo=FALSE, cache= TRUE----------------------- ggplot(filter(pi_dat), aes(x=sample_size, y=cov_prob)) + geom_point(size = 2) + geom_line(size = 1.5) + geom_line(aes(y = cov_prob_boot), size = 1.5, color = "blue") + ## geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se, ## ymax = cov_prob + 2*cov_prob_se), width=.1) + ## geom_errorbar(aes(ymin=cov_prob_boot - 2*cov_prob_boot_se, ## ymax = cov_prob_boot + 2*cov_prob_boot_se), ## width=.1, color = "blue") + xlab("Sample Size (log scale)") + ylab("Coverage Probs +/- 2 SEs") + ggtitle("P.I. Simulation, by Distribution and Censoring", subtitle = "Coverage Probabilities. Naive method (black). Simulation method (blue) ") + scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) + geom_hline(aes(yintercept = 0.90), colour="#BB0000", linetype="dashed", size = 1) + facet_grid(distr~censoredp) ## ----fig.width=8, fig.height=8, echo = FALSE, cache= TRUE--------------------- ggplot(filter(pi_dat), aes(x=sample_size, y=cov_prob)) + geom_point(size = 2) + geom_line(size = 1.5) + ylim(0,1) + geom_errorbar(aes(ymin=cov_prob - 2*cov_prob_se, ymax = cov_prob + 2*cov_prob_se), width=.1) + xlab("Sample Size (log scale)") + ylab("Coverage Probs +/- 2 SEs") + ggtitle("P.I. Simulation, by Distribution and Censoring", subtitle = "Coverage Probabilities (zoomed out)") + scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) + geom_hline(aes(yintercept = 0.90), colour="#BB0000", linetype="dashed", size = 1) + facet_grid(distr~censoredp) ## ----fig.width=8, fig.height=5, echo = FALSE, cache= TRUE--------------------- ggplot(filter(pi_dat, distr == "Exponential"), aes(x=sample_size, y=int_width)) + geom_point(size = 2) + geom_line(size = 1.5) + geom_line(aes(y = int_width_boot), color = "blue", size = 1.5) + ## geom_errorbar(aes(ymin=int_width - 2*int_width_se, ## ymax = int_width + 2*int_width_se), width=.1) + xlab("Sample Size (log scale)") + ylab("Interval Widths +/- 2 SEs") + ggtitle("P.I Simulation, Exponential Distribution", subtitle = "Interval Widths. naive method (black), simulation method (blue)") + scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) + facet_wrap(~censoredp) ## ----fig.width=8, fig.height=5, echo = FALSE, cache= TRUE--------------------- ggplot(filter(pi_dat, distr == "Loglogistic"), aes(x=sample_size, y=int_width)) + geom_point(size = 2) + geom_line(size = 1.5) + geom_line(aes(y = int_width_boot), color = "blue", size = 1.5) + ## geom_errorbar(aes(ymin=int_width - 2*int_width_se, ## ymax = int_width + 2*int_width_se), width=.1) + xlab("Sample Size (log scale)") + ylab("Interval Widths +/- 2 SEs") + ggtitle("P.I Simulation, Loglogistic Distribution", subtitle = "Interval Widths. naive method (black), simulation method (blue)") + scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) + facet_wrap(~censoredp) ## ----fig.width=8, fig.height=5, echo = FALSE, cache= TRUE--------------------- ggplot(filter(pi_dat, distr == "Lognormal"), aes(x=sample_size, y=int_width)) + geom_point(size = 2) + geom_line(size = 1.5) + geom_line(aes(y = int_width_boot), color = "blue", size = 1.5) + ## geom_errorbar(aes(ymin=int_width - 2*int_width_se, ## ymax = int_width + 2*int_width_se), width=.1) + xlab("Sample Size (log scale)") + ylab("Interval Widths +/- 2 SEs") + ggtitle("P.I Simulation, Lognormal Distribution", subtitle = "Interval Widths. naive method (black), simulation method (blue)") + scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) + facet_wrap(~censoredp) ## ----fig.width=8, fig.height=5, echo = FALSE, cache= TRUE--------------------- ggplot(filter(pi_dat, distr == "Weibull"), aes(x=sample_size, y=int_width)) + geom_point(size = 2) + geom_line(size = 1.5) + geom_line(aes(y = int_width_boot), color = "blue", size = 1.5) + ## geom_errorbar(aes(ymin=int_width - 2*int_width_se, ## ymax = int_width + 2*int_width_se), width=.1) + xlab("Sample Size (log scale)") + ylab("Interval Widths +/- 2 SEs") + ggtitle("P.I Simulation, Weibull Distribution", subtitle = "Interval Widths. naive method (black), simulation method (blue)") + scale_x_log10(breaks = c(20, 30, 50, 100, 250, 500, 1000, 2000)) + facet_wrap(~censoredp) ## ----------------------------------------------------------------------------- sessionInfo()