## ---- include = FALSE-------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----installation, eval = F-------------------------------
#  install.packages("riskCommunicator")

## ----setup------------------------------------------------
library(riskCommunicator)
library(tidyverse)
library(printr)

## ---- printr.help.sections = c('usage','arguments')-------
?gComp

## ----load_other_data, eval = FALSE------------------------
#  mydata <- read.csv("C:/your/file/path/yourdata.csv")

## ----dataset----------------------------------------------
data(cvdd)

## ----variable_check---------------------------------------
cvdd$educ <- as.factor(cvdd$educ)
#educ is now a factor with 4 levels

str(cvdd$educ)

## ----error = TRUE-----------------------------------------
cvdd.break <- cvdd %>% 
  mutate(PREVHYP = as.character(PREVHYP))
  
binary.res.break <- gComp(data = cvdd.break, 
                          Y = "cvd_dth", 
                          X = "DIABETES", 
                          Z = c("AGE", "SEX", "BMI", "CURSMOKE", "PREVHYP"), 
                          outcome.type = "binary", 
                          R = 200)

## ----factor_check-----------------------------------------
str(cvdd$educ)

cvdd$educ <- factor(cvdd$educ,levels = c("4","1","2","3"))
#Category 4 is now the referent

str(cvdd$educ)

## ---------------------------------------------------------
cvdd %>%
 select(everything()) %>%
 summarise_all(list(~sum(is.na(.))))

## ----binary_outcome, paged.print = FALSE------------------

## Specify the regression formula
cvdd.formula <- cvd_dth ~ DIABETES + AGE + SEX + BMI + CURSMOKE + PREVHYP

## For reproducibility, we should always set the seed since the g-computation uses random resampling of the data to calculate confidence intervals and random sampling of the distribution when predicting outcomes
set.seed(1298)

## Call the gComp function
binary.res <- gComp(data = cvdd, 
                    formula = cvdd.formula, 
                    outcome.type = "binary", 
                    R = 200)

## ----binary_outcome_noFormula-----------------------------
set.seed(1298)

binary.res.alt <- gComp(data = cvdd, 
                        Y = "cvd_dth", 
                        X = "DIABETES", 
                        Z = c("AGE", "SEX", "BMI", "CURSMOKE", "PREVHYP"), 
                        outcome.type = "binary", 
                        R = 200)

## ----binary_outcome_defaults------------------------------
set.seed(1298)

binary.res.alt2 <- gComp(data = cvdd, 
                         formula = cvdd.formula, 
                         outcome.type = "binary", 
                         R = 200, 
                         Y = NULL, 
                         X = NULL, 
                         Z = NULL, 
                         subgroup = NULL, 
                         offset = NULL, 
                         rate.multiplier = 1, 
                         clusterID = NULL, 
                         parallel = "no", 
                         ncpus = 1)

## ----binary_results_check, paged.print = FALSE------------
binary.res
print(binary.res)

## ----gComp_class_explaination-----------------------------
class(binary.res)
# The names of the different items in the list 
names(binary.res)

# To see the sample size of the original data:
binary.res$n 

# To see the contrast being compared in the analysis:
binary.res$contrast

## ---------------------------------------------------------
summary(binary.res)

## ----binary_outcome_subgroup, paged.print = FALSE---------
set.seed(1298)

binary.res.subgroup <- gComp(data = cvdd, 
                             Y = "cvd_dth", 
                             X = "DIABETES", 
                             Z = c("AGE", "SEX", "BMI", "CURSMOKE", "PREVHYP"), 
                             subgroup = "SEX", 
                             outcome.type = "binary", 
                             R = 200)

binary.res.subgroup

## ----categorical_exposure, paged.print = FALSE------------
#number and percent of subjects in each BMI category 
table(cvdd$bmicat)
prop.table(table(cvdd$bmicat))*100

set.seed(345)
catExp.res <- gComp(data = cvdd, 
                    Y = "cvd_dth", 
                    X = "bmicat", 
                    Z = c("AGE", "SEX", "DIABETES", "CURSMOKE", "PREVHYP"), 
                    outcome.type = "binary", 
                    R = 200)

catExp.res


## ----continuous_exposure, paged.print = FALSE-------------
set.seed(4528)
contExp.res <- gComp(data = cvdd, 
                     Y = "cvd_dth", 
                     X = "AGE", 
                     Z = c("BMI", "SEX", "DIABETES", "CURSMOKE", "PREVHYP"), 
                     outcome.type = "binary", 
                     exposure.scalar = 10, 
                     R = 200)

contExp.res


## ----change_dataset_cvd_dth_to_numeric--------------------
cvdd.t <- cvdd %>%
  dplyr::mutate(cvd_dth = as.numeric(as.character(cvd_dth)),
                timeout = as.numeric(timeout))

## ----rate_outcome, paged.print = FALSE--------------------
set.seed(6534)

rate.res <- gComp(data = cvdd.t, 
                  Y = "cvd_dth", 
                  X = "DIABETES", 
                  Z = c("AGE", "SEX", "BMI", "CURSMOKE", "PREVHYP"), 
                  outcome.type = "rate", 
                  rate.multiplier = 365.25*100, 
                  offset = "timeout", 
                  R = 200)

rate.res

## ---- paged.print = FALSE---------------------------------
## Specify the regression formula
cvdd.formula <- cvd_dth ~ DIABETES + AGE + SEX + BMI + CURSMOKE + PREVHYP

set.seed(6534)
## Call the gComp function
rate.res.alt <- gComp(data = cvdd.t, 
                      formula = cvdd.formula, 
                      outcome.type = "rate", 
                      rate.multiplier = (365.25*100), 
                      offset = "timeout", 
                      R = 200)
rate.res.alt


## ----continuous_outcome, paged.print = FALSE--------------
set.seed(9385)

cont.res <- gComp(data = cvdd, 
                  Y = "glucoseyear6", 
                  X = "DIABETES", 
                  Z = c("AGE", "SEX", "BMI", "CURSMOKE", "PREVHYP"), 
                  outcome.type = "continuous", 
                  R = 200)

cont.res

## ----count_outcome, paged.print = FALSE-------------------
set.seed(7295)

count.formula <- "nhosp ~ DIABETES + AGE + SEX + BMI + CURSMOKE + PREVHYP"

count.res <- gComp(data = cvdd, 
                   formula = count.formula, 
                   outcome.type = "count", 
                   R = 200)

count.res

## ----catExp_binaryOutcome_plot, fig.width = 12, fig.height = 10, out.width = "100%"----
plot(catExp.res)

## ----catExp_binaryOutcome_resultsPlot, fig.height = 3, fig.width = 5, out.width = "100%"----
ggplot(catExp.res$results.df %>% 
         filter(Parameter %in% c("Risk Difference", "Risk Ratio"))
) + 
  geom_pointrange(aes(x = Comparison, 
                      y = Estimate, 
                      ymin = `2.5% CL`, 
                      ymax = `97.5% CL`, 
                      color = Comparison), 
                  shape = 2
  ) + 
  coord_flip() + 
  facet_wrap(~Parameter, scale = "free") + 
  theme_bw() + 
  theme(legend.position = "none")

## ----catExp_binaryOutcome_predOutcomes, paged.print = FALSE----
catExp.res$predicted.outcome

## ----catExp_binaryOutcome_glm.result, paged.print = FALSE----

summary(catExp.res$glm.result)


## ---------------------------------------------------------
sessionInfo()