## ----setup, echo=FALSE-------------------------------------------------------- knitr::opts_chunk$set( echo = TRUE, message = FALSE, warning = FALSE, collapse = TRUE, comment = "#>" ) ## ----packages----------------------------------------------------------------- library(public.ctn0094data) library(public.ctn0094extra) library(dplyr) library(purrr) library(tibble) library(tidyr) library(stringr) ## ----------------------------------------------------------------------------- examplePeople_int <- c(1L, 163L, 210L, 242L, 4L, 17L, 13L, 1103L, 233L, 2089L) ## ----trim-data-1-------------------------------------------------------------- data_ls <- loadRawData(c("randomization", "treatment")) data_ls$randomization <- data_ls$randomization %>% select(who, when, treatment, randomized = which) %>% # Remove second randomization events filter(randomized != 2) %>% # Retain example participants filter(who %in% examplePeople_int) data_ls$treatment <- data_ls$treatment %>% # Retain example participants filter(who %in% examplePeople_int) ## ----view-data-1-------------------------------------------------------------- data_ls$randomization data_ls$treatment ## ----mark-study-drug-days----------------------------------------------------- treatTimeLong_df <- # Collapse Data List data_ls %>% reduce(.f = full_join, by = c("who", "when")) %>% as_tibble() %>% arrange(who, when) %>% # First pass: find any day with a dose of treatment drug rowwise() %>% mutate( treated = if_else( condition = !is.na(amount) & amount > 0, true = TRUE, false = FALSE, missing = FALSE ) ) # Inspect results treatTimeLong_df ## ----calculate-induction-delay------------------------------------------------ inductDelay_df <- treatTimeLong_df %>% # Find the day of the first treatment group_by(who) %>% arrange(when) %>% filter(treated) %>% slice(1) %>% mutate(treatStart = when) %>% select(who, when, treatStart) %>% # Add the first day back to the original data left_join(treatTimeLong_df, ., by = c("who", "when")) %>% group_by(who) %>% fill(treatStart, .direction = "updown") %>% # Calculate the delay filter(randomized == 1) %>% # This sets time to be missing if the induction was not observed mutate(inductDelay = treatStart - when) %>% select(who, treatment, inductDelay) %>% ungroup() # Inspect results inductDelay_df ## ----echo=FALSE--------------------------------------------------------------- eg_df <- inductDelay_df %>% filter(inductDelay > 0) ## ----echo=FALSE--------------------------------------------------------------- rm(eg_df) ## ----clean-up-1--------------------------------------------------------------- rm(data_ls, treatTimeLong_df) ## ----backbone-2751------------------------------------------------------------ start_int <- c(`27` = -30L, `51` = -30L) end_int <- c(`27` = 168L, `51` = 168L) backbone2751_df <- CreateProtocolHistory( start_vec = start_int, end_vec = end_int ) %>% filter(who %in% examplePeople_int) # Inspect backbone2751_df ## ----backbone-30-------------------------------------------------------------- backbone30_df <- randomization %>% full_join(everybody, by = "who") %>% filter(project == "30") %>% filter(who %in% examplePeople_int) %>% CreateCTN30ProtocolHistory() %>% mutate(project = "30") %>% select(who, project, when) # Inspect backbone30_df backbone30_df %>% group_by(who) %>% summarise(lastDay = max(when)) ## ----backbone-first----------------------------------------------------------- backbone_df <- bind_rows(backbone2751_df, backbone30_df) %>% arrange(who) rm(backbone2751_df, backbone30_df, start_int, end_int) ## ----trim-data-2-------------------------------------------------------------- data_ls <- loadRawData(c("randomization", "visit")) data_ls$randomization <- data_ls$randomization %>% select(who, when, treatment, randomized = which) %>% # Remove second randomization events filter(randomized != 2) %>% # Retain example participants filter(who %in% examplePeople_int) data_ls$visit <- data_ls$visit %>% filter(who %in% examplePeople_int) ## ----add-randomization-day---------------------------------------------------- timelineRand1_df <- data_ls$randomization %>% mutate(randomized = randomized == "1") %>% # Join to backbone and arrange within subject by day full_join(backbone_df, by = c("who", "when")) %>% group_by(who) %>% arrange(when, .by_group = TRUE) %>% select(who, project, when, randomized) # Inspect timelineRand1_df ## ----add-visit-days----------------------------------------------------------- timelineVisit1_df <- data_ls$visit %>% select(who, when, visit, status = what) %>% filter(status %in% c("visit", "final")) %>% mutate(visit = TRUE) %>% select(who, when, visit) %>% left_join(timelineRand1_df, ., by = c("who", "when")) # Inspect timelineVisit1_df ## ----mark-missing-visits------------------------------------------------------ timelineMissing1_df <- MarkMissing(timelineVisit1_df) ## ----tidy-missing-visit-marks------------------------------------------------- derived_visitImputed <- timelineMissing1_df %>% mutate(visit = as.character(visit)) %>% replace_na(list(visit = "", visitYM = "")) %>% mutate(visitImputed = paste0(visit, visitYM)) %>% mutate( visitImputed = str_replace( visitImputed, pattern = "TRUETRUE", replacement = "Present" ) ) %>% select(who, when, visitImputed) %>% filter(visitImputed != "") %>% ungroup() # Inspect derived_visitImputed ## ----clean-up-2--------------------------------------------------------------- rm( backbone_df, data_ls, timelineMissing1_df, timelineRand1_df, timelineVisit1_df ) ## ----backbone-again----------------------------------------------------------- # CTN-0027 and CTN-0051 start_int <- c(`27` = -30L, `51` = -30L) end_int <- c(`27` = 168L, `51` = 168L) # 24 weeks backbone2751_df <- CreateProtocolHistory( start_vec = start_int, end_vec = end_int ) # CTN-0030 backbone30_df <- randomization %>% full_join(everybody, by = "who") %>% filter(project == "30") %>% CreateCTN30ProtocolHistory() %>% mutate(project = "30") %>% select(who, project, when) # All Days backbone_df <- bind_rows( backbone2751_df, backbone30_df ) %>% arrange(who) %>% mutate(project = factor(project, levels = c("27", "30", "51"))) %>% filter(who %in% examplePeople_int) rm(backbone2751_df, backbone30_df, start_int, end_int) ## ----------------------------------------------------------------------------- randomized_df <- randomization %>% filter(who %in% examplePeople_int) %>% mutate(randomized = as.integer(as.character(which))) %>% select(who, when, randomized) %>% left_join(everybody, by = "who") %>% filter( !(randomized == 2 & project %in% c("27", "51")) ) %>% select(-project) udsUse2_df <- backbone_df %>% left_join(randomized_df, by = c("who", "when")) %>% left_join(derived_visitImputed, by = c("who", "when")) %>% left_join(uds, by = c("who", "when")) %>% # So we can use MarkUse() with UDS data (instead of all_drugs) mutate(source = "UDS") ## ----eg-visit-day-substances-------------------------------------------------- udsUse2_df %>% filter(visitImputed == "Present") %>% filter(!is.na(what)) %>% filter(who == 17) %>% print(n = nrow(.)) ## ----good-drugs-bad-drugs----------------------------------------------------- nonStudyOpioids_ls <- list( "Buprenorphine" = c("Opioid", "Methadone"), "Methadone" = c("Opioid", "Buprenorphine"), "Naltrexone" = c("Opioid", "Methadone", "Buprenorphine"), "Not treated" = c("Opioid", "Methadone", "Buprenorphine") ) ## ----treatment-groups--------------------------------------------------------- treatGroups_ls <- public.ctn0094data::randomization %>% filter(who %in% examplePeople_int) %>% filter(which == 1) %>% left_join(everybody, by = "who") %>% select(who, treatment) %>% mutate( treat_drug = case_when( str_detect(treatment, "BUP") ~ "Buprenorphine", treatment == "Methadone" ~ "Methadone", treatment == "Inpatient NR-NTX" ~ "Naltrexone" ) ) %>% select(-treatment) %>% split(f = .$treat_drug) %>% map(.f = "who") # Inspect treatGroups_ls ## ----mark-non-study-drug-use, warning=TRUE------------------------------------ opioidUse_df <- udsUse2_df %>% mutate( treat_group = case_when( who %in% treatGroups_ls$Buprenorphine ~ "Buprenorphine", who %in% treatGroups_ls$Methadone ~ "Methadone", who %in% treatGroups_ls$Naltrexone ~ "Naltrexone", TRUE ~ "Not treated" ) ) %>% split(f = .$treat_group) %>% # List of data in alphabetical order, so the non-study drugs ls should match map2( .y = nonStudyOpioids_ls, .f = ~{ # REQUIRES "source" COLUMN MarkUse( targetDrugs_char = .y, drugs_df = .x, # because we have participants with no recorded UDS; in practice DO NOT # use this command retainEmptyRows = TRUE ) } ) %>% bind_rows() %>% mutate( udsOpioid = case_when( is.na(when) ~ NA, !is.na(when) ~ TRUE ) ) %>% select(who, when, udsOpioid) ## ----timeline-to-one-row-per-day---------------------------------------------- timelineUDS_df <- udsUse2_df %>% left_join(opioidUse_df, by = c("who", "when")) %>% select(-what, -source) %>% # 2,089 rows to 1,994 distinct() rm( derived_visitImputed, opioidUse_df, randomized_df, treatGroups_ls, udsUse2_df ) ## ----sets-of-randomized-people------------------------------------------------ wasRandomized_int <- timelineUDS_df %>% group_by(who) %>% summarise(randomized = any(randomized %in% 1:2)) %>% filter(randomized) %>% pull(who) notRandomized_int <- timelineUDS_df %>% filter( !(who %in% wasRandomized_int) ) %>% pull(who) %>% unique() # Was randomized: wasRandomized_int # Wasn't notRandomized_int ## ----study-day-ticker-randomized---------------------------------------------- timelineUDS2_df <- timelineUDS_df %>% filter(who %in% wasRandomized_int) %>% group_by(who) %>% filter(!is.na(randomized)) %>% mutate( whenRandomized1 = case_when(randomized == 1 ~ when), whenRandomized2 = case_when(randomized == 2 ~ when) ) %>% select(who, when, whenRandomized1, whenRandomized2) %>% left_join(timelineUDS_df, ., by = c("who", "when")) %>% filter(who %in% wasRandomized_int) %>% # Add back in the groupings BEFORE the fill() group_by(who) %>% fill(whenRandomized1, .direction = "updown") %>% fill(whenRandomized2, .direction = "updown") %>% mutate(daysSinceRand1 = when - whenRandomized1) %>% mutate(daysSinceRand2 = when - whenRandomized2) %>% select(-whenRandomized1, -whenRandomized2) ## ----weekly-use-summary------------------------------------------------------- weeklyUse_df <- timelineUDS2_df %>% # The (daysSinceRand1 - 1) adjustment is to ensure that the first study week # is a full 7 days, since "day 0" is the day before randomization. The "+1" # at the end is to shift the study week label such that "week 0" is the # week *BEFORE* treatment, rather than the first week of treatment. So, the # randomization day is the last day of "week 0" (the pre-treatment period). mutate(studyWeek1 = (daysSinceRand1 - 1) %/% 7 + 1) %>% mutate(studyWeek2 = (daysSinceRand2 - 1) %/% 7 + 1) %>% group_by(who, studyWeek1) %>% # There are some study weeks with multiple UDS, so we count the number of # positive and negative UDS per week. summarise( nPosUDS = sum(udsOpioid == 1, na.rm = TRUE), nNegUDS = sum(visitImputed == "Present" & is.na(udsOpioid), na.rm = TRUE), nMissing = sum(visitImputed == "Missing", na.rm = TRUE), randWk1 = sum(randomized == 1, na.rm = TRUE) > 0, randWk2 = sum(randomized == 2 & project == "30", na.rm = TRUE) > 0 ) %>% ungroup() ## ----single-week-symbols------------------------------------------------------ useByWeekRandomized_df <- weeklyUse_df %>% mutate( udsStatus = case_when( # If we see a positive UDS and no negative UDS, it's positive nPosUDS > 0 & nNegUDS == 0 ~ "+", # If we see a negative UDS and no positive UDS, it's negative nPosUDS == 0 & nNegUDS > 0 ~ "-", # If we see both positive and negative UDS in a single week, it's both # (note that we can recode all "B"s to be "+" as necessary) nPosUDS > 0 & nNegUDS > 0 ~ "*", # If we don't have any UDS in a week after randomization, it's missing # UPDATE 2022-03-08: I had this as a 0 originally, and I was using this # in context of consent, not randomization. This was wrong. nPosUDS == 0 & nNegUDS == 0 & studyWeek1 >= 1 ~ "o", # If none of the above are true, but we still have a missing value as # marked by the MarkMissing() function, then it's missing nMissing > 0 ~ "o", # If none of the above conditions are true (probably because it's a week # before randomization but not during a baseline visit for consent), # then leave it blank (pre-study) TRUE ~ "_" ) ) %>% group_by(who) %>% # For CTN-0030, Phase II could have started on any day of the week, even in # the middle of a treatment week. If we try to start counting Phase II # weeks the day after treatment arms are switched, we can end up with the # last "week" of Phase I not having 7 days. I'm going to leave the first # week of Phase II as whatever week the switch happened in. mutate( rand1Active = studyWeek1 > 0, # This returns 0 for any week before the Phase II randomization, and 1 for # the Phase II randomization week and all subsequent weeks (because the # randWk2 column is 1 only for the week of second randomization and 0 # all other rows). rand2Active = cumsum(randWk2), trialPhase = rand1Active + rand2Active ) %>% select( who, studyWeek = studyWeek1, randWk1, randWk2, udsStatus, trialPhase ) ## ----use-patterns-by-phase---------------------------------------------------- weeklyOpioidPatterns_df <- useByWeekRandomized_df %>% mutate( phase = case_when( trialPhase == 0L ~ "Baseline", trialPhase == 1L ~ "Phase_1", trialPhase == 2L ~ "Phase_2" ) ) %>% group_by(who, phase) %>% summarise( usePattern = paste0(udsStatus, collapse = "") ) %>% pivot_wider(names_from = "phase", values_from = "usePattern") weeklyOpioidPatterns_df ## ----use-pattern-phases-start-and-end----------------------------------------- derived_weeklyOpioidPatternRand <- useByWeekRandomized_df %>% mutate( randWeek1 = randWk1 * studyWeek, randWeek2 = randWk2 * studyWeek ) %>% summarise( startWeek = min(studyWeek), randWeek1 = max(randWeek1), randWeek2 = if_else( all(randWeek2 == 0), NA_real_, max(randWeek2) ), endWeek = max(studyWeek) ) %>% # View this smaller data set before joining. In practice, you can comment out # this print() command. print() %>% left_join(weeklyOpioidPatterns_df, by = "who") ## ----mark-use-non-randomized-------------------------------------------------- nonRandUDS_df <- backbone_df %>% filter(who %in% notRandomized_int) %>% left_join(uds) %>% # MarkUse() requires a "source" column mutate(source = "UDS") %>% MarkUse( targetDrugs_char = nonStudyOpioids_ls$`Not treated`, drugs_df = . ) %>% mutate( udsOpioid = case_when( !is.na(when) ~ TRUE, is.na(when) ~ NA ) ) %>% select(who, when, udsOpioid) %>% # Uneccessary here, but some UDS records are duplicated distinct() ## ----non-randomized-study-week------------------------------------------------ timelineNonRandUDS_df <- backbone_df %>% filter(who %in% notRandomized_int) %>% left_join(nonRandUDS_df, by = c("who", "when")) %>% # Because this week moves off of the consent date, there is no reason to add # a `(week - 1)` adjustment mutate(studyWeek = when %/% 7 + 1) %>% group_by(who, studyWeek) %>% summarise( posUDS = sum(udsOpioid == 1, na.rm = TRUE) > 0 ) ## ----single-week-symbols-non-randomized--------------------------------------- weeklyNonRandPatterns_df <- timelineNonRandUDS_df %>% mutate( udsStatus = case_when( # If they are positive, they are positive posUDS ~ "+", # If they aren't positive and it's after the consent week, they are # missing (because they weren't randomized) !posUDS & studyWeek >= 1 ~ "o", # If they aren't positive and it's on or before the consent week, then # leave it blank (pre-study) !posUDS & studyWeek < 1 ~ "_" ) ) %>% mutate( phase = case_when( studyWeek < 1 ~ "Baseline", studyWeek >= 1 ~ "Phase_1" ) ) %>% # Again, this print is unecessary, but here it make clear what we are doing print() %>% group_by(who, phase) %>% summarise( usePattern = paste0(udsStatus, collapse = "") ) %>% pivot_wider(names_from = "phase", values_from = "usePattern") %>% ungroup() weeklyNonRandPatterns_df ## ----non-randomized-start-and-end--------------------------------------------- derived_weeklyOpioidPatternNonRand <- timelineNonRandUDS_df %>% group_by(who) %>% summarise( startWeek = min(studyWeek), randWeek1 = NA_real_, randWeek2 = NA_real_, endWeek = max(studyWeek) ) %>% left_join(weeklyNonRandPatterns_df, by = "who") ## ----final-product------------------------------------------------------------ derived_weeklyOpioidPattern <- derived_weeklyOpioidPatternRand %>% bind_rows(derived_weeklyOpioidPatternNonRand) %>% arrange(who) %>% replace_na(list(Phase_2 = "")) derived_weeklyOpioidPattern ## ----------------------------------------------------------------------------- sessionInfo()