## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup-------------------------------------------------------------------- library(ivs) library(clock) library(dplyr, warn.conflicts = FALSE) library(tidyr, warn.conflicts = FALSE) ## ----------------------------------------------------------------------------- # Interval vector of integers iv(1:5, 7:11) # Interval vector of dates starts <- as.Date("2019-01-01") + 0:2 ends <- starts + c(2, 5, 10) iv(starts, ends) ## ----------------------------------------------------------------------------- start <- bit64::as.integer64("900000000000") end <- start + 1234 iv(start, end) ## ----------------------------------------------------------------------------- start <- year_month_day(c(2019, 2020), c(1, 3)) end <- year_month_day(c(2020, 2020), c(2, 6)) iv(start, end) ## ----------------------------------------------------------------------------- x <- iv(1:3, 4:6) x ## ----------------------------------------------------------------------------- iv_start(x) iv_end(x) ## ----------------------------------------------------------------------------- tibble(x = x) ## ----------------------------------------------------------------------------- # iv_pairs() is a useful way to create small ivs from individual intervals needles <- iv_pairs(c(1, 5), c(3, 7), c(10, 12)) needles haystack <- iv_pairs(c(0, 6), c(13, 15), c(0, 2), c(7, 8), c(4, 5)) haystack locations <- iv_locate_overlaps(needles, haystack) locations ## ----------------------------------------------------------------------------- iv_align(needles, haystack, locations = locations) ## ----------------------------------------------------------------------------- iv_overlaps(needles, haystack) ## ----------------------------------------------------------------------------- locations <- iv_locate_overlaps( needles, haystack, type = "contains", no_match = "drop" ) iv_align(needles, haystack, locations = locations) ## ----------------------------------------------------------------------------- locations <- iv_locate_overlaps( needles, haystack, type = "within", no_match = "drop" ) iv_align(needles, haystack, locations = locations) ## ----------------------------------------------------------------------------- # Where does `needles` precede `haystack`? locations <- iv_locate_precedes(needles, haystack) locations ## ----------------------------------------------------------------------------- iv_align(needles, haystack, locations = locations) ## ----------------------------------------------------------------------------- # Where does `needles` follow `haystack`? locations <- iv_locate_follows(needles, haystack) iv_align(needles, haystack, locations = locations) ## ----------------------------------------------------------------------------- locations <- iv_locate_follows( needles = needles, haystack = haystack, closest = TRUE, no_match = "drop" ) iv_align(needles, haystack, locations = locations) ## ----------------------------------------------------------------------------- sales <- as.Date(c("2019-01-01", "2020-05-10", "2020-06-10")) commercial_starts <- as.Date(c( "2019-10-12", "2020-04-01", "2020-06-01", "2021-05-10" )) commercial_ends <- commercial_starts + 90 commercials <- iv(commercial_starts, commercial_ends) sales commercials ## ----------------------------------------------------------------------------- tibble(sales = sales) %>% mutate(commercial_running = iv_between(sales, commercials)) ## ----------------------------------------------------------------------------- iv_align(sales, commercials, locations = iv_locate_between(sales, commercials)) ## ----------------------------------------------------------------------------- x <- c(1, 5, 10, 12) x y <- iv_pairs(c(0, 6), c(7, 9), c(10, 12), c(10, 12)) y iv_pairwise_between(x, y) ## ----------------------------------------------------------------------------- enrollments <- tribble( ~name, ~start, ~end, "Amy", "1, Jan, 2017", "30, Jul, 2018", "Franklin", "1, Jan, 2017", "19, Feb, 2017", "Franklin", "5, Jun, 2017", "4, Feb, 2018", "Franklin", "21, Oct, 2018", "9, Mar, 2019", "Samir", "1, Jan, 2017", "4, Feb, 2017", "Samir", "5, Apr, 2017", "12, Jun, 2018" ) # Parse these into "day" precision year-month-day objects enrollments <- enrollments %>% mutate( start = year_month_day_parse(start, format = "%d, %b, %Y"), end = year_month_day_parse(end, format = "%d, %b, %Y"), ) enrollments ## ----------------------------------------------------------------------------- enrollments <- enrollments %>% mutate( start = calendar_narrow(start, "month"), end = calendar_narrow(end, "month") + 1L ) enrollments enrollments <- enrollments %>% mutate(active = iv(start, end), .keep = "unused") enrollments ## ----------------------------------------------------------------------------- bounds <- range(enrollments$active) lower <- iv_start(bounds[[1]]) upper <- iv_end(bounds[[2]]) - 1L months <- tibble(month = seq(lower, upper, by = 1)) months ## ----------------------------------------------------------------------------- months %>% mutate(count = iv_count_between(month, enrollments$active)) %>% print(n = Inf) ## ----------------------------------------------------------------------------- x <- iv_pairs(c(1, 5), c(5, 7), c(9, 11), c(10, 13), c(12, 13)) x iv_groups(x) ## ----------------------------------------------------------------------------- iv_groups(x, abutting = FALSE) ## ----------------------------------------------------------------------------- costs <- tribble( ~user, ~system, ~from, ~to, ~cost, 1L, "a", "2019-01-01", "2019-01-05", 200.5, 1L, "a", "2019-01-12", "2019-01-13", 15.6, 1L, "b", "2019-01-03", "2019-01-10", 500.3, 2L, "a", "2019-01-02", "2019-01-03", 25.6, 2L, "c", "2019-01-03", "2019-01-04", 30, 2L, "c", "2019-01-05", "2019-01-07", 66.2 ) costs <- costs %>% mutate( from = as.Date(from), to = as.Date(to) ) %>% mutate(range = iv(from, to), .keep = "unused") costs ## ----------------------------------------------------------------------------- costs %>% reframe(range = iv_groups(range), .by = user) ## ----------------------------------------------------------------------------- costs2 <- costs %>% mutate(range = iv_identify_group(range), .by = user) # `range` has been updated with the corresponding group costs2 # So now we can group on that to summarise the cost costs2 %>% summarise(cost = sum(cost), .by = c(user, range)) ## ----------------------------------------------------------------------------- x <- iv_pairs(c(1, 5), c(5, 7), c(9, 11), c(10, 13), c(12, 13)) x ## ----------------------------------------------------------------------------- iv_splits(x) ## ----------------------------------------------------------------------------- guests <- tibble( arrive = as.POSIXct( c("2008-05-20 19:30:00", "2008-05-20 20:10:00", "2008-05-20 22:15:00"), tz = "UTC" ), depart = as.POSIXct( c("2008-05-20 23:00:00", "2008-05-21 00:00:00", "2008-05-21 00:30:00"), tz = "UTC" ), name = list( c("Mary", "Harry"), c("Diana", "Susan"), "Peter" ) ) guests <- unnest(guests, name) %>% mutate(iv = iv(arrive, depart), .keep = "unused") guests ## ----------------------------------------------------------------------------- iv_splits(guests$iv) ## ----------------------------------------------------------------------------- # Mary's arrival/departure times guests$iv[[1]] # The first start and last end correspond to Mary's original times, # but we've also broken her stay up by the departures/arrivals of # everyone else iv_identify_splits(guests$iv)[[1]] ## ----------------------------------------------------------------------------- guests2 <- guests %>% mutate(iv = iv_identify_splits(iv)) %>% unnest(iv) %>% arrange(iv) guests2 ## ----------------------------------------------------------------------------- guests2 %>% summarise(n = n(), who = list(name), .by = iv) ## ----------------------------------------------------------------------------- x <- iv_pairs(c(1, 3), c(2, 5), c(10, 12), c(13, 15)) x iv_set_complement(x) ## ----------------------------------------------------------------------------- iv_set_complement(x, lower = 0, upper = Inf) ## ----------------------------------------------------------------------------- y <- iv_pairs(c(-5, 0), c(1, 4), c(8, 10), c(15, 16)) x y iv_set_union(x, y) ## ----------------------------------------------------------------------------- iv_set_intersect(x, y) ## ----------------------------------------------------------------------------- iv_set_difference(x, y) ## ----------------------------------------------------------------------------- starts <- as.Date(c("2019-01-05", "2019-01-20", "2019-01-25", "2019-02-01")) ends <- starts + c(5, 10, 3, 5) x <- iv(starts, ends) starts <- as.Date(c("2019-01-02", "2019-01-23")) ends <- starts + c(5, 6) y <- iv(starts, ends) x y ## ----------------------------------------------------------------------------- iv_set_intersect(x, y) ## ----------------------------------------------------------------------------- locations <- iv_locate_overlaps(x, y, no_match = "drop") overlaps <- iv_align(x, y, locations = locations) overlaps %>% mutate(intersect = iv_pairwise_set_intersect(needles, haystack)) ## ---- error=TRUE-------------------------------------------------------------- iv_pairwise_set_intersect(iv(1, 5), iv(6, 9)) ## ----------------------------------------------------------------------------- x <- iv_pairs(c(1, 5), c(3, NA), c(NA, 3)) x ## ----------------------------------------------------------------------------- y <- iv_pairs(c(NA, NA), c(0, 2)) y ## ----------------------------------------------------------------------------- # Match-like operations treat missing intervals as overlapping iv_locate_overlaps(x, y) iv_set_intersect(x, y) ## ----------------------------------------------------------------------------- # Pairwise operations treat missing intervals as infectious z <- iv_pairs(c(1, 2), c(1, 4)) iv_pairwise_set_intersect(y, z)