## NOTE(review): this looks like knitr::purl() output. All three chunks are
## marked eval = FALSE, so every statement is kept commented out and sourcing
## this file executes nothing. Each commented statement sits on its own line so
## that no part of a comment spills over as live code.

## Chunk 1: times flatmap() (formula and function arguments) against sapply()
## on a 3-element and a 1e4-element input.
## ----eval = FALSE-------------------------------------------------------------
# library("rbenchmark")
# library("dat")
#
# benchmark(
#   flatmap(1:3, x ~ x^2),
#   sapply(1:3, function(x) x^2),
#   sapply(1:3, as.function(x ~ x^2)),
#   flatmap(1:3, function(x) x^2)
# )
#
# benchmark(
#   flatmap(1:1e4, x ~ x^2),
#   sapply(1:1e4, function(x) x^2)
# )

## Chunk 2: times the two-input formula form of flatmap() against mapply().
## ----eval = FALSE-------------------------------------------------------------
# benchmark(
#   flatmap(1:3 ~ 1:3, f(x, y) ~ x + y),
#   mapply(function(x, y) x + y, 1:3, 1:3),
#   mapply(as.function(f(x, y) ~ x + y), 1:3, 1:3)
# )

## Chunk 3: grouped-aggregation timings on an N-row table. Each query is timed
## twice, in three variants: data.table's own `[`, the DataTable class whose
## `[` method is set to mutar, and dplyr's group_by() + summarise().
## ----eval = FALSE-------------------------------------------------------------
# library("data.table")
# library("dplyr")
# options("dat.use.dplyr" = FALSE)
# N <- 2e7 # more is not possible with small laptop
# K <- 100
# set.seed(1)
#
# DT <- data.table(
#   id1 = sample(sprintf("id%03d",1:K), N, TRUE),      # large groups (char)
#   id2 = sample(sprintf("id%03d",1:K), N, TRUE),      # large groups (char)
#   id3 = sample(sprintf("id%010d",1:(N/K)), N, TRUE), # small groups (char)
#   id4 = sample(K, N, TRUE),                          # large groups (int)
#   id5 = sample(K, N, TRUE),                          # large groups (int)
#   id6 = sample(N/K, N, TRUE),                        # small groups (int)
#   v1 = sample(5, N, TRUE),                           # int in range [1,5]
#   v2 = sample(5, N, TRUE),                           # int in range [1,5]
#   v3 = sample(round(runif(100,max=100),4), N, TRUE)  # numeric e.g. 23.5749
# )
#
# setClass("DataTable", "data.table")
# setMethod("[", "DataTable", mutar)
# DT4 <- new("DataTable", DT)
#
# cat("GB =", round(sum(gc()[,2]) / 1024, 3), "\n")
# format(object.size(DT), units = "MB")
# format(object.size(DT4), units = "MB")
#
# system.time(DT[, sum(v1), keyby = id1])
# system.time(DT[, sum(v1), keyby = id1])
# system.time(DT4[V1 ~ sum(v1), sby = "id1"])
# system.time(DT4[V1 ~ sum(v1), sby = "id1"])
# system.time(group_by(DT, id1) %>% summarise(V1 = sum(v1)))
# system.time(group_by(DT, id1) %>% summarise(V1 = sum(v1)))
#
# system.time(DT[, sum(v1), keyby = "id1,id2"])
# system.time(DT[, sum(v1), keyby = "id1,id2"])
# system.time(DT4[V1 ~ sum(v1), sby = c("id1", "id2")])
# system.time(DT4[V1 ~ sum(v1), sby = c("id1", "id2")])
# system.time(group_by(DT, id1, id2) %>% summarise(V1 = sum(v1)))
# system.time(group_by(DT, id1, id2) %>% summarise(V1 = sum(v1)))
#
# system.time(DT[, list(sum(v1), mean(v3)), keyby = id3])
# system.time(DT[, list(sum(v1), mean(v3)), keyby = id3])
# system.time(DT4[V1 ~ sum(v1), V3 ~ mean(v3), sby = "id3"])
# system.time(DT4[V1 ~ sum(v1), V3 ~ mean(v3), sby = "id3"])
# system.time(group_by(DT, id3) %>% summarise(V1 = sum(v1), V3 = mean(v3)))
# system.time(group_by(DT, id3) %>% summarise(V1 = sum(v1), V3 = mean(v3)))
#
# system.time(DT[, lapply(.SD, mean), keyby = id4, .SDcols = 7:9])
# system.time(DT[, lapply(.SD, mean), keyby = id4, .SDcols = 7:9])
# system.time(DT4[FL(.n ~ mean(.n), .n = "^v[1-3]"), sby = "id4"])
# system.time(DT4[FL(.n ~ mean(.n), .n = "^v[1-3]"), sby = "id4"])
# system.time(group_by(DT, id4) %>% summarise(V1 = mean(v1), V2 = mean(v2), V3 = mean(v3)))
# system.time(group_by(DT, id4) %>% summarise(V1 = mean(v1), V2 = mean(v2), V3 = mean(v3)))
#
# system.time(DT[, lapply(.SD, sum), keyby = id6, .SDcols = 7:9])
# system.time(DT[, lapply(.SD, sum), keyby = id6, .SDcols = 7:9])
# system.time(DT4[FL(.n ~ sum(.n), .n = "v1:v3"), sby = "id6"])
# system.time(DT4[FL(.n ~ sum(.n), .n = "v1:v3"), sby = "id6"])
# system.time(group_by(DT, id6) %>% summarise(V1 = sum(v1), V2 = sum(v2), V3 = sum(v3)))
# system.time(group_by(DT, id6) %>% summarise(V1 = sum(v1), V2 = sum(v2), V3 = sum(v3)))