## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", warning = FALSE, message = FALSE, echo = TRUE, eval = TRUE ) ## ----------------------------------------------------------------------------- library(fastglm) ## ----------------------------------------------------------------------------- data(esoph) x <- model.matrix(~ agegp + unclass(tobgp) + unclass(alcgp), data = esoph) y <- cbind(esoph$ncases, esoph$ncontrols) fit <- fastglm(x, y, family = binomial(link = "cloglog")) summary(fit) ## ----------------------------------------------------------------------------- fit2 <- glm(cbind(ncases, ncontrols) ~ agegp + unclass(tobgp) + unclass(alcgp), data = esoph, family = binomial(link = "cloglog"), method = fastglm_fit) ## ----------------------------------------------------------------------------- set.seed(123) n <- 5000; p <- 30 x <- matrix(rnorm(n * p), n, p) y <- rbinom(n, 1, plogis(x %*% rnorm(p) * 0.05)) system.time(f0 <- fastglm(x, y, family = binomial())) # default system.time(f2 <- fastglm(x, y, family = binomial(), method = 2)) # LLT system.time(f3 <- fastglm(x, y, family = binomial(), method = 3)) # LDLT ## ----------------------------------------------------------------------------- fit <- fastglm(x, y, family = binomial(), method = 2) V <- vcov(fit) dim(V) ## standard errors agree with the diagonal of vcov() all.equal(sqrt(diag(V)), unname(coef(summary(fit))[, "Std. Error"])) ## ----------------------------------------------------------------------------- library(sandwich) V_hc <- vcovHC(fit, type = "HC0") V_hc[1:3, 1:3] cluster <- rep(seq_len(20), length.out = n) V_cl <- vcovCL(fit, cluster = cluster, type = "HC1") V_cl[1:3, 1:3] ## ----------------------------------------------------------------------------- xnew <- x[1:5, , drop = FALSE] predict(fit, newdata = xnew, type = "response", se.fit = TRUE) ## ----------------------------------------------------------------------------- n <- 4000 X <- cbind(1, matrix(rnorm(n * 3), n, 3)) y <- rbinom(n, 1, plogis(X %*% c(0.2, 0.4, -0.2, 0.3))) chunk_size <- 1000 chunks <- function(k) { idx <- ((k - 1) * chunk_size + 1):(k * chunk_size) list(X = X[idx, , drop = FALSE], y = y[idx]) } fit_stream <- fastglm_streaming(chunks, n_chunks = 4, family = binomial()) fit_full <- fastglm(X, y, family = binomial(), method = 2) max(abs(coef(fit_stream) - coef(fit_full))) ## ----eval = TRUE-------------------------------------------------------------- library(microbenchmark) set.seed(7) n <- 50000; p <- 30 x <- matrix(rnorm(n * p), n, p) y <- rpois(n, exp(x %*% rnorm(p) * 0.05 + 1)) ## force the R-callback path by giving the family object an unrecognised name disguise <- function(fam) { fam$family <- paste0(fam$family, "*"); fam } mb <- microbenchmark( native = fastglm(x, y, family = poisson(), method = 2), callback = fastglm(x, y, family = disguise(poisson()), method = 2), times = 5L ) print(mb)