--- title: "Standard Errors" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Standard Errors} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` This vignette demonstrates the calculation of standard errors for penalized estimates, using the sandwich estimator approach as in the "MLR" estimator of `lavaan`. The standard errors are obtained as the square roots of the diagonal elements of the sandwich variance estimator: $$ \mathrm{B}_\text{bread}^{-1} \; \mathrm{B}_\text{meat} \; \mathrm{B}_\text{bread}^{-1}, $$ where $\mathrm{B}_\text{bread}$ is the observed information matrix (i.e., the Hessian of the penalized objective function) and $\mathrm{B}_\text{meat}$ is the first-order information matrix (obtained using `lavInspect(..., information.first.order)`). ```{r} library(lavaan) library(plavaan) data(PoliticalDemocracy) ``` ## Penalize cross-loadings ### Two-factor CFA model ```{r} mod0 <- " ind60 =~ x1 + x2 + x3 dem60 =~ y1 + y2 + y3 + y4 ind60 ~~ dem60 " fit0 <- cfa(mod0, data = PoliticalDemocracy, std.lv = TRUE, estimator = "MLR") ``` ```{r eval=FALSE, include=FALSE} # lav_model_vcov_se(fit0@Model, fit0@ParTable) # not what I expected lavaan:::lav_model_nvcov_robust_sem lavaan:::lav_model_nvcov_robust_sandwich lavaan:::computeDelta(fit0@Model) lavaan:::lav_model_information_firstorder( lavmodel = fit0@Model, lavsamplestats = fit0@SampleStats, lavdata = fit0@Data, lavcache = fit0@Cache, lavimplied = fit0@implied,x lavh1 = fit0@h1, lavoptions = fit0@Options, extra = TRUE, check.pd = FALSE, augmented = FALSE, inverted = FALSE ) meat <- lavInspect(fit0, "information.first.order") # crossprod(estfun.lavaan(fit0)) / lavInspect(fit0, "nobs") - meat # lavInspect(fit0, "hessian") # ff0 <- lav_export_estimation(fit0) # numDeriv::hessian(ff0$objective, coef(fit0), lavaan_model = fit0) - # lavInspect(fit0, "hessian") bread <- lavInspect(fit0, "information.observed") solve(bread) %*% meat %*% solve(bread) fit0@vcov$vcov * 75 # May need to consult EFA var.names <- paste("x", 1:9, sep = "") # debugonce(lavaan:::lav_model_estimate) # It appears that EFA also uses optim. So perhaps need to code the objective directly. fit <- efa(data = HolzingerSwineford1939[, var.names], nfactors = 1:3) fit$nf3@optim ``` Penalized ```{r} mod <- " ind60 =~ x1 + x2 + x3 + y1 + y2 + y3 + y4 dem60 =~ x1 + x2 + x3 + y1 + y2 + y3 + y4 ind60 ~~ ind60 " fit <- cfa(mod, data = PoliticalDemocracy, std.lv = TRUE, do.fit = FALSE) ``` ```{r} pefa_fit <- penalized_est( fit, w = .03, pen_par_id = 4:10, se = "robust.huber.white" ) summary(pefa_fit) ``` ```{r, eval=FALSE} # Quick simulation to check SEs set.seed(1234) R <- 250 est_res <- matrix(NA, nrow = R, ncol = length(coef(pefa_fit))) se_res <- matrix(NA, nrow = R, ncol = length(coef(pefa_fit))) # Use the simple structure model as population pop_model <- parTable(pefa_fit) for (i in 1:R) { # Simulate data dat_sim <- simulateData(pop_model, sample.nobs = 200) # Fit penalized model fit_sim <- cfa(mod, data = dat_sim, std.lv = TRUE, do.fit = FALSE) pefa_sim <- try( penalized_est( fit_sim, w = .03, pen_par_id = 4:10, se = "robust.huber.white" ), silent = TRUE ) if (!inherits(pefa_sim, "try-error")) { est_res[i, ] <- coef(pefa_sim) se_res[i, ] <- sqrt(diag(vcov(pefa_sim))) } } # Compare empirical SD vs mean SE for a few parameters # (e.g., first few loadings) res_summary <- data.frame( param = names(coef(pefa_fit)), emp_sd = apply(est_res, 2, sd, na.rm = TRUE), mean_se = apply(se_res, 2, mean, na.rm = TRUE) ) ``` ```{r include=FALSE} if (!file.exists("sim-se-summary.rds")) { saveRDS(res_summary, "sim-se-summary.rds") } else { res_summary <- readRDS("sim-se-summary.rds") } ``` ```{r} print(res_summary, digits = 2) ``` ```{r} # meat <- lavInspect(pefa_fit, "information.first.order") # bread <- attr(pefa_fit, "hessian") # vc_pefa <- solve(bread) %*% meat %*% solve(bread) / 75 # pefa_fit@vcov$vcov <- vc_pefa # se_pefa <- sqrt(diag(vc_pefa)) # pefa_fit@ParTable$se <- 0 * pefa_fit@ParTable$est # pefa_fit@ParTable$se[which(pefa_fit@ParTable$free > 0)] <- se_pefa # cbind(coef(pefa_fit), sqrt(diag(vc_pefa))) ``` ## Penalize non-invariance ```{r} lconfig_mod_un <- " # Time 1 dem60 =~ .l1_1 * y1 + .l2_1 * y2 + .l3_1 * y3 + .l4_1 * y4 y1 ~ .i1_1 * 1 y2 ~ .i2_1 * 1 y3 ~ .i3_1 * 1 y4 ~ .i4_1 * 1 y1 ~~ .u1_1 * y1 y2 ~~ .u2_1 * y2 y3 ~~ .u3_1 * y3 y4 ~~ .u4_1 * y4 # Time 2 dem65 =~ .l1_2 * y5 + .l2_2 * y6 + .l3_2 * y7 + .l4_2 * y8 y5 ~ .i1_2 * 1 y6 ~ .i2_2 * 1 y7 ~ .i3_2 * 1 y8 ~ .i4_2 * 1 y5 ~~ .u1_2 * y5 y6 ~~ .u2_2 * y6 y7 ~~ .u3_2 * y7 y8 ~~ .u4_2 * y8 # Latent variances dem60 ~~ 1 * dem60 dem65 ~~ NA * dem65 # Latent means dem60 ~ 0 * 1 dem65 ~ NA * 1 # Lag Covariances y1 ~~ y5 y2 ~~ y6 y3 ~~ y7 y4 ~~ y8 " # Specify the under-identified model lconfig_fit_un <- cfa( lconfig_mod_un, data = PoliticalDemocracy, do.fit = FALSE, std.lv = TRUE, missing = "fiml", estimator = "mlr" ) ld_id <- rbind(1:4, 13:16) int_id <- rbind(5:8, 17:20) pen_fit <- penalized_est( lconfig_fit_un, w = 0.03, pen_fn = "l0a", pen_diff_id = list(loadings = ld_id, intercepts = int_id), se = "robust.huber.white" ) parameterEstimates(pen_fit) ``` Compared to scalar invariance model ```{r} lscalar_mod <- " # Time 1 dem60 =~ .l1 * y1 + .l2 * y2 + .l3 * y3 + .l4 * y4 y1 ~ .i1 * 1 y2 ~ .i2 * 1 y3 ~ .i3 * 1 y4 ~ .i4 * 1 y1 ~~ .u1_1 * y1 y2 ~~ .u2_1 * y2 y3 ~~ .u3_1 * y3 y4 ~~ .u4_1 * y4 # Time 2 dem65 =~ .l1 * y5 + .l2 * y6 + .l3 * y7 + .l4 * y8 y5 ~ .i1 * 1 y6 ~ .i2 * 1 y7 ~ .i3 * 1 y8 ~ .i4 * 1 y5 ~~ .u1_2 * y5 y6 ~~ .u2_2 * y6 y7 ~~ .u3_2 * y7 y8 ~~ .u4_2 * y8 # Latent variances dem60 ~~ 1 * dem60 dem65 ~~ NA * dem65 # Latent means dem60 ~ 0 * 1 dem65 ~ NA * 1 # Lag Covariances y1 ~~ y5 y2 ~~ y6 y3 ~~ y7 y4 ~~ y8 " lscalar_fit <- cfa( lscalar_mod, data = PoliticalDemocracy, std.lv = TRUE, missing = "fiml", estimator = "mlr" ) parameterEstimates(lscalar_fit) ```