---
title: "Penalized Estimation with Ordinal Data and Multiple Groups"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Penalized Estimation with Ordinal Data and Multiple Groups}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

```{r setup}
library(plavaan)
library(lavaan)
data(HolzingerSwineford1939)
```

## Prepare Ordinal Data

First, we convert the continuous variables to ordinal with 3 categories:

```{r}
# Select the 9 cognitive test variables
hs_data <- HolzingerSwineford1939[, c("school", "x1", "x2", "x3", "x4", "x5",
                                      "x6", "x7", "x8", "x9")]
# Convert each test score to an ordered factor with 3 points.
# Iterate over column names (not positional indices) so the code
# stays correct if the column order ever changes.
item_names <- paste0("x", 1:9)
for (v in item_names) {
  hs_data[[v]] <- ordered(
    cut(hs_data[[v]], breaks = 3, labels = FALSE, include.lowest = TRUE)
  )
}
head(hs_data)
```

## Multiple-Group CFA Model

Now fit the model across the two schools:

```{r}
mod_base <- "
  visual  =~ x1 + x2 + x3
  textual =~ x4 + x5 + x6
  speed   =~ x7 + x8 + x9
  x1 ~~ 1 * x1
  x2 ~~ 1 * x2
  x3 ~~ 1 * x3
  x4 ~~ 1 * x4
  x5 ~~ 1 * x5
  x6 ~~ 1 * x6
  x7 ~~ 1 * x7
  x8 ~~ 1 * x8
  x9 ~~ 1 * x9
"
fit_mg <- cfa(mod_base, data = hs_data, ordered = TRUE, std.lv = TRUE,
              parameterization = "theta", group = "school")
summary(fit_mg, fit.measures = TRUE)
```

One should also consider the magnitude of the objective function with the
DWLS estimator, which is scaled differently from ML-based fit functions and
is typically much smaller in magnitude.

```{r}
fit_mg@optim$fx
```

### Strict and partial invariance models

```{r}
# Strict invariance: constrain loadings, thresholds, and residual variances
fit_strict <- cfa(mod_base, data = hs_data, ordered = TRUE, std.lv = TRUE,
                  parameterization = "theta", group = "school",
                  group.equal = c("loadings", "thresholds", "residuals"))
# Score test
lavTestScore(fit_strict)
```

Only item 9 showed non-invariant thresholds, based on the score test.
## Penalized Multiple-Group Model

We'll penalize differences in loadings and thresholds across groups. First,
set up an over-specified (unidentified) model with the latent mean and
variance only identified in the first group, without fitting
(`do.fit = FALSE`):

```{r}
mod_un <- "
  visual  =~ x1 + x2 + x3
  textual =~ x4 + x5 + x6
  speed   =~ x7 + x8 + x9
  visual  ~~ c(1, NA) * visual
  textual ~~ c(1, NA) * textual
  speed   ~~ c(1, NA) * speed
  visual  ~ c(0, NA) * 1
  textual ~ c(0, NA) * 1
  speed   ~ c(0, NA) * 1
  x1 ~~ 1 * x1
  x2 ~~ 1 * x2
  x3 ~~ 1 * x3
  x4 ~~ 1 * x4
  x5 ~~ 1 * x5
  x6 ~~ 1 * x6
  x7 ~~ 1 * x7
  x8 ~~ 1 * x8
  x9 ~~ 1 * x9
"
# Fit the over-specified model `mod_un` (not `mod_base`), so all loadings
# and thresholds are free in both groups; do.fit = FALSE skips estimation.
fit_mg_nofit <- cfa(mod_un, data = hs_data, ordered = TRUE, std.lv = TRUE,
                    auto.fix.first = FALSE, parameterization = "theta",
                    group = "school", do.fit = FALSE)
```

Examine the parameter table to identify loadings and thresholds:

```{r}
pt <- parTable(fit_mg_nofit)
# Show loadings
pt[pt$op == "=~", c("lhs", "op", "rhs", "group", "free")]
```

```{r}
# Show thresholds
pt[pt$op == "|", c("lhs", "op", "rhs", "group", "free")]
```

Identify parameter IDs for loadings and thresholds in each group:

```{r}
# Loadings: group 1 (Pasteur) and group 2 (Grant-White)
load_g1 <- pt$free[pt$op == "=~" & pt$group == 1 & pt$free > 0]
load_g2 <- pt$free[pt$op == "=~" & pt$group == 2 & pt$free > 0]
# Thresholds: group 1 and group 2
thresh_g1 <- pt$free[pt$op == "|" & pt$group == 1 & pt$free > 0]
thresh_g2 <- pt$free[pt$op == "|" & pt$group == 2 & pt$free > 0]
print(list(
  loadings_g1 = load_g1,
  loadings_g2 = load_g2,
  thresholds_g1 = thresh_g1,
  thresholds_g2 = thresh_g2
))
```

Fit the penalized model with penalties on differences in loadings and
thresholds:

```{r}
fit_pen_mg <- penalized_est(
  fit_mg_nofit,
  w = 0.03,
  pen_diff_id = list(
    loadings = rbind(load_g1, load_g2),
    thresholds = rbind(thresh_g1, thresh_g2)
  )
)
summary(fit_pen_mg)
```

## Evaluate Invariance

Here are the estimated loadings and thresholds, and we can calculate the
effective number of parameters that differ
across groups:

```{r}
# Loadings: collect the penalized estimates for each group into a
# 2 x p matrix (rows = groups) for the pairwise-difference loss
load_ests_g1 <- as.numeric(coef(fit_pen_mg)[load_g1])
load_ests_g2 <- as.numeric(coef(fit_pen_mg)[load_g2])
load_mat <- rbind(load_ests_g1, load_ests_g2)
colnames(load_mat) <- names(coef(fit_pen_mg))[load_g1]
eff_load_diff <- composite_pair_loss(load_mat, fun = l0a)

# Thresholds: same layout as the loadings matrix
thresh_ests_g1 <- as.numeric(coef(fit_pen_mg)[thresh_g1])
thresh_ests_g2 <- as.numeric(coef(fit_pen_mg)[thresh_g2])
thresh_mat <- rbind(thresh_ests_g1, thresh_ests_g2)
colnames(thresh_mat) <- names(coef(fit_pen_mg))[thresh_g1]
eff_thresh_diff <- composite_pair_loss(thresh_mat, fun = l0a)

cat("Penalized Loading Estimates:\n")
print(load_mat, digits = 3)
cat("\nPenalized Threshold Estimates:\n")
print(thresh_mat, digits = 3)
cat("Effective number of non-invariant loadings:", eff_load_diff, "\n")
cat("Effective number of non-invariant thresholds:", eff_thresh_diff, "\n")
```

The penalized estimation approach identifies which loadings and thresholds
substantively differ across groups, providing an efficient, data-driven
assessment of measurement invariance for ordinal data.