## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>") ## ----------------------------------------------------------------------------- library(personnelSelectionUtility) ## ----------------------------------------------------------------------------- schmidt_pat <- data.frame( selection_ratio = c(.05, .10, .20, .30, .40, .50, .80) ) schmidt_pat$one_year <- vapply( schmidt_pat$selection_ratio, function(sr) bcg_utility( validity = .76, selection_ratio = sr, sdy = 10413, n_selected = 618, tenure = 1, cost = 0 )$net_utility, numeric(1) ) schmidt_pat$multi_year <- vapply( schmidt_pat$selection_ratio, function(sr) bcg_utility( validity = .76, selection_ratio = sr, sdy = 10413, n_selected = 618, tenure = 9.69, cost = 0 )$net_utility, numeric(1) ) schmidt_pat ## ----------------------------------------------------------------------------- # All three cases share the same expected standardised score among offered # candidates: z_offered <- selected_mean_z(.20) # Case 1: uniform random rejection. The expected z among accepted candidates # equals z_offered; only the realised headcount is scaled by the acceptance rate. offer_rejection_adjustment( expected_z_offered = z_offered, mode = "uniform", acceptance_rate = .70, n_offered = 100 ) # Case 2: correlated rejection. Top candidates are more likely to decline, # captured by a negative quality-acceptance correlation. offer_rejection_adjustment( expected_z_offered = z_offered, mode = "correlated", acceptance_rate = .70, rho_quality_acceptance = -0.20, n_offered = 100 ) # Case 3: selective rejection. Explicit logit link with a strongly negative # slope, representing the case Murphy emphasises in which the very top candidates # almost always decline. offer_rejection_adjustment( expected_z_offered = z_offered, mode = "selective", acceptance_rate = .70, logit_intercept = qlogis(.70), logit_slope = -1.0, n_offered = 100 ) ## ----------------------------------------------------------------------------- set.seed(2024) # Simulate a moderately skewed criterion: lognormal with a few extreme outliers n <- 200 y_normal_part <- rlnorm(n, meanlog = 11.0, sdlog = 0.30) y_outliers_idx <- sample.int(n, 4) y_normal_part[y_outliers_idx] <- y_normal_part[y_outliers_idx] * 3.5 y <- y_normal_part x <- .50 * scale(log(y))[, 1] + rnorm(n, 0, sqrt(1 - .25)) sdy_with_outliers <- sd(y) sdy_without_outliers <- sd(y[-y_outliers_idx]) c(with_outliers = sdy_with_outliers, without_outliers = sdy_without_outliers, ratio = sdy_with_outliers / sdy_without_outliers) ## ----------------------------------------------------------------------------- utility_regression_diagnostics(x = x, y = y) ## ----------------------------------------------------------------------------- focal_validity <- .35 baseline_validity <- .20 selection_ratio <- .20 sdy <- 50000 n_year_one <- 100 tenure_years <- 5 fixed_cost <- 75000 ## ----------------------------------------------------------------------------- naive <- bcg_utility( validity = focal_validity, selection_ratio = selection_ratio, sdy = sdy, n_selected = n_year_one, tenure = tenure_years, cost = fixed_cost, baseline_validity = 0 ) naive$net_utility ## ----------------------------------------------------------------------------- with_baseline <- bcg_utility( validity = focal_validity, selection_ratio = selection_ratio, sdy = sdy, n_selected = n_year_one, tenure = tenure_years, cost = fixed_cost, baseline_validity = baseline_validity ) with_baseline$net_utility ## ----------------------------------------------------------------------------- S11 <- matrix(c(1, .30, .30, 1), 2, 2) S12 <- matrix(c(.30, .10, .15, .25), 2, 2, byrow = TRUE) S22 <- matrix(c(1, .40, .40, 1), 2, 2) b <- c(.7, .3) rcv <- restricted_canonical_validity(S11, S12, S22, criterion_weights = b) rcv$validity with_multidim <- bcg_utility( validity = rcv$validity, baseline_validity = baseline_validity, selection_ratio = selection_ratio, sdy = sdy, n_selected = n_year_one, tenure = tenure_years, cost = fixed_cost ) with_multidim$net_utility ## ----------------------------------------------------------------------------- hires <- c(n_year_one, 15, 15, 15, 15) losses <- c(0, 15, 15, 15, 15) active_n <- employee_flow(hires, losses) active_n ## ----------------------------------------------------------------------------- with_flows <- boudreau_utility( validity = rcv$validity, baseline_validity = baseline_validity, selection_ratio = selection_ratio, sdy = sdy, n_by_period = active_n, variable_value = 0, tax_rate = .25, discount_rate = .08, cost_by_period = c(fixed_cost, 5000, 5000, 5000, 5000) ) with_flows$net_present_value ## ----------------------------------------------------------------------------- probation_z <- -1 survivor_gain <- probation_adjustment(probation_z) discount_rate <- .08 periods <- seq_along(active_n) later_periods <- periods[-1] benefit_t <- survivor_gain * sdy * active_n[later_periods] * (1 - .25) discounted <- benefit_t / (1 + discount_rate)^later_periods extra_npv <- sum(discounted) with_probation_npv <- with_flows$net_present_value + extra_npv with_probation_npv ## ----------------------------------------------------------------------------- cascade <- data.frame( step = c("1. Naive BCG (random baseline)", "2. + operating baseline", "3. + multidim. criterion (RCV)", "4. + flows + tax + discount", "5. + probation (full comprehensive)"), net_utility = c(naive$net_utility, with_baseline$net_utility, with_multidim$net_utility, with_flows$net_present_value, with_probation_npv) ) cascade$pct_of_naive <- round(100 * cascade$net_utility / naive$net_utility, 1) cascade ## ----------------------------------------------------------------------------- S11 <- matrix(c(1, .30, .30, 1), 2, 2) S12 <- matrix(c(.30, .10, .15, .25), 2, 2, byrow = TRUE) S22 <- matrix(c(1, .40, .40, 1), 2, 2) s <- sturman_comprehensive( validity = .35, baseline_validity = .20, selection_ratio = .20, sdy = 50000, n_year_one = 100, tenure = 5, fixed_cost = 75000, hires_per_period = c(100, 15, 15, 15, 15), losses_per_period = c(0, 15, 15, 15, 15), tax_rate = .25, discount_rate = .08, predictor_cor = S11, predictor_criterion_cor = S12, criterion_cor = S22, criterion_weights = c(.7, .3), probation_cutoff_z = -1, acceptance_rate = 0.70, quality_acceptance_correlation = -0.20 ) s ## ----------------------------------------------------------------------------- Rxx <- matrix(c( 1.00, .30, .05, .10, .30, 1.00, .20, .25, .05, .20, 1.00, .40, .10, .25, .40, 1.00 ), 4, 4, byrow = TRUE) validities <- c(.51, .38, .23, .32) predictor_labels <- c("GMA", "Interview", "Conscientiousness", "Integrity") ## ----------------------------------------------------------------------------- selection_ratios <- c(.10, .20, .40) results <- lapply(selection_ratios, function(sr) { marginal_sr <- (sr)^(1 / 4) compare_selection_systems( predictor_cor = Rxx, validities = validities, compensatory_weights = validities, compensatory_selection_ratio = sr, hurdle_selection_ratios = rep(marginal_sr, 4), n_sim = 50000, seed = 42 ) }) names(results) <- paste0("SR=", selection_ratios) ## ----------------------------------------------------------------------------- ock_oswald <- data.frame( SR = selection_ratios, compensatory_z = vapply(results, function(o) o$compensatory$expected_criterion_z, numeric(1)), hurdle_z = vapply(results, function(o) o$multiple_hurdle$expected_criterion_z, numeric(1)), hurdle_joint_sr = vapply(results, function(o) o$multiple_hurdle$joint_selection_ratio, numeric(1)) ) ock_oswald$z_difference <- ock_oswald$compensatory_z - ock_oswald$hurdle_z ock_oswald ## ----------------------------------------------------------------------------- n_apps <- 1000 stage_design <- compare_selection_systems_staged( predictor_cor = Rxx, validities = validities, compensatory_weights = validities, compensatory_selection_ratio = .20, stage_predictors = list(1, c(2, 3), 4), stage_selection_ratios = c(.50, .60, .70), n_sim = 50000, seed = 42, n_applicants = n_apps, compensatory_cost_per_applicant = 800, hurdle_cost_per_stage = c(100, 400, 600), sdy = 50000 ) stage_design$net_utility_difference