## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5
)
library(couplr)
library(dplyr)

## ----first-match--------------------------------------------------------------
library(couplr)
library(dplyr)

# Create example data: treatment and control groups
set.seed(123)
treatment <- tibble(
  id = 1:50,
  age = rnorm(50, mean = 45, sd = 10),
  income = rnorm(50, mean = 55000, sd = 12000)
)

control <- tibble(
  id = 1:80,
  age = rnorm(80, mean = 50, sd = 12),
  income = rnorm(80, mean = 48000, sd = 15000)
)

# Match on age and income
result <- match_couples(
  left = treatment,
  right = control,
  vars = c("age", "income"),
  auto_scale = TRUE
)

# View matched pairs
head(result$pairs)

## ----output-explained---------------------------------------------------------
# Quick overview with summary()
summary(result)

# Or access specific info
result$info$n_matched

## ----scaling-demo-------------------------------------------------------------
# BAD: Without scaling, income dominates
result_unscaled <- match_couples(
  treatment, control,
  vars = c("age", "income"),
  auto_scale = FALSE
)

# GOOD: With scaling, both variables contribute equally
result_scaled <- match_couples(
  treatment, control,
  vars = c("age", "income"),
  auto_scale = TRUE
)

# Compare mean distances
cat("Unscaled mean distance:", round(mean(result_unscaled$pairs$distance), 1), "\n")
cat("Scaled mean distance:", round(mean(result_scaled$pairs$distance), 3), "\n")

## ----balance-check------------------------------------------------------------
# Get the matched observations
matched_treatment <- treatment[result$pairs$left_id, ]
matched_control <- control[result$pairs$right_id, ]

# Compare means before and after matching
cat("BEFORE matching:\n")
cat("  Age difference:", round(mean(treatment$age) - mean(control$age), 1), "years\n")
cat("  Income difference: $", round(mean(treatment$income) - mean(control$income), 0), "\n\n")

cat("AFTER matching:\n")
cat("  Age difference:", round(mean(matched_treatment$age) - mean(matched_control$age), 1), "years\n")
cat("  Income difference: $", round(mean(matched_treatment$income) - mean(matched_control$income), 0), "\n")

## ----plot-result, fig.width=6, fig.height=4, fig.alt="Histogram showing distribution of match distances, with most matches having low distances near zero"----
plot(result)

## ----greedy-example-----------------------------------------------------------
# Create larger datasets
set.seed(456)
large_treatment <- tibble(
  id = 1:2000,
  age = rnorm(2000, 45, 10),
  income = rnorm(2000, 55000, 12000)
)

large_control <- tibble(
  id = 1:3000,
  age = rnorm(3000, 50, 12),
  income = rnorm(3000, 48000, 15000)
)

# Fast greedy matching
result_greedy <- greedy_couples(
  large_treatment, large_control,
  vars = c("age", "income"),
  auto_scale = TRUE,
  strategy = "row_best"  # fastest strategy
)

cat("Matched", result_greedy$info$n_matched, "pairs\n")
cat("Mean distance:", round(mean(result_greedy$pairs$distance), 3), "\n")

## ----caliper-example----------------------------------------------------------
# Allow any match
result_loose <- match_couples(
  treatment, control,
  vars = c("age", "income"),
  auto_scale = TRUE
)

# Only allow close matches
result_strict <- match_couples(
  treatment, control,
  vars = c("age", "income"),
  auto_scale = TRUE,
  max_distance = 0.5  # reject pairs more different than this
)

cat("Without caliper:", result_loose$info$n_matched, "pairs\n")
cat("With caliper:", result_strict$info$n_matched, "pairs\n")

## ----blocking-example---------------------------------------------------------
# Data from multiple hospital sites
set.seed(321)
treated <- tibble(
  id = 1:60,
  site = rep(c("Hospital A", "Hospital B", "Hospital C"), each = 20),
  age = rnorm(60, 55, 10),
  severity = rnorm(60, 5, 2)
)

controls <- tibble(
  id = 1:90,
  site = rep(c("Hospital A", "Hospital B", "Hospital C"), each = 30),
  age = rnorm(90, 52, 12),
  severity = rnorm(90, 4.5, 2.5)
)

# Step 1: Create blocks by hospital site
blocks <- matchmaker(
  left = treated,
  right = controls,
  block_type = "group",
  block_by = "site"
)

# Step 2: Match within each block
result_blocked <- match_couples(
  left = blocks$left,
  right = blocks$right,
  vars = c("age", "severity"),
  block_id = "block_id",
  auto_scale = TRUE
)

# Verify: matches stay within their block
result_blocked$pairs |> count(block_id)

## ----complete-example---------------------------------------------------------
# 1. Prepare your data
set.seed(789)
patients_treated <- tibble(
  patient_id = paste0("T", 1:100),
  age = rnorm(100, 62, 8),
  bmi = rnorm(100, 28, 4),
  smoker = sample(0:1, 100, replace = TRUE, prob = c(0.6, 0.4))
)

patients_control <- tibble(
  patient_id = paste0("C", 1:200),
  age = rnorm(200, 58, 10),
  bmi = rnorm(200, 26, 5),
  smoker = sample(0:1, 200, replace = TRUE, prob = c(0.7, 0.3))
)

# 2. Match on clinical variables
matched <- match_couples(
  left = patients_treated,
  right = patients_control,
  vars = c("age", "bmi", "smoker"),
  auto_scale = TRUE
)

# 3. Check how many matched
cat("Treated patients:", nrow(patients_treated), "\n")
cat("Successfully matched:", matched$info$n_matched, "\n")
cat("Match rate:", round(100 * matched$info$n_matched / nrow(patients_treated), 1), "%\n")

# 4. Extract matched samples for analysis
treated_matched <- patients_treated[matched$pairs$left_id, ]
control_matched <- patients_control[matched$pairs$right_id, ]

# 5. Verify balance
cat("\nBalance check (difference in means):\n")
cat("  Age:", round(mean(treated_matched$age) - mean(control_matched$age), 2), "\n")
cat("  BMI:", round(mean(treated_matched$bmi) - mean(control_matched$bmi), 2), "\n")
cat("  Smoker %:", round(100*(mean(treated_matched$smoker) - mean(control_matched$smoker)), 1), "\n")

## ----lap-solve-basic----------------------------------------------------------
# Cost matrix: 3 workers × 3 tasks
cost <- matrix(c(
  4, 2, 5,
  3, 3, 6,
  7, 5, 4
), nrow = 3, byrow = TRUE)

result <- lap_solve(cost)
print(result)

## ----forbidden----------------------------------------------------------------
cost_forbidden <- matrix(c(
  4, 2, NA,   # Row 1 cannot go to column 3
  Inf, 3, 6,  # Row 2 cannot go to column 1
  7, 5, 4
), nrow = 3, byrow = TRUE)

lap_solve(cost_forbidden)

## ----maximize-----------------------------------------------------------------
preferences <- matrix(c(
  8, 5, 3,
  4, 7, 6,
  2, 4, 9
), nrow = 3, byrow = TRUE)

lap_solve(preferences, maximize = TRUE)

## ----grouped-lap--------------------------------------------------------------
# Weekly nurse-shift scheduling: solve each day separately
schedule <- tibble(
  day = rep(c("Mon", "Tue", "Wed"), each = 9),
  nurse = rep(rep(1:3, each = 3), 3),
  shift = rep(1:3, 9),
  cost = c(4,2,5, 3,3,6, 7,5,4,   # Monday costs
           5,3,4, 2,4,5, 6,4,3,   # Tuesday costs
           3,4,5, 4,2,6, 5,5,4)   # Wednesday costs
)

# Solve all three days at once
schedule |>
  group_by(day) |>
  lap_solve(nurse, shift, cost)

## ----kbest--------------------------------------------------------------------
cost <- matrix(c(1, 2, 3, 4, 3, 2, 5, 4, 1), nrow = 3, byrow = TRUE)

kbest <- lap_solve_kbest(cost, k = 3)
print(kbest)