## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment  = "#>"
)
library(soilKey)

## ----mini-benchmark-----------------------------------------------------------
expected <- c(
  HS = "Histosols", AT = "Anthrosols", TC = "Technosols", CR = "Cryosols",
  LP = "Leptosols", SN = "Solonetz",   VR = "Vertisols", SC = "Solonchaks",
  GL = "Gleysols",  AN = "Andosols",   PZ = "Podzols",   PT = "Plinthosols",
  PL = "Planosols", ST = "Stagnosols", NT = "Nitisols",  FR = "Ferralsols",
  CH = "Chernozems", KS = "Kastanozems", PH = "Phaeozems", UM = "Umbrisols",
  DU = "Durisols",  GY = "Gypsisols", CL = "Calcisols", RT = "Retisols",
  AC = "Acrisols",  LX = "Lixisols",   AL = "Alisols",   LV = "Luvisols",
  CM = "Cambisols", AR = "Arenosols",  FL = "Fluvisols"
)

fixfns <- list(
  HS = make_histosol_canonical,  AT = make_anthrosol_canonical,
  TC = make_technosol_canonical, CR = make_cryosol_canonical,
  LP = make_leptosol_canonical,  SN = make_solonetz_canonical,
  VR = make_vertisol_canonical,  SC = make_solonchak_canonical,
  GL = make_gleysol_canonical,   AN = make_andosol_canonical,
  PZ = make_podzol_canonical,    PT = make_plinthosol_canonical,
  PL = make_planosol_canonical,  ST = make_stagnosol_canonical,
  NT = make_nitisol_canonical,   FR = make_ferralsol_canonical,
  CH = make_chernozem_canonical, KS = make_kastanozem_canonical,
  PH = make_phaeozem_canonical,  UM = make_umbrisol_canonical,
  DU = make_durisol_canonical,   GY = make_gypsisol_canonical,
  CL = make_calcisol_canonical,  RT = make_retisol_canonical,
  AC = make_acrisol_canonical,   LX = make_lixisol_canonical,
  AL = make_alisol_canonical,    LV = make_luvisol_canonical,
  CM = make_cambisol_canonical,  AR = make_arenosol_canonical,
  FL = make_fluvisol_canonical
)

bench <- do.call(rbind, lapply(names(expected), function(code) {
  fx <- fixfns[[code]]()
  res <- classify_wrb2022(fx, on_missing = "silent")
  data.frame(
    fixture  = code,
    target   = expected[[code]],
    assigned = res$rsg_or_order,
    name     = res$name,
    grade    = res$evidence_grade,
    match    = res$rsg_or_order == expected[[code]]
  )
}))

knitr::kable(bench[, c("fixture", "target", "assigned", "match", "grade")])

## ----mini-stats---------------------------------------------------------------
data.frame(
  metric = c("n_profiles", "top1_agreement",
             "indeterminate_rate", "evidence_grade_A",
             "evidence_grade_B"),
  value  = c(nrow(bench),
             mean(bench$match),
             mean(is.na(bench$assigned)),
             mean(bench$grade == "A"),
             mean(bench$grade == "B"))
)

## ----mini-confusion-----------------------------------------------------------
table(target = bench$target, assigned = bench$assigned)

## ----mini-qualifier-----------------------------------------------------------
qualifier_prefix <- vapply(names(expected), function(code) {
  fx <- fixfns[[code]]()
  res <- classify_wrb2022(fx, on_missing = "silent")
  if (length(res$qualifiers$principal) == 0) NA_character_
  else res$qualifiers$principal[1]
}, character(1))

knitr::kable(
  data.frame(fixture          = names(qualifier_prefix),
             principal_prefix = qualifier_prefix),
  caption = "Most-specific principal qualifier per canonical fixture."
)

## ----wosis-protocol, eval = FALSE---------------------------------------------
# library(soilKey)
# 
# # 1. Pull a snapshot of WoSIS profiles via the WoSIS API.
# profiles <- read_wosis_profiles(
#   url     = "https://wosis.isric.org/api/v3/profiles?format=json",
#   page_size = 500L
# )
# 
# # 2. Build a PedonRecord per profile.
# pedons <- lapply(profiles, build_pedon_from_wosis)
# 
# # 3. Classify each through the v0.9.3 key.
# classifications <- lapply(pedons, classify_wrb2022, on_missing = "silent")
# 
# # 4. Compare against the WoSIS-recorded RSG.
# bench <- mapply(function(c, p) {
#   data.frame(
#     profile_id  = p$site$id,
#     target_rsg  = p$site$wosis_rsg,
#     assigned    = c$rsg_or_order,
#     grade       = c$evidence_grade,
#     match       = c$rsg_or_order == p$site$wosis_rsg
#   )
# }, classifications, pedons, SIMPLIFY = FALSE)
# bench <- do.call(rbind, bench)
# 
# # 5. Headline numbers.
# list(
#   n              = nrow(bench),
#   top1           = mean(bench$match, na.rm = TRUE),
#   indeterminate  = mean(is.na(bench$assigned)),
#   pct_grade_A    = mean(bench$grade == "A"),
#   by_rsg         = table(bench$target_rsg, bench$assigned)
# )

## ----canonical-bench, eval = FALSE--------------------------------------------
# source(system.file("benchmarks", "run_wosis_benchmark.R",
#                     package = "soilKey"))
# bench <- run_canonical_benchmark()

## ----retry-demo, eval = FALSE-------------------------------------------------
# source(system.file("benchmarks", "run_wosis_benchmark.R",
#                     package = "soilKey"))
# profs <- read_wosis_profiles_graphql(
#   continent = "South America",
#   n_max     = 100L,
#   page_size = 10L,
#   verbose   = TRUE
# )
# length(profs)
# #> [1] 40   # 100 was requested but server timed out at offset = 40;
# #>          # the partial pull was returned cleanly.

## ----bundled, eval = FALSE----------------------------------------------------
# sample <- load_wosis_sample()
# length(sample$pedons)
# #> [1] 40
# 
# sample$pulled_on
# #> [1] "2026-05-03"
# 
# # Classify offline:
# classify_wrb2022(sample$pedons[[1]])$rsg_or_order