## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)

## ----extract-data-------------------------------------------------------------
# library(vald.extractor)
# 
# # Set credentials
# valdr::set_credentials(
#   client_id     = "your_client_id",
#   client_secret = "your_client_secret",
#   tenant_id     = "your_tenant_id",
#   region        = "aue"
# )
# 
# # Fetch data from 2020 onwards in chunks of 100 tests
# vald_data <- fetch_vald_batch(
#   start_date = "2020-01-01T00:00:00Z",
#   chunk_size = 100,
#   verbose = TRUE
# )
# 
# # Extract components
# tests_df <- vald_data$tests
# trials_df <- vald_data$trials
# 
# cat("Extracted", nrow(tests_df), "tests and", nrow(trials_df), "trials\n")

## ----fetch-metadata-----------------------------------------------------------
# # Fetch raw metadata
# metadata <- fetch_vald_metadata(
#   client_id     = "your_client_id",
#   client_secret = "your_client_secret",
#   tenant_id     = "your_tenant_id",
#   region        = "aue"
# )
# 
# # Standardize: unnest group memberships and create unified athlete records
# athlete_metadata <- standardize_vald_metadata(
#   profiles = metadata$profiles,
#   groups   = metadata$groups
# )
# 
# head(athlete_metadata)

## ----classify-sports----------------------------------------------------------
# athlete_metadata <- classify_sports(
#   data = athlete_metadata,
#   group_col = "all_group_names",
#   output_col = "sports_clean"
# )
# 
# # Inspect the mapping
# table(athlete_metadata$sports_clean)

## ----transform-wide-----------------------------------------------------------
# library(dplyr)
# 
# # Join trials and tests
# all_data <- left_join(trials_df, tests_df, by = c("testId", "athleteId"))
# 
# # Aggregate trials and pivot to wide format
# structured_test_data <- all_data %>%
#   group_by(athleteId, testId, testType, recordedUTC,
#            recordedDateOffset, trialLimb, definition_name) %>%
#   summarise(
#     mean_result = mean(as.numeric(value), na.rm = TRUE),
#     mean_weight = mean(as.numeric(weight), na.rm = TRUE),
#     .groups = "drop"
#   ) %>%
#   mutate(
#     TestTimestampUTC = lubridate::ymd_hms(recordedUTC),
#     TestTimestampLocal = TestTimestampUTC + lubridate::minutes(recordedDateOffset),
#     Testdate = as.Date(TestTimestampLocal)
#   ) %>%
#   select(athleteId, Testdate, testId, testType, trialLimb,
#          definition_name, mean_result, mean_weight) %>%
#   tidyr::pivot_wider(
#     id_cols = c(athleteId, Testdate, testId, mean_weight),
#     names_from = c(definition_name, trialLimb, testType),
#     values_from = mean_result,
#     names_glue = "{definition_name}_{trialLimb}_{testType}"
#   ) %>%
#   rename(Weight_on_Test_Day = mean_weight)
# 
# # Join with metadata
# final_analysis_data <- structured_test_data %>%
#   mutate(profileId = as.character(athleteId)) %>%
#   left_join(
#     athlete_metadata %>% mutate(profileId = as.character(profileId)),
#     by = "profileId"
#   ) %>%
#   mutate(
#     Testdate = as.Date(Testdate),
#     dateofbirth = as.Date(dateOfBirth),
#     age = as.numeric((Testdate - dateofbirth) / 365.25),
#     sports = sports_clean
#   )
# 
# cat("Final dataset:", nrow(final_analysis_data), "rows with",
#     ncol(final_analysis_data), "columns\n")

## ----split-tests--------------------------------------------------------------
# # Split into separate datasets per test type
# test_datasets <- split_by_test(
#   data = final_analysis_data,
#   metadata_cols = c("profileId", "sex", "Testdate", "dateofbirth",
#                     "age", "testId", "Weight_on_Test_Day", "sports")
# )
# 
# # Access individual test types
# cmj_data <- test_datasets$CMJ
# dj_data <- test_datasets$DJ
# 
# # Crucially: column names are now generic
# head(names(cmj_data))
# # "profileId", "sex", "Testdate", "PEAK_FORCE_Both", "JUMP_HEIGHT_Both", ...
# # Note: "_CMJ" suffix has been removed!

## ----generic-analysis---------------------------------------------------------
# analyze_peak_force <- function(test_data) {
#   summary(test_data$PEAK_FORCE_Both)  # Works for CMJ, DJ, ISO, etc.
# }
# 
# # Apply to all test types
# lapply(test_datasets, analyze_peak_force)

## ----patch-metadata-----------------------------------------------------------
# # Create an Excel file with: profileId, sex, dateOfBirth
# # Example: corrections.xlsx with rows like:
# #   profileId         sex       dateOfBirth
# #   abc123           Male      1995-03-15
# #   def456           Female    1998-07-22
# 
# cmj_data <- patch_metadata(
#   data = cmj_data,
#   patch_file = "corrections.xlsx",
#   patch_sheet = 1,
#   id_col = "profileId",
#   fields_to_patch = c("sex", "dateOfBirth")
# )
# 
# # Verify corrections
# table(cmj_data$sex)  # "Unknown" values should now be fixed

## ----summary-stats------------------------------------------------------------
# cmj_summary <- summary_vald_metrics(
#   data = cmj_data,
#   group_vars = c("sex", "sports"),
#   exclude_cols = c("profileId", "testId", "Testdate", "dateofbirth", "age")
# )
# 
# # View summary
# print(cmj_summary)
# 
# # Export to CSV
# write.csv(cmj_summary, "cmj_summary_by_sport_sex.csv", row.names = FALSE)

## ----plot-trends--------------------------------------------------------------
# library(ggplot2)
# 
# # Plot CMJ peak force trends by athlete
# plot_vald_trends(
#   data = cmj_data,
#   date_col = "Testdate",
#   metric_col = "PEAK_FORCE_Both",
#   group_col = "profileId",
#   facet_col = "sex",
#   title = "CMJ Peak Force Trends by Athlete",
#   smooth = TRUE
# )
# 
# # Plot sport-level averages over time
# sport_trends <- cmj_data %>%
#   group_by(Testdate, sports) %>%
#   summarise(avg_force = mean(PEAK_FORCE_Both, na.rm = TRUE), .groups = "drop")
# 
# plot_vald_trends(
#   data = sport_trends,
#   date_col = "Testdate",
#   metric_col = "avg_force",
#   group_col = "sports",
#   title = "Average CMJ Peak Force by Sport Over Time"
# )

## ----plot-compare-------------------------------------------------------------
# plot_vald_compare(
#   data = cmj_data,
#   metric_col = "PEAK_FORCE_Both",
#   group_col = "sports",
#   fill_col = "sex",
#   title = "CMJ Peak Force Comparison by Sport and Sex"
# )
# 
# # Compare jump height
# plot_vald_compare(
#   data = cmj_data,
#   metric_col = "JUMP_HEIGHT_Both",
#   group_col = "sports",
#   fill_col = "sex",
#   title = "CMJ Jump Height Comparison"
# )

## ----multi-test---------------------------------------------------------------
# # Define a function to extract a common metric across test types
# compare_metric_across_tests <- function(test_datasets, metric = "PEAK_FORCE_Both") {
# 
#   results <- lapply(names(test_datasets), function(test_name) {
#     test_data <- test_datasets[[test_name]]
# 
#     if (metric %in% names(test_data)) {
#       data.frame(
#         testType = test_name,
#         metric = metric,
#         mean = mean(test_data[[metric]], na.rm = TRUE),
#         sd = sd(test_data[[metric]], na.rm = TRUE),
#         n = sum(!is.na(test_data[[metric]]))
#       )
#     }
#   })
# 
#   do.call(rbind, results)
# }
# 
# # Compare peak force across CMJ, DJ, and ISO
# force_comparison <- compare_metric_across_tests(test_datasets, "PEAK_FORCE_Both")
# print(force_comparison)

## ----scheduled-updates, eval=FALSE--------------------------------------------
# # Weekly refresh script
# library(vald.extractor)
# 
# # Fetch only new data since last update
# last_update <- "2024-01-01T00:00:00Z"
# 
# new_data <- fetch_vald_batch(
#   start_date = last_update,
#   chunk_size = 100
# )
# 
# # Append to existing database
# load("vald_database.RData")
# updated_tests <- rbind(existing_tests, new_data$tests)
# updated_trials <- rbind(existing_trials, new_data$trials)
# 
# save(updated_tests, updated_trials, file = "vald_database.RData")

## ----error-logging------------------------------------------------------------
# # Errors are printed to console with chunk information:
# # "ERROR on chunk 23 (rows 2201-2300): API timeout"
# # "Continuing to next chunk..."
# 
# # This ensures partial data extraction even if some chunks fail

## ----taxonomy-config----------------------------------------------------------
# # sports_taxonomy.R
# sports_patterns <- list(
#   Football = "Football|FSI|TCFC|MCFC|Soccer",
#   Basketball = "Basketball|BBall",
#   Cricket = "Cricket",
#   # ... add your organization's patterns
# )
# 
# # Then use in classify_sports()