## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## -----------------------------------------------------------------------------
library(predictset)

set.seed(42)
n <- 500
x <- matrix(rnorm(n * 5), ncol = 5)
y <- x[, 1] * 2 + x[, 2] + rnorm(n)
x_new <- matrix(rnorm(100 * 5), ncol = 5)

result <- conformal_split(x, y, model = y ~ ., x_new = x_new, alpha = 0.10)
print(result)

## -----------------------------------------------------------------------------
result <- conformal_split(x, y, model = y ~ ., x_new = x_new)

## -----------------------------------------------------------------------------
fit <- lm(y ~ ., data = data.frame(y = y, x))
result <- conformal_split(x, y, model = fit, x_new = x_new)

## ----eval = FALSE-------------------------------------------------------------
# rf <- make_model(
#   train_fun = function(x, y) {
#     ranger::ranger(y ~ ., data = data.frame(y = y, x))
#   },
#   predict_fun = function(object, x_new) {
#     predict(object, data = as.data.frame(x_new))$predictions
#   },
#   type = "regression"
# )
# 
# result <- conformal_split(x, y, model = rf, x_new = x_new)

## ----eval = FALSE-------------------------------------------------------------
# set.seed(42)
# n <- 600
# x <- matrix(rnorm(n * 4), ncol = 4)
# y <- factor(ifelse(x[, 1] > 0.5, "A", ifelse(x[, 2] > 0, "B", "C")))
# x_new <- matrix(rnorm(100 * 4), ncol = 4)
# 
# clf <- make_model(
#   train_fun = function(x, y) {
#     ranger::ranger(y ~ ., data = data.frame(y = y, x), probability = TRUE)
#   },
#   predict_fun = function(object, x_new) {
#     predict(object, data = as.data.frame(x_new))$predictions
#   },
#   type = "classification"
# )
# 
# result <- conformal_aps(x, y, model = clf, x_new = x_new, alpha = 0.10)
# print(result)
# 
# # Most predictions are a single class; ambiguous ones include 2-3
# table(set_size(result))

## -----------------------------------------------------------------------------
set.seed(42)
n <- 600
x <- matrix(rnorm(n * 3), ncol = 3)
groups <- factor(ifelse(x[, 1] > 0, "high", "low"))
y <- x[, 1] * 2 + ifelse(groups == "high", 3, 0.5) * rnorm(n)
x_new <- matrix(rnorm(200 * 3), ncol = 3)
groups_new <- factor(ifelse(x_new[, 1] > 0, "high", "low"))

result <- conformal_mondrian(x, y, model = y ~ ., x_new = x_new,
                              groups = groups, groups_new = groups_new)

# Check per-group coverage
y_new <- x_new[, 1] * 2 + ifelse(groups_new == "high", 3, 0.5) * rnorm(200)
coverage_by_group(result, y_new, groups_new)

## ----eval = FALSE-------------------------------------------------------------
# # Empirical coverage (should be close to 1 - alpha)
# coverage(result, y_test)
# 
# # Average interval width (regression) - narrower is better
# mean(interval_width(result))
# 
# # Prediction set sizes (classification) - smaller is better
# table(set_size(result))
# 
# # Coverage within subgroups (fairness check)
# coverage_by_group(result, y_test, groups = groups_test)
# 
# # Coverage by prediction quantile bin
# coverage_by_bin(result, y_test, bins = 5)
# 
# # Conformal p-values for outlier detection
# pvals <- conformal_pvalue(result$scores, new_scores)

## -----------------------------------------------------------------------------
set.seed(42)
n <- 500
x <- matrix(rnorm(n * 3), ncol = 3)
y <- x[, 1] * 2 + rnorm(n)
x_new <- matrix(rnorm(200 * 3), ncol = 3)
y_new <- x_new[, 1] * 2 + rnorm(200)

comp <- conformal_compare(x, y, model = y ~ ., x_new = x_new, y_new = y_new,
                           methods = c("split", "cv", "jackknife"))
print(comp)