## ----include = FALSE----------------------------------------------------------
# Text output defaults for every chunk: merge source and output, prefix output
# lines with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
# Default figure dimensions for every chunk.
knitr::opts_chunk$set(fig.width = 8, fig.height = 6)

# Quiet down: raise the mlr3 logger threshold to "warn" and tie the
# xplain.progress option to whether the session is interactive.
lgr::get_logger("mlr3")$set_threshold("warn")
options(xplain.progress = interactive())

# Fixed seed so the results below are reproducible.
set.seed(123)

## ----setup--------------------------------------------------------------------
library(xplainfi)
library(mlr3)
library(mlr3learners)
library(data.table)
library(ggplot2)

## ----setup-problem------------------------------------------------------------
# Data: 300 observations drawn from the "friedman1" task generator.
task <- tgen("friedman1")$generate(n = 300)

# Evaluation setup shared by the importance methods below:
# 3-fold cross-validation, scored with "regr.mse", using a ranger
# learner with 100 trees.
resampling <- rsmp("cv", folds = 3)
measure <- msr("regr.mse")
learner <- lrn("regr.ranger", num.trees = 100)

## ----pfi-basic----------------------------------------------------------------
# Build a PFI object from the task, learner, measure and resampling
# defined earlier, leaving all other settings at their defaults.
pfi <- PFI$new(
	task,
	resampling = resampling,
	measure = measure,
	learner = learner
)

# Run the computation, then print the resulting importance table.
pfi$compute()
pfi$importance()

## ----pfi-parameters-----------------------------------------------------------
# PFI again, this time with n_repeats = 50.
pfi_stable <- PFI$new(
	task,
	n_repeats = 50,
	resampling = resampling,
	measure = measure,
	learner = learner
)

# Run the computation, then print importances with the default relation.
pfi_stable$compute()
pfi_stable$importance()

## ----pfi-ratio----------------------------------------------------------------
# Same computed object, importances requested with relation = "ratio".
pfi_stable$importance(relation = "ratio")

## ----loco-basic---------------------------------------------------------------
# Build a LOCO object with the same task, learner, measure and
# resampling that were used for PFI.
loco <- LOCO$new(
	task,
	resampling = resampling,
	measure = measure,
	learner = learner
)

# Run the computation, then print the resulting importance table.
loco$compute()
loco$importance()

## ----samplers-demo------------------------------------------------------------
# Sampler object constructed from the task.
arf_sampler <- ConditionalARFSampler$new(task)

# Rows 1 to 5 of the task data serve as a small example; show two of
# its columns.
sample_data <- task$data(rows = head(1:5, 5))
sample_data[, c("important1", "important2")]

## ----conditional-sampling-----------------------------------------------------
# Sample "important1" for the example rows, passing "important2" and
# "important3" as the conditioning set.
sampled_conditional <- arf_sampler$sample_newdata(
	newdata = sample_data,
	feature = "important1",
	conditioning_set = c("important2", "important3")
)

# Print the original rows first, then the sampled rows, restricted to
# the three columns involved.
sample_data[, c("important1", "important2", "important3")]
sampled_conditional[, c("important1", "important2", "important3")]

## ----detailed-scores----------------------------------------------------------
# Render the first 10 rows of the PFI score table, rounded to 4 digits.
knitr::kable(
	head(pfi$scores(), 10),
	digits = 4,
	caption = "Detailed PFI scores (first 10 rows)"
)

## ----scoring-summary----------------------------------------------------------
# One-row summary of the score table: distinct values of feature,
# iter_rsmp and iter_repeat, plus the total number of rows.
pfi$scores()[,
	list(
		features = uniqueN(feature),
		resampling_folds = uniqueN(iter_rsmp),
		permutation_iters = uniqueN(iter_repeat),
		total_scores = .N
	)
]

## ----detailed-scores-ratio----------------------------------------------------
# First 10 rows of the PFI scores requested with relation = "ratio",
# rounded to 4 digits.
knitr::kable(
	head(pfi$scores(relation = "ratio"), 10),
	digits = 4,
	caption = "PFI scores using the ratio (first 10 rows)"
)

## ----pfi-obs-scores-----------------------------------------------------------
# Print what the PFI object's obs_loss() method returns.
pfi$obs_loss()

## ----parallel-future, eval = FALSE--------------------------------------------
# library(future)
# plan("multisession", workers = 2)
# 
# # PFI with parallelization across features
# pfi_parallel = PFI$new(
# 	task,
# 	learner = lrn("regr.ranger"),
# 	measure = msr("regr.mse"),
# 	n_repeats = 10
# )
# pfi_parallel$compute()
# pfi_parallel$importance()
# 
# # LOCO with parallelization (uses mlr3fselect internally)
# loco_parallel = LOCO$new(
# 	task,
# 	learner = lrn("regr.ranger"),
# 	measure = msr("regr.mse")
# )
# loco_parallel$compute()
# loco_parallel$importance()

## ----parallel-mirai, eval = FALSE---------------------------------------------
# library(mirai)
# daemons(n = 2)
# 
# # Same PFI/LOCO code works with mirai backend
# pfi_parallel = PFI$new(
# 	task,
# 	learner = lrn("regr.ranger"),
# 	measure = msr("regr.mse"),
# 	n_repeats = 10
# )
# pfi_parallel$compute()
# pfi_parallel$importance()
# 
# # Clean up daemons when done
# daemons(0)