## ----include = FALSE----------------------------------------------------------
# Chunk options applied to every chunk of the rendered document.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 8,
  fig.height = 6
)

# Seed the RNG once, before any task generation, resampling or sampling below.
set.seed(123)

# Quiet down the mlr3 logger (threshold "warn").
lgr::get_logger("mlr3")$set_threshold("warn")

# The progress option simply mirrors whether the session is interactive.
options(xplain.progress = interactive())

## ----setup--------------------------------------------------------------------
library(xplainfi)
library(mlr3)
library(mlr3learners)
library(data.table)
library(ggplot2)

## ----setup-problem------------------------------------------------------------
# Shared ingredients reused by every importance method in this script:
# a generated "friedman1" task (n = 300), a ranger regression learner with
# 100 trees, the "regr.mse" measure and a 3-fold "cv" resampling.
task <- tgen("friedman1")$generate(n = 300)
learner <- lrn("regr.ranger", num.trees = 100)
measure <- msr("regr.mse")
resampling <- rsmp("cv", folds = 3)

## ----pfi-basic----------------------------------------------------------------
# PFI without an explicit n_repeats.
pfi <- PFI$new(
  task = task,
  learner = learner,
  measure = measure,
  resampling = resampling
)
pfi$compute()
pfi$importance()

## ----pfi-parameters-----------------------------------------------------------
# Same setup as above, but with n_repeats = 50.
pfi_stable <- PFI$new(
  task = task,
  learner = learner,
  measure = measure,
  resampling = resampling,
  n_repeats = 50
)
pfi_stable$compute()
pfi_stable$importance()

## ----pfi-ratio----------------------------------------------------------------
# Importances requested with relation = "ratio".
pfi_stable$importance(relation = "ratio")

## ----loco-basic---------------------------------------------------------------
# LOCO on the same task / learner / measure / resampling.
loco <- LOCO$new(
  task = task,
  learner = learner,
  measure = measure,
  resampling = resampling
)
loco$compute()
loco$importance()

## ----samplers-demo------------------------------------------------------------
arf_sampler <- ConditionalARFSampler$new(task)

# First five rows of the task data serve as the demo input.
sample_data <- task$data(rows = seq_len(5))
sample_data[, list(important1, important2)]

## ----conditional-sampling-----------------------------------------------------
# Features passed to the sampler as the conditioning set for "important1".
conditioning_features <- c("important2", "important3")
sampled_conditional <- arf_sampler$sample_newdata(
  feature = "important1",
  newdata = sample_data,
  conditioning_set = conditioning_features
)

# Print the input rows and the sampled rows one after the other.
sample_data[, list(important1, important2, important3)]
sampled_conditional[, list(important1, important2, important3)]

## ----detailed-scores----------------------------------------------------------
knitr::kable(
  head(pfi$scores(), 10),
  digits = 4,
  caption = "Detailed PFI scores (first 10 rows)"
)

## ----scoring-summary----------------------------------------------------------
# One-row summary: distinct features, resampling iterations and repeats,
# plus the total number of score rows.
pfi$scores()[, list(
  features = uniqueN(feature),
  resampling_folds = uniqueN(iter_rsmp),
  permutation_iters = uniqueN(iter_repeat),
  total_scores = .N
)]

## ----detailed-scores-ratio----------------------------------------------------
knitr::kable(
  head(pfi$scores(relation = "ratio"), 10),
  digits = 4,
  caption = "PFI scores using the ratio (first 10 rows)"
)

## ----pfi-obs-scores-----------------------------------------------------------
pfi$obs_loss()

## ----parallel-future, eval = FALSE--------------------------------------------
# library(future)
# plan("multisession", workers = 2)
#
# # PFI with parallelization across features
# pfi_parallel <- PFI$new(
#   task,
#   learner = lrn("regr.ranger"),
#   measure = msr("regr.mse"),
#   n_repeats = 10
# )
# pfi_parallel$compute()
# pfi_parallel$importance()
#
# # LOCO with parallelization (uses mlr3fselect internally)
# loco_parallel <- LOCO$new(
#   task,
#   learner = lrn("regr.ranger"),
#   measure = msr("regr.mse")
# )
# loco_parallel$compute()
# loco_parallel$importance()

## ----parallel-mirai, eval = FALSE---------------------------------------------
# library(mirai)
# daemons(n = 2)
#
# # Same PFI/LOCO code works with mirai backend
# pfi_parallel <- PFI$new(
#   task,
#   learner = lrn("regr.ranger"),
#   measure = msr("regr.mse"),
#   n_repeats = 10
# )
# pfi_parallel$compute()
# pfi_parallel$importance()
#
# # Clean up daemons when done
# daemons(0)