litedown::reactor( print=NA, collapse = TRUE, comment = "#>", fig.width=10, fig.height=3) data.table::setDTthreads(1) library(data.table) N <- 2400 abs.x <- 3*pi set.seed(2) grid.dt <- data.table( x=seq(-abs.x,abs.x, l=201), y=0) x.vec <- runif(N, -abs.x, abs.x) standard.deviation.vec <- c( easy=0.1, hard=1.7) reg.data.list <- list() grid.signal.dt.list <- list() sim_fun <- sin for(difficulty in names(standard.deviation.vec)){ standard.deviation <- standard.deviation.vec[[difficulty]] signal.vec <- sim_fun(x.vec) y <- signal.vec+rnorm(N,sd=standard.deviation) task.dt <- data.table(x=x.vec, y) reg.data.list[[difficulty]] <- data.table(difficulty, task.dt) grid.signal.dt.list[[difficulty]] <- data.table( difficulty, algorithm="ideal", x=grid.dt$x, y=sim_fun(grid.dt$x)) } reg.data <- rbindlist(reg.data.list) grid.signal.dt <- rbindlist(grid.signal.dt.list) algo.colors <- c( featureless="blue", rpart="red", ideal="black") if(require(ggplot2)){ my_theme <- theme_bw(15) ggplot()+ my_theme+ theme(panel.spacing=grid::unit(1, "cm"))+ geom_point(aes( x, y), fill="white", color="grey", data=reg.data)+ geom_line(aes( x, y, color=algorithm), linewidth=2, data=grid.signal.dt)+ scale_color_manual(values=algo.colors)+ facet_grid(. ~ difficulty, labeller=label_both) } SOAKED <- mlr3resampling::ResamplingSameOtherSizesCV$new() SOAKED$param_set$values$sizes <- 0 SOAKED$param_set$values$folds <- 10 set.seed(1) sim.meta.list <- list( different=rbind( reg.data[difficulty=="easy"][sample(.N, 400)], reg.data[difficulty=="hard"][sample(.N, 200)] )[, .(x,y,Subset=ifelse(difficulty=="easy", "large", "small"))], iid_easy=reg.data[ difficulty=="easy" ][sample(.N, 120)][ , Subset := rep(c("large","large","small"), l=.N) ][, .(x,y,Subset)]) d_task_list <- list() gg_list <- list() for(sim.name in names(sim.meta.list)){ sim.i.dt <- sim.meta.list[[sim.name]] sub_task <- mlr3::TaskRegr$new( sim.name, sim.i.dt, target="y") sub_task$col_roles$subset <- "Subset" sub_task$col_roles$feature <- "x" d_task_list[[sim.name]] <- sub_task if(require("ggplot2")){ gg_list[[sim.name]] <- ggplot()+ my_theme+ ggtitle(paste("Task:", sim.name))+ geom_point(aes( x, y), shape=21, color="black", fill="white", data=sim.i.dt)+ geom_line(aes( x, y, color=algorithm), data=grid.signal.dt)+ scale_color_manual(values=algo.colors)+ facet_grid(Subset~., labeller=label_both) } } gg_list reg.learner.list <- list( if(requireNamespace("rpart"))mlr3::LearnerRegrRpart$new(), mlr3::LearnerRegrFeatureless$new()) (reg.bench.grid <- mlr3::benchmark_grid( d_task_list, reg.learner.list, SOAKED)) if(require(future))plan("multisession") if(require(lgr))get_logger("mlr3")$set_threshold("warn") (reg.bench.result <- mlr3::benchmark(reg.bench.grid)) score_dt <- mlr3resampling::score( reg.bench.result, mlr3::msr("regr.rmse")) plot(score_dt)+my_theme plist <- mlr3resampling::pvalue(score_dt) plot(plist)+my_theme dlist <- mlr3resampling::pvalue_downsample(score_dt[ algorithm=="rpart" & task_id=="iid_easy" & test.subset=="large"]) plot(dlist)+my_theme dlist <- mlr3resampling::pvalue_downsample(score_dt[ algorithm=="rpart" & task_id=="iid_easy" & test.subset=="small"]) plot(dlist)+my_theme dlist <- mlr3resampling::pvalue_downsample(score_dt[ algorithm=="rpart" & task_id=="different" & test.subset=="large"]) plot(dlist)+my_theme dlist <- mlr3resampling::pvalue_downsample(score_dt[ algorithm=="rpart" & task_id=="different" & test.subset=="small"]) plot(dlist)+my_theme if(require(future))plan("sequential")