## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup-------------------------------------------------------------------- library(etree) ## ----------------------------------------------------------------------------- # Set seed set.seed(123) # Number of observation n_obs <- 100 # Response variable for regression resp_reg <- c(rnorm(n_obs / 4, mean = 0, sd = 1), rnorm(n_obs / 4, mean = 2, sd = 1), rnorm(n_obs / 4, mean = 4, sd = 1), rnorm(n_obs / 4, mean = 6, sd = 1)) ## ----------------------------------------------------------------------------- # Response variable for classification resp_cls <- factor(c(sample(x = 1:4, size = n_obs / 4, replace = TRUE, prob = c(0.85, 0.05, 0.05, 0.05)), sample(x = 1:4, size = n_obs / 4, replace = TRUE, prob = c(0.05, 0.85, 0.05, 0.05)), sample(x = 1:4, size = n_obs / 4, replace = TRUE, prob = c(0.05, 0.05, 0.85, 0.05)), sample(x = 1:4, size = n_obs / 4, replace = TRUE, prob = c(0.05, 0.05, 0.05, 0.85)))) ## ----------------------------------------------------------------------------- # Numeric x1 <- c(runif(n_obs / 4, min = 0, max = 0.55), runif(n_obs / 4 * 3, min = 0.45, max = 1)) # Nominal x2 <- factor(c(rbinom(n_obs / 4, 1, 0.1), rbinom(n_obs / 4, 1, 0.9), rbinom(n_obs / 2, 1, 0.1))) # Functions x3 <- c(fda.usc::rproc2fdata(n_obs / 2, seq(0, 1, len = 100), sigma = 1), fda.usc::rproc2fdata(n_obs / 4, seq(0, 1, len = 100), sigma = 1, mu = rep(1, 100)), fda.usc::rproc2fdata(n_obs / 4, seq(0, 1, len = 100), sigma = 1)) # Graphs x4 <- c(lapply(1:(n_obs / 4 * 3), function(j) igraph::sample_gnp(100, 0.1)), lapply(1:(n_obs / 4), function(j) igraph::sample_gnp(100, 0.9))) # Covariates list cov_list <- list(X1 = x1, X2 = x2, X3 = x3, X4 = x4) ## ----------------------------------------------------------------------------- # Quick fit ef_fit <- eforest(response = resp_reg, covariates = cov_list, ntrees = 12, perf_metric = 'MSE') ## ---- fig.dim = c(25, 11)----------------------------------------------------- # Plot of a tree from the ensemble plot(ef_fit$ensemble[[4]]) ## ----------------------------------------------------------------------------- # Retrieve performance metric ef_fit$perf_metric # OOB MSE for this ensemble ef_fit$oob_score ## ----------------------------------------------------------------------------- # Predictions from the fitted object pred <- predict(ef_fit) print(pred) ## ----------------------------------------------------------------------------- # New set of covariates n_obs <- 40 x1n <- c(runif(n_obs / 4, min = 0, max = 0.55), runif(n_obs / 4 * 3, min = 0.45, max = 1)) x2n <- factor(c(rbinom(n_obs / 4, 1, 0.1), rbinom(n_obs / 4, 1, 0.9), rbinom(n_obs / 2, 1, 0.1))) x3n <- c(fda.usc::rproc2fdata(n_obs / 2, seq(0, 1, len = 100), sigma = 1), fda.usc::rproc2fdata(n_obs / 4, seq(0, 1, len = 100), sigma = 1, mu = rep(1, 100)), fda.usc::rproc2fdata(n_obs / 4, seq(0, 1, len = 100), sigma = 1)) x4n <- c(lapply(1:(n_obs / 4 * 3), function(j) igraph::sample_gnp(100, 0.1)), lapply(1:(n_obs / 4), function(j) igraph::sample_gnp(100, 0.9))) new_cov_list <- list(X1 = x1n, X2 = x2n, X3 = x3n, X4 = x4n) # New response new_resp_reg <- c(rnorm(n_obs / 4, mean = 0, sd = 1), rnorm(n_obs / 4, mean = 2, sd = 1), rnorm(n_obs / 4, mean = 4, sd = 1), rnorm(n_obs / 4, mean = 6, sd = 1)) # Predictions new_pred <- predict(ef_fit, newdata = new_cov_list) print(new_pred) ## ----------------------------------------------------------------------------- # MSE between the new response and its average mean((new_resp_reg - mean(new_resp_reg)) ^ 2) # MSE between the new response and predictions with the new set of covariates mean((new_resp_reg - new_pred) ^ 2) ## ---- fig.dim = c(7, 6)------------------------------------------------------- # Quick fit ef_fit <- eforest(response = resp_cls, covariates = cov_list, ntrees = 12, split_type = 'coeff') ## ----------------------------------------------------------------------------- # Predictions from the fitted object pred <- predict(ef_fit) print(pred) ## ----------------------------------------------------------------------------- # New response new_resp_cls <- factor(c(sample(x = 1:4, size = n_obs / 4, replace = TRUE, prob = c(0.85, 0.05, 0.05, 0.05)), sample(x = 1:4, size = n_obs / 4, replace = TRUE, prob = c(0.05, 0.85, 0.05, 0.05)), sample(x = 1:4, size = n_obs / 4, replace = TRUE, prob = c(0.05, 0.05, 0.85, 0.05)), sample(x = 1:4, size = n_obs / 4, replace = TRUE, prob = c(0.05, 0.05, 0.05, 0.85)))) # Predictions new_pred <- predict(ef_fit, newdata = new_cov_list) print(new_pred) # Confusion matrix between the new response and predictions from the fitted tree table(new_pred, new_resp_cls, dnn = c('Predicted', 'True')) # Misclassification error for predictions on the new set of covariates sum(new_pred != new_resp_cls) / length(new_resp_cls)