## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## -----------------------------------------------------------------------------
library(featdelta)

# Example input: mtcars plus an explicit integer row key (`car_id`), trimmed
# to the columns referenced by the feature definitions below.
raw_cars <- mtcars
raw_cars$car_id <- seq_len(nrow(raw_cars))
raw_cars <- raw_cars[, c("car_id", "mpg", "cyl", "disp", "hp", "wt", "am")]

head(raw_cars)

## -----------------------------------------------------------------------------
# Three single-expression features written directly against raw columns.
# mtcars codes `am` as 0 = automatic, 1 = manual, so `am == 1` must map to
# "manual".
defs_basic <- fd_define(
  transmission = ifelse(am == 1, "manual", "automatic"),
  hp_per_cyl = hp / cyl,
  wt_per_hp = wt / hp
)

defs_basic

## -----------------------------------------------------------------------------
# Evaluate the definitions against the local data frame, keyed on `car_id`.
features_basic <- fd_compute(
  data = raw_cars,
  defs = defs_basic,
  key = "car_id"
)

head(features_basic)

## -----------------------------------------------------------------------------
# Definitions that build on one another: `strong_engine` refers to
# `hp_per_cyl`, and `engine_label` refers to `strong_engine`, each defined
# on the line above it.
defs_ordered <- fd_define(
  hp_per_cyl = hp / cyl,
  strong_engine = hp_per_cyl > 30,
  engine_label = ifelse(strong_engine, "strong", "regular")
)

features_ordered <- fd_compute(
  data = raw_cars,
  defs = defs_ordered,
  key = "car_id"
)

head(features_ordered)

## -----------------------------------------------------------------------------
# Definitions supplied as pre-built `expression()` objects rather than as
# inline code.
log_hp_expr <- expression(log(hp))
heavy_car_expr <- expression(wt > 3.5)

defs_programmatic <- fd_define(
  log_hp = log_hp_expr,
  heavy_car = heavy_car_expr
)

features_programmatic <- fd_compute(
  data = raw_cars,
  defs = defs_programmatic,
  key = "car_id"
)

head(features_programmatic)

## -----------------------------------------------------------------------------
# A block definition: one braced expression that evaluates to a data frame
# with several feature columns.
defs_block <- fd_define(
  engine_ratios = fd_block({
    data.frame(
      hp_per_cyl = hp / cyl,
      disp_per_cyl = disp / cyl,
      wt_per_hp = wt / hp
    )
  })
)

features_block <- fd_compute(
  data = raw_cars,
  defs = defs_block,
  key = "car_id"
)

head(features_block)

## -----------------------------------------------------------------------------
# A block written as a short script: intermediate values are assigned first,
# and the final expression is the data frame of features.
defs_script_block <- fd_define(
  engine_script = fd_block({
    hp_per_cyl <- hp / cyl
    disp_per_cyl <- disp / cyl

    ratio_average <- (hp_per_cyl + disp_per_cyl) / 2
    # Flag rows whose average ratio lies above the median of that average.
    high_ratio <- ratio_average > stats::median(ratio_average, na.rm = TRUE)

    data.frame(
      hp_per_cyl = hp_per_cyl,
      disp_per_cyl = disp_per_cyl,
      engine_ratio_average = ratio_average,
      high_engine_ratio = high_ratio
    )
  })
)

features_script_block <- fd_compute(
  data = raw_cars,
  defs = defs_script_block,
  key = "car_id"
)

head(features_script_block)

## -----------------------------------------------------------------------------
# Build engine ratio features from a data frame.
#
# `data` must provide numeric `hp`, `disp` and `cyl` columns. Returns a data
# frame with `hp_per_cyl`, `disp_per_cyl` and their sum, `engine_index`.
make_engine_features <- function(data) {
  hp_per_cyl <- data$hp / data$cyl
  disp_per_cyl <- data$disp / data$cyl

  data.frame(
    hp_per_cyl = hp_per_cyl,
    disp_per_cyl = disp_per_cyl,
    engine_index = hp_per_cyl + disp_per_cyl
  )
}

# The same kind of block, this time supplied as a function of the data.
defs_function_block <- fd_define(
  engine_features = fd_block(make_engine_features)
)

features_function_block <- fd_compute(
  data = raw_cars,
  defs = defs_function_block,
  key = "car_id"
)

head(features_function_block)

## -----------------------------------------------------------------------------
# Standardise `hp`, `disp` and `wt` to z-scores.
#
# Each column is centred on its mean and divided by its standard deviation
# (missing values are ignored when computing both). Returns a data frame with
# columns `hp_scaled`, `disp_scaled` and `wt_scaled`.
make_scaled_features <- function(data) {
  vars <- c("hp", "disp", "wt")
  out <- list()

  for (var in vars) {
    center <- mean(data[[var]], na.rm = TRUE)
    spread <- stats::sd(data[[var]], na.rm = TRUE)
    out[[paste0(var, "_scaled")]] <- (data[[var]] - center) / spread
  }

  as.data.frame(out)
}

defs_loop_block <- fd_define(
  scaled_inputs = fd_block(make_scaled_features)
)

features_loop_block <- fd_compute(
  data = raw_cars,
  defs = defs_loop_block,
  key = "car_id"
)

head(features_loop_block)

## -----------------------------------------------------------------------------
# Single-expression features and function blocks mixed in one definition set.
# `engine_per_weight` uses `engine_index`, a column produced by
# `make_engine_features()`.
defs_combined <- fd_define(
  # Same am coding as above: 1 = manual, 0 = automatic.
  transmission = ifelse(am == 1, "manual", "automatic"),
  engine_features = fd_block(make_engine_features),
  scaled_inputs = fd_block(make_scaled_features),
  engine_per_weight = engine_index / wt
)

features_combined <- fd_compute(
  data = raw_cars,
  defs = defs_combined,
  key = "car_id"
)

head(features_combined)

## -----------------------------------------------------------------------------
# The block returns only `high_hp`, while `expected_names` also lists
# `high_disp`.
# NOTE(review): presumably this demonstrates how a declared-but-missing
# column is handled; confirm against the fd_block() documentation.
defs_expected <- fd_define(
  optional_engine_flags = fd_block(
    {
      data.frame(
        high_hp = hp > 150
      )
    },
    expected_names = c("high_hp", "high_disp")
  )
)

features_expected <- fd_compute(
  data = raw_cars,
  defs = defs_expected,
  key = "car_id"
)

head(features_expected)

## ----eval = FALSE-------------------------------------------------------------
# # Local computation while developing feature logic
# fd_compute(raw_data, defs, key = "id")
#
# # Full database pipeline once the definitions are ready
# fd_run(con, sql, defs, key = "id", feat_table_name = "feature_table")