---
title: "Multi-View Symbolic Regression with leaf"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Multi-View Symbolic Regression with leaf}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

``` r
# Install the Python backend (only needs to be done once).
leaf::install_leaf()
```

``` r
# Load package
library(leaf)

if (!backend_available()) {
  message("Install backend with leaf::install_leaf()")
}
```

``` r
set.seed(42)

# Group sizes
n_a <- 50L
n_b <- 30L

# Group A: y = x1^2 (x2 is a nuisance variable here)
x1_a <- runif(n_a, min = 0, max = 2)
x2_a <- runif(n_a, min = 0, max = 3)
y_a <- x1_a^2 + rnorm(n_a, mean = 0, sd = 1e-4)

# Group B: y = 2 * x1^2 - 3 * x2 (same skeleton, different parameters)
x1_b <- runif(n_b, min = 0, max = 5)
x2_b <- runif(n_b, min = 0, max = 1)
y_b <- 2 * x1_b^2 - 3 * x2_b + rnorm(n_b, mean = 0, sd = 1e-4)

# Combine data; the 'group' column identifies the two views
train_data <- data.frame(
  x1 = c(x1_a, x1_b),
  x2 = c(x2_a, x2_b),
  y = c(y_a, y_b),
  group = rep(c("A", "B"), c(n_a, n_b))
)

head(train_data)
#>          x1        x2         y group
#> 1 1.8296121 1.0002816 3.3475126     A
#> 2 1.8741508 1.0402447 3.5123629     A
#> 3 0.5722791 1.1954562 0.3276609     A
#> 4 1.6608953 2.3540783 2.7586373     A
#> 5 1.2834910 0.1168095 1.6473582     A
#> 6 1.0381919 2.2463862 1.0778701     A
```

``` r
# Initialize the symbolic regressor
regressor <- leaf::SymbolicRegressor$new(
  engine = "rsrm",
  loss = "MSE",
  max_params = 3L,
  num_iterations = 3L,
  threshold = 1e-10,
  base = list(verbose = FALSE),
  mcts = list(times = 8L),
  gp = list(times = 8L)
)
```

``` r
# Stage 1: Discover equation skeletons
search_results <- regressor$search_equations(
  data = train_data,
  formula = "y ~ f(x1, x2 | group)"
)
#> 1. Processing data for equation search based on formula...
#> 2. Running engine 'rsrm' over 1 folds using up to 1 processes...
#> -- FINAL RESULTS --
#> Episode: 3/3
#> time: 20.96s
#> loss: 7.798843366570674e-09
#> form: X2+F
#> HOF:
#>                                  equation  complexity  loss
#> 0                                       0           0  999999999999999967336168804116691273849533185806555472917961779471295845921727862608739868455469056.00
#> 1                                  1.7959           1  208.56
#> 2                               1.1097*X1           2  42.00
#> 3                            0.4656*X1**2           3  1.31
#> 4                   0.5621*X1**2 - 0.3723           4  0.70
#> 5                0.4656*X1**2 + 0.0000*X2           5  0.00
#> 6       0.4656*X1**2 + 0.0000*X2 - 0.0000           6  0.00
#> 7    0.4656*X1**2 + 0.0000*X2 - 0.0000/X1           8  0.00
#> 8  0.4656*X1**2 + 0.0000*X2 - 0.0000/X1**2          9  0.00
#> 9  0.4656*X1**2 + 0.0000*X2 - 0.0000/X1**2         12  0.00
#> ---
#> task:dataset_aa19bbdd-5194-4694-a36e-e06f64ac155a expr:X1**2+X2+(-0.534387536908588*X1**2 + -0.9999837943962612*X2 + -2.0685022306810226e-08/X1**2) Loss_MSE:0.00 Test 0/1.
#> final result:
#> success rate : 0%
#> average discovery time is 20.971 seconds
#> Number of equations looked at (per test) [Total, Timed out, Successful]: [[1270, 0, 1270]]
#> 3. Found 10 raw skeletons. Deduplicating...

print("=== Search results ===")
#> [1] "=== Search results ==="
print(search_results)
#>                          Equation Complexity
#> 0                              u1          1
#> 1                           u1⋅x1          2
#> 2                         u1⋅x1^2          3
#> 3                 u1⋅x1^2 + -1⋅u2          4
#> 4                 u1⋅x1^2 + u2⋅x2          5
#> 5         u1⋅x1^2 + u2⋅x2 + -1⋅u3          6
#> 6   u1⋅x1^2 + u2⋅x2 + -1⋅u3⋅x1^-1          8
#> 7 u1⋅x1^2 + u2⋅x2 + -1⋅u3⋅x1^2^-1          9
```

``` r
# Stage 2: Fit parameters and compute loss (with CV)
fit_results <- regressor$fit(data = train_data)
#> Fitting parameters for 8 equations...
#> Parameter fitting complete.

print("=== Fit results ===")
#> [1] "=== Fit results ==="
print(fit_results)
#>                          Equation Complexity         Loss
#> 0                              u1          1 7.930425e+01
#> 1                           u1⋅x1          2 1.588952e+01
#> 2                         u1⋅x1^2          3 4.898860e-01
#> 3                 u1⋅x1^2 + -1⋅u2          4 2.643394e-01
#> 4                 u1⋅x1^2 + u2⋅x2          5 7.573504e-09
#> 5         u1⋅x1^2 + u2⋅x2 + -1⋅u3          6 7.556009e-09
#> 6   u1⋅x1^2 + u2⋅x2 + -1⋅u3⋅x1^-1          8 7.284390e-09
#> 7 u1⋅x1^2 + u2⋅x2 + -1⋅u3⋅x1^2^-1          9 7.245014e-09

# Inspect the last equation in the results
last_id <- rownames(fit_results)[nrow(fit_results)]

print("=== Last equation ===")
#> [1] "=== Last equation ==="
regressor$show_equation(last_id)
#> Details for Equation ID: 7
#> ----------------------------------------
#> Equation: u1⋅x1^2 + u2⋅x2 + -1⋅u3⋅x1^2^-1
#> ----------------------------------------
#> Full Expression per Group:
#> 
#> Group 'A':
#>                      2                            9.63124936535987e-9
#> 0.999995292847239⋅x₁  + 1.67023510898235e-5⋅x₂ - ───────────────────
#>                                                            2
#>                                                          x₁
#> 
#> Group 'B':
#>                     2                        2.75633799071188e-6
#> 1.99999904083213⋅x₁  - 2.99998954617359⋅x₂ - ───────────────────
#>                                                        2
#>                                                      x₁
```

``` r
# Stage 3: Evaluate additional metrics
eval_table <- regressor$evaluate(metrics = c("RMSE", "AIC", "Elbow"))

print("=== Additional metrics ===")
#> [1] "=== Additional metrics ==="
print(eval_table)
#>                          Equation Complexity         Loss       AIC
#> 0                              u1          1 7.930425e+01  578.8935
#> 1                           u1⋅x1          2 1.588952e+01  450.2830
#> 2                         u1⋅x1^2          3 4.898860e-01  171.9436
#> 3                 u1⋅x1^2 + -1⋅u2          4 2.643394e-01  124.5885
#> 4                 u1⋅x1^2 + u2⋅x2          5 7.573504e-09 -206.0486
#> 5         u1⋅x1^2 + u2⋅x2 + -1⋅u3          6 7.556009e-09 -204.0486
#> 6   u1⋅x1^2 + u2⋅x2 + -1⋅u3⋅x1^-1          8 7.284390e-09 -204.0486
#> 7 u1⋅x1^2 + u2⋅x2 + -1⋅u3⋅x1^2^-1          9 7.245014e-09 -204.0486
#>           AIC_w         RMSE      Elbow
#> 0 1.694368e-171 8.905294e+00 0.01615145
#> 1 1.433616e-143 3.986166e+00 0.07100753
#> 2  3.954274e-83 6.999186e-01 0.84262426
#> 3  7.587431e-73 5.141395e-01 0.83529735
#> 4  4.753667e-01 8.702588e-05 0.16452063
#> 5  1.748776e-01 8.692530e-05 0.08365697
#> 6  1.748778e-01 8.534864e-05 0.00000000
#> 7  1.748778e-01 8.511765e-05 0.00000000
```