MLmorph integrates morphological modeling with machine learning (random forests) to support structured decision-making. The package enumerates a morphospace—the set of feasible configurations across selected variables—and estimates class probabilities over that space. It includes:
# Install the development version from GitHub (requires devtools).
# install.packages("devtools")
devtools::install_github("theogrost/MLmorph")

# Attach the package so its functions can be called unqualified below.
library(MLmorph)
# Simulate a small example data set; the seed makes the draws reproducible.
set.seed(1)
n <- 120

# Outcome: a factor sampled with replacement from the letters "a", "b", "c".
y <- factor(sample(letters[1:3], n, TRUE))

# Predictors: uniform numeric draws binned into factors (3 and 4 bins).
x1 <- factorize_numeric_vector(runif(n, 10, 20), breaks_no = 3)
x2 <- factorize_numeric_vector(runif(n, 1, 2), breaks_no = 4)

df <- data.frame(y, x1, x2)
# 1) Train a random forest and validate on a holdout
# `dependent` names the outcome column of `df`; `train_validate_split = 0.75`
# presumably keeps 75% of rows for training (confirm in ?create_rf_model).
fit <- create_rf_model(
  df,
  dependent = "y",
  ntree = 100,
  train_validate_split = 0.75
)

fit$model                      # randomForest object
head(fit$variables_importance) # importance per predictor
fit$model_performance_on_test  # caret::confusionMatrix
# Build the morphospace from the data and the fitted random forest.
ms <- create_morphospace(df, fit$model)
str(ms$morphospace)
# Columns: predictors (x1, x2), '<dependent>' (class label), 'calculated' (probability),
# 'purely_simulated' (TRUE if configuration not observed in df)

# Example: top 10 configurations by predicted probability
head(ms$morphospace[order(ms$morphospace$calculated, decreasing = TRUE), ], 10)

# Probability mass per class (sums to 1 for each unique predictor combination)
aggregate(calculated ~ x1 + x2, data = ms$morphospace, sum)
# Numeric → ordered factor via equal-frequency bins (default)
z <- runif(12)
fz <- factorize_numeric_vector(z, breaks_no = 3)

# Logical → factor with labeled levels
fb <- factorize_binary_vector(c(TRUE, FALSE, TRUE))

# Character → factor with stable, numbered labels
fc <- factorize_character_vector(c("alpha", "beta", "alpha"))

# Apply heuristics column-wise
# The shorter `b` and `c` vectors are recycled by data.frame() to 20 rows.
df2 <- factorize_nicely_dataframe(data.frame(
  a = runif(20), b = c(TRUE, FALSE), c = c("x", "y", "x", "z")
))
str(df2)
# Each example writes a small temporary file and reads it back with
# load_data(), which is given only the file path.

# CSV
tmp_csv <- tempfile(fileext = ".csv")
utils::write.csv(
  data.frame(a = 1:3, b = c("x", "y", "z")),
  tmp_csv,
  row.names = FALSE
)
dat_csv <- load_data(tmp_csv)

# JSON
tmp_json <- tempfile(fileext = ".json")
jsonlite::write_json(
  list(a = 1:2, b = c("u", "v")),
  tmp_json,
  auto_unbox = TRUE
)
dat_json <- load_data(tmp_json)

# XLSX
tmp_xlsx <- tempfile(fileext = ".xlsx")
openxlsx::write.xlsx(data.frame(a = 1:2, b = c("m", "n")), tmp_xlsx)
dat_xlsx <- load_data(tmp_xlsx)
# Launch the interactive app from the installed package
# (opens in browser; not run during automated checks)
# MLmorph()
To ensure reproducibility, record:

- the variable set and factorization (binning method and parameters),
- model hyperparameters (e.g., `ntree`),
- the random seed and train/validate split.
If you use MLmorph in research or consulting reports, please cite the package and your model specification.
MIT © Oskar Kosch. See `LICENSE.md` for details.
Please report bugs or feature requests at this repository’s Issues page.