## ---- include = FALSE---------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ------------------------------------------------------------------------ library(ssr) dataset <- friedman1 # Load friedman1 dataset. head(dataset) set.seed(1234) # Split the dataset into 70% for training and 30% for testing. split1 <- split_train_test(dataset, pctTrain = 70) # Choose 5% of the train set as the labeled set L and the remaining will be the unlabeled set U. split2 <- split_train_test(split1$trainset, pctTrain = 5) L <- split2$trainset # This is the labeled dataset. U <- split2$testset[, -11] # Remove the labels since this is the unlabeled dataset. testset <- split1$testset # This is the test set. ## ----message=FALSE, cache=F---------------------------------------------- # Define list of regressors. regressors <- list(linearRegression=lm, knn=caret::knnreg, svm=e1071::svm) # Fit the model. model <- ssr("Ytrue ~ .", L, U, regressors = regressors, testdata = testset) ## ----message=FALSE, eval = F--------------------------------------------- # regressors <- list("lm", "rvmLinear") ## ----message=FALSE, eval = F--------------------------------------------- # regressors <- list("lm", knn=caret::knnreg) ## ----fig.height=5, fig.width=7------------------------------------------- # Plot RMSE. plot(model) # Get the predictions on the testset. predictions <- predict(model, testset) # Calculate RMSE on the test set. rmse.result <- sqrt(mean((predictions - testset$Ytrue)^2)) rmse.result ## ----fig.height=5, fig.width=7------------------------------------------- plot(model, metric = "mae", ptype = 2) ## ----fig.height=5, fig.width=7, message=FALSE, warning=FALSE, cache=F, eval = FALSE---- # # # Prepare data. # dataset <- friedman1 # set.seed(1234) # split1 <- split_train_test(dataset, pctTrain = 70) # split2 <- split_train_test(split1$trainset, pctTrain = 5) # L <- split2$trainset # U <- split2$testset[, -11] # testset <- split1$testset # # # Define list of regressors. # regressors <- list(linearRegression=lm, knn=caret::knnreg) # # # Specify their parameters. k = 7 for knnreg in this case. # regressors.params <- list(NULL, list(k=7)) # # model2 <- ssr("Ytrue ~ .", L, U, # regressors = regressors, # regressors.params = regressors.params, # testdata = testset) # # plot(model2) # ## ----fig.height=5, fig.width=7, message=FALSE, warning=FALSE, cache=F---- # Define a custom function. myCustomModel <- function(theformula, data, myparam1){ # This is just a wrapper around knnreg but can be anything. # Our custom function also accepts one parameter myparam1. # Now we train a knnreg and pass our custom parameter. m <- caret::knnreg(theformula, data, k = myparam1) return(m) } # Prepare the data dataset <- friedman1 set.seed(1234) split1 <- split_train_test(dataset, pctTrain = 70) split2 <- split_train_test(split1$trainset, pctTrain = 5) L <- split2$trainset U <- split2$testset[, -11] testset <- split1$testset # Specify our custom function as regressor. regressors <- list(customModel = myCustomModel) # Specify the list of parameters. regressors.params <- list(list(myparam1=7)) # Fit the model. model3 <- ssr("Ytrue ~ .", L, U, regressors = regressors, regressors.params = regressors.params, testdata = testset) ## ----fig.height=5, fig.width=7, message=FALSE, warning=TRUE, cache=F----- # Prepare the data dataset <- friedman1 set.seed(1234) split1 <- split_train_test(dataset, pctTrain = 70) split2 <- split_train_test(split1$trainset, pctTrain = 5) L <- split2$trainset U <- split2$testset[, -11] testset <- split1$testset # Get the true labels for the unlabeled set. U.y <- split2$testset[, 11] # Define list of regressors. regressors <- list(linearRegression=lm, knn=caret::knnreg, svm=e1071::svm) # Fit the model. model4 <- ssr("Ytrue ~ .", L, U, regressors = regressors, testdata = testset, U.y = U.y) plot(model4) # Get the predictions on the testset. predictions <- predict(model4, testset) # Calculate RMSE on the test set. sqrt(mean((predictions - testset$Ytrue)^2)) ## ---- eval = F----------------------------------------------------------- # Enrique Garcia-Ceja (2019). ssr: Semi-Supervised Regression Methods. # R package https://CRAN.R-project.org/package=ssr ## ---- eval = F----------------------------------------------------------- # @Manual{enriqueSSR, # title = {ssr: Semi-Supervised Regression Methods}, # author = {Enrique Garcia-Ceja}, # year = {2019}, # note = {R package}, # url = {https://CRAN.R-project.org/package=ssr}, # }