## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup-------------------------------------------------------------------- library(SmoothPLS) #library(ggplot2) ## ----------------------------------------------------------------------------- nind = 100 # number of individuals (train set) start = 0 # First time end = 100 # end time lambda_0 = 0.2 # Exponential law parameter for state 0 lambda_1 = 0.1 # Exponential law parameter for state 1 prob_start = 0.5 # Probability of starting with state 1 curve_type = 'cat' TTRatio = 0.2 # Train Test Ratio means we have floor(nind*TTRatio/(1-TTRatio)) # individuals in the test set NotS_ratio = 0.2 # noise variance over total variance for Y beta_0_real=5.4321 # Intercept value for the link between X(t) and Y nbasis = 10 # number of basis functions norder = 4 # 4 for cubic splines basis beta_real_func = beta_1_real_func # link between X(t) and Y regul_time = seq(start, end, 5) # regularisation time sequence regul_time_0 = seq(start, end, 1) int_mode = 1 # in case of integration errors. ## ----------------------------------------------------------------------------- plot(regul_time, beta_real_func(regul_time)) lines(regul_time, beta_real_func(regul_time)) ## ----df----------------------------------------------------------------------- df = generate_X_df(nind = nind, start = start, end = end, curve_type = 'cat', lambda_0 = lambda_0, lambda_1 = lambda_1, prob_start = prob_start) head(df) ## ----df_test------------------------------------------------------------------ nind_test = number_of_test_id(TTRatio = TTRatio, nind = nind) df_test = generate_X_df(nind = nind_test, start = start, end = end, curve_type = 'cat', lambda_0 = lambda_0, lambda_1 = lambda_1, prob_start = prob_start) ## ----------------------------------------------------------------------------- names(df) print(paste0("We have ", length(unique(df_test$id)), " id on test set.")) ## ----plot df individuals------------------------------------------------------ plot_CFD_individuals(df) ## ----------------------------------------------------------------------------- plot_CFD_individuals(df,by_cfda = TRUE) ## ----Y_df & Y_df_test--------------------------------------------------------- Y_df = generate_Y_df(df, curve_type = 'cat', beta_real_func_or_list = beta_real_func, beta_0_real, NotS_ratio, id_col = 'id', time_col = 'time') Y_df_test = generate_Y_df(df_test, curve_type = 'cat', beta_real_func_or_list = beta_real_func, beta_0_real, NotS_ratio, id_col = 'id', time_col = 'time') Y = Y_df$Y_noised names(Y_df) ## ----hist Y noised------------------------------------------------------------ oldpar <- par(mfrow=c(1, 2)) hist(Y_df$Y_real) hist(Y_df$Y_noised) par(oldpar) ## ----------------------------------------------------------------------------- length(regul_time) ## ----------------------------------------------------------------------------- df_regul = regularize_time_series(df, time_seq = regul_time, curve_type = curve_type) df_wide = convert_to_wide_format(df_regul) df_test_regul = regularize_time_series(df_test, time_seq = regul_time, curve_type = curve_type) df_test_wide = convert_to_wide_format(df_test_regul) ## ----basis creation----------------------------------------------------------- basis = create_bspline_basis(start, end, nbasis, norder) # ORTHOGONAL #basis = fda::create.fourier.basis(rangeval=c(start, end), nbasis=(nbasis)) plot(basis, main=paste0(nbasis, " Function basis")) # note cubic or Fourier ## ----------------------------------------------------------------------------- naive_pls_obj = naivePLS(df_list = df, Y = Y, curve_type_obj = 'cat', regul_time_obj = regul_time, id_col_obj = 'id', time_col_obj = 'time', print_steps = TRUE, plot_rmsep = TRUE, print_nbComp = TRUE, plot_reg_curves = TRUE) names(naive_pls_obj) ## ----------------------------------------------------------------------------- plot(x = regul_time, y = naive_pls_obj$opti_reg_coef, xlab='regul_time', ylab='beta coefficient value', main="naive pls regression coefficients") lines(x = regul_time, y = naive_pls_obj$opti_reg_coef, col='blue') lines(x=start:end, y=beta_real_func(start:end, end), col='red') ## ----------------------------------------------------------------------------- Y_hat = predict(naive_pls_obj$plsr_model, ncomp = naive_pls_obj$nbCP_opti, newdata = as.matrix(df_wide[, -c(1)])) cat("R^2 \n") r_squared_values(Y, Y_hat) r_squared_values(Y_df$Y_real, Y_hat) cat("Press \n") press_values(Y, Y_hat) cat("RMSEP \n") sqrt(press_values(Y, Y_hat)/nind) ## ----------------------------------------------------------------------------- Y_hat_test = predict(naive_pls_obj$plsr_model, ncomp = naive_pls_obj$nbCP_opti, newdata = as.matrix(df_test_wide[, -c(1)])) cat("R^2 \n") print(paste0("There is ", 100*NotS_ratio, "% of noised in Y")) cat("R^2 on Y_df_test$Y_noised \n") r_squared_values(Y_df_test$Y_noised, Y_hat_test) cat("R^2 on Y_df_test$Y_real \n") r_squared_values(Y_df_test$Y_real, Y_hat_test) cat("Press \n") press_values(Y_df_test$Y_real, Y_hat_test) cat("RMSEP \n") sqrt(press_values(Y_df_test$Y_real, Y_hat_test)/length(Y_hat_test)) ## ----------------------------------------------------------------------------- fpls_obj = funcPLS(df_list = list(df), Y = Y, basis_obj = basis, regul_time_obj = regul_time, curve_type_obj = 'cat', plot_rmsep = TRUE, plot_reg_curves = TRUE, print_steps =TRUE) ## ----------------------------------------------------------------------------- y_lim = eval_max_min_y(f_list = list(beta_real_func, fpls_obj$reg_obj$CatFD), regul_time = regul_time_0) plot(regul_time_0, beta_real_func(regul_time_0), type='l', xlab="Beta_t", ylim = c(-2, 3.5)) plot(fpls_obj$reg_obj$CatFD, add=TRUE, col='red') legend("topleft", legend = c("delta_FPLS"), col = c("red"), lty = 1, lwd = 1) ## ----------------------------------------------------------------------------- Y_hat_fpls = (predict(fpls_obj$plsr_model, ncomp = fpls_obj$nbCP_opti, newdata = fpls_obj$trans_alphas) + fpls_obj$reg_obj$Intercept + mean(Y)) sqrt(press_values(Y_df$Y_real, Y_hat_fpls)/length(Y_hat_fpls)) sqrt(press_values(Y_df$Y_noised, Y_hat_fpls)/length(Y_hat_fpls)) cat("NotS_ratio \n") print(paste0("There is ", 100*NotS_ratio, "% of noise in Y")) cat("R^2 on Y_df_test$Y_noised \n") r_squared_values(Y_df$Y_noised, Y_hat_fpls) cat("R^2 on Y_df_test$Y_real \n") r_squared_values(Y_df$Y_real, Y_hat_fpls) ## ----------------------------------------------------------------------------- Y_hat_fpls_test = smoothPLS_predict(df_predict = df_test, delta_list = fpls_obj$reg_obj, curve_type = curve_type, regul_time_obj = regul_time) sqrt(press_values(Y_df_test$Y_real, Y_hat_fpls_test)/length(Y_hat_fpls_test)) sqrt(press_values(Y_df_test$Y_noised, Y_hat_fpls_test)/length(Y_hat_fpls_test)) cat("R^2 \n") print(paste0("There is ", 100*NotS_ratio, "% of noise in Y")) cat("R^2 on Y_df_test$Y_noised \n") r_squared_values(Y_df_test$Y_noised, Y_hat_fpls_test) cat("R^2 on Y_df_test$Y_real \n") r_squared_values(Y_df_test$Y_real, Y_hat_fpls_test) ## ----------------------------------------------------------------------------- spls_obj = smoothPLS(df_list = df, Y = Y, basis_obj = basis, curve_type_obj = 'cat', orth_obj = TRUE, int_mode = 1, print_steps = TRUE, print_nbComp = TRUE, plot_rmsep = TRUE, plot_reg_curves = TRUE, id_col_obj = 'id',time_col_obj = 'time', jackknife = TRUE, validation = 'LOO', parallel=FALSE) ## ----------------------------------------------------------------------------- delta_0 = spls_obj$reg_obj$Intercept delta_0 delta = spls_obj$reg_obj$CatFD_1_state_1 plot(delta) ## ----------------------------------------------------------------------------- y_lim = eval_max_min_y(f_list = list(beta_real_func, delta), regul_time = regul_time_0) plot(regul_time_0, beta_real_func(regul_time_0), type='l', xlab="Beta_t", ylim = c(-2, 3.5)) plot(delta, add=TRUE, col='blue') legend("topleft", legend = c("delta_SmoothPLS"), col = c("blue"), lty = 1, lwd = 1) ## ----------------------------------------------------------------------------- #delta_0_spls Y_hat_spls = smoothPLS_predict(df_predict = df, delta_list = list(delta_0, delta), curve_type = 'cat', regul_time_obj = regul_time, int_mode = int_mode) sqrt(press_values(Y_df$Y_real, Y_hat_spls)/length(Y_hat_spls)) sqrt(press_values(Y_df$Y_noised, Y_hat_spls)/length(Y_hat_spls)) cat("R^2 \n") print(paste0("There is ", 100*NotS_ratio, "% of noise in Y")) cat("R^2 on Y_df_test$Y_noised \n") r_squared_values(Y_df$Y_noised, Y_hat_spls) cat("R^2 on Y_df_test$Y_real \n") r_squared_values(Y_df$Y_real, Y_hat_spls) ## ----------------------------------------------------------------------------- Y_hat_spls_test = smoothPLS_predict(df_predict = df_test, delta_list = list(delta_0, delta), curve_type = curve_type, regul_time_obj = regul_time, int_mode = int_mode) sqrt(press_values(Y_df_test$Y_real, Y_hat_spls)/length(Y_hat_spls_test)) sqrt(press_values(Y_df_test$Y_noised, Y_hat_spls)/length(Y_hat_spls_test)) cat("R^2 \n") print(paste0("There is ", 100*NotS_ratio, "% of noise in Y")) cat("R^2 on Y_df_test$Y_noised \n") r_squared_values(Y_df_test$Y_noised, Y_hat_spls_test) cat("R^2 on Y_df_test$Y_real \n") r_squared_values(Y_df_test$Y_real, Y_hat_spls_test) ## ----------------------------------------------------------------------------- cat("curve_1 : smooth PLS regression curve.\n") cat("curve_2 : functional PLS regression curve.\n") cat("curve_3 : naive PLS regression coefficients\n") evaluate_curves_distances(real_f = beta_real_func, regul_time = regul_time, fun_fd_list = list( delta, fpls_obj$reg_obj$CatFD, approxfun(x = regul_time, y = naive_pls_obj$opti_reg_coef) ) ) ## ----------------------------------------------------------------------------- y_lim = eval_max_min_y(f_list = list(spls_obj$reg_obj$CatFD_1_state_1, fpls_obj$reg_obj$CatFD_1_state_1, approxfun(x = regul_time, y = naive_pls_obj$opti_reg_coef), beta_real_func ), regul_time = regul_time_0) plot(regul_time_0, beta_real_func(regul_time_0), col = 'black', ylim = y_lim, xlab = 'Time', ylab = 'Value', type = 'l') lines(regul_time_0, approxfun(x = regul_time, y = naive_pls_obj$opti_reg_coef)(regul_time_0), col = 'green') title(paste0(names(spls_obj$reg_obj)[2], " regression curves")) plot(spls_obj$reg_obj$CatFD_1_state_1, col = 'blue', add = TRUE) plot(fpls_obj$reg_obj$CatFD_1_state_1, col = 'red', add = TRUE) legend("topleft", legend = c("Real curve", "NaivePLS coef", "SmoothPLS reg curve", "FunctionalPLS reg curve"), col = c("black", "green", "blue", "red"), lty = 1, lwd = 1) ## ----------------------------------------------------------------------------- y_lim = eval_max_min_y(f_list = list(spls_obj$reg_obj$CatFD_1_state_1, fpls_obj$reg_obj$CatFD_1_state_1, beta_real_func ), regul_time = regul_time_0) plot(regul_time_0, beta_real_func(regul_time_0), type='l', ylab="Values", xlab = 'Time', ylim = y_lim, col = 'black') title(paste0(names(spls_obj$reg_obj)[2], " regression curves")) plot(spls_obj$reg_obj$CatFD_1_state_1, add=TRUE, col='blue') plot(fpls_obj$reg_obj$CatFD_1_state_1, add=TRUE, col='red') legend("topleft", legend = c("Real curve", "SmoothPLS", "FuncPLS"), col = c("black", "blue", "red"), lty = 1, lwd = 1) ## ----------------------------------------------------------------------------- plot(fda::deriv.fd(expr = delta, Lfdobj = 2), col='blue') plot(fda::deriv.fd(expr = fpls_obj$reg_obj$CatFD, Lfdobj = 2), add=TRUE, col='red') title("Second derivatives") legend("bottomleft", legend = c("delta_SmoothPLS''", "delta_FPLS''"), col = c("blue", "red"), lty = 1, lwd = 1) ## ----------------------------------------------------------------------------- train_results = data.frame(matrix(ncol = 5, nrow = 3)) colnames(train_results) = c("PRESS", "RMSE", "MAE", "R2", "var_Y") rownames(train_results) = c("NaivePLS", "FPLS", "SmoothPLS") test_results = train_results ## ----------------------------------------------------------------------------- print(paste0("There is ", 100*NotS_ratio, "% of noised in Y")) ## ----------------------------------------------------------------------------- Y_train = Y_df$Y_noised # Naive Y_hat = predict(naive_pls_obj$plsr_model, ncomp = naive_pls_obj$nbCP_opti, newdata = as.matrix(df_wide[, -c(1)])) train_results["NaivePLS", ] = evaluate_results(Y_train, Y_hat) # FPLS Y_hat_fpls = (predict(fpls_obj$plsr_model, ncomp = fpls_obj$nbCP_opti, newdata = fpls_obj$trans_alphas) + fpls_obj$reg_obj$Intercept + mean(Y)) Y_hat_fpls = smoothPLS_predict(df_predict = df, delta_list = fpls_obj$reg_obj, curve_type = curve_type, regul_time_obj = regul_time, int_mode = int_mode) train_results["FPLS", ] = evaluate_results(Y_train, Y_hat_fpls) # Smooth PLS Y_hat_spls = smoothPLS_predict(df_predict = df, delta_list = list(delta_0, delta), curve_type = 'cat', regul_time_obj = regul_time, int_mode = int_mode) train_results["SmoothPLS", ] = evaluate_results(Y_train, Y_hat_spls) train_results["NaivePLS", "nb_cp"] = naive_pls_obj$nbCP_opti train_results["FPLS", "nb_cp"] = fpls_obj$nbCP_opti train_results["SmoothPLS", "nb_cp"] = spls_obj$nbCP_opti ## ----------------------------------------------------------------------------- train_results ## ----------------------------------------------------------------------------- oldpar <- par(mfrow=c(1,2)) hist(Y_train - Y_hat_fpls) hist(Y_train - Y_hat_spls) par(oldpar) ## ----------------------------------------------------------------------------- Y_test = Y_df_test$Y_noised # Naive Y_hat = naivePLS_predict(naive_pls_obj = naive_pls_obj, df_predict_list = df_test, regul_time_obj = regul_time, curve_type_obj = 'cat') test_results["NaivePLS", ] = evaluate_results(Y_test, Y_hat) # FPLS Y_hat_fpls = smoothPLS_predict(df_predict = df_test, delta_list = fpls_obj$reg_obj, curve_type = curve_type, regul_time_obj = regul_time, int_mode = int_mode) test_results["FPLS", ] = evaluate_results(Y_test, Y_hat_fpls) # Smooth PLS Y_hat_spls = smoothPLS_predict(df_predict = df_test, delta_list = list(delta_0, delta), curve_type = curve_type, regul_time_obj = regul_time, int_mode = int_mode) test_results["SmoothPLS", ] = evaluate_results(Y_test, Y_hat_spls) test_results["NaivePLS", "nb_cp"] = naive_pls_obj$nbCP_opti test_results["FPLS", "nb_cp"] = fpls_obj$nbCP_opti test_results["SmoothPLS", "nb_cp"] = spls_obj$nbCP_opti ## ----------------------------------------------------------------------------- Y_hat_naivePLS = naivePLS_predict(naive_pls_obj = naive_pls_obj, df_predict_list = df_test, regul_time_obj = regul_time, curve_type_obj = 'cat') ## ----------------------------------------------------------------------------- test_results ## ----------------------------------------------------------------------------- oldpar <- par(mfrow=c(1,2)) hist(Y_test - Y_hat_fpls) hist(Y_test - Y_hat_spls) par(oldpar) ## ----------------------------------------------------------------------------- plot_model_metrics_base(train_results, test_results) ## ----------------------------------------------------------------------------- plot_model_metrics_base(train_results, test_results, models_to_plot = c('FPLS', 'SmoothPLS')) ## ----basis creation Fourier--------------------------------------------------- basis_f = fda::create.fourier.basis(rangeval=c(start, end), nbasis=(nbasis)) plot(basis_f, main=paste0(nbasis, " Function basis")) # note cubic or Fourier ## ----------------------------------------------------------------------------- fpls_obj_2 = funcPLS(df_list = df, Y = Y, basis_obj = basis_f, regul_time_obj = regul_time, curve_type_obj = curve_type, print_steps = TRUE, plot_rmsep = TRUE, print_nbComp = TRUE, plot_reg_curves = TRUE) ## ----------------------------------------------------------------------------- spls_obj_2 = smoothPLS(df_list = df, Y = Y, basis_obj = basis_f, orth_obj = FALSE, curve_type_obj = curve_type, print_steps = TRUE, plot_rmsep = TRUE, print_nbComp = TRUE, plot_reg_curves = TRUE) ## ----------------------------------------------------------------------------- delta_0_2 = spls_obj_2$reg_obj$Intercept delta_2 = spls_obj_2$reg_obj$CatFD_1_state_1 plot(delta_2) ## ----------------------------------------------------------------------------- plot(regul_time_0, beta_real_func(regul_time_0), type='l', xlab="Beta_t", ylim = c(-2, 3.5)) plot(delta_2, add=TRUE, col='blue') plot(fpls_obj_2$reg_obj$CatFD, add=TRUE, col='red') legend("topleft", legend = c("delta_SmoothPLS", "delta_FPLS"), col = c("blue", "red"), lty = 1, lwd = 1) ## ----------------------------------------------------------------------------- evaluate_curves_distances(real_f = beta_real_func, regul_time = regul_time, fun_fd_list = list(delta_2, fpls_obj_2$reg_obj$CatFD)) ## ----------------------------------------------------------------------------- cat("curve_1 : smooth PLS regression curve.\n") cat("curve_2 : functional PLS regression curve.\n") cat("curve_3 : naive PLS regression coefficients\n") evaluate_curves_distances(real_f = beta_real_func, regul_time = regul_time, fun_fd_list = list( spls_obj_2$reg_obj$CatFD_1_state_1, fpls_obj_2$reg_obj$CatFD_1_state_1, approxfun(x = regul_time, y = naive_pls_obj$opti_reg_coef) ) ) ## ----------------------------------------------------------------------------- y_lim = eval_max_min_y(f_list = list(spls_obj_2$reg_obj$CatFD_1_state_1, fpls_obj_2$reg_obj$CatFD_1_state_1, approxfun(x = regul_time, y = naive_pls_obj$opti_reg_coef), beta_real_func ), regul_time = regul_time_0) plot(regul_time_0, beta_real_func(regul_time_0), col = 'black', ylim = y_lim, xlab = 'Time', ylab = 'Value', type = 'l') lines(regul_time_0, approxfun(x = regul_time, y = naive_pls_obj$opti_reg_coef)(regul_time_0), col = 'green') title(paste0(names(spls_obj_2$reg_obj)[2], " regression curves")) plot(spls_obj_2$reg_obj$CatFD_1_state_1, col = 'blue', add = TRUE) plot(fpls_obj_2$reg_obj$CatFD_1_state_1, col = 'red', add = TRUE) legend("topleft", legend = c("Real curve", "NaivePLS coef", "SmoothPLS reg curve", "FunctionalPLS reg curve"), col = c("black", "green", "blue", "red"), lty = 1, lwd = 1) ## ----------------------------------------------------------------------------- y_lim = eval_max_min_y(f_list = list(spls_obj_2$reg_obj$CatFD_1_state_1, fpls_obj_2$reg_obj$CatFD_1_state_1, beta_real_func), regul_time = regul_time_0) plot(regul_time_0, beta_real_func(regul_time_0), type='l', ylab="Values", xlab = 'Time', ylim = y_lim, col = 'black') title(paste0(names(spls_obj_2$reg_obj)[2], " regression curves")) plot(spls_obj_2$reg_obj$CatFD_1_state_1, add=TRUE, col='blue') plot(fpls_obj_2$reg_obj$CatFD_1_state_1, add=TRUE, col='red') legend("topleft", legend = c("Real curve", "SmoothPLS", "FuncPLS"), col = c("black", "blue", "red"), lty = 1, lwd = 1) ## ----------------------------------------------------------------------------- train_results_f = data.frame(matrix(ncol = 5, nrow = 3)) colnames(train_results_f) = c("PRESS", "RMSE", "MAE", "R2", "var_Y") rownames(train_results_f) = c("NaivePLS", "FPLS", "SmoothPLS") test_results_f = train_results_f ## ----------------------------------------------------------------------------- print(paste0("There is ", 100*NotS_ratio, "% of noised in Y")) ## ----------------------------------------------------------------------------- Y_train = Y_df$Y_noised # Naive Y_hat = predict(naive_pls_obj$plsr_model, ncomp = naive_pls_obj$nbCP_opti, newdata = as.matrix(df_wide[, -c(1)])) train_results_f["NaivePLS", ] = evaluate_results(Y_train, Y_hat) # FPLS Y_hat_fpls = (predict(fpls_obj_2$plsr_model, ncomp = fpls_obj_2$nbCP_opti, newdata = fpls_obj_2$trans_alphas) + fpls_obj_2$reg_obj$Intercept + mean(Y)) train_results_f["FPLS", ] = evaluate_results(Y_train, Y_hat_fpls) # Smooth PLS Y_hat_spls = smoothPLS_predict(df_predict_list = df, delta_list = list(delta_0_2, delta_2), curve_type = 'cat', int_mode = int_mode) train_results_f["SmoothPLS", ] = evaluate_results(Y_train, Y_hat_spls) train_results_f["NaivePLS", "nb_cp"] = naive_pls_obj$nbCP_opti train_results_f["FPLS", "nb_cp"] = fpls_obj$nbCP_opti train_results_f["SmoothPLS", "nb_cp"] = spls_obj$nbCP_opti ## ----------------------------------------------------------------------------- train_results_f ## ----------------------------------------------------------------------------- Y_test = Y_df_test$Y_noised # Naive Y_hat = predict(naive_pls_obj$plsr_model, ncomp = naive_pls_obj$nbCP_opti, newdata = as.matrix(df_test_wide[, -c(1)])) test_results_f["NaivePLS", ] = evaluate_results(Y_test, Y_hat) # FPLS Y_hat_fpls = smoothPLS_predict(df_predict = df_test, delta_list = fpls_obj_2$reg_obj, curve_type = curve_type) test_results_f["FPLS", ] = evaluate_results(Y_test, Y_hat_fpls) # Smooth PLS Y_hat_spls_f = smoothPLS_predict(df_predict = df_test, delta_list = list(delta_0_2, delta_2), curve_type = curve_type, int_mode = int_mode) test_results_f["SmoothPLS", ] = evaluate_results(Y_test, Y_hat_spls_f) test_results_f["NaivePLS", "nb_cp"] = naive_pls_obj$nbCP_opti test_results_f["FPLS", "nb_cp"] = fpls_obj$nbCP_opti test_results_f["SmoothPLS", "nb_cp"] = spls_obj$nbCP_opti ## ----------------------------------------------------------------------------- test_results_f ## ----------------------------------------------------------------------------- plot_model_metrics_base(train_results_f, test_results_f) ## ----------------------------------------------------------------------------- plot_model_metrics_base(train_results_f, test_results_f, models_to_plot = c('FPLS', 'SmoothPLS'))