## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----message=FALSE, warning=FALSE, fig.width=7, fig.height=6, fig.align='center'----
# Load the package
library(AddiVortes)

# --- Generate Training Data ---
set.seed(42) # for reproducibility

# Create a 5-column matrix of predictors
X <- matrix(runif(2500), ncol = 5)
X[, 1] <- -10 - X[, 1] * 10
X[, 2] <- X[, 2] * 100
X[, 3] <- -9 + X[, 3] * 10
X[, 4] <- 8 + X[, 4]
X[, 5] <- X[, 5] * 10

# Create the response 'Y' based on a rule and add noise
Y_underlying <- ifelse(-1 * X[, 2] > 10 * X[, 1] + 100, 10, 0)
Y <- Y_underlying + rnorm(length(Y_underlying))

# Visualise the relationships in the data
# The colours show the two underlying groups
pairs(X,
  col = ifelse(Y_underlying == 10, "red", "blue"),
  pch = 19, cex = 0.5,
  main = "Structure of Predictor Variables"
)

## ----results='hide'-----------------------------------------------------------
# Fit the model
AModel <- AddiVortes(Y, X, m = 50, showProgress = FALSE)

## -----------------------------------------------------------------------------
# We can check the in-sample root mean squared error (RMSE)
cat("In-sample RMSE:", AModel$inSampleRmse, "\n")

## -----------------------------------------------------------------------------
# --- Generate Test Data ---
set.seed(101) # Use a different seed for the test set

testX <- matrix(runif(1000), ncol = 5)
testX[, 1] <- -10 - testX[, 1] * 10
testX[, 2] <- testX[, 2] * 100
testX[, 3] <- -9 + testX[, 3] * 10
testX[, 4] <- 8 + testX[, 4]
testX[, 5] <- testX[, 5] * 10

# Create the true test response values
testY_underlying <- ifelse(-1 * testX[, 2] > 10 * testX[, 1] + 100, 10, 0)
testY <- testY_underlying + rnorm(length(testY_underlying))

# --- Make Predictions ---
# Predict the mean response
preds <- predict(AModel, testX, showProgress = FALSE)

# Predict the 90% credible interval (the 0.05 and 0.95 quantiles)
# By default, this uses interval = "credible", which only accounts for
# uncertainty in the mean function
preds_q <- predict(AModel, testX,
  "quantile", c(0.05, 0.95),
  showProgress = FALSE
)

# For prediction intervals that also include the model's error variance
# (similar to lm's prediction intervals), use interval = "prediction"
# preds_q_pred <- predict(AModel, testX,
#   "quantile", c(0.05, 0.95),
#   interval = "prediction",
#   showProgress = FALSE
# )
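
## -----------------------------------------------------------------------------
# A quick out-of-sample accuracy sketch using base R only. It assumes `preds`
# is a numeric vector of posterior mean predictions and that `preds_q` has one
# row per test observation, with the 0.05 and 0.95 quantiles in columns 1 and 2.

# Out-of-sample RMSE against the noisy test responses
cat("Out-of-sample RMSE:", round(sqrt(mean((testY - preds)^2)), 2), "\n")

# Empirical coverage of the 90% credible interval for the *underlying* mean
# (credible intervals target the mean function, not individual noisy points)
covered <- testY_underlying >= preds_q[, 1] & testY_underlying <= preds_q[, 2]
cat("Coverage of true mean by 90% credible interval:",
    round(mean(covered), 2), "\n")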
## ----fig.width=7, fig.height=6, fig.align='center'----------------------------
# Plot observed vs. predicted values
plot(testY, preds,
  xlab = "Observed Values",
  ylab = "Predicted Mean Values",
  main = "Out-of-Sample Prediction Performance",
  xlim = range(c(testY, preds_q)),
  ylim = range(c(testY, preds_q)),
  pch = 19, col = "darkblue"
)

# Add error lines for the 90% credible interval
for (i in 1:nrow(preds_q)) {
  segments(testY[i], preds_q[i, 1],
    testY[i], preds_q[i, 2],
    col = rgb(0, 0, 0.5, 0.5), lwd = 1.5
  )
}

# Add lines showing the true underlying means
lines(c(min(testY) - 0.2, 3), c(0, 0), col = "pink", lwd = 3, lty = 2)
lines(c(7, max(testY) + 0.2), c(10, 10), col = "pink", lwd = 3, lty = 2)

# Add a legend
legend("bottomright",
  legend = c("Predicted Mean & 90% Interval", "True Underlying Mean"),
  col = c("darkblue", "pink"),
  pch = c(19, NA), lty = c(1, 2), lwd = c(1.5, 3),
  bty = "n"
)

## ----fig.width=7, fig.height=6, fig.align='center'----------------------------
# Use a subset of the test data for a clearer visualisation
subset_indices <- 1:20
testX_subset <- testX[subset_indices, ]
testY_subset <- testY[subset_indices]

# Get credible intervals (uncertainty in the mean only)
cred_intervals <- predict(AModel, testX_subset,
  type = "quantile",
  interval = "credible",
  quantiles = c(0.025, 0.975),
  showProgress = FALSE
)

# Get prediction intervals (includes the error variance)
pred_intervals <- predict(AModel, testX_subset,
  type = "quantile",
  interval = "prediction",
  quantiles = c(0.025, 0.975),
  showProgress = FALSE
)

# Get mean predictions
mean_preds <- predict(AModel, testX_subset,
  type = "response",
  showProgress = FALSE
)

# Create the comparison plot
plot(1:length(testY_subset), testY_subset,
  xlab = "Observation Index",
  ylab = "Response Value",
  main = "Credible Intervals vs. Prediction Intervals",
  pch = 19, col = "black", cex = 1.2,
  ylim = c(-5, 20)
)

# Add mean predictions
points(1:length(testY_subset), mean_preds, pch = 4, col = "blue", cex = 1)

# Add credible intervals (narrower, in blue)
for (i in 1:length(testY_subset)) {
  segments(i, cred_intervals[i, 1],
    i, cred_intervals[i, 2],
    col = "blue", lwd = 2
  )
}

# Add prediction intervals (wider, in red, offset slightly to the left)
for (i in 1:length(testY_subset)) {
  segments(i - 0.1, pred_intervals[i, 1],
    i - 0.1, pred_intervals[i, 2],
    col = "red", lwd = 2, lty = 1
  )
}

# Add a legend
legend("topright",
  legend = c(
    "Observed Values", "Mean Prediction",
    "95% Credible Interval", "95% Prediction Interval"
  ),
  col = c("black", "blue", "blue", "red"),
  pch = c(19, 4, NA, NA),
  lty = c(NA, NA, 1, 1),
  lwd = c(NA, NA, 2, 2),
  bty = "n"
)

## -----------------------------------------------------------------------------
# Calculate the average interval widths
cred_width <- mean(cred_intervals[, 2] - cred_intervals[, 1])
pred_width <- mean(pred_intervals[, 2] - pred_intervals[, 1])

cat("Average 95% credible interval width:", round(cred_width, 2), "\n")
cat("Average 95% prediction interval width:", round(pred_width, 2), "\n")
cat("Ratio (prediction/credible):", round(pred_width / cred_width, 2), "\n")
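
## -----------------------------------------------------------------------------
# A minimal coverage sketch to accompany the width comparison above, assuming
# both interval matrices hold lower bounds in column 1 and upper bounds in
# column 2. With only 20 observations these proportions are rough, but the
# wider prediction intervals should cover the noisy responses far more often.
cred_cov <- mean(testY_subset >= cred_intervals[, 1] &
                 testY_subset <= cred_intervals[, 2])
pred_cov <- mean(testY_subset >= pred_intervals[, 1] &
                 testY_subset <= pred_intervals[, 2])

cat("Observed values inside 95% credible intervals:  ", round(cred_cov, 2), "\n")
cat("Observed values inside 95% prediction intervals:", round(pred_cov, 2), "\n")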