---
title: "Temporal and Streaming Analysis"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Temporal and Streaming Analysis}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5,
  fig.alt = "Temporal plot showing Bayesian surprise or model posterior values over time."
)
```

```{r setup}
library(bayesiansurpriser)
library(ggplot2)
```

## Overview

The `bayesiansurpriser` package supports temporal analysis and streaming data through specialized functions that track how surprise evolves over time.

## Temporal Analysis with surprise_temporal()

For panel data with observations across multiple time periods:

```{r temporal-basic}
# Create example temporal data
set.seed(42)
temporal_data <- data.frame(
  year = rep(2015:2024, each = 5),
  region = rep(c("North", "South", "East", "West", "Central"), times = 10),
  cases = rpois(50, lambda = rep(c(50, 80, 60, 70, 90), times = 10)),
  population = rep(c(10000, 20000, 15000, 18000, 25000), times = 10)
)

head(temporal_data)
```

```{r temporal-compute}
# Compute temporal surprise
# Note: Use unquoted column names (NSE)
result <- surprise_temporal(
  temporal_data,
  time_col = year,
  observed = cases,
  expected = population,
  region_col = region
)

print(result)
```

## Streaming Updates

For real-time data where observations arrive sequentially:

```{r streaming}
# Initial observation
initial_obs <- c(50, 100, 75, 90)
initial_exp <- c(10000, 20000, 15000, 18000)

result <- auto_surprise(initial_obs, initial_exp)
mspace <- get_model_space(result)
cat("Initial prior:", round(mspace$prior, 3), "\n")

# If you want posterior model weights for the initial batch, update explicitly.
updated_mspace <- bayesian_update(mspace, initial_obs)
cat("Initial posterior:", round(updated_mspace$posterior, 3), "\n")

# New observation arrives
new_obs <- c(55, 110, 80, 85)
updated <- update_surprise(result, new_obs, new_expected = initial_exp)
mspace2 <- get_model_space(updated)
cat("Updated posterior:", round(mspace2$posterior, 3), "\n")

# Another observation
newer_obs <- c(60, 105, 70, 95)
updated2 <- update_surprise(updated, newer_obs, new_expected = initial_exp)
mspace3 <- get_model_space(updated2)
cat("Further updated posterior:", round(mspace3$posterior, 3), "\n")
```

## Cumulative Bayesian Updates

Track how the model space posterior evolves as more data accumulates:

```{r cumulative-update}
# Create model space
space <- model_space(
  bs_model_uniform(),
  bs_model_gaussian()
)

# Sequential observations
obs_sequence <- list(
  c(10, 20, 30, 40),
  c(15, 25, 35, 45),
  c(12, 22, 32, 42),
  c(50, 60, 70, 80)  # Anomalous batch
)

# Track posteriors
posteriors <- matrix(nrow = length(obs_sequence), ncol = 2)
current_space <- space

for (i in seq_along(obs_sequence)) {
  current_space <- bayesian_update(current_space, obs_sequence[[i]])
  posteriors[i, ] <- current_space$posterior
}

# Plot evolution
df_post <- data.frame(
  batch = rep(1:4, 2),
  model = rep(c("Uniform", "Gaussian"), each = 4),
  posterior = c(posteriors[, 1], posteriors[, 2])
)

ggplot(df_post, aes(x = batch, y = posterior, color = model)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  labs(
    title = "Model Posterior Evolution",
    x = "Observation Batch",
    y = "Posterior Probability"
  ) +
  theme_minimal()
```

## Rolling Window Analysis

Use rolling windows to detect changes in patterns:

```{r rolling}
# Single time series
observations <- c(50, 52, 48, 55, 120, 115, 125, 60, 58, 62,
                  55, 53, 57, 150, 145, 155, 52, 54, 56, 51)
expected_vals <- rep(500, 20)

result_rolling <- surprise_rolling(
  observations,
  expected = expected_vals,
  window_size = 5
)

# Extract mean surprise for each window
window_surprise <- sapply(result_rolling$windows, function(w) mean(w$result$surprise))

# Plot
plot(seq_along(window_surprise), window_surprise, type = "l",
     xlab = "Window Position", ylab = "Mean Surprise",
     main = "Rolling Window Surprise")
```

## Anomaly Detection Over Time

Use temporal surprise to detect anomalous time periods:

```{r anomaly}
# Simulate data with anomaly at time 15-20
set.seed(123)
n_times <- 30
baseline <- 100
normal_obs <- rpois(n_times, lambda = baseline)
normal_obs[15:20] <- rpois(6, lambda = 200)  # Anomaly period

# Compute surprise for each time point
surprise_vals <- numeric(n_times)
expected_const <- 1000

for (t in 1:n_times) {
  result_t <- auto_surprise(normal_obs[t], expected_const)
  surprise_vals[t] <- result_t$surprise
}

# Plot with anomaly highlighted
df_plot <- data.frame(
  time = 1:n_times,
  surprise = surprise_vals,
  anomaly = ifelse(1:n_times %in% 15:20, "Anomaly", "Normal")
)

ggplot(df_plot, aes(x = time, y = surprise)) +
  geom_line() +
  geom_point(aes(color = anomaly), size = 3) +
  scale_color_manual(values = c("Normal" = "gray50", "Anomaly" = "red")) +
  geom_hline(yintercept = median(surprise_vals), linetype = "dashed") +
  labs(
    title = "Temporal Surprise with Anomaly Detection",
    x = "Time Period",
    y = "Surprise (bits)"
  ) +
  theme_minimal()
```

## Best Practices

1. **Choose appropriate window sizes**: For rolling analysis, balance responsiveness vs. stability
2. **Use streaming for real-time applications**: `update_surprise()` is efficient for incremental updates
3. **Normalize expected values**: Ensure expected values are on the same scale as observations
4. **Consider seasonality**: For periodic data, compare to same-period baselines
5. **Set thresholds carefully**: Use domain knowledge to determine what surprise level is actionable