## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5, fig.alt = "Spatial map of Bayesian surprise values for an sf object." ) ## ----setup-------------------------------------------------------------------- library(bayesiansurpriser) library(sf) library(ggplot2) ## ----load-data---------------------------------------------------------------- # Load the NC SIDS dataset nc <- st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE) # Examine the data head(nc[, c("NAME", "SID74", "BIR74", "SID79", "BIR79")]) ## ----compute------------------------------------------------------------------ result <- surprise(nc, observed = SID74, expected = BIR74) # Result is an sf object with surprise columns added class(result) names(result)[grepl("surprise", names(result))] ## ----st-surprise-------------------------------------------------------------- result2 <- st_surprise(nc, observed = SID74, expected = BIR74) # Identical results all.equal(result$surprise, result2$surprise) ## ----extract------------------------------------------------------------------ # Get surprise values surprise_vals <- get_surprise(result, "surprise") head(surprise_vals) # Get signed surprise values signed_vals <- get_surprise(result, "signed") head(signed_vals) ## ----model-space-------------------------------------------------------------- mspace <- get_model_space(result) print(mspace) ## ----sf-ops------------------------------------------------------------------- # Filter high-surprise regions high_surprise <- result[result$surprise > median(result$surprise), ] cat("Regions with above-median surprise:", nrow(high_surprise), "\n") # Compute centroids centroids <- st_centroid(result) ## ----ggplot-basic------------------------------------------------------------- ggplot(result) + geom_sf(aes(fill = surprise)) + scale_fill_surprise() + labs(title = "Bayesian Surprise: NC SIDS 1974") ## ----ggplot-signed------------------------------------------------------------ ggplot(result) + geom_sf(aes(fill = signed_surprise)) + scale_fill_surprise_diverging() + labs(title = "Signed Surprise", subtitle = "Red = over-representation, Blue = under-representation") ## ----compare------------------------------------------------------------------ # Compute surprise for two time periods result_74 <- surprise(nc, observed = SID74, expected = BIR74) result_79 <- surprise(nc, observed = SID79, expected = BIR79) # Compare nc$surprise_74 <- result_74$surprise nc$surprise_79 <- result_79$surprise nc$surprise_change <- nc$surprise_79 - nc$surprise_74 # Plot change ggplot(nc) + geom_sf(aes(fill = surprise_change)) + scale_fill_gradient2(low = "blue", mid = "white", high = "red", name = "Change in\nSurprise") + labs(title = "Change in Surprise: 1974 to 1979") ## ----custom------------------------------------------------------------------- # Create sophisticated model space custom_space <- model_space( bs_model_uniform(), bs_model_baserate(nc$BIR74), bs_model_gaussian(), bs_model_funnel(nc$BIR74), prior = c(0.1, 0.3, 0.2, 0.4) ) result_custom <- surprise(nc, observed = SID74, expected = BIR74, models = custom_space) # Compute a global posterior model update explicitly if needed updated_space <- bayesian_update(custom_space, nc$SID74) cat("Prior:", custom_space$prior, "\n") cat("Posterior:", updated_space$posterior, "\n") ## ----dplyr, eval=FALSE-------------------------------------------------------- # library(dplyr) # # # Pipe-friendly workflow # nc %>% # st_surprise(observed = SID74, expected = BIR74) %>% # filter(surprise > 0.5) %>% # select(NAME, surprise, signed_surprise, geometry) ## ----large-data, eval=FALSE--------------------------------------------------- # # For large spatial datasets # result <- surprise(large_sf, # observed = cases, # expected = population, # models = model_space( # bs_model_uniform(), # bs_model_baserate(large_sf$population) # ))