## ----setup_1, include = FALSE-------------------------------------------------
# Vignette setup: chunks only evaluate when the BUILD_VIGNETTES environment
# variable is "TRUE" (keeps package checks fast and network-free).
local <- (Sys.getenv("BUILD_VIGNETTES") == "TRUE")

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = local,
  fig.width = 6,
  fig.height = 4
)

library(ncdfgeom)

# nhdplusTools is an optional dependency of this demo. Every call below is
# namespaced (nhdplusTools::), so we only need the namespace to be available;
# requireNamespace() is the idiomatic check for optional dependencies and
# avoids attaching the package.
if (!requireNamespace("nhdplusTools", quietly = TRUE)) {
  warning("need nhdplusTools for this demo")
  knitr::opts_chunk$set(eval = FALSE)
}

# Avoid scientific notation in printed output; restored in the cleanup chunk.
org_options <- options(scipen = 9999)

## -----------------------------------------------------------------------------
# Reference weights generated with the python `gdptools` package; used below
# to cross check the weights that ncdfgeom computes.
gdptools_weights <- read.csv(
  system.file("extdata/gdptools_prl_out.csv", package = "ncdfgeom"),
  colClasses = c("character", "character", "numeric")
)

gdptools_weights <- dplyr::rename(gdptools_weights, gdptools_wght = wght)

gage_id <- "USGS-01482100"

# Retrieve the basin and the HUC08 / HUC12 polygons (web-service calls).
basin <- nhdplusTools::get_nldi_basin(
  list(featureSource = "nwissite", featureId = gage_id)
)

huc08 <- nhdplusTools::get_huc(
  id = na.omit(unique(gdptools_weights$huc8)), type = "huc08"
)

huc12 <- nhdplusTools::get_huc(
  id = na.omit(unique(gdptools_weights$huc12)), type = "huc12"
)

# Overview map: basin outline (thick), HUC08 (medium), HUC12 (thin grey).
org_par <- par(mar = c(0, 0, 0, 0))
plot(sf::st_as_sfc(sf::st_bbox(huc12)))
plot(sf::st_geometry(basin), lwd = 4, add = TRUE)
plot(sf::st_simplify(sf::st_geometry(huc08), dTolerance = 500),
     add = TRUE, lwd = 2)
plot(sf::st_simplify(sf::st_geometry(huc12), dTolerance = 500),
     add = TRUE, lwd = 0.2, border = "grey")
par(org_par)

# Compute areal intersection weights in an equal-area projection (EPSG:6931)
# so that polygon areas — and therefore the weights — are meaningful.
weights <- ncdfgeom::calculate_area_intersection_weights(
  x = sf::st_transform(dplyr::select(huc12, huc12), 6931),
  y = sf::st_transform(dplyr::select(huc08, huc8), 6931),
  normalize = TRUE
)

# NOTE: normalize = TRUE means these weights can not be used for "intensive"
# weighted sums such as population per polygon. See below for more on this.
weights <- dplyr::left_join(weights, gdptools_weights, by = c("huc8", "huc12"))

## -----------------------------------------------------------------------------
# Compare the ncdfgeom weights (`w`) to the gdptools reference weights.
weights$diff <- weights$w - weights$gdptools_wght

# make sure nothing is way out of whack
max(weights$diff, na.rm = TRUE)

# ensure the weights generally sum as we would expect.
sum(weights$gdptools_wght, na.rm = TRUE)
sum(weights$w, na.rm = TRUE)
length(unique(na.omit(weights$huc8)))

# see how many NA values we have in each.
sum(is.na(weights$w))
sum(is.na(weights$gdptools_wght))

# look at cases where gdptools has NA and ncdfgeom does not
weights[is.na(weights$gdptools_wght) & !is.na(weights$w), ]

## -----------------------------------------------------------------------------
library(dplyr)
library(sf)
library(ncdfgeom)

# Build a small synthetic example: three `blue` squares overlapping four
# `pink` squares, all derived from one unit-square template `g`.
g <- list(rbind(c(-1, -1), c(1, -1), c(1, 1), c(-1, 1), c(-1, -1)))

blue1 <- sf::st_polygon(g) * 0.8
blue2 <- blue1 + c(1, 2)
blue3 <- blue1 * 1.2 + c(-1, 2)

pink1 <- sf::st_polygon(g)
pink2 <- pink1 + 2
pink3 <- pink1 + c(-0.2, 2)
pink4 <- pink1 + c(2.2, 0)

blue <- sf::st_sfc(blue1, blue2, blue3)
pink <- sf::st_sfc(pink1, pink2, pink3, pink4)

plot(c(blue, pink), border = NA)
plot(blue, border = "#648fff", add = TRUE)
plot(pink, border = "#dc267f", add = TRUE)

# attach ids so the weight tables below are readable
blue <- sf::st_sf(blue, data.frame(idblue = c(1, 2, 3)))
pink <- sf::st_sf(pink, data.frame(idpink = c(7, 8, 9, 10)))

# label each polygon near its center
text(sapply(sf::st_geometry(blue), \(x) mean(x[[1]][,1]) + 0.4),
     sapply(sf::st_geometry(blue), \(x) mean(x[[1]][,2]) + 0.3),
     blue$idblue, col = "#648fff")
text(sapply(sf::st_geometry(pink), \(x) mean(x[[1]][,1]) + 0.4),
     sapply(sf::st_geometry(pink), \(x) mean(x[[1]][,2])),
     pink$idpink, col = "#dc267f")

# declare attributes spatially constant (silences sf warnings on intersection)
# and assign a projected CRS so intersection areas are meaningful
sf::st_agr(blue) <- sf::st_agr(pink) <- "constant"
sf::st_crs(pink) <- sf::st_crs(blue) <- sf::st_crs(5070)

## -----------------------------------------------------------------------------
(blue_pink_norm_false <-
   calculate_area_intersection_weights(blue, pink, normalize = FALSE))

##
## -----------------------------------------------------------------------------
# Attach example values and the (projected) area of each `blue` polygon.
blue$val <- c(30, 10, 20)
blue$area_blue <- as.numeric(sf::st_area(blue))

(result <- st_drop_geometry(blue) |>
   left_join(blue_pink_norm_false, by = "idblue"))

## -----------------------------------------------------------------------------
# Area-weighted mean for one `pink` polygon done "by hand":
# sum(val * w * area) / sum(w * area) over the contributing `blue` polygons.
((10 * 0.375 * 2.56) + (20 * 0.604167 * 3.6864)) /
  ((0.375 * 2.56) + (0.604167 * 3.6864))

## -----------------------------------------------------------------------------
# The same calculation when one contributing polygon has a missing value:
# the NA term propagates and the whole weighted mean becomes NA.
# (The NA term belongs inside the numerator's parentheses — the original
# placed it outside, so only the NA-propagation made the printed result right.)
((10 * 0.375 * 2.56) + (20 * 0.604167 * 3.6864) + (NA * 1 * 0.81)) /
  ((1 * 0.81) + (0.375 * 2.56) + (0.604167 * 3.6864))

## -----------------------------------------------------------------------------
# An "extensive" (mass-conserving) sum uses the weights directly, no areas.
((10 * 0.375) + (20 * 0.604167))

## -----------------------------------------------------------------------------
(result <- result |>
   group_by(idpink) |> # group so we get one row per target
   # now we calculate the value for each `pink` with fraction of the area of each
   # polygon in `blue` per polygon in `pink` with an equation like this:
   summarize(
     new_val_intensive = sum((val * w * area_blue)) / sum(w * area_blue),
     new_val_extensive = sum(val * w)))

## -----------------------------------------------------------------------------
(blue_pink_norm_true <-
   calculate_area_intersection_weights(select(blue, idblue), pink,
                                       normalize = TRUE))

## -----------------------------------------------------------------------------
(result <- st_drop_geometry(blue) |>
   left_join(blue_pink_norm_true, by = "idblue"))

## -----------------------------------------------------------------------------
# With normalized weights the source areas are already folded into `w`:
((10 * 0.24) + (20 * 0.5568)) / (0.24 + (0.5568))

## -----------------------------------------------------------------------------
(result <- result |>
   group_by(idpink) |> # group so we get one row per target
   # now we calculate the value for each `pink` with fraction of the area of each
   # polygon in `blue` per polygon in `pink` with an equation like this:
   summarize(
     new_val = sum((val * w)) / sum(w)))

##
## ----echo=FALSE---------------------------------------------------------------
# A second synthetic example: four `blue` squares offset inside four `pink`
# squares so the overlap fractions are easy to reason about by eye.
g <- list(rbind(c(-1,-1), c(1,-1), c(1,1), c(-1,1), c(-1,-1)))

blue1 <- st_polygon(g) * 0.75 + c(-.25, -.25)
blue2 <- blue1 + 1.5
blue3 <- blue1 + c(0, 1.5)
blue4 <- blue1 + c(1.5, 0) # renamed from `a4` for consistency with blue1-blue3

pink1 <- st_polygon(g)
pink2 <- pink1 + 2
pink3 <- pink1 + c(0, 2)
pink4 <- pink1 + c(2, 0)

blue <- st_sfc(blue1, blue2, blue3, blue4)
pink <- st_sfc(pink1, pink2, pink3, pink4)

# attach ids so the weight tables below are readable
pink <- st_sf(pink, data.frame(idpink = c(1, 2, 3, 4)))
blue <- st_sf(blue, data.frame(idblue = c(6, 7, 8, 9)))

plot(st_geometry(pink), border = "#dc267f", lwd = 3)
plot(st_geometry(blue), border = "#648fff", lwd = 3, add = TRUE)

# label each polygon near its center
text(sapply(st_geometry(blue), \(x) mean(x[[1]][,1]) + 0.4),
     sapply(st_geometry(blue), \(x) mean(x[[1]][,2]) + 0.3),
     blue$idblue, col = "#648fff")
text(sapply(st_geometry(pink), \(x) mean(x[[1]][,1]) - 0.4),
     sapply(st_geometry(pink), \(x) mean(x[[1]][,2]) - 0.5),
     pink$idpink, col = "#dc267f")

# declare attributes spatially constant and assign a projected CRS so
# intersection areas are meaningful
st_agr(blue) <- st_agr(pink) <- "constant"
st_crs(pink) <- st_crs(blue) <- st_crs(5070)

## ----echo = FALSE-------------------------------------------------------------
blue$val <- c(1, 2, 3, 4)
blue$blue_areasqkm <- 1.5 ^ 2 # each blue square is 1.5 x 1.5

# show the blue values with the pink/blue outlines on top
plot(blue["val"], reset = FALSE, pal = heat.colors)
plot(st_geometry(pink), border = "#dc267f", lwd = 3, add = TRUE, reset = FALSE)
plot(st_geometry(blue), border = "#648fff", lwd = 3, add = TRUE)
text(sapply(st_geometry(blue), \(x) mean(x[[1]][,1]) + 0.4),
     sapply(st_geometry(blue), \(x) mean(x[[1]][,2]) + 0.3),
     blue$idblue, col = "#648fff")
text(sapply(st_geometry(pink), \(x) mean(x[[1]][,1]) - 0.4),
     sapply(st_geometry(pink), \(x) mean(x[[1]][,2]) - 0.5),
     pink$idpink, col = "#dc267f")

## -----------------------------------------------------------------------------
# say we have data from `blue` that we want sampled to `pink`.
# this gives the percent of each `blue` that intersects each `pink`
(blue_pink <- calculate_area_intersection_weights(
  select(blue, idblue), select(pink, idpink), normalize = FALSE))

# NOTE: `w` sums to 1 per `blue` in all cases
summarize(group_by(blue_pink, idblue), w = sum(w))

# Since normalize is false, we apply weights like:
st_drop_geometry(blue) |>
  left_join(blue_pink, by = "idblue") |>
  group_by(idpink) |> # group so we get one row per `pink`
  # now we calculate the value for each `pink` with fraction of the area of each
  # polygon in `blue` per polygon in `pink` with an equation like this:
  summarize(
    new_val_intensive = sum( (val * w * blue_areasqkm) ) / sum(w * blue_areasqkm),
    new_val_extensive = sum( (val * w) ))

# NOTE: `w` is the fraction of the polygon in `blue`. We need to multiply `w` by the
# unique area of the polygon it is associated with to get the area-weighted mean.

# we can go in reverse if we had data from `pink` that we want sampled to `blue`
(pink_blue <- calculate_area_intersection_weights(
  select(pink, idpink), select(blue, idblue), normalize = FALSE))

# NOTE: `w` sums to 1 per `pink` (source) only where `pink` is fully covered by
# `blue` (target).
summarize(group_by(pink_blue, idpink), w = sum(w))

# Now let's look at what happens if we set normalize = TRUE. Here we
# get `blue` as source and `pink` as target but normalize the weights so
# the area of `blue` is built into `w`.
(blue_pink <- calculate_area_intersection_weights(
  select(blue, idblue), select(pink, idpink), normalize = TRUE))

# NOTE: if we summarize by `pink` (target) `w` sums to 1 only where there is full overlap.
summarize(group_by(blue_pink, idpink), w = sum(w))

# Since normalize is TRUE here, the source-polygon areas are already built
# into `w`, so we apply the weights directly:
st_drop_geometry(blue) |>
  left_join(blue_pink, by = "idblue") |>
  group_by(idpink) |> # group so we get one row per `pink`
  # now we weight by the percent of each polygon in `pink` per polygon in `blue`
  summarize(new_val = sum( (val * w) ) / sum( w ))

# NOTE: `w` is the fraction of the polygon from `blue` overlapping the polygon from `pink`.
# The area of `blue` is built into the weight, so we just sum the weight times the
# value per polygon. We can not calculate intensive weighted sums with this weight.

## ----cleanup, echo=FALSE------------------------------------------------------
# Restore the global options changed in the setup chunk and remove the
# NetCDF file the vignette wrote.
options(org_options)
unlink("climdiv_prcp.nc")