## ------------------------------------------------------------------------ #- an introduction example mm <- matrix( data = c( 1, 5, 8, 5, 8, 3, 13, 3, 13, 3, 2, 2, 21, 2, 21 ), nrow = 3, ncol = 5, byrow = TRUE, dimnames = list( paste0("s", 1:3), paste0("t", 1:5) )) mm ## ----message=FALSE, results='hide'--------------------------------------- #- a zip find example library(skm) # also load data.table, magrittr and Rcpp #- create source - target - population weighted distance matrix #- source dsrc <- skm::zip2012[zip %in% skm::source_zip_list, .(s = zip, s_lat = lat, s_lng = lng)] #- target ddst <- skm::zip2012[ , .(t = zip, t_lat = lat, t_lng = lng, p_pop = p_pop)] #- create source - target - population weighted distance data.table #- CJ.data.table extends CJ from data.table to cross join data.table - see yg::CJ.dt CJ.data.table <- function(X, Y) { stopifnot(is.data.table(X), is.data.table(Y)) k = NULL X = X[, c(k = 1L, .SD)] setkey(X, k) Y = Y[, c(k = 1L, .SD)] setkey(Y, NULL) return( X[Y, allow.cartesian=TRUE][, k := NULL][] ) } ddzt <- CJ.data.table(dsrc, ddst) %>% setorder(s, t) %>% .[ , `:=`(d = skm::dist_wlatlng(s_lat, s_lng, t_lat, t_lng), w = p_pop)] #- ddzt has 1,471,044 row each with source zip in s, target zip in t, #- and population weighted distance in d #- convert source - target - population weighted distance data.table into matrix dmtx <- ddzt %>% .[ , `:=`( wd = d * w )] %>% data.table::dcast( s ~ t, value.var = "wd" ) sname <- dmtx[["s"]] set(dmtx, i = NULL, j = 1L, value = NULL) # dmtx[ , `:=`(s = NULL)] tname <- names(dmtx) dmtx <- as.matrix(dmtx) rownames(dmtx) <- sname colnames(dmtx) <- tname ## ------------------------------------------------------------------------ round(dmtx[1L:4L, 1L:10L] * 10L^4L, 4L) ## ------------------------------------------------------------------------ #- select 10 from 51 candidate locations to provide service for 28,444 houses #- with the objective of minimizing the population weighted average distances #- note that the row indicies are indexed from 0 following the cpp convention m0 <- skm::skm_sgl_cpp( x = dmtx, s_init = c(0L, 1L, 2L, 3L, 5L, 8L, 13L, 21L, 34L, 4L), s_must = integer(0), max_it = 1000L ) m0$o # objective: sum of column minimial m0$s # selected source: row index set indexed from 0 so c(0L, 4L) implies rownames(dmtx)[c(1L, 5L)] = c("02124", "05452") sname[m0$s + 1L] #- must have candidate locations rownames(dmtx)[c(5L, 11L)] = c("05452", "20011") in the solution m1 <- skm::skm_sgl_cpp( x = dmtx, s_init = c(0L, 1L, 2L, 3L, 5L, 8L, 13L, 21L, 34L, 4L), s_must = c(4L, 10L), max_it = 1000L ) m1$o sname[m1$s + 1L] ## ------------------------------------------------------------------------ m2 <- skm::skm_rgi_cpp( x = dmtx, k = 10L, s_must = integer(0), max_it = 1000L ) m2$o sname[m2$s + 1L] ## ------------------------------------------------------------------------ #- g is important when the size of s is large #- g should be an integer vector same length as s, value indicate groups gname <- sname %>% vapply(substr, "", 1L, 1L) %>% unname %>% as.integer() m3 <- skm::skm_rgs_cpp( x = dmtx, k = 10L, g = gname, s_must = integer(0), max_it = 1000L ) m3$o sname[m3$s + 1L] ## ------------------------------------------------------------------------ #- multiple random initial points to overcome local optimial issue m4 <- skm::skm_mlp_cpp( x = dmtx, k = 10L, s_must = integer(0), max_it = 1000L, max_at = 4L ) m4$o sname[m4$s + 1L] m4$o_list # optimial objective find with each of the 4 random initial points #- str(m4$s_list) is num [1:4, 1:10] but not a matrix? apply(m4$s_list, 2L, function(s) { sname[s + 1L] }) # selected sources obtained with each of the 4 random initial points ## ------------------------------------------------------------------------ #- multiple random initial points initialized using stratified sampling w.r.t g m5 <- skm::skm_mls_cpp( x = dmtx, k = 10L, g = gname, s_must = integer(0), max_it = 1000L, max_at = 4L ) m5$o sname[m5$s + 1L] m5$o_list apply(m5$s_list, 2L, function(s) { sname[s + 1L] }) ## ------------------------------------------------------------------------ s_ggrp <- data.table(s = sname, g = gname) s_ggrp[c(5L, 8L, 13L, 21L, 34L)] ## ----message=FALSE------------------------------------------------------- # ddzt <- ddzt[ , .(s, t, d, w)] m6 <- skm::skm_mls( x = ddzt, k = 1L:10L, s_colname = "s", t_colname = "t", d_colname = "d", w_colname = "w", s_ggrp = integer(0L), s_must = integer(0L), max_it = 1000L, max_at = 1000L, auto_create_ggrp = TRUE, extra_immaculatism = TRUE, extra_at = 200L ) m6 ## ----message=FALSE------------------------------------------------------- m7 <- skm::skm_gdp_cpp( x = dmtx, k = 10L ) m7 # obtain the objective population weighted average distance from the row index vector m7 sum(apply(dmtx[m7 + 1L, ], 2L, min))