## ----setup, include = FALSE--------------------------------------------------- options(rmarkdown.html_vignette.check_title = FALSE) knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----packages, eval = FALSE--------------------------------------------------- # library(datamuseum) # # library(tidyr) # library(dplyr) # library(lubridate) # library(stringr) # library(googlesheets4) # # library(ggplot2) # library(maps) # library(rnaturalearth) # library(rnaturalearthdata) # library(sf) # ## ----GBIF 3/30/2026, eval = FALSE--------------------------------------------- # # #Raw Data # # GBIF_raw <- read.csv("GBIF_Octopodoidea_raw.csv") #88256 Observations # # #Trimmed Data # # GBIF_clean <- GBIF_raw[ -c(1:7, 11:15, 24:32, 34:36, 38, 40:50)] # # GBIF_clean <- GBIF_clean[ -c(7, 9)] #88256 Observations # # #Also available on Github as GBIF_Octopodoidea_trim.csv # # #Japan Octopus Data # # GBIF_Japan <- latlong_range(GBIF_clean, "decimalLatitude", "decimalLongitude", # 25, 50, 125, 150, drop_na = TRUE) %>% dplyr::rename( # "Prefecture" = "stateProvince", # "Precise Location" = "locality", "Longitude" = "decimalLongitude", "Latitude" = "decimalLatitude", # "Year" = "year", "Genus" = "genus", "Country" = "countryCode", # "SciName" = "species", "Family" = "family","Source" = "institutionCode") #2145 Observations # # GBIF_Japan <- replace(GBIF_Japan, GBIF_Japan=='', NA) # # GBIF_Japan <- GBIF_Japan %>% # filter(!is.na(Source) & !is.na(Family) & !is.na(Genus) & !is.na(SciName) # & !is.na(Year)) #798 Observations # ## ----InvBase 3/30/2026, eval = FALSE------------------------------------------ # # #Raw Data # # InvBase_raw <- read.csv("InvBase_Octopodoidea_raw.csv") #22608 Observations # # #Trimmed Data # # InvBase_clean <- InvBase_raw[ -c(1, 3:6, 8:13, 15:17, 19, 21:38, 40:62, 64:70, 72, 75:79, 82:103)] #22608 Observations # # #Also available on Github as InvBase_Octopodoidea_trim.csv # # #Japan Octopus Data # # InvBase_Japan <- latlong_range(InvBase_clean, "decimalLatitude", "decimalLongitude", # 25, 50, 125, 150, drop_na = TRUE) %>% dplyr::rename( # "Prefecture" = "stateProvince", "Country" = "country", # "Precise Location" = "county", "Longitude" = "decimalLongitude", "Latitude" = "decimalLatitude", # "Year" = "year", "Genus" = "genus", "Source" = "institutionCode", "Family" = "family" # ) #50 Observations # # InvBase_Japan <- replace(InvBase_Japan, InvBase_Japan=='', NA) # # InvBase_Japan <- InvBase_Japan %>% # filter(!is.na(Source) & !is.na(Family) & !is.na(Genus) & !is.na(specificEpithet) # & !is.na(Year)) #43 Observations # # taxon_column(InvBase_Japan, output = "list") # # taxon_rank(InvBase_Japan, c(Family, Genus, specificEpithet)) # # InvBase_Japan <- taxon_combine(InvBase_Japan, genus = Genus, epithet = specificEpithet, # new_column = "SciName") # # InvBase_Japan <- InvBase_Japan[ -c(5)] # ## ----BISMAL 3/30/2026, eval = FALSE------------------------------------------- # # #Raw Data # # BISMAL_raw <- read.csv("BISMAL_Octopodoidea_raw.csv") #1547 Observations # # #Trimmed Data # # BISMAL_clean <- BISMAL_raw[ -c(1:11, 13:18, 20:21, 23:49, 51:67, 67, 70, # 72:79, 82:99, 100:104, 106:108, 110, 112:116)] #1547 Observations # # #Also available on Github as BISMAL_Octopodoidea_trim.csv # # #Japan Octopus Data # # BISMAL_Japan <- latlong_range(BISMAL_clean, "decimalLatitude", "decimalLongitude", # 25, 50, 125, 150, drop_na = TRUE) %>% dplyr::rename( "Prefecture" = "stateProvince", # "Precise Location" = "locality", "Longitude" = "decimalLongitude", "Latitude" = "decimalLatitude", # "Year" = "year", "Genus" = "genus", "Country" = "country", "Source" = "institutionCode", # "Family" = "family") #1507 Observations # # BISMAL_Japan <- replace(BISMAL_Japan, BISMAL_Japan=='', NA) # # BISMAL_Japan <- BISMAL_Japan %>% # filter(!is.na(Source) & !is.na(Family) & !is.na(Genus) & !is.na(specificEpithet) # & !is.na(Year)) #473 Observations # # taxon_column(BISMAL_Japan, output = "list") # # taxon_rank(BISMAL_Japan, c(Family, Genus, specificEpithet)) # # BISMAL_Japan <- taxon_combine(BISMAL_Japan, genus = Genus, epithet = specificEpithet, # new_column = "SciName") # # BISMAL_Japan <- BISMAL_Japan[ -c(12)] # ## ----OBIS 3/30/2026, eval = FALSE--------------------------------------------- # # #Raw Data # # OBIS_raw <- read.csv("OBIS_Octopodoidea_raw.csv") #58526 Observations # # #Trimmed Data # # OBIS_clean <- OBIS_raw[ -c(0:20, 22:28, 30:39, 41, 44:63, 65:74, 76:100, 102:112, 114:127, 129:203, 205:209, # 211:282)] #58526 Observations # # #Also available on Github as OBIS_Octopodoidea_trim.csv # # #Japan Octopus Data # # OBIS_Japan <- latlong_range(OBIS_clean, "decimalLatitude", "decimalLongitude", # 25, 50, 125, 150, drop_na = TRUE) %>% dplyr::rename( # "Prefecture" = "stateProvince", "Country" = "country", # "Precise Location" = "locality", "Longitude" = "decimalLongitude", "Latitude" = "decimalLatitude", # "Year" = "date_year", "Source" = "institutionCode", "Family" = "family", "Genus" = "genus", # "SciName" = "species") #859 Observations # # OBIS_Japan <- replace(OBIS_Japan, OBIS_Japan=='', NA) # # OBIS_Japan <- OBIS_Japan %>% # filter(!is.na(Source) & !is.na(Family) & !is.na(Genus) & !is.na(SciName) # & !is.na(Year)) #698 Observations # ## ----NSMT 1/19/2024 + 3/11/2024, eval = FALSE--------------------------------- # # #Raw Data # # NSMT_raw <- read.csv("NSMT_Octopodoidea_raw.csv") #870 Observations # # #Trimmed Data # # NSMT_clean <- NSMT_raw[ -c(5, 9:12, 16:18, 22:23)] #870 Observations # # #Also available on Github as NSMT_Octopodoidea_trim.csv # # #Japan Octopus Data # # NSMT_Japan <- latlong_range(NSMT_clean, "Latitude", "Longitude", # 25, 50, 125, 150, drop_na = TRUE) %>% dplyr::rename( # "Prefecture" = "Region", "Precise Location" = "Previse.loc.", # "Source" = "Group.Abb.") #727 Observations # # NSMT_Japan <- replace(NSMT_Japan, NSMT_Japan=='', NA) # # NSMT_Japan <- NSMT_Japan %>% filter(!is.na(Source) & !is.na(Family) & !is.na(Genus) & # !is.na(Species)& !is.na(Year)) #726 Observations # # taxon_column(NSMT_Japan, output = "list") # # taxon_rank(NSMT_Japan, c(Family, Genus, Species)) # # NSMT_Japan <- taxon_combine(NSMT_Japan, genus = Genus, epithet = Species, # new_column = "SciName") # # NSMT_Japan <- NSMT_Japan[ -c(6, 7)] # ## ----museum, eval = FALSE----------------------------------------------------- # # museum <- rbind( # InvBase_Japan %>% mutate(`Data Frame` = "InvBase"), # GBIF_Japan %>% mutate(`Data Frame` = "GBIF"), # NSMT_Japan %>% mutate(`Data Frame` = "NSMT"), # OBIS_Japan %>% mutate(`Data Frame` = "OBIS"), # BISMAL_Japan %>% mutate(`Data Frame` = "BISMAL") # ) #2738 Observations # # museum <- deduplicate(museum, "catalogNumber", # drop_na = TRUE) #143 duplicate rows removed; 608 rows removed due to missing ID. 1987 Observations # # museum_dupes <- attr(museum, "duplicates") #1268 Observations, 143 FALSE # # museum <- duplicate(museum, "individualCount") #Duplication increased count from 1894 Observations to 2671 Observations ## ----museum_taxon, eval = FALSE----------------------------------------------- # # #Taxonomized Japan Octopus Data # # museum_clean <- taxon_cleaner(museum, SciName, in_place = TRUE, drop_na = TRUE) #2260 Observations # # museum_clean <- museum_clean %>% mutate(SciName = case_when( # SciName == "Octopus vulgaris" ~ "Octopus sinensis", # TRUE ~ SciName)) # # museum_valid <- taxon_validate(museum_clean, SciName, update_related = TRUE) # # valid_report <- attr(museum_valid, "validation_report") # # museum_check <- taxon_spellcheck(museum_valid, c(SciName), update = TRUE, # validation_report = valid_report) # # check_report <- attr(museum_check, "spellcheck_report") # # museum_check <- museum_check %>% # mutate(SciName = case_when( # SciName == "Pinnoctopus macropus" ~ "Callistoctopus macropus", # TRUE ~ SciName # )) # # museum_taxon <- taxon_add(museum_check, SciName, rank = c("order", "phylum", "family", "genus"), # author_year = FALSE, sort = FALSE) # # add_report <- attr(museum_taxon, "add_report") # # museum_taxon <- museum_taxon[ -c(3,4)] # # museum_taxon <- museum_taxon %>% # dplyr::rename( # "Order" = "order", "Phylum" = "phylum", # "Family" = "family", "Genus" = "genus") # # museum_taxon <- taxon_sort(museum_taxon) # # museum_taxon <- taxon_cite(museum_taxon, c(Family, Genus, SciName)) # # cite_report <- attr(museum_taxon, "cite_report") # # museum_taxon <- museum_taxon %>% # filter(SciName != "Muusoctopus small in mature") %>% # mutate(Family_cite = case_when( # Family_cite == "Bathypolypodidae" ~ "Bathypolypodidae (G. C. Robson, 1929)", # Family_cite == "Enteroctopodidae" ~ "Enteroctopodidae (Strugnell, M. Norman, Vecchione, Guzik & Allcock, 2014)", # Family_cite == "Megaleledonidae" ~ "Megaleledonidae (Iw. Taki, 1961)", # TRUE ~ Family_cite # )) # # museum_taxon <- italicize(museum_taxon, c(Genus_cite, SciName_cite)) ## ----graph, eval = FALSE, fig.width = 16, fig.height = 10--------------------- # # world_map <- map_data("world") # # japan <- map_data("world", region="japan") # # family_labels <- c( # "Octopodidae (D'Orbigny, 1839-1842 in Férussac and D'Orbigny, 1834-1848)" = "Octopodidae\n(D'Orbigny, 1839-1842\nin Férussac and\nD'Orbigny, 1834-1848)", # "Amphitretidae (Hoyle, 1886)" = "Amphitretidae\n(Hoyle, 1886)", # "Enteroctopodidae (Strugnell, M. Norman, Vecchione, Guzik & Allcock, 2014)" = "Enteroctopodidae\n(Strugnell, M. Norman,\nVecchione, Guzik\n& Allcock, 2014)", # "Bathypolypodidae (G. C. Robson, 1929)" = "Bathypolypodidae\n(G. C. Robson, 1929)", # "Megaleledonidae (Iw. Taki, 1961)" = "Megaleledonidae\n(Iw. Taki, 1961)" # ) # # lon_min <- 125 # lon_max <- 150 # lat_min <- 25 # lat_max <- 50 # # museum_taxon$Longitude <- as.numeric(museum_taxon$Longitude) # museum_taxon$Latitude <- as.numeric(museum_taxon$Latitude) # # genera_per_family <- museum_taxon %>% # select(Genus_cite_italic, Family) %>% # distinct() %>% # count(Family) # # family_palettes <- list( # "Amphitretidae" = sequential_hcl(n = 2, palette = "BrwnYl", l = c(30, 55)), # "Bathypolypodidae" = sequential_hcl(n = 1, palette = "Reds", l = c(45, 45)), # "Enteroctopodidae" = sequential_hcl(n = 3, palette = "Peach"), # "Megaleledonidae" = sequential_hcl(n = 1, palette = "YlOrRd", l = c(55, 55)), # "Octopodidae" = sequential_hcl(n = 7, palette = "Purples", l = c(25, 65)) # ) # # genus_family_map <- museum_taxon %>% # select(Genus_cite_italic, Family) %>% # distinct() %>% # arrange(Family, Genus_cite_italic) # arrange so shades are assigned alphabetically # # genus_colors <- unlist(lapply(names(family_palettes), function(fam) { # genera <- genus_family_map$Genus_cite_italic[genus_family_map$Family == fam] # colors <- family_palettes[[fam]] # setNames(colors, genera) # })) # # # Create ordered breaks grouped by family # genus_order <- museum_taxon %>% # select(Genus_cite_italic, Family, Family_cite) %>% # distinct() %>% # arrange(factor(Family, levels = c("Amphitretidae", "Bathypolypodidae", # "Enteroctopodidae", "Megaleledonidae", # "Octopodidae")), # Genus_cite_italic) # # # Step 1 - build genus_order_with_headers # genus_order_with_headers <- genus_order %>% # group_by(Family) %>% # group_modify(~ { # family_cite_label <- paste0("bold(", .y$Family, ")") # bind_rows( # data.frame( # Genus_cite_italic = family_cite_label, # Family_cite = .x$Family_cite[1] # ), # .x %>% select(Genus_cite_italic, Family_cite) # ) # }) %>% # ungroup() %>% # pull(Genus_cite_italic) # # # Step 2 - add spacer before Megaleledonidae # meg_pos <- which(genus_order_with_headers == "bold(Megaleledonidae)") # genus_order_with_headers <- c( # genus_order_with_headers[1:(meg_pos - 1)], # "' '", # genus_order_with_headers[meg_pos:length(genus_order_with_headers)] # ) # # # Step 3 - build colors # header_colors <- setNames( # rep("#FFFFFF00", 5), # grep("^bold", genus_order_with_headers, value = TRUE) # ) # spacer_color <- setNames("#FFFFFF00", "' '") # genus_colors_final <- c(genus_colors, header_colors, spacer_color) # # # Step 4 - legend overrides # legend_breaks <- intersect(genus_order_with_headers, names(genus_colors_final)) # n <- length(legend_breaks) # spacer_pos <- which(legend_breaks == "' '") # header_pos <- which(grepl("^bold", legend_breaks)) # hide_pos <- c(spacer_pos, header_pos) # # legend_size <- ifelse(seq_along(legend_breaks) %in% hide_pos, 0, 8) # legend_alpha <- ifelse(seq_along(legend_breaks) %in% hide_pos, 0, 1) # # legend_fill <- rep(NA_character_, n) # legend_color <- rep(NA_character_, n) # legend_stroke <- rep(0.5, n) # # legend_fill[hide_pos] <- "transparent" # legend_color[hide_pos] <- "transparent" # legend_stroke[hide_pos] <- 0 # # ggplot(data = world_map, aes(long, lat)) + # geom_polygon(aes(group = group)) + # coord_sf(xlim = c(lon_min - 1, lon_max + 1), ylim = c(lat_min - 1, lat_max + 1), # expand = FALSE) + # geom_point(data = museum_taxon, # aes(x = Longitude, y = Latitude, color = Genus_cite_italic), # size = 3, # position = position_jitter(width = .1, height = .1)) + # labs(x = "Longitude", y = "Latitude", color = "Genus") + # scale_colour_manual( # values = genus_colors_final, # breaks = genus_order_with_headers, # limits = names(genus_colors_final), # labels = function(x) lapply(x, function(i) parse(text = i)) # ) + # guides( # color = guide_legend( # override.aes = list( # size = legend_size, # alpha = legend_alpha), ncol = 4 # )) + # theme( # plot.margin = margin(t = .05, r = 1, b = .05, l = 1, unit = "mm"), # legend.key = element_rect(fill = "transparent", color = "transparent"), # legend.position = "bottom", # legend.title.position = "top", # legend.spacing.y = unit(0.25, "mm"), # legend.spacing.x = unit(1, "mm"), # legend.title = element_text(size = 16), # legend.text = element_text(size = 14), # strip.text = element_text(size = 16), # axis.title = element_text(size = 20), # axis.text.x = element_text(size = 16, angle = 60, hjust = 1), # axis.text.y = element_text(size = 16) # ) + # facet_wrap(~Family_cite, nrow = 1, labeller = as_labeller(family_labels)) # # ggsave("octopodoidea_japan.png", width = 16, height = 7.5, units = "in", dpi = 450) # ## ----figure, echo = FALSE, out.width = "100%", fig.cap = "Octopodoidea occurrences in Japan by family and genus."---- knitr::include_graphics("octopodoidea_japan.png")