# ============================================================================
# Setup: Libraries
# ============================================================================
library(shinyjs)
library(shiny)
library(shinydashboard)
library(leaflet)
library(mapboxapi)
library(tidyverse)
library(tidycensus)
library(sf)
library(DT)
library(RColorBrewer)
library(terra)
library(data.table)
library(mapview)
library(sjPlot)
library(sjlabelled)
library(bslib)
library(shinycssloaders)
library(glue)

# ============================================================================
# Setup: HuggingFace base URL and cache directory
# ============================================================================
HF_BASE <- "https://huggingface.co/datasets/boettiger-lab/sf_biodiv_access/resolve/main"

# Use data/cached/ when running locally (writable working directory); otherwise
# fall back to /tmp/sf_biodiv_cache/ for read-only environments such as
# HuggingFace Spaces.
cache_dir <- if (file.access(".", mode = 2) == 0) "data/cached" else "/tmp/sf_biodiv_cache"
dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE)

# Helper: if the file already exists in the cache directory, return that path.
# Otherwise attempt to download it from HuggingFace into the cache.
# The destination path is returned regardless of outcome, so the caller must
# check file.exists() if the download may fail (e.g. file not yet uploaded to HF).
hf_or_local <- function(filename) {
  dest <- file.path(cache_dir, filename)
  if (!file.exists(dest)) {
    tryCatch(
      download.file(glue::glue("{HF_BASE}/{filename}"), dest, mode = "wb", quiet = TRUE),
      error   = function(e) warning(glue::glue("HuggingFace download failed for {filename}: {e$message}")),
      warning = function(w) warning(glue::glue("HuggingFace download warning for {filename}: {w$message}"))
    )
  }
  dest
}
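# Illustrative only (not run): a minimal sketch of the intended calling pattern
# for hf_or_local(). The file name below is hypothetical; real callers appear
# throughout this script and should test file.exists() when a download can fail.
if (FALSE) {
  example_path <- hf_or_local("some_layer.gpkg")   # hypothetical file name
  if (file.exists(example_path)) {
    example_layer <- sf::st_read(example_path, quiet = TRUE)
  } else {
    warning("some_layer.gpkg is not available locally or on HuggingFace")
  }
}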
message("[setup_unified] loading greenspace polygons (cache / HuggingFace)…")

# ============================================================================
# Load Data: Greenspace (OSM polygons)
# ============================================================================
# Shapefile bundle on HuggingFace: download the sidecar files into the cache if needed.
greenspace_shp <- file.path(cache_dir, "greenspaces_osm_nad83.shp")
if (!file.exists(greenspace_shp)) {
  for (ext in c("shp", "dbf", "prj", "shx")) {
    hf_or_local(glue::glue("greenspaces_osm_nad83.{ext}"))
  }
}
osm_greenspace <- st_read(greenspace_shp, quiet = TRUE) |> st_transform(4326)
if (!"name" %in% names(osm_greenspace)) osm_greenspace$name <- "Unnamed Greenspace"

message("[setup_unified] loading greenspace distance rasters + NDVI…")

# ============================================================================
# Load Data: Greenspace distance rasters
# ============================================================================
greenspace_dist_raster  <- terra::rast(hf_or_local("nearest_greenspace_dist.tif"))
greenspace_osmid_raster <- terra::rast(hf_or_local("nearest_greenspace_osmid.tif"))
rsfprogram_dist_raster  <- terra::rast(hf_or_local("nearest_rsfprogram_dist.tif"))
rsfprogram_id_raster    <- terra::rast(hf_or_local("nearest_rsfprogram_id.tif"))

# ============================================================================
# Load Data: NDVI raster
# ============================================================================
ndvi <- terra::rast(hf_or_local("SF_EastBay_NDVI_Sentinel_10.tif"))

message("[setup_unified] loading GBIF parquet + CBG polygons…")

# ============================================================================
# Load Data: GBIF observations (parquet, queried via DuckDB in the server)
# ============================================================================
gbif_parquet <- hf_or_local("gbif_census_ndvi_anno.parquet")

# ============================================================================
# Load Data: Census block groups (CBG)
# ============================================================================
load(hf_or_local("cbg_vect_sf.Rdata"))
if (!"unique_species" %in% names(cbg_vect_sf)) cbg_vect_sf$unique_species <- cbg_vect_sf$n_species
if (!"n_observations" %in% names(cbg_vect_sf)) cbg_vect_sf$n_observations <- cbg_vect_sf$n
if (!"median_inc" %in% names(cbg_vect_sf)) cbg_vect_sf$median_inc <- cbg_vect_sf$medincE
if (!"ndvi_mean" %in% names(cbg_vect_sf)) cbg_vect_sf$ndvi_mean <- cbg_vect_sf$ndvi_sentinel

message("[setup_unified] computing CBG × greenspace overlap (vector intersect)…")

# ============================================================================
# Per-CBG greenspace overlap (computed here; no separate CSV on HuggingFace)
# ============================================================================
cbg_proj <- st_transform(cbg_vect_sf[, "GEOID"], 3857) |>
  mutate(cbg_area_m2 = as.numeric(st_area(geometry)))
gs_proj  <- st_transform(osm_greenspace, 3857) |> st_make_valid()
gs_union <- st_union(gs_proj)
cbg_gs_inter <- st_intersection(cbg_proj, gs_union)

cbg_greenspace_coverage <- cbg_gs_inter |>
  mutate(greenspace_m2 = as.numeric(st_area(geometry))) |>
  st_drop_geometry() |>
  group_by(GEOID) |>
  summarise(greenspace_m2 = sum(greenspace_m2), .groups = "drop") |>
  right_join(
    cbg_proj |> st_drop_geometry() |> dplyr::select(GEOID, cbg_area_m2),
    by = "GEOID"
  ) |>
  mutate(
    greenspace_m2 = tidyr::replace_na(greenspace_m2, 0),
    GEOID = as.character(GEOID)
  )
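# Illustrative only (not run): a minimal sketch, assuming the server derives a
# per-CBG greenspace share from the coverage table above. The pct_greenspace
# column and the join back onto cbg_vect_sf are assumptions, not part of setup.
if (FALSE) {
  cbg_greenspace_pct <- cbg_greenspace_coverage |>
    mutate(pct_greenspace = 100 * greenspace_m2 / cbg_area_m2)   # hypothetical derived column
  cbg_vect_sf |>
    mutate(GEOID = as.character(GEOID)) |>
    left_join(cbg_greenspace_pct, by = "GEOID")
}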
message("[setup_unified] loading biodiversity hotspots / coldspots…")

# ============================================================================
# Load Data: Biodiversity hotspots / coldspots
# ============================================================================
hotspots_shp <- file.path(cache_dir, "hotspots.shp")
if (!file.exists(hotspots_shp)) {
  for (ext in c("shp", "dbf", "prj", "shx")) {
    hf_or_local(glue::glue("hotspots.{ext}"))
  }
}
biodiv_hotspots <- st_read(hotspots_shp, quiet = TRUE) |> st_transform(4326)

coldspots_shp <- file.path(cache_dir, "coldspots.shp")
if (!file.exists(coldspots_shp)) {
  for (ext in c("shp", "dbf", "prj", "shx")) {
    hf_or_local(glue::glue("coldspots.{ext}"))
  }
}
biodiv_coldspots <- st_read(coldspots_shp, quiet = TRUE) |> st_transform(4326)

message("[setup_unified] loading RSF, CalEnviroScreen, SF EJ layers…")

# ============================================================================
# Load Data: RSF Program Projects
# ============================================================================
rsf_projects <- st_read(hf_or_local("RSF_Program_Projects_polygons.gpkg"), quiet = TRUE) |>
  st_transform(4326)

# ============================================================================
# Load Data: CalEnviroScreen 4.0 (pre-filtered to SF)
# ============================================================================
cenv_sf <- tryCatch({
  sf::st_read(hf_or_local("calenviro_sf.gpkg"), quiet = TRUE)
}, error = function(e) {
  warning("CalEnviroScreen failed to load: ", e$message)
  NULL
})

# ============================================================================
# Load Data: SF Environmental Justice Communities
# ============================================================================
sf_ej_sf <- tryCatch({
  sf::st_read(hf_or_local("sf_ej_communities_map.gpkg"), quiet = TRUE) |>
    dplyr::mutate(
      # Convert the "R,G,B" string in symbol_rgb to a hex colour for mapping.
      symbol_hex = stringr::str_split(symbol_rgb, ",\\s*") |>
        lapply(function(x) sprintf("#%02X%02X%02X", as.integer(x[1]), as.integer(x[2]), as.integer(x[3]))) |>
        unlist(),
      ej_label = dplyr::case_when(
        is.na(score) ~ "Not EJ",
        score >= 21  ~ "High EJ burden (21-30)",
        score >= 11  ~ "Moderate EJ burden (11-20)",
        score >= 1   ~ "Low EJ burden (1-10)",
        score == 0   ~ "Score 0",
        TRUE         ~ "Unknown"
      )
    )
}, error = function(e) {
  warning("SF EJ layer failed to load: ", e$message)
  NULL
})
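# Illustrative only (not run): a minimal sketch of how the EJ polygons could be
# drawn using the symbol_hex and ej_label columns prepared above. The actual map
# construction lives in the server code and may differ; this block is an assumption.
if (FALSE) {
  if (!is.null(sf_ej_sf)) {
    leaflet(sf_ej_sf) |>
      addProviderTiles(providers$CartoDB.Positron) |>
      addPolygons(
        fillColor   = ~symbol_hex,
        fillOpacity = 0.6,
        weight      = 1,
        color       = "#444444",
        label       = ~ej_label
      )
  }
}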
message("[setup_unified] loading GTFS (zip, stops, shapes, timetable, headways)…")

# ============================================================================
# Load Data: GTFS (SF Muni)
# ============================================================================
gtfs_zip_path <- hf_or_local("sf_muni_gtfs.zip")

# Unzip so the individual .txt tables can be read with read.csv(); tidytransit
# and gtfsrouter read the .zip directly (gtfsio requires a zip path).
gtfs_unzip_dir <- file.path(cache_dir, "muni_gtfs")
dir.create(gtfs_unzip_dir, recursive = TRUE, showWarnings = FALSE)
if (!dir.exists(gtfs_unzip_dir) || length(list.files(gtfs_unzip_dir, pattern = "\\.txt$")) == 0L) {
  unzip(gtfs_zip_path, exdir = gtfs_unzip_dir, overwrite = TRUE)
}
gtfs_path <- gtfs_unzip_dir

# --- Transit stops -----------------------------------------------------------
gtfs_stops_sf <- tryCatch({
  read.csv(file.path(gtfs_path, "stops.txt")) |>
    st_as_sf(coords = c("stop_lon", "stop_lat"), crs = 4326)
}, error = function(e) {
  warning("GTFS stops failed to load: ", e$message)
  NULL
})

# --- Route shapes ------------------------------------------------------------
gtfs_routes_sf <- tryCatch({
  gtfs_shapes_raw <- read.csv(file.path(gtfs_path, "shapes.txt"))
  gtfs_trips_raw  <- read.csv(file.path(gtfs_path, "trips.txt"))
  gtfs_routes_raw <- read.csv(file.path(gtfs_path, "routes.txt"))

  shape_route_map <- gtfs_trips_raw |> distinct(shape_id, route_id)
  route_meta <- gtfs_routes_raw |>
    select(route_id, route_short_name, route_long_name, route_color) |>
    mutate(route_color_hex = paste0("#", trimws(route_color)))

  # Build one LINESTRING per shape_id from the ordered shape points.
  shapes_split <- gtfs_shapes_raw |>
    arrange(shape_id, shape_pt_sequence) |>
    group_by(shape_id) |>
    group_split()
  shape_geoms <- lapply(shapes_split, function(s) {
    st_linestring(cbind(s$shape_pt_lon, s$shape_pt_lat))
  })

  st_sf(
    shape_id = sapply(shapes_split, function(s) s$shape_id[1]),
    geometry = st_sfc(shape_geoms, crs = 4326)
  ) |>
    left_join(shape_route_map, by = "shape_id") |>
    left_join(route_meta, by = "route_id")
}, error = function(e) {
  warning("GTFS route shapes failed to load: ", e$message)
  NULL
})

# --- gtfsrouter timetable ----------------------------------------------------
gtfs_router <- tryCatch({
  timetable_path <- hf_or_local("gtfs_timetable_monday.rds")
  if (file.exists(timetable_path)) {
    readRDS(timetable_path)
  } else {
    gr <- gtfsrouter::extract_gtfs(gtfs_zip_path)
    result <- gtfsrouter::gtfs_timetable(gr, day = "Monday")
    saveRDS(result, file.path(cache_dir, "gtfs_timetable_monday.rds"))
    result
  }
}, error = function(e) {
  warning("gtfsrouter failed to initialise: ", e$message)
  NULL
})

# --- Pre-computed transit isochrone cache ------------------------------------
transit_iso_cache <- tryCatch({
  p <- file.path(cache_dir, "transit_iso_cache.rds")
  if (file.exists(p)) readRDS(p) else NULL
}, error = function(e) NULL)

# --- Stop headways (AM peak, 7-9 am): cached as CSV (readable / diffable) -----
# The gtfsrouter timetable stays .rds (an opaque R object); this table is plain
# columns, so it is cached as CSV. Migrate any legacy .rds cache to CSV first.
hw_csv <- file.path(cache_dir, "gtfs_stop_headways.csv")
hw_rds <- file.path(cache_dir, "gtfs_stop_headways.rds")
if (!file.exists(hw_csv) && file.exists(hw_rds)) {
  readRDS(hw_rds) |> readr::write_csv(hw_csv)
}

gtfs_stop_headways <- tryCatch({
  headways_path <- hf_or_local("gtfs_stop_headways.csv")
  if (file.exists(headways_path)) {
    readr::read_csv(headways_path, show_col_types = FALSE) |>
      mutate(stop_id = as.character(stop_id))
  } else {
    gt <- tidytransit::read_gtfs(gtfs_zip_path)
    hw <- tidytransit::get_stop_frequency(gt, start_time = 7 * 3600, end_time = 9 * 3600) |>
      group_by(stop_id) |>
      summarise(
        mean_headway_min  = mean(mean_headway, na.rm = TRUE) / 60,
        n_departures_peak = sum(n_departures, na.rm = TRUE),
        .groups = "drop"
      ) |>
      mutate(stop_id = as.character(stop_id))
    readr::write_csv(hw, hw_csv)
    hw
  }
}, error = function(e) {
  warning("tidytransit headway computation failed: ", e$message)
  NULL
})

# Join the AM-peak headways onto the stop points.
if (!is.null(gtfs_stop_headways) && !is.null(gtfs_stops_sf)) {
  gtfs_stops_sf <- gtfs_stops_sf |>
    mutate(stop_id = as.character(stop_id)) |>
    left_join(gtfs_stop_headways, by = "stop_id")
}

message("[setup_unified] data load complete.")
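# Illustrative only (not run): the GBIF observations stay on disk as a parquet
# file (gbif_parquet above) and, per the comment in that section, are queried
# with DuckDB from the server. The server's actual query and column names are
# not shown here; this minimal sketch only counts rows and is an assumption.
if (FALSE) {
  con <- DBI::dbConnect(duckdb::duckdb())
  n_obs <- DBI::dbGetQuery(
    con,
    glue::glue("SELECT COUNT(*) AS n FROM read_parquet('{gbif_parquet}')")
  )
  DBI::dbDisconnect(con, shutdown = TRUE)
}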