Spaces:
Sleeping
Sleeping
| # ============================================================================ | |
| # Setup: Libraries | |
| # ============================================================================ | |
| require(shinyjs) | |
| library(shiny) | |
| library(shinydashboard) | |
| library(leaflet) | |
| library(mapboxapi) | |
| library(tidyverse) | |
| library(tidycensus) | |
| library(sf) | |
| library(DT) | |
| library(RColorBrewer) | |
| library(terra) | |
| library(data.table) | |
| library(mapview) | |
| library(sjPlot) | |
| library(sjlabelled) | |
| library(bslib) | |
| library(shinycssloaders) | |
| library(glue) | |
# ============================================================================
# Setup: HuggingFace base URL and cache directory
# ============================================================================
# Root URL under which every remote data file is resolved.
HF_BASE <- "https://huggingface.co/datasets/boettiger-lab/sf_biodiv_access/resolve/main"

# Cache location: data/cached/ when the working directory is writable
# (file.access() reports 0 for write permission), otherwise
# /tmp/sf_biodiv_cache/ for read-only environments like HuggingFace Spaces.
if (file.access(".", mode = 2) == 0) {
  cache_dir <- "data/cached"
} else {
  cache_dir <- "/tmp/sf_biodiv_cache"
}
dir.create(cache_dir, showWarnings = FALSE, recursive = TRUE)
# Helper: resolve `filename` to a path inside `cache_dir`, downloading it from
# HuggingFace (`HF_BASE`) first when it is not cached yet.
#
# The download is written to a temporary "<dest>.part" file and only renamed to
# its final name once the transfer has completed. An interrupted or failed
# download therefore never leaves a file at `dest` that a later run would
# mistake for a valid cached copy.
#
# Arguments:
#   filename  File name, relative to both `HF_BASE` and `cache_dir`.
# Returns:
#   The destination path regardless of whether the download succeeded — the
#   caller must check file.exists() if the download may fail (e.g. file not
#   yet uploaded to HF). Failures are reported as warnings, never as errors.
hf_or_local <- function(filename) {
  dest <- file.path(cache_dir, filename)
  if (file.exists(dest)) {
    return(dest)
  }

  url <- paste0(HF_BASE, "/", filename)
  partial <- paste0(dest, ".part")
  # Runs on every exit path; a no-op once the file has been renamed.
  on.exit(unlink(partial), add = TRUE)

  problem <- tryCatch(
    {
      status <- download.file(url, partial, mode = "wb", quiet = TRUE)
      # Some download methods signal failure via the status code only.
      if (!identical(as.integer(status), 0L)) {
        stop("download.file() returned status ", status)
      }
      NULL
    },
    error = function(e) {
      paste0("HuggingFace download failed for ", filename, ": ", conditionMessage(e))
    },
    warning = function(w) {
      paste0("HuggingFace download warning for ", filename, ": ", conditionMessage(w))
    }
  )

  if (!is.null(problem)) {
    warning(problem, call. = FALSE)
  } else if (!file.rename(partial, dest)) {
    warning("HuggingFace download failed for ", filename,
            ": could not move the downloaded file into the cache", call. = FALSE)
  }
  dest
}
message("[setup_unified] loading greenspace polygons (cache / HuggingFace)…")
# ============================================================================
# Load Data: Greenspace (OSM polygons)
# ============================================================================
# Shapefile bundle on HuggingFace. Every component is requested on each run:
# hf_or_local() returns immediately for files that are already cached, so a
# complete bundle triggers no download, while a sidecar that is missing next
# to an existing .shp (e.g. after a partially failed earlier run) is fetched
# instead of being skipped.
for (ext in c("shp", "dbf", "prj", "shx")) {
  hf_or_local(glue::glue("greenspaces_osm_nad83.{ext}"))
}
greenspace_shp <- file.path(cache_dir, "greenspaces_osm_nad83.shp")
osm_greenspace <- st_read(greenspace_shp, quiet = TRUE) |> st_transform(4326)
# Guarantee a `name` column, filled with a placeholder label when absent.
if (!"name" %in% names(osm_greenspace)) {
  osm_greenspace$name <- "Unnamed Greenspace"
}
message("[setup_unified] loading greenspace distance rasters + NDVI…")
# ============================================================================
# Load Data: Greenspace distance rasters
# ============================================================================
# Each file is resolved through hf_or_local() and then opened with terra.
greenspace_dist_raster <- hf_or_local("nearest_greenspace_dist.tif") |>
  terra::rast()
greenspace_osmid_raster <- hf_or_local("nearest_greenspace_osmid.tif") |>
  terra::rast()
rsfprogram_dist_raster <- hf_or_local("nearest_rsfprogram_dist.tif") |>
  terra::rast()
rsfprogram_id_raster <- hf_or_local("nearest_rsfprogram_id.tif") |>
  terra::rast()
# ============================================================================
# Load Data: NDVI raster
# ============================================================================
ndvi <- hf_or_local("SF_EastBay_NDVI_Sentinel_10.tif") |>
  terra::rast()
message("[setup_unified] loading GBIF parquet + CBG polygons…")
# ============================================================================
# Load Data: GBIF observations (parquet, queried via DuckDB in server)
# ============================================================================
# Only the path is kept here; the file itself is not read in this script.
gbif_parquet <- hf_or_local("gbif_census_ndvi_anno.parquet")
# ============================================================================
# Load Data: Census block groups (CBG)
# ============================================================================
# NOTE(review): load() restores whatever objects the remote .Rdata file
# contains into this environment. The code below expects it to define
# `cbg_vect_sf`; confirm the file is trusted and contains nothing else.
load(hf_or_local("cbg_vect_sf.Rdata"))

# Column names used below -> fallback column to copy from when absent.
cbg_column_aliases <- c(
  unique_species = "n_species",
  n_observations = "n",
  median_inc     = "medincE",
  ndvi_mean      = "ndvi_sentinel"
)
for (cbg_alias in names(cbg_column_aliases)) {
  if (cbg_alias %in% names(cbg_vect_sf)) {
    next
  }
  cbg_source <- cbg_column_aliases[[cbg_alias]]
  if (cbg_source %in% names(cbg_vect_sf)) {
    # `[[` matches names exactly; `$` can partial-match on a plain data.frame
    # (e.g. `$n` resolving to some other column that starts with "n").
    cbg_vect_sf[[cbg_alias]] <- cbg_vect_sf[[cbg_source]]
  } else {
    warning("cbg_vect_sf has neither '", cbg_alias, "' nor its fallback '",
            cbg_source, "'", call. = FALSE)
  }
}
message("[setup_unified] computing CBG × greenspace overlap (vector intersect)…")
# ============================================================================
# Per-CBG greenspace overlap (computed here; no separate CSV on HuggingFace)
# ============================================================================
# The geometry operations (union, intersection) stay in EPSG:3857 exactly as
# before, so cbg_proj / gs_proj / gs_union / cbg_gs_inter keep their CRS.
# Areas, however, are measured after reprojecting to EPSG:3310 (California
# Albers, equal-area, metres): Web Mercator inflates areas by sec^2(latitude),
# which is roughly +60% at San Francisco, so st_area() on 3857 geometries does
# not yield true square metres.
cbg_proj <- st_transform(cbg_vect_sf[, "GEOID"], 3857) |>
  mutate(cbg_area_m2 = as.numeric(st_area(st_transform(geometry, 3310))))
gs_proj <- st_transform(osm_greenspace, 3857) |> st_make_valid()
# Dissolve all greenspace polygons so overlapping parks are not double-counted.
gs_union <- st_union(gs_proj)
cbg_gs_inter <- st_intersection(cbg_proj, gs_union)
cbg_greenspace_coverage <- cbg_gs_inter |>
  mutate(greenspace_m2 = as.numeric(st_area(st_transform(geometry, 3310)))) |>
  st_drop_geometry() |>
  group_by(GEOID) |>
  summarise(greenspace_m2 = sum(greenspace_m2), .groups = "drop") |>
  # right_join keeps every CBG, including those with no greenspace at all.
  right_join(cbg_proj |> st_drop_geometry() |> dplyr::select(GEOID, cbg_area_m2), by = "GEOID") |>
  mutate(
    greenspace_m2 = tidyr::replace_na(greenspace_m2, 0),
    GEOID = as.character(GEOID)
  )
message("[setup_unified] loading biodiversity hotspots / coldspots…")
# ============================================================================
# Load Data: Biodiversity hotspots / coldspots
# ============================================================================
# Both layers are shapefile bundles. Every component is requested on each run:
# hf_or_local() returns immediately for cached files, so nothing is downloaded
# for a complete bundle, while a sidecar missing next to an existing .shp is
# fetched rather than skipped.
for (ext in c("shp", "dbf", "prj", "shx")) {
  hf_or_local(glue::glue("hotspots.{ext}"))
}
hotspots_shp <- file.path(cache_dir, "hotspots.shp")
biodiv_hotspots <- st_read(hotspots_shp, quiet = TRUE) |> st_transform(4326)

for (ext in c("shp", "dbf", "prj", "shx")) {
  hf_or_local(glue::glue("coldspots.{ext}"))
}
coldspots_shp <- file.path(cache_dir, "coldspots.shp")
biodiv_coldspots <- st_read(coldspots_shp, quiet = TRUE) |> st_transform(4326)
message("[setup_unified] loading RSF, CalEnviroScreen, SF EJ layers…")
# ============================================================================
# Load Data: RSF Program Projects
# ============================================================================
# Read the GeoPackage and reproject to EPSG:4326; a failure here is not caught.
rsf_projects <- hf_or_local("RSF_Program_Projects_polygons.gpkg") |>
  st_read(quiet = TRUE) |>
  st_transform(4326)
# ============================================================================
# Load Data: CalEnviroScreen 4.0 (pre-filtered to SF)
# ============================================================================
# Optional layer: a read failure is downgraded to a warning and yields NULL.
cenv_sf <- tryCatch(
  sf::st_read(hf_or_local("calenviro_sf.gpkg"), quiet = TRUE),
  error = function(e) {
    warning("CalEnviroScreen failed to load: ", e$message)
    NULL
  }
)
# ============================================================================
# Load Data: SF Environmental Justice Communities
# ============================================================================
# Optional layer: a failure anywhere below becomes a warning and yields NULL.
sf_ej_sf <- tryCatch(
  {
    sf::st_read(hf_or_local("sf_ej_communities_map.gpkg"), quiet = TRUE) |>
      dplyr::mutate(
        # `symbol_rgb` holds comma-separated channel values; turn each row
        # into a "#RRGGBB" hex string.
        symbol_hex = unlist(lapply(
          stringr::str_split(symbol_rgb, ",\\s*"),
          function(channels) {
            red <- as.integer(channels[1])
            green <- as.integer(channels[2])
            blue <- as.integer(channels[3])
            sprintf("#%02X%02X%02X", red, green, blue)
          }
        )),
        # Bucket the numeric score into display labels; the first matching
        # condition wins, so the order of the thresholds matters.
        ej_label = dplyr::case_when(
          is.na(score) ~ "Not EJ",
          score >= 21  ~ "High EJ burden (21-30)",
          score >= 11  ~ "Moderate EJ burden (11-20)",
          score >= 1   ~ "Low EJ burden (1-10)",
          score == 0   ~ "Score 0",
          TRUE         ~ "Unknown"
        )
      )
  },
  error = function(e) {
    warning("SF EJ layer failed to load: ", e$message)
    NULL
  }
)
message("[setup_unified] loading GTFS (zip, stops, shapes, timetable, headways)…")
# ============================================================================
# Load Data: GTFS (SF Muni)
# ============================================================================
gtfs_zip_path <- hf_or_local("sf_muni_gtfs.zip")
# Unzip for read.csv(stops.txt, …); tidytransit/gtfsrouter read the .zip (gtfsio needs a zip path)
gtfs_unzip_dir <- file.path(cache_dir, "muni_gtfs")
dir.create(gtfs_unzip_dir, recursive = TRUE, showWarnings = FALSE)
# Extract only when no GTFS tables are present yet. The directory itself is
# guaranteed to exist by dir.create() above, so only its contents are checked.
if (length(list.files(gtfs_unzip_dir, pattern = "\\.txt$")) == 0L) {
  if (file.exists(gtfs_zip_path)) {
    unzip(gtfs_zip_path, exdir = gtfs_unzip_dir, overwrite = TRUE)
  } else {
    # hf_or_local() returns the path even when the download failed.
    warning("GTFS archive not found at ", gtfs_zip_path,
            "; GTFS tables cannot be extracted", call. = FALSE)
  }
}
gtfs_path <- gtfs_unzip_dir
# --- Transit stops -----------------------------------------------------------
# Read stops.txt and promote its lon/lat columns to point geometry (EPSG:4326).
# Any failure becomes a warning and leaves gtfs_stops_sf as NULL.
gtfs_stops_sf <- tryCatch(
  st_as_sf(
    read.csv(file.path(gtfs_path, "stops.txt")),
    coords = c("stop_lon", "stop_lat"),
    crs = 4326
  ),
  error = function(e) {
    warning("GTFS stops failed to load: ", e$message)
    NULL
  }
)
# --- Route shapes ------------------------------------------------------------
# Builds one LINESTRING per shape_id from shapes.txt and attaches route
# metadata (via trips.txt -> routes.txt). Any failure becomes a warning and
# leaves gtfs_routes_sf as NULL.
gtfs_routes_sf <- tryCatch({
  gtfs_shapes_raw <- read.csv(file.path(gtfs_path, "shapes.txt"))
  gtfs_trips_raw  <- read.csv(file.path(gtfs_path, "trips.txt"))
  # route_color is a hex string: force character so read.csv() does not parse
  # all-digit colours (e.g. "000000") as numbers and drop their leading zeros.
  gtfs_routes_raw <- read.csv(
    file.path(gtfs_path, "routes.txt"),
    colClasses = c(route_color = "character")
  )
  shape_route_map <- gtfs_trips_raw |> distinct(shape_id, route_id)
  route_meta <- gtfs_routes_raw |>
    select(route_id, route_short_name, route_long_name, route_color) |>
    mutate(
      # Empty, NA or malformed colours fall back to FFFFFF, the GTFS default
      # for an omitted route_color, instead of producing "#" or "#NA".
      route_color_hex = paste0(
        "#",
        ifelse(
          grepl("^[0-9A-Fa-f]{6}$", trimws(route_color)),
          trimws(route_color),
          "FFFFFF"
        )
      )
    )
  # Points must be in sequence order within each shape before building lines.
  shapes_split <- gtfs_shapes_raw |>
    arrange(shape_id, shape_pt_sequence) |>
    group_by(shape_id) |>
    group_split()
  shape_geoms <- lapply(shapes_split, function(s) {
    st_linestring(cbind(s$shape_pt_lon, s$shape_pt_lat))
  })
  st_sf(
    # vapply() with a template taken from the data keeps shape_id in the same
    # type as in shapes.txt, whatever read.csv() inferred it to be.
    shape_id = vapply(
      shapes_split,
      function(s) s$shape_id[1],
      FUN.VALUE = gtfs_shapes_raw$shape_id[1]
    ),
    geometry = st_sfc(shape_geoms, crs = 4326)
  ) |>
    left_join(shape_route_map, by = "shape_id") |>
    left_join(route_meta, by = "route_id")
}, error = function(e) {
  warning("GTFS route shapes failed to load: ", e$message)
  NULL
})
# --- gtfsrouter timetable ----------------------------------------------------
# Use the pre-built Monday timetable when it is cached or downloadable;
# otherwise build it from the GTFS zip and store it in the cache.
# Any failure becomes a warning and leaves gtfs_router as NULL.
gtfs_router <- tryCatch(
  {
    timetable_path <- hf_or_local("gtfs_timetable_monday.rds")
    if (!file.exists(timetable_path)) {
      gr <- gtfsrouter::extract_gtfs(gtfs_zip_path)
      result <- gtfsrouter::gtfs_timetable(gr, day = "Monday")
      saveRDS(result, file.path(cache_dir, "gtfs_timetable_monday.rds"))
      result
    } else {
      readRDS(timetable_path)
    }
  },
  error = function(e) {
    warning("gtfsrouter failed to initialise: ", e$message)
    NULL
  }
)
# --- Pre-computed transit isochrone cache ------------------------------------
# Optional: read only from the local cache directory (no download attempt).
# NULL when the file is absent. An unreadable file also yields NULL, but is
# reported with a warning like the other loaders rather than ignored silently.
transit_iso_cache <- tryCatch({
  p <- file.path(cache_dir, "transit_iso_cache.rds")
  if (file.exists(p)) readRDS(p) else NULL
}, error = function(e) {
  warning("Transit isochrone cache failed to load: ", e$message)
  NULL
})
# --- Stop headways (AM peak 7-9am): cached as CSV (readable / diffable) -------
# gtfsrouter timetable stays .rds (opaque R object); this table is just columns.
hw_csv <- file.path(cache_dir, "gtfs_stop_headways.csv")
hw_rds <- file.path(cache_dir, "gtfs_stop_headways.rds")
# Convert an existing .rds copy to CSV when no CSV is cached yet.
if (file.exists(hw_rds) && !file.exists(hw_csv)) {
  readr::write_csv(readRDS(hw_rds), hw_csv)
}
# Prefer the cached / downloaded CSV; otherwise compute headways from the GTFS
# zip and write them to the cache. Any failure becomes a warning and NULL.
gtfs_stop_headways <- tryCatch(
  {
    headways_path <- hf_or_local("gtfs_stop_headways.csv")
    if (!file.exists(headways_path)) {
      gt <- tidytransit::read_gtfs(gtfs_zip_path)
      # Window is given in seconds after midnight: 7 * 3600 to 9 * 3600.
      hw <- tidytransit::get_stop_frequency(
        gt,
        start_time = 7 * 3600,
        end_time = 9 * 3600
      ) |>
        group_by(stop_id) |>
        summarise(
          mean_headway_min = mean(mean_headway, na.rm = TRUE) / 60,
          n_departures_peak = sum(n_departures, na.rm = TRUE),
          .groups = "drop"
        ) |>
        mutate(stop_id = as.character(stop_id))
      readr::write_csv(hw, hw_csv)
      hw
    } else {
      readr::read_csv(headways_path, show_col_types = FALSE) |>
        mutate(stop_id = as.character(stop_id))
    }
  },
  error = function(e) {
    warning("tidytransit headway computation failed: ", e$message)
    NULL
  }
)
# Attach headways to the stops layer when both are available; stop_id is cast
# to character on both sides so the join keys share a type.
if (!is.null(gtfs_stops_sf) && !is.null(gtfs_stop_headways)) {
  gtfs_stops_sf <- left_join(
    mutate(gtfs_stops_sf, stop_id = as.character(stop_id)),
    gtfs_stop_headways,
    by = "stop_id"
  )
}
message("[setup_unified] data load complete.")