# ============================================================================
# Setup: Libraries
# ============================================================================
library(shinyjs)
library(shiny)
library(shinydashboard)
library(leaflet)
library(mapboxapi)
library(tidyverse)
library(tidycensus)
library(sf)
library(DT)
library(RColorBrewer)
library(terra)
library(data.table)
library(mapview)
library(sjPlot)
library(sjlabelled)
library(bslib)
library(shinycssloaders)
library(glue)
# ============================================================================
# Setup: HuggingFace base URL and cache directory
# ============================================================================
HF_BASE <- "https://huggingface.co/datasets/boettiger-lab/sf_biodiv_access/resolve/main"
# Use data/cached/ when running locally (writable), otherwise fall back to
# /tmp/sf_biodiv_cache/ for read-only environments like HuggingFace Spaces.
cache_dir <- if (file.access(".", mode = 2) == 0) "data/cached" else "/tmp/sf_biodiv_cache"
dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE)
# Helper: if the file already exists in the cache directory, return that path.
# Otherwise attempt to download it from HuggingFace into the cache directory.
# Returns the destination path regardless; callers must check file.exists() if
# the download may fail (e.g. file not yet uploaded to HF).
hf_or_local <- function(filename) {
  dest <- file.path(cache_dir, filename)
  if (!file.exists(dest)) {
    tryCatch(
      download.file(glue::glue("{HF_BASE}/{filename}"), dest, mode = "wb", quiet = TRUE),
      error = function(e) {
        # Drop any partial download so later file.exists() checks stay truthful.
        if (file.exists(dest)) unlink(dest)
        warning(glue::glue("HuggingFace download failed for {filename}: {e$message}"))
      },
      warning = function(w) warning(glue::glue("HuggingFace download warning for {filename}: {w$message}"))
    )
  }
  dest
}
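# Usage sketch (the filename below is hypothetical, for illustration only):
# callers pass a bare filename and test the returned path before reading, e.g.
#   p <- hf_or_local("some_layer.tif")
#   if (file.exists(p)) r <- terra::rast(p)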
message("[setup_unified] loading greenspace polygons (cache / HuggingFace)…")
# ============================================================================
# Load Data: Greenspace (OSM polygons)
# ============================================================================
# Shapefile bundle on HuggingFace — download sidecars into cache if needed.
greenspace_shp <- file.path(cache_dir, "greenspaces_osm_nad83.shp")
if (!file.exists(greenspace_shp)) {
  for (ext in c("shp", "dbf", "prj", "shx")) {
    hf_or_local(glue::glue("greenspaces_osm_nad83.{ext}"))
  }
}
osm_greenspace <- st_read(greenspace_shp, quiet = TRUE) |> st_transform(4326)
if (!"name" %in% names(osm_greenspace)) osm_greenspace$name <- "Unnamed Greenspace"
message("[setup_unified] loading greenspace distance rasters + NDVI…")
# ============================================================================
# Load Data: Greenspace distance rasters
# ============================================================================
greenspace_dist_raster <- terra::rast(hf_or_local("nearest_greenspace_dist.tif"))
greenspace_osmid_raster <- terra::rast(hf_or_local("nearest_greenspace_osmid.tif"))
rsfprogram_dist_raster <- terra::rast(hf_or_local("nearest_rsfprogram_dist.tif"))
rsfprogram_id_raster <- terra::rast(hf_or_local("nearest_rsfprogram_id.tif"))
# ============================================================================
# Load Data: NDVI raster
# ============================================================================
ndvi <- terra::rast(hf_or_local("SF_EastBay_NDVI_Sentinel_10.tif"))
message("[setup_unified] loading GBIF parquet + CBG polygons…")
# ============================================================================
# Load Data: GBIF observations (parquet, queried via DuckDB in server)
# ============================================================================
gbif_parquet <- hf_or_local("gbif_census_ndvi_anno.parquet")
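# The parquet is not read into memory here; per the comment above, the server
# queries it via DuckDB. A sketch of that pattern (the connection and query are
# illustrative only, not the server's actual code):
#   con <- DBI::dbConnect(duckdb::duckdb())
#   DBI::dbGetQuery(con, glue::glue(
#     "SELECT * FROM read_parquet('{gbif_parquet}') LIMIT 10"
#   ))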
# ============================================================================
# Load Data: Census block groups (CBG)
# ============================================================================
load(hf_or_local("cbg_vect_sf.Rdata"))
if (!"unique_species" %in% names(cbg_vect_sf)) cbg_vect_sf$unique_species <- cbg_vect_sf$n_species
if (!"n_observations" %in% names(cbg_vect_sf)) cbg_vect_sf$n_observations <- cbg_vect_sf$n
if (!"median_inc" %in% names(cbg_vect_sf)) cbg_vect_sf$median_inc <- cbg_vect_sf$medincE
if (!"ndvi_mean" %in% names(cbg_vect_sf)) cbg_vect_sf$ndvi_mean <- cbg_vect_sf$ndvi_sentinel
message("[setup_unified] computing CBG × greenspace overlap (vector intersect)…")
# ============================================================================
# Per-CBG greenspace overlap (computed here; no separate CSV on HuggingFace)
# ============================================================================
cbg_proj <- st_transform(cbg_vect_sf[, "GEOID"], 3857) |>
  mutate(cbg_area_m2 = as.numeric(st_area(geometry)))
gs_proj <- st_transform(osm_greenspace, 3857) |> st_make_valid()
gs_union <- st_union(gs_proj)
cbg_gs_inter <- st_intersection(cbg_proj, gs_union)
cbg_greenspace_coverage <- cbg_gs_inter |>
  mutate(greenspace_m2 = as.numeric(st_area(geometry))) |>
  st_drop_geometry() |>
  group_by(GEOID) |>
  summarise(greenspace_m2 = sum(greenspace_m2), .groups = "drop") |>
  right_join(
    cbg_proj |> st_drop_geometry() |> dplyr::select(GEOID, cbg_area_m2),
    by = "GEOID"
  ) |>
  mutate(
    greenspace_m2 = tidyr::replace_na(greenspace_m2, 0),
    GEOID = as.character(GEOID)
  )
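# Optional sanity check (a sketch, not required by anything downstream): each
# CBG's greenspace overlap should not exceed its own area, so the fraction
# should stay within [0, 1].
gs_frac <- cbg_greenspace_coverage$greenspace_m2 / cbg_greenspace_coverage$cbg_area_m2
message(sprintf(
  "[setup_unified] CBG greenspace fraction range: %.3f to %.3f",
  min(gs_frac, na.rm = TRUE), max(gs_frac, na.rm = TRUE)
))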
message("[setup_unified] loading biodiversity hotspots / coldspots…")
# ============================================================================
# Load Data: Biodiversity hotspots / coldspots
# ============================================================================
hotspots_shp <- file.path(cache_dir, "hotspots.shp")
if (!file.exists(hotspots_shp)) {
  for (ext in c("shp", "dbf", "prj", "shx")) hf_or_local(glue::glue("hotspots.{ext}"))
}
biodiv_hotspots <- st_read(hotspots_shp, quiet = TRUE) |> st_transform(4326)
coldspots_shp <- file.path(cache_dir, "coldspots.shp")
if (!file.exists(coldspots_shp)) {
  for (ext in c("shp", "dbf", "prj", "shx")) hf_or_local(glue::glue("coldspots.{ext}"))
}
biodiv_coldspots <- st_read(coldspots_shp, quiet = TRUE) |> st_transform(4326)
message("[setup_unified] loading RSF, CalEnviroScreen, SF EJ layers…")
# ============================================================================
# Load Data: RSF Program Projects
# ============================================================================
rsf_projects <- st_read(hf_or_local("RSF_Program_Projects_polygons.gpkg"), quiet = TRUE) |>
  st_transform(4326)
# ============================================================================
# Load Data: CalEnviroScreen 4.0 (pre-filtered to SF)
# ============================================================================
cenv_sf <- tryCatch({
  sf::st_read(hf_or_local("calenviro_sf.gpkg"), quiet = TRUE)
}, error = function(e) {
  warning("CalEnviroScreen failed to load: ", e$message); NULL
})
# ============================================================================
# Load Data: SF Environmental Justice Communities
# ============================================================================
sf_ej_sf <- tryCatch({
  sf::st_read(hf_or_local("sf_ej_communities_map.gpkg"), quiet = TRUE) |>
    dplyr::mutate(
      symbol_hex = stringr::str_split(symbol_rgb, ",\\s*") |>
        lapply(function(x) sprintf("#%02X%02X%02X",
                                   as.integer(x[1]), as.integer(x[2]), as.integer(x[3]))) |>
        unlist(),
      ej_label = dplyr::case_when(
        is.na(score) ~ "Not EJ",
        score >= 21 ~ "High EJ burden (21-30)",
        score >= 11 ~ "Moderate EJ burden (11-20)",
        score >= 1 ~ "Low EJ burden (1-10)",
        score == 0 ~ "Score 0",
        TRUE ~ "Unknown"
      )
    )
}, error = function(e) {
  warning("SF EJ layer failed to load: ", e$message); NULL
})
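# Worked example of the colour conversion above (illustrative input only): a
# symbol_rgb value of "230, 0, 0" is split into c("230", "0", "0") and becomes
# sprintf("#%02X%02X%02X", 230, 0, 0), i.e. "#E60000".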
message("[setup_unified] loading GTFS (zip, stops, shapes, timetable, headways)…")
# ============================================================================
# Load Data: GTFS (SF Muni)
# ============================================================================
gtfs_zip_path <- hf_or_local("sf_muni_gtfs.zip")
# Unzip so the GTFS .txt tables (stops, shapes, trips, routes) can be read with
# read.csv(); tidytransit and gtfsrouter read the .zip directly (gtfsio needs a zip path).
gtfs_unzip_dir <- file.path(cache_dir, "muni_gtfs")
dir.create(gtfs_unzip_dir, recursive = TRUE, showWarnings = FALSE)
# dir.create() above guarantees the directory exists, so only check for extracted files.
if (length(list.files(gtfs_unzip_dir, pattern = "\\.txt$")) == 0L) {
  unzip(gtfs_zip_path, exdir = gtfs_unzip_dir, overwrite = TRUE)
}
gtfs_path <- gtfs_unzip_dir
# --- Transit stops -----------------------------------------------------------
gtfs_stops_sf <- tryCatch({
  read.csv(file.path(gtfs_path, "stops.txt")) |>
    st_as_sf(coords = c("stop_lon", "stop_lat"), crs = 4326)
}, error = function(e) { warning("GTFS stops failed to load: ", e$message); NULL })
# --- Route shapes ------------------------------------------------------------
gtfs_routes_sf <- tryCatch({
  gtfs_shapes_raw <- read.csv(file.path(gtfs_path, "shapes.txt"))
  gtfs_trips_raw <- read.csv(file.path(gtfs_path, "trips.txt"))
  gtfs_routes_raw <- read.csv(file.path(gtfs_path, "routes.txt"))
  shape_route_map <- gtfs_trips_raw |> distinct(shape_id, route_id)
  route_meta <- gtfs_routes_raw |>
    select(route_id, route_short_name, route_long_name, route_color) |>
    mutate(route_color_hex = paste0("#", trimws(route_color)))
  shapes_split <- gtfs_shapes_raw |>
    arrange(shape_id, shape_pt_sequence) |>
    group_by(shape_id) |>
    group_split()
  shape_geoms <- lapply(shapes_split, function(s) {
    st_linestring(cbind(s$shape_pt_lon, s$shape_pt_lat))
  })
  st_sf(
    shape_id = sapply(shapes_split, function(s) s$shape_id[1]),
    geometry = st_sfc(shape_geoms, crs = 4326)
  ) |>
    left_join(shape_route_map, by = "shape_id") |>
    left_join(route_meta, by = "route_id")
}, error = function(e) { warning("GTFS route shapes failed to load: ", e$message); NULL })
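# route_color_hex feeds map styling downstream; a sketch of the kind of call the
# server might make (the `map` proxy object is assumed, not defined here):
#   leaflet::addPolylines(map, data = gtfs_routes_sf,
#                         color = ~route_color_hex, weight = 2,
#                         label = ~route_short_name)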
# --- gtfsrouter timetable ----------------------------------------------------
gtfs_router <- tryCatch({
  timetable_path <- hf_or_local("gtfs_timetable_monday.rds")
  if (file.exists(timetable_path)) {
    readRDS(timetable_path)
  } else {
    gr <- gtfsrouter::extract_gtfs(gtfs_zip_path)
    result <- gtfsrouter::gtfs_timetable(gr, day = "Monday")
    saveRDS(result, file.path(cache_dir, "gtfs_timetable_monday.rds"))
    result
  }
}, error = function(e) { warning("gtfsrouter failed to initialise: ", e$message); NULL })
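# The server presumably uses this timetable for transit travel-time queries; a
# sketch of one such call (stop id and time window are illustrative only):
#   gtfsrouter::gtfs_traveltimes(gtfs_router, from = "some_stop_id",
#                                from_is_id = TRUE,
#                                start_time_limits = c(8, 9) * 3600)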
# --- Pre-computed transit isochrone cache ------------------------------------
transit_iso_cache <- tryCatch({
  p <- file.path(cache_dir, "transit_iso_cache.rds")
  if (file.exists(p)) readRDS(p) else NULL
}, error = function(e) { NULL })
# --- Stop headways (AM peak 7-9am): cached as CSV (readable / diffable) -------
# gtfsrouter timetable stays .rds (opaque R object); this table is just columns.
hw_csv <- file.path(cache_dir, "gtfs_stop_headways.csv")
hw_rds <- file.path(cache_dir, "gtfs_stop_headways.rds")
if (!file.exists(hw_csv) && file.exists(hw_rds)) {
  readRDS(hw_rds) |> readr::write_csv(hw_csv)
}
gtfs_stop_headways <- tryCatch({
  headways_path <- hf_or_local("gtfs_stop_headways.csv")
  if (file.exists(headways_path)) {
    readr::read_csv(headways_path, show_col_types = FALSE) |>
      mutate(stop_id = as.character(stop_id))
  } else {
    gt <- tidytransit::read_gtfs(gtfs_zip_path)
    hw <- tidytransit::get_stop_frequency(gt, start_time = 7 * 3600, end_time = 9 * 3600) |>
      group_by(stop_id) |>
      summarise(
        mean_headway_min = mean(mean_headway, na.rm = TRUE) / 60,
        n_departures_peak = sum(n_departures, na.rm = TRUE),
        .groups = "drop"
      ) |>
      mutate(stop_id = as.character(stop_id))
    readr::write_csv(hw, hw_csv)
    hw
  }
}, error = function(e) { warning("tidytransit headway computation failed: ", e$message); NULL })
if (!is.null(gtfs_stop_headways) && !is.null(gtfs_stops_sf)) {
  gtfs_stops_sf <- gtfs_stops_sf |>
    mutate(stop_id = as.character(stop_id)) |>
    left_join(gtfs_stop_headways, by = "stop_id")
}
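# Optional diagnostic (a sketch; safe to drop): report how many stops picked up
# AM-peak headway values from the join above.
if (!is.null(gtfs_stops_sf) && "mean_headway_min" %in% names(gtfs_stops_sf)) {
  message(sprintf(
    "[setup_unified] stops with AM-peak headways: %d of %d",
    sum(!is.na(gtfs_stops_sf$mean_headway_min)), nrow(gtfs_stops_sf)
  ))
}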
message("[setup_unified] data load complete.")