# ============================================================================
# Setup: Libraries
# ============================================================================
library(shinyjs)
library(shiny)
library(shinydashboard)
library(leaflet)
library(mapboxapi)
library(tidyverse)
library(tidycensus)
library(sf)
library(DT)
library(RColorBrewer)
library(terra)
library(data.table)
library(mapview)
library(sjPlot)
library(sjlabelled)
library(bslib)
library(shinycssloaders)
library(glue)
# ============================================================================
# Setup: HuggingFace base URL and cache directory
# ============================================================================
HF_BASE <- "https://huggingface.co/datasets/boettiger-lab/sf_biodiv_access/resolve/main"
# Use data/cached/ when running locally (writable), otherwise fall back to
# /tmp/sf_biodiv_cache/ for read-only environments like HuggingFace Spaces.
cache_dir <- if (file.access(".", mode = 2) == 0) "data/cached" else "/tmp/sf_biodiv_cache"
dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE)
# Helper: if the file already exists in the cache directory, return that path.
# Otherwise attempt to download it from HuggingFace into the cache directory.
# Returns the destination path regardless; callers must check file.exists() if
# the download may fail (e.g. file not yet uploaded to HF).
hf_or_local <- function(filename) {
  dest <- file.path(cache_dir, filename)
  if (!file.exists(dest)) {
    tryCatch(
      download.file(glue::glue("{HF_BASE}/{filename}"), dest, mode = "wb", quiet = TRUE),
      error = function(e) {
        # Drop any partial download so later file.exists() checks stay truthful.
        if (file.exists(dest)) unlink(dest)
        warning(glue::glue("HuggingFace download failed for {filename}: {e$message}"))
      },
      warning = function(w) warning(glue::glue("HuggingFace download warning for {filename}: {w$message}"))
    )
  }
  dest
}
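# Usage sketch (the filename below is hypothetical, for illustration only):
# callers pass a bare filename and test the returned path before reading, e.g.
#   p <- hf_or_local("some_layer.tif")
#   if (file.exists(p)) r <- terra::rast(p)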
message("[setup_unified] loading greenspace polygons (cache / HuggingFace)…")
# ============================================================================
# Load Data: Greenspace (OSM polygons)
# ============================================================================
# Shapefile bundle on HuggingFace — download sidecars into cache if needed.
greenspace_shp <- file.path(cache_dir, "greenspaces_osm_nad83.shp")
if (!file.exists(greenspace_shp)) {
  for (ext in c("shp", "dbf", "prj", "shx")) {
    hf_or_local(glue::glue("greenspaces_osm_nad83.{ext}"))
  }
}
osm_greenspace <- st_read(greenspace_shp, quiet = TRUE) |> st_transform(4326)
if (!"name" %in% names(osm_greenspace)) osm_greenspace$name <- "Unnamed Greenspace"
message("[setup_unified] loading greenspace distance rasters + NDVI…")
# ============================================================================
# Load Data: Greenspace distance rasters
# ============================================================================
greenspace_dist_raster <- terra::rast(hf_or_local("nearest_greenspace_dist.tif"))
greenspace_osmid_raster <- terra::rast(hf_or_local("nearest_greenspace_osmid.tif"))
rsfprogram_dist_raster <- terra::rast(hf_or_local("nearest_rsfprogram_dist.tif"))
rsfprogram_id_raster <- terra::rast(hf_or_local("nearest_rsfprogram_id.tif"))
# ============================================================================
# Load Data: NDVI raster
# ============================================================================
ndvi <- terra::rast(hf_or_local("SF_EastBay_NDVI_Sentinel_10.tif"))
message("[setup_unified] loading GBIF parquet + CBG polygons…")
# ============================================================================
# Load Data: GBIF observations (parquet, queried via DuckDB in server)
# ============================================================================
gbif_parquet <- hf_or_local("gbif_census_ndvi_anno.parquet")
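# The parquet is not read into memory here; per the comment above, the server
# queries it via DuckDB. A sketch of that pattern (the connection and query are
# illustrative only, not the server's actual code):
#   con <- DBI::dbConnect(duckdb::duckdb())
#   DBI::dbGetQuery(con, glue::glue(
#     "SELECT * FROM read_parquet('{gbif_parquet}') LIMIT 10"
#   ))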
# ============================================================================
# Load Data: Census block groups (CBG)
# ============================================================================
load(hf_or_local("cbg_vect_sf.Rdata"))
if (!"unique_species" %in% names(cbg_vect_sf)) cbg_vect_sf$unique_species <- cbg_vect_sf$n_species
if (!"n_observations" %in% names(cbg_vect_sf)) cbg_vect_sf$n_observations <- cbg_vect_sf$n
if (!"median_inc" %in% names(cbg_vect_sf)) cbg_vect_sf$median_inc <- cbg_vect_sf$medincE
if (!"ndvi_mean" %in% names(cbg_vect_sf)) cbg_vect_sf$ndvi_mean <- cbg_vect_sf$ndvi_sentinel
message("[setup_unified] computing CBG × greenspace overlap (vector intersect)…")
# ============================================================================
# Per-CBG greenspace overlap (computed here; no separate CSV on HuggingFace)
# ============================================================================
cbg_proj <- st_transform(cbg_vect_sf[, "GEOID"], 3857) |>
  mutate(cbg_area_m2 = as.numeric(st_area(geometry)))
gs_proj <- st_transform(osm_greenspace, 3857) |> st_make_valid()
gs_union <- st_union(gs_proj)
cbg_gs_inter <- st_intersection(cbg_proj, gs_union)
cbg_greenspace_coverage <- cbg_gs_inter |>
  mutate(greenspace_m2 = as.numeric(st_area(geometry))) |>
  st_drop_geometry() |>
  group_by(GEOID) |>
  summarise(greenspace_m2 = sum(greenspace_m2), .groups = "drop") |>
  right_join(
    cbg_proj |> st_drop_geometry() |> dplyr::select(GEOID, cbg_area_m2),
    by = "GEOID"
  ) |>
  mutate(
    greenspace_m2 = tidyr::replace_na(greenspace_m2, 0),
    GEOID = as.character(GEOID)
  )
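# Optional sanity check (a sketch, not required by anything downstream): each
# CBG's greenspace overlap should not exceed its own area, so the fraction
# should stay within [0, 1].
gs_frac <- cbg_greenspace_coverage$greenspace_m2 / cbg_greenspace_coverage$cbg_area_m2
message(sprintf(
  "[setup_unified] CBG greenspace fraction range: %.3f to %.3f",
  min(gs_frac, na.rm = TRUE), max(gs_frac, na.rm = TRUE)
))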
message("[setup_unified] loading biodiversity hotspots / coldspots…")
# ============================================================================
# Load Data: Biodiversity hotspots / coldspots
# ============================================================================
hotspots_shp <- file.path(cache_dir, "hotspots.shp")
if (!file.exists(hotspots_shp)) {
  for (ext in c("shp", "dbf", "prj", "shx")) hf_or_local(glue::glue("hotspots.{ext}"))
}
biodiv_hotspots <- st_read(hotspots_shp, quiet = TRUE) |> st_transform(4326)
coldspots_shp <- file.path(cache_dir, "coldspots.shp")
if (!file.exists(coldspots_shp)) {
  for (ext in c("shp", "dbf", "prj", "shx")) hf_or_local(glue::glue("coldspots.{ext}"))
}
biodiv_coldspots <- st_read(coldspots_shp, quiet = TRUE) |> st_transform(4326)
message("[setup_unified] loading RSF, CalEnviroScreen, SF EJ layers…")
# ============================================================================
# Load Data: RSF Program Projects
# ============================================================================
rsf_projects <- st_read(hf_or_local("RSF_Program_Projects_polygons.gpkg"), quiet = TRUE) |>
  st_transform(4326)
# ============================================================================
# Load Data: CalEnviroScreen 4.0 (pre-filtered to SF)
# ============================================================================
cenv_sf <- tryCatch({
  sf::st_read(hf_or_local("calenviro_sf.gpkg"), quiet = TRUE)
}, error = function(e) {
  warning("CalEnviroScreen failed to load: ", e$message); NULL
})
# ============================================================================
# Load Data: SF Environmental Justice Communities
# ============================================================================
sf_ej_sf <- tryCatch({
  sf::st_read(hf_or_local("sf_ej_communities_map.gpkg"), quiet = TRUE) |>
    dplyr::mutate(
      symbol_hex = stringr::str_split(symbol_rgb, ",\\s*") |>
        lapply(function(x) sprintf("#%02X%02X%02X",
                                   as.integer(x[1]), as.integer(x[2]), as.integer(x[3]))) |>
        unlist(),
      ej_label = dplyr::case_when(
        is.na(score) ~ "Not EJ",
        score >= 21 ~ "High EJ burden (21-30)",
        score >= 11 ~ "Moderate EJ burden (11-20)",
        score >= 1 ~ "Low EJ burden (1-10)",
        score == 0 ~ "Score 0",
        TRUE ~ "Unknown"
      )
    )
}, error = function(e) {
  warning("SF EJ layer failed to load: ", e$message); NULL
})
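# Worked example of the colour conversion above (illustrative input only): a
# symbol_rgb value of "230, 0, 0" is split into c("230", "0", "0") and becomes
# sprintf("#%02X%02X%02X", 230, 0, 0), i.e. "#E60000".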
message("[setup_unified] loading GTFS (zip, stops, shapes, timetable, headways)…")
# ============================================================================
# Load Data: GTFS (SF Muni)
# ============================================================================
gtfs_zip_path <- hf_or_local("sf_muni_gtfs.zip")
# Unzip so the GTFS .txt tables (stops, shapes, trips, routes) can be read with
# read.csv(); tidytransit and gtfsrouter read the .zip directly (gtfsio needs a zip path).
gtfs_unzip_dir <- file.path(cache_dir, "muni_gtfs")
dir.create(gtfs_unzip_dir, recursive = TRUE, showWarnings = FALSE)
# dir.create() above guarantees the directory exists, so only check for extracted files.
if (length(list.files(gtfs_unzip_dir, pattern = "\\.txt$")) == 0L) {
  unzip(gtfs_zip_path, exdir = gtfs_unzip_dir, overwrite = TRUE)
}
gtfs_path <- gtfs_unzip_dir
# --- Transit stops -----------------------------------------------------------
gtfs_stops_sf <- tryCatch({
  read.csv(file.path(gtfs_path, "stops.txt")) |>
    st_as_sf(coords = c("stop_lon", "stop_lat"), crs = 4326)
}, error = function(e) { warning("GTFS stops failed to load: ", e$message); NULL })
# --- Route shapes ------------------------------------------------------------
gtfs_routes_sf <- tryCatch({
  gtfs_shapes_raw <- read.csv(file.path(gtfs_path, "shapes.txt"))
  gtfs_trips_raw <- read.csv(file.path(gtfs_path, "trips.txt"))
  gtfs_routes_raw <- read.csv(file.path(gtfs_path, "routes.txt"))
  shape_route_map <- gtfs_trips_raw |> distinct(shape_id, route_id)
  route_meta <- gtfs_routes_raw |>
    select(route_id, route_short_name, route_long_name, route_color) |>
    mutate(route_color_hex = paste0("#", trimws(route_color)))
  shapes_split <- gtfs_shapes_raw |>
    arrange(shape_id, shape_pt_sequence) |>
    group_by(shape_id) |>
    group_split()
  shape_geoms <- lapply(shapes_split, function(s) {
    st_linestring(cbind(s$shape_pt_lon, s$shape_pt_lat))
  })
  st_sf(
    shape_id = sapply(shapes_split, function(s) s$shape_id[1]),
    geometry = st_sfc(shape_geoms, crs = 4326)
  ) |>
    left_join(shape_route_map, by = "shape_id") |>
    left_join(route_meta, by = "route_id")
}, error = function(e) { warning("GTFS route shapes failed to load: ", e$message); NULL })
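# route_color_hex feeds map styling downstream; a sketch of the kind of call the
# server might make (the `map` proxy object is assumed, not defined here):
#   leaflet::addPolylines(map, data = gtfs_routes_sf,
#                         color = ~route_color_hex, weight = 2,
#                         label = ~route_short_name)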
# --- gtfsrouter timetable ----------------------------------------------------
gtfs_router <- tryCatch({
  timetable_path <- hf_or_local("gtfs_timetable_monday.rds")
  if (file.exists(timetable_path)) {
    readRDS(timetable_path)
  } else {
    gr <- gtfsrouter::extract_gtfs(gtfs_zip_path)
    result <- gtfsrouter::gtfs_timetable(gr, day = "Monday")
    saveRDS(result, file.path(cache_dir, "gtfs_timetable_monday.rds"))
    result
  }
}, error = function(e) { warning("gtfsrouter failed to initialise: ", e$message); NULL })
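# The server presumably uses this timetable for transit travel-time queries; a
# sketch of one such call (stop id and time window are illustrative only):
#   gtfsrouter::gtfs_traveltimes(gtfs_router, from = "some_stop_id",
#                                from_is_id = TRUE,
#                                start_time_limits = c(8, 9) * 3600)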
# --- Pre-computed transit isochrone cache ------------------------------------
transit_iso_cache <- tryCatch({
  p <- file.path(cache_dir, "transit_iso_cache.rds")
  if (file.exists(p)) readRDS(p) else NULL
}, error = function(e) { NULL })
# --- Stop headways (AM peak 7-9am): cached as CSV (readable / diffable) -------
# gtfsrouter timetable stays .rds (opaque R object); this table is just columns.
hw_csv <- file.path(cache_dir, "gtfs_stop_headways.csv")
hw_rds <- file.path(cache_dir, "gtfs_stop_headways.rds")
if (!file.exists(hw_csv) && file.exists(hw_rds)) {
  readRDS(hw_rds) |> readr::write_csv(hw_csv)
}
gtfs_stop_headways <- tryCatch({
  headways_path <- hf_or_local("gtfs_stop_headways.csv")
  if (file.exists(headways_path)) {
    readr::read_csv(headways_path, show_col_types = FALSE) |>
      mutate(stop_id = as.character(stop_id))
  } else {
    gt <- tidytransit::read_gtfs(gtfs_zip_path)
    hw <- tidytransit::get_stop_frequency(gt, start_time = 7 * 3600, end_time = 9 * 3600) |>
      group_by(stop_id) |>
      summarise(
        mean_headway_min = mean(mean_headway, na.rm = TRUE) / 60,
        n_departures_peak = sum(n_departures, na.rm = TRUE),
        .groups = "drop"
      ) |>
      mutate(stop_id = as.character(stop_id))
    readr::write_csv(hw, hw_csv)
    hw
  }
}, error = function(e) { warning("tidytransit headway computation failed: ", e$message); NULL })
if (!is.null(gtfs_stop_headways) && !is.null(gtfs_stops_sf)) {
  gtfs_stops_sf <- gtfs_stops_sf |>
    mutate(stop_id = as.character(stop_id)) |>
    left_join(gtfs_stop_headways, by = "stop_id")
}
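# Optional diagnostic (a sketch; safe to drop): report how many stops picked up
# AM-peak headway values from the join above.
if (!is.null(gtfs_stops_sf) && "mean_headway_min" %in% names(gtfs_stops_sf)) {
  message(sprintf(
    "[setup_unified] stops with AM-peak headways: %d of %d",
    sum(!is.na(gtfs_stops_sf$mean_headway_min)), nrow(gtfs_stops_sf)
  ))
}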
message("[setup_unified] data load complete.")