# ============================================================================
# Setup: Libraries
# ============================================================================
library(shinyjs)
library(shiny)
library(shinydashboard)
library(leaflet)
library(mapboxapi)
library(tidyverse)
library(tidycensus)
library(sf)
library(DT)
library(RColorBrewer)
library(terra)
library(data.table)
library(mapview)
library(sjPlot)
library(sjlabelled)
library(bslib)
library(shinycssloaders)
library(glue)

# ============================================================================
# Setup: HuggingFace base URL and cache directory
# ============================================================================
HF_BASE <- "https://huggingface.co/datasets/boettiger-lab/sf_biodiv_access/resolve/main"

# Use data/cached/ when running locally (writable working directory); otherwise
# fall back to /tmp/sf_biodiv_cache/ for read-only environments such as
# HuggingFace Spaces.
cache_dir <- if (file.access(".", mode = 2) == 0) "data/cached" else "/tmp/sf_biodiv_cache"
dir.create(cache_dir, recursive = TRUE, showWarnings = FALSE)

# Helper: if the file already exists in the cache directory, return that path.
# Otherwise attempt to download it from HuggingFace into the cache.
# The destination path is returned regardless of outcome, so the caller must
# check file.exists() if the download may fail (e.g. file not yet uploaded to HF).
hf_or_local <- function(filename) {
  dest <- file.path(cache_dir, filename)
  if (!file.exists(dest)) {
    tryCatch(
      download.file(glue::glue("{HF_BASE}/{filename}"), dest, mode = "wb", quiet = TRUE),
      error   = function(e) warning(glue::glue("HuggingFace download failed for {filename}: {e$message}")),
      warning = function(w) warning(glue::glue("HuggingFace download warning for {filename}: {w$message}"))
    )
  }
  dest
}
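# Illustrative only (not run): a minimal sketch of the intended calling pattern
# for hf_or_local(). The file name below is hypothetical; real callers appear
# throughout this script and should test file.exists() when a download can fail.
if (FALSE) {
  example_path <- hf_or_local("some_layer.gpkg")   # hypothetical file name
  if (file.exists(example_path)) {
    example_layer <- sf::st_read(example_path, quiet = TRUE)
  } else {
    warning("some_layer.gpkg is not available locally or on HuggingFace")
  }
}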
message("[setup_unified] loading greenspace polygons (cache / HuggingFace)…")

# ============================================================================
# Load Data: Greenspace (OSM polygons)
# ============================================================================
# Shapefile bundle on HuggingFace: download the sidecar files into the cache if needed.
greenspace_shp <- file.path(cache_dir, "greenspaces_osm_nad83.shp")
if (!file.exists(greenspace_shp)) {
  for (ext in c("shp", "dbf", "prj", "shx")) {
    hf_or_local(glue::glue("greenspaces_osm_nad83.{ext}"))
  }
}
osm_greenspace <- st_read(greenspace_shp, quiet = TRUE) |> st_transform(4326)
if (!"name" %in% names(osm_greenspace)) osm_greenspace$name <- "Unnamed Greenspace"

message("[setup_unified] loading greenspace distance rasters + NDVI…")

# ============================================================================
# Load Data: Greenspace distance rasters
# ============================================================================
greenspace_dist_raster  <- terra::rast(hf_or_local("nearest_greenspace_dist.tif"))
greenspace_osmid_raster <- terra::rast(hf_or_local("nearest_greenspace_osmid.tif"))
rsfprogram_dist_raster  <- terra::rast(hf_or_local("nearest_rsfprogram_dist.tif"))
rsfprogram_id_raster    <- terra::rast(hf_or_local("nearest_rsfprogram_id.tif"))

# ============================================================================
# Load Data: NDVI raster
# ============================================================================
ndvi <- terra::rast(hf_or_local("SF_EastBay_NDVI_Sentinel_10.tif"))

message("[setup_unified] loading GBIF parquet + CBG polygons…")

# ============================================================================
# Load Data: GBIF observations (parquet, queried via DuckDB in the server)
# ============================================================================
gbif_parquet <- hf_or_local("gbif_census_ndvi_anno.parquet")

# ============================================================================
# Load Data: Census block groups (CBG)
# ============================================================================
load(hf_or_local("cbg_vect_sf.Rdata"))
if (!"unique_species" %in% names(cbg_vect_sf)) cbg_vect_sf$unique_species <- cbg_vect_sf$n_species
if (!"n_observations" %in% names(cbg_vect_sf)) cbg_vect_sf$n_observations <- cbg_vect_sf$n
if (!"median_inc" %in% names(cbg_vect_sf)) cbg_vect_sf$median_inc <- cbg_vect_sf$medincE
if (!"ndvi_mean" %in% names(cbg_vect_sf)) cbg_vect_sf$ndvi_mean <- cbg_vect_sf$ndvi_sentinel

message("[setup_unified] computing CBG × greenspace overlap (vector intersect)…")

# ============================================================================
# Per-CBG greenspace overlap (computed here; no separate CSV on HuggingFace)
# ============================================================================
cbg_proj <- st_transform(cbg_vect_sf[, "GEOID"], 3857) |>
  mutate(cbg_area_m2 = as.numeric(st_area(geometry)))
gs_proj  <- st_transform(osm_greenspace, 3857) |> st_make_valid()
gs_union <- st_union(gs_proj)
cbg_gs_inter <- st_intersection(cbg_proj, gs_union)

cbg_greenspace_coverage <- cbg_gs_inter |>
  mutate(greenspace_m2 = as.numeric(st_area(geometry))) |>
  st_drop_geometry() |>
  group_by(GEOID) |>
  summarise(greenspace_m2 = sum(greenspace_m2), .groups = "drop") |>
  right_join(
    cbg_proj |> st_drop_geometry() |> dplyr::select(GEOID, cbg_area_m2),
    by = "GEOID"
  ) |>
  mutate(
    greenspace_m2 = tidyr::replace_na(greenspace_m2, 0),
    GEOID = as.character(GEOID)
  )
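# Illustrative only (not run): a minimal sketch, assuming the server derives a
# per-CBG greenspace share from the coverage table above. The pct_greenspace
# column and the join back onto cbg_vect_sf are assumptions, not part of setup.
if (FALSE) {
  cbg_greenspace_pct <- cbg_greenspace_coverage |>
    mutate(pct_greenspace = 100 * greenspace_m2 / cbg_area_m2)   # hypothetical derived column
  cbg_vect_sf |>
    mutate(GEOID = as.character(GEOID)) |>
    left_join(cbg_greenspace_pct, by = "GEOID")
}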
message("[setup_unified] loading biodiversity hotspots / coldspots…")

# ============================================================================
# Load Data: Biodiversity hotspots / coldspots
# ============================================================================
hotspots_shp <- file.path(cache_dir, "hotspots.shp")
if (!file.exists(hotspots_shp)) {
  for (ext in c("shp", "dbf", "prj", "shx")) {
    hf_or_local(glue::glue("hotspots.{ext}"))
  }
}
biodiv_hotspots <- st_read(hotspots_shp, quiet = TRUE) |> st_transform(4326)

coldspots_shp <- file.path(cache_dir, "coldspots.shp")
if (!file.exists(coldspots_shp)) {
  for (ext in c("shp", "dbf", "prj", "shx")) {
    hf_or_local(glue::glue("coldspots.{ext}"))
  }
}
biodiv_coldspots <- st_read(coldspots_shp, quiet = TRUE) |> st_transform(4326)

message("[setup_unified] loading RSF, CalEnviroScreen, SF EJ layers…")

# ============================================================================
# Load Data: RSF Program Projects
# ============================================================================
rsf_projects <- st_read(hf_or_local("RSF_Program_Projects_polygons.gpkg"), quiet = TRUE) |>
  st_transform(4326)

# ============================================================================
# Load Data: CalEnviroScreen 4.0 (pre-filtered to SF)
# ============================================================================
cenv_sf <- tryCatch({
  sf::st_read(hf_or_local("calenviro_sf.gpkg"), quiet = TRUE)
}, error = function(e) {
  warning("CalEnviroScreen failed to load: ", e$message)
  NULL
})

# ============================================================================
# Load Data: SF Environmental Justice Communities
# ============================================================================
sf_ej_sf <- tryCatch({
  sf::st_read(hf_or_local("sf_ej_communities_map.gpkg"), quiet = TRUE) |>
    dplyr::mutate(
      # Convert the "R,G,B" string in symbol_rgb to a hex colour for mapping.
      symbol_hex = stringr::str_split(symbol_rgb, ",\\s*") |>
        lapply(function(x) sprintf("#%02X%02X%02X", as.integer(x[1]), as.integer(x[2]), as.integer(x[3]))) |>
        unlist(),
      ej_label = dplyr::case_when(
        is.na(score) ~ "Not EJ",
        score >= 21  ~ "High EJ burden (21-30)",
        score >= 11  ~ "Moderate EJ burden (11-20)",
        score >= 1   ~ "Low EJ burden (1-10)",
        score == 0   ~ "Score 0",
        TRUE         ~ "Unknown"
      )
    )
}, error = function(e) {
  warning("SF EJ layer failed to load: ", e$message)
  NULL
})
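# Illustrative only (not run): a minimal sketch of how the EJ polygons could be
# drawn using the symbol_hex and ej_label columns prepared above. The actual map
# construction lives in the server code and may differ; this block is an assumption.
if (FALSE) {
  if (!is.null(sf_ej_sf)) {
    leaflet(sf_ej_sf) |>
      addProviderTiles(providers$CartoDB.Positron) |>
      addPolygons(
        fillColor   = ~symbol_hex,
        fillOpacity = 0.6,
        weight      = 1,
        color       = "#444444",
        label       = ~ej_label
      )
  }
}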
message("[setup_unified] loading GTFS (zip, stops, shapes, timetable, headways)…")

# ============================================================================
# Load Data: GTFS (SF Muni)
# ============================================================================
gtfs_zip_path <- hf_or_local("sf_muni_gtfs.zip")

# Unzip so the individual .txt tables can be read with read.csv(); tidytransit
# and gtfsrouter read the .zip directly (gtfsio requires a zip path).
gtfs_unzip_dir <- file.path(cache_dir, "muni_gtfs")
dir.create(gtfs_unzip_dir, recursive = TRUE, showWarnings = FALSE)
if (!dir.exists(gtfs_unzip_dir) || length(list.files(gtfs_unzip_dir, pattern = "\\.txt$")) == 0L) {
  unzip(gtfs_zip_path, exdir = gtfs_unzip_dir, overwrite = TRUE)
}
gtfs_path <- gtfs_unzip_dir

# --- Transit stops -----------------------------------------------------------
gtfs_stops_sf <- tryCatch({
  read.csv(file.path(gtfs_path, "stops.txt")) |>
    st_as_sf(coords = c("stop_lon", "stop_lat"), crs = 4326)
}, error = function(e) {
  warning("GTFS stops failed to load: ", e$message)
  NULL
})

# --- Route shapes ------------------------------------------------------------
gtfs_routes_sf <- tryCatch({
  gtfs_shapes_raw <- read.csv(file.path(gtfs_path, "shapes.txt"))
  gtfs_trips_raw  <- read.csv(file.path(gtfs_path, "trips.txt"))
  gtfs_routes_raw <- read.csv(file.path(gtfs_path, "routes.txt"))

  shape_route_map <- gtfs_trips_raw |> distinct(shape_id, route_id)
  route_meta <- gtfs_routes_raw |>
    select(route_id, route_short_name, route_long_name, route_color) |>
    mutate(route_color_hex = paste0("#", trimws(route_color)))

  # Build one LINESTRING per shape_id from the ordered shape points.
  shapes_split <- gtfs_shapes_raw |>
    arrange(shape_id, shape_pt_sequence) |>
    group_by(shape_id) |>
    group_split()
  shape_geoms <- lapply(shapes_split, function(s) {
    st_linestring(cbind(s$shape_pt_lon, s$shape_pt_lat))
  })

  st_sf(
    shape_id = sapply(shapes_split, function(s) s$shape_id[1]),
    geometry = st_sfc(shape_geoms, crs = 4326)
  ) |>
    left_join(shape_route_map, by = "shape_id") |>
    left_join(route_meta, by = "route_id")
}, error = function(e) {
  warning("GTFS route shapes failed to load: ", e$message)
  NULL
})

# --- gtfsrouter timetable ----------------------------------------------------
gtfs_router <- tryCatch({
  timetable_path <- hf_or_local("gtfs_timetable_monday.rds")
  if (file.exists(timetable_path)) {
    readRDS(timetable_path)
  } else {
    gr <- gtfsrouter::extract_gtfs(gtfs_zip_path)
    result <- gtfsrouter::gtfs_timetable(gr, day = "Monday")
    saveRDS(result, file.path(cache_dir, "gtfs_timetable_monday.rds"))
    result
  }
}, error = function(e) {
  warning("gtfsrouter failed to initialise: ", e$message)
  NULL
})

# --- Pre-computed transit isochrone cache ------------------------------------
transit_iso_cache <- tryCatch({
  p <- file.path(cache_dir, "transit_iso_cache.rds")
  if (file.exists(p)) readRDS(p) else NULL
}, error = function(e) NULL)

# --- Stop headways (AM peak, 7-9 am): cached as CSV (readable / diffable) -----
# The gtfsrouter timetable stays .rds (an opaque R object); this table is plain
# columns, so it is cached as CSV. Migrate any legacy .rds cache to CSV first.
hw_csv <- file.path(cache_dir, "gtfs_stop_headways.csv")
hw_rds <- file.path(cache_dir, "gtfs_stop_headways.rds")
if (!file.exists(hw_csv) && file.exists(hw_rds)) {
  readRDS(hw_rds) |> readr::write_csv(hw_csv)
}

gtfs_stop_headways <- tryCatch({
  headways_path <- hf_or_local("gtfs_stop_headways.csv")
  if (file.exists(headways_path)) {
    readr::read_csv(headways_path, show_col_types = FALSE) |>
      mutate(stop_id = as.character(stop_id))
  } else {
    gt <- tidytransit::read_gtfs(gtfs_zip_path)
    hw <- tidytransit::get_stop_frequency(gt, start_time = 7 * 3600, end_time = 9 * 3600) |>
      group_by(stop_id) |>
      summarise(
        mean_headway_min  = mean(mean_headway, na.rm = TRUE) / 60,
        n_departures_peak = sum(n_departures, na.rm = TRUE),
        .groups = "drop"
      ) |>
      mutate(stop_id = as.character(stop_id))
    readr::write_csv(hw, hw_csv)
    hw
  }
}, error = function(e) {
  warning("tidytransit headway computation failed: ", e$message)
  NULL
})

# Join the AM-peak headways onto the stop points.
if (!is.null(gtfs_stop_headways) && !is.null(gtfs_stops_sf)) {
  gtfs_stops_sf <- gtfs_stops_sf |>
    mutate(stop_id = as.character(stop_id)) |>
    left_join(gtfs_stop_headways, by = "stop_id")
}

message("[setup_unified] data load complete.")
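# Illustrative only (not run): the GBIF observations stay on disk as a parquet
# file (gbif_parquet above) and, per the comment in that section, are queried
# with DuckDB from the server. The server's actual query and column names are
# not shown here; this minimal sketch only counts rows and is an assumption.
if (FALSE) {
  con <- DBI::dbConnect(duckdb::duckdb())
  n_obs <- DBI::dbGetQuery(
    con,
    glue::glue("SELECT COUNT(*) AS n FROM read_parquet('{gbif_parquet}')")
  )
  DBI::dbDisconnect(con, shutdown = TRUE)
}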