avephill committed on
Commit
ceb0819
·
verified ·
1 Parent(s): 702467b

Update R/setup.R

Browse files
Files changed (1) hide show
  1. R/setup.R +37 -75
R/setup.R CHANGED
@@ -1,4 +1,10 @@
1
- # setup
 
 
 
 
 
 
2
  require(shinyjs)
3
  library(shiny)
4
  library(shinydashboard)
@@ -17,37 +23,39 @@ library(sjlabelled)
17
  library(bslib)
18
  library(shinycssloaders)
19
 
20
- # ------------------------------------------------
21
- # 1) API Keys
22
- # ------------------------------------------------
23
  mapbox_token <- "pk.eyJ1Ijoia3dhbGtlcnRjdSIsImEiOiJjbHc3NmI0cDMxYzhyMmt0OXBiYnltMjVtIn0.Thtu6WqIhOfin6AykskM2g"
24
- # mb_access_token(mapbox_token, install = FALSE)
25
 
26
- # ------------------------------------------------
27
- # 2) Load Data
28
- # ------------------------------------------------
29
- # -- Greenspace
30
- getwd()
31
- osm_greenspace <- st_read("/vsicurl/https://huggingface.co/datasets/boettiger-lab/sf_biodiv_access/resolve/main/greenspaces_osm_nad83.shp", quiet = TRUE) %>%
32
 
 
 
33
  st_transform(4326)
 
34
  if (!"name" %in% names(osm_greenspace)) {
35
  osm_greenspace$name <- "Unnamed Greenspace"
36
  }
37
 
38
- # -- NDVI Raster
39
- ndvi <- terra::rast("/vsicurl/https://huggingface.co/datasets/boettiger-lab/sf_biodiv_access/resolve/main/SF_EastBay_NDVI_Sentinel_10.tif")
 
40
 
 
 
41
 
42
- # -- GBIF data
43
- # Load what is basically inter_gbif !!!!!
44
- # load("data/sf_gbif.Rdata") # => sf_gbif
45
 
46
- download.file('https://huggingface.co/datasets/boettiger-lab/sf_biodiv_access/resolve/main/gbif_census_ndvi_anno.Rdata', '/tmp/gbif_census_ndvi_anno.Rdata')
47
- load('/tmp/gbif_census_ndvi_anno.Rdata')
48
- vect_gbif <- vect(sf_gbif)
49
- # -- Precomputed CBG data
50
- download.file('https://huggingface.co/datasets/boettiger-lab/sf_biodiv_access/resolve/main/cbg_vect_sf.Rdata', '/tmp/cbg_vect_sf.Rdata')
51
  load('/tmp/cbg_vect_sf.Rdata')
52
 
53
  if (!"unique_species" %in% names(cbg_vect_sf)) {
@@ -63,59 +71,13 @@ if (!"ndvi_mean" %in% names(cbg_vect_sf)) {
63
  cbg_vect_sf$ndvi_mean <- cbg_vect_sf$ndvi_sentinel
64
  }
65
 
66
- # -- Hotspots/Coldspots
67
- biodiv_hotspots <- st_read("/vsicurl/https://huggingface.co/datasets/boettiger-lab/sf_biodiv_access/resolve/main/hotspots.shp", quiet = TRUE) %>% st_transform(4326)
68
- biodiv_coldspots <- st_read("/vsicurl/https://huggingface.co/datasets/boettiger-lab/sf_biodiv_access/resolve/main/coldspots.shp", quiet = TRUE) %>% st_transform(4326)
69
-
70
 
 
 
71
 
72
- #
73
- # # Community Organizations shapefile
74
- # # For now simulate
75
- #
76
- # # Define San Francisco bounding box coordinates
77
- # sf_bbox <- st_bbox(c(
78
- # xmin = -122.5247, # Western longitude
79
- # ymin = 37.7045, # Southern latitude
80
- # xmax = -122.3569, # Eastern longitude
81
- # ymax = 37.8334 # Northern latitude
82
- # ), crs = st_crs(4326)) # WGS84 CRS
83
- #
84
- # # Convert bounding box to polygon
85
- # sf_boundary <- st_as_sfc(sf_bbox) %>% st_make_valid()
86
- #
87
- # # Transform boundary to projected CRS for accurate buffering (EPSG:3310)
88
- # sf_boundary_proj <- st_transform(sf_boundary, 3310)
89
- #
90
- # # Set seed for reproducibility
91
- # set.seed(123)
92
- #
93
- # # Simulate 20 random points within San Francisco boundary
94
- # community_points <- st_sample(sf_boundary_proj, size = 20, type = "random")
95
- #
96
- # # Convert to sf object with POINT geometry and assign unique names
97
- # community_points_sf <- st_sf(
98
- # NAME = paste("Community Org", 1:20),
99
- # geometry = community_points
100
- # )
101
- # # Select first 3 points to buffer
102
- # buffered_points_sf <- community_points_sf[1:3, ] %>%
103
- # st_buffer(dist = 100) # Buffer distance in meters
104
- #
105
- # # Update the NAME column to indicate buffered areas
106
- # buffered_points_sf$NAME <- paste(buffered_points_sf$NAME, "Area")
107
- # community_points_sf <- st_transform(community_points_sf, 4326)
108
- # buffered_points_sf <- st_transform(buffered_points_sf, 4326)
109
- #
110
- # # Combine points and polygons into one sf object
111
- # community_orgs <- bind_rows(
112
- # community_points_sf,
113
- # buffered_points_sf
114
- # )
115
- #
116
- # # View the combined dataset
117
- # print(community_orgs)
118
- #
119
- # community_points_only <- community_orgs %>% filter(st_geometry_type(geometry) == "POINT")
120
- # community_polygons_only <- community_orgs %>% filter(st_geometry_type(geometry) == "POLYGON")
121
- #
 
1
+ # ============================================================================
2
+ # Setup: HuggingFace-optimized data loading
3
+ # ============================================================================
4
+ # This version uses GDAL virtual file system and temporary downloads
5
+ # for efficient loading in cloud/ephemeral environments like HuggingFace Spaces.
6
+ # For local development with persistent caching, use setup_local.R instead.
7
+
8
  require(shinyjs)
9
  library(shiny)
10
  library(shinydashboard)
 
23
  library(bslib)
24
  library(shinycssloaders)
25
 
26
+ # ============================================================================
27
+ # API Keys
28
+ # ============================================================================
29
  mapbox_token <- "pk.eyJ1Ijoia3dhbGtlcnRjdSIsImEiOiJjbHc3NmI0cDMxYzhyMmt0OXBiYnltMjVtIn0.Thtu6WqIhOfin6AykskM2g"
 
30
 
31
+ # ============================================================================
32
+ # Load Data from HuggingFace
33
+ # ============================================================================
 
 
 
34
 
35
+ # -- Greenspace (read directly from URL via GDAL virtual file system)
36
+ osm_greenspace <- st_read("/vsicurl/https://huggingface.co/datasets/boettiger-lab/sf_biodiv_access/resolve/main/greenspaces_osm_nad83.shp", quiet = TRUE) |>
37
  st_transform(4326)
38
+
39
  if (!"name" %in% names(osm_greenspace)) {
40
  osm_greenspace$name <- "Unnamed Greenspace"
41
  }
42
 
43
+ # -- Greenspace Distance Rasters (read directly from URL via GDAL virtual file system)
44
+ greenspace_dist_raster <- terra::rast("/vsicurl/https://huggingface.co/datasets/boettiger-lab/sf_biodiv_access/resolve/main/nearest_greenspace_dist.tif")
45
+ greenspace_osmid_raster <- terra::rast("/vsicurl/https://huggingface.co/datasets/boettiger-lab/sf_biodiv_access/resolve/main/nearest_greenspace_osmid.tif")
46
 
47
+ # -- NDVI Raster (read directly from URL via GDAL virtual file system)
48
+ ndvi <- terra::rast("/vsicurl/https://huggingface.co/datasets/boettiger-lab/sf_biodiv_access/resolve/main/SF_EastBay_NDVI_Sentinel_10.tif")
49
 
50
+ # -- GBIF data (loaded via DuckDB parquet in app.R server function)
51
+ # DuckDB can read parquet files directly from URLs
52
+ gbif_parquet <- "https://huggingface.co/datasets/boettiger-lab/sf_biodiv_access/resolve/main/gbif_census_ndvi_anno.parquet"
53
 
54
+ # -- Precomputed CBG data (download to /tmp and load)
55
+ download.file(
56
+ 'https://huggingface.co/datasets/boettiger-lab/sf_biodiv_access/resolve/main/cbg_vect_sf.Rdata',
57
+ '/tmp/cbg_vect_sf.Rdata'
58
+ )
59
  load('/tmp/cbg_vect_sf.Rdata')
60
 
61
  if (!"unique_species" %in% names(cbg_vect_sf)) {
 
71
  cbg_vect_sf$ndvi_mean <- cbg_vect_sf$ndvi_sentinel
72
  }
73
 
74
+ # -- Hotspots/Coldspots (read directly from URL via GDAL virtual file system)
75
+ biodiv_hotspots <- st_read("/vsicurl/https://huggingface.co/datasets/boettiger-lab/sf_biodiv_access/resolve/main/hotspots.shp", quiet = TRUE) |>
76
+ st_transform(4326)
 
77
 
78
+ biodiv_coldspots <- st_read("/vsicurl/https://huggingface.co/datasets/boettiger-lab/sf_biodiv_access/resolve/main/coldspots.shp", quiet = TRUE) |>
79
+ st_transform(4326)
80
 
81
+ # -- RSF Program Projects (read directly from URL via GDAL virtual file system)
82
+ rsf_projects <- st_read("/vsicurl/https://huggingface.co/datasets/boettiger-lab/sf_biodiv_access/resolve/main/RSF_Program_Projects_polygons.gpkg", quiet = TRUE) |>
83
+ st_transform(4326)