Spaces:
Build error
Build error
pgzmnk
committed on
Commit
·
3683a4c
1
Parent(s):
84d8075
IndexGeneration class in a separate file. Cleanup.
Browse files- app.py +1 -310
- utils/__init__.py +3 -0
- utils/indicators.py +299 -0
app.py
CHANGED
|
@@ -1,324 +1,15 @@
|
|
| 1 |
-
import datetime
|
| 2 |
-
import json
|
| 3 |
-
import logging
|
| 4 |
-
import os
|
| 5 |
-
from itertools import repeat
|
| 6 |
-
|
| 7 |
-
import ee
|
| 8 |
import gradio as gr
|
| 9 |
-
import numpy as np
|
| 10 |
-
import pandas as pd
|
| 11 |
-
import plotly.graph_objects as go
|
| 12 |
-
import yaml
|
| 13 |
|
| 14 |
from utils import duckdb_queries as dq
|
| 15 |
from utils.gradio import get_window_url_params
|
| 16 |
-
|
| 17 |
-
# Logging
|
| 18 |
-
logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
|
| 19 |
|
| 20 |
# Define constants
|
| 21 |
DATE = "2020-01-01"
|
| 22 |
YEAR = 2020
|
| 23 |
LOCATION = [-74.653370, 5.845328]
|
| 24 |
ROI_RADIUS = 20000
|
| 25 |
-
GEE_SERVICE_ACCOUNT = (
|
| 26 |
-
"climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com"
|
| 27 |
-
)
|
| 28 |
INDICES_FILE = "indices.yaml"
|
| 29 |
-
START_YEAR = 2015
|
| 30 |
-
END_YEAR = 2022
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
class IndexGenerator:
|
| 34 |
-
"""
|
| 35 |
-
A class to generate indices and compute zonal means.
|
| 36 |
-
|
| 37 |
-
Args:
|
| 38 |
-
centroid (tuple): The centroid coordinates (latitude, longitude) of the region of interest.
|
| 39 |
-
year (int): The year for which indices are generated.
|
| 40 |
-
roi_radius (int, optional): The radius (in meters) for creating a buffer around the centroid as the region of interest. Defaults to 20000.
|
| 41 |
-
project_name (str, optional): The name of the project. Defaults to "".
|
| 42 |
-
map (geemap.Map, optional): Map object for mapping. Defaults to None (i.e. no map created)
|
| 43 |
-
"""
|
| 44 |
-
|
| 45 |
-
def __init__(
|
| 46 |
-
self,
|
| 47 |
-
centroid,
|
| 48 |
-
roi_radius,
|
| 49 |
-
indices_file,
|
| 50 |
-
project_name="",
|
| 51 |
-
map=None,
|
| 52 |
-
):
|
| 53 |
-
# Authenticate to GEE & DuckDB
|
| 54 |
-
self._authenticate_ee(GEE_SERVICE_ACCOUNT)
|
| 55 |
-
|
| 56 |
-
# Set instance variables
|
| 57 |
-
self.indices = self._load_indices(indices_file)
|
| 58 |
-
self.centroid = centroid
|
| 59 |
-
self.roi = ee.Geometry.Point(*centroid).buffer(roi_radius)
|
| 60 |
-
# self.project_name = project_name
|
| 61 |
-
self.map = map
|
| 62 |
-
if self.map is not None:
|
| 63 |
-
self.show = True
|
| 64 |
-
else:
|
| 65 |
-
self.show = False
|
| 66 |
-
|
| 67 |
-
def _cloudfree(self, gee_path, daterange):
|
| 68 |
-
"""
|
| 69 |
-
Internal method to generate a cloud-free composite.
|
| 70 |
-
|
| 71 |
-
Args:
|
| 72 |
-
gee_path (str): The path to the Google Earth Engine (GEE) image or image collection.
|
| 73 |
-
|
| 74 |
-
Returns:
|
| 75 |
-
ee.Image: The cloud-free composite clipped to the region of interest.
|
| 76 |
-
"""
|
| 77 |
-
# Load a raw Landsat ImageCollection for a single year.
|
| 78 |
-
collection = (
|
| 79 |
-
ee.ImageCollection(gee_path).filterDate(*daterange).filterBounds(self.roi)
|
| 80 |
-
)
|
| 81 |
-
|
| 82 |
-
# Create a cloud-free composite with custom parameters for cloud score threshold and percentile.
|
| 83 |
-
composite_cloudfree = ee.Algorithms.Landsat.simpleComposite(
|
| 84 |
-
**{"collection": collection, "percentile": 75, "cloudScoreRange": 5}
|
| 85 |
-
)
|
| 86 |
-
return composite_cloudfree.clip(self.roi)
|
| 87 |
-
|
| 88 |
-
def _load_indices(self, indices_file):
|
| 89 |
-
# Read index configurations
|
| 90 |
-
with open(indices_file, "r") as stream:
|
| 91 |
-
try:
|
| 92 |
-
return yaml.safe_load(stream)
|
| 93 |
-
except yaml.YAMLError as e:
|
| 94 |
-
logging.error(e)
|
| 95 |
-
return None
|
| 96 |
-
|
| 97 |
-
def show_map(self, map=None):
|
| 98 |
-
if map is not None:
|
| 99 |
-
self.map = map
|
| 100 |
-
self.show = True
|
| 101 |
-
|
| 102 |
-
def disable_map(self):
|
| 103 |
-
self.show = False
|
| 104 |
-
|
| 105 |
-
def generate_index(self, index_config, year):
|
| 106 |
-
"""
|
| 107 |
-
Generates an index based on the provided index configuration.
|
| 108 |
-
|
| 109 |
-
Args:
|
| 110 |
-
index_config (dict): Configuration for generating the index.
|
| 111 |
-
|
| 112 |
-
Returns:
|
| 113 |
-
ee.Image: The generated index clipped to the region of interest.
|
| 114 |
-
"""
|
| 115 |
-
|
| 116 |
-
# Calculate date range, assume 1 year
|
| 117 |
-
start_date = str(datetime.date(year, 1, 1))
|
| 118 |
-
end_date = str(datetime.date(year, 12, 31))
|
| 119 |
-
daterange = [start_date, end_date]
|
| 120 |
-
|
| 121 |
-
# Calculate index based on type
|
| 122 |
-
match index_config["gee_type"]:
|
| 123 |
-
case "image":
|
| 124 |
-
dataset = ee.Image(index_config["gee_path"]).clip(self.roi)
|
| 125 |
-
if index_config.get("select"):
|
| 126 |
-
dataset = dataset.select(index_config["select"])
|
| 127 |
-
case "image_collection":
|
| 128 |
-
dataset = (
|
| 129 |
-
ee.ImageCollection(index_config["gee_path"])
|
| 130 |
-
.filterBounds(self.roi)
|
| 131 |
-
.map(lambda image: image.clip(self.roi))
|
| 132 |
-
.mean()
|
| 133 |
-
)
|
| 134 |
-
if index_config.get("select"):
|
| 135 |
-
dataset = dataset.select(index_config["select"])
|
| 136 |
-
case "feature_collection":
|
| 137 |
-
dataset = (
|
| 138 |
-
ee.Image()
|
| 139 |
-
.float()
|
| 140 |
-
.paint(
|
| 141 |
-
ee.FeatureCollection(index_config["gee_path"]),
|
| 142 |
-
index_config["select"],
|
| 143 |
-
)
|
| 144 |
-
.clip(self.roi)
|
| 145 |
-
)
|
| 146 |
-
case "algebraic":
|
| 147 |
-
image = self._cloudfree(index_config["gee_path"], daterange)
|
| 148 |
-
# to-do: params should come from index_config
|
| 149 |
-
dataset = image.normalizedDifference(["B4", "B3"])
|
| 150 |
-
case _:
|
| 151 |
-
dataset = None
|
| 152 |
-
|
| 153 |
-
if not dataset:
|
| 154 |
-
raise Exception("Failed to generate dataset.")
|
| 155 |
-
|
| 156 |
-
# Whether to display on GEE map
|
| 157 |
-
if self.show and index_config.get("show"):
|
| 158 |
-
map.addLayer(dataset, index_config["viz"], index_config["name"])
|
| 159 |
-
|
| 160 |
-
logging.info(f"Generated index: {index_config['name']}")
|
| 161 |
-
return dataset
|
| 162 |
-
|
| 163 |
-
def zonal_mean_index(self, index_key, year):
|
| 164 |
-
index_config = self.indices[index_key]
|
| 165 |
-
dataset = self.generate_index(index_config, year)
|
| 166 |
-
# zm = self._zonal_mean(single, index_config.get('bandname') or 'constant')
|
| 167 |
-
out = dataset.reduceRegion(
|
| 168 |
-
**{
|
| 169 |
-
"reducer": ee.Reducer.mean(),
|
| 170 |
-
"geometry": self.roi,
|
| 171 |
-
"scale": 200, # map scale
|
| 172 |
-
}
|
| 173 |
-
).getInfo()
|
| 174 |
-
if index_config.get("bandname"):
|
| 175 |
-
return out[index_config.get("bandname")]
|
| 176 |
-
return out
|
| 177 |
-
|
| 178 |
-
def generate_composite_index_df(self, year, indices=[]):
|
| 179 |
-
data = {
|
| 180 |
-
"metric": indices,
|
| 181 |
-
"year": year,
|
| 182 |
-
"centroid": str(self.centroid),
|
| 183 |
-
"project_name": self.project_name,
|
| 184 |
-
"value": list(map(self.zonal_mean_index, indices, repeat(year))),
|
| 185 |
-
"area": self.roi.area().getInfo(), # m^2
|
| 186 |
-
"geojson": str(self.roi.getInfo()),
|
| 187 |
-
# to-do: coefficient
|
| 188 |
-
}
|
| 189 |
-
|
| 190 |
-
logging.info("data", data)
|
| 191 |
-
df = pd.DataFrame(data)
|
| 192 |
-
return df
|
| 193 |
-
|
| 194 |
-
@staticmethod
|
| 195 |
-
def _authenticate_ee(ee_service_account):
|
| 196 |
-
"""
|
| 197 |
-
Huggingface Spaces does not support secret files, therefore authenticate with an environment variable containing the JSON.
|
| 198 |
-
"""
|
| 199 |
-
logging.info("Authenticating to Google Earth Engine...")
|
| 200 |
-
credentials = ee.ServiceAccountCredentials(
|
| 201 |
-
ee_service_account, key_data=os.environ["ee_service_account"]
|
| 202 |
-
)
|
| 203 |
-
ee.Initialize(credentials)
|
| 204 |
-
logging.info("Authenticated to Google Earth Engine.")
|
| 205 |
-
|
| 206 |
-
def _create_dataframe(self, years, project_name):
|
| 207 |
-
dfs = []
|
| 208 |
-
logging.info(years)
|
| 209 |
-
indices = self._load_indices(INDICES_FILE)
|
| 210 |
-
for year in years:
|
| 211 |
-
logging.info(year)
|
| 212 |
-
indexgenerator.project_name = project_name
|
| 213 |
-
df = indexgenerator.generate_composite_index_df(year, list(indices.keys()))
|
| 214 |
-
dfs.append(df)
|
| 215 |
-
return pd.concat(dfs)
|
| 216 |
-
|
| 217 |
-
# h/t: https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/12
|
| 218 |
-
def _latlon_to_config(self, longitudes=None, latitudes=None):
|
| 219 |
-
"""Function documentation:\n
|
| 220 |
-
Basic framework adopted from Krichardson under the following thread:
|
| 221 |
-
https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/7
|
| 222 |
-
|
| 223 |
-
# NOTE:
|
| 224 |
-
# THIS IS A TEMPORARY SOLUTION UNTIL THE DASH TEAM IMPLEMENTS DYNAMIC ZOOM
|
| 225 |
-
# in their plotly-functions associated with mapbox, such as go.Densitymapbox() etc.
|
| 226 |
-
|
| 227 |
-
Returns the appropriate zoom-level for these plotly-mapbox-graphics along with
|
| 228 |
-
the center coordinate tuple of all provided coordinate tuples.
|
| 229 |
-
"""
|
| 230 |
-
|
| 231 |
-
# Check whether both latitudes and longitudes have been passed,
|
| 232 |
-
# or if the list lenghts don't match
|
| 233 |
-
if (latitudes is None or longitudes is None) or (
|
| 234 |
-
len(latitudes) != len(longitudes)
|
| 235 |
-
):
|
| 236 |
-
# Otherwise, return the default values of 0 zoom and the coordinate origin as center point
|
| 237 |
-
return 0, (0, 0)
|
| 238 |
-
|
| 239 |
-
# Get the boundary-box
|
| 240 |
-
b_box = {}
|
| 241 |
-
b_box["height"] = latitudes.max() - latitudes.min()
|
| 242 |
-
b_box["width"] = longitudes.max() - longitudes.min()
|
| 243 |
-
b_box["center"] = (np.mean(longitudes), np.mean(latitudes))
|
| 244 |
-
|
| 245 |
-
# get the area of the bounding box in order to calculate a zoom-level
|
| 246 |
-
area = b_box["height"] * b_box["width"]
|
| 247 |
-
|
| 248 |
-
# * 1D-linear interpolation with numpy:
|
| 249 |
-
# - Pass the area as the only x-value and not as a list, in order to return a scalar as well
|
| 250 |
-
# - The x-points "xp" should be in parts in comparable order of magnitude of the given area
|
| 251 |
-
# - The zpom-levels are adapted to the areas, i.e. start with the smallest area possible of 0
|
| 252 |
-
# which leads to the highest possible zoom value 20, and so forth decreasing with increasing areas
|
| 253 |
-
# as these variables are antiproportional
|
| 254 |
-
zoom = np.interp(
|
| 255 |
-
x=area,
|
| 256 |
-
xp=[0, 5**-10, 4**-10, 3**-10, 2**-10, 1**-10, 1**-5],
|
| 257 |
-
fp=[20, 15, 14, 13, 12, 7, 5],
|
| 258 |
-
)
|
| 259 |
-
|
| 260 |
-
# Finally, return the zoom level and the associated boundary-box center coordinates
|
| 261 |
-
return zoom, b_box["center"]
|
| 262 |
-
|
| 263 |
-
def show_project_map(self, project_name):
|
| 264 |
-
prepared_statement = dq.get_project_geometry(project_name)
|
| 265 |
-
features = json.loads(prepared_statement[0][0].replace("'", '"'))["features"]
|
| 266 |
-
geometry = features[0]["geometry"]
|
| 267 |
-
longitudes = np.array(geometry["coordinates"])[0, :, 0]
|
| 268 |
-
latitudes = np.array(geometry["coordinates"])[0, :, 1]
|
| 269 |
-
zoom, bbox_center = self._latlon_to_config(longitudes, latitudes)
|
| 270 |
-
fig = go.Figure(
|
| 271 |
-
go.Scattermapbox(
|
| 272 |
-
mode="markers",
|
| 273 |
-
lon=[bbox_center[0]],
|
| 274 |
-
lat=[bbox_center[1]],
|
| 275 |
-
marker={"size": 20, "color": ["cyan"]},
|
| 276 |
-
)
|
| 277 |
-
)
|
| 278 |
-
|
| 279 |
-
fig.update_layout(
|
| 280 |
-
mapbox={
|
| 281 |
-
"style": "stamen-terrain",
|
| 282 |
-
"center": {"lon": bbox_center[0], "lat": bbox_center[1]},
|
| 283 |
-
"zoom": zoom,
|
| 284 |
-
"layers": [
|
| 285 |
-
{
|
| 286 |
-
"source": {
|
| 287 |
-
"type": "FeatureCollection",
|
| 288 |
-
"features": [{"type": "Feature", "geometry": geometry}],
|
| 289 |
-
},
|
| 290 |
-
"type": "fill",
|
| 291 |
-
"below": "traces",
|
| 292 |
-
"color": "royalblue",
|
| 293 |
-
}
|
| 294 |
-
],
|
| 295 |
-
},
|
| 296 |
-
margin={"l": 0, "r": 0, "b": 0, "t": 0},
|
| 297 |
-
)
|
| 298 |
-
|
| 299 |
-
return fig
|
| 300 |
-
|
| 301 |
-
def calculate_biodiversity_score(self, start_year, end_year, project_name):
|
| 302 |
-
years = []
|
| 303 |
-
for year in range(start_year, end_year):
|
| 304 |
-
row_exists = dq.check_if_project_exists_for_year(project_name, year)
|
| 305 |
-
if not row_exists:
|
| 306 |
-
years.append(year)
|
| 307 |
-
|
| 308 |
-
if len(years) > 0:
|
| 309 |
-
df = self._create_dataframe(years, project_name)
|
| 310 |
-
|
| 311 |
-
# Write score table to `_temptable`
|
| 312 |
-
dq.write_score_to_temptable()
|
| 313 |
-
|
| 314 |
-
# Create `bioindicator` table IF NOT EXISTS.
|
| 315 |
-
dq.get_or_create_bioindicator_table()
|
| 316 |
-
|
| 317 |
-
# UPSERT project record
|
| 318 |
-
dq.upsert_project_record()
|
| 319 |
-
logging.info("upserted records into motherduck")
|
| 320 |
-
scores = dq.get_project_scores(project_name, start_year, end_year)
|
| 321 |
-
return scores
|
| 322 |
|
| 323 |
|
| 324 |
# Instantiate outside gradio app to avoid re-initializing GEE, which is slow
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
|
| 3 |
from utils import duckdb_queries as dq
|
| 4 |
from utils.gradio import get_window_url_params
|
| 5 |
+
from utils.indicators import IndexGenerator
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# Define constants
|
| 8 |
DATE = "2020-01-01"
|
| 9 |
YEAR = 2020
|
| 10 |
LOCATION = [-74.653370, 5.845328]
|
| 11 |
ROI_RADIUS = 20000
|
|
|
|
|
|
|
|
|
|
| 12 |
INDICES_FILE = "indices.yaml"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
|
| 15 |
# Instantiate outside gradio app to avoid re-initializing GEE, which is slow
|
utils/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
|
| 3 |
+
logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
|
utils/indicators.py
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import datetime
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
from itertools import repeat
|
| 5 |
+
|
| 6 |
+
import ee
|
| 7 |
+
import numpy as np
|
| 8 |
+
import pandas as pd
|
| 9 |
+
import plotly.graph_objects as go
|
| 10 |
+
import yaml
|
| 11 |
+
|
| 12 |
+
from utils import duckdb_queries as dq
|
| 13 |
+
|
| 14 |
+
from . import logging
|
| 15 |
+
|
| 16 |
+
GEE_SERVICE_ACCOUNT = (
|
| 17 |
+
"climatebase-july-2023@ee-geospatialml-aquarry.iam.gserviceaccount.com"
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class IndexGenerator:
|
| 22 |
+
"""
|
| 23 |
+
A class to generate indices and compute zonal means.
|
| 24 |
+
|
| 25 |
+
Args:
|
| 26 |
+
centroid (tuple): The centroid coordinates (latitude, longitude) of the region of interest.
|
| 27 |
+
year (int): The year for which indices are generated.
|
| 28 |
+
roi_radius (int, optional): The radius (in meters) for creating a buffer around the centroid as the region of interest. Defaults to 20000.
|
| 29 |
+
project_name (str, optional): The name of the project. Defaults to "".
|
| 30 |
+
map (geemap.Map, optional): Map object for mapping. Defaults to None (i.e. no map created)
|
| 31 |
+
"""
|
| 32 |
+
|
| 33 |
+
def __init__(
|
| 34 |
+
self,
|
| 35 |
+
centroid,
|
| 36 |
+
roi_radius,
|
| 37 |
+
indices_file,
|
| 38 |
+
map=None,
|
| 39 |
+
):
|
| 40 |
+
# Authenticate to GEE & DuckDB
|
| 41 |
+
self._authenticate_ee(GEE_SERVICE_ACCOUNT)
|
| 42 |
+
|
| 43 |
+
# Set instance variables
|
| 44 |
+
self.indices = self._load_indices(indices_file)
|
| 45 |
+
self.centroid = centroid
|
| 46 |
+
self.roi = ee.Geometry.Point(*centroid).buffer(roi_radius)
|
| 47 |
+
# self.project_name = project_name
|
| 48 |
+
self.map = map
|
| 49 |
+
if self.map is not None:
|
| 50 |
+
self.show = True
|
| 51 |
+
else:
|
| 52 |
+
self.show = False
|
| 53 |
+
|
| 54 |
+
def _cloudfree(self, gee_path, daterange):
|
| 55 |
+
"""
|
| 56 |
+
Internal method to generate a cloud-free composite.
|
| 57 |
+
|
| 58 |
+
Args:
|
| 59 |
+
gee_path (str): The path to the Google Earth Engine (GEE) image or image collection.
|
| 60 |
+
|
| 61 |
+
Returns:
|
| 62 |
+
ee.Image: The cloud-free composite clipped to the region of interest.
|
| 63 |
+
"""
|
| 64 |
+
# Load a raw Landsat ImageCollection for a single year.
|
| 65 |
+
collection = (
|
| 66 |
+
ee.ImageCollection(gee_path).filterDate(*daterange).filterBounds(self.roi)
|
| 67 |
+
)
|
| 68 |
+
|
| 69 |
+
# Create a cloud-free composite with custom parameters for cloud score threshold and percentile.
|
| 70 |
+
composite_cloudfree = ee.Algorithms.Landsat.simpleComposite(
|
| 71 |
+
**{"collection": collection, "percentile": 75, "cloudScoreRange": 5}
|
| 72 |
+
)
|
| 73 |
+
return composite_cloudfree.clip(self.roi)
|
| 74 |
+
|
| 75 |
+
def _load_indices(self, indices_file):
|
| 76 |
+
# Read index configurations
|
| 77 |
+
with open(indices_file, "r") as stream:
|
| 78 |
+
try:
|
| 79 |
+
return yaml.safe_load(stream)
|
| 80 |
+
except yaml.YAMLError as e:
|
| 81 |
+
logging.error(e)
|
| 82 |
+
return None
|
| 83 |
+
|
| 84 |
+
def generate_index(self, index_config, year):
|
| 85 |
+
"""
|
| 86 |
+
Generates an index based on the provided index configuration.
|
| 87 |
+
|
| 88 |
+
Args:
|
| 89 |
+
index_config (dict): Configuration for generating the index.
|
| 90 |
+
|
| 91 |
+
Returns:
|
| 92 |
+
ee.Image: The generated index clipped to the region of interest.
|
| 93 |
+
"""
|
| 94 |
+
|
| 95 |
+
# Calculate date range, assume 1 year
|
| 96 |
+
start_date = str(datetime.date(year, 1, 1))
|
| 97 |
+
end_date = str(datetime.date(year, 12, 31))
|
| 98 |
+
daterange = [start_date, end_date]
|
| 99 |
+
|
| 100 |
+
# Calculate index based on type
|
| 101 |
+
match index_config["gee_type"]:
|
| 102 |
+
case "image":
|
| 103 |
+
dataset = ee.Image(index_config["gee_path"]).clip(self.roi)
|
| 104 |
+
if index_config.get("select"):
|
| 105 |
+
dataset = dataset.select(index_config["select"])
|
| 106 |
+
case "image_collection":
|
| 107 |
+
dataset = (
|
| 108 |
+
ee.ImageCollection(index_config["gee_path"])
|
| 109 |
+
.filterBounds(self.roi)
|
| 110 |
+
.map(lambda image: image.clip(self.roi))
|
| 111 |
+
.mean()
|
| 112 |
+
)
|
| 113 |
+
if index_config.get("select"):
|
| 114 |
+
dataset = dataset.select(index_config["select"])
|
| 115 |
+
case "feature_collection":
|
| 116 |
+
dataset = (
|
| 117 |
+
ee.Image()
|
| 118 |
+
.float()
|
| 119 |
+
.paint(
|
| 120 |
+
ee.FeatureCollection(index_config["gee_path"]),
|
| 121 |
+
index_config["select"],
|
| 122 |
+
)
|
| 123 |
+
.clip(self.roi)
|
| 124 |
+
)
|
| 125 |
+
case "algebraic":
|
| 126 |
+
image = self._cloudfree(index_config["gee_path"], daterange)
|
| 127 |
+
# to-do: params should come from index_config
|
| 128 |
+
dataset = image.normalizedDifference(["B4", "B3"])
|
| 129 |
+
case _:
|
| 130 |
+
dataset = None
|
| 131 |
+
|
| 132 |
+
if not dataset:
|
| 133 |
+
raise Exception("Failed to generate dataset.")
|
| 134 |
+
|
| 135 |
+
# Whether to display on GEE map
|
| 136 |
+
if self.show and index_config.get("show"):
|
| 137 |
+
map.addLayer(dataset, index_config["viz"], index_config["name"])
|
| 138 |
+
|
| 139 |
+
logging.info(f"Generated index: {index_config['name']}")
|
| 140 |
+
return dataset
|
| 141 |
+
|
| 142 |
+
def zonal_mean_index(self, index_key, year):
|
| 143 |
+
index_config = self.indices[index_key]
|
| 144 |
+
dataset = self.generate_index(index_config, year)
|
| 145 |
+
# zm = self._zonal_mean(single, index_config.get('bandname') or 'constant')
|
| 146 |
+
out = dataset.reduceRegion(
|
| 147 |
+
**{
|
| 148 |
+
"reducer": ee.Reducer.mean(),
|
| 149 |
+
"geometry": self.roi,
|
| 150 |
+
"scale": 200, # map scale
|
| 151 |
+
}
|
| 152 |
+
).getInfo()
|
| 153 |
+
if index_config.get("bandname"):
|
| 154 |
+
return out[index_config.get("bandname")]
|
| 155 |
+
return out
|
| 156 |
+
|
| 157 |
+
def generate_composite_index_df(self, year, indices=[]):
|
| 158 |
+
data = {
|
| 159 |
+
"metric": indices,
|
| 160 |
+
"year": year,
|
| 161 |
+
"centroid": str(self.centroid),
|
| 162 |
+
"project_name": self.project_name,
|
| 163 |
+
"value": list(map(self.zonal_mean_index, indices, repeat(year))),
|
| 164 |
+
"area": self.roi.area().getInfo(), # m^2
|
| 165 |
+
"geojson": str(self.roi.getInfo()),
|
| 166 |
+
# to-do: coefficient
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
logging.info("data", data)
|
| 170 |
+
df = pd.DataFrame(data)
|
| 171 |
+
return df
|
| 172 |
+
|
| 173 |
+
@staticmethod
|
| 174 |
+
def _authenticate_ee(ee_service_account):
|
| 175 |
+
"""
|
| 176 |
+
Huggingface Spaces does not support secret files, therefore authenticate with an environment variable containing the JSON.
|
| 177 |
+
"""
|
| 178 |
+
logging.info("Authenticating to Google Earth Engine...")
|
| 179 |
+
credentials = ee.ServiceAccountCredentials(
|
| 180 |
+
ee_service_account, key_data=os.environ["ee_service_account"]
|
| 181 |
+
)
|
| 182 |
+
ee.Initialize(credentials)
|
| 183 |
+
logging.info("Authenticated to Google Earth Engine.")
|
| 184 |
+
|
| 185 |
+
def _create_dataframe(self, years, project_name):
|
| 186 |
+
dfs = []
|
| 187 |
+
logging.info(years)
|
| 188 |
+
for year in years:
|
| 189 |
+
logging.info(year)
|
| 190 |
+
self.project_name = project_name
|
| 191 |
+
df = self.generate_composite_index_df(year, list(self.indices.keys()))
|
| 192 |
+
dfs.append(df)
|
| 193 |
+
return pd.concat(dfs)
|
| 194 |
+
|
| 195 |
+
# h/t: https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/12
|
| 196 |
+
def _latlon_to_config(self, longitudes=None, latitudes=None):
|
| 197 |
+
"""Function documentation:\n
|
| 198 |
+
Basic framework adopted from Krichardson under the following thread:
|
| 199 |
+
https://community.plotly.com/t/dynamic-zoom-for-mapbox/32658/7
|
| 200 |
+
|
| 201 |
+
# NOTE:
|
| 202 |
+
# THIS IS A TEMPORARY SOLUTION UNTIL THE DASH TEAM IMPLEMENTS DYNAMIC ZOOM
|
| 203 |
+
# in their plotly-functions associated with mapbox, such as go.Densitymapbox() etc.
|
| 204 |
+
|
| 205 |
+
Returns the appropriate zoom-level for these plotly-mapbox-graphics along with
|
| 206 |
+
the center coordinate tuple of all provided coordinate tuples.
|
| 207 |
+
"""
|
| 208 |
+
|
| 209 |
+
# Check whether both latitudes and longitudes have been passed,
|
| 210 |
+
# or if the list lenghts don't match
|
| 211 |
+
if (latitudes is None or longitudes is None) or (
|
| 212 |
+
len(latitudes) != len(longitudes)
|
| 213 |
+
):
|
| 214 |
+
# Otherwise, return the default values of 0 zoom and the coordinate origin as center point
|
| 215 |
+
return 0, (0, 0)
|
| 216 |
+
|
| 217 |
+
# Get the boundary-box
|
| 218 |
+
b_box = {}
|
| 219 |
+
b_box["height"] = latitudes.max() - latitudes.min()
|
| 220 |
+
b_box["width"] = longitudes.max() - longitudes.min()
|
| 221 |
+
b_box["center"] = (np.mean(longitudes), np.mean(latitudes))
|
| 222 |
+
|
| 223 |
+
# get the area of the bounding box in order to calculate a zoom-level
|
| 224 |
+
area = b_box["height"] * b_box["width"]
|
| 225 |
+
|
| 226 |
+
# * 1D-linear interpolation with numpy:
|
| 227 |
+
# - Pass the area as the only x-value and not as a list, in order to return a scalar as well
|
| 228 |
+
# - The x-points "xp" should be in parts in comparable order of magnitude of the given area
|
| 229 |
+
# - The zpom-levels are adapted to the areas, i.e. start with the smallest area possible of 0
|
| 230 |
+
# which leads to the highest possible zoom value 20, and so forth decreasing with increasing areas
|
| 231 |
+
# as these variables are antiproportional
|
| 232 |
+
zoom = np.interp(
|
| 233 |
+
x=area,
|
| 234 |
+
xp=[0, 5**-10, 4**-10, 3**-10, 2**-10, 1**-10, 1**-5],
|
| 235 |
+
fp=[20, 15, 14, 13, 12, 7, 5],
|
| 236 |
+
)
|
| 237 |
+
|
| 238 |
+
# Finally, return the zoom level and the associated boundary-box center coordinates
|
| 239 |
+
return zoom, b_box["center"]
|
| 240 |
+
|
| 241 |
+
def show_project_map(self, project_name):
|
| 242 |
+
prepared_statement = dq.get_project_geometry(project_name)
|
| 243 |
+
features = json.loads(prepared_statement[0][0].replace("'", '"'))["features"]
|
| 244 |
+
geometry = features[0]["geometry"]
|
| 245 |
+
longitudes = np.array(geometry["coordinates"])[0, :, 0]
|
| 246 |
+
latitudes = np.array(geometry["coordinates"])[0, :, 1]
|
| 247 |
+
zoom, bbox_center = self._latlon_to_config(longitudes, latitudes)
|
| 248 |
+
fig = go.Figure(
|
| 249 |
+
go.Scattermapbox(
|
| 250 |
+
mode="markers",
|
| 251 |
+
lon=[bbox_center[0]],
|
| 252 |
+
lat=[bbox_center[1]],
|
| 253 |
+
marker={"size": 20, "color": ["cyan"]},
|
| 254 |
+
)
|
| 255 |
+
)
|
| 256 |
+
|
| 257 |
+
fig.update_layout(
|
| 258 |
+
mapbox={
|
| 259 |
+
"style": "stamen-terrain",
|
| 260 |
+
"center": {"lon": bbox_center[0], "lat": bbox_center[1]},
|
| 261 |
+
"zoom": zoom,
|
| 262 |
+
"layers": [
|
| 263 |
+
{
|
| 264 |
+
"source": {
|
| 265 |
+
"type": "FeatureCollection",
|
| 266 |
+
"features": [{"type": "Feature", "geometry": geometry}],
|
| 267 |
+
},
|
| 268 |
+
"type": "fill",
|
| 269 |
+
"below": "traces",
|
| 270 |
+
"color": "royalblue",
|
| 271 |
+
}
|
| 272 |
+
],
|
| 273 |
+
},
|
| 274 |
+
margin={"l": 0, "r": 0, "b": 0, "t": 0},
|
| 275 |
+
)
|
| 276 |
+
|
| 277 |
+
return fig
|
| 278 |
+
|
| 279 |
+
def calculate_biodiversity_score(self, start_year, end_year, project_name):
|
| 280 |
+
years = []
|
| 281 |
+
for year in range(start_year, end_year):
|
| 282 |
+
row_exists = dq.check_if_project_exists_for_year(project_name, year)
|
| 283 |
+
if not row_exists:
|
| 284 |
+
years.append(year)
|
| 285 |
+
|
| 286 |
+
if len(years) > 0:
|
| 287 |
+
df = self._create_dataframe(years, project_name)
|
| 288 |
+
|
| 289 |
+
# Write score table to `_temptable`
|
| 290 |
+
dq.write_score_to_temptable()
|
| 291 |
+
|
| 292 |
+
# Create `bioindicator` table IF NOT EXISTS.
|
| 293 |
+
dq.get_or_create_bioindicator_table()
|
| 294 |
+
|
| 295 |
+
# UPSERT project record
|
| 296 |
+
dq.upsert_project_record()
|
| 297 |
+
logging.info("upserted records into motherduck")
|
| 298 |
+
scores = dq.get_project_scores(project_name, start_year, end_year)
|
| 299 |
+
return scores
|