| |
| from __future__ import annotations |
|
|
| import json |
| import math |
| import time |
| from dataclasses import dataclass |
| from datetime import date, timedelta |
| from pathlib import Path |
| from typing import Any |
| from urllib.parse import urlencode |
| from urllib.request import Request, urlopen |
|
|
| import pandas as pd |
|
|
|
|
# Filesystem layout. REPO_ROOT is the parent of the directory that contains this file.
REPO_ROOT = Path(__file__).resolve().parents[1]
RAW_FIELD_DIR = REPO_ROOT.joinpath("data", "raw", "field")
PROCESSED_DIR = REPO_ROOT.joinpath("data", "processed")
LOCAL_DOWNLOADS_DIR = REPO_ROOT.joinpath("data_local", "downloads")
# One JSON file per zone is written here by write_usda_cache.
USDA_SOIL_CACHE_DIR = LOCAL_DOWNLOADS_DIR.joinpath("usda_soil")
FIELD_BOUNDARY_PATH = RAW_FIELD_DIR.joinpath("field_boundary.geojson")
OUTPUT_PATH = PROCESSED_DIR.joinpath("zone_state_bootstrap.parquet")
DATASET_CARD_PATH = PROCESSED_DIR.joinpath("zone_state_bootstrap.dataset_card.json")
TOP_MANIFEST_PATH = PROCESSED_DIR.joinpath("manifest.json")

# The build date is pinned rather than read from the clock.
CURRENT_DATE = date(year=2026, month=4, day=1)
# History window: the 30 days ending the day before CURRENT_DATE (both ends inclusive).
HISTORY_START = CURRENT_DATE - timedelta(days=30)
HISTORY_END = CURRENT_DATE - timedelta(days=1)
# Forecast window: CURRENT_DATE plus the following six days.
FORECAST_END = CURRENT_DATE + timedelta(days=6)
|
|
# Properties of the stand-in field. They are embedded as the GeoJSON feature
# properties when no boundary file exists yet, and read back by the zone builder.
PROVISIONAL_FIELD = dict(
    field_id="provisional_iowa_demo",
    name="Provisional Iowa Demo Field",
    crop="row_crop_mixed_demo",
    season="2026",
    # Centre of the rectangle, in decimal degrees.
    center_lat=42.0412,
    center_lon=-93.8194,
    # Rectangle extent and grid cell edge, in metres.
    width_m=300.0,
    height_m=300.0,
    zone_size_m=100.0,
    boundary_mode="provisional_demo",
    source_note="Generated locally to unblock the zone bootstrap pipeline until the real field boundary is supplied.",
    # Passed as the `timezone` parameter of the weather requests.
    timezone="America/Chicago",
)
|
|
# Unit suffix appended to each soil column name, keyed by soil property name.
SOIL_COLUMN_SUFFIXES = {
    "bdod": "kgdm3",
    "cec": "cmolkg",
    "cfvo": "pct",
    "clay": "pct",
    "nitrogen": "gkg",
    "phh2o": "ph",
    "sand": "pct",
    "silt": "pct",
    "soc": "gkg",
}
# Soil properties requested and emitted, in column order (same order as the suffix map).
SOIL_PROPERTIES = list(SOIL_COLUMN_SUFFIXES)
# Depth labels; "-" becomes "_to_" when they are turned into column names.
SOIL_DEPTHS = ["0-5cm", "0-30cm"]

USDA_SDA_URL = "https://sdmdataaccess.sc.egov.usda.gov/Tabular/post.rest"
# Horizon columns that are averaged by weighted_horizon_averages.
USDA_USABLE_HORIZON_FIELDS = [
    "sandtotal_r",
    "silttotal_r",
    "claytotal_r",
    "om_r",
    "dbthirdbar_r",
    "cec7_r",
    "ph1to1h2o_r",
    "fragvoltot_r",
]

# Offsets in metres applied to a zone centre, on both axes, when searching for a
# point that returns soil data. The exact centre (0.0) is tried first.
USDA_QUERY_OFFSETS_M = [
    0.0,
    10.0, -10.0,
    25.0, -25.0,
    40.0, -40.0,
]
SOILGRIDS_QUERY_OFFSETS_M = [
    0.0,
    25.0, -25.0,
    50.0, -50.0,
]
|
|
|
|
@dataclass(frozen=True)
class Zone:
    """One immutable grid cell of the field.

    Instances are created by ``build_zones_from_boundary`` and used as the
    per-zone key for every downstream lookup.
    """

    # Identifier of the form "zone_rNN_cNN" (1-based row and column).
    zone_id: str
    # 1-based grid position.
    row: int
    col: int
    # Cell area in square metres.
    area_m2: float
    # Cell centre in decimal degrees.
    center_lat: float
    center_lon: float
|
|
|
def main() -> None:
    """Build the zone-level bootstrap table and write it with its metadata.

    Steps:
      1. Create the raw, processed and USDA cache directories if missing.
      2. Load the field boundary (creating the provisional one if absent) and
         split it into grid zones.
      3. Fetch elevation, soil, recent weather and forecast weather per zone.
      4. Build one record per zone, including derived proxy columns.
      5. Write the table as Parquet and JSONL, write the dataset card, and
         update the top-level manifest.
    """
    for directory in (RAW_FIELD_DIR, PROCESSED_DIR, USDA_SOIL_CACHE_DIR):
        directory.mkdir(parents=True, exist_ok=True)

    boundary = ensure_provisional_boundary()
    boundary_props = boundary["features"][0]["properties"]
    zones = build_zones_from_boundary(boundary)

    elevations = fetch_elevations(zones)
    soil_rows, soil_status_summary = fetch_soil_rows(zones)
    archive_rows = fetch_archive_weather(zones)
    forecast_rows = fetch_forecast_weather(zones)

    records = []
    for zone in zones:
        soil = soil_rows[zone.zone_id]
        weather = archive_rows[zone.zone_id]
        forecast = forecast_rows[zone.zone_id]
        elevation = elevations[zone.zone_id]

        recent_precip = weather["precipitation_sum_7d_mm"]
        recent_et0 = weather["et0_sum_7d_mm"]
        forecast_precip = forecast["forecast_precipitation_sum_7d_mm"]
        forecast_et0 = forecast["forecast_et0_sum_7d_mm"]
        recent_soil_moisture = weather["soil_moisture_0_7cm_mean_7d_m3m3"]
        # Soil rows label depths as "0_to_30cm" (see empty_soil_row and the soil
        # parsers). The previous key "soil_nitrogen_0_30cm_gkg" never existed,
        # so the nitrogen proxy below was always None.
        soil_nitrogen = soil.get("soil_nitrogen_0_to_30cm_gkg")

        records.append(
            {
                "build_date": CURRENT_DATE.isoformat(),
                "field_id": boundary_props["field_id"],
                "field_name": boundary_props["name"],
                "boundary_mode": boundary_props["boundary_mode"],
                "crop": boundary_props["crop"],
                "season": boundary_props["season"],
                "zone_id": zone.zone_id,
                "zone_row": zone.row,
                "zone_col": zone.col,
                "zone_area_m2": zone.area_m2,
                "zone_center_lat": zone.center_lat,
                "zone_center_lon": zone.center_lon,
                "landcover_assumed": "cropland",
                "elevation_m": elevation,
                **soil,
                **weather,
                **forecast,
                # Precipitation minus reference evapotranspiration over the window.
                "water_balance_proxy_7d_mm": round(recent_precip - recent_et0, 3),
                "forecast_water_balance_proxy_7d_mm": round(forecast_precip - forecast_et0, 3),
                # Forecast deficit (never negative), scaled down as recent topsoil
                # moisture approaches 0.35 m3/m3; it is zero at or above that value.
                "irrigation_pressure_proxy": round(
                    max(0.0, forecast_et0 - forecast_precip) * (1.0 - min(1.0, recent_soil_moisture / 0.35)),
                    3,
                ),
                # Shortfall of soil nitrogen below 1.5 g/kg; None when nitrogen is unknown.
                "nitrogen_pressure_proxy": round(max(0.0, 1.5 - soil_nitrogen), 3) if soil_nitrogen is not None else None,
                # Flag wet conditions: high recent topsoil moisture or >= 25 mm of recent precipitation.
                "access_wet_risk_flag": bool(recent_soil_moisture >= 0.34 or recent_precip >= 25.0),
            }
        )

    df = pd.DataFrame(records).sort_values("zone_id").reset_index(drop=True)
    df.to_parquet(OUTPUT_PATH, index=False)
    df.to_json(PROCESSED_DIR / "zone_state_bootstrap.jsonl", orient="records", lines=True)

    dataset_card = {
        "dataset": "zone_state_bootstrap",
        "description": "Zone-level bootstrap table for planning and simulator initialization, built from a provisional demo field plus public soil, weather, and elevation sources.",
        "records": len(df),
        "field_id": boundary_props["field_id"],
        "boundary_mode": boundary_props["boundary_mode"],
        "history_window": {
            "start_date": HISTORY_START.isoformat(),
            "end_date": HISTORY_END.isoformat(),
        },
        "forecast_window": {
            "start_date": CURRENT_DATE.isoformat(),
            "end_date": FORECAST_END.isoformat(),
        },
        "sources": {
            "field_boundary": str(FIELD_BOUNDARY_PATH),
            "usda_sda": USDA_SDA_URL,
            "soilgrids": "https://rest.isric.org/soilgrids/v2.0/properties/query",
            "open_meteo_archive": "https://archive-api.open-meteo.com/v1/archive",
            "open_meteo_forecast": "https://api.open-meteo.com/v1/forecast",
            "open_meteo_elevation": "https://api.open-meteo.com/v1/elevation",
        },
        "soil_status_summary": soil_status_summary,
        "soil_status_counts": df["soil_source_status"].value_counts().sort_index().to_dict(),
        "columns": list(df.columns),
        "note": "Replace the provisional field boundary with the real field polygon before using this dataset for field-specific deployment decisions.",
    }
    DATASET_CARD_PATH.write_text(json.dumps(dataset_card, indent=2, sort_keys=True))

    update_top_manifest(dataset_card)
    print(f"Wrote {len(df)} records to {OUTPUT_PATH}")
|
|
|
|
def ensure_provisional_boundary() -> dict[str, Any]:
    """Return the field boundary GeoJSON, creating a provisional one if needed.

    If ``FIELD_BOUNDARY_PATH`` already exists its parsed contents are returned
    unchanged. Otherwise a rectangular polygon centred on the provisional
    field is built from ``PROVISIONAL_FIELD``, written to that path, and
    returned.

    Returns:
        A GeoJSON ``FeatureCollection`` dict.
    """
    if FIELD_BOUNDARY_PATH.exists():
        existing_text = FIELD_BOUNDARY_PATH.read_text()
        return json.loads(existing_text)

    lat0 = PROVISIONAL_FIELD["center_lat"]
    lon0 = PROVISIONAL_FIELD["center_lon"]
    # Half-extents of the rectangle converted from metres to degrees.
    dlat = meters_to_lat_deg(PROVISIONAL_FIELD["height_m"] / 2.0)
    dlon = meters_to_lon_deg(PROVISIONAL_FIELD["width_m"] / 2.0, lat0)

    north, south = lat0 + dlat, lat0 - dlat
    east, west = lon0 + dlon, lon0 - dlon

    # Closed ring in [lon, lat] order; the first and last vertices coincide.
    ring = [
        [west, south],
        [east, south],
        [east, north],
        [west, north],
        [west, south],
    ]
    rectangle = {"type": "Polygon", "coordinates": [ring]}
    field_feature = {
        "type": "Feature",
        "properties": PROVISIONAL_FIELD,
        "geometry": rectangle,
    }
    collection = {"type": "FeatureCollection", "features": [field_feature]}

    FIELD_BOUNDARY_PATH.write_text(json.dumps(collection, indent=2))
    return collection
|
|
|
|
def build_zones_from_boundary(boundary: dict[str, Any]) -> list[Zone]:
    """Split the field described by ``boundary`` into a regular grid of zones.

    Only the first feature's properties are used (centre, width, height and
    zone size); the polygon geometry itself is not read.

    Args:
        boundary: GeoJSON FeatureCollection whose first feature carries
            ``center_lat``, ``center_lon``, ``width_m``, ``height_m`` and
            ``zone_size_m`` properties.

    Returns:
        Zones in row-major order, with row 1 the northernmost and column 1
        the westernmost.
    """
    field = boundary["features"][0]["properties"]
    lat0 = field["center_lat"]
    lon0 = field["center_lon"]
    field_width = field["width_m"]
    field_height = field["height_m"]
    cell = field["zone_size_m"]

    n_cols = int(round(field_width / cell))
    n_rows = int(round(field_height / cell))
    # Offsets in metres from the field centre to the centre of the north-west cell.
    first_x = -field_width / 2.0 + cell / 2.0
    first_y = field_height / 2.0 - cell / 2.0

    def make_zone(r: int, c: int) -> Zone:
        east_m = first_x + c * cell
        north_m = first_y - r * cell
        lat = lat0 + meters_to_lat_deg(north_m)
        # Longitude scaling uses the field-centre latitude for every cell.
        lon = lon0 + meters_to_lon_deg(east_m, lat0)
        return Zone(
            zone_id=f"zone_r{r+1:02d}_c{c+1:02d}",
            row=r + 1,
            col=c + 1,
            area_m2=cell * cell,
            center_lat=round(lat, 7),
            center_lon=round(lon, 7),
        )

    return [make_zone(r, c) for r in range(n_rows) for c in range(n_cols)]
|
|
|
|
def fetch_elevations(zones: list[Zone]) -> dict[str, float]:
    """Look up the elevation of every zone centre via the Open-Meteo elevation API.

    Zones are sent in batches because the endpoint limits how many coordinates
    one request may carry. An empty ``zones`` list makes no request.

    Args:
        zones: Zones whose ``center_lat`` / ``center_lon`` are queried.

    Returns:
        Mapping of ``zone_id`` to elevation rounded to 3 decimals.

    Raises:
        RuntimeError: If the service returns a different number of elevations
            than coordinates sent, which would misalign values and zones.
    """
    # Per-request coordinate cap of the elevation endpoint (100 per the
    # Open-Meteo Elevation API documentation).
    batch_size = 100
    elevations_by_zone: dict[str, float] = {}
    for start in range(0, len(zones), batch_size):
        batch = zones[start : start + batch_size]
        payload = fetch_json(
            "https://api.open-meteo.com/v1/elevation",
            {
                "latitude": ",".join(str(zone.center_lat) for zone in batch),
                "longitude": ",".join(str(zone.center_lon) for zone in batch),
            },
        )
        values = payload["elevation"]
        if len(values) != len(batch):
            raise RuntimeError(
                f"Elevation API returned {len(values)} values for {len(batch)} coordinates"
            )
        for zone, value in zip(batch, values):
            elevations_by_zone[zone.zone_id] = round(float(value), 3)
    return elevations_by_zone
|
|
|
|
def fetch_archive_weather(zones: list[Zone]) -> dict[str, dict[str, float]]:
    """Summarise recent hourly weather for each zone from the Open-Meteo archive.

    One request is made per zone for the ``HISTORY_START``..``HISTORY_END``
    window. Each variable is aggregated twice: over the last ``24 * 7`` hourly
    rows (the "7d" columns) and over the whole response (the "30d" columns).

    Returns:
        Mapping of ``zone_id`` to a dict of rounded summary values.
    """
    archive_url = "https://archive-api.open-meteo.com/v1/archive"
    hourly_variables = [
        "temperature_2m",
        "precipitation",
        "et0_fao_evapotranspiration",
        "soil_temperature_0_to_7cm",
        "soil_moisture_0_to_7cm",
    ]
    week_rows = 24 * 7

    def mean_of(frame: pd.DataFrame, column: str, digits: int = 3) -> float:
        return round(frame[column].mean(), digits)

    def total_of(frame: pd.DataFrame, column: str) -> float:
        return round(frame[column].sum(), 3)

    summaries: dict[str, dict[str, float]] = {}
    for zone in zones:
        request_params = {
            "latitude": zone.center_lat,
            "longitude": zone.center_lon,
            "start_date": HISTORY_START.isoformat(),
            "end_date": HISTORY_END.isoformat(),
            "timezone": PROVISIONAL_FIELD["timezone"],
            "hourly": ",".join(hourly_variables),
        }
        response = fetch_json(archive_url, request_params)
        full = pd.DataFrame(response["hourly"])
        full["time"] = pd.to_datetime(full["time"])
        # The trailing week of hourly rows, computed once and reused for every "7d" column.
        week = full.tail(week_rows)
        summaries[zone.zone_id] = {
            "temperature_2m_mean_7d_c": mean_of(week, "temperature_2m"),
            "temperature_2m_mean_30d_c": mean_of(full, "temperature_2m"),
            "precipitation_sum_7d_mm": total_of(week, "precipitation"),
            "precipitation_sum_30d_mm": total_of(full, "precipitation"),
            "et0_sum_7d_mm": total_of(week, "et0_fao_evapotranspiration"),
            "et0_sum_30d_mm": total_of(full, "et0_fao_evapotranspiration"),
            "soil_temperature_0_7cm_mean_7d_c": mean_of(week, "soil_temperature_0_to_7cm"),
            "soil_temperature_0_7cm_mean_30d_c": mean_of(full, "soil_temperature_0_to_7cm"),
            "soil_moisture_0_7cm_mean_7d_m3m3": mean_of(week, "soil_moisture_0_to_7cm", 4),
            "soil_moisture_0_7cm_mean_30d_m3m3": mean_of(full, "soil_moisture_0_to_7cm", 4),
        }
    return summaries
|
|
|
|
def fetch_forecast_weather(zones: list[Zone]) -> dict[str, dict[str, float]]:
    """Summarise the 7-day daily forecast for each zone from Open-Meteo.

    One request is made per zone with ``forecast_days=7``. The daily values
    are reduced to a mean temperature and precipitation / ET0 totals.

    Returns:
        Mapping of ``zone_id`` to a dict of rounded forecast summaries.
    """
    forecast_url = "https://api.open-meteo.com/v1/forecast"
    daily_variables = ["temperature_2m_mean", "precipitation_sum", "et0_fao_evapotranspiration"]

    summaries: dict[str, dict[str, float]] = {}
    for zone in zones:
        request_params = {
            "latitude": zone.center_lat,
            "longitude": zone.center_lon,
            "timezone": PROVISIONAL_FIELD["timezone"],
            "daily": ",".join(daily_variables),
            "forecast_days": 7,
        }
        response = fetch_json(forecast_url, request_params)
        days = pd.DataFrame(response["daily"])

        mean_temperature = days["temperature_2m_mean"].mean()
        total_precipitation = days["precipitation_sum"].sum()
        total_et0 = days["et0_fao_evapotranspiration"].sum()

        summaries[zone.zone_id] = {
            "forecast_temperature_2m_mean_7d_c": round(mean_temperature, 3),
            "forecast_precipitation_sum_7d_mm": round(total_precipitation, 3),
            "forecast_et0_sum_7d_mm": round(total_et0, 3),
        }
    return summaries
|
|
|
|
def fetch_soil_rows(zones: list[Zone]) -> tuple[dict[str, dict[str, float | None]], str]:
    """Fetch one soil row per zone, trying USDA SDA first and SoilGrids second.

    The lookup is best-effort: a failure in one source never aborts the build.
    Each failure is reported on stdout, and a zone for which both sources fail
    gets an all-null soil row.

    Args:
        zones: Zones to look up.

    Returns:
        A pair ``(rows, summary)``. ``rows`` maps ``zone_id`` to its soil row;
        ``summary`` is the ``status:count`` string from ``summarize_statuses``.
    """
    out: dict[str, dict[str, float | None]] = {}
    statuses: list[str] = []
    for zone in zones:
        # Primary source: USDA Soil Data Access.
        try:
            usda_row = fetch_usda_soil_row(zone)
            usda_status = str(usda_row["soil_source_status"])
        except Exception as exc:  # deliberate best-effort; fall through to SoilGrids
            print(f"USDA soil lookup failed for {zone.zone_id} ({exc!r}); trying SoilGrids")
        else:
            out[zone.zone_id] = usda_row
            statuses.append(usda_status)
            continue

        # Secondary source: SoilGrids REST.
        try:
            payload = fetch_soilgrids_with_fallback(zone.center_lat, zone.center_lon)
            soil_row = parse_soilgrids_payload(payload)
        except Exception as exc:  # deliberate best-effort; emit a null row instead
            print(f"SoilGrids lookup failed for {zone.zone_id} ({exc!r}); soil columns left null")
            soil_row = empty_soil_row()
            soil_row.update(
                {
                    "soil_source_name": "unavailable",
                    "soil_source_status": "soil_source_unavailable_columns_null",
                    "soil_query_offset_dx_m": None,
                    "soil_query_offset_dy_m": None,
                    "soil_query_lat": None,
                    "soil_query_lon": None,
                }
            )
            out[zone.zone_id] = soil_row
            statuses.append("soil_source_unavailable_columns_null")
            continue

        # NOTE: fetch_soilgrids_with_fallback returns only the payload, not the
        # offset that produced it, so the zone centre is recorded as the query point.
        soil_row.update(
            {
                "soil_source_name": "soilgrids_rest",
                "soil_source_status": "soilgrids_rest_offset_search",
                "soil_query_offset_dx_m": 0.0,
                "soil_query_offset_dy_m": 0.0,
                "soil_query_lat": round(zone.center_lat, 7),
                "soil_query_lon": round(zone.center_lon, 7),
            }
        )
        out[zone.zone_id] = soil_row
        statuses.append("soilgrids_rest_offset_search")
        # Pause after a successful SoilGrids zone; presumably this throttles
        # requests to respect the service's rate limit (confirm).
        time.sleep(12.5)
    return out, summarize_statuses(statuses)
|
|
|
|
def fetch_usda_soil_row(zone: Zone) -> dict[str, float | None]:
    """Query USDA SDA around a zone centre until a point returns soil rows.

    Every (dy, dx) combination of ``USDA_QUERY_OFFSETS_M`` is tried, with dy
    as the outer loop, starting at the exact centre. The first point that
    yields rows is parsed, cached via ``write_usda_cache`` and returned.

    Args:
        zone: Zone whose centre anchors the search.

    Returns:
        The parsed soil row, including source and query metadata.

    Raises:
        RuntimeError: If no offset returns any rows. The attempt log is still
            written to the cache before raising.
    """
    tried: list[dict[str, Any]] = []
    search_grid = [(east_m, north_m) for north_m in USDA_QUERY_OFFSETS_M for east_m in USDA_QUERY_OFFSETS_M]

    for east_m, north_m in search_grid:
        lat = round(zone.center_lat + meters_to_lat_deg(north_m), 7)
        lon = round(zone.center_lon + meters_to_lon_deg(east_m, zone.center_lat), 7)
        response = query_usda_rows(lat, lon)
        table_rows = parse_usda_table(response)
        tried.append({"dx_m": east_m, "dy_m": north_m, "rows": len(table_rows), "query_lat": lat, "query_lon": lon})
        if table_rows:
            at_center = east_m == 0.0 and north_m == 0.0
            result = parse_usda_rows(table_rows)
            result.update(
                {
                    "soil_source_name": "usda_sda",
                    "soil_source_status": "usda_sda_exact_point" if at_center else "usda_sda_offset_point",
                    "soil_query_offset_dx_m": east_m,
                    "soil_query_offset_dy_m": north_m,
                    "soil_query_lat": lat,
                    "soil_query_lon": lon,
                }
            )
            write_usda_cache(zone, response, tried, result)
            return result

    # Nothing found: record the full attempt log with an empty response.
    write_usda_cache(zone, {}, tried, None)
    raise RuntimeError(f"No USDA soil rows returned for {zone.zone_id}")
|
|
|
|
def query_usda_rows(query_lat: float, query_lon: float) -> dict[str, Any]:
    """Run the map-unit / component / horizon query for one point against USDA SDA.

    The point is passed to SDA as WKT in ``lon lat`` order. Rows come back
    ordered by component percentage (descending) and horizon top depth
    (ascending); horizons lacking a component percentage or depth bounds are
    excluded by the query.

    Args:
        query_lat: Latitude of the query point in decimal degrees.
        query_lon: Longitude of the query point in decimal degrees.

    Returns:
        The decoded JSON response from the SDA endpoint.
    """
    # The SQL text is kept flush-left so the string sent to SDA is unchanged.
    sda_sql = f"""
select
MU.mukey as mukey,
MU.musym as musym,
MU.muname as muname,
MU.slopegradwta as slopegradwta,
MU.aws025wta as aws025wta,
MU.drclassdcd as drclassdcd,
MU.hydgrpdcd as hydgrpdcd,
C.cokey as cokey,
C.compname as compname,
C.comppct_r as comppct_r,
H.hzdept_r as hzdept_r,
H.hzdepb_r as hzdepb_r,
H.sandtotal_r as sandtotal_r,
H.silttotal_r as silttotal_r,
H.claytotal_r as claytotal_r,
H.om_r as om_r,
H.dbthirdbar_r as dbthirdbar_r,
H.cec7_r as cec7_r,
H.ph1to1h2o_r as ph1to1h2o_r,
H.fragvoltot_r as fragvoltot_r,
H.awc_r as awc_r
from SDA_Get_Mukey_from_intersection_with_WktWgs84('point({query_lon} {query_lat})') as S
join muaggatt MU on MU.mukey = S.mukey
join component C on C.mukey = S.mukey
join chorizon H on H.cokey = C.cokey
where C.comppct_r is not null
and H.hzdept_r is not null
and H.hzdepb_r is not null
order by C.comppct_r desc, H.hzdept_r asc
"""
    form_fields = {
        "SERVICE": "query",
        "REQUEST": "query",
        "QUERY": sda_sql,
        # Asks SDA to put the column names in the first row of the table.
        "FORMAT": "JSON+COLUMNNAME",
    }
    return post_json(USDA_SDA_URL, form_fields)
|
|
|
|
def write_usda_cache(zone: Zone, payload: dict[str, Any], attempts: list[dict[str, Any]], selected_soil_row: dict[str, Any] | None) -> None:
    """Persist the USDA lookup trace for one zone as ``<zone_id>.json``.

    Args:
        zone: Zone the lookup was made for.
        payload: Raw SDA response to store (``{}`` when nothing was found).
        attempts: Log of every query point tried.
        selected_soil_row: The soil row that was chosen, or ``None`` if the
            search failed; its source and query metadata are copied into the
            ``selected_*`` fields (all ``None`` when absent).
    """
    # Treat "no selection" as an empty mapping so every lookup yields None.
    chosen = {} if selected_soil_row is None else selected_soil_row
    document = {
        "zone_id": zone.zone_id,
        "zone_center_lat": zone.center_lat,
        "zone_center_lon": zone.center_lon,
        "attempts": attempts,
        "selected_source_status": chosen.get("soil_source_status"),
        "selected_query_lat": chosen.get("soil_query_lat"),
        "selected_query_lon": chosen.get("soil_query_lon"),
        "selected_query_offset_dx_m": chosen.get("soil_query_offset_dx_m"),
        "selected_query_offset_dy_m": chosen.get("soil_query_offset_dy_m"),
        "response": payload,
    }
    target = USDA_SOIL_CACHE_DIR / f"{zone.zone_id}.json"
    target.write_text(json.dumps(document, indent=2, sort_keys=True))
|
|
|
|
| def parse_usda_table(payload: dict[str, Any]) -> list[dict[str, Any]]: |
| table = payload.get("Table", []) |
| if len(table) < 2: |
| return [] |
| columns = table[0] |
| return [dict(zip(columns, row, strict=True)) for row in table[1:]] |
|
|
|
|
def parse_usda_rows(rows: list[dict[str, Any]]) -> dict[str, float | None]:
    """Build a soil row from USDA SDA horizon rows.

    Map-unit attributes are taken from the first row. Horizon properties are
    averaged over the 0-5 cm and 0-30 cm windows with
    ``weighted_horizon_averages``. Nitrogen columns are always ``None``
    because the query provides no nitrogen field.

    Args:
        rows: Non-empty list of row dicts as produced by ``parse_usda_table``.

    Returns:
        A soil row based on ``empty_soil_row()`` plus ``soil_usda_*`` columns.
    """
    soil = empty_soil_row()

    lead = rows[0]
    soil["soil_usda_mukey"] = lead.get("mukey")
    soil["soil_usda_musym"] = lead.get("musym")
    soil["soil_usda_muname"] = lead.get("muname")
    soil["soil_usda_slopegradwta_pct"] = to_float(lead.get("slopegradwta"))
    soil["soil_usda_aws025wta_cm"] = to_float(lead.get("aws025wta"))
    soil["soil_usda_drainage_class"] = lead.get("drclassdcd")
    soil["soil_usda_hydrologic_group"] = lead.get("hydgrpdcd")

    # (column-name depth label, window top in cm, window bottom in cm)
    windows = (
        ("0_to_5cm", 0.0, 5.0),
        ("0_to_30cm", 0.0, 30.0),
    )
    # Output column template paired with the SDA horizon field that feeds it.
    column_sources = (
        ("soil_bdod_{depth}_kgdm3", "dbthirdbar_r"),
        ("soil_cec_{depth}_cmolkg", "cec7_r"),
        ("soil_cfvo_{depth}_pct", "fragvoltot_r"),
        ("soil_clay_{depth}_pct", "claytotal_r"),
        ("soil_phh2o_{depth}_ph", "ph1to1h2o_r"),
        ("soil_sand_{depth}_pct", "sandtotal_r"),
        ("soil_silt_{depth}_pct", "silttotal_r"),
    )

    for depth_label, top_cm, bottom_cm in windows:
        means = weighted_horizon_averages(rows, top_cm, bottom_cm)
        for template, usda_field in column_sources:
            mean = means.get(usda_field)
            soil[template.format(depth=depth_label)] = None if mean is None else round(mean, 4)

        # Organic matter is scaled by 5.8 to fill the organic-carbon g/kg column
        # (presumably OM percent x 0.58 x 10; confirm the conversion factor).
        organic_matter = means.get("om_r")
        if organic_matter is None:
            soil[f"soil_soc_{depth_label}_gkg"] = None
        else:
            soil[f"soil_soc_{depth_label}_gkg"] = round(organic_matter * 5.8, 4)
        soil[f"soil_nitrogen_{depth_label}_gkg"] = None

    return soil
|
|
|
|
def weighted_horizon_averages(rows: list[dict[str, Any]], start_cm: float, end_cm: float) -> dict[str, float | None]:
    """Average each usable horizon field over a depth window.

    Every row is weighted by ``comppct_r`` times the thickness of its overlap
    with ``[start_cm, end_cm]``. Rows missing depth bounds or component
    percentage, and rows that do not overlap the window, are ignored. A
    missing field value is skipped for that field only.

    Returns:
        Mapping of each name in ``USDA_USABLE_HORIZON_FIELDS`` to its weighted
        mean, or ``None`` when no row contributed to that field.
    """
    weighted_sums = dict.fromkeys(USDA_USABLE_HORIZON_FIELDS, 0.0)
    weight_totals = dict.fromkeys(USDA_USABLE_HORIZON_FIELDS, 0.0)

    for horizon in rows:
        top = to_float(horizon.get("hzdept_r"))
        bottom = to_float(horizon.get("hzdepb_r"))
        share = to_float(horizon.get("comppct_r"))
        if top is None or bottom is None or share is None:
            continue

        # Thickness of the part of this horizon that lies inside the window.
        thickness = max(0.0, min(bottom, end_cm) - max(top, start_cm))
        if thickness <= 0:
            continue

        weight = share * thickness
        for name in USDA_USABLE_HORIZON_FIELDS:
            reading = to_float(horizon.get(name))
            if reading is not None:
                weighted_sums[name] += reading * weight
                weight_totals[name] += weight

    averages: dict[str, float | None] = {}
    for name in USDA_USABLE_HORIZON_FIELDS:
        total = weight_totals[name]
        averages[name] = (weighted_sums[name] / total) if total > 0 else None
    return averages
|
|
|
|
def _soilgrids_payload_has_values(payload: dict[str, Any]) -> bool:
    """Return True if any layer depth in ``payload`` carries a non-null mean."""
    for layer in extract_soil_layers(payload):
        for depth in layer.get("depths", []):
            if (depth.get("values") or {}).get("mean") is not None:
                return True
    return False


def fetch_soilgrids_with_fallback(lat: float, lon: float) -> dict[str, Any]:
    """Query SoilGrids at ``(lat, lon)`` and nearby offsets until data is found.

    Every (dy, dx) combination of ``SOILGRIDS_QUERY_OFFSETS_M`` is tried, with
    dy as the outer loop, starting at the exact point. A response is accepted
    only when at least one depth has a non-null mean. Accepting any response
    that merely lists layers would stop the search on the first no-data pixel
    and defeat the offset fallback.

    Args:
        lat: Latitude of the point of interest in decimal degrees.
        lon: Longitude of the point of interest in decimal degrees.

    Returns:
        The first SoilGrids payload that contains at least one value.

    Raises:
        RuntimeError: If no candidate point returns any value.
    """
    for dy in SOILGRIDS_QUERY_OFFSETS_M:
        for dx in SOILGRIDS_QUERY_OFFSETS_M:
            candidate_lat = lat + meters_to_lat_deg(dy)
            candidate_lon = lon + meters_to_lon_deg(dx, lat)
            payload = fetch_json(
                "https://rest.isric.org/soilgrids/v2.0/properties/query",
                {
                    "lat": candidate_lat,
                    "lon": candidate_lon,
                    **repeat_params("property", SOIL_PROPERTIES),
                    **repeat_params("depth", SOIL_DEPTHS),
                    "value": "mean",
                },
            )
            if _soilgrids_payload_has_values(payload):
                return payload
    raise RuntimeError(f"Unable to retrieve SoilGrids values near lat={lat}, lon={lon}")
|
|
|
|
def parse_soilgrids_payload(payload: dict[str, Any]) -> dict[str, float | None]:
    """Flatten a SoilGrids response into ``soil_<property>_<depth>_<unit>`` columns.

    Each mean is divided by the layer's ``d_factor`` (1 when absent or falsy)
    and rounded to 4 decimals; a null mean stays ``None``. Depth labels have
    ``-`` replaced by ``_to_`` to form the column name.

    Raises:
        KeyError: If a layer's property name has no entry in the suffix table.
    """
    columns: dict[str, float | None] = {}
    for layer in extract_soil_layers(payload):
        prop = layer["name"]
        unit_info = layer.get("unit_measure", {})
        divisor = unit_info.get("d_factor", 1) or 1
        for depth_entry in layer.get("depths", []):
            label = depth_entry["label"].replace("-", "_to_")
            raw_mean = depth_entry.get("values", {}).get("mean")
            if raw_mean is None:
                scaled = None
            else:
                scaled = round(float(raw_mean) / float(divisor), 4)
            unit = soil_property_suffix(prop)
            columns[f"soil_{prop}_{label}_{unit}"] = scaled
    return columns
|
|
|
|
def empty_soil_row() -> dict[str, float | None]:
    """Return a soil row with every property and metadata column set to ``None``.

    Property columns come first, one per (property, depth) pair in
    ``SOIL_PROPERTIES`` x ``SOIL_DEPTHS`` order, followed by the source and
    query metadata columns.
    """
    row: dict[str, float | None] = {
        f"soil_{prop}_{depth.replace('-', '_to_')}_{soil_property_suffix(prop)}": None
        for prop in SOIL_PROPERTIES
        for depth in SOIL_DEPTHS
    }
    metadata_columns = (
        "soil_source_name",
        "soil_source_status",
        "soil_query_offset_dx_m",
        "soil_query_offset_dy_m",
        "soil_query_lat",
        "soil_query_lon",
    )
    for column in metadata_columns:
        row[column] = None
    return row
|
|
|
|
def extract_soil_layers(payload: dict[str, Any]) -> list[dict[str, Any]]:
    """Return the ``layers`` list from a SoilGrids payload, or ``[]`` if absent.

    Two payload shapes are recognised: a top-level ``properties`` dict, or a
    ``features`` list whose first feature carries the ``properties``.
    """
    top_level = payload.get("properties") if "properties" in payload else None
    if isinstance(top_level, dict):
        return top_level.get("layers", [])

    features = payload.get("features")
    if not features:
        return []
    return features[0]["properties"].get("layers", [])
|
|
|
|
def soil_property_suffix(property_name: str) -> str:
    """Return the unit suffix used in column names for ``property_name``.

    Raises:
        KeyError: If the property is not listed in ``SOIL_COLUMN_SUFFIXES``.
    """
    suffix = SOIL_COLUMN_SUFFIXES[property_name]
    return suffix
|
|
|
|
def update_top_manifest(dataset_card: dict[str, Any]) -> None:
    """Record this dataset in the top-level manifest and refresh its blockers.

    The manifest is loaded from ``TOP_MANIFEST_PATH`` if it exists, otherwise
    started empty. The ``zone_state_bootstrap`` dataset entry is replaced, any
    ``zone_state_bootstrap`` blocker is cleared, and a
    ``field_specific_replacement`` blocker is set while the boundary is still
    provisional. The ``blocked`` key is removed when no blockers remain.

    Args:
        dataset_card: The dataset card dict built by ``main``.
    """
    if TOP_MANIFEST_PATH.exists():
        manifest = json.loads(TOP_MANIFEST_PATH.read_text())
    else:
        manifest = {"datasets": {}}

    entry: dict[str, Any] = {"dataset": "zone_state_bootstrap"}
    copied_keys = (
        "description",
        "records",
        "field_id",
        "boundary_mode",
        "history_window",
        "forecast_window",
    )
    for key in copied_keys:
        entry[key] = dataset_card[key]
    entry["output_path"] = str(OUTPUT_PATH)
    entry["dataset_card_path"] = str(DATASET_CARD_PATH)

    datasets = manifest.setdefault("datasets", {})
    datasets["zone_state_bootstrap"] = entry

    blockers = manifest.get("blocked", {})
    # This dataset now exists, so it no longer blocks anything by itself.
    blockers.pop("zone_state_bootstrap", None)
    if dataset_card["boundary_mode"] == "provisional_demo":
        blockers["field_specific_replacement"] = "Current zone_state_bootstrap is built from a provisional demo field. Replace data/raw/field/field_boundary.geojson with the actual field polygon and rebuild before deployment."

    if blockers:
        manifest["blocked"] = blockers
    else:
        manifest.pop("blocked", None)

    TOP_MANIFEST_PATH.write_text(json.dumps(manifest, indent=2, sort_keys=True))
|
|
|
|
def fetch_json(base_url: str, params: dict[str, Any]) -> dict[str, Any]:
    """GET ``base_url`` with ``params`` as the query string and decode the JSON body.

    List-valued parameters are expanded into repeated ``key=value`` pairs
    (``doseq=True``). The request times out after 120 seconds.
    """
    query_string = urlencode(params, doseq=True)
    with urlopen(f"{base_url}?{query_string}", timeout=120) as response:
        body = response.read()
    return json.loads(body.decode("utf-8"))
|
|
|
|
def post_json(url: str, params: dict[str, Any]) -> dict[str, Any]:
    """POST ``params`` as a URL-encoded form to ``url`` and decode the JSON body.

    Supplying ``data`` to ``Request`` is what makes this a POST. The request
    times out after 120 seconds.
    """
    form_body = urlencode(params, doseq=True).encode()
    with urlopen(Request(url, data=form_body), timeout=120) as response:
        body = response.read()
    return json.loads(body.decode("utf-8"))
|
|
|
|
def repeat_params(key: str, values: list[str]) -> dict[str, list[str]]:
    """Wrap ``values`` under ``key`` as a one-entry dict.

    The result is meant to be splatted into a parameter dict that is later
    encoded with ``urlencode(..., doseq=True)``, which emits one ``key=value``
    pair per list item. The list is stored as-is, not copied.
    """
    params: dict[str, list[str]] = {}
    params[key] = values
    return params
|
|
|
|
def summarize_statuses(statuses: list[str]) -> str:
    """Return ``"status:count"`` pairs, sorted by status and joined with ``", "``."""
    tally = pd.Series(statuses).value_counts().sort_index()
    parts: list[str] = []
    for status, count in tally.items():
        parts.append(f"{status}:{count}")
    return ", ".join(parts)
|
|
|
|
def to_float(value: Any) -> float | None:
    """Convert ``value`` to ``float``, mapping missing or unparsable input to ``None``.

    ``None``, the empty string and the literal ``"NULL"`` count as missing.
    Anything ``float()`` rejects also yields ``None``.
    """
    missing_markers = (None, "", "NULL")
    if value not in missing_markers:
        try:
            return float(value)
        except (TypeError, ValueError):
            pass
    return None
|
|
|
|
def meters_to_lat_deg(meters: float) -> float:
    """Convert a north-south distance in metres to degrees of latitude.

    Uses a fixed 111,320 metres per degree.
    """
    meters_per_degree = 111_320.0
    return meters / meters_per_degree
|
|
|
|
def meters_to_lon_deg(meters: float, latitude_deg: float) -> float:
    """Convert an east-west distance in metres to degrees of longitude.

    The 111,320 metres-per-degree figure is scaled by the cosine of
    ``latitude_deg`` because a degree of longitude shrinks away from the equator.
    """
    latitude_scale = math.cos(math.radians(latitude_deg))
    return meters / (111_320.0 * latitude_scale)
|
|
|
|
# Run the bootstrap build only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|