site-intelligence-studio / src /sample_data.py
Eishaan's picture
Support polygon-only analysis flow
338ebb6
Raw
History Blame Contribute Delete
7.97 kB
from __future__ import annotations
from copy import deepcopy
from typing import Any
from .evidence import make_evidence
from .models import EvidenceItem
# Anchor latitude/longitude of the bundled Chorwad sample site.
# _near_chorwad_sample compares a caller-supplied anchor against these values
# (within 0.01 on each axis) to recognise the sample site by location.
CHORWAD_SAMPLE_LAT = 21.00248
CHORWAD_SAMPLE_LON = 70.24537
def is_sample_site(
    project_name: str,
    site_name: str,
    boundary_source: str,
    anchor_lat: float | None = None,
    anchor_lon: float | None = None,
) -> bool:
    """Return True when the inputs identify the bundled Chorwad sample site.

    A site counts as the sample when the combined, lower-cased text of
    ``project_name``, ``site_name`` and ``boundary_source`` contains both
    "chorwad" and "sample", or, failing that, when ``_near_chorwad_sample``
    accepts the anchor coordinates. Falsy text fields are treated as empty
    strings.
    """
    labels = (project_name, site_name, boundary_source)
    haystack = " ".join(label or "" for label in labels).lower()
    # Text match wins first; the coordinate check is only consulted otherwise.
    if "chorwad" in haystack and "sample" in haystack:
        return True
    return _near_chorwad_sample(anchor_lat, anchor_lon)
def apply_chorwad_sample_fallbacks(
    *,
    project_name: str,
    site_name: str,
    boundary_source: str,
    site_identity: dict[str, Any] | None,
    climate: dict[str, Any],
    osm_context: dict[str, Any],
    topography: dict[str, Any],
    soil: dict[str, Any],
    anchor_lat: float | None = None,
    anchor_lon: float | None = None,
) -> tuple[dict[str, Any] | None, dict[str, Any], dict[str, Any], dict[str, Any], dict[str, Any], list[EvidenceItem], list[str]]:
    """Substitute bundled Chorwad sample data for any missing data layer.

    Nothing is substituted unless ``is_sample_site`` recognises the site.
    For a sample site, each layer that is missing or empty is replaced with
    its bundled sample value:

    * ``site_identity`` - when ``_missing_identity`` reports it missing.
    * ``climate`` - when falsy or when ``_missing_climate`` reports it missing.
    * ``osm_context`` - when falsy or when it has neither ``"counts"`` nor
      ``"features"``.
    * ``topography`` / ``soil`` - when falsy.

    ``None`` is tolerated for every layer so that a missing payload falls
    back to sample data instead of raising ``AttributeError``.

    Returns:
        A 7-tuple ``(site_identity, climate, osm_context, topography, soil,
        evidence, warnings)``. ``evidence`` and ``warnings`` are empty lists
        when nothing was substituted; otherwise they hold one evidence item
        and one warning message naming the substituted layers.
    """
    if not is_sample_site(project_name, site_name, boundary_source, anchor_lat, anchor_lon):
        return site_identity, climate, osm_context, topography, soil, [], []

    used_layers: list[str] = []
    if _missing_identity(site_identity):
        site_identity = _sample_identity()
        used_layers.append("site identity")
    # Check falsiness first so a None payload never reaches a .get() call.
    if not climate or _missing_climate(climate):
        climate = _sample_climate()
        used_layers.append("climate")
    if not osm_context or not (osm_context.get("counts") or osm_context.get("features")):
        osm_context = _sample_osm_context()
        used_layers.append("context")
    if not topography:
        topography = _sample_topography()
        used_layers.append("topography")
    if not soil:
        soil = _sample_soil()
        used_layers.append("soil")

    if not used_layers:
        # Every layer was already populated; report no fallback.
        return site_identity, climate, osm_context, topography, soil, [], []

    layer_list = ", ".join(used_layers)
    warnings: list[str] = [
        "Chorwad sample fallback data was used for "
        + layer_list
        + " because one or more live public-data calls were unavailable. Use this only for demo/testing, not final project evidence."
    ]
    evidence: list[EvidenceItem] = [
        make_evidence(
            category="Demo fallback",
            finding="Bundled Chorwad sample fallback data was used to keep the judge demo complete when live APIs are unavailable.",
            source_name="Bundled sample fallback",
            source_url="",
            source_type="demo fixture",
            resolution_or_scope=layer_list,
            confidence="low",
            limitation="This fallback is not live public data and must not be used as final site evidence.",
            design_implication="Use the demo to understand workflow and output structure; rerun with live data or verified uploads for actual work.",
            verification_needed="Replace with live API results, CAD/KML/GeoJSON, site photos, and site visit observations.",
            output_label="site_visit_required",
        )
    ]
    return site_identity, climate, osm_context, topography, soil, evidence, warnings
def _near_chorwad_sample(anchor_lat: float | None, anchor_lon: float | None) -> bool:
if anchor_lat is None or anchor_lon is None:
return False
return abs(anchor_lat - CHORWAD_SAMPLE_LAT) <= 0.01 and abs(anchor_lon - CHORWAD_SAMPLE_LON) <= 0.01
def _missing_climate(climate: dict[str, Any]) -> bool:
for key in ("forecast", "recent_historical", "climate_normal"):
value = climate.get(key)
if isinstance(value, dict) and value:
return False
return True
def _missing_identity(site_identity: dict[str, Any] | None) -> bool:
if not site_identity:
return True
return not any(
site_identity.get(key)
for key in ("display_name", "city", "town", "village", "district", "state", "country")
)
def _sample_identity() -> dict[str, Any]:
return {
"display_name": "Malia Taluka, Junagadh, Gujarat, India",
"district": "Junagadh",
"state": "Gujarat",
"country": "India",
"postcode": "362250",
}
def _sample_climate() -> dict[str, Any]:
months = [
(1, 21.8, 0.3),
(2, 23.4, 0.0),
(3, 25.9, 0.0),
(4, 27.8, 1.9),
(5, 29.3, 11.5),
(6, 29.1, 187.4),
(7, 27.5, 380.2),
(8, 26.8, 224.4),
(9, 26.9, 193.7),
(10, 27.4, 65.9),
(11, 25.7, 5.3),
(12, 23.2, 4.8),
]
month_rows = [
{"month": month, "temperature_c": temp, "precipitation_mm": rain}
for month, temp, rain in months
]
total_rain = round(sum(row["precipitation_mm"] for row in month_rows), 1)
return {
"forecast": {
"current_temperature_c": 31.5,
"current_humidity_pct": 68,
"current_wind_speed_kmh": 23.1,
"current_wind_direction_deg": 235,
},
"recent_historical": {
"period": "cached Chorwad sample derived from a previous successful public-data run",
"months": deepcopy(month_rows),
"total_precipitation_mm": total_rain,
},
"climate_normal": {
"period": "cached 10-year style Chorwad sample for demo fallback only",
"months": month_rows,
"total_precipitation_mm": total_rain,
},
}
def _sample_osm_context() -> dict[str, Any]:
return {
"counts": {"water": 3, "roads/access": 1, "landuse:industrial": 1, "buildings": 2},
"radius_m": 500,
"features": [
{
"type": "way",
"tags": {"highway": "residential", "name": "sample access road"},
"geometry": [
{"lat": 21.00170, "lon": 70.24480},
{"lat": 21.00250, "lon": 70.24515},
{"lat": 21.00320, "lon": 70.24555},
],
},
{
"type": "way",
"tags": {"natural": "water"},
"geometry": [
{"lat": 21.00160, "lon": 70.24365},
{"lat": 21.00335, "lon": 70.24365},
{"lat": 21.00335, "lon": 70.24430},
{"lat": 21.00160, "lon": 70.24430},
{"lat": 21.00160, "lon": 70.24365},
],
},
{
"type": "way",
"tags": {"landuse": "industrial"},
"geometry": [
{"lat": 21.00295, "lon": 70.24615},
{"lat": 21.00345, "lon": 70.24615},
{"lat": 21.00345, "lon": 70.24670},
{"lat": 21.00295, "lon": 70.24670},
{"lat": 21.00295, "lon": 70.24615},
],
},
{
"type": "way",
"tags": {"building": "yes"},
"geometry": [
{"lat": 21.00205, "lon": 70.24615},
{"lat": 21.00225, "lon": 70.24615},
{"lat": 21.00225, "lon": 70.24635},
{"lat": 21.00205, "lon": 70.24635},
{"lat": 21.00205, "lon": 70.24615},
],
},
],
}
def _sample_topography() -> dict[str, Any]:
return {
"mean_elevation_m": 5.1,
"relief_m": 2.0,
"approx_slope_pct": 0.98,
"interpretation": "Cached sample suggests low relief; verify contours, drainage, and waterlogging on site.",
}
def _sample_soil() -> dict[str, Any]:
return {
"texture_signal": "mixed or uncertain topsoil signal",
"clay_pct": None,
"sand_pct": None,
"silt_pct": None,
"ph_h2o": None,
"design_implication": "Use only as a prompt to request local soil/geotechnical verification.",
}