# Source: site-intelligence-studio/src/kml_parser.py
# Commit 2f91d7e: Add KML terrain and soil analysis layers
from __future__ import annotations
import uuid
import zipfile
from pathlib import Path
from typing import Any
from xml.etree import ElementTree as ET
from .geometry import calculate_site_metrics
from .models import SiteSelection
def parse_kml_file(path: str | Path) -> SiteSelection:
    """Build a ``SiteSelection`` from the largest polygon in a KML/KMZ file.

    The file is read as text, parsed as XML, and scanned for ``Polygon``
    elements. Of the polygons found, the one with the greatest ``area_sqm``
    (as reported by ``calculate_site_metrics``) becomes the site boundary.

    Args:
        path: Location of a ``.kml`` or ``.kmz`` file.

    Returns:
        A ``SiteSelection`` of type ``"kml_boundary"`` in ``"wgs84"``
        coordinate mode, carrying the chosen geometry and its metrics.

    Raises:
        ValueError: If no usable polygon boundary is found. Errors raised
            while reading the file, parsing the XML, or computing metrics
            propagate unchanged.
    """
    kml_path = Path(path)
    document = ET.fromstring(_read_kml_text(kml_path))

    candidates = _extract_polygons(document)
    if len(candidates) == 0:
        raise ValueError("KML/KMZ must contain at least one Polygon boundary.")

    boundary = _largest_polygon(candidates)
    site_metrics = calculate_site_metrics(boundary)

    # NOTE(review): index 0 is used as latitude and index 1 as longitude;
    # confirm calculate_site_metrics returns the centroid in that order.
    centroid = site_metrics["centroid"]

    # Caveats attached to every KML-derived selection.
    caveats = [
        "KML/KMZ geometry is treated as a user-exported Google Earth or GIS boundary.",
        "It is not legal/cadastral boundary verification.",
        "Verify against faculty CAD, survey, or project documents before plot-level decisions.",
    ]

    return SiteSelection(
        id=f"S-{uuid.uuid4().hex[:8]}",
        selection_type="kml_boundary",
        coordinate_mode="wgs84",
        geometry_geojson=boundary,
        local_geometry=None,
        anchor_lat=centroid[0],
        anchor_lon=centroid[1],
        radius_m=None,
        area_sqm=site_metrics["area_sqm"],
        perimeter_m=site_metrics["perimeter_m"],
        centroid=centroid,
        bbox=site_metrics["bbox"],
        unit_source="Google Earth / KML WGS84 coordinates",
        accuracy_label="uploaded Google Earth/KML boundary",
        source_files=[kml_path.name],
        selected_boundary_id=None,
        limitations=caveats,
    )
def _read_kml_text(source: Path) -> str:
    """Return the KML document text stored in a ``.kml`` or ``.kmz`` file.

    A ``.kml`` file is read directly. A ``.kmz`` file is opened as a zip
    archive and the first member whose name ends in ``.kml`` (compared
    case-insensitively, in archive listing order) is used. In both cases the
    bytes are decoded as UTF-8 and a leading byte-order mark is dropped.

    Args:
        source: Path to the file; only its suffix decides how it is read.

    Returns:
        The decoded KML text.

    Raises:
        ValueError: If the suffix is neither ``.kml`` nor ``.kmz``, or the
            KMZ archive has no ``.kml`` member.
    """
    extension = source.suffix.lower()
    if extension not in (".kml", ".kmz"):
        raise ValueError("KML parser supports only .kml and .kmz files.")

    if extension == ".kml":
        return source.read_text(encoding="utf-8-sig")

    with zipfile.ZipFile(source) as archive:
        first_kml = next(
            (member for member in archive.namelist() if member.lower().endswith(".kml")),
            None,
        )
        if first_kml is None:
            raise ValueError("KMZ archive does not contain a KML file.")
        with archive.open(first_kml) as handle:
            payload = handle.read()
    return payload.decode("utf-8-sig")
def _extract_polygons(root: ET.Element) -> list[dict[str, Any]]:
    """Collect GeoJSON-style polygons from every ``Polygon`` element under *root*.

    For each element whose tag is ``Polygon`` (with or without a namespace),
    the first descendant ``coordinates`` element with non-empty text supplies
    the ring; any later ``coordinates`` elements in that polygon are not read.
    Polygons without such text, or whose parsed ring has fewer than four
    points, are left out.

    Args:
        root: Root element of the parsed KML document.

    Returns:
        A list of ``{"type": "Polygon", "coordinates": [ring]}`` dicts in
        document order; empty if nothing qualifies.
    """
    found: list[dict[str, Any]] = []
    polygon_nodes = (node for node in root.iter() if _tag_endswith(node.tag, "Polygon"))
    for node in polygon_nodes:
        # Text of the first <coordinates> descendant that actually has content.
        raw = next(
            (
                descendant.text
                for descendant in node.iter()
                if _tag_endswith(descendant.tag, "coordinates") and descendant.text
            ),
            None,
        )
        if not raw:
            continue
        ring = _parse_coordinates(raw)
        if len(ring) < 4:
            # Too few points to form a closed ring.
            continue
        found.append({"type": "Polygon", "coordinates": [ring]})
    return found
def _parse_coordinates(text: str) -> list[list[float]]:
    """Parse a KML ``<coordinates>`` string into a closed ``[lon, lat]`` ring.

    The text is split on whitespace into tuples, and each tuple on commas.
    The first two non-empty fields of a tuple are read as longitude and
    latitude; further fields (such as altitude) are ignored, and tuples with
    fewer than two fields are skipped. If the last point differs from the
    first, a copy of the first point is appended to close the ring.

    Args:
        text: Raw text content of a ``<coordinates>`` element.

    Returns:
        A list of ``[lon, lat]`` pairs; empty if no tuple could be read.
        Every entry is a distinct list object, so editing one vertex never
        changes another.

    Raises:
        ValueError: If a field is not numeric, or a pair falls outside
            longitude [-180, 180] / latitude [-90, 90].
    """
    points: list[list[float]] = []
    # str.split() with no argument splits on any run of whitespace, so
    # newlines and tabs need no separate normalisation.
    for token in text.split():
        parts = [part for part in token.split(",") if part != ""]
        if len(parts) < 2:
            continue
        lon = float(parts[0])
        lat = float(parts[1])
        if not (-180 <= lon <= 180 and -90 <= lat <= 90):
            raise ValueError("KML coordinates do not look like WGS84 longitude/latitude.")
        points.append([lon, lat])
    if points and points[0] != points[-1]:
        # Append a copy rather than the same list object, so the closing
        # vertex is not aliased to the opening one.
        points.append(list(points[0]))
    return points
def _largest_polygon(polygons: list[dict[str, Any]]) -> dict[str, Any]:
    """Return the polygon with the greatest ``area_sqm``.

    Each polygon is scored with ``calculate_site_metrics``; a falsy area is
    counted as 0. Polygons for which scoring raises any ``Exception`` are
    skipped on a best-effort basis. On a tie the earliest polygon wins.

    Args:
        polygons: Candidate GeoJSON-style polygon dicts.

    Returns:
        The highest-scoring polygon dict from *polygons*.

    Raises:
        ValueError: If no polygon could be scored.
    """
    ranked = []
    for candidate in polygons:
        try:
            area = calculate_site_metrics(candidate)["area_sqm"] or 0
        except Exception:
            # Unscorable geometry is ignored, not treated as fatal.
            continue
        ranked.append((area, candidate))

    if ranked:
        _, winner = max(ranked, key=lambda pair: pair[0])
        return winner
    raise ValueError("KML polygons could not be converted to valid site geometry.")
def _tag_endswith(tag: str, suffix: str) -> bool:
    """Report whether *tag* names the element *suffix*.

    Matches either the bare name or a namespace-qualified tag in the
    ``{namespace}name`` form, i.e. one ending in ``"}" + suffix``.

    Args:
        tag: Element tag as stored on the parsed element.
        suffix: Local element name to look for.

    Returns:
        ``True`` if *tag* equals *suffix* or ends with ``}`` followed by
        *suffix*; ``False`` otherwise.
    """
    if tag == suffix:
        return True
    qualified_ending = f"}}{suffix}"
    return tag.endswith(qualified_ending)