"""Parse Google Earth KML/KMZ uploads into a ``SiteSelection`` boundary."""

from __future__ import annotations

import uuid
import zipfile
from pathlib import Path
from typing import Any
from xml.etree import ElementTree as ET

from .geometry import calculate_site_metrics
from .models import SiteSelection


def parse_kml_file(path: str | Path) -> SiteSelection:
    """Build a ``SiteSelection`` from the largest polygon in a KML/KMZ file.

    Args:
        path: Location of a ``.kml`` or ``.kmz`` file.

    Returns:
        A ``SiteSelection`` whose geometry is the polygon with the greatest
        ``area_sqm`` and whose metrics come from ``calculate_site_metrics``.

    Raises:
        ValueError: If the suffix is unsupported, a KMZ holds no KML entry,
            no usable polygon is found, or a coordinate is non-numeric or
            outside the WGS84 longitude/latitude range.

    Errors raised by the XML parser or by ``zipfile`` propagate unchanged.
    """
    source = Path(path)
    xml_text = _read_kml_text(source)
    # NOTE(review): uploads are presumably untrusted; the stdlib XML parser
    # is not hardened against maliciously constructed documents.
    root = ET.fromstring(xml_text)

    polygons = _extract_polygons(root)
    if not polygons:
        raise ValueError("KML/KMZ must contain at least one Polygon boundary.")

    geometry = _largest_polygon(polygons)
    metrics = calculate_site_metrics(geometry)
    return SiteSelection(
        id=f"S-{uuid.uuid4().hex[:8]}",
        selection_type="kml_boundary",
        coordinate_mode="wgs84",
        geometry_geojson=geometry,
        local_geometry=None,
        anchor_lat=metrics["centroid"][0],
        anchor_lon=metrics["centroid"][1],
        radius_m=None,
        area_sqm=metrics["area_sqm"],
        perimeter_m=metrics["perimeter_m"],
        centroid=metrics["centroid"],
        bbox=metrics["bbox"],
        unit_source="Google Earth / KML WGS84 coordinates",
        accuracy_label="uploaded Google Earth/KML boundary",
        source_files=[source.name],
        selected_boundary_id=None,
        limitations=[
            "KML/KMZ geometry is treated as a user-exported Google Earth or GIS boundary.",
            "It is not legal/cadastral boundary verification.",
            "Verify against faculty CAD, survey, or project documents before plot-level decisions.",
        ],
    )


def _read_kml_text(source: Path) -> str:
    """Return the KML document text stored in a ``.kml`` or ``.kmz`` file.

    For a KMZ archive the first entry (in archive order) whose name ends in
    ``.kml`` is used. Text is decoded as UTF-8, dropping a leading BOM.

    Raises:
        ValueError: If the suffix is neither ``.kml`` nor ``.kmz``, or the
            KMZ archive contains no ``.kml`` entry.
    """
    suffix = source.suffix.lower()
    if suffix == ".kml":
        return source.read_text(encoding="utf-8-sig")
    if suffix == ".kmz":
        with zipfile.ZipFile(source) as archive:
            kml_names = [
                name for name in archive.namelist() if name.lower().endswith(".kml")
            ]
            if not kml_names:
                raise ValueError("KMZ archive does not contain a KML file.")
            with archive.open(kml_names[0]) as handle:
                return handle.read().decode("utf-8-sig")
    raise ValueError("KML parser supports only .kml and .kmz files.")


def _extract_polygons(root: ET.Element) -> list[dict[str, Any]]:
    """Collect every ``Polygon`` element under *root* as a GeoJSON-style dict.

    Only one ring per polygon is kept (the outer boundary when it can be
    identified). Polygons without coordinate text, or whose closed ring has
    fewer than four points, are skipped.
    """
    polygons: list[dict[str, Any]] = []
    for element in root.iter():
        if not _tag_endswith(element.tag, "Polygon"):
            continue
        coordinates_text = _polygon_coordinates_text(element)
        if not coordinates_text:
            continue
        ring = _parse_coordinates(coordinates_text)
        # A closed ring needs three distinct vertices plus the repeated first one.
        if len(ring) >= 4:
            polygons.append({"type": "Polygon", "coordinates": [ring]})
    return polygons


def _polygon_coordinates_text(polygon: ET.Element) -> str | None:
    """Return the coordinate text describing *polygon*'s boundary ring.

    The ring inside ``outerBoundaryIs`` is preferred so that a hole
    (``innerBoundaryIs``) written first in the document is never mistaken
    for the site boundary. If no outer boundary yields text, the first
    non-empty ``coordinates`` element anywhere in the polygon is used.
    """
    for node in polygon.iter():
        if _tag_endswith(node.tag, "outerBoundaryIs"):
            text = _first_coordinates_text(node)
            if text:
                return text
    return _first_coordinates_text(polygon)


def _first_coordinates_text(element: ET.Element) -> str | None:
    """Return the text of the first non-empty ``coordinates`` node in *element*."""
    for node in element.iter():
        if _tag_endswith(node.tag, "coordinates") and node.text:
            return node.text
    return None


def _parse_coordinates(text: str) -> list[list[float]]:
    """Parse KML ``lon,lat[,alt]`` tuples into a closed ``[lon, lat]`` ring.

    Tuples are separated by whitespace; any third component is ignored and
    tokens with fewer than two components are skipped. If the last point
    differs from the first, a copy of the first point is appended.

    Raises:
        ValueError: If a component is not numeric, or a point falls outside
            longitude [-180, 180] / latitude [-90, 90].
    """
    points: list[list[float]] = []
    # str.split() with no arguments already splits on any run of whitespace.
    for token in text.split():
        parts = [part for part in token.split(",") if part != ""]
        if len(parts) < 2:
            continue
        try:
            lon = float(parts[0])
            lat = float(parts[1])
        except ValueError as exc:
            raise ValueError(
                f"KML coordinate tuple {token!r} is not numeric."
            ) from exc
        if not (-180 <= lon <= 180 and -90 <= lat <= 90):
            raise ValueError("KML coordinates do not look like WGS84 longitude/latitude.")
        points.append([lon, lat])
    if points and points[0] != points[-1]:
        # Append a copy so the closing vertex does not alias the first one.
        points.append(list(points[0]))
    return points


def _largest_polygon(polygons: list[dict[str, Any]]) -> dict[str, Any]:
    """Return the polygon with the greatest ``area_sqm``.

    Polygons for which ``calculate_site_metrics`` raises are ignored; a
    missing or falsy area counts as 0. On ties the earliest polygon wins.

    Raises:
        ValueError: If no polygon could be measured.
    """
    scored = []
    for geometry in polygons:
        try:
            area = calculate_site_metrics(geometry)["area_sqm"] or 0
        except Exception:
            # Deliberate best-effort: one unmeasurable polygon must not
            # reject a file that also contains valid ones.
            continue
        scored.append((area, geometry))
    if not scored:
        raise ValueError("KML polygons could not be converted to valid site geometry.")
    return max(scored, key=lambda item: item[0])[1]


def _tag_endswith(tag: str, suffix: str) -> bool:
    """Return True if *tag* is *suffix*, bare or namespace-qualified (``{ns}suffix``)."""
    return tag.endswith("}" + suffix) or tag == suffix