| import xarray as xr
|
| import numpy as np
|
| from shapely.geometry import Point
|
| from shapely.ops import transform
|
| import pyproj
|
| from typing import Dict
|
| from pathlib import Path
|
| import re
|
| from shapely.wkt import loads
|
| import json
|
| import pandas as pd
|
| import geopandas as gpd
|
|
|
|
|
def _load_field_mapping(consortium, consortium_folder_name: str, sensors_df: pd.DataFrame) -> Dict:
    """
    Map sensors to field geometries read from GeoJSON files.

    Files are read from ``data/01_raw/{consortium_folder_name}/fields_shapes``.
    The field name is the lowercase file stem. For ``consortium0`` the files are
    named ``{field_name}_irri.geojson`` and the trailing ``_irri`` is stripped;
    for every other consortium the stem is used as is.

    A sensor is mapped to a field when the field name is a substring of the
    lowercase ``datastream_name``. Only the first geometry of each file is used.

    Args:
        consortium: Consortium identifier; only ``"consortium0"`` changes how
            the field name is derived from the file name.
        consortium_folder_name: Folder name below ``data/01_raw``.
        sensors_df: DataFrame with a ``datastream_name`` column.

    Returns:
        ``{datastream_name: geometry_wkt}`` for every sensor that matched a
        field. Sensors without a match are absent from the mapping.

    Raises:
        FileNotFoundError: If the shapes folder does not exist.
    """
    base_dir = Path(f"data/01_raw/{consortium_folder_name}/fields_shapes")
    if not base_dir.exists():
        raise FileNotFoundError(f"Folder not found: {base_dir}")

    sensor_names = sensors_df["datastream_name"].tolist()
    sensors = [(name, name.lower()) for name in sensor_names]

    irri_suffix = "_irri"
    result = {}

    # glob() order depends on the filesystem. Sorting makes the outcome
    # reproducible when several fields match the same sensor (last one wins).
    for geojson_path in sorted(base_dir.glob("*.geojson")):
        field_name = geojson_path.stem.lower()

        # Strip the marker only where the naming convention puts it (the end),
        # so a field name that merely contains "_irri" is not mangled.
        if consortium == "consortium0" and field_name.endswith(irri_suffix):
            field_name = field_name[: -len(irri_suffix)]

        # An empty name is a substring of every sensor name and would map all
        # sensors to this file's geometry.
        if not field_name:
            continue

        gdf = gpd.read_file(geojson_path)
        if gdf.empty:
            continue

        geom_wkt = gdf.geometry.iloc[0].wkt

        for sensor_name, sensor_lower in sensors:
            if field_name in sensor_lower:
                result[sensor_name] = geom_wkt

    return result
|
|
|
def _infer_dataset_crs(ds: xr.Dataset) -> str:
    """Infer the CRS of a dataset from its attributes or coordinate values.

    An explicit ``crs`` entry in ``ds.attrs`` is returned unchanged. Otherwise
    the function chooses between the only two candidates it knows:

    * ``"EPSG:4326"`` when every ``x`` / ``y`` value is a plausible
      longitude / latitude in degrees;
    * ``"EPSG:32632"`` (UTM zone 32N) in every other case.

    The decision is based on coordinate magnitude, not on the dataset extent.
    UTM eastings and northings are in metres and far outside the degree range,
    so a single-pixel projected dataset is not mistaken for lon/lat, and a
    lon/lat dataset wider than one degree is not mistaken for UTM.

    Args:
        ds: Dataset exposing ``attrs`` and ``x`` / ``y`` coordinates.

    Returns:
        The CRS identifier.
    """
    if 'crs' in ds.attrs:
        return ds.attrs['crs']

    x_min, x_max = float(ds.x.min()), float(ds.x.max())
    y_min, y_max = float(ds.y.min()), float(ds.y.max())

    # Longitudes may be expressed as -180..180 or 0..360.
    looks_geographic = (
        -180.0 <= x_min and x_max <= 360.0
        and -90.0 <= y_min and y_max <= 90.0
    )
    if looks_geographic:
        return "EPSG:4326"

    return "EPSG:32632"
|
|
|
|
|
|
|
def _get_closest_pixel_indices(ds: xr.Dataset, lat: float, lon: float,
                               transformer: pyproj.Transformer) -> tuple:
    """Return the ``(y_idx, x_idx)`` grid indices nearest to a location.

    The location is handed to ``transformer.transform`` as ``(lon, lat)``; the
    result is compared with the dataset's ``x`` and ``y`` coordinate values,
    each axis independently.
    """
    target_x, target_y = transformer.transform(lon, lat)

    def nearest(axis_values, target):
        # Position of the coordinate with the smallest absolute distance.
        return int(np.abs(axis_values - target).argmin())

    col = nearest(ds.x.values, target_x)
    row = nearest(ds.y.values, target_y)

    return row, col
|
|
|
|
|
def _get_field_pixel_indices(ds: xr.Dataset,
                             field_geom,
                             transformer: pyproj.Transformer) -> tuple:
    """Get indices of all pixels within the field boundary.

    A pixel is selected when the point built from its ``x`` / ``y`` coordinate
    is contained in the field geometry after the geometry has been passed
    through ``transformer.transform``.

    Args:
        ds: Dataset exposing 1-D ``x`` and ``y`` coordinates.
        field_geom: Field boundary as a WKT string; an already parsed shapely
            geometry is accepted too.
        transformer: Transformer applied to the field geometry so that it
            shares the coordinate system of ``ds``.

    Returns:
        ``(y_indices, x_indices)``: two equally long lists of integer indices,
        in row-major order (all matches of the first row, then the second, ...).
        Both lists are empty when no pixel lies inside the field.
    """
    if isinstance(field_geom, str):
        field_geom = loads(field_geom)

    field_proj = transform(transformer.transform, field_geom)

    # An empty geometry contains nothing and has no usable bounds.
    if field_proj.is_empty:
        return [], []

    y_coords = np.asarray(ds.y.values)
    x_coords = np.asarray(ds.x.values)

    # Only pixels inside the bounding box can be inside the field, so the
    # exact (and expensive) containment test runs on that window instead of
    # the whole grid. flatnonzero keeps the indices in ascending order, which
    # preserves the row-major order of the result.
    min_x, min_y, max_x, max_y = field_proj.bounds
    candidate_rows = np.flatnonzero((y_coords >= min_y) & (y_coords <= max_y)).tolist()
    candidate_cols = np.flatnonzero((x_coords >= min_x) & (x_coords <= max_x)).tolist()

    y_indices = []
    x_indices = []

    for i in candidate_rows:
        y_value = y_coords[i]
        for j in candidate_cols:
            if field_proj.contains(Point(x_coords[j], y_value)):
                y_indices.append(i)
                x_indices.append(j)

    return y_indices, x_indices
|
|
|
|
|
# One WKT number: optional sign, decimal digits, optional exponent.
_WKT_NUMBER = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"

# The first two ordinates of a coordinate tuple. Anchoring on the preceding
# "(" or "," guarantees that matching starts at the beginning of a tuple.
_WKT_XY_PAIR = re.compile(
    rf"(?P<lead>[(,]\s*)(?P<x>{_WKT_NUMBER})\s+(?P<y>{_WKT_NUMBER})"
)


def _swap_wkt_pairs(wkt):
    """Swap the first two ordinates of every coordinate tuple in a WKT string."""
    return _WKT_XY_PAIR.sub(r"\g<lead>\g<y> \g<x>", wkt)


def __swap_coordinates(geometry):
    """
    Swaps x and y coordinates in a geometry string for both POLYGON and MULTIPOLYGON.
    Handles both outer rings and inner rings (holes) in polygons.

    The swap is done in place on the text, so parentheses, commas and therefore
    the ring / polygon structure of the input are kept exactly as given. Signed
    integers, decimals and exponent notation are all recognised as numbers.

    Parameters:
    - geometry (str): The geometry string (WKT)

    Returns:
    - str: The geometry string with swapped coordinates.

    Raises:
    - ValueError: If the geometry type is neither POLYGON nor MULTIPOLYGON, or
      if the swapped string cannot be parsed as WKT.
    """
    geometry = geometry.strip()
    geometry_type = geometry.split("(")[0].strip()

    # WKT keywords are case-insensitive.
    if geometry_type.upper() not in ("POLYGON", "MULTIPOLYGON"):
        raise ValueError(f"Unsupported geometry type: {geometry_type}")

    swapped_geometry = _swap_wkt_pairs(geometry)

    # Validate the result. The parser's exception type differs between shapely
    # versions, hence the broad except; it is re-raised as ValueError.
    try:
        swapped_geom = loads(swapped_geometry)
    except Exception as e:
        raise ValueError(f"Invalid geometry after swapping coordinates: {str(e)}") from e

    if not swapped_geom.is_valid:
        print(f"Warning: Invalid geometry after swap: {swapped_geom.wkt}")

    return swapped_geometry
|
|
|
|
|
|
|
|
|
|
|
def get_fields_4_sensors(fields_json, locations_df):
    """Attach to every sensor location the ids of the fields that contain it.

    Args:
        fields_json: Path of a JSON file holding a list of objects with a
            ``"feature"`` entry (polygon / multipolygon WKT whose coordinate
            order is swapped before parsing) and a ``"field_id"`` entry.
        locations_df: DataFrame with ``datastream_id``, ``x`` and ``y``
            columns. Points are built with ``y`` as the first and ``x`` as the
            second ordinate — presumably ``x`` holds latitude and ``y``
            longitude; confirm against the producer of this frame.

    Returns:
        A copy of ``locations_df`` with an extra ``field_id`` column holding,
        per row, the list of ids of the fields the sensor lies within (an
        empty list when it lies in none). The row count matches
        ``locations_df`` even when ``datastream_id`` values repeat.
    """
    # JSON is UTF-8; do not depend on the platform's default encoding.
    with open(fields_json, "r", encoding="utf-8") as f:
        fields_data = json.load(f)

    field_geoms = []
    field_ids = []

    for fdict in fields_data:
        field_geom = __swap_coordinates(fdict["feature"])
        print("Geometry to parse:", field_geom)
        field_geoms.append(loads(field_geom))
        field_ids.append(fdict["field_id"])

    # Passing the geometries through ``geometry=`` yields a proper geometry
    # column even when the field list is empty.
    fields_gdf = gpd.GeoDataFrame(
        {"field_id": field_ids},
        geometry=field_geoms,
        crs="EPSG:4326",
    )

    devices_gdf = gpd.GeoDataFrame(
        locations_df,
        geometry=gpd.points_from_xy(locations_df.y, locations_df.x),
        crs="EPSG:4326",
    )

    joined = gpd.sjoin(devices_gdf, fields_gdf, how="left", predicate="within")

    # One row per distinct datastream id. Reindexing on the raw column would
    # repeat duplicated ids, and the merge below would then multiply the rows
    # of every duplicated sensor.
    device_to_fields = (
        joined.groupby("datastream_id")["field_id"]
        .apply(lambda ids: list(ids.dropna().unique()))
        .reindex(locations_df["datastream_id"].drop_duplicates())
        .reset_index()
    )

    locations_with_fields = locations_df.merge(device_to_fields, on="datastream_id")

    print(locations_with_fields.head())

    return locations_with_fields
|
|
|
|
|
|
|
|
|