"""Tests for ``extract_features_for_records`` against small synthetic rasters."""

import math

import numpy as np
import pytest
import rioxarray  # noqa: F401  (kept for the ``.rio`` accessor used on the datasets below)
import xarray as xr
from shapely.geometry import box

from muscari_widget.features import extract_features_for_records
from muscari_widget.geometry import PolygonRecord


def synthetic_datasets():
    """Build the 3x3 environmental and land-cover datasets shared by the tests.

    Both datasets use the same cell-centre coordinates (x ascending from 0.5
    to 2.5, y descending from 2.5 to 0.5) and are tagged with EPSG:3035.

    Returns:
        A ``(env, lc)`` tuple: ``env`` holds a float ``bio1`` variable with the
        values 0..8 in row-major order, ``lc`` holds an int16 ``landcover``
        variable made of classes 0 and 1.
    """
    dims = ("y", "x")
    coords = {"x": [0.5, 1.5, 2.5], "y": [2.5, 1.5, 0.5]}

    bio1_values = np.arange(9, dtype=float).reshape(3, 3)
    landcover_values = np.array(
        [[0, 1, 1], [1, 0, 1], [0, 0, 1]],
        dtype=np.int16,
    )

    env_ds = xr.Dataset({"bio1": (dims, bio1_values)}, coords=coords)
    lc_ds = xr.Dataset({"landcover": (dims, landcover_values)}, coords=coords)
    return env_ds.rio.write_crs("EPSG:3035"), lc_ds.rio.write_crs("EPSG:3035")


def test_extract_features_for_record_orders_columns_and_fractions():
    """A polygon covering the whole grid yields full-raster statistics.

    Also checks that the feature columns come back in the requested order.
    """
    env_ds, lc_ds = synthetic_datasets()
    full_extent = (0, 0, 3, 3)
    polygon = PolygonRecord(
        record_id="polygon_1",
        geometry_wgs84=box(*full_extent),
        geometry_3035=box(*full_extent),
        area_m2=9.0,
    )
    requested = ["bio1", "std_bio1", "lc_frac_0", "lc_frac_1", "log_sp_unit_area"]

    extracted = extract_features_for_records([polygon], requested, env_ds, lc_ds)

    assert extracted.features.columns.tolist() == requested

    # The land-cover grid contains four 0-cells and five 1-cells.
    expected = {
        "bio1": 4.0,
        "std_bio1": np.std(np.arange(9, dtype=float)),
        "lc_frac_0": 4 / 9,
        "lc_frac_1": 5 / 9,
        "log_sp_unit_area": math.log(9.0),
    }
    first_row = extracted.features.iloc[0]
    for column, value in expected.items():
        assert first_row[column] == pytest.approx(value), column


def test_extract_features_for_small_polygon_uses_nearest_raster_cell_with_warning():
    """A polygon much smaller than a cell takes a single cell's values.

    The expected numbers are those of the cell centred at x=0.5, y=0.5
    (``bio1`` 6, land-cover class 0). The warning already attached to the
    record must be kept, followed by a raster-resolution warning.
    """
    env_ds, lc_ds = synthetic_datasets()
    tiny_extent = (0.1, 0.1, 0.2, 0.2)
    polygon = PolygonRecord(
        record_id="polygon_1",
        geometry_wgs84=box(*tiny_extent),
        geometry_3035=box(*tiny_extent),
        area_m2=100.0,
        warnings=(
            "Area is below the MuScaRi training range lower bound (0.01 km2); interpret this estimate as extrapolation.",
        ),
    )
    requested = ["bio1", "std_bio1", "lc_frac_0", "lc_frac_1", "log_sp_unit_area"]

    extracted = extract_features_for_records([polygon], requested, env_ds, lc_ds)

    expected = {
        "bio1": 6.0,
        "std_bio1": 0.0,
        "lc_frac_0": 1.0,
        "lc_frac_1": 0.0,
        "log_sp_unit_area": math.log(100.0),
    }
    first_row = extracted.features.iloc[0]
    for column, value in expected.items():
        assert first_row[column] == pytest.approx(value), column

    messages = extracted.warnings_by_id["polygon_1"]
    assert len(messages) == 2
    assert "below the MuScaRi training range" in messages[0]
    assert "below the environmental raster resolution" in messages[1]


def test_extract_features_rejects_outside_polygon():
    """A polygon lying outside the raster extent raises ``ValueError``."""
    env_ds, lc_ds = synthetic_datasets()
    far_extent = (10, 10, 11, 11)
    polygon = PolygonRecord(
        record_id="polygon_1",
        geometry_wgs84=box(*far_extent),
        geometry_3035=box(*far_extent),
        area_m2=1.0,
    )

    with pytest.raises(ValueError, match="does not overlap"):
        extract_features_for_records(
            [polygon],
            ["bio1", "log_sp_unit_area"],
            env_ds,
            lc_ds,
        )