Spaces:
Running
Running
| import math | |
| import numpy as np | |
| import pytest | |
| import rioxarray # noqa: F401 | |
| import xarray as xr | |
| from shapely.geometry import box | |
| from muscari_widget.features import extract_features_for_records | |
| from muscari_widget.geometry import PolygonRecord | |
def synthetic_datasets():
    """Build a tiny pair of rasters shared by the tests in this module.

    Both datasets live on the same 3x3 grid (cell centres at 0.5, 1.5 and
    2.5 on each axis, with ``y`` listed in descending order) and are tagged
    with the EPSG:3035 CRS.

    Returns:
        A ``(env, lc)`` tuple where ``env`` holds a float ``bio1`` variable
        with the values 0..8 in row-major order and ``lc`` holds an int16
        ``landcover`` variable made of the classes 0 and 1.
    """
    dims = ("y", "x")
    grid = {"x": [0.5, 1.5, 2.5], "y": [2.5, 1.5, 0.5]}

    bio1_values = np.arange(9, dtype=float).reshape(3, 3)
    landcover_values = np.array(
        [
            [0, 1, 1],
            [1, 0, 1],
            [0, 0, 1],
        ],
        dtype=np.int16,
    )

    env = xr.Dataset({"bio1": (dims, bio1_values)}, coords=grid)
    env = env.rio.write_crs("EPSG:3035")

    lc = xr.Dataset({"landcover": (dims, landcover_values)}, coords=grid)
    lc = lc.rio.write_crs("EPSG:3035")

    return env, lc
def test_extract_features_for_record_orders_columns_and_fractions():
    """Check column order and per-feature values for a full-grid polygon.

    The polygon is ``box(0, 0, 3, 3)``, which contains all nine cell centres
    of the synthetic grid. The expected values below are the statistics of
    the whole grid: 4.0 is the mean of 0..8, and the land-cover array holds
    four zeros and five ones.
    """
    env, lc = synthetic_datasets()

    bounds = (0, 0, 3, 3)
    record = PolygonRecord(
        record_id="polygon_1",
        geometry_wgs84=box(*bounds),
        geometry_3035=box(*bounds),
        area_m2=9.0,
    )

    # Insertion order of this mapping is also the requested column order.
    expected = {
        "bio1": 4.0,
        "std_bio1": np.std(np.arange(9, dtype=float)),
        "lc_frac_0": 4 / 9,
        "lc_frac_1": 5 / 9,
        "log_sp_unit_area": math.log(9.0),
    }
    feature_names = list(expected)

    result = extract_features_for_records([record], feature_names, env, lc)

    assert result.features.columns.tolist() == feature_names

    row = result.features.iloc[0]
    for name, value in expected.items():
        assert row[name] == pytest.approx(value)
def test_extract_features_for_small_polygon_uses_nearest_raster_cell_with_warning():
    """Check features and warnings for a polygon smaller than one grid cell.

    The polygon is ``box(0.1, 0.1, 0.2, 0.2)``. The expected values match
    the single synthetic cell centred at x=0.5, y=0.5 (``bio1`` of 6,
    land-cover class 0). The record already carries one warning; the test
    expects it to come first, followed by a raster-resolution warning.
    """
    env, lc = synthetic_datasets()

    bounds = (0.1, 0.1, 0.2, 0.2)
    training_range_warning = "Area is below the MuScaRi training range lower bound (0.01 km2); interpret this estimate as extrapolation."
    record = PolygonRecord(
        record_id="polygon_1",
        geometry_wgs84=box(*bounds),
        geometry_3035=box(*bounds),
        area_m2=100.0,
        warnings=(training_range_warning,),
    )

    # Insertion order of this mapping is also the requested column order.
    expected = {
        "bio1": 6.0,
        "std_bio1": 0.0,
        "lc_frac_0": 1.0,
        "lc_frac_1": 0.0,
        "log_sp_unit_area": math.log(100.0),
    }

    result = extract_features_for_records([record], list(expected), env, lc)

    row = result.features.iloc[0]
    for name, value in expected.items():
        assert row[name] == pytest.approx(value)

    emitted = result.warnings_by_id["polygon_1"]
    assert len(emitted) == 2
    expected_fragments = (
        "below the MuScaRi training range",
        "below the environmental raster resolution",
    )
    for position, fragment in enumerate(expected_fragments):
        assert fragment in emitted[position]
def test_extract_features_rejects_outside_polygon():
    """Check that a polygon away from the grid raises ``ValueError``.

    The polygon is ``box(10, 10, 11, 11)``, while the synthetic grid's cell
    centres lie between 0.5 and 2.5 on both axes. The error message is
    expected to match "does not overlap".
    """
    env, lc = synthetic_datasets()

    bounds = (10, 10, 11, 11)
    record = PolygonRecord(
        record_id="polygon_1",
        geometry_wgs84=box(*bounds),
        geometry_3035=box(*bounds),
        area_m2=1.0,
    )
    requested = ["bio1", "log_sp_unit_area"]

    with pytest.raises(ValueError, match="does not overlap"):
        extract_features_for_records([record], requested, env, lc)