MuScaRi / tests /test_features.py
Victor Boussange
Enhance feature extraction and result presentation
6cee138
import math
import numpy as np
import pytest
import rioxarray # noqa: F401
import xarray as xr
from shapely.geometry import box
from muscari_widget.features import extract_features_for_records
from muscari_widget.geometry import PolygonRecord
def synthetic_datasets():
    """Build a small 3x3 environmental / land-cover dataset pair for the tests.

    Both datasets use the same ``x`` (0.5, 1.5, 2.5) and ``y`` (2.5, 1.5, 0.5)
    coordinates and get the EPSG:3035 CRS written through the ``rio`` accessor.

    Returns:
        tuple: ``(env, lc)``. ``env`` carries a float ``bio1`` variable holding
        0..8 filled row by row; ``lc`` carries an ``int16`` ``landcover``
        variable made of 0/1 values.
    """
    dims = ("y", "x")
    grid_coords = {"x": [0.5, 1.5, 2.5], "y": [2.5, 1.5, 0.5]}

    bio1_values = np.arange(9, dtype=float).reshape(3, 3)
    landcover_values = np.array(
        [[0, 1, 1], [1, 0, 1], [0, 0, 1]],
        dtype=np.int16,
    )

    env = xr.Dataset({"bio1": (dims, bio1_values)}, coords=grid_coords)
    lc = xr.Dataset({"landcover": (dims, landcover_values)}, coords=grid_coords)
    return env.rio.write_crs("EPSG:3035"), lc.rio.write_crs("EPSG:3035")
def test_extract_features_for_record_orders_columns_and_fractions():
    """Check column order and feature values for a polygon spanning box(0, 0, 3, 3).

    The expected numbers are statistics over all nine synthetic cells: the
    mean and standard deviation of 0..8, land-cover fractions of 4/9 and 5/9,
    and the natural log of the record's ``area_m2``.
    """
    env, lc = synthetic_datasets()
    bounds = (0, 0, 3, 3)
    record = PolygonRecord(
        record_id="polygon_1",
        geometry_wgs84=box(*bounds),
        geometry_3035=box(*bounds),
        area_m2=9.0,
    )

    # Insertion order doubles as the requested (and expected) column order.
    expected = {
        "bio1": 4.0,
        "std_bio1": np.std(np.arange(9, dtype=float)),
        "lc_frac_0": 4 / 9,
        "lc_frac_1": 5 / 9,
        "log_sp_unit_area": math.log(9.0),
    }
    feature_names = list(expected)

    result = extract_features_for_records([record], feature_names, env, lc)

    assert result.features.columns.tolist() == feature_names
    first_row = result.features.iloc[0]
    for name, value in expected.items():
        assert first_row[name] == pytest.approx(value), name
def test_extract_features_for_small_polygon_uses_nearest_raster_cell_with_warning():
    """Check feature values and warnings for a polygon of extent box(0.1, 0.1, 0.2, 0.2).

    The expected values match a single synthetic cell (``bio1`` 6, land cover
    0, zero spread). The record's pre-existing warning must come first,
    followed by a raster-resolution warning.
    """
    env, lc = synthetic_datasets()
    bounds = (0.1, 0.1, 0.2, 0.2)
    record = PolygonRecord(
        record_id="polygon_1",
        geometry_wgs84=box(*bounds),
        geometry_3035=box(*bounds),
        area_m2=100.0,
        warnings=(
            "Area is below the MuScaRi training range lower bound (0.01 km2); interpret this estimate as extrapolation.",
        ),
    )

    expected = {
        "bio1": 6.0,
        "std_bio1": 0.0,
        "lc_frac_0": 1.0,
        "lc_frac_1": 0.0,
        "log_sp_unit_area": math.log(100.0),
    }

    result = extract_features_for_records([record], list(expected), env, lc)

    first_row = result.features.iloc[0]
    for name, value in expected.items():
        assert first_row[name] == pytest.approx(value), name

    messages = result.warnings_by_id["polygon_1"]
    assert len(messages) == 2
    training_range_msg, resolution_msg = messages[0], messages[1]
    assert "below the MuScaRi training range" in training_range_msg
    assert "below the environmental raster resolution" in resolution_msg
def test_extract_features_rejects_outside_polygon():
    """Expect a ``ValueError`` matching "does not overlap" for box(10, 10, 11, 11)."""
    env, lc = synthetic_datasets()
    bounds = (10, 10, 11, 11)
    record = PolygonRecord(
        record_id="polygon_1",
        geometry_wgs84=box(*bounds),
        geometry_3035=box(*bounds),
        area_m2=1.0,
    )
    requested = ["bio1", "log_sp_unit_area"]

    with pytest.raises(ValueError, match="does not overlap"):
        extract_features_for_records([record], requested, env, lc)