"""Continuous four-factor confidence scoring for EO indicators. Factors: - **temporal**: fraction of the analysis period with valid monthly data - **baseline_depth**: fraction of the expected baseline with valid data - **spatial_completeness**: fraction of AOI pixels that are not nodata - **anomaly_consistency**: penalty when anomaly months ≈ total months (high anomaly fraction signals baseline drift, not per-month signal) All factors are continuous 0..1 — the previous stepped version saturated at 1.0 for realistic analyses, producing "1.00 / 1.00 / 1.00 High" on every indicator. The new version returns finer-grained values so readers can compare relative reliability across indicators. """ from __future__ import annotations from typing import Any from app.models import ConfidenceLevel def _clamp(v: float, lo: float = 0.0, hi: float = 1.0) -> float: """Clamp a float into a range.""" if v < lo: return lo if v > hi: return hi return v def score_temporal_coverage(valid_months: int, expected_months: int | None = None) -> float: """Fraction of analysis months with valid observations. If ``expected_months`` is not provided, assume 12 months (legacy calls). Returns a continuous value in [0, 1]. """ if expected_months is None or expected_months <= 0: expected_months = 12 return _clamp(valid_months / expected_months) def score_baseline_depth( baseline_valid_months: int, baseline_years: int = 5, ) -> float: """Fraction of the expected baseline that has valid monthly data. For a 5-year baseline we expect 60 monthly composites. Missing data (cloud cover, sensor gaps) reduces this score proportionally. """ expected = max(1, baseline_years * 12) return _clamp(baseline_valid_months / expected) def score_spatial_completeness(fraction: float) -> float: """Fraction of AOI pixels that are valid (non-nodata). Returned unchanged — already continuous. """ return _clamp(fraction) def score_anomaly_consistency(anomaly_months: int, total_months: int) -> float: """Penalty when anomaly months approach the total. When ~everything is flagged anomalous, that indicates baseline drift or regime shift rather than meaningful per-month signal — so our confidence in the *per-month* reading drops. Returns 1.0 when anomaly fraction is near zero, drops linearly, reaching 0 when 100% of months are anomalous. """ if total_months <= 0: return 1.0 frac = anomaly_months / total_months return _clamp(1.0 - frac) def compute_confidence( valid_months: int, baseline_years_with_data: int = 5, spatial_completeness: float = 1.0, *, expected_months: int | None = None, baseline_valid_months: int | None = None, anomaly_months: int = 0, ) -> dict[str, Any]: """Return a four-factor confidence dict for an indicator. Backwards-compatible: old callers passing (valid_months, baseline_years_with_data, spatial_completeness) still work. New callers should also pass ``expected_months`` and ``baseline_valid_months`` for better differentiation. """ temporal = score_temporal_coverage(valid_months, expected_months) # Prefer the more accurate baseline_valid_months when provided; fall # back to years × 12 for legacy call sites. if baseline_valid_months is None: baseline_valid_months = baseline_years_with_data * 12 baseline = score_baseline_depth(baseline_valid_months, baseline_years=5) spatial = score_spatial_completeness(spatial_completeness) total_anom_months = expected_months if expected_months else valid_months consistency = score_anomaly_consistency(anomaly_months, total_anom_months) # Weighted composite — temporal and baseline dominate; consistency and # spatial are secondary. score = ( temporal * 0.30 + baseline * 0.30 + spatial * 0.20 + consistency * 0.20 ) if score > 0.7: level = ConfidenceLevel.HIGH elif score >= 0.4: level = ConfidenceLevel.MODERATE else: level = ConfidenceLevel.LOW return { "level": level, "score": round(score, 3), "factors": { "temporal": round(temporal, 2), "baseline_depth": round(baseline, 2), "spatial_completeness": round(spatial, 2), "anomaly_consistency": round(consistency, 2), }, }