Spaces:

jtlevine
/

climate-risk-engine

Paused

jtlevine Claude Opus 4.7 (1M context) committed on Apr 21

Commit

64c4f2a

1 Parent(s): 202abe1

Cut dead neural/LSTM pricing + predictor paths

These were left behind after the earlier XGBoost heat-wave predictor
cut (93f565b) and the move to empirical burn analysis for pricing.
Nothing in src/pipeline.py or src/api.py imports them; the eval tests
they ship with couldn't pass either (neural_pricer_dar.pt never
existed on disk, so test_neural_model_loads fails by construction).

Removed:
- src/prediction/heat_forecast.py (HeatWavePredictor)
- src/prediction/lstm_model.py (LSTM trainer + CITY_THRESHOLDS)
- src/pricing/neural_actuarial.py (neural pricer; replaced by burn analysis)
- src/notification/sender.py + __init__.py (notify step was cut in a
prior pipeline pass; no one imports this anymore)
- scripts/train_neural_pricer.py, train_on_era5.py, train_on_nasa_power.py,
train_lstm.py, backtest_pricing.py (all drove the dead paths)
- tests/eval_heat_predictor.py, eval_neural_pricer.py (evaluated dead code)
- models/heat_predictor_xgb.json, trigger_head_retrained.pt
- (untracked) models/heat_lstm.pt, lstm_norm.json,
scripts/retrain_trigger_heads.py

Kept: BurnAnalysisPricer, UHI XGBoost correction (still live),
GraphCast forecast_trigger, RAG index, basis_risk assessor.

Smoke-tested: src.pipeline + src.api import clean post-cut.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (14) hide show

models/heat_predictor_xgb.json +0 -0
models/trigger_head_retrained.pt +0 -3
scripts/backtest_pricing.py +0 -508
scripts/train_lstm.py +0 -58
scripts/train_neural_pricer.py +0 -142
scripts/train_on_era5.py +0 -491
scripts/train_on_nasa_power.py +0 -660
src/notification/__init__.py +0 -0
src/notification/sender.py +0 -318
src/prediction/heat_forecast.py +0 -557
src/prediction/lstm_model.py +0 -566
src/pricing/neural_actuarial.py +0 -1312
tests/eval_heat_predictor.py +0 -157
tests/eval_neural_pricer.py +0 -303

models/heat_predictor_xgb.json DELETED Viewed

The diff for this file is too large to render. See raw diff

models/trigger_head_retrained.pt DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6fb225a2eabbb94697c1c4a3f772d0850c54f48c83855a318282c54d77a1f186
-size 168777

scripts/backtest_pricing.py DELETED Viewed

@@ -1,508 +0,0 @@
-#!/usr/bin/env python3
-"""
-Actuarial backtesting: predicted frequency vs. actual trigger events.
-Runs the NeuralActuarialPricer (or GLM fallback) against 19 historical
-hot seasons (Dec-Mar 2005-06 through 2023-24) using ERA5-Land data for
-all 15 Dar es Salaam zones.  Counts actual alert and payout events
-using the same thresholds as the benchmark panel, then compares to the
-model's learned_frequency (lambda).
-Usage:
-    python scripts/backtest_pricing.py
-    python scripts/backtest_pricing.py --zone DAR-JAN          # single zone
-    python scripts/backtest_pricing.py --all-zones              # all 15 zones
-    python scripts/backtest_pricing.py --no-uhi                 # skip UHI correction
-"""
-from __future__ import annotations
-import argparse
-import json
-import sys
-from datetime import date, timedelta
-from pathlib import Path
-import numpy as np
-# ── Project imports ──────────────────────────────────────────────────────
-PROJECT_ROOT = Path.home() / "climate-risk-engine"
-sys.path.insert(0, str(PROJECT_ROOT))
-from config import ZONE_MAP, ZONES
-from src.indexing.heat_index import calculate_wbgt
-from src.downscaling.uhi_model import UHI_RANGES
-# ── Constants ────────────────────────────────────────────────────────────
-# Hot season: December through March (Dar es Salaam)
-HOT_SEASON_MONTHS = {12, 1, 2, 3}
-# Benchmark trigger thresholds (from neural_actuarial.py and CLAUDE.md)
-WINDOW_ENTRY_WBGT = 35.1       # approximate window-entry threshold
-ALERT_MIN_DAYS = 2             # consecutive days for alert tier
-PAYOUT_MIN_DAYS = 5            # consecutive days for payout tier
-PAYOUT_PEAK_WBGT = 30.7        # peak WBGT for full payout qualification
-# Climate history window fed to the pricer
-HISTORY_DAYS = 90
-# Default payout per event per worker (USD)
-PAYOUT_PER_EVENT = 10.0
-# ── Helpers ──────────────────────────────────────────────────────────────
-def load_era5_data() -> dict[str, list[dict]]:
-    """Load ERA5-Land daily records keyed by zone_id."""
-    path = PROJECT_ROOT / "data" / "era5land_dar_es_salaam.json"
-    with open(path) as f:
-        return json.load(f)
-def build_date_index(records: list[dict]) -> dict[str, int]:
-    """Map date string -> index in records list."""
-    return {r["date"]: i for i, r in enumerate(records)}
-def season_label(year: int) -> str:
-    """e.g. season_label(2005) -> '2005-06'"""
-    return f"{year}-{str(year + 1)[-2:]}"
-def get_season_dates(start_year: int) -> tuple[date, date]:
-    """Return (first day of Dec, last day of Mar) for a hot season."""
-    season_start = date(start_year, 12, 1)
-    season_end = date(start_year + 1, 3, 31)
-    return season_start, season_end
-def get_history_window(
-    records: list[dict],
-    date_idx: dict[str, int],
-    season_start: date,
-) -> list[dict]:
-    """Get the 90-day climate history ending just before the season start."""
-    end = season_start - timedelta(days=1)
-    start = end - timedelta(days=HISTORY_DAYS - 1)
-    window = []
-    d = start
-    while d <= end:
-        ds = d.isoformat()
-        if ds in date_idx:
-            window.append(records[date_idx[ds]])
-        d += timedelta(days=1)
-    return window
-def get_season_records(
-    records: list[dict],
-    date_idx: dict[str, int],
-    season_start: date,
-    season_end: date,
-) -> list[dict]:
-    """Get all daily records within the hot season window."""
-    out = []
-    d = season_start
-    while d <= season_end:
-        ds = d.isoformat()
-        if ds in date_idx:
-            out.append(records[date_idx[ds]])
-        d += timedelta(days=1)
-    return out
-def count_trigger_events(
-    season_records: list[dict],
-    uhi_delta: float,
-    apply_uhi: bool = True,
-) -> dict:
-    """
-    Count alert and payout events during a season.
-    A "window" = consecutive days where WBGT >= WINDOW_ENTRY_WBGT.
-    - Alert event: window duration >= ALERT_MIN_DAYS
-    - Payout event: window duration >= PAYOUT_MIN_DAYS AND peak WBGT >= PAYOUT_PEAK_WBGT
-    Returns counts with and without UHI correction.
-    """
-    results = {}
-    for label, use_uhi in [("uhi", True), ("grid", False)]:
-        if label == "uhi" and not apply_uhi:
-            continue
-        delta = uhi_delta if use_uhi else 0.0
-        wbgts = []
-        for rec in season_records:
-            t_max = (rec.get("temp_max_c") or 30.0) + delta
-            hum = rec.get("humidity_pct") or 75.0
-            wbgts.append(calculate_wbgt(t_max, hum))
-        # Find consecutive windows above threshold
-        alert_events = 0
-        payout_events = 0
-        run_length = 0
-        run_peak = 0.0
-        for w in wbgts:
-            if w >= WINDOW_ENTRY_WBGT:
-                run_length += 1
-                run_peak = max(run_peak, w)
-            else:
-                if run_length >= ALERT_MIN_DAYS:
-                    alert_events += 1
-                if run_length >= PAYOUT_MIN_DAYS and run_peak >= PAYOUT_PEAK_WBGT:
-                    payout_events += 1
-                run_length = 0
-                run_peak = 0.0
-        # Close trailing run
-        if run_length >= ALERT_MIN_DAYS:
-            alert_events += 1
-        if run_length >= PAYOUT_MIN_DAYS and run_peak >= PAYOUT_PEAK_WBGT:
-            payout_events += 1
-        results[label] = {
-            "alert_events": alert_events,
-            "payout_events": payout_events,
-            "mean_wbgt": round(float(np.mean(wbgts)), 2) if wbgts else 0.0,
-            "max_wbgt": round(float(np.max(wbgts)), 2) if wbgts else 0.0,
-            "days_above_threshold": sum(1 for w in wbgts if w >= WINDOW_ENTRY_WBGT),
-        }
-    return results
-# ── Main backtest ────────────────────────────────────────────────────────
-def run_backtest(
-    zone_ids: list[str] | None = None,
-    apply_uhi: bool = True,
-    verbose: bool = True,
-) -> list[dict]:
-    """
-    Run the backtest across seasons and zones.
-    Returns list of row dicts with season, zone, predicted, and actual counts.
-    """
-    # Load pricer (neural if available, else GLM fallback)
-    try:
-        from src.pricing.neural_actuarial import NeuralActuarialPricer
-        pricer = NeuralActuarialPricer()
-        pricer_type = "chronos" if pricer._encoder_type == "chronos" else (
-            "lstm" if pricer._encoder_type == "lstm" else "glm"
-        )
-    except Exception as e:
-        print(f"[warn] Neural pricer unavailable ({e}), using GLM fallback")
-        from src.pricing.actuarial import ActuarialPricer as _AP
-        pricer = _AP()
-        pricer_type = "glm"
-    if verbose:
-        print(f"Pricer: {pricer_type}")
-        print(f"UHI correction: {'ON' if apply_uhi else 'OFF'}")
-        print()
-    # Load ERA5 data
-    era5 = load_era5_data()
-    # Resolve zone list
-    dar_zones = [z for z in ZONES if z.city == "Dar es Salaam"]
-    if zone_ids:
-        dar_zones = [z for z in dar_zones if z.zone_id in zone_ids]
-    if not dar_zones:
-        print("No matching zones found.")
-        return []
-    if verbose:
-        print(f"Zones: {[z.zone_id for z in dar_zones]}")
-        print()
-    # Seasons: Dec 2005 through Mar 2024 -> start years 2005..2023
-    season_years = list(range(2005, 2024))
-    rows = []
-    for zone in dar_zones:
-        records = era5.get(zone.zone_id, [])
-        if not records:
-            print(f"[warn] No ERA5 data for {zone.zone_id}, skipping")
-            continue
-        date_idx = build_date_index(records)
-        uhi_lo, uhi_hi = UHI_RANGES.get(zone.settlement_type, (1.0, 2.0))
-        mean_uhi = (uhi_lo + uhi_hi) / 2.0
-        for sy in season_years:
-            season_start, season_end = get_season_dates(sy)
-            # 1. Get 90-day history ending before season start
-            history = get_history_window(records, date_idx, season_start)
-            if len(history) < 30:
-                continue  # not enough history
-            # 2. Run pricer to get predicted frequency (lambda)
-            predicted_freq = None
-            try:
-                if pricer_type == "glm":
-                    # GLM needs a frequency estimate -- use historical rate
-                    # from the previous season as a naive baseline
-                    result = pricer.price_zone(
-                        zone=zone,
-                        predicted_frequency=10.0,  # placeholder
-                        basis_risk_score=0.3,
-                        payout_per_event=PAYOUT_PER_EVENT,
-                        enrolled=zone.worker_population_est,
-                    )
-                    predicted_freq = 10.0  # GLM doesn't learn frequency
-                else:
-                    result = pricer.price_zone(
-                        zone=zone,
-                        predicted_frequency=10.0,
-                        basis_risk_score=0.3,
-                        payout_per_event=PAYOUT_PER_EVENT,
-                        enrolled=zone.worker_population_est,
-                        climate_history=history,
-                    )
-                    predicted_freq = result.cost_breakdown.get(
-                        "learned_frequency", 10.0
-                    )
-            except Exception as e:
-                if verbose:
-                    print(f"  [warn] Pricer failed for {zone.zone_id} "
-                          f"season {season_label(sy)}: {e}")
-                continue
-            # 3. Count actual events during the season
-            season_recs = get_season_records(
-                records, date_idx, season_start, season_end
-            )
-            if not season_recs:
-                continue
-            actuals = count_trigger_events(
-                season_recs, mean_uhi, apply_uhi=apply_uhi
-            )
-            # Build row -- use UHI-corrected actuals for primary comparison
-            # if UHI is on, otherwise use grid actuals
-            primary = actuals.get("uhi", actuals.get("grid", {}))
-            grid = actuals.get("grid", {})
-            row = {
-                "season": season_label(sy),
-                "zone_id": zone.zone_id,
-                "zone_name": zone.name,
-                "settlement_type": zone.settlement_type,
-                "predicted_freq": round(predicted_freq, 1),
-                "actual_alert_uhi": primary.get("alert_events", 0),
-                "actual_payout_uhi": primary.get("payout_events", 0),
-                "actual_alert_grid": grid.get("alert_events", 0),
-                "actual_payout_grid": grid.get("payout_events", 0),
-                "mean_wbgt_uhi": primary.get("mean_wbgt", 0),
-                "max_wbgt_uhi": primary.get("max_wbgt", 0),
-                "days_above_uhi": primary.get("days_above_threshold", 0),
-                "mean_wbgt_grid": grid.get("mean_wbgt", 0),
-                "max_wbgt_grid": grid.get("max_wbgt", 0),
-                "days_above_grid": grid.get("days_above_threshold", 0),
-                "season_days": len(season_recs),
-            }
-            rows.append(row)
-    return rows
-def print_summary(rows: list[dict], apply_uhi: bool = True) -> None:
-    """Print formatted summary tables."""
-    if not rows:
-        print("No backtest results to display.")
-        return
-    # ── Per-season detail table ──────────────────────────────────────
-    zones_in_data = sorted(set(r["zone_id"] for r in rows))
-    single_zone = len(zones_in_data) == 1
-    print("=" * 95)
-    print("ACTUARIAL BACKTEST: Predicted Frequency vs. Actual Trigger Events")
-    print("=" * 95)
-    print()
-    if single_zone:
-        zone_id = zones_in_data[0]
-        zone_name = rows[0]["zone_name"]
-        print(f"Zone: {zone_id} ({zone_name})")
-        print(f"Threshold: WBGT >= {WINDOW_ENTRY_WBGT} C (window entry)")
-        print(f"Alert: >= {ALERT_MIN_DAYS} consecutive days | "
-              f"Payout: >= {PAYOUT_MIN_DAYS} days AND peak WBGT >= {PAYOUT_PEAK_WBGT}")
-        print()
-        header = (
-            f"{'Season':<10} {'Pred_Freq':>9} "
-            f"{'Alert_UHI':>9} {'Payout_UHI':>10} "
-            f"{'Alert_Grid':>10} {'Payout_Grid':>11} "
-            f"{'MeanWBGT':>8} {'MaxWBGT':>7} {'Days>Thr':>8}"
-        )
-        print(header)
-        print("-" * len(header))
-        for r in sorted(rows, key=lambda x: x["season"]):
-            print(
-                f"{r['season']:<10} {r['predicted_freq']:>9.1f} "
-                f"{r['actual_alert_uhi']:>9} {r['actual_payout_uhi']:>10} "
-                f"{r['actual_alert_grid']:>10} {r['actual_payout_grid']:>11} "
-                f"{r['mean_wbgt_uhi']:>8.1f} {r['max_wbgt_uhi']:>7.1f} "
-                f"{r['days_above_uhi']:>8}"
-            )
-    else:
-        # Multi-zone: aggregate by zone
-        print(f"Zones: {len(zones_in_data)} | Seasons: "
-              f"{sorted(set(r['season'] for r in rows))[0]} to "
-              f"{sorted(set(r['season'] for r in rows))[-1]}")
-        print(f"Threshold: WBGT >= {WINDOW_ENTRY_WBGT} C")
-        print()
-        header = (
-            f"{'Zone':<10} {'Type':<11} {'Seasons':>7} "
-            f"{'MeanPred':>8} {'MeanAlertU':>10} {'MeanPayU':>8} "
-            f"{'MeanAlertG':>10} {'MeanPayG':>8} {'MeanWBGT':>8}"
-        )
-        print(header)
-        print("-" * len(header))
-        for zid in zones_in_data:
-            zrows = [r for r in rows if r["zone_id"] == zid]
-            n = len(zrows)
-            print(
-                f"{zid:<10} {zrows[0]['settlement_type']:<11} {n:>7} "
-                f"{np.mean([r['predicted_freq'] for r in zrows]):>8.1f} "
-                f"{np.mean([r['actual_alert_uhi'] for r in zrows]):>10.1f} "
-                f"{np.mean([r['actual_payout_uhi'] for r in zrows]):>8.1f} "
-                f"{np.mean([r['actual_alert_grid'] for r in zrows]):>10.1f} "
-                f"{np.mean([r['actual_payout_grid'] for r in zrows]):>8.1f} "
-                f"{np.mean([r['mean_wbgt_uhi'] for r in zrows]):>8.1f}"
-            )
-    # ── Aggregate metrics ────────────────────────────────────────────
-    print()
-    print("=" * 95)
-    print("AGGREGATE METRICS")
-    print("=" * 95)
-    print()
-    pred = np.array([r["predicted_freq"] for r in rows])
-    # Annualize actuals: season is ~121 days (Dec-Mar), so scale up
-    # But predicted_freq is already annual.  We compare predicted annual
-    # to actual season counts directly -- the model should predict events
-    # per year, but events in Dec-Mar ARE the bulk of the hot season.
-    # So the comparison is: does the model's annual lambda match the
-    # observed rate when we look at the actual hot season?
-    for label_suffix, alert_key, payout_key in [
-        ("(UHI-corrected)", "actual_alert_uhi", "actual_payout_uhi"),
-        ("(grid / no UHI)", "actual_alert_grid", "actual_payout_grid"),
-    ]:
-        if not apply_uhi and label_suffix == "(UHI-corrected)":
-            continue
-        actual_alert = np.array([r[alert_key] for r in rows], dtype=float)
-        actual_payout = np.array([r[payout_key] for r in rows], dtype=float)
-        print(f"--- {label_suffix} ---")
-        print(f"  Mean predicted frequency (annual lambda): {pred.mean():.2f}")
-        print(f"  Mean actual alert events per season:      {actual_alert.mean():.2f}")
-        print(f"  Mean actual payout events per season:     {actual_payout.mean():.2f}")
-        print()
-        print(f"  Predicted/Actual alert ratio:  "
-              f"{pred.mean() / max(actual_alert.mean(), 0.01):.2f}x")
-        print(f"  Predicted/Actual payout ratio: "
-              f"{pred.mean() / max(actual_payout.mean(), 0.01):.2f}x")
-        # Correlation (only meaningful if there's variance)
-        if actual_alert.std() > 0 and pred.std() > 0:
-            corr_alert = float(np.corrcoef(pred, actual_alert)[0, 1])
-            print(f"  Pearson correlation (pred vs alert):  {corr_alert:.3f}")
-        else:
-            print(f"  Pearson correlation (pred vs alert):  N/A (no variance)")
-        if actual_payout.std() > 0 and pred.std() > 0:
-            corr_payout = float(np.corrcoef(pred, actual_payout)[0, 1])
-            print(f"  Pearson correlation (pred vs payout): {corr_payout:.3f}")
-        else:
-            print(f"  Pearson correlation (pred vs payout): N/A (no variance)")
-        # RMSE
-        rmse_alert = float(np.sqrt(np.mean((pred - actual_alert) ** 2)))
-        rmse_payout = float(np.sqrt(np.mean((pred - actual_payout) ** 2)))
-        print(f"  RMSE (pred vs alert):  {rmse_alert:.2f}")
-        print(f"  RMSE (pred vs payout): {rmse_payout:.2f}")
-        # Per-season hit rate: how often does predicted > 0 match actual > 0
-        pred_positive = pred > 0.5
-        actual_alert_positive = actual_alert > 0
-        actual_payout_positive = actual_payout > 0
-        if len(pred) > 0:
-            alert_hit = float(
-                np.mean(pred_positive == actual_alert_positive) * 100
-            )
-            payout_hit = float(
-                np.mean(pred_positive == actual_payout_positive) * 100
-            )
-            print(f"  Direction accuracy (alert):  {alert_hit:.0f}%")
-            print(f"  Direction accuracy (payout): {payout_hit:.0f}%")
-        print()
-    # ── By settlement type ───────────────────────────────────────────
-    stypes = sorted(set(r["settlement_type"] for r in rows))
-    if len(stypes) > 1:
-        print("--- By settlement type ---")
-        for st in stypes:
-            st_rows = [r for r in rows if r["settlement_type"] == st]
-            st_pred = np.mean([r["predicted_freq"] for r in st_rows])
-            st_alert = np.mean([r["actual_alert_uhi"] for r in st_rows])
-            st_payout = np.mean([r["actual_payout_uhi"] for r in st_rows])
-            print(f"  {st:<12} pred={st_pred:.1f}  alert={st_alert:.1f}  "
-                  f"payout={st_payout:.1f}  "
-                  f"ratio={st_pred / max(st_alert, 0.01):.1f}x")
-        print()
-def main():
-    parser = argparse.ArgumentParser(
-        description="Backtest pricing model against historical ERA5-Land data"
-    )
-    parser.add_argument(
-        "--zone", type=str, default="DAR-JAN",
-        help="Zone ID to test (default: DAR-JAN)"
-    )
-    parser.add_argument(
-        "--all-zones", action="store_true",
-        help="Run backtest across all 15 Dar es Salaam zones"
-    )
-    parser.add_argument(
-        "--no-uhi", action="store_true",
-        help="Disable UHI correction (grid-only WBGT)"
-    )
-    parser.add_argument(
-        "--quiet", action="store_true",
-        help="Suppress per-season detail, show only aggregate metrics"
-    )
-    args = parser.parse_args()
-    apply_uhi = not args.no_uhi
-    if args.all_zones:
-        zone_ids = None  # all Dar zones
-    else:
-        zone_ids = [args.zone]
-    rows = run_backtest(
-        zone_ids=zone_ids,
-        apply_uhi=apply_uhi,
-        verbose=not args.quiet,
-    )
-    if not args.quiet:
-        print()
-    print_summary(rows, apply_uhi=apply_uhi)
-if __name__ == "__main__":
-    main()

scripts/train_lstm.py DELETED Viewed

@@ -1,58 +0,0 @@
-"""Train the LSTM heat wave predictor.
-Tries ERA5 data first, falls back to synthetic data generation
-(same seasonal + AR(1) approach as the existing XGBoost trainer).
-Usage:
-    python3 scripts/train_lstm.py
-"""
-import sys
-import time
-sys.path.insert(0, ".")
-from src.prediction.lstm_model import LSTMTrainer, generate_synthetic_zone_data
-from config import ZONES
-print("=" * 60)
-print("LSTM Heat Wave Predictor -- Training")
-print("=" * 60)
-# Try ERA5 data first, fall back to synthetic
-zone_data = None
-try:
-    from src.ingestion.era5_fetcher import fetch_era5_sync
-    print("\nFetching ERA5 data for training...")
-    raw = fetch_era5_sync(ZONES, days_back=365)
-    # Convert to training format if fetch succeeds
-    if raw and len(raw) > 0:
-        zone_data = raw
-        print(f"  Loaded ERA5 data for {len(zone_data)} zones")
-except Exception as e:
-    print(f"\nERA5 unavailable ({e}), using synthetic training data")
-if zone_data is None:
-    print("\nGenerating synthetic training data (2 years x 20 zones)...")
-    t0 = time.time()
-    zone_data = generate_synthetic_zone_data(ZONES, n_days=730, seed=42)
-    elapsed = time.time() - t0
-    total_days = sum(len(v) for v in zone_data.values())
-    print(f"  Generated {total_days:,} zone-days in {elapsed:.1f}s")
-# Train
-print("\nTraining LSTM...")
-t0 = time.time()
-trainer = LSTMTrainer(epochs=50, patience=5)
-metrics = trainer.train(zone_data)
-elapsed = time.time() - t0
-print(f"\nTraining complete in {elapsed:.1f}s")
-print(f"  Epochs trained: {metrics.get('epochs_trained', '?')}")
-print(f"  Train loss:     {metrics.get('train_loss', '?')}")
-print(f"  Val loss:       {metrics.get('val_loss', '?')}")
-print(f"  Val AUROC:      {metrics.get('val_auroc', '?')}")
-print(f"  Samples:        {metrics.get('samples', '?')}")
-print("=" * 60)

scripts/train_neural_pricer.py DELETED Viewed

@@ -1,142 +0,0 @@
-#!/usr/bin/env python3
-"""
-Train the Neural Actuarial Pricing Engine on real climate data.
-Usage:
-    python3 scripts/train_neural_pricer.py                  # default: config.PRIMARY_CITY
-    python3 scripts/train_neural_pricer.py --city Kampala    # Other city
-Requires: data/era5land_{slug}.json where slug is derived from the primary city
-(config.PRIMARY_CITY) or the --city flag.
-"""
-import argparse
-import json
-import sys
-from pathlib import Path
-# Add project root to path
-sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
-from config import ZONES, ZONE_MAP, slug_for
-from src.pricing.neural_actuarial import (
-    build_training_samples,
-    load_climate_data,
-    NeuralPricerTrainer,
-)
-def main():
-    parser = argparse.ArgumentParser(description="Train neural actuarial pricer")
-    parser.add_argument("--city", default="Dar es Salaam", help="City to train for")
-    parser.add_argument("--data", default=None, help="Path to climate data JSON")
-    parser.add_argument("--epochs", type=int, default=80)
-    parser.add_argument("--lr", type=float, default=1e-3)
-    parser.add_argument("--patience", type=int, default=10)
-    parser.add_argument("--encoder", choices=["chronos", "lstm"], default="chronos",
-                        help="Encoder type: chronos (foundation model) or lstm (legacy)")
-    args = parser.parse_args()
-    data_dir = Path(__file__).resolve().parents[1] / "data"
-    # Find climate data file
-    if args.data:
-        data_path = Path(args.data)
-    else:
-        city_slug = slug_for(args.city)
-        # Try ERA5-Land first, fall back to NASA POWER
-        era5_path = data_dir / f"era5land_{city_slug}.json"
-        nasa_path = data_dir / f"nasa_power_{city_slug}.json"
-        if era5_path.exists():
-            data_path = era5_path
-            print(f"Using ERA5-Land data: {data_path}")
-        elif nasa_path.exists():
-            data_path = nasa_path
-            print(f"Using NASA POWER data: {data_path}")
-        else:
-            print(f"ERROR: No climate data found for {args.city}")
-            print(f"  Looked for: {era5_path}")
-            print(f"  Looked for: {nasa_path}")
-            print(f"  Run scripts/fetch_nasa_power_dar.py first")
-            sys.exit(1)
-    # Filter zones for the target city
-    city_zones = [z for z in ZONES if z.city == args.city]
-    if not city_zones:
-        print(f"ERROR: No zones found for city '{args.city}'")
-        print(f"  Available cities: {sorted(set(z.city for z in ZONES))}")
-        sys.exit(1)
-    print(f"\n{'='*60}")
-    print(f"Neural Actuarial Pricer — {args.city}")
-    print(f"{'='*60}")
-    print(f"  Zones: {len(city_zones)} ({', '.join(z.name for z in city_zones)})")
-    print(f"  Data: {data_path}")
-    # Load climate data
-    climate_data = load_climate_data(data_path)
-    zone_ids = {z.zone_id for z in city_zones}
-    climate_data = {k: v for k, v in climate_data.items() if k in zone_ids}
-    if not climate_data:
-        print(f"ERROR: No matching zone data found in {data_path}")
-        print(f"  Expected zone IDs: {zone_ids}")
-        print(f"  Found zone IDs: {set(load_climate_data(data_path).keys())}")
-        sys.exit(1)
-    print(f"  Records: {sum(len(v) for v in climate_data.values()):,} zone-days")
-    # Build training samples with city-specific WBGT threshold
-    from src.pricing.neural_actuarial import CITY_WBGT_THRESHOLDS
-    wbgt_thresh = CITY_WBGT_THRESHOLDS.get(args.city, 35.0)
-    print(f"\nBuilding training samples (90-day windows, stride=7, WBGT threshold={wbgt_thresh}°C)...")
-    X, targets = build_training_samples(climate_data, city_zones, wbgt_threshold=wbgt_thresh)
-    print(f"  Samples: {len(X):,}")
-    print(f"  Shape: {X.shape}")
-    print(f"  Target ranges:")
-    for k, v in targets.items():
-        print(f"    {k}: [{v.min():.3f}, {v.max():.3f}], mean={v.mean():.3f}")
-    # Train
-    print(f"\nTraining (encoder={args.encoder}, epochs={args.epochs}, lr={args.lr}, patience={args.patience})...")
-    trainer = NeuralPricerTrainer(
-        lr=args.lr, epochs=args.epochs, patience=args.patience,
-        encoder=args.encoder,
-    )
-    metrics = trainer.train(X, targets)
-    print(f"\n{'='*60}")
-    print(f"Training complete")
-    print(f"{'='*60}")
-    for k, v in metrics.items():
-        print(f"  {k}: {v}")
-    # Quick inference test
-    print(f"\nInference test:")
-    from src.pricing.neural_actuarial import NeuralActuarialPricer
-    pricer = NeuralActuarialPricer()
-    print(f"  Neural model loaded: {pricer.is_neural}")
-    if pricer.is_neural:
-        for zone in city_zones:
-            history = climate_data[zone.zone_id][-90:]
-            result = pricer.price_zone(
-                zone=zone,
-                predicted_frequency=10.0,  # ignored by neural model
-                basis_risk_score=0.2,       # ignored by neural model
-                payout_per_event=10.0,
-                enrolled=zone.worker_population_est,
-                climate_history=history,
-            )
-            cb = result.cost_breakdown
-            print(
-                f"  {zone.name:12s} ({zone.settlement_type:9s}): "
-                f"${result.cost_per_worker_year:.2f}/worker/yr "
-                f"(λ={cb.get('learned_frequency', '?')}, "
-                f"basis_risk={cb.get('learned_basis_risk', '?')}, "
-                f"δ_NN={cb.get('neural_correction_pct', '?'):+.1f}%)"
-            )
-if __name__ == "__main__":
-    main()

scripts/train_on_era5.py DELETED Viewed

@@ -1,491 +0,0 @@
-"""
-Train all ML models on real ERA5 reanalysis data.
-Steps:
-1. Fetch 2 years of ERA5 data for all 20 zones via Google ARCO Zarr store
-2. Validate data quality (coverage, temp ranges, nulls)
-3. Retrain XGBoost heat predictor on real data
-4. Retrain LSTM on real data
-5. Verify UHI model works with real ERA5 temps
-"""
-import sys
-import os
-import time
-import logging
-import math
-import numpy as np
-# Project root on sys.path
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from config import ZONES, ZONE_MAP
-from src.ingestion.era5_fetcher import fetch_era5_sync
-from src.ingestion.models import DailyReading
-from src.indexing.heat_index import calculate_wbgt
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s %(name)s %(levelname)s  %(message)s",
-    datefmt="%H:%M:%S",
-)
-log = logging.getLogger("train_era5")
-# Expected (low, high) daily-max temperature bounds per city, deg C; validate_data flags readings more than 5 deg outside them.
-EXPECTED_RANGES = {
-    "Nairobi":       (18, 35),
-    "Dar es Salaam": (25, 40),
-    "Kampala":       (22, 36),
-    "Kigali":        (20, 34),
-}
-# ======================================================================
-# Step 1: Fetch ERA5 data
-# ======================================================================
-def fetch_data():
-    log.info("=" * 60)
-    log.info("STEP 1: Fetching 2 years of ERA5 data for %d zones", len(ZONES))
-    log.info("=" * 60)
-    t0 = time.time()
-    data = fetch_era5_sync(ZONES, days_back=730)
-    elapsed = time.time() - t0
-    log.info("Fetch complete in %.1f seconds", elapsed)
-    return data
-# ======================================================================
-# Step 2: Validate data quality
-# ======================================================================
-def validate_data(data: dict[str, list[DailyReading]]):
-    log.info("=" * 60)
-    log.info("STEP 2: Validating ERA5 data quality")
-    log.info("=" * 60)
-    issues = []
-    stats = {}
-    for zone in ZONES:
-        zid = zone.zone_id
-        readings = data.get(zid, [])
-        if not readings:
-            issues.append(f"{zid}: NO DATA")
-            stats[zid] = {"days": 0, "issue": "no data"}
-            continue
-        temps = [r.temp_max_c for r in readings if r.temp_max_c is not None]
-        humids = [r.humidity_pct for r in readings if r.humidity_pct is not None]
-        winds = [r.wind_speed_ms for r in readings if r.wind_speed_ms is not None]
-        if not temps:
-            issues.append(f"{zid}: all temps are null")
-            stats[zid] = {"days": len(readings), "issue": "all null temps"}
-            continue
-        t_min, t_max = min(temps), max(temps)
-        t_mean = sum(temps) / len(temps)
-        # Check physical reasonableness
-        exp_lo, exp_hi = EXPECTED_RANGES.get(zone.city, (15, 42))
-        if t_min < exp_lo - 5 or t_max > exp_hi + 5:
-            issues.append(
-                f"{zid} ({zone.city}): temp range [{t_min:.1f}, {t_max:.1f}] "
-                f"outside expected [{exp_lo-5}, {exp_hi+5}]"
-            )
-        null_count = sum(1 for r in readings if r.temp_max_c is None)
-        stats[zid] = {
-            "days": len(readings),
-            "temp_days": len(temps),
-            "temp_min": round(t_min, 1),
-            "temp_max": round(t_max, 1),
-            "temp_mean": round(t_mean, 1),
-            "humidity_mean": round(sum(humids)/len(humids), 1) if humids else None,
-            "wind_mean": round(sum(winds)/len(winds), 1) if winds else None,
-            "null_temps": null_count,
-        }
-    # Print summary
-    print("\n--- ERA5 Data Summary ---")
-    print(f"{'Zone':<12} {'City':<16} {'Days':>5} {'Temp min':>9} {'Temp max':>9} {'Temp mean':>10} {'Humidity':>9} {'Nulls':>6}")
-    print("-" * 90)
-    by_city = {}
-    for zone in ZONES:
-        s = stats.get(zone.zone_id, {})
-        days = s.get("days", 0)
-        t_lo = s.get("temp_min", "N/A")
-        t_hi = s.get("temp_max", "N/A")
-        t_mn = s.get("temp_mean", "N/A")
-        hum = s.get("humidity_mean", "N/A")
-        nulls = s.get("null_temps", "N/A")
-        print(f"{zone.zone_id:<12} {zone.city:<16} {days:>5} {t_lo:>9} {t_hi:>9} {t_mn:>10} {hum:>9} {nulls:>6}")
-        city = zone.city
-        if city not in by_city:
-            by_city[city] = []
-        by_city[city].append(s)
-    print("\n--- Per-city aggregated temp ranges ---")
-    for city, zone_stats in by_city.items():
-        all_mins = [s["temp_min"] for s in zone_stats if s.get("temp_min") is not None]
-        all_maxs = [s["temp_max"] for s in zone_stats if s.get("temp_max") is not None]
-        if all_mins and all_maxs:
-            print(f"  {city:<16}: {min(all_mins):.1f} - {max(all_maxs):.1f} C")
-    if issues:
-        print(f"\n  ISSUES ({len(issues)}):")
-        for issue in issues:
-            print(f"    - {issue}")
-    else:
-        print("\n  No data quality issues found.")
-    zones_with_data = sum(1 for s in stats.values() if s.get("days", 0) > 0)
-    assert zones_with_data == len(ZONES), f"Only {zones_with_data}/{len(ZONES)} zones have data"
-    print(f"\n  All {zones_with_data} zones have data.\n")
-    return stats
-# ======================================================================
-# Step 3: Retrain XGBoost heat predictor on real data
-# ======================================================================
-def retrain_xgboost(data: dict[str, list[DailyReading]]):
-    log.info("=" * 60)
-    log.info("STEP 3: Retraining XGBoost heat predictor on real ERA5 data")
-    log.info("=" * 60)
-    from src.prediction.heat_forecast import HeatWavePredictor, CITY_THRESHOLDS, CITY_CLIMATE
-    from src.prediction.lstm_model import CITY_CLIMATE as _  # ensure import works
-    import xgboost as xgb
-    # We replicate the training logic from HeatWavePredictor.train() but
-    # use real ERA5 temps/humidity instead of synthetic series.
-    all_X = []
-    all_y = []
-    for zone in ZONES:
-        zid = zone.zone_id
-        readings = data.get(zid, [])
-        if len(readings) < 40:
-            log.warning("Zone %s has only %d readings, skipping for XGBoost training", zid, len(readings))
-            continue
-        city = zone.city
-        threshold = CITY_THRESHOLDS.get(city, 33.0)
-        # Extract time series from real data
-        temps = []
-        humidity = []
-        for r in readings:
-            t = r.temp_max_c
-            h = r.humidity_pct
-            if t is None:
-                continue
-            temps.append(t)
-            humidity.append(h if h is not None else 65.0)
-        n_days = len(temps)
-        if n_days < 40:
-            log.warning("Zone %s has only %d valid temp readings, skipping", zid, n_days)
-            continue
-        # Compute WBGT series
-        wbgt_series = [calculate_wbgt(t, h) for t, h in zip(temps, humidity)]
-        # Labels: trigger within next 7 days (2+ consecutive above threshold)
-        labels = [0] * n_days
-        for day in range(n_days - 7):
-            window = temps[day + 1:day + 8]
-            consec = 0
-            triggered = False
-            for t in window:
-                if t > threshold:
-                    consec += 1
-                    if consec >= 2:
-                        triggered = True
-                        break
-                else:
-                    consec = 0
-            labels[day] = 1 if triggered else 0
-        # Vulnerability encoding
-        vuln_map = {"high": 1.0, "moderate": 0.5, "low": 0.0}
-        zone_vuln = vuln_map.get(zone.heat_vulnerability, 0.5)
-        rng = np.random.default_rng(42)
-        # Build features (need 30-day lookback)
-        for day in range(30, n_days - 7):
-            t_window = temps[day - 30:day + 1]
-            h_window = humidity[day - 30:day + 1]
-            w_window = wbgt_series[day - 30:day + 1]
-            current_temp = t_window[-1]
-            current_wbgt = w_window[-1]
-            current_humidity = h_window[-1]
-            # Trend: slope of last 7 days
-            x7 = np.arange(7, dtype=np.float64)
-            y7 = np.array(t_window[-7:], dtype=np.float64)
-            temp_trend = float(np.polyfit(x7, y7, 1)[0])
-            # Anomaly: current vs 30-day mean
-            temp_anomaly = current_temp - float(np.mean(t_window))
-            # Soil moisture proxy
-            soil_proxy = float(np.clip(1.0 - (temp_anomaly + 2.0) / 4.0, 0.0, 1.0))
-            # Rolling error (use neutral prior for training data)
-            rolling_err = rng.uniform(0.1, 0.5)
-            # Day-of-year encoding (use day index within 365-day cycle)
-            doy = day % 365
-            doy_sin = np.sin(2 * np.pi * doy / 365.0)
-            doy_cos = np.cos(2 * np.pi * doy / 365.0)
-            # Random hour for variety
-            hour = rng.integers(6, 19)
-            hour_sin = np.sin(2 * np.pi * hour / 24.0)
-            hour_cos = np.cos(2 * np.pi * hour / 24.0)
-            row = [
-                current_temp,
-                current_wbgt,
-                current_humidity,
-                temp_trend,
-                temp_anomaly,
-                soil_proxy,
-                rolling_err,
-                doy_sin,
-                doy_cos,
-                hour_sin,
-                hour_cos,
-                zone_vuln,
-            ]
-            all_X.append(row)
-            all_y.append(labels[day])
-    X = np.array(all_X, dtype=np.float32)
-    y = np.array(all_y, dtype=np.int32)
-    pos_rate = y.sum() / len(y) if len(y) > 0 else 0
-    log.info(
-        "XGBoost training data: %d samples, %.1f%% positive rate",
-        len(X), pos_rate * 100,
-    )
-    # Create a fresh predictor to get the model object, then retrain
-    predictor = HeatWavePredictor.__new__(HeatWavePredictor)
-    predictor.model_path = HeatWavePredictor.__init__.__defaults__[0]  # fallback
-    from pathlib import Path
-    predictor.model_path = Path(__file__).resolve().parents[1] / "models" / "heat_predictor_xgb.json"
-    predictor._rolling_errors = []
-    model = xgb.XGBClassifier(
-        n_estimators=150,
-        max_depth=5,
-        learning_rate=0.1,
-        eval_metric="logloss",
-        random_state=42,
-    )
-    # Train/validation split (temporal: first 75% train, last 25% val)
-    split = int(len(X) * 0.75)
-    X_train, X_val = X[:split], X[split:]
-    y_train, y_val = y[:split], y[split:]
-    model.fit(
-        X_train, y_train,
-        eval_set=[(X_val, y_val)],
-        verbose=False,
-    )
-    # Evaluate on validation set
-    from sklearn.metrics import roc_auc_score, precision_score, recall_score
-    val_probs = model.predict_proba(X_val)[:, 1]
-    val_preds = (val_probs > 0.5).astype(int)
-    if len(set(y_val)) > 1:
-        auroc = roc_auc_score(y_val, val_probs)
-        precision = precision_score(y_val, val_preds, zero_division=0)
-        recall = recall_score(y_val, val_preds, zero_division=0)
-    else:
-        auroc, precision, recall = 0.5, 0.0, 0.0
-    print(f"\n--- XGBoost Results (real ERA5 data) ---")
-    print(f"  Training samples:   {len(X_train)}")
-    print(f"  Validation samples: {len(X_val)}")
-    print(f"  Positive rate:      {pos_rate:.1%}")
-    print(f"  Val AUROC:          {auroc:.4f}")
-    print(f"  Val Precision:      {precision:.4f}")
-    print(f"  Val Recall:         {recall:.4f}")
-    # Save model
-    predictor.model_path.parent.mkdir(parents=True, exist_ok=True)
-    model.save_model(str(predictor.model_path))
-    log.info("XGBoost model saved to %s", predictor.model_path)
-    return {
-        "train_samples": len(X_train),
-        "val_samples": len(X_val),
-        "positive_rate": round(pos_rate, 4),
-        "val_auroc": round(auroc, 4),
-        "val_precision": round(precision, 4),
-        "val_recall": round(recall, 4),
-    }
-# ======================================================================
-# Step 4: Retrain LSTM on real data
-# ======================================================================
-def retrain_lstm(data: dict[str, list[DailyReading]]):
-    log.info("=" * 60)
-    log.info("STEP 4: Retraining LSTM on real ERA5 data")
-    log.info("=" * 60)
-    from src.prediction.lstm_model import LSTMTrainer
-    # Convert ERA5 DailyReading objects into the format the LSTM trainer expects:
-    # dict of zone_id -> list of dicts with keys: temp_max_c, humidity_pct, wind_speed_ms, city
-    zone_readings = {}
-    for zone in ZONES:
-        zid = zone.zone_id
-        readings = data.get(zid, [])
-        days = []
-        for r in readings:
-            if r.temp_max_c is None:
-                continue
-            days.append({
-                "temp_max_c": r.temp_max_c,
-                "humidity_pct": r.humidity_pct if r.humidity_pct is not None else 65.0,
-                "wind_speed_ms": r.wind_speed_ms if r.wind_speed_ms is not None else 3.0,
-                "city": zone.city,
-            })
-        if len(days) > 30:
-            zone_readings[zid] = days
-            log.info("Zone %s: %d valid readings for LSTM", zid, len(days))
-        else:
-            log.warning("Zone %s: only %d valid readings, skipping LSTM", zid, len(days))
-    log.info("Training LSTM on %d zones", len(zone_readings))
-    trainer = LSTMTrainer(epochs=50, patience=5)
-    metrics = trainer.train(zone_readings)
-    print(f"\n--- LSTM Results (real ERA5 data) ---")
-    for k, v in metrics.items():
-        print(f"  {k}: {v}")
-    return metrics
-# ======================================================================
-# Step 5: Verify UHI model with real ERA5 temps
-# ======================================================================
-def verify_uhi(data: dict[str, list[DailyReading]]):
-    log.info("=" * 60)
-    log.info("STEP 5: Verifying UHI model with real ERA5 temperatures")
-    log.info("=" * 60)
-    from src.downscaling.uhi_model import UHICorrector
-    corrector = UHICorrector()
-    results = {}
-    for zone in ZONES:
-        zid = zone.zone_id
-        readings = data.get(zid, [])
-        if not readings:
-            continue
-        # Use real ERA5 temps as grid baseline
-        real_temps = [r.temp_max_c for r in readings if r.temp_max_c is not None]
-        if not real_temps:
-            continue
-        # Sample a few real temps and apply UHI correction
-        sample_indices = np.linspace(0, len(real_temps) - 1, min(20, len(real_temps)), dtype=int)
-        deltas = []
-        corrected_temps = []
-        for idx in sample_indices:
-            grid_temp = real_temps[idx]
-            corrected, delta, conf = corrector.correct_temperature(zone, grid_temp, hour=14, month=1)
-            deltas.append(delta)
-            corrected_temps.append(corrected)
-        results[zid] = {
-            "city": zone.city,
-            "settlement": zone.settlement_type,
-            "mean_grid_temp": round(sum(real_temps) / len(real_temps), 1),
-            "mean_uhi_delta": round(sum(deltas) / len(deltas), 2),
-            "mean_corrected": round(sum(corrected_temps) / len(corrected_temps), 1),
-        }
-    print(f"\n--- UHI Verification with Real ERA5 Temps ---")
-    print(f"{'Zone':<12} {'City':<16} {'Type':<12} {'Grid T':>7} {'UHI +':>7} {'Corrected':>10}")
-    print("-" * 70)
-    for zid, r in results.items():
-        print(
-            f"{zid:<12} {r['city']:<16} {r['settlement']:<12} "
-            f"{r['mean_grid_temp']:>6.1f}C {r['mean_uhi_delta']:>+6.2f}C {r['mean_corrected']:>9.1f}C"
-        )
-    return results
-# ======================================================================
-# Main
-# ======================================================================
-def main():
-    t_start = time.time()
-    # Step 1: Fetch
-    data = fetch_data()
-    # Step 2: Validate
-    data_stats = validate_data(data)
-    # Step 3: XGBoost
-    xgb_metrics = retrain_xgboost(data)
-    # Step 4: LSTM
-    lstm_metrics = retrain_lstm(data)
-    # Step 5: UHI verification
-    uhi_results = verify_uhi(data)
-    total_time = time.time() - t_start
-    print("\n" + "=" * 60)
-    print("TRAINING COMPLETE")
-    print("=" * 60)
-    total_days = sum(
-        len([r for r in data.get(z.zone_id, []) if r.temp_max_c is not None])
-        for z in ZONES
-    )
-    print(f"  Total real data points:  {total_days} zone-days across {len(ZONES)} zones")
-    print(f"  XGBoost val AUROC:       {xgb_metrics['val_auroc']:.4f}")
-    print(f"  LSTM val AUROC:          {lstm_metrics.get('val_auroc', 'N/A')}")
-    print(f"  LSTM epochs trained:     {lstm_metrics.get('epochs_trained', 'N/A')}")
-    print(f"  LSTM final val loss:     {lstm_metrics.get('val_loss', 'N/A')}")
-    print(f"  Total time:              {total_time:.1f}s")
-    print()
-if __name__ == "__main__":
-    main()

scripts/train_on_nasa_power.py DELETED Viewed

@@ -1,660 +0,0 @@
-"""
-Train all ML models on real NASA POWER daily data.
-Steps:
-1. Fetch 2 years of NASA POWER data for all 20 zones (with caching)
-2. Validate data quality (coverage, temp ranges, nulls)
-3. Retrain LSTM heat predictor on real data
-4. Retrain XGBoost heat predictor on real data
-5. Verify UHI model still works (no retraining — literature-calibrated)
-Usage:
-    python3 scripts/train_on_nasa_power.py
-"""
-import sys
-import os
-import json
-import time
-import logging
-from datetime import date, timedelta
-from pathlib import Path
-import numpy as np
-import httpx
-# Project root on sys.path
-PROJECT_ROOT = Path(__file__).resolve().parents[1]
-sys.path.insert(0, str(PROJECT_ROOT))
-from config import ZONES, ZONE_MAP
-from src.indexing.heat_index import calculate_wbgt
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s %(name)s %(levelname)s  %(message)s",
-    datefmt="%H:%M:%S",
-)
-log = logging.getLogger("train_nasa_power")
-# On-disk locations: per-zone JSON cache of API responses and the trained-model directory
-CACHE_DIR = PROJECT_ROOT / "data" / "nasa_power_cache"
-MODELS_DIR = PROJECT_ROOT / "models"
-# NASA POWER endpoint and requested parameters, defined locally here (original note says they mirror config.py -- unverified)
-NASA_POWER_URL = "https://power.larc.nasa.gov/api/temporal/daily/point"
-NASA_POWER_PARAMS = ["T2M", "T2M_MAX", "T2M_MIN", "RH2M", "WS2M", "ALLSKY_SFC_SW_DWN"]
-NASA_MISSING = -999.0  # sentinel value that _safe_val maps to None
-# Hard-coded ~2-year window (not derived from today's date); fetch_zone_data only accepts a cache whose stored range matches it exactly
-END_DATE = date(2026, 3, 29)
-START_DATE = date(2024, 4, 1)
-# Expected (low, high) daily-max temperature bounds per city, deg C; validate_data flags readings more than 5 deg outside them
-EXPECTED_RANGES = {
-    "Nairobi":       (18, 35),
-    "Dar es Salaam": (25, 40),
-    "Kampala":       (22, 36),
-    "Kigali":        (20, 34),
-}
-# ======================================================================
-# Step 1: Fetch NASA POWER data (with caching)
-# ======================================================================
-def _safe_val(val) -> float | None:
-    """Return None for NASA's -999 missing sentinel."""
-    if val is None:
-        return None
-    try:
-        f = float(val)
-        return None if f == NASA_MISSING else round(f, 2)
-    except (ValueError, TypeError):
-        return None
-def fetch_zone_data(zone, start: date, end: date) -> list[dict]:
-    """Fetch NASA POWER data for a single zone, using cache if available.
-    Returns list of dicts with keys:
-        date, temp_mean_c, temp_max_c, temp_min_c, humidity_pct,
-        wind_speed_ms, solar_radiation
-    """
-    cache_file = CACHE_DIR / f"{zone.zone_id}.json"
-    # Check cache
-    if cache_file.exists():
-        with open(cache_file) as f:
-            cached = json.load(f)
-        # Verify cache covers our date range
-        if (cached.get("start_date") == start.isoformat()
-                and cached.get("end_date") == end.isoformat()
-                and len(cached.get("readings", [])) > 0):
-            log.info("Cache hit for zone %s (%d readings)", zone.zone_id, len(cached["readings"]))
-            return cached["readings"]
-    # Fetch from NASA POWER API
-    params = {
-        "parameters": ",".join(NASA_POWER_PARAMS),
-        "community": "AG",
-        "longitude": zone.longitude,
-        "latitude": zone.latitude,
-        "start": start.strftime("%Y%m%d"),
-        "end": end.strftime("%Y%m%d"),
-        "format": "JSON",
-    }
-    log.info("Fetching NASA POWER for zone %s (%s, %s) ...", zone.zone_id, zone.city, zone.name)
-    max_retries = 3
-    for attempt in range(max_retries):
-        try:
-            resp = httpx.get(NASA_POWER_URL, params=params, timeout=60.0)
-            if resp.status_code == 429:
-                wait = 15 * (attempt + 1)
-                log.warning("  Rate limited (429), backing off %ds ...", wait)
-                time.sleep(wait)
-                continue
-            resp.raise_for_status()
-            data = resp.json()
-            break
-        except httpx.TimeoutException:
-            wait = 10 * (attempt + 1)
-            log.warning("  Timeout (attempt %d/%d), retrying in %ds ...", attempt + 1, max_retries, wait)
-            time.sleep(wait)
-        except httpx.HTTPStatusError as exc:
-            if exc.response.status_code >= 500:
-                wait = 10 * (attempt + 1)
-                log.warning("  Server error %d (attempt %d/%d), retrying ...", exc.response.status_code, attempt + 1, max_retries)
-                time.sleep(wait)
-            else:
-                log.error("  HTTP %d: %s", exc.response.status_code, exc.response.text[:200])
-                return []
-    else:
-        log.error("  Failed after %d attempts for zone %s", max_retries, zone.zone_id)
-        return []
-    # Parse response
-    try:
-        props = data["properties"]["parameter"]
-    except (KeyError, TypeError):
-        log.error("  Unexpected response structure for zone %s", zone.zone_id)
-        return []
-    t2m_data = props.get("T2M", {})
-    t2m_max_data = props.get("T2M_MAX", {})
-    t2m_min_data = props.get("T2M_MIN", {})
-    rh2m_data = props.get("RH2M", {})
-    ws2m_data = props.get("WS2M", {})
-    solar_data = props.get("ALLSKY_SFC_SW_DWN", {})
-    all_days = sorted(t2m_data.keys())
-    readings = []
-    for day_str in all_days:
-        try:
-            formatted_date = f"{day_str[:4]}-{day_str[4:6]}-{day_str[6:8]}"
-        except (IndexError, TypeError):
-            continue
-        temp_mean = _safe_val(t2m_data.get(day_str))
-        temp_max = _safe_val(t2m_max_data.get(day_str))
-        temp_min = _safe_val(t2m_min_data.get(day_str))
-        humidity = _safe_val(rh2m_data.get(day_str))
-        wind = _safe_val(ws2m_data.get(day_str))
-        solar = _safe_val(solar_data.get(day_str))
-        # Skip days where key fields are all missing
-        if temp_max is None and temp_mean is None:
-            continue
-        readings.append({
-            "date": formatted_date,
-            "temp_mean_c": temp_mean,
-            "temp_max_c": temp_max if temp_max is not None else temp_mean,
-            "temp_min_c": temp_min,
-            "humidity_pct": humidity,
-            "wind_speed_ms": wind,
-            "solar_radiation": solar,
-        })
-    # Cache the results
-    CACHE_DIR.mkdir(parents=True, exist_ok=True)
-    cache_obj = {
-        "zone_id": zone.zone_id,
-        "city": zone.city,
-        "start_date": start.isoformat(),
-        "end_date": end.isoformat(),
-        "latitude": zone.latitude,
-        "longitude": zone.longitude,
-        "readings": readings,
-    }
-    with open(cache_file, "w") as f:
-        json.dump(cache_obj, f, indent=2)
-    log.info("  Zone %s: %d days fetched and cached", zone.zone_id, len(readings))
-    return readings
-def fetch_all_zones():
-    """Fetch NASA POWER data for all zones with rate-limiting delay."""
-    log.info("=" * 60)
-    log.info("STEP 1: Fetching NASA POWER data for %d zones", len(ZONES))
-    log.info("  Date range: %s to %s (%d days)", START_DATE, END_DATE, (END_DATE - START_DATE).days + 1)
-    log.info("=" * 60)
-    all_data: dict[str, list[dict]] = {}
-    t0 = time.time()
-    for i, zone in enumerate(ZONES):
-        readings = fetch_zone_data(zone, START_DATE, END_DATE)
-        all_data[zone.zone_id] = readings
-        # Rate limiting delay between API calls (skip for cached results)
-        if i < len(ZONES) - 1:
-            time.sleep(0.5)
-    elapsed = time.time() - t0
-    total_readings = sum(len(v) for v in all_data.values())
-    zones_with_data = sum(1 for v in all_data.values() if v)
-    log.info(
-        "Fetch complete in %.1fs: %d/%d zones with data, %d total readings",
-        elapsed, zones_with_data, len(ZONES), total_readings,
-    )
-    return all_data
-# ======================================================================
-# Step 2: Validate data quality
-# ======================================================================
-def validate_data(data: dict[str, list[dict]]):
-    log.info("=" * 60)
-    log.info("STEP 2: Validating NASA POWER data quality")
-    log.info("=" * 60)
-    issues = []
-    stats = {}
-    for zone in ZONES:
-        zid = zone.zone_id
-        readings = data.get(zid, [])
-        if not readings:
-            issues.append(f"{zid}: NO DATA")
-            stats[zid] = {"days": 0, "issue": "no data"}
-            continue
-        temps = [r["temp_max_c"] for r in readings if r["temp_max_c"] is not None]
-        humids = [r["humidity_pct"] for r in readings if r["humidity_pct"] is not None]
-        winds = [r["wind_speed_ms"] for r in readings if r["wind_speed_ms"] is not None]
-        if not temps:
-            issues.append(f"{zid}: all temps are null")
-            stats[zid] = {"days": len(readings), "issue": "all null temps"}
-            continue
-        t_min, t_max = min(temps), max(temps)
-        t_mean = sum(temps) / len(temps)
-        # Check physical reasonableness
-        exp_lo, exp_hi = EXPECTED_RANGES.get(zone.city, (15, 42))
-        if t_min < exp_lo - 5 or t_max > exp_hi + 5:
-            issues.append(
-                f"{zid} ({zone.city}): temp range [{t_min:.1f}, {t_max:.1f}] "
-                f"outside expected [{exp_lo-5}, {exp_hi+5}]"
-            )
-        null_count = sum(1 for r in readings if r["temp_max_c"] is None)
-        stats[zid] = {
-            "days": len(readings),
-            "temp_days": len(temps),
-            "temp_min": round(t_min, 1),
-            "temp_max": round(t_max, 1),
-            "temp_mean": round(t_mean, 1),
-            "humidity_mean": round(sum(humids)/len(humids), 1) if humids else None,
-            "wind_mean": round(sum(winds)/len(winds), 1) if winds else None,
-            "null_temps": null_count,
-        }
-    # Print summary
-    print("\n--- NASA POWER Data Summary ---")
-    print(f"{'Zone':<12} {'City':<16} {'Days':>5} {'Temp min':>9} {'Temp max':>9} {'Temp mean':>10} {'Humidity':>9} {'Nulls':>6}")
-    print("-" * 90)
-    for zone in ZONES:
-        s = stats.get(zone.zone_id, {})
-        days = s.get("days", 0)
-        t_lo = s.get("temp_min", "N/A")
-        t_hi = s.get("temp_max", "N/A")
-        t_mn = s.get("temp_mean", "N/A")
-        hum = s.get("humidity_mean", "N/A")
-        nulls = s.get("null_temps", "N/A")
-        if isinstance(t_lo, float):
-            print(f"{zone.zone_id:<12} {zone.city:<16} {days:>5} {t_lo:>9.1f} {t_hi:>9.1f} {t_mn:>10.1f} {hum:>9} {nulls:>6}")
-        else:
-            print(f"{zone.zone_id:<12} {zone.city:<16} {days:>5} {'N/A':>9} {'N/A':>9} {'N/A':>10} {'N/A':>9} {'N/A':>6}")
-    if issues:
-        print(f"\n  ISSUES ({len(issues)}):")
-        for issue in issues:
-            print(f"    - {issue}")
-    else:
-        print("\n  No data quality issues found.")
-    zones_with_data = sum(1 for s in stats.values() if s.get("days", 0) > 0)
-    print(f"\n  {zones_with_data}/{len(ZONES)} zones have data.\n")
-    return stats
-# ======================================================================
-# Step 3: Retrain LSTM on real NASA POWER data
-# ======================================================================
-def retrain_lstm(data: dict[str, list[dict]]):
-    log.info("=" * 60)
-    log.info("STEP 3: Retraining LSTM on real NASA POWER data")
-    log.info("=" * 60)
-    from src.prediction.lstm_model import LSTMTrainer
-    # Delete old model files
-    lstm_model_path = MODELS_DIR / "heat_lstm.pt"
-    lstm_norm_path = MODELS_DIR / "lstm_norm.json"
-    for p in [lstm_model_path, lstm_norm_path]:
-        if p.exists():
-            p.unlink()
-            log.info("Deleted old model: %s", p)
-    # Convert NASA POWER data to the format the LSTM trainer expects:
-    # dict of zone_id -> list of dicts with keys: temp_max_c, humidity_pct, wind_speed_ms, city
-    zone_readings = {}
-    for zone in ZONES:
-        zid = zone.zone_id
-        readings = data.get(zid, [])
-        days = []
-        for r in readings:
-            if r["temp_max_c"] is None:
-                continue
-            days.append({
-                "temp_max_c": r["temp_max_c"],
-                "humidity_pct": r["humidity_pct"] if r["humidity_pct"] is not None else 65.0,
-                "wind_speed_ms": r["wind_speed_ms"] if r["wind_speed_ms"] is not None else 3.0,
-                "city": zone.city,
-            })
-        if len(days) >= 22:  # Need at least seq_len(14) + forecast_horizon(7) + 1
-            zone_readings[zid] = days
-            log.info("Zone %s: %d valid readings for LSTM", zid, len(days))
-        else:
-            log.warning("Zone %s: only %d valid readings, skipping LSTM", zid, len(days))
-    log.info("Training LSTM on %d zones with real NASA POWER data", len(zone_readings))
-    trainer = LSTMTrainer(epochs=50, patience=5)
-    metrics = trainer.train(zone_readings)
-    print(f"\n--- LSTM Results (real NASA POWER data) ---")
-    for k, v in metrics.items():
-        print(f"  {k}: {v}")
-    return metrics
-# ======================================================================
-# Step 4: Retrain XGBoost heat predictor on real data
-# ======================================================================
-def retrain_xgboost(data: dict[str, list[dict]]):  # Step 4: data maps zone_id -> daily reading dicts; returns a dict of split sizes and validation metrics
-    log.info("=" * 60)
-    log.info("STEP 4: Retraining XGBoost heat predictor on real NASA POWER data")
-    log.info("=" * 60)
-    import xgboost as xgb
-    from src.prediction.lstm_model import CITY_THRESHOLDS
-    # Delete old model file up front; the same path is written again after training
-    xgb_model_path = MODELS_DIR / "heat_predictor_xgb.json"
-    if xgb_model_path.exists():
-        xgb_model_path.unlink()
-        log.info("Deleted old XGBoost model: %s", xgb_model_path)
-    all_X = []  # feature rows pooled across every zone that passes the size checks
-    all_y = []  # 0/1 label for each pooled row, same order as all_X
-    for zone in ZONES:
-        zid = zone.zone_id
-        readings = data.get(zid, [])
-        if len(readings) < 40:  # first gate: counts raw readings, including ones with a null temperature
-            log.warning("Zone %s has only %d readings, skipping for XGBoost training", zid, len(readings))
-            continue
-        city = zone.city
-        threshold = CITY_THRESHOLDS.get(city, 33.0)  # 33.0 is the fallback for cities missing from the table
-        # Extract time series, dropping days with a null temperature (null humidity is imputed instead)
-        temps = []
-        humidity = []
-        for r in readings:
-            t = r["temp_max_c"]
-            h = r["humidity_pct"]
-            if t is None:
-                continue
-            temps.append(t)
-            humidity.append(h if h is not None else 65.0)  # missing humidity is replaced with 65.0
-        n_days = len(temps)
-        if n_days < 40:  # second gate: same limit, applied after null temperatures were dropped
-            log.warning("Zone %s has only %d valid temp readings, skipping", zid, n_days)
-            continue
-        # Compute WBGT series
-        wbgt_series = [calculate_wbgt(t, h) for t, h in zip(temps, humidity)]
-        # Labels: trigger within next 7 days (2+ consecutive above threshold)
-        labels = [0] * n_days  # the final 7 entries stay 0, but the feature loop below never reads them
-        for day in range(n_days - 7):
-            window = temps[day + 1:day + 8]  # the 7 days after `day`; `day` itself is excluded
-            consec = 0
-            triggered = False
-            for t in window:
-                if t > threshold:  # strictly above; a day exactly at the threshold resets the run
-                    consec += 1
-                    if consec >= 2:
-                        triggered = True
-                        break
-                else:
-                    consec = 0
-            labels[day] = 1 if triggered else 0
-        # Vulnerability encoding
-        vuln_map = {"high": 1.0, "moderate": 0.5, "low": 0.0}
-        zone_vuln = vuln_map.get(zone.heat_vulnerability, 0.5)
-        rng = np.random.default_rng(42)  # NOTE(review): re-created inside the zone loop, so every zone draws the same rolling_err/hour sequence
-        # Build features (window below holds 31 values: 30 prior days plus the current day)
-        for day in range(30, n_days - 7):
-            t_window = temps[day - 30:day + 1]
-            h_window = humidity[day - 30:day + 1]
-            w_window = wbgt_series[day - 30:day + 1]
-            current_temp = t_window[-1]
-            current_wbgt = w_window[-1]
-            current_humidity = h_window[-1]
-            # Trend: slope of a degree-1 fit over the last 7 values of the window
-            x7 = np.arange(7, dtype=np.float64)
-            y7 = np.array(t_window[-7:], dtype=np.float64)
-            temp_trend = float(np.polyfit(x7, y7, 1)[0])
-            # Anomaly: current value minus the mean of the 31-value window (current day included in the mean)
-            temp_anomaly = current_temp - float(np.mean(t_window))
-            # Soil moisture proxy: linear in the anomaly (-2 -> 1.0, +2 -> 0.0), clipped to [0, 1]
-            soil_proxy = float(np.clip(1.0 - (temp_anomaly + 2.0) / 4.0, 0.0, 1.0))
-            # Rolling error: no real prediction history exists here, so a random value in [0.1, 0.5) is drawn
-            rolling_err = rng.uniform(0.1, 0.5)
-            # Day-of-year encoding
-            doy = day % 365  # NOTE(review): `day` is an index into the filtered series, not a calendar day - confirm the series starts on 1 Jan
-            doy_sin = np.sin(2 * np.pi * doy / 365.0)
-            doy_cos = np.cos(2 * np.pi * doy / 365.0)
-            # Random hour for variety
-            hour = rng.integers(6, 19)  # upper bound is exclusive: hours 6..18
-            hour_sin = np.sin(2 * np.pi * hour / 24.0)
-            hour_cos = np.cos(2 * np.pi * hour / 24.0)
-            row = [
-                current_temp,
-                current_wbgt,
-                current_humidity,
-                temp_trend,
-                temp_anomaly,
-                soil_proxy,
-                rolling_err,
-                doy_sin,
-                doy_cos,
-                hour_sin,
-                hour_cos,
-                zone_vuln,
-            ]
-            all_X.append(row)
-            all_y.append(labels[day])
-    X = np.array(all_X, dtype=np.float32)  # NOTE(review): empty when every zone was skipped; nothing below guards that case before fit()
-    y = np.array(all_y, dtype=np.int32)
-    pos_rate = y.sum() / len(y) if len(y) > 0 else 0
-    log.info(
-        "XGBoost training data: %d samples, %.1f%% positive rate",
-        len(X), pos_rate * 100,
-    )
-    model = xgb.XGBClassifier(
-        n_estimators=150,
-        max_depth=5,
-        learning_rate=0.1,
-        eval_metric="logloss",
-        random_state=42,
-    )
-    # Train/validation split: first 75% of rows train, last 25% validate.
-    # NOTE(review): rows were appended zone by zone, so this is a positional split over the pooled array (the tail is the last zones in ZONES), not a split by date.
-    split = int(len(X) * 0.75)
-    X_train, X_val = X[:split], X[split:]
-    y_train, y_val = y[:split], y[split:]
-    model.fit(
-        X_train, y_train,
-        eval_set=[(X_val, y_val)],
-        verbose=False,
-    )
-    # Evaluate on validation set
-    from sklearn.metrics import roc_auc_score, precision_score, recall_score
-    val_probs = model.predict_proba(X_val)[:, 1]  # column 1 = probability of the positive (trigger) class
-    val_preds = (val_probs > 0.5).astype(int)
-    if len(set(y_val)) > 1:  # metrics are computed only when both classes appear in the validation labels
-        auroc = roc_auc_score(y_val, val_probs)
-        precision = precision_score(y_val, val_preds, zero_division=0)
-        recall = recall_score(y_val, val_preds, zero_division=0)
-    else:
-        auroc, precision, recall = 0.5, 0.0, 0.0  # placeholder values for a single-class validation set
-    print(f"\n--- XGBoost Results (real NASA POWER data) ---")
-    print(f"  Training samples:   {len(X_train)}")
-    print(f"  Validation samples: {len(X_val)}")
-    print(f"  Positive rate:      {pos_rate:.1%}")
-    print(f"  Val AUROC:          {auroc:.4f}")
-    print(f"  Val Precision:      {precision:.4f}")
-    print(f"  Val Recall:         {recall:.4f}")
-    # Save model
-    MODELS_DIR.mkdir(parents=True, exist_ok=True)
-    model.save_model(str(xgb_model_path))
-    log.info("XGBoost model saved to %s", xgb_model_path)
-    return {  # main() reads val_auroc / val_precision / val_recall from this dict
-        "train_samples": len(X_train),
-        "val_samples": len(X_val),
-        "positive_rate": round(pos_rate, 4),
-        "val_auroc": round(auroc, 4),
-        "val_precision": round(precision, 4),
-        "val_recall": round(recall, 4),
-    }
-# ======================================================================
-# Step 5: Verify UHI model (no retraining)
-# ======================================================================
-def verify_uhi(data: dict[str, list[dict]]):  # Step 5: runs the existing UHI corrector on sampled real temps; returns per-zone summary dicts keyed by zone_id
-    log.info("=" * 60)
-    log.info("STEP 5: Verifying UHI model with real NASA POWER temperatures")
-    log.info("  (UHI model keeps literature-calibrated synthetic training)")
-    log.info("=" * 60)
-    from src.downscaling.uhi_model import UHICorrector
-    corrector = UHICorrector()
-    results = {}
-    for zone in ZONES:
-        zid = zone.zone_id
-        readings = data.get(zid, [])
-        if not readings:
-            continue
-        real_temps = [r["temp_max_c"] for r in readings if r["temp_max_c"] is not None]
-        if not real_temps:  # zones with no usable temperature are left out of the result table
-            continue
-        # Sample up to 20 evenly spaced readings across the series and apply UHI correction
-        sample_indices = np.linspace(0, len(real_temps) - 1, min(20, len(real_temps)), dtype=int)
-        deltas = []
-        corrected_temps = []
-        for idx in sample_indices:
-            grid_temp = real_temps[idx]
-            corrected, delta, conf = corrector.correct_temperature(zone, grid_temp, hour=14, month=1)  # hour and month are fixed for every sample; conf is not used
-            deltas.append(delta)
-            corrected_temps.append(corrected)
-        results[zid] = {
-            "city": zone.city,
-            "settlement": zone.settlement_type,
-            "mean_grid_temp": round(sum(real_temps) / len(real_temps), 1),  # mean over ALL valid readings, not just the sampled ones
-            "mean_uhi_delta": round(sum(deltas) / len(deltas), 2),  # mean over the sampled readings only
-            "mean_corrected": round(sum(corrected_temps) / len(corrected_temps), 1),  # mean over the sampled readings only
-        }
-    print(f"\n--- UHI Verification (literature-calibrated model + real NASA POWER temps) ---")
-    print(f"{'Zone':<12} {'City':<16} {'Type':<12} {'Grid T':>7} {'UHI +':>7} {'Corrected':>10}")
-    print("-" * 70)
-    for zid, r in results.items():
-        print(
-            f"{zid:<12} {r['city']:<16} {r['settlement']:<12} "
-            f"{r['mean_grid_temp']:>6.1f}C {r['mean_uhi_delta']:>+6.2f}C {r['mean_corrected']:>9.1f}C"
-        )
-    return results
-# ======================================================================
-# Main
-# ======================================================================
-def main():  # runs the five steps in order, then prints a summary of data volume, model metrics and elapsed time
-    t_start = time.time()
-    # Step 1: Fetch
-    data = fetch_all_zones()
-    # Step 2: Validate
-    data_stats = validate_data(data)  # return value is not used again in this function
-    # Step 3: LSTM
-    lstm_metrics = retrain_lstm(data)
-    # Step 4: XGBoost
-    xgb_metrics = retrain_xgboost(data)
-    # Step 5: UHI verification
-    uhi_results = verify_uhi(data)  # return value is not used again; verify_uhi prints its own table
-    total_time = time.time() - t_start
-    print("\n" + "=" * 60)
-    print("TRAINING COMPLETE (NASA POWER real data)")
-    print("=" * 60)
-    total_days = sum(  # counts readings with a non-null temp_max_c, summed over all zones
-        len([r for r in data.get(z.zone_id, []) if r["temp_max_c"] is not None])
-        for z in ZONES
-    )
-    print(f"  Data source:             NASA POWER daily")
-    print(f"  Date range:              {START_DATE} to {END_DATE}")
-    print(f"  Total real data points:  {total_days} zone-days across {len(ZONES)} zones")
-    print(f"  Avg days per zone:       {total_days / len(ZONES):.0f}")  # NOTE(review): divides by len(ZONES); assumes ZONES is never empty
-    print(f"  LSTM val AUROC:          {lstm_metrics.get('val_auroc', 'N/A')}")  # LSTM keys are optional and fall back to 'N/A'
-    print(f"  LSTM epochs trained:     {lstm_metrics.get('epochs_trained', 'N/A')}")
-    print(f"  LSTM val loss:           {lstm_metrics.get('val_loss', 'N/A')}")
-    print(f"  XGBoost val AUROC:       {xgb_metrics['val_auroc']:.4f}")  # XGBoost keys are indexed directly; retrain_xgboost always returns them
-    print(f"  XGBoost val Precision:   {xgb_metrics['val_precision']:.4f}")
-    print(f"  XGBoost val Recall:      {xgb_metrics['val_recall']:.4f}")
-    print(f"  Total time:              {total_time:.1f}s")
-    print()
-if __name__ == "__main__":  # run the pipeline only when executed as a script
-    main()

src/notification/__init__.py DELETED Viewed

File without changes

src/notification/sender.py DELETED Viewed

@@ -1,318 +0,0 @@
-"""
-Notification delivery module.
-Sends trigger explanations to policyholders via console (demo),
-SMS (Twilio), or WhatsApp (Twilio).  All senders implement a common
-async interface and return a DeliveryResult.
-"""
-from __future__ import annotations
-import asyncio
-import logging
-import os
-import time
-from abc import ABC, abstractmethod
-from dataclasses import dataclass, field
-from datetime import datetime, timezone
-from typing import Optional, Sequence
-log = logging.getLogger(__name__)
-# ── Data containers ──────────────────────────────────────────────────────
-@dataclass
-class DeliveryResult:
-    """Outcome of a single notification delivery attempt."""
-    status: str  # "sent", "failed", "dry_run"
-    channel: str  # "console", "sms", "whatsapp"
-    recipient: str  # recipient identifier exactly as it was passed to send()
-    message_preview: str  # first 120 chars (the senders append "..." when the message was longer)
-    timestamp: str = field(  # ISO-8601 UTC string, generated when the instance is created
-        default_factory=lambda: datetime.now(timezone.utc).isoformat()
-    )
-    cost_estimate: float = 0.0  # estimated cost in USD
-    error: str = ""  # the Twilio-backed senders store str(exc) here on failure
-    message_sid: str = ""  # Twilio message SID if applicable
-# ── Base sender ──────────────────────────────────────────────────────────
-class BaseSender(ABC):
-    """Common interface for all notification channels."""
-    @abstractmethod
-    async def send(
-        self, recipient: str, message: str, channel: str = ""
-    ) -> DeliveryResult:
-        """Send a message to a single recipient."""
-        ...
-    async def send_batch(
-        self,
-        recipients: Sequence[str],
-        message: str,
-        channel: str = "",
-        rate_limit: float = 1.0,
-    ) -> list[DeliveryResult]:
-        """
-        Send the same message to multiple recipients with rate limiting.
-        Args:
-            recipients: Phone numbers or identifiers.
-            message: The notification text.
-            channel: Override channel name.
-            rate_limit: Minimum seconds between sends (Twilio default: 1/sec).
-        """
-        results: list[DeliveryResult] = []  # one entry per recipient, in input order
-        for i, recipient in enumerate(recipients):  # strictly sequential: each send is awaited before the next one starts
-            result = await self.send(recipient, message, channel)
-            results.append(result)
-            # Rate limiting between sends (skip after last); a rate_limit of 0 or less disables the pause
-            if i < len(recipients) - 1 and rate_limit > 0:
-                await asyncio.sleep(rate_limit)
-        return results
-# ── Console sender (demo mode) ───────────────────────────────────────────
-class ConsoleSender(BaseSender):
-    """Prints notifications to stdout.  Default for demo and testing."""
-    async def send(
-        self, recipient: str, message: str, channel: str = "console"
-    ) -> DeliveryResult:
-        preview = message[:120] + ("..." if len(message) > 120 else "")  # "..." is appended only when the message was cut
-        log.info("[CONSOLE] To: %s | %s", recipient, preview)
-        print(f"\n{'='*60}")
-        print(f"  NOTIFICATION — {channel or 'console'}")  # the channel argument only changes this banner line
-        print(f"  To: {recipient}")
-        print(f"{'='*60}")
-        print(f"  {message}")  # the full message is printed, not the preview
-        print(f"{'='*60}\n")
-        return DeliveryResult(
-            status="dry_run",  # nothing is actually delivered, so the status is never "sent"
-            channel="console",  # always "console", whatever channel name was passed in
-            recipient=recipient,
-            message_preview=preview,
-            cost_estimate=0.0,
-        )
-# ── Twilio SMS sender ───────────────────────────────────────────────────
-class TwilioSender(BaseSender):
-    """
-    Sends SMS via Twilio.
-    Requires environment variables:
-        TWILIO_ACCOUNT_SID
-        TWILIO_AUTH_TOKEN
-        TWILIO_FROM_NUMBER  (E.164 format, e.g. +15551234567)
-    """
-    def __init__(
-        self,
-        account_sid: Optional[str] = None,
-        auth_token: Optional[str] = None,
-        from_number: Optional[str] = None,
-    ):
-        self.account_sid = account_sid or os.environ.get("TWILIO_ACCOUNT_SID", "")  # explicit argument wins; otherwise the env var; otherwise ""
-        self.auth_token = auth_token or os.environ.get("TWILIO_AUTH_TOKEN", "")
-        self.from_number = from_number or os.environ.get("TWILIO_FROM_NUMBER", "")
-        self._client = None  # created on first use by _get_client()
-    def _get_client(self):
-        """Lazy-init Twilio client."""
-        if self._client is None:
-            if not all([self.account_sid, self.auth_token]):  # NOTE(review): from_number is not validated here; an empty value only surfaces when a send is attempted
-                raise RuntimeError(
-                    "Twilio credentials not configured. Set TWILIO_ACCOUNT_SID "
-                    "and TWILIO_AUTH_TOKEN environment variables."
-                )
-            from twilio.rest import Client  # imported here so the module loads without twilio installed
-            self._client = Client(self.account_sid, self.auth_token)
-        return self._client
-    async def send(
-        self, recipient: str, message: str, channel: str = "sms"
-    ) -> DeliveryResult:
-        preview = message[:120] + ("..." if len(message) > 120 else "")
-        # Truncate SMS to 1600 chars (Twilio limit for long SMS)
-        sms_body = message[:1600]
-        try:
-            client = self._get_client()  # a RuntimeError or ImportError raised here is caught below and reported as "failed"
-            # Twilio client is synchronous — run in executor
-            loop = asyncio.get_event_loop()  # NOTE(review): asyncio.get_running_loop() is the documented choice inside a coroutine - confirm before reuse
-            twilio_msg = await loop.run_in_executor(
-                None,
-                lambda: client.messages.create(
-                    body=sms_body,
-                    from_=self.from_number,
-                    to=recipient,
-                ),
-            )
-            log.info(
-                "[SMS] Sent to %s | SID: %s | Status: %s",
-                recipient, twilio_msg.sid, twilio_msg.status,
-            )
-            return DeliveryResult(
-                status="sent",
-                channel="sms",  # fixed to "sms"; the channel argument is not read in this method
-                recipient=recipient,
-                message_preview=preview,
-                cost_estimate=_estimate_sms_cost(message),  # NOTE(review): priced on the untruncated message, while sms_body (<= 1600 chars) is what was sent
-                message_sid=twilio_msg.sid,
-            )
-        except Exception as exc:  # broad by design: every failure becomes a "failed" result instead of propagating
-            log.error("[SMS] Failed to send to %s: %s", recipient, exc)
-            return DeliveryResult(
-                status="failed",
-                channel="sms",
-                recipient=recipient,
-                message_preview=preview,
-                error=str(exc),
-            )
-# ── WhatsApp sender ──────────────────────────────────────────────────────
-class WhatsAppSender(BaseSender):
-    """
-    Sends WhatsApp messages via Twilio WhatsApp Business API.
-    Uses the same Twilio credentials as SMS but prefixes the from number
-    with 'whatsapp:'.
-    """
-    def __init__(  # NOTE(review): __init__ and _get_client repeat TwilioSender's versions line for line
-        self,
-        account_sid: Optional[str] = None,
-        auth_token: Optional[str] = None,
-        from_number: Optional[str] = None,
-    ):
-        self.account_sid = account_sid or os.environ.get("TWILIO_ACCOUNT_SID", "")  # explicit argument wins; otherwise the env var; otherwise ""
-        self.auth_token = auth_token or os.environ.get("TWILIO_AUTH_TOKEN", "")
-        self.from_number = from_number or os.environ.get("TWILIO_FROM_NUMBER", "")  # same env var as the SMS sender
-        self._client = None  # created on first use by _get_client()
-    def _get_client(self):
-        """Lazy-init Twilio client."""
-        if self._client is None:
-            if not all([self.account_sid, self.auth_token]):  # from_number is not part of this check
-                raise RuntimeError(
-                    "Twilio credentials not configured. Set TWILIO_ACCOUNT_SID "
-                    "and TWILIO_AUTH_TOKEN environment variables."
-                )
-            from twilio.rest import Client
-            self._client = Client(self.account_sid, self.auth_token)
-        return self._client
-    async def send(
-        self, recipient: str, message: str, channel: str = "whatsapp"
-    ) -> DeliveryResult:
-        preview = message[:120] + ("..." if len(message) > 120 else "")
-        # WhatsApp supports longer messages (up to 4096 chars)
-        wa_body = message[:4096]
-        # Ensure whatsapp: prefix on both numbers (added only when not already present)
-        wa_from = (
-            f"whatsapp:{self.from_number}"
-            if not self.from_number.startswith("whatsapp:")
-            else self.from_number
-        )
-        wa_to = (
-            f"whatsapp:{recipient}"
-            if not recipient.startswith("whatsapp:")
-            else recipient
-        )
-        try:
-            client = self._get_client()  # configuration errors raised here are caught below and reported as "failed"
-            loop = asyncio.get_event_loop()  # NOTE(review): asyncio.get_running_loop() is the documented choice inside a coroutine - confirm before reuse
-            twilio_msg = await loop.run_in_executor(  # the synchronous Twilio call runs in the default executor
-                None,
-                lambda: client.messages.create(
-                    body=wa_body,
-                    from_=wa_from,
-                    to=wa_to,
-                ),
-            )
-            log.info(
-                "[WhatsApp] Sent to %s | SID: %s | Status: %s",
-                recipient, twilio_msg.sid, twilio_msg.status,
-            )
-            return DeliveryResult(
-                status="sent",
-                channel="whatsapp",
-                recipient=recipient,  # the caller's original value, without the added "whatsapp:" prefix
-                message_preview=preview,
-                cost_estimate=_estimate_whatsapp_cost(),
-                message_sid=twilio_msg.sid,
-            )
-        except Exception as exc:  # broad by design: every failure becomes a "failed" result instead of propagating
-            log.error("[WhatsApp] Failed to send to %s: %s", recipient, exc)
-            return DeliveryResult(
-                status="failed",
-                channel="whatsapp",
-                recipient=recipient,
-                message_preview=preview,
-                error=str(exc),
-            )
-# ── Cost estimation ──────────────────────────────────────────────────────
-def _estimate_sms_cost(message: str) -> float:
-    """Estimate SMS cost in USD.  Twilio Kenya rate ~ $0.0475/segment."""
-    # SMS segments: 160 chars for GSM-7, 70 for UCS-2 (Unicode)
-    # NOTE(review): multi-part messages usually carry 153 / 67 chars per segment (concatenation header), so long messages may be under-counted - confirm against Twilio's segment rules
-    has_unicode = any(ord(c) > 127 for c in message)  # any non-ASCII character switches the whole message to the 70-char size
-    segment_size = 70 if has_unicode else 160
-    segments = max(1, (len(message) + segment_size - 1) // segment_size)  # ceiling division; an empty message still counts as one segment
-    return round(segments * 0.0475, 4)
-def _estimate_whatsapp_cost() -> float:
-    """Estimate WhatsApp cost in USD.  Twilio WhatsApp ~ $0.005/msg + template fees."""
-    return 0.005  # flat per-message figure; the template fees mentioned in the docstring are not included
-# ── Sender factory ───────────────────────────────────────────────────────
-def create_sender(channel: str = "console") -> BaseSender:
-    """
-    Factory to create the appropriate sender.
-    Args:
-        channel: One of "console", "sms", "whatsapp".
-    """
-    if channel == "sms":  # exact, case-sensitive match
-        return TwilioSender()  # no arguments: credentials come from the environment
-    elif channel == "whatsapp":
-        return WhatsAppSender()
-    else:
-        return ConsoleSender()  # any other value, including unknown channel names, falls back to the console sender
-async def send_zone_notifications(
-    recipients: Sequence[str],
-    message: str,
-    channel: str = "console",
-    rate_limit: float = 1.0,
-) -> list[DeliveryResult]:
-    """
-    Convenience function: send the same notification to all recipients in a zone.
-    Args:
-        recipients: List of phone numbers.
-        message: Notification text.
-        channel: "console", "sms", or "whatsapp".
-        rate_limit: Seconds between sends for Twilio rate limiting.
-    """
-    sender = create_sender(channel)  # a new sender is built on every call
-    return await sender.send_batch(recipients, message, channel, rate_limit)  # one DeliveryResult per recipient, in input order

src/prediction/heat_forecast.py DELETED Viewed

@@ -1,557 +0,0 @@
-"""
-Heat wave prediction model for parametric insurance triggers.
-XGBoost classifier that predicts the probability of a heat trigger
-event (2+ consecutive days above city-adjusted threshold) occurring
-within the next 7 days, given recent climate features.
-Degrades gracefully:
-  full_model -> persistence -> climatology
-References:
-  - Perkins-Kirkpatrick & Lewis (2020) heat wave definitions
-  - WHO/ILO occupational heat stress thresholds
-"""
-from __future__ import annotations
-from collections import deque
-from pathlib import Path
-import numpy as np
-try:
-    import xgboost as xgb
-except ImportError:
-    xgb = None
-# Import shared constants from lstm_model (defined there to avoid circular imports)
-from src.prediction.lstm_model import CITY_THRESHOLDS, CITY_CLIMATE
-try:
-    from src.prediction.lstm_model import LSTMPredictor
-    _LSTM_AVAILABLE = True
-except Exception:
-    _LSTM_AVAILABLE = False
-FEATURE_NAMES = [
-    "current_temp",
-    "current_wbgt",
-    "current_humidity",
-    "temp_trend_7d",
-    "temp_anomaly_30d",
-    "soil_moisture_proxy",
-    "rolling_error",
-    "doy_sin",
-    "doy_cos",
-    "hour_sin",
-    "hour_cos",
-    "zone_vulnerability",
-]
-def _resolve_model_path(model_path: str) -> Path:  # absolute paths are returned unchanged; relative ones are re-anchored below
-    p = Path(model_path)
-    if not p.is_absolute():
-        p = Path(__file__).resolve().parents[2] / model_path  # parents[2]: two directory levels above this file's own directory
-    return p
-from src.indexing.heat_index import calculate_wbgt as _simple_wbgt
-class HeatWavePredictor:
-    """XGBoost model: recent climate features -> trigger probability in 7 days."""
-    FEATURE_NAMES = FEATURE_NAMES
-    def __init__(self, model_path: str = "models/heat_predictor_xgb.json"):  # raises ImportError when xgboost could not be imported at module load
-        if xgb is None:
-            raise ImportError(
-                "xgboost is required. Install with: pip install 'xgboost>=2.0.0'"
-            )
-        self.model_path = _resolve_model_path(model_path)
-        self.model: xgb.XGBClassifier | None = None
-        self._rolling_errors: deque = deque(maxlen=3)  # keeps only the 3 most recent prediction errors
-        self._load_or_train()  # defined outside this excerpt; presumably loads model_path or falls back to train() - confirm
-        # Try loading LSTM for ensemble; auto-train on synthetic data if missing
-        self._lstm: object | None = None
-        if _LSTM_AVAILABLE:
-            try:
-                self._lstm = LSTMPredictor()
-            except FileNotFoundError:
-                self._train_lstm_synthetic()  # defined outside this excerpt; presumably sets self._lstm - confirm
-            except Exception:  # any other LSTM failure leaves the predictor XGBoost-only
-                self._lstm = None
-    # ------------------------------------------------------------------
-    # Public API
-    # ------------------------------------------------------------------
-    def predict(
-        self,
-        zone,
-        recent_temps: list[float],
-        recent_humidity: list[float],
-        recent_wbgt: list[float],
-        hour: int = 12,
-    ) -> tuple[float, float, str]:
-        """Predict probability of heat trigger within 7 days.
-        Args:
-            zone: UrbanZone instance.
-            recent_temps: Last 30 daily max temperatures (most recent last).
-            recent_humidity: Last 30 daily humidity values.
-            recent_wbgt: Last 30 daily WBGT values.
-            hour: Current hour for diurnal encoding.
-        Returns:
-            (probability, confidence, model_tier)
-            model_tier is one of: "ensemble", "full_model", "lstm_only",
-                                  "persistence", "climatology"
-        """
-        xgb_prob, xgb_conf, xgb_ok = None, None, False
-        lstm_prob, lstm_conf, lstm_ok = None, None, False
-        # -- XGBoost prediction --
-        try:
-            features = self._build_features(
-                zone, recent_temps, recent_humidity, recent_wbgt, hour
-            )
-            xgb_prob = float(self.model.predict_proba(features)[0, 1])  # column 1 = probability of the positive class
-            xgb_conf = self._estimate_confidence(recent_temps, "full_model")
-            xgb_ok = True
-        except Exception:  # NOTE(review): any failure (including self.model being None) is swallowed without logging; the tier simply degrades
-            pass
-        # -- LSTM prediction --
-        if self._lstm is not None:
-            try:
-                lstm_days = self._build_lstm_days(
-                    recent_temps, recent_humidity, recent_wbgt
-                )
-                lstm_prob, lstm_conf = self._lstm.predict(lstm_days)
-                lstm_ok = True
-            except Exception:  # NOTE(review): swallowed without logging, same as the XGBoost branch
-                pass
-        # -- Ensemble: equal-weight average of both probabilities and both confidences --
-        if xgb_ok and lstm_ok:
-            prob = 0.5 * xgb_prob + 0.5 * lstm_prob
-            confidence = (xgb_conf + lstm_conf) / 2.0
-            return round(prob, 4), round(confidence, 3), "ensemble"
-        if xgb_ok:
-            return round(xgb_prob, 4), round(xgb_conf, 3), "full_model"
-        if lstm_ok:
-            return round(lstm_prob, 4), round(lstm_conf, 3), "lstm_only"
-        # Persistence fallback: if recent conditions are above threshold,
-        # assume they continue
-        try:
-            threshold = CITY_THRESHOLDS.get(zone.city, 33.0)
-            if len(recent_temps) >= 2:
-                above = sum(1 for t in recent_temps[-3:] if t > threshold)  # share of the last (up to) 3 readings strictly above the threshold
-                prob = min(0.95, above / 3.0)  # always divided by 3, so with only 2 readings the result cannot exceed 2/3
-            else:
-                prob = 0.5  # fewer than 2 readings: fixed 0.5
-            confidence = self._estimate_confidence(recent_temps, "persistence")
-            return round(prob, 4), round(confidence, 3), "persistence"
-        except Exception:  # reached only when the persistence computation itself raises (e.g. zone has no .city attribute)
-            pass
-        # Climatology fallback: use seasonal base rate
-        from config import HOT_SEASONS
-        import datetime
-        doy = datetime.datetime.now().timetuple().tm_yday  # NOTE(review): computed but never used below
-        month = datetime.datetime.now().month  # local wall-clock month at call time
-        city = getattr(zone, "city", "Nairobi")
-        hot_months = []
-        for season_months in HOT_SEASONS.get(city, {}).values():  # flattens every season's month list for this city
-            hot_months.extend(season_months)
-        prob = 0.35 if month in hot_months else 0.10  # fixed base rates: in a hot season vs outside one
-        confidence = 0.30
-        return round(prob, 4), round(confidence, 3), "climatology"
-    @staticmethod
-    def _build_lstm_days(
-        recent_temps: list[float],
-        recent_humidity: list[float],
-        recent_wbgt: list[float],
-    ) -> list[dict]:
-        """Convert raw arrays into the list-of-dicts format the LSTM expects.
-        The LSTM predictor computes WBGT, heat index, and temp anomaly
-        internally, so we only need to pass the raw observations.
-        """
-        n = min(len(recent_temps), len(recent_humidity), len(recent_wbgt))  # recent_wbgt only limits the length; its values are never copied
-        days = []
-        for i in range(n):  # takes the first n entries, so extra trailing values in a longer list are dropped
-            days.append({
-                "temp_max_c": recent_temps[i],
-                "humidity_pct": recent_humidity[i],
-                "wind_speed_ms": 3.0,  # fixed value: no wind observation is passed into this method
-            })
-        return days
-    def update_rolling_error(self, predicted_prob: float, actual: bool) -> None:
-        """Track prediction accuracy for the rolling_error feature."""
-        error = abs(predicted_prob - (1.0 if actual else 0.0))  # absolute gap between the predicted probability and the 0/1 outcome
-        self._rolling_errors.append(error)  # the deque is created with maxlen=3 in __init__, so the oldest error drops out
-    # ------------------------------------------------------------------
-    # Feature engineering
-    # ------------------------------------------------------------------
-    def _build_features(
-        self,
-        zone,
-        recent_temps: list[float],
-        recent_humidity: list[float],
-        recent_wbgt: list[float],
-        hour: int = 12,
-    ) -> np.ndarray:
-        """Build the 12-feature vector.
-        Features:
-            0:  current_temp — most recent daily max temp
-            1:  current_wbgt — most recent WBGT
-            2:  current_humidity — most recent humidity
-            3:  temp_trend_7d — linear slope over last 7 days
-            4:  temp_anomaly_30d — current temp minus 30-day mean
-            5:  soil_moisture_proxy — inverse of recent rainfall proxy
-                (approximated as negative temp anomaly clamped to [0,1])
-            6:  rolling_error — mean of last 3 prediction errors
-            7-8: doy_sin, doy_cos — seasonal encoding
-            9-10: hour_sin, hour_cos — diurnal encoding
-            11: zone_vulnerability — numeric heat vulnerability
-        """
-        import datetime
-        temps = list(recent_temps)
-        humid = list(recent_humidity)
-        wbgts = list(recent_wbgt)
-        current_temp = temps[-1] if temps else 30.0  # hard-coded fallbacks apply when the corresponding input list is empty
-        current_wbgt = wbgts[-1] if wbgts else 28.0
-        current_humidity = humid[-1] if humid else 65.0
-        # Trend: slope of last 7 days (0.0 when fewer than 7 values are available)
-        if len(temps) >= 7:
-            x = np.arange(7, dtype=np.float64)
-            y = np.array(temps[-7:], dtype=np.float64)
-            temp_trend = float(np.polyfit(x, y, 1)[0])
-        else:
-            temp_trend = 0.0
-        # Anomaly: current value minus the mean of ALL supplied temps (whatever their count), 0.0 with fewer than 2
-        if len(temps) >= 2:
-            temp_anomaly = current_temp - np.mean(temps)
-        else:
-            temp_anomaly = 0.0
-        # Soil moisture proxy: when temps are well below average,
-        # likely recent rain -> higher moisture. Clamp to [0, 1].
-        soil_proxy = float(np.clip(1.0 - (temp_anomaly + 2.0) / 4.0, 0.0, 1.0))
-        # Rolling prediction error
-        if self._rolling_errors:
-            rolling_err = float(np.mean(self._rolling_errors))
-        else:
-            rolling_err = 0.3  # neutral prior
-        # Day of year encoding
-        doy = datetime.datetime.now().timetuple().tm_yday  # NOTE(review): local wall-clock day at call time, whereas train() uses the series index modulo 365
-        doy_sin = np.sin(2 * np.pi * doy / 365.0)
-        doy_cos = np.cos(2 * np.pi * doy / 365.0)
-        # Hour encoding
-        hour_sin = np.sin(2 * np.pi * hour / 24.0)
-        hour_cos = np.cos(2 * np.pi * hour / 24.0)
-        # Vulnerability: a missing attribute or unknown label maps to 0.5
-        vuln_map = {"high": 1.0, "moderate": 0.5, "low": 0.0}
-        zone_vuln = vuln_map.get(
-            getattr(zone, "heat_vulnerability", "moderate"), 0.5
-        )
-        features = np.array(  # element order follows the module-level FEATURE_NAMES list
-            [
-                current_temp,
-                current_wbgt,
-                current_humidity,
-                temp_trend,
-                temp_anomaly,
-                soil_proxy,
-                rolling_err,
-                doy_sin,
-                doy_cos,
-                hour_sin,
-                hour_cos,
-                zone_vuln,
-            ],
-            dtype=np.float32,
-        ).reshape(1, -1)  # shape (1, 12): a single sample row for predict_proba
-        return features
-    # ------------------------------------------------------------------
-    # Confidence estimation
-    # ------------------------------------------------------------------
-    @staticmethod
-    def _estimate_confidence(recent_temps: list[float], tier: str) -> float:
-        """Heuristic confidence based on data quality and model tier."""
-        base = {"full_model": 0.80, "persistence": 0.45, "climatology": 0.30}
-        conf = base.get(tier, 0.30)  # tiers not in the table start from 0.30
-        # More data -> higher confidence
-        n = len(recent_temps)
-        if n >= 30:
-            conf += 0.10
-        elif n >= 14:
-            conf += 0.05
-        # Low variance in recent data -> more predictable
-        if n >= 7:
-            std = float(np.std(recent_temps[-7:]))  # standard deviation of the last 7 values
-            if std < 2.0:
-                conf += 0.05
-        return min(conf, 0.95)  # capped: "full_model" with both bonuses lands exactly on the 0.95 cap
-    # ------------------------------------------------------------------
-    # Training
-    # ------------------------------------------------------------------
-    def train(self, seed: int = 42) -> None:
-        """Generate 2 years of synthetic daily data per zone and train.
-        For each zone, generates 730 days of realistic temperature,
-        humidity, and WBGT curves with autocorrelation. Labels each
-        day with whether a trigger event (2+ consecutive days above
-        threshold) occurs within the next 7 days.
-        """
-        rng = np.random.default_rng(seed)  # one generator shared by all zones, so each zone draws different values
-        from config import ZONES
-        n_days = 730
-        all_X = []  # feature rows pooled across all zones
-        all_y = []  # matching labels, same order as all_X
-        for zone in ZONES:
-            city = zone.city
-            climate = CITY_CLIMATE.get(city, CITY_CLIMATE["Nairobi"])  # cities missing from the table use Nairobi's climate entry
-            # Generate daily temperatures with autocorrelation
-            temps = self._generate_temp_series(climate, n_days, rng)
-            humidity = self._generate_humidity_series(climate, n_days, rng)
-            # Compute WBGT series
-            wbgt_series = [
-                _simple_wbgt(t, h) for t, h in zip(temps, humidity)
-            ]
-            # Label: trigger within next 7 days?
-            threshold = CITY_THRESHOLDS.get(city, 33.0)
-            labels = self._label_triggers(temps, threshold, n_days)  # helper defined outside this excerpt
-            # Build features for each day (window below holds 31 values: 30 prior days plus the current day)
-            vuln_map = {"high": 1.0, "moderate": 0.5, "low": 0.0}
-            zone_vuln = vuln_map.get(zone.heat_vulnerability, 0.5)
-            for day in range(30, n_days - 7):  # skips the first 30 days (no lookback) and the last 7 (no full label horizon)
-                t_window = temps[day - 30 : day + 1]
-                h_window = humidity[day - 30 : day + 1]
-                w_window = wbgt_series[day - 30 : day + 1]
-                current_temp = t_window[-1]
-                current_wbgt = w_window[-1]
-                current_humidity = h_window[-1]
-                # Trend: slope of a degree-1 fit over the last 7 values
-                x7 = np.arange(7, dtype=np.float64)
-                y7 = np.array(t_window[-7:], dtype=np.float64)
-                temp_trend = float(np.polyfit(x7, y7, 1)[0])
-                # Anomaly: current value minus the mean of the 31-value window
-                temp_anomaly = current_temp - float(np.mean(t_window))
-                # Soil moisture proxy
-                soil_proxy = float(
-                    np.clip(1.0 - (temp_anomaly + 2.0) / 4.0, 0.0, 1.0)
-                )
-                # Synthetic rolling error: random value in [0.1, 0.5), since no real prediction history exists
-                rolling_err = rng.uniform(0.1, 0.5)
-                # Day-of-year encoding (day within 365-day cycle)
-                doy = day % 365
-                doy_sin = np.sin(2 * np.pi * doy / 365.0)
-                doy_cos = np.cos(2 * np.pi * doy / 365.0)
-                # Random hour for variety
-                hour = rng.integers(6, 19)  # upper bound is exclusive: hours 6..18
-                hour_sin = np.sin(2 * np.pi * hour / 24.0)
-                hour_cos = np.cos(2 * np.pi * hour / 24.0)
-                row = [
-                    current_temp,
-                    current_wbgt,
-                    current_humidity,
-                    temp_trend,
-                    temp_anomaly,
-                    soil_proxy,
-                    rolling_err,
-                    doy_sin,
-                    doy_cos,
-                    hour_sin,
-                    hour_cos,
-                    zone_vuln,
-                ]
-                all_X.append(row)
-                all_y.append(labels[day])
-        X = np.array(all_X, dtype=np.float32)
-        y = np.array(all_y, dtype=np.int32)
-        self.model = xgb.XGBClassifier(
-            n_estimators=150,
-            max_depth=5,
-            learning_rate=0.1,
-            eval_metric="logloss",
-            random_state=seed,
-        )
-        self.model.fit(X, y)  # fits on every row; no validation split is held out in this method
-        self._save_model()  # helper defined outside this excerpt
-    # ------------------------------------------------------------------
-    # Synthetic data generation helpers
-    # ------------------------------------------------------------------
-    @staticmethod
-    def _generate_temp_series(
-        climate: dict, n_days: int, rng
-    ) -> list[float]:
-        """Generate realistic daily max temperatures with autocorrelation.
-        Uses seasonal cosine curve + AR(1) autocorrelated noise.
-        """
-        mean = climate["temp_mean"]
-        amp = climate["temp_amp"]
-        phase = climate["phase_doy"]
-        lo, hi = climate["temp_range"]
-        temps = []
-        noise = 0.0
-        ar_coef = 0.7  # autocorrelation coefficient
-        for d in range(n_days):
-            # Seasonal component
-            seasonal = mean + amp * np.cos(
-                2 * np.pi * (d - phase) / 365.0
-            )
-            # AR(1) noise
-            noise = ar_coef * noise + rng.normal(0, 1.2)
-            temp = seasonal + noise
-            temp = float(np.clip(temp, lo - 1.0, hi + 2.0))
-            temps.append(temp)
-        return temps
-    @staticmethod
-    def _generate_humidity_series(
-        climate: dict, n_days: int, rng
-    ) -> list[float]:
-        """Generate daily humidity with seasonal cycle and noise."""
-        mean = climate["humidity_mean"]
-        amp = climate["humidity_amp"]
-        phase = climate.get("phase_doy", 45)
-        humidity = []
-        noise = 0.0
-        for d in range(n_days):
-            # Humidity anti-correlated with temp in dry season
-            seasonal = mean - amp * np.cos(
-                2 * np.pi * (d - phase) / 365.0
-            )
-            noise = 0.5 * noise + rng.normal(0, 4.0)
-            h = seasonal + noise
-            h = float(np.clip(h, 30.0, 98.0))
-            humidity.append(h)
-        return humidity
-    @staticmethod
-    def _label_triggers(
-        temps: list[float], threshold: float, n_days: int
-    ) -> list[int]:
-        """Label each day: 1 if a 2+ consecutive day trigger occurs in next 7 days."""
-        labels = [0] * n_days
-        for day in range(n_days - 7):
-            window = temps[day + 1 : day + 8]
-            # Check for 2+ consecutive above threshold
-            consec = 0
-            triggered = False
-            for t in window:
-                if t > threshold:
-                    consec += 1
-                    if consec >= 2:
-                        triggered = True
-                        break
-                else:
-                    consec = 0
-            labels[day] = 1 if triggered else 0
-        return labels
-    # ------------------------------------------------------------------
-    # Persistence
-    # ------------------------------------------------------------------
-    def _save_model(self) -> None:
-        self.model_path.parent.mkdir(parents=True, exist_ok=True)
-        self.model.save_model(str(self.model_path))
-    def _load_or_train(self) -> None:
-        if self.model_path.exists():
-            self.model = xgb.XGBClassifier()
-            self.model.load_model(str(self.model_path))
-        else:
-            self.train()
-    def _train_lstm_synthetic(self) -> None:
-        """Auto-train LSTM on synthetic data when no model file exists."""
-        try:
-            from src.prediction.lstm_model import (
-                LSTMTrainer,
-                generate_synthetic_zone_data,
-            )
-            from config import ZONES
-            import logging
-            logger = logging.getLogger(__name__)
-            logger.info("LSTM model not found -- training on synthetic data")
-            zone_data = generate_synthetic_zone_data(ZONES, n_days=730, seed=42)
-            trainer = LSTMTrainer(epochs=50, patience=5)
-            trainer.train(zone_data)
-            # Reload the predictor now that the model file exists
-            self._lstm = LSTMPredictor()
-            logger.info("LSTM auto-trained and loaded successfully")
-        except Exception as exc:
-            import logging
-            logging.getLogger(__name__).warning(
-                "LSTM auto-training failed: %s", exc
-            )
-            self._lstm = None

src/prediction/lstm_model.py DELETED Viewed

@@ -1,566 +0,0 @@
-"""
-LSTM neural heat wave predictor for parametric insurance triggers.
-2-layer LSTM that learns temporal patterns in 14-day climate sequences
-to predict heat wave trigger probability in the next 7 days.
-Ensembled with the existing XGBoost classifier in heat_forecast.py.
-Architecture:
-    Input:  (batch, 14, 6) -- 14 days x 6 climate features
-    LSTM:   2 layers, hidden_size=64, dropout=0.2
-    Output: scalar sigmoid probability
-Features per timestep:
-    0: temp_max_c       -- daily max temperature (normalized)
-    1: humidity_pct     -- relative humidity (normalized)
-    2: wind_speed_ms    -- wind speed (normalized)
-    3: wbgt_c           -- wet-bulb globe temperature (normalized)
-    4: heat_index_c     -- apparent temperature (normalized)
-    5: temp_anomaly     -- temp minus 7-day rolling mean (normalized)
-References:
-    - Perkins-Kirkpatrick & Lewis (2020) heat wave definitions
-    - WHO/ILO occupational heat stress thresholds
-"""
-from __future__ import annotations
-import json
-from pathlib import Path
-import numpy as np
-try:
-    import torch
-    import torch.nn as nn
-    from torch.utils.data import DataLoader, TensorDataset
-    TORCH_AVAILABLE = True
-except ImportError:
-    TORCH_AVAILABLE = False
-from src.indexing.heat_index import calculate_wbgt, calculate_heat_index
-# City-specific temperature thresholds for trigger definition (deg C)
-CITY_THRESHOLDS = {
-    "Dar es Salaam": 34.0,
-    "Kampala": 31.0,
-    "Nairobi": 28.0,
-    "Kigali": 29.0,
-}
-# Seasonal temperature / humidity profiles per city
-CITY_CLIMATE = {
-    "Dar es Salaam": {
-        "temp_mean": 31.0, "temp_amp": 3.5, "phase_doy": 45,
-        "humidity_mean": 82.0, "humidity_amp": 7.0,
-        "temp_range": (28.0, 36.0),
-    },
-    "Kampala": {
-        "temp_mean": 28.0, "temp_amp": 3.0, "phase_doy": 45,
-        "humidity_mean": 70.0, "humidity_amp": 10.0,
-        "temp_range": (24.0, 32.0),
-    },
-    "Nairobi": {
-        "temp_mean": 24.5, "temp_amp": 3.0, "phase_doy": 55,
-        "humidity_mean": 57.0, "humidity_amp": 12.0,
-        "temp_range": (20.0, 28.0),
-    },
-    "Kigali": {
-        "temp_mean": 25.5, "temp_amp": 2.5, "phase_doy": 50,
-        "humidity_mean": 65.0, "humidity_amp": 8.0,
-        "temp_range": (22.0, 29.0),
-    },
-}
-FEATURE_NAMES = [
-    "temp_max_c", "humidity_pct", "wind_speed_ms",
-    "wbgt_c", "heat_index_c", "temp_anomaly",
-]
-NUM_FEATURES = len(FEATURE_NAMES)
-def _resolve_path(rel: str) -> Path:
-    """Resolve a path relative to the project root."""
-    return Path(__file__).resolve().parents[2] / rel
-# ======================================================================
-# Model
-# ======================================================================
-if TORCH_AVAILABLE:
-    class HeatLSTM(nn.Module):
-        """2-layer LSTM for 7-day heat wave trigger prediction."""
-        def __init__(
-            self,
-            input_size: int = NUM_FEATURES,
-            hidden_size: int = 64,
-            num_layers: int = 2,
-            dropout: float = 0.2,
-        ):
-            super().__init__()
-            self.lstm = nn.LSTM(
-                input_size, hidden_size, num_layers,
-                batch_first=True, dropout=dropout,
-            )
-            self.fc = nn.Linear(hidden_size, 1)
-        def forward(self, x):
-            # x: (batch, seq_len, input_size)
-            out, _ = self.lstm(x)
-            out = self.fc(out[:, -1, :])  # last timestep
-            return torch.sigmoid(out).squeeze(-1)
-# ======================================================================
-# Derived feature computation
-# ======================================================================
-def _compute_temp_anomaly(temps: list[float], index: int) -> float:
-    """Compute temp minus 7-day rolling mean at the given index."""
-    start = max(0, index - 6)
-    window = temps[start:index + 1]
-    if not window:
-        return 0.0
-    return temps[index] - float(np.mean(window))
-# ======================================================================
-# Synthetic data generation
-# ======================================================================
-def _generate_temp_series(climate: dict, n_days: int, rng) -> list[float]:
-    """Daily max temperatures with AR(1) autocorrelation."""
-    mean, amp, phase = climate["temp_mean"], climate["temp_amp"], climate["phase_doy"]
-    lo, hi = climate["temp_range"]
-    temps, noise = [], 0.0
-    for d in range(n_days):
-        seasonal = mean + amp * np.cos(2 * np.pi * (d - phase) / 365.0)
-        noise = 0.7 * noise + rng.normal(0, 1.2)
-        temps.append(float(np.clip(seasonal + noise, lo - 1.0, hi + 2.0)))
-    return temps
-def _generate_humidity_series(climate: dict, n_days: int, rng) -> list[float]:
-    """Daily humidity with seasonal cycle and noise."""
-    mean, amp = climate["humidity_mean"], climate["humidity_amp"]
-    phase = climate.get("phase_doy", 45)
-    humidity, noise = [], 0.0
-    for d in range(n_days):
-        seasonal = mean - amp * np.cos(2 * np.pi * (d - phase) / 365.0)
-        noise = 0.5 * noise + rng.normal(0, 4.0)
-        humidity.append(float(np.clip(seasonal + noise, 30.0, 98.0)))
-    return humidity
-def _generate_wind_series(n_days: int, rng) -> list[float]:
-    """Synthetic wind speed series (m/s)."""
-    winds, noise = [], 0.0
-    for _ in range(n_days):
-        noise = 0.4 * noise + rng.normal(0, 0.8)
-        winds.append(float(np.clip(3.5 + noise, 0.5, 12.0)))
-    return winds
-def _label_triggers(temps: list[float], threshold: float, n_days: int) -> list[int]:
-    """Label each day: 1 if 2+ consecutive days above threshold in next 7 days."""
-    labels = [0] * n_days
-    for day in range(n_days - 7):
-        window = temps[day + 1: day + 8]
-        consec = 0
-        triggered = False
-        for t in window:
-            if t > threshold:
-                consec += 1
-                if consec >= 2:
-                    triggered = True
-                    break
-            else:
-                consec = 0
-        labels[day] = 1 if triggered else 0
-    return labels
-def generate_synthetic_zone_data(
-    zones: list, n_days: int = 730, seed: int = 42,
-) -> dict[str, list[dict]]:
-    """Generate synthetic daily climate data for all zones.
-    Returns:
-        dict mapping zone_id -> list of daily dicts with keys:
-            temp_max_c, humidity_pct, wind_speed_ms, city
-    """
-    rng = np.random.default_rng(seed)
-    zone_data: dict[str, list[dict]] = {}
-    for zone in zones:
-        city = zone.city
-        climate = CITY_CLIMATE.get(city, CITY_CLIMATE["Nairobi"])
-        temps = _generate_temp_series(climate, n_days, rng)
-        humidity = _generate_humidity_series(climate, n_days, rng)
-        winds = _generate_wind_series(n_days, rng)
-        days = []
-        for i in range(n_days):
-            days.append({
-                "temp_max_c": temps[i],
-                "humidity_pct": humidity[i],
-                "wind_speed_ms": winds[i],
-                "city": city,
-            })
-        zone_data[zone.zone_id] = days
-    return zone_data
-# ======================================================================
-# Trainer
-# ======================================================================
-class LSTMTrainer:
-    """Train the HeatLSTM on historical or synthetic climate data."""
-    def __init__(
-        self,
-        model: object | None = None,
-        lr: float = 0.001,
-        epochs: int = 50,
-        patience: int = 5,
-        seq_len: int = 14,
-        forecast_horizon: int = 7,
-    ):
-        if not TORCH_AVAILABLE:
-            raise ImportError("torch is required. pip install torch")
-        self._custom_model = model
-        self.lr = lr
-        self.epochs = epochs
-        self.patience = patience
-        self.seq_len = seq_len
-        self.forecast_horizon = forecast_horizon
-        self.model_path = _resolve_path("models/heat_lstm.pt")
-        self.norm_path = _resolve_path("models/lstm_norm.json")
-    def prepare_data(
-        self, zone_readings: dict[str, list],
-    ) -> tuple:
-        """Convert zone readings to training tensors.
-        Args:
-            zone_readings: dict of zone_id -> list of daily readings.
-                Each reading needs: temp_max_c, humidity_pct, wind_speed_ms
-                Optional: date, city
-        Returns:
-            (X_train, y_train, X_val, y_val) as torch tensors
-        """
-        all_seqs: list[np.ndarray] = []
-        all_labels: list[int] = []
-        for zone_id, days in zone_readings.items():
-            n = len(days)
-            if n < self.seq_len + self.forecast_horizon + 1:
-                continue
-            city = days[0].get("city", "Nairobi")
-            threshold = CITY_THRESHOLDS.get(city, 33.0)
-            # Extract raw temps for labeling and anomaly computation
-            temps = [d["temp_max_c"] for d in days]
-            labels = _label_triggers(temps, threshold, n)
-            # Compute derived features for all days
-            derived = []
-            for i, d in enumerate(days):
-                t = d["temp_max_c"]
-                h = d["humidity_pct"]
-                w = d["wind_speed_ms"]
-                wbgt = calculate_wbgt(t, h)
-                hi = calculate_heat_index(t, h)
-                anomaly = _compute_temp_anomaly(temps, i)
-                derived.append([t, h, w, wbgt, hi, anomaly])
-            # Create sliding windows
-            for i in range(n - self.seq_len - self.forecast_horizon):
-                seq = np.array(
-                    derived[i: i + self.seq_len], dtype=np.float32,
-                )
-                all_seqs.append(seq)
-                all_labels.append(labels[i + self.seq_len - 1])
-        X = np.stack(all_seqs)  # (N, seq_len, 6)
-        y = np.array(all_labels, dtype=np.float32)
-        # Temporal split: first 75% train, last 25% validation
-        split = int(len(X) * 0.75)
-        X_train_np, X_val_np = X[:split], X[split:]
-        y_train_np, y_val_np = y[:split], y[split:]
-        # Compute normalization (z-score per feature) from training set
-        flat = X_train_np.reshape(-1, NUM_FEATURES)
-        feat_mean = flat.mean(axis=0).tolist()
-        feat_std = flat.std(axis=0).tolist()
-        feat_std = [max(s, 1e-6) for s in feat_std]
-        # Save normalization params
-        norm = {"mean": feat_mean, "std": feat_std}
-        self.norm_path.parent.mkdir(parents=True, exist_ok=True)
-        with open(self.norm_path, "w") as f:
-            json.dump(norm, f, indent=2)
-        # Normalize
-        mean_arr = np.array(feat_mean, dtype=np.float32)
-        std_arr = np.array(feat_std, dtype=np.float32)
-        X_train_np = (X_train_np - mean_arr) / std_arr
-        X_val_np = (X_val_np - mean_arr) / std_arr
-        X_train = torch.from_numpy(X_train_np)
-        y_train = torch.from_numpy(y_train_np)
-        X_val = torch.from_numpy(X_val_np)
-        y_val = torch.from_numpy(y_val_np)
-        return X_train, y_train, X_val, y_val
-    def train(self, zone_readings: dict[str, list]) -> dict:
-        """Train the LSTM and return metrics."""
-        torch.manual_seed(42)
-        np.random.seed(42)
-        X_train, y_train, X_val, y_val = self.prepare_data(zone_readings)
-        # DataLoaders
-        train_ds = TensorDataset(X_train, y_train)
-        val_ds = TensorDataset(X_val, y_val)
-        train_loader = DataLoader(train_ds, batch_size=128, shuffle=True)
-        val_loader = DataLoader(val_ds, batch_size=256, shuffle=False)
-        # Model, loss, optimizer
-        model = self._custom_model if self._custom_model is not None else HeatLSTM()
-        criterion = nn.BCELoss()
-        optimizer = torch.optim.Adam(model.parameters(), lr=self.lr)
-        total_params = sum(p.numel() for p in model.parameters())
-        print(f"  Model params: {total_params:,}")
-        # Training loop with early stopping
-        best_val_loss = float("inf")
-        patience_counter = 0
-        best_state = None
-        best_metrics: dict = {}
-        for epoch in range(self.epochs):
-            # Train
-            model.train()
-            train_loss, train_total = 0.0, 0
-            for xb, yb in train_loader:
-                optimizer.zero_grad()
-                preds = model(xb)
-                loss = criterion(preds, yb)
-                loss.backward()
-                optimizer.step()
-                train_loss += loss.item() * len(xb)
-                train_total += len(xb)
-            # Validate
-            model.eval()
-            val_loss, val_total = 0.0, 0
-            all_val_preds, all_val_labels = [], []
-            with torch.no_grad():
-                for xb, yb in val_loader:
-                    preds = model(xb)
-                    loss = criterion(preds, yb)
-                    val_loss += loss.item() * len(xb)
-                    val_total += len(xb)
-                    all_val_preds.extend(preds.numpy().tolist())
-                    all_val_labels.extend(yb.numpy().tolist())
-            avg_train_loss = train_loss / max(train_total, 1)
-            avg_val_loss = val_loss / max(val_total, 1)
-            val_auroc = _compute_auroc(all_val_labels, all_val_preds)
-            if (epoch + 1) % 5 == 0 or epoch == 0:
-                print(
-                    f"  Epoch {epoch + 1:>2}: "
-                    f"train_loss={avg_train_loss:.4f} | "
-                    f"val_loss={avg_val_loss:.4f} val_auroc={val_auroc:.3f}"
-                )
-            # Early stopping
-            if avg_val_loss < best_val_loss:
-                best_val_loss = avg_val_loss
-                patience_counter = 0
-                best_state = {k: v.clone() for k, v in model.state_dict().items()}
-                best_metrics = {
-                    "train_loss": round(avg_train_loss, 4),
-                    "val_loss": round(avg_val_loss, 4),
-                    "val_auroc": round(val_auroc, 4),
-                    "epochs_trained": epoch + 1,
-                    "samples": {
-                        "train": len(X_train),
-                        "val": len(X_val),
-                    },
-                }
-            else:
-                patience_counter += 1
-                if patience_counter >= self.patience:
-                    print(f"  Early stopping at epoch {epoch + 1}")
-                    break
-        # Save best model
-        if best_state is not None:
-            model.load_state_dict(best_state)
-        self.model_path.parent.mkdir(parents=True, exist_ok=True)
-        torch.save(model.state_dict(), self.model_path)
-        file_size = self.model_path.stat().st_size
-        print(f"  Saved model to {self.model_path} ({file_size / 1024:.1f} KB)")
-        return best_metrics
-# ======================================================================
-# Predictor (inference)
-# ======================================================================
-class LSTMPredictor:
-    """Load trained HeatLSTM and predict trigger probability."""
-    def __init__(
-        self,
-        model_path: str = "models/heat_lstm.pt",
-        norm_path: str = "models/lstm_norm.json",
-    ):
-        if not TORCH_AVAILABLE:
-            raise ImportError("torch is required. pip install torch")
-        self.model_path = _resolve_path(model_path)
-        self.norm_path = _resolve_path(norm_path)
-        self.model: HeatLSTM | None = None
-        self._norm: dict | None = None
-        self._load_model()
-    def _load_model(self) -> None:
-        """Load model weights and normalization params from disk."""
-        mp = self.model_path
-        np_ = self.norm_path
-        if not mp.exists():
-            raise FileNotFoundError(
-                f"LSTM model not found at {mp}. "
-                "Run scripts/train_lstm.py first."
-            )
-        if not np_.exists():
-            raise FileNotFoundError(
-                f"LSTM normalization params not found at {np_}. "
-                "Run scripts/train_lstm.py first."
-            )
-        self.model = HeatLSTM()
-        self.model.load_state_dict(
-            torch.load(mp, map_location="cpu", weights_only=True)
-        )
-        self.model.eval()
-        with open(np_) as f:
-            self._norm = json.load(f)
-    def predict(self, recent_14_days: list[dict]) -> tuple[float, float]:
-        """Predict trigger probability from last 14 days of data.
-        Args:
-            recent_14_days: list of dicts with keys:
-                temp_max_c, humidity_pct, wind_speed_ms
-                (WBGT, heat index, and temp anomaly are computed internally)
-        Returns:
-            (probability, confidence) where:
-            - probability: 0-1 trigger probability
-            - confidence: 0-1 based on MC dropout (5 forward passes)
-        """
-        if len(recent_14_days) < 14:
-            pad = [recent_14_days[0]] * (14 - len(recent_14_days))
-            recent_14_days = pad + recent_14_days
-        days = recent_14_days[-14:]
-        # Extract temps for anomaly computation
-        temps = [d.get("temp_max_c", d.get("temp_c", 30.0)) for d in days]
-        # Build feature array: compute derived features
-        seq = []
-        for i, d in enumerate(days):
-            t = d.get("temp_max_c", d.get("temp_c", 30.0))
-            h = d.get("humidity_pct", 65.0)
-            w = d.get("wind_speed_ms", 3.0)
-            wbgt = d.get("wbgt_c", calculate_wbgt(t, h))
-            hi = d.get("heat_index_c", calculate_heat_index(t, h))
-            anomaly = _compute_temp_anomaly(temps, i)
-            seq.append([t, h, w, wbgt, hi, anomaly])
-        x = np.array(seq, dtype=np.float32)
-        # Normalize using saved params
-        mean = np.array(self._norm["mean"], dtype=np.float32)
-        std = np.array(self._norm["std"], dtype=np.float32)
-        x = (x - mean) / std
-        x_tensor = torch.from_numpy(x).unsqueeze(0)  # (1, 14, 6)
-        # MC Dropout: 5 forward passes batched with dropout enabled
-        self.model.train()
-        x_batch = x_tensor.expand(5, -1, -1)  # (5, 14, 6)
-        with torch.no_grad():
-            preds = self.model(x_batch).numpy()
-        self.model.eval()
-        probability = float(np.mean(preds))
-        std_val = float(np.std(preds))
-        confidence = max(0.3, min(0.95, 1.0 - std_val * 3))
-        probability = float(np.clip(probability, 0.0, 1.0))
-        return probability, confidence
-# ======================================================================
-# Utilities
-# ======================================================================
-def _compute_auroc(labels: list[float], preds: list[float]) -> float:
-    """Compute AUROC using sklearn if available, else trapezoidal fallback."""
-    if len(set(labels)) < 2:
-        return 0.5
-    try:
-        from sklearn.metrics import roc_auc_score
-        return float(roc_auc_score(labels, preds))
-    except ImportError:
-        pass
-    # Fallback: trapezoidal AUROC
-    pairs = sorted(zip(preds, labels), key=lambda x: -x[0])
-    tp, fp = 0, 0
-    tp_prev, fp_prev = 0, 0
-    auc = 0.0
-    n_pos = sum(labels)
-    n_neg = len(labels) - n_pos
-    if n_pos == 0 or n_neg == 0:
-        return 0.5
-    prev_score = None
-    for score, label in pairs:
-        if score != prev_score and prev_score is not None:
-            auc += (fp - fp_prev) * (tp + tp_prev) / 2.0
-            tp_prev, fp_prev = tp, fp
-        if label == 1.0:
-            tp += 1
-        else:
-            fp += 1
-        prev_score = score
-    auc += (fp - fp_prev) * (tp + tp_prev) / 2.0
-    return auc / (n_pos * n_neg)

src/pricing/neural_actuarial.py DELETED Viewed

@@ -1,1312 +0,0 @@
-"""
-Neural Actuarial Pricing Engine for Parametric Heat Insurance.
-Three-headed temporal neural model trained on real climate data:
-  1. HazardHead (Neural EVT): Learns extreme heat frequency + severity
-     distribution using Generalized Pareto Distribution parameters
-  2. VulnerabilityHead: Learns zone-specific worker impact calibrated
-     to WHO/ILO occupational heat stress guidelines (ISO 7243)
-  3. PricingHead (CANN): Combined Actuarial Neural Network — GLM baseline
-     with bounded neural correction. When δ_NN = 0, reproduces the
-     existing ActuarialPricer formula exactly.
-City-specific: trained per city (default: Dar es Salaam). Architecture
-is portable; weights are local. Retrain for any city with:
-    python3 scripts/train_neural_pricer.py --city "Kampala"
-References:
-  - Chen et al. (2024) "Managing Weather Risk with NN-Based Index Insurance"
-    Management Science 70(7), 4306-4327
-  - Pasche & Engelke (2024) "Neural Networks for Extreme Quantile Regression"
-    arXiv:2208.07590 (EQRN)
-  - Richman & Wuthrich (2023) "LocalGLMnet" Scandinavian Actuarial Journal
-  - ISO 7243 / NIOSH occupational heat stress thresholds
-"""
-from __future__ import annotations
-import json
-import logging
-from pathlib import Path
-from typing import Optional
-import numpy as np
-try:
-    import torch
-    import torch.nn as nn
-    import torch.nn.functional as F
-    TORCH_AVAILABLE = True
-except ImportError:
-    TORCH_AVAILABLE = False
-# Chronos foundation model (optional — used for Chronos encoder path)
-try:
-    from chronos import ChronosBoltPipeline
-    CHRONOS_AVAILABLE = True
-except ImportError:
-    CHRONOS_AVAILABLE = False
-from src.indexing.heat_index import calculate_wbgt
-from src.pricing.actuarial import ActuarialPricer, ActuarialResult
-log = logging.getLogger(__name__)
-PROJECT_ROOT = Path(__file__).resolve().parents[2]
# ── City-specific trigger thresholds (WBGT, °C) ──────────────────────────
# Actuarial trigger levels, deliberately set ABOVE occupational-safety limits:
# a safety limit marks "uncomfortable" conditions, whereas an insurance
# trigger has to mark "exceptional" events.
# NOTE(review): the original header quoted ~P85-P90 of each city's WBGT
# distribution, while the Dar es Salaam entry is annotated as P97 — confirm
# which percentile the other cities were calibrated to.
CITY_WBGT_THRESHOLDS = {
    "Dar es Salaam": 35.1,  # Calibrated P97 threshold (matches insurance benchmark)
    "Kampala": 30.0,
    "Nairobi": 28.0,
    "Kigali": 29.0,
}

# Settlement-type WBGT thresholds for compound triggers. Every settlement
# type shares one value (the Dar es Salaam city threshold): zones are meant
# to differ through the UHI temperature correction, not through the trigger.
SETTLEMENT_THRESHOLDS = dict.fromkeys(
    ("informal", "mixed", "formal", "commercial"),
    35.1,
)

# Minimum run of consecutive days above threshold that counts as one heat
# EVENT. Alert tier at 2 days, payout tier at 5 days (matches insurance
# benchmark).
MIN_CONSECUTIVE_DAYS = 2
-# ── WHO/ILO dose-response: WBGT → productivity loss ──────────────────────
-# ISO 7243 thresholds for moderate-to-heavy outdoor work
def who_productivity_loss(wbgt: float) -> float:
    """Map a WBGT reading (°C) to a fractional productivity loss in [0, 1].

    The mapping is a step function: each band is ``lower <= wbgt < upper``,
    and anything at or above 35.0 °C returns 1.0 (work must stop). The band
    edges are the original author's ISO 7243 calibration.
    """
    # (exclusive upper bound in °C, loss applied below that bound)
    bands = (
        (26.0, 0.0),
        (28.0, 0.10),
        (30.0, 0.25),
        (32.0, 0.50),
        (35.0, 0.75),
    )
    for upper_bound, loss in bands:
        if wbgt < upper_bound:
            return loss
    return 1.0  # work must stop
-# ── Settlement-type UHI ranges (from literature) ─────────────────────────
-from src.downscaling.uhi_model import UHI_RANGES
-# ══════════════════════════════════════════════════════════════════════════
-# PyTorch Model Components
-# ══════════════════════════════════════════════════════════════════════════
-if TORCH_AVAILABLE:
class TemporalEncoder(nn.Module):
    """LSTM encoder that summarises a climate sequence as one latent vector.

    Input:  (batch, seq_len, input_size); the training code in this file
            builds 90-day windows of 7 climate + 4 zone-static features.
    Output: (batch, hidden_size), the layer-normalised final hidden state
            of the top LSTM layer.
    """

    def __init__(self, input_size: int = 11, hidden_size: int = 128,
                 num_layers: int = 2, dropout: float = 0.3):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.norm = nn.LayerNorm(hidden_size)

    def forward(self, x):
        # Per-step outputs and the cell state are not needed; only the final
        # hidden states are, stacked as (num_layers, batch, hidden_size).
        _, (final_hidden, _) = self.lstm(x)
        top_layer = final_hidden[-1]  # (batch, hidden_size)
        return self.norm(top_layer)
class ChronosEncoder(nn.Module):
    """Encoder that swaps the LSTM for embeddings from a frozen Chronos pipeline.

    The WBGT channel of the sequence is embedded by an externally attached
    pipeline (the trainer in this file loads ``amazon/chronos-bolt-tiny``).
    The mean-pooled embedding is concatenated with per-sequence climate means
    and the zone-static features, then projected to ``hidden_size``.

    Input:  (batch, seq_len, 11), the same contract as TemporalEncoder.
    Output: (batch, hidden_size), the same contract as TemporalEncoder.
    """

    WBGT_IDX = 6    # index of WBGT in the 11-feature vector
    N_CLIMATE = 7   # features 0-6 are climate
    N_STATIC = 4    # features 7-10 are zone-static

    def __init__(self, hidden_size: int = 128, chronos_d_model: int = 256):
        super().__init__()
        proj_input = chronos_d_model + self.N_CLIMATE + self.N_STATIC
        self.proj = nn.Linear(proj_input, hidden_size)
        self.act = nn.GELU()
        self.norm = nn.LayerNorm(hidden_size)
        self._chronos_d_model = chronos_d_model
        # Attached after construction via set_pipeline(). These are plain
        # attributes, so they are NOT part of state_dict.
        self._pipeline = None
        self._feat_mean = None   # per-feature mean used to undo the z-score
        self._feat_std = None    # per-feature std used to undo the z-score

    def set_pipeline(self, pipeline, feat_mean=None, feat_std=None):
        """Attach the frozen Chronos pipeline and optional z-score statistics.

        Args:
            pipeline: object exposing ``embed(series)`` that returns a tuple
                whose first item is a (batch, tokens, d_model) tensor.
            feat_mean: per-feature means indexed by feature position, or None.
            feat_std: per-feature standard deviations, or None.
        """
        self._pipeline = pipeline
        self._feat_mean = feat_mean
        self._feat_std = feat_std

    def _unnorm_wbgt(self, x_norm):
        """Undo the z-score on the WBGT channel; returns (batch, seq_len).

        When either statistic is missing, the channel is returned unchanged.
        """
        wbgt_norm = x_norm[:, :, self.WBGT_IDX]
        if self._feat_mean is not None and self._feat_std is not None:
            mu = self._feat_mean[self.WBGT_IDX]
            sd = self._feat_std[self.WBGT_IDX]
            return wbgt_norm * sd + mu  # back to the pre-norm scale (wbgt / 40)
        return wbgt_norm

    def _embed_wbgt(self, x_norm):
        """Embed raw-°C WBGT with the attached pipeline and mean-pool tokens.

        Raises:
            RuntimeError: if no pipeline has been attached.
        """
        if self._pipeline is None:
            # Fail with an actionable message instead of the opaque
            # "'NoneType' object has no attribute 'embed'".
            raise RuntimeError(
                "ChronosEncoder has no pipeline attached; call set_pipeline() "
                "or pass chronos_embeddings to forward()"
            )
        wbgt_scaled = self._unnorm_wbgt(x_norm)   # (batch, seq_len), wbgt / 40
        wbgt_raw = wbgt_scaled * 40.0             # raw °C for Chronos
        with torch.no_grad():                     # the pipeline stays frozen
            emb, _ = self._pipeline.embed(wbgt_raw)
        return emb.mean(dim=1)                    # (batch, d_model)

    def forward(self, x, chronos_embeddings=None):
        """Encode ``x`` into a (batch, hidden_size) latent.

        Args:
            x: (batch, seq_len, 11) normalized climate sequence.
            chronos_embeddings: optional pre-computed (batch, d_model)
                embeddings. When None they are computed from ``x`` through
                the attached pipeline.

        Raises:
            RuntimeError: if ``chronos_embeddings`` is None and no pipeline
                has been attached.
        """
        climate_means = x[:, :, :self.N_CLIMATE].mean(dim=1)   # (batch, 7)
        # Static features are repeated on every step, so step 0 is enough.
        zone_static = x[:, 0, self.N_CLIMATE:]                 # (batch, 4)
        if chronos_embeddings is None:
            chronos_embeddings = self._embed_wbgt(x)
        combined = torch.cat(
            [chronos_embeddings, climate_means, zone_static], dim=-1
        )
        return self.norm(self.act(self.proj(combined)))
class HazardHead(nn.Module):
    """Neural extreme-value head with a two-tier trigger decision.

    A shared two-layer trunk feeds seven scalar projections. ``forward``
    returns them as a 7-tuple of (batch, 1) tensors, in this order:

      lambda_          events/year,  softplus + 0.5   (> 0.5)
      sigma            GPD scale,    softplus + 0.1   (> 0.1)
      xi               GPD shape,    tanh × 0.4       ([-0.4, 0.4])
      alert_prob       sigmoid, moderate event (cash tier)
      payout_prob      sigmoid, severe sustained event (insurance tier)
      alert_severity   sigmoid, cash amount scaling
      payout_severity  sigmoid, insurance payout scaling
    """

    def __init__(self, input_size: int = 128):
        super().__init__()
        trunk_width = 32
        self.net = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.GELU(),
            nn.Linear(64, trunk_width),
            nn.GELU(),
        )
        # One scalar projection per output, created in return order.
        self.lambda_head = nn.Linear(trunk_width, 1)
        self.sigma_head = nn.Linear(trunk_width, 1)
        self.xi_head = nn.Linear(trunk_width, 1)
        self.alert_prob_head = nn.Linear(trunk_width, 1)
        self.payout_prob_head = nn.Linear(trunk_width, 1)
        self.alert_severity_head = nn.Linear(trunk_width, 1)
        self.payout_severity_head = nn.Linear(trunk_width, 1)

    def forward(self, h):
        trunk = self.net(h)

        # Offsets keep the frequency and the GPD scale strictly positive.
        lambda_ = F.softplus(self.lambda_head(trunk)) + 0.5
        sigma = F.softplus(self.sigma_head(trunk)) + 0.1
        xi = torch.tanh(self.xi_head(trunk)) * 0.4

        alert_prob, payout_prob, alert_severity, payout_severity = (
            torch.sigmoid(head(trunk))
            for head in (
                self.alert_prob_head,
                self.payout_prob_head,
                self.alert_severity_head,
                self.payout_severity_head,
            )
        )
        return (lambda_, sigma, xi, alert_prob, payout_prob,
                alert_severity, payout_severity)
class VulnerabilityHead(nn.Module):
    """Loss-distribution head fed by the latent plus the hazard outputs.

    ``forward`` concatenates the encoder latent with the seven hazard
    tensors along the last axis, so ``input_size`` must equal the latent
    width plus 7 (default: 128 + 7 = 135).

    Returns a 3-tuple of (batch, 1) tensors:
      productivity_loss    sigmoid → [0, 1]
      basis_risk           sigmoid → [0, 1]
      severity_multiplier  softplus + 0.5 → [0.5, ∞)
    """

    def __init__(self, input_size: int = 135):  # 128 + 7 hazard outputs
        super().__init__()
        trunk_width = 32
        self.net = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.GELU(),
            nn.Linear(64, trunk_width),
            nn.GELU(),
        )
        self.prod_loss_head = nn.Linear(trunk_width, 1)
        self.basis_risk_head = nn.Linear(trunk_width, 1)
        self.severity_head = nn.Linear(trunk_width, 1)

    def forward(self, h, lambda_, sigma, xi, alert_prob, payout_prob, alert_sev, payout_sev):
        hazard_terms = (lambda_, sigma, xi, alert_prob, payout_prob,
                        alert_sev, payout_sev)
        trunk = self.net(torch.cat((h, *hazard_terms), dim=-1))

        productivity_loss = torch.sigmoid(self.prod_loss_head(trunk))
        basis_risk = torch.sigmoid(self.basis_risk_head(trunk))
        severity_multiplier = F.softplus(self.severity_head(trunk)) + 0.5
        return productivity_loss, basis_risk, severity_multiplier
class PricingHead(nn.Module):
    """CANN (Combined Actuarial Neural Network) correction head.

    Produces δ_NN, a log-space correction that the pricer models in this
    file apply as ``price = exp(η_GLM + δ_NN) × 1.05``. The last layer is
    zero-initialised, so a fresh head returns δ_NN = 0 and the price equals
    the GLM baseline.

    δ_NN is clamped to [-max_delta, max_delta]. Because the clamp acts in
    log space, the price multiplier lies in [exp(-max_delta), exp(max_delta)].

    ``input_size`` must equal the latent width + 7 hazard outputs + 3
    vulnerability outputs (default: 128 + 7 + 3 = 138).
    """

    def __init__(self, input_size: int = 138, max_delta: float = 0.5):  # 128 + 7 hazard + 3 vuln
        super().__init__()
        self.max_delta = max_delta
        self.net = nn.Sequential(
            nn.Linear(input_size, 32),
            nn.GELU(),
            nn.Linear(32, 1),
        )
        # Start exactly at the GLM baseline: the output layer emits zero.
        output_layer = self.net[-1]
        nn.init.zeros_(output_layer.weight)
        nn.init.zeros_(output_layer.bias)

    def forward(self, h, lambda_, sigma, xi, alert_prob, payout_prob, alert_sev, payout_sev, prod_loss, basis_risk, severity):
        features = torch.cat(
            (
                h,
                lambda_, sigma, xi,
                alert_prob, payout_prob, alert_sev, payout_sev,
                prod_loss, basis_risk, severity,
            ),
            dim=-1,
        )
        raw_delta = self.net(features)
        return raw_delta.clamp(min=-self.max_delta, max=self.max_delta)
class HeatRiskNeuralPricer(nn.Module):
    """Full neural actuarial pricing model with an LSTM encoder.

    Composes TemporalEncoder + HazardHead + VulnerabilityHead + PricingHead.
    The price is a GLM-style baseline built from the head outputs, multiplied
    by ``exp(δ_NN)`` from the pricing head and a fixed 1.05 buffer.
    """

    # Widths of the tensors concatenated onto the latent by the heads'
    # forward() methods; the head input sizes are derived from these.
    N_HAZARD_OUTPUTS = 7
    N_VULN_OUTPUTS = 3

    def __init__(self, input_size: int = 11, hidden_size: int = 128):
        super().__init__()
        self.encoder = TemporalEncoder(input_size, hidden_size)
        self.hazard = HazardHead(hidden_size)
        # The vulnerability head receives the latent plus all 7 hazard
        # outputs, and the pricing head additionally the 3 vulnerability
        # outputs. Sizing them as +3 / +6 made forward() fail on the first
        # Linear layer of each head.
        self.vulnerability = VulnerabilityHead(
            hidden_size + self.N_HAZARD_OUTPUTS
        )
        self.pricing = PricingHead(
            hidden_size + self.N_HAZARD_OUTPUTS + self.N_VULN_OUTPUTS
        )

    def forward(self, x, payout_per_event: float = 10.0,
                admin_rate: float = 0.15):
        """Price one batch of climate sequences.

        Args:
            x: (batch, seq_len, input_size) daily climate sequence with the
                zone features appended to every step.
            payout_per_event: USD per event per worker.
            admin_rate: operational overhead rate applied to the subtotal.

        Returns:
            dict of (batch,) tensors holding every intermediate and final
            quantity: hazard outputs, vulnerability outputs, the GLM
            loadings, ``glm_price``, ``delta_nn`` and ``total_per_worker``.
        """
        # Encode the temporal sequence.
        h = self.encoder(x)  # (batch, hidden_size)

        # Hazard: frequency + severity + two-tier trigger.
        lambda_, sigma, xi, alert_prob, payout_prob, alert_sev, payout_sev = self.hazard(h)

        # Vulnerability: worker impact.
        prod_loss, basis_risk, severity = self.vulnerability(
            h, lambda_, sigma, xi, alert_prob, payout_prob, alert_sev, payout_sev
        )

        # GPD mean excess sigma / (1 - xi); xi is capped just below its 0.4
        # bound before dividing.
        xi_safe = torch.clamp(xi, max=0.39)
        expected_severity = sigma / (1.0 - xi_safe)

        # GLM baseline: expected payouts plus basis-risk, vulnerability and
        # admin loadings.
        base_cost = lambda_ * payout_per_event
        basis_loading = base_cost * (basis_risk * 0.5)
        vuln_loading = base_cost * (prod_loss * 0.2)
        subtotal = base_cost + basis_loading + vuln_loading
        admin_loading = subtotal * admin_rate
        glm_total = subtotal + admin_loading
        eta_glm = torch.log(glm_total + 1e-8)  # epsilon guards log(0)

        # CANN: bounded neural correction added in log space.
        delta_nn = self.pricing(
            h, lambda_, sigma, xi, alert_prob, payout_prob, alert_sev, payout_sev,
            prod_loss, basis_risk, severity
        )
        total_per_worker = torch.exp(eta_glm + delta_nn) * 1.05

        return {
            "lambda_": lambda_.squeeze(-1),
            "sigma": sigma.squeeze(-1),
            "xi": xi.squeeze(-1),
            "alert_prob": alert_prob.squeeze(-1),
            "payout_prob": payout_prob.squeeze(-1),
            "alert_severity": alert_sev.squeeze(-1),
            "payout_severity": payout_sev.squeeze(-1),
            "productivity_loss": prod_loss.squeeze(-1),
            "basis_risk": basis_risk.squeeze(-1),
            "severity_multiplier": severity.squeeze(-1),
            "expected_severity": expected_severity.squeeze(-1),
            "delta_nn": delta_nn.squeeze(-1),
            "glm_price": (glm_total * 1.05).squeeze(-1),
            "total_per_worker": total_per_worker.squeeze(-1),
            "base_cost": base_cost.squeeze(-1),
            "basis_loading": basis_loading.squeeze(-1),
            "vuln_loading": vuln_loading.squeeze(-1),
            "admin_loading": admin_loading.squeeze(-1),
        }
class HeatRiskNeuralPricerChronos(nn.Module):
    """Pricing model that uses ChronosEncoder in place of the LSTM encoder.

    The hazard, vulnerability and pricing heads and the GLM baseline are the
    same as in HeatRiskNeuralPricer. The pricing head here allows a wider
    log-space correction (``max_delta=1.5``).
    """

    def __init__(self, hidden_size: int = 128, chronos_d_model: int = 256):
        super().__init__()
        self.encoder = ChronosEncoder(hidden_size, chronos_d_model)
        self.hazard = HazardHead(hidden_size)
        self.vulnerability = VulnerabilityHead(hidden_size + 7)  # + 7 hazard outputs
        self.pricing = PricingHead(hidden_size + 10, max_delta=1.5)  # + 7 hazard + 3 vuln

    def forward(self, x, payout_per_event: float = 10.0,
                admin_rate: float = 0.15, chronos_embeddings=None):
        """Price one batch; returns a dict of (batch,) tensors.

        Args:
            x: (batch, seq_len, 11) normalized climate sequence.
            payout_per_event: USD per event per worker.
            admin_rate: operational overhead rate applied to the subtotal.
            chronos_embeddings: optional pre-computed embeddings, forwarded
                to the encoder unchanged.
        """
        latent = self.encoder(x, chronos_embeddings=chronos_embeddings)

        # Hazard tuple order: lambda, sigma, xi, alert_prob, payout_prob,
        # alert_severity, payout_severity.
        hazard_out = self.hazard(latent)
        lambda_, sigma, xi, alert_prob, payout_prob, alert_sev, payout_sev = hazard_out

        vuln_out = self.vulnerability(latent, *hazard_out)
        prod_loss, basis_risk, severity = vuln_out

        expected_severity = sigma / (1.0 - torch.clamp(xi, max=0.39))

        # GLM baseline with basis-risk, vulnerability and admin loadings.
        base_cost = lambda_ * payout_per_event
        basis_loading = base_cost * (basis_risk * 0.5)
        vuln_loading = base_cost * (prod_loss * 0.2)
        subtotal = base_cost + basis_loading + vuln_loading
        admin_loading = subtotal * admin_rate
        glm_total = subtotal + admin_loading
        eta_glm = torch.log(glm_total + 1e-8)

        # Bounded neural correction, added in log space.
        delta_nn = self.pricing(latent, *hazard_out, *vuln_out)
        total_per_worker = torch.exp(eta_glm + delta_nn) * 1.05

        outputs = {
            "lambda_": lambda_,
            "sigma": sigma,
            "xi": xi,
            "alert_prob": alert_prob,
            "payout_prob": payout_prob,
            "alert_severity": alert_sev,
            "payout_severity": payout_sev,
            "productivity_loss": prod_loss,
            "basis_risk": basis_risk,
            "severity_multiplier": severity,
            "expected_severity": expected_severity,
            "delta_nn": delta_nn,
            "glm_price": glm_total * 1.05,
            "total_per_worker": total_per_worker,
            "base_cost": base_cost,
            "basis_loading": basis_loading,
            "vuln_loading": vuln_loading,
            "admin_loading": admin_loading,
        }
        # Every tensor is (batch, 1); drop the trailing axis for callers.
        return {name: value.squeeze(-1) for name, value in outputs.items()}
-# ══════════════════════════════════════════════════════════════════════════
-# Training Data Generation
-# ══════════════════════════════════════════════════════════════════════════
def load_climate_data(data_path: str | Path) -> dict[str, list[dict]]:
    """Load daily climate records (NASA POWER or ERA5-Land export) from JSON.

    Args:
        data_path: path of the JSON file to read.

    Returns:
        The decoded JSON document. ``build_training_samples`` in this file
        consumes it as a mapping of zone id to a list of per-day dicts.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the locale's default.
    with open(data_path, encoding="utf-8") as f:
        return json.load(f)
def build_training_samples(
    climate_data: dict[str, list[dict]],
    zones: list,
    window_size: int = 90,
    stride: int = 7,
    wbgt_threshold: float = 28.0,
) -> tuple[np.ndarray, dict[str, np.ndarray]]:
    """Build sliding-window training samples from daily climate records.

    For every zone present in both ``climate_data`` and ``zones``, windows
    of ``window_size`` days are taken every ``stride`` days. Each window
    yields one feature sequence and one value per target.

    Args:
        climate_data: mapping of zone id to a list of per-day dicts. Keys
            read per day: ``temp_max_c``, ``temp_min_c``, ``humidity_pct``,
            ``wind_speed_ms``, ``solar_rad_wm2``, ``precip_mm``.
        zones: objects exposing ``zone_id``, ``heat_vulnerability``,
            ``outdoor_exposure_pct``, ``elevation_m``, ``settlement_type``
            and ``worker_population_est``.
        window_size: days per sample.
        stride: days between consecutive window starts.
        wbgt_threshold: WBGT (°C) used for the basis-risk and two-tier
            targets, and as the fallback when a settlement type has no entry
            in ``SETTLEMENT_THRESHOLDS``.

    Returns:
        X: float32 array of shape (N, window_size, 11). It is
            (0, window_size, 11) when no window qualifies.
        targets: dict of float32 arrays, each of shape (N,).
    """
    import zlib  # stdlib; imported locally so the block is self-contained

    # Features per day: 7 climate values + 4 zone-static values.
    n_features = 11
    vuln_enc = {"high": 1.0, "moderate": 0.5, "low": 0.0}
    vuln_mult = {"high": 1.5, "moderate": 1.0, "low": 0.7}

    zone_map = {z.zone_id: z for z in zones}
    all_X = []
    all_targets = {
        "frequency": [],
        "gpd_sigma": [],
        "gpd_xi": [],
        "productivity_loss": [],
        "basis_risk": [],
        "severity_multiplier": [],
        "price_target": [],
        "alert_event": [],      # 1 if moderate heat event (cash tier)
        "payout_event": [],     # 1 if severe sustained event (insurance tier)
        "alert_severity": [],   # 0-1 cash amount scaling
        "payout_severity": [],  # 0-1 insurance payout scaling
    }

    for zone_id, records in climate_data.items():
        zone = zone_map.get(zone_id)
        if zone is None:
            continue

        # Zone-static features. Settlement type is deliberately NOT encoded:
        # the model should learn zone differences from the UHI-corrected
        # temperatures, not from a categorical label.
        zone_static = [
            vuln_enc.get(zone.heat_vulnerability, 0.5),
            zone.outdoor_exposure_pct,
            zone.elevation_m / 2000.0,
            0.0,  # padding to keep feature dim at 11
        ]

        # UHI parameters for this zone.
        uhi_lo, uhi_hi = UHI_RANGES.get(zone.settlement_type, (1.0, 2.0))
        mean_uhi = (uhi_lo + uhi_hi) / 2.0
        uhi_noise_std = (uhi_hi - uhi_lo) / 4.0  # small daily variation
        # Built-in hash() of a str is salted per process, so it would give a
        # different noise stream on every run; CRC32 is stable.
        uhi_seed = zlib.crc32(str(zone_id).encode("utf-8")) & 0x7FFFFFFF
        zone_thresh = SETTLEMENT_THRESHOLDS.get(zone.settlement_type, wbgt_threshold)
        v_mult = vuln_mult.get(zone.heat_vulnerability, 1.0)

        n = len(records)
        if n < window_size + 30:
            continue

        for start in range(0, n - window_size - 7, stride):
            window = records[start:start + window_size]

            # Re-seeded for every window, so each window of a zone draws the
            # same UHI noise sequence.
            rng_uhi = np.random.RandomState(uhi_seed)

            seq = []
            wbgts = []          # UHI-corrected (what workers feel)
            grid_wbgts = []     # raw grid (what satellite measures)
            for day in window:
                # `or` substitutes the default for missing/None values and
                # also for falsy zeros.
                t_max_grid = day.get("temp_max_c") or 30.0
                t_min = day.get("temp_min_c") or 24.0
                hum = day.get("humidity_pct") or 75.0
                wind = day.get("wind_speed_ms") or 3.0
                solar = day.get("solar_rad_wm2") or 200.0
                precip = day.get("precip_mm") or 0.0

                # Zone-specific UHI correction on temperature.
                uhi_delta = mean_uhi + rng_uhi.normal(0, uhi_noise_std)
                t_max = t_max_grid + uhi_delta
                wbgt = calculate_wbgt(t_max, hum)
                wbgts.append(wbgt)
                grid_wbgts.append(calculate_wbgt(t_max_grid, hum))

                seq.append([
                    t_max / 40.0,       # UHI-corrected temp (zone-specific)
                    t_min / 30.0,
                    hum / 100.0,
                    wind / 10.0,
                    solar / 400.0,
                    precip / 50.0,
                    wbgt / 40.0,        # UHI-corrected WBGT
                ] + zone_static)
            all_X.append(seq)

            # ── 1. Hazard: compound events (runs of consecutive hot days) ──
            run_length = 0
            run_peak = 0.0
            events = []
            for w in wbgts:
                if w > zone_thresh:
                    run_length += 1
                    run_peak = max(run_peak, w)
                else:
                    if run_length >= MIN_CONSECUTIVE_DAYS:
                        events.append(run_peak - zone_thresh)  # severity = peak excess
                    run_length = 0
                    run_peak = 0.0
            if run_length >= MIN_CONSECUTIVE_DAYS:  # run still open at window end
                events.append(run_peak - zone_thresh)

            exceedances = events  # for GPD fitting
            frequency = len(events) * (365.0 / window_size)  # annualize

            if len(exceedances) >= 3:
                try:
                    from scipy.stats import genpareto
                    xi_fit, _, sigma_fit = genpareto.fit(exceedances, floc=0)
                    xi_fit = max(-0.4, min(0.4, xi_fit))
                    sigma_fit = max(0.1, min(10.0, sigma_fit))
                except Exception:
                    # Best effort: scipy missing or the fit failed.
                    sigma_fit = float(np.std(exceedances)) + 0.5
                    xi_fit = 0.1
            else:
                # Too few events to fit a distribution; use fixed defaults.
                sigma_fit = 1.0
                xi_fit = 0.05

            # ── 2. Vulnerability: dose-response on UHI-corrected WBGT ──
            prod_losses = [who_productivity_loss(w) for w in wbgts]
            mean_prod_loss = float(np.mean(prod_losses)) * zone.outdoor_exposure_pct

            # ── 3. Basis risk: grid trigger vs UHI-corrected trigger ──
            grid_triggers = sum(1 for w in grid_wbgts if w > wbgt_threshold)
            corrected_triggers = sum(1 for w in wbgts if w > wbgt_threshold)
            if corrected_triggers > 0:
                false_negative_rate = max(0, corrected_triggers - grid_triggers) / corrected_triggers
            else:
                false_negative_rate = 0.0
            basis_risk_score = 0.3 * false_negative_rate + 0.2 * (mean_uhi / 6.0)
            # Constant +0.05 offset, with the total capped at 1.0.
            basis_risk_score = min(1.0, basis_risk_score + 0.05)

            # ── 4. Severity multiplier: corrected impact / grid impact ──
            # The grid side must use the uncorrected series; using the
            # corrected series on both sides made the ratio constant.
            grid_impact = sum(who_productivity_loss(w) for w in grid_wbgts)
            corrected_impact = sum(prod_losses)
            severity_mult = corrected_impact / max(grid_impact, 0.01)
            severity_mult = max(0.5, min(3.0, severity_mult))

            # ── 5. Price target from the real ActuarialPricer ──
            from src.pricing.actuarial import ActuarialPricer
            _glm = ActuarialPricer()
            payout = 10.0
            glm_result = _glm.price_zone(
                zone=zone,
                predicted_frequency=frequency,
                basis_risk_score=basis_risk_score,
                payout_per_event=payout,
                enrolled=zone.worker_population_est,
            )
            price_target = glm_result.cost_per_worker_year

            # ── 6. Two-tier event detection on the last 7 days ──
            # Uses the caller-supplied wbgt_threshold, not the per-settlement
            # threshold. Duration is the discriminator between tiers.
            trigger_wbgt = wbgt_threshold
            last_7 = wbgts[-7:]

            # Consecutive days above the trigger at the end of the window.
            consec_at_end = 0
            for w in reversed(last_7):
                if w > trigger_wbgt:
                    consec_at_end += 1
                else:
                    break
            peak_wbgt = max(last_7) if last_7 else 0
            peak_excess = max(0, peak_wbgt - trigger_wbgt)

            # Alert tier: 2+ consecutive days above threshold.
            alert_event = 1.0 if consec_at_end >= 2 else 0.0
            if alert_event > 0:
                alert_sev = min(1.0, (consec_at_end / 5.0) * (peak_excess / 4.0) * v_mult)
            else:
                alert_sev = 0.0

            # Payout tier: 5+ consecutive days above threshold.
            payout_event = 1.0 if consec_at_end >= 5 else 0.0
            if payout_event > 0:
                payout_sev = min(1.0, (consec_at_end / 7.0) * (peak_excess / 3.0) * v_mult)
            else:
                payout_sev = 0.0

            all_targets["frequency"].append(frequency)
            all_targets["gpd_sigma"].append(sigma_fit)
            all_targets["gpd_xi"].append(xi_fit)
            all_targets["productivity_loss"].append(mean_prod_loss)
            all_targets["basis_risk"].append(basis_risk_score)
            all_targets["severity_multiplier"].append(severity_mult)
            all_targets["price_target"].append(price_target)
            all_targets["alert_event"].append(alert_event)
            all_targets["payout_event"].append(payout_event)
            all_targets["alert_severity"].append(alert_sev)
            all_targets["payout_severity"].append(payout_sev)

    if all_X:
        X = np.array(all_X, dtype=np.float32)
    else:
        # Keep the documented rank when nothing qualified, so that callers
        # indexing X[:, :, k] still work.
        X = np.zeros((0, window_size, n_features), dtype=np.float32)
    targets = {k: np.array(v, dtype=np.float32) for k, v in all_targets.items()}
    return X, targets
-# ══════════════════════════════════════════════════════════════════════════
-# Trainer
-# ══════════════════════════════════════════════════════════════════════════
-class NeuralPricerTrainer:
-    """Train the HeatRiskNeuralPricer (LSTM or Chronos encoder) on climate data."""
-    def __init__(self, lr: float = 1e-3, epochs: int = 80,
-                 patience: int = 10, weight_decay: float = 1e-4,
-                 encoder: str = "lstm"):
-        if not TORCH_AVAILABLE:
-            raise ImportError("torch is required")
-        self.lr = lr
-        self.epochs = epochs
-        self.patience = patience
-        self.weight_decay = weight_decay
-        self.encoder = encoder
-        if encoder == "chronos":
-            self.model_path = PROJECT_ROOT / "models" / "chronos_pricer_dar.pt"
-            self.norm_path = PROJECT_ROOT / "models" / "chronos_pricer_dar_norm.json"
-        else:
-            self.model_path = PROJECT_ROOT / "models" / "neural_pricer_dar.pt"
-            self.norm_path = PROJECT_ROOT / "models" / "neural_pricer_dar_norm.json"
-    def train(self, X: np.ndarray, targets: dict[str, np.ndarray],
-              val_split: float = 0.2) -> dict:
-        """
-        Train the model with early stopping and return the best-epoch metrics.
-        The leading (1 - val_split) share of the samples is used for training
-        and the remainder for validation, in the order given.
-        Side effects: writes normalization stats to self.norm_path, writes the
-        best model weights to self.model_path, and prints progress to stdout.
-        Args:
-            X: (N, 90, 11) feature sequences
-            targets: dict of target arrays; each is sliced along its first
-                axis at the same split index as X
-            val_split: fraction for validation (temporal split)
-        Returns:
-            The dict produced by _compute_metrics for the epoch with the
-            lowest validation loss; an empty dict if no epoch ever improved.
-        Raises:
-            ImportError: encoder is "chronos" but CHRONOS_AVAILABLE is false.
-        """
-        # Seed both RNGs so weight init and the per-epoch shuffle are repeatable.
-        torch.manual_seed(42)
-        np.random.seed(42)
-        N = len(X)
-        split = int(N * (1 - val_split))
-        # Temporal split (not random — avoids data leakage)
-        X_train, X_val = X[:split], X[split:]
-        t_train = {k: v[:split] for k, v in targets.items()}
-        t_val = {k: v[split:] for k, v in targets.items()}
-        # ── Pre-compute Chronos embeddings (before z-score norm) ──
-        chronos_train = chronos_val = None
-        chronos_d_model = 256  # default
-        if self.encoder == "chronos":
-            if not CHRONOS_AVAILABLE:
-                raise ImportError("chronos-forecasting required for --encoder chronos")
-            print("  Loading Chronos-Bolt-Tiny for embedding pre-computation...")
-            pipeline = ChronosBoltPipeline.from_pretrained(
-                "amazon/chronos-bolt-tiny", device_map="cpu",
-                dtype=torch.float32,
-            )
-            chronos_d_model = pipeline.model.config.d_model
-            print(f"  Chronos d_model: {chronos_d_model}")
-            # Extract raw WBGT (before normalization): column 6, scaled as wbgt/40
-            wbgt_raw_all = X[:, :, 6] * 40.0  # (N, 90) in °C
-            print(f"  Pre-computing Chronos embeddings for {N} samples...")
-            all_embs = []
-            # Embed in fixed-size chunks rather than all N samples at once.
-            chunk = 256
-            for i in range(0, N, chunk):
-                batch = torch.from_numpy(wbgt_raw_all[i:i + chunk].astype(np.float32))
-                with torch.no_grad():
-                    emb, _ = pipeline.embed(batch)
-                # Average over dim 1 to get one vector per sample.
-                all_embs.append(emb.mean(dim=1))  # (chunk, d_model)
-            chronos_all = torch.cat(all_embs, dim=0)  # (N, d_model)
-            # Same split index as the feature arrays keeps rows aligned.
-            chronos_train = chronos_all[:split]
-            chronos_val = chronos_all[split:]
-            print(f"  Chronos embeddings: {chronos_all.shape}")
-            del pipeline  # free memory
-        # Normalize features (z-score from training set)
-        flat = X_train.reshape(-1, X_train.shape[-1])
-        feat_mean = flat.mean(axis=0)
-        # Floor the std so a constant feature does not divide by zero.
-        feat_std = np.maximum(flat.std(axis=0), 1e-6)
-        self.norm_path.parent.mkdir(parents=True, exist_ok=True)
-        norm_data = {"mean": feat_mean.tolist(), "std": feat_std.tolist()}
-        if self.encoder == "chronos":
-            norm_data["chronos_d_model"] = chronos_d_model
-        # The stats file is written before training starts, so it exists even
-        # if the loop below is interrupted.
-        with open(self.norm_path, "w") as f:
-            json.dump(norm_data, f)
-        # Validation data is scaled with the training-set statistics.
-        X_train = (X_train - feat_mean) / feat_std
-        X_val = (X_val - feat_mean) / feat_std
-        # Convert to tensors
-        X_train_t = torch.from_numpy(X_train)
-        X_val_t = torch.from_numpy(X_val)
-        targets_train = {k: torch.from_numpy(v) for k, v in t_train.items()}
-        targets_val = {k: torch.from_numpy(v) for k, v in t_val.items()}
-        if self.encoder == "chronos":
-            model = HeatRiskNeuralPricerChronos(chronos_d_model=chronos_d_model)
-        else:
-            model = HeatRiskNeuralPricer()
-        optimizer = torch.optim.AdamW(
-            model.parameters(), lr=self.lr, weight_decay=self.weight_decay
-        )
-        # Scheduler patience (5) is fixed here and is independent of
-        # self.patience, which only governs early stopping.
-        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
-            optimizer, patience=5, factor=0.5
-        )
-        total_params = sum(p.numel() for p in model.parameters())
-        print(f"  Encoder: {self.encoder}")
-        print(f"  Model parameters: {total_params:,}")
-        print(f"  Training samples: {len(X_train)}, Validation: {len(X_val)}")
-        best_val_loss = float("inf")
-        patience_counter = 0
-        best_state = None
-        best_metrics = {}
-        batch_size = 256
-        # Integer division: a trailing partial batch is skipped each epoch.
-        # With fewer than batch_size samples a single batch takes them all.
-        n_batches = max(1, len(X_train) // batch_size)
-        for epoch in range(self.epochs):
-            # ── Train ──
-            model.train()
-            # Fresh shuffle every epoch.
-            perm = torch.randperm(len(X_train_t))
-            epoch_loss = 0.0
-            for b in range(n_batches):
-                idx = perm[b * batch_size:(b + 1) * batch_size]
-                xb = X_train_t[idx]
-                tb = {k: v[idx] for k, v in targets_train.items()}
-                fwd_kwargs = {}
-                if chronos_train is not None:
-                    fwd_kwargs["chronos_embeddings"] = chronos_train[idx]
-                outputs = model(xb, **fwd_kwargs)
-                loss = self._compute_loss(outputs, tb)
-                optimizer.zero_grad()
-                loss.backward()
-                # Clip the global gradient norm to 1.0 before stepping.
-                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
-                optimizer.step()
-                epoch_loss += loss.item()
-            avg_train_loss = epoch_loss / n_batches
-            # ── Validate ──
-            model.eval()
-            # The whole validation set goes through in one forward pass.
-            with torch.no_grad():
-                val_kwargs = {}
-                if chronos_val is not None:
-                    val_kwargs["chronos_embeddings"] = chronos_val
-                val_outputs = model(X_val_t, **val_kwargs)
-                val_loss = self._compute_loss(val_outputs, targets_val).item()
-            scheduler.step(val_loss)
-            # Log the first epoch and every tenth epoch after that.
-            if (epoch + 1) % 10 == 0 or epoch == 0:
-                delta_std = val_outputs["delta_nn"].std().item()
-                print(
-                    f"  Epoch {epoch + 1:>3}: "
-                    f"train={avg_train_loss:.4f} val={val_loss:.4f} "
-                    f"δ_NN_std={delta_std:.3f}"
-                )
-            if val_loss < best_val_loss:
-                best_val_loss = val_loss
-                patience_counter = 0
-                # Clone each tensor so later optimizer steps cannot change the snapshot.
-                best_state = {k: v.clone() for k, v in model.state_dict().items()}
-                best_metrics = self._compute_metrics(val_outputs, targets_val, epoch + 1)
-            else:
-                patience_counter += 1
-                if patience_counter >= self.patience:
-                    print(f"  Early stopping at epoch {epoch + 1}")
-                    break
-        # Save best model
-        # If no epoch improved, the weights as they stand are saved instead.
-        if best_state:
-            model.load_state_dict(best_state)
-        self.model_path.parent.mkdir(parents=True, exist_ok=True)
-        torch.save(model.state_dict(), self.model_path)
-        size_kb = self.model_path.stat().st_size / 1024
-        print(f"  Saved to {self.model_path} ({size_kb:.0f} KB)")
-        return best_metrics
-    def _compute_loss(self, outputs, targets):
-        """
-        Combine hazard, vulnerability, pricing and regularization terms into
-        one scalar training loss.
-        Args:
-            outputs: dict of model output tensors.
-            targets: dict of target tensors.
-        Returns:
-            Scalar tensor: hazard + vulnerability + 2 * pricing + 0.1 * reg.
-        """
-        def mse(out_key, target_key=None):
-            # Target key defaults to the output key when both dicts share it.
-            return F.mse_loss(outputs[out_key], targets[target_key or out_key])
-        def bce(out_key, target_key):
-            return F.binary_cross_entropy(outputs[out_key], targets[target_key])
-        # Hazard, frequency part: Poisson negative log-likelihood. The epsilon
-        # keeps log() finite when the predicted rate is zero.
-        rate = outputs["lambda_"]
-        frequency_nll = rate - targets["frequency"] * torch.log(rate + 1e-8)
-        # Hazard, tail part: plain MSE on the GPD parameters.
-        gpd_sigma_term = mse("sigma", "gpd_sigma")
-        gpd_xi_term = mse("xi", "gpd_xi")
-        # Two-tier triggers: BCE on event occurrence, MSE on severity.
-        event_term = bce("alert_prob", "alert_event") + bce("payout_prob", "payout_event")
-        severity_term = mse("alert_severity") + mse("payout_severity")
-        hazard = (
-            frequency_nll.mean() + gpd_sigma_term + gpd_xi_term
-            + event_term + severity_term
-        )
-        # Vulnerability heads, each scored by MSE against its target.
-        vulnerability = (
-            mse("productivity_loss")
-            + mse("basis_risk")
-            + mse("severity_multiplier")
-        )
-        # Pricing: squared error in log space, restricted to samples whose
-        # target price exceeds 1.0.
-        quoted = outputs["total_per_worker"]
-        reference = targets["price_target"]
-        priced = reference > 1.0
-        if not priced.any():
-            pricing = torch.tensor(0.0)
-        else:
-            pricing = F.mse_loss(
-                torch.log(quoted[priced] + 1.0),
-                torch.log(reference[priced] + 1.0),
-            )
-        # Penalize large neural corrections.
-        shrinkage = 0.01 * (outputs["delta_nn"] ** 2).mean()
-        return 1.0 * hazard + 1.0 * vulnerability + 2.0 * pricing + 0.1 * shrinkage
-    def _compute_metrics(self, outputs, targets, epoch, val_loss: Optional[float] = None):
-        """
-        Compute evaluation metrics from validation outputs.
-        Args:
-            outputs: dict of model output tensors for the validation set.
-            targets: dict of validation target tensors.
-            epoch: 1-based epoch number recorded in the result.
-            val_loss: validation loss already computed by the caller. When
-                omitted it is recomputed from outputs and targets.
-        Returns:
-            dict with epoch, val_loss, price MAPE (percent, over targets
-            above 1.0), Spearman rank correlation, delta_nn mean/std, and
-            the means of lambda_, basis_risk and productivity_loss.
-        """
-        price_pred = outputs["total_per_worker"].detach().numpy()
-        price_target = targets["price_target"].numpy()
-        # MAPE (only on nonzero targets)
-        valid = price_target > 1.0
-        if valid.any():
-            mape = float(np.mean(np.abs(price_pred[valid] - price_target[valid]) / price_target[valid]) * 100)
-        else:
-            mape = float("nan")
-        # Spearman rank correlation
-        from scipy.stats import spearmanr
-        rho, _ = spearmanr(price_pred, price_target)
-        # Delta NN statistics
-        delta = outputs["delta_nn"].detach().numpy()
-        # "val_loss" used to hold the mean predicted price, which is not a
-        # loss. Report the real validation loss instead.
-        if val_loss is None:
-            with torch.no_grad():
-                val_loss = self._compute_loss(outputs, targets).item()
-        return {
-            "epoch": epoch,
-            "val_loss": float(val_loss),
-            "price_mape_pct": round(mape, 1),
-            "rank_correlation": round(float(rho), 3),
-            "delta_nn_mean": round(float(np.mean(delta)), 4),
-            "delta_nn_std": round(float(np.std(delta)), 4),
-            "mean_lambda": round(float(outputs["lambda_"].mean().item()), 1),
-            "mean_basis_risk": round(float(outputs["basis_risk"].mean().item()), 3),
-            "mean_prod_loss": round(float(outputs["productivity_loss"].mean().item()), 3),
-        }
-# ══════════════════════════════════════════════════════════════════════════
-# Inference Wrapper (drop-in for ActuarialPricer)
-# ══════════════════════════════════════════════════════════════════════════
-class NeuralActuarialPricer:
-    """
-    Drop-in replacement for ActuarialPricer.
-    Preserves the same price_zone() signature and returns ActuarialResult.
-    Three-tier fallback: Chronos encoder → LSTM encoder → GLM baseline.
-    """
-    def __init__(
-        self,
-        admin_rate: float = 0.15,
-    ):
-        """
-        Load the best available pricer, trying Chronos, then LSTM, then GLM.
-        Construction does not raise on a missing or broken model: each tier
-        logs its failure and falls through to the next one.
-        Args:
-            admin_rate: admin loading rate, passed to the fallback
-                ActuarialPricer and to the neural model at pricing time.
-        """
-        self.admin_rate = admin_rate
-        # Always built, so price_zone has something to fall back to.
-        self._fallback = ActuarialPricer(admin_rate)
-        self._model: Optional[object] = None
-        self._norm: Optional[dict] = None
-        self._chronos_pipeline = None
-        # Stays "glm" unless one of the tiers below loads successfully.
-        self._encoder_type = "glm"
-        self._trigger_head = None
-        if not TORCH_AVAILABLE:
-            log.warning("torch not available, using fallback pricer")
-            return
-        # Tier 1: Try Chronos encoder
-        chronos_mp = PROJECT_ROOT / "models" / "chronos_pricer_dar.pt"
-        chronos_np = PROJECT_ROOT / "models" / "chronos_pricer_dar_norm.json"
-        # Needs the chronos package plus both the weights and the norm file.
-        if CHRONOS_AVAILABLE and chronos_mp.exists() and chronos_np.exists():
-            try:
-                with open(chronos_np) as f:
-                    self._norm = json.load(f)
-                # Use the d_model recorded in the norm file, or 256 if absent.
-                d_model = self._norm.get("chronos_d_model", 256)
-                self._model = HeatRiskNeuralPricerChronos(chronos_d_model=d_model)
-                self._model.load_state_dict(
-                    torch.load(chronos_mp, map_location="cpu", weights_only=True)
-                )
-                self._model.eval()
-                # Load Chronos pipeline
-                self._chronos_pipeline = ChronosBoltPipeline.from_pretrained(
-                    "amazon/chronos-bolt-tiny", device_map="cpu",
-                    dtype=torch.float32,
-                )
-                # Wire up the encoder
-                feat_mean = np.array(self._norm["mean"], dtype=np.float32)
-                feat_std = np.array(self._norm["std"], dtype=np.float32)
-                self._model.encoder.set_pipeline(
-                    self._chronos_pipeline, feat_mean, feat_std
-                )
-                self._encoder_type = "chronos"
-                log.info("Chronos neural actuarial pricer loaded (d_model=%d)", d_model)
-                # Load retrained trigger head (benchmark-driven, event-level)
-                # Optional: a missing or broken checkpoint leaves
-                # _trigger_head as None without failing the Chronos tier.
-                trigger_path = PROJECT_ROOT / "models" / "trigger_head_retrained.pt"
-                if trigger_path.exists():
-                    try:
-                        ckpt = torch.load(trigger_path, map_location="cpu", weights_only=True)
-                        d_in = ckpt["d_model"]
-                        # Three-layer MLP ending in 3 logits. It is declared
-                        # here so its layout matches the checkpoint's state_dict.
-                        class _TriggerHead(torch.nn.Module):
-                            def __init__(self, d_in):
-                                super().__init__()
-                                self.net = torch.nn.Sequential(
-                                    torch.nn.Linear(d_in, 128), torch.nn.GELU(), torch.nn.Dropout(0.3),
-                                    torch.nn.Linear(128, 64), torch.nn.GELU(), torch.nn.Dropout(0.2),
-                                    torch.nn.Linear(64, 3),
-                                )
-                            def forward(self, x):
-                                return self.net(x)
-                        self._trigger_head = _TriggerHead(d_in)
-                        self._trigger_head.load_state_dict(ckpt["state_dict"])
-                        self._trigger_head.eval()
-                        log.info("Retrained trigger head loaded (d_in=%d)", d_in)
-                    except Exception as e:
-                        log.warning("Retrained trigger head failed to load: %s", e)
-                        self._trigger_head = None
-                # Chronos tier succeeded: skip the LSTM tier entirely.
-                return
-            except Exception as e:
-                log.warning("Chronos pricer failed to load: %s — trying LSTM", e)
-                # Drop partially loaded state before trying the next tier.
-                self._model = None
-                self._norm = None
-                self._chronos_pipeline = None
-        # Tier 2: Try LSTM encoder
-        lstm_mp = PROJECT_ROOT / "models" / "neural_pricer_dar.pt"
-        lstm_np = PROJECT_ROOT / "models" / "neural_pricer_dar_norm.json"
-        if lstm_mp.exists() and lstm_np.exists():
-            try:
-                self._model = HeatRiskNeuralPricer()
-                self._model.load_state_dict(
-                    torch.load(lstm_mp, map_location="cpu", weights_only=True)
-                )
-                self._model.eval()
-                with open(lstm_np) as f:
-                    self._norm = json.load(f)
-                self._encoder_type = "lstm"
-                log.info("LSTM neural actuarial pricer loaded from %s", lstm_mp)
-            except Exception as e:
-                log.warning("LSTM pricer failed to load: %s — using GLM fallback", e)
-                # With _model back at None, price_zone uses the fallback pricer.
-                self._model = None
-        else:
-            log.info("No neural pricer weights found, using GLM fallback")
-    @property
-    def is_neural(self) -> bool:
-        """Whether a neural model was loaded (False means fallback pricing only)."""
-        return not (self._model is None)
-    def price_zone(
-        self,
-        zone,
-        predicted_frequency: float,
-        basis_risk_score: float,
-        payout_per_event: float,
-        enrolled: int,
-        climate_history: Optional[list[dict]] = None,
-    ) -> ActuarialResult:
-        """
-        Price a zone, preferring the neural model and degrading to the fallback.
-        The fallback pricer is used when no model is loaded, when
-        climate_history is missing or has fewer than 30 entries, or when the
-        neural path raises.
-        Args:
-            zone: UrbanZone from config
-            predicted_frequency: annual trigger events (fallback only)
-            basis_risk_score: 0-1 (fallback only)
-            payout_per_event: USD per event per worker
-            enrolled: number of workers
-            climate_history: optional list of daily dicts with
-                temp_max_c, temp_min_c, humidity_pct, wind_speed_ms, etc.
-        Returns:
-            ActuarialResult from whichever pricer handled the request.
-        """
-        def fallback_quote():
-            # Single place for the fallback call so both exits use the same arguments.
-            return self._fallback.price_zone(
-                zone, predicted_frequency, basis_risk_score,
-                payout_per_event, enrolled,
-            )
-        if self._model is None:
-            return fallback_quote()
-        if climate_history is None or len(climate_history) < 30:
-            return fallback_quote()
-        try:
-            quote = self._neural_price(
-                zone, payout_per_event, enrolled, climate_history
-            )
-        except Exception as e:
-            log.warning("Neural pricing failed for %s, using fallback: %s",
-                       zone.zone_id, e)
-            return fallback_quote()
-        return quote
-    def _neural_price(
-        self, zone, payout_per_event: float, enrolled: int,
-        climate_history: list[dict],
-    ) -> ActuarialResult:
-        """
-        Run the neural model on the last 90 days and build an ActuarialResult.
-        Args:
-            zone: zone object; reads heat_vulnerability, outdoor_exposure_pct,
-                elevation_m, settlement_type, zone_id, name and city.
-            payout_per_event: payout amount passed to the model and used in
-                the cost decomposition.
-            enrolled: number of workers; values below 1 are treated as 1.
-            climate_history: non-empty list of daily dicts. Only the last 90
-                entries are used; a shorter history is left-padded with its
-                first record.
-        Returns:
-            ActuarialResult whose cost_per_worker_year is the model's
-            total_per_worker output, with a detailed cost_breakdown.
-        """
-        # Build feature sequence (last 90 days)
-        # Must match build_training_samples zone_static encoding
-        vuln_enc = {"high": 1.0, "moderate": 0.5, "low": 0.0}
-        zone_static = [
-            vuln_enc.get(zone.heat_vulnerability, 0.5),
-            zone.outdoor_exposure_pct,
-            zone.elevation_m / 2000.0,
-            0.0,
-        ]
-        history = climate_history[-90:]
-        if len(history) < 90:
-            # Pad with first record
-            history = [history[0]] * (90 - len(history)) + history
-        # Apply UHI correction (same as training)
-        uhi_lo, uhi_hi = UHI_RANGES.get(zone.settlement_type, (1.0, 2.0))
-        mean_uhi = (uhi_lo + uhi_hi) / 2.0
-        seq = []
-        # WBGT is computed once per day here and reused for the Chronos input
-        # and the trigger-head features, instead of being recomputed from the
-        # same readings at each use.
-        daily_wbgt = []
-        for day in history:
-            # NOTE(review): `or` treats a reading of 0 as missing (e.g. calm
-            # wind becomes 3.0). Left unchanged because the features must
-            # match the training encoding; confirm before changing.
-            t_max_grid = day.get("temp_max_c") or day.get("temp_c") or 30.0
-            t_max = float(t_max_grid) + mean_uhi  # UHI-corrected
-            t_min = day.get("temp_min_c") or t_max - 6.0
-            hum = day.get("humidity_pct") or 75.0
-            wind = day.get("wind_speed_ms") or 3.0
-            solar = day.get("solar_rad_wm2") or 200.0
-            precip = day.get("precip_mm") or 0.0
-            wbgt = calculate_wbgt(t_max, float(hum))
-            daily_wbgt.append(wbgt)
-            seq.append([
-                t_max / 40.0, float(t_min) / 30.0,
-                float(hum) / 100.0, float(wind) / 10.0,
-                float(solar) / 400.0, float(precip) / 50.0,
-                wbgt / 40.0,
-            ] + zone_static)
-        x = np.array([seq], dtype=np.float32)
-        # Normalize
-        if self._norm:
-            mean = np.array(self._norm["mean"], dtype=np.float32)
-            std = np.array(self._norm["std"], dtype=np.float32)
-            x = (x - mean) / std
-        x_tensor = torch.from_numpy(x)
-        # For Chronos encoder: compute embedding from raw WBGT
-        chronos_emb = None
-        if self._encoder_type == "chronos" and self._chronos_pipeline is not None:
-            wbgt_seq = np.array(daily_wbgt, dtype=np.float32)
-            wbgt_tensor = torch.from_numpy(wbgt_seq).unsqueeze(0)  # (1, 90)
-            with torch.no_grad():
-                emb, _ = self._chronos_pipeline.embed(wbgt_tensor)
-            chronos_emb = emb.mean(dim=1)  # (1, d_model)
-        with torch.no_grad():
-            outputs = self._model(
-                x_tensor, payout_per_event, self.admin_rate,
-                **({} if chronos_emb is None else {"chronos_embeddings": chronos_emb})
-            )
-        # Extract values
-        lambda_ = outputs["lambda_"].item()
-        sigma = outputs["sigma"].item()
-        xi = outputs["xi"].item()
-        alert_prob = outputs["alert_prob"].item()
-        payout_prob = outputs["payout_prob"].item()
-        alert_severity = outputs["alert_severity"].item()
-        payout_severity = outputs["payout_severity"].item()
-        # Override alert/payout probabilities with the retrained trigger head
-        # if it's available. The retrained head was trained on event-level labels
-        # (benchmark-driven: 8% → 28% hit rate on insurance trigger benchmark).
-        # Pricing math (lambda, sigma, xi, severity) is NOT affected.
-        if self._trigger_head is not None and chronos_emb is not None:
-            try:
-                alert_thresh = 35.1
-                last_7 = daily_wbgt[-7:]
-                last_14 = daily_wbgt[-14:]
-                # Reference window: the first 30 days of the 90-day history.
-                baseline = daily_wbgt[:30]
-                extra = torch.tensor([[
-                    float(np.mean(last_7)), float(np.mean(last_14)),
-                    max(last_7) - min(last_7),
-                    sum(1 for w in last_14 if w >= alert_thresh),
-                    float(np.mean(last_7)) - float(np.mean(baseline)) if baseline else 0.0,
-                    max(last_7),
-                ]], dtype=torch.float32)
-                trigger_input = torch.cat([chronos_emb, extra], dim=1)
-                with torch.no_grad():
-                    trigger_logits = self._trigger_head(trigger_input)
-                    trigger_probs = torch.softmax(trigger_logits, dim=1)[0]
-                # Map 3-class probs to alert/payout probs. Both are computed
-                # before either is assigned, so a failure cannot leave only
-                # one of them overridden.
-                head_alert = float(trigger_probs[1] + trigger_probs[2])  # alert OR payout
-                head_payout = float(trigger_probs[2])  # payout only
-                alert_prob, payout_prob = head_alert, head_payout
-            except Exception as e:
-                # Best effort: keep the base model's probabilities, but record
-                # the failure rather than hiding it.
-                log.warning(
-                    "Retrained trigger head inference failed for %s, "
-                    "using base model probabilities: %s",
-                    zone.zone_id, e,
-                )
-        prod_loss = outputs["productivity_loss"].item()
-        neural_basis_risk = outputs["basis_risk"].item()
-        severity_mult = outputs["severity_multiplier"].item()
-        delta_nn = outputs["delta_nn"].item()
-        total_per_worker = outputs["total_per_worker"].item()
-        glm_price = outputs["glm_price"].item()
-        enrolled = max(enrolled, 1)
-        # Decompose for transparency
-        base_cost = lambda_ * payout_per_event * enrolled
-        basis_loading = base_cost * (neural_basis_risk * 0.5)
-        vuln_loading = base_cost * (prod_loss * 0.2)
-        subtotal = base_cost + basis_loading + vuln_loading
-        admin_loading = subtotal * self.admin_rate
-        # The epsilon guards against division by a zero GLM price.
-        neural_correction_pct = (total_per_worker / (glm_price + 1e-8) - 1.0) * 100
-        # Per-event amounts by tier, reused for the annual figures below.
-        cash_per_event = 2.0 + alert_severity * 3.0
-        insurance_per_event = 7.0 + payout_severity * 13.0
-        cost_breakdown = {
-            "base_frequency_cost": round(base_cost, 2),
-            "basis_risk_adjustment": round(basis_loading, 2),
-            "vulnerability_adjustment": round(vuln_loading, 2),
-            "admin_overhead": round(admin_loading, 2),
-            "total": round(total_per_worker * enrolled, 2),
-            "neural_correction_pct": round(neural_correction_pct, 1),
-            "glm_baseline_per_worker": round(glm_price, 2),
-            "neural_price_per_worker": round(total_per_worker, 2),
-            "gpd_shape_xi": round(xi, 3),
-            "gpd_scale_sigma": round(sigma, 3),
-            "learned_frequency": round(lambda_, 1),
-            "alert_prob": round(alert_prob, 3),
-            "payout_prob": round(payout_prob, 3),
-            "alert_severity": round(alert_severity, 3),
-            "payout_severity": round(payout_severity, 3),
-            # Backward compat
-            "trigger_prob": round(alert_prob, 3),
-            "payout_factor": round(payout_severity, 3),
-            # Funding decomposition (SEWA pilot structure)
-            # Cash tier: $2-5 per event, ~12 events/year
-            "cash_per_event": round(cash_per_event, 2),
-            # Insurance tier: $7-20 per event, ~3 events/year
-            "insurance_per_event": round(insurance_per_event, 2),
-            # Annual costs by tier
-            "annual_cash_cost": round(cash_per_event * min(lambda_, 15), 2),
-            "annual_insurance_cost": round(insurance_per_event * max(0, lambda_ - 10) * 0.3, 2),
-            # Worker pays max $3/year (capped based on informal daily wage ~$3-5)
-            "worker_contribution": min(3.0, round(total_per_worker * 0.15, 2)),
-            # Philanthropy covers cash tier + vulnerability gap
-            "philanthropy_share": round(total_per_worker * 0.45, 2),
-            # Insurer covers remainder
-            "insurer_premium": round(total_per_worker * 0.40, 2),
-            "learned_basis_risk": round(neural_basis_risk, 3),
-            "productivity_loss_rate": round(prod_loss, 3),
-            "severity_multiplier": round(severity_mult, 3),
-            "explanation": (
-                f"{zone.name}: Neural EVT predicts {lambda_:.1f} events/year "
-                f"(GPD ξ={xi:.2f}, σ={sigma:.1f}), "
-                f"learned basis risk {neural_basis_risk:.0%}, "
-                f"WHO productivity loss {prod_loss:.0%}, "
-                f"neural correction {neural_correction_pct:+.1f}% vs GLM"
-            ),
-        }
-        return ActuarialResult(
-            zone_id=zone.zone_id,
-            zone_name=zone.name,
-            city=zone.city,
-            cost_per_worker_year=round(total_per_worker, 2),
-            expected_annual_payouts=round(base_cost, 2),
-            frequency_component=round(lambda_ * payout_per_event, 2),
-            basis_risk_loading=round(basis_loading, 2),
-            vulnerability_loading=round(vuln_loading, 2),
-            admin_loading=round(admin_loading, 2),
-            cost_breakdown=cost_breakdown,
-            enrolled_workers=enrolled,
-        )

tests/eval_heat_predictor.py DELETED Viewed

@@ -1,157 +0,0 @@
-"""Evaluate heat wave prediction models."""
-import json
-import os
-import numpy as np
-import pytest
-from sklearn.metrics import roc_auc_score, precision_score, recall_score
-from src.prediction.heat_forecast import HeatWavePredictor, CITY_THRESHOLDS
-from config import ZONES
-def _generate_test_data(zone, n_days=365, seed=123):
-    """
-    Build a synthetic year of weather plus trigger labels for one zone.
-    Returns (temps, humidities, wbgts, labels). The first three lists have
-    n_days entries. labels has n_days - 7 entries and is 1 when the following
-    7 days contain at least 2 consecutive days at or above the city threshold.
-    """
-    rng = np.random.RandomState(seed)
-    # Use the city threshold from the model's own config
-    threshold = CITY_THRESHOLDS.get(zone.city, 33.0)
-    temps, humidities, wbgts = [], [], []
-    # Seasonal pattern + noise. Temperature is drawn before humidity each day.
-    for day_idx in range(n_days):
-        swing = 3 * np.sin(2 * np.pi * day_idx / 365)
-        temp = threshold - 2 + swing + rng.randn() * 3
-        hum = 65 + rng.randn() * 10
-        temps.append(temp)
-        humidities.append(max(20, min(100, hum)))
-        # Simplified WBGT, computed from the unclamped humidity draw.
-        wbgts.append(0.7 * temp + 0.3 * hum * 0.3 - 10)
-    def longest_hot_run(window):
-        # Length of the longest streak of readings at or above the threshold.
-        best = run = 0
-        for reading in window:
-            run = run + 1 if reading >= threshold else 0
-            best = max(best, run)
-        return best
-    # Ground truth looks only at days start+1 .. start+7.
-    labels = [
-        1 if longest_hot_run(temps[start + 1:start + 8]) >= 2 else 0
-        for start in range(n_days - 7)
-    ]
-    return temps, humidities, wbgts, labels
-def test_predictor_output_valid():
-    """Predictions should be valid probabilities with confidence."""
-    predictor = HeatWavePredictor()
-    zone = ZONES[0]
-    # Seeded generator so the inputs are identical on every run. The global
-    # np.random state made a failure impossible to reproduce.
-    rng = np.random.RandomState(0)
-    temps = [30 + rng.randn() * 3 for _ in range(30)]
-    humidity = [70 + rng.randn() * 5 for _ in range(30)]
-    wbgt = [28 + rng.randn() * 2 for _ in range(30)]
-    prob, conf, tier = predictor.predict(zone, temps, humidity, wbgt)
-    assert 0 <= prob <= 1, f"Probability {prob} out of [0,1]"
-    assert 0 <= conf <= 1, f"Confidence {conf} out of [0,1]"
-    assert tier in ("ensemble", "full_model", "lstm_only", "persistence", "climatology")
-def test_predictor_tier_fallback():
-    """Test that minimal data degrades to a fallback tier with lower confidence."""
-    predictor = HeatWavePredictor()
-    zone = ZONES[0]
-    # Seeded generator: the confidence comparison below depends on these
-    # draws, so unseeded inputs could make the test pass or fail by chance.
-    rng = np.random.RandomState(0)
-    # Full data -> should get full_model, ensemble, or lstm_only
-    full_temps = [30 + rng.randn() * 3 for _ in range(90)]
-    full_hum = [70 + rng.randn() * 5 for _ in range(90)]
-    full_wbgt = [28 + rng.randn() * 2 for _ in range(90)]
-    prob, conf, tier = predictor.predict(zone, full_temps, full_hum, full_wbgt)
-    assert tier in ("ensemble", "full_model", "lstm_only")
-    # Minimal data -> expected to fall back to persistence or climatology.
-    # NOTE: this check accepts every tier, so only the confidence comparison
-    # below actually constrains the sparse-data case.
-    min_temps = [30, 31, 32]
-    min_hum = [70, 70, 70]
-    min_wbgt = [28, 28, 28]
-    prob2, conf2, tier2 = predictor.predict(zone, min_temps, min_hum, min_wbgt)
-    assert tier2 in ("persistence", "climatology", "ensemble", "full_model", "lstm_only")
-    # Less data should generally mean equal or less confidence
-    assert conf2 <= conf + 0.1, f"Minimal-data confidence ({conf2}) should not greatly exceed full-data ({conf})"
-def test_predictor_discrimination():
-    """A steadily hot, humid month must score above a cool, dry one."""
-    predictor = HeatWavePredictor()
-    zone = ZONES[0]
-    days = range(30)
-    # Hot month: temperature and WBGT climb a little every day.
-    hot_inputs = (
-        [36 + i * 0.2 for i in days],
-        [80] * 30,
-        [32 + i * 0.1 for i in days],
-    )
-    # Cool month: gentle oscillation around low values.
-    cool_inputs = (
-        [22 + np.sin(i / 5) for i in days],
-        [50] * 30,
-        [20 + np.sin(i / 5) for i in days],
-    )
-    p_hot, _, _ = predictor.predict(zone, *hot_inputs)
-    p_cool, _, _ = predictor.predict(zone, *cool_inputs)
-    assert p_hot > p_cool, f"Hot prob ({p_hot:.3f}) should > cool prob ({p_cool:.3f})"
-def test_predictor_metrics():
-    """
-    Score the predictor on synthetic data for one zone per city.
-    Writes per-zone AUROC, precision, recall and sample counts to
-    tests/eval_results/heat_predictor_eval.json, then requires at least one
-    zone to beat a random classifier.
-    """
-    predictor = HeatWavePredictor()
-    # First zone seen for each city, in ZONES order.
-    first_zone_by_city = {}
-    for candidate in ZONES:
-        first_zone_by_city.setdefault(candidate.city, candidate)
-    def rounded_or_none(value):
-        # NaN marks a metric that could not be computed; store it as null.
-        return round(value, 3) if not np.isnan(value) else None
-    results = {}
-    for zone in first_zone_by_city.values():
-        temps, humidities, wbgts, labels = _generate_test_data(zone)
-        predictions = []
-        # Each prediction sees only the 30 days before day i.
-        for i in range(30, len(labels)):
-            window = slice(i - 30, i)
-            prob, _, tier = predictor.predict(
-                zone, temps[window], humidities[window], wbgts[window]
-            )
-            predictions.append(prob)
-        # Align labels with predictions
-        y_true = labels[30:30 + len(predictions)]
-        y_pred = predictions[:len(y_true)]
-        auroc = precision = recall = float('nan')
-        if len(set(y_true)) > 1:  # need both classes for AUROC
-            auroc = roc_auc_score(y_true, y_pred)
-            binary = [1 if p > 0.5 else 0 for p in y_pred]
-            precision = precision_score(y_true, binary, zero_division=0)
-            recall = recall_score(y_true, binary, zero_division=0)
-        results[zone.zone_id] = {
-            "city": zone.city,
-            "auroc": rounded_or_none(auroc),
-            "precision": rounded_or_none(precision),
-            "recall": rounded_or_none(recall),
-            "n_samples": len(y_true),
-            "positive_rate": round(sum(y_true) / len(y_true), 3) if y_true else 0,
-        }
-    os.makedirs("tests/eval_results", exist_ok=True)
-    with open("tests/eval_results/heat_predictor_eval.json", "w") as f:
-        json.dump(results, f, indent=2)
-    # At least one zone should have AUROC > 0.5 (better than random)
-    valid_aurocs = [
-        entry["auroc"] for entry in results.values() if entry["auroc"] is not None
-    ]
-    assert any(a > 0.5 for a in valid_aurocs), f"No zone has AUROC > 0.5: {valid_aurocs}"

tests/eval_neural_pricer.py DELETED Viewed

@@ -1,303 +0,0 @@
-"""Evaluate neural actuarial pricing model against GLM baseline and sanity checks."""
-import json
-import os
-import numpy as np
-import pytest
-from scipy.stats import spearmanr
-from pathlib import Path
-from config import ZONES, ZONE_MAP, PRIMARY_CITY, PRIMARY_CITY_SLUG
-from src.pricing.neural_actuarial import NeuralActuarialPricer
-from src.pricing.actuarial import ActuarialPricer, ActuarialResult
-PROJECT_ROOT = Path(__file__).resolve().parents[1]  # directory that contains this file's own directory
-ERA5_PATH = PROJECT_ROOT / "data" / f"era5land_{PRIMARY_CITY_SLUG}.json"  # JSON input read by _load_era5_history()
-ACTIVE_ZONES = [z for z in ZONES if z.city == PRIMARY_CITY]  # only zones whose city equals PRIMARY_CITY
-# Default pricing parameters: fixed inputs passed to every price_zone() call in this module
-PAYOUT_PER_EVENT = 10.0  # passed as payout_per_event; currency/units not stated here — TODO confirm
-DEFAULT_FREQUENCY = 12.0  # passed as predicted_frequency; time base not stated here — TODO confirm
-DEFAULT_BASIS_RISK = 0.3  # passed as basis_risk_score
-def _load_era5_history() -> dict[str, list[dict]]:
-    """Load ERA5-Land data and return 90 days of history per primary-city zone."""
-    with open(ERA5_PATH) as f:
-        raw = json.load(f)
-    # Take the last 90 days for each zone
-    return {zid: records[-90:] for zid, records in raw.items()}
-@pytest.fixture(scope="module")
-def era5_history():
-    return _load_era5_history()
-@pytest.fixture(scope="module")
-def neural_pricer():
-    return NeuralActuarialPricer()
-@pytest.fixture(scope="module")
-def glm_pricer():
-    return ActuarialPricer()
-@pytest.fixture(scope="module")
-def neural_results(neural_pricer, era5_history):
-    """Price all primary-city zones with the neural model."""
-    results = {}
-    for zone in ACTIVE_ZONES:
-        history = era5_history.get(zone.zone_id)
-        r = neural_pricer.price_zone(
-            zone=zone,
-            predicted_frequency=DEFAULT_FREQUENCY,
-            basis_risk_score=DEFAULT_BASIS_RISK,
-            payout_per_event=PAYOUT_PER_EVENT,
-            enrolled=zone.worker_population_est,
-            climate_history=history,
-        )
-        results[zone.zone_id] = r
-    return results
-@pytest.fixture(scope="module")
-def glm_results(glm_pricer):
-    """Price all primary-city zones with the GLM baseline."""
-    results = {}
-    for zone in ACTIVE_ZONES:
-        r = glm_pricer.price_zone(
-            zone=zone,
-            predicted_frequency=DEFAULT_FREQUENCY,
-            basis_risk_score=DEFAULT_BASIS_RISK,
-            payout_per_event=PAYOUT_PER_EVENT,
-            enrolled=zone.worker_population_est,
-        )
-        results[zone.zone_id] = r
-    return results
-# ── Test 1: Model loads ──────────────────────────────────────────────────
-def test_neural_model_loads(neural_pricer):
-    """NeuralActuarialPricer should load the trained PyTorch model."""
-    assert neural_pricer._model is not None, (
-        "Neural model failed to load — check models/neural_pricer_dar.pt exists"
-    )
-    assert neural_pricer.is_neural
-# ── Test 2: Price accuracy vs GLM ────────────────────────────────────────
-def test_price_accuracy_vs_glm(neural_results, glm_results):
-    """Neural prices should correlate with GLM and have bounded per-zone divergence.
-    The neural model learns frequencies from 20 years of ERA5-Land data while the
-    GLM uses a fixed default frequency, so absolute levels differ. We test that:
-    (a) neural prices are all positive and finite, and
-    (b) the coefficient of variation within neural prices is within 20% MAPE
-        of the CV within GLM prices (structural similarity).
-    """
-    neural_prices = []
-    glm_prices = []
-    details = {}
-    for zone in ACTIVE_ZONES:
-        zid = zone.zone_id
-        neural_price = neural_results[zid].cost_per_worker_year
-        glm_price = glm_results[zid].cost_per_worker_year
-        neural_prices.append(neural_price)
-        glm_prices.append(glm_price)
-        details[zid] = {
-            "neural": round(neural_price, 2),
-            "glm": round(glm_price, 2),
-        }
-    # All neural prices should be positive and finite
-    for zid, p in zip([z.zone_id for z in ACTIVE_ZONES], neural_prices):
-        assert p > 0 and np.isfinite(p), f"{zid}: invalid neural price {p}"
-    # Compare relative spread: CV(neural) vs CV(glm)
-    cv_neural = float(np.std(neural_prices) / np.mean(neural_prices))
-    cv_glm = float(np.std(glm_prices) / np.mean(glm_prices))
-    details["cv_neural"] = round(cv_neural, 3)
-    details["cv_glm"] = round(cv_glm, 3)
-    # Save results
-    os.makedirs("tests/eval_results", exist_ok=True)
-    with open("tests/eval_results/neural_pricer_eval.json", "w") as f:
-        json.dump(details, f, indent=2)
-    # Neural model should have meaningful price variation across zones (CV > 0.01)
-    assert cv_neural > 0.01, (
-        f"Neural prices have negligible variation (CV={cv_neural:.3f}) — model may be collapsing"
-    )
-# ── Test 3: Rank preservation ────────────────────────────────────────────
-def test_rank_preservation(neural_results, glm_results):
-    """Spearman rank correlation between neural and GLM zone rankings should be positive.
-    The neural model learns from real climate data while GLM uses fixed inputs, so
-    some rank reordering is expected. We require rho > 0.4 (moderate positive
-    correlation) — both models should agree on broad risk ordering.
-    """
-    zone_ids = [z.zone_id for z in ACTIVE_ZONES]
-    neural_prices = [neural_results[zid].cost_per_worker_year for zid in zone_ids]
-    glm_prices = [glm_results[zid].cost_per_worker_year for zid in zone_ids]
-    rho, pval = spearmanr(neural_prices, glm_prices)
-    assert rho > 0.4, (
-        f"Spearman correlation {rho:.3f} below 0.4 — neural model disagrees "
-        f"too strongly with GLM on zone risk ordering"
-    )
-# ── Test 4: Neural correction bounded ───────────────────────────────────
-def test_delta_nn_bounded(neural_results):
-    """Neural correction delta should be within [-50, 50]% and mean near 0."""
-    corrections = []
-    for zone in ACTIVE_ZONES:
-        zid = zone.zone_id
-        breakdown = neural_results[zid].cost_breakdown
-        correction = breakdown["neural_correction_pct"]
-        corrections.append(correction)
-        assert -50 <= correction <= 50, (
-            f"{zid}: neural_correction_pct {correction:.1f}% outside [-50, 50]"
-        )
-    mean_correction = float(np.mean(corrections))
-    # Mean correction should be within the bounded range (not saturating at limits)
-    assert abs(mean_correction) < 45, (
-        f"Mean neural correction {mean_correction:.1f}% — model is saturating "
-        f"at the correction boundary"
-    )
-# ── Test 5: Informal settlements priced higher ──────────────────────────
-def test_informal_priced_higher(neural_results):
-    """Average price for informal settlement zones should exceed formal zones."""
-    informal_prices = []
-    formal_prices = []
-    for zone in ACTIVE_ZONES:
-        price = neural_results[zone.zone_id].cost_per_worker_year
-        if zone.settlement_type == "informal":
-            informal_prices.append(price)
-        elif zone.settlement_type == "formal":
-            formal_prices.append(price)
-    assert len(informal_prices) > 0 and len(formal_prices) > 0, (
-        f"Need both informal and formal zones in {PRIMARY_CITY}"
-    )
-    mean_informal = float(np.mean(informal_prices))
-    mean_formal = float(np.mean(formal_prices))
-    assert mean_informal > mean_formal, (
-        f"Informal mean (${mean_informal:.2f}) should exceed "
-        f"formal mean (${mean_formal:.2f})"
-    )
-# ── Test 6: Hazard parameters valid ─────────────────────────────────────
-def test_hazard_parameters_valid(neural_results):
-    """GPD shape xi in [-0.4, 0.4] and scale sigma in [0.1, 10] for all zones."""
-    for zone in ACTIVE_ZONES:
-        zid = zone.zone_id
-        breakdown = neural_results[zid].cost_breakdown
-        xi = breakdown["gpd_shape_xi"]
-        sigma = breakdown["gpd_scale_sigma"]
-        assert -0.4 <= xi <= 0.4, (
-            f"{zid}: GPD shape xi={xi:.3f} outside [-0.4, 0.4]"
-        )
-        assert 0.1 <= sigma <= 10.0, (
-            f"{zid}: GPD scale sigma={sigma:.3f} outside [0.1, 10]"
-        )
-# ── Test 7: Fallback without climate history ─────────────────────────────
-def test_fallback_without_climate_history(neural_pricer, glm_pricer):
-    """Calling price_zone() with climate_history=None should return valid GLM result."""
-    zone = ACTIVE_ZONES[0]
-    result = neural_pricer.price_zone(
-        zone=zone,
-        predicted_frequency=DEFAULT_FREQUENCY,
-        basis_risk_score=DEFAULT_BASIS_RISK,
-        payout_per_event=PAYOUT_PER_EVENT,
-        enrolled=zone.worker_population_est,
-        climate_history=None,
-    )
-    glm_result = glm_pricer.price_zone(
-        zone=zone,
-        predicted_frequency=DEFAULT_FREQUENCY,
-        basis_risk_score=DEFAULT_BASIS_RISK,
-        payout_per_event=PAYOUT_PER_EVENT,
-        enrolled=zone.worker_population_est,
-    )
-    assert isinstance(result, ActuarialResult)
-    assert result.cost_per_worker_year > 0, "Fallback price should be positive"
-    # Should match GLM exactly when no climate history
-    assert result.cost_per_worker_year == glm_result.cost_per_worker_year, (
-        f"Fallback price ${result.cost_per_worker_year} != GLM ${glm_result.cost_per_worker_year}"
-    )
-    # Should NOT contain neural-specific keys
-    assert "neural_correction_pct" not in result.cost_breakdown
-# ── Test 8: Climate sensitivity ──────────────────────────────────────────
-def test_climate_sensitivity(neural_pricer, neural_results, era5_history):
-    """Perturbing temperatures up by 2C should increase the price for high-risk zones."""
-    high_risk_zones = [z for z in ACTIVE_ZONES if z.heat_vulnerability == "high"]
-    assert len(high_risk_zones) >= 3, "Need at least 3 high-risk zones"
-    price_increases = 0
-    details = {}
-    for zone in high_risk_zones:
-        history = era5_history.get(zone.zone_id)
-        if history is None:
-            continue
-        # Reuse baseline from neural_results fixture (avoids redundant LSTM forward pass)
-        baseline = neural_results[zone.zone_id]
-        # Perturbed: +2C on all temperature fields
-        perturbed = []
-        for day in history:
-            d = dict(day)
-            d["temp_max_c"] = d.get("temp_max_c", 30.0) + 2.0
-            d["temp_min_c"] = d.get("temp_min_c", 24.0) + 2.0
-            perturbed.append(d)
-        warmer = neural_pricer.price_zone(
-            zone=zone,
-            predicted_frequency=DEFAULT_FREQUENCY,
-            basis_risk_score=DEFAULT_BASIS_RISK,
-            payout_per_event=PAYOUT_PER_EVENT,
-            enrolled=zone.worker_population_est,
-            climate_history=perturbed,
-        )
-        if warmer.cost_per_worker_year > baseline.cost_per_worker_year:
-            price_increases += 1
-        details[zone.zone_id] = {
-            "baseline": round(baseline.cost_per_worker_year, 2),
-            "warmer_2c": round(warmer.cost_per_worker_year, 2),
-            "increased": warmer.cost_per_worker_year > baseline.cost_per_worker_year,
-        }
-    # At least some high-risk zones should show a price increase.
-    # The model may already be in a saturated regime for the hottest informal
-    # zones (where WBGT is near the ceiling), so we require at least 3 zones.
-    assert price_increases >= 3, (
-        f"Only {price_increases}/{len(high_risk_zones)} high-risk zones showed "
-        f"price increase with +2C warming. Details: {details}"
-    )