"""Empirical burn analysis for parametric heat insurance pricing. Industry-standard actuarial approach: apply the trigger rules to 20 years of historical weather data, count how often each tier would have paid out, and price accordingly. This is how ARC, CCRIF, and the SEWA/Arsht-Rockefeller pilot actually set premiums — no neural network, just historical frequency × expected payout × loading factors. Usage: from src.pricing.burn_analysis import BurnAnalysisPricer pricer = BurnAnalysisPricer() result = pricer.price_zone(zone) """ from __future__ import annotations import json import logging from dataclasses import dataclass from pathlib import Path from config import UrbanZone, PRIMARY_CITY_SLUG from src.downscaling import get_zone_uhi_range from src.indexing.heat_index import calculate_wbgt from src.pricing.actuarial import ActuarialResult log = logging.getLogger(__name__) PROJECT_ROOT = Path(__file__).resolve().parents[2] # Trigger thresholds (calibrated P97 from 20 years ERA5-Land at Dar es Salaam) WBGT_THRESHOLD_C = 35.1 ALERT_CONSECUTIVE_DAYS = 2 PAYOUT_CONSECUTIVE_DAYS = 5 # Payout amounts per tier (SEWA pilot structure) ALERT_PAYOUT_USD = 3.50 # cash transfer + safety SMS FULL_PAYOUT_USD = 14.00 # full insurance payout # Loading factors ADMIN_RATE = 0.15 # 15% administrative overhead (industry standard) CONTINGENCY_RATE = 0.05 # 5% contingency for model/data uncertainty INFLATION_BUFFER = 1.05 # 5% inflation reserve # Funding split (SEWA pilot) WORKER_CAP_USD = 3.00 # max worker contribution per year PHILANTHROPY_SHARE = 0.45 # Gates/Arsht-Rock subsidy INSURER_SHARE = 0.40 # commercial insurance @dataclass class BurnResult: """Burn analysis result for a single zone.""" zone_id: str years_analyzed: int alert_events_total: int payout_events_total: int alert_freq_per_year: float payout_freq_per_year: float mean_alert_duration_days: float mean_payout_duration_days: float expected_annual_loss_usd: float basis_risk_score: float def _load_era5_data() -> dict[str, list[dict]]: """Load ERA5-Land daily records for the primary city.""" path = PROJECT_ROOT / "data" / f"era5land_{PRIMARY_CITY_SLUG}.json" return json.loads(path.read_text()) def compute_burn( daily_records: list[dict], uhi_delta_c: float, threshold_c: float = WBGT_THRESHOLD_C, ) -> BurnResult: """Run burn analysis on daily records for one zone. Applies UHI correction, computes WBGT, counts alert and payout events over the full history. Args: daily_records: List of daily dicts with temp_max_c, humidity_pct, date. uhi_delta_c: Mean UHI temperature correction for this zone (°C). threshold_c: WBGT threshold for triggering (default 35.1°C). Returns: BurnResult with event counts and frequencies. """ # Compute UHI-corrected WBGT for each day wbgts = [] for day in daily_records: temp = (day.get("temp_max_c") or 30.0) + uhi_delta_c hum = day.get("humidity_pct") or 70.0 wbgts.append(calculate_wbgt(temp, hum)) # Count events: consecutive runs above threshold. # Tiers are EXCLUSIVE — a 5+ day event is a payout event, not also an alert. # This matches the trigger logic (returns one action, not both). alert_only_durations = [] # 2-4 consecutive days (alert tier only) payout_durations = [] # 5+ consecutive days (full payout tier) run_length = 0 for w in wbgts: if w >= threshold_c: run_length += 1 else: if run_length >= PAYOUT_CONSECUTIVE_DAYS: payout_durations.append(run_length) elif run_length >= ALERT_CONSECUTIVE_DAYS: alert_only_durations.append(run_length) run_length = 0 # Handle trailing run if run_length >= PAYOUT_CONSECUTIVE_DAYS: payout_durations.append(run_length) elif run_length >= ALERT_CONSECUTIVE_DAYS: alert_only_durations.append(run_length) n_years = len(daily_records) / 365.25 return BurnResult( zone_id="", # filled by caller years_analyzed=round(n_years), alert_events_total=len(alert_only_durations), payout_events_total=len(payout_durations), alert_freq_per_year=len(alert_only_durations) / n_years if n_years > 0 else 0, payout_freq_per_year=len(payout_durations) / n_years if n_years > 0 else 0, mean_alert_duration_days=( sum(alert_only_durations) / len(alert_only_durations) if alert_only_durations else 0 ), mean_payout_duration_days=( sum(payout_durations) / len(payout_durations) if payout_durations else 0 ), expected_annual_loss_usd=0, # computed below basis_risk_score=0, # computed below ) def _basis_risk_for_zone(zone: UrbanZone, uhi_delta_c: float) -> float: """Estimate basis risk from UHI uncertainty. Basis risk arises because the index (ERA5 grid + UHI model) doesn't perfectly match actual conditions at each worker's location. Higher UHI correction = more uncertainty = higher basis risk. Returns a score 0-1 where higher means more basis risk. """ uhi_lo, uhi_hi = get_zone_uhi_range(zone) uhi_range = uhi_hi - uhi_lo # Wider UHI range = more uncertainty about actual conditions # Normalized: informal (range=3) → 0.15, formal (range=1) → 0.05 uncertainty_component = min(0.20, uhi_range * 0.05) # Outdoor exposure amplifies basis risk — workers in shade are # less affected by index errors exposure_component = zone.outdoor_exposure_pct * 0.05 return round(uncertainty_component + exposure_component + 0.05, 3) # 5% floor class BurnAnalysisPricer: """Price parametric heat coverage using empirical burn analysis. Loads 20 years of ERA5-Land data once, runs burn analysis per zone with UHI correction, and computes loaded premiums. """ def __init__(self): self._era5_data: dict[str, list[dict]] | None = None self._burn_cache: dict[str, BurnResult] = {} def _ensure_loaded(self) -> None: if self._era5_data is None: self._era5_data = _load_era5_data() log.info("Loaded ERA5-Land data: %d zones", len(self._era5_data)) def burn_for_zone(self, zone: UrbanZone) -> BurnResult: """Get or compute burn analysis for a zone.""" if zone.zone_id in self._burn_cache: return self._burn_cache[zone.zone_id] self._ensure_loaded() records = self._era5_data.get(zone.zone_id, []) if not records: log.warning("No ERA5 data for zone %s", zone.zone_id) result = BurnResult( zone_id=zone.zone_id, years_analyzed=0, alert_events_total=0, payout_events_total=0, alert_freq_per_year=0, payout_freq_per_year=0, mean_alert_duration_days=0, mean_payout_duration_days=0, expected_annual_loss_usd=0, basis_risk_score=0.1, ) self._burn_cache[zone.zone_id] = result return result # Get zone-specific UHI correction (UHI_MODEL env var selects synthetic/lst) uhi_lo, uhi_hi = get_zone_uhi_range(zone) mean_uhi = (uhi_lo + uhi_hi) / 2.0 # Zone-specific trigger threshold (THRESHOLD_MODE env var selects # global=35.1°C vs zone_specific=per-zone P90 from local climatology). from src.pricing.zone_thresholds import get_zone_thresholds alert_c, _payout_peak_c = get_zone_thresholds(zone) result = compute_burn(records, mean_uhi, threshold_c=alert_c) result.zone_id = zone.zone_id result.basis_risk_score = _basis_risk_for_zone(zone, mean_uhi) # Expected annual loss result.expected_annual_loss_usd = ( result.alert_freq_per_year * ALERT_PAYOUT_USD + result.payout_freq_per_year * FULL_PAYOUT_USD ) self._burn_cache[zone.zone_id] = result return result def price_zone( self, zone: UrbanZone, **kwargs, ) -> ActuarialResult: """Price coverage for a single zone using burn analysis. Accepts **kwargs for compatibility with NeuralActuarialPricer's interface (predicted_frequency, climate_history, etc. are ignored). """ burn = self.burn_for_zone(zone) enrolled = max(zone.worker_population_est, 1) # Expected annual loss per worker (actuarial fair price) eal_per_worker = burn.expected_annual_loss_usd # Loading factors basis_risk_loading = eal_per_worker * burn.basis_risk_score vulnerability_loading = eal_per_worker * (zone.outdoor_exposure_pct * 0.10) subtotal = eal_per_worker + basis_risk_loading + vulnerability_loading admin_loading = subtotal * ADMIN_RATE contingency = subtotal * CONTINGENCY_RATE loaded_premium = (subtotal + admin_loading + contingency) * INFLATION_BUFFER # Funding split worker_share = min(WORKER_CAP_USD, loaded_premium) remaining = loaded_premium - worker_share philanthropy = remaining * (PHILANTHROPY_SHARE / (PHILANTHROPY_SHARE + INSURER_SHARE)) insurer = remaining - philanthropy cost_breakdown = { # Burn analysis inputs "alert_freq_per_year": round(burn.alert_freq_per_year, 2), "payout_freq_per_year": round(burn.payout_freq_per_year, 2), "alert_payout_usd": ALERT_PAYOUT_USD, "full_payout_usd": FULL_PAYOUT_USD, "years_analyzed": burn.years_analyzed, "alert_events_total": burn.alert_events_total, "payout_events_total": burn.payout_events_total, "mean_alert_duration_days": round(burn.mean_alert_duration_days, 1), "mean_payout_duration_days": round(burn.mean_payout_duration_days, 1), # Pricing components "expected_annual_loss": round(eal_per_worker, 2), "basis_risk_loading": round(basis_risk_loading, 2), "basis_risk_score": burn.basis_risk_score, "vulnerability_loading": round(vulnerability_loading, 2), "admin_loading": round(admin_loading, 2), "contingency_loading": round(contingency, 2), "loaded_premium": round(loaded_premium, 2), # Funding split "worker_contribution": round(worker_share, 2), "philanthropy_share": round(philanthropy, 2), "insurer_premium": round(insurer, 2), # For pipeline DB compatibility "learned_frequency": round( burn.alert_freq_per_year + burn.payout_freq_per_year, 2), "trigger_prob": 0, "payout_factor": round( burn.payout_freq_per_year / max(burn.alert_freq_per_year, 0.1), 2), } total_zone_cost = loaded_premium * enrolled result = ActuarialResult( zone_id=zone.zone_id, zone_name=zone.name, city=zone.city, cost_per_worker_year=round(loaded_premium, 2), expected_annual_payouts=round(burn.expected_annual_loss_usd * enrolled, 2), frequency_component=round(eal_per_worker, 2), basis_risk_loading=round(basis_risk_loading * enrolled, 2), vulnerability_loading=round(vulnerability_loading * enrolled, 2), admin_loading=round((admin_loading + contingency) * enrolled, 2), cost_breakdown=cost_breakdown, enrolled_workers=enrolled, ) log.info( "Priced %s (%s): $%.2f/worker/yr | alert=%.1f/yr payout=%.1f/yr | " "EAL=$%.2f | basis_risk=%.1f%%", zone.name, zone.settlement_type, loaded_premium, burn.alert_freq_per_year, burn.payout_freq_per_year, eal_per_worker, burn.basis_risk_score * 100, ) return result