Spaces:

jtlevine
/

climate-risk-engine

Paused

App Files Files Community

climate-risk-engine / src /pricing /burn_analysis.py

jtlevine

Zone-specific trigger thresholds behind THRESHOLD_MODE env var

9b0be4c about 1 month ago

raw

history blame contribute delete

12.1 kB

	"""Empirical burn analysis for parametric heat insurance pricing.

	Industry-standard actuarial approach: apply the trigger rules to 20 years
	of historical weather data, count how often each tier would have paid out,
	and price accordingly. This is how ARC, CCRIF, and the SEWA/Arsht-Rockefeller
	pilot actually set premiums — no neural network, just historical frequency
	× expected payout × loading factors.

	Usage:
	    from src.pricing.burn_analysis import BurnAnalysisPricer
	    pricer = BurnAnalysisPricer()
	    result = pricer.price_zone(zone)
	"""
	from __future__ import annotations

	import json
	import logging
	from dataclasses import dataclass
	from pathlib import Path

	from config import UrbanZone, PRIMARY_CITY_SLUG
	from src.downscaling import get_zone_uhi_range
	from src.indexing.heat_index import calculate_wbgt
	from src.pricing.actuarial import ActuarialResult

	log = logging.getLogger(__name__)

	PROJECT_ROOT = Path(__file__).resolve().parents[2]

	# Trigger thresholds (calibrated P97 from 20 years ERA5-Land at Dar es Salaam).
	# WBGT_THRESHOLD_C is only the default for compute_burn(); burn_for_zone()
	# passes the alert threshold returned by get_zone_thresholds() instead.
	WBGT_THRESHOLD_C = 35.1
	ALERT_CONSECUTIVE_DAYS = 2  # minimum run length counted as an alert event
	PAYOUT_CONSECUTIVE_DAYS = 5  # minimum run length counted as a payout event

	# Payout amounts per tier (SEWA pilot structure)
	ALERT_PAYOUT_USD = 3.50  # cash transfer + safety SMS
	FULL_PAYOUT_USD = 14.00  # full insurance payout

	# Loading factors, applied in BurnAnalysisPricer.price_zone()
	ADMIN_RATE = 0.15  # 15% administrative overhead (industry standard)
	CONTINGENCY_RATE = 0.05  # 5% contingency for model/data uncertainty
	INFLATION_BUFFER = 1.05  # multiplier on the loaded premium (5% inflation reserve)

	# Funding split (SEWA pilot). price_zone() charges the worker first, up to
	# WORKER_CAP_USD, then divides the remainder between philanthropy and the
	# insurer in the ratio PHILANTHROPY_SHARE : INSURER_SHARE.
	WORKER_CAP_USD = 3.00  # max worker contribution per year
	PHILANTHROPY_SHARE = 0.45  # Gates/Arsht-Rock subsidy
	INSURER_SHARE = 0.40  # commercial insurance


	@dataclass
	class BurnResult:
	    """Outcome of the historical burn analysis for one zone.

	    Instances are mutable on purpose: compute_burn() returns a partially
	    filled result and the caller assigns ``zone_id``,
	    ``expected_annual_loss_usd`` and ``basis_risk_score`` afterwards.
	    """

	    # Zone identifier; compute_burn() leaves it empty for the caller to set.
	    zone_id: str
	    # Length of the analysed history, rounded to whole years.
	    years_analyzed: int
	    # Number of alert-tier events (runs too short for the payout tier).
	    alert_events_total: int
	    # Number of payout-tier events.
	    payout_events_total: int
	    # Alert-tier events per year of history.
	    alert_freq_per_year: float
	    # Payout-tier events per year of history.
	    payout_freq_per_year: float
	    # Average length in days of alert-tier runs (0 when there were none).
	    mean_alert_duration_days: float
	    # Average length in days of payout-tier runs (0 when there were none).
	    mean_payout_duration_days: float
	    # Frequency-weighted payout per year, in USD.
	    expected_annual_loss_usd: float
	    # Basis-risk score used as a proportional premium loading.
	    basis_risk_score: float


	def _load_era5_data() -> dict[str, list[dict]]:
	    """Load ERA5-Land daily records for the primary city.

	    Reads ``data/era5land_<PRIMARY_CITY_SLUG>.json`` under PROJECT_ROOT and
	    returns the parsed JSON. The file is decoded explicitly as UTF-8 so the
	    result does not depend on the platform's default encoding.

	    Returns:
	        The parsed JSON document. BurnAnalysisPricer looks records up in it
	        by ``zone_id``, so it is presumably a mapping of zone id to a list
	        of daily record dicts — confirm against the data file.

	    Raises:
	        FileNotFoundError: If the data file does not exist.
	        json.JSONDecodeError: If the file is not valid JSON.
	    """
	    path = PROJECT_ROOT / "data" / f"era5land_{PRIMARY_CITY_SLUG}.json"
	    return json.loads(path.read_text(encoding="utf-8"))


	def compute_burn(
	    daily_records: list[dict],
	    uhi_delta_c: float,
	    threshold_c: float = WBGT_THRESHOLD_C,
	) -> BurnResult:
	    """Run burn analysis on daily records for one zone.

	    Adds the UHI correction to each day's maximum temperature, converts it
	    to WBGT with ``calculate_wbgt``, and counts runs of consecutive records
	    at or above ``threshold_c``. Tiers are exclusive: a run of at least
	    PAYOUT_CONSECUTIVE_DAYS is a payout event only, a shorter run of at
	    least ALERT_CONSECUTIVE_DAYS is an alert event, and anything shorter is
	    ignored. This matches the trigger logic (one action per event, not both).

	    Records are treated as consecutive days in list order; the ``date``
	    field is never read here, so gaps or unsorted input go undetected.

	    Args:
	        daily_records: Daily dicts with ``temp_max_c`` and ``humidity_pct``.
	            A missing or ``None`` value falls back to 30.0 °C / 70.0 %;
	            a genuine 0.0 reading is used as-is.
	        uhi_delta_c: UHI correction added to every day's temp_max_c (°C).
	        threshold_c: WBGT trigger threshold (defaults to WBGT_THRESHOLD_C).

	    Returns:
	        BurnResult with event counts, per-year frequencies and mean run
	        lengths. ``zone_id`` is left empty and ``expected_annual_loss_usd``
	        and ``basis_risk_score`` are left at 0 for the caller to fill in.
	    """
	    # Fallbacks used only when a reading is absent (missing key or None).
	    default_temp_c = 30.0
	    default_humidity_pct = 70.0

	    # One flag per record: did the UHI-corrected WBGT reach the threshold?
	    exceeds = []
	    for day in daily_records:
	        temp = day.get("temp_max_c")
	        hum = day.get("humidity_pct")
	        if temp is None:
	            temp = default_temp_c
	        if hum is None:
	            hum = default_humidity_pct
	        exceeds.append(calculate_wbgt(temp + uhi_delta_c, hum) >= threshold_c)

	    alert_only_durations: list[int] = []  # alert tier only
	    payout_durations: list[int] = []  # full payout tier
	    run_length = 0

	    # The trailing False sentinel closes a run that lasts through the final
	    # record, so the classification logic exists in exactly one place.
	    for is_hot in (*exceeds, False):
	        if is_hot:
	            run_length += 1
	            continue
	        if run_length >= PAYOUT_CONSECUTIVE_DAYS:
	            payout_durations.append(run_length)
	        elif run_length >= ALERT_CONSECUTIVE_DAYS:
	            alert_only_durations.append(run_length)
	        run_length = 0

	    n_years = len(daily_records) / 365.25
	    n_alerts = len(alert_only_durations)
	    n_payouts = len(payout_durations)

	    return BurnResult(
	        zone_id="",  # filled by caller
	        years_analyzed=round(n_years),
	        alert_events_total=n_alerts,
	        payout_events_total=n_payouts,
	        # Guard against an empty history (n_years == 0).
	        alert_freq_per_year=n_alerts / n_years if n_years > 0 else 0.0,
	        payout_freq_per_year=n_payouts / n_years if n_years > 0 else 0.0,
	        mean_alert_duration_days=(
	            sum(alert_only_durations) / n_alerts if n_alerts else 0.0
	        ),
	        mean_payout_duration_days=(
	            sum(payout_durations) / n_payouts if n_payouts else 0.0
	        ),
	        expected_annual_loss_usd=0,  # filled by caller
	        basis_risk_score=0,  # filled by caller
	    )


	def _basis_risk_for_zone(zone: UrbanZone, uhi_delta_c: float) -> float:
	    """Score a zone's basis risk from its UHI spread and outdoor exposure.

	    Basis risk arises because the index (ERA5 grid + UHI model) doesn't
	    perfectly match actual conditions at each worker's location. The score
	    is the sum of three terms, rounded to three decimals:

	    * 0.05 per °C of the zone's UHI range (high minus low), capped at 0.20;
	    * 0.05 times ``zone.outdoor_exposure_pct``;
	    * a constant 0.05 added to every zone.

	    Args:
	        zone: Zone whose UHI range and outdoor exposure are read.
	        uhi_delta_c: Accepted for the caller's convenience; not used in
	            the calculation.

	    Returns:
	        The basis-risk score; higher means more basis risk.
	    """
	    lower_c, upper_c = get_zone_uhi_range(zone)
	    spread_c = upper_c - lower_c

	    # A wider UHI range means more uncertainty about actual conditions,
	    # e.g. a 3 °C range contributes 0.15 and a 1 °C range 0.05.
	    spread_term = min(0.20, spread_c * 0.05)

	    # Outdoor exposure amplifies basis risk — workers in shade are less
	    # affected by index errors.
	    exposure_term = zone.outdoor_exposure_pct * 0.05

	    score = spread_term + exposure_term + 0.05  # constant 0.05 base term
	    return round(score, 3)


	class BurnAnalysisPricer:
	    """Price parametric heat coverage using empirical burn analysis.

	    Loads the ERA5-Land data lazily on first use, runs the burn analysis
	    per zone with a UHI correction, and computes loaded premiums. Burn
	    results are cached per ``zone_id`` for the lifetime of the instance.
	    """

	    def __init__(self):
	        # Parsed ERA5-Land data; stays None until a burn analysis needs it.
	        self._era5_data: dict[str, list[dict]] | None = None
	        # zone_id -> BurnResult, so each zone is analysed at most once.
	        self._burn_cache: dict[str, BurnResult] = {}

	    def _ensure_loaded(self) -> None:
	        """Load the ERA5-Land data on first call; later calls are no-ops."""
	        if self._era5_data is None:
	            self._era5_data = _load_era5_data()
	            log.info("Loaded ERA5-Land data: %d zones", len(self._era5_data))

	    def burn_for_zone(self, zone: UrbanZone) -> BurnResult:
	        """Get or compute burn analysis for a zone.

	        Args:
	            zone: Zone to analyse; its ``zone_id`` keys both the cache and
	                the ERA5 data lookup.

	        Returns:
	            The cached or freshly computed BurnResult. A zone with no ERA5
	            records gets an all-zero result with ``basis_risk_score=0.1``
	            (which prices to a zero premium) and a warning is logged.
	        """
	        if zone.zone_id in self._burn_cache:
	            return self._burn_cache[zone.zone_id]

	        self._ensure_loaded()

	        records = self._era5_data.get(zone.zone_id, [])
	        if not records:
	            log.warning("No ERA5 data for zone %s", zone.zone_id)
	            result = BurnResult(
	                zone_id=zone.zone_id, years_analyzed=0,
	                alert_events_total=0, payout_events_total=0,
	                alert_freq_per_year=0, payout_freq_per_year=0,
	                mean_alert_duration_days=0, mean_payout_duration_days=0,
	                expected_annual_loss_usd=0, basis_risk_score=0.1,
	            )
	            self._burn_cache[zone.zone_id] = result
	            return result

	        # Get zone-specific UHI correction (UHI_MODEL env var selects
	        # synthetic/lst); the midpoint of the range is applied to every day.
	        uhi_lo, uhi_hi = get_zone_uhi_range(zone)
	        mean_uhi = (uhi_lo + uhi_hi) / 2.0

	        # Zone-specific trigger threshold (THRESHOLD_MODE env var selects
	        # global=35.1°C vs zone_specific=per-zone P90 from local climatology).
	        # Only the alert threshold feeds the burn analysis; the second
	        # value is unused here.
	        from src.pricing.zone_thresholds import get_zone_thresholds
	        alert_c, _payout_peak_c = get_zone_thresholds(zone)

	        result = compute_burn(records, mean_uhi, threshold_c=alert_c)
	        result.zone_id = zone.zone_id
	        result.basis_risk_score = _basis_risk_for_zone(zone, mean_uhi)

	        # Expected annual loss: frequency-weighted payout of each tier.
	        result.expected_annual_loss_usd = (
	            result.alert_freq_per_year * ALERT_PAYOUT_USD
	            + result.payout_freq_per_year * FULL_PAYOUT_USD
	        )

	        self._burn_cache[zone.zone_id] = result
	        return result

	    def price_zone(
	        self,
	        zone: UrbanZone,
	        **kwargs,
	    ) -> ActuarialResult:
	        """Price coverage for a single zone using burn analysis.

	        Accepts **kwargs for compatibility with NeuralActuarialPricer's
	        interface (predicted_frequency, climate_history, etc. are ignored).

	        The per-worker premium is the expected annual loss plus basis-risk
	        and vulnerability loadings, then admin and contingency loadings on
	        that subtotal, all multiplied by INFLATION_BUFFER.

	        Args:
	            zone: Zone to price.
	            **kwargs: Ignored.

	        Returns:
	            ActuarialResult with the per-worker premium, zone-level totals
	            (per-worker amounts times enrolled workers) and a detailed
	            ``cost_breakdown`` dict.
	        """
	        burn = self.burn_for_zone(zone)
	        # Never price for fewer than one worker.
	        enrolled = max(zone.worker_population_est, 1)

	        # Expected annual loss per worker (actuarial fair price)
	        eal_per_worker = burn.expected_annual_loss_usd

	        # Loading factors
	        basis_risk_loading = eal_per_worker * burn.basis_risk_score
	        vulnerability_loading = eal_per_worker * (zone.outdoor_exposure_pct * 0.10)
	        subtotal = eal_per_worker + basis_risk_loading + vulnerability_loading
	        admin_loading = subtotal * ADMIN_RATE
	        contingency = subtotal * CONTINGENCY_RATE
	        loaded_premium = (subtotal + admin_loading + contingency) * INFLATION_BUFFER

	        # Funding split: the worker pays first, up to the cap; the rest is
	        # shared between philanthropy and insurer in proportion to their
	        # configured shares.
	        worker_share = min(WORKER_CAP_USD, loaded_premium)
	        remaining = loaded_premium - worker_share
	        philanthropy = remaining * (
	            PHILANTHROPY_SHARE / (PHILANTHROPY_SHARE + INSURER_SHARE)
	        )
	        insurer = remaining - philanthropy

	        cost_breakdown = {
	            # Burn analysis inputs
	            "alert_freq_per_year": round(burn.alert_freq_per_year, 2),
	            "payout_freq_per_year": round(burn.payout_freq_per_year, 2),
	            "alert_payout_usd": ALERT_PAYOUT_USD,
	            "full_payout_usd": FULL_PAYOUT_USD,
	            "years_analyzed": burn.years_analyzed,
	            "alert_events_total": burn.alert_events_total,
	            "payout_events_total": burn.payout_events_total,
	            "mean_alert_duration_days": round(burn.mean_alert_duration_days, 1),
	            "mean_payout_duration_days": round(burn.mean_payout_duration_days, 1),
	            # Pricing components
	            "expected_annual_loss": round(eal_per_worker, 2),
	            "basis_risk_loading": round(basis_risk_loading, 2),
	            "basis_risk_score": burn.basis_risk_score,
	            "vulnerability_loading": round(vulnerability_loading, 2),
	            "admin_loading": round(admin_loading, 2),
	            "contingency_loading": round(contingency, 2),
	            "loaded_premium": round(loaded_premium, 2),
	            # Funding split
	            "worker_contribution": round(worker_share, 2),
	            "philanthropy_share": round(philanthropy, 2),
	            "insurer_premium": round(insurer, 2),
	            # For pipeline DB compatibility
	            "learned_frequency": round(
	                burn.alert_freq_per_year + burn.payout_freq_per_year, 2),
	            "trigger_prob": 0,
	            # The 0.1 floor on the denominator avoids division by zero
	            # when a zone has no alert-tier events.
	            "payout_factor": round(
	                burn.payout_freq_per_year / max(burn.alert_freq_per_year, 0.1), 2),
	        }

	        result = ActuarialResult(
	            zone_id=zone.zone_id,
	            zone_name=zone.name,
	            city=zone.city,
	            cost_per_worker_year=round(loaded_premium, 2),
	            expected_annual_payouts=round(burn.expected_annual_loss_usd * enrolled, 2),
	            frequency_component=round(eal_per_worker, 2),
	            basis_risk_loading=round(basis_risk_loading * enrolled, 2),
	            vulnerability_loading=round(vulnerability_loading * enrolled, 2),
	            # Zone-level admin loading also carries the contingency loading.
	            admin_loading=round((admin_loading + contingency) * enrolled, 2),
	            cost_breakdown=cost_breakdown,
	            enrolled_workers=enrolled,
	        )

	        log.info(
	            "Priced %s (%s): $%.2f/worker/yr | alert=%.1f/yr payout=%.1f/yr | "
	            "EAL=$%.2f | basis_risk=%.1f%%",
	            zone.name, zone.settlement_type, loaded_premium,
	            burn.alert_freq_per_year, burn.payout_freq_per_year,
	            eal_per_worker, burn.basis_risk_score * 100,
	        )

	        return result