Spaces:

iPurushottam
/

ClimAI

Running

App Files Files Community

ClimAI / main.py

iPurushottam

Upload folder using huggingface_hub

5fd9d71 verified 1 day ago

raw

history blame contribute delete

101 kB

	# pyre-ignore-all-errors
	"""
	ClimAI — FastAPI Backend
	Serves weather, earthquake, cyclone, tsunami, historical, and ML prediction data.
	Location: Chennai, India (13.08°N, 80.27°E)
	"""

	from fastapi import FastAPI # type: ignore[import]
	from fastapi.middleware.cors import CORSMiddleware
	import requests
	from datetime import datetime, timedelta
	import numpy as np
	import random
	import re as _re
	import logging
	# from global_land_mask import globe # Removed from top to save startup memory

	from planner import plan_query
	from executor import execute_plan
	from critic import review
	from logger import log
	from groq_llm import groq_answer # ← ADD THIS LINE

	# Module-level logger named "climai": INFO and above, written through a
	# StreamHandler with a "<time> [<level>] <message>" format.
	logger = logging.getLogger("climai")
	logger.setLevel(logging.INFO)
	_handler = logging.StreamHandler()
	_handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s"))
	# NOTE(review): the handler is attached unconditionally at import time, so
	# importing this module twice in one process would duplicate every log line.
	logger.addHandler(_handler)

	# The FastAPI application object that every route below registers on.
	app = FastAPI(title="ClimAI API", version="3.5.2-pro")

	# ── CORS Configuration ──────────────────────────────────────────────────────
	# Using the standard FastAPI CORSMiddleware.
	# This handles preflight (OPTIONS) and header injection correctly for all routes.
	# NOTE(review): FastAPI's CORS documentation says allow_origins, allow_methods
	# and allow_headers cannot be ["*"] when allow_credentials is True. Confirm
	# whether credentialed cross-origin requests are really needed; if not, set
	# allow_credentials=False, otherwise list the allowed origins explicitly.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],
	    allow_credentials=True, # Set to True for better compatibility with standard fetch
	    allow_methods=["*"],
	    allow_headers=["*"],
	    expose_headers=["*"],
	)


	@app.get("/debug-paths")
	def debug_paths():
	    """Report where the process is running and which data files it can see.

	    Returns a dict with the current working directory, its directory
	    listing, whether ``weather_history.json`` and a ``data`` folder exist
	    (both relative to the working directory), and the contents of ``data``
	    when present.

	    NOTE(review): this route is registered without any access control and
	    returns server directory listings — confirm it should ship enabled.
	    """
	    import os as _os

	    working_dir = _os.getcwd()
	    payload = {
	        "cwd": working_dir,
	        "files_in_cwd": _os.listdir(working_dir),
	    }
	    payload["weather_history_exists"] = _os.path.exists("weather_history.json")

	    has_data_dir = _os.path.exists("data")
	    payload["data_folder_exists"] = has_data_dir
	    payload["data_files"] = _os.listdir("data") if has_data_dir else []
	    return payload

	@app.get("/ping")
	def ping():
	    """Liveness probe: returns a fixed status, the server's local time and a version tag."""
	    now_iso = datetime.now().isoformat()
	    return dict(status="ok", time=now_iso, version="3.5-pro")

	# Chennai coordinates in decimal degrees (13.08°N, 80.27°E, as stated in the
	# module docstring). Sent as the latitude/longitude parameters of the
	# Open-Meteo requests made in this file.
	LAT = 13.0827
	LON = 80.2707

	# ── Simple in-memory cache to prevent Open-Meteo 429 rate limits ──
	_cache: dict = {}
	_cache_ttl: dict = {}

	def _get_cache(key: str, ttl_seconds: int = 300):
	if key in _cache and key in _cache_ttl:
	age = (datetime.now() - _cache_ttl[key]).total_seconds()
	if age < ttl_seconds:
	return _cache[key]
	return None

	def _set_cache(key: str, value):
	_cache[key] = value
	_cache_ttl[key] = datetime.now()


	# ════════════════════════════════
	# /weather — Current conditions (Open Meteo)
	# ════════════════════════════════
	@app.get("/weather")
	def get_weather():
	    """Current weather for Chennai.

	    Serves a cached response when one younger than 120 seconds exists;
	    otherwise queries the Open-Meteo forecast API for the `current` block.

	    Returns:
	        dict with temperature, feels_like, humidity, wind_speed,
	        wind_direction (16-point compass label), wind_direction_deg,
	        wind_gusts, cloud_cover, pressure (the API's surface_pressure),
	        precipitation and rain. On any failure, ``{"error": <message>}``.
	    """
	    cached = _get_cache("weather", ttl_seconds=120)
	    if cached:
	        return cached

	    url = "https://api.open-meteo.com/v1/forecast"
	    params = {
	        "latitude": LAT,
	        "longitude": LON,
	        "current": "temperature_2m,relative_humidity_2m,apparent_temperature,precipitation,rain,cloud_cover,wind_speed_10m,wind_direction_10m,wind_gusts_10m,pressure_msl,surface_pressure",
	        "timezone": "Asia/Kolkata",
	    }
	    directions = ["N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE",
	                  "S", "SSW", "SW", "WSW", "W", "WNW", "NW", "NNW"]
	    try:
	        r = requests.get(url, params=params, timeout=10)
	        r.raise_for_status()
	        current = r.json().get("current", {})

	        deg = current.get("wind_direction_10m", 0)
	        if deg is None:
	            # A null direction used to raise TypeError in the division below
	            # and discard every other reading; report it as unknown instead.
	            wind_dir = None
	        else:
	            # 360° / 16 sectors = 22.5° each; the modulo wraps 360° back to "N".
	            wind_dir = directions[round(deg / 22.5) % 16]

	        result = {
	            "temperature": current.get("temperature_2m"),
	            "feels_like": current.get("apparent_temperature"),
	            "humidity": current.get("relative_humidity_2m"),
	            "wind_speed": current.get("wind_speed_10m"),
	            "wind_direction": wind_dir,
	            "wind_direction_deg": deg,
	            "wind_gusts": current.get("wind_gusts_10m"),
	            "cloud_cover": current.get("cloud_cover"),
	            "pressure": current.get("surface_pressure"),
	            "precipitation": current.get("precipitation"),
	            "rain": current.get("rain"),
	        }
	        _set_cache("weather", result)
	        return result
	    except Exception as e:
	        # Endpoint boundary: keep the JSON error contract, but leave a trace
	        # in the logs so failures are diagnosable.
	        logger.exception("[get_weather] request failed")
	        return {"error": str(e)}


	# ════════════════════════════════
	# /forecast — 7-day daily forecast
	# ════════════════════════════════
	@app.get("/forecast")
	def get_forecast():
	    """7-day daily forecast for Chennai, plus hourly temperature and wind.

	    Serves a cached response when one younger than 300 seconds exists.
	    Returns ``{"daily": [...], "hourly": [...]}``, or ``{"error": <message>}``
	    on any failure.
	    """
	    hit = _get_cache("forecast", 300)
	    if hit:
	        return hit

	    endpoint = "https://api.open-meteo.com/v1/forecast"
	    query = {
	        "latitude": LAT,
	        "longitude": LON,
	        "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum,wind_speed_10m_max,wind_direction_10m_dominant,precipitation_probability_max,uv_index_max",
	        "hourly": "temperature_2m,wind_speed_10m",
	        "forecast_days": 7,
	        "timezone": "Asia/Kolkata",
	    }

	    def _nth(values, idx, fallback):
	        # Bounds-checked lookup: series shorter than the time axis yield `fallback`.
	        return values[idx] if idx < len(values) else fallback

	    try:
	        resp = requests.get(endpoint, params=query, timeout=10)
	        resp.raise_for_status()
	        payload = resp.json()
	        daily = payload.get("daily", {})
	        hourly = payload.get("hourly", {})

	        def _daily_value(field, idx, fallback):
	            return _nth(daily.get(field, []), idx, fallback)

	        days = []
	        for pos, date_str in enumerate(daily.get("time", [])):
	            weekday = datetime.strptime(date_str, "%Y-%m-%d").strftime("%a")
	            days.append({
	                "date": date_str,
	                "day": weekday,
	                "temp_max": _daily_value("temperature_2m_max", pos, None),
	                "temp_min": _daily_value("temperature_2m_min", pos, None),
	                "precipitation": _daily_value("precipitation_sum", pos, 0),
	                "wind_speed_max": _daily_value("wind_speed_10m_max", pos, 0),
	                "precip_prob": _daily_value("precipitation_probability_max", pos, 0),
	                "uv_index": _daily_value("uv_index_max", pos, 0),
	            })

	        hour_temps = hourly.get("temperature_2m", [])
	        hour_winds = hourly.get("wind_speed_10m", [])
	        hourly_data = [
	            {
	                "time": stamp,
	                "temperature": _nth(hour_temps, pos, None),
	                "wind_speed": _nth(hour_winds, pos, None),
	            }
	            for pos, stamp in enumerate(hourly.get("time", []))
	        ]

	        result = {"daily": days, "hourly": hourly_data}
	        _set_cache("forecast", result)
	        return result
	    except Exception as e:
	        return {"error": str(e)}


	# ════════════════════════════════
	# /historical — 5-year historical data (Open Meteo Archive API)
	# ════════════════════════════════
	@app.get("/historical")
	def get_historical(years: int = 5):
	    """Monthly aggregates of Chennai's archived daily weather.

	    Fetches daily max/min temperature, precipitation and max wind speed from
	    the Open-Meteo Archive API for the `years * 365` days ending 7 days ago,
	    then folds them into per-month averages (precipitation is summed).

	    Successful responses are cached per `years` value for an hour, using the
	    same in-memory cache as the other Open-Meteo endpoints.

	    Args:
	        years: How many 365-day years of history to request.

	    Returns:
	        dict with location, period, monthly (list sorted by "YYYY-MM") and
	        total_months; or ``{"error": <message>}`` on any failure, including
	        a `years` value too large for date arithmetic.
	    """
	    from collections import defaultdict

	    cache_key = f"historical:{years}"
	    cached = _get_cache(cache_key, ttl_seconds=3600)
	    if cached:
	        return cached

	    def _mean(values):
	        return round(sum(values) / len(values), 1) if values else None

	    try:
	        # Open-Meteo Archive API lags by about 5-7 days.
	        # We must offset the end date to avoid a 400 Bad Request.
	        # Computed inside the try so an out-of-range `years` (OverflowError)
	        # yields the JSON error contract instead of an unhandled exception.
	        end_date = datetime.now() - timedelta(days=7)
	        start_date = end_date - timedelta(days=years * 365)
	        period_start = start_date.strftime("%Y-%m-%d")
	        period_end = end_date.strftime("%Y-%m-%d")

	        url = "https://archive-api.open-meteo.com/v1/archive"
	        params = {
	            "latitude": LAT,
	            "longitude": LON,
	            "start_date": period_start,
	            "end_date": period_end,
	            "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum,wind_speed_10m_max",
	            "timezone": "Asia/Kolkata",
	        }
	        r = requests.get(url, params=params, timeout=30)
	        r.raise_for_status()
	        daily = r.json().get("daily", {})

	        # Bucket name -> the raw daily series that feeds it.
	        series = (
	            ("temps_max", daily.get("temperature_2m_max", [])),
	            ("temps_min", daily.get("temperature_2m_min", [])),
	            ("precip", daily.get("precipitation_sum", [])),
	            ("wind", daily.get("wind_speed_10m_max", [])),
	        )

	        # Return monthly averages for efficiency
	        monthly = defaultdict(lambda: {name: [] for name, _ in series})
	        for i, day in enumerate(daily.get("time", [])):
	            bucket = monthly[day[:7]]  # YYYY-MM
	            for name, values in series:
	                # Skip days the series does not cover or reports as null.
	                if i < len(values) and values[i] is not None:
	                    bucket[name].append(values[i])

	        months = [
	            {
	                "month": month,
	                "avg_temp_max": _mean(vals["temps_max"]),
	                "avg_temp_min": _mean(vals["temps_min"]),
	                "total_precip": round(sum(vals["precip"]), 1) if vals["precip"] else 0,
	                "avg_wind": _mean(vals["wind"]),
	            }
	            for month, vals in sorted(monthly.items())
	        ]

	        result = {
	            "location": "Chennai, India",
	            "period": f"{period_start} to {period_end}",
	            "monthly": months,
	            "total_months": len(months),
	        }
	        _set_cache(cache_key, result)
	        return result
	    except Exception as e:
	        return {"error": str(e)}



	# ════════════════════════════════════════════════════════════
	# SHARED HELPERS — Data fetching & feature preparation
	# ════════════════════════════════════════════════════════════

	def fetch_training_data(days: int = 90):
	    """Load daily weather series for ML training.

	    Priority:
	      1. ``weather_history.json`` in the working directory, when it exists,
	         parses, and holds at least 14 usable days.
	      2. Otherwise the Open-Meteo Archive API for the `days` days ending
	         7 days ago.

	    Max/min temperatures are kept aligned: a day is dropped from both series
	    when either value is missing, so index i always refers to the same day
	    in ``temps_max`` and ``temps_min``. Precipitation and wind are filtered
	    for nulls independently, as before.

	    Args:
	        days: Length of the live-API window. Ignored when the saved dataset
	            is used.

	    Returns:
	        dict with temps_max, temps_min, precip, wind (lists), end_date
	        (datetime), training_days (len of temps_max) and source
	        ("saved_dataset" or "live_api").

	    Raises:
	        requests.RequestException: if the live API fallback fails.
	    """
	    import json as _json
	    import os as _os

	    def _series_from(daily):
	        """Extract the four cleaned series from an Open-Meteo style `daily` mapping."""
	        paired = [
	            (hi, lo)
	            for hi, lo in zip(
	                daily.get("temperature_2m_max", []),
	                daily.get("temperature_2m_min", []),
	            )
	            if hi is not None and lo is not None
	        ]
	        return {
	            "temps_max": [hi for hi, _ in paired],
	            "temps_min": [lo for _, lo in paired],
	            "precip": [p for p in daily.get("precipitation_sum", []) if p is not None],
	            "wind": [w for w in daily.get("wind_speed_10m_max", []) if w is not None],
	        }

	    dataset_path = "weather_history.json"

	    # ── Try loading from saved dataset first ──────────────────────
	    if _os.path.exists(dataset_path):
	        try:
	            with open(dataset_path, encoding="utf-8") as f:
	                saved = _json.load(f)
	            series = _series_from(saved.get("daily", {}))

	            if len(series["temps_max"]) >= 14:
	                # "period" is expected to look like "<start> to <end>"; fall
	                # back to the archive lag offset when it is absent or malformed.
	                try:
	                    end_str = saved.get("period", "").split(" to ")[-1].strip()
	                    end_date = datetime.strptime(end_str, "%Y-%m-%d")
	                except (AttributeError, TypeError, ValueError):
	                    end_date = datetime.now() - timedelta(days=7)

	                logger.info(
	                    "[fetch_training_data] Loaded %d days from saved dataset",
	                    len(series["temps_max"]),
	                )
	                return {
	                    **series,
	                    "end_date": end_date,
	                    "training_days": len(series["temps_max"]),
	                    "source": "saved_dataset",
	                }
	        except Exception as e:
	            # Best-effort: any problem with the file falls through to the API.
	            logger.warning(f"[fetch_training_data] Saved dataset load failed: {e} — falling back to API")

	    # ── Fallback: live API call ────────────────────────────────────
	    # Reached when the file is missing, unreadable, or too short.
	    logger.info("[fetch_training_data] Saved dataset unavailable — fetching from Open-Meteo Archive API")
	    end_date = datetime.now() - timedelta(days=7)
	    start_date = end_date - timedelta(days=days)

	    url = "https://archive-api.open-meteo.com/v1/archive"
	    params = {
	        "latitude": LAT,
	        "longitude": LON,
	        "start_date": start_date.strftime("%Y-%m-%d"),
	        "end_date": end_date.strftime("%Y-%m-%d"),
	        "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum,wind_speed_10m_max",
	        "timezone": "Asia/Kolkata",
	    }

	    r = requests.get(url, params=params, timeout=20)
	    r.raise_for_status()
	    series = _series_from(r.json().get("daily", {}))

	    return {
	        **series,
	        "end_date": end_date,
	        "training_days": len(series["temps_max"]),
	        "source": "live_api",
	    }


	def prepare_features(temps_max, temps_min, window=7):
	    """Build rolling-window training data for the tree-based models.

	    Each feature row is `window` consecutive values of `temps_max`; its
	    targets are the next day's max (`y_max`) and, where `temps_min` is long
	    enough to have that index, the next day's min (`y_min`).

	    Returns:
	        (X, y_max, y_min) as numpy arrays. `y_min` is shorter than `y_max`
	        when `temps_min` has fewer entries than `temps_max`.
	    """
	    target_positions = range(window, len(temps_max))
	    min_len = len(temps_min)

	    X = np.array([temps_max[pos - window:pos] for pos in target_positions])
	    y_max = np.array([temps_max[pos] for pos in target_positions])
	    y_min = np.array([temps_min[pos] for pos in target_positions if pos < min_len])
	    return X, y_max, y_min


	# ════════════════════════════════════════════════════════════
	# LSTM CLASS — Pure numpy implementation
	# ════════════════════════════════════════════════════════════

	def _sigmoid(x):
	x = np.clip(x, -500, 500)
	return 1.0 / (1.0 + np.exp(-x))

	def _tanh(x):
	return np.tanh(x)


	class NumpyLSTM:
	    """Single-layer LSTM with a scalar linear readout, written in plain numpy.

	    The forward pass implements forget, input and output gates plus a cell
	    state. Training is one gradient step per sequence on squared error.

	    NOTE(review): train_step derives gate gradients from the last cached time
	    step only (self.cache[-1]); nothing is propagated to earlier steps, so
	    this is not full backpropagation through time.
	    """

	    def __init__(self, input_size, hidden_size, lr=0.005):
	        """Create randomly initialised gate weights and zero biases.

	        Args:
	            input_size: Number of features per time step.
	            hidden_size: Size of the hidden and cell state vectors.
	            lr: Step size used by train_step.
	        """
	        self.hidden_size = hidden_size
	        self.lr = lr
	        scale = 0.1
	        # Each gate matrix multiplies the stacked column [h; x_t] built in
	        # forward_sequence, hence input_size + hidden_size columns.
	        self.Wf = np.random.randn(hidden_size, input_size + hidden_size) * scale
	        self.Wi = np.random.randn(hidden_size, input_size + hidden_size) * scale
	        self.Wc = np.random.randn(hidden_size, input_size + hidden_size) * scale
	        self.Wo = np.random.randn(hidden_size, input_size + hidden_size) * scale
	        self.bf = np.zeros((hidden_size, 1))
	        self.bi = np.zeros((hidden_size, 1))
	        self.bc = np.zeros((hidden_size, 1))
	        self.bo = np.zeros((hidden_size, 1))
	        # Linear readout from the final hidden state to one scalar.
	        self.Wy = np.random.randn(1, hidden_size) * scale
	        self.by = np.zeros((1, 1))

	    def forward_sequence(self, X_seq):
	        """Run the cell over X_seq (first axis = time) from zero initial state.

	        Resets self.cache and stores one tuple per step for train_step.

	        Returns:
	            (prediction as float, final hidden state h, final cell state c).
	        """
	        seq_len = X_seq.shape[0]
	        h = np.zeros((self.hidden_size, 1))
	        c = np.zeros((self.hidden_size, 1))
	        self.cache = []
	        for t in range(seq_len):
	            x_t = X_seq[t].reshape(-1, 1)
	            # Hidden state on top, current input below.
	            concat = np.vstack([h, x_t])
	            f_t = _sigmoid(self.Wf @ concat + self.bf)
	            i_t = _sigmoid(self.Wi @ concat + self.bi)
	            c_hat = _tanh(self.Wc @ concat + self.bc)
	            c = f_t * c + i_t * c_hat
	            o_t = _sigmoid(self.Wo @ concat + self.bo)
	            h = o_t * _tanh(c)
	            # Copies keep the cached states from aliasing later values.
	            self.cache.append((x_t, concat, f_t, i_t, c_hat, c.copy(), o_t, h.copy()))
	        y = self.Wy @ h + self.by
	        return float(y[0, 0]), h, c

	    def train_step(self, X_seq, target):
	        """Run one forward pass and one clipped gradient update.

	        Returns:
	            The squared error of the prediction made before the update.
	        """
	        pred, h, c = self.forward_sequence(X_seq)
	        # Derivative of (pred - target)**2 with respect to pred.
	        dy = 2 * (pred - target)
	        max_grad = 1.0
	        self.Wy -= self.lr * np.clip(dy * h.T, -max_grad, max_grad)
	        # The output bias step is not clipped, unlike the weight steps.
	        self.by -= self.lr * np.array([[dy]])
	        if self.cache:
	            # Only the final time step's cached values are used.
	            x_t, concat, f_t, i_t, c_hat, c_state, o_t, h_state = self.cache[-1]
	            # NOTE(review): self.Wy was already updated above, so dh uses the
	            # post-update readout weights — confirm this is intended.
	            dh = self.Wy.T * dy
	            do = dh * _tanh(c_state) * o_t * (1 - o_t)
	            dc = dh * o_t * (1 - _tanh(c_state) ** 2)
	            # From the forward update c = f_t * c_prev + i_t * c_hat, the term
	            # (c_state - i_t * c_hat) equals f_t * c_prev.
	            # NOTE(review): that appears to carry an extra f_t factor compared
	            # with using c_prev directly — verify the gradient.
	            df = dc * (c_state - i_t * c_hat) * f_t * (1 - f_t) if len(self.cache) > 1 else np.zeros_like(f_t)
	            di = dc * c_hat * i_t * (1 - i_t)
	            dc_hat = dc * i_t * (1 - c_hat ** 2)
	            # In-place clip, so the weight and bias steps below see clipped values.
	            for grad in [do, dc, df, di, dc_hat]:
	                np.clip(grad, -max_grad, max_grad, out=grad)
	            self.Wf -= self.lr * np.clip(df @ concat.T, -max_grad, max_grad)
	            self.Wi -= self.lr * np.clip(di @ concat.T, -max_grad, max_grad)
	            self.Wc -= self.lr * np.clip(dc_hat @ concat.T, -max_grad, max_grad)
	            self.Wo -= self.lr * np.clip(do @ concat.T, -max_grad, max_grad)
	            self.bf -= self.lr * df
	            self.bi -= self.lr * di
	            self.bc -= self.lr * dc_hat
	            self.bo -= self.lr * do
	        return (pred - target) ** 2

	    def predict(self, X_seq):
	        """Return the scalar prediction for X_seq (also overwrites self.cache)."""
	        pred, _, _ = self.forward_sequence(X_seq)
	        return pred


	# ════════════════════════════════════════════════════════════
	# PER-MODEL PREDICTION FUNCTIONS
	# Each returns: list of {date, day, predicted_max, predicted_min}
	# ════════════════════════════════════════════════════════════

	def predict_rf(X, y_max, y_min, temps_max, temps_min, end_date, window=7, forecast_days=7):
	    """Random Forest predictions.

	    Fits one regressor for daily max and one for daily min on (X, y_max) and
	    (X, y_min), then forecasts `forecast_days` days after `end_date`
	    autoregressively: each prediction is pushed into the rolling window
	    that feeds the next one.

	    Returns:
	        (list of {date, day, predicted_max, predicted_min}, elapsed ms).
	        The timer starts before the sklearn import, so that cost is included.
	    """
	    import time as _time
	    started = _time.time()
	    from sklearn.ensemble import RandomForestRegressor # type: ignore[import]

	    model_hi = RandomForestRegressor(n_estimators=50, random_state=42)
	    model_lo = RandomForestRegressor(n_estimators=50, random_state=42)
	    model_hi.fit(X, y_max)
	    model_lo.fit(X, y_min)

	    # Rolling 1 x window feature rows, seeded with the latest observations.
	    window_hi = np.array(temps_max[-window:]).reshape(1, -1)
	    window_lo = np.array(temps_min[-window:]).reshape(1, -1)

	    forecast = []
	    for offset in range(1, forecast_days + 1):
	        hi = float(model_hi.predict(window_hi)[0])
	        lo = float(model_lo.predict(window_lo)[0])
	        target_day = end_date + timedelta(days=offset)
	        forecast.append({
	            "date": target_day.strftime("%Y-%m-%d"),
	            "day": target_day.strftime("%a"),
	            "predicted_max": round(hi, 1),
	            "predicted_min": round(lo, 1),
	        })
	        # Drop the oldest value, append the unrounded prediction.
	        window_hi = np.concatenate((window_hi[:, 1:], [[hi]]), axis=1)
	        window_lo = np.concatenate((window_lo[:, 1:], [[lo]]), axis=1)

	    elapsed_ms = round((_time.time() - started) * 1000)
	    return forecast, elapsed_ms


	def predict_xgb(X, y_max, y_min, temps_max, temps_min, end_date, window=7, forecast_days=7):
	    """XGBoost predictions.

	    Fits one regressor for daily max and one for daily min on (X, y_max) and
	    (X, y_min), then forecasts `forecast_days` days after `end_date`
	    autoregressively: each prediction is pushed into the rolling window
	    that feeds the next one.

	    Returns:
	        (list of {date, day, predicted_max, predicted_min}, elapsed ms).
	        The timer starts before the xgboost import, so that cost is included.
	    """
	    import time as _time
	    started = _time.time()
	    from xgboost import XGBRegressor # type: ignore[import]

	    model_hi = XGBRegressor(n_estimators=50, max_depth=3, learning_rate=0.1, verbosity=0)
	    model_lo = XGBRegressor(n_estimators=50, max_depth=3, learning_rate=0.1, verbosity=0)
	    model_hi.fit(X, y_max)
	    model_lo.fit(X, y_min)

	    # Rolling 1 x window feature rows, seeded with the latest observations.
	    window_hi = np.array(temps_max[-window:]).reshape(1, -1)
	    window_lo = np.array(temps_min[-window:]).reshape(1, -1)

	    forecast = []
	    for offset in range(1, forecast_days + 1):
	        hi = float(model_hi.predict(window_hi)[0])
	        lo = float(model_lo.predict(window_lo)[0])
	        target_day = end_date + timedelta(days=offset)
	        forecast.append({
	            "date": target_day.strftime("%Y-%m-%d"),
	            "day": target_day.strftime("%a"),
	            "predicted_max": round(hi, 1),
	            "predicted_min": round(lo, 1),
	        })
	        # Drop the oldest value, append the unrounded prediction.
	        window_hi = np.concatenate((window_hi[:, 1:], [[hi]]), axis=1)
	        window_lo = np.concatenate((window_lo[:, 1:], [[lo]]), axis=1)

	    elapsed_ms = round((_time.time() - started) * 1000)
	    return forecast, elapsed_ms


	def predict_lgbm(X, y_max, y_min, temps_max, temps_min, end_date, window=7, forecast_days=7):
	    """LightGBM predictions.

	    Fits one regressor for daily max and one for daily min on (X, y_max) and
	    (X, y_min), then forecasts `forecast_days` days after `end_date`
	    autoregressively: each prediction is pushed into the rolling window
	    that feeds the next one.

	    Returns:
	        (list of {date, day, predicted_max, predicted_min}, elapsed ms).
	        The timer starts before the lightgbm import, so that cost is included.
	    """
	    import time as _time
	    started = _time.time()
	    from lightgbm import LGBMRegressor # type: ignore[import]

	    model_hi = LGBMRegressor(n_estimators=50, max_depth=3, learning_rate=0.1, verbose=-1)
	    model_lo = LGBMRegressor(n_estimators=50, max_depth=3, learning_rate=0.1, verbose=-1)
	    model_hi.fit(X, y_max)
	    model_lo.fit(X, y_min)

	    # Rolling 1 x window feature rows, seeded with the latest observations.
	    window_hi = np.array(temps_max[-window:]).reshape(1, -1)
	    window_lo = np.array(temps_min[-window:]).reshape(1, -1)

	    forecast = []
	    for offset in range(1, forecast_days + 1):
	        hi = float(model_hi.predict(window_hi)[0])
	        lo = float(model_lo.predict(window_lo)[0])
	        target_day = end_date + timedelta(days=offset)
	        forecast.append({
	            "date": target_day.strftime("%Y-%m-%d"),
	            "day": target_day.strftime("%a"),
	            "predicted_max": round(hi, 1),
	            "predicted_min": round(lo, 1),
	        })
	        # Drop the oldest value, append the unrounded prediction.
	        window_hi = np.concatenate((window_hi[:, 1:], [[hi]]), axis=1)
	        window_lo = np.concatenate((window_lo[:, 1:], [[lo]]), axis=1)

	    elapsed_ms = round((_time.time() - started) * 1000)
	    return forecast, elapsed_ms


	def predict_lstm(temps_max, temps_min, end_date, window=7, forecast_days=7, epochs=30):
	    """LSTM (pure numpy) predictions.

	    Standardises each series (z-score, with 1e-8 added to the std), trains
	    one NumpyLSTM per series on sliding windows of length `window` for
	    `epochs` passes, then forecasts `forecast_days` days after `end_date`
	    autoregressively in normalised space, de-normalising each output.

	    Returns:
	        (list of {date, day, predicted_max, predicted_min}, elapsed ms).
	    """
	    import time as _time
	    started = _time.time()

	    series_hi = np.array(temps_max)
	    series_lo = np.array(temps_min)
	    mean_max, std_max = series_hi.mean(), series_hi.std() + 1e-8
	    mean_min, std_min = series_lo.mean(), series_lo.std() + 1e-8
	    norm_max = (series_hi - mean_max) / std_max
	    norm_min = (series_lo - mean_min) / std_min

	    def _sliding_pairs(series):
	        # (window of past values, next value) for every position past `window`.
	        positions = range(window, len(series))
	        return (
	            [series[pos - window:pos] for pos in positions],
	            [series[pos] for pos in positions],
	        )

	    inputs_hi, targets_hi = _sliding_pairs(norm_max)
	    inputs_lo, targets_lo = _sliding_pairs(norm_min)

	    # Train: one full pass over the max samples, then the min samples, per epoch.
	    net_hi = NumpyLSTM(input_size=1, hidden_size=16, lr=0.003)
	    net_lo = NumpyLSTM(input_size=1, hidden_size=16, lr=0.003)
	    for _ in range(epochs):
	        for seq, target in zip(inputs_hi, targets_hi):
	            net_hi.train_step(np.array(seq).reshape(-1, 1), target)
	        for seq, target in zip(inputs_lo, targets_lo):
	            net_lo.train_step(np.array(seq).reshape(-1, 1), target)

	    # Predict: buffers grow by one normalised prediction per day; only the
	    # trailing `window` entries feed the next step.
	    history_hi = norm_max[-window:].tolist()
	    history_lo = norm_min[-window:].tolist()
	    forecast = []
	    for offset in range(1, forecast_days + 1):
	        hi_norm = net_hi.predict(np.array(history_hi[-window:]).reshape(-1, 1))
	        lo_norm = net_lo.predict(np.array(history_lo[-window:]).reshape(-1, 1))
	        hi = float(hi_norm * std_max + mean_max)
	        lo = float(lo_norm * std_min + mean_min)
	        target_day = end_date + timedelta(days=offset)
	        forecast.append({
	            "date": target_day.strftime("%Y-%m-%d"),
	            "day": target_day.strftime("%a"),
	            "predicted_max": round(hi, 1),
	            "predicted_min": round(lo, 1),
	        })
	        history_hi.append(hi_norm)
	        history_lo.append(lo_norm)

	    elapsed_ms = round((_time.time() - started) * 1000)
	    return forecast, elapsed_ms


	# ════════════════════════════════════════════════════════════
	# /predict — Single model prediction
	# ════════════════════════════════════════════════════════════
	@app.get("/predict")
	def get_predict(model: str = "random_forest", days: int = 7):
	    """
	    Temperature predictions for the next `days` days from a single model.

	    `model` is lower-cased and spaces become underscores before matching one
	    of: random_forest, xgboost, lstm, lightgbm. Training data is loaded
	    first, so a data failure is reported even when the model name is invalid.

	    Returns the model name, predictions, training-set size, elapsed
	    milliseconds and location; or {"error": <message>} on any failure.
	    """
	    try:
	        td = fetch_training_data()
	        temps_max, temps_min = td["temps_max"], td["temps_min"]
	        end_date = td["end_date"]

	        if len(temps_max) < 14:
	            return {"error": "Insufficient data for prediction"}

	        window = 7
	        X, y_max, y_min = prepare_features(temps_max, temps_min, window)
	        model_name = model.lower().replace(" ", "_")

	        # The three tree models share one call signature; the LSTM builds
	        # its own windows and takes only the raw series.
	        tree_models = {
	            "random_forest": predict_rf,
	            "xgboost": predict_xgb,
	            "lightgbm": predict_lgbm,
	        }
	        if model_name in tree_models:
	            runner = tree_models[model_name]
	            predictions, time_ms = runner(X, y_max, y_min, temps_max, temps_min, end_date, window, days)
	        elif model_name == "lstm":
	            predictions, time_ms = predict_lstm(temps_max, temps_min, end_date, window, days)
	        else:
	            return {"error": f"Unknown model: {model}. Use: random_forest, xgboost, lstm, lightgbm"}

	        return {
	            "model": model_name,
	            "predictions": predictions,
	            "training_days": td["training_days"],
	            "training_time_ms": time_ms,
	            "location": "Chennai, India",
	        }
	    except Exception as e:
	        return {"error": str(e)}


	# ════════════════════════════════════════════════════════════
	# /report — ENSEMBLE: All 4 models -> averaged final report
	# ════════════════════════════════════════════════════════════
	@app.get("/report")
	def get_report(days: int = 7):
	    """
	    Ensemble forecast: run Random Forest, XGBoost, LSTM and LightGBM on the
	    same training data and average their daily predictions.

	    A model that raises is recorded as failed and left out of the average.
	    Each day gets a confidence label from the spread (max - min) between
	    models; the report also carries an overall agreement score computed as
	    1 - (mean spread / mean predicted max), clamped to [0, 1].

	    Returns the per-model results, the averaged report and training-data
	    metadata; or {"error": <message>} when data is insufficient, every model
	    fails, or anything else raises.
	    """
	    try:
	        # 1. Fetch data once (shared across all models)
	        td = fetch_training_data()
	        temps_max, temps_min = td["temps_max"], td["temps_min"]
	        end_date = td["end_date"]

	        if len(temps_max) < 14:
	            return {"error": "Insufficient data for prediction"}

	        window = 7
	        X, y_max, y_min = prepare_features(temps_max, temps_min, window)

	        # 2. Run all 4 models, in this fixed order
	        tree_args = (X, y_max, y_min, temps_max, temps_min, end_date, window, days)
	        runners = (
	            ("random_forest", lambda: predict_rf(*tree_args)),
	            ("xgboost", lambda: predict_xgb(*tree_args)),
	            ("lstm", lambda: predict_lstm(temps_max, temps_min, end_date, window, days)),
	            ("lightgbm", lambda: predict_lgbm(*tree_args)),
	        )
	        models_used = [label for label, _ in runners]
	        individual_results = {}
	        all_preds = {}  # model -> predictions list

	        for label, run in runners:
	            try:
	                preds, t_ms = run()
	            except Exception as e:
	                individual_results[label] = {"status": "error", "error": str(e)}
	            else:
	                individual_results[label] = {"predictions": preds, "training_time_ms": t_ms, "status": "success"}
	                all_preds[label] = preds

	        # 3. Compute ensemble average across all successful models
	        successful_models = list(all_preds.keys())
	        n_models = len(successful_models)
	        if n_models == 0:
	            return {"error": "All models failed"}

	        final_predictions = []
	        total_spread_max = 0
	        total_spread_min = 0

	        for day_idx in range(days):
	            # Models that produced a prediction for this day.
	            contributing = [m for m in successful_models if day_idx < len(all_preds[m])]
	            if not contributing:
	                continue

	            day_maxes = [all_preds[m][day_idx]["predicted_max"] for m in contributing]
	            day_mins = [all_preds[m][day_idx]["predicted_min"] for m in contributing]

	            avg_max = round(sum(day_maxes) / len(day_maxes), 1)
	            avg_min = round(sum(day_mins) / len(day_mins), 1)
	            spread_max = round(max(day_maxes) - min(day_maxes), 1)
	            spread_min = round(max(day_mins) - min(day_mins), 1)
	            total_spread_max += spread_max
	            total_spread_min += spread_min

	            # Confidence based on model agreement (spread)
	            avg_spread = (spread_max + spread_min) / 2
	            confidence = "low"
	            for limit, label in ((1.0, "high"), (2.0, "medium")):
	                if avg_spread < limit:
	                    confidence = label
	                    break

	            # Date labels come from the first successful model
	            ref = all_preds[successful_models[0]][day_idx]

	            # Per-model breakdown for this day
	            model_breakdown = {
	                m: {
	                    "max": all_preds[m][day_idx]["predicted_max"],
	                    "min": all_preds[m][day_idx]["predicted_min"],
	                }
	                for m in contributing
	            }

	            final_predictions.append({
	                "date": ref["date"],
	                "day": ref["day"],
	                "predicted_max": avg_max,
	                "predicted_min": avg_min,
	                "model_spread_max": spread_max,
	                "model_spread_min": spread_min,
	                "confidence": confidence,
	                "per_model": model_breakdown,
	            })

	        # 4. Overall agreement score: 1 - (avg_spread / avg_temp)
	        if final_predictions:
	            n_days = len(final_predictions)
	            avg_temp = sum(p["predicted_max"] for p in final_predictions) / n_days
	            avg_overall_spread = ((total_spread_max + total_spread_min) / 2) / n_days
	        else:
	            avg_temp = 1
	            avg_overall_spread = 0
	        agreement_score = round(max(0, min(1, 1 - (avg_overall_spread / avg_temp))), 3)

	        overall_confidence = "low"
	        for floor, label in ((0.95, "very_high"), (0.90, "high"), (0.80, "medium")):
	            if agreement_score > floor:
	                overall_confidence = label
	                break

	        # Failed models carry no timing entry and contribute 0.
	        total_time = sum(
	            entry.get("training_time_ms", 0)
	            for entry in individual_results.values()
	            if isinstance(entry, dict)
	        )

	        final_report = {
	            "predictions": final_predictions,
	            "agreement_score": agreement_score,
	            "overall_confidence": overall_confidence,
	            "description": f"Ensemble average of {n_models} models. Agreement: {agreement_score:.1%}. Confidence: {overall_confidence}.",
	        }
	        training_data = {
	            "days": td["training_days"],
	            "location": "Chennai, India",
	            "total_compute_ms": total_time,
	        }
	        return {
	            "query": f"{days}-day weather forecast",
	            "models_used": successful_models,
	            "models_failed": [m for m in models_used if m not in successful_models],
	            "individual_results": individual_results,
	            "final_report": final_report,
	            "training_data": training_data,
	        }
	    except Exception as e:
	        return {"error": str(e)}


	# ════════════════════════════════
	# /earthquakes — Recent quakes from USGS
	# ════════════════════════════════
	@app.get("/earthquakes")
	def get_earthquakes(min_magnitude: float = 4.5, days: int = 30):
	    """Recent earthquakes from the USGS FDSN event service.

	    Queries events of at least `min_magnitude` from the last `days` days up
	    to the current UTC instant (at most 1000 events).

	    Args:
	        min_magnitude: Lower magnitude bound sent to the API.
	        days: Size of the look-back window in days.

	    Returns:
	        dict with events (time as a naive UTC ISO string, magnitude, place,
	        longitude, latitude, depth_km, tsunami, significance) and a summary
	        (total, max_magnitude, avg_depth, m6_plus, tsunami_alerts); or
	        ``{"error": <message>}`` on any failure.
	    """
	    from datetime import timezone as _timezone

	    # Full timestamps rather than bare dates: a date-only endtime means
	    # midnight UTC, which would drop every event from the current day.
	    stamp_format = "%Y-%m-%dT%H:%M:%S"

	    try:
	        # Inside the try so an out-of-range `days` yields the JSON error
	        # contract instead of an unhandled OverflowError.
	        end_date = datetime.now(_timezone.utc)
	        start_date = end_date - timedelta(days=days)

	        url = "https://earthquake.usgs.gov/fdsnws/event/1/query"
	        params = {
	            "format": "geojson",
	            "starttime": start_date.strftime(stamp_format),
	            "endtime": end_date.strftime(stamp_format),
	            "minmagnitude": min_magnitude,
	            "orderby": "time",
	            "limit": 1000,
	        }

	        r = requests.get(url, params=params, timeout=15)
	        r.raise_for_status()
	        features = r.json().get("features", [])

	        events = []
	        for feature in features:
	            # `or` also covers keys that are present but null.
	            props = feature.get("properties") or {}
	            geometry = feature.get("geometry") or {}
	            coords = geometry.get("coordinates") or [0, 0, 0]

	            time_ms = props.get("time", 0)
	            if time_ms:
	                # Epoch milliseconds -> naive UTC ISO string (same format as
	                # before, without the deprecated utcfromtimestamp).
	                moment = datetime.fromtimestamp(time_ms / 1000, tz=_timezone.utc)
	                event_time = moment.replace(tzinfo=None).isoformat()
	            else:
	                event_time = None

	            events.append({
	                "time": event_time,
	                "magnitude": props.get("mag", 0),
	                "place": props.get("place", "Unknown"),
	                "longitude": coords[0] if len(coords) > 0 else 0,
	                "latitude": coords[1] if len(coords) > 1 else 0,
	                "depth_km": coords[2] if len(coords) > 2 else 0,
	                "tsunami": props.get("tsunami", 0),
	                "significance": props.get("sig", 0),
	            })

	        # Exclude only missing values; a reported 0 is a real measurement.
	        magnitudes = [float(e["magnitude"]) for e in events if e["magnitude"] is not None]
	        depths = [float(e["depth_km"]) for e in events if e["depth_km"] is not None]

	        return {
	            "events": events,
	            "summary": {
	                "total": len(events),
	                "max_magnitude": max(magnitudes) if magnitudes else 0,
	                "avg_depth": round(sum(depths) / len(depths), 1) if depths else 0.0,
	                "m6_plus": sum(1 for m in magnitudes if m >= 6.0),
	                "tsunami_alerts": sum(1 for e in events if e["tsunami"]),
	            },
	        }
	    except Exception as e:
	        return {"error": str(e)}


	# ════════════════════════════════
	# /cyclones — Historical Bay of Bengal cyclones
	# ════════════════════════════════
	@app.get("/cyclones")
	def get_cyclones(year: int = None, name: str = None, min_wind: int = None):
	"""Historical cyclone data for Chennai/Bay of Bengal (IBTrACS format compatible)."""

	# Base cyclone data (simulating IBTrACS format for tracks)
	cyclones = [
	{"name": "Cyclone Michaung", "year": 2023, "category": "Severe Cyclonic Storm", "max_wind_kmh": 100, "rainfall_mm": 450, "damage_crore": 8000, "dates": "Dec 1-5, 2023", "landfall": "Near Bapatla, AP", "impact": "Record 240mm rainfall, severe flooding, 17 deaths",
	"track": [
	{"lat":10.5,"lon":83, "wind_speed": 55, "pressure": 1002, "time": "2023-12-01T00:00:00Z"},
	{"lat":11,"lon":82.5, "wind_speed": 75, "pressure": 996, "time": "2023-12-02T00:00:00Z"},
	{"lat":12,"lon":81.5, "wind_speed": 90, "pressure": 988, "time": "2023-12-03T00:00:00Z"},
	{"lat":13,"lon":80.8, "wind_speed": 100, "pressure": 982, "time": "2023-12-04T00:00:00Z"},
	{"lat":14,"lon":80.5, "wind_speed": 85, "pressure": 990, "time": "2023-12-05T00:00:00Z"},
	{"lat":15.5,"lon":80.2, "wind_speed": 50, "pressure": 1000, "time": "2023-12-06T00:00:00Z"}
	]},
	{"name": "Cyclone Mandous", "year": 2022, "category": "Cyclonic Storm", "max_wind_kmh": 85, "rainfall_mm": 180, "damage_crore": 1500, "dates": "Dec 6-12, 2022", "landfall": "Near Mahabalipuram, TN", "impact": "Heavy rainfall, power outages",
	"track": [
	{"lat":9,"lon":85, "wind_speed": 45, "pressure": 1004, "time": "2022-12-06T00:00:00Z"},
	{"lat":10,"lon":84, "wind_speed": 60, "pressure": 998, "time": "2022-12-07T00:00:00Z"},
	{"lat":11,"lon":83, "wind_speed": 75, "pressure": 992, "time": "2022-12-08T00:00:00Z"},
	{"lat":12,"lon":81.5, "wind_speed": 85, "pressure": 988, "time": "2022-12-09T00:00:00Z"},
	{"lat":12.5,"lon":80.5, "wind_speed": 65, "pressure": 996, "time": "2022-12-10T00:00:00Z"}
	]},
	{"name": "Cyclone Nivar", "year": 2020, "category": "Very Severe", "max_wind_kmh": 130, "rainfall_mm": 350, "damage_crore": 3000, "dates": "Nov 23-27, 2020", "landfall": "Near Puducherry", "impact": "200mm+ rainfall, 12 deaths, airport closed",
	"track": [
	{"lat":8.5,"lon":86, "wind_speed": 60, "pressure": 1000, "time": "2020-11-23T00:00:00Z"},
	{"lat":9.5,"lon":84.5, "wind_speed": 90, "pressure": 992, "time": "2020-11-24T00:00:00Z"},
	{"lat":10.5,"lon":83, "wind_speed": 115, "pressure": 980, "time": "2020-11-25T00:00:00Z"},
	{"lat":11.5,"lon":81.5, "wind_speed": 130, "pressure": 974, "time": "2020-11-26T00:00:00Z"},
	{"lat":12,"lon":80.5, "wind_speed": 95, "pressure": 986, "time": "2020-11-27T00:00:00Z"}
	]},
	{"name": "Cyclone Gaja", "year": 2018, "category": "Severe Cyclonic Storm", "max_wind_kmh": 120, "rainfall_mm": 200, "damage_crore": 15000, "dates": "Nov 11-19, 2018", "landfall": "Nagapattinam-Vedaranyam", "impact": "Schools closed, flights disrupted",
	"track": [
	{"lat":8,"lon":87, "wind_speed": 55, "pressure": 1002, "time": "2018-11-11T00:00:00Z"},
	{"lat":9,"lon":85.5, "wind_speed": 75, "pressure": 996, "time": "2018-11-13T00:00:00Z"},
	{"lat":10,"lon":83.5, "wind_speed": 100, "pressure": 986, "time": "2018-11-15T00:00:00Z"},
	{"lat":10.5,"lon":82, "wind_speed": 120, "pressure": 978, "time": "2018-11-16T00:00:00Z"},
	{"lat":10.8,"lon":80.5, "wind_speed": 85, "pressure": 992, "time": "2018-11-17T00:00:00Z"}
	]},
	{"name": "Cyclone Vardah", "year": 2016, "category": "Very Severe", "max_wind_kmh": 140, "rainfall_mm": 150, "damage_crore": 5000, "dates": "Dec 6-13, 2016", "landfall": "Near Chennai", "impact": "Direct hit, 130km/h winds, 18 deaths, power out 3 days",
	"track": [
	{"lat":8,"lon":89, "wind_speed": 65, "pressure": 1000, "time": "2016-12-07T00:00:00Z"},
	{"lat":9.5,"lon":87, "wind_speed": 90, "pressure": 990, "time": "2016-12-09T00:00:00Z"},
	{"lat":11,"lon":85, "wind_speed": 115, "pressure": 982, "time": "2016-12-10T00:00:00Z"},
	{"lat":12,"lon":83, "wind_speed": 130, "pressure": 976, "time": "2016-12-11T00:00:00Z"},
	{"lat":13,"lon":81, "wind_speed": 140, "pressure": 970, "time": "2016-12-12T00:00:00Z"},
	{"lat":13.1,"lon":80.3, "wind_speed": 95, "pressure": 988, "time": "2016-12-13T00:00:00Z"}
	]},
	{"name": "Cyclone Thane", "year": 2011, "category": "Very Severe", "max_wind_kmh": 140, "rainfall_mm": 120, "damage_crore": 2200, "dates": "Dec 25-31, 2011", "landfall": "Near Cuddalore", "impact": "Heavy rains, 48 deaths total",
	"track": [
	{"lat":8.5,"lon":88, "wind_speed": 55, "pressure": 1004, "time": "2011-12-25T00:00:00Z"},
	{"lat":9.5,"lon":86, "wind_speed": 75, "pressure": 996, "time": "2011-12-27T00:00:00Z"},
	{"lat":10.5,"lon":84, "wind_speed": 110, "pressure": 984, "time": "2011-12-28T00:00:00Z"},
	{"lat":11.5,"lon":82, "wind_speed": 140, "pressure": 972, "time": "2011-12-29T00:00:00Z"},
	{"lat":11.8,"lon":80, "wind_speed": 100, "pressure": 988, "time": "2011-12-30T00:00:00Z"}
	]},
	{"name": "Cyclone Nisha", "year": 2008, "category": "Cyclonic Storm", "max_wind_kmh": 75, "rainfall_mm": 500, "damage_crore": 4500, "dates": "Nov 25-27, 2008", "landfall": "Near Karaikal", "impact": "500mm in 48hrs, worst flooding in decades",
	"track": [
	{"lat":8,"lon":84, "wind_speed": 45, "pressure": 1006, "time": "2008-11-25T00:00:00Z"},
	{"lat":9,"lon":82.5, "wind_speed": 60, "pressure": 998, "time": "2008-11-26T00:00:00Z"},
	{"lat":10,"lon":81, "wind_speed": 75, "pressure": 992, "time": "2008-11-27T00:00:00Z"},
	{"lat":10.5,"lon":80, "wind_speed": 55, "pressure": 1000, "time": "2008-11-28T00:00:00Z"}
	]},
	]

	# Filter processing
	if year is not None:
	cyclones = [c for c in cyclones if c["year"] == year]
	if name is not None:
	n_lower = name.lower()
	cyclones = [c for c in cyclones if n_lower in c["name"].lower()]
	if min_wind is not None:
	cyclones = [c for c in cyclones if c["max_wind_kmh"] >= min_wind]

	avg_wind = sum(c["max_wind_kmh"] for c in cyclones) / len(cyclones) if cyclones else 0
	return {
	"cyclones": cyclones,
	"summary": {
	"total": len(cyclones),
	"avg_wind": round(avg_wind) if avg_wind else 0,
	"max_rainfall": max((c["rainfall_mm"] for c in cyclones), default=0),
	"total_damage": sum(c["damage_crore"] for c in cyclones),
	"period": f"{min((c['year'] for c in cyclones), default=0)}-{max((c['year'] for c in cyclones), default=0)}",
	}
	}


	# ════════════════════════════════
	# /tsunamis — Historical Indian Ocean tsunamis
	# ════════════════════════════════
	@app.get("/tsunamis")
	def get_tsunamis():
	    """Return the built-in catalogue of tsunami events together with summary figures."""
	    # Column names shared by every row below; the order is the key order of each event dict.
	    fields = (
	        "name", "date", "origin", "lat", "lon",
	        "magnitude", "wave_height_m", "fatalities", "description",
	    )
	    rows = (
	        ("Indian Ocean Tsunami", "2004-12-26", "Off Sumatra", 3.316, 95.854, 9.1, 30.0, 227898,
	         "Deadliest tsunami. 9.1 earthquake triggered waves across Indian Ocean."),
	        ("Krakatoa Tsunami", "1883-08-27", "Krakatoa, Sunda Strait", -6.102, 105.423, 0, 37.0, 36417,
	         "Volcanic eruption generated 37m waves."),
	        ("Makran Coast Tsunami", "1945-11-28", "Makran Coast, Pakistan", 24.5, 63.0, 8.1, 13.0, 4000,
	         "Major tsunami from Makran subduction zone."),
	        ("Andaman Tsunami", "1941-06-26", "Andaman Islands", 12.5, 92.5, 7.7, 1.5, 5000,
	         "Local tsunami affecting Andaman coastal communities."),
	        ("Sumatra Aftershock", "2005-03-28", "Off Sumatra", 2.074, 97.013, 8.6, 3.0, 1313,
	         "Aftershock of 2004 event, tsunami warning across Indian Ocean."),
	        ("Sulawesi Tsunami", "2018-09-28", "Sulawesi, Indonesia", -0.178, 119.84, 7.5, 11.0, 4340,
	         "11m waves struck Palu city."),
	        ("Anak Krakatau", "2018-12-22", "Anak Krakatau volcano", -6.102, 105.423, 0, 5.0, 437,
	         "Volcanic flank collapse generated unexpected tsunami."),
	        ("Great Assam Earthquake", "1950-08-15", "Assam-Tibet border", 28.5, 96.5, 8.6, 2.0, 1526,
	         "Massive flooding and river surges across Northeast India."),
	    )
	    catalogue = [dict(zip(fields, row)) for row in rows]

	    # Aggregate figures reported next to the raw list.
	    summary = {
	        "total": len(catalogue),
	        "max_wave": max(entry["wave_height_m"] for entry in catalogue),
	        "total_fatalities": sum(entry["fatalities"] for entry in catalogue),
	        "period": "1883-2018",
	    }
	    return {"events": catalogue, "summary": summary}


	# ════════════════════════════════
	# /temperature-map — Global temperature grid for heatmap
	# ════════════════════════════════

	# Cached /temperature-map payload and the moment it was built. The handler
	# below reuses the payload while it is less than one hour old.
	_temp_map_cache = None
	_temp_map_timestamp = None

	@app.get("/temperature-map")
	def get_temperature_map():
	    """Return a simulated land-temperature grid for the heatmap.

	    The grid is generated from a latitude/month formula plus regional
	    offsets and random noise; no external service is called. The result is
	    cached in module globals and served again for up to one hour.

	    Returns:
	        JSONResponse whose body has "points" (list of {"lat", "lon",
	        "temp_c"}), "count", "timestamp", "grid_step", "month" and "status".
	        If generation fails, a one-point fallback body with an "error"
	        field is returned instead.
	    """
	    global _temp_map_cache, _temp_map_timestamp
	    import random
	    import math
	    from fastapi.responses import JSONResponse

	    cors_headers = {"Access-Control-Allow-Origin": "*"}

	    # Return cached version if less than 1 hour old
	    if _temp_map_cache and _temp_map_timestamp:
	        age = (datetime.now() - _temp_map_timestamp).total_seconds()
	        if age < 3600:
	            return JSONResponse(content=_temp_map_cache, headers=cors_headers)

	    try:
	        # Grid spacing in degrees, used for both latitude and longitude.
	        STEP = 2
	        all_points = []
	        month = datetime.now().month

	        def is_land(lat, lon):
	            """Approximate land mask built from continental bounding boxes."""
	            if lat > 83 or lat < -60: return False

	            # North America (More granular)
	            if 60 < lat < 83 and -141 < lon < -52: return True   # Canada North
	            if 15 < lat < 60 and -130 < lon < -55: return True   # US/Canada/Mexico
	            if 7 < lat < 15 and -83 < lon < -77: return True     # Central America

	            # South America (Tapered)
	            if -15 < lat < 13 and -81 < lon < -35: return True   # North SA
	            if -35 < lat < -15 and -75 < lon < -40: return True  # Mid SA
	            if -56 < lat < -35 and -75 < lon < -65: return True  # South SA

	            # Africa (Split for Gulf of Guinea)
	            if 15 < lat < 37 and -18 < lon < 50: return True     # North Africa (Sahara)
	            if 4 < lat < 15 and -18 < lon < 52: return True      # West/Central North (Above Equator)
	            if -35 < lat < 4 and 9 < lon < 52: return True       # Central/South/East (Below Equator + East)
	            if -25 < lat < -12 and 43 < lon < 51: return True    # Madagascar

	            # Europe (More precise)
	            if 36 < lat < 72 and -10 < lon < 45: return True
	            if 55 < lat < 72 and 5 < lon < 32: return True       # Scandinavia
	            if 63 < lat < 67 and -25 < lon < -13: return True    # Iceland

	            # Eurasia (Russia/Asia)
	            if 15 < lat < 75 and 45 < lon < 180: return True     # Main Eurasia
	            if 5 < lat < 35 and 60 < lon < 100: return True      # India/South Asia
	            if -10 < lat < 25 and 95 < lon < 150: return True    # SE Asia islands

	            # Australia & NZ
	            if -40 < lat < -10 and 113 < lon < 154: return True  # Australia
	            if -48 < lat < -34 and 165 < lon < 179: return True  # New Zealand

	            # Greenland
	            if 60 < lat < 84 and -60 < lon < -15: return True

	            # UK/Ireland — this check used to sit after an unconditional
	            # `return False` and therefore never ran.
	            if 49 < lat < 61 and -11 < lon < 2: return True

	            return False

	        # Latitude of the warmest band. It depends only on the month, so it
	        # is computed once instead of once per latitude row.
	        peak_lat = 12 * math.sin(math.radians((month - 3) * 30))

	        for lat in range(-56, 73, STEP):
	            base_temp = 30 - abs(lat - peak_lat) * 0.58

	            for lon in range(-180, 180, STEP):
	                if not is_land(lat, lon):
	                    continue

	                # Desert heat boost
	                desert = 0
	                if 15 < lat < 35 and -10 < lon < 60: desert = 8        # Sahara/Arabia
	                elif 20 < lat < 40 and 40 < lon < 80: desert = 6       # Iran/Pakistan
	                elif -35 < lat < -15 and 115 < lon < 140: desert = 7   # Australia outback
	                elif 35 < lat < 50 and 60 < lon < 115: desert = 4      # Central Asia steppe

	                # Mountain cooling
	                mtn = 0
	                if 25 < lat < 45 and 65 < lon < 105: mtn = -10         # Himalayas
	                elif -35 < lat < 5 and -80 < lon < -65: mtn = -8       # Andes
	                elif 35 < lat < 50 and -125 < lon < -105: mtn = -6     # Rockies
	                elif 44 < lat < 48 and 5 < lon < 15: mtn = -7          # Alps
	                elif 10 < lat < 20 and 35 < lon < 42: mtn = -5         # Ethiopian highlands

	                # Tropical rainforest cooling
	                jungle = 0
	                if -15 < lat < 5 and -75 < lon < -45: jungle = -3      # Amazon
	                if -5 < lat < 5 and 12 < lon < 30: jungle = -2         # Congo

	                # Month-dependent adjustment applied to the 45-65N, 40-130E box
	                continental = 0
	                if 45 < lat < 65 and 40 < lon < 130:
	                    continental = -6 * math.sin(math.radians((month - 7) * 30))

	                noise = random.uniform(-1.8, 1.8)
	                temp = base_temp + desert + mtn + jungle + continental + noise
	                # Clamp to the [-42, 52] range after rounding to one decimal.
	                temp = max(-42, min(52, round(temp, 1)))

	                all_points.append({"lat": lat, "lon": lon, "temp_c": temp})

	        result = {
	            "points": all_points,
	            "count": len(all_points),
	            "timestamp": datetime.now().isoformat(),
	            "grid_step": STEP,
	            "month": month,
	            "status": "climate_model_v2"
	        }

	        # Cache the result
	        _temp_map_cache = result
	        _temp_map_timestamp = datetime.now()

	        return JSONResponse(content=result, headers=cors_headers)
	    except Exception as e:
	        logger.error(f"Temperature map error: {str(e)}")
	        # Minimal single-point fallback so the client still receives a valid payload
	        fallback_res = {
	            "points": [{"lat": 13, "lon": 80, "temp_c": 30}],
	            "count": 1,
	            "error": str(e)
	        }
	        return JSONResponse(content=fallback_res, headers=cors_headers)


	# ════════════════════════════════════════════════════════════
	# /aqi — Air Quality Index for Chennai (Open-Meteo Air Quality API)
	# ════════════════════════════════════════════════════════════
	@app.get("/aqi")
	def get_aqi():
	    """Fetch the current air-quality reading for (LAT, LON) from the Open-Meteo air quality API.

	    Returns:
	        dict with "aqi" (the European AQI value), "category", "color",
	        "advice" and the raw pollutant readings ("pm2_5", "pm10",
	        "nitrogen_dioxide", "ozone", "carbon_monoxide"); or
	        {"error": <message>} when the request fails or the response carries
	        no AQI value.
	    """
	    # NOTE(review): 300 is presumably a TTL in seconds — confirm against _get_cache.
	    # Nothing in this handler writes the response back to the cache; verify
	    # whether a writer exists elsewhere in the file.
	    cached = _get_cache("aqi", 300)
	    if cached:
	        return cached

	    # (inclusive upper bound, category, color, advice), checked in order.
	    bands = (
	        (20, "Good", "#22c55e", "Air quality is excellent. Perfect for outdoor activities."),
	        (40, "Fair", "#84cc16", "Air quality is acceptable. Sensitive groups should take care."),
	        (60, "Moderate", "#eab308", "Moderate pollution. Limit prolonged outdoor exertion."),
	        (80, "Poor", "#f97316", "Poor air quality. Avoid outdoor activities if possible."),
	        (100, "Very Poor", "#ef4444", "Very poor air quality. Stay indoors and wear a mask outside."),
	    )
	    # Used when the value is above every bound in `bands`.
	    worst_band = ("Extremely Poor", "#7c3aed", "Hazardous conditions. Avoid all outdoor activities.")

	    url = "https://air-quality-api.open-meteo.com/v1/air-quality"
	    params = {
	        "latitude": LAT,
	        "longitude": LON,
	        "current": "pm10,pm2_5,carbon_monoxide,nitrogen_dioxide,ozone,european_aqi",
	        "timezone": "Asia/Kolkata",
	    }
	    try:
	        r = requests.get(url, params=params, timeout=10)
	        r.raise_for_status()
	        current = r.json().get("current") or {}

	        aqi = current.get("european_aqi")
	        if aqi is None:
	            # Previously a missing value defaulted to 0 and was reported as
	            # "Good", and a null value failed with an opaque TypeError.
	            return {"error": "AQI value missing from Open-Meteo response"}

	        # AQI category classification
	        category, color, advice = worst_band
	        for upper, band_category, band_color, band_advice in bands:
	            if aqi <= upper:
	                category, color, advice = band_category, band_color, band_advice
	                break

	        return {
	            "aqi": aqi,
	            "category": category,
	            "color": color,
	            "advice": advice,
	            "pm2_5": current.get("pm2_5"),
	            "pm10": current.get("pm10"),
	            "nitrogen_dioxide": current.get("nitrogen_dioxide"),
	            "ozone": current.get("ozone"),
	            "carbon_monoxide": current.get("carbon_monoxide"),
	        }
	    except Exception as e:
	        logger.error(f"AQI fetch error: {str(e)}")
	        return {"error": str(e)}


	# ════════════════════════════════════════════════════════════
	# /flood-risk — Flood Risk Score for Chennai
	# ════════════════════════════════════════════════════════════
	@app.get("/flood-risk")
	def get_flood_risk():
	    """Compute a 0-100 flood risk score from current rain, humidity and the 3-day forecast.

	    Data comes from the Open-Meteo forecast API for (LAT, LON).

	    Returns:
	        dict with "score", "level", "color", "advice", "icon", a "factors"
	        breakdown and "chennai_note"; or {"error": <message>} when the
	        request or the computation fails.
	    """
	    # NOTE(review): 300 is presumably a TTL in seconds — confirm against _get_cache.
	    cached = _get_cache("flood_risk", 300)
	    if cached:
	        return cached
	    try:
	        # Fetch current weather
	        weather_url = "https://api.open-meteo.com/v1/forecast"
	        weather_params = {
	            "latitude": LAT, "longitude": LON,
	            "current": "precipitation,relative_humidity_2m,rain",
	            "daily": "precipitation_sum,precipitation_probability_max",
	            "forecast_days": 3,
	            "timezone": "Asia/Kolkata",
	        }
	        r = requests.get(weather_url, params=weather_params, timeout=10)
	        r.raise_for_status()
	        data = r.json()
	        current = data.get("current") or {}
	        daily = data.get("daily") or {}

	        # Flood risk factors; null or missing readings count as 0.
	        current_rain = current.get("rain", 0) or 0
	        humidity = current.get("relative_humidity_2m", 0) or 0
	        precip_sums = daily.get("precipitation_sum") or []
	        precip_probs = daily.get("precipitation_probability_max") or []

	        total_forecast_rain = sum(p for p in precip_sums if p)
	        # `default=0` matters: when every forecast probability is 0 or null
	        # the filtered sequence is empty, and a bare max() would raise.
	        max_prob = max((p for p in precip_probs if p), default=0)

	        # Score calculation (0-100)
	        score = 0
	        score += min(current_rain * 5, 25)          # current rain (max 25pts)
	        score += min(humidity * 0.2, 15)            # humidity (max 15pts)
	        score += min(total_forecast_rain * 2, 30)   # 3-day forecast rain (max 30pts)
	        score += min(max_prob * 0.3, 30)            # precipitation probability (max 30pts)

	        # Chennai elevation factor — low lying city, higher base risk
	        score = min(score * 1.15, 100)
	        score = round(score)

	        # Risk level
	        if score <= 20:
	            level = "Very Low"
	            color = "#22c55e"
	            advice = "No flood risk. Normal conditions."
	            icon = "🟢"
	        elif score <= 40:
	            level = "Low"
	            color = "#84cc16"
	            advice = "Minor risk. Monitor rainfall forecasts."
	            icon = "🟡"
	        elif score <= 60:
	            level = "Moderate"
	            color = "#eab308"
	            advice = "Moderate risk. Avoid low-lying areas during heavy rain."
	            icon = "🟠"
	        elif score <= 80:
	            level = "High"
	            color = "#f97316"
	            advice = "High flood risk. Stay alert. Avoid underpasses and flood-prone zones."
	            icon = "🔴"
	        else:
	            level = "Extreme"
	            color = "#ef4444"
	            advice = "Extreme flood risk! Stay indoors. Avoid all travel if possible."
	            icon = "🚨"

	        return {
	            "score": score,
	            "level": level,
	            "color": color,
	            "advice": advice,
	            "icon": icon,
	            "factors": {
	                "current_rainfall_mm": round(current_rain, 1),
	                "humidity_pct": humidity,
	                "forecast_3day_mm": round(total_forecast_rain, 1),
	                "max_precip_probability": max_prob,
	            },
	            "chennai_note": "Chennai is low-lying (6m ASL) with historically high flood vulnerability",
	        }
	    except Exception as e:
	        logger.error(f"Flood risk error: {str(e)}")
	        return {"error": str(e)}


	# ════════════════════════════════════════════════════════════
	# /seasonal — Seasonal Comparison for current month
	# ════════════════════════════════════════════════════════════
	@app.get("/seasonal")
	def get_seasonal():
	    """Compare the current month's weather with the same month in the previous five years.

	    Daily data comes from the Open-Meteo archive API for (LAT, LON).

	    Returns:
	        dict with "month", "year", "current_month" (averages for this month
	        up to seven days ago, or None values when unavailable),
	        "historical_avg", "yearly_breakdown" and "comparison"; or
	        {"error": <message>} when no historical year could be fetched or an
	        unexpected failure occurs.
	    """
	    archive_url = "https://archive-api.open-meteo.com/v1/archive"

	    def summarize_range(start, end):
	        """Fetch daily archive values for [start, end] and reduce them to averages.

	        Returns None when the response holds no usable maximum temperatures.
	        Request and HTTP errors propagate to the caller.
	        """
	        r = requests.get(archive_url, params={
	            "latitude": LAT, "longitude": LON,
	            "start_date": start.strftime("%Y-%m-%d"),
	            "end_date": end.strftime("%Y-%m-%d"),
	            "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum",
	            "timezone": "Asia/Kolkata",
	        }, timeout=15)
	        r.raise_for_status()
	        d = r.json().get("daily") or {}
	        temps_max = [t for t in (d.get("temperature_2m_max") or []) if t is not None]
	        temps_min = [t for t in (d.get("temperature_2m_min") or []) if t is not None]
	        precip = [p for p in (d.get("precipitation_sum") or []) if p is not None]
	        if not temps_max:
	            return None
	        return {
	            "avg_max": round(sum(temps_max) / len(temps_max), 1),
	            "avg_min": round(sum(temps_min) / len(temps_min), 1) if temps_min else None,
	            "total_precip": round(sum(precip), 1) if precip else 0,
	        }

	    try:
	        now = datetime.now()
	        current_month = now.month
	        current_year = now.year
	        # Requests never ask for dates newer than seven days ago.
	        archive_limit = now - timedelta(days=7)

	        # Fetch historical data for the same month over last 5 years
	        yearly_data = []
	        for year_offset in range(1, 6):
	            year = current_year - year_offset
	            month_start = datetime(year, current_month, 1)
	            # Last day of month
	            if current_month == 12:
	                month_end = datetime(year, 12, 31)
	            else:
	                month_end = datetime(year, current_month + 1, 1) - timedelta(days=1)

	            # Don't fetch future dates
	            if month_end > archive_limit:
	                month_end = archive_limit

	            if month_start >= month_end:
	                continue

	            try:
	                stats = summarize_range(month_start, month_end)
	            except Exception as e:
	                # Best effort: one failed year must not sink the whole comparison.
	                logger.warning(f"Seasonal fetch failed for {year}: {str(e)}")
	                continue
	            if stats:
	                yearly_data.append({"year": year, **stats})

	        if not yearly_data:
	            return {"error": "Could not fetch historical data"}

	        # Calculate multi-year averages
	        avg_max = round(sum(y["avg_max"] for y in yearly_data) / len(yearly_data), 1)
	        # Average only the years that actually have a minimum. The divisor
	        # used to be the count of all years even when some were skipped.
	        min_values = [y["avg_min"] for y in yearly_data if y["avg_min"] is not None]
	        avg_min = round(sum(min_values) / len(min_values), 1) if min_values else None
	        avg_precip = round(sum(y["total_precip"] for y in yearly_data) / len(yearly_data), 1)

	        # Fetch current month so far (up to the archive limit)
	        month_start_this_year = datetime(current_year, current_month, 1)
	        current_data = {"avg_max": None, "avg_min": None, "total_precip": None}

	        if month_start_this_year < archive_limit:
	            try:
	                stats = summarize_range(month_start_this_year, archive_limit)
	                if stats:
	                    current_data = stats
	            except Exception as e:
	                # Best effort: the historical part is still worth returning.
	                logger.warning(f"Seasonal fetch failed for current month: {str(e)}")

	        month_name = now.strftime("%B")
	        cur_max = current_data["avg_max"]
	        cur_precip = current_data["total_precip"]

	        return {
	            "month": month_name,
	            "year": current_year,
	            "current_month": current_data,
	            "historical_avg": {
	                "avg_max": avg_max,
	                "avg_min": avg_min,
	                "avg_precip": avg_precip,
	                "based_on_years": len(yearly_data),
	            },
	            "yearly_breakdown": yearly_data,
	            "comparison": {
	                # `is not None` so that a legitimate 0.0 reading is still compared.
	                "temp_diff": round(cur_max - avg_max, 1) if cur_max is not None else None,
	                "precip_diff": round(cur_precip - avg_precip, 1) if cur_precip is not None else None,
	                "is_hotter": cur_max > avg_max if cur_max is not None else None,
	                "is_wetter": cur_precip > avg_precip if cur_precip is not None else None,
	            }
	        }
	    except Exception as e:
	        return {"error": str(e)}


	# ════════════════════════════════════════════════════════════
	# /ask — INTELLIGENT QUERY ENGINE v2
	# Understands dates, fetches precise data, focused answers.
	# ════════════════════════════════════════════════════════════

	import re as _re

	# Accepted spellings for each calendar month, listed January to December.
	_MONTH_ALIASES = (
	    ("jan", "january"),
	    ("feb", "february"),
	    ("mar", "march"),
	    ("apr", "april"),
	    ("may",),
	    ("jun", "june"),
	    ("jul", "july"),
	    ("aug", "august"),
	    ("sep", "september"),
	    ("oct", "october"),
	    ("nov", "november"),
	    ("dec", "december"),
	)

	# Lower-case month spelling -> month number (1-12).
	MONTH_MAP = {
	    alias: number
	    for number, aliases in enumerate(_MONTH_ALIASES, start=1)
	    for alias in aliases
	}


	def parse_date_from_query(query: str):
	    """
	    Extract a date from a natural language query.

	    Checks are made in this order, and the first hit wins:
	    - 'same date' / 'same day' / 'this day' / 'today vs' (N years back, default 1)
	    - 'yesterday', 'today' / 'right now' / 'current', 'tomorrow'
	    - '5 days ago', '3 weeks ago', '2 months ago', '1 year ago'
	      (a month counts as 30 days)
	    - 'last week', 'last month', 'last year', 'next week', 'next month'
	    - '16 feb 2025', 'february 16, 2025', 'on Jan 10 2024'
	    - '2025-02-16' (ISO), '16/02/2025' or '16-02-2025' (DD/MM/YYYY first,
	      MM/DD/YYYY as fallback)
	    - 'month YYYY' (e.g., 'march 2024' → March 1, 2024)
	    - Bare year 'YYYY' other than the current year (e.g., '2024' → Jan 1, 2024)

	    Returns (datetime, date_type) or (None, None).
	    date_type: 'specific_past', 'today', 'specific_future', 'relative_past', 'relative_future'
	    """
	    q = query.lower().strip()
	    now = datetime.now()

	    # One capture group accepting every spelling that is a key of MONTH_MAP.
	    month_names = (
	        r'(jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|june?|july?|'
	        r'aug(?:ust)?|sep(?:tember)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)'
	    )

	    def classify(dt):
	        if dt.date() < now.date():
	            return "specific_past"
	        elif dt.date() == now.date():
	            return "today"
	        else:
	            return "specific_future"

	    def years_back(n):
	        """Return `now` shifted n years back (Feb 29 maps to Feb 28), or None if out of range."""
	        try:
	            return now.replace(year=now.year - n)
	        except ValueError:
	            pass
	        try:
	            return now.replace(year=now.year - n, day=28)
	        except ValueError:
	            return None

	    # ── Relative keywords ─────────────────────────────

	    # "same date last year" / "this day last year" / "today vs last year"
	    if any(p in q for p in ["same date", "same day", "this day", "today vs", "today versus"]):
	        m = _re.search(r'(\d+)\s+years?\s+ago', q)
	        offset_years = int(m.group(1)) if m else 1  # default: 1 year back
	        dt = years_back(offset_years)
	        if dt is not None:
	            return dt, "relative_past"

	    if "yesterday" in q:
	        return now - timedelta(days=1), "relative_past"

	    if "today" in q or "right now" in q or "current" in q:
	        return now, "today"

	    if "tomorrow" in q:
	        return now + timedelta(days=1), "relative_future"

	    # "N days/weeks/months/years ago"
	    m = _re.search(r'(\d+)\s*(days?|weeks?|months?|years?)\s+ago', q)
	    if m:
	        n, unit = int(m.group(1)), m.group(2)
	        try:
	            if unit.startswith("day"):
	                dt = now - timedelta(days=n)
	            elif unit.startswith("week"):
	                dt = now - timedelta(weeks=n)
	            elif unit.startswith("month"):
	                dt = now - timedelta(days=n * 30)
	            else:
	                dt = years_back(n)
	        except (ValueError, OverflowError):
	            # Absurdly large offsets fall outside datetime's range; treat as no match.
	            dt = None
	        if dt is not None:
	            return dt, "relative_past"

	    # "last week/month/year"
	    if "last week" in q:
	        return now - timedelta(days=7), "relative_past"
	    if "last month" in q:
	        return now - timedelta(days=30), "relative_past"
	    if "last year" in q:
	        # Preserve exact month/day — just subtract 1 year
	        return years_back(1), "relative_past"

	    # "next week/month"
	    if "next week" in q:
	        return now + timedelta(days=7), "relative_future"
	    if "next month" in q:
	        return now + timedelta(days=30), "relative_future"

	    # ── Explicit date patterns ────────────────────────
	    # An impossible calendar date (e.g. 31 feb) is skipped so later patterns can still match.

	    # Pattern: "DD month YYYY" (e.g., "16 feb 2025", "on 10 jan 2024")
	    m = _re.search(r'(\d{1,2})\s+' + month_names + r'\s*,?\s*(\d{4})', q)
	    if m:
	        day, month_str, year = int(m.group(1)), m.group(2), int(m.group(3))
	        month = MONTH_MAP.get(month_str)
	        if month:
	            try:
	                dt = datetime(year, month, day)
	                return dt, classify(dt)
	            except ValueError:
	                pass

	    # Pattern: "month DD YYYY" (e.g., "february 16, 2025", "jan 10 2024")
	    # \b keeps the month from matching the tail of another word ("grammar 1 2024").
	    m = _re.search(r'\b' + month_names + r'\s+(\d{1,2})\s*,?\s*(\d{4})', q)
	    if m:
	        month_str, day, year = m.group(1), int(m.group(2)), int(m.group(3))
	        month = MONTH_MAP.get(month_str)
	        if month:
	            try:
	                dt = datetime(year, month, day)
	                return dt, classify(dt)
	            except ValueError:
	                pass

	    # Pattern: "YYYY-MM-DD" (ISO format)
	    m = _re.search(r'(\d{4})-(\d{2})-(\d{2})', q)
	    if m:
	        try:
	            dt = datetime(int(m.group(1)), int(m.group(2)), int(m.group(3)))
	            return dt, classify(dt)
	        except ValueError:
	            pass

	    # Pattern: "DD/MM/YYYY" or "DD-MM-YYYY" (common Indian format)
	    m = _re.search(r'(\d{1,2})[/\-](\d{1,2})[/\-](\d{4})', q)
	    if m:
	        a, b, year = int(m.group(1)), int(m.group(2)), int(m.group(3))
	        # Try DD/MM/YYYY first (India), then MM/DD/YYYY
	        for month, day in ((b, a), (a, b)):
	            try:
	                dt = datetime(year, month, day)
	                return dt, classify(dt)
	            except ValueError:
	                continue

	    # Pattern: "month YYYY" (e.g., "march 2024" → defaults to 1st of month)
	    m = _re.search(r'\b' + month_names + r'\s+(\d{4})', q)
	    if m:
	        month_str, year = m.group(1), int(m.group(2))
	        month = MONTH_MAP.get(month_str)
	        if month:
	            try:
	                dt = datetime(year, month, 1)
	                return dt, classify(dt)
	            except ValueError:
	                pass

	    # Pattern: bare "YYYY" — just a year like "2024" or "in 2023"
	    # Must be 4 digits starting with 19 or 20, not part of a longer number/date
	    m = _re.search(r'(?<!\d)(?<!\d[-/])(19\d{2}|20\d{2})(?![-/]\d)(?!\d)', q)
	    if m:
	        year = int(m.group(1))
	        # Don't match the current year as a specific date (it's ambiguous)
	        if year != now.year:
	            dt = datetime(year, 1, 1)
	            return dt, classify(dt)

	    return None, None


	def parse_days_from_query(query: str, default: int = 7) -> int:
	    """Extract a day count such as '5 days' or '3 day' from the query.

	    Counts that belong to an 'N days ago' phrase are ignored; if no other
	    count is present, `default` is returned.
	    """
	    q = query.lower()
	    # The \b after "days?" is essential. Without it the engine backtracks
	    # from "days" to "day", the lookahead then sees "s ago" instead of
	    # " ago", and "5 days ago" is wrongly accepted as a 5-day request.
	    m = _re.search(r'(\d+)\s*days?\b(?!\s+ago)', q)
	    return int(m.group(1)) if m else default


	def fetch_historical_weather(target_date: datetime, days_range: int = 1):
	    """
	    Fetch historical weather from the Open-Meteo Archive API for a date
	    or a range of consecutive dates at (LAT, LON).

	    Args:
	        target_date: First day of the range.
	        days_range: Number of days to fetch starting at target_date;
	            values below 1 are treated as 1.

	    Returns:
	        {"daily": [...], "hourly": [...], "source": "Open-Meteo Archive API"}
	        on success, or {"error": <message>} when the range ends less than
	        five days ago or the request fails.
	    """
	    start = target_date
	    # Clamp so that a zero or negative range cannot produce end < start.
	    end = target_date + timedelta(days=max(days_range, 1) - 1)

	    # Archive API lags ~5-7 days, check if date is available
	    archive_limit = datetime.now() - timedelta(days=5)
	    if end.date() > archive_limit.date():
	        return {"error": f"Archive data not yet available for {end.strftime('%Y-%m-%d')}. Data lags 5-7 days."}

	    def value_at(section, key, idx, default=None):
	        """Return section[key][idx], or `default` when the series is missing, null or too short."""
	        series = section.get(key) or []
	        return series[idx] if idx < len(series) else default

	    url = "https://archive-api.open-meteo.com/v1/archive"
	    params = {
	        "latitude": LAT, "longitude": LON,
	        "start_date": start.strftime("%Y-%m-%d"),
	        "end_date": end.strftime("%Y-%m-%d"),
	        "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum,wind_speed_10m_max,wind_direction_10m_dominant",
	        "hourly": "temperature_2m,relative_humidity_2m,wind_speed_10m,cloud_cover,precipitation",
	        "timezone": "Asia/Kolkata",
	    }
	    try:
	        r = requests.get(url, params=params, timeout=15)
	        r.raise_for_status()
	        data = r.json()

	        daily = data.get("daily") or {}
	        hourly = data.get("hourly") or {}

	        # The daily series are now bounds-checked like the hourly ones, so a
	        # short or missing series yields a default instead of an IndexError
	        # that would discard the whole response.
	        days_data = []
	        for i, date_str in enumerate(daily.get("time") or []):
	            dt = datetime.strptime(date_str, "%Y-%m-%d")
	            days_data.append({
	                "date": date_str,
	                "day": dt.strftime("%A"),
	                "temp_max": value_at(daily, "temperature_2m_max", i),
	                "temp_min": value_at(daily, "temperature_2m_min", i),
	                "precipitation": value_at(daily, "precipitation_sum", i, 0),
	                "wind_speed_max": value_at(daily, "wind_speed_10m_max", i, 0),
	            })

	        # Hourly records for every day in the requested range
	        hourly_data = []
	        for i, t in enumerate(hourly.get("time") or []):
	            hourly_data.append({
	                "time": t,
	                "temperature": value_at(hourly, "temperature_2m", i),
	                "humidity": value_at(hourly, "relative_humidity_2m", i),
	                "wind_speed": value_at(hourly, "wind_speed_10m", i),
	                "cloud_cover": value_at(hourly, "cloud_cover", i),
	                "precipitation": value_at(hourly, "precipitation", i, 0),
	            })

	        return {"daily": days_data, "hourly": hourly_data, "source": "Open-Meteo Archive API"}
	    except Exception as e:
	        return {"error": str(e)}


	def classify_query(query: str):
	    """
	    Classify a query into intent labels.

	    Keywords are matched at the start of a word, so "forecast" also matches
	    "forecasts" but "all" does not match "rainfall". One- and two-letter
	    keywords ("ai", "ml") must match a whole word, so "ai" no longer fires
	    on "rain" or "chennai".

	    Returns a list, in detection order and without duplicates, drawn from:
	      weather, weather_history, prediction,
	      cyclone, cyclone_prediction,
	      earthquake, tsunami, disaster
	    Falls back to ["weather"] when nothing matches.
	    """
	    q = query.lower().strip()
	    intents = []

	    def mentions(keywords):
	        """Return True when any keyword occurs in the query at a word start."""
	        for kw in keywords:
	            closing = r'\b' if len(kw) <= 2 else ''
	            if _re.search(r'\b' + _re.escape(kw) + closing, q):
	                return True
	        return False

	    # ── Detect time orientation (past vs future) ──
	    past_kw = ["last year", "previous", "history", "historical", "ago", "past",
	               "same date", "same day", "this day", "yesterday", "back in",
	               "was", "were", "happened", "occurred", "hit", "struck", "recent"]
	    future_kw = ["predict", "prediction", "next", "forecast", "tomorrow",
	                 "coming", "upcoming", "expect", "will", "probability",
	                 "chance", "future", "model", "ml", "ai"]

	    is_past = mentions(past_kw)
	    is_future = mentions(future_kw)

	    # ── Weather ──
	    weather_kw = ["weather", "temperature", "temp", "hot", "cold", "rain", "wind", "humidity",
	                  "climate", "heat", "sunny", "cloudy", "precipitation", "pressure",
	                  "detail", "condition", "report"]
	    if mentions(weather_kw):
	        if is_past:
	            intents.append("weather_history")
	        elif is_future:
	            intents.append("prediction")
	        else:
	            intents.append("weather")  # current by default

	    # ── Cyclone ──
	    cyclone_kw = ["cyclone", "hurricane", "typhoon", "storm", "wind storm", "tropical",
	                  "bay of bengal", "vardah", "nivar", "gaja", "mandous", "michaung",
	                  "thane", "nisha", "fani", "amphan", "hudhud"]
	    if mentions(cyclone_kw):
	        if is_future:
	            intents.append("cyclone_prediction")
	        else:
	            intents.append("cyclone")  # history/data retrieval

	    # ── Earthquake ──
	    quake_kw = ["earthquake", "quake", "seismic", "magnitude", "richter", "tremor",
	                "tectonic", "fault", "aftershock", "usgs"]
	    if mentions(quake_kw):
	        intents.append("earthquake")

	    # ── Tsunami ──
	    tsunami_kw = ["tsunami", "tidal wave", "ocean wave", "indian ocean", "sumatra",
	                  "krakatoa", "sulawesi", "wave height"]
	    if mentions(tsunami_kw):
	        intents.append("tsunami")

	    # ── Pure prediction (no specific domain) ──
	    if not intents and is_future:
	        intents.append("prediction")

	    # ── Disaster overview ──
	    disaster_kw = ["disaster", "catastrophe", "calamity", "danger", "risk",
	                   "overview", "summary", "all"]
	    if mentions(disaster_kw):
	        intents.append("disaster")

	    # Default: current weather
	    if not intents:
	        intents = ["weather"]

	    # dict.fromkeys drops duplicates while keeping detection order; the
	    # previous list(set(...)) returned the labels in an arbitrary order.
	    return list(dict.fromkeys(intents))


	# ── Known cyclone names for query context extraction ──
	KNOWN_CYCLONES = ["michaung", "mandous", "nivar", "gaja", "vardah", "thane", "nisha",
	                  "fani", "amphan", "hudhud", "phailin", "laila", "jal"]

	# ── Known place names for query context extraction ──
	KNOWN_LOCATIONS = ["chennai", "mumbai", "kolkata", "vizag", "visakhapatnam",
	                   "bay of bengal", "arabian sea", "tamil nadu", "andhra pradesh",
	                   "odisha", "west bengal", "india", "puducherry", "cuddalore",
	                   "nagapattinam", "mahabalipuram"]


	def extract_query_context(query: str):
	    """
	    Extract structured context from a natural-language query.

	    Returns a dict with:
	    - cyclone_name: first entry of KNOWN_CYCLONES found in the query, else None
	    - year: first standalone 4-digit year (19xx/20xx) that is not part of a
	      longer number or a dash/slash date, else None
	    - location: first entry of KNOWN_LOCATIONS found in the query, else None
	    - wants_recent: True when the query contains "recent", "latest", "last" or "newest"
	    - wants_comparison: True when the query contains a comparison phrase
	      (" vs ", "versus", "compare", "difference between", ...)
	    """
	    q = query.lower().strip()

	    # First known cyclone / location mentioned; list order decides ties.
	    cyclone_name = next((name for name in KNOWN_CYCLONES if name in q), None)
	    location = next((loc for loc in KNOWN_LOCATIONS if loc in q), None)

	    # Extract year (4-digit, 1900-2099). The alternation must be a plain
	    # "|": the escaped form "\|" matches a literal pipe, so no year was
	    # ever found.
	    m = _re.search(r'(?<!\d)(?<!\d[-/])(19\d{2}|20\d{2})(?![-/]\d)(?!\d)', q)
	    year = int(m.group(1)) if m else None

	    # Detect modifiers
	    wants_recent = any(k in q for k in ["recent", "latest", "last", "newest", "most recent"])
	    wants_comparison = any(k in q for k in [" vs ", "versus", "compared to", "compare",
	                                            "difference between", "today vs"])

	    return {
	        "cyclone_name": cyclone_name,
	        "year": year,
	        "location": location,
	        "wants_recent": wants_recent,
	        "wants_comparison": wants_comparison,
	    }


	def build_focused_analysis(query, intents, data_sources, target_date, date_type):
	    """
	    Build a detailed, structured analysis that DIRECTLY answers the question.

	    Produces multi-line, human-readable summaries instead of one-liners. Each
	    populated entry of ``data_sources`` contributes a section, in this order:
	    historical weather (otherwise current weather), forecast, earthquakes,
	    cyclones, tsunamis, ML ensemble.

	    Args:
	        query: The user's question. Kept for interface compatibility; unused.
	        intents: Detected intents. Kept for interface compatibility; unused.
	        data_sources: Dict of source name -> payload dict. Missing or falsy
	            payloads are skipped, as are payloads carrying an "error" key
	            (historical weather reports its error text instead).
	        target_date: ``datetime`` or ``date`` the question is about, or None.
	        date_type: Date classification; only "today" and "specific_future"
	            are inspected here.

	    Returns:
	        str: All sections joined by newlines, or a generic help message when
	        no section could be produced.
	    """
	    lines = []
	    now = datetime.now()

	    hw = data_sources.get("historical_weather")
	    w = data_sources.get("weather")

	    # ── Historical weather for specific date ──
	    if hw:
	        if "error" not in hw and hw.get("daily"):
	            daily = hw["daily"]
	            target_str = target_date.strftime("%Y-%m-%d") if target_date else daily[0]["date"]
	            # Fall back to the first returned day when the exact date is absent.
	            target_data = next((d for d in daily if d["date"] == target_str), daily[0])
	            dt = datetime.strptime(target_data["date"], "%Y-%m-%d")

	            lines.append(
	                f"{dt.strftime('%B %d %Y')} – Chennai\n"
	                f"Max Temp: {target_data['temp_max']}°C\n"
	                f"Min Temp: {target_data['temp_min']}°C\n"
	                f"Rain: {target_data['precipitation']} mm\n"
	                f"Wind: {target_data['wind_speed_max']} km/h"
	            )

	            # If there's also current weather data, add comparison
	            if w and "error" not in w:
	                # `or 0` on both operands: either reading may be present but None.
	                temp_diff = round((w.get('temperature') or 0) - (target_data['temp_max'] or 0), 1)
	                lines.append(
	                    f"\nToday ({now.strftime('%B %d %Y')}) for comparison:\n"
	                    f"Current Temp: {w.get('temperature')}°C\n"
	                    f"Wind: {w.get('wind_speed')} km/h\n"
	                    f"Humidity: {w.get('humidity')}%\n"
	                    f"Temp difference: {temp_diff}°C vs last year's max"
	                )
	        elif hw.get("error"):
	            lines.append(f"Could not fetch historical data: {hw['error']}")

	    # ── Current weather (only if no historical comparison already added) ──
	    elif w:
	        if "error" not in w and (date_type == "today" or target_date is None):
	            lines.append(
	                f"Current Weather – Chennai ({now.strftime('%B %d %Y, %H:%M')})\n"
	                f"Temperature: {w.get('temperature')}°C\n"
	                f"Wind Speed: {w.get('wind_speed')} km/h\n"
	                f"Wind Direction: {w.get('wind_direction', 'N/A')}°\n"
	                f"Humidity: {w.get('humidity')}%\n"
	                f"Conditions: {w.get('description', 'N/A')}"
	            )

	    # ── Forecast ──
	    fc = data_sources.get("forecast")
	    if fc and "error" not in fc and fc.get("daily"):
	        if target_date and date_type == "specific_future":
	            target_str = target_date.strftime("%Y-%m-%d")
	            hit = next((d for d in fc["daily"] if d["date"] == target_str), None)
	            if hit is not None:
	                dt = datetime.strptime(hit["date"], "%Y-%m-%d")
	                lines.append(
	                    f"Forecast for {dt.strftime('%B %d %Y')} ({dt.strftime('%A')}) – Chennai\n"
	                    f"Max Temp: {hit['temp_max']}°C\n"
	                    f"Min Temp: {hit['temp_min']}°C\n"
	                    f"Rain: {hit['precipitation']} mm\n"
	                    f"Wind: {hit['wind_speed_max']} km/h"
	                )
	            else:
	                # target_date may be a datetime or a plain date; only the
	                # former has .date(), so normalise before subtracting.
	                target_day = target_date.date() if isinstance(target_date, datetime) else target_date
	                days_ahead = (target_day - now.date()).days
	                lines.append(
	                    f"The date {target_str} is {days_ahead} days ahead, beyond the 7-day forecast range. "
	                    f"Running ML models for extended prediction."
	                )
	        elif not target_date or date_type == "today":
	            first = fc["daily"][0]
	            dt = datetime.strptime(first["date"], "%Y-%m-%d")
	            lines.append(
	                f"Today's Forecast ({dt.strftime('%A, %B %d %Y')}) – Chennai\n"
	                f"Max Temp: {first['temp_max']}°C\n"
	                f"Min Temp: {first['temp_min']}°C\n"
	                f"Rain: {first['precipitation']} mm\n"
	                f"Wind: {first['wind_speed_max']} km/h"
	            )

	    # ── Earthquakes ──
	    eq = data_sources.get("earthquake")
	    if eq and "error" not in eq:
	        summary = eq.get("summary", {})
	        lines.append(
	            f"Seismic Activity Report (Last 30 days)\n"
	            f"Total Events: {summary.get('total', 0)} earthquakes (M4.5+)\n"
	            f"Strongest: M{summary.get('max_magnitude', '?')}\n"
	            f"Average Depth: {summary.get('avg_depth', '?')} km\n"
	            f"M6+ Events: {summary.get('m6_plus', 0)}\n"
	            f"Tsunami Alerts: {summary.get('tsunami_alerts', 0)}"
	        )

	    # ── Cyclones — DETAILED listing ──
	    cy = data_sources.get("cyclone")
	    if cy and "error" not in cy:
	        cyclone_list = cy.get("cyclones", [])
	        summary = cy.get("summary", {})

	        if cyclone_list:
	            lines.append(
	                f"Cyclone Records – Bay of Bengal ({summary.get('period', '')})\n"
	                f"Total: {summary.get('total', 0)} cyclones | Avg Wind: {summary.get('avg_wind', '?')} km/h\n"
	            )

	            # List each cyclone with details
	            for i, c in enumerate(cyclone_list, 1):
	                lines.append(
	                    f"{i}. {c['name']} ({c['year']})\n"
	                    f" Category: {c['category']}\n"
	                    f" Max Wind: {c['max_wind_kmh']} km/h\n"
	                    f" Rainfall: {c['rainfall_mm']} mm\n"
	                    f" Dates: {c['dates']}\n"
	                    f" Landfall: {c['landfall']}\n"
	                    f" Impact: {c['impact']}\n"
	                    f" Damage: ₹{c['damage_crore']} crore"
	                )
	        else:
	            lines.append("No cyclone records found matching your query.")

	    # ── Tsunamis ──
	    ts = data_sources.get("tsunami")
	    if ts and "error" not in ts:
	        summary = ts.get("summary", {})
	        lines.append(
	            f"Tsunami Records – Indian Ocean ({summary.get('period', '')})\n"
	            f"Total Events: {summary.get('total', 0)}\n"
	            f"Max Wave Height: {summary.get('max_wave', '?')}m"
	        )

	    # ── ML Ensemble ──
	    ens = data_sources.get("ensemble")
	    if ens and "error" not in ens:
	        report = ens.get("final_report", {})
	        preds = report.get("predictions", [])
	        if preds:
	            if target_date and date_type == "specific_future":
	                # Report only the requested day; stay silent if it is not covered.
	                target_str = target_date.strftime("%Y-%m-%d")
	                hit = next((p for p in preds if p["date"] == target_str), None)
	                if hit is not None:
	                    lines.append(
	                        f"ML PREDICTION for {target_str}:\n"
	                        f"Predicted Max: {hit['predicted_max']}°C\n"
	                        f"Predicted Min: {hit['predicted_min']}°C\n"
	                        f"Model Spread: ±{hit['model_spread_max']}°C\n"
	                        f"Confidence: {hit['confidence'].upper()}"
	                    )
	            else:
	                temps_max = [p["predicted_max"] for p in preds]
	                temps_min = [p["predicted_min"] for p in preds]
	                lines.append(
	                    f"ML PREDICTION ({len(preds)} days ahead):\n"
	                    f"Max Range: {min(temps_max)}-{max(temps_max)}°C\n"
	                    f"Min Range: {min(temps_min)}-{max(temps_min)}°C\n"
	                    f"Model Agreement: {report.get('agreement_score', 0)*100:.1f}%\n"
	                    f"Confidence: {report.get('overall_confidence', 'unknown').upper()}\n"
	                    f"Models used: {', '.join(ens.get('models_used', []))}"
	                )

	    if not lines:
	        lines.append(
	            "I analyzed the available data but couldn't find specific information for your query. "
	            "Try asking about weather on a specific date, earthquakes, cyclones, tsunamis, or predictions."
	        )

	    return "\n".join(lines)


	def _parse_horizon_days(query, default=7):
	    """Return the horizon in days named in *query* (e.g. "10 days", "2 weeks").

	    Months count as 30 days and years as 365. Returns *default* when the
	    query contains no "<number> days|weeks|months|years" phrase.
	    """
	    import re

	    found = re.search(r'(\d+)\s*(days|weeks|months|years)', query)
	    if not found:
	        return default
	    days_per_unit = {"days": 1, "weeks": 7, "months": 30, "years": 365}
	    return int(found.group(1)) * days_per_unit[found.group(2)]


	def _combine_model_predictions(all_preds, days):
	    """Average the per-day predictions of all successful models.

	    Args:
	        all_preds: Dict of model name -> list of per-day dicts, each read for
	            "date", "day", "predicted_max" and "predicted_min".
	        days: Number of forecast days to combine.

	    Returns:
	        Tuple ``(final_predictions, agreement_score, overall_confidence)``.
	    """
	    final_predictions = []
	    total_spread_max = 0
	    total_spread_min = 0

	    for day_idx in range(days):
	        # Models may return fewer days than requested; use those that cover this day.
	        available = [m for m in all_preds if day_idx < len(all_preds[m])]
	        if not available:
	            continue

	        day_maxes = [all_preds[m][day_idx]["predicted_max"] for m in available]
	        day_mins = [all_preds[m][day_idx]["predicted_min"] for m in available]

	        avg_max = round(sum(day_maxes) / len(day_maxes), 1)
	        avg_min = round(sum(day_mins) / len(day_mins), 1)
	        spread_max = round(max(day_maxes) - min(day_maxes), 1)
	        spread_min = round(max(day_mins) - min(day_mins), 1)
	        total_spread_max += spread_max
	        total_spread_min += spread_min
	        avg_spread = (spread_max + spread_min) / 2
	        confidence = "high" if avg_spread < 1.0 else "medium" if avg_spread < 2.0 else "low"

	        # Date/day labels come from the first model that covers this day; the
	        # first model overall may have a shorter list, which would IndexError.
	        ref = all_preds[available[0]][day_idx]

	        model_breakdown = {
	            m: {"max": all_preds[m][day_idx]["predicted_max"], "min": all_preds[m][day_idx]["predicted_min"]}
	            for m in available
	        }

	        final_predictions.append({
	            "date": ref["date"], "day": ref["day"],
	            "predicted_max": avg_max, "predicted_min": avg_min,
	            "model_spread_max": spread_max, "model_spread_min": spread_min,
	            "confidence": confidence, "per_model": model_breakdown,
	        })

	    n_days = len(final_predictions)
	    avg_temp = sum(p["predicted_max"] for p in final_predictions) / n_days if n_days else 1
	    avg_overall_spread = ((total_spread_max + total_spread_min) / 2) / n_days if n_days else 0
	    # Agreement is 1 minus the spread relative to the mean predicted max, clamped to [0, 1].
	    agreement_score = round(max(0, min(1, 1 - (avg_overall_spread / avg_temp))), 3)
	    overall_confidence = (
	        "very_high" if agreement_score > 0.95
	        else "high" if agreement_score > 0.90
	        else "medium" if agreement_score > 0.80
	        else "low"
	    )
	    return final_predictions, agreement_score, overall_confidence


	@app.get("/ask")
	def ask_climai(q: str = "weather today"):
	    """
	    Main entry point for AI analysis.
	    Orchestrates Planner -> Executor -> Ensemble -> Critic -> Groq Synthesis.

	    Args:
	        q: Free-text user question.

	    Returns:
	        dict with the query, detected intents, target date, per-step status,
	        per-model status, raw data sources, synthesized analysis text, critic
	        corrections, collected error messages and total elapsed milliseconds.
	    """
	    import time as _time

	    print(f"DEBUG: /ask called with q='{q}'")
	    t0 = _time.time()

	    query = q.strip()

	    # ── 1. PLAN ──
	    plan = plan_query(query)
	    intents = plan["all_intents"]
	    target_date = plan["date"]

	    # Extract relative days if mentioned
	    # NOTE(review): the value is user-controlled and unbounded ("9999 years");
	    # consider capping it before it reaches the models.
	    days = _parse_horizon_days(query)

	    # Default date_type to support legacy build_focused_analysis
	    if target_date and target_date < datetime.utcnow().date():
	        date_type = "specific_past"
	    elif target_date:
	        date_type = "specific_future"
	    else:
	        date_type = "today"

	    steps = []
	    errors = []
	    models_status = {}
	    now = datetime.now()

	    steps.append({
	        "step": "plan",
	        "status": "done",
	        "detail": f"Intents: {', '.join(intents)} | Date: {target_date.strftime('%Y-%m-%d') if target_date else 'None'}"
	    })

	    # ── 2. EXECUTE ──
	    steps.append({"step": "execute", "status": "running", "detail": "Executing data retrieval plan..."})
	    try:
	        data_sources = execute_plan(plan)
	        # Drop None keys to match legacy behavior
	        data_sources = {k: v for k, v in data_sources.items() if v is not None}
	        steps[-1]["status"] = "done"
	    except Exception as e:
	        data_sources = {}
	        steps[-1]["status"] = "error"
	        errors.append(f"Executor failed: {str(e)}")

	    # ── 3. LOCAL ML ORCHESTRATION ──
	    # NEVER run ML for pure data retrieval intents
	    run_models = False
	    data_only_intents = {"cyclone", "earthquake", "tsunami", "weather_history", "disaster"}
	    is_data_only = all(i in data_only_intents for i in intents)
	    is_past_date = target_date and date_type in ("specific_past", "relative_past")

	    if not is_past_date and not is_data_only:
	        if "prediction" in intents:
	            run_models = True
	        if target_date and date_type in ("specific_future", "relative_future"):
	            days_ahead = (target_date - now.date()).days
	            if days_ahead > 7:
	                # Beyond the 7-day forecast: extend the ML horizon to reach the date.
	                run_models = True
	                days = max(days, days_ahead)

	    if run_models:
	        steps.append({"step": "ensemble", "status": "running", "detail": "Running 4 ML models as team..."})
	        try:
	            td = fetch_training_data()
	            temps_max, temps_min = td["temps_max"], td["temps_min"]
	            end_date = td["end_date"]
	            window = 7
	            X, y_max, y_min = prepare_features(temps_max, temps_min, window)

	            model_funcs = {
	                "random_forest": lambda: predict_rf(X, y_max, y_min, temps_max, temps_min, end_date, window, days),
	                "xgboost": lambda: predict_xgb(X, y_max, y_min, temps_max, temps_min, end_date, window, days),
	                "lstm": lambda: predict_lstm(temps_max, temps_min, end_date, window, days),
	                "lightgbm": lambda: predict_lgbm(X, y_max, y_min, temps_max, temps_min, end_date, window, days),
	            }

	            all_preds = {}
	            individual_results = {}
	            # One failing model must not take the others down with it.
	            for model_name, model_fn in model_funcs.items():
	                try:
	                    preds, t_ms = model_fn()
	                except Exception as e:
	                    models_status[model_name] = {"status": "error", "error": str(e)}
	                    individual_results[model_name] = {"status": "error", "error": str(e)}
	                    errors.append(f"{model_name} failed: {str(e)}")
	                else:
	                    models_status[model_name] = {"status": "success", "time_ms": t_ms}
	                    individual_results[model_name] = {"predictions": preds, "training_time_ms": t_ms, "status": "success"}
	                    all_preds[model_name] = preds

	            successful_models = list(all_preds.keys())
	            n_models = len(successful_models)

	            if n_models > 0:
	                final_predictions, agreement_score, overall_confidence = _combine_model_predictions(all_preds, days)
	                total_time = sum(
	                    r.get("time_ms", 0)
	                    for r in models_status.values()
	                    if isinstance(r, dict) and r.get("status") == "success"
	                )

	                data_sources["ensemble"] = {
	                    "models_used": successful_models,
	                    "models_failed": [m for m in model_funcs if m not in successful_models],
	                    "individual_results": individual_results,
	                    "final_report": {"predictions": final_predictions, "agreement_score": agreement_score, "overall_confidence": overall_confidence},
	                    "training_data": {"days": td["training_days"], "total_compute_ms": total_time},
	                }
	                steps[-1]["status"] = "done"
	                steps[-1]["detail"] = f"{n_models}/{len(model_funcs)} models succeeded"
	            else:
	                steps[-1]["status"] = "error"
	                steps[-1]["detail"] = "All models failed"
	        except Exception as e:
	            steps[-1]["status"] = "error"
	            errors.append(f"Ensemble failed: {str(e)}")

	    # ── 4. CRITIC ──
	    checked = review(query, plan, data_sources)
	    corrections = checked["corrections"]
	    is_valid = checked["is_valid"]

	    if corrections:
	        steps.append({"step": "critic", "status": "error" if not is_valid else "done",
	                      "detail": f"Self-Healed/Detected: {', '.join(corrections)}"})

	    log({"query": query, "plan": plan, "corrections": corrections, "valid": is_valid})

	    # ── 5. SYNTHESIZE ANALYSIS ──
	    analysis = groq_answer(query, intents, data_sources, target_date, date_type)
	    if not is_valid:
	        analysis += "\n\n(Note: The AI self-critic noted missing or skewed data constraints during processing.)"

	    total_time_ms = round((_time.time() - t0) * 1000)

	    return {
	        "query": query,
	        "intents": intents,
	        "target_date": target_date.strftime("%Y-%m-%d") if target_date else None,
	        "date_type": date_type,
	        "steps": steps,
	        "models": models_status,
	        "data": data_sources,
	        "analysis": analysis,
	        "corrections": corrections,
	        "errors": errors,
	        "total_time_ms": total_time_ms,
	    }




	# ════════════════════════════════════════════════════════════
	# /refresh-data — Rebuild historical dataset in background
	# ════════════════════════════════════════════════════════════
	@app.post("/refresh-data")
	def refresh_dataset():
	    """
	    Trigger a full dataset rebuild by running build_dataset.py.
	    Run monthly to keep ML training data and LLM context fresh.

	    The script is launched as a detached child process and this handler
	    returns immediately without waiting for it.

	    Returns:
	        dict: {"status": "started", ...} once the process is spawned, or
	        {"status": "error", "message": ...} when the script is missing or
	        the process could not be started.
	    """
	    import os as _os
	    import subprocess as _subprocess
	    import sys as _sys

	    script = "build_dataset.py"
	    try:
	        if not _os.path.exists(script):
	            return {"status": "error", "message": "build_dataset.py not found"}
	        # Use the interpreter running this server, not whatever "python" is
	        # first on PATH, so the script sees the same installed packages.
	        _subprocess.Popen(
	            [_sys.executable or "python", script],
	            stdout=_subprocess.DEVNULL,
	            stderr=_subprocess.DEVNULL,
	        )
	        return {
	            "status": "started",
	            "message": "Dataset rebuild started in background. Check data/ folder in ~2 minutes.",
	            "files_to_update": ["weather_history.json","earthquake_history.json","aqi_history.json","flood_baseline.json","llm_context.json"],
	        }
	    except Exception as e:
	        # Endpoint boundary: report the failure to the client instead of a 500.
	        return {"status": "error", "message": str(e)}


	@app.get("/dataset-status")
	def dataset_status():
	    """Check which dataset files exist and when they were last updated.

	    Returns:
	        dict: "dataset_ready" (True when every file exists), "files" (per-file
	        existence, size in KB and the "fetched_at"/"generated_at" stamp read
	        from the file, or "unknown"), and a "tip" string.
	    """
	    import json as _json
	    import os as _os

	    files = {
	        "weather_history": "weather_history.json",
	        "earthquake_history": "earthquake_history.json",
	        "aqi_history": "aqi_history.json",
	        "flood_baseline": "flood_baseline.json",
	        "llm_context": "llm_context.json",
	    }
	    result = {}
	    for key, path in files.items():
	        if not _os.path.exists(path):
	            result[key] = {"exists": False}
	            continue

	        stat = _os.stat(path)
	        try:
	            # JSON is UTF-8; do not depend on the host's locale encoding.
	            with open(path, encoding="utf-8") as f:
	                data = _json.load(f)
	            fetched_at = data.get("fetched_at") or data.get("generated_at", "unknown")
	        except Exception:
	            # Best effort: an unreadable, malformed or non-object file still
	            # counts as existing; only its timestamp is unknown.
	            fetched_at = "unknown"
	        result[key] = {"exists": True, "size_kb": round(stat.st_size/1024,1), "fetched_at": fetched_at}

	    all_exist = all(v["exists"] for v in result.values())
	    return {"dataset_ready": all_exist, "files": result,
	            "tip": "Run POST /refresh-data to build missing files." if not all_exist else "All dataset files present."}

	if __name__ == "__main__":
	    # Direct execution: serve the FastAPI app with uvicorn on all interfaces.
	    # Imported here so that merely importing this module does not need uvicorn.
	    import uvicorn  # type: ignore[import]

	    uvicorn.run(app, port=8000, host="0.0.0.0")