ClimAI / main.py
iPurushottam's picture
Upload folder using huggingface_hub
5fd9d71 verified
# pyre-ignore-all-errors
"""
ClimAI — FastAPI Backend
Serves weather, earthquake, cyclone, tsunami, historical, and ML prediction data.
Location: Chennai, India (13.08°N, 80.27°E)
"""
from fastapi import FastAPI # type: ignore[import]
from fastapi.middleware.cors import CORSMiddleware
import requests
from datetime import datetime, timedelta
import numpy as np
import random
import re as _re
import logging
# from global_land_mask import globe # Removed from top to save startup memory
from planner import plan_query
from executor import execute_plan
from critic import review
from logger import log
from groq_llm import groq_answer # ← ADD THIS LINE
# Application logger. The stream handler is attached only when the logger has
# none yet, so importing this module a second time (for example under a
# different module name) does not duplicate every log line.
logger = logging.getLogger("climai")
logger.setLevel(logging.INFO)
_handler = logging.StreamHandler()
_handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s"))
if not logger.handlers:
    logger.addHandler(_handler)

app = FastAPI(title="ClimAI API", version="3.5.2-pro")

# ── CORS Configuration ──────────────────────────────────────────────────────
# Using the standard FastAPI CORSMiddleware.
# This handles preflight (OPTIONS) and header injection correctly for all routes.
# NOTE(review): wildcard origins combined with allow_credentials=True lets any
# site make credentialed requests. The values are kept as they were; confirm
# whether credentials are needed and, if so, list the allowed origins explicitly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,  # Set to True for better compatibility with standard fetch
    allow_methods=["*"],
    allow_headers=["*"],
    expose_headers=["*"],
)
@app.get("/debug-paths")
def debug_paths():
    """Report the working directory and which data files are visible from it.

    Returns a dict with the current working directory, its listing, whether
    ``weather_history.json`` and a ``data`` folder exist, and the contents of
    ``data`` (an empty list when the folder is absent).

    NOTE(review): this exposes server directory listings to any caller;
    confirm it is meant to be reachable outside of debugging.
    """
    import os as _os

    workdir = _os.getcwd()
    has_data_dir = _os.path.exists("data")
    data_listing = _os.listdir("data") if has_data_dir else []

    report = {"cwd": workdir}
    report["files_in_cwd"] = _os.listdir(workdir)
    report["weather_history_exists"] = _os.path.exists("weather_history.json")
    report["data_folder_exists"] = has_data_dir
    report["data_files"] = data_listing
    return report
@app.get("/ping")
def ping():
    """Liveness probe: returns a fixed status, the server's local time and a version tag."""
    payload = {"status": "ok"}
    payload["time"] = datetime.now().isoformat()
    payload["version"] = "3.5-pro"
    return payload
# Chennai coordinates in decimal degrees (north latitude, east longitude).
# They are sent as the "latitude"/"longitude" parameters of the Open-Meteo requests.
LAT = 13.0827
LON = 80.2707
# ── Simple in-memory cache to prevent Open-Meteo 429 rate limits ──
_cache: dict = {}
_cache_ttl: dict = {}
def _get_cache(key: str, ttl_seconds: int = 300):
if key in _cache and key in _cache_ttl:
age = (datetime.now() - _cache_ttl[key]).total_seconds()
if age < ttl_seconds:
return _cache[key]
return None
def _set_cache(key: str, value):
_cache[key] = value
_cache_ttl[key] = datetime.now()
# ════════════════════════════════
# /weather — Current conditions (Open Meteo)
# ════════════════════════════════
@app.get("/weather")
def get_weather():
    """Current weather for Chennai.

    Serves a cached result for up to 120 seconds; otherwise queries the
    Open-Meteo forecast API for the current conditions at (LAT, LON).

    Returns a dict of current readings. ``wind_direction`` is a 16-point
    compass label derived from ``wind_direction_deg``; both are ``None`` when
    the API did not report a wind direction. On any failure the function
    returns ``{"error": <message>}`` instead of raising.
    """
    cached = _get_cache("weather", ttl_seconds=120)
    if cached:
        return cached

    url = "https://api.open-meteo.com/v1/forecast"
    params = {
        "latitude": LAT,
        "longitude": LON,
        "current": "temperature_2m,relative_humidity_2m,apparent_temperature,precipitation,rain,cloud_cover,wind_speed_10m,wind_direction_10m,wind_gusts_10m,pressure_msl,surface_pressure",
        "timezone": "Asia/Kolkata",
    }
    try:
        r = requests.get(url, params=params, timeout=10)
        r.raise_for_status()
        current = r.json().get("current", {})

        deg = current.get("wind_direction_10m")
        if deg is None:
            # A missing or null reading must not fail the whole response, and
            # must not be reported as a northerly wind either.
            wind_dir = None
        else:
            directions = ["N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE",
                          "S", "SSW", "SW", "WSW", "W", "WNW", "NW", "NNW"]
            # Each compass point covers 22.5 degrees; the modulo wraps 360 back to "N".
            wind_dir = directions[round(deg / 22.5) % 16]

        result = {
            "temperature": current.get("temperature_2m"),
            "feels_like": current.get("apparent_temperature"),
            "humidity": current.get("relative_humidity_2m"),
            "wind_speed": current.get("wind_speed_10m"),
            "wind_direction": wind_dir,
            "wind_direction_deg": deg,
            "wind_gusts": current.get("wind_gusts_10m"),
            "cloud_cover": current.get("cloud_cover"),
            "pressure": current.get("surface_pressure"),
            "precipitation": current.get("precipitation"),
            "rain": current.get("rain"),
        }
        _set_cache("weather", result)
        return result
    except Exception as e:
        logger.warning("[get_weather] request failed: %s", e)
        return {"error": str(e)}
# ════════════════════════════════
# /forecast — 7-day daily forecast
# ════════════════════════════════
@app.get("/forecast")
def get_forecast():
    """7-day daily forecast for Chennai.

    Serves a cached result for up to 300 seconds; otherwise queries the
    Open-Meteo forecast API. Returns ``{"daily": [...], "hourly": [...]}``,
    or ``{"error": <message>}`` if anything fails.
    """
    hit = _get_cache("forecast", 300)
    if hit:
        return hit

    query = {
        "latitude": LAT,
        "longitude": LON,
        "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum,wind_speed_10m_max,wind_direction_10m_dominant,precipitation_probability_max,uv_index_max",
        "hourly": "temperature_2m,wind_speed_10m",
        "forecast_days": 7,
        "timezone": "Asia/Kolkata",
    }
    try:
        resp = requests.get("https://api.open-meteo.com/v1/forecast", params=query, timeout=10)
        resp.raise_for_status()
        payload = resp.json()
        daily = payload.get("daily", {})
        hourly = payload.get("hourly", {})

        def daily_value(field, idx, fallback):
            # Value of a daily series at idx, or the fallback when the series is too short.
            series = daily.get(field, [])
            return series[idx] if idx < len(series) else fallback

        day_rows = []
        for idx, stamp in enumerate(daily.get("time", [])):
            weekday = datetime.strptime(stamp, "%Y-%m-%d").strftime("%a")
            day_rows.append({
                "date": stamp,
                "day": weekday,
                "temp_max": daily_value("temperature_2m_max", idx, None),
                "temp_min": daily_value("temperature_2m_min", idx, None),
                "precipitation": daily_value("precipitation_sum", idx, 0),
                "wind_speed_max": daily_value("wind_speed_10m_max", idx, 0),
                "precip_prob": daily_value("precipitation_probability_max", idx, 0),
                "uv_index": daily_value("uv_index_max", idx, 0),
            })

        hour_temps = hourly.get("temperature_2m", [])
        hour_winds = hourly.get("wind_speed_10m", [])
        hour_rows = [
            {
                "time": stamp,
                "temperature": hour_temps[idx] if idx < len(hour_temps) else None,
                "wind_speed": hour_winds[idx] if idx < len(hour_winds) else None,
            }
            for idx, stamp in enumerate(hourly.get("time", []))
        ]

        result = {"daily": day_rows, "hourly": hour_rows}
        _set_cache("forecast", result)
        return result
    except Exception as e:
        return {"error": str(e)}
# ════════════════════════════════
# /historical — 5-year historical data (Open Meteo Archive API)
# ════════════════════════════════
@app.get("/historical")
def get_historical(years: int = 5):
    """Monthly aggregates of historical daily weather for Chennai.

    Fetches ``years`` * 365 days of daily data from the Open-Meteo Archive API
    (ending 7 days ago) and reduces it to one row per calendar month.

    Successful results are cached per ``years`` value for one hour, like the
    other Open-Meteo endpoints in this file, so repeated dashboard loads do not
    re-download several years of data each time.

    Returns a dict with ``location``, ``period``, ``monthly`` and
    ``total_months``, or ``{"error": <message>}`` if anything fails.
    """
    cache_key = f"historical_{years}"
    cached = _get_cache(cache_key, ttl_seconds=3600)
    if cached:
        return cached

    # Open-Meteo Archive API lags by about 5-7 days.
    # We must offset the end date to avoid a 400 Bad Request.
    end_date = datetime.now() - timedelta(days=7)
    start_date = end_date - timedelta(days=years * 365)
    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": LAT,
        "longitude": LON,
        "start_date": start_date.strftime("%Y-%m-%d"),
        "end_date": end_date.strftime("%Y-%m-%d"),
        "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum,wind_speed_10m_max",
        "timezone": "Asia/Kolkata",
    }
    try:
        r = requests.get(url, params=params, timeout=30)
        r.raise_for_status()
        daily = r.json().get("daily", {})
        times = daily.get("time", [])
        series = {
            "temps_max": daily.get("temperature_2m_max", []),
            "temps_min": daily.get("temperature_2m_min", []),
            "precip": daily.get("precipitation_sum", []),
            "wind": daily.get("wind_speed_10m_max", []),
        }

        # Return monthly averages for efficiency
        monthly = {}
        for i, t in enumerate(times):
            bucket = monthly.setdefault(t[:7], {name: [] for name in series})  # key is YYYY-MM
            for name, values in series.items():
                # Skip days a series does not cover and days with null readings.
                if i < len(values) and values[i] is not None:
                    bucket[name].append(values[i])

        def mean_or_none(values):
            return round(sum(values) / len(values), 1) if values else None

        result = [
            {
                "month": month,
                "avg_temp_max": mean_or_none(vals["temps_max"]),
                "avg_temp_min": mean_or_none(vals["temps_min"]),
                "total_precip": round(sum(vals["precip"]), 1) if vals["precip"] else 0,
                "avg_wind": mean_or_none(vals["wind"]),
            }
            for month, vals in sorted(monthly.items())
        ]
        response = {
            "location": "Chennai, India",
            "period": f"{start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}",
            "monthly": result,
            "total_months": len(result),
        }
        _set_cache(cache_key, response)
        return response
    except Exception as e:
        return {"error": str(e)}
# ════════════════════════════════════════════════════════════
# SHARED HELPERS — Data fetching & feature preparation
# ════════════════════════════════════════════════════════════
def fetch_training_data(days: int = 90):
    """
    Load temperature data for ML training.

    Priority: 1) saved dataset ``weather_history.json`` in the working directory
              2) live Open-Meteo Archive API call covering the last ``days`` days
                 (ending 7 days ago), used when the file is missing, unreadable,
                 or holds fewer than 14 usable days.

    Daily max and min temperatures are filtered together: when both series
    have the same length, a day is kept only if both readings are present, so
    ``temps_max[i]`` and ``temps_min[i]`` always describe the same day.
    ``precip`` and ``wind`` are filtered independently, as before.

    Returns a dict with ``temps_max``, ``temps_min``, ``precip``, ``wind``,
    ``end_date`` (datetime), ``training_days`` and ``source``
    ("saved_dataset" or "live_api").

    Raises whatever ``requests`` raises if the fallback API call fails.
    """
    import os as _os
    import json as _json

    # Same logger object as the module-level "climai" logger.
    log_ = logging.getLogger("climai")

    def _extract(daily):
        """Pull the four daily series out of an Open-Meteo style "daily" mapping."""
        raw_max = daily.get("temperature_2m_max", [])
        raw_min = daily.get("temperature_2m_min", [])
        if len(raw_max) == len(raw_min):
            # Drop a day from both series when either reading is missing;
            # filtering them separately would shift one series against the other.
            paired = [(hi, lo) for hi, lo in zip(raw_max, raw_min)
                      if hi is not None and lo is not None]
            highs = [hi for hi, _ in paired]
            lows = [lo for _, lo in paired]
        else:
            # Lengths already disagree, so days cannot be paired reliably.
            highs = [t for t in raw_max if t is not None]
            lows = [t for t in raw_min if t is not None]
        rain = [p for p in daily.get("precipitation_sum", []) if p is not None]
        gusts = [w for w in daily.get("wind_speed_10m_max", []) if w is not None]
        return highs, lows, rain, gusts

    dataset_path = "weather_history.json"
    # ── Try loading from saved dataset first ──────────────────────
    if _os.path.exists(dataset_path):
        try:
            with open(dataset_path, encoding="utf-8") as f:
                saved = _json.load(f)
            temps_max, temps_min, precip, wind = _extract(saved.get("daily", {}))
            if len(temps_max) >= 14:
                period = saved.get("period", "")
                try:
                    # "period" is expected to look like "YYYY-MM-DD to YYYY-MM-DD".
                    end_str = period.split(" to ")[-1].strip()
                    end_date = datetime.strptime(end_str, "%Y-%m-%d")
                except Exception:
                    end_date = datetime.now() - timedelta(days=7)
                log_.info("[fetch_training_data] Loaded %d days from saved dataset", len(temps_max))
                return {
                    "temps_max": temps_max,
                    "temps_min": temps_min,
                    "precip": precip,
                    "wind": wind,
                    "end_date": end_date,
                    "training_days": len(temps_max),
                    "source": "saved_dataset",
                }
        except Exception as e:
            log_.warning("[fetch_training_data] Saved dataset load failed: %s — falling back to API", e)

    # ── Fallback: live API call ────────────────────────────────────
    log_.info("[fetch_training_data] No saved dataset — fetching from Open-Meteo Archive API")
    end_date = datetime.now() - timedelta(days=7)
    start_date = end_date - timedelta(days=days)
    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": LAT,
        "longitude": LON,
        "start_date": start_date.strftime("%Y-%m-%d"),
        "end_date": end_date.strftime("%Y-%m-%d"),
        "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum,wind_speed_10m_max",
        "timezone": "Asia/Kolkata",
    }
    r = requests.get(url, params=params, timeout=20)
    r.raise_for_status()
    temps_max, temps_min, precip, wind = _extract(r.json().get("daily", {}))
    return {
        "temps_max": temps_max,
        "temps_min": temps_min,
        "precip": precip,
        "wind": wind,
        "end_date": end_date,
        "training_days": len(temps_max),
        "source": "live_api",
    }
def prepare_features(temps_max, temps_min, window=7):
    """Prepare rolling-window features for tree-based models.

    Each row of ``X`` holds ``window`` consecutive max temperatures; the
    matching entries of ``y_max`` and ``y_min`` are the max and min
    temperature of the day that follows that window.

    Only days present in both series are used, so ``X``, ``y_max`` and
    ``y_min`` always have the same number of rows even when ``temps_min`` is
    shorter than ``temps_max``. With too little data ``X`` has shape
    ``(0, window)`` and both targets are empty.

    Returns ``(X, y_max, y_min)`` as numpy arrays.
    """
    usable = min(len(temps_max), len(temps_min))
    rows = [temps_max[i - window:i] for i in range(window, usable)]
    # Keep X two-dimensional even when there are no rows.
    X = np.array(rows) if rows else np.empty((0, window))
    y_max = np.array(temps_max[window:usable] if rows else [])
    y_min = np.array(temps_min[window:usable] if rows else [])
    return X, y_max, y_min
# ════════════════════════════════════════════════════════════
# LSTM CLASS — Pure numpy implementation
# ════════════════════════════════════════════════════════════
def _sigmoid(x):
x = np.clip(x, -500, 500)
return 1.0 / (1.0 + np.exp(-x))
def _tanh(x):
return np.tanh(x)
class NumpyLSTM:
    """Single-layer LSTM with a scalar linear read-out, written in pure numpy.

    The forward pass runs forget, input and output gates and a cell state over
    the whole sequence. Training is truncated: ``train_step`` differentiates
    the squared error through the read-out and the LAST time step only, not
    through the full sequence.
    """

    def __init__(self, input_size, hidden_size, lr=0.005):
        """Create small random gate weights, zero biases and a zero read-out bias.

        Args:
            input_size: number of features per time step.
            hidden_size: number of hidden/cell units.
            lr: learning rate used by ``train_step``.
        """
        self.hidden_size = hidden_size
        self.lr = lr
        self.cache = []
        scale = 0.1
        # Each gate sees the previous hidden state stacked on top of the input.
        self.Wf = np.random.randn(hidden_size, input_size + hidden_size) * scale
        self.Wi = np.random.randn(hidden_size, input_size + hidden_size) * scale
        self.Wc = np.random.randn(hidden_size, input_size + hidden_size) * scale
        self.Wo = np.random.randn(hidden_size, input_size + hidden_size) * scale
        self.bf = np.zeros((hidden_size, 1))
        self.bi = np.zeros((hidden_size, 1))
        self.bc = np.zeros((hidden_size, 1))
        self.bo = np.zeros((hidden_size, 1))
        self.Wy = np.random.randn(1, hidden_size) * scale
        self.by = np.zeros((1, 1))

    @staticmethod
    def _squash(z):
        """Logistic function with the argument clipped to [-500, 500]."""
        return 1.0 / (1.0 + np.exp(-np.clip(z, -500, 500)))

    def forward_sequence(self, X_seq):
        """Run the LSTM over ``X_seq`` (one row per time step).

        Starts from zero hidden and cell state and records every step's
        intermediate values in ``self.cache``.

        Returns ``(prediction, h, c)``: the scalar read-out of the final hidden
        state, and the final hidden and cell state column vectors.
        """
        seq_len = X_seq.shape[0]
        h = np.zeros((self.hidden_size, 1))
        c = np.zeros((self.hidden_size, 1))
        self.cache = []
        for t in range(seq_len):
            x_t = X_seq[t].reshape(-1, 1)
            concat = np.vstack([h, x_t])
            f_t = self._squash(self.Wf @ concat + self.bf)
            i_t = self._squash(self.Wi @ concat + self.bi)
            c_hat = np.tanh(self.Wc @ concat + self.bc)
            c = f_t * c + i_t * c_hat
            o_t = self._squash(self.Wo @ concat + self.bo)
            h = o_t * np.tanh(c)
            self.cache.append((x_t, concat, f_t, i_t, c_hat, c.copy(), o_t, h.copy()))
        y = self.Wy @ h + self.by
        return float(y[0, 0]), h, c

    def train_step(self, X_seq, target):
        """Do one gradient step on the squared error for a single sequence.

        Gradients are computed for the read-out and for the gate parameters at
        the last time step; every update is clipped element-wise to [-1, 1]
        before being scaled by the learning rate.

        Returns the squared error measured before the update.
        """
        pred, h, c = self.forward_sequence(X_seq)
        dy = 2 * (pred - target)
        max_grad = 1.0

        # Gradient flowing into the hidden state; taken BEFORE Wy is updated so
        # that it matches the weights used in the forward pass.
        dh = self.Wy.T * dy

        self.Wy -= self.lr * np.clip(dy * h.T, -max_grad, max_grad)
        self.by -= self.lr * np.array([[dy]])

        if self.cache:
            x_t, concat, f_t, i_t, c_hat, c_state, o_t, h_state = self.cache[-1]
            # Cell state entering the last step (zero for a one-step sequence).
            c_prev = self.cache[-2][5] if len(self.cache) > 1 else np.zeros_like(c_state)
            tanh_c = np.tanh(c_state)

            do = dh * tanh_c * o_t * (1 - o_t)
            dc = dh * o_t * (1 - tanh_c ** 2)
            # c = f * c_prev + i * c_hat, so dc/df is c_prev.
            df = dc * c_prev * f_t * (1 - f_t)
            di = dc * c_hat * i_t * (1 - i_t)
            dc_hat = dc * i_t * (1 - c_hat ** 2)

            for grad in [do, dc, df, di, dc_hat]:
                np.clip(grad, -max_grad, max_grad, out=grad)

            self.Wf -= self.lr * np.clip(df @ concat.T, -max_grad, max_grad)
            self.Wi -= self.lr * np.clip(di @ concat.T, -max_grad, max_grad)
            self.Wc -= self.lr * np.clip(dc_hat @ concat.T, -max_grad, max_grad)
            self.Wo -= self.lr * np.clip(do @ concat.T, -max_grad, max_grad)
            self.bf -= self.lr * df
            self.bi -= self.lr * di
            self.bc -= self.lr * dc_hat
            self.bo -= self.lr * do
        return (pred - target) ** 2

    def predict(self, X_seq):
        """Return the scalar prediction for ``X_seq`` without updating any weights."""
        pred, _, _ = self.forward_sequence(X_seq)
        return pred
# ════════════════════════════════════════════════════════════
# PER-MODEL PREDICTION FUNCTIONS
# Each returns: list of {date, day, predicted_max, predicted_min}
# ════════════════════════════════════════════════════════════
def predict_rf(X, y_max, y_min, temps_max, temps_min, end_date, window=7, forecast_days=7):
"""Random Forest predictions."""
import time as _time
t0 = _time.time()
from sklearn.ensemble import RandomForestRegressor # type: ignore[import]
rf_max = RandomForestRegressor(n_estimators=50, random_state=42)
rf_min = RandomForestRegressor(n_estimators=50, random_state=42)
rf_max.fit(X, y_max)
rf_min.fit(X, y_min)
preds = []
lw_max = np.array(temps_max[-window:]).reshape(1, -1)
lw_min = np.array(temps_min[-window:]).reshape(1, -1)
for day in range(forecast_days):
pm = float(rf_max.predict(lw_max)[0])
pn = float(rf_min.predict(lw_min)[0])
preds.append({
"date": (end_date + timedelta(days=day + 1)).strftime("%Y-%m-%d"),
"day": (end_date + timedelta(days=day + 1)).strftime("%a"),
"predicted_max": round(pm, 1),
"predicted_min": round(pn, 1),
})
lw_max = np.append(lw_max[:, 1:], [[pm]], axis=1)
lw_min = np.append(lw_min[:, 1:], [[pn]], axis=1)
return preds, round((_time.time() - t0) * 1000)
def predict_xgb(X, y_max, y_min, temps_max, temps_min, end_date, window=7, forecast_days=7):
"""XGBoost predictions."""
import time as _time
t0 = _time.time()
from xgboost import XGBRegressor # type: ignore[import]
xg_max = XGBRegressor(n_estimators=50, max_depth=3, learning_rate=0.1, verbosity=0)
xg_min = XGBRegressor(n_estimators=50, max_depth=3, learning_rate=0.1, verbosity=0)
xg_max.fit(X, y_max)
xg_min.fit(X, y_min)
preds = []
lw_max = np.array(temps_max[-window:]).reshape(1, -1)
lw_min = np.array(temps_min[-window:]).reshape(1, -1)
for day in range(forecast_days):
pm = float(xg_max.predict(lw_max)[0])
pn = float(xg_min.predict(lw_min)[0])
preds.append({
"date": (end_date + timedelta(days=day + 1)).strftime("%Y-%m-%d"),
"day": (end_date + timedelta(days=day + 1)).strftime("%a"),
"predicted_max": round(pm, 1),
"predicted_min": round(pn, 1),
})
lw_max = np.append(lw_max[:, 1:], [[pm]], axis=1)
lw_min = np.append(lw_min[:, 1:], [[pn]], axis=1)
return preds, round((_time.time() - t0) * 1000)
def predict_lgbm(X, y_max, y_min, temps_max, temps_min, end_date, window=7, forecast_days=7):
"""LightGBM predictions."""
import time as _time
t0 = _time.time()
from lightgbm import LGBMRegressor # type: ignore[import]
lg_max = LGBMRegressor(n_estimators=50, max_depth=3, learning_rate=0.1, verbose=-1)
lg_min = LGBMRegressor(n_estimators=50, max_depth=3, learning_rate=0.1, verbose=-1)
lg_max.fit(X, y_max)
lg_min.fit(X, y_min)
preds = []
lw_max = np.array(temps_max[-window:]).reshape(1, -1)
lw_min = np.array(temps_min[-window:]).reshape(1, -1)
for day in range(forecast_days):
pm = float(lg_max.predict(lw_max)[0])
pn = float(lg_min.predict(lw_min)[0])
preds.append({
"date": (end_date + timedelta(days=day + 1)).strftime("%Y-%m-%d"),
"day": (end_date + timedelta(days=day + 1)).strftime("%a"),
"predicted_max": round(pm, 1),
"predicted_min": round(pn, 1),
})
lw_max = np.append(lw_max[:, 1:], [[pm]], axis=1)
lw_min = np.append(lw_min[:, 1:], [[pn]], axis=1)
return preds, round((_time.time() - t0) * 1000)
def predict_lstm(temps_max, temps_min, end_date, window=7, forecast_days=7, epochs=30):
    """LSTM (pure numpy) predictions.

    Standardises each series, trains one ``NumpyLSTM`` per series on sliding
    windows for ``epochs`` passes, then forecasts ``forecast_days`` days by
    feeding each prediction back into the window.

    Returns ``(predictions, elapsed_ms)`` where predictions is a list of
    ``{date, day, predicted_max, predicted_min}`` dicts.
    """
    import time as _time
    started = _time.time()

    series_max = np.array(temps_max)
    series_min = np.array(temps_min)
    # The small epsilon keeps the division safe for a constant series.
    mean_max, std_max = series_max.mean(), series_max.std() + 1e-8
    mean_min, std_min = series_min.mean(), series_min.std() + 1e-8
    z_max = (series_max - mean_max) / std_max
    z_min = (series_min - mean_min) / std_min

    def sliding_pairs(z):
        # (window of inputs, next value) for every position with a full window.
        return [(z[i - window:i], z[i]) for i in range(window, len(z))]

    train_max = sliding_pairs(z_max)
    train_min = sliding_pairs(z_min)

    # Train
    net_max = NumpyLSTM(input_size=1, hidden_size=16, lr=0.003)
    net_min = NumpyLSTM(input_size=1, hidden_size=16, lr=0.003)
    for _ in range(epochs):
        for seq, nxt in train_max:
            net_max.train_step(np.array(seq).reshape(-1, 1), nxt)
        for seq, nxt in train_min:
            net_min.train_step(np.array(seq).reshape(-1, 1), nxt)

    # Predict
    hist_max = z_max[-window:].tolist()
    hist_min = z_min[-window:].tolist()
    preds = []
    for step in range(1, forecast_days + 1):
        z_hi = net_max.predict(np.array(hist_max[-window:]).reshape(-1, 1))
        z_lo = net_min.predict(np.array(hist_min[-window:]).reshape(-1, 1))
        # Undo the standardisation to get back to temperature units.
        hi = float(z_hi * std_max + mean_max)
        lo = float(z_lo * std_min + mean_min)
        when = end_date + timedelta(days=step)
        preds.append({
            "date": when.strftime("%Y-%m-%d"),
            "day": when.strftime("%a"),
            "predicted_max": round(hi, 1),
            "predicted_min": round(lo, 1),
        })
        hist_max.append(z_hi)
        hist_min.append(z_lo)

    return preds, round((_time.time() - started) * 1000)
# ════════════════════════════════════════════════════════════
# /predict — Single model prediction
# ════════════════════════════════════════════════════════════
@app.get("/predict")
def get_predict(model: str = "random_forest", days: int = 7):
    """
    ML-based temperature predictions for next N days.
    Models: random_forest, xgboost, lstm, lightgbm

    The model name is matched case-insensitively with spaces treated as
    underscores, and is validated before any training data is loaded so an
    unknown name is rejected without reading the dataset or calling the API.

    Returns a dict with ``model``, ``predictions``, ``training_days``,
    ``training_time_ms`` and ``location``, or ``{"error": <message>}``.
    """
    model_name = model.lower().replace(" ", "_")
    if model_name not in ("random_forest", "xgboost", "lightgbm", "lstm"):
        return {"error": f"Unknown model: {model}. Use: random_forest, xgboost, lstm, lightgbm"}

    try:
        td = fetch_training_data()
        temps_max, temps_min = td["temps_max"], td["temps_min"]
        end_date = td["end_date"]
        if len(temps_max) < 14:
            return {"error": "Insufficient data for prediction"}

        window = 7
        if model_name == "lstm":
            # The LSTM builds its own windows from the raw series.
            predictions, time_ms = predict_lstm(temps_max, temps_min, end_date, window, days)
        else:
            X, y_max, y_min = prepare_features(temps_max, temps_min, window)
            tree_models = {
                "random_forest": predict_rf,
                "xgboost": predict_xgb,
                "lightgbm": predict_lgbm,
            }
            predictions, time_ms = tree_models[model_name](
                X, y_max, y_min, temps_max, temps_min, end_date, window, days
            )

        return {
            "model": model_name,
            "predictions": predictions,
            "training_days": td["training_days"],
            "training_time_ms": time_ms,
            "location": "Chennai, India",
        }
    except Exception as e:
        return {"error": str(e)}
# ════════════════════════════════════════════════════════════
# /report — ENSEMBLE: All 4 models -> averaged final report
# ════════════════════════════════════════════════════════════
@app.get("/report")
def get_report(days: int = 7):
    """
    Ensemble prediction: runs all 4 models (Random Forest, XGBoost, LSTM, LightGBM),
    then averages predictions into a single unified report with confidence scores.

    A model that raises is recorded as failed and left out of the average.
    Returns the per-model results, the averaged daily predictions with the
    spread between models, and an overall agreement score; or
    ``{"error": <message>}`` if no model succeeds or anything else fails.
    """
    try:
        # 1. Fetch data once (shared across all models)
        td = fetch_training_data()
        temps_max, temps_min = td["temps_max"], td["temps_min"]
        end_date = td["end_date"]
        if len(temps_max) < 14:
            return {"error": "Insufficient data for prediction"}

        window = 7
        X, y_max, y_min = prepare_features(temps_max, temps_min, window)

        # 2. Run all 4 models, in this fixed order
        runners = [
            ("random_forest", lambda: predict_rf(X, y_max, y_min, temps_max, temps_min, end_date, window, days)),
            ("xgboost", lambda: predict_xgb(X, y_max, y_min, temps_max, temps_min, end_date, window, days)),
            ("lstm", lambda: predict_lstm(temps_max, temps_min, end_date, window, days)),
            ("lightgbm", lambda: predict_lgbm(X, y_max, y_min, temps_max, temps_min, end_date, window, days)),
        ]
        models_used = [label for label, _ in runners]
        individual_results = {}
        all_preds = {}  # model -> predictions list
        for label, run in runners:
            try:
                preds, t_ms = run()
                individual_results[label] = {"predictions": preds, "training_time_ms": t_ms, "status": "success"}
                all_preds[label] = preds
            except Exception as e:
                individual_results[label] = {"status": "error", "error": str(e)}

        # 3. Compute ensemble average across all successful models
        successful_models = list(all_preds.keys())
        n_models = len(successful_models)
        if n_models == 0:
            return {"error": "All models failed"}

        final_predictions = []
        total_spread_max = 0
        total_spread_min = 0
        for day_idx in range(days):
            contributing = [m for m in successful_models if day_idx < len(all_preds[m])]
            if not contributing:
                continue
            day_maxes = [all_preds[m][day_idx]["predicted_max"] for m in contributing]
            day_mins = [all_preds[m][day_idx]["predicted_min"] for m in contributing]

            avg_max = round(sum(day_maxes) / len(day_maxes), 1)
            avg_min = round(sum(day_mins) / len(day_mins), 1)
            spread_max = round(max(day_maxes) - min(day_maxes), 1)
            spread_min = round(max(day_mins) - min(day_mins), 1)
            total_spread_max += spread_max
            total_spread_min += spread_min

            # Confidence based on model agreement (spread)
            avg_spread = (spread_max + spread_min) / 2
            if avg_spread >= 2.0:
                confidence = "low"
            elif avg_spread >= 1.0:
                confidence = "medium"
            else:
                confidence = "high"

            # Get date from first successful model
            ref = all_preds[successful_models[0]][day_idx]

            # Per-model breakdown for this day
            model_breakdown = {
                m: {
                    "max": all_preds[m][day_idx]["predicted_max"],
                    "min": all_preds[m][day_idx]["predicted_min"],
                }
                for m in contributing
            }
            final_predictions.append({
                "date": ref["date"],
                "day": ref["day"],
                "predicted_max": avg_max,
                "predicted_min": avg_min,
                "model_spread_max": spread_max,
                "model_spread_min": spread_min,
                "confidence": confidence,
                "per_model": model_breakdown,
            })

        # 4. Overall agreement score: 1 - (avg_spread / avg_temp)
        if final_predictions:
            n_days = len(final_predictions)
            avg_temp = sum(p["predicted_max"] for p in final_predictions) / n_days
            avg_overall_spread = ((total_spread_max + total_spread_min) / 2) / n_days
        else:
            avg_temp = 1
            avg_overall_spread = 0
        agreement_score = round(max(0, min(1, 1 - (avg_overall_spread / avg_temp))), 3)

        if agreement_score > 0.95:
            overall_confidence = "very_high"
        elif agreement_score > 0.90:
            overall_confidence = "high"
        elif agreement_score > 0.80:
            overall_confidence = "medium"
        else:
            overall_confidence = "low"

        total_time = sum(
            entry.get("training_time_ms", 0)
            for entry in individual_results.values()
            if isinstance(entry, dict)
        )
        failed = [m for m in models_used if m not in successful_models]
        return {
            "query": f"{days}-day weather forecast",
            "models_used": successful_models,
            "models_failed": failed,
            "individual_results": individual_results,
            "final_report": {
                "predictions": final_predictions,
                "agreement_score": agreement_score,
                "overall_confidence": overall_confidence,
                "description": f"Ensemble average of {n_models} models. Agreement: {agreement_score:.1%}. Confidence: {overall_confidence}.",
            },
            "training_data": {
                "days": td["training_days"],
                "location": "Chennai, India",
                "total_compute_ms": total_time,
            },
        }
    except Exception as e:
        return {"error": str(e)}
# ════════════════════════════════
# /earthquakes — Recent quakes from USGS
# ════════════════════════════════
@app.get("/earthquakes")
def get_earthquakes(min_magnitude: float = 4.5, days: int = 30):
    """Recent earthquakes from USGS.

    Queries the USGS event service for events of at least ``min_magnitude``
    from ``days`` days ago up to the current moment (UTC), at most 1000 events.

    Returns ``{"events": [...], "summary": {...}}``, or
    ``{"error": <message>}`` if the request or parsing fails. Event times are
    UTC ISO strings without an offset suffix.
    """
    from datetime import timezone as _tz

    end_date = datetime.now(_tz.utc)
    start_date = end_date - timedelta(days=days)
    url = "https://earthquake.usgs.gov/fdsnws/event/1/query"
    params = {
        "format": "geojson",
        "starttime": start_date.strftime("%Y-%m-%d"),
        # A date-only end time means midnight UTC, which would silently drop
        # every event from today; send the full timestamp instead.
        "endtime": end_date.strftime("%Y-%m-%dT%H:%M:%S"),
        "minmagnitude": min_magnitude,
        "orderby": "time",
        "limit": 1000,
    }
    try:
        r = requests.get(url, params=params, timeout=15)
        r.raise_for_status()
        features = r.json().get("features", [])

        events = []
        for f in features:
            props = f.get("properties") or {}
            # Geometry can be null for an event; treat that like missing coordinates.
            coords = (f.get("geometry") or {}).get("coordinates") or [0, 0, 0]
            time_ms = props.get("time", 0)
            if time_ms:
                # The timestamp is in epoch milliseconds; tzinfo is dropped to
                # keep the same string format the endpoint has always returned.
                event_time = (
                    datetime.fromtimestamp(time_ms / 1000, tz=_tz.utc)
                    .replace(tzinfo=None)
                    .isoformat()
                )
            else:
                event_time = None
            events.append({
                "time": event_time,
                "magnitude": props.get("mag", 0),
                "place": props.get("place", "Unknown"),
                "longitude": coords[0] if len(coords) > 0 else 0,
                "latitude": coords[1] if len(coords) > 1 else 0,
                "depth_km": coords[2] if len(coords) > 2 else 0,
                "tsunami": props.get("tsunami", 0),
                "significance": props.get("sig", 0),
            })

        # Null or zero magnitudes/depths are left out of the statistics.
        magnitudes = [float(e["magnitude"]) for e in events if e["magnitude"]]
        depths = [float(e["depth_km"]) for e in events if e["depth_km"]]
        return {
            "events": events,
            "summary": {
                "total": len(events),
                "max_magnitude": max(magnitudes) if magnitudes else 0,
                "avg_depth": round(float(sum(depths)) / len(depths), 1) if depths else 0.0,
                "m6_plus": len([m for m in magnitudes if m >= 6.0]),
                "tsunami_alerts": sum(1 for e in events if e["tsunami"]),
            },
        }
    except Exception as e:
        return {"error": str(e)}
# ════════════════════════════════
# /cyclones — Historical Bay of Bengal cyclones
# ════════════════════════════════
@app.get("/cyclones")
def get_cyclones(year: int = None, name: str = None, min_wind: int = None):
    """Historical cyclone data for Chennai/Bay of Bengal (IBTrACS format compatible).

    The records are a fixed in-code list. Optional filters, applied in order:
    exact ``year``, case-insensitive substring of ``name``, and a minimum
    ``max_wind_kmh`` of ``min_wind``.

    Returns ``{"cyclones": [...], "summary": {...}}``; with no matches the
    summary holds zeros and the period "0-0".
    """
    # Base cyclone data (simulating IBTrACS format for tracks)
    records = [
        {
            "name": "Cyclone Michaung", "year": 2023, "category": "Severe Cyclonic Storm",
            "max_wind_kmh": 100, "rainfall_mm": 450, "damage_crore": 8000,
            "dates": "Dec 1-5, 2023", "landfall": "Near Bapatla, AP",
            "impact": "Record 240mm rainfall, severe flooding, 17 deaths",
            "track": [
                {"lat": 10.5, "lon": 83, "wind_speed": 55, "pressure": 1002, "time": "2023-12-01T00:00:00Z"},
                {"lat": 11, "lon": 82.5, "wind_speed": 75, "pressure": 996, "time": "2023-12-02T00:00:00Z"},
                {"lat": 12, "lon": 81.5, "wind_speed": 90, "pressure": 988, "time": "2023-12-03T00:00:00Z"},
                {"lat": 13, "lon": 80.8, "wind_speed": 100, "pressure": 982, "time": "2023-12-04T00:00:00Z"},
                {"lat": 14, "lon": 80.5, "wind_speed": 85, "pressure": 990, "time": "2023-12-05T00:00:00Z"},
                {"lat": 15.5, "lon": 80.2, "wind_speed": 50, "pressure": 1000, "time": "2023-12-06T00:00:00Z"},
            ],
        },
        {
            "name": "Cyclone Mandous", "year": 2022, "category": "Cyclonic Storm",
            "max_wind_kmh": 85, "rainfall_mm": 180, "damage_crore": 1500,
            "dates": "Dec 6-12, 2022", "landfall": "Near Mahabalipuram, TN",
            "impact": "Heavy rainfall, power outages",
            "track": [
                {"lat": 9, "lon": 85, "wind_speed": 45, "pressure": 1004, "time": "2022-12-06T00:00:00Z"},
                {"lat": 10, "lon": 84, "wind_speed": 60, "pressure": 998, "time": "2022-12-07T00:00:00Z"},
                {"lat": 11, "lon": 83, "wind_speed": 75, "pressure": 992, "time": "2022-12-08T00:00:00Z"},
                {"lat": 12, "lon": 81.5, "wind_speed": 85, "pressure": 988, "time": "2022-12-09T00:00:00Z"},
                {"lat": 12.5, "lon": 80.5, "wind_speed": 65, "pressure": 996, "time": "2022-12-10T00:00:00Z"},
            ],
        },
        {
            "name": "Cyclone Nivar", "year": 2020, "category": "Very Severe",
            "max_wind_kmh": 130, "rainfall_mm": 350, "damage_crore": 3000,
            "dates": "Nov 23-27, 2020", "landfall": "Near Puducherry",
            "impact": "200mm+ rainfall, 12 deaths, airport closed",
            "track": [
                {"lat": 8.5, "lon": 86, "wind_speed": 60, "pressure": 1000, "time": "2020-11-23T00:00:00Z"},
                {"lat": 9.5, "lon": 84.5, "wind_speed": 90, "pressure": 992, "time": "2020-11-24T00:00:00Z"},
                {"lat": 10.5, "lon": 83, "wind_speed": 115, "pressure": 980, "time": "2020-11-25T00:00:00Z"},
                {"lat": 11.5, "lon": 81.5, "wind_speed": 130, "pressure": 974, "time": "2020-11-26T00:00:00Z"},
                {"lat": 12, "lon": 80.5, "wind_speed": 95, "pressure": 986, "time": "2020-11-27T00:00:00Z"},
            ],
        },
        {
            "name": "Cyclone Gaja", "year": 2018, "category": "Severe Cyclonic Storm",
            "max_wind_kmh": 120, "rainfall_mm": 200, "damage_crore": 15000,
            "dates": "Nov 11-19, 2018", "landfall": "Nagapattinam-Vedaranyam",
            "impact": "Schools closed, flights disrupted",
            "track": [
                {"lat": 8, "lon": 87, "wind_speed": 55, "pressure": 1002, "time": "2018-11-11T00:00:00Z"},
                {"lat": 9, "lon": 85.5, "wind_speed": 75, "pressure": 996, "time": "2018-11-13T00:00:00Z"},
                {"lat": 10, "lon": 83.5, "wind_speed": 100, "pressure": 986, "time": "2018-11-15T00:00:00Z"},
                {"lat": 10.5, "lon": 82, "wind_speed": 120, "pressure": 978, "time": "2018-11-16T00:00:00Z"},
                {"lat": 10.8, "lon": 80.5, "wind_speed": 85, "pressure": 992, "time": "2018-11-17T00:00:00Z"},
            ],
        },
        {
            "name": "Cyclone Vardah", "year": 2016, "category": "Very Severe",
            "max_wind_kmh": 140, "rainfall_mm": 150, "damage_crore": 5000,
            "dates": "Dec 6-13, 2016", "landfall": "Near Chennai",
            "impact": "Direct hit, 130km/h winds, 18 deaths, power out 3 days",
            "track": [
                {"lat": 8, "lon": 89, "wind_speed": 65, "pressure": 1000, "time": "2016-12-07T00:00:00Z"},
                {"lat": 9.5, "lon": 87, "wind_speed": 90, "pressure": 990, "time": "2016-12-09T00:00:00Z"},
                {"lat": 11, "lon": 85, "wind_speed": 115, "pressure": 982, "time": "2016-12-10T00:00:00Z"},
                {"lat": 12, "lon": 83, "wind_speed": 130, "pressure": 976, "time": "2016-12-11T00:00:00Z"},
                {"lat": 13, "lon": 81, "wind_speed": 140, "pressure": 970, "time": "2016-12-12T00:00:00Z"},
                {"lat": 13.1, "lon": 80.3, "wind_speed": 95, "pressure": 988, "time": "2016-12-13T00:00:00Z"},
            ],
        },
        {
            "name": "Cyclone Thane", "year": 2011, "category": "Very Severe",
            "max_wind_kmh": 140, "rainfall_mm": 120, "damage_crore": 2200,
            "dates": "Dec 25-31, 2011", "landfall": "Near Cuddalore",
            "impact": "Heavy rains, 48 deaths total",
            "track": [
                {"lat": 8.5, "lon": 88, "wind_speed": 55, "pressure": 1004, "time": "2011-12-25T00:00:00Z"},
                {"lat": 9.5, "lon": 86, "wind_speed": 75, "pressure": 996, "time": "2011-12-27T00:00:00Z"},
                {"lat": 10.5, "lon": 84, "wind_speed": 110, "pressure": 984, "time": "2011-12-28T00:00:00Z"},
                {"lat": 11.5, "lon": 82, "wind_speed": 140, "pressure": 972, "time": "2011-12-29T00:00:00Z"},
                {"lat": 11.8, "lon": 80, "wind_speed": 100, "pressure": 988, "time": "2011-12-30T00:00:00Z"},
            ],
        },
        {
            "name": "Cyclone Nisha", "year": 2008, "category": "Cyclonic Storm",
            "max_wind_kmh": 75, "rainfall_mm": 500, "damage_crore": 4500,
            "dates": "Nov 25-27, 2008", "landfall": "Near Karaikal",
            "impact": "500mm in 48hrs, worst flooding in decades",
            "track": [
                {"lat": 8, "lon": 84, "wind_speed": 45, "pressure": 1006, "time": "2008-11-25T00:00:00Z"},
                {"lat": 9, "lon": 82.5, "wind_speed": 60, "pressure": 998, "time": "2008-11-26T00:00:00Z"},
                {"lat": 10, "lon": 81, "wind_speed": 75, "pressure": 992, "time": "2008-11-27T00:00:00Z"},
                {"lat": 10.5, "lon": 80, "wind_speed": 55, "pressure": 1000, "time": "2008-11-28T00:00:00Z"},
            ],
        },
    ]

    # Filter processing: collect the active predicates, then keep records passing all of them.
    checks = []
    if year is not None:
        checks.append(lambda rec: rec["year"] == year)
    if name is not None:
        needle = name.lower()
        checks.append(lambda rec: needle in rec["name"].lower())
    if min_wind is not None:
        checks.append(lambda rec: rec["max_wind_kmh"] >= min_wind)
    selected = [rec for rec in records if all(check(rec) for check in checks)]

    if selected:
        avg_wind = sum(rec["max_wind_kmh"] for rec in selected) / len(selected)
    else:
        avg_wind = 0
    years_seen = [rec["year"] for rec in selected]
    summary = {
        "total": len(selected),
        "avg_wind": round(avg_wind) if avg_wind else 0,
        "max_rainfall": max((rec["rainfall_mm"] for rec in selected), default=0),
        "total_damage": sum(rec["damage_crore"] for rec in selected),
        "period": f"{min(years_seen, default=0)}-{max(years_seen, default=0)}",
    }
    return {"cyclones": selected, "summary": summary}
# ════════════════════════════════
# /tsunamis — Historical Indian Ocean tsunamis
# ════════════════════════════════
@app.get("/tsunamis")
def get_tsunamis():
    """Serve the built-in catalogue of historical tsunami events with summary statistics."""
    # Static reference data; volcanic events carry a magnitude of 0.
    catalogue = [
        {
            "name": "Indian Ocean Tsunami", "date": "2004-12-26", "origin": "Off Sumatra",
            "lat": 3.316, "lon": 95.854, "magnitude": 9.1, "wave_height_m": 30.0, "fatalities": 227898,
            "description": "Deadliest tsunami. 9.1 earthquake triggered waves across Indian Ocean.",
        },
        {
            "name": "Krakatoa Tsunami", "date": "1883-08-27", "origin": "Krakatoa, Sunda Strait",
            "lat": -6.102, "lon": 105.423, "magnitude": 0, "wave_height_m": 37.0, "fatalities": 36417,
            "description": "Volcanic eruption generated 37m waves.",
        },
        {
            "name": "Makran Coast Tsunami", "date": "1945-11-28", "origin": "Makran Coast, Pakistan",
            "lat": 24.5, "lon": 63.0, "magnitude": 8.1, "wave_height_m": 13.0, "fatalities": 4000,
            "description": "Major tsunami from Makran subduction zone.",
        },
        {
            "name": "Andaman Tsunami", "date": "1941-06-26", "origin": "Andaman Islands",
            "lat": 12.5, "lon": 92.5, "magnitude": 7.7, "wave_height_m": 1.5, "fatalities": 5000,
            "description": "Local tsunami affecting Andaman coastal communities.",
        },
        {
            "name": "Sumatra Aftershock", "date": "2005-03-28", "origin": "Off Sumatra",
            "lat": 2.074, "lon": 97.013, "magnitude": 8.6, "wave_height_m": 3.0, "fatalities": 1313,
            "description": "Aftershock of 2004 event, tsunami warning across Indian Ocean.",
        },
        {
            "name": "Sulawesi Tsunami", "date": "2018-09-28", "origin": "Sulawesi, Indonesia",
            "lat": -0.178, "lon": 119.84, "magnitude": 7.5, "wave_height_m": 11.0, "fatalities": 4340,
            "description": "11m waves struck Palu city.",
        },
        {
            "name": "Anak Krakatau", "date": "2018-12-22", "origin": "Anak Krakatau volcano",
            "lat": -6.102, "lon": 105.423, "magnitude": 0, "wave_height_m": 5.0, "fatalities": 437,
            "description": "Volcanic flank collapse generated unexpected tsunami.",
        },
        {
            "name": "Great Assam Earthquake", "date": "1950-08-15", "origin": "Assam-Tibet border",
            "lat": 28.5, "lon": 96.5, "magnitude": 8.6, "wave_height_m": 2.0, "fatalities": 1526,
            "description": "Massive flooding and river surges across Northeast India.",
        },
    ]

    # Aggregate the headline numbers shown alongside the event list.
    wave_heights = [record["wave_height_m"] for record in catalogue]
    death_toll = 0
    for record in catalogue:
        death_toll += record["fatalities"]

    overview = {
        "total": len(catalogue),
        "max_wave": max(wave_heights),
        "total_fatalities": death_toll,
        "period": "1883-2018",
    }
    return {"events": catalogue, "summary": overview}
# ════════════════════════════════
# /temperature-map — Global temperature grid for heatmap
# ════════════════════════════════
# Cache the temperature map so it is recomputed at most once per hour
# Most recently built /temperature-map payload; stays None until the endpoint
# has produced a grid successfully.
_temp_map_cache = None
# Naive local datetime at which _temp_map_cache was built; the endpoint rebuilds
# the grid once this is an hour or more old.
_temp_map_timestamp = None
@app.get("/temperature-map")
def get_temperature_map():
    """Return a simulated, land-only temperature grid for the heatmap.

    The values are synthetic: a latitude/season baseline plus regional desert,
    mountain, rainforest and continental adjustments and a small random noise
    term. The grid is cached in the module globals and reused for one hour.

    Returns:
        JSONResponse whose body has "points" (list of {"lat", "lon", "temp_c"}),
        "count", "timestamp", "grid_step", "month" and "status". If building the
        grid fails, a single fallback point plus an "error" message is returned
        instead so the client always has something to draw.
    """
    global _temp_map_cache, _temp_map_timestamp
    import random
    import math
    from fastapi.responses import JSONResponse

    cors_headers = {"Access-Control-Allow-Origin": "*"}

    # Return cached version if less than 1 hour old
    if _temp_map_cache and _temp_map_timestamp:
        age = (datetime.now() - _temp_map_timestamp).total_seconds()
        if age < 3600:
            return JSONResponse(content=_temp_map_cache, headers=cors_headers)

    try:
        STEP = 2  # grid spacing in degrees
        month = datetime.now().month

        # Coarse land mask: (lat_min, lat_max, lon_min, lon_max), all bounds exclusive.
        land_boxes = (
            # North America
            (60, 83, -141, -52),   # Canada North
            (15, 60, -130, -55),   # US/Canada/Mexico
            (7, 15, -83, -77),     # Central America
            # South America (tapered)
            (-15, 13, -81, -35),   # North SA
            (-35, -15, -75, -40),  # Mid SA
            (-56, -35, -75, -65),  # South SA
            # Africa (split for the Gulf of Guinea)
            (15, 37, -18, 50),     # North Africa (Sahara)
            (4, 15, -18, 52),      # West/Central North (above Equator)
            (-35, 4, 9, 52),       # Central/South/East
            (-25, -12, 43, 51),    # Madagascar
            # Europe
            (36, 72, -10, 45),
            (55, 72, 5, 32),       # Scandinavia
            (63, 67, -25, -13),    # Iceland
            # Eurasia
            (15, 75, 45, 180),     # Main Eurasia
            (5, 35, 60, 100),      # India/South Asia
            (-10, 25, 95, 150),    # SE Asia islands
            # Australia & NZ
            (-40, -10, 113, 154),  # Australia
            (-48, -34, 165, 179),  # New Zealand
            # Greenland
            (60, 84, -60, -15),
            # UK/Ireland -- previously unreachable behind an early `return False`
            (49, 61, -11, 2),
        )

        def is_land(lat, lon):
            """Return True when (lat, lon) falls inside any continental bounding box."""
            if lat > 83 or lat < -60:
                return False
            return any(
                lat_lo < lat < lat_hi and lon_lo < lon < lon_hi
                for lat_lo, lat_hi, lon_lo, lon_hi in land_boxes
            )

        # Both seasonal terms depend only on the month, so compute them once.
        peak_lat = 12 * math.sin(math.radians((month - 3) * 30))
        continental_swing = -6 * math.sin(math.radians((month - 7) * 30))

        all_points = []
        for lat in range(-56, 73, STEP):
            # Baseline cools linearly with distance from the seasonal peak latitude.
            base_temp = 30 - abs(lat - peak_lat) * 0.58
            for lon in range(-180, 180, STEP):
                if not is_land(lat, lon):
                    continue

                # Desert heat boost
                desert = 0
                if 15 < lat < 35 and -10 < lon < 60:
                    desert = 8   # Sahara/Arabia
                elif 20 < lat < 40 and 40 < lon < 80:
                    desert = 6   # Iran/Pakistan
                elif -35 < lat < -15 and 115 < lon < 140:
                    desert = 7   # Australia outback
                elif 35 < lat < 50 and 60 < lon < 115:
                    desert = 4   # Central Asia steppe

                # Mountain cooling
                mtn = 0
                if 25 < lat < 45 and 65 < lon < 105:
                    mtn = -10    # Himalayas
                elif -35 < lat < 5 and -80 < lon < -65:
                    mtn = -8     # Andes
                elif 35 < lat < 50 and -125 < lon < -105:
                    mtn = -6     # Rockies
                elif 44 < lat < 48 and 5 < lon < 15:
                    mtn = -7     # Alps
                elif 10 < lat < 20 and 35 < lon < 42:
                    mtn = -5     # Ethiopian highlands

                # Tropical rainforest cooling
                jungle = 0
                if -15 < lat < 5 and -75 < lon < -45:
                    jungle = -3  # Amazon
                if -5 < lat < 5 and 12 < lon < 30:
                    jungle = -2  # Congo

                # Seasonal continental effect: interiors are more extreme
                continental = 0
                if 45 < lat < 65 and 40 < lon < 130:
                    continental = continental_swing

                noise = random.uniform(-1.8, 1.8)
                temp = base_temp + desert + mtn + jungle + continental + noise
                temp = max(-42, min(52, round(temp, 1)))  # clamp to a plausible range
                all_points.append({"lat": lat, "lon": lon, "temp_c": temp})

        result = {
            "points": all_points,
            "count": len(all_points),
            "timestamp": datetime.now().isoformat(),
            "grid_step": STEP,
            "month": month,
            "status": "climate_model_v2",
        }

        # Cache the result
        _temp_map_cache = result
        _temp_map_timestamp = datetime.now()
        return JSONResponse(content=result, headers=cors_headers)
    except Exception as e:
        logger.error("Temperature map error: %s", e)
        # Ultimate fallback with minimal points to ensure visuals never "die"
        fallback_res = {
            "points": [{"lat": 13, "lon": 80, "temp_c": 30}],
            "count": 1,
            "error": str(e),
        }
        return JSONResponse(content=fallback_res, headers=cors_headers)
# ════════════════════════════════════════════════════════════
# /aqi — Air Quality Index for Chennai (Open-Meteo Air Quality API)
# ════════════════════════════════════════════════════════════
@app.get("/aqi")
def get_aqi():
    """Report the current European AQI and pollutant readings for Chennai.

    Queries the Open-Meteo air-quality endpoint for the configured LAT/LON and
    maps the index onto a category, display colour and short advice string.
    Any failure is reported as {"error": <message>} instead of raising.
    """
    # NOTE(review): a cached payload is looked up here, but no cache write is
    # visible in this function -- confirm whether it is populated elsewhere.
    hit = _get_cache("aqi", 300)
    if hit:
        return hit

    endpoint = "https://air-quality-api.open-meteo.com/v1/air-quality"
    query = {
        "latitude": LAT,
        "longitude": LON,
        "current": "pm10,pm2_5,carbon_monoxide,nitrogen_dioxide,ozone,european_aqi",
        "timezone": "Asia/Kolkata",
    }

    # (inclusive upper bound, category, colour, advice), checked in ascending order.
    bands = (
        (20, "Good", "#22c55e", "Air quality is excellent. Perfect for outdoor activities."),
        (40, "Fair", "#84cc16", "Air quality is acceptable. Sensitive groups should take care."),
        (60, "Moderate", "#eab308", "Moderate pollution. Limit prolonged outdoor exertion."),
        (80, "Poor", "#f97316", "Poor air quality. Avoid outdoor activities if possible."),
        (100, "Very Poor", "#ef4444", "Very poor air quality. Stay indoors and wear a mask outside."),
    )

    try:
        resp = requests.get(endpoint, params=query, timeout=10)
        resp.raise_for_status()
        readings = resp.json().get("current", {})
        aqi = readings.get("european_aqi", 0)

        for ceiling, category, color, advice in bands:
            if aqi <= ceiling:
                break
        else:
            # Above every band: worst category.
            category = "Extremely Poor"
            color = "#7c3aed"
            advice = "Hazardous conditions. Avoid all outdoor activities."

        return {
            "aqi": aqi,
            "category": category,
            "color": color,
            "advice": advice,
            "pm2_5": readings.get("pm2_5"),
            "pm10": readings.get("pm10"),
            "nitrogen_dioxide": readings.get("nitrogen_dioxide"),
            "ozone": readings.get("ozone"),
            "carbon_monoxide": readings.get("carbon_monoxide"),
        }
    except Exception as e:
        return {"error": str(e)}
# ════════════════════════════════════════════════════════════
# /flood-risk — Flood Risk Score for Chennai
# ════════════════════════════════════════════════════════════
@app.get("/flood-risk")
def get_flood_risk():
    """Compute a 0-100 flood-risk score for Chennai from current and forecast rain.

    The score adds capped contributions from current rain, relative humidity,
    the 3-day precipitation total and the highest daily precipitation
    probability, then applies a 15% uplift for Chennai's low elevation.

    Returns:
        dict with "score", "level", "color", "advice", "icon", the raw
        "factors" and a fixed "chennai_note"; or {"error": <message>} when the
        forecast request or its processing fails.
    """
    # NOTE(review): a cached payload is looked up here, but no cache write is
    # visible in this function -- confirm whether it is populated elsewhere.
    cached = _get_cache("flood_risk", 300)
    if cached:
        return cached
    try:
        # Fetch current weather plus a 3-day daily forecast
        weather_url = "https://api.open-meteo.com/v1/forecast"
        weather_params = {
            "latitude": LAT, "longitude": LON,
            "current": "precipitation,relative_humidity_2m,rain",
            "daily": "precipitation_sum,precipitation_probability_max",
            "forecast_days": 3,
            "timezone": "Asia/Kolkata",
        }
        r = requests.get(weather_url, params=weather_params, timeout=10)
        r.raise_for_status()
        data = r.json()
        current = data.get("current", {})
        daily = data.get("daily", {})

        # Flood risk factors; `or` guards against explicit nulls in the payload.
        current_rain = current.get("rain", 0) or 0
        humidity = current.get("relative_humidity_2m", 0) or 0
        precip_sums = daily.get("precipitation_sum") or []
        precip_probs = daily.get("precipitation_probability_max") or []
        total_forecast_rain = sum(p for p in precip_sums if p)
        # default=0 keeps an all-dry forecast (every probability 0 or null)
        # from raising "max() arg is an empty sequence".
        max_prob = max((p for p in precip_probs if p), default=0)

        # Score calculation (0-100)
        score = 0
        score += min(current_rain * 5, 25)         # current rain (max 25pts)
        score += min(humidity * 0.2, 15)           # humidity (max 15pts)
        score += min(total_forecast_rain * 2, 30)  # 3-day forecast rain (max 30pts)
        score += min(max_prob * 0.3, 30)           # precipitation probability (max 30pts)
        # Chennai elevation factor: low-lying city, higher base risk
        score = min(score * 1.15, 100)
        score = round(score)

        # Risk level: (inclusive upper bound, level, colour, advice, icon)
        risk_bands = (
            (20, "Very Low", "#22c55e", "No flood risk. Normal conditions.", "🟢"),
            (40, "Low", "#84cc16", "Minor risk. Monitor rainfall forecasts.", "🟡"),
            (60, "Moderate", "#eab308", "Moderate risk. Avoid low-lying areas during heavy rain.", "🟠"),
            (80, "High", "#f97316", "High flood risk. Stay alert. Avoid underpasses and flood-prone zones.", "🔴"),
        )
        level = "Extreme"
        color = "#ef4444"
        advice = "Extreme flood risk! Stay indoors. Avoid all travel if possible."
        icon = "🚨"
        for ceiling, band_level, band_color, band_advice, band_icon in risk_bands:
            if score <= ceiling:
                level, color, advice, icon = band_level, band_color, band_advice, band_icon
                break

        return {
            "score": score,
            "level": level,
            "color": color,
            "advice": advice,
            "icon": icon,
            "factors": {
                "current_rainfall_mm": round(current_rain, 1),
                "humidity_pct": humidity,
                "forecast_3day_mm": round(total_forecast_rain, 1),
                "max_precip_probability": max_prob,
            },
            "chennai_note": "Chennai is low-lying (6m ASL) with historically high flood vulnerability",
        }
    except Exception as e:
        return {"error": str(e)}
# ════════════════════════════════════════════════════════════
# /seasonal — Seasonal Comparison for current month
# ════════════════════════════════════════════════════════════
@app.get("/seasonal")
def get_seasonal():
    """Compare this month's weather so far against the same month in the last 5 years.

    Historical figures come from the Open-Meteo archive API. The current month
    is only covered up to 7 days ago, so early in a month "current_month"
    values may all be None.

    Returns:
        dict with "month", "year", "current_month", "historical_avg",
        "yearly_breakdown" and "comparison"; or {"error": <message>} when no
        historical year could be fetched or an unexpected error occurs.
    """
    archive_url = "https://archive-api.open-meteo.com/v1/archive"

    def fetch_period_stats(start, end):
        """Fetch daily data for [start, end] and reduce it to averages/totals.

        Returns None when the archive has no max-temperature values for the period.
        """
        r = requests.get(archive_url, params={
            "latitude": LAT, "longitude": LON,
            "start_date": start.strftime("%Y-%m-%d"),
            "end_date": end.strftime("%Y-%m-%d"),
            "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum",
            "timezone": "Asia/Kolkata",
        }, timeout=15)
        r.raise_for_status()
        d = r.json().get("daily", {})
        temps_max = [t for t in (d.get("temperature_2m_max") or []) if t is not None]
        temps_min = [t for t in (d.get("temperature_2m_min") or []) if t is not None]
        precip = [p for p in (d.get("precipitation_sum") or []) if p is not None]
        if not temps_max:
            return None
        return {
            "avg_max": round(sum(temps_max) / len(temps_max), 1),
            "avg_min": round(sum(temps_min) / len(temps_min), 1) if temps_min else None,
            "total_precip": round(sum(precip), 1) if precip else 0,
        }

    try:
        now = datetime.now()
        current_month = now.month
        current_year = now.year
        # Never ask the archive for dates newer than a week ago.
        archive_limit = now - timedelta(days=7)

        # Fetch historical data for the same month over the last 5 years
        yearly_data = []
        for year_offset in range(1, 6):
            year = current_year - year_offset
            month_start = datetime(year, current_month, 1)
            # Last day of month
            if current_month == 12:
                month_end = datetime(year, 12, 31)
            else:
                month_end = datetime(year, current_month + 1, 1) - timedelta(days=1)
            if month_end > archive_limit:
                month_end = archive_limit
            if month_start >= month_end:
                continue
            try:
                stats = fetch_period_stats(month_start, month_end)
            except Exception as e:
                # Best effort: one failed year should not sink the whole comparison.
                logger.warning("Seasonal fetch failed for %s-%02d: %s", year, current_month, e)
                continue
            if stats:
                yearly_data.append({"year": year, **stats})

        if not yearly_data:
            return {"error": "Could not fetch historical data"}

        # Multi-year averages. avg_min is averaged only over years that have a
        # value, and 0.0 counts as a value.
        avg_max = round(sum(y["avg_max"] for y in yearly_data) / len(yearly_data), 1)
        mins = [y["avg_min"] for y in yearly_data if y["avg_min"] is not None]
        avg_min = round(sum(mins) / len(mins), 1) if mins else None
        avg_precip = round(sum(y["total_precip"] for y in yearly_data) / len(yearly_data), 1)

        # Fetch current month so far (up to the archive limit)
        month_start_this_year = datetime(current_year, current_month, 1)
        current_data = {"avg_max": None, "avg_min": None, "total_precip": None}
        if month_start_this_year < archive_limit:
            try:
                stats = fetch_period_stats(month_start_this_year, archive_limit)
                if stats:
                    current_data = stats
            except Exception as e:
                logger.warning("Seasonal fetch failed for the current month: %s", e)

        cur_max = current_data["avg_max"]
        cur_precip = current_data["total_precip"]
        return {
            "month": now.strftime("%B"),
            "year": current_year,
            "current_month": current_data,
            "historical_avg": {
                "avg_max": avg_max,
                "avg_min": avg_min,
                "avg_precip": avg_precip,
                "based_on_years": len(yearly_data),
            },
            "yearly_breakdown": yearly_data,
            "comparison": {
                "temp_diff": round(cur_max - avg_max, 1) if cur_max is not None else None,
                "precip_diff": round(cur_precip - avg_precip, 1) if cur_precip is not None else None,
                "is_hotter": cur_max > avg_max if cur_max is not None else None,
                "is_wetter": cur_precip > avg_precip if cur_precip is not None else None,
            },
        }
    except Exception as e:
        return {"error": str(e)}
# ════════════════════════════════════════════════════════════
# /ask — INTELLIGENT QUERY ENGINE v2
# Understands dates, fetches precise data, focused answers.
# ════════════════════════════════════════════════════════════
import re as _re
# Month name / three-letter abbreviation -> month number (1-12).
MONTH_MAP = {
    "jan": 1, "january": 1, "feb": 2, "february": 2, "mar": 3, "march": 3,
    "apr": 4, "april": 4, "may": 5, "jun": 6, "june": 6,
    "jul": 7, "july": 7, "aug": 8, "august": 8, "sep": 9, "september": 9,
    "oct": 10, "october": 10, "nov": 11, "november": 11, "dec": 12, "december": 12,
}

# One alternation of every spelling in MONTH_MAP, shared by the three
# month-based patterns so the list cannot drift out of sync with the map.
_MONTH_ALT = "|".join(MONTH_MAP)
_DAY_MONTH_YEAR_RE = _re.compile(r'(\d{1,2})\s+(' + _MONTH_ALT + r')\s*,?\s*(\d{4})')
_MONTH_DAY_YEAR_RE = _re.compile(r'(' + _MONTH_ALT + r')\s+(\d{1,2})\s*,?\s*(\d{4})')
_MONTH_YEAR_RE = _re.compile(r'(' + _MONTH_ALT + r')\s+(\d{4})')


def parse_date_from_query(query: str):
    """
    Extract a specific date from a natural language query.
    Supports:
    - '16 feb 2025', 'february 16, 2025', 'on Jan 10 2024'
    - '2025-02-16' (ISO), '16/02/2025' (DD/MM/YYYY)
    - 'yesterday', 'today', 'tomorrow'
    - 'last week', 'last month', 'last year', 'next week', 'next month'
    - '5 days ago', '3 weeks ago', '2 months ago', '1 year ago'
    - 'month YYYY' (e.g., 'march 2024' -> March 1, 2024)
    - Bare year 'YYYY' (e.g., '2024' -> Jan 1, 2024)

    Relative keywords are checked before explicit date patterns. Months are
    approximated as 30 days. Relative results keep the current time of day;
    explicit dates are returned at midnight.

    Returns (datetime, date_type) or (None, None). (None, None) is also
    returned when a relative offset is too large to represent as a date.
    date_type: 'specific_past', 'today', 'specific_future', 'relative_past', 'relative_future'
    """
    q = query.lower().strip()
    now = datetime.now()

    def classify(dt):
        if dt.date() < now.date():
            return "specific_past"
        elif dt.date() == now.date():
            return "today"
        else:
            return "specific_future"

    def years_back(n):
        """Return `now` moved back n calendar years, or None if out of range."""
        try:
            return now.replace(year=now.year - n)
        except (ValueError, OverflowError):
            pass
        try:
            # Feb 29 has no counterpart in a non-leap year.
            return now.replace(year=now.year - n, day=28)
        except (ValueError, OverflowError):
            return None

    def try_date(year, month, day):
        """Build a datetime, returning None for impossible calendar dates."""
        try:
            return datetime(year, month, day)
        except ValueError:
            return None

    # ── Relative keywords ─────────────────────────────
    # "same date last year" / "this day last year" / "today vs last year"
    if any(p in q for p in ["same date", "same day", "this day", "today vs", "today versus"]):
        m = _re.search(r'(\d+)\s+years?\s+ago', q)
        try:
            offset_years = int(m.group(1)) if m else 1  # default: 1 year back
        except ValueError:
            return None, None
        dt = years_back(offset_years)
        if dt is None:
            return None, None
        return dt, "relative_past"

    if "yesterday" in q:
        return now - timedelta(days=1), "relative_past"
    if "today" in q or "right now" in q or "current" in q:
        return now, "today"
    if "tomorrow" in q:
        return now + timedelta(days=1), "relative_future"

    # "N days/weeks/months/years ago"
    m = _re.search(r'(\d+)\s*(day|days|week|weeks|month|months|year|years)\s+ago', q)
    if m:
        unit = m.group(2)
        try:
            n = int(m.group(1))
            if "day" in unit:
                dt = now - timedelta(days=n)
            elif "week" in unit:
                dt = now - timedelta(weeks=n)
            elif "month" in unit:
                dt = now - timedelta(days=n * 30)
            else:
                dt = years_back(n)
        except (ValueError, OverflowError):
            # Offset too large for timedelta/datetime: treat as "no usable date".
            dt = None
        if dt is None:
            return None, None
        return dt, "relative_past"

    # "last week/month/year"
    if "last week" in q:
        return now - timedelta(days=7), "relative_past"
    if "last month" in q:
        return now - timedelta(days=30), "relative_past"
    if "last year" in q:
        # Preserve exact month/day; just subtract 1 year
        return years_back(1), "relative_past"

    # "next week/month"
    if "next week" in q:
        return now + timedelta(days=7), "relative_future"
    if "next month" in q:
        return now + timedelta(days=30), "relative_future"

    # ── Explicit date patterns ────────────────────────
    # Pattern: "DD month YYYY" (e.g., "16 feb 2025", "on 10 jan 2024")
    m = _DAY_MONTH_YEAR_RE.search(q)
    if m:
        dt = try_date(int(m.group(3)), MONTH_MAP[m.group(2)], int(m.group(1)))
        if dt is not None:
            return dt, classify(dt)

    # Pattern: "month DD YYYY" (e.g., "february 16, 2025", "jan 10 2024")
    m = _MONTH_DAY_YEAR_RE.search(q)
    if m:
        dt = try_date(int(m.group(3)), MONTH_MAP[m.group(1)], int(m.group(2)))
        if dt is not None:
            return dt, classify(dt)

    # Pattern: "YYYY-MM-DD" (ISO format)
    m = _re.search(r'(\d{4})-(\d{2})-(\d{2})', q)
    if m:
        dt = try_date(int(m.group(1)), int(m.group(2)), int(m.group(3)))
        if dt is not None:
            return dt, classify(dt)

    # Pattern: "DD/MM/YYYY" or "DD-MM-YYYY" (common Indian format)
    m = _re.search(r'(\d{1,2})[/\-](\d{1,2})[/\-](\d{4})', q)
    if m:
        a, b, year = int(m.group(1)), int(m.group(2)), int(m.group(3))
        # Try DD/MM/YYYY first (India), then fall back to MM/DD/YYYY.
        dt = try_date(year, b, a) or try_date(year, a, b)
        if dt is not None:
            return dt, classify(dt)

    # Pattern: "month YYYY" (e.g., "march 2024" -> defaults to 1st of month)
    m = _MONTH_YEAR_RE.search(q)
    if m:
        dt = try_date(int(m.group(2)), MONTH_MAP[m.group(1)], 1)
        if dt is not None:
            return dt, classify(dt)

    # Pattern: bare "YYYY", just a year like "2024" or "in 2023"
    # Must be 4 digits, 1900-2099, not part of a longer number/date
    m = _re.search(r'(?<!\d)(?<!\d[-/])(19\d{2}|20\d{2})(?![-/]\d)(?!\d)', q)
    if m:
        year = int(m.group(1))
        # Don't match the current year as a specific date (it's ambiguous)
        if year != now.year:
            dt = datetime(year, 1, 1)
            return dt, classify(dt)

    return None, None
def parse_days_from_query(query: str, default: int = 7) -> int:
    """Extract a forecast length in days from the query.

    Matches "<N> day" / "<N> days" and returns N. "N days ago" is a past
    reference handled by date parsing, so it is ignored here and `default`
    is returned instead.
    """
    q = query.lower()
    # The \b after "days?" stops the regex from backtracking to "day" and then
    # treating the leftover "s ago" as something other than " ago".
    m = _re.search(r'(\d+)\s*days?\b(?!\s+ago)', q)
    return int(m.group(1)) if m else default
def fetch_historical_weather(target_date: datetime, days_range: int = 1):
    """
    Fetch actual historical weather data from the Open-Meteo Archive API
    for a specific date or date range.

    Args:
        target_date: first day of the range.
        days_range: number of consecutive days to fetch (values below 1 are
            treated as 1).

    Returns:
        {"daily": [...], "hourly": [...], "source": ...} on success, or
        {"error": <message>} when the range is too recent for the archive or
        the request fails.
    """
    days_range = max(1, days_range)
    start = target_date
    end = target_date + timedelta(days=days_range - 1)

    # Archive API lags ~5-7 days, check if date is available
    archive_limit = datetime.now() - timedelta(days=5)
    if end.date() > archive_limit.date():
        return {"error": f"Archive data not yet available for {end.strftime('%Y-%m-%d')}. Data lags 5-7 days."}

    def value_at(series, index, default=None):
        """Return series[index], or `default` when the series is missing or too short."""
        if series is None or index >= len(series):
            return default
        return series[index]

    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": LAT, "longitude": LON,
        "start_date": start.strftime("%Y-%m-%d"),
        "end_date": end.strftime("%Y-%m-%d"),
        "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum,wind_speed_10m_max,wind_direction_10m_dominant",
        "hourly": "temperature_2m,relative_humidity_2m,wind_speed_10m,cloud_cover,precipitation",
        "timezone": "Asia/Kolkata",
    }
    try:
        r = requests.get(url, params=params, timeout=15)
        r.raise_for_status()
        data = r.json()
        daily = data.get("daily", {})
        hourly = data.get("hourly", {})

        # One entry per day; series shorter than "time" yield their default
        # instead of raising IndexError.
        days_data = []
        for i, date_str in enumerate(daily.get("time", [])):
            dt = datetime.strptime(date_str, "%Y-%m-%d")
            days_data.append({
                "date": date_str,
                "day": dt.strftime("%A"),
                "temp_max": value_at(daily.get("temperature_2m_max"), i),
                "temp_min": value_at(daily.get("temperature_2m_min"), i),
                "precipitation": value_at(daily.get("precipitation_sum"), i, 0),
                "wind_speed_max": value_at(daily.get("wind_speed_10m_max"), i, 0),
            })

        # Hourly readings for every day in the requested range
        hourly_data = []
        for i, t in enumerate(hourly.get("time", [])):
            hourly_data.append({
                "time": t,
                "temperature": value_at(hourly.get("temperature_2m"), i),
                "humidity": value_at(hourly.get("relative_humidity_2m"), i),
                "wind_speed": value_at(hourly.get("wind_speed_10m"), i),
                "cloud_cover": value_at(hourly.get("cloud_cover"), i),
                "precipitation": value_at(hourly.get("precipitation"), i, 0),
            })

        return {"daily": days_data, "hourly": hourly_data, "source": "Open-Meteo Archive API"}
    except Exception as e:
        return {"error": str(e)}
# Keywords short enough to hide inside unrelated words ("ai" in "rain" and
# "chennai", "all" in "rainfall"); these must match as complete words.
_WHOLE_WORD_KEYWORDS = frozenset({"ai", "ml", "all", "ago", "was"})


def _keyword_matcher(keywords):
    """Compile keywords into one regex that matches any of them at a word start."""
    alternatives = [
        _re.escape(kw) + (r"\b" if kw in _WHOLE_WORD_KEYWORDS else "")
        for kw in keywords
    ]
    return _re.compile(r"\b(?:" + "|".join(alternatives) + r")")


_PAST_KW_RE = _keyword_matcher([
    "last year", "previous", "history", "historical", "ago", "past",
    "same date", "same day", "this day", "yesterday", "back in",
    "was", "were", "happened", "occurred", "hit", "struck", "recent",
])
_FUTURE_KW_RE = _keyword_matcher([
    "predict", "prediction", "next", "forecast", "tomorrow",
    "coming", "upcoming", "expect", "will", "probability",
    "chance", "future", "model", "ml", "ai",
])
_WEATHER_KW_RE = _keyword_matcher([
    "weather", "temperature", "temp", "hot", "cold", "rain", "wind", "humidity",
    "climate", "heat", "sunny", "cloudy", "precipitation", "pressure",
    "detail", "condition", "report",
])
_CYCLONE_KW_RE = _keyword_matcher([
    "cyclone", "hurricane", "typhoon", "storm", "wind storm", "tropical",
    "bay of bengal", "vardah", "nivar", "gaja", "mandous", "michaung",
    "thane", "nisha", "fani", "amphan", "hudhud",
])
_QUAKE_KW_RE = _keyword_matcher([
    "earthquake", "quake", "seismic", "magnitude", "richter", "tremor",
    "tectonic", "fault", "aftershock", "usgs",
])
_TSUNAMI_KW_RE = _keyword_matcher([
    "tsunami", "tidal wave", "ocean wave", "indian ocean", "sumatra",
    "krakatoa", "sulawesi", "wave height",
])
_DISASTER_KW_RE = _keyword_matcher([
    "disaster", "catastrophe", "calamity", "danger", "risk",
    "overview", "summary", "all",
])


def classify_query(query: str):
    """
    Classify a query into intent categories.

    Keywords match at the start of a word, so plurals and inflections such as
    "cyclones" or "predicted" still count, while fragments inside other words
    do not. A past/future orientation is detected first and used to split
    weather and cyclone queries into retrieval vs. prediction.

    Returns a list, in detection order, drawn from:
        weather, weather_history, prediction,
        cyclone, cyclone_prediction,
        earthquake, tsunami, disaster
    Falls back to ["weather"] when nothing matches.
    """
    q = query.lower().strip()
    intents = []

    # ── Detect time orientation (past vs future) ──
    is_past = _PAST_KW_RE.search(q) is not None
    is_future = _FUTURE_KW_RE.search(q) is not None

    # ── Weather ──
    if _WEATHER_KW_RE.search(q):
        if is_past:
            intents.append("weather_history")
        elif is_future:
            intents.append("prediction")
        else:
            intents.append("weather")  # current by default

    # ── Cyclone ──
    if _CYCLONE_KW_RE.search(q):
        # Without a future cue this is history/data retrieval.
        intents.append("cyclone_prediction" if is_future else "cyclone")

    # ── Earthquake ──
    if _QUAKE_KW_RE.search(q):
        intents.append("earthquake")

    # ── Tsunami ──
    if _TSUNAMI_KW_RE.search(q):
        intents.append("tsunami")

    # ── Pure prediction (no specific domain) ──
    if not intents and is_future:
        intents.append("prediction")

    # ── Disaster overview ──
    if _DISASTER_KW_RE.search(q):
        intents.append("disaster")

    # Default: current weather
    if not intents:
        intents = ["weather"]

    # De-duplicate while keeping a stable, deterministic order.
    return list(dict.fromkeys(intents))
# ── Known cyclone names for query context extraction ──
# Lower-case cyclone names recognised in queries; earlier entries win when a
# query mentions more than one.
KNOWN_CYCLONES = ["michaung", "mandous", "nivar", "gaja", "vardah", "thane", "nisha",
                  "fani", "amphan", "hudhud", "phailin", "laila", "jal"]
# Lower-case place names recognised in queries; earlier entries win when a
# query mentions more than one.
KNOWN_LOCATIONS = ["chennai", "mumbai", "kolkata", "vizag", "visakhapatnam",
                   "bay of bengal", "arabian sea", "tamil nadu", "andhra pradesh",
                   "odisha", "west bengal", "india", "puducherry", "cuddalore",
                   "nagapattinam", "mahabalipuram"]


def _mentions_term(term, text):
    """Return True when `term` occurs in `text` as a whole word or phrase."""
    return _re.search(r"\b" + _re.escape(term) + r"\b", text) is not None


def extract_query_context(query: str):
    """
    Extract structured context from a natural-language query:
    - cyclone_name: specific cyclone mentioned (e.g. "gaja")
    - year: specific year mentioned (4-digit, 1900-2099)
    - location: specific location mentioned
    - wants_recent: whether user wants "recent" / "latest" data
    - wants_comparison: whether user wants a comparison ("vs", "compared to")

    Names and locations must appear as whole words, so "india" is not
    detected inside "indian ocean" and "jal" is not detected inside longer
    words. Missing items are returned as None.
    """
    q = query.lower().strip()

    # First known cyclone / location (in list order) that the query mentions.
    cyclone_name = next((name for name in KNOWN_CYCLONES if _mentions_term(name, q)), None)
    location = next((loc for loc in KNOWN_LOCATIONS if _mentions_term(loc, q)), None)

    # Extract year (4-digit, 1900-2099) that is not part of a longer number or date
    m = _re.search(r'(?<!\d)(?<!\d[-/])(19\d{2}|20\d{2})(?![-/]\d)(?!\d)', q)
    year = int(m.group(1)) if m else None

    # Detect modifiers
    wants_recent = any(k in q for k in ["recent", "latest", "last", "newest", "most recent"])
    wants_comparison = any(k in q for k in [" vs ", "versus", "compared to", "compare",
                                            "difference between", "today vs"])

    return {
        "cyclone_name": cyclone_name,
        "year": year,
        "location": location,
        "wants_recent": wants_recent,
        "wants_comparison": wants_comparison,
    }
def _format_daily_block(title, day):
    """Render the title plus the max/min/rain/wind lines for one daily record."""
    return (
        f"{title}\n"
        f"Max Temp: {day['temp_max']}°C\n"
        f"Min Temp: {day['temp_min']}°C\n"
        f"Rain: {day['precipitation']} mm\n"
        f"Wind: {day['wind_speed_max']} km/h"
    )


def build_focused_analysis(query, intents, data_sources, target_date, date_type):
    """
    Build a detailed, structured analysis that DIRECTLY answers the question.
    Produces multi-line, human-readable summaries instead of one-liners.

    Args:
        query: original user query (currently unused, kept for the interface).
        intents: classified intents (currently unused, kept for the interface).
        data_sources: dict of fetched payloads; the keys read here are
            "historical_weather", "weather", "forecast", "earthquake",
            "cyclone", "tsunami" and "ensemble". Missing or falsy entries
            are skipped.
        target_date: date or datetime the question is about, or None.
        date_type: e.g. "today", "specific_past", "specific_future".

    Returns:
        The sections joined with newlines, or a generic fallback message when
        no section applied.
    """
    lines = []
    now = datetime.now()
    target_str = target_date.strftime("%Y-%m-%d") if target_date else None

    historical = data_sources.get("historical_weather")
    current = data_sources.get("weather")

    # ── Historical weather for specific date ──
    if historical:
        if "error" not in historical and historical.get("daily"):
            days = historical["daily"]
            wanted = target_str or days[0]["date"]
            # Fall back to the first returned day when the exact date is absent.
            target_data = next((d for d in days if d["date"] == wanted), days[0])
            dt = datetime.strptime(target_data["date"], "%Y-%m-%d")
            lines.append(_format_daily_block(f"{dt.strftime('%B %d %Y')} – Chennai", target_data))

            # If there's also current weather data, add comparison
            if current and "error" not in current:
                comparison = [
                    f"\nToday ({now.strftime('%B %d %Y')}) for comparison:",
                    f"Current Temp: {current.get('temperature')}°C",
                    f"Wind: {current.get('wind_speed')} km/h",
                    f"Humidity: {current.get('humidity')}%",
                ]
                current_temp = current.get("temperature")
                past_max = target_data["temp_max"]
                # Only state a difference when both readings actually exist.
                if current_temp is not None and past_max is not None:
                    comparison.append(
                        f"Temp difference: {round(current_temp - past_max, 1)}°C "
                        f"vs the {target_data['date']} max"
                    )
                lines.append("\n".join(comparison))
        elif historical.get("error"):
            lines.append(f"Could not fetch historical data: {historical['error']}")
    # ── Current weather (only when no historical section was requested) ──
    elif current:
        if "error" not in current and (date_type == "today" or target_date is None):
            lines.append(
                f"Current Weather – Chennai ({now.strftime('%B %d %Y, %H:%M')})\n"
                f"Temperature: {current.get('temperature')}°C\n"
                f"Wind Speed: {current.get('wind_speed')} km/h\n"
                f"Wind Direction: {current.get('wind_direction', 'N/A')}°\n"
                f"Humidity: {current.get('humidity')}%\n"
                f"Conditions: {current.get('description', 'N/A')}"
            )

    # ── Forecast ──
    fc = data_sources.get("forecast")
    if fc and "error" not in fc and fc.get("daily"):
        if target_date and date_type == "specific_future":
            match = next((d for d in fc["daily"] if d["date"] == target_str), None)
            if match is not None:
                dt = datetime.strptime(match["date"], "%Y-%m-%d")
                lines.append(_format_daily_block(
                    f"Forecast for {dt.strftime('%B %d %Y')} ({dt.strftime('%A')}) – Chennai",
                    match,
                ))
            else:
                # target_date may be a plain date or a datetime.
                target_day = target_date.date() if isinstance(target_date, datetime) else target_date
                days_ahead = (target_day - now.date()).days
                lines.append(
                    f"The date {target_str} is {days_ahead} days ahead, beyond the 7-day forecast range. "
                    f"Running ML models for extended prediction."
                )
        elif not target_date or date_type == "today":
            first_day = fc["daily"][0]
            dt = datetime.strptime(first_day["date"], "%Y-%m-%d")
            lines.append(_format_daily_block(
                f"Today's Forecast ({dt.strftime('%A, %B %d %Y')}) – Chennai",
                first_day,
            ))

    # ── Earthquakes ──
    eq = data_sources.get("earthquake")
    if eq and "error" not in eq:
        summary = eq.get("summary", {})
        lines.append(
            f"Seismic Activity Report (Last 30 days)\n"
            f"Total Events: {summary.get('total', 0)} earthquakes (M4.5+)\n"
            f"Strongest: M{summary.get('max_magnitude', '?')}\n"
            f"Average Depth: {summary.get('avg_depth', '?')} km\n"
            f"M6+ Events: {summary.get('m6_plus', 0)}\n"
            f"Tsunami Alerts: {summary.get('tsunami_alerts', 0)}"
        )

    # ── Cyclones: detailed listing ──
    cy = data_sources.get("cyclone")
    if cy and "error" not in cy:
        cyclone_list = cy.get("cyclones", [])
        summary = cy.get("summary", {})
        if cyclone_list:
            lines.append(
                f"Cyclone Records – Bay of Bengal ({summary.get('period', '')})\n"
                f"Total: {summary.get('total', 0)} cyclones | Avg Wind: {summary.get('avg_wind', '?')} km/h\n"
            )
            # List each cyclone with details
            for i, c in enumerate(cyclone_list, 1):
                lines.append(
                    f"{i}. {c['name']} ({c['year']})\n"
                    f" Category: {c['category']}\n"
                    f" Max Wind: {c['max_wind_kmh']} km/h\n"
                    f" Rainfall: {c['rainfall_mm']} mm\n"
                    f" Dates: {c['dates']}\n"
                    f" Landfall: {c['landfall']}\n"
                    f" Impact: {c['impact']}\n"
                    f" Damage: ₹{c['damage_crore']} crore"
                )
        else:
            lines.append("No cyclone records found matching your query.")

    # ── Tsunamis ──
    ts = data_sources.get("tsunami")
    if ts and "error" not in ts:
        summary = ts.get("summary", {})
        lines.append(
            f"Tsunami Records – Indian Ocean ({summary.get('period', '')})\n"
            f"Total Events: {summary.get('total', 0)}\n"
            f"Max Wave Height: {summary.get('max_wave', '?')}m"
        )

    # ── ML Ensemble ──
    ens = data_sources.get("ensemble")
    if ens and "error" not in ens:
        report = ens.get("final_report", {})
        preds = report.get("predictions", [])
        if preds:
            if target_date and date_type == "specific_future":
                # Report only the prediction for the requested day, if present.
                for p in preds:
                    if p["date"] == target_str:
                        lines.append(
                            f"ML PREDICTION for {target_str}:\n"
                            f"Predicted Max: {p['predicted_max']}°C\n"
                            f"Predicted Min: {p['predicted_min']}°C\n"
                            f"Model Spread: ±{p['model_spread_max']}°C\n"
                            f"Confidence: {p['confidence'].upper()}"
                        )
                        break
            else:
                temps_max = [p["predicted_max"] for p in preds]
                temps_min = [p["predicted_min"] for p in preds]
                lines.append(
                    f"ML PREDICTION ({len(preds)} days ahead):\n"
                    f"Max Range: {min(temps_max)}-{max(temps_max)}°C\n"
                    f"Min Range: {min(temps_min)}-{max(temps_min)}°C\n"
                    f"Model Agreement: {report.get('agreement_score', 0)*100:.1f}%\n"
                    f"Confidence: {report.get('overall_confidence', 'unknown').upper()}\n"
                    f"Models used: {', '.join(ens.get('models_used', []))}"
                )

    if not lines:
        lines.append(
            "I analyzed the available data but couldn't find specific information for your query. "
            "Try asking about weather on a specific date, earthquakes, cyclones, tsunamis, or predictions."
        )
    return "\n".join(lines)
def _combine_ensemble_predictions(all_preds, days):
    """Average per-model forecasts into a single ensemble forecast.

    Args:
        all_preds: Mapping of model name -> list of per-day prediction dicts.
            Each dict is read for "date", "day", "predicted_max" and
            "predicted_min". Insertion order decides which model supplies
            the date/day labels.
        days: Number of forecast days to combine (day indices 0..days-1).

    Returns:
        Tuple ``(final_predictions, agreement_score, overall_confidence)``.
    """
    final_predictions = []
    total_spread_max = 0
    total_spread_min = 0

    for day_idx in range(days):
        # Only models that actually produced this day take part in it.
        rows = {
            name: preds[day_idx]
            for name, preds in all_preds.items()
            if day_idx < len(preds)
        }
        if not rows:
            continue

        day_maxes = [row["predicted_max"] for row in rows.values()]
        day_mins = [row["predicted_min"] for row in rows.values()]
        avg_max = round(sum(day_maxes) / len(day_maxes), 1)
        avg_min = round(sum(day_mins) / len(day_mins), 1)
        spread_max = round(max(day_maxes) - min(day_maxes), 1)
        spread_min = round(max(day_mins) - min(day_mins), 1)
        total_spread_max += spread_max
        total_spread_min += spread_min

        avg_spread = (spread_max + spread_min) / 2
        if avg_spread < 1.0:
            confidence = "high"
        elif avg_spread < 2.0:
            confidence = "medium"
        else:
            confidence = "low"

        # Take the date labels from the first model that HAS this day.
        # Indexing the first model unconditionally raised IndexError (and
        # discarded the whole ensemble) when it returned a shorter horizon
        # than one of the other models.
        ref = next(iter(rows.values()))
        final_predictions.append({
            "date": ref["date"], "day": ref["day"],
            "predicted_max": avg_max, "predicted_min": avg_min,
            "model_spread_max": spread_max, "model_spread_min": spread_min,
            "confidence": confidence,
            "per_model": {
                name: {"max": row["predicted_max"], "min": row["predicted_min"]}
                for name, row in rows.items()
            },
        })

    if final_predictions:
        n_days = len(final_predictions)
        avg_temp = sum(p["predicted_max"] for p in final_predictions) / n_days
        avg_overall_spread = ((total_spread_max + total_spread_min) / 2) / n_days
    else:
        avg_temp, avg_overall_spread = 1, 0

    # Agreement = 1 - (mean spread relative to mean predicted max), clamped to [0, 1].
    if avg_temp:
        relative_spread = avg_overall_spread / avg_temp
    else:
        # A zero mean makes the ratio undefined; avoid ZeroDivisionError.
        relative_spread = 0.0 if avg_overall_spread == 0 else 1.0
    agreement_score = round(max(0, min(1, 1 - relative_spread)), 3)

    if agreement_score > 0.95:
        overall_confidence = "very_high"
    elif agreement_score > 0.90:
        overall_confidence = "high"
    elif agreement_score > 0.80:
        overall_confidence = "medium"
    else:
        overall_confidence = "low"

    return final_predictions, agreement_score, overall_confidence


@app.get("/ask")
def ask_climai(q: str = "weather today"):
    """
    Main entry point for AI analysis.
    Orchestrates Planner -> Executor -> Ensemble -> Critic -> Groq synthesis.

    Args:
        q: Free-text user question.

    Returns:
        Dict with the query, detected intents, target date and date type,
        per-stage ``steps``, per-model ``models`` status, the retrieved
        ``data``, the synthesized ``analysis`` text, critic ``corrections``,
        collected ``errors`` and ``total_time_ms``.
    """
    import time as _time
    import re

    logger.info("/ask called with q=%r", q)
    t0 = _time.time()
    query = q.strip()

    # ── 1. PLAN ──
    plan = plan_query(query)
    intents = plan["all_intents"]
    target_date = plan["date"]

    # Forecast horizon: "<N> day(s)/week(s)/month(s)/year(s)" mentioned in the
    # query, else 7 days. Singular and capitalised units are accepted so that
    # "10 day forecast" or "1 Week" are not silently ignored.
    days = 7
    horizon = re.search(r'(\d+)\s*(day|week|month|year)s?\b', query, re.IGNORECASE)
    if horizon:
        days_per_unit = {"day": 1, "week": 7, "month": 30, "year": 365}
        days = int(horizon.group(1)) * days_per_unit[horizon.group(2).lower()]

    # Default date_type to support legacy build_focused_analysis.
    # NOTE(review): past/future is decided against the UTC date here, while
    # days_ahead below uses the server-local date — confirm which is intended.
    if not target_date:
        date_type = "today"
    elif target_date < datetime.utcnow().date():
        date_type = "specific_past"
    else:
        date_type = "specific_future"

    steps = []
    errors = []
    models_status = {}
    now = datetime.now()

    steps.append({
        "step": "plan",
        "status": "done",
        "detail": f"Intents: {', '.join(intents)} | Date: {target_date.strftime('%Y-%m-%d') if target_date else 'None'}"
    })

    # ── 2. EXECUTE ──
    steps.append({"step": "execute", "status": "running", "detail": "Executing data retrieval plan..."})
    try:
        data_sources = execute_plan(plan)
        # Drop None values to match legacy behavior
        data_sources = {k: v for k, v in data_sources.items() if v is not None}
        steps[-1]["status"] = "done"
    except Exception as e:
        # Best effort: keep answering with whatever the later stages can add.
        data_sources = {}
        steps[-1]["status"] = "error"
        errors.append(f"Executor failed: {str(e)}")

    # ── 3. LOCAL ML ORCHESTRATION ──
    # NEVER run ML for pure data retrieval intents or for past dates.
    run_models = False
    data_only_intents = {"cyclone", "earthquake", "tsunami", "weather_history", "disaster"}
    is_data_only = all(i in data_only_intents for i in intents)
    is_past_date = bool(target_date) and date_type in ("specific_past", "relative_past")
    if not is_past_date and not is_data_only:
        if "prediction" in intents:
            run_models = True
        if target_date and date_type in ("specific_future", "relative_future"):
            days_ahead = (target_date - now.date()).days
            if days_ahead > 7:
                # Target lies beyond the default horizon: extend it to reach the date.
                run_models = True
                days = max(days, days_ahead)

    if run_models:
        steps.append({"step": "ensemble", "status": "running", "detail": "Running 4 ML models as team..."})
        try:
            td = fetch_training_data()
            temps_max, temps_min = td["temps_max"], td["temps_min"]
            end_date = td["end_date"]
            window = 7
            X, y_max, y_min = prepare_features(temps_max, temps_min, window)

            model_funcs = {
                "random_forest": lambda: predict_rf(X, y_max, y_min, temps_max, temps_min, end_date, window, days),
                "xgboost": lambda: predict_xgb(X, y_max, y_min, temps_max, temps_min, end_date, window, days),
                "lstm": lambda: predict_lstm(temps_max, temps_min, end_date, window, days),
                "lightgbm": lambda: predict_lgbm(X, y_max, y_min, temps_max, temps_min, end_date, window, days),
            }

            all_preds = {}
            individual_results = {}
            for model_name, model_fn in model_funcs.items():
                # One failing model must not take the others down.
                try:
                    preds, t_ms = model_fn()
                except Exception as e:
                    models_status[model_name] = {"status": "error", "error": str(e)}
                    individual_results[model_name] = {"status": "error", "error": str(e)}
                    errors.append(f"{model_name} failed: {str(e)}")
                else:
                    models_status[model_name] = {"status": "success", "time_ms": t_ms}
                    individual_results[model_name] = {"predictions": preds, "training_time_ms": t_ms, "status": "success"}
                    all_preds[model_name] = preds

            successful_models = list(all_preds)
            if successful_models:
                final_predictions, agreement_score, overall_confidence = _combine_ensemble_predictions(all_preds, days)
                total_time = sum(
                    status.get("time_ms", 0)
                    for status in models_status.values()
                    if status.get("status") == "success"
                )
                data_sources["ensemble"] = {
                    "models_used": successful_models,
                    "models_failed": [m for m in model_funcs if m not in all_preds],
                    "individual_results": individual_results,
                    "final_report": {
                        "predictions": final_predictions,
                        "agreement_score": agreement_score,
                        "overall_confidence": overall_confidence,
                    },
                    "training_data": {"days": td["training_days"], "total_compute_ms": total_time},
                }
                steps[-1]["status"] = "done"
                steps[-1]["detail"] = f"{len(successful_models)}/{len(model_funcs)} models succeeded"
            else:
                steps[-1]["status"] = "error"
                steps[-1]["detail"] = "All models failed"
        except Exception as e:
            steps[-1]["status"] = "error"
            errors.append(f"Ensemble failed: {str(e)}")

    # ── 4. CRITIC ──
    checked = review(query, plan, data_sources)
    corrections = checked["corrections"]
    is_valid = checked["is_valid"]
    if corrections:
        steps.append({"step": "critic", "status": "error" if not is_valid else "done",
                      "detail": f"Self-Healed/Detected: {', '.join(corrections)}"})
    log({"query": query, "plan": plan, "corrections": corrections, "valid": is_valid})

    # ── 5. SYNTHESIZE ANALYSIS ──
    analysis = groq_answer(query, intents, data_sources, target_date, date_type)
    if not is_valid:
        analysis += "\n\n(Note: The AI self-critic noted missing or skewed data constraints during processing.)"

    total_time_ms = round((_time.time() - t0) * 1000)
    return {
        "query": query,
        "intents": intents,
        "target_date": target_date.strftime("%Y-%m-%d") if target_date else None,
        "date_type": date_type,
        "steps": steps,
        "models": models_status,
        "data": data_sources,
        "analysis": analysis,
        "corrections": corrections,
        "errors": errors,
        "total_time_ms": total_time_ms,
    }
# ════════════════════════════════════════════════════════════
# /refresh-data — Rebuild historical dataset in background
# ════════════════════════════════════════════════════════════
@app.post("/refresh-data")
def refresh_dataset():
    """
    Trigger a full dataset rebuild by running build_dataset.py.
    Run monthly to keep ML training data and LLM context fresh.

    The script is launched as a detached child process with its output
    discarded; this endpoint returns immediately and does not wait for or
    report the outcome of the build.

    Returns:
        Dict with ``status`` "started" (plus a message and the list of files
        expected to be updated) or ``status`` "error" with a message.
    """
    import os as _os
    import subprocess as _subprocess
    import sys as _sys

    try:
        # Relative path: resolved against the server's current working directory.
        if not _os.path.exists("build_dataset.py"):
            return {"status": "error", "message": "build_dataset.py not found"}

        # Run the script with the interpreter serving this app so it sees the
        # same environment/packages; a bare "python" from PATH may be a
        # different interpreter or may not exist at all.
        interpreter = _sys.executable or "python"
        _subprocess.Popen(
            [interpreter, "build_dataset.py"],
            stdout=_subprocess.DEVNULL,
            stderr=_subprocess.DEVNULL,
        )
        return {
            "status": "started",
            "message": "Dataset rebuild started in background. Check data/ folder in ~2 minutes.",
            "files_to_update": ["weather_history.json","earthquake_history.json","aqi_history.json","flood_baseline.json","llm_context.json"],
        }
    except Exception as e:
        # Endpoint boundary: report the failure to the caller instead of a 500.
        return {"status": "error", "message": str(e)}
@app.get("/dataset-status")
def dataset_status():
    """Report which dataset files exist, their size, and their recorded fetch time.

    Paths are relative, i.e. resolved against the current working directory.

    Returns:
        Dict with ``dataset_ready`` (True only if every file exists), a
        per-file ``files`` mapping, and a human-readable ``tip``.
    """
    import json as _json
    import os as _os

    files = {
        "weather_history": "weather_history.json",
        "earthquake_history": "earthquake_history.json",
        "aqi_history": "aqi_history.json",
        "flood_baseline": "flood_baseline.json",
        "llm_context": "llm_context.json",
    }

    result = {}
    for key, path in files.items():
        # Single stat() instead of exists()+stat(): the file may vanish between
        # the two calls (e.g. while a rebuild is rewriting it).
        try:
            stat = _os.stat(path)
        except OSError:
            result[key] = {"exists": False}
            continue

        try:
            # JSON is UTF-8; do not depend on the platform's locale encoding.
            with open(path, encoding="utf-8") as f:
                data = _json.load(f)
            fetched_at = data.get("fetched_at") or data.get("generated_at", "unknown")
        except Exception:
            # Best effort: an unreadable or oddly shaped file still "exists".
            fetched_at = "unknown"

        result[key] = {"exists": True, "size_kb": round(stat.st_size/1024,1), "fetched_at": fetched_at}

    all_exist = all(v["exists"] for v in result.values())
    return {"dataset_ready": all_exist, "files": result,
            "tip": "Run POST /refresh-data to build missing files." if not all_exist else "All dataset files present."}
if __name__ == "__main__":
    # Direct-execution entry point: serve the FastAPI app with uvicorn on all
    # interfaces, port 8000. uvicorn is imported here so that merely importing
    # this module does not require it.
    import uvicorn  # type: ignore[import]

    uvicorn.run(app, port=8000, host="0.0.0.0")