iPurushottam committed on
Commit
37b38a7
·
0 Parent(s):

fix: correctly add backend files after submodule removal

Browse files
Files changed (12) hide show
  1. .gitignore +10 -0
  2. .pyre_configuration +8 -0
  3. Dockerfile +28 -0
  4. README.md +12 -0
  5. critic.py +41 -0
  6. date_utils.py +125 -0
  7. executor.py +185 -0
  8. groq_llm.py +177 -0
  9. logger.py +28 -0
  10. main.py +0 -0
  11. planner.py +226 -0
  12. weather_service.py +252 -0
.gitignore ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ climai.log
5
+ *.json
6
+ *.txt
7
+ !requirements.txt
8
+ .git/
9
+ .venv/
10
+ .vscode/
.pyre_configuration ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "site_package_search_strategy": "all",
3
+ "search_path": ["."],
4
+ "source_directories": ["."],
5
+ "strict": [],
6
+ "targets": [],
7
+ "typeshed": "bundled"
8
+ }
Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use an official Python runtime as a parent image
2
+ FROM python:3.10-slim
3
+
4
+ # Set the working directory
5
+ WORKDIR /app
6
+
7
+ # Install system dependencies (needed for AI/ML libraries)
8
+ # libgomp1 is required for XGBoost and LightGBM
9
+ RUN apt-get update && apt-get install -y \
10
+ build-essential \
11
+ curl \
12
+ cmake \
13
+ libgomp1 \
14
+ && rm -rf /var/lib/apt/lists/*
15
+
16
+ # Copy requirements and install them
17
+ COPY requirements.txt .
18
+ RUN pip install --no-cache-dir --upgrade pip && \
19
+ pip install --no-cache-dir -r requirements.txt
20
+
21
+ # Copy the rest of the backend code
22
+ COPY . .
23
+
24
+ # Expose the port FastAPI runs on
25
+ EXPOSE 7860
26
+
27
+ # Command to run the app (Hugging Face uses port 7860)
28
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: ClimAI
3
+ emoji: 🌍
4
+ colorFrom: blue
5
+ colorTo: green
6
+ sdk: docker
7
+ pinned: false
8
+ app_port: 7860
9
+ ---
10
+
11
+ # ClimAI Backend
12
+ 16GB RAM upgrade for the ClimAI backend.
critic.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+
3
def review(query: str, plan: dict, raw_data: dict):
    """
    Critic Module: inspect a finished plan/data pair and flag problems.

    Checks performed:
      * date parsing that fell back to the Jan-1 default while the query
        names a different month,
      * missing data entirely,
      * per-model "error" statuses in the ML results,
      * Open-Meteo responses whose daily arrays came back empty.

    Returns a dict with the list of correction tags, the (unmodified)
    raw data, and an ``is_valid`` flag that is False only when no data
    was retrieved at all.
    """
    corrections = []

    # 1. Date misinterpretation: a Jan-1 date paired with a query that
    #    explicitly names some other month smells like a parser default.
    parsed_date = plan.get("date")
    if parsed_date and parsed_date.month == 1 and parsed_date.day == 1:
        month_tokens = ["feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec"]
        lowered = query.lower()
        if any(tok in lowered for tok in month_tokens):
            # In a full self-healing loop the planner would be re-run with
            # explicit hints; for now the correction is only flagged.
            corrections.append("date_reparsed_from_jan1_default")

    # 2. No data at all?
    if not raw_data:
        corrections.append("data_missing")
    elif isinstance(raw_data, dict):
        # 3. ML model health: any model that reported an error status.
        for model_name, model_result in raw_data.get("models", {}).items():
            if model_result.get("status") == "error":
                corrections.append(f"fallback_triggered_for_{model_name}")

        # 4. Open-Meteo daily payload present but with empty time axis.
        daily_payload = raw_data.get("weather", {})
        if daily_payload and "daily" in daily_payload:
            if not daily_payload["daily"].get("time"):
                corrections.append("open_meteo_returned_empty_arrays")

    return {
        "corrections": corrections,
        "data": raw_data,
        "is_valid": "data_missing" not in corrections,
    }
date_utils.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime, timedelta
2
+ import re
3
+ import dateparser
4
+
5
+
6
def parse_date(query: str):
    """
    Advanced date intelligence module.

    Resolution order (first match wins):
      1. "same date last year"-style phrases  -> today minus one year
      2. yesterday / today / tomorrow and "N <unit> ago" phrases
      3. explicit ISO (YYYY-MM-DD) and DD/MM/YYYY dates
      4. month-name dates ("Mar 9 2025", "9 March 2025") via dateparser
      5. bare "in/of YYYY"  -> Jan 1 of that year
      6. last resort: noise-stripped fragment through dateparser

    Returns a ``datetime.date``, or ``None`` when no date can be
    extracted (including when dateparser would only offer its Jan-1
    default, which is rejected as not being user intent).
    """
    clean = query.lower().strip()
    now = datetime.utcnow()
    today = now.date()

    # ── 1. "same date / same day / today — previous year / last year" ──
    # Catches all natural ways a user says "this day but last year".
    same_date_last_year_patterns = [
        r"same (date|day).{0,20}(last|previous|prior) year",
        r"(last|previous|prior) year.{0,20}same (date|day)",
        r"this (date|day).{0,20}(last|previous|prior) year",
        r"(last|previous|prior) year.{0,20}this (date|day)",
        r"(last|previous|prior) year.{0,20}today",
        r"today.{0,20}(last|previous|prior) year",
        r"same date last year",
        r"same day last year",
        r"year ago today",
        r"a year ago",
        r"1 year ago",
        # Handles: "tell me previous year 2025 weather" when today is Mar 9 2026
        # i.e. user wants Mar 9 2025
        r"(previous|last|prior) year \d{4}",
        r"\d{4}.{0,10}(previous|last|prior) year",
    ]
    for pattern in same_date_last_year_patterns:
        if re.search(pattern, clean):
            try:
                return today.replace(year=today.year - 1)
            except ValueError:
                # Feb 29 has no equivalent in the previous year
                return today.replace(year=today.year - 1, day=28)

    # ── 2. Explicit relative phrases (fast path before dateparser) ──
    # NOTE: "yesterday" must be tested before "today" — it contains it.
    if "yesterday" in clean:
        return today - timedelta(days=1)

    if "today" in clean:
        return today

    if "tomorrow" in clean:
        return today + timedelta(days=1)

    # e.g. "3 days ago", "2 weeks ago"
    m = re.search(r'(\d+)\s+(day|days|week|weeks|month|months|year|years)\s+ago', clean)
    if m:
        n, unit = int(m.group(1)), m.group(2)
        if "day" in unit:
            return today - timedelta(days=n)
        if "week" in unit:
            return today - timedelta(weeks=n)
        if "month" in unit:
            # Approximation: a "month" is treated as 30 days.
            return today - timedelta(days=n * 30)
        if "year" in unit:
            try:
                return today.replace(year=today.year - n)
            except ValueError:
                # Fix: this was a bare `except:`; only the Feb-29
                # ValueError from .replace() should be absorbed here.
                return today.replace(year=today.year - n, day=28)

    # ── 3. Explicit date formats (YYYY-MM-DD or DD/MM/YYYY) ──
    m = re.search(r'(\d{4})-(\d{2})-(\d{2})', clean)
    if m:
        try:
            return datetime(int(m.group(1)), int(m.group(2)), int(m.group(3))).date()
        except ValueError:
            pass

    m = re.search(r'(\d{1,2})[/-](\d{1,2})[/-](\d{4})', clean)
    if m:
        try:
            return datetime(int(m.group(3)), int(m.group(2)), int(m.group(1))).date()
        except ValueError:
            pass

    # ── 4. Explicit month name + day + year e.g. "Mar 9 2025", "9 March 2025" ──
    m = re.search(
        r'(\d{1,2})\s+(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[a-z]*\s+(\d{4})|'
        r'(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)[a-z]*\s+(\d{1,2})[,\s]+(\d{4})',
        clean
    )
    if m:
        parsed = dateparser.parse(m.group(0),
                                  settings={"PREFER_DATES_FROM": "past",
                                            "RETURN_AS_TIMEZONE_AWARE": False})
        if parsed:
            return parsed.date()

    # ── 5. Isolated "in YYYY" or "of YYYY" — return Jan 1 of that year only
    # when the user clearly means a whole year, not a specific date
    m = re.search(r'\b(in|of|year)\s+(19\d{2}|20\d{2})\b', clean)
    if m:
        try:
            return datetime(int(m.group(2)), 1, 1).date()
        except ValueError:
            pass

    # ── 6. Last resort: strip noise words and try dateparser ──
    # Only pass short date-like fragments, NOT the full sentence
    # (full sentences confuse dateparser into picking Jan 1).
    noise = r'\b(tell|me|what|was|the|weather|like|previous|last|this|same|day|date|year|in|for|at|of|a|an|give|show|fetch|get|want|need|please|how|about|is|are|will|be)\b'
    stripped = re.sub(noise, '', clean).strip()
    stripped = re.sub(r'\s+', ' ', stripped)

    if stripped and len(stripped) > 2:
        parsed = dateparser.parse(
            stripped,
            settings={"PREFER_DATES_FROM": "past", "RETURN_AS_TIMEZONE_AWARE": False}
        )
        if parsed:
            # Safety check: reject if dateparser returned Jan 1 with no "jan"
            # or "january" or "1st" in the original query — that's a default,
            # not user intent.
            if parsed.month == 1 and parsed.day == 1:
                if not re.search(r'\b(jan|january|1st|jan\s*1|01[/-]01)\b', clean):
                    return None  # Refuse the bad default
            return parsed.date()

    return None
125
+
executor.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from datetime import datetime, timedelta
3
+ import re
4
+
5
+ OPEN_METEO_FORECAST = "https://api.open-meteo.com/v1/forecast"
6
+ OPEN_METEO_ARCHIVE = "https://archive-api.open-meteo.com/v1/archive"
7
+
8
+ # Default testing coordinates for Chennai
9
+ DEFAULT_LAT = 13.0827
10
+ DEFAULT_LON = 80.2707
11
+
12
+
13
+ def _ensure_datetime(d):
14
+ """Ensure d is a full datetime object, not just a date."""
15
+ if d is None:
16
+ return None
17
+ if isinstance(d, datetime):
18
+ return d
19
+ return datetime(d.year, d.month, d.day)
20
+
21
+
22
+ def _infer_past_date_from_query(query: str):
23
+ current_year = datetime.now().year
24
+ m = re.search(r'\b(19\d{2}|20\d{2})\b', query.lower())
25
+ if m:
26
+ year = int(m.group(1))
27
+ if year < current_year:
28
+ now = datetime.utcnow()
29
+ try:
30
+ return datetime(year, now.month, now.day)
31
+ except ValueError:
32
+ return datetime(year, now.month, 28)
33
+ return None
34
+
35
+
36
+ def _extract_all_past_years(query: str):
37
+ current_year = datetime.now().year
38
+ now = datetime.now()
39
+ q = query.lower()
40
+ seen = set()
41
+ years_to_process = []
42
+
43
+ range_matches = re.finditer(r'\b(19\d{2}|20\d{2})\s*(?:to|-|and)\s*(19\d{2}|20\d{2})\b', q)
44
+ for m in range_matches:
45
+ start_y = int(m.group(1))
46
+ end_y = int(m.group(2))
47
+ if start_y > end_y:
48
+ start_y, end_y = end_y, start_y
49
+ for y in range(start_y, end_y + 1):
50
+ if y < current_year and y not in seen:
51
+ seen.add(y)
52
+ years_to_process.append(y)
53
+
54
+ single_matches = re.finditer(r'\b(19\d{2}|20\d{2}|2[0-5])\b', q)
55
+ for m in single_matches:
56
+ val = m.group(1)
57
+ y = 2000 + int(val) if len(val) == 2 else int(val)
58
+ if y < current_year and y not in seen:
59
+ seen.add(y)
60
+ years_to_process.append(y)
61
+
62
+ years_to_process = sorted(years_to_process, reverse=True)[:6]
63
+ years_to_process.sort()
64
+
65
+ past_dates = []
66
+ for year in years_to_process:
67
+ try:
68
+ past_dates.append(datetime(year, now.month, now.day))
69
+ except ValueError:
70
+ past_dates.append(datetime(year, now.month, 28))
71
+ return past_dates
72
+
73
+
74
def execute_plan(plan):
    """
    Executes the deterministic plan generated by the Planner.
    Routes to the correct external APIs or internal ML modules.

    Expects ``plan`` to carry: intent (str), all_intents (list),
    date (date/datetime or None), context (dict), query (str).
    Returns a dict with fixed keys (weather, forecast,
    historical_weather, historical_comparison, cyclone, earthquake,
    tsunami, models); slots that were not fetched stay None.
    """
    intent = plan.get("intent", "weather")
    all_intents = plan.get("all_intents", [intent])
    # Dates arrive as either date or datetime — normalize to datetime.
    target_date = _ensure_datetime(plan.get("date"))
    ctx = plan.get("context", {})
    query = plan.get("query", "")

    # Fallback: infer past date from query year when the parser found none.
    if target_date is None and intent in ["weather_history", "weather"]:
        inferred = _infer_past_date_from_query(query)
        if inferred:
            target_date = inferred
            intent = "weather_history"
            # Mutates the caller's plan so downstream consumers see the
            # corrected intent — NOTE(review): intentional side effect?
            plan["intent"] = intent

    execution_result = {
        "weather": None,
        "forecast": None,
        "historical_weather": None,
        "historical_comparison": None,
        "cyclone": None,
        "earthquake": None,
        "tsunami": None,
        "models": None
    }

    try:
        # Imported lazily so a missing service module degrades gracefully
        # (caught by the ImportError handler below).
        from weather_service import (get_cyclones, get_earthquakes, get_tsunamis,
                                     get_weather, get_forecast, fetch_historical_weather)

        now = datetime.utcnow().date()

        # ── DISASTER ROUTE ────────────────────────────────────────────────────
        # Full report: always fetch weather + forecast + cyclones + earthquakes
        if "disaster" in all_intents:
            execution_result["weather"] = get_weather()
            execution_result["forecast"] = get_forecast()
            execution_result["cyclone"] = get_cyclones()
            execution_result["earthquake"] = get_earthquakes()
            execution_result["tsunami"] = get_tsunamis()
            # Don't return early — other intents below may add more data

        # ── COMPARISON ROUTE ──────────────────────────────────────────────────
        if intent == "weather_comparison" or ctx.get("wants_comparison"):
            execution_result["weather"] = get_weather()
            past_dates = _extract_all_past_years(query)

            # No explicit years in the query: fall back to the single
            # parsed target date, if it lies in the past.
            if not past_dates and target_date:
                target_dt_only = target_date.date() if isinstance(target_date, datetime) else target_date
                if target_dt_only < now:
                    past_dates = [target_date]

            if past_dates:
                comparison_results = []
                for past_dt in past_dates:
                    past_date_only = past_dt.date()
                    # The archive API lags real time by a few days.
                    archive_limit = datetime.utcnow().date() - timedelta(days=5)
                    if past_date_only <= archive_limit:
                        hist = fetch_historical_weather(past_dt, days_range=1)
                        if hist and "error" not in hist:
                            hist["queried_year"] = past_dt.year
                            hist["queried_date"] = past_dt.strftime("%Y-%m-%d")
                            comparison_results.append(hist)

                if comparison_results:
                    execution_result["historical_comparison"] = comparison_results
                    execution_result["historical_weather"] = comparison_results[0]

            execution_result["forecast"] = get_forecast()

        # ── STANDARD WEATHER / HISTORY / PREDICTION ROUTE ────────────────────
        elif intent in ["weather_history", "weather", "prediction"]:
            if target_date:
                target_date_only = target_date.date() if isinstance(target_date, datetime) else target_date
                if target_date_only < now:
                    execution_result["historical_weather"] = fetch_historical_weather(target_date, days_range=1)
                elif target_date_only > now and (target_date_only - now).days <= 7:
                    # Near-future dates are served from the 7-day forecast.
                    execution_result["forecast"] = get_forecast()

            target_date_only = target_date.date() if target_date and isinstance(target_date, datetime) else target_date
            if not target_date or target_date_only == now:
                execution_result["weather"] = get_weather()

            if not target_date and intent in ["weather", "prediction"]:
                execution_result["forecast"] = get_forecast()

        # ── CYCLONE ROUTE ─────────────────────────────────────────────────────
        # Skipped when the disaster route already populated the slot.
        if "cyclone" in all_intents and execution_result["cyclone"] is None:
            cy_name = ctx.get("cyclone_name")
            cy_year = ctx.get("year")
            c_data = get_cyclones(name=cy_name, year=cy_year)
            if ctx.get("wants_recent") and not cy_name:
                # "Latest cyclones": keep only the three most recent.
                cyc_list = sorted(c_data.get("cyclones", []), key=lambda c: c["year"], reverse=True)[:3]
                c_data["cyclones"] = cyc_list
            execution_result["cyclone"] = c_data

        # ── EARTHQUAKE ROUTE ──────────────────────────────────────────────────
        if "earthquake" in all_intents and execution_result["earthquake"] is None:
            execution_result["earthquake"] = get_earthquakes()

        # ── TSUNAMI ROUTE ─────────────────────────────────────────────────────
        if "tsunami" in all_intents and execution_result["tsunami"] is None:
            execution_result["tsunami"] = get_tsunamis()

    except ImportError as e:
        # Service module unavailable: return the all-None skeleton.
        print(f"Executor Import Error: {e}")

    return execution_result
groq_llm.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ groq_llm.py — Groq LLM Answer Generator
3
+ Drop-in replacement for build_focused_analysis().
4
+ Reads all fetched data + ML results and generates a smart natural language answer.
5
+
6
+ Install: pip install groq
7
+ Get free API key: https://console.groq.com
8
+ """
9
+
10
+ import json
11
+ import os
12
+ from groq import Groq
13
+
14
+ # ── Put your key here OR set env variable GROQ_API_KEY ──
15
+ GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
16
+
17
+ client = Groq(api_key=GROQ_API_KEY)
18
+
19
+ SYSTEM_PROMPT = """You are ClimAI, an expert disaster and weather intelligence assistant for Chennai, India.
20
+
21
+ You receive structured data fetched from real APIs (Open-Meteo, USGS, NOAA) and ML model predictions.
22
+ Your job is to answer the user's question clearly and conversationally using ONLY the data provided.
23
+
24
+ Rules:
25
+ - Be concise but informative (3-6 sentences max unless a detailed report is asked)
26
+ - Always mention the actual numbers from the data (temperatures, wind speed, etc.)
27
+ - If ML ensemble predictions are present, mention the confidence level
28
+ - If data is missing or has errors, say so honestly
29
+ - Never make up numbers — only use what's in the data
30
+ - Format nicely: use line breaks for readability
31
+ - Always mention the date/time period the data refers to
32
+ - For comparisons and multi-year ranges: if historical_comparison data is present, you MUST use it.
33
+ Show a clear year-by-year breakdown with differences, or summarize the trend over the years.
34
+ Identify the hottest/coldest years or highest precipitation if a range is provided.
35
+ Never say historical data is unavailable if historical_comparison is in the provided data.
36
+ """
37
+
38
+
39
def groq_answer(query: str, intents: list, data_sources: dict,
                target_date=None, date_type: str = "today") -> str:
    """
    Generate a natural language answer using Groq LLM.

    Condenses the raw ``data_sources`` dict (weather, history,
    comparisons, disasters, ML predictions) into a token-bounded
    summary, builds a prompt around it, and asks the Groq chat API for
    a conversational answer. On any API failure a fallback string is
    returned instead of raising, so callers always get text back.
    """
    # Compact, truncated view of the fetched data — keeps prompt size bounded.
    data_summary = {}

    # Current weather
    if "weather" in data_sources and data_sources["weather"]:
        w = data_sources["weather"]
        data_summary["current_weather"] = {
            "temperature": w.get("temperature"),
            "feels_like": w.get("feels_like"),
            "humidity": w.get("humidity"),
            "wind_speed": w.get("wind_speed"),
            "wind_direction": w.get("wind_direction"),
            "precipitation": w.get("precipitation"),
            "cloud_cover": w.get("cloud_cover"),
        }

    # Single historical weather (non-comparison)
    if "historical_weather" in data_sources and data_sources["historical_weather"]:
        hw = data_sources["historical_weather"]
        if isinstance(hw, dict) and "daily" in hw:
            data_summary["historical_weather"] = {
                "date_range": hw.get("period", hw.get("queried_date", "")),
                # Cap at 5 days to bound token usage.
                "days": hw["daily"][:5] if hw["daily"] else []
            }
        else:
            data_summary["historical_weather"] = hw

    # ── NEW: Multi-year comparison data ──────────────────────────────────────
    # If executor fetched historical data for multiple past years, include ALL
    # of it so Groq can do a proper side-by-side comparison.
    if "historical_comparison" in data_sources and data_sources["historical_comparison"]:
        comparison_list = data_sources["historical_comparison"]
        comparison_summary = []
        for entry in comparison_list:
            if isinstance(entry, dict) and "daily" in entry:
                comparison_summary.append({
                    "year": entry.get("queried_year"),
                    "date": entry.get("queried_date"),
                    "daily": entry["daily"][:3],  # first 3 days is enough
                    "source": entry.get("source", "Open-Meteo Archive API"),
                })
            else:
                comparison_summary.append(entry)
        data_summary["historical_comparison"] = comparison_summary
    # ─────────────────────────────────────────────────────────────────────────

    # Earthquake
    if "earthquake" in data_sources and data_sources["earthquake"]:
        eq = data_sources["earthquake"]
        if isinstance(eq, dict):
            # Include high-level summary + only the 10 most recent/significant events
            data_summary["earthquakes"] = {
                "summary": eq.get("summary"),
                "recent_events": eq.get("events", [])[:10]
            }
        elif isinstance(eq, list):
            data_summary["earthquakes"] = eq[:10]
        else:
            data_summary["earthquakes"] = eq

    # Cyclone
    if "cyclone" in data_sources and data_sources["cyclone"]:
        cy = data_sources["cyclone"]
        if isinstance(cy, dict) and "cyclones" in cy:
            # Truncate detailed tracks to prevent massive token usage
            truncated_cyc = []
            for c in cy["cyclones"]:
                c_copy = c.copy()
                if "track" in c_copy:
                    c_copy["track"] = c_copy["track"][:5]  # Just show the start/progression
                truncated_cyc.append(c_copy)
            data_summary["cyclone"] = {"cyclones": truncated_cyc}
        else:
            data_summary["cyclone"] = cy

    # Tsunami
    if "tsunami" in data_sources and data_sources["tsunami"]:
        data_summary["tsunami"] = data_sources["tsunami"]

    # ML Ensemble predictions
    if "ensemble" in data_sources and data_sources["ensemble"]:
        ens = data_sources["ensemble"]
        report = ens.get("final_report", {})
        preds = report.get("predictions", [])
        data_summary["ml_predictions"] = {
            "models_used": ens.get("models_used", []),
            "overall_confidence": report.get("overall_confidence", "unknown"),
            "agreement_score": report.get("agreement_score"),
            "next_7_days": preds[:7],
        }

    # Forecast
    if "forecast" in data_sources and data_sources["forecast"]:
        fc = data_sources["forecast"]
        if isinstance(fc, dict) and "daily" in fc:
            data_summary["forecast"] = fc["daily"][:7]

    date_str = target_date.strftime("%B %d, %Y") if target_date else "today"

    # Build a comparison-aware instruction hint for the prompt
    comparison_hint = ""
    if "historical_comparison" in data_summary:
        years = [str(e.get("year", "?")) for e in data_summary["historical_comparison"]]
        comparison_hint = (
            f"\n\nIMPORTANT: The user wants a comparison. "
            f"You have historical data for: {', '.join(years)}. "
            f"You also have current_weather for today (2026). "
            f"Compare them directly — show specific numbers and calculate the differences."
        )

    user_prompt = f"""User question: "{query}"

Detected intents: {', '.join(intents)}
Date context: {date_str} ({date_type})
Location: Chennai, India

Available data:
{json.dumps(data_summary, indent=2, default=str)}{comparison_hint}

Please answer the user's question based on this data."""

    try:
        response = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt},
            ],
            max_tokens=600,
            temperature=0.3,
        )
        return response.choices[0].message.content.strip()

    except Exception as e:
        # Broad catch is deliberate: an LLM outage must not break the API
        # response — the raw data is still returned by the caller.
        return f"[Groq unavailable: {e}] Data was fetched successfully — check the 'data' field in the response."
logger.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import sys
4
+
5
+ # Ensure logs directory exists if needed, but logging in the same dir for now
6
+ try:
7
+ logging.basicConfig(
8
+ filename="climai.log",
9
+ level=logging.INFO,
10
+ format="%(asctime)s - %(levelname)s - %(message)s"
11
+ )
12
+ except (PermissionError, OSError):
13
+ # Fallback to console logging if file system is read-only (e.g. on certain Render tiers)
14
+ logging.basicConfig(
15
+ stream=sys.stdout,
16
+ level=logging.INFO,
17
+ format="%(asctime)s - %(levelname)s - %(message)s"
18
+ )
19
+
20
def log(message):
    """
    Centralized logging function.

    Accepts any object (string, dict, ...) and records its ``str()``
    form at INFO level through the module-wide logging configuration.
    Never raises: if the logging call itself fails, the message is
    echoed to stdout instead so callers are never interrupted.
    """
    try:
        logging.info(str(message))
    except Exception:
        # Fix: was a bare `except:` (which would also swallow
        # KeyboardInterrupt/SystemExit); narrowed to Exception.
        print(f"FAILED TO LOG: {message}")
main.py ADDED
The diff for this file is too large to render. See raw diff
 
planner.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from date_utils import parse_date
2
+ import re
3
+ import json
4
+ import logging
5
+ from groq_llm import client as groq_client
6
+
7
+ KNOWN_CYCLONES = ["michaung", "mandous", "nivar", "gaja", "vardah", "thane", "nisha",
8
+ "fani", "amphan", "hudhud", "phailin", "laila", "jal"]
9
+
10
+ KNOWN_LOCATIONS = ["chennai", "mumbai", "kolkata", "vizag", "visakhapatnam",
11
+ "bay of bengal", "arabian sea", "tamil nadu", "andhra pradesh",
12
+ "odisha", "west bengal", "india", "puducherry", "cuddalore",
13
+ "nagapattinam", "mahabalipuram"]
14
+
15
+ def _normalize_query(q: str) -> str:
16
+ typo_map = {
17
+ r"\bpervious\b": "previous",
18
+ r"\bprevios\b": "previous",
19
+ r"\bpreviuos\b": "previous",
20
+ r"\bprevioues\b": "previous",
21
+ r"\bprevius\b": "previous",
22
+ r"\bprevioius\b": "previous",
23
+ r"\bhistorcal\b": "historical",
24
+ r"\bhistoricle\b": "historical",
25
+ r"\byesterady\b": "yesterday",
26
+ r"\byestarday\b": "yesterday",
27
+ }
28
+ for pattern, replacement in typo_map.items():
29
+ q = re.sub(pattern, replacement, q)
30
+ return q
31
+
32
+
33
+ def _expand_disaster_intents(intents: list) -> list:
34
+ """If disaster intent detected, always include weather, cyclone, earthquake."""
35
+ if "disaster" in intents:
36
+ for extra in ["weather", "cyclone", "earthquake", "tsunami"]:
37
+ if extra not in intents:
38
+ intents.append(extra)
39
+ return intents
40
+
41
+
42
def classify_query(query: str):
    """
    Regex/keyword fallback intent classifier (used when the LLM path
    is unavailable).

    Normalizes typos, decides past/future orientation from keyword
    lists and explicit years, then tags the query with one or more of:
    weather, weather_history, weather_comparison, prediction, cyclone,
    cyclone_prediction, earthquake, tsunami, disaster. Always returns
    a non-empty, de-duplicated list with disaster expansion applied.
    """
    # Local import: planner's module top level does not import datetime.
    # Fix: replaces the original `__import__("datetime")` hack.
    from datetime import datetime

    q = _normalize_query(query.lower().strip())
    intents = []

    past_kw = ["last year", "previous", "history", "historical", "ago", "past",
               "same date", "same day", "this day", "yesterday", "back in",
               "was", "were", "happened", "occurred", "hit", "struck", "recent"]
    future_kw = ["predict", "prediction", "next", "forecast", "tomorrow",
                 "coming", "upcoming", "expect", "will", "probability",
                 "chance", "future", "model", "ml", "ai"]

    is_past = any(re.search(rf"\b{k}\b", q) for k in past_kw)
    is_future = any(re.search(rf"\b{k}\b", q) for k in future_kw)

    # An explicit year earlier than the current one forces "past".
    current_year = datetime.now().year
    past_year_match = re.search(r'\b(19\d{2}|20\d{2})\b', q)
    if past_year_match and int(past_year_match.group(1)) < current_year:
        is_past = True
        is_future = False

    weather_kw = ["weather", "temperature", "temp", "hot", "cold", "rain", "wind", "humidity",
                  "climate", "heat", "sunny", "cloudy", "precipitation", "pressure",
                  "detail", "condition", "report"]
    if any(re.search(rf"\b{k}\b", q) for k in weather_kw):
        if is_past:
            intents.append("weather_history")
        elif is_future:
            intents.append("prediction")
        else:
            intents.append("weather")

    cyclone_kw = ["cyclone", "hurricane", "typhoon", "storm", "wind storm", "tropical",
                  "bay of bengal", "vardah", "nivar", "gaja", "mandous", "michaung",
                  "thane", "nisha", "fani", "amphan", "hudhud"]
    if any(re.search(rf"\b{k}\b", q) for k in cyclone_kw):
        if is_future:
            intents.append("cyclone_prediction")
        else:
            intents.append("cyclone")

    quake_kw = ["earthquake", "quake", "seismic", "magnitude", "richter", "tremor",
                "tectonic", "fault", "aftershock", "usgs"]
    if any(re.search(rf"\b{k}\b", q) for k in quake_kw):
        intents.append("earthquake")

    tsunami_kw = ["tsunami", "tidal wave", "ocean wave", "indian ocean", "sumatra",
                  "krakatoa", "sulawesi", "wave height"]
    if any(re.search(rf"\b{k}\b", q) for k in tsunami_kw):
        intents.append("tsunami")

    # Pure future-oriented query with no domain keyword: generic prediction.
    if not intents and is_future:
        intents.append("prediction")

    disaster_kw = ["disaster", "catastrophe", "calamity", "danger", "risk",
                   "overview", "summary", "all", "report", "threat", "alert"]
    if any(re.search(rf"\b{k}\b", q) for k in disaster_kw):
        intents.append("disaster")

    if "compare" in q or "difference" in q or re.search(r"\bvs\b", q) or "versus" in q:
        intents.append("weather_comparison")

    # A year range ("2019 to 2023") always implies a comparison.
    is_range = bool(re.search(r'\b(19\d{2}|20\d{2})\s*(?:to|-|and)\s*(19\d{2}|20\d{2})\b', q))
    if is_range and "weather_comparison" not in intents:
        intents.append("weather_comparison")

    # Guaranteed non-empty result: default to current weather.
    if not intents:
        intents.append("weather")

    return _expand_disaster_intents(list(set(intents)))
106
+
107
+
108
def extract_query_context(query: str):
    """
    Pull lightweight context out of the query for the executor: a known
    cyclone name, an explicit standalone year, a known location, and
    whether the user wants "recent" results or a comparison. A year
    range ("2019 to 2023") always implies a comparison.
    """
    q = _normalize_query(query.lower().strip())

    cyclone_name = next((name for name in KNOWN_CYCLONES if name in q), None)
    location = next((loc for loc in KNOWN_LOCATIONS if loc in q), None)

    # Standalone 4-digit year only — lookarounds reject years that are
    # part of a larger number or a slash/dash date.
    year_match = re.search(r'(?<!\d)(?<!\d[-/])(19\d{2}|20\d{2})(?![-/]\d)(?!\d)', q)
    year = int(year_match.group(1)) if year_match else None

    recent_markers = ("recent", "latest", "last", "newest", "most recent")
    comparison_markers = ("compare", "vs", "versus", "difference", "than")
    wants_recent = any(marker in q for marker in recent_markers)
    wants_comparison = any(marker in q for marker in comparison_markers)

    if re.search(r'\b(19\d{2}|20\d{2})\s*(?:to|-|and)\s*(19\d{2}|20\d{2})\b', q):
        wants_comparison = True

    return {
        "cyclone_name": cyclone_name,
        "year": year,
        "location": location,
        "wants_recent": wants_recent,
        "wants_comparison": wants_comparison,
    }
141
+
142
+
143
def extract_intent_with_llm(query: str) -> dict:
    """
    Typo-tolerant intent extraction via the Groq LLM.

    Asks the model to classify the query into the allowed intent set
    and extract basic context, forcing JSON output. Returns the parsed
    dict, or None on any failure (network, bad JSON, missing keys) so
    the caller can fall back to regex classification.
    """
    system_prompt = """You are an intent classifier for a climate and disaster tracking app.
Given a user query, you must extract their intent and basic context.
The query may contain severe typos or bad grammar. You must figure out what they mean.

Allowed intents: weather, weather_history, weather_comparison, prediction, cyclone, cyclone_history, cyclone_prediction, earthquake, tsunami, disaster.

Output exactly valid JSON in this format:
{
"intents": ["list", "of", "intents"],
"context": {
"cyclone_name": null,
"year": null,
"location": null,
"wants_recent": false,
"wants_comparison": false
}
}
"""
    try:
        # json_object response_format makes the model emit parseable JSON.
        response = groq_client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Query: {query}"}
            ],
            response_format={"type": "json_object"},
            temperature=0.1,
            max_tokens=200
        )
        result = json.loads(response.choices[0].message.content)
        # Guard against structurally-valid-but-wrong JSON from the model.
        if "intents" not in result or "context" not in result:
            raise ValueError("LLM returned malformed JSON structure")
        return result
    except Exception as e:
        # Any failure here is recoverable — caller falls back to regex.
        logging.error(f"LLM extraction failed: {e}")
        return None
180
+
181
+
182
def plan_query(query: str):
    """
    Build a deterministic execution plan for the executor.

    LLM-based intent extraction is attempted first (it tolerates bad
    typos); on failure the regex classifier takes over. The returned
    plan always carries: intent, all_intents, date, query, context.
    """
    # 1. Try the LLM extractor first.
    llm_result = extract_intent_with_llm(query)

    if llm_result is None:
        # 2. LLM path failed entirely — regex fallback.
        logging.warning("Falling back to regex intent classification")
        intents = classify_query(query)
        context = extract_query_context(query)
    else:
        intents = llm_result.get("intents", [])
        context = llm_result.get("context", {})
        if intents:
            # Disaster expansion applies to LLM results too.
            intents = _expand_disaster_intents(intents)
        else:
            # LLM answered but gave no intents — regex safety net.
            intents = classify_query(query)

    date_val = parse_date(query)

    # 3. Primary intent selection, most specific first.
    if "weather_comparison" in intents:
        primary_intent = "weather_comparison"
    elif "disaster" in intents:
        primary_intent = "disaster"
    elif "cyclone" in intents or "cyclone_history" in intents:
        primary_intent = "cyclone_history"
    elif "weather_history" in intents:
        primary_intent = "weather_history"
    else:
        primary_intent = intents[0] if intents else "unknown"

    return {
        "intent": primary_intent,
        "all_intents": intents,
        "date": date_val,
        "query": query,
        "context": context,
    }
weather_service.py ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from datetime import datetime, timedelta
3
+ import math
4
+ import random
5
+
6
+ # Chennai coordinates
7
+ LAT = 13.0827
8
+ LON = 80.2707
9
+
10
def get_weather():
    """Fetch the current weather conditions for Chennai from Open-Meteo.

    Returns a flat dict of readings (temperature, humidity, wind, pressure,
    precipitation, ...) on success, or {"error": <message>} on failure.
    """
    endpoint = "https://api.open-meteo.com/v1/forecast"
    query = {
        "latitude": LAT,
        "longitude": LON,
        "current": "temperature_2m,relative_humidity_2m,apparent_temperature,precipitation,rain,cloud_cover,wind_speed_10m,wind_direction_10m,wind_gusts_10m,pressure_msl,surface_pressure",
        "timezone": "Asia/Kolkata",
    }
    try:
        resp = requests.get(endpoint, params=query, timeout=10)
        resp.raise_for_status()
        current = resp.json().get("current", {})

        # Map the wind bearing (degrees) onto a 16-point compass rose;
        # each sector is 22.5 degrees wide.
        bearing = current.get("wind_direction_10m", 0)
        compass = ("N", "NNE", "NE", "ENE", "E", "ESE", "SE", "SSE",
                   "S", "SSW", "SW", "WSW", "W", "WNW", "NW", "NNW")
        heading = compass[round(bearing / 22.5) % 16]

        # NOTE: "pressure" deliberately exposes surface_pressure; pressure_msl
        # is requested but not surfaced in the result.
        return {
            "temperature": current.get("temperature_2m"),
            "feels_like": current.get("apparent_temperature"),
            "humidity": current.get("relative_humidity_2m"),
            "wind_speed": current.get("wind_speed_10m"),
            "wind_direction": heading,
            "wind_direction_deg": bearing,
            "wind_gusts": current.get("wind_gusts_10m"),
            "cloud_cover": current.get("cloud_cover"),
            "pressure": current.get("surface_pressure"),
            "precipitation": current.get("precipitation"),
            "rain": current.get("rain"),
        }
    except Exception as e:
        return {"error": str(e)}
46
+
47
def get_forecast():
    """Fetch the 7-day daily forecast (plus hourly temp/wind) for Chennai.

    Returns {"daily": [...], "hourly": [...]} on success, or
    {"error": <message>} on failure.
    """
    endpoint = "https://api.open-meteo.com/v1/forecast"
    query = {
        "latitude": LAT,
        "longitude": LON,
        "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum,wind_speed_10m_max,wind_direction_10m_dominant,precipitation_probability_max,uv_index_max",
        "hourly": "temperature_2m,wind_speed_10m",
        "forecast_days": 7,
        "timezone": "Asia/Kolkata",
    }
    try:
        resp = requests.get(endpoint, params=query, timeout=10)
        resp.raise_for_status()
        payload = resp.json()
        daily = payload.get("daily", {})
        hourly = payload.get("hourly", {})

        def pick(block, key, idx, default):
            # Guarded positional lookup: Open-Meteo value arrays can be
            # shorter than the "time" array, so never index blindly.
            values = block.get(key, [])
            return values[idx] if idx < len(values) else default

        days = []
        for i, date_str in enumerate(daily.get("time", [])):
            dt = datetime.strptime(date_str, "%Y-%m-%d")
            days.append({
                "date": date_str,
                "day": dt.strftime("%a"),
                "temp_max": pick(daily, "temperature_2m_max", i, None),
                "temp_min": pick(daily, "temperature_2m_min", i, None),
                "precipitation": pick(daily, "precipitation_sum", i, 0),
                "wind_speed_max": pick(daily, "wind_speed_10m_max", i, 0),
                "precip_prob": pick(daily, "precipitation_probability_max", i, 0),
                "uv_index": pick(daily, "uv_index_max", i, 0),
            })

        hourly_data = [
            {
                "time": t,
                "temperature": pick(hourly, "temperature_2m", i, None),
                "wind_speed": pick(hourly, "wind_speed_10m", i, None),
            }
            for i, t in enumerate(hourly.get("time", []))
        ]

        return {"daily": days, "hourly": hourly_data}
    except Exception as e:
        return {"error": str(e)}
94
+
95
def fetch_historical_weather(target_date: datetime, days_range: int = 1):
    """Fetch actual historical daily weather from the Open-Meteo Archive API.

    Args:
        target_date: First day of the window to fetch.
        days_range: Number of consecutive days to fetch (default 1).

    Returns:
        {"daily": [...], "source": "Open-Meteo Archive API"} on success,
        or {"error": <message>} when the window is too recent or the
        request fails.
    """
    start = target_date
    end = target_date + timedelta(days=days_range - 1)
    # The archive lags real time by roughly 5-7 days; refuse windows it
    # cannot serve yet instead of issuing a doomed request.
    archive_limit = datetime.now() - timedelta(days=5)
    if end.date() > archive_limit.date():
        return {"error": f"Archive data not yet available for {end.strftime('%Y-%m-%d')}. Data lags 5-7 days."}

    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": LAT, "longitude": LON,
        "start_date": start.strftime("%Y-%m-%d"),
        "end_date": end.strftime("%Y-%m-%d"),
        "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum,wind_speed_10m_max,wind_direction_10m_dominant",
        # Hourly data was previously requested here but never used; it has
        # been dropped to shrink the response payload.
        "timezone": "Asia/Kolkata",
    }
    try:
        r = requests.get(url, params=params, timeout=15)
        r.raise_for_status()
        daily = r.json().get("daily", {})

        def _col(key, i, default=None):
            # Guard positional access: a short or missing value array must
            # not raise IndexError (which would previously surface as a
            # misleading {"error": ...} for the entire window).
            vals = daily.get(key, [])
            return vals[i] if i < len(vals) else default

        days_data = []
        for i, date_str in enumerate(daily.get("time", [])):
            dt = datetime.strptime(date_str, "%Y-%m-%d")
            days_data.append({
                "date": date_str,
                "day": dt.strftime("%A"),
                "temp_max": _col("temperature_2m_max", i),
                "temp_min": _col("temperature_2m_min", i),
                "precipitation": _col("precipitation_sum", i, 0),
                "wind_speed_max": _col("wind_speed_10m_max", i, 0),
            })
        return {"daily": days_data, "source": "Open-Meteo Archive API"}
    except Exception as e:
        return {"error": str(e)}
132
+
133
def get_earthquakes(min_magnitude: float = 4.5, days: int = 30):
    """Significant earthquakes near Chennai (8000 km radius) from USGS.

    Args:
        min_magnitude: Minimum magnitude to include (default 4.5).
        days: Look-back window in days (default 30).

    Returns:
        {"events": [...], "summary": {...}} on success, or
        {"error": <message>} on failure.
    """
    end_time = datetime.utcnow()
    start_time = end_time - timedelta(days=days)
    url = "https://earthquake.usgs.gov/fdsnws/event/1/query"
    params = {
        "format": "geojson",
        "starttime": start_time.isoformat(),
        "endtime": end_time.isoformat(),
        "minmagnitude": min_magnitude,
        "latitude": LAT,
        "longitude": LON,
        "maxradiuskm": 8000,
    }
    try:
        r = requests.get(url, params=params, timeout=10)
        r.raise_for_status()
        features = r.json().get("features", [])
        events = []
        for f in features:
            props = f.get("properties", {})
            coords = f.get("geometry", {}).get("coordinates", [0, 0, 0])
            # Some events ship truncated coordinate arrays; pad so that
            # lon/lat/depth indexing below cannot raise IndexError.
            if len(coords) < 3:
                coords = (list(coords) + [0, 0, 0])[:3]
            # Epoch milliseconds; USGS occasionally reports null here.
            ts = props.get("time")
            events.append({
                "id": f.get("id"),
                "magnitude": props.get("mag"),
                "place": props.get("place"),
                "time": datetime.fromtimestamp(ts / 1000).isoformat() if ts is not None else None,
                "url": props.get("url"),
                "tsunami": props.get("tsunami"),
                "lat": coords[1],
                "lon": coords[0],
                "depth": coords[2]
            })
        # USGS can report null magnitudes; exclude them from the numeric
        # summaries instead of letting a TypeError discard all events.
        mags = [e["magnitude"] for e in events if e["magnitude"] is not None]
        return {
            "events": events,
            "summary": {
                "total": len(events),
                "max_magnitude": max(mags, default=0),
                "avg_depth": round(sum(e["depth"] for e in events) / len(events), 1) if events else 0,
                "tsunami_alerts": sum(1 for e in events if e["tsunami"]),
                "m6_plus": sum(1 for m in mags if m >= 6.0),
            }
        }
    except Exception as e:
        return {"error": str(e)}
180
+
181
def get_cyclones(year: int = None, name: str = None, min_wind: int = None):
    """Historical cyclone data for the Bay of Bengal (curated dataset).

    Optional filters: exact year, case-insensitive name substring, and a
    minimum peak wind speed in km/h. Falsy arguments mean "no filter".
    Returns the matching storms plus an aggregate summary.
    """
    fields = ("name", "year", "category", "max_wind_kmh", "rainfall_mm",
              "damage_crore", "dates", "landfall", "impact")
    rows = (
        ("Cyclone Michaung", 2023, "Severe", 110, 240, 1500, "Dec 2-6, 2023", "Andhra Coast", "Heavy rain in Chennai, massive flooding"),
        ("Cyclone Mandous", 2022, "Severe", 105, 180, 1000, "Dec 6-10, 2022", "Near Mahabalipuram", "Trees uprooted, coastal flooding"),
        ("Cyclone Nivar", 2020, "Very Severe", 145, 250, 2500, "Nov 23-27, 2020", "Near Puducherry", "Crop damage, power outages"),
        ("Cyclone Gaja", 2018, "Very Severe", 120, 180, 6000, "Nov 10-17, 2018", "Near Vedaranyam", "Direct hit, 130km/h winds"),
        ("Cyclone Vardah", 2016, "Very Severe", 140, 150, 5000, "Dec 6-13, 2016", "Near Chennai", "Direct hit, 130km/h winds"),
        ("Cyclone Thane", 2011, "Very Severe", 140, 120, 2200, "Dec 25-31, 2011", "Near Cuddalore", "Heavy rains"),
        ("Cyclone Nisha", 2008, "Cyclonic Storm", 75, 500, 4500, "Nov 25-27, 2008", "Near Karaikal", "500mm in 48hrs"),
    )
    storms = [dict(zip(fields, row)) for row in rows]

    if year:
        storms = [s for s in storms if s["year"] == year]
    if name:
        needle = name.lower()
        storms = [s for s in storms if needle in s["name"].lower()]
    if min_wind:
        storms = [s for s in storms if s["max_wind_kmh"] >= min_wind]

    winds = [s["max_wind_kmh"] for s in storms]
    mean_wind = sum(winds) / len(winds) if winds else 0
    years = [s["year"] for s in storms]
    return {
        "cyclones": storms,
        "summary": {
            "total": len(storms),
            "avg_wind": round(mean_wind) if mean_wind else 0,
            "max_rainfall": max((s["rainfall_mm"] for s in storms), default=0),
            "total_damage": sum(s["damage_crore"] for s in storms),
            "period": f"{min(years, default=0)}-{max(years, default=0)}",
        }
    }
208
+
209
def get_tsunamis():
    """Historical tsunami events worldwide — 30 verified events.

    Returns {"events": [...], "summary": {...}} built from a static,
    curated table; no network access is performed.
    """
    # (name, date, peak wave height in metres, fatalities)
    rows = (
        ("2004 Indian Ocean Tsunami", "2004-12-26", 30.0, 227898),
        ("2011 Tōhoku Tsunami", "2011-03-11", 40.5, 19759),
        ("1960 Valdivia Tsunami", "1960-05-22", 25.0, 6000),
        ("1964 Alaska Tsunami", "1964-03-27", 67.0, 131),
        ("1883 Krakatoa Tsunami", "1883-08-27", 37.0, 36417),
        ("1755 Lisbon Tsunami", "1755-11-01", 20.0, 60000),
        ("1868 Arica Tsunami", "1868-08-13", 21.0, 25000),
        ("1896 Meiji Sanriku Tsunami", "1896-06-15", 38.2, 22066),
        ("1945 Makran Coast Tsunami", "1945-11-28", 13.0, 4000),
        ("1941 Andaman Tsunami", "1941-06-26", 1.5, 5000),
        ("2018 Sulawesi Tsunami", "2018-09-28", 11.0, 4340),
        ("2018 Anak Krakatau Tsunami", "2018-12-22", 5.0, 437),
        ("2005 Nias–Simeulue Tsunami", "2005-03-28", 3.0, 1313),
        ("1958 Lituya Bay Mega-Tsunami", "1958-07-09", 524.0, 5),
        ("1976 Moro Gulf Tsunami", "1976-08-16", 9.0, 5000),
        ("1998 Papua New Guinea Tsunami", "1998-07-17", 15.0, 2183),
        ("2009 Samoa Tsunami", "2009-09-29", 14.0, 192),
        ("2010 Chile Tsunami", "2010-02-27", 29.0, 525),
        ("1933 Shōwa Sanriku Tsunami", "1933-03-02", 28.7, 3064),
        ("1946 Aleutian Tsunami", "1946-04-01", 35.0, 165),
        ("1952 Kamchatka Tsunami", "1952-11-04", 18.0, 2336),
        ("1992 Flores Island Tsunami", "1992-12-12", 26.0, 2500),
        ("1993 Hokkaido Tsunami", "1993-07-12", 31.7, 230),
        ("2006 Java Tsunami", "2006-07-17", 7.0, 668),
        ("1929 Grand Banks Tsunami", "1929-11-18", 13.0, 28),
        ("1960 Hilo Tsunami", "1960-05-23", 10.7, 61),
        ("2007 Solomon Islands Tsunami", "2007-04-01", 12.0, 52),
        ("1908 Messina Tsunami", "1908-12-28", 12.0, 80000),
        ("1692 Port Royal Tsunami", "1692-06-07", 2.0, 2000),
        ("2004 Sri Lanka Tsunami Impact", "2004-12-26", 11.0, 35322),
    )
    events = [
        {"name": n, "date": d, "wave_height_m": h, "fatalities": f}
        for n, d, h, f in rows
    ]
    return {
        "events": events,
        "summary": {
            "total": len(events),
            "max_wave": max(e["wave_height_m"] for e in events),
            "total_fatalities": sum(e["fatalities"] for e in events),
            "period": "1692-2018",
        }
    }
252
+