from date_utils import parse_date import re import json import logging from groq_llm import client as groq_client KNOWN_CYCLONES = ["michaung", "mandous", "nivar", "gaja", "vardah", "thane", "nisha", "fani", "amphan", "hudhud", "phailin", "laila", "jal"] KNOWN_LOCATIONS = ["chennai", "mumbai", "kolkata", "vizag", "visakhapatnam", "bay of bengal", "arabian sea", "tamil nadu", "andhra pradesh", "odisha", "west bengal", "india", "puducherry", "cuddalore", "nagapattinam", "mahabalipuram"] def _normalize_query(q: str) -> str: typo_map = { r"\bpervious\b": "previous", r"\bprevios\b": "previous", r"\bpreviuos\b": "previous", r"\bprevioues\b": "previous", r"\bprevius\b": "previous", r"\bprevioius\b": "previous", r"\bhistorcal\b": "historical", r"\bhistoricle\b": "historical", r"\byesterady\b": "yesterday", r"\byestarday\b": "yesterday", } for pattern, replacement in typo_map.items(): q = re.sub(pattern, replacement, q) return q def _expand_disaster_intents(intents: list) -> list: """If disaster intent detected, always include weather, cyclone, earthquake.""" if "disaster" in intents: for extra in ["weather", "cyclone", "earthquake", "tsunami"]: if extra not in intents: intents.append(extra) return intents def classify_query(query: str): q = _normalize_query(query.lower().strip()) intents = [] past_kw = ["last year", "previous", "history", "historical", "ago", "past", "same date", "same day", "this day", "yesterday", "back in", "was", "were", "happened", "occurred", "hit", "struck", "recent"] future_kw = ["predict", "prediction", "next", "forecast", "tomorrow", "coming", "upcoming", "expect", "will", "probability", "chance", "future", "model", "ml", "ai"] is_past = any(re.search(rf"\b{k}\b", q) for k in past_kw) is_future = any(re.search(rf"\b{k}\b", q) for k in future_kw) current_year = __import__("datetime").datetime.now().year past_year_match = re.search(r'\b(19\d{2}|20\d{2})\b', q) if past_year_match and int(past_year_match.group(1)) < current_year: is_past = True is_future = False weather_kw = ["weather", "temperature", "temp", "hot", "cold", "rain", "wind", "humidity", "climate", "heat", "sunny", "cloudy", "precipitation", "pressure", "detail", "condition", "report"] if any(re.search(rf"\b{k}\b", q) for k in weather_kw): if is_past: intents.append("weather_history") elif is_future: intents.append("prediction") else: intents.append("weather") cyclone_kw = ["cyclone", "hurricane", "typhoon", "storm", "wind storm", "tropical", "bay of bengal", "vardah", "nivar", "gaja", "mandous", "michaung", "thane", "nisha", "fani", "amphan", "hudhud"] if any(re.search(rf"\b{k}\b", q) for k in cyclone_kw): if is_future: intents.append("cyclone_prediction") else: intents.append("cyclone") quake_kw = ["earthquake", "quake", "seismic", "magnitude", "richter", "tremor", "tectonic", "fault", "aftershock", "usgs"] if any(re.search(rf"\b{k}\b", q) for k in quake_kw): intents.append("earthquake") tsunami_kw = ["tsunami", "tidal wave", "ocean wave", "indian ocean", "sumatra", "krakatoa", "sulawesi", "wave height"] if any(re.search(rf"\b{k}\b", q) for k in tsunami_kw): intents.append("tsunami") if not intents and is_future: intents.append("prediction") disaster_kw = ["disaster", "catastrophe", "calamity", "danger", "risk", "overview", "summary", "all", "report", "threat", "alert"] if any(re.search(rf"\b{k}\b", q) for k in disaster_kw): intents.append("disaster") if "compare" in q or "difference" in q or re.search(r"\bvs\b", q) or "versus" in q: intents.append("weather_comparison") is_range = bool(re.search(r'\b(19\d{2}|20\d{2})\s*(?:to|-|and)\s*(19\d{2}|20\d{2})\b', q)) if is_range and "weather_comparison" not in intents: intents.append("weather_comparison") if not intents: intents.append("weather") return _expand_disaster_intents(list(set(intents))) def extract_query_context(query: str): q = _normalize_query(query.lower().strip()) cyclone_name = None for name in KNOWN_CYCLONES: if name in q: cyclone_name = name break year = None m = re.search(r'(? dict: system_prompt = """You are an intent classifier for a climate and disaster tracking app. Given a user query, you must extract their intent and basic context. The query may contain severe typos or bad grammar. You must figure out what they mean. Allowed intents: weather, weather_history, weather_comparison, prediction, cyclone, cyclone_history, cyclone_prediction, earthquake, tsunami, disaster. Output exactly valid JSON in this format: { "intents": ["list", "of", "intents"], "context": { "cyclone_name": null, "year": null, "location": null, "wants_recent": false, "wants_comparison": false } } """ try: response = groq_client.chat.completions.create( model="llama-3.3-70b-versatile", messages=[ {"role": "system", "content": system_prompt}, {"role": "user", "content": f"Query: {query}"} ], response_format={"type": "json_object"}, temperature=0.1, max_tokens=200 ) result = json.loads(response.choices[0].message.content) if "intents" not in result or "context" not in result: raise ValueError("LLM returned malformed JSON structure") return result except Exception as e: logging.error(f"LLM extraction failed: {e}") return None def plan_query(query: str): """ Create a deterministic execution plan, using LLM for typo-tolerant intent extraction. Falls back to regex parsing if the LLM fails. """ # 1. Try LLM Extraction First llm_result = extract_intent_with_llm(query) if llm_result: intents = llm_result.get("intents", []) context = llm_result.get("context", {}) # Safety fallback if LLM returns empty intents if not intents: intents = classify_query(query) else: # Always apply disaster expansion even for LLM results intents = _expand_disaster_intents(intents) else: # 2. Fallback to Regex logging.warning("Falling back to regex intent classification") intents = classify_query(query) context = extract_query_context(query) date_val = parse_date(query) # 3. Select the primary intent primary_intent = "weather" if "weather_comparison" in intents: primary_intent = "weather_comparison" elif "disaster" in intents: primary_intent = "disaster" elif "cyclone" in intents or "cyclone_history" in intents: primary_intent = "cyclone_history" elif "weather_history" in intents: primary_intent = "weather_history" else: primary_intent = intents[0] if intents else "unknown" return { "intent": primary_intent, "all_intents": intents, "date": date_val, "query": query, "context": context }