Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
| """Normalize EM-DAT location field into a single short search token. | |
| The EM-DAT `location` field is inconsistent — sometimes a single place | |
| ("Valencia Province"), sometimes a 10-item list of tiny administrative units | |
| ("Gorenjska, Goriska, Jugovzodna Slovenija, Koroska, ..."), sometimes null. | |
| None of these raw forms is directly usable in a news search query. | |
| This module wraps a single LLM call that returns ONE token suitable for | |
| dropping into a GDELT query. When the location is too diffuse or absent, | |
| it falls back to the country name. | |
| """ | |
| from __future__ import annotations | |
| import logging | |
| import re | |
| from src.llm.client import LLMClient | |
| from src.models.schemas import FloodEvent | |
| logger = logging.getLogger(__name__) | |
# Characters removed from both ends of each reply line: whitespace, straight
# and curly quotes, markdown markers (backticks, asterisks) and punctuation.
_TOKEN_STRIP_CHARS = " \t\r\n\v\f\"'`*.,;:\u201c\u201d\u2018\u2019"

# Longer output is treated as a sentence or list rather than a place token.
_MAX_TOKEN_LENGTH = 50


def extract_location_token(
    event: FloodEvent, llm_client: LLMClient, config: dict
) -> str:
    """Return a single place token for use in news queries.

    Sends the event's country and raw location to the LLM (prompt key
    ``location_extract``) and keeps the first line of the reply that is
    non-empty after surrounding quotes, markdown markers and punctuation
    have been removed.

    Args:
        event: Flood event; ``country``, ``location`` and ``event_id`` are read.
        llm_client: Client used for the single ``call_with_config`` call.
        config: Passed through unchanged to ``call_with_config``.

    Returns:
        The cleaned token, or ``event.country`` when the LLM call raises, the
        reply is not text, or no line yields a short token containing at
        least one letter.
    """
    variables = {
        "country": event.country,
        "location": event.location or "null",
    }
    try:
        response = llm_client.call_with_config(
            prompt_key="location_extract",
            knowledge_key="search",  # reuse expert_search as knowledge placeholder
            variables=variables,
            config=config,
        )
    except Exception as e:
        # Deliberately broad: extraction is best-effort and any client failure
        # should degrade to the country name instead of aborting the caller.
        logger.warning(f"Location extraction failed for {event.event_id}: {e}")
        return event.country

    # A None / non-text reply is handled like an empty one instead of raising
    # AttributeError on .splitlines().
    if not isinstance(response, str):
        response = ""

    # First line that still has content once wrappers are stripped; lines made
    # only of quotes or code-fence markers collapse to "" and are skipped.
    stripped_lines = (
        line.strip(_TOKEN_STRIP_CHARS) for line in response.splitlines()
    )
    token = next((candidate for candidate in stripped_lines if candidate), "")

    # Sanity checks: non-empty, short, and containing at least one letter.
    # str.isalpha is Unicode-aware, so non-Latin place names are accepted.
    has_letter = any(ch.isalpha() for ch in token)
    if not token or len(token) > _MAX_TOKEN_LENGTH or not has_letter:
        logger.warning(
            f"Location extraction produced invalid token '{token}' for {event.event_id}; "
            f"falling back to country"
        )
        return event.country
    return token