Spaces:
Running
Running
| """Pre-process user queries before retrieval.""" | |
| from __future__ import annotations | |
| import re | |
| from rag_engine.utils.logger import get_logger | |
| logger = get_logger(__name__) | |
| _QUESTION_STARTERS = re.compile( | |
| r"^(what|is|are|does|do|can|will|how|when|where|why)\b", re.IGNORECASE | |
| ) | |
| _FLOOD_KEYWORDS = {"flood", "water", "storm", "hurricane"} | |
| _FIRE_KEYWORDS = {"fire", "smoke", "explosion", "burn"} | |
| _THEFT_KEYWORDS = {"theft", "stolen", "burglary", "robbery"} | |
| _DEDUCTIBLE_KEYWORDS = {"deductible", "excess", "out-of-pocket"} | |
| _LIMIT_KEYWORDS = {"limit", "maximum", "cap", "ceiling"} | |
| class QueryPreprocessor: | |
| """Clean and enrich user queries for retrieval.""" | |
| # ------------------------------------------------------------------ # | |
| # preprocess | |
| # ------------------------------------------------------------------ # | |
| def preprocess(self, query: str) -> str: | |
| """Normalise whitespace and append ``?`` to question-like queries.""" | |
| original = query | |
| result = query.strip() | |
| # Append "?" for question-like phrases that don't already end with one | |
| if not result.endswith("?") and _QUESTION_STARTERS.match(result): | |
| # Capitalise the first letter of question-like phrases | |
| if result: | |
| result = result[0].upper() + result[1:] | |
| result = result + "?" | |
| # Collapse multiple spaces | |
| result = re.sub(r"\s+", " ", result) | |
| logger.info("Query preprocessed: '%s' → '%s'", original, result) | |
| return result | |
| # ------------------------------------------------------------------ # | |
| # extract_filters | |
| # ------------------------------------------------------------------ # | |
| def extract_filters(self, query: str, policy_id: str | None = None) -> dict: | |
| """Build a Supabase metadata filter dict from *query* keywords.""" | |
| filters: dict = {} | |
| if policy_id is not None: | |
| filters["policy_id"] = policy_id | |
| lower = query.lower() | |
| words = set(lower.split()) | |
| if words & _FLOOD_KEYWORDS: | |
| filters["coverage_category"] = "flood" | |
| if words & _FIRE_KEYWORDS: | |
| filters["coverage_category"] = "fire" | |
| if words & _THEFT_KEYWORDS: | |
| filters["coverage_category"] = "theft" | |
| if words & _DEDUCTIBLE_KEYWORDS: | |
| filters["deductible_related"] = True | |
| if words & _LIMIT_KEYWORDS: | |
| filters["limit_related"] = True | |
| logger.info("Extracted filters: %s", filters) | |
| return filters | |