Spaces:
Sleeping
Sleeping
| # ============================================================= | |
| # File: backend/api/services/query_expander.py | |
| # ============================================================= | |
| """ | |
| Query expansion and disambiguation service. | |
| Uses LLM to expand ambiguous queries and improve search results. | |
| """ | |
| import re | |
| from typing import List, Dict, Any, Optional | |
| from .llm_client import LLMClient | |
class QueryExpander:
    """Expands and disambiguates queries for better search results.

    The expander offers three independent strategies:

    * ``expand_ambiguous_query`` asks the LLM client for alternative phrasings
      when the query contains an abbreviation or a very short word.
    * ``expand_news_query`` builds rule-based variations for news searches.
    * ``expand_short_query`` replaces a known abbreviation in a short query
      with its long form.
    """

    # Patterns applied to the lower-cased query to decide whether it is
    # ambiguous: known abbreviations, or any word of one or two letters.
    # NOTE(review): the second pattern also matches common words such as
    # "is" or "to", so most natural-language queries count as ambiguous.
    _AMBIGUOUS_PATTERNS = (
        re.compile(r'\b(al|ai|ml|dl|nlp|api|ui|ux|db|sql|js|ts|py|go|rs)\b'),
        re.compile(r'\b[a-z]{1,2}\b'),
    )

    # Leading list decoration ("- ", "* ", "1. ", "2) ") that the LLM may add
    # even though the prompt asks for plain lines. Whitespace is required
    # after a number so queries such as "3.5 turbo pricing" stay intact.
    _LIST_MARKER = re.compile(r'^(?:[-*]+\s*|\d+[.)]\s+)')

    # Abbreviation -> long form, tried in this order by expand_short_query.
    _SHORT_EXPANSIONS = (
        ("al", "artificial intelligence AI"),
        ("ai", "artificial intelligence"),
        ("ml", "machine learning"),
        ("dl", "deep learning"),
        ("nlp", "natural language processing"),
    )

    # Years appended by expand_news_query when the caller does not pass any.
    _DEFAULT_NEWS_YEARS = (2024, 2025)

    # Upper bound on the number of variations returned by the list methods.
    _MAX_VARIATIONS = 5

    def __init__(self, llm_client: "LLMClient"):
        """
        Store the LLM client used for query expansion.

        Args:
            llm_client: Client exposing an awaitable
                ``simple_call(prompt, temperature=...)`` method.
        """
        self.llm = llm_client

    async def expand_ambiguous_query(self, query: str, context: Optional[str] = None) -> List[str]:
        """
        Generate multiple query variations for ambiguous terms.

        Args:
            query: Original query
            context: Optional context to help disambiguation

        Returns:
            List of at most five queries. The original query is always the
            first element; it is the only element when the query is not
            considered ambiguous or when the LLM call fails.
        """
        lowered = query.lower()
        if not any(pattern.search(lowered) for pattern in self._AMBIGUOUS_PATTERNS):
            return [query]  # Nothing to disambiguate

        context_line = f"Context: {context}" if context else ""
        prompt = (
            f'Given the user query: "{query}"\n'
            "Generate 3-5 alternative search queries that could help find relevant information.\n"
            "Consider different interpretations, synonyms, and related terms.\n"
            f"{context_line}\n"
            "Return only the queries, one per line, without numbering or bullets:"
        )

        try:
            response = await self.llm.simple_call(prompt, temperature=0.3)
        except Exception:
            # Deliberate best-effort: expansion is optional, so any client
            # failure falls back to searching with the original query.
            return [query]
        if not isinstance(response, str):
            return [query]

        variations = [query]
        seen = {query.strip().casefold()}
        for raw_line in response.split('\n'):
            candidate = raw_line.strip()
            # Markdown headings are commentary, not queries.
            if not candidate or candidate.startswith('#'):
                continue
            # Strip bullets/numbering instead of discarding the whole line,
            # so a model that ignores the formatting request is still usable.
            candidate = self._LIST_MARKER.sub('', candidate).strip()
            key = candidate.casefold()
            if not candidate or key in seen:
                continue
            seen.add(key)
            variations.append(candidate)

        return variations[:self._MAX_VARIATIONS]

    def expand_news_query(self, query: str, years: Optional[List[int]] = None) -> List[str]:
        """
        Generate multiple variations for news queries.

        Args:
            query: News query
            years: Years to append as date-specific variations. Defaults to
                2024 and 2025 when omitted.

        Returns:
            List of at most five distinct query variations, starting with
            the original query.
        """
        lowered = query.lower()
        variations = [query]

        # Time-based variations
        if "latest" not in lowered:
            variations.append(f"latest {query}")
        if "news" not in lowered:
            variations.append(f"{query} news")
        if "breaking" not in lowered and "latest" in lowered:
            # The check above is case-insensitive, so the replacement must be
            # too; otherwise "Latest ..." would yield a copy of the query.
            variations.append(re.sub("latest", "breaking", query, flags=re.IGNORECASE))

        # Date-specific variations
        for year in (self._DEFAULT_NEWS_YEARS if years is None else years):
            variations.append(f"{query} {year}")

        # dict.fromkeys removes duplicates while keeping first-seen order.
        return list(dict.fromkeys(variations))[:self._MAX_VARIATIONS]

    def expand_short_query(self, query: str) -> str:
        """
        Expand very short queries with common expansions.

        Only queries of at most three whitespace-separated words are
        expanded. The first known abbreviation that appears as a whole word
        (case-insensitively) is replaced once with its long form.

        Args:
            query: Short query

        Returns:
            Expanded query, or the query unchanged when it is longer than
            three words or contains no known abbreviation.
        """
        if len(query.split()) > 3:
            return query

        for abbrev, expansion in self._SHORT_EXPANSIONS:
            # Whole-word match prevents rewriting the inside of words such as
            # "email"; IGNORECASE lets "AI" expand like "ai".
            expanded, replaced = re.subn(
                rf"\b{re.escape(abbrev)}\b",
                lambda _match, text=expansion: text,
                query,
                count=1,
                flags=re.IGNORECASE,
            )
            if replaced:
                return expanded
        return query