# =============================================================
# File: backend/api/services/query_expander.py
# =============================================================
"""
Query expansion and disambiguation service.
Uses LLM to expand ambiguous queries and improve search results.
"""

import logging
import re
from typing import List, Dict, Any, Optional, Sequence

from .llm_client import LLMClient

logger = logging.getLogger(__name__)


class QueryExpander:
    """Expands and disambiguates queries for better search results."""

    # Upper bound on the number of variations returned by the list-producing
    # expanders (the original query counts towards this limit).
    MAX_VARIATIONS = 5

    # Years appended by expand_news_query() when the caller does not pass any.
    # NOTE(review): these are fixed values; callers that need current results
    # should pass ``years`` explicitly.
    DEFAULT_NEWS_YEARS = (2024, 2025)

    # A query is treated as ambiguous when its lower-cased form contains one
    # of the listed abbreviations or any standalone 1-2 letter word.
    # NOTE(review): the second pattern also matches ordinary words such as
    # "a", "is" or "of", so most natural-language queries reach the LLM.
    _AMBIGUOUS_PATTERNS = (
        re.compile(r'\b(al|ai|ml|dl|nlp|api|ui|ux|db|sql|js|ts|py|go|rs)\b'),
        re.compile(r'\b[a-z]{1,2}\b'),  # Very short words
    )

    # Leading bullet ("-", "*", "\u2022") or numbering ("1.", "2)") followed
    # by whitespace. The mandatory whitespace keeps text such as "3.5 turbo"
    # or "-site:example.com" intact.
    _LIST_MARKER = re.compile(r'^(?:[-*\u2022]|\d+[.)])\s+')

    # Whole-word, case-insensitive abbreviation patterns and their expansions,
    # tried in this order by expand_short_query().
    _SHORT_EXPANSIONS = (
        (re.compile(r'\bal\b', re.IGNORECASE), "artificial intelligence AI"),
        (re.compile(r'\bai\b', re.IGNORECASE), "artificial intelligence"),
        (re.compile(r'\bml\b', re.IGNORECASE), "machine learning"),
        (re.compile(r'\bdl\b', re.IGNORECASE), "deep learning"),
        (re.compile(r'\bnlp\b', re.IGNORECASE), "natural language processing"),
    )

    def __init__(self, llm_client: LLMClient):
        """
        Store the LLM client used to generate query variations.

        Args:
            llm_client: Client exposing an awaitable ``simple_call`` method
        """
        self.llm = llm_client

    @staticmethod
    def _dedupe(queries: List[str]) -> List[str]:
        """Return queries in order, dropping case/whitespace-insensitive repeats."""
        seen = set()
        unique = []
        for candidate in queries:
            key = " ".join(candidate.split()).casefold()
            if key not in seen:
                seen.add(key)
                unique.append(candidate)
        return unique

    @classmethod
    def _parse_variations(cls, response: Any) -> List[str]:
        """Turn a raw LLM reply into clean query lines, one per non-empty line."""
        if not isinstance(response, str):
            # Unexpected reply type: contribute no variations instead of raising.
            return []

        variations = []
        for raw_line in response.splitlines():
            line = raw_line.strip()
            # Lines starting with "#" are dropped entirely.
            if not line or line.startswith('#'):
                continue
            # Keep the query text even when the model ignored the
            # "no numbering or bullets" instruction.
            line = cls._LIST_MARKER.sub('', line).strip()
            if line:
                variations.append(line)
        return variations

    async def expand_ambiguous_query(self, query: str, context: Optional[str] = None) -> List[str]:
        """
        Generate multiple query variations for ambiguous terms.

        Non-ambiguous queries are returned unchanged without calling the LLM.
        If the LLM call fails, the original query is returned on its own.

        Args:
            query: Original query
            context: Optional context to help disambiguation

        Returns:
            List of expanded query variations, original query first,
            without duplicates, at most ``MAX_VARIATIONS`` entries
        """
        lowered = query.lower()
        if not any(pattern.search(lowered) for pattern in self._AMBIGUOUS_PATTERNS):
            return [query]  # Return original if not ambiguous

        # Use LLM to generate query variations
        context_line = f"Context: {context}" if context else ""
        prompt = f"""Given the user query: "{query}"

Generate 3-5 alternative search queries that could help find relevant information.
Consider different interpretations, synonyms, and related terms.

{context_line}

Return only the queries, one per line, without numbering or bullets:"""

        try:
            response = await self.llm.simple_call(prompt, temperature=0.3)
        except Exception:
            # Best-effort feature: record the failure and fall back to the
            # original query instead of propagating the error.
            logger.warning(
                "LLM query expansion failed; using original query only",
                exc_info=True,
            )
            return [query]

        # Original query always comes first so it survives the size limit.
        candidates = [query, *self._parse_variations(response)]
        return self._dedupe(candidates)[:self.MAX_VARIATIONS]

    def expand_news_query(self, query: str, *, years: Optional[Sequence[int]] = None) -> List[str]:
        """
        Generate multiple variations for news queries.

        Args:
            query: News query
            years: Years to append as date-specific variations; defaults to
                ``DEFAULT_NEWS_YEARS`` when omitted

        Returns:
            List of query variations, original query first, without
            duplicates, at most ``MAX_VARIATIONS`` entries
        """
        lowered = query.lower()
        has_latest = "latest" in lowered
        variations = [query]

        # Add time-based variations
        if not has_latest:
            variations.append(f"latest {query}")
        if "news" not in lowered:
            variations.append(f"{query} news")
        if has_latest and "breaking" not in lowered:
            # Case-insensitive so that "Latest ..." is rewritten too; the
            # detection above is done on the lower-cased query.
            variations.append(re.sub("latest", "breaking", query, flags=re.IGNORECASE))

        # Add date-specific variations
        selected_years = self.DEFAULT_NEWS_YEARS if years is None else years
        variations.extend(f"{query} {year}" for year in selected_years)

        return self._dedupe(variations)[:self.MAX_VARIATIONS]

    def expand_short_query(self, query: str) -> str:
        """
        Expand very short queries with common expansions.

        Only queries of at most three whitespace-separated words are
        expanded. The first occurrence of the first matching abbreviation
        (matched as a whole word, ignoring case) is replaced.

        Args:
            query: Short query

        Returns:
            Expanded query, or the original query when nothing applies
        """
        if len(query.split()) > 3:
            return query

        for pattern, expansion in self._SHORT_EXPANSIONS:
            match = pattern.search(query)
            if match:
                # Splice instead of str.replace so that the match honours
                # word boundaries and letter case of the original text.
                return f"{query[:match.start()]}{expansion}{query[match.end():]}"

        return query