IntegraChat / backend /api /services /query_expander.py
nothingworry's picture
feat: add caching, query expansion, improved streaming, and enhanced error handling
ddc5c21
raw
history blame
3.98 kB
# =============================================================
# File: backend/api/services/query_expander.py
# =============================================================
"""
Query expansion and disambiguation service.
Uses LLM to expand ambiguous queries and improve search results.
"""
import logging
import re
from typing import List, Dict, Any, Optional

from .llm_client import LLMClient
class QueryExpander:
    """Expands and disambiguates queries for better search results.

    Offers three independent strategies:

    * ``expand_ambiguous_query`` asks the LLM for alternative phrasings.
    * ``expand_news_query`` builds news-oriented variations with plain
      string rules.
    * ``expand_short_query`` spells out a known abbreviation in place.
    """

    # A query is treated as ambiguous when any of these patterns matches its
    # lowercased form. Note that the second pattern matches *every* one- or
    # two-letter word, including ordinary ones such as "a" or "is".
    _AMBIGUOUS_PATTERNS = (
        re.compile(r'\b(al|ai|ml|dl|nlp|api|ui|ux|db|sql|js|ts|py|go|rs)\b'),
        re.compile(r'\b[a-z]{1,2}\b'),  # Very short words
    )

    # Leading list decoration ("- ", "* ", "• ", "1. ", "2) ") that the LLM
    # may emit even though the prompt asks for plain lines. Whitespace after
    # the marker is required so that text like "3.5 turbo" is left alone.
    _LIST_MARKER = re.compile(r'^(?:[-*\u2022]|\d+[.)])\s+')

    # Upper bound on the number of variations returned by the list-returning
    # expanders (the original query counts towards it).
    _MAX_VARIATIONS = 5

    # Years appended by expand_news_query when the caller supplies none.
    _DEFAULT_NEWS_YEARS = (2024, 2025)

    # Abbreviation -> expansion table used by expand_short_query. Entries are
    # tried in this order and only the first match is applied.
    _ABBREVIATIONS = {
        "al": "artificial intelligence AI",
        "ai": "artificial intelligence",
        "ml": "machine learning",
        "dl": "deep learning",
        "nlp": "natural language processing",
    }

    # The annotation is quoted so it is not evaluated when the class is defined.
    def __init__(self, llm_client: "LLMClient"):
        """
        Store the LLM client used by ``expand_ambiguous_query``.

        Args:
            llm_client: Object exposing an awaitable
                ``simple_call(prompt, temperature=...)``.
        """
        self.llm = llm_client

    async def expand_ambiguous_query(self, query: str, context: Optional[str] = None) -> List[str]:
        """
        Generate multiple query variations for ambiguous terms.

        The LLM is only consulted when the query matches one of the
        ambiguity patterns. If the call fails or does not return text, the
        original query is returned on its own.

        Args:
            query: Original query
            context: Optional context to help disambiguation

        Returns:
            List of at most five queries. The original query is always the
            first element; LLM suggestions follow in the order received,
            with list markers stripped and duplicates removed.
        """
        lowered = query.lower()
        if not any(pattern.search(lowered) for pattern in self._AMBIGUOUS_PATTERNS):
            return [query]  # Not ambiguous: nothing to expand

        context_line = f"Context: {context}" if context else ""
        prompt = (
            f'Given the user query: "{query}"\n'
            "Generate 3-5 alternative search queries that could help find relevant information.\n"
            "Consider different interpretations, synonyms, and related terms.\n"
            f"{context_line}\n"
            "Return only the queries, one per line, without numbering or bullets:"
        )

        try:
            response = await self.llm.simple_call(prompt, temperature=0.3)
        except Exception:
            # Expansion is best-effort: record the failure, then fall back to
            # the original query instead of propagating the error.
            logging.getLogger(__name__).warning(
                "LLM query expansion failed; falling back to the original query",
                exc_info=True,
            )
            return [query]

        if not isinstance(response, str):
            return [query]

        queries = [query]
        seen = {query.strip().casefold()}
        for raw_line in response.splitlines():
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue  # Blank line or heading-style line, not a query
            # Keep the suggestion but drop any bullet/number in front of it.
            candidate = self._LIST_MARKER.sub("", line, count=1).strip()
            key = candidate.casefold()
            if not candidate or key in seen:
                continue
            seen.add(key)
            queries.append(candidate)

        return queries[:self._MAX_VARIATIONS]

    def expand_news_query(self, query: str, years: Optional[List[int]] = None) -> List[str]:
        """
        Generate multiple variations for news queries.

        Args:
            query: News query
            years: Years to append as date-specific variations. Defaults to
                2024 and 2025 when omitted.

        Returns:
            List of at most five query variations, starting with the
            original query.
        """
        lowered = query.lower()
        has_latest = "latest" in lowered
        variations = [query]

        # Time-based variations
        if not has_latest:
            variations.append(f"latest {query}")
        if "news" not in lowered:
            variations.append(f"{query} news")
        if has_latest and "breaking" not in lowered:
            # The check above ignores case, so the substitution must too;
            # otherwise "Latest ..." would just duplicate the original query.
            variations.append(re.sub("latest", "breaking", query, flags=re.IGNORECASE))

        # Date-specific variations
        for year in (self._DEFAULT_NEWS_YEARS if years is None else years):
            variations.append(f"{query} {year}")

        return variations[:self._MAX_VARIATIONS]

    def expand_short_query(self, query: str) -> str:
        """
        Expand very short queries with common expansions.

        Only queries of at most three whitespace-separated words are
        considered. An abbreviation is recognised as a whole word,
        regardless of case, so "AI news" is expanded while "email" and
        "html" are left untouched.

        Args:
            query: Short query

        Returns:
            The query with the first known abbreviation replaced by its
            expansion, or the query unchanged if nothing applies.
        """
        if len(query.split()) > 3:
            return query

        for abbrev, expansion in self._ABBREVIATIONS.items():
            # A callable replacement keeps the expansion text literal.
            expanded, replaced = re.subn(
                rf"\b{re.escape(abbrev)}\b",
                lambda _match, text=expansion: text,
                query,
                count=1,
                flags=re.IGNORECASE,
            )
            if replaced:
                return expanded
        return query