Lancer / app /temporal /intent_detector.py
Madras1's picture
Upload 36 files
7fbd9ac verified
"""Temporal intent detection for search queries.
Analyzes queries to determine if they require fresh/current information
or if historical information is acceptable.
"""
import re
from datetime import datetime
from typing import Literal
def _get_dynamic_years() -> set[str]:
"""Get current and previous year dynamically."""
current_year = datetime.now().year
return {str(current_year), str(current_year - 1)}
# Keywords that strongly indicate need for current information
FRESHNESS_KEYWORDS = {
# English
"latest", "newest", "recent", "current", "today", "now",
"this week", "this month", "this year", "breaking",
"update", "updates", "new", "just", "announced",
*_get_dynamic_years(), # Dynamic years
# Portuguese
"último", "últimos", "recente", "atual", "hoje", "agora",
"essa semana", "esse mês", "esse ano", "novidade",
"atualização", "novo", "novos", "anunciado",
}
# Keywords that indicate historical queries (less urgent freshness)
HISTORICAL_KEYWORDS = {
"history", "historical", "origin", "origins", "invented",
"founded", "first", "original", "classic", "traditional",
"história", "histórico", "origem", "inventado", "fundado",
}
# Entity types that typically require fresh information
FRESH_ENTITY_PATTERNS = [
r"\b(?:price|prices|stock|stocks|market)\b", # Financial
r"\b(?:weather|forecast|temperature)\b", # Weather
r"\b(?:news|headlines|breaking)\b", # News
r"\b(?:score|scores|game|match|vs)\b", # Sports
r"\b(?:version|release|update|patch)\b", # Software
r"\b(?:gpt-?\d|claude|gemini|llama|mistral)\b", # AI models
]
def detect_temporal_intent(
query: str,
) -> tuple[Literal["current", "historical", "neutral"], float]:
"""
Detect the temporal intent of a search query.
Args:
query: The search query string
Returns:
Tuple of (intent, urgency) where:
- intent: "current", "historical", or "neutral"
- urgency: float 0-1 indicating how important freshness is
"""
query_lower = query.lower()
# Count freshness indicators
freshness_score = 0.0
historical_score = 0.0
# Check for freshness keywords
for keyword in FRESHNESS_KEYWORDS:
if keyword in query_lower:
freshness_score += 0.3
# Check for historical keywords
for keyword in HISTORICAL_KEYWORDS:
if keyword in query_lower:
historical_score += 0.3
# Check for fresh entity patterns
for pattern in FRESH_ENTITY_PATTERNS:
if re.search(pattern, query_lower):
freshness_score += 0.2
# Question words that often imply current info needed
if re.search(r"\b(?:what is|who is|how to|where is)\b", query_lower):
freshness_score += 0.1
# Superlatives often need current info
if re.search(r"\b(?:best|top|most|fastest|cheapest)\b", query_lower):
freshness_score += 0.15
# Normalize scores
freshness_score = min(freshness_score, 1.0)
historical_score = min(historical_score, 1.0)
# Determine intent
if freshness_score > historical_score and freshness_score > 0.2:
intent = "current"
urgency = min(0.3 + freshness_score, 1.0)
elif historical_score > freshness_score and historical_score > 0.2:
intent = "historical"
urgency = max(0.2 - historical_score * 0.1, 0.1)
else:
intent = "neutral"
urgency = 0.5
return intent, urgency