Spaces:
Running
Running
feat: implement aggressive temporal weighting and 14-day default window for news relevance
Browse files
src/core/use_cases/rag_chat_use_case.py
CHANGED
|
@@ -271,13 +271,18 @@ JSON:"""
|
|
| 271 |
# ── Regex fallback for temporal + source ─────────────────────────────
|
| 272 |
query_lower = query.lower()
|
| 273 |
|
| 274 |
-
# Temporal fallback
|
| 275 |
-
if any(w in query_lower for w in ("today", "tonight", "now", "breaking", "
|
| 276 |
result["days_back"] = 1
|
| 277 |
elif "yesterday" in query_lower:
|
| 278 |
result["days_back"] = 2
|
| 279 |
-
elif any(w in query_lower for w in ("this week", "recently", "past few days")):
|
| 280 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
elif "this month" in query_lower:
|
| 282 |
result["days_back"] = 30
|
| 283 |
|
|
@@ -330,7 +335,12 @@ JSON:"""
|
|
| 330 |
try:
|
| 331 |
pub_date = datetime.fromisoformat(pub_at.replace("Z", "+00:00"))
|
| 332 |
days_old = (now.replace(tzinfo=None) - pub_date.replace(tzinfo=None)).days
|
| 333 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
except:
|
| 335 |
pass
|
| 336 |
|
|
@@ -408,6 +418,11 @@ JSON:"""
|
|
| 408 |
# Sanitize placeholder filter values from API
|
| 409 |
actual_source_filter = None if source_filter == "string" else source_filter
|
| 410 |
actual_language_filter = None if (not language_filter or language_filter == "string") else language_filter
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 411 |
|
| 412 |
# ── HYBRID SEARCH INTEGRATION ──────────────────────────────────────────
|
| 413 |
# Check if hybrid search is enabled and orchestrator is available
|
|
|
|
| 271 |
# ── Regex fallback for temporal + source ─────────────────────────────
|
| 272 |
query_lower = query.lower()
|
| 273 |
|
| 274 |
+
# Temporal fallback - more aggressive keywords
|
| 275 |
+
if any(w in query_lower for w in ("today", "tonight", "now", "breaking", "latest", "current", "update")):
|
| 276 |
result["days_back"] = 1
|
| 277 |
elif "yesterday" in query_lower:
|
| 278 |
result["days_back"] = 2
|
| 279 |
+
elif any(w in query_lower for w in ("this week", "recently", "past few days", "days ago")):
|
| 280 |
+
# Check for "X days ago"
|
| 281 |
+
days_match = re.search(r'(\d+)\s+days?\s+ago', query_lower)
|
| 282 |
+
if days_match:
|
| 283 |
+
result["days_back"] = int(days_match.group(1)) + 1
|
| 284 |
+
else:
|
| 285 |
+
result["days_back"] = 7
|
| 286 |
elif "this month" in query_lower:
|
| 287 |
result["days_back"] = 30
|
| 288 |
|
|
|
|
| 335 |
try:
|
| 336 |
pub_date = datetime.fromisoformat(pub_at.replace("Z", "+00:00"))
|
| 337 |
days_old = (now.replace(tzinfo=None) - pub_date.replace(tzinfo=None)).days
|
| 338 |
+
# Aggressive news freshness multiplier
|
| 339 |
+
if days_old <= 0: score_multiplier = 1.05 # Boost today
|
| 340 |
+
elif days_old == 1: score_multiplier = 1.0 # Normal yesterday
|
| 341 |
+
elif days_old < 7: score_multiplier = 0.85 # Slight penalty this week
|
| 342 |
+
elif days_old < 30: score_multiplier = 0.6 # Heavy penalty this month
|
| 343 |
+
else: score_multiplier = 0.3 # Archive data
|
| 344 |
except:
|
| 345 |
pass
|
| 346 |
|
|
|
|
| 418 |
# Sanitize placeholder filter values from API
|
| 419 |
actual_source_filter = None if source_filter == "string" else source_filter
|
| 420 |
actual_language_filter = None if (not language_filter or language_filter == "string") else language_filter
|
| 421 |
+
|
| 422 |
+
# Default temporal window (last 14 days) if not specified — avoids ancient data
|
| 423 |
+
if days_back is None:
|
| 424 |
+
days_back = 14
|
| 425 |
+
logger.info(f"[RAG] No temporal filter provided β defaulting to 14 days for relevance")
|
| 426 |
|
| 427 |
# ── HYBRID SEARCH INTEGRATION ──────────────────────────────────────────
|
| 428 |
# Check if hybrid search is enabled and orchestrator is available
|