Upload 36 files
Changed files:
- app/agents/llm_client.py  +122 -32
- app/agents/synthesizer.py  +59 -13
- app/api/routes/search.py  +109 -1
- app/reranking/embeddings.py  +102 -0
- app/reranking/pipeline.py  +57 -29
- app/temporal/intent_detector.py  +9 -1
app/agents/llm_client.py
CHANGED

@@ -5,12 +5,24 @@ Supports Groq and OpenRouter for LLM inference.
 
 import httpx
 import json
-from typing import Optional
+from typing import Optional, AsyncIterator
 import asyncio
 
+from tenacity import (
+    retry,
+    stop_after_attempt,
+    wait_exponential,
+    retry_if_exception_type,
+)
+
 from app.config import get_settings
 
 
+class RetryableError(Exception):
+    """Error that should trigger a retry."""
+    pass
+
+
 async def generate_completion(
     messages: list[dict],
     model: Optional[str] = None,
@@ -30,45 +42,65 @@ async def generate_completion(
     raise ValueError(f"Unknown LLM provider: {provider}")
 
 
+@retry(
+    stop=stop_after_attempt(3),
+    wait=wait_exponential(multiplier=1, min=2, max=10),
+    retry=retry_if_exception_type(RetryableError),
+    reraise=True,
+)
 async def _call_groq(
     messages: list[dict],
     model: str,
     temperature: float,
    max_tokens: int,
 ) -> str:
-    """Call Groq API."""
+    """Call Groq API with retry logic."""
     settings = get_settings()
 
     if not settings.groq_api_key:
         raise ValueError("GROQ_API_KEY not configured")
 
-    ...
+    try:
+        async with httpx.AsyncClient(timeout=60.0) as client:
+            response = await client.post(
+                "https://api.groq.com/openai/v1/chat/completions",
+                headers={
+                    "Authorization": f"Bearer {settings.groq_api_key}",
+                    "Content-Type": "application/json",
+                },
+                json={
+                    "model": model,
+                    "messages": messages,
+                    "temperature": temperature,
+                    "max_tokens": max_tokens,
+                },
+            )
+
+        # Retry on rate limit or server errors
+        if response.status_code in (429, 502, 503, 504):
+            raise RetryableError(f"Groq error {response.status_code}")
+
+        response.raise_for_status()
+        data = response.json()
+
+        return data["choices"][0]["message"]["content"]
+    except httpx.TimeoutException as e:
+        raise RetryableError(f"Groq timeout: {e}")
 
 
+@retry(
+    stop=stop_after_attempt(3),
+    wait=wait_exponential(multiplier=1, min=2, max=10),
+    retry=retry_if_exception_type(RetryableError),
+    reraise=True,
+)
 async def _call_openrouter(
     messages: list[dict],
     model: str,
     temperature: float,
     max_tokens: int,
 ) -> str:
-    """Call OpenRouter API."""
+    """Call OpenRouter API with retry logic."""
     settings = get_settings()
 
     if not settings.openrouter_api_key:
@@ -81,22 +113,80 @@ async def _call_openrouter(
         "X-Title": "Lancer Search API",
     }
 
-    # Payload exactly like official docs
     payload = {
         "model": model,
         "messages": messages,
     }
 
-    async with httpx.AsyncClient(timeout=120.0) as client:
-        response = await client.post(
-            "https://openrouter.ai/api/v1/chat/completions",
-            headers=headers,
-            content=json.dumps(payload),
-        )
-        ...
+    try:
+        async with httpx.AsyncClient(timeout=120.0) as client:
+            response = await client.post(
+                "https://openrouter.ai/api/v1/chat/completions",
+                headers=headers,
+                content=json.dumps(payload),
+            )
+
+        # Retry on rate limit or server errors
+        if response.status_code in (429, 502, 503, 504):
+            raise RetryableError(f"OpenRouter error {response.status_code}")
+
+        if response.status_code != 200:
+            error_text = response.text
+            raise ValueError(f"OpenRouter error {response.status_code}: {error_text}")
+
+        data = response.json()
+        return data["choices"][0]["message"]["content"]
+    except httpx.TimeoutException as e:
+        raise RetryableError(f"OpenRouter timeout: {e}")
+
+
+async def generate_completion_stream(
+    messages: list[dict],
+    model: Optional[str] = None,
+    temperature: float = 0.3,
+    max_tokens: int = 2048,
+) -> AsyncIterator[str]:
+    """Generate a streaming completion using OpenRouter."""
+    settings = get_settings()
+    model = model or settings.llm_model
+
+    if not settings.openrouter_api_key:
+        raise ValueError("OPENROUTER_API_KEY not configured")
+
+    headers = {
+        "Authorization": f"Bearer {settings.openrouter_api_key}",
+        "Content-Type": "application/json",
+        "HTTP-Referer": "https://madras1-lancer.hf.space",
+        "X-Title": "Lancer Search API",
+    }
+
+    payload = {
+        "model": model,
+        "messages": messages,
+        "stream": True,
+    }
+
+    async with httpx.AsyncClient(timeout=120.0) as client:
+        async with client.stream(
+            "POST",
+            "https://openrouter.ai/api/v1/chat/completions",
+            headers=headers,
+            content=json.dumps(payload),
+        ) as response:
+            if response.status_code != 200:
+                error_text = await response.aread()
+                raise ValueError(f"OpenRouter streaming error {response.status_code}: {error_text}")
+
+            async for line in response.aiter_lines():
+                if line.startswith("data: "):
+                    data_str = line[6:]
+                    if data_str.strip() == "[DONE]":
+                        break
+                    try:
+                        data = json.loads(data_str)
+                        delta = data.get("choices", [{}])[0].get("delta", {})
+                        content = delta.get("content", "")
+                        if content:
+                            yield content
                    except json.JSONDecodeError:
                        continue
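
For reference, a minimal caller sketch for the new streaming helper (the prompt text is made up, and it assumes OPENROUTER_API_KEY and the default llm_model are configured):

import asyncio

from app.agents.llm_client import generate_completion_stream


async def main() -> None:
    # Hypothetical prompt; any OpenAI-style message list works.
    messages = [{"role": "user", "content": "Summarize the history of HTTP."}]

    # Chunks arrive as they are parsed from the SSE "data:" lines above.
    async for chunk in generate_completion_stream(messages, temperature=0.3):
        print(chunk, end="", flush=True)


asyncio.run(main())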
app/agents/synthesizer.py
CHANGED

@@ -4,10 +4,10 @@ Generates a coherent answer from search results with citations.
 """
 
 from datetime import datetime
-from typing import Optional
+from typing import Optional, AsyncIterator
 
 from app.api.schemas import SearchResult, TemporalContext, Citation
-from app.agents.llm_client import generate_completion
+from app.agents.llm_client import generate_completion, generate_completion_stream
 
 
 SYNTHESIS_PROMPT = """You are a research assistant that synthesizes information from search results.
@@ -54,6 +54,57 @@ async def synthesize_answer(
     if not results:
         return "No results found to synthesize an answer.", []
 
+    messages = _build_messages(query, results, temporal_context)
+
+    try:
+        answer = await generate_completion(messages, temperature=0.3)
+    except Exception as e:
+        # Fallback: return a simple summary without LLM
+        answer = f"Error generating synthesis: {e}. Please review the search results directly."
+
+    # Build citations list
+    citations = _build_citations(results)
+
+    return answer, citations
+
+
+async def synthesize_answer_stream(
+    query: str,
+    results: list[SearchResult],
+    temporal_context: Optional[TemporalContext] = None,
+) -> AsyncIterator[str]:
+    """
+    Synthesize an answer with streaming output.
+
+    Yields chunks of the answer as they are generated.
+
+    Args:
+        query: Original search query
+        results: List of search results to synthesize from
+        temporal_context: Temporal analysis context
+
+    Yields:
+        Chunks of the answer text
+    """
+    if not results:
+        yield "No results found to synthesize an answer."
+        return
+
+    messages = _build_messages(query, results, temporal_context)
+
+    try:
+        async for chunk in generate_completion_stream(messages, temperature=0.3):
+            yield chunk
+    except Exception as e:
+        yield f"Error generating synthesis: {e}. Please review the search results directly."
+
+
+def _build_messages(
+    query: str,
+    results: list[SearchResult],
+    temporal_context: Optional[TemporalContext] = None,
+) -> list[dict]:
+    """Build messages for LLM prompt."""
     # Format results for the prompt
     formatted_results = format_results_for_prompt(results[:10])  # Top 10 only
 
@@ -83,18 +134,14 @@ async def synthesize_answer(
         formatted_results=formatted_results,
     )
 
-    messages = [
+    return [
         {"role": "system", "content": "You are a helpful research assistant."},
         {"role": "user", "content": prompt},
     ]
-    ...
-    # Fallback: return a simple summary without LLM
-    answer = f"Error generating synthesis: {e}. Please review the search results directly."
-
-    # Build citations list
+
+
+def _build_citations(results: list[SearchResult]) -> list[Citation]:
+    """Build citations list from results."""
     citations = []
     for i, result in enumerate(results[:10], 1):
         citations.append(
@@ -104,8 +151,7 @@ async def synthesize_answer(
                 title=result.title,
             )
         )
-
-    return answer, citations
+    return citations
 
 
 def format_results_for_prompt(results: list[SearchResult]) -> str:
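
A usage sketch contrasting the two synthesis paths (the SearchResult fields mirror those constructed in app/api/routes/search.py; the values here are illustrative only):

from app.api.schemas import SearchResult
from app.agents.synthesizer import synthesize_answer, synthesize_answer_stream


async def demo() -> None:
    # Illustrative result; real ones come from the search sources.
    results = [
        SearchResult(
            title="Example page",
            url="https://example.com",
            content="Example content.",
            score=0.8,
            published_date=None,
            freshness_score=0.5,
            authority_score=0.5,
        )
    ]

    # Non-streaming path: one (answer, citations) tuple.
    answer, citations = await synthesize_answer("example query", results)

    # Streaming path: the same answer in chunks; citations are not returned.
    async for chunk in synthesize_answer_stream("example query", results):
        print(chunk, end="")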
app/api/routes/search.py
CHANGED

@@ -1,9 +1,11 @@
 """Search API routes."""
 
+import json
 import time
 from datetime import datetime
 
 from fastapi import APIRouter, HTTPException
+from fastapi.responses import StreamingResponse
 
 from app.api.schemas import (
     SearchRequest,
@@ -19,7 +21,7 @@ from app.temporal.freshness_scorer import calculate_freshness_score
 from app.sources.tavily import search_tavily
 from app.sources.duckduckgo import search_duckduckgo
 from app.reranking.pipeline import rerank_results
-from app.agents.synthesizer import synthesize_answer
+from app.agents.synthesizer import synthesize_answer, synthesize_answer_stream
 
 router = APIRouter()
 
@@ -144,3 +146,109 @@ async def search_raw(request: SearchRequest) -> SearchResponse:
     """Fast search without answer synthesis."""
     request.include_answer = False
     return await search(request)
+
+
+@router.post(
+    "/search/stream",
+    summary="Search with streaming synthesis",
+    description="Perform a search and stream the AI-synthesized answer in real-time using SSE.",
+)
+async def search_stream(request: SearchRequest):
+    """
+    Streaming search with Server-Sent Events.
+
+    Returns results first, then streams the answer as it's generated.
+    """
+    settings = get_settings()
+
+    async def event_generator():
+        try:
+            # Step 1: Analyze temporal intent
+            temporal_intent, temporal_urgency = detect_temporal_intent(request.query)
+
+            temporal_context = TemporalContext(
+                query_temporal_intent=temporal_intent,
+                temporal_urgency=temporal_urgency,
+                current_date=datetime.now().strftime("%Y-%m-%d"),
+            )
+
+            # Step 2: Search sources
+            raw_results = []
+
+            if settings.tavily_api_key:
+                tavily_results = await search_tavily(
+                    query=request.query,
+                    max_results=settings.max_search_results,
+                    freshness=request.freshness,
+                    include_domains=request.include_domains,
+                    exclude_domains=request.exclude_domains,
+                )
+                raw_results.extend(tavily_results)
+
+            if not raw_results:
+                ddg_results = await search_duckduckgo(
+                    query=request.query,
+                    max_results=settings.max_search_results,
+                )
+                raw_results.extend(ddg_results)
+
+            if not raw_results:
+                yield f"data: {json.dumps({'type': 'error', 'content': 'No results found'})}\n\n"
+                return
+
+            # Step 3: Rerank
+            ranked_results = await rerank_results(
+                query=request.query,
+                results=raw_results,
+                temporal_urgency=temporal_urgency,
+                max_results=request.max_results,
+            )
+
+            # Step 4: Convert to SearchResult models
+            search_results = []
+            for result in ranked_results:
+                freshness = calculate_freshness_score(result.get("published_date"))
+                search_results.append(
+                    SearchResult(
+                        title=result.get("title", ""),
+                        url=result.get("url", ""),
+                        content=result.get("content", ""),
+                        score=result.get("score", 0.5),
+                        published_date=result.get("published_date"),
+                        freshness_score=freshness,
+                        authority_score=result.get("authority_score", 0.5),
+                    )
+                )
+
+            # Send results first
+            results_data = {
+                "type": "results",
+                "results": [r.model_dump(mode="json") for r in search_results],
+                "temporal_context": temporal_context.model_dump(),
+            }
+            yield f"data: {json.dumps(results_data)}\n\n"
+
+            # Step 5: Stream answer
+            yield f"data: {json.dumps({'type': 'answer_start'})}\n\n"
+
+            async for chunk in synthesize_answer_stream(
+                query=request.query,
+                results=search_results,
+                temporal_context=temporal_context,
+            ):
+                yield f"data: {json.dumps({'type': 'answer_chunk', 'content': chunk})}\n\n"
+
+            yield f"data: {json.dumps({'type': 'done'})}\n\n"
+
+        except Exception as e:
+            yield f"data: {json.dumps({'type': 'error', 'content': str(e)})}\n\n"
+
+    return StreamingResponse(
+        event_generator(),
+        media_type="text/event-stream",
+        headers={
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "X-Accel-Buffering": "no",
+        },
+    )
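
A client-side sketch of consuming the new endpoint, driven by the event types the generator emits (results, answer_start, answer_chunk, done, error). The base URL and the request body shape are assumptions; adjust them to the deployment and to whatever fields SearchRequest actually requires:

import asyncio
import json

import httpx

BASE_URL = "http://localhost:8000"  # assumed; point at the running API


async def consume(query: str) -> None:
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream(
            "POST", f"{BASE_URL}/search/stream", json={"query": query}
        ) as response:
            async for line in response.aiter_lines():
                if not line.startswith("data: "):
                    continue
                event = json.loads(line[6:])
                if event["type"] == "results":
                    print(f"got {len(event['results'])} results")
                elif event["type"] == "answer_chunk":
                    print(event["content"], end="", flush=True)
                elif event["type"] in ("done", "error"):
                    break


asyncio.run(consume("latest python release"))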
app/reranking/embeddings.py
ADDED

@@ -0,0 +1,102 @@
+"""Embedding-based reranking using sentence-transformers.
+
+Provides bi-encoder and cross-encoder reranking for better relevance scoring.
+"""
+
+from functools import lru_cache
+from typing import Optional
+
+import numpy as np
+
+from app.config import get_settings
+
+
+@lru_cache(maxsize=1)
+def get_bi_encoder():
+    """Load and cache the bi-encoder model."""
+    from sentence_transformers import SentenceTransformer
+    settings = get_settings()
+    return SentenceTransformer(settings.bi_encoder_model)
+
+
+@lru_cache(maxsize=1)
+def get_cross_encoder():
+    """Load and cache the cross-encoder model."""
+    from sentence_transformers import CrossEncoder
+    settings = get_settings()
+    return CrossEncoder(settings.cross_encoder_model)
+
+
+def compute_bi_encoder_scores(
+    query: str,
+    documents: list[str],
+) -> list[float]:
+    """
+    Compute semantic similarity scores using bi-encoder.
+
+    Fast but less accurate than cross-encoder.
+    Good for initial filtering of large result sets.
+
+    Args:
+        query: Search query
+        documents: List of document texts
+
+    Returns:
+        List of similarity scores (0-1)
+    """
+    if not documents:
+        return []
+
+    model = get_bi_encoder()
+
+    # Encode query and documents
+    query_embedding = model.encode(query, normalize_embeddings=True)
+    doc_embeddings = model.encode(documents, normalize_embeddings=True)
+
+    # Compute cosine similarities (embeddings are normalized, so dot product = cosine)
+    similarities = np.dot(doc_embeddings, query_embedding)
+
+    # Convert to list and ensure values are in [0, 1]
+    scores = [(float(s) + 1) / 2 for s in similarities]  # Map from [-1, 1] to [0, 1]
+
+    return scores
+
+
+def compute_cross_encoder_scores(
+    query: str,
+    documents: list[str],
+) -> list[float]:
+    """
+    Compute relevance scores using cross-encoder.
+
+    More accurate than bi-encoder but slower.
+    Use after initial filtering for precise ranking.
+
+    Args:
+        query: Search query
+        documents: List of document texts
+
+    Returns:
+        List of relevance scores (0-1)
+    """
+    if not documents:
+        return []
+
+    model = get_cross_encoder()
+
+    # Create query-document pairs
+    pairs = [[query, doc] for doc in documents]
+
+    # Get scores
+    scores = model.predict(pairs)
+
+    # Normalize to [0, 1] using min-max scaling
+    min_score = float(np.min(scores))
+    max_score = float(np.max(scores))
+
+    if max_score > min_score:
+        normalized = [(float(s) - min_score) / (max_score - min_score) for s in scores]
+    else:
+        normalized = [0.5] * len(scores)
+
+    return normalized
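
A quick sketch of the two scorers in isolation (it assumes bi_encoder_model and cross_encoder_model are set in app.config and that the models can be loaded):

from app.reranking.embeddings import (
    compute_bi_encoder_scores,
    compute_cross_encoder_scores,
)

query = "python async http client"
docs = [
    "httpx is a fully featured HTTP client for Python with async support.",
    "A recipe for sourdough bread.",
]

# Fast pass: normalized-embedding dot products mapped from [-1, 1] to [0, 1].
print(compute_bi_encoder_scores(query, docs))

# Precise pass: cross-encoder logits min-max scaled to [0, 1].
print(compute_cross_encoder_scores(query, docs))

Note that the min-max scaling makes cross-encoder scores relative to the batch: the best document in a batch always maps to 1.0 and the worst to 0.0, so scores are not comparable across queries.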
app/reranking/pipeline.py
CHANGED

@@ -1,38 +1,44 @@
 """Multi-stage reranking pipeline.
 
 Implements a 3-stage reranking approach:
-1. Bi-Encoder: Fast semantic similarity (
+1. Bi-Encoder: Fast semantic similarity (for large result sets)
 2. Cross-Encoder: Accurate relevance scoring
 3. Temporal + Authority: Freshness and domain trust weighting
 """
 
+import logging
 from typing import Optional
 
 from app.temporal.freshness_scorer import calculate_freshness_score, adjust_score_by_freshness
 from app.reranking.authority_scorer import calculate_authority_score
 
+logger = logging.getLogger(__name__)
+
+# Flag to enable/disable embedding-based reranking
+ENABLE_EMBEDDING_RERANKING = True
+
 
 async def rerank_results(
     query: str,
     results: list[dict],
     temporal_urgency: float = 0.5,
     max_results: int = 10,
+    use_embeddings: bool = True,
 ) -> list[dict]:
     """
     Apply multi-stage reranking to search results.
 
-    ...
-    Full pipeline with embeddings can be enabled later.
+    Pipeline:
+    1. Bi-encoder: Quick semantic filtering (if results > 15)
+    2. Cross-encoder: Precise relevance scoring (top candidates)
+    3. Temporal + Authority: Freshness and trust weighting
 
     Args:
         query: Original search query
         results: Raw search results
         temporal_urgency: How important freshness is (0-1)
         max_results: Maximum results to return
+        use_embeddings: Whether to use embedding models
 
     Returns:
         Reranked results with updated scores
@@ -40,16 +46,19 @@ async def rerank_results(
     if not results:
         return []
 
-    ...
-    # In production, use sentence-transformers for initial filtering of 100+ results
+    scored_results = results.copy()
 
-    # Stage 2: ...
+    # Stage 1 & 2: Embedding-based reranking
+    if use_embeddings and ENABLE_EMBEDDING_RERANKING:
+        try:
+            scored_results = await _apply_embedding_reranking(query, scored_results)
+            logger.info(f"Applied embedding reranking to {len(scored_results)} results")
+        except Exception as e:
+            logger.warning(f"Embedding reranking failed, using fallback: {e}")
+            # Fall through to basic scoring
 
     # Stage 3: Apply temporal + authority scoring
-    ...
-    for result in results:
+    for result in scored_results:
         # Calculate freshness score
         freshness = calculate_freshness_score(result.get("published_date"))
         result["freshness_score"] = freshness
@@ -58,7 +67,7 @@ async def rerank_results(
         authority = calculate_authority_score(result.get("url", ""))
         result["authority_score"] = authority
 
-        # Get base score (from search source)
+        # Get base score (from search source or embedding)
         base_score = result.get("score", 0.5)
 
         # Adjust for freshness based on temporal urgency
@@ -71,8 +80,6 @@ async def rerank_results(
         # Also factor in authority (10% weight)
         final_score = (adjusted_score * 0.9) + (authority * 0.1)
         result["score"] = final_score
-
-        scored_results.append(result)
 
     # Sort by final score (descending)
     scored_results.sort(key=lambda x: x["score"], reverse=True)
@@ -80,20 +87,41 @@ async def rerank_results(
     return scored_results[:max_results]
 
 
-async def ...(
+async def _apply_embedding_reranking(
     query: str,
     results: list[dict],
-    max_results: int = 10,
 ) -> list[dict]:
-    """
-    ...
+    """Apply bi-encoder and cross-encoder reranking."""
+    from app.reranking.embeddings import compute_bi_encoder_scores, compute_cross_encoder_scores
 
+    # Extract document contents for embedding
+    documents = [
+        f"{r.get('title', '')}. {r.get('content', '')[:500]}"
+        for r in results
+    ]
 
+    # Stage 1: Bi-encoder for initial scoring (fast)
+    if len(results) > 15:
+        bi_scores = compute_bi_encoder_scores(query, documents)
+        for i, result in enumerate(results):
+            result["bi_encoder_score"] = bi_scores[i]
+
+        # Sort by bi-encoder and keep the top 15 for the cross-encoder;
+        # rebuild documents so they stay aligned with the re-sorted results
+        results.sort(key=lambda x: x.get("bi_encoder_score", 0), reverse=True)
+        results = results[:15]
+        documents = [
+            f"{r.get('title', '')}. {r.get('content', '')[:500]}"
+            for r in results
+        ]
+
+    # Stage 2: Cross-encoder for precise scoring (slower but accurate)
+    cross_scores = compute_cross_encoder_scores(query, documents)
+
+    for i, result in enumerate(results):
+        # Blend cross-encoder score with original source score
+        original_score = result.get("score", 0.5)
+        cross_score = cross_scores[i]
+
+        # Cross-encoder gets 70% weight, original 30%
+        result["score"] = (cross_score * 0.7) + (original_score * 0.3)
+        result["cross_encoder_score"] = cross_score
+
+    return results
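
A caller's-eye sketch of the pipeline (the raw dicts, URLs, and dates are illustrative; use_embeddings=False keeps the example free of model downloads and exercises only the temporal/authority stage):

import asyncio

from app.reranking.pipeline import rerank_results

# Illustrative raw results in the dict shape the sources return.
raw = [
    {"title": "Old post", "url": "https://example.com/a",
     "content": "...", "score": 0.6, "published_date": "2020-01-01"},
    {"title": "Fresh post", "url": "https://example.com/b",
     "content": "...", "score": 0.6, "published_date": "2025-01-01"},
]

ranked = asyncio.run(
    rerank_results("example query", raw, temporal_urgency=0.9, use_embeddings=False)
)
print([r["title"] for r in ranked])  # the fresher result should rank first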
app/temporal/intent_detector.py
CHANGED

@@ -5,15 +5,23 @@ or if historical information is acceptable.
 """
 
 import re
+from datetime import datetime
 from typing import Literal
 
+
+def _get_dynamic_years() -> set[str]:
+    """Get current and previous year dynamically."""
+    current_year = datetime.now().year
+    return {str(current_year), str(current_year - 1)}
+
+
 # Keywords that strongly indicate need for current information
 FRESHNESS_KEYWORDS = {
     # English
     "latest", "newest", "recent", "current", "today", "now",
     "this week", "this month", "this year", "breaking",
     "update", "updates", "new", "just", "announced",
-    ...
+    *_get_dynamic_years(),  # Dynamic years
     # Portuguese
     "último", "últimos", "recente", "atual", "hoje", "agora",
     "essa semana", "esse mês", "esse ano", "novidade",
|