Peterase's picture
fix(news): Add time-based filtering and cache-control to /latest endpoint
2da1e29
import logging
from typing import Optional

from fastapi import APIRouter, Query, HTTPException, Depends, Response

from src.core.ports.vector_store_port import VectorStorePort
from src.core.ports.embedder_port import EmbedderPort
from src.core.use_cases.analytics_use_case import AnalyticsUseCase
from src.api.dependencies import get_vector_store_port, get_embedder_port, get_analytics_use_case
from src.core.domain.schemas import BrowseResponse, SearchResponse, NewsArticle
# Router that the news endpoints in this module (/latest, /browse, /search,
# /sources and /{doc_id}) are registered on.
router = APIRouter()
def _dict_to_article(payload: Optional[dict], score: Optional[float] = None) -> NewsArticle:
    """Build a ``NewsArticle`` from a stored payload dictionary.

    Args:
        payload: Dictionary holding the article fields (``doc_id``, ``url``,
            ``title``, ``text``/``content``, ``source``, ``published_at``,
            ``metadata``). ``None`` is treated as an empty dictionary.
        score: Optional score passed through unchanged to the article.

    Returns:
        A ``NewsArticle`` populated from ``payload``; ``doc_id`` falls back to
        ``"unknown"`` and the content to ``""`` when absent.
    """
    if payload is None:
        payload = {}

    # Normalise metadata once: a missing, None or non-dict value becomes {}.
    metadata = payload.get("metadata")
    if not isinstance(metadata, dict):
        metadata = {}

    # Title can be stored at top-level payload OR nested inside metadata.
    title = payload.get("title") or metadata.get("title")

    # dict.get's default only applies when the key is absent, so test for an
    # explicit None: a null "text" must not mask a populated "content".
    content = payload.get("text")
    if content is None:
        content = payload.get("content")
    if content is None:
        content = ""

    return NewsArticle(
        doc_id=payload.get("doc_id", "unknown"),
        url=payload.get("url"),
        title=title,
        content=content,
        source=payload.get("source"),
        published_at=payload.get("published_at"),
        score=score,
        metadata=metadata,
    )
@router.get("/latest", response_model=BrowseResponse)
def get_latest_news(
    response: Response,
    limit: int = Query(10, le=50),
    source: Optional[str] = None,
    language: Optional[str] = None,
    days_back: int = Query(7, ge=1, le=30, description="Number of days to look back (default: 7)"),
    vector_store: VectorStorePort = Depends(get_vector_store_port),
):
    """Get latest news articles sorted by publication date (default: last 7 days)"""
    # Prevent caching of news results - always fetch fresh data.
    response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate, max-age=0"
    response.headers["Pragma"] = "no-cache"
    response.headers["Expires"] = "0"

    try:
        result = vector_store.browse(
            limit=limit,
            offset=0,
            source=source,
            language=language,
            days_back=days_back,
        )

        # Convert the returned points to articles, keeping the order in which
        # the store returned them; points without a score attribute get None.
        articles = [
            _dict_to_article(point.payload or {}, getattr(point, "score", None))
            for point in result["articles"]
        ]

        # Only an integer offset is passed on; anything else (including a
        # missing key) is reported as None.
        next_offset = result.get("next_offset")
        if not isinstance(next_offset, int):
            next_offset = None

        return BrowseResponse(
            total_returned=len(articles),
            articles=articles,
            next_offset=next_offset,
        )
    except Exception as exc:
        # Deliberate best-effort: return an empty response instead of a 500,
        # but record the failure with its traceback through the logging
        # system rather than printing to stdout.
        logging.getLogger(__name__).exception("Error fetching news: %s", exc)
        return BrowseResponse(
            total_returned=0,
            articles=[],
            next_offset=None,
        )
@router.get("/browse", response_model=BrowseResponse)
def browse_news(
    limit: int = Query(20, le=100),
    offset: int = 0,
    source: Optional[str] = None,
    language: Optional[str] = None,
    vector_store: VectorStorePort = Depends(get_vector_store_port),
):
    """Browse stored news articles page by page, optionally filtered by source and language."""
    result = vector_store.browse(limit=limit, offset=offset, source=source, language=language)

    # Each returned point exposes its fields via .payload; a score attribute
    # is optional and defaults to None.
    articles = [
        _dict_to_article(point.payload or {}, getattr(point, "score", None))
        for point in result["articles"]
    ]

    # Same normalisation as the /latest endpoint: this endpoint accepts an
    # integer ``offset``, so only an integer next_offset is passed on; a
    # missing or non-integer value is reported as None.
    next_offset = result.get("next_offset")
    if not isinstance(next_offset, int):
        next_offset = None

    return BrowseResponse(
        total_returned=len(articles),
        articles=articles,
        next_offset=next_offset,
    )
@router.get("/search", response_model=SearchResponse)
def search_news(
    q: str = Query(..., min_length=1),
    top_k: int = Query(10, le=50),
    source: Optional[str] = None,
    language: Optional[str] = None,
    embedder: EmbedderPort = Depends(get_embedder_port),
    vector_store: VectorStorePort = Depends(get_vector_store_port),
):
    """Search news articles by embedding the query text and querying the vector store."""
    try:
        query_vector = embedder.encode_query(q)
        results = vector_store.search(
            query_vectors=query_vector,
            limit=top_k,
            source_filter=source,
            language_filter=language,
        )
        # VectorStorePort.search returns SearchResult objects
        # (content, metadata, score, doc_id).
        # NOTE(review): only ``metadata`` is handed to the converter, so the
        # article content comes from the metadata's "text"/"content" keys,
        # not from the result's own ``content`` - confirm this is intended.
        articles = [_dict_to_article(hit.metadata, hit.score) for hit in results]
        return SearchResponse(results=articles)
    except Exception as exc:
        # Chain the original exception so the root cause stays in the traceback.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
@router.get("/sources")
def get_sources(analytics: AnalyticsUseCase = Depends(get_analytics_use_case)):
    """List the news sources found in the sentiment results, most articles first."""
    outcome = analytics.execute_raw_query(
        "SELECT source, count() as article_count FROM sentiment_results GROUP BY source ORDER BY article_count DESC"
    )

    # No result at all, or a result without rows: report an empty list.
    if not outcome or not outcome.get("rows"):
        return {"sources": []}

    # The first column of each row is the source name; skip empty ones.
    names = []
    for record in outcome["rows"]:
        if record[0]:
            names.append(record[0])
    return {"sources": names}
@router.get("/{doc_id}", response_model=NewsArticle)
def get_news_article(doc_id: str, vector_store: VectorStorePort = Depends(get_vector_store_port)):
    """Fetch a single news article by its document id; responds 404 when it is not found."""
    found = vector_store.get_by_doc_id(doc_id)
    if found:
        # The stored metadata and score are converted into the response model.
        return _dict_to_article(found.metadata, found.score)
    raise HTTPException(status_code=404, detail="Article not found")