Spaces:
Running
fix(news): Add time-based filtering and cache-control to /latest endpoint
Browse files
PROBLEM:
- /api/v1/news/latest endpoint was returning old cached articles
- No time-based filtering on browse queries
- Browser/CDN caching causing stale data display
- Qdrant scroll returning results in insertion order, not by date
SOLUTION:
1. Added days_back parameter to browse() method (default: 7 days for /latest)
2. Implemented Qdrant order_by for server-side sorting by published_at DESC
3. Added fallback for older Qdrant versions without order_by support
4. Added Cache-Control headers to prevent browser/CDN caching
5. Updated VectorStorePort interface to support time-based filtering
CHANGES:
- src/infrastructure/adapters/qdrant_adapter.py:
* Added days_back parameter to browse() method
* Added datetime range filter using published_at field
* Implemented order_by with published_at DESC for server-side sorting
* Added fallback logic for Qdrant versions without order_by support
* Enhanced error handling with retry logic
- src/core/ports/vector_store_port.py:
* Updated browse() interface to include days_back parameter
* Added documentation for time-based filtering
- src/api/routes/news.py:
* Added days_back query parameter (default: 7, range: 1-30)
* Added Cache-Control, Pragma, and Expires headers to prevent caching
* Removed redundant client-side sorting (now done by Qdrant)
* Imported Response from FastAPI for header manipulation
IMPACT:
- Fresh news articles always displayed (last 7 days by default)
- No browser/CDN caching of news results
- Better performance with server-side sorting
- Configurable time window (1-30 days)
- Backward compatible with older Qdrant versions
TESTING:
- Endpoint: GET /api/v1/news/latest?limit=6&days_back=7
- Expected: Articles from last 7 days, sorted newest first
- Headers: Cache-Control: no-cache, no-store, must-revalidate, max-age=0 (plus Pragma: no-cache and Expires: 0)
Version: v2.2
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
from fastapi import APIRouter, Query, HTTPException, Depends
|
| 2 |
from typing import Optional
|
| 3 |
from src.core.ports.vector_store_port import VectorStorePort
|
| 4 |
from src.core.ports.embedder_port import EmbedderPort
|
|
@@ -36,24 +36,34 @@ def _dict_to_article(payload: dict, score: float = None) -> NewsArticle:
|
|
| 36 |
|
| 37 |
@router.get("/latest", response_model=BrowseResponse)
|
| 38 |
def get_latest_news(
|
|
|
|
| 39 |
limit: int = Query(10, le=50),
|
| 40 |
source: Optional[str] = None,
|
| 41 |
language: Optional[str] = None,
|
|
|
|
| 42 |
vector_store: VectorStorePort = Depends(get_vector_store_port)
|
| 43 |
):
|
| 44 |
-
"""Get latest news articles sorted by publication date"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
try:
|
| 46 |
-
result = vector_store.browse(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
-
# Convert Qdrant points to articles
|
| 49 |
articles = []
|
| 50 |
for p in result["articles"]:
|
| 51 |
article = _dict_to_article(p.payload or {}, getattr(p, "score", None))
|
| 52 |
articles.append(article)
|
| 53 |
|
| 54 |
-
# Sort by published_at descending (latest first)
|
| 55 |
-
articles.sort(key=lambda x: x.published_at or "", reverse=True)
|
| 56 |
-
|
| 57 |
# Ensure next_offset is an integer or None
|
| 58 |
next_offset = result.get("next_offset")
|
| 59 |
if next_offset is not None and not isinstance(next_offset, int):
|
|
|
|
| 1 |
+
from fastapi import APIRouter, Query, HTTPException, Depends, Response
|
| 2 |
from typing import Optional
|
| 3 |
from src.core.ports.vector_store_port import VectorStorePort
|
| 4 |
from src.core.ports.embedder_port import EmbedderPort
|
|
|
|
| 36 |
|
| 37 |
@router.get("/latest", response_model=BrowseResponse)
|
| 38 |
def get_latest_news(
|
| 39 |
+
response: Response,
|
| 40 |
limit: int = Query(10, le=50),
|
| 41 |
source: Optional[str] = None,
|
| 42 |
language: Optional[str] = None,
|
| 43 |
+
days_back: int = Query(7, ge=1, le=30, description="Number of days to look back (default: 7)"),
|
| 44 |
vector_store: VectorStorePort = Depends(get_vector_store_port)
|
| 45 |
):
|
| 46 |
+
"""Get latest news articles sorted by publication date (default: last 7 days)"""
|
| 47 |
+
# Prevent caching of news results - always fetch fresh data
|
| 48 |
+
response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate, max-age=0"
|
| 49 |
+
response.headers["Pragma"] = "no-cache"
|
| 50 |
+
response.headers["Expires"] = "0"
|
| 51 |
+
|
| 52 |
try:
|
| 53 |
+
result = vector_store.browse(
|
| 54 |
+
limit=limit,
|
| 55 |
+
offset=0,
|
| 56 |
+
source=source,
|
| 57 |
+
language=language,
|
| 58 |
+
days_back=days_back
|
| 59 |
+
)
|
| 60 |
|
| 61 |
+
# Convert Qdrant points to articles (already sorted by Qdrant)
|
| 62 |
articles = []
|
| 63 |
for p in result["articles"]:
|
| 64 |
article = _dict_to_article(p.payload or {}, getattr(p, "score", None))
|
| 65 |
articles.append(article)
|
| 66 |
|
|
|
|
|
|
|
|
|
|
| 67 |
# Ensure next_offset is an integer or None
|
| 68 |
next_offset = result.get("next_offset")
|
| 69 |
if next_offset is not None and not isinstance(next_offset, int):
|
|
@@ -42,8 +42,9 @@ class VectorStorePort(abc.ABC):
|
|
| 42 |
pass
|
| 43 |
|
| 44 |
@abc.abstractmethod
|
| 45 |
-
def browse(self, limit: int = 20, offset: int = 0, source: Optional[str] = None, language: Optional[str] = None) -> Dict[str, Any]:
|
| 46 |
"""
|
| 47 |
Browses the vector collection with pagination and filtering.
|
|
|
|
| 48 |
"""
|
| 49 |
pass
|
|
|
|
| 42 |
pass
|
| 43 |
|
| 44 |
@abc.abstractmethod
|
| 45 |
+
def browse(self, limit: int = 20, offset: int = 0, source: Optional[str] = None, language: Optional[str] = None, days_back: Optional[int] = None) -> Dict[str, Any]:
|
| 46 |
"""
|
| 47 |
Browses the vector collection with pagination and filtering.
|
| 48 |
+
days_back: Optional filter to only return articles from the last N days (default: None = all articles)
|
| 49 |
"""
|
| 50 |
pass
|
|
@@ -316,6 +316,7 @@ class QdrantAdapter(VectorStorePort):
|
|
| 316 |
offset: int = 0,
|
| 317 |
source: Optional[str] = None,
|
| 318 |
language: Optional[str] = None,
|
|
|
|
| 319 |
) -> Dict[str, Any]:
|
| 320 |
if not self.client:
|
| 321 |
return {"articles": [], "next_offset": None}
|
|
@@ -329,6 +330,15 @@ class QdrantAdapter(VectorStorePort):
|
|
| 329 |
must.append(models.FieldCondition(
|
| 330 |
key="language", match=models.MatchValue(value=language)
|
| 331 |
))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 332 |
filter_obj = models.Filter(must=must) if must else None
|
| 333 |
|
| 334 |
try:
|
|
@@ -340,6 +350,10 @@ class QdrantAdapter(VectorStorePort):
|
|
| 340 |
offset=offset,
|
| 341 |
with_payload=True,
|
| 342 |
with_vectors=False,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 343 |
)
|
| 344 |
|
| 345 |
# Keep only the lowest chunk_index per doc_id (first chunk of each article)
|
|
@@ -352,6 +366,7 @@ class QdrantAdapter(VectorStorePort):
|
|
| 352 |
seen_docs[doc_id] = (point, chunk_index)
|
| 353 |
|
| 354 |
deduped = [v[0] for v in seen_docs.values()]
|
|
|
|
| 355 |
deduped.sort(
|
| 356 |
key=lambda p: (p.payload or {}).get("published_at") or "",
|
| 357 |
reverse=True,
|
|
@@ -359,5 +374,33 @@ class QdrantAdapter(VectorStorePort):
|
|
| 359 |
return {"articles": deduped[:limit], "next_offset": next_page_offset}
|
| 360 |
|
| 361 |
except Exception as e:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
logger.error(f"Error browsing Qdrant: {e}")
|
| 363 |
return {"articles": [], "next_offset": None}
|
|
|
|
| 316 |
offset: int = 0,
|
| 317 |
source: Optional[str] = None,
|
| 318 |
language: Optional[str] = None,
|
| 319 |
+
days_back: Optional[int] = None,
|
| 320 |
) -> Dict[str, Any]:
|
| 321 |
if not self.client:
|
| 322 |
return {"articles": [], "next_offset": None}
|
|
|
|
| 330 |
must.append(models.FieldCondition(
|
| 331 |
key="language", match=models.MatchValue(value=language)
|
| 332 |
))
|
| 333 |
+
|
| 334 |
+
# Add time-based filter only when days_back is given (None = no time filter; the /latest route passes 7 by default)
|
| 335 |
+
if days_back is not None:
|
| 336 |
+
min_date = datetime.utcnow() - timedelta(days=days_back)
|
| 337 |
+
must.append(models.FieldCondition(
|
| 338 |
+
key="published_at",
|
| 339 |
+
range=models.DatetimeRange(gte=min_date)
|
| 340 |
+
))
|
| 341 |
+
|
| 342 |
filter_obj = models.Filter(must=must) if must else None
|
| 343 |
|
| 344 |
try:
|
|
|
|
| 350 |
offset=offset,
|
| 351 |
with_payload=True,
|
| 352 |
with_vectors=False,
|
| 353 |
+
order_by=models.OrderBy(
|
| 354 |
+
key="published_at",
|
| 355 |
+
direction=models.Direction.DESC
|
| 356 |
+
)
|
| 357 |
)
|
| 358 |
|
| 359 |
# Keep only the lowest chunk_index per doc_id (first chunk of each article)
|
|
|
|
| 366 |
seen_docs[doc_id] = (point, chunk_index)
|
| 367 |
|
| 368 |
deduped = [v[0] for v in seen_docs.values()]
|
| 369 |
+
# Results are already sorted by Qdrant, but re-sort to ensure consistency after dedup
|
| 370 |
deduped.sort(
|
| 371 |
key=lambda p: (p.payload or {}).get("published_at") or "",
|
| 372 |
reverse=True,
|
|
|
|
| 374 |
return {"articles": deduped[:limit], "next_offset": next_page_offset}
|
| 375 |
|
| 376 |
except Exception as e:
|
| 377 |
+
# If order_by fails (older Qdrant version), retry without it
|
| 378 |
+
if "order_by" in str(e).lower():
|
| 379 |
+
logger.warning(f"Qdrant order_by not supported, falling back to client-side sort: {e}")
|
| 380 |
+
try:
|
| 381 |
+
results, next_page_offset = self.client.scroll(
|
| 382 |
+
collection_name=settings.QDRANT_COLLECTION,
|
| 383 |
+
scroll_filter=filter_obj,
|
| 384 |
+
limit=limit * 8,
|
| 385 |
+
offset=offset,
|
| 386 |
+
with_payload=True,
|
| 387 |
+
with_vectors=False,
|
| 388 |
+
)
|
| 389 |
+
seen_docs: dict = {}
|
| 390 |
+
for point in results:
|
| 391 |
+
payload = point.payload or {}
|
| 392 |
+
doc_id = payload.get("doc_id", point.id)
|
| 393 |
+
chunk_index = payload.get("chunk_index", 0)
|
| 394 |
+
if doc_id not in seen_docs or chunk_index < seen_docs[doc_id][1]:
|
| 395 |
+
seen_docs[doc_id] = (point, chunk_index)
|
| 396 |
+
deduped = [v[0] for v in seen_docs.values()]
|
| 397 |
+
deduped.sort(
|
| 398 |
+
key=lambda p: (p.payload or {}).get("published_at") or "",
|
| 399 |
+
reverse=True,
|
| 400 |
+
)
|
| 401 |
+
return {"articles": deduped[:limit], "next_offset": next_page_offset}
|
| 402 |
+
except Exception as e2:
|
| 403 |
+
logger.error(f"Error browsing Qdrant (fallback): {e2}")
|
| 404 |
+
return {"articles": [], "next_offset": None}
|
| 405 |
logger.error(f"Error browsing Qdrant: {e}")
|
| 406 |
return {"articles": [], "next_offset": None}
|