Spaces:
Running
Running
feat: guarantee 6 top stories with vector DB fallback
Browse files
src/api/routes/top_stories.py
CHANGED
|
@@ -14,8 +14,9 @@ from fastapi import APIRouter, Query, Depends
|
|
| 14 |
from pydantic import BaseModel
|
| 15 |
from datetime import datetime
|
| 16 |
|
| 17 |
-
from src.api.dependencies import get_cache_port, get_live_search_port
|
| 18 |
from src.core.ports.cache_port import CachePort
|
|
|
|
| 19 |
from src.infrastructure.adapters.duckduckgo_adapter import DuckDuckGoAdapter
|
| 20 |
|
| 21 |
try:
|
|
@@ -232,8 +233,8 @@ async def fetch_live_stories(n: int = 6, adapter: DuckDuckGoAdapter = None) -> L
|
|
| 232 |
return []
|
| 233 |
|
| 234 |
try:
|
| 235 |
-
#
|
| 236 |
-
query = "Ethiopia
|
| 237 |
results = await adapter.search(query)
|
| 238 |
|
| 239 |
stories = []
|
|
@@ -261,7 +262,8 @@ async def fetch_live_stories(n: int = 6, adapter: DuckDuckGoAdapter = None) -> L
|
|
| 261 |
async def get_top_stories(
|
| 262 |
force_refresh: bool = Query(default=False, description="Force cache refresh"),
|
| 263 |
cache: CachePort = Depends(get_cache_port),
|
| 264 |
-
adapter: DuckDuckGoAdapter = Depends(get_live_search_port)
|
|
|
|
| 265 |
):
|
| 266 |
"""
|
| 267 |
Get top 6 news stories for the landing page.
|
|
@@ -296,12 +298,39 @@ async def get_top_stories(
|
|
| 296 |
all_stories: List[TopStory] = []
|
| 297 |
seen_titles: set = set()
|
| 298 |
|
| 299 |
-
for story in live_stories + kafka_stories: # Prioritize live
|
| 300 |
-
title_key = story.title.lower()[:60]
|
| 301 |
if title_key not in seen_titles:
|
| 302 |
seen_titles.add(title_key)
|
| 303 |
all_stories.append(story)
|
| 304 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
now_iso = datetime.utcnow().isoformat()
|
| 306 |
final_stories = all_stories[:6]
|
| 307 |
|
|
|
|
| 14 |
from pydantic import BaseModel
|
| 15 |
from datetime import datetime
|
| 16 |
|
| 17 |
+
from src.api.dependencies import get_cache_port, get_live_search_port, get_vector_store_port
|
| 18 |
from src.core.ports.cache_port import CachePort
|
| 19 |
+
from src.core.ports.vector_store_port import VectorStorePort
|
| 20 |
from src.infrastructure.adapters.duckduckgo_adapter import DuckDuckGoAdapter
|
| 21 |
|
| 22 |
try:
|
|
|
|
| 233 |
return []
|
| 234 |
|
| 235 |
try:
|
| 236 |
+
# Broaden search to ensure we get enough results
|
| 237 |
+
query = "Ethiopia latest news breaking world"
|
| 238 |
results = await adapter.search(query)
|
| 239 |
|
| 240 |
stories = []
|
|
|
|
| 262 |
async def get_top_stories(
|
| 263 |
force_refresh: bool = Query(default=False, description="Force cache refresh"),
|
| 264 |
cache: CachePort = Depends(get_cache_port),
|
| 265 |
+
adapter: DuckDuckGoAdapter = Depends(get_live_search_port),
|
| 266 |
+
vector_store: VectorStorePort = Depends(get_vector_store_port)
|
| 267 |
):
|
| 268 |
"""
|
| 269 |
Get top 6 news stories for the landing page.
|
|
|
|
| 298 |
all_stories: List[TopStory] = []
|
| 299 |
seen_titles: set = set()
|
| 300 |
|
| 301 |
+
for story in live_stories + kafka_stories: # Prioritize live
|
| 302 |
+
title_key = story.title.lower().strip()[:60]
|
| 303 |
if title_key not in seen_titles:
|
| 304 |
seen_titles.add(title_key)
|
| 305 |
all_stories.append(story)
|
| 306 |
|
| 307 |
+
# ── FALLBACK: If still less than 6, pull from Vector DB (Qdrant) ──────────
|
| 308 |
+
if len(all_stories) < 6:
|
| 309 |
+
needed = 6 - len(all_stories)
|
| 310 |
+
logger.info(f"Top stories fallback: pulling {needed} from Vector DB")
|
| 311 |
+
try:
|
| 312 |
+
db_res = vector_store.browse(limit=needed * 2, days_back=7)
|
| 313 |
+
for p in db_res.get("articles", []):
|
| 314 |
+
payload = p.payload or {}
|
| 315 |
+
title = payload.get("title") or payload.get("content", "")[:100]
|
| 316 |
+
url = payload.get("url") or "#"
|
| 317 |
+
|
| 318 |
+
title_key = title.lower().strip()[:60]
|
| 319 |
+
if title_key not in seen_titles and len(all_stories) < 6:
|
| 320 |
+
seen_titles.add(title_key)
|
| 321 |
+
all_stories.append(TopStory(
|
| 322 |
+
title=title,
|
| 323 |
+
url=url,
|
| 324 |
+
source=payload.get("source", "ARKI Intelligence"),
|
| 325 |
+
published_at=payload.get("published_at", datetime.utcnow().isoformat()),
|
| 326 |
+
category="UPDATE",
|
| 327 |
+
excerpt=payload.get("content", "")[:150],
|
| 328 |
+
image_url=payload.get("image_url") or payload.get("thumbnail"),
|
| 329 |
+
origin="db"
|
| 330 |
+
))
|
| 331 |
+
except Exception as e:
|
| 332 |
+
logger.error(f"Top stories DB fallback failed: {e}")
|
| 333 |
+
|
| 334 |
now_iso = datetime.utcnow().isoformat()
|
| 335 |
final_stories = all_stories[:6]
|
| 336 |
|