Peterase committed on
Commit
34be12b
·
1 Parent(s): 1d5c812

feat: guarantee 6 top stories with vector DB fallback

Browse files
Files changed (1) hide show
  1. src/api/routes/top_stories.py +35 -6
src/api/routes/top_stories.py CHANGED
@@ -14,8 +14,9 @@ from fastapi import APIRouter, Query, Depends
14
  from pydantic import BaseModel
15
  from datetime import datetime
16
 
17
- from src.api.dependencies import get_cache_port, get_live_search_port
18
  from src.core.ports.cache_port import CachePort
 
19
  from src.infrastructure.adapters.duckduckgo_adapter import DuckDuckGoAdapter
20
 
21
  try:
@@ -232,8 +233,8 @@ async def fetch_live_stories(n: int = 6, adapter: DuckDuckGoAdapter = None) -> L
232
  return []
233
 
234
  try:
235
- # Focus on Ethiopia-related world-wide news
236
- query = "Ethiopia related world-wide news today"
237
  results = await adapter.search(query)
238
 
239
  stories = []
@@ -261,7 +262,8 @@ async def fetch_live_stories(n: int = 6, adapter: DuckDuckGoAdapter = None) -> L
261
  async def get_top_stories(
262
  force_refresh: bool = Query(default=False, description="Force cache refresh"),
263
  cache: CachePort = Depends(get_cache_port),
264
- adapter: DuckDuckGoAdapter = Depends(get_live_search_port)
 
265
  ):
266
  """
267
  Get top 6 news stories for the landing page.
@@ -296,12 +298,39 @@ async def get_top_stories(
296
  all_stories: List[TopStory] = []
297
  seen_titles: set = set()
298
 
299
- for story in live_stories + kafka_stories: # Prioritize live for today's top stories
300
- title_key = story.title.lower()[:60]
301
  if title_key not in seen_titles:
302
  seen_titles.add(title_key)
303
  all_stories.append(story)
304
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  now_iso = datetime.utcnow().isoformat()
306
  final_stories = all_stories[:6]
307
 
 
14
  from pydantic import BaseModel
15
  from datetime import datetime
16
 
17
+ from src.api.dependencies import get_cache_port, get_live_search_port, get_vector_store_port
18
  from src.core.ports.cache_port import CachePort
19
+ from src.core.ports.vector_store_port import VectorStorePort
20
  from src.infrastructure.adapters.duckduckgo_adapter import DuckDuckGoAdapter
21
 
22
  try:
 
233
  return []
234
 
235
  try:
236
+ # Broaden search to ensure we get enough results
237
+ query = "Ethiopia latest news breaking world"
238
  results = await adapter.search(query)
239
 
240
  stories = []
 
262
  async def get_top_stories(
263
  force_refresh: bool = Query(default=False, description="Force cache refresh"),
264
  cache: CachePort = Depends(get_cache_port),
265
+ adapter: DuckDuckGoAdapter = Depends(get_live_search_port),
266
+ vector_store: VectorStorePort = Depends(get_vector_store_port)
267
  ):
268
  """
269
  Get top 6 news stories for the landing page.
 
298
  all_stories: List[TopStory] = []
299
  seen_titles: set = set()
300
 
301
+ for story in live_stories + kafka_stories: # Prioritize live
302
+ title_key = story.title.lower().strip()[:60]
303
  if title_key not in seen_titles:
304
  seen_titles.add(title_key)
305
  all_stories.append(story)
306
 
307
+ # ── FALLBACK: If still less than 6, pull from Vector DB (Qdrant) ──────────
308
+ if len(all_stories) < 6:
309
+ needed = 6 - len(all_stories)
310
+ logger.info(f"Top stories fallback: pulling {needed} from Vector DB")
311
+ try:
312
+ db_res = vector_store.browse(limit=needed * 2, days_back=7)
313
+ for p in db_res.get("articles", []):
314
+ payload = p.payload or {}
315
+ title = payload.get("title") or payload.get("content", "")[:100]
316
+ url = payload.get("url") or "#"
317
+
318
+ title_key = title.lower().strip()[:60]
319
+ if title_key not in seen_titles and len(all_stories) < 6:
320
+ seen_titles.add(title_key)
321
+ all_stories.append(TopStory(
322
+ title=title,
323
+ url=url,
324
+ source=payload.get("source", "ARKI Intelligence"),
325
+ published_at=payload.get("published_at", datetime.utcnow().isoformat()),
326
+ category="UPDATE",
327
+ excerpt=payload.get("content", "")[:150],
328
+ image_url=payload.get("image_url") or payload.get("thumbnail"),
329
+ origin="db"
330
+ ))
331
+ except Exception as e:
332
+ logger.error(f"Top stories DB fallback failed: {e}")
333
+
334
  now_iso = datetime.utcnow().isoformat()
335
  final_stories = all_stories[:6]
336