Peterase committed on
Commit
e032d36
·
1 Parent(s): 35f6d98

feat: multi-region parallel search for top stories

Browse files
Files changed (1) hide show
  1. src/api/routes/top_stories.py +17 -9
src/api/routes/top_stories.py CHANGED
@@ -232,19 +232,23 @@ async def fetch_live_stories(n: int = 6, adapter: DuckDuckGoAdapter = None) -> L
232
  return []
233
 
234
  async def fetch_live_stories(n: int = 6, adapter: DuckDuckGoAdapter = None) -> List[TopStory]:
235
- """Fetch N live stories from DuckDuckGo using multiple queries to ensure high yield"""
236
  if not adapter:
237
  return []
238
 
239
  try:
240
- # Run multiple queries in parallel for better coverage
241
- queries = [
242
- "Ethiopia news breaking today",
243
- "Addis Ababa latest updates",
244
- "Ethiopia world news headlines"
 
245
  ]
246
 
247
- search_tasks = [adapter.search(q) for q in queries]
 
 
 
248
  all_results_lists = await asyncio.gather(*search_tasks)
249
 
250
  # Flatten and deduplicate
@@ -258,7 +262,8 @@ async def fetch_live_stories(n: int = 6, adapter: DuckDuckGoAdapter = None) -> L
258
  title = r.get("title", "Untitled")
259
  title_key = title.lower().strip()[:60]
260
 
261
- if url in seen_urls or title_key in seen_titles:
 
262
  continue
263
 
264
  seen_urls.add(url)
@@ -275,7 +280,10 @@ async def fetch_live_stories(n: int = 6, adapter: DuckDuckGoAdapter = None) -> L
275
  origin="live",
276
  ))
277
 
278
- logger.info(f"Multi-query live search: collected {len(stories)} unique stories")
 
 
 
279
  return stories[:n]
280
  except Exception as e:
281
  logger.error(f"Live top stories error: {e}")
 
232
  return []
233
 
234
  async def fetch_live_stories(n: int = 6, adapter: DuckDuckGoAdapter = None) -> List[TopStory]:
235
+ """Fetch N live stories from DuckDuckGo using multi-region queries for maximum yield"""
236
  if not adapter:
237
  return []
238
 
239
  try:
240
+ # We run 4 parallel searches with different regional focuses
241
+ search_configs = [
242
+ {"q": "Ethiopia news breaking today", "reg": "et-en"}, # Local Focus
243
+ {"q": "Ethiopia latest breaking news", "reg": "wt-wt"}, # Global Focus (CNN, BBC, etc)
244
+ {"q": "Addis Ababa news updates", "reg": "et-en"}, # Capital Focus
245
+ {"q": "Ethiopia world news reporting", "reg": "us-en"} # International Perspective
246
  ]
247
 
248
+ search_tasks = [
249
+ adapter.search(conf["q"], region=conf["reg"], max_results=10)
250
+ for conf in search_configs
251
+ ]
252
  all_results_lists = await asyncio.gather(*search_tasks)
253
 
254
  # Flatten and deduplicate
 
262
  title = r.get("title", "Untitled")
263
  title_key = title.lower().strip()[:60]
264
 
265
+ # Skip duplicates and titles shorter than 10 characters (covers empty titles and the "Untitled" fallback)
266
+ if url in seen_urls or title_key in seen_titles or len(title) < 10:
267
  continue
268
 
269
  seen_urls.add(url)
 
280
  origin="live",
281
  ))
282
 
283
+ # Sorting: Prioritize those with images, then by freshness
284
+ stories.sort(key=lambda s: (1 if s.image_url else 0, s.published_at), reverse=True)
285
+
286
+ logger.info(f"Multi-region search: collected {len(stories)} unique stories")
287
  return stories[:n]
288
  except Exception as e:
289
  logger.error(f"Live top stories error: {e}")