ifieryarrows committed on
Commit
ccc0d44
·
verified ·
1 Parent(s): 865ae4b

Sync from GitHub

Browse files
Files changed (1) hide show
  1. app/data_manager.py +33 -28
app/data_manager.py CHANGED
@@ -235,8 +235,6 @@ def ingest_news(session: Session) -> dict:
235
  Returns:
236
  Dict with stats: imported, duplicates, language_filtered, fuzzy_filtered
237
  """
238
- import random
239
-
240
  settings = get_settings()
241
 
242
  # Strategic queries based on S&P Global 2026 copper market report
@@ -269,9 +267,7 @@ def ingest_news(session: Session) -> dict:
269
  "grade decline copper mining",
270
  ]
271
 
272
- # Select a random strategic query for this run (ensures diversity over time)
273
- strategic_query = random.choice(STRATEGIC_QUERIES)
274
- logger.info(f"🕵️ Strategic News Agent: Investigating '{strategic_query}'")
275
 
276
  stats = {
277
  "imported": 0,
@@ -279,38 +275,47 @@ def ingest_news(session: Session) -> dict:
279
  "language_filtered": 0,
280
  "fuzzy_filtered": 0,
281
  "source": "unknown",
282
- "query_used": strategic_query,
283
  }
284
 
285
- # Collect articles from sources
286
  all_articles = []
287
 
288
- # Try NewsAPI first if key is available
289
- if settings.newsapi_key:
290
- articles = fetch_newsapi_articles(
291
- api_key=settings.newsapi_key,
292
- query=strategic_query,
293
- language=settings.news_language,
294
- lookback_days=settings.lookback_days,
295
- )
296
- if articles:
297
- all_articles.extend(articles)
298
- stats["source"] = "newsapi"
299
-
300
- # RSS fallback/supplement - also use strategic query
301
- if not all_articles or not settings.newsapi_key:
302
- rss_articles = fetch_google_news(
303
- query=strategic_query,
304
- language=settings.news_language,
305
- )
306
- all_articles.extend(rss_articles)
307
- stats["source"] = "rss" if not settings.newsapi_key else "newsapi+rss"
 
 
 
 
 
 
 
 
 
308
 
309
  if not all_articles:
310
  logger.warning("No articles fetched from any source")
311
  return stats
312
 
313
- logger.info(f"Total articles fetched: {len(all_articles)}")
314
 
315
  # Language filter
316
  all_articles, lang_filtered = filter_by_language(
 
235
  Returns:
236
  Dict with stats: imported, duplicates, language_filtered, fuzzy_filtered
237
  """
 
 
238
  settings = get_settings()
239
 
240
  # Strategic queries based on S&P Global 2026 copper market report
 
267
  "grade decline copper mining",
268
  ]
269
 
270
+ logger.info(f"🕵️ Strategic News Agent: Investigating {len(STRATEGIC_QUERIES)} topics...")
 
 
271
 
272
  stats = {
273
  "imported": 0,
 
275
  "language_filtered": 0,
276
  "fuzzy_filtered": 0,
277
  "source": "unknown",
278
+ "queries_used": len(STRATEGIC_QUERIES),
279
  }
280
 
281
+ # Collect articles from ALL strategic queries
282
  all_articles = []
283
 
284
+ for i, strategic_query in enumerate(STRATEGIC_QUERIES, 1):
285
+ logger.info(f" [{i}/{len(STRATEGIC_QUERIES)}] Searching: '{strategic_query}'")
286
+
287
+ query_articles = []
288
+
289
+ # Try NewsAPI first if key is available
290
+ if settings.newsapi_key:
291
+ articles = fetch_newsapi_articles(
292
+ api_key=settings.newsapi_key,
293
+ query=strategic_query,
294
+ language=settings.news_language,
295
+ lookback_days=settings.lookback_days,
296
+ )
297
+ if articles:
298
+ query_articles.extend(articles)
299
+
300
+ # RSS fallback/supplement
301
+ if not query_articles or not settings.newsapi_key:
302
+ rss_articles = fetch_google_news(
303
+ query=strategic_query,
304
+ language=settings.news_language,
305
+ )
306
+ query_articles.extend(rss_articles)
307
+
308
+ if query_articles:
309
+ logger.info(f" → Found {len(query_articles)} articles")
310
+ all_articles.extend(query_articles)
311
+
312
+ stats["source"] = "newsapi+rss" if settings.newsapi_key else "rss"
313
 
314
  if not all_articles:
315
  logger.warning("No articles fetched from any source")
316
  return stats
317
 
318
+ logger.info(f"Total articles fetched from all queries: {len(all_articles)}")
319
 
320
  # Language filter
321
  all_articles, lang_filtered = filter_by_language(