ifieryarrows commited on
Commit
99e7b4b
Β·
verified Β·
1 Parent(s): 84f8194

Sync from GitHub (tests passed)

Browse files
Files changed (5) hide show
  1. app/ai_engine.py +78 -19
  2. app/main.py +347 -0
  3. app/openrouter_client.py +45 -1
  4. app/schemas.py +81 -0
  5. app/settings.py +12 -8
app/ai_engine.py CHANGED
@@ -138,9 +138,13 @@ Label mapping:
138
  - impact_score <= -0.15 => BEARISH
139
  - otherwise => NEUTRAL
140
  """
141
- LLM_SCORING_RESPONSE_FORMAT_V2 = {
142
- "type": "json_object",
143
- }
 
 
 
 
144
  SCORING_V2_VERSION = "commodity_v2"
145
 
146
 
@@ -1068,9 +1072,10 @@ async def _score_subset_with_model_v2(
1068
  "fallback_models": settings.openrouter_fallback_models_list,
1069
  "referer": "https://copper-mind.vercel.app",
1070
  "title": "CopperMind Sentiment Analysis V2",
1071
- "response_format": LLM_SCORING_RESPONSE_FORMAT_V2,
1072
  "extra_payload": {"reasoning": {"exclude": True}},
1073
  }
 
 
1074
  return await create_chat_completion(**request_kwargs)
1075
 
1076
  parse_fail_count = 0
@@ -1187,13 +1192,14 @@ async def score_batch_with_llm_v2(
1187
  escalation_ids = sorted(set(fast_failed).union(conflict_ids))
1188
  escalation_count = len(escalation_ids)
1189
 
 
1190
  if escalation_ids and not fast_rate_limited:
1191
  reliable_subset = [
1192
  article_by_id[article_id]
1193
  for article_id in escalation_ids
1194
  if article_id in article_by_id
1195
  ]
1196
- reliable_valid, _reliable_failed, parse_fail_reliable, _rl = await _score_subset_with_model_v2(
1197
  settings=settings,
1198
  model_name=reliable_model,
1199
  articles=reliable_subset,
@@ -1219,6 +1225,10 @@ async def score_batch_with_llm_v2(
1219
  "fallback_count": fallback_count,
1220
  "model_fast": fast_model,
1221
  "model_reliable": reliable_model,
 
 
 
 
1222
  }
1223
 
1224
 
@@ -1347,6 +1357,7 @@ def score_unscored_processed_articles(
1347
  NewsProcessed.id.label("processed_id"),
1348
  NewsProcessed.canonical_title,
1349
  NewsProcessed.cleaned_text,
 
1350
  NewsRaw.title.label("raw_title"),
1351
  NewsRaw.description.label("raw_description"),
1352
  NewsRaw.published_at,
@@ -1387,22 +1398,37 @@ def score_unscored_processed_articles(
1387
  fast_model = settings.resolved_scoring_fast_model
1388
  reliable_model = settings.resolved_scoring_reliable_model
1389
 
 
 
 
 
 
 
 
1390
  for chunk_idx in range(0, len(rows), chunk_size):
1391
  chunk_rows = rows[chunk_idx:chunk_idx + chunk_size]
1392
  chunk_items: list[dict] = []
 
1393
  for row in chunk_rows:
1394
  title = str(row.raw_title or row.canonical_title or "")[:500]
1395
  description = str(row.raw_description or "")[:1000]
1396
  text = str(row.cleaned_text or f"{title} {description}")[:2000]
 
 
1397
  chunk_items.append(
1398
  {
1399
- "id": int(row.processed_id),
1400
  "title": title,
1401
  "description": description,
1402
  "text": text,
1403
  "published_at": row.published_at,
 
1404
  }
1405
  )
 
 
 
 
1406
 
1407
  finbert_by_id = score_batch_with_finbert_v2(chunk_items)
1408
  finbert_used += len(finbert_by_id)
@@ -1410,14 +1436,29 @@ def score_unscored_processed_articles(
1410
  llm_results_by_id: dict[int, dict] = {}
1411
  llm_candidates: list[dict] = []
1412
 
1413
- # Rate-limit flag is keyed to today's UTC date so it resets automatically at midnight.
 
 
 
 
1414
  today_utc = datetime.now(timezone.utc).date().isoformat()
1415
- rate_limited_date = getattr(score_unscored_processed_articles, "_rate_limited_date", None)
1416
- global_rate_limited = rate_limited_date == today_utc
1417
-
1418
- if settings.openrouter_api_key and llm_budget_remaining > 0 and not global_rate_limited:
1419
- llm_take = min(len(chunk_items), llm_budget_remaining)
1420
- llm_candidates = chunk_items[:llm_take]
 
 
 
 
 
 
 
 
 
 
 
1421
  llm_budget_remaining -= llm_take
1422
 
1423
  if llm_candidates:
@@ -1434,13 +1475,31 @@ def score_unscored_processed_articles(
1434
  fast_model = str(llm_bundle.get("model_fast", fast_model))
1435
  reliable_model = str(llm_bundle.get("model_reliable", reliable_model))
1436
 
1437
- # If LLM returned 100% fail and flagged rate limit, mark for today's UTC date.
1438
- # Flag resets automatically the next UTC day when the daily limit refreshes.
1439
- if llm_bundle.get("rate_limited", False):
1440
- score_unscored_processed_articles._rate_limited_date = datetime.now(timezone.utc).date().isoformat()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1441
  logger.warning(
1442
- "V2 batch hit OpenRouter daily rate limit - LLM scoring disabled for the rest of UTC day %s.",
1443
- score_unscored_processed_articles._rate_limited_date,
 
1444
  )
1445
 
1446
  except Exception as exc:
 
138
  - impact_score <= -0.15 => BEARISH
139
  - otherwise => NEUTRAL
140
  """
141
+ # NOTE: Intentionally omit OpenAI-style `response_format` for V2 scoring.
142
+ # Some free-tier OpenRouter providers (stepfun, mistral free variants) either
143
+ # reject `{"type": "json_object"}` or wrap the expected JSON array inside an
144
+ # object/unexpected structure when that hint is set. The prompt explicitly asks
145
+ # for a JSON array, and `_clean_json_content` already handles markdown fences,
146
+ # wrapped objects, and preambles, so we rely on prompt+post-processing instead.
147
+ LLM_SCORING_RESPONSE_FORMAT_V2: dict[str, Any] | None = None
148
  SCORING_V2_VERSION = "commodity_v2"
149
 
150
 
 
1072
  "fallback_models": settings.openrouter_fallback_models_list,
1073
  "referer": "https://copper-mind.vercel.app",
1074
  "title": "CopperMind Sentiment Analysis V2",
 
1075
  "extra_payload": {"reasoning": {"exclude": True}},
1076
  }
1077
+ if LLM_SCORING_RESPONSE_FORMAT_V2 is not None:
1078
+ request_kwargs["response_format"] = LLM_SCORING_RESPONSE_FORMAT_V2
1079
  return await create_chat_completion(**request_kwargs)
1080
 
1081
  parse_fail_count = 0
 
1192
  escalation_ids = sorted(set(fast_failed).union(conflict_ids))
1193
  escalation_count = len(escalation_ids)
1194
 
1195
+ reliable_rate_limited = False
1196
  if escalation_ids and not fast_rate_limited:
1197
  reliable_subset = [
1198
  article_by_id[article_id]
1199
  for article_id in escalation_ids
1200
  if article_id in article_by_id
1201
  ]
1202
+ reliable_valid, _reliable_failed, parse_fail_reliable, reliable_rate_limited = await _score_subset_with_model_v2(
1203
  settings=settings,
1204
  model_name=reliable_model,
1205
  articles=reliable_subset,
 
1225
  "fallback_count": fallback_count,
1226
  "model_fast": fast_model,
1227
  "model_reliable": reliable_model,
1228
+ "rate_limited_fast": bool(fast_rate_limited),
1229
+ "rate_limited_reliable": bool(reliable_rate_limited),
1230
+ # Backward-compat: true only when BOTH models hit their daily ceiling.
1231
+ "rate_limited": bool(fast_rate_limited and reliable_rate_limited),
1232
  }
1233
 
1234
 
 
1357
  NewsProcessed.id.label("processed_id"),
1358
  NewsProcessed.canonical_title,
1359
  NewsProcessed.cleaned_text,
1360
+ NewsProcessed.language.label("language"),
1361
  NewsRaw.title.label("raw_title"),
1362
  NewsRaw.description.label("raw_description"),
1363
  NewsRaw.published_at,
 
1398
  fast_model = settings.resolved_scoring_fast_model
1399
  reliable_model = settings.resolved_scoring_reliable_model
1400
 
1401
+ # Articles that are non-English or too short do not benefit from LLM
1402
+ # classification (prompt is English, quotas are scarce) β€” we route them
1403
+ # straight to FinBERT+rule fallback. These thresholds are intentionally
1404
+ # conservative so we only skip when we're confident the LLM call would
1405
+ # be wasted.
1406
+ MIN_TEXT_CHARS_FOR_LLM = 80
1407
+
1408
  for chunk_idx in range(0, len(rows), chunk_size):
1409
  chunk_rows = rows[chunk_idx:chunk_idx + chunk_size]
1410
  chunk_items: list[dict] = []
1411
+ llm_eligible_ids: set[int] = set()
1412
  for row in chunk_rows:
1413
  title = str(row.raw_title or row.canonical_title or "")[:500]
1414
  description = str(row.raw_description or "")[:1000]
1415
  text = str(row.cleaned_text or f"{title} {description}")[:2000]
1416
+ language = (getattr(row, "language", None) or "").strip().lower()
1417
+ processed_id = int(row.processed_id)
1418
  chunk_items.append(
1419
  {
1420
+ "id": processed_id,
1421
  "title": title,
1422
  "description": description,
1423
  "text": text,
1424
  "published_at": row.published_at,
1425
+ "language": language or None,
1426
  }
1427
  )
1428
+ is_english = (not language) or language.startswith("en")
1429
+ long_enough = len(text) >= MIN_TEXT_CHARS_FOR_LLM
1430
+ if is_english and long_enough:
1431
+ llm_eligible_ids.add(processed_id)
1432
 
1433
  finbert_by_id = score_batch_with_finbert_v2(chunk_items)
1434
  finbert_used += len(finbert_by_id)
 
1436
  llm_results_by_id: dict[int, dict] = {}
1437
  llm_candidates: list[dict] = []
1438
 
1439
+ # Per-model rate-limit tracking (keyed by UTC date). A specific model
1440
+ # is considered exhausted for today if its entry equals today's date.
1441
+ # LLM scoring is skipped for the chunk only when BOTH fast and reliable
1442
+ # models have been flagged today β€” otherwise we still attempt (fallback
1443
+ # chain inside `score_batch_with_llm_v2` handles partial exhaustion).
1444
  today_utc = datetime.now(timezone.utc).date().isoformat()
1445
+ rate_limited_by_model: dict[str, str] = getattr(
1446
+ score_unscored_processed_articles, "_rate_limited_by_model", {}
1447
+ ) or {}
1448
+ fast_exhausted = rate_limited_by_model.get(fast_model) == today_utc
1449
+ reliable_exhausted = rate_limited_by_model.get(reliable_model) == today_utc
1450
+ both_exhausted = fast_exhausted and reliable_exhausted
1451
+
1452
+ if settings.openrouter_api_key and llm_budget_remaining > 0 and not both_exhausted:
1453
+ eligible_items = [item for item in chunk_items if item["id"] in llm_eligible_ids]
1454
+ skipped = len(chunk_items) - len(eligible_items)
1455
+ if skipped > 0:
1456
+ logger.info(
1457
+ "V2 skipping %d non-English/short-text articles; routing directly to FinBERT+rule fallback",
1458
+ skipped,
1459
+ )
1460
+ llm_take = min(len(eligible_items), llm_budget_remaining)
1461
+ llm_candidates = eligible_items[:llm_take]
1462
  llm_budget_remaining -= llm_take
1463
 
1464
  if llm_candidates:
 
1475
  fast_model = str(llm_bundle.get("model_fast", fast_model))
1476
  reliable_model = str(llm_bundle.get("model_reliable", reliable_model))
1477
 
1478
+ # Record per-model rate-limit state so individual model exhaustion
1479
+ # doesn't block the other one. Only emits the "disabled for day"
1480
+ # warning when both are flagged.
1481
+ updated = False
1482
+ if llm_bundle.get("rate_limited_fast"):
1483
+ rate_limited_by_model[fast_model] = today_utc
1484
+ updated = True
1485
+ logger.warning(
1486
+ "V2 fast model %s hit daily rate limit; will be skipped for rest of UTC day %s.",
1487
+ fast_model, today_utc,
1488
+ )
1489
+ if llm_bundle.get("rate_limited_reliable"):
1490
+ rate_limited_by_model[reliable_model] = today_utc
1491
+ updated = True
1492
+ logger.warning(
1493
+ "V2 reliable model %s hit daily rate limit; will be skipped for rest of UTC day %s.",
1494
+ reliable_model, today_utc,
1495
+ )
1496
+ if updated:
1497
+ score_unscored_processed_articles._rate_limited_by_model = rate_limited_by_model
1498
+ if llm_bundle.get("rate_limited"):
1499
  logger.warning(
1500
+ "V2 both models (%s, %s) rate-limited; LLM scoring paused until UTC %s.",
1501
+ fast_model, reliable_model,
1502
+ (datetime.now(timezone.utc).date() + timedelta(days=1)).isoformat(),
1503
  )
1504
 
1505
  except Exception as exc:
app/main.py CHANGED
@@ -36,6 +36,11 @@ from app.schemas import (
36
  ConsensusSignal,
37
  TFTModelSummaryResponse,
38
  BacktestReportResponse,
 
 
 
 
 
39
  )
40
 
41
  # Configure logging
@@ -1468,3 +1473,345 @@ async def get_sentiment_summary(
1468
  "generated_at": now.isoformat(),
1469
  }
1470
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  ConsensusSignal,
37
  TFTModelSummaryResponse,
38
  BacktestReportResponse,
39
+ NewsItem,
40
+ NewsListResponse,
41
+ NewsStatsResponse,
42
+ NewsFinbertProbs,
43
+ NewsSentimentBlock,
44
  )
45
 
46
  # Configure logging
 
1473
  "generated_at": now.isoformat(),
1474
  }
1475
 
1476
+
1477
+ # =============================================================================
1478
+ # News intelligence endpoints
1479
+ # =============================================================================
1480
+ #
1481
+ # Serves the Overview right-sidebar news feed. Reads from the news_raw/
1482
+ # news_processed/news_sentiments_v2 pipeline the daily worker already fills β€”
1483
+ # no LLM is invoked on the hot path.
1484
+ #
1485
+ # Source taxonomy:
1486
+ # * channel = ingestion channel (NewsRaw.source): "google_news" | "newsapi"
1487
+ # * publisher = original publisher (raw_payload.source): Reuters, Mining.com…
1488
+ # =============================================================================
1489
+
1490
+ _news_list_cache: dict[tuple, tuple[float, dict]] = {}
1491
+ _news_stats_cache: dict[int, tuple[float, dict]] = {}
1492
+ _NEWS_LIST_TTL_S = 60.0
1493
+ _NEWS_STATS_TTL_S = 120.0
1494
+ _VALID_LABELS = {"BULLISH", "BEARISH", "NEUTRAL"}
1495
+
1496
+
1497
+ def _extract_publisher(raw_payload) -> Optional[str]:
1498
+ """Pull the original publisher name out of a NewsRaw.raw_payload blob."""
1499
+ if not raw_payload:
1500
+ return None
1501
+ if isinstance(raw_payload, str):
1502
+ try:
1503
+ import json as _json
1504
+ raw_payload = _json.loads(raw_payload)
1505
+ except (ValueError, TypeError):
1506
+ return None
1507
+ if not isinstance(raw_payload, dict):
1508
+ return None
1509
+ src = raw_payload.get("source")
1510
+ if isinstance(src, dict):
1511
+ name = src.get("name") or src.get("title")
1512
+ return str(name) if name else None
1513
+ if isinstance(src, str) and src.strip():
1514
+ return src.strip()
1515
+ name = raw_payload.get("publisher") or raw_payload.get("author")
1516
+ return str(name) if name else None
1517
+
1518
+
1519
+ def _build_news_sentiment_block(sent: Optional[NewsSentimentV2]) -> Optional[NewsSentimentBlock]:
1520
+ if sent is None:
1521
+ return None
1522
+ return NewsSentimentBlock(
1523
+ label=sent.label,
1524
+ final_score=float(sent.final_score) if sent.final_score is not None else None,
1525
+ impact_score_llm=float(sent.impact_score_llm) if sent.impact_score_llm is not None else None,
1526
+ confidence=float(sent.confidence_calibrated) if sent.confidence_calibrated is not None else None,
1527
+ relevance=float(sent.relevance_score) if sent.relevance_score is not None else None,
1528
+ event_type=sent.event_type,
1529
+ finbert=NewsFinbertProbs(
1530
+ pos=float(sent.finbert_pos or 0.0),
1531
+ neu=float(sent.finbert_neu or 0.0),
1532
+ neg=float(sent.finbert_neg or 0.0),
1533
+ ),
1534
+ reasoning=_extract_reasoning_text(sent.reasoning_json),
1535
+ scored_at=sent.scored_at.isoformat() if sent.scored_at else None,
1536
+ )
1537
+
1538
+
1539
+ def _extract_reasoning_text(reasoning_json: Optional[str]) -> Optional[str]:
1540
+ """Pull a short human-readable rationale out of the cached JSON blob."""
1541
+ if not reasoning_json:
1542
+ return None
1543
+ try:
1544
+ import json as _json
1545
+ blob = _json.loads(reasoning_json)
1546
+ except (ValueError, TypeError):
1547
+ return str(reasoning_json)[:500] if reasoning_json else None
1548
+ if isinstance(blob, dict):
1549
+ for key in ("reasoning", "rationale", "summary", "explanation"):
1550
+ val = blob.get(key)
1551
+ if isinstance(val, str) and val.strip():
1552
+ return val.strip()[:500]
1553
+ return None
1554
+ if isinstance(blob, str):
1555
+ return blob[:500]
1556
+ return None
1557
+
1558
+
1559
+ @app.get(
1560
+ "/api/news",
1561
+ response_model=NewsListResponse,
1562
+ summary="Paginated news feed with sentiment annotations",
1563
+ )
1564
+ async def get_news_feed(
1565
+ limit: int = Query(default=20, ge=1, le=50),
1566
+ offset: int = Query(default=0, ge=0),
1567
+ since_hours: int = Query(default=48, ge=1, le=168),
1568
+ label: str = Query(default="all"),
1569
+ event_type: str = Query(default="all"),
1570
+ min_relevance: float = Query(default=0.0, ge=0.0, le=1.0),
1571
+ channel: str = Query(default="all"),
1572
+ publisher: Optional[str] = Query(default=None, max_length=200),
1573
+ search: Optional[str] = Query(default=None, max_length=200),
1574
+ ):
1575
+ from sqlalchemy import desc as _desc
1576
+
1577
+ filters_echo = {
1578
+ "limit": limit,
1579
+ "offset": offset,
1580
+ "since_hours": since_hours,
1581
+ "label": label,
1582
+ "event_type": event_type,
1583
+ "min_relevance": min_relevance,
1584
+ "channel": channel,
1585
+ "publisher": publisher,
1586
+ "search": search,
1587
+ }
1588
+ cache_key = tuple(sorted(filters_echo.items()))
1589
+ now_ts = datetime.now(timezone.utc).timestamp()
1590
+ cached = _news_list_cache.get(cache_key)
1591
+ if cached and (now_ts - cached[0]) < _NEWS_LIST_TTL_S:
1592
+ return cached[1]
1593
+
1594
+ label_upper = label.upper()
1595
+ if label_upper != "ALL" and label_upper not in _VALID_LABELS:
1596
+ raise HTTPException(status_code=400, detail=f"Invalid label '{label}'")
1597
+
1598
+ with SessionLocal() as session:
1599
+ now = datetime.now(timezone.utc)
1600
+ cutoff = now - timedelta(hours=since_hours)
1601
+
1602
+ q = (
1603
+ session.query(NewsRaw, NewsProcessed, NewsSentimentV2)
1604
+ .join(NewsProcessed, NewsProcessed.raw_id == NewsRaw.id)
1605
+ .outerjoin(
1606
+ NewsSentimentV2,
1607
+ NewsSentimentV2.news_processed_id == NewsProcessed.id,
1608
+ )
1609
+ .filter(NewsRaw.published_at >= cutoff)
1610
+ )
1611
+
1612
+ if channel.lower() != "all":
1613
+ q = q.filter(NewsRaw.source == channel)
1614
+ if event_type.lower() != "all":
1615
+ q = q.filter(NewsSentimentV2.event_type == event_type)
1616
+ if label_upper != "ALL":
1617
+ q = q.filter(NewsSentimentV2.label == label_upper)
1618
+ if min_relevance > 0:
1619
+ q = q.filter(NewsSentimentV2.relevance_score >= min_relevance)
1620
+ if search:
1621
+ q = q.filter(NewsRaw.title.ilike(f"%{search}%"))
1622
+
1623
+ q = q.order_by(_desc(NewsRaw.published_at))
1624
+
1625
+ publisher_needle = publisher.strip().lower() if publisher and publisher.strip() else None
1626
+
1627
+ if publisher_needle:
1628
+ # Publisher filter requires JSON extraction; do it in Python to
1629
+ # remain backend-agnostic (sqlite/postgres) and keep the endpoint
1630
+ # simple. Scope is bounded by the time window filter above.
1631
+ rows = q.limit(500).all()
1632
+ filtered = [
1633
+ triple for triple in rows
1634
+ if (
1635
+ _extract_publisher(triple[0].raw_payload) or ""
1636
+ ).lower().find(publisher_needle) >= 0
1637
+ ]
1638
+ total = len(filtered)
1639
+ page_rows = filtered[offset: offset + limit]
1640
+ else:
1641
+ total = q.count()
1642
+ page_rows = q.offset(offset).limit(limit).all()
1643
+
1644
+ items: list[NewsItem] = []
1645
+ for raw, processed, sentiment in page_rows:
1646
+ items.append(
1647
+ NewsItem(
1648
+ id=int(processed.id),
1649
+ raw_id=int(raw.id),
1650
+ title=str(raw.title or ""),
1651
+ description=str(raw.description or "") or None,
1652
+ url=str(raw.url or "") or None,
1653
+ channel=str(raw.source or "unknown"),
1654
+ publisher=_extract_publisher(raw.raw_payload),
1655
+ source_feed=str(raw.source_feed or "") or None,
1656
+ published_at=raw.published_at.isoformat() if raw.published_at else None,
1657
+ fetched_at=raw.fetched_at.isoformat() if raw.fetched_at else None,
1658
+ language=str(processed.language or "") or None,
1659
+ sentiment=_build_news_sentiment_block(sentiment),
1660
+ )
1661
+ )
1662
+
1663
+ response = NewsListResponse(
1664
+ items=items,
1665
+ total=int(total),
1666
+ limit=limit,
1667
+ offset=offset,
1668
+ has_more=(offset + limit) < int(total),
1669
+ generated_at=now.isoformat(),
1670
+ filters=filters_echo,
1671
+ )
1672
+
1673
+ payload = response.model_dump()
1674
+ _news_list_cache[cache_key] = (now_ts, payload)
1675
+ # Trim cache to avoid unbounded growth.
1676
+ if len(_news_list_cache) > 128:
1677
+ oldest = sorted(_news_list_cache.items(), key=lambda kv: kv[1][0])[: len(_news_list_cache) - 128]
1678
+ for k, _ in oldest:
1679
+ _news_list_cache.pop(k, None)
1680
+ return payload
1681
+
1682
+
1683
+ @app.get(
1684
+ "/api/news/stats",
1685
+ response_model=NewsStatsResponse,
1686
+ summary="Aggregate stats for the news sidebar header",
1687
+ )
1688
+ async def get_news_stats(
1689
+ since_hours: int = Query(default=24, ge=1, le=168),
1690
+ ):
1691
+ now_ts = datetime.now(timezone.utc).timestamp()
1692
+ cached = _news_stats_cache.get(since_hours)
1693
+ if cached and (now_ts - cached[0]) < _NEWS_STATS_TTL_S:
1694
+ return cached[1]
1695
+
1696
+ with SessionLocal() as session:
1697
+ now = datetime.now(timezone.utc)
1698
+ cutoff = now - timedelta(hours=since_hours)
1699
+
1700
+ rows = (
1701
+ session.query(NewsRaw, NewsProcessed, NewsSentimentV2)
1702
+ .join(NewsProcessed, NewsProcessed.raw_id == NewsRaw.id)
1703
+ .outerjoin(
1704
+ NewsSentimentV2,
1705
+ NewsSentimentV2.news_processed_id == NewsProcessed.id,
1706
+ )
1707
+ .filter(NewsRaw.published_at >= cutoff)
1708
+ .all()
1709
+ )
1710
+
1711
+ label_dist: dict[str, int] = {"BULLISH": 0, "BEARISH": 0, "NEUTRAL": 0}
1712
+ event_dist: dict[str, int] = {}
1713
+ channel_dist: dict[str, int] = {}
1714
+ publisher_acc: dict[str, dict[str, float]] = {}
1715
+ score_sum = 0.0
1716
+ conf_sum = 0.0
1717
+ rel_sum = 0.0
1718
+ scored_count = 0
1719
+ total = len(rows)
1720
+
1721
+ for raw, _processed, sent in rows:
1722
+ ch = str(raw.source or "unknown")
1723
+ channel_dist[ch] = channel_dist.get(ch, 0) + 1
1724
+ pub = _extract_publisher(raw.raw_payload)
1725
+ if pub:
1726
+ acc = publisher_acc.setdefault(pub, {"count": 0, "score_sum": 0.0})
1727
+ acc["count"] += 1
1728
+ if sent is not None and sent.final_score is not None:
1729
+ acc["score_sum"] += float(sent.final_score)
1730
+ if sent is None:
1731
+ continue
1732
+ scored_count += 1
1733
+ if sent.label in label_dist:
1734
+ label_dist[sent.label] += 1
1735
+ else:
1736
+ label_dist[sent.label] = label_dist.get(sent.label, 0) + 1
1737
+ etype = sent.event_type or "unknown"
1738
+ event_dist[etype] = event_dist.get(etype, 0) + 1
1739
+ if sent.final_score is not None:
1740
+ score_sum += float(sent.final_score)
1741
+ if sent.confidence_calibrated is not None:
1742
+ conf_sum += float(sent.confidence_calibrated)
1743
+ if sent.relevance_score is not None:
1744
+ rel_sum += float(sent.relevance_score)
1745
+
1746
+ top_publishers = sorted(
1747
+ (
1748
+ {
1749
+ "publisher": name,
1750
+ "count": int(data["count"]),
1751
+ "avg_final_score": (
1752
+ round(float(data["score_sum"]) / float(data["count"]), 4)
1753
+ if data["count"] > 0
1754
+ else 0.0
1755
+ ),
1756
+ }
1757
+ for name, data in publisher_acc.items()
1758
+ ),
1759
+ key=lambda item: item["count"],
1760
+ reverse=True,
1761
+ )[:5]
1762
+
1763
+ response = NewsStatsResponse(
1764
+ window_hours=since_hours,
1765
+ total_articles=total,
1766
+ scored_articles=scored_count,
1767
+ label_distribution=label_dist,
1768
+ event_type_distribution=event_dist,
1769
+ channel_distribution=channel_dist,
1770
+ top_publishers=top_publishers,
1771
+ avg_final_score=(score_sum / scored_count) if scored_count else None,
1772
+ avg_confidence=(conf_sum / scored_count) if scored_count else None,
1773
+ avg_relevance=(rel_sum / scored_count) if scored_count else None,
1774
+ generated_at=now.isoformat(),
1775
+ )
1776
+
1777
+ payload = response.model_dump()
1778
+ _news_stats_cache[since_hours] = (now_ts, payload)
1779
+ return payload
1780
+
1781
+
1782
+ @app.get(
1783
+ "/api/news/{processed_id}",
1784
+ response_model=NewsItem,
1785
+ summary="Full detail for a single news article",
1786
+ )
1787
+ async def get_news_item(processed_id: int):
1788
+ with SessionLocal() as session:
1789
+ row = (
1790
+ session.query(NewsRaw, NewsProcessed, NewsSentimentV2)
1791
+ .join(NewsProcessed, NewsProcessed.raw_id == NewsRaw.id)
1792
+ .outerjoin(
1793
+ NewsSentimentV2,
1794
+ NewsSentimentV2.news_processed_id == NewsProcessed.id,
1795
+ )
1796
+ .filter(NewsProcessed.id == processed_id)
1797
+ .first()
1798
+ )
1799
+ if row is None:
1800
+ raise HTTPException(status_code=404, detail="Article not found")
1801
+ raw, processed, sentiment = row
1802
+ return NewsItem(
1803
+ id=int(processed.id),
1804
+ raw_id=int(raw.id),
1805
+ title=str(raw.title or ""),
1806
+ description=str(raw.description or "") or None,
1807
+ url=str(raw.url or "") or None,
1808
+ channel=str(raw.source or "unknown"),
1809
+ publisher=_extract_publisher(raw.raw_payload),
1810
+ source_feed=str(raw.source_feed or "") or None,
1811
+ published_at=raw.published_at.isoformat() if raw.published_at else None,
1812
+ fetched_at=raw.fetched_at.isoformat() if raw.fetched_at else None,
1813
+ language=str(processed.language or "") or None,
1814
+ sentiment=_build_news_sentiment_block(sentiment),
1815
+ )
1816
+
1817
+
app/openrouter_client.py CHANGED
@@ -31,6 +31,37 @@ class OpenRouterRateLimitError(OpenRouterError):
31
  """Raised when OpenRouter rate limiting persists after retries."""
32
 
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  def _parse_retry_after_seconds(response: httpx.Response) -> Optional[float]:
35
  """Parse Retry-After header in seconds if provided."""
36
  value = response.headers.get("Retry-After")
@@ -167,6 +198,7 @@ async def create_chat_completion(
167
  continue
168
 
169
  if response.status_code == 200:
 
170
  try:
171
  return response.json()
172
  except ValueError as exc:
@@ -176,7 +208,19 @@ async def create_chat_completion(
176
  if retryable and attempt < max_retries:
177
  retry_num = attempt + 1
178
  retry_after = _parse_retry_after_seconds(response)
179
- delay = retry_after if retry_after is not None else float(2 ** retry_num) + random.uniform(0.0, 0.5)
 
 
 
 
 
 
 
 
 
 
 
 
180
  logger.warning(
181
  "OpenRouter retryable error status=%s (attempt %s/%s). Retrying in %.2fs",
182
  response.status_code,
 
31
  """Raised when OpenRouter rate limiting persists after retries."""
32
 
33
 
34
+ def _log_rate_limit_headers(
35
+ response: httpx.Response,
36
+ model: str,
37
+ *,
38
+ level: int = logging.DEBUG,
39
+ ) -> None:
40
+ """Surface OpenRouter/provider rate-limit headers so we can monitor quota."""
41
+ remaining = (
42
+ response.headers.get("X-Ratelimit-Remaining")
43
+ or response.headers.get("x-ratelimit-remaining")
44
+ or response.headers.get("X-RateLimit-Remaining")
45
+ )
46
+ limit = (
47
+ response.headers.get("X-Ratelimit-Limit")
48
+ or response.headers.get("x-ratelimit-limit")
49
+ or response.headers.get("X-RateLimit-Limit")
50
+ )
51
+ reset = (
52
+ response.headers.get("X-Ratelimit-Reset")
53
+ or response.headers.get("x-ratelimit-reset")
54
+ or response.headers.get("X-RateLimit-Reset")
55
+ )
56
+ if remaining is None and limit is None and reset is None:
57
+ return
58
+ logger.log(
59
+ level,
60
+ "OpenRouter quota [model=%s] remaining=%s limit=%s reset=%s",
61
+ model, remaining, limit, reset,
62
+ )
63
+
64
+
65
  def _parse_retry_after_seconds(response: httpx.Response) -> Optional[float]:
66
  """Parse Retry-After header in seconds if provided."""
67
  value = response.headers.get("Retry-After")
 
198
  continue
199
 
200
  if response.status_code == 200:
201
+ _log_rate_limit_headers(response, model)
202
  try:
203
  return response.json()
204
  except ValueError as exc:
 
208
  if retryable and attempt < max_retries:
209
  retry_num = attempt + 1
210
  retry_after = _parse_retry_after_seconds(response)
211
+ if response.status_code == 429:
212
+ # Free-tier daily limits rarely recover in seconds; enforce a
213
+ # floor so we don't burn remaining retries with tight retries.
214
+ base = retry_after if retry_after is not None else 30.0
215
+ delay = max(base, 30.0) + random.uniform(0.0, 5.0)
216
+ delay = min(delay, 300.0)
217
+ _log_rate_limit_headers(response, model, level=logging.WARNING)
218
+ else:
219
+ delay = (
220
+ retry_after
221
+ if retry_after is not None
222
+ else float(2 ** retry_num) + random.uniform(0.0, 0.5)
223
+ )
224
  logger.warning(
225
  "OpenRouter retryable error status=%s (attempt %s/%s). Retrying in %.2fs",
226
  response.status_code,
app/schemas.py CHANGED
@@ -257,3 +257,84 @@ class BacktestReportResponse(BaseModel):
257
  theta_comparison: Optional[Dict[str, Any]] = Field(None, description="Comparison with Theta baseline")
258
  verdict: Optional[str] = Field(None, description="TFT_SUPERIOR, THETA_SUPERIOR, or MIXED")
259
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  theta_comparison: Optional[Dict[str, Any]] = Field(None, description="Comparison with Theta baseline")
258
  verdict: Optional[str] = Field(None, description="TFT_SUPERIOR, THETA_SUPERIOR, or MIXED")
259
 
260
+
261
+ # =============================================================================
262
+ # News Intelligence schemas
263
+ # =============================================================================
264
+
265
+
266
+ class NewsFinbertProbs(BaseModel):
267
+ """FinBERT class probability triplet for a news article."""
268
+ pos: float = Field(..., ge=0, le=1)
269
+ neu: float = Field(..., ge=0, le=1)
270
+ neg: float = Field(..., ge=0, le=1)
271
+
272
+
273
+ class NewsSentimentBlock(BaseModel):
274
+ """Per-article sentiment payload shipped to the frontend feed."""
275
+ label: Optional[str] = Field(None, description="BULLISH | BEARISH | NEUTRAL")
276
+ final_score: Optional[float] = Field(None, description="Ensemble score in [-1, 1]")
277
+ impact_score_llm: Optional[float] = Field(None, description="LLM-only impact in [-1, 1]")
278
+ confidence: Optional[float] = Field(None, description="Calibrated confidence in [0, 1]")
279
+ relevance: Optional[float] = Field(None, description="Relevance to copper market in [0, 1]")
280
+ event_type: Optional[str] = Field(None, description="LLM event type bucket")
281
+ finbert: Optional[NewsFinbertProbs] = Field(None, description="FinBERT probability triplet")
282
+ reasoning: Optional[str] = Field(None, description="Short textual rationale from the LLM")
283
+ scored_at: Optional[str] = Field(None, description="ISO timestamp when the score was written")
284
+
285
+
286
+ class NewsItem(BaseModel):
287
+ """Single article row in the news feed."""
288
+ id: int = Field(..., description="news_processed id (stable frontend key)")
289
+ raw_id: Optional[int] = Field(None, description="news_raw id for debugging")
290
+ title: str
291
+ description: Optional[str] = None
292
+ url: Optional[str] = None
293
+ channel: str = Field(
294
+ ..., description="Ingestion channel (google_news, newsapi, ...)"
295
+ )
296
+ publisher: Optional[str] = Field(
297
+ None, description="Original publisher extracted from raw_payload.source"
298
+ )
299
+ source_feed: Optional[str] = Field(None, description="RSS query / feed identifier")
300
+ published_at: Optional[str] = Field(None, description="ISO timestamp")
301
+ fetched_at: Optional[str] = Field(None, description="ISO timestamp")
302
+ language: Optional[str] = None
303
+ sentiment: Optional[NewsSentimentBlock] = None
304
+
305
+
306
+ class NewsListResponse(BaseModel):
307
+ """Paginated news feed response."""
308
+ items: List[NewsItem] = Field(default_factory=list)
309
+ total: int = Field(..., description="Total rows matching filters (for pagination)")
310
+ limit: int = Field(...)
311
+ offset: int = Field(...)
312
+ has_more: bool = Field(...)
313
+ generated_at: str = Field(..., description="ISO timestamp the response was built")
314
+ filters: Dict[str, Any] = Field(
315
+ default_factory=dict,
316
+ description="Echo of the filter args applied server-side",
317
+ )
318
+
319
+
320
+ class NewsStatsResponse(BaseModel):
321
+ """Aggregate stats for the news intelligence sidebar header."""
322
+ window_hours: int = Field(..., description="Rolling window used for aggregation")
323
+ total_articles: int = Field(..., ge=0)
324
+ scored_articles: int = Field(..., ge=0)
325
+ label_distribution: Dict[str, int] = Field(
326
+ default_factory=dict, description="BULLISH/BEARISH/NEUTRAL counts"
327
+ )
328
+ event_type_distribution: Dict[str, int] = Field(default_factory=dict)
329
+ channel_distribution: Dict[str, int] = Field(
330
+ default_factory=dict, description="google_news / newsapi counts"
331
+ )
332
+ top_publishers: List[Dict[str, Any]] = Field(
333
+ default_factory=list,
334
+ description="[{publisher, count, avg_final_score}]",
335
+ )
336
+ avg_final_score: Optional[float] = None
337
+ avg_confidence: Optional[float] = None
338
+ avg_relevance: Optional[float] = None
339
+ generated_at: str = Field(...)
340
+
app/settings.py CHANGED
@@ -81,22 +81,26 @@ class Settings(BaseSettings):
81
  # OpenRouter AI Commentary
82
  openrouter_api_key: Optional[str] = None
83
  # Deprecated - kept for backward compatibility
84
- openrouter_model: str = "arcee-ai/trinity-large-preview:free"
85
  # Scoring models:
86
  # fast β†’ stepfun/step-3.5-flash:free (196B MoE, 256K ctx, system prompt + JSON OK)
87
  # reliable β†’ mistralai/mistral-small-3.1-24b-instruct:free (128K ctx, 24B, reliable JSON)
88
  # commentary β†’ same as fast for balanced quality/speed
89
  # NOTE: google/gemma-3-4b-it:free fails on Google AI Studio (system prompt blocked).
90
  # google/gemma-3n-e4b-it:free (nano) also blocks system prompts β€” do NOT use.
91
- openrouter_model_scoring: str = "stepfun/step-3.5-flash:free"
92
  openrouter_model_scoring_fast: Optional[str] = None
93
- openrouter_model_scoring_reliable: Optional[str] = "mistralai/mistral-small-3.1-24b-instruct:free"
94
- openrouter_model_commentary: str = "stepfun/step-3.5-flash:free"
95
  openrouter_rpm: int = 18
96
  openrouter_max_retries: int = 3
97
- # Free tier: 50 req/day. At 12 articles/chunk, 100 articles = ~9 chunks = ~9-18 req.
98
- # Keep well under the daily limit to avoid rate-limit cascades mid-run.
99
- max_llm_articles_per_run: int = 100
 
 
 
 
100
  openrouter_fallback_models: Optional[str] = None
101
  tokenizers_parallelism: str = "false"
102
 
@@ -114,7 +118,7 @@ class Settings(BaseSettings):
114
 
115
  # LLM Sentiment Analysis
116
  # Deprecated - kept for backward compatibility
117
- llm_sentiment_model: str = "arcee-ai/trinity-large-preview:free"
118
 
119
  # Pipeline trigger authentication
120
  pipeline_trigger_secret: Optional[str] = None
 
81
  # OpenRouter AI Commentary
82
  openrouter_api_key: Optional[str] = None
83
  # Deprecated - kept for backward compatibility
84
+ openrouter_model: str = "minimax/minimax-m2.5:free"
85
  # Scoring models:
86
  # fast β†’ stepfun/step-3.5-flash:free (196B MoE, 256K ctx, system prompt + JSON OK)
87
  # reliable β†’ mistralai/mistral-small-3.1-24b-instruct:free (128K ctx, 24B, reliable JSON)
88
  # commentary β†’ same as fast for balanced quality/speed
89
  # NOTE: google/gemma-3-4b-it:free fails on Google AI Studio (system prompt blocked).
90
  # google/gemma-3n-e4b-it:free (nano) also blocks system prompts β€” do NOT use.
91
+ openrouter_model_scoring: str = "minimax/minimax-m2.5:free"
92
  openrouter_model_scoring_fast: Optional[str] = None
93
+ openrouter_model_scoring_reliable: Optional[str] = "minimax/minimax-m2.5:free"
94
+ openrouter_model_commentary: str = "minimax/minimax-m2.5:free"
95
  openrouter_rpm: int = 18
96
  openrouter_max_retries: int = 3
97
+ # Free tier: ~50 req/day per model. At chunk_size=12 a run of 60 articles
98
+ # costs ~5 chunks (=5–10 requests incl. escalation) which leaves headroom
99
+ # for multiple runs per day before hitting the ceiling. Raise cautiously.
100
+ max_llm_articles_per_run: int = 60
101
+ # Comma-separated list of additional OpenRouter model slugs used by the
102
+ # client as transport-level fallbacks when the primary model 429s/5xx's.
103
+ # Example: "google/gemini-flash-1.5:free,meta-llama/llama-3.1-8b-instruct:free"
104
  openrouter_fallback_models: Optional[str] = None
105
  tokenizers_parallelism: str = "false"
106
 
 
118
 
119
  # LLM Sentiment Analysis
120
  # Deprecated - kept for backward compatibility
121
+ llm_sentiment_model: str = "minimax/minimax-m2.5:free"
122
 
123
  # Pipeline trigger authentication
124
  pipeline_trigger_secret: Optional[str] = None