Spaces:
Running
Running
Sync from GitHub (tests passed)
Browse files- app/ai_engine.py +78 -19
- app/main.py +347 -0
- app/openrouter_client.py +45 -1
- app/schemas.py +81 -0
- app/settings.py +12 -8
app/ai_engine.py
CHANGED
|
@@ -138,9 +138,13 @@ Label mapping:
|
|
| 138 |
- impact_score <= -0.15 => BEARISH
|
| 139 |
- otherwise => NEUTRAL
|
| 140 |
"""
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
SCORING_V2_VERSION = "commodity_v2"
|
| 145 |
|
| 146 |
|
|
@@ -1068,9 +1072,10 @@ async def _score_subset_with_model_v2(
|
|
| 1068 |
"fallback_models": settings.openrouter_fallback_models_list,
|
| 1069 |
"referer": "https://copper-mind.vercel.app",
|
| 1070 |
"title": "CopperMind Sentiment Analysis V2",
|
| 1071 |
-
"response_format": LLM_SCORING_RESPONSE_FORMAT_V2,
|
| 1072 |
"extra_payload": {"reasoning": {"exclude": True}},
|
| 1073 |
}
|
|
|
|
|
|
|
| 1074 |
return await create_chat_completion(**request_kwargs)
|
| 1075 |
|
| 1076 |
parse_fail_count = 0
|
|
@@ -1187,13 +1192,14 @@ async def score_batch_with_llm_v2(
|
|
| 1187 |
escalation_ids = sorted(set(fast_failed).union(conflict_ids))
|
| 1188 |
escalation_count = len(escalation_ids)
|
| 1189 |
|
|
|
|
| 1190 |
if escalation_ids and not fast_rate_limited:
|
| 1191 |
reliable_subset = [
|
| 1192 |
article_by_id[article_id]
|
| 1193 |
for article_id in escalation_ids
|
| 1194 |
if article_id in article_by_id
|
| 1195 |
]
|
| 1196 |
-
reliable_valid, _reliable_failed, parse_fail_reliable,
|
| 1197 |
settings=settings,
|
| 1198 |
model_name=reliable_model,
|
| 1199 |
articles=reliable_subset,
|
|
@@ -1219,6 +1225,10 @@ async def score_batch_with_llm_v2(
|
|
| 1219 |
"fallback_count": fallback_count,
|
| 1220 |
"model_fast": fast_model,
|
| 1221 |
"model_reliable": reliable_model,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1222 |
}
|
| 1223 |
|
| 1224 |
|
|
@@ -1347,6 +1357,7 @@ def score_unscored_processed_articles(
|
|
| 1347 |
NewsProcessed.id.label("processed_id"),
|
| 1348 |
NewsProcessed.canonical_title,
|
| 1349 |
NewsProcessed.cleaned_text,
|
|
|
|
| 1350 |
NewsRaw.title.label("raw_title"),
|
| 1351 |
NewsRaw.description.label("raw_description"),
|
| 1352 |
NewsRaw.published_at,
|
|
@@ -1387,22 +1398,37 @@ def score_unscored_processed_articles(
|
|
| 1387 |
fast_model = settings.resolved_scoring_fast_model
|
| 1388 |
reliable_model = settings.resolved_scoring_reliable_model
|
| 1389 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1390 |
for chunk_idx in range(0, len(rows), chunk_size):
|
| 1391 |
chunk_rows = rows[chunk_idx:chunk_idx + chunk_size]
|
| 1392 |
chunk_items: list[dict] = []
|
|
|
|
| 1393 |
for row in chunk_rows:
|
| 1394 |
title = str(row.raw_title or row.canonical_title or "")[:500]
|
| 1395 |
description = str(row.raw_description or "")[:1000]
|
| 1396 |
text = str(row.cleaned_text or f"{title} {description}")[:2000]
|
|
|
|
|
|
|
| 1397 |
chunk_items.append(
|
| 1398 |
{
|
| 1399 |
-
"id":
|
| 1400 |
"title": title,
|
| 1401 |
"description": description,
|
| 1402 |
"text": text,
|
| 1403 |
"published_at": row.published_at,
|
|
|
|
| 1404 |
}
|
| 1405 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1406 |
|
| 1407 |
finbert_by_id = score_batch_with_finbert_v2(chunk_items)
|
| 1408 |
finbert_used += len(finbert_by_id)
|
|
@@ -1410,14 +1436,29 @@ def score_unscored_processed_articles(
|
|
| 1410 |
llm_results_by_id: dict[int, dict] = {}
|
| 1411 |
llm_candidates: list[dict] = []
|
| 1412 |
|
| 1413 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1414 |
today_utc = datetime.now(timezone.utc).date().isoformat()
|
| 1415 |
-
|
| 1416 |
-
|
| 1417 |
-
|
| 1418 |
-
|
| 1419 |
-
|
| 1420 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1421 |
llm_budget_remaining -= llm_take
|
| 1422 |
|
| 1423 |
if llm_candidates:
|
|
@@ -1434,13 +1475,31 @@ def score_unscored_processed_articles(
|
|
| 1434 |
fast_model = str(llm_bundle.get("model_fast", fast_model))
|
| 1435 |
reliable_model = str(llm_bundle.get("model_reliable", reliable_model))
|
| 1436 |
|
| 1437 |
-
#
|
| 1438 |
-
#
|
| 1439 |
-
|
| 1440 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1441 |
logger.warning(
|
| 1442 |
-
"V2
|
| 1443 |
-
|
|
|
|
| 1444 |
)
|
| 1445 |
|
| 1446 |
except Exception as exc:
|
|
|
|
| 138 |
- impact_score <= -0.15 => BEARISH
|
| 139 |
- otherwise => NEUTRAL
|
| 140 |
"""
|
| 141 |
+
# NOTE: Intentionally omit OpenAI-style `response_format` for V2 scoring.
|
| 142 |
+
# Some free-tier OpenRouter providers (stepfun, mistral free variants) either
|
| 143 |
+
# reject `{"type": "json_object"}` or wrap the expected JSON array inside an
|
| 144 |
+
# object/unexpected structure when that hint is set. The prompt explicitly asks
|
| 145 |
+
# for a JSON array, and `_clean_json_content` already handles markdown fences,
|
| 146 |
+
# wrapped objects, and preambles, so we rely on prompt+post-processing instead.
|
| 147 |
+
LLM_SCORING_RESPONSE_FORMAT_V2: dict[str, Any] | None = None
|
| 148 |
SCORING_V2_VERSION = "commodity_v2"
|
| 149 |
|
| 150 |
|
|
|
|
| 1072 |
"fallback_models": settings.openrouter_fallback_models_list,
|
| 1073 |
"referer": "https://copper-mind.vercel.app",
|
| 1074 |
"title": "CopperMind Sentiment Analysis V2",
|
|
|
|
| 1075 |
"extra_payload": {"reasoning": {"exclude": True}},
|
| 1076 |
}
|
| 1077 |
+
if LLM_SCORING_RESPONSE_FORMAT_V2 is not None:
|
| 1078 |
+
request_kwargs["response_format"] = LLM_SCORING_RESPONSE_FORMAT_V2
|
| 1079 |
return await create_chat_completion(**request_kwargs)
|
| 1080 |
|
| 1081 |
parse_fail_count = 0
|
|
|
|
| 1192 |
escalation_ids = sorted(set(fast_failed).union(conflict_ids))
|
| 1193 |
escalation_count = len(escalation_ids)
|
| 1194 |
|
| 1195 |
+
reliable_rate_limited = False
|
| 1196 |
if escalation_ids and not fast_rate_limited:
|
| 1197 |
reliable_subset = [
|
| 1198 |
article_by_id[article_id]
|
| 1199 |
for article_id in escalation_ids
|
| 1200 |
if article_id in article_by_id
|
| 1201 |
]
|
| 1202 |
+
reliable_valid, _reliable_failed, parse_fail_reliable, reliable_rate_limited = await _score_subset_with_model_v2(
|
| 1203 |
settings=settings,
|
| 1204 |
model_name=reliable_model,
|
| 1205 |
articles=reliable_subset,
|
|
|
|
| 1225 |
"fallback_count": fallback_count,
|
| 1226 |
"model_fast": fast_model,
|
| 1227 |
"model_reliable": reliable_model,
|
| 1228 |
+
"rate_limited_fast": bool(fast_rate_limited),
|
| 1229 |
+
"rate_limited_reliable": bool(reliable_rate_limited),
|
| 1230 |
+
# Backward-compat: true only when BOTH models hit their daily ceiling.
|
| 1231 |
+
"rate_limited": bool(fast_rate_limited and reliable_rate_limited),
|
| 1232 |
}
|
| 1233 |
|
| 1234 |
|
|
|
|
| 1357 |
NewsProcessed.id.label("processed_id"),
|
| 1358 |
NewsProcessed.canonical_title,
|
| 1359 |
NewsProcessed.cleaned_text,
|
| 1360 |
+
NewsProcessed.language.label("language"),
|
| 1361 |
NewsRaw.title.label("raw_title"),
|
| 1362 |
NewsRaw.description.label("raw_description"),
|
| 1363 |
NewsRaw.published_at,
|
|
|
|
| 1398 |
fast_model = settings.resolved_scoring_fast_model
|
| 1399 |
reliable_model = settings.resolved_scoring_reliable_model
|
| 1400 |
|
| 1401 |
+
# Articles that are non-English or too short do not benefit from LLM
|
| 1402 |
+
# classification (prompt is English, quotas are scarce) β we route them
|
| 1403 |
+
# straight to FinBERT+rule fallback. These thresholds are intentionally
|
| 1404 |
+
# conservative so we only skip when we're confident the LLM call would
|
| 1405 |
+
# be wasted.
|
| 1406 |
+
MIN_TEXT_CHARS_FOR_LLM = 80
|
| 1407 |
+
|
| 1408 |
for chunk_idx in range(0, len(rows), chunk_size):
|
| 1409 |
chunk_rows = rows[chunk_idx:chunk_idx + chunk_size]
|
| 1410 |
chunk_items: list[dict] = []
|
| 1411 |
+
llm_eligible_ids: set[int] = set()
|
| 1412 |
for row in chunk_rows:
|
| 1413 |
title = str(row.raw_title or row.canonical_title or "")[:500]
|
| 1414 |
description = str(row.raw_description or "")[:1000]
|
| 1415 |
text = str(row.cleaned_text or f"{title} {description}")[:2000]
|
| 1416 |
+
language = (getattr(row, "language", None) or "").strip().lower()
|
| 1417 |
+
processed_id = int(row.processed_id)
|
| 1418 |
chunk_items.append(
|
| 1419 |
{
|
| 1420 |
+
"id": processed_id,
|
| 1421 |
"title": title,
|
| 1422 |
"description": description,
|
| 1423 |
"text": text,
|
| 1424 |
"published_at": row.published_at,
|
| 1425 |
+
"language": language or None,
|
| 1426 |
}
|
| 1427 |
)
|
| 1428 |
+
is_english = (not language) or language.startswith("en")
|
| 1429 |
+
long_enough = len(text) >= MIN_TEXT_CHARS_FOR_LLM
|
| 1430 |
+
if is_english and long_enough:
|
| 1431 |
+
llm_eligible_ids.add(processed_id)
|
| 1432 |
|
| 1433 |
finbert_by_id = score_batch_with_finbert_v2(chunk_items)
|
| 1434 |
finbert_used += len(finbert_by_id)
|
|
|
|
| 1436 |
llm_results_by_id: dict[int, dict] = {}
|
| 1437 |
llm_candidates: list[dict] = []
|
| 1438 |
|
| 1439 |
+
# Per-model rate-limit tracking (keyed by UTC date). A specific model
|
| 1440 |
+
# is considered exhausted for today if its entry equals today's date.
|
| 1441 |
+
# LLM scoring is skipped for the chunk only when BOTH fast and reliable
|
| 1442 |
+
# models have been flagged today β otherwise we still attempt (fallback
|
| 1443 |
+
# chain inside `score_batch_with_llm_v2` handles partial exhaustion).
|
| 1444 |
today_utc = datetime.now(timezone.utc).date().isoformat()
|
| 1445 |
+
rate_limited_by_model: dict[str, str] = getattr(
|
| 1446 |
+
score_unscored_processed_articles, "_rate_limited_by_model", {}
|
| 1447 |
+
) or {}
|
| 1448 |
+
fast_exhausted = rate_limited_by_model.get(fast_model) == today_utc
|
| 1449 |
+
reliable_exhausted = rate_limited_by_model.get(reliable_model) == today_utc
|
| 1450 |
+
both_exhausted = fast_exhausted and reliable_exhausted
|
| 1451 |
+
|
| 1452 |
+
if settings.openrouter_api_key and llm_budget_remaining > 0 and not both_exhausted:
|
| 1453 |
+
eligible_items = [item for item in chunk_items if item["id"] in llm_eligible_ids]
|
| 1454 |
+
skipped = len(chunk_items) - len(eligible_items)
|
| 1455 |
+
if skipped > 0:
|
| 1456 |
+
logger.info(
|
| 1457 |
+
"V2 skipping %d non-English/short-text articles; routing directly to FinBERT+rule fallback",
|
| 1458 |
+
skipped,
|
| 1459 |
+
)
|
| 1460 |
+
llm_take = min(len(eligible_items), llm_budget_remaining)
|
| 1461 |
+
llm_candidates = eligible_items[:llm_take]
|
| 1462 |
llm_budget_remaining -= llm_take
|
| 1463 |
|
| 1464 |
if llm_candidates:
|
|
|
|
| 1475 |
fast_model = str(llm_bundle.get("model_fast", fast_model))
|
| 1476 |
reliable_model = str(llm_bundle.get("model_reliable", reliable_model))
|
| 1477 |
|
| 1478 |
+
# Record per-model rate-limit state so individual model exhaustion
|
| 1479 |
+
# doesn't block the other one. Only emits the "disabled for day"
|
| 1480 |
+
# warning when both are flagged.
|
| 1481 |
+
updated = False
|
| 1482 |
+
if llm_bundle.get("rate_limited_fast"):
|
| 1483 |
+
rate_limited_by_model[fast_model] = today_utc
|
| 1484 |
+
updated = True
|
| 1485 |
+
logger.warning(
|
| 1486 |
+
"V2 fast model %s hit daily rate limit; will be skipped for rest of UTC day %s.",
|
| 1487 |
+
fast_model, today_utc,
|
| 1488 |
+
)
|
| 1489 |
+
if llm_bundle.get("rate_limited_reliable"):
|
| 1490 |
+
rate_limited_by_model[reliable_model] = today_utc
|
| 1491 |
+
updated = True
|
| 1492 |
+
logger.warning(
|
| 1493 |
+
"V2 reliable model %s hit daily rate limit; will be skipped for rest of UTC day %s.",
|
| 1494 |
+
reliable_model, today_utc,
|
| 1495 |
+
)
|
| 1496 |
+
if updated:
|
| 1497 |
+
score_unscored_processed_articles._rate_limited_by_model = rate_limited_by_model
|
| 1498 |
+
if llm_bundle.get("rate_limited"):
|
| 1499 |
logger.warning(
|
| 1500 |
+
"V2 both models (%s, %s) rate-limited; LLM scoring paused until UTC %s.",
|
| 1501 |
+
fast_model, reliable_model,
|
| 1502 |
+
(datetime.now(timezone.utc).date() + timedelta(days=1)).isoformat(),
|
| 1503 |
)
|
| 1504 |
|
| 1505 |
except Exception as exc:
|
app/main.py
CHANGED
|
@@ -36,6 +36,11 @@ from app.schemas import (
|
|
| 36 |
ConsensusSignal,
|
| 37 |
TFTModelSummaryResponse,
|
| 38 |
BacktestReportResponse,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
)
|
| 40 |
|
| 41 |
# Configure logging
|
|
@@ -1468,3 +1473,345 @@ async def get_sentiment_summary(
|
|
| 1468 |
"generated_at": now.isoformat(),
|
| 1469 |
}
|
| 1470 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
ConsensusSignal,
|
| 37 |
TFTModelSummaryResponse,
|
| 38 |
BacktestReportResponse,
|
| 39 |
+
NewsItem,
|
| 40 |
+
NewsListResponse,
|
| 41 |
+
NewsStatsResponse,
|
| 42 |
+
NewsFinbertProbs,
|
| 43 |
+
NewsSentimentBlock,
|
| 44 |
)
|
| 45 |
|
| 46 |
# Configure logging
|
|
|
|
| 1473 |
"generated_at": now.isoformat(),
|
| 1474 |
}
|
| 1475 |
|
| 1476 |
+
|
| 1477 |
+
# =============================================================================
|
| 1478 |
+
# News intelligence endpoints
|
| 1479 |
+
# =============================================================================
|
| 1480 |
+
#
|
| 1481 |
+
# Serves the Overview right-sidebar news feed. Reads from the news_raw/
|
| 1482 |
+
# news_processed/news_sentiments_v2 pipeline the daily worker already fills β
|
| 1483 |
+
# no LLM is invoked on the hot path.
|
| 1484 |
+
#
|
| 1485 |
+
# Source taxonomy:
|
| 1486 |
+
# * channel = ingestion channel (NewsRaw.source): "google_news" | "newsapi"
|
| 1487 |
+
# * publisher = original publisher (raw_payload.source): Reuters, Mining.comβ¦
|
| 1488 |
+
# =============================================================================
|
| 1489 |
+
|
| 1490 |
+
_news_list_cache: dict[tuple, tuple[float, dict]] = {}
|
| 1491 |
+
_news_stats_cache: dict[int, tuple[float, dict]] = {}
|
| 1492 |
+
_NEWS_LIST_TTL_S = 60.0
|
| 1493 |
+
_NEWS_STATS_TTL_S = 120.0
|
| 1494 |
+
_VALID_LABELS = {"BULLISH", "BEARISH", "NEUTRAL"}
|
| 1495 |
+
|
| 1496 |
+
|
| 1497 |
+
def _extract_publisher(raw_payload) -> Optional[str]:
|
| 1498 |
+
"""Pull the original publisher name out of a NewsRaw.raw_payload blob."""
|
| 1499 |
+
if not raw_payload:
|
| 1500 |
+
return None
|
| 1501 |
+
if isinstance(raw_payload, str):
|
| 1502 |
+
try:
|
| 1503 |
+
import json as _json
|
| 1504 |
+
raw_payload = _json.loads(raw_payload)
|
| 1505 |
+
except (ValueError, TypeError):
|
| 1506 |
+
return None
|
| 1507 |
+
if not isinstance(raw_payload, dict):
|
| 1508 |
+
return None
|
| 1509 |
+
src = raw_payload.get("source")
|
| 1510 |
+
if isinstance(src, dict):
|
| 1511 |
+
name = src.get("name") or src.get("title")
|
| 1512 |
+
return str(name) if name else None
|
| 1513 |
+
if isinstance(src, str) and src.strip():
|
| 1514 |
+
return src.strip()
|
| 1515 |
+
name = raw_payload.get("publisher") or raw_payload.get("author")
|
| 1516 |
+
return str(name) if name else None
|
| 1517 |
+
|
| 1518 |
+
|
| 1519 |
+
def _build_news_sentiment_block(sent: Optional[NewsSentimentV2]) -> Optional[NewsSentimentBlock]:
|
| 1520 |
+
if sent is None:
|
| 1521 |
+
return None
|
| 1522 |
+
return NewsSentimentBlock(
|
| 1523 |
+
label=sent.label,
|
| 1524 |
+
final_score=float(sent.final_score) if sent.final_score is not None else None,
|
| 1525 |
+
impact_score_llm=float(sent.impact_score_llm) if sent.impact_score_llm is not None else None,
|
| 1526 |
+
confidence=float(sent.confidence_calibrated) if sent.confidence_calibrated is not None else None,
|
| 1527 |
+
relevance=float(sent.relevance_score) if sent.relevance_score is not None else None,
|
| 1528 |
+
event_type=sent.event_type,
|
| 1529 |
+
finbert=NewsFinbertProbs(
|
| 1530 |
+
pos=float(sent.finbert_pos or 0.0),
|
| 1531 |
+
neu=float(sent.finbert_neu or 0.0),
|
| 1532 |
+
neg=float(sent.finbert_neg or 0.0),
|
| 1533 |
+
),
|
| 1534 |
+
reasoning=_extract_reasoning_text(sent.reasoning_json),
|
| 1535 |
+
scored_at=sent.scored_at.isoformat() if sent.scored_at else None,
|
| 1536 |
+
)
|
| 1537 |
+
|
| 1538 |
+
|
| 1539 |
+
def _extract_reasoning_text(reasoning_json: Optional[str]) -> Optional[str]:
|
| 1540 |
+
"""Pull a short human-readable rationale out of the cached JSON blob."""
|
| 1541 |
+
if not reasoning_json:
|
| 1542 |
+
return None
|
| 1543 |
+
try:
|
| 1544 |
+
import json as _json
|
| 1545 |
+
blob = _json.loads(reasoning_json)
|
| 1546 |
+
except (ValueError, TypeError):
|
| 1547 |
+
return str(reasoning_json)[:500] if reasoning_json else None
|
| 1548 |
+
if isinstance(blob, dict):
|
| 1549 |
+
for key in ("reasoning", "rationale", "summary", "explanation"):
|
| 1550 |
+
val = blob.get(key)
|
| 1551 |
+
if isinstance(val, str) and val.strip():
|
| 1552 |
+
return val.strip()[:500]
|
| 1553 |
+
return None
|
| 1554 |
+
if isinstance(blob, str):
|
| 1555 |
+
return blob[:500]
|
| 1556 |
+
return None
|
| 1557 |
+
|
| 1558 |
+
|
| 1559 |
+
@app.get(
|
| 1560 |
+
"/api/news",
|
| 1561 |
+
response_model=NewsListResponse,
|
| 1562 |
+
summary="Paginated news feed with sentiment annotations",
|
| 1563 |
+
)
|
| 1564 |
+
async def get_news_feed(
|
| 1565 |
+
limit: int = Query(default=20, ge=1, le=50),
|
| 1566 |
+
offset: int = Query(default=0, ge=0),
|
| 1567 |
+
since_hours: int = Query(default=48, ge=1, le=168),
|
| 1568 |
+
label: str = Query(default="all"),
|
| 1569 |
+
event_type: str = Query(default="all"),
|
| 1570 |
+
min_relevance: float = Query(default=0.0, ge=0.0, le=1.0),
|
| 1571 |
+
channel: str = Query(default="all"),
|
| 1572 |
+
publisher: Optional[str] = Query(default=None, max_length=200),
|
| 1573 |
+
search: Optional[str] = Query(default=None, max_length=200),
|
| 1574 |
+
):
|
| 1575 |
+
from sqlalchemy import desc as _desc
|
| 1576 |
+
|
| 1577 |
+
filters_echo = {
|
| 1578 |
+
"limit": limit,
|
| 1579 |
+
"offset": offset,
|
| 1580 |
+
"since_hours": since_hours,
|
| 1581 |
+
"label": label,
|
| 1582 |
+
"event_type": event_type,
|
| 1583 |
+
"min_relevance": min_relevance,
|
| 1584 |
+
"channel": channel,
|
| 1585 |
+
"publisher": publisher,
|
| 1586 |
+
"search": search,
|
| 1587 |
+
}
|
| 1588 |
+
cache_key = tuple(sorted(filters_echo.items()))
|
| 1589 |
+
now_ts = datetime.now(timezone.utc).timestamp()
|
| 1590 |
+
cached = _news_list_cache.get(cache_key)
|
| 1591 |
+
if cached and (now_ts - cached[0]) < _NEWS_LIST_TTL_S:
|
| 1592 |
+
return cached[1]
|
| 1593 |
+
|
| 1594 |
+
label_upper = label.upper()
|
| 1595 |
+
if label_upper != "ALL" and label_upper not in _VALID_LABELS:
|
| 1596 |
+
raise HTTPException(status_code=400, detail=f"Invalid label '{label}'")
|
| 1597 |
+
|
| 1598 |
+
with SessionLocal() as session:
|
| 1599 |
+
now = datetime.now(timezone.utc)
|
| 1600 |
+
cutoff = now - timedelta(hours=since_hours)
|
| 1601 |
+
|
| 1602 |
+
q = (
|
| 1603 |
+
session.query(NewsRaw, NewsProcessed, NewsSentimentV2)
|
| 1604 |
+
.join(NewsProcessed, NewsProcessed.raw_id == NewsRaw.id)
|
| 1605 |
+
.outerjoin(
|
| 1606 |
+
NewsSentimentV2,
|
| 1607 |
+
NewsSentimentV2.news_processed_id == NewsProcessed.id,
|
| 1608 |
+
)
|
| 1609 |
+
.filter(NewsRaw.published_at >= cutoff)
|
| 1610 |
+
)
|
| 1611 |
+
|
| 1612 |
+
if channel.lower() != "all":
|
| 1613 |
+
q = q.filter(NewsRaw.source == channel)
|
| 1614 |
+
if event_type.lower() != "all":
|
| 1615 |
+
q = q.filter(NewsSentimentV2.event_type == event_type)
|
| 1616 |
+
if label_upper != "ALL":
|
| 1617 |
+
q = q.filter(NewsSentimentV2.label == label_upper)
|
| 1618 |
+
if min_relevance > 0:
|
| 1619 |
+
q = q.filter(NewsSentimentV2.relevance_score >= min_relevance)
|
| 1620 |
+
if search:
|
| 1621 |
+
q = q.filter(NewsRaw.title.ilike(f"%{search}%"))
|
| 1622 |
+
|
| 1623 |
+
q = q.order_by(_desc(NewsRaw.published_at))
|
| 1624 |
+
|
| 1625 |
+
publisher_needle = publisher.strip().lower() if publisher and publisher.strip() else None
|
| 1626 |
+
|
| 1627 |
+
if publisher_needle:
|
| 1628 |
+
# Publisher filter requires JSON extraction; do it in Python to
|
| 1629 |
+
# remain backend-agnostic (sqlite/postgres) and keep the endpoint
|
| 1630 |
+
# simple. Scope is bounded by the time window filter above.
|
| 1631 |
+
rows = q.limit(500).all()
|
| 1632 |
+
filtered = [
|
| 1633 |
+
triple for triple in rows
|
| 1634 |
+
if (
|
| 1635 |
+
_extract_publisher(triple[0].raw_payload) or ""
|
| 1636 |
+
).lower().find(publisher_needle) >= 0
|
| 1637 |
+
]
|
| 1638 |
+
total = len(filtered)
|
| 1639 |
+
page_rows = filtered[offset: offset + limit]
|
| 1640 |
+
else:
|
| 1641 |
+
total = q.count()
|
| 1642 |
+
page_rows = q.offset(offset).limit(limit).all()
|
| 1643 |
+
|
| 1644 |
+
items: list[NewsItem] = []
|
| 1645 |
+
for raw, processed, sentiment in page_rows:
|
| 1646 |
+
items.append(
|
| 1647 |
+
NewsItem(
|
| 1648 |
+
id=int(processed.id),
|
| 1649 |
+
raw_id=int(raw.id),
|
| 1650 |
+
title=str(raw.title or ""),
|
| 1651 |
+
description=str(raw.description or "") or None,
|
| 1652 |
+
url=str(raw.url or "") or None,
|
| 1653 |
+
channel=str(raw.source or "unknown"),
|
| 1654 |
+
publisher=_extract_publisher(raw.raw_payload),
|
| 1655 |
+
source_feed=str(raw.source_feed or "") or None,
|
| 1656 |
+
published_at=raw.published_at.isoformat() if raw.published_at else None,
|
| 1657 |
+
fetched_at=raw.fetched_at.isoformat() if raw.fetched_at else None,
|
| 1658 |
+
language=str(processed.language or "") or None,
|
| 1659 |
+
sentiment=_build_news_sentiment_block(sentiment),
|
| 1660 |
+
)
|
| 1661 |
+
)
|
| 1662 |
+
|
| 1663 |
+
response = NewsListResponse(
|
| 1664 |
+
items=items,
|
| 1665 |
+
total=int(total),
|
| 1666 |
+
limit=limit,
|
| 1667 |
+
offset=offset,
|
| 1668 |
+
has_more=(offset + limit) < int(total),
|
| 1669 |
+
generated_at=now.isoformat(),
|
| 1670 |
+
filters=filters_echo,
|
| 1671 |
+
)
|
| 1672 |
+
|
| 1673 |
+
payload = response.model_dump()
|
| 1674 |
+
_news_list_cache[cache_key] = (now_ts, payload)
|
| 1675 |
+
# Trim cache to avoid unbounded growth.
|
| 1676 |
+
if len(_news_list_cache) > 128:
|
| 1677 |
+
oldest = sorted(_news_list_cache.items(), key=lambda kv: kv[1][0])[: len(_news_list_cache) - 128]
|
| 1678 |
+
for k, _ in oldest:
|
| 1679 |
+
_news_list_cache.pop(k, None)
|
| 1680 |
+
return payload
|
| 1681 |
+
|
| 1682 |
+
|
| 1683 |
+
@app.get(
|
| 1684 |
+
"/api/news/stats",
|
| 1685 |
+
response_model=NewsStatsResponse,
|
| 1686 |
+
summary="Aggregate stats for the news sidebar header",
|
| 1687 |
+
)
|
| 1688 |
+
async def get_news_stats(
|
| 1689 |
+
since_hours: int = Query(default=24, ge=1, le=168),
|
| 1690 |
+
):
|
| 1691 |
+
now_ts = datetime.now(timezone.utc).timestamp()
|
| 1692 |
+
cached = _news_stats_cache.get(since_hours)
|
| 1693 |
+
if cached and (now_ts - cached[0]) < _NEWS_STATS_TTL_S:
|
| 1694 |
+
return cached[1]
|
| 1695 |
+
|
| 1696 |
+
with SessionLocal() as session:
|
| 1697 |
+
now = datetime.now(timezone.utc)
|
| 1698 |
+
cutoff = now - timedelta(hours=since_hours)
|
| 1699 |
+
|
| 1700 |
+
rows = (
|
| 1701 |
+
session.query(NewsRaw, NewsProcessed, NewsSentimentV2)
|
| 1702 |
+
.join(NewsProcessed, NewsProcessed.raw_id == NewsRaw.id)
|
| 1703 |
+
.outerjoin(
|
| 1704 |
+
NewsSentimentV2,
|
| 1705 |
+
NewsSentimentV2.news_processed_id == NewsProcessed.id,
|
| 1706 |
+
)
|
| 1707 |
+
.filter(NewsRaw.published_at >= cutoff)
|
| 1708 |
+
.all()
|
| 1709 |
+
)
|
| 1710 |
+
|
| 1711 |
+
label_dist: dict[str, int] = {"BULLISH": 0, "BEARISH": 0, "NEUTRAL": 0}
|
| 1712 |
+
event_dist: dict[str, int] = {}
|
| 1713 |
+
channel_dist: dict[str, int] = {}
|
| 1714 |
+
publisher_acc: dict[str, dict[str, float]] = {}
|
| 1715 |
+
score_sum = 0.0
|
| 1716 |
+
conf_sum = 0.0
|
| 1717 |
+
rel_sum = 0.0
|
| 1718 |
+
scored_count = 0
|
| 1719 |
+
total = len(rows)
|
| 1720 |
+
|
| 1721 |
+
for raw, _processed, sent in rows:
|
| 1722 |
+
ch = str(raw.source or "unknown")
|
| 1723 |
+
channel_dist[ch] = channel_dist.get(ch, 0) + 1
|
| 1724 |
+
pub = _extract_publisher(raw.raw_payload)
|
| 1725 |
+
if pub:
|
| 1726 |
+
acc = publisher_acc.setdefault(pub, {"count": 0, "score_sum": 0.0})
|
| 1727 |
+
acc["count"] += 1
|
| 1728 |
+
if sent is not None and sent.final_score is not None:
|
| 1729 |
+
acc["score_sum"] += float(sent.final_score)
|
| 1730 |
+
if sent is None:
|
| 1731 |
+
continue
|
| 1732 |
+
scored_count += 1
|
| 1733 |
+
if sent.label in label_dist:
|
| 1734 |
+
label_dist[sent.label] += 1
|
| 1735 |
+
else:
|
| 1736 |
+
label_dist[sent.label] = label_dist.get(sent.label, 0) + 1
|
| 1737 |
+
etype = sent.event_type or "unknown"
|
| 1738 |
+
event_dist[etype] = event_dist.get(etype, 0) + 1
|
| 1739 |
+
if sent.final_score is not None:
|
| 1740 |
+
score_sum += float(sent.final_score)
|
| 1741 |
+
if sent.confidence_calibrated is not None:
|
| 1742 |
+
conf_sum += float(sent.confidence_calibrated)
|
| 1743 |
+
if sent.relevance_score is not None:
|
| 1744 |
+
rel_sum += float(sent.relevance_score)
|
| 1745 |
+
|
| 1746 |
+
top_publishers = sorted(
|
| 1747 |
+
(
|
| 1748 |
+
{
|
| 1749 |
+
"publisher": name,
|
| 1750 |
+
"count": int(data["count"]),
|
| 1751 |
+
"avg_final_score": (
|
| 1752 |
+
round(float(data["score_sum"]) / float(data["count"]), 4)
|
| 1753 |
+
if data["count"] > 0
|
| 1754 |
+
else 0.0
|
| 1755 |
+
),
|
| 1756 |
+
}
|
| 1757 |
+
for name, data in publisher_acc.items()
|
| 1758 |
+
),
|
| 1759 |
+
key=lambda item: item["count"],
|
| 1760 |
+
reverse=True,
|
| 1761 |
+
)[:5]
|
| 1762 |
+
|
| 1763 |
+
response = NewsStatsResponse(
|
| 1764 |
+
window_hours=since_hours,
|
| 1765 |
+
total_articles=total,
|
| 1766 |
+
scored_articles=scored_count,
|
| 1767 |
+
label_distribution=label_dist,
|
| 1768 |
+
event_type_distribution=event_dist,
|
| 1769 |
+
channel_distribution=channel_dist,
|
| 1770 |
+
top_publishers=top_publishers,
|
| 1771 |
+
avg_final_score=(score_sum / scored_count) if scored_count else None,
|
| 1772 |
+
avg_confidence=(conf_sum / scored_count) if scored_count else None,
|
| 1773 |
+
avg_relevance=(rel_sum / scored_count) if scored_count else None,
|
| 1774 |
+
generated_at=now.isoformat(),
|
| 1775 |
+
)
|
| 1776 |
+
|
| 1777 |
+
payload = response.model_dump()
|
| 1778 |
+
_news_stats_cache[since_hours] = (now_ts, payload)
|
| 1779 |
+
return payload
|
| 1780 |
+
|
| 1781 |
+
|
| 1782 |
+
@app.get(
|
| 1783 |
+
"/api/news/{processed_id}",
|
| 1784 |
+
response_model=NewsItem,
|
| 1785 |
+
summary="Full detail for a single news article",
|
| 1786 |
+
)
|
| 1787 |
+
async def get_news_item(processed_id: int):
|
| 1788 |
+
with SessionLocal() as session:
|
| 1789 |
+
row = (
|
| 1790 |
+
session.query(NewsRaw, NewsProcessed, NewsSentimentV2)
|
| 1791 |
+
.join(NewsProcessed, NewsProcessed.raw_id == NewsRaw.id)
|
| 1792 |
+
.outerjoin(
|
| 1793 |
+
NewsSentimentV2,
|
| 1794 |
+
NewsSentimentV2.news_processed_id == NewsProcessed.id,
|
| 1795 |
+
)
|
| 1796 |
+
.filter(NewsProcessed.id == processed_id)
|
| 1797 |
+
.first()
|
| 1798 |
+
)
|
| 1799 |
+
if row is None:
|
| 1800 |
+
raise HTTPException(status_code=404, detail="Article not found")
|
| 1801 |
+
raw, processed, sentiment = row
|
| 1802 |
+
return NewsItem(
|
| 1803 |
+
id=int(processed.id),
|
| 1804 |
+
raw_id=int(raw.id),
|
| 1805 |
+
title=str(raw.title or ""),
|
| 1806 |
+
description=str(raw.description or "") or None,
|
| 1807 |
+
url=str(raw.url or "") or None,
|
| 1808 |
+
channel=str(raw.source or "unknown"),
|
| 1809 |
+
publisher=_extract_publisher(raw.raw_payload),
|
| 1810 |
+
source_feed=str(raw.source_feed or "") or None,
|
| 1811 |
+
published_at=raw.published_at.isoformat() if raw.published_at else None,
|
| 1812 |
+
fetched_at=raw.fetched_at.isoformat() if raw.fetched_at else None,
|
| 1813 |
+
language=str(processed.language or "") or None,
|
| 1814 |
+
sentiment=_build_news_sentiment_block(sentiment),
|
| 1815 |
+
)
|
| 1816 |
+
|
| 1817 |
+
|
app/openrouter_client.py
CHANGED
|
@@ -31,6 +31,37 @@ class OpenRouterRateLimitError(OpenRouterError):
|
|
| 31 |
"""Raised when OpenRouter rate limiting persists after retries."""
|
| 32 |
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
def _parse_retry_after_seconds(response: httpx.Response) -> Optional[float]:
|
| 35 |
"""Parse Retry-After header in seconds if provided."""
|
| 36 |
value = response.headers.get("Retry-After")
|
|
@@ -167,6 +198,7 @@ async def create_chat_completion(
|
|
| 167 |
continue
|
| 168 |
|
| 169 |
if response.status_code == 200:
|
|
|
|
| 170 |
try:
|
| 171 |
return response.json()
|
| 172 |
except ValueError as exc:
|
|
@@ -176,7 +208,19 @@ async def create_chat_completion(
|
|
| 176 |
if retryable and attempt < max_retries:
|
| 177 |
retry_num = attempt + 1
|
| 178 |
retry_after = _parse_retry_after_seconds(response)
|
| 179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
logger.warning(
|
| 181 |
"OpenRouter retryable error status=%s (attempt %s/%s). Retrying in %.2fs",
|
| 182 |
response.status_code,
|
|
|
|
| 31 |
"""Raised when OpenRouter rate limiting persists after retries."""
|
| 32 |
|
| 33 |
|
| 34 |
+
def _log_rate_limit_headers(
|
| 35 |
+
response: httpx.Response,
|
| 36 |
+
model: str,
|
| 37 |
+
*,
|
| 38 |
+
level: int = logging.DEBUG,
|
| 39 |
+
) -> None:
|
| 40 |
+
"""Surface OpenRouter/provider rate-limit headers so we can monitor quota."""
|
| 41 |
+
remaining = (
|
| 42 |
+
response.headers.get("X-Ratelimit-Remaining")
|
| 43 |
+
or response.headers.get("x-ratelimit-remaining")
|
| 44 |
+
or response.headers.get("X-RateLimit-Remaining")
|
| 45 |
+
)
|
| 46 |
+
limit = (
|
| 47 |
+
response.headers.get("X-Ratelimit-Limit")
|
| 48 |
+
or response.headers.get("x-ratelimit-limit")
|
| 49 |
+
or response.headers.get("X-RateLimit-Limit")
|
| 50 |
+
)
|
| 51 |
+
reset = (
|
| 52 |
+
response.headers.get("X-Ratelimit-Reset")
|
| 53 |
+
or response.headers.get("x-ratelimit-reset")
|
| 54 |
+
or response.headers.get("X-RateLimit-Reset")
|
| 55 |
+
)
|
| 56 |
+
if remaining is None and limit is None and reset is None:
|
| 57 |
+
return
|
| 58 |
+
logger.log(
|
| 59 |
+
level,
|
| 60 |
+
"OpenRouter quota [model=%s] remaining=%s limit=%s reset=%s",
|
| 61 |
+
model, remaining, limit, reset,
|
| 62 |
+
)
|
| 63 |
+
|
| 64 |
+
|
| 65 |
def _parse_retry_after_seconds(response: httpx.Response) -> Optional[float]:
|
| 66 |
"""Parse Retry-After header in seconds if provided."""
|
| 67 |
value = response.headers.get("Retry-After")
|
|
|
|
| 198 |
continue
|
| 199 |
|
| 200 |
if response.status_code == 200:
|
| 201 |
+
_log_rate_limit_headers(response, model)
|
| 202 |
try:
|
| 203 |
return response.json()
|
| 204 |
except ValueError as exc:
|
|
|
|
| 208 |
if retryable and attempt < max_retries:
|
| 209 |
retry_num = attempt + 1
|
| 210 |
retry_after = _parse_retry_after_seconds(response)
|
| 211 |
+
if response.status_code == 429:
|
| 212 |
+
# Free-tier daily limits rarely recover in seconds; enforce a
|
| 213 |
+
# floor so we don't burn remaining retries with tight retries.
|
| 214 |
+
base = retry_after if retry_after is not None else 30.0
|
| 215 |
+
delay = max(base, 30.0) + random.uniform(0.0, 5.0)
|
| 216 |
+
delay = min(delay, 300.0)
|
| 217 |
+
_log_rate_limit_headers(response, model, level=logging.WARNING)
|
| 218 |
+
else:
|
| 219 |
+
delay = (
|
| 220 |
+
retry_after
|
| 221 |
+
if retry_after is not None
|
| 222 |
+
else float(2 ** retry_num) + random.uniform(0.0, 0.5)
|
| 223 |
+
)
|
| 224 |
logger.warning(
|
| 225 |
"OpenRouter retryable error status=%s (attempt %s/%s). Retrying in %.2fs",
|
| 226 |
response.status_code,
|
app/schemas.py
CHANGED
|
@@ -257,3 +257,84 @@ class BacktestReportResponse(BaseModel):
|
|
| 257 |
theta_comparison: Optional[Dict[str, Any]] = Field(None, description="Comparison with Theta baseline")
|
| 258 |
verdict: Optional[str] = Field(None, description="TFT_SUPERIOR, THETA_SUPERIOR, or MIXED")
|
| 259 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
theta_comparison: Optional[Dict[str, Any]] = Field(None, description="Comparison with Theta baseline")
|
| 258 |
verdict: Optional[str] = Field(None, description="TFT_SUPERIOR, THETA_SUPERIOR, or MIXED")
|
| 259 |
|
| 260 |
+
|
| 261 |
+
# =============================================================================
|
| 262 |
+
# News Intelligence schemas
|
| 263 |
+
# =============================================================================
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
class NewsFinbertProbs(BaseModel):
|
| 267 |
+
"""FinBERT class probability triplet for a news article."""
|
| 268 |
+
pos: float = Field(..., ge=0, le=1)
|
| 269 |
+
neu: float = Field(..., ge=0, le=1)
|
| 270 |
+
neg: float = Field(..., ge=0, le=1)
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
class NewsSentimentBlock(BaseModel):
|
| 274 |
+
"""Per-article sentiment payload shipped to the frontend feed."""
|
| 275 |
+
label: Optional[str] = Field(None, description="BULLISH | BEARISH | NEUTRAL")
|
| 276 |
+
final_score: Optional[float] = Field(None, description="Ensemble score in [-1, 1]")
|
| 277 |
+
impact_score_llm: Optional[float] = Field(None, description="LLM-only impact in [-1, 1]")
|
| 278 |
+
confidence: Optional[float] = Field(None, description="Calibrated confidence in [0, 1]")
|
| 279 |
+
relevance: Optional[float] = Field(None, description="Relevance to copper market in [0, 1]")
|
| 280 |
+
event_type: Optional[str] = Field(None, description="LLM event type bucket")
|
| 281 |
+
finbert: Optional[NewsFinbertProbs] = Field(None, description="FinBERT probability triplet")
|
| 282 |
+
reasoning: Optional[str] = Field(None, description="Short textual rationale from the LLM")
|
| 283 |
+
scored_at: Optional[str] = Field(None, description="ISO timestamp when the score was written")
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
class NewsItem(BaseModel):
|
| 287 |
+
"""Single article row in the news feed."""
|
| 288 |
+
id: int = Field(..., description="news_processed id (stable frontend key)")
|
| 289 |
+
raw_id: Optional[int] = Field(None, description="news_raw id for debugging")
|
| 290 |
+
title: str
|
| 291 |
+
description: Optional[str] = None
|
| 292 |
+
url: Optional[str] = None
|
| 293 |
+
channel: str = Field(
|
| 294 |
+
..., description="Ingestion channel (google_news, newsapi, ...)"
|
| 295 |
+
)
|
| 296 |
+
publisher: Optional[str] = Field(
|
| 297 |
+
None, description="Original publisher extracted from raw_payload.source"
|
| 298 |
+
)
|
| 299 |
+
source_feed: Optional[str] = Field(None, description="RSS query / feed identifier")
|
| 300 |
+
published_at: Optional[str] = Field(None, description="ISO timestamp")
|
| 301 |
+
fetched_at: Optional[str] = Field(None, description="ISO timestamp")
|
| 302 |
+
language: Optional[str] = None
|
| 303 |
+
sentiment: Optional[NewsSentimentBlock] = None
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
class NewsListResponse(BaseModel):
|
| 307 |
+
"""Paginated news feed response."""
|
| 308 |
+
items: List[NewsItem] = Field(default_factory=list)
|
| 309 |
+
total: int = Field(..., description="Total rows matching filters (for pagination)")
|
| 310 |
+
limit: int = Field(...)
|
| 311 |
+
offset: int = Field(...)
|
| 312 |
+
has_more: bool = Field(...)
|
| 313 |
+
generated_at: str = Field(..., description="ISO timestamp the response was built")
|
| 314 |
+
filters: Dict[str, Any] = Field(
|
| 315 |
+
default_factory=dict,
|
| 316 |
+
description="Echo of the filter args applied server-side",
|
| 317 |
+
)
|
| 318 |
+
|
| 319 |
+
|
| 320 |
+
class NewsStatsResponse(BaseModel):
|
| 321 |
+
"""Aggregate stats for the news intelligence sidebar header."""
|
| 322 |
+
window_hours: int = Field(..., description="Rolling window used for aggregation")
|
| 323 |
+
total_articles: int = Field(..., ge=0)
|
| 324 |
+
scored_articles: int = Field(..., ge=0)
|
| 325 |
+
label_distribution: Dict[str, int] = Field(
|
| 326 |
+
default_factory=dict, description="BULLISH/BEARISH/NEUTRAL counts"
|
| 327 |
+
)
|
| 328 |
+
event_type_distribution: Dict[str, int] = Field(default_factory=dict)
|
| 329 |
+
channel_distribution: Dict[str, int] = Field(
|
| 330 |
+
default_factory=dict, description="google_news / newsapi counts"
|
| 331 |
+
)
|
| 332 |
+
top_publishers: List[Dict[str, Any]] = Field(
|
| 333 |
+
default_factory=list,
|
| 334 |
+
description="[{publisher, count, avg_final_score}]",
|
| 335 |
+
)
|
| 336 |
+
avg_final_score: Optional[float] = None
|
| 337 |
+
avg_confidence: Optional[float] = None
|
| 338 |
+
avg_relevance: Optional[float] = None
|
| 339 |
+
generated_at: str = Field(...)
|
| 340 |
+
|
app/settings.py
CHANGED
|
@@ -81,22 +81,26 @@ class Settings(BaseSettings):
|
|
| 81 |
# OpenRouter AI Commentary
|
| 82 |
openrouter_api_key: Optional[str] = None
|
| 83 |
# Deprecated - kept for backward compatibility
|
| 84 |
-
openrouter_model: str = "
|
| 85 |
# Scoring models:
|
| 86 |
# fast β stepfun/step-3.5-flash:free (196B MoE, 256K ctx, system prompt + JSON OK)
|
| 87 |
# reliable β mistralai/mistral-small-3.1-24b-instruct:free (128K ctx, 24B, reliable JSON)
|
| 88 |
# commentary β same as fast for balanced quality/speed
|
| 89 |
# NOTE: google/gemma-3-4b-it:free fails on Google AI Studio (system prompt blocked).
|
| 90 |
# google/gemma-3n-e4b-it:free (nano) also blocks system prompts β do NOT use.
|
| 91 |
-
openrouter_model_scoring: str = "
|
| 92 |
openrouter_model_scoring_fast: Optional[str] = None
|
| 93 |
-
openrouter_model_scoring_reliable: Optional[str] = "
|
| 94 |
-
openrouter_model_commentary: str = "
|
| 95 |
openrouter_rpm: int = 18
|
| 96 |
openrouter_max_retries: int = 3
|
| 97 |
-
# Free tier: 50 req/day. At 12
|
| 98 |
-
#
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
openrouter_fallback_models: Optional[str] = None
|
| 101 |
tokenizers_parallelism: str = "false"
|
| 102 |
|
|
@@ -114,7 +118,7 @@ class Settings(BaseSettings):
|
|
| 114 |
|
| 115 |
# LLM Sentiment Analysis
|
| 116 |
# Deprecated - kept for backward compatibility
|
| 117 |
-
llm_sentiment_model: str = "
|
| 118 |
|
| 119 |
# Pipeline trigger authentication
|
| 120 |
pipeline_trigger_secret: Optional[str] = None
|
|
|
|
| 81 |
# OpenRouter AI Commentary
|
| 82 |
openrouter_api_key: Optional[str] = None
|
| 83 |
# Deprecated - kept for backward compatibility
|
| 84 |
+
openrouter_model: str = "minimax/minimax-m2.5:free"
|
| 85 |
# Scoring models:
|
| 86 |
# fast β stepfun/step-3.5-flash:free (196B MoE, 256K ctx, system prompt + JSON OK)
|
| 87 |
# reliable β mistralai/mistral-small-3.1-24b-instruct:free (128K ctx, 24B, reliable JSON)
|
| 88 |
# commentary β same as fast for balanced quality/speed
|
| 89 |
# NOTE: google/gemma-3-4b-it:free fails on Google AI Studio (system prompt blocked).
|
| 90 |
# google/gemma-3n-e4b-it:free (nano) also blocks system prompts β do NOT use.
|
| 91 |
+
openrouter_model_scoring: str = "minimax/minimax-m2.5:free"
|
| 92 |
openrouter_model_scoring_fast: Optional[str] = None
|
| 93 |
+
openrouter_model_scoring_reliable: Optional[str] = "minimax/minimax-m2.5:free"
|
| 94 |
+
openrouter_model_commentary: str = "minimax/minimax-m2.5:free"
|
| 95 |
openrouter_rpm: int = 18
|
| 96 |
openrouter_max_retries: int = 3
|
| 97 |
+
# Free tier: ~50 req/day per model. At chunk_size=12 a run of 60 articles
|
| 98 |
+
# costs ~5 chunks (=5β10 requests incl. escalation) which leaves headroom
|
| 99 |
+
# for multiple runs per day before hitting the ceiling. Raise cautiously.
|
| 100 |
+
max_llm_articles_per_run: int = 60
|
| 101 |
+
# Comma-separated list of additional OpenRouter model slugs used by the
|
| 102 |
+
# client as transport-level fallbacks when the primary model 429s/5xx's.
|
| 103 |
+
# Example: "google/gemini-flash-1.5:free,meta-llama/llama-3.1-8b-instruct:free"
|
| 104 |
openrouter_fallback_models: Optional[str] = None
|
| 105 |
tokenizers_parallelism: str = "false"
|
| 106 |
|
|
|
|
| 118 |
|
| 119 |
# LLM Sentiment Analysis
|
| 120 |
# Deprecated - kept for backward compatibility
|
| 121 |
+
llm_sentiment_model: str = "minimax/minimax-m2.5:free"
|
| 122 |
|
| 123 |
# Pipeline trigger authentication
|
| 124 |
pipeline_trigger_secret: Optional[str] = None
|