Spaces:

ifieryarrows
/

copper-mind

Running

App Files Files Community

ifieryarrows committed about 1 month ago

Commit

99e7b4b

verified ·

1 Parent(s): 84f8194

Sync from GitHub (tests passed)

Browse files

Files changed (5) hide show

app/ai_engine.py +78 -19
app/main.py +347 -0
app/openrouter_client.py +45 -1
app/schemas.py +81 -0
app/settings.py +12 -8

app/ai_engine.py CHANGED Viewed

@@ -138,9 +138,13 @@ Label mapping:
 - impact_score <= -0.15 => BEARISH
 - otherwise => NEUTRAL
 """
-LLM_SCORING_RESPONSE_FORMAT_V2 = {
-    "type": "json_object",
-}
 SCORING_V2_VERSION = "commodity_v2"
@@ -1068,9 +1072,10 @@ async def _score_subset_with_model_v2(
             "fallback_models": settings.openrouter_fallback_models_list,
             "referer": "https://copper-mind.vercel.app",
             "title": "CopperMind Sentiment Analysis V2",
-            "response_format": LLM_SCORING_RESPONSE_FORMAT_V2,
             "extra_payload": {"reasoning": {"exclude": True}},
         }
         return await create_chat_completion(**request_kwargs)
     parse_fail_count = 0
@@ -1187,13 +1192,14 @@ async def score_batch_with_llm_v2(
     escalation_ids = sorted(set(fast_failed).union(conflict_ids))
     escalation_count = len(escalation_ids)
     if escalation_ids and not fast_rate_limited:
         reliable_subset = [
             article_by_id[article_id]
             for article_id in escalation_ids
             if article_id in article_by_id
         ]
-        reliable_valid, _reliable_failed, parse_fail_reliable, _rl = await _score_subset_with_model_v2(
             settings=settings,
             model_name=reliable_model,
             articles=reliable_subset,
@@ -1219,6 +1225,10 @@ async def score_batch_with_llm_v2(
         "fallback_count": fallback_count,
         "model_fast": fast_model,
         "model_reliable": reliable_model,
     }
@@ -1347,6 +1357,7 @@ def score_unscored_processed_articles(
             NewsProcessed.id.label("processed_id"),
             NewsProcessed.canonical_title,
             NewsProcessed.cleaned_text,
             NewsRaw.title.label("raw_title"),
             NewsRaw.description.label("raw_description"),
             NewsRaw.published_at,
@@ -1387,22 +1398,37 @@ def score_unscored_processed_articles(
     fast_model = settings.resolved_scoring_fast_model
     reliable_model = settings.resolved_scoring_reliable_model
     for chunk_idx in range(0, len(rows), chunk_size):
         chunk_rows = rows[chunk_idx:chunk_idx + chunk_size]
         chunk_items: list[dict] = []
         for row in chunk_rows:
             title = str(row.raw_title or row.canonical_title or "")[:500]
             description = str(row.raw_description or "")[:1000]
             text = str(row.cleaned_text or f"{title} {description}")[:2000]
             chunk_items.append(
                 {
-                    "id": int(row.processed_id),
                     "title": title,
                     "description": description,
                     "text": text,
                     "published_at": row.published_at,
                 }
             )
         finbert_by_id = score_batch_with_finbert_v2(chunk_items)
         finbert_used += len(finbert_by_id)
@@ -1410,14 +1436,29 @@ def score_unscored_processed_articles(
         llm_results_by_id: dict[int, dict] = {}
         llm_candidates: list[dict] = []
-        # Rate-limit flag is keyed to today's UTC date so it resets automatically at midnight.
         today_utc = datetime.now(timezone.utc).date().isoformat()
-        rate_limited_date = getattr(score_unscored_processed_articles, "_rate_limited_date", None)
-        global_rate_limited = rate_limited_date == today_utc
-        if settings.openrouter_api_key and llm_budget_remaining > 0 and not global_rate_limited:
-            llm_take = min(len(chunk_items), llm_budget_remaining)
-            llm_candidates = chunk_items[:llm_take]
             llm_budget_remaining -= llm_take
         if llm_candidates:
@@ -1434,13 +1475,31 @@ def score_unscored_processed_articles(
                 fast_model = str(llm_bundle.get("model_fast", fast_model))
                 reliable_model = str(llm_bundle.get("model_reliable", reliable_model))
-                # If LLM returned 100% fail and flagged rate limit, mark for today's UTC date.
-                # Flag resets automatically the next UTC day when the daily limit refreshes.
-                if llm_bundle.get("rate_limited", False):
-                    score_unscored_processed_articles._rate_limited_date = datetime.now(timezone.utc).date().isoformat()
                     logger.warning(
-                        "V2 batch hit OpenRouter daily rate limit - LLM scoring disabled for the rest of UTC day %s.",
-                        score_unscored_processed_articles._rate_limited_date,
                     )
             except Exception as exc:

 - impact_score <= -0.15 => BEARISH
 - otherwise => NEUTRAL
 """
+# NOTE: Intentionally omit OpenAI-style `response_format` for V2 scoring.
+# Some free-tier OpenRouter providers (stepfun, mistral free variants) either
+# reject `{"type": "json_object"}` or wrap the expected JSON array inside an
+# object/unexpected structure when that hint is set. The prompt explicitly asks
+# for a JSON array, and `_clean_json_content` already handles markdown fences,
+# wrapped objects, and preambles, so we rely on prompt+post-processing instead.
+LLM_SCORING_RESPONSE_FORMAT_V2: dict[str, Any] | None = None
 SCORING_V2_VERSION = "commodity_v2"
             "fallback_models": settings.openrouter_fallback_models_list,
             "referer": "https://copper-mind.vercel.app",
             "title": "CopperMind Sentiment Analysis V2",
             "extra_payload": {"reasoning": {"exclude": True}},
         }
+        if LLM_SCORING_RESPONSE_FORMAT_V2 is not None:
+            request_kwargs["response_format"] = LLM_SCORING_RESPONSE_FORMAT_V2
         return await create_chat_completion(**request_kwargs)
     parse_fail_count = 0
     escalation_ids = sorted(set(fast_failed).union(conflict_ids))
     escalation_count = len(escalation_ids)
+    reliable_rate_limited = False
     if escalation_ids and not fast_rate_limited:
         reliable_subset = [
             article_by_id[article_id]
             for article_id in escalation_ids
             if article_id in article_by_id
         ]
+        reliable_valid, _reliable_failed, parse_fail_reliable, reliable_rate_limited = await _score_subset_with_model_v2(
             settings=settings,
             model_name=reliable_model,
             articles=reliable_subset,
         "fallback_count": fallback_count,
         "model_fast": fast_model,
         "model_reliable": reliable_model,
+        "rate_limited_fast": bool(fast_rate_limited),
+        "rate_limited_reliable": bool(reliable_rate_limited),
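+        # Backward-compat: true only when BOTH models hit their daily ceiling.
+        # NOTE(review): in the code visible here the reliable model is only called
+        # when the fast model was NOT rate-limited, so both flags may never be true
+        # within a single call — confirm this key can actually become True.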
+        "rate_limited": bool(fast_rate_limited and reliable_rate_limited),
     }
             NewsProcessed.id.label("processed_id"),
             NewsProcessed.canonical_title,
             NewsProcessed.cleaned_text,
+            NewsProcessed.language.label("language"),
             NewsRaw.title.label("raw_title"),
             NewsRaw.description.label("raw_description"),
             NewsRaw.published_at,
     fast_model = settings.resolved_scoring_fast_model
     reliable_model = settings.resolved_scoring_reliable_model
+    # Articles that are non-English or too short do not benefit from LLM
+    # classification (prompt is English, quotas are scarce) — we route them
+    # straight to FinBERT+rule fallback. These thresholds are intentionally
+    # conservative so we only skip when we're confident the LLM call would
+    # be wasted.
+    MIN_TEXT_CHARS_FOR_LLM = 80
     for chunk_idx in range(0, len(rows), chunk_size):
         chunk_rows = rows[chunk_idx:chunk_idx + chunk_size]
         chunk_items: list[dict] = []
+        llm_eligible_ids: set[int] = set()
         for row in chunk_rows:
             title = str(row.raw_title or row.canonical_title or "")[:500]
             description = str(row.raw_description or "")[:1000]
             text = str(row.cleaned_text or f"{title} {description}")[:2000]
+            language = (getattr(row, "language", None) or "").strip().lower()
+            processed_id = int(row.processed_id)
             chunk_items.append(
                 {
+                    "id": processed_id,
                     "title": title,
                     "description": description,
                     "text": text,
                     "published_at": row.published_at,
+                    "language": language or None,
                 }
             )
+            is_english = (not language) or language.startswith("en")
+            long_enough = len(text) >= MIN_TEXT_CHARS_FOR_LLM
+            if is_english and long_enough:
+                llm_eligible_ids.add(processed_id)
         finbert_by_id = score_batch_with_finbert_v2(chunk_items)
         finbert_used += len(finbert_by_id)
         llm_results_by_id: dict[int, dict] = {}
         llm_candidates: list[dict] = []
+        # Per-model rate-limit tracking (keyed by UTC date). A specific model
+        # is considered exhausted for today if its entry equals today's date.
+        # LLM scoring is skipped for the chunk only when BOTH fast and reliable
+        # models have been flagged today — otherwise we still attempt (fallback
+        # chain inside `score_batch_with_llm_v2` handles partial exhaustion).
         today_utc = datetime.now(timezone.utc).date().isoformat()
+        rate_limited_by_model: dict[str, str] = getattr(
+            score_unscored_processed_articles, "_rate_limited_by_model", {}
+        ) or {}
+        fast_exhausted = rate_limited_by_model.get(fast_model) == today_utc
+        reliable_exhausted = rate_limited_by_model.get(reliable_model) == today_utc
+        both_exhausted = fast_exhausted and reliable_exhausted
+        if settings.openrouter_api_key and llm_budget_remaining > 0 and not both_exhausted:
+            eligible_items = [item for item in chunk_items if item["id"] in llm_eligible_ids]
+            skipped = len(chunk_items) - len(eligible_items)
+            if skipped > 0:
+                logger.info(
+                    "V2 skipping %d non-English/short-text articles; routing directly to FinBERT+rule fallback",
+                    skipped,
+                )
+            llm_take = min(len(eligible_items), llm_budget_remaining)
+            llm_candidates = eligible_items[:llm_take]
             llm_budget_remaining -= llm_take
         if llm_candidates:
                 fast_model = str(llm_bundle.get("model_fast", fast_model))
                 reliable_model = str(llm_bundle.get("model_reliable", reliable_model))
+                # Record per-model rate-limit state so individual model exhaustion
+                # doesn't block the other one. Only emits the "disabled for day"
+                # warning when both are flagged.
+                updated = False
+                if llm_bundle.get("rate_limited_fast"):
+                    rate_limited_by_model[fast_model] = today_utc
+                    updated = True
+                    logger.warning(
+                        "V2 fast model %s hit daily rate limit; will be skipped for rest of UTC day %s.",
+                        fast_model, today_utc,
+                    )
+                if llm_bundle.get("rate_limited_reliable"):
+                    rate_limited_by_model[reliable_model] = today_utc
+                    updated = True
+                    logger.warning(
+                        "V2 reliable model %s hit daily rate limit; will be skipped for rest of UTC day %s.",
+                        reliable_model, today_utc,
+                    )
+                if updated:
+                    score_unscored_processed_articles._rate_limited_by_model = rate_limited_by_model
+                if llm_bundle.get("rate_limited"):
                     logger.warning(
+                        "V2 both models (%s, %s) rate-limited; LLM scoring paused until UTC %s.",
+                        fast_model, reliable_model,
+                        (datetime.now(timezone.utc).date() + timedelta(days=1)).isoformat(),
                     )
             except Exception as exc:

app/main.py CHANGED Viewed

@@ -36,6 +36,11 @@ from app.schemas import (
     ConsensusSignal,
     TFTModelSummaryResponse,
     BacktestReportResponse,
 )
 # Configure logging
@@ -1468,3 +1473,345 @@ async def get_sentiment_summary(
             "generated_at": now.isoformat(),
         }

     ConsensusSignal,
     TFTModelSummaryResponse,
     BacktestReportResponse,
+    NewsItem,
+    NewsListResponse,
+    NewsStatsResponse,
+    NewsFinbertProbs,
+    NewsSentimentBlock,
 )
 # Configure logging
             "generated_at": now.isoformat(),
         }
+# =============================================================================
+# News intelligence endpoints
+# =============================================================================
+#
+# Serves the Overview right-sidebar news feed. Reads from the news_raw/
+# news_processed/news_sentiments_v2 pipeline the daily worker already fills —
+# no LLM is invoked on the hot path.
+#
+# Source taxonomy:
+#   * channel   = ingestion channel (NewsRaw.source): "google_news" | "newsapi"
+#   * publisher = original publisher (raw_payload.source): Reuters, Mining.com…
+# =============================================================================
+# In-process response caches. Each entry stores (UTC epoch seconds at insert
+# time, serialized response payload). The list cache is keyed by the sorted
+# tuple of request filters; the stats cache is keyed by `since_hours`.
+_news_list_cache: dict[tuple, tuple[float, dict]] = {}
+_news_stats_cache: dict[int, tuple[float, dict]] = {}
+# Freshness windows in seconds, compared against the stored insert timestamp.
+_NEWS_LIST_TTL_S = 60.0
+_NEWS_STATS_TTL_S = 120.0
+# Sentiment labels accepted by the `label` query filter (matched upper-cased).
+_VALID_LABELS = {"BULLISH", "BEARISH", "NEUTRAL"}
+def _extract_publisher(raw_payload) -> Optional[str]:
+    """Return the original publisher name found in a raw payload, if any.
+
+    ``raw_payload`` may be a dict or a JSON string that decodes to one. The
+    ``source`` entry wins: a dict contributes its ``name`` (then ``title``),
+    a non-blank string is returned stripped. Only when ``source`` is neither
+    of those do the top-level ``publisher`` / ``author`` keys get consulted.
+    Returns ``None`` for empty, undecodable, or non-dict payloads.
+    """
+    if not raw_payload:
+        return None
+    payload = raw_payload
+    if isinstance(payload, str):
+        import json as _json
+        try:
+            payload = _json.loads(payload)
+        except (ValueError, TypeError):
+            return None
+    if not isinstance(payload, dict):
+        return None
+    source = payload.get("source")
+    if isinstance(source, dict):
+        # A dict source never falls through to the top-level keys.
+        candidate = source.get("name") or source.get("title")
+    elif isinstance(source, str) and source.strip():
+        return source.strip()
+    else:
+        candidate = payload.get("publisher") or payload.get("author")
+    return str(candidate) if candidate else None
+def _build_news_sentiment_block(sent: Optional[NewsSentimentV2]) -> Optional[NewsSentimentBlock]:
+    """Convert a sentiment row into the API sentiment block.
+
+    Returns ``None`` when no sentiment row exists. Nullable numeric columns
+    are passed through as ``None``; missing FinBERT probabilities become 0.0.
+    """
+    if sent is None:
+        return None
+    def _float_or_none(value):
+        # Keep NULL columns as None rather than coercing them to 0.0.
+        return None if value is None else float(value)
+    scored_at_iso = sent.scored_at.isoformat() if sent.scored_at else None
+    return NewsSentimentBlock(
+        label=sent.label,
+        final_score=_float_or_none(sent.final_score),
+        impact_score_llm=_float_or_none(sent.impact_score_llm),
+        confidence=_float_or_none(sent.confidence_calibrated),
+        relevance=_float_or_none(sent.relevance_score),
+        event_type=sent.event_type,
+        finbert=NewsFinbertProbs(
+            pos=float(sent.finbert_pos or 0.0),
+            neu=float(sent.finbert_neu or 0.0),
+            neg=float(sent.finbert_neg or 0.0),
+        ),
+        reasoning=_extract_reasoning_text(sent.reasoning_json),
+        scored_at=scored_at_iso,
+    )
+def _extract_reasoning_text(reasoning_json: Optional[str]) -> Optional[str]:
+    """Extract a short rationale (at most 500 characters) from a stored blob.
+
+    The blob is decoded as JSON. A dict yields the first non-blank string
+    among ``reasoning``, ``rationale``, ``summary``, ``explanation``; a JSON
+    string is returned truncated; any other JSON value yields ``None``.
+    Input that is not valid JSON is returned as raw text, truncated.
+    """
+    if not reasoning_json:
+        return None
+    import json as _json
+    try:
+        decoded = _json.loads(reasoning_json)
+    except (ValueError, TypeError):
+        # Not JSON: fall back to the raw text itself.
+        return str(reasoning_json)[:500]
+    if isinstance(decoded, str):
+        return decoded[:500]
+    if not isinstance(decoded, dict):
+        return None
+    candidates = (
+        decoded.get(field)
+        for field in ("reasoning", "rationale", "summary", "explanation")
+    )
+    return next(
+        (text.strip()[:500] for text in candidates if isinstance(text, str) and text.strip()),
+        None,
+    )
+@app.get(
+    "/api/news",
+    response_model=NewsListResponse,
+    summary="Paginated news feed with sentiment annotations",
+)
+async def get_news_feed(
+    limit: int = Query(default=20, ge=1, le=50),
+    offset: int = Query(default=0, ge=0),
+    since_hours: int = Query(default=48, ge=1, le=168),
+    label: str = Query(default="all"),
+    event_type: str = Query(default="all"),
+    min_relevance: float = Query(default=0.0, ge=0.0, le=1.0),
+    channel: str = Query(default="all"),
+    publisher: Optional[str] = Query(default=None, max_length=200),
+    search: Optional[str] = Query(default=None, max_length=200),
+):
+    """Return one page of recent articles, newest first, with sentiment data.
+
+    Articles published within the last ``since_hours`` are joined to their
+    processed row and (outer-joined) to their sentiment row. ``label``,
+    ``event_type``, ``min_relevance`` and ``channel`` are applied in SQL;
+    ``search`` is a case-insensitive literal substring match on the title;
+    ``publisher`` is a case-insensitive substring match done in Python over
+    at most the 500 newest matching rows, so ``total`` is capped there.
+
+    Responses are cached in-process per filter combination for
+    ``_NEWS_LIST_TTL_S`` seconds.
+
+    Raises:
+        HTTPException: 400 when ``label`` is neither "all" nor a valid label.
+    """
+    from sqlalchemy import desc as _desc
+    filters_echo = {
+        "limit": limit,
+        "offset": offset,
+        "since_hours": since_hours,
+        "label": label,
+        "event_type": event_type,
+        "min_relevance": min_relevance,
+        "channel": channel,
+        "publisher": publisher,
+        "search": search,
+    }
+    cache_key = tuple(sorted(filters_echo.items()))
+    now_ts = datetime.now(timezone.utc).timestamp()
+    cached = _news_list_cache.get(cache_key)
+    if cached and (now_ts - cached[0]) < _NEWS_LIST_TTL_S:
+        return cached[1]
+    label_upper = label.upper()
+    if label_upper != "ALL" and label_upper not in _VALID_LABELS:
+        raise HTTPException(status_code=400, detail=f"Invalid label '{label}'")
+    with SessionLocal() as session:
+        now = datetime.now(timezone.utc)
+        cutoff = now - timedelta(hours=since_hours)
+        q = (
+            session.query(NewsRaw, NewsProcessed, NewsSentimentV2)
+            .join(NewsProcessed, NewsProcessed.raw_id == NewsRaw.id)
+            .outerjoin(
+                NewsSentimentV2,
+                NewsSentimentV2.news_processed_id == NewsProcessed.id,
+            )
+            .filter(NewsRaw.published_at >= cutoff)
+        )
+        if channel.lower() != "all":
+            q = q.filter(NewsRaw.source == channel)
+        if event_type.lower() != "all":
+            q = q.filter(NewsSentimentV2.event_type == event_type)
+        if label_upper != "ALL":
+            q = q.filter(NewsSentimentV2.label == label_upper)
+        if min_relevance > 0:
+            q = q.filter(NewsSentimentV2.relevance_score >= min_relevance)
+        if search:
+            # Escape LIKE metacharacters so the user's text is matched
+            # literally (e.g. "5%" or "cu_price" must not act as wildcards).
+            literal = (
+                search.replace("\\", "\\\\")
+                .replace("%", "\\%")
+                .replace("_", "\\_")
+            )
+            q = q.filter(NewsRaw.title.ilike(f"%{literal}%", escape="\\"))
+        q = q.order_by(_desc(NewsRaw.published_at))
+        publisher_needle = publisher.strip().lower() if publisher and publisher.strip() else None
+        if publisher_needle:
+            # Publisher filter requires JSON extraction; do it in Python to
+            # remain backend-agnostic (sqlite/postgres) and keep the endpoint
+            # simple. Scope is bounded by the time window filter above and by
+            # the 500-row cap, which also caps `total` for this branch.
+            rows = q.limit(500).all()
+            filtered = [
+                triple for triple in rows
+                if publisher_needle in (_extract_publisher(triple[0].raw_payload) or "").lower()
+            ]
+            total = len(filtered)
+            page_rows = filtered[offset: offset + limit]
+        else:
+            total = q.count()
+            page_rows = q.offset(offset).limit(limit).all()
+        items: list[NewsItem] = []
+        for raw, processed, sentiment in page_rows:
+            items.append(
+                NewsItem(
+                    id=int(processed.id),
+                    raw_id=int(raw.id),
+                    title=str(raw.title or ""),
+                    description=str(raw.description or "") or None,
+                    url=str(raw.url or "") or None,
+                    channel=str(raw.source or "unknown"),
+                    publisher=_extract_publisher(raw.raw_payload),
+                    source_feed=str(raw.source_feed or "") or None,
+                    published_at=raw.published_at.isoformat() if raw.published_at else None,
+                    fetched_at=raw.fetched_at.isoformat() if raw.fetched_at else None,
+                    language=str(processed.language or "") or None,
+                    sentiment=_build_news_sentiment_block(sentiment),
+                )
+            )
+        response = NewsListResponse(
+            items=items,
+            total=int(total),
+            limit=limit,
+            offset=offset,
+            has_more=(offset + limit) < int(total),
+            generated_at=now.isoformat(),
+            filters=filters_echo,
+        )
+    payload = response.model_dump()
+    _news_list_cache[cache_key] = (now_ts, payload)
+    # Trim cache to avoid unbounded growth: evict the oldest entries first.
+    if len(_news_list_cache) > 128:
+        oldest = sorted(_news_list_cache.items(), key=lambda kv: kv[1][0])[: len(_news_list_cache) - 128]
+        for k, _ in oldest:
+            _news_list_cache.pop(k, None)
+    return payload
+@app.get(
+    "/api/news/stats",
+    response_model=NewsStatsResponse,
+    summary="Aggregate stats for the news sidebar header",
+)
+async def get_news_stats(
+    since_hours: int = Query(default=24, ge=1, le=168),
+):
+    """Aggregate counts and averages for articles in the last ``since_hours``.
+
+    Counts articles per channel, sentiment label and event type, lists the
+    five publishers with the most articles, and averages final score,
+    calibrated confidence and relevance. Each average is taken over the rows
+    that actually carry that value, so NULL columns and unscored articles do
+    not pull it toward zero. An average is ``None`` when no row has the value;
+    a publisher without any scored article reports ``avg_final_score`` 0.0.
+
+    Responses are cached in-process per ``since_hours`` for
+    ``_NEWS_STATS_TTL_S`` seconds.
+    """
+    now_ts = datetime.now(timezone.utc).timestamp()
+    cached = _news_stats_cache.get(since_hours)
+    if cached and (now_ts - cached[0]) < _NEWS_STATS_TTL_S:
+        return cached[1]
+    with SessionLocal() as session:
+        now = datetime.now(timezone.utc)
+        cutoff = now - timedelta(hours=since_hours)
+        rows = (
+            session.query(NewsRaw, NewsProcessed, NewsSentimentV2)
+            .join(NewsProcessed, NewsProcessed.raw_id == NewsRaw.id)
+            .outerjoin(
+                NewsSentimentV2,
+                NewsSentimentV2.news_processed_id == NewsProcessed.id,
+            )
+            .filter(NewsRaw.published_at >= cutoff)
+            .all()
+        )
+        label_dist: dict[str, int] = {"BULLISH": 0, "BEARISH": 0, "NEUTRAL": 0}
+        event_dist: dict[str, int] = {}
+        channel_dist: dict[str, int] = {}
+        publisher_acc: dict[str, dict[str, float]] = {}
+        score_sum = 0.0
+        conf_sum = 0.0
+        rel_sum = 0.0
+        # Per-metric denominators: only rows with a non-NULL value count.
+        score_n = 0
+        conf_n = 0
+        rel_n = 0
+        scored_count = 0
+        total = len(rows)
+        for raw, _processed, sent in rows:
+            has_score = sent is not None and sent.final_score is not None
+            ch = str(raw.source or "unknown")
+            channel_dist[ch] = channel_dist.get(ch, 0) + 1
+            pub = _extract_publisher(raw.raw_payload)
+            if pub:
+                acc = publisher_acc.setdefault(
+                    pub, {"count": 0, "scored": 0, "score_sum": 0.0}
+                )
+                acc["count"] += 1
+                if has_score:
+                    # Track scored articles separately so the publisher
+                    # average is not diluted by unscored ones.
+                    acc["scored"] += 1
+                    acc["score_sum"] += float(sent.final_score)
+            if sent is None:
+                continue
+            scored_count += 1
+            # A NULL label must not become a None dict key in the response.
+            label_key = sent.label or "UNKNOWN"
+            label_dist[label_key] = label_dist.get(label_key, 0) + 1
+            etype = sent.event_type or "unknown"
+            event_dist[etype] = event_dist.get(etype, 0) + 1
+            if has_score:
+                score_sum += float(sent.final_score)
+                score_n += 1
+            if sent.confidence_calibrated is not None:
+                conf_sum += float(sent.confidence_calibrated)
+                conf_n += 1
+            if sent.relevance_score is not None:
+                rel_sum += float(sent.relevance_score)
+                rel_n += 1
+        top_publishers = sorted(
+            (
+                {
+                    "publisher": name,
+                    "count": int(data["count"]),
+                    "avg_final_score": (
+                        round(float(data["score_sum"]) / float(data["scored"]), 4)
+                        if data["scored"] > 0
+                        else 0.0
+                    ),
+                }
+                for name, data in publisher_acc.items()
+            ),
+            key=lambda item: item["count"],
+            reverse=True,
+        )[:5]
+        response = NewsStatsResponse(
+            window_hours=since_hours,
+            total_articles=total,
+            scored_articles=scored_count,
+            label_distribution=label_dist,
+            event_type_distribution=event_dist,
+            channel_distribution=channel_dist,
+            top_publishers=top_publishers,
+            avg_final_score=(score_sum / score_n) if score_n else None,
+            avg_confidence=(conf_sum / conf_n) if conf_n else None,
+            avg_relevance=(rel_sum / rel_n) if rel_n else None,
+            generated_at=now.isoformat(),
+        )
+    payload = response.model_dump()
+    _news_stats_cache[since_hours] = (now_ts, payload)
+    return payload
+@app.get(
+    "/api/news/{processed_id}",
+    response_model=NewsItem,
+    summary="Full detail for a single news article",
+)
+async def get_news_item(processed_id: int):
+    """Return the article whose news_processed id is ``processed_id``.
+
+    The sentiment row is outer-joined, so an unscored article is still
+    returned with ``sentiment`` set to ``None``.
+
+    Raises:
+        HTTPException: 404 when no processed article has that id.
+    """
+    with SessionLocal() as session:
+        lookup = session.query(NewsRaw, NewsProcessed, NewsSentimentV2)
+        lookup = lookup.join(NewsProcessed, NewsProcessed.raw_id == NewsRaw.id)
+        lookup = lookup.outerjoin(
+            NewsSentimentV2,
+            NewsSentimentV2.news_processed_id == NewsProcessed.id,
+        )
+        match = lookup.filter(NewsProcessed.id == processed_id).first()
+        if match is None:
+            raise HTTPException(status_code=404, detail="Article not found")
+        raw, processed, sentiment = match
+        published_iso = raw.published_at.isoformat() if raw.published_at else None
+        fetched_iso = raw.fetched_at.isoformat() if raw.fetched_at else None
+        return NewsItem(
+            id=int(processed.id),
+            raw_id=int(raw.id),
+            title=str(raw.title or ""),
+            # Empty strings are normalised to None for the optional fields.
+            description=str(raw.description or "") or None,
+            url=str(raw.url or "") or None,
+            channel=str(raw.source or "unknown"),
+            publisher=_extract_publisher(raw.raw_payload),
+            source_feed=str(raw.source_feed or "") or None,
+            published_at=published_iso,
+            fetched_at=fetched_iso,
+            language=str(processed.language or "") or None,
+            sentiment=_build_news_sentiment_block(sentiment),
+        )

app/openrouter_client.py CHANGED Viewed

@@ -31,6 +31,37 @@ class OpenRouterRateLimitError(OpenRouterError):
     """Raised when OpenRouter rate limiting persists after retries."""
 def _parse_retry_after_seconds(response: httpx.Response) -> Optional[float]:
     """Parse Retry-After header in seconds if provided."""
     value = response.headers.get("Retry-After")
@@ -167,6 +198,7 @@ async def create_chat_completion(
                 continue
             if response.status_code == 200:
                 try:
                     return response.json()
                 except ValueError as exc:
@@ -176,7 +208,19 @@ async def create_chat_completion(
             if retryable and attempt < max_retries:
                 retry_num = attempt + 1
                 retry_after = _parse_retry_after_seconds(response)
-                delay = retry_after if retry_after is not None else float(2 ** retry_num) + random.uniform(0.0, 0.5)
                 logger.warning(
                     "OpenRouter retryable error status=%s (attempt %s/%s). Retrying in %.2fs",
                     response.status_code,

     """Raised when OpenRouter rate limiting persists after retries."""
+def _log_rate_limit_headers(
+    response: httpx.Response,
+    model: str,
+    *,
+    level: int = logging.DEBUG,
+) -> None:
+    """Log the remaining/limit/reset rate-limit headers of a response.
+
+    Nothing is logged when none of the three headers is present. ``model``
+    is included in the message; ``level`` selects the logging level.
+    """
+    headers = response.headers
+    def _first_set(*names: str):
+        # Mirrors an `a or b or c` chain: first truthy value, else the
+        # result of the last lookup.
+        # NOTE(review): httpx documents its headers as case-insensitive, so
+        # one lookup per header is probably enough — confirm before simplifying.
+        found = None
+        for header_name in names:
+            found = headers.get(header_name)
+            if found:
+                break
+        return found
+    remaining = _first_set(
+        "X-Ratelimit-Remaining", "x-ratelimit-remaining", "X-RateLimit-Remaining"
+    )
+    limit = _first_set(
+        "X-Ratelimit-Limit", "x-ratelimit-limit", "X-RateLimit-Limit"
+    )
+    reset = _first_set(
+        "X-Ratelimit-Reset", "x-ratelimit-reset", "X-RateLimit-Reset"
+    )
+    if all(value is None for value in (remaining, limit, reset)):
+        return
+    logger.log(
+        level,
+        "OpenRouter quota [model=%s] remaining=%s limit=%s reset=%s",
+        model, remaining, limit, reset,
+    )
 def _parse_retry_after_seconds(response: httpx.Response) -> Optional[float]:
     """Parse Retry-After header in seconds if provided."""
     value = response.headers.get("Retry-After")
                 continue
             if response.status_code == 200:
+                _log_rate_limit_headers(response, model)
                 try:
                     return response.json()
                 except ValueError as exc:
             if retryable and attempt < max_retries:
                 retry_num = attempt + 1
                 retry_after = _parse_retry_after_seconds(response)
+                if response.status_code == 429:
+                    # Free-tier daily limits rarely recover in seconds; enforce a
+                    # floor so we don't burn remaining retries with tight retries.
+                    base = retry_after if retry_after is not None else 30.0
+                    delay = max(base, 30.0) + random.uniform(0.0, 5.0)
+                    delay = min(delay, 300.0)
+                    _log_rate_limit_headers(response, model, level=logging.WARNING)
+                else:
+                    delay = (
+                        retry_after
+                        if retry_after is not None
+                        else float(2 ** retry_num) + random.uniform(0.0, 0.5)
+                    )
                 logger.warning(
                     "OpenRouter retryable error status=%s (attempt %s/%s). Retrying in %.2fs",
                     response.status_code,

app/schemas.py CHANGED Viewed

@@ -257,3 +257,84 @@ class BacktestReportResponse(BaseModel):
     theta_comparison: Optional[Dict[str, Any]] = Field(None, description="Comparison with Theta baseline")
     verdict: Optional[str] = Field(None, description="TFT_SUPERIOR, THETA_SUPERIOR, or MIXED")

     theta_comparison: Optional[Dict[str, Any]] = Field(None, description="Comparison with Theta baseline")
     verdict: Optional[str] = Field(None, description="TFT_SUPERIOR, THETA_SUPERIOR, or MIXED")
+# =============================================================================
+# News Intelligence schemas
+# =============================================================================
+class NewsFinbertProbs(BaseModel):
+    """FinBERT class probability triplet for a news article.
+
+    All three fields are required, and each one is validated independently
+    to lie in the closed interval [0, 1]. Nothing in this model checks that
+    the three values sum to 1.
+    """
+    # pos / neu / neg: presumably the positive / neutral / negative class
+    # probabilities, going by the field names -- confirm against the producer.
+    pos: float = Field(..., ge=0, le=1)
+    neu: float = Field(..., ge=0, le=1)
+    neg: float = Field(..., ge=0, le=1)
+class NewsSentimentBlock(BaseModel):
+    """Per-article sentiment payload shipped to the frontend feed.
+
+    Every field is optional and defaults to None. The numeric ranges quoted
+    in the field descriptions are documentation only: no ge/le bounds are
+    declared on these fields, so out-of-range values are not rejected by
+    this model.
+    """
+    # label is a free-form string; the three values named in the description
+    # are not enforced by the type.
+    label: Optional[str] = Field(None, description="BULLISH | BEARISH | NEUTRAL")
+    final_score: Optional[float] = Field(None, description="Ensemble score in [-1, 1]")
+    impact_score_llm: Optional[float] = Field(None, description="LLM-only impact in [-1, 1]")
+    confidence: Optional[float] = Field(None, description="Calibrated confidence in [0, 1]")
+    relevance: Optional[float] = Field(None, description="Relevance to copper market in [0, 1]")
+    event_type: Optional[str] = Field(None, description="LLM event type bucket")
+    # Nested model; when present, its three probabilities are range-checked.
+    finbert: Optional[NewsFinbertProbs] = Field(None, description="FinBERT probability triplet")
+    reasoning: Optional[str] = Field(None, description="Short textual rationale from the LLM")
+    # Typed as str, so the timestamp format is not validated here.
+    scored_at: Optional[str] = Field(None, description="ISO timestamp when the score was written")
+class NewsItem(BaseModel):
+    """Single article row in the news feed.
+
+    Required fields are ``id``, ``title`` and ``channel``; every other field
+    is optional and defaults to None.
+    """
+    id: int = Field(..., description="news_processed id (stable frontend key)")
+    raw_id: Optional[int] = Field(None, description="news_raw id for debugging")
+    title: str
+    description: Optional[str] = None
+    url: Optional[str] = None
+    # channel is a plain str; the values listed in the description are
+    # examples, not an enforced enumeration.
+    channel: str = Field(
+        ..., description="Ingestion channel (google_news, newsapi, ...)"
+    )
+    publisher: Optional[str] = Field(
+        None, description="Original publisher extracted from raw_payload.source"
+    )
+    source_feed: Optional[str] = Field(None, description="RSS query / feed identifier")
+    # Both timestamps are typed as str, so their format is not validated here.
+    published_at: Optional[str] = Field(None, description="ISO timestamp")
+    fetched_at: Optional[str] = Field(None, description="ISO timestamp")
+    language: Optional[str] = None
+    # None when no sentiment block is attached to the article.
+    sentiment: Optional[NewsSentimentBlock] = None
+class NewsListResponse(BaseModel):
+    """Paginated news feed response.
+
+    ``total``, ``limit``, ``offset``, ``has_more`` and ``generated_at`` are
+    required; ``items`` and ``filters`` default to an empty list / dict.
+    No numeric bounds are declared on ``total``, ``limit`` or ``offset``.
+    """
+    items: List[NewsItem] = Field(default_factory=list)
+    total: int = Field(..., description="Total rows matching filters (for pagination)")
+    limit: int = Field(...)
+    offset: int = Field(...)
+    # Supplied by the caller; presumably true when rows remain beyond
+    # offset + len(items) -- confirm against the endpoint that builds this.
+    has_more: bool = Field(...)
+    # Typed as str, so the timestamp format is not validated here.
+    generated_at: str = Field(..., description="ISO timestamp the response was built")
+    filters: Dict[str, Any] = Field(
+        default_factory=dict,
+        description="Echo of the filter args applied server-side",
+    )
+class NewsStatsResponse(BaseModel):
+    """Aggregate stats for the news intelligence sidebar header.
+
+    ``window_hours``, ``total_articles``, ``scored_articles`` and
+    ``generated_at`` are required. The distribution maps and
+    ``top_publishers`` default to empty containers, and the three averages
+    default to None.
+    """
+    # No lower bound is declared on window_hours, unlike the two counts below.
+    window_hours: int = Field(..., description="Rolling window used for aggregation")
+    # Both counts are validated to be non-negative.
+    total_articles: int = Field(..., ge=0)
+    scored_articles: int = Field(..., ge=0)
+    label_distribution: Dict[str, int] = Field(
+        default_factory=dict, description="BULLISH/BEARISH/NEUTRAL counts"
+    )
+    event_type_distribution: Dict[str, int] = Field(default_factory=dict)
+    channel_distribution: Dict[str, int] = Field(
+        default_factory=dict, description="google_news / newsapi counts"
+    )
+    # Entries are untyped dicts; the key set named in the description is not
+    # enforced by this model.
+    top_publishers: List[Dict[str, Any]] = Field(
+        default_factory=list,
+        description="[{publisher, count, avg_final_score}]",
+    )
+    avg_final_score: Optional[float] = None
+    avg_confidence: Optional[float] = None
+    avg_relevance: Optional[float] = None
+    # Typed as str, so the timestamp format is not validated here.
+    generated_at: str = Field(...)

app/settings.py CHANGED Viewed

@@ -81,22 +81,26 @@ class Settings(BaseSettings):
     # OpenRouter AI Commentary
     openrouter_api_key: Optional[str] = None
     # Deprecated - kept for backward compatibility
-    openrouter_model: str = "arcee-ai/trinity-large-preview:free"
     # Scoring models:
     #   fast   → stepfun/step-3.5-flash:free  (196B MoE, 256K ctx, system prompt + JSON OK)
     #   reliable → mistralai/mistral-small-3.1-24b-instruct:free (128K ctx, 24B, reliable JSON)
     #   commentary → same as fast for balanced quality/speed
     # NOTE: google/gemma-3-4b-it:free fails on Google AI Studio (system prompt blocked).
     #        google/gemma-3n-e4b-it:free (nano) also blocks system prompts — do NOT use.
-    openrouter_model_scoring: str = "stepfun/step-3.5-flash:free"
     openrouter_model_scoring_fast: Optional[str] = None
-    openrouter_model_scoring_reliable: Optional[str] = "mistralai/mistral-small-3.1-24b-instruct:free"
-    openrouter_model_commentary: str = "stepfun/step-3.5-flash:free"
     openrouter_rpm: int = 18
     openrouter_max_retries: int = 3
-    # Free tier: 50 req/day. At 12 articles/chunk, 100 articles = ~9 chunks = ~9-18 req.
-    # Keep well under the daily limit to avoid rate-limit cascades mid-run.
-    max_llm_articles_per_run: int = 100
     openrouter_fallback_models: Optional[str] = None
     tokenizers_parallelism: str = "false"
@@ -114,7 +118,7 @@ class Settings(BaseSettings):
     # LLM Sentiment Analysis
     # Deprecated - kept for backward compatibility
-    llm_sentiment_model: str = "arcee-ai/trinity-large-preview:free"
     # Pipeline trigger authentication
     pipeline_trigger_secret: Optional[str] = None

     # OpenRouter AI Commentary
     openrouter_api_key: Optional[str] = None
     # Deprecated - kept for backward compatibility
+    openrouter_model: str = "minimax/minimax-m2.5:free"
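     # Scoring model roles (fast / reliable / commentary). The string defaults
     # below now point at minimax/minimax-m2.5:free; the previous defaults were:
     #   fast   → stepfun/step-3.5-flash:free  (196B MoE, 256K ctx, system prompt + JSON OK)
     #   reliable → mistralai/mistral-small-3.1-24b-instruct:free (128K ctx, 24B, reliable JSON)
     #   commentary → same as fast for balanced quality/speed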
     # NOTE: google/gemma-3-4b-it:free fails on Google AI Studio (system prompt blocked).
     #        google/gemma-3n-e4b-it:free (nano) also blocks system prompts — do NOT use.
+    openrouter_model_scoring: str = "minimax/minimax-m2.5:free"
     openrouter_model_scoring_fast: Optional[str] = None
+    openrouter_model_scoring_reliable: Optional[str] = "minimax/minimax-m2.5:free"
+    openrouter_model_commentary: str = "minimax/minimax-m2.5:free"
     openrouter_rpm: int = 18
     openrouter_max_retries: int = 3
+    # Free tier: ~50 req/day per model. At chunk_size=12 a run of 60 articles
+    # costs ~5 chunks (=5–10 requests incl. escalation) which leaves headroom
+    # for multiple runs per day before hitting the ceiling. Raise cautiously.
+    max_llm_articles_per_run: int = 60
+    # Comma-separated list of additional OpenRouter model slugs used by the
+    # client as transport-level fallbacks when the primary model 429s/5xx's.
+    # Example: "google/gemini-flash-1.5:free,meta-llama/llama-3.1-8b-instruct:free"
     openrouter_fallback_models: Optional[str] = None
     tokenizers_parallelism: str = "false"
     # LLM Sentiment Analysis
     # Deprecated - kept for backward compatibility
+    llm_sentiment_model: str = "minimax/minimax-m2.5:free"
     # Pipeline trigger authentication
     pipeline_trigger_secret: Optional[str] = None