ifieryarrows committed on
Commit
1e6ab4d
·
verified ·
1 Parent(s): 8e48995

Sync from GitHub (tests passed)

Browse files
Files changed (6) hide show
  1. Dockerfile +5 -2
  2. app/ai_engine.py +210 -140
  3. app/commentary.py +114 -76
  4. app/models.py +1 -1
  5. app/openrouter_client.py +201 -0
  6. app/settings.py +51 -0
Dockerfile CHANGED
@@ -35,7 +35,10 @@ EXPOSE 7860
35
  # Environment
36
  ENV PYTHONUNBUFFERED=1 \
37
  PYTHONPATH=/code \
38
- REDIS_URL=redis://127.0.0.1:6379/0
 
 
 
39
 
40
  # Run supervisord (manages redis + api + worker)
41
- CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
 
35
  # Environment
36
  ENV PYTHONUNBUFFERED=1 \
37
  PYTHONPATH=/code \
38
+ REDIS_URL=redis://127.0.0.1:6379/0 \
39
+ HF_HUB_DISABLE_PROGRESS_BARS=1 \
40
+ TRANSFORMERS_VERBOSITY=error \
41
+ TRANSFORMERS_NO_ADVISORY_WARNINGS=1
42
 
43
  # Run supervisord (manages redis + api + worker)
44
+ CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"]
app/ai_engine.py CHANGED
@@ -2,7 +2,7 @@
2
  AI Engine: LLM sentiment scoring (with FinBERT fallback) + XGBoost training.
3
 
4
  Sentiment Analysis:
5
- Primary: Gemini LLM with copper-specific context (1M token batch)
6
  Fallback: FinBERT for generic financial sentiment
7
 
8
  Usage:
@@ -15,16 +15,11 @@ import argparse
15
  import json
16
  import logging
17
  import os
18
- import time
19
  from datetime import datetime, timedelta, timezone
20
  from pathlib import Path
21
  from typing import Any, Optional
22
 
23
- # Suppress httpx request logging to prevent API keys in URLs from appearing in logs
24
- logging.getLogger("httpx").setLevel(logging.WARNING)
25
-
26
- import httpx
27
-
28
  import numpy as np
29
  import pandas as pd
30
  from sqlalchemy import func
@@ -38,6 +33,7 @@ from app.settings import get_settings
38
  from app.features import build_feature_matrix, get_feature_descriptions
39
  from app.lock import pipeline_lock
40
  from app.async_bridge import run_async_from_sync
 
41
 
42
  logging.basicConfig(
43
  level=logging.INFO,
@@ -123,11 +119,16 @@ def _log_finbert_output_once(raw_output: Any) -> None:
123
  )
124
  _FINBERT_OUTPUT_LOGGED = True
125
 
 
126
  def get_finbert_pipeline():
127
  """
128
  Load FinBERT model pipeline.
129
  Lazy loading to avoid import overhead when not needed.
130
  """
 
 
 
 
131
  from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
132
 
133
  model_name = "ProsusAI/finbert"
@@ -214,7 +215,7 @@ def score_text_with_finbert(
214
 
215
 
216
  # =============================================================================
217
- # LLM Sentiment Scoring (Primary - Gemini)
218
  # =============================================================================
219
 
220
  # Copper-specific system prompt for LLM sentiment analysis
@@ -276,125 +277,174 @@ Rules:
276
  - Use standard decimals (e.g., -0.4, 0.15, 1.0); no NaN, no scientific notation."""
277
 
278
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
  async def score_batch_with_llm(
280
  articles: list[dict],
281
  ) -> list[dict]:
282
  """
283
- Score a batch of articles using LLM (Gemini via OpenRouter).
284
-
285
- Args:
286
- articles: List of dicts with 'id', 'title', 'description'
287
-
288
- Returns:
289
- List of dicts with 'id', 'score', 'reasoning', 'prob_positive', 'prob_neutral', 'prob_negative'
290
-
291
- Raises:
292
- Exception on API error or JSON parse failure
293
  """
294
  settings = get_settings()
295
-
296
  if not settings.openrouter_api_key:
297
  raise RuntimeError("OpenRouter API key not configured")
298
-
299
- # Build articles text for prompt
300
  articles_text = "\n".join([
301
- f"{i+1}. [ID:{a['id']}] {a['title']}" + (f" - {a['description'][:200]}" if a.get('description') else "")
302
  for i, a in enumerate(articles)
303
  ])
304
-
305
  user_prompt = f"""Score these {len(articles)} news articles for copper market sentiment.
306
 
307
  Articles:
308
  {articles_text}
309
 
310
- Return ONLY a valid JSON array with this exact structure (no markdown code blocks):
311
- [
312
- {{"id": <article_id>, "score": <float from -1.0 to 1.0>, "reasoning": "<brief explanation>"}},
313
- ...
314
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
 
316
- Rules:
317
- - score: -1.0 (very bearish) to +1.0 (very bullish), 0 = neutral
318
- - reasoning: 1 sentence max explaining the copper market impact
319
- - Include ALL {len(articles)} articles in your response"""
320
-
321
- async with httpx.AsyncClient(timeout=60.0) as client:
322
- response = await client.post(
323
- "https://openrouter.ai/api/v1/chat/completions",
324
- headers={
325
- "Authorization": f"Bearer {settings.openrouter_api_key}",
326
- "Content-Type": "application/json",
327
- "HTTP-Referer": "https://copper-mind.vercel.app",
328
- "X-Title": "CopperMind Sentiment Analysis",
329
- },
330
- json={
331
- "model": settings.llm_sentiment_model,
332
- "messages": [
333
- {"role": "system", "content": LLM_SENTIMENT_SYSTEM_PROMPT},
334
- {"role": "user", "content": user_prompt}
335
- ],
336
- "max_tokens": 2000,
337
- "temperature": 0.3, # Lower temperature for consistent scoring
338
- }
339
- )
340
-
341
- if response.status_code != 200:
342
- raise RuntimeError(f"OpenRouter API error: {response.status_code} - {response.text}")
343
-
344
- data = response.json()
345
- content = data.get("choices", [{}])[0].get("message", {}).get("content", "")
346
-
347
- if not content:
348
- raise RuntimeError("Empty response from LLM")
349
-
350
- # Clean up response - remove markdown code blocks if present
351
- content = content.strip()
352
- if content.startswith("```"):
353
- # Remove ```json and ``` markers
354
- lines = content.split("\n")
355
- content = "\n".join(lines[1:-1] if lines[-1].strip() == "```" else lines[1:])
356
-
357
- # Parse JSON
358
- try:
359
- results = json.loads(content)
360
- except json.JSONDecodeError as e:
361
- logger.error(f"LLM JSON parse error: {e}\nContent: {content[:500]}")
362
- raise
363
-
364
- # Validate and enrich results
365
- enriched = []
366
- for item in results:
367
- score = float(item.get("score", 0))
368
- # Clamp score to [-1, 1]
369
- score = max(-1.0, min(1.0, score))
370
-
371
- # Derive probabilities from score
372
- # score = prob_positive - prob_negative
373
- # Assume prob_neutral is inverse of confidence
374
- confidence = abs(score)
375
- if score > 0:
376
- prob_positive = 0.33 + (confidence * 0.67)
377
- prob_negative = 0.33 - (confidence * 0.33)
378
- prob_neutral = 1.0 - prob_positive - prob_negative
379
- elif score < 0:
380
- prob_negative = 0.33 + (confidence * 0.67)
381
- prob_positive = 0.33 - (confidence * 0.33)
382
- prob_neutral = 1.0 - prob_positive - prob_negative
383
- else:
384
- prob_positive = 0.33
385
- prob_neutral = 0.34
386
- prob_negative = 0.33
387
-
388
- enriched.append({
389
- "id": item.get("id"),
390
- "score": score,
391
- "reasoning": item.get("reasoning", ""),
392
- "prob_positive": round(prob_positive, 4),
393
- "prob_neutral": round(prob_neutral, 4),
394
- "prob_negative": round(prob_negative, 4),
395
- })
396
-
397
- return enriched
398
 
399
 
400
  def score_batch_with_finbert(articles: list) -> list[dict]:
@@ -435,10 +485,10 @@ def score_unscored_articles(
435
  Score all articles that don't have sentiment scores yet.
436
 
437
  Strategy:
438
- - Primary: LLM (Gemini) with copper-specific context
439
  - Fallback: FinBERT per chunk if LLM fails
440
  - Chunk size: 20 articles for error isolation
441
- - Rate limiting: 2 second delay between chunks
442
 
443
  Returns:
444
  Number of articles scored
@@ -459,6 +509,9 @@ def score_unscored_articles(
459
 
460
  scored_count = 0
461
  total_chunks = (len(unscored) + chunk_size - 1) // chunk_size
 
 
 
462
 
463
  # Process in chunks
464
  for chunk_idx in range(0, len(unscored), chunk_size):
@@ -466,31 +519,52 @@ def score_unscored_articles(
466
  chunk_num = chunk_idx // chunk_size + 1
467
 
468
  logger.info(f"Processing chunk {chunk_num}/{total_chunks} ({len(chunk)} articles)")
469
-
470
- # Prepare articles for LLM
471
- articles_data = [
472
- {"id": a.id, "title": a.title, "description": a.description}
473
- for a in chunk
474
- ]
475
-
476
- results = None
477
- used_model = settings.llm_sentiment_model
478
-
479
- # Try LLM first
480
- if settings.openrouter_api_key:
 
 
 
 
 
 
 
 
 
 
 
481
  try:
482
- # Bridge async scoring into sync callers without nested-loop errors.
483
- results = run_async_from_sync(score_batch_with_llm, articles_data)
484
- logger.info(f"LLM scored chunk {chunk_num} successfully")
 
 
 
 
 
 
485
  except Exception as e:
486
  logger.warning(f"LLM scoring failed for chunk {chunk_num}, falling back to FinBERT: {e}")
487
- results = None
488
-
489
- # Fallback to FinBERT if LLM failed or not configured
490
- if results is None:
491
- logger.info(f"Using FinBERT fallback for chunk {chunk_num}")
492
- results = score_batch_with_finbert(chunk)
493
- used_model = "ProsusAI/finbert"
 
 
 
 
494
 
495
  # Create a lookup for results
496
  results_by_id = {r["id"]: r for r in results}
@@ -507,6 +581,7 @@ def score_unscored_articles(
507
  "prob_positive": 0.33,
508
  "prob_neutral": 0.34,
509
  "prob_negative": 0.33,
 
510
  }
511
 
512
  sentiment = NewsSentiment(
@@ -516,7 +591,7 @@ def score_unscored_articles(
516
  prob_negative=result["prob_negative"],
517
  score=result["score"],
518
  reasoning=result.get("reasoning"),
519
- model_name=result.get("model_name", used_model),
520
  scored_at=datetime.now(timezone.utc)
521
  )
522
 
@@ -526,11 +601,6 @@ def score_unscored_articles(
526
  # Commit after each chunk
527
  session.commit()
528
  logger.info(f"Committed chunk {chunk_num}: {len(chunk)} articles")
529
-
530
- # Rate limiting: 2 second delay between chunks (except last)
531
- if chunk_idx + chunk_size < len(unscored):
532
- logger.debug("Rate limit delay: 2 seconds")
533
- time.sleep(2)
534
 
535
  logger.info(f"Total articles scored: {scored_count}")
536
  return scored_count
 
2
  AI Engine: LLM sentiment scoring (with FinBERT fallback) + XGBoost training.
3
 
4
  Sentiment Analysis:
5
+ Primary: OpenRouter LLM with structured outputs
6
  Fallback: FinBERT for generic financial sentiment
7
 
8
  Usage:
 
15
  import json
16
  import logging
17
  import os
18
+ from functools import lru_cache
19
  from datetime import datetime, timedelta, timezone
20
  from pathlib import Path
21
  from typing import Any, Optional
22
 
 
 
 
 
 
23
  import numpy as np
24
  import pandas as pd
25
  from sqlalchemy import func
 
33
  from app.features import build_feature_matrix, get_feature_descriptions
34
  from app.lock import pipeline_lock
35
  from app.async_bridge import run_async_from_sync
36
+ from app.openrouter_client import OpenRouterError, create_chat_completion
37
 
38
  logging.basicConfig(
39
  level=logging.INFO,
 
119
  )
120
  _FINBERT_OUTPUT_LOGGED = True
121
 
122
+ @lru_cache(maxsize=1)
123
  def get_finbert_pipeline():
124
  """
125
  Load FinBERT model pipeline.
126
  Lazy loading to avoid import overhead when not needed.
127
  """
128
+ os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1")
129
+ os.environ.setdefault("TRANSFORMERS_VERBOSITY", "error")
130
+ os.environ.setdefault("TRANSFORMERS_NO_ADVISORY_WARNINGS", "1")
131
+
132
  from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
133
 
134
  model_name = "ProsusAI/finbert"
 
215
 
216
 
217
  # =============================================================================
218
+ # LLM Sentiment Scoring (Primary - OpenRouter)
219
  # =============================================================================
220
 
221
  # Copper-specific system prompt for LLM sentiment analysis
 
277
  - Use standard decimals (e.g., -0.4, 0.15, 1.0); no NaN, no scientific notation."""
278
 
279
 
280
+ LLM_SCORING_RESPONSE_FORMAT = {
281
+ "type": "json_schema",
282
+ "json_schema": {
283
+ "name": "news_sentiment_scores",
284
+ "strict": True,
285
+ "schema": {
286
+ "type": "array",
287
+ "items": {
288
+ "type": "object",
289
+ "properties": {
290
+ "id": {"type": "integer"},
291
+ "score": {"type": "number", "minimum": -1, "maximum": 1},
292
+ "reasoning": {"type": "string"},
293
+ },
294
+ "required": ["id", "score"],
295
+ "additionalProperties": False,
296
+ },
297
+ },
298
+ },
299
+ }
300
+
301
+ LLM_SCORING_PROVIDER_OPTIONS = {"require_parameters": True}
302
+
303
+
304
+ def _derive_probs_from_score(score: float) -> tuple[float, float, float]:
305
+ """Derive pseudo-probabilities from signed score for downstream compatibility."""
306
+ confidence = abs(score)
307
+ if score > 0:
308
+ prob_positive = 0.33 + (confidence * 0.67)
309
+ prob_negative = 0.33 - (confidence * 0.33)
310
+ prob_neutral = 1.0 - prob_positive - prob_negative
311
+ elif score < 0:
312
+ prob_negative = 0.33 + (confidence * 0.67)
313
+ prob_positive = 0.33 - (confidence * 0.33)
314
+ prob_neutral = 1.0 - prob_positive - prob_negative
315
+ else:
316
+ prob_positive = 0.33
317
+ prob_neutral = 0.34
318
+ prob_negative = 0.33
319
+
320
+ return round(prob_positive, 4), round(prob_neutral, 4), round(prob_negative, 4)
321
+
322
+
323
+ def _extract_chat_message_content(data: dict[str, Any]) -> str:
324
+ """Extract text content from OpenRouter chat completion response."""
325
+ message = data.get("choices", [{}])[0].get("message", {})
326
+ content = message.get("content", "")
327
+
328
+ if isinstance(content, str):
329
+ return content.strip()
330
+
331
+ if isinstance(content, list):
332
+ text_parts: list[str] = []
333
+ for item in content:
334
+ if isinstance(item, dict) and item.get("type") == "text":
335
+ text = item.get("text")
336
+ if isinstance(text, str):
337
+ text_parts.append(text)
338
+ return "\n".join(text_parts).strip()
339
+
340
+ return ""
341
+
342
+
343
+ def _validate_and_enrich_llm_results(
344
+ *,
345
+ raw_results: Any,
346
+ expected_ids: list[int],
347
+ model_name: str,
348
+ ) -> list[dict]:
349
+ """Validate LLM result shape and enrich with derived probability fields."""
350
+ if not isinstance(raw_results, list):
351
+ raise ValueError(f"Structured result must be a list, got {type(raw_results).__name__}")
352
+
353
+ results_by_id: dict[int, dict] = {}
354
+ for item in raw_results:
355
+ if not isinstance(item, dict):
356
+ raise ValueError(f"Structured result item must be object, got {type(item).__name__}")
357
+ if "id" not in item or "score" not in item:
358
+ raise ValueError("Structured result missing required fields: id and score")
359
+
360
+ article_id = int(item["id"])
361
+ if article_id in results_by_id:
362
+ raise ValueError(f"Duplicate article id in structured output: {article_id}")
363
+ score = max(-1.0, min(1.0, float(item["score"])))
364
+ reasoning_raw = item.get("reasoning", "")
365
+ reasoning = reasoning_raw if isinstance(reasoning_raw, str) else str(reasoning_raw)
366
+
367
+ prob_positive, prob_neutral, prob_negative = _derive_probs_from_score(score)
368
+ results_by_id[article_id] = {
369
+ "id": article_id,
370
+ "score": score,
371
+ "reasoning": reasoning,
372
+ "prob_positive": prob_positive,
373
+ "prob_neutral": prob_neutral,
374
+ "prob_negative": prob_negative,
375
+ "model_name": model_name,
376
+ }
377
+
378
+ expected = set(expected_ids)
379
+ got = set(results_by_id.keys())
380
+ missing = sorted(expected - got)
381
+ extra = sorted(got - expected)
382
+ if missing or extra:
383
+ raise ValueError(f"Structured result ID mismatch. missing={missing} extra={extra}")
384
+
385
+ return [results_by_id[article_id] for article_id in expected_ids]
386
+
387
+
388
  async def score_batch_with_llm(
389
  articles: list[dict],
390
  ) -> list[dict]:
391
  """
392
+ Score a batch of articles using OpenRouter with strict JSON schema response.
 
 
 
 
 
 
 
 
 
393
  """
394
  settings = get_settings()
395
+
396
  if not settings.openrouter_api_key:
397
  raise RuntimeError("OpenRouter API key not configured")
398
+
 
399
  articles_text = "\n".join([
400
+ f"{i+1}. [ID:{a['id']}] {a['title']}" + (f" - {a['description'][:200]}" if a.get("description") else "")
401
  for i, a in enumerate(articles)
402
  ])
403
+
404
  user_prompt = f"""Score these {len(articles)} news articles for copper market sentiment.
405
 
406
  Articles:
407
  {articles_text}
408
 
409
+ Output must follow the provided JSON schema."""
410
+
411
+ model_name = settings.resolved_scoring_model
412
+
413
+ data = await create_chat_completion(
414
+ api_key=settings.openrouter_api_key,
415
+ model=model_name,
416
+ messages=[
417
+ {"role": "system", "content": LLM_SENTIMENT_SYSTEM_PROMPT},
418
+ {"role": "user", "content": user_prompt},
419
+ ],
420
+ max_tokens=2000,
421
+ temperature=0.3,
422
+ timeout_seconds=60.0,
423
+ max_retries=settings.openrouter_max_retries,
424
+ rpm=settings.openrouter_rpm,
425
+ response_format=LLM_SCORING_RESPONSE_FORMAT,
426
+ provider=LLM_SCORING_PROVIDER_OPTIONS,
427
+ fallback_models=settings.openrouter_fallback_models_list,
428
+ referer="https://copper-mind.vercel.app",
429
+ title="CopperMind Sentiment Analysis",
430
+ )
431
 
432
+ content = _extract_chat_message_content(data)
433
+ if not content:
434
+ raise OpenRouterError("Empty response content from LLM scoring")
435
+
436
+ try:
437
+ raw_results = json.loads(content)
438
+ except json.JSONDecodeError as exc:
439
+ logger.error("LLM JSON parse error after structured output: %s", exc)
440
+ raise
441
+
442
+ expected_ids = [int(article["id"]) for article in articles]
443
+ return _validate_and_enrich_llm_results(
444
+ raw_results=raw_results,
445
+ expected_ids=expected_ids,
446
+ model_name=model_name,
447
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448
 
449
 
450
  def score_batch_with_finbert(articles: list) -> list[dict]:
 
485
  Score all articles that don't have sentiment scores yet.
486
 
487
  Strategy:
488
+ - Primary: OpenRouter LLM with strict JSON schema output
489
  - Fallback: FinBERT per chunk if LLM fails
490
  - Chunk size: 20 articles for error isolation
491
+ - Run budget: cap LLM-scored articles per run, overflow uses FinBERT
492
 
493
  Returns:
494
  Number of articles scored
 
509
 
510
  scored_count = 0
511
  total_chunks = (len(unscored) + chunk_size - 1) // chunk_size
512
+ llm_budget_remaining = max(0, settings.max_llm_articles_per_run)
513
+ budget_exhausted_logged = False
514
+ logger.info("LLM scoring budget for this run: %s articles", llm_budget_remaining)
515
 
516
  # Process in chunks
517
  for chunk_idx in range(0, len(unscored), chunk_size):
 
519
  chunk_num = chunk_idx // chunk_size + 1
520
 
521
  logger.info(f"Processing chunk {chunk_num}/{total_chunks} ({len(chunk)} articles)")
522
+
523
+ llm_candidates: list[Any] = []
524
+ finbert_candidates: list[Any] = []
525
+ results: list[dict] = []
526
+
527
+ if settings.openrouter_api_key and llm_budget_remaining > 0:
528
+ llm_take = min(len(chunk), llm_budget_remaining)
529
+ llm_candidates = chunk[:llm_take]
530
+ finbert_candidates = chunk[llm_take:]
531
+ else:
532
+ finbert_candidates = chunk
533
+ if settings.openrouter_api_key and llm_budget_remaining <= 0 and not budget_exhausted_logged:
534
+ logger.info(
535
+ "LLM budget exhausted (%s articles). Remaining chunks will use FinBERT fallback.",
536
+ settings.max_llm_articles_per_run,
537
+ )
538
+ budget_exhausted_logged = True
539
+
540
+ if llm_candidates:
541
+ articles_data = [
542
+ {"id": a.id, "title": a.title, "description": a.description}
543
+ for a in llm_candidates
544
+ ]
545
  try:
546
+ llm_results = run_async_from_sync(score_batch_with_llm, articles_data)
547
+ results.extend(llm_results)
548
+ llm_budget_remaining -= len(llm_candidates)
549
+ logger.info(
550
+ "LLM scored %s article(s) in chunk %s. Budget remaining: %s",
551
+ len(llm_candidates),
552
+ chunk_num,
553
+ llm_budget_remaining,
554
+ )
555
  except Exception as e:
556
  logger.warning(f"LLM scoring failed for chunk {chunk_num}, falling back to FinBERT: {e}")
557
+ finbert_candidates = chunk
558
+ results = []
559
+
560
+ if finbert_candidates:
561
+ logger.info(
562
+ "Using FinBERT fallback for %s article(s) in chunk %s",
563
+ len(finbert_candidates),
564
+ chunk_num,
565
+ )
566
+ finbert_results = score_batch_with_finbert(finbert_candidates)
567
+ results.extend(finbert_results)
568
 
569
  # Create a lookup for results
570
  results_by_id = {r["id"]: r for r in results}
 
581
  "prob_positive": 0.33,
582
  "prob_neutral": 0.34,
583
  "prob_negative": 0.33,
584
+ "model_name": "ProsusAI/finbert",
585
  }
586
 
587
  sentiment = NewsSentiment(
 
591
  prob_negative=result["prob_negative"],
592
  score=result["score"],
593
  reasoning=result.get("reasoning"),
594
+ model_name=result.get("model_name", settings.resolved_scoring_model),
595
  scored_at=datetime.now(timezone.utc)
596
  )
597
 
 
601
  # Commit after each chunk
602
  session.commit()
603
  logger.info(f"Committed chunk {chunk_num}: {len(chunk)} articles")
 
 
 
 
 
604
 
605
  logger.info(f"Total articles scored: {scored_count}")
606
  return scored_count
app/commentary.py CHANGED
@@ -4,19 +4,62 @@ Generates human-readable market analysis from FinBERT + XGBoost results.
4
  """
5
 
6
  import logging
7
-
8
- # Suppress httpx request logging to prevent API keys in URLs from appearing in logs
9
- logging.getLogger("httpx").setLevel(logging.WARNING)
10
-
11
- import httpx
12
  from typing import Optional
13
  from datetime import datetime
14
 
15
  from .settings import get_settings
 
16
 
17
  logger = logging.getLogger(__name__)
18
 
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  async def determine_ai_stance(commentary: str) -> str:
21
  """
22
  Have the AI analyze its own commentary to determine market stance.
@@ -43,34 +86,26 @@ Commentary:
43
  Your response (one word only):"""
44
 
45
  try:
46
- async with httpx.AsyncClient(timeout=30.0) as client:
47
- response = await client.post(
48
- "https://openrouter.ai/api/v1/chat/completions",
49
- headers={
50
- "Authorization": f"Bearer {settings.openrouter_api_key}",
51
- "Content-Type": "application/json",
52
- },
53
- json={
54
- "model": settings.openrouter_model,
55
- "messages": [{"role": "user", "content": prompt}],
56
- "max_tokens": 10,
57
- "temperature": 0.1,
58
- }
59
- )
60
-
61
- if response.status_code == 200:
62
- data = response.json()
63
- stance = data.get("choices", [{}])[0].get("message", {}).get("content", "").strip().upper()
64
-
65
- # Validate response
66
- if stance in ["BULLISH", "NEUTRAL", "BEARISH"]:
67
- logger.info(f"AI stance determined: {stance}")
68
- return stance
69
- else:
70
- logger.warning(f"Invalid AI stance response: '{stance}', using keyword fallback")
71
- else:
72
- logger.warning(f"AI stance API error: {response.status_code}, using keyword fallback")
73
-
74
  except Exception as e:
75
  logger.warning(f"AI stance detection failed: {e}, using keyword fallback")
76
 
@@ -134,9 +169,19 @@ async def generate_commentary(
134
  """
135
  settings = get_settings()
136
 
 
 
 
 
 
 
 
 
 
 
137
  if not settings.openrouter_api_key:
138
- logger.warning("OpenRouter API key not configured, skipping commentary")
139
- return None
140
 
141
  # Build the prompt
142
  influencers_text = "\n".join([
@@ -184,48 +229,41 @@ Output requirements:
184
  - End with this exact line on its own: This is NOT financial advice."""
185
 
186
  try:
187
- async with httpx.AsyncClient(timeout=30.0) as client:
188
- response = await client.post(
189
- "https://openrouter.ai/api/v1/chat/completions",
190
- headers={
191
- "Authorization": f"Bearer {settings.openrouter_api_key}",
192
- "Content-Type": "application/json",
193
- "HTTP-Referer": "https://copper-mind.vercel.app",
194
- "X-Title": "CopperMind AI Analysis",
195
  },
196
- json={
197
- "model": settings.openrouter_model,
198
- "messages": [
199
- {
200
- "role": "system",
201
- "content": system_prompt
202
- },
203
- {
204
- "role": "user",
205
- "content": prompt
206
- }
207
- ],
208
- "max_tokens": 700,
209
- "temperature": 0.6,
210
- }
211
- )
212
-
213
- if response.status_code == 200:
214
- data = response.json()
215
- commentary = data.get("choices", [{}])[0].get("message", {}).get("content", "")
216
- if commentary:
217
- logger.info(f"AI commentary generated successfully ({len(commentary)} chars)")
218
- return commentary.strip()
219
- else:
220
- logger.warning("Empty response from OpenRouter")
221
- return None
222
- else:
223
- logger.error(f"OpenRouter API error: {response.status_code} - {response.text}")
224
- return None
225
-
226
  except Exception as e:
227
  logger.error(f"Failed to generate AI commentary: {e}")
228
- return None
229
 
230
 
231
  def save_commentary_to_db(
@@ -258,7 +296,7 @@ def save_commentary_to_db(
258
  existing.sentiment_label = sentiment_label
259
  existing.ai_stance = ai_stance
260
  existing.generated_at = datetime.utcnow()
261
- existing.model_name = settings.openrouter_model
262
  logger.info(f"Updated AI commentary for {symbol} (stance: {ai_stance})")
263
  else:
264
  # Create new
@@ -270,7 +308,7 @@ def save_commentary_to_db(
270
  predicted_return=predicted_return,
271
  sentiment_label=sentiment_label,
272
  ai_stance=ai_stance,
273
- model_name=settings.openrouter_model,
274
  )
275
  session.add(new_commentary)
276
  logger.info(f"Created new AI commentary for {symbol} (stance: {ai_stance})")
 
4
  """
5
 
6
  import logging
 
 
 
 
 
7
  from typing import Optional
8
  from datetime import datetime
9
 
10
  from .settings import get_settings
11
+ from .openrouter_client import OpenRouterError, create_chat_completion
12
 
13
  logger = logging.getLogger(__name__)
14
 
15
 
16
+ def _extract_chat_message_content(data: dict) -> str:
17
+ """Extract text content from OpenRouter chat completion response."""
18
+ message = data.get("choices", [{}])[0].get("message", {})
19
+ content = message.get("content", "")
20
+ if isinstance(content, str):
21
+ return content.strip()
22
+ if isinstance(content, list):
23
+ text_parts: list[str] = []
24
+ for item in content:
25
+ if isinstance(item, dict) and item.get("type") == "text":
26
+ text = item.get("text")
27
+ if isinstance(text, str):
28
+ text_parts.append(text)
29
+ return "\n".join(text_parts).strip()
30
+ return ""
31
+
32
+
33
+ def _build_commentary_template_fallback(
34
+ current_price: float,
35
+ predicted_price: float,
36
+ predicted_return: float,
37
+ sentiment_index: float,
38
+ sentiment_label: str,
39
+ top_influencers: list[dict],
40
+ news_count: int,
41
+ ) -> str:
42
+ """Deterministic fallback commentary used when LLM is unavailable."""
43
+ direction = "upside" if predicted_return >= 0 else "downside"
44
+ top_driver_names = [inf.get("feature", "unknown_driver") for inf in top_influencers[:3]]
45
+ while len(top_driver_names) < 3:
46
+ top_driver_names.append("unknown_driver")
47
+
48
+ return "\n".join([
49
+ "Risks:",
50
+ f"1. Model indicates {direction} uncertainty around the next-day move ({predicted_return * 100:.2f}%).",
51
+ f"2. Sentiment regime is {sentiment_label} with score {sentiment_index:.3f}, which can reverse quickly.",
52
+ f"3. News sample size ({news_count}) may be insufficient for stable short-horizon inference.",
53
+ "Opportunities:",
54
+ f"1. Predicted price path implies a move from ${current_price:.4f} to ${predicted_price:.4f}.",
55
+ f"2. Feature signal concentration around `{top_driver_names[0]}` can support tactical monitoring.",
56
+ f"3. Secondary drivers `{top_driver_names[1]}` and `{top_driver_names[2]}` provide confirmation checkpoints.",
57
+ f"Summary: Current model inputs suggest a cautious {direction} bias with elevated uncertainty.",
58
+ "Bias warning: This view is model-driven and sensitive to news mix, data latency, and feature drift.",
59
+ "This is NOT financial advice.",
60
+ ])
61
+
62
+
63
  async def determine_ai_stance(commentary: str) -> str:
64
  """
65
  Have the AI analyze its own commentary to determine market stance.
 
86
  Your response (one word only):"""
87
 
88
  try:
89
+ data = await create_chat_completion(
90
+ api_key=settings.openrouter_api_key,
91
+ model=settings.resolved_commentary_model,
92
+ messages=[{"role": "user", "content": prompt}],
93
+ max_tokens=10,
94
+ temperature=0.1,
95
+ timeout_seconds=30.0,
96
+ max_retries=settings.openrouter_max_retries,
97
+ rpm=settings.openrouter_rpm,
98
+ fallback_models=settings.openrouter_fallback_models_list,
99
+ )
100
+ stance = _extract_chat_message_content(data).upper()
101
+
102
+ # Validate response
103
+ if stance in ["BULLISH", "NEUTRAL", "BEARISH"]:
104
+ logger.info(f"AI stance determined: {stance}")
105
+ return stance
106
+ logger.warning(f"Invalid AI stance response: '{stance}', using keyword fallback")
107
+ except OpenRouterError as e:
108
+ logger.warning(f"AI stance detection failed via OpenRouter: {e}, using keyword fallback")
 
 
 
 
 
 
 
 
109
  except Exception as e:
110
  logger.warning(f"AI stance detection failed: {e}, using keyword fallback")
111
 
 
169
  """
170
  settings = get_settings()
171
 
172
+ fallback_commentary = _build_commentary_template_fallback(
173
+ current_price=current_price,
174
+ predicted_price=predicted_price,
175
+ predicted_return=predicted_return,
176
+ sentiment_index=sentiment_index,
177
+ sentiment_label=sentiment_label,
178
+ top_influencers=top_influencers,
179
+ news_count=news_count,
180
+ )
181
+
182
  if not settings.openrouter_api_key:
183
+ logger.warning("OpenRouter API key not configured, using template commentary fallback")
184
+ return fallback_commentary
185
 
186
  # Build the prompt
187
  influencers_text = "\n".join([
 
229
  - End with this exact line on its own: This is NOT financial advice."""
230
 
231
  try:
232
+ data = await create_chat_completion(
233
+ api_key=settings.openrouter_api_key,
234
+ model=settings.resolved_commentary_model,
235
+ messages=[
236
+ {
237
+ "role": "system",
238
+ "content": system_prompt,
 
239
  },
240
+ {
241
+ "role": "user",
242
+ "content": prompt,
243
+ },
244
+ ],
245
+ max_tokens=700,
246
+ temperature=0.6,
247
+ timeout_seconds=30.0,
248
+ max_retries=settings.openrouter_max_retries,
249
+ rpm=settings.openrouter_rpm,
250
+ fallback_models=settings.openrouter_fallback_models_list,
251
+ referer="https://copper-mind.vercel.app",
252
+ title="CopperMind AI Analysis",
253
+ )
254
+ commentary = _extract_chat_message_content(data)
255
+ if commentary:
256
+ logger.info(f"AI commentary generated successfully ({len(commentary)} chars)")
257
+ return commentary.strip()
258
+
259
+ logger.warning("Empty response from OpenRouter, using template commentary fallback")
260
+ return fallback_commentary
261
+ except OpenRouterError as e:
262
+ logger.warning("OpenRouter commentary failed: %s. Using template fallback.", e)
263
+ return fallback_commentary
 
 
 
 
 
 
264
  except Exception as e:
265
  logger.error(f"Failed to generate AI commentary: {e}")
266
+ return fallback_commentary
267
 
268
 
269
  def save_commentary_to_db(
 
296
  existing.sentiment_label = sentiment_label
297
  existing.ai_stance = ai_stance
298
  existing.generated_at = datetime.utcnow()
299
+ existing.model_name = settings.resolved_commentary_model
300
  logger.info(f"Updated AI commentary for {symbol} (stance: {ai_stance})")
301
  else:
302
  # Create new
 
308
  predicted_return=predicted_return,
309
  sentiment_label=sentiment_label,
310
  ai_stance=ai_stance,
311
+ model_name=settings.resolved_commentary_model,
312
  )
313
  session.add(new_commentary)
314
  logger.info(f"Created new AI commentary for {symbol} (stance: {ai_stance})")
app/models.py CHANGED
@@ -105,7 +105,7 @@ class PriceBar(Base):
105
  class NewsSentiment(Base):
106
  """
107
  Sentiment scores for each news article.
108
- Primary: LLM (Gemini) with copper-specific context
109
  Fallback: FinBERT for generic financial sentiment
110
  One-to-one relationship with NewsArticle.
111
  """
 
105
  class NewsSentiment(Base):
106
  """
107
  Sentiment scores for each news article.
108
+ Primary: LLM (OpenRouter structured outputs) with copper-specific context
109
  Fallback: FinBERT for generic financial sentiment
110
  One-to-one relationship with NewsArticle.
111
  """
app/openrouter_client.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Shared OpenRouter client with retry, throttling, and model fallback support.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import asyncio
8
+ import logging
9
+ import random
10
+ import threading
11
+ import time
12
+ from typing import Any, Optional
13
+
14
+ import httpx
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ _RATE_LOCK = threading.Lock()
19
+ _NEXT_ALLOWED_TS = 0.0
20
+
21
+
22
class OpenRouterError(RuntimeError):
    """Base error raised for OpenRouter client failures.

    Carries the HTTP status code of the failing response (when one is
    available) so callers can distinguish e.g. rate limiting from
    server-side errors.
    """

    def __init__(self, message: str, status_code: Optional[int] = None):
        # Record the status before delegating; RuntimeError only stores args.
        self.status_code = status_code
        super().__init__(message)
28
+
29
+
30
class OpenRouterRateLimitError(OpenRouterError):
    """Raised when OpenRouter rate limiting (HTTP 429) persists after retries."""
32
+
33
+
34
+ def _parse_retry_after_seconds(response: httpx.Response) -> Optional[float]:
35
+ """Parse Retry-After header in seconds if provided."""
36
+ value = response.headers.get("Retry-After")
37
+ if not value:
38
+ return None
39
+ try:
40
+ seconds = float(value)
41
+ return max(seconds, 0.0)
42
+ except ValueError:
43
+ return None
44
+
45
+
46
+ def _build_model_payload(primary_model: str, fallback_models: Optional[list[str]]) -> dict[str, Any]:
47
+ """
48
+ Build model payload for OpenRouter.
49
+ Uses `models` only when fallback models are provided.
50
+ """
51
+ if not fallback_models:
52
+ return {"model": primary_model}
53
+
54
+ ordered: list[str] = []
55
+ for model in [primary_model, *fallback_models]:
56
+ if model and model not in ordered:
57
+ ordered.append(model)
58
+
59
+ if len(ordered) == 1:
60
+ return {"model": ordered[0]}
61
+
62
+ return {"models": ordered}
63
+
64
+
65
async def _throttle_request(rpm: int) -> None:
    """
    Global soft-throttle shared across all OpenRouter requests in this process.

    Enforces a minimum spacing of 60/rpm seconds between requests by
    maintaining a process-wide "next allowed" timestamp. A non-positive
    rpm disables throttling entirely.
    """
    if rpm <= 0:
        return

    # Minimum spacing between consecutive requests, in seconds.
    min_interval = 60.0 / float(rpm)
    now = time.monotonic()
    wait_seconds = 0.0

    global _NEXT_ALLOWED_TS
    # The threading.Lock guards only this tiny bookkeeping section, so
    # holding it briefly inside async code does not stall the event loop.
    with _RATE_LOCK:
        if now < _NEXT_ALLOWED_TS:
            # Backlog exists: wait until our reserved slot and push the
            # shared schedule forward one interval for the next caller.
            wait_seconds = _NEXT_ALLOWED_TS - now
            _NEXT_ALLOWED_TS += min_interval
        else:
            # No backlog: proceed immediately and reserve the next slot.
            _NEXT_ALLOWED_TS = now + min_interval

    if wait_seconds > 0:
        logger.debug("OpenRouter throttle wait: %.3fs", wait_seconds)
        await asyncio.sleep(wait_seconds)
87
+
88
+
89
async def create_chat_completion(
    *,
    api_key: str,
    model: str,
    messages: list[dict[str, Any]],
    max_tokens: Optional[int] = None,
    temperature: Optional[float] = None,
    timeout_seconds: float = 60.0,
    max_retries: int = 3,
    rpm: int = 18,
    response_format: Optional[dict[str, Any]] = None,
    provider: Optional[dict[str, Any]] = None,
    fallback_models: Optional[list[str]] = None,
    referer: Optional[str] = None,
    title: Optional[str] = None,
    extra_payload: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    """
    Call OpenRouter chat completions with retry/backoff and soft throttling.

    Args:
        api_key: OpenRouter API key (required; raises if empty).
        model: Primary model slug; combined with fallback_models into the
            payload's `model`/`models` field via _build_model_payload.
        messages: Chat messages in OpenAI chat-completions format.
        max_tokens / temperature / response_format / provider: Optional
            request fields; omitted from the payload when None.
        timeout_seconds: Whole-request timeout for the underlying client.
        max_retries: Number of retries after the initial attempt.
        rpm: Soft requests-per-minute cap shared process-wide (<=0 disables).
        referer / title: Optional OpenRouter attribution headers
            (HTTP-Referer / X-Title).
        extra_payload: Raw dict merged into the payload last, so it can
            override any of the fields set above.

    Returns:
        The parsed JSON response body on HTTP 200.

    Raises:
        OpenRouterRateLimitError: 429 persists after all retries.
        OpenRouterError: missing key, non-retryable status, exhausted
            retries on network errors, or a non-JSON 200 body.

    Retry policy:
        - Retry on 429 and 5xx
        - Retry on transient network errors
        - Delay: Retry-After (if present) else 2^attempt + jitter(0..0.5)
    """
    if not api_key:
        raise OpenRouterError("OpenRouter API key not configured")

    payload: dict[str, Any] = {
        **_build_model_payload(model, fallback_models),
        "messages": messages,
    }

    # Only include optional fields the caller actually set.
    if max_tokens is not None:
        payload["max_tokens"] = max_tokens
    if temperature is not None:
        payload["temperature"] = temperature
    if response_format is not None:
        payload["response_format"] = response_format
    if provider is not None:
        payload["provider"] = provider
    if extra_payload:
        # Merged last: caller-supplied keys win over everything above.
        payload.update(extra_payload)

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    if referer:
        headers["HTTP-Referer"] = referer
    if title:
        headers["X-Title"] = title

    async with httpx.AsyncClient(timeout=timeout_seconds) as client:
        # attempt 0 is the initial try; up to max_retries more follow.
        for attempt in range(max_retries + 1):
            # Every attempt (including retries) respects the global throttle.
            await _throttle_request(rpm)
            try:
                response = await client.post(
                    "https://openrouter.ai/api/v1/chat/completions",
                    headers=headers,
                    json=payload,
                )
            except httpx.RequestError as exc:
                # Transient transport failure (DNS, connect, read, etc.).
                if attempt >= max_retries:
                    raise OpenRouterError(
                        f"OpenRouter request failed after retries: {exc}"
                    ) from exc

                retry_num = attempt + 1
                # Exponential backoff with up to 0.5s of jitter.
                delay = float(2 ** retry_num) + random.uniform(0.0, 0.5)
                logger.warning(
                    "OpenRouter network error (attempt %s/%s). Retrying in %.2fs: %s",
                    retry_num,
                    max_retries,
                    delay,
                    exc,
                )
                await asyncio.sleep(delay)
                continue

            if response.status_code == 200:
                try:
                    return response.json()
                except ValueError as exc:
                    # 200 with an unparseable body is treated as fatal.
                    raise OpenRouterError("OpenRouter returned non-JSON response body") from exc

            # 429 and all 5xx are retryable; everything else fails fast.
            retryable = response.status_code == 429 or 500 <= response.status_code < 600
            if retryable and attempt < max_retries:
                retry_num = attempt + 1
                # Honor the server's Retry-After when given, else backoff+jitter.
                retry_after = _parse_retry_after_seconds(response)
                delay = retry_after if retry_after is not None else float(2 ** retry_num) + random.uniform(0.0, 0.5)
                logger.warning(
                    "OpenRouter retryable error status=%s (attempt %s/%s). Retrying in %.2fs",
                    response.status_code,
                    retry_num,
                    max_retries,
                    delay,
                )
                await asyncio.sleep(delay)
                continue

            # Out of retries or non-retryable status: surface a trimmed body.
            body_preview = response.text[:500]
            if response.status_code == 429:
                raise OpenRouterRateLimitError(
                    f"OpenRouter rate limit exceeded after retries: {body_preview}",
                    status_code=response.status_code,
                )
            raise OpenRouterError(
                f"OpenRouter API error: {response.status_code} - {body_preview}",
                status_code=response.status_code,
            )

    # Defensive: the loop always returns or raises; this guards refactors.
    raise OpenRouterError("OpenRouter request unexpectedly terminated")
app/settings.py CHANGED
@@ -68,12 +68,21 @@ class Settings(BaseSettings):
68
 
69
  # OpenRouter AI Commentary
70
  openrouter_api_key: Optional[str] = None
 
71
  openrouter_model: str = "openai/gpt-oss-120b:free"
 
 
 
 
 
 
 
72
 
73
  # Twelve Data (Live Price)
74
  twelvedata_api_key: Optional[str] = None
75
 
76
  # LLM Sentiment Analysis
 
77
  llm_sentiment_model: str = "openai/gpt-oss-120b:free"
78
 
79
  # Pipeline trigger authentication
@@ -158,6 +167,48 @@ class Settings(BaseSettings):
158
  symbols = self.symbols_list
159
  return symbols[0] if symbols else "HG=F"
160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
  @lru_cache
163
  def get_settings() -> Settings:
 
68
 
69
  # OpenRouter AI Commentary
70
  openrouter_api_key: Optional[str] = None
71
+ # Deprecated - kept for backward compatibility
72
  openrouter_model: str = "openai/gpt-oss-120b:free"
73
+ # New primary config
74
+ openrouter_model_scoring: str = "stepfun/step-3.5-flash:free"
75
+ openrouter_model_commentary: str = "stepfun/step-3.5-flash:free"
76
+ openrouter_rpm: int = 18
77
+ openrouter_max_retries: int = 3
78
+ max_llm_articles_per_run: int = 200
79
+ openrouter_fallback_models: Optional[str] = None
80
 
81
  # Twelve Data (Live Price)
82
  twelvedata_api_key: Optional[str] = None
83
 
84
  # LLM Sentiment Analysis
85
+ # Deprecated - kept for backward compatibility
86
  llm_sentiment_model: str = "openai/gpt-oss-120b:free"
87
 
88
  # Pipeline trigger authentication
 
167
  symbols = self.symbols_list
168
  return symbols[0] if symbols else "HG=F"
169
 
170
+ @staticmethod
171
+ def _first_non_empty(*values: Optional[str]) -> Optional[str]:
172
+ """Return first non-empty string value."""
173
+ for value in values:
174
+ if value and value.strip():
175
+ return value.strip()
176
+ return None
177
+
178
@property
def resolved_scoring_model(self) -> str:
    """Preferred scoring model with backward-compatible fallback chain.

    Resolution order: openrouter_model_scoring (new primary setting),
    then the deprecated llm_sentiment_model, then the deprecated
    openrouter_model, finally a hard-coded default.
    """
    return (
        self._first_non_empty(
            self.openrouter_model_scoring,
            self.llm_sentiment_model,
            self.openrouter_model,
        )
        or "stepfun/step-3.5-flash:free"
    )
189
+
190
@property
def resolved_commentary_model(self) -> str:
    """Preferred commentary model with backward-compatible fallback chain.

    Resolution order: openrouter_model_commentary (new primary setting),
    then deprecated openrouter_model, then deprecated llm_sentiment_model
    — note this order intentionally differs from the scoring resolver —
    finally a hard-coded default.
    """
    return (
        self._first_non_empty(
            self.openrouter_model_commentary,
            self.openrouter_model,
            self.llm_sentiment_model,
        )
        or "stepfun/step-3.5-flash:free"
    )
201
+
202
@property
def openrouter_fallback_models_list(self) -> list[str]:
    """
    Parse comma-separated fallback models.
    Empty/whitespace items are ignored; returns [] when the setting
    is unset or blank.
    """
    if not self.openrouter_fallback_models:
        return []
    return [m.strip() for m in self.openrouter_fallback_models.split(",") if m.strip()]
211
+
212
 
213
  @lru_cache
214
  def get_settings() -> Settings: