feat: portfolio re-evaluation GitHub Action + structured LLM timeout
c602f24
import os
import time
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnableConfig
load_dotenv()
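
# What ``load_dotenv()`` is expected to pull in (sketch; the key below is the
# only variable this module reads, and the value shown is a placeholder):
#
#     OPENROUTER_API_KEY=sk-or-...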

_llm_instance = None

# Ordered by preference: quality + reliability + speed
MODEL_CHAIN = [
    "nvidia/nemotron-3-nano-30b-a3b:free",
    "stepfun/step-3.5-flash:free",
    "arcee-ai/trinity-large-preview:free",
    "google/gemma-3-27b-it:free",
    "meta-llama/llama-3.3-70b-instruct:free",
    "mistralai/mistral-small-3.1-24b-instruct:free",
]


def get_llm() -> ChatOpenAI:
    """Return a lazily initialised LLM singleton pinned to the primary model.

    The cached instance always uses ``MODEL_CHAIN[0]``; per-call fallback
    through the rest of the chain is handled by ``invoke_with_fallback``.
    """
    global _llm_instance
    if _llm_instance is not None:
        return _llm_instance

    api_key = os.getenv("OPENROUTER_API_KEY")
    if not api_key:
        from src.core.logger import get_logger

        logger = get_logger(__name__)
        logger.error("OPENROUTER_API_KEY not found in environment")
        # Log the *names* of key-like variables (never their values) to help
        # diagnose misconfigured secrets.
        available = [k for k in os.environ if "API" in k or "KEY" in k]
        logger.error("Available key-like env vars: %s", available)
        raise ValueError("OPENROUTER_API_KEY not found. Check your secrets.")

    _llm_instance = ChatOpenAI(
        model=MODEL_CHAIN[0],
        api_key=api_key,
        base_url="https://openrouter.ai/api/v1",
        temperature=0,
    )
    return _llm_instance
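
# Usage sketch: the client is constructed once and cached, so call sites can
# invoke ``get_llm()`` freely without paying construction cost per call.
#
#     llm = get_llm()
#     assert llm is get_llm()  # same cached instance every time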


def get_structured_llm(max_tokens: int = 65536) -> ChatOpenAI:
    """Return an LLM instance configured for structured output.

    A generous ``max_tokens`` ensures reasoning models have enough headroom
    to think and then produce the full structured JSON.
    """
    api_key = os.getenv("OPENROUTER_API_KEY")
    if not api_key:
        raise ValueError("OPENROUTER_API_KEY not found.")

    return ChatOpenAI(
        model=MODEL_CHAIN[0],
        api_key=api_key,
        base_url="https://openrouter.ai/api/v1",
        temperature=0,
        max_tokens=max_tokens,
        request_timeout=120,  # seconds; fail fast instead of hanging a CI run
    )
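
# Usage sketch (``Verdict`` is a hypothetical schema, not part of this module;
# ``with_structured_output`` is LangChain's standard binding for typed output):
#
#     from pydantic import BaseModel
#
#     class Verdict(BaseModel):
#         action: str        # e.g. "buy" / "hold" / "sell"
#         confidence: float  # 0.0 .. 1.0
#
#     structured = get_structured_llm().with_structured_output(Verdict)
#     verdict = structured.invoke("Re-evaluate the portfolio position in AAPL.")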


def invoke_with_fallback(
    prompt: str, max_retries: int = 2, run_name: str = "llm_call"
) -> str:
    """Invoke the LLM with automatic model fallback on 429 rate limits.

    Tries each model in ``MODEL_CHAIN`` until one succeeds and returns the
    response content string. Each invocation is tagged with the model name
    so LangSmith can filter by ``model:<name>`` and ``error:429`` for the
    error dashboard.
    """
    from src.core.logger import get_logger

    logger = get_logger(__name__)
    api_key = os.getenv("OPENROUTER_API_KEY")
    if not api_key:
        raise ValueError("OPENROUTER_API_KEY not found.")

    last_error = None
    for model_id in MODEL_CHAIN:
        for attempt in range(max_retries):
            try:
                llm = ChatOpenAI(
                    model=model_id,
                    api_key=api_key,
                    base_url="https://openrouter.ai/api/v1",
                    temperature=0,
                )
                # LangSmith: tag every call with model name + attempt number.
                config = RunnableConfig(
                    run_name=run_name,
                    tags=[f"model:{model_id}", f"attempt:{attempt + 1}"],
                    metadata={
                        "model_id": model_id,
                        "attempt": attempt + 1,
                        "fallback_position": MODEL_CHAIN.index(model_id),
                    },
                )
                response = llm.invoke(prompt, config=config)
                logger.info("LLM response from %s (attempt %d)", model_id, attempt + 1)
                return response.content
            except Exception as exc:
                last_error = exc
                err_str = str(exc)
                if "429" in err_str:
                    # Rate-limited: back off briefly, then move to the next model.
                    logger.warning(
                        "Rate-limited on %s (attempt %d), trying next model...",
                        model_id,
                        attempt + 1,
                    )
                    time.sleep(2)
                    break
                elif "404" in err_str:
                    # Model not currently served: skip straight to the next one.
                    logger.warning("Model %s not available, skipping", model_id)
                    break
                else:
                    # Unknown error: retry the same model, then give up on it.
                    logger.error("LLM error on %s: %s", model_id, exc)
                    if attempt < max_retries - 1:
                        time.sleep(1)
                    else:
                        break

    raise RuntimeError(
        f"All {len(MODEL_CHAIN)} models failed. "
        f"Last tried: {MODEL_CHAIN[-1]}. Last error: {last_error}"
    ) from last_error
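
# Usage sketch (hypothetical prompt; ``run_name`` becomes the trace name in
# LangSmith, and the model/attempt tags feed the error dashboard filters):
#
#     answer = invoke_with_fallback(
#         "Summarise today's portfolio moves in one paragraph.",
#         run_name="portfolio_reeval",
#     )
#     print(answer)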