Spaces:
Sleeping
Sleeping
| import os | |
| import time | |
| from dotenv import load_dotenv | |
| from langchain_openai import ChatOpenAI | |
| from langchain_core.runnables import RunnableConfig | |
| load_dotenv() | |
# Module-level singleton cache for get_llm(); populated lazily on first call.
_llm_instance = None

# Ordered by preference: quality + reliability + speed.
# invoke_with_fallback() walks this list top-to-bottom; get_llm() and
# get_structured_llm() always use the first entry.
MODEL_CHAIN = [
    "nvidia/nemotron-3-nano-30b-a3b:free",
    "stepfun/step-3.5-flash:free",
    "arcee-ai/trinity-large-preview:free",
    "google/gemma-3-27b-it:free",
    "meta-llama/llama-3.3-70b-instruct:free",
    "mistralai/mistral-small-3.1-24b-instruct:free",
]
def get_llm() -> ChatOpenAI:
    """Return a lazily-initialised, module-level LLM singleton.

    The instance is created on first call and cached in ``_llm_instance``
    for all later calls. It is always bound to ``MODEL_CHAIN[0]``; per-call
    model fallback is handled separately by :func:`invoke_with_fallback`,
    not here.

    Returns:
        ChatOpenAI: a client pointed at the OpenRouter API.

    Raises:
        ValueError: if ``OPENROUTER_API_KEY`` is not set in the environment.
    """
    global _llm_instance
    if _llm_instance is not None:
        return _llm_instance

    api_key = os.getenv("OPENROUTER_API_KEY")
    if not api_key:
        # Imported lazily — presumably to avoid a circular import at module
        # load time (TODO confirm against src.core.logger).
        from src.core.logger import get_logger
        logger = get_logger(__name__)
        logger.error("OPENROUTER_API_KEY not found in environment")
        # Log only the *names* of key-like env vars to aid debugging
        # without leaking secret values.
        available = [k for k in os.environ if "API" in k or "KEY" in k]
        logger.error("Available key-like env vars: %s", available)
        raise ValueError("OPENROUTER_API_KEY not found. Check your secrets.")

    _llm_instance = ChatOpenAI(
        model=MODEL_CHAIN[0],
        api_key=api_key,
        base_url="https://openrouter.ai/api/v1",
        temperature=0,
    )
    return _llm_instance
def get_structured_llm(max_tokens: int = 65536) -> ChatOpenAI:
    """Build a fresh LLM client tuned for structured (JSON) output.

    A generous ``max_tokens`` ensures reasoning models have enough headroom
    to think and then produce the full structured payload. Unlike
    ``get_llm`` this is not a singleton — a new client is built per call.

    Args:
        max_tokens: upper bound on tokens generated per response.

    Returns:
        ChatOpenAI: a client bound to ``MODEL_CHAIN[0]`` on OpenRouter.

    Raises:
        ValueError: if ``OPENROUTER_API_KEY`` is missing from the environment.
    """
    key = os.getenv("OPENROUTER_API_KEY")
    if key:
        return ChatOpenAI(
            model=MODEL_CHAIN[0],
            api_key=key,
            base_url="https://openrouter.ai/api/v1",
            temperature=0,
            max_tokens=max_tokens,
            request_timeout=120,
        )
    raise ValueError("OPENROUTER_API_KEY not found.")
def invoke_with_fallback(prompt: str, max_retries: int = 2, run_name: str = "llm_call") -> str:
    """Invoke the LLM, falling back through ``MODEL_CHAIN`` on failure.

    Each model is tried up to ``max_retries`` times. A 429 (rate limit) or
    404 (model unavailable) response moves straight on to the next model in
    the chain; any other error is retried on the same model first.

    Every invocation is tagged with the model name and attempt number so
    LangSmith can filter runs by ``model:<name>`` / ``attempt:<n>``.
    (Errors are detected by substring-matching the exception text for
    "429"/"404" — fragile, but the provider does not expose status codes
    on these exceptions in a uniform way.)

    Args:
        prompt: the prompt string sent to the model.
        max_retries: attempts per model before moving down the chain.
        run_name: LangSmith run name attached to each invocation.

    Returns:
        The response content string from the first model that succeeds.

    Raises:
        ValueError: if ``OPENROUTER_API_KEY`` is not set.
        RuntimeError: if every model in ``MODEL_CHAIN`` fails.
    """
    # Imported lazily — presumably to avoid a circular import at module
    # load time (TODO confirm against src.core.logger).
    from src.core.logger import get_logger
    logger = get_logger(__name__)

    api_key = os.getenv("OPENROUTER_API_KEY")
    if not api_key:
        raise ValueError("OPENROUTER_API_KEY not found.")

    last_error = None
    # enumerate() yields the fallback position directly, replacing the
    # original O(n) MODEL_CHAIN.index(model_id) scan on every attempt.
    for position, model_id in enumerate(MODEL_CHAIN):
        for attempt in range(max_retries):
            try:
                llm = ChatOpenAI(
                    model=model_id,
                    api_key=api_key,
                    base_url="https://openrouter.ai/api/v1",
                    temperature=0,
                )
                # LangSmith: tag every call with model name + attempt number.
                config = RunnableConfig(
                    run_name=run_name,
                    tags=[f"model:{model_id}", f"attempt:{attempt + 1}"],
                    metadata={
                        "model_id": model_id,
                        "attempt": attempt + 1,
                        "fallback_position": position,
                    },
                )
                response = llm.invoke(prompt, config=config)
                logger.info("LLM response from %s (attempt %d)", model_id, attempt + 1)
                return response.content
            except Exception as exc:
                last_error = exc
                err_str = str(exc)
                if "429" in err_str:
                    # Rate-limited: back off briefly, then skip to next model.
                    logger.warning("Rate-limited on %s (attempt %d), trying next...", model_id, attempt + 1)
                    time.sleep(2)
                    break  # move to next model
                elif "404" in err_str:
                    logger.warning("Model %s not available, skipping", model_id)
                    break  # move to next model
                else:
                    logger.error("LLM error on %s: %s", model_id, exc)
                    if attempt < max_retries - 1:
                        time.sleep(1)  # brief pause before retrying same model
                    else:
                        break
    raise RuntimeError(f"All {len(MODEL_CHAIN)} models failed. Last tried: {MODEL_CHAIN[-1]}. Last error: {last_error}")