"""Utility functions for working with the language model."""
import logging
from typing import Optional

from langchain_google_genai import ChatGoogleGenerativeAI

from config import settings
from services.google import ApiKeyPool
| logger = logging.getLogger(__name__) | |
| _pool = ApiKeyPool() | |
| MODEL_NAME = "gemini-2.5-flash" | |
def _get_api_key() -> str:
    """Fetch the next API key from the shared pool.

    Selection is round-robin and thread-safe (delegated to ``ApiKeyPool``).
    """
    key = _pool.get_key_sync()
    return key
def create_llm(
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
) -> ChatGoogleGenerativeAI:
    """Create a standard LLM instance.

    Args:
        temperature: Sampling temperature; defaults to ``settings.temperature``.
        top_p: Nucleus-sampling cutoff; defaults to ``settings.top_p``.

    Returns:
        A configured ``ChatGoogleGenerativeAI`` client for ``MODEL_NAME``.
    """
    # Resolve defaults at call time rather than at import time: the original
    # used ``settings.*`` directly as default values, which freezes them when
    # the module is first imported and silently ignores later config changes.
    if temperature is None:
        temperature = settings.temperature
    if top_p is None:
        top_p = settings.top_p
    llm = ChatGoogleGenerativeAI(
        model=MODEL_NAME,
        google_api_key=_get_api_key(),
        temperature=temperature,
        top_p=top_p,
        thinking_budget=1024,  # cap on internal "thinking" tokens for 2.5 models
        timeout=settings.request_timeout,
        max_retries=3,
    )
    return llm
def create_light_llm(
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
) -> ChatGoogleGenerativeAI:
    """Create a lightweight LLM instance backed by ``gemini-2.0-flash``.

    Unlike ``create_llm``, no ``thinking_budget`` is set.

    NOTE(review): the original docstring claimed "a shorter timeout", but the
    code passes the same ``settings.request_timeout`` as ``create_llm`` —
    confirm which was intended.

    Args:
        temperature: Sampling temperature; defaults to ``settings.temperature``.
        top_p: Nucleus-sampling cutoff; defaults to ``settings.top_p``.

    Returns:
        A configured ``ChatGoogleGenerativeAI`` client.
    """
    # Resolve defaults at call time so runtime config changes take effect
    # (the original froze settings values at import time).
    if temperature is None:
        temperature = settings.temperature
    if top_p is None:
        top_p = settings.top_p
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.0-flash",
        google_api_key=_get_api_key(),
        temperature=temperature,
        top_p=top_p,
        timeout=settings.request_timeout,
        max_retries=3,
    )
    return llm
def create_precise_llm() -> ChatGoogleGenerativeAI:
    """Build an LLM configured for deterministic output.

    Zero temperature plus full nucleus mass (``top_p=1``) makes sampling
    effectively greedy.
    """
    deterministic_params = {"temperature": 0, "top_p": 1}
    return create_llm(**deterministic_params)