"""LLM utility functions using Groq API (OpenAI-compatible).""" from langchain_core.language_models.chat_models import BaseChatModel from langchain_openai import ChatOpenAI from src.config import settings from src.utils.logging import log_pipeline _model_cache: dict[str, BaseChatModel] = {} _override_large_model: str = None # Global override for large model def set_large_model_override(model_name: str = None): """Set global override for large model.""" global _override_large_model _override_large_model = model_name def get_small_model() -> BaseChatModel: """Get or create small LLM (for routing, reranking, RAG) - using Groq.""" cache_key = "small" if cache_key in _model_cache: return _model_cache[cache_key] model = ChatOpenAI( model=settings.model_small, api_key=settings.groq_api_key, base_url=settings.groq_base_url, temperature=0.6, max_tokens=4096, ) _model_cache[cache_key] = model log_pipeline(f"[Model] Small model initialized: {settings.model_small} (Groq)") return model def get_large_model(model_name: str = None) -> BaseChatModel: """Get or create large LLM (for logic/direct answering) - using Groq. Args: model_name: Optional model name to override default """ # Use override if set, otherwise use parameter or default effective_model = _override_large_model or model_name or settings.model_large cache_key = f"large_{effective_model}" # Always recreate if override is set (for Streamlit model switching) if _override_large_model and cache_key in _model_cache: del _model_cache[cache_key] if cache_key in _model_cache: return _model_cache[cache_key] # Use higher temperature for GPT-OSS-120B to encourage reasoning temperature = 0.3 if "gpt-oss-120b" in effective_model.lower() else 0.0 model = ChatOpenAI( model=effective_model, api_key=settings.groq_api_key, base_url=settings.groq_base_url, temperature=temperature, max_tokens=2048, ) _model_cache[cache_key] = model log_pipeline(f"[Model] Large model initialized: {effective_model} (Groq, temp={temperature})") return model def get_available_large_models() -> list[str]: """Get list of available large models for testing.""" return settings.available_large_models