# NOTE(review): commit metadata ("Deploy VietQA API", ebb8326 by quanho114) was
# pasted above the module docstring during export; kept here as a comment so the
# module remains importable.
"""LLM utility functions using Groq API (OpenAI-compatible)."""
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_openai import ChatOpenAI
from src.config import settings
from src.utils.logging import log_pipeline
# Process-wide cache of constructed chat models, keyed by "small" or
# "large_<model_name>"; models are created once and reused across calls.
_model_cache: dict[str, BaseChatModel] = {}
# When set (e.g. from a Streamlit UI), this name takes precedence over both the
# per-call model_name argument and settings.model_large in get_large_model().
_override_large_model: str | None = None # Global override for large model
def set_large_model_override(model_name: str = None):
"""Set global override for large model."""
global _override_large_model
_override_large_model = model_name
def get_small_model() -> BaseChatModel:
    """Return the Groq-backed small chat model, creating it on first use.

    The small model serves the lightweight pipeline stages (routing,
    reranking, RAG). The instance is memoized in the module-level cache,
    so repeated calls are cheap.
    """
    key = "small"
    cached = _model_cache.get(key)
    if cached is not None:
        return cached
    small = ChatOpenAI(
        model=settings.model_small,
        api_key=settings.groq_api_key,
        base_url=settings.groq_base_url,
        temperature=0.6,
        max_tokens=4096,
    )
    _model_cache[key] = small
    log_pipeline(f"[Model] Small model initialized: {settings.model_small} (Groq)")
    return small
def get_large_model(model_name: str | None = None) -> BaseChatModel:
    """Get or create large LLM (for logic/direct answering) - using Groq.

    The effective model name is resolved in priority order: the global
    override set via ``set_large_model_override()``, then the
    ``model_name`` argument, then ``settings.model_large``.

    Args:
        model_name: Optional model name to override default. Ignored while
            a global override is active.

    Returns:
        A ``ChatOpenAI`` instance pointed at Groq, cached per model name
        (except while an override is active — see below).
    """
    # Use override if set, otherwise use parameter or default
    effective_model = _override_large_model or model_name or settings.model_large
    cache_key = f"large_{effective_model}"
    # While an override is active, always drop any cached instance and
    # rebuild, so Streamlit model switching takes effect immediately;
    # otherwise serve from the cache when possible.
    if _override_large_model:
        _model_cache.pop(cache_key, None)
    elif cache_key in _model_cache:
        return _model_cache[cache_key]
    # Use higher temperature for GPT-OSS-120B to encourage reasoning;
    # all other large models run deterministically at 0.0.
    temperature = 0.3 if "gpt-oss-120b" in effective_model.lower() else 0.0
    model = ChatOpenAI(
        model=effective_model,
        api_key=settings.groq_api_key,
        base_url=settings.groq_base_url,
        temperature=temperature,
        max_tokens=2048,
    )
    _model_cache[cache_key] = model
    log_pipeline(f"[Model] Large model initialized: {effective_model} (Groq, temp={temperature})")
    return model
def get_available_large_models() -> list[str]:
    """Return the configured list of large-model names available for testing."""
    models = settings.available_large_models
    return models