"""Groq provider using OpenAI-compatible API.""" from typing import Any from config.settings import Settings from core.anthropic import ReasoningReplayMode, build_base_request_body from providers.base import ProviderConfig from providers.defaults import GROQ_DEFAULT_BASE from providers.openai_compat import OpenAIChatTransport class GroqProvider(OpenAIChatTransport): """Groq provider using OpenAI-compatible /chat/completions.""" # Mapping of proxy model refs to Groq API model IDs. # groq/ prefix is stripped, but the inner prefix (like qwen/) is kept. GROQ_MODEL_MAP: dict[str, str] = { "groq/qwen3-32b": "qwen/qwen3-32b", "groq/llama-3.3-70b-versatile": "llama-3.3-70b-versatile", "groq/llama-3.1-8b-instant": "llama-3.1-8b-instant", } def __init__(self, config: ProviderConfig, *, settings: Settings): base_url = (config.base_url or GROQ_DEFAULT_BASE).rstrip("/") if not base_url.endswith("/v1"): base_url = base_url + "/v1" # Groq has generous rate limits - set high limits with no adaptive throttling super().__init__( config, provider_name="Groq", base_url=base_url, api_key=config.api_key, nim_rate_limit=500, # High limit for Groq nim_max_concurrency=100, # High concurrency for Groq ) self._settings = settings def _build_request_body( self, request: Any, thinking_enabled: bool | None = None ) -> dict: thinking = self._is_thinking_enabled(request, thinking_enabled) reasoning_replay = ( ReasoningReplayMode.REASONING_CONTENT if thinking else ReasoningReplayMode.DISABLED ) body = build_base_request_body(request, reasoning_replay=reasoning_replay) # Map proxy model ref to actual Groq API model ID model = body.get("model", "") if model in self.GROQ_MODEL_MAP: body["model"] = self.GROQ_MODEL_MAP[model] elif model.startswith("groq/"): body["model"] = model[len("groq/") :] return body