Spaces:
Running
Running
| """Groq provider using OpenAI-compatible API.""" | |
| from typing import Any | |
| from config.settings import Settings | |
| from core.anthropic import ReasoningReplayMode, build_base_request_body | |
| from providers.base import ProviderConfig | |
| from providers.defaults import GROQ_DEFAULT_BASE | |
| from providers.openai_compat import OpenAIChatTransport | |
class GroqProvider(OpenAIChatTransport):
    """Groq provider built on the OpenAI-compatible /chat/completions transport."""

    # Proxy model refs -> model IDs sent to the Groq API.
    # The leading "groq/" is dropped; an inner vendor prefix (e.g. "qwen/")
    # is part of the Groq model ID and is therefore preserved.
    GROQ_MODEL_MAP: dict[str, str] = {
        "groq/qwen3-32b": "qwen/qwen3-32b",
        "groq/llama-3.3-70b-versatile": "llama-3.3-70b-versatile",
        "groq/llama-3.1-8b-instant": "llama-3.1-8b-instant",
    }

    def __init__(self, config: ProviderConfig, *, settings: Settings):
        """Initialise the transport against the configured or default endpoint.

        Args:
            config: Provider configuration; its ``base_url`` and ``api_key``
                are read here and the object is passed on to the base class.
            settings: Application settings, kept on the instance as
                ``_settings``.
        """
        # Normalise the endpoint: strip trailing slashes, then make sure the
        # URL ends with the "/v1" path segment.
        endpoint = (config.base_url or GROQ_DEFAULT_BASE).rstrip("/")
        if not endpoint.endswith("/v1"):
            endpoint = f"{endpoint}/v1"

        # Fixed, deliberately high limits. The original author's note is that
        # Groq has generous rate limits, so no adaptive throttling is wanted.
        # NOTE(review): the units of these values and whether throttling is
        # actually disabled are decided by OpenAIChatTransport - confirm there.
        super().__init__(
            config,
            provider_name="Groq",
            base_url=endpoint,
            api_key=config.api_key,
            nim_rate_limit=500,
            nim_max_concurrency=100,
        )
        self._settings = settings

    def _build_request_body(
        self, request: Any, thinking_enabled: bool | None = None
    ) -> dict:
        """Build the request body and rewrite its model ref to a Groq model ID.

        Args:
            request: Incoming request; handed to ``_is_thinking_enabled`` and
                ``build_base_request_body``.
            thinking_enabled: Forwarded unchanged to ``_is_thinking_enabled``
                together with ``request``.

        Returns:
            The body produced by ``build_base_request_body``. Its ``"model"``
            entry is replaced by the ``GROQ_MODEL_MAP`` value when the ref is
            listed there, otherwise by the ref without its leading ``"groq/"``
            when it has one; any other model value is left as it was.
        """
        # Reasoning content is only replayed when thinking is enabled.
        if self._is_thinking_enabled(request, thinking_enabled):
            replay_mode = ReasoningReplayMode.REASONING_CONTENT
        else:
            replay_mode = ReasoningReplayMode.DISABLED
        payload = build_base_request_body(request, reasoning_replay=replay_mode)

        model_ref = payload.get("model", "")

        # Explicit mapping takes precedence over generic prefix stripping.
        if model_ref in self.GROQ_MODEL_MAP:
            payload["model"] = self.GROQ_MODEL_MAP[model_ref]
            return payload

        # Unlisted refs: only drop the proxy's own "groq/" prefix.
        if model_ref.startswith("groq/"):
            payload["model"] = model_ref.removeprefix("groq/")
        return payload