Yash030's picture
Add Cerebras, Silicon Flow, and Groq providers with debug logging.
db83b53
"""Groq provider using OpenAI-compatible API."""
from typing import Any
from config.settings import Settings
from core.anthropic import ReasoningReplayMode, build_base_request_body
from providers.base import ProviderConfig
from providers.defaults import GROQ_DEFAULT_BASE
from providers.openai_compat import OpenAIChatTransport
class GroqProvider(OpenAIChatTransport):
    """Groq backend speaking the OpenAI-compatible /chat/completions API."""

    # Proxy model refs -> model IDs sent to the Groq API.
    # The outer "groq/" prefix is dropped; an inner prefix such as "qwen/" stays.
    GROQ_MODEL_MAP: dict[str, str] = {
        "groq/qwen3-32b": "qwen/qwen3-32b",
        "groq/llama-3.3-70b-versatile": "llama-3.3-70b-versatile",
        "groq/llama-3.1-8b-instant": "llama-3.1-8b-instant",
    }

    def __init__(self, config: ProviderConfig, *, settings: Settings):
        """Resolve the API base URL and initialise the underlying transport.

        Args:
            config: Provider configuration; its ``base_url`` (falling back to
                ``GROQ_DEFAULT_BASE`` when falsy) and ``api_key`` are used.
            settings: Application settings, kept on the instance as
                ``_settings``.
        """
        # Normalise the URL so it carries exactly one trailing "/v1" segment
        # and no trailing slash.
        root = (config.base_url or GROQ_DEFAULT_BASE).rstrip("/")
        resolved_url = root if root.endswith("/v1") else root + "/v1"

        # Groq has generous rate limits, so pass high ceilings to the transport.
        super().__init__(
            config,
            provider_name="Groq",
            base_url=resolved_url,
            api_key=config.api_key,
            nim_rate_limit=500,
            nim_max_concurrency=100,
        )
        self._settings = settings

    def _build_request_body(
        self, request: Any, thinking_enabled: bool | None = None
    ) -> dict:
        """Build the request payload and rewrite its model ref for Groq.

        Args:
            request: Incoming request handed to ``build_base_request_body``.
            thinking_enabled: Optional override forwarded to
                ``_is_thinking_enabled``.

        Returns:
            The body produced by ``build_base_request_body``, with ``"model"``
            replaced by its ``GROQ_MODEL_MAP`` entry when one exists, or with a
            leading ``"groq/"`` removed otherwise. Any other model value is
            left as it was.
        """
        # Reasoning content is replayed only when thinking is enabled.
        if self._is_thinking_enabled(request, thinking_enabled):
            replay_mode = ReasoningReplayMode.REASONING_CONTENT
        else:
            replay_mode = ReasoningReplayMode.DISABLED

        payload = build_base_request_body(request, reasoning_replay=replay_mode)

        requested = payload.get("model", "")

        # An explicit mapping wins over generic prefix stripping.
        if requested in self.GROQ_MODEL_MAP:
            payload["model"] = self.GROQ_MODEL_MAP[requested]
            return payload

        if requested.startswith("groq/"):
            payload["model"] = requested.removeprefix("groq/")

        return payload