Spaces:
Running
$(cat <<EOF
Browse files
Add Groq provider support with ultra-fast inference models.
Groq API key validated and working.
Models added:
- groq/llama-3.3-70b-versatile (fast, 70B reasoning)
- groq/llama-3.1-8b-instant (fastest, 8B general)
- groq/qwen3-32b (fast, reasoning capable)
Files modified:
- config/provider_catalog.py: Added GROQ_DEFAULT_BASE and groq provider descriptor
- providers/defaults.py: Re-export GROQ_DEFAULT_BASE
- config/settings.py: Added groq_api_key and provider_is_configured check
- providers/registry.py: Added _create_groq factory
- providers/groq/client.py: New provider client
- providers/groq/__init__.py: New provider init
- core/model_capabilities.py: Registered groq models
- api/routes.py: Added groq models to REQUESTED_PROVIDER_MODELS
- .env: Added GROQ_API_KEY
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
EOF
)
- .env +1 -0
- api/routes.py +4 -0
- config/provider_catalog.py +11 -1
- config/settings.py +4 -0
- core/model_capabilities.py +33 -0
- providers/defaults.py +2 -0
- providers/groq/__init__.py +5 -0
- providers/groq/client.py +41 -0
- providers/registry.py +7 -0
|
@@ -18,6 +18,7 @@ NVIDIA_NIM_FALLBACK_MODELS="nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct,nvidi
|
|
| 18 |
#CEREBRAS_API_KEY="<redacted>"
|
| 19 |
CEREBRAS_API_KEY="<redacted>"
|
| 20 |
SILICON_API_KEY="<redacted>"
|
|
|
|
| 21 |
|
| 22 |
LOG_RAW_API_PAYLOADS="true"
|
| 23 |
# Disable API key authentication (allow requests without auth)
|
|
|
|
| 18 |
#CEREBRAS_API_KEY="<redacted>"
|
| 19 |
CEREBRAS_API_KEY="<redacted>"
|
| 20 |
SILICON_API_KEY="<redacted>"
|
| 21 |
+
GROQ_API_KEY="<redacted>"
|
| 22 |
|
| 23 |
LOG_RAW_API_PAYLOADS="true"
|
| 24 |
# Disable API key authentication (allow requests without auth)
|
|
@@ -52,6 +52,10 @@ REQUESTED_PROVIDER_MODELS = [
|
|
| 52 |
"silicon/Qwen/Qwen3.5-27B",
|
| 53 |
"silicon/google/gemma-4-26B-A4B-it",
|
| 54 |
"silicon/google/gemma-4-31B-it",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
]
|
| 56 |
|
| 57 |
|
|
|
|
| 52 |
"silicon/Qwen/Qwen3.5-27B",
|
| 53 |
"silicon/google/gemma-4-26B-A4B-it",
|
| 54 |
"silicon/google/gemma-4-31B-it",
|
| 55 |
+
# Groq models (ultra fast inference)
|
| 56 |
+
"groq/llama-3.3-70b-versatile",
|
| 57 |
+
"groq/llama-3.1-8b-instant",
|
| 58 |
+
"groq/qwen3-32b",
|
| 59 |
]
|
| 60 |
|
| 61 |
|
|
@@ -16,6 +16,7 @@ NVIDIA_NIM_DEFAULT_BASE = "https://integrate.api.nvidia.com/v1"
|
|
| 16 |
ZEN_DEFAULT_BASE = "https://opencode.ai/zen"
|
| 17 |
CEREBRAS_DEFAULT_BASE = "https://api.cerebras.ai/v1"
|
| 18 |
SILICON_DEFAULT_BASE = "https://api.siliconflow.com/v1"
|
|
|
|
| 19 |
|
| 20 |
|
| 21 |
@dataclass(frozen=True, slots=True)
|
|
@@ -73,10 +74,19 @@ PROVIDER_CATALOG: dict[str, ProviderDescriptor] = {
|
|
| 73 |
default_base_url=SILICON_DEFAULT_BASE,
|
| 74 |
capabilities=("chat", "streaming", "tools", "thinking"),
|
| 75 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
}
|
| 77 |
|
| 78 |
# Order matches docs; must match PROVIDER_CATALOG keys.
|
| 79 |
-
SUPPORTED_PROVIDER_IDS: tuple[str, ...] = ("nvidia_nim", "zen", "cerebras", "silicon")
|
| 80 |
|
| 81 |
if len(set(SUPPORTED_PROVIDER_IDS)) != len(SUPPORTED_PROVIDER_IDS):
|
| 82 |
raise AssertionError("Duplicate provider ids in PROVIDER_CATALOG key order")
|
|
|
|
| 16 |
ZEN_DEFAULT_BASE = "https://opencode.ai/zen"
|
| 17 |
CEREBRAS_DEFAULT_BASE = "https://api.cerebras.ai/v1"
|
| 18 |
SILICON_DEFAULT_BASE = "https://api.siliconflow.com/v1"
|
| 19 |
+
GROQ_DEFAULT_BASE = "https://api.groq.com/openai/v1"
|
| 20 |
|
| 21 |
|
| 22 |
@dataclass(frozen=True, slots=True)
|
|
|
|
| 74 |
default_base_url=SILICON_DEFAULT_BASE,
|
| 75 |
capabilities=("chat", "streaming", "tools", "thinking"),
|
| 76 |
),
|
| 77 |
+
"groq": ProviderDescriptor(
|
| 78 |
+
provider_id="groq",
|
| 79 |
+
transport_type="openai_chat",
|
| 80 |
+
credential_env="GROQ_API_KEY",
|
| 81 |
+
credential_url="https://console.groq.com/keys",
|
| 82 |
+
credential_attr="groq_api_key",
|
| 83 |
+
default_base_url=GROQ_DEFAULT_BASE,
|
| 84 |
+
capabilities=("chat", "streaming", "tools", "thinking"),
|
| 85 |
+
),
|
| 86 |
}
|
| 87 |
|
| 88 |
# Order matches docs; must match PROVIDER_CATALOG keys.
|
| 89 |
+
SUPPORTED_PROVIDER_IDS: tuple[str, ...] = ("nvidia_nim", "zen", "cerebras", "silicon", "groq")
|
| 90 |
|
| 91 |
if len(set(SUPPORTED_PROVIDER_IDS)) != len(SUPPORTED_PROVIDER_IDS):
|
| 92 |
raise AssertionError("Duplicate provider ids in PROVIDER_CATALOG key order")
|
|
@@ -149,6 +149,8 @@ class Settings(BaseSettings):
|
|
| 149 |
cerebras_api_key: str = Field(default="", validation_alias="CEREBRAS_API_KEY")
|
| 150 |
# ==================== Silicon Flow Config ====================
|
| 151 |
silicon_api_key: str = Field(default="", validation_alias="SILICON_API_KEY")
|
|
|
|
|
|
|
| 152 |
zen_base_url: str = Field(
|
| 153 |
default="https://opencode.ai/zen", validation_alias="ZEN_BASE_URL"
|
| 154 |
)
|
|
@@ -543,6 +545,8 @@ class Settings(BaseSettings):
|
|
| 543 |
return bool(self.cerebras_api_key.strip())
|
| 544 |
if provider_id == "silicon":
|
| 545 |
return bool(self.silicon_api_key.strip())
|
|
|
|
|
|
|
| 546 |
# conservative default: assume not configured
|
| 547 |
return False
|
| 548 |
|
|
|
|
| 149 |
cerebras_api_key: str = Field(default="", validation_alias="CEREBRAS_API_KEY")
|
| 150 |
# ==================== Silicon Flow Config ====================
|
| 151 |
silicon_api_key: str = Field(default="", validation_alias="SILICON_API_KEY")
|
| 152 |
+
# ==================== Groq Config ====================
|
| 153 |
+
groq_api_key: str = Field(default="", validation_alias="GROQ_API_KEY")
|
| 154 |
zen_base_url: str = Field(
|
| 155 |
default="https://opencode.ai/zen", validation_alias="ZEN_BASE_URL"
|
| 156 |
)
|
|
|
|
| 545 |
return bool(self.cerebras_api_key.strip())
|
| 546 |
if provider_id == "silicon":
|
| 547 |
return bool(self.silicon_api_key.strip())
|
| 548 |
+
if provider_id == "groq":
|
| 549 |
+
return bool(self.groq_api_key.strip())
|
| 550 |
# conservative default: assume not configured
|
| 551 |
return False
|
| 552 |
|
|
@@ -228,6 +228,39 @@ MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {
|
|
| 228 |
speed="fast",
|
| 229 |
priority=76,
|
| 230 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
}
|
| 232 |
|
| 233 |
|
|
|
|
| 228 |
speed="fast",
|
| 229 |
priority=76,
|
| 230 |
),
|
| 231 |
+
# Groq models
|
| 232 |
+
"groq/llama-3.3-70b-versatile": ModelCapabilities(
|
| 233 |
+
provider_id="groq",
|
| 234 |
+
model_id="llama-3.3-70b-versatile",
|
| 235 |
+
model_ref="groq/llama-3.3-70b-versatile",
|
| 236 |
+
coding=True,
|
| 237 |
+
reasoning=True,
|
| 238 |
+
general_text=True,
|
| 239 |
+
max_tokens=32768,
|
| 240 |
+
speed="fast",
|
| 241 |
+
priority=85,
|
| 242 |
+
),
|
| 243 |
+
"groq/llama-3.1-8b-instant": ModelCapabilities(
|
| 244 |
+
provider_id="groq",
|
| 245 |
+
model_id="llama-3.1-8b-instant",
|
| 246 |
+
model_ref="groq/llama-3.1-8b-instant",
|
| 247 |
+
coding=True,
|
| 248 |
+
general_text=True,
|
| 249 |
+
max_tokens=131072,
|
| 250 |
+
speed="fast",
|
| 251 |
+
priority=90,
|
| 252 |
+
),
|
| 253 |
+
"groq/qwen3-32b": ModelCapabilities(
|
| 254 |
+
provider_id="groq",
|
| 255 |
+
model_id="qwen3-32b",
|
| 256 |
+
model_ref="groq/qwen3-32b",
|
| 257 |
+
coding=True,
|
| 258 |
+
reasoning=True,
|
| 259 |
+
general_text=True,
|
| 260 |
+
max_tokens=40960,
|
| 261 |
+
speed="medium",
|
| 262 |
+
priority=88,
|
| 263 |
+
),
|
| 264 |
}
|
| 265 |
|
| 266 |
|
|
@@ -2,6 +2,7 @@
|
|
| 2 |
|
| 3 |
from config.provider_catalog import (
|
| 4 |
CEREBRAS_DEFAULT_BASE,
|
|
|
|
| 5 |
NVIDIA_NIM_DEFAULT_BASE,
|
| 6 |
SILICON_DEFAULT_BASE,
|
| 7 |
ZEN_DEFAULT_BASE,
|
|
@@ -9,6 +10,7 @@ from config.provider_catalog import (
|
|
| 9 |
|
| 10 |
__all__ = (
|
| 11 |
"CEREBRAS_DEFAULT_BASE",
|
|
|
|
| 12 |
"NVIDIA_NIM_DEFAULT_BASE",
|
| 13 |
"SILICON_DEFAULT_BASE",
|
| 14 |
"ZEN_DEFAULT_BASE",
|
|
|
|
| 2 |
|
| 3 |
from config.provider_catalog import (
|
| 4 |
CEREBRAS_DEFAULT_BASE,
|
| 5 |
+
GROQ_DEFAULT_BASE,
|
| 6 |
NVIDIA_NIM_DEFAULT_BASE,
|
| 7 |
SILICON_DEFAULT_BASE,
|
| 8 |
ZEN_DEFAULT_BASE,
|
|
|
|
| 10 |
|
| 11 |
__all__ = (
|
| 12 |
"CEREBRAS_DEFAULT_BASE",
|
| 13 |
+
"GROQ_DEFAULT_BASE",
|
| 14 |
"NVIDIA_NIM_DEFAULT_BASE",
|
| 15 |
"SILICON_DEFAULT_BASE",
|
| 16 |
"ZEN_DEFAULT_BASE",
|
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Groq provider module."""
|
| 2 |
+
|
| 3 |
+
from providers.groq.client import GroqProvider
|
| 4 |
+
|
| 5 |
+
__all__ = ["GroqProvider"]
|
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Groq provider using OpenAI-compatible API."""
|
| 2 |
+
|
| 3 |
+
from typing import Any
|
| 4 |
+
|
| 5 |
+
from config.settings import Settings
|
| 6 |
+
from core.anthropic import ReasoningReplayMode, build_base_request_body
|
| 7 |
+
from providers.base import ProviderConfig
|
| 8 |
+
from providers.defaults import GROQ_DEFAULT_BASE
|
| 9 |
+
from providers.openai_compat import OpenAIChatTransport
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class GroqProvider(OpenAIChatTransport):
    """Groq provider that talks to the OpenAI-compatible /chat/completions API."""

    def __init__(self, config: ProviderConfig, *, settings: Settings):
        """Resolve the endpoint URL and initialise the underlying transport.

        Args:
            config: Provider configuration; ``base_url`` and ``api_key`` are
                read from it.
            settings: Application settings, kept on the instance as
                ``_settings``.
        """
        # Fall back to the catalog default when no base URL is configured,
        # and drop trailing slashes before checking the suffix.
        resolved_url = (config.base_url or GROQ_DEFAULT_BASE).rstrip("/")
        if not resolved_url.endswith("/v1"):
            resolved_url = f"{resolved_url}/v1"

        super().__init__(
            config,
            provider_name="Groq",
            base_url=resolved_url,
            api_key=config.api_key,
        )
        self._settings = settings

    def _build_request_body(
        self, request: Any, thinking_enabled: bool | None = None
    ) -> dict:
        """Build the request payload for *request*.

        Args:
            request: The incoming request, passed through to
                ``build_base_request_body``.
            thinking_enabled: Optional override forwarded to
                ``_is_thinking_enabled``.

        Returns:
            The request body, with any leading ``groq/`` removed from its
            ``model`` value.
        """
        # Reasoning content is only replayed when thinking is enabled.
        if self._is_thinking_enabled(request, thinking_enabled):
            replay_mode = ReasoningReplayMode.REASONING_CONTENT
        else:
            replay_mode = ReasoningReplayMode.DISABLED
        payload = build_base_request_body(request, reasoning_replay=replay_mode)

        # Strip groq/ prefix so the API gets the bare model ID
        prefix = "groq/"
        model_name = payload.get("model", "")
        if model_name.startswith(prefix):
            payload["model"] = model_name[len(prefix) :]
        return payload
|
|
@@ -56,11 +56,18 @@ def _create_silicon(config: ProviderConfig, settings: Settings) -> BaseProvider:
|
|
| 56 |
return SiliconProvider(config, settings=settings)
|
| 57 |
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
PROVIDER_FACTORIES: dict[str, ProviderFactory] = {
|
| 60 |
"nvidia_nim": _create_nvidia_nim,
|
| 61 |
"zen": _create_zen,
|
| 62 |
"cerebras": _create_cerebras,
|
| 63 |
"silicon": _create_silicon,
|
|
|
|
| 64 |
}
|
| 65 |
|
| 66 |
if set(PROVIDER_DESCRIPTORS) != set(SUPPORTED_PROVIDER_IDS) or set(
|
|
|
|
| 56 |
return SiliconProvider(config, settings=settings)
|
| 57 |
|
| 58 |
|
| 59 |
+
def _create_groq(config: ProviderConfig, settings: Settings) -> BaseProvider:
    """Construct the Groq provider for the given config and settings."""
    # Imported at call time rather than at module import
    # (presumably to defer loading the provider package; confirm).
    from providers.groq import GroqProvider

    provider = GroqProvider(config, settings=settings)
    return provider
|
| 63 |
+
|
| 64 |
+
|
| 65 |
PROVIDER_FACTORIES: dict[str, ProviderFactory] = {
|
| 66 |
"nvidia_nim": _create_nvidia_nim,
|
| 67 |
"zen": _create_zen,
|
| 68 |
"cerebras": _create_cerebras,
|
| 69 |
"silicon": _create_silicon,
|
| 70 |
+
"groq": _create_groq,
|
| 71 |
}
|
| 72 |
|
| 73 |
if set(PROVIDER_DESCRIPTORS) != set(SUPPORTED_PROVIDER_IDS) or set(
|