Yash030 committed on
Commit
98fdd46
·
1 Parent(s): 55f294b

$(cat <<EOF

Browse files

Add Groq provider support with ultra-fast inference models.

Groq API key validated and working.
Models added:
- groq/llama-3.3-70b-versatile (fast, 70B reasoning)
- groq/llama-3.1-8b-instant (fastest, 8B general)
- groq/qwen3-32b (fast, reasoning capable)

Files modified:
- config/provider_catalog.py: Added GROQ_DEFAULT_BASE and groq provider descriptor
- providers/defaults.py: Re-export GROQ_DEFAULT_BASE
- config/settings.py: Added groq_api_key and provider_is_configured check
- providers/registry.py: Added _create_groq factory
- providers/groq/client.py: New provider client
- providers/groq/__init__.py: New provider init
- core/model_capabilities.py: Registered groq models
- api/routes.py: Added groq models to REQUESTED_PROVIDER_MODELS
- .env: Added GROQ_API_KEY

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
EOF
)

.env CHANGED
@@ -18,6 +18,7 @@ NVIDIA_NIM_FALLBACK_MODELS="nvidia_nim/qwen/qwen3-coder-480b-a35b-instruct,nvidi
18
  #CEREBRAS_API_KEY="csk-2ewy2h26eeph4yex94kmjnfwwx35pdpyyxkv3j6wcj4cxc3t"
19
  CEREBRAS_API_KEY="csk-2ewy2h26eeph4yex94kmjnfwwx35pdpyyxkv3j6wcj4cxc3t"
20
  SILICON_API_KEY="sk-vkswknrlhztbogulqjizbxpkdipbafudnirbrhzosxjkvmri"
 
21
 
22
  LOG_RAW_API_PAYLOADS="true"
23
  # Disable API key authentication (allow requests without auth)
 
18
  #CEREBRAS_API_KEY="csk-2ewy2h26eeph4yex94kmjnfwwx35pdpyyxkv3j6wcj4cxc3t"
19
  CEREBRAS_API_KEY="csk-2ewy2h26eeph4yex94kmjnfwwx35pdpyyxkv3j6wcj4cxc3t"
20
  SILICON_API_KEY="sk-vkswknrlhztbogulqjizbxpkdipbafudnirbrhzosxjkvmri"
21
+ GROQ_API_KEY="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"
22
 
23
  LOG_RAW_API_PAYLOADS="true"
24
  # Disable API key authentication (allow requests without auth)
api/routes.py CHANGED
@@ -52,6 +52,10 @@ REQUESTED_PROVIDER_MODELS = [
52
  "silicon/Qwen/Qwen3.5-27B",
53
  "silicon/google/gemma-4-26B-A4B-it",
54
  "silicon/google/gemma-4-31B-it",
 
 
 
 
55
  ]
56
 
57
 
 
52
  "silicon/Qwen/Qwen3.5-27B",
53
  "silicon/google/gemma-4-26B-A4B-it",
54
  "silicon/google/gemma-4-31B-it",
55
+ # Groq models (ultra fast inference)
56
+ "groq/llama-3.3-70b-versatile",
57
+ "groq/llama-3.1-8b-instant",
58
+ "groq/qwen3-32b",
59
  ]
60
 
61
 
config/provider_catalog.py CHANGED
@@ -16,6 +16,7 @@ NVIDIA_NIM_DEFAULT_BASE = "https://integrate.api.nvidia.com/v1"
16
  ZEN_DEFAULT_BASE = "https://opencode.ai/zen"
17
  CEREBRAS_DEFAULT_BASE = "https://api.cerebras.ai/v1"
18
  SILICON_DEFAULT_BASE = "https://api.siliconflow.com/v1"
 
19
 
20
 
21
  @dataclass(frozen=True, slots=True)
@@ -73,10 +74,19 @@ PROVIDER_CATALOG: dict[str, ProviderDescriptor] = {
73
  default_base_url=SILICON_DEFAULT_BASE,
74
  capabilities=("chat", "streaming", "tools", "thinking"),
75
  ),
 
 
 
 
 
 
 
 
 
76
  }
77
 
78
  # Order matches docs; must match PROVIDER_CATALOG keys.
79
- SUPPORTED_PROVIDER_IDS: tuple[str, ...] = ("nvidia_nim", "zen", "cerebras", "silicon")
80
 
81
  if len(set(SUPPORTED_PROVIDER_IDS)) != len(SUPPORTED_PROVIDER_IDS):
82
  raise AssertionError("Duplicate provider ids in PROVIDER_CATALOG key order")
 
16
  ZEN_DEFAULT_BASE = "https://opencode.ai/zen"
17
  CEREBRAS_DEFAULT_BASE = "https://api.cerebras.ai/v1"
18
  SILICON_DEFAULT_BASE = "https://api.siliconflow.com/v1"
19
+ GROQ_DEFAULT_BASE = "https://api.groq.com/openai/v1"
20
 
21
 
22
  @dataclass(frozen=True, slots=True)
 
74
  default_base_url=SILICON_DEFAULT_BASE,
75
  capabilities=("chat", "streaming", "tools", "thinking"),
76
  ),
77
+ "groq": ProviderDescriptor(
78
+ provider_id="groq",
79
+ transport_type="openai_chat",
80
+ credential_env="GROQ_API_KEY",
81
+ credential_url="https://console.groq.com/keys",
82
+ credential_attr="groq_api_key",
83
+ default_base_url=GROQ_DEFAULT_BASE,
84
+ capabilities=("chat", "streaming", "tools", "thinking"),
85
+ ),
86
  }
87
 
88
  # Order matches docs; must match PROVIDER_CATALOG keys.
89
+ SUPPORTED_PROVIDER_IDS: tuple[str, ...] = ("nvidia_nim", "zen", "cerebras", "silicon", "groq")
90
 
91
  if len(set(SUPPORTED_PROVIDER_IDS)) != len(SUPPORTED_PROVIDER_IDS):
92
  raise AssertionError("Duplicate provider ids in PROVIDER_CATALOG key order")
config/settings.py CHANGED
@@ -149,6 +149,8 @@ class Settings(BaseSettings):
149
  cerebras_api_key: str = Field(default="", validation_alias="CEREBRAS_API_KEY")
150
  # ==================== Silicon Flow Config ====================
151
  silicon_api_key: str = Field(default="", validation_alias="SILICON_API_KEY")
 
 
152
  zen_base_url: str = Field(
153
  default="https://opencode.ai/zen", validation_alias="ZEN_BASE_URL"
154
  )
@@ -543,6 +545,8 @@ class Settings(BaseSettings):
543
  return bool(self.cerebras_api_key.strip())
544
  if provider_id == "silicon":
545
  return bool(self.silicon_api_key.strip())
 
 
546
  # conservative default: assume not configured
547
  return False
548
 
 
149
  cerebras_api_key: str = Field(default="", validation_alias="CEREBRAS_API_KEY")
150
  # ==================== Silicon Flow Config ====================
151
  silicon_api_key: str = Field(default="", validation_alias="SILICON_API_KEY")
152
+ # ==================== Groq Config ====================
153
+ groq_api_key: str = Field(default="", validation_alias="GROQ_API_KEY")
154
  zen_base_url: str = Field(
155
  default="https://opencode.ai/zen", validation_alias="ZEN_BASE_URL"
156
  )
 
545
  return bool(self.cerebras_api_key.strip())
546
  if provider_id == "silicon":
547
  return bool(self.silicon_api_key.strip())
548
+ if provider_id == "groq":
549
+ return bool(self.groq_api_key.strip())
550
  # conservative default: assume not configured
551
  return False
552
 
core/model_capabilities.py CHANGED
@@ -228,6 +228,39 @@ MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {
228
  speed="fast",
229
  priority=76,
230
  ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  }
232
 
233
 
 
228
  speed="fast",
229
  priority=76,
230
  ),
231
+ # Groq models
232
+ "groq/llama-3.3-70b-versatile": ModelCapabilities(
233
+ provider_id="groq",
234
+ model_id="llama-3.3-70b-versatile",
235
+ model_ref="groq/llama-3.3-70b-versatile",
236
+ coding=True,
237
+ reasoning=True,
238
+ general_text=True,
239
+ max_tokens=32768,
240
+ speed="fast",
241
+ priority=85,
242
+ ),
243
+ "groq/llama-3.1-8b-instant": ModelCapabilities(
244
+ provider_id="groq",
245
+ model_id="llama-3.1-8b-instant",
246
+ model_ref="groq/llama-3.1-8b-instant",
247
+ coding=True,
248
+ general_text=True,
249
+ max_tokens=131072,
250
+ speed="fast",
251
+ priority=90,
252
+ ),
253
+ "groq/qwen3-32b": ModelCapabilities(
254
+ provider_id="groq",
255
+ model_id="qwen3-32b",
256
+ model_ref="groq/qwen3-32b",
257
+ coding=True,
258
+ reasoning=True,
259
+ general_text=True,
260
+ max_tokens=40960,
261
+ speed="medium",
262
+ priority=88,
263
+ ),
264
  }
265
 
266
 
providers/defaults.py CHANGED
@@ -2,6 +2,7 @@
2
 
3
  from config.provider_catalog import (
4
  CEREBRAS_DEFAULT_BASE,
 
5
  NVIDIA_NIM_DEFAULT_BASE,
6
  SILICON_DEFAULT_BASE,
7
  ZEN_DEFAULT_BASE,
@@ -9,6 +10,7 @@ from config.provider_catalog import (
9
 
10
  __all__ = (
11
  "CEREBRAS_DEFAULT_BASE",
 
12
  "NVIDIA_NIM_DEFAULT_BASE",
13
  "SILICON_DEFAULT_BASE",
14
  "ZEN_DEFAULT_BASE",
 
2
 
3
  from config.provider_catalog import (
4
  CEREBRAS_DEFAULT_BASE,
5
+ GROQ_DEFAULT_BASE,
6
  NVIDIA_NIM_DEFAULT_BASE,
7
  SILICON_DEFAULT_BASE,
8
  ZEN_DEFAULT_BASE,
 
10
 
11
  __all__ = (
12
  "CEREBRAS_DEFAULT_BASE",
13
+ "GROQ_DEFAULT_BASE",
14
  "NVIDIA_NIM_DEFAULT_BASE",
15
  "SILICON_DEFAULT_BASE",
16
  "ZEN_DEFAULT_BASE",
providers/groq/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ """Groq provider module."""
2
+
3
+ from providers.groq.client import GroqProvider
4
+
5
+ __all__ = ["GroqProvider"]
providers/groq/client.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Groq provider using OpenAI-compatible API."""
2
+
3
+ from typing import Any
4
+
5
+ from config.settings import Settings
6
+ from core.anthropic import ReasoningReplayMode, build_base_request_body
7
+ from providers.base import ProviderConfig
8
+ from providers.defaults import GROQ_DEFAULT_BASE
9
+ from providers.openai_compat import OpenAIChatTransport
10
+
11
+
12
+ class GroqProvider(OpenAIChatTransport):
13
+ """Groq provider using OpenAI-compatible /chat/completions."""
14
+
15
+ def __init__(self, config: ProviderConfig, *, settings: Settings):
16
+ base_url = (config.base_url or GROQ_DEFAULT_BASE).rstrip("/")
17
+ if not base_url.endswith("/v1"):
18
+ base_url = base_url + "/v1"
19
+ super().__init__(
20
+ config,
21
+ provider_name="Groq",
22
+ base_url=base_url,
23
+ api_key=config.api_key,
24
+ )
25
+ self._settings = settings
26
+
27
+ def _build_request_body(
28
+ self, request: Any, thinking_enabled: bool | None = None
29
+ ) -> dict:
30
+ thinking = self._is_thinking_enabled(request, thinking_enabled)
31
+ reasoning_replay = (
32
+ ReasoningReplayMode.REASONING_CONTENT
33
+ if thinking
34
+ else ReasoningReplayMode.DISABLED
35
+ )
36
+ body = build_base_request_body(request, reasoning_replay=reasoning_replay)
37
+ # Strip groq/ prefix so the API gets the bare model ID
38
+ model = body.get("model", "")
39
+ if model.startswith("groq/"):
40
+ body["model"] = model[len("groq/") :]
41
+ return body
providers/registry.py CHANGED
@@ -56,11 +56,18 @@ def _create_silicon(config: ProviderConfig, settings: Settings) -> BaseProvider:
56
  return SiliconProvider(config, settings=settings)
57
 
58
 
 
 
 
 
 
 
59
  PROVIDER_FACTORIES: dict[str, ProviderFactory] = {
60
  "nvidia_nim": _create_nvidia_nim,
61
  "zen": _create_zen,
62
  "cerebras": _create_cerebras,
63
  "silicon": _create_silicon,
 
64
  }
65
 
66
  if set(PROVIDER_DESCRIPTORS) != set(SUPPORTED_PROVIDER_IDS) or set(
 
56
  return SiliconProvider(config, settings=settings)
57
 
58
 
59
+ def _create_groq(config: ProviderConfig, settings: Settings) -> BaseProvider:
60
+ from providers.groq import GroqProvider
61
+
62
+ return GroqProvider(config, settings=settings)
63
+
64
+
65
  PROVIDER_FACTORIES: dict[str, ProviderFactory] = {
66
  "nvidia_nim": _create_nvidia_nim,
67
  "zen": _create_zen,
68
  "cerebras": _create_cerebras,
69
  "silicon": _create_silicon,
70
+ "groq": _create_groq,
71
  }
72
 
73
  if set(PROVIDER_DESCRIPTORS) != set(SUPPORTED_PROVIDER_IDS) or set(