Yash030 committed on
Commit
332dd16
·
1 Parent(s): 98fdd46

$(cat <<EOF

Browse files

Fix Groq and Cerebras model IDs and rate limits.

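Groq: After the groq/ proxy prefix is stripped, model IDs must keep their inner vendor prefix (e.g. groq/qwen3-32b maps to qwen/qwen3-32b, not qwen3-32b); rate limit set to 500 and max concurrency to 100.
Cerebras: Removed the bare-ID, zai-glm-4.7, and gpt-oss-120b entries from the model map (inaccessible models); rate limit set to 300 and max concurrency to 80.
Silicon: Increased rate limit to 300/min and max concurrency to 80.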

The Groq, Cerebras, and Silicon providers now pass explicit rate-limit and concurrency settings to the transport.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
EOF
)

providers/cerebras/client.py CHANGED
@@ -13,27 +13,23 @@ class CerebrasProvider(OpenAIChatTransport):
13
  """Cerebras provider using OpenAI-compatible /chat/completions."""
14
 
15
  # Mapping of proxy model refs to Cerebras API model IDs.
16
- # The proxy uses full refs like "cerebras/qwen-3-235b-a22b-instruct-2507"
17
- # but Cerebras API expects bare model IDs like "qwen-3-235b-a22b-instruct-2507".
18
  CEREBRAS_MODEL_MAP: dict[str, str] = {
19
- "llama3.1-8b": "llama3.1-8b",
20
- "qwen-3-235b-a22b-instruct-2507": "qwen-3-235b-a22b-instruct-2507",
21
- "zai-glm-4.7": "zai-glm-4.7",
22
- "gpt-oss-120b": "gpt-oss-120b",
23
  "cerebras/llama3.1-8b": "llama3.1-8b",
24
  "cerebras/qwen-3-235b-a22b-instruct-2507": "qwen-3-235b-a22b-instruct-2507",
25
- "cerebras/z-ai/glm4.7": "zai-glm-4.7",
26
  }
27
 
28
  def __init__(self, config: ProviderConfig, *, settings: Settings):
29
  base_url = (config.base_url or CEREBRAS_DEFAULT_BASE).rstrip("/")
30
  if not base_url.endswith("/v1"):
31
  base_url = base_url + "/v1"
 
32
  super().__init__(
33
  config,
34
  provider_name="Cerebras",
35
  base_url=base_url,
36
  api_key=config.api_key,
 
 
37
  )
38
  self._settings = settings
39
 
@@ -47,10 +43,9 @@ class CerebrasProvider(OpenAIChatTransport):
47
  else ReasoningReplayMode.DISABLED
48
  )
49
  body = build_base_request_body(request, reasoning_replay=reasoning_replay)
50
- # Strip cerebras/ prefix so the API gets the bare model ID
51
  model = body.get("model", "")
52
  if model in self.CEREBRAS_MODEL_MAP:
53
  body["model"] = self.CEREBRAS_MODEL_MAP[model]
54
  elif model.startswith("cerebras/"):
55
- body["model"] = model[len("cerebras/") :]
56
  return body
 
13
  """Cerebras provider using OpenAI-compatible /chat/completions."""
14
 
15
  # Mapping of proxy model refs to Cerebras API model IDs.
 
 
16
  CEREBRAS_MODEL_MAP: dict[str, str] = {
 
 
 
 
17
  "cerebras/llama3.1-8b": "llama3.1-8b",
18
  "cerebras/qwen-3-235b-a22b-instruct-2507": "qwen-3-235b-a22b-instruct-2507",
 
19
  }
20
 
21
  def __init__(self, config: ProviderConfig, *, settings: Settings):
22
  base_url = (config.base_url or CEREBRAS_DEFAULT_BASE).rstrip("/")
23
  if not base_url.endswith("/v1"):
24
  base_url = base_url + "/v1"
25
+ # Cerebras has generous rate limits
26
  super().__init__(
27
  config,
28
  provider_name="Cerebras",
29
  base_url=base_url,
30
  api_key=config.api_key,
31
+ nim_rate_limit=300,
32
+ nim_max_concurrency=80,
33
  )
34
  self._settings = settings
35
 
 
43
  else ReasoningReplayMode.DISABLED
44
  )
45
  body = build_base_request_body(request, reasoning_replay=reasoning_replay)
 
46
  model = body.get("model", "")
47
  if model in self.CEREBRAS_MODEL_MAP:
48
  body["model"] = self.CEREBRAS_MODEL_MAP[model]
49
  elif model.startswith("cerebras/"):
50
+ body["model"] = model[len("cerebras/"):]
51
  return body
providers/groq/client.py CHANGED
@@ -12,15 +12,26 @@ from providers.openai_compat import OpenAIChatTransport
12
  class GroqProvider(OpenAIChatTransport):
13
  """Groq provider using OpenAI-compatible /chat/completions."""
14
 
 
 
 
 
 
 
 
 
15
  def __init__(self, config: ProviderConfig, *, settings: Settings):
16
  base_url = (config.base_url or GROQ_DEFAULT_BASE).rstrip("/")
17
  if not base_url.endswith("/v1"):
18
  base_url = base_url + "/v1"
 
19
  super().__init__(
20
  config,
21
  provider_name="Groq",
22
  base_url=base_url,
23
  api_key=config.api_key,
 
 
24
  )
25
  self._settings = settings
26
 
@@ -34,8 +45,10 @@ class GroqProvider(OpenAIChatTransport):
34
  else ReasoningReplayMode.DISABLED
35
  )
36
  body = build_base_request_body(request, reasoning_replay=reasoning_replay)
37
- # Strip groq/ prefix so the API gets the bare model ID
38
  model = body.get("model", "")
39
- if model.startswith("groq/"):
40
- body["model"] = model[len("groq/") :]
 
 
41
  return body
 
12
  class GroqProvider(OpenAIChatTransport):
13
  """Groq provider using OpenAI-compatible /chat/completions."""
14
 
15
+ # Mapping of proxy model refs to Groq API model IDs.
16
+ # groq/ prefix is stripped, but the inner prefix (like qwen/) is kept.
17
+ GROQ_MODEL_MAP: dict[str, str] = {
18
+ "groq/qwen3-32b": "qwen/qwen3-32b",
19
+ "groq/llama-3.3-70b-versatile": "llama-3.3-70b-versatile",
20
+ "groq/llama-3.1-8b-instant": "llama-3.1-8b-instant",
21
+ }
22
+
23
  def __init__(self, config: ProviderConfig, *, settings: Settings):
24
  base_url = (config.base_url or GROQ_DEFAULT_BASE).rstrip("/")
25
  if not base_url.endswith("/v1"):
26
  base_url = base_url + "/v1"
27
+ # Groq has generous rate limits - set high limits with no adaptive throttling
28
  super().__init__(
29
  config,
30
  provider_name="Groq",
31
  base_url=base_url,
32
  api_key=config.api_key,
33
+ nim_rate_limit=500, # High limit for Groq
34
+ nim_max_concurrency=100, # High concurrency for Groq
35
  )
36
  self._settings = settings
37
 
 
45
  else ReasoningReplayMode.DISABLED
46
  )
47
  body = build_base_request_body(request, reasoning_replay=reasoning_replay)
48
+ # Map proxy model ref to actual Groq API model ID
49
  model = body.get("model", "")
50
+ if model in self.GROQ_MODEL_MAP:
51
+ body["model"] = self.GROQ_MODEL_MAP[model]
52
+ elif model.startswith("groq/"):
53
+ body["model"] = model[len("groq/"):]
54
  return body
providers/silicon/client.py CHANGED
@@ -16,11 +16,14 @@ class SiliconProvider(OpenAIChatTransport):
16
  base_url = (config.base_url or SILICON_DEFAULT_BASE).rstrip("/")
17
  if not base_url.endswith("/v1"):
18
  base_url = base_url + "/v1"
 
19
  super().__init__(
20
  config,
21
  provider_name="Silicon",
22
  base_url=base_url,
23
  api_key=config.api_key,
 
 
24
  )
25
  self._settings = settings
26
 
 
16
  base_url = (config.base_url or SILICON_DEFAULT_BASE).rstrip("/")
17
  if not base_url.endswith("/v1"):
18
  base_url = base_url + "/v1"
19
+ # Silicon Flow has generous rate limits
20
  super().__init__(
21
  config,
22
  provider_name="Silicon",
23
  base_url=base_url,
24
  api_key=config.api_key,
25
+ nim_rate_limit=300,
26
+ nim_max_concurrency=80,
27
  )
28
  self._settings = settings
29