Yash030 commited on
Commit
58a3721
·
1 Parent(s): 0223890


Browse files

Fix model ID format for Cerebras and Silicon Flow providers.

Cerebras API expects bare model IDs (e.g. "qwen-3-235b-a22b-instruct-2507")
not the full "provider/model" format. Strip cerebras/ prefix before sending.

Silicon Flow API similarly expects bare model IDs. Strip silicon/ prefix.

Also updated REQUESTED_PROVIDER_MODELS and MODEL_CAPABILITIES to use
correct ref format (cerebras/qwen-3-235b-a22b-instruct-2507, etc.).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

api/routes.py CHANGED
@@ -41,10 +41,10 @@ REQUESTED_PROVIDER_MODELS = [
41
  "nvidia_nim/z-ai/glm4.7",
42
  "nvidia_nim/bytedance/seed-oss-36b-instruct",
43
  "nvidia_nim/mistralai/mistral-nemotron",
44
- # Cerebras models
45
- "cerebras/qwen/qwen-3-235b-a22b-instruct-2507",
46
- "cerebras/z-ai/glm4.7",
47
- # Silicon Flow models
48
  "silicon/Qwen/Qwen3.6-35B-A3B",
49
  "silicon/Qwen/Qwen3.6-27B",
50
  "silicon/Qwen/Qwen3.5-35B-A3B",
 
41
  "nvidia_nim/z-ai/glm4.7",
42
  "nvidia_nim/bytedance/seed-oss-36b-instruct",
43
  "nvidia_nim/mistralai/mistral-nemotron",
44
+ # Cerebras models (uses bare model IDs on the API)
45
+ "cerebras/qwen-3-235b-a22b-instruct-2507",
46
+ "cerebras/zai-glm-4.7",
47
+ # Silicon Flow models (uses bare model IDs on the API)
48
  "silicon/Qwen/Qwen3.6-35B-A3B",
49
  "silicon/Qwen/Qwen3.6-27B",
50
  "silicon/Qwen/Qwen3.5-35B-A3B",
core/model_capabilities.py CHANGED
@@ -132,10 +132,10 @@ MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {
132
  priority=60,
133
  ),
134
  # Cerebras models
135
- "cerebras/qwen/qwen-3-235b-a22b-instruct-2507": ModelCapabilities(
136
  provider_id="cerebras",
137
- model_id="qwen/qwen-3-235b-a22b-instruct-2507",
138
- model_ref="cerebras/qwen/qwen-3-235b-a22b-instruct-2507",
139
  coding=True,
140
  reasoning=True,
141
  general_text=True,
@@ -143,10 +143,10 @@ MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {
143
  speed="slow",
144
  priority=85,
145
  ),
146
- "cerebras/z-ai/glm4.7": ModelCapabilities(
147
  provider_id="cerebras",
148
- model_id="z-ai/glm4.7",
149
- model_ref="cerebras/z-ai/glm4.7",
150
  coding=True,
151
  reasoning=True,
152
  general_text=True,
 
132
  priority=60,
133
  ),
134
  # Cerebras models
135
+ "cerebras/qwen-3-235b-a22b-instruct-2507": ModelCapabilities(
136
  provider_id="cerebras",
137
+ model_id="qwen-3-235b-a22b-instruct-2507",
138
+ model_ref="cerebras/qwen-3-235b-a22b-instruct-2507",
139
  coding=True,
140
  reasoning=True,
141
  general_text=True,
 
143
  speed="slow",
144
  priority=85,
145
  ),
146
+ "cerebras/zai-glm-4.7": ModelCapabilities(
147
  provider_id="cerebras",
148
+ model_id="zai-glm-4.7",
149
+ model_ref="cerebras/zai-glm-4.7",
150
  coding=True,
151
  reasoning=True,
152
  general_text=True,
providers/cerebras/client.py CHANGED
@@ -12,6 +12,16 @@ from providers.openai_compat import OpenAIChatTransport
12
  class CerebrasProvider(OpenAIChatTransport):
13
  """Cerebras provider using OpenAI-compatible /chat/completions."""
14
 
 
 
 
 
 
 
 
 
 
 
15
  def __init__(self, config: ProviderConfig, *, settings: Settings):
16
  base_url = (config.base_url or CEREBRAS_DEFAULT_BASE).rstrip("/")
17
  if not base_url.endswith("/v1"):
@@ -33,4 +43,11 @@ class CerebrasProvider(OpenAIChatTransport):
33
  if thinking
34
  else ReasoningReplayMode.DISABLED
35
  )
36
- return build_base_request_body(request, reasoning_replay=reasoning_replay)
 
 
 
 
 
 
 
 
12
  class CerebrasProvider(OpenAIChatTransport):
13
  """Cerebras provider using OpenAI-compatible /chat/completions."""
14
 
15
+ # Mapping of proxy model refs to Cerebras API model IDs.
16
+ # The proxy uses full refs like "cerebras/qwen-3-235b-a22b-instruct-2507"
17
+ # but Cerebras API expects bare model IDs like "qwen-3-235b-a22b-instruct-2507".
18
+ CEREBRAS_MODEL_MAP: dict[str, str] = {
19
+ "qwen-3-235b-a22b-instruct-2507": "qwen-3-235b-a22b-instruct-2507",
20
+ "zai-glm-4.7": "zai-glm-4.7",
21
+ "cerebras/qwen-3-235b-a22b-instruct-2507": "qwen-3-235b-a22b-instruct-2507",
22
+ "cerebras/z-ai/glm4.7": "zai-glm-4.7",
23
+ }
24
+
25
  def __init__(self, config: ProviderConfig, *, settings: Settings):
26
  base_url = (config.base_url or CEREBRAS_DEFAULT_BASE).rstrip("/")
27
  if not base_url.endswith("/v1"):
 
43
  if thinking
44
  else ReasoningReplayMode.DISABLED
45
  )
46
+ body = build_base_request_body(request, reasoning_replay=reasoning_replay)
47
+ # Strip cerebras/ prefix so the API gets the bare model ID
48
+ model = body.get("model", "")
49
+ if model in self.CEREBRAS_MODEL_MAP:
50
+ body["model"] = self.CEREBRAS_MODEL_MAP[model]
51
+ elif model.startswith("cerebras/"):
52
+ body["model"] = model[len("cerebras/") :]
53
+ return body
providers/silicon/client.py CHANGED
@@ -33,4 +33,9 @@ class SiliconProvider(OpenAIChatTransport):
33
  if thinking
34
  else ReasoningReplayMode.DISABLED
35
  )
36
- return build_base_request_body(request, reasoning_replay=reasoning_replay)
 
 
 
 
 
 
33
  if thinking
34
  else ReasoningReplayMode.DISABLED
35
  )
36
+ body = build_base_request_body(request, reasoning_replay=reasoning_replay)
37
+ # Strip silicon/ prefix so the API gets the bare model ID
38
+ model = body.get("model", "")
39
+ if model.startswith("silicon/"):
40
+ body["model"] = model[len("silicon/") :]
41
+ return body