Spaces:
Running
Running
$(cat <<EOF
Fix model ID format for Cerebras and Silicon Flow providers.
Cerebras API expects bare model IDs (e.g. "qwen-3-235b-a22b-instruct-2507")
not the full "provider/model" format. Strip cerebras/ prefix before sending.
Silicon Flow API similarly expects bare model IDs. Strip silicon/ prefix.
Also updated REQUESTED_PROVIDER_MODELS and MODEL_CAPABILITIES to use
correct ref format (cerebras/qwen-3-235b-a22b-instruct-2507, etc.).
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
EOF
)
- api/routes.py +4 -4
- core/model_capabilities.py +6 -6
- providers/cerebras/client.py +18 -1
- providers/silicon/client.py +6 -1
api/routes.py
CHANGED
|
@@ -41,10 +41,10 @@ REQUESTED_PROVIDER_MODELS = [
|
|
| 41 |
"nvidia_nim/z-ai/glm4.7",
|
| 42 |
"nvidia_nim/bytedance/seed-oss-36b-instruct",
|
| 43 |
"nvidia_nim/mistralai/mistral-nemotron",
|
| 44 |
-
# Cerebras models
|
| 45 |
-
"cerebras/qwen
|
| 46 |
-
"cerebras/
|
| 47 |
-
# Silicon Flow models
|
| 48 |
"silicon/Qwen/Qwen3.6-35B-A3B",
|
| 49 |
"silicon/Qwen/Qwen3.6-27B",
|
| 50 |
"silicon/Qwen/Qwen3.5-35B-A3B",
|
|
|
|
| 41 |
"nvidia_nim/z-ai/glm4.7",
|
| 42 |
"nvidia_nim/bytedance/seed-oss-36b-instruct",
|
| 43 |
"nvidia_nim/mistralai/mistral-nemotron",
|
| 44 |
+
# Cerebras models (uses bare model IDs on the API)
|
| 45 |
+
"cerebras/qwen-3-235b-a22b-instruct-2507",
|
| 46 |
+
"cerebras/zai-glm-4.7",
|
| 47 |
+
# Silicon Flow models (uses bare model IDs on the API)
|
| 48 |
"silicon/Qwen/Qwen3.6-35B-A3B",
|
| 49 |
"silicon/Qwen/Qwen3.6-27B",
|
| 50 |
"silicon/Qwen/Qwen3.5-35B-A3B",
|
core/model_capabilities.py
CHANGED
|
@@ -132,10 +132,10 @@ MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {
|
|
| 132 |
priority=60,
|
| 133 |
),
|
| 134 |
# Cerebras models
|
| 135 |
-
"cerebras/qwen
|
| 136 |
provider_id="cerebras",
|
| 137 |
-
model_id="qwen
|
| 138 |
-
model_ref="cerebras/qwen
|
| 139 |
coding=True,
|
| 140 |
reasoning=True,
|
| 141 |
general_text=True,
|
|
@@ -143,10 +143,10 @@ MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {
|
|
| 143 |
speed="slow",
|
| 144 |
priority=85,
|
| 145 |
),
|
| 146 |
-
"cerebras/
|
| 147 |
provider_id="cerebras",
|
| 148 |
-
model_id="
|
| 149 |
-
model_ref="cerebras/
|
| 150 |
coding=True,
|
| 151 |
reasoning=True,
|
| 152 |
general_text=True,
|
|
|
|
| 132 |
priority=60,
|
| 133 |
),
|
| 134 |
# Cerebras models
|
| 135 |
+
"cerebras/qwen-3-235b-a22b-instruct-2507": ModelCapabilities(
|
| 136 |
provider_id="cerebras",
|
| 137 |
+
model_id="qwen-3-235b-a22b-instruct-2507",
|
| 138 |
+
model_ref="cerebras/qwen-3-235b-a22b-instruct-2507",
|
| 139 |
coding=True,
|
| 140 |
reasoning=True,
|
| 141 |
general_text=True,
|
|
|
|
| 143 |
speed="slow",
|
| 144 |
priority=85,
|
| 145 |
),
|
| 146 |
+
"cerebras/zai-glm-4.7": ModelCapabilities(
|
| 147 |
provider_id="cerebras",
|
| 148 |
+
model_id="zai-glm-4.7",
|
| 149 |
+
model_ref="cerebras/zai-glm-4.7",
|
| 150 |
coding=True,
|
| 151 |
reasoning=True,
|
| 152 |
general_text=True,
|
providers/cerebras/client.py
CHANGED
|
@@ -12,6 +12,16 @@ from providers.openai_compat import OpenAIChatTransport
|
|
| 12 |
class CerebrasProvider(OpenAIChatTransport):
|
| 13 |
"""Cerebras provider using OpenAI-compatible /chat/completions."""
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
def __init__(self, config: ProviderConfig, *, settings: Settings):
|
| 16 |
base_url = (config.base_url or CEREBRAS_DEFAULT_BASE).rstrip("/")
|
| 17 |
if not base_url.endswith("/v1"):
|
|
@@ -33,4 +43,11 @@ class CerebrasProvider(OpenAIChatTransport):
|
|
| 33 |
if thinking
|
| 34 |
else ReasoningReplayMode.DISABLED
|
| 35 |
)
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
class CerebrasProvider(OpenAIChatTransport):
|
| 13 |
"""Cerebras provider using OpenAI-compatible /chat/completions."""
|
| 14 |
|
| 15 |
+
# Mapping of proxy model refs to Cerebras API model IDs.
|
| 16 |
+
# The proxy uses full refs like "cerebras/qwen-3-235b-a22b-instruct-2507"
|
| 17 |
+
# but Cerebras API expects bare model IDs like "qwen-3-235b-a22b-instruct-2507".
|
| 18 |
+
CEREBRAS_MODEL_MAP: dict[str, str] = {
|
| 19 |
+
"qwen-3-235b-a22b-instruct-2507": "qwen-3-235b-a22b-instruct-2507",
|
| 20 |
+
"zai-glm-4.7": "zai-glm-4.7",
|
| 21 |
+
"cerebras/qwen-3-235b-a22b-instruct-2507": "qwen-3-235b-a22b-instruct-2507",
|
| 22 |
+
"cerebras/z-ai/glm4.7": "zai-glm-4.7",
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
def __init__(self, config: ProviderConfig, *, settings: Settings):
|
| 26 |
base_url = (config.base_url or CEREBRAS_DEFAULT_BASE).rstrip("/")
|
| 27 |
if not base_url.endswith("/v1"):
|
|
|
|
| 43 |
if thinking
|
| 44 |
else ReasoningReplayMode.DISABLED
|
| 45 |
)
|
| 46 |
+
body = build_base_request_body(request, reasoning_replay=reasoning_replay)
|
| 47 |
+
# Strip cerebras/ prefix so the API gets the bare model ID
|
| 48 |
+
model = body.get("model", "")
|
| 49 |
+
if model in self.CEREBRAS_MODEL_MAP:
|
| 50 |
+
body["model"] = self.CEREBRAS_MODEL_MAP[model]
|
| 51 |
+
elif model.startswith("cerebras/"):
|
| 52 |
+
body["model"] = model[len("cerebras/") :]
|
| 53 |
+
return body
|
providers/silicon/client.py
CHANGED
|
@@ -33,4 +33,9 @@ class SiliconProvider(OpenAIChatTransport):
|
|
| 33 |
if thinking
|
| 34 |
else ReasoningReplayMode.DISABLED
|
| 35 |
)
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
if thinking
|
| 34 |
else ReasoningReplayMode.DISABLED
|
| 35 |
)
|
| 36 |
+
body = build_base_request_body(request, reasoning_replay=reasoning_replay)
|
| 37 |
+
# Strip silicon/ prefix so the API gets the bare model ID
|
| 38 |
+
model = body.get("model", "")
|
| 39 |
+
if model.startswith("silicon/"):
|
| 40 |
+
body["model"] = model[len("silicon/") :]
|
| 41 |
+
return body
|