Spaces:
Running
Running
$(cat <<EOF
Browse files — Update Cerebras model list based on API test results.
Cerebras key only has access to llama3.1-8b currently.
qwen-3-235b-a22b-instruct-2507 exists but is rate-limited.
zai-glm-4.7 and gpt-oss-120b are not accessible with this key.
Also added all models to CEREBRAS_MODEL_MAP for flexibility.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
EOF
)
- .claude/settings.local.json +2 -1
- api/routes.py +4 -3
- core/anthropic/conversion.py +12 -20
- core/model_capabilities.py +9 -18
- providers/cerebras/client.py +3 -0
- test_cerebras_api.py +64 -0
- test_silicon_api.py +63 -0
.claude/settings.local.json
CHANGED
|
@@ -18,7 +18,8 @@
|
|
| 18 |
"mcp__context7__resolve-library-id",
|
| 19 |
"mcp__context7__query-docs",
|
| 20 |
"Bash(git remote *)",
|
| 21 |
-
"Bash(python *)"
|
|
|
|
| 22 |
]
|
| 23 |
},
|
| 24 |
"enableAllProjectMcpServers": true,
|
|
|
|
| 18 |
"mcp__context7__resolve-library-id",
|
| 19 |
"mcp__context7__query-docs",
|
| 20 |
"Bash(git remote *)",
|
| 21 |
+
"Bash(python *)",
|
| 22 |
+
"Bash(.venv/Scripts/python -m pytest tests/ -x -q)"
|
| 23 |
]
|
| 24 |
},
|
| 25 |
"enableAllProjectMcpServers": true,
|
api/routes.py
CHANGED
|
@@ -41,9 +41,10 @@ REQUESTED_PROVIDER_MODELS = [
|
|
| 41 |
"nvidia_nim/z-ai/glm4.7",
|
| 42 |
"nvidia_nim/bytedance/seed-oss-36b-instruct",
|
| 43 |
"nvidia_nim/mistralai/mistral-nemotron",
|
| 44 |
-
# Cerebras models (
|
| 45 |
-
|
| 46 |
-
|
|
|
|
| 47 |
# Silicon Flow models (uses bare model IDs on the API)
|
| 48 |
"silicon/Qwen/Qwen3.6-35B-A3B",
|
| 49 |
"silicon/Qwen/Qwen3.6-27B",
|
|
|
|
| 41 |
"nvidia_nim/z-ai/glm4.7",
|
| 42 |
"nvidia_nim/bytedance/seed-oss-36b-instruct",
|
| 43 |
"nvidia_nim/mistralai/mistral-nemotron",
|
| 44 |
+
# Cerebras models (key only has access to llama3.1-8b currently)
|
| 45 |
+
# qwen-3-235b-a22b-instruct-2507 exists but is rate-limited
|
| 46 |
+
# zai-glm-4.7 and gpt-oss-120b are not accessible with current key
|
| 47 |
+
"cerebras/llama3.1-8b",
|
| 48 |
# Silicon Flow models (uses bare model IDs on the API)
|
| 49 |
"silicon/Qwen/Qwen3.6-35B-A3B",
|
| 50 |
"silicon/Qwen/Qwen3.6-27B",
|
core/anthropic/conversion.py
CHANGED
|
@@ -445,21 +445,17 @@ class AnthropicToOpenAIConverter:
|
|
| 445 |
max_image_bytes = 20 * 1024 * 1024
|
| 446 |
if estimated_size > max_image_bytes:
|
| 447 |
raise OpenAIConversionError(
|
| 448 |
-
f"Image size ({estimated_size/1024/1024:.1f}MB) exceeds limit "
|
| 449 |
-
f"({max_image_bytes/1024/1024:.1f}MB)"
|
| 450 |
)
|
| 451 |
image_url = f"data:{media_type};base64,{data}"
|
| 452 |
-
result.append(
|
| 453 |
-
"type": "image_url",
|
| 454 |
-
|
| 455 |
-
})
|
| 456 |
elif source_type == "url":
|
| 457 |
# Handle URL-based images
|
| 458 |
url = source.get("url", "")
|
| 459 |
-
result.append({
|
| 460 |
-
"type": "image_url",
|
| 461 |
-
"image_url": {"url": url}
|
| 462 |
-
})
|
| 463 |
else:
|
| 464 |
logger.warning("Unsupported image source type: {}", source_type)
|
| 465 |
elif block_type == "tool_result":
|
|
@@ -520,21 +516,17 @@ class AnthropicToOpenAIConverter:
|
|
| 520 |
max_image_bytes = 20 * 1024 * 1024
|
| 521 |
if estimated_size > max_image_bytes:
|
| 522 |
raise OpenAIConversionError(
|
| 523 |
-
f"Image size ({estimated_size/1024/1024:.1f}MB) exceeds limit "
|
| 524 |
-
f"({max_image_bytes/1024/1024:.1f}MB)"
|
| 525 |
)
|
| 526 |
image_url = f"data:{media_type};base64,{data}"
|
| 527 |
-
result.append(
|
| 528 |
-
"type": "image_url",
|
| 529 |
-
|
| 530 |
-
})
|
| 531 |
elif source_type == "url":
|
| 532 |
# Handle URL-based images
|
| 533 |
url = source.get("url", "")
|
| 534 |
-
result.append({
|
| 535 |
-
"type": "image_url",
|
| 536 |
-
"image_url": {"url": url}
|
| 537 |
-
})
|
| 538 |
else:
|
| 539 |
logger.warning("Unsupported image source type: {}", source_type)
|
| 540 |
elif block_type == "tool_result":
|
|
|
|
| 445 |
max_image_bytes = 20 * 1024 * 1024
|
| 446 |
if estimated_size > max_image_bytes:
|
| 447 |
raise OpenAIConversionError(
|
| 448 |
+
f"Image size ({estimated_size / 1024 / 1024:.1f}MB) exceeds limit "
|
| 449 |
+
f"({max_image_bytes / 1024 / 1024:.1f}MB)"
|
| 450 |
)
|
| 451 |
image_url = f"data:{media_type};base64,{data}"
|
| 452 |
+
result.append(
|
| 453 |
+
{"type": "image_url", "image_url": {"url": image_url}}
|
| 454 |
+
)
|
|
|
|
| 455 |
elif source_type == "url":
|
| 456 |
# Handle URL-based images
|
| 457 |
url = source.get("url", "")
|
| 458 |
+
result.append({"type": "image_url", "image_url": {"url": url}})
|
|
|
|
|
|
|
|
|
|
| 459 |
else:
|
| 460 |
logger.warning("Unsupported image source type: {}", source_type)
|
| 461 |
elif block_type == "tool_result":
|
|
|
|
| 516 |
max_image_bytes = 20 * 1024 * 1024
|
| 517 |
if estimated_size > max_image_bytes:
|
| 518 |
raise OpenAIConversionError(
|
| 519 |
+
f"Image size ({estimated_size / 1024 / 1024:.1f}MB) exceeds limit "
|
| 520 |
+
f"({max_image_bytes / 1024 / 1024:.1f}MB)"
|
| 521 |
)
|
| 522 |
image_url = f"data:{media_type};base64,{data}"
|
| 523 |
+
result.append(
|
| 524 |
+
{"type": "image_url", "image_url": {"url": image_url}}
|
| 525 |
+
)
|
|
|
|
| 526 |
elif source_type == "url":
|
| 527 |
# Handle URL-based images
|
| 528 |
url = source.get("url", "")
|
| 529 |
+
result.append({"type": "image_url", "image_url": {"url": url}})
|
|
|
|
|
|
|
|
|
|
| 530 |
else:
|
| 531 |
logger.warning("Unsupported image source type: {}", source_type)
|
| 532 |
elif block_type == "tool_result":
|
core/model_capabilities.py
CHANGED
|
@@ -131,28 +131,19 @@ MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {
|
|
| 131 |
speed="medium",
|
| 132 |
priority=60,
|
| 133 |
),
|
| 134 |
-
# Cerebras models
|
| 135 |
-
|
|
|
|
|
|
|
| 136 |
provider_id="cerebras",
|
| 137 |
-
model_id="
|
| 138 |
-
model_ref="cerebras/
|
| 139 |
coding=True,
|
| 140 |
-
reasoning=
|
| 141 |
general_text=True,
|
| 142 |
max_tokens=32000,
|
| 143 |
-
speed="
|
| 144 |
-
priority=
|
| 145 |
-
),
|
| 146 |
-
"cerebras/zai-glm-4.7": ModelCapabilities(
|
| 147 |
-
provider_id="cerebras",
|
| 148 |
-
model_id="zai-glm-4.7",
|
| 149 |
-
model_ref="cerebras/zai-glm-4.7",
|
| 150 |
-
coding=True,
|
| 151 |
-
reasoning=True,
|
| 152 |
-
general_text=True,
|
| 153 |
-
max_tokens=32000,
|
| 154 |
-
speed="medium",
|
| 155 |
-
priority=80,
|
| 156 |
),
|
| 157 |
# Silicon Flow models
|
| 158 |
"silicon/Qwen/Qwen3.6-35B-A3B": ModelCapabilities(
|
|
|
|
| 131 |
speed="medium",
|
| 132 |
priority=60,
|
| 133 |
),
|
| 134 |
+
# Cerebras models (key only has access to llama3.1-8b currently)
|
| 135 |
+
# Note: qwen-3-235b-a22b-instruct-2507 exists but is rate-limited
|
| 136 |
+
# Note: zai-glm-4.7 and gpt-oss-120b are not accessible with current key
|
| 137 |
+
"cerebras/llama3.1-8b": ModelCapabilities(
|
| 138 |
provider_id="cerebras",
|
| 139 |
+
model_id="llama3.1-8b",
|
| 140 |
+
model_ref="cerebras/llama3.1-8b",
|
| 141 |
coding=True,
|
| 142 |
+
reasoning=False,
|
| 143 |
general_text=True,
|
| 144 |
max_tokens=32000,
|
| 145 |
+
speed="fast",
|
| 146 |
+
priority=60,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
),
|
| 148 |
# Silicon Flow models
|
| 149 |
"silicon/Qwen/Qwen3.6-35B-A3B": ModelCapabilities(
|
providers/cerebras/client.py
CHANGED
|
@@ -16,8 +16,11 @@ class CerebrasProvider(OpenAIChatTransport):
|
|
| 16 |
# The proxy uses full refs like "cerebras/qwen-3-235b-a22b-instruct-2507"
|
| 17 |
# but Cerebras API expects bare model IDs like "qwen-3-235b-a22b-instruct-2507".
|
| 18 |
CEREBRAS_MODEL_MAP: dict[str, str] = {
|
|
|
|
| 19 |
"qwen-3-235b-a22b-instruct-2507": "qwen-3-235b-a22b-instruct-2507",
|
| 20 |
"zai-glm-4.7": "zai-glm-4.7",
|
|
|
|
|
|
|
| 21 |
"cerebras/qwen-3-235b-a22b-instruct-2507": "qwen-3-235b-a22b-instruct-2507",
|
| 22 |
"cerebras/z-ai/glm4.7": "zai-glm-4.7",
|
| 23 |
}
|
|
|
|
| 16 |
# The proxy uses full refs like "cerebras/qwen-3-235b-a22b-instruct-2507"
|
| 17 |
# but Cerebras API expects bare model IDs like "qwen-3-235b-a22b-instruct-2507".
|
| 18 |
CEREBRAS_MODEL_MAP: dict[str, str] = {
|
| 19 |
+
"llama3.1-8b": "llama3.1-8b",
|
| 20 |
"qwen-3-235b-a22b-instruct-2507": "qwen-3-235b-a22b-instruct-2507",
|
| 21 |
"zai-glm-4.7": "zai-glm-4.7",
|
| 22 |
+
"gpt-oss-120b": "gpt-oss-120b",
|
| 23 |
+
"cerebras/llama3.1-8b": "llama3.1-8b",
|
| 24 |
"cerebras/qwen-3-235b-a22b-instruct-2507": "qwen-3-235b-a22b-instruct-2507",
|
| 25 |
"cerebras/z-ai/glm4.7": "zai-glm-4.7",
|
| 26 |
}
|
test_cerebras_api.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Test Cerebras API key directly."""
|
| 2 |
+
import os
|
| 3 |
+
import httpx
|
| 4 |
+
from openai import AsyncOpenAI
|
| 5 |
+
|
| 6 |
+
CEREBRAS_API_KEY = os.environ.get("CEREBRAS_API_KEY", "")  # Read the secret from the environment; never commit API keys to source control (rotate any key that was).
|
| 7 |
+
CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1"
|
| 8 |
+
|
| 9 |
+
async def test_models_list():
|
| 10 |
+
"""Test listing models."""
|
| 11 |
+
print("=== Testing /v1/models ===")
|
| 12 |
+
async with httpx.AsyncClient() as client:
|
| 13 |
+
try:
|
| 14 |
+
response = await client.get(
|
| 15 |
+
f"{CEREBRAS_BASE_URL}/models",
|
| 16 |
+
headers={"Authorization": f"Bearer {CEREBRAS_API_KEY}"},
|
| 17 |
+
timeout=10.0,
|
| 18 |
+
)
|
| 19 |
+
print(f"Status: {response.status_code}")
|
| 20 |
+
print(f"Response: {response.text[:800]}")
|
| 21 |
+
except Exception as e:
|
| 22 |
+
print(f"Error: {e}")
|
| 23 |
+
|
| 24 |
+
async def test_chat():
|
| 25 |
+
"""Test a chat completion."""
|
| 26 |
+
print("\n=== Testing /v1/chat/completions ===")
|
| 27 |
+
client = AsyncOpenAI(
|
| 28 |
+
api_key=CEREBRAS_API_KEY,
|
| 29 |
+
base_url=CEREBRAS_BASE_URL,
|
| 30 |
+
timeout=httpx.Timeout(60.0),
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
models_to_try = [
|
| 34 |
+
"qwen-3-235b-a22b-instruct-2507",
|
| 35 |
+
"zai-glm-4.7",
|
| 36 |
+
"gpt-oss-120b",
|
| 37 |
+
"llama3.1-8b",
|
| 38 |
+
]
|
| 39 |
+
|
| 40 |
+
for model in models_to_try:
|
| 41 |
+
print(f"\nTrying model: {model}")
|
| 42 |
+
try:
|
| 43 |
+
response = await client.chat.completions.create(
|
| 44 |
+
model=model,
|
| 45 |
+
messages=[{"role": "user", "content": "hi"}],
|
| 46 |
+
max_tokens=20,
|
| 47 |
+
)
|
| 48 |
+
print(f"Success!")
|
| 49 |
+
print(f"Model: {response.model}")
|
| 50 |
+
print(f"Content: {response.choices[0].message.content}")
|
| 51 |
+
break
|
| 52 |
+
except Exception as e:
|
| 53 |
+
error_text = str(e)
|
| 54 |
+
if hasattr(e, 'response') and e.response:
|
| 55 |
+
error_text = e.response.text
|
| 56 |
+
print(f"Error: {error_text[:300]}")
|
| 57 |
+
|
| 58 |
+
async def main():
|
| 59 |
+
await test_models_list()
|
| 60 |
+
await test_chat()
|
| 61 |
+
|
| 62 |
+
if __name__ == "__main__":
|
| 63 |
+
import asyncio
|
| 64 |
+
asyncio.run(main())
|
test_silicon_api.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Test Silicon Flow API key directly."""
|
| 2 |
+
import os
|
| 3 |
+
import httpx
|
| 4 |
+
from openai import AsyncOpenAI
|
| 5 |
+
|
| 6 |
+
SILICON_API_KEY = os.environ.get("SILICON_API_KEY", "")  # Read the secret from the environment; never commit API keys to source control (rotate any key that was).
|
| 7 |
+
SILICON_BASE_URL = "https://api.siliconflow.cn/v1"
|
| 8 |
+
|
| 9 |
+
async def test_models_list():
|
| 10 |
+
"""Test listing models."""
|
| 11 |
+
print("=== Testing /v1/models ===")
|
| 12 |
+
async with httpx.AsyncClient() as client:
|
| 13 |
+
try:
|
| 14 |
+
response = await client.get(
|
| 15 |
+
f"{SILICON_BASE_URL}/models",
|
| 16 |
+
headers={"Authorization": f"Bearer {SILICON_API_KEY}"},
|
| 17 |
+
timeout=10.0,
|
| 18 |
+
)
|
| 19 |
+
print(f"Status: {response.status_code}")
|
| 20 |
+
print(f"Response: {response.text[:500]}")
|
| 21 |
+
except Exception as e:
|
| 22 |
+
print(f"Error: {e}")
|
| 23 |
+
|
| 24 |
+
async def test_chat():
|
| 25 |
+
"""Test a chat completion."""
|
| 26 |
+
print("\n=== Testing /v1/chat/completions ===")
|
| 27 |
+
client = AsyncOpenAI(
|
| 28 |
+
api_key=SILICON_API_KEY,
|
| 29 |
+
base_url=SILICON_BASE_URL,
|
| 30 |
+
timeout=httpx.Timeout(30.0),
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
models_to_try = [
|
| 34 |
+
"Qwen/Qwen3.6-35B-A3B",
|
| 35 |
+
"Qwen3.6-35B-A3B",
|
| 36 |
+
"Qwen/Qwen2.5-72B-Instruct",
|
| 37 |
+
"Pro/Qwen/Qwen2.5-72B-Instruct",
|
| 38 |
+
]
|
| 39 |
+
|
| 40 |
+
for model in models_to_try:
|
| 41 |
+
print(f"\nTrying model: {model}")
|
| 42 |
+
try:
|
| 43 |
+
response = await client.chat.completions.create(
|
| 44 |
+
model=model,
|
| 45 |
+
messages=[{"role": "user", "content": "hi"}],
|
| 46 |
+
max_tokens=10,
|
| 47 |
+
)
|
| 48 |
+
print(f"Success! Response: {response}")
|
| 49 |
+
break
|
| 50 |
+
except Exception as e:
|
| 51 |
+
error_text = str(e)
|
| 52 |
+
# Extract useful part of error
|
| 53 |
+
if hasattr(e, 'response') and e.response:
|
| 54 |
+
error_text = e.response.text
|
| 55 |
+
print(f"Error: {error_text[:200]}")
|
| 56 |
+
|
| 57 |
+
async def main():
|
| 58 |
+
await test_models_list()
|
| 59 |
+
await test_chat()
|
| 60 |
+
|
| 61 |
+
if __name__ == "__main__":
|
| 62 |
+
import asyncio
|
| 63 |
+
asyncio.run(main())
|