Yash030 commited on
Commit
0ba585f
·
1 Parent(s): 58a3721

Browse files

Update Cerebras model list based on API test results.

Cerebras key only has access to llama3.1-8b currently.
qwen-3-235b-a22b-instruct-2507 exists but is rate-limited.
zai-glm-4.7 and gpt-oss-120b are not accessible with this key.
Also added all models to CEREBRAS_MODEL_MAP for flexibility.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

.claude/settings.local.json CHANGED
@@ -18,7 +18,8 @@
18
  "mcp__context7__resolve-library-id",
19
  "mcp__context7__query-docs",
20
  "Bash(git remote *)",
21
- "Bash(python *)"
 
22
  ]
23
  },
24
  "enableAllProjectMcpServers": true,
 
18
  "mcp__context7__resolve-library-id",
19
  "mcp__context7__query-docs",
20
  "Bash(git remote *)",
21
+ "Bash(python *)",
22
+ "Bash(.venv/Scripts/python -m pytest tests/ -x -q)"
23
  ]
24
  },
25
  "enableAllProjectMcpServers": true,
api/routes.py CHANGED
@@ -41,9 +41,10 @@ REQUESTED_PROVIDER_MODELS = [
41
  "nvidia_nim/z-ai/glm4.7",
42
  "nvidia_nim/bytedance/seed-oss-36b-instruct",
43
  "nvidia_nim/mistralai/mistral-nemotron",
44
- # Cerebras models (uses bare model IDs on the API)
45
- "cerebras/qwen-3-235b-a22b-instruct-2507",
46
- "cerebras/zai-glm-4.7",
 
47
  # Silicon Flow models (uses bare model IDs on the API)
48
  "silicon/Qwen/Qwen3.6-35B-A3B",
49
  "silicon/Qwen/Qwen3.6-27B",
 
41
  "nvidia_nim/z-ai/glm4.7",
42
  "nvidia_nim/bytedance/seed-oss-36b-instruct",
43
  "nvidia_nim/mistralai/mistral-nemotron",
44
+ # Cerebras models (key only has access to llama3.1-8b currently)
45
+ # qwen-3-235b-a22b-instruct-2507 exists but is rate-limited
46
+ # zai-glm-4.7 and gpt-oss-120b are not accessible with current key
47
+ "cerebras/llama3.1-8b",
48
  # Silicon Flow models (uses bare model IDs on the API)
49
  "silicon/Qwen/Qwen3.6-35B-A3B",
50
  "silicon/Qwen/Qwen3.6-27B",
core/anthropic/conversion.py CHANGED
@@ -445,21 +445,17 @@ class AnthropicToOpenAIConverter:
445
  max_image_bytes = 20 * 1024 * 1024
446
  if estimated_size > max_image_bytes:
447
  raise OpenAIConversionError(
448
- f"Image size ({estimated_size/1024/1024:.1f}MB) exceeds limit "
449
- f"({max_image_bytes/1024/1024:.1f}MB)"
450
  )
451
  image_url = f"data:{media_type};base64,{data}"
452
- result.append({
453
- "type": "image_url",
454
- "image_url": {"url": image_url}
455
- })
456
  elif source_type == "url":
457
  # Handle URL-based images
458
  url = source.get("url", "")
459
- result.append({
460
- "type": "image_url",
461
- "image_url": {"url": url}
462
- })
463
  else:
464
  logger.warning("Unsupported image source type: {}", source_type)
465
  elif block_type == "tool_result":
@@ -520,21 +516,17 @@ class AnthropicToOpenAIConverter:
520
  max_image_bytes = 20 * 1024 * 1024
521
  if estimated_size > max_image_bytes:
522
  raise OpenAIConversionError(
523
- f"Image size ({estimated_size/1024/1024:.1f}MB) exceeds limit "
524
- f"({max_image_bytes/1024/1024:.1f}MB)"
525
  )
526
  image_url = f"data:{media_type};base64,{data}"
527
- result.append({
528
- "type": "image_url",
529
- "image_url": {"url": image_url}
530
- })
531
  elif source_type == "url":
532
  # Handle URL-based images
533
  url = source.get("url", "")
534
- result.append({
535
- "type": "image_url",
536
- "image_url": {"url": url}
537
- })
538
  else:
539
  logger.warning("Unsupported image source type: {}", source_type)
540
  elif block_type == "tool_result":
 
445
  max_image_bytes = 20 * 1024 * 1024
446
  if estimated_size > max_image_bytes:
447
  raise OpenAIConversionError(
448
+ f"Image size ({estimated_size / 1024 / 1024:.1f}MB) exceeds limit "
449
+ f"({max_image_bytes / 1024 / 1024:.1f}MB)"
450
  )
451
  image_url = f"data:{media_type};base64,{data}"
452
+ result.append(
453
+ {"type": "image_url", "image_url": {"url": image_url}}
454
+ )
 
455
  elif source_type == "url":
456
  # Handle URL-based images
457
  url = source.get("url", "")
458
+ result.append({"type": "image_url", "image_url": {"url": url}})
 
 
 
459
  else:
460
  logger.warning("Unsupported image source type: {}", source_type)
461
  elif block_type == "tool_result":
 
516
  max_image_bytes = 20 * 1024 * 1024
517
  if estimated_size > max_image_bytes:
518
  raise OpenAIConversionError(
519
+ f"Image size ({estimated_size / 1024 / 1024:.1f}MB) exceeds limit "
520
+ f"({max_image_bytes / 1024 / 1024:.1f}MB)"
521
  )
522
  image_url = f"data:{media_type};base64,{data}"
523
+ result.append(
524
+ {"type": "image_url", "image_url": {"url": image_url}}
525
+ )
 
526
  elif source_type == "url":
527
  # Handle URL-based images
528
  url = source.get("url", "")
529
+ result.append({"type": "image_url", "image_url": {"url": url}})
 
 
 
530
  else:
531
  logger.warning("Unsupported image source type: {}", source_type)
532
  elif block_type == "tool_result":
core/model_capabilities.py CHANGED
@@ -131,28 +131,19 @@ MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {
131
  speed="medium",
132
  priority=60,
133
  ),
134
- # Cerebras models
135
- "cerebras/qwen-3-235b-a22b-instruct-2507": ModelCapabilities(
 
 
136
  provider_id="cerebras",
137
- model_id="qwen-3-235b-a22b-instruct-2507",
138
- model_ref="cerebras/qwen-3-235b-a22b-instruct-2507",
139
  coding=True,
140
- reasoning=True,
141
  general_text=True,
142
  max_tokens=32000,
143
- speed="slow",
144
- priority=85,
145
- ),
146
- "cerebras/zai-glm-4.7": ModelCapabilities(
147
- provider_id="cerebras",
148
- model_id="zai-glm-4.7",
149
- model_ref="cerebras/zai-glm-4.7",
150
- coding=True,
151
- reasoning=True,
152
- general_text=True,
153
- max_tokens=32000,
154
- speed="medium",
155
- priority=80,
156
  ),
157
  # Silicon Flow models
158
  "silicon/Qwen/Qwen3.6-35B-A3B": ModelCapabilities(
 
131
  speed="medium",
132
  priority=60,
133
  ),
134
+ # Cerebras models (key only has access to llama3.1-8b currently)
135
+ # Note: qwen-3-235b-a22b-instruct-2507 exists but is rate-limited
136
+ # Note: zai-glm-4.7 and gpt-oss-120b are not accessible with current key
137
+ "cerebras/llama3.1-8b": ModelCapabilities(
138
  provider_id="cerebras",
139
+ model_id="llama3.1-8b",
140
+ model_ref="cerebras/llama3.1-8b",
141
  coding=True,
142
+ reasoning=False,
143
  general_text=True,
144
  max_tokens=32000,
145
+ speed="fast",
146
+ priority=60,
 
 
 
 
 
 
 
 
 
 
 
147
  ),
148
  # Silicon Flow models
149
  "silicon/Qwen/Qwen3.6-35B-A3B": ModelCapabilities(
providers/cerebras/client.py CHANGED
@@ -16,8 +16,11 @@ class CerebrasProvider(OpenAIChatTransport):
16
  # The proxy uses full refs like "cerebras/qwen-3-235b-a22b-instruct-2507"
17
  # but Cerebras API expects bare model IDs like "qwen-3-235b-a22b-instruct-2507".
18
  CEREBRAS_MODEL_MAP: dict[str, str] = {
 
19
  "qwen-3-235b-a22b-instruct-2507": "qwen-3-235b-a22b-instruct-2507",
20
  "zai-glm-4.7": "zai-glm-4.7",
 
 
21
  "cerebras/qwen-3-235b-a22b-instruct-2507": "qwen-3-235b-a22b-instruct-2507",
22
  "cerebras/z-ai/glm4.7": "zai-glm-4.7",
23
  }
 
16
  # The proxy uses full refs like "cerebras/qwen-3-235b-a22b-instruct-2507"
17
  # but Cerebras API expects bare model IDs like "qwen-3-235b-a22b-instruct-2507".
18
  CEREBRAS_MODEL_MAP: dict[str, str] = {
19
+ "llama3.1-8b": "llama3.1-8b",
20
  "qwen-3-235b-a22b-instruct-2507": "qwen-3-235b-a22b-instruct-2507",
21
  "zai-glm-4.7": "zai-glm-4.7",
22
+ "gpt-oss-120b": "gpt-oss-120b",
23
+ "cerebras/llama3.1-8b": "llama3.1-8b",
24
  "cerebras/qwen-3-235b-a22b-instruct-2507": "qwen-3-235b-a22b-instruct-2507",
25
  "cerebras/z-ai/glm4.7": "zai-glm-4.7",
26
  }
test_cerebras_api.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Test Cerebras API key directly."""
2
+ import os
3
+ import httpx
4
+ from openai import AsyncOpenAI
5
+
6
+ CEREBRAS_API_KEY = "csk-2ewy2h26eeph4yex94kmjnfwwx35pdpyyxkv3j6wcj4cxc3t"
7
+ CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1"
8
+
9
async def test_models_list():
    """Probe the /v1/models endpoint and dump status plus truncated body."""
    print("=== Testing /v1/models ===")
    auth_header = {"Authorization": f"Bearer {CEREBRAS_API_KEY}"}
    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(
                f"{CEREBRAS_BASE_URL}/models",
                headers=auth_header,
                timeout=10.0,
            )
            print(f"Status: {response.status_code}")
            print(f"Response: {response.text[:800]}")
        except Exception as e:
            # Best-effort probe: report any failure rather than crash.
            print(f"Error: {e}")
23
+
24
async def test_chat():
    """Try a minimal chat completion against each candidate model.

    Iterates the candidates in order and stops at the first model that
    answers successfully; failures print a truncated error body so the
    inaccessible or rate-limited model IDs are easy to identify.
    """
    print("\n=== Testing /v1/chat/completions ===")
    client = AsyncOpenAI(
        api_key=CEREBRAS_API_KEY,
        base_url=CEREBRAS_BASE_URL,
        timeout=httpx.Timeout(60.0),
    )

    models_to_try = [
        "qwen-3-235b-a22b-instruct-2507",
        "zai-glm-4.7",
        "gpt-oss-120b",
        "llama3.1-8b",
    ]

    for model in models_to_try:
        print(f"\nTrying model: {model}")
        try:
            response = await client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": "hi"}],
                max_tokens=20,
            )
            # Plain string: the original f-string had no placeholders.
            print("Success!")
            print(f"Model: {response.model}")
            print(f"Content: {response.choices[0].message.content}")
            break
        except Exception as e:
            # Prefer the raw HTTP body when the SDK exception carries one.
            # Explicit None check — a truthiness test could silently drop a
            # falsy response object and hide the error body.
            response_obj = getattr(e, "response", None)
            error_text = response_obj.text if response_obj is not None else str(e)
            print(f"Error: {error_text[:300]}")
57
+
58
async def main():
    """Run both probes in sequence: model listing first, then chat."""
    await test_models_list()
    await test_chat()
61
+
62
+ if __name__ == "__main__":
63
+ import asyncio
64
+ asyncio.run(main())
test_silicon_api.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Test Silicon Flow API key directly."""
2
+ import os
3
+ import httpx
4
+ from openai import AsyncOpenAI
5
+
6
+ SILICON_API_KEY = "sk-vkswknrlhztbogulqjizbxpkdipbafudnirbrhzosxjkvmri"
7
+ SILICON_BASE_URL = "https://api.siliconflow.cn/v1"
8
+
9
async def test_models_list():
    """Probe the /v1/models endpoint and dump status plus truncated body."""
    print("=== Testing /v1/models ===")
    auth_header = {"Authorization": f"Bearer {SILICON_API_KEY}"}
    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(
                f"{SILICON_BASE_URL}/models",
                headers=auth_header,
                timeout=10.0,
            )
            print(f"Status: {response.status_code}")
            print(f"Response: {response.text[:500]}")
        except Exception as e:
            # Best-effort probe: report any failure rather than crash.
            print(f"Error: {e}")
23
+
24
async def test_chat():
    """Try a minimal chat completion against each candidate model ref.

    Iterates the candidates in order and stops at the first model that
    answers successfully; failures print a truncated error body so the
    inaccessible model IDs are easy to identify.
    """
    print("\n=== Testing /v1/chat/completions ===")
    client = AsyncOpenAI(
        api_key=SILICON_API_KEY,
        base_url=SILICON_BASE_URL,
        timeout=httpx.Timeout(30.0),
    )

    models_to_try = [
        "Qwen/Qwen3.6-35B-A3B",
        "Qwen3.6-35B-A3B",
        "Qwen/Qwen2.5-72B-Instruct",
        "Pro/Qwen/Qwen2.5-72B-Instruct",
    ]

    for model in models_to_try:
        print(f"\nTrying model: {model}")
        try:
            response = await client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": "hi"}],
                max_tokens=10,
            )
            print(f"Success! Response: {response}")
            break
        except Exception as e:
            # Prefer the raw HTTP body when the SDK exception carries one.
            # Explicit None check — a truthiness test could silently drop a
            # falsy response object and hide the error body.
            response_obj = getattr(e, "response", None)
            error_text = response_obj.text if response_obj is not None else str(e)
            print(f"Error: {error_text[:200]}")
56
+
57
async def main():
    """Run both probes in sequence: model listing first, then chat."""
    await test_models_list()
    await test_chat()
60
+
61
+ if __name__ == "__main__":
62
+ import asyncio
63
+ asyncio.run(main())