Yash030 commited on
Commit
0ba585f
·
1 Parent(s): 58a3721

Browse files

Update Cerebras model list based on API test results.

Cerebras key only has access to llama3.1-8b currently.
qwen-3-235b-a22b-instruct-2507 exists but is rate-limited.
zai-glm-4.7 and gpt-oss-120b are not accessible with this key.
Also added all models to CEREBRAS_MODEL_MAP for flexibility.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

.claude/settings.local.json CHANGED
@@ -18,7 +18,8 @@
18
  "mcp__context7__resolve-library-id",
19
  "mcp__context7__query-docs",
20
  "Bash(git remote *)",
21
- "Bash(python *)"
 
22
  ]
23
  },
24
  "enableAllProjectMcpServers": true,
 
18
  "mcp__context7__resolve-library-id",
19
  "mcp__context7__query-docs",
20
  "Bash(git remote *)",
21
+ "Bash(python *)",
22
+ "Bash(.venv/Scripts/python -m pytest tests/ -x -q)"
23
  ]
24
  },
25
  "enableAllProjectMcpServers": true,
api/routes.py CHANGED
@@ -41,9 +41,10 @@ REQUESTED_PROVIDER_MODELS = [
41
  "nvidia_nim/z-ai/glm4.7",
42
  "nvidia_nim/bytedance/seed-oss-36b-instruct",
43
  "nvidia_nim/mistralai/mistral-nemotron",
44
- # Cerebras models (uses bare model IDs on the API)
45
- "cerebras/qwen-3-235b-a22b-instruct-2507",
46
- "cerebras/zai-glm-4.7",
 
47
  # Silicon Flow models (uses bare model IDs on the API)
48
  "silicon/Qwen/Qwen3.6-35B-A3B",
49
  "silicon/Qwen/Qwen3.6-27B",
 
41
  "nvidia_nim/z-ai/glm4.7",
42
  "nvidia_nim/bytedance/seed-oss-36b-instruct",
43
  "nvidia_nim/mistralai/mistral-nemotron",
44
+ # Cerebras models (key only has access to llama3.1-8b currently)
45
+ # qwen-3-235b-a22b-instruct-2507 exists but is rate-limited
46
+ # zai-glm-4.7 and gpt-oss-120b are not accessible with current key
47
+ "cerebras/llama3.1-8b",
48
  # Silicon Flow models (uses bare model IDs on the API)
49
  "silicon/Qwen/Qwen3.6-35B-A3B",
50
  "silicon/Qwen/Qwen3.6-27B",
core/anthropic/conversion.py CHANGED
@@ -445,21 +445,17 @@ class AnthropicToOpenAIConverter:
445
  max_image_bytes = 20 * 1024 * 1024
446
  if estimated_size > max_image_bytes:
447
  raise OpenAIConversionError(
448
- f"Image size ({estimated_size/1024/1024:.1f}MB) exceeds limit "
449
- f"({max_image_bytes/1024/1024:.1f}MB)"
450
  )
451
  image_url = f"data:{media_type};base64,{data}"
452
- result.append({
453
- "type": "image_url",
454
- "image_url": {"url": image_url}
455
- })
456
  elif source_type == "url":
457
  # Handle URL-based images
458
  url = source.get("url", "")
459
- result.append({
460
- "type": "image_url",
461
- "image_url": {"url": url}
462
- })
463
  else:
464
  logger.warning("Unsupported image source type: {}", source_type)
465
  elif block_type == "tool_result":
@@ -520,21 +516,17 @@ class AnthropicToOpenAIConverter:
520
  max_image_bytes = 20 * 1024 * 1024
521
  if estimated_size > max_image_bytes:
522
  raise OpenAIConversionError(
523
- f"Image size ({estimated_size/1024/1024:.1f}MB) exceeds limit "
524
- f"({max_image_bytes/1024/1024:.1f}MB)"
525
  )
526
  image_url = f"data:{media_type};base64,{data}"
527
- result.append({
528
- "type": "image_url",
529
- "image_url": {"url": image_url}
530
- })
531
  elif source_type == "url":
532
  # Handle URL-based images
533
  url = source.get("url", "")
534
- result.append({
535
- "type": "image_url",
536
- "image_url": {"url": url}
537
- })
538
  else:
539
  logger.warning("Unsupported image source type: {}", source_type)
540
  elif block_type == "tool_result":
 
445
  max_image_bytes = 20 * 1024 * 1024
446
  if estimated_size > max_image_bytes:
447
  raise OpenAIConversionError(
448
+ f"Image size ({estimated_size / 1024 / 1024:.1f}MB) exceeds limit "
449
+ f"({max_image_bytes / 1024 / 1024:.1f}MB)"
450
  )
451
  image_url = f"data:{media_type};base64,{data}"
452
+ result.append(
453
+ {"type": "image_url", "image_url": {"url": image_url}}
454
+ )
 
455
  elif source_type == "url":
456
  # Handle URL-based images
457
  url = source.get("url", "")
458
+ result.append({"type": "image_url", "image_url": {"url": url}})
 
 
 
459
  else:
460
  logger.warning("Unsupported image source type: {}", source_type)
461
  elif block_type == "tool_result":
 
516
  max_image_bytes = 20 * 1024 * 1024
517
  if estimated_size > max_image_bytes:
518
  raise OpenAIConversionError(
519
+ f"Image size ({estimated_size / 1024 / 1024:.1f}MB) exceeds limit "
520
+ f"({max_image_bytes / 1024 / 1024:.1f}MB)"
521
  )
522
  image_url = f"data:{media_type};base64,{data}"
523
+ result.append(
524
+ {"type": "image_url", "image_url": {"url": image_url}}
525
+ )
 
526
  elif source_type == "url":
527
  # Handle URL-based images
528
  url = source.get("url", "")
529
+ result.append({"type": "image_url", "image_url": {"url": url}})
 
 
 
530
  else:
531
  logger.warning("Unsupported image source type: {}", source_type)
532
  elif block_type == "tool_result":
core/model_capabilities.py CHANGED
@@ -131,28 +131,19 @@ MODEL_CAPABILITIES: dict[str, ModelCapabilities] = {
131
  speed="medium",
132
  priority=60,
133
  ),
134
- # Cerebras models
135
- "cerebras/qwen-3-235b-a22b-instruct-2507": ModelCapabilities(
 
 
136
  provider_id="cerebras",
137
- model_id="qwen-3-235b-a22b-instruct-2507",
138
- model_ref="cerebras/qwen-3-235b-a22b-instruct-2507",
139
  coding=True,
140
- reasoning=True,
141
  general_text=True,
142
  max_tokens=32000,
143
- speed="slow",
144
- priority=85,
145
- ),
146
- "cerebras/zai-glm-4.7": ModelCapabilities(
147
- provider_id="cerebras",
148
- model_id="zai-glm-4.7",
149
- model_ref="cerebras/zai-glm-4.7",
150
- coding=True,
151
- reasoning=True,
152
- general_text=True,
153
- max_tokens=32000,
154
- speed="medium",
155
- priority=80,
156
  ),
157
  # Silicon Flow models
158
  "silicon/Qwen/Qwen3.6-35B-A3B": ModelCapabilities(
 
131
  speed="medium",
132
  priority=60,
133
  ),
134
+ # Cerebras models (key only has access to llama3.1-8b currently)
135
+ # Note: qwen-3-235b-a22b-instruct-2507 exists but is rate-limited
136
+ # Note: zai-glm-4.7 and gpt-oss-120b are not accessible with current key
137
+ "cerebras/llama3.1-8b": ModelCapabilities(
138
  provider_id="cerebras",
139
+ model_id="llama3.1-8b",
140
+ model_ref="cerebras/llama3.1-8b",
141
  coding=True,
142
+ reasoning=False,
143
  general_text=True,
144
  max_tokens=32000,
145
+ speed="fast",
146
+ priority=60,
 
 
 
 
 
 
 
 
 
 
 
147
  ),
148
  # Silicon Flow models
149
  "silicon/Qwen/Qwen3.6-35B-A3B": ModelCapabilities(
providers/cerebras/client.py CHANGED
@@ -16,8 +16,11 @@ class CerebrasProvider(OpenAIChatTransport):
16
  # The proxy uses full refs like "cerebras/qwen-3-235b-a22b-instruct-2507"
17
  # but Cerebras API expects bare model IDs like "qwen-3-235b-a22b-instruct-2507".
18
  CEREBRAS_MODEL_MAP: dict[str, str] = {
 
19
  "qwen-3-235b-a22b-instruct-2507": "qwen-3-235b-a22b-instruct-2507",
20
  "zai-glm-4.7": "zai-glm-4.7",
 
 
21
  "cerebras/qwen-3-235b-a22b-instruct-2507": "qwen-3-235b-a22b-instruct-2507",
22
  "cerebras/z-ai/glm4.7": "zai-glm-4.7",
23
  }
 
16
  # The proxy uses full refs like "cerebras/qwen-3-235b-a22b-instruct-2507"
17
  # but Cerebras API expects bare model IDs like "qwen-3-235b-a22b-instruct-2507".
18
  CEREBRAS_MODEL_MAP: dict[str, str] = {
19
+ "llama3.1-8b": "llama3.1-8b",
20
  "qwen-3-235b-a22b-instruct-2507": "qwen-3-235b-a22b-instruct-2507",
21
  "zai-glm-4.7": "zai-glm-4.7",
22
+ "gpt-oss-120b": "gpt-oss-120b",
23
+ "cerebras/llama3.1-8b": "llama3.1-8b",
24
  "cerebras/qwen-3-235b-a22b-instruct-2507": "qwen-3-235b-a22b-instruct-2507",
25
  "cerebras/z-ai/glm4.7": "zai-glm-4.7",
26
  }
test_cerebras_api.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Test Cerebras API key directly."""
2
+ import os
3
+ import httpx
4
+ from openai import AsyncOpenAI
5
+
6
+ CEREBRAS_API_KEY = "csk-2ewy2h26eeph4yex94kmjnfwwx35pdpyyxkv3j6wcj4cxc3t"
7
+ CEREBRAS_BASE_URL = "https://api.cerebras.ai/v1"
8
+
9
async def test_models_list():
    """Probe the /v1/models endpoint and dump status plus truncated body."""
    print("=== Testing /v1/models ===")
    auth_header = {"Authorization": f"Bearer {CEREBRAS_API_KEY}"}
    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(
                f"{CEREBRAS_BASE_URL}/models",
                headers=auth_header,
                timeout=10.0,
            )
            print(f"Status: {response.status_code}")
            print(f"Response: {response.text[:800]}")
        except Exception as e:
            # Best-effort probe: report any failure rather than crash.
            print(f"Error: {e}")
23
+
24
async def test_chat():
    """Try a minimal chat completion against each candidate model.

    Iterates the candidates in order and stops at the first model that
    answers successfully; failures print a truncated error body so the
    inaccessible or rate-limited model IDs are easy to identify.
    """
    print("\n=== Testing /v1/chat/completions ===")
    client = AsyncOpenAI(
        api_key=CEREBRAS_API_KEY,
        base_url=CEREBRAS_BASE_URL,
        timeout=httpx.Timeout(60.0),
    )

    models_to_try = [
        "qwen-3-235b-a22b-instruct-2507",
        "zai-glm-4.7",
        "gpt-oss-120b",
        "llama3.1-8b",
    ]

    for model in models_to_try:
        print(f"\nTrying model: {model}")
        try:
            response = await client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": "hi"}],
                max_tokens=20,
            )
            # Plain string: the original f-string had no placeholders.
            print("Success!")
            print(f"Model: {response.model}")
            print(f"Content: {response.choices[0].message.content}")
            break
        except Exception as e:
            # Prefer the raw HTTP body when the SDK exception carries one.
            # Explicit None check — a truthiness test could silently drop a
            # falsy response object and hide the error body.
            response_obj = getattr(e, "response", None)
            error_text = response_obj.text if response_obj is not None else str(e)
            print(f"Error: {error_text[:300]}")
57
+
58
async def main():
    """Run both probes in sequence: model listing first, then chat."""
    await test_models_list()
    await test_chat()
61
+
62
+ if __name__ == "__main__":
63
+ import asyncio
64
+ asyncio.run(main())
test_silicon_api.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Test Silicon Flow API key directly."""
2
+ import os
3
+ import httpx
4
+ from openai import AsyncOpenAI
5
+
6
+ SILICON_API_KEY = "sk-vkswknrlhztbogulqjizbxpkdipbafudnirbrhzosxjkvmri"
7
+ SILICON_BASE_URL = "https://api.siliconflow.cn/v1"
8
+
9
async def test_models_list():
    """Probe the /v1/models endpoint and dump status plus truncated body."""
    print("=== Testing /v1/models ===")
    auth_header = {"Authorization": f"Bearer {SILICON_API_KEY}"}
    async with httpx.AsyncClient() as client:
        try:
            response = await client.get(
                f"{SILICON_BASE_URL}/models",
                headers=auth_header,
                timeout=10.0,
            )
            print(f"Status: {response.status_code}")
            print(f"Response: {response.text[:500]}")
        except Exception as e:
            # Best-effort probe: report any failure rather than crash.
            print(f"Error: {e}")
23
+
24
async def test_chat():
    """Try a minimal chat completion against each candidate model ref.

    Iterates the candidates in order and stops at the first model that
    answers successfully; failures print a truncated error body so the
    inaccessible model IDs are easy to identify.
    """
    print("\n=== Testing /v1/chat/completions ===")
    client = AsyncOpenAI(
        api_key=SILICON_API_KEY,
        base_url=SILICON_BASE_URL,
        timeout=httpx.Timeout(30.0),
    )

    models_to_try = [
        "Qwen/Qwen3.6-35B-A3B",
        "Qwen3.6-35B-A3B",
        "Qwen/Qwen2.5-72B-Instruct",
        "Pro/Qwen/Qwen2.5-72B-Instruct",
    ]

    for model in models_to_try:
        print(f"\nTrying model: {model}")
        try:
            response = await client.chat.completions.create(
                model=model,
                messages=[{"role": "user", "content": "hi"}],
                max_tokens=10,
            )
            print(f"Success! Response: {response}")
            break
        except Exception as e:
            # Prefer the raw HTTP body when the SDK exception carries one.
            # Explicit None check — a truthiness test could silently drop a
            # falsy response object and hide the error body.
            response_obj = getattr(e, "response", None)
            error_text = response_obj.text if response_obj is not None else str(e)
            print(f"Error: {error_text[:200]}")
56
+
57
async def main():
    """Run both probes in sequence: model listing first, then chat."""
    await test_models_list()
    await test_chat()
60
+
61
+ if __name__ == "__main__":
62
+ import asyncio
63
+ asyncio.run(main())