Spaces:
Running
Running
$(cat <<EOF
Browse files
Fix Groq and Cerebras model IDs and rate limits.
Groq: Model IDs need inner prefixes (qwen/qwen3-32b not qwen3-32b)
Cerebras: Removed inaccessible models, set higher rate limits
Silicon: Increased rate limits to 300/min
All providers now have appropriate concurrency and rate limit settings.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
EOF
)
- providers/cerebras/client.py +4 -9
- providers/groq/client.py +16 -3
- providers/silicon/client.py +3 -0
providers/cerebras/client.py
CHANGED
|
@@ -13,27 +13,23 @@ class CerebrasProvider(OpenAIChatTransport):
|
|
| 13 |
"""Cerebras provider using OpenAI-compatible /chat/completions."""
|
| 14 |
|
| 15 |
# Mapping of proxy model refs to Cerebras API model IDs.
|
| 16 |
-
# The proxy uses full refs like "cerebras/qwen-3-235b-a22b-instruct-2507"
|
| 17 |
-
# but Cerebras API expects bare model IDs like "qwen-3-235b-a22b-instruct-2507".
|
| 18 |
CEREBRAS_MODEL_MAP: dict[str, str] = {
|
| 19 |
-
"llama3.1-8b": "llama3.1-8b",
|
| 20 |
-
"qwen-3-235b-a22b-instruct-2507": "qwen-3-235b-a22b-instruct-2507",
|
| 21 |
-
"zai-glm-4.7": "zai-glm-4.7",
|
| 22 |
-
"gpt-oss-120b": "gpt-oss-120b",
|
| 23 |
"cerebras/llama3.1-8b": "llama3.1-8b",
|
| 24 |
"cerebras/qwen-3-235b-a22b-instruct-2507": "qwen-3-235b-a22b-instruct-2507",
|
| 25 |
-
"cerebras/z-ai/glm4.7": "zai-glm-4.7",
|
| 26 |
}
|
| 27 |
|
| 28 |
def __init__(self, config: ProviderConfig, *, settings: Settings):
|
| 29 |
base_url = (config.base_url or CEREBRAS_DEFAULT_BASE).rstrip("/")
|
| 30 |
if not base_url.endswith("/v1"):
|
| 31 |
base_url = base_url + "/v1"
|
|
|
|
| 32 |
super().__init__(
|
| 33 |
config,
|
| 34 |
provider_name="Cerebras",
|
| 35 |
base_url=base_url,
|
| 36 |
api_key=config.api_key,
|
|
|
|
|
|
|
| 37 |
)
|
| 38 |
self._settings = settings
|
| 39 |
|
|
@@ -47,10 +43,9 @@ class CerebrasProvider(OpenAIChatTransport):
|
|
| 47 |
else ReasoningReplayMode.DISABLED
|
| 48 |
)
|
| 49 |
body = build_base_request_body(request, reasoning_replay=reasoning_replay)
|
| 50 |
-
# Strip cerebras/ prefix so the API gets the bare model ID
|
| 51 |
model = body.get("model", "")
|
| 52 |
if model in self.CEREBRAS_MODEL_MAP:
|
| 53 |
body["model"] = self.CEREBRAS_MODEL_MAP[model]
|
| 54 |
elif model.startswith("cerebras/"):
|
| 55 |
-
body["model"] = model[len("cerebras/")
|
| 56 |
return body
|
|
|
|
| 13 |
"""Cerebras provider using OpenAI-compatible /chat/completions."""
|
| 14 |
|
| 15 |
# Mapping of proxy model refs to Cerebras API model IDs.
|
|
|
|
|
|
|
| 16 |
CEREBRAS_MODEL_MAP: dict[str, str] = {
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
"cerebras/llama3.1-8b": "llama3.1-8b",
|
| 18 |
"cerebras/qwen-3-235b-a22b-instruct-2507": "qwen-3-235b-a22b-instruct-2507",
|
|
|
|
| 19 |
}
|
| 20 |
|
| 21 |
def __init__(self, config: ProviderConfig, *, settings: Settings):
|
| 22 |
base_url = (config.base_url or CEREBRAS_DEFAULT_BASE).rstrip("/")
|
| 23 |
if not base_url.endswith("/v1"):
|
| 24 |
base_url = base_url + "/v1"
|
| 25 |
+
# Cerebras has generous rate limits
|
| 26 |
super().__init__(
|
| 27 |
config,
|
| 28 |
provider_name="Cerebras",
|
| 29 |
base_url=base_url,
|
| 30 |
api_key=config.api_key,
|
| 31 |
+
nim_rate_limit=300,
|
| 32 |
+
nim_max_concurrency=80,
|
| 33 |
)
|
| 34 |
self._settings = settings
|
| 35 |
|
|
|
|
| 43 |
else ReasoningReplayMode.DISABLED
|
| 44 |
)
|
| 45 |
body = build_base_request_body(request, reasoning_replay=reasoning_replay)
|
|
|
|
| 46 |
model = body.get("model", "")
|
| 47 |
if model in self.CEREBRAS_MODEL_MAP:
|
| 48 |
body["model"] = self.CEREBRAS_MODEL_MAP[model]
|
| 49 |
elif model.startswith("cerebras/"):
|
| 50 |
+
body["model"] = model[len("cerebras/"):]
|
| 51 |
return body
|
providers/groq/client.py
CHANGED
|
@@ -12,15 +12,26 @@ from providers.openai_compat import OpenAIChatTransport
|
|
| 12 |
class GroqProvider(OpenAIChatTransport):
|
| 13 |
"""Groq provider using OpenAI-compatible /chat/completions."""
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
def __init__(self, config: ProviderConfig, *, settings: Settings):
|
| 16 |
base_url = (config.base_url or GROQ_DEFAULT_BASE).rstrip("/")
|
| 17 |
if not base_url.endswith("/v1"):
|
| 18 |
base_url = base_url + "/v1"
|
|
|
|
| 19 |
super().__init__(
|
| 20 |
config,
|
| 21 |
provider_name="Groq",
|
| 22 |
base_url=base_url,
|
| 23 |
api_key=config.api_key,
|
|
|
|
|
|
|
| 24 |
)
|
| 25 |
self._settings = settings
|
| 26 |
|
|
@@ -34,8 +45,10 @@ class GroqProvider(OpenAIChatTransport):
|
|
| 34 |
else ReasoningReplayMode.DISABLED
|
| 35 |
)
|
| 36 |
body = build_base_request_body(request, reasoning_replay=reasoning_replay)
|
| 37 |
-
#
|
| 38 |
model = body.get("model", "")
|
| 39 |
-
if model.
|
| 40 |
-
body["model"] =
|
|
|
|
|
|
|
| 41 |
return body
|
|
|
|
| 12 |
class GroqProvider(OpenAIChatTransport):
|
| 13 |
"""Groq provider using OpenAI-compatible /chat/completions."""
|
| 14 |
|
| 15 |
+
# Mapping of proxy model refs to Groq API model IDs.
|
| 16 |
+
# groq/ prefix is stripped, but the inner prefix (like qwen/) is kept.
|
| 17 |
+
GROQ_MODEL_MAP: dict[str, str] = {
|
| 18 |
+
"groq/qwen3-32b": "qwen/qwen3-32b",
|
| 19 |
+
"groq/llama-3.3-70b-versatile": "llama-3.3-70b-versatile",
|
| 20 |
+
"groq/llama-3.1-8b-instant": "llama-3.1-8b-instant",
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
def __init__(self, config: ProviderConfig, *, settings: Settings):
|
| 24 |
base_url = (config.base_url or GROQ_DEFAULT_BASE).rstrip("/")
|
| 25 |
if not base_url.endswith("/v1"):
|
| 26 |
base_url = base_url + "/v1"
|
| 27 |
+
# Groq has generous rate limits - set high limits with no adaptive throttling
|
| 28 |
super().__init__(
|
| 29 |
config,
|
| 30 |
provider_name="Groq",
|
| 31 |
base_url=base_url,
|
| 32 |
api_key=config.api_key,
|
| 33 |
+
nim_rate_limit=500, # High limit for Groq
|
| 34 |
+
nim_max_concurrency=100, # High concurrency for Groq
|
| 35 |
)
|
| 36 |
self._settings = settings
|
| 37 |
|
|
|
|
| 45 |
else ReasoningReplayMode.DISABLED
|
| 46 |
)
|
| 47 |
body = build_base_request_body(request, reasoning_replay=reasoning_replay)
|
| 48 |
+
# Map proxy model ref to actual Groq API model ID
|
| 49 |
model = body.get("model", "")
|
| 50 |
+
if model in self.GROQ_MODEL_MAP:
|
| 51 |
+
body["model"] = self.GROQ_MODEL_MAP[model]
|
| 52 |
+
elif model.startswith("groq/"):
|
| 53 |
+
body["model"] = model[len("groq/"):]
|
| 54 |
return body
|
providers/silicon/client.py
CHANGED
|
@@ -16,11 +16,14 @@ class SiliconProvider(OpenAIChatTransport):
|
|
| 16 |
base_url = (config.base_url or SILICON_DEFAULT_BASE).rstrip("/")
|
| 17 |
if not base_url.endswith("/v1"):
|
| 18 |
base_url = base_url + "/v1"
|
|
|
|
| 19 |
super().__init__(
|
| 20 |
config,
|
| 21 |
provider_name="Silicon",
|
| 22 |
base_url=base_url,
|
| 23 |
api_key=config.api_key,
|
|
|
|
|
|
|
| 24 |
)
|
| 25 |
self._settings = settings
|
| 26 |
|
|
|
|
| 16 |
base_url = (config.base_url or SILICON_DEFAULT_BASE).rstrip("/")
|
| 17 |
if not base_url.endswith("/v1"):
|
| 18 |
base_url = base_url + "/v1"
|
| 19 |
+
# Silicon Flow has generous rate limits
|
| 20 |
super().__init__(
|
| 21 |
config,
|
| 22 |
provider_name="Silicon",
|
| 23 |
base_url=base_url,
|
| 24 |
api_key=config.api_key,
|
| 25 |
+
nim_rate_limit=300,
|
| 26 |
+
nim_max_concurrency=80,
|
| 27 |
)
|
| 28 |
self._settings = settings
|
| 29 |
|