Spaces:

Moealsarraj
/

devkit

Sleeping

App Files Files Community

Mohammed AL Sarraj committed on Apr 12

Commit

85a09fa

1 Parent(s): 186efee

fix: Cohere V2 API handler, handle 402 errors, fix model names

Browse files

Files changed (1) hide show

app/core/ai.py +24 -4

app/core/ai.py CHANGED Viewed

@@ -41,7 +41,7 @@ _PREMIUM_MODELS = {
     "openai":     "gpt-4o-mini",
     "deepseek":   "deepseek-chat",
     "together":   "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
-    "cohere":     "command-r-plus",
 }
 # ── Task-specific model routing ──
@@ -57,7 +57,7 @@ _TASK_MODELS = {
         "mistral":    "mistral-medium-latest",
         "deepseek":   "deepseek-chat",
         "together":   "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
-        "cohere":     "command-r-plus",
     },
     "code": {
         "groq":       "llama-3.3-70b-versatile",
@@ -66,7 +66,7 @@ _TASK_MODELS = {
         "mistral":    "mistral-medium-latest",
         "deepseek":   "deepseek-chat",
         "together":   "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
-        "cohere":     "command-r-plus",
     },
     "fast": {
         "groq":       "llama-3.1-8b-instant",
@@ -196,6 +196,8 @@ def call_ai_single(provider_name: str, messages: list, system: str = "",
         messages = [{"role": "system", "content": system}] + messages
     if provider_name == "gemini":
         return _post_gemini(prov["key"], model, messages, max_tokens, prov["timeout"])
     return _post_openai(
         prov["url"], prov["key"], model,
         messages, max_tokens, prov["extra"], prov["timeout"]
@@ -221,6 +223,22 @@ def _post_openai(url, key, model, messages, max_tokens, extra_headers, timeout=6
     return _clean(r.json()["choices"][0]["message"]["content"])
 def _build_chain(task_hint: str) -> list[dict]:
     """Build an ordered provider chain for the given task hint."""
     hint = task_hint if task_hint in _TASK_PRIORITY else "default"
@@ -281,13 +299,15 @@ def call_ai(messages: list, system: str = "", max_tokens: int = 2048,
             logger.debug("Trying %s/%s for task=%s", prov["name"], prov["model"], task_hint)
             if prov["name"] == "gemini":
                 return _post_gemini(prov["key"], prov["model"], messages, max_tokens, prov["timeout"])
             return _post_openai(
                 prov["url"], prov["key"], prov["model"],
                 messages, max_tokens, prov["extra"], prov["timeout"]
             )
         except requests.exceptions.HTTPError as e:
             status = e.response.status_code if e.response is not None else 0
-            if status in (429, 503, 502):
                 logger.debug("Provider %s returned %s, trying next", prov["name"], status)
                 last_exc = e
                 continue

     "openai":     "gpt-4o-mini",
     "deepseek":   "deepseek-chat",
     "together":   "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
+    "cohere":     "command-r",
 }
 # ── Task-specific model routing ──
         "mistral":    "mistral-medium-latest",
         "deepseek":   "deepseek-chat",
         "together":   "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
+        "cohere":     "command-r",
     },
     "code": {
         "groq":       "llama-3.3-70b-versatile",
         "mistral":    "mistral-medium-latest",
         "deepseek":   "deepseek-chat",
         "together":   "meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
+        "cohere":     "command-r",
     },
     "fast": {
         "groq":       "llama-3.1-8b-instant",
         messages = [{"role": "system", "content": system}] + messages
     if provider_name == "gemini":
         return _post_gemini(prov["key"], model, messages, max_tokens, prov["timeout"])
+    if provider_name == "cohere":
+        return _post_cohere(prov["key"], model, messages, max_tokens, prov["timeout"])
     return _post_openai(
         prov["url"], prov["key"], model,
         messages, max_tokens, prov["extra"], prov["timeout"]
     return _clean(r.json()["choices"][0]["message"]["content"])
+def _post_cohere(key: str, model: str, messages: list, max_tokens: int, timeout: int = 45) -> str:
+    """Call Cohere V2 Chat API."""
+    headers = {"Authorization": f"Bearer {key}", "Content-Type": "application/json"}
+    r = requests.post("https://api.cohere.com/v2/chat",
+        headers=headers,
+        json={"model": model, "messages": messages, "max_tokens": max_tokens},
+        timeout=timeout)
+    r.raise_for_status()
+    data = r.json()
+    # V2 returns content as list of blocks
+    content = data.get("message", {}).get("content", [])
+    if content and isinstance(content, list):
+        return _clean(content[0].get("text", ""))
+    return _clean(str(data))
 def _build_chain(task_hint: str) -> list[dict]:
     """Build an ordered provider chain for the given task hint."""
     hint = task_hint if task_hint in _TASK_PRIORITY else "default"
             logger.debug("Trying %s/%s for task=%s", prov["name"], prov["model"], task_hint)
             if prov["name"] == "gemini":
                 return _post_gemini(prov["key"], prov["model"], messages, max_tokens, prov["timeout"])
+            if prov["name"] == "cohere":
+                return _post_cohere(prov["key"], prov["model"], messages, max_tokens, prov["timeout"])
             return _post_openai(
                 prov["url"], prov["key"], prov["model"],
                 messages, max_tokens, prov["extra"], prov["timeout"]
             )
         except requests.exceptions.HTTPError as e:
             status = e.response.status_code if e.response is not None else 0
+            if status in (402, 429, 503, 502):
                 logger.debug("Provider %s returned %s, trying next", prov["name"], status)
                 last_exc = e
                 continue