Update app.py
app.py
CHANGED
@@ -2,7 +2,7 @@
 
 import os
 import json
-from typing import Generator, List, Dict, Optional, Any
+from typing import Generator, List, Dict, Optional, Any, Tuple
 
 import gradio as gr
 import httpx
@@ -14,19 +14,11 @@ CODEWORDS_API_KEY = os.environ.get("CODEWORDS_API_KEY")
 CODEWORDS_OPENAI_COMPAT_ENDPOINT = "https://openai.codewords.ai/v1/chat/completions"
 
 HF_TOKEN = os.environ.get("HF_TOKEN")
-
-HF_MODEL = os.environ.get("HF_MODEL", "mistralai/Mistral-7B-Instruct-v0.2")
+HF_MODEL = os.environ.get("HF_MODEL", "deepseek-ai/DeepSeek-R1")  # safer example; coverage is common, still verify
 HF_BASE_URL = os.environ.get("HF_BASE_URL", "https://router.huggingface.co/v1")
 
 DEFAULT_MODEL = "gpt-4.1-mini"
-MODEL_OPTIONS = [
-    "gpt-5-mini",
-    "gpt-4.1-mini",
-    "gpt-5",
-    "claude-haiku-4-5",
-    "claude-sonnet-4-5",
-    "gemini-2.5-flash",
-]
+MODEL_OPTIONS = ["gpt-5-mini", "gpt-4.1-mini", "gpt-5", "claude-haiku-4-5", "claude-sonnet-4-5", "gemini-2.5-flash"]
 
 SYSTEM_PROMPT = "You are a helpful AI assistant. Be concise and accurate."
 
@@ -45,46 +37,64 @@ def to_openai_messages(latest_user_message: str, history: Optional[ChatHistory])
 
 def safe_error_body(resp: httpx.Response) -> str:
     try:
         raw = resp.read()
         return raw.decode("utf-8", errors="ignore") if isinstance(raw, (bytes, bytearray)) else str(raw)
     except Exception:
         return "<unable to read error body>"
 
-def …
-    """Explain common router errors (why: faster debugging in Spaces)."""
-    hint = ""
+def humanize_router_error(status: int, body: str) -> str:
     try:
         data = json.loads(body)
     except Exception:
-        …
+        return f"HTTP {status}: {body}"
 
-    err = (data.get("error") or data.get("message") or data) if isinstance(data, dict) else data
+    err = data.get("error") or data
     if isinstance(err, dict):
         code = err.get("code") or ""
-        message = err.get("message") or …
-        if code …
+        message = err.get("message") or str(err)
+        hint = ""
+        if code in {"model_not_supported", "model_not_found"}:
             hint = (
-                "\n\n…"
-                "• …"
-                "   - Qwen/Qwen2.5-7B-Instruct\n"
-                "• If you need Llama, accept the model license on HF with the **same account as your HF_TOKEN** "
-                "(e.g. Meta Llama 3.1 is gated)."
+                "\n\nNext steps:\n"
+                "• Ensure at least one provider is enabled in your HF Inference Provider settings.\n"
+                "• Pick a model listed by /v1/models for your token.\n"
+                "• Try adding a routing suffix (e.g., :fastest / :cheapest) or select a specific provider on a supported model."
             )
-        …
+        return f"HTTP {status}: {message} (code={code}){hint}"
+    return f"HTTP {status}: {err}"
+
+# -----------------------------
+# Router: list available models for this token
+# -----------------------------
+def list_hf_models() -> Tuple[List[str], str]:
+    if not HF_TOKEN:
+        return [], "⚠️ HF_TOKEN missing."
+    url = HF_BASE_URL.rstrip("/") + "/models"
+    try:
+        with httpx.Client(timeout=30.0) as client:
+            resp = client.get(url, headers={"Authorization": f"Bearer {HF_TOKEN}"})
+            if resp.status_code != 200:
+                return [], humanize_router_error(resp.status_code, safe_error_body(resp))
+            data = resp.json()
+            # Expect shape: {"data": [{"id": "repo_id", ...}, ...]} (OpenAI-like)
+            ids = []
+            if isinstance(data, dict) and isinstance(data.get("data"), list):
+                for item in data["data"]:
+                    mid = item.get("id")
+                    if isinstance(mid, str):
+                        ids.append(mid)
+            elif isinstance(data, list):
+                # Fallback if providers return a flat list
+                ids = [m.get("id") for m in data if isinstance(m, dict) and "id" in m]
+            return ids, f"Loaded {len(ids)} models."
+    except Exception as e:
+        return [], f"⚠️ Failed to list models: {e}"
 
+def maybe_add_policy_suffix(model_id: str) -> str:
+    # If user didn't specify a provider/policy suffix, prefer ':fastest' for better chances.
+    if ":" in model_id:
+        return model_id
+    return f"{model_id}:fastest"
 
 # -----------------------------
 # Providers
@@ -94,12 +104,7 @@ def chat_codewords(message: str, history: Optional[ChatHistory], model: str) ->
         yield "⚠️ CODEWORDS_API_KEY missing."
         return
 
-    payload = {
-        "model": model,
-        "messages": to_openai_messages(message, history),
-        "stream": True,
-        "max_tokens": 2048,
-    }
+    payload = {"model": model, "messages": to_openai_messages(message, history), "stream": True, "max_tokens": 2048}
 
     response_text = ""
     try:
@@ -107,14 +112,11 @@ def chat_codewords(message: str, history: Optional[ChatHistory], model: str) ->
             with client.stream(
                 "POST",
                 CODEWORDS_OPENAI_COMPAT_ENDPOINT,
-                headers={
-                    "Authorization": f"Bearer {CODEWORDS_API_KEY}",
-                    "Content-Type": "application/json",
-                },
+                headers={"Authorization": f"Bearer {CODEWORDS_API_KEY}", "Content-Type": "application/json"},
                 json=payload,
             ) as resp:
                 if resp.status_code != 200:
-                    yield …
+                    yield humanize_router_error(resp.status_code, safe_error_body(resp))
                     return
                 for line in resp.iter_lines():
                     if not line:
@@ -141,18 +143,14 @@ def chat_codewords(message: str, history: Optional[ChatHistory], model: str) ->
     except Exception as e:
         yield f"⚠️ Error: {e}"
 
-def chat_hf_router(message: str, history: Optional[ChatHistory]) -> Generator[str, None, None]:
+def chat_hf_router(message: str, history: Optional[ChatHistory], model_override: Optional[str] = None) -> Generator[str, None, None]:
     if not HF_TOKEN:
         yield "⚠️ HF_TOKEN missing."
         return
 
+    model_id = maybe_add_policy_suffix((model_override or HF_MODEL).strip())
     url = HF_BASE_URL.rstrip("/") + "/chat/completions"
-    payload = {
-        "model": HF_MODEL,  # must be accessible to your HF token
-        "messages": to_openai_messages(message, history),
-        "stream": True,
-        "max_tokens": 1024,
-    }
+    payload = {"model": model_id, "messages": to_openai_messages(message, history), "stream": True, "max_tokens": 1024}
 
     response_text = ""
     try:
@@ -164,7 +162,7 @@ def chat_hf_router(message: str, history: Optional[ChatHistory]) -> Generator[st
                 json=payload,
             ) as resp:
                 if resp.status_code != 200:
-                    yield …
+                    yield humanize_router_error(resp.status_code, safe_error_body(resp))
                     return
                 for line in resp.iter_lines():
                     if not line:
@@ -209,11 +207,10 @@ with gr.Blocks(title="Multi-Provider Chat") as demo:
             label="Provider",
         )
         model_choice = gr.Dropdown(choices=MODEL_OPTIONS, value=DEFAULT_MODEL, label="Model (CodeWords only)")
-        …
-        )
+    with gr.Row():
+        hf_model = gr.Textbox(value=HF_MODEL, label="HF Model (router)", placeholder="e.g. deepseek-ai/DeepSeek-R1")
+        refresh_models = gr.Button("Refresh models from HF Router")
+        available_models = gr.Dropdown(choices=[], value=None, label="Available models (token-scoped)", interactive=True)
 
     chatbot = gr.Chatbot(label="Chat", height=520, show_label=False)
 
@@ -223,36 +220,40 @@
     with gr.Row():
         clear = gr.Button("Clear Chat")
 
+    info = gr.Markdown("")
+
     gr.Markdown(
         """
-        **…
-        - `…
-        - `…
-        - Optional: `HF_BASE_URL` (defaults to `https://router.huggingface.co/v1`)
-        - For CodeWords: `CODEWORDS_API_KEY`
+        **Tips**
+        - If you see `model_not_supported`, enable at least one provider in your HF account and use a model from the **Available models** list.
+        - You can add `:fastest` or `:cheapest` to prefer routing policies.
         """
     )
 
-    def …
+    def do_refresh_models() -> Tuple[List[str], str]:
+        models, msg_text = list_hf_models()
+        return models, msg_text
+
+    def respond(message: str, chat_history: Optional[ChatHistory], prov: str, model: str, hf_model_ui: str, selected_model: Optional[str]):
         chat_history = chat_history or []
         chat_history.append({"role": "user", "content": message})
         chat_history.append({"role": "assistant", "content": ""})
 
-        global HF_MODEL
-        HF_MODEL = hf_model_ui.strip() or HF_MODEL
+        target_model = (selected_model or hf_model_ui or HF_MODEL).strip()
 
         if prov.startswith("CodeWords"):
             gen = chat_codewords(message, chat_history[:-1], model=model)
         else:
-            gen = chat_hf_router(message, chat_history[:-1])
+            gen = chat_hf_router(message, chat_history[:-1], model_override=target_model)
 
         for partial in gen:
            chat_history[-1] = {"role": "assistant", "content": partial}
            yield chat_history
 
-    …
+    # Wiring
+    refresh_models.click(do_refresh_models, inputs=None, outputs=[available_models, info])
+    msg.submit(respond, inputs=[msg, chatbot, provider, model_choice, hf_model, available_models], outputs=[chatbot])
+    submit.click(respond, inputs=[msg, chatbot, provider, model_choice, hf_model, available_models], outputs=[chatbot])
     clear.click(lambda: [], inputs=None, outputs=chatbot, queue=False)
     msg.submit(lambda: "", inputs=None, outputs=msg, queue=False)
     submit.click(lambda: "", inputs=None, outputs=msg, queue=False)
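Note: both streaming hunks end inside `for line in resp.iter_lines():`, so the parse loop itself is outside the diff context shown above. For orientation only, an OpenAI-compatible SSE loop typically looks like the sketch below; the names follow the surrounding code (`response_text` accumulating deltas), but the actual body in app.py is not shown here and may differ.

    # Sketch of the elided streaming loop (assumed shape, not the file's exact code).
    for line in resp.iter_lines():
        if not line:
            continue
        if line.startswith("data: "):
            chunk = line[len("data: "):].strip()
            if chunk == "[DONE]":
                break
            try:
                delta = json.loads(chunk)["choices"][0]["delta"].get("content") or ""
            except (KeyError, IndexError, json.JSONDecodeError):
                continue  # skip keep-alives and malformed chunks
            response_text += delta
            yield response_text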
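To sanity-check the router setup outside the Space, the snippet below queries the same /v1/models endpoint that the new list_hf_models() helper uses. This is a hedged sketch, not part of the commit: it assumes HF_TOKEN is exported and reuses the HF_BASE_URL default from the diff.

    # check_router.py — standalone check mirroring list_hf_models() above.
    import os
    import httpx

    token = os.environ["HF_TOKEN"]
    base = os.environ.get("HF_BASE_URL", "https://router.huggingface.co/v1")

    resp = httpx.get(
        base.rstrip("/") + "/models",
        headers={"Authorization": f"Bearer {token}"},
        timeout=30.0,
    )
    resp.raise_for_status()
    ids = [m["id"] for m in resp.json().get("data", []) if isinstance(m, dict) and "id" in m]
    print(f"{len(ids)} models visible to this token")  # should match the app's "Loaded N models."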