dzezzefezfz committed on
Commit db2194b · verified · 1 Parent(s): 0fa7058

Update app.py

Files changed (1)
  1. app.py +75 -74
app.py CHANGED
@@ -2,7 +2,7 @@
 
 import os
 import json
-from typing import Generator, List, Dict, Optional, Any
+from typing import Generator, List, Dict, Optional, Any, Tuple
 
 import gradio as gr
 import httpx
@@ -14,19 +14,11 @@ CODEWORDS_API_KEY = os.environ.get("CODEWORDS_API_KEY")
 CODEWORDS_OPENAI_COMPAT_ENDPOINT = "https://openai.codewords.ai/v1/chat/completions"
 
 HF_TOKEN = os.environ.get("HF_TOKEN")
-# Use a public, non-gated default to avoid 400 model_not_found
-HF_MODEL = os.environ.get("HF_MODEL", "mistralai/Mistral-7B-Instruct-v0.2")
+HF_MODEL = os.environ.get("HF_MODEL", "deepseek-ai/DeepSeek-R1")  # safer example; coverage is common, still verify
 HF_BASE_URL = os.environ.get("HF_BASE_URL", "https://router.huggingface.co/v1")
 
 DEFAULT_MODEL = "gpt-4.1-mini"
-MODEL_OPTIONS = [
-    "gpt-5-mini",
-    "gpt-4.1-mini",
-    "gpt-5",
-    "claude-haiku-4-5",
-    "claude-sonnet-4-5",
-    "gemini-2.5-flash",
-]
+MODEL_OPTIONS = ["gpt-5-mini", "gpt-4.1-mini", "gpt-5", "claude-haiku-4-5", "claude-sonnet-4-5", "gemini-2.5-flash"]
 
 SYSTEM_PROMPT = "You are a helpful AI assistant. Be concise and accurate."
 
@@ -45,46 +37,64 @@ def to_openai_messages(latest_user_message: str, history: Optional[ChatHistory])
 
 def safe_error_body(resp: httpx.Response) -> str:
     try:
-        raw = resp.read()  # must read stream before .text/.json
+        raw = resp.read()
         return raw.decode("utf-8", errors="ignore") if isinstance(raw, (bytes, bytearray)) else str(raw)
     except Exception:
         return "<unable to read error body>"
 
-def humanize_hf_error(status: int, body: str) -> str:
-    """Explain common router errors (why: faster debugging in Spaces)."""
-    hint = ""
+def humanize_router_error(status: int, body: str) -> str:
     try:
         data = json.loads(body)
     except Exception:
-        data = {}
-    err = (data.get("error") or data.get("message") or data) if isinstance(data, dict) else data
+        return f"HTTP {status}: {body}"
 
-    msg = ""
+    err = data.get("error") or data
     if isinstance(err, dict):
         code = err.get("code") or ""
-        message = err.get("message") or ""
-        msg = f"{message} (code={code})".strip()
-        if code == "model_not_found" or "does not exist" in message:
+        message = err.get("message") or str(err)
+        hint = ""
+        if code in {"model_not_supported", "model_not_found"}:
             hint = (
-                "\n\n• The model id is wrong or not routed.\n"
-                "• Use a public chat model, e.g.:\n"
-                " - mistralai/Mistral-7B-Instruct-v0.2\n"
-                " - HuggingFaceH4/zephyr-7b-beta\n"
-                " - Qwen/Qwen2.5-7B-Instruct\n"
-                "• If you need Llama, accept the model license on HF with the **same account as your HF_TOKEN** "
-                "(e.g. Meta Llama 3.1 is gated)."
+                "\n\nNext steps:\n"
+                "• Ensure at least one provider is enabled in your HF Inference Provider settings.\n"
+                " Pick a model listed by /v1/models for your token.\n"
+                " Try adding a routing suffix (e.g., :fastest / :cheapest) or select a specific provider on a supported model."
             )
-        elif code in {"access_required", "forbidden"} or "access" in message.lower():
-            hint = (
-                "\n\n• Your HF_TOKEN lacks access. Accept the model’s license or request access, then re-run.\n"
-                "• Alternatively switch to a public model as above."
-            )
-        elif code in {"rate_limit_exceeded", "too_many_requests"}:
-            hint = "\n\n• Rate limited. Reduce concurrency or add retry/backoff."
-    else:
-        msg = str(err)
+        return f"HTTP {status}: {message} (code={code}){hint}"
+    return f"HTTP {status}: {err}"
+
+# -----------------------------
+# Router: list available models for this token
+# -----------------------------
+def list_hf_models() -> Tuple[List[str], str]:
+    if not HF_TOKEN:
+        return [], "⚠️ HF_TOKEN missing."
+    url = HF_BASE_URL.rstrip("/") + "/models"
+    try:
+        with httpx.Client(timeout=30.0) as client:
+            resp = client.get(url, headers={"Authorization": f"Bearer {HF_TOKEN}"})
+            if resp.status_code != 200:
+                return [], humanize_router_error(resp.status_code, safe_error_body(resp))
+            data = resp.json()
+            # Expect shape: {"data": [{"id": "repo_id", ...}, ...]} (OpenAI-like)
+            ids = []
+            if isinstance(data, dict) and isinstance(data.get("data"), list):
+                for item in data["data"]:
+                    mid = item.get("id")
+                    if isinstance(mid, str):
+                        ids.append(mid)
+            elif isinstance(data, list):
+                # Fallback if providers return a flat list
+                ids = [m.get("id") for m in data if isinstance(m, dict) and "id" in m]
+            return ids, f"Loaded {len(ids)} models."
+    except Exception as e:
+        return [], f"⚠️ Failed to list models: {e}"
 
-    return f"HTTP {status}: {msg}{hint}"
+def maybe_add_policy_suffix(model_id: str) -> str:
+    # If user didn't specify a provider/policy suffix, prefer ':fastest' for better chances.
+    if ":" in model_id:
+        return model_id
+    return f"{model_id}:fastest"
 
 # -----------------------------
 # Providers
@@ -94,12 +104,7 @@ def chat_codewords(message: str, history: Optional[ChatHistory], model: str) ->
         yield "⚠️ CODEWORDS_API_KEY missing."
         return
 
-    payload = {
-        "model": model,
-        "messages": to_openai_messages(message, history),
-        "stream": True,
-        "max_tokens": 2048,
-    }
+    payload = {"model": model, "messages": to_openai_messages(message, history), "stream": True, "max_tokens": 2048}
 
     response_text = ""
     try:
@@ -107,14 +112,11 @@ def chat_codewords(message: str, history: Optional[ChatHistory], model: str) ->
             with client.stream(
                 "POST",
                 CODEWORDS_OPENAI_COMPAT_ENDPOINT,
-                headers={
-                    "Authorization": f"Bearer {CODEWORDS_API_KEY}",
-                    "Content-Type": "application/json",
-                },
+                headers={"Authorization": f"Bearer {CODEWORDS_API_KEY}", "Content-Type": "application/json"},
                 json=payload,
             ) as resp:
                 if resp.status_code != 200:
-                    yield humanize_hf_error(resp.status_code, safe_error_body(resp))
+                    yield humanize_router_error(resp.status_code, safe_error_body(resp))
                     return
                 for line in resp.iter_lines():
                     if not line:
@@ -141,18 +143,14 @@ def chat_codewords(message: str, history: Optional[ChatHistory], model: str) ->
     except Exception as e:
         yield f"⚠️ Error: {e}"
 
-def chat_hf_router(message: str, history: Optional[ChatHistory]) -> Generator[str, None, None]:
+def chat_hf_router(message: str, history: Optional[ChatHistory], model_override: Optional[str] = None) -> Generator[str, None, None]:
     if not HF_TOKEN:
         yield "⚠️ HF_TOKEN missing."
         return
 
+    model_id = maybe_add_policy_suffix((model_override or HF_MODEL).strip())
     url = HF_BASE_URL.rstrip("/") + "/chat/completions"
-    payload = {
-        "model": HF_MODEL,  # must be accessible to your HF token
-        "messages": to_openai_messages(message, history),
-        "stream": True,
-        "max_tokens": 1024,
-    }
+    payload = {"model": model_id, "messages": to_openai_messages(message, history), "stream": True, "max_tokens": 1024}
 
     response_text = ""
     try:
@@ -164,7 +162,7 @@ def chat_hf_router(message: str, history: Optional[ChatHistory]) -> Generator[st
                 json=payload,
             ) as resp:
                 if resp.status_code != 200:
-                    yield humanize_hf_error(resp.status_code, safe_error_body(resp))
+                    yield humanize_router_error(resp.status_code, safe_error_body(resp))
                     return
                 for line in resp.iter_lines():
                     if not line:
@@ -209,11 +207,10 @@ with gr.Blocks(title="Multi-Provider Chat") as demo:
         label="Provider",
     )
     model_choice = gr.Dropdown(choices=MODEL_OPTIONS, value=DEFAULT_MODEL, label="Model (CodeWords only)")
-    hf_model = gr.Textbox(
-        value=HF_MODEL,
-        label="HF Model (router)",
-        placeholder="e.g. mistralai/Mistral-7B-Instruct-v0.2",
-    )
+    with gr.Row():
+        hf_model = gr.Textbox(value=HF_MODEL, label="HF Model (router)", placeholder="e.g. deepseek-ai/DeepSeek-R1")
+        refresh_models = gr.Button("Refresh models from HF Router")
+        available_models = gr.Dropdown(choices=[], value=None, label="Available models (token-scoped)", interactive=True)
 
     chatbot = gr.Chatbot(label="Chat", height=520, show_label=False)
 
@@ -223,36 +220,40 @@ with gr.Blocks(title="Multi-Provider Chat") as demo:
     with gr.Row():
         clear = gr.Button("Clear Chat")
 
+    info = gr.Markdown("")
+
     gr.Markdown(
         """
-        **Space secrets**
-        - `HF_TOKEN` (required for router)
-        - `HF_MODEL` (default model id; can override in UI)
-        - Optional: `HF_BASE_URL` (defaults to `https://router.huggingface.co/v1`)
-        - For CodeWords: `CODEWORDS_API_KEY`
+        **Tips**
+        - If you see `model_not_supported`, enable at least one provider in your HF account and use a model from the **Available models** list.
+        - You can add `:fastest` or `:cheapest` to prefer routing policies.
         """
     )
 
-    def respond(message: str, chat_history: Optional[ChatHistory], prov: str, model: str, hf_model_ui: str):
+    def do_refresh_models() -> Tuple[List[str], str]:
+        models, msg_text = list_hf_models()
+        return models, msg_text
+
+    def respond(message: str, chat_history: Optional[ChatHistory], prov: str, model: str, hf_model_ui: str, selected_model: Optional[str]):
         chat_history = chat_history or []
         chat_history.append({"role": "user", "content": message})
         chat_history.append({"role": "assistant", "content": ""})
 
-        # allow runtime override of router model
-        global HF_MODEL
-        HF_MODEL = hf_model_ui.strip() or HF_MODEL
+        target_model = (selected_model or hf_model_ui or HF_MODEL).strip()
 
         if prov.startswith("CodeWords"):
             gen = chat_codewords(message, chat_history[:-1], model=model)
         else:
-            gen = chat_hf_router(message, chat_history[:-1])
+            gen = chat_hf_router(message, chat_history[:-1], model_override=target_model)
 
         for partial in gen:
             chat_history[-1] = {"role": "assistant", "content": partial}
             yield chat_history
 
-    msg.submit(respond, inputs=[msg, chatbot, provider, model_choice, hf_model], outputs=[chatbot])
-    submit.click(respond, inputs=[msg, chatbot, provider, model_choice, hf_model], outputs=[chatbot])
+    # Wiring
+    refresh_models.click(do_refresh_models, inputs=None, outputs=[available_models, info])
+    msg.submit(respond, inputs=[msg, chatbot, provider, model_choice, hf_model, available_models], outputs=[chatbot])
+    submit.click(respond, inputs=[msg, chatbot, provider, model_choice, hf_model, available_models], outputs=[chatbot])
     clear.click(lambda: [], inputs=None, outputs=chatbot, queue=False)
     msg.submit(lambda: "", inputs=None, outputs=msg, queue=False)
     submit.click(lambda: "", inputs=None, outputs=msg, queue=False)
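For reference (not part of the commit): the new list_hf_models() flow can also be exercised outside the Space to confirm which model ids the router will accept for a given HF_TOKEN. A minimal sketch, assuming only the OpenAI-compatible GET /v1/models route already used above:

import os

import httpx

# Token-scoped check against the HF router's OpenAI-compatible models endpoint.
base = os.environ.get("HF_BASE_URL", "https://router.huggingface.co/v1")
resp = httpx.get(
    base.rstrip("/") + "/models",
    headers={"Authorization": f"Bearer {os.environ['HF_TOKEN']}"},
    timeout=30.0,
)
resp.raise_for_status()
ids = [m["id"] for m in resp.json().get("data", []) if isinstance(m, dict) and "id" in m]
print(f"{len(ids)} routable models; sample:", ids[:5])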
 
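Both providers stream via client.stream(...) and walk resp.iter_lines(), but the hunks above cut off before the chunk parsing. A minimal sketch of how such an OpenAI-compatible SSE stream is typically consumed; the endpoint, token, and model id below are illustrative, and the :fastest suffix mirrors maybe_add_policy_suffix():

import json
import os

import httpx

def stream_chat(url: str, token: str, model: str, prompt: str):
    """Yield assistant text deltas from an OpenAI-compatible streaming endpoint."""
    payload = {"model": model, "messages": [{"role": "user", "content": prompt}], "stream": True}
    headers = {"Authorization": f"Bearer {token}", "Content-Type": "application/json"}
    with httpx.Client(timeout=60.0) as client:
        with client.stream("POST", url, headers=headers, json=payload) as resp:
            resp.raise_for_status()
            for line in resp.iter_lines():
                if not line or not line.startswith("data: "):
                    continue  # skip keep-alives / non-data lines
                chunk = line[len("data: "):]
                if chunk.strip() == "[DONE]":
                    break  # end-of-stream sentinel
                delta = json.loads(chunk)["choices"][0].get("delta", {})
                if delta.get("content"):
                    yield delta["content"]

if __name__ == "__main__":
    for piece in stream_chat(
        "https://router.huggingface.co/v1/chat/completions",
        os.environ["HF_TOKEN"],
        "deepseek-ai/DeepSeek-R1:fastest",  # illustrative id; pick one returned by /v1/models
        "Say hello in one sentence.",
    ):
        print(piece, end="", flush=True)
    print()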