cntalk committed on
Commit
bb3d2f3
·
verified ·
1 Parent(s): 57e1d20

Update: Switch to OpenRouter API with free models

Browse files
Files changed (1) hide show
  1. app.py +158 -128
app.py CHANGED
@@ -1,132 +1,159 @@
1
  """
2
- Hermes HF Space - Multi-Model AI Hub
3
- 多模型对比助手:同时调用多个开源模型,对比回答质量
4
  """
5
 
6
  import gradio as gr
7
  import os
8
- from typing import Optional
9
  import time
 
10
 
11
- # HF Inference API (免费 tier)
12
- HF_TOKEN = os.getenv("HF_TOKEN", "")
13
- if not HF_TOKEN:
14
- # 尝试从环境变量读取,token 在 secrets 中配置
15
- pass
16
-
17
- def call_hf_inference(model_id: str, prompt: str, max_tokens: int = 256) -> str:
18
- """调用 HF Inference API"""
19
- import requests
20
-
21
- headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
22
-
23
- # 不同模型的 API 格式
24
- if "mistral" in model_id.lower() or "llama" in model_id.lower() or "qwen" in model_id.lower():
25
- # Chat models
26
- api_url = f"https://router.huggingface.co/hf-inference/models/{model_id}"
27
- payload = {
28
- "inputs": prompt,
29
- "parameters": {"max_new_tokens": max_tokens, "return_full_text": False}
30
- }
31
- else:
32
- # Text generation models
33
- api_url = f"https://router.huggingface.co/hf-inference/models/{model_id}"
34
- payload = {
35
- "inputs": prompt,
36
- "parameters": {"max_new_tokens": max_tokens}
37
- }
38
-
39
- try:
40
- resp = requests.post(api_url, json=payload, headers=headers, timeout=60)
41
- if resp.status_code == 200:
42
- result = resp.json()
43
- if isinstance(result, list) and len(result) > 0:
44
- return result[0].get("generated_text", str(result[0]))
45
- return str(result)
46
- elif resp.status_code == 429:
47
- return "⚠️ Rate limit exceeded. Please wait a moment."
48
- elif resp.status_code == 403:
49
- return "⚠️ Model requires additional permissions. Visit the model page to accept terms."
50
- else:
51
- return f"⚠️ Error {resp.status_code}: {resp.text[:200]}"
52
- except Exception as e:
53
- return f"⚠️ Request failed: {str(e)[:100]}"
54
-
55
- def chat_with_model(model_id: str, prompt: str, system: str = "") -> str:
56
- """带系统提示的对话"""
57
- full_prompt = f"{system}\n\nUser: {prompt}\n\nAssistant:" if system else prompt
58
- return call_hf_inference(model_id, full_prompt, max_tokens=384)
59
 
60
- # 预设模型列表(免费 tier 可用的优质小模型)
61
  MODELS = {
62
- "🦙 Llama 3.2-3B (Instruct)": "meta-llama/Llama-3.2-3B-Instruct",
63
- "🔮 Qwen2.5-7B (Instruct)": "Qwen/Qwen2.5-7B-Instruct",
64
- "🤖 Gemma 2-2B (Instruct)": "google/gemma-2-2b-it",
65
- "⚡ Mistral-7B (Instruct)": "mistralai/Mistral-7B-Instruct-v0.3",
66
- "🌟 StarCoder2-3B": "bigcode/starcoder2-3b",
67
- "💬 ChatGLM3-6B": "THUDM/chatglm3-6b",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  }
69
 
70
  SYSTEM_PROMPTS = {
71
  "Default": "",
72
  "Code Assistant": "You are an expert programmer. Write clean, efficient code with brief explanations.",
73
- " 中文助手": "你是一个有帮助的中文AI助手,用简洁清晰的语言回答。",
74
  "Summarizer": "You are a text summarization expert. Provide concise, accurate summaries.",
75
  "Creative Writer": "You are a creative writer. Write engaging, imaginative content.",
76
  }
77
 
78
- def format_response(model_name: str, response: str, elapsed: float) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  emoji = "✅" if not response.startswith("⚠️") else "⚠️"
80
- return f"{emoji} **{model_name}** ({elapsed:.1f}s)\n{response}\n"
 
81
 
82
  def compare_models(user_input: str, model_keys: list, system_key: str = "Default"):
83
  """对比多个模型的回答"""
84
  if not user_input.strip():
85
  return "⚠️ Please enter a message."
86
-
87
  system = SYSTEM_PROMPTS.get(system_key, "")
88
  results = []
89
-
90
  for key in model_keys:
91
- model_id = MODELS.get(key)
92
- if not model_id:
93
  continue
94
-
95
- start = time.time()
96
- response = chat_with_model(model_id, user_input, system)
97
- elapsed = time.time() - start
98
-
99
- results.append(format_response(key, response, elapsed))
100
-
101
  if not results:
102
  return "⚠️ Please select at least one model."
103
-
104
  return "\n---\n".join(results)
105
 
 
106
  def single_chat(model_key: str, user_input: str, system_key: str, history: list):
107
  """单模型对话(带历史)"""
108
  if not user_input.strip():
109
  return history, ""
110
-
111
- model_id = MODELS.get(model_key, "")
 
112
  system = SYSTEM_PROMPTS.get(system_key, "")
113
-
114
- start = time.time()
115
- response = chat_with_model(model_id, user_input, system)
116
- elapsed = time.time() - start
117
-
118
- history.append((user_input, f"{response}\n\n⏱️ Response time: {elapsed:.1f}s"))
 
 
 
 
119
  return history, ""
120
 
 
121
  # Gradio UI
122
- with gr.Blocks(title="Hermes HF Hub", theme=gr.themes.Soft()) as demo:
123
  gr.Markdown("""
124
- # 🐠 Hermes HF Hub
125
  ### 多模型 AI 助手 — 同时对比多个开源模型的回答
126
-
127
- 支持模型: Llama 3.2 / Qwen2.5 / Gemma 2 / Mistral 7B / StarCoder2 / ChatGLM3
 
 
128
  """)
129
-
130
  with gr.Tabs():
131
  with gr.TabItem("🔍 模型对比"):
132
  with gr.Row():
@@ -134,51 +161,52 @@ with gr.Blocks(title="Hermes HF Hub", theme=gr.themes.Soft()) as demo:
134
  user_input = gr.Textbox(
135
  label="✏️ 输入问题",
136
  placeholder="例如: 解释一下什么是transformer架构",
137
- lines=4
138
  )
139
  with gr.Row():
140
  system_dropdown = gr.Dropdown(
141
  choices=list(SYSTEM_PROMPTS.keys()),
142
  value="Default",
143
- label="系统提示"
144
  )
145
- compare_btn = gr.Button("🚀 对比所有模型", variant="primary")
146
-
147
  gr.Markdown("**选择要对比的模型:**")
148
  model_checkboxes = gr.CheckboxGroup(
149
- choices=list(MODELS.keys()),
150
- value=[list(MODELS.keys())[0], list(MODELS.keys())[1]],
151
- interactive=True
152
  )
153
-
154
  with gr.Column(scale=3):
155
  output = gr.Markdown("""
156
- *选择模型后点击「对比所有模型」开始分析*
157
-
158
  每个模型独立回答,可对比:
159
  - 回答质量与风格
160
  - 响应速度
161
- - 对中文/英文/代码的处理能力
 
 
162
  """)
163
-
164
  compare_btn.click(
165
  fn=compare_models,
166
  inputs=[user_input, model_checkboxes, system_dropdown],
167
- outputs=output
168
  )
169
-
170
  with gr.TabItem("💬 单模型对话"):
171
  with gr.Row():
172
  with gr.Column(scale=1):
173
  model_select = gr.Dropdown(
174
- choices=list(MODELS.keys()),
175
- value=list(MODELS.keys())[0],
176
- label="选择模型"
177
  )
178
  system_s = gr.Dropdown(
179
  choices=list(SYSTEM_PROMPTS.keys()),
180
  value="Default",
181
- label="系统提示"
182
  )
183
  with gr.Column(scale=3):
184
  chat_history = gr.Chatbot(label="对话历史", height=400)
@@ -186,38 +214,40 @@ with gr.Blocks(title="Hermes HF Hub", theme=gr.themes.Soft()) as demo:
186
  msg_input = gr.Textbox(
187
  placeholder="输入消息...",
188
  scale=4,
189
- lines=2
190
  )
191
  send_btn = gr.Button("发送", variant="primary", scale=1)
192
-
193
  def on_send(msg, history):
194
  return single_chat(model_select.value, msg, system_s.value, history)
195
-
196
- send_btn.click(fn=on_send, inputs=[msg_input, chat_history], outputs=[chat_history, msg_input])
197
- msg_input.submit(fn=on_send, inputs=[msg_input, chat_history], outputs=[chat_history, msg_input])
198
-
 
 
 
 
199
  with gr.TabItem("ℹ️ 关于"):
200
  gr.Markdown("""
201
- ## 🐠 Hermes HF Hub
202
-
203
  **功能:**
204
- - 🔍 多模型对比:一次提问,同时获得 6 个模型的回答
205
  - 💬 单模型对话:深入对话某一特定模型
206
- - 🌐 中英文支持:多语言模型覆盖
207
-
208
- **支持的模型:**
209
- | 模型 | 参数量 | 特点 |
210
- |------|--------|------|
211
- | Llama 3.2 | 3B | 高质量对话,资源友好 |
212
- | Qwen2.5 | 7B | 强项,知识面广 |
213
- | Gemma 2 | 2B | 轻量快速 |
214
- | Mistral 7B | 7B | 欧洲劲旅,效果出色 |
215
- | StarCoder2 | 3B | 代码专用 |
216
- | ChatGLM3 | 6B | 中文原生,推理快 |
217
-
218
- **限制:** 免费 tier 有速率限制,高频使用请考虑升级。
219
-
220
- Powered by [Hugging Face Inference API](https://huggingface.co/inference-endpoints)
221
  """)
222
 
223
  # 启动
 
1
  """
2
+ Hermes HF Space - Multi-Model AI Hub (OpenRouter Edition)
3
+ 多模型对比助手:使用 OpenRouter 免费模型
4
  """
5
 
6
  import gradio as gr
7
  import os
 
8
  import time
9
+ import requests
10
 
11
+ # OpenRouter 配置
12
+ OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
13
+ OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1/chat/completions"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
+ # 预设模型列表(OpenRouter 免费模型)
16
  MODELS = {
17
+ "🦙 Llama 3.3 70B (free)": {
18
+ "id": "meta-llama/llama-3.3-70b-instruct:free",
19
+ "name": "🦙 Llama 3.3 70B",
20
+ "context": "66K",
21
+ },
22
+ "🤖 NVIDIA Nemotron 120B (free)": {
23
+ "id": "nvidia/nemotron-3-super-120b-a12b:free",
24
+ "name": "🤖 NVIDIA Nemotron 120B",
25
+ "context": "1M",
26
+ },
27
+ "🧠 Nous Hermes 3 405B (free)": {
28
+ "id": "nousresearch/hermes-3-llama-3.1-405b:free",
29
+ "name": "🧠 Nous Hermes 3 405B",
30
+ "context": "128K",
31
+ },
32
+ "🔧 CoBuddy Code (free)": {
33
+ "id": "baidu/cobuddy:free",
34
+ "name": "🔧 CoBuddy (百度代码模型)",
35
+ "context": "131K",
36
+ },
37
  }
38
 
39
  SYSTEM_PROMPTS = {
40
  "Default": "",
41
  "Code Assistant": "You are an expert programmer. Write clean, efficient code with brief explanations.",
42
+ "中文助手": "你是一个有帮助的中文AI助手,用简洁清晰的语言回答。",
43
  "Summarizer": "You are a text summarization expert. Provide concise, accurate summaries.",
44
  "Creative Writer": "You are a creative writer. Write engaging, imaginative content.",
45
  }
46
 
47
+
48
+ def call_openrouter(model_id: str, prompt: str, max_tokens: int = 384, system: str = "") -> tuple[str, float]:
49
+ """调用 OpenRouter API"""
50
+ if not OPENROUTER_API_KEY:
51
+ return "⚠️ API key not configured. Please set OPENROUTER_API_KEY in Space secrets.", 0.0
52
+
53
+ headers = {
54
+ "Authorization": f"Bearer {OPENROUTER_API_KEY}",
55
+ "Content-Type": "application/json",
56
+ "HTTP-Referer": "https://cntalk-hermes.hf.space",
57
+ "X-Title": "Hermes OpenRouter Hub",
58
+ }
59
+
60
+ messages = []
61
+ if system:
62
+ messages.append({"role": "system", "content": system})
63
+ messages.append({"role": "user", "content": prompt})
64
+
65
+ payload = {
66
+ "model": model_id,
67
+ "messages": messages,
68
+ "max_tokens": max_tokens,
69
+ "temperature": 0.7,
70
+ }
71
+
72
+ try:
73
+ start = time.time()
74
+ resp = requests.post(
75
+ OPENROUTER_BASE_URL,
76
+ headers=headers,
77
+ json=payload,
78
+ timeout=90,
79
+ )
80
+ elapsed = time.time() - start
81
+
82
+ if resp.status_code == 200:
83
+ result = resp.json()
84
+ content = result["choices"][0]["message"]["content"]
85
+ return content, elapsed
86
+ elif resp.status_code == 429:
87
+ return "⚠️ Rate limit exceeded. Please wait a moment or try a different model.", elapsed
88
+ else:
89
+ error_msg = resp.json().get("error", {}).get("message", resp.text[:150])
90
+ return f"⚠️ Error {resp.status_code}: {error_msg}", elapsed
91
+ except Exception as e:
92
+ return f"⚠️ Request failed: {str(e)[:100]}", 0.0
93
+
94
+
95
+ def format_response(model_name: str, context: str, response: str, elapsed: float) -> str:
96
  emoji = "✅" if not response.startswith("⚠️") else "⚠️"
97
+ return f"{emoji} **{model_name}** [ctx:{context}] ({elapsed:.1f}s)\n{response}\n"
98
+
99
 
100
  def compare_models(user_input: str, model_keys: list, system_key: str = "Default"):
101
  """对比多个模型的回答"""
102
  if not user_input.strip():
103
  return "⚠️ Please enter a message."
104
+
105
  system = SYSTEM_PROMPTS.get(system_key, "")
106
  results = []
107
+
108
  for key in model_keys:
109
+ model = MODELS.get(key)
110
+ if not model:
111
  continue
112
+ model_id = model["id"]
113
+ model_name = model["name"]
114
+ model_context = model["context"]
115
+
116
+ response, elapsed = call_openrouter(model_id, user_input, system=system)
117
+ results.append(format_response(model_name, model_context, response, elapsed))
118
+
119
  if not results:
120
  return "⚠️ Please select at least one model."
121
+
122
  return "\n---\n".join(results)
123
 
124
+
125
  def single_chat(model_key: str, user_input: str, system_key: str, history: list):
126
  """单模型对话(带历史)"""
127
  if not user_input.strip():
128
  return history, ""
129
+
130
+ model = MODELS.get(model_key, {})
131
+ model_id = model.get("id", "")
132
  system = SYSTEM_PROMPTS.get(system_key, "")
133
+
134
+ # 构建带历史的 prompt
135
+ prompt = ""
136
+ for h_user, h_bot in history:
137
+ prompt += f"User: {h_user}\nAssistant: {h_bot}\n"
138
+ prompt += f"User: {user_input}"
139
+
140
+ response, elapsed = call_openrouter(model_id, prompt, system=system)
141
+
142
+ history.append((user_input, f"{response}\n\n⏱️ {elapsed:.1f}s"))
143
  return history, ""
144
 
145
+
146
  # Gradio UI
147
+ with gr.Blocks(title="Hermes OpenRouter Hub", theme=gr.themes.Soft()) as demo:
148
  gr.Markdown("""
149
+ # 🐠 Hermes OpenRouter Hub
150
  ### 多模型 AI 助手 — 同时对比多个开源模型的回答
151
+
152
+ 基于 OpenRouter API,支持 Llama 3.3 / Nemotron / Hermes 3 等免费模型
153
+
154
+ ⚠️ 免费模型有速率限制,如遇报错稍后重试即可
155
  """)
156
+
157
  with gr.Tabs():
158
  with gr.TabItem("🔍 模型对比"):
159
  with gr.Row():
 
161
  user_input = gr.Textbox(
162
  label="✏️ 输入问题",
163
  placeholder="例如: 解释一下什么是transformer架构",
164
+ lines=4,
165
  )
166
  with gr.Row():
167
  system_dropdown = gr.Dropdown(
168
  choices=list(SYSTEM_PROMPTS.keys()),
169
  value="Default",
170
+ label="系统提示",
171
  )
172
+ compare_btn = gr.Button("🚀 对比模型", variant="primary")
173
+
174
  gr.Markdown("**选择要对比的模型:**")
175
  model_checkboxes = gr.CheckboxGroup(
176
+ choices=[(v["name"], k) for k, v in MODELS.items()],
177
+ value=["🦙 Llama 3.3 70B (free)", "🤖 NVIDIA Nemotron 120B (free)"],
178
+ interactive=True,
179
  )
 
180
  with gr.Column(scale=3):
181
  output = gr.Markdown("""
182
+ *选择模型后点击「对比模型」开始分析*
183
+
184
  每个模型独立回答,可对比:
185
  - 回答质量与风格
186
  - 响应速度
187
+ - Context 长度差异
188
+
189
+ 💡 免费模型有并发限制,高频使用建议错峰
190
  """)
191
+
192
  compare_btn.click(
193
  fn=compare_models,
194
  inputs=[user_input, model_checkboxes, system_dropdown],
195
+ outputs=output,
196
  )
197
+
198
  with gr.TabItem("💬 单模型对话"):
199
  with gr.Row():
200
  with gr.Column(scale=1):
201
  model_select = gr.Dropdown(
202
+ choices=[(v["name"], k) for k, v in MODELS.items()],
203
+ value="🦙 Llama 3.3 70B (free)",
204
+ label="选择模型",
205
  )
206
  system_s = gr.Dropdown(
207
  choices=list(SYSTEM_PROMPTS.keys()),
208
  value="Default",
209
+ label="系统提示",
210
  )
211
  with gr.Column(scale=3):
212
  chat_history = gr.Chatbot(label="对话历史", height=400)
 
214
  msg_input = gr.Textbox(
215
  placeholder="输入消息...",
216
  scale=4,
217
+ lines=2,
218
  )
219
  send_btn = gr.Button("发送", variant="primary", scale=1)
220
+
221
  def on_send(msg, history):
222
  return single_chat(model_select.value, msg, system_s.value, history)
223
+
224
+ send_btn.click(
225
+ fn=on_send, inputs=[msg_input, chat_history], outputs=[chat_history, msg_input]
226
+ )
227
+ msg_input.submit(
228
+ fn=on_send, inputs=[msg_input, chat_history], outputs=[chat_history, msg_input]
229
+ )
230
+
231
  with gr.TabItem("ℹ️ 关于"):
232
  gr.Markdown("""
233
+ ## 🐠 Hermes OpenRouter Hub
234
+
235
  **功能:**
236
+ - 🔍 多模型对比:一次提问,同时获得多个模型的回答
237
  - 💬 单模型对话:深入对话某一特定模型
238
+ - 🌐 中英文支持
239
+
240
+ **支持的免费模型:**
241
+ | 模型 | 参数量 | Context | 特点 |
242
+ |------|--------|---------|------|
243
+ | Llama 3.3 70B | 70B | 66K | 高质量多语言 |
244
+ | NVIDIA Nemotron 120B | 120B MoE | 1M | 超长上下文 |
245
+ | Nous Hermes 3 405B | 405B | 128K | 超大模型 |
246
+ | CoBuddy | 1.44B | 131K | 代码专用 (百度) |
247
+
248
+ **限制:** 免费 tier 有速率限制,高频使用请考虑升级或自备 key。
249
+
250
+ Powered by [OpenRouter](https://openrouter.ai)
 
 
251
  """)
252
 
253
  # 启动