| """ |
| Hermes HF Space - Multi-Model AI Hub (OpenRouter Edition) |
| 多模型对比助手:使用 OpenRouter 免费模型 |
| """ |
|
|
| import gradio as gr |
| import os |
| import time |
| import requests |
|
|
| |
# OpenRouter credentials and endpoint.
# The key is read from the environment; an empty string means "not configured".
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
# Chat-completions endpoint that every request is POSTed to.
OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1/chat/completions"
|
|
| |
# Catalogue of selectable models, keyed by the value the UI widgets submit.
# Each entry carries:
#   id      - model identifier sent to the API in the "model" field
#   name    - label shown in the UI and in formatted responses
#   context - context-window size, kept as a display string
MODELS = {
    "🦙 Llama 3.3 70B (free)": {
        "id": "meta-llama/llama-3.3-70b-instruct:free",
        "name": "🦙 Llama 3.3 70B",
        "context": "66K",
    },
    "🤖 NVIDIA Nemotron 120B (free)": {
        "id": "nvidia/nemotron-3-super-120b-a12b:free",
        "name": "🤖 NVIDIA Nemotron 120B",
        "context": "1M",
    },
    "🧠 Nous Hermes 3 405B (free)": {
        "id": "nousresearch/hermes-3-llama-3.1-405b:free",
        "name": "🧠 Nous Hermes 3 405B",
        "context": "128K",
    },
    "🔧 CoBuddy Code (free)": {
        "id": "baidu/cobuddy:free",
        "name": "🔧 CoBuddy (百度代码模型)",
        "context": "131K",
    },
}
|
|
# Preset system prompts, keyed by the label shown in the UI dropdowns.
# "Default" maps to an empty string, which means no system message is sent.
SYSTEM_PROMPTS = {
    "Default": "",
    "Code Assistant": "You are an expert programmer. Write clean, efficient code with brief explanations.",
    "中文助手": "你是一个有帮助的中文AI助手,用简洁清晰的语言回答。",
    "Summarizer": "You are a text summarization expert. Provide concise, accurate summaries.",
    "Creative Writer": "You are a creative writer. Write engaging, imaginative content.",
}
|
|
|
|
def _openrouter_error_detail(body, fallback: str) -> str:
    """Return the error message carried by a decoded response body, else *fallback*."""
    if isinstance(body, dict):
        error = body.get("error")
        if isinstance(error, dict) and error.get("message"):
            return str(error["message"])
        if isinstance(error, str) and error:
            return error
    return fallback


def call_openrouter(model_id: str, prompt: str, max_tokens: int = 384, system: str = "") -> tuple[str, float]:
    """Send one chat-completion request to OpenRouter and return the reply.

    Args:
        model_id: Model identifier placed in the request's ``model`` field.
        prompt: Text sent as the single user message.
        max_tokens: Completion length cap forwarded to the API.
        system: Optional system prompt; no system message is sent when empty.

    Returns:
        A ``(text, elapsed_seconds)`` tuple. ``text`` is always a string: the
        model's reply on success, otherwise a human-readable message that
        starts with "⚠️" (missing key, network failure, rate limit, HTTP
        error, malformed or empty response). ``elapsed_seconds`` is ``0.0``
        only when no request was attempted.
    """
    if not OPENROUTER_API_KEY:
        return "⚠️ API key not configured. Please set OPENROUTER_API_KEY in Space secrets.", 0.0

    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
        "HTTP-Referer": "https://cntalk-hermes.hf.space",
        "X-Title": "Hermes OpenRouter Hub",
    }

    messages = []
    if system:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": prompt})

    payload = {
        "model": model_id,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": 0.7,
    }

    # perf_counter is monotonic, so the measured latency cannot go negative
    # if the wall clock is adjusted mid-request.
    start = time.perf_counter()
    try:
        resp = requests.post(
            OPENROUTER_BASE_URL,
            headers=headers,
            json=payload,
            timeout=90,
        )
    except requests.RequestException as e:
        # Report the real wait (e.g. a full timeout) instead of 0.0 seconds.
        return f"⚠️ Request failed: {str(e)[:100]}", time.perf_counter() - start
    elapsed = time.perf_counter() - start

    if resp.status_code == 429:
        return "⚠️ Rate limit exceeded. Please wait a moment or try a different model.", elapsed

    # The body may be HTML or empty (e.g. from a proxy); never let a decode
    # failure hide the HTTP status from the user.
    try:
        body = resp.json()
    except ValueError:
        body = None

    if resp.status_code != 200:
        detail = _openrouter_error_detail(body, resp.text[:150])
        return f"⚠️ Error {resp.status_code}: {detail}", elapsed

    choices = body.get("choices") if isinstance(body, dict) else None
    if isinstance(choices, list) and choices and isinstance(choices[0], dict):
        message = choices[0].get("message")
        content = message.get("content") if isinstance(message, dict) else None
        if isinstance(content, str) and content.strip():
            return content, elapsed
        # A null/blank content would otherwise leak None to callers that
        # expect a string.
        return "⚠️ Model returned an empty response. Please retry or try a different model.", elapsed

    # HTTP 200 without usable choices: surface any embedded error message.
    detail = _openrouter_error_detail(body, resp.text[:150])
    return f"⚠️ Unexpected response: {detail}", elapsed
|
|
|
|
def format_response(model_name: str, context: str, response: str, elapsed: float) -> str:
    """Render one model's answer as a Markdown snippet.

    The header line shows a status badge, the bold model name, its context
    size and the latency rounded to one decimal; the response text follows on
    the next line. A response beginning with "⚠️" is treated as a failure and
    gets the warning badge instead of the check mark.
    """
    failed = response.startswith("⚠️")
    badge = "⚠️" if failed else "✅"
    header = f"{badge} **{model_name}** [ctx:{context}] ({elapsed:.1f}s)"
    return f"{header}\n{response}\n"
|
|
|
|
def compare_models(user_input: str, model_keys: list, system_key: str = "Default"):
    """Ask every selected model the same question and combine the answers.

    Args:
        user_input: The question; a blank value short-circuits with a warning.
        model_keys: Keys into ``MODELS``; unknown keys are ignored.
        system_key: Key into ``SYSTEM_PROMPTS``; unknown keys mean no system prompt.

    Returns:
        The formatted answers joined by a "---" separator line, or a "⚠️"
        warning string when the input is blank or no known model was selected.
    """
    if user_input.strip() == "":
        return "⚠️ Please enter a message."

    system_prompt = SYSTEM_PROMPTS.get(system_key, "")

    # Keep only keys that resolve to a catalogue entry, preserving selection order.
    chosen = [entry for key in model_keys if (entry := MODELS.get(key))]

    sections = []
    for entry in chosen:
        target_id, label, ctx = entry["id"], entry["name"], entry["context"]
        # Models are queried one after another.
        reply, seconds = call_openrouter(target_id, user_input, system=system_prompt)
        sections.append(format_response(label, ctx, reply, seconds))

    return "\n---\n".join(sections) if sections else "⚠️ Please select at least one model."
|
|
|
|
def _strip_timing_footer(reply) -> str:
    """Remove the trailing "⏱️ <seconds>s" footer that single_chat appends to replies."""
    if not isinstance(reply, str):
        return "" if reply is None else str(reply)
    head, sep, tail = reply.rpartition("\n\n⏱️ ")
    if not sep or not tail.endswith("s"):
        return reply
    try:
        float(tail[:-1])
    except ValueError:
        # The marker text was part of the reply itself, not the footer.
        return reply
    return head


def single_chat(model_key: str, user_input: str, system_key: str, history: list):
    """Run one turn of a single-model conversation, replaying prior turns.

    Args:
        model_key: Key into ``MODELS`` selecting the model to query.
        user_input: The new user message; a blank or missing message is a no-op.
        system_key: Key into ``SYSTEM_PROMPTS``; unknown keys mean no system prompt.
        history: List of ``(user, assistant)`` pairs; ``None`` is treated as empty.

    Returns:
        ``(history, "")``: the history with the new exchange appended (the
        same list object when one was passed in) and an empty string that
        clears the input box.
    """
    history = [] if history is None else history
    if not (user_input or "").strip():
        return history, ""

    model = MODELS.get(model_key)
    if not model or not model.get("id"):
        # Fail fast with a clear message instead of sending an empty model id.
        history.append((user_input, f"⚠️ Unknown model: {model_key}"))
        return history, ""
    system = SYSTEM_PROMPTS.get(system_key, "")

    # Flatten earlier turns into a transcript. The latency footer added below
    # is display-only, so strip it before replaying a reply to the model.
    turns = [
        f"User: {past_user}\nAssistant: {_strip_timing_footer(past_bot)}\n"
        for past_user, past_bot in history
    ]
    turns.append(f"User: {user_input}")
    prompt = "".join(turns)

    response, elapsed = call_openrouter(model["id"], prompt, system=system)

    history.append((user_input, f"{response}\n\n⏱️ {elapsed:.1f}s"))
    return history, ""
|
|
|
|
| |
# Build the Gradio UI: a model-comparison tab, a single-model chat tab and an
# about tab, then start the app.
with gr.Blocks(title="Hermes OpenRouter Hub", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 🐠 Hermes OpenRouter Hub
    ### 多模型 AI 助手 — 同时对比多个开源模型的回答

    基于 OpenRouter API,支持 Llama 3.3 / Nemotron / Hermes 3 等免费模型

    ⚠️ 免费模型有速率限制,如遇报错稍后重试即可
    """)

    with gr.Tabs():
        # --- Tab 1: send one question to several models and compare answers ---
        with gr.TabItem("🔍 模型对比"):
            with gr.Row():
                with gr.Column(scale=2):
                    user_input = gr.Textbox(
                        label="✏️ 输入问题",
                        placeholder="例如: 解释一下什么是transformer架构",
                        lines=4,
                    )
                    with gr.Row():
                        system_dropdown = gr.Dropdown(
                            choices=list(SYSTEM_PROMPTS.keys()),
                            value="Default",
                            label="系统提示",
                        )
                        compare_btn = gr.Button("🚀 对比模型", variant="primary")

                    gr.Markdown("**选择要对比的模型:**")
                    # Choices are (label, value) pairs: the display name is
                    # shown, the MODELS key is what the handler receives.
                    model_checkboxes = gr.CheckboxGroup(
                        choices=[(v["name"], k) for k, v in MODELS.items()],
                        value=["🤖 NVIDIA Nemotron 120B (free)", "🔧 CoBuddy Code (free)"],
                        interactive=True,
                    )
                with gr.Column(scale=3):
                    output = gr.Markdown("""
                    *选择模型后点击「对比模型」开始分析*

                    每个模型独立回答,可对比:
                    - 回答质量与风格
                    - 响应速度
                    - Context 长度差异

                    💡 免费模型有并发限制,高频使用建议错峰
                    """)

            compare_btn.click(
                fn=compare_models,
                inputs=[user_input, model_checkboxes, system_dropdown],
                outputs=output,
            )

        # --- Tab 2: multi-turn chat with one selected model ---
        with gr.TabItem("💬 单模型对话"):
            with gr.Row():
                with gr.Column(scale=1):
                    model_select = gr.Dropdown(
                        choices=[(v["name"], k) for k, v in MODELS.items()],
                        value="🤖 NVIDIA Nemotron 120B (free)",
                        label="选择模型",
                    )
                    system_s = gr.Dropdown(
                        choices=list(SYSTEM_PROMPTS.keys()),
                        value="Default",
                        label="系统提示",
                    )
                with gr.Column(scale=3):
                    chat_history = gr.Chatbot(label="对话历史", height=400)
                    with gr.Row():
                        msg_input = gr.Textbox(
                            placeholder="输入消息...",
                            scale=4,
                            lines=2,
                        )
                        send_btn = gr.Button("发送", variant="primary", scale=1)

            # The dropdowns must be listed as event inputs: reading
            # `component.value` inside a handler only yields the value the
            # component was constructed with, so the user's current selection
            # would be ignored. Input order matches single_chat's signature.
            chat_inputs = [model_select, msg_input, system_s, chat_history]
            chat_outputs = [chat_history, msg_input]

            send_btn.click(fn=single_chat, inputs=chat_inputs, outputs=chat_outputs)
            msg_input.submit(fn=single_chat, inputs=chat_inputs, outputs=chat_outputs)

        # --- Tab 3: static description of the app ---
        with gr.TabItem("ℹ️ 关于"):
            gr.Markdown("""
            ## 🐠 Hermes OpenRouter Hub

            **功能:**
            - 🔍 多模型对比:一次提问,同时获得多个模型的回答
            - 💬 单模型对话:深入对话某一特定模型
            - 🌐 中英文支持

            **支持的免费模型:**
            | 模型 | 参数量 | Context | 特点 |
            |------|--------|---------|------|
            | Llama 3.3 70B | 70B | 66K | 高质量多语言 |
            | NVIDIA Nemotron 120B | 120B MoE | 1M | 超长上下文 |
            | Nous Hermes 3 405B | 405B | 128K | 超大模型 |
            | CoBuddy | 1.44B | 131K | 代码专用,百度 |

            **限制:** 免费 tier 有速率限制,高频使用请考虑升级或自备 key。

            Powered by [OpenRouter](https://openrouter.ai)
            """)


demo.launch()