Spaces: Runtime error

Update app.py

app.py CHANGED
@@ -4,22 +4,22 @@ import gradio as gr
 from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 from typing import List, Tuple, Dict
 
-# …
-# from huggingface_hub import login
-# login(token=os.getenv("HUGGINGFACE_TOKEN"))
-
-# Model configuration - add more models as needed
+# Model configuration - only public, ungated models
 MODELS = {
-    "…
-        "model_id": "…
+    "Zephyr 7B Beta": {
+        "model_id": "HuggingFaceH4/zephyr-7b-beta",
         "kwargs": {"torch_dtype": torch.float16}
     },
     "Mistral 7B Instruct": {
         "model_id": "mistralai/Mistral-7B-Instruct-v0.2",
         "kwargs": {"torch_dtype": torch.float16}
     },
-    "…
-        "model_id": "…
+    "OpenHermes 2.5": {
+        "model_id": "teknium/OpenHermes-2.5-Mistral-7B",
+        "kwargs": {"torch_dtype": torch.float16}
+    },
+    "Falcon 7B Instruct": {
+        "model_id": "tiiuae/falcon-7b-instruct",
         "kwargs": {"torch_dtype": torch.float16}
     }
 }
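Worth noting: the file still loads every entry of MODELS eagerly (the unchanged loop in the @@ -49 hunk below), and four 7B checkpoints in float16 come to roughly 4 × 14 GB ≈ 56 GB of weights, a plausible cause of the Space's "Runtime error" badge. A minimal lazy-loading sketch, reusing this file's load_model (an alternative, not what this commit does):

loaded_models = {}

def get_model(model_name):
    # Load each model on first use and cache it, instead of loading all four at startup.
    if model_name not in loaded_models:
        loaded_models[model_name] = load_model(model_name)
    return loaded_models[model_name]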
@@ -29,8 +29,8 @@ def load_model(model_name):
     model_config = MODELS[model_name]
     tokenizer = AutoTokenizer.from_pretrained(model_config["model_id"])
 
-    # …
-    if "…
+    # Handle model-specific arguments (if needed)
+    if "Falcon" in model_name:
         model_config["kwargs"]["trust_remote_code"] = True
 
     model = AutoModelForCausalLM.from_pretrained(
@@ -38,7 +38,7 @@ def load_model(model_name):
         **model_config["kwargs"]
     )
 
-    # …
+    # Move to the available device
     device = "cuda" if torch.cuda.is_available() else "cpu"
     model = model.to(device)
 
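model.to(device) only works when the entire float16 checkpoint fits on a single device. With the accelerate package installed, transformers can place and offload weights automatically instead; a sketch of that alternative call (an assumption about the deployment, not part of this commit):

model = AutoModelForCausalLM.from_pretrained(
    model_config["model_id"],
    device_map="auto",  # spread weights across available GPU(s), offload the rest to CPU
    **model_config["kwargs"]
)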
@@ -49,18 +49,31 @@ loaded_models = {}
 for model_name in MODELS:
     loaded_models[model_name] = load_model(model_name)
 
-# …
-def build_prompt(message, history, system_prompt):
-    …
+# Build the conversation prompt (different models may need different formats)
+def build_prompt(message, history, system_prompt, model_name):
+    # Zephyr/Mistral-style models use a simple format
+    if "Zephyr" in model_name or "Mistral" in model_name:
+        prompt = f"System prompt: {system_prompt}\n"
+        for user_msg, assistant_msg in history:
+            prompt += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
+        prompt += f"User: {message}\nAssistant:"
+        return prompt
+
+    # Falcon models use a leaner sectioned format
+    elif "Falcon" in model_name:
+        prompt = f"### System:\n{system_prompt}\n\n"
+        for user_msg, assistant_msg in history:
+            prompt += f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"
+        prompt += f"### User:\n{message}\n\n### Assistant:"
+        return prompt
+
+    # Default to a generic format
+    else:
+        prompt = f"[System] {system_prompt}\n"
+        for user_msg, assistant_msg in history:
+            prompt += f"[User] {user_msg}\n[Assistant] {assistant_msg}\n"
+        prompt += f"[User] {message}\n[Assistant]"
+        return prompt
 
 # Model inference function
 def generate_response(
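Hand-rolled prompt strings like the ones added above tend to drift from the formats these models were actually fine-tuned on. Zephyr, Mistral-Instruct, and OpenHermes all ship chat templates with their tokenizers, so an alternative (not what this commit does) is to let the tokenizer do the formatting; build_prompt_with_template is a hypothetical name:

def build_prompt_with_template(message, history, system_prompt, tokenizer):
    # Chat templates expect a list of {"role", "content"} messages.
    messages = [{"role": "system", "content": system_prompt}]
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    # add_generation_prompt appends the assistant header so the model continues from it.
    return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

One caveat: some templates (Mistral-Instruct's, at least in some versions) reject a standalone system role, in which case the system prompt has to be folded into the first user message.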
@@ -73,11 +86,10 @@ def generate_response(
     top_p: float,
     top_k: int
 ):
-    # Get the model, tokenizer, and device
     model, tokenizer, device = loaded_models[model_name]
 
-    # …
-    full_prompt = build_prompt(message, history, system_prompt)
+    # Build the prompt
+    full_prompt = build_prompt(message, history, system_prompt, model_name)
 
     # Encode the input
     inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
@@ -89,8 +101,8 @@ def generate_response(
         "top_p": top_p,
         "top_k": top_k,
         "do_sample": True,
-        "eos_token_id": tokenizer.eos_token_id,
-        "pad_token_id": tokenizer.pad_token_id
+        "eos_token_id": tokenizer.eos_token_id or tokenizer.unk_token_id,
+        "pad_token_id": tokenizer.pad_token_id or tokenizer.eos_token_id
     }
 
     # Generate the response
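The call that consumes this kwargs dict falls outside the diff context. Assuming the dict is named generation_kwargs (the name is not visible here), the generation step presumably looks something like:

with torch.no_grad():  # inference only, no gradients needed
    output = model.generate(**inputs, **generation_kwargs)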
@@ -103,12 +115,12 @@ def generate_response(
     # Decode the output
     response = tokenizer.decode(output[0], skip_special_tokens=True)
 
-    # …
+    # Extract the model-generated part
     response = response[len(full_prompt):].strip()
 
     return response
 
-# …
+# Handle user input
 def process_chat(
     message: str,
     history: List[Tuple[str, str]],
@@ -119,138 +131,79 @@ def process_chat(
     top_p: float,
     top_k: int
 ):
-    # Generate the response
     response = generate_response(
         message, history, system_prompt, model_name,
         max_new_tokens, temperature, top_p, top_k
     )
-
-    # Update the chat history
     history.append((message, response))
     return history, history
 
-# …
+# Speech-to-text feature
 asr = None
 if torch.cuda.is_available() or torch.backends.mps.is_available():
     try:
         from transformers import WhisperProcessor, WhisperForConditionalGeneration
         processor = WhisperProcessor.from_pretrained("openai/whisper-base")
         asr_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base").to("cuda" if torch.cuda.is_available() else "cpu")
-        asr = {
-            "processor": processor,
-            "model": asr_model
-        }
-    except Exception as e:
-        print(f"Failed to load the speech recognition model: {e}")
+        asr = {"processor": processor, "model": asr_model}
+    except:
         asr = None
 
 def transcribe(audio):
     if asr is None:
         return "Speech recognition model is not loaded"
-
     processor, model = asr["processor"], asr["model"]
     input_features = processor(audio, return_tensors="pt").input_features.to(model.device)
     predicted_ids = model.generate(input_features)
-    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
-    return transcription
+    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
 
 # Build the Gradio UI
-with gr.Blocks(title="…
-    gr.Markdown("## …
+with gr.Blocks(title="Open-Access Language Model Chat Assistant") as demo:
+    gr.Markdown("## Public Language Model Chat App (no access permissions required)")
 
     with gr.Row():
         with gr.Column(scale=1):
-
-            message_input = gr.Textbox(
-                label="Message",
-                placeholder="Type what you want to say to the AI..."
-            )
-
-            # System prompt
+            message_input = gr.Textbox(label="Message")
             system_prompt = gr.Textbox(
                 label="System prompt",
                 value="You are a helpful, knowledgeable AI assistant.",
-                placeholder="Set the AI's role and rules of behavior..."
             )
-
-            # Model selection
             model_choice = gr.Dropdown(
                 choices=list(MODELS.keys()),
                 value=list(MODELS.keys())[0],
                 label="Select a language model"
             )
-
-            …
-            max_new_tokens = gr.Slider(
-                …
-                label="Max new tokens"
-            )
-            temperature = gr.Slider(
-                minimum=0.1, maximum=2.0, value=0.7, step=0.1,
-                label="Temperature (randomness)"
-            )
-            top_p = gr.Slider(
-                minimum=0.1, maximum=1.0, value=0.9, step=0.05,
-                label="Top-p (nucleus sampling)"
-            )
-            top_k = gr.Slider(
-                minimum=1, maximum=100, value=50, step=1,
-                label="Top-k (number of candidates)"
-            )
-
-            # Voice input
+            with gr.Accordion("Generation parameters", open=False):
+                max_new_tokens = gr.Slider(minimum=1, maximum=2048, value=512, label="Max tokens")
+                temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Randomness")
+                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p sampling")
+                top_k = gr.Slider(minimum=1, maximum=100, value=50, label="Top-k sampling")
             use_voice = gr.Checkbox(label="Use voice input")
-            audio_input = gr.Audio(
-                type="filepath",
-                label="Voice input (record or upload audio)"
-            )
-
-            # Buttons
+            audio_input = gr.Audio(type="filepath", label="Voice input")
             send_btn = gr.Button("Send message", variant="primary")
             clear_btn = gr.Button("Clear chat")
 
         with gr.Column(scale=2):
-
-            chat_history = gr.Chatbot(
-                label="Chat history",
-                show_label=True
-            )
+            chat_history = gr.Chatbot(label="Chat history")
 
     # Voice input handling
-    def handle_voice(audio, use_voice):
-        if use_voice and audio:
-            return transcribe(audio)
-        return ""
-
     audio_input.change(
-        fn=handle_voice,
+        fn=lambda audio, use: transcribe(audio) if use else "",
         inputs=[audio_input, use_voice],
         outputs=message_input
     )
 
-    # …
+    # Send the message
    send_btn.click(
        fn=process_chat,
-        inputs=[
-            …
-        ],
-        outputs=[chat_history, chat_history],
-        show_progress=True
+        inputs=[message_input, chat_history, system_prompt, model_choice,
+                max_new_tokens, temperature, top_p, top_k],
+        outputs=[chat_history, chat_history]
    )

    # Clear the chat
-    clear_btn.click(
-        fn=lambda: None,
-        inputs=None,
-        outputs=chat_history
-    )
+    clear_btn.click(fn=lambda: None, outputs=chat_history)

 # Launch the app
 if __name__ == "__main__":
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        share=True
-    )
+    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
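One likely bug survives this commit: audio_input uses type="filepath", so transcribe receives a path string, while WhisperProcessor expects a raw waveform sampled at 16 kHz. A sketch of a fix, with librosa as an assumed extra dependency (not in the commit):

import librosa

def transcribe(audio_path):
    if asr is None:
        return "Speech recognition model is not loaded"
    processor, model = asr["processor"], asr["model"]
    # Whisper expects 16 kHz mono audio; decode and resample the uploaded file.
    waveform, _ = librosa.load(audio_path, sr=16000)
    input_features = processor(waveform, sampling_rate=16000, return_tensors="pt").input_features.to(model.device)
    predicted_ids = model.generate(input_features)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]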