import os
import torch
import librosa  # for loading/resampling audio files before Whisper transcription
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, WhisperProcessor, WhisperForConditionalGeneration
from typing import List, Tuple  # type hints for the chat history
# Option A: read the token from a custom environment variable named "language"
hf_token = os.environ.get("language")
if not hf_token:
    raise EnvironmentError("Environment variable 'language' not found; please add it in the Space settings")

# Option B: use the conventional "HUGGINGFACE_HUB_TOKEN" instead (requires updating the Space variable to match)
# hf_token = os.environ.get("HUGGINGFACE_HUB_TOKEN")
# if not hf_token:
#     raise EnvironmentError("HUGGINGFACE_HUB_TOKEN environment variable not found; please add it in the Space settings")
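# Note: both models below are public, so from_pretrained would also work without a
# token; passing one mainly raises the anonymous rate limits on the Hugging Face Hub.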
# Model registry: public checkpoints that require no gated access
MODELS = {
    "Zephyr 7B Beta": {
        "model_id": "HuggingFaceH4/zephyr-7b-beta",
        "kwargs": {"torch_dtype": torch.float16}
    },
    "Falcon 7B Instruct": {
        "model_id": "tiiuae/falcon-7b-instruct",
        "kwargs": {"torch_dtype": torch.float16, "trust_remote_code": True}
    }
}
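# Rough memory math: a 7B-parameter model in float16 needs about 14 GB for weights
# alone (7e9 params x 2 bytes), so preloading both entries takes roughly 28 GB. On
# smaller hardware, consider keeping a single entry, or passing device_map="auto"
# (and optionally 8-bit quantization via bitsandbytes) in the kwargs above.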
# Load a model and its tokenizer, then move the model to the best available device
def load_model(model_name):
    model_config = MODELS[model_name]
    tokenizer = AutoTokenizer.from_pretrained(
        model_config["model_id"],
        token=hf_token  # `use_auth_token` is deprecated in favor of `token`
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_config["model_id"],
        token=hf_token,
        **model_config["kwargs"]
    )
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return model.to(device), tokenizer, device
# Preload every model at startup
loaded_models = {}
for model_name in MODELS:
    loaded_models[model_name] = load_model(model_name)
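# Eager preloading keeps per-request latency low at the cost of startup time and
# resident memory; a lazy alternative would call load_model inside generate_response
# the first time each model is requested and cache the result.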
# Build a model-specific conversation prompt from the system prompt and chat history
def build_prompt(message: str, history: List[Tuple[str, str]], system_prompt: str, model_name: str) -> str:
    if "Zephyr" in model_name:
        # Zephyr-7B-beta was fine-tuned on the <|system|>/<|user|>/<|assistant|> chat format
        prompt = f"<|system|>\n{system_prompt}</s>\n"
        for user_msg, assistant_msg in history:
            prompt += f"<|user|>\n{user_msg}</s>\n<|assistant|>\n{assistant_msg}</s>\n"
        prompt += f"<|user|>\n{message}</s>\n<|assistant|>\n"
    elif "Falcon" in model_name:
        prompt = f"### System:\n{system_prompt}\n\n"
        for user_msg, assistant_msg in history:
            prompt += f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"
        prompt += f"### User:\n{message}\n\n### Assistant:"
    else:
        prompt = f"[System] {system_prompt}\n"
        for user_msg, assistant_msg in history:
            prompt += f"[User] {user_msg}\n[Assistant] {assistant_msg}\n"
        prompt += f"[User] {message}\n[Assistant]"
    return prompt
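# Alternative sketch: tokenizers that ship a chat template (Zephyr's does) can build
# the prompt without hand-rolled formatting, e.g.:
#   messages = [{"role": "system", "content": system_prompt},
#               {"role": "user", "content": message}]
#   prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)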
# Run inference for a single chat turn
def generate_response(
    message: str,
    history: List[Tuple[str, str]],
    system_prompt: str,
    model_name: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    top_k: int
) -> str:
    model, tokenizer, device = loaded_models[model_name]
    full_prompt = build_prompt(message, history, system_prompt, model_name)
    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
    generate_kwargs = {
        "max_new_tokens": max_new_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": top_k,
        "do_sample": True,
        "eos_token_id": tokenizer.eos_token_id or tokenizer.unk_token_id,
        "pad_token_id": tokenizer.pad_token_id or tokenizer.eos_token_id
    }
    with torch.no_grad():
        output = model.generate(**inputs, **generate_kwargs)
    # Decode only the newly generated tokens; slicing the decoded string by prompt
    # length breaks when skip_special_tokens changes how the prompt is rendered
    new_tokens = output[0][inputs["input_ids"].shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
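# Optional: for token-by-token streaming in the UI, transformers offers
# TextIteratorStreamer, which is passed as model.generate(..., streamer=streamer)
# and drained from a background thread; omitted here to keep the handler simple.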
# Handle one user message: generate a reply, then update the chat history
def process_chat(
    message: str,
    history: List[Tuple[str, str]],
    system_prompt: str,
    model_name: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    top_k: int
) -> Tuple[List[Tuple[str, str]], str]:
    response = generate_response(message, history, system_prompt, model_name, max_new_tokens, temperature, top_p, top_k)
    history.append((message, response))
    return history, ""  # updated history, plus an empty string to clear the input box
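# The Chatbot component serves as both input and output here: Gradio passes its
# current value in as `history` and replaces it with the returned list, so no
# separate gr.State is needed.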
# Speech-to-text: only load Whisper when an accelerator is available, since
# CPU-only transcription is slow on shared hardware
asr = None
if torch.cuda.is_available() or torch.backends.mps.is_available():
    try:
        processor = WhisperProcessor.from_pretrained("openai/whisper-base")
        asr_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base").to("cuda" if torch.cuda.is_available() else "cpu")
        asr = {"processor": processor, "model": asr_model}
    except Exception as e:
        print(f"Failed to load the speech model: {e}")
        asr = None
def transcribe(audio) -> str:
    if asr is None:
        return "Speech recognition model is not loaded"
    processor, model = asr["processor"], asr["model"]
    # gr.Audio(type="filepath") passes a path, so load and resample the file to the
    # 16 kHz rate Whisper expects before extracting input features
    speech, sr = librosa.load(audio, sr=16000)
    input_features = processor(speech, sampling_rate=sr, return_tensors="pt").input_features.to(model.device)
    predicted_ids = model.generate(input_features)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
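# Note: whisper-base is multilingual and auto-detects the spoken language; recent
# transformers releases also accept model.generate(input_features, language="en",
# task="transcribe") to pin the output language.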
# Build the Gradio interface
with gr.Blocks(title="Open-Access LLM Chat Assistant") as demo:
    gr.Markdown("## Chat with public language models (no gated access required)")
    with gr.Row():
        with gr.Column(scale=1):
            message_input = gr.Textbox(label="Your message")
            system_prompt = gr.Textbox(
                label="System prompt",
                value="You are a helpful, knowledgeable AI assistant.",
            )
            model_choice = gr.Dropdown(
                choices=list(MODELS.keys()),
                value=list(MODELS.keys())[0],
                label="Language model"
            )
            with gr.Accordion("Generation parameters", open=False):
                max_new_tokens = gr.Slider(minimum=1, maximum=2048, value=512, label="Max new tokens")
                temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature")
                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p sampling")
                top_k = gr.Slider(minimum=1, maximum=100, value=50, label="Top-k sampling")
            use_voice = gr.Checkbox(label="Use voice input")
            audio_input = gr.Audio(type="filepath", label="Voice input")
            send_btn = gr.Button("Send", variant="primary")
            clear_btn = gr.Button("Clear conversation")
        with gr.Column(scale=2):
            chat_history = gr.Chatbot(label="Conversation history")
    # Voice input: transcribe whenever a new recording arrives and the checkbox is set
    audio_input.change(
        fn=lambda audio, use: transcribe(audio) if use and audio else "",
        inputs=[audio_input, use_voice],
        outputs=message_input
    )
    # Send a message: update the chat history and clear the input box
    send_btn.click(
        fn=process_chat,
        inputs=[message_input, chat_history, system_prompt, model_choice, max_new_tokens, temperature, top_p, top_k],
        outputs=[chat_history, message_input]
    )
    # Clear the conversation; return an empty list rather than None so the next
    # process_chat call still receives an appendable history
    clear_btn.click(fn=lambda: [], outputs=chat_history)
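# Optional: demo.queue() serializes concurrent requests, which helps on a
# single-GPU Space where one generation can take tens of seconds per turn.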
# Launch the app
if __name__ == "__main__":
    # share=True is ignored on Hugging Face Spaces (the Space already serves the app),
    # so it is dropped here; re-add it if you run this file locally and want a public link
    demo.launch(server_name="0.0.0.0", server_port=7860)