Spaces:
Runtime error
Runtime error
File size: 6,894 Bytes
17a3c9b 373a381 0a9bd20 373a381 0a9bd20 6ac95b8 6a3c032 0a9bd20 6a3c032 17a3c9b e27dbdf 17a3c9b e27dbdf 6a3c032 17a3c9b 0d6dc53 373a381 6a3c032 17a3c9b 6a3c032 17a3c9b 6a3c032 17a3c9b 6a3c032 17a3c9b 373a381 0a9bd20 e27dbdf 0a9bd20 d3c054d 17a3c9b 0a9bd20 17a3c9b e27dbdf 17a3c9b e27dbdf 17a3c9b 0a9bd20 17a3c9b 0a9bd20 d3c054d e27dbdf 17a3c9b 0a9bd20 17a3c9b d3c054d e27dbdf 17a3c9b e27dbdf 0a9bd20 17a3c9b 373a381 0a9bd20 17a3c9b e27dbdf 373a381 17a3c9b e27dbdf 17a3c9b e27dbdf 17a3c9b e27dbdf 17a3c9b e27dbdf 17a3c9b e27dbdf 17a3c9b e27dbdf 17a3c9b e27dbdf 17a3c9b 0a9bd20 e27dbdf 17a3c9b e27dbdf 373a381 17a3c9b 373a381 e27dbdf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 |
import os
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, WhisperProcessor, WhisperForConditionalGeneration
from typing import List, Tuple # 新增:导入类型
# Option A: read the Hugging Face token from a custom env var named "language".
# NOTE(review): "language" is an unusual key for an auth token — presumably the
# Space secret was created under that name; confirm in the Space settings.
hf_token = os.environ.get("language")
if not hf_token:
    raise EnvironmentError("未找到名为 'language' 的环境变量,请在Space设置中添加")
# Option B: use the conventional "HUGGINGFACE_HUB_TOKEN" instead
# (requires renaming the Space environment variable accordingly).
# hf_token = os.environ.get("HUGGINGFACE_HUB_TOKEN")
# if not hf_token:
#     raise EnvironmentError("未找到HUGGINGFACE_HUB_TOKEN环境变量,请在Space设置中添加")
# Model registry — public (non-gated) models only.
# Maps a display name to its Hub repo id plus the extra kwargs forwarded to
# AutoModelForCausalLM.from_pretrained.
MODELS = {
    "Zephyr 7B Beta": {
        "model_id": "HuggingFaceH4/zephyr-7b-beta",
        # fp16 halves the memory footprint; assumes the device supports it
        "kwargs": {"torch_dtype": torch.float16}
    },
    "Falcon 7B Instruct": {
        "model_id": "tiiuae/falcon-7b-instruct",
        # Falcon ships custom modeling code on the Hub, hence trust_remote_code
        "kwargs": {"torch_dtype": torch.float16, "trust_remote_code": True}
    }
}
# Load one model + tokenizer pair from the registry.
def load_model(model_name):
    """Load the tokenizer and causal-LM weights for *model_name*.

    Args:
        model_name: A key of the module-level ``MODELS`` registry.

    Returns:
        ``(model, tokenizer, device)`` — the model already moved to
        ``device`` ("cuda" when available, otherwise "cpu").
    """
    model_config = MODELS[model_name]
    # FIX: ``token=`` replaces ``use_auth_token=``, which is deprecated and
    # removed in recent transformers releases.
    tokenizer = AutoTokenizer.from_pretrained(
        model_config["model_id"],
        token=hf_token,
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_config["model_id"],
        token=hf_token,
        **model_config["kwargs"],
    )
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return model.to(device), tokenizer, device
# Eagerly load every registered model at startup so the first chat request
# does not pay the download/initialisation cost.
loaded_models = {name: load_model(name) for name in MODELS}
# Assemble the model-specific chat prompt.
def build_prompt(message: str, history: List[Tuple[str, str]], system_prompt: str, model_name: str) -> str:
    """Render *system_prompt*, the (user, assistant) *history* and the new
    *message* into the prompt format the selected model family expects."""
    parts = []
    if "Zephyr" in model_name:
        parts.append(f"系统提示: {system_prompt}\n")
        for user_msg, assistant_msg in history:
            parts.append(f"用户: {user_msg}\n助手: {assistant_msg}\n")
        parts.append(f"用户: {message}\n助手:")
    elif "Falcon" in model_name:
        parts.append(f"### System:\n{system_prompt}\n\n")
        for user_msg, assistant_msg in history:
            parts.append(f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n")
        parts.append(f"### User:\n{message}\n\n### Assistant:")
    else:
        # Generic bracketed format for any model not matched above.
        parts.append(f"[System] {system_prompt}\n")
        for user_msg, assistant_msg in history:
            parts.append(f"[User] {user_msg}\n[Assistant] {assistant_msg}\n")
        parts.append(f"[User] {message}\n[Assistant]")
    return "".join(parts)
# Model inference for one chat turn.
def generate_response(
    message: str,
    history: List[Tuple[str, str]],
    system_prompt: str,
    model_name: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    top_k: int
) -> str:
    """Generate the assistant reply for *message* given *history*.

    Returns only the newly generated text (prompt removed), stripped of
    surrounding whitespace.
    """
    model, tokenizer, device = loaded_models[model_name]
    full_prompt = build_prompt(message, history, system_prompt, model_name)
    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
    generate_kwargs = {
        "max_new_tokens": max_new_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": top_k,
        "do_sample": True,
        # Fall back when a model config leaves eos/pad unset.
        "eos_token_id": tokenizer.eos_token_id or tokenizer.unk_token_id,
        "pad_token_id": tokenizer.pad_token_id or tokenizer.eos_token_id,
    }
    with torch.no_grad():
        output = model.generate(**inputs, **generate_kwargs)
    # FIX: slice by token position rather than ``response[len(full_prompt):]``.
    # Decoding the prompt tokens does not always round-trip to the exact
    # original string, so the old character-offset slice could clip or leak
    # prompt text; token-position slicing is exact.
    prompt_len = inputs["input_ids"].shape[1]
    generated_tokens = output[0][prompt_len:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
# Gradio callback: run one exchange and update the chat history.
def process_chat(
    message: str,
    history: List[Tuple[str, str]],
    system_prompt: str,
    model_name: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    top_k: int
) -> Tuple[List[Tuple[str, str]], List[Tuple[str, str]]]:
    """Generate a reply and return the updated history twice (the Chatbot
    component serves as both display and state, so both outputs get it)."""
    # ROBUSTNESS: after the clear button the Chatbot value can arrive as
    # None; treat that as an empty history instead of crashing on .append.
    # Copying also avoids mutating the caller's list in place.
    history = list(history) if history else []
    response = generate_response(message, history, system_prompt, model_name, max_new_tokens, temperature, top_p, top_k)
    history.append((message, response))
    return history, history
# Speech-to-text: load Whisper only when an accelerator is present.
# NOTE(review): the cuda/mps check gates *loading*, yet the model is still
# placed on CPU when only MPS is available — confirm this is intended.
asr = None
if torch.cuda.is_available() or torch.backends.mps.is_available():
    try:
        processor = WhisperProcessor.from_pretrained("openai/whisper-base")
        asr_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base").to("cuda" if torch.cuda.is_available() else "cpu")
        asr = {"processor": processor, "model": asr_model}
    except Exception as e:
        # Best-effort: keep the app usable without voice input on failure.
        print(f"语音模型加载失败: {e}")
        asr = None
def transcribe(audio) -> str:
    """Transcribe *audio* with the globally loaded Whisper model.

    Returns the decoded transcript, or a notice string when the ASR model
    was not loaded at startup.
    """
    if asr is None:
        return "语音识别模型未加载"
    processor, model = asr["processor"], asr["model"]
    # NOTE(review): the UI wires this to gr.Audio(type="filepath"), so *audio*
    # is a file path string, but WhisperProcessor expects a raw waveform array
    # (plus a sampling_rate argument). As written this call likely fails —
    # verify, and decode/resample the audio file to 16 kHz first.
    input_features = processor(audio, return_tensors="pt").input_features.to(model.device)
    predicted_ids = model.generate(input_features)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
# Build the Gradio UI (user-facing labels are intentionally in Chinese).
with gr.Blocks(title="无权限语言模型对话助手") as demo:
    gr.Markdown("## 公开语言模型对话应用(无需访问权限)")
    with gr.Row():
        # Left column: inputs and controls.
        with gr.Column(scale=1):
            message_input = gr.Textbox(label="输入消息")
            system_prompt = gr.Textbox(
                label="系统提示词",
                value="你是一个 helpful、知识渊博的AI助手。",
            )
            model_choice = gr.Dropdown(
                choices=list(MODELS.keys()),
                value=list(MODELS.keys())[0],
                label="选择语言模型"
            )
            # Sampling parameters, collapsed by default.
            with gr.Accordion("生成参数", open=False):
                max_new_tokens = gr.Slider(minimum=1, maximum=2048, value=512, label="最大Token数")
                temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="随机性")
                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p采样")
                top_k = gr.Slider(minimum=1, maximum=100, value=50, label="Top-k采样")
            use_voice = gr.Checkbox(label="使用语音输入")
            audio_input = gr.Audio(type="filepath", label="语音输入")
            send_btn = gr.Button("发送消息", variant="primary")
            clear_btn = gr.Button("清空对话")
        # Right column: conversation display (also used as the state input).
        with gr.Column(scale=2):
            chat_history = gr.Chatbot(label="对话历史")
    # Voice input: transcribe into the message box only when the checkbox is on.
    audio_input.change(
        fn=lambda audio, use: transcribe(audio) if use else "",
        inputs=[audio_input, use_voice],
        outputs=message_input
    )
    # Send: run the chat pipeline; Chatbot appears twice in outputs because
    # process_chat returns (history, history).
    send_btn.click(
        fn=process_chat,
        inputs=[message_input, chat_history, system_prompt, model_choice, max_new_tokens, temperature, top_p, top_k],
        outputs=[chat_history, chat_history]
    )
    # Clear: returning None resets the Chatbot component.
    clear_btn.click(fn=lambda: None, outputs=chat_history)
# Entrypoint: bind all interfaces on port 7860 (the standard HF Space port).
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)