import os
import torch
import gradio as gr
from typing import List, Tuple
from transformers import AutoModelForCausalLM, AutoTokenizer

# Read the Hugging Face token from the environment
hf_token = os.environ.get("HUGGINGFACE_HUB_TOKEN")
if not hf_token:
    raise EnvironmentError("HUGGINGFACE_HUB_TOKEN environment variable not found; add it in the Space settings")
# Model configuration - public models only
MODELS = {
    "Zephyr 7B Beta": {
        "model_id": "HuggingFaceH4/zephyr-7b-beta",
        "kwargs": {"torch_dtype": torch.float16},
    },
    "Falcon 7B Instruct": {
        "model_id": "tiiuae/falcon-7b-instruct",
        "kwargs": {"torch_dtype": torch.float16, "trust_remote_code": True},
    },
}
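
# Note: each 7B model in float16 needs roughly 14 GB of GPU memory. On smaller
# GPUs, 4-bit quantization via the bitsandbytes package is a common
# workaround; a hedged sketch, not part of the original config:
#
#   from transformers import BitsAndBytesConfig
#   quant_config = BitsAndBytesConfig(load_in_4bit=True)
#   model = AutoModelForCausalLM.from_pretrained(
#       model_id, quantization_config=quant_config, device_map="auto"
#   )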
# Load a model and its tokenizer, and move the model to the available device
def load_model(model_name):
    model_config = MODELS[model_name]
    tokenizer = AutoTokenizer.from_pretrained(
        model_config["model_id"],
        token=hf_token,  # use_auth_token is deprecated in recent transformers
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_config["model_id"],
        token=hf_token,
        **model_config["kwargs"],
    )
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return model.to(device), tokenizer, device
# Load every model at startup
loaded_models = {}
for model_name in MODELS:
    loaded_models[model_name] = load_model(model_name)
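
# Eager loading keeps first-response latency low but pays the full memory cost
# of every model up front. A lazy-loading cache is one alternative (sketch
# only; get_model and _model_cache are not part of the original app):
#
#   _model_cache = {}
#   def get_model(name):
#       if name not in _model_cache:
#           _model_cache[name] = load_model(name)
#       return _model_cache[name]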
# Build the conversation prompt (different models may expect different formats)
def build_prompt(message, history, system_prompt, model_name):
    # Zephyr/Mistral-style models: simple role-prefixed format
    if "Zephyr" in model_name or "Mistral" in model_name:
        prompt = f"System: {system_prompt}\n"
        for user_msg, assistant_msg in history:
            prompt += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
        prompt += f"User: {message}\nAssistant:"
        return prompt
    # Falcon models: "###" section headers
    elif "Falcon" in model_name:
        prompt = f"### System:\n{system_prompt}\n\n"
        for user_msg, assistant_msg in history:
            prompt += f"### User:\n{user_msg}\n\n### Assistant:\n{assistant_msg}\n\n"
        prompt += f"### User:\n{message}\n\n### Assistant:"
        return prompt
    # Fallback: generic bracketed format
    else:
        prompt = f"[System] {system_prompt}\n"
        for user_msg, assistant_msg in history:
            prompt += f"[User] {user_msg}\n[Assistant] {assistant_msg}\n"
        prompt += f"[User] {message}\n[Assistant]"
        return prompt
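
# Note: many instruction-tuned tokenizers (including Zephyr's) ship a built-in
# chat template that matches the model's training format more closely than the
# manual strings above. A hedged alternative sketch:
#
#   msgs = [{"role": "system", "content": system_prompt}]
#   for user_msg, assistant_msg in history:
#       msgs += [{"role": "user", "content": user_msg},
#                {"role": "assistant", "content": assistant_msg}]
#   msgs.append({"role": "user", "content": message})
#   prompt = tokenizer.apply_chat_template(
#       msgs, tokenize=False, add_generation_prompt=True
#   )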
# Model inference
def generate_response(
    message: str,
    history: List[Tuple[str, str]],
    system_prompt: str,
    model_name: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    top_k: int,
):
    model, tokenizer, device = loaded_models[model_name]
    # Build the prompt
    full_prompt = build_prompt(message, history, system_prompt, model_name)
    # Encode the input
    inputs = tokenizer(full_prompt, return_tensors="pt").to(device)
    # Generation parameters
    generate_kwargs = {
        "max_new_tokens": max_new_tokens,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": top_k,
        "do_sample": True,
        "eos_token_id": tokenizer.eos_token_id or tokenizer.unk_token_id,
        "pad_token_id": tokenizer.pad_token_id or tokenizer.eos_token_id,
    }
    # Generate the response
    with torch.no_grad():
        output = model.generate(**inputs, **generate_kwargs)
    # Decode only the newly generated tokens; slicing the decoded string by
    # len(full_prompt) can misalign when detokenization does not round-trip
    generated = output[0][inputs["input_ids"].shape[-1]:]
    response = tokenizer.decode(generated, skip_special_tokens=True).strip()
    return response
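
# For token-by-token streaming in the UI, transformers provides
# TextIteratorStreamer; a sketch of how generate_response could yield partial
# text instead of returning one string (reuses the same inputs/generate_kwargs):
#
#   from threading import Thread
#   from transformers import TextIteratorStreamer
#   streamer = TextIteratorStreamer(tokenizer, skip_prompt=True,
#                                   skip_special_tokens=True)
#   Thread(target=model.generate,
#          kwargs={**inputs, **generate_kwargs, "streamer": streamer}).start()
#   partial = ""
#   for chunk in streamer:
#       partial += chunk
#       yield partial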
# Handle a user message
def process_chat(
    message: str,
    history: List[Tuple[str, str]],
    system_prompt: str,
    model_name: str,
    max_new_tokens: int,
    temperature: float,
    top_p: float,
    top_k: int,
):
    response = generate_response(
        message, history, system_prompt, model_name,
        max_new_tokens, temperature, top_p, top_k,
    )
    history.append((message, response))
    return history
# 语音转文字功能
asr = None
if torch.cuda.is_available() or torch.backends.mps.is_available():
try:
from transformers import WhisperProcessor, WhisperForConditionalGeneration
processor = WhisperProcessor.from_pretrained("openai/whisper-base")
asr_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base").to("cuda" if torch.cuda.is_available() else "cpu")
asr = {"processor": processor, "model": asr_model}
except:
asr = None
def transcribe(audio):
    if asr is None:
        return "Speech recognition model not loaded"
    processor, model = asr["processor"], asr["model"]
    # gr.Audio(type="filepath") passes a file path, but the processor expects
    # a waveform; load and resample to Whisper's 16 kHz (requires librosa)
    import librosa
    speech, _ = librosa.load(audio, sr=16000)
    input_features = processor(
        speech, sampling_rate=16000, return_tensors="pt"
    ).input_features.to(model.device)
    predicted_ids = model.generate(input_features)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
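
# The pipeline API is a simpler ASR alternative that handles file decoding and
# resampling itself (hedged sketch; loads the same whisper-base checkpoint):
#
#   from transformers import pipeline
#   asr_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-base")
#   text = asr_pipe(audio)["text"]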
# Build the Gradio interface
with gr.Blocks(title="Open Language Model Chat Assistant") as demo:
    gr.Markdown("## Chat with public language models (no gated access required)")
    with gr.Row():
        with gr.Column(scale=1):
            message_input = gr.Textbox(label="Message")
            system_prompt = gr.Textbox(
                label="System prompt",
                value="You are a helpful, knowledgeable AI assistant.",
            )
            model_choice = gr.Dropdown(
                choices=list(MODELS.keys()),
                value=list(MODELS.keys())[0],
                label="Model",
            )
            with gr.Accordion("Generation parameters", open=False):
                max_new_tokens = gr.Slider(minimum=1, maximum=2048, value=512, label="Max new tokens")
                temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, label="Temperature")
                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, label="Top-p sampling")
                top_k = gr.Slider(minimum=1, maximum=100, value=50, label="Top-k sampling")
            use_voice = gr.Checkbox(label="Use voice input")
            audio_input = gr.Audio(type="filepath", label="Voice input")
            send_btn = gr.Button("Send", variant="primary")
            clear_btn = gr.Button("Clear chat")
        with gr.Column(scale=2):
            chat_history = gr.Chatbot(label="Conversation")
    # Voice input handling
    audio_input.change(
        fn=lambda audio, use: transcribe(audio) if use else "",
        inputs=[audio_input, use_voice],
        outputs=message_input,
    )
    # Send a message
    send_btn.click(
        fn=process_chat,
        inputs=[message_input, chat_history, system_prompt, model_choice,
                max_new_tokens, temperature, top_p, top_k],
        outputs=chat_history,
    )
    # Clear the conversation
    clear_btn.click(fn=lambda: None, outputs=chat_history)
# Launch the app
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)