| """ | |
| Hugging Face Space - 客服模型对话演示 | |
| 模型: pplboy/test (基于 Qwen2.5-0.5B-Instruct 的 LoRA 微调) | |
| """ | |
| import gradio as gr | |
| import torch | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| from peft import PeftModel | |
| import os | |

# Global variables that hold the loaded model and tokenizer
model = None
tokenizer = None


def load_model():
    """Load the model (only once)."""
    global model, tokenizer
    if model is None or tokenizer is None:
        print("Loading base model...")
        base_model = AutoModelForCausalLM.from_pretrained(
            "Qwen/Qwen2.5-0.5B-Instruct",
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True
        )
        print("Loading LoRA adapter...")
        model = PeftModel.from_pretrained(
            base_model,
            "pplboy/test",
            torch_dtype=torch.float16
        )
        print("Loading tokenizer...")
        tokenizer = AutoTokenizer.from_pretrained("pplboy/test", trust_remote_code=True)
        print("✅ Model loaded!")
    return model, tokenizer
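
# Note (optional, not part of the original script): once the adapter is loaded,
# the LoRA weights could be folded into the base model with
# `model = model.merge_and_unload()` so generation runs without adapter overhead.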


def chat(message, history):
    """Generate a reply for one turn of the conversation."""
    try:
        # Load the model if it has not been loaded yet
        if model is None or tokenizer is None:
            load_model()

        # Build the conversation history
        if history is None:
            history = []

        # Format the input with the model's chat template
        messages = []
        for user_msg, assistant_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": message})

        # Apply the chat template
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

        # Encode the input
        inputs = tokenizer(text, return_tensors="pt").to(model.device)

        # Generate a reply
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=200,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
                eos_token_id=tokenizer.eos_token_id
            )

        # Decode only the newly generated tokens
        response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
        return response.strip()
    except Exception as e:
        return f"❌ Error: {str(e)}"


def clear_history():
    """Clear the input box and the chat history."""
    return "", []


# Page title and description
title = "🤗 Customer Service Model Chat Demo"
description = """
This is a customer-service chat model fine-tuned from Qwen2.5-0.5B-Instruct.

**How to use:**
1. Type your question in the input box
2. Click "Send" or press Enter
3. The model generates a reply

**Model info:**
- Base model: Qwen/Qwen2.5-0.5B-Instruct
- Fine-tuning method: LoRA
- Intended use: customer-service dialogue and Q&A
"""

# Build the Gradio interface
with gr.Blocks(title=title, theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(description)

    chatbot = gr.Chatbot(
        label="Conversation",
        height=400,
        show_copy_button=True
    )

    with gr.Row():
        msg = gr.Textbox(
            label="Message",
            placeholder="Type your question...",
            scale=4,
            lines=2
        )
        submit_btn = gr.Button("Send", variant="primary", scale=1)

    with gr.Row():
        clear_btn = gr.Button("Clear history", variant="secondary")

    # Example questions
    gr.Examples(
        examples=[
            "Hi, I'd like to ask about a product",
            "What are the product's main features?",
            "How do I return an item?",
            "What are the customer-service hours?",
            "Which payment methods are supported?"
        ],
        inputs=msg
    )

    # Event bindings
    def respond(message, chat_history):
        bot_message = chat(message, chat_history)
        chat_history.append((message, bot_message))
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    submit_btn.click(respond, [msg, chatbot], [msg, chatbot])
    clear_btn.click(clear_history, outputs=[msg, chatbot])

    # Show a hint while the page loads (written into a hidden textbox)
    demo.load(
        fn=lambda: "The model is loading, please wait...",
        outputs=gr.Textbox(visible=False)
    )

if __name__ == "__main__":
    # Run inside the Space
    demo.launch(server_name="0.0.0.0", server_port=7860)
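
# Note (assumption, not part of the original file): for this app to run as a
# Hugging Face Space, the repository would also need a requirements.txt listing
# at least torch, transformers, and peft, alongside the Gradio SDK metadata in
# the Space's README.md.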