# HuggingFace Spaces app: lightweight chat UI for Qwen1.5-1.8B-Chat.
# (Scraped file-view page chrome removed; see code below.)
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Load model and tokenizer (Qwen1.5-1.8B-Chat, tuned for a small 2-core/16G host).
model_name = "Qwen/Qwen1.5-1.8B-Chat"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# NOTE(review): passing load_in_4bit / bnb_4bit_compute_dtype directly to
# from_pretrained is deprecated and rejected by recent transformers releases;
# the supported path is an explicit BitsAndBytesConfig (needs the
# `bitsandbytes` package installed, as the original 4-bit flags already did).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,   # half precision to cut memory footprint
    device_map="auto",           # let accelerate place weights automatically
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,                     # 4-bit quantization lowers RAM pressure
        bnb_4bit_compute_dtype=torch.float16,  # compute in fp16 for speed
    ),
)
# 优化后的聊天函数(适配 Qwen 的对话模板)
def chat_with_model(message, history):
    """Generate one assistant reply for a Gradio ChatInterface turn.

    Args:
        message: Current user message (str).
        history: Prior conversation. Both Gradio history formats are
            accepted: a list of (user, assistant) tuples (classic
            ``type="tuples"``) or a list of ``{"role", "content"}`` dicts
            (``type="messages"``).

    Returns:
        The model's reply, decoded and stripped, as a string.
    """
    messages = []
    if history and isinstance(history[0], dict):
        # "messages" format: entries are already role/content dicts.
        # Keep only the last 3 turns (6 messages) to bound compute.
        messages.extend(
            {"role": m["role"], "content": m["content"]} for m in history[-6:]
        )
    else:
        # "tuples" format: (user, assistant) pairs; keep the last 3 turns.
        for user_msg, bot_msg in history[-3:]:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": bot_msg})
    # Append the current user message last so the model answers it.
    messages.append({"role": "user", "content": message})

    # Render the Qwen chat template into model-ready tensors.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)

    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.7,
            repetition_penalty=1.1,
            do_sample=True,
        )

    # Slice off the prompt tokens and decode only the newly generated reply.
    bot_response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[-1]:],
        skip_special_tokens=True,
    ).strip()
    return bot_response
# 启动 Gradio 界面(保留资源优化配置)
# Start the Gradio UI when run as a script (resource-friendly launch settings).
if __name__ == "__main__":
    demo = gr.ChatInterface(
        fn=chat_with_model,
        title="轻量聊天助手",
        description="基于 Qwen1.5-1.8B-Chat 适配 2 核 16G 配置",
    )
    demo.launch(
        server_name="0.0.0.0",  # listen on all interfaces for the Space
        server_port=7860,
        share=False,            # no public share tunnel
        inline=False,
    )