"""Gradio chat demo for a LoRA-adapted DeepSeek-R1-Distill-Qwen model.

At import time the script loads the tokenizer and base model, resizes the
embedding matrix to the vocabulary size the adapter expects, attaches the
LoRA adapter, and raises the adapter's ``lora_alpha``. It then exposes a
``gr.ChatInterface`` whose callback is :func:`respond`.
"""

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Base model ID (the LoRA adapter was trained on top of this base LLM).
BASE_MODEL_ID = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
ADAPTER_MODEL_ID = "Snow2222/autotrain-fst"

print("🚀 正在加载 Tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)

print("🚀 正在加载 Base Model(基础模型)...")
device = "cuda" if torch.cuda.is_available() else "cpu"
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device_map="auto",
)

# Work around the `size mismatch` error: make the base model's embedding
# table the same size as the vocabulary the LoRA adapter was saved with.
print("🔧 调整 vocab_size 以匹配 LoRA...")
new_vocab_size = 151665
base_model.resize_token_embeddings(new_vocab_size)

print("🚀 正在加载 LoRA 适配器...")
# No explicit `.to(device)` here: the base model was already placed by
# `device_map="auto"`, and moving a dispatched model fails when any of its
# modules are offloaded.
model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL_ID)


def _apply_lora_alpha(peft_model, adapter, config):
    """Propagate ``config.lora_alpha`` into every LoRA layer of *peft_model*.

    LoRA layers cache a per-adapter ``scaling`` factor that is derived from
    alpha and the rank, so changing the config alone has no effect. The
    factor is ``alpha / r`` (``alpha / sqrt(r)`` when rank-stabilised LoRA is
    enabled); writing the raw alpha into ``scaling`` would over-amplify the
    adapter by a further factor of ``r``.
    """
    use_rslora = getattr(config, "use_rslora", False)
    for _, module in peft_model.named_modules():
        scaling = getattr(module, "scaling", None)
        if not isinstance(scaling, dict) or adapter not in scaling:
            continue

        # Prefer the layer's own rank (it may differ per module); fall back
        # to the adapter-wide rank from the config.
        layer_ranks = getattr(module, "r", None)
        if isinstance(layer_ranks, dict) and adapter in layer_ranks:
            rank = layer_ranks[adapter]
        else:
            rank = config.r
        if not rank:
            # A rank of zero means there is no low-rank update to scale.
            continue

        layer_alphas = getattr(module, "lora_alpha", None)
        if isinstance(layer_alphas, dict) and adapter in layer_alphas:
            layer_alphas[adapter] = config.lora_alpha

        divisor = rank ** 0.5 if use_rslora else rank
        scaling[adapter] = config.lora_alpha / divisor


# ======== Amplify the influence of the LoRA weights ========
# 1) Resolve the adapter name ("default" unless a custom name was used).
adapter_name = model.active_adapter or "default"

# 2) Raise lora_alpha in the adapter config.
peft_config = model.peft_config[adapter_name]
print(f"【原始】lora_alpha: {peft_config.lora_alpha}")
peft_config.lora_alpha = 128  # larger value to experiment with
print(f"【更新后】lora_alpha: {peft_config.lora_alpha}")

# 3) Refresh the cached scaling factor in every LoRA layer.
_apply_lora_alpha(model, adapter_name, peft_config)
# ======== End of LoRA amplification ========


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate the assistant's reply for one chat turn.

    Args:
        message: The user's latest message.
        history: Previous turns supplied by ``gr.ChatInterface``; unused,
            because only the current message is sent to the model.
        system_message: Instruction text placed at the start of the prompt.
        max_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The decoded continuation produced by the model, without the prompt.
    """
    print("==== 🚀 处理用户输入 ====")
    print(f"用户输入: {message}")

    # Build a simple single-turn prompt.
    prompt = f"{system_message}\n用户: {message}\n助手:"
    print(f"📡 处理 Prompt: {prompt}")

    # Tokenize the full prompt (not just the raw message) so the system
    # message and role markers actually reach the model.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True).to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),
            # temperature / top_p only take effect when sampling is enabled.
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
        )

    # Decode only the newly generated tokens so the reply does not echo the
    # prompt back to the user.
    response = tokenizer.decode(
        output[0][prompt_length:], skip_special_tokens=True
    ).strip()
    print(f"✅ 生成结果: {response}")

    return response


# Gradio UI
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=1024, value=256, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)

if __name__ == "__main__":
    print("🌍 启动 Gradio 界面...")
    demo.launch()