""" LoRA模型合并脚本 将LoRA适配器合并到基础模型中 """ import argparse from pathlib import Path import torch from transformers import AutoModelForCausalLM, AutoTokenizer from peft import PeftModel def merge_lora( base_model_path: str, lora_path: str, output_dir: str ): """ 合并LoRA适配器到基础模型 Args: base_model_path: 基础模型路径或名称 lora_path: LoRA适配器路径 output_dir: 输出目录 """ print("="*50) print("合并LoRA模型") print("="*50) output_dir = Path(output_dir) output_dir.mkdir(parents=True, exist_ok=True) # 检测设备 use_mps = torch.backends.mps.is_available() device = "mps" if use_mps else ("cuda" if torch.cuda.is_available() else "cpu") print(f"\n使用设备: {device}") torch_dtype = torch.bfloat16 if use_mps else torch.float16 # 加载基础模型 print(f"\n加载基础模型: {base_model_path}") base_model = AutoModelForCausalLM.from_pretrained( base_model_path, torch_dtype=torch_dtype, device_map=None if use_mps else "auto", trust_remote_code=True ) if use_mps: base_model = base_model.to("mps") # 加载tokenizer print(f"加载tokenizer...") tokenizer = AutoTokenizer.from_pretrained( base_model_path, trust_remote_code=True ) # 加载LoRA适配器 print(f"加载LoRA适配器: {lora_path}") model = PeftModel.from_pretrained( base_model, lora_path ) # 合并模型 print(f"\n合并LoRA权重...") merged_model = model.merge_and_unload() # 保存合并后的模型 print(f"保存合并后的模型到: {output_dir}") merged_model.save_pretrained( output_dir, safe_serialization=True ) tokenizer.save_pretrained(output_dir) # 打印模型信息 print(f"\n✓ 模型合并完成!") print(f"\n合并后模型信息:") print(f" 路径: {output_dir}") print(f" 模型大小: {sum(f.stat().st_size for f in output_dir.rglob('*.safetensors') if f.is_file()) / 1024**3:.2f} GB") # 验证模型 print(f"\n验证合并后的模型...") try: test_model = AutoModelForCausalLM.from_pretrained( output_dir, device_map=None if use_mps else "auto", torch_dtype=torch_dtype, trust_remote_code=True ) print(f"✓ 模型验证成功,可以正常加载") except Exception as e: print(f"⚠️ 模型验证失败: {e}") return output_dir def test_merged_model(model_path: str, test_prompts: list = None): """测试合并后的模型""" print("\n" + "="*50) print("测试合并后的模型") print("="*50) import torch # 加载模型 print(f"\n加载模型: {model_path}") model = AutoModelForCausalLM.from_pretrained( model_path, device_map="auto", torch_dtype=torch.float16, trust_remote_code=True ) tokenizer = AutoTokenizer.from_pretrained( model_path, trust_remote_code=True ) # 默认测试提示 if test_prompts is None: test_prompts = [ "我想申请培训", "请问年假怎么计算?", "我想查询社保缴纳情况" ] print(f"\n测试提示数: {len(test_prompts)}") for idx, prompt in enumerate(test_prompts, 1): print(f"\n--- 测试 {idx} ---") print(f"输入: {prompt}") # 构建消息 messages = [ {"role": "system", "content": "你是一个专业的HR助手,请礼貌、准确地回答员工问题。"}, {"role": "user", "content": prompt} ] # 应用模板 text = tokenizer.apply_chat_template( messages, tokenize=False, add_generation_prompt=True ) # 编码 inputs = tokenizer(text, return_tensors="pt").to(model.device) # 生成 with torch.no_grad(): outputs = model.generate( **inputs, max_new_tokens=100, temperature=0.7, top_p=0.9, do_sample=True ) # 解码 response = tokenizer.decode(outputs[0][inputs['input_ids'].shape[1]:], skip_special_tokens=True) print(f"输出: {response}") print(f"\n✓ 测试完成") def main(): """主函数""" parser = argparse.ArgumentParser(description='合并LoRA模型') parser.add_argument( '--base_model', type=str, required=True, help='基础模型路径或名称' ) parser.add_argument( '--lora_path', type=str, required=True, help='LoRA适配器路径' ) parser.add_argument( '--output_dir', type=str, required=True, help='输出目录' ) parser.add_argument( '--test', action='store_true', help='是否测试合并后的模型' ) args = parser.parse_args() # 合并模型 output_path = merge_lora( base_model_path=args.base_model, lora_path=args.lora_path, output_dir=args.output_dir ) # 测试模型 if args.test: test_merged_model(str(output_path)) print(f"\n✓ 完成!") print(f"\n使用方法:") print(f" from transformers import AutoModelForCausalLM") print(f" model = AutoModelForCausalLM.from_pretrained('{args.output_dir}')") print(f" tokenizer = AutoTokenizer.from_pretrained('{args.output_dir}')") if __name__ == '__main__': main()