"""Run one chat-style generation against a local causal-LM checkpoint.

The script loads the model and tokenizer from ``model_path``, renders a
system + user conversation through the tokenizer's chat template, samples a
completion, and prints both the rendered prompt and the decoded answer.
"""
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Path to the local checkpoint directory.
model_path = "/root/Qwen2.5-7B-Instruct-R1-forfinance/"

# ---------------------------------------------------------------------------
# Load the model and tokenizer
# ---------------------------------------------------------------------------
print("正在加载模型...")

model_load_options = {
    # Original author's note: chosen to follow the torch_dtype in config.json.
    "torch_dtype": torch.bfloat16,
    "device_map": "auto",
    # Original author's note: only if the checkpoint needs it.
    "trust_remote_code": True,
}
model = AutoModelForCausalLM.from_pretrained(model_path, **model_load_options)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

print("模型加载完成!")

# ---------------------------------------------------------------------------
# Build the conversation
# ---------------------------------------------------------------------------
prompt = "假设你是一位金融行业专家,请回答下列问题。\n在宏观分析中,描述在既定利率水平下产品市场达到均衡状态的曲线是什么?\n请一步步思考。"

system_message = {
    "role": "system",
    "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant.",
}
user_message = {"role": "user", "content": prompt}
messages = [system_message, user_message]

# Render the conversation to a plain string (tokenize=False) and append the
# generation prompt so the model continues as the assistant.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

separator = "=" * 50
print("输入文本:")
print(text)
print(f"\n{separator}\n")

# ---------------------------------------------------------------------------
# Tokenize and generate
# ---------------------------------------------------------------------------
# Encode as a batch of one and move the tensors to the model's device.
encoded_batch = tokenizer([text], return_tensors="pt")
model_inputs = encoded_batch.to(model.device)

print("正在生成回答...")

sampling_options = dict(
    max_new_tokens=2048,  # original note: reduced to avoid overly long output
    do_sample=True,
    temperature=0.7,
    top_p=0.8,
    repetition_penalty=1.05,
    pad_token_id=tokenizer.eos_token_id,
)
with torch.no_grad():  # original note: saves GPU memory
    generated_ids = model.generate(**model_inputs, **sampling_options)

# ---------------------------------------------------------------------------
# Decode and print the answer
# ---------------------------------------------------------------------------
# Drop the first len(prompt) token ids from every output sequence so that only
# the tokens following the prompt are decoded.
generated_ids = [
    full_sequence[len(prompt_ids):]
    for prompt_ids, full_sequence in zip(model_inputs.input_ids, generated_ids)
]
decoded_batch = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
response = decoded_batch[0]

print("模型回答:")
print(response)