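"""Quick test script for the LoRA-fine-tuned DialoGPT-small model.

Loads the base model, attaches the LoRA adapter saved in ./dialogpt-small-lora,
and generates responses for a couple of instruction/input prompts.
"""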
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
# Load the base model and tokenizer
model_name = "microsoft/DialoGPT-small"
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
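# GPT-2-based models like DialoGPT have no dedicated pad token, so reuse the EOS token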
tokenizer.pad_token = tokenizer.eos_token
# Load the LoRA adapter
model = PeftModel.from_pretrained(base_model, "./dialogpt-small-lora")
# Test helper: build an instruction-style prompt and generate a response
def test_model(instruction, input_text):
    prompt = f"### Instruction:\n{instruction}\n\n### Input:\n{input_text}\n\n### Response:\n"

    # Encode the prompt
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)

    # Generate a response
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=50,  # limit the number of newly generated tokens
            num_return_sequences=1,
            temperature=0.3,  # lower temperature for more deterministic output
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            repetition_penalty=1.1  # reduce repetition
        )

    # Decode the output
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Keep only the newly generated part after the prompt
    generated_text = response[len(prompt):].strip()
    return generated_text
# Example tests
if __name__ == "__main__":
    print("Testing the fine-tuned model...")
    print("=" * 50)

    # Test 1: generate a user JSON object
    instruction = "Based on the following information, generate a user JSON object."
    input_text = "The user ID is 999, the username is test_user, and the email is test@example.com"
    result = test_model(instruction, input_text)
    print(f"Instruction: {instruction}")
    print(f"Input: {input_text}")
    print(f"Output: {result}")
    print("=" * 50)

    # Test 2: another example
    instruction = "Based on the following information, generate a user JSON object."
    input_text = "The user ID is 888, the username is admin, and the email is admin@company.com"
    result = test_model(instruction, input_text)
    print(f"Instruction: {instruction}")
    print(f"Input: {input_text}")
    print(f"Output: {result}")
    print("=" * 50)