"""Load a local causal-LM checkpoint and print one sampled completion.

The script reads the tokenizer and model from ``model_path``, tokenizes a
single hard-coded prompt, samples one continuation on the CPU, and prints
the decoded text (the decoded sequence includes the prompt itself).
"""

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Directory containing the saved tokenizer and model weights.
model_path = "./qwen2.5_1.5b_model"

# Single source of truth for where both the weights and the inputs live.
DEVICE = "cpu"

tokenizer = AutoTokenizer.from_pretrained(model_path)

model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map=DEVICE,
    # float32 rather than float16: half-precision CPU kernels are missing in
    # older PyTorch builds and slow / numerically fragile in newer ones, so
    # full precision is the safe choice for CPU inference.
    torch_dtype=torch.float32,
    # NOTE(review): presumably a stock Qwen2.5 checkpoint (per the directory
    # name), whose architecture is built into transformers — so there is no
    # need to allow execution of Python code shipped with the checkpoint.
    # Re-enable only if the directory really contains custom modelling code.
    trust_remote_code=False,
)

prompt = "Solve the equation: 2x + 5 = 15"

# The prompt is capped at 50 tokens; padding is a no-op for a single sequence
# but keeps the call valid if a list of prompts is passed later.
inputs = tokenizer(
    prompt,
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=50,
).to(DEVICE)

outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    # Budget for *generated* tokens only; `max_length` would also count the
    # prompt, making the output length depend on how long the prompt is.
    max_new_tokens=200,
    num_return_sequences=1,
    # Stochastic decoding restricted to the 50 most likely tokens and,
    # within those, the smallest set whose cumulative probability is >= 0.9.
    do_sample=True,
    top_k=50,
    top_p=0.9,
)

# outputs[0] is the only returned sequence (num_return_sequences=1).
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

print("Model response:", response)