import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# load the fine-tuned tokenizer and model from the local checkpoint directory
output_dir = "./checkpoint-2"
tokenizer = AutoTokenizer.from_pretrained(output_dir)
model = AutoModelForCausalLM.from_pretrained(output_dir, device_map="auto")
# We use the tokenizer's chat template to format each message - see https://huggingface.co/docs/transformers/main/en/chat_templating
messages = [
    {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
]

# prepare the messages for the model
input_ids = tokenizer.apply_chat_template(
    messages,
    truncation=True,
    add_generation_prompt=True,
    return_tensors="pt",
).to(model.device)  # model.device matches wherever device_map="auto" placed the model

# inference
outputs = model.generate(
    input_ids=input_ids,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    top_k=50,
    top_p=0.95,
)

print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])
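
# Optional follow-up (a sketch, not part of the original script): generate() returns
# the prompt tokens followed by the completion, so slicing off the prompt length and
# decoding the remainder prints only the model's reply.
reply_ids = outputs[0][input_ids.shape[-1]:]
print(tokenizer.decode(reply_ids, skip_special_tokens=True))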