# Inference script: load a fine-tuned causal LM checkpoint and run a
# tool-calling chat completion with streamed output.
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import torch
# Local path to the fine-tuned checkpoint directory.
model_id = r"/home/ionet/output_model/checkpoint-5500"

tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
)

# Resolve the target device once instead of repeating the CUDA check.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = model.to(device)
model.eval()  # inference mode: disables dropout and friends

# Prints decoded tokens to stdout as they are generated.
streamer = TextStreamer(tokenizer)

messages = [
    {"role": "user", "content": "lost my order, can you help me check the status? My order ID is 12345678."}
]

# OpenAI-style function spec advertised to the model for tool calling.
tools = [
    {
        "type": "function",
        "function": {
            "name": "order_status_checker",
            "description": "Checks the status of an order given an order ID.",
            "parameters": {
                "type": "object",
                "properties": {
                    "order_id": {"type": "string", "description": "The order ID to check status for."}
                },
                "required": ["order_id"]
            }
        }
    }
]

input_text = tokenizer.apply_chat_template(
    messages,
    tools=tools,
    tokenize=False,
    # Open the assistant turn so the model generates a reply rather than
    # continuing/echoing the user message.
    add_generation_prompt=True,
    enable_thinking=False,
)

# BatchEncoding supports .to(device); moves input_ids and attention_mask together.
inputs = tokenizer(input_text, return_tensors="pt").to(device)

# Inference only: skip autograd graph construction.
with torch.no_grad():
    generate_ids = model.generate(
        inputs['input_ids'],
        # Pass the mask explicitly — omitting it triggers a warning and can
        # mis-attend when padding is present.
        attention_mask=inputs['attention_mask'],
        max_new_tokens=1024,
        temperature=0.6,
        do_sample=True,
        streamer=streamer,
        # Many causal-LM tokenizers define no pad token; fall back to EOS to
        # avoid the runtime warning / wrong default.
        pad_token_id=(tokenizer.pad_token_id
                      if tokenizer.pad_token_id is not None
                      else tokenizer.eos_token_id),
    )
|