"""Stream a tool-calling chat completion from a locally fine-tuned causal LM.

Loads a checkpoint from disk, builds a chat prompt that advertises an
``order_status_checker`` tool, and streams the generated reply to stdout.
"""
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
import torch

# Local fine-tuning checkpoint; adjust to your own output directory.
model_id = r"/home/ionet/output_model/checkpoint-5500"

tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
)

# Pick the device once and reuse it for both model and inputs.
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device)
model.eval()

# Streams decoded tokens to stdout as they are generated.
streamer = TextStreamer(tokenizer)

messages = [
    {"role": "user", "content": "lost my order, can you help me check the status? My order ID is 12345678."}
]

# OpenAI-style tool schema the chat template will render into the prompt.
tools = [
    {
        "type": "function",
        "function": {
            "name": "order_status_checker",
            "description": "Checks the status of an order given an order ID.",
            "parameters": {
                "type": "object",
                "properties": {
                    "order_id": {"type": "string", "description": "The order ID to check status for."}
                },
                "required": ["order_id"]
            }
        }
    }
]

input_text = tokenizer.apply_chat_template(
    messages,
    tools=tools,
    tokenize=False,
    # Append the assistant-turn header so the model generates a reply
    # instead of continuing the user message.
    add_generation_prompt=True,
    enable_thinking=False,
)
inputs = tokenizer(input_text, return_tensors="pt").to(device)

# inference_mode disables autograd bookkeeping — faster and lighter for
# pure generation. Pass **inputs so attention_mask travels with input_ids
# (generate() warns and may misbehave when the mask is dropped).
with torch.inference_mode():
    generate_ids = model.generate(
        **inputs,
        max_new_tokens=1024,
        temperature=0.6,
        do_sample=True,
        streamer=streamer,
    )