"""Run a tool-calling chat completion against a fine-tuned causal LM checkpoint.

Loads the tokenizer and model from a local checkpoint, renders a single-turn
chat prompt that advertises one tool (an order-status checker), and streams
the model's sampled reply to stdout.
"""

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

# Local fine-tuned checkpoint to evaluate.
MODEL_ID = r"/home/ionet/output_model/checkpoint-5500"


def main() -> None:
    """Load the checkpoint, build the tool-calling prompt, and stream a reply."""
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)

    # Resolve the target device once instead of re-querying CUDA availability
    # at every step.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()  # inference only — disable dropout etc.

    # Streams decoded tokens to stdout as they are generated.
    streamer = TextStreamer(tokenizer)

    messages = [
        {
            "role": "user",
            "content": (
                "lost my order, can you help me check the status? "
                "My order ID is 12345678."
            ),
        }
    ]

    # Single tool exposed to the model via the chat template.
    tools = [
        {
            "type": "function",
            "function": {
                "name": "order_status_checker",
                "description": "Checks the status of an order given an order ID.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "order_id": {
                            "type": "string",
                            "description": "The order ID to check status for.",
                        }
                    },
                    "required": ["order_id"],
                },
            },
        }
    ]

    # BUG FIX: add_generation_prompt=True appends the assistant-turn header so
    # the model generates a reply instead of continuing the user message.
    # NOTE(review): enable_thinking is a template-specific kwarg (e.g. Qwen);
    # kept as in the original — confirm the checkpoint's template supports it.
    input_text = tokenizer.apply_chat_template(
        messages,
        tools=tools,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )
    inputs = tokenizer(input_text, return_tensors="pt").to(device)

    # BUG FIX: pass the attention_mask along with input_ids (**inputs) —
    # omitting it triggers an HF warning and can produce wrong results when
    # the input contains padding.
    with torch.no_grad():
        model.generate(
            **inputs,
            max_new_tokens=1024,
            temperature=0.6,
            do_sample=True,
            streamer=streamer,
        )


if __name__ == "__main__":
    main()