"""Run a tool-calling chat completion against a fine-tuned causal LM checkpoint.

Loads the tokenizer and model from a local checkpoint, renders a single-turn
chat prompt that advertises one tool (an order-status checker), and streams
the model's sampled reply to stdout.
"""

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

# Local fine-tuned checkpoint to evaluate.
MODEL_ID = r"/home/ionet/output_model/checkpoint-5500"


def main() -> None:
    """Load the checkpoint, build the tool-calling prompt, and stream a reply."""
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)

    # Resolve the target device once instead of re-querying CUDA availability
    # at every step.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()  # inference only — disable dropout etc.

    # Streams decoded tokens to stdout as they are generated.
    streamer = TextStreamer(tokenizer)

    messages = [
        {
            "role": "user",
            "content": (
                "lost my order, can you help me check the status? "
                "My order ID is 12345678."
            ),
        }
    ]

    # Single tool exposed to the model via the chat template.
    tools = [
        {
            "type": "function",
            "function": {
                "name": "order_status_checker",
                "description": "Checks the status of an order given an order ID.",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "order_id": {
                            "type": "string",
                            "description": "The order ID to check status for.",
                        }
                    },
                    "required": ["order_id"],
                },
            },
        }
    ]

    # BUG FIX: add_generation_prompt=True appends the assistant-turn header so
    # the model generates a reply instead of continuing the user message.
    # NOTE(review): enable_thinking is a template-specific kwarg (e.g. Qwen);
    # kept as in the original — confirm the checkpoint's template supports it.
    input_text = tokenizer.apply_chat_template(
        messages,
        tools=tools,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )
    inputs = tokenizer(input_text, return_tensors="pt").to(device)

    # BUG FIX: pass the attention_mask along with input_ids (**inputs) —
    # omitting it triggers an HF warning and can produce wrong results when
    # the input contains padding.
    with torch.no_grad():
        model.generate(
            **inputs,
            max_new_tokens=1024,
            temperature=0.6,
            do_sample=True,
            streamer=streamer,
        )


if __name__ == "__main__":
    main()