"""Generate an n8n workflow JSON with a LoRA-adapted Qwen2.5-Coder model.

The script loads the base causal LM, attaches the fine-tuned PEFT adapter
from ``adapter_path``, renders a system + user conversation with the
tokenizer's chat template, and prints the model's completion.
"""

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

base_model_name = "Qwen/Qwen2.5-Coder-14B-Instruct"
adapter_path = "./outputs/qwen25-coder-n8n"

print("Loading base model...")
# NOTE(review): trust_remote_code=True allows code shipped with the model
# repository to run locally. It is kept as in the original script; confirm
# it is actually required for this checkpoint before relying on it.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)

print("Loading adapter...")
model = PeftModel.from_pretrained(base_model, adapter_path)
# Put every module (including the freshly attached adapter layers) in
# evaluation mode so dropout cannot perturb generation.
model.eval()

tokenizer = AutoTokenizer.from_pretrained(base_model_name)

system_prompt = (
    "You are an expert n8n workflow generation assistant. "
    "Your goal is to create valid, efficient, and error-free n8n workflow "
    "JSONs based on the user's requirements. "
    "Always output ONLY the valid JSON workflow."
)
user_input = (
    "Create a workflow that gets data from a webhook and sends it to Slack. "
    "Also have a sticky note as documentation."
)

messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_input},
]

# Render the conversation to a prompt string that ends with the assistant
# turn header, so the model continues with its answer.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
inputs = tokenizer([text], return_tensors="pt").to(model.device)

print("Generating workflow...")
outputs = model.generate(
    **inputs,
    max_new_tokens=2048,
    do_sample=True,
    temperature=0.1,
)

# generate() returns the prompt tokens followed by the completion. Drop the
# prompt so that only the newly generated workflow is printed; otherwise the
# system and user messages are echoed in front of the JSON.
prompt_length = inputs["input_ids"].shape[-1]
generated_tokens = outputs[0][prompt_length:]
print(tokenizer.decode(generated_tokens, skip_special_tokens=True))