import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from peft import PeftModel

# Hub id of the base seq2seq checkpoint and path of the PEFT adapter applied on top of it.
BASE_MODEL = "Salesforce/codet5-base"
ADAPTER = "checkpoints/sft_adapter"   # change if needed

# Use Apple's MPS backend when PyTorch reports it as available; otherwise run on CPU.
if torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print("Loading model...")

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

# Load the base weights, wrap them with the adapter, then move everything to the device.
model = PeftModel.from_pretrained(
    AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL),
    ADAPTER,
).to(device)

# Switch to evaluation mode for inference.
model.eval()

# Five sample natural-language questions (Spider-style) used as a quick smoke test.
questions = [
    "List all employee names",
    "Find the number of students in each department",
    "Show the average salary of employees",
    "Which flights depart from LA?",
    "Find customers who bought more than 5 items",
]

# Translate each question to SQL with the loaded model and print question/answer pairs.
for q in questions:
    prompt = f"Translate to SQL: {q}"

    # Tokenize into PyTorch tensors and move them onto the selected device.
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=128,
            # Deterministic output comes from greedy decoding, not from temperature:
            # `temperature` only applies when sampling and must be strictly positive,
            # so passing 0.0 is invalid/ignored. Request greedy decoding explicitly.
            do_sample=False,
        )

    # generate() returns a batch; the single prompt's sequence is at index 0.
    sql = tokenizer.decode(outputs[0], skip_special_tokens=True)

    print("\nQUESTION:", q)
    print("SQL:", sql)
    print("-" * 60)