"""Generate one math quiz question as strict JSON using a locally trained causal LM."""

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import json

model_path = "./trained_model"  # path to your trained model


def _extract_json(text: str) -> str | None:
    """Return the substring from the first '{' to the last '}' of *text*.

    Returns None when no such span exists.

    BUG FIX: the original computed ``json_end = text.rfind("}") + 1`` and then
    tested ``json_end != -1`` — but ``rfind`` returns -1 on failure, so after
    ``+ 1`` the sentinel becomes 0 and the check could never fail, making the
    "no JSON found" branch unreachable whenever '{' was present. We compare
    the raw ``rfind`` results instead, and also reject an inverted span
    (a '}' that appears before the first '{').
    """
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end == -1 or end < start:
        return None
    return text[start:end + 1]


def main() -> None:
    """Load the trained model, sample one quiz question, and print it.

    Prints the parsed JSON on success; otherwise prints the raw model output
    with a warning so the user can inspect what the model produced.
    """
    print("Loading model...")
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path)
    model.eval()  # inference mode: disables dropout, etc.

    print("Model loaded! Generating quiz question...")
    prompt = (
        "Generate a math quiz question for age 4, difficulty easy, "
        "in strict JSON format like this: "
        '{"question": "", "options": ["", "", "", ""], "answer": 0, "explanation": ""}\n'
    )
    inputs = tokenizer(prompt, return_tensors="pt")

    with torch.no_grad():  # generation needs no gradients
        output = model.generate(
            **inputs,
            max_new_tokens=150,  # limit length
            temperature=0.3,     # lower = more deterministic
            top_p=0.9,           # nucleus sampling
            do_sample=True,      # allow variety but controlled
            eos_token_id=tokenizer.eos_token_id,
        )

    decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)

    # Try to extract the JSON part only (models often echo the prompt or
    # surround the JSON with extra text).
    json_str = _extract_json(decoded_output)
    if json_str is not None:
        try:
            quiz = json.loads(json_str)
            print("✅ Parsed JSON output:")
            print(json.dumps(quiz, indent=2))
        except json.JSONDecodeError:
            print("⚠ Raw output (invalid JSON):")
            print(decoded_output)
    else:
        print("⚠ Raw output (no JSON found):")
        print(decoded_output)


if __name__ == "__main__":
    main()