"""Generate one math quiz question as strict JSON using a locally trained causal LM."""

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import json

model_path = "./trained_model"  # path to your trained model


def _extract_json(text: str) -> str | None:
    """Return the substring from the first '{' to the last '}' of *text*.

    Returns None when no such span exists.

    BUG FIX: the original computed ``json_end = text.rfind("}") + 1`` and then
    tested ``json_end != -1`` — but ``rfind`` returns -1 on failure, so after
    ``+ 1`` the sentinel becomes 0 and the check could never fail, making the
    "no JSON found" branch unreachable whenever '{' was present. We compare
    the raw ``rfind`` results instead, and also reject an inverted span
    (a '}' that appears before the first '{').
    """
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end == -1 or end < start:
        return None
    return text[start:end + 1]


def main() -> None:
    """Load the trained model, sample one quiz question, and print it.

    Prints the parsed JSON on success; otherwise prints the raw model output
    with a warning so the user can inspect what the model produced.
    """
    print("Loading model...")
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path)
    model.eval()  # inference mode: disables dropout, etc.

    print("Model loaded! Generating quiz question...")
    prompt = (
        "Generate a math quiz question for age 4, difficulty easy, "
        "in strict JSON format like this: "
        '{"question": "", "options": ["", "", "", ""], "answer": 0, "explanation": ""}\n'
    )
    inputs = tokenizer(prompt, return_tensors="pt")

    with torch.no_grad():  # generation needs no gradients
        output = model.generate(
            **inputs,
            max_new_tokens=150,  # limit length
            temperature=0.3,     # lower = more deterministic
            top_p=0.9,           # nucleus sampling
            do_sample=True,      # allow variety but controlled
            eos_token_id=tokenizer.eos_token_id,
        )

    decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)

    # Try to extract the JSON part only (models often echo the prompt or
    # surround the JSON with extra text).
    json_str = _extract_json(decoded_output)
    if json_str is not None:
        try:
            quiz = json.loads(json_str)
            print("✅ Parsed JSON output:")
            print(json.dumps(quiz, indent=2))
        except json.JSONDecodeError:
            print("⚠ Raw output (invalid JSON):")
            print(decoded_output)
    else:
        print("⚠ Raw output (no JSON found):")
        print(decoded_output)


if __name__ == "__main__":
    main()