from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Load model
tokenizer = AutoTokenizer.from_pretrained("./final_model")
model = AutoModelForSeq2SeqLM.from_pretrained("./final_model")

# Move to CPU if needed
device = torch.device('cpu')
model = model.to(device)

# Test function
def ask_question(question, context):
    input_text = f"question: {question} context: {context}"
    inputs = tokenizer(input_text, return_tensors="pt", max_length=384, truncation=True)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model.generate(**inputs, max_length=128, num_beams=3)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Test examples
test_cases = [
    {
        "context": "In the name of Allah, the Most Gracious, the Most Merciful.",
        "question": "What are the attributes of Allah mentioned?"
    },
    {
        "context": "And We have certainly made the Quran easy for remembrance, so is there any who will remember?",
        "question": "What has Allah made easy?"
    }
]

print("Testing model:\n")
for test in test_cases:
    answer = ask_question(test["question"], test["context"])
    print(f"Context: {test['context']}")
    print(f"Question: {test['question']}")
    print(f"Answer: {answer}")
    print("-" * 80)
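
# Optional sketch (not part of the original script): ask_question() runs one
# example at a time, which is slow for larger test sets. The helper below is
# one way to batch inference, reusing the tokenizer, model, and device loaded
# above. The name ask_questions_batch and the batch_size default are
# illustrative assumptions, not an established API.
def ask_questions_batch(pairs, batch_size=8):
    """Answer a list of (question, context) pairs in padded batches."""
    answers = []
    for i in range(0, len(pairs), batch_size):
        batch = pairs[i:i + batch_size]
        texts = [f"question: {q} context: {c}" for q, c in batch]
        # padding=True pads each batch to its longest sequence so tensors align
        inputs = tokenizer(texts, return_tensors="pt", max_length=384,
                           truncation=True, padding=True)
        inputs = {k: v.to(device) for k, v in inputs.items()}
        with torch.no_grad():
            outputs = model.generate(**inputs, max_length=128, num_beams=3)
        answers.extend(tokenizer.batch_decode(outputs, skip_special_tokens=True))
    return answers

# Example usage of the batched helper on the same test cases:
# batch_answers = ask_questions_batch(
#     [(t["question"], t["context"]) for t in test_cases]
# )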