import gradio as gr
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Path to the locally saved fine-tuned model (written by save_pretrained).
model_path = "./gpt2-finetuned-uet"


def load_model():
    """Load the fine-tuned GPT-2 model and its tokenizer from ``model_path``.

    Returns:
        tuple: ``(model, tokenizer, device)`` where ``device`` is ``"cuda"``
        when available, else ``"cpu"``, and the model has already been moved
        to that device. ``from_pretrained`` returns the model in eval mode,
        so no explicit ``model.eval()`` call is needed.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = GPT2LMHeadModel.from_pretrained(model_path).to(device)
    tokenizer = GPT2Tokenizer.from_pretrained(model_path)
    return model, tokenizer, device


# Load once at import time so every Gradio request reuses the same model.
model, tokenizer, device = load_model()


def generate_response(question):
    """Generate an answer for ``question`` with the fine-tuned GPT-2 model.

    Args:
        question: Free-form user question (plain string from the textbox).

    Returns:
        str: The generated answer text, or a human-readable error message
        if generation fails (the Gradio UI displays either one).
    """
    try:
        # Prompt format must match the one used during fine-tuning.
        prompt = f"<|startoftext|>Question: {question} Answer:"

        # Tokenize (input_ids + attention_mask) and move to the model device.
        inputs = tokenizer(prompt, return_tensors="pt").to(device)

        # inference_mode: skip autograd bookkeeping — less memory, faster,
        # identical output for pure generation.
        with torch.inference_mode():
            output = model.generate(
                **inputs,
                # FIX: the original used max_length=256, which counts the
                # prompt tokens as well — a long question could leave little
                # or no room for the answer. max_new_tokens bounds only the
                # generated continuation.
                max_new_tokens=256,
                pad_token_id=tokenizer.eos_token_id,
                do_sample=True,
                top_k=50,
                top_p=0.95,
                temperature=0.7,
            )

        # Decode everything, then keep only the text after the final
        # "Answer:" marker (the decoded string echoes the prompt).
        full_response = tokenizer.decode(output[0], skip_special_tokens=True)
        answer = full_response.split("Answer:")[-1].strip()
        return answer
    except Exception as e:
        # UI boundary: surface the failure to the user instead of letting
        # the Gradio worker crash.
        return f"Error generating response: {str(e)}"


# Gradio interface definition (examples are shown as clickable presets).
demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Ask me anything about UET Mardan..."),
    outputs="text",
    title="UET Mardan AI Chatbot",
    description="A GPT-2 model fine-tuned on UET Mardan questions and answers",
    examples=[
        ["What programs does UET Mardan offer?"],
        ["How can I apply for admission?"],
        ["What are the contact details of UET Mardan?"],
    ],
    css=".gradio-container {background-color: #f0f2f6}",
    allow_flagging="never",
)

# For Hugging Face Spaces deployment.
if __name__ == "__main__":
    demo.launch()