import gradio as gr
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Checkpoint used for both the tokenizer and the seq2seq model.
# Swap in "t5-small" when memory is limited.
MODEL_NAME = "google/flan-t5-base"

# Prefer the GPU when CUDA reports one; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the tokenizer and model from the same checkpoint, then move the
# model's weights onto the selected device.
tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME).to(device)

def generate_answer(question, context):
    """Generate an answer to *question* from *context* with the loaded T5 model.

    The two inputs are combined into a single prompt of the form
    ``"question: <question> context: <context>"``, tokenized (truncated to
    512 tokens), and decoded with 4-beam search capped at a length of 150.

    Args:
        question: The question text. ``None``, empty, or whitespace-only
            values are treated as missing.
        context: The passage the answer should be drawn from. ``None``,
            empty, or whitespace-only values are treated as missing.

    Returns:
        The decoded answer string (special tokens removed), or a prompt
        asking the user to supply both fields when either one is missing.
    """
    # Normalise first so that None and whitespace-only input are rejected
    # here instead of being sent to the model as an effectively empty prompt.
    question = (question or "").strip()
    context = (context or "").strip()
    if not question or not context:
        return "Please provide both question and context."

    input_text = f"question: {question} context: {context}"

    inputs = tokenizer(
        input_text,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    ).to(device)

    # Unpack the whole encoding so the attention mask is forwarded to
    # generate() together with the input ids.
    outputs = model.generate(
        **inputs,
        max_length=150,
        num_beams=4,
        early_stopping=True,
    )

    # Only one prompt was submitted, so the first sequence is the answer.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# Gradio Interface
# Build the input/output widgets first so the Interface call stays short.
question_box = gr.Textbox(
    lines=2,
    placeholder="Enter your question here...",
    label="Question",
)
context_box = gr.Textbox(
    lines=6,
    placeholder="Enter context passage here...",
    label="Context",
)
answer_box = gr.Textbox(label="Generated Answer")

# Wire the widgets to generate_answer: the two inputs are passed positionally
# as (question, context) and the returned string fills the output box.
interface = gr.Interface(
    fn=generate_answer,
    inputs=[question_box, context_box],
    outputs=answer_box,
    title="Generative Question Answering using T5",
    description=(
        "Enter a question and a context passage. "
        "The model will generate an answer using FLAN-T5."
    ),
)

# Start the web UI.
interface.launch()