"""Gradio demo: generative question answering with a FLAN-T5 model.

The script loads a T5 checkpoint once at import time, exposes
``generate_answer`` for turning a (question, context) pair into a generated
answer, and wires that function into a simple two-textbox Gradio UI.
"""

import gradio as gr
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Checkpoint to load. You can change to "t5-small" if memory is limited.
MODEL_NAME = "google/flan-t5-base"

# Tokenization / generation settings.
MAX_INPUT_TOKENS = 512  # prompts longer than this are truncated
MAX_ANSWER_TOKENS = 150  # upper bound on the generated sequence length
NUM_BEAMS = 4  # beam-search width

# Load model and tokenizer once; they are reused by every request.
tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME)

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)


def generate_answer(question, context):
    """Generate an answer to *question* based on *context*.

    Args:
        question: The question text entered by the user.
        context: The passage the answer should be drawn from.

    Returns:
        The decoded model output, or a short prompt asking for both fields
        when either one is missing, empty, or whitespace-only.
    """
    # Treat None and whitespace-only input the same as an empty field so the
    # model is never asked to answer a blank prompt.
    question = (question or "").strip()
    context = (context or "").strip()
    if not question or not context:
        return "Please provide both question and context."

    input_text = f"question: {question} context: {context}"

    inputs = tokenizer(
        input_text,
        return_tensors="pt",
        max_length=MAX_INPUT_TOKENS,
        truncation=True,
    ).to(device)

    # Unpack the whole encoding so the attention mask is passed along with
    # the input ids instead of being silently dropped.
    outputs = model.generate(
        **inputs,
        max_length=MAX_ANSWER_TOKENS,
        num_beams=NUM_BEAMS,
        early_stopping=True,
    )

    # Only the top beam is returned by default; decode it to plain text.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# Gradio Interface
interface = gr.Interface(
    fn=generate_answer,
    inputs=[
        gr.Textbox(
            lines=2,
            placeholder="Enter your question here...",
            label="Question",
        ),
        gr.Textbox(
            lines=6,
            placeholder="Enter context passage here...",
            label="Context",
        ),
    ],
    outputs=gr.Textbox(label="Generated Answer"),
    title="Generative Question Answering using T5",
    description=(
        "Enter a question and a context passage. "
        "The model will generate an answer using FLAN-T5."
    ),
)

# Start the web server only when executed as a script, so importing this
# module (e.g. to reuse generate_answer) does not launch the UI.
if __name__ == "__main__":
    interface.launch()