"""Gradio app that answers a question about an uploaded PDF, TXT, or DOCX file."""

import io
import os

import gradio as gr
import torch
from docx import Document
from PyPDF2 import PdfReader

# Use a pipeline as a high-level helper
from transformers import pipeline

question_answer = pipeline("question-answering", model="deepset/roberta-base-squad2")

_SUPPORTED_EXTENSIONS = ("pdf", "txt", "docx")
_UNSUPPORTED_FORMAT_MESSAGE = "Unsupported file format. Please upload PDF, TXT, or DOCX."


class _UnsupportedFormatError(ValueError):
    """Raised when the uploaded file's extension is not PDF, TXT, or DOCX."""


def _resolve_path(file_obj):
    """Return the filesystem path behind an uploaded-file value.

    Accepts either a path (``str`` / ``os.PathLike``) or an object exposing
    the path as ``.name`` (e.g. a temporary-file wrapper).
    """
    # Check for path-like values first: ``pathlib.Path.name`` is only the
    # basename, so it must not be mistaken for the full path.
    if isinstance(file_obj, (str, os.PathLike)):
        return os.fspath(file_obj)
    return file_obj.name


def _read_document_text(file_obj):
    """Extract the text of an uploaded document, raising on failure.

    Raises:
        _UnsupportedFormatError: if the extension is not pdf/txt/docx.
        Exception: whatever the underlying reader raises for unreadable files.
    """
    path = _resolve_path(file_obj)
    file_ext = path.split(".")[-1].lower()
    if file_ext not in _SUPPORTED_EXTENSIONS:
        raise _UnsupportedFormatError(_UNSUPPORTED_FORMAT_MESSAGE)

    # Open the file ourselves instead of calling ``file_obj.read()``: the
    # upload value is not guaranteed to be a readable stream.
    with open(path, "rb") as stream:
        if file_ext == "pdf":
            reader = PdfReader(stream)
            # ``or ""`` guards against pages for which no text is returned.
            pages = [page.extract_text() or "" for page in reader.pages]
            return "\n".join(pages).strip()
        if file_ext == "docx":
            doc = Document(stream)
            return "\n".join(para.text for para in doc.paragraphs).strip()
        return stream.read().decode("utf-8")


def extract_file_content(file_obj):
    """Return the text of an uploaded PDF, TXT, or DOCX file.

    Args:
        file_obj: a file path, or an object whose ``.name`` is the file path.

    Returns:
        The extracted text. On failure a human-readable message is returned
        instead of raising: the unsupported-format notice, or
        ``"Error reading file: <reason>"``.
    """
    try:
        return _read_document_text(file_obj)
    except _UnsupportedFormatError as exc:
        return str(exc)
    except Exception as e:  # best-effort boundary: report, never crash the UI
        return f"Error reading file: {str(e)}"


def get_ans(file, question):
    """Answer ``question`` using the text of the uploaded ``file``.

    Args:
        file: the value supplied by the file-upload input (``None`` when
            nothing was uploaded).
        question: the user's question.

    Returns:
        The answer span found by the question-answering pipeline, or a
        message explaining why no answer could be produced.
    """
    if file is None:
        return "Please upload a PDF, TXT, or DOCX file."
    if not question or not question.strip():
        return "Please enter a question."

    # Read via the raising helper so an error message is shown to the user
    # rather than being passed to the model as if it were document text.
    try:
        context = _read_document_text(file)
    except _UnsupportedFormatError as exc:
        return str(exc)
    except Exception as exc:
        return f"Error reading file: {exc}"

    # The pipeline rejects an empty context, so report it explicitly.
    if not context.strip():
        return "No readable text was found in the uploaded file."

    answer = question_answer(question=question, context=context)
    return answer["answer"]


demo = gr.Interface(
    fn=get_ans,
    inputs=[
        gr.File(label="Upload Your File"),
        gr.Textbox(label="Enter question", lines=4),
    ],
    outputs=[gr.Textbox(label="Answer", lines=5)],
    title="@RosangenAi Project 3: Document question answer",
)

demo.launch()