# Rosan144's picture
# Create app.py
# f23cc4f verified
import torch
import gradio as gr
import os
from PyPDF2 import PdfReader
from docx import Document
import io
# Use a pipeline as a high-level helper
from transformers import pipeline
# Question-answering pipeline, created once at import time; get_ans calls it
# with a question and the text extracted from the uploaded document.
question_answer = pipeline(
    task="question-answering",
    model="deepset/roberta-base-squad2",
)
def _read_upload_bytes(file_obj, path):
    """Return the upload's raw bytes, from the object itself or from *path*."""
    # Uploads may arrive as an open file object or as a bare path-like value;
    # prefer the object's own read() and fall back to opening the path.
    if hasattr(file_obj, "read"):
        return file_obj.read()
    with open(path, "rb") as handle:
        return handle.read()


def extract_file_content(file_obj):
    """Return the plain text of an uploaded PDF, TXT, or DOCX file.

    Args:
        file_obj: The uploaded file. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object whose ``name`` attribute holds the
            path; if the object also has ``read()``, its bytes are taken
            from there instead of from disk.

    Returns:
        The extracted text. This function does not raise for bad input:
        an unsupported extension yields the "Unsupported file format..."
        message and any failure yields an "Error reading file: ..." message,
        both as ordinary strings.
    """
    if file_obj is None:
        return "Error reading file: no file was provided"

    try:
        path = os.fspath(getattr(file_obj, "name", file_obj))
        file_ext = path.rsplit(".", 1)[-1].lower()
        if file_ext not in ("pdf", "txt", "docx"):
            return "Unsupported file format. Please upload PDF, TXT, or DOCX."

        data = _read_upload_bytes(file_obj, path)

        if file_ext == "txt":
            # utf-8-sig decodes plain UTF-8 unchanged and drops a leading BOM,
            # which would otherwise end up as a stray character in the text.
            return data.decode("utf-8-sig")

        if file_ext == "pdf":
            pages = PdfReader(io.BytesIO(data)).pages
            # extract_text() may yield None for a page without a text layer;
            # treat that as an empty page instead of failing the whole file.
            return "\n".join(page.extract_text() or "" for page in pages).strip()

        paragraphs = Document(io.BytesIO(data)).paragraphs
        return "\n".join(para.text for para in paragraphs).strip()
    except Exception as e:
        # Deliberate catch-all at the UI boundary: callers expect a message
        # string rather than an exception.
        return f"Error reading file: {e}"
def get_ans(file, question):
    """Answer *question* from the text of the uploaded *file*.

    Args:
        file: The uploaded document, passed straight to
            ``extract_file_content``; ``None`` when nothing was uploaded.
        question: The user's question as a string.

    Returns:
        The ``"answer"`` field of the question-answering pipeline's result,
        or a short message when the inputs are missing, the file could not
        be read, or no text could be extracted.
    """
    if file is None:
        return "Please upload a PDF, TXT, or DOCX file."
    if not question or not question.strip():
        return "Please enter a question."

    context = extract_file_content(file)

    # extract_file_content reports failures as message strings. Show those to
    # the user as-is instead of letting the model pick an "answer" out of the
    # error text. (A document that itself begins with one of these prefixes
    # would also be passed through unchanged.)
    if context.startswith(("Error reading file:", "Unsupported file format.")):
        return context
    if not context.strip():
        return "No readable text was found in the uploaded file."

    answer = question_answer(question=question, context=context)
    return answer["answer"]
# UI components, listed in the positional order get_ans takes them:
# (file, question) in, answer text out.
upload_box = gr.File(label="Upload Your File")
question_box = gr.Textbox(label="Enter question", lines=4)
answer_box = gr.Textbox(label="Answer", lines=5)

# Wire the components to get_ans and start the app.
demo = gr.Interface(
    fn=get_ans,
    inputs=[upload_box, question_box],
    outputs=[answer_box],
    title="@RosangenAi Project 3: Document question answer",
)
demo.launch()