Spaces:

Rosan144
/

Document_Question_Answer

Sleeping

File size: 1,510 Bytes

f23cc4f

import torch
import gradio as gr 
import os
from PyPDF2 import PdfReader
from docx import Document
import io


# Use a pipeline as a high-level helper
from transformers import pipeline

# Build the question-answering pipeline once, at module import, so that the
# same object is reused by every call that references `question_answer`.
question_answer = pipeline(
    task="question-answering",
    model="deepset/roberta-base-squad2",
)



def _read_upload_bytes(file_obj, filename):
    """Return the raw bytes of an upload given as a path or a file-like object."""
    # Prefer the on-disk file: it works whether the upload arrived as a path
    # string or as a wrapper object, and is independent of the wrapper's
    # current read position or open/closed state.
    if os.path.isfile(filename):
        with open(filename, "rb") as handle:
            return handle.read()

    # Fall back to in-memory file-like objects that only carry a name.
    if hasattr(file_obj, "read"):
        data = file_obj.read()
        return data.encode("utf-8") if isinstance(data, str) else data

    raise FileNotFoundError(f"file not found: {filename}")


def extract_file_content(file_obj):
    """Return the plain text of an uploaded PDF, TXT, or DOCX file.

    Args:
        file_obj: The upload to read. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object with a ``name`` attribute holding
            the file name/path and, optionally, a ``read()`` method.

    Returns:
        The extracted text as a string. Problems are reported in-band rather
        than raised: an unrecognised extension yields the
        "Unsupported file format..." message, and any failure while reading
        or parsing yields a string starting with "Error reading file:".
    """
    try:
        if file_obj is None:
            raise ValueError("no file was provided")

        if isinstance(file_obj, (str, os.PathLike)):
            filename = os.fsdecode(file_obj)
        else:
            filename = str(file_obj.name)
        file_ext = filename.split('.')[-1].lower()

        # Reject unknown types before touching the file contents.
        if file_ext not in ("pdf", "txt", "docx"):
            return "Unsupported file format. Please upload PDF, TXT, or DOCX."

        data = _read_upload_bytes(file_obj, filename)

        if file_ext == "pdf":
            reader = PdfReader(io.BytesIO(data))
            # A page with no extractable text must not break concatenation.
            page_texts = (page.extract_text() or "" for page in reader.pages)
            return "\n".join(page_texts).strip()

        if file_ext == "txt":
            # utf-8-sig decodes plain UTF-8 and also drops a leading BOM.
            return data.decode('utf-8-sig')

        doc = Document(io.BytesIO(data))
        return "\n".join(para.text for para in doc.paragraphs).strip()

    except Exception as e:
        # Deliberate catch-all: callers expect a message string, not a raise.
        return f"Error reading file: {str(e)}"

def get_ans(file, question):
    """Answer ``question`` using the text of the uploaded ``file``.

    Args:
        file: The uploaded file, passed through to ``extract_file_content``.
        question: The question text entered by the user.

    Returns:
        The ``"answer"`` field of the question-answering pipeline result, or
        a short message string when the file or question is missing, the file
        has no readable text, or the file could not be read.
    """
    if file is None:
        return "Please upload a PDF, TXT, or DOCX file."
    if not question or not question.strip():
        return "Please enter a question."

    context = extract_file_content(file)
    if not context or not context.strip():
        return "No readable text was found in the uploaded file."

    # extract_file_content reports failures as message strings; show them to
    # the user instead of running the model over the error text.
    # NOTE(review): a document whose text begins with one of these prefixes
    # would also be returned verbatim.
    if context.startswith(("Error reading file:", "Unsupported file format.")):
        return context

    answer = question_answer(question=question, context=context)
    return answer["answer"]


# Web UI: the uploaded file and the typed question are passed to get_ans, and
# its return value is displayed in the "Answer" text box.
demo = gr.Interface(
    fn=get_ans,
    inputs=[
        gr.File(label="Upload Your File"),
        gr.Textbox(label="Enter question", lines=4),
    ],
    outputs=[gr.Textbox(label="Answer", lines=5)],
    title="@RosangenAi Project 3: Document question answer",
)

# Start the Gradio server for the interface defined above.
demo.launch()