question-answer / app.py
DrDavis's picture
Getting everything ready.
77bf3a4
raw
history blame
1.09 kB
from pathlib import Path
from typing import Union
from pypdf import PdfReader
from transformers import pipeline
import gradio as gr
# Extractive question-answering pipeline, built once when the module loads.
question_answerer = pipeline(
    task="question-answering",
    model="deepset/tinyroberta-squad2",
)
def get_text_from_pdf(pdf_file: Union[str, Path]) -> str:
    """Read the PDF at the given path and return the text of all its pages.

    Args:
        pdf_file: Location of the PDF; handed unchanged to ``PdfReader``.

    Returns:
        The extracted text of every page, in page order, with a newline
        between consecutive pages.
    """
    reader = PdfReader(pdf_file)
    # Join with a newline so the last word of one page does not fuse with the
    # first word of the next; join also avoids repeated string concatenation.
    return "\n".join(page.extract_text() for page in reader.pages)
def answer_doc_question(pdf_file, question):
    """Answer a question about the contents of an uploaded PDF.

    Args:
        pdf_file: The uploaded PDF as delivered by the file input; passed
            unchanged to ``get_text_from_pdf``. Treated as "nothing
            uploaded" when it is ``None``.
        question: The question text entered by the user.

    Returns:
        The ``"answer"`` field of the question-answering pipeline result, or
        a short explanatory message when the inputs cannot be used.
    """
    # Check the cheap inputs first so no PDF is parsed needlessly.
    if pdf_file is None:
        return "Please upload a PDF document."
    if not question or not question.strip():
        return "Please enter a question."

    pdf_text = get_text_from_pdf(pdf_file)
    if not pdf_text.strip():
        # Nothing to search in: avoid calling the model with an empty context.
        return "No text could be extracted from the uploaded PDF."

    # Keyword arguments make the question/context roles explicit.
    answer = question_answerer(question=question, context=pdf_text)
    return answer["answer"]
# TODO: add a default file and question so the app is easy to try out.
pdf_input = gr.File(
    label="Upload a PDF document and ask a question about it.",
    file_types=[".pdf"],
)
question = gr.Textbox(label="Type a question regarding the uploaded document here.")
# Wire both inputs to the answering function and start the web UI.
gr.Interface(
    fn=answer_doc_question,
    inputs=[pdf_input, question],
    outputs="text",
).launch()