Spaces:
Sleeping
Sleeping
| from pathlib import Path | |
| from typing import Union | |
| from pypdf import PdfReader | |
| from transformers import pipeline | |
| import gradio as gr | |
# Extractive question-answering pipeline, created once when the module is
# loaded so the model is not rebuilt for each question.
question_answerer = pipeline(
    "question-answering",
    model="deepset/tinyroberta-squad2",
)
def get_text_from_pdf(pdf_file: Union[str, Path]) -> str:
    """Read the PDF at the given path and return the text of all its pages.

    Args:
        pdf_file: Location of the PDF document; handed to ``PdfReader`` as is.

    Returns:
        The text extracted from every page, concatenated in page order with
        no separator inserted between pages.
    """
    reader = PdfReader(pdf_file)
    # Build the result with a single join instead of growing a string with
    # ``+=`` once per page.
    return "".join(page.extract_text() for page in reader.pages)
def answer_doc_question(pdf_file, question):
    """Answer a question using the text of a PDF document.

    Args:
        pdf_file: The PDF to read; passed unchanged to ``get_text_from_pdf``.
            ``None`` is treated as "no document was provided".
        question: The question to ask about the document.

    Returns:
        The ``"answer"`` field of the question-answering pipeline's result,
        or a short explanatory message when the document is missing, the
        question is blank, or no text could be extracted from the PDF.
    """
    # Guard clauses: reply with a readable message instead of letting a
    # missing input propagate into the PDF reader or the model.
    if pdf_file is None:
        return "Please upload a PDF document first."
    if not question or not question.strip():
        return "Please type a question about the document."

    pdf_text = get_text_from_pdf(pdf_file)
    # A PDF with no extractable text (e.g. scanned images) leaves the model
    # with an empty context, so there is nothing to answer from.
    if not pdf_text.strip():
        return "No text could be extracted from this PDF."

    result = question_answerer(question=question, context=pdf_text)
    return result["answer"]
# Input widgets: a PDF upload field and a free-text box for the question.
pdf_input = gr.File(
    label="Upload a PDF document and ask a question about it.",
    file_types=[".pdf"],
)
question = gr.Textbox(label="Type a question regarding the uploaded document here.")

# Wire both inputs to answer_doc_question, show the returned string as text,
# and start the app.
demo = gr.Interface(
    fn=answer_doc_question,
    inputs=[pdf_input, question],
    outputs="text",
)
demo.launch()