# Spaces: Sleeping
| import gradio as gr | |
| from pdfminer.high_level import extract_pages, extract_text | |
| from pdfminer.layout import LTTextContainer | |
| from transformers import pipeline | |
def extract_text_from_pdf(pdf_file_path):
    """Return the concatenated text of all text containers in a PDF.

    Args:
        pdf_file_path: Path of the PDF, passed unchanged to pdfminer's
            ``extract_pages``.

    Returns:
        The ``get_text()`` output of every ``LTTextContainer`` element,
        joined in the order pdfminer yields pages and elements. An empty
        string if no such element is found.
    """
    # Join once instead of growing a string with ``+=`` per element, which
    # can degrade to quadratic time on large documents.
    return "".join(
        element.get_text()
        for page_layout in extract_pages(pdf_file_path)
        for element in page_layout
        if isinstance(element, LTTextContainer)
    )
def extract_text_from_pdf_file(pdf_file):
    """Extract the text of a PDF given as an object exposing a ``name`` path.

    Reads the path from ``pdf_file.name`` and delegates to
    ``extract_text_from_pdf``, returning its result unchanged.
    """
    return extract_text_from_pdf(pdf_file.name)
_QA_MODEL_NAME = 'distilbert-base-cased-distilled-squad'

# Lazily created, process-wide pipeline instance (see _get_question_answerer).
_qa_pipeline = None


def _get_question_answerer():
    """Return the shared question-answering pipeline, building it on first use."""
    global _qa_pipeline
    if _qa_pipeline is None:
        _qa_pipeline = pipeline("question-answering", model=_QA_MODEL_NAME)
    return _qa_pipeline


def question_answering(pdf_file, question):
    """Answer a question using the text of an uploaded PDF as context.

    Args:
        pdf_file: The uploaded PDF. Either an object whose ``name``
            attribute is the file path, or the path itself as a string.
        question: The question to ask about the PDF's content.

    Returns:
        The ``'answer'`` field of the pipeline result, or a short
        explanatory message when the upload is missing, the question is
        blank, or the PDF yields no text.
    """
    if pdf_file is None:
        return "Please upload a PDF file."
    if not question or not question.strip():
        return "Please enter a question."

    # Accept both a file-like upload (with ``.name``) and a plain path string.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    context = extract_text_from_pdf(pdf_path)
    if not context.strip():
        # The pipeline cannot answer from an empty context, so report this
        # explicitly instead of letting the request fail.
        return "No extractable text was found in the PDF."

    # Reuse one pipeline across requests; constructing it per call reloads
    # the model every time.
    result = _get_question_answerer()(question=question, context=context)
    return result['answer']
title = 'PDF Text Extraction and Question Answering Demo'

# Gradio UI: a file upload and a text box as inputs, the answer text as output.
iface = gr.Interface(
    fn=question_answering,
    inputs=["file", "text"],
    outputs="text",
    title=title,
    description="Upload a PDF file and ask a question about its content to get an answer.",
    theme="peach",
)

# Launch only when executed as a script so that importing this module
# does not start a server.
if __name__ == "__main__":
    iface.launch()