File size: 1,283 Bytes
68803e2
 
 
940028c
68803e2
 
 
 
 
 
 
 
 
 
 
 
 
4af4ff7
1d42568
 
 
 
 
 
 
 
 
4af4ff7
68803e2
 
4af4ff7
68803e2
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import gradio as gr
from pdfminer.high_level import extract_pages, extract_text
from pdfminer.layout import LTTextContainer
from transformers import pipeline

def extract_text_from_pdf(pdf_file_path) -> str:
    """Return the concatenated text of every text container in a PDF.

    Args:
        pdf_file_path: Passed unchanged to pdfminer's ``extract_pages``.

    Returns:
        The ``get_text()`` output of each ``LTTextContainer`` element,
        joined in page order and then element order. Elements that are
        not text containers are skipped. The result is an empty string
        when no text container is found.
    """
    # A single join avoids re-copying the accumulated string for every
    # element, which repeated ``+=`` may do.
    return "".join(
        element.get_text()
        for page_layout in extract_pages(pdf_file_path)
        for element in page_layout
        if isinstance(element, LTTextContainer)
    )

def extract_text_from_pdf_file(pdf_file):
    """Return the text of an uploaded PDF, located through its ``name`` attribute.

    Args:
        pdf_file: Object whose ``name`` attribute is handed to
            ``extract_text_from_pdf``.

    Returns:
        Whatever ``extract_text_from_pdf`` returns for that name.
    """
    return extract_text_from_pdf(pdf_file.name)

_QA_MODEL_NAME = 'distilbert-base-cased-distilled-squad'
_qa_pipeline = None


def _get_question_answerer():
    """Return the shared question-answering pipeline, creating it on first use."""
    global _qa_pipeline
    if _qa_pipeline is None:
        # Building the pipeline loads the model, so do it once and reuse it
        # instead of repeating that work for every question.
        _qa_pipeline = pipeline("question-answering", model=_QA_MODEL_NAME)
    return _qa_pipeline


def question_answering(pdf_file, question):
    """Answer a question using the text of an uploaded PDF as context.

    Args:
        pdf_file: The uploaded file. Either an object with a ``name``
            attribute holding the path, or the path itself. ``None`` means
            nothing was uploaded.
        question: The question to ask about the document.

    Returns:
        The ``'answer'`` field of the pipeline result, or a short message
        when the file is missing, the question is blank, or the PDF yields
        no text.
    """
    if pdf_file is None:
        return "Please upload a PDF file."
    if not question or not question.strip():
        return "Please enter a question."

    # Accept both a file-like object exposing ``.name`` and a plain path.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    context = extract_text_from_pdf(pdf_path)
    if not context.strip():
        # Nothing to search in, so skip the model call entirely.
        return "No text could be extracted from the PDF."

    result = _get_question_answerer()(question=question, context=context)
    return result['answer']

# Heading passed to the interface as its title.
title = 'PDF Text Extraction and Question Answering Demo'

# Wire a file input and a text input to question_answering; the returned
# answer is displayed as text.
iface = gr.Interface(
    fn=question_answering,
    inputs=["file", "text"],
    outputs="text",
    title=title,
    description="Upload a PDF file and ask a question about its content to get an answer.",
    theme="peach",
)

# Start the app as soon as the module is executed.
iface.launch()