DrDavis committed on
Commit
90c6195
·
1 Parent(s): b56accb

Complete project.

Browse files
Files changed (1) hide show
  1. app.py +24 -3
app.py CHANGED
@@ -1,18 +1,39 @@
 
 
 
 
 
1
  import gradio as gr
2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  def answer_doc_question(pdf_file, question):
4
  pdf_text = get_text_from_pdf(pdf_file)
5
  answer = question_answerer(question, pdf_text)
6
  return answer["answer"]
7
 
 
8
  # Add a default file and question, so it's easy to try out the app.
9
  pdf_input = gr.File(
 
10
  file_types=[".pdf"],
11
  label="Upload a PDF document and ask a question about it.",
12
  )
13
  question = gr.Textbox(
 
14
  label="Type a question regarding the uploaded document here.",
15
  )
16
- gr.Interface(
17
- fn=answer_doc_question, inputs=[pdf_input, question], outputs="text"
18
- ).launch()
 
1
+ from pathlib import Path
2
+ from typing import Union
3
+
4
+ from pypdf import PdfReader
5
+ from transformers import pipeline
6
  import gradio as gr
7
 
8
+
9
+ question_answerer = pipeline(task="question-answering", model="deepset/tinyroberta-squad2")
10
+
11
+
12
+ def get_text_from_pdf(pdf_file: Union[str, Path]) -> str:
13
+ """Read the PDF from the given path and return a string with its entire content."""
14
+ reader = PdfReader(pdf_file)
15
+
16
+ # Extract text from all pages
17
+ full_text = ""
18
+ for page in reader.pages:
19
+ full_text += page.extract_text()
20
+ return full_text
21
+
22
+
23
  def answer_doc_question(pdf_file, question):
24
  pdf_text = get_text_from_pdf(pdf_file)
25
  answer = question_answerer(question, pdf_text)
26
  return answer["answer"]
27
 
28
+
29
  # Add a default file and question, so it's easy to try out the app.
30
  pdf_input = gr.File(
31
+ value="https://ris.uni-paderborn.de/download/30236/30237/author_version.pdf",
32
  file_types=[".pdf"],
33
  label="Upload a PDF document and ask a question about it.",
34
  )
35
  question = gr.Textbox(
36
+ value="What is mobile-env?",
37
  label="Type a question regarding the uploaded document here.",
38
  )
39
+ gr.Interface(fn=answer_doc_question, inputs=[pdf_input, question], outputs="text").launch()