Akshayram1 committed on
Commit
68803e2
·
verified ·
1 Parent(s): b2bb942

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -0
app.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from pdfminer.high_level import extract_pages, extract_text
3
+ from pdfminer.layout import LTTextContainer
4
+
5
# Heading shown at the top of the Gradio interface.
title = 'PDF Text Extraction Demo'
6
+
7
def extract_text_from_pdf(pdf_file_path):
    """Return the text of every text container found in a PDF.

    Args:
        pdf_file_path: Value passed straight through to pdfminer's
            ``extract_pages``.

    Returns:
        One string made of the ``get_text()`` output of each
        ``LTTextContainer`` element yielded by each page layout, in
        iteration order. Empty when no such element is found.
    """
    fragments = []
    for page_layout in extract_pages(pdf_file_path):
        for element in page_layout:
            # Only text containers are collected; any other layout element
            # yielded by the page is skipped.
            if isinstance(element, LTTextContainer):
                fragments.append(element.get_text())
    # Join once at the end instead of growing a string with += in the loop.
    return "".join(fragments)
14
+
15
def extract_text_from_pdf_file(pdf_file):
    """Gradio callback: extract the text of an uploaded PDF.

    Args:
        pdf_file: The value Gradio supplies for the ``"file"`` input. This
            may be ``None`` (nothing uploaded), a filesystem path string, or
            an object exposing the path through a ``name`` attribute.

    Returns:
        The extracted text, or an empty string when no file was provided.
    """
    if pdf_file is None:
        # Submitting without an upload should yield empty output rather
        # than an AttributeError on ``None.name``.
        return ""
    # Accept both a plain path string and a file wrapper carrying ``.name``;
    # a ``str`` has no ``name`` attribute, so it falls through unchanged.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    return extract_text_from_pdf(pdf_path)
18
+
19
# Build a single-function UI: one file upload in, extracted text out.
iface = gr.Interface(
    fn=extract_text_from_pdf_file,
    inputs="file",
    outputs="text",
    title=title,
    description="Upload a PDF file to extract its text.",
    theme="peach",
)

# Start the web server for the interface.
iface.launch()