mkoot007 committed on
Commit
295f37f
·
1 Parent(s): bbd4821

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -7
app.py CHANGED
@@ -1,15 +1,14 @@
1
  import gradio as gr
2
  import re
3
- from docx import Document # Use python-docx to read DOCX files
4
- from PyPDF2 import PdfFileReader # Import PdfFileReader from PyPDF2
5
 
6
  # Function to extract text from a PDF file
7
  def extract_text_from_pdf(pdf_file):
8
  text = ""
9
- pdf = PdfFileReader(pdf_file)
10
- for page_num in range(pdf.getNumPages()):
11
- page = pdf.getPage(page_num)
12
- text += page.extractText()
13
  return text
14
 
15
  # Function to extract text from a DOCX file
@@ -65,4 +64,4 @@ iface = gr.Interface(
65
  )
66
 
67
  # Deploy the Gradio interface
68
- iface.launch(share=True)
 
1
  import gradio as gr
2
  import re
3
+ from docx import Document
4
+ from PyPDF2 import PdfReader # Use PdfReader from PyPDF2
5
 
6
  # Function to extract text from a PDF file
7
  def extract_text_from_pdf(pdf_file):
8
  text = ""
9
+ pdf = PdfReader(pdf_file)
10
+ for page in pdf.pages:
11
+ text += page.extract_text()
 
12
  return text
13
 
14
  # Function to extract text from a DOCX file
 
64
  )
65
 
66
  # Deploy the Gradio interface
67
+ iface.launch(share=True)