mkoot007 committed on
Commit
0850daa
·
1 Parent(s): 7518568

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -11
app.py CHANGED
@@ -1,21 +1,15 @@
1
  import gradio as gr
2
  import re
3
  from docx import Document
4
- from PyPDF2 import PdfFileReader
5
 
6
  # Function to extract text from a PDF file
7
  def extract_text_from_pdf(pdf_file):
8
  text = ""
9
- try:
10
- pdf = PdfFileReader(pdf_file)
11
- if pdf.numPages == 0:
12
- raise ValueError("The PDF file is empty.")
13
- for page_num in range(pdf.getNumPages()):
14
- page = pdf.getPage(page_num)
15
- text += page.extractText()
16
- return text
17
- except Exception as e:
18
- raise ValueError("Error reading the PDF file: " + str(e))
19
 
20
  # Function to extract text from a DOCX file
21
  def extract_text_from_docx(docx_file):
 
1
  import gradio as gr
2
  import re
3
  from docx import Document
4
+ from PyPDF2 import PdfReader # Use PdfReader from PyPDF2
5
 
6
  # Function to extract text from a PDF file
7
  def extract_text_from_pdf(pdf_file):
8
  text = ""
9
+ pdf = PdfReader(pdf_file)
10
+ for page in pdf.pages:
11
+ text += page.extract_text()
12
+ return text
 
 
 
 
 
 
13
 
14
  # Function to extract text from a DOCX file
15
  def extract_text_from_docx(docx_file):