krishbaresha committed on
Commit
dbc37da
·
verified ·
1 Parent(s): c883909

Update pdf_utils.py

Browse files
Files changed (1) hide show
  1. pdf_utils.py +11 -4
pdf_utils.py CHANGED
@@ -1,8 +1,15 @@
1
- import fitz
2
 
3
  def extract_text_from_pdf(file_path):
 
 
 
4
  text = ""
5
- doc = fitz.open(file_path)
6
- for page in doc:
7
- text += page.get_text()
 
 
 
 
8
  return text
 
1
+ import fitz # PyMuPDF
2
 
3
  def extract_text_from_pdf(file_path):
4
+ """
5
+ Extract text from PDF using PyMuPDF
6
+ """
7
  text = ""
8
+ try:
9
+ doc = fitz.open(file_path)
10
+ for page in doc:
11
+ text += page.get_text()
12
+ except Exception as e:
13
+ text = ""
14
+ print(f"PDF extraction error: {e}")
15
  return text