AptlyDigital committed on
Commit
8eb157f
·
verified ·
1 Parent(s): 1b50722

Update documents/loader.py

Browse files
Files changed (1) hide show
  1. documents/loader.py +17 -17
documents/loader.py CHANGED
@@ -1,17 +1,17 @@
1
- import PyPDF2
2
- from PIL import Image
3
- import pytesseract
4
-
5
- class DocumentLoader:
6
- def load_pdf(self, file_path):
7
- text = ""
8
- with open(file_path, 'rb') as file:
9
- pdf_reader = PyPDF2.PdfReader(file)
10
- for page in pdf_reader.pages:
11
- text += page.extract_text()
12
- return text
13
-
14
- def load_image(self, file_path):
15
- image = Image.open(file_path)
16
- text = pytesseract.image_to_string(image)
17
- return text
 
1
+ import pypdf # Changed from PyPDF2
2
+ from PIL import Image
3
+ import pytesseract
4
+
class DocumentLoader:
    """Extract plain text from PDF files and, via OCR, from image files."""

    def load_pdf(self, file_path):
        """Return the text of every page of the PDF at *file_path*.

        Page texts are concatenated in document order with no separator.
        A page whose extraction yields nothing contributes an empty string.

        Raises:
            OSError: if *file_path* cannot be opened for reading
                (for example ``FileNotFoundError``).
        """
        with open(file_path, 'rb') as file:
            pdf_reader = pypdf.PdfReader(file)
            # Build the result with one join rather than repeated ``+=``;
            # ``or ""`` keeps a None/empty page result from breaking the join.
            return "".join(
                page.extract_text() or "" for page in pdf_reader.pages
            )

    def load_image(self, file_path):
        """Return the text that pytesseract recognises in the image at *file_path*."""
        # Image.open holds the underlying file open; the context manager
        # releases it once OCR has finished.
        with Image.open(file_path) as image:
            return pytesseract.image_to_string(image)