Spaces:
Runtime error
Runtime error
Update documents/loader.py
Browse files
- documents/loader.py +17 -17
documents/loader.py
CHANGED
|
@@ -1,17 +1,17 @@
|
|
| 1 |
-
import PyPDF2
|
| 2 |
-
from PIL import Image
|
| 3 |
-
import pytesseract
|
| 4 |
-
|
| 5 |
-
class DocumentLoader:
|
| 6 |
-
def load_pdf(self, file_path):
|
| 7 |
-
text = ""
|
| 8 |
-
with open(file_path, 'rb') as file:
|
| 9 |
-
pdf_reader = PyPDF2.PdfReader(file)
|
| 10 |
-
for page in pdf_reader.pages:
|
| 11 |
-
text += page.extract_text()
|
| 12 |
-
return text
|
| 13 |
-
|
| 14 |
-
def load_image(self, file_path):
|
| 15 |
-
image = Image.open(file_path)
|
| 16 |
-
text = pytesseract.image_to_string(image)
|
| 17 |
-
return text
|
|
|
|
| 1 |
+
import pypdf # Changed from PyPDF2
|
| 2 |
+
from PIL import Image
|
| 3 |
+
import pytesseract
|
| 4 |
+
|
| 5 |
+
class DocumentLoader:
|
| 6 |
+
def load_pdf(self, file_path):
|
| 7 |
+
text = ""
|
| 8 |
+
with open(file_path, 'rb') as file:
|
| 9 |
+
pdf_reader = pypdf.PdfReader(file) # Changed here
|
| 10 |
+
for page in pdf_reader.pages:
|
| 11 |
+
text += page.extract_text()
|
| 12 |
+
return text
|
| 13 |
+
|
| 14 |
+
def load_image(self, file_path):
|
| 15 |
+
image = Image.open(file_path)
|
| 16 |
+
text = pytesseract.image_to_string(image)
|
| 17 |
+
return text
|