GERNET Enody committed on
Multiple pdf file correction
Browse files - utilities/convert.py +6 -3
utilities/convert.py
CHANGED
|
@@ -51,6 +51,9 @@ def convert_pdf_to_text(file):
|
|
| 51 |
images = convert_from_bytes(file)
|
| 52 |
else:
|
| 53 |
images = convert_from_path(file)
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
images = convert_from_bytes(file)
|
| 52 |
else:
|
| 53 |
images = convert_from_path(file)
|
| 54 |
+
extraction = []
|
| 55 |
+
for img in images:
|
| 56 |
+
text = pytesseract.image_to_string(img)
|
| 57 |
+
extraction.append(text)
|
| 58 |
+
|
| 59 |
+
return " ".join(extraction)
|