Spaces:
Sleeping
Sleeping
fixing packages
Browse files
product_approval_ai/document_processor.py
CHANGED
|
@@ -16,16 +16,6 @@ def read_text_from_docx(file_path: str) -> str:
|
|
| 16 |
except Exception as e:
|
| 17 |
print(f"An error occurred while reading the docx file: {e}")
|
| 18 |
return ""
|
| 19 |
-
|
| 20 |
-
import fitz # type: ignore # PyMuPDF
|
| 21 |
-
|
| 22 |
-
def convert_pdf_to_text(pdf_path):
|
| 23 |
-
text = ""
|
| 24 |
-
with fitz.open(pdf_path) as doc:
|
| 25 |
-
for page in doc:
|
| 26 |
-
text += page.get_text()
|
| 27 |
-
return text
|
| 28 |
-
|
| 29 |
|
| 30 |
def read_text_file(file_path: str) -> str:
|
| 31 |
"""
|
|
@@ -39,4 +29,15 @@ def read_text_file(file_path: str) -> str:
|
|
| 39 |
return ""
|
| 40 |
except Exception as e:
|
| 41 |
print(f"An error occurred while reading the text file: {e}")
|
| 42 |
-
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
except Exception as e:
|
| 17 |
print(f"An error occurred while reading the docx file: {e}")
|
| 18 |
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
def read_text_file(file_path: str) -> str:
|
| 21 |
"""
|
|
|
|
| 29 |
return ""
|
| 30 |
except Exception as e:
|
| 31 |
print(f"An error occurred while reading the text file: {e}")
|
| 32 |
+
return ""
|
| 33 |
+
|
| 34 |
+
'''
|
| 35 |
+
import fitz # type: ignore # PyMuPDF
|
| 36 |
+
|
| 37 |
+
def convert_pdf_to_text(pdf_path):
|
| 38 |
+
text = ""
|
| 39 |
+
with fitz.open(pdf_path) as doc:
|
| 40 |
+
for page in doc:
|
| 41 |
+
text += page.get_text()
|
| 42 |
+
return text
|
| 43 |
+
'''
|