Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,7 +3,12 @@ import pandas as pd
|
|
| 3 |
import os
|
| 4 |
from datetime import datetime
|
| 5 |
import docx2txt
|
| 6 |
-
from docx
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
from PyPDF2 import PdfReader
|
| 8 |
import openai
|
| 9 |
from dotenv import load_dotenv
|
|
@@ -38,13 +43,17 @@ def extract_text_from_docx(file):
|
|
| 38 |
try:
|
| 39 |
text = docx2txt.process(file)
|
| 40 |
return text
|
| 41 |
-
except:
|
| 42 |
-
# If docx2txt fails
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
except Exception as e:
|
| 49 |
st.error(f"Error extracting text from DOCX: {str(e)}")
|
| 50 |
return None
|
|
@@ -633,5 +642,4 @@ def main():
|
|
| 633 |
)
|
| 634 |
|
| 635 |
if __name__ == "__main__":
|
| 636 |
-
main()
|
| 637 |
-
|
|
|
|
| 3 |
import os
|
| 4 |
from datetime import datetime
|
| 5 |
import docx2txt
|
| 6 |
+
# Try importing Document from python-docx, but don't fail if not available
|
| 7 |
+
try:
|
| 8 |
+
from docx import Document
|
| 9 |
+
DOCX_AVAILABLE = True
|
| 10 |
+
except ImportError:
|
| 11 |
+
DOCX_AVAILABLE = False
|
| 12 |
from PyPDF2 import PdfReader
|
| 13 |
import openai
|
| 14 |
from dotenv import load_dotenv
|
|
|
|
| 43 |
try:
|
| 44 |
text = docx2txt.process(file)
|
| 45 |
return text
|
| 46 |
+
except Exception as e1:
|
| 47 |
+
# If docx2txt fails and python-docx is available, try that
|
| 48 |
+
if DOCX_AVAILABLE:
|
| 49 |
+
doc = Document(file)
|
| 50 |
+
text = ""
|
| 51 |
+
for paragraph in doc.paragraphs:
|
| 52 |
+
text += paragraph.text + "\n"
|
| 53 |
+
return text
|
| 54 |
+
else:
|
| 55 |
+
st.error("Could not process DOCX file. Please ensure python-docx is installed.")
|
| 56 |
+
return None
|
| 57 |
except Exception as e:
|
| 58 |
st.error(f"Error extracting text from DOCX: {str(e)}")
|
| 59 |
return None
|
|
|
|
| 642 |
)
|
| 643 |
|
| 644 |
if __name__ == "__main__":
|
| 645 |
+
main()
|
|
|