Deepakkori45 committed on
Commit
4c63be3
·
verified ·
1 Parent(s): b4891e2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -10
app.py CHANGED
@@ -3,7 +3,12 @@ import pandas as pd
3
  import os
4
  from datetime import datetime
5
  import docx2txt
6
- from docx import Document
 
 
 
 
 
7
  from PyPDF2 import PdfReader
8
  import openai
9
  from dotenv import load_dotenv
@@ -38,13 +43,17 @@ def extract_text_from_docx(file):
38
  try:
39
  text = docx2txt.process(file)
40
  return text
41
- except:
42
- # If docx2txt fails, try python-docx
43
- doc = Document(file)
44
- text = ""
45
- for paragraph in doc.paragraphs:
46
- text += paragraph.text + "\n"
47
- return text
 
 
 
 
48
  except Exception as e:
49
  st.error(f"Error extracting text from DOCX: {str(e)}")
50
  return None
@@ -633,5 +642,4 @@ def main():
633
  )
634
 
635
  if __name__ == "__main__":
636
- main()
637
-
 
3
  import os
4
  from datetime import datetime
5
  import docx2txt
6
+ # Try importing Document from python-docx, but don't fail if not available
7
+ try:
8
+ from docx import Document
9
+ DOCX_AVAILABLE = True
10
+ except ImportError:
11
+ DOCX_AVAILABLE = False
12
  from PyPDF2 import PdfReader
13
  import openai
14
  from dotenv import load_dotenv
 
43
  try:
44
  text = docx2txt.process(file)
45
  return text
46
+ except Exception as e1:
47
+ # If docx2txt fails and python-docx is available, try that
48
+ if DOCX_AVAILABLE:
49
+ doc = Document(file)
50
+ text = ""
51
+ for paragraph in doc.paragraphs:
52
+ text += paragraph.text + "\n"
53
+ return text
54
+ else:
55
+ st.error("Could not process DOCX file. Please ensure python-docx is installed.")
56
+ return None
57
  except Exception as e:
58
  st.error(f"Error extracting text from DOCX: {str(e)}")
59
  return None
 
642
  )
643
 
644
  if __name__ == "__main__":
645
+ main()