Wajahat698 committed on
Commit
65ebf46
·
verified ·
1 Parent(s): 3d3bdda

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -23
app.py CHANGED
@@ -51,6 +51,9 @@ st.set_page_config(layout="wide")
51
  import logging
52
  import asyncio
53
  import re
 
 
 
54
  # Set up logging to suppress Streamlit warnings about experimental functions
55
  logging.getLogger('streamlit').setLevel(logging.ERROR)
56
 
@@ -103,25 +106,18 @@ def convert_pdf_to_md(file):
103
 
104
  def convert_docx_to_md(file):
105
  """
106
- Convert a .docx file to Markdown text with error handling.
107
  """
108
  try:
109
- file.seek(0) # Reset file pointer to the beginning
110
- doc = Document(file) # Directly load the file
111
-
112
- markdown_text = ""
113
- for paragraph in doc.paragraphs:
114
- markdown_text += paragraph.text.strip() + "\n\n" # Add spacing between paragraphs
115
-
116
- if not markdown_text.strip():
117
- st.error("The .docx file is empty or could not be read.")
118
- return None
119
-
120
- return markdown_text
121
-
122
  except Exception as e:
123
- st.error(f"Error converting .docx file: {e}")
124
- return None
125
 
126
  def convert_txt_to_md(file):
127
  """
@@ -1962,13 +1958,7 @@ def handle_prompt(prompt):
1962
  # Prevent duplicate messages in chat history
1963
  if not any(msg["content"] == prompt for msg in st.session_state["chat_history"]):
1964
  st.session_state.chat_history.append({"role": "user", "content": prompt})
1965
- document_response = handle_document_query(prompt)
1966
- if document_response:
1967
- with st.chat_message("assistant"):
1968
- st.markdown(document_response)
1969
- st.session_state["handled"] = True # Mark as handled
1970
- return
1971
-
1972
 
1973
 
1974
 
 
51
  import logging
52
  import asyncio
53
  import re
54
+ from docx import Document as DocxDocument
55
+
56
+
57
  # Set up logging to suppress Streamlit warnings about experimental functions
58
  logging.getLogger('streamlit').setLevel(logging.ERROR)
59
 
 
106
 
107
  def convert_docx_to_md(file):
108
  """
109
+ Extract text from a .docx file and return as a single string.
110
  """
111
  try:
112
+ # Read the file
113
+ doc = DocxDocument(uploaded_file)
114
+ # Extract all text
115
+ text = "\n".join([paragraph.text for paragraph in doc.paragraphs])
116
+ if not text.strip(): # Handle empty content
117
+ raise ValueError("The document has no content.")
118
+ return text
 
 
 
 
 
 
119
  except Exception as e:
120
+ raise ValueError(f"Error reading .docx file: {e}")
 
121
 
122
  def convert_txt_to_md(file):
123
  """
 
1958
  # Prevent duplicate messages in chat history
1959
  if not any(msg["content"] == prompt for msg in st.session_state["chat_history"]):
1960
  st.session_state.chat_history.append({"role": "user", "content": prompt})
1961
+
 
 
 
 
 
 
1962
 
1963
 
1964