Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -51,6 +51,9 @@ st.set_page_config(layout="wide")
|
|
| 51 |
import logging
|
| 52 |
import asyncio
|
| 53 |
import re
|
|
|
|
|
|
|
|
|
|
| 54 |
# Set up logging to suppress Streamlit warnings about experimental functions
|
| 55 |
logging.getLogger('streamlit').setLevel(logging.ERROR)
|
| 56 |
|
|
@@ -103,25 +106,18 @@ def convert_pdf_to_md(file):
|
|
| 103 |
|
| 104 |
def convert_docx_to_md(file):
|
| 105 |
"""
|
| 106 |
-
|
| 107 |
"""
|
| 108 |
try:
|
| 109 |
-
|
| 110 |
-
doc =
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
if not markdown_text.strip():
|
| 117 |
-
st.error("The .docx file is empty or could not be read.")
|
| 118 |
-
return None
|
| 119 |
-
|
| 120 |
-
return markdown_text
|
| 121 |
-
|
| 122 |
except Exception as e:
|
| 123 |
-
|
| 124 |
-
return None
|
| 125 |
|
| 126 |
def convert_txt_to_md(file):
|
| 127 |
"""
|
|
@@ -1962,13 +1958,7 @@ def handle_prompt(prompt):
|
|
| 1962 |
# Prevent duplicate messages in chat history
|
| 1963 |
if not any(msg["content"] == prompt for msg in st.session_state["chat_history"]):
|
| 1964 |
st.session_state.chat_history.append({"role": "user", "content": prompt})
|
| 1965 |
-
|
| 1966 |
-
if document_response:
|
| 1967 |
-
with st.chat_message("assistant"):
|
| 1968 |
-
st.markdown(document_response)
|
| 1969 |
-
st.session_state["handled"] = True # Mark as handled
|
| 1970 |
-
return
|
| 1971 |
-
|
| 1972 |
|
| 1973 |
|
| 1974 |
|
|
|
|
| 51 |
import logging
|
| 52 |
import asyncio
|
| 53 |
import re
|
| 54 |
+
from docx import Document as DocxDocument
|
| 55 |
+
|
| 56 |
+
|
| 57 |
# Set up logging to suppress Streamlit warnings about experimental functions
|
| 58 |
logging.getLogger('streamlit').setLevel(logging.ERROR)
|
| 59 |
|
|
|
|
| 106 |
|
| 107 |
def convert_docx_to_md(file):
    """
    Extract text from a .docx file and return it as a single string.

    Args:
        file: Path or file-like object handed directly to ``DocxDocument``.

    Returns:
        The text of every paragraph in ``doc.paragraphs``, joined with
        newlines.

    Raises:
        ValueError: If the document cannot be opened or parsed, or if the
            extracted text is empty or whitespace-only.
    """
    try:
        # Read from the `file` argument. Referring to `uploaded_file` here
        # would depend on a name that is not a parameter of this function.
        doc = DocxDocument(file)
        # Extract all paragraph text
        text = "\n".join(paragraph.text for paragraph in doc.paragraphs)
        if not text.strip():  # Handle empty content
            raise ValueError("The document has no content.")
        return text
    except Exception as e:
        # The empty-content error raised above is also caught here, so its
        # message reaches callers with the same prefix as parse failures.
        # `from e` keeps the original traceback attached for debugging.
        raise ValueError(f"Error reading .docx file: {e}") from e
|
|
|
|
| 121 |
|
| 122 |
def convert_txt_to_md(file):
|
| 123 |
"""
|
|
|
|
| 1958 |
# Prevent duplicate messages in chat history
|
| 1959 |
if not any(msg["content"] == prompt for msg in st.session_state["chat_history"]):
|
| 1960 |
st.session_state.chat_history.append({"role": "user", "content": prompt})
|
| 1961 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1962 |
|
| 1963 |
|
| 1964 |
|