Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -2079,29 +2079,24 @@ def clean_and_format_markdown(raw_text):
|
|
| 2079 |
|
| 2080 |
|
| 2081 |
def deep_clean_text(text):
    """Flatten *text* into a single tidy line of plain text.

    The cleaning stages run in a fixed order:

    1. NFKC Unicode normalization.
    2. Removal of zero-width characters (U+200B, U+FEFF, U+200C, U+200D).
    3. Literal ``\\n`` sequences and real newlines become spaces.
    4. Anything that looks like an HTML tag (``<...>``) is dropped.
    5. A space is inserted after ``. , ! ?`` when the next char is non-blank.
    6. A space is inserted at digit/letter and letter/digit boundaries.
    7. Whitespace runs collapse to one space and the ends are trimmed.
    8. The Unicode minus sign is replaced with an ASCII hyphen.

    Returns the cleaned string.
    """
    cleaned = unicodedata.normalize('NFKC', text)

    # Zero-width / invisible characters are deleted outright.
    cleaned = re.sub(r'[\u200B\uFEFF\u200C\u200D]', '', cleaned)

    # Escaped "\n" sequences first, then actual line breaks.
    for newline_token in ('\\n', '\n'):
        cleaned = cleaned.replace(newline_token, ' ')

    # Remaining regex stages, applied strictly in this order.
    regex_stages = (
        (r'<[^>]*>', ''),                  # strip HTML tags
        (r'([.,!?])(\S)', r'\1 \2'),       # space after punctuation
        (r'(\d)([A-Za-z])', r'\1 \2'),     # digit followed by letter
        (r'([A-Za-z])(\d)', r'\1 \2'),     # letter followed by digit
        (r'\s+', ' '),                     # collapse whitespace runs
    )
    for pattern, replacement in regex_stages:
        cleaned = re.sub(pattern, replacement, cleaned)
    cleaned = cleaned.strip()

    # Normalize the Unicode minus sign to a plain hyphen.
    return cleaned.replace('−', '-')
|
|
@@ -2277,12 +2272,9 @@ def handle_prompt(prompt):
|
|
| 2277 |
combined_text = full_response
|
| 2278 |
with response_placeholder:
|
| 2279 |
with st.chat_message("assistant"):
|
| 2280 |
-
|
| 2281 |
-
|
| 2282 |
-
|
| 2283 |
-
</div>
|
| 2284 |
-
"""
|
| 2285 |
-
st.write(html_content,unsafe_allow_html=True)
|
| 2286 |
st.session_state.chat_history.append({"role": "assistant", "content": combined_text})
|
| 2287 |
copy_to_clipboard(combined_text)
|
| 2288 |
|
|
|
|
| 2079 |
|
| 2080 |
|
| 2081 |
def deep_clean_text(text):
    """Flatten *text* into a single tidy line of plain text.

    Stages, in order:

    1. NFKC Unicode normalization.
    2. Removal of zero-width characters (U+200B, U+FEFF, U+200C, U+200D).
    3. Literal ``\\n`` sequences and whitespace-like control characters
       (tab, LF, CR, VT, FF, NEL) become spaces.
    4. All remaining C0/C1 control characters are deleted.
    5. Anything that looks like an HTML tag (``<...>``) is dropped.
    6. A space is inserted after ``. , ! ?`` when the next char is non-blank.
    7. A space is inserted at digit/letter and letter/digit boundaries.
    8. Whitespace runs collapse to one space and the ends are trimmed.
    9. The Unicode minus sign is replaced with an ASCII hyphen.

    Returns the cleaned string.
    """
    # Normalize Unicode
    text = unicodedata.normalize('NFKC', text)
    # Remove zero-width and invisible chars
    text = re.sub(r'[\u200B\uFEFF\u200C\u200D]', '', text)
    # Replace newlines and other whitespace-like controls with a space.
    # This must run BEFORE the control-char strip below: \n, \r and \t all
    # live in \x00-\x1F, so stripping first would delete them and glue the
    # neighbouring words together ("hello\nworld" -> "helloworld").
    text = text.replace('\\n', ' ')
    text = re.sub(r'[\t\n\r\x0b\x0c\x85]', ' ', text)
    # Remove the remaining (non-whitespace) control chars
    text = re.sub(r'[\x00-\x1F\x7F-\x9F]', '', text)
    # Remove HTML tags
    text = re.sub(r'<[^>]*>', '', text)
    # Space after punctuation
    text = re.sub(r'([.,!?])(\S)', r'\1 \2', text)
    # Space between numbers and letters
    text = re.sub(r'(\d)([A-Za-z])', r'\1 \2', text)
    text = re.sub(r'([A-Za-z])(\d)', r'\1 \2', text)
    # Normalize spaces
    text = re.sub(r'\s+', ' ', text).strip()
    # Normalize dashes
    text = text.replace('−', '-')

    return text
|
|
|
|
| 2272 |
combined_text = full_response
|
| 2273 |
with response_placeholder:
|
| 2274 |
with st.chat_message("assistant"):
|
| 2275 |
+
logging.debug(repr(combined_text))
|
| 2276 |
+
st.text(combined_text)
|
| 2277 |
+
#st.write(combined_text,unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
| 2278 |
st.session_state.chat_history.append({"role": "assistant", "content": combined_text})
|
| 2279 |
copy_to_clipboard(combined_text)
|
| 2280 |
|