Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -355,50 +355,47 @@ def extract_name(email):
|
|
| 355 |
return email.split('@')[0].capitalize()
|
| 356 |
|
| 357 |
def clean_text(text):
|
| 358 |
-
|
|
|
|
|
|
|
|
|
|
| 359 |
|
| 360 |
-
# Remove all HTML tags
|
| 361 |
text = re.sub(r'<[^>]*>', '', text)
|
| 362 |
-
# Remove any remaining < or > characters
|
| 363 |
-
text = text.replace('<', '').replace('>', '')
|
| 364 |
-
text = re.sub(r'<[^>]+>', '', text)
|
| 365 |
-
text = re.sub(r'(\d+)\s*(B|M|T|billion|million|trillion)', lambda m: f"{m.group(1)} {m.group(2)}", text)
|
| 366 |
-
text = re.sub(r'(\d)\s*([a-zA-Z])', r'\1 \2', text) # Fix numbers next to letters
|
| 367 |
-
text = re.sub(r'(\d+)\s+([a-zA-Z])', r'\1 \2', text) # Fix broken numbers and words
|
| 368 |
-
text = re.sub(r'<span class="(mathnormal|mord)">.*?</span>', '', text, flags=re.DOTALL)
|
| 369 |
-
|
| 370 |
-
# Split the text into paragraphs
|
| 371 |
-
paragraphs = text.split('\n\n')
|
| 372 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 373 |
cleaned_paragraphs = []
|
| 374 |
for paragraph in paragraphs:
|
| 375 |
lines = paragraph.split('\n')
|
| 376 |
cleaned_lines = []
|
| 377 |
for line in lines:
|
| 378 |
-
#
|
| 379 |
-
if line.strip().startswith('
|
| 380 |
cleaned_line = line.strip()
|
| 381 |
else:
|
| 382 |
-
#
|
| 383 |
-
cleaned_line = re.sub(r'\*
|
| 384 |
cleaned_line = re.sub(r'([a-z])([A-Z])', r'\1 \2', cleaned_line)
|
| 385 |
|
| 386 |
-
# Handle bullet points
|
| 387 |
-
if cleaned_line.strip().startswith('-'):
|
| 388 |
-
cleaned_line = '\n' + cleaned_line.strip()
|
| 389 |
-
|
| 390 |
-
# Remove extra spaces
|
| 391 |
-
|
| 392 |
cleaned_lines.append(cleaned_line)
|
| 393 |
|
| 394 |
-
# Join
|
| 395 |
cleaned_paragraph = '\n'.join(cleaned_lines)
|
| 396 |
cleaned_paragraphs.append(cleaned_paragraph)
|
| 397 |
|
| 398 |
-
# Join
|
| 399 |
-
cleaned_text = '\n\n'.join(
|
| 400 |
return cleaned_text
|
| 401 |
-
|
| 402 |
|
| 403 |
|
| 404 |
|
|
@@ -2028,9 +2025,8 @@ def handle_prompt(prompt):
|
|
| 2028 |
|
| 2029 |
cleaned_text = clean_text(full_response)
|
| 2030 |
trust_tip, suggestion = get_trust_tip_and_suggestion()
|
| 2031 |
-
formatted_text = format_links_with_escaping(cleaned_text)
|
| 2032 |
|
| 2033 |
-
combined_text = f"{
|
| 2034 |
with response_placeholder:
|
| 2035 |
with st.chat_message("assistant"):
|
| 2036 |
st.markdown(combined_text, unsafe_allow_html=False)
|
|
|
|
| 355 |
return email.split('@')[0].capitalize()
|
| 356 |
|
| 357 |
def clean_text(text):
    """
    Clean text to remove broken formatting while preserving valid Markdown.

    Processing order:
      1. Literal backslash-n sequences become real newlines.
      2. HTML tags (``<...>``) are removed.
      3. Runs of three or more asterisks collapse to ``**`` so that single
         and double asterisks (Markdown emphasis) survive unchanged.
      4. Each line gets a single space inserted between an adjacent digit
         and ASCII letter (``5billion`` -> ``5 billion``).
      5. Lines starting with ``-`` (bullets) are only stripped of
         surrounding whitespace; every other line additionally has stray
         symbols removed and lower/Upper camelCase joins split
         (``helloWorld`` -> ``hello World``).

    Steps 4 and 5 never touch Markdown link targets of the form
    ``](target)``, so URLs are passed through verbatim. Targets containing
    whitespace or parentheses are not recognised and are cleaned like
    ordinary text.

    Args:
        text: Raw text to clean. ``None`` or an empty string yields ``''``.

    Returns:
        The cleaned text. Line breaks and blank lines are preserved.
    """
    if not text:
        return ''

    text = text.replace('\\n', '\n')  # Normalize escaped newlines

    # Remove all HTML tags (Markdown formatting uses no angle-bracket tags)
    text = re.sub(r'<[^>]*>', '', text)

    # Only runs of 3+ asterisks are "excess"; `*` and `**` are valid Markdown
    text = re.sub(r'\*{3,}', '**', text)

    # Working line by line is equivalent to splitting into paragraphs on
    # blank lines and re-joining them, because empty lines clean to ''.
    cleaned_lines = []
    for line in text.split('\n'):
        line = _clean_text_outside_links(line, _clean_text_fix_spacing)
        if line.strip().startswith('-'):
            # Bullet points keep their content; only the indentation goes
            cleaned_line = line.strip()
        else:
            cleaned_line = _clean_text_outside_links(
                line, _clean_text_strip_symbols
            )
        cleaned_lines.append(cleaned_line)

    return '\n'.join(cleaned_lines)


def _clean_text_outside_links(line, transform):
    """Apply *transform* to *line* except inside Markdown ``](target)`` spans."""
    # With one capturing group, re.split puts the matched link targets at
    # the odd indexes and the surrounding text at the even ones.
    parts = re.split(r'(\]\([^()\s]*\))', line)
    return ''.join(
        part if index % 2 else transform(part)
        for index, part in enumerate(parts)
    )


def _clean_text_fix_spacing(segment):
    """Put exactly one space between an adjacent digit and ASCII letter."""
    # `[ \t]*` rather than `\s*`: the gap must never swallow a newline,
    # otherwise a paragraph break after a number would be collapsed.
    segment = re.sub(r'(\d)[ \t]*([a-zA-Z])', r'\1 \2', segment)
    segment = re.sub(r'([a-zA-Z])[ \t]*(\d)', r'\1 \2', segment)
    return segment


def _clean_text_strip_symbols(segment):
    """Drop stray symbols and split camelCase joins in ordinary prose."""
    # Kept: word characters, whitespace, Markdown markers (* _ [ ] ( ) -),
    # sentence punctuation, typographic quotes/dashes/ellipsis, and the
    # punctuation found in URLs and headings (/ % & # + = @).
    # NOTE(review): `$`, backticks, `|`, braces, `<`/`>`, `\`, `^` and `~`
    # are still removed. `$` presumably because the Markdown renderer may
    # treat `$...$` as math -- confirm before adding it to the keep-list.
    segment = re.sub(
        r"""[^\w\s*_\[\]()\-.,:;!?'"/%&#+=@\u2018\u2019\u201c\u201d\u2013\u2014\u2026]""",
        '',
        segment,
    )
    return re.sub(r'([a-z])([A-Z])', r'\1 \2', segment)
|
| 398 |
+
|
| 399 |
|
| 400 |
|
| 401 |
|
|
|
|
| 2025 |
|
| 2026 |
cleaned_text = clean_text(full_response)
|
| 2027 |
trust_tip, suggestion = get_trust_tip_and_suggestion()
|
|
|
|
| 2028 |
|
| 2029 |
+
combined_text = f"{cleaned_text}\n\n---\n\n**Trust Tip**: {trust_tip}\n\n**Suggestion**: {suggestion}"
|
| 2030 |
with response_placeholder:
|
| 2031 |
with st.chat_message("assistant"):
|
| 2032 |
st.markdown(combined_text, unsafe_allow_html=False)
|