Wajahat698 committed on
Commit
cce6cfe
·
verified ·
1 Parent(s): 47b988c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -28
app.py CHANGED
@@ -355,50 +355,47 @@ def extract_name(email):
355
  return email.split('@')[0].capitalize()
356
 
357
  def clean_text(text):
358
- text = text.replace('\\n', '\n')
 
 
 
359
 
360
- # Remove all HTML tags, including nested structures
361
  text = re.sub(r'<[^>]*>', '', text)
362
- # Remove any remaining < or > characters
363
- text = text.replace('<', '').replace('>', '')
364
- text = re.sub(r'<[^>]+>', '', text)
365
- text = re.sub(r'(\d+)\s*(B|M|T|billion|million|trillion)', lambda m: f"{m.group(1)} {m.group(2)}", text)
366
- text = re.sub(r'(\d)\s*([a-zA-Z])', r'\1 \2', text) # Fix numbers next to letters
367
- text = re.sub(r'(\d+)\s+([a-zA-Z])', r'\1 \2', text) # Fix broken numbers and words
368
- text = re.sub(r'<span class="(mathnormal|mord)">.*?</span>', '', text, flags=re.DOTALL)
369
-
370
- # Split the text into paragraphs
371
- paragraphs = text.split('\n\n')
372
 
 
 
 
 
 
 
 
 
 
 
373
  cleaned_paragraphs = []
374
  for paragraph in paragraphs:
375
  lines = paragraph.split('\n')
376
  cleaned_lines = []
377
  for line in lines:
378
- # Preserve bold formatting for headings
379
- if line.strip().startswith('**') and line.strip().endswith('**'):
380
  cleaned_line = line.strip()
381
  else:
382
- # Remove asterisks, special characters, and fix merged text
383
- cleaned_line = re.sub(r'\*|\−|\', '', line)
384
  cleaned_line = re.sub(r'([a-z])([A-Z])', r'\1 \2', cleaned_line)
385
 
386
- # Handle bullet points
387
- if cleaned_line.strip().startswith('-'):
388
- cleaned_line = '\n' + cleaned_line.strip()
389
-
390
- # Remove extra spaces
391
-
392
  cleaned_lines.append(cleaned_line)
393
 
394
- # Join the lines within each paragraph
395
  cleaned_paragraph = '\n'.join(cleaned_lines)
396
  cleaned_paragraphs.append(cleaned_paragraph)
397
 
398
- # Join the paragraphs back together
399
- cleaned_text = '\n\n'.join(para for para in cleaned_paragraphs if para)
400
  return cleaned_text
401
-
402
 
403
 
404
 
@@ -2028,9 +2025,8 @@ def handle_prompt(prompt):
2028
 
2029
  cleaned_text = clean_text(full_response)
2030
  trust_tip, suggestion = get_trust_tip_and_suggestion()
2031
- formatted_text = format_links_with_escaping(cleaned_text)
2032
 
2033
- combined_text = f"{formatted_text}\n\n---\n\n**Trust Tip**: {trust_tip}\n\n**Suggestion**: {suggestion}"
2034
  with response_placeholder:
2035
  with st.chat_message("assistant"):
2036
  st.markdown(combined_text, unsafe_allow_html=False)
 
355
  return email.split('@')[0].capitalize()
356
 
357
  def clean_text(text):
358
+ """
359
+ Clean text to remove broken formatting while preserving valid Markdown.
360
+ """
361
+ text = text.replace('\\n', '\n') # Normalize newlines
362
 
363
+ # Remove all HTML tags (but preserve Markdown formatting)
364
  text = re.sub(r'<[^>]*>', '', text)
 
 
 
 
 
 
 
 
 
 
365
 
366
+ # Fix spacing issues between numbers and words
367
+ text = re.sub(r'(\d+)\s*(B|M|T|billion|million|trillion)', r'\1 \2", text)
368
+ text = re.sub(r'(\d)\s*([a-zA-Z])', r'\1 \2', text)
369
+ text = re.sub(r'([a-zA-Z])\s*(\d)', r'\1 \2', text)
370
+
371
+ # Preserve Markdown links and formatting
372
+ text = re.sub(r'\*+', '*', text) # Remove excess asterisks but keep single or double for Markdown
373
+
374
+ # Split text into paragraphs and clean each paragraph
375
+ paragraphs = text.split('\n\n')
376
  cleaned_paragraphs = []
377
  for paragraph in paragraphs:
378
  lines = paragraph.split('\n')
379
  cleaned_lines = []
380
  for line in lines:
381
+ # Handle bullet points properly
382
+ if line.strip().startswith('-'):
383
  cleaned_line = line.strip()
384
  else:
385
+ # Preserve valid Markdown formatting
386
+ cleaned_line = re.sub(r'[^\w\s\*_\[\]\(\)-]', '', line)
387
  cleaned_line = re.sub(r'([a-z])([A-Z])', r'\1 \2', cleaned_line)
388
 
 
 
 
 
 
 
389
  cleaned_lines.append(cleaned_line)
390
 
391
+ # Join lines back into paragraphs
392
  cleaned_paragraph = '\n'.join(cleaned_lines)
393
  cleaned_paragraphs.append(cleaned_paragraph)
394
 
395
+ # Join all paragraphs together
396
+ cleaned_text = '\n\n'.join(cleaned_paragraphs)
397
  return cleaned_text
398
+
399
 
400
 
401
 
 
2025
 
2026
  cleaned_text = clean_text(full_response)
2027
  trust_tip, suggestion = get_trust_tip_and_suggestion()
 
2028
 
2029
+ combined_text = f"{cleaned_text}\n\n---\n\n**Trust Tip**: {trust_tip}\n\n**Suggestion**: {suggestion}"
2030
  with response_placeholder:
2031
  with st.chat_message("assistant"):
2032
  st.markdown(combined_text, unsafe_allow_html=False)