Spaces:

trustlogic
/

Live

Sleeping

App Files Files Community

Wajahat698 committed on Nov 22, 2024

Commit

cce6cfe

verified ·

1 Parent(s): 47b988c

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -28

app.py CHANGED Viewed

@@ -355,50 +355,47 @@ def extract_name(email):
     return email.split('@')[0].capitalize()
 def clean_text(text):
-    text = text.replace('\\n', '\n')
-    # Remove all HTML tags, including nested structures
     text = re.sub(r'<[^>]*>', '', text)
-    # Remove any remaining < or > characters
-    text = text.replace('<', '').replace('>', '')
-    text = re.sub(r'<[^>]+>', '', text)
-    text = re.sub(r'(\d+)\s*(B|M|T|billion|million|trillion)', lambda m: f"{m.group(1)} {m.group(2)}", text)
-    text = re.sub(r'(\d)\s*([a-zA-Z])', r'\1 \2', text)  # Fix numbers next to letters
-    text = re.sub(r'(\d+)\s+([a-zA-Z])', r'\1 \2', text)  # Fix broken numbers and words
-    text = re.sub(r'<span class="(mathnormal|mord)">.*?</span>', '', text, flags=re.DOTALL)
-    # Split the text into paragraphs
-    paragraphs = text.split('\n\n')
     cleaned_paragraphs = []
     for paragraph in paragraphs:
         lines = paragraph.split('\n')
         cleaned_lines = []
         for line in lines:
-            # Preserve bold formatting for headings
-            if line.strip().startswith('**') and line.strip().endswith('**'):
                 cleaned_line = line.strip()
             else:
-                # Remove asterisks, special characters, and fix merged text
-                cleaned_line = re.sub(r'\*|\−|\∗', '', line)
                 cleaned_line = re.sub(r'([a-z])([A-Z])', r'\1 \2', cleaned_line)
-            # Handle bullet points
-            if cleaned_line.strip().startswith('-'):
-                cleaned_line = '\n' + cleaned_line.strip()
-            # Remove extra spaces
             cleaned_lines.append(cleaned_line)
-        # Join the lines within each paragraph
         cleaned_paragraph = '\n'.join(cleaned_lines)
         cleaned_paragraphs.append(cleaned_paragraph)
-    # Join the paragraphs back together
-    cleaned_text = '\n\n'.join(para for para in cleaned_paragraphs if para)
     return cleaned_text
@@ -2028,9 +2025,8 @@ def handle_prompt(prompt):
                             cleaned_text = clean_text(full_response)
                             trust_tip, suggestion = get_trust_tip_and_suggestion()
-                            formatted_text = format_links_with_escaping(cleaned_text)
-                            combined_text = f"{formatted_text}\n\n---\n\n**Trust Tip**: {trust_tip}\n\n**Suggestion**: {suggestion}"
                             with response_placeholder:
                                 with st.chat_message("assistant"):
                                     st.markdown(combined_text, unsafe_allow_html=False)

     return email.split('@')[0].capitalize()
 def clean_text(text):
+    """
+    Clean text to remove broken formatting while preserving valid Markdown.
+    """
+    text = text.replace('\\n', '\n')  # Normalize newlines
+    # Remove all HTML tags (but preserve Markdown formatting)
     text = re.sub(r'<[^>]*>', '', text)
+    # Fix spacing issues between numbers and words
+    text = re.sub(r'(\d+)\s*(B|M|T|billion|million|trillion)', r'\1 \2', text)
+    text = re.sub(r'(\d)\s*([a-zA-Z])', r'\1 \2', text)
+    text = re.sub(r'([a-zA-Z])\s*(\d)', r'\1 \2', text)
+    # Preserve Markdown links and formatting
+    text = re.sub(r'\*+', '*', text)  # Collapse every run of asterisks to a single '*' (note: '**bold**' becomes '*bold*')
+    # Split text into paragraphs and clean each paragraph
+    paragraphs = text.split('\n\n')
     cleaned_paragraphs = []
     for paragraph in paragraphs:
         lines = paragraph.split('\n')
         cleaned_lines = []
         for line in lines:
+            # Handle bullet points properly
+            if line.strip().startswith('-'):
                 cleaned_line = line.strip()
             else:
+                # Preserve valid Markdown formatting
+                cleaned_line = re.sub(r'[^\w\s\*_\[\]\(\)-]', '', line)
                 cleaned_line = re.sub(r'([a-z])([A-Z])', r'\1 \2', cleaned_line)
             cleaned_lines.append(cleaned_line)
+        # Join lines back into paragraphs
         cleaned_paragraph = '\n'.join(cleaned_lines)
         cleaned_paragraphs.append(cleaned_paragraph)
+    # Join all paragraphs together
+    cleaned_text = '\n\n'.join(cleaned_paragraphs)
     return cleaned_text
                             cleaned_text = clean_text(full_response)
                             trust_tip, suggestion = get_trust_tip_and_suggestion()
+                            combined_text = f"{cleaned_text}\n\n---\n\n**Trust Tip**: {trust_tip}\n\n**Suggestion**: {suggestion}"
                             with response_placeholder:
                                 with st.chat_message("assistant"):
                                     st.markdown(combined_text, unsafe_allow_html=False)