Spaces:

trustlogic
/

Live

Sleeping

Wajahat698 committed on Nov 22, 2024

Commit

f4cc211

verified ·

1 Parent(s): 101acac

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -354,10 +354,11 @@ def send_feedback_via_email(name, email, feedback):
 def extract_name(email):
     """Return the text before the first '@' in ``email``, capitalized.

     If ``email`` contains no '@', the whole string is used. The result
     goes through ``str.capitalize``, so the first character is
     upper-cased and the remaining characters are lower-cased.
     """
     local_part, _, _ = email.partition('@')
     return local_part.capitalize()
-def clean_text(text):
     """
     Cleans and formats text to handle special characters, encode URLs,
-    and fix improperly joined bold/italic Markdown formatting.
     """
     # Normalize newlines
     text = text.replace('\\n', '\n')
@@ -380,13 +381,14 @@ def clean_text(text):
     text = re.sub(source_link_pattern, add_markdown_link, text)
-    # Fix joined or nested Markdown formatting
-    text = re.sub(r'\*\*_(.*?)_\*\*', r'_\1_', text)  # Convert **_..._** → _..._
-    text = re.sub(r'_+\*+(.*?)\*+_+', r'_\1_', text)  # Convert _**...**_ → _..._
-    text = re.sub(r'\*+_*(.*?)_*\*+', r'**\1**', text)  # Convert *...* or *_..._* → **...**
-    # Remove stray asterisks or underscores
-    text = re.sub(r'(?<!\w)[\*_]+(?!\w)', '', text)  # Removes standalone * or _
     # Remove extra spaces and normalize
     text = re.sub(r'\s+', ' ', text).strip()
@@ -419,7 +421,6 @@ def clean_text(text):
     return cleaned_text

 def extract_name(email):
     """Return the text before the first '@' in ``email``, capitalized.

     If ``email`` contains no '@', the whole string is used. The result
     goes through ``str.capitalize``, so the first character is
     upper-cased and the remaining characters are lower-cased.
     """
     local_part, _, _ = email.partition('@')
     return local_part.capitalize()
+def clean_and_format_text(text):
     """
     Cleans and formats text to handle special characters, encode URLs,
+    fix italic blocks, handle inline tags, and ensure proper Markdown formatting.
     """
     # Normalize newlines
     text = text.replace('\\n', '\n')
     text = re.sub(source_link_pattern, add_markdown_link, text)
+    # Fix italic blocks and remove unnecessary tags
+    text = re.sub(r'<i>(.*?)</i>', r'_\1_', text)  # Convert <i>...</i> to Markdown italic (_..._)
+    text = re.sub(r'<b>(.*?)</b>', r'**\1**', text)  # Convert <b>...</b> to Markdown bold (**...**)
+    text = re.sub(r'</?(?:i|b|strong|em)>', '', text)  # Remove stray tags
+    # Handle inline tags like "a", "c", "v" breaking into separate lines
+    inline_tag_pattern = r'\b(a|c|v)\b'
+    text = re.sub(inline_tag_pattern, r'\1', text)  # Fix single-character inline tags
     # Remove extra spaces and normalize
     text = re.sub(r'\s+', ' ', text).strip()
     return cleaned_text