Wajahat698 committed on
Commit
f4cc211
·
verified ·
1 Parent(s): 101acac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -9
app.py CHANGED
@@ -354,10 +354,11 @@ def send_feedback_via_email(name, email, feedback):
354
  def extract_name(email):
355
  return email.split('@')[0].capitalize()
356
 
357
- def clean_text(text):
 
358
  """
359
  Cleans and formats text to handle special characters, encode URLs,
360
- and fix improperly joined bold/italic Markdown formatting.
361
  """
362
  # Normalize newlines
363
  text = text.replace('\\n', '\n')
@@ -380,13 +381,14 @@ def clean_text(text):
380
 
381
  text = re.sub(source_link_pattern, add_markdown_link, text)
382
 
383
- # Fix joined or nested Markdown formatting
384
- text = re.sub(r'\*\*_(.*?)_\*\*', r'_\1_', text) # Convert **_..._** _..._
385
- text = re.sub(r'_+\*+(.*?)\*+_+', r'_\1_', text) # Convert _**...**_ _..._
386
- text = re.sub(r'\*+_*(.*?)_*\*+', r'**\1**', text) # Convert *...* or *_..._* → **...**
387
 
388
- # Remove stray asterisks or underscores
389
- text = re.sub(r'(?<!\w)[\*_]+(?!\w)', '', text) # Removes standalone * or _
 
390
 
391
  # Remove extra spaces and normalize
392
  text = re.sub(r'\s+', ' ', text).strip()
@@ -419,7 +421,6 @@ def clean_text(text):
419
 
420
  return cleaned_text
421
 
422
-
423
 
424
 
425
 
 
354
  def extract_name(email):
355
  return email.split('@')[0].capitalize()
356
 
357
+
358
+ def clean_and_format_text(text):
359
  """
360
  Cleans and formats text to handle special characters, encode URLs,
361
+ fix italic blocks, handle inline tags, and ensure proper Markdown formatting.
362
  """
363
  # Normalize newlines
364
  text = text.replace('\\n', '\n')
 
381
 
382
  text = re.sub(source_link_pattern, add_markdown_link, text)
383
 
384
+ # Fix italic blocks and remove unnecessary tags
385
+ text = re.sub(r'<i>(.*?)</i>', r'_\1_', text) # Convert <i>...</i> to Markdown italic (_..._)
386
+ text = re.sub(r'<b>(.*?)</b>', r'**\1**', text) # Convert <b>...</b> to Markdown bold (**...**)
387
+ text = re.sub(r'</?(?:i|b|strong|em)>', '', text) # Remove stray tags
388
 
389
+ # Handle inline tags like "a", "c", "v" breaking into separate lines
390
+ inline_tag_pattern = r'\b(a|c|v)\b'
391
+ text = re.sub(inline_tag_pattern, r'\1', text) # Fix single-character inline tags
392
 
393
  # Remove extra spaces and normalize
394
  text = re.sub(r'\s+', ' ', text).strip()
 
421
 
422
  return cleaned_text
423
 
 
424
 
425
 
426