Copy-AI

Build error

Wajahat698 committed on Aug 27, 2024

Commit

f164755

verified ·

1 Parent(s): 7d95fad

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -152,11 +152,6 @@ def clean_text(text):
     # Convert <a> tags to Markdown links
     for a in soup.find_all('a'):
         a.replace_with(f"[{a.get_text()}]({a['href']})")
-    # Remove <i> and <em> tags but keep their content
-    for tag in ['i', 'em']:
-        for element in soup.find_all(tag):
-            element.unwrap()  # Remove the tag but keep the content
     # Convert <b> and <strong> tags to Markdown bold
     for tag in ['b', 'strong']:
@@ -165,15 +160,39 @@ def clean_text(text):
             element.insert_after('**')
             element.unwrap()
     # Get the cleaned text
     cleaned_text = soup.get_text()
     # Handle extra whitespace and preserve spacing
-    cleaned_text = re.sub(r'\n\s*\n', '\n\n', cleaned_text)  # Maintain paragraph breaks
-    cleaned_text = re.sub(r'\s+', ' ', cleaned_text)  # Replace multiple spaces with a single space
-    cleaned_text = cleaned_text.strip()  # Remove leading/trailing spaces
     return cleaned_text
 def side():
     with st.sidebar.form(key='feedback_form'):

     # Convert <a> tags to Markdown links
     for a in soup.find_all('a'):
         a.replace_with(f"[{a.get_text()}]({a['href']})")
     # Convert <b> and <strong> tags to Markdown bold
     for tag in ['b', 'strong']:
             element.insert_after('**')
             element.unwrap()
+    # Convert <i> and <em> tags to Markdown italic
+    for tag in ['i', 'em']:
+        for element in soup.find_all(tag):
+            element.insert_before('*')
+            element.insert_after('*')
+            element.unwrap()
+    # Preserve <h1> to <h6> tags as Markdown headings
+    for level in range(1, 7):
+        for heading in soup.find_all(f'h{level}'):
+            heading.insert_before('#' * level + ' ')
+            heading.insert_after('\n')
+            heading.unwrap()
+    # Convert <u> tags to Markdown underline (if needed)
+    # Markdown does not support underline, so we'll remove the tags
+    for tag in ['u']:
+        for element in soup.find_all(tag):
+            element.unwrap()
     # Get the cleaned text
     cleaned_text = soup.get_text()
     # Handle extra whitespace and preserve spacing
+    # Maintain paragraph breaks
+    cleaned_text = re.sub(r'\n\s*\n', '\n\n', cleaned_text)
+    # Replace multiple spaces with a single space
+    cleaned_text = re.sub(r'\s+', ' ', cleaned_text)
+    # Strip leading/trailing spaces
+    cleaned_text = cleaned_text.strip()
     return cleaned_text
 def side():
     with st.sidebar.form(key='feedback_form'):