Spaces:

trustlogic
/

Copy-AI

Sleeping

Wajahat698 committed on Aug 27, 2024

Commit

a8a9c22

verified ·

1 Parent(s): e277fb3

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -145,22 +145,25 @@ def send_feedback_via_email(name, email, feedback):
         st.error(f"Error sending email: {e}")
 def clean_text(text):
     soup = BeautifulSoup(text, 'html.parser')
     # Convert <a> tags to Markdown links
     for a in soup.find_all('a'):
         a.replace_with(f"[{a.get_text()}]({a['href']})")
-    # Remove unwanted tags but preserve text
-    for tag in ['span', 'i', 'b']:
         for element in soup.find_all(tag):
             element.unwrap()  # Remove the tag but keep the content
     # Get the cleaned text
     cleaned_text = soup.get_text()
-    # Remove excessive whitespace
-    cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()
     return cleaned_text
 def side():

         st.error(f"Error sending email: {e}")
 def clean_text(text):
+    # Use BeautifulSoup to parse and clean HTML
     soup = BeautifulSoup(text, 'html.parser')
     # Convert <a> tags to Markdown links
     for a in soup.find_all('a'):
         a.replace_with(f"[{a.get_text()}]({a['href']})")
+    # Remove unwanted tags but preserve their text
+    for tag in ['span', 'i', 'b', 'u', 'em', 'strong']:
         for element in soup.find_all(tag):
             element.unwrap()  # Remove the tag but keep the content
     # Get the cleaned text
     cleaned_text = soup.get_text()
+    # Handle extra whitespace and preserve spacing
+    cleaned_text = re.sub(r'\n\s*\n', '\n\n', cleaned_text)  # Maintain paragraph breaks
+    cleaned_text = re.sub(r'\s+', ' ', cleaned_text)  # Collapse every whitespace run (newlines too, so the paragraph breaks kept above are flattened) into one space
+    cleaned_text = cleaned_text.strip()  # Remove leading/trailing spaces
     return cleaned_text
 def side():