Copy-AI

Build error

App Files Files Community

Wajahat698 committed on Aug 27, 2024

Commit

ee37b61

verified ·

1 Parent(s): 8aa4386

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -4

app.py CHANGED Viewed

@@ -146,9 +146,38 @@ def send_feedback_via_email(name, email, feedback):
         st.error(f"Error sending email: {e}")
-def clean_text(text):
     """Converts HTML content to Markdown format using markdownify."""
-    return markdownify(text, strip=['img', 'video'])
 def side():
     with st.sidebar.form(key='feedback_form'):
@@ -561,8 +590,9 @@ if prompt :
                 })
                 full_response = output["output"]
                 full_response= replace_terms(full_response)
-                cleaned_text = clean_text(full_response)
                 #cleaned_text = re.sub(r'</span>', '', cleaned_text)

         st.error(f"Error sending email: {e}")
+def clean_html_text(text):
+    """Cleans HTML text to preserve basic formatting."""
+    soup = BeautifulSoup(text, 'html.parser')
+    # Convert <a> tags to Markdown links
+    for a in soup.find_all('a'):
+        a.replace_with(f"[{a.get_text()}]({a['href']})")
+    # Remove unwanted tags but preserve their text
+    for tag in ['span', 'i', 'b', 'u', 'em', 'strong']:
+        for element in soup.find_all(tag):
+            element.unwrap()  # Remove the tag but keep the content
+    # Handle headings and preserve formatting
+    for header in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
+        for element in soup.find_all(header):
+            level = header[1]  # Extract heading level (1-6)
+            element.replace_with(f"{'#' * int(level)} {element.get_text()}")
+    # Get the cleaned text
+    cleaned_text = soup.get_text()
+    # Maintain paragraph breaks and replace multiple spaces with a single space
+    cleaned_text = re.sub(r'\n\s*\n', '\n\n', cleaned_text)  # Maintain paragraph breaks
+    cleaned_text = re.sub(r'\s+', ' ', cleaned_text)  # Replace multiple spaces with a single space
+    cleaned_text = cleaned_text.strip()  # Remove leading/trailing spaces
+    return cleaned_text
+def convert_html_to_markdown(html_content):
     """Converts HTML content to Markdown format using markdownify."""
+    # 'img' and 'video' are passed via markdownify's `strip` option so those tags are not converted into Markdown.
+    return markdownify(html_content, strip=['img', 'video'])
 def side():
     with st.sidebar.form(key='feedback_form'):
                 })
                 full_response = output["output"]
                 full_response= replace_terms(full_response)
+                markdown_text = convert_html_to_markdown(full_response)
+                cleaned_text = clean_text(markdown_text)
                 #cleaned_text = re.sub(r'</span>', '', cleaned_text)