Spaces:

trustlogic
/

Copy-AI

Sleeping

App Files Community

Wajahat698 committed on Sep 8, 2024

Commit

70bad2d

verified ·

1 Parent(s): de6f42c

Update app.py

Browse files

Files changed (1) hide show

app.py +36 -35

app.py CHANGED Viewed

@@ -156,52 +156,53 @@ def send_feedback_via_email(name, email, feedback):
         st.error(f"Error sending email: {e}")
 def clean_text(text):
-    # Replace newline escape sequences with actual newlines
     text = text.replace('\\n', '\n')
-    # Use BeautifulSoup to parse the HTML and remove problematic tags
-    soup = BeautifulSoup(text, "html.parser")
-    # Remove all span tags with problematic classes
-    for span in soup.find_all('span', {'class': ['mord', 'mathnormal']}):
-        span.decompose()  # Remove the entire span tag
-    # Get cleaned text without any HTML tags
-    cleaned_text = soup.get_text()
-    # Remove unwanted asterisks and special characters
-    cleaned_text = re.sub(r'[\*−∗]', '', cleaned_text)
-    # Fix numbers adjacent to letters and units (e.g., 10B -> 10 B)
-    cleaned_text = re.sub(r'(\d+)\s*(B|M|T|billion|million|trillion)', lambda m: f"{m.group(1)} {m.group(2)}", cleaned_text)
-    cleaned_text = re.sub(r'(\d)\s*([a-zA-Z])', r'\1 \2', cleaned_text)  # Fix numbers next to letters
-    cleaned_text = re.sub(r'(\d+)\s+([a-zA-Z])', r'\1 \2', cleaned_text)  # Fix broken numbers and words
-    # Ensure that any broken words or split letters are rejoined
-    cleaned_text = re.sub(r'([a-zA-Z])\s(?=[a-zA-Z])', r'\1', cleaned_text)  # Remove unnecessary spaces between letters
-    # Split text into paragraphs based on double newlines for readability
-    paragraphs = cleaned_text.split('\n\n')
     cleaned_paragraphs = []
     for paragraph in paragraphs:
         lines = paragraph.split('\n')
         cleaned_lines = []
         for line in lines:
-            # Separate merged words (e.g., "HelloWorld" -> "Hello World")
-            cleaned_line = re.sub(r'([a-z])([A-Z])', r'\1 \2', line)
-            cleaned_line = re.sub(r'\s+', ' ', cleaned_line).strip()  # Remove extra spaces
             cleaned_lines.append(cleaned_line)
-        # Join cleaned lines into paragraphs
         cleaned_paragraph = '\n'.join(cleaned_lines)
         cleaned_paragraphs.append(cleaned_paragraph)
-    # Join cleaned paragraphs back into the final cleaned text
-    final_cleaned_text = '\n\n'.join(para for para in cleaned_paragraphs if para)
-    return final_cleaned_text
 def get_trust_tip_and_suggestion():
@@ -637,7 +638,7 @@ if prompt :
                 #combined_text = f"{cleaned_text}\n\n---\n\n**Trust Tip**: {trust_tip}\n\n**Suggestion**: {suggestion}"
                 combined_text = f"{cleaned_text}\n\n---\n\n**Trust Tip**: {trust_tip}\n\n**Suggestion**: {suggestion}"
-                st.markdown(combined_text)
                 #seprtor= st.markdown("---")  # Add a separator
                 #t_tip= st.markdown(f"**Trust Tip**: {trust_tip}")

         st.error(f"Error sending email: {e}")
 def clean_text(text):
     text = text.replace('\\n', '\n')
+    # Remove all HTML tags, including nested structures
+    text = re.sub(r'<[^>]*>', '', text)
+    # Remove any remaining < or > characters
+    text = text.replace('<', '').replace('>', '')
+    text = re.sub(r'<[^>]+>', '', text)
+    text = re.sub(r'(\d+)\s*(B|M|T|billion|million|trillion)', lambda m: f"{m.group(1)} {m.group(2)}", text)
+    text = re.sub(r'(\d)\s*([a-zA-Z])', r'\1 \2', text)  # Fix numbers next to letters
+    text = re.sub(r'(\d+)\s+([a-zA-Z])', r'\1 \2', text)  # Fix broken numbers and words
+    text = re.sub(r'<span class="(mathnormal|mord)">.*?</span>', '', text, flags=re.DOTALL)
+    # Split the text into paragraphs
+    paragraphs = text.split('\n\n')
     cleaned_paragraphs = []
     for paragraph in paragraphs:
         lines = paragraph.split('\n')
         cleaned_lines = []
         for line in lines:
+            # Preserve bold formatting for headings
+            if line.strip().startswith('**') and line.strip().endswith('**'):
+                cleaned_line = line.strip()
+            else:
+                # Remove asterisks, special characters, and fix merged text
+                cleaned_line = re.sub(r'\*|\−|\∗', '', line)
+                cleaned_line = re.sub(r'([a-z])([A-Z])', r'\1 \2', cleaned_line)
+            # Handle bullet points
+            if cleaned_line.strip().startswith('-'):
+                cleaned_line = '\n' + cleaned_line.strip()
+            # Remove extra spaces
+            cleaned_line = re.sub(r'\s+', ' ', cleaned_line).strip()
             cleaned_lines.append(cleaned_line)
+        # Join the lines within each paragraph
         cleaned_paragraph = '\n'.join(cleaned_lines)
         cleaned_paragraphs.append(cleaned_paragraph)
+    # Join the paragraphs back together
+    cleaned_text = '\n\n'.join(para for para in cleaned_paragraphs if para)
 def get_trust_tip_and_suggestion():
                 #combined_text = f"{cleaned_text}\n\n---\n\n**Trust Tip**: {trust_tip}\n\n**Suggestion**: {suggestion}"
                 combined_text = f"{cleaned_text}\n\n---\n\n**Trust Tip**: {trust_tip}\n\n**Suggestion**: {suggestion}"
+                st.markdown(combined_text,unsafe_allow_html=True)
                 #seprtor= st.markdown("---")  # Add a separator
                 #t_tip= st.markdown(f"**Trust Tip**: {trust_tip}")