Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -145,22 +145,25 @@ def send_feedback_via_email(name, email, feedback):
|
|
| 145 |
st.error(f"Error sending email: {e}")
|
| 146 |
|
| 147 |
def clean_text(text):
|
|
|
|
| 148 |
soup = BeautifulSoup(text, 'html.parser')
|
| 149 |
|
| 150 |
# Convert <a> tags to Markdown links
|
| 151 |
for a in soup.find_all('a'):
|
| 152 |
a.replace_with(f"[{a.get_text()}]({a['href']})")
|
| 153 |
|
| 154 |
-
# Remove unwanted tags but preserve text
|
| 155 |
-
for tag in ['span', 'i', 'b']:
|
| 156 |
for element in soup.find_all(tag):
|
| 157 |
element.unwrap() # Remove the tag but keep the content
|
| 158 |
|
| 159 |
# Get the cleaned text
|
| 160 |
cleaned_text = soup.get_text()
|
| 161 |
|
| 162 |
-
#
|
| 163 |
-
cleaned_text = re.sub(r'\s
|
|
|
|
|
|
|
| 164 |
|
| 165 |
return cleaned_text
|
| 166 |
def side():
|
|
|
|
| 145 |
st.error(f"Error sending email: {e}")
|
| 146 |
|
| 147 |
def clean_text(text):
    """Convert an HTML fragment to plain text with Markdown-style links.

    ``<a>`` elements that carry an ``href`` become ``[label](href)``; anchors
    with a missing or empty ``href`` are reduced to their label. Remaining
    markup is stripped via ``soup.get_text()``. Whitespace runs inside a
    paragraph collapse to a single space, while paragraphs (text separated by
    one or more blank lines) are kept apart by exactly one blank line.

    Args:
        text: HTML or plain-text string to clean. ``None`` or an empty string
            yields an empty string.

    Returns:
        The cleaned text with no leading or trailing whitespace.
    """
    if not text:
        return ''

    # Use BeautifulSoup to parse and clean HTML
    soup = BeautifulSoup(text, 'html.parser')

    # Convert <a> tags to Markdown links
    for a in soup.find_all('a'):
        label = a.get_text()
        href = a.get('href')
        # Anchors without a target (e.g. <a name="top">) used to raise
        # KeyError on a['href']; keep just their label instead.
        a.replace_with(f"[{label}]({href})" if href else label)

    # Remove unwanted tags but preserve their text
    for element in soup.find_all(['span', 'i', 'b', 'u', 'em', 'strong']):
        element.unwrap()  # Remove the tag but keep the content

    # Get the cleaned text
    raw_text = soup.get_text()

    # Split on blank lines first, then squeeze whitespace inside each
    # paragraph. Collapsing r'\s+' over the whole string (as before) also
    # swallowed the newlines, so paragraph breaks were never preserved.
    paragraphs = (
        re.sub(r'\s+', ' ', chunk).strip()
        for chunk in re.split(r'\n\s*\n', raw_text)
    )
    cleaned_text = '\n\n'.join(p for p in paragraphs if p)

    return cleaned_text
|
| 169 |
def side():
|