Wajahat698 committed on
Commit
a8a9c22
·
verified ·
1 Parent(s): e277fb3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -145,22 +145,25 @@ def send_feedback_via_email(name, email, feedback):
145
  st.error(f"Error sending email: {e}")
146
 
147
  def clean_text(text):
 
148
  soup = BeautifulSoup(text, 'html.parser')
149
 
150
  # Convert <a> tags to Markdown links
151
  for a in soup.find_all('a'):
152
  a.replace_with(f"[{a.get_text()}]({a['href']})")
153
 
154
- # Remove unwanted tags but preserve text
155
- for tag in ['span', 'i', 'b']:
156
  for element in soup.find_all(tag):
157
  element.unwrap() # Remove the tag but keep the content
158
 
159
  # Get the cleaned text
160
  cleaned_text = soup.get_text()
161
 
162
- # Remove excessive whitespace
163
- cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()
 
 
164
 
165
  return cleaned_text
166
  def side():
 
145
  st.error(f"Error sending email: {e}")
146
 
147
  def clean_text(text):
148
+ # Use BeautifulSoup to parse and clean HTML
149
  soup = BeautifulSoup(text, 'html.parser')
150
 
151
  # Convert <a> tags to Markdown links
152
  for a in soup.find_all('a'):
153
  a.replace_with(f"[{a.get_text()}]({a['href']})")
154
 
155
+ # Remove unwanted tags but preserve their text
156
+ for tag in ['span', 'i', 'b', 'u', 'em', 'strong']:
157
  for element in soup.find_all(tag):
158
  element.unwrap() # Remove the tag but keep the content
159
 
160
  # Get the cleaned text
161
  cleaned_text = soup.get_text()
162
 
163
+ # Handle extra whitespace and preserve spacing
164
+ cleaned_text = re.sub(r'\n\s*\n', '\n\n', cleaned_text) # Maintain paragraph breaks
165
+ cleaned_text = re.sub(r'\s+', ' ', cleaned_text) # Replace multiple spaces with a single space
166
+ cleaned_text = cleaned_text.strip() # Remove leading/trailing spaces
167
 
168
  return cleaned_text
169
  def side():