Wajahat698 committed on
Commit
f164755
·
verified ·
1 Parent(s): 7d95fad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -9
app.py CHANGED
@@ -152,11 +152,6 @@ def clean_text(text):
152
  # Convert <a> tags to Markdown links
153
  for a in soup.find_all('a'):
154
  a.replace_with(f"[{a.get_text()}]({a['href']})")
155
-
156
- # Remove <i> and <em> tags but keep their content
157
- for tag in ['i', 'em']:
158
- for element in soup.find_all(tag):
159
- element.unwrap() # Remove the tag but keep the content
160
 
161
  # Convert <b> and <strong> tags to Markdown bold
162
  for tag in ['b', 'strong']:
@@ -165,15 +160,39 @@ def clean_text(text):
165
  element.insert_after('**')
166
  element.unwrap()
167
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  # Get the cleaned text
169
  cleaned_text = soup.get_text()
170
 
171
  # Handle extra whitespace and preserve spacing
172
- cleaned_text = re.sub(r'\n\s*\n', '\n\n', cleaned_text) # Maintain paragraph breaks
173
- cleaned_text = re.sub(r'\s+', ' ', cleaned_text) # Replace multiple spaces with a single space
174
- cleaned_text = cleaned_text.strip() # Remove leading/trailing spaces
175
-
 
 
 
176
  return cleaned_text
 
177
  def side():
178
  with st.sidebar.form(key='feedback_form'):
179
 
 
152
  # Convert <a> tags to Markdown links
153
  for a in soup.find_all('a'):
154
  a.replace_with(f"[{a.get_text()}]({a['href']})")
 
 
 
 
 
155
 
156
  # Convert <b> and <strong> tags to Markdown bold
157
  for tag in ['b', 'strong']:
 
160
  element.insert_after('**')
161
  element.unwrap()
162
 
163
+ # Convert <i> and <em> tags to Markdown italic
164
+ for tag in ['i', 'em']:
165
+ for element in soup.find_all(tag):
166
+ element.insert_before('*')
167
+ element.insert_after('*')
168
+ element.unwrap()
169
+
170
+ # Preserve <h1> to <h6> tags as Markdown headings
171
+ for level in range(1, 7):
172
+ for heading in soup.find_all(f'h{level}'):
173
+ heading.insert_before('#' * level + ' ')
174
+ heading.insert_after('\n')
175
+ heading.unwrap()
176
+
177
+ # Convert <u> tags to Markdown underline (if needed)
178
+ # Markdown does not support underline, so we'll remove the tags
179
+ for tag in ['u']:
180
+ for element in soup.find_all(tag):
181
+ element.unwrap()
182
+
183
  # Get the cleaned text
184
  cleaned_text = soup.get_text()
185
 
186
  # Handle extra whitespace and preserve spacing
187
+ # Maintain paragraph breaks
188
+ cleaned_text = re.sub(r'\n\s*\n', '\n\n', cleaned_text)
189
+ # Replace multiple spaces with a single space
190
+ cleaned_text = re.sub(r'\s+', ' ', cleaned_text)
191
+ # Strip leading/trailing spaces
192
+ cleaned_text = cleaned_text.strip()
193
+
194
  return cleaned_text
195
+
196
  def side():
197
  with st.sidebar.form(key='feedback_form'):
198