Wajahat698 committed on
Commit
70bad2d
·
verified ·
1 Parent(s): de6f42c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -35
app.py CHANGED
@@ -156,52 +156,53 @@ def send_feedback_via_email(name, email, feedback):
156
  st.error(f"Error sending email: {e}")
157
 
158
 
 
159
  def clean_text(text):
160
- # Replace newline escape sequences with actual newlines
161
  text = text.replace('\\n', '\n')
162
-
163
- # Use BeautifulSoup to parse the HTML and remove problematic tags
164
- soup = BeautifulSoup(text, "html.parser")
165
-
166
- # Remove all span tags with problematic classes
167
- for span in soup.find_all('span', {'class': ['mord', 'mathnormal']}):
168
- span.decompose() # Remove the entire span tag
169
-
170
- # Get cleaned text without any HTML tags
171
- cleaned_text = soup.get_text()
172
-
173
- # Remove unwanted asterisks and special characters
174
- cleaned_text = re.sub(r'[\*−∗]', '', cleaned_text)
175
-
176
- # Fix numbers adjacent to letters and units (e.g., 10B -> 10 B)
177
- cleaned_text = re.sub(r'(\d+)\s*(B|M|T|billion|million|trillion)', lambda m: f"{m.group(1)} {m.group(2)}", cleaned_text)
178
- cleaned_text = re.sub(r'(\d)\s*([a-zA-Z])', r'\1 \2', cleaned_text) # Fix numbers next to letters
179
- cleaned_text = re.sub(r'(\d+)\s+([a-zA-Z])', r'\1 \2', cleaned_text) # Fix broken numbers and words
180
-
181
- # Ensure that any broken words or split letters are rejoined
182
- cleaned_text = re.sub(r'([a-zA-Z])\s(?=[a-zA-Z])', r'\1', cleaned_text) # Remove unnecessary spaces between letters
183
-
184
- # Split text into paragraphs based on double newlines for readability
185
- paragraphs = cleaned_text.split('\n\n')
186
-
187
  cleaned_paragraphs = []
188
  for paragraph in paragraphs:
189
  lines = paragraph.split('\n')
190
  cleaned_lines = []
191
  for line in lines:
192
- # Separate merged words (e.g., "HelloWorld" -> "Hello World")
193
- cleaned_line = re.sub(r'([a-z])([A-Z])', r'\1 \2', line)
194
- cleaned_line = re.sub(r'\s+', ' ', cleaned_line).strip() # Remove extra spaces
 
 
 
 
 
 
 
 
 
 
 
195
  cleaned_lines.append(cleaned_line)
196
-
197
- # Join cleaned lines into paragraphs
198
  cleaned_paragraph = '\n'.join(cleaned_lines)
199
  cleaned_paragraphs.append(cleaned_paragraph)
 
 
 
 
200
 
201
- # Join cleaned paragraphs back into the final cleaned text
202
- final_cleaned_text = '\n\n'.join(para for para in cleaned_paragraphs if para)
203
 
204
- return final_cleaned_text
205
 
206
 
207
  def get_trust_tip_and_suggestion():
@@ -637,7 +638,7 @@ if prompt :
637
  #combined_text = f"{cleaned_text}\n\n---\n\n**Trust Tip**: {trust_tip}\n\n**Suggestion**: {suggestion}"
638
  combined_text = f"{cleaned_text}\n\n---\n\n**Trust Tip**: {trust_tip}\n\n**Suggestion**: {suggestion}"
639
 
640
- st.markdown(combined_text)
641
 
642
  #seprtor= st.markdown("---") # Add a separator
643
  #t_tip= st.markdown(f"**Trust Tip**: {trust_tip}")
 
156
  st.error(f"Error sending email: {e}")
157
 
158
 
159
+
# Patterns are compiled once at import time rather than on every call.
_MATH_SPAN_RE = re.compile(
    r'<span class="(mathnormal|mord)">.*?</span>', flags=re.DOTALL
)
_HTML_TAG_RE = re.compile(r'<[^>]*>')
# `[^\S\n]` is "whitespace except newline": the gap between a digit and a
# letter is normalised only within a line, so line and paragraph breaks
# that happen to follow a number are left intact.
_DIGIT_LETTER_RE = re.compile(r'(\d)[^\S\n]*([a-zA-Z])')
# ASCII asterisk plus the two non-ASCII look-alike symbols the file strips.
_STRAY_SYMBOL_RE = re.compile(r'[*−∗]')
_LOWER_UPPER_RE = re.compile(r'([a-z])([A-Z])')
_WHITESPACE_RE = re.compile(r'\s+')


def clean_text(text):
    """Strip markup and tidy spacing in ``text``, keeping its paragraphs.

    Steps, in order:

    1. Turn literal backslash-n sequences into real newlines.
    2. Drop ``<span class="mathnormal">`` / ``<span class="mord">``
       elements together with their content.
    3. Remove every remaining HTML tag, then any leftover ``<`` or ``>``.
    4. Put exactly one space between a digit and a letter that follows it
       on the same line (``10B`` -> ``10 B``).
    5. Per line: a line that starts and ends with ``**`` is kept as is
       (bold heading); on every other line asterisk-like symbols are
       removed and a lower-case letter directly followed by an upper-case
       letter is split with a space (``helloWorld`` -> ``hello World``).
       Runs of whitespace are collapsed to one space and the line trimmed.
    6. Paragraphs (separated by a blank line) are rejoined with a blank
       line; paragraphs that end up empty are dropped.

    Args:
        text: The raw string to clean. ``None`` or an empty string is
            accepted and yields ``""``.

    Returns:
        The cleaned string.
    """
    if not text:
        return ""

    text = text.replace('\\n', '\n')

    # Must run before the generic tag strip below; once the angle brackets
    # are gone this pattern can no longer match anything.
    text = _MATH_SPAN_RE.sub('', text)
    text = _HTML_TAG_RE.sub('', text)
    # Remove unbalanced angle brackets that did not form a complete tag.
    text = text.replace('<', '').replace('>', '')

    text = _DIGIT_LETTER_RE.sub(r'\1 \2', text)

    cleaned_paragraphs = []
    for paragraph in text.split('\n\n'):
        cleaned_lines = []
        for line in paragraph.split('\n'):
            stripped = line.strip()
            if stripped.startswith('**') and stripped.endswith('**'):
                # Preserve bold formatting for headings.
                cleaned_line = stripped
            else:
                cleaned_line = _STRAY_SYMBOL_RE.sub('', line)
                cleaned_line = _LOWER_UPPER_RE.sub(r'\1 \2', cleaned_line)
            # Bullet lines ("- ...") need no special handling: each source
            # line is already emitted on its own output line.
            cleaned_lines.append(_WHITESPACE_RE.sub(' ', cleaned_line).strip())
        cleaned_paragraphs.append('\n'.join(cleaned_lines))

    return '\n\n'.join(para for para in cleaned_paragraphs if para)
203
+
204
 
 
 
205
 
 
206
 
207
 
208
  def get_trust_tip_and_suggestion():
 
638
  #combined_text = f"{cleaned_text}\n\n---\n\n**Trust Tip**: {trust_tip}\n\n**Suggestion**: {suggestion}"
639
  combined_text = f"{cleaned_text}\n\n---\n\n**Trust Tip**: {trust_tip}\n\n**Suggestion**: {suggestion}"
640
 
641
+ st.markdown(combined_text,unsafe_allow_html=True)
642
 
643
  #seprtor= st.markdown("---") # Add a separator
644
  #t_tip= st.markdown(f"**Trust Tip**: {trust_tip}")