Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -156,52 +156,53 @@ def send_feedback_via_email(name, email, feedback):
|
|
| 156 |
st.error(f"Error sending email: {e}")
|
| 157 |
|
| 158 |
|
|
|
|
| 159 |
def clean_text(text):
|
| 160 |
-
# Replace newline escape sequences with actual newlines
|
| 161 |
text = text.replace('\\n', '\n')
|
| 162 |
-
|
| 163 |
-
#
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
#
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
# Fix numbers adjacent to letters and units (e.g., 10B -> 10 B)
|
| 177 |
-
cleaned_text = re.sub(r'(\d+)\s*(B|M|T|billion|million|trillion)', lambda m: f"{m.group(1)} {m.group(2)}", cleaned_text)
|
| 178 |
-
cleaned_text = re.sub(r'(\d)\s*([a-zA-Z])', r'\1 \2', cleaned_text) # Fix numbers next to letters
|
| 179 |
-
cleaned_text = re.sub(r'(\d+)\s+([a-zA-Z])', r'\1 \2', cleaned_text) # Fix broken numbers and words
|
| 180 |
-
|
| 181 |
-
# Ensure that any broken words or split letters are rejoined
|
| 182 |
-
cleaned_text = re.sub(r'([a-zA-Z])\s(?=[a-zA-Z])', r'\1', cleaned_text) # Remove unnecessary spaces between letters
|
| 183 |
-
|
| 184 |
-
# Split text into paragraphs based on double newlines for readability
|
| 185 |
-
paragraphs = cleaned_text.split('\n\n')
|
| 186 |
-
|
| 187 |
cleaned_paragraphs = []
|
| 188 |
for paragraph in paragraphs:
|
| 189 |
lines = paragraph.split('\n')
|
| 190 |
cleaned_lines = []
|
| 191 |
for line in lines:
|
| 192 |
-
#
|
| 193 |
-
|
| 194 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
cleaned_lines.append(cleaned_line)
|
| 196 |
-
|
| 197 |
-
# Join
|
| 198 |
cleaned_paragraph = '\n'.join(cleaned_lines)
|
| 199 |
cleaned_paragraphs.append(cleaned_paragraph)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
|
| 201 |
-
# Join cleaned paragraphs back into the final cleaned text
|
| 202 |
-
final_cleaned_text = '\n\n'.join(para for para in cleaned_paragraphs if para)
|
| 203 |
|
| 204 |
-
return final_cleaned_text
|
| 205 |
|
| 206 |
|
| 207 |
def get_trust_tip_and_suggestion():
|
|
@@ -637,7 +638,7 @@ if prompt :
|
|
| 637 |
#combined_text = f"{cleaned_text}\n\n---\n\n**Trust Tip**: {trust_tip}\n\n**Suggestion**: {suggestion}"
|
| 638 |
combined_text = f"{cleaned_text}\n\n---\n\n**Trust Tip**: {trust_tip}\n\n**Suggestion**: {suggestion}"
|
| 639 |
|
| 640 |
-
st.markdown(combined_text)
|
| 641 |
|
| 642 |
#seprtor= st.markdown("---") # Add a separator
|
| 643 |
#t_tip= st.markdown(f"**Trust Tip**: {trust_tip}")
|
|
|
|
| 156 |
st.error(f"Error sending email: {e}")
|
| 157 |
|
| 158 |
|
| 159 |
+
|
| 160 |
def clean_text(text):
|
|
|
|
| 161 |
text = text.replace('\\n', '\n')
|
| 162 |
+
|
| 163 |
+
# Remove all HTML tags, including nested structures
|
| 164 |
+
text = re.sub(r'<[^>]*>', '', text)
|
| 165 |
+
# Remove any remaining < or > characters
|
| 166 |
+
text = text.replace('<', '').replace('>', '')
|
| 167 |
+
text = re.sub(r'<[^>]+>', '', text)
|
| 168 |
+
text = re.sub(r'(\d+)\s*(B|M|T|billion|million|trillion)', lambda m: f"{m.group(1)} {m.group(2)}", text)
|
| 169 |
+
text = re.sub(r'(\d)\s*([a-zA-Z])', r'\1 \2', text) # Fix numbers next to letters
|
| 170 |
+
text = re.sub(r'(\d+)\s+([a-zA-Z])', r'\1 \2', text) # Fix broken numbers and words
|
| 171 |
+
text = re.sub(r'<span class="(mathnormal|mord)">.*?</span>', '', text, flags=re.DOTALL)
|
| 172 |
+
|
| 173 |
+
# Split the text into paragraphs
|
| 174 |
+
paragraphs = text.split('\n\n')
|
| 175 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
cleaned_paragraphs = []
|
| 177 |
for paragraph in paragraphs:
|
| 178 |
lines = paragraph.split('\n')
|
| 179 |
cleaned_lines = []
|
| 180 |
for line in lines:
|
| 181 |
+
# Preserve bold formatting for headings
|
| 182 |
+
if line.strip().startswith('**') and line.strip().endswith('**'):
|
| 183 |
+
cleaned_line = line.strip()
|
| 184 |
+
else:
|
| 185 |
+
# Remove asterisks, special characters, and fix merged text
|
| 186 |
+
cleaned_line = re.sub(r'\*|\−|\∗', '', line)
|
| 187 |
+
cleaned_line = re.sub(r'([a-z])([A-Z])', r'\1 \2', cleaned_line)
|
| 188 |
+
|
| 189 |
+
# Handle bullet points
|
| 190 |
+
if cleaned_line.strip().startswith('-'):
|
| 191 |
+
cleaned_line = '\n' + cleaned_line.strip()
|
| 192 |
+
|
| 193 |
+
# Remove extra spaces
|
| 194 |
+
cleaned_line = re.sub(r'\s+', ' ', cleaned_line).strip()
|
| 195 |
cleaned_lines.append(cleaned_line)
|
| 196 |
+
|
| 197 |
+
# Join the lines within each paragraph
|
| 198 |
cleaned_paragraph = '\n'.join(cleaned_lines)
|
| 199 |
cleaned_paragraphs.append(cleaned_paragraph)
|
| 200 |
+
|
| 201 |
+
# Join the paragraphs back together
|
| 202 |
+
cleaned_text = '\n\n'.join(para for para in cleaned_paragraphs if para)
|
| 203 |
+
|
| 204 |
|
|
|
|
|
|
|
| 205 |
|
|
|
|
| 206 |
|
| 207 |
|
| 208 |
def get_trust_tip_and_suggestion():
|
|
|
|
| 638 |
#combined_text = f"{cleaned_text}\n\n---\n\n**Trust Tip**: {trust_tip}\n\n**Suggestion**: {suggestion}"
|
| 639 |
combined_text = f"{cleaned_text}\n\n---\n\n**Trust Tip**: {trust_tip}\n\n**Suggestion**: {suggestion}"
|
| 640 |
|
| 641 |
+
st.markdown(combined_text,unsafe_allow_html=True)
|
| 642 |
|
| 643 |
#seprtor= st.markdown("---") # Add a separator
|
| 644 |
#t_tip= st.markdown(f"**Trust Tip**: {trust_tip}")
|