Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -146,39 +146,57 @@ def send_feedback_via_email(name, email, feedback):
|
|
| 146 |
st.error(f"Error sending email: {e}")
|
| 147 |
|
| 148 |
|
| 149 |
-
def
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
#
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
return cleaned_text
|
| 177 |
|
| 178 |
-
|
| 179 |
-
"""Converts HTML content to Markdown format using markdownify."""
|
| 180 |
-
return markdownify(html_content, strip=['img', 'video'])
|
| 181 |
-
|
| 182 |
def side():
|
| 183 |
with st.sidebar.form(key='feedback_form'):
|
| 184 |
|
|
@@ -591,8 +609,7 @@ if prompt :
|
|
| 591 |
full_response = output["output"]
|
| 592 |
full_response= replace_terms(full_response)
|
| 593 |
|
| 594 |
-
|
| 595 |
-
cleaned_text = clean_html_text(markdown_text)
|
| 596 |
|
| 597 |
|
| 598 |
#cleaned_text = re.sub(r'</span>', '', cleaned_text)
|
|
|
|
| 146 |
st.error(f"Error sending email: {e}")
|
| 147 |
|
| 148 |
|
| 149 |
+
# Opening/closing <span> and <i> tags only.  The \b keeps tags that merely
# start with "i" (<img>, <iframe>, <input>) from being stripped by accident.
_INLINE_TAG_RE = re.compile(r'</?(?:span|i)\b[^>]*>')

# A bare http(s) URL.  It stops at whitespace, at ")" (end of a markdown link
# target) and at "*" (so emphasis markers wrapped around a URL are still
# treated as prose).  The capturing group makes re.split() keep the URLs.
_URL_RE = re.compile(r'(https?://[^\s)*]+)')

# ASCII asterisk plus the minus-sign / asterisk-operator look-alikes.
_NOISE_CHARS_RE = re.compile(r'[*−∗]')

# A lowercase letter directly followed by an uppercase one ("fooBar").
_MERGED_WORDS_RE = re.compile(r'([a-z])([A-Z])')

_WHITESPACE_RUN_RE = re.compile(r'\s+')


def _clean_prose(fragment):
    """Drop stray asterisk-like characters and split merged words."""
    fragment = _NOISE_CHARS_RE.sub('', fragment)
    return _MERGED_WORDS_RE.sub(r'\1 \2', fragment)


def _clean_line(line):
    """Clean one line, leaving bold headings and URLs untouched."""
    stripped = line.strip()
    if stripped.startswith('**') and stripped.endswith('**'):
        # A fully bold line is treated as a heading: keep its markers.
        cleaned = stripped
    else:
        # re.split() with one capturing group alternates prose / URL, so the
        # odd indexes are URLs and must be passed through unchanged.
        pieces = _URL_RE.split(line)
        cleaned = ''.join(
            piece if index % 2 else _clean_prose(piece)
            for index, piece in enumerate(pieces)
        )
    # Collapse every whitespace run to a single space and trim the ends.
    return _WHITESPACE_RUN_RE.sub(' ', cleaned).strip()


def clean_text(text):
    """Normalize model output into clean, paragraph-separated text.

    Steps:
      1. Turn literal ``\\n`` sequences into real newlines.
      2. Remove ``<span>`` and ``<i>`` tags (their inner text is kept).
      3. Per line: keep fully bold ``**...**`` lines as headings; otherwise
         remove asterisk-like characters and insert a space between merged
         words (``fooBar`` -> ``foo Bar``).  URLs are never modified, so
         markdown links stay valid.
      4. Collapse whitespace runs and drop empty paragraphs.

    Args:
        text: Raw text to clean.  ``None`` or an empty string is accepted.

    Returns:
        The cleaned text, with paragraphs separated by a blank line.  An
        empty string is returned for empty or ``None`` input.
    """
    if not text:
        return ""

    text = text.replace('\\n', '\n')
    text = _INLINE_TAG_RE.sub('', text)

    cleaned_paragraphs = []
    for paragraph in text.split('\n\n'):
        cleaned_paragraph = '\n'.join(
            _clean_line(line) for line in paragraph.split('\n')
        )
        if cleaned_paragraph:
            cleaned_paragraphs.append(cleaned_paragraph)

    return '\n\n'.join(cleaned_paragraphs)
|
| 198 |
|
| 199 |
+
|
|
|
|
|
|
|
|
|
|
| 200 |
def side():
|
| 201 |
with st.sidebar.form(key='feedback_form'):
|
| 202 |
|
|
|
|
| 609 |
full_response = output["output"]
|
| 610 |
full_response= replace_terms(full_response)
|
| 611 |
|
| 612 |
+
cleaned_text = clean_text(markdown_text)
|
|
|
|
| 613 |
|
| 614 |
|
| 615 |
#cleaned_text = re.sub(r'</span>', '', cleaned_text)
|