Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -146,9 +146,38 @@ def send_feedback_via_email(name, email, feedback):
|
|
| 146 |
st.error(f"Error sending email: {e}")
|
| 147 |
|
| 148 |
|
| 149 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
"""Converts HTML content to Markdown format using markdownify."""
|
| 151 |
-
return markdownify(
|
| 152 |
|
| 153 |
def side():
|
| 154 |
with st.sidebar.form(key='feedback_form'):
|
|
@@ -561,8 +590,9 @@ if prompt :
|
|
| 561 |
})
|
| 562 |
full_response = output["output"]
|
| 563 |
full_response= replace_terms(full_response)
|
| 564 |
-
|
| 565 |
-
|
|
|
|
| 566 |
|
| 567 |
|
| 568 |
#cleaned_text = re.sub(r'</span>', '', cleaned_text)
|
|
|
|
| 146 |
st.error(f"Error sending email: {e}")
|
| 147 |
|
| 148 |
|
| 149 |
+
def clean_html_text(text):
    """Strip HTML markup from *text* while keeping basic Markdown structure.

    Anchors become ``[label](href)`` (or just their label when the anchor
    has no ``href``), ``<h1>``-``<h6>`` become ``#``-prefixed lines, and the
    inline tags ``span``, ``i``, ``b``, ``u``, ``em`` and ``strong`` are
    removed while their text is kept. Whitespace is then normalized: runs
    of spaces/tabs collapse to one space and runs of blank lines collapse
    to a single paragraph break.

    Args:
        text: HTML (or plain-text) string to clean. Empty or ``None``
            yields an empty string.

    Returns:
        The cleaned text with leading/trailing whitespace removed.
    """
    if not text:
        return ""

    soup = BeautifulSoup(text, 'html.parser')

    # Convert <a> tags to Markdown links. Anchors without an href (e.g.
    # named anchors) would raise KeyError on a['href'], so keep only
    # their text.
    for a in soup.find_all('a'):
        label = a.get_text()
        href = a.get('href')
        a.replace_with(label if href is None else f"[{label}]({href})")

    # Remove purely presentational tags but preserve their text.
    for tag in ['span', 'i', 'b', 'u', 'em', 'strong']:
        for element in soup.find_all(tag):
            element.unwrap()

    # Turn headings into Markdown headings. The marker only works at the
    # start of a line, so isolate each heading with blank lines and keep
    # its text on a single line.
    for level in range(1, 7):
        for element in soup.find_all(f"h{level}"):
            title = ' '.join(element.get_text().split())
            element.replace_with(f"\n\n{'#' * level} {title}\n\n")

    cleaned_text = soup.get_text()

    # Collapse horizontal whitespace only. Matching newlines here (as a
    # plain r'\s+' would) flattens every paragraph break into a space.
    cleaned_text = re.sub(r'[^\S\n]+', ' ', cleaned_text)
    # Drop the stray space left on either side of a line break.
    cleaned_text = re.sub(r' ?\n ?', '\n', cleaned_text)
    # Any run of blank lines becomes exactly one paragraph break.
    cleaned_text = re.sub(r'\n{3,}', '\n\n', cleaned_text)

    return cleaned_text.strip()
|
| 177 |
+
|
| 178 |
+
def convert_html_to_markdown(html_content):
    """Return *html_content* rendered as Markdown by markdownify.

    ``img`` and ``video`` are passed to markdownify's ``strip`` option.
    """
    tags_to_strip = ['img', 'video']
    markdown = markdownify(html_content, strip=tags_to_strip)
    return markdown
|
| 181 |
|
| 182 |
def side():
|
| 183 |
with st.sidebar.form(key='feedback_form'):
|
|
|
|
| 590 |
})
|
| 591 |
full_response = output["output"]
|
| 592 |
full_response= replace_terms(full_response)
|
| 593 |
+
|
| 594 |
+
markdown_text = convert_html_to_markdown(full_response)
|
| 595 |
+
cleaned_text = clean_text(markdown_text)
|
| 596 |
|
| 597 |
|
| 598 |
#cleaned_text = re.sub(r'</span>', '', cleaned_text)
|