Spaces:
Sleeping
Sleeping
| import re | |
def remove_html_tags(text: str) -> str:
    """Return *text* with HTML markup removed, entities decoded, lowercased.

    Processing order:

    1. ``<br>`` line breaks (any case, with or without a self-closing
       slash) are replaced by a single space so adjacent words stay apart.
    2. Every remaining ``<...>`` tag is deleted.
    3. HTML character references (``&quot;``, ``&#39;``, ``&amp;``,
       ``&lt;`` ...) are decoded to the characters they stand for.
    4. The result is lowercased.

    Args:
        text: A string that may contain HTML tags and character references.

    Returns:
        The cleaned, lowercased text.
    """
    # Imported locally so this function does not depend on a module-level
    # import that the rest of the file may not have.
    import html

    # Treat every spelling of a line break (<br>, <br/>, <br />, <BR>) as a
    # word separator rather than letting the generic tag regex delete it.
    text = re.sub(r'<br\s*/?>', ' ', text, flags=re.IGNORECASE)

    # Remove any remaining HTML tags (e.g. <p>, <div>, <span>).
    text = re.sub(r'<[^>]+>', '', text)

    # Decode entities only AFTER stripping tags: escaped markup such as
    # "&lt;b&gt;" is literal text and must not be mistaken for a real tag.
    # html.unescape performs a single pass, so "&amp;quot;" becomes "&quot;"
    # and is not decoded a second time.
    clean_text = html.unescape(text)

    return clean_text.lower()