import re
def remove_html_tags(text):
# Replace common HTML entities with their corresponding characters
text = text.replace('"', '"') # Replace "
text = text.replace('"', '"') # Also replace the named entity for "
text = text.replace(''', "'") # Replace '
text = text.replace(''', "'") # Also replace the numeric entity for '
text = text.replace('&', '&') # Replace &
text = text.replace('
', ' ') # Replace line breaks with a space
text = text.replace('
', ' ') # Also handle
# Use regex to remove any remaining HTML tags (e.g.,
,