import re

from bs4 import BeautifulSoup


def clean_html(raw_html: str) -> str:
    """Strip HTML tags and normalize whitespace in the remaining text.

    Args:
        raw_html: Markup to parse with BeautifulSoup's ``"html.parser"``
            backend.

    Returns:
        The text extracted from the markup, with every run of whitespace
        collapsed to a single space and leading/trailing whitespace removed.
    """
    soup = BeautifulSoup(raw_html, "html.parser")
    # Join text nodes with a space so words from adjacent elements
    # (e.g. "<p>a</p><p>b</p>") do not fuse together.
    text = soup.get_text(separator=" ")
    # The separator and the source markup can both leave repeated whitespace
    # (spaces, newlines, tabs); squash each run to one space, then trim.
    text = re.sub(r"\s+", " ", text).strip()
    return text