File size: 289 Bytes
dfdddb1
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
import re
from bs4 import BeautifulSoup

def clean_html(raw_html: str) -> str:
    """Strip the HTML tags from *raw_html* and normalise the remaining text.

    The markup is parsed with BeautifulSoup's ``html.parser`` backend and its
    text is extracted with a single space as the separator. Every run of
    whitespace is then collapsed to one space, and leading/trailing
    whitespace is removed.

    Args:
        raw_html: The HTML markup to clean.

    Returns:
        The text content as a single whitespace-normalised string.
    """
    extracted = BeautifulSoup(raw_html, "html.parser").get_text(separator=" ")
    # Collapse first, then trim: after collapsing, at most one space can
    # remain at either end of the string.
    collapsed = re.sub(r"\s+", " ", extracted)
    return collapsed.strip()