import re
import html
from typing import List


def clean_text(text: str) -> str:
    """Clean and normalize raw text — decodes HTML, strips URLs, normalizes whitespace."""
    text = html.unescape(text)
    text = re.sub(r'http\S+', '', text)
    text = text.replace('\u201c', '"').replace(
        '\u201d', '"').replace('\u2013', '-')
    text = re.sub(r'\s+', ' ', text).strip()
    return text


def preprocess_batch(texts: List[str]) -> List[str]:
    """Apply clean_text to a list of strings."""
    return [clean_text(text) for text in texts]