Spaces:
Sleeping
Sleeping
| import unicodedata | |
| class Normalizer: | |
| def __init__(self): | |
| pass | |
| def normalize_text(self, text: str) -> str: | |
| # Unicode normalization (e.g., full-width → half-width, etc.) | |
| text = unicodedata.normalize("NFKC", text) | |
| # Lowercase | |
| #text = text.lower() | |
| # Remove punctuation | |
| #text = "".join(char for char in text if char not in self.punctuation) | |
| # Collapse multiple whitespace | |
| #text = re.sub(r"\s+", " ", text).strip() | |
| return text | |