Spaces:
Running
Running
| """Text preprocessing utilities for customer support intent classification.""" | |
| import re | |
| from typing import List | |
| import numpy as np | |
def clean_text(text: str) -> str:
    """Lowercase, strip non-ASCII, and normalize whitespace.

    Punctuation is preserved. Non-string inputs are converted with ``str()``
    before cleaning.

    Whitespace is normalized *before* non-ASCII characters are dropped, so
    Unicode whitespace (e.g. a non-breaking space, U+00A0) becomes a single
    ASCII space instead of being deleted and fusing the words around it.

    Args:
        text: The raw text to clean. Any non-``str`` value is stringified.

    Returns:
        The lowercased, ASCII-only text with every whitespace run collapsed
        to a single space and no leading or trailing whitespace.
    """
    if not isinstance(text, str):
        text = str(text)
    text = text.lower()
    # For str patterns, \s also matches Unicode whitespace; convert it to an
    # ASCII space now, while it is still present, so word boundaries survive
    # the ASCII filter below.
    text = re.sub(r"\s+", " ", text)
    text = text.encode("ascii", errors="ignore").decode("ascii")
    # Dropping a non-ASCII character that sat between two spaces leaves a
    # double space, so collapse once more before trimming.
    return re.sub(r"\s+", " ", text).strip()
def clean_texts(texts: List[str]) -> List[str]:
    """Return a new list holding the ``clean_text`` result for each item.

    Args:
        texts: The strings to clean, processed in order.

    Returns:
        A list with one cleaned string per input element, in input order.
    """
    return list(map(clean_text, texts))
def set_global_seeds(seed: int = 42) -> None:
    """Seed the Python ``random``, NumPy, and (if importable) torch RNGs.

    Args:
        seed: Value passed to every seeding call. Defaults to 42.

    Returns:
        None. The function only mutates global random-number-generator state.
    """
    import random

    # Standard-library RNG first, then NumPy's global RNG.
    for seed_fn in (random.seed, np.random.seed):
        seed_fn(seed)

    try:
        import torch

        torch.manual_seed(seed)
        cuda_present = torch.cuda.is_available()
        if cuda_present:
            # Seed every visible CUDA device, not just the current one.
            torch.cuda.manual_seed_all(seed)
    except ImportError:
        # torch is treated as optional: without it only random/numpy are seeded.
        return