Spaces:
Running
Running
File size: 997 Bytes
# e323466
"""Text preprocessing utilities for customer support intent classification."""
import re
from typing import List
import numpy as np
def clean_text(text: str) -> str:
    """Lowercase, strip non-ASCII, and normalize whitespace.

    Punctuation is preserved. Unicode whitespace (for example a no-break
    space) is treated as a word separator and becomes a single ASCII space
    instead of being dropped together with the other non-ASCII characters.

    Args:
        text: The raw input. Values that are not ``str`` are converted with
            ``str()`` before cleaning.

    Returns:
        The lowercased, ASCII-only text with every whitespace run collapsed
        to one space and no leading or trailing whitespace.
    """
    if not isinstance(text, str):
        text = str(text)
    text = text.lower()
    # Collapse whitespace BEFORE dropping non-ASCII characters: for str
    # patterns ``\s`` also matches Unicode whitespace such as U+00A0, which
    # the ASCII round-trip below would otherwise delete, gluing the
    # neighbouring words together.
    text = re.sub(r"\s+", " ", text)
    text = text.encode("ascii", errors="ignore").decode("ascii")
    # Removing a non-ASCII token that sat between two spaces leaves a double
    # space behind, so collapse once more and trim the ends.
    return re.sub(r"\s+", " ", text).strip()
def clean_texts(texts: List[str]) -> List[str]:
    """Return a new list with clean_text applied to every item, in input order."""
    return list(map(clean_text, texts))
def set_global_seeds(seed: int = 42) -> None:
    """Seed Python's random module, numpy, and (when importable) torch.

    Args:
        seed: The value handed to every generator. Defaults to 42.

    Returns:
        None. The function only mutates global random-number-generator state.
    """
    import random as py_random

    py_random.seed(seed)
    np.random.seed(seed)

    try:
        import torch as th

        th.manual_seed(seed)
        cuda_ready = th.cuda.is_available()
        if cuda_ready:
            th.cuda.manual_seed_all(seed)
    except ImportError:
        # torch is an optional dependency; without it only the Python and
        # numpy generators are seeded.
        return
|