ragent-chatbot / utils /normalizer.py
shafiqul1357's picture
upload source code
633bb91 verified
raw
history blame
519 Bytes
import unicodedata
class Normalizer:
    """Text normalizer that applies Unicode NFKC normalization.

    Only Unicode compatibility normalization is performed. Letter case,
    punctuation, and whitespace are deliberately left untouched.
    """

    def __init__(self) -> None:
        """Create a normalizer; it takes no configuration and holds no state."""

    def normalize_text(self, text: str) -> str:
        """Return *text* in Unicode NFKC form.

        NFKC folds compatibility characters into their canonical
        equivalents (for example, full-width Latin letters and digits
        become their ordinary half-width counterparts) and then composes
        the result.

        Args:
            text: The string to normalize.

        Returns:
            The NFKC-normalized string. Case, punctuation, and whitespace
            runs are preserved exactly as NFKC leaves them.

        Raises:
            TypeError: If *text* is not a ``str`` (raised by
                ``unicodedata.normalize``).
        """
        return unicodedata.normalize("NFKC", text)