Heng2004's picture
Create text_utils.py
ba907f9 verified
raw
history blame
341 Bytes
# data/text_utils.py – shared text helpers
import re
def normalize_question(q: str) -> str:
    """
    Normalize a Lao question so that equivalent phrasings compare equal.

    Steps, in order:
    - lowercase
    - replace basic punctuation (? ! . , : ; straight and curly quotes)
      with a space
    - collapse every run of whitespace into a single space and trim the ends

    Args:
        q: The raw question text.

    Returns:
        The normalized text; an empty string if nothing but punctuation
        and whitespace was given.
    """
    q = q.lower()
    # The curly quotes are spelled as \u escapes (U+201C, U+201D, U+2018,
    # U+2019) instead of literal characters: a mis-decoded literal silently
    # turns into unrelated characters, which then get stripped from the
    # input while the real quotes are left in place.
    # Punctuation becomes a space (not "") so words it separated stay apart.
    q = re.sub(r"[?!.,:;\"'\u201c\u201d\u2018\u2019]", " ", q)
    q = re.sub(r"\s+", " ", q)
    return q.strip()