baocaodulieu / lib /utils.py
hoangthiencm's picture
Update lib/utils.py
44fd71e verified
import unicodedata
import re
import time
import random
import string
from datetime import datetime
def normalize_text(value: str) -> str:
    """Return *value* without diacritics, lowercased and trimmed.

    The input is coerced with ``str()``, decomposed to NFD so accents become
    separate combining marks (U+0300-U+036F) that can be deleted, and the
    Vietnamese letters d-with-stroke (U+0111 / U+0110), which have no
    decomposition, are mapped to plain ``d`` / ``D`` explicitly.

    Any falsy input (``None``, ``""``, ``0`` ...) yields an empty string.
    """
    if value:
        decomposed = unicodedata.normalize("NFD", str(value))
        without_marks = re.sub(r"[\u0300-\u036f]", "", decomposed)
        plain = without_marks.replace("\u0111", "d").replace("\u0110", "D")
        return plain.strip().lower()
    return ""
def slugify(value: str) -> str:
    """Build an underscore-separated slug from *value*.

    The text is first passed through ``normalize_text``. Every character
    that is not ``a-z``, ``0-9``, whitespace or ``_`` is then dropped, the
    result is trimmed, and each run of whitespace becomes a single ``_``.

    Falsy input yields an empty string.
    """
    if value:
        kept = re.sub(r"[^a-z0-9\s_]", "", normalize_text(value))
        return re.sub(r"\s+", "_", kept.strip())
    return ""
def remove_accents(value: str) -> str:
    """Strip diacritics from *value* while preserving case and whitespace.

    The input is coerced with ``str()`` and decomposed to NFD; combining
    marks in U+0300-U+036F are deleted, and d-with-stroke (U+0111 / U+0110)
    is mapped to ``d`` / ``D``. Falsy input yields an empty string.
    """
    if not value:
        return ""
    result = re.sub(r"[\u0300-\u036f]", "", unicodedata.normalize("NFD", str(value)))
    # These letters do not decompose under NFD, so they are replaced by hand.
    for stroked, plain in (("\u0111", "d"), ("\u0110", "D")):
        result = result.replace(stroked, plain)
    return result
def safe_filename(raw_name: str) -> str:
    """Make a safe filename from raw text.

    Accents are removed with ``remove_accents``; every character other than
    ASCII letters, digits, ``.``, ``_``, space and ``-`` is dropped; runs of
    whitespace are collapsed to one space and the result is trimmed.

    Returns ``"file"`` when *raw_name* is falsy, when nothing survives the
    filtering, or when only dots/spaces survive.
    """
    base = remove_accents(raw_name or "file")
    base = re.sub(r"[^a-zA-Z0-9._ -]", "", base)
    base = re.sub(r"\s+", " ", base).strip()
    # "." and ".." pass the character whitelist but name directories, not
    # files; treat any dots-and-spaces-only result as unusable.
    if not base.strip(". "):
        return "file"
    return base
def make_id(prefix: str = "id") -> str:
    """Return an identifier of the form ``<prefix>_<millis>_<suffix>``.

    ``millis`` is ``time.time()`` in whole milliseconds and ``suffix`` is six
    characters drawn from lowercase ASCII letters and digits using the
    ``random`` module (so the value is not a security token).
    """
    millis = int(time.time() * 1000)
    alphabet = string.ascii_lowercase + string.digits
    suffix = "".join(random.choices(alphabet, k=6))
    return "_".join((f"{prefix}", f"{millis}", suffix))
def format_timestamp() -> str:
    """Return the current ``datetime.now()`` value as ``DD/MM/YYYY HH:MM:SS``."""
    return datetime.now().strftime("%d/%m/%Y %H:%M:%S")
def now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string with a ``Z`` suffix.

    The value has the shape ``YYYY-MM-DDTHH:MM:SS[.ffffff]Z``; as with
    ``datetime.isoformat()``, the fractional part is omitted when the
    microsecond field is zero.
    """
    # Local import: the module header only imports ``datetime`` from datetime.
    from datetime import timezone

    # ``datetime.utcnow()`` is deprecated since Python 3.12. Take an aware UTC
    # time instead and drop the tzinfo so ``isoformat()`` does not append
    # "+00:00" in front of the explicit "Z".
    utc_now = datetime.now(timezone.utc).replace(tzinfo=None)
    return utc_now.isoformat() + "Z"
def sanitize_field_type(field_type: str) -> str:
    """Map *field_type* onto one of ``"number"``, ``"none"`` or ``"text"``.

    The input is compared after ``normalize_text``; anything that does not
    normalize to ``"number"`` or ``"none"`` (including falsy input) is
    reported as ``"text"``.
    """
    candidate = normalize_text(field_type or "")
    if candidate in ("number", "none"):
        return candidate
    return "text"