"""Text helpers: accent stripping, slugs, safe filenames, IDs and timestamps."""

import random
import re
import string
import time
import unicodedata
from datetime import datetime, timezone

# Combining diacritical marks left behind after NFD decomposition.
_COMBINING_MARKS_RE = re.compile(r"[\u0300-\u036f]")
# Characters that are not allowed in a slug (input is already lowercased).
_SLUG_DISALLOWED_RE = re.compile(r"[^a-z0-9\s_]")
# Characters that are not allowed in a filename.
_FILENAME_DISALLOWED_RE = re.compile(r"[^a-zA-Z0-9._ -]")
_WHITESPACE_RUN_RE = re.compile(r"\s+")
_ID_ALPHABET = string.ascii_lowercase + string.digits


def _strip_accents(value) -> str:
    """Return ``str(value)`` with combining accents removed, case preserved."""
    text = unicodedata.normalize("NFD", str(value))
    text = _COMBINING_MARKS_RE.sub("", text)
    # U+0111 / U+0110 (d with stroke) have no NFD decomposition, so they
    # must be mapped by hand.
    return text.replace("\u0111", "d").replace("\u0110", "D")


def normalize_text(value: str) -> str:
    """Normalize text: remove accents, lowercase, trim.

    Returns an empty string for any falsy input (``None``, ``""``, ...).
    """
    if not value:
        return ""
    return _strip_accents(value).strip().lower()


def slugify(value: str) -> str:
    """Convert to slug: no accents, lowercase, underscores.

    Characters other than ``a-z``, ``0-9``, whitespace and ``_`` are dropped;
    each remaining whitespace run becomes a single underscore. Returns an
    empty string for falsy input.
    """
    if not value:
        return ""
    text = _SLUG_DISALLOWED_RE.sub("", normalize_text(value)).strip()
    return _WHITESPACE_RUN_RE.sub("_", text)


def remove_accents(value: str) -> str:
    """Remove Vietnamese accents but keep case.

    Returns an empty string for falsy input.
    """
    if not value:
        return ""
    return _strip_accents(value)


def safe_filename(raw_name: str) -> str:
    """Make a safe filename from raw text.

    Accents are removed, every character outside ``a-zA-Z0-9._ -`` is
    dropped, and runs of spaces are collapsed. Falls back to ``"file"`` when
    the input is falsy or when nothing usable remains.
    """
    base = remove_accents(raw_name or "file")
    # NOTE: tabs/newlines are removed here (not turned into spaces), so the
    # whitespace collapse below only ever sees plain spaces.
    base = _FILENAME_DISALLOWED_RE.sub("", base)
    base = _WHITESPACE_RUN_RE.sub(" ", base).strip()
    # "." and ".." are directory references rather than filenames, so a
    # result made up solely of dots/spaces is treated like an empty one.
    if not base.strip(". "):
        return "file"
    return base


def make_id(prefix: str = "id") -> str:
    """Return ``"<prefix>_<epoch milliseconds>_<6 random chars>"``.

    The random suffix is drawn from lowercase letters and digits using the
    ``random`` module, so the result is not suitable as a security token.
    """
    ts = int(time.time() * 1000)
    rand = "".join(random.choices(_ID_ALPHABET, k=6))
    return f"{prefix}_{ts}_{rand}"


def format_timestamp() -> str:
    """Return the current local time formatted as ``DD/MM/YYYY HH:MM:SS``."""
    return datetime.now().strftime("%d/%m/%Y %H:%M:%S")


def now_iso() -> str:
    """Return the current UTC time as an ISO 8601 string with a ``Z`` suffix."""
    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # time and drop tzinfo so isoformat() does not append "+00:00" before
    # the literal "Z" is added.
    utc_now = datetime.now(timezone.utc).replace(tzinfo=None)
    return utc_now.isoformat() + "Z"


def sanitize_field_type(field_type: str) -> str:
    """Map a raw field type onto ``"number"``, ``"none"`` or ``"text"``.

    The input is compared after ``normalize_text`` (accent-stripped, trimmed,
    lowercased); anything unrecognized, including falsy input, is ``"text"``.
    """
    norm = normalize_text(field_type or "")
    if norm in ("number", "none"):
        return norm
    return "text"