import re
import unicodedata
from typing import Literal

# Closed set of labels returned by detect_language().
Lang = Literal["vi", "en", "mix", "unknown"]

# Lowercase, precomposed (NFC) letters that are specific to Vietnamese
# orthography: the extra base letters plus every tone-marked vowel.
VI_CHARS = set(
    "ăâđêôơư"
    "áàảãạấầẩẫậắằẳẵặ"
    "éèẻẽẹếềểễệ"
    "íìỉĩị"
    "óòỏõọốồổỗộớờởỡợ"
    "úùủũụứừửữự"
    "ýỳỷỹỵ"
)

# Plain lowercase ASCII letters.
EN_CHARS = set("abcdefghijklmnopqrstuvwxyz")

# A "word" is a maximal run of Unicode letters (no digits, no underscore).
_WORD_RE = re.compile(r"[^\W\d_]+")


def detect_language(text: str) -> Lang:
    """Classify *text* as Vietnamese, English, mixed, or unknown.

    The text is NFC-normalized and lowercased, then split into runs of
    letters. Each word is classified on its own:

    * a word containing at least one character from ``VI_CHARS`` counts as
      Vietnamese;
    * otherwise, a word containing at least one character from ``EN_CHARS``
      counts as English.

    Returns:
        ``"mix"`` if both kinds of word occur, ``"vi"`` if only Vietnamese
        words occur, ``"en"`` if only English words occur, and ``"unknown"``
        if neither occurs (empty text, digits, punctuation, other scripts).

    Note:
        This is a small heuristic. A Vietnamese word written without any
        diacritic (e.g. "anh", "em") is indistinguishable from an ASCII word
        here and is counted as English.
    """
    # Compose base letter + combining marks into single code points so that
    # decomposed (NFD) input matches the precomposed entries in VI_CHARS.
    normalized = unicodedata.normalize("NFC", text).lower()

    has_vi = False
    has_en = False
    # Classify per word, not per character: nearly every Vietnamese word also
    # contains plain a-z letters, so a per-character check would label pure
    # Vietnamese text as mixed.
    for word in _WORD_RE.findall(normalized):
        if not VI_CHARS.isdisjoint(word):
            has_vi = True
        elif not EN_CHARS.isdisjoint(word):
            has_en = True
        if has_vi and has_en:
            return "mix"

    if has_vi:
        return "vi"
    if has_en:
        return "en"
    return "unknown"


if __name__ == "__main__":
    tests = [
        "Đi tới phòng khách",
        "Turn on the lights",
        "Đi tới living room",
        "12345 !!!",
    ]
    for t in tests:
        print(t, "->", detect_language(t))