import unicodedata
from typing import Literal
Lang = Literal["vi", "en", "mix", "unknown"]

# Lowercase letters (precomposed form) that appear in Vietnamese but not in
# the basic 26-letter Latin alphabet: modified base letters plus every
# vowel/tone-mark combination.
VI_CHARS = set("ăâđêôơưáàảãạấầẩẫậắằẳẵặéèẻẽẹếềểễệíìỉĩịóòỏõọốồổỗộớờởỡợúùủũụứừửữựýỳỷỹỵ")

# Plain ASCII lowercase letters; a word made only of these is treated as English.
EN_CHARS = set("abcdefghijklmnopqrstuvwxyz")


def detect_language(text: str) -> Lang:
    """Classify *text* as Vietnamese, English, mixed, or unknown.

    The text is split into words (maximal runs of alphabetic characters) and
    each word is classified on its own:

    * a word containing at least one character from ``VI_CHARS`` counts as
      Vietnamese, even though its other letters are plain ASCII;
    * otherwise, a word containing at least one character from ``EN_CHARS``
      counts as English.

    Classifying per word rather than per character matters because nearly
    every Vietnamese word also contains ASCII letters; a character-level
    check would report almost all Vietnamese text as ``"mix"``.

    Limitation: a Vietnamese word written without any diacritic (e.g. "em")
    is indistinguishable from English here and is counted as English.

    Args:
        text: The string to inspect. May be empty.

    Returns:
        ``"mix"`` if both Vietnamese and English words are present,
        ``"vi"`` if only Vietnamese words are present,
        ``"en"`` if only English words are present,
        ``"unknown"`` if no word matches either alphabet.
    """
    # NFC composes "base letter + combining mark" sequences into the single
    # precomposed code points stored in VI_CHARS, so decomposed input
    # (common from some keyboards and file systems) is still recognised.
    normalized = unicodedata.normalize("NFC", text).lower()

    # Turn every non-letter (digits, punctuation, whitespace) into a
    # separator so that "phòng/kitchen" yields two words, not one.
    words = "".join(ch if ch.isalpha() else " " for ch in normalized).split()

    has_vi = False
    has_en = False
    for word in words:
        if any(ch in VI_CHARS for ch in word):
            has_vi = True
        elif any(ch in EN_CHARS for ch in word):
            has_en = True
        if has_vi and has_en:
            # Both kinds seen; the remaining words cannot change the answer.
            return "mix"

    if has_vi:
        return "vi"
    if has_en:
        return "en"
    return "unknown"
if __name__ == "__main__":
    # Quick manual check: one sample each of Vietnamese, English, mixed,
    # and letter-free input, printed next to the detector's verdict.
    samples = (
        "Đi tới phòng khách",
        "Turn on the lights",
        "Đi tới living room",
        "12345 !!!",
    )
    for sample in samples:
        print(f"{sample} -> {detect_language(sample)}")