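# Page-scrape residue kept for provenance (not part of the module):
# uploader "hoangs", commit message "Upload model.py", revision 1e90554 (verified).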
import unicodedata
from typing import Literal
# Closed set of labels that detect_language() can return.
Lang = Literal["vi", "en", "mix", "unknown"]

# Lowercase Vietnamese letters that do not occur in plain English: the seven
# extra base letters followed by every tone-marked vowel.  Membership tests
# assume single precomposed code points, which is why detect_language()
# normalizes its input to NFC before looking characters up here.
VI_CHARS = set("ăâđêôơưáàảãạấầẩẫậắằẳẵặéèẻẽẹếềểễệíìỉĩịóòỏõọốồổỗộớờởỡợúùủũụứừửữựýỳỷỹỵ")

# The 26 unaccented lowercase Latin letters.
EN_CHARS = set("abcdefghijklmnopqrstuvwxyz")


def detect_language(text: str) -> Lang:
    """Classify *text* as Vietnamese, English, mixed, or unknown.

    The text is split into words (maximal runs of alphabetic characters) and
    each word is classified on its own:

    * a word containing at least one letter from ``VI_CHARS`` is Vietnamese;
    * otherwise, a word containing a letter from ``EN_CHARS`` is English;
    * any other word (digits, punctuation, other scripts) is ignored.

    Classifying per word rather than per character matters because ordinary
    Vietnamese words are mostly spelled with plain Latin letters; a
    character-level check would label virtually all Vietnamese text "mix".

    Known limitation: a Vietnamese syllable written without any diacritic
    (for example "anh" or "em") is indistinguishable from an English word
    here and is counted as English.

    Args:
        text: The string to inspect. Case and Unicode normalization form
            (composed vs. decomposed accents) do not affect the result.

    Returns:
        "mix" if both Vietnamese and English words are present, "vi" or "en"
        if only one kind is present, and "unknown" if neither is found.
    """
    # Compose base letters with their combining marks so decomposed (NFD)
    # input matches the precomposed entries of VI_CHARS.
    normalized = unicodedata.normalize("NFC", text).lower()

    # Turn every non-letter into a separator so punctuation, digits and
    # slashes split words the same way whitespace does.
    words = "".join(ch if ch.isalpha() else " " for ch in normalized).split()

    has_vi = False
    has_en = False
    for word in words:
        if not VI_CHARS.isdisjoint(word):
            has_vi = True
        elif not EN_CHARS.isdisjoint(word):
            has_en = True
        if has_vi and has_en:
            # Both kinds seen; no later word can change the answer.
            return "mix"

    if has_vi:
        return "vi"
    if has_en:
        return "en"
    return "unknown"
def _demo() -> None:
    """Print the detected language for a handful of sample phrases."""
    samples = (
        "Đi tới phòng khách",
        "Turn on the lights",
        "Đi tới living room",
        "12345 !!!",
    )
    for phrase in samples:
        # Same output as print(phrase, "->", label) with the default separator.
        print(f"{phrase} -> {detect_language(phrase)}")


# Run the demo only when the file is executed as a script, not on import.
if __name__ == "__main__":
    _demo()