# Source: AI_math/tutor/lang_detect.py
# Commit b605a27 (verified) by NSamson1: "Create tutor/lang_detect.py"
"""tutor/lang_detect.py — lightweight keyword-based language detection."""
from __future__ import annotations
import re
# Marker words per language code. Stored as frozensets so the membership test
# in detect() is O(1) per token. A word may appear under more than one
# language (e.g. "six" is both French and English); detect() resolves such
# ties explicitly.
_MARKERS = {
    "kin": frozenset({
        "mbiri", "gatatu", "kane", "gatanu", "ikibazo", "igisubizo", "komeza",
        "byiza", "wabitsinze", "ntangaza", "umwe", "babiri", "batatu", "bane",
        "batanu",
    }),
    "sw": frozenset({
        "moja", "mbili", "tatu", "nne", "tano", "sita", "saba", "nane", "tisa",
        "kumi", "jibu", "swali", "hesabu", "vizuri", "hongera", "endelea",
        "jaribu",
    }),
    "fr": frozenset({
        "un", "deux", "trois", "quatre", "cinq", "six", "sept", "huit", "neuf",
        "dix", "bonjour", "merci", "oui", "non", "la", "le", "les", "est",
        "une", "je", "tu", "combien", "réponse", "bravo", "essaie", "encore",
    }),
    "en": frozenset({
        "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
        "ten", "hello", "yes", "no", "the", "answer", "how", "many", "count",
        "add", "minus", "great", "amazing", "correct", "wrong", "next",
    }),
}

# Runs of lowercase Latin letters (ASCII plus the Latin-1 letters ß-ö and ø-ÿ).
# The pattern is applied to already-lowercased text, so no uppercase ranges are
# needed. Apostrophes and the × / ÷ signs are deliberately NOT part of a token:
# no marker contains them, so keeping them glued to a word ("'bonjour'",
# "c'est", "deux×trois") could only ever prevent a match.
_TOKEN_RE = re.compile(r"[a-zß-öø-ÿ]+")


def detect(text: str) -> str:
    """Guess the language of *text* by counting known marker words.

    The text is lowercased and split into alphabetic tokens; every token that
    appears in a language's marker set adds one point to that language
    (repeated words count each time).

    Args:
        text: The utterance to classify.

    Returns:
        ``"kin"``, ``"sw"``, ``"fr"`` or ``"en"`` when one language has the
        strictly highest score. ``"en"`` is also returned when the text has no
        alphabetic tokens, when no token matches any marker, or when English
        is among several top-scoring languages. ``"mix"`` is returned when
        several non-English languages tie for the top score.
    """
    tokens = _TOKEN_RE.findall(text.lower())
    if not tokens:
        return "en"

    scores = {
        lang: sum(1 for token in tokens if token in markers)
        for lang, markers in _MARKERS.items()
    }
    best = max(scores.values())
    if best == 0:
        # Nothing recognised at all: default to English.
        return "en"

    winners = [lang for lang, score in scores.items() if score == best]
    if len(winners) == 1:
        return winners[0]
    # Ties are resolved in favour of English when it is one of the leaders;
    # otherwise the input is reported as mixed-language.
    return "en" if "en" in winners else "mix"
def reply_lang(detected: str, fallback: str = "en") -> str:
    """Choose the language code to reply in.

    Args:
        detected: A language code, typically the result of ``detect``.
        fallback: Code to use when *detected* is not a supported reply
            language (for example ``"mix"``). It is returned as given,
            without validation.

    Returns:
        *detected* if it is one of ``"en"``, ``"fr"``, ``"kin"`` or ``"sw"``;
        otherwise *fallback*.
    """
    if detected in ("en", "fr", "kin", "sw"):
        return detected
    return fallback