"""tutor/lang_detect.py — lightweight keyword-based language detection.""" from __future__ import annotations import re _MARKERS = { "kin": ["mbiri","gatatu","kane","gatanu","ikibazo","igisubizo","komeza", "byiza","wabitsinze","ntangaza","umwe","babiri","batatu","bane","batanu"], "sw": ["moja","mbili","tatu","nne","tano","sita","saba","nane","tisa","kumi", "jibu","swali","hesabu","vizuri","hongera","endelea","jaribu"], "fr": ["un","deux","trois","quatre","cinq","six","sept","huit","neuf","dix", "bonjour","merci","oui","non","la","le","les","est","une","je","tu", "combien","réponse","bravo","essaie","encore"], "en": ["one","two","three","four","five","six","seven","eight","nine","ten", "hello","yes","no","the","answer","how","many","count","add","minus", "great","amazing","correct","wrong","next"], } def detect(text: str) -> str: tokens = re.findall(r"[a-zA-ZÀ-öø-ÿ']+", text.lower()) if not tokens: return "en" scores = {lang: sum(1 for t in tokens if t in markers) for lang, markers in _MARKERS.items()} best = max(scores.values()) if best == 0: return "en" winners = [l for l, s in scores.items() if s == best] if len(winners) == 1: return winners[0] return "en" if "en" in winners else "mix" def reply_lang(detected: str, fallback: str = "en") -> str: return detected if detected in ("en","fr","kin","sw") else fallback