Spaces:

NSamson1
/

AI_math

Sleeping

App Files Files Community

NSamson1 committed on Apr 28

Commit

2efd757

verified ·

1 Parent(s): 0e2e9ee

Create tutor/asr_adapt.py

Browse files

Files changed (1) hide show

tutor/asr_adapt.py +82 -0

tutor/asr_adapt.py ADDED Viewed

	@@ -0,0 +1,82 @@

+"""tutor/asr_adapt.py — ASR helpers (Whisper optional, graceful fallback)."""
+from __future__ import annotations
+import re
+import numpy as np
+from typing import Optional, Tuple
+from tutor.lang_detect import detect as lang_detect
# --- Silence detection thresholds ---------------------------------------
# A clip whose RMS amplitude is below this value is treated as silent.
SILENCE_RMS = 0.005
# A clip shorter than this many seconds is treated as silent outright.
SILENCE_MIN_SECS = 0.3

# --- Spoken number words -------------------------------------------------
# Number words for 0-10 mapped to their integer value. Keys are lower-case
# because extract_integer() lower-cases the text before looking tokens up.
_WORD_MAP = {
    # English
    "zero": 0, "one": 1, "two": 2, "three": 3, "four": 4, "five": 5,
    "six": 6, "seven": 7, "eight": 8, "nine": 9, "ten": 10,
    # French ("six" is already covered by the English entry above)
    "zéro": 0, "un": 1, "une": 1, "deux": 2, "trois": 3, "quatre": 4,
    "cinq": 5, "sept": 7, "huit": 8, "neuf": 9, "dix": 10,
    # Kinyarwanda
    "zeru": 0, "rimwe": 1, "ebyiri": 2, "eshatu": 3, "ine": 4, "eshanu": 5,
    "gatandatu": 6, "indwi": 7, "umunani": 8, "icyenda": 9, "icumi": 10,
    # Kinyarwanda, additional forms for 2-5
    "mbiri": 2, "gatatu": 3, "kane": 4, "gatanu": 5,
    # Kiswahili
    "sifuri": 0, "moja": 1, "mbili": 2, "tatu": 3, "nne": 4, "tano": 5,
    "sita": 6, "saba": 7, "nane": 8, "tisa": 9, "kumi": 10,
}

# --- Lazy Whisper state --------------------------------------------------
# Both values are filled in by _load_whisper() the first time it is called.
_whisper_model = None   # the loaded Whisper model, or None while not loaded
_whisper_ok = False     # True once the model has been loaded successfully
def is_silence(audio_f32: np.ndarray, sr: int = 16000) -> bool:
    """Return True when a clip is too short or too quiet to hold speech.

    Args:
        audio_f32: Audio samples. Float arrays are used as they are; any
            other array-like (lists, integer PCM arrays) is converted first.
            The ``SILENCE_RMS`` threshold is compared against the raw sample
            values, so it is only meaningful for samples on the same scale
            the threshold was chosen for.
        sr: Sample rate in Hz, used to turn ``SILENCE_MIN_SECS`` into a
            minimum sample count.

    Returns:
        True if the clip has fewer than ``int(SILENCE_MIN_SECS * sr)``
        samples, or if its RMS amplitude is below ``SILENCE_RMS``.
    """
    samples = np.asarray(audio_f32)
    if len(samples) < int(SILENCE_MIN_SECS * sr):
        return True
    if not np.issubdtype(samples.dtype, np.floating):
        # Squaring integer samples in their own dtype wraps around (an int16
        # value of 256 squares to 0), which would make loud clips look
        # silent. Promote to float64 before doing any arithmetic.
        samples = samples.astype(np.float64)
    rms = float(np.sqrt(np.mean(np.square(samples))))
    return rms < SILENCE_RMS
def extract_integer(text: str) -> Optional[int]:
    """Pull the first recognisable small integer out of a transcript.

    A standalone one- or two-digit number anywhere in the text takes
    priority over number words. If there is none, the first token found in
    ``_WORD_MAP`` decides the result.

    Args:
        text: Transcribed utterance; may be empty.

    Returns:
        The integer that was found, or None when the text is empty or holds
        neither a 1-2 digit number nor a known number word.
    """
    if not text:
        return None
    import unicodedata

    # Compose accents first: in decomposed form "é" is "e" followed by a
    # combining mark, which the token pattern below would split in two, so
    # a word such as "zéro" would never match its _WORD_MAP key.
    normalized = unicodedata.normalize("NFC", text)
    digits = re.search(r"\b(\d{1,2})\b", normalized)
    if digits:
        return int(digits.group(1))
    tokens = re.findall(r"[a-zA-ZÀ-öø-ÿ]+", normalized.lower())
    return next((_WORD_MAP[tok] for tok in tokens if tok in _WORD_MAP), None)
def _load_whisper() -> bool:
    """Load the Whisper "tiny" model once and report whether it is usable.

    The outcome of the first attempt is remembered whether it succeeded or
    failed, so a missing ``whisper`` package or a failed model load is not
    retried on every call.

    Returns:
        True if ``_whisper_model`` holds a loaded model, otherwise False.
    """
    global _whisper_model, _whisper_ok
    if _whisper_model is not None or getattr(_load_whisper, "_attempted", False):
        return _whisper_ok
    # Record the attempt before trying: checking only `_whisper_model` cannot
    # tell "never tried" apart from "tried and failed", because the model is
    # None in both cases.
    _load_whisper._attempted = True
    try:
        import whisper  # type: ignore
        _whisper_model = whisper.load_model("tiny")
        _whisper_ok = True
    except Exception:
        import logging

        _whisper_model = None
        _whisper_ok = False
        # Best-effort by design: callers fall back to an empty transcript.
        # Log once so the reason is visible instead of silently dropped.
        logging.getLogger(__name__).warning(
            "Whisper is unavailable; speech transcription is disabled",
            exc_info=True,
        )
    return _whisper_ok
# Language code as passed in `lang_hint` -> language code handed to Whisper.
_LANG_TO_WHISPER = dict(en="en", fr="fr", kin="rw", sw="sw")
# Reverse lookup (Whisper code -> `lang_hint` code), derived from the table
# above so the two directions cannot drift apart.
_WHISPER_TO_LANG = {
    whisper_code: hint_code for hint_code, whisper_code in _LANG_TO_WHISPER.items()
}
def transcribe(audio_f32: np.ndarray, lang_hint: str = "en", sample_rate: int = 16000
               ) -> Tuple[str, str, float]:
    """Transcribe a speech clip with Whisper, degrading to an empty result.

    Args:
        audio_f32: Audio samples of the utterance.
            NOTE(review): the samples are handed to Whisper unchanged apart
            from a float32 cast; no resampling is done here. Whisper
            presumably expects 16 kHz mono input — confirm what callers pass.
        lang_hint: Language code of the expected language (a key of
            ``_LANG_TO_WHISPER``); unknown codes are treated as English.
        sample_rate: Sample rate in Hz. Only used for the silence check.

    Returns:
        ``(text, language, confidence)``. For silent clips, when Whisper is
        unavailable, or when transcription fails, this is
        ``("", lang_hint, 0.0)``. Otherwise it is the stripped transcript,
        the language Whisper reported (mapped back through
        ``_WHISPER_TO_LANG``, falling back to ``lang_hint``), and a fixed
        confidence of 0.9.
    """
    fallback = ("", lang_hint, 0.0)
    if is_silence(audio_f32, sample_rate):
        return fallback
    if not _load_whisper():
        return fallback
    try:
        import whisper  # type: ignore

        language = _LANG_TO_WHISPER.get(lang_hint, "en")
        # Whisper rejects language codes it does not know, which previously
        # turned every request for such a language into a silent empty
        # result. When the mapped code is missing from Whisper's language
        # table, let Whisper auto-detect the language instead.
        supported = getattr(getattr(whisper, "tokenizer", None), "LANGUAGES", None)
        if supported is not None and language not in supported:
            language = None
        result = _whisper_model.transcribe(
            # No copy is made when the input is already a float32 array.
            np.asarray(audio_f32, dtype=np.float32),
            language=language,
            fp16=False,
            task="transcribe",
        )
        text = result.get("text", "").strip()
        detected = _WHISPER_TO_LANG.get(result.get("language", "en"), lang_hint)
        return text, detected, 0.9
    except Exception:
        import logging

        # Still best-effort (callers get the empty fallback), but record the
        # failure instead of discarding it.
        logging.getLogger(__name__).exception("Whisper transcription failed")
        return fallback