NSamson1 committed on
Commit
2efd757
·
verified ·
1 Parent(s): 0e2e9ee

Create tutor/asr_adapt.py

Browse files
Files changed (1) hide show
  1. tutor/asr_adapt.py +82 -0
tutor/asr_adapt.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """tutor/asr_adapt.py — ASR helpers (Whisper optional, graceful fallback)."""
2
+ from __future__ import annotations
3
+ import re
4
+ import numpy as np
5
+ from typing import Optional, Tuple
6
+ from tutor.lang_detect import detect as lang_detect
7
+
8
# Clips whose RMS amplitude falls below this value are treated as silence.
SILENCE_RMS: float = 0.005
# Clips shorter than this many seconds are treated as silence regardless of level.
SILENCE_MIN_SECS: float = 0.3
10
+
11
# Spoken number word -> integer value (0..10) for every supported language.
# All languages share one table, so a spelling common to two languages
# (e.g. "six" in English and French) only needs to appear once.
_WORD_MAP = {
    # English: position in the tuple is the value.
    **{
        word: value
        for value, word in enumerate((
            "zero", "one", "two", "three", "four", "five",
            "six", "seven", "eight", "nine", "ten",
        ))
    },
    # French ("six" is already provided by the English entries).
    "zéro": 0,
    "un": 1,
    "une": 1,
    "deux": 2,
    "trois": 3,
    "quatre": 4,
    "cinq": 5,
    "sept": 7,
    "huit": 8,
    "neuf": 9,
    "dix": 10,
    # Kinyarwanda: the regular 0..10 series first ...
    **{
        word: value
        for value, word in enumerate((
            "zeru", "rimwe", "ebyiri", "eshatu", "ine", "eshanu",
            "gatandatu", "indwi", "umunani", "icyenda", "icumi",
        ))
    },
    # ... followed by alternate forms of 2..5.
    "mbiri": 2,
    "gatatu": 3,
    "kane": 4,
    "gatanu": 5,
    # Kiswahili: position in the tuple is the value.
    **{
        word: value
        for value, word in enumerate((
            "sifuri", "moja", "mbili", "tatu", "nne", "tano",
            "sita", "saba", "nane", "tisa", "kumi",
        ))
    },
}
26
+
27
# Lazily populated Whisper state; both names are written by _load_whisper().
# Holds the loaded model object, or None while no model has been loaded.
_whisper_model = None
# True only after a model has been loaded successfully.
_whisper_ok: bool = False
29
+
30
+
31
def is_silence(audio_f32: np.ndarray, sr: int = 16000) -> bool:
    """Report whether a clip is too short or too quiet to be worth transcribing.

    Args:
        audio_f32: Audio samples; ``len(audio_f32)`` is taken as the number
            of samples.
        sr: Sample rate in Hz, used to turn ``SILENCE_MIN_SECS`` into a
            sample count.

    Returns:
        ``True`` when the clip has fewer than ``int(SILENCE_MIN_SECS * sr)``
        samples, or when its RMS amplitude is below ``SILENCE_RMS``.
    """
    min_samples = int(SILENCE_MIN_SECS * sr)
    if len(audio_f32) >= min_samples:
        # Long enough to judge: compare the root-mean-square level.
        rms = float(np.sqrt(np.mean(audio_f32 ** 2)))
        return rms < SILENCE_RMS
    return True
35
+
36
+
37
def extract_integer(text: str) -> Optional[int]:
    """Pull the first integer answer out of a transcript.

    A standalone run of one or two digits anywhere in ``text`` takes
    priority. If there is none, the text is lower-cased, split into
    alphabetic tokens, and the first token present in ``_WORD_MAP`` decides
    the result.

    Args:
        text: Transcript to search; may be empty.

    Returns:
        The integer found, or ``None`` when ``text`` is empty or contains
        neither a 1-2 digit number nor a known number word.
    """
    if not text:
        return None

    digits = re.search(r"\b(\d{1,2})\b", text)
    if digits is not None:
        return int(digits.group(1))

    words = re.findall(r"[a-zA-ZÀ-öø-ÿ]+", text.lower())
    return next((_WORD_MAP[word] for word in words if word in _WORD_MAP), None)
47
+
48
+
49
def _load_whisper() -> bool:
    """Load the Whisper "tiny" model on first use and report availability.

    The outcome of the first attempt is remembered, whether it succeeded or
    failed. Previously only a successful load was remembered, because the
    guard tested ``_whisper_model is not None``; a missing or broken Whisper
    install was therefore re-imported and re-loaded on every call.

    To force a fresh attempt (for example after installing Whisper in a
    running process), set ``_load_whisper._attempted = False``.

    Returns:
        ``True`` if a model is available in ``_whisper_model``, else ``False``.
    """
    global _whisper_model, _whisper_ok
    # A non-None model is honoured even if it was assigned from outside.
    if _whisper_model is not None or _load_whisper._attempted:
        return _whisper_ok

    _load_whisper._attempted = True
    try:
        import whisper  # type: ignore
        _whisper_model = whisper.load_model("tiny")
        _whisper_ok = True
    except Exception:
        # Whisper is optional: record the failure once and fall back.
        import logging
        logging.getLogger(__name__).warning(
            "Whisper is unavailable; ASR will return empty transcripts",
            exc_info=True,
        )
        _whisper_model = None
        _whisper_ok = False
    return _whisper_ok


# Set once the first load attempt has been made, whatever its outcome.
_load_whisper._attempted = False
60
+
61
+
62
# Project language code -> language code passed to Whisper.
_LANG_TO_WHISPER = {
    "en": "en",
    "fr": "fr",
    "kin": "rw",
    "sw": "sw",
}
# Reverse lookup: language code reported by Whisper -> project language code.
_WHISPER_TO_LANG = {
    whisper_code: project_code
    for project_code, whisper_code in _LANG_TO_WHISPER.items()
}
64
+
65
+
66
def transcribe(audio_f32: np.ndarray, lang_hint: str = "en", sample_rate: int = 16000
               ) -> Tuple[str, str, float]:
    """Transcribe a clip with Whisper, falling back to an empty result.

    Args:
        audio_f32: Audio samples. NOTE(review): Whisper treats a raw waveform
            as 16 kHz audio and no resampling is done here, so callers are
            presumably expected to pass 16 kHz data; confirm against callers.
        lang_hint: Project language code, looked up in ``_LANG_TO_WHISPER``.
            Codes missing from that map are treated as English.
        sample_rate: Sample rate in Hz. Only used by the silence check.

    Returns:
        ``(text, language, confidence)``. After a successful Whisper run,
        ``language`` is Whisper's reported language mapped through
        ``_WHISPER_TO_LANG`` (``lang_hint`` if unmapped) and ``confidence`` is
        the fixed placeholder 0.9. For silence, an unavailable model, or a
        failed transcription the result is ``("", lang_hint, 0.0)``.
    """
    fallback = ("", lang_hint, 0.0)
    if is_silence(audio_f32, sample_rate) or not _load_whisper():
        return fallback

    import logging
    log = logging.getLogger(__name__)

    language = _LANG_TO_WHISPER.get(lang_hint, "en")
    # Whisper raises on a language code it does not know. Previously that
    # error was swallowed, so such a hint always yielded an empty transcript.
    # Validate the code and let Whisper auto-detect the language instead.
    try:
        from whisper.tokenizer import LANGUAGES  # type: ignore
    except Exception:
        LANGUAGES = None  # cannot validate; pass the code through unchanged
    if LANGUAGES is not None and language not in LANGUAGES:
        log.warning(
            "Whisper has no language %r (hint %r); using auto-detection",
            language, lang_hint,
        )
        language = None

    try:
        result = _whisper_model.transcribe(
            # Cast defensively: a float64 waveform is expected to fail inside
            # Whisper's float32 mel front-end.
            np.asarray(audio_f32, dtype=np.float32),
            language=language,
            fp16=False, task="transcribe")
        text = result.get("text", "").strip()
        detected = _WHISPER_TO_LANG.get(result.get("language", "en"), lang_hint)
        return text, detected, 0.9
    except Exception:
        # Best-effort by design: log the cause, then use the fallback.
        log.warning(
            "Whisper transcription failed; returning empty transcript",
            exc_info=True,
        )
    return fallback