Spaces:
Sleeping
Sleeping
MOHAN799S committed on
Commit ·
3ba902d
1
Parent(s): a349f20
Fix: convert Kannada→Telugu script before storing (Whisper quirk)
Browse files- multi_modal/audio_to_text.py +38 -1
multi_modal/audio_to_text.py
CHANGED
|
@@ -39,6 +39,7 @@ _VALID_SCRIPTS = {
|
|
| 39 |
"LATIN", # English
|
| 40 |
"DEVANAGARI", # Hindi
|
| 41 |
"TELUGU", # Telugu
|
|
|
|
| 42 |
"COMMON", # punctuation, digits, spaces
|
| 43 |
}
|
| 44 |
|
|
@@ -54,11 +55,45 @@ _ALLOWED_LANGUAGES = {"en", "te", "hi"}
|
|
| 54 |
# This prevents Telugu audio from being accepted as Hindi.
|
| 55 |
_LANG_EXPECTED_SCRIPT = {
|
| 56 |
"en": {"LATIN"},
|
| 57 |
-
"te": {"TELUGU"},
|
| 58 |
"hi": {"DEVANAGARI"},
|
| 59 |
}
|
| 60 |
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
# ── Load Whisper ONCE at import time ──────────────────────────────────────────
|
| 63 |
if _AUDIO_BACKEND == "local":
|
| 64 |
print(f"🔄 Loading Whisper '{MODEL_ID}' on {_DEVICE}…")
|
|
@@ -272,6 +307,7 @@ def _transcribe_local(audio_file) -> str:
|
|
| 272 |
print(f"[audio_to_text] script mismatch: forced {lang} but got {dominant} — trying next")
|
| 273 |
continue
|
| 274 |
|
|
|
|
| 275 |
print(f"[audio_to_text] OK lang={lang} | "
|
| 276 |
f"{len(text)} chars: {text[:100]}")
|
| 277 |
return text
|
|
@@ -425,6 +461,7 @@ def _transcribe_via_hf_api(audio_file) -> str:
|
|
| 425 |
text = ""
|
| 426 |
|
| 427 |
if _is_valid_transcription(text):
|
|
|
|
| 428 |
print(f"[audio_to_text] HF API OK: {len(text)} chars: {text[:100]}")
|
| 429 |
return text
|
| 430 |
else:
|
|
|
|
| 39 |
"LATIN", # English
|
| 40 |
"DEVANAGARI", # Hindi
|
| 41 |
"TELUGU", # Telugu
|
| 42 |
+
"KANNADA", # Whisper sometimes outputs Kannada for Telugu audio
|
| 43 |
"COMMON", # punctuation, digits, spaces
|
| 44 |
}
|
| 45 |
|
|
|
|
| 55 |
# This prevents Telugu audio from being accepted as Hindi.
|
| 56 |
_LANG_EXPECTED_SCRIPT = {
|
| 57 |
"en": {"LATIN"},
|
| 58 |
+
"te": {"TELUGU", "KANNADA"}, # Whisper may use Kannada script for Telugu
|
| 59 |
"hi": {"DEVANAGARI"},
|
| 60 |
}
|
| 61 |
|
| 62 |
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 66 |
+
# KANNADA → TELUGU SCRIPT FIX
|
| 67 |
+
# Whisper sometimes outputs Telugu audio in Kannada script (very similar glyphs).
|
| 68 |
+
# We convert Kannada codepoints → Telugu so stored text is always Telugu script.
|
| 69 |
+
# ─────────────────────────────────────────────────────────────────────────────
|
| 70 |
+
_KANNADA_TO_TELUGU = {
    # Independent vowels.
    "ಅ": "అ", "ಆ": "ఆ", "ಇ": "ఇ", "ಈ": "ఈ", "ಉ": "ఉ", "ಊ": "ఊ", "ಋ": "ఋ",
    "ಎ": "ఎ", "ಏ": "ఏ", "ಐ": "ఐ", "ಒ": "ఒ", "ಓ": "ఓ", "ಔ": "ఔ",
    # Dependent vowel signs.
    "ಾ": "ా", "ಿ": "ి", "ು": "ు", "ೃ": "ృ",
    # The long-i and long-u signs are spelled as escapes on purpose: the
    # Kannada and Telugu glyphs are nearly indistinguishable, and the table
    # previously used the *Telugu* code points as keys here, so these two
    # Kannada signs were silently left unconverted.
    "\u0cc0": "\u0c40",  # KANNADA VOWEL SIGN II -> TELUGU VOWEL SIGN II
    "\u0cc2": "\u0c42",  # KANNADA VOWEL SIGN UU -> TELUGU VOWEL SIGN UU
    "ೆ": "ె", "ೇ": "ే", "ೈ": "ై", "ೊ": "ొ", "ೋ": "ో", "ೌ": "ౌ",
    # Anusvara, visarga, nukta.
    "ಂ": "ం", "ಃ": "ః", "಼": "఼",
    # Consonants.
    "ಕ": "క", "ಖ": "ఖ", "ಗ": "గ", "ಘ": "ఘ", "ಙ": "ఙ",
    "ಚ": "చ", "ಛ": "ఛ", "ಜ": "జ", "ಝ": "ఝ", "ಞ": "ఞ",
    "ಟ": "ట", "ಠ": "ఠ", "ಡ": "డ", "ಢ": "ఢ", "ಣ": "ణ",
    "ತ": "త", "ಥ": "థ", "ದ": "ద", "ಧ": "ధ", "ನ": "న",
    "ಪ": "ప", "ಫ": "ఫ", "ಬ": "బ", "ಭ": "భ", "ಮ": "మ",
    "ಯ": "య", "ರ": "ర", "ಲ": "ల", "ವ": "వ", "ಶ": "శ",
    "ಷ": "ష", "ಸ": "స", "ಹ": "హ", "ಳ": "ళ",
    # Virama (halant).
    "್": "్",
    # Digits.
    "೦": "౦", "೧": "౧", "೨": "౨", "೩": "౩", "೪": "౪",
    "೫": "౫", "೬": "౬", "೭": "౭", "೮": "౮", "೯": "౯",
}


def fix_script(text: str) -> str:
    """Convert Kannada script → Telugu if Whisper used wrong script for Telugu audio.

    Args:
        text: Transcription text to normalise.

    Returns:
        ``text`` unchanged when it contains no Kannada characters; otherwise a
        copy in which every character present in ``_KANNADA_TO_TELUGU`` is
        replaced by its Telugu counterpart. Characters without a mapping are
        passed through as-is.
    """
    # Function-scope import: the module's import block is not part of this
    # definition, so the dependency is kept local to stay self-contained.
    import unicodedata

    # Whitespace is skipped; any remaining character whose Unicode name starts
    # with "KANNADA" triggers the conversion.
    has_kannada = any(
        unicodedata.name(ch, "").startswith("KANNADA")
        for ch in text
        if ch.strip()
    )
    if not has_kannada:
        return text

    converted = "".join(_KANNADA_TO_TELUGU.get(ch, ch) for ch in text)
    print(f"[audio_to_text] Kannada→Telugu fix: {text[:40]!r} → {converted[:40]!r}")
    return converted
|
| 96 |
+
|
| 97 |
# ── Load Whisper ONCE at import time ──────────────────────────────────────────
|
| 98 |
if _AUDIO_BACKEND == "local":
|
| 99 |
print(f"🔄 Loading Whisper '{MODEL_ID}' on {_DEVICE}…")
|
|
|
|
| 307 |
print(f"[audio_to_text] script mismatch: forced {lang} but got {dominant} — trying next")
|
| 308 |
continue
|
| 309 |
|
| 310 |
+
text = fix_script(text) # Kannada→Telugu if needed
|
| 311 |
print(f"[audio_to_text] OK lang={lang} | "
|
| 312 |
f"{len(text)} chars: {text[:100]}")
|
| 313 |
return text
|
|
|
|
| 461 |
text = ""
|
| 462 |
|
| 463 |
if _is_valid_transcription(text):
|
| 464 |
+
text = fix_script(text) # Kannada→Telugu if Whisper used wrong script
|
| 465 |
print(f"[audio_to_text] HF API OK: {len(text)} chars: {text[:100]}")
|
| 466 |
return text
|
| 467 |
else:
|