Noumida committed on
Commit
797ee59
·
verified ·
1 Parent(s): 61a4205

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -43
app.py CHANGED
@@ -8,64 +8,92 @@ import re
8
 
9
  DESCRIPTION = "IndicConformer-600M Multilingual ASR (CTC + RNNT) with Auto Language ID"
10
 
11
- # --- CORRECTED Language Identification Data ---
12
- LANGUAGE_DATA = {
13
- "as": {"chars": set("অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবভমযৰলৱশষসহৎংঃঽািীুূৃেৈোৌ্"), "words": set(["আৰু", "হয়", "এটা", "কৰা", "ওপৰত", "যে"])},
14
- "bn": {"chars": set("অআইঈউঊঋএঐওঔকখগঘঙচছজঝঞটঠডঢণতথদধনপফবभমযরলশষসহৎংঃঽািীুূৃেৈোৌ্ড়ঢ়য়"), "words": set(["এবং", "একটি", "করুন", "জন্য", "সঙ্গে", "হচ্ছে"])},
15
- "br": {"chars": set("अआइईउऊऋएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसह़ािीुূृेैोौ्"), "words": set(["आरो", "एसे", "मोनसे", "माव", "दं", "जा"])},
16
- "doi": {"chars": set("अआइईउऊएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहािीुूेैोौ्"), "words": set(["ते", "दे", "ऐ", "इक", "ओह्", "कर"])},
17
- "gu": {"chars": set("અઆઇઈઉઊઋએઐઓઔકખગઘઙચછજઝઞટઠડઢણતથદધનપફબભમયરલળવશષસહ઼ાિીુૂૃેૈોૌ્"), "words": set(["અને", "એક", "માટે", "છે", "સાથે", "કરવું"])},
18
- "hi": {"chars": set("अआइईउऊएऐओऔकखगघङचछझञटठडढणतथदधनपफबभमयरलवशषसहािीुूृेैोौ्"), "words": set(["और", "ह", "एक", "में", "के", "लिए"])},
19
- "kn": {"chars": set("ಅಆಇಈಉಊಋಎಏಐಒಓಔಕಖಗಘಙಚಛಜಝಞಟಠಡಢಣತಥದಧನಪಫಬಭಮಯರಲವಶಷಸಹಳಱಾಿೀುೂೃೆೇೈೊೋೌ್"), "words": set(["ಮತ್ತು", "ಒಂದು", "ಹೇಗೆ", "ನಾನು", "ಇದೆ", "ಆ"])},
20
- "ks": {"chars": set("اآبپتٹثجچحخدڈذرڑزژسشصضطظعغفقکگلمنوھءییے"), "words": set([" تہٕ", "چھُ", "اکھ", "منز", "کیتھ", "छु", "छ"])},
21
- "kok": {"chars": set("अआइईउऊऋएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहािीुूृेैोौ्"), "words": set(["आनी", "एक", "कर", "खातीर", "कडेन", "आसा"])},
22
- "mai": {"chars": set("अआइईउऊऋएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहािीुूृेैोौ्"), "words": set(["आ", "एक", "हम", "अछि", "क'", "छै"])},
23
- "ml": {"chars": set("അആഇഈഉഊഋഎഏഐഒഓഔകഖഗഘങചഛജഝഞടഠഡഢണതഥദധനപഫബഭമയരലവശഷസഹളഴറാിീുൂൃെേൈൊോൌ്"), "words": set(["ഒരു", "കൂടാതെ", "എങ്ങനെ", "ഞാൻ", "ഇത്", "ആണ്"])},
24
- "mni": {"chars": set("ꯑ꯲꯳꯴꯵꯶꯷꯸꯹꯺꯻꯼꯽꯾꯿ꯀꯂꯃꯄꯅꯆꯇꯈꯉꯊꯋꯌꯍꯎꯏꯐꯑ"), "words": set(["ꯗꯥ", "ꯑꯃꯥ", "ꯀꯔꯤ", "ꯑꯩꯅꯥ", "ꯑꯁꯤ", "ꯂꯩ"])},
25
- "mr": {"chars": set("अआइईउऊऋएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहािीुूृेैोौ्ळ"), "words": set(["आणि", "एक", "आहे", "मी", "तू", "जे"])},
26
- "ne": {"chars": set("अआइईउऊऋएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहािीुूृेैोौ्"), "words": set(["र", "एक", "हो", "म", "तिमी", "छ"])},
27
- "or": {"chars": set("ଅଆଇଈଉଊଋଏଐଓଔକଖଗଘଙଚଛଜଝଞଟଠଡଢଣତଥଦଧନପଫବଭମଯରଲଳବଶଷସହକ୍ଷାିୀୁୂୃେୈୋୌ୍"), "words": set(["ଏବଂ", "ଗୋଟିଏ", "କରନ୍ତୁ", "ପାଇଁ", "ସହିତ", "ଅଛି"])},
28
- "pa": {"chars": set("ਅਆਇਈਉਊਏਐਓਔਕਖਗਘਙਚਛਜਝਞਟਠਡਢਣਤਥਦਧਨਪਫਬਭਮਯਰਲਵਸ਼ਸਹਖ਼ਗ਼ਜ਼ੜਫ਼ਲ਼ਿੀੁੂੇੈੋੌ੍"), "words": set(["ਅਤੇ", "ਇੱਕ", "ਹੈ", "ਵਿੱਚ", "ਨੂੰ", "ਦਾ"])},
29
- "sa": {"chars": set("अआइईउऊऋएऐओऔकखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसहािीुूृेैोौ्"), "words": set(["च", "एकः", "अस्ति", "अहम्", "त्वम्", "सः"])},
30
- "sat": {"chars": set("ᱚᱛᱚᱜᱚᱝᱞᱟᱠᱥᱮᱫఇᱤᱩੂେୈᱪᱡᱭ"), "words": set(["ᱟᱨ", "ᱫᱚ", "ᱢᱤᱫ", "ಒಂದು", "ಮತ್ತು", "ক"])},
31
- "sd": {"chars": set("اآبڀتٽثپجڄ جھچحخڌدڏڊذرزڙژسشصضطظعغفڦقڪکگڳڱلمنوھ ءي"), "words": set(["۽", "هڪ", "آهي", "۾", "کي", "جو"])},
32
- "ta": {"chars": set("அஆஇஈஉஊஎஏஐஒஓஔகஙசஞடணதநனபமயரலவழளஷஸஹ"), "words": set(["மற்றும்", "ஒரு", "வேண்டும்", "நான்", "இது", "ஆகும்"])},
33
- "te": {"chars": set("అఆఇఈఉఊఋఎఏఐఒఓఔకఖగఘఙచఛజఝఞటఠడఢణతథదధనపఫబభమయరలవశషసహళక్షఱాిీుూృెేైొోౌ్"), "words": set(["మరియు", "ఒక", "కావాలి", "నేను", "ఇది", "ఉంది"])},
34
- "ur": {"chars": set("اآبپتٹثجچحخدڈذرڑزژسشصضطظعغفقکگلمنوھءییے"), "words": set(["اور", "ہے", "ایک", "میں", "کے", "لیے"])},
 
 
35
  }
36
 
37
- # This is the correct way to define the mapping. The faulty line has been removed.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  LANGUAGE_CODE_TO_NAME = { "as": "Assamese", "bn": "Bengali", "br": "Bodo", "doi": "Dogri", "gu": "Gujarati", "hi": "Hindi", "kn": "Kannada", "ks": "Kashmiri", "kok": "Konkani", "mai": "Maithili", "ml": "Malayalam", "mni": "Manipuri", "mr": "Marathi", "ne": "Nepali", "or": "Odia", "pa": "Punjabi", "sa": "Sanskrit", "sat": "Santali", "sd": "Sindhi", "ta": "Tamil", "te": "Telugu", "ur": "Urdu"}
39
  device = "cuda" if torch.cuda.is_available() else "cpu"
40
 
41
- # Load Indic Conformer model
42
  print("Loading IndicConformer model...")
43
  model = AutoModel.from_pretrained("ai4bharat/indic-conformer-600m-multilingual", trust_remote_code=True).to(device)
44
  model.eval()
45
  print("✅ Model loaded successfully.")
46
 
 
47
  def identify_language(text: str) -> str | None:
48
  """Identifies the language of a given text based on character sets and common words."""
49
  if not text.strip():
50
  return None
51
 
52
- scores = {lang: 0 for lang in LANGUAGE_DATA}
53
  text_chars = set(text)
54
  text_words = set(re.split(r'[\s,.:;!?]+', text))
55
 
56
- for lang_code, data in LANGUAGE_DATA.items():
57
- char_score = len(text_chars.intersection(data["chars"]))
58
- word_score = len(text_words.intersection(data["words"]))
59
  scores[lang_code] = (char_score * 2) + word_score
60
 
61
  max_score = max(scores.values())
62
- if max_score < 3:
63
  return None
64
-
65
  identified_code = max(scores, key=scores.get)
66
  return identified_code
67
 
68
-
69
  @spaces.GPU
70
  def transcribe_and_identify(audio_path):
71
  if not audio_path:
@@ -79,11 +107,9 @@ def transcribe_and_identify(audio_path):
79
  return f"Error loading audio: {e}", "", ""
80
 
81
  try:
82
- # 1. Perform a fast, initial transcription using a pivot language (Hindi)
83
  with torch.no_grad():
84
  initial_transcription = model(waveform, "hi", "ctc")
85
-
86
- # 2. Identify the language from the initial transcription
87
  identified_lang_code = identify_language(initial_transcription)
88
 
89
  if not identified_lang_code:
@@ -92,7 +118,6 @@ def transcribe_and_identify(audio_path):
92
 
93
  detected_lang_str = f"Detected Language: {LANGUAGE_CODE_TO_NAME.get(identified_lang_code, 'Unknown')}"
94
 
95
- # 3. Perform the final, high-quality transcription using the identified language
96
  with torch.no_grad():
97
  transcription_ctc = model(waveform, identified_lang_code, "ctc")
98
  transcription_rnnt = model(waveform, identified_lang_code, "rnnt")
@@ -102,17 +127,16 @@ def transcribe_and_identify(audio_path):
102
 
103
  return detected_lang_str, transcription_ctc.strip(), transcription_rnnt.strip()
104
 
105
-
106
- # Gradio UI
107
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
108
  gr.Markdown(f"## {DESCRIPTION}")
109
  gr.Markdown("Upload or record audio in any of the 22 supported Indian languages. The app will automatically detect the language and provide the transcription using both CTC and RNNT decoding.")
110
-
111
  with gr.Row():
112
  with gr.Column(scale=1):
113
  audio = gr.Audio(label="Upload or Record Audio", type="filepath")
114
  transcribe_btn = gr.Button("Transcribe", variant="primary")
115
-
116
  with gr.Column(scale=2):
117
  detected_lang_output = gr.Label(label="Language Detection Result")
118
  gr.Markdown("### RNNT Transcription (More Accurate)")
@@ -121,8 +145,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
121
  ctc_output = gr.Textbox(lines=3, label="CTC Output")
122
 
123
  transcribe_btn.click(
124
- fn=transcribe_and_identify,
125
- inputs=[audio],
126
  outputs=[detected_lang_output, ctc_output, rnnt_output],
127
  api_name="transcribe"
128
  )
 
8
 
9
  DESCRIPTION = "IndicConformer-600M Multilingual ASR (CTC + RNNT) with Auto Language ID"
10
 
11
+ # --- Data Dictionaries ---
12
+
13
+ # Dictionary for character sets, now with improved formatting for readability.
14
+ LANGUAGE_CHARSETS = {
15
+ "as": set(['অ', 'আ', 'ই', 'ঈ', 'উ', 'ঊ', 'ঋ', 'এ', 'ঐ', 'ও', 'ঔ', 'ক', 'খ', 'গ', 'ঘ', 'ঙ', 'চ', 'ছ', 'জ', 'ঝ', 'ঞ', 'ট', 'ঠ', 'ড', 'ঢ', 'ণ', 'ত', 'থ', 'দ', 'ধ', 'ন', 'প', 'ফ', 'ব', 'ভ', 'ম', 'য', 'ৰ', 'ল', 'ৱ', 'শ', 'ষ', 'স', 'হ', 'ৎ', 'ং', 'ঃ', 'ঽ', 'া', 'ি', 'ী', 'ু', 'ূ', 'ৃ', 'ে', 'ৈ', 'ো', 'ৌ', '্']),
16
+ "bn": set(['অ', 'আ', 'ই', 'ঈ', 'উ', 'ঊ', 'ঋ', 'এ', 'ঐ', 'ও', 'ঔ', 'ক', 'খ', 'গ', 'ঘ', 'ঙ', 'চ', 'ছ', 'জ', 'ঝ', 'ঞ', 'ট', 'ঠ', 'ড', 'ঢ', 'ণ', 'ত', 'থ', 'দ', 'ধ', 'ন', 'প', 'ফ', 'ব', 'ভ', 'ম', 'য', 'র', 'ল', 'শ', 'ষ', 'স', 'হ', 'ৎ', 'ং', 'ঃ', 'ঽ', 'া', 'ি', 'ী', 'ু', 'ূ', 'ৃ', 'ে', 'ৈ', 'ো', 'ৌ', '্', 'ড়', 'ঢ়', 'য়']),
17
+ "br": set(['अ', 'आ', 'इ', 'ई', 'उ', 'ऊ', 'ऋ', 'ए', 'ऐ', 'ओ', 'औ', 'क', 'ख', 'ग', 'घ', 'ङ', 'च', 'छ', 'ज', 'झ', 'ञ', 'ट', 'ठ', 'ड', 'ढ', 'ण', 'त', 'थ', 'द', 'ध', 'न', 'प', 'फ', 'ब', 'भ', 'म', 'य', 'र', 'ल', 'व', 'श', 'ष', 'स', 'ह', '़', 'ा', 'ि', 'ी', 'ु', 'ূ', 'ृ', 'े', 'ै', 'ो', 'ौ', '्']),
18
+ "doi": set(['', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'ج', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', 'ि', '', '', '', 'े', '', 'ो', 'ौ', '्']),
19
+ "gu": set(['અ', 'આ', 'ઇ', 'ઈ', 'ઉ', 'ઊ', 'ઋ', 'એ', 'ઐ', 'ઓ', 'ઔ', 'ક', 'ખ', 'ગ', 'ઘ', 'ઙ', 'ચ', 'છ', 'જ', 'ઝ', 'ઞ', 'ટ', 'ઠ', 'ડ', 'ઢ', 'ણ', 'ત', 'થ', 'દ', 'ધ', 'ન', 'પ', 'ફ', 'બ', 'ભ', 'મ', 'ય', 'ર', 'લ', 'ળ', 'વ', 'શ', 'ષ', 'સ', 'હ', '઼', 'ા', 'િ', 'ી', 'ુ', 'ૂ', 'ૃ', 'ે', 'ૈ', 'ો', 'ૌ', '્']),
20
+ "hi": set(['अ', 'आ', 'इ', 'ई', 'उ', 'ऊ', 'ऋ', 'ए', 'ऐ', 'ओ', 'औ', 'क', 'ख', 'ग', 'घ', 'ङ', 'च', '', 'ज', 'झ', 'ञ', 'ट', 'ठ', 'ड', 'ढ', 'ण', 'त', 'थ', 'द', 'ध', 'न', 'प', 'फ', 'ब', 'भ', 'म', 'य', 'र', 'ल', 'व', 'श', 'ष', 'स', 'ह', 'ा', 'ि', 'ी', '', 'ू', 'ृ', 'े', 'ै', 'ो', 'ौ', '्']),
21
+ "kn": set(['ಅ', 'ಆ', 'ಇ', 'ಈ', 'ಉ', 'ಊ', 'ಋ', 'ಎ', 'ಏ', 'ಐ', 'ಒ', 'ಓ', 'ಔ', 'ಕ', 'ಖ', 'ಗ', 'ಘ', 'ಙ', 'ಚ', 'ಛ', 'ಜ', 'ಝ', 'ಞ', 'ಟ', 'ಠ', 'ಡ', 'ಢ', 'ಣ', 'ತ', 'ಥ', 'ದ', 'ಧ', 'ನ', 'ಪ', 'ಫ', 'ಬ', 'ಭ', 'ಮ', 'ಯ', 'ರ', 'ಲ', 'ವ', 'ಶ', 'ಷ', 'ಸ', 'ಹ', 'ಳ', 'ಱ', 'ಾ', 'ಿ', 'ೀ', 'ು', 'ೂ', 'ೃ', 'ೆ', 'ೇ', 'ೈ', 'ೊ', 'ೋ', 'ೌ', '್']),
22
+ "ks": set(['ا', 'آ', 'ب', 'پ', 'ت', 'ٹ', 'ث', 'ج', 'چ', 'ح', 'خ', 'د', 'ڈ', 'ذ', 'ر', 'ڑ', 'ز', 'ژ', 'س', 'ش', 'ص', 'ض', 'ط', 'ظ', 'ع', 'غ', 'ف', 'ق', 'ک', 'گ', 'ل', 'م', 'ن', 'و', 'ھ', 'ء', 'ی', 'ی', 'ے']),
23
+ "kok": set(['अ', 'आ', 'इ', 'ई', 'उ', 'ऊ', 'ऋ', 'ए', 'ऐ', 'ओ', 'औ', 'क', 'ख', 'ग', 'घ', 'ङ', 'च', 'छ', 'ज', 'झ', 'ञ', 'ट', 'ठ', 'ड', 'ढ', 'ण', 'त', 'थ', 'द', 'ध', 'न', 'प', 'फ', 'ब', 'भ', 'म', 'य', 'र', 'ल', 'व', 'श', 'ष', 'स', 'ह', 'ा', 'ि', 'ी', 'ु', 'ू', 'ृ', 'े', 'ै', 'ो', 'ौ', '्']),
24
+ "mai": set(['अ', 'आ', 'इ', 'ई', 'उ', 'ऊ', 'ऋ', 'ए', 'ऐ', 'ओ', 'औ', 'क', 'ख', 'ग', 'घ', 'ङ', 'च', 'छ', 'ज', 'झ', 'ञ', 'ट', 'ठ', 'ड', 'ढ', 'ण', 'त', 'थ', 'द', 'ध', 'न', 'प', 'फ', 'ब', 'भ', 'म', 'य', 'र', 'ल', 'व', 'श', 'ष', 'स', 'ह', 'ा', 'ि', 'ी', 'ु', 'ू', 'ृ', 'े', 'ै', 'ो', 'ौ', '्']),
25
+ "ml": set(['അ', 'ആ', 'ഇ', 'ഈ', 'ഉ', 'ഊ', 'ഋ', 'എ', 'ഏ', 'ഐ', 'ഒ', 'ഓ', 'ഔ', 'ക', 'ഖ', 'ഗ', 'ഘ', 'ങ', 'ച', 'ഛ', 'ജ', 'ഝ', 'ഞ', 'ട', 'ഠ', 'ഡ', 'ഢ', 'ണ', 'ത', 'ഥ', 'ദ', 'ധ', 'ന', 'പ', 'ഫ', 'ബ', 'ഭ', 'മ', 'യ', 'ര', 'ല', 'വ', 'ശ', 'ഷ', 'സ', 'ഹ', 'ള', 'ഴ', 'റ', 'ാ', 'ി', 'ീ', 'ു', 'ൂ', 'ൃ', 'െ', 'േ', 'ൈ', 'ൊ', 'ോ', 'ൌ', '്']),
26
+ "mni": set(['ꯑ', '꯲', '꯳', '꯴', '꯵', '꯶', '꯷', '꯸', '꯹', '꯺', '꯻', '꯼', '꯽', '꯾', '꯿', 'ꯀ', 'ꯂ', 'ꯃ', 'ꯄ', 'ꯅ', 'ꯆ', 'ꯇ', 'ꯈ', 'ꯉ', 'ꯊ', 'ꯋ', 'ꯌ', 'ꯍ', 'ꯎ', 'ꯏ', 'ꯐ', 'ꯑ']),
27
+ "mr": set(['अ', 'आ', 'इ', 'ई', 'उ', 'ऊ', 'ऋ', 'ए', 'ऐ', 'ओ', 'औ', 'क', 'ख', 'ग', 'घ', 'ङ', 'च', 'छ', 'ज', 'झ', 'ञ', 'ट', 'ठ', 'ड', 'ढ', 'ण', 'त', 'थ', 'द', 'ध', 'न', 'प', 'फ', 'ब', 'भ', 'म', 'य', 'र', 'ल', 'व', 'श', 'ष', 'स', 'ह', 'ा', 'ि', 'ी', 'ु', 'ू', 'ृ', 'े', 'ै', 'ो', 'ौ', '्', 'ळ']),
28
+ "ne": set(['अ', 'आ', 'इ', 'ई', 'उ', 'ऊ', 'ऋ', 'ए', 'ऐ', 'ओ', 'औ', 'क', 'ख', 'ग', 'घ', 'ङ', 'च', 'छ', 'ज', 'झ', 'ञ', 'ट', 'ठ', 'ड', 'ढ', 'ण', 'त', 'थ', 'द', 'ध', 'न', 'प', 'फ', 'ब', 'भ', 'म', 'य', 'र', 'ल', 'व', 'श', 'ष', 'स', 'ह', 'ा', 'ि', 'ी', 'ु', 'ू', 'ृ', 'े', 'ै', 'ो', 'ौ', '्']),
29
+ "or": set(['ଅ', 'ଆ', 'ଇ', 'ଈ', 'ଉ', 'ଊ', 'ଋ', 'ଏ', 'ଐ', 'ଓ', 'ଔ', 'କ', 'ଖ', 'ଗ', 'ଘ', 'ଙ', 'ଚ', 'ଛ', 'ଜ', 'ଝ', 'ଞ', 'ଟ', 'ଠ', 'ଡ', 'ଢ', 'ଣ', 'ତ', 'ଥ', 'ଦ', 'ଧ', 'ନ', 'ପ', 'ଫ', 'ବ', 'ଭ', 'ମ', 'ଯ', 'ର', 'ଲ', 'ଳ', 'ବ', 'ଶ', 'ଷ', 'ସ', 'ହ', 'କ୍ଷ', 'ା', 'ି', 'ୀ', 'ୁ', 'ୂ', 'ୃ', 'େ', 'ୈ', 'ୋ', 'ୌ', '୍']),
30
+ "pa": set(['ਅ', 'ਆ', 'ਇ', 'ਈ', 'ਉ', 'ਊ', 'ਏ', 'ਐ', 'ਓ', 'ਔ', 'ਕ', 'ਖ', 'ਗ', 'ਘ', 'ਙ', 'ਚ', 'ਛ', 'ਜ', 'ਝ', 'ਞ', 'ਟ', 'ਠ', 'ਡ', 'ਢ', 'ਣ', 'ਤ', 'ਥ', 'ਦ', 'ਧ', 'ਨ', 'ਪ', 'ਫ', 'ਬ', 'ਭ', 'ਮ', 'ਯ', 'ਰ', 'ਲ', 'ਵ', 'ਸ਼', 'ਸ', 'ਹ', 'ਖ਼', 'ਗ਼', 'ਜ਼', 'ੜ', 'ਫ਼', 'ਲ਼', 'ਿ', 'ੀ', 'ੁ', 'ੂ', 'ੇ', 'ੈ', 'ੋ', 'ੌ', '੍']),
31
+ "sa": set(['अ', 'आ', 'इ', 'ई', 'उ', 'ऊ', 'ऋ', 'ए', 'ऐ', 'ओ', 'औ', 'क', 'ख', 'ग', 'घ', 'ङ', 'च', 'छ', 'ज', 'झ', 'ञ', 'ट', 'ठ', 'ड', 'ढ', 'ण', 'त', 'थ', 'द', 'ध', 'न', 'प', 'फ', 'ब', 'भ', 'म', 'य', 'र', 'ल', 'व', 'श', 'ष', 'स', 'ह', 'ा', 'ि', 'ी', 'ु', 'ू', 'ृ', 'े', 'ै', 'ो', 'ौ', '्']),
32
+ "sat": set(['ᱚ', 'ᱛ', 'ᱚ', 'ᱜ', 'ᱚ', 'ᱝ', 'ᱞ', 'ᱟ', 'ᱠ', 'ᱥ', 'ᱮ', 'ᱫ', 'ఇ', 'ᱤ', 'ᱩ', 'ੂ', 'େ', 'ୈ', 'ᱪ', 'ᱡ', 'ᱭ']),
33
+ "sd": set(['ا', 'آ', 'ب', 'ڀ', 'ت', 'ٽ', 'ث', 'پ', 'ج', 'ڄ', 'جھ', 'چ', 'ح', 'خ', 'ڌ', 'د', 'ڏ', 'ڊ', 'ذ', 'ر', 'ز', 'ڙ', 'ژ', 'س', 'ش', 'ص', 'ض', 'ط', 'ظ', 'ع', 'غ', 'ف', 'ڦ', 'ق', 'ڪ', 'ک', 'گ', 'ڳ', 'ڱ', 'ل', 'م', 'ن', 'و', 'ھ', 'ء', 'ي']),
34
+ "ta": set(['அ', 'ஆ', 'இ', 'ஈ', 'உ', 'ஊ', 'எ', 'ஏ', 'ஐ', 'ஒ', 'ஓ', 'ஔ', 'க', 'ங', 'ச', 'ஞ', 'ட', 'ண', 'த', 'ந', 'ன', 'ப', 'ம', 'ய', 'ர', 'ல', 'வ', 'ழ', 'ள', 'ஷ', 'ஸ', 'ஹ']),
35
+ "te": set(['అ', 'ఆ', 'ఇ', 'ఈ', 'ఉ', 'ఊ', 'ఋ', 'ఎ', 'ఏ', 'ఐ', 'ఒ', 'ఓ', 'ఔ', 'క', 'ఖ', 'గ', 'ఘ', 'ఙ', 'చ', 'ఛ', 'జ', 'ఝ', 'ఞ', 'ట', 'ఠ', 'డ', 'ఢ', 'ణ', 'త', 'థ', 'ద', 'ధ', 'న', 'ప', 'ఫ', 'బ', 'భ', 'మ', 'య', 'ర', 'ల', 'వ', 'శ', 'ష', 'స', 'హ', 'ళ', 'క్ష', 'ఱ', 'ా', 'ి', 'ీ', 'ు', 'ూ', 'ృ', 'ె', 'ే', 'ై', 'ొ', 'ో', 'ౌ', '్']),
36
+ "ur": set(['ا', 'آ', 'ب', 'پ', 'ت', 'ٹ', 'ث', 'ج', 'چ', 'ح', 'خ', 'د', 'ڈ', 'ذ', 'ر', 'ڑ', 'ز', 'ژ', 'س', 'ش', 'ص', 'ض', 'ط', 'ظ', 'ع', 'غ', 'ف', 'ق', 'ک', 'گ', 'ل', 'م', 'ن', 'و', 'ھ', 'ء', 'ی', 'ی', 'ے']),
37
  }
38
 
39
+ # Dictionary for common words of each language
40
# High-frequency words per language, used as a secondary language-ID signal.
# NOTE: these are matched against tokens produced by splitting the text on
# whitespace/punctuation, so every entry must be an exact, unpadded token.
LANGUAGE_COMMON_WORDS = {
    "as": set(["আৰু", "হয়", "এটা", "কৰা", "ওপৰত", "যে"]),
    "bn": set(["এবং", "একটি", "করুন", "জন্য", "সঙ্গে", "হচ্ছে"]),
    "br": set(["आरो", "एसे", "मोनसे", "माव", "दं", "जा"]),
    "doi": set(["ते", "दे", "ऐ", "इक", "ओह्", "कर"]),
    "gu": set(["અને", "એક", "માટે", "છે", "સાથે", "કરવું"]),
    "hi": set(["और", "है", "एक", "में", "के", "लिए"]),
    "kn": set(["ಮತ್ತು", "ಒಂದು", "ಹೇಗೆ", "ನಾನು", "ಇದೆ", "ಆ"]),
    # FIX: "تہٕ" had a leading space, so it could never match a split token.
    # Also removed the Devanagari entries "छु"/"छ": they cannot occur in the
    # Perso-Arabic output this app scores for Kashmiri, and "छ" collided with
    # the Nepali word list.
    "ks": set(["تہٕ", "چھُ", "اکھ", "منز", "کیتھ"]),
    "kok": set(["आनी", "एक", "कर", "खातीर", "कडेन", "आसा"]),
    "mai": set(["आ", "एक", "हम", "अछि", "क'", "छै"]),
    "ml": set(["ഒരു", "കൂടാതെ", "എങ്ങനെ", "ഞാൻ", "ഇത്", "ആണ്"]),
    "mni": set(["ꯗꯥ", "ꯑꯃꯥ", "ꯀꯔꯤ", "ꯑꯩꯅꯥ", "ꯑꯁꯤ", "ꯂꯩ"]),
    "mr": set(["आणि", "एक", "आहे", "मी", "तू", "जे"]),
    "ne": set(["र", "एक", "हो", "म", "तिमी", "छ"]),
    "or": set(["ଏବଂ", "ଗୋଟିଏ", "କରନ୍ତୁ", "ପାଇଁ", "ସହିତ", "ଅଛି"]),
    "pa": set(["ਅਤੇ", "ਇੱਕ", "ਹੈ", "ਵਿੱਚ", "ਨੂੰ", "ਦਾ"]),
    "sa": set(["च", "एकः", "अस्ति", "अहम्", "त्वम्", "सः"]),
    # FIX: removed the Kannada words "ಒಂದು"/"ಮತ್ತು" (duplicated from the kn
    # list — they awarded Santali points on Kannada text) and Bengali "ক".
    # Only Ol Chiki tokens remain.
    "sat": set(["ᱟᱨ", "ᱫᱚ", "ᱢᱤᱫ"]),
    "sd": set(["۽", "هڪ", "آهي", "۾", "کي", "جو"]),
    "ta": set(["மற்றும்", "ஒரு", "வேண்டும்", "நான்", "இது", "ஆகும்"]),
    "te": set(["మరియు", "ఒక", "కావాలి", "నేను", "ఇది", "ఉంది"]),
    "ur": set(["اور", "ہے", "ایک", "میں", "کے", "لیے"]),
}
64
+
65
# Display names for every supported language code (shown in the UI label).
LANGUAGE_CODE_TO_NAME = {
    "as": "Assamese",
    "bn": "Bengali",
    "br": "Bodo",
    "doi": "Dogri",
    "gu": "Gujarati",
    "hi": "Hindi",
    "kn": "Kannada",
    "ks": "Kashmiri",
    "kok": "Konkani",
    "mai": "Maithili",
    "ml": "Malayalam",
    "mni": "Manipuri",
    "mr": "Marathi",
    "ne": "Nepali",
    "or": "Odia",
    "pa": "Punjabi",
    "sa": "Sanskrit",
    "sat": "Santali",
    "sd": "Sindhi",
    "ta": "Tamil",
    "te": "Telugu",
    "ur": "Urdu",
}

# Prefer the GPU when one is visible to this process.
device = "cuda" if torch.cuda.is_available() else "cpu"
68
 
69
# --- Model Loading ---
# Load the 600M multilingual Conformer once at import time so Gradio workers
# share a single instance. trust_remote_code=True executes the model repo's
# custom modeling code — acceptable only because the repo id is pinned here.
print("Loading IndicConformer model...")
model = AutoModel.from_pretrained("ai4bharat/indic-conformer-600m-multilingual", trust_remote_code=True).to(device)
# Inference-only mode: disables dropout / batch-norm statistic updates.
model.eval()
print("✅ Model loaded successfully.")
74
 
75
+ # --- Core Logic ---
76
def identify_language(text: str) -> str | None:
    """Return the code of the language whose script characters and common
    words best match *text*, or None when the evidence is too weak."""
    if not text.strip():
        return None

    observed_chars = set(text)
    observed_tokens = set(re.split(r'[\s,.:;!?]+', text))

    # Each matching script character counts double; each common word once.
    # Languages present in the charset table but absent from the word table
    # (or vice versa) simply contribute zero for the missing component.
    scores = {
        code: 2 * len(observed_chars & LANGUAGE_CHARSETS.get(code, set()))
        + len(observed_tokens & LANGUAGE_COMMON_WORDS.get(code, set()))
        for code in LANGUAGE_CHARSETS
    }

    # Demand a minimum amount of evidence before committing to a guess.
    if max(scores.values()) < 3:
        return None

    return max(scores, key=scores.get)
96
 
 
97
  @spaces.GPU
98
  def transcribe_and_identify(audio_path):
99
  if not audio_path:
 
107
  return f"Error loading audio: {e}", "", ""
108
 
109
  try:
 
110
  with torch.no_grad():
111
  initial_transcription = model(waveform, "hi", "ctc")
112
+
 
113
  identified_lang_code = identify_language(initial_transcription)
114
 
115
  if not identified_lang_code:
 
118
 
119
  detected_lang_str = f"Detected Language: {LANGUAGE_CODE_TO_NAME.get(identified_lang_code, 'Unknown')}"
120
 
 
121
  with torch.no_grad():
122
  transcription_ctc = model(waveform, identified_lang_code, "ctc")
123
  transcription_rnnt = model(waveform, identified_lang_code, "rnnt")
 
127
 
128
  return detected_lang_str, transcription_ctc.strip(), transcription_rnnt.strip()
129
 
130
+ # --- Gradio UI ---
 
131
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
132
  gr.Markdown(f"## {DESCRIPTION}")
133
  gr.Markdown("Upload or record audio in any of the 22 supported Indian languages. The app will automatically detect the language and provide the transcription using both CTC and RNNT decoding.")
134
+
135
  with gr.Row():
136
  with gr.Column(scale=1):
137
  audio = gr.Audio(label="Upload or Record Audio", type="filepath")
138
  transcribe_btn = gr.Button("Transcribe", variant="primary")
139
+
140
  with gr.Column(scale=2):
141
  detected_lang_output = gr.Label(label="Language Detection Result")
142
  gr.Markdown("### RNNT Transcription (More Accurate)")
 
145
  ctc_output = gr.Textbox(lines=3, label="CTC Output")
146
 
147
  transcribe_btn.click(
148
+ fn=transcribe_and_identify,
149
+ inputs=[audio],
150
  outputs=[detected_lang_output, ctc_output, rnnt_output],
151
  api_name="transcribe"
152
  )