ASR_New

Runtime error

App Files Files Community

Noumida committed on Aug 11, 2025

Commit

2fa52c3

verified ·

1 Parent(s): 3627a6f

Update app.py

Browse files

Files changed (1) hide show

app.py +6 -9

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ import torch
 import torchaudio
 import gradio as gr
 import spaces
-# Import the correct AutoModel class for the task
 from transformers import AutoModel, AutoModelForAudioClassification, Wav2Vec2FeatureExtractor
 DESCRIPTION = "IndicConformer ASR with Automatic Language Identification"
@@ -20,18 +19,17 @@ asr_model = AutoModel.from_pretrained(asr_model_id, trust_remote_code=True).to(d
 asr_model.eval()
 print("✅ ASR Model loaded.")
-# Language Identification (LID) Model
 print("\nLoading Language ID model (MMS-LID-1024)...")
 lid_model_id = "facebook/mms-lid-1024"
 lid_processor = Wav2Vec2FeatureExtractor.from_pretrained(lid_model_id)
-# Load the model with its audio classification head to get logits
-lid_model = AutoModelForAudioClassification.from_pretrained(lid_model_id).to(device) # <-- THIS LINE IS UPDATED
 lid_model.eval()
 print("✅ Language ID Model loaded.")
 # --- Language Mappings ---
-# Maps the LID model's output code to the ASR model's code
 LID_TO_ASR_LANG_MAP = {
     "asm_Beng": "as", "ben_Beng": "bn", "brx_Deva": "br", "doi_Deva": "doi",
     "guj_Gujr": "gu", "hin_Deva": "hi", "kan_Knda": "kn", "kas_Arab": "ks",
@@ -41,7 +39,6 @@ LID_TO_ASR_LANG_MAP = {
     "tam_Taml": "ta", "tel_Telu": "te", "urd_Arab": "ur"
 }
-# Maps the ASR model's code back to a full name for display
 ASR_CODE_TO_NAME = { "as": "Assamese", "bn": "Bengali", "br": "Bodo", "doi": "Dogri", "gu": "Gujarati", "hi": "Hindi", "kn": "Kannada", "ks": "Kashmiri", "kok": "Konkani", "mai": "Maithili", "ml": "Malayalam", "mni": "Manipuri", "mr": "Marathi", "ne": "Nepali", "or": "Odia", "pa": "Punjabi", "sa": "Sanskrit", "sat": "Santali", "sd": "Sindhi", "ta": "Tamil", "te": "Telugu", "ur": "Urdu"}
@@ -51,7 +48,6 @@ def transcribe_audio_with_lid(audio_path):
         return "Please provide an audio file.", "", ""
     try:
-        # Load and preprocess audio
         waveform, sr = torchaudio.load(audio_path)
         waveform_16k = torchaudio.functional.resample(waveform, sr, 16000)
     except Exception as e:
@@ -63,8 +59,9 @@ def transcribe_audio_with_lid(audio_path):
         with torch.no_grad():
             outputs = lid_model(**inputs)
-        # This will now work because the output object has the .logits attribute
-        predicted_lid_id = outputs.logits.argmax(-1).item()
         detected_lid_code = lid_model.config.id2label[predicted_lid_id]
         # 2. --- Map to ASR Language Code ---

 import torchaudio
 import gradio as gr
 import spaces
 from transformers import AutoModel, AutoModelForAudioClassification, Wav2Vec2FeatureExtractor
 DESCRIPTION = "IndicConformer ASR with Automatic Language Identification"
 asr_model.eval()
 print("✅ ASR Model loaded.")
+# Language Identification (LID) Model - Using your specified model
 print("\nLoading Language ID model (MMS-LID-1024)...")
 lid_model_id = "facebook/mms-lid-1024"
 lid_processor = Wav2Vec2FeatureExtractor.from_pretrained(lid_model_id)
+# Load the model with its audio classification head
+lid_model = AutoModelForAudioClassification.from_pretrained(lid_model_id).to(device)
 lid_model.eval()
 print("✅ Language ID Model loaded.")
 # --- Language Mappings ---
 LID_TO_ASR_LANG_MAP = {
     "asm_Beng": "as", "ben_Beng": "bn", "brx_Deva": "br", "doi_Deva": "doi",
     "guj_Gujr": "gu", "hin_Deva": "hi", "kan_Knda": "kn", "kas_Arab": "ks",
     "tam_Taml": "ta", "tel_Telu": "te", "urd_Arab": "ur"
 }
 ASR_CODE_TO_NAME = { "as": "Assamese", "bn": "Bengali", "br": "Bodo", "doi": "Dogri", "gu": "Gujarati", "hi": "Hindi", "kn": "Kannada", "ks": "Kashmiri", "kok": "Konkani", "mai": "Maithili", "ml": "Malayalam", "mni": "Manipuri", "mr": "Marathi", "ne": "Nepali", "or": "Odia", "pa": "Punjabi", "sa": "Sanskrit", "sat": "Santali", "sd": "Sindhi", "ta": "Tamil", "te": "Telugu", "ur": "Urdu"}
         return "Please provide an audio file.", "", ""
     try:
         waveform, sr = torchaudio.load(audio_path)
         waveform_16k = torchaudio.functional.resample(waveform, sr, 16000)
     except Exception as e:
         with torch.no_grad():
             outputs = lid_model(**inputs)
+        # CORRECTED: Access logits as the first element of the output tuple
+        logits = outputs[0]
+        predicted_lid_id = logits.argmax(-1).item()
         detected_lid_code = lid_model.config.id2label[predicted_lid_id]
         # 2. --- Map to ASR Language Code ---