"""Gradio demo: identify the spoken language of an audio clip with MMS-LID.

On import, the module tries to load the ``facebook/mms-lid-4017`` feature
extractor and classifier. If loading fails, ``processor`` and ``model`` are
set to ``None`` and the UI reports the failure instead of crashing.
"""

import os

# Cap the native math libraries at one thread each. These variables are set
# before torch / librosa are imported so that they are already in place when
# those libraries start up; keep this block above the imports below.
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"

import torch
import gradio as gr
from transformers import AutoFeatureExtractor, Wav2Vec2ForSequenceClassification
import librosa

# Single source of truth for the sampling rate: audio is resampled to this
# rate on load, and the same value is passed to the feature extractor.
TARGET_SAMPLE_RATE = 16000

try:
    model_id = "facebook/mms-lid-4017"
    print(f"Loading MMS-LID model: {model_id}...")
    processor = AutoFeatureExtractor.from_pretrained(model_id)
    model = Wav2Vec2ForSequenceClassification.from_pretrained(model_id)
    model.eval()
    print("MMS-LID model loaded successfully.")
except Exception as e:
    # Deliberate best-effort: keep the app importable and let the UI callback
    # report the problem to the user.
    print(f"Error loading MMS-LID model: {e}")
    processor = None
    model = None


def load_audio(file_path: str):
    """Load an audio file as a mono waveform at ``TARGET_SAMPLE_RATE``.

    Args:
        file_path: Path of the audio file to decode.

    Returns:
        The waveform array produced by ``librosa.load``; the sample rate it
        also returns is discarded because it is fixed by the ``sr`` argument.
    """
    waveform, _ = librosa.load(file_path, sr=TARGET_SAMPLE_RATE, mono=True)
    return waveform


def detect_language_mms(audio_file_path: str) -> str:
    """Predict the language of an audio file and format it as Markdown.

    Args:
        audio_file_path: Path to the uploaded or recorded audio file, as
            supplied by the Gradio ``Audio`` component (``type="filepath"``).

    Returns:
        A Markdown string containing the predicted label from
        ``model.config.id2label``, or a user-facing warning/error message
        when the model is unavailable, the path is missing, the audio is
        empty, or inference raises.
    """
    if processor is None or model is None:
        return "❌ MMS-LID model failed to initialize."

    if not audio_file_path or not os.path.exists(audio_file_path):
        return "⚠️ Please upload or record a valid audio file."

    # UI boundary: any failure below is logged and turned into a message
    # rather than propagated to Gradio.
    try:
        waveform = load_audio(audio_file_path)
        if waveform.size == 0:
            return "⚠️ Audio file is empty."

        # Prepare model inputs from the raw waveform.
        inputs = processor(
            waveform,
            sampling_rate=TARGET_SAMPLE_RATE,
            return_tensors="pt",
            padding=True,
        )

        # Inference only, so gradient tracking is disabled.
        with torch.no_grad():
            logits = model(**inputs).logits

        predicted_id = torch.argmax(logits, dim=-1).item()
        lang = model.config.id2label[predicted_id]
        return f"### 🌐 Detected Language (MMS-LID)\n**{lang}**"
    except Exception as e:
        print(f"MMS-LID detection error: {e}")
        return f"❌ Detection error: {str(e)}"


iface = gr.Interface(
    fn=detect_language_mms,
    inputs=gr.Audio(
        sources=["microphone", "upload"],
        type="filepath",
        label="Input Audio",
    ),
    outputs=gr.Markdown(label="Detected Language"),
    title="Facebook MMS-LID Language Detection",
    description="Upload or record audio. This uses Facebook’s MMS-LID model for language detection.",
)

if __name__ == "__main__":
    iface.launch()