File size: 2,133 Bytes
2639ef8
 
 
 
b95b5e5
2639ef8
 
4d9a4c3
2639ef8
 
2721455
4d9a4c3
 
 
2639ef8
 
 
 
 
 
 
 
 
 
 
 
4d9a4c3
 
 
 
 
2639ef8
 
4d9a4c3
 
 
 
2639ef8
4d9a4c3
2639ef8
4d9a4c3
2639ef8
 
4d9a4c3
2639ef8
 
4d9a4c3
 
2639ef8
 
4d9a4c3
b95b5e5
2639ef8
 
 
 
 
 
 
 
 
4d9a4c3
2639ef8
b95b5e5
2639ef8
 
 
4d9a4c3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import os
# These variables are set before the numeric libraries below are imported.
# NOTE(review): presumably this caps the OpenMP / OpenBLAS / MKL thread pools
# at one thread each (e.g. for a small shared CPU host) — confirm the
# deployment reason before changing.
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"

import torch
import gradio as gr
from transformers import AutoFeatureExtractor, Wav2Vec2ForSequenceClassification
import librosa
# Load the MMS-LID feature extractor and classifier once, at import time.
# Any failure is printed and leaves both `processor` and `model` set to None;
# detect_language_mms checks for that before attempting inference.
try:
    model_id = "facebook/mms-lid-4017"
    print(f"Loading MMS-LID model: {model_id}...")
    processor = AutoFeatureExtractor.from_pretrained(model_id)
    model = Wav2Vec2ForSequenceClassification.from_pretrained(model_id)
    # Switch the network to evaluation mode; this script only runs inference.
    model.eval()
    print("MMS-LID model loaded successfully.")
except Exception as e:
    # Deliberate best-effort: keep the module importable so the UI can still
    # start and report the failure to the user instead of crashing.
    print(f"Error loading MMS-LID model: {e}")
    processor = None
    model = None

def load_audio(file_path: str):
    """Read an audio file as a mono waveform at a 16 kHz sample rate.

    Args:
        file_path: Path of the audio file to decode.

    Returns:
        The sample array produced by ``librosa.load``. The sample rate that
        ``librosa.load`` also returns is discarded, since it is fixed by the
        ``sr=16000`` request.
    """
    samples, _ = librosa.load(file_path, sr=16000, mono=True)
    return samples
def detect_language_mms(audio_file_path: str) -> str:
    """Classify the spoken language of an audio file with the MMS-LID model.

    Args:
        audio_file_path: Path to the recorded or uploaded audio file.

    Returns:
        A Markdown string. On success it contains the label looked up in
        ``model.config.id2label`` for the highest-scoring class; otherwise it
        is a warning (missing/empty audio) or an error message (model not
        loaded, or any exception raised while processing).
    """
    # The module-level load sets both globals to None when it fails.
    if processor is None or model is None:
        return "❌ MMS-LID model failed to initialize."

    # Reject a missing argument as well as a path that is not on disk.
    if not (audio_file_path and os.path.exists(audio_file_path)):
        return "⚠️ Please upload or record a valid audio file."

    try:
        samples = load_audio(audio_file_path)
        if not samples.size:
            return "⚠️ Audio file is empty."

        # Convert the raw waveform into the tensors the model expects.
        encoded = processor(
            samples,
            sampling_rate=16000,
            return_tensors="pt",
            padding=True,
        )

        # Inference only, so gradient tracking is switched off.
        with torch.no_grad():
            outputs = model(**encoded)
        scores = outputs.logits

        best_index = torch.argmax(scores, dim=-1).item()
        label = model.config.id2label[best_index]
        return f"### 🌐 Detected Language (MMS-LID)\n**{label}**"
    except Exception as exc:
        # UI boundary: log the failure and show it to the user rather than
        # letting the exception propagate into Gradio.
        print(f"MMS-LID detection error: {exc}")
        return f"❌ Detection error: {exc!s}"

# Gradio UI definition: one audio input wired to detect_language_mms, with
# the returned string rendered as Markdown.
iface = gr.Interface(
    fn=detect_language_mms,
    inputs=gr.Audio(
        sources=["microphone", "upload"],
        # "filepath" matches detect_language_mms, which takes a path string
        # and checks it with os.path.exists.
        type="filepath",
        label="Input Audio"
    ),
    outputs=gr.Markdown(label="Detected Language"),
    title="Facebook MMS-LID Language Detection",
    description="Upload or record audio. This uses Facebook’s MMS-LID model for language detection.",
)

# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    iface.launch()