File size: 2,060 Bytes
5db0355
 
 
 
 
e1b40db
5db0355
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import contextlib
import os
import tempfile

import gradio as gr
import torchaudio
from transformers import pipeline

# Build the speech-recognition pipeline once, at import time, from the
# 01Yassine/moulsot_v0.2_1000 checkpoint; every request reuses this object.
asr_pipeline = pipeline("automatic-speech-recognition", model="01Yassine/moulsot_v0.2_1000")

# Copy the checkpoint's forced decoder ids onto `generation_config.input_ids`
# and then clear `forced_decoder_ids`.
# NOTE(review): presumably a workaround for transformers versions that reject
# `forced_decoder_ids` during generation -- confirm it is still required.
_generation_config = asr_pipeline.model.generation_config
_generation_config.input_ids = _generation_config.forced_decoder_ids
_generation_config.forced_decoder_ids = None


def ensure_mono_16k(audio_path: str) -> str:
    """Write a mono, 16 kHz WAV copy of *audio_path* and return its path.

    The audio is loaded with torchaudio, down-mixed to a single channel by
    averaging when it has more than one, and resampled when its rate is not
    16 kHz.

    Each call writes to its own uniquely named temporary file. A fixed,
    shared path would let concurrent requests overwrite each other's audio.

    Args:
        audio_path: Path of the audio file to normalise.

    Returns:
        Path of the newly created temporary WAV file. The caller owns this
        file and is responsible for deleting it.

    Raises:
        Whatever ``torchaudio.load`` / ``torchaudio.save`` raise for an
        unreadable or unwritable file; the temporary file is removed before
        a save error propagates.
    """
    target_sr = 16000
    waveform, sr = torchaudio.load(audio_path)

    # Convert to mono if necessary (average the channels, keep the 2-D shape).
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # Resample to 16 kHz if necessary.
    if sr != target_sr:
        resampler = torchaudio.transforms.Resample(sr, target_sr)
        waveform = resampler(waveform)
        sr = target_sr

    # mkstemp guarantees a unique name in the platform's temp directory.
    # Close the descriptor straight away: torchaudio reopens the file by path.
    fd, tmp_path = tempfile.mkstemp(prefix="processed_16k_", suffix=".wav")
    os.close(fd)
    try:
        torchaudio.save(tmp_path, waveform, sr)
    except Exception:
        # Do not leave an empty or partial file behind when saving fails.
        with contextlib.suppress(OSError):
            os.remove(tmp_path)
        raise
    return tmp_path


def transcribe(audio):
    """Transcribe the recorded or uploaded audio file.

    Args:
        audio: Path of the audio file supplied by the Gradio audio
            component, or ``None`` when nothing was provided.

    Returns:
        The transcription text, or a prompt asking for audio when *audio*
        is ``None``.
    """
    if audio is None:
        return "Please record or upload an audio file."

    # Process and transcribe
    processed_audio = ensure_mono_16k(audio)
    try:
        return asr_pipeline(processed_audio)["text"]
    finally:
        # The normalised copy is only needed for this request; remove it even
        # when the pipeline raises so temp files do not accumulate.
        with contextlib.suppress(OSError):
            os.remove(processed_audio)


# Page title and Markdown blurb shown at the top of the demo.
# The emoji were stored as mojibake (UTF-8 bytes decoded with a single-byte
# codepage); they are restored to the intended characters here.
title = "🎙️ Moul-Sout ASR 🇲🇦"
description = """
**Moul-Sout** model for Darija ASR 🇲🇦.
You can record or upload an audio sample (it will be automatically resampled to 16 kHz mono),
and view the transcription result below.
"""

# Assemble the UI: a header, an audio input (microphone or upload, handed to
# the callback as a file path), a button, and a textbox for the result.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}\n{description}")

    with gr.Row():
        audio_input = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="🎀 Record or Upload Audio (auto 16 kHz mono)",
        )

    # The button label's emoji was mojibake; restored to the intended rocket.
    transcribe_btn = gr.Button("🚀 Transcribe")

    output_text = gr.Textbox(label="🟩 Transcription Output")

    # Clicking the button runs `transcribe` on the audio path and shows the
    # returned text in the output box.
    transcribe_btn.click(
        fn=transcribe,
        inputs=[audio_input],
        outputs=[output_text],
    )

# Local launch
if __name__ == "__main__":
    demo.launch()