|
|
import contextlib
import os
import tempfile

import gradio as gr
import torchaudio
from transformers import pipeline
|
|
|
|
|
|
|
|
# Build the speech-recognition pipeline once at import time so every request
# reuses the same loaded model.
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="01Yassine/moulsot_v0.2_1000",
)

# Copy the model's forced decoder ids onto `input_ids` of the generation
# config, then clear `forced_decoder_ids`.
# NOTE(review): presumably a workaround for how newer transformers releases
# treat `forced_decoder_ids` during generation -- confirm it is still needed.
_generation_config = asr_pipeline.model.generation_config
_generation_config.input_ids = _generation_config.forced_decoder_ids
_generation_config.forced_decoder_ids = None
|
|
|
|
|
|
|
|
def ensure_mono_16k(audio_path):
    """Load audio, convert it to mono 16 kHz, and save it to a temporary WAV.

    Args:
        audio_path: Path of the audio file to read with ``torchaudio.load``.

    Returns:
        Path of a newly created ``.wav`` file containing the processed audio.
        Every call writes to its own file; the caller owns that file and
        should delete it once it is no longer needed.

    Raises:
        Whatever ``torchaudio.load`` / ``torchaudio.save`` raise for an
        unreadable input or a failed write.
    """
    target_sr = 16000
    waveform, sr = torchaudio.load(audio_path)

    # More than one channel: down-mix by averaging across the channel axis.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    if sr != target_sr:
        resampler = torchaudio.transforms.Resample(sr, target_sr)
        waveform = resampler(waveform)

    # Use a unique file per call. A single fixed path would be overwritten
    # when two requests are processed at the same time, so one caller could
    # end up transcribing another caller's audio.
    fd, tmp_path = tempfile.mkstemp(prefix="processed_16k_", suffix=".wav")
    os.close(fd)  # torchaudio reopens the file by path

    try:
        torchaudio.save(tmp_path, waveform, target_sr)
    except Exception:
        # Do not leave an empty or partial file behind when saving fails.
        with contextlib.suppress(OSError):
            os.remove(tmp_path)
        raise

    return tmp_path
|
|
|
|
|
|
|
|
def transcribe(audio):
    """Transcribe a recorded or uploaded audio file.

    Args:
        audio: Filesystem path of the audio file, or ``None`` / an empty
            string when nothing was provided.

    Returns:
        The transcription text, or a prompt asking for audio when no input
        was given.
    """
    # Treat an empty path like a missing one instead of handing it to the
    # audio loader.
    if not audio:
        return "Please record or upload an audio file."

    processed_audio = ensure_mono_16k(audio)
    try:
        return asr_pipeline(processed_audio)["text"]
    finally:
        # Remove the intermediate WAV so user audio does not linger on disk.
        # Never delete the caller's own input file.
        if processed_audio != audio:
            with contextlib.suppress(OSError):
                os.remove(processed_audio)
|
|
|
|
|
|
|
|
# Page title; also rendered as the top-level Markdown heading of the app.
# The emoji were mis-encoded (mojibake) and are restored here.
title = "🎙️ Moul-Sout ASR 🇲🇦"

# Markdown blurb displayed under the heading.
description = """
**Moul-Sout** model for Darija ASR 🇲🇦.
You can record or upload an audio sample (it will be automatically resampled to 16 kHz mono),
and view the transcription result below.
"""
|
|
|
|
|
# Assemble the Gradio UI: a heading, an audio input, a button, and a text box
# that receives the transcription when the button is clicked.
# NOTE(review): the emoji prefixes in the labels below look mis-encoded
# (mojibake); restore them from the intended originals.
# NOTE(review): nesting under gr.Row() was reconstructed -- confirm the layout.
with gr.Blocks(title=title) as demo:
    gr.Markdown("# " + title + "\n" + description)

    with gr.Row():
        audio_input = gr.Audio(
            label="π€ Record or Upload Audio (auto 16 kHz mono)",
            type="filepath",
            sources=["microphone", "upload"],
        )

    transcribe_btn = gr.Button("π Transcribe")

    output_text = gr.Textbox(label="π© Transcription Output")

    # Clicking the button sends the audio file path to `transcribe` and puts
    # the returned text into the output box.
    transcribe_btn.click(transcribe, inputs=[audio_input], outputs=[output_text])
|
|
|
|
|
|
|
|
def main():
    """Launch the Gradio demo app."""
    demo.launch()


if __name__ == "__main__":
    main()