# NOTE: removed non-Python artifacts left over from a file-viewer scrape
# (file size, commit hashes, and a line-number gutter).
import os
import tempfile

import gradio as gr
import torchaudio
from transformers import pipeline
# Load the Moul-Sout Darija ASR model as a ready-made speech-recognition pipeline.
asr_pipeline = pipeline("automatic-speech-recognition", model="01Yassine/moulsot_v0.2_1000")
# Workaround for the checkpoint's generation config: move `forced_decoder_ids`
# over to `input_ids` and clear the original field so the pipeline's own
# decoding settings take effect.
# NOTE(review): this mirrors the common Whisper fine-tune fix for newer
# transformers versions — confirm it is still needed for the pinned version.
asr_pipeline.model.generation_config.input_ids = asr_pipeline.model.generation_config.forced_decoder_ids
asr_pipeline.model.generation_config.forced_decoder_ids = None
def ensure_mono_16k(audio_path, target_sr=16000):
    """Load audio, convert it to mono at ``target_sr`` Hz, and save a temp WAV.

    Parameters
    ----------
    audio_path : str
        Path to an input audio file in any format torchaudio can decode.
    target_sr : int, optional
        Sample rate to resample to. Defaults to 16 kHz, which the ASR
        pipeline expects.

    Returns
    -------
    str
        Path to a newly created mono WAV file at ``target_sr`` Hz. The caller
        owns the file; it is not cleaned up automatically.
    """
    waveform, sr = torchaudio.load(audio_path)
    # Average all channels down to a single mono channel if needed.
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)
    # Resample only when the source rate differs from the target.
    if sr != target_sr:
        resampler = torchaudio.transforms.Resample(sr, target_sr)
        waveform = resampler(waveform)
        sr = target_sr
    # Use a unique temp file instead of the previous fixed "/tmp/..." path:
    # the hard-coded name collided between concurrent requests and did not
    # exist on non-POSIX hosts.
    fd, tmp_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # torchaudio.save reopens by path; close to avoid an fd leak
    torchaudio.save(tmp_path, waveform, sr)
    return tmp_path
def transcribe(audio):
    """Transcribe a recorded/uploaded audio file and return the text."""
    # Gradio passes None when the user submitted nothing — bail out early.
    if audio is None:
        return "Please record or upload an audio file."
    # Normalize to mono/16 kHz, then let the ASR pipeline decode it.
    return asr_pipeline(ensure_mono_16k(audio))["text"]
# UI copy. The original source contained mojibake (emoji UTF-8 bytes decoded
# as a single-byte charset, e.g. "π²π¦"); restored to the intended emoji.
title = "🎙️ Moul-Sout ASR 🇲🇦"
description = """
**Moul-Sout** model for Darija ASR 🇲🇦.
You can record or upload an audio sample (it will be automatically resampled to 16 kHz mono),
and view the transcription result below.
"""
# --- Gradio UI wiring -------------------------------------------------------
# The three widget labels contained mojibake emoji in the original source;
# restored to 🎤 / 🚀 / 📩 here. Layout and event wiring are unchanged.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}\n{description}")
    with gr.Row():
        # type="filepath" hands transcribe() a path on disk, which is what
        # ensure_mono_16k() expects to load.
        audio_input = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="🎤 Record or Upload Audio (auto 16 kHz mono)"
        )
    transcribe_btn = gr.Button("🚀 Transcribe")
    output_text = gr.Textbox(label="📩 Transcription Output")
    # Button click runs the transcription and fills the output textbox.
    transcribe_btn.click(
        fn=transcribe,
        inputs=[audio_input],
        outputs=[output_text]
    )
# Local launch guard: start the Gradio server only when executed directly
# (not when the module is imported, e.g. by a Spaces runner).
# Also removed a stray trailing "|" scrape artifact that made this line a
# syntax error.
if __name__ == "__main__":
    demo.launch()