Spaces build status: Runtime error (see the application code below).
| # app.py | |
| import gradio as gr | |
| from transformers import pipeline | |
| import numpy as np | |
| import librosa # pip install librosa | |
| # --- EDIT THIS: map display names to your HF Hub model IDs --- | |
| language_models = { | |
| "Akan (Asanti Twi)": "FarmerlineML/w2v-bert-2.0_twi_alpha_v1", | |
| "Ewe": "FarmerlineML/w2v-bert-2.0_ewe_2", | |
| "Kiswahili": "FarmerlineML/w2v-bert-2.0_swahili_alpha", | |
| "Luganda": "FarmerlineML/w2v-bert-2.0_luganda", | |
| "Brazilian Portuguese": "FarmerlineML/w2v-bert-2.0_brazilian_portugese_alpha", | |
| "Fante Kissi": "misterkissi/w2v2-lg-xls-r-300m-fante", | |
| "Runyankore Kissi": "misterkissi/w2v2-lg-xls-r-300m-runyankore", | |
| # add more as needed | |
| } | |
| # Pre-load pipelines for each language on CPU (device=-1) | |
| asr_pipelines = { | |
| lang: pipeline( | |
| task="automatic-speech-recognition", | |
| model=model_id, | |
| device=-1, # force CPU usage | |
| chunk_length_s=30 | |
| ) | |
| for lang, model_id in language_models.items() | |
| } | |
def transcribe(audio_path: str, language: str) -> str:
    """Transcribe an uploaded/recorded audio clip with the chosen model.

    Loads the audio via librosa (supports mp3, wav, flac, m4a, ogg, ...),
    downmixed to mono at its native sample rate, then runs it through the
    ASR pipeline selected by *language*.

    Parameters
    ----------
    audio_path : str
        Filesystem path supplied by the Gradio Audio component
        (``type="filepath"``); falsy when nothing was provided.
    language : str
        Key into ``asr_pipelines`` / ``language_models``.

    Returns
    -------
    str
        The transcription, or a user-facing warning message on bad input.
    """
    if not audio_path:
        return "⚠️ Please upload or record an audio clip."
    # A corrupt or unsupported file would otherwise crash the event handler;
    # surface a readable message in the UI instead.
    try:
        # librosa.load returns a 1-D float ndarray (mono) and the sample rate.
        speech, sr = librosa.load(audio_path, sr=None, mono=True)
    except Exception as exc:  # noqa: BLE001 - boundary of a UI callback
        return f"⚠️ Could not read the audio file: {exc}"
    if len(speech) == 0:
        return "⚠️ The audio clip appears to be empty."
    # Hugging Face ASR pipelines accept {"sampling_rate": ..., "raw": ...}.
    result = asr_pipelines[language]({
        "sampling_rate": sr,
        "raw": speech,
    })
    return result.get("text", "").strip()
# UI definition. The original text was mojibake-encoded (UTF-8 emoji
# misdecoded as Thai glyphs); emoji below are reconstructions — TODO confirm
# the intended characters against the original source.
with gr.Blocks(title="🌍 Multilingual ASR Demo") as demo:
    gr.Markdown(
        """
        ## 🎙️ Multilingual Speech-to-Text
        Upload an audio file (MP3, WAV, FLAC, M4A, OGG, …) or record via your microphone.
        Then choose the language/model and hit **Transcribe**.
        """
    )
    with gr.Row():
        lang = gr.Dropdown(
            choices=list(language_models.keys()),
            value=list(language_models.keys())[0],  # default to the first entry
            label="Select Language / Model",
        )
    with gr.Row():
        audio = gr.Audio(
            sources=["upload", "microphone"],
            type="filepath",  # hand transcribe() a path, not raw samples
            label="Upload or Record Audio",
        )
    btn = gr.Button("Transcribe")
    output = gr.Textbox(label="Transcription")
    # Wire the button to the ASR callback: (audio path, language) -> text.
    btn.click(fn=transcribe, inputs=[audio, lang], outputs=output)

if __name__ == "__main__":
    demo.launch()