| import gradio as gr |
| import torch |
| import librosa |
| import numpy as np |
| from transformers import AutoProcessor, AutoModelForCTC |
|
|
| |
# Hub id of the fine-tuned checkpoint; the processor and the CTC model are
# both loaded from it once, at import time, and reused by every request.
_CHECKPOINT = "HAMMALE/mms-darija-finetuned"

print("Loading model...")
processor = AutoProcessor.from_pretrained(_CHECKPOINT)
model = AutoModelForCTC.from_pretrained(_CHECKPOINT)
|
|
def transcribe_audio(audio_file):
    """Transcribe an uploaded audio file with the module-level CTC model.

    Args:
        audio_file: Path of the audio file to transcribe, or ``None`` when
            nothing was uploaded.

    Returns:
        The decoded transcription string. A human-readable message is
        returned instead when no file was given, when the loaded clip has
        fewer than 1600 samples, when the decoded text is blank, or when any
        exception is raised while processing (the exception text is embedded
        in the message rather than propagated).
    """
    # Nothing uploaded: answer immediately without touching the model.
    if audio_file is None:
        return "Please upload an audio file."

    try:
        # Load at 16 kHz, the same rate that is declared to the processor.
        waveform, _ = librosa.load(audio_file, sr=16000)

        too_short = len(waveform) < 1600
        if too_short:
            return "Audio too short. Please upload a longer audio file."

        features = processor(waveform, sampling_rate=16000, return_tensors="pt")

        # Inference only, so gradient tracking is disabled.
        with torch.no_grad():
            outputs = model(**features)
        scores = outputs.logits

        # Keep the highest-scoring token id at every position, then decode.
        token_ids = scores.argmax(dim=-1)
        decoded = processor.batch_decode(token_ids)
        text = decoded[0]

        if text.strip():
            return text
        return "No transcription generated."

    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"Error processing audio: {str(e)}"
|
|
| |
# UI components, created in the same order as they are wired below:
# a file-path audio input and a plain text output.
_audio_input = gr.Audio(type="filepath", label="Upload Darija Audio")
_text_output = gr.Textbox(
    label="Transcription",
    placeholder="Transcription will appear here...",
)

# Single-function interface: the uploaded file path goes to
# ``transcribe_audio`` and its returned string fills the textbox.
demo = gr.Interface(
    fn=transcribe_audio,
    inputs=_audio_input,
    outputs=_text_output,
    title="🎤 Darija Speech Recognition",
    description=(
        "Upload an audio file in Moroccan Arabic (Darija) and get the transcription. "
        "This model was fine-tuned on the Darija Bible dataset."
    ),
    article="Model: [HAMMALE/mms-darija-finetuned](https://huggingface.co/HAMMALE/mms-darija-finetuned)",
    examples=[],  # no bundled example clips
    cache_examples=False,
    theme=gr.themes.Soft(),
)


# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
|
|