Spaces:
Running
Running
| import os | |
| import gradio as gr | |
| from transformers import pipeline | |
| import librosa | |
| import numpy as np | |
# Ask Transformers to run offline so model files are resolved from the local
# cache instead of the network.
# NOTE(review): this assignment executes after `from transformers import
# pipeline` at the top of the file; if the library reads the flag at import
# time it may not take effect here. Confirm, or export the variable before
# the import runs.
os.environ.update({"TRANSFORMERS_OFFLINE": "1"})

# Hub identifier of the ASR checkpoint (expected to be in the cache already).
MODEL_NAME = "Rezuwan/regional_asr_weights"

# Module-level speech-recognition pipeline, built once at start-up.
transcriber = pipeline(task="automatic-speech-recognition", model=MODEL_NAME)
# Transcription function
def transcribe_audio(audio_path):
    """Transcribe the audio file at ``audio_path`` and return its text.

    The file is decoded with librosa at 16 kHz, peak-normalized when it
    contains a non-zero signal, and passed to the module-level
    ``transcriber`` pipeline.

    Args:
        audio_path: Filesystem path of the audio file to transcribe. A falsy
            value (``None`` or ``""``, e.g. a submit with no audio selected)
            is reported as an error instead of being handed to the decoder.

    Returns:
        The transcription text on success. On any failure, a string that
        starts with ``"Error: "`` and describes the problem; exceptions are
        not propagated to the caller.
    """
    # Guard first: without a path there is nothing to decode, and the decoder's
    # own failure message for a missing path is not meaningful to an end user.
    if not audio_path:
        return "Error: no audio provided. Please upload or record a clip first."

    try:
        # Decode and resample to 16 kHz; the returned sample rate is not needed.
        audio_data, _ = librosa.load(audio_path, sr=16000)
        # Defensive down-mix in case a multi-channel array comes back.
        if audio_data.ndim > 1:
            audio_data = librosa.to_mono(audio_data)
        audio_data = audio_data.astype(np.float32)

        # An empty clip has no peak (np.max raises on a zero-size array).
        if audio_data.size == 0:
            return "Error: the audio file contains no samples."

        # Peak-normalize only when the peak is a usable divisor. A silent clip
        # has peak 0, and dividing by it would fill the array with NaN.
        peak = float(np.max(np.abs(audio_data)))
        if np.isfinite(peak) and peak > 0.0:
            audio_data = audio_data / peak

        result = transcriber(audio_data)
        return result["text"]
    except Exception as e:
        # UI boundary: surface the failure as text in the output box.
        return f"Error: {e}"
# Gradio UI
# Description text for the page; it embeds a Markdown link to the model card.
_DESCRIPTION = f"""
Model Card: [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files of arbitrary length.
Instructions:
1. Record or upload an audio file using the left panel.
2. Click 'Submit' after waveform appears.
3. Wait for processing and see the result on the right.
Notes:
- This model handles Bengali speech with regional dialects.
- Accuracy may vary due to limited training data.
- Offline mode is enabled for isolated environments.
"""

# Input component: hands the handler a file path rather than raw samples.
_audio_input = gr.Audio(type="filepath", label="Upload or Record Audio")
# Output component: shows whatever string the handler returns.
_text_output = gr.Textbox(label="Transcription")

iface = gr.Interface(
    fn=transcribe_audio,
    inputs=_audio_input,
    outputs=_text_output,
    title="Bengali Speech-to-Text with Regional Dialects",
    description=_DESCRIPTION,
)

# Start serving the app.
iface.launch()