# Rezuwan's picture
# Update app.py
# 0801379 verified
import os
import gradio as gr
from transformers import pipeline
import librosa
import numpy as np
# Request offline mode so Transformers does not try to reach the Hub.
# NOTE(review): this is assigned after `transformers` has already been
# imported at the top of the file -- confirm the library still honours the
# flag when it is set this late.
os.environ["TRANSFORMERS_OFFLINE"] = "1"

# Hub identifier of the checkpoint (expected to be present in the local cache).
MODEL_NAME = "Rezuwan/regional_asr_weights"

# Build the speech-recognition pipeline once at module load; the
# transcription function reuses this single instance for every call.
transcriber = pipeline(task="automatic-speech-recognition", model=MODEL_NAME)
# Transcription function
def transcribe_audio(audio_path):
    """Transcribe a recorded or uploaded audio file to text.

    The file is decoded with librosa as 16 kHz mono audio, converted to
    float32, peak-normalised, and handed to the module-level ``transcriber``
    pipeline.

    Args:
        audio_path: Filesystem path of the audio file, as delivered by the
            ``gr.Audio(type="filepath")`` input. A falsy value (``None`` or
            an empty string) is treated as "no audio supplied".

    Returns:
        The recognised text, or a message starting with ``"Error: "`` when
        the input is missing, contains no samples, or processing fails.
    """
    # Guard first: without a path there is nothing to decode, and a clear
    # message is more useful to the user than a decoder exception string.
    if not audio_path:
        return "Error: No audio provided. Please record or upload a file."

    try:
        # mono=True makes the loader return a 1-D signal, so no separate
        # down-mix step is needed afterwards.
        audio_data, _ = librosa.load(audio_path, sr=16000, mono=True)
        audio_data = np.asarray(audio_data, dtype=np.float32)

        if audio_data.size == 0:
            return "Error: The audio file contains no samples."

        # Peak-normalise, but only when the peak is a usable divisor:
        # dividing by zero on pure silence would fill the signal with NaNs.
        peak = float(np.max(np.abs(audio_data)))
        if np.isfinite(peak) and peak > 0.0:
            audio_data = audio_data / np.float32(peak)

        result = transcriber(audio_data)
        return result["text"]
    except Exception as e:
        # UI boundary: report the failure in the output textbox rather than
        # letting the exception propagate.
        return f"Error: {str(e)}"
# Gradio UI
# Description text handed to the interface; the model-card link is built
# from MODEL_NAME.
_DESCRIPTION = f"""
Model Card: [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files of arbitrary length.
Instructions:
1. Record or upload an audio file using the left panel.
2. Click 'Submit' after waveform appears.
3. Wait for processing and see the result on the right.
Notes:
- This model handles Bengali speech with regional dialects.
- Accuracy may vary due to limited training data.
- Offline mode is enabled for isolated environments.
"""

# One audio input (passed to the callback as a file path) and one text output.
_audio_input = gr.Audio(type="filepath", label="Upload or Record Audio")
_text_output = gr.Textbox(label="Transcription")

iface = gr.Interface(
    fn=transcribe_audio,
    inputs=_audio_input,
    outputs=_text_output,
    title="Bengali Speech-to-Text with Regional Dialects",
    description=_DESCRIPTION,
)

# Launch the Gradio app
iface.launch()