Spaces:

Rezuwan
/

Regional_Speech_ASR

Running

Update app.py

0801379 verified 10 months ago

1.73 kB

	import os
	import gradio as gr
	from transformers import pipeline
	import librosa
	import numpy as np

	# Request offline operation from the Hugging Face stack.
	# NOTE(review): `transformers` is already imported before this assignment
	# runs; whether the flag is still honoured depends on when the library
	# reads it -- confirm, or move the assignment ahead of that import.
	os.environ.update(TRANSFORMERS_OFFLINE="1")

	# Hub identifier of the checkpoint (expected to be present in the local cache).
	MODEL_NAME = "Rezuwan/regional_asr_weights"

	# Build the speech-recognition pipeline once at import time so that every
	# request handled by the app reuses the same loaded model.
	transcriber = pipeline(task="automatic-speech-recognition", model=MODEL_NAME)

	# Transcription function
	def transcribe_audio(audio_path):
	    """Transcribe an audio file and return the recognized text.

	    The file is decoded with librosa as 16 kHz mono, peak-normalized, and
	    passed to the module-level ``transcriber`` pipeline.

	    Args:
	        audio_path: Path of the audio file to transcribe. May be ``None``
	            or empty (e.g. the form was submitted without any audio --
	            TODO confirm against the Gradio version in use).

	    Returns:
	        The ``"text"`` field of the pipeline result, or a message starting
	        with ``"Error: "`` when the input is missing, contains no samples,
	        or any processing step raises.
	    """
	    # Reject a missing path up front so the user gets a clear message
	    # instead of whatever the audio loader raises for a bad argument.
	    if not audio_path:
	        return "Error: No audio provided. Please record or upload an audio file."

	    try:
	        # mono=True makes librosa downmix, so the samples are always 1-D.
	        audio_data, _ = librosa.load(audio_path, sr=16000, mono=True)
	        audio_data = audio_data.astype(np.float32)

	        if audio_data.size == 0:
	            return "Error: The audio file contains no samples."

	        # Peak-normalize. A silent clip has a zero peak; dividing by it
	        # would turn the whole signal into NaN, so leave it unscaled.
	        peak = np.max(np.abs(audio_data))
	        if peak > 0:
	            audio_data /= peak

	        result = transcriber(audio_data)
	        return result["text"]
	    except Exception as e:
	        # UI boundary: show the failure in the output box rather than crash.
	        return f"Error: {e}"

	# Gradio UI: one audio input, handed to the callback as a file path, and
	# one text box for the transcription.
	_audio_input = gr.Audio(type="filepath", label="Upload or Record Audio")
	_transcript_output = gr.Textbox(label="Transcription")

	# Description text for the interface; interpolates the model id into the
	# model-card link.
	_app_description = f"""
	Model Card: [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files of arbitrary length.

	Instructions:
	1. Record or upload an audio file using the left panel.
	2. Click 'Submit' after waveform appears.
	3. Wait for processing and see the result on the right.

	Notes:
	- This model handles Bengali speech with regional dialects.
	- Accuracy may vary due to limited training data.
	- Offline mode is enabled for isolated environments.
	"""

	iface = gr.Interface(
	    fn=transcribe_audio,
	    inputs=_audio_input,
	    outputs=_transcript_output,
	    title="Bengali Speech-to-Text with Regional Dialects",
	    description=_app_description,
	)

	# Launch the Gradio app only when this file is executed as a script, so
	# that importing the module (e.g. from a test) does not start a server.
	if __name__ == "__main__":
	    iface.launch()