# BembaASRtest / app.py — Gradio speech-to-text demo using the Whisper Small Lozi model.
import gradio as gr
from transformers import pipeline
import torch
# Load the Whisper Lozi ASR pipeline once at import time. On any failure we
# leave `transcriber` as None so the UI can report a friendly error instead
# of crashing at startup.
print("Loading Whisper Lozi model...")
transcriber = None
try:
    transcriber = pipeline(
        "automatic-speech-recognition",
        model="simzacademy/whisper-small-lozi1",
        # transformers pipeline device convention: 0 = first CUDA GPU, -1 = CPU
        device=0 if torch.cuda.is_available() else -1,
    )
    print("Model loaded successfully!")
except Exception as e:
    # Best-effort startup: report the failure and fall back to transcriber=None.
    print(f"Error loading model: {e}")
def transcribe_audio(audio):
    """Convert speech audio to text with the Whisper Lozi model.

    Args:
        audio: Audio file path or tuple (sample_rate, audio_data).

    Returns:
        The transcribed text on success, otherwise a human-readable
        error message (the UI displays whatever string comes back).
    """
    # Guard clauses: model failed to load, or no audio was supplied.
    if transcriber is None:
        return "Error: Model failed to load. Please check your installation."
    if audio is None:
        return "Please provide an audio file or recording."

    try:
        # The pipeline returns a dict; "text" holds the transcription.
        return transcriber(audio)["text"]
    except Exception as e:
        # Surface the failure as text rather than raising into Gradio.
        return f"Error during transcription: {str(e)}"
# Create the Gradio interface.
# Layout: two-column Blocks page — audio input + button on the left,
# transcription textbox on the right, usage notes below.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🎀 Lozi Speech-to-Text Interface
        ### Powered by Whisper Small Lozi Model
        This interface uses the `simzacademy/whisper-small-lozi1` model to transcribe
        Lozi language speech to text.
        """
    )
    with gr.Row():
        with gr.Column():
            # Audio input - supports both microphone recording and file upload.
            # type="filepath" means transcribe_audio receives a path string.
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="Record or Upload Audio"
            )
            transcribe_btn = gr.Button("πŸ”„ Transcribe", variant="primary", size="lg")
        with gr.Column():
            # Read-only-by-convention output box for the transcription result
            # (also used for error messages returned by transcribe_audio).
            output_text = gr.Textbox(
                label="Transcription",
                placeholder="Your transcription will appear here...",
                lines=10
            )
    gr.Markdown(
        """
        ### πŸ“‹ Instructions:
        1. **Record**: Click the microphone icon to record audio directly
        2. **Upload**: Or click to upload an audio file (MP3, WAV, etc.)
        3. **Transcribe**: Click the "Transcribe" button to convert speech to text
        4. **View**: The transcribed text will appear on the right
        ### ℹ️ Notes:
        - Speak clearly in Lozi for best results
        - The model works best with clear audio and minimal background noise
        - First transcription may take longer as the model loads
        """
    )
    # Wire the button: run transcribe_audio on the selected/recorded audio
    # and write the returned string into the output textbox.
    transcribe_btn.click(
        fn=transcribe_audio,
        inputs=audio_input,
        outputs=output_text
    )
    # NOTE(review): despite the original comment, this does NOT bind the Enter
    # key — it only re-enables the Transcribe button whenever the audio input
    # changes (the button is never disabled elsewhere, so this is effectively
    # a no-op; confirm intent).
    audio_input.change(
        fn=lambda: gr.update(interactive=True),
        outputs=transcribe_btn
    )
# Launch the interface only when run as a script (not when imported).
if __name__ == "__main__":
    # Bind to all interfaces so the app is reachable on the local network;
    # flip share=True to have Gradio create a temporary public link.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )