# mai_tarjamaa / app.py
# Author: Bello Abdullahi
# Fix audio recording - handle both recorded and uploaded audio properly
# Commit: 035adb1
import gradio as gr
import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor
import librosa
import numpy as np
# Load model and processor
def load_model():
    """Load the fine-tuned Hausa Whisper checkpoint and its processor.

    Returns:
        A ``(model, processor)`` pair on success, or ``(None, None)`` if
        anything goes wrong while downloading/instantiating them.
    """
    checkpoint = "therealbee/whisper-small-ha-bible-tts"
    try:
        whisper_model = WhisperForConditionalGeneration.from_pretrained(
            checkpoint,
            # Checkpoint may have head sizes differing from the base config.
            ignore_mismatched_sizes=True,
        )
        whisper_processor = WhisperProcessor.from_pretrained(checkpoint)
    except Exception as e:
        # Boundary handler: log and degrade gracefully so the UI can report
        # a friendly error instead of crashing at import time.
        print(f"Error loading model: {e}")
        return None, None
    return whisper_model, whisper_processor
# Load model and processor once at import time so every request reuses the
# same instances; both are (None, None) if loading failed.
model, processor = load_model()
def transcribe_audio(audio_input):
    """Transcribe Hausa speech to text with the fine-tuned Whisper model.

    Args:
        audio_input: Either a ``(sample_rate, audio_data)`` tuple (microphone
            recording / Gradio ``type="numpy"``) or a filepath string for an
            uploaded file.

    Returns:
        A user-facing status string containing either the transcription or
        an error message (never raises).
    """
    if model is None or processor is None:
        return "❌ Model failed to load. Please try again later."
    if audio_input is None:
        return "❌ Please provide an audio file or record audio."
    try:
        # Handle different audio input types
        if isinstance(audio_input, tuple):
            # Recorded audio: (sample_rate, audio_data)
            sample_rate, audio_data = audio_input
            audio = audio_data.astype(np.float32)
            # Downmix stereo recordings to mono — Whisper expects 1-D audio.
            if audio.ndim > 1:
                audio = audio.mean(axis=1)
            # Normalize integer PCM (e.g. int16) into [-1, 1]. Use the
            # absolute peak so signals whose excursion is mostly negative
            # are normalized too (a bare `audio.max() > 1.0` misses those).
            peak = float(np.max(np.abs(audio))) if audio.size else 0.0
            if peak > 1.0:
                audio = audio / peak
            # Whisper models are trained on 16 kHz audio.
            if sample_rate != 16000:
                audio = librosa.resample(audio, orig_sr=sample_rate, target_sr=16000)
        else:
            # Uploaded file: filepath — librosa resamples to 16 kHz for us.
            audio, sample_rate = librosa.load(audio_input, sr=16000)
        # Reject near-empty clips (< ~0.06 s at 16 kHz).
        if len(audio) < 1000:
            return "❌ Audio too short. Please record at least 1 second of speech."
        # Prepare inputs. NOTE: the feature extractor does not accept a
        # `language` kwarg — the target language is given to `generate` below.
        inputs = processor(
            audio,
            sampling_rate=16000,
            return_tensors="pt"
        )
        # Generate transcription, forcing Hausa and the transcribe task
        # (rather than Whisper's translate-to-English task).
        with torch.no_grad():
            outputs = model.generate(
                inputs.input_features,
                language="ha",
                task="transcribe"
            )
        # Decode transcription
        transcription = processor.batch_decode(outputs, skip_special_tokens=True)[0]
        if not transcription.strip():
            return "❌ No speech detected. Please speak more clearly or check your audio."
        return f"πŸ“ Hausa Transcription:\n\n{transcription}"
    except Exception as e:
        # Surface any failure to the UI rather than crashing the request.
        return f"❌ Transcription failed: {str(e)}"
# Create Gradio interface
demo = gr.Interface(
fn=transcribe_audio,
inputs=gr.Audio(
sources=["microphone", "upload"],
type="numpy",
label="πŸŽ™οΈ Record or Upload Hausa Audio"
),
outputs=gr.Textbox(
label="πŸ“‹ Transcription Result",
lines=5,
placeholder="Your Hausa transcription will appear here..."
),
title="πŸŽ™οΈ Hausa Speech Transcription",
description="""
Upload an audio file or record directly to get Hausa transcription.
**Supported formats:** WAV, MP3, OGG, M4A, FLAC
**Tips:**
- Speak clearly in Hausa
- Keep recordings under 30 seconds for best results
- Use good quality audio
""",
examples=[],
theme=gr.themes.Soft(),
allow_flagging="never"
)
# Launch the app only when run as a script (Hugging Face Spaces imports app.py).
if __name__ == "__main__":
    demo.launch()