# Hugging Face Space: HuBERT Basque ASR demo (status header from the Spaces UI removed)
import gradio as gr
import torch
from transformers import HubertForCTC, Wav2Vec2Processor
import librosa

# Load the fine-tuned HuBERT CTC checkpoint and its matching processor from
# the Hugging Face Hub (downloaded once at startup, then cached locally).
MODEL_NAME = "Ansu/mHubert-basque-ASR"  # change this to point at a different checkpoint
processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
model = HubertForCTC.from_pretrained(MODEL_NAME)
model.eval()  # inference only — make the no-dropout mode explicit
# Function to transcribe audio
def transcribe(audio):
    """Transcribe a speech recording to text with the HuBERT CTC model.

    Args:
        audio: Path to an audio file. The Gradio Audio component is
            configured with type="filepath", so this is a filename string.

    Returns:
        The decoded transcription as a string.
    """
    # Load and resample to the 16 kHz rate the model expects.
    # (Renamed from `audio` so the parameter isn't shadowed.)
    speech, _ = librosa.load(audio, sr=16000)
    # Normalize the waveform into model-ready PyTorch tensors.
    inputs = processor(speech, sampling_rate=16000, return_tensors="pt", padding=True)
    # Forward pass without gradient tracking — inference only.
    with torch.no_grad():
        logits = model(inputs.input_values).logits
    # Greedy CTC decoding: most likely token per frame, then collapse.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)[0]
    return transcription
# Create the Gradio interface: one audio input (upload or microphone), text output.
# NOTE(review): the original emoji in the two labels were mojibake ("π€", "ποΈ");
# restored to the intended 🎤 / 🎙️ characters.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["upload", "microphone"], type="filepath", label="🎤 Upload or Record Audio"),
    outputs="text",
    title="HuBERT ASR Demo",
    description="🎙️ Speak into the microphone or upload an audio file to get a transcription.",
    live=True,  # auto-submit whenever the input changes (not true streaming ASR)
)

iface.launch()