# Hugging Face Space "Space5" — app.py (commit cf64064 by Raemih)
# Gradio demo: HuBERT-based speech emotion recognition.
import gradio as gr
import torch
import librosa
from transformers import Wav2Vec2FeatureExtractor, HubertForSequenceClassification
# Load model and processor
# Pretrained SUPERB emotion-recognition checkpoint from the HF Hub;
# downloaded (and cached) once at import time, before the UI starts.
model_id = "superb/hubert-base-superb-er"
# Converts raw waveforms into the padded/normalized tensors the model expects.
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(model_id)
# HuBERT backbone with a sequence-classification head producing emotion logits.
model = HubertForSequenceClassification.from_pretrained(model_id)
def predict_emotion(audio):
    """Classify the dominant emotion in an audio clip.

    Parameters
    ----------
    audio : str | None
        Path to the uploaded/recorded audio file (Gradio passes a temp-file
        path with ``type="filepath"``), or ``None`` if nothing was provided.

    Returns
    -------
    dict[str, float] | str
        Mapping of emotion label -> probability (consumed by ``gr.Label``),
        or a human-readable error message when no audio was supplied.
    """
    if audio is None:
        return "Please upload an audio file."

    # Load and resample to the 16 kHz rate the model was trained on.
    speech, _sr = librosa.load(audio, sr=16000)

    # Preprocess raw waveform into model-ready tensors.
    inputs = feature_extractor(
        speech, sampling_rate=16000, return_tensors="pt", padding=True
    )

    # Inference only — no gradients needed.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Convert logits to a probability distribution over emotions.
    probs = torch.nn.functional.softmax(logits, dim=-1)

    # Read label order from the checkpoint config instead of hard-coding it,
    # so a label-order change in the model cannot silently mislabel results.
    # (superb/hubert-base-superb-er: 0 neu, 1 hap, 2 ang, 3 sad.)
    pretty = {"neu": "Neutral", "hap": "Happy", "ang": "Angry", "sad": "Sad"}
    return {
        pretty.get(model.config.id2label[i], model.config.id2label[i]): float(probs[0][i])
        for i in range(probs.shape[-1])
    }
# Gradio UI wiring: audio input -> predict_emotion -> probability label.
_audio_input = gr.Audio(type="filepath", label="Upload Audio or Record")
_emotion_output = gr.Label(label="Detected Emotion")

demo = gr.Interface(
    fn=predict_emotion,
    inputs=_audio_input,
    outputs=_emotion_output,
    title="HuBERT Emotion Recognition",
    description="Upload an audio clip to detect the primary emotion. This model (hubert-base-superb-er) is fine-tuned for Neutral, Happy, Angry, and Sad classifications.",
    examples=[],  # paths to example .wav files can be listed here
    theme="soft",
)
# Start the Gradio server only when run as a script (not when imported).
if __name__ == "__main__":
    demo.launch()