JagjeevanAK
/

Speech-emotion-detection

Audio Classification

emotion-recognition

Eval Results (legacy)

Model card Files Files and versions

Speech-emotion-detection / predict.py

JagjeevanAK's picture

Upload folder using huggingface_hub

9401b94 verified 9 months ago

history blame contribute delete

2.75 kB

	"""
	Speech Emotion Recognition Inference Script
	"""

	import librosa
	import numpy as np
	import tensorflow as tf
	from tensorflow.keras.models import load_model
	from sklearn.preprocessing import LabelEncoder
	import argparse

	def extract_feature(data, sr, mfcc=True, chroma=True, mel=True):
	    """
	    Build a single flat feature vector from an audio signal.

	    Every enabled feature matrix is computed with librosa, transposed and
	    averaged along axis 0 (presumably the frame/time axis of librosa's
	    output -- confirm against the librosa docs), then concatenated in the
	    fixed order MFCC, chroma, mel.

	    Args:
	        data: Audio samples, handed to librosa as ``y`` (and to ``stft``).
	        sr: Sampling rate forwarded to the librosa feature functions.
	        mfcc: Include the averaged MFCCs (``n_mfcc=40``).
	        chroma: Include the averaged chromagram computed from the absolute
	            value of the STFT.
	        mel: Include the averaged mel spectrogram.

	    Returns:
	        numpy array holding the enabled features side by side; an empty
	        array when every flag is False.
	    """
	    def _row_means(matrix):
	        # Same reduction for every feature: mean over axis 0 of the transpose.
	        return np.mean(matrix.T, axis=0)

	    # Seed with an empty array so the stacked result keeps the same dtype
	    # and shape as before, including the "all flags off" case.
	    pieces = [np.array([])]

	    if mfcc:
	        pieces.append(_row_means(librosa.feature.mfcc(y=data, sr=sr, n_mfcc=40)))

	    if chroma:
	        magnitude = np.abs(librosa.stft(data))
	        pieces.append(_row_means(librosa.feature.chroma_stft(S=magnitude, sr=sr)))

	    if mel:
	        pieces.append(_row_means(librosa.feature.melspectrogram(y=data, sr=sr)))

	    return np.hstack(pieces)

	def predict_emotion(audio_path, model_path='trained_model.h5'):
	    """
	    Classify the emotion expressed in one audio file.

	    The file is loaded at 22050 Hz, converted to a feature vector with
	    ``extract_feature`` (MFCC + chroma + mel), reshaped to
	    ``(1, n_features, 1)`` and passed to the Keras model stored at
	    ``model_path``.

	    Args:
	        audio_path: Path of the audio file to analyse.
	        model_path: Path of the saved Keras model.

	    Returns:
	        Tuple ``(label, emoji, scores)``: the predicted emotion name, the
	        matching emoji, and the first row of the model's prediction output.
	    """
	    code_to_label = {
	        '01': 'Neutral',
	        '02': 'Calm',
	        '03': 'Happy',
	        '04': 'Sad',
	        '05': 'Angry',
	        '06': 'Fearful',
	        '07': 'Disgust',
	        '08': 'Surprised',
	    }
	    label_to_emoji = {
	        'Neutral': '😐',
	        'Calm': '😌',
	        'Happy': '😊',
	        'Sad': '😢',
	        'Angry': '😠',
	        'Fearful': '😨',
	        'Disgust': '🤢',
	        'Surprised': '😲',
	    }

	    # Audio -> 1-D feature vector.
	    waveform, sample_rate = librosa.load(audio_path, sr=22050)
	    vector = extract_feature(waveform, sample_rate, mfcc=True, chroma=True, mel=True)

	    # Add a leading batch axis and a trailing axis: (n,) -> (1, n, 1).
	    model_input = vector[np.newaxis, :, np.newaxis]

	    # Run the network and pick the highest-scoring class per row.
	    scores = load_model(model_path).predict(model_input)
	    class_index = np.argmax(scores, axis=1)

	    # NOTE(review): the index -> name mapping comes from LabelEncoder's own
	    # class ordering of the emotion names, not from the '01'..'08' keys --
	    # confirm this matches how labels were encoded during training.
	    encoder = LabelEncoder().fit(list(code_to_label.values()))
	    label = encoder.inverse_transform(class_index)[0]

	    return label, label_to_emoji[label], scores[0]

	if __name__ == "__main__":
	    # Command-line entry point: predict the emotion for one audio file.
	    #
	    # Usage: predict.py AUDIO_PATH [--model MODEL_PATH]
	    # Exit status is 0 on success and 1 when prediction fails, so shells and
	    # pipelines can detect the failure.
	    import sys

	    parser = argparse.ArgumentParser(description='Predict emotion from audio file')
	    parser.add_argument('audio_path', help='Path to audio file')
	    parser.add_argument('--model', default='trained_model.h5', help='Path to model file')

	    args = parser.parse_args()

	    try:
	        emotion, emoji, confidence = predict_emotion(args.audio_path, args.model)
	        # Printing stays inside the try so output failures (e.g. a console
	        # that cannot encode the emoji) are reported the same way.
	        print(f"Predicted Emotion: {emotion} {emoji}")
	        print(f"Confidence scores: {confidence}")
	    except Exception as e:
	        # Top-level boundary: report on stderr and signal failure through
	        # the exit status instead of exiting 0.
	        print(f"Error: {e}", file=sys.stderr)
	        sys.exit(1)