| import pandas as pd |
| import numpy as np |
|
|
| import librosa |
|
|
| import sklearn |
| from sklearn.preprocessing import StandardScaler, OneHotEncoder |
| from sklearn.model_selection import train_test_split |
|
|
| import tensorflow as tf |
| from keras.models import load_model |
|
|
| import pickle |
|
|
# Target audio sampling rate in Hz (matches librosa.load's default of 22050).
# NOTE(review): this module-level value is shadowed by locals of the same name
# in extract_process/export_process and appears unused — confirm before removal.
sample_rate = 22050
|
|
| def noise(data): |
| noise_value = 0.015 * np.random.uniform() * np.amax(data) |
| data = data + noise_value * np.random.normal(size=data.shape[0]) |
| return data |
|
|
def stretch(data, rate=0.8):
    """Time-stretch the signal by *rate* (values < 1 slow it down) without changing pitch."""
    return librosa.effects.time_stretch(y=data, rate=rate)
|
|
| def shift(data): |
| shift_range = int(np.random.uniform(low=-5, high=5) * 1000) |
| return np.roll(data, shift_range) |
|
|
def pitch(data, sampling_rate, pitch_factor=0.7):
    """Shift the signal's pitch by *pitch_factor* semitone steps, preserving duration."""
    return librosa.effects.pitch_shift(y=data, sr=sampling_rate, n_steps=pitch_factor)
|
|
def extract_process(data, sample_rate=22050):
    """Extract a flat feature vector from a mono audio signal.

    The vector is the concatenation of the time-averaged (mean over frames)
    zero-crossing rate, chroma STFT, MFCC, RMS energy, and mel-spectrogram
    features, in that order.

    Parameters
    ----------
    data : np.ndarray
        Mono audio time series.
    sample_rate : int, optional
        Sampling rate of *data* in Hz. Defaults to 22050, which was
        previously hard-coded and matches librosa.load's default.

    Returns
    -------
    np.ndarray
        1-D array of concatenated per-feature means.
    """
    zcr = np.mean(librosa.feature.zero_crossing_rate(y=data).T, axis=0)

    # Magnitude spectrogram is reused for the chroma feature.
    stft_mag = np.abs(librosa.stft(data))
    chroma = np.mean(librosa.feature.chroma_stft(S=stft_mag, sr=sample_rate).T, axis=0)

    mfcc = np.mean(librosa.feature.mfcc(y=data, sr=sample_rate).T, axis=0)
    rms = np.mean(librosa.feature.rms(y=data).T, axis=0)
    mel = np.mean(librosa.feature.melspectrogram(y=data, sr=sample_rate).T, axis=0)

    # Single concatenation replaces the original chain of incremental hstacks;
    # the resulting vector is identical.
    return np.hstack((zcr, chroma, mfcc, rms, mel))
|
|
def export_process(path):
    """Load an audio file and build a feature matrix for it.

    Returns a 2-D array with one row of extract_process features per
    variant: the raw signal, a noise-augmented copy, and a
    time-stretched + pitch-shifted copy.
    """
    # First 5 seconds, skipping the first second (librosa resamples to 22050 Hz).
    data, sample_rate = librosa.load(path, duration=5, offset=1)

    rows = [extract_process(data)]

    # Augmentation 1: additive white noise.
    rows.append(extract_process(noise(data)))

    # Augmentation 2: time stretch followed by pitch shift.
    stretched = stretch(data)
    rows.append(extract_process(pitch(stretched, sample_rate)))

    return np.vstack(rows)
|
|
| |
# --- Module-level training-data preparation -------------------------------
# Builds the fitted OneHotEncoder (`encoder_label`) and StandardScaler
# (`scaler_data`) that the inference helpers below depend on.
# NOTE(review): pickle.load executes arbitrary code from the file — only
# load these .pkl files from a trusted source.

# Pre-extracted feature matrix.
with open('X_train.pkl', 'rb') as f:
    X_train = pickle.load(f)


# Corresponding emotion labels.
with open('Y_train.pkl', 'rb') as f:
    Y_train = pickle.load(f)


# Combine features and labels into one DataFrame.
Features = pd.DataFrame(X_train)
Features['labels'] = Y_train


# Split back into feature matrix X and label vector Y.
X = Features.iloc[: ,:-1].values
Y = Features['labels'].values


# One-hot encode the string labels; the fitted encoder is reused later by
# predict_emotion to map model probabilities back to label strings.
encoder_label = OneHotEncoder()
Y = encoder_label.fit_transform(np.array(Y).reshape(-1,1)).toarray()


# 90/10 train/test split with a fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(X, Y, train_size=0.9, random_state=42, shuffle=True)


# Standardize features; the scaler is fitted on the training split only and
# reused by preprocess_audio for inference inputs.
scaler_data = StandardScaler()
x_train = scaler_data.fit_transform(x_train)
x_test = scaler_data.transform(x_test)
|
|
def preprocess_audio(audio):
    """Convert an audio file path into a scaled 3-D feature tensor for the model."""
    raw_features = export_process(audio)
    # Apply the scaler fitted on the training data above.
    scaled = scaler_data.transform(raw_features)
    # Add a trailing channel axis expected by the Conv1D-style model input.
    return np.expand_dims(scaled, axis=2)
|
|
| |
def predict_emotion(preprocessed_audio):
    """Predict the emotion label for a batch of preprocessed audio features.

    Parameters
    ----------
    preprocessed_audio : np.ndarray
        Output of preprocess_audio (scaled features with a channel axis).

    Returns
    -------
    The decoded label(s) for the first sample in the batch, obtained by
    inverting the one-hot encoding fitted at module load time.
    """
    # Fix: the original reloaded the HDF5 model from disk on every call.
    # Cache it on the function object so repeated predictions are cheap.
    model = getattr(predict_emotion, "_model", None)
    if model is None:
        model = load_model('speech-emotion-recognition.hdf5')
        predict_emotion._model = model
    prediction = model.predict(preprocessed_audio)
    # Map one-hot probability rows back to their original string labels.
    predicted_emotion = encoder_label.inverse_transform(prediction)
    return predicted_emotion[0]
|
|
| |
def live_emotion_recognition(audio_path):
    """End-to-end inference: audio file path -> predicted emotion value."""
    features = preprocess_audio(audio_path)
    # predict_emotion returns the first decoded row; take its first element.
    return predict_emotion(features)[0]