|
|
import librosa |
|
|
import numpy as np |
|
|
from tensorflow.keras.models import load_model |
|
|
|
|
|
|
|
|
# Load the trained speaker-classification Keras model (HDF5 format) from
# the Kaggle working directory.
model = load_model('/kaggle/working/Dubai_Audio.h5')
|
|
# Mapping from speaker name to the integer class index the model was
# trained with. Must match the label encoding used at training time.
# NOTE(review): 'Rabin Sharma' looks like a typo for 'Robin Sharma', but the
# key may mirror a dataset directory name — confirm before renaming.
speaker_to_int = {


    'Brene Brown': 0,


    'Eckhart Tolle': 1,


    'Eric Thomas': 2,


    'Gary Vee': 3,


    'Jay Shetty': 4,


    'Les Brown': 5,


    'Mel Robbins': 6,


    'Nick Vujicic': 7,


    'Oprah Winfrey': 8,


    'Rabin Sharma': 9,


    'Simon Sinek': 10


}
|
|
|
|
|
def preprocess_audio(file_path, n_mfcc=40, max_pad_len=216):
    """Load an audio file and convert it to a fixed-size MFCC tensor.

    The MFCC matrix is truncated or zero-padded along the time axis to
    exactly ``max_pad_len`` frames, then given batch and channel
    dimensions so it can be fed straight into the CNN.

    Parameters
    ----------
    file_path : str
        Path to the audio file on disk.
    n_mfcc : int
        Number of MFCC coefficients to extract per frame.
    max_pad_len : int
        Fixed number of time frames the model expects.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, n_mfcc, max_pad_len, 1)``.
    """
    # sr=None keeps the file's native sample rate instead of resampling.
    signal, sr = librosa.load(file_path, sr=None)
    features = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc)

    # Force the time axis to exactly max_pad_len frames.
    n_frames = features.shape[1]
    if n_frames >= max_pad_len:
        features = features[:, :max_pad_len]
    else:
        features = np.pad(
            features,
            pad_width=((0, 0), (0, max_pad_len - n_frames)),
            mode='constant',
        )

    # Prepend a batch dimension and append a channel dimension.
    return features[np.newaxis, ..., np.newaxis]
|
|
|
|
|
def predict_audio_class(file_path, model, int_to_speaker):
    """Predict the speaker of a single audio file.

    Parameters
    ----------
    file_path : str
        Path to the audio file to classify.
    model : keras.Model
        Trained classifier producing one probability per speaker class.
    int_to_speaker : dict[int, str]
        Mapping from class index back to the speaker name.

    Returns
    -------
    str
        Name of the most probable speaker.
    """
    processed_audio = preprocess_audio(file_path)
    predictions = model.predict(processed_audio)
    # argmax over the class axis of the first (only) batch item; cast to a
    # plain int so the dict lookup does not depend on the numpy integer type.
    # (Removed a leftover debug print of the raw index.)
    predicted_index = int(np.argmax(predictions, axis=1)[0])
    return int_to_speaker[predicted_index]
|
|
|
|
|
|
|
|
# Invert the label mapping so a predicted class index can be translated
# back into a speaker name.
int_to_speaker = {v: k for k, v in speaker_to_int.items()}
|
|
|
|
|
|
|
|
# Sample clip drawn from the 'Mel Robbins' class of the augmented dataset,
# so the expected prediction is 'Mel Robbins'.
uploaded_audio_path = '/kaggle/input/audio-classifier-dataset/augmented-audio/Mel Robbins/113Mel Robbins113.wav'




# Run the full pipeline (load -> MFCC -> model -> label) and report the result.
predicted_class = predict_audio_class(uploaded_audio_path, model, int_to_speaker)


print(f'Predicted class: {predicted_class}')
|
|
|