Bikas0
/

Audio-Voice-Multi-Class-Classifications

Model card Files Files and versions

Bikas0 committed on Aug 11, 2025

Commit

ccbabbb

·

verified ·

1 Parent(s): 536b0f9

inference

Files changed (1) hide show

main.py +47 -0

main.py CHANGED Viewed

	@@ -0,0 +1,47 @@

+import librosa
+import numpy as np
+from tensorflow.keras.models import load_model
+# Load your trained model
+model = load_model('/kaggle/working/Dubai_Audio.h5')
+speaker_to_int = {
+    'Brene Brown': 0,
+    'Eckhart Tolle': 1,
+    'Eric Thomas': 2,
+    'Gary Vee': 3,
+    'Jay Shetty': 4,
+    'Les Brown': 5,
+    'Mel Robbins': 6,
+    'Nick Vujicic': 7,
+    'Oprah Winfrey': 8,
+    'Rabin Sharma': 9,
+    'Simon Sinek': 10
+}
+def preprocess_audio(file_path, n_mfcc=40, max_pad_len=216):
+    audio, sample_rate = librosa.load(file_path, sr=None)
+    mfcc = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
+    if mfcc.shape[1] < max_pad_len:
+        pad_width = max_pad_len - mfcc.shape[1]
+        mfcc = np.pad(mfcc, pad_width=((0,0), (0,pad_width)), mode='constant')
+    else:
+        mfcc = mfcc[:, :max_pad_len]
+    mfcc = mfcc[np.newaxis, ..., np.newaxis]
+    return mfcc
+def predict_audio_class(file_path, model, int_to_speaker):
+    processed_audio = preprocess_audio(file_path)
+    predictions = model.predict(processed_audio)
+    predicted_index = np.argmax(predictions, axis=1)[0]
+    print(predicted_index)
+    predicted_class = int_to_speaker[predicted_index]
+    return predicted_class
+# Reverse mapping dictionary
+int_to_speaker = {v: k for k, v in speaker_to_int.items()}
+# Example usage
+uploaded_audio_path = '/kaggle/input/audio-classifier-dataset/augmented-audio/Mel Robbins/113Mel Robbins113.wav'
+predicted_class = predict_audio_class(uploaded_audio_path, model, int_to_speaker)
+print(f'Predicted class: {predicted_class}')