Bikas0 commited on
Commit
ccbabbb
·
verified ·
1 Parent(s): 536b0f9
Files changed (1) hide show
  1. main.py +47 -0
main.py CHANGED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import librosa
import numpy as np
from tensorflow.keras.models import load_model

# Load the trained speaker-classification model from its Kaggle working path.
model = load_model('/kaggle/working/Dubai_Audio.h5')

# Speaker names in training order; list position is the integer class label.
# NOTE(review): 'Rabin Sharma' is presumably 'Robin Sharma', but the key must
# match the label used at training time, so it is left as-is.
_SPEAKERS = [
    'Brene Brown',
    'Eckhart Tolle',
    'Eric Thomas',
    'Gary Vee',
    'Jay Shetty',
    'Les Brown',
    'Mel Robbins',
    'Nick Vujicic',
    'Oprah Winfrey',
    'Rabin Sharma',
    'Simon Sinek',
]

# Map each speaker name to its integer class index (0..10).
speaker_to_int = {name: index for index, name in enumerate(_SPEAKERS)}
20
+
21
def preprocess_audio(file_path, n_mfcc=40, max_pad_len=216):
    """Load an audio file and return MFCC features shaped for the model.

    The MFCC matrix is padded with zeros (or truncated) along the time
    axis to exactly ``max_pad_len`` frames, then given batch and channel
    axes so the result has shape ``(1, n_mfcc, max_pad_len, 1)``.

    Args:
        file_path: Path to the audio file readable by ``librosa.load``.
        n_mfcc: Number of MFCC coefficients to extract per frame.
        max_pad_len: Fixed number of time frames expected by the model.

    Returns:
        A 4-D ``numpy`` array of shape ``(1, n_mfcc, max_pad_len, 1)``.
    """
    # sr=None preserves the file's native sampling rate.
    signal, sr = librosa.load(file_path, sr=None)
    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc)

    n_frames = mfcc.shape[1]
    if n_frames < max_pad_len:
        # Zero-pad only on the right of the time axis.
        mfcc = np.pad(mfcc, ((0, 0), (0, max_pad_len - n_frames)), mode='constant')
    else:
        mfcc = mfcc[:, :max_pad_len]

    # Prepend a batch axis and append a channel axis for the CNN input.
    return mfcc[np.newaxis, ..., np.newaxis]
31
+
32
def predict_audio_class(file_path, model, int_to_speaker):
    """Predict the speaker name for a single audio file.

    Args:
        file_path: Path to the audio file to classify.
        model: A Keras model whose ``predict`` returns per-class scores
            with shape ``(1, num_classes)``.
        int_to_speaker: Mapping from integer class index to speaker name.

    Returns:
        The speaker name corresponding to the highest-scoring class.
    """
    processed_audio = preprocess_audio(file_path)
    predictions = model.predict(processed_audio)
    # argmax over the class axis of the single-item batch.
    predicted_index = np.argmax(predictions, axis=1)[0]
    # Removed a leftover debug print of the raw index.
    return int_to_speaker[predicted_index]
39
+
40
# Reverse mapping: integer class index -> speaker name. Kept at module
# level so importers can reuse it alongside ``speaker_to_int``.
int_to_speaker = {v: k for k, v in speaker_to_int.items()}

if __name__ == "__main__":
    # Example usage — guarded so importing this module does not trigger
    # an audio load and model prediction as a side effect.
    uploaded_audio_path = '/kaggle/input/audio-classifier-dataset/augmented-audio/Mel Robbins/113Mel Robbins113.wav'

    predicted_class = predict_audio_class(uploaded_audio_path, model, int_to_speaker)
    print(f'Predicted class: {predicted_class}')