Update app.py
Browse files
app.py
CHANGED
|
@@ -32,29 +32,29 @@ EMOTIONS = ['angry', 'disgust', 'fearful', 'happy', 'neutral', 'sad', 'surprised
|
|
| 32 |
|
| 33 |
def preprocess_audio(audio_path):
|
| 34 |
try:
|
| 35 |
-
# 1. Load audio
|
| 36 |
-
data, sr = librosa.load(audio_path, duration=2.5, offset=0.6)
|
| 37 |
|
| 38 |
-
# 2. Extract Features
|
| 39 |
-
# Feature 1: Zero Crossing Rate
|
| 40 |
zcr = librosa.feature.zero_crossing_rate(y=data)
|
| 41 |
-
|
| 42 |
-
# Feature 2: RMS (Energy)
|
| 43 |
rms = librosa.feature.rms(y=data)
|
| 44 |
-
|
| 45 |
-
# Feature 3: MFCC (Mel-frequency cepstral coefficients)
|
| 46 |
mfcc = librosa.feature.mfcc(y=data, sr=sr, n_mfcc=13)
|
| 47 |
|
| 48 |
-
# 3. Combine
|
| 49 |
-
#
|
| 50 |
features = np.concatenate((
|
| 51 |
np.mean(zcr, axis=1),
|
| 52 |
np.mean(rms, axis=1),
|
| 53 |
np.mean(mfcc, axis=1)
|
| 54 |
), axis=0)
|
| 55 |
|
| 56 |
-
#
|
| 57 |
-
# If
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
if len(features) < 2376:
|
| 59 |
features = np.pad(features, (0, 2376 - len(features)), 'constant')
|
| 60 |
else:
|
|
|
|
| 32 |
|
| 33 |
def preprocess_audio(audio_path):
|
| 34 |
try:
|
| 35 |
+
# 1. Load audio - using 2.5s duration as per Kaggle
|
| 36 |
+
data, sr = librosa.load(audio_path, duration=2.5, offset=0.6, res_type='kaiser_fast')
|
| 37 |
|
| 38 |
+
# 2. Extract Features
|
|
|
|
| 39 |
zcr = librosa.feature.zero_crossing_rate(y=data)
|
|
|
|
|
|
|
| 40 |
rms = librosa.feature.rms(y=data)
|
|
|
|
|
|
|
| 41 |
mfcc = librosa.feature.mfcc(y=data, sr=sr, n_mfcc=13)
|
| 42 |
|
| 43 |
+
# 3. Combine
|
| 44 |
+
# We use the mean across the time axis for each feature
|
| 45 |
features = np.concatenate((
|
| 46 |
np.mean(zcr, axis=1),
|
| 47 |
np.mean(rms, axis=1),
|
| 48 |
np.mean(mfcc, axis=1)
|
| 49 |
), axis=0)
|
| 50 |
|
| 51 |
+
# 4. MANUAL SCALING (CRITICAL)
|
| 52 |
+
# If you didn't upload your 'scaler.pkl', we must normalize manually.
|
| 53 |
+
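# This standardizes each sample by its own mean/std so the model is less likely to be overwhelmed by volume differences; note it is NOT equivalent to applying the scaler fitted on the training data.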
|
| 54 |
+
features = (features - np.mean(features)) / (np.std(features) + 1e-5)
|
| 55 |
+
|
| 56 |
+
# 5. Reshape to match your (2376, 1) architecture
|
| 57 |
+
# If your combined features are shorter than 2376 (the means above yield only 1 + 1 + 13 = 15 values), we pad with zeros.
|
| 58 |
if len(features) < 2376:
|
| 59 |
features = np.pad(features, (0, 2376 - len(features)), 'constant')
|
| 60 |
else:
|