Spaces:

notuser77
/

emotion

Sleeping

notuser77 committed on Dec 20, 2025

Commit

0bc5891

verified ·

1 Parent(s): bb0d091

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -32,29 +32,29 @@ EMOTIONS = ['angry', 'disgust', 'fearful', 'happy', 'neutral', 'sad', 'surprised
 def preprocess_audio(audio_path):
     try:
-        # 1. Load audio (must match training sample rate)
-        data, sr = librosa.load(audio_path, duration=2.5, offset=0.6)
-        # 2. Extract Features (Matches the Kaggle logic)
-        # Feature 1: Zero Crossing Rate
         zcr = librosa.feature.zero_crossing_rate(y=data)
-        # Feature 2: RMS (Energy)
         rms = librosa.feature.rms(y=data)
-        # Feature 3: MFCC (Mel-frequency cepstral coefficients)
         mfcc = librosa.feature.mfcc(y=data, sr=sr, n_mfcc=13)
-        # 3. Combine them exactly like the notebook
-        # This creates a feature vector of length 2376
         features = np.concatenate((
             np.mean(zcr, axis=1),
             np.mean(rms, axis=1),
             np.mean(mfcc, axis=1)
         ), axis=0)
-        # The Kaggle notebook likely used a loop/flattening that resulted in 2376.
-        # If the above doesn't hit 2376 exactly, we pad it to ensure no crash:
         if len(features) < 2376:
             features = np.pad(features, (0, 2376 - len(features)), 'constant')
         else:

 def preprocess_audio(audio_path):
     try:
+        # 1. Load audio - using 2.5s duration as per Kaggle
+        data, sr = librosa.load(audio_path, duration=2.5, offset=0.6, res_type='kaiser_fast')
+        # 2. Extract Features
         zcr = librosa.feature.zero_crossing_rate(y=data)
         rms = librosa.feature.rms(y=data)
         mfcc = librosa.feature.mfcc(y=data, sr=sr, n_mfcc=13)
+        # 3. Combine
+        # We use the mean across the time axis for each feature
         features = np.concatenate((
             np.mean(zcr, axis=1),
             np.mean(rms, axis=1),
             np.mean(mfcc, axis=1)
         ), axis=0)
+        # 4. MANUAL SCALING (CRITICAL)
+        # If you didn't upload your 'scaler.pkl', we must normalize manually.
+        # This prevents the model from being overwhelmed by volume differences.
+        features = (features - np.mean(features)) / (np.std(features) + 1e-5)
+        # 5. Reshape to match your (2376, 1) architecture
+        # If your combined features are shorter than 2376, we pad with zeros.
         if len(features) < 2376:
             features = np.pad(features, (0, 2376 - len(features)), 'constant')
         else: