notuser77 committed on
Commit
0bc5891
·
verified ·
1 Parent(s): bb0d091

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -12
app.py CHANGED
@@ -32,29 +32,29 @@ EMOTIONS = ['angry', 'disgust', 'fearful', 'happy', 'neutral', 'sad', 'surprised
32
 
33
  def preprocess_audio(audio_path):
34
  try:
35
- # 1. Load audio (must match training sample rate)
36
- data, sr = librosa.load(audio_path, duration=2.5, offset=0.6)
37
 
38
- # 2. Extract Features (Matches the Kaggle logic)
39
- # Feature 1: Zero Crossing Rate
40
  zcr = librosa.feature.zero_crossing_rate(y=data)
41
-
42
- # Feature 2: RMS (Energy)
43
  rms = librosa.feature.rms(y=data)
44
-
45
- # Feature 3: MFCC (Mel-frequency cepstral coefficients)
46
  mfcc = librosa.feature.mfcc(y=data, sr=sr, n_mfcc=13)
47
 
48
- # 3. Combine them exactly like the notebook
49
- # This creates a feature vector of length 2376
50
  features = np.concatenate((
51
  np.mean(zcr, axis=1),
52
  np.mean(rms, axis=1),
53
  np.mean(mfcc, axis=1)
54
  ), axis=0)
55
 
56
- # The Kaggle notebook likely used a loop/flattening that resulted in 2376.
57
- # If the above doesn't hit 2376 exactly, we pad it to ensure no crash:
 
 
 
 
 
58
  if len(features) < 2376:
59
  features = np.pad(features, (0, 2376 - len(features)), 'constant')
60
  else:
 
32
 
33
  def preprocess_audio(audio_path):
34
  try:
35
+ # 1. Load audio - using 2.5s duration as per Kaggle
36
+ data, sr = librosa.load(audio_path, duration=2.5, offset=0.6, res_type='kaiser_fast')
37
 
38
+ # 2. Extract Features
 
39
  zcr = librosa.feature.zero_crossing_rate(y=data)
 
 
40
  rms = librosa.feature.rms(y=data)
 
 
41
  mfcc = librosa.feature.mfcc(y=data, sr=sr, n_mfcc=13)
42
 
43
+ # 3. Combine
44
+ # We use the mean across the time axis for each feature
45
  features = np.concatenate((
46
  np.mean(zcr, axis=1),
47
  np.mean(rms, axis=1),
48
  np.mean(mfcc, axis=1)
49
  ), axis=0)
50
 
51
+ # 4. MANUAL SCALING (CRITICAL)
52
+ # If you didn't upload your 'scaler.pkl', we must normalize manually.
53
+ # This prevents the model from being overwhelmed by volume differences.
54
+ features = (features - np.mean(features)) / (np.std(features) + 1e-5)
55
+
56
+ # 5. Reshape to match your (2376, 1) architecture
57
+ # If your combined features are shorter than 2376, we pad with zeros.
58
  if len(features) < 2376:
59
  features = np.pad(features, (0, 2376 - len(features)), 'constant')
60
  else: