notuser77 committed on
Commit
3c7ab19
·
verified ·
1 Parent(s): 76d8f21

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -51
app.py CHANGED
@@ -1,103 +1,87 @@
1
  import os
 
 
 
 
 
 
 
2
  import huggingface_hub
3
  from speechbrain.inference.classifiers import EncoderClassifier
4
 
5
- # 1. Corrected Monkey Patch for SpeechBrain 1.0.0 + huggingface-hub 0.23+
 
6
  orig_download = huggingface_hub.hf_hub_download
7
 
def patched_download(*args, **kwargs):
    """Call the original ``hf_hub_download`` with two compatibility shims.

    * A ``use_auth_token`` keyword is forwarded under the name ``token``.
    * When the requested file is ``custom.py`` and the download fails with
      an error whose text contains ``404`` or ``Not Found``, the path of a
      local placeholder file is returned instead of propagating the error.

    Args:
        *args: Positional arguments forwarded to the original downloader;
            the second one, if present, is taken to be the filename.
        **kwargs: Keyword arguments forwarded to the original downloader.

    Returns:
        Whatever the original downloader returns, or the absolute path of
        ``dummy_custom.py`` when a missing ``custom.py`` was tolerated.

    Raises:
        Exception: Any other download failure is re-raised unchanged.
    """
    if 'use_auth_token' in kwargs:
        kwargs['token'] = kwargs.pop('use_auth_token')

    # The filename may arrive by keyword or as the second positional argument.
    fname = kwargs.get('filename') or (args[1] if len(args) > 1 else None)

    try:
        return orig_download(*args, **kwargs)
    except Exception as e:
        message = str(e)
        missing_custom = fname == "custom.py" and (
            "404" in message or "Not Found" in message
        )
        if not missing_custom:
            # Bare ``raise`` keeps the original traceback intact.
            raise

        # Hand back a real (empty) module file so the caller receives a
        # usable path rather than None.
        dummy_path = os.path.abspath("dummy_custom.py")
        if not os.path.exists(dummy_path):
            with open(dummy_path, "w", encoding="utf-8") as f:
                f.write("# Dummy file for SpeechBrain compatibility\n")
        return dummy_path
28
 
# Install the download shim before anything is fetched from the Hub.
huggingface_hub.hf_hub_download = patched_download

# Imported here so this section does not rely on names that have not been
# imported by the time it runs.
import warnings

import joblib
# Import SpeechBrain AFTER the patch
from speechbrain.inference.speaker import EncoderClassifier

# Silences every warning for the rest of the process (originally added to
# hide scikit-learn version warnings).
warnings.filterwarnings("ignore")

# --- STEP 2: LOAD MODELS ---
# Prefer the full RAVDESS pickle; otherwise fall back to the generic name.
MODEL_PATH = 'ravdess_svm_speechbrain_ecapa_voxceleb_no_processor_cv_8class.pkl'
if not os.path.exists(MODEL_PATH):
    MODEL_PATH = 'svm_model.joblib'

print(f"Loading SVM classifier: {MODEL_PATH}")
# NOTE: joblib.load unpickles the file, so it must come from a trusted source.
model = joblib.load(MODEL_PATH)

# Build the encoder exactly once, after the shim is in place; the earlier
# duplicate load was discarded by this assignment anyway.
print("Loading SpeechBrain ECAPA feature extractor...")
feature_extractor = EncoderClassifier.from_hparams(
    source="speechbrain/spkrec-ecapa-voxceleb",
    savedir="pretrained_models/ecapa",
)
58
 
59
- # --- STEP 3: PREDICTION LOGIC ---
 
 
def predict_emotion(audio_path):
    """Classify the emotion expressed in an audio file.

    Args:
        audio_path: Path of the audio file to analyse, or ``None`` when
            nothing was uploaded.

    Returns:
        A prompt string when ``audio_path`` is ``None``; a mapping of class
        label to probability when the classifier can produce probabilities;
        otherwise the predicted label as a string.
    """
    if audio_path is None:
        return "Please upload an audio file."

    waveform, sample_rate = torchaudio.load(audio_path)

    # Bring the audio to 16 kHz before it reaches the encoder.
    if fs_mismatch := (sample_rate != 16000):
        to_16k = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
        waveform = to_16k(waveform)

    # Average the channels when there is more than one.
    channel_count = waveform.shape[0]
    if channel_count > 1:
        waveform = torch.mean(waveform, dim=0, keepdim=True)

    with torch.no_grad():
        raw = feature_extractor.encode_batch(waveform)
        flat = raw.squeeze().cpu().numpy().reshape(1, -1)

    # The classifier is fed a single-row frame with named embedding columns.
    columns = [f"{idx}_speechbrain_embedding" for idx in range(192)]
    X = pd.DataFrame(flat, columns=columns)

    try:
        probabilities = model.predict_proba(X)[0]
        return {
            str(label): float(p)
            for label, p in zip(model.classes_, probabilities)
        }
    except Exception:
        # Best-effort fallback: report the hard prediction instead.
        return str(model.predict(X)[0])
92
 
93
- # --- STEP 4: GRADIO INTERFACE ---
# Gradio front end: one audio upload in, one label widget out.
demo = gr.Interface(
    predict_emotion,
    gr.Audio(type="filepath", label="Upload Audio (WAV/MP3)"),
    gr.Label(label="Detected Emotion"),
    title="RAVDESS Emotion Classifier",
    description="Classifies emotions using ECAPA-TDNN speaker embeddings and a Support Vector Machine.",
    allow_flagging="never",
)
102
 
103
  if __name__ == "__main__":
 
1
  import os
2
+ import joblib
3
+ import pandas as pd
4
+ import numpy as np
5
+ import torch
6
+ import torchaudio
7
+ import warnings # <--- This fixes the NameError
8
+ import gradio as gr
9
  import huggingface_hub
10
  from speechbrain.inference.classifiers import EncoderClassifier
11
 
12
+ # 1. ROBUST MONKEY PATCH
13
+ # This fixes the 'use_auth_token' vs 'token' error and the 'NoneType' crash
14
  orig_download = huggingface_hub.hf_hub_download
15
 
def patched_download(*args, **kwargs):
    """Call the original ``hf_hub_download`` with two compatibility shims.

    * A ``use_auth_token`` keyword is forwarded under the name ``token``.
    * When the requested file is ``custom.py`` and the download fails with
      an error whose text contains ``404`` or ``Not Found``, the path of a
      local placeholder file is returned instead of propagating the error.

    Args:
        *args: Positional arguments forwarded to the original downloader;
            the second one, if present, is taken to be the filename.
        **kwargs: Keyword arguments forwarded to the original downloader.

    Returns:
        Whatever the original downloader returns, or the absolute path of
        ``dummy_custom.py`` when a missing ``custom.py`` was tolerated.

    Raises:
        Exception: Any other download failure is re-raised unchanged.
    """
    if 'use_auth_token' in kwargs:
        kwargs['token'] = kwargs.pop('use_auth_token')

    # The filename may arrive by keyword or as the second positional argument.
    fname = kwargs.get('filename') or (args[1] if len(args) > 1 else None)

    try:
        return orig_download(*args, **kwargs)
    except Exception as e:
        message = str(e)
        missing_custom = fname == "custom.py" and (
            "404" in message or "Not Found" in message
        )
        if not missing_custom:
            # Bare ``raise`` keeps the original traceback intact.
            raise

        # If SpeechBrain looks for 'custom.py' and it's missing (404),
        # return a dummy file path instead of None to prevent a crash.
        dummy_path = os.path.abspath("dummy_custom.py")
        if not os.path.exists(dummy_path):
            with open(dummy_path, "w", encoding="utf-8") as f:
                f.write("# Dummy file for compatibility\n")
        return dummy_path
34
 
# Route every Hub download through the compatibility wrapper.
huggingface_hub.hf_hub_download = patched_download

# Silences every warning for the rest of the process.
warnings.filterwarnings("ignore")

# 2. LOAD MODELS
# Use the full RAVDESS pickle when it is present, else the generic filename.
MODEL_PATH = next(
    (
        candidate
        for candidate in ('ravdess_svm_speechbrain_ecapa_voxceleb_no_processor_cv_8class.pkl',)
        if os.path.exists(candidate)
    ),
    'svm_model.joblib',
)

print(f"Loading SVM classifier: {MODEL_PATH}")
# NOTE: joblib.load unpickles the file, so it must come from a trusted source.
svm_model = joblib.load(MODEL_PATH)

# Pretrained ECAPA encoder, cached under a local directory.
print("Loading SpeechBrain ECAPA feature extractor...")
feature_extractor = EncoderClassifier.from_hparams(
    savedir="pretrained_models/spkrec-ecapa-voxceleb",
    source="speechbrain/spkrec-ecapa-voxceleb",
)
53
 
54
+ # 3. DEFINE INFERENCE
# Index-to-name table used when the classifier emits integer class ids.
# NOTE(review): assumes the ids are 0-based and in this order; RAVDESS file
# codes are 1-based, so confirm against the labels used at training time.
EMOTIONS = ['neutral', 'calm', 'happy', 'sad', 'angry', 'fearful', 'disgust', 'surprised']

# Sampling rate the audio is converted to before it reaches the encoder.
TARGET_SAMPLE_RATE = 16000


def predict_emotion(audio_path):
    """Classify the emotion expressed in an audio file.

    Args:
        audio_path: Path of the audio file to analyse, or ``None`` when
            nothing was uploaded.

    Returns:
        A prompt string when ``audio_path`` is ``None``. Otherwise the SVM
        prediction: mapped through ``EMOTIONS`` when it is an integer that
        indexes into that list, rendered as a string when it is an integer
        outside that range, and returned as-is for any other label type.
    """
    if audio_path is None:
        return "Please upload an audio file."

    # Load and Preprocess Audio
    signal, fs = torchaudio.load(audio_path)

    # Down-mix to a single channel. encode_batch treats the first dimension
    # as the batch axis, so a multi-channel file would otherwise produce one
    # embedding per channel and an over-long feature vector after flattening.
    if signal.shape[0] > 1:
        signal = torch.mean(signal, dim=0, keepdim=True)

    # Resample so the encoder always sees audio at the rate it expects.
    if fs != TARGET_SAMPLE_RATE:
        resampler = torchaudio.transforms.Resample(
            orig_freq=fs, new_freq=TARGET_SAMPLE_RATE
        )
        signal = resampler(signal)

    # Extract ECAPA-TDNN Embeddings
    with torch.no_grad():
        embeddings = feature_extractor.encode_batch(signal)

    # One row per clip, as a NumPy array, for the SVM.
    features = embeddings.cpu().numpy().reshape(1, -1)

    # Predict with SVM
    prediction = svm_model.predict(features)[0]

    # Return mapped label if numeric, otherwise return the label itself.
    if isinstance(prediction, (int, np.integer)):
        if 0 <= prediction < len(EMOTIONS):
            return EMOTIONS[prediction]
        # Unknown id: show it rather than raising or wrapping around.
        return str(prediction)
    return prediction
 
 
 
 
 
77
 
78
+ # 4. GRADIO INTERFACE
# Gradio front end: one audio upload in, one label widget out.
demo = gr.Interface(
    predict_emotion,
    gr.Audio(type="filepath", label="Upload Speech (WAV)"),
    gr.Label(label="Detected Emotion"),
    title="Speech Emotion Recognition (RAVDESS)",
    description="This app uses SpeechBrain ECAPA-TDNN embeddings and a pre-trained SVM to classify emotions.",
)
86
 
87
  if __name__ == "__main__":