st192011 committed on
Commit
2e1a0bf
·
verified ·
1 Parent(s): 18da8b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -4
app.py CHANGED
@@ -11,9 +11,12 @@ df_cached = pd.read_csv("emodb_full_zeroshot_predictions.csv")
11
  X_embeddings = np.load("emodb_full_embeddings.npy")
12
 
13
  print("🧠 Phase 2: Dynamically Training Both Linear Classification Heads...")
14
- # --- Head A: The Global 80/20 Head ---
15
- labels = df_cached['True_Emotion'].values
 
16
  indices = np.arange(len(labels))
 
 
17
  X_train, X_test, y_train, y_test, idx_train, idx_test = train_test_split(
18
  X_embeddings, labels, indices, test_size=0.20, random_state=42, stratify=labels
19
  )
@@ -22,13 +25,19 @@ global_head.fit(X_train, y_train)
22
 
23
  # --- Head B: The Cross-Speaker Head (Train on Speaker 31 & 34) ---
24
  train_speakers = ['Speaker_31.0', 'Speaker_34.0']
25
- cross_train_mask = df_cached['Speaker_Info'].isin(train_speakers)
 
 
 
26
  X_train_cross = X_embeddings[cross_train_mask]
27
- y_train_cross = df_cached[cross_train_mask]['True_Emotion'].values
 
28
 
29
  cross_head = LogisticRegression(max_iter=1000, class_weight='balanced', random_state=42)
30
  cross_head.fit(X_train_cross, y_train_cross)
31
 
 
 
32
  print("🌍 Phase 3: Attaching to EmoDB on Hugging Face Hub for Audio Streaming...")
33
  hf_dataset = load_dataset("harritaylor/er_emodb", split="train")
34
 
 
11
  X_embeddings = np.load("emodb_full_embeddings.npy")
12
 
13
  print("🧠 Phase 2: Dynamically Training Both Linear Classification Heads...")
14
+
15
+ # 1. Cleanse PyArrow strings into native NumPy string arrays to avoid Python 3.13 indexing crashes
16
+ labels = df_cached['True_Emotion'].to_numpy().astype(str)
17
  indices = np.arange(len(labels))
18
+
19
+ # --- Head A: The Global 80/20 Head ---
20
  X_train, X_test, y_train, y_test, idx_train, idx_test = train_test_split(
21
  X_embeddings, labels, indices, test_size=0.20, random_state=42, stratify=labels
22
  )
 
25
 
26
  # --- Head B: The Cross-Speaker Head (Train on Speaker 31 & 34) ---
27
  train_speakers = ['Speaker_31.0', 'Speaker_34.0']
28
+
29
+ # Convert the boolean mask to a native NumPy boolean array
30
+ cross_train_mask = df_cached['Speaker_Info'].isin(train_speakers).to_numpy()
31
+
32
  X_train_cross = X_embeddings[cross_train_mask]
33
+ # Index the native NumPy labels array directly using our mask
34
+ y_train_cross = labels[cross_train_mask]
35
 
36
  cross_head = LogisticRegression(max_iter=1000, class_weight='balanced', random_state=42)
37
  cross_head.fit(X_train_cross, y_train_cross)
38
 
39
+ print("✅ Classification heads successfully trained with native NumPy types!")
40
+
41
  print("🌍 Phase 3: Attaching to EmoDB on Hugging Face Hub for Audio Streaming...")
42
  hf_dataset = load_dataset("harritaylor/er_emodb", split="train")
43