Spaces:

st192011
/

EmoDB-ALM-Protocol

Running

st192011 committed 10 days ago

Commit

2e1a0bf

verified ·

1 Parent(s): 18da8b7

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -11,9 +11,12 @@ df_cached = pd.read_csv("emodb_full_zeroshot_predictions.csv")
 X_embeddings = np.load("emodb_full_embeddings.npy")
 print("🧠 Phase 2: Dynamically Training Both Linear Classification Heads...")
-# --- Head A: The Global 80/20 Head ---
-labels = df_cached['True_Emotion'].values
 indices = np.arange(len(labels))
 X_train, X_test, y_train, y_test, idx_train, idx_test = train_test_split(
     X_embeddings, labels, indices, test_size=0.20, random_state=42, stratify=labels
 )
@@ -22,13 +25,19 @@ global_head.fit(X_train, y_train)
 # --- Head B: The Cross-Speaker Head (Train on Speaker 31 & 34) ---
 train_speakers = ['Speaker_31.0', 'Speaker_34.0']
-cross_train_mask = df_cached['Speaker_Info'].isin(train_speakers)
 X_train_cross = X_embeddings[cross_train_mask]
-y_train_cross = df_cached[cross_train_mask]['True_Emotion'].values
 cross_head = LogisticRegression(max_iter=1000, class_weight='balanced', random_state=42)
 cross_head.fit(X_train_cross, y_train_cross)
 print("🌍 Phase 3: Attaching to EmoDB on Hugging Face Hub for Audio Streaming...")
 hf_dataset = load_dataset("harritaylor/er_emodb", split="train")

 X_embeddings = np.load("emodb_full_embeddings.npy")
 print("🧠 Phase 2: Dynamically Training Both Linear Classification Heads...")
+# 1. Cleanse PyArrow strings into native NumPy string arrays to avoid Python 3.13 indexing crashes
+labels = df_cached['True_Emotion'].to_numpy().astype(str)
 indices = np.arange(len(labels))
+# --- Head A: The Global 80/20 Head ---
 X_train, X_test, y_train, y_test, idx_train, idx_test = train_test_split(
     X_embeddings, labels, indices, test_size=0.20, random_state=42, stratify=labels
 )
 # --- Head B: The Cross-Speaker Head (Train on Speaker 31 & 34) ---
 train_speakers = ['Speaker_31.0', 'Speaker_34.0']
+# Convert the boolean mask to a native NumPy boolean array
+cross_train_mask = df_cached['Speaker_Info'].isin(train_speakers).to_numpy()
 X_train_cross = X_embeddings[cross_train_mask]
+# Index the native NumPy labels array directly using our mask
+y_train_cross = labels[cross_train_mask]
 cross_head = LogisticRegression(max_iter=1000, class_weight='balanced', random_state=42)
 cross_head.fit(X_train_cross, y_train_cross)
+print("✅ Classification heads successfully trained with native NumPy types!")
 print("🌍 Phase 3: Attaching to EmoDB on Hugging Face Hub for Audio Streaming...")
 hf_dataset = load_dataset("harritaylor/er_emodb", split="train")