# Spaces: Sleeping
import os
from pathlib import Path

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import classification_report, confusion_matrix
from torch.utils.data import Dataset, DataLoader
# 1. Dataset Class
class EmotionEmbeddingDataset(Dataset):
    """Dataset pairing pre-computed ``.npy`` embeddings with emotion labels.

    Every metadata row is mapped to the file
    ``<embedding_dir>/<split>_<emotion>_<stem>.npy``, where ``stem`` is the
    file stem of the row's ``path`` value. Rows whose embedding file does not
    exist are skipped, so ``len(dataset)`` can be smaller than the number of
    metadata rows.

    Args:
        metadata_df: DataFrame with ``path``, ``split`` and ``emotion`` columns.
        embedding_dir: Directory that holds the ``.npy`` embedding files.
        label_map: Mapping from emotion value to integer class index.

    Attributes:
        valid_indices: List of ``(row_position, embedding_path)`` tuples for
            the rows whose embedding file was found.
    """

    def __init__(self, metadata_df, embedding_dir, label_map):
        # Reset the index so positional lookups match the (possibly filtered)
        # dataframe that was passed in.
        self.df = metadata_df.reset_index(drop=True)
        self.embedding_dir = Path(embedding_dir)
        self.label_map = label_map

        # Cache the label column once; __getitem__ then does a plain list
        # lookup instead of materialising a pandas row for every sample.
        self._emotions = self.df['emotion'].tolist()

        # Pre-filter to only the rows whose embedding file exists on disk.
        self.valid_indices = []
        rows = zip(self.df['path'], self.df['split'], self._emotions)
        for idx, (path, split, emotion) in enumerate(rows):
            # File naming: "<split>_<emotion>_<stem>.npy".
            emb_path = self.embedding_dir / f"{split}_{emotion}_{Path(path).stem}.npy"
            if emb_path.exists():
                self.valid_indices.append((idx, str(emb_path)))
        print(f"Loaded {len(self.valid_indices)} valid embeddings.")

    def __len__(self):
        """Return the number of rows that have an embedding file."""
        return len(self.valid_indices)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            Tuple ``(embedding, label)``: the array stored in the ``.npy``
            file as a ``float32`` tensor, and the class index as a ``long``
            tensor.

        Raises:
            KeyError: If the row's emotion is missing from ``label_map``.
        """
        original_idx, emb_path = self.valid_indices[idx]
        embedding = np.load(emb_path)
        label = self.label_map[self._emotions[original_idx]]
        return torch.tensor(embedding, dtype=torch.float32), torch.tensor(label, dtype=torch.long)
# 2. Simple Neural Network Architecture
class EmotionClassifier(nn.Module):
    """Small feed-forward classifier producing one logit per class.

    Layers, in order: Linear -> BatchNorm1d -> ReLU -> Dropout ->
    Linear -> ReLU -> Linear. The layer order inside ``self.network`` is kept
    fixed so that ``state_dict`` keys (``network.0.*`` ... ``network.6.*``)
    stay stable across versions of this class.

    Args:
        input_dim: Size of each input feature vector.
        hidden_dim: Width of the first hidden layer; the second hidden layer
            uses ``hidden_dim // 2``.
        num_classes: Number of output logits.
        dropout: Dropout probability applied after the first hidden layer.
    """

    def __init__(self, input_dim=768, hidden_dim=256, num_classes=7, dropout=0.3):
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.BatchNorm1d(hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.ReLU(),
            nn.Linear(hidden_dim // 2, num_classes),
        )

    def forward(self, x):
        """Return raw (un-normalised) class logits for the batch ``x``."""
        return self.network(x)
def train_model(
    metadata_path="data/processed/metadata.csv",
    embedding_dir="data/embeddings/wav2vec2",
    model_save_path="models/emotion_classifier.pth",
    epochs=50,
    batch_size=64,
    lr=0.001,
):
    """Train an ``EmotionClassifier`` on stored embeddings and keep the best checkpoint.

    Reads the metadata CSV, builds a label map from the sorted unique values
    of its ``emotion`` column, trains on rows with ``split == 'train'`` and
    evaluates accuracy on rows with ``split == 'val'`` after every epoch.
    Whenever validation accuracy improves, a checkpoint containing the model
    ``state_dict``, the label map and the emotion list is written to
    ``model_save_path``.

    Args:
        metadata_path: CSV file with ``path``, ``split`` and ``emotion`` columns.
        embedding_dir: Directory containing the ``.npy`` embedding files.
        model_save_path: Destination of the best checkpoint; its parent
            directory is created if missing.
        epochs: Number of passes over the training set.
        batch_size: Mini-batch size for both loaders (must be at least 2).
        lr: Learning rate for the Adam optimizer.

    Raises:
        ValueError: If ``batch_size`` is below 2, fewer than two training
            embeddings were found, or no validation embeddings were found.
    """
    if batch_size < 2:
        # BatchNorm1d cannot compute batch statistics from a single sample.
        raise ValueError("batch_size must be at least 2")

    # Label Mapping
    df = pd.read_csv(metadata_path)
    emotions = sorted(df['emotion'].unique())
    label_map = {name: i for i, name in enumerate(emotions)}
    print(f"Label Map: {label_map}")

    # Prepare DataLoaders
    train_ds = EmotionEmbeddingDataset(df[df['split'] == 'train'], embedding_dir, label_map)
    val_ds = EmotionEmbeddingDataset(df[df['split'] == 'val'], embedding_dir, label_map)
    if len(train_ds) < 2:
        raise ValueError(
            f"Need at least 2 training embeddings in {embedding_dir!r}, found {len(train_ds)}."
        )
    if len(val_ds) == 0:
        # Without this guard the accuracy computation below divides by zero.
        raise ValueError(f"No validation embeddings found in {embedding_dir!r}.")

    # A trailing batch of exactly one sample makes BatchNorm1d raise in
    # training mode, so drop it only in that case (all other data is kept).
    # NOTE(review): assumes each embedding is a 1-D feature vector — confirm.
    drop_last = len(train_ds) % batch_size == 1
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, drop_last=drop_last)
    val_loader = DataLoader(val_ds, batch_size=batch_size)

    # Initialize Model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = EmotionClassifier(num_classes=len(emotions)).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Make sure the checkpoint directory exists before the first save.
    Path(model_save_path).parent.mkdir(parents=True, exist_ok=True)

    # Training Loop
    best_val_acc = 0.0
    print("\nStarting Training...")
    for epoch in range(epochs):
        model.train()
        train_loss = 0
        for x, y in train_loader:
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()
            outputs = model(x)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Validation
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for x, y in val_loader:
                x, y = x.to(device), y.to(device)
                predicted = model(x).argmax(dim=1)
                total += y.size(0)
                correct += (predicted == y).sum().item()
        val_acc = 100 * correct / total
        print(f"Epoch {epoch+1}/{epochs} | Loss: {train_loss/len(train_loader):.4f} | Val Acc: {val_acc:.2f}%")

        # Only the best-performing epoch (by validation accuracy) is kept on disk.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            torch.save({
                'model_state_dict': model.state_dict(),
                'label_map': label_map,
                'emotions': emotions
            }, model_save_path)
            print("Saved new best model!")
    print(f"\nTraining Complete. Best Val Accuracy: {best_val_acc:.2f}%")
# Run training only when this file is executed as a script, not on import.
if __name__ == "__main__":
    train_model()