mateo496 committed on
Commit
a12db03
·
0 Parent(s):

Initialize git repo.

Browse files
.gitignore ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data/
2
+ *.npy
3
+ *.wav
4
+
5
+ models/checkpoints
6
+ models/saved
7
+ *.pt
8
+ *.pth
9
+
10
+ __pycache__/
11
+ *.py[cod]
12
+ *$py.class
13
+ *.so
14
+ .Python
15
+ build/
16
+ develop-eggs/
17
+ dist/
18
+ downloads/
19
+ eggs/
20
+ .eggs/
21
+ lib/
22
+ lib64/
23
+ parts/
24
+ sdist/
25
+ var/
26
+ wheels/
27
+ *.egg-info/
28
+ .installed.cfg
29
+ *.egg
main.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import torch
4
+ import json
5
+ import matplotlib.pyplot as plt
6
+ from sklearn.model_selection import train_test_split
7
+
8
+ from src.models.cnn import CNN
9
+ from src.models.train import train_cnn
10
+ from src.data.augment import create_augmented_datasets, create_log_mel
11
+
12
def main():
    """Train the CNN end to end: load (or build) the dataset, split, train.

    Returns the best validation accuracy reached during training.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(device)

    X_path = "data/preprocessed/X.npy"
    y_path = "data/preprocessed/y.npy"

    cached = os.path.exists(X_path) and os.path.exists(y_path)
    if cached:
        print("Loading existing processed data...")
        X = np.load(X_path, allow_pickle=True)
        y = np.load(y_path)
    else:
        print("Processing audio data...")
        audio_training_path = "data/audio/0"
        # A single entry under data/audio/0 means the augmented copies
        # have not been generated yet — build them first.
        if len(os.listdir(audio_training_path)) == 1:
            print("Creating augmented datasets...")
            create_augmented_datasets("data/audio/0", "data/audio")

        print("Creating log-mel spectrograms...")
        X, y = create_log_mel("data/audio", "data/preprocessed")

    print(f"Dataset size: {len(X)} samples, {len(np.unique(y))} classes")

    # Stratified 80/20 split with a fixed seed for reproducibility.
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    print(f"Train: {len(X_train)}, Val: {len(X_val)}")

    model = CNN(n_classes=len(np.unique(y)))

    # Fresh run: no checkpoint to resume from.
    best_val_acc = train_cnn(
        model,
        X_train, y_train,
        X_val, y_val,
        epochs=100,
        batch_size=100,
        lr=1e-2,
        device=device,
        use_all_patches=True,
        samples_per_epoch_fraction=1/8,
        checkpoint_dir="models/checkpoints",
        save_every_n_epoch=1,
        resume_from=None,
    )

    print(f"\nTraining complete! Best validation accuracy: {best_val_acc:.4f}")

    return best_val_acc
62
+
63
def main_resume(checkpoint_dir="models/checkpoints", resume_from="models/checkpoints/latest_checkpoint.pt"):
    """Resume CNN training from a previously saved checkpoint.

    Parameters
    ----------
    checkpoint_dir : str
        Directory where new checkpoints will be written.
    resume_from : str
        Checkpoint path handed through to ``train_cnn``.

    Returns
    -------
    float
        Best validation accuracy reported by ``train_cnn``.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")

    print("Loading processed data...")
    # NOTE(review): main() caches the arrays under data/preprocessed/, but
    # this function loads from data/log_mel/ — confirm which location is
    # the current one before relying on resume.
    X = np.load("data/log_mel/X.npy", allow_pickle=True)
    y = np.load("data/log_mel/y.npy")

    # Same split settings as main() (test_size=0.2, random_state=42,
    # stratified) so the resumed run sees the identical train/val partition.
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    print(f"Train: {len(X_train)}, Val: {len(X_val)}")

    n_classes = len(np.unique(y))
    model = CNN(n_classes=n_classes)

    print(f"Resuming from: {resume_from}")
    # Hyperparameters mirror main(); train_cnn restores model/optimizer
    # state and the epoch counter from `resume_from`.
    best_val_acc = train_cnn(
        model,
        X_train, y_train,
        X_val, y_val,
        epochs=100,
        batch_size=100,
        lr=0.01,
        device=device,
        use_all_patches=True,
        samples_per_epoch_fraction=1/8,
        checkpoint_dir=checkpoint_dir,
        save_every_n_epoch=1,
        resume_from=resume_from
    )

    print(f"\nTraining complete! Best validation accuracy: {best_val_acc:.4f}")
    return best_val_acc
98
+
99
+
100
+
101
# Guard the entry point so importing this module (e.g. to reuse main_resume)
# does not immediately kick off a full training run.
if __name__ == "__main__":
    main()
src/models/cnn.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch.nn as nn
2
+
3
class CNN(nn.Module):
    """Small convolutional classifier for log-mel spectrogram patches.

    Three conv+ReLU stages (the first two followed by 4x2 max-pooling) feed
    a two-layer MLP head with dropout. The flattened feature size of 2400
    (48 channels * 2 * 25) is hard-wired; a (batch, 1, 128, 128) input
    produces exactly that, so patches are expected to be 128x128.
    """

    def __init__(self, n_classes=50):
        super().__init__()
        # Feature extractor: channels go 1 -> 24 -> 48 -> 48; each pool
        # shrinks the first spatial dim by 4x and the second by 2x.
        stages = [
            nn.Conv2d(1, 24, kernel_size=(5, 5)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(4, 2), stride=(4, 2)),
            nn.Conv2d(24, 48, kernel_size=(5, 5)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(4, 2), stride=(4, 2)),
            nn.Conv2d(48, 48, kernel_size=(5, 5)),
            nn.ReLU(),
        ]
        self.features = nn.Sequential(*stages)
        # Classifier head; heavy dropout on both sides of the hidden layer.
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(2400, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, n_classes),
        )

    def forward(self, x):
        """Return raw class logits for a batch of spectrogram patches."""
        feats = self.features(x)
        return self.classifier(feats.flatten(1))
src/models/predict.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+
4
# Number of time frames per CNN input patch.
cnn_input_length = 128

def predict_with_overlapping_patches(model, spectrogram, patch_length=cnn_input_length, hop=1, batch_size=100, device="cuda"):
    """Classify a whole spectrogram by averaging model outputs over patches.

    Slides a window of `patch_length` frames along the time axis with stride
    `hop`, runs every window through `model` in mini-batches, averages the
    raw activations across windows, and returns the argmax class index.
    Spectrograms shorter than one patch are zero-padded in time.
    """
    model.eval()

    n_frames, n_mels = spectrogram.shape

    # Short clip: pad the time axis up to exactly one full patch.
    if n_frames < patch_length:
        spectrogram = np.pad(
            spectrogram, ((0, patch_length - n_frames), (0, 0)), mode='constant'
        )
        n_frames = patch_length

    # Collect overlapping windows -> shape (n_patches, 1, patch_length, n_mels).
    starts = range(0, n_frames - patch_length + 1, hop)
    windows = np.stack([spectrogram[s:s + patch_length] for s in starts])
    inputs = torch.tensor(windows[:, np.newaxis, :, :], dtype=torch.float32).to(device)

    # Forward all windows in chunks of `batch_size` without tracking grads.
    chunks = []
    with torch.no_grad():
        for offset in range(0, len(inputs), batch_size):
            chunks.append(model(inputs[offset:offset + batch_size]))
    activations = torch.cat(chunks, dim=0)

    # Average across windows, then take the strongest class.
    return activations.mean(dim=0).argmax().item()
src/models/train.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import tqdm
4
+ import json
5
+ from torch.utils.data import DataLoader
6
+
7
+ from src.models.predict import predict_with_overlapping_patches
8
+ from src.data.datasets import FullTFPatchesDataset, RandomPatchDataset
9
+
10
def train_cnn(
    model,
    X_train, y_train,
    X_val, y_val,
    epochs=50,
    batch_size=100,
    lr=0.01,
    device="cuda",
    use_all_patches=True,
    samples_per_epoch_fraction=1/8,
    checkpoint_dir="models/checkpoints",
    save_every_n_epoch=1,
    resume_from=None
):
    """Train `model` on spectrogram patches with checkpointing and resume.

    Each epoch draws a fraction of the available training patches, then
    validates by classifying every full validation spectrogram with
    `predict_with_overlapping_patches`. Checkpoints (model + optimizer +
    history) are written every `save_every_n_epoch` epochs; the final
    model is always saved to models/saved/final_model.pt.

    Parameters
    ----------
    model : nn.Module with `.features` and `.classifier` submodules
        (weight decay is applied to the classifier only).
    X_train, y_train : training spectrograms and labels.
    X_val, y_val : validation spectrograms and labels.
    epochs : total epoch count (including already-completed ones on resume).
    batch_size, lr : SGD mini-batch size and learning rate.
    device : torch device string.
    use_all_patches : choose FullTFPatchesDataset vs RandomPatchDataset.
    samples_per_epoch_fraction : fraction of all patches seen per epoch.
    checkpoint_dir : destination for checkpoint files.
    save_every_n_epoch : checkpoint frequency in epochs.
    resume_from : optional checkpoint path to restore state from.

    Returns
    -------
    float : best validation accuracy observed.
    """
    os.makedirs(checkpoint_dir, exist_ok=True)

    model.to(device)

    # Dataset choice: all deterministic patches vs one random patch per clip.
    if use_all_patches:
        train_dataset = FullTFPatchesDataset(X_train, y_train, patch_length=128)
        print(f"\n{'='*60}")
        print("Using ALL PATCHES method (as per paper)")
        print(f"{'='*60}")
    else:
        train_dataset = RandomPatchDataset(X_train, y_train, patch_length=128)
        print(f"\n{'='*60}")
        print("Using RANDOM PATCHES method (simpler)")
        print(f"{'='*60}")

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
        pin_memory=True
    )

    # Each epoch only consumes a fraction of the (shuffled) patch pool;
    # the train loop below breaks after `batches_per_epoch` batches.
    total_patches = len(train_dataset)
    patches_per_epoch = int(total_patches * samples_per_epoch_fraction)
    batches_per_epoch = patches_per_epoch // batch_size

    print(f"Total available patches: {total_patches:,}")
    print(f"Patches per epoch ({samples_per_epoch_fraction}): {patches_per_epoch:,}")
    print(f"Batches per epoch: {batches_per_epoch:,}")
    print(f"{'='*60}\n")

    criterion = torch.nn.CrossEntropyLoss()
    # L2 regularization only on the classifier head, not the conv features.
    optimizer = torch.optim.SGD([
        {'params': model.features.parameters(), 'weight_decay': 0.0},
        {'params': model.classifier.parameters(), 'weight_decay': 0.001}
    ], lr=lr, momentum=0.9)


    start_epoch = 0
    best_val_acc = 0.0
    training_history = {
        'train_loss': [],
        'train_acc': [],
        'val_acc': [],
        'epochs': []
    }

    # Restore model/optimizer/history and continue from the next epoch.
    if resume_from and os.path.exists(resume_from):
        print(f"Resuming from checkpoint: {resume_from}")
        checkpoint = torch.load(resume_from, map_location=device)

        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch'] + 1
        best_val_acc = checkpoint['best_val_acc']
        training_history = checkpoint['history']

        print(f"Resuming training from epoch: {checkpoint['epoch']}")
        print(f"Best val acc: {best_val_acc:.4f}\n")



    for epoch in range(start_epoch, epochs):
        # ---- training pass (capped at batches_per_epoch) ----
        model.train()
        train_loss = 0.0
        correct = 0
        total = 0
        batches_processed = 0

        for xb, yb in tqdm.tqdm(train_loader, f"Epoch {epoch+1} Train", leave=False):
            if batches_processed >= batches_per_epoch:
                break

            xb = xb.to(device)
            yb = yb.to(device)

            optimizer.zero_grad()
            out = model(xb)

            loss = criterion(out, yb)

            loss.backward()
            # Clip gradients to stabilize SGD with the relatively high lr.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            optimizer.step()

            # Accumulate sample-weighted loss and patch-level accuracy.
            train_loss += loss.item() * xb.size(0)
            _, pred = out.max(1)
            correct += (pred == yb).sum().item()
            total += yb.size(0)
            batches_processed += 1

        train_loss /= total
        train_acc = correct / total

        # ---- validation: clip-level accuracy over whole spectrograms ----
        model.eval()
        val_correct = 0
        val_total = len(y_val)


        for i in tqdm.tqdm(range(val_total), desc=f"Epoch {epoch+1} Val", leave=False):
            spec = X_val[i]
            true_label = y_val[i]

            pred_label = predict_with_overlapping_patches(model, spec, device=device)

            if pred_label == true_label:
                val_correct += 1

        val_acc = val_correct / val_total

        training_history['train_loss'].append(train_loss)
        training_history['train_acc'].append(train_acc)
        training_history['val_acc'].append(val_acc)
        training_history['epochs'].append(epoch + 1)

        is_best = val_acc > best_val_acc

        if is_best:
            best_val_acc = val_acc
            # NOTE(review): this writes bare weights to ./best_model.pt in the
            # CWD, while a full checkpoint also goes to checkpoint_dir below —
            # confirm both copies are intended.
            torch.save(model.state_dict(), "best_model.pt")

        print(
            f"Epoch {epoch+1}/{epochs} | "
            f"Train loss: {train_loss:.4f}, Train acc: {train_acc:.4f} | "
            f"Val acc: {val_acc:.4f} (best: {best_val_acc:.4f})"
        )

        # ---- periodic checkpointing ----
        if (epoch + 1) % save_every_n_epoch == 0:
            checkpoint = {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_loss': train_loss,
                'train_acc': train_acc,
                'val_acc': val_acc,
                'best_val_acc': best_val_acc,
                'history': training_history,
                'config': {
                    'batch_size': batch_size,
                    'lr': lr,
                    'total_patches': total_patches,
                    'patches_per_epoch': patches_per_epoch,
                }
            }
            checkpoint_path = os.path.join(
                checkpoint_dir,
                f"checkpoint_epoch_{epoch+1}.pt"
            )
            torch.save(checkpoint, checkpoint_path)

            if is_best:
                best_path = os.path.join(checkpoint_dir, "best_model.pt")
                torch.save(checkpoint, best_path)
                #print("Saved best model")

            # latest_checkpoint.pt is what main_resume() points at by default.
            latest_path = os.path.join(checkpoint_dir, "latest_checkpoint.pt")
            torch.save(checkpoint, latest_path)

            # Keep a JSON copy of the curves for plotting without torch.
            history_path = os.path.join(checkpoint_dir, "training_history.json")
            with open(history_path, 'w') as f:
                json.dump(training_history, f, indent=2)

    # ---- final model export (last-epoch weights, not necessarily best) ----
    final_model_dir = "models/saved"
    os.makedirs(final_model_dir, exist_ok=True)
    final_model_path = os.path.join(final_model_dir, "final_model.pt")
    torch.save({
        'model_state_dict': model.state_dict(),
        'best_val_acc': best_val_acc,
        'config': {
            'batch_size': batch_size,
            'lr': lr,
            'epochs': epochs,
        }
    }, final_model_path)
    print(f"\nTraining complete! Final model saved to {final_model_path}")

    return best_val_acc
src/visualization/plot.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
def plot_training_history(checkpoint_dir):
    """Plot loss/accuracy curves from training_history.json in `checkpoint_dir`.

    Saves training_curves.png alongside the history file and shows the figure.
    Prints a message and returns early if no history file exists.
    """
    # Bug fix: this module has no top-level imports, so `os`, `json` and
    # `plt` were undefined and every call raised NameError. Import locally
    # to keep the file's (import-free) top level unchanged.
    import os
    import json

    history_path = os.path.join(checkpoint_dir, "training_history.json")

    if not os.path.exists(history_path):
        print(f"No training history found at {history_path}")
        return

    with open(history_path, 'r') as f:
        history = json.load(f)

    # Imported lazily so the no-history early return works without matplotlib.
    import matplotlib.pyplot as plt

    epochs = history['epochs']
    train_loss = history['train_loss']
    train_acc = history['train_acc']
    val_acc = history['val_acc']

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

    # Left panel: training loss.
    ax1.plot(epochs, train_loss, 'b-', label='Train Loss')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Loss')
    ax1.set_title('Training Loss')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Right panel: train vs validation accuracy.
    ax2.plot(epochs, train_acc, 'b-', label='Train Accuracy')
    ax2.plot(epochs, val_acc, 'r-', label='Validation Accuracy')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Accuracy')
    ax2.set_title('Training and Validation Accuracy')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    plot_path = os.path.join(checkpoint_dir, "training_curves.png")
    plt.savefig(plot_path, dpi=150, bbox_inches='tight')
    print(f"Saved training curves to {plot_path}")
    plt.show()