mateo496 committed on
Commit
a12db03
·
0 Parent(s):

Initialize git repo.

Browse files
.gitignore ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data/
2
+ *.npy
3
+ *.wav
4
+
5
+ models/checkpoints
6
+ models/saved
7
+ *.pt
8
+ *.pth
9
+
10
+ __pycache__/
11
+ *.py[cod]
12
+ *$py.class
13
+ *.so
14
+ .Python
15
+ build/
16
+ develop-eggs/
17
+ dist/
18
+ downloads/
19
+ eggs/
20
+ .eggs/
21
+ lib/
22
+ lib64/
23
+ parts/
24
+ sdist/
25
+ var/
26
+ wheels/
27
+ *.egg-info/
28
+ .installed.cfg
29
+ *.egg
main.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import torch
4
+ import json
5
+ import matplotlib.pyplot as plt
6
+ from sklearn.model_selection import train_test_split
7
+
8
+ from src.models.cnn import CNN
9
+ from src.models.train import train_cnn
10
+ from src.data.augment import create_augmented_datasets, create_log_mel
11
+
12
def main():
    """Train the CNN end to end: load (or build) the dataset, split, train.

    Returns the best validation accuracy reached during training.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(device)

    X_path = "data/preprocessed/X.npy"
    y_path = "data/preprocessed/y.npy"

    cached = os.path.exists(X_path) and os.path.exists(y_path)
    if cached:
        print("Loading existing processed data...")
        X = np.load(X_path, allow_pickle=True)
        y = np.load(y_path)
    else:
        print("Processing audio data...")
        audio_training_path = "data/audio/0"
        # A single entry under data/audio/0 means the augmented copies
        # have not been generated yet — build them first.
        if len(os.listdir(audio_training_path)) == 1:
            print("Creating augmented datasets...")
            create_augmented_datasets("data/audio/0", "data/audio")

        print("Creating log-mel spectrograms...")
        X, y = create_log_mel("data/audio", "data/preprocessed")

    print(f"Dataset size: {len(X)} samples, {len(np.unique(y))} classes")

    # Stratified 80/20 split with a fixed seed for reproducibility.
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    print(f"Train: {len(X_train)}, Val: {len(X_val)}")

    model = CNN(n_classes=len(np.unique(y)))

    # Fresh run: no checkpoint to resume from.
    best_val_acc = train_cnn(
        model,
        X_train, y_train,
        X_val, y_val,
        epochs=100,
        batch_size=100,
        lr=1e-2,
        device=device,
        use_all_patches=True,
        samples_per_epoch_fraction=1/8,
        checkpoint_dir="models/checkpoints",
        save_every_n_epoch=1,
        resume_from=None,
    )

    print(f"\nTraining complete! Best validation accuracy: {best_val_acc:.4f}")

    return best_val_acc
62
+
63
def main_resume(checkpoint_dir="models/checkpoints", resume_from="models/checkpoints/latest_checkpoint.pt"):
    """Resume CNN training from a previously saved checkpoint.

    Parameters
    ----------
    checkpoint_dir : str
        Directory where new checkpoints will be written.
    resume_from : str
        Checkpoint path handed through to ``train_cnn``.

    Returns
    -------
    float
        Best validation accuracy reported by ``train_cnn``.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")

    print("Loading processed data...")
    # NOTE(review): main() caches the arrays under data/preprocessed/, but
    # this function loads from data/log_mel/ — confirm which location is
    # the current one before relying on resume.
    X = np.load("data/log_mel/X.npy", allow_pickle=True)
    y = np.load("data/log_mel/y.npy")

    # Same split settings as main() (test_size=0.2, random_state=42,
    # stratified) so the resumed run sees the identical train/val partition.
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    print(f"Train: {len(X_train)}, Val: {len(X_val)}")

    n_classes = len(np.unique(y))
    model = CNN(n_classes=n_classes)

    print(f"Resuming from: {resume_from}")
    # Hyperparameters mirror main(); train_cnn restores model/optimizer
    # state and the epoch counter from `resume_from`.
    best_val_acc = train_cnn(
        model,
        X_train, y_train,
        X_val, y_val,
        epochs=100,
        batch_size=100,
        lr=0.01,
        device=device,
        use_all_patches=True,
        samples_per_epoch_fraction=1/8,
        checkpoint_dir=checkpoint_dir,
        save_every_n_epoch=1,
        resume_from=resume_from
    )

    print(f"\nTraining complete! Best validation accuracy: {best_val_acc:.4f}")
    return best_val_acc
98
+
99
+
100
+
101
# Guard the entry point so importing this module (e.g. to reuse main_resume)
# does not immediately kick off a full training run.
if __name__ == "__main__":
    main()
src/models/cnn.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch.nn as nn
2
+
3
class CNN(nn.Module):
    """Small convolutional classifier for log-mel spectrogram patches.

    Three conv+ReLU stages (the first two followed by 4x2 max-pooling) feed
    a two-layer MLP head with dropout. The flattened feature size of 2400
    (48 channels * 2 * 25) is hard-wired; a (batch, 1, 128, 128) input
    produces exactly that, so patches are expected to be 128x128.
    """

    def __init__(self, n_classes=50):
        super().__init__()
        # Feature extractor: channels go 1 -> 24 -> 48 -> 48; each pool
        # shrinks the first spatial dim by 4x and the second by 2x.
        stages = [
            nn.Conv2d(1, 24, kernel_size=(5, 5)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(4, 2), stride=(4, 2)),
            nn.Conv2d(24, 48, kernel_size=(5, 5)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(4, 2), stride=(4, 2)),
            nn.Conv2d(48, 48, kernel_size=(5, 5)),
            nn.ReLU(),
        ]
        self.features = nn.Sequential(*stages)
        # Classifier head; heavy dropout on both sides of the hidden layer.
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(2400, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, n_classes),
        )

    def forward(self, x):
        """Return raw class logits for a batch of spectrogram patches."""
        feats = self.features(x)
        return self.classifier(feats.flatten(1))
src/models/predict.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+
4
# Number of time frames per CNN input patch.
cnn_input_length = 128

def predict_with_overlapping_patches(model, spectrogram, patch_length=cnn_input_length, hop=1, batch_size=100, device="cuda"):
    """Classify a whole spectrogram by averaging model outputs over patches.

    Slides a window of `patch_length` frames along the time axis with stride
    `hop`, runs every window through `model` in mini-batches, averages the
    raw activations across windows, and returns the argmax class index.
    Spectrograms shorter than one patch are zero-padded in time.
    """
    model.eval()

    n_frames, n_mels = spectrogram.shape

    # Short clip: pad the time axis up to exactly one full patch.
    if n_frames < patch_length:
        spectrogram = np.pad(
            spectrogram, ((0, patch_length - n_frames), (0, 0)), mode='constant'
        )
        n_frames = patch_length

    # Collect overlapping windows -> shape (n_patches, 1, patch_length, n_mels).
    starts = range(0, n_frames - patch_length + 1, hop)
    windows = np.stack([spectrogram[s:s + patch_length] for s in starts])
    inputs = torch.tensor(windows[:, np.newaxis, :, :], dtype=torch.float32).to(device)

    # Forward all windows in chunks of `batch_size` without tracking grads.
    chunks = []
    with torch.no_grad():
        for offset in range(0, len(inputs), batch_size):
            chunks.append(model(inputs[offset:offset + batch_size]))
    activations = torch.cat(chunks, dim=0)

    # Average across windows, then take the strongest class.
    return activations.mean(dim=0).argmax().item()
src/models/train.py ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import tqdm
4
+ import json
5
+ from torch.utils.data import DataLoader
6
+
7
+ from src.models.predict import predict_with_overlapping_patches
8
+ from src.data.datasets import FullTFPatchesDataset, RandomPatchDataset
9
+
10
def train_cnn(
    model,
    X_train, y_train,
    X_val, y_val,
    epochs=50,
    batch_size=100,
    lr=0.01,
    device="cuda",
    use_all_patches=True,
    samples_per_epoch_fraction=1/8,
    checkpoint_dir="models/checkpoints",
    save_every_n_epoch=1,
    resume_from=None
):
    """Train `model` on spectrogram patches with checkpointing and resume.

    Each epoch draws a fraction of the available training patches, then
    validates by classifying every full validation spectrogram with
    `predict_with_overlapping_patches`. Checkpoints (model + optimizer +
    history) are written every `save_every_n_epoch` epochs; the final
    model is always saved to models/saved/final_model.pt.

    Parameters
    ----------
    model : nn.Module with `.features` and `.classifier` submodules
        (weight decay is applied to the classifier only).
    X_train, y_train : training spectrograms and labels.
    X_val, y_val : validation spectrograms and labels.
    epochs : total epoch count (including already-completed ones on resume).
    batch_size, lr : SGD mini-batch size and learning rate.
    device : torch device string.
    use_all_patches : choose FullTFPatchesDataset vs RandomPatchDataset.
    samples_per_epoch_fraction : fraction of all patches seen per epoch.
    checkpoint_dir : destination for checkpoint files.
    save_every_n_epoch : checkpoint frequency in epochs.
    resume_from : optional checkpoint path to restore state from.

    Returns
    -------
    float : best validation accuracy observed.
    """
    os.makedirs(checkpoint_dir, exist_ok=True)

    model.to(device)

    # Dataset choice: all deterministic patches vs one random patch per clip.
    if use_all_patches:
        train_dataset = FullTFPatchesDataset(X_train, y_train, patch_length=128)
        print(f"\n{'='*60}")
        print("Using ALL PATCHES method (as per paper)")
        print(f"{'='*60}")
    else:
        train_dataset = RandomPatchDataset(X_train, y_train, patch_length=128)
        print(f"\n{'='*60}")
        print("Using RANDOM PATCHES method (simpler)")
        print(f"{'='*60}")

    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
        pin_memory=True
    )

    # Each epoch only consumes a fraction of the (shuffled) patch pool;
    # the train loop below breaks after `batches_per_epoch` batches.
    total_patches = len(train_dataset)
    patches_per_epoch = int(total_patches * samples_per_epoch_fraction)
    batches_per_epoch = patches_per_epoch // batch_size

    print(f"Total available patches: {total_patches:,}")
    print(f"Patches per epoch ({samples_per_epoch_fraction}): {patches_per_epoch:,}")
    print(f"Batches per epoch: {batches_per_epoch:,}")
    print(f"{'='*60}\n")

    criterion = torch.nn.CrossEntropyLoss()
    # L2 regularization only on the classifier head, not the conv features.
    optimizer = torch.optim.SGD([
        {'params': model.features.parameters(), 'weight_decay': 0.0},
        {'params': model.classifier.parameters(), 'weight_decay': 0.001}
    ], lr=lr, momentum=0.9)


    start_epoch = 0
    best_val_acc = 0.0
    training_history = {
        'train_loss': [],
        'train_acc': [],
        'val_acc': [],
        'epochs': []
    }

    # Restore model/optimizer/history and continue from the next epoch.
    if resume_from and os.path.exists(resume_from):
        print(f"Resuming from checkpoint: {resume_from}")
        checkpoint = torch.load(resume_from, map_location=device)

        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch'] + 1
        best_val_acc = checkpoint['best_val_acc']
        training_history = checkpoint['history']

        print(f"Resuming training from epoch: {checkpoint['epoch']}")
        print(f"Best val acc: {best_val_acc:.4f}\n")



    for epoch in range(start_epoch, epochs):
        # ---- training pass (capped at batches_per_epoch) ----
        model.train()
        train_loss = 0.0
        correct = 0
        total = 0
        batches_processed = 0

        for xb, yb in tqdm.tqdm(train_loader, f"Epoch {epoch+1} Train", leave=False):
            if batches_processed >= batches_per_epoch:
                break

            xb = xb.to(device)
            yb = yb.to(device)

            optimizer.zero_grad()
            out = model(xb)

            loss = criterion(out, yb)

            loss.backward()
            # Clip gradients to stabilize SGD with the relatively high lr.
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            optimizer.step()

            # Accumulate sample-weighted loss and patch-level accuracy.
            train_loss += loss.item() * xb.size(0)
            _, pred = out.max(1)
            correct += (pred == yb).sum().item()
            total += yb.size(0)
            batches_processed += 1

        train_loss /= total
        train_acc = correct / total

        # ---- validation: clip-level accuracy over whole spectrograms ----
        model.eval()
        val_correct = 0
        val_total = len(y_val)


        for i in tqdm.tqdm(range(val_total), desc=f"Epoch {epoch+1} Val", leave=False):
            spec = X_val[i]
            true_label = y_val[i]

            pred_label = predict_with_overlapping_patches(model, spec, device=device)

            if pred_label == true_label:
                val_correct += 1

        val_acc = val_correct / val_total

        training_history['train_loss'].append(train_loss)
        training_history['train_acc'].append(train_acc)
        training_history['val_acc'].append(val_acc)
        training_history['epochs'].append(epoch + 1)

        is_best = val_acc > best_val_acc

        if is_best:
            best_val_acc = val_acc
            # NOTE(review): this writes bare weights to ./best_model.pt in the
            # CWD, while a full checkpoint also goes to checkpoint_dir below —
            # confirm both copies are intended.
            torch.save(model.state_dict(), "best_model.pt")

        print(
            f"Epoch {epoch+1}/{epochs} | "
            f"Train loss: {train_loss:.4f}, Train acc: {train_acc:.4f} | "
            f"Val acc: {val_acc:.4f} (best: {best_val_acc:.4f})"
        )

        # ---- periodic checkpointing ----
        if (epoch + 1) % save_every_n_epoch == 0:
            checkpoint = {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_loss': train_loss,
                'train_acc': train_acc,
                'val_acc': val_acc,
                'best_val_acc': best_val_acc,
                'history': training_history,
                'config': {
                    'batch_size': batch_size,
                    'lr': lr,
                    'total_patches': total_patches,
                    'patches_per_epoch': patches_per_epoch,
                }
            }
            checkpoint_path = os.path.join(
                checkpoint_dir,
                f"checkpoint_epoch_{epoch+1}.pt"
            )
            torch.save(checkpoint, checkpoint_path)

            if is_best:
                best_path = os.path.join(checkpoint_dir, "best_model.pt")
                torch.save(checkpoint, best_path)
                #print("Saved best model")

            # latest_checkpoint.pt is what main_resume() points at by default.
            latest_path = os.path.join(checkpoint_dir, "latest_checkpoint.pt")
            torch.save(checkpoint, latest_path)

            # Keep a JSON copy of the curves for plotting without torch.
            history_path = os.path.join(checkpoint_dir, "training_history.json")
            with open(history_path, 'w') as f:
                json.dump(training_history, f, indent=2)

    # ---- final model export (last-epoch weights, not necessarily best) ----
    final_model_dir = "models/saved"
    os.makedirs(final_model_dir, exist_ok=True)
    final_model_path = os.path.join(final_model_dir, "final_model.pt")
    torch.save({
        'model_state_dict': model.state_dict(),
        'best_val_acc': best_val_acc,
        'config': {
            'batch_size': batch_size,
            'lr': lr,
            'epochs': epochs,
        }
    }, final_model_path)
    print(f"\nTraining complete! Final model saved to {final_model_path}")

    return best_val_acc
src/visualization/plot.py ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
def plot_training_history(checkpoint_dir):
    """Plot loss/accuracy curves from training_history.json in `checkpoint_dir`.

    Saves training_curves.png alongside the history file and shows the figure.
    Prints a message and returns early if no history file exists.
    """
    # Bug fix: this module has no top-level imports, so `os`, `json` and
    # `plt` were undefined and every call raised NameError. Import locally
    # to keep the file's (import-free) top level unchanged.
    import os
    import json

    history_path = os.path.join(checkpoint_dir, "training_history.json")

    if not os.path.exists(history_path):
        print(f"No training history found at {history_path}")
        return

    with open(history_path, 'r') as f:
        history = json.load(f)

    # Imported lazily so the no-history early return works without matplotlib.
    import matplotlib.pyplot as plt

    epochs = history['epochs']
    train_loss = history['train_loss']
    train_acc = history['train_acc']
    val_acc = history['val_acc']

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

    # Left panel: training loss.
    ax1.plot(epochs, train_loss, 'b-', label='Train Loss')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Loss')
    ax1.set_title('Training Loss')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Right panel: train vs validation accuracy.
    ax2.plot(epochs, train_acc, 'b-', label='Train Accuracy')
    ax2.plot(epochs, val_acc, 'r-', label='Validation Accuracy')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Accuracy')
    ax2.set_title('Training and Validation Accuracy')
    ax2.legend()
    ax2.grid(True, alpha=0.3)

    plt.tight_layout()
    plot_path = os.path.join(checkpoint_dir, "training_curves.png")
    plt.savefig(plot_path, dpi=150, bbox_inches='tight')
    print(f"Saved training curves to {plot_path}")
    plt.show()