johnamit committed on
Commit
a4191d8
·
verified ·
1 Parent(s): 41c1985

Upload folder using huggingface_hub

Browse files
Files changed (37) hide show
  1. models/.gitkeep +0 -0
  2. models/bilstm/results/test_confusion_matrix.png +0 -0
  3. models/bilstm/train.py +316 -0
  4. models/bilstm/weights/bidirectionallstm_label_encoder.pkl +3 -0
  5. models/bilstm/weights/bidirectionallstm_model.pt +3 -0
  6. models/bilstm/weights/bidirectionallstm_scaler.pkl +3 -0
  7. models/bilstm/weights/similarity_centroids.pkl +3 -0
  8. models/cnn_bilstm/results/test_confusion_matrix.png +0 -0
  9. models/cnn_bilstm/train.py +336 -0
  10. models/cnn_bilstm/weights/cnn_bilstm_label_encoder.pkl +3 -0
  11. models/cnn_bilstm/weights/cnn_bilstm_model.pt +3 -0
  12. models/cnn_bilstm/weights/cnn_bilstm_scaler.pkl +3 -0
  13. models/cnn_bilstm/weights/similarity_centroids.pkl +3 -0
  14. models/gru/results/test_confusion_matrix.png +0 -0
  15. models/gru/train.py +322 -0
  16. models/gru/weights/gru_label_encoder.pkl +3 -0
  17. models/gru/weights/gru_model.pt +3 -0
  18. models/gru/weights/gru_scaler.pkl +3 -0
  19. models/gru/weights/similarity_centroids.pkl +3 -0
  20. models/lstm/results/test_confusion_matrix.png +0 -0
  21. models/lstm/train.py +316 -0
  22. models/lstm/weights/lstm_label_encoder.pkl +3 -0
  23. models/lstm/weights/lstm_model.pt +3 -0
  24. models/lstm/weights/lstm_scaler.pkl +3 -0
  25. models/lstm/weights/similarity_centroids.pkl +3 -0
  26. models/st_gcn/results/test_confusion_matrix.png +0 -0
  27. models/st_gcn/train.py +379 -0
  28. models/st_gcn/weights/similarity_centroids.pkl +3 -0
  29. models/st_gcn/weights/st_gcn_label_encoder.pkl +3 -0
  30. models/st_gcn/weights/st_gcn_model.pt +3 -0
  31. models/st_gcn/weights/st_gcn_scaler.pkl +3 -0
  32. models/tcn/results/test_confusion_matrix.png +0 -0
  33. models/tcn/train.py +372 -0
  34. models/tcn/weights/similarity_centroids.pkl +3 -0
  35. models/tcn/weights/tcn_label_encoder.pkl +3 -0
  36. models/tcn/weights/tcn_model.pt +3 -0
  37. models/tcn/weights/tcn_scaler.pkl +3 -0
models/.gitkeep ADDED
File without changes
models/bilstm/results/test_confusion_matrix.png ADDED
models/bilstm/train.py ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from pathlib import Path
3
+
4
+ import joblib
5
+ import matplotlib.pyplot as plt
6
+ import numpy as np
7
+ import pandas as pd
8
+ import torch
9
+ from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score, precision_score, recall_score
10
+ from sklearn.preprocessing import LabelEncoder, StandardScaler
11
+ from torch import nn
12
+ from torch.utils.data import DataLoader, Dataset
13
+
14
+
15
def parse_args(argv=None):
    """Parse the command-line options of the BiLSTM training script.

    Args:
        argv: Optional sequence of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, exactly as before; passing a
            list lets the parser be driven from code.

    Returns:
        argparse.Namespace with the data paths, model hyperparameters and
        training-loop settings.
    """
    parser = argparse.ArgumentParser(
        description="Train and evaluate a bidirectional LSTM sequence classifier.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Data locations and output directory.
    parser.add_argument("--train-file", default="data/train_sequences.csv", help="CSV with the training sequences")
    parser.add_argument("--val-file", default="data/val_sequences.csv", help="CSV with the validation sequences")
    parser.add_argument("--test-file", default="data/test_internal_sequences.csv", help="CSV with the test sequences")
    parser.add_argument("--output-dir", default="models/bilstm/results", help="directory that receives all saved artifacts")

    # Input layout and model size.
    parser.add_argument("--sequence-length", type=int, default=30, help="timesteps per sequence")
    parser.add_argument("--feature-count", type=int, default=78, help="features per timestep")
    parser.add_argument("--units", type=int, default=73, help="hidden size of each LSTM direction")
    parser.add_argument("--dropout", type=float, default=0.2174, help="dropout probability")

    # Optimisation settings.
    parser.add_argument("--learning-rate", type=float, default=0.0004, help="Adam learning rate")
    parser.add_argument("--batch-size", type=int, default=54, help="samples per batch")
    parser.add_argument("--epochs", type=int, default=73, help="maximum number of training epochs")
    parser.add_argument("--early-stopping-patience", type=int, default=10, help="epochs without validation improvement before stopping")
    parser.add_argument("--lr-plateau-patience", type=int, default=5, help="patience of the ReduceLROnPlateau scheduler")
    parser.add_argument("--lr-plateau-factor", type=float, default=0.5, help="factor of the ReduceLROnPlateau scheduler")
    parser.add_argument("--num-workers", type=int, default=4, help="DataLoader worker processes")
    parser.add_argument("--seed", type=int, default=42, help="random seed")

    return parser.parse_args(argv)
34
+
35
+ # Define a PyTorch Dataset for loading sequence features and labels from tensors
36
class SequenceDataset(Dataset):
    """Map-style dataset that pairs each feature entry with its label by index."""

    def __init__(self, feature_tensor, label_tensor):
        # Both containers are stored as given; nothing is copied or validated.
        self.feature_tensor, self.label_tensor = feature_tensor, label_tensor

    def __len__(self):
        # The label container defines the dataset size.
        sample_total = len(self.label_tensor)
        return sample_total

    def __getitem__(self, index):
        sample_features = self.feature_tensor[index]
        sample_label = self.label_tensor[index]
        return sample_features, sample_label
46
+
47
+ # Define the BiLSTM classifier model architecture with two LSTM layers, dropout, and a linear classification head
48
class BidirectionalLstmClassifier(nn.Module):
    """Two-layer bidirectional LSTM followed by dropout and a linear head.

    Args:
        feature_count: Size of the feature vector at each timestep.
        hidden_size: Hidden units per LSTM direction.
        class_count: Number of output logits.
        dropout_probability: Dropout rate used between the LSTM layers and
            in front of the classification head.
    """

    def __init__(self, feature_count, hidden_size, class_count, dropout_probability):
        super().__init__()
        recurrent_settings = {
            "input_size": feature_count,
            "hidden_size": hidden_size,
            "num_layers": 2,
            "batch_first": True,
            "dropout": dropout_probability,
            "bidirectional": True,
        }
        self.bilstm = nn.LSTM(**recurrent_settings)
        self.dropout = nn.Dropout(dropout_probability)
        # Forward and backward states are concatenated, hence 2 * hidden_size.
        self.classifier = nn.Linear(2 * hidden_size, class_count)

    def forward(self, input_sequence):
        """Return class logits for a batch-first batch of sequences."""
        per_step_states = self.bilstm(input_sequence)[0]
        # NOTE(review): at the last timestep the backward direction has only
        # processed a single frame; confirm this readout is intended before
        # changing it, since saved weights were trained against it.
        last_step_state = per_step_states[:, -1]
        return self.classifier(self.dropout(last_step_state))
61
+
62
+ # Set random seeds for reproducibility across numpy and PyTorch (both CPU and CUDA)
63
def set_random_seed(seed):
    """Seed NumPy and PyTorch (CPU and every CUDA device) with ``seed``."""
    seeders = (np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all)
    for apply_seed in seeders:
        apply_seed(seed)
67
+
68
+ # Load the sequence table from a CSV file, separating the flattened feature columns and raw label column, and returning them as numpy arrays
69
def load_sequence_table(input_file_path):
    """Read a sequence CSV and split it into a feature matrix and raw labels.

    Every column except ``video_id``, ``exercise_label``,
    ``start_frame_index`` and ``end_frame_index`` is treated as a feature.

    Returns:
        Tuple ``(features, labels)``: a float32 array with one row per CSV
        row, and the ``exercise_label`` column as an array.
    """
    table = pd.read_csv(input_file_path)
    bookkeeping_columns = ["video_id", "exercise_label", "start_frame_index", "end_frame_index"]
    # Boolean mask over the header keeps the feature columns in file order.
    is_feature_column = ~table.columns.isin(bookkeeping_columns)
    feature_matrix = table.loc[:, is_feature_column].to_numpy(dtype=np.float32)
    return feature_matrix, table["exercise_label"].to_numpy()
76
+
77
+ # Scale features with StandardScaler, reshape them into 3D tensors for LSTM input, and return the scaled feature tensors along with the fitted scaler object
78
def scale_and_reshape_features(train_features, validation_features, test_features, sequence_length, feature_count):
    """Standardise the flattened features and reshape them to 3-D sequences.

    The scaler is fitted on the training split only and then applied to all
    three splits. Each split is reshaped to
    ``(-1, sequence_length, feature_count)``.

    Args:
        train_features: 2-D array, one flattened sequence per row.
        validation_features: 2-D array with the same column count.
        test_features: 2-D array with the same column count.
        sequence_length: Timesteps per sequence.
        feature_count: Features per timestep.

    Returns:
        Tuple ``(scaled_train, scaled_validation, scaled_test, scaler)``.

    Raises:
        ValueError: If a split is not 2-D or its column count differs from
            ``sequence_length * feature_count``.
    """
    expected_width = sequence_length * feature_count
    named_splits = (
        ("train", train_features),
        ("validation", validation_features),
        ("test", test_features),
    )
    # Validate before reshaping: with a leading -1, reshape would otherwise
    # silently change the number of samples whenever the total element count
    # happens to be divisible by sequence_length * feature_count.
    for split_name, split_features in named_splits:
        split_shape = np.shape(split_features)
        if len(split_shape) != 2 or split_shape[1] != expected_width:
            raise ValueError(
                f"{split_name} features have shape {split_shape}; expected 2-D with "
                f"{expected_width} columns (sequence_length={sequence_length} x feature_count={feature_count})."
            )

    scaler = StandardScaler()
    scaler.fit(train_features)

    def transform(split_features):
        return scaler.transform(split_features).reshape(-1, sequence_length, feature_count)

    scaled_train = transform(train_features)
    scaled_validation = transform(validation_features)
    scaled_test = transform(test_features)

    return scaled_train, scaled_validation, scaled_test, scaler
87
+
88
+ # Build PyTorch DataLoaders for the training, validation, and test sets using the SequenceDataset
89
def build_dataloaders(train_features, validation_features, test_features, train_labels, validation_labels, test_labels, batch_size, num_workers):
    """Wrap the three splits in ``SequenceDataset`` objects and DataLoaders.

    Features are converted to float32 tensors and labels to int64 tensors.
    Only the training loader shuffles.

    Args:
        train_features, validation_features, test_features: Array-likes
            accepted by ``torch.tensor``.
        train_labels, validation_labels, test_labels: Integer class indices.
        batch_size: Samples per batch for all three loaders.
        num_workers: Worker processes for all three loaders.

    Returns:
        Tuple ``(train_loader, validation_loader, test_loader)``.
    """
    # Pinned host memory only helps host-to-GPU copies; requesting it without
    # CUDA brings no benefit and can trigger a PyTorch warning.
    use_pinned_memory = torch.cuda.is_available()

    def make_loader(features, labels, shuffle):
        dataset = SequenceDataset(
            torch.tensor(features, dtype=torch.float32),
            torch.tensor(labels, dtype=torch.long),
        )
        return DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=num_workers,
            pin_memory=use_pinned_memory,
        )

    train_loader = make_loader(train_features, train_labels, shuffle=True)
    validation_loader = make_loader(validation_features, validation_labels, shuffle=False)
    test_loader = make_loader(test_features, test_labels, shuffle=False)

    return train_loader, validation_loader, test_loader
107
+
108
+ # Single training epoch
109
def run_training_epoch(model, data_loader, optimizer, loss_function, device):
    """Train ``model`` for one pass over ``data_loader`` and return the mean loss.

    The per-batch loss is weighted by the batch size and divided by the number
    of samples actually processed, so the result stays a true per-sample mean
    even when the loader drops or subsamples items.
    Assumes ``loss_function`` returns a batch-averaged scalar.

    Args:
        model: Module to optimise; switched to training mode.
        data_loader: Iterable of ``(feature_batch, label_batch)`` pairs.
        optimizer: Optimiser stepping ``model``'s parameters.
        loss_function: Callable ``(logits, labels) -> scalar tensor``.
        device: Device the batches are moved to.

    Returns:
        Mean loss per sample as a float.

    Raises:
        ValueError: If the loader yields no samples.
    """
    model.train()
    cumulative_loss = 0.0
    samples_seen = 0

    for feature_batch, label_batch in data_loader:
        feature_batch = feature_batch.to(device, non_blocking=True)
        label_batch = label_batch.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)
        logits = model(feature_batch)
        loss = loss_function(logits, label_batch)
        loss.backward()
        optimizer.step()

        samples_in_batch = feature_batch.size(0)
        cumulative_loss += loss.item() * samples_in_batch
        samples_seen += samples_in_batch

    if samples_seen == 0:
        raise ValueError("data_loader yielded no samples; cannot compute an epoch loss.")
    return cumulative_loss / samples_seen
127
+
128
+ # Single validation epoch
129
def run_validation_epoch(model, data_loader, loss_function, device):
    """Evaluate ``model`` on ``data_loader`` and return the mean loss.

    Runs in eval mode under ``torch.inference_mode``. The mean is taken over
    the samples actually processed rather than the dataset length, so it stays
    correct when the loader drops or subsamples items.
    Assumes ``loss_function`` returns a batch-averaged scalar.

    Args:
        model: Module to evaluate.
        data_loader: Iterable of ``(feature_batch, label_batch)`` pairs.
        loss_function: Callable ``(logits, labels) -> scalar tensor``.
        device: Device the batches are moved to.

    Returns:
        Mean loss per sample as a float.

    Raises:
        ValueError: If the loader yields no samples.
    """
    model.eval()
    cumulative_loss = 0.0
    samples_seen = 0

    with torch.inference_mode():
        for feature_batch, label_batch in data_loader:
            feature_batch = feature_batch.to(device, non_blocking=True)
            label_batch = label_batch.to(device, non_blocking=True)
            loss = loss_function(model(feature_batch), label_batch)

            samples_in_batch = feature_batch.size(0)
            cumulative_loss += loss.item() * samples_in_batch
            samples_seen += samples_in_batch

    if samples_seen == 0:
        raise ValueError("data_loader yielded no samples; cannot compute an epoch loss.")
    return cumulative_loss / samples_seen
143
+
144
+ # Run inference on the test set to obtain true labels and predicted labels for metric calculation
145
def predict_labels(model, data_loader, device):
    """Run inference over ``data_loader`` and collect true and predicted labels.

    Args:
        model: Module producing one row of logits per sample.
        data_loader: Iterable of ``(feature_batch, label_batch)`` pairs.
        device: Device the feature batches are moved to.

    Returns:
        Tuple ``(true_labels, predicted_labels)`` of 1-D NumPy arrays. Both
        are empty int64 arrays when the loader yields no batches.
    """
    model.eval()
    predicted_batches = []
    true_batches = []

    with torch.inference_mode():
        for feature_batch, label_batch in data_loader:
            feature_batch = feature_batch.to(device, non_blocking=True)
            logits = model(feature_batch)
            predicted_batches.append(torch.argmax(logits, dim=1).cpu().numpy())
            # .cpu() is a no-op for CPU labels and keeps .numpy() valid otherwise.
            true_batches.append(label_batch.cpu().numpy())

    if not predicted_batches:
        # np.concatenate refuses an empty list; return well-typed empties instead.
        return np.empty(0, dtype=np.int64), np.empty(0, dtype=np.int64)

    return np.concatenate(true_batches), np.concatenate(predicted_batches)
161
+
162
+ # Save a confusion matrix figure with class names and value annotations to the specified file path
163
def save_confusion_matrix_figure(confusion_matrix_array, class_names, output_file_path):
    """Render a confusion matrix as an annotated heat map and save it to disk.

    Args:
        confusion_matrix_array: 2-D array of counts; rows are drawn on the
            "True label" axis and columns on the "Predicted label" axis.
        class_names: Tick labels, one per class.
        output_file_path: Destination file (``str`` or ``Path``); missing
            parent directories are created.
    """
    confusion_matrix_array = np.asarray(confusion_matrix_array)
    output_file_path = Path(output_file_path)

    figure = plt.figure(figsize=(8, 6))
    try:
        axis = figure.add_subplot(111)
        image = axis.imshow(confusion_matrix_array, interpolation="nearest", cmap="Blues")
        axis.figure.colorbar(image, ax=axis)

        tick_positions = np.arange(len(class_names))
        axis.set_xticks(tick_positions)
        axis.set_yticks(tick_positions)
        axis.set_xticklabels(class_names, rotation=45, ha="right")
        axis.set_yticklabels(class_names)
        axis.set_xlabel("Predicted label")
        axis.set_ylabel("True label")
        axis.set_title("Test Confusion Matrix")

        # Cells darker than half the maximum get white text for contrast.
        threshold = confusion_matrix_array.max() / 2.0 if confusion_matrix_array.size > 0 else 0.0
        for row_index in range(confusion_matrix_array.shape[0]):
            for column_index in range(confusion_matrix_array.shape[1]):
                value = confusion_matrix_array[row_index, column_index]
                color = "white" if value > threshold else "black"
                axis.text(column_index, row_index, str(value), ha="center", va="center", color=color)

        figure.tight_layout()
        output_file_path.parent.mkdir(parents=True, exist_ok=True)
        figure.savefig(output_file_path, dpi=180)
    finally:
        # Release the figure even if drawing or saving fails.
        plt.close(figure)
187
+
188
+ # loads data, trains the BiLSTM model, evaluates on the test set, saves the model and metrics, and generates a confusion matrix figure
189
def main():
    """Train the BiLSTM classifier, evaluate it on the test split and save artifacts.

    Steps: parse the CLI options, load the three CSV splits, fit the label
    encoder and scaler on the training split, train with Adam plus
    ReduceLROnPlateau and early stopping on validation loss, restore the best
    weights, then write the model, scaler, label encoder, loss history, test
    metrics and confusion matrix (CSV and PNG) into the output directory.
    """
    args = parse_args()

    train_file_path = Path(args.train_file)
    validation_file_path = Path(args.val_file)
    test_file_path = Path(args.test_file)
    output_directory_path = Path(args.output_dir)
    output_directory_path.mkdir(parents=True, exist_ok=True)

    feature_count = args.feature_count
    maximum_epochs = args.epochs

    set_random_seed(args.seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    train_features, train_raw_labels = load_sequence_table(train_file_path)
    validation_features, validation_raw_labels = load_sequence_table(validation_file_path)
    test_features, test_raw_labels = load_sequence_table(test_file_path)

    # The encoder is fitted on training labels only.
    label_encoder = LabelEncoder()
    label_encoder.fit(train_raw_labels)
    train_labels = label_encoder.transform(train_raw_labels)
    validation_labels = label_encoder.transform(validation_raw_labels)
    test_labels = label_encoder.transform(test_raw_labels)

    scaled_train, scaled_validation, scaled_test, scaler = scale_and_reshape_features(
        train_features, validation_features, test_features, args.sequence_length, feature_count
    )

    train_loader, validation_loader, test_loader = build_dataloaders(
        train_features=scaled_train,
        validation_features=scaled_validation,
        test_features=scaled_test,
        train_labels=train_labels,
        validation_labels=validation_labels,
        test_labels=test_labels,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
    )

    class_count = len(label_encoder.classes_)
    model = BidirectionalLstmClassifier(feature_count, args.units, class_count, args.dropout).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="min", factor=args.lr_plateau_factor, patience=args.lr_plateau_patience
    )
    loss_function = nn.CrossEntropyLoss()

    training_history = {"training_loss": [], "validation_loss": []}
    best_validation_loss = float("inf")
    best_model_state = None
    epochs_without_improvement = 0

    for epoch_index in range(maximum_epochs):
        training_loss = run_training_epoch(model, train_loader, optimizer, loss_function, device)
        validation_loss = run_validation_epoch(model, validation_loader, loss_function, device)
        scheduler.step(validation_loss)

        training_history["training_loss"].append(training_loss)
        training_history["validation_loss"].append(validation_loss)

        print(f"Epoch {epoch_index + 1}/{maximum_epochs} - train_loss: {training_loss:.6f} - val_loss: {validation_loss:.6f}")

        if validation_loss < best_validation_loss:
            best_validation_loss = validation_loss
            # Snapshot on CPU so later optimiser steps cannot alter the copy.
            best_model_state = {key: value.detach().cpu().clone() for key, value in model.state_dict().items()}
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1

        if epochs_without_improvement >= args.early_stopping_patience:
            print("Early stopping triggered.")
            break

    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    test_true_labels, test_predicted_labels = predict_labels(model, test_loader, device)

    accuracy = accuracy_score(test_true_labels, test_predicted_labels)
    precision = precision_score(test_true_labels, test_predicted_labels, average="weighted", zero_division=0)
    recall = recall_score(test_true_labels, test_predicted_labels, average="weighted", zero_division=0)
    f1 = f1_score(test_true_labels, test_predicted_labels, average="weighted", zero_division=0)

    # Pin the full label set so the report and matrix always cover every
    # trained class, even one that is absent from the test split. Without it
    # the report rejects target_names and the matrix no longer lines up with
    # the class-name tick labels.
    class_indices = np.arange(class_count)
    report_text = classification_report(
        test_true_labels,
        test_predicted_labels,
        labels=class_indices,
        target_names=label_encoder.classes_,
        zero_division=0,
    )
    matrix = confusion_matrix(test_true_labels, test_predicted_labels, labels=class_indices)

    print("\nTest metrics")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-score: {f1:.4f}")
    print("\nClassification report")
    print(report_text)

    torch.save(model.state_dict(), output_directory_path / "bidirectionallstm_model.pt")
    joblib.dump(scaler, output_directory_path / "bidirectionallstm_scaler.pkl")
    joblib.dump(label_encoder, output_directory_path / "bidirectionallstm_label_encoder.pkl")

    summary_metrics = {
        "accuracy": float(accuracy),
        "precision_weighted": float(precision),
        "recall_weighted": float(recall),
        "f1_weighted": float(f1),
    }

    pd.DataFrame(training_history).to_csv(output_directory_path / "training_history.csv", index=False)
    pd.DataFrame([summary_metrics]).to_csv(output_directory_path / "test_metrics.csv", index=False)
    pd.DataFrame(matrix).to_csv(output_directory_path / "test_confusion_matrix_values.csv", index=False)

    save_confusion_matrix_figure(matrix, label_encoder.classes_, output_directory_path / "test_confusion_matrix.png")
    print(f"Saved artifacts to: {output_directory_path}")
313
+
314
+
315
+ if __name__ == "__main__":
316
+ main()
models/bilstm/weights/bidirectionallstm_label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f4cfd54771b8c9476a062d20dcc10b2f1a400a916e4eafff447bae05efe17fc
3
+ size 530
models/bilstm/weights/bidirectionallstm_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4fde062aa774a488d12b3001c8e79f6090e7b53f7225cc4a582ed69cb2060a5
3
+ size 879857
models/bilstm/weights/bidirectionallstm_scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed2cfbc36c088284e8ecab353ebae20a47d0083b40b15921369593c2014c72a2
3
+ size 56775
models/bilstm/weights/similarity_centroids.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccbb641aa4eba4be0ab13ce574bfd027680f03b1c46ac95141a2376f2a5e0d14
3
+ size 38051
models/cnn_bilstm/results/test_confusion_matrix.png ADDED
models/cnn_bilstm/train.py ADDED
@@ -0,0 +1,336 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from pathlib import Path
3
+
4
+ import joblib
5
+ import matplotlib.pyplot as plt
6
+ import numpy as np
7
+ import pandas as pd
8
+ import torch
9
+ from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score, precision_score, recall_score
10
+ from sklearn.preprocessing import LabelEncoder, StandardScaler
11
+ from torch import nn
12
+ from torch.utils.data import DataLoader, Dataset
13
+
14
+
15
def parse_args(argv=None):
    """Parse the command-line options of the CNN-BiLSTM training script.

    Args:
        argv: Optional sequence of argument strings. ``None`` (the default)
            makes argparse read ``sys.argv[1:]``, exactly as before; passing a
            list lets the parser be driven from code.

    Returns:
        argparse.Namespace with the data paths, model hyperparameters and
        training-loop settings.
    """
    parser = argparse.ArgumentParser(
        description="Train and evaluate a Conv1d + bidirectional LSTM sequence classifier.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Data locations and output directory.
    parser.add_argument("--train-file", default="data/train_sequences.csv", help="CSV with the training sequences")
    parser.add_argument("--val-file", default="data/val_sequences.csv", help="CSV with the validation sequences")
    parser.add_argument("--test-file", default="data/test_internal_sequences.csv", help="CSV with the test sequences")
    parser.add_argument("--output-dir", default="models/cnn_bilstm/results", help="directory that receives all saved artifacts")

    # Input layout and model size.
    parser.add_argument("--sequence-length", type=int, default=30, help="timesteps per sequence")
    parser.add_argument("--feature-count", type=int, default=78, help="features per timestep")
    parser.add_argument("--cnn-filters", type=int, default=128, help="output channels of the Conv1d layer")
    parser.add_argument("--cnn-kernel-size", type=int, default=3, help="kernel size of the Conv1d layer")
    parser.add_argument("--lstm-units", type=int, default=73, help="hidden size of each LSTM direction")
    parser.add_argument("--dropout", type=float, default=0.2, help="dropout probability")

    # Optimisation settings.
    parser.add_argument("--learning-rate", type=float, default=0.0003, help="Adam learning rate")
    parser.add_argument("--batch-size", type=int, default=54, help="samples per batch")
    parser.add_argument("--epochs", type=int, default=73, help="maximum number of training epochs")
    parser.add_argument("--early-stopping-patience", type=int, default=10, help="epochs without validation improvement before stopping")
    parser.add_argument("--lr-plateau-patience", type=int, default=5, help="patience of the ReduceLROnPlateau scheduler")
    parser.add_argument("--lr-plateau-factor", type=float, default=0.5, help="factor of the ReduceLROnPlateau scheduler")
    parser.add_argument("--num-workers", type=int, default=4, help="DataLoader worker processes")
    parser.add_argument("--seed", type=int, default=42, help="random seed")

    return parser.parse_args(argv)
36
+
37
+
38
class SequenceDataset(Dataset):
    """Index-aligned pairing of a feature container with a label container."""

    def __init__(self, feature_tensor, label_tensor):
        # Stored by reference; no copying and no length check.
        self.feature_tensor, self.label_tensor = feature_tensor, label_tensor

    def __len__(self):
        # Size follows the label container.
        label_total = len(self.label_tensor)
        return label_total

    def __getitem__(self, index):
        features_at_index = self.feature_tensor[index]
        label_at_index = self.label_tensor[index]
        return features_at_index, label_at_index
48
+
49
+
50
class CnnBiLstmClassifier(nn.Module):
    """Conv1d front end feeding a two-layer bidirectional LSTM and a linear head.

    Args:
        feature_count: Features per timestep (Conv1d input channels).
        class_count: Number of output logits.
        cnn_filters: Conv1d output channels, also the LSTM input size.
        cnn_kernel_size: Conv1d kernel size; padding is ``kernel_size // 2``.
        lstm_units: Hidden units per LSTM direction.
        dropout_probability: Dropout rate used after the convolution, between
            the LSTM layers and in front of the classification head.
    """

    def __init__(self, feature_count, class_count, cnn_filters, cnn_kernel_size, lstm_units, dropout_probability):
        super().__init__()
        self.conv1d = nn.Conv1d(
            in_channels=feature_count,
            out_channels=cnn_filters,
            kernel_size=cnn_kernel_size,
            # Half-kernel padding keeps the time length for odd kernel sizes.
            padding=cnn_kernel_size // 2,
        )
        self.relu = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout_probability)
        self.bilstm = nn.LSTM(
            input_size=cnn_filters,
            hidden_size=lstm_units,
            num_layers=2,
            batch_first=True,
            dropout=dropout_probability,
            bidirectional=True,
        )
        self.dropout2 = nn.Dropout(dropout_probability)
        # Forward and backward states are concatenated, hence 2 * lstm_units.
        self.classifier = nn.Linear(2 * lstm_units, class_count)

    def forward(self, input_sequence):
        """Return class logits for a batch-first batch of sequences."""
        # Conv1d wants channels before time, so swap the last two axes.
        channels_first = input_sequence.transpose(1, 2)
        convolved = self.dropout1(self.relu(self.conv1d(channels_first)))
        # Back to (batch, time, channels) for the batch-first LSTM.
        per_step_states, _ = self.bilstm(convolved.transpose(1, 2))
        last_step_state = per_step_states[:, -1]
        return self.classifier(self.dropout2(last_step_state))
73
+
74
+
75
def set_random_seed(seed):
    """Apply ``seed`` to NumPy, PyTorch CPU and all PyTorch CUDA generators."""
    for seeding_function in (np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
        seeding_function(seed)
79
+
80
+
81
def load_sequence_table(input_file_path):
    """Load a sequence CSV and return ``(features, labels)``.

    Columns named ``video_id``, ``exercise_label``, ``start_frame_index`` or
    ``end_frame_index`` are metadata; all remaining columns, in file order,
    form the float32 feature matrix. Labels come from ``exercise_label``.
    """
    frame = pd.read_csv(input_file_path)
    metadata_names = ["video_id", "exercise_label", "start_frame_index", "end_frame_index"]
    keep_column = ~frame.columns.isin(metadata_names)
    features = frame.loc[:, keep_column].to_numpy(dtype=np.float32)
    labels = frame["exercise_label"].to_numpy()
    return features, labels
88
+
89
+
90
def scale_and_reshape_features(train_features, validation_features, test_features, sequence_length, feature_count):
    """Standardise the flattened features and reshape them to 3-D sequences.

    A ``StandardScaler`` is fitted on the training split only, applied to all
    three splits, and each result is reshaped to
    ``(-1, sequence_length, feature_count)``.

    Args:
        train_features: 2-D array, one flattened sequence per row.
        validation_features: 2-D array with the same column count.
        test_features: 2-D array with the same column count.
        sequence_length: Timesteps per sequence.
        feature_count: Features per timestep.

    Returns:
        Tuple ``(scaled_train, scaled_validation, scaled_test, scaler)``.

    Raises:
        ValueError: If a split is not 2-D or its column count differs from
            ``sequence_length * feature_count``.
    """
    expected_width = sequence_length * feature_count
    # Check widths first: reshape(-1, ...) would otherwise quietly alter the
    # sample count whenever the element total is divisible by the target size.
    for split_name, split_features in (
        ("train", train_features),
        ("validation", validation_features),
        ("test", test_features),
    ):
        split_shape = np.shape(split_features)
        if len(split_shape) != 2 or split_shape[1] != expected_width:
            raise ValueError(
                f"{split_name} features have shape {split_shape}; expected 2-D with "
                f"{expected_width} columns (sequence_length={sequence_length} x feature_count={feature_count})."
            )

    scaler = StandardScaler()
    scaler.fit(train_features)

    target_shape = (-1, sequence_length, feature_count)
    scaled_train = scaler.transform(train_features).reshape(target_shape)
    scaled_validation = scaler.transform(validation_features).reshape(target_shape)
    scaled_test = scaler.transform(test_features).reshape(target_shape)

    return scaled_train, scaled_validation, scaled_test, scaler
99
+
100
+
101
def build_dataloaders(train_features, validation_features, test_features, train_labels, validation_labels, test_labels, batch_size, num_workers):
    """Build train, validation and test DataLoaders over ``SequenceDataset``.

    Features become float32 tensors and labels int64 tensors. The training
    loader shuffles; the validation and test loaders keep file order.

    Args:
        train_features, validation_features, test_features: Array-likes
            accepted by ``torch.tensor``.
        train_labels, validation_labels, test_labels: Integer class indices.
        batch_size: Samples per batch for all three loaders.
        num_workers: Worker processes for all three loaders.

    Returns:
        Tuple ``(train_loader, validation_loader, test_loader)``.
    """
    # Pinned memory is only useful for host-to-GPU transfers; without CUDA it
    # adds nothing and can make PyTorch emit a warning.
    pin_host_memory = torch.cuda.is_available()

    def make_loader(features, labels, shuffle):
        feature_tensor = torch.tensor(features, dtype=torch.float32)
        label_tensor = torch.tensor(labels, dtype=torch.long)
        return DataLoader(
            SequenceDataset(feature_tensor, label_tensor),
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=num_workers,
            pin_memory=pin_host_memory,
        )

    train_loader = make_loader(train_features, train_labels, shuffle=True)
    validation_loader = make_loader(validation_features, validation_labels, shuffle=False)
    test_loader = make_loader(test_features, test_labels, shuffle=False)

    return train_loader, validation_loader, test_loader
119
+
120
+
121
def run_training_epoch(model, data_loader, optimizer, loss_function, device):
    """Run one optimisation pass over ``data_loader`` and return the mean loss.

    Each batch loss is weighted by its batch size and the total is divided by
    the number of samples actually processed, which keeps the value a true
    per-sample mean even if the loader drops or subsamples items.
    Assumes ``loss_function`` returns a batch-averaged scalar.

    Args:
        model: Module to optimise; switched to training mode.
        data_loader: Iterable of ``(feature_batch, label_batch)`` pairs.
        optimizer: Optimiser stepping ``model``'s parameters.
        loss_function: Callable ``(logits, labels) -> scalar tensor``.
        device: Device the batches are moved to.

    Returns:
        Mean loss per sample as a float.

    Raises:
        ValueError: If the loader yields no samples.
    """
    model.train()
    loss_total = 0.0
    sample_total = 0

    for feature_batch, label_batch in data_loader:
        feature_batch = feature_batch.to(device, non_blocking=True)
        label_batch = label_batch.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)
        loss = loss_function(model(feature_batch), label_batch)
        loss.backward()
        optimizer.step()

        batch_samples = feature_batch.size(0)
        loss_total += loss.item() * batch_samples
        sample_total += batch_samples

    if sample_total == 0:
        raise ValueError("data_loader yielded no samples; cannot compute an epoch loss.")
    return loss_total / sample_total
139
+
140
+
141
def run_validation_epoch(model, data_loader, loss_function, device):
    """Compute the mean loss of ``model`` over ``data_loader`` without training.

    Runs in eval mode under ``torch.inference_mode``. The mean is taken over
    the samples actually processed rather than the dataset length, so it stays
    correct when the loader drops or subsamples items.
    Assumes ``loss_function`` returns a batch-averaged scalar.

    Args:
        model: Module to evaluate.
        data_loader: Iterable of ``(feature_batch, label_batch)`` pairs.
        loss_function: Callable ``(logits, labels) -> scalar tensor``.
        device: Device the batches are moved to.

    Returns:
        Mean loss per sample as a float.

    Raises:
        ValueError: If the loader yields no samples.
    """
    model.eval()
    loss_total = 0.0
    sample_total = 0

    with torch.inference_mode():
        for feature_batch, label_batch in data_loader:
            feature_batch = feature_batch.to(device, non_blocking=True)
            label_batch = label_batch.to(device, non_blocking=True)
            loss = loss_function(model(feature_batch), label_batch)

            batch_samples = feature_batch.size(0)
            loss_total += loss.item() * batch_samples
            sample_total += batch_samples

    if sample_total == 0:
        raise ValueError("data_loader yielded no samples; cannot compute an epoch loss.")
    return loss_total / sample_total
155
+
156
+
157
def predict_labels(model, data_loader, device):
    """Collect ground-truth and arg-max predicted labels over ``data_loader``.

    Args:
        model: Module producing one row of logits per sample.
        data_loader: Iterable of ``(feature_batch, label_batch)`` pairs.
        device: Device the feature batches are moved to.

    Returns:
        Tuple ``(true_labels, predicted_labels)`` of 1-D NumPy arrays. Both
        are empty int64 arrays when the loader yields no batches.
    """
    model.eval()
    predicted_chunks = []
    true_chunks = []

    with torch.inference_mode():
        for feature_batch, label_batch in data_loader:
            logits = model(feature_batch.to(device, non_blocking=True))
            predicted_chunks.append(torch.argmax(logits, dim=1).cpu().numpy())
            # .cpu() is a no-op for CPU labels and keeps .numpy() valid otherwise.
            true_chunks.append(label_batch.cpu().numpy())

    if not predicted_chunks:
        # np.concatenate rejects an empty list; hand back typed empties.
        return np.empty(0, dtype=np.int64), np.empty(0, dtype=np.int64)

    return np.concatenate(true_chunks), np.concatenate(predicted_chunks)
173
+
174
+
175
def save_confusion_matrix_figure(confusion_matrix_array, class_names, output_file_path):
    """Draw a confusion matrix heat map with per-cell counts and save it.

    Args:
        confusion_matrix_array: 2-D array of counts; rows are drawn on the
            "True label" axis and columns on the "Predicted label" axis.
        class_names: Tick labels, one per class.
        output_file_path: Destination file (``str`` or ``Path``); missing
            parent directories are created.
    """
    confusion_matrix_array = np.asarray(confusion_matrix_array)
    output_file_path = Path(output_file_path)

    figure = plt.figure(figsize=(8, 6))
    try:
        axis = figure.add_subplot(111)
        image = axis.imshow(confusion_matrix_array, interpolation="nearest", cmap="Blues")
        axis.figure.colorbar(image, ax=axis)

        tick_positions = np.arange(len(class_names))
        axis.set_xticks(tick_positions)
        axis.set_yticks(tick_positions)
        axis.set_xticklabels(class_names, rotation=45, ha="right")
        axis.set_yticklabels(class_names)
        axis.set_xlabel("Predicted label")
        axis.set_ylabel("True label")
        axis.set_title("Test Confusion Matrix")

        # White text on cells above half the maximum, black text elsewhere.
        threshold = confusion_matrix_array.max() / 2.0 if confusion_matrix_array.size > 0 else 0.0
        row_total, column_total = confusion_matrix_array.shape[0], confusion_matrix_array.shape[1]
        for row_index in range(row_total):
            for column_index in range(column_total):
                value = confusion_matrix_array[row_index, column_index]
                color = "white" if value > threshold else "black"
                axis.text(column_index, row_index, str(value), ha="center", va="center", color=color)

        figure.tight_layout()
        output_file_path.parent.mkdir(parents=True, exist_ok=True)
        figure.savefig(output_file_path, dpi=180)
    finally:
        # Always free the figure, including when drawing or saving raises.
        plt.close(figure)
199
+
200
+
201
def main():
    """Train the CNN-BiLSTM classifier, evaluate it on the test split and save artifacts.

    Steps: parse the CLI options, load the three CSV splits, fit the label
    encoder and scaler on the training split, train with Adam plus
    ReduceLROnPlateau and early stopping on validation loss, restore the best
    weights, then write the model, scaler, label encoder, loss history, test
    metrics and confusion matrix (CSV and PNG) into the output directory.
    """
    args = parse_args()

    train_file_path = Path(args.train_file)
    validation_file_path = Path(args.val_file)
    test_file_path = Path(args.test_file)
    output_directory_path = Path(args.output_dir)
    output_directory_path.mkdir(parents=True, exist_ok=True)

    feature_count = args.feature_count
    maximum_epochs = args.epochs

    set_random_seed(args.seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    train_features, train_raw_labels = load_sequence_table(train_file_path)
    validation_features, validation_raw_labels = load_sequence_table(validation_file_path)
    test_features, test_raw_labels = load_sequence_table(test_file_path)

    # The encoder is fitted on training labels only.
    label_encoder = LabelEncoder()
    label_encoder.fit(train_raw_labels)
    train_labels = label_encoder.transform(train_raw_labels)
    validation_labels = label_encoder.transform(validation_raw_labels)
    test_labels = label_encoder.transform(test_raw_labels)

    scaled_train, scaled_validation, scaled_test, scaler = scale_and_reshape_features(
        train_features=train_features,
        validation_features=validation_features,
        test_features=test_features,
        sequence_length=args.sequence_length,
        feature_count=feature_count,
    )

    train_loader, validation_loader, test_loader = build_dataloaders(
        train_features=scaled_train,
        validation_features=scaled_validation,
        test_features=scaled_test,
        train_labels=train_labels,
        validation_labels=validation_labels,
        test_labels=test_labels,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
    )

    class_count = len(label_encoder.classes_)
    model = CnnBiLstmClassifier(
        feature_count, class_count, args.cnn_filters, args.cnn_kernel_size, args.lstm_units, args.dropout
    ).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="min", factor=args.lr_plateau_factor, patience=args.lr_plateau_patience
    )
    loss_function = nn.CrossEntropyLoss()

    training_history = {"training_loss": [], "validation_loss": []}
    best_validation_loss = float("inf")
    best_model_state = None
    epochs_without_improvement = 0

    for epoch_index in range(maximum_epochs):
        training_loss = run_training_epoch(model, train_loader, optimizer, loss_function, device)
        validation_loss = run_validation_epoch(model, validation_loader, loss_function, device)
        scheduler.step(validation_loss)

        training_history["training_loss"].append(training_loss)
        training_history["validation_loss"].append(validation_loss)

        print(f"Epoch {epoch_index + 1}/{maximum_epochs} - train_loss: {training_loss:.6f} - val_loss: {validation_loss:.6f}")

        if validation_loss < best_validation_loss:
            best_validation_loss = validation_loss
            # Snapshot on CPU so later optimiser steps cannot alter the copy.
            best_model_state = {key: value.detach().cpu().clone() for key, value in model.state_dict().items()}
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1

        if epochs_without_improvement >= args.early_stopping_patience:
            print("Early stopping triggered.")
            break

    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    test_true_labels, test_predicted_labels = predict_labels(model, test_loader, device)

    accuracy = accuracy_score(test_true_labels, test_predicted_labels)
    precision = precision_score(test_true_labels, test_predicted_labels, average="weighted", zero_division=0)
    recall = recall_score(test_true_labels, test_predicted_labels, average="weighted", zero_division=0)
    f1 = f1_score(test_true_labels, test_predicted_labels, average="weighted", zero_division=0)

    # Pin the full label set so the report and matrix always cover every
    # trained class, even one that is absent from the test split. Without it
    # the report rejects target_names and the matrix no longer lines up with
    # the class-name tick labels.
    class_indices = np.arange(class_count)
    report_text = classification_report(
        test_true_labels,
        test_predicted_labels,
        labels=class_indices,
        target_names=label_encoder.classes_,
        zero_division=0,
    )
    matrix = confusion_matrix(test_true_labels, test_predicted_labels, labels=class_indices)

    print("\nTest metrics")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-score: {f1:.4f}")
    print("\nClassification report")
    print(report_text)

    torch.save(model.state_dict(), output_directory_path / "cnn_bilstm_model.pt")
    joblib.dump(scaler, output_directory_path / "cnn_bilstm_scaler.pkl")
    joblib.dump(label_encoder, output_directory_path / "cnn_bilstm_label_encoder.pkl")

    summary_metrics = {
        "accuracy": float(accuracy),
        "precision_weighted": float(precision),
        "recall_weighted": float(recall),
        "f1_weighted": float(f1),
    }

    pd.DataFrame(training_history).to_csv(output_directory_path / "training_history.csv", index=False)
    pd.DataFrame([summary_metrics]).to_csv(output_directory_path / "test_metrics.csv", index=False)
    pd.DataFrame(matrix).to_csv(output_directory_path / "test_confusion_matrix_values.csv", index=False)

    save_confusion_matrix_figure(matrix, label_encoder.classes_, output_directory_path / "test_confusion_matrix.png")
    print(f"Saved artifacts to: {output_directory_path}")
333
+
334
+
335
+ if __name__ == "__main__":
336
+ main()
models/cnn_bilstm/weights/cnn_bilstm_label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f4cfd54771b8c9476a062d20dcc10b2f1a400a916e4eafff447bae05efe17fc
3
+ size 530
models/cnn_bilstm/weights/cnn_bilstm_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88eb1d8849497db61267176948f897f9d9cb33f9529d4c443140d02564b8c203
3
+ size 1117500
models/cnn_bilstm/weights/cnn_bilstm_scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed2cfbc36c088284e8ecab353ebae20a47d0083b40b15921369593c2014c72a2
3
+ size 56775
models/cnn_bilstm/weights/similarity_centroids.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5647b55236903f8dc65b7cc42678789970988e162a8cb212cd209d014be13d29
3
+ size 38051
models/gru/results/test_confusion_matrix.png ADDED
models/gru/train.py ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from pathlib import Path
3
+
4
+ import joblib
5
+ import matplotlib.pyplot as plt
6
+ import numpy as np
7
+ import pandas as pd
8
+ import torch
9
+ from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score, precision_score, recall_score
10
+ from sklearn.preprocessing import LabelEncoder, StandardScaler
11
+ from torch import nn
12
+ from torch.utils.data import DataLoader, Dataset
13
+
14
+
15
def parse_args():
    """Parse the command-line options of the GRU training script.

    Returns:
        argparse.Namespace with the dataset paths, output directory, model
        hyper-parameters and training-loop settings.
    """
    # One entry per option: (flag, keyword arguments for add_argument).
    option_table = (
        ("--train-file", {"default": "data/train_sequences.csv"}),
        ("--val-file", {"default": "data/val_sequences.csv"}),
        ("--test-file", {"default": "data/test_internal_sequences.csv"}),
        ("--output-dir", {"default": "models/gru/results"}),
        ("--sequence-length", {"type": int, "default": 30}),
        ("--feature-count", {"type": int, "default": 78}),
        ("--units", {"type": int, "default": 96}),
        ("--dropout", {"type": float, "default": 0.2}),
        ("--learning-rate", {"type": float, "default": 0.0003}),
        ("--batch-size", {"type": int, "default": 54}),
        ("--epochs", {"type": int, "default": 73}),
        ("--early-stopping-patience", {"type": int, "default": 10}),
        ("--lr-plateau-patience", {"type": int, "default": 5}),
        ("--lr-plateau-factor", {"type": float, "default": 0.5}),
        ("--num-workers", {"type": int, "default": 4}),
        ("--seed", {"type": int, "default": 42}),
    )

    parser = argparse.ArgumentParser()
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)
    return parser.parse_args()
34
+
35
+
36
class SequenceDataset(Dataset):
    """Map-style dataset that pairs a feature tensor with a label tensor by index."""

    def __init__(self, feature_tensor, label_tensor):
        """Store references to both tensors; nothing is copied."""
        self.feature_tensor, self.label_tensor = feature_tensor, label_tensor

    def __len__(self):
        """Return the number of samples, taken from the label tensor."""
        return len(self.label_tensor)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        sample = self.feature_tensor[index]
        target = self.label_tensor[index]
        return sample, target
46
+
47
+
48
class GruClassifier(nn.Module):
    """Two-layer unidirectional GRU followed by dropout and a linear classification head."""

    def __init__(self, feature_count, hidden_size, class_count, dropout_probability):
        """Build the recurrent stack, the dropout layer and the output layer.

        Args:
            feature_count: Size of each timestep's feature vector.
            hidden_size: Width of the GRU hidden state.
            class_count: Number of output logits.
            dropout_probability: Used both between the GRU layers and before the head.
        """
        super().__init__()
        # Attribute names double as state_dict key prefixes, so they are part of the
        # saved-checkpoint format.
        self.gru = nn.GRU(
            input_size=feature_count,
            hidden_size=hidden_size,
            num_layers=2,
            batch_first=True,
            dropout=dropout_probability,
            bidirectional=False,
        )
        self.dropout = nn.Dropout(dropout_probability)
        self.classifier = nn.Linear(hidden_size, class_count)

    def forward(self, input_sequence):
        """Map a batch-first sequence tensor to per-class logits.

        Only the GRU output at the last timestep is fed to the classifier.
        """
        hidden_states, _ = self.gru(input_sequence)
        last_step = hidden_states[:, -1, :]
        return self.classifier(self.dropout(last_step))
61
+
62
+
63
def set_random_seed(seed):
    """Seed every random number generator this script can draw from.

    Covers Python's ``random`` module, NumPy's global generator, PyTorch's
    default generator and the generators of all CUDA devices.

    Args:
        seed: Integer seed shared by all generators.
    """
    # Imported locally because the module-level imports do not include `random`.
    import random

    # Python's own generator was previously left unseeded, so any code path
    # relying on it was not reproducible.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
67
+
68
+
69
def load_sequence_table(input_file_path):
    """Read a sequence CSV and split it into a feature matrix and raw labels.

    Every column other than ``video_id``, ``exercise_label``,
    ``start_frame_index`` and ``end_frame_index`` is treated as a flattened
    feature column.

    Args:
        input_file_path: Path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        Tuple ``(features, labels)``: a float32 array with one row per CSV row,
        and the ``exercise_label`` column as an array.
    """
    table = pd.read_csv(input_file_path)

    non_feature_columns = {"video_id", "exercise_label", "start_frame_index", "end_frame_index"}
    feature_columns = []
    for name in table.columns:
        if name not in non_feature_columns:
            feature_columns.append(name)

    feature_matrix = table[feature_columns].to_numpy(dtype=np.float32)
    label_values = table["exercise_label"].to_numpy()
    return feature_matrix, label_values
76
+
77
+
78
def scale_and_reshape_features(train_features, validation_features, test_features, sequence_length, feature_count):
    """Standardise flattened features and reshape them into sequences.

    A ``StandardScaler`` is fitted on the training features only and then
    applied to all three splits. Each scaled split is reshaped to
    ``(-1, sequence_length, feature_count)``.

    Args:
        train_features: 2-D array, one flattened sequence per row.
        validation_features: 2-D array with the same layout.
        test_features: 2-D array with the same layout.
        sequence_length: Number of timesteps per sequence.
        feature_count: Number of features per timestep.

    Returns:
        Tuple ``(scaled_train, scaled_validation, scaled_test, scaler)``.

    Raises:
        ValueError: If any split is not 2-D with exactly
            ``sequence_length * feature_count`` columns.
    """
    expected_width = sequence_length * feature_count
    named_splits = (
        ("train", train_features),
        ("validation", validation_features),
        ("test", test_features),
    )
    # Checked up front: reshape(-1, ...) would otherwise accept any width whose
    # total element count happens to divide evenly, silently changing the number
    # of samples and misaligning them with their labels.
    for split_name, split_features in named_splits:
        split_shape = np.shape(split_features)
        if len(split_shape) != 2 or split_shape[1] != expected_width:
            raise ValueError(
                f"{split_name} features have shape {split_shape}; expected "
                f"(n_samples, {expected_width}) for sequence_length={sequence_length} "
                f"and feature_count={feature_count}"
            )

    scaler = StandardScaler()
    scaler.fit(train_features)

    scaled_train = scaler.transform(train_features).reshape(-1, sequence_length, feature_count)
    scaled_validation = scaler.transform(validation_features).reshape(-1, sequence_length, feature_count)
    scaled_test = scaler.transform(test_features).reshape(-1, sequence_length, feature_count)

    return scaled_train, scaled_validation, scaled_test, scaler
87
+
88
+
89
def build_dataloaders(train_features, validation_features, test_features, train_labels, validation_labels, test_labels, batch_size, num_workers):
    """Wrap the three splits in ``DataLoader`` objects.

    Features are converted to float32 tensors and labels to int64 tensors.
    Only the training loader shuffles; all loaders request pinned memory.

    Returns:
        Tuple ``(train_loader, validation_loader, test_loader)``.
    """

    def make_loader(features, labels, shuffle):
        # Build one split's dataset and loader with the shared batch settings.
        dataset = SequenceDataset(
            torch.tensor(features, dtype=torch.float32),
            torch.tensor(labels, dtype=torch.long),
        )
        return DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=num_workers,
            pin_memory=True,
        )

    train_loader = make_loader(train_features, train_labels, True)
    validation_loader = make_loader(validation_features, validation_labels, False)
    test_loader = make_loader(test_features, test_labels, False)
    return train_loader, validation_loader, test_loader
107
+
108
+
109
def run_training_epoch(model, data_loader, optimizer, loss_function, device):
    """Run one optimisation pass over ``data_loader``.

    Args:
        model: Module to train; switched to training mode.
        data_loader: Yields ``(features, labels)`` batches and exposes ``.dataset``.
        optimizer: Optimiser stepped once per batch.
        loss_function: Callable ``(logits, labels) -> scalar loss tensor``.
        device: Device the batches are moved to.

    Returns:
        Sum of per-batch losses weighted by batch size, divided by the dataset length.
    """
    model.train()
    weighted_loss_sum = 0.0

    for inputs, targets in data_loader:
        inputs = inputs.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)
        batch_loss = loss_function(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch does not skew the mean.
        weighted_loss_sum += batch_loss.item() * inputs.size(0)

    return weighted_loss_sum / len(data_loader.dataset)
127
+
128
+
129
def run_validation_epoch(model, data_loader, loss_function, device):
    """Compute the average loss over ``data_loader`` without updating the model.

    The model is put in evaluation mode and all forward passes run under
    ``torch.inference_mode()``.

    Returns:
        Sum of per-batch losses weighted by batch size, divided by the dataset length.
    """
    model.eval()
    weighted_batch_losses = []

    with torch.inference_mode():
        for inputs, targets in data_loader:
            inputs = inputs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)
            batch_loss = loss_function(model(inputs), targets)
            weighted_batch_losses.append(batch_loss.item() * inputs.size(0))

    return sum(weighted_batch_losses, 0.0) / len(data_loader.dataset)
143
+
144
+
145
def predict_labels(model, data_loader, device):
    """Collect true labels and arg-max predictions for every batch in ``data_loader``.

    Returns:
        Tuple ``(true_labels, predicted_labels)`` of NumPy arrays, concatenated
        in loader order. Note the order: true labels come first.
    """
    model.eval()
    prediction_chunks, target_chunks = [], []

    with torch.inference_mode():
        for inputs, targets in data_loader:
            scores = model(inputs.to(device, non_blocking=True))
            prediction_chunks.append(torch.argmax(scores, dim=1).cpu().numpy())
            # Labels are never moved to the device, so they convert directly.
            target_chunks.append(targets.numpy())

    all_predictions = np.concatenate(prediction_chunks)
    all_targets = np.concatenate(target_chunks)
    return all_targets, all_predictions
161
+
162
+
163
def save_confusion_matrix_figure(confusion_matrix_array, class_names, output_file_path):
    """Render a confusion matrix as an annotated heat map and save it to disk.

    Args:
        confusion_matrix_array: 2-D array of counts; rows are labelled as true
            classes and columns as predicted classes.
        class_names: Tick labels for both axes.
        output_file_path: ``pathlib.Path`` of the image; its parent directory
            is created when missing.
    """
    figure = plt.figure(figsize=(8, 6))
    axis = figure.add_subplot(1, 1, 1)
    heatmap = axis.imshow(confusion_matrix_array, interpolation="nearest", cmap="Blues")
    axis.figure.colorbar(heatmap, ax=axis)

    tick_positions = np.arange(len(class_names))
    axis.set_xticks(tick_positions)
    axis.set_yticks(tick_positions)
    axis.set_xticklabels(class_names, rotation=45, ha="right")
    axis.set_yticklabels(class_names)
    axis.set_xlabel("Predicted label")
    axis.set_ylabel("True label")
    axis.set_title("Test Confusion Matrix")

    # Cells darker than half the maximum get white text so the count stays readable.
    if confusion_matrix_array.size > 0:
        threshold = confusion_matrix_array.max() / 2.0
    else:
        threshold = 0.0

    row_count, column_count = confusion_matrix_array.shape[0], confusion_matrix_array.shape[1]
    for row_index in range(row_count):
        for column_index in range(column_count):
            cell_value = confusion_matrix_array[row_index, column_index]
            text_color = "black"
            if cell_value > threshold:
                text_color = "white"
            axis.text(column_index, row_index, str(cell_value), ha="center", va="center", color=text_color)

    figure.tight_layout()
    output_file_path.parent.mkdir(parents=True, exist_ok=True)
    figure.savefig(output_file_path, dpi=180)
    plt.close(figure)
187
+
188
+
189
def main():
    """Train the GRU classifier, evaluate it on the test split and save all artifacts.

    Pipeline: parse the CLI options, load the three CSV splits, encode labels
    with a ``LabelEncoder`` fitted on the training labels, standardise and
    reshape the features, train with Adam + ``ReduceLROnPlateau`` + early
    stopping on validation loss, restore the best weights seen, compute test
    metrics, then write the model weights, scaler, label encoder, CSV summaries
    and confusion-matrix figure into the output directory.
    """
    args = parse_args()

    output_directory_path = Path(args.output_dir)
    output_directory_path.mkdir(parents=True, exist_ok=True)

    set_random_seed(args.seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    train_features, train_raw_labels = load_sequence_table(Path(args.train_file))
    validation_features, validation_raw_labels = load_sequence_table(Path(args.val_file))
    test_features, test_raw_labels = load_sequence_table(Path(args.test_file))

    # The class index mapping is defined by the training labels only.
    label_encoder = LabelEncoder()
    label_encoder.fit(train_raw_labels)
    train_labels = label_encoder.transform(train_raw_labels)
    validation_labels = label_encoder.transform(validation_raw_labels)
    test_labels = label_encoder.transform(test_raw_labels)

    scaled_train, scaled_validation, scaled_test, scaler = scale_and_reshape_features(
        train_features=train_features,
        validation_features=validation_features,
        test_features=test_features,
        sequence_length=args.sequence_length,
        feature_count=args.feature_count,
    )

    train_loader, validation_loader, test_loader = build_dataloaders(
        train_features=scaled_train,
        validation_features=scaled_validation,
        test_features=scaled_test,
        train_labels=train_labels,
        validation_labels=validation_labels,
        test_labels=test_labels,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
    )

    class_count = len(label_encoder.classes_)
    model = GruClassifier(args.feature_count, args.units, class_count, args.dropout).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode="min",
        factor=args.lr_plateau_factor,
        patience=args.lr_plateau_patience,
    )
    loss_function = nn.CrossEntropyLoss()

    maximum_epochs = args.epochs
    training_losses = []
    validation_losses = []
    best_validation_loss = float("inf")
    best_model_state = None
    epochs_without_improvement = 0

    for epoch_index in range(maximum_epochs):
        training_loss = run_training_epoch(model, train_loader, optimizer, loss_function, device)
        validation_loss = run_validation_epoch(model, validation_loader, loss_function, device)
        scheduler.step(validation_loss)

        training_losses.append(training_loss)
        validation_losses.append(validation_loss)

        print(f"Epoch {epoch_index + 1}/{maximum_epochs} - train_loss: {training_loss:.6f} - val_loss: {validation_loss:.6f}")

        if validation_loss < best_validation_loss:
            best_validation_loss = validation_loss
            # Snapshot on CPU so later optimiser steps cannot mutate the saved copy.
            best_model_state = {key: value.detach().cpu().clone() for key, value in model.state_dict().items()}
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1

        if epochs_without_improvement >= args.early_stopping_patience:
            print("Early stopping triggered.")
            break

    # Evaluate and save the best checkpoint, not the last epoch's weights.
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    test_true_labels, test_predicted_labels = predict_labels(model, test_loader, device)

    # Pin the label set to every encoded class. Without it, a test split that is
    # missing a class yields a report/matrix with fewer entries than
    # label_encoder.classes_, which breaks target_names and the figure's ticks.
    class_indices = np.arange(class_count)

    accuracy = accuracy_score(test_true_labels, test_predicted_labels)
    precision = precision_score(test_true_labels, test_predicted_labels, average="weighted", zero_division=0)
    recall = recall_score(test_true_labels, test_predicted_labels, average="weighted", zero_division=0)
    f1 = f1_score(test_true_labels, test_predicted_labels, average="weighted", zero_division=0)
    report_text = classification_report(
        test_true_labels,
        test_predicted_labels,
        labels=class_indices,
        target_names=label_encoder.classes_,
        zero_division=0,
    )
    matrix = confusion_matrix(test_true_labels, test_predicted_labels, labels=class_indices)

    print("\nTest metrics")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-score: {f1:.4f}")
    print("\nClassification report")
    print(report_text)

    torch.save(model.state_dict(), output_directory_path / "gru_model.pt")
    joblib.dump(scaler, output_directory_path / "gru_scaler.pkl")
    joblib.dump(label_encoder, output_directory_path / "gru_label_encoder.pkl")

    pd.DataFrame({"training_loss": training_losses, "validation_loss": validation_losses}).to_csv(output_directory_path / "training_history.csv", index=False)
    pd.DataFrame([{"accuracy": float(accuracy), "precision_weighted": float(precision), "recall_weighted": float(recall), "f1_weighted": float(f1)}]).to_csv(output_directory_path / "test_metrics.csv", index=False)
    pd.DataFrame(matrix).to_csv(output_directory_path / "test_confusion_matrix_values.csv", index=False)

    save_confusion_matrix_figure(matrix, label_encoder.classes_, output_directory_path / "test_confusion_matrix.png")
    print(f"Saved artifacts to: {output_directory_path}")


# Run the training pipeline only when the file is executed as a script.
if __name__ == "__main__":
    main()
models/gru/weights/gru_label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f4cfd54771b8c9476a062d20dcc10b2f1a400a916e4eafff447bae05efe17fc
3
+ size 530
models/gru/weights/gru_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d764464b4740af3cac740c2ad2857bbbb3ef4cf2db2ed93a0839cafc174ec22
3
+ size 431909
models/gru/weights/gru_scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed2cfbc36c088284e8ecab353ebae20a47d0083b40b15921369593c2014c72a2
3
+ size 56775
models/gru/weights/similarity_centroids.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed435c695fb88da13a6c2eceea9b8447651745e07a84aeb7bae38a62ddfcd67f
3
+ size 38051
models/lstm/results/test_confusion_matrix.png ADDED
models/lstm/train.py ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from pathlib import Path
3
+
4
+ import joblib
5
+ import matplotlib.pyplot as plt
6
+ import numpy as np
7
+ import pandas as pd
8
+ import torch
9
+ from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score, precision_score, recall_score
10
+ from sklearn.preprocessing import LabelEncoder, StandardScaler
11
+ from torch import nn
12
+ from torch.utils.data import DataLoader, Dataset
13
+
14
+
15
def parse_args():
    """Parse the command-line options of the LSTM training script.

    Returns:
        argparse.Namespace with the dataset paths, output directory, model
        hyper-parameters and training-loop settings.
    """
    # One entry per option: (flag, keyword arguments for add_argument).
    option_table = (
        ("--train-file", {"default": "data/train_sequences.csv"}),
        ("--val-file", {"default": "data/val_sequences.csv"}),
        ("--test-file", {"default": "data/test_internal_sequences.csv"}),
        ("--output-dir", {"default": "models/lstm/results"}),
        ("--sequence-length", {"type": int, "default": 30}),
        ("--feature-count", {"type": int, "default": 78}),
        ("--units", {"type": int, "default": 117}),
        ("--dropout", {"type": float, "default": 0.3829}),
        ("--learning-rate", {"type": float, "default": 0.0001}),
        ("--batch-size", {"type": int, "default": 38}),
        ("--epochs", {"type": int, "default": 57}),
        ("--early-stopping-patience", {"type": int, "default": 10}),
        ("--lr-plateau-patience", {"type": int, "default": 5}),
        ("--lr-plateau-factor", {"type": float, "default": 0.5}),
        ("--num-workers", {"type": int, "default": 4}),
        ("--seed", {"type": int, "default": 42}),
    )

    parser = argparse.ArgumentParser()
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)
    return parser.parse_args()
34
+
35
+
36
class SequenceDataset(Dataset):
    """Map-style dataset that pairs a feature tensor with a label tensor by index."""

    def __init__(self, feature_tensor, label_tensor):
        """Store references to both tensors; nothing is copied."""
        self.feature_tensor, self.label_tensor = feature_tensor, label_tensor

    def __len__(self):
        """Return the number of samples, taken from the label tensor."""
        return len(self.label_tensor)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        sample = self.feature_tensor[index]
        target = self.label_tensor[index]
        return sample, target
46
+
47
+
48
class LstmClassifier(nn.Module):
    """Two-layer unidirectional LSTM followed by dropout and a linear classification head."""

    def __init__(self, feature_count, hidden_size, class_count, dropout_probability):
        """Build the recurrent stack, the dropout layer and the output layer.

        Args:
            feature_count: Size of each timestep's feature vector.
            hidden_size: Width of the LSTM hidden state.
            class_count: Number of output logits.
            dropout_probability: Used both between the LSTM layers and before the head.
        """
        super().__init__()
        # Attribute names double as state_dict key prefixes, so they are part of the
        # saved-checkpoint format.
        self.lstm = nn.LSTM(
            input_size=feature_count,
            hidden_size=hidden_size,
            num_layers=2,
            batch_first=True,
            dropout=dropout_probability,
            bidirectional=False,
        )
        self.dropout = nn.Dropout(dropout_probability)
        self.classifier = nn.Linear(hidden_size, class_count)

    def forward(self, input_sequence):
        """Map a batch-first sequence tensor to per-class logits.

        Only the LSTM output at the last timestep is fed to the classifier.
        """
        hidden_states, _ = self.lstm(input_sequence)
        last_step = hidden_states[:, -1, :]
        return self.classifier(self.dropout(last_step))
61
+
62
+
63
def set_random_seed(seed):
    """Seed every random number generator this script can draw from.

    Covers Python's ``random`` module, NumPy's global generator, PyTorch's
    default generator and the generators of all CUDA devices.

    Args:
        seed: Integer seed shared by all generators.
    """
    # Imported locally because the module-level imports do not include `random`.
    import random

    # Python's own generator was previously left unseeded, so any code path
    # relying on it was not reproducible.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
67
+
68
+
69
def load_sequence_table(input_file_path):
    """Read a sequence CSV and split it into a feature matrix and raw labels.

    Every column other than ``video_id``, ``exercise_label``,
    ``start_frame_index`` and ``end_frame_index`` is treated as a flattened
    feature column.

    Args:
        input_file_path: Path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        Tuple ``(features, labels)``: a float32 array with one row per CSV row,
        and the ``exercise_label`` column as an array.
    """
    table = pd.read_csv(input_file_path)

    non_feature_columns = {"video_id", "exercise_label", "start_frame_index", "end_frame_index"}
    feature_columns = []
    for name in table.columns:
        if name not in non_feature_columns:
            feature_columns.append(name)

    feature_matrix = table[feature_columns].to_numpy(dtype=np.float32)
    label_values = table["exercise_label"].to_numpy()
    return feature_matrix, label_values
76
+
77
+
78
def scale_and_reshape_features(train_features, validation_features, test_features, sequence_length, feature_count):
    """Standardise flattened features and reshape them into sequences.

    A ``StandardScaler`` is fitted on the training features only and then
    applied to all three splits. Each scaled split is reshaped to
    ``(-1, sequence_length, feature_count)``.

    Args:
        train_features: 2-D array, one flattened sequence per row.
        validation_features: 2-D array with the same layout.
        test_features: 2-D array with the same layout.
        sequence_length: Number of timesteps per sequence.
        feature_count: Number of features per timestep.

    Returns:
        Tuple ``(scaled_train, scaled_validation, scaled_test, scaler)``.

    Raises:
        ValueError: If any split is not 2-D with exactly
            ``sequence_length * feature_count`` columns.
    """
    expected_width = sequence_length * feature_count
    named_splits = (
        ("train", train_features),
        ("validation", validation_features),
        ("test", test_features),
    )
    # Checked up front: reshape(-1, ...) would otherwise accept any width whose
    # total element count happens to divide evenly, silently changing the number
    # of samples and misaligning them with their labels.
    for split_name, split_features in named_splits:
        split_shape = np.shape(split_features)
        if len(split_shape) != 2 or split_shape[1] != expected_width:
            raise ValueError(
                f"{split_name} features have shape {split_shape}; expected "
                f"(n_samples, {expected_width}) for sequence_length={sequence_length} "
                f"and feature_count={feature_count}"
            )

    scaler = StandardScaler()
    scaler.fit(train_features)

    scaled_train = scaler.transform(train_features).reshape(-1, sequence_length, feature_count)
    scaled_validation = scaler.transform(validation_features).reshape(-1, sequence_length, feature_count)
    scaled_test = scaler.transform(test_features).reshape(-1, sequence_length, feature_count)

    return scaled_train, scaled_validation, scaled_test, scaler
87
+
88
+
89
def build_dataloaders(train_features, validation_features, test_features, train_labels, validation_labels, test_labels, batch_size, num_workers):
    """Wrap the three splits in ``DataLoader`` objects.

    Features are converted to float32 tensors and labels to int64 tensors.
    Only the training loader shuffles; all loaders request pinned memory.

    Returns:
        Tuple ``(train_loader, validation_loader, test_loader)``.
    """

    def make_loader(features, labels, shuffle):
        # Build one split's dataset and loader with the shared batch settings.
        dataset = SequenceDataset(
            torch.tensor(features, dtype=torch.float32),
            torch.tensor(labels, dtype=torch.long),
        )
        return DataLoader(
            dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            num_workers=num_workers,
            pin_memory=True,
        )

    train_loader = make_loader(train_features, train_labels, True)
    validation_loader = make_loader(validation_features, validation_labels, False)
    test_loader = make_loader(test_features, test_labels, False)
    return train_loader, validation_loader, test_loader
107
+
108
+
109
def run_training_epoch(model, data_loader, optimizer, loss_function, device):
    """Run one optimisation pass over ``data_loader``.

    Args:
        model: Module to train; switched to training mode.
        data_loader: Yields ``(features, labels)`` batches and exposes ``.dataset``.
        optimizer: Optimiser stepped once per batch.
        loss_function: Callable ``(logits, labels) -> scalar loss tensor``.
        device: Device the batches are moved to.

    Returns:
        Sum of per-batch losses weighted by batch size, divided by the dataset length.
    """
    model.train()
    weighted_loss_sum = 0.0

    for inputs, targets in data_loader:
        inputs = inputs.to(device, non_blocking=True)
        targets = targets.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)
        batch_loss = loss_function(model(inputs), targets)
        batch_loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller final batch does not skew the mean.
        weighted_loss_sum += batch_loss.item() * inputs.size(0)

    return weighted_loss_sum / len(data_loader.dataset)
127
+
128
+
129
def run_validation_epoch(model, data_loader, loss_function, device):
    """Compute the average loss over ``data_loader`` without updating the model.

    The model is put in evaluation mode and all forward passes run under
    ``torch.inference_mode()``.

    Returns:
        Sum of per-batch losses weighted by batch size, divided by the dataset length.
    """
    model.eval()
    weighted_batch_losses = []

    with torch.inference_mode():
        for inputs, targets in data_loader:
            inputs = inputs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)
            batch_loss = loss_function(model(inputs), targets)
            weighted_batch_losses.append(batch_loss.item() * inputs.size(0))

    return sum(weighted_batch_losses, 0.0) / len(data_loader.dataset)
143
+
144
+
145
def predict_labels(model, data_loader, device):
    """Collect true labels and arg-max predictions for every batch in ``data_loader``.

    Returns:
        Tuple ``(true_labels, predicted_labels)`` of NumPy arrays, concatenated
        in loader order. Note the order: true labels come first.
    """
    model.eval()
    prediction_chunks, target_chunks = [], []

    with torch.inference_mode():
        for inputs, targets in data_loader:
            scores = model(inputs.to(device, non_blocking=True))
            prediction_chunks.append(torch.argmax(scores, dim=1).cpu().numpy())
            # Labels are never moved to the device, so they convert directly.
            target_chunks.append(targets.numpy())

    all_predictions = np.concatenate(prediction_chunks)
    all_targets = np.concatenate(target_chunks)
    return all_targets, all_predictions
161
+
162
+
163
def save_confusion_matrix_figure(confusion_matrix_array, class_names, output_file_path):
    """Render a confusion matrix as an annotated heat map and save it to disk.

    Args:
        confusion_matrix_array: 2-D array of counts; rows are labelled as true
            classes and columns as predicted classes.
        class_names: Tick labels for both axes.
        output_file_path: ``pathlib.Path`` of the image; its parent directory
            is created when missing.
    """
    figure = plt.figure(figsize=(8, 6))
    axis = figure.add_subplot(1, 1, 1)
    heatmap = axis.imshow(confusion_matrix_array, interpolation="nearest", cmap="Blues")
    axis.figure.colorbar(heatmap, ax=axis)

    tick_positions = np.arange(len(class_names))
    axis.set_xticks(tick_positions)
    axis.set_yticks(tick_positions)
    axis.set_xticklabels(class_names, rotation=45, ha="right")
    axis.set_yticklabels(class_names)
    axis.set_xlabel("Predicted label")
    axis.set_ylabel("True label")
    axis.set_title("Test Confusion Matrix")

    # Cells darker than half the maximum get white text so the count stays readable.
    if confusion_matrix_array.size > 0:
        threshold = confusion_matrix_array.max() / 2.0
    else:
        threshold = 0.0

    row_count, column_count = confusion_matrix_array.shape[0], confusion_matrix_array.shape[1]
    for row_index in range(row_count):
        for column_index in range(column_count):
            cell_value = confusion_matrix_array[row_index, column_index]
            text_color = "black"
            if cell_value > threshold:
                text_color = "white"
            axis.text(column_index, row_index, str(cell_value), ha="center", va="center", color=text_color)

    figure.tight_layout()
    output_file_path.parent.mkdir(parents=True, exist_ok=True)
    figure.savefig(output_file_path, dpi=180)
    plt.close(figure)
187
+
188
+
189
def main():
    """Train the LSTM classifier, evaluate it on the test split and save all artifacts.

    Pipeline: parse the CLI options, load the three CSV splits, encode labels
    with a ``LabelEncoder`` fitted on the training labels, standardise and
    reshape the features, train with Adam + ``ReduceLROnPlateau`` + early
    stopping on validation loss, restore the best weights seen, compute test
    metrics, then write the model weights, scaler, label encoder, CSV summaries
    and confusion-matrix figure into the output directory.
    """
    args = parse_args()

    output_directory_path = Path(args.output_dir)
    output_directory_path.mkdir(parents=True, exist_ok=True)

    set_random_seed(args.seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    train_features, train_raw_labels = load_sequence_table(Path(args.train_file))
    validation_features, validation_raw_labels = load_sequence_table(Path(args.val_file))
    test_features, test_raw_labels = load_sequence_table(Path(args.test_file))

    # The class index mapping is defined by the training labels only.
    label_encoder = LabelEncoder()
    label_encoder.fit(train_raw_labels)
    train_labels = label_encoder.transform(train_raw_labels)
    validation_labels = label_encoder.transform(validation_raw_labels)
    test_labels = label_encoder.transform(test_raw_labels)

    scaled_train, scaled_validation, scaled_test, scaler = scale_and_reshape_features(
        train_features,
        validation_features,
        test_features,
        args.sequence_length,
        args.feature_count,
    )

    train_loader, validation_loader, test_loader = build_dataloaders(
        train_features=scaled_train,
        validation_features=scaled_validation,
        test_features=scaled_test,
        train_labels=train_labels,
        validation_labels=validation_labels,
        test_labels=test_labels,
        batch_size=args.batch_size,
        num_workers=args.num_workers,
    )

    class_count = len(label_encoder.classes_)
    model = LstmClassifier(args.feature_count, args.units, class_count, args.dropout).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode="min",
        factor=args.lr_plateau_factor,
        patience=args.lr_plateau_patience,
    )
    loss_function = nn.CrossEntropyLoss()

    maximum_epochs = args.epochs
    training_losses = []
    validation_losses = []
    best_validation_loss = float("inf")
    best_model_state = None
    epochs_without_improvement = 0

    for epoch_index in range(maximum_epochs):
        training_loss = run_training_epoch(model, train_loader, optimizer, loss_function, device)
        validation_loss = run_validation_epoch(model, validation_loader, loss_function, device)
        scheduler.step(validation_loss)

        training_losses.append(training_loss)
        validation_losses.append(validation_loss)

        print(f"Epoch {epoch_index + 1}/{maximum_epochs} - train_loss: {training_loss:.6f} - val_loss: {validation_loss:.6f}")

        if validation_loss < best_validation_loss:
            best_validation_loss = validation_loss
            # Snapshot on CPU so later optimiser steps cannot mutate the saved copy.
            best_model_state = {key: value.detach().cpu().clone() for key, value in model.state_dict().items()}
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1

        if epochs_without_improvement >= args.early_stopping_patience:
            print("Early stopping triggered.")
            break

    # Evaluate and save the best checkpoint, not the last epoch's weights.
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    test_true_labels, test_predicted_labels = predict_labels(model, test_loader, device)

    # Pin the label set to every encoded class. Without it, a test split that is
    # missing a class yields a report/matrix with fewer entries than
    # label_encoder.classes_, which breaks target_names and the figure's ticks.
    class_indices = np.arange(class_count)

    accuracy = accuracy_score(test_true_labels, test_predicted_labels)
    precision = precision_score(test_true_labels, test_predicted_labels, average="weighted", zero_division=0)
    recall = recall_score(test_true_labels, test_predicted_labels, average="weighted", zero_division=0)
    f1 = f1_score(test_true_labels, test_predicted_labels, average="weighted", zero_division=0)
    report_text = classification_report(
        test_true_labels,
        test_predicted_labels,
        labels=class_indices,
        target_names=label_encoder.classes_,
        zero_division=0,
    )
    matrix = confusion_matrix(test_true_labels, test_predicted_labels, labels=class_indices)

    print("\nTest metrics")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-score: {f1:.4f}")
    print("\nClassification report")
    print(report_text)

    torch.save(model.state_dict(), output_directory_path / "lstm_model.pt")
    joblib.dump(scaler, output_directory_path / "lstm_scaler.pkl")
    joblib.dump(label_encoder, output_directory_path / "lstm_label_encoder.pkl")

    pd.DataFrame({"training_loss": training_losses, "validation_loss": validation_losses}).to_csv(output_directory_path / "training_history.csv", index=False)
    pd.DataFrame([{"accuracy": float(accuracy), "precision_weighted": float(precision), "recall_weighted": float(recall), "f1_weighted": float(f1)}]).to_csv(output_directory_path / "test_metrics.csv", index=False)
    pd.DataFrame(matrix).to_csv(output_directory_path / "test_confusion_matrix_values.csv", index=False)

    save_confusion_matrix_figure(matrix, label_encoder.classes_, output_directory_path / "test_confusion_matrix.png")
    print(f"Saved artifacts to: {output_directory_path}")


# Run the training pipeline only when the file is executed as a script.
if __name__ == "__main__":
    main()
models/lstm/weights/lstm_label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f4cfd54771b8c9476a062d20dcc10b2f1a400a916e4eafff447bae05efe17fc
3
+ size 530
models/lstm/weights/lstm_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2c5432cf867444020fe41f67d7166035c763f3f905bf7ec5365662d82de1554
3
+ size 815356
models/lstm/weights/lstm_scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed2cfbc36c088284e8ecab353ebae20a47d0083b40b15921369593c2014c72a2
3
+ size 56775
models/lstm/weights/similarity_centroids.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbf58be723e6310d755b038c5778ad5d1771091c76ff211f9ab13ead23a1225b
3
+ size 38051
models/st_gcn/results/test_confusion_matrix.png ADDED
models/st_gcn/train.py ADDED
@@ -0,0 +1,379 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from pathlib import Path
3
+
4
+ import joblib
5
+ import matplotlib.pyplot as plt
6
+ import numpy as np
7
+ import pandas as pd
8
+ import torch
9
+ from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score, precision_score, recall_score
10
+ from sklearn.preprocessing import LabelEncoder, StandardScaler
11
+ from torch import nn
12
+ from torch.utils.data import DataLoader, Dataset
13
+
14
+
15
def parse_args(argv=None):
    """Parse the command-line options of the ST-GCN training script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is the previous behavior;
            passing a list lets callers drive the parser programmatically.

    Returns:
        argparse.Namespace with the data paths, model settings and
        optimisation settings.
    """
    parser = argparse.ArgumentParser(
        description="Train and evaluate the ST-GCN sequence classifier.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Data locations and output directory.
    parser.add_argument("--train-file", default="data/train_sequences.csv", help="CSV file with the training sequences")
    parser.add_argument("--val-file", default="data/val_sequences.csv", help="CSV file with the validation sequences")
    parser.add_argument("--test-file", default="data/test_internal_sequences.csv", help="CSV file with the test sequences")
    parser.add_argument("--output-dir", default="models/st_gcn/results", help="directory that receives the saved artifacts")

    # Shape of one sequence after un-flattening a CSV row.
    parser.add_argument("--sequence-length", type=int, default=30, help="time steps per sequence")
    parser.add_argument("--feature-count", type=int, default=78, help="features per time step")

    # Model and optimisation settings.
    parser.add_argument("--dropout", type=float, default=0.2, help="dropout probability inside each ST-GCN block")
    parser.add_argument("--learning-rate", type=float, default=0.0003, help="initial Adam learning rate")
    parser.add_argument("--batch-size", type=int, default=54, help="samples per batch")
    parser.add_argument("--epochs", type=int, default=73, help="maximum number of training epochs")
    parser.add_argument("--early-stopping-patience", type=int, default=10, help="epochs without validation-loss improvement before stopping")
    parser.add_argument("--lr-plateau-patience", type=int, default=5, help="patience passed to ReduceLROnPlateau")
    parser.add_argument("--lr-plateau-factor", type=float, default=0.5, help="factor passed to ReduceLROnPlateau")
    parser.add_argument("--num-workers", type=int, default=4, help="DataLoader worker processes")
    parser.add_argument("--seed", type=int, default=42, help="random seed")

    return parser.parse_args(argv)
33
+
34
+
35
class SequenceDataset(Dataset):
    """Map-style dataset that pairs pre-built feature and label tensors by index."""

    def __init__(self, feature_tensor, label_tensor):
        """Store the tensors after checking that they describe the same samples.

        Args:
            feature_tensor: Indexable container of per-sample features.
            label_tensor: Indexable container of per-sample labels.

        Raises:
            ValueError: If the two containers differ in length.
        """
        # __len__ is driven by the labels only, so a mismatch would otherwise
        # surface as an IndexError mid-epoch or silently drop trailing features.
        if len(feature_tensor) != len(label_tensor):
            raise ValueError(
                f"Feature/label count mismatch: {len(feature_tensor)} features vs {len(label_tensor)} labels."
            )
        self.feature_tensor = feature_tensor
        self.label_tensor = label_tensor

    def __len__(self):
        """Return the number of samples."""
        return len(self.label_tensor)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.feature_tensor[index], self.label_tensor[index]
45
+
46
+
47
class GraphConvolution(nn.Module):
    """Pointwise channel projection followed by mixing along the last (node) axis."""

    def __init__(self, input_channels, output_channels):
        super().__init__()
        # A 1x1 convolution changes only the channel dimension.
        self.projection = nn.Conv2d(input_channels, output_channels, kernel_size=1)

    def forward(self, input_tensor, adjacency_matrix):
        """Project the channels, then aggregate nodes through ``adjacency_matrix``.

        The einsum indexes ``input_tensor`` with four axes (``n, c, t, v``) and
        ``adjacency_matrix`` with two (``v, w``): for every output node ``w``
        it sums the projected features over ``v`` weighted by
        ``adjacency_matrix[v, w]``.
        """
        node_features = self.projection(input_tensor)
        return torch.einsum("nctv,vw->nctw", node_features, adjacency_matrix)
56
+
57
+
58
class StGcnBlock(nn.Module):
    """Graph convolution, temporal convolution and a residual connection.

    Args:
        input_channels: Channels of the incoming tensor.
        output_channels: Channels produced by the block.
        dropout: Dropout probability applied after the temporal convolution.
        stride: Stride along the time axis (axis 2), applied to both the
            temporal convolution and the residual projection.
        temporal_kernel_size: Odd kernel length along the time axis. The
            default of 9 (padding 4) matches the previously hard-coded value,
            so existing checkpoints load unchanged.

    Raises:
        ValueError: If ``temporal_kernel_size`` is not a positive odd number.
    """

    def __init__(self, input_channels, output_channels, dropout, stride=1, temporal_kernel_size=9):
        super().__init__()
        # Only an odd kernel with symmetric padding keeps the main path the
        # same length as the residual path, which the addition in forward needs.
        if temporal_kernel_size < 1 or temporal_kernel_size % 2 == 0:
            raise ValueError(f"temporal_kernel_size must be a positive odd integer, got {temporal_kernel_size}.")
        temporal_padding = (temporal_kernel_size - 1) // 2

        self.graph_convolution = GraphConvolution(input_channels, output_channels)
        self.temporal_convolution = nn.Sequential(
            nn.BatchNorm2d(output_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(
                output_channels,
                output_channels,
                kernel_size=(temporal_kernel_size, 1),
                stride=(stride, 1),
                padding=(temporal_padding, 0),
            ),
            nn.BatchNorm2d(output_channels),
            nn.Dropout(dropout),
        )

        # The shortcut needs a projection whenever the main path changes the
        # channel count or the temporal resolution.
        if stride != 1 or input_channels != output_channels:
            self.residual = nn.Sequential(
                nn.Conv2d(input_channels, output_channels, kernel_size=1, stride=(stride, 1)),
                nn.BatchNorm2d(output_channels),
            )
        else:
            self.residual = nn.Identity()

        self.activation = nn.ReLU(inplace=True)

    def forward(self, input_tensor, adjacency_matrix):
        """Return ``ReLU(temporal(graph(input_tensor)) + residual(input_tensor))``."""
        residual_tensor = self.residual(input_tensor)
        output_tensor = self.graph_convolution(input_tensor, adjacency_matrix)
        output_tensor = self.temporal_convolution(output_tensor)
        return self.activation(output_tensor + residual_tensor)
85
+
86
+
87
class StGcnClassifier(nn.Module):
    """Sequence classifier built from three ST-GCN blocks over a learned adjacency.

    Each input feature is treated as one graph node carrying a single channel;
    the node-to-node weights are a trainable ``feature_count x feature_count``
    matrix initialised to the identity.
    """

    def __init__(self, feature_count, class_count, dropout):
        super().__init__()
        self.feature_count = feature_count
        self.input_batch_norm = nn.BatchNorm1d(feature_count)
        # Assigning an nn.Parameter registers it under the attribute name.
        self.adjacency_logits = nn.Parameter(torch.eye(feature_count))

        self.block1 = StGcnBlock(1, 64, dropout=dropout, stride=1)
        self.block2 = StGcnBlock(64, 64, dropout=dropout, stride=1)
        self.block3 = StGcnBlock(64, 128, dropout=dropout, stride=1)
        self.classifier = nn.Linear(128, class_count)

    def get_normalized_adjacency(self):
        """Return the adjacency logits passed through a softmax along dim 1 (each row sums to one)."""
        return torch.softmax(self.adjacency_logits, dim=1)

    def forward(self, input_sequence):
        """Compute class logits for a ``(batch, sequence_length, feature_count)`` input."""
        batch_size, sequence_length, feature_count = input_sequence.shape

        # BatchNorm1d expects (N, C), so fold the time axis into the batch axis
        # for normalisation and unfold it again afterwards.
        flat_frames = input_sequence.reshape(batch_size * sequence_length, feature_count)
        normalized = self.input_batch_norm(flat_frames).reshape(batch_size, sequence_length, feature_count)

        # Insert a singleton channel axis: (batch, 1, time, node).
        hidden = normalized.unsqueeze(1)

        adjacency = self.get_normalized_adjacency()
        for block in (self.block1, self.block2, self.block3):
            hidden = block(hidden, adjacency)

        # Average over time, then over nodes, leaving (batch, channels).
        pooled = hidden.mean(dim=2).mean(dim=2)
        return self.classifier(pooled)
119
+
120
+
121
def set_random_seed(seed):
    """Seed Python's ``random``, NumPy and PyTorch (CPU and CUDA) generators.

    Args:
        seed: Integer seed applied to every generator.
    """
    # Imported locally so the block does not depend on a module-level import.
    import random

    # The stdlib generator was previously left unseeded, so any code relying
    # on ``random`` was not reproducible between runs.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
125
+
126
+
127
def load_sequence_table(input_file_path):
    """Read a sequence CSV and split it into a feature matrix and raw labels.

    Every column except ``video_id``, ``exercise_label``, ``start_frame_index``
    and ``end_frame_index`` is treated as a feature.

    Args:
        input_file_path: Path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        Tuple ``(features, labels)``: a float32 array with one row per CSV row,
        and the ``exercise_label`` column as an array.
    """
    table = pd.read_csv(input_file_path)

    # Dropping the metadata columns keeps the remaining columns in file order.
    feature_table = table.drop(
        columns=["video_id", "exercise_label", "start_frame_index", "end_frame_index"],
        errors="ignore",
    )
    features = feature_table.to_numpy(dtype=np.float32)
    labels = table["exercise_label"].to_numpy()
    return features, labels
134
+
135
+
136
def scale_and_reshape_features(train_features, validation_features, test_features, sequence_length, feature_count):
    """Standardise flattened sequences and reshape them to ``(N, sequence_length, feature_count)``.

    The scaler is fitted on the training split only and then applied to all
    three splits.

    Args:
        train_features: 2-D array of flattened training sequences.
        validation_features: 2-D array of flattened validation sequences.
        test_features: 2-D array of flattened test sequences.
        sequence_length: Time steps per sequence.
        feature_count: Features per time step.

    Returns:
        Tuple ``(scaled_train, scaled_validation, scaled_test, scaler)``.

    Raises:
        ValueError: If a split is not 2-D or its width differs from
            ``sequence_length * feature_count``.
    """
    # reshape(-1, ...) succeeds whenever the total element count happens to be
    # divisible, which would silently change the number of sequences and
    # misalign them with their labels. Check the width explicitly first.
    expected_width = sequence_length * feature_count
    named_splits = (
        ("train", train_features),
        ("validation", validation_features),
        ("test", test_features),
    )
    for split_name, split_features in named_splits:
        split_shape = np.shape(split_features)
        if len(split_shape) != 2 or split_shape[1] != expected_width:
            raise ValueError(
                f"{split_name} features have shape {split_shape}; expected (N, {expected_width}) "
                f"for sequence_length={sequence_length} and feature_count={feature_count}."
            )

    scaler = StandardScaler()
    scaler.fit(train_features)

    scaled_train = scaler.transform(train_features).reshape(-1, sequence_length, feature_count)
    scaled_validation = scaler.transform(validation_features).reshape(-1, sequence_length, feature_count)
    scaled_test = scaler.transform(test_features).reshape(-1, sequence_length, feature_count)

    return scaled_train, scaled_validation, scaled_test, scaler
145
+
146
+
147
def build_dataloaders(train_features, validation_features, test_features, train_labels, validation_labels, test_labels, batch_size, num_workers):
    """Wrap the three splits in DataLoaders.

    Features are converted to float32 tensors and labels to int64 tensors.
    Only the training loader shuffles.

    Args:
        train_features, validation_features, test_features: Array-like features.
        train_labels, validation_labels, test_labels: Array-like integer labels.
        batch_size: Samples per batch for all three loaders.
        num_workers: Worker processes for all three loaders.

    Returns:
        Tuple ``(train_loader, validation_loader, test_loader)``.
    """
    # Pinned host memory only helps host-to-GPU copies; requesting it on a
    # CPU-only machine just makes DataLoader emit a warning.
    pin_memory = torch.cuda.is_available()

    def make_loader(features, labels, shuffle):
        dataset = SequenceDataset(
            torch.tensor(features, dtype=torch.float32),
            torch.tensor(labels, dtype=torch.long),
        )
        return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers, pin_memory=pin_memory)

    train_loader = make_loader(train_features, train_labels, shuffle=True)
    validation_loader = make_loader(validation_features, validation_labels, shuffle=False)
    test_loader = make_loader(test_features, test_labels, shuffle=False)

    return train_loader, validation_loader, test_loader
165
+
166
+
167
def run_training_epoch(model, data_loader, optimizer, loss_function, device):
    """Train ``model`` for one pass over ``data_loader`` and return the mean loss.

    Args:
        model: Module to optimise; switched to training mode.
        data_loader: Iterable of ``(features, labels)`` batches.
        optimizer: Optimizer stepping the model parameters.
        loss_function: Callable ``(logits, labels) -> scalar loss``.
        device: Device the batches are moved to.

    Returns:
        Sample-weighted mean of the batch losses.

    Raises:
        ValueError: If the loader yields no samples.
    """
    model.train()
    cumulative_loss = 0.0
    sample_count = 0

    for feature_batch, label_batch in data_loader:
        feature_batch = feature_batch.to(device, non_blocking=True)
        label_batch = label_batch.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)
        logits = model(feature_batch)
        loss = loss_function(logits, label_batch)
        loss.backward()
        optimizer.step()

        batch_sample_count = feature_batch.size(0)
        cumulative_loss += loss.item() * batch_sample_count
        sample_count += batch_sample_count

    # Divide by the samples actually processed rather than len(dataset): the
    # two differ when the loader drops the last batch or uses a sampler.
    if sample_count == 0:
        raise ValueError("Training data loader produced no samples.")
    return cumulative_loss / sample_count
185
+
186
+
187
def run_validation_epoch(model, data_loader, loss_function, device):
    """Evaluate ``model`` on ``data_loader`` without gradients and return the mean loss.

    Args:
        model: Module to evaluate; switched to evaluation mode.
        data_loader: Iterable of ``(features, labels)`` batches.
        loss_function: Callable ``(logits, labels) -> scalar loss``.
        device: Device the batches are moved to.

    Returns:
        Sample-weighted mean of the batch losses.

    Raises:
        ValueError: If the loader yields no samples.
    """
    model.eval()
    cumulative_loss = 0.0
    sample_count = 0

    with torch.inference_mode():
        for feature_batch, label_batch in data_loader:
            feature_batch = feature_batch.to(device, non_blocking=True)
            label_batch = label_batch.to(device, non_blocking=True)
            logits = model(feature_batch)
            loss = loss_function(logits, label_batch)

            batch_sample_count = feature_batch.size(0)
            cumulative_loss += loss.item() * batch_sample_count
            sample_count += batch_sample_count

    # Divide by the samples actually processed rather than len(dataset): the
    # two differ when the loader drops the last batch or uses a sampler.
    if sample_count == 0:
        raise ValueError("Validation data loader produced no samples.")
    return cumulative_loss / sample_count
201
+
202
+
203
def predict_labels(model, data_loader, device):
    """Run ``model`` over ``data_loader`` and collect true and predicted class indices.

    Args:
        model: Module producing per-class logits; switched to evaluation mode.
        data_loader: Iterable of ``(features, labels)`` batches.
        device: Device the feature batches are moved to.

    Returns:
        Tuple ``(true_labels, predicted_labels)`` of NumPy arrays in loader order.
    """
    model.eval()
    target_chunks = []
    prediction_chunks = []

    with torch.inference_mode():
        for inputs, targets in data_loader:
            scores = model(inputs.to(device, non_blocking=True))
            # The predicted class is the index of the largest logit.
            prediction_chunks.append(torch.argmax(scores, dim=1).cpu().numpy())
            # Labels are never moved to the device, so they convert directly.
            target_chunks.append(targets.numpy())

    return np.concatenate(target_chunks), np.concatenate(prediction_chunks)
219
+
220
+
221
def save_confusion_matrix_figure(confusion_matrix_array, class_names, output_file_path):
    """Render a confusion matrix as an annotated heat map and save it to disk.

    Args:
        confusion_matrix_array: 2-D array; rows are labelled "True label" and
            columns "Predicted label".
        class_names: Tick labels for both axes.
        output_file_path: ``pathlib.Path`` of the image; missing parent
            directories are created.
    """
    figure = plt.figure(figsize=(8, 6))
    axis = figure.add_subplot(111)
    heatmap = axis.imshow(confusion_matrix_array, interpolation="nearest", cmap="Blues")
    axis.figure.colorbar(heatmap, ax=axis)

    tick_positions = np.arange(len(class_names))
    axis.set_xticks(tick_positions)
    axis.set_yticks(tick_positions)
    axis.set_xticklabels(class_names, rotation=45, ha="right")
    axis.set_yticklabels(class_names)
    axis.set_xlabel("Predicted label")
    axis.set_ylabel("True label")
    axis.set_title("Test Confusion Matrix")

    # Cells above half of the maximum get white text, the rest black text.
    if confusion_matrix_array.size > 0:
        threshold = confusion_matrix_array.max() / 2.0
    else:
        threshold = 0.0

    cell_indices = np.ndindex(confusion_matrix_array.shape[0], confusion_matrix_array.shape[1])
    for row_index, column_index in cell_indices:
        cell_value = confusion_matrix_array[row_index, column_index]
        text_color = "white" if cell_value > threshold else "black"
        axis.text(column_index, row_index, str(cell_value), ha="center", va="center", color=text_color)

    figure.tight_layout()
    output_file_path.parent.mkdir(parents=True, exist_ok=True)
    figure.savefig(output_file_path, dpi=180)
    plt.close(figure)
245
+
246
+
247
def main():
    """Train the ST-GCN classifier, evaluate it on the test split and save the artifacts.

    Steps: parse the options, load and encode the three splits, standardise
    and reshape the features, train with ReduceLROnPlateau and early stopping
    on the validation loss, restore the best weights, compute the test
    metrics, and write the model, scaler, label encoder, CSV summaries and
    confusion-matrix figure into the output directory.
    """
    args = parse_args()

    train_file_path = Path(args.train_file)
    validation_file_path = Path(args.val_file)
    test_file_path = Path(args.test_file)
    output_directory_path = Path(args.output_dir)
    output_directory_path.mkdir(parents=True, exist_ok=True)

    sequence_length = args.sequence_length
    feature_count = args.feature_count
    dropout_probability = args.dropout
    learning_rate = args.learning_rate
    batch_size = args.batch_size
    maximum_epochs = args.epochs
    early_stopping_patience = args.early_stopping_patience
    lr_plateau_patience = args.lr_plateau_patience
    lr_plateau_factor = args.lr_plateau_factor
    num_workers = args.num_workers
    seed = args.seed

    set_random_seed(seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    train_features, train_raw_labels = load_sequence_table(train_file_path)
    validation_features, validation_raw_labels = load_sequence_table(validation_file_path)
    test_features, test_raw_labels = load_sequence_table(test_file_path)

    # The encoder is fitted on the training labels only.
    label_encoder = LabelEncoder()
    label_encoder.fit(train_raw_labels)
    train_labels = label_encoder.transform(train_raw_labels)
    validation_labels = label_encoder.transform(validation_raw_labels)
    test_labels = label_encoder.transform(test_raw_labels)

    scaled_train, scaled_validation, scaled_test, scaler = scale_and_reshape_features(
        train_features=train_features,
        validation_features=validation_features,
        test_features=test_features,
        sequence_length=sequence_length,
        feature_count=feature_count,
    )

    train_loader, validation_loader, test_loader = build_dataloaders(
        train_features=scaled_train,
        validation_features=scaled_validation,
        test_features=scaled_test,
        train_labels=train_labels,
        validation_labels=validation_labels,
        test_labels=test_labels,
        batch_size=batch_size,
        num_workers=num_workers,
    )

    class_count = len(label_encoder.classes_)
    model = StGcnClassifier(feature_count=feature_count, class_count=class_count, dropout=dropout_probability).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=lr_plateau_factor, patience=lr_plateau_patience)
    loss_function = nn.CrossEntropyLoss()

    training_losses = []
    validation_losses = []
    best_validation_loss = float("inf")
    best_model_state = None
    epochs_without_improvement = 0

    for epoch_index in range(maximum_epochs):
        training_loss = run_training_epoch(model, train_loader, optimizer, loss_function, device)
        validation_loss = run_validation_epoch(model, validation_loader, loss_function, device)
        scheduler.step(validation_loss)

        training_losses.append(training_loss)
        validation_losses.append(validation_loss)

        print(f"Epoch {epoch_index + 1}/{maximum_epochs} - train_loss: {training_loss:.6f} - val_loss: {validation_loss:.6f}")

        if validation_loss < best_validation_loss:
            best_validation_loss = validation_loss
            # Keep a detached CPU copy so later epochs cannot overwrite it.
            best_model_state = {key: value.detach().cpu().clone() for key, value in model.state_dict().items()}
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1

        if epochs_without_improvement >= early_stopping_patience:
            print("Early stopping triggered.")
            break

    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    test_true_labels, test_predicted_labels = predict_labels(model, test_loader, device)

    # Pin the label set to every class known to the encoder. Without it,
    # scikit-learn infers the labels from the data, so a test split that
    # misses a class makes classification_report raise (target_names length
    # mismatch) and yields a confusion matrix smaller than the class list
    # used for the figure's tick labels.
    label_indices = np.arange(class_count)

    accuracy = accuracy_score(test_true_labels, test_predicted_labels)
    precision = precision_score(test_true_labels, test_predicted_labels, average="weighted", zero_division=0)
    recall = recall_score(test_true_labels, test_predicted_labels, average="weighted", zero_division=0)
    f1 = f1_score(test_true_labels, test_predicted_labels, average="weighted", zero_division=0)
    report_text = classification_report(
        test_true_labels,
        test_predicted_labels,
        labels=label_indices,
        target_names=label_encoder.classes_,
        zero_division=0,
    )
    matrix = confusion_matrix(test_true_labels, test_predicted_labels, labels=label_indices)

    print("\nTest metrics")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-score: {f1:.4f}")
    print("\nClassification report")
    print(report_text)

    torch.save(model.state_dict(), output_directory_path / "st_gcn_model.pt")
    joblib.dump(scaler, output_directory_path / "st_gcn_scaler.pkl")
    joblib.dump(label_encoder, output_directory_path / "st_gcn_label_encoder.pkl")

    test_metrics = {
        "accuracy": float(accuracy),
        "precision_weighted": float(precision),
        "recall_weighted": float(recall),
        "f1_weighted": float(f1),
    }

    pd.DataFrame({"training_loss": training_losses, "validation_loss": validation_losses}).to_csv(output_directory_path / "training_history.csv", index=False)
    pd.DataFrame([test_metrics]).to_csv(output_directory_path / "test_metrics.csv", index=False)
    pd.DataFrame(matrix).to_csv(output_directory_path / "test_confusion_matrix_values.csv", index=False)

    save_confusion_matrix_figure(matrix, label_encoder.classes_, output_directory_path / "test_confusion_matrix.png")
    print(f"Saved artifacts to: {output_directory_path}")
376
+
377
+
378
+ if __name__ == "__main__":
379
+ main()
models/st_gcn/weights/similarity_centroids.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50fa1731fd489141ad01468cf8ee31024651597df073f731723247173c3ad616
3
+ size 38051
models/st_gcn/weights/st_gcn_label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f4cfd54771b8c9476a062d20dcc10b2f1a400a916e4eafff447bae05efe17fc
3
+ size 530
models/st_gcn/weights/st_gcn_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f58d11c4696ddadd25b3922a94388578ac0b18c90c37ac98e8baa81cc25266ae
3
+ size 1031937
models/st_gcn/weights/st_gcn_scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed2cfbc36c088284e8ecab353ebae20a47d0083b40b15921369593c2014c72a2
3
+ size 56775
models/tcn/results/test_confusion_matrix.png ADDED
models/tcn/train.py ADDED
@@ -0,0 +1,372 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ from pathlib import Path
3
+
4
+ import joblib
5
+ import matplotlib.pyplot as plt
6
+ import numpy as np
7
+ import pandas as pd
8
+ import torch
9
+ from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score, precision_score, recall_score
10
+ from sklearn.preprocessing import LabelEncoder, StandardScaler
11
+ from torch import nn
12
+ from torch.utils.data import DataLoader, Dataset
13
+
14
+
15
def parse_args(argv=None):
    """Parse the command-line options of the TCN training script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is the previous behavior;
            passing a list lets callers drive the parser programmatically.

    Returns:
        argparse.Namespace with the data paths, model settings and
        optimisation settings.
    """
    parser = argparse.ArgumentParser(
        description="Train and evaluate the TCN sequence classifier.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )

    # Data locations and output directory.
    parser.add_argument("--train-file", default="data/train_sequences.csv", help="CSV file with the training sequences")
    parser.add_argument("--val-file", default="data/val_sequences.csv", help="CSV file with the validation sequences")
    parser.add_argument("--test-file", default="data/test_internal_sequences.csv", help="CSV file with the test sequences")
    parser.add_argument("--output-dir", default="models/tcn/results", help="directory that receives the saved artifacts")

    # Shape of one sequence after un-flattening a CSV row.
    parser.add_argument("--sequence-length", type=int, default=30, help="time steps per sequence")
    parser.add_argument("--feature-count", type=int, default=78, help="features per time step")

    # Model and optimisation settings.
    parser.add_argument("--channel-width", type=int, default=128, help="channels used by every temporal block")
    parser.add_argument("--kernel-size", type=int, default=3, help="kernel size of the temporal convolutions")
    parser.add_argument("--dropout", type=float, default=0.2, help="dropout probability inside each temporal block")
    parser.add_argument("--learning-rate", type=float, default=0.0003, help="initial Adam learning rate")
    parser.add_argument("--batch-size", type=int, default=54, help="samples per batch")
    parser.add_argument("--epochs", type=int, default=73, help="maximum number of training epochs")
    parser.add_argument("--early-stopping-patience", type=int, default=10, help="epochs without validation-loss improvement before stopping")
    parser.add_argument("--lr-plateau-patience", type=int, default=5, help="patience passed to ReduceLROnPlateau")
    parser.add_argument("--lr-plateau-factor", type=float, default=0.5, help="factor passed to ReduceLROnPlateau")
    parser.add_argument("--num-workers", type=int, default=4, help="DataLoader worker processes")
    parser.add_argument("--seed", type=int, default=42, help="random seed")

    return parser.parse_args(argv)
35
+
36
+
37
class SequenceDataset(Dataset):
    """Map-style dataset that pairs pre-built feature and label tensors by index."""

    def __init__(self, feature_tensor, label_tensor):
        """Store the tensors after checking that they describe the same samples.

        Args:
            feature_tensor: Indexable container of per-sample features.
            label_tensor: Indexable container of per-sample labels.

        Raises:
            ValueError: If the two containers differ in length.
        """
        # __len__ is driven by the labels only, so a mismatch would otherwise
        # surface as an IndexError mid-epoch or silently drop trailing features.
        if len(feature_tensor) != len(label_tensor):
            raise ValueError(
                f"Feature/label count mismatch: {len(feature_tensor)} features vs {len(label_tensor)} labels."
            )
        self.feature_tensor = feature_tensor
        self.label_tensor = label_tensor

    def __len__(self):
        """Return the number of samples."""
        return len(self.label_tensor)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.feature_tensor[index], self.label_tensor[index]
47
+
48
+
49
class Chomp1d(nn.Module):
    """Remove the last ``chomp_size`` positions along axis 2 of a tensor."""

    def __init__(self, chomp_size):
        super().__init__()
        self.chomp_size = chomp_size

    def forward(self, input_tensor):
        """Return ``input_tensor`` without its trailing ``chomp_size`` steps.

        A ``chomp_size`` of zero returns the input object unchanged; otherwise
        the sliced view is made contiguous before being returned.
        """
        if self.chomp_size != 0:
            trimmed = input_tensor[:, :, :-self.chomp_size]
            return trimmed.contiguous()
        return input_tensor
58
+
59
+
60
class TemporalBlock(nn.Module):
    """Two dilated 1-D convolutions with trailing trim, ReLU and dropout, plus a residual path.

    Each convolution pads both ends by ``(kernel_size - 1) * dilation`` and the
    following ``Chomp1d`` removes that many trailing steps, so the sequence
    length is preserved.
    """

    def __init__(self, input_channels, output_channels, kernel_size, dilation, dropout):
        super().__init__()
        trim_length = dilation * (kernel_size - 1)

        self.conv1 = nn.Conv1d(input_channels, output_channels, kernel_size, padding=trim_length, dilation=dilation)
        self.chomp1 = Chomp1d(trim_length)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(dropout)

        self.conv2 = nn.Conv1d(output_channels, output_channels, kernel_size, padding=trim_length, dilation=dilation)
        self.chomp2 = Chomp1d(trim_length)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(dropout)

        # A 1x1 projection is only needed when the shortcut must change width.
        if input_channels == output_channels:
            self.downsample = None
        else:
            self.downsample = nn.Conv1d(input_channels, output_channels, kernel_size=1)
        self.final_relu = nn.ReLU()

    def forward(self, input_tensor):
        """Return ``ReLU(branch(input_tensor) + shortcut(input_tensor))``."""
        branch_layers = (
            self.conv1,
            self.chomp1,
            self.relu1,
            self.dropout1,
            self.conv2,
            self.chomp2,
            self.relu2,
            self.dropout2,
        )
        hidden = input_tensor
        for layer in branch_layers:
            hidden = layer(hidden)

        if self.downsample is None:
            shortcut = input_tensor
        else:
            shortcut = self.downsample(input_tensor)
        return self.final_relu(hidden + shortcut)
90
+
91
+
92
class TcnClassifier(nn.Module):
    """Sequence classifier: 1x1 input projection, three temporal blocks, linear head.

    The three blocks use dilations 1, 2 and 4; the head reads the features of
    the final time step only.
    """

    def __init__(self, feature_count, class_count, channel_width, kernel_size, dropout):
        super().__init__()
        self.input_projection = nn.Conv1d(feature_count, channel_width, kernel_size=1)
        self.block1 = TemporalBlock(channel_width, channel_width, kernel_size, dilation=1, dropout=dropout)
        self.block2 = TemporalBlock(channel_width, channel_width, kernel_size, dilation=2, dropout=dropout)
        self.block3 = TemporalBlock(channel_width, channel_width, kernel_size, dilation=4, dropout=dropout)
        self.classifier = nn.Linear(channel_width, class_count)

    def forward(self, input_sequence):
        """Compute class logits from ``input_sequence``.

        Axes 1 and 2 are swapped first so the feature axis becomes the
        channel axis expected by ``Conv1d``.
        """
        hidden = self.input_projection(input_sequence.transpose(1, 2))
        for block in (self.block1, self.block2, self.block3):
            hidden = block(hidden)
        last_step = hidden[:, :, -1]
        return self.classifier(last_step)
110
+
111
+
112
def set_random_seed(seed):
    """Seed Python's ``random``, NumPy and PyTorch (CPU and CUDA) generators.

    Args:
        seed: Integer seed applied to every generator.
    """
    # Imported locally so the block does not depend on a module-level import.
    import random

    # The stdlib generator was previously left unseeded, so any code relying
    # on ``random`` was not reproducible between runs.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
116
+
117
+
118
def load_sequence_table(input_file_path):
    """Read a sequence CSV and split it into a feature matrix and raw labels.

    Every column except ``video_id``, ``exercise_label``, ``start_frame_index``
    and ``end_frame_index`` is treated as a feature.

    Args:
        input_file_path: Path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        Tuple ``(features, labels)``: a float32 array with one row per CSV row,
        and the ``exercise_label`` column as an array.
    """
    table = pd.read_csv(input_file_path)

    # Dropping the metadata columns keeps the remaining columns in file order.
    feature_table = table.drop(
        columns=["video_id", "exercise_label", "start_frame_index", "end_frame_index"],
        errors="ignore",
    )
    features = feature_table.to_numpy(dtype=np.float32)
    labels = table["exercise_label"].to_numpy()
    return features, labels
125
+
126
+
127
def scale_and_reshape_features(train_features, validation_features, test_features, sequence_length, feature_count):
    """Standardise flattened sequences and reshape them to ``(N, sequence_length, feature_count)``.

    The scaler is fitted on the training split only and then applied to all
    three splits.

    Args:
        train_features: 2-D array of flattened training sequences.
        validation_features: 2-D array of flattened validation sequences.
        test_features: 2-D array of flattened test sequences.
        sequence_length: Time steps per sequence.
        feature_count: Features per time step.

    Returns:
        Tuple ``(scaled_train, scaled_validation, scaled_test, scaler)``.

    Raises:
        ValueError: If a split is not 2-D or its width differs from
            ``sequence_length * feature_count``.
    """
    # reshape(-1, ...) succeeds whenever the total element count happens to be
    # divisible, which would silently change the number of sequences and
    # misalign them with their labels. Check the width explicitly first.
    expected_width = sequence_length * feature_count
    named_splits = (
        ("train", train_features),
        ("validation", validation_features),
        ("test", test_features),
    )
    for split_name, split_features in named_splits:
        split_shape = np.shape(split_features)
        if len(split_shape) != 2 or split_shape[1] != expected_width:
            raise ValueError(
                f"{split_name} features have shape {split_shape}; expected (N, {expected_width}) "
                f"for sequence_length={sequence_length} and feature_count={feature_count}."
            )

    scaler = StandardScaler()
    scaler.fit(train_features)

    scaled_train = scaler.transform(train_features).reshape(-1, sequence_length, feature_count)
    scaled_validation = scaler.transform(validation_features).reshape(-1, sequence_length, feature_count)
    scaled_test = scaler.transform(test_features).reshape(-1, sequence_length, feature_count)

    return scaled_train, scaled_validation, scaled_test, scaler
136
+
137
+
138
def build_dataloaders(train_features, validation_features, test_features, train_labels, validation_labels, test_labels, batch_size, num_workers):
    """Wrap the three splits in DataLoaders.

    Features are converted to float32 tensors and labels to int64 tensors.
    Only the training loader shuffles.

    Args:
        train_features, validation_features, test_features: Array-like features.
        train_labels, validation_labels, test_labels: Array-like integer labels.
        batch_size: Samples per batch for all three loaders.
        num_workers: Worker processes for all three loaders.

    Returns:
        Tuple ``(train_loader, validation_loader, test_loader)``.
    """
    # Pinned host memory only helps host-to-GPU copies; requesting it on a
    # CPU-only machine just makes DataLoader emit a warning.
    pin_memory = torch.cuda.is_available()

    def make_loader(features, labels, shuffle):
        dataset = SequenceDataset(
            torch.tensor(features, dtype=torch.float32),
            torch.tensor(labels, dtype=torch.long),
        )
        return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers, pin_memory=pin_memory)

    train_loader = make_loader(train_features, train_labels, shuffle=True)
    validation_loader = make_loader(validation_features, validation_labels, shuffle=False)
    test_loader = make_loader(test_features, test_labels, shuffle=False)

    return train_loader, validation_loader, test_loader
156
+
157
+
158
def run_training_epoch(model, data_loader, optimizer, loss_function, device):
    """Train ``model`` for one pass over ``data_loader`` and return the mean loss.

    Args:
        model: Module to optimise; switched to training mode.
        data_loader: Iterable of ``(features, labels)`` batches.
        optimizer: Optimizer stepping the model parameters.
        loss_function: Callable ``(logits, labels) -> scalar loss``.
        device: Device the batches are moved to.

    Returns:
        Sample-weighted mean of the batch losses.

    Raises:
        ValueError: If the loader yields no samples.
    """
    model.train()
    cumulative_loss = 0.0
    sample_count = 0

    for feature_batch, label_batch in data_loader:
        feature_batch = feature_batch.to(device, non_blocking=True)
        label_batch = label_batch.to(device, non_blocking=True)

        optimizer.zero_grad(set_to_none=True)
        logits = model(feature_batch)
        loss = loss_function(logits, label_batch)
        loss.backward()
        optimizer.step()

        batch_sample_count = feature_batch.size(0)
        cumulative_loss += loss.item() * batch_sample_count
        sample_count += batch_sample_count

    # Divide by the samples actually processed rather than len(dataset): the
    # two differ when the loader drops the last batch or uses a sampler.
    if sample_count == 0:
        raise ValueError("Training data loader produced no samples.")
    return cumulative_loss / sample_count
176
+
177
+
178
def run_validation_epoch(model, data_loader, loss_function, device):
    """Evaluate ``model`` on ``data_loader`` without gradients and return the mean loss.

    Args:
        model: Module to evaluate; switched to evaluation mode.
        data_loader: Iterable of ``(features, labels)`` batches.
        loss_function: Callable ``(logits, labels) -> scalar loss``.
        device: Device the batches are moved to.

    Returns:
        Sample-weighted mean of the batch losses.

    Raises:
        ValueError: If the loader yields no samples.
    """
    model.eval()
    cumulative_loss = 0.0
    sample_count = 0

    with torch.inference_mode():
        for feature_batch, label_batch in data_loader:
            feature_batch = feature_batch.to(device, non_blocking=True)
            label_batch = label_batch.to(device, non_blocking=True)
            logits = model(feature_batch)
            loss = loss_function(logits, label_batch)

            batch_sample_count = feature_batch.size(0)
            cumulative_loss += loss.item() * batch_sample_count
            sample_count += batch_sample_count

    # Divide by the samples actually processed rather than len(dataset): the
    # two differ when the loader drops the last batch or uses a sampler.
    if sample_count == 0:
        raise ValueError("Validation data loader produced no samples.")
    return cumulative_loss / sample_count
192
+
193
+
194
def predict_labels(model, data_loader, device):
    """Run ``model`` over ``data_loader`` and collect true and predicted class indices.

    Args:
        model: Module producing per-class logits; switched to evaluation mode.
        data_loader: Iterable of ``(features, labels)`` batches.
        device: Device the feature batches are moved to.

    Returns:
        Tuple ``(true_labels, predicted_labels)`` of NumPy arrays in loader order.
    """
    model.eval()
    target_chunks = []
    prediction_chunks = []

    with torch.inference_mode():
        for inputs, targets in data_loader:
            scores = model(inputs.to(device, non_blocking=True))
            # The predicted class is the index of the largest logit.
            prediction_chunks.append(torch.argmax(scores, dim=1).cpu().numpy())
            # Labels are never moved to the device, so they convert directly.
            target_chunks.append(targets.numpy())

    return np.concatenate(target_chunks), np.concatenate(prediction_chunks)
210
+
211
+
212
def save_confusion_matrix_figure(confusion_matrix_array, class_names, output_file_path):
    """Render a confusion matrix as an annotated heat map and save it to disk.

    Args:
        confusion_matrix_array: 2-D array; rows are labelled "True label" and
            columns "Predicted label".
        class_names: Tick labels for both axes.
        output_file_path: ``pathlib.Path`` of the image; missing parent
            directories are created.
    """
    figure = plt.figure(figsize=(8, 6))
    axis = figure.add_subplot(111)
    heatmap = axis.imshow(confusion_matrix_array, interpolation="nearest", cmap="Blues")
    axis.figure.colorbar(heatmap, ax=axis)

    tick_positions = np.arange(len(class_names))
    axis.set_xticks(tick_positions)
    axis.set_yticks(tick_positions)
    axis.set_xticklabels(class_names, rotation=45, ha="right")
    axis.set_yticklabels(class_names)
    axis.set_xlabel("Predicted label")
    axis.set_ylabel("True label")
    axis.set_title("Test Confusion Matrix")

    # Cells above half of the maximum get white text, the rest black text.
    if confusion_matrix_array.size > 0:
        threshold = confusion_matrix_array.max() / 2.0
    else:
        threshold = 0.0

    cell_indices = np.ndindex(confusion_matrix_array.shape[0], confusion_matrix_array.shape[1])
    for row_index, column_index in cell_indices:
        cell_value = confusion_matrix_array[row_index, column_index]
        text_color = "white" if cell_value > threshold else "black"
        axis.text(column_index, row_index, str(cell_value), ha="center", va="center", color=text_color)

    figure.tight_layout()
    output_file_path.parent.mkdir(parents=True, exist_ok=True)
    figure.savefig(output_file_path, dpi=180)
    plt.close(figure)
236
+
237
+
238
def main():
    """Train, evaluate, and export the TCN sequence classifier.

    Pipeline, in order:

    1. Parse command-line arguments and create the output directory.
    2. Load the train/validation/test tables and encode the labels with a
       ``LabelEncoder`` fitted on the training labels only.
    3. Scale/reshape the features and wrap them in data loaders (both steps
       are delegated to helpers defined elsewhere in this file).
    4. Train a ``TcnClassifier`` with Adam, ``ReduceLROnPlateau`` and early
       stopping, both driven by the validation loss.
    5. Restore the weights with the lowest validation loss, persist the
       model/scaler/encoder, then evaluate on the test split.
    6. Write the loss history, test metrics, confusion-matrix values and the
       confusion-matrix figure to the output directory.

    Raises:
        ValueError: propagated from ``LabelEncoder.transform`` when the
            validation or test split contains a label that never occurs in
            the training split.
    """
    args = parse_args()

    train_file_path = Path(args.train_file)
    validation_file_path = Path(args.val_file)
    test_file_path = Path(args.test_file)
    output_directory_path = Path(args.output_dir)
    output_directory_path.mkdir(parents=True, exist_ok=True)

    sequence_length = args.sequence_length
    feature_count = args.feature_count
    channel_width = args.channel_width
    kernel_size = args.kernel_size
    dropout_probability = args.dropout
    learning_rate = args.learning_rate
    batch_size = args.batch_size
    maximum_epochs = args.epochs
    early_stopping_patience = args.early_stopping_patience
    lr_plateau_patience = args.lr_plateau_patience
    lr_plateau_factor = args.lr_plateau_factor
    num_workers = args.num_workers
    seed = args.seed

    set_random_seed(seed)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    train_features, train_raw_labels = load_sequence_table(train_file_path)
    validation_features, validation_raw_labels = load_sequence_table(validation_file_path)
    test_features, test_raw_labels = load_sequence_table(test_file_path)

    # The encoder is fitted on the training labels only; the other splits are
    # merely transformed, so an unseen label there fails loudly.
    label_encoder = LabelEncoder()
    label_encoder.fit(train_raw_labels)
    train_labels = label_encoder.transform(train_raw_labels)
    validation_labels = label_encoder.transform(validation_raw_labels)
    test_labels = label_encoder.transform(test_raw_labels)

    scaled_train, scaled_validation, scaled_test, scaler = scale_and_reshape_features(
        train_features=train_features,
        validation_features=validation_features,
        test_features=test_features,
        sequence_length=sequence_length,
        feature_count=feature_count,
    )

    train_loader, validation_loader, test_loader = build_dataloaders(
        train_features=scaled_train,
        validation_features=scaled_validation,
        test_features=scaled_test,
        train_labels=train_labels,
        validation_labels=validation_labels,
        test_labels=test_labels,
        batch_size=batch_size,
        num_workers=num_workers,
    )

    class_count = len(label_encoder.classes_)
    model = TcnClassifier(feature_count, class_count, channel_width, kernel_size, dropout_probability).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode="min",
        factor=lr_plateau_factor,
        patience=lr_plateau_patience,
    )
    loss_function = nn.CrossEntropyLoss()

    training_losses = []
    validation_losses = []
    best_validation_loss = float("inf")
    best_model_state = None
    epochs_without_improvement = 0

    for epoch_index in range(maximum_epochs):
        training_loss = run_training_epoch(model, train_loader, optimizer, loss_function, device)
        validation_loss = run_validation_epoch(model, validation_loader, loss_function, device)
        scheduler.step(validation_loss)

        training_losses.append(training_loss)
        validation_losses.append(validation_loss)

        print(f"Epoch {epoch_index + 1}/{maximum_epochs} - train_loss: {training_loss:.6f} - val_loss: {validation_loss:.6f}")

        if validation_loss < best_validation_loss:
            best_validation_loss = validation_loss
            # Snapshot on the CPU and clone so that later optimizer steps
            # cannot modify the saved best weights in place.
            best_model_state = {key: value.detach().cpu().clone() for key, value in model.state_dict().items()}
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1

        if epochs_without_improvement >= early_stopping_patience:
            print("Early stopping triggered.")
            break

    # best_model_state stays None when no epoch ran or no epoch improved on
    # +inf (e.g. NaN losses); in that case the current weights are kept.
    if best_model_state is not None:
        model.load_state_dict(best_model_state)

    # Persist the trained artifacts before evaluating, so a failure in the
    # reporting code below cannot discard the result of the training run.
    torch.save(model.state_dict(), output_directory_path / "tcn_model.pt")
    joblib.dump(scaler, output_directory_path / "tcn_scaler.pkl")
    joblib.dump(label_encoder, output_directory_path / "tcn_label_encoder.pkl")

    test_true_labels, test_predicted_labels = predict_labels(model, test_loader, device)

    accuracy = accuracy_score(test_true_labels, test_predicted_labels)
    precision = precision_score(test_true_labels, test_predicted_labels, average="weighted", zero_division=0)
    recall = recall_score(test_true_labels, test_predicted_labels, average="weighted", zero_division=0)
    f1 = f1_score(test_true_labels, test_predicted_labels, average="weighted", zero_division=0)

    # Pin the label set to every encoded class. Without it, a class missing
    # from both the test labels and the predictions makes classification_report
    # raise (target_names length mismatch) and shrinks the confusion matrix
    # below the number of tick labels drawn by the figure.
    class_labels = np.arange(class_count)
    report_text = classification_report(
        test_true_labels,
        test_predicted_labels,
        labels=class_labels,
        target_names=label_encoder.classes_,
        zero_division=0,
    )
    matrix = confusion_matrix(test_true_labels, test_predicted_labels, labels=class_labels)

    print("\nTest metrics")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-score: {f1:.4f}")
    print("\nClassification report")
    print(report_text)

    training_history = {"training_loss": training_losses, "validation_loss": validation_losses}
    summary_metrics = {
        "accuracy": float(accuracy),
        "precision_weighted": float(precision),
        "recall_weighted": float(recall),
        "f1_weighted": float(f1),
    }

    pd.DataFrame(training_history).to_csv(output_directory_path / "training_history.csv", index=False)
    pd.DataFrame([summary_metrics]).to_csv(output_directory_path / "test_metrics.csv", index=False)
    pd.DataFrame(matrix).to_csv(output_directory_path / "test_confusion_matrix_values.csv", index=False)

    save_confusion_matrix_figure(matrix, label_encoder.classes_, output_directory_path / "test_confusion_matrix.png")
    print(f"Saved artifacts to: {output_directory_path}")
369
+
370
+
371
# Run the training pipeline only when this file is executed as a script,
# so importing the module does not start a training run.
if __name__ == "__main__":
    main()
models/tcn/weights/similarity_centroids.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:016e61ce08bf8afab60ec1739b2be7c33cdc1b14da65a23fad73c005eb9e2b14
3
+ size 38051
models/tcn/weights/tcn_label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f4cfd54771b8c9476a062d20dcc10b2f1a400a916e4eafff447bae05efe17fc
3
+ size 530
models/tcn/weights/tcn_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e83fdd174a58aad672c316c19a051cec4b10370502c252206ffc85c735cb0366
3
+ size 1232031
models/tcn/weights/tcn_scaler.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed2cfbc36c088284e8ecab353ebae20a47d0083b40b15921369593c2014c72a2
3
+ size 56775