k23064919 committed on
Commit 7875de0 · 2 Parent(s): 534796e 1de07b3

Merge branch 'develop' of https://github.kcl.ac.uk/K23064919/smallGroupProject into develop

dataPrep/data_preparation.py CHANGED
@@ -6,7 +6,7 @@ import random
 import numpy as np
 import pandas as pd
 from datasets import load_dataset
-from helpers.create_dataset import load_subset_from_dataset
+from helpers.create_dataset import make_subset
 from helpers.transforms_loaders import make_dataset_loaders
 
 # --- Visualization ---
@@ -15,17 +15,28 @@ import matplotlib.pyplot as plt
 
 # --- PyTorch (Machine Learning) ---
 import torch
-from torchvision import transforms
-from torch.utils.data import DataLoader
 
 # --- Experiment Tracking ---
-from clearml import Task, Logger, Dataset
+from clearml import Task
 
 
-# Setting up the SEED to be able to repeat experiments
+# -------- Controllable parameters --------
+# Dataset parameters
 SEED = 42
+DATASET_LINK = "DScomp380/plant_village"
 DATASET_SUBSET_RATIO = 0.25
 
+# Augmentation parameters
+ROTATION = 30
+BRIGHTNESS = 0.2
+SATURATION = 0.2
+BLUR = 3
+
+# DataLoader parameters
+BATCH_SIZE = 32
+TEST_SIZE = 0.3
+
+# Setting up the SEED to be able to repeat experiments
 random.seed(SEED)
 np.random.seed(SEED)
 torch.manual_seed(SEED)
@@ -34,20 +45,37 @@ if torch.cuda.is_available():
 
 
 # ----- ClearML Setup -----
-task = Task.init(project_name= 'Small Group CW', task_name = 'data_prep')
+task = Task.init(
+    project_name='Small Group Project',
+    task_name='Data Preparation',
+    task_type=Task.TaskTypes.data_processing
+)
 task.set_random_seed(SEED)
 clearml_logger = task.get_logger()
 
-# Log subset config to ClearML
-task.connect_configuration(
-    {"subset_ratio": DATASET_SUBSET_RATIO},
-    name="Data subsetting"
-)
+# -------- Track full configuration in ClearML --------
+task.connect({
+    "seed": SEED,
+    "dataset": {
+        "link": DATASET_LINK,
+        "subset_ratio": DATASET_SUBSET_RATIO,
+    },
+    "augmentation": {
+        "rotation": ROTATION,
+        "brightness": BRIGHTNESS,
+        "saturation": SATURATION,
+        "blur": BLUR
+    },
+    "dataloaders": {
+        "batch_size": BATCH_SIZE,
+        "test_size": TEST_SIZE
+    }
+})
 
 # ----- Load a subset from a given dataset & track with ClearML -----
-data_plants, prototyping_dataset, features, clearml_dataset = load_subset_from_dataset(
-    SEED, DATASET_SUBSET_RATIO, clearml_logger
+data_plants, prototyping_dataset, features, clearml_dataset = make_subset(
+    DATASET_LINK, DATASET_SUBSET_RATIO, clearml_logger
 )
 
@@ -56,7 +84,7 @@ data_plants, prototyping_dataset, features, clearml_dataset = load_subset_from_d
 # Reformatting the label feature to understand bias
 labels_list = prototyping_dataset['label']
 df_labels = pd.Series(labels_list)
-label_count = df_labels.value_counts(sort = False)
+label_count = df_labels.value_counts(sort=False)
 
 # Checking the amount of samples in each class and logging it to clearML
 
@@ -100,12 +128,11 @@ plt.title("Class Distribution in Prototype Dataset")
 plt.xlabel("Class")
 plt.ylabel("Count")
 plt.tight_layout()
-plt.savefig("class_distribution.png")
 
-clearml_logger.report_image(
+clearml_logger.report_matplotlib_figure(
     title="EDA Class Distribution",
     series="Prototype Subset",
-    local_path="class_distribution.png",
+    figure=plt.gcf(),
     iteration=1
 )
 
@@ -113,9 +140,16 @@ clearml_logger.report_image(
 # ----------------------------------------------------------------------
 if __name__ == "__main__":
 
-    # ------------------- Dataset splits ----------------------------------
+    # ---------------- Dataset splits ----------------
+    aug_config = {
+        'rotation': ROTATION,
+        'brightness': BRIGHTNESS,
+        'saturation': SATURATION,
+        'blur': BLUR
+    }
+
     prototype_loaders = make_dataset_loaders(
-        prototyping_dataset, seed=SEED, batch_size=32, test_size=0.3
+        prototyping_dataset, SEED, BATCH_SIZE, TEST_SIZE, aug_config
     )
 
     print("\n--- Handoff Test Successful ---")
@@ -123,8 +157,15 @@ if __name__ == "__main__":
     print(f"Prototype Validation loader batches: {len(prototype_loaders['val'])}")
     print(f"Prototype Test loader batches: {len(prototype_loaders['test'])}")
 
+    clearml_logger.report_text(
+        f"Prototype loaders created: "
+        f"train={len(prototype_loaders['train'])}, "
+        f"val={len(prototype_loaders['val'])}, "
+        f"test={len(prototype_loaders['test'])}"
+    )
+
     final_loaders = make_dataset_loaders(
-        data_plants, seed=SEED, batch_size=32, test_size=0.3
+        data_plants, SEED, BATCH_SIZE, TEST_SIZE, aug_config
    )
 
     print("\n--- Handoff Test Successful ---")
@@ -137,6 +178,8 @@ if __name__ == "__main__":
         {"dataset_id": clearml_dataset.id},
         name="Dataset Metadata"
     )
+    task.mark_completed()
+
 
     # Close the ClearML task
     task.close()
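
Note: the move from task.connect_configuration(...) to task.connect(...) with a nested dict means ClearML flattens the keys into its hyperparameter table, so a downstream task can read them back by path. A minimal read-back sketch (assuming a completed "Data Preparation" task exists in this project; the "General/" prefix matches how run_training.py below consumes these values):

    from clearml import Task

    prep = Task.get_task(project_name="Small Group Project", task_name="Data Preparation")
    params = prep.get_parameters()  # flattened keys, e.g. "General/dataset/link", "General/seed"
    subset_ratio = float(params["General/dataset/subset_ratio"])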
dataPrep/helpers/create_dataset.py CHANGED
@@ -2,19 +2,23 @@
 A collection of dataset (DS) loading and subsetting functions.
 """
 
+import os
 import random
 import numpy as np
 from datasets import load_dataset
 from clearml import Dataset
 
 
-# Load a DS from HuggingFace Link and subset - upload both to ClearML
-def load_subset_from_dataset(seed, subset_ratio, clearml_logger):
-    DATASET_LINK = "DScomp380/plant_village"
+'''
+Load a DS from a HuggingFace link & randomly subset it - upload the subset to ClearML.
+Subset indices are uploaded to ClearML for reproducibility.
+REPRODUCE: load the full DS, then load the indices from ClearML to get the same subset.
+'''
+def make_subset(dataset_link, subset_ratio, clearml_logger):
 
     # Load dataset
     try:
-        ds = load_dataset(DATASET_LINK)
+        ds = load_dataset(dataset_link)
     except Exception as e:
         raise RuntimeError(f"Error loading the dataset: {e}")
 
@@ -31,19 +35,26 @@ def load_subset_from_dataset(seed, subset_ratio, clearml_logger):
     subset_indices = indices[:subset_size]
 
     prototyping_dataset = data_plants.select(subset_indices)
-
-    # ---------- Register subset in ClearML ----------
+    # TODO: consider removing this later - we may not need to
+    # upload the subset on every run (unconfirmed)
+    # Register subset in ClearML
     clearml_dataset = Dataset.create(
         dataset_name="Plant Village Prototype",
-        dataset_project="smallGroupProject",
-        dataset_tags=["prototype", "subset"]
+        dataset_project="Small Group Project",
+        dataset_tags=["prototype", "subset"],
+        use_current_task=False
     )
+    clearml_dataset.add_tags([
+        f"subset_ratio_{subset_ratio}",
+        "hf_source"
+    ])
 
     # Save indices
     subset_path = "subset_indices.npy"
     np.save(subset_path, subset_indices)
     clearml_dataset.add_files(subset_path)
     clearml_dataset.set_metadata({
+        "huggingface_dataset": dataset_link,
         "subset_ratio": subset_ratio,
         "total_samples": len(prototyping_dataset)
     })
@@ -52,4 +63,7 @@ def load_subset_from_dataset(seed, subset_ratio, clearml_logger):
     clearml_dataset.finalize()
     clearml_logger.report_text(f"Created ClearML Dataset: {clearml_dataset.id}")
 
+    # Clean up local file
+    os.remove(subset_path)
+
     return data_plants, prototyping_dataset, features, clearml_dataset
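
The REPRODUCE note above is exactly what trainingModel/run_training.py (added below) implements. A minimal standalone sketch, assuming you have the ID of a finalized dataset:

    import os
    import numpy as np
    from clearml import Dataset
    from datasets import load_dataset

    # Fetch the stored indices, reload the full dataset, and re-select the subset
    folder = Dataset.get(dataset_id="<dataset_id>").get_local_copy()
    indices = np.load(os.path.join(folder, "subset_indices.npy"))
    full = load_dataset("DScomp380/plant_village")["train"]
    subset = full.select(indices)  # same rows as the data-prep run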
dataPrep/helpers/transforms_loaders.py CHANGED
@@ -6,17 +6,16 @@ from torchvision import transforms
 from torch.utils.data import DataLoader
 
 
-
-# Defines and returns the normalization and augmentation pipelines.
-def make_transform_pipelines():
-
-    # Standard ImageNet mean and std - Used to normalize the tensors
-    IMAGENET_MEAN = [0.485, 0.456, 0.406]
-    IMAGENET_STD = [0.229, 0.224, 0.225]
+# Standard ImageNet mean and std - used to normalize the tensors
+IMAGENET_MEAN = [0.485, 0.456, 0.406]
+IMAGENET_STD = [0.229, 0.224, 0.225]
+IMAGE_SIZE = (256, 256)
+# Defines and returns the normalization pipeline.
+def make_norm_pipeline():
 
     # Pipeline ensures image format is consistent (for Val/Test)
     normalisation = transforms.Compose([
-
+        transforms.Resize(IMAGE_SIZE),
         # Convert PIL Image to a PyTorch Tensor, scales pixel values from [0, 255] to [0.0, 1.0]
         transforms.ToTensor(),
 
@@ -24,43 +23,62 @@ def make_transform_pipelines():
         transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
     ])
 
+    return normalisation
+
+# Defines and returns the augmentation (rotation, brightness, saturation, blur) pipeline.
+def make_augment_pipeline(aug_config):
+
+    rotation = aug_config['rotation']
+    brightness = aug_config['brightness']
+    saturation = aug_config['saturation']
+    blur = aug_config['blur']
+
     # Augmentation pipeline (to create "new" images by changing some parameters)
     augmentation = transforms.Compose([
-
+        transforms.Resize(IMAGE_SIZE),
         # Randomly changing some parameters of pictures to enrich dataset
-        transforms.RandomRotation(30),
-        transforms.ColorJitter(brightness=0.2, saturation=0.2),
-        transforms.GaussianBlur(3),
+        transforms.RandomRotation(rotation),
+        transforms.ColorJitter(brightness=brightness, saturation=saturation),
+        transforms.GaussianBlur(blur),
         transforms.ToTensor(),
         transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
     ])
 
-    return normalisation, augmentation
+    return augmentation
 
 
 """
 Creates and returns DataLoaders (train, val, test) for a given dataset.
 Performs a 70/15/15 split
 """
-def make_dataset_loaders(dataset, seed, batch_size=32, test_size=0.3):
+def make_dataset_loaders(dataset, seed, batch_size, test_size, aug_config):
 
     # Define transformation pipelines for the dataset
-    normalisation, augmentation = make_transform_pipelines()
+    normalisation = make_norm_pipeline()
+    augmentation = make_augment_pipeline(aug_config)
+
+    def apply_augmentation(batch):
+        batch['image'] = [augmentation(x) for x in batch['image']]
+        return batch
+
+    def apply_normalisation(batch):
+        batch['image'] = [normalisation(x) for x in batch['image']]
+        return batch
 
     # 70/30 split creates train set
     split_1 = dataset.train_test_split(test_size=test_size, seed=seed)
     train_split = split_1['train']
     remaining_split = split_1['test']
 
     # 15/15 split on remaining data - validation and test sets
-    val_split = test_size/2
+    val_split = 0.5
     split_2 = remaining_split.train_test_split(test_size=val_split, seed=seed)
     val_split, test_split = split_2['train'], split_2['test']
 
     # Put each split through pipelines
-    train_split.set_transform(augmentation)
-    val_split.set_transform(normalisation)
-    test_split.set_transform(normalisation)
+    train_split.set_transform(apply_augmentation)
+    val_split.set_transform(apply_normalisation)
+    test_split.set_transform(apply_normalisation)
 
     # Create dataloader for each
     train_loader = DataLoader(train_split, batch_size=batch_size, shuffle=True)
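
The apply_* wrappers are the substantive fix in this file: datasets.Dataset.set_transform passes the transform a batch dict (column name mapped to a list of values), not a single PIL image, so handing it a transforms.Compose directly would fail at fetch time. A quick sanity check, assuming loaders built as in data_preparation.py:

    loaders = make_dataset_loaders(prototyping_dataset, SEED, BATCH_SIZE, TEST_SIZE, aug_config)
    batch = next(iter(loaders["train"]))
    print(batch["image"].shape)  # expected: torch.Size([32, 3, 256, 256])
    print(batch["label"][:4])    # integer class IDs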
models/__init__.py ADDED
File without changes
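
(The empty __init__.py added here, and the matching one under trainingModel/ below, turn these directories into importable packages - the absolute imports in run_training.py depend on them.)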
models/modelOne.py CHANGED
@@ -13,7 +13,7 @@ class modelOne(nn.Module) :
         self.conv2 = nn.Conv2d(6, 16, 5, padding=2)
         self.batchNorm2 = nn.BatchNorm2d(16)
 
-        self.fc1 = nn.Linear(16*64*64, 512)
+        self.fc1 = nn.Linear(63504, 512)
         self.dropout = nn.Dropout(0.5)
 
         self.fc2 = nn.Linear(512, 84)
@@ -23,6 +23,7 @@ class modelOne(nn.Module) :
         x = self.pool(F.relu(self.batchNorm1(self.conv1(x))))
         x = self.pool(F.relu(self.batchNorm2(self.conv2(x))))
         x = torch.flatten(x, 1)
+        print("Flattened size:", x.shape[1])
         x = self.dropout(x)
         x = F.relu(self.fc1(x))
         x = F.relu(self.fc2(x))
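
The magic number 63504 is easier to trust with the arithmetic written out. Assuming conv1 is an unpadded 5x5 convolution and self.pool is a 2x2 max-pool (neither definition is shown in this hunk), a 256x256 input flows as:

    # 256x256 input (transforms.Resize(IMAGE_SIZE) upstream)
    # conv1, 5x5, no padding: 256 - 5 + 1 = 252
    # pool, 2x2:              252 / 2     = 126
    # conv2, 5x5, padding=2:  126 (size preserved)
    # pool, 2x2:              126 / 2     = 63
    # flatten: 16 channels * 63 * 63 = 63504

which matches what the new debug print reports; the old 16*64*64 assumed size-preserving padding on both convs.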
trainingModel/Training.py CHANGED
@@ -2,16 +2,10 @@ import torch
 import torch.nn as nn
 import numpy as np
 from torcheval.metrics import MulticlassAccuracy
-#from torchvision import transforms
-
-
-
 from torch.utils.data import DataLoader
-#from torchvision.datasets import MNIST
 
-#import torchvision.utils
 
-# loss, optimizer, training loop, validation, best model saving
+# TODO: fix runtime errors
 
 
 def train_model(
@@ -26,7 +20,19 @@ def train_model(
     num_classes : int = 39,
 
 ):
-
+    """
+    Trains the given model and returns a training_metrics dict with:
+    - "losses": numpy array of loss per batch
+    - "accuracies": numpy array of running accuracy per batch
+    - "val_accuracies": numpy array of accuracy per epoch
+    - "best_accuracy": highest validation accuracy achieved
+
+    Expected batch format:
+        batch["image"] → Tensor [B, C, H, W]
+        batch["label"] → Tensor [B] with class IDs (int64)
+    Model output:
+        outputs → Tensor [B, num_classes] (logits)
+    """
 
 
     # Move model to device
@@ -43,19 +49,20 @@ def train_model(
     # Arrays to log metrics
     num_batches = len(train_loader)
 
+    if num_batches == 0:
+        raise RuntimeError("Empty train loader - cannot train on zero batches")
+
     # Store training losses and accuracies for every batch
     # num_batches is the number of batches for every epoch
     training_losses = np.zeros(num_batches * n_epochs)
     training_accuracies = np.zeros(num_batches * n_epochs)
 
-
     # store validation accuracy for every epoch
     val_accuracies = np.zeros(n_epochs)
+
     # keep track of best validation accuracy and best model
     best_accuracy = 0.0
 
-
-
     #----------------------
     # training loop
     #----------------------
@@ -69,16 +76,14 @@ def train_model(
 
             # move to GPU memory
             inputs = batch["image"].to(device)
-            labels = batch["label"].to(device)
+            labels = batch["label"].to(device).long()
 
             # flatten if not cnn REVISE LATER
             if flatten_input:
                 inputs = inputs.view(inputs.size(0), -1)
 
-
             optimizer.zero_grad()
 
-
             # Forward pass
             outputs = model(inputs)
             loss = criterion(outputs, labels)
@@ -92,40 +97,31 @@ def train_model(
             # log the loss value
             training_losses[epoch * num_batches + i] = loss.item()
 
-            # Compute accuracy of the batch.
-
-
             #updates the accuracy computation with new data
             train_accuracy_fn.update(outputs, labels)
 
             #compute accuracy with the current data
             training_accuracies[epoch * num_batches + i] = train_accuracy_fn.compute().item()
 
-
-            # display some progress (every 200 batches)
-            # optional, you can comment out
-            # if i % 200 == 0:
-            #     print(f'Epoch {epoch + 1}, batch {i+1} of {len(train_loader)}')
-
         print(f'Epoch {epoch + 1} training complete')
 
-        # Validation after each epoch
+        # ----------------------
+        # validation loop
+        # ----------------------
+
         model.eval()
         val_accuracy_fn.reset()
 
 
-        # The context 'torch.no_grad()' tells pytorch we are not interested in computing
-        # gradients here, so forward pass is more efficient
        with torch.no_grad():
-            for i, batch in enumerate(val_loader):
+            for batch in val_loader:
                 inputs = batch["image"].to(device)
-                labels = batch["label"].to(device)
+                labels = batch["label"].to(device).long()
 
                 # flatten if not cnn REVISE LATER
                 if flatten_input:
                     inputs = inputs.view(inputs.size(0), -1)
 
-
                 outputs = model(inputs)
 
                 val_accuracy_fn.update(outputs, labels)
@@ -133,7 +129,6 @@ def train_model(
         current_accuracy = val_accuracy_fn.compute().item()
         val_accuracies[epoch] = current_accuracy
 
-
         # keep track of best validation accuracy and save best model so far
         if current_accuracy > best_accuracy:
             best_accuracy = current_accuracy
@@ -144,10 +139,12 @@ def train_model(
     print(f"\nTraining finished. Best val accuracy: {best_accuracy:.4f}")
     print(f"Best model weights saved to: {save_path}")
 
-    return training_losses, training_accuracies, val_accuracies, best_accuracy
+    training_metrics = {
+        "losses": training_losses,
+        "accuracies": training_accuracies,
+        "val_accuracies": val_accuracies,
+        "best_accuracy": best_accuracy,
+    }
 
-    #tweak later
-    #best_model = MNISTNet().to(device)
-    #best_model.load_state_dict(
-    #    torch.load('mnist-torch-best_model.pt', map_location=device))
+    return training_metrics
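
Two details worth noting. Returning a dict instead of a 4-tuple keeps call sites readable and lets fields be added later without breaking them. The new .long() casts matter because nn.CrossEntropyLoss and torcheval's MulticlassAccuracy.update expect class targets as int64 indices; a toy illustration (shapes are hypothetical):

    import torch
    import torch.nn as nn

    logits = torch.randn(4, 39)          # [B, num_classes]
    labels = torch.tensor([3, 1, 0, 7])  # int64 class IDs
    print(nn.CrossEntropyLoss()(logits, labels))  # works: targets are class indices
    # The same call with labels.float() raises, since float targets of
    # shape [B] are rejected - hence the .long() cast in the training loop.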
trainingModel/__init__.py ADDED
File without changes
trainingModel/run_training.py ADDED
@@ -0,0 +1,149 @@
+import os
+import numpy as np
+
+from clearml import Task, Dataset
+from datasets import load_dataset
+from dataPrep.helpers.transforms_loaders import make_dataset_loaders
+
+import torch
+from models.modelOne import modelOne
+from trainingModel.Training import train_model
+
+
+# -------------- Load Data --------------
+
+all_tasks = Task.get_tasks(project_name="Small Group Project")
+if not all_tasks:
+    raise RuntimeError("No tasks found in project 'Small Group Project'")
+
+dp_tasks = [t for t in all_tasks if t.name == "Data Preparation"]
+if not dp_tasks:
+    raise RuntimeError("No 'Data Preparation' tasks found in this project!")
+
+# Latest Data Prep Task
+latest_task = max(dp_tasks, key=lambda t: t.id)
+DYNAMIC_TASK_ID = latest_task.id
+DATA_PREP = Task.get_task(task_id=DYNAMIC_TASK_ID)
+
+# Dataset ID
+config_objects = DATA_PREP.get_configuration_objects()
+raw_meta = config_objects["Dataset Metadata"]
+dataset_id = raw_meta.split("=")[1].strip().replace('"', "")
+
+# Load ClearML Dataset
+subset_clearml = Dataset.get(dataset_id=dataset_id)
+local_folder = subset_clearml.get_local_copy()
+
+subset_indices = np.load(os.path.join(local_folder, "subset_indices.npy"))
+
+# Load Dataset Parameters
+data_params = DATA_PREP.get_parameters()
+dataset_link = data_params['General/dataset/link']
+
+# Load Full Dataset
+try:
+    ds = load_dataset(dataset_link)
+except Exception as e:
+    raise RuntimeError(f"Error loading the dataset: {e}")
+
+full_dataset = ds['train']
+
+# Apply subset indices to full dataset - this gives you the same subset as data prep
+subset_dataset = full_dataset.select(subset_indices)
+
+# Extract parameters from data prep task - these will create the DataLoaders
+seed = int(data_params['General/seed'])
+batch_size = int(data_params['General/dataloaders/batch_size'])
+test_size = float(data_params['General/dataloaders/test_size'])
+
+aug_config = {
+    'rotation': float(data_params['General/augmentation/rotation']),
+    'brightness': float(data_params['General/augmentation/brightness']),
+    'saturation': float(data_params['General/augmentation/saturation']),
+    'blur': float(data_params['General/augmentation/blur'])
+}
+
+# Create DataLoaders using the parameters from data prep
+subset_loaders = make_dataset_loaders(
+    subset_dataset, seed, batch_size, test_size, aug_config
+)
+
+print("\n--- Handoff Test Successful ---")
+print(f"Prototype Train loader batches: {len(subset_loaders['train'])}")
+print(f"Prototype Validation loader batches: {len(subset_loaders['val'])}")
+print(f"Prototype Test loader batches: {len(subset_loaders['test'])}")
+
+full_loaders = make_dataset_loaders(
+    full_dataset, seed, batch_size, test_size, aug_config
+)
+
+print("\n--- Handoff Test Successful ---")
+print(f"Train loader batches: {len(full_loaders['train'])}")
+print(f"Validation loader batches: {len(full_loaders['val'])}")
+print(f"Test loader batches: {len(full_loaders['test'])}")
+# -------------- DATA PREP ENDS --------------
+
+
+# -------- ClearML Training Task Setup --------
+training_task = Task.init(
+    project_name="Small Group Project",
+    task_name="Model Training",
+    reuse_last_task_id=False,
+)
+
+training_logger = training_task.get_logger()
+training_task.connect({"data_prep_task_used": DYNAMIC_TASK_ID})
+
+# Training parameters - Modify these to experiment
+training_config = {
+    "num_classes": 39,
+    "n_epochs": 1,
+    "learning_rate": 1e-3,
+    "batch_size": batch_size,
+    "save_path": "best_model.pt",
+}
+training_task.connect(training_config)
+
+
+# -------- Build the ML model --------
+model = modelOne(noOfClasses=training_config["num_classes"])
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+# ------- Train the model (on subset for now) -------
+print("\n--- Starting Model Training on Subset ---")
+training_metrics = train_model(
+    model=model,
+    train_loader=subset_loaders['train'],
+    val_loader=subset_loaders['val'],
+    device=device,
+    n_epochs=training_config["n_epochs"],
+    lr=training_config["learning_rate"],
+    save_path=training_config["save_path"],
+)
+
+
+# ----------- Log metrics to ClearML -----------
+# Per-batch training losses and accuracies
+for i, loss in enumerate(training_metrics["losses"]):
+    training_logger.report_scalar("train", "loss_per_batch", value=loss, iteration=i)
+
+for i, acc in enumerate(training_metrics["accuracies"]):
+    training_logger.report_scalar("train", "accuracy_per_batch", value=acc, iteration=i)
+
+# Per-epoch validation accuracy
+for epoch, acc in enumerate(training_metrics["val_accuracies"]):
+    training_logger.report_scalar("validation", "accuracy_per_epoch", value=acc, iteration=epoch)
+
+training_logger.report_single_value("best_val_accuracy", training_metrics["best_accuracy"])
+
+# Upload best model as artifact
+training_task.upload_artifact("best_model", training_config["save_path"])
+
+print("\nTraining complete.")
+training_task.close()
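
One fragile spot worth flagging: raw_meta.split("=")[1] assumes ClearML serializes the connected {"dataset_id": ...} dict as a single key=value line. A more defensive parse (a sketch under that same assumption, not a documented format):

    import re

    # Tolerate either key=value or key: value serializations of the config object
    match = re.search(r'dataset_id["\']?\s*[:=]\s*["\']?([\w-]+)', raw_meta)
    if not match:
        raise RuntimeError(f"Could not parse dataset_id from: {raw_meta!r}")
    dataset_id = match.group(1)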