Yusuf committed on
Commit
9dbc9de
·
1 Parent(s): 2ace27a

FIX: modify positional params in aug pipeline

Browse files
dataPrep/data_preparation.py CHANGED
@@ -45,10 +45,33 @@ if torch.cuda.is_available():
45
 
46
 
47
  # ----- ClearML Setup -----
48
- task = Task.init(project_name='Small Group Project', task_name='data_prep')
 
 
 
 
49
  task.set_random_seed(SEED)
50
  clearml_logger = task.get_logger()
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  # ----- Load a subset from a given dataset & track with ClearML -----
53
  data_plants, prototyping_dataset, features, clearml_dataset = make_subset(
54
  DATASET_LINK, DATASET_SUBSET_RATIO, clearml_logger
@@ -110,12 +133,11 @@ plt.title("Class Distribution in Prototype Dataset")
110
  plt.xlabel("Class")
111
  plt.ylabel("Count")
112
  plt.tight_layout()
113
- plt.savefig("class_distribution.png")
114
 
115
- clearml_logger.report_image(
116
  title="EDA Class Distribution",
117
  series="Prototype Subset",
118
- local_path="class_distribution.png",
119
  iteration=1
120
  )
121
 
@@ -140,6 +162,13 @@ if __name__ == "__main__":
140
  print(f"Prototype Validation loader batches: {len(prototype_loaders['val'])}")
141
  print(f"Prototype Test loader batches: {len(prototype_loaders['test'])}")
142
 
 
 
 
 
 
 
 
143
  final_loaders = make_dataset_loaders(
144
  data_plants, SEED, BATCH_SIZE, TEST_SIZE, aug_config
145
  )
@@ -154,6 +183,8 @@ if __name__ == "__main__":
154
  {"dataset_id": clearml_dataset.id},
155
  name="Dataset Metadata"
156
  )
 
 
157
 
158
  # Close the ClearML task
159
  task.close()
 
45
 
46
 
47
  # ----- ClearML Setup -----
48
+ task = Task.init(
49
+ project_name='Small Group Project',
50
+ task_name='Data Preparation',
51
+ task_type=Task.TaskTypes.data_processing
52
+ )
53
  task.set_random_seed(SEED)
54
  clearml_logger = task.get_logger()
55
 
56
+ # --- Track full configuration in ClearML ---
57
+ task.connect({
58
+ "seed": SEED,
59
+ "dataset": {
60
+ "link": DATASET_LINK,
61
+ "subset_ratio": DATASET_SUBSET_RATIO,
62
+ },
63
+ "augmentation": {
64
+ "rotation": ROTATION,
65
+ "brightness": BRIGHTNESS,
66
+ "saturation": SATURATION,
67
+ "blur": BLUR
68
+ },
69
+ "dataloaders": {
70
+ "batch_size": BATCH_SIZE,
71
+ "test_size": TEST_SIZE
72
+ }
73
+ })
74
+
75
  # ----- Load a subset from a given dataset & track with ClearML -----
76
  data_plants, prototyping_dataset, features, clearml_dataset = make_subset(
77
  DATASET_LINK, DATASET_SUBSET_RATIO, clearml_logger
 
133
  plt.xlabel("Class")
134
  plt.ylabel("Count")
135
  plt.tight_layout()
 
136
 
137
+ clearml_logger.report_matplotlib_figure(
138
  title="EDA Class Distribution",
139
  series="Prototype Subset",
140
+ figure=plt.gcf(),
141
  iteration=1
142
  )
143
 
 
162
  print(f"Prototype Validation loader batches: {len(prototype_loaders['val'])}")
163
  print(f"Prototype Test loader batches: {len(prototype_loaders['test'])}")
164
 
165
+ clearml_logger.report_text(
166
+ f"Prototype loaders created: "
167
+ f"train={len(prototype_loaders['train'])}, "
168
+ f"val={len(prototype_loaders['val'])}, "
169
+ f"test={len(prototype_loaders['test'])}"
170
+ )
171
+
172
  final_loaders = make_dataset_loaders(
173
  data_plants, SEED, BATCH_SIZE, TEST_SIZE, aug_config
174
  )
 
183
  {"dataset_id": clearml_dataset.id},
184
  name="Dataset Metadata"
185
  )
186
+ task.mark_completed()
187
+
188
 
189
  # Close the ClearML task
190
  task.close()
dataPrep/helpers/create_dataset.py CHANGED
@@ -39,6 +39,10 @@ def make_subset(dataset_link, subset_ratio, clearml_logger):
39
  dataset_tags=["prototype", "subset"],
40
  use_current_task=True
41
  )
 
 
 
 
42
 
43
  # Save indices
44
  subset_path = "subset_indices.npy"
 
39
  dataset_tags=["prototype", "subset"],
40
  use_current_task=True
41
  )
42
+ clearml_dataset.add_tags([
43
+ f"subset_ratio_{subset_ratio}",
44
+ "hf_source"
45
+ ])
46
 
47
  # Save indices
48
  subset_path = "subset_indices.npy"
dataPrep/helpers/transforms_loaders.py CHANGED
@@ -38,7 +38,7 @@ def make_augment_pipeline(aug_config):
38
 
39
  # Randomly changing some parameters of pictures to enrich dataset
40
  transforms.RandomRotation(rotation),
41
- transforms.ColorJitter(brightness, saturation),
42
  transforms.GaussianBlur(blur),
43
  transforms.ToTensor(),
44
  transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)
 
38
 
39
  # Randomly changing some parameters of pictures to enrich dataset
40
  transforms.RandomRotation(rotation),
41
+ transforms.ColorJitter(brightness=brightness, saturation=saturation),
42
  transforms.GaussianBlur(blur),
43
  transforms.ToTensor(),
44
  transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD)