ra1425 committed on
Commit
83d4d7f
·
1 Parent(s): 18d7ed3

Fix: Conflict of formats between Data prep and Training, fix Tensor size

Browse files
dataPrep/helpers/create_dataset.py CHANGED
@@ -35,13 +35,14 @@ def make_subset(dataset_link, subset_ratio, clearml_logger):
35
  subset_indices = indices[:subset_size]
36
 
37
  prototyping_dataset = data_plants.select(subset_indices)
38
-
39
- # ---------- Register subset in ClearML ----------
 
40
  clearml_dataset = Dataset.create(
41
  dataset_name="Plant Village Prototype",
42
  dataset_project="Small Group Project",
43
  dataset_tags=["prototype", "subset"],
44
- use_current_task=True
45
  )
46
  clearml_dataset.add_tags([
47
  f"subset_ratio_{subset_ratio}",
 
35
  subset_indices = indices[:subset_size]
36
 
37
  prototyping_dataset = data_plants.select(subset_indices)
38
+ # I THINK WE NEED TO REMOVE THIS LATER
39
+ # We don't really need to upload the subset every time (I'm not sure though)
40
+ # Register subset in ClearML
41
  clearml_dataset = Dataset.create(
42
  dataset_name="Plant Village Prototype",
43
  dataset_project="Small Group Project",
44
  dataset_tags=["prototype", "subset"],
45
+ use_current_task=False
46
  )
47
  clearml_dataset.add_tags([
48
  f"subset_ratio_{subset_ratio}",
dataPrep/helpers/transforms_loaders.py CHANGED
@@ -9,13 +9,13 @@ from torch.utils.data import DataLoader
9
  # Standard ImageNet mean and std - Used to normalize the tensors
10
  IMAGENET_MEAN = [0.485, 0.456, 0.406]
11
  IMAGENET_STD = [0.229, 0.224, 0.225]
12
-
13
  # Defines and returns the normalization pipeline.
14
  def make_norm_pipeline():
15
 
16
  # Pipeline ensures image format is consistent (for Val/Test)
17
  normalisation = transforms.Compose([
18
-
19
  # Convert PIL Image to a PyTorch Tensor, scales pixel values from [0, 255] to [0.0, 1.0]
20
  transforms.ToTensor(),
21
 
@@ -35,7 +35,7 @@ def make_augment_pipeline(aug_config):
35
 
36
  # Augmentation pipeline (to create "new" images by changing some parameters)
37
  augmentation = transforms.Compose([
38
-
39
  # Randomly changing some parameters of pictures to enrich dataset
40
  transforms.RandomRotation(rotation),
41
  transforms.ColorJitter(brightness=brightness, saturation=saturation),
@@ -57,6 +57,14 @@ def make_dataset_loaders(dataset, seed, batch_size, test_size, aug_config):
57
  normalisation = make_norm_pipeline()
58
  augmentation = make_augment_pipeline(aug_config)
59
 
 
 
 
 
 
 
 
 
60
  # 70/30 split creates train set
61
  split_1 = dataset.train_test_split(test_size=test_size, seed=seed)
62
  train_split = split_1['train']
@@ -68,9 +76,9 @@ def make_dataset_loaders(dataset, seed, batch_size, test_size, aug_config):
68
  val_split, test_split = split_2['train'], split_2['test']
69
 
70
  # Put each split through pipelines
71
- train_split.set_transform(augmentation)
72
- val_split.set_transform(normalisation)
73
- test_split.set_transform(normalisation)
74
 
75
  # Create dataloader for each
76
  train_loader = DataLoader(train_split, batch_size=batch_size, shuffle=True)
 
9
  # Standard ImageNet mean and std - Used to normalize the tensors
10
  IMAGENET_MEAN = [0.485, 0.456, 0.406]
11
  IMAGENET_STD = [0.229, 0.224, 0.225]
12
+ IMAGE_SIZE = (256, 256)
13
  # Defines and returns the normalization pipeline.
14
  def make_norm_pipeline():
15
 
16
  # Pipeline ensures image format is consistent (for Val/Test)
17
  normalisation = transforms.Compose([
18
+ transforms.Resize(IMAGE_SIZE),
19
  # Convert PIL Image to a PyTorch Tensor, scales pixel values from [0, 255] to [0.0, 1.0]
20
  transforms.ToTensor(),
21
 
 
35
 
36
  # Augmentation pipeline (to create "new" images by changing some parameters)
37
  augmentation = transforms.Compose([
38
+ transforms.Resize(IMAGE_SIZE),
39
  # Randomly changing some parameters of pictures to enrich dataset
40
  transforms.RandomRotation(rotation),
41
  transforms.ColorJitter(brightness=brightness, saturation=saturation),
 
57
  normalisation = make_norm_pipeline()
58
  augmentation = make_augment_pipeline(aug_config)
59
 
60
+ def apply_augmentation(batch):
61
+ batch['image'] = [augmentation(x) for x in batch['image']]
62
+ return batch
63
+
64
+ def apply_normalisation(batch):
65
+ batch['image'] = [normalisation(x) for x in batch['image']]
66
+ return batch
67
+
68
  # 70/30 split creates train set
69
  split_1 = dataset.train_test_split(test_size=test_size, seed=seed)
70
  train_split = split_1['train']
 
76
  val_split, test_split = split_2['train'], split_2['test']
77
 
78
  # Put each split through pipelines
79
+ train_split.set_transform(apply_augmentation)
80
+ val_split.set_transform(apply_normalisation)
81
+ test_split.set_transform(apply_normalisation)
82
 
83
  # Create dataloader for each
84
  train_loader = DataLoader(train_split, batch_size=batch_size, shuffle=True)