Spaces:

k23064919
/

smallGroupProject

Sleeping

App Files Files Community

ra1425 committed on Nov 6, 2025

Commit

6f31a0a

1 Parent(s): 42b46e3

REF: Rearrange code for readability

Browse files

Files changed (1) hide show

data_preparation.py +8 -4

data_preparation.py CHANGED Viewed

@@ -19,6 +19,7 @@ from torch.utils.data import DataLoader
 # --- Experiment Tracking ---
 from clearml import Task, Logger
 # Setting up the SEED to be able to repeat experiments
 SEED = 42
 random.seed(SEED)
@@ -84,6 +85,7 @@ task.connect_configuration(
     {"subset_ratio": SUBSET_RATIO},
     name="Data subsetting"
 )
 # Calculate the number of samples we use
 subset_size = int(data_length * SUBSET_RATIO)
@@ -98,6 +100,7 @@ print("✅ Checkpoint: Prototyping dataset is created")
 # Verify the size of the prototyping dataset
 print(f"Prototyping dataset size: {len(prototyping_dataset)}")
 # ---- Exploratory data analysis (EDA) ----
 # Reformatting the label feature to understand bias
@@ -152,6 +155,7 @@ plt.title('Class distribution among chosen samples')
 plot_file = 'class_distribution.png'
 plt.savefig(plot_file)
 clearml_logger.report_image(
     title="EDA",                    # The title for the plot section in ClearML
     series="Class Distribution",    # The name of this specific plot
@@ -165,7 +169,7 @@ print("✅ Checkpoint: Plot with classes distributions is created and saved")
 # --------------- Data Splits ------------
-def get_prototype_loaders(batch_size=32) :
     # Standard ImageNet mean and std
     # These values are used to normalize the tensors
@@ -223,9 +227,9 @@ def get_prototype_loaders(batch_size=32) :
     proto_test_split.set_transform(normalisation_pipeline)
     # -- Creating the prototype dataloaders --
-    proto_train_loader = DataLoader(dataset = proto_train_split, batch_size = batch_size, shuffle = True)
-    proto_val_loader = DataLoader(dataset = proto_val_split, batch_size = batch_size, shuffle = False)
-    proto_test_loader = DataLoader(dataset = proto_test_split, batch_size = batch_size, shuffle = False)
     print("✅ Checkpoint: DataLoaders are set")
     return proto_train_loader, proto_val_loader, proto_test_loader

 # --- Experiment Tracking ---
 from clearml import Task, Logger
 # Setting up the SEED to be able to repeat experiments
 SEED = 42
 random.seed(SEED)
     {"subset_ratio": SUBSET_RATIO},
     name="Data subsetting"
 )
 # Calculate the number of samples we use
 subset_size = int(data_length * SUBSET_RATIO)
 # Verify the size of the prototyping dataset
 print(f"Prototyping dataset size: {len(prototyping_dataset)}")
 # ---- Exploratory data analysis (EDA) ----
 # Reformatting the label feature to understand bias
 plot_file = 'class_distribution.png'
 plt.savefig(plot_file)
 clearml_logger.report_image(
     title="EDA",                    # The title for the plot section in ClearML
     series="Class Distribution",    # The name of this specific plot
 # --------------- Data Splits ------------
+def get_prototype_loaders(batch_size=32):
     # Standard ImageNet mean and std
     # These values are used to normalize the tensors
     proto_test_split.set_transform(normalisation_pipeline)
     # -- Creating the prototype dataloaders --
+    proto_train_loader = DataLoader(dataset = proto_train_split, batch_size = batch_size, shuffle = True )
+    proto_val_loader = DataLoader(dataset = proto_val_split, batch_size = batch_size, shuffle = False )
+    proto_test_loader = DataLoader(dataset = proto_test_split, batch_size = batch_size, shuffle = False )
     print("✅ Checkpoint: DataLoaders are set")
     return proto_train_loader, proto_val_loader, proto_test_loader