Yusuf committed on
Commit
8e6181a
·
1 Parent(s): ca4c582

FEAT: load clearml dataset to prepare for training

Browse files
Files changed (1) hide show
  1. trainingModel/run_training.py +96 -0
trainingModel/run_training.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Training entry script.
#
# Rebuilds the data pipeline recorded by the ClearML data-prep task (dataset
# link, seed, DataLoader settings, augmentation settings), creates DataLoaders
# for both the prototype subset and the full dataset, and instantiates the
# model. The actual training call is still disabled (see the end of the file).

import os
import numpy as np

from clearml import Task, Dataset
from datasets import load_dataset
from dataPrep.helpers.transforms_loaders import make_dataset_loaders

import torch
from models.modelOne import modelOne
from trainingModel.Training import train_model


# Load data prep task from ClearML
DATA_PREP_TASK_ID = "f6888baedc7142fcad9e0cc6837c5cb5"
DATA_PREP = Task.get_task(task_id=DATA_PREP_TASK_ID)

# Parameters are read back as a flat mapping keyed by "General/<path>";
# the values are converted with int()/float() below before use.
data_params = DATA_PREP.get_parameters()
dataset_link = data_params['General/dataset/link']

# Load the whole dataset
try:
    ds = load_dataset(dataset_link)
except Exception as e:
    # Chain the original error explicitly so the root cause stays visible
    # in the traceback as the direct cause of the RuntimeError.
    raise RuntimeError(f"Error loading the dataset: {e}") from e

full_dataset = ds['train']


# Load the subset indices from ClearML
# NOTE(review): this ID is identical to DATA_PREP_TASK_ID. That may be
# intentional (the dataset could have been created on the data-prep task
# itself) or a copy-paste slip -- confirm against the ClearML project.
SUBSET_ID = "f6888baedc7142fcad9e0cc6837c5cb5"
subset_clearml = Dataset.get(dataset_id=SUBSET_ID)

local_folder = subset_clearml.get_local_copy()
subset_indices_path = os.path.join(local_folder, "subset_indices.npy")
subset_indices = np.load(subset_indices_path)

print("Loaded subset indices:", subset_indices.shape)


# Apply subset indices to full dataset - this gives you the same subset as data prep
subset_dataset = full_dataset.select(subset_indices)


# Extract parameters from data prep task - these will create the DataLoaders
seed = int(data_params['General/seed'])
batch_size = int(data_params['General/dataloaders/batch_size'])
test_size = float(data_params['General/dataloaders/test_size'])

aug_config = {
    'rotation': float(data_params['General/augmentation/rotation']),
    'brightness': float(data_params['General/augmentation/brightness']),
    'saturation': float(data_params['General/augmentation/saturation']),
    'blur': float(data_params['General/augmentation/blur']),
}

# Create DataLoaders using the parameters from data prep
subset_loaders = make_dataset_loaders(
    subset_dataset, seed, batch_size, test_size, aug_config
)

print("\n--- Handoff Test Successful ---")
print(f"Prototype Train loader batches: {len(subset_loaders['train'])}")
print(f"Prototype Validation loader batches: {len(subset_loaders['val'])}")
print(f"Prototype Test loader batches: {len(subset_loaders['test'])}")


# Same loader construction, this time over the full (non-subset) dataset.
full_loaders = make_dataset_loaders(
    full_dataset, seed, batch_size, test_size, aug_config
)

print("\n--- Handoff Test Successful ---")
print(f"Train loader batches: {len(full_loaders['train'])}")
print(f"Validation loader batches: {len(full_loaders['val'])}")
print(f"Test loader batches: {len(full_loaders['test'])}")


# -------- Build the ML model --------
model = modelOne(noOfClasses=39)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# ------- Train the model (on subset for now) -------
# TODO: training is intentionally disabled for now. When enabled, this call
# should train the model on the given dataset:
#
# train_model(
#     model=model,
#     train_loader=subset_loaders['train'],
#     val_loader=subset_loaders['val'],
#     device=device,
#     n_epochs=10,
#     lr=1e-3,
#     save_path="best_model.pt",
# )