ra1425 committed on
Commit
18d7ed3
·
1 Parent(s): f597d2e

Add dynamic Task ID tracking to automatically get the latest task created by Data Prep

Browse files
Files changed (1) hide show
  1. trainingModel/run_training.py +30 -19
trainingModel/run_training.py CHANGED
@@ -10,14 +10,36 @@ from models.modelOne import modelOne
10
  from trainingModel.Training import train_model
11
 
12
 
13
- # Load data prep task from ClearML
14
- DATA_PREP_TASK_ID = "f6888baedc7142fcad9e0cc6837c5cb5"
15
- DATA_PREP = Task.get_task(task_id=DATA_PREP_TASK_ID)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
 
17
  data_params = DATA_PREP.get_parameters()
18
  dataset_link = data_params['General/dataset/link']
19
 
20
- # Load the whole dataset
21
  try:
22
  ds = load_dataset(dataset_link)
23
  except Exception as e:
@@ -26,16 +48,6 @@ except Exception as e:
26
  full_dataset = ds['train']
27
 
28
 
29
- # Load the subset indices from ClearML
30
- SUBSET_ID = "f6888baedc7142fcad9e0cc6837c5cb5"
31
- subset_clearml = Dataset.get(dataset_id=SUBSET_ID)
32
-
33
- local_folder = subset_clearml.get_local_copy()
34
- subset_indices_path = os.path.join(local_folder, "subset_indices.npy")
35
- subset_indices = np.load(subset_indices_path)
36
-
37
- print("Loaded subset indices:", subset_indices.shape)
38
-
39
 
40
  # Apply subset indices to full dataset - this gives you the same subset as data prep
41
  subset_dataset = full_dataset.select(subset_indices)
@@ -80,10 +92,10 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
80
 
81
 
82
  # ------- Train the model (on subset for now) -------
83
- '''
84
- When calling this function, the model should be trained on the given dataset
85
-
86
 
 
87
  train_model(
88
  model=model,
89
  train_loader=subset_loaders['train'],
@@ -92,5 +104,4 @@ train_model(
92
  n_epochs=10,
93
  lr=1e-3,
94
  save_path="best_model.pt",
95
- )
96
- '''
 
10
  from trainingModel.Training import train_model
11
 
12
 
13
+ # Latest Data Prep Task
14
+ all_tasks = Task.get_tasks(project_name="Small Group Project")
15
+ if not all_tasks:
16
+ raise RuntimeError("No tasks found in project 'Small Group Project'")
17
+
18
+ dp_tasks = [t for t in all_tasks if t.name == "Data Preparation"]
19
+ if not dp_tasks:
20
+ raise RuntimeError("No 'Data Preparation' tasks found in this project!")
21
+
22
+ latest_task = max(dp_tasks, key=lambda t: t.id)
23
+ DYNAMIC_TASK_ID = latest_task.id
24
+ DATA_PREP = Task.get_task(task_id=DYNAMIC_TASK_ID)
25
+
26
+ # Dataset ID
27
+ config_objects = DATA_PREP.get_configuration_objects()
28
+ raw_meta = config_objects["Dataset Metadata"]
29
+ dataset_id = raw_meta.split("=")[1].strip().replace('"', "")
30
+
31
+ # Load ClearML Dataset
32
+ subset_clearml = Dataset.get(dataset_id=dataset_id)
33
+ local_folder = subset_clearml.get_local_copy()
34
+
35
+ subset_indices_path = os.path.join(local_folder, "subset_indices.npy")
36
+ subset_indices = np.load(subset_indices_path)
37
 
38
+ # Load Dataset Parameters
39
  data_params = DATA_PREP.get_parameters()
40
  dataset_link = data_params['General/dataset/link']
41
 
42
+ # Load Full Dataset
43
  try:
44
  ds = load_dataset(dataset_link)
45
  except Exception as e:
 
48
  full_dataset = ds['train']
49
 
50
 
 
 
 
 
 
 
 
 
 
 
51
 
52
  # Apply subset indices to full dataset - this gives you the same subset as data prep
53
  subset_dataset = full_dataset.select(subset_indices)
 
92
 
93
 
94
  # ------- Train the model (on subset for now) -------
95
+
96
+ # When calling this function, the model should be trained on the given dataset
 
97
 
98
+ print("\n--- Starting Model Training on Subset ---")
99
  train_model(
100
  model=model,
101
  train_loader=subset_loaders['train'],
 
104
  n_epochs=10,
105
  lr=1e-3,
106
  save_path="best_model.pt",
107
+ )