Spaces:

k23064919
/

smallGroupProject

Sleeping

App Files Files Community

ra1425 committed on Nov 25, 2025

Commit

18d7ed3

1 Parent(s): f597d2e

Add dynamic Task ID tracking to automatically get latest task created by Data Prep

Browse files

Files changed (1) hide show

trainingModel/run_training.py +30 -19

trainingModel/run_training.py CHANGED Viewed

@@ -10,14 +10,36 @@ from models.modelOne import modelOne
 from trainingModel.Training import train_model
-# Load data prep task from ClearML
-DATA_PREP_TASK_ID = "f6888baedc7142fcad9e0cc6837c5cb5"
-DATA_PREP = Task.get_task(task_id=DATA_PREP_TASK_ID)
 data_params = DATA_PREP.get_parameters()
 dataset_link = data_params['General/dataset/link']
-# Load the whole dataset
 try:
     ds = load_dataset(dataset_link)
 except Exception as e:
@@ -26,16 +48,6 @@ except Exception as e:
 full_dataset = ds['train']
-# Load the subset indices from ClearML
-SUBSET_ID = "f6888baedc7142fcad9e0cc6837c5cb5"
-subset_clearml = Dataset.get(dataset_id=SUBSET_ID)
-local_folder = subset_clearml.get_local_copy()
-subset_indices_path = os.path.join(local_folder, "subset_indices.npy")
-subset_indices = np.load(subset_indices_path)
-print("Loaded subset indices:", subset_indices.shape)
 # Apply subset indices to full dataset - this gives you the same subset as data prep
 subset_dataset = full_dataset.select(subset_indices)
@@ -80,10 +92,10 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 # ------- Train the model (on subset for now) -------
-'''
-When calling this function, the model should be trained on the given dataset
 train_model(
     model=model,
     train_loader=subset_loaders['train'],
@@ -92,5 +104,4 @@ train_model(
     n_epochs=10,
     lr=1e-3,
     save_path="best_model.pt",
-)
-'''

 from trainingModel.Training import train_model
+# Latest Data Prep Task
+all_tasks = Task.get_tasks(project_name="Small Group Project")
+if not all_tasks:
+    raise RuntimeError("No tasks found in project 'Small Group Project'")
+dp_tasks = [t for t in all_tasks if t.name == "Data Preparation"]
+if not dp_tasks:
+    raise RuntimeError("No 'Data Preparation' tasks found in this project!")
+latest_task = max(dp_tasks, key=lambda t: t.id)
+DYNAMIC_TASK_ID = latest_task.id
+DATA_PREP = Task.get_task(task_id=DYNAMIC_TASK_ID)
+# Dataset ID
+config_objects = DATA_PREP.get_configuration_objects()
+raw_meta = config_objects["Dataset Metadata"]
+dataset_id = raw_meta.split("=")[1].strip().replace('"', "")
+# Load ClearML Dataset
+subset_clearml = Dataset.get(dataset_id=dataset_id)
+local_folder = subset_clearml.get_local_copy()
+subset_indices_path = os.path.join(local_folder, "subset_indices.npy")
+subset_indices = np.load(subset_indices_path)
+# Load Dataset Parameters
 data_params = DATA_PREP.get_parameters()
 dataset_link = data_params['General/dataset/link']
+# Load Full Dataset
 try:
     ds = load_dataset(dataset_link)
 except Exception as e:
 full_dataset = ds['train']
 # Apply subset indices to full dataset - this gives you the same subset as data prep
 subset_dataset = full_dataset.select(subset_indices)
 # ------- Train the model (on subset for now) -------
+#When calling this function, the model should be trained on the given dataset
+print("\n--- Starting Model Training on Subset ---")
 train_model(
     model=model,
     train_loader=subset_loaders['train'],
     n_epochs=10,
     lr=1e-3,
     save_path="best_model.pt",
+)