Spaces:
Sleeping
Sleeping
ra1425
committed on
Commit
·
18d7ed3
1
Parent(s):
f597d2e
Add dynamic Task ID tracking to automatically get the latest task created by Data Prep
Browse files- trainingModel/run_training.py +30 -19
trainingModel/run_training.py
CHANGED
|
@@ -10,14 +10,36 @@ from models.modelOne import modelOne
|
|
| 10 |
from trainingModel.Training import train_model
|
| 11 |
|
| 12 |
|
| 13 |
-
#
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
|
|
|
| 17 |
data_params = DATA_PREP.get_parameters()
|
| 18 |
dataset_link = data_params['General/dataset/link']
|
| 19 |
|
| 20 |
-
# Load
|
| 21 |
try:
|
| 22 |
ds = load_dataset(dataset_link)
|
| 23 |
except Exception as e:
|
|
@@ -26,16 +48,6 @@ except Exception as e:
|
|
| 26 |
full_dataset = ds['train']
|
| 27 |
|
| 28 |
|
| 29 |
-
# Load the subset indices from ClearML
|
| 30 |
-
SUBSET_ID = "f6888baedc7142fcad9e0cc6837c5cb5"
|
| 31 |
-
subset_clearml = Dataset.get(dataset_id=SUBSET_ID)
|
| 32 |
-
|
| 33 |
-
local_folder = subset_clearml.get_local_copy()
|
| 34 |
-
subset_indices_path = os.path.join(local_folder, "subset_indices.npy")
|
| 35 |
-
subset_indices = np.load(subset_indices_path)
|
| 36 |
-
|
| 37 |
-
print("Loaded subset indices:", subset_indices.shape)
|
| 38 |
-
|
| 39 |
|
| 40 |
# Apply subset indices to full dataset - this gives you the same subset as data prep
|
| 41 |
subset_dataset = full_dataset.select(subset_indices)
|
|
@@ -80,10 +92,10 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
| 80 |
|
| 81 |
|
| 82 |
# ------- Train the model (on subset for now) -------
|
| 83 |
-
|
| 84 |
-
When calling this function, the model should be trained on the given dataset
|
| 85 |
-
|
| 86 |
|
|
|
|
| 87 |
train_model(
|
| 88 |
model=model,
|
| 89 |
train_loader=subset_loaders['train'],
|
|
@@ -92,5 +104,4 @@ train_model(
|
|
| 92 |
n_epochs=10,
|
| 93 |
lr=1e-3,
|
| 94 |
save_path="best_model.pt",
|
| 95 |
-
)
|
| 96 |
-
'''
|
|
|
|
| 10 |
from trainingModel.Training import train_model
|
| 11 |
|
| 12 |
|
| 13 |
+
# Latest Data Prep Task
|
| 14 |
+
all_tasks = Task.get_tasks(project_name="Small Group Project")
|
| 15 |
+
if not all_tasks:
|
| 16 |
+
raise RuntimeError("No tasks found in project 'Small Group Project'")
|
| 17 |
+
|
| 18 |
+
dp_tasks = [t for t in all_tasks if t.name == "Data Preparation"]
|
| 19 |
+
if not dp_tasks:
|
| 20 |
+
raise RuntimeError("No 'Data Preparation' tasks found in this project!")
|
| 21 |
+
|
| 22 |
+
latest_task = max(dp_tasks, key=lambda t: t.id)
|
| 23 |
+
DYNAMIC_TASK_ID = latest_task.id
|
| 24 |
+
DATA_PREP = Task.get_task(task_id=DYNAMIC_TASK_ID)
|
| 25 |
+
|
| 26 |
+
# Dataset ID
|
| 27 |
+
config_objects = DATA_PREP.get_configuration_objects()
|
| 28 |
+
raw_meta = config_objects["Dataset Metadata"]
|
| 29 |
+
dataset_id = raw_meta.split("=")[1].strip().replace('"', "")
|
| 30 |
+
|
| 31 |
+
# Load ClearML Dataset
|
| 32 |
+
subset_clearml = Dataset.get(dataset_id=dataset_id)
|
| 33 |
+
local_folder = subset_clearml.get_local_copy()
|
| 34 |
+
|
| 35 |
+
subset_indices_path = os.path.join(local_folder, "subset_indices.npy")
|
| 36 |
+
subset_indices = np.load(subset_indices_path)
|
| 37 |
|
| 38 |
+
# Load Dataset Parameters
|
| 39 |
data_params = DATA_PREP.get_parameters()
|
| 40 |
dataset_link = data_params['General/dataset/link']
|
| 41 |
|
| 42 |
+
# Load Full Dataset
|
| 43 |
try:
|
| 44 |
ds = load_dataset(dataset_link)
|
| 45 |
except Exception as e:
|
|
|
|
| 48 |
full_dataset = ds['train']
|
| 49 |
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
# Apply subset indices to full dataset - this gives you the same subset as data prep
|
| 53 |
subset_dataset = full_dataset.select(subset_indices)
|
|
|
|
| 92 |
|
| 93 |
|
| 94 |
# ------- Train the model (on subset for now) -------
|
| 95 |
+
|
| 96 |
+
#When calling this function, the model should be trained on the given dataset
|
|
|
|
| 97 |
|
| 98 |
+
print("\n--- Starting Model Training on Subset ---")
|
| 99 |
train_model(
|
| 100 |
model=model,
|
| 101 |
train_loader=subset_loaders['train'],
|
|
|
|
| 104 |
n_epochs=10,
|
| 105 |
lr=1e-3,
|
| 106 |
save_path="best_model.pt",
|
| 107 |
+
)
|
|
|