# NOTE(review): the three lines below were commit metadata (author, message,
# hash) accidentally pasted into the source file; kept as a comment.
# Yusuf — "fix dataloader worker number" (84cfdfc)
from clearml import Task
from dataPrep.helpers.clearml_data import extract_latest_data_task
import torch
from models.modelOne import modelOne
from models.modelTwo import BetterCNN
from trainingModel.helpers.Training import train_model
# -------------- Load Data --------------
# Number of DataLoader worker processes; 0 loads batches in the main process.
NUM_WORKERS = 0
project_name = "Small Group Project"
# Fetch the outputs of the most recent data-prep task in this ClearML project:
# loaders over a small subset, loaders over the full dataset, and a metadata
# dict describing how the data was prepared (connected to the task below).
subset_loaders, full_loaders, data_prep_metadata = extract_latest_data_task(project_name=project_name, num_workers=NUM_WORKERS)
# -------- ClearML Training Task Setup --------
training_task = Task.init(
    project_name=f"{project_name}/Model Training",
    task_name="Model Training",
    reuse_last_task_id=False,  # always create a fresh task rather than resuming the last one
)
# Detail the data prep task used
training_logger = training_task.get_logger()
# Attach the upstream data-prep metadata so it is visible on this training task;
# the READONLY suffix signals it should not be edited from the ClearML UI.
training_task.connect(data_prep_metadata, name="data_prep_metadata_READONLY")
# Training parameters - modify these to experiment.
# NOTE(review): "optimizer" is logged here but never passed to train_model
# below — confirm whether train_model should receive it.
training_config = {
    "num_classes": 39,               # number of target classes in the dataset
    "n_epochs": 1,
    "learning_rate": 1e-3,
    "optimizer": "adam",
    "save_path": "best_model.pt",    # where the best checkpoint is written
    "num_workers": NUM_WORKERS
}
# Register the config with ClearML so the values are logged with the task and
# can be overridden when the task is cloned/enqueued.
training_task.connect(training_config)
# -------- Build the ML model --------
model = BetterCNN(noOfClasses=training_config["num_classes"])
# Prefer the GPU when one is available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Print device info
print(f"\n**Using device: {device}**\n")
if device.type == 'cuda':
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
# ------- Train the model (on subset for now) -------
print("\n--- Starting Model Training on Subset ---")
# Returns a metrics dict with per-epoch "losses", "accuracies",
# "val_accuracies" and a scalar "best_accuracy" (consumed by the
# ClearML logging further down this script).
training_metrics = train_model(
    model=model,
    train_loader=subset_loaders['train'],
    val_loader=subset_loaders['val'],
    n_epochs=training_config["n_epochs"],
    lr=training_config["learning_rate"],
    num_classes=training_config["num_classes"],
    save_path=training_config["save_path"],
    early_stop=3,  # presumably patience in epochs without val improvement — confirm in train_model
)
# ----------- Log metrics to ClearML -----------
# Report each per-epoch metric series under its own plot title.
per_epoch_series = (
    ("training epoch loss", "loss", training_metrics["losses"]),
    ("training epoch accuracy", "accuracy", training_metrics["accuracies"]),
    ("validation epoch accuracy", "accuracy", training_metrics["val_accuracies"]),
)
for title, series_name, values in per_epoch_series:
    for step, value in enumerate(values):
        training_logger.report_scalar(title, series_name, value=value, iteration=step)
# Best validation accuracy as a single summary value.
training_logger.report_single_value("best_val_accuracy", training_metrics["best_accuracy"])
# Attach the saved best-model checkpoint to the task.
training_task.upload_artifact("best_model", training_config["save_path"])
print("\nTraining complete.")
training_task.close()