from clearml import Task
from dataPrep.helpers.clearml_data import extract_latest_data_task
import torch
from models.modelOne import modelOne
from models.modelTwo import BetterCNN
from trainingModel.helpers.Training import train_model

# -------------- Load Data --------------
NUM_WORKERS = 0
project_name = "Small Group Project"

# Pull the loaders and metadata produced by the most recent data-prep task
# in this project.
subset_loaders, full_loaders, data_prep_metadata = extract_latest_data_task(
    project_name=project_name,
    num_workers=NUM_WORKERS,
)

# -------- ClearML Training Task Setup --------
training_task = Task.init(
    project_name=f"{project_name}/Model Training",
    task_name="Model Training",
    reuse_last_task_id=False,
)
training_logger = training_task.get_logger()

# Record which data-prep run this training task consumed (read-only lineage).
training_task.connect(data_prep_metadata, name="data_prep_metadata_READONLY")

# Training parameters - Modify these to experiment
training_config = {
    "num_classes": 39,
    "n_epochs": 1,
    "learning_rate": 1e-3,
    "optimizer": "adam",
    "save_path": "best_model.pt",
    "num_workers": NUM_WORKERS,
}
training_task.connect(training_config)

# -------- Build the ML model --------
model = BetterCNN(noOfClasses=training_config["num_classes"])
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Print device info
print(f"\n**Using device: {device}**\n")
if device.type == 'cuda':
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")

# ------- Train the model (on subset for now) -------
print("\n--- Starting Model Training on Subset ---")
training_metrics = train_model(
    model=model,
    train_loader=subset_loaders['train'],
    val_loader=subset_loaders['val'],
    n_epochs=training_config["n_epochs"],
    lr=training_config["learning_rate"],
    num_classes=training_config["num_classes"],
    save_path=training_config["save_path"],
    early_stop=3,
)

# ----------- Log metrics to ClearML -----------
# Each (title, series, per-epoch values) triple becomes one scalar curve.
per_epoch_series = (
    ("training epoch loss", "loss", training_metrics["losses"]),
    ("training epoch accuracy", "accuracy", training_metrics["accuracies"]),
    ("validation epoch accuracy", "accuracy", training_metrics["val_accuracies"]),
)
for title, series_name, values in per_epoch_series:
    for epoch, value in enumerate(values):
        training_logger.report_scalar(title, series_name, value=value, iteration=epoch)

# Best validation accuracy as a single summary value
training_logger.report_single_value("best_val_accuracy", training_metrics["best_accuracy"])

# Upload best model checkpoint as a task artifact
training_task.upload_artifact("best_model", training_config["save_path"])

print("\nTraining complete.")
training_task.close()