Spaces:
Sleeping
Sleeping
| from clearml import Task | |
| from dataPrep.helpers.clearml_data import extract_latest_data_task | |
| import torch | |
| from models.modelOne import modelOne | |
| from models.modelTwo import BetterCNN | |
| from trainingModel.helpers.Training import train_model | |
# -------------- Load Data --------------
# Worker count handed to the data helper; the same value is recorded in the
# training configuration further down.
NUM_WORKERS = 0
project_name = "Small Group Project"

# The helper returns three objects: the subset loaders, the full loaders and
# the data-prep metadata (presumably from the most recent data-prep task,
# going by the helper's name -- confirm in dataPrep.helpers.clearml_data).
_latest_data = extract_latest_data_task(
    project_name=project_name,
    num_workers=NUM_WORKERS,
)
subset_loaders, full_loaders, data_prep_metadata = _latest_data
# -------- ClearML Training Task Setup --------
# The task lives in a "Model Training" sub-project of the main project.
# reuse_last_task_id=False requests a new task for this run instead of
# reusing the previous task's ID.
_task_settings = {
    "project_name": f"{project_name}/Model Training",
    "task_name": "Model Training",
    "reuse_last_task_id": False,
}
training_task = Task.init(**_task_settings)

# Logger used further down to report the training metrics
training_logger = training_task.get_logger()

# Detail the data prep task used: attach its metadata to this task under a
# name that marks it as read-only.
training_task.connect(data_prep_metadata, name="data_prep_metadata_READONLY")
# Training parameters - Modify these to experiment
# Everything in this dict is connected to the ClearML task, so the values are
# stored with the run.
training_config = {
    "num_classes": 39,
    "n_epochs": 1,
    "learning_rate": 1e-3,
    # NOTE(review): "optimizer" is recorded here but is not passed to
    # train_model below -- confirm it matches what train_model really uses.
    "optimizer": "adam",
    "save_path": "best_model.pt",
    # NOTE(review): the data loaders were already built with NUM_WORKERS
    # before this dict is connected, so editing this entry does not change
    # them.
    "num_workers": NUM_WORKERS,
    # Formerly a literal in the train_model call; kept in the config so it is
    # tracked alongside the other hyperparameters.
    "early_stop": 3,
}
training_task.connect(training_config)

# -------- Build the ML model --------
model = BetterCNN(noOfClasses=training_config["num_classes"])

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Print device info
print(f"\n**Using device: {device}**\n")
if device.type == "cuda":
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")

# ------- Train the model (on subset for now) -------
print("\n--- Starting Model Training on Subset ---")
# Every tunable argument is read from training_config so that the values
# used for training are exactly the ones recorded on the task.
training_metrics = train_model(
    model=model,
    train_loader=subset_loaders["train"],
    val_loader=subset_loaders["val"],
    n_epochs=training_config["n_epochs"],
    lr=training_config["learning_rate"],
    num_classes=training_config["num_classes"],
    save_path=training_config["save_path"],
    early_stop=training_config["early_stop"],
)
# ----------- Log metrics to ClearML -----------
# One row per per-epoch curve: (plot title, series name, key in
# training_metrics). The rows are reported in this order, and the list index
# of each value is used as its iteration.
_epoch_curves = (
    ("training epoch loss", "loss", "losses"),
    ("training epoch accuracy", "accuracy", "accuracies"),
    ("validation epoch accuracy", "accuracy", "val_accuracies"),
)
for _title, _series, _metric_key in _epoch_curves:
    for _epoch_idx, _value in enumerate(training_metrics[_metric_key]):
        training_logger.report_scalar(
            _title,
            _series,
            value=_value,
            iteration=_epoch_idx,
        )

# Best validation accuracy, reported as a single value
_best_val_accuracy = training_metrics["best_accuracy"]
training_logger.report_single_value("best_val_accuracy", _best_val_accuracy)

# Upload whatever is at save_path (presumably the checkpoint written by
# train_model -- confirm) as the "best_model" artifact
_checkpoint_path = training_config["save_path"]
training_task.upload_artifact("best_model", _checkpoint_path)

print("\nTraining complete.")
training_task.close()