from clearml import Task
from dataPrep.helpers.clearml_data import extract_latest_data_task
import torch
from models.modelOne import modelOne  # alternative architecture, not used in this run
from models.modelTwo import BetterCNN
from testingModel.helpers.evaluation import make_predictions, class_accuracies, plot_class_accuracies

# -------------- Load Data --------------
project_name = "Small Group Project"
subset_loaders, full_loaders, data_prep_metadata = extract_latest_data_task(project_name=project_name)

# -------- ClearML Testing Task Setup --------
testing_task = Task.init(
    project_name=f"{project_name}/Model Testing",
    task_name="Model Testing",
    task_type=Task.TaskTypes.testing,
    reuse_last_task_id=False,
)
testing_logger = testing_task.get_logger()

# Reference the data prep task used
testing_task.connect(data_prep_metadata, name="data_prep_metadata_READONLY")

CLEARML_TRAINING_ID = "dca82d7c2f404c249f2e5325aaf77207"

# Testing parameters: modify these when experimenting
testing_config = {
    "model_train_id": CLEARML_TRAINING_ID,
    "num_classes": 39,
    "model_path": "best_model.pt",  # recorded for reference; the actual path comes from the artifact below
}
testing_task.connect(testing_config)

# Load the model weights from the ClearML training task
training_task = Task.get_task(task_id=testing_config["model_train_id"])
model_artifact = training_task.artifacts.get("best_model")
model_path = model_artifact.get_local_copy()

# Reference training metadata
training_hyperparams = training_task.get_parameters_as_dict()
testing_task.connect(training_hyperparams['General'], name="training_metadata_READONLY")

# -------- Rebuild the ML model --------
model = BetterCNN(noOfClasses=testing_config["num_classes"])
state_dict = torch.load(model_path, map_location="cpu")  # load to CPU first
model.load_state_dict(state_dict)
model.eval()  # set dropout & batch norm layers to eval mode

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# -------------------- Test model on test set --------------------
testing_logger.report_text("Starting evaluation on TEST SUBSET...\n")
test_subset = subset_loaders['test']
subset_results = make_predictions(model, test_subset, device)

# Accuracy & loss logging
testing_logger.report_single_value(name="Test Subset Accuracy", value=subset_results["accuracy"])
testing_logger.report_single_value(name="Test Subset Loss", value=subset_results["loss"])

# Compute per-class accuracy
preds = subset_results["predictions"]
labels = subset_results["labels"]
class_acc = class_accuracies(
    labels, preds, num_classes=testing_config["num_classes"]
)

# Plot with formatted class names (underscores -> spaces, whitespace collapsed)
class_names = subset_loaders['classNames']
formatted_class_names = [" ".join(name.replace('_', ' ').split()) for name in class_names]
acc_fig = plot_class_accuracies(class_acc, formatted_class_names)

# Log the accuracy plot to ClearML
testing_logger.report_matplotlib_figure(
    title="Subset Per-Class Accuracy",
    series="Class Accuracy",
    figure=acc_fig,
)

# --------- Complete -----------------
print("\n------ Testing Complete ------")
testing_logger.report_text(
    f"TEST SUBSET RESULTS:\n"
    f"Loss: {subset_results['loss']:.4f}\n"
    f"Accuracy: {subset_results['accuracy']:.4f}\n"
)
testing_task.close()
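
# ---------------------------------------------------------------------
# Reference sketches (illustrative assumptions, not executed above).
# The script relies on three helpers from testingModel.helpers.evaluation
# whose contracts are only implied: make_predictions must return a dict
# with "accuracy", "loss", "predictions", and "labels"; class_accuracies
# must return one accuracy per class index; plot_class_accuracies must
# return a matplotlib Figure (required by report_matplotlib_figure).
# The hypothetical _*_sketch functions below only document those assumed
# contracts with minimal implementations; the real helpers may differ.
# ---------------------------------------------------------------------
def _make_predictions_sketch(model, loader, device):
    criterion = torch.nn.CrossEntropyLoss()
    model.eval()
    total_loss, correct, seen = 0.0, 0, 0
    all_preds, all_labels = [], []
    with torch.no_grad():  # inference only, no gradients needed
        for inputs, targets in loader:
            inputs, targets = inputs.to(device), targets.to(device)
            logits = model(inputs)
            total_loss += criterion(logits, targets).item() * targets.size(0)
            preds = logits.argmax(dim=1)
            correct += (preds == targets).sum().item()
            seen += targets.size(0)
            all_preds.append(preds.cpu())
            all_labels.append(targets.cpu())
    return {
        "accuracy": correct / seen,
        "loss": total_loss / seen,  # sample-weighted mean loss
        "predictions": torch.cat(all_preds),
        "labels": torch.cat(all_labels),
    }


def _class_accuracies_sketch(labels, preds, num_classes):
    accs = []
    for c in range(num_classes):
        mask = labels == c
        n = int(mask.sum())
        # A class absent from the test subset gets 0.0 here; the real
        # helper may skip or NaN such classes instead.
        accs.append(float((preds[mask] == c).sum()) / n if n else 0.0)
    return accs


def _plot_class_accuracies_sketch(class_acc, class_names):
    import matplotlib.pyplot as plt  # local import keeps the sketch self-contained

    fig, ax = plt.subplots(figsize=(12, 4))
    ax.bar(range(len(class_acc)), class_acc)
    ax.set_xticks(range(len(class_names)))
    ax.set_xticklabels(class_names, rotation=90, fontsize=6)
    ax.set_ylabel("Accuracy")
    ax.set_title("Per-class accuracy")
    fig.tight_layout()
    return fig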