Spaces:
Sleeping
Sleeping
"""Evaluate a trained BetterCNN checkpoint on the test subset and log to ClearML.

The script:
1. fetches the data loaders and metadata via ``extract_latest_data_task``,
2. opens a new ClearML testing task,
3. downloads the ``best_model`` artifact from the referenced training task,
4. rebuilds the model, runs it on the test subset, and
5. reports overall accuracy/loss plus a per-class accuracy plot.
"""

from clearml import Task
import torch

from dataPrep.helpers.clearml_data import extract_latest_data_task
from models.modelOne import modelOne
from models.modelTwo import BetterCNN
from testingModel.helpers.evaluation import make_predictions, class_accuracies, plot_class_accuracies

# -------------- Load Data --------------
project_name = "Small Group Project"
subset_loaders, full_loaders, data_prep_metadata = extract_latest_data_task(project_name=project_name)

# -------- ClearML Testing Task Setup --------
testing_task = Task.init(
    project_name=f"{project_name}/Model Testing",
    task_name="Model Testing",
    task_type=Task.TaskTypes.testing,
    reuse_last_task_id=False,
)
testing_logger = testing_task.get_logger()

# Reference the data prep task used
testing_task.connect(data_prep_metadata, name="data_prep_metadata_READONLY")

CLEARML_TRAINING_ID = "dca82d7c2f404c249f2e5325aaf77207"

# Testing parameters - Modify these when experimenting
testing_config = {
    "model_train_id": CLEARML_TRAINING_ID,
    "num_classes": 39,
    # NOTE: informational only - this script never reads "model_path"; the
    # weights are downloaded from the training task's "best_model" artifact.
    "model_path": "best_model.pt",
}
testing_task.connect(testing_config)

# Load the model weights from ClearML training task
training_task = Task.get_task(task_id=testing_config["model_train_id"])
model_artifact = training_task.artifacts.get("best_model")
if model_artifact is None:
    # Fail with an actionable message instead of an AttributeError on None.
    raise RuntimeError(
        f"Training task {testing_config['model_train_id']} has no 'best_model' artifact"
    )
model_path = model_artifact.get_local_copy()
if model_path is None:
    # get_local_copy() yields None when the download fails.
    raise RuntimeError(
        f"Could not download 'best_model' artifact from training task "
        f"{testing_config['model_train_id']}"
    )

# Reference training metadata (read-only; do not abort the evaluation if the
# training task has no "General" hyperparameter section).
training_hyperparams = training_task.get_parameters_as_dict()
training_metadata = training_hyperparams.get("General")
if training_metadata:
    testing_task.connect(training_metadata, name="training_metadata_READONLY")
else:
    testing_logger.report_text(
        "No 'General' hyperparameters found on training task; skipping training metadata."
    )

# -------- Rebuild the ML model --------
model = BetterCNN(noOfClasses=testing_config["num_classes"])
# NOTE(review): torch.load unpickles the downloaded file. If the artifact is a
# plain state_dict and the installed torch supports it, consider passing
# weights_only=True so arbitrary pickled objects are rejected.
state_dict = torch.load(model_path, map_location="cpu")  # Load to CPU first
model.load_state_dict(state_dict)
model.eval()  # set dropout & batch norm layers to eval mode

# Move model to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# -------------------- Test model on test set --------------------
testing_logger.report_text("Starting evaluation on TEST SUBSET...\n")
test_subset = subset_loaders['test']
subset_results = make_predictions(model, test_subset, device)

# Accuracy & Loss logging
testing_logger.report_single_value(name="Test Subset Accuracy", value=subset_results["accuracy"])
testing_logger.report_single_value(name="Test Subset Loss", value=subset_results["loss"])

# Compute per-class accuracy
preds = subset_results["predictions"]
labels = subset_results["labels"]
class_acc = class_accuracies(
    labels,
    preds,
    num_classes=testing_config["num_classes"]
)

# Plot with formatted class names (underscores -> spaces, whitespace collapsed)
class_names = subset_loaders['classNames']
formatted_class_names = [" ".join(name.replace('_', ' ').split()) for name in class_names]
acc_fig = plot_class_accuracies(class_acc, formatted_class_names)

# Log accuracies plot to ClearML
testing_logger.report_matplotlib_figure(
    title="Subset Per-Class Accuracy",
    series="Class Accuracy",
    figure=acc_fig
)

# --------- Complete -----------------
print("\n------ Testing Complete ------")
testing_logger.report_text(
    f"TEST SUBSET RESULTS:\n"
    f"Loss: {subset_results['loss']:.4f}\n"
    f"Accuracy: {subset_results['accuracy']:.4f}\n"
)
testing_task.close()