File size: 3,344 Bytes
84cfdfc
 
 
 
 
 
ed657fc
84cfdfc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ed657fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84cfdfc
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from clearml import Task
from dataPrep.helpers.clearml_data import extract_latest_data_task

import torch
from models.modelOne import modelOne
from models.modelTwo import BetterCNN
from testingModel.helpers.evaluation import make_predictions, class_accuracies, plot_class_accuracies


# -------------- Load Data --------------
# The helper hands back three values: the subset loaders, the full loaders
# and the metadata describing the data-prep task they came from.
project_name = "Small Group Project"
latest_data = extract_latest_data_task(project_name=project_name)
subset_loaders, full_loaders, data_prep_metadata = latest_data


# -------- ClearML Testing Task Setup --------
# ID of the ClearML training task whose model is evaluated by this script.
CLEARML_TRAINING_ID = "dca82d7c2f404c249f2e5325aaf77207"

# reuse_last_task_id=False asks ClearML for a new task on each run.
testing_task = Task.init(
    project_name=project_name + "/Model Testing",
    task_name="Model Testing",
    task_type=Task.TaskTypes.testing,
    reuse_last_task_id=False,
)
testing_logger = testing_task.get_logger()

# Attach the data-prep metadata so the testing task references the data used.
testing_task.connect(data_prep_metadata, name="data_prep_metadata_READONLY")

# Testing parameters - modify these when experimenting.
testing_config = dict(
    model_train_id=CLEARML_TRAINING_ID,
    num_classes=39,
    model_path="best_model.pt",
)
testing_task.connect(testing_config)

# Load the model weights from the ClearML training task.
# The artifact is looked up by the fixed name "best_model"; the "model_path"
# entry of testing_config is not consulted here.
training_task = Task.get_task(task_id=testing_config["model_train_id"])
model_artifact = training_task.artifacts.get("best_model")
if model_artifact is None:
    # Without this guard the next call fails with an opaque
    # "'NoneType' object has no attribute 'get_local_copy'".
    raise RuntimeError(
        f"Training task {testing_config['model_train_id']!r} has no "
        "'best_model' artifact; check the task ID in testing_config."
    )

model_path = model_artifact.get_local_copy()
if not model_path:
    # With its default raise_on_error=False, get_local_copy() can return None
    # when the download fails; stop here instead of passing None to torch.load.
    raise RuntimeError(
        "Could not download the 'best_model' artifact from training task "
        f"{testing_config['model_train_id']!r}."
    )

# Reference training metadata. Indexing 'General' raises KeyError if the
# training task has no hyperparameter section with that name.
training_hyperparams = training_task.get_parameters_as_dict()
testing_task.connect(training_hyperparams['General'], name="training_metadata_READONLY")


# -------- Rebuild the ML model --------
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Weights are deserialised onto the CPU first and moved to `device` afterwards.
# NOTE(review): torch.load unpickles the downloaded file; consider passing
# weights_only=True if the installed torch version supports it.
model = BetterCNN(noOfClasses=testing_config["num_classes"])
state_dict = torch.load(model_path, map_location="cpu")
model.load_state_dict(state_dict)

# Switch dropout & batch norm layers to eval mode, then place the model.
model.eval()
model.to(device)


# -------------------- Test model on test set --------------------
testing_logger.report_text("Starting evaluation on TEST SUBSET...\n")
subset_results = make_predictions(model, subset_loaders['test'], device)

# Report accuracy first, then loss, as single scalar values
for metric_title, result_key in (
    ("Test Subset Accuracy", "accuracy"),
    ("Test Subset Loss", "loss"),
):
    testing_logger.report_single_value(name=metric_title, value=subset_results[result_key])

# Per-class accuracy from the "labels" and "predictions" entries of the results
predicted, actual = subset_results["predictions"], subset_results["labels"]
class_acc = class_accuracies(
    actual,
    predicted,
    num_classes=testing_config["num_classes"],
)

# Tidy the class names for plotting: underscores become spaces and runs of
# whitespace collapse to a single space
formatted_class_names = []
for raw_name in subset_loaders['classNames']:
    words = raw_name.replace('_', ' ').split()
    formatted_class_names.append(" ".join(words))

# Build the per-class accuracy plot and send it to ClearML
acc_fig = plot_class_accuracies(class_acc, formatted_class_names)
testing_logger.report_matplotlib_figure(
    title="Subset Per-Class Accuracy",
    series="Class Accuracy",
    figure=acc_fig,
)


# --------- Complete -----------------
# Announce completion on stdout, log the final loss and accuracy (four
# decimal places each) to ClearML, then close the testing task.
print("\n------ Testing Complete ------")
final_loss = subset_results['loss']
final_accuracy = subset_results['accuracy']
results_summary = (
    "TEST SUBSET RESULTS:\n"
    f"Loss: {final_loss:.4f}\n"
    f"Accuracy: {final_accuracy:.4f}\n"
)
testing_logger.report_text(results_summary)
testing_task.close()