Image Classification
English
zhanwang's picture
update
377dccd verified
# Copyright 2020-present, Pietro Buzzega, Matteo Boschini, Angelo Porrello, Davide Abati, Simone Calderara.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
import numpy as np
import torch
from sklearn import metrics
def backward_transfer(results):
    """Return the mean of ``results[-1][i] - results[i][i]`` over all but the last task.

    Args:
        results: square-ish nested sequence indexed as ``results[row][col]``.
            Presumably ``results[i][j]`` is the score on task ``j`` after
            training on task ``i`` -- confirm against the caller.

    Returns:
        ``np.mean`` of the per-task differences (NaN when there are fewer
        than two rows, because the list of differences is then empty).
    """
    deltas = [
        results[-1][task] - results[task][task]
        for task in range(len(results) - 1)
    ]
    return np.mean(deltas)
def forward_transfer(results, random_results):
    """Return the mean of ``results[i - 1][i] - random_results[i]`` for ``i >= 1``.

    Args:
        results: nested sequence indexed as ``results[row][col]``; row
            ``i - 1`` must contain an entry for column ``i``.
        random_results: sequence of per-task baseline scores (presumably
            those of a randomly initialised model -- confirm with caller).

    Returns:
        ``np.mean`` of the per-task differences (NaN when there are fewer
        than two rows).
    """
    gains = [
        results[task - 1][task] - random_results[task]
        for task in range(1, len(results))
    ]
    return np.mean(gains)
def forgetting(results):
    """Return the mean drop from each task's best score to its final score.

    Rows shorter than the number of tasks are right-padded with ``0.0`` so the
    scores form a rectangular matrix. The padding is done on copies: the
    caller's ``results`` lists are NOT modified.

    Args:
        results: nested sequence indexed as ``results[row][col]``; rows may be
            shorter than ``len(results)``. Presumably ``results[i][j]`` is the
            score on task ``j`` after training on task ``i`` -- confirm
            against the caller.

    Returns:
        ``np.mean`` over the first ``len(results) - 1`` columns of
        ``max(column) - last_row[column]`` (NaN when there is a single row).
    """
    n_tasks = len(results)
    # Pad copies of the rows instead of extending the caller's lists in place.
    padded = [list(row) + [0.0] * (n_tasks - len(row)) for row in results]
    np_res = np.array(padded)
    best = np.max(np_res, axis=0)
    # The last task is excluded: it cannot have been forgotten yet.
    return np.mean(best[:n_tasks - 1] - np_res[-1, :n_tasks - 1])
def calc_aurc_eaurc(softmax, correct):
    """Compute AURC and E-AURC from per-sample probabilities and correctness.

    Samples are ranked by their maximum probability along axis 1, highest
    first, and the ranked correctness flags are passed through
    ``coverage_risk`` and then ``aurc_eaurc``.

    Args:
        softmax: 2-D array-like; the maximum is taken along axis 1.
        correct: per-sample correctness flags (a value equal to 0 counts as
            an error in ``coverage_risk``).

    Returns:
        The ``(aurc, eaurc)`` pair produced by ``aurc_eaurc``.
    """
    probs = np.array(softmax)
    hits = np.array(correct)
    top_conf = np.max(probs, 1)

    # Stable descending sort on confidence only; ties keep input order.
    ranked = sorted(zip(top_conf, hits), key=lambda pair: pair[0], reverse=True)
    ranked_conf, ranked_hits = zip(*ranked)

    risks, _coverages = coverage_risk(ranked_conf, ranked_hits)
    aurc, eaurc = aurc_eaurc(risks)
    return aurc, eaurc
def calc_fpr_aupr(softmax, correct):
    """Compute AUROC, AUPR (success / error) and FPR at ~95% TPR.

    The score used for every curve is the maximum probability along axis 1;
    the binary target is ``correct``.

    Args:
        softmax: 2-D array-like; the maximum is taken along axis 1.
        correct: per-sample correctness flags used as the positive label.

    Returns:
        Tuple ``(auroc, aupr_success, aupr_err, fpr_in_tpr_95)``.
    """
    probs = np.array(softmax)
    is_correct = np.array(correct)
    confidence = np.max(probs, 1)

    fpr, tpr, _ = metrics.roc_curve(is_correct, confidence)
    auroc = metrics.auc(fpr, tpr)
    # FPR at the ROC operating point whose TPR is closest to 0.95.
    fpr_in_tpr_95 = fpr[np.argmin(np.abs(tpr - 0.95))]

    precision, recall, _ = metrics.precision_recall_curve(is_correct, confidence)
    aupr_success = metrics.auc(recall, precision)

    # Errors become the positive class: flip the labels and negate the score.
    aupr_err = metrics.average_precision_score(-1 * is_correct + 1, -1 * confidence)
    return auroc, aupr_success, aupr_err, fpr_in_tpr_95
def calc_ace(softmax_outputs, targets, num_bins=15):
    """
    Calculate Adaptive Calibration Error (ACE).

    Bin edges are the quantiles of the per-sample confidence (so bins hold
    roughly equal numbers of samples), with the outermost edges forced to
    0.0 and 1.0 and duplicate edges merged.

    Args:
        softmax_outputs: numpy array of shape (n_samples, n_classes) - softmax probabilities
        targets: numpy array of shape (n_samples,) - true labels
        num_bins: number of bins requested (fewer are used if edges coincide)
    Returns:
        ace: sum over non-empty bins of
            (bin size / n_samples) * |mean confidence - mean accuracy|
    """
    top_prob = np.max(softmax_outputs, axis=1)
    predicted = np.argmax(softmax_outputs, axis=1)
    hit = (predicted == targets).astype(float)

    edges = np.quantile(top_prob, np.linspace(0, 1, num_bins + 1))
    edges[0], edges[-1] = 0.0, 1.0
    edges = np.unique(edges)

    n_total = len(top_prob)
    last_bin = len(edges) - 2
    ace = 0.0
    for idx, (lower, upper) in enumerate(zip(edges[:-1], edges[1:])):
        # Only the final bin is closed on the right, so confidence 1.0 is counted.
        under_upper = top_prob <= upper if idx == last_bin else top_prob < upper
        mask = (top_prob >= lower) & under_upper
        count = np.sum(mask)
        if count > 0:
            gap = abs(np.mean(top_prob[mask]) - np.mean(hit[mask]))
            ace += (count / n_total) * gap
    return ace
def calc_ece(softmax, label, bins=15):
    """Compute Expected Calibration Error over equal-width confidence bins.

    Confidence is the maximum probability along dim 1. The bins are the
    ``bins`` half-open intervals ``(lower, upper]`` that evenly split
    ``[0, 1]``.

    Args:
        softmax: data convertible by ``torch.tensor``; reduced along dim 1.
        label: data convertible by ``torch.tensor``; compared (as long) with
            the argmax prediction.
        bins: number of equal-width bins.

    Returns:
        Python float: sum over non-empty bins of
        ``|mean confidence - accuracy| * fraction of samples in the bin``.
    """
    probs = torch.tensor(softmax)
    targets = torch.tensor(label)
    confidence, predicted = torch.max(probs, 1)
    is_correct = predicted.eq(targets.long())

    edges = torch.linspace(0, 1, bins + 1).tolist()
    ece = torch.zeros(1)
    for lower, upper in zip(edges[:-1], edges[1:]):
        mask = confidence.gt(lower) & confidence.le(upper)
        weight = mask.float().mean()
        # Kept as "> 0.0" so a NaN weight (empty batch) also skips the bin.
        if weight.item() > 0.0:
            bin_accuracy = is_correct[mask].float().mean()
            bin_confidence = confidence[mask].mean()
            ece += torch.abs(bin_confidence - bin_accuracy) * weight
    return ece.item()
# NLL & Brier Score
def calc_nll_brier(softmax, logit, label):
    """Compute the negative log-likelihood and the Brier score.

    Args:
        softmax: array of probabilities, broadcast against the one-hot labels
            (assumed shape ``(n_samples, n_classes)`` -- confirm with caller).
        logit: array with a ``.shape`` attribute; ``shape[1]`` is used as the
            number of classes.
        label: integer class indices, used to index an identity matrix.

    Returns:
        Tuple ``(nll, brier_score)``: ``nll`` is a Python float computed by
        ``calc_nll`` on the log-softmax of ``logit``; ``brier_score`` is the
        mean over samples of the summed squared error against one-hot labels.
    """
    n_classes = logit.shape[1]
    one_hot = np.eye(n_classes)[label]
    squared_err = (softmax - one_hot) ** 2
    brier_score = np.mean(np.sum(squared_err, axis=1))

    logit_t = torch.tensor(logit, dtype=torch.float)
    label_t = torch.tensor(label, dtype=torch.int)
    log_probs = torch.nn.functional.log_softmax(logit_t, dim=1)
    nll = calc_nll(log_probs, label_t)
    return nll.item(), brier_score
# Calc NLL
def calc_nll(log_softmax, label):
    """Return the mean negative log-probability assigned to the true labels.

    The selected log-probabilities are gathered in one vectorized indexing
    operation rather than a per-sample Python loop.

    Args:
        log_softmax: 2-D tensor of log-probabilities, indexed ``[sample, class]``.
        label: 1-D integer tensor of class indices, one per sample.

    Returns:
        0-dim float tensor equal to ``-sum(log_softmax[i, label[i]]) / n``
        (NaN for an empty batch because of the division by zero).
    """
    # Advanced indexing needs long indices on the same device as the data.
    cols = label.to(device=log_softmax.device, dtype=torch.long)
    rows = torch.arange(cols.shape[0], device=log_softmax.device)
    picked = log_softmax[rows, cols].to(torch.float)
    return -picked.sum() / len(picked)
# Calc coverage, risk
def coverage_risk(confidence, correctness):
    """Build the cumulative risk and coverage lists for ranked samples.

    For each prefix of length ``k`` (1-based), coverage is ``k / n`` and risk
    is the fraction of the first ``k`` entries whose correctness equals 0.
    The inputs are used in the order given; only the length of
    ``confidence`` is read.

    Args:
        confidence: sequence whose length ``n`` sets the number of samples.
        correctness: indexable sequence of per-sample correctness flags.

    Returns:
        Tuple ``(risk_list, coverage_list)`` of two lists of length ``n``.
    """
    total = len(confidence)
    risks = []
    coverages = []
    errors = 0
    for rank in range(1, total + 1):
        coverages.append(rank / total)
        errors += 1 if correctness[rank - 1] == 0 else 0
        risks.append(errors / rank)
    return risks, coverages
# Calc aurc, eaurc
def aurc_eaurc(risk_list):
    """Compute AURC and excess-AURC from a cumulative risk list.

    AURC is the mean of ``risk_list``. E-AURC subtracts the area of the
    optimal risk-coverage curve, ``r + (1 - r) * log(1 - r)``, where ``r``
    is the final (full-coverage) risk.

    Args:
        risk_list: non-empty sequence of cumulative risks, one per coverage
            level; the last entry is the risk at full coverage.

    Returns:
        Tuple ``(aurc, eaurc)``.

    Raises:
        IndexError: if ``risk_list`` is an empty list.
    """
    r = risk_list[-1]
    aurc = sum(risk_list) / len(risk_list)
    if r == 1:
        # (1 - r) * log(1 - r) -> 0 as r -> 1; evaluating it directly gives
        # 0 * -inf = NaN, so use the analytic limit instead.
        optimal_risk_area = 1.0
    else:
        optimal_risk_area = r + (1 - r) * np.log(1 - r)
    eaurc = aurc - optimal_risk_area
    return aurc, eaurc