update

377dccd verified 3 months ago

5.81 kB

	# Copyright 2020-present, Pietro Buzzega, Matteo Boschini, Angelo Porrello, Davide Abati, Simone Calderara.
	# All rights reserved.
	# This source code is licensed under the license found in the
	# LICENSE file in the root directory of this source tree.

	import numpy as np
	import torch
	from sklearn import metrics

	def backward_transfer(results):
	    """Return the mean backward transfer over every task except the last.

	    For each task index ``i`` below ``len(results) - 1`` the entry of the
	    last row, ``results[-1][i]``, is compared with the diagonal entry
	    ``results[i][i]``; the differences are averaged with ``np.mean``.

	    Args:
	        results: sequence of rows indexed as ``results[row][task]``
	            (presumably the per-task scores measured after each training
	            step -- confirm against the caller).

	    Returns:
	        The mean of ``results[-1][i] - results[i][i]``. With fewer than two
	        rows there is nothing to average and ``np.mean`` yields NaN.
	    """
	    deltas = [
	        results[-1][task] - results[task][task]
	        for task in range(len(results) - 1)
	    ]
	    return np.mean(deltas)

	def forward_transfer(results, random_results):
	    """Return the mean forward transfer over every task except the first.

	    For each task index ``i >= 1`` the entry ``results[i - 1][i]`` (the row
	    just before task ``i``) is compared with ``random_results[i]``; the
	    differences are averaged with ``np.mean``.

	    Args:
	        results: sequence of rows indexed as ``results[row][task]``; row
	            ``i - 1`` must already contain an entry for task ``i``.
	        random_results: sequence of per-task reference values (presumably
	            the scores of an untrained model -- confirm against the caller).

	    Returns:
	        The mean of ``results[i - 1][i] - random_results[i]``. With fewer
	        than two rows there is nothing to average and ``np.mean`` yields NaN.
	    """
	    gains = [
	        results[task - 1][task] - random_results[task]
	        for task in range(1, len(results))
	    ]
	    return np.mean(gains)

	def forgetting(results):
	    """Return the mean forgetting over every task except the last.

	    Rows shorter than ``len(results)`` are right-padded with ``0.0`` so the
	    rows form a rectangular matrix. For each task ``i`` below
	    ``len(results) - 1`` the forgetting is the column-wise maximum minus
	    the entry in the last row.

	    The padding is applied to a private copy: the caller's ``results`` is
	    left untouched (the previous implementation extended the caller's row
	    lists in place).

	    Args:
	        results: sequence of rows indexed as ``results[row][task]``; rows
	            may be shorter than the number of rows (presumably because
	            later tasks had not been evaluated yet -- confirm against the
	            caller).

	    Returns:
	        The ``np.mean`` of ``max_over_rows[i] - last_row[i]``. With a single
	        row there is nothing to average and ``np.mean`` yields NaN.
	    """
	    n_tasks = len(results)
	    # Pad copies of the rows; a negative pad length yields an empty list,
	    # so rows that are already long enough are copied unchanged.
	    padded = [list(row) + [0.0] * (n_tasks - len(row)) for row in results]
	    best = np.max(np.array(padded), axis=0)

	    drops = [best[task] - padded[-1][task] for task in range(n_tasks - 1)]
	    return np.mean(drops)
	def calc_aurc_eaurc(softmax, correct):
	    """Compute the two values returned by ``aurc_eaurc`` for a set of predictions.

	    The confidence of each sample is its largest softmax entry. Samples are
	    ranked from most to least confident (ties keep their input order), the
	    cumulative risk is obtained from ``coverage_risk`` and then summarised
	    by ``aurc_eaurc``.

	    Args:
	        softmax: array-like with one row of class probabilities per sample.
	        correct: array-like with one entry per sample; an entry equal to 0
	            is counted as an error by ``coverage_risk``.

	    Returns:
	        The ``(aurc, eaurc)`` pair produced by ``aurc_eaurc``.
	    """
	    confidence = np.max(np.array(softmax), 1)
	    is_correct = np.array(correct)

	    ranked = sorted(
	        zip(confidence, is_correct),
	        key=lambda pair: pair[0],
	        reverse=True,
	    )
	    ranked_confidence, ranked_correct = zip(*ranked)

	    # Only the risk curve is needed; the coverage values are discarded.
	    risks, _coverages = coverage_risk(ranked_confidence, ranked_correct)
	    aurc, eaurc = aurc_eaurc(risks)

	    return aurc, eaurc

	def calc_fpr_aupr(softmax, correct):
	    """Score how well the maximum softmax probability separates hits from errors.

	    Args:
	        softmax: array-like with one row of class probabilities per sample.
	        correct: array-like of per-sample correctness labels passed to
	            scikit-learn as the binary ground truth (presumably 1 for a
	            correct prediction and 0 for an error -- confirm against the
	            caller).

	    Returns:
	        A tuple ``(auroc, aupr_success, aupr_err, fpr_in_tpr_95)``:

	        * ``auroc`` -- area under the ROC curve of confidence vs. ``correct``.
	        * ``aupr_success`` -- area under the precision-recall curve with
	          ``correct`` as the positive class.
	        * ``aupr_err`` -- average precision with the labels flipped
	          (``1 - correct``) and the negated confidence as the score.
	        * ``fpr_in_tpr_95`` -- false-positive rate at the ROC point whose
	          true-positive rate is closest to 0.95.
	    """
	    confidence = np.max(np.array(softmax), 1)
	    is_correct = np.array(correct)

	    fpr, tpr, _ = metrics.roc_curve(is_correct, confidence)
	    auroc = metrics.auc(fpr, tpr)
	    # Pick the ROC operating point nearest to 95% TPR.
	    fpr_in_tpr_95 = fpr[np.argmin(np.abs(tpr - 0.95))]

	    precision, recall, _ = metrics.precision_recall_curve(is_correct, confidence)
	    aupr_success = metrics.auc(recall, precision)

	    # Errors become the positive class, so both labels and scores are flipped.
	    flipped_labels = -1 * is_correct + 1
	    flipped_scores = -1 * confidence
	    aupr_err = metrics.average_precision_score(flipped_labels, flipped_scores)

	    return auroc, aupr_success, aupr_err, fpr_in_tpr_95

	def calc_ace(softmax_outputs, targets, num_bins=15):
	    """Calculate the Adaptive Calibration Error (ACE).

	    Bin edges are the quantiles of the per-sample confidence (the largest
	    softmax entry), with the outermost edges forced to 0.0 and 1.0 and
	    duplicate edges merged. Every bin is half-open ``[lower, upper)``
	    except the last, which also includes its upper edge. The result is the
	    sample-weighted mean of ``|mean confidence - accuracy|`` over the
	    non-empty bins.

	    Args:
	        softmax_outputs: array of shape (n_samples, n_classes) holding the
	            softmax probabilities.
	        targets: array of shape (n_samples,) holding the true labels.
	        num_bins: requested number of quantile bins; fewer are used when
	            quantile edges coincide.

	    Returns:
	        The Adaptive Calibration Error value.
	    """
	    confidences = np.max(softmax_outputs, axis=1)
	    hits = (np.argmax(softmax_outputs, axis=1) == targets).astype(float)

	    edges = np.quantile(confidences, np.linspace(0, 1, num_bins + 1))
	    edges[0], edges[-1] = 0.0, 1.0
	    edges = np.unique(edges)

	    n_samples = len(confidences)
	    last_bin = len(edges) - 2
	    ace = 0.0

	    for idx, (lower, upper) in enumerate(zip(edges[:-1], edges[1:])):
	        # Only the final bin is closed on the right so that 1.0 is counted.
	        if idx == last_bin:
	            below = confidences <= upper
	        else:
	            below = confidences < upper
	        members = (confidences >= lower) & below

	        count = np.sum(members)
	        if count == 0:
	            continue

	        gap = abs(np.mean(confidences[members]) - np.mean(hits[members]))
	        ace += (count / n_samples) * gap

	    return ace

	def calc_ece(softmax, label, bins=15):
	    """Compute the calibration error over equal-width confidence bins.

	    The interval [0, 1] is split into ``bins`` equal-width bins of the form
	    ``(lower, upper]``. For each non-empty bin the absolute gap between the
	    mean confidence (largest softmax entry) and the accuracy is weighted by
	    the fraction of samples that fall into the bin, and the weighted gaps
	    are summed.

	    Args:
	        softmax: array-like with one row of class probabilities per sample;
	            converted with ``torch.tensor``.
	        label: array-like of true class indices; converted with
	            ``torch.tensor`` and cast to long for comparison.
	        bins: number of equal-width bins.

	    Returns:
	        The accumulated error as a Python float.
	    """
	    edges = torch.linspace(0, 1, bins + 1)

	    probs = torch.tensor(softmax)
	    targets = torch.tensor(label)

	    confidence, predicted = torch.max(probs, 1)
	    hits = predicted.eq(targets.long())

	    ece = torch.zeros(1)

	    for lower, upper in zip(edges[:-1], edges[1:]):
	        members = confidence.gt(lower.item()) & confidence.le(upper.item())
	        if not members.any():
	            continue

	        weight = members.float().mean()
	        bin_accuracy = hits[members].float().mean()
	        bin_confidence = confidence[members].mean()

	        ece += torch.abs(bin_confidence - bin_accuracy) * weight

	    return ece.item()

	# NLL & Brier Score
	def calc_nll_brier(softmax, logit, label):
	    """Compute the negative log-likelihood and the Brier score.

	    The Brier score is the mean over samples of the squared distance
	    between the softmax row and the one-hot encoding of the label. The
	    NLL is obtained by applying log-softmax to ``logit`` along dim 1 and
	    passing the result to ``calc_nll``.

	    Args:
	        softmax: array of shape (n_samples, n_classes) with class
	            probabilities.
	        logit: array of shape (n_samples, n_classes) with the raw scores;
	            its second dimension gives the number of classes.
	        label: integer class indices, one per sample.

	    Returns:
	        A tuple ``(nll, brier_score)`` where ``nll`` is a Python float.
	    """
	    num_classes = logit.shape[1]
	    one_hot = np.eye(num_classes)[label]
	    squared_error = (softmax - one_hot) ** 2
	    brier_score = np.mean(np.sum(squared_error, axis=1))

	    logit_t = torch.tensor(logit, dtype=torch.float)
	    label_t = torch.tensor(label, dtype=torch.int)
	    nll = calc_nll(torch.log_softmax(logit_t, dim=1), label_t)

	    return nll.item(), brier_score

	# Calc NLL
	def calc_nll(log_softmax, label):
	    """Return the mean negative log-likelihood of the labelled classes.

	    Selects ``log_softmax[i, label[i]]`` for every sample with a single
	    integer-array indexing operation instead of a per-sample Python loop,
	    then averages the negated values.

	    Args:
	        log_softmax: tensor of shape (n_samples, n_classes) holding
	            log-probabilities.
	        label: integer tensor of shape (n_samples,) holding class indices.

	    Returns:
	        A 0-dim float32 tensor equal to ``-sum(selected) / n_samples``.
	    """
	    # Index tensors are placed on the same device as the indexed tensor.
	    targets = label.long().to(log_softmax.device)
	    rows = torch.arange(len(targets), device=log_softmax.device)

	    # Cast to float32 to keep the dtype the loop-based version produced.
	    picked = log_softmax[rows, targets].to(torch.float)

	    return -picked.sum() / len(picked)

	# Calc coverage, risk
	def coverage_risk(confidence, correctness):
	    """Build the cumulative risk and coverage curves.

	    Walking through the samples in the given order, the coverage after
	    ``k`` samples is ``k / len(confidence)`` and the risk is the fraction
	    of those ``k`` samples whose ``correctness`` entry equals 0.

	    Args:
	        confidence: sequence of per-sample confidences; only its length is
	            used here (the caller is expected to have sorted it already).
	        correctness: sequence aligned with ``confidence``; an entry equal
	            to 0 counts as an error.

	    Returns:
	        A tuple ``(risk_list, coverage_list)`` of equal-length lists.
	    """
	    total = len(confidence)
	    coverage_list = [seen / total for seen in range(1, total + 1)]

	    risk_list = []
	    errors = 0
	    for seen in range(1, total + 1):
	        if correctness[seen - 1] == 0:
	            errors += 1
	        risk_list.append(errors / seen)

	    return risk_list, coverage_list

	# Calc aurc, eaurc
	def aurc_eaurc(risk_list):
	    """Return the area under the risk curve and its excess over the optimum.

	    The area is the sum of the risk values, each weighted by
	    ``1 / len(risk_list)``. The optimal area is computed from the final
	    risk ``r`` as ``r + (1 - r) * log(1 - r)``.

	    When ``r`` is 1 the term ``(1 - r) * log(1 - r)`` would evaluate to
	    ``0 * -inf`` (NaN); its limit is 0, so the optimal area is taken to be
	    ``r`` in that case and the excess stays finite.

	    Args:
	        risk_list: non-empty sequence of cumulative risk values; the last
	            entry is the risk at full coverage.

	    Returns:
	        A tuple ``(aurc, eaurc)`` where ``eaurc`` is ``aurc`` minus the
	        optimal area.
	    """
	    r = risk_list[-1]
	    if r < 1:
	        optimal_risk_area = r + (1 - r) * np.log(1 - r)
	    else:
	        # Limit of the expression above as r -> 1; avoids log(0).
	        optimal_risk_area = r

	    step = 1 / len(risk_list)
	    aurc = sum(risk_value * step for risk_value in risk_list)
	    eaurc = aurc - optimal_risk_area

	    return aurc, eaurc