# ZeroShot-AD / evaluation / utils / metrics.py
# -----------------------------------------------------------------------------
# Do Not Alter This File!
# -----------------------------------------------------------------------------
# The following code is part of the logic used for loading and evaluating your
# output scores. Please DO NOT modify this section, as upon your submission,
# the whole evaluation logic will be overwritten by the original code.
# -----------------------------------------------------------------------------
import numpy as np
from sklearn.metrics import (
auc,
roc_auc_score,
average_precision_score,
precision_recall_curve,
)
from skimage import measure
import warnings
# ref: https://github.com/gudovskiy/cflow-ad/blob/master/train.py
def cal_pro_score(masks, amaps, max_step=200, expect_fpr=0.3):
    """Compute AUPRO (area under the per-region-overlap curve).

    Sweeps ``max_step`` thresholds over the anomaly-map value range. At each
    threshold it records (a) the mean per-region overlap (PRO) across all
    connected ground-truth regions and (b) the pixel-level false-positive
    rate (FPR). The curve is then restricted to ``fpr < expect_fpr``, the
    retained FPR values are min-max rescaled to [0, 1], and PRO is
    integrated over them with the trapezoidal rule.

    Args:
        masks: binary ground-truth masks, shape (N, H, W).
        amaps: continuous anomaly maps, same shape as ``masks``.
        max_step: number of thresholds to sweep between amaps.min() and
            amaps.max().
        expect_fpr: FPR cut-off defining the integration range.

    Returns:
        float: the AUPRO score.
    """
    binary_amaps = np.zeros_like(amaps, dtype=bool)
    min_th, max_th = amaps.min(), amaps.max()
    delta = (max_th - min_th) / max_step
    pros, fprs, ths = [], [], []
    for th in np.arange(min_th, max_th, delta):
        # Binarize the anomaly maps at the current threshold.
        binary_amaps[amaps <= th], binary_amaps[amaps > th] = 0, 1
        pro = []
        for binary_amap, mask in zip(binary_amaps, masks):
            # Per-region overlap: fraction of each GT region's pixels
            # flagged anomalous at this threshold.
            for region in measure.regionprops(measure.label(mask)):
                tp_pixels = binary_amap[region.coords[:, 0], region.coords[:, 1]].sum()
                pro.append(tp_pixels / region.area)
        inverse_masks = 1 - masks
        fp_pixels = np.logical_and(inverse_masks, binary_amaps).sum()
        fpr = fp_pixels / inverse_masks.sum()
        pros.append(np.array(pro).mean())
        fprs.append(fpr)
        ths.append(th)
    pros, fprs, ths = np.array(pros), np.array(fprs), np.array(ths)
    idxes = fprs < expect_fpr
    fprs = fprs[idxes]
    # Fix: removed a leftover debug `print("fprs: ", fprs)` that polluted
    # stdout on every evaluation run; it had no effect on the result.
    # Rescale the retained FPR range to [0, 1] before integrating.
    fprs = (fprs - fprs.min()) / (fprs.max() - fprs.min())
    pro_auc = auc(fprs, pros[idxes])
    return pro_auc
def compute_metrics(gt_sp=None, pr_sp=None, gt_px=None, pr_px=None):
    """Compute image-level and pixel-level anomaly-detection metrics.

    Args:
        gt_sp: image-level ground-truth labels (0/1), or None.
        pr_sp: image-level anomaly scores, or None.
        gt_px: pixel-level ground-truth masks, or None.
        pr_px: pixel-level anomaly maps, or None.

    Returns:
        tuple: ([auroc_sp, f1_sp, ap_sp], [auroc_px, f1_px, ap_px, aupro]).
        A metric group degenerates to zeros when its inputs are missing or
        the ground truth contains a single class only.
    """
    # --- image-level (classification) metrics ---
    sp_degenerate = (
        gt_sp is None
        or pr_sp is None
        or gt_sp.sum() == 0
        or gt_sp.sum() == gt_sp.shape[0]
    )
    if sp_degenerate:
        auroc_sp = f1_sp = ap_sp = 0
    else:
        auroc_sp = roc_auc_score(gt_sp, pr_sp)
        ap_sp = average_precision_score(gt_sp, pr_sp)
        prec, rec, _ = precision_recall_curve(gt_sp, pr_sp)
        # Best F1 over the PR curve; drop NaN/inf from 0/0 divisions.
        f1_curve = (2 * prec * rec) / (prec + rec)
        f1_sp = np.max(f1_curve[np.isfinite(f1_curve)])
    # --- pixel-level (segmentation) metrics ---
    if gt_px is None or pr_px is None or gt_px.sum() == 0:
        auroc_px = f1_px = ap_px = aupro = 0
    else:
        gt_flat, pr_flat = gt_px.ravel(), pr_px.ravel()
        auroc_px = roc_auc_score(gt_flat, pr_flat)
        ap_px = average_precision_score(gt_flat, pr_flat)
        prec, rec, _ = precision_recall_curve(gt_flat, pr_flat)
        f1_curve = (2 * prec * rec) / (prec + rec)
        f1_px = np.max(f1_curve[np.isfinite(f1_curve)])
        aupro = cal_pro_score(gt_px.squeeze(), pr_px.squeeze())
    return [auroc_sp, f1_sp, ap_sp], [auroc_px, f1_px, ap_px, aupro]
def compute_auroc(labels, scores):
    """
    Computes the Area Under the Receiver Operating Characteristic Curve (AUROC).

    Args:
        labels (list or np.ndarray): True binary labels (0 for normal, 1 for anomaly).
        scores (list or np.ndarray): Predicted scores or probabilities for the positive class.

    Returns:
        float: AUROC score. Returns None if AUROC is undefined.

    Raises:
        ValueError: if ``labels`` contains values other than 0 and 1.
    """
    # Convert inputs to numpy arrays
    labels = np.array(labels)
    scores = np.array(scores)
    unique_labels = np.unique(labels)
    # Reject label values outside {0, 1}. Bug fix: the original used a strict
    # equality check (`set(unique_labels) != {0, 1}`), which also raised for
    # single-class inputs (all 0s or all 1s), making the warn-and-return-None
    # branch below unreachable dead code, contrary to the docstring.
    if not set(unique_labels).issubset({0, 1}):
        raise ValueError(f"Labels must be binary (0 and 1). Found labels: {unique_labels}")
    # AUROC is undefined unless both classes are present.
    if len(unique_labels) < 2:
        warnings.warn("Only one class present in labels. AUROC is undefined.")
        return None
    try:
        auroc = roc_auc_score(labels, scores)
        return auroc
    except ValueError as e:
        warnings.warn(f"Error computing AUROC: {e}")
        return None