"""
Tests the correct computation of evaluation scores from BinaryClassificationEvaluator
"""
from __future__ import annotations
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
from sentence_transformers import (
evaluation,
)
def test_BinaryClassificationEvaluator_find_best_f1_and_threshold() -> None:
    """Test that the best F1 score found by the evaluator matches sklearn.

    Generates random labels and scores, asks the evaluator for its best
    F1/threshold pair, then recomputes F1 with sklearn at that threshold
    and checks the two agree.
    """
    # Seed the generator so the test is deterministic across runs.
    rng = np.random.default_rng(42)
    y_true = rng.integers(0, 2, 1000)
    y_pred_cosine = rng.standard_normal(1000)
    (
        best_f1,
        best_precision,
        best_recall,
        threshold,
    ) = evaluation.BinaryClassificationEvaluator.find_best_f1_and_threshold(
        y_pred_cosine, y_true, high_score_more_similar=True
    )
    # Binarize the scores at the reported threshold and cross-check
    # the evaluator's F1 against sklearn's reference implementation.
    y_pred_labels = [1 if pred >= threshold else 0 for pred in y_pred_cosine]
    sklearn_f1score = f1_score(y_true, y_pred_labels)
    assert np.abs(best_f1 - sklearn_f1score) < 1e-6
def test_BinaryClassificationEvaluator_find_best_accuracy_and_threshold() -> None:
    """Test that the best accuracy found by the evaluator matches sklearn.

    Generates random labels and scores, asks the evaluator for its best
    accuracy/threshold pair, then recomputes accuracy with sklearn at
    that threshold and checks the two agree.
    """
    # Seed the generator so the test is deterministic across runs.
    rng = np.random.default_rng(42)
    y_true = rng.integers(0, 2, 1000)
    y_pred_cosine = rng.standard_normal(1000)
    (
        max_acc,
        threshold,
    ) = evaluation.BinaryClassificationEvaluator.find_best_acc_and_threshold(
        y_pred_cosine, y_true, high_score_more_similar=True
    )
    # Binarize the scores at the reported threshold and cross-check
    # the evaluator's accuracy against sklearn's reference implementation.
    y_pred_labels = [1 if pred >= threshold else 0 for pred in y_pred_cosine]
    sklearn_acc = accuracy_score(y_true, y_pred_labels)
    assert np.abs(max_acc - sklearn_acc) < 1e-6