| """ | |
| Tests the correct computation of evaluation scores from BinaryClassificationEvaluator | |
| """ | |
| from __future__ import annotations | |
| import numpy as np | |
| from sklearn.metrics import accuracy_score, f1_score | |
| from sentence_transformers import ( | |
| evaluation, | |
| ) | |
def test_BinaryClassificationEvaluator_find_best_f1_and_threshold() -> None:
    """Check that the reported best F1 matches scikit-learn's F1 at the returned threshold.

    Random binary labels and random scores are passed to
    ``BinaryClassificationEvaluator.find_best_f1_and_threshold``. The returned
    threshold is then applied to the scores (``score >= threshold`` -> label 1)
    and the F1 of that labelling, computed with ``sklearn.metrics.f1_score``,
    must agree with the F1 reported by the evaluator.
    """
    # A local, seeded generator keeps the test reproducible (a failure can be
    # replayed exactly) without touching NumPy's global random state.
    rng = np.random.default_rng(42)
    y_true = rng.integers(0, 2, size=1000)
    y_pred_cosine = rng.standard_normal(1000)

    # Precision and recall are returned as well but are not checked here.
    (
        best_f1,
        best_precision,
        best_recall,
        threshold,
    ) = evaluation.BinaryClassificationEvaluator.find_best_f1_and_threshold(
        y_pred_cosine, y_true, high_score_more_similar=True
    )

    # Vectorized thresholding: scores at or above the threshold are positives.
    y_pred_labels = (y_pred_cosine >= threshold).astype(int)
    sklearn_f1score = f1_score(y_true, y_pred_labels)

    assert np.abs(best_f1 - sklearn_f1score) < 1e-6, (
        f"evaluator F1 {best_f1} != sklearn F1 {sklearn_f1score} at threshold {threshold}"
    )
def test_BinaryClassificationEvaluator_find_best_accuracy_and_threshold() -> None:
    """Check that the reported best accuracy matches scikit-learn's at the returned threshold.

    Random binary labels and random scores are passed to
    ``BinaryClassificationEvaluator.find_best_acc_and_threshold``. The returned
    threshold is then applied to the scores (``score >= threshold`` -> label 1)
    and the accuracy of that labelling, computed with
    ``sklearn.metrics.accuracy_score``, must agree with the accuracy reported
    by the evaluator.
    """
    # A local, seeded generator keeps the test reproducible (a failure can be
    # replayed exactly) without touching NumPy's global random state.
    rng = np.random.default_rng(42)
    y_true = rng.integers(0, 2, size=1000)
    y_pred_cosine = rng.standard_normal(1000)

    (
        max_acc,
        threshold,
    ) = evaluation.BinaryClassificationEvaluator.find_best_acc_and_threshold(
        y_pred_cosine, y_true, high_score_more_similar=True
    )

    # Vectorized thresholding: scores at or above the threshold are positives.
    y_pred_labels = (y_pred_cosine >= threshold).astype(int)
    sklearn_acc = accuracy_score(y_true, y_pred_labels)

    assert np.abs(max_acc - sklearn_acc) < 1e-6, (
        f"evaluator accuracy {max_acc} != sklearn accuracy {sklearn_acc} at threshold {threshold}"
    )