File size: 2,702 Bytes
bd33eac |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
import logging
from datasets import load_dataset
from sentence_transformers import SparseEncoder
from sentence_transformers.sparse_encoder.evaluation import SparseBinaryClassificationEvaluator
# Emit INFO-level log records as bare messages (no level or logger-name prefix)
logging.basicConfig(level=logging.INFO, format="%(message)s")

# Load the SPLADE sparse encoder that will be evaluated
model = SparseEncoder("naver/splade-cocondenser-ensembledistil")

# Take the last 1000 rows of the "pair-class" subset of the Quora duplicates dataset,
# which has two text columns and a class label column
# (https://huggingface.co/datasets/sentence-transformers/quora-duplicates)
eval_dataset = load_dataset(
    "sentence-transformers/quora-duplicates",
    "pair-class",
    split="train[-1000:]",
)

# Pull out the columns the evaluator consumes: both sides of each pair and the label
first_sentences = eval_dataset["sentence1"]
second_sentences = eval_dataset["sentence2"]
pair_labels = eval_dataset["label"]

# Set up the evaluator, requesting all four similarity functions
binary_acc_evaluator = SparseBinaryClassificationEvaluator(
    sentences1=first_sentences,
    sentences2=second_sentences,
    labels=pair_labels,
    similarity_fn_names=["cosine", "dot", "euclidean", "manhattan"],
    name="quora_duplicates_dev",
    show_progress_bar=True,
)

# Run the evaluator on the model and keep the metrics it returns
results = binary_acc_evaluator(model)
"""
Accuracy with Cosine-Similarity: 75.00 (Threshold: 0.8668)
F1 with Cosine-Similarity: 67.22 (Threshold: 0.5974)
Precision with Cosine-Similarity: 54.18
Recall with Cosine-Similarity: 88.51
Average Precision with Cosine-Similarity: 67.81
Matthews Correlation with Cosine-Similarity: 49.56
Accuracy with Dot-Product: 76.50 (Threshold: 23.4236)
F1 with Dot-Product: 67.00 (Threshold: 19.0095)
Precision with Dot-Product: 55.93
Recall with Dot-Product: 83.54
Average Precision with Dot-Product: 65.89
Matthews Correlation with Dot-Product: 48.88
Accuracy with Euclidean-Distance: 67.70 (Threshold: -10.0041)
F1 with Euclidean-Distance: 48.60 (Threshold: -0.1876)
Precision with Euclidean-Distance: 32.13
Recall with Euclidean-Distance: 99.69
Average Precision with Euclidean-Distance: 20.52
Matthews Correlation with Euclidean-Distance: -4.59
Accuracy with Manhattan-Distance: 67.70 (Threshold: -103.0263)
F1 with Manhattan-Distance: 48.60 (Threshold: -0.8532)
Precision with Manhattan-Distance: 32.13
Recall with Manhattan-Distance: 99.69
Average Precision with Manhattan-Distance: 21.05
Matthews Correlation with Manhattan-Distance: -4.59
Model Sparsity: Active Dimensions: 61.2, Sparsity Ratio: 0.9980
"""
# Show which metric the evaluator designates as its primary one
primary_line = f"Primary metric: {binary_acc_evaluator.primary_metric}"
print(primary_line)
# => Primary metric: quora_duplicates_dev_max_ap

# Look that metric up in the returned results and show it to four decimal places
value_line = f"Primary metric value: {results[binary_acc_evaluator.primary_metric]:.4f}"
print(value_line)
# => Primary metric value: 0.6781
|