File size: 1,816 Bytes
bd33eac |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import logging
from datasets import load_dataset
from sentence_transformers import SparseEncoder
from sentence_transformers.sparse_encoder.evaluation import SparseRerankingEvaluator
# Log bare messages (no level/timestamp prefix) at INFO verbosity.
logging.basicConfig(level=logging.INFO, format="%(message)s")

# Sparse encoder to be evaluated.
model = SparseEncoder("naver/splade-cocondenser-ensembledistil")

# Evaluation data: the first 1000 rows of the MS MARCO v1.1 validation split.
# Each row provides a query together with its candidate passages.
eval_dataset = load_dataset(
    "microsoft/ms_marco",
    "v1.1",
    split="validation",
).select(range(1000))
def _to_reranking_sample(row):
    """Convert one dataset row into a query/positive/negative sample dict.

    Passages whose ``is_selected`` flag is truthy become positives; all
    other passages become negatives. The original passage order is kept
    within each list.
    """
    query = row["query"]
    passages = row["passages"]

    positives = []
    negatives = []
    # Single pass over the (flag, text) pairs instead of filtering twice.
    for flag, passage_text in zip(passages["is_selected"], passages["passage_text"]):
        if flag:
            positives.append(passage_text)
        else:
            negatives.append(passage_text)

    return {"query": query, "positive": positives, "negative": negatives}


# One sample per dataset row, in dataset order.
samples = [_to_reranking_sample(row) for row in eval_dataset]
# Evaluate reranking using only the documents contained in the samples.
reranking_evaluator = SparseRerankingEvaluator(
    samples=samples,
    batch_size=32,
    name="ms_marco_dev_small",
    show_progress_bar=True,
)

# Calling the evaluator with the model returns a mapping of metric name -> value.
results = reranking_evaluator(model)
# Example of the logged report (illustrative figures):
"""
RerankingEvaluator: Evaluating the model on the ms_marco_dev_small dataset:
Queries: 967 Positives: Min 1.0, Mean 1.1, Max 3.0 Negatives: Min 1.0, Mean 7.1, Max 9.0
MAP: 53.41
MRR@10: 54.14
NDCG@10: 65.06
Model Query Sparsity: Active Dimensions: 42.2, Sparsity Ratio: 0.9986
Model Corpus Sparsity: Active Dimensions: 126.5, Sparsity Ratio: 0.9959
"""

# Report the evaluator's primary metric: its key, then its value to 4 decimals.
primary_metric = reranking_evaluator.primary_metric
print(f"Primary metric: {primary_metric}")
# => Primary metric: ms_marco_dev_small_ndcg@10
print(f"Primary metric value: {results[primary_metric]:.4f}")
# => Primary metric value: 0.6506
|