lsmpp's picture
Add files using upload-large-folder tool
bd33eac verified
import logging
from datasets import load_dataset
from sentence_transformers import SparseEncoder
from sentence_transformers.sparse_encoder.evaluation import SparseRerankingEvaluator
logging.basicConfig(format="%(message)s", level=logging.INFO)
# Load a model
model = SparseEncoder("naver/splade-cocondenser-ensembledistil")
# Load a dataset with queries, positives, and negatives
eval_dataset = load_dataset("microsoft/ms_marco", "v1.1", split="validation").select(range(1000))
samples = [
{
"query": sample["query"],
"positive": [
text
for is_selected, text in zip(sample["passages"]["is_selected"], sample["passages"]["passage_text"])
if is_selected
],
"negative": [
text
for is_selected, text in zip(sample["passages"]["is_selected"], sample["passages"]["passage_text"])
if not is_selected
],
}
for sample in eval_dataset
]
# Now evaluate using only the documents from the 1000 samples
reranking_evaluator = SparseRerankingEvaluator(
samples=samples,
name="ms_marco_dev_small",
show_progress_bar=True,
batch_size=32,
)
results = reranking_evaluator(model)
"""
RerankingEvaluator: Evaluating the model on the ms_marco_dev_small dataset:
Queries: 967 Positives: Min 1.0, Mean 1.1, Max 3.0 Negatives: Min 1.0, Mean 7.1, Max 9.0
MAP: 53.41
MRR@10: 54.14
NDCG@10: 65.06
Model Query Sparsity: Active Dimensions: 42.2, Sparsity Ratio: 0.9986
Model Corpus Sparsity: Active Dimensions: 126.5, Sparsity Ratio: 0.9959
"""
# Print the results
print(f"Primary metric: {reranking_evaluator.primary_metric}")
# => Primary metric: ms_marco_dev_small_ndcg@10
print(f"Primary metric value: {results[reranking_evaluator.primary_metric]:.4f}")
# => Primary metric value: 0.6506