qwenillustrious
/
sentence-transformers
/examples
/sparse_encoder
/evaluation
/sparse_reranking_evaluator.py
"""Example: score a sparse encoder on a reranking task built from MS MARCO.

The script loads a pretrained ``SparseEncoder``, turns the first 1000
validation rows of ``microsoft/ms_marco`` (config ``v1.1``) into
query / positive / negative samples, and runs ``SparseRerankingEvaluator``
over them.
"""

import logging

from datasets import load_dataset

from sentence_transformers import SparseEncoder
from sentence_transformers.sparse_encoder.evaluation import SparseRerankingEvaluator

# Message-only format at INFO level so log output is shown without prefixes.
logging.basicConfig(format="%(message)s", level=logging.INFO)


def _to_reranking_sample(row):
    """Convert one dataset row into a ``query`` / ``positive`` / ``negative`` dict.

    Passages whose ``is_selected`` flag is truthy become positives; all other
    passages of the same row become negatives.
    """
    passages = row["passages"]
    # Pair each passage text with its selection flag once, then split by flag.
    flagged = list(zip(passages["is_selected"], passages["passage_text"]))
    return {
        "query": row["query"],
        "positive": [passage for chosen, passage in flagged if chosen],
        "negative": [passage for chosen, passage in flagged if not chosen],
    }


# Load a model
model = SparseEncoder("naver/splade-cocondenser-ensembledistil")

# Load a dataset with queries, positives, and negatives (first 1000 validation rows)
eval_dataset = load_dataset("microsoft/ms_marco", "v1.1", split="validation").select(range(1000))

samples = [_to_reranking_sample(row) for row in eval_dataset]

# Evaluate using only the documents that belong to those 1000 samples
reranking_evaluator = SparseRerankingEvaluator(
    samples=samples,
    name="ms_marco_dev_small",
    show_progress_bar=True,
    batch_size=32,
)
results = reranking_evaluator(model)
"""
RerankingEvaluator: Evaluating the model on the ms_marco_dev_small dataset:
Queries: 967 Positives: Min 1.0, Mean 1.1, Max 3.0 Negatives: Min 1.0, Mean 7.1, Max 9.0
MAP: 53.41
MRR@10: 54.14
NDCG@10: 65.06
Model Query Sparsity: Active Dimensions: 42.2, Sparsity Ratio: 0.9986
Model Corpus Sparsity: Active Dimensions: 126.5, Sparsity Ratio: 0.9959
"""

# Report the evaluator's primary metric: its key first, then its value
print(f"Primary metric: {reranking_evaluator.primary_metric}")
# => Primary metric: ms_marco_dev_small_ndcg@10
print(f"Primary metric value: {results[reranking_evaluator.primary_metric]:.4f}")
# => Primary metric value: 0.6506