# qwenillustrious/sentence-transformers/examples/sparse_encoder/evaluation/sparse_classification_evaluator.py
"""Example: binary classification evaluation of a sparse encoder.

Loads the ``naver/splade-cocondenser-ensembledistil`` model, takes the last
1000 rows of the ``pair-class`` subset of
``sentence-transformers/quora-duplicates`` and runs
``SparseBinaryClassificationEvaluator`` over the sentence pairs with four
similarity functions (cosine, dot, euclidean, manhattan). Finally prints the
evaluator's primary metric name and its value.
"""

import logging

from datasets import load_dataset

from sentence_transformers import SparseEncoder
from sentence_transformers.sparse_encoder.evaluation import SparseBinaryClassificationEvaluator

# Message-only format at INFO level so the evaluator's log lines are shown as-is.
logging.basicConfig(format="%(message)s", level=logging.INFO)

# Initialize the SPLADE model
model = SparseEncoder("naver/splade-cocondenser-ensembledistil")

# Load a dataset with two text columns and a class label column
# (https://huggingface.co/datasets/sentence-transformers/quora-duplicates).
# Only the last 1000 rows of the train split are used for this evaluation.
eval_dataset = load_dataset("sentence-transformers/quora-duplicates", "pair-class", split="train[-1000:]")

# Initialize the evaluator
binary_acc_evaluator = SparseBinaryClassificationEvaluator(
    sentences1=eval_dataset["sentence1"],
    sentences2=eval_dataset["sentence2"],
    labels=eval_dataset["label"],
    name="quora_duplicates_dev",
    show_progress_bar=True,
    similarity_fn_names=["cosine", "dot", "euclidean", "manhattan"],
)

# Calling the evaluator with the model runs the evaluation; the returned
# object is indexed by metric name below.
results = binary_acc_evaluator(model)

# Reference output recorded from an earlier run of this script (kept as a bare
# string so it has no effect at runtime).
"""
Accuracy with Cosine-Similarity:             75.00  (Threshold: 0.8668)
F1 with Cosine-Similarity:                   67.22  (Threshold: 0.5974)
Precision with Cosine-Similarity:            54.18
Recall with Cosine-Similarity:               88.51
Average Precision with Cosine-Similarity:    67.81
Matthews Correlation with Cosine-Similarity: 49.56

Accuracy with Dot-Product:             76.50  (Threshold: 23.4236)
F1 with Dot-Product:                   67.00  (Threshold: 19.0095)
Precision with Dot-Product:            55.93
Recall with Dot-Product:               83.54
Average Precision with Dot-Product:    65.89
Matthews Correlation with Dot-Product: 48.88

Accuracy with Euclidean-Distance:             67.70  (Threshold: -10.0041)
F1 with Euclidean-Distance:                   48.60  (Threshold: -0.1876)
Precision with Euclidean-Distance:            32.13
Recall with Euclidean-Distance:               99.69
Average Precision with Euclidean-Distance:    20.52
Matthews Correlation with Euclidean-Distance: -4.59

Accuracy with Manhattan-Distance:             67.70  (Threshold: -103.0263)
F1 with Manhattan-Distance:                   48.60  (Threshold: -0.8532)
Precision with Manhattan-Distance:            32.13
Recall with Manhattan-Distance:               99.69
Average Precision with Manhattan-Distance:    21.05
Matthews Correlation with Manhattan-Distance: -4.59

Model Sparsity: Active Dimensions: 61.2, Sparsity Ratio: 0.9980
"""

# Print the results
primary_metric = binary_acc_evaluator.primary_metric
print(f"Primary metric: {primary_metric}")
# => Primary metric: quora_duplicates_dev_max_ap
print(f"Primary metric value: {results[primary_metric]:.4f}")
# => Primary metric value: 0.6781