| | --- |
| | pipeline_tag: sentence-similarity |
| | tags: |
| | - sentence-transformers |
| | - feature-extraction |
| | - sentence-similarity |
| | - transformers |
| | - mteb |
| | model-index: |
| | - name: mmlw-roberta-large |
| | results: |
| | - task: |
| | type: Clustering |
| | dataset: |
| | type: PL-MTEB/8tags-clustering |
| | name: MTEB 8TagsClustering |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: v_measure |
| | value: 31.16472823814849 |
| | - task: |
| | type: Classification |
| | dataset: |
| | type: PL-MTEB/allegro-reviews |
| | name: MTEB AllegroReviews |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: accuracy |
| | value: 47.48508946322067 |
| | - type: f1 |
| | value: 42.33327527584009 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: arguana-pl |
| | name: MTEB ArguAna-PL |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 38.834 |
| | - type: map_at_10 |
| | value: 55.22899999999999 |
| | - type: map_at_100 |
| | value: 55.791999999999994 |
| | - type: map_at_1000 |
| | value: 55.794 |
| | - type: map_at_3 |
| | value: 51.233 |
| | - type: map_at_5 |
| | value: 53.772 |
| | - type: mrr_at_1 |
| | value: 39.687 |
| | - type: mrr_at_10 |
| | value: 55.596000000000004 |
| | - type: mrr_at_100 |
| | value: 56.157000000000004 |
| | - type: mrr_at_1000 |
| | value: 56.157999999999994 |
| | - type: mrr_at_3 |
| | value: 51.66 |
| | - type: mrr_at_5 |
| | value: 54.135 |
| | - type: ndcg_at_1 |
| | value: 38.834 |
| | - type: ndcg_at_10 |
| | value: 63.402 |
| | - type: ndcg_at_100 |
| | value: 65.78 |
| | - type: ndcg_at_1000 |
| | value: 65.816 |
| | - type: ndcg_at_3 |
| | value: 55.349000000000004 |
| | - type: ndcg_at_5 |
| | value: 59.892 |
| | - type: precision_at_1 |
| | value: 38.834 |
| | - type: precision_at_10 |
| | value: 8.905000000000001 |
| | - type: precision_at_100 |
| | value: 0.9939999999999999 |
| | - type: precision_at_1000 |
| | value: 0.1 |
| | - type: precision_at_3 |
| | value: 22.428 |
| | - type: precision_at_5 |
| | value: 15.647 |
| | - type: recall_at_1 |
| | value: 38.834 |
| | - type: recall_at_10 |
| | value: 89.047 |
| | - type: recall_at_100 |
| | value: 99.36 |
| | - type: recall_at_1000 |
| | value: 99.644 |
| | - type: recall_at_3 |
| | value: 67.283 |
| | - type: recall_at_5 |
| | value: 78.236 |
| | - task: |
| | type: Classification |
| | dataset: |
| | type: PL-MTEB/cbd |
| | name: MTEB CBD |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: accuracy |
| | value: 69.33 |
| | - type: ap |
| | value: 22.972409521444508 |
| | - type: f1 |
| | value: 58.91072163784952 |
| | - task: |
| | type: PairClassification |
| | dataset: |
| | type: PL-MTEB/cdsce-pairclassification |
| | name: MTEB CDSC-E |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: cos_sim_accuracy |
| | value: 89.8 |
| | - type: cos_sim_ap |
| | value: 79.87039801032493 |
| | - type: cos_sim_f1 |
| | value: 68.53932584269663 |
| | - type: cos_sim_precision |
| | value: 73.49397590361446 |
| | - type: cos_sim_recall |
| | value: 64.21052631578948 |
| | - type: dot_accuracy |
| | value: 86.1 |
| | - type: dot_ap |
| | value: 63.684975861694035 |
| | - type: dot_f1 |
| | value: 63.61746361746362 |
| | - type: dot_precision |
| | value: 52.57731958762887 |
| | - type: dot_recall |
| | value: 80.52631578947368 |
| | - type: euclidean_accuracy |
| | value: 89.8 |
| | - type: euclidean_ap |
| | value: 79.7527126811392 |
| | - type: euclidean_f1 |
| | value: 68.46361185983827 |
| | - type: euclidean_precision |
| | value: 70.1657458563536 |
| | - type: euclidean_recall |
| | value: 66.84210526315789 |
| | - type: manhattan_accuracy |
| | value: 89.7 |
| | - type: manhattan_ap |
| | value: 79.64632771093657 |
| | - type: manhattan_f1 |
| | value: 68.4931506849315 |
| | - type: manhattan_precision |
| | value: 71.42857142857143 |
| | - type: manhattan_recall |
| | value: 65.78947368421053 |
| | - type: max_accuracy |
| | value: 89.8 |
| | - type: max_ap |
| | value: 79.87039801032493 |
| | - type: max_f1 |
| | value: 68.53932584269663 |
| | - task: |
| | type: STS |
| | dataset: |
| | type: PL-MTEB/cdscr-sts |
| | name: MTEB CDSC-R |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: cos_sim_pearson |
| | value: 92.1088892402831 |
| | - type: cos_sim_spearman |
| | value: 92.54126377343101 |
| | - type: euclidean_pearson |
| | value: 91.99022371986013 |
| | - type: euclidean_spearman |
| | value: 92.55235973775511 |
| | - type: manhattan_pearson |
| | value: 91.92170171331357 |
| | - type: manhattan_spearman |
| | value: 92.47797623672449 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: dbpedia-pl |
| | name: MTEB DBPedia-PL |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 8.683 |
| | - type: map_at_10 |
| | value: 18.9 |
| | - type: map_at_100 |
| | value: 26.933 |
| | - type: map_at_1000 |
| | value: 28.558 |
| | - type: map_at_3 |
| | value: 13.638 |
| | - type: map_at_5 |
| | value: 15.9 |
| | - type: mrr_at_1 |
| | value: 63.74999999999999 |
| | - type: mrr_at_10 |
| | value: 73.566 |
| | - type: mrr_at_100 |
| | value: 73.817 |
| | - type: mrr_at_1000 |
| | value: 73.824 |
| | - type: mrr_at_3 |
| | value: 71.875 |
| | - type: mrr_at_5 |
| | value: 73.2 |
| | - type: ndcg_at_1 |
| | value: 53.125 |
| | - type: ndcg_at_10 |
| | value: 40.271 |
| | - type: ndcg_at_100 |
| | value: 45.51 |
| | - type: ndcg_at_1000 |
| | value: 52.968 |
| | - type: ndcg_at_3 |
| | value: 45.122 |
| | - type: ndcg_at_5 |
| | value: 42.306 |
| | - type: precision_at_1 |
| | value: 63.74999999999999 |
| | - type: precision_at_10 |
| | value: 31.55 |
| | - type: precision_at_100 |
| | value: 10.440000000000001 |
| | - type: precision_at_1000 |
| | value: 2.01 |
| | - type: precision_at_3 |
| | value: 48.333 |
| | - type: precision_at_5 |
| | value: 40.5 |
| | - type: recall_at_1 |
| | value: 8.683 |
| | - type: recall_at_10 |
| | value: 24.63 |
| | - type: recall_at_100 |
| | value: 51.762 |
| | - type: recall_at_1000 |
| | value: 75.64999999999999 |
| | - type: recall_at_3 |
| | value: 15.136 |
| | - type: recall_at_5 |
| | value: 18.678 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: fiqa-pl |
| | name: MTEB FiQA-PL |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 19.872999999999998 |
| | - type: map_at_10 |
| | value: 32.923 |
| | - type: map_at_100 |
| | value: 34.819 |
| | - type: map_at_1000 |
| | value: 34.99 |
| | - type: map_at_3 |
| | value: 28.500999999999998 |
| | - type: map_at_5 |
| | value: 31.087999999999997 |
| | - type: mrr_at_1 |
| | value: 40.432 |
| | - type: mrr_at_10 |
| | value: 49.242999999999995 |
| | - type: mrr_at_100 |
| | value: 50.014 |
| | - type: mrr_at_1000 |
| | value: 50.05500000000001 |
| | - type: mrr_at_3 |
| | value: 47.144999999999996 |
| | - type: mrr_at_5 |
| | value: 48.171 |
| | - type: ndcg_at_1 |
| | value: 40.586 |
| | - type: ndcg_at_10 |
| | value: 40.887 |
| | - type: ndcg_at_100 |
| | value: 47.701 |
| | - type: ndcg_at_1000 |
| | value: 50.624 |
| | - type: ndcg_at_3 |
| | value: 37.143 |
| | - type: ndcg_at_5 |
| | value: 38.329 |
| | - type: precision_at_1 |
| | value: 40.586 |
| | - type: precision_at_10 |
| | value: 11.497 |
| | - type: precision_at_100 |
| | value: 1.838 |
| | - type: precision_at_1000 |
| | value: 0.23700000000000002 |
| | - type: precision_at_3 |
| | value: 25.0 |
| | - type: precision_at_5 |
| | value: 18.549 |
| | - type: recall_at_1 |
| | value: 19.872999999999998 |
| | - type: recall_at_10 |
| | value: 48.073 |
| | - type: recall_at_100 |
| | value: 73.473 |
| | - type: recall_at_1000 |
| | value: 90.94 |
| | - type: recall_at_3 |
| | value: 33.645 |
| | - type: recall_at_5 |
| | value: 39.711 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: hotpotqa-pl |
| | name: MTEB HotpotQA-PL |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 39.399 |
| | - type: map_at_10 |
| | value: 62.604000000000006 |
| | - type: map_at_100 |
| | value: 63.475 |
| | - type: map_at_1000 |
| | value: 63.534 |
| | - type: map_at_3 |
| | value: 58.870999999999995 |
| | - type: map_at_5 |
| | value: 61.217 |
| | - type: mrr_at_1 |
| | value: 78.758 |
| | - type: mrr_at_10 |
| | value: 84.584 |
| | - type: mrr_at_100 |
| | value: 84.753 |
| | - type: mrr_at_1000 |
| | value: 84.759 |
| | - type: mrr_at_3 |
| | value: 83.65700000000001 |
| | - type: mrr_at_5 |
| | value: 84.283 |
| | - type: ndcg_at_1 |
| | value: 78.798 |
| | - type: ndcg_at_10 |
| | value: 71.04 |
| | - type: ndcg_at_100 |
| | value: 74.048 |
| | - type: ndcg_at_1000 |
| | value: 75.163 |
| | - type: ndcg_at_3 |
| | value: 65.862 |
| | - type: ndcg_at_5 |
| | value: 68.77600000000001 |
| | - type: precision_at_1 |
| | value: 78.798 |
| | - type: precision_at_10 |
| | value: 14.949000000000002 |
| | - type: precision_at_100 |
| | value: 1.7309999999999999 |
| | - type: precision_at_1000 |
| | value: 0.188 |
| | - type: precision_at_3 |
| | value: 42.237 |
| | - type: precision_at_5 |
| | value: 27.634999999999998 |
| | - type: recall_at_1 |
| | value: 39.399 |
| | - type: recall_at_10 |
| | value: 74.747 |
| | - type: recall_at_100 |
| | value: 86.529 |
| | - type: recall_at_1000 |
| | value: 93.849 |
| | - type: recall_at_3 |
| | value: 63.356 |
| | - type: recall_at_5 |
| | value: 69.08800000000001 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: msmarco-pl |
| | name: MTEB MSMARCO-PL |
| | config: default |
| | split: validation |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 19.598 |
| | - type: map_at_10 |
| | value: 30.453999999999997 |
| | - type: map_at_100 |
| | value: 31.601000000000003 |
| | - type: map_at_1000 |
| | value: 31.66 |
| | - type: map_at_3 |
| | value: 27.118 |
| | - type: map_at_5 |
| | value: 28.943 |
| | - type: mrr_at_1 |
| | value: 20.1 |
| | - type: mrr_at_10 |
| | value: 30.978 |
| | - type: mrr_at_100 |
| | value: 32.057 |
| | - type: mrr_at_1000 |
| | value: 32.112 |
| | - type: mrr_at_3 |
| | value: 27.679 |
| | - type: mrr_at_5 |
| | value: 29.493000000000002 |
| | - type: ndcg_at_1 |
| | value: 20.158 |
| | - type: ndcg_at_10 |
| | value: 36.63 |
| | - type: ndcg_at_100 |
| | value: 42.291000000000004 |
| | - type: ndcg_at_1000 |
| | value: 43.828 |
| | - type: ndcg_at_3 |
| | value: 29.744999999999997 |
| | - type: ndcg_at_5 |
| | value: 33.024 |
| | - type: precision_at_1 |
| | value: 20.158 |
| | - type: precision_at_10 |
| | value: 5.811999999999999 |
| | - type: precision_at_100 |
| | value: 0.868 |
| | - type: precision_at_1000 |
| | value: 0.1 |
| | - type: precision_at_3 |
| | value: 12.689 |
| | - type: precision_at_5 |
| | value: 9.295 |
| | - type: recall_at_1 |
| | value: 19.598 |
| | - type: recall_at_10 |
| | value: 55.596999999999994 |
| | - type: recall_at_100 |
| | value: 82.143 |
| | - type: recall_at_1000 |
| | value: 94.015 |
| | - type: recall_at_3 |
| | value: 36.720000000000006 |
| | - type: recall_at_5 |
| | value: 44.606 |
| | - task: |
| | type: Classification |
| | dataset: |
| | type: mteb/amazon_massive_intent |
| | name: MTEB MassiveIntentClassification (pl) |
| | config: pl |
| | split: test |
| | revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7 |
| | metrics: |
| | - type: accuracy |
| | value: 74.8117014122394 |
| | - type: f1 |
| | value: 72.0259730121889 |
| | - task: |
| | type: Classification |
| | dataset: |
| | type: mteb/amazon_massive_scenario |
| | name: MTEB MassiveScenarioClassification (pl) |
| | config: pl |
| | split: test |
| | revision: 7d571f92784cd94a019292a1f45445077d0ef634 |
| | metrics: |
| | - type: accuracy |
| | value: 77.84465366509752 |
| | - type: f1 |
| | value: 77.73439218970051 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: nfcorpus-pl |
| | name: MTEB NFCorpus-PL |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 5.604 |
| | - type: map_at_10 |
| | value: 12.684000000000001 |
| | - type: map_at_100 |
| | value: 16.274 |
| | - type: map_at_1000 |
| | value: 17.669 |
| | - type: map_at_3 |
| | value: 9.347 |
| | - type: map_at_5 |
| | value: 10.752 |
| | - type: mrr_at_1 |
| | value: 43.963 |
| | - type: mrr_at_10 |
| | value: 52.94 |
| | - type: mrr_at_100 |
| | value: 53.571000000000005 |
| | - type: mrr_at_1000 |
| | value: 53.613 |
| | - type: mrr_at_3 |
| | value: 51.032 |
| | - type: mrr_at_5 |
| | value: 52.193 |
| | - type: ndcg_at_1 |
| | value: 41.486000000000004 |
| | - type: ndcg_at_10 |
| | value: 33.937 |
| | - type: ndcg_at_100 |
| | value: 31.726 |
| | - type: ndcg_at_1000 |
| | value: 40.331 |
| | - type: ndcg_at_3 |
| | value: 39.217 |
| | - type: ndcg_at_5 |
| | value: 36.521 |
| | - type: precision_at_1 |
| | value: 43.034 |
| | - type: precision_at_10 |
| | value: 25.324999999999996 |
| | - type: precision_at_100 |
| | value: 8.022 |
| | - type: precision_at_1000 |
| | value: 2.0629999999999997 |
| | - type: precision_at_3 |
| | value: 36.945 |
| | - type: precision_at_5 |
| | value: 31.517 |
| | - type: recall_at_1 |
| | value: 5.604 |
| | - type: recall_at_10 |
| | value: 16.554 |
| | - type: recall_at_100 |
| | value: 33.113 |
| | - type: recall_at_1000 |
| | value: 62.832 |
| | - type: recall_at_3 |
| | value: 10.397 |
| | - type: recall_at_5 |
| | value: 12.629999999999999 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: nq-pl |
| | name: MTEB NQ-PL |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 26.642 |
| | - type: map_at_10 |
| | value: 40.367999999999995 |
| | - type: map_at_100 |
| | value: 41.487 |
| | - type: map_at_1000 |
| | value: 41.528 |
| | - type: map_at_3 |
| | value: 36.292 |
| | - type: map_at_5 |
| | value: 38.548 |
| | - type: mrr_at_1 |
| | value: 30.156 |
| | - type: mrr_at_10 |
| | value: 42.853 |
| | - type: mrr_at_100 |
| | value: 43.742 |
| | - type: mrr_at_1000 |
| | value: 43.772 |
| | - type: mrr_at_3 |
| | value: 39.47 |
| | - type: mrr_at_5 |
| | value: 41.366 |
| | - type: ndcg_at_1 |
| | value: 30.214000000000002 |
| | - type: ndcg_at_10 |
| | value: 47.620000000000005 |
| | - type: ndcg_at_100 |
| | value: 52.486 |
| | - type: ndcg_at_1000 |
| | value: 53.482 |
| | - type: ndcg_at_3 |
| | value: 39.864 |
| | - type: ndcg_at_5 |
| | value: 43.645 |
| | - type: precision_at_1 |
| | value: 30.214000000000002 |
| | - type: precision_at_10 |
| | value: 8.03 |
| | - type: precision_at_100 |
| | value: 1.0739999999999998 |
| | - type: precision_at_1000 |
| | value: 0.117 |
| | - type: precision_at_3 |
| | value: 18.183 |
| | - type: precision_at_5 |
| | value: 13.105 |
| | - type: recall_at_1 |
| | value: 26.642 |
| | - type: recall_at_10 |
| | value: 67.282 |
| | - type: recall_at_100 |
| | value: 88.632 |
| | - type: recall_at_1000 |
| | value: 96.109 |
| | - type: recall_at_3 |
| | value: 47.048 |
| | - type: recall_at_5 |
| | value: 55.791000000000004 |
| | - task: |
| | type: Classification |
| | dataset: |
| | type: laugustyniak/abusive-clauses-pl |
| | name: MTEB PAC |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: accuracy |
| | value: 64.69446857804807 |
| | - type: ap |
| | value: 75.58028779280512 |
| | - type: f1 |
| | value: 62.3610392963539 |
| | - task: |
| | type: PairClassification |
| | dataset: |
| | type: PL-MTEB/ppc-pairclassification |
| | name: MTEB PPC |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: cos_sim_accuracy |
| | value: 88.4 |
| | - type: cos_sim_ap |
| | value: 93.56462741831817 |
| | - type: cos_sim_f1 |
| | value: 90.73634204275535 |
| | - type: cos_sim_precision |
| | value: 86.94992412746586 |
| | - type: cos_sim_recall |
| | value: 94.86754966887418 |
| | - type: dot_accuracy |
| | value: 75.3 |
| | - type: dot_ap |
| | value: 83.06945936688015 |
| | - type: dot_f1 |
| | value: 81.50887573964496 |
| | - type: dot_precision |
| | value: 73.66310160427807 |
| | - type: dot_recall |
| | value: 91.22516556291392 |
| | - type: euclidean_accuracy |
| | value: 88.8 |
| | - type: euclidean_ap |
| | value: 93.53974198044985 |
| | - type: euclidean_f1 |
| | value: 90.87947882736157 |
| | - type: euclidean_precision |
| | value: 89.42307692307693 |
| | - type: euclidean_recall |
| | value: 92.3841059602649 |
| | - type: manhattan_accuracy |
| | value: 88.8 |
| | - type: manhattan_ap |
| | value: 93.54209967780366 |
| | - type: manhattan_f1 |
| | value: 90.85072231139645 |
| | - type: manhattan_precision |
| | value: 88.1619937694704 |
| | - type: manhattan_recall |
| | value: 93.70860927152319 |
| | - type: max_accuracy |
| | value: 88.8 |
| | - type: max_ap |
| | value: 93.56462741831817 |
| | - type: max_f1 |
| | value: 90.87947882736157 |
| | - task: |
| | type: PairClassification |
| | dataset: |
| | type: PL-MTEB/psc-pairclassification |
| | name: MTEB PSC |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: cos_sim_accuracy |
| | value: 97.03153988868274 |
| | - type: cos_sim_ap |
| | value: 98.63208302459417 |
| | - type: cos_sim_f1 |
| | value: 95.06172839506173 |
| | - type: cos_sim_precision |
| | value: 96.25 |
| | - type: cos_sim_recall |
| | value: 93.90243902439023 |
| | - type: dot_accuracy |
| | value: 86.82745825602969 |
| | - type: dot_ap |
| | value: 83.77450133931302 |
| | - type: dot_f1 |
| | value: 79.3053545586107 |
| | - type: dot_precision |
| | value: 75.48209366391184 |
| | - type: dot_recall |
| | value: 83.53658536585365 |
| | - type: euclidean_accuracy |
| | value: 97.03153988868274 |
| | - type: euclidean_ap |
| | value: 98.80678168225653 |
| | - type: euclidean_f1 |
| | value: 95.20958083832335 |
| | - type: euclidean_precision |
| | value: 93.52941176470588 |
| | - type: euclidean_recall |
| | value: 96.95121951219512 |
| | - type: manhattan_accuracy |
| | value: 97.21706864564007 |
| | - type: manhattan_ap |
| | value: 98.82279484224186 |
| | - type: manhattan_f1 |
| | value: 95.44072948328268 |
| | - type: manhattan_precision |
| | value: 95.15151515151516 |
| | - type: manhattan_recall |
| | value: 95.73170731707317 |
| | - type: max_accuracy |
| | value: 97.21706864564007 |
| | - type: max_ap |
| | value: 98.82279484224186 |
| | - type: max_f1 |
| | value: 95.44072948328268 |
| | - task: |
| | type: Classification |
| | dataset: |
| | type: PL-MTEB/polemo2_in |
| | name: MTEB PolEmo2.0-IN |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: accuracy |
| | value: 76.84210526315789 |
| | - type: f1 |
| | value: 75.49713789106988 |
| | - task: |
| | type: Classification |
| | dataset: |
| | type: PL-MTEB/polemo2_out |
| | name: MTEB PolEmo2.0-OUT |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: accuracy |
| | value: 53.7246963562753 |
| | - type: f1 |
| | value: 43.060592194322986 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: quora-pl |
| | name: MTEB Quora-PL |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 67.021 |
| | - type: map_at_10 |
| | value: 81.362 |
| | - type: map_at_100 |
| | value: 82.06700000000001 |
| | - type: map_at_1000 |
| | value: 82.084 |
| | - type: map_at_3 |
| | value: 78.223 |
| | - type: map_at_5 |
| | value: 80.219 |
| | - type: mrr_at_1 |
| | value: 77.17 |
| | - type: mrr_at_10 |
| | value: 84.222 |
| | - type: mrr_at_100 |
| | value: 84.37599999999999 |
| | - type: mrr_at_1000 |
| | value: 84.379 |
| | - type: mrr_at_3 |
| | value: 83.003 |
| | - type: mrr_at_5 |
| | value: 83.834 |
| | - type: ndcg_at_1 |
| | value: 77.29 |
| | - type: ndcg_at_10 |
| | value: 85.506 |
| | - type: ndcg_at_100 |
| | value: 87.0 |
| | - type: ndcg_at_1000 |
| | value: 87.143 |
| | - type: ndcg_at_3 |
| | value: 82.17 |
| | - type: ndcg_at_5 |
| | value: 84.057 |
| | - type: precision_at_1 |
| | value: 77.29 |
| | - type: precision_at_10 |
| | value: 13.15 |
| | - type: precision_at_100 |
| | value: 1.522 |
| | - type: precision_at_1000 |
| | value: 0.156 |
| | - type: precision_at_3 |
| | value: 36.173 |
| | - type: precision_at_5 |
| | value: 23.988 |
| | - type: recall_at_1 |
| | value: 67.021 |
| | - type: recall_at_10 |
| | value: 93.943 |
| | - type: recall_at_100 |
| | value: 99.167 |
| | - type: recall_at_1000 |
| | value: 99.929 |
| | - type: recall_at_3 |
| | value: 84.55799999999999 |
| | - type: recall_at_5 |
| | value: 89.697 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: scidocs-pl |
| | name: MTEB SCIDOCS-PL |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 4.523 |
| | - type: map_at_10 |
| | value: 11.584 |
| | - type: map_at_100 |
| | value: 13.705 |
| | - type: map_at_1000 |
| | value: 14.038999999999998 |
| | - type: map_at_3 |
| | value: 8.187999999999999 |
| | - type: map_at_5 |
| | value: 9.922 |
| | - type: mrr_at_1 |
| | value: 22.1 |
| | - type: mrr_at_10 |
| | value: 32.946999999999996 |
| | - type: mrr_at_100 |
| | value: 34.11 |
| | - type: mrr_at_1000 |
| | value: 34.163 |
| | - type: mrr_at_3 |
| | value: 29.633 |
| | - type: mrr_at_5 |
| | value: 31.657999999999998 |
| | - type: ndcg_at_1 |
| | value: 22.2 |
| | - type: ndcg_at_10 |
| | value: 19.466 |
| | - type: ndcg_at_100 |
| | value: 27.725 |
| | - type: ndcg_at_1000 |
| | value: 33.539 |
| | - type: ndcg_at_3 |
| | value: 18.26 |
| | - type: ndcg_at_5 |
| | value: 16.265 |
| | - type: precision_at_1 |
| | value: 22.2 |
| | - type: precision_at_10 |
| | value: 10.11 |
| | - type: precision_at_100 |
| | value: 2.204 |
| | - type: precision_at_1000 |
| | value: 0.36 |
| | - type: precision_at_3 |
| | value: 17.1 |
| | - type: precision_at_5 |
| | value: 14.44 |
| | - type: recall_at_1 |
| | value: 4.523 |
| | - type: recall_at_10 |
| | value: 20.497 |
| | - type: recall_at_100 |
| | value: 44.757000000000005 |
| | - type: recall_at_1000 |
| | value: 73.14699999999999 |
| | - type: recall_at_3 |
| | value: 10.413 |
| | - type: recall_at_5 |
| | value: 14.638000000000002 |
| | - task: |
| | type: PairClassification |
| | dataset: |
| | type: PL-MTEB/sicke-pl-pairclassification |
| | name: MTEB SICK-E-PL |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: cos_sim_accuracy |
| | value: 87.4235629841011 |
| | - type: cos_sim_ap |
| | value: 84.46531935663157 |
| | - type: cos_sim_f1 |
| | value: 77.18910963944077 |
| | - type: cos_sim_precision |
| | value: 79.83257229832572 |
| | - type: cos_sim_recall |
| | value: 74.71509971509973 |
| | - type: dot_accuracy |
| | value: 81.10476966979209 |
| | - type: dot_ap |
| | value: 71.12231750543143 |
| | - type: dot_f1 |
| | value: 68.13455657492355 |
| | - type: dot_precision |
| | value: 59.69989281886387 |
| | - type: dot_recall |
| | value: 79.34472934472934 |
| | - type: euclidean_accuracy |
| | value: 87.21973094170403 |
| | - type: euclidean_ap |
| | value: 84.33077991405355 |
| | - type: euclidean_f1 |
| | value: 76.81931132410365 |
| | - type: euclidean_precision |
| | value: 76.57466383581033 |
| | - type: euclidean_recall |
| | value: 77.06552706552706 |
| | - type: manhattan_accuracy |
| | value: 87.21973094170403 |
| | - type: manhattan_ap |
| | value: 84.35651252115137 |
| | - type: manhattan_f1 |
| | value: 76.87004481213376 |
| | - type: manhattan_precision |
| | value: 74.48229792919172 |
| | - type: manhattan_recall |
| | value: 79.41595441595442 |
| | - type: max_accuracy |
| | value: 87.4235629841011 |
| | - type: max_ap |
| | value: 84.46531935663157 |
| | - type: max_f1 |
| | value: 77.18910963944077 |
| | - task: |
| | type: STS |
| | dataset: |
| | type: PL-MTEB/sickr-pl-sts |
| | name: MTEB SICK-R-PL |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: cos_sim_pearson |
| | value: 83.05629619004273 |
| | - type: cos_sim_spearman |
| | value: 79.90632583043678 |
| | - type: euclidean_pearson |
| | value: 81.56426663515931 |
| | - type: euclidean_spearman |
| | value: 80.05439220131294 |
| | - type: manhattan_pearson |
| | value: 81.52958181013108 |
| | - type: manhattan_spearman |
| | value: 80.0387467163383 |
| | - task: |
| | type: STS |
| | dataset: |
| | type: mteb/sts22-crosslingual-sts |
| | name: MTEB STS22 (pl) |
| | config: pl |
| | split: test |
| | revision: 6d1ba47164174a496b7fa5d3569dae26a6813b80 |
| | metrics: |
| | - type: cos_sim_pearson |
| | value: 35.93847200513348 |
| | - type: cos_sim_spearman |
| | value: 39.31543525546526 |
| | - type: euclidean_pearson |
| | value: 30.19743936591465 |
| | - type: euclidean_spearman |
| | value: 39.966612599252095 |
| | - type: manhattan_pearson |
| | value: 30.195614462473387 |
| | - type: manhattan_spearman |
| | value: 39.822552043685754 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: scifact-pl |
| | name: MTEB SciFact-PL |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 56.05 |
| | - type: map_at_10 |
| | value: 65.93299999999999 |
| | - type: map_at_100 |
| | value: 66.571 |
| | - type: map_at_1000 |
| | value: 66.60000000000001 |
| | - type: map_at_3 |
| | value: 63.489 |
| | - type: map_at_5 |
| | value: 64.91799999999999 |
| | - type: mrr_at_1 |
| | value: 59.0 |
| | - type: mrr_at_10 |
| | value: 67.026 |
| | - type: mrr_at_100 |
| | value: 67.559 |
| | - type: mrr_at_1000 |
| | value: 67.586 |
| | - type: mrr_at_3 |
| | value: 65.444 |
| | - type: mrr_at_5 |
| | value: 66.278 |
| | - type: ndcg_at_1 |
| | value: 59.0 |
| | - type: ndcg_at_10 |
| | value: 70.233 |
| | - type: ndcg_at_100 |
| | value: 72.789 |
| | - type: ndcg_at_1000 |
| | value: 73.637 |
| | - type: ndcg_at_3 |
| | value: 66.40700000000001 |
| | - type: ndcg_at_5 |
| | value: 68.206 |
| | - type: precision_at_1 |
| | value: 59.0 |
| | - type: precision_at_10 |
| | value: 9.367 |
| | - type: precision_at_100 |
| | value: 1.06 |
| | - type: precision_at_1000 |
| | value: 0.11299999999999999 |
| | - type: precision_at_3 |
| | value: 26.222 |
| | - type: precision_at_5 |
| | value: 17.067 |
| | - type: recall_at_1 |
| | value: 56.05 |
| | - type: recall_at_10 |
| | value: 82.089 |
| | - type: recall_at_100 |
| | value: 93.167 |
| | - type: recall_at_1000 |
| | value: 100.0 |
| | - type: recall_at_3 |
| | value: 71.822 |
| | - type: recall_at_5 |
| | value: 76.483 |
| | - task: |
| | type: Retrieval |
| | dataset: |
| | type: trec-covid-pl |
| | name: MTEB TRECCOVID-PL |
| | config: default |
| | split: test |
| | revision: None |
| | metrics: |
| | - type: map_at_1 |
| | value: 0.21 |
| | - type: map_at_10 |
| | value: 1.7680000000000002 |
| | - type: map_at_100 |
| | value: 9.447999999999999 |
| | - type: map_at_1000 |
| | value: 21.728 |
| | - type: map_at_3 |
| | value: 0.603 |
| | - type: map_at_5 |
| | value: 0.9610000000000001 |
| | - type: mrr_at_1 |
| | value: 80.0 |
| | - type: mrr_at_10 |
| | value: 88.667 |
| | - type: mrr_at_100 |
| | value: 88.667 |
| | - type: mrr_at_1000 |
| | value: 88.667 |
| | - type: mrr_at_3 |
| | value: 87.667 |
| | - type: mrr_at_5 |
| | value: 88.667 |
| | - type: ndcg_at_1 |
| | value: 77.0 |
| | - type: ndcg_at_10 |
| | value: 70.814 |
| | - type: ndcg_at_100 |
| | value: 52.532000000000004 |
| | - type: ndcg_at_1000 |
| | value: 45.635999999999996 |
| | - type: ndcg_at_3 |
| | value: 76.542 |
| | - type: ndcg_at_5 |
| | value: 73.24000000000001 |
| | - type: precision_at_1 |
| | value: 80.0 |
| | - type: precision_at_10 |
| | value: 75.0 |
| | - type: precision_at_100 |
| | value: 53.879999999999995 |
| | - type: precision_at_1000 |
| | value: 20.002 |
| | - type: precision_at_3 |
| | value: 80.0 |
| | - type: precision_at_5 |
| | value: 76.4 |
| | - type: recall_at_1 |
| | value: 0.21 |
| | - type: recall_at_10 |
| | value: 2.012 |
| | - type: recall_at_100 |
| | value: 12.781999999999998 |
| | - type: recall_at_1000 |
| | value: 42.05 |
| | - type: recall_at_3 |
| | value: 0.644 |
| | - type: recall_at_5 |
| | value: 1.04 |
| | language: pl |
| | license: apache-2.0 |
| | widget: |
| | - source_sentence: "zapytanie: Jak dożyć 100 lat?" |
| | sentences: |
| | - "Trzeba zdrowo się odżywiać i uprawiać sport." |
| | - "Trzeba pić alkohol, imprezować i jeździć szybkimi autami." |
| | - "Gdy trwała kampania politycy zapewniali, że rozprawią się z zakazem niedzielnego handlu." |
| |
|
| | --- |
| | |
| | <h1 align="center">MMLW-roberta-large</h1> |
| |
|
| | MMLW (muszę mieć lepszą wiadomość) are neural text encoders for Polish. |
| | This is a distilled model that can be used to generate embeddings applicable to many tasks such as semantic similarity, clustering, and information retrieval. The model can also serve as a base for further fine-tuning. |
| | It transforms texts to 1024-dimensional vectors. |
| | The model was initialized with a Polish RoBERTa checkpoint, and then trained with the [multilingual knowledge distillation method](https://aclanthology.org/2020.emnlp-main.365/) on a diverse corpus of 60 million Polish-English text pairs. We utilised [English FlagEmbeddings (BGE)](https://huggingface.co/BAAI/bge-base-en) as teacher models for distillation. |
| |
|
| | ## Usage (Sentence-Transformers) |
| |
|
| | ⚠️ Our embedding models require the use of specific prefixes and suffixes when encoding texts. For this model, each query should be preceded by the prefix **"zapytanie: "** ⚠️ |
| |
|
| | You can use the model like this with [sentence-transformers](https://www.SBERT.net): |
| |
|
| | ```python |
| | from sentence_transformers import SentenceTransformer |
| | from sentence_transformers.util import cos_sim |
| | |
| | query_prefix = "zapytanie: " |
| | answer_prefix = "" |
| | queries = [query_prefix + "Jak dożyć 100 lat?"] |
| | answers = [ |
| | answer_prefix + "Trzeba zdrowo się odżywiać i uprawiać sport.", |
| | answer_prefix + "Trzeba pić alkohol, imprezować i jeździć szybkimi autami.", |
| | answer_prefix + "Gdy trwała kampania politycy zapewniali, że rozprawią się z zakazem niedzielnego handlu." |
| | ] |
| | model = SentenceTransformer("sdadas/mmlw-roberta-large") |
| | queries_emb = model.encode(queries, convert_to_tensor=True, show_progress_bar=False) |
| | answers_emb = model.encode(answers, convert_to_tensor=True, show_progress_bar=False) |
| | |
| | best_answer = cos_sim(queries_emb, answers_emb).argmax().item() |
| | print(answers[best_answer]) |
| | # Trzeba zdrowo się odżywiać i uprawiać sport. |
| | ``` |
| |
|
| | ## Evaluation Results |
| |
|
| | - The model achieves an **Average Score** of **63.23** on the Polish Massive Text Embedding Benchmark (MTEB). See [MTEB Leaderboard](https://huggingface.co/spaces/mteb/leaderboard) for detailed results. |
| | - The model achieves **NDCG@10** of **55.95** on the Polish Information Retrieval Benchmark. See [PIRB Leaderboard](https://huggingface.co/spaces/sdadas/pirb) for detailed results. |
| |
|
| | ## Acknowledgements |
| | This model was trained with the support of the A100 GPU cluster provided by the Gdansk University of Technology within the TASK center initiative. |
| |
|
| | ## Citation |
| |
|
| | ```bibtex |
| | @inproceedings{dadas2024pirb, |
| | title={PIRB: A Comprehensive Benchmark of Polish Dense and Hybrid Text Retrieval Methods}, |
| | author={Dadas, Slawomir and Pere{\l}kiewicz, Micha{\l} and Po{\'s}wiata, Rafa{\l}}, |
| | booktitle={Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)}, |
| | pages={12761--12774}, |
| | year={2024} |
| | } |
| | ``` |