File size: 41,428 Bytes

bdf0235

---
tags:
- ColBERT
- PyLate
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:10000000
- loss:Contrastive
base_model: answerdotai/ModernBERT-base
datasets:
- bclavie/msmarco-10m-triplets
pipeline_tag: sentence-similarity
library_name: PyLate
metrics:
- MaxSim_accuracy@1
- MaxSim_accuracy@3
- MaxSim_accuracy@5
- MaxSim_accuracy@10
- MaxSim_precision@1
- MaxSim_precision@3
- MaxSim_precision@5
- MaxSim_precision@10
- MaxSim_recall@1
- MaxSim_recall@3
- MaxSim_recall@5
- MaxSim_recall@10
- MaxSim_ndcg@10
- MaxSim_mrr@10
- MaxSim_map@100
model-index:
- name: PyLate model based on answerdotai/ModernBERT-base
  results:
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoClimateFEVER
      type: NanoClimateFEVER
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.3
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.46
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.54
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 0.72
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.3
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.15999999999999998
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.12800000000000003
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.09399999999999999
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.145
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.20066666666666666
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.25566666666666665
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.3723333333333333
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.29984094041575976
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.40457936507936504
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.23154243919711487
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoDBPedia
      type: NanoDBPedia
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.84
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.92
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.92
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 0.92
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.84
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.6599999999999998
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.6000000000000001
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.53
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.11978017136836354
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.19320640931807406
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.2474564677729374
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.35362762531754766
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.6642857997687286
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.8766666666666666
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.5056362918461486
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoFEVER
      type: NanoFEVER
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.86
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 1.0
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 1.0
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 1.0
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.86
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.34666666666666657
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.20799999999999996
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.10799999999999997
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.8066666666666668
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.9566666666666667
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.9566666666666667
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.9733333333333333
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.9143032727772558
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.92
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.8848835412953059
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoFiQA2018
      type: NanoFiQA2018
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.5
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.68
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.72
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 0.8
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.5
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.33333333333333326
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.236
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.14
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.29724603174603176
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.49257142857142855
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.5465079365079365
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.6031746031746033
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.5453834796894957
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.604079365079365
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.49074315182112516
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoHotpotQA
      type: NanoHotpotQA
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.9
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.96
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.96
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 1.0
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.9
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.5266666666666666
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.32799999999999996
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.17799999999999996
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.45
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.79
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.82
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.89
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.8430810883372716
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.9353571428571428
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.778500350140056
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoMSMARCO
      type: NanoMSMARCO
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.48
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.7
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.74
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 0.9
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.48
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.2333333333333333
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.14800000000000002
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.08999999999999998
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.48
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.7
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.74
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.9
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.681981684088073
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.6141031746031747
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.6195014186409419
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoNFCorpus
      type: NanoNFCorpus
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.48
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.54
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.62
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 0.7
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.48
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.37333333333333335
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.36
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.29
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.024846700166746567
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.06745637325640307
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.1008052160248601
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.1497664943203363
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.34867256192135143
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.5346031746031746
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.13572305233276538
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoNQ
      type: NanoNQ
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.54
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.8
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.86
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 0.9
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.54
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.2733333333333333
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.17599999999999993
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.09599999999999997
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.51
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.75
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.81
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.86
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.7009621199364733
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.6670238095238094
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.6421027387645034
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoQuoraRetrieval
      type: NanoQuoraRetrieval
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.9
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.98
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.98
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 1.0
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.9
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.38666666666666655
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.24799999999999997
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.13799999999999998
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.7973333333333333
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.9246666666666666
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.9426666666666668
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.9966666666666666
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.9436609396356616
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.9366666666666665
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.9184467532467532
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoSCIDOCS
      type: NanoSCIDOCS
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.44
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.66
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.68
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 0.8
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.44
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.31999999999999995
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.236
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.166
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.09366666666666668
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.19866666666666666
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.24366666666666664
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.3396666666666667
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.3404490877439103
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.5581666666666668
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.2561512796776031
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoArguAna
      type: NanoArguAna
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.22
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.52
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.64
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 0.8
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.22
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.1733333333333333
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.128
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.08
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.22
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.52
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.64
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.8
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.4988624746761941
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.40369047619047616
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.40858139686400563
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoSciFact
      type: NanoSciFact
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.7
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.8
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.84
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 0.88
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.7
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.2866666666666666
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.184
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.09799999999999999
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.675
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.785
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.825
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.87
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.7836102750432731
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.7577777777777777
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.7575977078477077
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoTouche2020
      type: NanoTouche2020
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.7551020408163265
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.9795918367346939
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.9795918367346939
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 0.9795918367346939
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.7551020408163265
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.7142857142857143
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.6204081632653061
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.5061224489795919
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.05215472128680775
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.14371450561336085
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.20898774766999936
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.3295518520522591
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.5852674107635566
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.8639455782312924
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.43897324704873364
      name: Maxsim Map@100
  - task:
      type: nano-beir
      name: Nano BEIR
    dataset:
      name: NanoBEIR mean
      type: NanoBEIR_mean
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.6088540031397175
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.769199372056515
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.8061224489795917
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 0.8768916797488226
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.6088540031397175
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.3682783882783882
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.27695447409733126
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.19339403453689166
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.35936109932573973
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.5171242602635334
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.5644172334340307
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.6490861980665189
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.6269508565228465
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.6982046049188907
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.5437217975940587
      name: Maxsim Map@100
---

# PyLate model based on answerdotai/ModernBERT-base

This is a [PyLate](https://github.com/lightonai/pylate) model finetuned from [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) on the [msmarco-10m-triplets](https://huggingface.co/datasets/bclavie/msmarco-10m-triplets) dataset. It maps sentences & paragraphs to sequences of 128-dimensional dense vectors and can be used for semantic textual similarity using the MaxSim operator.

## Model Details

### Model Description
- **Model Type:** PyLate model
- **Base model:** [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) <!-- at revision 8949b909ec900327062f0ebf497f51aef5e6f0c8 -->
- **Document Length:** 512 tokens
- **Query Length:** 32 tokens
- **Output Dimensionality:** 128 dimensions
- **Similarity Function:** MaxSim
- **Training Dataset:**
    - [msmarco-10m-triplets](https://huggingface.co/datasets/bclavie/msmarco-10m-triplets)
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [PyLate Documentation](https://lightonai.github.io/pylate/)
- **Repository:** [PyLate on GitHub](https://github.com/lightonai/pylate)
- **Hugging Face:** [PyLate models on Hugging Face](https://huggingface.co/models?library=PyLate)

### Full Model Architecture

```
ColBERT(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
  (1): Dense({'in_features': 768, 'out_features': 128, 'bias': False, 'activation_function': 'torch.nn.modules.linear.Identity', 'use_residual': False})
)
```

## Usage
First install the PyLate library:

```bash
pip install -U pylate
```

### Retrieval

Use this model with PyLate to index and retrieve documents. The index uses [FastPLAID](https://github.com/lightonai/fast-plaid) for efficient similarity search.

#### Indexing documents

Load the ColBERT model and initialize the PLAID index, then encode and index your documents:

```python
from pylate import indexes, models, retrieve

# Step 1: Load the ColBERT model
model = models.ColBERT(
    model_name_or_path="pylate_model_id",
)

# Step 2: Initialize the PLAID index
index = indexes.PLAID(
    index_folder="pylate-index",
    index_name="index",
    override=True,  # This overwrites the existing index if any
)

# Step 3: Encode the documents
documents_ids = ["1", "2", "3"]
documents = ["document 1 text", "document 2 text", "document 3 text"]

documents_embeddings = model.encode(
    documents,
    batch_size=32,
    is_query=False,  # Ensure that it is set to False to indicate that these are documents, not queries
    show_progress_bar=True,
)

# Step 4: Add document embeddings to the index by providing embeddings and corresponding ids
index.add_documents(
    documents_ids=documents_ids,
    documents_embeddings=documents_embeddings,
)
```

Note that you do not have to recreate the index and encode the documents every time. Once you have created an index and added the documents, you can re-use the index later by loading it:

```python
# To load an index, simply instantiate it with the correct folder/name and without overriding it
index = indexes.PLAID(
    index_folder="pylate-index",
    index_name="index",
)
```

#### Retrieving top-k documents for queries

Once the documents are indexed, you can retrieve the top-k most relevant documents for a given set of queries.
To do so, initialize the ColBERT retriever with the index you want to search in, encode the queries, and then retrieve the top-k documents to get the ids and relevance scores of the top matches:

```python
# Step 1: Initialize the ColBERT retriever
retriever = retrieve.ColBERT(index=index)

# Step 2: Encode the queries
queries_embeddings = model.encode(
    ["query for document 3", "query for document 1"],
    batch_size=32,
    is_query=True,  # Ensure that it is set to True to indicate that these are queries
    show_progress_bar=True,
)

# Step 3: Retrieve top-k documents
scores = retriever.retrieve(
    queries_embeddings=queries_embeddings,
    k=10,  # Retrieve the top 10 matches for each query
)
```

### Reranking
If you only want to use the ColBERT model to perform reranking on top of your first-stage retrieval pipeline without building an index, you can simply use the `rank.rerank` function and pass the queries and documents to rerank:

```python
from pylate import rank, models

queries = [
    "query A",
    "query B",
]

documents = [
    ["document A", "document B"],
    ["document 1", "document C", "document B"],
]

documents_ids = [
    [1, 2],
    [1, 3, 2],
]

model = models.ColBERT(
    model_name_or_path="pylate_model_id",
)

queries_embeddings = model.encode(
    queries,
    is_query=True,
)

documents_embeddings = model.encode(
    documents,
    is_query=False,
)

reranked_documents = rank.rerank(
    documents_ids=documents_ids,
    queries_embeddings=queries_embeddings,
    documents_embeddings=documents_embeddings,
)
```

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

## Evaluation

### Metrics

#### PyLate Information Retrieval
* Dataset: `['NanoClimateFEVER', 'NanoDBPedia', 'NanoFEVER', 'NanoFiQA2018', 'NanoHotpotQA', 'NanoMSMARCO', 'NanoNFCorpus', 'NanoNQ', 'NanoQuoraRetrieval', 'NanoSCIDOCS', 'NanoArguAna', 'NanoSciFact', 'NanoTouche2020']`
* Evaluated with <code>pylate.evaluation.pylate_information_retrieval_evaluator.PyLateInformationRetrievalEvaluator</code>

| Metric              | NanoClimateFEVER | NanoDBPedia | NanoFEVER  | NanoFiQA2018 | NanoHotpotQA | NanoMSMARCO | NanoNFCorpus | NanoNQ    | NanoQuoraRetrieval | NanoSCIDOCS | NanoArguAna | NanoSciFact | NanoTouche2020 |
|:--------------------|:-----------------|:------------|:-----------|:-------------|:-------------|:------------|:-------------|:----------|:-------------------|:------------|:------------|:------------|:---------------|
| MaxSim_accuracy@1   | 0.3              | 0.84        | 0.86       | 0.5          | 0.9          | 0.48        | 0.48         | 0.54      | 0.9                | 0.44        | 0.22        | 0.7         | 0.7551         |
| MaxSim_accuracy@3   | 0.46             | 0.92        | 1.0        | 0.68         | 0.96         | 0.7         | 0.54         | 0.8       | 0.98               | 0.66        | 0.52        | 0.8         | 0.9796         |
| MaxSim_accuracy@5   | 0.54             | 0.92        | 1.0        | 0.72         | 0.96         | 0.74        | 0.62         | 0.86      | 0.98               | 0.68        | 0.64        | 0.84        | 0.9796         |
| MaxSim_accuracy@10  | 0.72             | 0.92        | 1.0        | 0.8          | 1.0          | 0.9         | 0.7          | 0.9       | 1.0                | 0.8         | 0.8         | 0.88        | 0.9796         |
| MaxSim_precision@1  | 0.3              | 0.84        | 0.86       | 0.5          | 0.9          | 0.48        | 0.48         | 0.54      | 0.9                | 0.44        | 0.22        | 0.7         | 0.7551         |
| MaxSim_precision@3  | 0.16             | 0.66        | 0.3467     | 0.3333       | 0.5267       | 0.2333      | 0.3733       | 0.2733    | 0.3867             | 0.32        | 0.1733      | 0.2867      | 0.7143         |
| MaxSim_precision@5  | 0.128            | 0.6         | 0.208      | 0.236        | 0.328        | 0.148       | 0.36         | 0.176     | 0.248              | 0.236       | 0.128       | 0.184       | 0.6204         |
| MaxSim_precision@10 | 0.094            | 0.53        | 0.108      | 0.14         | 0.178        | 0.09        | 0.29         | 0.096     | 0.138              | 0.166       | 0.08        | 0.098       | 0.5061         |
| MaxSim_recall@1     | 0.145            | 0.1198      | 0.8067     | 0.2972       | 0.45         | 0.48        | 0.0248       | 0.51      | 0.7973             | 0.0937      | 0.22        | 0.675       | 0.0522         |
| MaxSim_recall@3     | 0.2007           | 0.1932      | 0.9567     | 0.4926       | 0.79         | 0.7         | 0.0675       | 0.75      | 0.9247             | 0.1987      | 0.52        | 0.785       | 0.1437         |
| MaxSim_recall@5     | 0.2557           | 0.2475      | 0.9567     | 0.5465       | 0.82         | 0.74        | 0.1008       | 0.81      | 0.9427             | 0.2437      | 0.64        | 0.825       | 0.209          |
| MaxSim_recall@10    | 0.3723           | 0.3536      | 0.9733     | 0.6032       | 0.89         | 0.9         | 0.1498       | 0.86      | 0.9967             | 0.3397      | 0.8         | 0.87        | 0.3296         |
| **MaxSim_ndcg@10**  | **0.2998**       | **0.6643**  | **0.9143** | **0.5454**   | **0.8431**   | **0.682**   | **0.3487**   | **0.701** | **0.9437**         | **0.3404**  | **0.4989**  | **0.7836**  | **0.5853**     |
| MaxSim_mrr@10       | 0.4046           | 0.8767      | 0.92       | 0.6041       | 0.9354       | 0.6141      | 0.5346       | 0.667     | 0.9367             | 0.5582      | 0.4037      | 0.7578      | 0.8639         |
| MaxSim_map@100      | 0.2315           | 0.5056      | 0.8849     | 0.4907       | 0.7785       | 0.6195      | 0.1357       | 0.6421    | 0.9184             | 0.2562      | 0.4086      | 0.7576      | 0.439          |

#### Nano BEIR
* Dataset: `NanoBEIR_mean`
* Evaluated with <code>pylate.evaluation.nano_beir_evaluator.NanoBEIREvaluator</code>

| Metric              | Value     |
|:--------------------|:----------|
| MaxSim_accuracy@1   | 0.6089    |
| MaxSim_accuracy@3   | 0.7692    |
| MaxSim_accuracy@5   | 0.8061    |
| MaxSim_accuracy@10  | 0.8769    |
| MaxSim_precision@1  | 0.6089    |
| MaxSim_precision@3  | 0.3683    |
| MaxSim_precision@5  | 0.277     |
| MaxSim_precision@10 | 0.1934    |
| MaxSim_recall@1     | 0.3594    |
| MaxSim_recall@3     | 0.5171    |
| MaxSim_recall@5     | 0.5644    |
| MaxSim_recall@10    | 0.6491    |
| **MaxSim_ndcg@10**  | **0.627** |
| MaxSim_mrr@10       | 0.6982    |
| MaxSim_map@100      | 0.5437    |

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### msmarco-10m-triplets

* Dataset: [msmarco-10m-triplets](https://huggingface.co/datasets/bclavie/msmarco-10m-triplets) at [8c5139a](https://huggingface.co/datasets/bclavie/msmarco-10m-triplets/tree/8c5139a245a5997992605792faa49ec12a6eb5f2)
* Size: 10,000,000 training samples
* Columns: <code>query</code>, <code>positive</code>, and <code>negative</code>
* Approximate statistics based on the first 1000 samples:
  |         | query                                                                            | positive                                                                           | negative                                                                           |
  |:--------|:---------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
  | type    | string                                                                           | string                                                                             | string                                                                             |
  | details | <ul><li>min: 4 tokens</li><li>mean: 9.31 tokens</li><li>max: 31 tokens</li></ul> | <ul><li>min: 20 tokens</li><li>mean: 31.95 tokens</li><li>max: 32 tokens</li></ul> | <ul><li>min: 18 tokens</li><li>mean: 31.91 tokens</li><li>max: 32 tokens</li></ul> |
* Samples:
  | query                                                                              | positive                                                                                                                                                                                                                                                                                                                 | negative                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  |
  |:-----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
  | <code>the most important factor that influences k+ secretion is __________.</code> | <code>The regulation of K+ distribution between the intracellular and extracellular space is referred to as internal K+ balance. The most important factors regulating this movement under normal conditions are insulin and catecholamines (1).</code>                                                                  | <code>They are both also important for secretion and flow of bile: 1  Cholecystokinin: The name of this hormone describes its effect on the biliary system-cholecysto = gallbladder and kinin = movement. 2  Secretin: This hormone is secreted in response to acid in the duodenum.</code>                                                                                                                                                                                                               |
  | <code>how much did the mackinac bridge cost to build</code>                        | <code>The cost to design the project was $3,500,000 (Steinman Company). The cost to construct the bridge was $70, 268,500. Two primary contractors were hired to build the bridge: American Bridge for superstructure - $44,532,900; and Merritt-Chapman and Scott of New York for the foundations - $25,735,600.</code> | <code>When your child needs a dental tooth bridge, you need to know the average cost so you can factor the price into your budget. Several factors affect the price of a bridge, which can run between $700 to $1,500 per tooth. If you have insurance or your child is covered by Medicaid, part of the cost may be covered.</code>                                                                                                                                                                      |
  | <code>when do concussion symptoms appear</code>                                    | <code>Then you can get advice on what to do next. For milder symptoms, the doctor may recommend rest and ask you to watch your child closely for changes, such as a headache that gets worse. Symptoms of a concussion don't always show up right away, and can develop within 24 to 72 hours after an injury.</code>    | <code>Concussion: A traumatic injury to soft tissue, usually the brain, as a result of a violent blow, shaking, or spinning. A brain concussion can cause immediate but temporary impairment of brain functions, such as thinking, vision, equilibrium, and consciousness. After a person has had a concussion, he or she is at increased risk for recurrence. Moreover, after a person has several concussions, less of a blow can cause injury, and the person can require more time to recover.</code> |
* Loss: <code>pylate.losses.contrastive.Contrastive</code>

### Training Hyperparameters
#### Non-Default Hyperparameters

- `eval_strategy`: steps
- `per_device_train_batch_size`: 64
- `learning_rate`: 3e-05
- `max_steps`: 50000
- `fp16`: True
- `dataloader_drop_last`: True
- `dataloader_num_workers`: 8
- `ddp_find_unused_parameters`: False
- `torch_compile`: True
- `torch_compile_backend`: inductor
- `eval_on_start`: True

#### All Hyperparameters
<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: steps
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 64
- `per_device_eval_batch_size`: 8
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 3e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1.0
- `num_train_epochs`: 3.0
- `max_steps`: 50000
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.0
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: True
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: True
- `dataloader_num_workers`: 8
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: False
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `parallelism_config`: None
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: False
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: None
- `hub_always_push`: False
- `hub_revision`: None
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `include_for_metrics`: []
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`: 
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: True
- `torch_compile_backend`: inductor
- `torch_compile_mode`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: True
- `use_liger_kernel`: False
- `liger_kernel_config`: None
- `eval_use_gather_object`: False
- `average_tokens_across_devices`: False
- `prompts`: None
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: proportional
- `router_mapping`: {}
- `learning_rate_mapping`: {}

</details>