---
tags:
- ColBERT
- PyLate
- sentence-transformers
- sentence-similarity
- embeddings
- retrieval
- feature-extraction
- generated_from_trainer
- dataset_size:640000
- loss:Distillation
pipeline_tag: sentence-similarity
library_name: PyLate
license: apache-2.0
language:
- en
metrics:
- MaxSim_accuracy@1
- MaxSim_accuracy@3
- MaxSim_accuracy@5
- MaxSim_accuracy@10
- MaxSim_precision@1
- MaxSim_precision@3
- MaxSim_precision@5
- MaxSim_precision@10
- MaxSim_recall@1
- MaxSim_recall@3
- MaxSim_recall@5
- MaxSim_recall@10
- MaxSim_ndcg@10
- MaxSim_mrr@10
- MaxSim_map@100
model-index:
- name: PyLate
  results:
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoClimateFEVER
      type: NanoClimateFEVER
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.28
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.68
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.78
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 0.88
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.28
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.28
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.19999999999999996
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.142
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.15833333333333333
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.35999999999999993
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.40399999999999997
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.5263333333333333
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.42422936715942183
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.4945555555555556
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.3394857122449798
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoDBPedia
      type: NanoDBPedia
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.84
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.94
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.94
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 0.96
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.84
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.7133333333333333
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.6639999999999999
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.5840000000000001
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.09765098476549273
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.21493936533978503
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.29263849542716386
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.4078048460655947
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.7220216066014423
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.8895238095238095
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.5775350393915936
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoFEVER
      type: NanoFEVER
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.98
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 1.0
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 1.0
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 1.0
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.98
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.35999999999999993
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.21999999999999997
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.10999999999999999
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.9166666666666667
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.97
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.98
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.98
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.9746887890888085
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.99
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.9662597402597402
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoFiQA2018
      type: NanoFiQA2018
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.54
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.68
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.72
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 0.78
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.54
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.32
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.24799999999999997
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.144
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.30257936507936506
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.46840476190476193
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.5460079365079364
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.6121984126984128
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.543179419158261
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.6155793650793651
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.48231826405604816
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoHotpotQA
      type: NanoHotpotQA
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.98
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 1.0
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 1.0
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 1.0
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.98
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.6066666666666667
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.3679999999999999
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.18599999999999994
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.49
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.91
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.92
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.93
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.928244418306152
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.99
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.9025083961789844
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoMSMARCO
      type: NanoMSMARCO
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.56
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.68
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.78
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 0.88
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.56
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.22666666666666666
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.15600000000000003
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.088
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.56
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.68
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.78
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.88
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.7066072782610768
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.6527142857142857
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.6594095238095238
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoNFCorpus
      type: NanoNFCorpus
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.56
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.66
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.68
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 0.74
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.56
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.42
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.368
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.298
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.06692907683596779
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.10206246733295422
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.12200662252749642
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.15528296036675676
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.3873590885021362
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.6126904761904761
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.18205866068341273
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoNQ
      type: NanoNQ
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.62
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.8
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.84
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 0.88
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.62
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.2733333333333333
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.172
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.09799999999999998
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.58
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.76
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.79
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.86
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.7400441315570866
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.7189999999999999
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.6963679394624304
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoQuoraRetrieval
      type: NanoQuoraRetrieval
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.92
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 1.0
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 1.0
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 1.0
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.92
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.3933333333333333
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.24799999999999997
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.13599999999999998
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.7973333333333333
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.9420000000000001
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.9626666666666668
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.9933333333333334
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.9480099324300113
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.9566666666666667
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.9210518925518926
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoSCIDOCS
      type: NanoSCIDOCS
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.5
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.68
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.76
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 0.86
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.5
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.36666666666666664
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.296
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.19999999999999996
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.10466666666666669
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.22666666666666668
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.30266666666666664
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.40766666666666657
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.40113887814097937
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.6128809523809524
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.3131237586203457
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoArguAna
      type: NanoArguAna
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.26
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.56
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.68
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 0.88
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.26
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.18666666666666668
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.136
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.088
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.26
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.56
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.68
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.88
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.5560482472286857
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.454095238095238
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.4572284326784326
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoSciFact
      type: NanoSciFact
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.74
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.86
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.92
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 0.92
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.74
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.3
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.20399999999999996
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.10199999999999998
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.715
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.83
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.91
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.91
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.8258399595069874
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.804
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.7954666666666667
      name: Maxsim Map@100
  - task:
      type: py-late-information-retrieval
      name: Py Late Information Retrieval
    dataset:
      name: NanoTouche2020
      type: NanoTouche2020
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.7755102040816326
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.8979591836734694
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.9795918367346939
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 1.0
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.7755102040816326
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.6802721088435373
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.6612244897959183
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.5122448979591837
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.05163796594097508
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.1395393096723396
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.21812762563969756
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.33298037717516343
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.5881232935062076
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.8547619047619047
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.4232821896377805
      name: Maxsim Map@100
  - task:
      type: nano-beir
      name: Nano BEIR
    dataset:
      name: NanoBEIR mean
      type: NanoBEIR_mean
    metrics:
    - type: MaxSim_accuracy@1
      value: 0.6581161695447411
      name: Maxsim Accuracy@1
    - type: MaxSim_accuracy@3
      value: 0.8029199372056516
      name: Maxsim Accuracy@3
    - type: MaxSim_accuracy@5
      value: 0.8522762951334379
      name: Maxsim Accuracy@5
    - type: MaxSim_accuracy@10
      value: 0.9061538461538462
      name: Maxsim Accuracy@10
    - type: MaxSim_precision@1
      value: 0.6581161695447411
      name: Maxsim Precision@1
    - type: MaxSim_precision@3
      value: 0.39437990580847726
      name: Maxsim Precision@3
    - type: MaxSim_precision@5
      value: 0.303171114599686
      name: Maxsim Precision@5
    - type: MaxSim_precision@10
      value: 0.20678806907378336
      name: Maxsim Precision@10
    - type: MaxSim_recall@1
      value: 0.3923690302016769
      name: Maxsim Recall@1
    - type: MaxSim_recall@3
      value: 0.5510471208397314
      name: Maxsim Recall@3
    - type: MaxSim_recall@5
      value: 0.6083164625719715
      name: Maxsim Recall@5
    - type: MaxSim_recall@10
      value: 0.6827384561260968
      name: Maxsim Recall@10
    - type: MaxSim_ndcg@10
      value: 0.6727334161113274
      name: Maxsim Ndcg@10
    - type: MaxSim_mrr@10
      value: 0.7420360195360195
      name: Maxsim Mrr@10
    - type: MaxSim_map@100
      value: 0.5935458627878331
      name: Maxsim Map@100
---
[![Website](https://img.shields.io/badge/LightOn-Website-blue?logo=google-chrome)](https://lighton.ai) [![LinkedIn](https://img.shields.io/badge/LightOn-LinkedIn-0A66C2?logo=linkedin)](https://www.linkedin.com/company/lighton/) [![X](https://img.shields.io/badge/@LightOnIO-X-black?logo=x)](https://x.com/LightOnIO)

📄 [Paper](https://arxiv.org/abs/2602.16609) | 📝 [Blog](https://huggingface.co/blog/lightonai/colbert-zero) | 📚 [Collection](https://huggingface.co/collections/lightonai/colbert-zero)
# ColBERT-Zero

> 🎯 **TL;DR**: The first large-scale, fully pre-trained ColBERT model built using only public data. It achieves **55.43 nDCG@10** on the BEIR benchmark, outperforming GTE-ModernColBERT and GTE-ModernBERT, both trained on closed and stronger data. **New SOTA on BEIR for models <150M parameters**.

## Why ColBERT-Zero?

Late interaction (ColBERT / multi-vector) models have clear advantages in out-of-domain generalization, long-context handling, and reasoning-intensive retrieval. Yet they remain undertrained: current state-of-the-art ColBERT models (e.g., [GTE-ModernColBERT](https://huggingface.co/Alibaba-NLP/gte-modernbert-colbert) and [ColBERT-small](https://huggingface.co)) are simply built by bolting a small knowledge distillation step onto a strong dense (single-vector) model. Even recent efforts like [mxbai-edge-colbert-v0](https://huggingface.co/collections/mixedbread-ai/mxbai-edge-colbert-v0-series) perform all early training stages in a single-vector setting, only switching to the multi-vector objective at the very end. **This leaves a lot of performance on the table.**

ColBERT-Zero demonstrates that performing contrastive pre-training directly in the multi-vector setting, rather than treating it as an afterthought, unlocks a significantly higher performance ceiling. Trained exclusively on public data (the [Nomic-embed](https://arxiv.org/abs/2402.01613) dataset mixture), [ColBERT-Zero](https://huggingface.co/lightonai/ColBERT-Zero) overcomes a 2.4-point data quality disadvantage to outperform models trained on proprietary, closed-source data.

For detailed results, please have a look at our [blogpost](https://huggingface.co/blog/lightonai/colbert-zero/) and the [paper](https://arxiv.org/abs/2602.16609). All the [models](https://huggingface.co/collections/lightonai/colbert-zero) (including intermediate checkpoints) as well as the [training code](https://github.com/lightonai/pylate/tree/main/examples/train/ColBERT-zero) are released under an Apache 2.0 license.

## Controlled Comparison Design

We deliberately trained on the public [Nomic-embed](https://arxiv.org/abs/2402.01613) data mixture for a strategic reason: Nomic has already trained a dense ModernBERT model ([ModernBERT-embed](https://huggingface.co/nomic-ai/modernbert-embed-base)) on this exact data. This lets us compare dense vs. multi-vector training with the **same data, same base model ([ModernBERT](https://huggingface.co/answerdotai/ModernBERT-base)), and same pipeline**. The only variable is whether the contrastive phases are performed in the dense or multi-vector setting.

This design reveals a striking result: the dense baseline trained on Nomic data scores 52.89, while the one trained on GTE's proprietary data scores 55.33, a 2.4-point data quality gap. Despite this disadvantage, ColBERT-Zero's full multi-vector pre-training pipeline closes and surpasses this gap, reaching **55.43 nDCG@10**.

## The Three-Phase Training Pipeline

The development followed a three-phase pipeline, each phase providing a different type of learning signal:

### Phase 1 - Unsupervised Contrastive Pre-training

We began with the [nomic-embed-unsupervised-data](https://huggingface.co/datasets/nomic-ai/nomic-embed-unsupervised-data) dataset. Using [PyLate](https://lightonai.github.io/pylate/)'s **GradCache** implementation to scale per-GPU batch size without VRAM constraints, combined with **cross-GPU gathering** of representations, we reached effective batch sizes of **~16k**, which unsupervised training requires to produce plausible in-batch hard negatives.
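The snippet below sketches what this stage looks like with PyLate. It is a hedged illustration, not the released training script: the `CachedContrastive` loss name and its `mini_batch_size`/`gather_across_devices` arguments are assumptions based on PyLate's GradCache support, and a two-row toy dataset stands in for the Nomic mixture (the [training code](https://github.com/lightonai/pylate/tree/main/examples/train/ColBERT-zero) is authoritative).

```python
# Hedged sketch of Phase 1: GradCache-style contrastive training with PyLate.
# Loss/argument names are assumptions; see the released training code for the real setup.
from datasets import Dataset
from sentence_transformers import (
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
)

from pylate import losses, models, utils

model = models.ColBERT(model_name_or_path="answerdotai/ModernBERT-base")

# Stand-in for nomic-embed-unsupervised-data: (query, positive) pairs, where
# other in-batch documents act as negatives once the batch is large enough.
train_dataset = Dataset.from_dict(
    {
        "query": ["what is late interaction?", "capital of france"],
        "document": [
            "Late interaction scores query and document token embeddings with MaxSim.",
            "Paris is the capital and most populous city of France.",
        ],
    }
)

# GradCache splits a large logical batch into small chunks whose activations are
# recomputed during the backward pass, so VRAM no longer caps the batch size.
loss = losses.CachedContrastive(
    model=model,
    mini_batch_size=32,  # chunk actually held in memory (assumed argument name)
    gather_across_devices=True,  # gather representations from all GPUs (assumed flag)
)

args = SentenceTransformerTrainingArguments(
    output_dir="colbert-unsupervised",
    per_device_train_batch_size=2048,  # scaled toward the ~16k effective batch
    bf16=True,
)

trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
    data_collator=utils.ColBERTCollator(tokenize_fn=model.tokenize),
)
trainer.train()
```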
Unlike dense training, the multi-vector objective allows the encoder to learn fine-grained token importance from the very first phase.

### Phase 2 - Supervised Contrastive Fine-tuning

We refined the model using the [nomic-embed-supervised-data](https://huggingface.co/datasets/nomic-ai/nomic-embed-supervised-data) dataset. This stage introduced mined hard negatives: documents that are superficially similar to the query but not actually relevant. It teaches the model to handle nuance by prioritizing the specific keywords and contextual tokens most indicative of a true match.

### Phase 3 - Knowledge Distillation (KD)

The final stage used the [ms-marco-en-bge](https://huggingface.co/datasets/lightonai/ms-marco-en-bge) dataset. We leveraged a powerful Gemma-based model as a teacher, letting our student models learn to replicate its complex relevance scores via the efficient MaxSim operator.

## Key Findings

### 1. The Standard Recipe Leaves Performance on the Table

The KD-only approach (the current industry standard) scores 54.09, lagging behind full pre-training by **1.3 points**. A simple distillation step is insufficient for optimal multi-vector performance.

### 2. Supervised + KD Is the Efficiency Sweet Spot

By running a supervised contrastive step in the multi-vector setting before distillation, we reach **55.12 nDCG@10**, closing most of the gap with the fully pre-trained model (55.43). This costs **~40 GH200-hours instead of ~408**: roughly **10× cheaper for 99.4% of the performance**.
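As background for these numbers, the MaxSim operator that every model above scores with is simple to state: each query token keeps its maximum similarity over all document tokens, and these maxima are summed. A minimal PyTorch sketch (illustrative only, not PyLate's internal implementation):

```python
import torch


def maxsim(query_emb: torch.Tensor, doc_emb: torch.Tensor) -> torch.Tensor:
    """Late-interaction score between one query and one document.

    query_emb: (num_query_tokens, dim) L2-normalized token embeddings
    doc_emb:   (num_doc_tokens, dim)   L2-normalized token embeddings
    """
    # Token-to-token similarity matrix: (num_query_tokens, num_doc_tokens)
    sim = query_emb @ doc_emb.T
    # Each query token keeps its best-matching document token, then sum.
    return sim.max(dim=1).values.sum()


# Toy example with 128-dim vectors, matching this model's output dimensionality.
q = torch.nn.functional.normalize(torch.randn(32, 128), dim=-1)
d = torch.nn.functional.normalize(torch.randn(180, 128), dim=-1)
print(maxsim(q, d))
```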
### 3. Prompt Alignment Is Non-Negotiable

Nomic's base models are pre-trained with asymmetric prompts (`search_query:` and `search_document:`). While ColBERT has its own asymmetric mechanism via the `[Q]` and `[D]` markers, we found the following (illustrated in the sketch after this list):

- **Stripping pre-training prompts during fine-tuning** causes significant performance degradation.
- **Adding prompts to a model not pre-trained with them** also hurts performance.
- **Even with perfect alignment**, prompts provide an intrinsic benefit: full ColBERT pre-training with prompts (55.43) vs. without prompts (54.61), with no mismatch in either case, shows a meaningful 0.82-point gap.
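Concretely, alignment just means keeping the pre-training prefixes at inference time. A minimal sketch using the PyLate API shown in the Usage section below (the query text is illustrative):

```python
from pylate import models

model = models.ColBERT(model_name_or_path="lightonai/ColBERT-Zero")

# Aligned: prompt_name selects the pre-training prefix
# ("search_query: " for queries, "search_document: " for documents).
aligned = model.encode(["who wrote hamlet?"], is_query=True, prompt_name="query")

# Misaligned: omitting prompt_name drops the prefix the model was trained with
# and silently degrades retrieval quality.
misaligned = model.encode(["who wrote hamlet?"], is_query=True)
```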
**Why do prompts help?** Our leading hypothesis is that prompt tokens act as **implicit query expansion**: extra slots that don't carry specific meaning but let the model store global information about the sequence. The original ColBERT used `[PAD]` tokens for this purpose, but modern Flash Attention implementations broke this trick (masked tokens no longer produce usable embeddings). Explicit prompt tokens may be quietly re-enabling it.

**Practical takeaway:** Always align your prompts with the base model's pre-training setup. Misalignment is one of the easiest ways to silently lose performance. Note that this sensitivity decreases with stronger downstream fine-tuning: with enough training, the model can adapt to an initial mismatch.

## Model Lineup

### The Main Models (ColBERT-Zero)

`ColBERT-Zero` utilizes the full three-phase pipeline with strict prompt alignment, **achieving 55.43 nDCG@10 on BEIR** and setting a new SOTA for models <150M parameters. We also provide `ColBERT-Zero-noprompts`, the same pipeline without asymmetric prompts, to study the impact of query expansion on multi-vector performance.

### The cheap-to-train ones (ModernColBERT-embed-base)

These models represent the practical sweet spot. By skipping the expensive unsupervised phase, `ModernColBERT-embed-base` (Supervised + KD) achieves ~97% of the flagship's performance at only ~10% of the compute cost. For reference, `ModernColBERT-embed-base-kd` performs only the distillation step on a supervised dense base.

### Intermediate Checkpoints

For researchers studying the incremental impact of each phase and of prompt alignment, we release several ablation variants: `ColBERT-Zero-supervised`, `ColBERT-Zero-unsupervised` (and their `-noprompts` versions), and `ModernColBERT-embed-base-supervised`.

#### Full Performance on BEIR
| Model | Avg | FiQA | NFCorpus | TREC-COVID | Touche | ArguAna | Quora | SCIDOCS | SciFact | NQ | ClimateFEVER | HotpotQA | DBPedia | CQADupstack | FEVER | MSMARCO |
|:--|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|--:|
| **Baselines** | | | | | | | | | | | | | | | | |
| ModernBERT-embed-unsupervised | 47.05 | 42.53 | 35.33 | 68.44 | 18.58 | 48.82 | 88.63 | 19.83 | 72.30 | 46.32 | 22.97 | 60.00 | 37.97 | 42.40 | 67.39 | 34.23 |
| ModernBERT-embed-supervised | 52.89 | 40.59 | 33.40 | 84.15 | 31.91 | 48.96 | 88.85 | 18.59 | 69.63 | 62.15 | 35.67 | 67.11 | 41.50 | 42.08 | 87.35 | 41.47 |
| GTE-ModernColBERT | 54.67 | 45.28 | 37.93 | 83.59 | 31.23 | 48.51 | 86.61 | 19.06 | 76.34 | 61.80 | 30.62 | 77.32 | 48.03 | 41.00 | 87.44 | 45.32 |
| gte-modernbert-base | 55.33 | 48.81 | 36.44 | 81.95 | 21.68 | 72.68 | 88.55 | 21.29 | 77.40 | 57.62 | 37.74 | 69.47 | 41.79 | 42.63 | 91.03 | 40.90 |
| **KD from dense supervised** | | | | | | | | | | | | | | | | |
| ModernColBERT-embed-base-kd-only | 54.09 | 42.51 | 37.01 | 79.52 | 34.58 | 51.75 | 87.67 | 18.15 | 75.04 | 61.45 | 28.31 | 76.70 | 47.54 | 40.68 | 84.82 | 45.57 |
| **Supervised + KD from dense unsupervised** | | | | | | | | | | | | | | | | |
| ModernColBERT-embed-base-supervised | 50.72 | 40.09 | 35.56 | 71.12 | 25.53 | 44.27 | 86.96 | 18.19 | 73.78 | 58.89 | 32.95 | 71.49 | 43.23 | 42.55 | 70.51 | 45.72 |
| ModernColBERT-embed-base | 55.12 | 41.50 | 36.51 | 77.46 | 33.77 | 52.45 | 86.26 | 18.66 | 74.90 | 62.24 | 37.27 | 80.07 | 48.27 | 41.60 | 89.71 | 46.17 |
| **ColBERT-Zero** | | | | | | | | | | | | | | | | |
| Unsupervised | 51.44 | 45.38 | 36.88 | 67.82 | 22.59 | 51.53 | 87.78 | 22.30 | 76.76 | 58.80 | 24.24 | 68.29 | 43.16 | 45.76 | 81.58 | 38.78 |
| Supervised | 51.81 | 42.45 | 35.60 | 74.72 | 23.83 | 41.81 | 87.19 | 19.85 | 73.71 | 61.95 | 35.01 | 71.37 | 46.20 | 45.16 | 72.61 | 45.68 |
| Distilled | 55.43 | 42.62 | 37.28 | 78.69 | 36.13 | 53.07 | 85.24 | 19.88 | 76.50 | 61.66 | 35.72 | 79.41 | 47.48 | 41.34 | 90.59 | 45.80 |
| **ColBERT-Zero-noprompts** | | | | | | | | | | | | | | | | |
| Unsupervised | 51.70 | 45.31 | 34.72 | 73.55 | 23.26 | 52.56 | 88.15 | 22.63 | 76.10 | 59.18 | 24.24 | 66.66 | 42.61 | 45.56 | 81.88 | 39.15 |
| Supervised | 52.39 | 43.36 | 36.01 | 72.42 | 23.79 | 47.42 | 87.79 | 21.30 | 73.85 | 62.25 | 31.61 | 70.32 | 44.07 | 44.03 | 85.54 | 42.11 |
| Distilled | 54.61 | 43.14 | 36.60 | 78.60 | 36.36 | 49.49 | 88.05 | 19.13 | 76.42 | 61.73 | 32.70 | 76.99 | 47.69 | 40.21 | 85.97 | 46.01 |
## Limitations & Discussion

- **Data-specific findings.** We deliberately used the Nomic Embed data mixture for controlled comparison. Some observations (particularly around prompt sensitivity) may not generalize to different or stronger training configurations.
- **Scale vs. objective.** The gains from multi-vector pre-training likely reflect *more training time* in the multi-vector setting, rather than the contrastive objective itself. Performing KD alone at a larger scale might yield similar or superior results due to the higher quality of the distillation signal. Our study uses the conventional setup where training scale is inversely proportional to signal quality, reflecting the higher cost of generating high-quality labels.
- **Prompt sensitivity decreases with stronger fine-tuning.** When experimenting with stronger fine-tuning data (e.g., NV-Retriever), adding prompts on top of a model pre-trained without them did not degrade results the way it did with ColBERT-Zero. With enough downstream training, the model can adapt to an initial mismatch.

## Serving at Scale

For production deployment of ColBERT-Zero and other multi-vector models, check out [NextPlaid](https://github.com/lightonai/nextplaid) and [FastPlaid](https://github.com/lightonai/fastplaid), our production-grade engines for multi-vector retrieval.

## Resources

- 📦 **All checkpoints:** [HF Collection](https://huggingface.co/collections/lightonai/colbert-zero) - every phase, with and without prompts
- 💻 **Code:** [Training boilerplates](https://github.com/lightonai/pylate/tree/main/examples/train/ColBERT-zero)
- 📄 **Paper:** [ArXiv](https://arxiv.org/abs/2602.16609)

## Model Details

### Model Description

- **Model Type:** PyLate model
- **Document Length:** 519 tokens
- **Query Length:** 39 tokens
- **Output Dimensionality:** 128 dimensions
- **Similarity Function:** MaxSim
- **Training Dataset:**
  - train

### Model Sources

- **Documentation:** [PyLate Documentation](https://lightonai.github.io/pylate/)
- **Repository:** [PyLate on GitHub](https://github.com/lightonai/pylate)
- **Hugging Face:** [PyLate models on Hugging Face](https://huggingface.co/models?library=PyLate)

### Full Model Architecture

```
ColBERT(
  (0): Transformer({'max_seq_length': 518, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
  (1): Dense({'in_features': 768, 'out_features': 128, 'bias': False, 'activation_function': 'torch.nn.modules.linear.Identity', 'use_residual': False})
)
```

## Usage

First install the PyLate library:

```bash
pip install -U pylate
```

> [!WARNING]
> **Prompt alignment is critical for ColBERT-Zero models.** You **must** use `prompt_name="query"` when encoding queries and `prompt_name="document"` when encoding documents. ColBERT-Zero was pre-trained with asymmetric prompts (`search_query:` / `search_document:`), and stripping them causes significant performance degradation.

### Retrieval

Use this model with PyLate to index and retrieve documents. The index uses [FastPLAID](https://github.com/lightonai/fast-plaid) for efficient similarity search.
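Before indexing, it can help to sanity-check the encoder output against the model description above. A quick hedged sketch (`pylate_model_id` is the placeholder used throughout this card):

```python
from pylate import models

model = models.ColBERT(model_name_or_path="pylate_model_id")

embeddings = model.encode(
    ["a short test query"],
    is_query=True,
    prompt_name="query",  # keep prompts aligned, as warned above
)

# One embedding matrix per input text: one 128-dimensional vector per token.
print(embeddings[0].shape)  # e.g. (num_query_tokens, 128)
```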
#### Indexing documents

Load the ColBERT model and initialize the PLAID index, then encode and index your documents:

```python
from pylate import indexes, models, retrieve

# Step 1: Load the ColBERT model
model = models.ColBERT(
    model_name_or_path="pylate_model_id",
)

# Step 2: Initialize the PLAID index
index = indexes.PLAID(
    index_folder="pylate-index",
    index_name="index",
    override=True,  # This overwrites the existing index if any
)

# Step 3: Encode the documents
documents_ids = ["1", "2", "3"]
documents = ["document 1 text", "document 2 text", "document 3 text"]

documents_embeddings = model.encode(
    documents,
    batch_size=32,
    is_query=False,  # Ensure this is set to False to indicate that these are documents, not queries
    prompt_name="document",  # ⚠️ Required for ColBERT-Zero! Do not omit.
    show_progress_bar=True,
)

# Step 4: Add document embeddings to the index by providing embeddings and corresponding ids
index.add_documents(
    documents_ids=documents_ids,
    documents_embeddings=documents_embeddings,
)
```

Note that you do not have to recreate the index and encode the documents every time. Once you have created an index and added the documents, you can re-use the index later by loading it:

```python
# To load an index, simply instantiate it with the correct folder/name and without overriding it
index = indexes.PLAID(
    index_folder="pylate-index",
    index_name="index",
)
```

#### Retrieving top-k documents for queries

Once the documents are indexed, you can retrieve the top-k most relevant documents for a given set of queries. To do so, initialize the ColBERT retriever with the index you want to search in, encode the queries, and retrieve the top-k documents to get the ids and relevance scores of the top matches:

> [!WARNING]
> Always pass `prompt_name="query"` for queries and `prompt_name="document"` for documents. Omitting these prompts will silently degrade retrieval quality.

```python
# Step 1: Initialize the ColBERT retriever
retriever = retrieve.ColBERT(index=index)

# Step 2: Encode the queries
queries_embeddings = model.encode(
    ["query for document 3", "query for document 1"],
    batch_size=32,
    is_query=True,  # Ensure this is set to True to indicate that these are queries
    prompt_name="query",  # ⚠️ Required for ColBERT-Zero! Do not omit.
    show_progress_bar=True,
)

# Step 3: Retrieve top-k documents
scores = retriever.retrieve(
    queries_embeddings=queries_embeddings,
    k=10,  # Retrieve the top 10 matches for each query
)
```

### Reranking

> [!WARNING]
> Always pass `prompt_name="query"` for queries and `prompt_name="document"` for documents. Omitting these prompts will silently degrade retrieval quality.

If you only want to use the ColBERT model to perform reranking on top of your first-stage retrieval pipeline without building an index, you can simply use the `rank` function and pass the queries and documents to rerank:

```python
from pylate import rank, models

queries = [
    "query A",
    "query B",
]

documents = [
    ["document A", "document B"],
    ["document 1", "document C", "document B"],
]

documents_ids = [
    [1, 2],
    [1, 3, 2],
]

model = models.ColBERT(
    model_name_or_path="pylate_model_id",
)

queries_embeddings = model.encode(
    queries,
    is_query=True,
    prompt_name="query",  # ⚠️ Required for ColBERT-Zero! Do not omit.
)

documents_embeddings = model.encode(
    documents,
    is_query=False,
    prompt_name="document",  # ⚠️ Required for ColBERT-Zero! Do not omit.
)

reranked_documents = rank.rerank(
    documents_ids=documents_ids,
    queries_embeddings=queries_embeddings,
    documents_embeddings=documents_embeddings,
)
```

## Evaluation

### Metrics

#### Py Late Information Retrieval

* Datasets: `NanoClimateFEVER`, `NanoDBPedia`, `NanoFEVER`, `NanoFiQA2018`, `NanoHotpotQA`, `NanoMSMARCO`, `NanoNFCorpus`, `NanoNQ`, `NanoQuoraRetrieval`, `NanoSCIDOCS`, `NanoArguAna`, `NanoSciFact`, `NanoTouche2020`
* Evaluated with `pylate.evaluation.pylate_information_retrieval_evaluator.PyLateInformationRetrievalEvaluator`

| Metric | NanoClimateFEVER | NanoDBPedia | NanoFEVER | NanoFiQA2018 | NanoHotpotQA | NanoMSMARCO | NanoNFCorpus | NanoNQ | NanoQuoraRetrieval | NanoSCIDOCS | NanoArguAna | NanoSciFact | NanoTouche2020 |
|:--------------------|:-----------------|:------------|:-----------|:-------------|:-------------|:------------|:-------------|:---------|:-------------------|:------------|:------------|:------------|:---------------|
| MaxSim_accuracy@1 | 0.28 | 0.84 | 0.98 | 0.54 | 0.98 | 0.56 | 0.56 | 0.62 | 0.92 | 0.5 | 0.26 | 0.74 | 0.7755 |
| MaxSim_accuracy@3 | 0.68 | 0.94 | 1.0 | 0.68 | 1.0 | 0.68 | 0.66 | 0.8 | 1.0 | 0.68 | 0.56 | 0.86 | 0.898 |
| MaxSim_accuracy@5 | 0.78 | 0.94 | 1.0 | 0.72 | 1.0 | 0.78 | 0.68 | 0.84 | 1.0 | 0.76 | 0.68 | 0.92 | 0.9796 |
| MaxSim_accuracy@10 | 0.88 | 0.96 | 1.0 | 0.78 | 1.0 | 0.88 | 0.74 | 0.88 | 1.0 | 0.86 | 0.88 | 0.92 | 1.0 |
| MaxSim_precision@1 | 0.28 | 0.84 | 0.98 | 0.54 | 0.98 | 0.56 | 0.56 | 0.62 | 0.92 | 0.5 | 0.26 | 0.74 | 0.7755 |
| MaxSim_precision@3 | 0.28 | 0.7133 | 0.36 | 0.32 | 0.6067 | 0.2267 | 0.42 | 0.2733 | 0.3933 | 0.3667 | 0.1867 | 0.3 | 0.6803 |
| MaxSim_precision@5 | 0.2 | 0.664 | 0.22 | 0.248 | 0.368 | 0.156 | 0.368 | 0.172 | 0.248 | 0.296 | 0.136 | 0.204 | 0.6612 |
| MaxSim_precision@10 | 0.142 | 0.584 | 0.11 | 0.144 | 0.186 | 0.088 | 0.298 | 0.098 | 0.136 | 0.2 | 0.088 | 0.102 | 0.5122 |
| MaxSim_recall@1 | 0.1583 | 0.0977 | 0.9167 | 0.3026 | 0.49 | 0.56 | 0.0669 | 0.58 | 0.7973 | 0.1047 | 0.26 | 0.715 | 0.0516 |
| MaxSim_recall@3 | 0.36 | 0.2149 | 0.97 | 0.4684 | 0.91 | 0.68 | 0.1021 | 0.76 | 0.942 | 0.2267 | 0.56 | 0.83 | 0.1395 |
| MaxSim_recall@5 | 0.404 | 0.2926 | 0.98 | 0.546 | 0.92 | 0.78 | 0.122 | 0.79 | 0.9627 | 0.3027 | 0.68 | 0.91 | 0.2181 |
| MaxSim_recall@10 | 0.5263 | 0.4078 | 0.98 | 0.6122 | 0.93 | 0.88 | 0.1553 | 0.86 | 0.9933 | 0.4077 | 0.88 | 0.91 | 0.333 |
| **MaxSim_ndcg@10** | **0.4242** | **0.722** | **0.9747** | **0.5432** | **0.9282** | **0.7066** | **0.3874** | **0.74** | **0.948** | **0.4011** | **0.556** | **0.8258** | **0.5881** |
| MaxSim_mrr@10 | 0.4946 | 0.8895 | 0.99 | 0.6156 | 0.99 | 0.6527 | 0.6127 | 0.719 | 0.9567 | 0.6129 | 0.4541 | 0.804 | 0.8548 |
| MaxSim_map@100 | 0.3395 | 0.5775 | 0.9663 | 0.4823 | 0.9025 | 0.6594 | 0.1821 | 0.6964 | 0.9211 | 0.3131 | 0.4572 | 0.7955 | 0.4233 |

#### Nano BEIR

* Dataset: `NanoBEIR_mean`
* Evaluated with `pylate.evaluation.nano_beir_evaluator.NanoBEIREvaluator`

| Metric | Value |
|:--------------------|:-----------|
| MaxSim_accuracy@1 | 0.6581 |
| MaxSim_accuracy@3 | 0.8029 |
| MaxSim_accuracy@5 | 0.8523 |
| MaxSim_accuracy@10 | 0.9062 |
| MaxSim_precision@1 | 0.6581 |
| MaxSim_precision@3 | 0.3944 |
| MaxSim_precision@5 | 0.3032 |
| MaxSim_precision@10 | 0.2068 |
| MaxSim_recall@1 | 0.3924 |
| MaxSim_recall@3 | 0.551 |
| MaxSim_recall@5 | 0.6083 |
| MaxSim_recall@10 | 0.6827 |
| **MaxSim_ndcg@10** | **0.6727** |
| MaxSim_mrr@10 | 0.742 |
| MaxSim_map@100 | 0.5935 |

## Training Details

### Training Dataset

#### train
train * Size: 640,000 training samples * Columns: query_id, document_ids, and scores * Approximate statistics based on the first 1000 samples: | | query_id | document_ids | scores | |:--------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
### Training Dataset

* Columns: `query_id`, `document_ids`, and `scores`
* Approximate statistics:

  |         | query_id | document_ids | scores |
  |:--------|:---------|:-------------|:-------|
  | type    | int      | list         | list   |
  | details |          |              |        |

* Samples:

  | query_id | document_ids | scores |
  |:---------|:-------------|:-------|
  | 685613 | [7546874, 1176459, 197677, 2306318, 8541504, ...] | [0.9999999992804947, 0.24845418756716053, 0.7594154013647826, 0.26644182105618575, 0.390668914839766, ...] |
  | 237784 | [6366584, 4034101, 2325374, 6914618, 6042146, ...] | [0.9999999991784339, 0.42233632827946693, 0.5956354295491569, 0.12644415907455164, 0.6636713730105909, ...] |
  | 904294 | [448408, 8743975, 49600, 7339401, 2714261, ...] | [0.9999999991841937, 0.877629062381539, 0.8330146583389045, 0.3116634796692611, 0.4633524534142185, ...] |

* Loss: `pylate.losses.distillation.Distillation`

### Training Hyperparameters

#### Non-Default Hyperparameters

- `eval_strategy`: steps
- `per_device_train_batch_size`: 4
- `per_device_eval_batch_size`: 4
- `gradient_accumulation_steps`: 2
- `learning_rate`: 4e-05
- `num_train_epochs`: 1.0
- `bf16`: True
- `dataloader_num_workers`: 4
- `ddp_find_unused_parameters`: False
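The `(query_id, document_ids, scores)` triples above are the layout consumed by PyLate's knowledge-distillation loss: each query comes with a list of candidate documents and their teacher relevance scores. The following is a minimal training sketch under that assumption, wiring the non-default hyperparameters above into `SentenceTransformerTrainingArguments`; the base checkpoint and dataset identifiers are placeholders, not necessarily what was used for this model.

```python
from datasets import load_dataset
from sentence_transformers import (
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
)
from pylate import losses, models, utils

# Placeholder checkpoint and dataset: substitute the actual ones.
model = models.ColBERT(model_name_or_path="bert-base-uncased")
train = load_dataset("lightonai/ms-marco-en-bge", "train", split="train")
queries = load_dataset("lightonai/ms-marco-en-bge", "queries", split="train")
documents = load_dataset("lightonai/ms-marco-en-bge", "documents", split="train")

# Resolve query_id / document_ids to their texts on the fly.
train.set_transform(
    utils.KDProcessing(queries=queries, documents=documents).transform
)

# Non-default hyperparameters from this card. eval_strategy="steps" is left
# out because it also requires an eval dataset, which is omitted here.
args = SentenceTransformerTrainingArguments(
    output_dir="output",
    num_train_epochs=1.0,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    learning_rate=4e-5,
    bf16=True,
    dataloader_num_workers=4,
    ddp_find_unused_parameters=False,
)

trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train,
    loss=losses.Distillation(model=model),
    data_collator=utils.ColBERTCollator(tokenize_fn=model.tokenize),
)
trainer.train()
```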
#### All Hyperparameters

<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: steps
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 4
- `per_device_eval_batch_size`: 4
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 2
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 4e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1.0
- `num_train_epochs`: 1.0
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.0
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: True
- `fp16`: False
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 2
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: True
- `dataloader_num_workers`: 4
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: False
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: False
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: None
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `include_for_metrics`: []
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`: 
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `dispatch_batches`: None
- `split_batches`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `use_liger_kernel`: False
- `eval_use_gather_object`: False
- `average_tokens_across_devices`: False
- `prompts`: None
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: proportional
- `router_mapping`: {}
- `learning_rate_mapping`: {}

</details>
### Training Logs
<details><summary>Click to expand</summary>

| Epoch | Step | Training Loss | NanoClimateFEVER_MaxSim_ndcg@10 | NanoDBPedia_MaxSim_ndcg@10 | NanoFEVER_MaxSim_ndcg@10 | NanoFiQA2018_MaxSim_ndcg@10 | NanoHotpotQA_MaxSim_ndcg@10 | NanoMSMARCO_MaxSim_ndcg@10 | NanoNFCorpus_MaxSim_ndcg@10 | NanoNQ_MaxSim_ndcg@10 | NanoQuoraRetrieval_MaxSim_ndcg@10 | NanoSCIDOCS_MaxSim_ndcg@10 | NanoArguAna_MaxSim_ndcg@10 | NanoSciFact_MaxSim_ndcg@10 | NanoTouche2020_MaxSim_ndcg@10 | NanoBEIR_mean_MaxSim_ndcg@10 |
|:------:|:-----:|:-------------:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|:---:|
| 0.0025 | 50 | 0.0192 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.0275 | 550 | 0.0161 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.0525 | 1050 | 0.0146 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.075 | 1500 | 0.0145 | 0.4345 | 0.7035 | 0.9608 | 0.5361 | 0.9348 | 0.6818 | 0.3704 | 0.7291 | 0.9381 | 0.3923 | 0.5558 | 0.8060 | 0.5785 | 0.6632 |
| 0.0775 | 1550 | 0.0139 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.1025 | 2050 | 0.0139 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.1275 | 2550 | 0.0129 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.15 | 3000 | 0.0128 | 0.4348 | 0.7184 | 0.9742 | 0.5474 | 0.9354 | 0.6925 | 0.3707 | 0.7316 | 0.9577 | 0.3986 | 0.5715 | 0.8156 | 0.6029 | 0.6732 |
| 0.1525 | 3050 | 0.0127 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.1775 | 3550 | 0.0123 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.2025 | 4050 | 0.0123 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.225 | 4500 | 0.0119 | 0.4090 | 0.6891 | 0.9742 | 0.5301 | 0.9347 | 0.6903 | 0.3767 | 0.7251 | 0.9542 | 0.3935 | 0.5705 | 0.8245 | 0.5910 | 0.6664 |
| 0.2275 | 4550 | 0.0117 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.2525 | 5050 | 0.012 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.2775 | 5550 | 0.0116 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.3 | 6000 | 0.0114 | 0.4342 | 0.7072 | 0.9698 | 0.5441 | 0.9302 | 0.7098 | 0.3777 | 0.7255 | 0.9533 | 0.4037 | 0.5621 | 0.8294 | 0.6033 | 0.6731 |
| 0.3025 | 6050 | 0.0115 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.3275 | 6550 | 0.0114 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.3525 | 7050 | 0.0112 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.375 | 7500 | 0.0116 | 0.4160 | 0.7142 | 0.9722 | 0.5442 | 0.9281 | 0.6993 | 0.3749 | 0.7276 | 0.9494 | 0.4042 | 0.5444 | 0.8346 | 0.5940 | 0.6695 |
| 0.3775 | 7550 | 0.0111 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.4025 | 8050 | 0.0106 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.4275 | 8550 | 0.0107 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.45 | 9000 | 0.0103 | 0.4267 | 0.7286 | 0.9722 | 0.5501 | 0.9325 | 0.7014 | 0.3794 | 0.7266 | 0.9487 | 0.4042 | 0.5635 | 0.8247 | 0.5986 | 0.6736 |
| 0.4525 | 9050 | 0.0108 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.4775 | 9550 | 0.0106 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.5025 | 10050 | 0.0101 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.525 | 10500 | 0.0102 | 0.4293 | 0.7121 | 0.9731 | 0.5298 | 0.9270 | 0.7058 | 0.3716 | 0.7231 | 0.9452 | 0.4008 | 0.5605 | 0.8185 | 0.5808 | 0.6675 |
| 0.5275 | 10550 | 0.0103 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.5525 | 11050 | 0.0101 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.5775 | 11550 | 0.0098 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.6 | 12000 | 0.01 | 0.4250 | 0.7261 | 0.9755 | 0.5208 | 0.9349 | 0.6825 | 0.3794 | 0.7314 | 0.9455 | 0.3970 | 0.5482 | 0.8161 | 0.5875 | 0.6669 |
| 0.6025 | 12050 | 0.0101 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.6275 | 12550 | 0.0098 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.6525 | 13050 | 0.0099 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.675 | 13500 | 0.0096 | 0.4303 | 0.7139 | 0.9739 | 0.5517 | 0.9286 | 0.7090 | 0.3857 | 0.7466 | 0.9494 | 0.3902 | 0.5457 | 0.8178 | 0.5991 | 0.6725 |
| 0.6775 | 13550 | 0.0097 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.7025 | 14050 | 0.0097 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.7275 | 14550 | 0.0095 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.75 | 15000 | 0.0096 | 0.4406 | 0.7261 | 0.9755 | 0.5440 | 0.9321 | 0.6973 | 0.3761 | 0.7283 | 0.9469 | 0.3958 | 0.5671 | 0.8162 | 0.5827 | 0.6714 |
| 0.7525 | 15050 | 0.0096 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.7775 | 15550 | 0.0094 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.8025 | 16050 | 0.009 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.825 | 16500 | 0.0095 | 0.4313 | 0.7244 | 0.9717 | 0.5470 | 0.9280 | 0.7140 | 0.3872 | 0.7388 | 0.9487 | 0.3940 | 0.5719 | 0.8234 | 0.5910 | 0.6747 |
| 0.8275 | 16550 | 0.0089 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.8525 | 17050 | 0.0091 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.8775 | 17550 | 0.0091 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.9 | 18000 | 0.0091 | 0.4252 | 0.7205 | 0.9731 | 0.5461 | 0.9268 | 0.7029 | 0.3876 | 0.7445 | 0.9481 | 0.4054 | 0.5564 | 0.8258 | 0.5928 | 0.6735 |
| 0.9025 | 18050 | 0.0094 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.9275 | 18550 | 0.0091 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.9525 | 19050 | 0.0089 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |
| 0.975 | 19500 | 0.0091 | 0.4242 | 0.7220 | 0.9747 | 0.5432 | 0.9282 | 0.7066 | 0.3874 | 0.7400 | 0.9480 | 0.4011 | 0.5560 | 0.8258 | 0.5881 | 0.6727 |
| 0.9775 | 19550 | 0.0093 | - | - | - | - | - | - | - | - | - | - | - | - | - | - |

</details>
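Every metric column in the logs above is MaxSim-based: the model encodes queries and documents into per-token embedding matrices and scores a pair with ColBERT's late-interaction MaxSim operator, where each query token keeps the similarity of its best-matching document token and the maxima are summed. A minimal PyTorch sketch of that operator on toy tensors, independent of the PyLate API:

```python
import torch
import torch.nn.functional as F


def maxsim(query_emb: torch.Tensor, doc_emb: torch.Tensor) -> torch.Tensor:
    """ColBERT late-interaction score for one (query, document) pair.

    query_emb: (num_query_tokens, dim); doc_emb: (num_doc_tokens, dim).
    Embeddings are assumed L2-normalized, so dot products are cosines.
    """
    sim = query_emb @ doc_emb.T          # (num_query_tokens, num_doc_tokens)
    return sim.max(dim=1).values.sum()   # best doc token per query token, summed


# Toy example with random normalized embeddings (dim=128, as in many ColBERT models).
query = F.normalize(torch.randn(32, 128), dim=-1)
doc = F.normalize(torch.randn(180, 128), dim=-1)
print(maxsim(query, doc))
```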
### Framework Versions

- Python: 3.13.0
- Sentence Transformers: 5.1.1
- PyLate: 1.3.4
- Transformers: 4.48.3
- PyTorch: 2.6.0
- Accelerate: 1.12.0
- Datasets: 4.4.1
- Tokenizers: 0.21.0

## Citation

### BibTeX

#### ColBERT-Zero

```bibtex
@misc{chaffin2026colbertzeropretrainpretraincolbert,
  title         = {ColBERT-Zero: To Pre-train Or Not To Pre-train ColBERT models},
  author        = {Antoine Chaffin and Luca Arnaboldi and Amélie Chatelain and Florent Krzakala},
  year          = {2026},
  eprint        = {2602.16609},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CL},
  url           = {https://arxiv.org/abs/2602.16609},
}
```

#### Sentence Transformers

```bibtex
@inproceedings{reimers-2019-sentence-bert,
  title     = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
  author    = "Reimers, Nils and Gurevych, Iryna",
  booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
  month     = "11",
  year      = "2019",
  publisher = "Association for Computational Linguistics",
  url       = "https://arxiv.org/abs/1908.10084",
}
```

#### PyLate

```bibtex
@inproceedings{DBLP:conf/cikm/ChaffinS25,
  author    = {Antoine Chaffin and Rapha{\"{e}}l Sourty},
  editor    = {Meeyoung Cha and Chanyoung Park and Noseong Park and Carl Yang and Senjuti Basu Roy and Jessie Li and Jaap Kamps and Kijung Shin and Bryan Hooi and Lifang He},
  title     = {PyLate: Flexible Training and Retrieval for Late Interaction Models},
  booktitle = {Proceedings of the 34th {ACM} International Conference on Information and Knowledge Management, {CIKM} 2025, Seoul, Republic of Korea, November 10-14, 2025},
  pages     = {6334--6339},
  publisher = {{ACM}},
  year      = {2025},
  url       = {https://github.com/lightonai/pylate},
  doi       = {10.1145/3746252.3761608},
}
```

#### Nomic Embed

```bibtex
@article{DBLP:journals/tmlr/NussbaumMMD25,
  author    = {Zach Nussbaum and John Xavier Morris and Andriy Mulyar and Brandon Duderstadt},
  title     = {Nomic Embed: Training a Reproducible Long Context Text Embedder},
  journal   = {Trans. Mach. Learn. Res.},
  volume    = {2025},
  year      = {2025},
  url       = {https://openreview.net/forum?id=IPmzyQSiQE},
  timestamp = {Fri, 20 Jun 2025 14:19:48 +0200},
  biburl    = {https://dblp.org/rec/journals/tmlr/NussbaumMMD25.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org},
}
```