{ "best_global_step": 36000, "best_metric": 0.5250944624924359, "best_model_checkpoint": "ModernBERT-small-distilled-v2/checkpoint-36000", "epoch": 0.6826197428798968, "eval_steps": 1000, "global_step": 36000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0018961659524441578, "grad_norm": 1.531716227531433, "learning_rate": 9.38566552901024e-07, "loss": 4.2698190307617185, "step": 100 }, { "epoch": 0.0037923319048883157, "grad_norm": 1.45695960521698, "learning_rate": 1.8866135760333712e-06, "loss": 4.230399475097657, "step": 200 }, { "epoch": 0.005688497857332474, "grad_norm": 1.4260753393173218, "learning_rate": 2.8346605991657187e-06, "loss": 4.128007202148438, "step": 300 }, { "epoch": 0.007584663809776631, "grad_norm": 1.9604460000991821, "learning_rate": 3.7827076222980664e-06, "loss": 3.8576431274414062, "step": 400 }, { "epoch": 0.00948082976222079, "grad_norm": 0.8248822689056396, "learning_rate": 4.730754645430414e-06, "loss": 3.1561373901367187, "step": 500 }, { "epoch": 0.011376995714664948, "grad_norm": 0.6517618894577026, "learning_rate": 5.678801668562761e-06, "loss": 2.552709503173828, "step": 600 }, { "epoch": 0.013273161667109106, "grad_norm": 0.5630219578742981, "learning_rate": 6.626848691695109e-06, "loss": 2.327459716796875, "step": 700 }, { "epoch": 0.015169327619553263, "grad_norm": 0.4430118799209595, "learning_rate": 7.574895714827455e-06, "loss": 2.2655821228027344, "step": 800 }, { "epoch": 0.017065493571997423, "grad_norm": 0.5574463605880737, "learning_rate": 8.522942737959804e-06, "loss": 2.2401161193847656, "step": 900 }, { "epoch": 0.01896165952444158, "grad_norm": 0.48447561264038086, "learning_rate": 9.47098976109215e-06, "loss": 2.22558349609375, "step": 1000 }, { "epoch": 0.01896165952444158, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.01, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.16999999999999998, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.04, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.06999999999999999, "eval_NanoBEIR_mean_cosine_map@100": 0.030615740568451958, "eval_NanoBEIR_mean_cosine_mrr@10": 0.04062301587301588, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.05451501792713928, "eval_NanoBEIR_mean_cosine_precision@1": 0.01, "eval_NanoBEIR_mean_cosine_precision@10": 0.018000000000000002, "eval_NanoBEIR_mean_cosine_precision@3": 0.013333333333333332, "eval_NanoBEIR_mean_cosine_precision@5": 0.014, "eval_NanoBEIR_mean_cosine_recall@1": 0.005, "eval_NanoBEIR_mean_cosine_recall@10": 0.13, "eval_NanoBEIR_mean_cosine_recall@3": 0.02, "eval_NanoBEIR_mean_cosine_recall@5": 0.04, "eval_NanoHotpotQA_cosine_accuracy@1": 0.02, "eval_NanoHotpotQA_cosine_accuracy@10": 0.18, "eval_NanoHotpotQA_cosine_accuracy@3": 0.08, "eval_NanoHotpotQA_cosine_accuracy@5": 0.12, "eval_NanoHotpotQA_cosine_map@100": 0.03479001595433966, "eval_NanoHotpotQA_cosine_mrr@10": 0.0601904761904762, "eval_NanoHotpotQA_cosine_ndcg@10": 0.05765775039428842, "eval_NanoHotpotQA_cosine_precision@1": 0.02, "eval_NanoHotpotQA_cosine_precision@10": 0.020000000000000004, "eval_NanoHotpotQA_cosine_precision@3": 0.026666666666666665, "eval_NanoHotpotQA_cosine_precision@5": 0.024, "eval_NanoHotpotQA_cosine_recall@1": 0.01, "eval_NanoHotpotQA_cosine_recall@10": 0.1, "eval_NanoHotpotQA_cosine_recall@3": 0.04, "eval_NanoHotpotQA_cosine_recall@5": 0.06, "eval_NanoMSMARCO_cosine_accuracy@1": 0.0, "eval_NanoMSMARCO_cosine_accuracy@10": 0.16, "eval_NanoMSMARCO_cosine_accuracy@3": 0.0, "eval_NanoMSMARCO_cosine_accuracy@5": 0.02, "eval_NanoMSMARCO_cosine_map@100": 0.026441465182564253, "eval_NanoMSMARCO_cosine_mrr@10": 0.021055555555555557, "eval_NanoMSMARCO_cosine_ndcg@10": 0.05137228545999013, "eval_NanoMSMARCO_cosine_precision@1": 0.0, "eval_NanoMSMARCO_cosine_precision@10": 0.016, "eval_NanoMSMARCO_cosine_precision@3": 0.0, "eval_NanoMSMARCO_cosine_precision@5": 0.004, "eval_NanoMSMARCO_cosine_recall@1": 0.0, "eval_NanoMSMARCO_cosine_recall@10": 0.16, "eval_NanoMSMARCO_cosine_recall@3": 0.0, "eval_NanoMSMARCO_cosine_recall@5": 0.02, "eval_mse-dev_negative_mse": -221.21437072753906, "eval_runtime": 11.2871, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.05451501792713928, "eval_steps_per_second": 0.0, "step": 1000 }, { "epoch": 0.020857825476885736, "grad_norm": 0.6331929564476013, "learning_rate": 1.0419036784224499e-05, "loss": 2.213970031738281, "step": 1100 }, { "epoch": 0.022753991429329896, "grad_norm": 0.501175045967102, "learning_rate": 1.1367083807356845e-05, "loss": 2.191977081298828, "step": 1200 }, { "epoch": 0.024650157381774052, "grad_norm": 0.5054857134819031, "learning_rate": 1.2315130830489193e-05, "loss": 2.1839501953125, "step": 1300 }, { "epoch": 0.026546323334218212, "grad_norm": 0.6071318984031677, "learning_rate": 1.326317785362154e-05, "loss": 2.1661726379394532, "step": 1400 }, { "epoch": 0.02844248928666237, "grad_norm": 0.508758008480072, "learning_rate": 1.4211224876753888e-05, "loss": 2.1598078918457033, "step": 1500 }, { "epoch": 0.030338655239106525, "grad_norm": 0.7203693985939026, "learning_rate": 1.5159271899886234e-05, "loss": 2.145241394042969, "step": 1600 }, { "epoch": 0.03223482119155068, "grad_norm": 0.5547841787338257, "learning_rate": 1.6107318923018582e-05, "loss": 2.122596435546875, "step": 1700 }, { "epoch": 0.034130987143994845, "grad_norm": 0.7341112494468689, "learning_rate": 1.705536594615093e-05, "loss": 2.106784210205078, "step": 1800 }, { "epoch": 0.036027153096439, "grad_norm": 0.6560561656951904, "learning_rate": 1.800341296928328e-05, "loss": 2.0941481018066406, "step": 1900 }, { "epoch": 0.03792331904888316, "grad_norm": 0.7147130966186523, "learning_rate": 1.8951459992415623e-05, "loss": 2.0796484375, "step": 2000 }, { "epoch": 0.03792331904888316, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.06, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.2, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.14, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.15, "eval_NanoBEIR_mean_cosine_map@100": 0.09355619049166879, "eval_NanoBEIR_mean_cosine_mrr@10": 0.10269444444444445, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.10765372452496824, "eval_NanoBEIR_mean_cosine_precision@1": 0.06, "eval_NanoBEIR_mean_cosine_precision@10": 0.021, "eval_NanoBEIR_mean_cosine_precision@3": 0.04666666666666666, "eval_NanoBEIR_mean_cosine_precision@5": 0.030000000000000002, "eval_NanoBEIR_mean_cosine_recall@1": 0.045, "eval_NanoBEIR_mean_cosine_recall@10": 0.16499999999999998, "eval_NanoBEIR_mean_cosine_recall@3": 0.11499999999999999, "eval_NanoBEIR_mean_cosine_recall@5": 0.12, "eval_NanoHotpotQA_cosine_accuracy@1": 0.06, "eval_NanoHotpotQA_cosine_accuracy@10": 0.16, "eval_NanoHotpotQA_cosine_accuracy@3": 0.1, "eval_NanoHotpotQA_cosine_accuracy@5": 0.12, "eval_NanoHotpotQA_cosine_map@100": 0.057732668001867715, "eval_NanoHotpotQA_cosine_mrr@10": 0.08650000000000001, "eval_NanoHotpotQA_cosine_ndcg@10": 0.0671827764380485, "eval_NanoHotpotQA_cosine_precision@1": 0.06, "eval_NanoHotpotQA_cosine_precision@10": 0.018, "eval_NanoHotpotQA_cosine_precision@3": 0.03333333333333333, "eval_NanoHotpotQA_cosine_precision@5": 0.024, "eval_NanoHotpotQA_cosine_recall@1": 0.03, "eval_NanoHotpotQA_cosine_recall@10": 0.09, "eval_NanoHotpotQA_cosine_recall@3": 0.05, "eval_NanoHotpotQA_cosine_recall@5": 0.06, "eval_NanoMSMARCO_cosine_accuracy@1": 0.06, "eval_NanoMSMARCO_cosine_accuracy@10": 0.24, "eval_NanoMSMARCO_cosine_accuracy@3": 0.18, "eval_NanoMSMARCO_cosine_accuracy@5": 0.18, "eval_NanoMSMARCO_cosine_map@100": 0.12937971298146986, "eval_NanoMSMARCO_cosine_mrr@10": 0.11888888888888888, "eval_NanoMSMARCO_cosine_ndcg@10": 0.14812467261188797, "eval_NanoMSMARCO_cosine_precision@1": 0.06, "eval_NanoMSMARCO_cosine_precision@10": 0.024000000000000004, "eval_NanoMSMARCO_cosine_precision@3": 0.06, "eval_NanoMSMARCO_cosine_precision@5": 0.036000000000000004, "eval_NanoMSMARCO_cosine_recall@1": 0.06, "eval_NanoMSMARCO_cosine_recall@10": 0.24, "eval_NanoMSMARCO_cosine_recall@3": 0.18, "eval_NanoMSMARCO_cosine_recall@5": 0.18, "eval_mse-dev_negative_mse": -206.88653564453125, "eval_runtime": 12.6634, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.10765372452496824, "eval_steps_per_second": 0.0, "step": 2000 }, { "epoch": 0.039819485001327315, "grad_norm": 0.6483538746833801, "learning_rate": 1.989950701554797e-05, "loss": 2.062061767578125, "step": 2100 }, { "epoch": 0.04171565095377147, "grad_norm": 0.7314621806144714, "learning_rate": 2.084755403868032e-05, "loss": 2.054515838623047, "step": 2200 }, { "epoch": 0.043611816906215635, "grad_norm": 0.7281008362770081, "learning_rate": 2.1795601061812668e-05, "loss": 2.0381907653808593, "step": 2300 }, { "epoch": 0.04550798285865979, "grad_norm": 0.7791172862052917, "learning_rate": 2.2743648084945016e-05, "loss": 2.0266854858398435, "step": 2400 }, { "epoch": 0.04740414881110395, "grad_norm": 0.7451071739196777, "learning_rate": 2.369169510807736e-05, "loss": 2.016678466796875, "step": 2500 }, { "epoch": 0.049300314763548105, "grad_norm": 0.8240593671798706, "learning_rate": 2.463974213120971e-05, "loss": 2.004122619628906, "step": 2600 }, { "epoch": 0.05119648071599226, "grad_norm": 0.8770548701286316, "learning_rate": 2.5587789154342057e-05, "loss": 1.990180206298828, "step": 2700 }, { "epoch": 0.053092646668436425, "grad_norm": 0.8051754236221313, "learning_rate": 2.6535836177474405e-05, "loss": 1.9746481323242187, "step": 2800 }, { "epoch": 0.05498881262088058, "grad_norm": 0.8228394389152527, "learning_rate": 2.7483883200606753e-05, "loss": 1.9650479125976563, "step": 2900 }, { "epoch": 0.05688497857332474, "grad_norm": 0.9059156775474548, "learning_rate": 2.84319302237391e-05, "loss": 1.9538874816894531, "step": 3000 }, { "epoch": 0.05688497857332474, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.05, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.27, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.12000000000000001, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.18, "eval_NanoBEIR_mean_cosine_map@100": 0.10268455521269725, "eval_NanoBEIR_mean_cosine_mrr@10": 0.10739682539682541, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.12427348063841058, "eval_NanoBEIR_mean_cosine_precision@1": 0.05, "eval_NanoBEIR_mean_cosine_precision@10": 0.031, "eval_NanoBEIR_mean_cosine_precision@3": 0.043333333333333335, "eval_NanoBEIR_mean_cosine_precision@5": 0.042, "eval_NanoBEIR_mean_cosine_recall@1": 0.035, "eval_NanoBEIR_mean_cosine_recall@10": 0.215, "eval_NanoBEIR_mean_cosine_recall@3": 0.09, "eval_NanoBEIR_mean_cosine_recall@5": 0.14500000000000002, "eval_NanoHotpotQA_cosine_accuracy@1": 0.06, "eval_NanoHotpotQA_cosine_accuracy@10": 0.3, "eval_NanoHotpotQA_cosine_accuracy@3": 0.14, "eval_NanoHotpotQA_cosine_accuracy@5": 0.2, "eval_NanoHotpotQA_cosine_map@100": 0.09775604930816952, "eval_NanoHotpotQA_cosine_mrr@10": 0.12591269841269842, "eval_NanoHotpotQA_cosine_ndcg@10": 0.1241974731265571, "eval_NanoHotpotQA_cosine_precision@1": 0.06, "eval_NanoHotpotQA_cosine_precision@10": 0.038, "eval_NanoHotpotQA_cosine_precision@3": 0.05333333333333333, "eval_NanoHotpotQA_cosine_precision@5": 0.052000000000000005, "eval_NanoHotpotQA_cosine_recall@1": 0.03, "eval_NanoHotpotQA_cosine_recall@10": 0.19, "eval_NanoHotpotQA_cosine_recall@3": 0.08, "eval_NanoHotpotQA_cosine_recall@5": 0.13, "eval_NanoMSMARCO_cosine_accuracy@1": 0.04, "eval_NanoMSMARCO_cosine_accuracy@10": 0.24, "eval_NanoMSMARCO_cosine_accuracy@3": 0.1, "eval_NanoMSMARCO_cosine_accuracy@5": 0.16, "eval_NanoMSMARCO_cosine_map@100": 0.10761306111722498, "eval_NanoMSMARCO_cosine_mrr@10": 0.0888809523809524, "eval_NanoMSMARCO_cosine_ndcg@10": 0.12434948815026406, "eval_NanoMSMARCO_cosine_precision@1": 0.04, "eval_NanoMSMARCO_cosine_precision@10": 0.024, "eval_NanoMSMARCO_cosine_precision@3": 0.03333333333333333, "eval_NanoMSMARCO_cosine_precision@5": 0.032, "eval_NanoMSMARCO_cosine_recall@1": 0.04, "eval_NanoMSMARCO_cosine_recall@10": 0.24, "eval_NanoMSMARCO_cosine_recall@3": 0.1, "eval_NanoMSMARCO_cosine_recall@5": 0.16, "eval_mse-dev_negative_mse": -194.54396057128906, "eval_runtime": 11.1789, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.12427348063841058, "eval_steps_per_second": 0.0, "step": 3000 }, { "epoch": 0.058781144525768894, "grad_norm": 1.0623186826705933, "learning_rate": 2.937997724687145e-05, "loss": 1.9401417541503907, "step": 3100 }, { "epoch": 0.06067731047821305, "grad_norm": 0.8394317030906677, "learning_rate": 3.032802427000379e-05, "loss": 1.93172607421875, "step": 3200 }, { "epoch": 0.06257347643065721, "grad_norm": 0.7523216009140015, "learning_rate": 3.127607129313614e-05, "loss": 1.9180873107910157, "step": 3300 }, { "epoch": 0.06446964238310136, "grad_norm": 0.8299034237861633, "learning_rate": 3.222411831626849e-05, "loss": 1.9097779846191407, "step": 3400 }, { "epoch": 0.06636580833554552, "grad_norm": 0.7642733454704285, "learning_rate": 3.3172165339400835e-05, "loss": 1.8983055114746095, "step": 3500 }, { "epoch": 0.06826197428798969, "grad_norm": 0.806705892086029, "learning_rate": 3.412021236253318e-05, "loss": 1.8924456787109376, "step": 3600 }, { "epoch": 0.07015814024043385, "grad_norm": 0.786217212677002, "learning_rate": 3.506825938566553e-05, "loss": 1.8805953979492187, "step": 3700 }, { "epoch": 0.072054306192878, "grad_norm": 0.8994006514549255, "learning_rate": 3.601630640879788e-05, "loss": 1.8717079162597656, "step": 3800 }, { "epoch": 0.07395047214532216, "grad_norm": 0.856419026851654, "learning_rate": 3.696435343193023e-05, "loss": 1.8591105651855468, "step": 3900 }, { "epoch": 0.07584663809776632, "grad_norm": 0.9824651479721069, "learning_rate": 3.7912400455062576e-05, "loss": 1.8524658203125, "step": 4000 }, { "epoch": 0.07584663809776632, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.06, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.38, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.19, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.28, "eval_NanoBEIR_mean_cosine_map@100": 0.1284317459612893, "eval_NanoBEIR_mean_cosine_mrr@10": 0.14647222222222223, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.16958431091390092, "eval_NanoBEIR_mean_cosine_precision@1": 0.06, "eval_NanoBEIR_mean_cosine_precision@10": 0.044000000000000004, "eval_NanoBEIR_mean_cosine_precision@3": 0.06666666666666665, "eval_NanoBEIR_mean_cosine_precision@5": 0.06200000000000001, "eval_NanoBEIR_mean_cosine_recall@1": 0.04, "eval_NanoBEIR_mean_cosine_recall@10": 0.30000000000000004, "eval_NanoBEIR_mean_cosine_recall@3": 0.14, "eval_NanoBEIR_mean_cosine_recall@5": 0.215, "eval_NanoHotpotQA_cosine_accuracy@1": 0.08, "eval_NanoHotpotQA_cosine_accuracy@10": 0.44, "eval_NanoHotpotQA_cosine_accuracy@3": 0.22, "eval_NanoHotpotQA_cosine_accuracy@5": 0.32, "eval_NanoHotpotQA_cosine_map@100": 0.12429452794406634, "eval_NanoHotpotQA_cosine_mrr@10": 0.17600000000000002, "eval_NanoHotpotQA_cosine_ndcg@10": 0.1744798681125654, "eval_NanoHotpotQA_cosine_precision@1": 0.08, "eval_NanoHotpotQA_cosine_precision@10": 0.05600000000000001, "eval_NanoHotpotQA_cosine_precision@3": 0.07999999999999999, "eval_NanoHotpotQA_cosine_precision@5": 0.07600000000000001, "eval_NanoHotpotQA_cosine_recall@1": 0.04, "eval_NanoHotpotQA_cosine_recall@10": 0.28, "eval_NanoHotpotQA_cosine_recall@3": 0.12, "eval_NanoHotpotQA_cosine_recall@5": 0.19, "eval_NanoMSMARCO_cosine_accuracy@1": 0.04, "eval_NanoMSMARCO_cosine_accuracy@10": 0.32, "eval_NanoMSMARCO_cosine_accuracy@3": 0.16, "eval_NanoMSMARCO_cosine_accuracy@5": 0.24, "eval_NanoMSMARCO_cosine_map@100": 0.1325689639785123, "eval_NanoMSMARCO_cosine_mrr@10": 0.11694444444444443, "eval_NanoMSMARCO_cosine_ndcg@10": 0.16468875371523642, "eval_NanoMSMARCO_cosine_precision@1": 0.04, "eval_NanoMSMARCO_cosine_precision@10": 0.032, "eval_NanoMSMARCO_cosine_precision@3": 0.05333333333333333, "eval_NanoMSMARCO_cosine_precision@5": 0.04800000000000001, "eval_NanoMSMARCO_cosine_recall@1": 0.04, "eval_NanoMSMARCO_cosine_recall@10": 0.32, "eval_NanoMSMARCO_cosine_recall@3": 0.16, "eval_NanoMSMARCO_cosine_recall@5": 0.24, "eval_mse-dev_negative_mse": -184.20260620117188, "eval_runtime": 11.2486, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.16958431091390092, "eval_steps_per_second": 0.0, "step": 4000 }, { "epoch": 0.07774280405021047, "grad_norm": 0.7716678380966187, "learning_rate": 3.8860447478194924e-05, "loss": 1.8416305541992188, "step": 4100 }, { "epoch": 0.07963897000265463, "grad_norm": 0.8711826801300049, "learning_rate": 3.980849450132727e-05, "loss": 1.8359121704101562, "step": 4200 }, { "epoch": 0.08153513595509879, "grad_norm": 0.9473533630371094, "learning_rate": 4.075654152445961e-05, "loss": 1.825589141845703, "step": 4300 }, { "epoch": 0.08343130190754294, "grad_norm": 0.8626433610916138, "learning_rate": 4.170458854759196e-05, "loss": 1.8131285095214844, "step": 4400 }, { "epoch": 0.0853274678599871, "grad_norm": 0.9295884370803833, "learning_rate": 4.265263557072431e-05, "loss": 1.8063204956054688, "step": 4500 }, { "epoch": 0.08722363381243127, "grad_norm": 0.9008107781410217, "learning_rate": 4.360068259385666e-05, "loss": 1.7949688720703125, "step": 4600 }, { "epoch": 0.08911979976487543, "grad_norm": 0.791011393070221, "learning_rate": 4.4548729616989006e-05, "loss": 1.7845721435546875, "step": 4700 }, { "epoch": 0.09101596571731958, "grad_norm": 0.7334835529327393, "learning_rate": 4.5496776640121354e-05, "loss": 1.7761888122558593, "step": 4800 }, { "epoch": 0.09291213166976374, "grad_norm": 0.9481487274169922, "learning_rate": 4.64448236632537e-05, "loss": 1.7620162963867188, "step": 4900 }, { "epoch": 0.0948082976222079, "grad_norm": 0.870833694934845, "learning_rate": 4.739287068638605e-05, "loss": 1.7605400085449219, "step": 5000 }, { "epoch": 0.0948082976222079, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.1, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.41000000000000003, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.28, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.33999999999999997, "eval_NanoBEIR_mean_cosine_map@100": 0.1626079879266355, "eval_NanoBEIR_mean_cosine_mrr@10": 0.19677777777777777, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.19920527873932037, "eval_NanoBEIR_mean_cosine_precision@1": 0.1, "eval_NanoBEIR_mean_cosine_precision@10": 0.045000000000000005, "eval_NanoBEIR_mean_cosine_precision@3": 0.09666666666666665, "eval_NanoBEIR_mean_cosine_precision@5": 0.07, "eval_NanoBEIR_mean_cosine_recall@1": 0.07, "eval_NanoBEIR_mean_cosine_recall@10": 0.305, "eval_NanoBEIR_mean_cosine_recall@3": 0.2, "eval_NanoBEIR_mean_cosine_recall@5": 0.24, "eval_NanoHotpotQA_cosine_accuracy@1": 0.12, "eval_NanoHotpotQA_cosine_accuracy@10": 0.5, "eval_NanoHotpotQA_cosine_accuracy@3": 0.34, "eval_NanoHotpotQA_cosine_accuracy@5": 0.42, "eval_NanoHotpotQA_cosine_map@100": 0.15098296694670035, "eval_NanoHotpotQA_cosine_mrr@10": 0.23677777777777778, "eval_NanoHotpotQA_cosine_ndcg@10": 0.20242024631804575, "eval_NanoHotpotQA_cosine_precision@1": 0.12, "eval_NanoHotpotQA_cosine_precision@10": 0.05800000000000001, "eval_NanoHotpotQA_cosine_precision@3": 0.11999999999999998, "eval_NanoHotpotQA_cosine_precision@5": 0.08800000000000001, "eval_NanoHotpotQA_cosine_recall@1": 0.06, "eval_NanoHotpotQA_cosine_recall@10": 0.29, "eval_NanoHotpotQA_cosine_recall@3": 0.18, "eval_NanoHotpotQA_cosine_recall@5": 0.22, "eval_NanoMSMARCO_cosine_accuracy@1": 0.08, "eval_NanoMSMARCO_cosine_accuracy@10": 0.32, "eval_NanoMSMARCO_cosine_accuracy@3": 0.22, "eval_NanoMSMARCO_cosine_accuracy@5": 0.26, "eval_NanoMSMARCO_cosine_map@100": 0.17423300890657065, "eval_NanoMSMARCO_cosine_mrr@10": 0.15677777777777777, "eval_NanoMSMARCO_cosine_ndcg@10": 0.195990311160595, "eval_NanoMSMARCO_cosine_precision@1": 0.08, "eval_NanoMSMARCO_cosine_precision@10": 0.032, "eval_NanoMSMARCO_cosine_precision@3": 0.07333333333333333, "eval_NanoMSMARCO_cosine_precision@5": 0.052000000000000005, "eval_NanoMSMARCO_cosine_recall@1": 0.08, "eval_NanoMSMARCO_cosine_recall@10": 0.32, "eval_NanoMSMARCO_cosine_recall@3": 0.22, "eval_NanoMSMARCO_cosine_recall@5": 0.26, "eval_mse-dev_negative_mse": -175.1685333251953, "eval_runtime": 11.7971, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.19920527873932037, "eval_steps_per_second": 0.0, "step": 5000 }, { "epoch": 0.09670446357465205, "grad_norm": 0.9423368573188782, "learning_rate": 4.83409177095184e-05, "loss": 1.7480519104003907, "step": 5100 }, { "epoch": 0.09860062952709621, "grad_norm": 0.980880856513977, "learning_rate": 4.9288964732650746e-05, "loss": 1.7419432067871095, "step": 5200 }, { "epoch": 0.10049679547954037, "grad_norm": 0.8834021687507629, "learning_rate": 5.0237011755783095e-05, "loss": 1.730076446533203, "step": 5300 }, { "epoch": 0.10239296143198452, "grad_norm": 0.9464291930198669, "learning_rate": 5.118505877891544e-05, "loss": 1.727989959716797, "step": 5400 }, { "epoch": 0.10428912738442868, "grad_norm": 0.9521955251693726, "learning_rate": 5.213310580204779e-05, "loss": 1.7130671691894532, "step": 5500 }, { "epoch": 0.10618529333687285, "grad_norm": 0.8180538415908813, "learning_rate": 5.308115282518014e-05, "loss": 1.7063189697265626, "step": 5600 }, { "epoch": 0.108081459289317, "grad_norm": 0.9113965034484863, "learning_rate": 5.402919984831249e-05, "loss": 1.695858917236328, "step": 5700 }, { "epoch": 0.10997762524176116, "grad_norm": 0.8348143100738525, "learning_rate": 5.497724687144482e-05, "loss": 1.6884242248535157, "step": 5800 }, { "epoch": 0.11187379119420532, "grad_norm": 1.00839364528656, "learning_rate": 5.592529389457717e-05, "loss": 1.6800929260253907, "step": 5900 }, { "epoch": 0.11376995714664948, "grad_norm": 0.9566198587417603, "learning_rate": 5.687334091770952e-05, "loss": 1.6699765014648438, "step": 6000 }, { "epoch": 0.11376995714664948, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.15, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.42000000000000004, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.29000000000000004, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.35, "eval_NanoBEIR_mean_cosine_map@100": 0.19699705475888196, "eval_NanoBEIR_mean_cosine_mrr@10": 0.2369126984126984, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.2321280636169713, "eval_NanoBEIR_mean_cosine_precision@1": 0.15, "eval_NanoBEIR_mean_cosine_precision@10": 0.04700000000000001, "eval_NanoBEIR_mean_cosine_precision@3": 0.09999999999999999, "eval_NanoBEIR_mean_cosine_precision@5": 0.07600000000000001, "eval_NanoBEIR_mean_cosine_recall@1": 0.105, "eval_NanoBEIR_mean_cosine_recall@10": 0.33, "eval_NanoBEIR_mean_cosine_recall@3": 0.22000000000000003, "eval_NanoBEIR_mean_cosine_recall@5": 0.27, "eval_NanoHotpotQA_cosine_accuracy@1": 0.18, "eval_NanoHotpotQA_cosine_accuracy@10": 0.46, "eval_NanoHotpotQA_cosine_accuracy@3": 0.3, "eval_NanoHotpotQA_cosine_accuracy@5": 0.38, "eval_NanoHotpotQA_cosine_map@100": 0.17208604019775084, "eval_NanoHotpotQA_cosine_mrr@10": 0.2664126984126984, "eval_NanoHotpotQA_cosine_ndcg@10": 0.21500115424853145, "eval_NanoHotpotQA_cosine_precision@1": 0.18, "eval_NanoHotpotQA_cosine_precision@10": 0.05600000000000001, "eval_NanoHotpotQA_cosine_precision@3": 0.10666666666666666, "eval_NanoHotpotQA_cosine_precision@5": 0.08800000000000001, "eval_NanoHotpotQA_cosine_recall@1": 0.09, "eval_NanoHotpotQA_cosine_recall@10": 0.28, "eval_NanoHotpotQA_cosine_recall@3": 0.16, "eval_NanoHotpotQA_cosine_recall@5": 0.22, "eval_NanoMSMARCO_cosine_accuracy@1": 0.12, "eval_NanoMSMARCO_cosine_accuracy@10": 0.38, "eval_NanoMSMARCO_cosine_accuracy@3": 0.28, "eval_NanoMSMARCO_cosine_accuracy@5": 0.32, "eval_NanoMSMARCO_cosine_map@100": 0.2219080693200131, "eval_NanoMSMARCO_cosine_mrr@10": 0.20741269841269844, "eval_NanoMSMARCO_cosine_ndcg@10": 0.24925497298541116, "eval_NanoMSMARCO_cosine_precision@1": 0.12, "eval_NanoMSMARCO_cosine_precision@10": 0.038000000000000006, "eval_NanoMSMARCO_cosine_precision@3": 0.09333333333333332, "eval_NanoMSMARCO_cosine_precision@5": 0.064, "eval_NanoMSMARCO_cosine_recall@1": 0.12, "eval_NanoMSMARCO_cosine_recall@10": 0.38, "eval_NanoMSMARCO_cosine_recall@3": 0.28, "eval_NanoMSMARCO_cosine_recall@5": 0.32, "eval_mse-dev_negative_mse": -166.4923858642578, "eval_runtime": 10.5014, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.2321280636169713, "eval_steps_per_second": 0.0, "step": 6000 }, { "epoch": 0.11566612309909363, "grad_norm": 0.8698049783706665, "learning_rate": 5.7821387940841866e-05, "loss": 1.6636680603027343, "step": 6100 }, { "epoch": 0.11756228905153779, "grad_norm": 0.88554447889328, "learning_rate": 5.8769434963974214e-05, "loss": 1.6543186950683593, "step": 6200 }, { "epoch": 0.11945845500398194, "grad_norm": 0.9408504366874695, "learning_rate": 5.971748198710656e-05, "loss": 1.6451298522949218, "step": 6300 }, { "epoch": 0.1213546209564261, "grad_norm": 0.8811279535293579, "learning_rate": 6.066552901023891e-05, "loss": 1.6382298278808594, "step": 6400 }, { "epoch": 0.12325078690887026, "grad_norm": 0.9638504385948181, "learning_rate": 6.161357603337125e-05, "loss": 1.6278233337402344, "step": 6500 }, { "epoch": 0.12514695286131441, "grad_norm": 0.9717722535133362, "learning_rate": 6.25616230565036e-05, "loss": 1.62345458984375, "step": 6600 }, { "epoch": 0.12704311881375857, "grad_norm": 1.0567059516906738, "learning_rate": 6.350967007963595e-05, "loss": 1.6149652099609375, "step": 6700 }, { "epoch": 0.12893928476620273, "grad_norm": 0.9955742359161377, "learning_rate": 6.44577171027683e-05, "loss": 1.6053521728515625, "step": 6800 }, { "epoch": 0.13083545071864688, "grad_norm": 1.0742182731628418, "learning_rate": 6.540576412590064e-05, "loss": 1.6007347106933594, "step": 6900 }, { "epoch": 0.13273161667109104, "grad_norm": 0.9622364044189453, "learning_rate": 6.6353811149033e-05, "loss": 1.587445068359375, "step": 7000 }, { "epoch": 0.13273161667109104, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.2, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.46, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.28, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.33, "eval_NanoBEIR_mean_cosine_map@100": 0.2218708796716416, "eval_NanoBEIR_mean_cosine_mrr@10": 0.2654285714285714, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.257882927462247, "eval_NanoBEIR_mean_cosine_precision@1": 0.2, "eval_NanoBEIR_mean_cosine_precision@10": 0.052000000000000005, "eval_NanoBEIR_mean_cosine_precision@3": 0.09666666666666665, "eval_NanoBEIR_mean_cosine_precision@5": 0.07, "eval_NanoBEIR_mean_cosine_recall@1": 0.145, "eval_NanoBEIR_mean_cosine_recall@10": 0.36, "eval_NanoBEIR_mean_cosine_recall@3": 0.21500000000000002, "eval_NanoBEIR_mean_cosine_recall@5": 0.255, "eval_NanoHotpotQA_cosine_accuracy@1": 0.22, "eval_NanoHotpotQA_cosine_accuracy@10": 0.52, "eval_NanoHotpotQA_cosine_accuracy@3": 0.28, "eval_NanoHotpotQA_cosine_accuracy@5": 0.34, "eval_NanoHotpotQA_cosine_map@100": 0.1843725858934317, "eval_NanoHotpotQA_cosine_mrr@10": 0.2868571428571428, "eval_NanoHotpotQA_cosine_ndcg@10": 0.23487212685023443, "eval_NanoHotpotQA_cosine_precision@1": 0.22, "eval_NanoHotpotQA_cosine_precision@10": 0.064, "eval_NanoHotpotQA_cosine_precision@3": 0.09999999999999998, "eval_NanoHotpotQA_cosine_precision@5": 0.07600000000000001, "eval_NanoHotpotQA_cosine_recall@1": 0.11, "eval_NanoHotpotQA_cosine_recall@10": 0.32, "eval_NanoHotpotQA_cosine_recall@3": 0.15, "eval_NanoHotpotQA_cosine_recall@5": 0.19, "eval_NanoMSMARCO_cosine_accuracy@1": 0.18, "eval_NanoMSMARCO_cosine_accuracy@10": 0.4, "eval_NanoMSMARCO_cosine_accuracy@3": 0.28, "eval_NanoMSMARCO_cosine_accuracy@5": 0.32, "eval_NanoMSMARCO_cosine_map@100": 0.2593691734498515, "eval_NanoMSMARCO_cosine_mrr@10": 0.24400000000000002, "eval_NanoMSMARCO_cosine_ndcg@10": 0.28089372807425955, "eval_NanoMSMARCO_cosine_precision@1": 0.18, "eval_NanoMSMARCO_cosine_precision@10": 0.04, "eval_NanoMSMARCO_cosine_precision@3": 0.09333333333333332, "eval_NanoMSMARCO_cosine_precision@5": 0.064, "eval_NanoMSMARCO_cosine_recall@1": 0.18, "eval_NanoMSMARCO_cosine_recall@10": 0.4, "eval_NanoMSMARCO_cosine_recall@3": 0.28, "eval_NanoMSMARCO_cosine_recall@5": 0.32, "eval_mse-dev_negative_mse": -158.10133361816406, "eval_runtime": 11.0707, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.257882927462247, "eval_steps_per_second": 0.0, "step": 7000 }, { "epoch": 0.13462778262353522, "grad_norm": 0.979546070098877, "learning_rate": 6.730185817216534e-05, "loss": 1.582412109375, "step": 7100 }, { "epoch": 0.13652394857597938, "grad_norm": 1.0893486738204956, "learning_rate": 6.82499051952977e-05, "loss": 1.57244384765625, "step": 7200 }, { "epoch": 0.13842011452842354, "grad_norm": 1.0537185668945312, "learning_rate": 6.919795221843004e-05, "loss": 1.5668838500976563, "step": 7300 }, { "epoch": 0.1403162804808677, "grad_norm": 0.9376671314239502, "learning_rate": 7.014599924156239e-05, "loss": 1.553501739501953, "step": 7400 }, { "epoch": 0.14221244643331185, "grad_norm": 0.9399901032447815, "learning_rate": 7.109404626469473e-05, "loss": 1.5449533081054687, "step": 7500 }, { "epoch": 0.144108612385756, "grad_norm": 0.88112473487854, "learning_rate": 7.204209328782709e-05, "loss": 1.5345271301269532, "step": 7600 }, { "epoch": 0.14600477833820016, "grad_norm": 0.9386707544326782, "learning_rate": 7.299014031095943e-05, "loss": 1.5340492248535156, "step": 7700 }, { "epoch": 0.14790094429064432, "grad_norm": 0.942371129989624, "learning_rate": 7.393818733409178e-05, "loss": 1.5242007446289063, "step": 7800 }, { "epoch": 0.14979711024308848, "grad_norm": 0.8463137745857239, "learning_rate": 7.488623435722411e-05, "loss": 1.5181001281738282, "step": 7900 }, { "epoch": 0.15169327619553263, "grad_norm": 0.9643734693527222, "learning_rate": 7.583428138035647e-05, "loss": 1.5085635375976563, "step": 8000 }, { "epoch": 0.15169327619553263, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.21, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.48, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.31000000000000005, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.33999999999999997, "eval_NanoBEIR_mean_cosine_map@100": 0.2338424020963123, "eval_NanoBEIR_mean_cosine_mrr@10": 0.28084126984126984, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.27053479230771704, "eval_NanoBEIR_mean_cosine_precision@1": 0.21, "eval_NanoBEIR_mean_cosine_precision@10": 0.053000000000000005, "eval_NanoBEIR_mean_cosine_precision@3": 0.10666666666666666, "eval_NanoBEIR_mean_cosine_precision@5": 0.07600000000000001, "eval_NanoBEIR_mean_cosine_recall@1": 0.15, "eval_NanoBEIR_mean_cosine_recall@10": 0.375, "eval_NanoBEIR_mean_cosine_recall@3": 0.23, "eval_NanoBEIR_mean_cosine_recall@5": 0.27, "eval_NanoHotpotQA_cosine_accuracy@1": 0.24, "eval_NanoHotpotQA_cosine_accuracy@10": 0.52, "eval_NanoHotpotQA_cosine_accuracy@3": 0.34, "eval_NanoHotpotQA_cosine_accuracy@5": 0.36, "eval_NanoHotpotQA_cosine_map@100": 0.20036400143179198, "eval_NanoHotpotQA_cosine_mrr@10": 0.30996825396825395, "eval_NanoHotpotQA_cosine_ndcg@10": 0.24538724835027803, "eval_NanoHotpotQA_cosine_precision@1": 0.24, "eval_NanoHotpotQA_cosine_precision@10": 0.062, "eval_NanoHotpotQA_cosine_precision@3": 0.11999999999999998, "eval_NanoHotpotQA_cosine_precision@5": 0.08800000000000001, "eval_NanoHotpotQA_cosine_recall@1": 0.12, "eval_NanoHotpotQA_cosine_recall@10": 0.31, "eval_NanoHotpotQA_cosine_recall@3": 0.18, "eval_NanoHotpotQA_cosine_recall@5": 0.22, "eval_NanoMSMARCO_cosine_accuracy@1": 0.18, "eval_NanoMSMARCO_cosine_accuracy@10": 0.44, "eval_NanoMSMARCO_cosine_accuracy@3": 0.28, "eval_NanoMSMARCO_cosine_accuracy@5": 0.32, "eval_NanoMSMARCO_cosine_map@100": 0.2673208027608326, "eval_NanoMSMARCO_cosine_mrr@10": 0.2517142857142857, "eval_NanoMSMARCO_cosine_ndcg@10": 0.295682336265156, "eval_NanoMSMARCO_cosine_precision@1": 0.18, "eval_NanoMSMARCO_cosine_precision@10": 0.044000000000000004, "eval_NanoMSMARCO_cosine_precision@3": 0.09333333333333332, "eval_NanoMSMARCO_cosine_precision@5": 0.064, "eval_NanoMSMARCO_cosine_recall@1": 0.18, "eval_NanoMSMARCO_cosine_recall@10": 0.44, "eval_NanoMSMARCO_cosine_recall@3": 0.28, "eval_NanoMSMARCO_cosine_recall@5": 0.32, "eval_mse-dev_negative_mse": -150.10321044921875, "eval_runtime": 10.6864, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.27053479230771704, "eval_steps_per_second": 0.0, "step": 8000 }, { "epoch": 0.1535894421479768, "grad_norm": 0.8630809187889099, "learning_rate": 7.677284793325749e-05, "loss": 1.5007017517089845, "step": 8100 }, { "epoch": 0.15548560810042095, "grad_norm": 0.8799474835395813, "learning_rate": 7.772089495638985e-05, "loss": 1.4950062561035156, "step": 8200 }, { "epoch": 0.1573817740528651, "grad_norm": 0.9594865441322327, "learning_rate": 7.866894197952219e-05, "loss": 1.4829434204101561, "step": 8300 }, { "epoch": 0.15927794000530926, "grad_norm": 0.8919075727462769, "learning_rate": 7.961698900265454e-05, "loss": 1.4779867553710937, "step": 8400 }, { "epoch": 0.16117410595775342, "grad_norm": 0.9076706767082214, "learning_rate": 8.056503602578687e-05, "loss": 1.4736830139160155, "step": 8500 }, { "epoch": 0.16307027191019757, "grad_norm": 0.8629969954490662, "learning_rate": 8.151308304891923e-05, "loss": 1.4603062438964844, "step": 8600 }, { "epoch": 0.16496643786264173, "grad_norm": 0.969744086265564, "learning_rate": 8.246113007205157e-05, "loss": 1.451029052734375, "step": 8700 }, { "epoch": 0.16686260381508589, "grad_norm": 0.9152198433876038, "learning_rate": 8.340917709518392e-05, "loss": 1.4499801635742187, "step": 8800 }, { "epoch": 0.16875876976753004, "grad_norm": 0.7964587211608887, "learning_rate": 8.435722411831626e-05, "loss": 1.440777587890625, "step": 8900 }, { "epoch": 0.1706549357199742, "grad_norm": 0.9044669270515442, "learning_rate": 8.530527114144862e-05, "loss": 1.4372213745117188, "step": 9000 }, { "epoch": 0.1706549357199742, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.23, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.51, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.33999999999999997, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.4, "eval_NanoBEIR_mean_cosine_map@100": 0.25323404391343074, "eval_NanoBEIR_mean_cosine_mrr@10": 0.3023690476190476, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.2928600617450003, "eval_NanoBEIR_mean_cosine_precision@1": 0.23, "eval_NanoBEIR_mean_cosine_precision@10": 0.058, "eval_NanoBEIR_mean_cosine_precision@3": 0.12, "eval_NanoBEIR_mean_cosine_precision@5": 0.08800000000000001, "eval_NanoBEIR_mean_cosine_recall@1": 0.16, "eval_NanoBEIR_mean_cosine_recall@10": 0.405, "eval_NanoBEIR_mean_cosine_recall@3": 0.255, "eval_NanoBEIR_mean_cosine_recall@5": 0.31, "eval_NanoHotpotQA_cosine_accuracy@1": 0.28, "eval_NanoHotpotQA_cosine_accuracy@10": 0.56, "eval_NanoHotpotQA_cosine_accuracy@3": 0.38, "eval_NanoHotpotQA_cosine_accuracy@5": 0.44, "eval_NanoHotpotQA_cosine_map@100": 0.2338868196262281, "eval_NanoHotpotQA_cosine_mrr@10": 0.3493809523809523, "eval_NanoHotpotQA_cosine_ndcg@10": 0.2824008390246955, "eval_NanoHotpotQA_cosine_precision@1": 0.28, "eval_NanoHotpotQA_cosine_precision@10": 0.07, "eval_NanoHotpotQA_cosine_precision@3": 0.13999999999999999, "eval_NanoHotpotQA_cosine_precision@5": 0.10400000000000001, "eval_NanoHotpotQA_cosine_recall@1": 0.14, "eval_NanoHotpotQA_cosine_recall@10": 0.35, "eval_NanoHotpotQA_cosine_recall@3": 0.21, "eval_NanoHotpotQA_cosine_recall@5": 0.26, "eval_NanoMSMARCO_cosine_accuracy@1": 0.18, "eval_NanoMSMARCO_cosine_accuracy@10": 0.46, "eval_NanoMSMARCO_cosine_accuracy@3": 0.3, "eval_NanoMSMARCO_cosine_accuracy@5": 0.36, "eval_NanoMSMARCO_cosine_map@100": 0.27258126820063344, "eval_NanoMSMARCO_cosine_mrr@10": 0.2553571428571429, "eval_NanoMSMARCO_cosine_ndcg@10": 0.3033192844653051, "eval_NanoMSMARCO_cosine_precision@1": 0.18, "eval_NanoMSMARCO_cosine_precision@10": 0.046, "eval_NanoMSMARCO_cosine_precision@3": 0.1, "eval_NanoMSMARCO_cosine_precision@5": 0.07200000000000001, "eval_NanoMSMARCO_cosine_recall@1": 0.18, "eval_NanoMSMARCO_cosine_recall@10": 0.46, "eval_NanoMSMARCO_cosine_recall@3": 0.3, "eval_NanoMSMARCO_cosine_recall@5": 0.36, "eval_mse-dev_negative_mse": -142.8462371826172, "eval_runtime": 10.0151, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.2928600617450003, "eval_steps_per_second": 0.0, "step": 9000 }, { "epoch": 0.17255110167241838, "grad_norm": 0.9912068843841553, "learning_rate": 8.625331816458096e-05, "loss": 1.4270211791992187, "step": 9100 }, { "epoch": 0.17444726762486254, "grad_norm": 0.9523755311965942, "learning_rate": 8.720136518771332e-05, "loss": 1.4232991027832032, "step": 9200 }, { "epoch": 0.1763434335773067, "grad_norm": 0.9893079996109009, "learning_rate": 8.814941221084566e-05, "loss": 1.4135417175292968, "step": 9300 }, { "epoch": 0.17823959952975085, "grad_norm": 0.8273277282714844, "learning_rate": 8.909745923397801e-05, "loss": 1.4074359130859375, "step": 9400 }, { "epoch": 0.180135765482195, "grad_norm": 0.9652109146118164, "learning_rate": 9.004550625711035e-05, "loss": 1.3981039428710937, "step": 9500 }, { "epoch": 0.18203193143463917, "grad_norm": 0.9654005169868469, "learning_rate": 9.099355328024271e-05, "loss": 1.3918597412109375, "step": 9600 }, { "epoch": 0.18392809738708332, "grad_norm": 1.0751373767852783, "learning_rate": 9.194160030337505e-05, "loss": 1.3844194030761718, "step": 9700 }, { "epoch": 0.18582426333952748, "grad_norm": 0.8573171496391296, "learning_rate": 9.28896473265074e-05, "loss": 1.3740664672851564, "step": 9800 }, { "epoch": 0.18772042929197164, "grad_norm": 0.9025856256484985, "learning_rate": 9.383769434963975e-05, "loss": 1.368533172607422, "step": 9900 }, { "epoch": 0.1896165952444158, "grad_norm": 0.936182975769043, "learning_rate": 9.47857413727721e-05, "loss": 1.3668016052246095, "step": 10000 }, { "epoch": 0.1896165952444158, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.26, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.55, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.34, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.41000000000000003, "eval_NanoBEIR_mean_cosine_map@100": 0.2717915991834402, "eval_NanoBEIR_mean_cosine_mrr@10": 0.3298809523809524, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.31266733186022544, "eval_NanoBEIR_mean_cosine_precision@1": 0.26, "eval_NanoBEIR_mean_cosine_precision@10": 0.061000000000000006, "eval_NanoBEIR_mean_cosine_precision@3": 0.12666666666666665, "eval_NanoBEIR_mean_cosine_precision@5": 0.09200000000000001, "eval_NanoBEIR_mean_cosine_recall@1": 0.175, "eval_NanoBEIR_mean_cosine_recall@10": 0.43, "eval_NanoBEIR_mean_cosine_recall@3": 0.26, "eval_NanoBEIR_mean_cosine_recall@5": 0.32, "eval_NanoHotpotQA_cosine_accuracy@1": 0.34, "eval_NanoHotpotQA_cosine_accuracy@10": 0.6, "eval_NanoHotpotQA_cosine_accuracy@3": 0.4, "eval_NanoHotpotQA_cosine_accuracy@5": 0.46, "eval_NanoHotpotQA_cosine_map@100": 0.260384776010371, "eval_NanoHotpotQA_cosine_mrr@10": 0.3953809523809524, "eval_NanoHotpotQA_cosine_ndcg@10": 0.3058907512098868, "eval_NanoHotpotQA_cosine_precision@1": 0.34, "eval_NanoHotpotQA_cosine_precision@10": 0.07200000000000001, "eval_NanoHotpotQA_cosine_precision@3": 0.15999999999999998, "eval_NanoHotpotQA_cosine_precision@5": 0.11200000000000002, "eval_NanoHotpotQA_cosine_recall@1": 0.17, "eval_NanoHotpotQA_cosine_recall@10": 0.36, "eval_NanoHotpotQA_cosine_recall@3": 0.24, "eval_NanoHotpotQA_cosine_recall@5": 0.28, "eval_NanoMSMARCO_cosine_accuracy@1": 0.18, "eval_NanoMSMARCO_cosine_accuracy@10": 0.5, "eval_NanoMSMARCO_cosine_accuracy@3": 0.28, "eval_NanoMSMARCO_cosine_accuracy@5": 0.36, "eval_NanoMSMARCO_cosine_map@100": 0.28319842235650944, "eval_NanoMSMARCO_cosine_mrr@10": 0.2643809523809524, "eval_NanoMSMARCO_cosine_ndcg@10": 0.31944391251056414, "eval_NanoMSMARCO_cosine_precision@1": 0.18, "eval_NanoMSMARCO_cosine_precision@10": 0.05, "eval_NanoMSMARCO_cosine_precision@3": 0.09333333333333332, "eval_NanoMSMARCO_cosine_precision@5": 0.07200000000000001, "eval_NanoMSMARCO_cosine_recall@1": 0.18, "eval_NanoMSMARCO_cosine_recall@10": 0.5, "eval_NanoMSMARCO_cosine_recall@3": 0.28, "eval_NanoMSMARCO_cosine_recall@5": 0.36, "eval_mse-dev_negative_mse": -135.70806884765625, "eval_runtime": 11.1158, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.31266733186022544, "eval_steps_per_second": 0.0, "step": 10000 }, { "epoch": 0.19151276119685995, "grad_norm": 0.9819117784500122, "learning_rate": 9.573378839590444e-05, "loss": 1.35683349609375, "step": 10100 }, { "epoch": 0.1934089271493041, "grad_norm": 0.9364531636238098, "learning_rate": 9.667235494880547e-05, "loss": 1.3505201721191407, "step": 10200 }, { "epoch": 0.19530509310174826, "grad_norm": 1.0975953340530396, "learning_rate": 9.762040197193781e-05, "loss": 1.3433110046386718, "step": 10300 }, { "epoch": 0.19720125905419242, "grad_norm": 0.8945000171661377, "learning_rate": 9.856844899507016e-05, "loss": 1.3337992858886718, "step": 10400 }, { "epoch": 0.19909742500663657, "grad_norm": 0.90827876329422, "learning_rate": 9.95164960182025e-05, "loss": 1.3294851684570312, "step": 10500 }, { "epoch": 0.20099359095908073, "grad_norm": 1.0766637325286865, "learning_rate": 9.994838193156919e-05, "loss": 1.3274673461914062, "step": 10600 }, { "epoch": 0.2028897569115249, "grad_norm": 0.9869415760040283, "learning_rate": 9.984303893477163e-05, "loss": 1.3149089050292968, "step": 10700 }, { "epoch": 0.20478592286396904, "grad_norm": 0.9914052486419678, "learning_rate": 9.973769593797405e-05, "loss": 1.3119027709960938, "step": 10800 }, { "epoch": 0.2066820888164132, "grad_norm": 0.8931730389595032, "learning_rate": 9.963235294117647e-05, "loss": 1.30553466796875, "step": 10900 }, { "epoch": 0.20857825476885736, "grad_norm": 0.9103732705116272, "learning_rate": 9.95270099443789e-05, "loss": 1.2952238464355468, "step": 11000 }, { "epoch": 0.20857825476885736, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.28, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.52, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.4, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.49, "eval_NanoBEIR_mean_cosine_map@100": 0.2958090237794817, "eval_NanoBEIR_mean_cosine_mrr@10": 0.3554563492063492, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.3271554490713938, "eval_NanoBEIR_mean_cosine_precision@1": 0.28, "eval_NanoBEIR_mean_cosine_precision@10": 0.06100000000000001, "eval_NanoBEIR_mean_cosine_precision@3": 0.14666666666666667, "eval_NanoBEIR_mean_cosine_precision@5": 0.10800000000000001, "eval_NanoBEIR_mean_cosine_recall@1": 0.185, "eval_NanoBEIR_mean_cosine_recall@10": 0.41500000000000004, "eval_NanoBEIR_mean_cosine_recall@3": 0.30500000000000005, "eval_NanoBEIR_mean_cosine_recall@5": 0.375, "eval_NanoHotpotQA_cosine_accuracy@1": 0.38, "eval_NanoHotpotQA_cosine_accuracy@10": 0.6, "eval_NanoHotpotQA_cosine_accuracy@3": 0.46, "eval_NanoHotpotQA_cosine_accuracy@5": 0.56, "eval_NanoHotpotQA_cosine_map@100": 0.2970541527466325, "eval_NanoHotpotQA_cosine_mrr@10": 0.4416904761904762, "eval_NanoHotpotQA_cosine_ndcg@10": 0.34337262327682183, "eval_NanoHotpotQA_cosine_precision@1": 0.38, "eval_NanoHotpotQA_cosine_precision@10": 0.07800000000000001, "eval_NanoHotpotQA_cosine_precision@3": 0.18, "eval_NanoHotpotQA_cosine_precision@5": 0.132, "eval_NanoHotpotQA_cosine_recall@1": 0.19, "eval_NanoHotpotQA_cosine_recall@10": 0.39, "eval_NanoHotpotQA_cosine_recall@3": 0.27, "eval_NanoHotpotQA_cosine_recall@5": 0.33, "eval_NanoMSMARCO_cosine_accuracy@1": 0.18, "eval_NanoMSMARCO_cosine_accuracy@10": 0.44, "eval_NanoMSMARCO_cosine_accuracy@3": 0.34, "eval_NanoMSMARCO_cosine_accuracy@5": 0.42, "eval_NanoMSMARCO_cosine_map@100": 0.29456389481233086, "eval_NanoMSMARCO_cosine_mrr@10": 0.26922222222222225, "eval_NanoMSMARCO_cosine_ndcg@10": 0.31093827486596576, "eval_NanoMSMARCO_cosine_precision@1": 0.18, "eval_NanoMSMARCO_cosine_precision@10": 0.044000000000000004, "eval_NanoMSMARCO_cosine_precision@3": 0.11333333333333333, "eval_NanoMSMARCO_cosine_precision@5": 0.084, "eval_NanoMSMARCO_cosine_recall@1": 0.18, "eval_NanoMSMARCO_cosine_recall@10": 0.44, "eval_NanoMSMARCO_cosine_recall@3": 0.34, "eval_NanoMSMARCO_cosine_recall@5": 0.42, "eval_mse-dev_negative_mse": -129.20640563964844, "eval_runtime": 10.3813, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.3271554490713938, "eval_steps_per_second": 0.0, "step": 11000 }, { "epoch": 0.21047442072130154, "grad_norm": 1.1640655994415283, "learning_rate": 9.942166694758133e-05, "loss": 1.2919923400878905, "step": 11100 }, { "epoch": 0.2123705866737457, "grad_norm": 0.9011592864990234, "learning_rate": 9.931632395078376e-05, "loss": 1.2851214599609375, "step": 11200 }, { "epoch": 0.21426675262618985, "grad_norm": 0.9254733324050903, "learning_rate": 9.921098095398619e-05, "loss": 1.2769430541992188, "step": 11300 }, { "epoch": 0.216162918578634, "grad_norm": 0.9079636931419373, "learning_rate": 9.910563795718862e-05, "loss": 1.2746614837646484, "step": 11400 }, { "epoch": 0.21805908453107817, "grad_norm": 0.9787989258766174, "learning_rate": 9.900029496039104e-05, "loss": 1.268571014404297, "step": 11500 }, { "epoch": 0.21995525048352232, "grad_norm": 0.8455345630645752, "learning_rate": 9.889495196359346e-05, "loss": 1.2683941650390624, "step": 11600 }, { "epoch": 0.22185141643596648, "grad_norm": 0.9073353409767151, "learning_rate": 9.878960896679589e-05, "loss": 1.2581684875488282, "step": 11700 }, { "epoch": 0.22374758238841064, "grad_norm": 0.8951073288917542, "learning_rate": 9.868426596999832e-05, "loss": 1.258204574584961, "step": 11800 }, { "epoch": 0.2256437483408548, "grad_norm": 1.0486690998077393, "learning_rate": 9.857892297320075e-05, "loss": 1.247862319946289, "step": 11900 }, { "epoch": 0.22753991429329895, "grad_norm": 0.8603843450546265, "learning_rate": 9.847357997640317e-05, "loss": 1.241845016479492, "step": 12000 }, { "epoch": 0.22753991429329895, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.28, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.6, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.41000000000000003, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.45, "eval_NanoBEIR_mean_cosine_map@100": 0.302852595589562, "eval_NanoBEIR_mean_cosine_mrr@10": 0.36785317460317457, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.3493080002249725, "eval_NanoBEIR_mean_cosine_precision@1": 0.28, "eval_NanoBEIR_mean_cosine_precision@10": 0.069, "eval_NanoBEIR_mean_cosine_precision@3": 0.14666666666666667, "eval_NanoBEIR_mean_cosine_precision@5": 0.10200000000000001, "eval_NanoBEIR_mean_cosine_recall@1": 0.19, "eval_NanoBEIR_mean_cosine_recall@10": 0.47, "eval_NanoBEIR_mean_cosine_recall@3": 0.31, "eval_NanoBEIR_mean_cosine_recall@5": 0.355, "eval_NanoHotpotQA_cosine_accuracy@1": 0.36, "eval_NanoHotpotQA_cosine_accuracy@10": 0.7, "eval_NanoHotpotQA_cosine_accuracy@3": 0.46, "eval_NanoHotpotQA_cosine_accuracy@5": 0.5, "eval_NanoHotpotQA_cosine_map@100": 0.2899683891353945, "eval_NanoHotpotQA_cosine_mrr@10": 0.4411031746031746, "eval_NanoHotpotQA_cosine_ndcg@10": 0.3546937420389296, "eval_NanoHotpotQA_cosine_precision@1": 0.36, "eval_NanoHotpotQA_cosine_precision@10": 0.088, "eval_NanoHotpotQA_cosine_precision@3": 0.1733333333333333, "eval_NanoHotpotQA_cosine_precision@5": 0.124, "eval_NanoHotpotQA_cosine_recall@1": 0.18, "eval_NanoHotpotQA_cosine_recall@10": 0.44, "eval_NanoHotpotQA_cosine_recall@3": 0.26, "eval_NanoHotpotQA_cosine_recall@5": 0.31, "eval_NanoMSMARCO_cosine_accuracy@1": 0.2, "eval_NanoMSMARCO_cosine_accuracy@10": 0.5, "eval_NanoMSMARCO_cosine_accuracy@3": 0.36, "eval_NanoMSMARCO_cosine_accuracy@5": 0.4, "eval_NanoMSMARCO_cosine_map@100": 0.31573680204372945, "eval_NanoMSMARCO_cosine_mrr@10": 0.2946031746031746, "eval_NanoMSMARCO_cosine_ndcg@10": 0.34392225841101537, "eval_NanoMSMARCO_cosine_precision@1": 0.2, "eval_NanoMSMARCO_cosine_precision@10": 0.05, "eval_NanoMSMARCO_cosine_precision@3": 0.12, "eval_NanoMSMARCO_cosine_precision@5": 0.08, "eval_NanoMSMARCO_cosine_recall@1": 0.2, "eval_NanoMSMARCO_cosine_recall@10": 0.5, "eval_NanoMSMARCO_cosine_recall@3": 0.36, "eval_NanoMSMARCO_cosine_recall@5": 0.4, "eval_mse-dev_negative_mse": -123.62611389160156, "eval_runtime": 10.308, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.3493080002249725, "eval_steps_per_second": 0.0, "step": 12000 }, { "epoch": 0.2294360802457431, "grad_norm": 0.9303557276725769, "learning_rate": 9.83682369796056e-05, "loss": 1.240003662109375, "step": 12100 }, { "epoch": 0.23133224619818726, "grad_norm": 0.9165602326393127, "learning_rate": 9.826289398280803e-05, "loss": 1.232986068725586, "step": 12200 }, { "epoch": 0.23322841215063142, "grad_norm": 0.8384730815887451, "learning_rate": 9.815755098601045e-05, "loss": 1.2288270568847657, "step": 12300 }, { "epoch": 0.23512457810307558, "grad_norm": 0.9244160652160645, "learning_rate": 9.805326141918085e-05, "loss": 1.223012924194336, "step": 12400 }, { "epoch": 0.23702074405551973, "grad_norm": 1.01241135597229, "learning_rate": 9.794791842238329e-05, "loss": 1.2164186096191407, "step": 12500 }, { "epoch": 0.2389169100079639, "grad_norm": 0.9336892366409302, "learning_rate": 9.784257542558571e-05, "loss": 1.2156867980957031, "step": 12600 }, { "epoch": 0.24081307596040805, "grad_norm": 0.9515780210494995, "learning_rate": 9.773723242878813e-05, "loss": 1.2165725708007813, "step": 12700 }, { "epoch": 0.2427092419128522, "grad_norm": 0.8875882029533386, "learning_rate": 9.763188943199057e-05, "loss": 1.2044532775878907, "step": 12800 }, { "epoch": 0.24460540786529636, "grad_norm": 0.8906784057617188, "learning_rate": 9.7526546435193e-05, "loss": 1.2034928131103515, "step": 12900 }, { "epoch": 0.24650157381774052, "grad_norm": 0.860988438129425, "learning_rate": 9.742120343839543e-05, "loss": 1.1968316650390625, "step": 13000 }, { "epoch": 0.24650157381774052, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.25, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.5700000000000001, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.38, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.45999999999999996, "eval_NanoBEIR_mean_cosine_map@100": 0.2897002867515749, "eval_NanoBEIR_mean_cosine_mrr@10": 0.3452896825396825, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.3305841730427018, "eval_NanoBEIR_mean_cosine_precision@1": 0.25, "eval_NanoBEIR_mean_cosine_precision@10": 0.065, "eval_NanoBEIR_mean_cosine_precision@3": 0.1433333333333333, "eval_NanoBEIR_mean_cosine_precision@5": 0.10200000000000001, "eval_NanoBEIR_mean_cosine_recall@1": 0.165, "eval_NanoBEIR_mean_cosine_recall@10": 0.45, "eval_NanoBEIR_mean_cosine_recall@3": 0.30000000000000004, "eval_NanoBEIR_mean_cosine_recall@5": 0.36, "eval_NanoHotpotQA_cosine_accuracy@1": 0.34, "eval_NanoHotpotQA_cosine_accuracy@10": 0.64, "eval_NanoHotpotQA_cosine_accuracy@3": 0.42, "eval_NanoHotpotQA_cosine_accuracy@5": 0.5, "eval_NanoHotpotQA_cosine_map@100": 0.2823695142784583, "eval_NanoHotpotQA_cosine_mrr@10": 0.417079365079365, "eval_NanoHotpotQA_cosine_ndcg@10": 0.332921649409912, "eval_NanoHotpotQA_cosine_precision@1": 0.34, "eval_NanoHotpotQA_cosine_precision@10": 0.08, "eval_NanoHotpotQA_cosine_precision@3": 0.1733333333333333, "eval_NanoHotpotQA_cosine_precision@5": 0.12000000000000002, "eval_NanoHotpotQA_cosine_recall@1": 0.17, "eval_NanoHotpotQA_cosine_recall@10": 0.4, "eval_NanoHotpotQA_cosine_recall@3": 0.26, "eval_NanoHotpotQA_cosine_recall@5": 0.3, "eval_NanoMSMARCO_cosine_accuracy@1": 0.16, "eval_NanoMSMARCO_cosine_accuracy@10": 0.5, "eval_NanoMSMARCO_cosine_accuracy@3": 0.34, "eval_NanoMSMARCO_cosine_accuracy@5": 0.42, "eval_NanoMSMARCO_cosine_map@100": 0.29703105922469153, "eval_NanoMSMARCO_cosine_mrr@10": 0.2735, "eval_NanoMSMARCO_cosine_ndcg@10": 0.3282466966754917, "eval_NanoMSMARCO_cosine_precision@1": 0.16, "eval_NanoMSMARCO_cosine_precision@10": 0.05, "eval_NanoMSMARCO_cosine_precision@3": 0.11333333333333333, "eval_NanoMSMARCO_cosine_precision@5": 0.084, "eval_NanoMSMARCO_cosine_recall@1": 0.16, "eval_NanoMSMARCO_cosine_recall@10": 0.5, "eval_NanoMSMARCO_cosine_recall@3": 0.34, "eval_NanoMSMARCO_cosine_recall@5": 0.42, "eval_mse-dev_negative_mse": -118.86907958984375, "eval_runtime": 11.1772, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.3305841730427018, "eval_steps_per_second": 0.0, "step": 13000 }, { "epoch": 0.2483977397701847, "grad_norm": 0.944284200668335, "learning_rate": 9.731586044159785e-05, "loss": 1.1941532135009765, "step": 13100 }, { "epoch": 0.25029390572262883, "grad_norm": 0.846736490726471, "learning_rate": 9.721051744480028e-05, "loss": 1.189548873901367, "step": 13200 }, { "epoch": 0.252190071675073, "grad_norm": 0.9077499508857727, "learning_rate": 9.71051744480027e-05, "loss": 1.184281463623047, "step": 13300 }, { "epoch": 0.25408623762751714, "grad_norm": 0.9021602869033813, "learning_rate": 9.699983145120512e-05, "loss": 1.1755128479003907, "step": 13400 }, { "epoch": 0.2559824035799613, "grad_norm": 0.9804133772850037, "learning_rate": 9.689448845440755e-05, "loss": 1.175633773803711, "step": 13500 }, { "epoch": 0.25787856953240545, "grad_norm": 0.8400120139122009, "learning_rate": 9.678914545760998e-05, "loss": 1.1707258605957032, "step": 13600 }, { "epoch": 0.25977473548484964, "grad_norm": 0.8351007103919983, "learning_rate": 9.668380246081241e-05, "loss": 1.1637205505371093, "step": 13700 }, { "epoch": 0.26167090143729377, "grad_norm": 0.9614461064338684, "learning_rate": 9.657845946401483e-05, "loss": 1.1684355926513672, "step": 13800 }, { "epoch": 0.26356706738973795, "grad_norm": 0.9544349312782288, "learning_rate": 9.647311646721725e-05, "loss": 1.162786636352539, "step": 13900 }, { "epoch": 0.2654632333421821, "grad_norm": 0.8563331365585327, "learning_rate": 9.636777347041969e-05, "loss": 1.1585095977783204, "step": 14000 }, { "epoch": 0.2654632333421821, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.28, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.62, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.42, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.5, "eval_NanoBEIR_mean_cosine_map@100": 0.31463974475417134, "eval_NanoBEIR_mean_cosine_mrr@10": 0.3753571428571429, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.36789514654221167, "eval_NanoBEIR_mean_cosine_precision@1": 0.28, "eval_NanoBEIR_mean_cosine_precision@10": 0.072, "eval_NanoBEIR_mean_cosine_precision@3": 0.16, "eval_NanoBEIR_mean_cosine_precision@5": 0.116, "eval_NanoBEIR_mean_cosine_recall@1": 0.185, "eval_NanoBEIR_mean_cosine_recall@10": 0.515, "eval_NanoBEIR_mean_cosine_recall@3": 0.32999999999999996, "eval_NanoBEIR_mean_cosine_recall@5": 0.405, "eval_NanoHotpotQA_cosine_accuracy@1": 0.38, "eval_NanoHotpotQA_cosine_accuracy@10": 0.62, "eval_NanoHotpotQA_cosine_accuracy@3": 0.48, "eval_NanoHotpotQA_cosine_accuracy@5": 0.54, "eval_NanoHotpotQA_cosine_map@100": 0.3112110057061059, "eval_NanoHotpotQA_cosine_mrr@10": 0.4472222222222223, "eval_NanoHotpotQA_cosine_ndcg@10": 0.3578654483822233, "eval_NanoHotpotQA_cosine_precision@1": 0.38, "eval_NanoHotpotQA_cosine_precision@10": 0.08199999999999999, "eval_NanoHotpotQA_cosine_precision@3": 0.2, "eval_NanoHotpotQA_cosine_precision@5": 0.14, "eval_NanoHotpotQA_cosine_recall@1": 0.19, "eval_NanoHotpotQA_cosine_recall@10": 0.41, "eval_NanoHotpotQA_cosine_recall@3": 0.3, "eval_NanoHotpotQA_cosine_recall@5": 0.35, "eval_NanoMSMARCO_cosine_accuracy@1": 0.18, "eval_NanoMSMARCO_cosine_accuracy@10": 0.62, "eval_NanoMSMARCO_cosine_accuracy@3": 0.36, "eval_NanoMSMARCO_cosine_accuracy@5": 0.46, "eval_NanoMSMARCO_cosine_map@100": 0.31806848380223673, "eval_NanoMSMARCO_cosine_mrr@10": 0.3034920634920635, "eval_NanoMSMARCO_cosine_ndcg@10": 0.3779248447022001, "eval_NanoMSMARCO_cosine_precision@1": 0.18, "eval_NanoMSMARCO_cosine_precision@10": 0.062, "eval_NanoMSMARCO_cosine_precision@3": 0.12, "eval_NanoMSMARCO_cosine_precision@5": 0.092, "eval_NanoMSMARCO_cosine_recall@1": 0.18, "eval_NanoMSMARCO_cosine_recall@10": 0.62, "eval_NanoMSMARCO_cosine_recall@3": 0.36, "eval_NanoMSMARCO_cosine_recall@5": 0.46, "eval_mse-dev_negative_mse": -115.4122085571289, "eval_runtime": 14.015, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.36789514654221167, "eval_steps_per_second": 0.0, "step": 14000 }, { "epoch": 0.26735939929462627, "grad_norm": 0.9541077017784119, "learning_rate": 9.626243047362213e-05, "loss": 1.160166244506836, "step": 14100 }, { "epoch": 0.26925556524707045, "grad_norm": 1.0204025506973267, "learning_rate": 9.615708747682455e-05, "loss": 1.1503668212890625, "step": 14200 }, { "epoch": 0.2711517311995146, "grad_norm": 1.0752142667770386, "learning_rate": 9.605174448002698e-05, "loss": 1.1483226776123048, "step": 14300 }, { "epoch": 0.27304789715195876, "grad_norm": 0.9642768502235413, "learning_rate": 9.59464014832294e-05, "loss": 1.1488003540039062, "step": 14400 }, { "epoch": 0.2749440631044029, "grad_norm": 0.8722686171531677, "learning_rate": 9.584105848643182e-05, "loss": 1.139219741821289, "step": 14500 }, { "epoch": 0.2768402290568471, "grad_norm": 0.9259271025657654, "learning_rate": 9.573676891960223e-05, "loss": 1.134266128540039, "step": 14600 }, { "epoch": 0.2787363950092912, "grad_norm": 1.019303560256958, "learning_rate": 9.563142592280465e-05, "loss": 1.136265869140625, "step": 14700 }, { "epoch": 0.2806325609617354, "grad_norm": 0.9323062300682068, "learning_rate": 9.552608292600709e-05, "loss": 1.1342037200927735, "step": 14800 }, { "epoch": 0.2825287269141795, "grad_norm": 0.8613787293434143, "learning_rate": 9.542073992920951e-05, "loss": 1.132669448852539, "step": 14900 }, { "epoch": 0.2844248928666237, "grad_norm": 0.9772534966468811, "learning_rate": 9.531539693241194e-05, "loss": 1.1218692779541015, "step": 15000 }, { "epoch": 0.2844248928666237, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.29000000000000004, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.63, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.43, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.55, "eval_NanoBEIR_mean_cosine_map@100": 0.32670175603229334, "eval_NanoBEIR_mean_cosine_mrr@10": 0.38711111111111113, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.3792944580347569, "eval_NanoBEIR_mean_cosine_precision@1": 0.29000000000000004, "eval_NanoBEIR_mean_cosine_precision@10": 0.07500000000000001, "eval_NanoBEIR_mean_cosine_precision@3": 0.16, "eval_NanoBEIR_mean_cosine_precision@5": 0.126, "eval_NanoBEIR_mean_cosine_recall@1": 0.195, "eval_NanoBEIR_mean_cosine_recall@10": 0.525, "eval_NanoBEIR_mean_cosine_recall@3": 0.32999999999999996, "eval_NanoBEIR_mean_cosine_recall@5": 0.44, "eval_NanoHotpotQA_cosine_accuracy@1": 0.38, "eval_NanoHotpotQA_cosine_accuracy@10": 0.66, "eval_NanoHotpotQA_cosine_accuracy@3": 0.5, "eval_NanoHotpotQA_cosine_accuracy@5": 0.6, "eval_NanoHotpotQA_cosine_map@100": 0.3244366589749346, "eval_NanoHotpotQA_cosine_mrr@10": 0.46277777777777784, "eval_NanoHotpotQA_cosine_ndcg@10": 0.37914250624163204, "eval_NanoHotpotQA_cosine_precision@1": 0.38, "eval_NanoHotpotQA_cosine_precision@10": 0.09, "eval_NanoHotpotQA_cosine_precision@3": 0.2, "eval_NanoHotpotQA_cosine_precision@5": 0.15200000000000002, "eval_NanoHotpotQA_cosine_recall@1": 0.19, "eval_NanoHotpotQA_cosine_recall@10": 0.45, "eval_NanoHotpotQA_cosine_recall@3": 0.3, "eval_NanoHotpotQA_cosine_recall@5": 0.38, "eval_NanoMSMARCO_cosine_accuracy@1": 0.2, "eval_NanoMSMARCO_cosine_accuracy@10": 0.6, "eval_NanoMSMARCO_cosine_accuracy@3": 0.36, "eval_NanoMSMARCO_cosine_accuracy@5": 0.5, "eval_NanoMSMARCO_cosine_map@100": 0.3289668530896521, "eval_NanoMSMARCO_cosine_mrr@10": 0.3114444444444444, "eval_NanoMSMARCO_cosine_ndcg@10": 0.37944640982788175, "eval_NanoMSMARCO_cosine_precision@1": 0.2, "eval_NanoMSMARCO_cosine_precision@10": 0.06000000000000001, "eval_NanoMSMARCO_cosine_precision@3": 0.12, "eval_NanoMSMARCO_cosine_precision@5": 0.1, "eval_NanoMSMARCO_cosine_recall@1": 0.2, "eval_NanoMSMARCO_cosine_recall@10": 0.6, "eval_NanoMSMARCO_cosine_recall@3": 0.36, "eval_NanoMSMARCO_cosine_recall@5": 0.5, "eval_mse-dev_negative_mse": -111.91387176513672, "eval_runtime": 11.9368, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.3792944580347569, "eval_steps_per_second": 0.0, "step": 15000 }, { "epoch": 0.28632105881906783, "grad_norm": 0.9433382749557495, "learning_rate": 9.521005393561436e-05, "loss": 1.124610137939453, "step": 15100 }, { "epoch": 0.288217224771512, "grad_norm": 0.880102276802063, "learning_rate": 9.510471093881679e-05, "loss": 1.1151537322998046, "step": 15200 }, { "epoch": 0.29011339072395614, "grad_norm": 0.8995987772941589, "learning_rate": 9.499936794201922e-05, "loss": 1.119567642211914, "step": 15300 }, { "epoch": 0.29200955667640033, "grad_norm": 0.7987125515937805, "learning_rate": 9.489402494522165e-05, "loss": 1.109741439819336, "step": 15400 }, { "epoch": 0.29390572262884446, "grad_norm": 0.8933894038200378, "learning_rate": 9.478868194842407e-05, "loss": 1.106731185913086, "step": 15500 }, { "epoch": 0.29580188858128864, "grad_norm": 0.9454442858695984, "learning_rate": 9.468333895162649e-05, "loss": 1.0994451904296876, "step": 15600 }, { "epoch": 0.29769805453373277, "grad_norm": 0.9284511804580688, "learning_rate": 9.457799595482893e-05, "loss": 1.107660446166992, "step": 15700 }, { "epoch": 0.29959422048617695, "grad_norm": 0.9509237408638, "learning_rate": 9.447265295803135e-05, "loss": 1.1057376098632812, "step": 15800 }, { "epoch": 0.3014903864386211, "grad_norm": 0.8351031541824341, "learning_rate": 9.436730996123379e-05, "loss": 1.0948797607421874, "step": 15900 }, { "epoch": 0.30338655239106527, "grad_norm": 0.9255380034446716, "learning_rate": 9.426196696443621e-05, "loss": 1.0980982208251953, "step": 16000 }, { "epoch": 0.30338655239106527, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.28, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.65, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.45, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.54, "eval_NanoBEIR_mean_cosine_map@100": 0.3266167689165571, "eval_NanoBEIR_mean_cosine_mrr@10": 0.3881944444444445, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.38609049913548005, "eval_NanoBEIR_mean_cosine_precision@1": 0.28, "eval_NanoBEIR_mean_cosine_precision@10": 0.07799999999999999, "eval_NanoBEIR_mean_cosine_precision@3": 0.16666666666666669, "eval_NanoBEIR_mean_cosine_precision@5": 0.12200000000000003, "eval_NanoBEIR_mean_cosine_recall@1": 0.185, "eval_NanoBEIR_mean_cosine_recall@10": 0.5449999999999999, "eval_NanoBEIR_mean_cosine_recall@3": 0.35, "eval_NanoBEIR_mean_cosine_recall@5": 0.43, "eval_NanoHotpotQA_cosine_accuracy@1": 0.38, "eval_NanoHotpotQA_cosine_accuracy@10": 0.68, "eval_NanoHotpotQA_cosine_accuracy@3": 0.5, "eval_NanoHotpotQA_cosine_accuracy@5": 0.58, "eval_NanoHotpotQA_cosine_map@100": 0.3233119770078725, "eval_NanoHotpotQA_cosine_mrr@10": 0.46327777777777784, "eval_NanoHotpotQA_cosine_ndcg@10": 0.38549342229017597, "eval_NanoHotpotQA_cosine_precision@1": 0.38, "eval_NanoHotpotQA_cosine_precision@10": 0.09399999999999999, "eval_NanoHotpotQA_cosine_precision@3": 0.2, "eval_NanoHotpotQA_cosine_precision@5": 0.14400000000000002, "eval_NanoHotpotQA_cosine_recall@1": 0.19, "eval_NanoHotpotQA_cosine_recall@10": 0.47, "eval_NanoHotpotQA_cosine_recall@3": 0.3, "eval_NanoHotpotQA_cosine_recall@5": 0.36, "eval_NanoMSMARCO_cosine_accuracy@1": 0.18, "eval_NanoMSMARCO_cosine_accuracy@10": 0.62, "eval_NanoMSMARCO_cosine_accuracy@3": 0.4, "eval_NanoMSMARCO_cosine_accuracy@5": 0.5, "eval_NanoMSMARCO_cosine_map@100": 0.32992156082524177, "eval_NanoMSMARCO_cosine_mrr@10": 0.3131111111111111, "eval_NanoMSMARCO_cosine_ndcg@10": 0.3866875759807841, "eval_NanoMSMARCO_cosine_precision@1": 0.18, "eval_NanoMSMARCO_cosine_precision@10": 0.062, "eval_NanoMSMARCO_cosine_precision@3": 0.13333333333333333, "eval_NanoMSMARCO_cosine_precision@5": 0.10000000000000002, "eval_NanoMSMARCO_cosine_recall@1": 0.18, "eval_NanoMSMARCO_cosine_recall@10": 0.62, "eval_NanoMSMARCO_cosine_recall@3": 0.4, "eval_NanoMSMARCO_cosine_recall@5": 0.5, "eval_mse-dev_negative_mse": -109.29944610595703, "eval_runtime": 13.2813, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.38609049913548005, "eval_steps_per_second": 0.0, "step": 16000 }, { "epoch": 0.30528271834350945, "grad_norm": 0.9251424670219421, "learning_rate": 9.415662396763864e-05, "loss": 1.0933486938476562, "step": 16100 }, { "epoch": 0.3071788842959536, "grad_norm": 0.9098881483078003, "learning_rate": 9.405128097084106e-05, "loss": 1.0872834777832032, "step": 16200 }, { "epoch": 0.30907505024839776, "grad_norm": 0.9585905075073242, "learning_rate": 9.394593797404348e-05, "loss": 1.0850564575195312, "step": 16300 }, { "epoch": 0.3109712162008419, "grad_norm": 0.8983785510063171, "learning_rate": 9.384059497724592e-05, "loss": 1.0840210723876953, "step": 16400 }, { "epoch": 0.3128673821532861, "grad_norm": 0.8971573114395142, "learning_rate": 9.373525198044834e-05, "loss": 1.0830884552001954, "step": 16500 }, { "epoch": 0.3147635481057302, "grad_norm": 0.9502484202384949, "learning_rate": 9.363096241361875e-05, "loss": 1.0755316925048828, "step": 16600 }, { "epoch": 0.3166597140581744, "grad_norm": 0.8195205330848694, "learning_rate": 9.352561941682118e-05, "loss": 1.0733245086669922, "step": 16700 }, { "epoch": 0.3185558800106185, "grad_norm": 0.866369366645813, "learning_rate": 9.34202764200236e-05, "loss": 1.072414016723633, "step": 16800 }, { "epoch": 0.3204520459630627, "grad_norm": 0.8804235458374023, "learning_rate": 9.331493342322602e-05, "loss": 1.069804458618164, "step": 16900 }, { "epoch": 0.32234821191550683, "grad_norm": 0.8990177512168884, "learning_rate": 9.320959042642845e-05, "loss": 1.0709500122070312, "step": 17000 }, { "epoch": 0.32234821191550683, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.31, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.66, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.47, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.55, "eval_NanoBEIR_mean_cosine_map@100": 0.35180720243649677, "eval_NanoBEIR_mean_cosine_mrr@10": 0.41262301587301586, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.4079069196220826, "eval_NanoBEIR_mean_cosine_precision@1": 0.31, "eval_NanoBEIR_mean_cosine_precision@10": 0.08, "eval_NanoBEIR_mean_cosine_precision@3": 0.18, "eval_NanoBEIR_mean_cosine_precision@5": 0.126, "eval_NanoBEIR_mean_cosine_recall@1": 0.20500000000000002, "eval_NanoBEIR_mean_cosine_recall@10": 0.56, "eval_NanoBEIR_mean_cosine_recall@3": 0.38, "eval_NanoBEIR_mean_cosine_recall@5": 0.445, "eval_NanoHotpotQA_cosine_accuracy@1": 0.42, "eval_NanoHotpotQA_cosine_accuracy@10": 0.68, "eval_NanoHotpotQA_cosine_accuracy@3": 0.5, "eval_NanoHotpotQA_cosine_accuracy@5": 0.58, "eval_NanoHotpotQA_cosine_map@100": 0.3515281128331601, "eval_NanoHotpotQA_cosine_mrr@10": 0.48841269841269835, "eval_NanoHotpotQA_cosine_ndcg@10": 0.40662948657099507, "eval_NanoHotpotQA_cosine_precision@1": 0.42, "eval_NanoHotpotQA_cosine_precision@10": 0.096, "eval_NanoHotpotQA_cosine_precision@3": 0.21333333333333332, "eval_NanoHotpotQA_cosine_precision@5": 0.14800000000000002, "eval_NanoHotpotQA_cosine_recall@1": 0.21, "eval_NanoHotpotQA_cosine_recall@10": 0.48, "eval_NanoHotpotQA_cosine_recall@3": 0.32, "eval_NanoHotpotQA_cosine_recall@5": 0.37, "eval_NanoMSMARCO_cosine_accuracy@1": 0.2, "eval_NanoMSMARCO_cosine_accuracy@10": 0.64, "eval_NanoMSMARCO_cosine_accuracy@3": 0.44, "eval_NanoMSMARCO_cosine_accuracy@5": 0.52, "eval_NanoMSMARCO_cosine_map@100": 0.35208629203983344, "eval_NanoMSMARCO_cosine_mrr@10": 0.3368333333333334, "eval_NanoMSMARCO_cosine_ndcg@10": 0.4091843526731701, "eval_NanoMSMARCO_cosine_precision@1": 0.2, "eval_NanoMSMARCO_cosine_precision@10": 0.064, "eval_NanoMSMARCO_cosine_precision@3": 0.14666666666666667, "eval_NanoMSMARCO_cosine_precision@5": 0.10400000000000001, "eval_NanoMSMARCO_cosine_recall@1": 0.2, "eval_NanoMSMARCO_cosine_recall@10": 0.64, "eval_NanoMSMARCO_cosine_recall@3": 0.44, "eval_NanoMSMARCO_cosine_recall@5": 0.52, "eval_mse-dev_negative_mse": -106.3768539428711, "eval_runtime": 10.2249, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.4079069196220826, "eval_steps_per_second": 0.0, "step": 17000 }, { "epoch": 0.324244377867951, "grad_norm": 0.9269504547119141, "learning_rate": 9.310424742963088e-05, "loss": 1.0698513031005858, "step": 17100 }, { "epoch": 0.32614054382039515, "grad_norm": 0.9297342896461487, "learning_rate": 9.29989044328333e-05, "loss": 1.06423828125, "step": 17200 }, { "epoch": 0.32803670977283933, "grad_norm": 0.8609415292739868, "learning_rate": 9.289356143603573e-05, "loss": 1.0575923919677734, "step": 17300 }, { "epoch": 0.32993287572528346, "grad_norm": 0.9494638442993164, "learning_rate": 9.278821843923817e-05, "loss": 1.059657211303711, "step": 17400 }, { "epoch": 0.33182904167772764, "grad_norm": 0.9297378063201904, "learning_rate": 9.268287544244059e-05, "loss": 1.0571788024902344, "step": 17500 }, { "epoch": 0.33372520763017177, "grad_norm": 0.8993592262268066, "learning_rate": 9.257753244564303e-05, "loss": 1.0546926879882812, "step": 17600 }, { "epoch": 0.33562137358261596, "grad_norm": 0.8981407880783081, "learning_rate": 9.247218944884545e-05, "loss": 1.0501728057861328, "step": 17700 }, { "epoch": 0.3375175395350601, "grad_norm": 0.8592208623886108, "learning_rate": 9.236684645204787e-05, "loss": 1.0466949462890625, "step": 17800 }, { "epoch": 0.33941370548750427, "grad_norm": 0.8278118371963501, "learning_rate": 9.22615034552503e-05, "loss": 1.0484512329101563, "step": 17900 }, { "epoch": 0.3413098714399484, "grad_norm": 0.8379432559013367, "learning_rate": 9.215616045845272e-05, "loss": 1.0455326843261719, "step": 18000 }, { "epoch": 0.3413098714399484, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.31, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.71, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.51, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6, "eval_NanoBEIR_mean_cosine_map@100": 0.3693867458958786, "eval_NanoBEIR_mean_cosine_mrr@10": 0.43332936507936504, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.43735880478726147, "eval_NanoBEIR_mean_cosine_precision@1": 0.31, "eval_NanoBEIR_mean_cosine_precision@10": 0.087, "eval_NanoBEIR_mean_cosine_precision@3": 0.19333333333333333, "eval_NanoBEIR_mean_cosine_precision@5": 0.138, "eval_NanoBEIR_mean_cosine_recall@1": 0.21000000000000002, "eval_NanoBEIR_mean_cosine_recall@10": 0.61, "eval_NanoBEIR_mean_cosine_recall@3": 0.41000000000000003, "eval_NanoBEIR_mean_cosine_recall@5": 0.49, "eval_NanoHotpotQA_cosine_accuracy@1": 0.4, "eval_NanoHotpotQA_cosine_accuracy@10": 0.72, "eval_NanoHotpotQA_cosine_accuracy@3": 0.54, "eval_NanoHotpotQA_cosine_accuracy@5": 0.62, "eval_NanoHotpotQA_cosine_map@100": 0.3550150187843317, "eval_NanoHotpotQA_cosine_mrr@10": 0.4942460317460317, "eval_NanoHotpotQA_cosine_ndcg@10": 0.4237044581505819, "eval_NanoHotpotQA_cosine_precision@1": 0.4, "eval_NanoHotpotQA_cosine_precision@10": 0.10399999999999998, "eval_NanoHotpotQA_cosine_precision@3": 0.22666666666666668, "eval_NanoHotpotQA_cosine_precision@5": 0.16, "eval_NanoHotpotQA_cosine_recall@1": 0.2, "eval_NanoHotpotQA_cosine_recall@10": 0.52, "eval_NanoHotpotQA_cosine_recall@3": 0.34, "eval_NanoHotpotQA_cosine_recall@5": 0.4, "eval_NanoMSMARCO_cosine_accuracy@1": 0.22, "eval_NanoMSMARCO_cosine_accuracy@10": 0.7, "eval_NanoMSMARCO_cosine_accuracy@3": 0.48, "eval_NanoMSMARCO_cosine_accuracy@5": 0.58, "eval_NanoMSMARCO_cosine_map@100": 0.3837584730074255, "eval_NanoMSMARCO_cosine_mrr@10": 0.3724126984126984, "eval_NanoMSMARCO_cosine_ndcg@10": 0.45101315142394105, "eval_NanoMSMARCO_cosine_precision@1": 0.22, "eval_NanoMSMARCO_cosine_precision@10": 0.07, "eval_NanoMSMARCO_cosine_precision@3": 0.15999999999999998, "eval_NanoMSMARCO_cosine_precision@5": 0.11599999999999999, "eval_NanoMSMARCO_cosine_recall@1": 0.22, "eval_NanoMSMARCO_cosine_recall@10": 0.7, "eval_NanoMSMARCO_cosine_recall@3": 0.48, "eval_NanoMSMARCO_cosine_recall@5": 0.58, "eval_mse-dev_negative_mse": -103.76982879638672, "eval_runtime": 10.0162, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.43735880478726147, "eval_steps_per_second": 0.0, "step": 18000 }, { "epoch": 0.3432060373923926, "grad_norm": 0.8932205438613892, "learning_rate": 9.205081746165516e-05, "loss": 1.0432756805419923, "step": 18100 }, { "epoch": 0.34510220334483677, "grad_norm": 0.9294377565383911, "learning_rate": 9.194547446485758e-05, "loss": 1.0403505706787108, "step": 18200 }, { "epoch": 0.3469983692972809, "grad_norm": 0.8712144494056702, "learning_rate": 9.184013146806e-05, "loss": 1.0396759796142578, "step": 18300 }, { "epoch": 0.3488945352497251, "grad_norm": 0.8681181073188782, "learning_rate": 9.173478847126243e-05, "loss": 1.0351734161376953, "step": 18400 }, { "epoch": 0.3507907012021692, "grad_norm": 0.8668209910392761, "learning_rate": 9.162944547446487e-05, "loss": 1.0318231964111328, "step": 18500 }, { "epoch": 0.3526868671546134, "grad_norm": 0.9021549224853516, "learning_rate": 9.152410247766729e-05, "loss": 1.0302366638183593, "step": 18600 }, { "epoch": 0.3545830331070575, "grad_norm": 0.8724125623703003, "learning_rate": 9.141875948086973e-05, "loss": 1.0330332183837891, "step": 18700 }, { "epoch": 0.3564791990595017, "grad_norm": 0.9171428680419922, "learning_rate": 9.131446991404012e-05, "loss": 1.0219937896728515, "step": 18800 }, { "epoch": 0.35837536501194583, "grad_norm": 0.8523043394088745, "learning_rate": 9.120912691724254e-05, "loss": 1.0223383331298828, "step": 18900 }, { "epoch": 0.36027153096439, "grad_norm": 0.8599100112915039, "learning_rate": 9.110378392044497e-05, "loss": 1.0254383087158203, "step": 19000 }, { "epoch": 0.36027153096439, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.32, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.6799999999999999, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.55, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.63, "eval_NanoBEIR_mean_cosine_map@100": 0.3761081537709, "eval_NanoBEIR_mean_cosine_mrr@10": 0.43933333333333335, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.43521214929807284, "eval_NanoBEIR_mean_cosine_precision@1": 0.32, "eval_NanoBEIR_mean_cosine_precision@10": 0.08399999999999999, "eval_NanoBEIR_mean_cosine_precision@3": 0.2033333333333333, "eval_NanoBEIR_mean_cosine_precision@5": 0.14600000000000002, "eval_NanoBEIR_mean_cosine_recall@1": 0.22, "eval_NanoBEIR_mean_cosine_recall@10": 0.5800000000000001, "eval_NanoBEIR_mean_cosine_recall@3": 0.435, "eval_NanoBEIR_mean_cosine_recall@5": 0.515, "eval_NanoHotpotQA_cosine_accuracy@1": 0.4, "eval_NanoHotpotQA_cosine_accuracy@10": 0.72, "eval_NanoHotpotQA_cosine_accuracy@3": 0.58, "eval_NanoHotpotQA_cosine_accuracy@5": 0.66, "eval_NanoHotpotQA_cosine_map@100": 0.3567383904240635, "eval_NanoHotpotQA_cosine_mrr@10": 0.49855555555555553, "eval_NanoHotpotQA_cosine_ndcg@10": 0.426475283640488, "eval_NanoHotpotQA_cosine_precision@1": 0.4, "eval_NanoHotpotQA_cosine_precision@10": 0.10399999999999998, "eval_NanoHotpotQA_cosine_precision@3": 0.2333333333333333, "eval_NanoHotpotQA_cosine_precision@5": 0.172, "eval_NanoHotpotQA_cosine_recall@1": 0.2, "eval_NanoHotpotQA_cosine_recall@10": 0.52, "eval_NanoHotpotQA_cosine_recall@3": 0.35, "eval_NanoHotpotQA_cosine_recall@5": 0.43, "eval_NanoMSMARCO_cosine_accuracy@1": 0.24, "eval_NanoMSMARCO_cosine_accuracy@10": 0.64, "eval_NanoMSMARCO_cosine_accuracy@3": 0.52, "eval_NanoMSMARCO_cosine_accuracy@5": 0.6, "eval_NanoMSMARCO_cosine_map@100": 0.3954779171177365, "eval_NanoMSMARCO_cosine_mrr@10": 0.3801111111111111, "eval_NanoMSMARCO_cosine_ndcg@10": 0.44394901495565775, "eval_NanoMSMARCO_cosine_precision@1": 0.24, "eval_NanoMSMARCO_cosine_precision@10": 0.064, "eval_NanoMSMARCO_cosine_precision@3": 0.1733333333333333, "eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002, "eval_NanoMSMARCO_cosine_recall@1": 0.24, "eval_NanoMSMARCO_cosine_recall@10": 0.64, "eval_NanoMSMARCO_cosine_recall@3": 0.52, "eval_NanoMSMARCO_cosine_recall@5": 0.6, "eval_mse-dev_negative_mse": -101.57431030273438, "eval_runtime": 10.982, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.43521214929807284, "eval_steps_per_second": 0.0, "step": 19000 }, { "epoch": 0.36216769691683415, "grad_norm": 0.9670674204826355, "learning_rate": 9.09984409236474e-05, "loss": 1.0186353302001954, "step": 19100 }, { "epoch": 0.36406386286927833, "grad_norm": 0.8563957810401917, "learning_rate": 9.089309792684983e-05, "loss": 1.0215565490722656, "step": 19200 }, { "epoch": 0.36596002882172246, "grad_norm": 0.9011367559432983, "learning_rate": 9.078775493005225e-05, "loss": 1.0152357482910157, "step": 19300 }, { "epoch": 0.36785619477416664, "grad_norm": 0.8407337665557861, "learning_rate": 9.068241193325469e-05, "loss": 1.0139485931396484, "step": 19400 }, { "epoch": 0.3697523607266108, "grad_norm": 0.8842604756355286, "learning_rate": 9.057706893645711e-05, "loss": 1.0125227355957032, "step": 19500 }, { "epoch": 0.37164852667905496, "grad_norm": 0.9665144085884094, "learning_rate": 9.047172593965954e-05, "loss": 1.008692398071289, "step": 19600 }, { "epoch": 0.3735446926314991, "grad_norm": 0.8938872218132019, "learning_rate": 9.036638294286196e-05, "loss": 1.0044830322265625, "step": 19700 }, { "epoch": 0.37544085858394327, "grad_norm": 0.8201034069061279, "learning_rate": 9.026103994606438e-05, "loss": 1.0031690979003907, "step": 19800 }, { "epoch": 0.3773370245363874, "grad_norm": 0.8051674365997314, "learning_rate": 9.015569694926682e-05, "loss": 1.001277542114258, "step": 19900 }, { "epoch": 0.3792331904888316, "grad_norm": 0.8701341152191162, "learning_rate": 9.005035395246924e-05, "loss": 1.001656494140625, "step": 20000 }, { "epoch": 0.3792331904888316, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.35, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.71, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.53, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6000000000000001, "eval_NanoBEIR_mean_cosine_map@100": 0.3885181482447372, "eval_NanoBEIR_mean_cosine_mrr@10": 0.45785714285714285, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.4463748112002441, "eval_NanoBEIR_mean_cosine_precision@1": 0.35, "eval_NanoBEIR_mean_cosine_precision@10": 0.08499999999999999, "eval_NanoBEIR_mean_cosine_precision@3": 0.2, "eval_NanoBEIR_mean_cosine_precision@5": 0.14, "eval_NanoBEIR_mean_cosine_recall@1": 0.24, "eval_NanoBEIR_mean_cosine_recall@10": 0.5900000000000001, "eval_NanoBEIR_mean_cosine_recall@3": 0.425, "eval_NanoBEIR_mean_cosine_recall@5": 0.49, "eval_NanoHotpotQA_cosine_accuracy@1": 0.44, "eval_NanoHotpotQA_cosine_accuracy@10": 0.76, "eval_NanoHotpotQA_cosine_accuracy@3": 0.56, "eval_NanoHotpotQA_cosine_accuracy@5": 0.64, "eval_NanoHotpotQA_cosine_map@100": 0.37180551870545886, "eval_NanoHotpotQA_cosine_mrr@10": 0.5252698412698412, "eval_NanoHotpotQA_cosine_ndcg@10": 0.43738455459561965, "eval_NanoHotpotQA_cosine_precision@1": 0.44, "eval_NanoHotpotQA_cosine_precision@10": 0.10399999999999998, "eval_NanoHotpotQA_cosine_precision@3": 0.2333333333333333, "eval_NanoHotpotQA_cosine_precision@5": 0.16799999999999998, "eval_NanoHotpotQA_cosine_recall@1": 0.22, "eval_NanoHotpotQA_cosine_recall@10": 0.52, "eval_NanoHotpotQA_cosine_recall@3": 0.35, "eval_NanoHotpotQA_cosine_recall@5": 0.42, "eval_NanoMSMARCO_cosine_accuracy@1": 0.26, "eval_NanoMSMARCO_cosine_accuracy@10": 0.66, "eval_NanoMSMARCO_cosine_accuracy@3": 0.5, "eval_NanoMSMARCO_cosine_accuracy@5": 0.56, "eval_NanoMSMARCO_cosine_map@100": 0.40523077778401556, "eval_NanoMSMARCO_cosine_mrr@10": 0.39044444444444454, "eval_NanoMSMARCO_cosine_ndcg@10": 0.4553650678048685, "eval_NanoMSMARCO_cosine_precision@1": 0.26, "eval_NanoMSMARCO_cosine_precision@10": 0.066, "eval_NanoMSMARCO_cosine_precision@3": 0.16666666666666669, "eval_NanoMSMARCO_cosine_precision@5": 0.11200000000000002, "eval_NanoMSMARCO_cosine_recall@1": 0.26, "eval_NanoMSMARCO_cosine_recall@10": 0.66, "eval_NanoMSMARCO_cosine_recall@3": 0.5, "eval_NanoMSMARCO_cosine_recall@5": 0.56, "eval_mse-dev_negative_mse": -99.66129302978516, "eval_runtime": 11.4825, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.4463748112002441, "eval_steps_per_second": 0.0, "step": 20000 }, { "epoch": 0.3811293564412757, "grad_norm": 0.8422971963882446, "learning_rate": 8.994501095567167e-05, "loss": 1.0006825256347656, "step": 20100 }, { "epoch": 0.3830255223937199, "grad_norm": 0.955066978931427, "learning_rate": 8.98396679588741e-05, "loss": 0.9958713531494141, "step": 20200 }, { "epoch": 0.3849216883461641, "grad_norm": 0.8364739418029785, "learning_rate": 8.973432496207653e-05, "loss": 0.9965061950683594, "step": 20300 }, { "epoch": 0.3868178542986082, "grad_norm": 0.9399869441986084, "learning_rate": 8.962898196527896e-05, "loss": 0.9909481048583985, "step": 20400 }, { "epoch": 0.3887140202510524, "grad_norm": 0.8677252531051636, "learning_rate": 8.952363896848139e-05, "loss": 0.9901930236816406, "step": 20500 }, { "epoch": 0.3906101862034965, "grad_norm": 0.8382641077041626, "learning_rate": 8.941829597168381e-05, "loss": 0.9903465270996094, "step": 20600 }, { "epoch": 0.3925063521559407, "grad_norm": 0.9324244856834412, "learning_rate": 8.931295297488623e-05, "loss": 0.9927156829833984, "step": 20700 }, { "epoch": 0.39440251810838484, "grad_norm": 0.9975899457931519, "learning_rate": 8.920760997808866e-05, "loss": 0.9864664459228516, "step": 20800 }, { "epoch": 0.396298684060829, "grad_norm": 0.8882135152816772, "learning_rate": 8.910226698129108e-05, "loss": 0.9842584991455078, "step": 20900 }, { "epoch": 0.39819485001327315, "grad_norm": 0.8512315154075623, "learning_rate": 8.899692398449352e-05, "loss": 0.9808792114257813, "step": 21000 }, { "epoch": 0.39819485001327315, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.35, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.72, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.51, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.62, "eval_NanoBEIR_mean_cosine_map@100": 0.3971597947674501, "eval_NanoBEIR_mean_cosine_mrr@10": 0.45810317460317457, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.4575468587350072, "eval_NanoBEIR_mean_cosine_precision@1": 0.35, "eval_NanoBEIR_mean_cosine_precision@10": 0.088, "eval_NanoBEIR_mean_cosine_precision@3": 0.2, "eval_NanoBEIR_mean_cosine_precision@5": 0.15, "eval_NanoBEIR_mean_cosine_recall@1": 0.245, "eval_NanoBEIR_mean_cosine_recall@10": 0.6100000000000001, "eval_NanoBEIR_mean_cosine_recall@3": 0.41500000000000004, "eval_NanoBEIR_mean_cosine_recall@5": 0.52, "eval_NanoHotpotQA_cosine_accuracy@1": 0.42, "eval_NanoHotpotQA_cosine_accuracy@10": 0.76, "eval_NanoHotpotQA_cosine_accuracy@3": 0.56, "eval_NanoHotpotQA_cosine_accuracy@5": 0.66, "eval_NanoHotpotQA_cosine_map@100": 0.3788968895489927, "eval_NanoHotpotQA_cosine_mrr@10": 0.5139603174603175, "eval_NanoHotpotQA_cosine_ndcg@10": 0.4462090585062046, "eval_NanoHotpotQA_cosine_precision@1": 0.42, "eval_NanoHotpotQA_cosine_precision@10": 0.10799999999999998, "eval_NanoHotpotQA_cosine_precision@3": 0.24666666666666667, "eval_NanoHotpotQA_cosine_precision@5": 0.184, "eval_NanoHotpotQA_cosine_recall@1": 0.21, "eval_NanoHotpotQA_cosine_recall@10": 0.54, "eval_NanoHotpotQA_cosine_recall@3": 0.37, "eval_NanoHotpotQA_cosine_recall@5": 0.46, "eval_NanoMSMARCO_cosine_accuracy@1": 0.28, "eval_NanoMSMARCO_cosine_accuracy@10": 0.68, "eval_NanoMSMARCO_cosine_accuracy@3": 0.46, "eval_NanoMSMARCO_cosine_accuracy@5": 0.58, "eval_NanoMSMARCO_cosine_map@100": 0.41542269998590753, "eval_NanoMSMARCO_cosine_mrr@10": 0.4022460317460317, "eval_NanoMSMARCO_cosine_ndcg@10": 0.46888465896380976, "eval_NanoMSMARCO_cosine_precision@1": 0.28, "eval_NanoMSMARCO_cosine_precision@10": 0.068, "eval_NanoMSMARCO_cosine_precision@3": 0.15333333333333332, "eval_NanoMSMARCO_cosine_precision@5": 0.11599999999999999, "eval_NanoMSMARCO_cosine_recall@1": 0.28, "eval_NanoMSMARCO_cosine_recall@10": 0.68, "eval_NanoMSMARCO_cosine_recall@3": 0.46, "eval_NanoMSMARCO_cosine_recall@5": 0.58, "eval_mse-dev_negative_mse": -97.49221801757812, "eval_runtime": 11.4324, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.4575468587350072, "eval_steps_per_second": 0.0, "step": 21000 }, { "epoch": 0.40009101596571733, "grad_norm": 0.7803339958190918, "learning_rate": 8.889158098769594e-05, "loss": 0.9800699615478515, "step": 21100 }, { "epoch": 0.40198718191816146, "grad_norm": 0.8740707635879517, "learning_rate": 8.878623799089836e-05, "loss": 0.9785236358642578, "step": 21200 }, { "epoch": 0.40388334787060565, "grad_norm": 0.9020572304725647, "learning_rate": 8.868089499410079e-05, "loss": 0.9718000793457031, "step": 21300 }, { "epoch": 0.4057795138230498, "grad_norm": 0.8485739827156067, "learning_rate": 8.857555199730322e-05, "loss": 0.9725127410888672, "step": 21400 }, { "epoch": 0.40767567977549396, "grad_norm": 0.9113863110542297, "learning_rate": 8.847020900050565e-05, "loss": 0.9704845428466797, "step": 21500 }, { "epoch": 0.4095718457279381, "grad_norm": 0.9105412364006042, "learning_rate": 8.836486600370809e-05, "loss": 0.9728768157958985, "step": 21600 }, { "epoch": 0.4114680116803823, "grad_norm": 0.9580652713775635, "learning_rate": 8.825952300691051e-05, "loss": 0.9713729095458984, "step": 21700 }, { "epoch": 0.4133641776328264, "grad_norm": 0.863349199295044, "learning_rate": 8.815418001011293e-05, "loss": 0.9646768951416016, "step": 21800 }, { "epoch": 0.4152603435852706, "grad_norm": 0.8929393291473389, "learning_rate": 8.804883701331536e-05, "loss": 0.9623196411132813, "step": 21900 }, { "epoch": 0.4171565095377147, "grad_norm": 0.8821211457252502, "learning_rate": 8.794349401651779e-05, "loss": 0.9578647613525391, "step": 22000 }, { "epoch": 0.4171565095377147, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.35, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.72, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.55, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.63, "eval_NanoBEIR_mean_cosine_map@100": 0.398172306882256, "eval_NanoBEIR_mean_cosine_mrr@10": 0.46505158730158724, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.4595267255374118, "eval_NanoBEIR_mean_cosine_precision@1": 0.35, "eval_NanoBEIR_mean_cosine_precision@10": 0.08800000000000001, "eval_NanoBEIR_mean_cosine_precision@3": 0.20999999999999996, "eval_NanoBEIR_mean_cosine_precision@5": 0.15, "eval_NanoBEIR_mean_cosine_recall@1": 0.24, "eval_NanoBEIR_mean_cosine_recall@10": 0.6100000000000001, "eval_NanoBEIR_mean_cosine_recall@3": 0.435, "eval_NanoBEIR_mean_cosine_recall@5": 0.52, "eval_NanoHotpotQA_cosine_accuracy@1": 0.44, "eval_NanoHotpotQA_cosine_accuracy@10": 0.76, "eval_NanoHotpotQA_cosine_accuracy@3": 0.62, "eval_NanoHotpotQA_cosine_accuracy@5": 0.68, "eval_NanoHotpotQA_cosine_map@100": 0.38667158218589576, "eval_NanoHotpotQA_cosine_mrr@10": 0.5341904761904762, "eval_NanoHotpotQA_cosine_ndcg@10": 0.454887980345426, "eval_NanoHotpotQA_cosine_precision@1": 0.44, "eval_NanoHotpotQA_cosine_precision@10": 0.10800000000000001, "eval_NanoHotpotQA_cosine_precision@3": 0.25999999999999995, "eval_NanoHotpotQA_cosine_precision@5": 0.184, "eval_NanoHotpotQA_cosine_recall@1": 0.22, "eval_NanoHotpotQA_cosine_recall@10": 0.54, "eval_NanoHotpotQA_cosine_recall@3": 0.39, "eval_NanoHotpotQA_cosine_recall@5": 0.46, "eval_NanoMSMARCO_cosine_accuracy@1": 0.26, "eval_NanoMSMARCO_cosine_accuracy@10": 0.68, "eval_NanoMSMARCO_cosine_accuracy@3": 0.48, "eval_NanoMSMARCO_cosine_accuracy@5": 0.58, "eval_NanoMSMARCO_cosine_map@100": 0.40967303157861634, "eval_NanoMSMARCO_cosine_mrr@10": 0.39591269841269827, "eval_NanoMSMARCO_cosine_ndcg@10": 0.46416547072939757, "eval_NanoMSMARCO_cosine_precision@1": 0.26, "eval_NanoMSMARCO_cosine_precision@10": 0.068, "eval_NanoMSMARCO_cosine_precision@3": 0.15999999999999998, "eval_NanoMSMARCO_cosine_precision@5": 0.11599999999999999, "eval_NanoMSMARCO_cosine_recall@1": 0.26, "eval_NanoMSMARCO_cosine_recall@10": 0.68, "eval_NanoMSMARCO_cosine_recall@3": 0.48, "eval_NanoMSMARCO_cosine_recall@5": 0.58, "eval_mse-dev_negative_mse": -95.78128814697266, "eval_runtime": 11.0251, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.4595267255374118, "eval_steps_per_second": 0.0, "step": 22000 }, { "epoch": 0.4190526754901589, "grad_norm": 0.8627265095710754, "learning_rate": 8.783815101972022e-05, "loss": 0.9553171539306641, "step": 22100 }, { "epoch": 0.4209488414426031, "grad_norm": 0.8205426931381226, "learning_rate": 8.773280802292264e-05, "loss": 0.9557749176025391, "step": 22200 }, { "epoch": 0.4228450073950472, "grad_norm": 0.8694571256637573, "learning_rate": 8.762746502612506e-05, "loss": 0.9584300994873047, "step": 22300 }, { "epoch": 0.4247411733474914, "grad_norm": 0.8678444623947144, "learning_rate": 8.752212202932749e-05, "loss": 0.9544028472900391, "step": 22400 }, { "epoch": 0.4266373392999355, "grad_norm": 0.8822008967399597, "learning_rate": 8.741677903252991e-05, "loss": 0.9520068359375, "step": 22500 }, { "epoch": 0.4285335052523797, "grad_norm": 0.951594352722168, "learning_rate": 8.731143603573235e-05, "loss": 0.9515534210205078, "step": 22600 }, { "epoch": 0.43042967120482384, "grad_norm": 0.9522872567176819, "learning_rate": 8.720714646890276e-05, "loss": 0.9542991638183593, "step": 22700 }, { "epoch": 0.432325837157268, "grad_norm": 0.9078388214111328, "learning_rate": 8.710180347210518e-05, "loss": 0.9501979064941406, "step": 22800 }, { "epoch": 0.43422200310971215, "grad_norm": 0.8574204444885254, "learning_rate": 8.69964604753076e-05, "loss": 0.9476995086669922, "step": 22900 }, { "epoch": 0.43611816906215634, "grad_norm": 0.8338425159454346, "learning_rate": 8.689111747851003e-05, "loss": 0.940532455444336, "step": 23000 }, { "epoch": 0.43611816906215634, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.39, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.71, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.5700000000000001, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.61, "eval_NanoBEIR_mean_cosine_map@100": 0.4144241147192019, "eval_NanoBEIR_mean_cosine_mrr@10": 0.4880277777777777, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.4688451653582658, "eval_NanoBEIR_mean_cosine_precision@1": 0.39, "eval_NanoBEIR_mean_cosine_precision@10": 0.086, "eval_NanoBEIR_mean_cosine_precision@3": 0.21333333333333332, "eval_NanoBEIR_mean_cosine_precision@5": 0.14399999999999996, "eval_NanoBEIR_mean_cosine_recall@1": 0.275, "eval_NanoBEIR_mean_cosine_recall@10": 0.595, "eval_NanoBEIR_mean_cosine_recall@3": 0.445, "eval_NanoBEIR_mean_cosine_recall@5": 0.505, "eval_NanoHotpotQA_cosine_accuracy@1": 0.46, "eval_NanoHotpotQA_cosine_accuracy@10": 0.76, "eval_NanoHotpotQA_cosine_accuracy@3": 0.64, "eval_NanoHotpotQA_cosine_accuracy@5": 0.64, "eval_NanoHotpotQA_cosine_map@100": 0.38426119225152133, "eval_NanoHotpotQA_cosine_mrr@10": 0.5458888888888889, "eval_NanoHotpotQA_cosine_ndcg@10": 0.4520622291124691, "eval_NanoHotpotQA_cosine_precision@1": 0.46, "eval_NanoHotpotQA_cosine_precision@10": 0.10599999999999998, "eval_NanoHotpotQA_cosine_precision@3": 0.25999999999999995, "eval_NanoHotpotQA_cosine_precision@5": 0.17199999999999996, "eval_NanoHotpotQA_cosine_recall@1": 0.23, "eval_NanoHotpotQA_cosine_recall@10": 0.53, "eval_NanoHotpotQA_cosine_recall@3": 0.39, "eval_NanoHotpotQA_cosine_recall@5": 0.43, "eval_NanoMSMARCO_cosine_accuracy@1": 0.32, "eval_NanoMSMARCO_cosine_accuracy@10": 0.66, "eval_NanoMSMARCO_cosine_accuracy@3": 0.5, "eval_NanoMSMARCO_cosine_accuracy@5": 0.58, "eval_NanoMSMARCO_cosine_map@100": 0.4445870371868824, "eval_NanoMSMARCO_cosine_mrr@10": 0.4301666666666666, "eval_NanoMSMARCO_cosine_ndcg@10": 0.48562810160406256, "eval_NanoMSMARCO_cosine_precision@1": 0.32, "eval_NanoMSMARCO_cosine_precision@10": 0.066, "eval_NanoMSMARCO_cosine_precision@3": 0.16666666666666669, "eval_NanoMSMARCO_cosine_precision@5": 0.11599999999999999, "eval_NanoMSMARCO_cosine_recall@1": 0.32, "eval_NanoMSMARCO_cosine_recall@10": 0.66, "eval_NanoMSMARCO_cosine_recall@3": 0.5, "eval_NanoMSMARCO_cosine_recall@5": 0.58, "eval_mse-dev_negative_mse": -93.92383575439453, "eval_runtime": 11.5971, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.4688451653582658, "eval_steps_per_second": 0.0, "step": 23000 }, { "epoch": 0.43801433501460046, "grad_norm": 0.8566615581512451, "learning_rate": 8.678577448171246e-05, "loss": 0.9448033142089843, "step": 23100 }, { "epoch": 0.43991050096704465, "grad_norm": 0.8012374639511108, "learning_rate": 8.668043148491489e-05, "loss": 0.9424338531494141, "step": 23200 }, { "epoch": 0.4418066669194888, "grad_norm": 0.8802723288536072, "learning_rate": 8.657508848811732e-05, "loss": 0.9369033050537109, "step": 23300 }, { "epoch": 0.44370283287193296, "grad_norm": 0.814888596534729, "learning_rate": 8.646974549131975e-05, "loss": 0.93183837890625, "step": 23400 }, { "epoch": 0.4455989988243771, "grad_norm": 0.8690612316131592, "learning_rate": 8.636440249452217e-05, "loss": 0.9342401123046875, "step": 23500 }, { "epoch": 0.4474951647768213, "grad_norm": 0.8208878040313721, "learning_rate": 8.625905949772459e-05, "loss": 0.9391999053955078, "step": 23600 }, { "epoch": 0.4493913307292654, "grad_norm": 0.8126626014709473, "learning_rate": 8.615371650092702e-05, "loss": 0.9358238983154297, "step": 23700 }, { "epoch": 0.4512874966817096, "grad_norm": 0.8614762425422668, "learning_rate": 8.604837350412945e-05, "loss": 0.9303498077392578, "step": 23800 }, { "epoch": 0.4531836626341537, "grad_norm": 0.8028171062469482, "learning_rate": 8.594408393729986e-05, "loss": 0.9305805969238281, "step": 23900 }, { "epoch": 0.4550798285865979, "grad_norm": 0.8973707556724548, "learning_rate": 8.583874094050229e-05, "loss": 0.927711410522461, "step": 24000 }, { "epoch": 0.4550798285865979, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.39, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.73, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.59, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6599999999999999, "eval_NanoBEIR_mean_cosine_map@100": 0.4316413980898389, "eval_NanoBEIR_mean_cosine_mrr@10": 0.5037738095238096, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.4871808124834046, "eval_NanoBEIR_mean_cosine_precision@1": 0.39, "eval_NanoBEIR_mean_cosine_precision@10": 0.08900000000000001, "eval_NanoBEIR_mean_cosine_precision@3": 0.22666666666666666, "eval_NanoBEIR_mean_cosine_precision@5": 0.15999999999999998, "eval_NanoBEIR_mean_cosine_recall@1": 0.275, "eval_NanoBEIR_mean_cosine_recall@10": 0.62, "eval_NanoBEIR_mean_cosine_recall@3": 0.455, "eval_NanoBEIR_mean_cosine_recall@5": 0.5449999999999999, "eval_NanoHotpotQA_cosine_accuracy@1": 0.46, "eval_NanoHotpotQA_cosine_accuracy@10": 0.76, "eval_NanoHotpotQA_cosine_accuracy@3": 0.72, "eval_NanoHotpotQA_cosine_accuracy@5": 0.74, "eval_NanoHotpotQA_cosine_map@100": 0.41902772468451716, "eval_NanoHotpotQA_cosine_mrr@10": 0.5768571428571428, "eval_NanoHotpotQA_cosine_ndcg@10": 0.479755445861627, "eval_NanoHotpotQA_cosine_precision@1": 0.46, "eval_NanoHotpotQA_cosine_precision@10": 0.10800000000000001, "eval_NanoHotpotQA_cosine_precision@3": 0.3, "eval_NanoHotpotQA_cosine_precision@5": 0.204, "eval_NanoHotpotQA_cosine_recall@1": 0.23, "eval_NanoHotpotQA_cosine_recall@10": 0.54, "eval_NanoHotpotQA_cosine_recall@3": 0.45, "eval_NanoHotpotQA_cosine_recall@5": 0.51, "eval_NanoMSMARCO_cosine_accuracy@1": 0.32, "eval_NanoMSMARCO_cosine_accuracy@10": 0.7, "eval_NanoMSMARCO_cosine_accuracy@3": 0.46, "eval_NanoMSMARCO_cosine_accuracy@5": 0.58, "eval_NanoMSMARCO_cosine_map@100": 0.44425507149516064, "eval_NanoMSMARCO_cosine_mrr@10": 0.4306904761904762, "eval_NanoMSMARCO_cosine_ndcg@10": 0.49460617910518223, "eval_NanoMSMARCO_cosine_precision@1": 0.32, "eval_NanoMSMARCO_cosine_precision@10": 0.07, "eval_NanoMSMARCO_cosine_precision@3": 0.15333333333333332, "eval_NanoMSMARCO_cosine_precision@5": 0.11599999999999999, "eval_NanoMSMARCO_cosine_recall@1": 0.32, "eval_NanoMSMARCO_cosine_recall@10": 0.7, "eval_NanoMSMARCO_cosine_recall@3": 0.46, "eval_NanoMSMARCO_cosine_recall@5": 0.58, "eval_mse-dev_negative_mse": -92.24274444580078, "eval_runtime": 11.2493, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.4871808124834046, "eval_steps_per_second": 0.0, "step": 24000 }, { "epoch": 0.45697599453904203, "grad_norm": 0.8175747394561768, "learning_rate": 8.573339794370471e-05, "loss": 0.9266593170166015, "step": 24100 }, { "epoch": 0.4588721604914862, "grad_norm": 0.9604556560516357, "learning_rate": 8.562805494690713e-05, "loss": 0.9227654266357422, "step": 24200 }, { "epoch": 0.4607683264439304, "grad_norm": 0.82953941822052, "learning_rate": 8.552271195010956e-05, "loss": 0.9239090728759766, "step": 24300 }, { "epoch": 0.4626644923963745, "grad_norm": 0.9319136142730713, "learning_rate": 8.541736895331198e-05, "loss": 0.9225330352783203, "step": 24400 }, { "epoch": 0.4645606583488187, "grad_norm": 0.8900800943374634, "learning_rate": 8.531202595651442e-05, "loss": 0.9169361877441407, "step": 24500 }, { "epoch": 0.46645682430126284, "grad_norm": 0.8238077759742737, "learning_rate": 8.520668295971684e-05, "loss": 0.9170392608642578, "step": 24600 }, { "epoch": 0.468352990253707, "grad_norm": 0.9116878509521484, "learning_rate": 8.510133996291926e-05, "loss": 0.9195194244384766, "step": 24700 }, { "epoch": 0.47024915620615115, "grad_norm": 0.8857290744781494, "learning_rate": 8.49959969661217e-05, "loss": 0.915346450805664, "step": 24800 }, { "epoch": 0.47214532215859534, "grad_norm": 0.8089697360992432, "learning_rate": 8.489065396932412e-05, "loss": 0.9137913513183594, "step": 24900 }, { "epoch": 0.47404148811103947, "grad_norm": 0.9027810096740723, "learning_rate": 8.478531097252656e-05, "loss": 0.9107527923583985, "step": 25000 }, { "epoch": 0.47404148811103947, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.38, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.73, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.54, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6599999999999999, "eval_NanoBEIR_mean_cosine_map@100": 0.4163537056013121, "eval_NanoBEIR_mean_cosine_mrr@10": 0.4892301587301586, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.47172532243736104, "eval_NanoBEIR_mean_cosine_precision@1": 0.38, "eval_NanoBEIR_mean_cosine_precision@10": 0.088, "eval_NanoBEIR_mean_cosine_precision@3": 0.21333333333333335, "eval_NanoBEIR_mean_cosine_precision@5": 0.15800000000000003, "eval_NanoBEIR_mean_cosine_recall@1": 0.26, "eval_NanoBEIR_mean_cosine_recall@10": 0.605, "eval_NanoBEIR_mean_cosine_recall@3": 0.43, "eval_NanoBEIR_mean_cosine_recall@5": 0.5449999999999999, "eval_NanoHotpotQA_cosine_accuracy@1": 0.48, "eval_NanoHotpotQA_cosine_accuracy@10": 0.8, "eval_NanoHotpotQA_cosine_accuracy@3": 0.64, "eval_NanoHotpotQA_cosine_accuracy@5": 0.72, "eval_NanoHotpotQA_cosine_map@100": 0.41568504331630274, "eval_NanoHotpotQA_cosine_mrr@10": 0.5788571428571428, "eval_NanoHotpotQA_cosine_ndcg@10": 0.48124067192024733, "eval_NanoHotpotQA_cosine_precision@1": 0.48, "eval_NanoHotpotQA_cosine_precision@10": 0.11, "eval_NanoHotpotQA_cosine_precision@3": 0.28, "eval_NanoHotpotQA_cosine_precision@5": 0.196, "eval_NanoHotpotQA_cosine_recall@1": 0.24, "eval_NanoHotpotQA_cosine_recall@10": 0.55, "eval_NanoHotpotQA_cosine_recall@3": 0.42, "eval_NanoHotpotQA_cosine_recall@5": 0.49, "eval_NanoMSMARCO_cosine_accuracy@1": 0.28, "eval_NanoMSMARCO_cosine_accuracy@10": 0.66, "eval_NanoMSMARCO_cosine_accuracy@3": 0.44, "eval_NanoMSMARCO_cosine_accuracy@5": 0.6, "eval_NanoMSMARCO_cosine_map@100": 0.4170223678863214, "eval_NanoMSMARCO_cosine_mrr@10": 0.39960317460317446, "eval_NanoMSMARCO_cosine_ndcg@10": 0.46220997295447475, "eval_NanoMSMARCO_cosine_precision@1": 0.28, "eval_NanoMSMARCO_cosine_precision@10": 0.066, "eval_NanoMSMARCO_cosine_precision@3": 0.14666666666666667, "eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002, "eval_NanoMSMARCO_cosine_recall@1": 0.28, "eval_NanoMSMARCO_cosine_recall@10": 0.66, "eval_NanoMSMARCO_cosine_recall@3": 0.44, "eval_NanoMSMARCO_cosine_recall@5": 0.6, "eval_mse-dev_negative_mse": -90.7634506225586, "eval_runtime": 11.8991, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.47172532243736104, "eval_steps_per_second": 0.0, "step": 25000 }, { "epoch": 0.47593765406348365, "grad_norm": 0.8811827898025513, "learning_rate": 8.467996797572898e-05, "loss": 0.913282470703125, "step": 25100 }, { "epoch": 0.4778338200159278, "grad_norm": 0.7843953967094421, "learning_rate": 8.457462497893141e-05, "loss": 0.9076313781738281, "step": 25200 }, { "epoch": 0.47972998596837196, "grad_norm": 0.9090595245361328, "learning_rate": 8.446928198213383e-05, "loss": 0.9081029510498047, "step": 25300 }, { "epoch": 0.4816261519208161, "grad_norm": 0.9231439828872681, "learning_rate": 8.436393898533625e-05, "loss": 0.9092655944824218, "step": 25400 }, { "epoch": 0.4835223178732603, "grad_norm": 0.8273399472236633, "learning_rate": 8.425859598853869e-05, "loss": 0.9036608123779297, "step": 25500 }, { "epoch": 0.4854184838257044, "grad_norm": 0.9115743637084961, "learning_rate": 8.415325299174111e-05, "loss": 0.9024863433837891, "step": 25600 }, { "epoch": 0.4873146497781486, "grad_norm": 0.8682368993759155, "learning_rate": 8.404790999494354e-05, "loss": 0.9058139801025391, "step": 25700 }, { "epoch": 0.4892108157305927, "grad_norm": 0.8775367140769958, "learning_rate": 8.394256699814596e-05, "loss": 0.901763916015625, "step": 25800 }, { "epoch": 0.4911069816830369, "grad_norm": 0.8083050847053528, "learning_rate": 8.383722400134838e-05, "loss": 0.901358642578125, "step": 25900 }, { "epoch": 0.49300314763548103, "grad_norm": 0.8163812160491943, "learning_rate": 8.373188100455082e-05, "loss": 0.8946353912353515, "step": 26000 }, { "epoch": 0.49300314763548103, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.38, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.73, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.56, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6399999999999999, "eval_NanoBEIR_mean_cosine_map@100": 0.424081987400691, "eval_NanoBEIR_mean_cosine_mrr@10": 0.49457936507936506, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.4851060192183279, "eval_NanoBEIR_mean_cosine_precision@1": 0.38, "eval_NanoBEIR_mean_cosine_precision@10": 0.092, "eval_NanoBEIR_mean_cosine_precision@3": 0.22, "eval_NanoBEIR_mean_cosine_precision@5": 0.158, "eval_NanoBEIR_mean_cosine_recall@1": 0.26, "eval_NanoBEIR_mean_cosine_recall@10": 0.63, "eval_NanoBEIR_mean_cosine_recall@3": 0.445, "eval_NanoBEIR_mean_cosine_recall@5": 0.54, "eval_NanoHotpotQA_cosine_accuracy@1": 0.48, "eval_NanoHotpotQA_cosine_accuracy@10": 0.78, "eval_NanoHotpotQA_cosine_accuracy@3": 0.66, "eval_NanoHotpotQA_cosine_accuracy@5": 0.7, "eval_NanoHotpotQA_cosine_map@100": 0.42326811379287077, "eval_NanoHotpotQA_cosine_mrr@10": 0.5793888888888888, "eval_NanoHotpotQA_cosine_ndcg@10": 0.4956820213676064, "eval_NanoHotpotQA_cosine_precision@1": 0.48, "eval_NanoHotpotQA_cosine_precision@10": 0.11599999999999998, "eval_NanoHotpotQA_cosine_precision@3": 0.2866666666666667, "eval_NanoHotpotQA_cosine_precision@5": 0.2, "eval_NanoHotpotQA_cosine_recall@1": 0.24, "eval_NanoHotpotQA_cosine_recall@10": 0.58, "eval_NanoHotpotQA_cosine_recall@3": 0.43, "eval_NanoHotpotQA_cosine_recall@5": 0.5, "eval_NanoMSMARCO_cosine_accuracy@1": 0.28, "eval_NanoMSMARCO_cosine_accuracy@10": 0.68, "eval_NanoMSMARCO_cosine_accuracy@3": 0.46, "eval_NanoMSMARCO_cosine_accuracy@5": 0.58, "eval_NanoMSMARCO_cosine_map@100": 0.4248958610085112, "eval_NanoMSMARCO_cosine_mrr@10": 0.40976984126984123, "eval_NanoMSMARCO_cosine_ndcg@10": 0.4745300170690494, "eval_NanoMSMARCO_cosine_precision@1": 0.28, "eval_NanoMSMARCO_cosine_precision@10": 0.068, "eval_NanoMSMARCO_cosine_precision@3": 0.15333333333333332, "eval_NanoMSMARCO_cosine_precision@5": 0.11599999999999999, "eval_NanoMSMARCO_cosine_recall@1": 0.28, "eval_NanoMSMARCO_cosine_recall@10": 0.68, "eval_NanoMSMARCO_cosine_recall@3": 0.46, "eval_NanoMSMARCO_cosine_recall@5": 0.58, "eval_mse-dev_negative_mse": -89.25623321533203, "eval_runtime": 10.0875, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.4851060192183279, "eval_steps_per_second": 0.0, "step": 26000 }, { "epoch": 0.4948993135879252, "grad_norm": 0.8695216178894043, "learning_rate": 8.362653800775326e-05, "loss": 0.8982176208496093, "step": 26100 }, { "epoch": 0.4967954795403694, "grad_norm": 0.87025386095047, "learning_rate": 8.352119501095568e-05, "loss": 0.8945767211914063, "step": 26200 }, { "epoch": 0.49869164549281353, "grad_norm": 0.8507541418075562, "learning_rate": 8.34158520141581e-05, "loss": 0.8941314697265625, "step": 26300 }, { "epoch": 0.5005878114452577, "grad_norm": 0.9079861044883728, "learning_rate": 8.33115624473285e-05, "loss": 0.8925470733642578, "step": 26400 }, { "epoch": 0.5024839773977019, "grad_norm": 0.8484945893287659, "learning_rate": 8.320621945053094e-05, "loss": 0.8947381591796875, "step": 26500 }, { "epoch": 0.504380143350146, "grad_norm": 0.889153003692627, "learning_rate": 8.310087645373336e-05, "loss": 0.89056884765625, "step": 26600 }, { "epoch": 0.5062763093025902, "grad_norm": 0.7697421312332153, "learning_rate": 8.29955334569358e-05, "loss": 0.889549560546875, "step": 26700 }, { "epoch": 0.5081724752550343, "grad_norm": 0.8403399586677551, "learning_rate": 8.289019046013822e-05, "loss": 0.886633529663086, "step": 26800 }, { "epoch": 0.5100686412074785, "grad_norm": 0.9034698009490967, "learning_rate": 8.278484746334064e-05, "loss": 0.8839826965332032, "step": 26900 }, { "epoch": 0.5119648071599227, "grad_norm": 0.8018946051597595, "learning_rate": 8.267950446654307e-05, "loss": 0.8764205932617187, "step": 27000 }, { "epoch": 0.5119648071599227, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.45000000000000007, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.75, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.56, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6799999999999999, "eval_NanoBEIR_mean_cosine_map@100": 0.4516633759155257, "eval_NanoBEIR_mean_cosine_mrr@10": 0.5368730158730158, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.5092015395473641, "eval_NanoBEIR_mean_cosine_precision@1": 0.45000000000000007, "eval_NanoBEIR_mean_cosine_precision@10": 0.092, "eval_NanoBEIR_mean_cosine_precision@3": 0.21666666666666667, "eval_NanoBEIR_mean_cosine_precision@5": 0.166, "eval_NanoBEIR_mean_cosine_recall@1": 0.31000000000000005, "eval_NanoBEIR_mean_cosine_recall@10": 0.635, "eval_NanoBEIR_mean_cosine_recall@3": 0.44, "eval_NanoBEIR_mean_cosine_recall@5": 0.565, "eval_NanoHotpotQA_cosine_accuracy@1": 0.56, "eval_NanoHotpotQA_cosine_accuracy@10": 0.8, "eval_NanoHotpotQA_cosine_accuracy@3": 0.66, "eval_NanoHotpotQA_cosine_accuracy@5": 0.76, "eval_NanoHotpotQA_cosine_map@100": 0.4514755786098336, "eval_NanoHotpotQA_cosine_mrr@10": 0.6343888888888888, "eval_NanoHotpotQA_cosine_ndcg@10": 0.5172578575160077, "eval_NanoHotpotQA_cosine_precision@1": 0.56, "eval_NanoHotpotQA_cosine_precision@10": 0.11399999999999999, "eval_NanoHotpotQA_cosine_precision@3": 0.28, "eval_NanoHotpotQA_cosine_precision@5": 0.212, "eval_NanoHotpotQA_cosine_recall@1": 0.28, "eval_NanoHotpotQA_cosine_recall@10": 0.57, "eval_NanoHotpotQA_cosine_recall@3": 0.42, "eval_NanoHotpotQA_cosine_recall@5": 0.53, "eval_NanoMSMARCO_cosine_accuracy@1": 0.34, "eval_NanoMSMARCO_cosine_accuracy@10": 0.7, "eval_NanoMSMARCO_cosine_accuracy@3": 0.46, "eval_NanoMSMARCO_cosine_accuracy@5": 0.6, "eval_NanoMSMARCO_cosine_map@100": 0.45185117322121776, "eval_NanoMSMARCO_cosine_mrr@10": 0.43935714285714284, "eval_NanoMSMARCO_cosine_ndcg@10": 0.5011452215787204, "eval_NanoMSMARCO_cosine_precision@1": 0.34, "eval_NanoMSMARCO_cosine_precision@10": 0.07, "eval_NanoMSMARCO_cosine_precision@3": 0.15333333333333332, "eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002, "eval_NanoMSMARCO_cosine_recall@1": 0.34, "eval_NanoMSMARCO_cosine_recall@10": 0.7, "eval_NanoMSMARCO_cosine_recall@3": 0.46, "eval_NanoMSMARCO_cosine_recall@5": 0.6, "eval_mse-dev_negative_mse": -87.8038558959961, "eval_runtime": 12.2079, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5092015395473641, "eval_steps_per_second": 0.0, "step": 27000 }, { "epoch": 0.5138609731123668, "grad_norm": 0.8619687557220459, "learning_rate": 8.257416146974549e-05, "loss": 0.8859089660644531, "step": 27100 }, { "epoch": 0.5157571390648109, "grad_norm": 0.8811931610107422, "learning_rate": 8.246881847294792e-05, "loss": 0.8839226531982421, "step": 27200 }, { "epoch": 0.5176533050172551, "grad_norm": 0.8505755066871643, "learning_rate": 8.236347547615035e-05, "loss": 0.8794448852539063, "step": 27300 }, { "epoch": 0.5195494709696993, "grad_norm": 0.8391817212104797, "learning_rate": 8.225813247935278e-05, "loss": 0.8790214538574219, "step": 27400 }, { "epoch": 0.5214456369221434, "grad_norm": 0.7982373237609863, "learning_rate": 8.21527894825552e-05, "loss": 0.8788404083251953, "step": 27500 }, { "epoch": 0.5233418028745875, "grad_norm": 0.87211674451828, "learning_rate": 8.204744648575762e-05, "loss": 0.8779651641845703, "step": 27600 }, { "epoch": 0.5252379688270318, "grad_norm": 0.8461468815803528, "learning_rate": 8.194210348896006e-05, "loss": 0.8749393463134766, "step": 27700 }, { "epoch": 0.5271341347794759, "grad_norm": 0.8423062562942505, "learning_rate": 8.18367604921625e-05, "loss": 0.8741777038574219, "step": 27800 }, { "epoch": 0.52903030073192, "grad_norm": 0.8545904159545898, "learning_rate": 8.173141749536492e-05, "loss": 0.8700465393066407, "step": 27900 }, { "epoch": 0.5309264666843642, "grad_norm": 0.8632199764251709, "learning_rate": 8.162607449856734e-05, "loss": 0.8691284942626953, "step": 28000 }, { "epoch": 0.5309264666843642, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.39, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.74, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.54, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6599999999999999, "eval_NanoBEIR_mean_cosine_map@100": 0.43119589947238984, "eval_NanoBEIR_mean_cosine_mrr@10": 0.4965515873015872, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.4856222050166246, "eval_NanoBEIR_mean_cosine_precision@1": 0.39, "eval_NanoBEIR_mean_cosine_precision@10": 0.09, "eval_NanoBEIR_mean_cosine_precision@3": 0.21333333333333332, "eval_NanoBEIR_mean_cosine_precision@5": 0.15800000000000003, "eval_NanoBEIR_mean_cosine_recall@1": 0.275, "eval_NanoBEIR_mean_cosine_recall@10": 0.6200000000000001, "eval_NanoBEIR_mean_cosine_recall@3": 0.44, "eval_NanoBEIR_mean_cosine_recall@5": 0.5449999999999999, "eval_NanoHotpotQA_cosine_accuracy@1": 0.46, "eval_NanoHotpotQA_cosine_accuracy@10": 0.8, "eval_NanoHotpotQA_cosine_accuracy@3": 0.6, "eval_NanoHotpotQA_cosine_accuracy@5": 0.72, "eval_NanoHotpotQA_cosine_map@100": 0.4123353201122342, "eval_NanoHotpotQA_cosine_mrr@10": 0.5583888888888888, "eval_NanoHotpotQA_cosine_ndcg@10": 0.4776237090129175, "eval_NanoHotpotQA_cosine_precision@1": 0.46, "eval_NanoHotpotQA_cosine_precision@10": 0.11199999999999999, "eval_NanoHotpotQA_cosine_precision@3": 0.26666666666666666, "eval_NanoHotpotQA_cosine_precision@5": 0.196, "eval_NanoHotpotQA_cosine_recall@1": 0.23, "eval_NanoHotpotQA_cosine_recall@10": 0.56, "eval_NanoHotpotQA_cosine_recall@3": 0.4, "eval_NanoHotpotQA_cosine_recall@5": 0.49, "eval_NanoMSMARCO_cosine_accuracy@1": 0.32, "eval_NanoMSMARCO_cosine_accuracy@10": 0.68, "eval_NanoMSMARCO_cosine_accuracy@3": 0.48, "eval_NanoMSMARCO_cosine_accuracy@5": 0.6, "eval_NanoMSMARCO_cosine_map@100": 0.45005647883254546, "eval_NanoMSMARCO_cosine_mrr@10": 0.43471428571428566, "eval_NanoMSMARCO_cosine_ndcg@10": 0.49362070102033173, "eval_NanoMSMARCO_cosine_precision@1": 0.32, "eval_NanoMSMARCO_cosine_precision@10": 0.068, "eval_NanoMSMARCO_cosine_precision@3": 0.15999999999999998, "eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002, "eval_NanoMSMARCO_cosine_recall@1": 0.32, "eval_NanoMSMARCO_cosine_recall@10": 0.68, "eval_NanoMSMARCO_cosine_recall@3": 0.48, "eval_NanoMSMARCO_cosine_recall@5": 0.6, "eval_mse-dev_negative_mse": -86.44185638427734, "eval_runtime": 14.6199, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.4856222050166246, "eval_steps_per_second": 0.0, "step": 28000 }, { "epoch": 0.5328226326368084, "grad_norm": 0.9085125923156738, "learning_rate": 8.152073150176977e-05, "loss": 0.874675521850586, "step": 28100 }, { "epoch": 0.5347187985892525, "grad_norm": 0.8658029437065125, "learning_rate": 8.141538850497219e-05, "loss": 0.8643728637695313, "step": 28200 }, { "epoch": 0.5366149645416967, "grad_norm": 0.9218304753303528, "learning_rate": 8.131004550817463e-05, "loss": 0.8673239898681641, "step": 28300 }, { "epoch": 0.5385111304941409, "grad_norm": 0.8571885228157043, "learning_rate": 8.120470251137705e-05, "loss": 0.86698486328125, "step": 28400 }, { "epoch": 0.540407296446585, "grad_norm": 0.8248752355575562, "learning_rate": 8.109935951457947e-05, "loss": 0.863829116821289, "step": 28500 }, { "epoch": 0.5423034623990292, "grad_norm": 0.9771467447280884, "learning_rate": 8.09940165177819e-05, "loss": 0.8649395751953125, "step": 28600 }, { "epoch": 0.5441996283514733, "grad_norm": 0.8203988075256348, "learning_rate": 8.088867352098432e-05, "loss": 0.8629121398925781, "step": 28700 }, { "epoch": 0.5460957943039175, "grad_norm": 0.7756925225257874, "learning_rate": 8.078333052418676e-05, "loss": 0.8629222106933594, "step": 28800 }, { "epoch": 0.5479919602563617, "grad_norm": 0.8539568781852722, "learning_rate": 8.067798752738918e-05, "loss": 0.8591197204589843, "step": 28900 }, { "epoch": 0.5498881262088058, "grad_norm": 0.8543459177017212, "learning_rate": 8.057264453059162e-05, "loss": 0.856646499633789, "step": 29000 }, { "epoch": 0.5498881262088058, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.37, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.73, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.5800000000000001, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6699999999999999, "eval_NanoBEIR_mean_cosine_map@100": 0.42708582266190265, "eval_NanoBEIR_mean_cosine_mrr@10": 0.49876190476190474, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.48552032214505464, "eval_NanoBEIR_mean_cosine_precision@1": 0.37, "eval_NanoBEIR_mean_cosine_precision@10": 0.091, "eval_NanoBEIR_mean_cosine_precision@3": 0.2233333333333333, "eval_NanoBEIR_mean_cosine_precision@5": 0.162, "eval_NanoBEIR_mean_cosine_recall@1": 0.26, "eval_NanoBEIR_mean_cosine_recall@10": 0.62, "eval_NanoBEIR_mean_cosine_recall@3": 0.45499999999999996, "eval_NanoBEIR_mean_cosine_recall@5": 0.5549999999999999, "eval_NanoHotpotQA_cosine_accuracy@1": 0.44, "eval_NanoHotpotQA_cosine_accuracy@10": 0.8, "eval_NanoHotpotQA_cosine_accuracy@3": 0.68, "eval_NanoHotpotQA_cosine_accuracy@5": 0.74, "eval_NanoHotpotQA_cosine_map@100": 0.41616973619547015, "eval_NanoHotpotQA_cosine_mrr@10": 0.5761666666666667, "eval_NanoHotpotQA_cosine_ndcg@10": 0.49181648887243534, "eval_NanoHotpotQA_cosine_precision@1": 0.44, "eval_NanoHotpotQA_cosine_precision@10": 0.11599999999999998, "eval_NanoHotpotQA_cosine_precision@3": 0.2866666666666666, "eval_NanoHotpotQA_cosine_precision@5": 0.204, "eval_NanoHotpotQA_cosine_recall@1": 0.22, "eval_NanoHotpotQA_cosine_recall@10": 0.58, "eval_NanoHotpotQA_cosine_recall@3": 0.43, "eval_NanoHotpotQA_cosine_recall@5": 0.51, "eval_NanoMSMARCO_cosine_accuracy@1": 0.3, "eval_NanoMSMARCO_cosine_accuracy@10": 0.66, "eval_NanoMSMARCO_cosine_accuracy@3": 0.48, "eval_NanoMSMARCO_cosine_accuracy@5": 0.6, "eval_NanoMSMARCO_cosine_map@100": 0.43800190912833514, "eval_NanoMSMARCO_cosine_mrr@10": 0.42135714285714276, "eval_NanoMSMARCO_cosine_ndcg@10": 0.47922415541767394, "eval_NanoMSMARCO_cosine_precision@1": 0.3, "eval_NanoMSMARCO_cosine_precision@10": 0.066, "eval_NanoMSMARCO_cosine_precision@3": 0.15999999999999998, "eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002, "eval_NanoMSMARCO_cosine_recall@1": 0.3, "eval_NanoMSMARCO_cosine_recall@10": 0.66, "eval_NanoMSMARCO_cosine_recall@3": 0.48, "eval_NanoMSMARCO_cosine_recall@5": 0.6, "eval_mse-dev_negative_mse": -85.04077911376953, "eval_runtime": 12.4825, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.48552032214505464, "eval_steps_per_second": 0.0, "step": 29000 }, { "epoch": 0.5517842921612499, "grad_norm": 0.8201111555099487, "learning_rate": 8.046730153379404e-05, "loss": 0.8587515258789062, "step": 29100 }, { "epoch": 0.5536804581136942, "grad_norm": 0.8306780457496643, "learning_rate": 8.036195853699647e-05, "loss": 0.8544799041748047, "step": 29200 }, { "epoch": 0.5555766240661383, "grad_norm": 0.8447550535202026, "learning_rate": 8.025661554019889e-05, "loss": 0.8534080505371093, "step": 29300 }, { "epoch": 0.5574727900185824, "grad_norm": 0.8507358431816101, "learning_rate": 8.015127254340133e-05, "loss": 0.8543455505371094, "step": 29400 }, { "epoch": 0.5593689559710265, "grad_norm": 0.8200713396072388, "learning_rate": 8.004592954660375e-05, "loss": 0.8533712768554688, "step": 29500 }, { "epoch": 0.5612651219234708, "grad_norm": 0.8041396141052246, "learning_rate": 7.994058654980617e-05, "loss": 0.8519126129150391, "step": 29600 }, { "epoch": 0.5631612878759149, "grad_norm": 0.8296621441841125, "learning_rate": 7.98352435530086e-05, "loss": 0.8486277008056641, "step": 29700 }, { "epoch": 0.565057453828359, "grad_norm": 0.8634279370307922, "learning_rate": 7.972990055621102e-05, "loss": 0.8529573822021485, "step": 29800 }, { "epoch": 0.5669536197808032, "grad_norm": 0.9058282375335693, "learning_rate": 7.962455755941344e-05, "loss": 0.8476997375488281, "step": 29900 }, { "epoch": 0.5688497857332474, "grad_norm": 0.8404967784881592, "learning_rate": 7.951921456261588e-05, "loss": 0.8465479278564453, "step": 30000 }, { "epoch": 0.5688497857332474, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.4, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.75, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.61, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6699999999999999, "eval_NanoBEIR_mean_cosine_map@100": 0.44285306339192243, "eval_NanoBEIR_mean_cosine_mrr@10": 0.5205357142857143, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.5041657136741884, "eval_NanoBEIR_mean_cosine_precision@1": 0.4, "eval_NanoBEIR_mean_cosine_precision@10": 0.093, "eval_NanoBEIR_mean_cosine_precision@3": 0.2366666666666667, "eval_NanoBEIR_mean_cosine_precision@5": 0.164, "eval_NanoBEIR_mean_cosine_recall@1": 0.275, "eval_NanoBEIR_mean_cosine_recall@10": 0.645, "eval_NanoBEIR_mean_cosine_recall@3": 0.48, "eval_NanoBEIR_mean_cosine_recall@5": 0.5549999999999999, "eval_NanoHotpotQA_cosine_accuracy@1": 0.5, "eval_NanoHotpotQA_cosine_accuracy@10": 0.78, "eval_NanoHotpotQA_cosine_accuracy@3": 0.72, "eval_NanoHotpotQA_cosine_accuracy@5": 0.76, "eval_NanoHotpotQA_cosine_map@100": 0.4425880574674547, "eval_NanoHotpotQA_cosine_mrr@10": 0.6113333333333334, "eval_NanoHotpotQA_cosine_ndcg@10": 0.5096854578340355, "eval_NanoHotpotQA_cosine_precision@1": 0.5, "eval_NanoHotpotQA_cosine_precision@10": 0.11399999999999999, "eval_NanoHotpotQA_cosine_precision@3": 0.3066666666666667, "eval_NanoHotpotQA_cosine_precision@5": 0.212, "eval_NanoHotpotQA_cosine_recall@1": 0.25, "eval_NanoHotpotQA_cosine_recall@10": 0.57, "eval_NanoHotpotQA_cosine_recall@3": 0.46, "eval_NanoHotpotQA_cosine_recall@5": 0.53, "eval_NanoMSMARCO_cosine_accuracy@1": 0.3, "eval_NanoMSMARCO_cosine_accuracy@10": 0.72, "eval_NanoMSMARCO_cosine_accuracy@3": 0.5, "eval_NanoMSMARCO_cosine_accuracy@5": 0.58, "eval_NanoMSMARCO_cosine_map@100": 0.44311806931639014, "eval_NanoMSMARCO_cosine_mrr@10": 0.4297380952380952, "eval_NanoMSMARCO_cosine_ndcg@10": 0.49864596951434115, "eval_NanoMSMARCO_cosine_precision@1": 0.3, "eval_NanoMSMARCO_cosine_precision@10": 0.07200000000000001, "eval_NanoMSMARCO_cosine_precision@3": 0.16666666666666669, "eval_NanoMSMARCO_cosine_precision@5": 0.11600000000000002, "eval_NanoMSMARCO_cosine_recall@1": 0.3, "eval_NanoMSMARCO_cosine_recall@10": 0.72, "eval_NanoMSMARCO_cosine_recall@3": 0.5, "eval_NanoMSMARCO_cosine_recall@5": 0.58, "eval_mse-dev_negative_mse": -83.94352722167969, "eval_runtime": 11.241, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5041657136741884, "eval_steps_per_second": 0.0, "step": 30000 }, { "epoch": 0.5707459516856915, "grad_norm": 0.8594946265220642, "learning_rate": 7.94138715658183e-05, "loss": 0.8424729156494141, "step": 30100 }, { "epoch": 0.5726421176381357, "grad_norm": 0.8535016775131226, "learning_rate": 7.930852856902074e-05, "loss": 0.8437194061279297, "step": 30200 }, { "epoch": 0.5745382835905799, "grad_norm": 0.8929939866065979, "learning_rate": 7.920318557222316e-05, "loss": 0.8429566192626953, "step": 30300 }, { "epoch": 0.576434449543024, "grad_norm": 0.7629504203796387, "learning_rate": 7.909784257542559e-05, "loss": 0.8431417846679687, "step": 30400 }, { "epoch": 0.5783306154954682, "grad_norm": 0.8285149335861206, "learning_rate": 7.899355300859598e-05, "loss": 0.8423690032958985, "step": 30500 }, { "epoch": 0.5802267814479123, "grad_norm": 0.866598904132843, "learning_rate": 7.888821001179842e-05, "loss": 0.8403389739990235, "step": 30600 }, { "epoch": 0.5821229474003565, "grad_norm": 0.8084122538566589, "learning_rate": 7.878286701500086e-05, "loss": 0.8347031402587891, "step": 30700 }, { "epoch": 0.5840191133528007, "grad_norm": 0.8977468013763428, "learning_rate": 7.867752401820328e-05, "loss": 0.8343724822998047, "step": 30800 }, { "epoch": 0.5859152793052448, "grad_norm": 0.8902882933616638, "learning_rate": 7.85721810214057e-05, "loss": 0.8348311614990235, "step": 30900 }, { "epoch": 0.5878114452576889, "grad_norm": 0.9056336283683777, "learning_rate": 7.846683802460813e-05, "loss": 0.8350757598876953, "step": 31000 }, { "epoch": 0.5878114452576889, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.39, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.78, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.5900000000000001, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6799999999999999, "eval_NanoBEIR_mean_cosine_map@100": 0.43741232846707523, "eval_NanoBEIR_mean_cosine_mrr@10": 0.5121309523809523, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.5043257907139727, "eval_NanoBEIR_mean_cosine_precision@1": 0.39, "eval_NanoBEIR_mean_cosine_precision@10": 0.096, "eval_NanoBEIR_mean_cosine_precision@3": 0.22666666666666668, "eval_NanoBEIR_mean_cosine_precision@5": 0.16399999999999998, "eval_NanoBEIR_mean_cosine_recall@1": 0.27, "eval_NanoBEIR_mean_cosine_recall@10": 0.6599999999999999, "eval_NanoBEIR_mean_cosine_recall@3": 0.46499999999999997, "eval_NanoBEIR_mean_cosine_recall@5": 0.565, "eval_NanoHotpotQA_cosine_accuracy@1": 0.48, "eval_NanoHotpotQA_cosine_accuracy@10": 0.84, "eval_NanoHotpotQA_cosine_accuracy@3": 0.68, "eval_NanoHotpotQA_cosine_accuracy@5": 0.74, "eval_NanoHotpotQA_cosine_map@100": 0.4314881589932104, "eval_NanoHotpotQA_cosine_mrr@10": 0.5937380952380952, "eval_NanoHotpotQA_cosine_ndcg@10": 0.508792106805762, "eval_NanoHotpotQA_cosine_precision@1": 0.48, "eval_NanoHotpotQA_cosine_precision@10": 0.11999999999999998, "eval_NanoHotpotQA_cosine_precision@3": 0.2866666666666667, "eval_NanoHotpotQA_cosine_precision@5": 0.204, "eval_NanoHotpotQA_cosine_recall@1": 0.24, "eval_NanoHotpotQA_cosine_recall@10": 0.6, "eval_NanoHotpotQA_cosine_recall@3": 0.43, "eval_NanoHotpotQA_cosine_recall@5": 0.51, "eval_NanoMSMARCO_cosine_accuracy@1": 0.3, "eval_NanoMSMARCO_cosine_accuracy@10": 0.72, "eval_NanoMSMARCO_cosine_accuracy@3": 0.5, "eval_NanoMSMARCO_cosine_accuracy@5": 0.62, "eval_NanoMSMARCO_cosine_map@100": 0.4433364979409401, "eval_NanoMSMARCO_cosine_mrr@10": 0.43052380952380953, "eval_NanoMSMARCO_cosine_ndcg@10": 0.4998594746221832, "eval_NanoMSMARCO_cosine_precision@1": 0.3, "eval_NanoMSMARCO_cosine_precision@10": 0.07200000000000001, "eval_NanoMSMARCO_cosine_precision@3": 0.16666666666666669, "eval_NanoMSMARCO_cosine_precision@5": 0.124, "eval_NanoMSMARCO_cosine_recall@1": 0.3, "eval_NanoMSMARCO_cosine_recall@10": 0.72, "eval_NanoMSMARCO_cosine_recall@3": 0.5, "eval_NanoMSMARCO_cosine_recall@5": 0.62, "eval_mse-dev_negative_mse": -82.8113021850586, "eval_runtime": 11.2139, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5043257907139727, "eval_steps_per_second": 0.0, "step": 31000 }, { "epoch": 0.5897076112101332, "grad_norm": 0.841434895992279, "learning_rate": 7.836149502781055e-05, "loss": 0.8361685943603515, "step": 31100 }, { "epoch": 0.5916037771625773, "grad_norm": 0.8636693358421326, "learning_rate": 7.825615203101299e-05, "loss": 0.8306892395019532, "step": 31200 }, { "epoch": 0.5934999431150214, "grad_norm": 0.9691203236579895, "learning_rate": 7.815080903421541e-05, "loss": 0.8314771270751953, "step": 31300 }, { "epoch": 0.5953961090674655, "grad_norm": 0.862746000289917, "learning_rate": 7.804546603741783e-05, "loss": 0.8310930633544922, "step": 31400 }, { "epoch": 0.5972922750199098, "grad_norm": 0.9316207766532898, "learning_rate": 7.794012304062026e-05, "loss": 0.8304837036132813, "step": 31500 }, { "epoch": 0.5991884409723539, "grad_norm": 0.8787679672241211, "learning_rate": 7.783478004382268e-05, "loss": 0.8304119873046875, "step": 31600 }, { "epoch": 0.601084606924798, "grad_norm": 0.8498113751411438, "learning_rate": 7.772943704702512e-05, "loss": 0.8277024841308593, "step": 31700 }, { "epoch": 0.6029807728772422, "grad_norm": 0.7722318768501282, "learning_rate": 7.762409405022754e-05, "loss": 0.8249209594726562, "step": 31800 }, { "epoch": 0.6048769388296864, "grad_norm": 0.8988415598869324, "learning_rate": 7.751875105342998e-05, "loss": 0.8261857604980469, "step": 31900 }, { "epoch": 0.6067731047821305, "grad_norm": 0.8066183924674988, "learning_rate": 7.74134080566324e-05, "loss": 0.8235664367675781, "step": 32000 }, { "epoch": 0.6067731047821305, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.42000000000000004, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.74, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.61, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.65, "eval_NanoBEIR_mean_cosine_map@100": 0.45000936240708805, "eval_NanoBEIR_mean_cosine_mrr@10": 0.5258571428571428, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.5033699936729163, "eval_NanoBEIR_mean_cosine_precision@1": 0.42000000000000004, "eval_NanoBEIR_mean_cosine_precision@10": 0.09199999999999998, "eval_NanoBEIR_mean_cosine_precision@3": 0.2433333333333333, "eval_NanoBEIR_mean_cosine_precision@5": 0.158, "eval_NanoBEIR_mean_cosine_recall@1": 0.28500000000000003, "eval_NanoBEIR_mean_cosine_recall@10": 0.625, "eval_NanoBEIR_mean_cosine_recall@3": 0.495, "eval_NanoBEIR_mean_cosine_recall@5": 0.535, "eval_NanoHotpotQA_cosine_accuracy@1": 0.54, "eval_NanoHotpotQA_cosine_accuracy@10": 0.82, "eval_NanoHotpotQA_cosine_accuracy@3": 0.7, "eval_NanoHotpotQA_cosine_accuracy@5": 0.74, "eval_NanoHotpotQA_cosine_map@100": 0.45760396761575023, "eval_NanoHotpotQA_cosine_mrr@10": 0.6278571428571429, "eval_NanoHotpotQA_cosine_ndcg@10": 0.525620245048735, "eval_NanoHotpotQA_cosine_precision@1": 0.54, "eval_NanoHotpotQA_cosine_precision@10": 0.11799999999999997, "eval_NanoHotpotQA_cosine_precision@3": 0.3133333333333333, "eval_NanoHotpotQA_cosine_precision@5": 0.204, "eval_NanoHotpotQA_cosine_recall@1": 0.27, "eval_NanoHotpotQA_cosine_recall@10": 0.59, "eval_NanoHotpotQA_cosine_recall@3": 0.47, "eval_NanoHotpotQA_cosine_recall@5": 0.51, "eval_NanoMSMARCO_cosine_accuracy@1": 0.3, "eval_NanoMSMARCO_cosine_accuracy@10": 0.66, "eval_NanoMSMARCO_cosine_accuracy@3": 0.52, "eval_NanoMSMARCO_cosine_accuracy@5": 0.56, "eval_NanoMSMARCO_cosine_map@100": 0.4424147571984259, "eval_NanoMSMARCO_cosine_mrr@10": 0.42385714285714277, "eval_NanoMSMARCO_cosine_ndcg@10": 0.48111974229709764, "eval_NanoMSMARCO_cosine_precision@1": 0.3, "eval_NanoMSMARCO_cosine_precision@10": 0.066, "eval_NanoMSMARCO_cosine_precision@3": 0.1733333333333333, "eval_NanoMSMARCO_cosine_precision@5": 0.11200000000000002, "eval_NanoMSMARCO_cosine_recall@1": 0.3, "eval_NanoMSMARCO_cosine_recall@10": 0.66, "eval_NanoMSMARCO_cosine_recall@3": 0.52, "eval_NanoMSMARCO_cosine_recall@5": 0.56, "eval_mse-dev_negative_mse": -81.73892211914062, "eval_runtime": 10.8876, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5033699936729163, "eval_steps_per_second": 0.0, "step": 32000 }, { "epoch": 0.6086692707345747, "grad_norm": 0.8193183541297913, "learning_rate": 7.730806505983482e-05, "loss": 0.82093994140625, "step": 32100 }, { "epoch": 0.6105654366870189, "grad_norm": 0.8819192051887512, "learning_rate": 7.720272206303726e-05, "loss": 0.8226362609863281, "step": 32200 }, { "epoch": 0.612461602639463, "grad_norm": 0.8473449349403381, "learning_rate": 7.709737906623968e-05, "loss": 0.8206555938720703, "step": 32300 }, { "epoch": 0.6143577685919072, "grad_norm": 0.8858373761177063, "learning_rate": 7.699203606944211e-05, "loss": 0.8223712921142579, "step": 32400 }, { "epoch": 0.6162539345443513, "grad_norm": 0.8924335837364197, "learning_rate": 7.688669307264453e-05, "loss": 0.8162551879882812, "step": 32500 }, { "epoch": 0.6181501004967955, "grad_norm": 0.9139745235443115, "learning_rate": 7.678135007584696e-05, "loss": 0.818095932006836, "step": 32600 }, { "epoch": 0.6200462664492397, "grad_norm": 0.8812312483787537, "learning_rate": 7.667600707904938e-05, "loss": 0.8147300720214844, "step": 32700 }, { "epoch": 0.6219424324016838, "grad_norm": 0.8906788229942322, "learning_rate": 7.657066408225182e-05, "loss": 0.8169952392578125, "step": 32800 }, { "epoch": 0.6238385983541279, "grad_norm": 0.8133891820907593, "learning_rate": 7.646532108545424e-05, "loss": 0.815572509765625, "step": 32900 }, { "epoch": 0.6257347643065722, "grad_norm": 0.8570773601531982, "learning_rate": 7.635997808865668e-05, "loss": 0.814079818725586, "step": 33000 }, { "epoch": 0.6257347643065722, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.4, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.78, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.5800000000000001, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.62, "eval_NanoBEIR_mean_cosine_map@100": 0.4399737313034061, "eval_NanoBEIR_mean_cosine_mrr@10": 0.5117857142857143, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.5063679664931301, "eval_NanoBEIR_mean_cosine_precision@1": 0.4, "eval_NanoBEIR_mean_cosine_precision@10": 0.097, "eval_NanoBEIR_mean_cosine_precision@3": 0.22666666666666668, "eval_NanoBEIR_mean_cosine_precision@5": 0.15200000000000002, "eval_NanoBEIR_mean_cosine_recall@1": 0.28, "eval_NanoBEIR_mean_cosine_recall@10": 0.665, "eval_NanoBEIR_mean_cosine_recall@3": 0.46499999999999997, "eval_NanoBEIR_mean_cosine_recall@5": 0.515, "eval_NanoHotpotQA_cosine_accuracy@1": 0.48, "eval_NanoHotpotQA_cosine_accuracy@10": 0.84, "eval_NanoHotpotQA_cosine_accuracy@3": 0.66, "eval_NanoHotpotQA_cosine_accuracy@5": 0.7, "eval_NanoHotpotQA_cosine_map@100": 0.4291126300296302, "eval_NanoHotpotQA_cosine_mrr@10": 0.5858809523809524, "eval_NanoHotpotQA_cosine_ndcg@10": 0.5085208412692355, "eval_NanoHotpotQA_cosine_precision@1": 0.48, "eval_NanoHotpotQA_cosine_precision@10": 0.122, "eval_NanoHotpotQA_cosine_precision@3": 0.2866666666666667, "eval_NanoHotpotQA_cosine_precision@5": 0.196, "eval_NanoHotpotQA_cosine_recall@1": 0.24, "eval_NanoHotpotQA_cosine_recall@10": 0.61, "eval_NanoHotpotQA_cosine_recall@3": 0.43, "eval_NanoHotpotQA_cosine_recall@5": 0.49, "eval_NanoMSMARCO_cosine_accuracy@1": 0.32, "eval_NanoMSMARCO_cosine_accuracy@10": 0.72, "eval_NanoMSMARCO_cosine_accuracy@3": 0.5, "eval_NanoMSMARCO_cosine_accuracy@5": 0.54, "eval_NanoMSMARCO_cosine_map@100": 0.450834832577182, "eval_NanoMSMARCO_cosine_mrr@10": 0.43769047619047613, "eval_NanoMSMARCO_cosine_ndcg@10": 0.5042150917170247, "eval_NanoMSMARCO_cosine_precision@1": 0.32, "eval_NanoMSMARCO_cosine_precision@10": 0.07200000000000001, "eval_NanoMSMARCO_cosine_precision@3": 0.16666666666666669, "eval_NanoMSMARCO_cosine_precision@5": 0.10800000000000003, "eval_NanoMSMARCO_cosine_recall@1": 0.32, "eval_NanoMSMARCO_cosine_recall@10": 0.72, "eval_NanoMSMARCO_cosine_recall@3": 0.5, "eval_NanoMSMARCO_cosine_recall@5": 0.54, "eval_mse-dev_negative_mse": -80.49793243408203, "eval_runtime": 11.8238, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5063679664931301, "eval_steps_per_second": 0.0, "step": 33000 }, { "epoch": 0.6276309302590163, "grad_norm": 0.7868529558181763, "learning_rate": 7.625568852182707e-05, "loss": 0.8087701416015625, "step": 33100 }, { "epoch": 0.6295270962114604, "grad_norm": 0.9016054272651672, "learning_rate": 7.61503455250295e-05, "loss": 0.8097662353515624, "step": 33200 }, { "epoch": 0.6314232621639045, "grad_norm": 0.9913731217384338, "learning_rate": 7.604500252823192e-05, "loss": 0.813260498046875, "step": 33300 }, { "epoch": 0.6333194281163488, "grad_norm": 0.8851051330566406, "learning_rate": 7.593965953143435e-05, "loss": 0.8086640167236329, "step": 33400 }, { "epoch": 0.6352155940687929, "grad_norm": 0.8317673206329346, "learning_rate": 7.583431653463678e-05, "loss": 0.8086080169677734, "step": 33500 }, { "epoch": 0.637111760021237, "grad_norm": 0.7769960165023804, "learning_rate": 7.572897353783922e-05, "loss": 0.8093731689453125, "step": 33600 }, { "epoch": 0.6390079259736812, "grad_norm": 0.8762325644493103, "learning_rate": 7.562363054104164e-05, "loss": 0.805412826538086, "step": 33700 }, { "epoch": 0.6409040919261254, "grad_norm": 0.8687974810600281, "learning_rate": 7.551828754424406e-05, "loss": 0.8043125915527344, "step": 33800 }, { "epoch": 0.6428002578785695, "grad_norm": 0.868188202381134, "learning_rate": 7.541294454744649e-05, "loss": 0.8034954833984375, "step": 33900 }, { "epoch": 0.6446964238310137, "grad_norm": 0.8662635087966919, "learning_rate": 7.530760155064892e-05, "loss": 0.799036865234375, "step": 34000 }, { "epoch": 0.6446964238310137, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.4, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.77, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.61, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.71, "eval_NanoBEIR_mean_cosine_map@100": 0.44345995358170154, "eval_NanoBEIR_mean_cosine_mrr@10": 0.5206190476190475, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.5078251449398562, "eval_NanoBEIR_mean_cosine_precision@1": 0.4, "eval_NanoBEIR_mean_cosine_precision@10": 0.096, "eval_NanoBEIR_mean_cosine_precision@3": 0.23333333333333334, "eval_NanoBEIR_mean_cosine_precision@5": 0.174, "eval_NanoBEIR_mean_cosine_recall@1": 0.28, "eval_NanoBEIR_mean_cosine_recall@10": 0.655, "eval_NanoBEIR_mean_cosine_recall@3": 0.475, "eval_NanoBEIR_mean_cosine_recall@5": 0.585, "eval_NanoHotpotQA_cosine_accuracy@1": 0.48, "eval_NanoHotpotQA_cosine_accuracy@10": 0.84, "eval_NanoHotpotQA_cosine_accuracy@3": 0.72, "eval_NanoHotpotQA_cosine_accuracy@5": 0.82, "eval_NanoHotpotQA_cosine_map@100": 0.4349260827283061, "eval_NanoHotpotQA_cosine_mrr@10": 0.6048571428571428, "eval_NanoHotpotQA_cosine_ndcg@10": 0.5166147735280449, "eval_NanoHotpotQA_cosine_precision@1": 0.48, "eval_NanoHotpotQA_cosine_precision@10": 0.122, "eval_NanoHotpotQA_cosine_precision@3": 0.3, "eval_NanoHotpotQA_cosine_precision@5": 0.22799999999999998, "eval_NanoHotpotQA_cosine_recall@1": 0.24, "eval_NanoHotpotQA_cosine_recall@10": 0.61, "eval_NanoHotpotQA_cosine_recall@3": 0.45, "eval_NanoHotpotQA_cosine_recall@5": 0.57, "eval_NanoMSMARCO_cosine_accuracy@1": 0.32, "eval_NanoMSMARCO_cosine_accuracy@10": 0.7, "eval_NanoMSMARCO_cosine_accuracy@3": 0.5, "eval_NanoMSMARCO_cosine_accuracy@5": 0.6, "eval_NanoMSMARCO_cosine_map@100": 0.45199382443509706, "eval_NanoMSMARCO_cosine_mrr@10": 0.43638095238095226, "eval_NanoMSMARCO_cosine_ndcg@10": 0.4990355163516675, "eval_NanoMSMARCO_cosine_precision@1": 0.32, "eval_NanoMSMARCO_cosine_precision@10": 0.07, "eval_NanoMSMARCO_cosine_precision@3": 0.16666666666666669, "eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002, "eval_NanoMSMARCO_cosine_recall@1": 0.32, "eval_NanoMSMARCO_cosine_recall@10": 0.7, "eval_NanoMSMARCO_cosine_recall@3": 0.5, "eval_NanoMSMARCO_cosine_recall@5": 0.6, "eval_mse-dev_negative_mse": -79.57255554199219, "eval_runtime": 11.3457, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5078251449398562, "eval_steps_per_second": 0.0, "step": 34000 }, { "epoch": 0.6465925897834578, "grad_norm": 0.8791268467903137, "learning_rate": 7.520225855385135e-05, "loss": 0.8034612274169922, "step": 34100 }, { "epoch": 0.648488755735902, "grad_norm": 0.9503916501998901, "learning_rate": 7.509691555705377e-05, "loss": 0.7990459442138672, "step": 34200 }, { "epoch": 0.6503849216883462, "grad_norm": 0.8711104393005371, "learning_rate": 7.499157256025619e-05, "loss": 0.7996244812011719, "step": 34300 }, { "epoch": 0.6522810876407903, "grad_norm": 0.8348352313041687, "learning_rate": 7.488622956345862e-05, "loss": 0.8004853820800781, "step": 34400 }, { "epoch": 0.6541772535932345, "grad_norm": 0.8777920007705688, "learning_rate": 7.478088656666105e-05, "loss": 0.8000244140625, "step": 34500 }, { "epoch": 0.6560734195456787, "grad_norm": 0.847030758857727, "learning_rate": 7.467554356986348e-05, "loss": 0.7975210571289062, "step": 34600 }, { "epoch": 0.6579695854981228, "grad_norm": 0.8619401454925537, "learning_rate": 7.457020057306591e-05, "loss": 0.7959075927734375, "step": 34700 }, { "epoch": 0.6598657514505669, "grad_norm": 0.8588744401931763, "learning_rate": 7.446485757626834e-05, "loss": 0.7920943450927734, "step": 34800 }, { "epoch": 0.6617619174030112, "grad_norm": 0.7903246879577637, "learning_rate": 7.435951457947076e-05, "loss": 0.7915798187255859, "step": 34900 }, { "epoch": 0.6636580833554553, "grad_norm": 0.9617411494255066, "learning_rate": 7.425417158267318e-05, "loss": 0.7933383178710938, "step": 35000 }, { "epoch": 0.6636580833554553, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.41000000000000003, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.75, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.62, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6799999999999999, "eval_NanoBEIR_mean_cosine_map@100": 0.45535390379706975, "eval_NanoBEIR_mean_cosine_mrr@10": 0.526079365079365, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.5121596378017648, "eval_NanoBEIR_mean_cosine_precision@1": 0.41000000000000003, "eval_NanoBEIR_mean_cosine_precision@10": 0.094, "eval_NanoBEIR_mean_cosine_precision@3": 0.2433333333333333, "eval_NanoBEIR_mean_cosine_precision@5": 0.16199999999999998, "eval_NanoBEIR_mean_cosine_recall@1": 0.29000000000000004, "eval_NanoBEIR_mean_cosine_recall@10": 0.645, "eval_NanoBEIR_mean_cosine_recall@3": 0.495, "eval_NanoBEIR_mean_cosine_recall@5": 0.55, "eval_NanoHotpotQA_cosine_accuracy@1": 0.48, "eval_NanoHotpotQA_cosine_accuracy@10": 0.8, "eval_NanoHotpotQA_cosine_accuracy@3": 0.72, "eval_NanoHotpotQA_cosine_accuracy@5": 0.78, "eval_NanoHotpotQA_cosine_map@100": 0.44473741551922635, "eval_NanoHotpotQA_cosine_mrr@10": 0.6006666666666667, "eval_NanoHotpotQA_cosine_ndcg@10": 0.5139083384213542, "eval_NanoHotpotQA_cosine_precision@1": 0.48, "eval_NanoHotpotQA_cosine_precision@10": 0.118, "eval_NanoHotpotQA_cosine_precision@3": 0.3133333333333333, "eval_NanoHotpotQA_cosine_precision@5": 0.20799999999999996, "eval_NanoHotpotQA_cosine_recall@1": 0.24, "eval_NanoHotpotQA_cosine_recall@10": 0.59, "eval_NanoHotpotQA_cosine_recall@3": 0.47, "eval_NanoHotpotQA_cosine_recall@5": 0.52, "eval_NanoMSMARCO_cosine_accuracy@1": 0.34, "eval_NanoMSMARCO_cosine_accuracy@10": 0.7, "eval_NanoMSMARCO_cosine_accuracy@3": 0.52, "eval_NanoMSMARCO_cosine_accuracy@5": 0.58, "eval_NanoMSMARCO_cosine_map@100": 0.4659703920749132, "eval_NanoMSMARCO_cosine_mrr@10": 0.4514920634920634, "eval_NanoMSMARCO_cosine_ndcg@10": 0.5104109371821753, "eval_NanoMSMARCO_cosine_precision@1": 0.34, "eval_NanoMSMARCO_cosine_precision@10": 0.07, "eval_NanoMSMARCO_cosine_precision@3": 0.1733333333333333, "eval_NanoMSMARCO_cosine_precision@5": 0.11600000000000002, "eval_NanoMSMARCO_cosine_recall@1": 0.34, "eval_NanoMSMARCO_cosine_recall@10": 0.7, "eval_NanoMSMARCO_cosine_recall@3": 0.52, "eval_NanoMSMARCO_cosine_recall@5": 0.58, "eval_mse-dev_negative_mse": -78.7884292602539, "eval_runtime": 10.7989, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5121596378017648, "eval_steps_per_second": 0.0, "step": 35000 }, { "epoch": 0.6655542493078994, "grad_norm": 0.8038257956504822, "learning_rate": 7.414882858587562e-05, "loss": 0.7908313751220704, "step": 35100 }, { "epoch": 0.6674504152603435, "grad_norm": 0.8573588132858276, "learning_rate": 7.404453901904602e-05, "loss": 0.7913258361816407, "step": 35200 }, { "epoch": 0.6693465812127878, "grad_norm": 0.829589307308197, "learning_rate": 7.393919602224845e-05, "loss": 0.7921287536621093, "step": 35300 }, { "epoch": 0.6712427471652319, "grad_norm": 0.8911552429199219, "learning_rate": 7.383385302545088e-05, "loss": 0.7928565979003906, "step": 35400 }, { "epoch": 0.673138913117676, "grad_norm": 0.9379572868347168, "learning_rate": 7.37285100286533e-05, "loss": 0.7914694213867187, "step": 35500 }, { "epoch": 0.6750350790701202, "grad_norm": 0.9253071546554565, "learning_rate": 7.362316703185572e-05, "loss": 0.7871210479736328, "step": 35600 }, { "epoch": 0.6769312450225644, "grad_norm": 0.9133068323135376, "learning_rate": 7.351782403505816e-05, "loss": 0.7835692596435547, "step": 35700 }, { "epoch": 0.6788274109750085, "grad_norm": 0.8401673436164856, "learning_rate": 7.341248103826058e-05, "loss": 0.7804772186279297, "step": 35800 }, { "epoch": 0.6807235769274527, "grad_norm": 0.8454675674438477, "learning_rate": 7.330713804146301e-05, "loss": 0.7870156860351563, "step": 35900 }, { "epoch": 0.6826197428798968, "grad_norm": 0.83338463306427, "learning_rate": 7.320179504466543e-05, "loss": 0.7796939849853516, "step": 36000 }, { "epoch": 0.6826197428798968, "eval_NanoBEIR_mean_cosine_accuracy@1": 0.42000000000000004, "eval_NanoBEIR_mean_cosine_accuracy@10": 0.8, "eval_NanoBEIR_mean_cosine_accuracy@3": 0.64, "eval_NanoBEIR_mean_cosine_accuracy@5": 0.69, "eval_NanoBEIR_mean_cosine_map@100": 0.46799146568426697, "eval_NanoBEIR_mean_cosine_mrr@10": 0.5508928571428571, "eval_NanoBEIR_mean_cosine_ndcg@10": 0.5353904032358002, "eval_NanoBEIR_mean_cosine_precision@1": 0.42000000000000004, "eval_NanoBEIR_mean_cosine_precision@10": 0.099, "eval_NanoBEIR_mean_cosine_precision@3": 0.2533333333333333, "eval_NanoBEIR_mean_cosine_precision@5": 0.17, "eval_NanoBEIR_mean_cosine_recall@1": 0.29000000000000004, "eval_NanoBEIR_mean_cosine_recall@10": 0.685, "eval_NanoBEIR_mean_cosine_recall@3": 0.51, "eval_NanoBEIR_mean_cosine_recall@5": 0.575, "eval_NanoHotpotQA_cosine_accuracy@1": 0.52, "eval_NanoHotpotQA_cosine_accuracy@10": 0.84, "eval_NanoHotpotQA_cosine_accuracy@3": 0.76, "eval_NanoHotpotQA_cosine_accuracy@5": 0.78, "eval_NanoHotpotQA_cosine_map@100": 0.47358422601023775, "eval_NanoHotpotQA_cosine_mrr@10": 0.6494444444444444, "eval_NanoHotpotQA_cosine_ndcg@10": 0.5456863439791646, "eval_NanoHotpotQA_cosine_precision@1": 0.52, "eval_NanoHotpotQA_cosine_precision@10": 0.122, "eval_NanoHotpotQA_cosine_precision@3": 0.33333333333333326, "eval_NanoHotpotQA_cosine_precision@5": 0.22, "eval_NanoHotpotQA_cosine_recall@1": 0.26, "eval_NanoHotpotQA_cosine_recall@10": 0.61, "eval_NanoHotpotQA_cosine_recall@3": 0.5, "eval_NanoHotpotQA_cosine_recall@5": 0.55, "eval_NanoMSMARCO_cosine_accuracy@1": 0.32, "eval_NanoMSMARCO_cosine_accuracy@10": 0.76, "eval_NanoMSMARCO_cosine_accuracy@3": 0.52, "eval_NanoMSMARCO_cosine_accuracy@5": 0.6, "eval_NanoMSMARCO_cosine_map@100": 0.4623987053582962, "eval_NanoMSMARCO_cosine_mrr@10": 0.4523412698412697, "eval_NanoMSMARCO_cosine_ndcg@10": 0.5250944624924359, "eval_NanoMSMARCO_cosine_precision@1": 0.32, "eval_NanoMSMARCO_cosine_precision@10": 0.07600000000000001, "eval_NanoMSMARCO_cosine_precision@3": 0.1733333333333333, "eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002, "eval_NanoMSMARCO_cosine_recall@1": 0.32, "eval_NanoMSMARCO_cosine_recall@10": 0.76, "eval_NanoMSMARCO_cosine_recall@3": 0.52, "eval_NanoMSMARCO_cosine_recall@5": 0.6, "eval_mse-dev_negative_mse": -77.74003601074219, "eval_runtime": 11.1488, "eval_samples_per_second": 0.0, "eval_sequential_score": 0.5353904032358002, "eval_steps_per_second": 0.0, "step": 36000 } ], "logging_steps": 100, "max_steps": 105476, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }