ModernBERT-small-v2 / trainer_state.json
johnnyboycurtis's picture
Upload folder using huggingface_hub
3a7af12 verified
{
"best_global_step": 36000,
"best_metric": 0.5250944624924359,
"best_model_checkpoint": "ModernBERT-small-distilled-v2/checkpoint-36000",
"epoch": 0.6826197428798968,
"eval_steps": 1000,
"global_step": 36000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0018961659524441578,
"grad_norm": 1.531716227531433,
"learning_rate": 9.38566552901024e-07,
"loss": 4.2698190307617185,
"step": 100
},
{
"epoch": 0.0037923319048883157,
"grad_norm": 1.45695960521698,
"learning_rate": 1.8866135760333712e-06,
"loss": 4.230399475097657,
"step": 200
},
{
"epoch": 0.005688497857332474,
"grad_norm": 1.4260753393173218,
"learning_rate": 2.8346605991657187e-06,
"loss": 4.128007202148438,
"step": 300
},
{
"epoch": 0.007584663809776631,
"grad_norm": 1.9604460000991821,
"learning_rate": 3.7827076222980664e-06,
"loss": 3.8576431274414062,
"step": 400
},
{
"epoch": 0.00948082976222079,
"grad_norm": 0.8248822689056396,
"learning_rate": 4.730754645430414e-06,
"loss": 3.1561373901367187,
"step": 500
},
{
"epoch": 0.011376995714664948,
"grad_norm": 0.6517618894577026,
"learning_rate": 5.678801668562761e-06,
"loss": 2.552709503173828,
"step": 600
},
{
"epoch": 0.013273161667109106,
"grad_norm": 0.5630219578742981,
"learning_rate": 6.626848691695109e-06,
"loss": 2.327459716796875,
"step": 700
},
{
"epoch": 0.015169327619553263,
"grad_norm": 0.4430118799209595,
"learning_rate": 7.574895714827455e-06,
"loss": 2.2655821228027344,
"step": 800
},
{
"epoch": 0.017065493571997423,
"grad_norm": 0.5574463605880737,
"learning_rate": 8.522942737959804e-06,
"loss": 2.2401161193847656,
"step": 900
},
{
"epoch": 0.01896165952444158,
"grad_norm": 0.48447561264038086,
"learning_rate": 9.47098976109215e-06,
"loss": 2.22558349609375,
"step": 1000
},
{
"epoch": 0.01896165952444158,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.01,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.16999999999999998,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.04,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.06999999999999999,
"eval_NanoBEIR_mean_cosine_map@100": 0.030615740568451958,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.04062301587301588,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.05451501792713928,
"eval_NanoBEIR_mean_cosine_precision@1": 0.01,
"eval_NanoBEIR_mean_cosine_precision@10": 0.018000000000000002,
"eval_NanoBEIR_mean_cosine_precision@3": 0.013333333333333332,
"eval_NanoBEIR_mean_cosine_precision@5": 0.014,
"eval_NanoBEIR_mean_cosine_recall@1": 0.005,
"eval_NanoBEIR_mean_cosine_recall@10": 0.13,
"eval_NanoBEIR_mean_cosine_recall@3": 0.02,
"eval_NanoBEIR_mean_cosine_recall@5": 0.04,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.02,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.18,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.08,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.12,
"eval_NanoHotpotQA_cosine_map@100": 0.03479001595433966,
"eval_NanoHotpotQA_cosine_mrr@10": 0.0601904761904762,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.05765775039428842,
"eval_NanoHotpotQA_cosine_precision@1": 0.02,
"eval_NanoHotpotQA_cosine_precision@10": 0.020000000000000004,
"eval_NanoHotpotQA_cosine_precision@3": 0.026666666666666665,
"eval_NanoHotpotQA_cosine_precision@5": 0.024,
"eval_NanoHotpotQA_cosine_recall@1": 0.01,
"eval_NanoHotpotQA_cosine_recall@10": 0.1,
"eval_NanoHotpotQA_cosine_recall@3": 0.04,
"eval_NanoHotpotQA_cosine_recall@5": 0.06,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.0,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.16,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.0,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.02,
"eval_NanoMSMARCO_cosine_map@100": 0.026441465182564253,
"eval_NanoMSMARCO_cosine_mrr@10": 0.021055555555555557,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.05137228545999013,
"eval_NanoMSMARCO_cosine_precision@1": 0.0,
"eval_NanoMSMARCO_cosine_precision@10": 0.016,
"eval_NanoMSMARCO_cosine_precision@3": 0.0,
"eval_NanoMSMARCO_cosine_precision@5": 0.004,
"eval_NanoMSMARCO_cosine_recall@1": 0.0,
"eval_NanoMSMARCO_cosine_recall@10": 0.16,
"eval_NanoMSMARCO_cosine_recall@3": 0.0,
"eval_NanoMSMARCO_cosine_recall@5": 0.02,
"eval_mse-dev_negative_mse": -221.21437072753906,
"eval_runtime": 11.2871,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.05451501792713928,
"eval_steps_per_second": 0.0,
"step": 1000
},
{
"epoch": 0.020857825476885736,
"grad_norm": 0.6331929564476013,
"learning_rate": 1.0419036784224499e-05,
"loss": 2.213970031738281,
"step": 1100
},
{
"epoch": 0.022753991429329896,
"grad_norm": 0.501175045967102,
"learning_rate": 1.1367083807356845e-05,
"loss": 2.191977081298828,
"step": 1200
},
{
"epoch": 0.024650157381774052,
"grad_norm": 0.5054857134819031,
"learning_rate": 1.2315130830489193e-05,
"loss": 2.1839501953125,
"step": 1300
},
{
"epoch": 0.026546323334218212,
"grad_norm": 0.6071318984031677,
"learning_rate": 1.326317785362154e-05,
"loss": 2.1661726379394532,
"step": 1400
},
{
"epoch": 0.02844248928666237,
"grad_norm": 0.508758008480072,
"learning_rate": 1.4211224876753888e-05,
"loss": 2.1598078918457033,
"step": 1500
},
{
"epoch": 0.030338655239106525,
"grad_norm": 0.7203693985939026,
"learning_rate": 1.5159271899886234e-05,
"loss": 2.145241394042969,
"step": 1600
},
{
"epoch": 0.03223482119155068,
"grad_norm": 0.5547841787338257,
"learning_rate": 1.6107318923018582e-05,
"loss": 2.122596435546875,
"step": 1700
},
{
"epoch": 0.034130987143994845,
"grad_norm": 0.7341112494468689,
"learning_rate": 1.705536594615093e-05,
"loss": 2.106784210205078,
"step": 1800
},
{
"epoch": 0.036027153096439,
"grad_norm": 0.6560561656951904,
"learning_rate": 1.800341296928328e-05,
"loss": 2.0941481018066406,
"step": 1900
},
{
"epoch": 0.03792331904888316,
"grad_norm": 0.7147130966186523,
"learning_rate": 1.8951459992415623e-05,
"loss": 2.0796484375,
"step": 2000
},
{
"epoch": 0.03792331904888316,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.06,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.2,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.14,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.15,
"eval_NanoBEIR_mean_cosine_map@100": 0.09355619049166879,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.10269444444444445,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.10765372452496824,
"eval_NanoBEIR_mean_cosine_precision@1": 0.06,
"eval_NanoBEIR_mean_cosine_precision@10": 0.021,
"eval_NanoBEIR_mean_cosine_precision@3": 0.04666666666666666,
"eval_NanoBEIR_mean_cosine_precision@5": 0.030000000000000002,
"eval_NanoBEIR_mean_cosine_recall@1": 0.045,
"eval_NanoBEIR_mean_cosine_recall@10": 0.16499999999999998,
"eval_NanoBEIR_mean_cosine_recall@3": 0.11499999999999999,
"eval_NanoBEIR_mean_cosine_recall@5": 0.12,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.06,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.16,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.1,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.12,
"eval_NanoHotpotQA_cosine_map@100": 0.057732668001867715,
"eval_NanoHotpotQA_cosine_mrr@10": 0.08650000000000001,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.0671827764380485,
"eval_NanoHotpotQA_cosine_precision@1": 0.06,
"eval_NanoHotpotQA_cosine_precision@10": 0.018,
"eval_NanoHotpotQA_cosine_precision@3": 0.03333333333333333,
"eval_NanoHotpotQA_cosine_precision@5": 0.024,
"eval_NanoHotpotQA_cosine_recall@1": 0.03,
"eval_NanoHotpotQA_cosine_recall@10": 0.09,
"eval_NanoHotpotQA_cosine_recall@3": 0.05,
"eval_NanoHotpotQA_cosine_recall@5": 0.06,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.06,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.24,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.18,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.18,
"eval_NanoMSMARCO_cosine_map@100": 0.12937971298146986,
"eval_NanoMSMARCO_cosine_mrr@10": 0.11888888888888888,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.14812467261188797,
"eval_NanoMSMARCO_cosine_precision@1": 0.06,
"eval_NanoMSMARCO_cosine_precision@10": 0.024000000000000004,
"eval_NanoMSMARCO_cosine_precision@3": 0.06,
"eval_NanoMSMARCO_cosine_precision@5": 0.036000000000000004,
"eval_NanoMSMARCO_cosine_recall@1": 0.06,
"eval_NanoMSMARCO_cosine_recall@10": 0.24,
"eval_NanoMSMARCO_cosine_recall@3": 0.18,
"eval_NanoMSMARCO_cosine_recall@5": 0.18,
"eval_mse-dev_negative_mse": -206.88653564453125,
"eval_runtime": 12.6634,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.10765372452496824,
"eval_steps_per_second": 0.0,
"step": 2000
},
{
"epoch": 0.039819485001327315,
"grad_norm": 0.6483538746833801,
"learning_rate": 1.989950701554797e-05,
"loss": 2.062061767578125,
"step": 2100
},
{
"epoch": 0.04171565095377147,
"grad_norm": 0.7314621806144714,
"learning_rate": 2.084755403868032e-05,
"loss": 2.054515838623047,
"step": 2200
},
{
"epoch": 0.043611816906215635,
"grad_norm": 0.7281008362770081,
"learning_rate": 2.1795601061812668e-05,
"loss": 2.0381907653808593,
"step": 2300
},
{
"epoch": 0.04550798285865979,
"grad_norm": 0.7791172862052917,
"learning_rate": 2.2743648084945016e-05,
"loss": 2.0266854858398435,
"step": 2400
},
{
"epoch": 0.04740414881110395,
"grad_norm": 0.7451071739196777,
"learning_rate": 2.369169510807736e-05,
"loss": 2.016678466796875,
"step": 2500
},
{
"epoch": 0.049300314763548105,
"grad_norm": 0.8240593671798706,
"learning_rate": 2.463974213120971e-05,
"loss": 2.004122619628906,
"step": 2600
},
{
"epoch": 0.05119648071599226,
"grad_norm": 0.8770548701286316,
"learning_rate": 2.5587789154342057e-05,
"loss": 1.990180206298828,
"step": 2700
},
{
"epoch": 0.053092646668436425,
"grad_norm": 0.8051754236221313,
"learning_rate": 2.6535836177474405e-05,
"loss": 1.9746481323242187,
"step": 2800
},
{
"epoch": 0.05498881262088058,
"grad_norm": 0.8228394389152527,
"learning_rate": 2.7483883200606753e-05,
"loss": 1.9650479125976563,
"step": 2900
},
{
"epoch": 0.05688497857332474,
"grad_norm": 0.9059156775474548,
"learning_rate": 2.84319302237391e-05,
"loss": 1.9538874816894531,
"step": 3000
},
{
"epoch": 0.05688497857332474,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.05,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.27,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.12000000000000001,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.18,
"eval_NanoBEIR_mean_cosine_map@100": 0.10268455521269725,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.10739682539682541,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.12427348063841058,
"eval_NanoBEIR_mean_cosine_precision@1": 0.05,
"eval_NanoBEIR_mean_cosine_precision@10": 0.031,
"eval_NanoBEIR_mean_cosine_precision@3": 0.043333333333333335,
"eval_NanoBEIR_mean_cosine_precision@5": 0.042,
"eval_NanoBEIR_mean_cosine_recall@1": 0.035,
"eval_NanoBEIR_mean_cosine_recall@10": 0.215,
"eval_NanoBEIR_mean_cosine_recall@3": 0.09,
"eval_NanoBEIR_mean_cosine_recall@5": 0.14500000000000002,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.06,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.3,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.14,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.2,
"eval_NanoHotpotQA_cosine_map@100": 0.09775604930816952,
"eval_NanoHotpotQA_cosine_mrr@10": 0.12591269841269842,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.1241974731265571,
"eval_NanoHotpotQA_cosine_precision@1": 0.06,
"eval_NanoHotpotQA_cosine_precision@10": 0.038,
"eval_NanoHotpotQA_cosine_precision@3": 0.05333333333333333,
"eval_NanoHotpotQA_cosine_precision@5": 0.052000000000000005,
"eval_NanoHotpotQA_cosine_recall@1": 0.03,
"eval_NanoHotpotQA_cosine_recall@10": 0.19,
"eval_NanoHotpotQA_cosine_recall@3": 0.08,
"eval_NanoHotpotQA_cosine_recall@5": 0.13,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.04,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.24,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.1,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.16,
"eval_NanoMSMARCO_cosine_map@100": 0.10761306111722498,
"eval_NanoMSMARCO_cosine_mrr@10": 0.0888809523809524,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.12434948815026406,
"eval_NanoMSMARCO_cosine_precision@1": 0.04,
"eval_NanoMSMARCO_cosine_precision@10": 0.024,
"eval_NanoMSMARCO_cosine_precision@3": 0.03333333333333333,
"eval_NanoMSMARCO_cosine_precision@5": 0.032,
"eval_NanoMSMARCO_cosine_recall@1": 0.04,
"eval_NanoMSMARCO_cosine_recall@10": 0.24,
"eval_NanoMSMARCO_cosine_recall@3": 0.1,
"eval_NanoMSMARCO_cosine_recall@5": 0.16,
"eval_mse-dev_negative_mse": -194.54396057128906,
"eval_runtime": 11.1789,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.12427348063841058,
"eval_steps_per_second": 0.0,
"step": 3000
},
{
"epoch": 0.058781144525768894,
"grad_norm": 1.0623186826705933,
"learning_rate": 2.937997724687145e-05,
"loss": 1.9401417541503907,
"step": 3100
},
{
"epoch": 0.06067731047821305,
"grad_norm": 0.8394317030906677,
"learning_rate": 3.032802427000379e-05,
"loss": 1.93172607421875,
"step": 3200
},
{
"epoch": 0.06257347643065721,
"grad_norm": 0.7523216009140015,
"learning_rate": 3.127607129313614e-05,
"loss": 1.9180873107910157,
"step": 3300
},
{
"epoch": 0.06446964238310136,
"grad_norm": 0.8299034237861633,
"learning_rate": 3.222411831626849e-05,
"loss": 1.9097779846191407,
"step": 3400
},
{
"epoch": 0.06636580833554552,
"grad_norm": 0.7642733454704285,
"learning_rate": 3.3172165339400835e-05,
"loss": 1.8983055114746095,
"step": 3500
},
{
"epoch": 0.06826197428798969,
"grad_norm": 0.806705892086029,
"learning_rate": 3.412021236253318e-05,
"loss": 1.8924456787109376,
"step": 3600
},
{
"epoch": 0.07015814024043385,
"grad_norm": 0.786217212677002,
"learning_rate": 3.506825938566553e-05,
"loss": 1.8805953979492187,
"step": 3700
},
{
"epoch": 0.072054306192878,
"grad_norm": 0.8994006514549255,
"learning_rate": 3.601630640879788e-05,
"loss": 1.8717079162597656,
"step": 3800
},
{
"epoch": 0.07395047214532216,
"grad_norm": 0.856419026851654,
"learning_rate": 3.696435343193023e-05,
"loss": 1.8591105651855468,
"step": 3900
},
{
"epoch": 0.07584663809776632,
"grad_norm": 0.9824651479721069,
"learning_rate": 3.7912400455062576e-05,
"loss": 1.8524658203125,
"step": 4000
},
{
"epoch": 0.07584663809776632,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.06,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.38,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.19,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.28,
"eval_NanoBEIR_mean_cosine_map@100": 0.1284317459612893,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.14647222222222223,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.16958431091390092,
"eval_NanoBEIR_mean_cosine_precision@1": 0.06,
"eval_NanoBEIR_mean_cosine_precision@10": 0.044000000000000004,
"eval_NanoBEIR_mean_cosine_precision@3": 0.06666666666666665,
"eval_NanoBEIR_mean_cosine_precision@5": 0.06200000000000001,
"eval_NanoBEIR_mean_cosine_recall@1": 0.04,
"eval_NanoBEIR_mean_cosine_recall@10": 0.30000000000000004,
"eval_NanoBEIR_mean_cosine_recall@3": 0.14,
"eval_NanoBEIR_mean_cosine_recall@5": 0.215,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.08,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.44,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.22,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.32,
"eval_NanoHotpotQA_cosine_map@100": 0.12429452794406634,
"eval_NanoHotpotQA_cosine_mrr@10": 0.17600000000000002,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.1744798681125654,
"eval_NanoHotpotQA_cosine_precision@1": 0.08,
"eval_NanoHotpotQA_cosine_precision@10": 0.05600000000000001,
"eval_NanoHotpotQA_cosine_precision@3": 0.07999999999999999,
"eval_NanoHotpotQA_cosine_precision@5": 0.07600000000000001,
"eval_NanoHotpotQA_cosine_recall@1": 0.04,
"eval_NanoHotpotQA_cosine_recall@10": 0.28,
"eval_NanoHotpotQA_cosine_recall@3": 0.12,
"eval_NanoHotpotQA_cosine_recall@5": 0.19,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.04,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.32,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.16,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.24,
"eval_NanoMSMARCO_cosine_map@100": 0.1325689639785123,
"eval_NanoMSMARCO_cosine_mrr@10": 0.11694444444444443,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.16468875371523642,
"eval_NanoMSMARCO_cosine_precision@1": 0.04,
"eval_NanoMSMARCO_cosine_precision@10": 0.032,
"eval_NanoMSMARCO_cosine_precision@3": 0.05333333333333333,
"eval_NanoMSMARCO_cosine_precision@5": 0.04800000000000001,
"eval_NanoMSMARCO_cosine_recall@1": 0.04,
"eval_NanoMSMARCO_cosine_recall@10": 0.32,
"eval_NanoMSMARCO_cosine_recall@3": 0.16,
"eval_NanoMSMARCO_cosine_recall@5": 0.24,
"eval_mse-dev_negative_mse": -184.20260620117188,
"eval_runtime": 11.2486,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.16958431091390092,
"eval_steps_per_second": 0.0,
"step": 4000
},
{
"epoch": 0.07774280405021047,
"grad_norm": 0.7716678380966187,
"learning_rate": 3.8860447478194924e-05,
"loss": 1.8416305541992188,
"step": 4100
},
{
"epoch": 0.07963897000265463,
"grad_norm": 0.8711826801300049,
"learning_rate": 3.980849450132727e-05,
"loss": 1.8359121704101562,
"step": 4200
},
{
"epoch": 0.08153513595509879,
"grad_norm": 0.9473533630371094,
"learning_rate": 4.075654152445961e-05,
"loss": 1.825589141845703,
"step": 4300
},
{
"epoch": 0.08343130190754294,
"grad_norm": 0.8626433610916138,
"learning_rate": 4.170458854759196e-05,
"loss": 1.8131285095214844,
"step": 4400
},
{
"epoch": 0.0853274678599871,
"grad_norm": 0.9295884370803833,
"learning_rate": 4.265263557072431e-05,
"loss": 1.8063204956054688,
"step": 4500
},
{
"epoch": 0.08722363381243127,
"grad_norm": 0.9008107781410217,
"learning_rate": 4.360068259385666e-05,
"loss": 1.7949688720703125,
"step": 4600
},
{
"epoch": 0.08911979976487543,
"grad_norm": 0.791011393070221,
"learning_rate": 4.4548729616989006e-05,
"loss": 1.7845721435546875,
"step": 4700
},
{
"epoch": 0.09101596571731958,
"grad_norm": 0.7334835529327393,
"learning_rate": 4.5496776640121354e-05,
"loss": 1.7761888122558593,
"step": 4800
},
{
"epoch": 0.09291213166976374,
"grad_norm": 0.9481487274169922,
"learning_rate": 4.64448236632537e-05,
"loss": 1.7620162963867188,
"step": 4900
},
{
"epoch": 0.0948082976222079,
"grad_norm": 0.870833694934845,
"learning_rate": 4.739287068638605e-05,
"loss": 1.7605400085449219,
"step": 5000
},
{
"epoch": 0.0948082976222079,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.1,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.41000000000000003,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.28,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.33999999999999997,
"eval_NanoBEIR_mean_cosine_map@100": 0.1626079879266355,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.19677777777777777,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.19920527873932037,
"eval_NanoBEIR_mean_cosine_precision@1": 0.1,
"eval_NanoBEIR_mean_cosine_precision@10": 0.045000000000000005,
"eval_NanoBEIR_mean_cosine_precision@3": 0.09666666666666665,
"eval_NanoBEIR_mean_cosine_precision@5": 0.07,
"eval_NanoBEIR_mean_cosine_recall@1": 0.07,
"eval_NanoBEIR_mean_cosine_recall@10": 0.305,
"eval_NanoBEIR_mean_cosine_recall@3": 0.2,
"eval_NanoBEIR_mean_cosine_recall@5": 0.24,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.12,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.5,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.34,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.42,
"eval_NanoHotpotQA_cosine_map@100": 0.15098296694670035,
"eval_NanoHotpotQA_cosine_mrr@10": 0.23677777777777778,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.20242024631804575,
"eval_NanoHotpotQA_cosine_precision@1": 0.12,
"eval_NanoHotpotQA_cosine_precision@10": 0.05800000000000001,
"eval_NanoHotpotQA_cosine_precision@3": 0.11999999999999998,
"eval_NanoHotpotQA_cosine_precision@5": 0.08800000000000001,
"eval_NanoHotpotQA_cosine_recall@1": 0.06,
"eval_NanoHotpotQA_cosine_recall@10": 0.29,
"eval_NanoHotpotQA_cosine_recall@3": 0.18,
"eval_NanoHotpotQA_cosine_recall@5": 0.22,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.08,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.32,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.22,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.26,
"eval_NanoMSMARCO_cosine_map@100": 0.17423300890657065,
"eval_NanoMSMARCO_cosine_mrr@10": 0.15677777777777777,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.195990311160595,
"eval_NanoMSMARCO_cosine_precision@1": 0.08,
"eval_NanoMSMARCO_cosine_precision@10": 0.032,
"eval_NanoMSMARCO_cosine_precision@3": 0.07333333333333333,
"eval_NanoMSMARCO_cosine_precision@5": 0.052000000000000005,
"eval_NanoMSMARCO_cosine_recall@1": 0.08,
"eval_NanoMSMARCO_cosine_recall@10": 0.32,
"eval_NanoMSMARCO_cosine_recall@3": 0.22,
"eval_NanoMSMARCO_cosine_recall@5": 0.26,
"eval_mse-dev_negative_mse": -175.1685333251953,
"eval_runtime": 11.7971,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.19920527873932037,
"eval_steps_per_second": 0.0,
"step": 5000
},
{
"epoch": 0.09670446357465205,
"grad_norm": 0.9423368573188782,
"learning_rate": 4.83409177095184e-05,
"loss": 1.7480519104003907,
"step": 5100
},
{
"epoch": 0.09860062952709621,
"grad_norm": 0.980880856513977,
"learning_rate": 4.9288964732650746e-05,
"loss": 1.7419432067871095,
"step": 5200
},
{
"epoch": 0.10049679547954037,
"grad_norm": 0.8834021687507629,
"learning_rate": 5.0237011755783095e-05,
"loss": 1.730076446533203,
"step": 5300
},
{
"epoch": 0.10239296143198452,
"grad_norm": 0.9464291930198669,
"learning_rate": 5.118505877891544e-05,
"loss": 1.727989959716797,
"step": 5400
},
{
"epoch": 0.10428912738442868,
"grad_norm": 0.9521955251693726,
"learning_rate": 5.213310580204779e-05,
"loss": 1.7130671691894532,
"step": 5500
},
{
"epoch": 0.10618529333687285,
"grad_norm": 0.8180538415908813,
"learning_rate": 5.308115282518014e-05,
"loss": 1.7063189697265626,
"step": 5600
},
{
"epoch": 0.108081459289317,
"grad_norm": 0.9113965034484863,
"learning_rate": 5.402919984831249e-05,
"loss": 1.695858917236328,
"step": 5700
},
{
"epoch": 0.10997762524176116,
"grad_norm": 0.8348143100738525,
"learning_rate": 5.497724687144482e-05,
"loss": 1.6884242248535157,
"step": 5800
},
{
"epoch": 0.11187379119420532,
"grad_norm": 1.00839364528656,
"learning_rate": 5.592529389457717e-05,
"loss": 1.6800929260253907,
"step": 5900
},
{
"epoch": 0.11376995714664948,
"grad_norm": 0.9566198587417603,
"learning_rate": 5.687334091770952e-05,
"loss": 1.6699765014648438,
"step": 6000
},
{
"epoch": 0.11376995714664948,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.15,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.42000000000000004,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.29000000000000004,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.35,
"eval_NanoBEIR_mean_cosine_map@100": 0.19699705475888196,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.2369126984126984,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.2321280636169713,
"eval_NanoBEIR_mean_cosine_precision@1": 0.15,
"eval_NanoBEIR_mean_cosine_precision@10": 0.04700000000000001,
"eval_NanoBEIR_mean_cosine_precision@3": 0.09999999999999999,
"eval_NanoBEIR_mean_cosine_precision@5": 0.07600000000000001,
"eval_NanoBEIR_mean_cosine_recall@1": 0.105,
"eval_NanoBEIR_mean_cosine_recall@10": 0.33,
"eval_NanoBEIR_mean_cosine_recall@3": 0.22000000000000003,
"eval_NanoBEIR_mean_cosine_recall@5": 0.27,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.18,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.46,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.3,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.38,
"eval_NanoHotpotQA_cosine_map@100": 0.17208604019775084,
"eval_NanoHotpotQA_cosine_mrr@10": 0.2664126984126984,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.21500115424853145,
"eval_NanoHotpotQA_cosine_precision@1": 0.18,
"eval_NanoHotpotQA_cosine_precision@10": 0.05600000000000001,
"eval_NanoHotpotQA_cosine_precision@3": 0.10666666666666666,
"eval_NanoHotpotQA_cosine_precision@5": 0.08800000000000001,
"eval_NanoHotpotQA_cosine_recall@1": 0.09,
"eval_NanoHotpotQA_cosine_recall@10": 0.28,
"eval_NanoHotpotQA_cosine_recall@3": 0.16,
"eval_NanoHotpotQA_cosine_recall@5": 0.22,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.12,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.38,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.28,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.32,
"eval_NanoMSMARCO_cosine_map@100": 0.2219080693200131,
"eval_NanoMSMARCO_cosine_mrr@10": 0.20741269841269844,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.24925497298541116,
"eval_NanoMSMARCO_cosine_precision@1": 0.12,
"eval_NanoMSMARCO_cosine_precision@10": 0.038000000000000006,
"eval_NanoMSMARCO_cosine_precision@3": 0.09333333333333332,
"eval_NanoMSMARCO_cosine_precision@5": 0.064,
"eval_NanoMSMARCO_cosine_recall@1": 0.12,
"eval_NanoMSMARCO_cosine_recall@10": 0.38,
"eval_NanoMSMARCO_cosine_recall@3": 0.28,
"eval_NanoMSMARCO_cosine_recall@5": 0.32,
"eval_mse-dev_negative_mse": -166.4923858642578,
"eval_runtime": 10.5014,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.2321280636169713,
"eval_steps_per_second": 0.0,
"step": 6000
},
{
"epoch": 0.11566612309909363,
"grad_norm": 0.8698049783706665,
"learning_rate": 5.7821387940841866e-05,
"loss": 1.6636680603027343,
"step": 6100
},
{
"epoch": 0.11756228905153779,
"grad_norm": 0.88554447889328,
"learning_rate": 5.8769434963974214e-05,
"loss": 1.6543186950683593,
"step": 6200
},
{
"epoch": 0.11945845500398194,
"grad_norm": 0.9408504366874695,
"learning_rate": 5.971748198710656e-05,
"loss": 1.6451298522949218,
"step": 6300
},
{
"epoch": 0.1213546209564261,
"grad_norm": 0.8811279535293579,
"learning_rate": 6.066552901023891e-05,
"loss": 1.6382298278808594,
"step": 6400
},
{
"epoch": 0.12325078690887026,
"grad_norm": 0.9638504385948181,
"learning_rate": 6.161357603337125e-05,
"loss": 1.6278233337402344,
"step": 6500
},
{
"epoch": 0.12514695286131441,
"grad_norm": 0.9717722535133362,
"learning_rate": 6.25616230565036e-05,
"loss": 1.62345458984375,
"step": 6600
},
{
"epoch": 0.12704311881375857,
"grad_norm": 1.0567059516906738,
"learning_rate": 6.350967007963595e-05,
"loss": 1.6149652099609375,
"step": 6700
},
{
"epoch": 0.12893928476620273,
"grad_norm": 0.9955742359161377,
"learning_rate": 6.44577171027683e-05,
"loss": 1.6053521728515625,
"step": 6800
},
{
"epoch": 0.13083545071864688,
"grad_norm": 1.0742182731628418,
"learning_rate": 6.540576412590064e-05,
"loss": 1.6007347106933594,
"step": 6900
},
{
"epoch": 0.13273161667109104,
"grad_norm": 0.9622364044189453,
"learning_rate": 6.6353811149033e-05,
"loss": 1.587445068359375,
"step": 7000
},
{
"epoch": 0.13273161667109104,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.2,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.46,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.28,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.33,
"eval_NanoBEIR_mean_cosine_map@100": 0.2218708796716416,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.2654285714285714,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.257882927462247,
"eval_NanoBEIR_mean_cosine_precision@1": 0.2,
"eval_NanoBEIR_mean_cosine_precision@10": 0.052000000000000005,
"eval_NanoBEIR_mean_cosine_precision@3": 0.09666666666666665,
"eval_NanoBEIR_mean_cosine_precision@5": 0.07,
"eval_NanoBEIR_mean_cosine_recall@1": 0.145,
"eval_NanoBEIR_mean_cosine_recall@10": 0.36,
"eval_NanoBEIR_mean_cosine_recall@3": 0.21500000000000002,
"eval_NanoBEIR_mean_cosine_recall@5": 0.255,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.22,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.52,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.28,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.34,
"eval_NanoHotpotQA_cosine_map@100": 0.1843725858934317,
"eval_NanoHotpotQA_cosine_mrr@10": 0.2868571428571428,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.23487212685023443,
"eval_NanoHotpotQA_cosine_precision@1": 0.22,
"eval_NanoHotpotQA_cosine_precision@10": 0.064,
"eval_NanoHotpotQA_cosine_precision@3": 0.09999999999999998,
"eval_NanoHotpotQA_cosine_precision@5": 0.07600000000000001,
"eval_NanoHotpotQA_cosine_recall@1": 0.11,
"eval_NanoHotpotQA_cosine_recall@10": 0.32,
"eval_NanoHotpotQA_cosine_recall@3": 0.15,
"eval_NanoHotpotQA_cosine_recall@5": 0.19,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.18,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.4,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.28,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.32,
"eval_NanoMSMARCO_cosine_map@100": 0.2593691734498515,
"eval_NanoMSMARCO_cosine_mrr@10": 0.24400000000000002,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.28089372807425955,
"eval_NanoMSMARCO_cosine_precision@1": 0.18,
"eval_NanoMSMARCO_cosine_precision@10": 0.04,
"eval_NanoMSMARCO_cosine_precision@3": 0.09333333333333332,
"eval_NanoMSMARCO_cosine_precision@5": 0.064,
"eval_NanoMSMARCO_cosine_recall@1": 0.18,
"eval_NanoMSMARCO_cosine_recall@10": 0.4,
"eval_NanoMSMARCO_cosine_recall@3": 0.28,
"eval_NanoMSMARCO_cosine_recall@5": 0.32,
"eval_mse-dev_negative_mse": -158.10133361816406,
"eval_runtime": 11.0707,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.257882927462247,
"eval_steps_per_second": 0.0,
"step": 7000
},
{
"epoch": 0.13462778262353522,
"grad_norm": 0.979546070098877,
"learning_rate": 6.730185817216534e-05,
"loss": 1.582412109375,
"step": 7100
},
{
"epoch": 0.13652394857597938,
"grad_norm": 1.0893486738204956,
"learning_rate": 6.82499051952977e-05,
"loss": 1.57244384765625,
"step": 7200
},
{
"epoch": 0.13842011452842354,
"grad_norm": 1.0537185668945312,
"learning_rate": 6.919795221843004e-05,
"loss": 1.5668838500976563,
"step": 7300
},
{
"epoch": 0.1403162804808677,
"grad_norm": 0.9376671314239502,
"learning_rate": 7.014599924156239e-05,
"loss": 1.553501739501953,
"step": 7400
},
{
"epoch": 0.14221244643331185,
"grad_norm": 0.9399901032447815,
"learning_rate": 7.109404626469473e-05,
"loss": 1.5449533081054687,
"step": 7500
},
{
"epoch": 0.144108612385756,
"grad_norm": 0.88112473487854,
"learning_rate": 7.204209328782709e-05,
"loss": 1.5345271301269532,
"step": 7600
},
{
"epoch": 0.14600477833820016,
"grad_norm": 0.9386707544326782,
"learning_rate": 7.299014031095943e-05,
"loss": 1.5340492248535156,
"step": 7700
},
{
"epoch": 0.14790094429064432,
"grad_norm": 0.942371129989624,
"learning_rate": 7.393818733409178e-05,
"loss": 1.5242007446289063,
"step": 7800
},
{
"epoch": 0.14979711024308848,
"grad_norm": 0.8463137745857239,
"learning_rate": 7.488623435722411e-05,
"loss": 1.5181001281738282,
"step": 7900
},
{
"epoch": 0.15169327619553263,
"grad_norm": 0.9643734693527222,
"learning_rate": 7.583428138035647e-05,
"loss": 1.5085635375976563,
"step": 8000
},
{
"epoch": 0.15169327619553263,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.21,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.48,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.31000000000000005,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.33999999999999997,
"eval_NanoBEIR_mean_cosine_map@100": 0.2338424020963123,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.28084126984126984,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.27053479230771704,
"eval_NanoBEIR_mean_cosine_precision@1": 0.21,
"eval_NanoBEIR_mean_cosine_precision@10": 0.053000000000000005,
"eval_NanoBEIR_mean_cosine_precision@3": 0.10666666666666666,
"eval_NanoBEIR_mean_cosine_precision@5": 0.07600000000000001,
"eval_NanoBEIR_mean_cosine_recall@1": 0.15,
"eval_NanoBEIR_mean_cosine_recall@10": 0.375,
"eval_NanoBEIR_mean_cosine_recall@3": 0.23,
"eval_NanoBEIR_mean_cosine_recall@5": 0.27,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.24,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.52,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.34,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.36,
"eval_NanoHotpotQA_cosine_map@100": 0.20036400143179198,
"eval_NanoHotpotQA_cosine_mrr@10": 0.30996825396825395,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.24538724835027803,
"eval_NanoHotpotQA_cosine_precision@1": 0.24,
"eval_NanoHotpotQA_cosine_precision@10": 0.062,
"eval_NanoHotpotQA_cosine_precision@3": 0.11999999999999998,
"eval_NanoHotpotQA_cosine_precision@5": 0.08800000000000001,
"eval_NanoHotpotQA_cosine_recall@1": 0.12,
"eval_NanoHotpotQA_cosine_recall@10": 0.31,
"eval_NanoHotpotQA_cosine_recall@3": 0.18,
"eval_NanoHotpotQA_cosine_recall@5": 0.22,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.18,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.44,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.28,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.32,
"eval_NanoMSMARCO_cosine_map@100": 0.2673208027608326,
"eval_NanoMSMARCO_cosine_mrr@10": 0.2517142857142857,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.295682336265156,
"eval_NanoMSMARCO_cosine_precision@1": 0.18,
"eval_NanoMSMARCO_cosine_precision@10": 0.044000000000000004,
"eval_NanoMSMARCO_cosine_precision@3": 0.09333333333333332,
"eval_NanoMSMARCO_cosine_precision@5": 0.064,
"eval_NanoMSMARCO_cosine_recall@1": 0.18,
"eval_NanoMSMARCO_cosine_recall@10": 0.44,
"eval_NanoMSMARCO_cosine_recall@3": 0.28,
"eval_NanoMSMARCO_cosine_recall@5": 0.32,
"eval_mse-dev_negative_mse": -150.10321044921875,
"eval_runtime": 10.6864,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.27053479230771704,
"eval_steps_per_second": 0.0,
"step": 8000
},
{
"epoch": 0.1535894421479768,
"grad_norm": 0.8630809187889099,
"learning_rate": 7.677284793325749e-05,
"loss": 1.5007017517089845,
"step": 8100
},
{
"epoch": 0.15548560810042095,
"grad_norm": 0.8799474835395813,
"learning_rate": 7.772089495638985e-05,
"loss": 1.4950062561035156,
"step": 8200
},
{
"epoch": 0.1573817740528651,
"grad_norm": 0.9594865441322327,
"learning_rate": 7.866894197952219e-05,
"loss": 1.4829434204101561,
"step": 8300
},
{
"epoch": 0.15927794000530926,
"grad_norm": 0.8919075727462769,
"learning_rate": 7.961698900265454e-05,
"loss": 1.4779867553710937,
"step": 8400
},
{
"epoch": 0.16117410595775342,
"grad_norm": 0.9076706767082214,
"learning_rate": 8.056503602578687e-05,
"loss": 1.4736830139160155,
"step": 8500
},
{
"epoch": 0.16307027191019757,
"grad_norm": 0.8629969954490662,
"learning_rate": 8.151308304891923e-05,
"loss": 1.4603062438964844,
"step": 8600
},
{
"epoch": 0.16496643786264173,
"grad_norm": 0.969744086265564,
"learning_rate": 8.246113007205157e-05,
"loss": 1.451029052734375,
"step": 8700
},
{
"epoch": 0.16686260381508589,
"grad_norm": 0.9152198433876038,
"learning_rate": 8.340917709518392e-05,
"loss": 1.4499801635742187,
"step": 8800
},
{
"epoch": 0.16875876976753004,
"grad_norm": 0.7964587211608887,
"learning_rate": 8.435722411831626e-05,
"loss": 1.440777587890625,
"step": 8900
},
{
"epoch": 0.1706549357199742,
"grad_norm": 0.9044669270515442,
"learning_rate": 8.530527114144862e-05,
"loss": 1.4372213745117188,
"step": 9000
},
{
"epoch": 0.1706549357199742,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.23,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.51,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.33999999999999997,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.4,
"eval_NanoBEIR_mean_cosine_map@100": 0.25323404391343074,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.3023690476190476,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.2928600617450003,
"eval_NanoBEIR_mean_cosine_precision@1": 0.23,
"eval_NanoBEIR_mean_cosine_precision@10": 0.058,
"eval_NanoBEIR_mean_cosine_precision@3": 0.12,
"eval_NanoBEIR_mean_cosine_precision@5": 0.08800000000000001,
"eval_NanoBEIR_mean_cosine_recall@1": 0.16,
"eval_NanoBEIR_mean_cosine_recall@10": 0.405,
"eval_NanoBEIR_mean_cosine_recall@3": 0.255,
"eval_NanoBEIR_mean_cosine_recall@5": 0.31,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.28,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.56,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.38,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.44,
"eval_NanoHotpotQA_cosine_map@100": 0.2338868196262281,
"eval_NanoHotpotQA_cosine_mrr@10": 0.3493809523809523,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.2824008390246955,
"eval_NanoHotpotQA_cosine_precision@1": 0.28,
"eval_NanoHotpotQA_cosine_precision@10": 0.07,
"eval_NanoHotpotQA_cosine_precision@3": 0.13999999999999999,
"eval_NanoHotpotQA_cosine_precision@5": 0.10400000000000001,
"eval_NanoHotpotQA_cosine_recall@1": 0.14,
"eval_NanoHotpotQA_cosine_recall@10": 0.35,
"eval_NanoHotpotQA_cosine_recall@3": 0.21,
"eval_NanoHotpotQA_cosine_recall@5": 0.26,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.18,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.46,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.3,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.36,
"eval_NanoMSMARCO_cosine_map@100": 0.27258126820063344,
"eval_NanoMSMARCO_cosine_mrr@10": 0.2553571428571429,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.3033192844653051,
"eval_NanoMSMARCO_cosine_precision@1": 0.18,
"eval_NanoMSMARCO_cosine_precision@10": 0.046,
"eval_NanoMSMARCO_cosine_precision@3": 0.1,
"eval_NanoMSMARCO_cosine_precision@5": 0.07200000000000001,
"eval_NanoMSMARCO_cosine_recall@1": 0.18,
"eval_NanoMSMARCO_cosine_recall@10": 0.46,
"eval_NanoMSMARCO_cosine_recall@3": 0.3,
"eval_NanoMSMARCO_cosine_recall@5": 0.36,
"eval_mse-dev_negative_mse": -142.8462371826172,
"eval_runtime": 10.0151,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.2928600617450003,
"eval_steps_per_second": 0.0,
"step": 9000
},
{
"epoch": 0.17255110167241838,
"grad_norm": 0.9912068843841553,
"learning_rate": 8.625331816458096e-05,
"loss": 1.4270211791992187,
"step": 9100
},
{
"epoch": 0.17444726762486254,
"grad_norm": 0.9523755311965942,
"learning_rate": 8.720136518771332e-05,
"loss": 1.4232991027832032,
"step": 9200
},
{
"epoch": 0.1763434335773067,
"grad_norm": 0.9893079996109009,
"learning_rate": 8.814941221084566e-05,
"loss": 1.4135417175292968,
"step": 9300
},
{
"epoch": 0.17823959952975085,
"grad_norm": 0.8273277282714844,
"learning_rate": 8.909745923397801e-05,
"loss": 1.4074359130859375,
"step": 9400
},
{
"epoch": 0.180135765482195,
"grad_norm": 0.9652109146118164,
"learning_rate": 9.004550625711035e-05,
"loss": 1.3981039428710937,
"step": 9500
},
{
"epoch": 0.18203193143463917,
"grad_norm": 0.9654005169868469,
"learning_rate": 9.099355328024271e-05,
"loss": 1.3918597412109375,
"step": 9600
},
{
"epoch": 0.18392809738708332,
"grad_norm": 1.0751373767852783,
"learning_rate": 9.194160030337505e-05,
"loss": 1.3844194030761718,
"step": 9700
},
{
"epoch": 0.18582426333952748,
"grad_norm": 0.8573171496391296,
"learning_rate": 9.28896473265074e-05,
"loss": 1.3740664672851564,
"step": 9800
},
{
"epoch": 0.18772042929197164,
"grad_norm": 0.9025856256484985,
"learning_rate": 9.383769434963975e-05,
"loss": 1.368533172607422,
"step": 9900
},
{
"epoch": 0.1896165952444158,
"grad_norm": 0.936182975769043,
"learning_rate": 9.47857413727721e-05,
"loss": 1.3668016052246095,
"step": 10000
},
{
"epoch": 0.1896165952444158,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.26,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.55,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.34,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.41000000000000003,
"eval_NanoBEIR_mean_cosine_map@100": 0.2717915991834402,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.3298809523809524,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.31266733186022544,
"eval_NanoBEIR_mean_cosine_precision@1": 0.26,
"eval_NanoBEIR_mean_cosine_precision@10": 0.061000000000000006,
"eval_NanoBEIR_mean_cosine_precision@3": 0.12666666666666665,
"eval_NanoBEIR_mean_cosine_precision@5": 0.09200000000000001,
"eval_NanoBEIR_mean_cosine_recall@1": 0.175,
"eval_NanoBEIR_mean_cosine_recall@10": 0.43,
"eval_NanoBEIR_mean_cosine_recall@3": 0.26,
"eval_NanoBEIR_mean_cosine_recall@5": 0.32,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.34,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.6,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.4,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.46,
"eval_NanoHotpotQA_cosine_map@100": 0.260384776010371,
"eval_NanoHotpotQA_cosine_mrr@10": 0.3953809523809524,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.3058907512098868,
"eval_NanoHotpotQA_cosine_precision@1": 0.34,
"eval_NanoHotpotQA_cosine_precision@10": 0.07200000000000001,
"eval_NanoHotpotQA_cosine_precision@3": 0.15999999999999998,
"eval_NanoHotpotQA_cosine_precision@5": 0.11200000000000002,
"eval_NanoHotpotQA_cosine_recall@1": 0.17,
"eval_NanoHotpotQA_cosine_recall@10": 0.36,
"eval_NanoHotpotQA_cosine_recall@3": 0.24,
"eval_NanoHotpotQA_cosine_recall@5": 0.28,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.18,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.5,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.28,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.36,
"eval_NanoMSMARCO_cosine_map@100": 0.28319842235650944,
"eval_NanoMSMARCO_cosine_mrr@10": 0.2643809523809524,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.31944391251056414,
"eval_NanoMSMARCO_cosine_precision@1": 0.18,
"eval_NanoMSMARCO_cosine_precision@10": 0.05,
"eval_NanoMSMARCO_cosine_precision@3": 0.09333333333333332,
"eval_NanoMSMARCO_cosine_precision@5": 0.07200000000000001,
"eval_NanoMSMARCO_cosine_recall@1": 0.18,
"eval_NanoMSMARCO_cosine_recall@10": 0.5,
"eval_NanoMSMARCO_cosine_recall@3": 0.28,
"eval_NanoMSMARCO_cosine_recall@5": 0.36,
"eval_mse-dev_negative_mse": -135.70806884765625,
"eval_runtime": 11.1158,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.31266733186022544,
"eval_steps_per_second": 0.0,
"step": 10000
},
{
"epoch": 0.19151276119685995,
"grad_norm": 0.9819117784500122,
"learning_rate": 9.573378839590444e-05,
"loss": 1.35683349609375,
"step": 10100
},
{
"epoch": 0.1934089271493041,
"grad_norm": 0.9364531636238098,
"learning_rate": 9.667235494880547e-05,
"loss": 1.3505201721191407,
"step": 10200
},
{
"epoch": 0.19530509310174826,
"grad_norm": 1.0975953340530396,
"learning_rate": 9.762040197193781e-05,
"loss": 1.3433110046386718,
"step": 10300
},
{
"epoch": 0.19720125905419242,
"grad_norm": 0.8945000171661377,
"learning_rate": 9.856844899507016e-05,
"loss": 1.3337992858886718,
"step": 10400
},
{
"epoch": 0.19909742500663657,
"grad_norm": 0.90827876329422,
"learning_rate": 9.95164960182025e-05,
"loss": 1.3294851684570312,
"step": 10500
},
{
"epoch": 0.20099359095908073,
"grad_norm": 1.0766637325286865,
"learning_rate": 9.994838193156919e-05,
"loss": 1.3274673461914062,
"step": 10600
},
{
"epoch": 0.2028897569115249,
"grad_norm": 0.9869415760040283,
"learning_rate": 9.984303893477163e-05,
"loss": 1.3149089050292968,
"step": 10700
},
{
"epoch": 0.20478592286396904,
"grad_norm": 0.9914052486419678,
"learning_rate": 9.973769593797405e-05,
"loss": 1.3119027709960938,
"step": 10800
},
{
"epoch": 0.2066820888164132,
"grad_norm": 0.8931730389595032,
"learning_rate": 9.963235294117647e-05,
"loss": 1.30553466796875,
"step": 10900
},
{
"epoch": 0.20857825476885736,
"grad_norm": 0.9103732705116272,
"learning_rate": 9.95270099443789e-05,
"loss": 1.2952238464355468,
"step": 11000
},
{
"epoch": 0.20857825476885736,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.28,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.52,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.4,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.49,
"eval_NanoBEIR_mean_cosine_map@100": 0.2958090237794817,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.3554563492063492,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.3271554490713938,
"eval_NanoBEIR_mean_cosine_precision@1": 0.28,
"eval_NanoBEIR_mean_cosine_precision@10": 0.06100000000000001,
"eval_NanoBEIR_mean_cosine_precision@3": 0.14666666666666667,
"eval_NanoBEIR_mean_cosine_precision@5": 0.10800000000000001,
"eval_NanoBEIR_mean_cosine_recall@1": 0.185,
"eval_NanoBEIR_mean_cosine_recall@10": 0.41500000000000004,
"eval_NanoBEIR_mean_cosine_recall@3": 0.30500000000000005,
"eval_NanoBEIR_mean_cosine_recall@5": 0.375,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.38,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.6,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.46,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.56,
"eval_NanoHotpotQA_cosine_map@100": 0.2970541527466325,
"eval_NanoHotpotQA_cosine_mrr@10": 0.4416904761904762,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.34337262327682183,
"eval_NanoHotpotQA_cosine_precision@1": 0.38,
"eval_NanoHotpotQA_cosine_precision@10": 0.07800000000000001,
"eval_NanoHotpotQA_cosine_precision@3": 0.18,
"eval_NanoHotpotQA_cosine_precision@5": 0.132,
"eval_NanoHotpotQA_cosine_recall@1": 0.19,
"eval_NanoHotpotQA_cosine_recall@10": 0.39,
"eval_NanoHotpotQA_cosine_recall@3": 0.27,
"eval_NanoHotpotQA_cosine_recall@5": 0.33,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.18,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.44,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.34,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.42,
"eval_NanoMSMARCO_cosine_map@100": 0.29456389481233086,
"eval_NanoMSMARCO_cosine_mrr@10": 0.26922222222222225,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.31093827486596576,
"eval_NanoMSMARCO_cosine_precision@1": 0.18,
"eval_NanoMSMARCO_cosine_precision@10": 0.044000000000000004,
"eval_NanoMSMARCO_cosine_precision@3": 0.11333333333333333,
"eval_NanoMSMARCO_cosine_precision@5": 0.084,
"eval_NanoMSMARCO_cosine_recall@1": 0.18,
"eval_NanoMSMARCO_cosine_recall@10": 0.44,
"eval_NanoMSMARCO_cosine_recall@3": 0.34,
"eval_NanoMSMARCO_cosine_recall@5": 0.42,
"eval_mse-dev_negative_mse": -129.20640563964844,
"eval_runtime": 10.3813,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.3271554490713938,
"eval_steps_per_second": 0.0,
"step": 11000
},
{
"epoch": 0.21047442072130154,
"grad_norm": 1.1640655994415283,
"learning_rate": 9.942166694758133e-05,
"loss": 1.2919923400878905,
"step": 11100
},
{
"epoch": 0.2123705866737457,
"grad_norm": 0.9011592864990234,
"learning_rate": 9.931632395078376e-05,
"loss": 1.2851214599609375,
"step": 11200
},
{
"epoch": 0.21426675262618985,
"grad_norm": 0.9254733324050903,
"learning_rate": 9.921098095398619e-05,
"loss": 1.2769430541992188,
"step": 11300
},
{
"epoch": 0.216162918578634,
"grad_norm": 0.9079636931419373,
"learning_rate": 9.910563795718862e-05,
"loss": 1.2746614837646484,
"step": 11400
},
{
"epoch": 0.21805908453107817,
"grad_norm": 0.9787989258766174,
"learning_rate": 9.900029496039104e-05,
"loss": 1.268571014404297,
"step": 11500
},
{
"epoch": 0.21995525048352232,
"grad_norm": 0.8455345630645752,
"learning_rate": 9.889495196359346e-05,
"loss": 1.2683941650390624,
"step": 11600
},
{
"epoch": 0.22185141643596648,
"grad_norm": 0.9073353409767151,
"learning_rate": 9.878960896679589e-05,
"loss": 1.2581684875488282,
"step": 11700
},
{
"epoch": 0.22374758238841064,
"grad_norm": 0.8951073288917542,
"learning_rate": 9.868426596999832e-05,
"loss": 1.258204574584961,
"step": 11800
},
{
"epoch": 0.2256437483408548,
"grad_norm": 1.0486690998077393,
"learning_rate": 9.857892297320075e-05,
"loss": 1.247862319946289,
"step": 11900
},
{
"epoch": 0.22753991429329895,
"grad_norm": 0.8603843450546265,
"learning_rate": 9.847357997640317e-05,
"loss": 1.241845016479492,
"step": 12000
},
{
"epoch": 0.22753991429329895,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.28,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.6,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.41000000000000003,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.45,
"eval_NanoBEIR_mean_cosine_map@100": 0.302852595589562,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.36785317460317457,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.3493080002249725,
"eval_NanoBEIR_mean_cosine_precision@1": 0.28,
"eval_NanoBEIR_mean_cosine_precision@10": 0.069,
"eval_NanoBEIR_mean_cosine_precision@3": 0.14666666666666667,
"eval_NanoBEIR_mean_cosine_precision@5": 0.10200000000000001,
"eval_NanoBEIR_mean_cosine_recall@1": 0.19,
"eval_NanoBEIR_mean_cosine_recall@10": 0.47,
"eval_NanoBEIR_mean_cosine_recall@3": 0.31,
"eval_NanoBEIR_mean_cosine_recall@5": 0.355,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.36,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.7,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.46,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.5,
"eval_NanoHotpotQA_cosine_map@100": 0.2899683891353945,
"eval_NanoHotpotQA_cosine_mrr@10": 0.4411031746031746,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.3546937420389296,
"eval_NanoHotpotQA_cosine_precision@1": 0.36,
"eval_NanoHotpotQA_cosine_precision@10": 0.088,
"eval_NanoHotpotQA_cosine_precision@3": 0.1733333333333333,
"eval_NanoHotpotQA_cosine_precision@5": 0.124,
"eval_NanoHotpotQA_cosine_recall@1": 0.18,
"eval_NanoHotpotQA_cosine_recall@10": 0.44,
"eval_NanoHotpotQA_cosine_recall@3": 0.26,
"eval_NanoHotpotQA_cosine_recall@5": 0.31,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.2,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.5,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.36,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.4,
"eval_NanoMSMARCO_cosine_map@100": 0.31573680204372945,
"eval_NanoMSMARCO_cosine_mrr@10": 0.2946031746031746,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.34392225841101537,
"eval_NanoMSMARCO_cosine_precision@1": 0.2,
"eval_NanoMSMARCO_cosine_precision@10": 0.05,
"eval_NanoMSMARCO_cosine_precision@3": 0.12,
"eval_NanoMSMARCO_cosine_precision@5": 0.08,
"eval_NanoMSMARCO_cosine_recall@1": 0.2,
"eval_NanoMSMARCO_cosine_recall@10": 0.5,
"eval_NanoMSMARCO_cosine_recall@3": 0.36,
"eval_NanoMSMARCO_cosine_recall@5": 0.4,
"eval_mse-dev_negative_mse": -123.62611389160156,
"eval_runtime": 10.308,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.3493080002249725,
"eval_steps_per_second": 0.0,
"step": 12000
},
{
"epoch": 0.2294360802457431,
"grad_norm": 0.9303557276725769,
"learning_rate": 9.83682369796056e-05,
"loss": 1.240003662109375,
"step": 12100
},
{
"epoch": 0.23133224619818726,
"grad_norm": 0.9165602326393127,
"learning_rate": 9.826289398280803e-05,
"loss": 1.232986068725586,
"step": 12200
},
{
"epoch": 0.23322841215063142,
"grad_norm": 0.8384730815887451,
"learning_rate": 9.815755098601045e-05,
"loss": 1.2288270568847657,
"step": 12300
},
{
"epoch": 0.23512457810307558,
"grad_norm": 0.9244160652160645,
"learning_rate": 9.805326141918085e-05,
"loss": 1.223012924194336,
"step": 12400
},
{
"epoch": 0.23702074405551973,
"grad_norm": 1.01241135597229,
"learning_rate": 9.794791842238329e-05,
"loss": 1.2164186096191407,
"step": 12500
},
{
"epoch": 0.2389169100079639,
"grad_norm": 0.9336892366409302,
"learning_rate": 9.784257542558571e-05,
"loss": 1.2156867980957031,
"step": 12600
},
{
"epoch": 0.24081307596040805,
"grad_norm": 0.9515780210494995,
"learning_rate": 9.773723242878813e-05,
"loss": 1.2165725708007813,
"step": 12700
},
{
"epoch": 0.2427092419128522,
"grad_norm": 0.8875882029533386,
"learning_rate": 9.763188943199057e-05,
"loss": 1.2044532775878907,
"step": 12800
},
{
"epoch": 0.24460540786529636,
"grad_norm": 0.8906784057617188,
"learning_rate": 9.7526546435193e-05,
"loss": 1.2034928131103515,
"step": 12900
},
{
"epoch": 0.24650157381774052,
"grad_norm": 0.860988438129425,
"learning_rate": 9.742120343839543e-05,
"loss": 1.1968316650390625,
"step": 13000
},
{
"epoch": 0.24650157381774052,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.25,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.5700000000000001,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.38,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.45999999999999996,
"eval_NanoBEIR_mean_cosine_map@100": 0.2897002867515749,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.3452896825396825,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.3305841730427018,
"eval_NanoBEIR_mean_cosine_precision@1": 0.25,
"eval_NanoBEIR_mean_cosine_precision@10": 0.065,
"eval_NanoBEIR_mean_cosine_precision@3": 0.1433333333333333,
"eval_NanoBEIR_mean_cosine_precision@5": 0.10200000000000001,
"eval_NanoBEIR_mean_cosine_recall@1": 0.165,
"eval_NanoBEIR_mean_cosine_recall@10": 0.45,
"eval_NanoBEIR_mean_cosine_recall@3": 0.30000000000000004,
"eval_NanoBEIR_mean_cosine_recall@5": 0.36,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.34,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.64,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.42,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.5,
"eval_NanoHotpotQA_cosine_map@100": 0.2823695142784583,
"eval_NanoHotpotQA_cosine_mrr@10": 0.417079365079365,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.332921649409912,
"eval_NanoHotpotQA_cosine_precision@1": 0.34,
"eval_NanoHotpotQA_cosine_precision@10": 0.08,
"eval_NanoHotpotQA_cosine_precision@3": 0.1733333333333333,
"eval_NanoHotpotQA_cosine_precision@5": 0.12000000000000002,
"eval_NanoHotpotQA_cosine_recall@1": 0.17,
"eval_NanoHotpotQA_cosine_recall@10": 0.4,
"eval_NanoHotpotQA_cosine_recall@3": 0.26,
"eval_NanoHotpotQA_cosine_recall@5": 0.3,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.16,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.5,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.34,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.42,
"eval_NanoMSMARCO_cosine_map@100": 0.29703105922469153,
"eval_NanoMSMARCO_cosine_mrr@10": 0.2735,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.3282466966754917,
"eval_NanoMSMARCO_cosine_precision@1": 0.16,
"eval_NanoMSMARCO_cosine_precision@10": 0.05,
"eval_NanoMSMARCO_cosine_precision@3": 0.11333333333333333,
"eval_NanoMSMARCO_cosine_precision@5": 0.084,
"eval_NanoMSMARCO_cosine_recall@1": 0.16,
"eval_NanoMSMARCO_cosine_recall@10": 0.5,
"eval_NanoMSMARCO_cosine_recall@3": 0.34,
"eval_NanoMSMARCO_cosine_recall@5": 0.42,
"eval_mse-dev_negative_mse": -118.86907958984375,
"eval_runtime": 11.1772,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.3305841730427018,
"eval_steps_per_second": 0.0,
"step": 13000
},
{
"epoch": 0.2483977397701847,
"grad_norm": 0.944284200668335,
"learning_rate": 9.731586044159785e-05,
"loss": 1.1941532135009765,
"step": 13100
},
{
"epoch": 0.25029390572262883,
"grad_norm": 0.846736490726471,
"learning_rate": 9.721051744480028e-05,
"loss": 1.189548873901367,
"step": 13200
},
{
"epoch": 0.252190071675073,
"grad_norm": 0.9077499508857727,
"learning_rate": 9.71051744480027e-05,
"loss": 1.184281463623047,
"step": 13300
},
{
"epoch": 0.25408623762751714,
"grad_norm": 0.9021602869033813,
"learning_rate": 9.699983145120512e-05,
"loss": 1.1755128479003907,
"step": 13400
},
{
"epoch": 0.2559824035799613,
"grad_norm": 0.9804133772850037,
"learning_rate": 9.689448845440755e-05,
"loss": 1.175633773803711,
"step": 13500
},
{
"epoch": 0.25787856953240545,
"grad_norm": 0.8400120139122009,
"learning_rate": 9.678914545760998e-05,
"loss": 1.1707258605957032,
"step": 13600
},
{
"epoch": 0.25977473548484964,
"grad_norm": 0.8351007103919983,
"learning_rate": 9.668380246081241e-05,
"loss": 1.1637205505371093,
"step": 13700
},
{
"epoch": 0.26167090143729377,
"grad_norm": 0.9614461064338684,
"learning_rate": 9.657845946401483e-05,
"loss": 1.1684355926513672,
"step": 13800
},
{
"epoch": 0.26356706738973795,
"grad_norm": 0.9544349312782288,
"learning_rate": 9.647311646721725e-05,
"loss": 1.162786636352539,
"step": 13900
},
{
"epoch": 0.2654632333421821,
"grad_norm": 0.8563331365585327,
"learning_rate": 9.636777347041969e-05,
"loss": 1.1585095977783204,
"step": 14000
},
{
"epoch": 0.2654632333421821,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.28,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.62,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.42,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.5,
"eval_NanoBEIR_mean_cosine_map@100": 0.31463974475417134,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.3753571428571429,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.36789514654221167,
"eval_NanoBEIR_mean_cosine_precision@1": 0.28,
"eval_NanoBEIR_mean_cosine_precision@10": 0.072,
"eval_NanoBEIR_mean_cosine_precision@3": 0.16,
"eval_NanoBEIR_mean_cosine_precision@5": 0.116,
"eval_NanoBEIR_mean_cosine_recall@1": 0.185,
"eval_NanoBEIR_mean_cosine_recall@10": 0.515,
"eval_NanoBEIR_mean_cosine_recall@3": 0.32999999999999996,
"eval_NanoBEIR_mean_cosine_recall@5": 0.405,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.38,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.62,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.48,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.54,
"eval_NanoHotpotQA_cosine_map@100": 0.3112110057061059,
"eval_NanoHotpotQA_cosine_mrr@10": 0.4472222222222223,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.3578654483822233,
"eval_NanoHotpotQA_cosine_precision@1": 0.38,
"eval_NanoHotpotQA_cosine_precision@10": 0.08199999999999999,
"eval_NanoHotpotQA_cosine_precision@3": 0.2,
"eval_NanoHotpotQA_cosine_precision@5": 0.14,
"eval_NanoHotpotQA_cosine_recall@1": 0.19,
"eval_NanoHotpotQA_cosine_recall@10": 0.41,
"eval_NanoHotpotQA_cosine_recall@3": 0.3,
"eval_NanoHotpotQA_cosine_recall@5": 0.35,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.18,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.62,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.36,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.46,
"eval_NanoMSMARCO_cosine_map@100": 0.31806848380223673,
"eval_NanoMSMARCO_cosine_mrr@10": 0.3034920634920635,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.3779248447022001,
"eval_NanoMSMARCO_cosine_precision@1": 0.18,
"eval_NanoMSMARCO_cosine_precision@10": 0.062,
"eval_NanoMSMARCO_cosine_precision@3": 0.12,
"eval_NanoMSMARCO_cosine_precision@5": 0.092,
"eval_NanoMSMARCO_cosine_recall@1": 0.18,
"eval_NanoMSMARCO_cosine_recall@10": 0.62,
"eval_NanoMSMARCO_cosine_recall@3": 0.36,
"eval_NanoMSMARCO_cosine_recall@5": 0.46,
"eval_mse-dev_negative_mse": -115.4122085571289,
"eval_runtime": 14.015,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.36789514654221167,
"eval_steps_per_second": 0.0,
"step": 14000
},
{
"epoch": 0.26735939929462627,
"grad_norm": 0.9541077017784119,
"learning_rate": 9.626243047362213e-05,
"loss": 1.160166244506836,
"step": 14100
},
{
"epoch": 0.26925556524707045,
"grad_norm": 1.0204025506973267,
"learning_rate": 9.615708747682455e-05,
"loss": 1.1503668212890625,
"step": 14200
},
{
"epoch": 0.2711517311995146,
"grad_norm": 1.0752142667770386,
"learning_rate": 9.605174448002698e-05,
"loss": 1.1483226776123048,
"step": 14300
},
{
"epoch": 0.27304789715195876,
"grad_norm": 0.9642768502235413,
"learning_rate": 9.59464014832294e-05,
"loss": 1.1488003540039062,
"step": 14400
},
{
"epoch": 0.2749440631044029,
"grad_norm": 0.8722686171531677,
"learning_rate": 9.584105848643182e-05,
"loss": 1.139219741821289,
"step": 14500
},
{
"epoch": 0.2768402290568471,
"grad_norm": 0.9259271025657654,
"learning_rate": 9.573676891960223e-05,
"loss": 1.134266128540039,
"step": 14600
},
{
"epoch": 0.2787363950092912,
"grad_norm": 1.019303560256958,
"learning_rate": 9.563142592280465e-05,
"loss": 1.136265869140625,
"step": 14700
},
{
"epoch": 0.2806325609617354,
"grad_norm": 0.9323062300682068,
"learning_rate": 9.552608292600709e-05,
"loss": 1.1342037200927735,
"step": 14800
},
{
"epoch": 0.2825287269141795,
"grad_norm": 0.8613787293434143,
"learning_rate": 9.542073992920951e-05,
"loss": 1.132669448852539,
"step": 14900
},
{
"epoch": 0.2844248928666237,
"grad_norm": 0.9772534966468811,
"learning_rate": 9.531539693241194e-05,
"loss": 1.1218692779541015,
"step": 15000
},
{
"epoch": 0.2844248928666237,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.29000000000000004,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.63,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.43,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.55,
"eval_NanoBEIR_mean_cosine_map@100": 0.32670175603229334,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.38711111111111113,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.3792944580347569,
"eval_NanoBEIR_mean_cosine_precision@1": 0.29000000000000004,
"eval_NanoBEIR_mean_cosine_precision@10": 0.07500000000000001,
"eval_NanoBEIR_mean_cosine_precision@3": 0.16,
"eval_NanoBEIR_mean_cosine_precision@5": 0.126,
"eval_NanoBEIR_mean_cosine_recall@1": 0.195,
"eval_NanoBEIR_mean_cosine_recall@10": 0.525,
"eval_NanoBEIR_mean_cosine_recall@3": 0.32999999999999996,
"eval_NanoBEIR_mean_cosine_recall@5": 0.44,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.38,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.66,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.5,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.6,
"eval_NanoHotpotQA_cosine_map@100": 0.3244366589749346,
"eval_NanoHotpotQA_cosine_mrr@10": 0.46277777777777784,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.37914250624163204,
"eval_NanoHotpotQA_cosine_precision@1": 0.38,
"eval_NanoHotpotQA_cosine_precision@10": 0.09,
"eval_NanoHotpotQA_cosine_precision@3": 0.2,
"eval_NanoHotpotQA_cosine_precision@5": 0.15200000000000002,
"eval_NanoHotpotQA_cosine_recall@1": 0.19,
"eval_NanoHotpotQA_cosine_recall@10": 0.45,
"eval_NanoHotpotQA_cosine_recall@3": 0.3,
"eval_NanoHotpotQA_cosine_recall@5": 0.38,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.2,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.6,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.36,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.5,
"eval_NanoMSMARCO_cosine_map@100": 0.3289668530896521,
"eval_NanoMSMARCO_cosine_mrr@10": 0.3114444444444444,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.37944640982788175,
"eval_NanoMSMARCO_cosine_precision@1": 0.2,
"eval_NanoMSMARCO_cosine_precision@10": 0.06000000000000001,
"eval_NanoMSMARCO_cosine_precision@3": 0.12,
"eval_NanoMSMARCO_cosine_precision@5": 0.1,
"eval_NanoMSMARCO_cosine_recall@1": 0.2,
"eval_NanoMSMARCO_cosine_recall@10": 0.6,
"eval_NanoMSMARCO_cosine_recall@3": 0.36,
"eval_NanoMSMARCO_cosine_recall@5": 0.5,
"eval_mse-dev_negative_mse": -111.91387176513672,
"eval_runtime": 11.9368,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.3792944580347569,
"eval_steps_per_second": 0.0,
"step": 15000
},
{
"epoch": 0.28632105881906783,
"grad_norm": 0.9433382749557495,
"learning_rate": 9.521005393561436e-05,
"loss": 1.124610137939453,
"step": 15100
},
{
"epoch": 0.288217224771512,
"grad_norm": 0.880102276802063,
"learning_rate": 9.510471093881679e-05,
"loss": 1.1151537322998046,
"step": 15200
},
{
"epoch": 0.29011339072395614,
"grad_norm": 0.8995987772941589,
"learning_rate": 9.499936794201922e-05,
"loss": 1.119567642211914,
"step": 15300
},
{
"epoch": 0.29200955667640033,
"grad_norm": 0.7987125515937805,
"learning_rate": 9.489402494522165e-05,
"loss": 1.109741439819336,
"step": 15400
},
{
"epoch": 0.29390572262884446,
"grad_norm": 0.8933894038200378,
"learning_rate": 9.478868194842407e-05,
"loss": 1.106731185913086,
"step": 15500
},
{
"epoch": 0.29580188858128864,
"grad_norm": 0.9454442858695984,
"learning_rate": 9.468333895162649e-05,
"loss": 1.0994451904296876,
"step": 15600
},
{
"epoch": 0.29769805453373277,
"grad_norm": 0.9284511804580688,
"learning_rate": 9.457799595482893e-05,
"loss": 1.107660446166992,
"step": 15700
},
{
"epoch": 0.29959422048617695,
"grad_norm": 0.9509237408638,
"learning_rate": 9.447265295803135e-05,
"loss": 1.1057376098632812,
"step": 15800
},
{
"epoch": 0.3014903864386211,
"grad_norm": 0.8351031541824341,
"learning_rate": 9.436730996123379e-05,
"loss": 1.0948797607421874,
"step": 15900
},
{
"epoch": 0.30338655239106527,
"grad_norm": 0.9255380034446716,
"learning_rate": 9.426196696443621e-05,
"loss": 1.0980982208251953,
"step": 16000
},
{
"epoch": 0.30338655239106527,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.28,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.65,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.45,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.54,
"eval_NanoBEIR_mean_cosine_map@100": 0.3266167689165571,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.3881944444444445,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.38609049913548005,
"eval_NanoBEIR_mean_cosine_precision@1": 0.28,
"eval_NanoBEIR_mean_cosine_precision@10": 0.07799999999999999,
"eval_NanoBEIR_mean_cosine_precision@3": 0.16666666666666669,
"eval_NanoBEIR_mean_cosine_precision@5": 0.12200000000000003,
"eval_NanoBEIR_mean_cosine_recall@1": 0.185,
"eval_NanoBEIR_mean_cosine_recall@10": 0.5449999999999999,
"eval_NanoBEIR_mean_cosine_recall@3": 0.35,
"eval_NanoBEIR_mean_cosine_recall@5": 0.43,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.38,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.68,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.5,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.58,
"eval_NanoHotpotQA_cosine_map@100": 0.3233119770078725,
"eval_NanoHotpotQA_cosine_mrr@10": 0.46327777777777784,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.38549342229017597,
"eval_NanoHotpotQA_cosine_precision@1": 0.38,
"eval_NanoHotpotQA_cosine_precision@10": 0.09399999999999999,
"eval_NanoHotpotQA_cosine_precision@3": 0.2,
"eval_NanoHotpotQA_cosine_precision@5": 0.14400000000000002,
"eval_NanoHotpotQA_cosine_recall@1": 0.19,
"eval_NanoHotpotQA_cosine_recall@10": 0.47,
"eval_NanoHotpotQA_cosine_recall@3": 0.3,
"eval_NanoHotpotQA_cosine_recall@5": 0.36,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.18,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.62,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.4,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.5,
"eval_NanoMSMARCO_cosine_map@100": 0.32992156082524177,
"eval_NanoMSMARCO_cosine_mrr@10": 0.3131111111111111,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.3866875759807841,
"eval_NanoMSMARCO_cosine_precision@1": 0.18,
"eval_NanoMSMARCO_cosine_precision@10": 0.062,
"eval_NanoMSMARCO_cosine_precision@3": 0.13333333333333333,
"eval_NanoMSMARCO_cosine_precision@5": 0.10000000000000002,
"eval_NanoMSMARCO_cosine_recall@1": 0.18,
"eval_NanoMSMARCO_cosine_recall@10": 0.62,
"eval_NanoMSMARCO_cosine_recall@3": 0.4,
"eval_NanoMSMARCO_cosine_recall@5": 0.5,
"eval_mse-dev_negative_mse": -109.29944610595703,
"eval_runtime": 13.2813,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.38609049913548005,
"eval_steps_per_second": 0.0,
"step": 16000
},
{
"epoch": 0.30528271834350945,
"grad_norm": 0.9251424670219421,
"learning_rate": 9.415662396763864e-05,
"loss": 1.0933486938476562,
"step": 16100
},
{
"epoch": 0.3071788842959536,
"grad_norm": 0.9098881483078003,
"learning_rate": 9.405128097084106e-05,
"loss": 1.0872834777832032,
"step": 16200
},
{
"epoch": 0.30907505024839776,
"grad_norm": 0.9585905075073242,
"learning_rate": 9.394593797404348e-05,
"loss": 1.0850564575195312,
"step": 16300
},
{
"epoch": 0.3109712162008419,
"grad_norm": 0.8983785510063171,
"learning_rate": 9.384059497724592e-05,
"loss": 1.0840210723876953,
"step": 16400
},
{
"epoch": 0.3128673821532861,
"grad_norm": 0.8971573114395142,
"learning_rate": 9.373525198044834e-05,
"loss": 1.0830884552001954,
"step": 16500
},
{
"epoch": 0.3147635481057302,
"grad_norm": 0.9502484202384949,
"learning_rate": 9.363096241361875e-05,
"loss": 1.0755316925048828,
"step": 16600
},
{
"epoch": 0.3166597140581744,
"grad_norm": 0.8195205330848694,
"learning_rate": 9.352561941682118e-05,
"loss": 1.0733245086669922,
"step": 16700
},
{
"epoch": 0.3185558800106185,
"grad_norm": 0.866369366645813,
"learning_rate": 9.34202764200236e-05,
"loss": 1.072414016723633,
"step": 16800
},
{
"epoch": 0.3204520459630627,
"grad_norm": 0.8804235458374023,
"learning_rate": 9.331493342322602e-05,
"loss": 1.069804458618164,
"step": 16900
},
{
"epoch": 0.32234821191550683,
"grad_norm": 0.8990177512168884,
"learning_rate": 9.320959042642845e-05,
"loss": 1.0709500122070312,
"step": 17000
},
{
"epoch": 0.32234821191550683,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.31,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.66,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.47,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.55,
"eval_NanoBEIR_mean_cosine_map@100": 0.35180720243649677,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.41262301587301586,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.4079069196220826,
"eval_NanoBEIR_mean_cosine_precision@1": 0.31,
"eval_NanoBEIR_mean_cosine_precision@10": 0.08,
"eval_NanoBEIR_mean_cosine_precision@3": 0.18,
"eval_NanoBEIR_mean_cosine_precision@5": 0.126,
"eval_NanoBEIR_mean_cosine_recall@1": 0.20500000000000002,
"eval_NanoBEIR_mean_cosine_recall@10": 0.56,
"eval_NanoBEIR_mean_cosine_recall@3": 0.38,
"eval_NanoBEIR_mean_cosine_recall@5": 0.445,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.42,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.68,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.5,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.58,
"eval_NanoHotpotQA_cosine_map@100": 0.3515281128331601,
"eval_NanoHotpotQA_cosine_mrr@10": 0.48841269841269835,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.40662948657099507,
"eval_NanoHotpotQA_cosine_precision@1": 0.42,
"eval_NanoHotpotQA_cosine_precision@10": 0.096,
"eval_NanoHotpotQA_cosine_precision@3": 0.21333333333333332,
"eval_NanoHotpotQA_cosine_precision@5": 0.14800000000000002,
"eval_NanoHotpotQA_cosine_recall@1": 0.21,
"eval_NanoHotpotQA_cosine_recall@10": 0.48,
"eval_NanoHotpotQA_cosine_recall@3": 0.32,
"eval_NanoHotpotQA_cosine_recall@5": 0.37,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.2,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.64,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.44,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.52,
"eval_NanoMSMARCO_cosine_map@100": 0.35208629203983344,
"eval_NanoMSMARCO_cosine_mrr@10": 0.3368333333333334,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.4091843526731701,
"eval_NanoMSMARCO_cosine_precision@1": 0.2,
"eval_NanoMSMARCO_cosine_precision@10": 0.064,
"eval_NanoMSMARCO_cosine_precision@3": 0.14666666666666667,
"eval_NanoMSMARCO_cosine_precision@5": 0.10400000000000001,
"eval_NanoMSMARCO_cosine_recall@1": 0.2,
"eval_NanoMSMARCO_cosine_recall@10": 0.64,
"eval_NanoMSMARCO_cosine_recall@3": 0.44,
"eval_NanoMSMARCO_cosine_recall@5": 0.52,
"eval_mse-dev_negative_mse": -106.3768539428711,
"eval_runtime": 10.2249,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.4079069196220826,
"eval_steps_per_second": 0.0,
"step": 17000
},
{
"epoch": 0.324244377867951,
"grad_norm": 0.9269504547119141,
"learning_rate": 9.310424742963088e-05,
"loss": 1.0698513031005858,
"step": 17100
},
{
"epoch": 0.32614054382039515,
"grad_norm": 0.9297342896461487,
"learning_rate": 9.29989044328333e-05,
"loss": 1.06423828125,
"step": 17200
},
{
"epoch": 0.32803670977283933,
"grad_norm": 0.8609415292739868,
"learning_rate": 9.289356143603573e-05,
"loss": 1.0575923919677734,
"step": 17300
},
{
"epoch": 0.32993287572528346,
"grad_norm": 0.9494638442993164,
"learning_rate": 9.278821843923817e-05,
"loss": 1.059657211303711,
"step": 17400
},
{
"epoch": 0.33182904167772764,
"grad_norm": 0.9297378063201904,
"learning_rate": 9.268287544244059e-05,
"loss": 1.0571788024902344,
"step": 17500
},
{
"epoch": 0.33372520763017177,
"grad_norm": 0.8993592262268066,
"learning_rate": 9.257753244564303e-05,
"loss": 1.0546926879882812,
"step": 17600
},
{
"epoch": 0.33562137358261596,
"grad_norm": 0.8981407880783081,
"learning_rate": 9.247218944884545e-05,
"loss": 1.0501728057861328,
"step": 17700
},
{
"epoch": 0.3375175395350601,
"grad_norm": 0.8592208623886108,
"learning_rate": 9.236684645204787e-05,
"loss": 1.0466949462890625,
"step": 17800
},
{
"epoch": 0.33941370548750427,
"grad_norm": 0.8278118371963501,
"learning_rate": 9.22615034552503e-05,
"loss": 1.0484512329101563,
"step": 17900
},
{
"epoch": 0.3413098714399484,
"grad_norm": 0.8379432559013367,
"learning_rate": 9.215616045845272e-05,
"loss": 1.0455326843261719,
"step": 18000
},
{
"epoch": 0.3413098714399484,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.31,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.71,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.51,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.6,
"eval_NanoBEIR_mean_cosine_map@100": 0.3693867458958786,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.43332936507936504,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.43735880478726147,
"eval_NanoBEIR_mean_cosine_precision@1": 0.31,
"eval_NanoBEIR_mean_cosine_precision@10": 0.087,
"eval_NanoBEIR_mean_cosine_precision@3": 0.19333333333333333,
"eval_NanoBEIR_mean_cosine_precision@5": 0.138,
"eval_NanoBEIR_mean_cosine_recall@1": 0.21000000000000002,
"eval_NanoBEIR_mean_cosine_recall@10": 0.61,
"eval_NanoBEIR_mean_cosine_recall@3": 0.41000000000000003,
"eval_NanoBEIR_mean_cosine_recall@5": 0.49,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.4,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.72,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.54,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.62,
"eval_NanoHotpotQA_cosine_map@100": 0.3550150187843317,
"eval_NanoHotpotQA_cosine_mrr@10": 0.4942460317460317,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.4237044581505819,
"eval_NanoHotpotQA_cosine_precision@1": 0.4,
"eval_NanoHotpotQA_cosine_precision@10": 0.10399999999999998,
"eval_NanoHotpotQA_cosine_precision@3": 0.22666666666666668,
"eval_NanoHotpotQA_cosine_precision@5": 0.16,
"eval_NanoHotpotQA_cosine_recall@1": 0.2,
"eval_NanoHotpotQA_cosine_recall@10": 0.52,
"eval_NanoHotpotQA_cosine_recall@3": 0.34,
"eval_NanoHotpotQA_cosine_recall@5": 0.4,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.22,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.7,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.48,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.58,
"eval_NanoMSMARCO_cosine_map@100": 0.3837584730074255,
"eval_NanoMSMARCO_cosine_mrr@10": 0.3724126984126984,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.45101315142394105,
"eval_NanoMSMARCO_cosine_precision@1": 0.22,
"eval_NanoMSMARCO_cosine_precision@10": 0.07,
"eval_NanoMSMARCO_cosine_precision@3": 0.15999999999999998,
"eval_NanoMSMARCO_cosine_precision@5": 0.11599999999999999,
"eval_NanoMSMARCO_cosine_recall@1": 0.22,
"eval_NanoMSMARCO_cosine_recall@10": 0.7,
"eval_NanoMSMARCO_cosine_recall@3": 0.48,
"eval_NanoMSMARCO_cosine_recall@5": 0.58,
"eval_mse-dev_negative_mse": -103.76982879638672,
"eval_runtime": 10.0162,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.43735880478726147,
"eval_steps_per_second": 0.0,
"step": 18000
},
{
"epoch": 0.3432060373923926,
"grad_norm": 0.8932205438613892,
"learning_rate": 9.205081746165516e-05,
"loss": 1.0432756805419923,
"step": 18100
},
{
"epoch": 0.34510220334483677,
"grad_norm": 0.9294377565383911,
"learning_rate": 9.194547446485758e-05,
"loss": 1.0403505706787108,
"step": 18200
},
{
"epoch": 0.3469983692972809,
"grad_norm": 0.8712144494056702,
"learning_rate": 9.184013146806e-05,
"loss": 1.0396759796142578,
"step": 18300
},
{
"epoch": 0.3488945352497251,
"grad_norm": 0.8681181073188782,
"learning_rate": 9.173478847126243e-05,
"loss": 1.0351734161376953,
"step": 18400
},
{
"epoch": 0.3507907012021692,
"grad_norm": 0.8668209910392761,
"learning_rate": 9.162944547446487e-05,
"loss": 1.0318231964111328,
"step": 18500
},
{
"epoch": 0.3526868671546134,
"grad_norm": 0.9021549224853516,
"learning_rate": 9.152410247766729e-05,
"loss": 1.0302366638183593,
"step": 18600
},
{
"epoch": 0.3545830331070575,
"grad_norm": 0.8724125623703003,
"learning_rate": 9.141875948086973e-05,
"loss": 1.0330332183837891,
"step": 18700
},
{
"epoch": 0.3564791990595017,
"grad_norm": 0.9171428680419922,
"learning_rate": 9.131446991404012e-05,
"loss": 1.0219937896728515,
"step": 18800
},
{
"epoch": 0.35837536501194583,
"grad_norm": 0.8523043394088745,
"learning_rate": 9.120912691724254e-05,
"loss": 1.0223383331298828,
"step": 18900
},
{
"epoch": 0.36027153096439,
"grad_norm": 0.8599100112915039,
"learning_rate": 9.110378392044497e-05,
"loss": 1.0254383087158203,
"step": 19000
},
{
"epoch": 0.36027153096439,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.32,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.6799999999999999,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.55,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.63,
"eval_NanoBEIR_mean_cosine_map@100": 0.3761081537709,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.43933333333333335,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.43521214929807284,
"eval_NanoBEIR_mean_cosine_precision@1": 0.32,
"eval_NanoBEIR_mean_cosine_precision@10": 0.08399999999999999,
"eval_NanoBEIR_mean_cosine_precision@3": 0.2033333333333333,
"eval_NanoBEIR_mean_cosine_precision@5": 0.14600000000000002,
"eval_NanoBEIR_mean_cosine_recall@1": 0.22,
"eval_NanoBEIR_mean_cosine_recall@10": 0.5800000000000001,
"eval_NanoBEIR_mean_cosine_recall@3": 0.435,
"eval_NanoBEIR_mean_cosine_recall@5": 0.515,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.4,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.72,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.58,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.66,
"eval_NanoHotpotQA_cosine_map@100": 0.3567383904240635,
"eval_NanoHotpotQA_cosine_mrr@10": 0.49855555555555553,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.426475283640488,
"eval_NanoHotpotQA_cosine_precision@1": 0.4,
"eval_NanoHotpotQA_cosine_precision@10": 0.10399999999999998,
"eval_NanoHotpotQA_cosine_precision@3": 0.2333333333333333,
"eval_NanoHotpotQA_cosine_precision@5": 0.172,
"eval_NanoHotpotQA_cosine_recall@1": 0.2,
"eval_NanoHotpotQA_cosine_recall@10": 0.52,
"eval_NanoHotpotQA_cosine_recall@3": 0.35,
"eval_NanoHotpotQA_cosine_recall@5": 0.43,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.24,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.64,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.52,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.6,
"eval_NanoMSMARCO_cosine_map@100": 0.3954779171177365,
"eval_NanoMSMARCO_cosine_mrr@10": 0.3801111111111111,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.44394901495565775,
"eval_NanoMSMARCO_cosine_precision@1": 0.24,
"eval_NanoMSMARCO_cosine_precision@10": 0.064,
"eval_NanoMSMARCO_cosine_precision@3": 0.1733333333333333,
"eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002,
"eval_NanoMSMARCO_cosine_recall@1": 0.24,
"eval_NanoMSMARCO_cosine_recall@10": 0.64,
"eval_NanoMSMARCO_cosine_recall@3": 0.52,
"eval_NanoMSMARCO_cosine_recall@5": 0.6,
"eval_mse-dev_negative_mse": -101.57431030273438,
"eval_runtime": 10.982,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.43521214929807284,
"eval_steps_per_second": 0.0,
"step": 19000
},
{
"epoch": 0.36216769691683415,
"grad_norm": 0.9670674204826355,
"learning_rate": 9.09984409236474e-05,
"loss": 1.0186353302001954,
"step": 19100
},
{
"epoch": 0.36406386286927833,
"grad_norm": 0.8563957810401917,
"learning_rate": 9.089309792684983e-05,
"loss": 1.0215565490722656,
"step": 19200
},
{
"epoch": 0.36596002882172246,
"grad_norm": 0.9011367559432983,
"learning_rate": 9.078775493005225e-05,
"loss": 1.0152357482910157,
"step": 19300
},
{
"epoch": 0.36785619477416664,
"grad_norm": 0.8407337665557861,
"learning_rate": 9.068241193325469e-05,
"loss": 1.0139485931396484,
"step": 19400
},
{
"epoch": 0.3697523607266108,
"grad_norm": 0.8842604756355286,
"learning_rate": 9.057706893645711e-05,
"loss": 1.0125227355957032,
"step": 19500
},
{
"epoch": 0.37164852667905496,
"grad_norm": 0.9665144085884094,
"learning_rate": 9.047172593965954e-05,
"loss": 1.008692398071289,
"step": 19600
},
{
"epoch": 0.3735446926314991,
"grad_norm": 0.8938872218132019,
"learning_rate": 9.036638294286196e-05,
"loss": 1.0044830322265625,
"step": 19700
},
{
"epoch": 0.37544085858394327,
"grad_norm": 0.8201034069061279,
"learning_rate": 9.026103994606438e-05,
"loss": 1.0031690979003907,
"step": 19800
},
{
"epoch": 0.3773370245363874,
"grad_norm": 0.8051674365997314,
"learning_rate": 9.015569694926682e-05,
"loss": 1.001277542114258,
"step": 19900
},
{
"epoch": 0.3792331904888316,
"grad_norm": 0.8701341152191162,
"learning_rate": 9.005035395246924e-05,
"loss": 1.001656494140625,
"step": 20000
},
{
"epoch": 0.3792331904888316,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.35,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.71,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.53,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.6000000000000001,
"eval_NanoBEIR_mean_cosine_map@100": 0.3885181482447372,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.45785714285714285,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.4463748112002441,
"eval_NanoBEIR_mean_cosine_precision@1": 0.35,
"eval_NanoBEIR_mean_cosine_precision@10": 0.08499999999999999,
"eval_NanoBEIR_mean_cosine_precision@3": 0.2,
"eval_NanoBEIR_mean_cosine_precision@5": 0.14,
"eval_NanoBEIR_mean_cosine_recall@1": 0.24,
"eval_NanoBEIR_mean_cosine_recall@10": 0.5900000000000001,
"eval_NanoBEIR_mean_cosine_recall@3": 0.425,
"eval_NanoBEIR_mean_cosine_recall@5": 0.49,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.44,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.76,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.56,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.64,
"eval_NanoHotpotQA_cosine_map@100": 0.37180551870545886,
"eval_NanoHotpotQA_cosine_mrr@10": 0.5252698412698412,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.43738455459561965,
"eval_NanoHotpotQA_cosine_precision@1": 0.44,
"eval_NanoHotpotQA_cosine_precision@10": 0.10399999999999998,
"eval_NanoHotpotQA_cosine_precision@3": 0.2333333333333333,
"eval_NanoHotpotQA_cosine_precision@5": 0.16799999999999998,
"eval_NanoHotpotQA_cosine_recall@1": 0.22,
"eval_NanoHotpotQA_cosine_recall@10": 0.52,
"eval_NanoHotpotQA_cosine_recall@3": 0.35,
"eval_NanoHotpotQA_cosine_recall@5": 0.42,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.26,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.66,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.5,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.56,
"eval_NanoMSMARCO_cosine_map@100": 0.40523077778401556,
"eval_NanoMSMARCO_cosine_mrr@10": 0.39044444444444454,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.4553650678048685,
"eval_NanoMSMARCO_cosine_precision@1": 0.26,
"eval_NanoMSMARCO_cosine_precision@10": 0.066,
"eval_NanoMSMARCO_cosine_precision@3": 0.16666666666666669,
"eval_NanoMSMARCO_cosine_precision@5": 0.11200000000000002,
"eval_NanoMSMARCO_cosine_recall@1": 0.26,
"eval_NanoMSMARCO_cosine_recall@10": 0.66,
"eval_NanoMSMARCO_cosine_recall@3": 0.5,
"eval_NanoMSMARCO_cosine_recall@5": 0.56,
"eval_mse-dev_negative_mse": -99.66129302978516,
"eval_runtime": 11.4825,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.4463748112002441,
"eval_steps_per_second": 0.0,
"step": 20000
},
{
"epoch": 0.3811293564412757,
"grad_norm": 0.8422971963882446,
"learning_rate": 8.994501095567167e-05,
"loss": 1.0006825256347656,
"step": 20100
},
{
"epoch": 0.3830255223937199,
"grad_norm": 0.955066978931427,
"learning_rate": 8.98396679588741e-05,
"loss": 0.9958713531494141,
"step": 20200
},
{
"epoch": 0.3849216883461641,
"grad_norm": 0.8364739418029785,
"learning_rate": 8.973432496207653e-05,
"loss": 0.9965061950683594,
"step": 20300
},
{
"epoch": 0.3868178542986082,
"grad_norm": 0.9399869441986084,
"learning_rate": 8.962898196527896e-05,
"loss": 0.9909481048583985,
"step": 20400
},
{
"epoch": 0.3887140202510524,
"grad_norm": 0.8677252531051636,
"learning_rate": 8.952363896848139e-05,
"loss": 0.9901930236816406,
"step": 20500
},
{
"epoch": 0.3906101862034965,
"grad_norm": 0.8382641077041626,
"learning_rate": 8.941829597168381e-05,
"loss": 0.9903465270996094,
"step": 20600
},
{
"epoch": 0.3925063521559407,
"grad_norm": 0.9324244856834412,
"learning_rate": 8.931295297488623e-05,
"loss": 0.9927156829833984,
"step": 20700
},
{
"epoch": 0.39440251810838484,
"grad_norm": 0.9975899457931519,
"learning_rate": 8.920760997808866e-05,
"loss": 0.9864664459228516,
"step": 20800
},
{
"epoch": 0.396298684060829,
"grad_norm": 0.8882135152816772,
"learning_rate": 8.910226698129108e-05,
"loss": 0.9842584991455078,
"step": 20900
},
{
"epoch": 0.39819485001327315,
"grad_norm": 0.8512315154075623,
"learning_rate": 8.899692398449352e-05,
"loss": 0.9808792114257813,
"step": 21000
},
{
"epoch": 0.39819485001327315,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.35,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.72,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.51,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.62,
"eval_NanoBEIR_mean_cosine_map@100": 0.3971597947674501,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.45810317460317457,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.4575468587350072,
"eval_NanoBEIR_mean_cosine_precision@1": 0.35,
"eval_NanoBEIR_mean_cosine_precision@10": 0.088,
"eval_NanoBEIR_mean_cosine_precision@3": 0.2,
"eval_NanoBEIR_mean_cosine_precision@5": 0.15,
"eval_NanoBEIR_mean_cosine_recall@1": 0.245,
"eval_NanoBEIR_mean_cosine_recall@10": 0.6100000000000001,
"eval_NanoBEIR_mean_cosine_recall@3": 0.41500000000000004,
"eval_NanoBEIR_mean_cosine_recall@5": 0.52,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.42,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.76,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.56,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.66,
"eval_NanoHotpotQA_cosine_map@100": 0.3788968895489927,
"eval_NanoHotpotQA_cosine_mrr@10": 0.5139603174603175,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.4462090585062046,
"eval_NanoHotpotQA_cosine_precision@1": 0.42,
"eval_NanoHotpotQA_cosine_precision@10": 0.10799999999999998,
"eval_NanoHotpotQA_cosine_precision@3": 0.24666666666666667,
"eval_NanoHotpotQA_cosine_precision@5": 0.184,
"eval_NanoHotpotQA_cosine_recall@1": 0.21,
"eval_NanoHotpotQA_cosine_recall@10": 0.54,
"eval_NanoHotpotQA_cosine_recall@3": 0.37,
"eval_NanoHotpotQA_cosine_recall@5": 0.46,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.28,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.68,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.46,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.58,
"eval_NanoMSMARCO_cosine_map@100": 0.41542269998590753,
"eval_NanoMSMARCO_cosine_mrr@10": 0.4022460317460317,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.46888465896380976,
"eval_NanoMSMARCO_cosine_precision@1": 0.28,
"eval_NanoMSMARCO_cosine_precision@10": 0.068,
"eval_NanoMSMARCO_cosine_precision@3": 0.15333333333333332,
"eval_NanoMSMARCO_cosine_precision@5": 0.11599999999999999,
"eval_NanoMSMARCO_cosine_recall@1": 0.28,
"eval_NanoMSMARCO_cosine_recall@10": 0.68,
"eval_NanoMSMARCO_cosine_recall@3": 0.46,
"eval_NanoMSMARCO_cosine_recall@5": 0.58,
"eval_mse-dev_negative_mse": -97.49221801757812,
"eval_runtime": 11.4324,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.4575468587350072,
"eval_steps_per_second": 0.0,
"step": 21000
},
{
"epoch": 0.40009101596571733,
"grad_norm": 0.7803339958190918,
"learning_rate": 8.889158098769594e-05,
"loss": 0.9800699615478515,
"step": 21100
},
{
"epoch": 0.40198718191816146,
"grad_norm": 0.8740707635879517,
"learning_rate": 8.878623799089836e-05,
"loss": 0.9785236358642578,
"step": 21200
},
{
"epoch": 0.40388334787060565,
"grad_norm": 0.9020572304725647,
"learning_rate": 8.868089499410079e-05,
"loss": 0.9718000793457031,
"step": 21300
},
{
"epoch": 0.4057795138230498,
"grad_norm": 0.8485739827156067,
"learning_rate": 8.857555199730322e-05,
"loss": 0.9725127410888672,
"step": 21400
},
{
"epoch": 0.40767567977549396,
"grad_norm": 0.9113863110542297,
"learning_rate": 8.847020900050565e-05,
"loss": 0.9704845428466797,
"step": 21500
},
{
"epoch": 0.4095718457279381,
"grad_norm": 0.9105412364006042,
"learning_rate": 8.836486600370809e-05,
"loss": 0.9728768157958985,
"step": 21600
},
{
"epoch": 0.4114680116803823,
"grad_norm": 0.9580652713775635,
"learning_rate": 8.825952300691051e-05,
"loss": 0.9713729095458984,
"step": 21700
},
{
"epoch": 0.4133641776328264,
"grad_norm": 0.863349199295044,
"learning_rate": 8.815418001011293e-05,
"loss": 0.9646768951416016,
"step": 21800
},
{
"epoch": 0.4152603435852706,
"grad_norm": 0.8929393291473389,
"learning_rate": 8.804883701331536e-05,
"loss": 0.9623196411132813,
"step": 21900
},
{
"epoch": 0.4171565095377147,
"grad_norm": 0.8821211457252502,
"learning_rate": 8.794349401651779e-05,
"loss": 0.9578647613525391,
"step": 22000
},
{
"epoch": 0.4171565095377147,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.35,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.72,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.55,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.63,
"eval_NanoBEIR_mean_cosine_map@100": 0.398172306882256,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.46505158730158724,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.4595267255374118,
"eval_NanoBEIR_mean_cosine_precision@1": 0.35,
"eval_NanoBEIR_mean_cosine_precision@10": 0.08800000000000001,
"eval_NanoBEIR_mean_cosine_precision@3": 0.20999999999999996,
"eval_NanoBEIR_mean_cosine_precision@5": 0.15,
"eval_NanoBEIR_mean_cosine_recall@1": 0.24,
"eval_NanoBEIR_mean_cosine_recall@10": 0.6100000000000001,
"eval_NanoBEIR_mean_cosine_recall@3": 0.435,
"eval_NanoBEIR_mean_cosine_recall@5": 0.52,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.44,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.76,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.62,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.68,
"eval_NanoHotpotQA_cosine_map@100": 0.38667158218589576,
"eval_NanoHotpotQA_cosine_mrr@10": 0.5341904761904762,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.454887980345426,
"eval_NanoHotpotQA_cosine_precision@1": 0.44,
"eval_NanoHotpotQA_cosine_precision@10": 0.10800000000000001,
"eval_NanoHotpotQA_cosine_precision@3": 0.25999999999999995,
"eval_NanoHotpotQA_cosine_precision@5": 0.184,
"eval_NanoHotpotQA_cosine_recall@1": 0.22,
"eval_NanoHotpotQA_cosine_recall@10": 0.54,
"eval_NanoHotpotQA_cosine_recall@3": 0.39,
"eval_NanoHotpotQA_cosine_recall@5": 0.46,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.26,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.68,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.48,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.58,
"eval_NanoMSMARCO_cosine_map@100": 0.40967303157861634,
"eval_NanoMSMARCO_cosine_mrr@10": 0.39591269841269827,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.46416547072939757,
"eval_NanoMSMARCO_cosine_precision@1": 0.26,
"eval_NanoMSMARCO_cosine_precision@10": 0.068,
"eval_NanoMSMARCO_cosine_precision@3": 0.15999999999999998,
"eval_NanoMSMARCO_cosine_precision@5": 0.11599999999999999,
"eval_NanoMSMARCO_cosine_recall@1": 0.26,
"eval_NanoMSMARCO_cosine_recall@10": 0.68,
"eval_NanoMSMARCO_cosine_recall@3": 0.48,
"eval_NanoMSMARCO_cosine_recall@5": 0.58,
"eval_mse-dev_negative_mse": -95.78128814697266,
"eval_runtime": 11.0251,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.4595267255374118,
"eval_steps_per_second": 0.0,
"step": 22000
},
{
"epoch": 0.4190526754901589,
"grad_norm": 0.8627265095710754,
"learning_rate": 8.783815101972022e-05,
"loss": 0.9553171539306641,
"step": 22100
},
{
"epoch": 0.4209488414426031,
"grad_norm": 0.8205426931381226,
"learning_rate": 8.773280802292264e-05,
"loss": 0.9557749176025391,
"step": 22200
},
{
"epoch": 0.4228450073950472,
"grad_norm": 0.8694571256637573,
"learning_rate": 8.762746502612506e-05,
"loss": 0.9584300994873047,
"step": 22300
},
{
"epoch": 0.4247411733474914,
"grad_norm": 0.8678444623947144,
"learning_rate": 8.752212202932749e-05,
"loss": 0.9544028472900391,
"step": 22400
},
{
"epoch": 0.4266373392999355,
"grad_norm": 0.8822008967399597,
"learning_rate": 8.741677903252991e-05,
"loss": 0.9520068359375,
"step": 22500
},
{
"epoch": 0.4285335052523797,
"grad_norm": 0.951594352722168,
"learning_rate": 8.731143603573235e-05,
"loss": 0.9515534210205078,
"step": 22600
},
{
"epoch": 0.43042967120482384,
"grad_norm": 0.9522872567176819,
"learning_rate": 8.720714646890276e-05,
"loss": 0.9542991638183593,
"step": 22700
},
{
"epoch": 0.432325837157268,
"grad_norm": 0.9078388214111328,
"learning_rate": 8.710180347210518e-05,
"loss": 0.9501979064941406,
"step": 22800
},
{
"epoch": 0.43422200310971215,
"grad_norm": 0.8574204444885254,
"learning_rate": 8.69964604753076e-05,
"loss": 0.9476995086669922,
"step": 22900
},
{
"epoch": 0.43611816906215634,
"grad_norm": 0.8338425159454346,
"learning_rate": 8.689111747851003e-05,
"loss": 0.940532455444336,
"step": 23000
},
{
"epoch": 0.43611816906215634,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.39,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.71,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.5700000000000001,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.61,
"eval_NanoBEIR_mean_cosine_map@100": 0.4144241147192019,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.4880277777777777,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.4688451653582658,
"eval_NanoBEIR_mean_cosine_precision@1": 0.39,
"eval_NanoBEIR_mean_cosine_precision@10": 0.086,
"eval_NanoBEIR_mean_cosine_precision@3": 0.21333333333333332,
"eval_NanoBEIR_mean_cosine_precision@5": 0.14399999999999996,
"eval_NanoBEIR_mean_cosine_recall@1": 0.275,
"eval_NanoBEIR_mean_cosine_recall@10": 0.595,
"eval_NanoBEIR_mean_cosine_recall@3": 0.445,
"eval_NanoBEIR_mean_cosine_recall@5": 0.505,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.46,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.76,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.64,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.64,
"eval_NanoHotpotQA_cosine_map@100": 0.38426119225152133,
"eval_NanoHotpotQA_cosine_mrr@10": 0.5458888888888889,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.4520622291124691,
"eval_NanoHotpotQA_cosine_precision@1": 0.46,
"eval_NanoHotpotQA_cosine_precision@10": 0.10599999999999998,
"eval_NanoHotpotQA_cosine_precision@3": 0.25999999999999995,
"eval_NanoHotpotQA_cosine_precision@5": 0.17199999999999996,
"eval_NanoHotpotQA_cosine_recall@1": 0.23,
"eval_NanoHotpotQA_cosine_recall@10": 0.53,
"eval_NanoHotpotQA_cosine_recall@3": 0.39,
"eval_NanoHotpotQA_cosine_recall@5": 0.43,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.32,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.66,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.5,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.58,
"eval_NanoMSMARCO_cosine_map@100": 0.4445870371868824,
"eval_NanoMSMARCO_cosine_mrr@10": 0.4301666666666666,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.48562810160406256,
"eval_NanoMSMARCO_cosine_precision@1": 0.32,
"eval_NanoMSMARCO_cosine_precision@10": 0.066,
"eval_NanoMSMARCO_cosine_precision@3": 0.16666666666666669,
"eval_NanoMSMARCO_cosine_precision@5": 0.11599999999999999,
"eval_NanoMSMARCO_cosine_recall@1": 0.32,
"eval_NanoMSMARCO_cosine_recall@10": 0.66,
"eval_NanoMSMARCO_cosine_recall@3": 0.5,
"eval_NanoMSMARCO_cosine_recall@5": 0.58,
"eval_mse-dev_negative_mse": -93.92383575439453,
"eval_runtime": 11.5971,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.4688451653582658,
"eval_steps_per_second": 0.0,
"step": 23000
},
{
"epoch": 0.43801433501460046,
"grad_norm": 0.8566615581512451,
"learning_rate": 8.678577448171246e-05,
"loss": 0.9448033142089843,
"step": 23100
},
{
"epoch": 0.43991050096704465,
"grad_norm": 0.8012374639511108,
"learning_rate": 8.668043148491489e-05,
"loss": 0.9424338531494141,
"step": 23200
},
{
"epoch": 0.4418066669194888,
"grad_norm": 0.8802723288536072,
"learning_rate": 8.657508848811732e-05,
"loss": 0.9369033050537109,
"step": 23300
},
{
"epoch": 0.44370283287193296,
"grad_norm": 0.814888596534729,
"learning_rate": 8.646974549131975e-05,
"loss": 0.93183837890625,
"step": 23400
},
{
"epoch": 0.4455989988243771,
"grad_norm": 0.8690612316131592,
"learning_rate": 8.636440249452217e-05,
"loss": 0.9342401123046875,
"step": 23500
},
{
"epoch": 0.4474951647768213,
"grad_norm": 0.8208878040313721,
"learning_rate": 8.625905949772459e-05,
"loss": 0.9391999053955078,
"step": 23600
},
{
"epoch": 0.4493913307292654,
"grad_norm": 0.8126626014709473,
"learning_rate": 8.615371650092702e-05,
"loss": 0.9358238983154297,
"step": 23700
},
{
"epoch": 0.4512874966817096,
"grad_norm": 0.8614762425422668,
"learning_rate": 8.604837350412945e-05,
"loss": 0.9303498077392578,
"step": 23800
},
{
"epoch": 0.4531836626341537,
"grad_norm": 0.8028171062469482,
"learning_rate": 8.594408393729986e-05,
"loss": 0.9305805969238281,
"step": 23900
},
{
"epoch": 0.4550798285865979,
"grad_norm": 0.8973707556724548,
"learning_rate": 8.583874094050229e-05,
"loss": 0.927711410522461,
"step": 24000
},
{
"epoch": 0.4550798285865979,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.39,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.73,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.59,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.6599999999999999,
"eval_NanoBEIR_mean_cosine_map@100": 0.4316413980898389,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.5037738095238096,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.4871808124834046,
"eval_NanoBEIR_mean_cosine_precision@1": 0.39,
"eval_NanoBEIR_mean_cosine_precision@10": 0.08900000000000001,
"eval_NanoBEIR_mean_cosine_precision@3": 0.22666666666666666,
"eval_NanoBEIR_mean_cosine_precision@5": 0.15999999999999998,
"eval_NanoBEIR_mean_cosine_recall@1": 0.275,
"eval_NanoBEIR_mean_cosine_recall@10": 0.62,
"eval_NanoBEIR_mean_cosine_recall@3": 0.455,
"eval_NanoBEIR_mean_cosine_recall@5": 0.5449999999999999,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.46,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.76,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.72,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.74,
"eval_NanoHotpotQA_cosine_map@100": 0.41902772468451716,
"eval_NanoHotpotQA_cosine_mrr@10": 0.5768571428571428,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.479755445861627,
"eval_NanoHotpotQA_cosine_precision@1": 0.46,
"eval_NanoHotpotQA_cosine_precision@10": 0.10800000000000001,
"eval_NanoHotpotQA_cosine_precision@3": 0.3,
"eval_NanoHotpotQA_cosine_precision@5": 0.204,
"eval_NanoHotpotQA_cosine_recall@1": 0.23,
"eval_NanoHotpotQA_cosine_recall@10": 0.54,
"eval_NanoHotpotQA_cosine_recall@3": 0.45,
"eval_NanoHotpotQA_cosine_recall@5": 0.51,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.32,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.7,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.46,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.58,
"eval_NanoMSMARCO_cosine_map@100": 0.44425507149516064,
"eval_NanoMSMARCO_cosine_mrr@10": 0.4306904761904762,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.49460617910518223,
"eval_NanoMSMARCO_cosine_precision@1": 0.32,
"eval_NanoMSMARCO_cosine_precision@10": 0.07,
"eval_NanoMSMARCO_cosine_precision@3": 0.15333333333333332,
"eval_NanoMSMARCO_cosine_precision@5": 0.11599999999999999,
"eval_NanoMSMARCO_cosine_recall@1": 0.32,
"eval_NanoMSMARCO_cosine_recall@10": 0.7,
"eval_NanoMSMARCO_cosine_recall@3": 0.46,
"eval_NanoMSMARCO_cosine_recall@5": 0.58,
"eval_mse-dev_negative_mse": -92.24274444580078,
"eval_runtime": 11.2493,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.4871808124834046,
"eval_steps_per_second": 0.0,
"step": 24000
},
{
"epoch": 0.45697599453904203,
"grad_norm": 0.8175747394561768,
"learning_rate": 8.573339794370471e-05,
"loss": 0.9266593170166015,
"step": 24100
},
{
"epoch": 0.4588721604914862,
"grad_norm": 0.9604556560516357,
"learning_rate": 8.562805494690713e-05,
"loss": 0.9227654266357422,
"step": 24200
},
{
"epoch": 0.4607683264439304,
"grad_norm": 0.82953941822052,
"learning_rate": 8.552271195010956e-05,
"loss": 0.9239090728759766,
"step": 24300
},
{
"epoch": 0.4626644923963745,
"grad_norm": 0.9319136142730713,
"learning_rate": 8.541736895331198e-05,
"loss": 0.9225330352783203,
"step": 24400
},
{
"epoch": 0.4645606583488187,
"grad_norm": 0.8900800943374634,
"learning_rate": 8.531202595651442e-05,
"loss": 0.9169361877441407,
"step": 24500
},
{
"epoch": 0.46645682430126284,
"grad_norm": 0.8238077759742737,
"learning_rate": 8.520668295971684e-05,
"loss": 0.9170392608642578,
"step": 24600
},
{
"epoch": 0.468352990253707,
"grad_norm": 0.9116878509521484,
"learning_rate": 8.510133996291926e-05,
"loss": 0.9195194244384766,
"step": 24700
},
{
"epoch": 0.47024915620615115,
"grad_norm": 0.8857290744781494,
"learning_rate": 8.49959969661217e-05,
"loss": 0.915346450805664,
"step": 24800
},
{
"epoch": 0.47214532215859534,
"grad_norm": 0.8089697360992432,
"learning_rate": 8.489065396932412e-05,
"loss": 0.9137913513183594,
"step": 24900
},
{
"epoch": 0.47404148811103947,
"grad_norm": 0.9027810096740723,
"learning_rate": 8.478531097252656e-05,
"loss": 0.9107527923583985,
"step": 25000
},
{
"epoch": 0.47404148811103947,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.38,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.73,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.54,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.6599999999999999,
"eval_NanoBEIR_mean_cosine_map@100": 0.4163537056013121,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.4892301587301586,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.47172532243736104,
"eval_NanoBEIR_mean_cosine_precision@1": 0.38,
"eval_NanoBEIR_mean_cosine_precision@10": 0.088,
"eval_NanoBEIR_mean_cosine_precision@3": 0.21333333333333335,
"eval_NanoBEIR_mean_cosine_precision@5": 0.15800000000000003,
"eval_NanoBEIR_mean_cosine_recall@1": 0.26,
"eval_NanoBEIR_mean_cosine_recall@10": 0.605,
"eval_NanoBEIR_mean_cosine_recall@3": 0.43,
"eval_NanoBEIR_mean_cosine_recall@5": 0.5449999999999999,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.48,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.8,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.64,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.72,
"eval_NanoHotpotQA_cosine_map@100": 0.41568504331630274,
"eval_NanoHotpotQA_cosine_mrr@10": 0.5788571428571428,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.48124067192024733,
"eval_NanoHotpotQA_cosine_precision@1": 0.48,
"eval_NanoHotpotQA_cosine_precision@10": 0.11,
"eval_NanoHotpotQA_cosine_precision@3": 0.28,
"eval_NanoHotpotQA_cosine_precision@5": 0.196,
"eval_NanoHotpotQA_cosine_recall@1": 0.24,
"eval_NanoHotpotQA_cosine_recall@10": 0.55,
"eval_NanoHotpotQA_cosine_recall@3": 0.42,
"eval_NanoHotpotQA_cosine_recall@5": 0.49,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.28,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.66,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.44,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.6,
"eval_NanoMSMARCO_cosine_map@100": 0.4170223678863214,
"eval_NanoMSMARCO_cosine_mrr@10": 0.39960317460317446,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.46220997295447475,
"eval_NanoMSMARCO_cosine_precision@1": 0.28,
"eval_NanoMSMARCO_cosine_precision@10": 0.066,
"eval_NanoMSMARCO_cosine_precision@3": 0.14666666666666667,
"eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002,
"eval_NanoMSMARCO_cosine_recall@1": 0.28,
"eval_NanoMSMARCO_cosine_recall@10": 0.66,
"eval_NanoMSMARCO_cosine_recall@3": 0.44,
"eval_NanoMSMARCO_cosine_recall@5": 0.6,
"eval_mse-dev_negative_mse": -90.7634506225586,
"eval_runtime": 11.8991,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.47172532243736104,
"eval_steps_per_second": 0.0,
"step": 25000
},
{
"epoch": 0.47593765406348365,
"grad_norm": 0.8811827898025513,
"learning_rate": 8.467996797572898e-05,
"loss": 0.913282470703125,
"step": 25100
},
{
"epoch": 0.4778338200159278,
"grad_norm": 0.7843953967094421,
"learning_rate": 8.457462497893141e-05,
"loss": 0.9076313781738281,
"step": 25200
},
{
"epoch": 0.47972998596837196,
"grad_norm": 0.9090595245361328,
"learning_rate": 8.446928198213383e-05,
"loss": 0.9081029510498047,
"step": 25300
},
{
"epoch": 0.4816261519208161,
"grad_norm": 0.9231439828872681,
"learning_rate": 8.436393898533625e-05,
"loss": 0.9092655944824218,
"step": 25400
},
{
"epoch": 0.4835223178732603,
"grad_norm": 0.8273399472236633,
"learning_rate": 8.425859598853869e-05,
"loss": 0.9036608123779297,
"step": 25500
},
{
"epoch": 0.4854184838257044,
"grad_norm": 0.9115743637084961,
"learning_rate": 8.415325299174111e-05,
"loss": 0.9024863433837891,
"step": 25600
},
{
"epoch": 0.4873146497781486,
"grad_norm": 0.8682368993759155,
"learning_rate": 8.404790999494354e-05,
"loss": 0.9058139801025391,
"step": 25700
},
{
"epoch": 0.4892108157305927,
"grad_norm": 0.8775367140769958,
"learning_rate": 8.394256699814596e-05,
"loss": 0.901763916015625,
"step": 25800
},
{
"epoch": 0.4911069816830369,
"grad_norm": 0.8083050847053528,
"learning_rate": 8.383722400134838e-05,
"loss": 0.901358642578125,
"step": 25900
},
{
"epoch": 0.49300314763548103,
"grad_norm": 0.8163812160491943,
"learning_rate": 8.373188100455082e-05,
"loss": 0.8946353912353515,
"step": 26000
},
{
"epoch": 0.49300314763548103,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.38,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.73,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.56,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.6399999999999999,
"eval_NanoBEIR_mean_cosine_map@100": 0.424081987400691,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.49457936507936506,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.4851060192183279,
"eval_NanoBEIR_mean_cosine_precision@1": 0.38,
"eval_NanoBEIR_mean_cosine_precision@10": 0.092,
"eval_NanoBEIR_mean_cosine_precision@3": 0.22,
"eval_NanoBEIR_mean_cosine_precision@5": 0.158,
"eval_NanoBEIR_mean_cosine_recall@1": 0.26,
"eval_NanoBEIR_mean_cosine_recall@10": 0.63,
"eval_NanoBEIR_mean_cosine_recall@3": 0.445,
"eval_NanoBEIR_mean_cosine_recall@5": 0.54,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.48,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.78,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.66,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.7,
"eval_NanoHotpotQA_cosine_map@100": 0.42326811379287077,
"eval_NanoHotpotQA_cosine_mrr@10": 0.5793888888888888,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.4956820213676064,
"eval_NanoHotpotQA_cosine_precision@1": 0.48,
"eval_NanoHotpotQA_cosine_precision@10": 0.11599999999999998,
"eval_NanoHotpotQA_cosine_precision@3": 0.2866666666666667,
"eval_NanoHotpotQA_cosine_precision@5": 0.2,
"eval_NanoHotpotQA_cosine_recall@1": 0.24,
"eval_NanoHotpotQA_cosine_recall@10": 0.58,
"eval_NanoHotpotQA_cosine_recall@3": 0.43,
"eval_NanoHotpotQA_cosine_recall@5": 0.5,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.28,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.68,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.46,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.58,
"eval_NanoMSMARCO_cosine_map@100": 0.4248958610085112,
"eval_NanoMSMARCO_cosine_mrr@10": 0.40976984126984123,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.4745300170690494,
"eval_NanoMSMARCO_cosine_precision@1": 0.28,
"eval_NanoMSMARCO_cosine_precision@10": 0.068,
"eval_NanoMSMARCO_cosine_precision@3": 0.15333333333333332,
"eval_NanoMSMARCO_cosine_precision@5": 0.11599999999999999,
"eval_NanoMSMARCO_cosine_recall@1": 0.28,
"eval_NanoMSMARCO_cosine_recall@10": 0.68,
"eval_NanoMSMARCO_cosine_recall@3": 0.46,
"eval_NanoMSMARCO_cosine_recall@5": 0.58,
"eval_mse-dev_negative_mse": -89.25623321533203,
"eval_runtime": 10.0875,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.4851060192183279,
"eval_steps_per_second": 0.0,
"step": 26000
},
{
"epoch": 0.4948993135879252,
"grad_norm": 0.8695216178894043,
"learning_rate": 8.362653800775326e-05,
"loss": 0.8982176208496093,
"step": 26100
},
{
"epoch": 0.4967954795403694,
"grad_norm": 0.87025386095047,
"learning_rate": 8.352119501095568e-05,
"loss": 0.8945767211914063,
"step": 26200
},
{
"epoch": 0.49869164549281353,
"grad_norm": 0.8507541418075562,
"learning_rate": 8.34158520141581e-05,
"loss": 0.8941314697265625,
"step": 26300
},
{
"epoch": 0.5005878114452577,
"grad_norm": 0.9079861044883728,
"learning_rate": 8.33115624473285e-05,
"loss": 0.8925470733642578,
"step": 26400
},
{
"epoch": 0.5024839773977019,
"grad_norm": 0.8484945893287659,
"learning_rate": 8.320621945053094e-05,
"loss": 0.8947381591796875,
"step": 26500
},
{
"epoch": 0.504380143350146,
"grad_norm": 0.889153003692627,
"learning_rate": 8.310087645373336e-05,
"loss": 0.89056884765625,
"step": 26600
},
{
"epoch": 0.5062763093025902,
"grad_norm": 0.7697421312332153,
"learning_rate": 8.29955334569358e-05,
"loss": 0.889549560546875,
"step": 26700
},
{
"epoch": 0.5081724752550343,
"grad_norm": 0.8403399586677551,
"learning_rate": 8.289019046013822e-05,
"loss": 0.886633529663086,
"step": 26800
},
{
"epoch": 0.5100686412074785,
"grad_norm": 0.9034698009490967,
"learning_rate": 8.278484746334064e-05,
"loss": 0.8839826965332032,
"step": 26900
},
{
"epoch": 0.5119648071599227,
"grad_norm": 0.8018946051597595,
"learning_rate": 8.267950446654307e-05,
"loss": 0.8764205932617187,
"step": 27000
},
{
"epoch": 0.5119648071599227,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.45000000000000007,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.75,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.56,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.6799999999999999,
"eval_NanoBEIR_mean_cosine_map@100": 0.4516633759155257,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.5368730158730158,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.5092015395473641,
"eval_NanoBEIR_mean_cosine_precision@1": 0.45000000000000007,
"eval_NanoBEIR_mean_cosine_precision@10": 0.092,
"eval_NanoBEIR_mean_cosine_precision@3": 0.21666666666666667,
"eval_NanoBEIR_mean_cosine_precision@5": 0.166,
"eval_NanoBEIR_mean_cosine_recall@1": 0.31000000000000005,
"eval_NanoBEIR_mean_cosine_recall@10": 0.635,
"eval_NanoBEIR_mean_cosine_recall@3": 0.44,
"eval_NanoBEIR_mean_cosine_recall@5": 0.565,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.56,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.8,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.66,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.76,
"eval_NanoHotpotQA_cosine_map@100": 0.4514755786098336,
"eval_NanoHotpotQA_cosine_mrr@10": 0.6343888888888888,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.5172578575160077,
"eval_NanoHotpotQA_cosine_precision@1": 0.56,
"eval_NanoHotpotQA_cosine_precision@10": 0.11399999999999999,
"eval_NanoHotpotQA_cosine_precision@3": 0.28,
"eval_NanoHotpotQA_cosine_precision@5": 0.212,
"eval_NanoHotpotQA_cosine_recall@1": 0.28,
"eval_NanoHotpotQA_cosine_recall@10": 0.57,
"eval_NanoHotpotQA_cosine_recall@3": 0.42,
"eval_NanoHotpotQA_cosine_recall@5": 0.53,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.34,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.7,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.46,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.6,
"eval_NanoMSMARCO_cosine_map@100": 0.45185117322121776,
"eval_NanoMSMARCO_cosine_mrr@10": 0.43935714285714284,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.5011452215787204,
"eval_NanoMSMARCO_cosine_precision@1": 0.34,
"eval_NanoMSMARCO_cosine_precision@10": 0.07,
"eval_NanoMSMARCO_cosine_precision@3": 0.15333333333333332,
"eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002,
"eval_NanoMSMARCO_cosine_recall@1": 0.34,
"eval_NanoMSMARCO_cosine_recall@10": 0.7,
"eval_NanoMSMARCO_cosine_recall@3": 0.46,
"eval_NanoMSMARCO_cosine_recall@5": 0.6,
"eval_mse-dev_negative_mse": -87.8038558959961,
"eval_runtime": 12.2079,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5092015395473641,
"eval_steps_per_second": 0.0,
"step": 27000
},
{
"epoch": 0.5138609731123668,
"grad_norm": 0.8619687557220459,
"learning_rate": 8.257416146974549e-05,
"loss": 0.8859089660644531,
"step": 27100
},
{
"epoch": 0.5157571390648109,
"grad_norm": 0.8811931610107422,
"learning_rate": 8.246881847294792e-05,
"loss": 0.8839226531982421,
"step": 27200
},
{
"epoch": 0.5176533050172551,
"grad_norm": 0.8505755066871643,
"learning_rate": 8.236347547615035e-05,
"loss": 0.8794448852539063,
"step": 27300
},
{
"epoch": 0.5195494709696993,
"grad_norm": 0.8391817212104797,
"learning_rate": 8.225813247935278e-05,
"loss": 0.8790214538574219,
"step": 27400
},
{
"epoch": 0.5214456369221434,
"grad_norm": 0.7982373237609863,
"learning_rate": 8.21527894825552e-05,
"loss": 0.8788404083251953,
"step": 27500
},
{
"epoch": 0.5233418028745875,
"grad_norm": 0.87211674451828,
"learning_rate": 8.204744648575762e-05,
"loss": 0.8779651641845703,
"step": 27600
},
{
"epoch": 0.5252379688270318,
"grad_norm": 0.8461468815803528,
"learning_rate": 8.194210348896006e-05,
"loss": 0.8749393463134766,
"step": 27700
},
{
"epoch": 0.5271341347794759,
"grad_norm": 0.8423062562942505,
"learning_rate": 8.18367604921625e-05,
"loss": 0.8741777038574219,
"step": 27800
},
{
"epoch": 0.52903030073192,
"grad_norm": 0.8545904159545898,
"learning_rate": 8.173141749536492e-05,
"loss": 0.8700465393066407,
"step": 27900
},
{
"epoch": 0.5309264666843642,
"grad_norm": 0.8632199764251709,
"learning_rate": 8.162607449856734e-05,
"loss": 0.8691284942626953,
"step": 28000
},
{
"epoch": 0.5309264666843642,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.39,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.74,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.54,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.6599999999999999,
"eval_NanoBEIR_mean_cosine_map@100": 0.43119589947238984,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.4965515873015872,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.4856222050166246,
"eval_NanoBEIR_mean_cosine_precision@1": 0.39,
"eval_NanoBEIR_mean_cosine_precision@10": 0.09,
"eval_NanoBEIR_mean_cosine_precision@3": 0.21333333333333332,
"eval_NanoBEIR_mean_cosine_precision@5": 0.15800000000000003,
"eval_NanoBEIR_mean_cosine_recall@1": 0.275,
"eval_NanoBEIR_mean_cosine_recall@10": 0.6200000000000001,
"eval_NanoBEIR_mean_cosine_recall@3": 0.44,
"eval_NanoBEIR_mean_cosine_recall@5": 0.5449999999999999,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.46,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.8,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.6,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.72,
"eval_NanoHotpotQA_cosine_map@100": 0.4123353201122342,
"eval_NanoHotpotQA_cosine_mrr@10": 0.5583888888888888,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.4776237090129175,
"eval_NanoHotpotQA_cosine_precision@1": 0.46,
"eval_NanoHotpotQA_cosine_precision@10": 0.11199999999999999,
"eval_NanoHotpotQA_cosine_precision@3": 0.26666666666666666,
"eval_NanoHotpotQA_cosine_precision@5": 0.196,
"eval_NanoHotpotQA_cosine_recall@1": 0.23,
"eval_NanoHotpotQA_cosine_recall@10": 0.56,
"eval_NanoHotpotQA_cosine_recall@3": 0.4,
"eval_NanoHotpotQA_cosine_recall@5": 0.49,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.32,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.68,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.48,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.6,
"eval_NanoMSMARCO_cosine_map@100": 0.45005647883254546,
"eval_NanoMSMARCO_cosine_mrr@10": 0.43471428571428566,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.49362070102033173,
"eval_NanoMSMARCO_cosine_precision@1": 0.32,
"eval_NanoMSMARCO_cosine_precision@10": 0.068,
"eval_NanoMSMARCO_cosine_precision@3": 0.15999999999999998,
"eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002,
"eval_NanoMSMARCO_cosine_recall@1": 0.32,
"eval_NanoMSMARCO_cosine_recall@10": 0.68,
"eval_NanoMSMARCO_cosine_recall@3": 0.48,
"eval_NanoMSMARCO_cosine_recall@5": 0.6,
"eval_mse-dev_negative_mse": -86.44185638427734,
"eval_runtime": 14.6199,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.4856222050166246,
"eval_steps_per_second": 0.0,
"step": 28000
},
{
"epoch": 0.5328226326368084,
"grad_norm": 0.9085125923156738,
"learning_rate": 8.152073150176977e-05,
"loss": 0.874675521850586,
"step": 28100
},
{
"epoch": 0.5347187985892525,
"grad_norm": 0.8658029437065125,
"learning_rate": 8.141538850497219e-05,
"loss": 0.8643728637695313,
"step": 28200
},
{
"epoch": 0.5366149645416967,
"grad_norm": 0.9218304753303528,
"learning_rate": 8.131004550817463e-05,
"loss": 0.8673239898681641,
"step": 28300
},
{
"epoch": 0.5385111304941409,
"grad_norm": 0.8571885228157043,
"learning_rate": 8.120470251137705e-05,
"loss": 0.86698486328125,
"step": 28400
},
{
"epoch": 0.540407296446585,
"grad_norm": 0.8248752355575562,
"learning_rate": 8.109935951457947e-05,
"loss": 0.863829116821289,
"step": 28500
},
{
"epoch": 0.5423034623990292,
"grad_norm": 0.9771467447280884,
"learning_rate": 8.09940165177819e-05,
"loss": 0.8649395751953125,
"step": 28600
},
{
"epoch": 0.5441996283514733,
"grad_norm": 0.8203988075256348,
"learning_rate": 8.088867352098432e-05,
"loss": 0.8629121398925781,
"step": 28700
},
{
"epoch": 0.5460957943039175,
"grad_norm": 0.7756925225257874,
"learning_rate": 8.078333052418676e-05,
"loss": 0.8629222106933594,
"step": 28800
},
{
"epoch": 0.5479919602563617,
"grad_norm": 0.8539568781852722,
"learning_rate": 8.067798752738918e-05,
"loss": 0.8591197204589843,
"step": 28900
},
{
"epoch": 0.5498881262088058,
"grad_norm": 0.8543459177017212,
"learning_rate": 8.057264453059162e-05,
"loss": 0.856646499633789,
"step": 29000
},
{
"epoch": 0.5498881262088058,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.37,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.73,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.5800000000000001,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.6699999999999999,
"eval_NanoBEIR_mean_cosine_map@100": 0.42708582266190265,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.49876190476190474,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.48552032214505464,
"eval_NanoBEIR_mean_cosine_precision@1": 0.37,
"eval_NanoBEIR_mean_cosine_precision@10": 0.091,
"eval_NanoBEIR_mean_cosine_precision@3": 0.2233333333333333,
"eval_NanoBEIR_mean_cosine_precision@5": 0.162,
"eval_NanoBEIR_mean_cosine_recall@1": 0.26,
"eval_NanoBEIR_mean_cosine_recall@10": 0.62,
"eval_NanoBEIR_mean_cosine_recall@3": 0.45499999999999996,
"eval_NanoBEIR_mean_cosine_recall@5": 0.5549999999999999,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.44,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.8,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.68,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.74,
"eval_NanoHotpotQA_cosine_map@100": 0.41616973619547015,
"eval_NanoHotpotQA_cosine_mrr@10": 0.5761666666666667,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.49181648887243534,
"eval_NanoHotpotQA_cosine_precision@1": 0.44,
"eval_NanoHotpotQA_cosine_precision@10": 0.11599999999999998,
"eval_NanoHotpotQA_cosine_precision@3": 0.2866666666666666,
"eval_NanoHotpotQA_cosine_precision@5": 0.204,
"eval_NanoHotpotQA_cosine_recall@1": 0.22,
"eval_NanoHotpotQA_cosine_recall@10": 0.58,
"eval_NanoHotpotQA_cosine_recall@3": 0.43,
"eval_NanoHotpotQA_cosine_recall@5": 0.51,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.3,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.66,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.48,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.6,
"eval_NanoMSMARCO_cosine_map@100": 0.43800190912833514,
"eval_NanoMSMARCO_cosine_mrr@10": 0.42135714285714276,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.47922415541767394,
"eval_NanoMSMARCO_cosine_precision@1": 0.3,
"eval_NanoMSMARCO_cosine_precision@10": 0.066,
"eval_NanoMSMARCO_cosine_precision@3": 0.15999999999999998,
"eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002,
"eval_NanoMSMARCO_cosine_recall@1": 0.3,
"eval_NanoMSMARCO_cosine_recall@10": 0.66,
"eval_NanoMSMARCO_cosine_recall@3": 0.48,
"eval_NanoMSMARCO_cosine_recall@5": 0.6,
"eval_mse-dev_negative_mse": -85.04077911376953,
"eval_runtime": 12.4825,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.48552032214505464,
"eval_steps_per_second": 0.0,
"step": 29000
},
{
"epoch": 0.5517842921612499,
"grad_norm": 0.8201111555099487,
"learning_rate": 8.046730153379404e-05,
"loss": 0.8587515258789062,
"step": 29100
},
{
"epoch": 0.5536804581136942,
"grad_norm": 0.8306780457496643,
"learning_rate": 8.036195853699647e-05,
"loss": 0.8544799041748047,
"step": 29200
},
{
"epoch": 0.5555766240661383,
"grad_norm": 0.8447550535202026,
"learning_rate": 8.025661554019889e-05,
"loss": 0.8534080505371093,
"step": 29300
},
{
"epoch": 0.5574727900185824,
"grad_norm": 0.8507358431816101,
"learning_rate": 8.015127254340133e-05,
"loss": 0.8543455505371094,
"step": 29400
},
{
"epoch": 0.5593689559710265,
"grad_norm": 0.8200713396072388,
"learning_rate": 8.004592954660375e-05,
"loss": 0.8533712768554688,
"step": 29500
},
{
"epoch": 0.5612651219234708,
"grad_norm": 0.8041396141052246,
"learning_rate": 7.994058654980617e-05,
"loss": 0.8519126129150391,
"step": 29600
},
{
"epoch": 0.5631612878759149,
"grad_norm": 0.8296621441841125,
"learning_rate": 7.98352435530086e-05,
"loss": 0.8486277008056641,
"step": 29700
},
{
"epoch": 0.565057453828359,
"grad_norm": 0.8634279370307922,
"learning_rate": 7.972990055621102e-05,
"loss": 0.8529573822021485,
"step": 29800
},
{
"epoch": 0.5669536197808032,
"grad_norm": 0.9058282375335693,
"learning_rate": 7.962455755941344e-05,
"loss": 0.8476997375488281,
"step": 29900
},
{
"epoch": 0.5688497857332474,
"grad_norm": 0.8404967784881592,
"learning_rate": 7.951921456261588e-05,
"loss": 0.8465479278564453,
"step": 30000
},
{
"epoch": 0.5688497857332474,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.4,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.75,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.61,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.6699999999999999,
"eval_NanoBEIR_mean_cosine_map@100": 0.44285306339192243,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.5205357142857143,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.5041657136741884,
"eval_NanoBEIR_mean_cosine_precision@1": 0.4,
"eval_NanoBEIR_mean_cosine_precision@10": 0.093,
"eval_NanoBEIR_mean_cosine_precision@3": 0.2366666666666667,
"eval_NanoBEIR_mean_cosine_precision@5": 0.164,
"eval_NanoBEIR_mean_cosine_recall@1": 0.275,
"eval_NanoBEIR_mean_cosine_recall@10": 0.645,
"eval_NanoBEIR_mean_cosine_recall@3": 0.48,
"eval_NanoBEIR_mean_cosine_recall@5": 0.5549999999999999,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.5,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.78,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.72,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.76,
"eval_NanoHotpotQA_cosine_map@100": 0.4425880574674547,
"eval_NanoHotpotQA_cosine_mrr@10": 0.6113333333333334,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.5096854578340355,
"eval_NanoHotpotQA_cosine_precision@1": 0.5,
"eval_NanoHotpotQA_cosine_precision@10": 0.11399999999999999,
"eval_NanoHotpotQA_cosine_precision@3": 0.3066666666666667,
"eval_NanoHotpotQA_cosine_precision@5": 0.212,
"eval_NanoHotpotQA_cosine_recall@1": 0.25,
"eval_NanoHotpotQA_cosine_recall@10": 0.57,
"eval_NanoHotpotQA_cosine_recall@3": 0.46,
"eval_NanoHotpotQA_cosine_recall@5": 0.53,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.3,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.72,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.5,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.58,
"eval_NanoMSMARCO_cosine_map@100": 0.44311806931639014,
"eval_NanoMSMARCO_cosine_mrr@10": 0.4297380952380952,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.49864596951434115,
"eval_NanoMSMARCO_cosine_precision@1": 0.3,
"eval_NanoMSMARCO_cosine_precision@10": 0.07200000000000001,
"eval_NanoMSMARCO_cosine_precision@3": 0.16666666666666669,
"eval_NanoMSMARCO_cosine_precision@5": 0.11600000000000002,
"eval_NanoMSMARCO_cosine_recall@1": 0.3,
"eval_NanoMSMARCO_cosine_recall@10": 0.72,
"eval_NanoMSMARCO_cosine_recall@3": 0.5,
"eval_NanoMSMARCO_cosine_recall@5": 0.58,
"eval_mse-dev_negative_mse": -83.94352722167969,
"eval_runtime": 11.241,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5041657136741884,
"eval_steps_per_second": 0.0,
"step": 30000
},
{
"epoch": 0.5707459516856915,
"grad_norm": 0.8594946265220642,
"learning_rate": 7.94138715658183e-05,
"loss": 0.8424729156494141,
"step": 30100
},
{
"epoch": 0.5726421176381357,
"grad_norm": 0.8535016775131226,
"learning_rate": 7.930852856902074e-05,
"loss": 0.8437194061279297,
"step": 30200
},
{
"epoch": 0.5745382835905799,
"grad_norm": 0.8929939866065979,
"learning_rate": 7.920318557222316e-05,
"loss": 0.8429566192626953,
"step": 30300
},
{
"epoch": 0.576434449543024,
"grad_norm": 0.7629504203796387,
"learning_rate": 7.909784257542559e-05,
"loss": 0.8431417846679687,
"step": 30400
},
{
"epoch": 0.5783306154954682,
"grad_norm": 0.8285149335861206,
"learning_rate": 7.899355300859598e-05,
"loss": 0.8423690032958985,
"step": 30500
},
{
"epoch": 0.5802267814479123,
"grad_norm": 0.866598904132843,
"learning_rate": 7.888821001179842e-05,
"loss": 0.8403389739990235,
"step": 30600
},
{
"epoch": 0.5821229474003565,
"grad_norm": 0.8084122538566589,
"learning_rate": 7.878286701500086e-05,
"loss": 0.8347031402587891,
"step": 30700
},
{
"epoch": 0.5840191133528007,
"grad_norm": 0.8977468013763428,
"learning_rate": 7.867752401820328e-05,
"loss": 0.8343724822998047,
"step": 30800
},
{
"epoch": 0.5859152793052448,
"grad_norm": 0.8902882933616638,
"learning_rate": 7.85721810214057e-05,
"loss": 0.8348311614990235,
"step": 30900
},
{
"epoch": 0.5878114452576889,
"grad_norm": 0.9056336283683777,
"learning_rate": 7.846683802460813e-05,
"loss": 0.8350757598876953,
"step": 31000
},
{
"epoch": 0.5878114452576889,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.39,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.78,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.5900000000000001,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.6799999999999999,
"eval_NanoBEIR_mean_cosine_map@100": 0.43741232846707523,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.5121309523809523,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.5043257907139727,
"eval_NanoBEIR_mean_cosine_precision@1": 0.39,
"eval_NanoBEIR_mean_cosine_precision@10": 0.096,
"eval_NanoBEIR_mean_cosine_precision@3": 0.22666666666666668,
"eval_NanoBEIR_mean_cosine_precision@5": 0.16399999999999998,
"eval_NanoBEIR_mean_cosine_recall@1": 0.27,
"eval_NanoBEIR_mean_cosine_recall@10": 0.6599999999999999,
"eval_NanoBEIR_mean_cosine_recall@3": 0.46499999999999997,
"eval_NanoBEIR_mean_cosine_recall@5": 0.565,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.48,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.84,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.68,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.74,
"eval_NanoHotpotQA_cosine_map@100": 0.4314881589932104,
"eval_NanoHotpotQA_cosine_mrr@10": 0.5937380952380952,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.508792106805762,
"eval_NanoHotpotQA_cosine_precision@1": 0.48,
"eval_NanoHotpotQA_cosine_precision@10": 0.11999999999999998,
"eval_NanoHotpotQA_cosine_precision@3": 0.2866666666666667,
"eval_NanoHotpotQA_cosine_precision@5": 0.204,
"eval_NanoHotpotQA_cosine_recall@1": 0.24,
"eval_NanoHotpotQA_cosine_recall@10": 0.6,
"eval_NanoHotpotQA_cosine_recall@3": 0.43,
"eval_NanoHotpotQA_cosine_recall@5": 0.51,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.3,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.72,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.5,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.62,
"eval_NanoMSMARCO_cosine_map@100": 0.4433364979409401,
"eval_NanoMSMARCO_cosine_mrr@10": 0.43052380952380953,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.4998594746221832,
"eval_NanoMSMARCO_cosine_precision@1": 0.3,
"eval_NanoMSMARCO_cosine_precision@10": 0.07200000000000001,
"eval_NanoMSMARCO_cosine_precision@3": 0.16666666666666669,
"eval_NanoMSMARCO_cosine_precision@5": 0.124,
"eval_NanoMSMARCO_cosine_recall@1": 0.3,
"eval_NanoMSMARCO_cosine_recall@10": 0.72,
"eval_NanoMSMARCO_cosine_recall@3": 0.5,
"eval_NanoMSMARCO_cosine_recall@5": 0.62,
"eval_mse-dev_negative_mse": -82.8113021850586,
"eval_runtime": 11.2139,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5043257907139727,
"eval_steps_per_second": 0.0,
"step": 31000
},
{
"epoch": 0.5897076112101332,
"grad_norm": 0.841434895992279,
"learning_rate": 7.836149502781055e-05,
"loss": 0.8361685943603515,
"step": 31100
},
{
"epoch": 0.5916037771625773,
"grad_norm": 0.8636693358421326,
"learning_rate": 7.825615203101299e-05,
"loss": 0.8306892395019532,
"step": 31200
},
{
"epoch": 0.5934999431150214,
"grad_norm": 0.9691203236579895,
"learning_rate": 7.815080903421541e-05,
"loss": 0.8314771270751953,
"step": 31300
},
{
"epoch": 0.5953961090674655,
"grad_norm": 0.862746000289917,
"learning_rate": 7.804546603741783e-05,
"loss": 0.8310930633544922,
"step": 31400
},
{
"epoch": 0.5972922750199098,
"grad_norm": 0.9316207766532898,
"learning_rate": 7.794012304062026e-05,
"loss": 0.8304837036132813,
"step": 31500
},
{
"epoch": 0.5991884409723539,
"grad_norm": 0.8787679672241211,
"learning_rate": 7.783478004382268e-05,
"loss": 0.8304119873046875,
"step": 31600
},
{
"epoch": 0.601084606924798,
"grad_norm": 0.8498113751411438,
"learning_rate": 7.772943704702512e-05,
"loss": 0.8277024841308593,
"step": 31700
},
{
"epoch": 0.6029807728772422,
"grad_norm": 0.7722318768501282,
"learning_rate": 7.762409405022754e-05,
"loss": 0.8249209594726562,
"step": 31800
},
{
"epoch": 0.6048769388296864,
"grad_norm": 0.8988415598869324,
"learning_rate": 7.751875105342998e-05,
"loss": 0.8261857604980469,
"step": 31900
},
{
"epoch": 0.6067731047821305,
"grad_norm": 0.8066183924674988,
"learning_rate": 7.74134080566324e-05,
"loss": 0.8235664367675781,
"step": 32000
},
{
"epoch": 0.6067731047821305,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.42000000000000004,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.74,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.61,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.65,
"eval_NanoBEIR_mean_cosine_map@100": 0.45000936240708805,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.5258571428571428,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.5033699936729163,
"eval_NanoBEIR_mean_cosine_precision@1": 0.42000000000000004,
"eval_NanoBEIR_mean_cosine_precision@10": 0.09199999999999998,
"eval_NanoBEIR_mean_cosine_precision@3": 0.2433333333333333,
"eval_NanoBEIR_mean_cosine_precision@5": 0.158,
"eval_NanoBEIR_mean_cosine_recall@1": 0.28500000000000003,
"eval_NanoBEIR_mean_cosine_recall@10": 0.625,
"eval_NanoBEIR_mean_cosine_recall@3": 0.495,
"eval_NanoBEIR_mean_cosine_recall@5": 0.535,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.54,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.82,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.7,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.74,
"eval_NanoHotpotQA_cosine_map@100": 0.45760396761575023,
"eval_NanoHotpotQA_cosine_mrr@10": 0.6278571428571429,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.525620245048735,
"eval_NanoHotpotQA_cosine_precision@1": 0.54,
"eval_NanoHotpotQA_cosine_precision@10": 0.11799999999999997,
"eval_NanoHotpotQA_cosine_precision@3": 0.3133333333333333,
"eval_NanoHotpotQA_cosine_precision@5": 0.204,
"eval_NanoHotpotQA_cosine_recall@1": 0.27,
"eval_NanoHotpotQA_cosine_recall@10": 0.59,
"eval_NanoHotpotQA_cosine_recall@3": 0.47,
"eval_NanoHotpotQA_cosine_recall@5": 0.51,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.3,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.66,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.52,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.56,
"eval_NanoMSMARCO_cosine_map@100": 0.4424147571984259,
"eval_NanoMSMARCO_cosine_mrr@10": 0.42385714285714277,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.48111974229709764,
"eval_NanoMSMARCO_cosine_precision@1": 0.3,
"eval_NanoMSMARCO_cosine_precision@10": 0.066,
"eval_NanoMSMARCO_cosine_precision@3": 0.1733333333333333,
"eval_NanoMSMARCO_cosine_precision@5": 0.11200000000000002,
"eval_NanoMSMARCO_cosine_recall@1": 0.3,
"eval_NanoMSMARCO_cosine_recall@10": 0.66,
"eval_NanoMSMARCO_cosine_recall@3": 0.52,
"eval_NanoMSMARCO_cosine_recall@5": 0.56,
"eval_mse-dev_negative_mse": -81.73892211914062,
"eval_runtime": 10.8876,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5033699936729163,
"eval_steps_per_second": 0.0,
"step": 32000
},
{
"epoch": 0.6086692707345747,
"grad_norm": 0.8193183541297913,
"learning_rate": 7.730806505983482e-05,
"loss": 0.82093994140625,
"step": 32100
},
{
"epoch": 0.6105654366870189,
"grad_norm": 0.8819192051887512,
"learning_rate": 7.720272206303726e-05,
"loss": 0.8226362609863281,
"step": 32200
},
{
"epoch": 0.612461602639463,
"grad_norm": 0.8473449349403381,
"learning_rate": 7.709737906623968e-05,
"loss": 0.8206555938720703,
"step": 32300
},
{
"epoch": 0.6143577685919072,
"grad_norm": 0.8858373761177063,
"learning_rate": 7.699203606944211e-05,
"loss": 0.8223712921142579,
"step": 32400
},
{
"epoch": 0.6162539345443513,
"grad_norm": 0.8924335837364197,
"learning_rate": 7.688669307264453e-05,
"loss": 0.8162551879882812,
"step": 32500
},
{
"epoch": 0.6181501004967955,
"grad_norm": 0.9139745235443115,
"learning_rate": 7.678135007584696e-05,
"loss": 0.818095932006836,
"step": 32600
},
{
"epoch": 0.6200462664492397,
"grad_norm": 0.8812312483787537,
"learning_rate": 7.667600707904938e-05,
"loss": 0.8147300720214844,
"step": 32700
},
{
"epoch": 0.6219424324016838,
"grad_norm": 0.8906788229942322,
"learning_rate": 7.657066408225182e-05,
"loss": 0.8169952392578125,
"step": 32800
},
{
"epoch": 0.6238385983541279,
"grad_norm": 0.8133891820907593,
"learning_rate": 7.646532108545424e-05,
"loss": 0.815572509765625,
"step": 32900
},
{
"epoch": 0.6257347643065722,
"grad_norm": 0.8570773601531982,
"learning_rate": 7.635997808865668e-05,
"loss": 0.814079818725586,
"step": 33000
},
{
"epoch": 0.6257347643065722,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.4,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.78,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.5800000000000001,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.62,
"eval_NanoBEIR_mean_cosine_map@100": 0.4399737313034061,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.5117857142857143,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.5063679664931301,
"eval_NanoBEIR_mean_cosine_precision@1": 0.4,
"eval_NanoBEIR_mean_cosine_precision@10": 0.097,
"eval_NanoBEIR_mean_cosine_precision@3": 0.22666666666666668,
"eval_NanoBEIR_mean_cosine_precision@5": 0.15200000000000002,
"eval_NanoBEIR_mean_cosine_recall@1": 0.28,
"eval_NanoBEIR_mean_cosine_recall@10": 0.665,
"eval_NanoBEIR_mean_cosine_recall@3": 0.46499999999999997,
"eval_NanoBEIR_mean_cosine_recall@5": 0.515,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.48,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.84,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.66,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.7,
"eval_NanoHotpotQA_cosine_map@100": 0.4291126300296302,
"eval_NanoHotpotQA_cosine_mrr@10": 0.5858809523809524,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.5085208412692355,
"eval_NanoHotpotQA_cosine_precision@1": 0.48,
"eval_NanoHotpotQA_cosine_precision@10": 0.122,
"eval_NanoHotpotQA_cosine_precision@3": 0.2866666666666667,
"eval_NanoHotpotQA_cosine_precision@5": 0.196,
"eval_NanoHotpotQA_cosine_recall@1": 0.24,
"eval_NanoHotpotQA_cosine_recall@10": 0.61,
"eval_NanoHotpotQA_cosine_recall@3": 0.43,
"eval_NanoHotpotQA_cosine_recall@5": 0.49,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.32,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.72,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.5,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.54,
"eval_NanoMSMARCO_cosine_map@100": 0.450834832577182,
"eval_NanoMSMARCO_cosine_mrr@10": 0.43769047619047613,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.5042150917170247,
"eval_NanoMSMARCO_cosine_precision@1": 0.32,
"eval_NanoMSMARCO_cosine_precision@10": 0.07200000000000001,
"eval_NanoMSMARCO_cosine_precision@3": 0.16666666666666669,
"eval_NanoMSMARCO_cosine_precision@5": 0.10800000000000003,
"eval_NanoMSMARCO_cosine_recall@1": 0.32,
"eval_NanoMSMARCO_cosine_recall@10": 0.72,
"eval_NanoMSMARCO_cosine_recall@3": 0.5,
"eval_NanoMSMARCO_cosine_recall@5": 0.54,
"eval_mse-dev_negative_mse": -80.49793243408203,
"eval_runtime": 11.8238,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5063679664931301,
"eval_steps_per_second": 0.0,
"step": 33000
},
{
"epoch": 0.6276309302590163,
"grad_norm": 0.7868529558181763,
"learning_rate": 7.625568852182707e-05,
"loss": 0.8087701416015625,
"step": 33100
},
{
"epoch": 0.6295270962114604,
"grad_norm": 0.9016054272651672,
"learning_rate": 7.61503455250295e-05,
"loss": 0.8097662353515624,
"step": 33200
},
{
"epoch": 0.6314232621639045,
"grad_norm": 0.9913731217384338,
"learning_rate": 7.604500252823192e-05,
"loss": 0.813260498046875,
"step": 33300
},
{
"epoch": 0.6333194281163488,
"grad_norm": 0.8851051330566406,
"learning_rate": 7.593965953143435e-05,
"loss": 0.8086640167236329,
"step": 33400
},
{
"epoch": 0.6352155940687929,
"grad_norm": 0.8317673206329346,
"learning_rate": 7.583431653463678e-05,
"loss": 0.8086080169677734,
"step": 33500
},
{
"epoch": 0.637111760021237,
"grad_norm": 0.7769960165023804,
"learning_rate": 7.572897353783922e-05,
"loss": 0.8093731689453125,
"step": 33600
},
{
"epoch": 0.6390079259736812,
"grad_norm": 0.8762325644493103,
"learning_rate": 7.562363054104164e-05,
"loss": 0.805412826538086,
"step": 33700
},
{
"epoch": 0.6409040919261254,
"grad_norm": 0.8687974810600281,
"learning_rate": 7.551828754424406e-05,
"loss": 0.8043125915527344,
"step": 33800
},
{
"epoch": 0.6428002578785695,
"grad_norm": 0.868188202381134,
"learning_rate": 7.541294454744649e-05,
"loss": 0.8034954833984375,
"step": 33900
},
{
"epoch": 0.6446964238310137,
"grad_norm": 0.8662635087966919,
"learning_rate": 7.530760155064892e-05,
"loss": 0.799036865234375,
"step": 34000
},
{
"epoch": 0.6446964238310137,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.4,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.77,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.61,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.71,
"eval_NanoBEIR_mean_cosine_map@100": 0.44345995358170154,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.5206190476190475,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.5078251449398562,
"eval_NanoBEIR_mean_cosine_precision@1": 0.4,
"eval_NanoBEIR_mean_cosine_precision@10": 0.096,
"eval_NanoBEIR_mean_cosine_precision@3": 0.23333333333333334,
"eval_NanoBEIR_mean_cosine_precision@5": 0.174,
"eval_NanoBEIR_mean_cosine_recall@1": 0.28,
"eval_NanoBEIR_mean_cosine_recall@10": 0.655,
"eval_NanoBEIR_mean_cosine_recall@3": 0.475,
"eval_NanoBEIR_mean_cosine_recall@5": 0.585,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.48,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.84,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.72,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.82,
"eval_NanoHotpotQA_cosine_map@100": 0.4349260827283061,
"eval_NanoHotpotQA_cosine_mrr@10": 0.6048571428571428,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.5166147735280449,
"eval_NanoHotpotQA_cosine_precision@1": 0.48,
"eval_NanoHotpotQA_cosine_precision@10": 0.122,
"eval_NanoHotpotQA_cosine_precision@3": 0.3,
"eval_NanoHotpotQA_cosine_precision@5": 0.22799999999999998,
"eval_NanoHotpotQA_cosine_recall@1": 0.24,
"eval_NanoHotpotQA_cosine_recall@10": 0.61,
"eval_NanoHotpotQA_cosine_recall@3": 0.45,
"eval_NanoHotpotQA_cosine_recall@5": 0.57,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.32,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.7,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.5,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.6,
"eval_NanoMSMARCO_cosine_map@100": 0.45199382443509706,
"eval_NanoMSMARCO_cosine_mrr@10": 0.43638095238095226,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.4990355163516675,
"eval_NanoMSMARCO_cosine_precision@1": 0.32,
"eval_NanoMSMARCO_cosine_precision@10": 0.07,
"eval_NanoMSMARCO_cosine_precision@3": 0.16666666666666669,
"eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002,
"eval_NanoMSMARCO_cosine_recall@1": 0.32,
"eval_NanoMSMARCO_cosine_recall@10": 0.7,
"eval_NanoMSMARCO_cosine_recall@3": 0.5,
"eval_NanoMSMARCO_cosine_recall@5": 0.6,
"eval_mse-dev_negative_mse": -79.57255554199219,
"eval_runtime": 11.3457,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5078251449398562,
"eval_steps_per_second": 0.0,
"step": 34000
},
{
"epoch": 0.6465925897834578,
"grad_norm": 0.8791268467903137,
"learning_rate": 7.520225855385135e-05,
"loss": 0.8034612274169922,
"step": 34100
},
{
"epoch": 0.648488755735902,
"grad_norm": 0.9503916501998901,
"learning_rate": 7.509691555705377e-05,
"loss": 0.7990459442138672,
"step": 34200
},
{
"epoch": 0.6503849216883462,
"grad_norm": 0.8711104393005371,
"learning_rate": 7.499157256025619e-05,
"loss": 0.7996244812011719,
"step": 34300
},
{
"epoch": 0.6522810876407903,
"grad_norm": 0.8348352313041687,
"learning_rate": 7.488622956345862e-05,
"loss": 0.8004853820800781,
"step": 34400
},
{
"epoch": 0.6541772535932345,
"grad_norm": 0.8777920007705688,
"learning_rate": 7.478088656666105e-05,
"loss": 0.8000244140625,
"step": 34500
},
{
"epoch": 0.6560734195456787,
"grad_norm": 0.847030758857727,
"learning_rate": 7.467554356986348e-05,
"loss": 0.7975210571289062,
"step": 34600
},
{
"epoch": 0.6579695854981228,
"grad_norm": 0.8619401454925537,
"learning_rate": 7.457020057306591e-05,
"loss": 0.7959075927734375,
"step": 34700
},
{
"epoch": 0.6598657514505669,
"grad_norm": 0.8588744401931763,
"learning_rate": 7.446485757626834e-05,
"loss": 0.7920943450927734,
"step": 34800
},
{
"epoch": 0.6617619174030112,
"grad_norm": 0.7903246879577637,
"learning_rate": 7.435951457947076e-05,
"loss": 0.7915798187255859,
"step": 34900
},
{
"epoch": 0.6636580833554553,
"grad_norm": 0.9617411494255066,
"learning_rate": 7.425417158267318e-05,
"loss": 0.7933383178710938,
"step": 35000
},
{
"epoch": 0.6636580833554553,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.41000000000000003,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.75,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.62,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.6799999999999999,
"eval_NanoBEIR_mean_cosine_map@100": 0.45535390379706975,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.526079365079365,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.5121596378017648,
"eval_NanoBEIR_mean_cosine_precision@1": 0.41000000000000003,
"eval_NanoBEIR_mean_cosine_precision@10": 0.094,
"eval_NanoBEIR_mean_cosine_precision@3": 0.2433333333333333,
"eval_NanoBEIR_mean_cosine_precision@5": 0.16199999999999998,
"eval_NanoBEIR_mean_cosine_recall@1": 0.29000000000000004,
"eval_NanoBEIR_mean_cosine_recall@10": 0.645,
"eval_NanoBEIR_mean_cosine_recall@3": 0.495,
"eval_NanoBEIR_mean_cosine_recall@5": 0.55,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.48,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.8,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.72,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.78,
"eval_NanoHotpotQA_cosine_map@100": 0.44473741551922635,
"eval_NanoHotpotQA_cosine_mrr@10": 0.6006666666666667,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.5139083384213542,
"eval_NanoHotpotQA_cosine_precision@1": 0.48,
"eval_NanoHotpotQA_cosine_precision@10": 0.118,
"eval_NanoHotpotQA_cosine_precision@3": 0.3133333333333333,
"eval_NanoHotpotQA_cosine_precision@5": 0.20799999999999996,
"eval_NanoHotpotQA_cosine_recall@1": 0.24,
"eval_NanoHotpotQA_cosine_recall@10": 0.59,
"eval_NanoHotpotQA_cosine_recall@3": 0.47,
"eval_NanoHotpotQA_cosine_recall@5": 0.52,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.34,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.7,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.52,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.58,
"eval_NanoMSMARCO_cosine_map@100": 0.4659703920749132,
"eval_NanoMSMARCO_cosine_mrr@10": 0.4514920634920634,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.5104109371821753,
"eval_NanoMSMARCO_cosine_precision@1": 0.34,
"eval_NanoMSMARCO_cosine_precision@10": 0.07,
"eval_NanoMSMARCO_cosine_precision@3": 0.1733333333333333,
"eval_NanoMSMARCO_cosine_precision@5": 0.11600000000000002,
"eval_NanoMSMARCO_cosine_recall@1": 0.34,
"eval_NanoMSMARCO_cosine_recall@10": 0.7,
"eval_NanoMSMARCO_cosine_recall@3": 0.52,
"eval_NanoMSMARCO_cosine_recall@5": 0.58,
"eval_mse-dev_negative_mse": -78.7884292602539,
"eval_runtime": 10.7989,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5121596378017648,
"eval_steps_per_second": 0.0,
"step": 35000
},
{
"epoch": 0.6655542493078994,
"grad_norm": 0.8038257956504822,
"learning_rate": 7.414882858587562e-05,
"loss": 0.7908313751220704,
"step": 35100
},
{
"epoch": 0.6674504152603435,
"grad_norm": 0.8573588132858276,
"learning_rate": 7.404453901904602e-05,
"loss": 0.7913258361816407,
"step": 35200
},
{
"epoch": 0.6693465812127878,
"grad_norm": 0.829589307308197,
"learning_rate": 7.393919602224845e-05,
"loss": 0.7921287536621093,
"step": 35300
},
{
"epoch": 0.6712427471652319,
"grad_norm": 0.8911552429199219,
"learning_rate": 7.383385302545088e-05,
"loss": 0.7928565979003906,
"step": 35400
},
{
"epoch": 0.673138913117676,
"grad_norm": 0.9379572868347168,
"learning_rate": 7.37285100286533e-05,
"loss": 0.7914694213867187,
"step": 35500
},
{
"epoch": 0.6750350790701202,
"grad_norm": 0.9253071546554565,
"learning_rate": 7.362316703185572e-05,
"loss": 0.7871210479736328,
"step": 35600
},
{
"epoch": 0.6769312450225644,
"grad_norm": 0.9133068323135376,
"learning_rate": 7.351782403505816e-05,
"loss": 0.7835692596435547,
"step": 35700
},
{
"epoch": 0.6788274109750085,
"grad_norm": 0.8401673436164856,
"learning_rate": 7.341248103826058e-05,
"loss": 0.7804772186279297,
"step": 35800
},
{
"epoch": 0.6807235769274527,
"grad_norm": 0.8454675674438477,
"learning_rate": 7.330713804146301e-05,
"loss": 0.7870156860351563,
"step": 35900
},
{
"epoch": 0.6826197428798968,
"grad_norm": 0.83338463306427,
"learning_rate": 7.320179504466543e-05,
"loss": 0.7796939849853516,
"step": 36000
},
{
"epoch": 0.6826197428798968,
"eval_NanoBEIR_mean_cosine_accuracy@1": 0.42000000000000004,
"eval_NanoBEIR_mean_cosine_accuracy@10": 0.8,
"eval_NanoBEIR_mean_cosine_accuracy@3": 0.64,
"eval_NanoBEIR_mean_cosine_accuracy@5": 0.69,
"eval_NanoBEIR_mean_cosine_map@100": 0.46799146568426697,
"eval_NanoBEIR_mean_cosine_mrr@10": 0.5508928571428571,
"eval_NanoBEIR_mean_cosine_ndcg@10": 0.5353904032358002,
"eval_NanoBEIR_mean_cosine_precision@1": 0.42000000000000004,
"eval_NanoBEIR_mean_cosine_precision@10": 0.099,
"eval_NanoBEIR_mean_cosine_precision@3": 0.2533333333333333,
"eval_NanoBEIR_mean_cosine_precision@5": 0.17,
"eval_NanoBEIR_mean_cosine_recall@1": 0.29000000000000004,
"eval_NanoBEIR_mean_cosine_recall@10": 0.685,
"eval_NanoBEIR_mean_cosine_recall@3": 0.51,
"eval_NanoBEIR_mean_cosine_recall@5": 0.575,
"eval_NanoHotpotQA_cosine_accuracy@1": 0.52,
"eval_NanoHotpotQA_cosine_accuracy@10": 0.84,
"eval_NanoHotpotQA_cosine_accuracy@3": 0.76,
"eval_NanoHotpotQA_cosine_accuracy@5": 0.78,
"eval_NanoHotpotQA_cosine_map@100": 0.47358422601023775,
"eval_NanoHotpotQA_cosine_mrr@10": 0.6494444444444444,
"eval_NanoHotpotQA_cosine_ndcg@10": 0.5456863439791646,
"eval_NanoHotpotQA_cosine_precision@1": 0.52,
"eval_NanoHotpotQA_cosine_precision@10": 0.122,
"eval_NanoHotpotQA_cosine_precision@3": 0.33333333333333326,
"eval_NanoHotpotQA_cosine_precision@5": 0.22,
"eval_NanoHotpotQA_cosine_recall@1": 0.26,
"eval_NanoHotpotQA_cosine_recall@10": 0.61,
"eval_NanoHotpotQA_cosine_recall@3": 0.5,
"eval_NanoHotpotQA_cosine_recall@5": 0.55,
"eval_NanoMSMARCO_cosine_accuracy@1": 0.32,
"eval_NanoMSMARCO_cosine_accuracy@10": 0.76,
"eval_NanoMSMARCO_cosine_accuracy@3": 0.52,
"eval_NanoMSMARCO_cosine_accuracy@5": 0.6,
"eval_NanoMSMARCO_cosine_map@100": 0.4623987053582962,
"eval_NanoMSMARCO_cosine_mrr@10": 0.4523412698412697,
"eval_NanoMSMARCO_cosine_ndcg@10": 0.5250944624924359,
"eval_NanoMSMARCO_cosine_precision@1": 0.32,
"eval_NanoMSMARCO_cosine_precision@10": 0.07600000000000001,
"eval_NanoMSMARCO_cosine_precision@3": 0.1733333333333333,
"eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002,
"eval_NanoMSMARCO_cosine_recall@1": 0.32,
"eval_NanoMSMARCO_cosine_recall@10": 0.76,
"eval_NanoMSMARCO_cosine_recall@3": 0.52,
"eval_NanoMSMARCO_cosine_recall@5": 0.6,
"eval_mse-dev_negative_mse": -77.74003601074219,
"eval_runtime": 11.1488,
"eval_samples_per_second": 0.0,
"eval_sequential_score": 0.5353904032358002,
"eval_steps_per_second": 0.0,
"step": 36000
}
],
"logging_steps": 100,
"max_steps": 105476,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}