| { | |
| "best_global_step": 36000, | |
| "best_metric": 0.5250944624924359, | |
| "best_model_checkpoint": "ModernBERT-small-distilled-v2/checkpoint-36000", | |
| "epoch": 0.6826197428798968, | |
| "eval_steps": 1000, | |
| "global_step": 36000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0018961659524441578, | |
| "grad_norm": 1.531716227531433, | |
| "learning_rate": 9.38566552901024e-07, | |
| "loss": 4.2698190307617185, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0037923319048883157, | |
| "grad_norm": 1.45695960521698, | |
| "learning_rate": 1.8866135760333712e-06, | |
| "loss": 4.230399475097657, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.005688497857332474, | |
| "grad_norm": 1.4260753393173218, | |
| "learning_rate": 2.8346605991657187e-06, | |
| "loss": 4.128007202148438, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.007584663809776631, | |
| "grad_norm": 1.9604460000991821, | |
| "learning_rate": 3.7827076222980664e-06, | |
| "loss": 3.8576431274414062, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.00948082976222079, | |
| "grad_norm": 0.8248822689056396, | |
| "learning_rate": 4.730754645430414e-06, | |
| "loss": 3.1561373901367187, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.011376995714664948, | |
| "grad_norm": 0.6517618894577026, | |
| "learning_rate": 5.678801668562761e-06, | |
| "loss": 2.552709503173828, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.013273161667109106, | |
| "grad_norm": 0.5630219578742981, | |
| "learning_rate": 6.626848691695109e-06, | |
| "loss": 2.327459716796875, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.015169327619553263, | |
| "grad_norm": 0.4430118799209595, | |
| "learning_rate": 7.574895714827455e-06, | |
| "loss": 2.2655821228027344, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.017065493571997423, | |
| "grad_norm": 0.5574463605880737, | |
| "learning_rate": 8.522942737959804e-06, | |
| "loss": 2.2401161193847656, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.01896165952444158, | |
| "grad_norm": 0.48447561264038086, | |
| "learning_rate": 9.47098976109215e-06, | |
| "loss": 2.22558349609375, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.01896165952444158, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.01, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.16999999999999998, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.04, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.06999999999999999, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.030615740568451958, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.04062301587301588, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.05451501792713928, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.01, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.018000000000000002, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.013333333333333332, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.014, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.005, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.13, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.02, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.04, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.02, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.18, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.08, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.12, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.03479001595433966, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.0601904761904762, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.05765775039428842, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.02, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.020000000000000004, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.026666666666666665, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.024, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.01, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.1, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.04, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.06, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.0, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.16, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.0, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.02, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.026441465182564253, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.021055555555555557, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.05137228545999013, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.0, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.016, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.0, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.004, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.0, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.16, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.0, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.02, | |
| "eval_mse-dev_negative_mse": -221.21437072753906, | |
| "eval_runtime": 11.2871, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.05451501792713928, | |
| "eval_steps_per_second": 0.0, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.020857825476885736, | |
| "grad_norm": 0.6331929564476013, | |
| "learning_rate": 1.0419036784224499e-05, | |
| "loss": 2.213970031738281, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.022753991429329896, | |
| "grad_norm": 0.501175045967102, | |
| "learning_rate": 1.1367083807356845e-05, | |
| "loss": 2.191977081298828, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.024650157381774052, | |
| "grad_norm": 0.5054857134819031, | |
| "learning_rate": 1.2315130830489193e-05, | |
| "loss": 2.1839501953125, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.026546323334218212, | |
| "grad_norm": 0.6071318984031677, | |
| "learning_rate": 1.326317785362154e-05, | |
| "loss": 2.1661726379394532, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.02844248928666237, | |
| "grad_norm": 0.508758008480072, | |
| "learning_rate": 1.4211224876753888e-05, | |
| "loss": 2.1598078918457033, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.030338655239106525, | |
| "grad_norm": 0.7203693985939026, | |
| "learning_rate": 1.5159271899886234e-05, | |
| "loss": 2.145241394042969, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.03223482119155068, | |
| "grad_norm": 0.5547841787338257, | |
| "learning_rate": 1.6107318923018582e-05, | |
| "loss": 2.122596435546875, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.034130987143994845, | |
| "grad_norm": 0.7341112494468689, | |
| "learning_rate": 1.705536594615093e-05, | |
| "loss": 2.106784210205078, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.036027153096439, | |
| "grad_norm": 0.6560561656951904, | |
| "learning_rate": 1.800341296928328e-05, | |
| "loss": 2.0941481018066406, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.03792331904888316, | |
| "grad_norm": 0.7147130966186523, | |
| "learning_rate": 1.8951459992415623e-05, | |
| "loss": 2.0796484375, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.03792331904888316, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.06, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.2, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.14, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.15, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.09355619049166879, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.10269444444444445, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.10765372452496824, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.06, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.021, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.04666666666666666, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.030000000000000002, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.045, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.16499999999999998, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.11499999999999999, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.12, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.06, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.16, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.1, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.12, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.057732668001867715, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.08650000000000001, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.0671827764380485, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.06, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.018, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.03333333333333333, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.024, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.03, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.09, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.05, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.06, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.06, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.24, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.18, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.18, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.12937971298146986, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.11888888888888888, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.14812467261188797, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.06, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.024000000000000004, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.06, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.036000000000000004, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.06, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.24, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.18, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.18, | |
| "eval_mse-dev_negative_mse": -206.88653564453125, | |
| "eval_runtime": 12.6634, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.10765372452496824, | |
| "eval_steps_per_second": 0.0, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.039819485001327315, | |
| "grad_norm": 0.6483538746833801, | |
| "learning_rate": 1.989950701554797e-05, | |
| "loss": 2.062061767578125, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.04171565095377147, | |
| "grad_norm": 0.7314621806144714, | |
| "learning_rate": 2.084755403868032e-05, | |
| "loss": 2.054515838623047, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.043611816906215635, | |
| "grad_norm": 0.7281008362770081, | |
| "learning_rate": 2.1795601061812668e-05, | |
| "loss": 2.0381907653808593, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.04550798285865979, | |
| "grad_norm": 0.7791172862052917, | |
| "learning_rate": 2.2743648084945016e-05, | |
| "loss": 2.0266854858398435, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.04740414881110395, | |
| "grad_norm": 0.7451071739196777, | |
| "learning_rate": 2.369169510807736e-05, | |
| "loss": 2.016678466796875, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.049300314763548105, | |
| "grad_norm": 0.8240593671798706, | |
| "learning_rate": 2.463974213120971e-05, | |
| "loss": 2.004122619628906, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.05119648071599226, | |
| "grad_norm": 0.8770548701286316, | |
| "learning_rate": 2.5587789154342057e-05, | |
| "loss": 1.990180206298828, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.053092646668436425, | |
| "grad_norm": 0.8051754236221313, | |
| "learning_rate": 2.6535836177474405e-05, | |
| "loss": 1.9746481323242187, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.05498881262088058, | |
| "grad_norm": 0.8228394389152527, | |
| "learning_rate": 2.7483883200606753e-05, | |
| "loss": 1.9650479125976563, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.05688497857332474, | |
| "grad_norm": 0.9059156775474548, | |
| "learning_rate": 2.84319302237391e-05, | |
| "loss": 1.9538874816894531, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.05688497857332474, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.05, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.27, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.12000000000000001, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.18, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.10268455521269725, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.10739682539682541, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.12427348063841058, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.05, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.031, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.043333333333333335, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.042, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.035, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.215, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.09, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.14500000000000002, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.06, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.3, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.14, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.2, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.09775604930816952, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.12591269841269842, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.1241974731265571, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.06, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.038, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.05333333333333333, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.052000000000000005, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.03, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.19, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.08, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.13, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.04, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.24, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.1, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.16, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.10761306111722498, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.0888809523809524, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.12434948815026406, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.04, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.024, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.03333333333333333, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.032, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.04, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.24, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.1, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.16, | |
| "eval_mse-dev_negative_mse": -194.54396057128906, | |
| "eval_runtime": 11.1789, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.12427348063841058, | |
| "eval_steps_per_second": 0.0, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.058781144525768894, | |
| "grad_norm": 1.0623186826705933, | |
| "learning_rate": 2.937997724687145e-05, | |
| "loss": 1.9401417541503907, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.06067731047821305, | |
| "grad_norm": 0.8394317030906677, | |
| "learning_rate": 3.032802427000379e-05, | |
| "loss": 1.93172607421875, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.06257347643065721, | |
| "grad_norm": 0.7523216009140015, | |
| "learning_rate": 3.127607129313614e-05, | |
| "loss": 1.9180873107910157, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.06446964238310136, | |
| "grad_norm": 0.8299034237861633, | |
| "learning_rate": 3.222411831626849e-05, | |
| "loss": 1.9097779846191407, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.06636580833554552, | |
| "grad_norm": 0.7642733454704285, | |
| "learning_rate": 3.3172165339400835e-05, | |
| "loss": 1.8983055114746095, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.06826197428798969, | |
| "grad_norm": 0.806705892086029, | |
| "learning_rate": 3.412021236253318e-05, | |
| "loss": 1.8924456787109376, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.07015814024043385, | |
| "grad_norm": 0.786217212677002, | |
| "learning_rate": 3.506825938566553e-05, | |
| "loss": 1.8805953979492187, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.072054306192878, | |
| "grad_norm": 0.8994006514549255, | |
| "learning_rate": 3.601630640879788e-05, | |
| "loss": 1.8717079162597656, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.07395047214532216, | |
| "grad_norm": 0.856419026851654, | |
| "learning_rate": 3.696435343193023e-05, | |
| "loss": 1.8591105651855468, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.07584663809776632, | |
| "grad_norm": 0.9824651479721069, | |
| "learning_rate": 3.7912400455062576e-05, | |
| "loss": 1.8524658203125, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.07584663809776632, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.06, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.38, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.19, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.28, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.1284317459612893, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.14647222222222223, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.16958431091390092, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.06, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.044000000000000004, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.06666666666666665, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.06200000000000001, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.04, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.30000000000000004, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.14, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.215, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.08, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.44, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.22, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.32, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.12429452794406634, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.17600000000000002, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.1744798681125654, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.08, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.05600000000000001, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.07999999999999999, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.07600000000000001, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.04, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.28, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.12, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.19, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.04, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.32, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.16, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.24, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.1325689639785123, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.11694444444444443, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.16468875371523642, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.04, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.032, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.05333333333333333, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.04800000000000001, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.04, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.32, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.16, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.24, | |
| "eval_mse-dev_negative_mse": -184.20260620117188, | |
| "eval_runtime": 11.2486, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.16958431091390092, | |
| "eval_steps_per_second": 0.0, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.07774280405021047, | |
| "grad_norm": 0.7716678380966187, | |
| "learning_rate": 3.8860447478194924e-05, | |
| "loss": 1.8416305541992188, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.07963897000265463, | |
| "grad_norm": 0.8711826801300049, | |
| "learning_rate": 3.980849450132727e-05, | |
| "loss": 1.8359121704101562, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.08153513595509879, | |
| "grad_norm": 0.9473533630371094, | |
| "learning_rate": 4.075654152445961e-05, | |
| "loss": 1.825589141845703, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.08343130190754294, | |
| "grad_norm": 0.8626433610916138, | |
| "learning_rate": 4.170458854759196e-05, | |
| "loss": 1.8131285095214844, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.0853274678599871, | |
| "grad_norm": 0.9295884370803833, | |
| "learning_rate": 4.265263557072431e-05, | |
| "loss": 1.8063204956054688, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.08722363381243127, | |
| "grad_norm": 0.9008107781410217, | |
| "learning_rate": 4.360068259385666e-05, | |
| "loss": 1.7949688720703125, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.08911979976487543, | |
| "grad_norm": 0.791011393070221, | |
| "learning_rate": 4.4548729616989006e-05, | |
| "loss": 1.7845721435546875, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.09101596571731958, | |
| "grad_norm": 0.7334835529327393, | |
| "learning_rate": 4.5496776640121354e-05, | |
| "loss": 1.7761888122558593, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.09291213166976374, | |
| "grad_norm": 0.9481487274169922, | |
| "learning_rate": 4.64448236632537e-05, | |
| "loss": 1.7620162963867188, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.0948082976222079, | |
| "grad_norm": 0.870833694934845, | |
| "learning_rate": 4.739287068638605e-05, | |
| "loss": 1.7605400085449219, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.0948082976222079, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.1, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.41000000000000003, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.28, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.33999999999999997, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.1626079879266355, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.19677777777777777, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.19920527873932037, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.1, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.045000000000000005, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.09666666666666665, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.07, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.07, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.305, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.2, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.24, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.12, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.5, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.34, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.42, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.15098296694670035, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.23677777777777778, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.20242024631804575, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.12, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.05800000000000001, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.11999999999999998, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.08800000000000001, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.06, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.29, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.18, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.22, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.08, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.32, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.22, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.26, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.17423300890657065, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.15677777777777777, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.195990311160595, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.08, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.032, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.07333333333333333, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.052000000000000005, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.08, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.32, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.22, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.26, | |
| "eval_mse-dev_negative_mse": -175.1685333251953, | |
| "eval_runtime": 11.7971, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.19920527873932037, | |
| "eval_steps_per_second": 0.0, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.09670446357465205, | |
| "grad_norm": 0.9423368573188782, | |
| "learning_rate": 4.83409177095184e-05, | |
| "loss": 1.7480519104003907, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.09860062952709621, | |
| "grad_norm": 0.980880856513977, | |
| "learning_rate": 4.9288964732650746e-05, | |
| "loss": 1.7419432067871095, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.10049679547954037, | |
| "grad_norm": 0.8834021687507629, | |
| "learning_rate": 5.0237011755783095e-05, | |
| "loss": 1.730076446533203, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.10239296143198452, | |
| "grad_norm": 0.9464291930198669, | |
| "learning_rate": 5.118505877891544e-05, | |
| "loss": 1.727989959716797, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.10428912738442868, | |
| "grad_norm": 0.9521955251693726, | |
| "learning_rate": 5.213310580204779e-05, | |
| "loss": 1.7130671691894532, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.10618529333687285, | |
| "grad_norm": 0.8180538415908813, | |
| "learning_rate": 5.308115282518014e-05, | |
| "loss": 1.7063189697265626, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.108081459289317, | |
| "grad_norm": 0.9113965034484863, | |
| "learning_rate": 5.402919984831249e-05, | |
| "loss": 1.695858917236328, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.10997762524176116, | |
| "grad_norm": 0.8348143100738525, | |
| "learning_rate": 5.497724687144482e-05, | |
| "loss": 1.6884242248535157, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.11187379119420532, | |
| "grad_norm": 1.00839364528656, | |
| "learning_rate": 5.592529389457717e-05, | |
| "loss": 1.6800929260253907, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.11376995714664948, | |
| "grad_norm": 0.9566198587417603, | |
| "learning_rate": 5.687334091770952e-05, | |
| "loss": 1.6699765014648438, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.11376995714664948, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.15, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.42000000000000004, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.29000000000000004, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.35, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.19699705475888196, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.2369126984126984, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.2321280636169713, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.15, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.04700000000000001, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.09999999999999999, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.07600000000000001, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.105, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.33, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.22000000000000003, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.27, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.18, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.46, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.3, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.38, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.17208604019775084, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.2664126984126984, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.21500115424853145, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.18, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.05600000000000001, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.10666666666666666, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.08800000000000001, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.09, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.28, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.16, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.22, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.12, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.38, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.28, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.32, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.2219080693200131, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.20741269841269844, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.24925497298541116, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.12, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.038000000000000006, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.09333333333333332, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.064, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.12, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.38, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.28, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.32, | |
| "eval_mse-dev_negative_mse": -166.4923858642578, | |
| "eval_runtime": 10.5014, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.2321280636169713, | |
| "eval_steps_per_second": 0.0, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.11566612309909363, | |
| "grad_norm": 0.8698049783706665, | |
| "learning_rate": 5.7821387940841866e-05, | |
| "loss": 1.6636680603027343, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.11756228905153779, | |
| "grad_norm": 0.88554447889328, | |
| "learning_rate": 5.8769434963974214e-05, | |
| "loss": 1.6543186950683593, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.11945845500398194, | |
| "grad_norm": 0.9408504366874695, | |
| "learning_rate": 5.971748198710656e-05, | |
| "loss": 1.6451298522949218, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.1213546209564261, | |
| "grad_norm": 0.8811279535293579, | |
| "learning_rate": 6.066552901023891e-05, | |
| "loss": 1.6382298278808594, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.12325078690887026, | |
| "grad_norm": 0.9638504385948181, | |
| "learning_rate": 6.161357603337125e-05, | |
| "loss": 1.6278233337402344, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.12514695286131441, | |
| "grad_norm": 0.9717722535133362, | |
| "learning_rate": 6.25616230565036e-05, | |
| "loss": 1.62345458984375, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.12704311881375857, | |
| "grad_norm": 1.0567059516906738, | |
| "learning_rate": 6.350967007963595e-05, | |
| "loss": 1.6149652099609375, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.12893928476620273, | |
| "grad_norm": 0.9955742359161377, | |
| "learning_rate": 6.44577171027683e-05, | |
| "loss": 1.6053521728515625, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.13083545071864688, | |
| "grad_norm": 1.0742182731628418, | |
| "learning_rate": 6.540576412590064e-05, | |
| "loss": 1.6007347106933594, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.13273161667109104, | |
| "grad_norm": 0.9622364044189453, | |
| "learning_rate": 6.6353811149033e-05, | |
| "loss": 1.587445068359375, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.13273161667109104, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.2, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.46, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.28, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.33, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.2218708796716416, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.2654285714285714, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.257882927462247, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.2, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.052000000000000005, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.09666666666666665, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.07, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.145, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.36, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.21500000000000002, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.255, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.22, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.52, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.28, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.34, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.1843725858934317, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.2868571428571428, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.23487212685023443, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.22, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.064, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.09999999999999998, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.07600000000000001, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.11, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.32, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.15, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.19, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.18, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.4, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.28, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.32, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.2593691734498515, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.24400000000000002, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.28089372807425955, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.18, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.04, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.09333333333333332, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.064, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.18, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.4, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.28, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.32, | |
| "eval_mse-dev_negative_mse": -158.10133361816406, | |
| "eval_runtime": 11.0707, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.257882927462247, | |
| "eval_steps_per_second": 0.0, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.13462778262353522, | |
| "grad_norm": 0.979546070098877, | |
| "learning_rate": 6.730185817216534e-05, | |
| "loss": 1.582412109375, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.13652394857597938, | |
| "grad_norm": 1.0893486738204956, | |
| "learning_rate": 6.82499051952977e-05, | |
| "loss": 1.57244384765625, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.13842011452842354, | |
| "grad_norm": 1.0537185668945312, | |
| "learning_rate": 6.919795221843004e-05, | |
| "loss": 1.5668838500976563, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.1403162804808677, | |
| "grad_norm": 0.9376671314239502, | |
| "learning_rate": 7.014599924156239e-05, | |
| "loss": 1.553501739501953, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.14221244643331185, | |
| "grad_norm": 0.9399901032447815, | |
| "learning_rate": 7.109404626469473e-05, | |
| "loss": 1.5449533081054687, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.144108612385756, | |
| "grad_norm": 0.88112473487854, | |
| "learning_rate": 7.204209328782709e-05, | |
| "loss": 1.5345271301269532, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.14600477833820016, | |
| "grad_norm": 0.9386707544326782, | |
| "learning_rate": 7.299014031095943e-05, | |
| "loss": 1.5340492248535156, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.14790094429064432, | |
| "grad_norm": 0.942371129989624, | |
| "learning_rate": 7.393818733409178e-05, | |
| "loss": 1.5242007446289063, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.14979711024308848, | |
| "grad_norm": 0.8463137745857239, | |
| "learning_rate": 7.488623435722411e-05, | |
| "loss": 1.5181001281738282, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.15169327619553263, | |
| "grad_norm": 0.9643734693527222, | |
| "learning_rate": 7.583428138035647e-05, | |
| "loss": 1.5085635375976563, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.15169327619553263, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.21, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.48, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.31000000000000005, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.33999999999999997, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.2338424020963123, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.28084126984126984, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.27053479230771704, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.21, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.053000000000000005, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.10666666666666666, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.07600000000000001, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.15, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.375, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.23, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.27, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.24, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.52, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.34, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.36, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.20036400143179198, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.30996825396825395, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.24538724835027803, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.24, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.062, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.11999999999999998, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.08800000000000001, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.12, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.31, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.18, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.22, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.18, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.44, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.28, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.32, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.2673208027608326, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.2517142857142857, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.295682336265156, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.18, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.044000000000000004, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.09333333333333332, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.064, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.18, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.44, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.28, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.32, | |
| "eval_mse-dev_negative_mse": -150.10321044921875, | |
| "eval_runtime": 10.6864, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.27053479230771704, | |
| "eval_steps_per_second": 0.0, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.1535894421479768, | |
| "grad_norm": 0.8630809187889099, | |
| "learning_rate": 7.677284793325749e-05, | |
| "loss": 1.5007017517089845, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.15548560810042095, | |
| "grad_norm": 0.8799474835395813, | |
| "learning_rate": 7.772089495638985e-05, | |
| "loss": 1.4950062561035156, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.1573817740528651, | |
| "grad_norm": 0.9594865441322327, | |
| "learning_rate": 7.866894197952219e-05, | |
| "loss": 1.4829434204101561, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.15927794000530926, | |
| "grad_norm": 0.8919075727462769, | |
| "learning_rate": 7.961698900265454e-05, | |
| "loss": 1.4779867553710937, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.16117410595775342, | |
| "grad_norm": 0.9076706767082214, | |
| "learning_rate": 8.056503602578687e-05, | |
| "loss": 1.4736830139160155, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.16307027191019757, | |
| "grad_norm": 0.8629969954490662, | |
| "learning_rate": 8.151308304891923e-05, | |
| "loss": 1.4603062438964844, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.16496643786264173, | |
| "grad_norm": 0.969744086265564, | |
| "learning_rate": 8.246113007205157e-05, | |
| "loss": 1.451029052734375, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.16686260381508589, | |
| "grad_norm": 0.9152198433876038, | |
| "learning_rate": 8.340917709518392e-05, | |
| "loss": 1.4499801635742187, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.16875876976753004, | |
| "grad_norm": 0.7964587211608887, | |
| "learning_rate": 8.435722411831626e-05, | |
| "loss": 1.440777587890625, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.1706549357199742, | |
| "grad_norm": 0.9044669270515442, | |
| "learning_rate": 8.530527114144862e-05, | |
| "loss": 1.4372213745117188, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.1706549357199742, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.23, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.51, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.33999999999999997, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.4, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.25323404391343074, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.3023690476190476, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.2928600617450003, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.23, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.058, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.12, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.08800000000000001, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.16, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.405, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.255, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.31, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.28, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.56, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.38, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.44, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.2338868196262281, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.3493809523809523, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.2824008390246955, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.28, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.07, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.13999999999999999, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.10400000000000001, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.14, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.35, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.21, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.26, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.18, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.46, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.3, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.36, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.27258126820063344, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.2553571428571429, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.3033192844653051, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.18, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.046, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.1, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.07200000000000001, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.18, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.46, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.3, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.36, | |
| "eval_mse-dev_negative_mse": -142.8462371826172, | |
| "eval_runtime": 10.0151, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.2928600617450003, | |
| "eval_steps_per_second": 0.0, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.17255110167241838, | |
| "grad_norm": 0.9912068843841553, | |
| "learning_rate": 8.625331816458096e-05, | |
| "loss": 1.4270211791992187, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.17444726762486254, | |
| "grad_norm": 0.9523755311965942, | |
| "learning_rate": 8.720136518771332e-05, | |
| "loss": 1.4232991027832032, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.1763434335773067, | |
| "grad_norm": 0.9893079996109009, | |
| "learning_rate": 8.814941221084566e-05, | |
| "loss": 1.4135417175292968, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.17823959952975085, | |
| "grad_norm": 0.8273277282714844, | |
| "learning_rate": 8.909745923397801e-05, | |
| "loss": 1.4074359130859375, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.180135765482195, | |
| "grad_norm": 0.9652109146118164, | |
| "learning_rate": 9.004550625711035e-05, | |
| "loss": 1.3981039428710937, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.18203193143463917, | |
| "grad_norm": 0.9654005169868469, | |
| "learning_rate": 9.099355328024271e-05, | |
| "loss": 1.3918597412109375, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.18392809738708332, | |
| "grad_norm": 1.0751373767852783, | |
| "learning_rate": 9.194160030337505e-05, | |
| "loss": 1.3844194030761718, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.18582426333952748, | |
| "grad_norm": 0.8573171496391296, | |
| "learning_rate": 9.28896473265074e-05, | |
| "loss": 1.3740664672851564, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.18772042929197164, | |
| "grad_norm": 0.9025856256484985, | |
| "learning_rate": 9.383769434963975e-05, | |
| "loss": 1.368533172607422, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.1896165952444158, | |
| "grad_norm": 0.936182975769043, | |
| "learning_rate": 9.47857413727721e-05, | |
| "loss": 1.3668016052246095, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.1896165952444158, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.26, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.55, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.34, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.41000000000000003, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.2717915991834402, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.3298809523809524, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.31266733186022544, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.26, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.061000000000000006, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.12666666666666665, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.09200000000000001, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.175, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.43, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.26, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.32, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.34, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.6, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.4, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.46, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.260384776010371, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.3953809523809524, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.3058907512098868, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.34, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.07200000000000001, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.15999999999999998, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.11200000000000002, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.17, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.36, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.24, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.28, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.18, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.5, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.28, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.36, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.28319842235650944, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.2643809523809524, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.31944391251056414, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.18, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.05, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.09333333333333332, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.07200000000000001, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.18, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.5, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.28, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.36, | |
| "eval_mse-dev_negative_mse": -135.70806884765625, | |
| "eval_runtime": 11.1158, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.31266733186022544, | |
| "eval_steps_per_second": 0.0, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.19151276119685995, | |
| "grad_norm": 0.9819117784500122, | |
| "learning_rate": 9.573378839590444e-05, | |
| "loss": 1.35683349609375, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.1934089271493041, | |
| "grad_norm": 0.9364531636238098, | |
| "learning_rate": 9.667235494880547e-05, | |
| "loss": 1.3505201721191407, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.19530509310174826, | |
| "grad_norm": 1.0975953340530396, | |
| "learning_rate": 9.762040197193781e-05, | |
| "loss": 1.3433110046386718, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.19720125905419242, | |
| "grad_norm": 0.8945000171661377, | |
| "learning_rate": 9.856844899507016e-05, | |
| "loss": 1.3337992858886718, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.19909742500663657, | |
| "grad_norm": 0.90827876329422, | |
| "learning_rate": 9.95164960182025e-05, | |
| "loss": 1.3294851684570312, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.20099359095908073, | |
| "grad_norm": 1.0766637325286865, | |
| "learning_rate": 9.994838193156919e-05, | |
| "loss": 1.3274673461914062, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.2028897569115249, | |
| "grad_norm": 0.9869415760040283, | |
| "learning_rate": 9.984303893477163e-05, | |
| "loss": 1.3149089050292968, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.20478592286396904, | |
| "grad_norm": 0.9914052486419678, | |
| "learning_rate": 9.973769593797405e-05, | |
| "loss": 1.3119027709960938, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.2066820888164132, | |
| "grad_norm": 0.8931730389595032, | |
| "learning_rate": 9.963235294117647e-05, | |
| "loss": 1.30553466796875, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.20857825476885736, | |
| "grad_norm": 0.9103732705116272, | |
| "learning_rate": 9.95270099443789e-05, | |
| "loss": 1.2952238464355468, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.20857825476885736, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.28, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.52, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.4, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.49, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.2958090237794817, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.3554563492063492, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.3271554490713938, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.28, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.06100000000000001, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.14666666666666667, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.10800000000000001, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.185, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.41500000000000004, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.30500000000000005, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.375, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.38, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.6, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.46, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.56, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.2970541527466325, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.4416904761904762, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.34337262327682183, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.38, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.07800000000000001, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.18, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.132, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.19, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.39, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.27, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.33, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.18, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.44, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.34, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.42, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.29456389481233086, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.26922222222222225, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.31093827486596576, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.18, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.044000000000000004, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.11333333333333333, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.084, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.18, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.44, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.34, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.42, | |
| "eval_mse-dev_negative_mse": -129.20640563964844, | |
| "eval_runtime": 10.3813, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.3271554490713938, | |
| "eval_steps_per_second": 0.0, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.21047442072130154, | |
| "grad_norm": 1.1640655994415283, | |
| "learning_rate": 9.942166694758133e-05, | |
| "loss": 1.2919923400878905, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.2123705866737457, | |
| "grad_norm": 0.9011592864990234, | |
| "learning_rate": 9.931632395078376e-05, | |
| "loss": 1.2851214599609375, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.21426675262618985, | |
| "grad_norm": 0.9254733324050903, | |
| "learning_rate": 9.921098095398619e-05, | |
| "loss": 1.2769430541992188, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.216162918578634, | |
| "grad_norm": 0.9079636931419373, | |
| "learning_rate": 9.910563795718862e-05, | |
| "loss": 1.2746614837646484, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.21805908453107817, | |
| "grad_norm": 0.9787989258766174, | |
| "learning_rate": 9.900029496039104e-05, | |
| "loss": 1.268571014404297, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.21995525048352232, | |
| "grad_norm": 0.8455345630645752, | |
| "learning_rate": 9.889495196359346e-05, | |
| "loss": 1.2683941650390624, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.22185141643596648, | |
| "grad_norm": 0.9073353409767151, | |
| "learning_rate": 9.878960896679589e-05, | |
| "loss": 1.2581684875488282, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.22374758238841064, | |
| "grad_norm": 0.8951073288917542, | |
| "learning_rate": 9.868426596999832e-05, | |
| "loss": 1.258204574584961, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.2256437483408548, | |
| "grad_norm": 1.0486690998077393, | |
| "learning_rate": 9.857892297320075e-05, | |
| "loss": 1.247862319946289, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.22753991429329895, | |
| "grad_norm": 0.8603843450546265, | |
| "learning_rate": 9.847357997640317e-05, | |
| "loss": 1.241845016479492, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.22753991429329895, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.28, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.6, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.41000000000000003, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.45, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.302852595589562, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.36785317460317457, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.3493080002249725, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.28, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.069, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.14666666666666667, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.10200000000000001, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.19, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.47, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.31, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.355, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.36, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.7, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.46, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.5, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.2899683891353945, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.4411031746031746, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.3546937420389296, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.36, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.088, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.1733333333333333, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.124, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.18, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.44, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.26, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.31, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.2, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.5, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.36, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.4, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.31573680204372945, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.2946031746031746, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.34392225841101537, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.2, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.05, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.12, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.08, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.2, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.5, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.36, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.4, | |
| "eval_mse-dev_negative_mse": -123.62611389160156, | |
| "eval_runtime": 10.308, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.3493080002249725, | |
| "eval_steps_per_second": 0.0, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.2294360802457431, | |
| "grad_norm": 0.9303557276725769, | |
| "learning_rate": 9.83682369796056e-05, | |
| "loss": 1.240003662109375, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.23133224619818726, | |
| "grad_norm": 0.9165602326393127, | |
| "learning_rate": 9.826289398280803e-05, | |
| "loss": 1.232986068725586, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.23322841215063142, | |
| "grad_norm": 0.8384730815887451, | |
| "learning_rate": 9.815755098601045e-05, | |
| "loss": 1.2288270568847657, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.23512457810307558, | |
| "grad_norm": 0.9244160652160645, | |
| "learning_rate": 9.805326141918085e-05, | |
| "loss": 1.223012924194336, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.23702074405551973, | |
| "grad_norm": 1.01241135597229, | |
| "learning_rate": 9.794791842238329e-05, | |
| "loss": 1.2164186096191407, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.2389169100079639, | |
| "grad_norm": 0.9336892366409302, | |
| "learning_rate": 9.784257542558571e-05, | |
| "loss": 1.2156867980957031, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.24081307596040805, | |
| "grad_norm": 0.9515780210494995, | |
| "learning_rate": 9.773723242878813e-05, | |
| "loss": 1.2165725708007813, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.2427092419128522, | |
| "grad_norm": 0.8875882029533386, | |
| "learning_rate": 9.763188943199057e-05, | |
| "loss": 1.2044532775878907, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.24460540786529636, | |
| "grad_norm": 0.8906784057617188, | |
| "learning_rate": 9.7526546435193e-05, | |
| "loss": 1.2034928131103515, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.24650157381774052, | |
| "grad_norm": 0.860988438129425, | |
| "learning_rate": 9.742120343839543e-05, | |
| "loss": 1.1968316650390625, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.24650157381774052, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.25, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.5700000000000001, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.38, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.45999999999999996, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.2897002867515749, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.3452896825396825, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.3305841730427018, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.25, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.065, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.1433333333333333, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.10200000000000001, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.165, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.45, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.30000000000000004, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.36, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.34, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.64, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.42, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.5, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.2823695142784583, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.417079365079365, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.332921649409912, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.34, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.08, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.1733333333333333, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.12000000000000002, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.17, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.4, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.26, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.3, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.16, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.5, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.34, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.42, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.29703105922469153, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.2735, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.3282466966754917, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.16, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.05, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.11333333333333333, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.084, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.16, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.5, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.34, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.42, | |
| "eval_mse-dev_negative_mse": -118.86907958984375, | |
| "eval_runtime": 11.1772, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.3305841730427018, | |
| "eval_steps_per_second": 0.0, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.2483977397701847, | |
| "grad_norm": 0.944284200668335, | |
| "learning_rate": 9.731586044159785e-05, | |
| "loss": 1.1941532135009765, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.25029390572262883, | |
| "grad_norm": 0.846736490726471, | |
| "learning_rate": 9.721051744480028e-05, | |
| "loss": 1.189548873901367, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.252190071675073, | |
| "grad_norm": 0.9077499508857727, | |
| "learning_rate": 9.71051744480027e-05, | |
| "loss": 1.184281463623047, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.25408623762751714, | |
| "grad_norm": 0.9021602869033813, | |
| "learning_rate": 9.699983145120512e-05, | |
| "loss": 1.1755128479003907, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.2559824035799613, | |
| "grad_norm": 0.9804133772850037, | |
| "learning_rate": 9.689448845440755e-05, | |
| "loss": 1.175633773803711, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.25787856953240545, | |
| "grad_norm": 0.8400120139122009, | |
| "learning_rate": 9.678914545760998e-05, | |
| "loss": 1.1707258605957032, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.25977473548484964, | |
| "grad_norm": 0.8351007103919983, | |
| "learning_rate": 9.668380246081241e-05, | |
| "loss": 1.1637205505371093, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.26167090143729377, | |
| "grad_norm": 0.9614461064338684, | |
| "learning_rate": 9.657845946401483e-05, | |
| "loss": 1.1684355926513672, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.26356706738973795, | |
| "grad_norm": 0.9544349312782288, | |
| "learning_rate": 9.647311646721725e-05, | |
| "loss": 1.162786636352539, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.2654632333421821, | |
| "grad_norm": 0.8563331365585327, | |
| "learning_rate": 9.636777347041969e-05, | |
| "loss": 1.1585095977783204, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.2654632333421821, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.28, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.62, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.42, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.5, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.31463974475417134, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.3753571428571429, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.36789514654221167, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.28, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.072, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.16, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.116, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.185, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.515, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.32999999999999996, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.405, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.38, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.62, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.48, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.54, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.3112110057061059, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.4472222222222223, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.3578654483822233, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.38, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.08199999999999999, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.2, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.14, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.19, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.41, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.3, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.35, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.18, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.62, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.36, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.46, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.31806848380223673, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.3034920634920635, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.3779248447022001, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.18, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.062, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.12, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.092, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.18, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.62, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.36, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.46, | |
| "eval_mse-dev_negative_mse": -115.4122085571289, | |
| "eval_runtime": 14.015, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.36789514654221167, | |
| "eval_steps_per_second": 0.0, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.26735939929462627, | |
| "grad_norm": 0.9541077017784119, | |
| "learning_rate": 9.626243047362213e-05, | |
| "loss": 1.160166244506836, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.26925556524707045, | |
| "grad_norm": 1.0204025506973267, | |
| "learning_rate": 9.615708747682455e-05, | |
| "loss": 1.1503668212890625, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.2711517311995146, | |
| "grad_norm": 1.0752142667770386, | |
| "learning_rate": 9.605174448002698e-05, | |
| "loss": 1.1483226776123048, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.27304789715195876, | |
| "grad_norm": 0.9642768502235413, | |
| "learning_rate": 9.59464014832294e-05, | |
| "loss": 1.1488003540039062, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.2749440631044029, | |
| "grad_norm": 0.8722686171531677, | |
| "learning_rate": 9.584105848643182e-05, | |
| "loss": 1.139219741821289, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.2768402290568471, | |
| "grad_norm": 0.9259271025657654, | |
| "learning_rate": 9.573676891960223e-05, | |
| "loss": 1.134266128540039, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.2787363950092912, | |
| "grad_norm": 1.019303560256958, | |
| "learning_rate": 9.563142592280465e-05, | |
| "loss": 1.136265869140625, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.2806325609617354, | |
| "grad_norm": 0.9323062300682068, | |
| "learning_rate": 9.552608292600709e-05, | |
| "loss": 1.1342037200927735, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.2825287269141795, | |
| "grad_norm": 0.8613787293434143, | |
| "learning_rate": 9.542073992920951e-05, | |
| "loss": 1.132669448852539, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.2844248928666237, | |
| "grad_norm": 0.9772534966468811, | |
| "learning_rate": 9.531539693241194e-05, | |
| "loss": 1.1218692779541015, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.2844248928666237, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.29000000000000004, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.63, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.43, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.55, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.32670175603229334, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.38711111111111113, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.3792944580347569, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.29000000000000004, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.07500000000000001, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.16, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.126, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.195, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.525, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.32999999999999996, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.44, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.38, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.66, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.5, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.6, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.3244366589749346, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.46277777777777784, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.37914250624163204, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.38, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.09, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.2, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.15200000000000002, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.19, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.45, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.3, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.38, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.2, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.6, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.36, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.5, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.3289668530896521, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.3114444444444444, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.37944640982788175, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.2, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.06000000000000001, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.12, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.1, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.2, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.6, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.36, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.5, | |
| "eval_mse-dev_negative_mse": -111.91387176513672, | |
| "eval_runtime": 11.9368, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.3792944580347569, | |
| "eval_steps_per_second": 0.0, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.28632105881906783, | |
| "grad_norm": 0.9433382749557495, | |
| "learning_rate": 9.521005393561436e-05, | |
| "loss": 1.124610137939453, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.288217224771512, | |
| "grad_norm": 0.880102276802063, | |
| "learning_rate": 9.510471093881679e-05, | |
| "loss": 1.1151537322998046, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.29011339072395614, | |
| "grad_norm": 0.8995987772941589, | |
| "learning_rate": 9.499936794201922e-05, | |
| "loss": 1.119567642211914, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.29200955667640033, | |
| "grad_norm": 0.7987125515937805, | |
| "learning_rate": 9.489402494522165e-05, | |
| "loss": 1.109741439819336, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.29390572262884446, | |
| "grad_norm": 0.8933894038200378, | |
| "learning_rate": 9.478868194842407e-05, | |
| "loss": 1.106731185913086, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.29580188858128864, | |
| "grad_norm": 0.9454442858695984, | |
| "learning_rate": 9.468333895162649e-05, | |
| "loss": 1.0994451904296876, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.29769805453373277, | |
| "grad_norm": 0.9284511804580688, | |
| "learning_rate": 9.457799595482893e-05, | |
| "loss": 1.107660446166992, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.29959422048617695, | |
| "grad_norm": 0.9509237408638, | |
| "learning_rate": 9.447265295803135e-05, | |
| "loss": 1.1057376098632812, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.3014903864386211, | |
| "grad_norm": 0.8351031541824341, | |
| "learning_rate": 9.436730996123379e-05, | |
| "loss": 1.0948797607421874, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.30338655239106527, | |
| "grad_norm": 0.9255380034446716, | |
| "learning_rate": 9.426196696443621e-05, | |
| "loss": 1.0980982208251953, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.30338655239106527, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.28, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.65, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.45, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.54, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.3266167689165571, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.3881944444444445, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.38609049913548005, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.28, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.07799999999999999, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.16666666666666669, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.12200000000000003, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.185, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.5449999999999999, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.35, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.43, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.38, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.68, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.5, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.58, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.3233119770078725, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.46327777777777784, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.38549342229017597, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.38, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.09399999999999999, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.2, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.14400000000000002, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.19, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.47, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.3, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.36, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.18, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.62, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.4, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.5, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.32992156082524177, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.3131111111111111, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.3866875759807841, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.18, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.062, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.13333333333333333, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.10000000000000002, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.18, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.62, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.4, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.5, | |
| "eval_mse-dev_negative_mse": -109.29944610595703, | |
| "eval_runtime": 13.2813, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.38609049913548005, | |
| "eval_steps_per_second": 0.0, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.30528271834350945, | |
| "grad_norm": 0.9251424670219421, | |
| "learning_rate": 9.415662396763864e-05, | |
| "loss": 1.0933486938476562, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.3071788842959536, | |
| "grad_norm": 0.9098881483078003, | |
| "learning_rate": 9.405128097084106e-05, | |
| "loss": 1.0872834777832032, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.30907505024839776, | |
| "grad_norm": 0.9585905075073242, | |
| "learning_rate": 9.394593797404348e-05, | |
| "loss": 1.0850564575195312, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.3109712162008419, | |
| "grad_norm": 0.8983785510063171, | |
| "learning_rate": 9.384059497724592e-05, | |
| "loss": 1.0840210723876953, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.3128673821532861, | |
| "grad_norm": 0.8971573114395142, | |
| "learning_rate": 9.373525198044834e-05, | |
| "loss": 1.0830884552001954, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.3147635481057302, | |
| "grad_norm": 0.9502484202384949, | |
| "learning_rate": 9.363096241361875e-05, | |
| "loss": 1.0755316925048828, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.3166597140581744, | |
| "grad_norm": 0.8195205330848694, | |
| "learning_rate": 9.352561941682118e-05, | |
| "loss": 1.0733245086669922, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.3185558800106185, | |
| "grad_norm": 0.866369366645813, | |
| "learning_rate": 9.34202764200236e-05, | |
| "loss": 1.072414016723633, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.3204520459630627, | |
| "grad_norm": 0.8804235458374023, | |
| "learning_rate": 9.331493342322602e-05, | |
| "loss": 1.069804458618164, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.32234821191550683, | |
| "grad_norm": 0.8990177512168884, | |
| "learning_rate": 9.320959042642845e-05, | |
| "loss": 1.0709500122070312, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.32234821191550683, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.31, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.66, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.47, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.55, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.35180720243649677, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.41262301587301586, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.4079069196220826, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.31, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.08, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.18, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.126, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.20500000000000002, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.56, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.38, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.445, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.42, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.68, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.5, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.58, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.3515281128331601, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.48841269841269835, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.40662948657099507, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.42, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.096, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.21333333333333332, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.14800000000000002, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.21, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.48, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.32, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.37, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.2, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.64, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.44, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.52, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.35208629203983344, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.3368333333333334, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.4091843526731701, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.2, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.064, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.14666666666666667, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.10400000000000001, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.2, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.64, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.44, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.52, | |
| "eval_mse-dev_negative_mse": -106.3768539428711, | |
| "eval_runtime": 10.2249, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.4079069196220826, | |
| "eval_steps_per_second": 0.0, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.324244377867951, | |
| "grad_norm": 0.9269504547119141, | |
| "learning_rate": 9.310424742963088e-05, | |
| "loss": 1.0698513031005858, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.32614054382039515, | |
| "grad_norm": 0.9297342896461487, | |
| "learning_rate": 9.29989044328333e-05, | |
| "loss": 1.06423828125, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.32803670977283933, | |
| "grad_norm": 0.8609415292739868, | |
| "learning_rate": 9.289356143603573e-05, | |
| "loss": 1.0575923919677734, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.32993287572528346, | |
| "grad_norm": 0.9494638442993164, | |
| "learning_rate": 9.278821843923817e-05, | |
| "loss": 1.059657211303711, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.33182904167772764, | |
| "grad_norm": 0.9297378063201904, | |
| "learning_rate": 9.268287544244059e-05, | |
| "loss": 1.0571788024902344, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.33372520763017177, | |
| "grad_norm": 0.8993592262268066, | |
| "learning_rate": 9.257753244564303e-05, | |
| "loss": 1.0546926879882812, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.33562137358261596, | |
| "grad_norm": 0.8981407880783081, | |
| "learning_rate": 9.247218944884545e-05, | |
| "loss": 1.0501728057861328, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.3375175395350601, | |
| "grad_norm": 0.8592208623886108, | |
| "learning_rate": 9.236684645204787e-05, | |
| "loss": 1.0466949462890625, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.33941370548750427, | |
| "grad_norm": 0.8278118371963501, | |
| "learning_rate": 9.22615034552503e-05, | |
| "loss": 1.0484512329101563, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.3413098714399484, | |
| "grad_norm": 0.8379432559013367, | |
| "learning_rate": 9.215616045845272e-05, | |
| "loss": 1.0455326843261719, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.3413098714399484, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.31, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.71, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.51, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.3693867458958786, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.43332936507936504, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.43735880478726147, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.31, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.087, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.19333333333333333, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.138, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.21000000000000002, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.61, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.41000000000000003, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.49, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.4, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.72, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.54, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.62, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.3550150187843317, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.4942460317460317, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.4237044581505819, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.4, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.10399999999999998, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.22666666666666668, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.16, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.2, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.52, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.34, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.4, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.22, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.7, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.48, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.58, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.3837584730074255, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.3724126984126984, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.45101315142394105, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.22, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.07, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.15999999999999998, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.11599999999999999, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.22, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.7, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.48, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.58, | |
| "eval_mse-dev_negative_mse": -103.76982879638672, | |
| "eval_runtime": 10.0162, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.43735880478726147, | |
| "eval_steps_per_second": 0.0, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.3432060373923926, | |
| "grad_norm": 0.8932205438613892, | |
| "learning_rate": 9.205081746165516e-05, | |
| "loss": 1.0432756805419923, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.34510220334483677, | |
| "grad_norm": 0.9294377565383911, | |
| "learning_rate": 9.194547446485758e-05, | |
| "loss": 1.0403505706787108, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.3469983692972809, | |
| "grad_norm": 0.8712144494056702, | |
| "learning_rate": 9.184013146806e-05, | |
| "loss": 1.0396759796142578, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.3488945352497251, | |
| "grad_norm": 0.8681181073188782, | |
| "learning_rate": 9.173478847126243e-05, | |
| "loss": 1.0351734161376953, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.3507907012021692, | |
| "grad_norm": 0.8668209910392761, | |
| "learning_rate": 9.162944547446487e-05, | |
| "loss": 1.0318231964111328, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.3526868671546134, | |
| "grad_norm": 0.9021549224853516, | |
| "learning_rate": 9.152410247766729e-05, | |
| "loss": 1.0302366638183593, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.3545830331070575, | |
| "grad_norm": 0.8724125623703003, | |
| "learning_rate": 9.141875948086973e-05, | |
| "loss": 1.0330332183837891, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.3564791990595017, | |
| "grad_norm": 0.9171428680419922, | |
| "learning_rate": 9.131446991404012e-05, | |
| "loss": 1.0219937896728515, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.35837536501194583, | |
| "grad_norm": 0.8523043394088745, | |
| "learning_rate": 9.120912691724254e-05, | |
| "loss": 1.0223383331298828, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.36027153096439, | |
| "grad_norm": 0.8599100112915039, | |
| "learning_rate": 9.110378392044497e-05, | |
| "loss": 1.0254383087158203, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.36027153096439, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.32, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.6799999999999999, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.55, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.63, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.3761081537709, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.43933333333333335, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.43521214929807284, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.32, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.08399999999999999, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.2033333333333333, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.14600000000000002, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.22, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.5800000000000001, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.435, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.515, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.4, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.72, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.58, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.66, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.3567383904240635, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.49855555555555553, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.426475283640488, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.4, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.10399999999999998, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.2333333333333333, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.172, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.2, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.52, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.35, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.43, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.24, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.64, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.52, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.6, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.3954779171177365, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.3801111111111111, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.44394901495565775, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.24, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.064, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.1733333333333333, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.24, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.64, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.52, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.6, | |
| "eval_mse-dev_negative_mse": -101.57431030273438, | |
| "eval_runtime": 10.982, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.43521214929807284, | |
| "eval_steps_per_second": 0.0, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.36216769691683415, | |
| "grad_norm": 0.9670674204826355, | |
| "learning_rate": 9.09984409236474e-05, | |
| "loss": 1.0186353302001954, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.36406386286927833, | |
| "grad_norm": 0.8563957810401917, | |
| "learning_rate": 9.089309792684983e-05, | |
| "loss": 1.0215565490722656, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.36596002882172246, | |
| "grad_norm": 0.9011367559432983, | |
| "learning_rate": 9.078775493005225e-05, | |
| "loss": 1.0152357482910157, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.36785619477416664, | |
| "grad_norm": 0.8407337665557861, | |
| "learning_rate": 9.068241193325469e-05, | |
| "loss": 1.0139485931396484, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.3697523607266108, | |
| "grad_norm": 0.8842604756355286, | |
| "learning_rate": 9.057706893645711e-05, | |
| "loss": 1.0125227355957032, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.37164852667905496, | |
| "grad_norm": 0.9665144085884094, | |
| "learning_rate": 9.047172593965954e-05, | |
| "loss": 1.008692398071289, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.3735446926314991, | |
| "grad_norm": 0.8938872218132019, | |
| "learning_rate": 9.036638294286196e-05, | |
| "loss": 1.0044830322265625, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.37544085858394327, | |
| "grad_norm": 0.8201034069061279, | |
| "learning_rate": 9.026103994606438e-05, | |
| "loss": 1.0031690979003907, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.3773370245363874, | |
| "grad_norm": 0.8051674365997314, | |
| "learning_rate": 9.015569694926682e-05, | |
| "loss": 1.001277542114258, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.3792331904888316, | |
| "grad_norm": 0.8701341152191162, | |
| "learning_rate": 9.005035395246924e-05, | |
| "loss": 1.001656494140625, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.3792331904888316, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.35, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.71, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.53, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6000000000000001, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.3885181482447372, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.45785714285714285, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.4463748112002441, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.35, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.08499999999999999, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.2, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.14, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.24, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.5900000000000001, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.425, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.49, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.44, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.76, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.56, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.64, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.37180551870545886, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.5252698412698412, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.43738455459561965, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.44, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.10399999999999998, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.2333333333333333, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.16799999999999998, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.22, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.52, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.35, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.42, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.26, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.66, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.5, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.56, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.40523077778401556, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.39044444444444454, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.4553650678048685, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.26, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.066, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.16666666666666669, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.11200000000000002, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.26, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.66, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.5, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.56, | |
| "eval_mse-dev_negative_mse": -99.66129302978516, | |
| "eval_runtime": 11.4825, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.4463748112002441, | |
| "eval_steps_per_second": 0.0, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.3811293564412757, | |
| "grad_norm": 0.8422971963882446, | |
| "learning_rate": 8.994501095567167e-05, | |
| "loss": 1.0006825256347656, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.3830255223937199, | |
| "grad_norm": 0.955066978931427, | |
| "learning_rate": 8.98396679588741e-05, | |
| "loss": 0.9958713531494141, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.3849216883461641, | |
| "grad_norm": 0.8364739418029785, | |
| "learning_rate": 8.973432496207653e-05, | |
| "loss": 0.9965061950683594, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.3868178542986082, | |
| "grad_norm": 0.9399869441986084, | |
| "learning_rate": 8.962898196527896e-05, | |
| "loss": 0.9909481048583985, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.3887140202510524, | |
| "grad_norm": 0.8677252531051636, | |
| "learning_rate": 8.952363896848139e-05, | |
| "loss": 0.9901930236816406, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.3906101862034965, | |
| "grad_norm": 0.8382641077041626, | |
| "learning_rate": 8.941829597168381e-05, | |
| "loss": 0.9903465270996094, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.3925063521559407, | |
| "grad_norm": 0.9324244856834412, | |
| "learning_rate": 8.931295297488623e-05, | |
| "loss": 0.9927156829833984, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.39440251810838484, | |
| "grad_norm": 0.9975899457931519, | |
| "learning_rate": 8.920760997808866e-05, | |
| "loss": 0.9864664459228516, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.396298684060829, | |
| "grad_norm": 0.8882135152816772, | |
| "learning_rate": 8.910226698129108e-05, | |
| "loss": 0.9842584991455078, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.39819485001327315, | |
| "grad_norm": 0.8512315154075623, | |
| "learning_rate": 8.899692398449352e-05, | |
| "loss": 0.9808792114257813, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.39819485001327315, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.35, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.72, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.51, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.62, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.3971597947674501, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.45810317460317457, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.4575468587350072, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.35, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.088, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.2, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.15, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.245, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.6100000000000001, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.41500000000000004, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.52, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.42, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.76, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.56, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.66, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.3788968895489927, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.5139603174603175, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.4462090585062046, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.42, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.10799999999999998, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.24666666666666667, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.184, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.21, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.54, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.37, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.46, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.28, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.68, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.46, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.58, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.41542269998590753, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.4022460317460317, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.46888465896380976, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.28, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.068, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.15333333333333332, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.11599999999999999, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.28, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.68, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.46, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.58, | |
| "eval_mse-dev_negative_mse": -97.49221801757812, | |
| "eval_runtime": 11.4324, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.4575468587350072, | |
| "eval_steps_per_second": 0.0, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.40009101596571733, | |
| "grad_norm": 0.7803339958190918, | |
| "learning_rate": 8.889158098769594e-05, | |
| "loss": 0.9800699615478515, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.40198718191816146, | |
| "grad_norm": 0.8740707635879517, | |
| "learning_rate": 8.878623799089836e-05, | |
| "loss": 0.9785236358642578, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.40388334787060565, | |
| "grad_norm": 0.9020572304725647, | |
| "learning_rate": 8.868089499410079e-05, | |
| "loss": 0.9718000793457031, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.4057795138230498, | |
| "grad_norm": 0.8485739827156067, | |
| "learning_rate": 8.857555199730322e-05, | |
| "loss": 0.9725127410888672, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.40767567977549396, | |
| "grad_norm": 0.9113863110542297, | |
| "learning_rate": 8.847020900050565e-05, | |
| "loss": 0.9704845428466797, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.4095718457279381, | |
| "grad_norm": 0.9105412364006042, | |
| "learning_rate": 8.836486600370809e-05, | |
| "loss": 0.9728768157958985, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.4114680116803823, | |
| "grad_norm": 0.9580652713775635, | |
| "learning_rate": 8.825952300691051e-05, | |
| "loss": 0.9713729095458984, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.4133641776328264, | |
| "grad_norm": 0.863349199295044, | |
| "learning_rate": 8.815418001011293e-05, | |
| "loss": 0.9646768951416016, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.4152603435852706, | |
| "grad_norm": 0.8929393291473389, | |
| "learning_rate": 8.804883701331536e-05, | |
| "loss": 0.9623196411132813, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.4171565095377147, | |
| "grad_norm": 0.8821211457252502, | |
| "learning_rate": 8.794349401651779e-05, | |
| "loss": 0.9578647613525391, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.4171565095377147, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.35, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.72, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.55, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.63, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.398172306882256, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.46505158730158724, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.4595267255374118, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.35, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.08800000000000001, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.20999999999999996, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.15, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.24, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.6100000000000001, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.435, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.52, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.44, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.76, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.62, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.68, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.38667158218589576, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.5341904761904762, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.454887980345426, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.44, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.10800000000000001, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.25999999999999995, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.184, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.22, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.54, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.39, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.46, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.26, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.68, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.48, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.58, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.40967303157861634, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.39591269841269827, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.46416547072939757, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.26, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.068, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.15999999999999998, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.11599999999999999, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.26, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.68, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.48, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.58, | |
| "eval_mse-dev_negative_mse": -95.78128814697266, | |
| "eval_runtime": 11.0251, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.4595267255374118, | |
| "eval_steps_per_second": 0.0, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.4190526754901589, | |
| "grad_norm": 0.8627265095710754, | |
| "learning_rate": 8.783815101972022e-05, | |
| "loss": 0.9553171539306641, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.4209488414426031, | |
| "grad_norm": 0.8205426931381226, | |
| "learning_rate": 8.773280802292264e-05, | |
| "loss": 0.9557749176025391, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.4228450073950472, | |
| "grad_norm": 0.8694571256637573, | |
| "learning_rate": 8.762746502612506e-05, | |
| "loss": 0.9584300994873047, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.4247411733474914, | |
| "grad_norm": 0.8678444623947144, | |
| "learning_rate": 8.752212202932749e-05, | |
| "loss": 0.9544028472900391, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.4266373392999355, | |
| "grad_norm": 0.8822008967399597, | |
| "learning_rate": 8.741677903252991e-05, | |
| "loss": 0.9520068359375, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.4285335052523797, | |
| "grad_norm": 0.951594352722168, | |
| "learning_rate": 8.731143603573235e-05, | |
| "loss": 0.9515534210205078, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.43042967120482384, | |
| "grad_norm": 0.9522872567176819, | |
| "learning_rate": 8.720714646890276e-05, | |
| "loss": 0.9542991638183593, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.432325837157268, | |
| "grad_norm": 0.9078388214111328, | |
| "learning_rate": 8.710180347210518e-05, | |
| "loss": 0.9501979064941406, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.43422200310971215, | |
| "grad_norm": 0.8574204444885254, | |
| "learning_rate": 8.69964604753076e-05, | |
| "loss": 0.9476995086669922, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.43611816906215634, | |
| "grad_norm": 0.8338425159454346, | |
| "learning_rate": 8.689111747851003e-05, | |
| "loss": 0.940532455444336, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.43611816906215634, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.39, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.71, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.5700000000000001, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.61, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.4144241147192019, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.4880277777777777, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.4688451653582658, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.39, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.086, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.21333333333333332, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.14399999999999996, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.275, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.595, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.445, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.505, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.46, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.76, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.64, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.64, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.38426119225152133, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.5458888888888889, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.4520622291124691, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.46, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.10599999999999998, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.25999999999999995, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.17199999999999996, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.23, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.53, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.39, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.43, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.32, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.66, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.5, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.58, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.4445870371868824, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.4301666666666666, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.48562810160406256, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.32, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.066, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.16666666666666669, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.11599999999999999, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.32, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.66, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.5, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.58, | |
| "eval_mse-dev_negative_mse": -93.92383575439453, | |
| "eval_runtime": 11.5971, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.4688451653582658, | |
| "eval_steps_per_second": 0.0, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.43801433501460046, | |
| "grad_norm": 0.8566615581512451, | |
| "learning_rate": 8.678577448171246e-05, | |
| "loss": 0.9448033142089843, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.43991050096704465, | |
| "grad_norm": 0.8012374639511108, | |
| "learning_rate": 8.668043148491489e-05, | |
| "loss": 0.9424338531494141, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.4418066669194888, | |
| "grad_norm": 0.8802723288536072, | |
| "learning_rate": 8.657508848811732e-05, | |
| "loss": 0.9369033050537109, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.44370283287193296, | |
| "grad_norm": 0.814888596534729, | |
| "learning_rate": 8.646974549131975e-05, | |
| "loss": 0.93183837890625, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.4455989988243771, | |
| "grad_norm": 0.8690612316131592, | |
| "learning_rate": 8.636440249452217e-05, | |
| "loss": 0.9342401123046875, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.4474951647768213, | |
| "grad_norm": 0.8208878040313721, | |
| "learning_rate": 8.625905949772459e-05, | |
| "loss": 0.9391999053955078, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.4493913307292654, | |
| "grad_norm": 0.8126626014709473, | |
| "learning_rate": 8.615371650092702e-05, | |
| "loss": 0.9358238983154297, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.4512874966817096, | |
| "grad_norm": 0.8614762425422668, | |
| "learning_rate": 8.604837350412945e-05, | |
| "loss": 0.9303498077392578, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.4531836626341537, | |
| "grad_norm": 0.8028171062469482, | |
| "learning_rate": 8.594408393729986e-05, | |
| "loss": 0.9305805969238281, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.4550798285865979, | |
| "grad_norm": 0.8973707556724548, | |
| "learning_rate": 8.583874094050229e-05, | |
| "loss": 0.927711410522461, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.4550798285865979, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.39, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.73, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.59, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6599999999999999, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.4316413980898389, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.5037738095238096, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.4871808124834046, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.39, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.08900000000000001, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.22666666666666666, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.15999999999999998, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.275, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.62, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.455, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.5449999999999999, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.46, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.76, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.72, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.74, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.41902772468451716, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.5768571428571428, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.479755445861627, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.46, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.10800000000000001, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.3, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.204, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.23, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.54, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.45, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.51, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.32, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.7, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.46, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.58, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.44425507149516064, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.4306904761904762, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.49460617910518223, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.32, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.07, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.15333333333333332, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.11599999999999999, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.32, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.7, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.46, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.58, | |
| "eval_mse-dev_negative_mse": -92.24274444580078, | |
| "eval_runtime": 11.2493, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.4871808124834046, | |
| "eval_steps_per_second": 0.0, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.45697599453904203, | |
| "grad_norm": 0.8175747394561768, | |
| "learning_rate": 8.573339794370471e-05, | |
| "loss": 0.9266593170166015, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.4588721604914862, | |
| "grad_norm": 0.9604556560516357, | |
| "learning_rate": 8.562805494690713e-05, | |
| "loss": 0.9227654266357422, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.4607683264439304, | |
| "grad_norm": 0.82953941822052, | |
| "learning_rate": 8.552271195010956e-05, | |
| "loss": 0.9239090728759766, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.4626644923963745, | |
| "grad_norm": 0.9319136142730713, | |
| "learning_rate": 8.541736895331198e-05, | |
| "loss": 0.9225330352783203, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.4645606583488187, | |
| "grad_norm": 0.8900800943374634, | |
| "learning_rate": 8.531202595651442e-05, | |
| "loss": 0.9169361877441407, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.46645682430126284, | |
| "grad_norm": 0.8238077759742737, | |
| "learning_rate": 8.520668295971684e-05, | |
| "loss": 0.9170392608642578, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.468352990253707, | |
| "grad_norm": 0.9116878509521484, | |
| "learning_rate": 8.510133996291926e-05, | |
| "loss": 0.9195194244384766, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.47024915620615115, | |
| "grad_norm": 0.8857290744781494, | |
| "learning_rate": 8.49959969661217e-05, | |
| "loss": 0.915346450805664, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.47214532215859534, | |
| "grad_norm": 0.8089697360992432, | |
| "learning_rate": 8.489065396932412e-05, | |
| "loss": 0.9137913513183594, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.47404148811103947, | |
| "grad_norm": 0.9027810096740723, | |
| "learning_rate": 8.478531097252656e-05, | |
| "loss": 0.9107527923583985, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.47404148811103947, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.38, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.73, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.54, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6599999999999999, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.4163537056013121, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.4892301587301586, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.47172532243736104, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.38, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.088, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.21333333333333335, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.15800000000000003, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.26, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.605, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.43, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.5449999999999999, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.48, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.8, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.64, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.72, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.41568504331630274, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.5788571428571428, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.48124067192024733, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.48, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.11, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.28, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.196, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.24, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.55, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.42, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.49, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.28, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.66, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.44, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.6, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.4170223678863214, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.39960317460317446, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.46220997295447475, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.28, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.066, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.14666666666666667, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.28, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.66, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.44, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.6, | |
| "eval_mse-dev_negative_mse": -90.7634506225586, | |
| "eval_runtime": 11.8991, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.47172532243736104, | |
| "eval_steps_per_second": 0.0, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.47593765406348365, | |
| "grad_norm": 0.8811827898025513, | |
| "learning_rate": 8.467996797572898e-05, | |
| "loss": 0.913282470703125, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.4778338200159278, | |
| "grad_norm": 0.7843953967094421, | |
| "learning_rate": 8.457462497893141e-05, | |
| "loss": 0.9076313781738281, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.47972998596837196, | |
| "grad_norm": 0.9090595245361328, | |
| "learning_rate": 8.446928198213383e-05, | |
| "loss": 0.9081029510498047, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.4816261519208161, | |
| "grad_norm": 0.9231439828872681, | |
| "learning_rate": 8.436393898533625e-05, | |
| "loss": 0.9092655944824218, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.4835223178732603, | |
| "grad_norm": 0.8273399472236633, | |
| "learning_rate": 8.425859598853869e-05, | |
| "loss": 0.9036608123779297, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.4854184838257044, | |
| "grad_norm": 0.9115743637084961, | |
| "learning_rate": 8.415325299174111e-05, | |
| "loss": 0.9024863433837891, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.4873146497781486, | |
| "grad_norm": 0.8682368993759155, | |
| "learning_rate": 8.404790999494354e-05, | |
| "loss": 0.9058139801025391, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.4892108157305927, | |
| "grad_norm": 0.8775367140769958, | |
| "learning_rate": 8.394256699814596e-05, | |
| "loss": 0.901763916015625, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.4911069816830369, | |
| "grad_norm": 0.8083050847053528, | |
| "learning_rate": 8.383722400134838e-05, | |
| "loss": 0.901358642578125, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.49300314763548103, | |
| "grad_norm": 0.8163812160491943, | |
| "learning_rate": 8.373188100455082e-05, | |
| "loss": 0.8946353912353515, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.49300314763548103, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.38, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.73, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.56, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6399999999999999, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.424081987400691, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.49457936507936506, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.4851060192183279, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.38, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.092, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.22, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.158, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.26, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.63, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.445, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.54, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.48, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.78, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.66, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.7, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.42326811379287077, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.5793888888888888, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.4956820213676064, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.48, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.11599999999999998, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.2866666666666667, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.2, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.24, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.58, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.43, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.5, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.28, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.68, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.46, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.58, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.4248958610085112, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.40976984126984123, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.4745300170690494, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.28, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.068, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.15333333333333332, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.11599999999999999, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.28, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.68, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.46, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.58, | |
| "eval_mse-dev_negative_mse": -89.25623321533203, | |
| "eval_runtime": 10.0875, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.4851060192183279, | |
| "eval_steps_per_second": 0.0, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.4948993135879252, | |
| "grad_norm": 0.8695216178894043, | |
| "learning_rate": 8.362653800775326e-05, | |
| "loss": 0.8982176208496093, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.4967954795403694, | |
| "grad_norm": 0.87025386095047, | |
| "learning_rate": 8.352119501095568e-05, | |
| "loss": 0.8945767211914063, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.49869164549281353, | |
| "grad_norm": 0.8507541418075562, | |
| "learning_rate": 8.34158520141581e-05, | |
| "loss": 0.8941314697265625, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.5005878114452577, | |
| "grad_norm": 0.9079861044883728, | |
| "learning_rate": 8.33115624473285e-05, | |
| "loss": 0.8925470733642578, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.5024839773977019, | |
| "grad_norm": 0.8484945893287659, | |
| "learning_rate": 8.320621945053094e-05, | |
| "loss": 0.8947381591796875, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.504380143350146, | |
| "grad_norm": 0.889153003692627, | |
| "learning_rate": 8.310087645373336e-05, | |
| "loss": 0.89056884765625, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.5062763093025902, | |
| "grad_norm": 0.7697421312332153, | |
| "learning_rate": 8.29955334569358e-05, | |
| "loss": 0.889549560546875, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.5081724752550343, | |
| "grad_norm": 0.8403399586677551, | |
| "learning_rate": 8.289019046013822e-05, | |
| "loss": 0.886633529663086, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.5100686412074785, | |
| "grad_norm": 0.9034698009490967, | |
| "learning_rate": 8.278484746334064e-05, | |
| "loss": 0.8839826965332032, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.5119648071599227, | |
| "grad_norm": 0.8018946051597595, | |
| "learning_rate": 8.267950446654307e-05, | |
| "loss": 0.8764205932617187, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.5119648071599227, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.45000000000000007, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.75, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.56, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6799999999999999, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.4516633759155257, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.5368730158730158, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.5092015395473641, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.45000000000000007, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.092, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.21666666666666667, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.166, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.31000000000000005, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.635, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.44, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.565, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.56, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.8, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.66, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.76, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.4514755786098336, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.6343888888888888, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.5172578575160077, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.56, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.11399999999999999, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.28, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.212, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.28, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.57, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.42, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.53, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.34, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.7, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.46, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.6, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.45185117322121776, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.43935714285714284, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.5011452215787204, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.34, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.07, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.15333333333333332, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.34, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.7, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.46, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.6, | |
| "eval_mse-dev_negative_mse": -87.8038558959961, | |
| "eval_runtime": 12.2079, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.5092015395473641, | |
| "eval_steps_per_second": 0.0, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.5138609731123668, | |
| "grad_norm": 0.8619687557220459, | |
| "learning_rate": 8.257416146974549e-05, | |
| "loss": 0.8859089660644531, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.5157571390648109, | |
| "grad_norm": 0.8811931610107422, | |
| "learning_rate": 8.246881847294792e-05, | |
| "loss": 0.8839226531982421, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.5176533050172551, | |
| "grad_norm": 0.8505755066871643, | |
| "learning_rate": 8.236347547615035e-05, | |
| "loss": 0.8794448852539063, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.5195494709696993, | |
| "grad_norm": 0.8391817212104797, | |
| "learning_rate": 8.225813247935278e-05, | |
| "loss": 0.8790214538574219, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.5214456369221434, | |
| "grad_norm": 0.7982373237609863, | |
| "learning_rate": 8.21527894825552e-05, | |
| "loss": 0.8788404083251953, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.5233418028745875, | |
| "grad_norm": 0.87211674451828, | |
| "learning_rate": 8.204744648575762e-05, | |
| "loss": 0.8779651641845703, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.5252379688270318, | |
| "grad_norm": 0.8461468815803528, | |
| "learning_rate": 8.194210348896006e-05, | |
| "loss": 0.8749393463134766, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.5271341347794759, | |
| "grad_norm": 0.8423062562942505, | |
| "learning_rate": 8.18367604921625e-05, | |
| "loss": 0.8741777038574219, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.52903030073192, | |
| "grad_norm": 0.8545904159545898, | |
| "learning_rate": 8.173141749536492e-05, | |
| "loss": 0.8700465393066407, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.5309264666843642, | |
| "grad_norm": 0.8632199764251709, | |
| "learning_rate": 8.162607449856734e-05, | |
| "loss": 0.8691284942626953, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.5309264666843642, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.39, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.74, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.54, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6599999999999999, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.43119589947238984, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.4965515873015872, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.4856222050166246, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.39, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.09, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.21333333333333332, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.15800000000000003, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.275, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.6200000000000001, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.44, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.5449999999999999, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.46, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.8, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.6, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.72, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.4123353201122342, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.5583888888888888, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.4776237090129175, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.46, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.11199999999999999, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.26666666666666666, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.196, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.23, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.56, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.4, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.49, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.32, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.68, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.48, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.6, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.45005647883254546, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.43471428571428566, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.49362070102033173, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.32, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.068, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.15999999999999998, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.32, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.68, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.48, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.6, | |
| "eval_mse-dev_negative_mse": -86.44185638427734, | |
| "eval_runtime": 14.6199, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.4856222050166246, | |
| "eval_steps_per_second": 0.0, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.5328226326368084, | |
| "grad_norm": 0.9085125923156738, | |
| "learning_rate": 8.152073150176977e-05, | |
| "loss": 0.874675521850586, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.5347187985892525, | |
| "grad_norm": 0.8658029437065125, | |
| "learning_rate": 8.141538850497219e-05, | |
| "loss": 0.8643728637695313, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.5366149645416967, | |
| "grad_norm": 0.9218304753303528, | |
| "learning_rate": 8.131004550817463e-05, | |
| "loss": 0.8673239898681641, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.5385111304941409, | |
| "grad_norm": 0.8571885228157043, | |
| "learning_rate": 8.120470251137705e-05, | |
| "loss": 0.86698486328125, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.540407296446585, | |
| "grad_norm": 0.8248752355575562, | |
| "learning_rate": 8.109935951457947e-05, | |
| "loss": 0.863829116821289, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.5423034623990292, | |
| "grad_norm": 0.9771467447280884, | |
| "learning_rate": 8.09940165177819e-05, | |
| "loss": 0.8649395751953125, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.5441996283514733, | |
| "grad_norm": 0.8203988075256348, | |
| "learning_rate": 8.088867352098432e-05, | |
| "loss": 0.8629121398925781, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.5460957943039175, | |
| "grad_norm": 0.7756925225257874, | |
| "learning_rate": 8.078333052418676e-05, | |
| "loss": 0.8629222106933594, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.5479919602563617, | |
| "grad_norm": 0.8539568781852722, | |
| "learning_rate": 8.067798752738918e-05, | |
| "loss": 0.8591197204589843, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.5498881262088058, | |
| "grad_norm": 0.8543459177017212, | |
| "learning_rate": 8.057264453059162e-05, | |
| "loss": 0.856646499633789, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.5498881262088058, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.37, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.73, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.5800000000000001, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6699999999999999, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.42708582266190265, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.49876190476190474, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.48552032214505464, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.37, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.091, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.2233333333333333, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.162, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.26, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.62, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.45499999999999996, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.5549999999999999, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.44, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.8, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.68, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.74, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.41616973619547015, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.5761666666666667, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.49181648887243534, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.44, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.11599999999999998, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.2866666666666666, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.204, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.22, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.58, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.43, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.51, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.3, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.66, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.48, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.6, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.43800190912833514, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.42135714285714276, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.47922415541767394, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.3, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.066, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.15999999999999998, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.3, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.66, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.48, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.6, | |
| "eval_mse-dev_negative_mse": -85.04077911376953, | |
| "eval_runtime": 12.4825, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.48552032214505464, | |
| "eval_steps_per_second": 0.0, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.5517842921612499, | |
| "grad_norm": 0.8201111555099487, | |
| "learning_rate": 8.046730153379404e-05, | |
| "loss": 0.8587515258789062, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.5536804581136942, | |
| "grad_norm": 0.8306780457496643, | |
| "learning_rate": 8.036195853699647e-05, | |
| "loss": 0.8544799041748047, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.5555766240661383, | |
| "grad_norm": 0.8447550535202026, | |
| "learning_rate": 8.025661554019889e-05, | |
| "loss": 0.8534080505371093, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.5574727900185824, | |
| "grad_norm": 0.8507358431816101, | |
| "learning_rate": 8.015127254340133e-05, | |
| "loss": 0.8543455505371094, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.5593689559710265, | |
| "grad_norm": 0.8200713396072388, | |
| "learning_rate": 8.004592954660375e-05, | |
| "loss": 0.8533712768554688, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.5612651219234708, | |
| "grad_norm": 0.8041396141052246, | |
| "learning_rate": 7.994058654980617e-05, | |
| "loss": 0.8519126129150391, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.5631612878759149, | |
| "grad_norm": 0.8296621441841125, | |
| "learning_rate": 7.98352435530086e-05, | |
| "loss": 0.8486277008056641, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 0.565057453828359, | |
| "grad_norm": 0.8634279370307922, | |
| "learning_rate": 7.972990055621102e-05, | |
| "loss": 0.8529573822021485, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.5669536197808032, | |
| "grad_norm": 0.9058282375335693, | |
| "learning_rate": 7.962455755941344e-05, | |
| "loss": 0.8476997375488281, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 0.5688497857332474, | |
| "grad_norm": 0.8404967784881592, | |
| "learning_rate": 7.951921456261588e-05, | |
| "loss": 0.8465479278564453, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.5688497857332474, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.4, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.75, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.61, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6699999999999999, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.44285306339192243, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.5205357142857143, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.5041657136741884, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.4, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.093, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.2366666666666667, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.164, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.275, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.645, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.48, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.5549999999999999, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.5, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.78, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.72, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.76, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.4425880574674547, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.6113333333333334, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.5096854578340355, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.5, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.11399999999999999, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.3066666666666667, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.212, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.25, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.57, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.46, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.53, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.3, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.72, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.5, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.58, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.44311806931639014, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.4297380952380952, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.49864596951434115, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.3, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.07200000000000001, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.16666666666666669, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.11600000000000002, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.3, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.72, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.5, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.58, | |
| "eval_mse-dev_negative_mse": -83.94352722167969, | |
| "eval_runtime": 11.241, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.5041657136741884, | |
| "eval_steps_per_second": 0.0, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.5707459516856915, | |
| "grad_norm": 0.8594946265220642, | |
| "learning_rate": 7.94138715658183e-05, | |
| "loss": 0.8424729156494141, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 0.5726421176381357, | |
| "grad_norm": 0.8535016775131226, | |
| "learning_rate": 7.930852856902074e-05, | |
| "loss": 0.8437194061279297, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.5745382835905799, | |
| "grad_norm": 0.8929939866065979, | |
| "learning_rate": 7.920318557222316e-05, | |
| "loss": 0.8429566192626953, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 0.576434449543024, | |
| "grad_norm": 0.7629504203796387, | |
| "learning_rate": 7.909784257542559e-05, | |
| "loss": 0.8431417846679687, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.5783306154954682, | |
| "grad_norm": 0.8285149335861206, | |
| "learning_rate": 7.899355300859598e-05, | |
| "loss": 0.8423690032958985, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.5802267814479123, | |
| "grad_norm": 0.866598904132843, | |
| "learning_rate": 7.888821001179842e-05, | |
| "loss": 0.8403389739990235, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.5821229474003565, | |
| "grad_norm": 0.8084122538566589, | |
| "learning_rate": 7.878286701500086e-05, | |
| "loss": 0.8347031402587891, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 0.5840191133528007, | |
| "grad_norm": 0.8977468013763428, | |
| "learning_rate": 7.867752401820328e-05, | |
| "loss": 0.8343724822998047, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.5859152793052448, | |
| "grad_norm": 0.8902882933616638, | |
| "learning_rate": 7.85721810214057e-05, | |
| "loss": 0.8348311614990235, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 0.5878114452576889, | |
| "grad_norm": 0.9056336283683777, | |
| "learning_rate": 7.846683802460813e-05, | |
| "loss": 0.8350757598876953, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.5878114452576889, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.39, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.78, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.5900000000000001, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6799999999999999, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.43741232846707523, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.5121309523809523, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.5043257907139727, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.39, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.096, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.22666666666666668, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.16399999999999998, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.27, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.6599999999999999, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.46499999999999997, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.565, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.48, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.84, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.68, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.74, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.4314881589932104, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.5937380952380952, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.508792106805762, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.48, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.11999999999999998, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.2866666666666667, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.204, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.24, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.6, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.43, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.51, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.3, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.72, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.5, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.62, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.4433364979409401, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.43052380952380953, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.4998594746221832, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.3, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.07200000000000001, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.16666666666666669, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.124, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.3, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.72, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.5, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.62, | |
| "eval_mse-dev_negative_mse": -82.8113021850586, | |
| "eval_runtime": 11.2139, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.5043257907139727, | |
| "eval_steps_per_second": 0.0, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.5897076112101332, | |
| "grad_norm": 0.841434895992279, | |
| "learning_rate": 7.836149502781055e-05, | |
| "loss": 0.8361685943603515, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 0.5916037771625773, | |
| "grad_norm": 0.8636693358421326, | |
| "learning_rate": 7.825615203101299e-05, | |
| "loss": 0.8306892395019532, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.5934999431150214, | |
| "grad_norm": 0.9691203236579895, | |
| "learning_rate": 7.815080903421541e-05, | |
| "loss": 0.8314771270751953, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 0.5953961090674655, | |
| "grad_norm": 0.862746000289917, | |
| "learning_rate": 7.804546603741783e-05, | |
| "loss": 0.8310930633544922, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 0.5972922750199098, | |
| "grad_norm": 0.9316207766532898, | |
| "learning_rate": 7.794012304062026e-05, | |
| "loss": 0.8304837036132813, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.5991884409723539, | |
| "grad_norm": 0.8787679672241211, | |
| "learning_rate": 7.783478004382268e-05, | |
| "loss": 0.8304119873046875, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.601084606924798, | |
| "grad_norm": 0.8498113751411438, | |
| "learning_rate": 7.772943704702512e-05, | |
| "loss": 0.8277024841308593, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 0.6029807728772422, | |
| "grad_norm": 0.7722318768501282, | |
| "learning_rate": 7.762409405022754e-05, | |
| "loss": 0.8249209594726562, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 0.6048769388296864, | |
| "grad_norm": 0.8988415598869324, | |
| "learning_rate": 7.751875105342998e-05, | |
| "loss": 0.8261857604980469, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 0.6067731047821305, | |
| "grad_norm": 0.8066183924674988, | |
| "learning_rate": 7.74134080566324e-05, | |
| "loss": 0.8235664367675781, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.6067731047821305, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.42000000000000004, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.74, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.61, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.65, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.45000936240708805, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.5258571428571428, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.5033699936729163, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.42000000000000004, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.09199999999999998, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.2433333333333333, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.158, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.28500000000000003, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.625, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.495, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.535, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.54, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.82, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.7, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.74, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.45760396761575023, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.6278571428571429, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.525620245048735, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.54, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.11799999999999997, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.3133333333333333, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.204, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.27, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.59, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.47, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.51, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.3, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.66, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.52, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.56, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.4424147571984259, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.42385714285714277, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.48111974229709764, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.3, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.066, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.1733333333333333, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.11200000000000002, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.3, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.66, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.52, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.56, | |
| "eval_mse-dev_negative_mse": -81.73892211914062, | |
| "eval_runtime": 10.8876, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.5033699936729163, | |
| "eval_steps_per_second": 0.0, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.6086692707345747, | |
| "grad_norm": 0.8193183541297913, | |
| "learning_rate": 7.730806505983482e-05, | |
| "loss": 0.82093994140625, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 0.6105654366870189, | |
| "grad_norm": 0.8819192051887512, | |
| "learning_rate": 7.720272206303726e-05, | |
| "loss": 0.8226362609863281, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 0.612461602639463, | |
| "grad_norm": 0.8473449349403381, | |
| "learning_rate": 7.709737906623968e-05, | |
| "loss": 0.8206555938720703, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 0.6143577685919072, | |
| "grad_norm": 0.8858373761177063, | |
| "learning_rate": 7.699203606944211e-05, | |
| "loss": 0.8223712921142579, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.6162539345443513, | |
| "grad_norm": 0.8924335837364197, | |
| "learning_rate": 7.688669307264453e-05, | |
| "loss": 0.8162551879882812, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.6181501004967955, | |
| "grad_norm": 0.9139745235443115, | |
| "learning_rate": 7.678135007584696e-05, | |
| "loss": 0.818095932006836, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 0.6200462664492397, | |
| "grad_norm": 0.8812312483787537, | |
| "learning_rate": 7.667600707904938e-05, | |
| "loss": 0.8147300720214844, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 0.6219424324016838, | |
| "grad_norm": 0.8906788229942322, | |
| "learning_rate": 7.657066408225182e-05, | |
| "loss": 0.8169952392578125, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.6238385983541279, | |
| "grad_norm": 0.8133891820907593, | |
| "learning_rate": 7.646532108545424e-05, | |
| "loss": 0.815572509765625, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 0.6257347643065722, | |
| "grad_norm": 0.8570773601531982, | |
| "learning_rate": 7.635997808865668e-05, | |
| "loss": 0.814079818725586, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.6257347643065722, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.4, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.78, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.5800000000000001, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.62, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.4399737313034061, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.5117857142857143, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.5063679664931301, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.4, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.097, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.22666666666666668, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.15200000000000002, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.28, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.665, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.46499999999999997, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.515, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.48, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.84, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.66, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.7, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.4291126300296302, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.5858809523809524, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.5085208412692355, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.48, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.122, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.2866666666666667, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.196, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.24, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.61, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.43, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.49, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.32, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.72, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.5, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.54, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.450834832577182, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.43769047619047613, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.5042150917170247, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.32, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.07200000000000001, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.16666666666666669, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.10800000000000003, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.32, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.72, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.5, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.54, | |
| "eval_mse-dev_negative_mse": -80.49793243408203, | |
| "eval_runtime": 11.8238, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.5063679664931301, | |
| "eval_steps_per_second": 0.0, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.6276309302590163, | |
| "grad_norm": 0.7868529558181763, | |
| "learning_rate": 7.625568852182707e-05, | |
| "loss": 0.8087701416015625, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 0.6295270962114604, | |
| "grad_norm": 0.9016054272651672, | |
| "learning_rate": 7.61503455250295e-05, | |
| "loss": 0.8097662353515624, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.6314232621639045, | |
| "grad_norm": 0.9913731217384338, | |
| "learning_rate": 7.604500252823192e-05, | |
| "loss": 0.813260498046875, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 0.6333194281163488, | |
| "grad_norm": 0.8851051330566406, | |
| "learning_rate": 7.593965953143435e-05, | |
| "loss": 0.8086640167236329, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 0.6352155940687929, | |
| "grad_norm": 0.8317673206329346, | |
| "learning_rate": 7.583431653463678e-05, | |
| "loss": 0.8086080169677734, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.637111760021237, | |
| "grad_norm": 0.7769960165023804, | |
| "learning_rate": 7.572897353783922e-05, | |
| "loss": 0.8093731689453125, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.6390079259736812, | |
| "grad_norm": 0.8762325644493103, | |
| "learning_rate": 7.562363054104164e-05, | |
| "loss": 0.805412826538086, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.6409040919261254, | |
| "grad_norm": 0.8687974810600281, | |
| "learning_rate": 7.551828754424406e-05, | |
| "loss": 0.8043125915527344, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.6428002578785695, | |
| "grad_norm": 0.868188202381134, | |
| "learning_rate": 7.541294454744649e-05, | |
| "loss": 0.8034954833984375, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.6446964238310137, | |
| "grad_norm": 0.8662635087966919, | |
| "learning_rate": 7.530760155064892e-05, | |
| "loss": 0.799036865234375, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.6446964238310137, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.4, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.77, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.61, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.71, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.44345995358170154, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.5206190476190475, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.5078251449398562, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.4, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.096, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.23333333333333334, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.174, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.28, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.655, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.475, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.585, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.48, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.84, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.72, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.82, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.4349260827283061, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.6048571428571428, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.5166147735280449, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.48, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.122, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.3, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.22799999999999998, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.24, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.61, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.45, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.57, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.32, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.7, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.5, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.6, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.45199382443509706, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.43638095238095226, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.4990355163516675, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.32, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.07, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.16666666666666669, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.32, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.7, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.5, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.6, | |
| "eval_mse-dev_negative_mse": -79.57255554199219, | |
| "eval_runtime": 11.3457, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.5078251449398562, | |
| "eval_steps_per_second": 0.0, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.6465925897834578, | |
| "grad_norm": 0.8791268467903137, | |
| "learning_rate": 7.520225855385135e-05, | |
| "loss": 0.8034612274169922, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 0.648488755735902, | |
| "grad_norm": 0.9503916501998901, | |
| "learning_rate": 7.509691555705377e-05, | |
| "loss": 0.7990459442138672, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.6503849216883462, | |
| "grad_norm": 0.8711104393005371, | |
| "learning_rate": 7.499157256025619e-05, | |
| "loss": 0.7996244812011719, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 0.6522810876407903, | |
| "grad_norm": 0.8348352313041687, | |
| "learning_rate": 7.488622956345862e-05, | |
| "loss": 0.8004853820800781, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.6541772535932345, | |
| "grad_norm": 0.8777920007705688, | |
| "learning_rate": 7.478088656666105e-05, | |
| "loss": 0.8000244140625, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.6560734195456787, | |
| "grad_norm": 0.847030758857727, | |
| "learning_rate": 7.467554356986348e-05, | |
| "loss": 0.7975210571289062, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.6579695854981228, | |
| "grad_norm": 0.8619401454925537, | |
| "learning_rate": 7.457020057306591e-05, | |
| "loss": 0.7959075927734375, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 0.6598657514505669, | |
| "grad_norm": 0.8588744401931763, | |
| "learning_rate": 7.446485757626834e-05, | |
| "loss": 0.7920943450927734, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.6617619174030112, | |
| "grad_norm": 0.7903246879577637, | |
| "learning_rate": 7.435951457947076e-05, | |
| "loss": 0.7915798187255859, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 0.6636580833554553, | |
| "grad_norm": 0.9617411494255066, | |
| "learning_rate": 7.425417158267318e-05, | |
| "loss": 0.7933383178710938, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.6636580833554553, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.41000000000000003, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.75, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.62, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.6799999999999999, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.45535390379706975, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.526079365079365, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.5121596378017648, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.41000000000000003, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.094, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.2433333333333333, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.16199999999999998, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.29000000000000004, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.645, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.495, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.55, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.48, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.8, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.72, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.78, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.44473741551922635, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.6006666666666667, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.5139083384213542, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.48, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.118, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.3133333333333333, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.20799999999999996, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.24, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.59, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.47, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.52, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.34, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.7, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.52, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.58, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.4659703920749132, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.4514920634920634, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.5104109371821753, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.34, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.07, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.1733333333333333, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.11600000000000002, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.34, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.7, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.52, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.58, | |
| "eval_mse-dev_negative_mse": -78.7884292602539, | |
| "eval_runtime": 10.7989, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.5121596378017648, | |
| "eval_steps_per_second": 0.0, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.6655542493078994, | |
| "grad_norm": 0.8038257956504822, | |
| "learning_rate": 7.414882858587562e-05, | |
| "loss": 0.7908313751220704, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 0.6674504152603435, | |
| "grad_norm": 0.8573588132858276, | |
| "learning_rate": 7.404453901904602e-05, | |
| "loss": 0.7913258361816407, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.6693465812127878, | |
| "grad_norm": 0.829589307308197, | |
| "learning_rate": 7.393919602224845e-05, | |
| "loss": 0.7921287536621093, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 0.6712427471652319, | |
| "grad_norm": 0.8911552429199219, | |
| "learning_rate": 7.383385302545088e-05, | |
| "loss": 0.7928565979003906, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.673138913117676, | |
| "grad_norm": 0.9379572868347168, | |
| "learning_rate": 7.37285100286533e-05, | |
| "loss": 0.7914694213867187, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.6750350790701202, | |
| "grad_norm": 0.9253071546554565, | |
| "learning_rate": 7.362316703185572e-05, | |
| "loss": 0.7871210479736328, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.6769312450225644, | |
| "grad_norm": 0.9133068323135376, | |
| "learning_rate": 7.351782403505816e-05, | |
| "loss": 0.7835692596435547, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 0.6788274109750085, | |
| "grad_norm": 0.8401673436164856, | |
| "learning_rate": 7.341248103826058e-05, | |
| "loss": 0.7804772186279297, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 0.6807235769274527, | |
| "grad_norm": 0.8454675674438477, | |
| "learning_rate": 7.330713804146301e-05, | |
| "loss": 0.7870156860351563, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 0.6826197428798968, | |
| "grad_norm": 0.83338463306427, | |
| "learning_rate": 7.320179504466543e-05, | |
| "loss": 0.7796939849853516, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.6826197428798968, | |
| "eval_NanoBEIR_mean_cosine_accuracy@1": 0.42000000000000004, | |
| "eval_NanoBEIR_mean_cosine_accuracy@10": 0.8, | |
| "eval_NanoBEIR_mean_cosine_accuracy@3": 0.64, | |
| "eval_NanoBEIR_mean_cosine_accuracy@5": 0.69, | |
| "eval_NanoBEIR_mean_cosine_map@100": 0.46799146568426697, | |
| "eval_NanoBEIR_mean_cosine_mrr@10": 0.5508928571428571, | |
| "eval_NanoBEIR_mean_cosine_ndcg@10": 0.5353904032358002, | |
| "eval_NanoBEIR_mean_cosine_precision@1": 0.42000000000000004, | |
| "eval_NanoBEIR_mean_cosine_precision@10": 0.099, | |
| "eval_NanoBEIR_mean_cosine_precision@3": 0.2533333333333333, | |
| "eval_NanoBEIR_mean_cosine_precision@5": 0.17, | |
| "eval_NanoBEIR_mean_cosine_recall@1": 0.29000000000000004, | |
| "eval_NanoBEIR_mean_cosine_recall@10": 0.685, | |
| "eval_NanoBEIR_mean_cosine_recall@3": 0.51, | |
| "eval_NanoBEIR_mean_cosine_recall@5": 0.575, | |
| "eval_NanoHotpotQA_cosine_accuracy@1": 0.52, | |
| "eval_NanoHotpotQA_cosine_accuracy@10": 0.84, | |
| "eval_NanoHotpotQA_cosine_accuracy@3": 0.76, | |
| "eval_NanoHotpotQA_cosine_accuracy@5": 0.78, | |
| "eval_NanoHotpotQA_cosine_map@100": 0.47358422601023775, | |
| "eval_NanoHotpotQA_cosine_mrr@10": 0.6494444444444444, | |
| "eval_NanoHotpotQA_cosine_ndcg@10": 0.5456863439791646, | |
| "eval_NanoHotpotQA_cosine_precision@1": 0.52, | |
| "eval_NanoHotpotQA_cosine_precision@10": 0.122, | |
| "eval_NanoHotpotQA_cosine_precision@3": 0.33333333333333326, | |
| "eval_NanoHotpotQA_cosine_precision@5": 0.22, | |
| "eval_NanoHotpotQA_cosine_recall@1": 0.26, | |
| "eval_NanoHotpotQA_cosine_recall@10": 0.61, | |
| "eval_NanoHotpotQA_cosine_recall@3": 0.5, | |
| "eval_NanoHotpotQA_cosine_recall@5": 0.55, | |
| "eval_NanoMSMARCO_cosine_accuracy@1": 0.32, | |
| "eval_NanoMSMARCO_cosine_accuracy@10": 0.76, | |
| "eval_NanoMSMARCO_cosine_accuracy@3": 0.52, | |
| "eval_NanoMSMARCO_cosine_accuracy@5": 0.6, | |
| "eval_NanoMSMARCO_cosine_map@100": 0.4623987053582962, | |
| "eval_NanoMSMARCO_cosine_mrr@10": 0.4523412698412697, | |
| "eval_NanoMSMARCO_cosine_ndcg@10": 0.5250944624924359, | |
| "eval_NanoMSMARCO_cosine_precision@1": 0.32, | |
| "eval_NanoMSMARCO_cosine_precision@10": 0.07600000000000001, | |
| "eval_NanoMSMARCO_cosine_precision@3": 0.1733333333333333, | |
| "eval_NanoMSMARCO_cosine_precision@5": 0.12000000000000002, | |
| "eval_NanoMSMARCO_cosine_recall@1": 0.32, | |
| "eval_NanoMSMARCO_cosine_recall@10": 0.76, | |
| "eval_NanoMSMARCO_cosine_recall@3": 0.52, | |
| "eval_NanoMSMARCO_cosine_recall@5": 0.6, | |
| "eval_mse-dev_negative_mse": -77.74003601074219, | |
| "eval_runtime": 11.1488, | |
| "eval_samples_per_second": 0.0, | |
| "eval_sequential_score": 0.5353904032358002, | |
| "eval_steps_per_second": 0.0, | |
| "step": 36000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 105476, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 1000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 64, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |