{
  "best_global_step": 2600,
  "best_metric": 0.08818545192480087,
  "best_model_checkpoint": "./reranker-sweep/b42q86ij/checkpoint-2600",
  "epoch": 0.9475218658892128,
  "eval_steps": 200,
  "global_step": 2600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0036443148688046646,
      "grad_norm": 2.5712764263153076,
      "learning_rate": 3.2727272727272733e-06,
      "loss": 0.3268,
      "step": 10
    },
    {
      "epoch": 0.007288629737609329,
      "grad_norm": 3.9628148078918457,
      "learning_rate": 6.909090909090909e-06,
      "loss": 0.247,
      "step": 20
    },
    {
      "epoch": 0.010932944606413994,
      "grad_norm": 2.538698196411133,
      "learning_rate": 1.0545454545454546e-05,
      "loss": 0.2451,
      "step": 30
    },
    {
      "epoch": 0.014577259475218658,
      "grad_norm": 2.876335859298706,
      "learning_rate": 1.4181818181818181e-05,
      "loss": 0.2029,
      "step": 40
    },
    {
      "epoch": 0.018221574344023325,
      "grad_norm": 1.6715284585952759,
      "learning_rate": 1.781818181818182e-05,
      "loss": 0.1739,
      "step": 50
    },
    {
      "epoch": 0.021865889212827987,
      "grad_norm": 1.4855901002883911,
      "learning_rate": 2.1454545454545455e-05,
      "loss": 0.172,
      "step": 60
    },
    {
      "epoch": 0.025510204081632654,
      "grad_norm": 2.0233335494995117,
      "learning_rate": 2.5090909090909094e-05,
      "loss": 0.1425,
      "step": 70
    },
    {
      "epoch": 0.029154518950437316,
      "grad_norm": 1.9063516855239868,
      "learning_rate": 2.872727272727273e-05,
      "loss": 0.138,
      "step": 80
    },
    {
      "epoch": 0.03279883381924198,
      "grad_norm": 0.9233603477478027,
      "learning_rate": 3.236363636363636e-05,
      "loss": 0.1304,
      "step": 90
    },
    {
      "epoch": 0.03644314868804665,
      "grad_norm": 1.3970320224761963,
      "learning_rate": 3.6e-05,
      "loss": 0.1561,
      "step": 100
    },
    {
      "epoch": 0.04008746355685131,
      "grad_norm": 1.0219364166259766,
      "learning_rate": 3.963636363636364e-05,
      "loss": 0.1627,
      "step": 110
    },
    {
      "epoch": 0.043731778425655975,
      "grad_norm": 1.6836103200912476,
      "learning_rate": 4.327272727272728e-05,
      "loss": 0.1974,
      "step": 120
    },
    {
      "epoch": 0.047376093294460644,
      "grad_norm": 1.486811637878418,
      "learning_rate": 4.690909090909091e-05,
      "loss": 0.1339,
      "step": 130
    },
    {
      "epoch": 0.05102040816326531,
      "grad_norm": 1.230664610862732,
      "learning_rate": 5.054545454545455e-05,
      "loss": 0.1137,
      "step": 140
    },
    {
      "epoch": 0.05466472303206997,
      "grad_norm": 1.133725881576538,
      "learning_rate": 5.418181818181819e-05,
      "loss": 0.1333,
      "step": 150
    },
    {
      "epoch": 0.05830903790087463,
      "grad_norm": 1.921517014503479,
      "learning_rate": 5.7818181818181815e-05,
      "loss": 0.1296,
      "step": 160
    },
    {
      "epoch": 0.0619533527696793,
      "grad_norm": 0.6957170367240906,
      "learning_rate": 6.145454545454545e-05,
      "loss": 0.1723,
      "step": 170
    },
    {
      "epoch": 0.06559766763848396,
      "grad_norm": 0.40991759300231934,
      "learning_rate": 6.50909090909091e-05,
      "loss": 0.1099,
      "step": 180
    },
    {
      "epoch": 0.06924198250728864,
      "grad_norm": 1.0332207679748535,
      "learning_rate": 6.872727272727273e-05,
      "loss": 0.1105,
      "step": 190
    },
    {
      "epoch": 0.0728862973760933,
      "grad_norm": 0.7601434588432312,
      "learning_rate": 7.236363636363637e-05,
      "loss": 0.0917,
      "step": 200
    },
    {
      "epoch": 0.0728862973760933,
      "eval_loss": 0.11327829957008362,
      "eval_reranker_map": 0.8637301072112248,
      "eval_reranker_mrr@10": 0.915392151575617,
      "eval_reranker_ndcg@10": 0.9034123916044116,
      "eval_runtime": 21.6475,
      "eval_samples_per_second": 1155.192,
      "eval_steps_per_second": 18.062,
      "step": 200
    },
    {
      "epoch": 0.07653061224489796,
      "grad_norm": 1.4077112674713135,
      "learning_rate": 7.6e-05,
      "loss": 0.1012,
      "step": 210
    },
    {
      "epoch": 0.08017492711370262,
      "grad_norm": 0.6944614052772522,
      "learning_rate": 7.963636363636364e-05,
      "loss": 0.1296,
      "step": 220
    },
    {
      "epoch": 0.08381924198250729,
      "grad_norm": 1.064735770225525,
      "learning_rate": 8.327272727272728e-05,
      "loss": 0.1332,
      "step": 230
    },
    {
      "epoch": 0.08746355685131195,
      "grad_norm": 0.7208377122879028,
      "learning_rate": 8.690909090909091e-05,
      "loss": 0.095,
      "step": 240
    },
    {
      "epoch": 0.09110787172011661,
      "grad_norm": 0.5791595578193665,
      "learning_rate": 9.054545454545455e-05,
      "loss": 0.1351,
      "step": 250
    },
    {
      "epoch": 0.09475218658892129,
      "grad_norm": 1.8823211193084717,
      "learning_rate": 9.418181818181818e-05,
      "loss": 0.1138,
      "step": 260
    },
    {
      "epoch": 0.09839650145772595,
      "grad_norm": 1.0496199131011963,
      "learning_rate": 9.781818181818183e-05,
      "loss": 0.1318,
      "step": 270
    },
    {
      "epoch": 0.10204081632653061,
      "grad_norm": 0.8584634065628052,
      "learning_rate": 9.983799108950993e-05,
      "loss": 0.1164,
      "step": 280
    },
    {
      "epoch": 0.10568513119533528,
      "grad_norm": 1.8461506366729736,
      "learning_rate": 9.943296881328473e-05,
      "loss": 0.1418,
      "step": 290
    },
    {
      "epoch": 0.10932944606413994,
      "grad_norm": 1.8940588235855103,
      "learning_rate": 9.902794653705955e-05,
      "loss": 0.1337,
      "step": 300
    },
    {
      "epoch": 0.1129737609329446,
      "grad_norm": 1.089218258857727,
      "learning_rate": 9.862292426083435e-05,
      "loss": 0.1169,
      "step": 310
    },
    {
      "epoch": 0.11661807580174927,
      "grad_norm": 0.6664621233940125,
      "learning_rate": 9.821790198460917e-05,
      "loss": 0.1314,
      "step": 320
    },
    {
      "epoch": 0.12026239067055394,
      "grad_norm": 0.5564493536949158,
      "learning_rate": 9.781287970838397e-05,
      "loss": 0.1197,
      "step": 330
    },
    {
      "epoch": 0.1239067055393586,
      "grad_norm": 0.6858499050140381,
      "learning_rate": 9.740785743215877e-05,
      "loss": 0.1002,
      "step": 340
    },
    {
      "epoch": 0.12755102040816327,
      "grad_norm": 0.6210586428642273,
      "learning_rate": 9.700283515593358e-05,
      "loss": 0.1124,
      "step": 350
    },
    {
      "epoch": 0.13119533527696792,
      "grad_norm": 1.4063215255737305,
      "learning_rate": 9.65978128797084e-05,
      "loss": 0.0932,
      "step": 360
    },
    {
      "epoch": 0.1348396501457726,
      "grad_norm": 1.0574997663497925,
      "learning_rate": 9.61927906034832e-05,
      "loss": 0.1629,
      "step": 370
    },
    {
      "epoch": 0.13848396501457727,
      "grad_norm": 1.4113630056381226,
      "learning_rate": 9.5787768327258e-05,
      "loss": 0.1501,
      "step": 380
    },
    {
      "epoch": 0.14212827988338192,
      "grad_norm": 0.9053757786750793,
      "learning_rate": 9.538274605103282e-05,
      "loss": 0.1097,
      "step": 390
    },
    {
      "epoch": 0.1457725947521866,
      "grad_norm": 0.8119211792945862,
      "learning_rate": 9.497772377480762e-05,
      "loss": 0.0756,
      "step": 400
    },
    {
      "epoch": 0.1457725947521866,
      "eval_loss": 0.11383406072854996,
      "eval_reranker_map": 0.8594534683785606,
      "eval_reranker_mrr@10": 0.9061123694475903,
      "eval_reranker_ndcg@10": 0.8983525842618318,
      "eval_runtime": 21.3897,
      "eval_samples_per_second": 1169.111,
      "eval_steps_per_second": 18.28,
      "step": 400
    },
    {
      "epoch": 0.14941690962099125,
      "grad_norm": 0.8926772475242615,
      "learning_rate": 9.457270149858242e-05,
      "loss": 0.1174,
      "step": 410
    },
    {
      "epoch": 0.15306122448979592,
      "grad_norm": 1.475794792175293,
      "learning_rate": 9.416767922235723e-05,
      "loss": 0.1472,
      "step": 420
    },
    {
      "epoch": 0.15670553935860057,
      "grad_norm": 0.6618950963020325,
      "learning_rate": 9.376265694613204e-05,
      "loss": 0.1391,
      "step": 430
    },
    {
      "epoch": 0.16034985422740525,
      "grad_norm": 1.0769526958465576,
      "learning_rate": 9.335763466990685e-05,
      "loss": 0.1188,
      "step": 440
    },
    {
      "epoch": 0.16399416909620992,
      "grad_norm": 0.5829262733459473,
      "learning_rate": 9.295261239368166e-05,
      "loss": 0.1555,
      "step": 450
    },
    {
      "epoch": 0.16763848396501457,
      "grad_norm": 1.0533019304275513,
      "learning_rate": 9.254759011745647e-05,
      "loss": 0.1148,
      "step": 460
    },
    {
      "epoch": 0.17128279883381925,
      "grad_norm": 0.5009166598320007,
      "learning_rate": 9.214256784123127e-05,
      "loss": 0.0753,
      "step": 470
    },
    {
      "epoch": 0.1749271137026239,
      "grad_norm": 1.3245645761489868,
      "learning_rate": 9.173754556500607e-05,
      "loss": 0.104,
      "step": 480
    },
    {
      "epoch": 0.17857142857142858,
      "grad_norm": 1.018767237663269,
      "learning_rate": 9.133252328878089e-05,
      "loss": 0.1313,
      "step": 490
    },
    {
      "epoch": 0.18221574344023322,
      "grad_norm": 0.7106571793556213,
      "learning_rate": 9.09275010125557e-05,
      "loss": 0.1125,
      "step": 500
    },
    {
      "epoch": 0.1858600583090379,
      "grad_norm": 1.6412338018417358,
      "learning_rate": 9.052247873633051e-05,
      "loss": 0.0772,
      "step": 510
    },
    {
      "epoch": 0.18950437317784258,
      "grad_norm": 0.652991771697998,
      "learning_rate": 9.011745646010531e-05,
      "loss": 0.1045,
      "step": 520
    },
    {
      "epoch": 0.19314868804664723,
      "grad_norm": 1.3570860624313354,
      "learning_rate": 8.971243418388012e-05,
      "loss": 0.1101,
      "step": 530
    },
    {
      "epoch": 0.1967930029154519,
      "grad_norm": 0.8085044622421265,
      "learning_rate": 8.930741190765492e-05,
      "loss": 0.109,
      "step": 540
    },
    {
      "epoch": 0.20043731778425655,
      "grad_norm": 0.69944828748703,
      "learning_rate": 8.890238963142972e-05,
      "loss": 0.124,
      "step": 550
    },
    {
      "epoch": 0.20408163265306123,
      "grad_norm": 0.6185526847839355,
      "learning_rate": 8.849736735520454e-05,
      "loss": 0.0934,
      "step": 560
    },
    {
      "epoch": 0.20772594752186588,
      "grad_norm": 0.6213017702102661,
      "learning_rate": 8.809234507897934e-05,
      "loss": 0.1305,
      "step": 570
    },
    {
      "epoch": 0.21137026239067055,
      "grad_norm": 0.5960863828659058,
      "learning_rate": 8.768732280275416e-05,
      "loss": 0.1163,
      "step": 580
    },
    {
      "epoch": 0.21501457725947523,
      "grad_norm": 0.899186909198761,
      "learning_rate": 8.728230052652896e-05,
      "loss": 0.1004,
      "step": 590
    },
    {
      "epoch": 0.21865889212827988,
      "grad_norm": 1.859052300453186,
      "learning_rate": 8.687727825030377e-05,
      "loss": 0.0917,
      "step": 600
    },
    {
      "epoch": 0.21865889212827988,
      "eval_loss": 0.12061735987663269,
      "eval_reranker_map": 0.8625884506186454,
      "eval_reranker_mrr@10": 0.9138017041471176,
      "eval_reranker_ndcg@10": 0.9024518526276585,
      "eval_runtime": 21.4146,
      "eval_samples_per_second": 1167.756,
      "eval_steps_per_second": 18.259,
      "step": 600
    },
    {
      "epoch": 0.22230320699708456,
      "grad_norm": 1.2568275928497314,
      "learning_rate": 8.647225597407857e-05,
      "loss": 0.0942,
      "step": 610
    },
    {
      "epoch": 0.2259475218658892,
      "grad_norm": 0.7559579610824585,
      "learning_rate": 8.606723369785339e-05,
      "loss": 0.1223,
      "step": 620
    },
    {
      "epoch": 0.22959183673469388,
      "grad_norm": 0.5116895437240601,
      "learning_rate": 8.566221142162819e-05,
      "loss": 0.1156,
      "step": 630
    },
    {
      "epoch": 0.23323615160349853,
      "grad_norm": 0.9851139783859253,
      "learning_rate": 8.525718914540301e-05,
      "loss": 0.0924,
      "step": 640
    },
    {
      "epoch": 0.2368804664723032,
      "grad_norm": 1.11528480052948,
      "learning_rate": 8.485216686917781e-05,
      "loss": 0.1372,
      "step": 650
    },
    {
      "epoch": 0.24052478134110788,
      "grad_norm": 1.3869421482086182,
      "learning_rate": 8.444714459295261e-05,
      "loss": 0.0984,
      "step": 660
    },
    {
      "epoch": 0.24416909620991253,
      "grad_norm": 0.7908770442008972,
      "learning_rate": 8.404212231672742e-05,
      "loss": 0.0876,
      "step": 670
    },
    {
      "epoch": 0.2478134110787172,
      "grad_norm": 0.7059890627861023,
      "learning_rate": 8.363710004050223e-05,
      "loss": 0.0926,
      "step": 680
    },
    {
      "epoch": 0.25145772594752186,
      "grad_norm": 0.9383406043052673,
      "learning_rate": 8.323207776427704e-05,
      "loss": 0.0819,
      "step": 690
    },
    {
      "epoch": 0.25510204081632654,
      "grad_norm": 0.6492351293563843,
      "learning_rate": 8.282705548805186e-05,
      "loss": 0.1034,
      "step": 700
    },
    {
      "epoch": 0.2587463556851312,
      "grad_norm": 1.1460670232772827,
      "learning_rate": 8.242203321182666e-05,
      "loss": 0.1022,
      "step": 710
    },
    {
      "epoch": 0.26239067055393583,
      "grad_norm": 0.6701614260673523,
      "learning_rate": 8.201701093560146e-05,
      "loss": 0.0661,
      "step": 720
    },
    {
      "epoch": 0.2660349854227405,
      "grad_norm": 1.4577497243881226,
      "learning_rate": 8.161198865937626e-05,
      "loss": 0.124,
      "step": 730
    },
    {
      "epoch": 0.2696793002915452,
      "grad_norm": 0.602418065071106,
      "learning_rate": 8.120696638315107e-05,
      "loss": 0.1231,
      "step": 740
    },
    {
      "epoch": 0.27332361516034986,
      "grad_norm": 0.9659672975540161,
      "learning_rate": 8.080194410692589e-05,
      "loss": 0.1307,
      "step": 750
    },
    {
      "epoch": 0.27696793002915454,
      "grad_norm": 0.3637723922729492,
      "learning_rate": 8.039692183070069e-05,
      "loss": 0.0973,
      "step": 760
    },
    {
      "epoch": 0.28061224489795916,
      "grad_norm": 0.8434582352638245,
      "learning_rate": 7.99918995544755e-05,
      "loss": 0.0721,
      "step": 770
    },
    {
      "epoch": 0.28425655976676384,
      "grad_norm": 0.7171585559844971,
      "learning_rate": 7.958687727825031e-05,
      "loss": 0.0734,
      "step": 780
    },
    {
      "epoch": 0.2879008746355685,
      "grad_norm": 0.7634996175765991,
      "learning_rate": 7.918185500202511e-05,
      "loss": 0.0806,
      "step": 790
    },
    {
      "epoch": 0.2915451895043732,
      "grad_norm": 0.7262800931930542,
      "learning_rate": 7.877683272579992e-05,
      "loss": 0.0824,
      "step": 800
    },
    {
      "epoch": 0.2915451895043732,
      "eval_loss": 0.09958108514547348,
      "eval_reranker_map": 0.869804989912677,
      "eval_reranker_mrr@10": 0.9200749609016879,
      "eval_reranker_ndcg@10": 0.9079497888185337,
      "eval_runtime": 21.3269,
      "eval_samples_per_second": 1172.559,
      "eval_steps_per_second": 18.334,
      "step": 800
    },
    {
      "epoch": 0.29518950437317787,
      "grad_norm": 0.26812535524368286,
      "learning_rate": 7.837181044957473e-05,
      "loss": 0.1037,
      "step": 810
    },
    {
      "epoch": 0.2988338192419825,
      "grad_norm": 0.5044072866439819,
      "learning_rate": 7.796678817334954e-05,
      "loss": 0.0771,
      "step": 820
    },
    {
      "epoch": 0.30247813411078717,
      "grad_norm": 0.6845607757568359,
      "learning_rate": 7.756176589712435e-05,
      "loss": 0.1407,
      "step": 830
    },
    {
      "epoch": 0.30612244897959184,
      "grad_norm": 0.9331074953079224,
      "learning_rate": 7.715674362089916e-05,
      "loss": 0.1196,
      "step": 840
    },
    {
      "epoch": 0.3097667638483965,
      "grad_norm": 1.0579673051834106,
      "learning_rate": 7.675172134467397e-05,
      "loss": 0.1087,
      "step": 850
    },
    {
      "epoch": 0.31341107871720114,
      "grad_norm": 1.4271413087844849,
      "learning_rate": 7.634669906844876e-05,
      "loss": 0.0737,
      "step": 860
    },
    {
      "epoch": 0.3170553935860058,
      "grad_norm": 0.9227479100227356,
      "learning_rate": 7.594167679222357e-05,
      "loss": 0.0986,
      "step": 870
    },
    {
      "epoch": 0.3206997084548105,
      "grad_norm": 0.4991419017314911,
      "learning_rate": 7.553665451599838e-05,
      "loss": 0.1042,
      "step": 880
    },
    {
      "epoch": 0.32434402332361517,
      "grad_norm": 0.9503481984138489,
      "learning_rate": 7.513163223977319e-05,
      "loss": 0.0971,
      "step": 890
    },
    {
      "epoch": 0.32798833819241985,
      "grad_norm": 3.130014181137085,
      "learning_rate": 7.4726609963548e-05,
      "loss": 0.0824,
      "step": 900
    },
    {
      "epoch": 0.33163265306122447,
      "grad_norm": 0.8585525155067444,
      "learning_rate": 7.43215876873228e-05,
      "loss": 0.0842,
      "step": 910
    },
    {
      "epoch": 0.33527696793002915,
      "grad_norm": 0.7285027503967285,
      "learning_rate": 7.391656541109761e-05,
      "loss": 0.1361,
      "step": 920
    },
    {
      "epoch": 0.3389212827988338,
      "grad_norm": 0.4131384789943695,
      "learning_rate": 7.351154313487241e-05,
      "loss": 0.086,
      "step": 930
    },
    {
      "epoch": 0.3425655976676385,
      "grad_norm": 0.9633147120475769,
      "learning_rate": 7.310652085864723e-05,
      "loss": 0.0861,
      "step": 940
    },
    {
      "epoch": 0.3462099125364432,
      "grad_norm": 0.9654539227485657,
      "learning_rate": 7.270149858242203e-05,
      "loss": 0.1039,
      "step": 950
    },
    {
      "epoch": 0.3498542274052478,
      "grad_norm": 1.1640666723251343,
      "learning_rate": 7.229647630619685e-05,
      "loss": 0.1085,
      "step": 960
    },
    {
      "epoch": 0.3534985422740525,
      "grad_norm": 1.0129121541976929,
      "learning_rate": 7.189145402997165e-05,
      "loss": 0.1316,
      "step": 970
    },
    {
      "epoch": 0.35714285714285715,
      "grad_norm": 1.2108495235443115,
      "learning_rate": 7.148643175374647e-05,
      "loss": 0.0806,
      "step": 980
    },
    {
      "epoch": 0.3607871720116618,
      "grad_norm": 1.3395358324050903,
      "learning_rate": 7.108140947752126e-05,
      "loss": 0.0873,
      "step": 990
    },
    {
      "epoch": 0.36443148688046645,
      "grad_norm": 0.7182661294937134,
      "learning_rate": 7.067638720129608e-05,
      "loss": 0.0952,
      "step": 1000
    },
    {
      "epoch": 0.36443148688046645,
      "eval_loss": 0.09812270849943161,
      "eval_reranker_map": 0.8729628573406761,
      "eval_reranker_mrr@10": 0.920727498247317,
      "eval_reranker_ndcg@10": 0.9101050014801282,
      "eval_runtime": 21.2214,
      "eval_samples_per_second": 1178.387,
      "eval_steps_per_second": 18.425,
      "step": 1000
    },
    {
      "epoch": 0.3680758017492711,
      "grad_norm": 0.8469423651695251,
      "learning_rate": 7.027136492507088e-05,
      "loss": 0.1194,
      "step": 1010
    },
    {
      "epoch": 0.3717201166180758,
      "grad_norm": 1.2186115980148315,
      "learning_rate": 6.98663426488457e-05,
      "loss": 0.1114,
      "step": 1020
    },
    {
      "epoch": 0.3753644314868805,
      "grad_norm": 1.040454626083374,
      "learning_rate": 6.94613203726205e-05,
      "loss": 0.122,
      "step": 1030
    },
    {
      "epoch": 0.37900874635568516,
      "grad_norm": 0.5609027147293091,
      "learning_rate": 6.90562980963953e-05,
      "loss": 0.094,
      "step": 1040
    },
    {
      "epoch": 0.3826530612244898,
      "grad_norm": 1.0195974111557007,
      "learning_rate": 6.865127582017012e-05,
      "loss": 0.0971,
      "step": 1050
    },
    {
      "epoch": 0.38629737609329445,
      "grad_norm": 0.67203289270401,
      "learning_rate": 6.824625354394491e-05,
      "loss": 0.1285,
      "step": 1060
    },
    {
      "epoch": 0.38994169096209913,
      "grad_norm": 0.3070160746574402,
      "learning_rate": 6.784123126771973e-05,
      "loss": 0.103,
      "step": 1070
    },
    {
      "epoch": 0.3935860058309038,
      "grad_norm": 1.0986084938049316,
      "learning_rate": 6.743620899149453e-05,
      "loss": 0.1065,
      "step": 1080
    },
    {
      "epoch": 0.39723032069970843,
      "grad_norm": 1.3802062273025513,
      "learning_rate": 6.703118671526935e-05,
      "loss": 0.0885,
      "step": 1090
    },
    {
      "epoch": 0.4008746355685131,
      "grad_norm": 2.0906848907470703,
      "learning_rate": 6.662616443904415e-05,
      "loss": 0.1022,
      "step": 1100
    },
    {
      "epoch": 0.4045189504373178,
      "grad_norm": 1.3855655193328857,
      "learning_rate": 6.622114216281897e-05,
      "loss": 0.1129,
      "step": 1110
    },
    {
      "epoch": 0.40816326530612246,
      "grad_norm": 0.9681763648986816,
      "learning_rate": 6.581611988659376e-05,
      "loss": 0.1229,
      "step": 1120
    },
    {
      "epoch": 0.41180758017492713,
      "grad_norm": 1.2986786365509033,
      "learning_rate": 6.541109761036857e-05,
      "loss": 0.0999,
      "step": 1130
    },
    {
      "epoch": 0.41545189504373176,
      "grad_norm": 0.4236406981945038,
      "learning_rate": 6.500607533414338e-05,
      "loss": 0.0879,
      "step": 1140
    },
    {
      "epoch": 0.41909620991253643,
      "grad_norm": 0.4405617117881775,
      "learning_rate": 6.46010530579182e-05,
      "loss": 0.0763,
      "step": 1150
    },
    {
      "epoch": 0.4227405247813411,
      "grad_norm": 0.7291234731674194,
      "learning_rate": 6.4196030781693e-05,
      "loss": 0.0852,
      "step": 1160
    },
    {
      "epoch": 0.4263848396501458,
      "grad_norm": 1.0762977600097656,
      "learning_rate": 6.379100850546781e-05,
      "loss": 0.0914,
      "step": 1170
    },
    {
      "epoch": 0.43002915451895046,
      "grad_norm": 1.2228280305862427,
      "learning_rate": 6.338598622924262e-05,
      "loss": 0.1004,
      "step": 1180
    },
    {
      "epoch": 0.4336734693877551,
      "grad_norm": 0.7461398243904114,
      "learning_rate": 6.298096395301742e-05,
      "loss": 0.1143,
      "step": 1190
    },
    {
      "epoch": 0.43731778425655976,
      "grad_norm": 0.942706286907196,
      "learning_rate": 6.257594167679222e-05,
      "loss": 0.1364,
      "step": 1200
    },
    {
      "epoch": 0.43731778425655976,
      "eval_loss": 0.09395871311426163,
      "eval_reranker_map": 0.8911714470545177,
      "eval_reranker_mrr@10": 0.9400510525085836,
      "eval_reranker_ndcg@10": 0.924593388432475,
      "eval_runtime": 21.2854,
      "eval_samples_per_second": 1174.843,
      "eval_steps_per_second": 18.369,
      "step": 1200
    },
    {
      "epoch": 0.44096209912536444,
      "grad_norm": 0.8438668251037598,
      "learning_rate": 6.217091940056703e-05,
      "loss": 0.1017,
      "step": 1210
    },
    {
      "epoch": 0.4446064139941691,
      "grad_norm": 0.6174122095108032,
      "learning_rate": 6.176589712434184e-05,
      "loss": 0.09,
      "step": 1220
    },
    {
      "epoch": 0.44825072886297374,
      "grad_norm": 0.6999545097351074,
      "learning_rate": 6.136087484811665e-05,
      "loss": 0.0687,
      "step": 1230
    },
    {
      "epoch": 0.4518950437317784,
      "grad_norm": 1.237770676612854,
      "learning_rate": 6.095585257189146e-05,
      "loss": 0.0733,
      "step": 1240
    },
    {
      "epoch": 0.4555393586005831,
      "grad_norm": 0.933530330657959,
      "learning_rate": 6.055083029566626e-05,
      "loss": 0.1049,
      "step": 1250
    },
    {
      "epoch": 0.45918367346938777,
      "grad_norm": 1.1076560020446777,
      "learning_rate": 6.014580801944108e-05,
      "loss": 0.0918,
      "step": 1260
    },
    {
      "epoch": 0.46282798833819244,
      "grad_norm": 1.030834674835205,
      "learning_rate": 5.9740785743215874e-05,
      "loss": 0.0848,
      "step": 1270
    },
    {
      "epoch": 0.46647230320699706,
      "grad_norm": 1.2574701309204102,
      "learning_rate": 5.933576346699069e-05,
      "loss": 0.0736,
      "step": 1280
    },
    {
      "epoch": 0.47011661807580174,
      "grad_norm": 0.9184115529060364,
      "learning_rate": 5.8930741190765494e-05,
      "loss": 0.1129,
      "step": 1290
    },
    {
      "epoch": 0.4737609329446064,
      "grad_norm": 0.5017003417015076,
      "learning_rate": 5.8525718914540305e-05,
      "loss": 0.0713,
      "step": 1300
    },
    {
      "epoch": 0.4774052478134111,
      "grad_norm": 0.5321061015129089,
      "learning_rate": 5.812069663831511e-05,
      "loss": 0.0876,
      "step": 1310
    },
    {
      "epoch": 0.48104956268221577,
      "grad_norm": 0.43396979570388794,
      "learning_rate": 5.7715674362089925e-05,
      "loss": 0.0866,
      "step": 1320
    },
    {
      "epoch": 0.4846938775510204,
      "grad_norm": 0.6829825043678284,
      "learning_rate": 5.731065208586472e-05,
      "loss": 0.1016,
      "step": 1330
    },
    {
      "epoch": 0.48833819241982507,
      "grad_norm": 1.6625821590423584,
      "learning_rate": 5.690562980963954e-05,
      "loss": 0.1061,
      "step": 1340
    },
    {
      "epoch": 0.49198250728862974,
      "grad_norm": 0.20715846121311188,
      "learning_rate": 5.650060753341434e-05,
      "loss": 0.0791,
      "step": 1350
    },
    {
      "epoch": 0.4956268221574344,
      "grad_norm": 0.8938704133033752,
      "learning_rate": 5.6095585257189145e-05,
      "loss": 0.0938,
      "step": 1360
    },
    {
      "epoch": 0.49927113702623904,
      "grad_norm": 0.5026534199714661,
      "learning_rate": 5.5690562980963955e-05,
      "loss": 0.1235,
      "step": 1370
    },
    {
      "epoch": 0.5029154518950437,
      "grad_norm": 0.47632285952568054,
      "learning_rate": 5.528554070473876e-05,
      "loss": 0.0693,
      "step": 1380
    },
    {
      "epoch": 0.5065597667638484,
      "grad_norm": 1.092860460281372,
      "learning_rate": 5.4880518428513575e-05,
      "loss": 0.065,
      "step": 1390
    },
    {
      "epoch": 0.5102040816326531,
      "grad_norm": 0.9408676624298096,
      "learning_rate": 5.447549615228837e-05,
      "loss": 0.0839,
      "step": 1400
    },
    {
      "epoch": 0.5102040816326531,
      "eval_loss": 0.10072223097085953,
      "eval_reranker_map": 0.8862202409768546,
      "eval_reranker_mrr@10": 0.9337876826834923,
      "eval_reranker_ndcg@10": 0.9214271556218577,
      "eval_runtime": 22.7831,
      "eval_samples_per_second": 1097.612,
      "eval_steps_per_second": 17.162,
      "step": 1400
    },
    {
      "epoch": 0.5138483965014577,
      "grad_norm": 2.5904500484466553,
      "learning_rate": 5.407047387606319e-05,
      "loss": 0.0914,
      "step": 1410
    },
    {
      "epoch": 0.5174927113702624,
      "grad_norm": 0.23948027193546295,
      "learning_rate": 5.366545159983799e-05,
      "loss": 0.0786,
      "step": 1420
    },
    {
      "epoch": 0.5211370262390671,
      "grad_norm": 0.8241820931434631,
      "learning_rate": 5.32604293236128e-05,
      "loss": 0.0916,
      "step": 1430
    },
    {
      "epoch": 0.5247813411078717,
      "grad_norm": 0.5767093896865845,
      "learning_rate": 5.2855407047387605e-05,
      "loss": 0.0606,
      "step": 1440
    },
    {
      "epoch": 0.5284256559766763,
      "grad_norm": 1.3108361959457397,
      "learning_rate": 5.245038477116242e-05,
      "loss": 0.1417,
      "step": 1450
    },
    {
      "epoch": 0.532069970845481,
      "grad_norm": 1.309979796409607,
      "learning_rate": 5.2045362494937225e-05,
      "loss": 0.0856,
      "step": 1460
    },
    {
      "epoch": 0.5357142857142857,
      "grad_norm": 1.250825047492981,
      "learning_rate": 5.1640340218712035e-05,
      "loss": 0.0865,
      "step": 1470
    },
    {
      "epoch": 0.5393586005830904,
      "grad_norm": 0.47954457998275757,
      "learning_rate": 5.123531794248684e-05,
      "loss": 0.0917,
      "step": 1480
    },
    {
      "epoch": 0.543002915451895,
      "grad_norm": 0.6852472424507141,
      "learning_rate": 5.083029566626165e-05,
      "loss": 0.0774,
      "step": 1490
    },
    {
      "epoch": 0.5466472303206997,
      "grad_norm": 0.8546938300132751,
      "learning_rate": 5.042527339003645e-05,
      "loss": 0.0951,
      "step": 1500
    },
    {
      "epoch": 0.5502915451895044,
      "grad_norm": 0.9142554998397827,
      "learning_rate": 5.002025111381127e-05,
      "loss": 0.074,
      "step": 1510
    },
    {
      "epoch": 0.5539358600583091,
      "grad_norm": 0.6394762396812439,
      "learning_rate": 4.961522883758607e-05,
      "loss": 0.0797,
      "step": 1520
    },
    {
      "epoch": 0.5575801749271136,
      "grad_norm": 1.9928357601165771,
      "learning_rate": 4.9210206561360876e-05,
      "loss": 0.0817,
      "step": 1530
    },
    {
      "epoch": 0.5612244897959183,
      "grad_norm": 0.917662501335144,
      "learning_rate": 4.8805184285135686e-05,
      "loss": 0.1137,
      "step": 1540
    },
    {
      "epoch": 0.564868804664723,
      "grad_norm": 1.1721371412277222,
      "learning_rate": 4.8400162008910496e-05,
      "loss": 0.1139,
      "step": 1550
    },
    {
      "epoch": 0.5685131195335277,
      "grad_norm": 0.9727454781532288,
      "learning_rate": 4.79951397326853e-05,
      "loss": 0.0889,
      "step": 1560
    },
    {
      "epoch": 0.5721574344023324,
      "grad_norm": 0.8819296360015869,
      "learning_rate": 4.759011745646011e-05,
      "loss": 0.1075,
      "step": 1570
    },
    {
      "epoch": 0.575801749271137,
      "grad_norm": 0.7837637662887573,
      "learning_rate": 4.718509518023492e-05,
      "loss": 0.1021,
      "step": 1580
    },
    {
      "epoch": 0.5794460641399417,
      "grad_norm": 0.5388606190681458,
      "learning_rate": 4.678007290400972e-05,
      "loss": 0.1115,
      "step": 1590
    },
    {
      "epoch": 0.5830903790087464,
      "grad_norm": 1.0936598777770996,
      "learning_rate": 4.637505062778453e-05,
      "loss": 0.1047,
      "step": 1600
    },
    {
      "epoch": 0.5830903790087464,
      "eval_loss": 0.095162533223629,
      "eval_reranker_map": 0.8900710013122974,
      "eval_reranker_mrr@10": 0.9361263909112154,
      "eval_reranker_ndcg@10": 0.9229002314461462,
      "eval_runtime": 21.9119,
      "eval_samples_per_second": 1141.25,
      "eval_steps_per_second": 17.844,
      "step": 1600
    },
    {
      "epoch": 0.5867346938775511,
      "grad_norm": 2.3125267028808594,
      "learning_rate": 4.5970028351559336e-05,
      "loss": 0.1056,
      "step": 1610
    },
    {
      "epoch": 0.5903790087463557,
      "grad_norm": 1.1802173852920532,
      "learning_rate": 4.5565006075334146e-05,
      "loss": 0.116,
      "step": 1620
    },
    {
      "epoch": 0.5940233236151603,
      "grad_norm": 0.9869738221168518,
      "learning_rate": 4.515998379910895e-05,
      "loss": 0.0989,
      "step": 1630
    },
    {
      "epoch": 0.597667638483965,
      "grad_norm": 0.7939630150794983,
      "learning_rate": 4.475496152288376e-05,
      "loss": 0.1102,
      "step": 1640
    },
    {
      "epoch": 0.6013119533527697,
      "grad_norm": 0.36633291840553284,
      "learning_rate": 4.434993924665857e-05,
      "loss": 0.1006,
      "step": 1650
    },
    {
      "epoch": 0.6049562682215743,
      "grad_norm": 2.6413233280181885,
      "learning_rate": 4.394491697043337e-05,
      "loss": 0.0956,
      "step": 1660
    },
    {
      "epoch": 0.608600583090379,
      "grad_norm": 0.8255997896194458,
      "learning_rate": 4.353989469420818e-05,
      "loss": 0.1003,
      "step": 1670
    },
    {
      "epoch": 0.6122448979591837,
      "grad_norm": 0.9502624869346619,
      "learning_rate": 4.313487241798299e-05,
      "loss": 0.0984,
      "step": 1680
    },
    {
      "epoch": 0.6158892128279884,
      "grad_norm": 1.0512892007827759,
      "learning_rate": 4.2729850141757796e-05,
      "loss": 0.0734,
      "step": 1690
    },
    {
      "epoch": 0.619533527696793,
      "grad_norm": 1.4372130632400513,
      "learning_rate": 4.2324827865532607e-05,
      "loss": 0.079,
      "step": 1700
    },
    {
      "epoch": 0.6231778425655977,
      "grad_norm": 0.44758155941963196,
      "learning_rate": 4.1919805589307417e-05,
      "loss": 0.0872,
      "step": 1710
    },
    {
      "epoch": 0.6268221574344023,
      "grad_norm": 0.7648677825927734,
      "learning_rate": 4.151478331308222e-05,
      "loss": 0.1077,
      "step": 1720
    },
    {
      "epoch": 0.630466472303207,
      "grad_norm": 1.3461487293243408,
      "learning_rate": 4.110976103685703e-05,
      "loss": 0.0833,
      "step": 1730
    },
    {
      "epoch": 0.6341107871720116,
      "grad_norm": 1.056250810623169,
      "learning_rate": 4.070473876063184e-05,
      "loss": 0.0984,
      "step": 1740
    },
    {
      "epoch": 0.6377551020408163,
      "grad_norm": 1.349318265914917,
      "learning_rate": 4.0299716484406643e-05,
      "loss": 0.0727,
      "step": 1750
    },
    {
      "epoch": 0.641399416909621,
      "grad_norm": 1.0693271160125732,
      "learning_rate": 3.9894694208181453e-05,
      "loss": 0.1062,
      "step": 1760
    },
    {
      "epoch": 0.6450437317784257,
      "grad_norm": 1.0974327325820923,
      "learning_rate": 3.948967193195626e-05,
      "loss": 0.1013,
      "step": 1770
    },
    {
      "epoch": 0.6486880466472303,
      "grad_norm": 1.0591496229171753,
      "learning_rate": 3.908464965573107e-05,
      "loss": 0.0892,
      "step": 1780
    },
    {
      "epoch": 0.652332361516035,
      "grad_norm": 0.9455766081809998,
      "learning_rate": 3.867962737950587e-05,
      "loss": 0.0765,
      "step": 1790
    },
    {
      "epoch": 0.6559766763848397,
      "grad_norm": 0.9994630217552185,
      "learning_rate": 3.827460510328068e-05,
      "loss": 0.0698,
      "step": 1800
    },
    {
      "epoch": 0.6559766763848397,
      "eval_loss": 0.09617751091718674,
      "eval_reranker_map": 0.886651874259156,
      "eval_reranker_mrr@10": 0.9332937856154163,
      "eval_reranker_ndcg@10": 0.920815981670818,
      "eval_runtime": 21.5965,
      "eval_samples_per_second": 1157.917,
      "eval_steps_per_second": 18.105,
      "step": 1800
    },
    {
      "epoch": 0.6596209912536443,
      "grad_norm": 0.2373291403055191,
      "learning_rate": 3.786958282705549e-05,
      "loss": 0.0658,
      "step": 1810
    },
    {
      "epoch": 0.6632653061224489,
      "grad_norm": 1.3221744298934937,
      "learning_rate": 3.7464560550830294e-05,
      "loss": 0.1386,
      "step": 1820
    },
    {
      "epoch": 0.6669096209912536,
      "grad_norm": 0.5240024924278259,
      "learning_rate": 3.7059538274605104e-05,
      "loss": 0.1094,
      "step": 1830
    },
    {
      "epoch": 0.6705539358600583,
      "grad_norm": 1.2826807498931885,
      "learning_rate": 3.6654515998379914e-05,
      "loss": 0.103,
      "step": 1840
    },
    {
      "epoch": 0.674198250728863,
      "grad_norm": 0.7587610483169556,
      "learning_rate": 3.624949372215472e-05,
      "loss": 0.1075,
      "step": 1850
    },
    {
      "epoch": 0.6778425655976676,
      "grad_norm": 0.8283730149269104,
      "learning_rate": 3.584447144592953e-05,
      "loss": 0.091,
      "step": 1860
    },
    {
      "epoch": 0.6814868804664723,
      "grad_norm": 1.2735215425491333,
      "learning_rate": 3.543944916970434e-05,
      "loss": 0.106,
      "step": 1870
    },
    {
      "epoch": 0.685131195335277,
      "grad_norm": 1.3818174600601196,
      "learning_rate": 3.503442689347915e-05,
      "loss": 0.0753,
      "step": 1880
    },
    {
      "epoch": 0.6887755102040817,
      "grad_norm": 0.7963125109672546,
      "learning_rate": 3.462940461725395e-05,
      "loss": 0.0685,
      "step": 1890
    },
    {
      "epoch": 0.6924198250728864,
      "grad_norm": 1.5980868339538574,
      "learning_rate": 3.422438234102876e-05,
      "loss": 0.1045,
      "step": 1900
    },
    {
      "epoch": 0.6960641399416909,
      "grad_norm": 0.34244367480278015,
      "learning_rate": 3.381936006480357e-05,
      "loss": 0.087,
      "step": 1910
    },
    {
      "epoch": 0.6997084548104956,
      "grad_norm": 1.120538592338562,
      "learning_rate": 3.3414337788578374e-05,
      "loss": 0.0866,
      "step": 1920
    },
    {
      "epoch": 0.7033527696793003,
      "grad_norm": 1.1474584341049194,
      "learning_rate": 3.300931551235318e-05,
      "loss": 0.1253,
      "step": 1930
    },
    {
      "epoch": 0.706997084548105,
      "grad_norm": 1.1841405630111694,
      "learning_rate": 3.260429323612799e-05,
      "loss": 0.0915,
      "step": 1940
    },
    {
      "epoch": 0.7106413994169096,
      "grad_norm": 0.7076147198677063,
      "learning_rate": 3.219927095990279e-05,
      "loss": 0.061,
      "step": 1950
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 0.8037185668945312,
      "learning_rate": 3.17942486836776e-05,
      "loss": 0.0744,
      "step": 1960
    },
    {
      "epoch": 0.717930029154519,
      "grad_norm": 0.6356151103973389,
      "learning_rate": 3.138922640745241e-05,
      "loss": 0.0643,
      "step": 1970
    },
    {
      "epoch": 0.7215743440233237,
      "grad_norm": 0.5353262424468994,
      "learning_rate": 3.098420413122722e-05,
      "loss": 0.0571,
      "step": 1980
    },
    {
      "epoch": 0.7252186588921283,
      "grad_norm": 1.3092989921569824,
      "learning_rate": 3.0579181855002025e-05,
      "loss": 0.1004,
      "step": 1990
    },
    {
      "epoch": 0.7288629737609329,
      "grad_norm": 1.03451669216156,
      "learning_rate": 3.0174159578776835e-05,
      "loss": 0.1075,
      "step": 2000
    },
    {
      "epoch": 0.7288629737609329,
      "eval_loss": 0.09355045855045319,
      "eval_reranker_map": 0.8897058240573428,
      "eval_reranker_mrr@10": 0.9358846105448597,
      "eval_reranker_ndcg@10": 0.9236999900731797,
      "eval_runtime": 21.3664,
      "eval_samples_per_second": 1170.39,
      "eval_steps_per_second": 18.3,
      "step": 2000
    },
    {
      "epoch": 0.7325072886297376,
      "grad_norm": 0.6066477298736572,
      "learning_rate": 2.976913730255164e-05,
      "loss": 0.0637,
      "step": 2010
    },
    {
      "epoch": 0.7361516034985423,
      "grad_norm": 0.7921279668807983,
      "learning_rate": 2.9364115026326448e-05,
      "loss": 0.1167,
      "step": 2020
    },
    {
      "epoch": 0.7397959183673469,
      "grad_norm": 1.2112895250320435,
      "learning_rate": 2.8959092750101258e-05,
      "loss": 0.1113,
      "step": 2030
    },
    {
      "epoch": 0.7434402332361516,
      "grad_norm": 1.007468342781067,
      "learning_rate": 2.8554070473876065e-05,
      "loss": 0.1314,
      "step": 2040
    },
    {
      "epoch": 0.7470845481049563,
      "grad_norm": 0.6368584036827087,
      "learning_rate": 2.8149048197650875e-05,
      "loss": 0.0764,
      "step": 2050
    },
    {
      "epoch": 0.750728862973761,
      "grad_norm": 0.33551472425460815,
      "learning_rate": 2.774402592142568e-05,
      "loss": 0.1297,
      "step": 2060
    },
    {
      "epoch": 0.7543731778425656,
      "grad_norm": 0.8485022187232971,
      "learning_rate": 2.733900364520049e-05,
      "loss": 0.0841,
      "step": 2070
    },
    {
      "epoch": 0.7580174927113703,
      "grad_norm": 0.9928489923477173,
      "learning_rate": 2.69339813689753e-05,
      "loss": 0.0967,
      "step": 2080
    },
    {
      "epoch": 0.7616618075801749,
      "grad_norm": 1.4591295719146729,
      "learning_rate": 2.6528959092750105e-05,
      "loss": 0.0916,
      "step": 2090
    },
    {
      "epoch": 0.7653061224489796,
      "grad_norm": 0.7156339287757874,
      "learning_rate": 2.612393681652491e-05,
      "loss": 0.1196,
      "step": 2100
    },
    {
      "epoch": 0.7689504373177842,
      "grad_norm": 1.2134402990341187,
      "learning_rate": 2.5718914540299715e-05,
      "loss": 0.1072,
      "step": 2110
    },
    {
      "epoch": 0.7725947521865889,
      "grad_norm": 1.509474754333496,
      "learning_rate": 2.5313892264074522e-05,
      "loss": 0.0974,
      "step": 2120
    },
    {
      "epoch": 0.7762390670553936,
      "grad_norm": 0.7905510067939758,
      "learning_rate": 2.4908869987849335e-05,
      "loss": 0.0772,
      "step": 2130
    },
    {
      "epoch": 0.7798833819241983,
      "grad_norm": 1.0353072881698608,
      "learning_rate": 2.450384771162414e-05,
      "loss": 0.1147,
      "step": 2140
    },
    {
      "epoch": 0.7835276967930029,
      "grad_norm": 0.3747937083244324,
      "learning_rate": 2.409882543539895e-05,
      "loss": 0.1003,
      "step": 2150
    },
    {
      "epoch": 0.7871720116618076,
      "grad_norm": 0.6658405661582947,
      "learning_rate": 2.3693803159173755e-05,
      "loss": 0.0944,
      "step": 2160
    },
    {
      "epoch": 0.7908163265306123,
      "grad_norm": 0.6835037469863892,
      "learning_rate": 2.3288780882948562e-05,
      "loss": 0.0886,
      "step": 2170
    },
    {
      "epoch": 0.7944606413994169,
      "grad_norm": 0.45619040727615356,
      "learning_rate": 2.2883758606723372e-05,
      "loss": 0.062,
      "step": 2180
    },
    {
      "epoch": 0.7981049562682215,
      "grad_norm": 0.28890809416770935,
      "learning_rate": 2.247873633049818e-05,
      "loss": 0.0817,
      "step": 2190
    },
    {
      "epoch": 0.8017492711370262,
      "grad_norm": 0.8147470355033875,
      "learning_rate": 2.2073714054272986e-05,
      "loss": 0.1096,
      "step": 2200
    },
    {
      "epoch": 0.8017492711370262,
      "eval_loss": 0.09192134439945221,
      "eval_reranker_map": 0.8933786713152716,
      "eval_reranker_mrr@10": 0.940468101170253,
      "eval_reranker_ndcg@10": 0.9262327488063826,
      "eval_runtime": 21.2807,
      "eval_samples_per_second": 1175.1,
      "eval_steps_per_second": 18.373,
      "step": 2200
    },
    {
      "epoch": 0.8053935860058309,
      "grad_norm": 0.7118353843688965,
      "learning_rate": 2.1668691778047796e-05,
      "loss": 0.0821,
      "step": 2210
    },
    {
      "epoch": 0.8090379008746356,
      "grad_norm": 0.32003968954086304,
      "learning_rate": 2.12636695018226e-05,
      "loss": 0.0866,
      "step": 2220
    },
    {
      "epoch": 0.8126822157434402,
      "grad_norm": 0.9534270763397217,
      "learning_rate": 2.085864722559741e-05,
      "loss": 0.0824,
      "step": 2230
    },
    {
      "epoch": 0.8163265306122449,
      "grad_norm": 1.245367169380188,
      "learning_rate": 2.0453624949372216e-05,
      "loss": 0.108,
      "step": 2240
    },
    {
      "epoch": 0.8199708454810496,
      "grad_norm": 0.6836312413215637,
      "learning_rate": 2.0048602673147023e-05,
      "loss": 0.0746,
      "step": 2250
    },
    {
      "epoch": 0.8236151603498543,
      "grad_norm": 1.6039366722106934,
      "learning_rate": 1.9643580396921833e-05,
      "loss": 0.0708,
      "step": 2260
    },
    {
      "epoch": 0.827259475218659,
      "grad_norm": 0.8757897019386292,
      "learning_rate": 1.923855812069664e-05,
      "loss": 0.0898,
      "step": 2270
    },
    {
      "epoch": 0.8309037900874635,
      "grad_norm": 0.3051084280014038,
      "learning_rate": 1.8833535844471446e-05,
      "loss": 0.0876,
      "step": 2280
    },
    {
      "epoch": 0.8345481049562682,
      "grad_norm": 0.5111737847328186,
      "learning_rate": 1.8428513568246256e-05,
      "loss": 0.0898,
      "step": 2290
    },
    {
      "epoch": 0.8381924198250729,
      "grad_norm": 1.0539857149124146,
      "learning_rate": 1.802349129202106e-05,
      "loss": 0.0935,
      "step": 2300
    },
    {
      "epoch": 0.8418367346938775,
      "grad_norm": 0.8566706776618958,
      "learning_rate": 1.761846901579587e-05,
      "loss": 0.0655,
      "step": 2310
    },
    {
      "epoch": 0.8454810495626822,
      "grad_norm": 1.0090196132659912,
      "learning_rate": 1.7213446739570676e-05,
      "loss": 0.106,
      "step": 2320
    },
    {
      "epoch": 0.8491253644314869,
      "grad_norm": 0.8994157910346985,
      "learning_rate": 1.6808424463345483e-05,
      "loss": 0.0806,
      "step": 2330
    },
    {
      "epoch": 0.8527696793002916,
      "grad_norm": 0.21777945756912231,
      "learning_rate": 1.6403402187120293e-05,
      "loss": 0.091,
      "step": 2340
    },
    {
      "epoch": 0.8564139941690962,
      "grad_norm": 0.42788615822792053,
      "learning_rate": 1.59983799108951e-05,
      "loss": 0.0575,
      "step": 2350
    },
    {
      "epoch": 0.8600583090379009,
      "grad_norm": 3.2373886108398438,
      "learning_rate": 1.559335763466991e-05,
      "loss": 0.059,
      "step": 2360
    },
    {
      "epoch": 0.8637026239067055,
      "grad_norm": 0.7039571404457092,
      "learning_rate": 1.5188335358444717e-05,
      "loss": 0.0889,
      "step": 2370
    },
    {
      "epoch": 0.8673469387755102,
      "grad_norm": 1.1819452047348022,
      "learning_rate": 1.4783313082219522e-05,
      "loss": 0.0955,
      "step": 2380
    },
    {
      "epoch": 0.8709912536443148,
      "grad_norm": 1.0368818044662476,
      "learning_rate": 1.437829080599433e-05,
      "loss": 0.0841,
      "step": 2390
    },
    {
      "epoch": 0.8746355685131195,
      "grad_norm": 0.2737424075603485,
      "learning_rate": 1.3973268529769137e-05,
      "loss": 0.0759,
      "step": 2400
    },
    {
      "epoch": 0.8746355685131195,
      "eval_loss": 0.08963464945554733,
      "eval_reranker_map": 0.8923150934556179,
      "eval_reranker_mrr@10": 0.9383257833144584,
      "eval_reranker_ndcg@10": 0.9256230786956634,
      "eval_runtime": 21.3268,
      "eval_samples_per_second": 1172.561,
      "eval_steps_per_second": 18.334,
      "step": 2400
    },
    {
      "epoch": 0.8782798833819242,
      "grad_norm": 0.9451560974121094,
      "learning_rate": 1.3568246253543945e-05,
      "loss": 0.0558,
      "step": 2410
    },
    {
      "epoch": 0.8819241982507289,
      "grad_norm": 1.2251777648925781,
      "learning_rate": 1.3163223977318753e-05,
      "loss": 0.0921,
      "step": 2420
    },
    {
      "epoch": 0.8855685131195336,
      "grad_norm": 0.9714108109474182,
      "learning_rate": 1.2758201701093562e-05,
      "loss": 0.0865,
      "step": 2430
    },
    {
      "epoch": 0.8892128279883382,
      "grad_norm": 0.6615180373191833,
      "learning_rate": 1.2353179424868368e-05,
      "loss": 0.0787,
      "step": 2440
    },
    {
      "epoch": 0.8928571428571429,
      "grad_norm": 1.7639282941818237,
      "learning_rate": 1.1948157148643175e-05,
      "loss": 0.0803,
      "step": 2450
    },
    {
      "epoch": 0.8965014577259475,
      "grad_norm": 0.5499392151832581,
      "learning_rate": 1.1543134872417984e-05,
      "loss": 0.0838,
      "step": 2460
    },
    {
      "epoch": 0.9001457725947521,
      "grad_norm": 1.1226394176483154,
      "learning_rate": 1.1138112596192792e-05,
      "loss": 0.0837,
      "step": 2470
    },
    {
      "epoch": 0.9037900874635568,
      "grad_norm": 0.6618596315383911,
      "learning_rate": 1.0733090319967599e-05,
      "loss": 0.097,
      "step": 2480
    },
    {
      "epoch": 0.9074344023323615,
      "grad_norm": 0.8916273713111877,
      "learning_rate": 1.0328068043742405e-05,
      "loss": 0.0673,
      "step": 2490
    },
    {
      "epoch": 0.9110787172011662,
      "grad_norm": 0.7528400421142578,
      "learning_rate": 9.923045767517214e-06,
      "loss": 0.0944,
      "step": 2500
    },
    {
      "epoch": 0.9147230320699709,
      "grad_norm": 0.8340189456939697,
      "learning_rate": 9.518023491292022e-06,
      "loss": 0.0858,
      "step": 2510
    },
    {
      "epoch": 0.9183673469387755,
      "grad_norm": 0.9990427494049072,
      "learning_rate": 9.11300121506683e-06,
      "loss": 0.0761,
      "step": 2520
    },
    {
      "epoch": 0.9220116618075802,
      "grad_norm": 0.758173406124115,
      "learning_rate": 8.707978938841636e-06,
      "loss": 0.0868,
      "step": 2530
    },
    {
      "epoch": 0.9256559766763849,
      "grad_norm": 1.4692426919937134,
      "learning_rate": 8.302956662616444e-06,
      "loss": 0.0398,
      "step": 2540
    },
    {
      "epoch": 0.9293002915451894,
      "grad_norm": 0.557843804359436,
      "learning_rate": 7.897934386391252e-06,
      "loss": 0.0494,
      "step": 2550
    },
    {
      "epoch": 0.9329446064139941,
      "grad_norm": 0.6966714262962341,
      "learning_rate": 7.49291211016606e-06,
      "loss": 0.123,
      "step": 2560
    },
    {
      "epoch": 0.9365889212827988,
      "grad_norm": 0.7461113333702087,
      "learning_rate": 7.087889833940867e-06,
      "loss": 0.0956,
      "step": 2570
    },
    {
      "epoch": 0.9402332361516035,
      "grad_norm": 0.6671153903007507,
      "learning_rate": 6.682867557715675e-06,
      "loss": 0.065,
      "step": 2580
    },
    {
      "epoch": 0.9438775510204082,
      "grad_norm": 0.9417329430580139,
      "learning_rate": 6.2778452814904826e-06,
      "loss": 0.0662,
      "step": 2590
    },
    {
      "epoch": 0.9475218658892128,
      "grad_norm": 0.5989621877670288,
      "learning_rate": 5.87282300526529e-06,
      "loss": 0.0747,
      "step": 2600
    },
    {
      "epoch": 0.9475218658892128,
      "eval_loss": 0.08818545192480087,
      "eval_reranker_map": 0.8938843308244206,
      "eval_reranker_mrr@10": 0.9404932678998363,
      "eval_reranker_ndcg@10": 0.9271673543093844,
      "eval_runtime": 21.37,
      "eval_samples_per_second": 1170.192,
      "eval_steps_per_second": 18.297,
      "step": 2600
    }
  ],
  "logging_steps": 10,
  "max_steps": 2744,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 200,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 8,
        "early_stopping_threshold": 5e-05
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}