{ "best_global_step": 2600, "best_metric": 0.08818545192480087, "best_model_checkpoint": "./reranker-sweep/b42q86ij/checkpoint-2600", "epoch": 0.9475218658892128, "eval_steps": 200, "global_step": 2600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0036443148688046646, "grad_norm": 2.5712764263153076, "learning_rate": 3.2727272727272733e-06, "loss": 0.3268, "step": 10 }, { "epoch": 0.007288629737609329, "grad_norm": 3.9628148078918457, "learning_rate": 6.909090909090909e-06, "loss": 0.247, "step": 20 }, { "epoch": 0.010932944606413994, "grad_norm": 2.538698196411133, "learning_rate": 1.0545454545454546e-05, "loss": 0.2451, "step": 30 }, { "epoch": 0.014577259475218658, "grad_norm": 2.876335859298706, "learning_rate": 1.4181818181818181e-05, "loss": 0.2029, "step": 40 }, { "epoch": 0.018221574344023325, "grad_norm": 1.6715284585952759, "learning_rate": 1.781818181818182e-05, "loss": 0.1739, "step": 50 }, { "epoch": 0.021865889212827987, "grad_norm": 1.4855901002883911, "learning_rate": 2.1454545454545455e-05, "loss": 0.172, "step": 60 }, { "epoch": 0.025510204081632654, "grad_norm": 2.0233335494995117, "learning_rate": 2.5090909090909094e-05, "loss": 0.1425, "step": 70 }, { "epoch": 0.029154518950437316, "grad_norm": 1.9063516855239868, "learning_rate": 2.872727272727273e-05, "loss": 0.138, "step": 80 }, { "epoch": 0.03279883381924198, "grad_norm": 0.9233603477478027, "learning_rate": 3.236363636363636e-05, "loss": 0.1304, "step": 90 }, { "epoch": 0.03644314868804665, "grad_norm": 1.3970320224761963, "learning_rate": 3.6e-05, "loss": 0.1561, "step": 100 }, { "epoch": 0.04008746355685131, "grad_norm": 1.0219364166259766, "learning_rate": 3.963636363636364e-05, "loss": 0.1627, "step": 110 }, { "epoch": 0.043731778425655975, "grad_norm": 1.6836103200912476, "learning_rate": 4.327272727272728e-05, "loss": 0.1974, "step": 120 }, { "epoch": 0.047376093294460644, "grad_norm": 1.486811637878418, "learning_rate": 4.690909090909091e-05, "loss": 0.1339, "step": 130 }, { "epoch": 0.05102040816326531, "grad_norm": 1.230664610862732, "learning_rate": 5.054545454545455e-05, "loss": 0.1137, "step": 140 }, { "epoch": 0.05466472303206997, "grad_norm": 1.133725881576538, "learning_rate": 5.418181818181819e-05, "loss": 0.1333, "step": 150 }, { "epoch": 0.05830903790087463, "grad_norm": 1.921517014503479, "learning_rate": 5.7818181818181815e-05, "loss": 0.1296, "step": 160 }, { "epoch": 0.0619533527696793, "grad_norm": 0.6957170367240906, "learning_rate": 6.145454545454545e-05, "loss": 0.1723, "step": 170 }, { "epoch": 0.06559766763848396, "grad_norm": 0.40991759300231934, "learning_rate": 6.50909090909091e-05, "loss": 0.1099, "step": 180 }, { "epoch": 0.06924198250728864, "grad_norm": 1.0332207679748535, "learning_rate": 6.872727272727273e-05, "loss": 0.1105, "step": 190 }, { "epoch": 0.0728862973760933, "grad_norm": 0.7601434588432312, "learning_rate": 7.236363636363637e-05, "loss": 0.0917, "step": 200 }, { "epoch": 0.0728862973760933, "eval_loss": 0.11327829957008362, "eval_reranker_map": 0.8637301072112248, "eval_reranker_mrr@10": 0.915392151575617, "eval_reranker_ndcg@10": 0.9034123916044116, "eval_runtime": 21.6475, "eval_samples_per_second": 1155.192, "eval_steps_per_second": 18.062, "step": 200 }, { "epoch": 0.07653061224489796, "grad_norm": 1.4077112674713135, "learning_rate": 7.6e-05, "loss": 0.1012, "step": 210 }, { "epoch": 0.08017492711370262, "grad_norm": 0.6944614052772522, "learning_rate": 7.963636363636364e-05, "loss": 0.1296, "step": 220 }, { "epoch": 0.08381924198250729, "grad_norm": 1.064735770225525, "learning_rate": 8.327272727272728e-05, "loss": 0.1332, "step": 230 }, { "epoch": 0.08746355685131195, "grad_norm": 0.7208377122879028, "learning_rate": 8.690909090909091e-05, "loss": 0.095, "step": 240 }, { "epoch": 0.09110787172011661, "grad_norm": 0.5791595578193665, "learning_rate": 9.054545454545455e-05, "loss": 0.1351, "step": 250 }, { "epoch": 0.09475218658892129, "grad_norm": 1.8823211193084717, "learning_rate": 9.418181818181818e-05, "loss": 0.1138, "step": 260 }, { "epoch": 0.09839650145772595, "grad_norm": 1.0496199131011963, "learning_rate": 9.781818181818183e-05, "loss": 0.1318, "step": 270 }, { "epoch": 0.10204081632653061, "grad_norm": 0.8584634065628052, "learning_rate": 9.983799108950993e-05, "loss": 0.1164, "step": 280 }, { "epoch": 0.10568513119533528, "grad_norm": 1.8461506366729736, "learning_rate": 9.943296881328473e-05, "loss": 0.1418, "step": 290 }, { "epoch": 0.10932944606413994, "grad_norm": 1.8940588235855103, "learning_rate": 9.902794653705955e-05, "loss": 0.1337, "step": 300 }, { "epoch": 0.1129737609329446, "grad_norm": 1.089218258857727, "learning_rate": 9.862292426083435e-05, "loss": 0.1169, "step": 310 }, { "epoch": 0.11661807580174927, "grad_norm": 0.6664621233940125, "learning_rate": 9.821790198460917e-05, "loss": 0.1314, "step": 320 }, { "epoch": 0.12026239067055394, "grad_norm": 0.5564493536949158, "learning_rate": 9.781287970838397e-05, "loss": 0.1197, "step": 330 }, { "epoch": 0.1239067055393586, "grad_norm": 0.6858499050140381, "learning_rate": 9.740785743215877e-05, "loss": 0.1002, "step": 340 }, { "epoch": 0.12755102040816327, "grad_norm": 0.6210586428642273, "learning_rate": 9.700283515593358e-05, "loss": 0.1124, "step": 350 }, { "epoch": 0.13119533527696792, "grad_norm": 1.4063215255737305, "learning_rate": 9.65978128797084e-05, "loss": 0.0932, "step": 360 }, { "epoch": 0.1348396501457726, "grad_norm": 1.0574997663497925, "learning_rate": 9.61927906034832e-05, "loss": 0.1629, "step": 370 }, { "epoch": 0.13848396501457727, "grad_norm": 1.4113630056381226, "learning_rate": 9.5787768327258e-05, "loss": 0.1501, "step": 380 }, { "epoch": 0.14212827988338192, "grad_norm": 0.9053757786750793, "learning_rate": 9.538274605103282e-05, "loss": 0.1097, "step": 390 }, { "epoch": 0.1457725947521866, "grad_norm": 0.8119211792945862, "learning_rate": 9.497772377480762e-05, "loss": 0.0756, "step": 400 }, { "epoch": 0.1457725947521866, "eval_loss": 0.11383406072854996, "eval_reranker_map": 0.8594534683785606, "eval_reranker_mrr@10": 0.9061123694475903, "eval_reranker_ndcg@10": 0.8983525842618318, "eval_runtime": 21.3897, "eval_samples_per_second": 1169.111, "eval_steps_per_second": 18.28, "step": 400 }, { "epoch": 0.14941690962099125, "grad_norm": 0.8926772475242615, "learning_rate": 9.457270149858242e-05, "loss": 0.1174, "step": 410 }, { "epoch": 0.15306122448979592, "grad_norm": 1.475794792175293, "learning_rate": 9.416767922235723e-05, "loss": 0.1472, "step": 420 }, { "epoch": 0.15670553935860057, "grad_norm": 0.6618950963020325, "learning_rate": 9.376265694613204e-05, "loss": 0.1391, "step": 430 }, { "epoch": 0.16034985422740525, "grad_norm": 1.0769526958465576, "learning_rate": 9.335763466990685e-05, "loss": 0.1188, "step": 440 }, { "epoch": 0.16399416909620992, "grad_norm": 0.5829262733459473, "learning_rate": 9.295261239368166e-05, "loss": 0.1555, "step": 450 }, { "epoch": 0.16763848396501457, "grad_norm": 1.0533019304275513, "learning_rate": 9.254759011745647e-05, "loss": 0.1148, "step": 460 }, { "epoch": 0.17128279883381925, "grad_norm": 0.5009166598320007, "learning_rate": 9.214256784123127e-05, "loss": 0.0753, "step": 470 }, { "epoch": 0.1749271137026239, "grad_norm": 1.3245645761489868, "learning_rate": 9.173754556500607e-05, "loss": 0.104, "step": 480 }, { "epoch": 0.17857142857142858, "grad_norm": 1.018767237663269, "learning_rate": 9.133252328878089e-05, "loss": 0.1313, "step": 490 }, { "epoch": 0.18221574344023322, "grad_norm": 0.7106571793556213, "learning_rate": 9.09275010125557e-05, "loss": 0.1125, "step": 500 }, { "epoch": 0.1858600583090379, "grad_norm": 1.6412338018417358, "learning_rate": 9.052247873633051e-05, "loss": 0.0772, "step": 510 }, { "epoch": 0.18950437317784258, "grad_norm": 0.652991771697998, "learning_rate": 9.011745646010531e-05, "loss": 0.1045, "step": 520 }, { "epoch": 0.19314868804664723, "grad_norm": 1.3570860624313354, "learning_rate": 8.971243418388012e-05, "loss": 0.1101, "step": 530 }, { "epoch": 0.1967930029154519, "grad_norm": 0.8085044622421265, "learning_rate": 8.930741190765492e-05, "loss": 0.109, "step": 540 }, { "epoch": 0.20043731778425655, "grad_norm": 0.69944828748703, "learning_rate": 8.890238963142972e-05, "loss": 0.124, "step": 550 }, { "epoch": 0.20408163265306123, "grad_norm": 0.6185526847839355, "learning_rate": 8.849736735520454e-05, "loss": 0.0934, "step": 560 }, { "epoch": 0.20772594752186588, "grad_norm": 0.6213017702102661, "learning_rate": 8.809234507897934e-05, "loss": 0.1305, "step": 570 }, { "epoch": 0.21137026239067055, "grad_norm": 0.5960863828659058, "learning_rate": 8.768732280275416e-05, "loss": 0.1163, "step": 580 }, { "epoch": 0.21501457725947523, "grad_norm": 0.899186909198761, "learning_rate": 8.728230052652896e-05, "loss": 0.1004, "step": 590 }, { "epoch": 0.21865889212827988, "grad_norm": 1.859052300453186, "learning_rate": 8.687727825030377e-05, "loss": 0.0917, "step": 600 }, { "epoch": 0.21865889212827988, "eval_loss": 0.12061735987663269, "eval_reranker_map": 0.8625884506186454, "eval_reranker_mrr@10": 0.9138017041471176, "eval_reranker_ndcg@10": 0.9024518526276585, "eval_runtime": 21.4146, "eval_samples_per_second": 1167.756, "eval_steps_per_second": 18.259, "step": 600 }, { "epoch": 0.22230320699708456, "grad_norm": 1.2568275928497314, "learning_rate": 8.647225597407857e-05, "loss": 0.0942, "step": 610 }, { "epoch": 0.2259475218658892, "grad_norm": 0.7559579610824585, "learning_rate": 8.606723369785339e-05, "loss": 0.1223, "step": 620 }, { "epoch": 0.22959183673469388, "grad_norm": 0.5116895437240601, "learning_rate": 8.566221142162819e-05, "loss": 0.1156, "step": 630 }, { "epoch": 0.23323615160349853, "grad_norm": 0.9851139783859253, "learning_rate": 8.525718914540301e-05, "loss": 0.0924, "step": 640 }, { "epoch": 0.2368804664723032, "grad_norm": 1.11528480052948, "learning_rate": 8.485216686917781e-05, "loss": 0.1372, "step": 650 }, { "epoch": 0.24052478134110788, "grad_norm": 1.3869421482086182, "learning_rate": 8.444714459295261e-05, "loss": 0.0984, "step": 660 }, { "epoch": 0.24416909620991253, "grad_norm": 0.7908770442008972, "learning_rate": 8.404212231672742e-05, "loss": 0.0876, "step": 670 }, { "epoch": 0.2478134110787172, "grad_norm": 0.7059890627861023, "learning_rate": 8.363710004050223e-05, "loss": 0.0926, "step": 680 }, { "epoch": 0.25145772594752186, "grad_norm": 0.9383406043052673, "learning_rate": 8.323207776427704e-05, "loss": 0.0819, "step": 690 }, { "epoch": 0.25510204081632654, "grad_norm": 0.6492351293563843, "learning_rate": 8.282705548805186e-05, "loss": 0.1034, "step": 700 }, { "epoch": 0.2587463556851312, "grad_norm": 1.1460670232772827, "learning_rate": 8.242203321182666e-05, "loss": 0.1022, "step": 710 }, { "epoch": 0.26239067055393583, "grad_norm": 0.6701614260673523, "learning_rate": 8.201701093560146e-05, "loss": 0.0661, "step": 720 }, { "epoch": 0.2660349854227405, "grad_norm": 1.4577497243881226, "learning_rate": 8.161198865937626e-05, "loss": 0.124, "step": 730 }, { "epoch": 0.2696793002915452, "grad_norm": 0.602418065071106, "learning_rate": 8.120696638315107e-05, "loss": 0.1231, "step": 740 }, { "epoch": 0.27332361516034986, "grad_norm": 0.9659672975540161, "learning_rate": 8.080194410692589e-05, "loss": 0.1307, "step": 750 }, { "epoch": 0.27696793002915454, "grad_norm": 0.3637723922729492, "learning_rate": 8.039692183070069e-05, "loss": 0.0973, "step": 760 }, { "epoch": 0.28061224489795916, "grad_norm": 0.8434582352638245, "learning_rate": 7.99918995544755e-05, "loss": 0.0721, "step": 770 }, { "epoch": 0.28425655976676384, "grad_norm": 0.7171585559844971, "learning_rate": 7.958687727825031e-05, "loss": 0.0734, "step": 780 }, { "epoch": 0.2879008746355685, "grad_norm": 0.7634996175765991, "learning_rate": 7.918185500202511e-05, "loss": 0.0806, "step": 790 }, { "epoch": 0.2915451895043732, "grad_norm": 0.7262800931930542, "learning_rate": 7.877683272579992e-05, "loss": 0.0824, "step": 800 }, { "epoch": 0.2915451895043732, "eval_loss": 0.09958108514547348, "eval_reranker_map": 0.869804989912677, "eval_reranker_mrr@10": 0.9200749609016879, "eval_reranker_ndcg@10": 0.9079497888185337, "eval_runtime": 21.3269, "eval_samples_per_second": 1172.559, "eval_steps_per_second": 18.334, "step": 800 }, { "epoch": 0.29518950437317787, "grad_norm": 0.26812535524368286, "learning_rate": 7.837181044957473e-05, "loss": 0.1037, "step": 810 }, { "epoch": 0.2988338192419825, "grad_norm": 0.5044072866439819, "learning_rate": 7.796678817334954e-05, "loss": 0.0771, "step": 820 }, { "epoch": 0.30247813411078717, "grad_norm": 0.6845607757568359, "learning_rate": 7.756176589712435e-05, "loss": 0.1407, "step": 830 }, { "epoch": 0.30612244897959184, "grad_norm": 0.9331074953079224, "learning_rate": 7.715674362089916e-05, "loss": 0.1196, "step": 840 }, { "epoch": 0.3097667638483965, "grad_norm": 1.0579673051834106, "learning_rate": 7.675172134467397e-05, "loss": 0.1087, "step": 850 }, { "epoch": 0.31341107871720114, "grad_norm": 1.4271413087844849, "learning_rate": 7.634669906844876e-05, "loss": 0.0737, "step": 860 }, { "epoch": 0.3170553935860058, "grad_norm": 0.9227479100227356, "learning_rate": 7.594167679222357e-05, "loss": 0.0986, "step": 870 }, { "epoch": 0.3206997084548105, "grad_norm": 0.4991419017314911, "learning_rate": 7.553665451599838e-05, "loss": 0.1042, "step": 880 }, { "epoch": 0.32434402332361517, "grad_norm": 0.9503481984138489, "learning_rate": 7.513163223977319e-05, "loss": 0.0971, "step": 890 }, { "epoch": 0.32798833819241985, "grad_norm": 3.130014181137085, "learning_rate": 7.4726609963548e-05, "loss": 0.0824, "step": 900 }, { "epoch": 0.33163265306122447, "grad_norm": 0.8585525155067444, "learning_rate": 7.43215876873228e-05, "loss": 0.0842, "step": 910 }, { "epoch": 0.33527696793002915, "grad_norm": 0.7285027503967285, "learning_rate": 7.391656541109761e-05, "loss": 0.1361, "step": 920 }, { "epoch": 0.3389212827988338, "grad_norm": 0.4131384789943695, "learning_rate": 7.351154313487241e-05, "loss": 0.086, "step": 930 }, { "epoch": 0.3425655976676385, "grad_norm": 0.9633147120475769, "learning_rate": 7.310652085864723e-05, "loss": 0.0861, "step": 940 }, { "epoch": 0.3462099125364432, "grad_norm": 0.9654539227485657, "learning_rate": 7.270149858242203e-05, "loss": 0.1039, "step": 950 }, { "epoch": 0.3498542274052478, "grad_norm": 1.1640666723251343, "learning_rate": 7.229647630619685e-05, "loss": 0.1085, "step": 960 }, { "epoch": 0.3534985422740525, "grad_norm": 1.0129121541976929, "learning_rate": 7.189145402997165e-05, "loss": 0.1316, "step": 970 }, { "epoch": 0.35714285714285715, "grad_norm": 1.2108495235443115, "learning_rate": 7.148643175374647e-05, "loss": 0.0806, "step": 980 }, { "epoch": 0.3607871720116618, "grad_norm": 1.3395358324050903, "learning_rate": 7.108140947752126e-05, "loss": 0.0873, "step": 990 }, { "epoch": 0.36443148688046645, "grad_norm": 0.7182661294937134, "learning_rate": 7.067638720129608e-05, "loss": 0.0952, "step": 1000 }, { "epoch": 0.36443148688046645, "eval_loss": 0.09812270849943161, "eval_reranker_map": 0.8729628573406761, "eval_reranker_mrr@10": 0.920727498247317, "eval_reranker_ndcg@10": 0.9101050014801282, "eval_runtime": 21.2214, "eval_samples_per_second": 1178.387, "eval_steps_per_second": 18.425, "step": 1000 }, { "epoch": 0.3680758017492711, "grad_norm": 0.8469423651695251, "learning_rate": 7.027136492507088e-05, "loss": 0.1194, "step": 1010 }, { "epoch": 0.3717201166180758, "grad_norm": 1.2186115980148315, "learning_rate": 6.98663426488457e-05, "loss": 0.1114, "step": 1020 }, { "epoch": 0.3753644314868805, "grad_norm": 1.040454626083374, "learning_rate": 6.94613203726205e-05, "loss": 0.122, "step": 1030 }, { "epoch": 0.37900874635568516, "grad_norm": 0.5609027147293091, "learning_rate": 6.90562980963953e-05, "loss": 0.094, "step": 1040 }, { "epoch": 0.3826530612244898, "grad_norm": 1.0195974111557007, "learning_rate": 6.865127582017012e-05, "loss": 0.0971, "step": 1050 }, { "epoch": 0.38629737609329445, "grad_norm": 0.67203289270401, "learning_rate": 6.824625354394491e-05, "loss": 0.1285, "step": 1060 }, { "epoch": 0.38994169096209913, "grad_norm": 0.3070160746574402, "learning_rate": 6.784123126771973e-05, "loss": 0.103, "step": 1070 }, { "epoch": 0.3935860058309038, "grad_norm": 1.0986084938049316, "learning_rate": 6.743620899149453e-05, "loss": 0.1065, "step": 1080 }, { "epoch": 0.39723032069970843, "grad_norm": 1.3802062273025513, "learning_rate": 6.703118671526935e-05, "loss": 0.0885, "step": 1090 }, { "epoch": 0.4008746355685131, "grad_norm": 2.0906848907470703, "learning_rate": 6.662616443904415e-05, "loss": 0.1022, "step": 1100 }, { "epoch": 0.4045189504373178, "grad_norm": 1.3855655193328857, "learning_rate": 6.622114216281897e-05, "loss": 0.1129, "step": 1110 }, { "epoch": 0.40816326530612246, "grad_norm": 0.9681763648986816, "learning_rate": 6.581611988659376e-05, "loss": 0.1229, "step": 1120 }, { "epoch": 0.41180758017492713, "grad_norm": 1.2986786365509033, "learning_rate": 6.541109761036857e-05, "loss": 0.0999, "step": 1130 }, { "epoch": 0.41545189504373176, "grad_norm": 0.4236406981945038, "learning_rate": 6.500607533414338e-05, "loss": 0.0879, "step": 1140 }, { "epoch": 0.41909620991253643, "grad_norm": 0.4405617117881775, "learning_rate": 6.46010530579182e-05, "loss": 0.0763, "step": 1150 }, { "epoch": 0.4227405247813411, "grad_norm": 0.7291234731674194, "learning_rate": 6.4196030781693e-05, "loss": 0.0852, "step": 1160 }, { "epoch": 0.4263848396501458, "grad_norm": 1.0762977600097656, "learning_rate": 6.379100850546781e-05, "loss": 0.0914, "step": 1170 }, { "epoch": 0.43002915451895046, "grad_norm": 1.2228280305862427, "learning_rate": 6.338598622924262e-05, "loss": 0.1004, "step": 1180 }, { "epoch": 0.4336734693877551, "grad_norm": 0.7461398243904114, "learning_rate": 6.298096395301742e-05, "loss": 0.1143, "step": 1190 }, { "epoch": 0.43731778425655976, "grad_norm": 0.942706286907196, "learning_rate": 6.257594167679222e-05, "loss": 0.1364, "step": 1200 }, { "epoch": 0.43731778425655976, "eval_loss": 0.09395871311426163, "eval_reranker_map": 0.8911714470545177, "eval_reranker_mrr@10": 0.9400510525085836, "eval_reranker_ndcg@10": 0.924593388432475, "eval_runtime": 21.2854, "eval_samples_per_second": 1174.843, "eval_steps_per_second": 18.369, "step": 1200 }, { "epoch": 0.44096209912536444, "grad_norm": 0.8438668251037598, "learning_rate": 6.217091940056703e-05, "loss": 0.1017, "step": 1210 }, { "epoch": 0.4446064139941691, "grad_norm": 0.6174122095108032, "learning_rate": 6.176589712434184e-05, "loss": 0.09, "step": 1220 }, { "epoch": 0.44825072886297374, "grad_norm": 0.6999545097351074, "learning_rate": 6.136087484811665e-05, "loss": 0.0687, "step": 1230 }, { "epoch": 0.4518950437317784, "grad_norm": 1.237770676612854, "learning_rate": 6.095585257189146e-05, "loss": 0.0733, "step": 1240 }, { "epoch": 0.4555393586005831, "grad_norm": 0.933530330657959, "learning_rate": 6.055083029566626e-05, "loss": 0.1049, "step": 1250 }, { "epoch": 0.45918367346938777, "grad_norm": 1.1076560020446777, "learning_rate": 6.014580801944108e-05, "loss": 0.0918, "step": 1260 }, { "epoch": 0.46282798833819244, "grad_norm": 1.030834674835205, "learning_rate": 5.9740785743215874e-05, "loss": 0.0848, "step": 1270 }, { "epoch": 0.46647230320699706, "grad_norm": 1.2574701309204102, "learning_rate": 5.933576346699069e-05, "loss": 0.0736, "step": 1280 }, { "epoch": 0.47011661807580174, "grad_norm": 0.9184115529060364, "learning_rate": 5.8930741190765494e-05, "loss": 0.1129, "step": 1290 }, { "epoch": 0.4737609329446064, "grad_norm": 0.5017003417015076, "learning_rate": 5.8525718914540305e-05, "loss": 0.0713, "step": 1300 }, { "epoch": 0.4774052478134111, "grad_norm": 0.5321061015129089, "learning_rate": 5.812069663831511e-05, "loss": 0.0876, "step": 1310 }, { "epoch": 0.48104956268221577, "grad_norm": 0.43396979570388794, "learning_rate": 5.7715674362089925e-05, "loss": 0.0866, "step": 1320 }, { "epoch": 0.4846938775510204, "grad_norm": 0.6829825043678284, "learning_rate": 5.731065208586472e-05, "loss": 0.1016, "step": 1330 }, { "epoch": 0.48833819241982507, "grad_norm": 1.6625821590423584, "learning_rate": 5.690562980963954e-05, "loss": 0.1061, "step": 1340 }, { "epoch": 0.49198250728862974, "grad_norm": 0.20715846121311188, "learning_rate": 5.650060753341434e-05, "loss": 0.0791, "step": 1350 }, { "epoch": 0.4956268221574344, "grad_norm": 0.8938704133033752, "learning_rate": 5.6095585257189145e-05, "loss": 0.0938, "step": 1360 }, { "epoch": 0.49927113702623904, "grad_norm": 0.5026534199714661, "learning_rate": 5.5690562980963955e-05, "loss": 0.1235, "step": 1370 }, { "epoch": 0.5029154518950437, "grad_norm": 0.47632285952568054, "learning_rate": 5.528554070473876e-05, "loss": 0.0693, "step": 1380 }, { "epoch": 0.5065597667638484, "grad_norm": 1.092860460281372, "learning_rate": 5.4880518428513575e-05, "loss": 0.065, "step": 1390 }, { "epoch": 0.5102040816326531, "grad_norm": 0.9408676624298096, "learning_rate": 5.447549615228837e-05, "loss": 0.0839, "step": 1400 }, { "epoch": 0.5102040816326531, "eval_loss": 0.10072223097085953, "eval_reranker_map": 0.8862202409768546, "eval_reranker_mrr@10": 0.9337876826834923, "eval_reranker_ndcg@10": 0.9214271556218577, "eval_runtime": 22.7831, "eval_samples_per_second": 1097.612, "eval_steps_per_second": 17.162, "step": 1400 }, { "epoch": 0.5138483965014577, "grad_norm": 2.5904500484466553, "learning_rate": 5.407047387606319e-05, "loss": 0.0914, "step": 1410 }, { "epoch": 0.5174927113702624, "grad_norm": 0.23948027193546295, "learning_rate": 5.366545159983799e-05, "loss": 0.0786, "step": 1420 }, { "epoch": 0.5211370262390671, "grad_norm": 0.8241820931434631, "learning_rate": 5.32604293236128e-05, "loss": 0.0916, "step": 1430 }, { "epoch": 0.5247813411078717, "grad_norm": 0.5767093896865845, "learning_rate": 5.2855407047387605e-05, "loss": 0.0606, "step": 1440 }, { "epoch": 0.5284256559766763, "grad_norm": 1.3108361959457397, "learning_rate": 5.245038477116242e-05, "loss": 0.1417, "step": 1450 }, { "epoch": 0.532069970845481, "grad_norm": 1.309979796409607, "learning_rate": 5.2045362494937225e-05, "loss": 0.0856, "step": 1460 }, { "epoch": 0.5357142857142857, "grad_norm": 1.250825047492981, "learning_rate": 5.1640340218712035e-05, "loss": 0.0865, "step": 1470 }, { "epoch": 0.5393586005830904, "grad_norm": 0.47954457998275757, "learning_rate": 5.123531794248684e-05, "loss": 0.0917, "step": 1480 }, { "epoch": 0.543002915451895, "grad_norm": 0.6852472424507141, "learning_rate": 5.083029566626165e-05, "loss": 0.0774, "step": 1490 }, { "epoch": 0.5466472303206997, "grad_norm": 0.8546938300132751, "learning_rate": 5.042527339003645e-05, "loss": 0.0951, "step": 1500 }, { "epoch": 0.5502915451895044, "grad_norm": 0.9142554998397827, "learning_rate": 5.002025111381127e-05, "loss": 0.074, "step": 1510 }, { "epoch": 0.5539358600583091, "grad_norm": 0.6394762396812439, "learning_rate": 4.961522883758607e-05, "loss": 0.0797, "step": 1520 }, { "epoch": 0.5575801749271136, "grad_norm": 1.9928357601165771, "learning_rate": 4.9210206561360876e-05, "loss": 0.0817, "step": 1530 }, { "epoch": 0.5612244897959183, "grad_norm": 0.917662501335144, "learning_rate": 4.8805184285135686e-05, "loss": 0.1137, "step": 1540 }, { "epoch": 0.564868804664723, "grad_norm": 1.1721371412277222, "learning_rate": 4.8400162008910496e-05, "loss": 0.1139, "step": 1550 }, { "epoch": 0.5685131195335277, "grad_norm": 0.9727454781532288, "learning_rate": 4.79951397326853e-05, "loss": 0.0889, "step": 1560 }, { "epoch": 0.5721574344023324, "grad_norm": 0.8819296360015869, "learning_rate": 4.759011745646011e-05, "loss": 0.1075, "step": 1570 }, { "epoch": 0.575801749271137, "grad_norm": 0.7837637662887573, "learning_rate": 4.718509518023492e-05, "loss": 0.1021, "step": 1580 }, { "epoch": 0.5794460641399417, "grad_norm": 0.5388606190681458, "learning_rate": 4.678007290400972e-05, "loss": 0.1115, "step": 1590 }, { "epoch": 0.5830903790087464, "grad_norm": 1.0936598777770996, "learning_rate": 4.637505062778453e-05, "loss": 0.1047, "step": 1600 }, { "epoch": 0.5830903790087464, "eval_loss": 0.095162533223629, "eval_reranker_map": 0.8900710013122974, "eval_reranker_mrr@10": 0.9361263909112154, "eval_reranker_ndcg@10": 0.9229002314461462, "eval_runtime": 21.9119, "eval_samples_per_second": 1141.25, "eval_steps_per_second": 17.844, "step": 1600 }, { "epoch": 0.5867346938775511, "grad_norm": 2.3125267028808594, "learning_rate": 4.5970028351559336e-05, "loss": 0.1056, "step": 1610 }, { "epoch": 0.5903790087463557, "grad_norm": 1.1802173852920532, "learning_rate": 4.5565006075334146e-05, "loss": 0.116, "step": 1620 }, { "epoch": 0.5940233236151603, "grad_norm": 0.9869738221168518, "learning_rate": 4.515998379910895e-05, "loss": 0.0989, "step": 1630 }, { "epoch": 0.597667638483965, "grad_norm": 0.7939630150794983, "learning_rate": 4.475496152288376e-05, "loss": 0.1102, "step": 1640 }, { "epoch": 0.6013119533527697, "grad_norm": 0.36633291840553284, "learning_rate": 4.434993924665857e-05, "loss": 0.1006, "step": 1650 }, { "epoch": 0.6049562682215743, "grad_norm": 2.6413233280181885, "learning_rate": 4.394491697043337e-05, "loss": 0.0956, "step": 1660 }, { "epoch": 0.608600583090379, "grad_norm": 0.8255997896194458, "learning_rate": 4.353989469420818e-05, "loss": 0.1003, "step": 1670 }, { "epoch": 0.6122448979591837, "grad_norm": 0.9502624869346619, "learning_rate": 4.313487241798299e-05, "loss": 0.0984, "step": 1680 }, { "epoch": 0.6158892128279884, "grad_norm": 1.0512892007827759, "learning_rate": 4.2729850141757796e-05, "loss": 0.0734, "step": 1690 }, { "epoch": 0.619533527696793, "grad_norm": 1.4372130632400513, "learning_rate": 4.2324827865532607e-05, "loss": 0.079, "step": 1700 }, { "epoch": 0.6231778425655977, "grad_norm": 0.44758155941963196, "learning_rate": 4.1919805589307417e-05, "loss": 0.0872, "step": 1710 }, { "epoch": 0.6268221574344023, "grad_norm": 0.7648677825927734, "learning_rate": 4.151478331308222e-05, "loss": 0.1077, "step": 1720 }, { "epoch": 0.630466472303207, "grad_norm": 1.3461487293243408, "learning_rate": 4.110976103685703e-05, "loss": 0.0833, "step": 1730 }, { "epoch": 0.6341107871720116, "grad_norm": 1.056250810623169, "learning_rate": 4.070473876063184e-05, "loss": 0.0984, "step": 1740 }, { "epoch": 0.6377551020408163, "grad_norm": 1.349318265914917, "learning_rate": 4.0299716484406643e-05, "loss": 0.0727, "step": 1750 }, { "epoch": 0.641399416909621, "grad_norm": 1.0693271160125732, "learning_rate": 3.9894694208181453e-05, "loss": 0.1062, "step": 1760 }, { "epoch": 0.6450437317784257, "grad_norm": 1.0974327325820923, "learning_rate": 3.948967193195626e-05, "loss": 0.1013, "step": 1770 }, { "epoch": 0.6486880466472303, "grad_norm": 1.0591496229171753, "learning_rate": 3.908464965573107e-05, "loss": 0.0892, "step": 1780 }, { "epoch": 0.652332361516035, "grad_norm": 0.9455766081809998, "learning_rate": 3.867962737950587e-05, "loss": 0.0765, "step": 1790 }, { "epoch": 0.6559766763848397, "grad_norm": 0.9994630217552185, "learning_rate": 3.827460510328068e-05, "loss": 0.0698, "step": 1800 }, { "epoch": 0.6559766763848397, "eval_loss": 0.09617751091718674, "eval_reranker_map": 0.886651874259156, "eval_reranker_mrr@10": 0.9332937856154163, "eval_reranker_ndcg@10": 0.920815981670818, "eval_runtime": 21.5965, "eval_samples_per_second": 1157.917, "eval_steps_per_second": 18.105, "step": 1800 }, { "epoch": 0.6596209912536443, "grad_norm": 0.2373291403055191, "learning_rate": 3.786958282705549e-05, "loss": 0.0658, "step": 1810 }, { "epoch": 0.6632653061224489, "grad_norm": 1.3221744298934937, "learning_rate": 3.7464560550830294e-05, "loss": 0.1386, "step": 1820 }, { "epoch": 0.6669096209912536, "grad_norm": 0.5240024924278259, "learning_rate": 3.7059538274605104e-05, "loss": 0.1094, "step": 1830 }, { "epoch": 0.6705539358600583, "grad_norm": 1.2826807498931885, "learning_rate": 3.6654515998379914e-05, "loss": 0.103, "step": 1840 }, { "epoch": 0.674198250728863, "grad_norm": 0.7587610483169556, "learning_rate": 3.624949372215472e-05, "loss": 0.1075, "step": 1850 }, { "epoch": 0.6778425655976676, "grad_norm": 0.8283730149269104, "learning_rate": 3.584447144592953e-05, "loss": 0.091, "step": 1860 }, { "epoch": 0.6814868804664723, "grad_norm": 1.2735215425491333, "learning_rate": 3.543944916970434e-05, "loss": 0.106, "step": 1870 }, { "epoch": 0.685131195335277, "grad_norm": 1.3818174600601196, "learning_rate": 3.503442689347915e-05, "loss": 0.0753, "step": 1880 }, { "epoch": 0.6887755102040817, "grad_norm": 0.7963125109672546, "learning_rate": 3.462940461725395e-05, "loss": 0.0685, "step": 1890 }, { "epoch": 0.6924198250728864, "grad_norm": 1.5980868339538574, "learning_rate": 3.422438234102876e-05, "loss": 0.1045, "step": 1900 }, { "epoch": 0.6960641399416909, "grad_norm": 0.34244367480278015, "learning_rate": 3.381936006480357e-05, "loss": 0.087, "step": 1910 }, { "epoch": 0.6997084548104956, "grad_norm": 1.120538592338562, "learning_rate": 3.3414337788578374e-05, "loss": 0.0866, "step": 1920 }, { "epoch": 0.7033527696793003, "grad_norm": 1.1474584341049194, "learning_rate": 3.300931551235318e-05, "loss": 0.1253, "step": 1930 }, { "epoch": 0.706997084548105, "grad_norm": 1.1841405630111694, "learning_rate": 3.260429323612799e-05, "loss": 0.0915, "step": 1940 }, { "epoch": 0.7106413994169096, "grad_norm": 0.7076147198677063, "learning_rate": 3.219927095990279e-05, "loss": 0.061, "step": 1950 }, { "epoch": 0.7142857142857143, "grad_norm": 0.8037185668945312, "learning_rate": 3.17942486836776e-05, "loss": 0.0744, "step": 1960 }, { "epoch": 0.717930029154519, "grad_norm": 0.6356151103973389, "learning_rate": 3.138922640745241e-05, "loss": 0.0643, "step": 1970 }, { "epoch": 0.7215743440233237, "grad_norm": 0.5353262424468994, "learning_rate": 3.098420413122722e-05, "loss": 0.0571, "step": 1980 }, { "epoch": 0.7252186588921283, "grad_norm": 1.3092989921569824, "learning_rate": 3.0579181855002025e-05, "loss": 0.1004, "step": 1990 }, { "epoch": 0.7288629737609329, "grad_norm": 1.03451669216156, "learning_rate": 3.0174159578776835e-05, "loss": 0.1075, "step": 2000 }, { "epoch": 0.7288629737609329, "eval_loss": 0.09355045855045319, "eval_reranker_map": 0.8897058240573428, "eval_reranker_mrr@10": 0.9358846105448597, "eval_reranker_ndcg@10": 0.9236999900731797, "eval_runtime": 21.3664, "eval_samples_per_second": 1170.39, "eval_steps_per_second": 18.3, "step": 2000 }, { "epoch": 0.7325072886297376, "grad_norm": 0.6066477298736572, "learning_rate": 2.976913730255164e-05, "loss": 0.0637, "step": 2010 }, { "epoch": 0.7361516034985423, "grad_norm": 0.7921279668807983, "learning_rate": 2.9364115026326448e-05, "loss": 0.1167, "step": 2020 }, { "epoch": 0.7397959183673469, "grad_norm": 1.2112895250320435, "learning_rate": 2.8959092750101258e-05, "loss": 0.1113, "step": 2030 }, { "epoch": 0.7434402332361516, "grad_norm": 1.007468342781067, "learning_rate": 2.8554070473876065e-05, "loss": 0.1314, "step": 2040 }, { "epoch": 0.7470845481049563, "grad_norm": 0.6368584036827087, "learning_rate": 2.8149048197650875e-05, "loss": 0.0764, "step": 2050 }, { "epoch": 0.750728862973761, "grad_norm": 0.33551472425460815, "learning_rate": 2.774402592142568e-05, "loss": 0.1297, "step": 2060 }, { "epoch": 0.7543731778425656, "grad_norm": 0.8485022187232971, "learning_rate": 2.733900364520049e-05, "loss": 0.0841, "step": 2070 }, { "epoch": 0.7580174927113703, "grad_norm": 0.9928489923477173, "learning_rate": 2.69339813689753e-05, "loss": 0.0967, "step": 2080 }, { "epoch": 0.7616618075801749, "grad_norm": 1.4591295719146729, "learning_rate": 2.6528959092750105e-05, "loss": 0.0916, "step": 2090 }, { "epoch": 0.7653061224489796, "grad_norm": 0.7156339287757874, "learning_rate": 2.612393681652491e-05, "loss": 0.1196, "step": 2100 }, { "epoch": 0.7689504373177842, "grad_norm": 1.2134402990341187, "learning_rate": 2.5718914540299715e-05, "loss": 0.1072, "step": 2110 }, { "epoch": 0.7725947521865889, "grad_norm": 1.509474754333496, "learning_rate": 2.5313892264074522e-05, "loss": 0.0974, "step": 2120 }, { "epoch": 0.7762390670553936, "grad_norm": 0.7905510067939758, "learning_rate": 2.4908869987849335e-05, "loss": 0.0772, "step": 2130 }, { "epoch": 0.7798833819241983, "grad_norm": 1.0353072881698608, "learning_rate": 2.450384771162414e-05, "loss": 0.1147, "step": 2140 }, { "epoch": 0.7835276967930029, "grad_norm": 0.3747937083244324, "learning_rate": 2.409882543539895e-05, "loss": 0.1003, "step": 2150 }, { "epoch": 0.7871720116618076, "grad_norm": 0.6658405661582947, "learning_rate": 2.3693803159173755e-05, "loss": 0.0944, "step": 2160 }, { "epoch": 0.7908163265306123, "grad_norm": 0.6835037469863892, "learning_rate": 2.3288780882948562e-05, "loss": 0.0886, "step": 2170 }, { "epoch": 0.7944606413994169, "grad_norm": 0.45619040727615356, "learning_rate": 2.2883758606723372e-05, "loss": 0.062, "step": 2180 }, { "epoch": 0.7981049562682215, "grad_norm": 0.28890809416770935, "learning_rate": 2.247873633049818e-05, "loss": 0.0817, "step": 2190 }, { "epoch": 0.8017492711370262, "grad_norm": 0.8147470355033875, "learning_rate": 2.2073714054272986e-05, "loss": 0.1096, "step": 2200 }, { "epoch": 0.8017492711370262, "eval_loss": 0.09192134439945221, "eval_reranker_map": 0.8933786713152716, "eval_reranker_mrr@10": 0.940468101170253, "eval_reranker_ndcg@10": 0.9262327488063826, "eval_runtime": 21.2807, "eval_samples_per_second": 1175.1, "eval_steps_per_second": 18.373, "step": 2200 }, { "epoch": 0.8053935860058309, "grad_norm": 0.7118353843688965, "learning_rate": 2.1668691778047796e-05, "loss": 0.0821, "step": 2210 }, { "epoch": 0.8090379008746356, "grad_norm": 0.32003968954086304, "learning_rate": 2.12636695018226e-05, "loss": 0.0866, "step": 2220 }, { "epoch": 0.8126822157434402, "grad_norm": 0.9534270763397217, "learning_rate": 2.085864722559741e-05, "loss": 0.0824, "step": 2230 }, { "epoch": 0.8163265306122449, "grad_norm": 1.245367169380188, "learning_rate": 2.0453624949372216e-05, "loss": 0.108, "step": 2240 }, { "epoch": 0.8199708454810496, "grad_norm": 0.6836312413215637, "learning_rate": 2.0048602673147023e-05, "loss": 0.0746, "step": 2250 }, { "epoch": 0.8236151603498543, "grad_norm": 1.6039366722106934, "learning_rate": 1.9643580396921833e-05, "loss": 0.0708, "step": 2260 }, { "epoch": 0.827259475218659, "grad_norm": 0.8757897019386292, "learning_rate": 1.923855812069664e-05, "loss": 0.0898, "step": 2270 }, { "epoch": 0.8309037900874635, "grad_norm": 0.3051084280014038, "learning_rate": 1.8833535844471446e-05, "loss": 0.0876, "step": 2280 }, { "epoch": 0.8345481049562682, "grad_norm": 0.5111737847328186, "learning_rate": 1.8428513568246256e-05, "loss": 0.0898, "step": 2290 }, { "epoch": 0.8381924198250729, "grad_norm": 1.0539857149124146, "learning_rate": 1.802349129202106e-05, "loss": 0.0935, "step": 2300 }, { "epoch": 0.8418367346938775, "grad_norm": 0.8566706776618958, "learning_rate": 1.761846901579587e-05, "loss": 0.0655, "step": 2310 }, { "epoch": 0.8454810495626822, "grad_norm": 1.0090196132659912, "learning_rate": 1.7213446739570676e-05, "loss": 0.106, "step": 2320 }, { "epoch": 0.8491253644314869, "grad_norm": 0.8994157910346985, "learning_rate": 1.6808424463345483e-05, "loss": 0.0806, "step": 2330 }, { "epoch": 0.8527696793002916, "grad_norm": 0.21777945756912231, "learning_rate": 1.6403402187120293e-05, "loss": 0.091, "step": 2340 }, { "epoch": 0.8564139941690962, "grad_norm": 0.42788615822792053, "learning_rate": 1.59983799108951e-05, "loss": 0.0575, "step": 2350 }, { "epoch": 0.8600583090379009, "grad_norm": 3.2373886108398438, "learning_rate": 1.559335763466991e-05, "loss": 0.059, "step": 2360 }, { "epoch": 0.8637026239067055, "grad_norm": 0.7039571404457092, "learning_rate": 1.5188335358444717e-05, "loss": 0.0889, "step": 2370 }, { "epoch": 0.8673469387755102, "grad_norm": 1.1819452047348022, "learning_rate": 1.4783313082219522e-05, "loss": 0.0955, "step": 2380 }, { "epoch": 0.8709912536443148, "grad_norm": 1.0368818044662476, "learning_rate": 1.437829080599433e-05, "loss": 0.0841, "step": 2390 }, { "epoch": 0.8746355685131195, "grad_norm": 0.2737424075603485, "learning_rate": 1.3973268529769137e-05, "loss": 0.0759, "step": 2400 }, { "epoch": 0.8746355685131195, "eval_loss": 0.08963464945554733, "eval_reranker_map": 0.8923150934556179, "eval_reranker_mrr@10": 0.9383257833144584, "eval_reranker_ndcg@10": 0.9256230786956634, "eval_runtime": 21.3268, "eval_samples_per_second": 1172.561, "eval_steps_per_second": 18.334, "step": 2400 }, { "epoch": 0.8782798833819242, "grad_norm": 0.9451560974121094, "learning_rate": 1.3568246253543945e-05, "loss": 0.0558, "step": 2410 }, { "epoch": 0.8819241982507289, "grad_norm": 1.2251777648925781, "learning_rate": 1.3163223977318753e-05, "loss": 0.0921, "step": 2420 }, { "epoch": 0.8855685131195336, "grad_norm": 0.9714108109474182, "learning_rate": 1.2758201701093562e-05, "loss": 0.0865, "step": 2430 }, { "epoch": 0.8892128279883382, "grad_norm": 0.6615180373191833, "learning_rate": 1.2353179424868368e-05, "loss": 0.0787, "step": 2440 }, { "epoch": 0.8928571428571429, "grad_norm": 1.7639282941818237, "learning_rate": 1.1948157148643175e-05, "loss": 0.0803, "step": 2450 }, { "epoch": 0.8965014577259475, "grad_norm": 0.5499392151832581, "learning_rate": 1.1543134872417984e-05, "loss": 0.0838, "step": 2460 }, { "epoch": 0.9001457725947521, "grad_norm": 1.1226394176483154, "learning_rate": 1.1138112596192792e-05, "loss": 0.0837, "step": 2470 }, { "epoch": 0.9037900874635568, "grad_norm": 0.6618596315383911, "learning_rate": 1.0733090319967599e-05, "loss": 0.097, "step": 2480 }, { "epoch": 0.9074344023323615, "grad_norm": 0.8916273713111877, "learning_rate": 1.0328068043742405e-05, "loss": 0.0673, "step": 2490 }, { "epoch": 0.9110787172011662, "grad_norm": 0.7528400421142578, "learning_rate": 9.923045767517214e-06, "loss": 0.0944, "step": 2500 }, { "epoch": 0.9147230320699709, "grad_norm": 0.8340189456939697, "learning_rate": 9.518023491292022e-06, "loss": 0.0858, "step": 2510 }, { "epoch": 0.9183673469387755, "grad_norm": 0.9990427494049072, "learning_rate": 9.11300121506683e-06, "loss": 0.0761, "step": 2520 }, { "epoch": 0.9220116618075802, "grad_norm": 0.758173406124115, "learning_rate": 8.707978938841636e-06, "loss": 0.0868, "step": 2530 }, { "epoch": 0.9256559766763849, "grad_norm": 1.4692426919937134, "learning_rate": 8.302956662616444e-06, "loss": 0.0398, "step": 2540 }, { "epoch": 0.9293002915451894, "grad_norm": 0.557843804359436, "learning_rate": 7.897934386391252e-06, "loss": 0.0494, "step": 2550 }, { "epoch": 0.9329446064139941, "grad_norm": 0.6966714262962341, "learning_rate": 7.49291211016606e-06, "loss": 0.123, "step": 2560 }, { "epoch": 0.9365889212827988, "grad_norm": 0.7461113333702087, "learning_rate": 7.087889833940867e-06, "loss": 0.0956, "step": 2570 }, { "epoch": 0.9402332361516035, "grad_norm": 0.6671153903007507, "learning_rate": 6.682867557715675e-06, "loss": 0.065, "step": 2580 }, { "epoch": 0.9438775510204082, "grad_norm": 0.9417329430580139, "learning_rate": 6.2778452814904826e-06, "loss": 0.0662, "step": 2590 }, { "epoch": 0.9475218658892128, "grad_norm": 0.5989621877670288, "learning_rate": 5.87282300526529e-06, "loss": 0.0747, "step": 2600 }, { "epoch": 0.9475218658892128, "eval_loss": 0.08818545192480087, "eval_reranker_map": 0.8938843308244206, "eval_reranker_mrr@10": 0.9404932678998363, "eval_reranker_ndcg@10": 0.9271673543093844, "eval_runtime": 21.37, "eval_samples_per_second": 1170.192, "eval_steps_per_second": 18.297, "step": 2600 } ], "logging_steps": 10, "max_steps": 2744, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 8, "early_stopping_threshold": 5e-05 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }