diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json" --- "a/last-checkpoint/trainer_state.json" +++ "b/last-checkpoint/trainer_state.json" @@ -1,7 +1,7 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.3044371717786742, + "epoch": 0.6088743435573484, "eval_steps": 5000, "global_step": 5000, "is_hyper_param_search": false, @@ -9,3516 +9,3516 @@ "is_world_process_zero": true, "log_history": [ { - "epoch": 0.0006088743435573484, - "grad_norm": 1040.0, - "learning_rate": 7.9999238911415e-06, - "loss": 29.6613, + "epoch": 0.0012177486871146968, + "grad_norm": 469.75, + "learning_rate": 9.99980972785375e-06, + "loss": 26.2289, "step": 10 }, { - "epoch": 0.0012177486871146968, - "grad_norm": 547.5, - "learning_rate": 7.999847782282999e-06, - "loss": 27.1939, + "epoch": 0.0024354973742293936, + "grad_norm": 405.0, + "learning_rate": 9.999619455707499e-06, + "loss": 23.5619, "step": 20 }, { - "epoch": 0.001826623030672045, - "grad_norm": 661.0, - "learning_rate": 7.999771673424498e-06, - "loss": 27.143, + "epoch": 0.00365324606134409, + "grad_norm": 326.75, + "learning_rate": 9.999429183561249e-06, + "loss": 23.5219, "step": 30 }, { - "epoch": 0.0024354973742293936, - "grad_norm": 610.5, - "learning_rate": 7.999695564565998e-06, - "loss": 26.7644, + "epoch": 0.004870994748458787, + "grad_norm": 312.0, + "learning_rate": 9.999238911414998e-06, + "loss": 23.3801, "step": 40 }, { - "epoch": 0.0030443717177867416, - "grad_norm": 559.0, - "learning_rate": 7.999619455707497e-06, - "loss": 26.845, + "epoch": 0.006088743435573483, + "grad_norm": 379.75, + "learning_rate": 9.999048639268747e-06, + "loss": 23.3185, "step": 50 }, { - "epoch": 0.00365324606134409, - "grad_norm": 978.5, - "learning_rate": 7.999543346848999e-06, - "loss": 26.7081, + "epoch": 0.00730649212268818, + "grad_norm": 507.5, + "learning_rate": 9.998858367122496e-06, + "loss": 23.4722, "step": 60 }, { - "epoch": 0.004262120404901438, - "grad_norm": 1142.0, - "learning_rate": 7.999467237990496e-06, - "loss": 26.5517, + "epoch": 0.008524240809802876, + "grad_norm": 302.5, + "learning_rate": 9.998668094976245e-06, + "loss": 23.1901, "step": 70 }, { - "epoch": 0.004870994748458787, - "grad_norm": 652.0, - "learning_rate": 7.999391129131998e-06, - "loss": 26.164, + "epoch": 0.009741989496917574, + "grad_norm": 325.5, + "learning_rate": 9.998477822829995e-06, + "loss": 23.3673, "step": 80 }, { - "epoch": 0.005479869092016135, - "grad_norm": 522.0, - "learning_rate": 7.999315020273497e-06, - "loss": 26.3264, + "epoch": 0.01095973818403227, + "grad_norm": 375.25, + "learning_rate": 9.998287550683744e-06, + "loss": 23.4508, "step": 90 }, { - "epoch": 0.006088743435573483, - "grad_norm": 506.0, - "learning_rate": 7.999238911414997e-06, - "loss": 26.0035, + "epoch": 0.012177486871146967, + "grad_norm": 466.25, + "learning_rate": 9.998097278537493e-06, + "loss": 23.3656, "step": 100 }, { - "epoch": 0.006697617779130832, - "grad_norm": 680.0, - "learning_rate": 7.999162802556496e-06, - "loss": 26.426, + "epoch": 0.013395235558261664, + "grad_norm": 405.0, + "learning_rate": 9.997907006391242e-06, + "loss": 23.2151, "step": 110 }, { - "epoch": 0.00730649212268818, - "grad_norm": 391.25, - "learning_rate": 7.999086693697996e-06, - "loss": 26.5592, + "epoch": 0.01461298424537636, + "grad_norm": 327.5, + "learning_rate": 9.99771673424499e-06, + "loss": 23.481, "step": 120 }, { - "epoch": 0.007915366466245528, - "grad_norm": 529.0, - "learning_rate": 7.999010584839495e-06, - "loss": 26.5778, + "epoch": 0.015830732932491057, + "grad_norm": 598.5, + "learning_rate": 9.997526462098741e-06, + "loss": 23.4568, "step": 130 }, { - "epoch": 0.008524240809802876, - "grad_norm": 416.5, - "learning_rate": 7.998934475980995e-06, - "loss": 26.117, + "epoch": 0.017048481619605753, + "grad_norm": 479.0, + "learning_rate": 9.99733618995249e-06, + "loss": 23.4055, "step": 140 }, { - "epoch": 0.009133115153360225, - "grad_norm": 635.5, - "learning_rate": 7.998858367122495e-06, - "loss": 26.3732, + "epoch": 0.01826623030672045, + "grad_norm": 449.75, + "learning_rate": 9.997145917806239e-06, + "loss": 23.4494, "step": 150 }, { - "epoch": 0.009741989496917574, - "grad_norm": 547.5, - "learning_rate": 7.998782258263996e-06, - "loss": 26.0139, + "epoch": 0.01948397899383515, + "grad_norm": 643.5, + "learning_rate": 9.996955645659988e-06, + "loss": 23.3568, "step": 160 }, { - "epoch": 0.010350863840474922, - "grad_norm": 511.25, - "learning_rate": 7.998706149405494e-06, - "loss": 26.1682, + "epoch": 0.020701727680949845, + "grad_norm": 333.0, + "learning_rate": 9.996765373513737e-06, + "loss": 23.4843, "step": 170 }, { - "epoch": 0.01095973818403227, - "grad_norm": 435.5, - "learning_rate": 7.998630040546995e-06, - "loss": 26.065, + "epoch": 0.02191947636806454, + "grad_norm": 416.0, + "learning_rate": 9.996575101367487e-06, + "loss": 23.1901, "step": 180 }, { - "epoch": 0.011568612527589619, - "grad_norm": 620.5, - "learning_rate": 7.998553931688493e-06, - "loss": 25.868, + "epoch": 0.023137225055179237, + "grad_norm": 512.0, + "learning_rate": 9.996384829221236e-06, + "loss": 23.3191, "step": 190 }, { - "epoch": 0.012177486871146967, - "grad_norm": 1011.5, - "learning_rate": 7.998477822829994e-06, - "loss": 26.3943, + "epoch": 0.024354973742293933, + "grad_norm": 602.0, + "learning_rate": 9.996194557074985e-06, + "loss": 23.602, "step": 200 }, { - "epoch": 0.012786361214704315, - "grad_norm": 625.5, - "learning_rate": 7.998401713971494e-06, - "loss": 25.6513, + "epoch": 0.02557272242940863, + "grad_norm": 361.75, + "learning_rate": 9.996004284928734e-06, + "loss": 23.1068, "step": 210 }, { - "epoch": 0.013395235558261664, - "grad_norm": 464.25, - "learning_rate": 7.998325605112993e-06, - "loss": 25.8677, + "epoch": 0.02679047111652333, + "grad_norm": 597.5, + "learning_rate": 9.995814012782483e-06, + "loss": 23.5984, "step": 220 }, { - "epoch": 0.014004109901819013, - "grad_norm": 418.25, - "learning_rate": 7.998249496254493e-06, - "loss": 25.698, + "epoch": 0.028008219803638025, + "grad_norm": 881.0, + "learning_rate": 9.995623740636233e-06, + "loss": 23.2802, "step": 230 }, { - "epoch": 0.01461298424537636, - "grad_norm": 439.25, - "learning_rate": 7.998173387395992e-06, - "loss": 26.0902, + "epoch": 0.02922596849075272, + "grad_norm": 485.0, + "learning_rate": 9.995433468489982e-06, + "loss": 23.0281, "step": 240 }, { - "epoch": 0.015221858588933709, - "grad_norm": 643.0, - "learning_rate": 7.998097278537492e-06, - "loss": 25.6914, + "epoch": 0.030443717177867417, + "grad_norm": 466.0, + "learning_rate": 9.995243196343731e-06, + "loss": 23.307, "step": 250 }, { - "epoch": 0.015830732932491057, - "grad_norm": 627.0, - "learning_rate": 7.998021169678991e-06, - "loss": 25.5016, + "epoch": 0.03166146586498211, + "grad_norm": 473.25, + "learning_rate": 9.99505292419748e-06, + "loss": 23.3621, "step": 260 }, { - "epoch": 0.016439607276048405, - "grad_norm": 472.75, - "learning_rate": 7.99794506082049e-06, - "loss": 25.5562, + "epoch": 0.03287921455209681, + "grad_norm": 839.0, + "learning_rate": 9.994862652051229e-06, + "loss": 23.326, "step": 270 }, { - "epoch": 0.017048481619605753, - "grad_norm": 560.5, - "learning_rate": 7.99786895196199e-06, - "loss": 25.5837, + "epoch": 0.034096963239211506, + "grad_norm": 734.0, + "learning_rate": 9.99467237990498e-06, + "loss": 23.3765, "step": 280 }, { - "epoch": 0.0176573559631631, - "grad_norm": 661.0, - "learning_rate": 7.99779284310349e-06, - "loss": 25.7263, + "epoch": 0.0353147119263262, + "grad_norm": 499.75, + "learning_rate": 9.994482107758728e-06, + "loss": 23.1504, "step": 290 }, { - "epoch": 0.01826623030672045, - "grad_norm": 444.5, - "learning_rate": 7.99771673424499e-06, - "loss": 26.0324, + "epoch": 0.0365324606134409, + "grad_norm": 449.5, + "learning_rate": 9.994291835612477e-06, + "loss": 23.0619, "step": 300 }, { - "epoch": 0.0188751046502778, - "grad_norm": 521.0, - "learning_rate": 7.99764062538649e-06, - "loss": 25.6377, + "epoch": 0.0377502093005556, + "grad_norm": 318.25, + "learning_rate": 9.994101563466226e-06, + "loss": 23.0764, "step": 310 }, { - "epoch": 0.01948397899383515, - "grad_norm": 643.0, - "learning_rate": 7.99756451652799e-06, - "loss": 25.5232, + "epoch": 0.0389679579876703, + "grad_norm": 382.0, + "learning_rate": 9.993911291319975e-06, + "loss": 23.3104, "step": 320 }, { - "epoch": 0.020092853337392497, - "grad_norm": 463.75, - "learning_rate": 7.99748840766949e-06, - "loss": 25.2923, + "epoch": 0.04018570667478499, + "grad_norm": 605.0, + "learning_rate": 9.993721019173725e-06, + "loss": 23.2775, "step": 330 }, { - "epoch": 0.020701727680949845, - "grad_norm": 411.75, - "learning_rate": 7.99741229881099e-06, - "loss": 25.5332, + "epoch": 0.04140345536189969, + "grad_norm": 399.5, + "learning_rate": 9.993530747027474e-06, + "loss": 22.8484, "step": 340 }, { - "epoch": 0.021310602024507193, - "grad_norm": 419.25, - "learning_rate": 7.997336189952489e-06, - "loss": 25.5767, + "epoch": 0.042621204049014386, + "grad_norm": 425.0, + "learning_rate": 9.993340474881223e-06, + "loss": 23.3281, "step": 350 }, { - "epoch": 0.02191947636806454, - "grad_norm": 697.5, - "learning_rate": 7.997260081093988e-06, - "loss": 25.3458, + "epoch": 0.04383895273612908, + "grad_norm": 313.25, + "learning_rate": 9.993150202734972e-06, + "loss": 23.0878, "step": 360 }, { - "epoch": 0.02252835071162189, - "grad_norm": 633.5, - "learning_rate": 7.997183972235488e-06, - "loss": 25.5739, + "epoch": 0.04505670142324378, + "grad_norm": 670.0, + "learning_rate": 9.992959930588721e-06, + "loss": 23.2712, "step": 370 }, { - "epoch": 0.023137225055179237, - "grad_norm": 488.25, - "learning_rate": 7.997107863376988e-06, - "loss": 25.6053, + "epoch": 0.046274450110358474, + "grad_norm": 478.5, + "learning_rate": 9.992769658442471e-06, + "loss": 23.266, "step": 380 }, { - "epoch": 0.023746099398736585, - "grad_norm": 515.0, - "learning_rate": 7.997031754518487e-06, - "loss": 25.4469, + "epoch": 0.04749219879747317, + "grad_norm": 531.5, + "learning_rate": 9.99257938629622e-06, + "loss": 23.3419, "step": 390 }, { - "epoch": 0.024354973742293933, - "grad_norm": 428.0, - "learning_rate": 7.996955645659987e-06, - "loss": 25.6501, + "epoch": 0.048709947484587866, + "grad_norm": 317.5, + "learning_rate": 9.99238911414997e-06, + "loss": 23.0632, "step": 400 }, { - "epoch": 0.02496384808585128, - "grad_norm": 570.0, - "learning_rate": 7.996879536801488e-06, - "loss": 25.5624, + "epoch": 0.04992769617170256, + "grad_norm": 439.75, + "learning_rate": 9.992198842003718e-06, + "loss": 23.3886, "step": 410 }, { - "epoch": 0.02557272242940863, - "grad_norm": 498.75, - "learning_rate": 7.996803427942986e-06, - "loss": 25.5509, + "epoch": 0.05114544485881726, + "grad_norm": 438.5, + "learning_rate": 9.992008569857467e-06, + "loss": 23.1328, "step": 420 }, { - "epoch": 0.02618159677296598, - "grad_norm": 513.0, - "learning_rate": 7.996727319084487e-06, - "loss": 25.487, + "epoch": 0.05236319354593196, + "grad_norm": 325.75, + "learning_rate": 9.991818297711218e-06, + "loss": 23.0403, "step": 430 }, { - "epoch": 0.02679047111652333, - "grad_norm": 815.5, - "learning_rate": 7.996651210225985e-06, - "loss": 25.7721, + "epoch": 0.05358094223304666, + "grad_norm": 338.75, + "learning_rate": 9.991628025564966e-06, + "loss": 23.3389, "step": 440 }, { - "epoch": 0.027399345460080677, - "grad_norm": 640.5, - "learning_rate": 7.996575101367486e-06, - "loss": 25.3603, + "epoch": 0.054798690920161354, + "grad_norm": 342.5, + "learning_rate": 9.991437753418715e-06, + "loss": 23.4204, "step": 450 }, { - "epoch": 0.028008219803638025, - "grad_norm": 688.0, - "learning_rate": 7.996498992508986e-06, - "loss": 25.2232, + "epoch": 0.05601643960727605, + "grad_norm": 411.0, + "learning_rate": 9.991247481272464e-06, + "loss": 23.1465, "step": 460 }, { - "epoch": 0.028617094147195373, - "grad_norm": 595.0, - "learning_rate": 7.996422883650485e-06, - "loss": 25.2017, + "epoch": 0.057234188294390746, + "grad_norm": 393.25, + "learning_rate": 9.991057209126213e-06, + "loss": 23.0713, "step": 470 }, { - "epoch": 0.02922596849075272, - "grad_norm": 472.0, - "learning_rate": 7.996346774791985e-06, - "loss": 25.3047, + "epoch": 0.05845193698150544, + "grad_norm": 360.5, + "learning_rate": 9.990866936979964e-06, + "loss": 23.1229, "step": 480 }, { - "epoch": 0.02983484283431007, - "grad_norm": 643.5, - "learning_rate": 7.996270665933484e-06, - "loss": 25.2193, + "epoch": 0.05966968566862014, + "grad_norm": 421.75, + "learning_rate": 9.990676664833712e-06, + "loss": 23.5678, "step": 490 }, { - "epoch": 0.030443717177867417, - "grad_norm": 445.25, - "learning_rate": 7.996194557074984e-06, - "loss": 25.2767, + "epoch": 0.060887434355734835, + "grad_norm": 402.75, + "learning_rate": 9.990486392687461e-06, + "loss": 23.4222, "step": 500 }, { - "epoch": 0.031052591521424765, - "grad_norm": 642.0, - "learning_rate": 7.996118448216483e-06, - "loss": 25.1364, + "epoch": 0.06210518304284953, + "grad_norm": 476.0, + "learning_rate": 9.99029612054121e-06, + "loss": 23.4822, "step": 510 }, { - "epoch": 0.03166146586498211, - "grad_norm": 587.5, - "learning_rate": 7.996042339357983e-06, - "loss": 25.345, + "epoch": 0.06332293172996423, + "grad_norm": 464.5, + "learning_rate": 9.990105848394959e-06, + "loss": 23.2711, "step": 520 }, { - "epoch": 0.032270340208539465, - "grad_norm": 807.5, - "learning_rate": 7.995966230499482e-06, - "loss": 25.1371, + "epoch": 0.06454068041707893, + "grad_norm": 490.25, + "learning_rate": 9.98991557624871e-06, + "loss": 23.3672, "step": 530 }, { - "epoch": 0.03287921455209681, - "grad_norm": 639.5, - "learning_rate": 7.995890121640982e-06, - "loss": 25.2102, + "epoch": 0.06575842910419362, + "grad_norm": 313.5, + "learning_rate": 9.989725304102459e-06, + "loss": 23.0706, "step": 540 }, { - "epoch": 0.03348808889565416, - "grad_norm": 463.75, - "learning_rate": 7.995814012782483e-06, - "loss": 25.1115, + "epoch": 0.06697617779130832, + "grad_norm": 336.25, + "learning_rate": 9.989535031956207e-06, + "loss": 23.1567, "step": 550 }, { - "epoch": 0.034096963239211506, - "grad_norm": 696.5, - "learning_rate": 7.995737903923983e-06, - "loss": 25.3349, + "epoch": 0.06819392647842301, + "grad_norm": 722.0, + "learning_rate": 9.989344759809956e-06, + "loss": 23.3509, "step": 560 }, { - "epoch": 0.03470583758276886, - "grad_norm": 544.0, - "learning_rate": 7.995661795065482e-06, - "loss": 25.0044, + "epoch": 0.06941167516553771, + "grad_norm": 372.0, + "learning_rate": 9.989154487663705e-06, + "loss": 23.3139, "step": 570 }, { - "epoch": 0.0353147119263262, - "grad_norm": 610.0, - "learning_rate": 7.995585686206982e-06, - "loss": 24.9601, + "epoch": 0.0706294238526524, + "grad_norm": 320.0, + "learning_rate": 9.988964215517456e-06, + "loss": 23.0371, "step": 580 }, { - "epoch": 0.03592358626988355, - "grad_norm": 445.25, - "learning_rate": 7.995509577348481e-06, - "loss": 24.8361, + "epoch": 0.0718471725397671, + "grad_norm": 499.0, + "learning_rate": 9.988773943371205e-06, + "loss": 23.176, "step": 590 }, { - "epoch": 0.0365324606134409, - "grad_norm": 562.5, - "learning_rate": 7.995433468489981e-06, - "loss": 24.9502, + "epoch": 0.0730649212268818, + "grad_norm": 367.5, + "learning_rate": 9.988583671224953e-06, + "loss": 23.1789, "step": 600 }, { - "epoch": 0.03714133495699825, - "grad_norm": 668.0, - "learning_rate": 7.99535735963148e-06, - "loss": 25.0697, + "epoch": 0.0742826699139965, + "grad_norm": 473.0, + "learning_rate": 9.988393399078702e-06, + "loss": 23.4324, "step": 610 }, { - "epoch": 0.0377502093005556, - "grad_norm": 510.75, - "learning_rate": 7.99528125077298e-06, - "loss": 24.9929, + "epoch": 0.0755004186011112, + "grad_norm": 328.0, + "learning_rate": 9.988203126932451e-06, + "loss": 23.2466, "step": 620 }, { - "epoch": 0.038359083644112946, - "grad_norm": 512.0, - "learning_rate": 7.99520514191448e-06, - "loss": 25.3818, + "epoch": 0.07671816728822589, + "grad_norm": 391.75, + "learning_rate": 9.988012854786202e-06, + "loss": 22.8369, "step": 630 }, { - "epoch": 0.0389679579876703, - "grad_norm": 408.25, - "learning_rate": 7.99512903305598e-06, - "loss": 24.6347, + "epoch": 0.0779359159753406, + "grad_norm": 334.0, + "learning_rate": 9.98782258263995e-06, + "loss": 23.5017, "step": 640 }, { - "epoch": 0.03957683233122764, - "grad_norm": 444.75, - "learning_rate": 7.995052924197479e-06, - "loss": 24.6194, + "epoch": 0.07915366466245528, + "grad_norm": 367.5, + "learning_rate": 9.9876323104937e-06, + "loss": 22.9409, "step": 650 }, { - "epoch": 0.04018570667478499, - "grad_norm": 706.5, - "learning_rate": 7.99497681533898e-06, - "loss": 25.3, + "epoch": 0.08037141334956999, + "grad_norm": 318.75, + "learning_rate": 9.987442038347448e-06, + "loss": 23.3296, "step": 660 }, { - "epoch": 0.04079458101834234, - "grad_norm": 545.5, - "learning_rate": 7.994900706480478e-06, - "loss": 24.8079, + "epoch": 0.08158916203668468, + "grad_norm": 385.75, + "learning_rate": 9.987251766201197e-06, + "loss": 23.0806, "step": 670 }, { - "epoch": 0.04140345536189969, - "grad_norm": 568.5, - "learning_rate": 7.994824597621979e-06, - "loss": 24.6425, + "epoch": 0.08280691072379938, + "grad_norm": 339.25, + "learning_rate": 9.987061494054948e-06, + "loss": 23.2532, "step": 680 }, { - "epoch": 0.042012329705457034, - "grad_norm": 559.0, - "learning_rate": 7.994748488763477e-06, - "loss": 25.3957, + "epoch": 0.08402465941091407, + "grad_norm": 356.25, + "learning_rate": 9.986871221908697e-06, + "loss": 23.0229, "step": 690 }, { - "epoch": 0.042621204049014386, - "grad_norm": 505.75, - "learning_rate": 7.994672379904978e-06, - "loss": 25.0813, + "epoch": 0.08524240809802877, + "grad_norm": 536.0, + "learning_rate": 9.986680949762446e-06, + "loss": 23.0866, "step": 700 }, { - "epoch": 0.04323007839257173, - "grad_norm": 644.5, - "learning_rate": 7.994596271046478e-06, - "loss": 24.7936, + "epoch": 0.08646015678514346, + "grad_norm": 369.5, + "learning_rate": 9.986490677616194e-06, + "loss": 23.3175, "step": 710 }, { - "epoch": 0.04383895273612908, - "grad_norm": 481.75, - "learning_rate": 7.994520162187977e-06, - "loss": 24.94, + "epoch": 0.08767790547225816, + "grad_norm": 388.5, + "learning_rate": 9.986300405469945e-06, + "loss": 23.0809, "step": 720 }, { - "epoch": 0.044447827079686426, - "grad_norm": 673.0, - "learning_rate": 7.994444053329477e-06, - "loss": 24.5047, + "epoch": 0.08889565415937285, + "grad_norm": 360.0, + "learning_rate": 9.986110133323694e-06, + "loss": 23.3686, "step": 730 }, { - "epoch": 0.04505670142324378, - "grad_norm": 473.5, - "learning_rate": 7.994367944470976e-06, - "loss": 24.8956, + "epoch": 0.09011340284648756, + "grad_norm": 321.75, + "learning_rate": 9.985919861177443e-06, + "loss": 23.0928, "step": 740 }, { - "epoch": 0.04566557576680113, - "grad_norm": 532.0, - "learning_rate": 7.994291835612476e-06, - "loss": 24.5649, + "epoch": 0.09133115153360226, + "grad_norm": 393.5, + "learning_rate": 9.985729589031192e-06, + "loss": 23.2937, "step": 750 }, { - "epoch": 0.046274450110358474, - "grad_norm": 395.5, - "learning_rate": 7.994215726753975e-06, - "loss": 24.8702, + "epoch": 0.09254890022071695, + "grad_norm": 676.5, + "learning_rate": 9.98553931688494e-06, + "loss": 23.462, "step": 760 }, { - "epoch": 0.046883324453915826, - "grad_norm": 437.5, - "learning_rate": 7.994139617895475e-06, - "loss": 24.6667, + "epoch": 0.09376664890783165, + "grad_norm": 332.0, + "learning_rate": 9.985349044738691e-06, + "loss": 22.8664, "step": 770 }, { - "epoch": 0.04749219879747317, - "grad_norm": 467.0, - "learning_rate": 7.994063509036976e-06, - "loss": 24.1927, + "epoch": 0.09498439759494634, + "grad_norm": 316.25, + "learning_rate": 9.98515877259244e-06, + "loss": 23.0369, "step": 780 }, { - "epoch": 0.04810107314103052, - "grad_norm": 475.5, - "learning_rate": 7.993987400178474e-06, - "loss": 25.1248, + "epoch": 0.09620214628206104, + "grad_norm": 318.5, + "learning_rate": 9.984968500446189e-06, + "loss": 23.437, "step": 790 }, { - "epoch": 0.048709947484587866, - "grad_norm": 656.0, - "learning_rate": 7.993911291319975e-06, - "loss": 24.3436, + "epoch": 0.09741989496917573, + "grad_norm": 612.0, + "learning_rate": 9.984778228299938e-06, + "loss": 22.9744, "step": 800 }, { - "epoch": 0.04931882182814522, - "grad_norm": 392.0, - "learning_rate": 7.993835182461475e-06, - "loss": 24.6863, + "epoch": 0.09863764365629044, + "grad_norm": 397.0, + "learning_rate": 9.984587956153687e-06, + "loss": 22.9221, "step": 810 }, { - "epoch": 0.04992769617170256, - "grad_norm": 549.5, - "learning_rate": 7.993759073602974e-06, - "loss": 24.9492, + "epoch": 0.09985539234340512, + "grad_norm": 531.5, + "learning_rate": 9.984397684007437e-06, + "loss": 23.1225, "step": 820 }, { - "epoch": 0.050536570515259914, - "grad_norm": 472.5, - "learning_rate": 7.993682964744474e-06, - "loss": 24.5096, + "epoch": 0.10107314103051983, + "grad_norm": 485.75, + "learning_rate": 9.984207411861186e-06, + "loss": 22.9605, "step": 830 }, { - "epoch": 0.05114544485881726, - "grad_norm": 654.5, - "learning_rate": 7.993606855885974e-06, - "loss": 24.7159, + "epoch": 0.10229088971763452, + "grad_norm": 439.25, + "learning_rate": 9.984017139714935e-06, + "loss": 22.8921, "step": 840 }, { - "epoch": 0.05175431920237461, - "grad_norm": 578.5, - "learning_rate": 7.993530747027473e-06, - "loss": 24.7425, + "epoch": 0.10350863840474922, + "grad_norm": 614.5, + "learning_rate": 9.983826867568684e-06, + "loss": 23.1956, "step": 850 }, { - "epoch": 0.05236319354593196, - "grad_norm": 539.0, - "learning_rate": 7.993454638168973e-06, - "loss": 24.9575, + "epoch": 0.10472638709186392, + "grad_norm": 384.5, + "learning_rate": 9.983636595422434e-06, + "loss": 23.2236, "step": 860 }, { - "epoch": 0.052972067889489306, - "grad_norm": 798.0, - "learning_rate": 7.993378529310472e-06, - "loss": 24.9244, + "epoch": 0.10594413577897861, + "grad_norm": 442.75, + "learning_rate": 9.983446323276183e-06, + "loss": 22.8962, "step": 870 }, { - "epoch": 0.05358094223304666, - "grad_norm": 388.5, - "learning_rate": 7.993302420451972e-06, - "loss": 24.9071, + "epoch": 0.10716188446609332, + "grad_norm": 438.0, + "learning_rate": 9.983256051129932e-06, + "loss": 23.0436, "step": 880 }, { - "epoch": 0.054189816576604, - "grad_norm": 604.0, - "learning_rate": 7.993226311593471e-06, - "loss": 24.7988, + "epoch": 0.108379633153208, + "grad_norm": 402.75, + "learning_rate": 9.983065778983681e-06, + "loss": 23.3182, "step": 890 }, { - "epoch": 0.054798690920161354, - "grad_norm": 668.0, - "learning_rate": 7.993150202734971e-06, - "loss": 24.9017, + "epoch": 0.10959738184032271, + "grad_norm": 758.5, + "learning_rate": 9.98287550683743e-06, + "loss": 22.9257, "step": 900 }, { - "epoch": 0.0554075652637187, - "grad_norm": 607.5, - "learning_rate": 7.993074093876472e-06, - "loss": 24.6064, + "epoch": 0.1108151305274374, + "grad_norm": 880.5, + "learning_rate": 9.98268523469118e-06, + "loss": 23.1712, "step": 910 }, { - "epoch": 0.05601643960727605, - "grad_norm": 387.75, - "learning_rate": 7.99299798501797e-06, - "loss": 24.7402, + "epoch": 0.1120328792145521, + "grad_norm": 281.25, + "learning_rate": 9.98249496254493e-06, + "loss": 22.8172, "step": 920 }, { - "epoch": 0.056625313950833395, - "grad_norm": 582.0, - "learning_rate": 7.992921876159471e-06, - "loss": 24.5883, + "epoch": 0.11325062790166679, + "grad_norm": 413.0, + "learning_rate": 9.982304690398678e-06, + "loss": 23.3211, "step": 930 }, { - "epoch": 0.057234188294390746, - "grad_norm": 510.75, - "learning_rate": 7.99284576730097e-06, - "loss": 24.2875, + "epoch": 0.11446837658878149, + "grad_norm": 769.5, + "learning_rate": 9.982114418252427e-06, + "loss": 22.8698, "step": 940 }, { - "epoch": 0.05784306263794809, - "grad_norm": 480.0, - "learning_rate": 7.99276965844247e-06, - "loss": 24.6182, + "epoch": 0.11568612527589618, + "grad_norm": 406.75, + "learning_rate": 9.981924146106178e-06, + "loss": 22.8464, "step": 950 }, { - "epoch": 0.05845193698150544, - "grad_norm": 494.0, - "learning_rate": 7.99269354958397e-06, - "loss": 24.3947, + "epoch": 0.11690387396301088, + "grad_norm": 372.75, + "learning_rate": 9.981733873959926e-06, + "loss": 23.105, "step": 960 }, { - "epoch": 0.05906081132506279, - "grad_norm": 717.0, - "learning_rate": 7.99261744072547e-06, - "loss": 24.9133, + "epoch": 0.11812162265012557, + "grad_norm": 348.0, + "learning_rate": 9.981543601813675e-06, + "loss": 23.2349, "step": 970 }, { - "epoch": 0.05966968566862014, - "grad_norm": 412.0, - "learning_rate": 7.992541331866969e-06, - "loss": 24.6561, + "epoch": 0.11933937133724028, + "grad_norm": 261.75, + "learning_rate": 9.981353329667424e-06, + "loss": 23.0146, "step": 980 }, { - "epoch": 0.06027856001217749, - "grad_norm": 524.0, - "learning_rate": 7.992465223008469e-06, - "loss": 24.5113, + "epoch": 0.12055712002435498, + "grad_norm": 516.5, + "learning_rate": 9.981163057521173e-06, + "loss": 22.9435, "step": 990 }, { - "epoch": 0.060887434355734835, - "grad_norm": 512.5, - "learning_rate": 7.992389114149968e-06, - "loss": 24.9458, + "epoch": 0.12177486871146967, + "grad_norm": 293.0, + "learning_rate": 9.980972785374924e-06, + "loss": 23.3143, "step": 1000 }, { - "epoch": 0.061496308699292186, - "grad_norm": 686.0, - "learning_rate": 7.99231300529147e-06, - "loss": 24.7602, + "epoch": 0.12299261739858437, + "grad_norm": 834.0, + "learning_rate": 9.980782513228672e-06, + "loss": 23.1654, "step": 1010 }, { - "epoch": 0.06210518304284953, - "grad_norm": 448.25, - "learning_rate": 7.992236896432967e-06, - "loss": 24.7965, + "epoch": 0.12421036608569906, + "grad_norm": 313.25, + "learning_rate": 9.980592241082421e-06, + "loss": 22.9153, "step": 1020 }, { - "epoch": 0.06271405738640688, - "grad_norm": 674.0, - "learning_rate": 7.992160787574468e-06, - "loss": 24.6205, + "epoch": 0.12542811477281376, + "grad_norm": 432.0, + "learning_rate": 9.98040196893617e-06, + "loss": 22.8277, "step": 1030 }, { - "epoch": 0.06332293172996423, - "grad_norm": 418.75, - "learning_rate": 7.992084678715966e-06, - "loss": 24.7116, + "epoch": 0.12664586345992845, + "grad_norm": 883.0, + "learning_rate": 9.980211696789919e-06, + "loss": 22.8528, "step": 1040 }, { - "epoch": 0.06393180607352157, - "grad_norm": 504.75, - "learning_rate": 7.992008569857467e-06, - "loss": 24.7183, + "epoch": 0.12786361214704314, + "grad_norm": 351.75, + "learning_rate": 9.98002142464367e-06, + "loss": 22.4458, "step": 1050 }, { - "epoch": 0.06454068041707893, - "grad_norm": 633.5, - "learning_rate": 7.991932460998967e-06, - "loss": 24.7446, + "epoch": 0.12908136083415786, + "grad_norm": 322.0, + "learning_rate": 9.979831152497418e-06, + "loss": 22.9115, "step": 1060 }, { - "epoch": 0.06514955476063627, - "grad_norm": 604.0, - "learning_rate": 7.991856352140467e-06, - "loss": 24.362, + "epoch": 0.13029910952127255, + "grad_norm": 345.5, + "learning_rate": 9.979640880351167e-06, + "loss": 23.1693, "step": 1070 }, { - "epoch": 0.06575842910419362, - "grad_norm": 507.25, - "learning_rate": 7.991780243281966e-06, - "loss": 24.3108, + "epoch": 0.13151685820838724, + "grad_norm": 595.5, + "learning_rate": 9.979450608204916e-06, + "loss": 23.0466, "step": 1080 }, { - "epoch": 0.06636730344775096, - "grad_norm": 382.0, - "learning_rate": 7.991704134423466e-06, - "loss": 24.6816, + "epoch": 0.13273460689550193, + "grad_norm": 890.5, + "learning_rate": 9.979260336058667e-06, + "loss": 23.039, "step": 1090 }, { - "epoch": 0.06697617779130832, - "grad_norm": 436.25, - "learning_rate": 7.991628025564965e-06, - "loss": 24.4788, + "epoch": 0.13395235558261664, + "grad_norm": 331.75, + "learning_rate": 9.979070063912416e-06, + "loss": 22.9233, "step": 1100 }, { - "epoch": 0.06758505213486567, - "grad_norm": 421.5, - "learning_rate": 7.991551916706465e-06, - "loss": 24.6223, + "epoch": 0.13517010426973133, + "grad_norm": 599.5, + "learning_rate": 9.978879791766165e-06, + "loss": 22.7559, "step": 1110 }, { - "epoch": 0.06819392647842301, - "grad_norm": 438.5, - "learning_rate": 7.991475807847964e-06, - "loss": 24.7549, + "epoch": 0.13638785295684602, + "grad_norm": 346.0, + "learning_rate": 9.978689519619913e-06, + "loss": 23.0462, "step": 1120 }, { - "epoch": 0.06880280082198037, - "grad_norm": 501.5, - "learning_rate": 7.991399698989464e-06, - "loss": 24.5644, + "epoch": 0.13760560164396074, + "grad_norm": 637.5, + "learning_rate": 9.978499247473662e-06, + "loss": 22.9928, "step": 1130 }, { - "epoch": 0.06941167516553771, - "grad_norm": 597.0, - "learning_rate": 7.991323590130963e-06, - "loss": 24.3507, + "epoch": 0.13882335033107543, + "grad_norm": 255.875, + "learning_rate": 9.978308975327413e-06, + "loss": 22.8982, "step": 1140 }, { - "epoch": 0.07002054950909506, - "grad_norm": 446.75, - "learning_rate": 7.991247481272463e-06, - "loss": 24.456, + "epoch": 0.14004109901819012, + "grad_norm": 487.0, + "learning_rate": 9.978118703181162e-06, + "loss": 23.2006, "step": 1150 }, { - "epoch": 0.0706294238526524, - "grad_norm": 480.0, - "learning_rate": 7.991171372413964e-06, - "loss": 24.1804, + "epoch": 0.1412588477053048, + "grad_norm": 427.25, + "learning_rate": 9.97792843103491e-06, + "loss": 23.2043, "step": 1160 }, { - "epoch": 0.07123829819620976, - "grad_norm": 483.25, - "learning_rate": 7.991095263555464e-06, - "loss": 24.2766, + "epoch": 0.14247659639241952, + "grad_norm": 925.0, + "learning_rate": 9.97773815888866e-06, + "loss": 22.6858, "step": 1170 }, { - "epoch": 0.0718471725397671, - "grad_norm": 876.0, - "learning_rate": 7.991019154696963e-06, - "loss": 24.5967, + "epoch": 0.1436943450795342, + "grad_norm": 317.0, + "learning_rate": 9.977547886742408e-06, + "loss": 22.8387, "step": 1180 }, { - "epoch": 0.07245604688332445, - "grad_norm": 576.5, - "learning_rate": 7.990943045838463e-06, - "loss": 24.5191, + "epoch": 0.1449120937666489, + "grad_norm": 462.75, + "learning_rate": 9.977357614596159e-06, + "loss": 22.6462, "step": 1190 }, { - "epoch": 0.0730649212268818, - "grad_norm": 398.25, - "learning_rate": 7.990866936979962e-06, - "loss": 24.2136, + "epoch": 0.1461298424537636, + "grad_norm": 527.0, + "learning_rate": 9.977167342449908e-06, + "loss": 23.044, "step": 1200 }, { - "epoch": 0.07367379557043915, - "grad_norm": 626.5, - "learning_rate": 7.990790828121462e-06, - "loss": 24.7589, + "epoch": 0.1473475911408783, + "grad_norm": 310.75, + "learning_rate": 9.976977070303657e-06, + "loss": 23.0414, "step": 1210 }, { - "epoch": 0.0742826699139965, - "grad_norm": 478.75, - "learning_rate": 7.990714719262962e-06, - "loss": 24.5127, + "epoch": 0.148565339827993, + "grad_norm": 364.75, + "learning_rate": 9.976786798157406e-06, + "loss": 22.6991, "step": 1220 }, { - "epoch": 0.07489154425755384, - "grad_norm": 526.0, - "learning_rate": 7.990638610404461e-06, - "loss": 24.5955, + "epoch": 0.1497830885151077, + "grad_norm": 349.0, + "learning_rate": 9.976596526011154e-06, + "loss": 22.9886, "step": 1230 }, { - "epoch": 0.0755004186011112, - "grad_norm": 501.75, - "learning_rate": 7.99056250154596e-06, - "loss": 24.5776, + "epoch": 0.1510008372022224, + "grad_norm": 354.75, + "learning_rate": 9.976406253864905e-06, + "loss": 23.014, "step": 1240 }, { - "epoch": 0.07610929294466855, - "grad_norm": 440.5, - "learning_rate": 7.99048639268746e-06, - "loss": 24.0178, + "epoch": 0.1522185858893371, + "grad_norm": 353.25, + "learning_rate": 9.976215981718654e-06, + "loss": 22.8259, "step": 1250 }, { - "epoch": 0.07671816728822589, - "grad_norm": 479.75, - "learning_rate": 7.99041028382896e-06, - "loss": 24.2665, + "epoch": 0.15343633457645178, + "grad_norm": 304.5, + "learning_rate": 9.976025709572403e-06, + "loss": 22.9747, "step": 1260 }, { - "epoch": 0.07732704163178324, - "grad_norm": 483.5, - "learning_rate": 7.99033417497046e-06, - "loss": 24.4262, + "epoch": 0.15465408326356647, + "grad_norm": 305.5, + "learning_rate": 9.975835437426152e-06, + "loss": 22.9474, "step": 1270 }, { - "epoch": 0.0779359159753406, - "grad_norm": 407.25, - "learning_rate": 7.99025806611196e-06, - "loss": 24.3286, + "epoch": 0.1558718319506812, + "grad_norm": 372.25, + "learning_rate": 9.9756451652799e-06, + "loss": 23.1447, "step": 1280 }, { - "epoch": 0.07854479031889794, - "grad_norm": 450.5, - "learning_rate": 7.990181957253458e-06, - "loss": 24.3012, + "epoch": 0.15708958063779588, + "grad_norm": 313.5, + "learning_rate": 9.975454893133651e-06, + "loss": 22.9684, "step": 1290 }, { - "epoch": 0.07915366466245528, - "grad_norm": 677.5, - "learning_rate": 7.99010584839496e-06, - "loss": 24.7612, + "epoch": 0.15830732932491057, + "grad_norm": 352.5, + "learning_rate": 9.9752646209874e-06, + "loss": 22.856, "step": 1300 }, { - "epoch": 0.07976253900601263, - "grad_norm": 843.5, - "learning_rate": 7.990029739536457e-06, - "loss": 24.9348, + "epoch": 0.15952507801202526, + "grad_norm": 464.0, + "learning_rate": 9.975074348841149e-06, + "loss": 22.9834, "step": 1310 }, { - "epoch": 0.08037141334956999, - "grad_norm": 539.0, - "learning_rate": 7.989953630677959e-06, - "loss": 24.5834, + "epoch": 0.16074282669913997, + "grad_norm": 501.75, + "learning_rate": 9.974884076694898e-06, + "loss": 22.9727, "step": 1320 }, { - "epoch": 0.08098028769312733, - "grad_norm": 495.25, - "learning_rate": 7.989877521819458e-06, - "loss": 24.1917, + "epoch": 0.16196057538625466, + "grad_norm": 321.75, + "learning_rate": 9.974693804548646e-06, + "loss": 22.6032, "step": 1330 }, { - "epoch": 0.08158916203668468, - "grad_norm": 636.5, - "learning_rate": 7.989801412960958e-06, - "loss": 24.5528, + "epoch": 0.16317832407336935, + "grad_norm": 412.25, + "learning_rate": 9.974503532402397e-06, + "loss": 23.0908, "step": 1340 }, { - "epoch": 0.08219803638024203, - "grad_norm": 637.5, - "learning_rate": 7.989725304102457e-06, - "loss": 24.2443, + "epoch": 0.16439607276048407, + "grad_norm": 368.75, + "learning_rate": 9.974313260256146e-06, + "loss": 22.9596, "step": 1350 }, { - "epoch": 0.08280691072379938, - "grad_norm": 475.25, - "learning_rate": 7.989649195243957e-06, - "loss": 24.4378, + "epoch": 0.16561382144759876, + "grad_norm": 527.5, + "learning_rate": 9.974122988109895e-06, + "loss": 22.9562, "step": 1360 }, { - "epoch": 0.08341578506735672, - "grad_norm": 462.0, - "learning_rate": 7.989573086385456e-06, - "loss": 24.1952, + "epoch": 0.16683157013471345, + "grad_norm": 470.0, + "learning_rate": 9.973932715963644e-06, + "loss": 23.0617, "step": 1370 }, { - "epoch": 0.08402465941091407, - "grad_norm": 612.0, - "learning_rate": 7.989496977526956e-06, - "loss": 23.8663, + "epoch": 0.16804931882182814, + "grad_norm": 428.75, + "learning_rate": 9.973742443817393e-06, + "loss": 22.7603, "step": 1380 }, { - "epoch": 0.08463353375447143, - "grad_norm": 378.5, - "learning_rate": 7.989420868668456e-06, - "loss": 24.0396, + "epoch": 0.16926706750894285, + "grad_norm": 380.25, + "learning_rate": 9.973552171671143e-06, + "loss": 22.7434, "step": 1390 }, { - "epoch": 0.08524240809802877, - "grad_norm": 525.0, - "learning_rate": 7.989344759809955e-06, - "loss": 24.4567, + "epoch": 0.17048481619605754, + "grad_norm": 467.0, + "learning_rate": 9.973361899524892e-06, + "loss": 23.0467, "step": 1400 }, { - "epoch": 0.08585128244158612, - "grad_norm": 436.0, - "learning_rate": 7.989268650951455e-06, - "loss": 24.6169, + "epoch": 0.17170256488317223, + "grad_norm": 307.0, + "learning_rate": 9.97317162737864e-06, + "loss": 22.8621, "step": 1410 }, { - "epoch": 0.08646015678514346, - "grad_norm": 735.5, - "learning_rate": 7.989192542092956e-06, - "loss": 24.3255, + "epoch": 0.17292031357028692, + "grad_norm": 846.0, + "learning_rate": 9.97298135523239e-06, + "loss": 22.9859, "step": 1420 }, { - "epoch": 0.08706903112870082, - "grad_norm": 490.75, - "learning_rate": 7.989116433234455e-06, - "loss": 24.4478, + "epoch": 0.17413806225740164, + "grad_norm": 398.75, + "learning_rate": 9.972791083086139e-06, + "loss": 22.7079, "step": 1430 }, { - "epoch": 0.08767790547225816, - "grad_norm": 503.5, - "learning_rate": 7.989040324375955e-06, - "loss": 23.8608, + "epoch": 0.17535581094451633, + "grad_norm": 673.5, + "learning_rate": 9.972600810939889e-06, + "loss": 22.8856, "step": 1440 }, { - "epoch": 0.08828677981581551, - "grad_norm": 504.5, - "learning_rate": 7.988964215517455e-06, - "loss": 24.5023, + "epoch": 0.17657355963163102, + "grad_norm": 614.0, + "learning_rate": 9.972410538793638e-06, + "loss": 22.9402, "step": 1450 }, { - "epoch": 0.08889565415937285, - "grad_norm": 475.0, - "learning_rate": 7.988888106658954e-06, - "loss": 24.4643, + "epoch": 0.1777913083187457, + "grad_norm": 453.75, + "learning_rate": 9.972220266647387e-06, + "loss": 22.5813, "step": 1460 }, { - "epoch": 0.08950452850293021, - "grad_norm": 429.75, - "learning_rate": 7.988811997800454e-06, - "loss": 24.4112, + "epoch": 0.17900905700586042, + "grad_norm": 265.75, + "learning_rate": 9.972029994501136e-06, + "loss": 23.0499, "step": 1470 }, { - "epoch": 0.09011340284648756, - "grad_norm": 474.25, - "learning_rate": 7.988735888941953e-06, - "loss": 24.0898, + "epoch": 0.1802268056929751, + "grad_norm": 368.75, + "learning_rate": 9.971839722354885e-06, + "loss": 23.0325, "step": 1480 }, { - "epoch": 0.0907222771900449, - "grad_norm": 535.0, - "learning_rate": 7.988659780083453e-06, - "loss": 24.243, + "epoch": 0.1814445543800898, + "grad_norm": 324.25, + "learning_rate": 9.971649450208635e-06, + "loss": 22.9105, "step": 1490 }, { - "epoch": 0.09133115153360226, - "grad_norm": 396.75, - "learning_rate": 7.988583671224952e-06, - "loss": 24.9429, + "epoch": 0.18266230306720452, + "grad_norm": 335.25, + "learning_rate": 9.971459178062384e-06, + "loss": 23.0539, "step": 1500 }, { - "epoch": 0.0919400258771596, - "grad_norm": 553.0, - "learning_rate": 7.988507562366452e-06, - "loss": 24.6321, + "epoch": 0.1838800517543192, + "grad_norm": 533.0, + "learning_rate": 9.971268905916133e-06, + "loss": 22.8024, "step": 1510 }, { - "epoch": 0.09254890022071695, - "grad_norm": 550.0, - "learning_rate": 7.988431453507951e-06, - "loss": 24.4815, + "epoch": 0.1850978004414339, + "grad_norm": 259.0, + "learning_rate": 9.971078633769882e-06, + "loss": 23.0791, "step": 1520 }, { - "epoch": 0.09315777456427429, - "grad_norm": 547.5, - "learning_rate": 7.988355344649453e-06, - "loss": 24.044, + "epoch": 0.18631554912854859, + "grad_norm": 435.25, + "learning_rate": 9.97088836162363e-06, + "loss": 22.7367, "step": 1530 }, { - "epoch": 0.09376664890783165, - "grad_norm": 494.25, - "learning_rate": 7.98827923579095e-06, - "loss": 23.9887, + "epoch": 0.1875332978156633, + "grad_norm": 573.0, + "learning_rate": 9.970698089477381e-06, + "loss": 22.9542, "step": 1540 }, { - "epoch": 0.094375523251389, - "grad_norm": 438.25, - "learning_rate": 7.988203126932452e-06, - "loss": 24.1213, + "epoch": 0.188751046502778, + "grad_norm": 420.0, + "learning_rate": 9.97050781733113e-06, + "loss": 22.7956, "step": 1550 }, { - "epoch": 0.09498439759494634, - "grad_norm": 879.5, - "learning_rate": 7.98812701807395e-06, - "loss": 24.2047, + "epoch": 0.18996879518989268, + "grad_norm": 425.0, + "learning_rate": 9.970317545184879e-06, + "loss": 22.7087, "step": 1560 }, { - "epoch": 0.09559327193850368, - "grad_norm": 554.5, - "learning_rate": 7.98805090921545e-06, - "loss": 24.3719, + "epoch": 0.19118654387700737, + "grad_norm": 352.0, + "learning_rate": 9.970127273038628e-06, + "loss": 22.9419, "step": 1570 }, { - "epoch": 0.09620214628206104, - "grad_norm": 867.0, - "learning_rate": 7.98797480035695e-06, - "loss": 24.7275, + "epoch": 0.1924042925641221, + "grad_norm": 446.25, + "learning_rate": 9.969937000892377e-06, + "loss": 22.9542, "step": 1580 }, { - "epoch": 0.09681102062561839, - "grad_norm": 423.5, - "learning_rate": 7.98789869149845e-06, - "loss": 24.1115, + "epoch": 0.19362204125123678, + "grad_norm": 357.25, + "learning_rate": 9.969746728746127e-06, + "loss": 23.0573, "step": 1590 }, { - "epoch": 0.09741989496917573, - "grad_norm": 475.75, - "learning_rate": 7.98782258263995e-06, - "loss": 24.2442, + "epoch": 0.19483978993835147, + "grad_norm": 290.75, + "learning_rate": 9.969556456599876e-06, + "loss": 22.9406, "step": 1600 }, { - "epoch": 0.09802876931273309, - "grad_norm": 589.5, - "learning_rate": 7.987746473781449e-06, - "loss": 24.1252, + "epoch": 0.19605753862546618, + "grad_norm": 452.5, + "learning_rate": 9.969366184453625e-06, + "loss": 23.0729, "step": 1610 }, { - "epoch": 0.09863764365629044, - "grad_norm": 568.5, - "learning_rate": 7.987670364922949e-06, - "loss": 24.2393, + "epoch": 0.19727528731258087, + "grad_norm": 304.75, + "learning_rate": 9.969175912307374e-06, + "loss": 22.5812, "step": 1620 }, { - "epoch": 0.09924651799984778, - "grad_norm": 540.0, - "learning_rate": 7.987594256064448e-06, - "loss": 24.3589, + "epoch": 0.19849303599969556, + "grad_norm": 420.5, + "learning_rate": 9.968985640161123e-06, + "loss": 22.8672, "step": 1630 }, { - "epoch": 0.09985539234340512, - "grad_norm": 473.5, - "learning_rate": 7.987518147205948e-06, - "loss": 23.7216, + "epoch": 0.19971078468681025, + "grad_norm": 570.5, + "learning_rate": 9.968795368014873e-06, + "loss": 22.9808, "step": 1640 }, { - "epoch": 0.10046426668696248, - "grad_norm": 484.75, - "learning_rate": 7.987442038347449e-06, - "loss": 24.146, + "epoch": 0.20092853337392497, + "grad_norm": 497.25, + "learning_rate": 9.968605095868622e-06, + "loss": 22.8251, "step": 1650 }, { - "epoch": 0.10107314103051983, - "grad_norm": 521.0, - "learning_rate": 7.987365929488947e-06, - "loss": 23.83, + "epoch": 0.20214628206103966, + "grad_norm": 371.25, + "learning_rate": 9.968414823722371e-06, + "loss": 22.6911, "step": 1660 }, { - "epoch": 0.10168201537407717, - "grad_norm": 650.0, - "learning_rate": 7.987289820630448e-06, - "loss": 24.0998, + "epoch": 0.20336403074815435, + "grad_norm": 411.5, + "learning_rate": 9.96822455157612e-06, + "loss": 22.7651, "step": 1670 }, { - "epoch": 0.10229088971763452, - "grad_norm": 817.0, - "learning_rate": 7.987213711771948e-06, - "loss": 24.3715, + "epoch": 0.20458177943526903, + "grad_norm": 538.0, + "learning_rate": 9.968034279429869e-06, + "loss": 22.7192, "step": 1680 }, { - "epoch": 0.10289976406119188, - "grad_norm": 456.25, - "learning_rate": 7.987137602913447e-06, - "loss": 24.1391, + "epoch": 0.20579952812238375, + "grad_norm": 296.25, + "learning_rate": 9.96784400728362e-06, + "loss": 22.857, "step": 1690 }, { - "epoch": 0.10350863840474922, - "grad_norm": 491.25, - "learning_rate": 7.987061494054947e-06, - "loss": 24.3116, + "epoch": 0.20701727680949844, + "grad_norm": 442.25, + "learning_rate": 9.967653735137368e-06, + "loss": 22.7056, "step": 1700 }, { - "epoch": 0.10411751274830656, - "grad_norm": 486.0, - "learning_rate": 7.986985385196446e-06, - "loss": 24.544, + "epoch": 0.20823502549661313, + "grad_norm": 286.75, + "learning_rate": 9.967463462991117e-06, + "loss": 22.7335, "step": 1710 }, { - "epoch": 0.10472638709186392, - "grad_norm": 448.0, - "learning_rate": 7.986909276337946e-06, - "loss": 24.0699, + "epoch": 0.20945277418372785, + "grad_norm": 431.0, + "learning_rate": 9.967273190844866e-06, + "loss": 23.0438, "step": 1720 }, { - "epoch": 0.10533526143542127, - "grad_norm": 1032.0, - "learning_rate": 7.986833167479445e-06, - "loss": 24.0863, + "epoch": 0.21067052287084254, + "grad_norm": 418.5, + "learning_rate": 9.967082918698615e-06, + "loss": 22.9182, "step": 1730 }, { - "epoch": 0.10594413577897861, - "grad_norm": 666.5, - "learning_rate": 7.986757058620945e-06, - "loss": 23.7643, + "epoch": 0.21188827155795723, + "grad_norm": 573.0, + "learning_rate": 9.966892646552365e-06, + "loss": 22.9267, "step": 1740 }, { - "epoch": 0.10655301012253596, - "grad_norm": 818.0, - "learning_rate": 7.986680949762444e-06, - "loss": 23.8848, + "epoch": 0.21310602024507191, + "grad_norm": 338.25, + "learning_rate": 9.966702374406114e-06, + "loss": 22.9226, "step": 1750 }, { - "epoch": 0.10716188446609332, - "grad_norm": 592.5, - "learning_rate": 7.986604840903944e-06, - "loss": 23.5618, + "epoch": 0.21432376893218663, + "grad_norm": 380.0, + "learning_rate": 9.966512102259863e-06, + "loss": 22.5836, "step": 1760 }, { - "epoch": 0.10777075880965066, - "grad_norm": 452.0, - "learning_rate": 7.986528732045443e-06, - "loss": 23.7178, + "epoch": 0.21554151761930132, + "grad_norm": 365.75, + "learning_rate": 9.966321830113612e-06, + "loss": 22.8506, "step": 1770 }, { - "epoch": 0.108379633153208, - "grad_norm": 539.0, - "learning_rate": 7.986452623186945e-06, - "loss": 24.337, + "epoch": 0.216759266306416, + "grad_norm": 461.25, + "learning_rate": 9.966131557967361e-06, + "loss": 22.8096, "step": 1780 }, { - "epoch": 0.10898850749676535, - "grad_norm": 635.5, - "learning_rate": 7.986376514328443e-06, - "loss": 23.9829, + "epoch": 0.2179770149935307, + "grad_norm": 326.0, + "learning_rate": 9.96594128582111e-06, + "loss": 22.7708, "step": 1790 }, { - "epoch": 0.10959738184032271, - "grad_norm": 587.0, - "learning_rate": 7.986300405469944e-06, - "loss": 24.2237, + "epoch": 0.21919476368064542, + "grad_norm": 336.5, + "learning_rate": 9.96575101367486e-06, + "loss": 23.11, "step": 1800 }, { - "epoch": 0.11020625618388005, - "grad_norm": 507.75, - "learning_rate": 7.986224296611443e-06, - "loss": 24.1134, + "epoch": 0.2204125123677601, + "grad_norm": 400.25, + "learning_rate": 9.96556074152861e-06, + "loss": 22.8898, "step": 1810 }, { - "epoch": 0.1108151305274374, - "grad_norm": 391.75, - "learning_rate": 7.986148187752943e-06, - "loss": 24.2022, + "epoch": 0.2216302610548748, + "grad_norm": 372.0, + "learning_rate": 9.965370469382358e-06, + "loss": 22.9877, "step": 1820 }, { - "epoch": 0.11142400487099474, - "grad_norm": 654.5, - "learning_rate": 7.986072078894442e-06, - "loss": 23.9758, + "epoch": 0.22284800974198948, + "grad_norm": 330.75, + "learning_rate": 9.965180197236107e-06, + "loss": 23.0434, "step": 1830 }, { - "epoch": 0.1120328792145521, - "grad_norm": 389.25, - "learning_rate": 7.985995970035942e-06, - "loss": 23.7702, + "epoch": 0.2240657584291042, + "grad_norm": 302.75, + "learning_rate": 9.964989925089856e-06, + "loss": 23.0615, "step": 1840 }, { - "epoch": 0.11264175355810944, - "grad_norm": 525.5, - "learning_rate": 7.985919861177442e-06, - "loss": 23.9939, + "epoch": 0.2252835071162189, + "grad_norm": 489.25, + "learning_rate": 9.964799652943606e-06, + "loss": 22.9214, "step": 1850 }, { - "epoch": 0.11325062790166679, - "grad_norm": 944.5, - "learning_rate": 7.985843752318941e-06, - "loss": 23.9027, + "epoch": 0.22650125580333358, + "grad_norm": 355.75, + "learning_rate": 9.964609380797355e-06, + "loss": 22.929, "step": 1860 }, { - "epoch": 0.11385950224522415, - "grad_norm": 728.0, - "learning_rate": 7.98576764346044e-06, - "loss": 24.0871, + "epoch": 0.2277190044904483, + "grad_norm": 318.0, + "learning_rate": 9.964419108651104e-06, + "loss": 22.7594, "step": 1870 }, { - "epoch": 0.11446837658878149, - "grad_norm": 504.5, - "learning_rate": 7.985691534601942e-06, - "loss": 24.2737, + "epoch": 0.22893675317756298, + "grad_norm": 463.0, + "learning_rate": 9.964228836504853e-06, + "loss": 22.6917, "step": 1880 }, { - "epoch": 0.11507725093233884, - "grad_norm": 532.0, - "learning_rate": 7.98561542574344e-06, - "loss": 23.9491, + "epoch": 0.23015450186467767, + "grad_norm": 697.5, + "learning_rate": 9.964038564358602e-06, + "loss": 22.8248, "step": 1890 }, { - "epoch": 0.11568612527589618, - "grad_norm": 852.0, - "learning_rate": 7.985539316884941e-06, - "loss": 23.9135, + "epoch": 0.23137225055179236, + "grad_norm": 908.5, + "learning_rate": 9.963848292212352e-06, + "loss": 22.9424, "step": 1900 }, { - "epoch": 0.11629499961945354, - "grad_norm": 410.5, - "learning_rate": 7.985463208026439e-06, - "loss": 23.7667, + "epoch": 0.23258999923890708, + "grad_norm": 454.0, + "learning_rate": 9.963658020066101e-06, + "loss": 22.5561, "step": 1910 }, { - "epoch": 0.11690387396301088, - "grad_norm": 482.25, - "learning_rate": 7.98538709916794e-06, - "loss": 24.0657, + "epoch": 0.23380774792602177, + "grad_norm": 354.25, + "learning_rate": 9.96346774791985e-06, + "loss": 22.566, "step": 1920 }, { - "epoch": 0.11751274830656823, - "grad_norm": 513.0, - "learning_rate": 7.98531099030944e-06, - "loss": 24.1884, + "epoch": 0.23502549661313646, + "grad_norm": 351.5, + "learning_rate": 9.963277475773599e-06, + "loss": 22.8295, "step": 1930 }, { - "epoch": 0.11812162265012557, - "grad_norm": 404.75, - "learning_rate": 7.98523488145094e-06, - "loss": 23.8655, + "epoch": 0.23624324530025115, + "grad_norm": 306.5, + "learning_rate": 9.963087203627348e-06, + "loss": 22.791, "step": 1940 }, { - "epoch": 0.11873049699368293, - "grad_norm": 415.25, - "learning_rate": 7.985158772592439e-06, - "loss": 23.8893, + "epoch": 0.23746099398736586, + "grad_norm": 381.25, + "learning_rate": 9.962896931481099e-06, + "loss": 22.6901, "step": 1950 }, { - "epoch": 0.11933937133724028, - "grad_norm": 576.5, - "learning_rate": 7.985082663733938e-06, - "loss": 24.1121, + "epoch": 0.23867874267448055, + "grad_norm": 350.75, + "learning_rate": 9.962706659334847e-06, + "loss": 22.5385, "step": 1960 }, { - "epoch": 0.11994824568079762, - "grad_norm": 441.0, - "learning_rate": 7.985006554875438e-06, - "loss": 23.9494, + "epoch": 0.23989649136159524, + "grad_norm": 258.75, + "learning_rate": 9.962516387188596e-06, + "loss": 22.6354, "step": 1970 }, { - "epoch": 0.12055712002435498, - "grad_norm": 465.25, - "learning_rate": 7.984930446016937e-06, - "loss": 24.1016, + "epoch": 0.24111424004870996, + "grad_norm": 284.5, + "learning_rate": 9.962326115042345e-06, + "loss": 22.6697, "step": 1980 }, { - "epoch": 0.12116599436791232, - "grad_norm": 500.5, - "learning_rate": 7.984854337158437e-06, - "loss": 24.2461, + "epoch": 0.24233198873582465, + "grad_norm": 320.5, + "learning_rate": 9.962135842896094e-06, + "loss": 22.739, "step": 1990 }, { - "epoch": 0.12177486871146967, - "grad_norm": 538.0, - "learning_rate": 7.984778228299936e-06, - "loss": 24.4163, + "epoch": 0.24354973742293934, + "grad_norm": 541.0, + "learning_rate": 9.961945570749845e-06, + "loss": 22.7237, "step": 2000 }, { - "epoch": 0.12238374305502701, - "grad_norm": 428.25, - "learning_rate": 7.984702119441436e-06, - "loss": 23.7818, + "epoch": 0.24476748611005403, + "grad_norm": 392.0, + "learning_rate": 9.961755298603593e-06, + "loss": 22.7622, "step": 2010 }, { - "epoch": 0.12299261739858437, - "grad_norm": 441.5, - "learning_rate": 7.984626010582936e-06, - "loss": 23.9228, + "epoch": 0.24598523479716874, + "grad_norm": 703.5, + "learning_rate": 9.961565026457342e-06, + "loss": 22.6726, "step": 2020 }, { - "epoch": 0.12360149174214172, - "grad_norm": 537.0, - "learning_rate": 7.984549901724437e-06, - "loss": 23.7794, + "epoch": 0.24720298348428343, + "grad_norm": 384.25, + "learning_rate": 9.961374754311091e-06, + "loss": 22.7774, "step": 2030 }, { - "epoch": 0.12421036608569906, - "grad_norm": 434.5, - "learning_rate": 7.984473792865936e-06, - "loss": 23.7708, + "epoch": 0.24842073217139812, + "grad_norm": 415.5, + "learning_rate": 9.96118448216484e-06, + "loss": 23.0255, "step": 2040 }, { - "epoch": 0.1248192404292564, - "grad_norm": 494.75, - "learning_rate": 7.984397684007436e-06, - "loss": 23.8304, + "epoch": 0.2496384808585128, + "grad_norm": 519.5, + "learning_rate": 9.96099421001859e-06, + "loss": 22.9514, "step": 2050 }, { - "epoch": 0.12542811477281376, - "grad_norm": 446.5, - "learning_rate": 7.984321575148935e-06, - "loss": 23.8985, + "epoch": 0.25085622954562753, + "grad_norm": 437.75, + "learning_rate": 9.96080393787234e-06, + "loss": 22.6842, "step": 2060 }, { - "epoch": 0.1260369891163711, - "grad_norm": 368.75, - "learning_rate": 7.984245466290435e-06, - "loss": 23.742, + "epoch": 0.2520739782327422, + "grad_norm": 554.5, + "learning_rate": 9.960613665726088e-06, + "loss": 22.5928, "step": 2070 }, { - "epoch": 0.12664586345992845, - "grad_norm": 393.0, - "learning_rate": 7.984169357431935e-06, - "loss": 23.7806, + "epoch": 0.2532917269198569, + "grad_norm": 702.5, + "learning_rate": 9.960423393579837e-06, + "loss": 22.7435, "step": 2080 }, { - "epoch": 0.1272547378034858, - "grad_norm": 440.75, - "learning_rate": 7.984093248573434e-06, - "loss": 23.8871, + "epoch": 0.2545094756069716, + "grad_norm": 519.5, + "learning_rate": 9.960233121433586e-06, + "loss": 22.9607, "step": 2090 }, { - "epoch": 0.12786361214704314, - "grad_norm": 556.0, - "learning_rate": 7.984017139714934e-06, - "loss": 23.8715, + "epoch": 0.2557272242940863, + "grad_norm": 476.0, + "learning_rate": 9.960042849287337e-06, + "loss": 22.789, "step": 2100 }, { - "epoch": 0.1284724864906005, - "grad_norm": 496.5, - "learning_rate": 7.983941030856433e-06, - "loss": 23.5876, + "epoch": 0.256944972981201, + "grad_norm": 492.5, + "learning_rate": 9.959852577141086e-06, + "loss": 22.5661, "step": 2110 }, { - "epoch": 0.12908136083415786, - "grad_norm": 502.75, - "learning_rate": 7.983864921997933e-06, - "loss": 23.7611, + "epoch": 0.2581627216683157, + "grad_norm": 514.0, + "learning_rate": 9.959662304994834e-06, + "loss": 22.9395, "step": 2120 }, { - "epoch": 0.1296902351777152, - "grad_norm": 830.5, - "learning_rate": 7.983788813139434e-06, - "loss": 23.8297, + "epoch": 0.2593804703554304, + "grad_norm": 577.0, + "learning_rate": 9.959472032848583e-06, + "loss": 22.6245, "step": 2130 }, { - "epoch": 0.13029910952127255, - "grad_norm": 553.5, - "learning_rate": 7.983712704280932e-06, - "loss": 23.8472, + "epoch": 0.2605982190425451, + "grad_norm": 326.75, + "learning_rate": 9.959281760702332e-06, + "loss": 22.5902, "step": 2140 }, { - "epoch": 0.1309079838648299, - "grad_norm": 620.5, - "learning_rate": 7.983636595422433e-06, - "loss": 24.02, + "epoch": 0.2618159677296598, + "grad_norm": 367.25, + "learning_rate": 9.959091488556083e-06, + "loss": 22.6378, "step": 2150 }, { - "epoch": 0.13151685820838724, - "grad_norm": 799.5, - "learning_rate": 7.983560486563931e-06, - "loss": 23.7774, + "epoch": 0.2630337164167745, + "grad_norm": 365.0, + "learning_rate": 9.958901216409832e-06, + "loss": 22.8368, "step": 2160 }, { - "epoch": 0.1321257325519446, - "grad_norm": 459.25, - "learning_rate": 7.983484377705432e-06, - "loss": 23.7076, + "epoch": 0.2642514651038892, + "grad_norm": 293.25, + "learning_rate": 9.95871094426358e-06, + "loss": 22.6077, "step": 2170 }, { - "epoch": 0.13273460689550193, - "grad_norm": 484.25, - "learning_rate": 7.983408268846932e-06, - "loss": 24.0992, + "epoch": 0.26546921379100386, + "grad_norm": 734.5, + "learning_rate": 9.95852067211733e-06, + "loss": 22.5928, "step": 2180 }, { - "epoch": 0.13334348123905929, - "grad_norm": 392.5, - "learning_rate": 7.983332159988431e-06, - "loss": 23.6718, + "epoch": 0.26668696247811857, + "grad_norm": 480.75, + "learning_rate": 9.958330399971078e-06, + "loss": 22.7898, "step": 2190 }, { - "epoch": 0.13395235558261664, - "grad_norm": 460.25, - "learning_rate": 7.98325605112993e-06, - "loss": 24.0149, + "epoch": 0.2679047111652333, + "grad_norm": 564.5, + "learning_rate": 9.958140127824829e-06, + "loss": 23.1092, "step": 2200 }, { - "epoch": 0.13456122992617398, - "grad_norm": 403.0, - "learning_rate": 7.98317994227143e-06, - "loss": 23.6711, + "epoch": 0.26912245985234795, + "grad_norm": 371.0, + "learning_rate": 9.957949855678578e-06, + "loss": 22.576, "step": 2210 }, { - "epoch": 0.13517010426973133, - "grad_norm": 525.0, - "learning_rate": 7.98310383341293e-06, - "loss": 24.3896, + "epoch": 0.27034020853946267, + "grad_norm": 304.5, + "learning_rate": 9.957759583532327e-06, + "loss": 22.851, "step": 2220 }, { - "epoch": 0.1357789786132887, - "grad_norm": 585.5, - "learning_rate": 7.98302772455443e-06, - "loss": 23.6051, + "epoch": 0.2715579572265774, + "grad_norm": 324.25, + "learning_rate": 9.957569311386075e-06, + "loss": 22.7072, "step": 2230 }, { - "epoch": 0.13638785295684602, - "grad_norm": 563.0, - "learning_rate": 7.982951615695929e-06, - "loss": 23.8391, + "epoch": 0.27277570591369205, + "grad_norm": 293.5, + "learning_rate": 9.957379039239824e-06, + "loss": 22.6171, "step": 2240 }, { - "epoch": 0.13699672730040338, - "grad_norm": 456.0, - "learning_rate": 7.982875506837429e-06, - "loss": 23.7537, + "epoch": 0.27399345460080676, + "grad_norm": 600.0, + "learning_rate": 9.957188767093575e-06, + "loss": 22.5538, "step": 2250 }, { - "epoch": 0.13760560164396074, - "grad_norm": 476.0, - "learning_rate": 7.982799397978928e-06, - "loss": 23.8925, + "epoch": 0.2752112032879215, + "grad_norm": 322.75, + "learning_rate": 9.956998494947324e-06, + "loss": 23.0903, "step": 2260 }, { - "epoch": 0.13821447598751807, - "grad_norm": 430.75, - "learning_rate": 7.98272328912043e-06, - "loss": 23.7052, + "epoch": 0.27642895197503614, + "grad_norm": 437.0, + "learning_rate": 9.956808222801073e-06, + "loss": 22.719, "step": 2270 }, { - "epoch": 0.13882335033107543, - "grad_norm": 739.5, - "learning_rate": 7.982647180261929e-06, - "loss": 24.0857, + "epoch": 0.27764670066215086, + "grad_norm": 248.75, + "learning_rate": 9.956617950654821e-06, + "loss": 22.6685, "step": 2280 }, { - "epoch": 0.13943222467463276, - "grad_norm": 560.5, - "learning_rate": 7.982571071403428e-06, - "loss": 24.0643, + "epoch": 0.2788644493492655, + "grad_norm": 407.0, + "learning_rate": 9.95642767850857e-06, + "loss": 22.9552, "step": 2290 }, { - "epoch": 0.14004109901819012, - "grad_norm": 457.0, - "learning_rate": 7.982494962544928e-06, - "loss": 23.7443, + "epoch": 0.28008219803638024, + "grad_norm": 501.25, + "learning_rate": 9.956237406362321e-06, + "loss": 22.716, "step": 2300 }, { - "epoch": 0.14064997336174748, - "grad_norm": 705.0, - "learning_rate": 7.982418853686428e-06, - "loss": 23.7807, + "epoch": 0.28129994672349495, + "grad_norm": 466.25, + "learning_rate": 9.95604713421607e-06, + "loss": 22.9931, "step": 2310 }, { - "epoch": 0.1412588477053048, - "grad_norm": 491.75, - "learning_rate": 7.982342744827927e-06, - "loss": 23.6736, + "epoch": 0.2825176954106096, + "grad_norm": 345.5, + "learning_rate": 9.955856862069819e-06, + "loss": 22.9603, "step": 2320 }, { - "epoch": 0.14186772204886217, - "grad_norm": 507.25, - "learning_rate": 7.982266635969427e-06, - "loss": 23.3962, + "epoch": 0.28373544409772433, + "grad_norm": 375.75, + "learning_rate": 9.955666589923568e-06, + "loss": 22.9, "step": 2330 }, { - "epoch": 0.14247659639241952, - "grad_norm": 466.75, - "learning_rate": 7.982190527110926e-06, - "loss": 23.7991, + "epoch": 0.28495319278483905, + "grad_norm": 360.75, + "learning_rate": 9.955476317777318e-06, + "loss": 22.6346, "step": 2340 }, { - "epoch": 0.14308547073597686, - "grad_norm": 612.0, - "learning_rate": 7.982114418252426e-06, - "loss": 23.9104, + "epoch": 0.2861709414719537, + "grad_norm": 332.75, + "learning_rate": 9.955286045631067e-06, + "loss": 22.8878, "step": 2350 }, { - "epoch": 0.1436943450795342, - "grad_norm": 421.5, - "learning_rate": 7.982038309393925e-06, - "loss": 23.8134, + "epoch": 0.2873886901590684, + "grad_norm": 562.5, + "learning_rate": 9.955095773484816e-06, + "loss": 22.8146, "step": 2360 }, { - "epoch": 0.14430321942309157, - "grad_norm": 523.0, - "learning_rate": 7.981962200535425e-06, - "loss": 23.702, + "epoch": 0.28860643884618314, + "grad_norm": 595.5, + "learning_rate": 9.954905501338565e-06, + "loss": 22.6711, "step": 2370 }, { - "epoch": 0.1449120937666489, - "grad_norm": 736.5, - "learning_rate": 7.981886091676926e-06, - "loss": 23.6135, + "epoch": 0.2898241875332978, + "grad_norm": 425.25, + "learning_rate": 9.954715229192314e-06, + "loss": 22.8447, "step": 2380 }, { - "epoch": 0.14552096811020626, - "grad_norm": 848.0, - "learning_rate": 7.981809982818424e-06, - "loss": 23.8981, + "epoch": 0.2910419362204125, + "grad_norm": 633.0, + "learning_rate": 9.954524957046064e-06, + "loss": 22.6317, "step": 2390 }, { - "epoch": 0.1461298424537636, - "grad_norm": 489.75, - "learning_rate": 7.981733873959925e-06, - "loss": 23.5943, + "epoch": 0.2922596849075272, + "grad_norm": 336.25, + "learning_rate": 9.954334684899813e-06, + "loss": 22.7242, "step": 2400 }, { - "epoch": 0.14673871679732095, - "grad_norm": 519.5, - "learning_rate": 7.981657765101423e-06, - "loss": 23.3523, + "epoch": 0.2934774335946419, + "grad_norm": 445.0, + "learning_rate": 9.954144412753562e-06, + "loss": 22.8296, "step": 2410 }, { - "epoch": 0.1473475911408783, - "grad_norm": 467.25, - "learning_rate": 7.981581656242924e-06, - "loss": 23.8777, + "epoch": 0.2946951822817566, + "grad_norm": 339.0, + "learning_rate": 9.95395414060731e-06, + "loss": 22.6007, "step": 2420 }, { - "epoch": 0.14795646548443564, - "grad_norm": 443.0, - "learning_rate": 7.981505547384424e-06, - "loss": 23.6681, + "epoch": 0.2959129309688713, + "grad_norm": 393.75, + "learning_rate": 9.95376386846106e-06, + "loss": 22.9184, "step": 2430 }, { - "epoch": 0.148565339827993, - "grad_norm": 418.0, - "learning_rate": 7.981429438525923e-06, - "loss": 23.6622, + "epoch": 0.297130679655986, + "grad_norm": 611.0, + "learning_rate": 9.95357359631481e-06, + "loss": 22.8158, "step": 2440 }, { - "epoch": 0.14917421417155036, - "grad_norm": 469.75, - "learning_rate": 7.981353329667423e-06, - "loss": 23.9613, + "epoch": 0.2983484283431007, + "grad_norm": 264.5, + "learning_rate": 9.953383324168559e-06, + "loss": 22.4974, "step": 2450 }, { - "epoch": 0.1497830885151077, - "grad_norm": 475.5, - "learning_rate": 7.981277220808922e-06, - "loss": 24.2091, + "epoch": 0.2995661770302154, + "grad_norm": 269.25, + "learning_rate": 9.953193052022308e-06, + "loss": 22.5819, "step": 2460 }, { - "epoch": 0.15039196285866505, - "grad_norm": 469.5, - "learning_rate": 7.981201111950422e-06, - "loss": 23.5705, + "epoch": 0.3007839257173301, + "grad_norm": 363.25, + "learning_rate": 9.953002779876057e-06, + "loss": 22.5765, "step": 2470 }, { - "epoch": 0.1510008372022224, - "grad_norm": 524.0, - "learning_rate": 7.981125003091922e-06, - "loss": 23.7235, + "epoch": 0.3020016744044448, + "grad_norm": 340.25, + "learning_rate": 9.952812507729807e-06, + "loss": 22.8563, "step": 2480 }, { - "epoch": 0.15160971154577974, - "grad_norm": 470.0, - "learning_rate": 7.981048894233421e-06, - "loss": 23.7459, + "epoch": 0.30321942309155947, + "grad_norm": 342.0, + "learning_rate": 9.952622235583556e-06, + "loss": 23.1226, "step": 2490 }, { - "epoch": 0.1522185858893371, - "grad_norm": 483.25, - "learning_rate": 7.980972785374922e-06, - "loss": 23.2756, + "epoch": 0.3044371717786742, + "grad_norm": 340.5, + "learning_rate": 9.952431963437305e-06, + "loss": 22.5687, "step": 2500 }, { - "epoch": 0.15282746023289442, - "grad_norm": 397.0, - "learning_rate": 7.98089667651642e-06, - "loss": 23.3925, + "epoch": 0.30565492046578885, + "grad_norm": 392.5, + "learning_rate": 9.952241691291054e-06, + "loss": 22.8702, "step": 2510 }, { - "epoch": 0.15343633457645178, - "grad_norm": 465.25, - "learning_rate": 7.980820567657921e-06, - "loss": 23.8293, + "epoch": 0.30687266915290357, + "grad_norm": 364.25, + "learning_rate": 9.952051419144803e-06, + "loss": 22.6302, "step": 2520 }, { - "epoch": 0.15404520892000914, - "grad_norm": 519.5, - "learning_rate": 7.980744458799421e-06, - "loss": 24.0543, + "epoch": 0.3080904178400183, + "grad_norm": 549.0, + "learning_rate": 9.951861146998553e-06, + "loss": 23.2274, "step": 2530 }, { - "epoch": 0.15465408326356647, - "grad_norm": 450.25, - "learning_rate": 7.98066834994092e-06, - "loss": 23.3999, + "epoch": 0.30930816652713294, + "grad_norm": 630.5, + "learning_rate": 9.951670874852302e-06, + "loss": 22.7162, "step": 2540 }, { - "epoch": 0.15526295760712383, - "grad_norm": 621.5, - "learning_rate": 7.98059224108242e-06, - "loss": 23.8093, + "epoch": 0.31052591521424766, + "grad_norm": 472.0, + "learning_rate": 9.951480602706051e-06, + "loss": 22.4515, "step": 2550 }, { - "epoch": 0.1558718319506812, - "grad_norm": 370.75, - "learning_rate": 7.98051613222392e-06, - "loss": 24.1084, + "epoch": 0.3117436639013624, + "grad_norm": 550.5, + "learning_rate": 9.9512903305598e-06, + "loss": 22.7794, "step": 2560 }, { - "epoch": 0.15648070629423852, - "grad_norm": 705.0, - "learning_rate": 7.98044002336542e-06, - "loss": 23.6988, + "epoch": 0.31296141258847704, + "grad_norm": 411.25, + "learning_rate": 9.95110005841355e-06, + "loss": 22.6228, "step": 2570 }, { - "epoch": 0.15708958063779588, - "grad_norm": 492.25, - "learning_rate": 7.980363914506919e-06, - "loss": 23.6, + "epoch": 0.31417916127559176, + "grad_norm": 640.0, + "learning_rate": 9.9509097862673e-06, + "loss": 22.6809, "step": 2580 }, { - "epoch": 0.15769845498135324, - "grad_norm": 408.5, - "learning_rate": 7.980287805648418e-06, - "loss": 23.6126, + "epoch": 0.3153969099627065, + "grad_norm": 538.5, + "learning_rate": 9.950719514121048e-06, + "loss": 22.4981, "step": 2590 }, { - "epoch": 0.15830732932491057, - "grad_norm": 499.5, - "learning_rate": 7.980211696789918e-06, - "loss": 23.6962, + "epoch": 0.31661465864982113, + "grad_norm": 685.5, + "learning_rate": 9.950529241974797e-06, + "loss": 22.6211, "step": 2600 }, { - "epoch": 0.15891620366846793, - "grad_norm": 447.5, - "learning_rate": 7.980135587931417e-06, - "loss": 23.7325, + "epoch": 0.31783240733693585, + "grad_norm": 310.0, + "learning_rate": 9.950338969828546e-06, + "loss": 22.239, "step": 2610 }, { - "epoch": 0.15952507801202526, - "grad_norm": 390.75, - "learning_rate": 7.980059479072917e-06, - "loss": 23.474, + "epoch": 0.3190501560240505, + "grad_norm": 295.0, + "learning_rate": 9.950148697682297e-06, + "loss": 22.8624, "step": 2620 }, { - "epoch": 0.16013395235558261, - "grad_norm": 571.5, - "learning_rate": 7.979983370214418e-06, - "loss": 23.73, + "epoch": 0.32026790471116523, + "grad_norm": 528.5, + "learning_rate": 9.949958425536046e-06, + "loss": 22.7837, "step": 2630 }, { - "epoch": 0.16074282669913997, - "grad_norm": 501.5, - "learning_rate": 7.979907261355916e-06, - "loss": 23.5425, + "epoch": 0.32148565339827995, + "grad_norm": 433.5, + "learning_rate": 9.949768153389794e-06, + "loss": 22.6294, "step": 2640 }, { - "epoch": 0.1613517010426973, - "grad_norm": 925.0, - "learning_rate": 7.979831152497417e-06, - "loss": 23.9183, + "epoch": 0.3227034020853946, + "grad_norm": 513.0, + "learning_rate": 9.949577881243543e-06, + "loss": 23.0151, "step": 2650 }, { - "epoch": 0.16196057538625466, - "grad_norm": 386.75, - "learning_rate": 7.979755043638917e-06, - "loss": 23.5385, + "epoch": 0.3239211507725093, + "grad_norm": 499.75, + "learning_rate": 9.949387609097292e-06, + "loss": 22.5745, "step": 2660 }, { - "epoch": 0.16256944972981202, - "grad_norm": 629.5, - "learning_rate": 7.979678934780416e-06, - "loss": 23.3944, + "epoch": 0.32513889945962404, + "grad_norm": 429.75, + "learning_rate": 9.949197336951043e-06, + "loss": 22.5822, "step": 2670 }, { - "epoch": 0.16317832407336935, - "grad_norm": 797.5, - "learning_rate": 7.979602825921916e-06, - "loss": 23.7524, + "epoch": 0.3263566481467387, + "grad_norm": 472.5, + "learning_rate": 9.949007064804792e-06, + "loss": 22.6606, "step": 2680 }, { - "epoch": 0.1637871984169267, - "grad_norm": 679.0, - "learning_rate": 7.979526717063415e-06, - "loss": 23.3417, + "epoch": 0.3275743968338534, + "grad_norm": 418.75, + "learning_rate": 9.94881679265854e-06, + "loss": 22.6881, "step": 2690 }, { - "epoch": 0.16439607276048407, - "grad_norm": 599.5, - "learning_rate": 7.979450608204915e-06, - "loss": 24.1741, + "epoch": 0.32879214552096814, + "grad_norm": 672.0, + "learning_rate": 9.94862652051229e-06, + "loss": 22.7059, "step": 2700 }, { - "epoch": 0.1650049471040414, - "grad_norm": 1185.0, - "learning_rate": 7.979374499346415e-06, - "loss": 23.5533, + "epoch": 0.3300098942080828, + "grad_norm": 332.75, + "learning_rate": 9.94843624836604e-06, + "loss": 22.6674, "step": 2710 }, { - "epoch": 0.16561382144759876, - "grad_norm": 403.0, - "learning_rate": 7.979298390487914e-06, - "loss": 23.7548, + "epoch": 0.3312276428951975, + "grad_norm": 681.5, + "learning_rate": 9.948245976219789e-06, + "loss": 22.8335, "step": 2720 }, { - "epoch": 0.1662226957911561, - "grad_norm": 597.0, - "learning_rate": 7.979222281629415e-06, - "loss": 24.4425, + "epoch": 0.3324453915823122, + "grad_norm": 340.75, + "learning_rate": 9.948055704073538e-06, + "loss": 22.8004, "step": 2730 }, { - "epoch": 0.16683157013471345, - "grad_norm": 648.5, - "learning_rate": 7.979146172770913e-06, - "loss": 23.3017, + "epoch": 0.3336631402694269, + "grad_norm": 726.5, + "learning_rate": 9.947865431927287e-06, + "loss": 22.6123, "step": 2740 }, { - "epoch": 0.1674404444782708, - "grad_norm": 910.5, - "learning_rate": 7.979070063912414e-06, - "loss": 23.7393, + "epoch": 0.3348808889565416, + "grad_norm": 467.5, + "learning_rate": 9.947675159781035e-06, + "loss": 22.7943, "step": 2750 }, { - "epoch": 0.16804931882182814, - "grad_norm": 901.0, - "learning_rate": 7.978993955053912e-06, - "loss": 23.8384, + "epoch": 0.3360986376436563, + "grad_norm": 473.75, + "learning_rate": 9.947484887634786e-06, + "loss": 22.5676, "step": 2760 }, { - "epoch": 0.1686581931653855, - "grad_norm": 569.0, - "learning_rate": 7.978917846195414e-06, - "loss": 23.6584, + "epoch": 0.337316386330771, + "grad_norm": 430.0, + "learning_rate": 9.947294615488535e-06, + "loss": 22.7151, "step": 2770 }, { - "epoch": 0.16926706750894285, - "grad_norm": 438.25, - "learning_rate": 7.978841737336913e-06, - "loss": 23.7324, + "epoch": 0.3385341350178857, + "grad_norm": 285.5, + "learning_rate": 9.947104343342284e-06, + "loss": 22.8741, "step": 2780 }, { - "epoch": 0.16987594185250018, - "grad_norm": 535.0, - "learning_rate": 7.978765628478413e-06, - "loss": 23.9481, + "epoch": 0.33975188370500037, + "grad_norm": 396.5, + "learning_rate": 9.946914071196033e-06, + "loss": 22.5662, "step": 2790 }, { - "epoch": 0.17048481619605754, - "grad_norm": 678.0, - "learning_rate": 7.978689519619912e-06, - "loss": 23.4555, + "epoch": 0.3409696323921151, + "grad_norm": 307.25, + "learning_rate": 9.946723799049781e-06, + "loss": 22.8248, "step": 2800 }, { - "epoch": 0.17109369053961487, - "grad_norm": 645.5, - "learning_rate": 7.978613410761412e-06, - "loss": 23.6862, + "epoch": 0.34218738107922975, + "grad_norm": 676.0, + "learning_rate": 9.946533526903532e-06, + "loss": 22.8945, "step": 2810 }, { - "epoch": 0.17170256488317223, - "grad_norm": 445.25, - "learning_rate": 7.978537301902911e-06, - "loss": 23.776, + "epoch": 0.34340512976634446, + "grad_norm": 429.5, + "learning_rate": 9.946343254757281e-06, + "loss": 22.7002, "step": 2820 }, { - "epoch": 0.1723114392267296, - "grad_norm": 480.25, - "learning_rate": 7.978461193044411e-06, - "loss": 23.5924, + "epoch": 0.3446228784534592, + "grad_norm": 327.25, + "learning_rate": 9.94615298261103e-06, + "loss": 22.7292, "step": 2830 }, { - "epoch": 0.17292031357028692, - "grad_norm": 436.25, - "learning_rate": 7.97838508418591e-06, - "loss": 23.6267, + "epoch": 0.34584062714057384, + "grad_norm": 431.75, + "learning_rate": 9.945962710464779e-06, + "loss": 22.413, "step": 2840 }, { - "epoch": 0.17352918791384428, - "grad_norm": 579.0, - "learning_rate": 7.97830897532741e-06, - "loss": 23.7811, + "epoch": 0.34705837582768856, + "grad_norm": 291.25, + "learning_rate": 9.94577243831853e-06, + "loss": 22.6488, "step": 2850 }, { - "epoch": 0.17413806225740164, - "grad_norm": 535.5, - "learning_rate": 7.97823286646891e-06, - "loss": 23.8594, + "epoch": 0.3482761245148033, + "grad_norm": 408.25, + "learning_rate": 9.945582166172278e-06, + "loss": 22.8355, "step": 2860 }, { - "epoch": 0.17474693660095897, - "grad_norm": 498.0, - "learning_rate": 7.978156757610409e-06, - "loss": 23.2003, + "epoch": 0.34949387320191794, + "grad_norm": 403.5, + "learning_rate": 9.945391894026027e-06, + "loss": 22.7133, "step": 2870 }, { - "epoch": 0.17535581094451633, - "grad_norm": 623.0, - "learning_rate": 7.97808064875191e-06, - "loss": 23.4851, + "epoch": 0.35071162188903265, + "grad_norm": 371.5, + "learning_rate": 9.945201621879776e-06, + "loss": 22.5127, "step": 2880 }, { - "epoch": 0.17596468528807369, - "grad_norm": 467.0, - "learning_rate": 7.97800453989341e-06, - "loss": 23.7073, + "epoch": 0.35192937057614737, + "grad_norm": 490.25, + "learning_rate": 9.945011349733525e-06, + "loss": 22.7957, "step": 2890 }, { - "epoch": 0.17657355963163102, - "grad_norm": 474.75, - "learning_rate": 7.97792843103491e-06, - "loss": 23.5397, + "epoch": 0.35314711926326203, + "grad_norm": 472.75, + "learning_rate": 9.944821077587275e-06, + "loss": 22.3429, "step": 2900 }, { - "epoch": 0.17718243397518837, - "grad_norm": 399.5, - "learning_rate": 7.977852322176409e-06, - "loss": 23.4913, + "epoch": 0.35436486795037675, + "grad_norm": 540.5, + "learning_rate": 9.944630805441024e-06, + "loss": 22.7809, "step": 2910 }, { - "epoch": 0.1777913083187457, - "grad_norm": 454.5, - "learning_rate": 7.977776213317908e-06, - "loss": 23.2491, + "epoch": 0.3555826166374914, + "grad_norm": 330.25, + "learning_rate": 9.944440533294773e-06, + "loss": 22.3547, "step": 2920 }, { - "epoch": 0.17840018266230306, - "grad_norm": 628.5, - "learning_rate": 7.977700104459408e-06, - "loss": 23.9576, + "epoch": 0.35680036532460613, + "grad_norm": 766.5, + "learning_rate": 9.944250261148522e-06, + "loss": 22.3245, "step": 2930 }, { - "epoch": 0.17900905700586042, - "grad_norm": 503.0, - "learning_rate": 7.977623995600908e-06, - "loss": 23.7708, + "epoch": 0.35801811401172084, + "grad_norm": 316.0, + "learning_rate": 9.94405998900227e-06, + "loss": 22.2698, "step": 2940 }, { - "epoch": 0.17961793134941775, - "grad_norm": 866.0, - "learning_rate": 7.977547886742407e-06, - "loss": 23.7517, + "epoch": 0.3592358626988355, + "grad_norm": 320.25, + "learning_rate": 9.94386971685602e-06, + "loss": 22.6861, "step": 2950 }, { - "epoch": 0.1802268056929751, - "grad_norm": 515.5, - "learning_rate": 7.977471777883907e-06, - "loss": 23.4176, + "epoch": 0.3604536113859502, + "grad_norm": 377.0, + "learning_rate": 9.94367944470977e-06, + "loss": 22.624, "step": 2960 }, { - "epoch": 0.18083568003653247, - "grad_norm": 581.0, - "learning_rate": 7.977395669025406e-06, - "loss": 23.7534, + "epoch": 0.36167136007306494, + "grad_norm": 396.0, + "learning_rate": 9.943489172563519e-06, + "loss": 23.0753, "step": 2970 }, { - "epoch": 0.1814445543800898, - "grad_norm": 594.0, - "learning_rate": 7.977319560166907e-06, - "loss": 23.2326, + "epoch": 0.3628891087601796, + "grad_norm": 373.25, + "learning_rate": 9.943298900417268e-06, + "loss": 22.5845, "step": 2980 }, { - "epoch": 0.18205342872364716, - "grad_norm": 472.25, - "learning_rate": 7.977243451308405e-06, - "loss": 23.8834, + "epoch": 0.3641068574472943, + "grad_norm": 312.0, + "learning_rate": 9.943108628271017e-06, + "loss": 22.4724, "step": 2990 }, { - "epoch": 0.18266230306720452, - "grad_norm": 773.0, - "learning_rate": 7.977167342449907e-06, - "loss": 23.5239, + "epoch": 0.36532460613440904, + "grad_norm": 433.75, + "learning_rate": 9.942918356124766e-06, + "loss": 22.6247, "step": 3000 }, { - "epoch": 0.18327117741076185, - "grad_norm": 663.5, - "learning_rate": 7.977091233591404e-06, - "loss": 23.4971, + "epoch": 0.3665423548215237, + "grad_norm": 422.0, + "learning_rate": 9.942728083978516e-06, + "loss": 22.4805, "step": 3010 }, { - "epoch": 0.1838800517543192, - "grad_norm": 632.0, - "learning_rate": 7.977015124732906e-06, - "loss": 23.6645, + "epoch": 0.3677601035086384, + "grad_norm": 409.75, + "learning_rate": 9.942537811832265e-06, + "loss": 22.3236, "step": 3020 }, { - "epoch": 0.18448892609787654, - "grad_norm": 452.75, - "learning_rate": 7.976939015874405e-06, - "loss": 23.514, + "epoch": 0.3689778521957531, + "grad_norm": 488.5, + "learning_rate": 9.942347539686014e-06, + "loss": 22.579, "step": 3030 }, { - "epoch": 0.1850978004414339, - "grad_norm": 816.5, - "learning_rate": 7.976862907015905e-06, - "loss": 23.4402, + "epoch": 0.3701956008828678, + "grad_norm": 335.25, + "learning_rate": 9.942157267539763e-06, + "loss": 22.6445, "step": 3040 }, { - "epoch": 0.18570667478499125, - "grad_norm": 821.0, - "learning_rate": 7.976786798157404e-06, - "loss": 23.4769, + "epoch": 0.3714133495699825, + "grad_norm": 454.0, + "learning_rate": 9.941966995393512e-06, + "loss": 22.8879, "step": 3050 }, { - "epoch": 0.18631554912854859, - "grad_norm": 604.0, - "learning_rate": 7.976710689298904e-06, - "loss": 23.7916, + "epoch": 0.37263109825709717, + "grad_norm": 745.5, + "learning_rate": 9.941776723247262e-06, + "loss": 22.7475, "step": 3060 }, { - "epoch": 0.18692442347210594, - "grad_norm": 743.0, - "learning_rate": 7.976634580440403e-06, - "loss": 23.257, + "epoch": 0.3738488469442119, + "grad_norm": 779.0, + "learning_rate": 9.941586451101011e-06, + "loss": 22.8279, "step": 3070 }, { - "epoch": 0.1875332978156633, - "grad_norm": 985.0, - "learning_rate": 7.976558471581903e-06, - "loss": 23.2621, + "epoch": 0.3750665956313266, + "grad_norm": 693.0, + "learning_rate": 9.94139617895476e-06, + "loss": 22.6676, "step": 3080 }, { - "epoch": 0.18814217215922063, - "grad_norm": 676.0, - "learning_rate": 7.976482362723403e-06, - "loss": 23.3334, + "epoch": 0.37628434431844127, + "grad_norm": 499.0, + "learning_rate": 9.941205906808509e-06, + "loss": 22.7062, "step": 3090 }, { - "epoch": 0.188751046502778, - "grad_norm": 514.0, - "learning_rate": 7.976406253864902e-06, - "loss": 23.5823, + "epoch": 0.377502093005556, + "grad_norm": 345.25, + "learning_rate": 9.941015634662258e-06, + "loss": 22.8102, "step": 3100 }, { - "epoch": 0.18935992084633535, - "grad_norm": 907.5, - "learning_rate": 7.976330145006402e-06, - "loss": 23.5722, + "epoch": 0.3787198416926707, + "grad_norm": 655.0, + "learning_rate": 9.940825362516008e-06, + "loss": 22.2855, "step": 3110 }, { - "epoch": 0.18996879518989268, - "grad_norm": 578.5, - "learning_rate": 7.976254036147903e-06, - "loss": 23.8378, + "epoch": 0.37993759037978536, + "grad_norm": 422.25, + "learning_rate": 9.940635090369757e-06, + "loss": 22.5699, "step": 3120 }, { - "epoch": 0.19057766953345004, - "grad_norm": 577.0, - "learning_rate": 7.976177927289402e-06, - "loss": 23.5676, + "epoch": 0.3811553390669001, + "grad_norm": 325.5, + "learning_rate": 9.940444818223506e-06, + "loss": 22.5509, "step": 3130 }, { - "epoch": 0.19118654387700737, - "grad_norm": 729.5, - "learning_rate": 7.976101818430902e-06, - "loss": 23.6104, + "epoch": 0.38237308775401474, + "grad_norm": 568.5, + "learning_rate": 9.940254546077255e-06, + "loss": 22.5317, "step": 3140 }, { - "epoch": 0.19179541822056473, - "grad_norm": 560.5, - "learning_rate": 7.976025709572402e-06, - "loss": 23.8956, + "epoch": 0.38359083644112946, + "grad_norm": 328.0, + "learning_rate": 9.940064273931004e-06, + "loss": 22.5232, "step": 3150 }, { - "epoch": 0.1924042925641221, - "grad_norm": 439.75, - "learning_rate": 7.975949600713901e-06, - "loss": 23.664, + "epoch": 0.3848085851282442, + "grad_norm": 519.5, + "learning_rate": 9.939874001784754e-06, + "loss": 22.5387, "step": 3160 }, { - "epoch": 0.19301316690767942, - "grad_norm": 544.5, - "learning_rate": 7.9758734918554e-06, - "loss": 23.6346, + "epoch": 0.38602633381535884, + "grad_norm": 381.0, + "learning_rate": 9.939683729638503e-06, + "loss": 22.8314, "step": 3170 }, { - "epoch": 0.19362204125123678, - "grad_norm": 482.75, - "learning_rate": 7.9757973829969e-06, - "loss": 23.4919, + "epoch": 0.38724408250247355, + "grad_norm": 381.5, + "learning_rate": 9.939493457492252e-06, + "loss": 22.4892, "step": 3180 }, { - "epoch": 0.19423091559479413, - "grad_norm": 634.0, - "learning_rate": 7.9757212741384e-06, - "loss": 23.8638, + "epoch": 0.38846183118958827, + "grad_norm": 555.5, + "learning_rate": 9.939303185346001e-06, + "loss": 22.4801, "step": 3190 }, { - "epoch": 0.19483978993835147, - "grad_norm": 412.0, - "learning_rate": 7.9756451652799e-06, - "loss": 23.4453, + "epoch": 0.38967957987670293, + "grad_norm": 305.0, + "learning_rate": 9.93911291319975e-06, + "loss": 22.6149, "step": 3200 }, { - "epoch": 0.19544866428190882, - "grad_norm": 830.5, - "learning_rate": 7.975569056421399e-06, - "loss": 23.3403, + "epoch": 0.39089732856381765, + "grad_norm": 326.5, + "learning_rate": 9.9389226410535e-06, + "loss": 22.4393, "step": 3210 }, { - "epoch": 0.19605753862546618, - "grad_norm": 483.5, - "learning_rate": 7.975492947562898e-06, - "loss": 23.6017, + "epoch": 0.39211507725093236, + "grad_norm": 380.25, + "learning_rate": 9.93873236890725e-06, + "loss": 22.4641, "step": 3220 }, { - "epoch": 0.1966664129690235, - "grad_norm": 572.5, - "learning_rate": 7.9754168387044e-06, - "loss": 23.0888, + "epoch": 0.393332825938047, + "grad_norm": 615.5, + "learning_rate": 9.938542096760998e-06, + "loss": 22.9049, "step": 3230 }, { - "epoch": 0.19727528731258087, - "grad_norm": 383.75, - "learning_rate": 7.975340729845897e-06, - "loss": 23.4226, + "epoch": 0.39455057462516174, + "grad_norm": 297.75, + "learning_rate": 9.938351824614747e-06, + "loss": 22.5782, "step": 3240 }, { - "epoch": 0.1978841616561382, - "grad_norm": 598.5, - "learning_rate": 7.975264620987399e-06, - "loss": 23.4296, + "epoch": 0.3957683233122764, + "grad_norm": 511.0, + "learning_rate": 9.938161552468496e-06, + "loss": 22.6692, "step": 3250 }, { - "epoch": 0.19849303599969556, - "grad_norm": 679.0, - "learning_rate": 7.975188512128897e-06, - "loss": 23.4003, + "epoch": 0.3969860719993911, + "grad_norm": 311.75, + "learning_rate": 9.937971280322246e-06, + "loss": 22.4024, "step": 3260 }, { - "epoch": 0.19910191034325292, - "grad_norm": 487.0, - "learning_rate": 7.975112403270398e-06, - "loss": 23.4645, + "epoch": 0.39820382068650584, + "grad_norm": 350.75, + "learning_rate": 9.937781008175995e-06, + "loss": 22.6302, "step": 3270 }, { - "epoch": 0.19971078468681025, - "grad_norm": 824.5, - "learning_rate": 7.975036294411897e-06, - "loss": 23.2812, + "epoch": 0.3994215693736205, + "grad_norm": 343.0, + "learning_rate": 9.937590736029744e-06, + "loss": 22.7398, "step": 3280 }, { - "epoch": 0.2003196590303676, - "grad_norm": 796.0, - "learning_rate": 7.974960185553397e-06, - "loss": 23.2575, + "epoch": 0.4006393180607352, + "grad_norm": 329.75, + "learning_rate": 9.937400463883493e-06, + "loss": 22.581, "step": 3290 }, { - "epoch": 0.20092853337392497, - "grad_norm": 444.75, - "learning_rate": 7.974884076694896e-06, - "loss": 23.474, + "epoch": 0.40185706674784993, + "grad_norm": 373.5, + "learning_rate": 9.937210191737242e-06, + "loss": 22.6314, "step": 3300 }, { - "epoch": 0.2015374077174823, - "grad_norm": 449.5, - "learning_rate": 7.974807967836396e-06, - "loss": 23.4397, + "epoch": 0.4030748154349646, + "grad_norm": 417.25, + "learning_rate": 9.937019919590993e-06, + "loss": 22.5158, "step": 3310 }, { - "epoch": 0.20214628206103966, - "grad_norm": 722.5, - "learning_rate": 7.974731858977896e-06, - "loss": 23.2618, + "epoch": 0.4042925641220793, + "grad_norm": 291.25, + "learning_rate": 9.936829647444741e-06, + "loss": 22.4056, "step": 3320 }, { - "epoch": 0.20275515640459701, - "grad_norm": 471.75, - "learning_rate": 7.974655750119395e-06, - "loss": 23.6437, + "epoch": 0.40551031280919403, + "grad_norm": 356.5, + "learning_rate": 9.93663937529849e-06, + "loss": 22.748, "step": 3330 }, { - "epoch": 0.20336403074815435, - "grad_norm": 506.75, - "learning_rate": 7.974579641260895e-06, - "loss": 23.1774, + "epoch": 0.4067280614963087, + "grad_norm": 562.0, + "learning_rate": 9.936449103152239e-06, + "loss": 22.7601, "step": 3340 }, { - "epoch": 0.2039729050917117, - "grad_norm": 694.5, - "learning_rate": 7.974503532402396e-06, - "loss": 23.0922, + "epoch": 0.4079458101834234, + "grad_norm": 394.0, + "learning_rate": 9.936258831005988e-06, + "loss": 22.4228, "step": 3350 }, { - "epoch": 0.20458177943526903, - "grad_norm": 450.5, - "learning_rate": 7.974427423543894e-06, - "loss": 23.4778, + "epoch": 0.40916355887053807, + "grad_norm": 458.75, + "learning_rate": 9.936068558859739e-06, + "loss": 22.6764, "step": 3360 }, { - "epoch": 0.2051906537788264, - "grad_norm": 399.75, - "learning_rate": 7.974351314685395e-06, - "loss": 23.4635, + "epoch": 0.4103813075576528, + "grad_norm": 401.25, + "learning_rate": 9.935878286713487e-06, + "loss": 22.561, "step": 3370 }, { - "epoch": 0.20579952812238375, - "grad_norm": 515.5, - "learning_rate": 7.974275205826895e-06, - "loss": 23.1266, + "epoch": 0.4115990562447675, + "grad_norm": 289.0, + "learning_rate": 9.935688014567236e-06, + "loss": 22.6726, "step": 3380 }, { - "epoch": 0.20640840246594108, - "grad_norm": 499.25, - "learning_rate": 7.974199096968394e-06, - "loss": 23.3548, + "epoch": 0.41281680493188216, + "grad_norm": 531.0, + "learning_rate": 9.935497742420985e-06, + "loss": 22.6173, "step": 3390 }, { - "epoch": 0.20701727680949844, - "grad_norm": 432.0, - "learning_rate": 7.974122988109894e-06, - "loss": 23.5029, + "epoch": 0.4140345536189969, + "grad_norm": 299.5, + "learning_rate": 9.935307470274734e-06, + "loss": 22.4596, "step": 3400 }, { - "epoch": 0.2076261511530558, - "grad_norm": 581.5, - "learning_rate": 7.974046879251393e-06, - "loss": 23.5039, + "epoch": 0.4152523023061116, + "grad_norm": 329.75, + "learning_rate": 9.935117198128485e-06, + "loss": 22.6521, "step": 3410 }, { - "epoch": 0.20823502549661313, - "grad_norm": 537.5, - "learning_rate": 7.973970770392893e-06, - "loss": 23.0694, + "epoch": 0.41647005099322626, + "grad_norm": 337.75, + "learning_rate": 9.934926925982233e-06, + "loss": 22.5495, "step": 3420 }, { - "epoch": 0.2088438998401705, - "grad_norm": 490.25, - "learning_rate": 7.973894661534392e-06, - "loss": 23.3756, + "epoch": 0.417687799680341, + "grad_norm": 406.5, + "learning_rate": 9.934736653835982e-06, + "loss": 22.6828, "step": 3430 }, { - "epoch": 0.20945277418372785, - "grad_norm": 610.5, - "learning_rate": 7.973818552675892e-06, - "loss": 23.9835, + "epoch": 0.4189055483674557, + "grad_norm": 393.5, + "learning_rate": 9.934546381689731e-06, + "loss": 22.5028, "step": 3440 }, { - "epoch": 0.21006164852728518, - "grad_norm": 435.25, - "learning_rate": 7.973742443817391e-06, - "loss": 23.0766, + "epoch": 0.42012329705457035, + "grad_norm": 281.0, + "learning_rate": 9.93435610954348e-06, + "loss": 22.5415, "step": 3450 }, { - "epoch": 0.21067052287084254, - "grad_norm": 422.5, - "learning_rate": 7.973666334958891e-06, - "loss": 23.0507, + "epoch": 0.42134104574168507, + "grad_norm": 379.75, + "learning_rate": 9.93416583739723e-06, + "loss": 22.653, "step": 3460 }, { - "epoch": 0.21127939721439987, - "grad_norm": 570.0, - "learning_rate": 7.97359022610039e-06, - "loss": 23.6335, + "epoch": 0.42255879442879973, + "grad_norm": 341.0, + "learning_rate": 9.93397556525098e-06, + "loss": 22.5459, "step": 3470 }, { - "epoch": 0.21188827155795723, - "grad_norm": 384.5, - "learning_rate": 7.973514117241892e-06, - "loss": 23.7522, + "epoch": 0.42377654311591445, + "grad_norm": 456.25, + "learning_rate": 9.933785293104728e-06, + "loss": 22.6737, "step": 3480 }, { - "epoch": 0.21249714590151458, - "grad_norm": 883.5, - "learning_rate": 7.97343800838339e-06, - "loss": 23.1334, + "epoch": 0.42499429180302917, + "grad_norm": 422.75, + "learning_rate": 9.933595020958477e-06, + "loss": 22.3798, "step": 3490 }, { - "epoch": 0.21310602024507191, - "grad_norm": 445.75, - "learning_rate": 7.97336189952489e-06, - "loss": 23.0855, + "epoch": 0.42621204049014383, + "grad_norm": 411.25, + "learning_rate": 9.933404748812226e-06, + "loss": 22.5427, "step": 3500 }, { - "epoch": 0.21371489458862927, - "grad_norm": 566.5, - "learning_rate": 7.97328579066639e-06, - "loss": 23.5073, + "epoch": 0.42742978917725855, + "grad_norm": 354.5, + "learning_rate": 9.933214476665977e-06, + "loss": 22.4864, "step": 3510 }, { - "epoch": 0.21432376893218663, - "grad_norm": 375.0, - "learning_rate": 7.97320968180789e-06, - "loss": 23.1962, + "epoch": 0.42864753786437326, + "grad_norm": 281.5, + "learning_rate": 9.933024204519726e-06, + "loss": 22.3149, "step": 3520 }, { - "epoch": 0.21493264327574396, - "grad_norm": 546.5, - "learning_rate": 7.97313357294939e-06, - "loss": 23.5628, + "epoch": 0.4298652865514879, + "grad_norm": 276.25, + "learning_rate": 9.932833932373474e-06, + "loss": 22.5575, "step": 3530 }, { - "epoch": 0.21554151761930132, - "grad_norm": 445.75, - "learning_rate": 7.973057464090889e-06, - "loss": 23.6678, + "epoch": 0.43108303523860264, + "grad_norm": 385.25, + "learning_rate": 9.932643660227223e-06, + "loss": 22.2717, "step": 3540 }, { - "epoch": 0.21615039196285865, - "grad_norm": 452.75, - "learning_rate": 7.972981355232389e-06, - "loss": 23.2279, + "epoch": 0.4323007839257173, + "grad_norm": 341.5, + "learning_rate": 9.932453388080972e-06, + "loss": 22.1493, "step": 3550 }, { - "epoch": 0.216759266306416, - "grad_norm": 414.75, - "learning_rate": 7.972905246373888e-06, - "loss": 23.5895, + "epoch": 0.433518532612832, + "grad_norm": 325.5, + "learning_rate": 9.932263115934723e-06, + "loss": 22.6066, "step": 3560 }, { - "epoch": 0.21736814064997337, - "grad_norm": 381.0, - "learning_rate": 7.972829137515388e-06, - "loss": 23.2508, + "epoch": 0.43473628129994674, + "grad_norm": 298.75, + "learning_rate": 9.932072843788472e-06, + "loss": 22.8055, "step": 3570 }, { - "epoch": 0.2179770149935307, - "grad_norm": 573.5, - "learning_rate": 7.972753028656887e-06, - "loss": 23.1862, + "epoch": 0.4359540299870614, + "grad_norm": 777.0, + "learning_rate": 9.93188257164222e-06, + "loss": 22.5982, "step": 3580 }, { - "epoch": 0.21858588933708806, - "grad_norm": 563.5, - "learning_rate": 7.972676919798387e-06, - "loss": 23.4532, + "epoch": 0.4371717786741761, + "grad_norm": 612.0, + "learning_rate": 9.93169229949597e-06, + "loss": 22.5007, "step": 3590 }, { - "epoch": 0.21919476368064542, - "grad_norm": 563.0, - "learning_rate": 7.972600810939888e-06, - "loss": 23.3432, + "epoch": 0.43838952736129083, + "grad_norm": 396.75, + "learning_rate": 9.931502027349718e-06, + "loss": 22.478, "step": 3600 }, { - "epoch": 0.21980363802420275, - "grad_norm": 449.25, - "learning_rate": 7.972524702081386e-06, - "loss": 23.3239, + "epoch": 0.4396072760484055, + "grad_norm": 405.5, + "learning_rate": 9.931311755203469e-06, + "loss": 22.6416, "step": 3610 }, { - "epoch": 0.2204125123677601, - "grad_norm": 733.5, - "learning_rate": 7.972448593222887e-06, - "loss": 23.6231, + "epoch": 0.4408250247355202, + "grad_norm": 424.5, + "learning_rate": 9.931121483057218e-06, + "loss": 22.4448, "step": 3620 }, { - "epoch": 0.22102138671131746, - "grad_norm": 388.25, - "learning_rate": 7.972372484364385e-06, - "loss": 23.2343, + "epoch": 0.4420427734226349, + "grad_norm": 330.0, + "learning_rate": 9.930931210910967e-06, + "loss": 22.7654, "step": 3630 }, { - "epoch": 0.2216302610548748, - "grad_norm": 386.0, - "learning_rate": 7.972296375505886e-06, - "loss": 23.293, + "epoch": 0.4432605221097496, + "grad_norm": 471.75, + "learning_rate": 9.930740938764715e-06, + "loss": 22.5143, "step": 3640 }, { - "epoch": 0.22223913539843215, - "grad_norm": 539.0, - "learning_rate": 7.972220266647386e-06, - "loss": 23.8215, + "epoch": 0.4444782707968643, + "grad_norm": 286.25, + "learning_rate": 9.930550666618464e-06, + "loss": 22.3018, "step": 3650 }, { - "epoch": 0.22284800974198948, - "grad_norm": 627.5, - "learning_rate": 7.972144157788885e-06, - "loss": 23.6097, + "epoch": 0.44569601948397897, + "grad_norm": 441.75, + "learning_rate": 9.930360394472215e-06, + "loss": 22.6912, "step": 3660 }, { - "epoch": 0.22345688408554684, - "grad_norm": 552.5, - "learning_rate": 7.972068048930385e-06, - "loss": 23.3981, + "epoch": 0.4469137681710937, + "grad_norm": 269.75, + "learning_rate": 9.930170122325964e-06, + "loss": 22.6082, "step": 3670 }, { - "epoch": 0.2240657584291042, - "grad_norm": 736.0, - "learning_rate": 7.971991940071884e-06, - "loss": 23.202, + "epoch": 0.4481315168582084, + "grad_norm": 559.5, + "learning_rate": 9.929979850179713e-06, + "loss": 22.7375, "step": 3680 }, { - "epoch": 0.22467463277266153, - "grad_norm": 517.5, - "learning_rate": 7.971915831213384e-06, - "loss": 22.9725, + "epoch": 0.44934926554532306, + "grad_norm": 508.0, + "learning_rate": 9.929789578033461e-06, + "loss": 22.7566, "step": 3690 }, { - "epoch": 0.2252835071162189, - "grad_norm": 720.5, - "learning_rate": 7.971839722354883e-06, - "loss": 23.3352, + "epoch": 0.4505670142324378, + "grad_norm": 277.25, + "learning_rate": 9.92959930588721e-06, + "loss": 22.2571, "step": 3700 }, { - "epoch": 0.22589238145977625, - "grad_norm": 673.0, - "learning_rate": 7.971763613496383e-06, - "loss": 23.7148, + "epoch": 0.4517847629195525, + "grad_norm": 330.5, + "learning_rate": 9.929409033740961e-06, + "loss": 22.5837, "step": 3710 }, { - "epoch": 0.22650125580333358, - "grad_norm": 584.0, - "learning_rate": 7.971687504637883e-06, - "loss": 23.3811, + "epoch": 0.45300251160666716, + "grad_norm": 335.75, + "learning_rate": 9.92921876159471e-06, + "loss": 22.1115, "step": 3720 }, { - "epoch": 0.22711013014689094, - "grad_norm": 528.0, - "learning_rate": 7.971611395779382e-06, - "loss": 23.4013, + "epoch": 0.4542202602937819, + "grad_norm": 457.0, + "learning_rate": 9.929028489448459e-06, + "loss": 22.6568, "step": 3730 }, { - "epoch": 0.2277190044904483, - "grad_norm": 431.0, - "learning_rate": 7.971535286920883e-06, - "loss": 23.374, + "epoch": 0.4554380089808966, + "grad_norm": 383.75, + "learning_rate": 9.928838217302208e-06, + "loss": 22.6078, "step": 3740 }, { - "epoch": 0.22832787883400563, - "grad_norm": 388.0, - "learning_rate": 7.971459178062383e-06, - "loss": 23.2945, + "epoch": 0.45665575766801125, + "grad_norm": 312.25, + "learning_rate": 9.928647945155956e-06, + "loss": 22.5092, "step": 3750 }, { - "epoch": 0.22893675317756298, - "grad_norm": 376.5, - "learning_rate": 7.971383069203882e-06, - "loss": 23.2406, + "epoch": 0.45787350635512597, + "grad_norm": 272.25, + "learning_rate": 9.928457673009707e-06, + "loss": 22.6988, "step": 3760 }, { - "epoch": 0.22954562752112032, - "grad_norm": 450.0, - "learning_rate": 7.971306960345382e-06, - "loss": 23.1831, + "epoch": 0.45909125504224063, + "grad_norm": 329.5, + "learning_rate": 9.928267400863456e-06, + "loss": 22.5299, "step": 3770 }, { - "epoch": 0.23015450186467767, - "grad_norm": 422.25, - "learning_rate": 7.971230851486882e-06, - "loss": 23.0082, + "epoch": 0.46030900372935535, + "grad_norm": 454.0, + "learning_rate": 9.928077128717205e-06, + "loss": 22.7585, "step": 3780 }, { - "epoch": 0.23076337620823503, - "grad_norm": 396.75, - "learning_rate": 7.971154742628381e-06, - "loss": 23.6475, + "epoch": 0.46152675241647007, + "grad_norm": 297.75, + "learning_rate": 9.927886856570954e-06, + "loss": 22.5894, "step": 3790 }, { - "epoch": 0.23137225055179236, - "grad_norm": 440.5, - "learning_rate": 7.97107863376988e-06, - "loss": 23.6128, + "epoch": 0.4627445011035847, + "grad_norm": 330.0, + "learning_rate": 9.927696584424702e-06, + "loss": 22.7329, "step": 3800 }, { - "epoch": 0.23198112489534972, - "grad_norm": 421.75, - "learning_rate": 7.97100252491138e-06, - "loss": 23.2097, + "epoch": 0.46396224979069944, + "grad_norm": 377.25, + "learning_rate": 9.927506312278453e-06, + "loss": 22.8173, "step": 3810 }, { - "epoch": 0.23258999923890708, - "grad_norm": 662.5, - "learning_rate": 7.97092641605288e-06, - "loss": 23.3367, + "epoch": 0.46517999847781416, + "grad_norm": 339.75, + "learning_rate": 9.927316040132202e-06, + "loss": 22.5607, "step": 3820 }, { - "epoch": 0.2331988735824644, - "grad_norm": 808.0, - "learning_rate": 7.97085030719438e-06, - "loss": 23.7226, + "epoch": 0.4663977471649288, + "grad_norm": 314.25, + "learning_rate": 9.92712576798595e-06, + "loss": 22.5022, "step": 3830 }, { - "epoch": 0.23380774792602177, - "grad_norm": 615.5, - "learning_rate": 7.970774198335879e-06, - "loss": 23.1694, + "epoch": 0.46761549585204354, + "grad_norm": 693.0, + "learning_rate": 9.9269354958397e-06, + "loss": 22.6175, "step": 3840 }, { - "epoch": 0.23441662226957913, - "grad_norm": 466.25, - "learning_rate": 7.97069808947738e-06, - "loss": 23.564, + "epoch": 0.46883324453915826, + "grad_norm": 620.5, + "learning_rate": 9.926745223693449e-06, + "loss": 22.5456, "step": 3850 }, { - "epoch": 0.23502549661313646, - "grad_norm": 597.5, - "learning_rate": 7.970621980618878e-06, - "loss": 23.5629, + "epoch": 0.4700509932262729, + "grad_norm": 356.75, + "learning_rate": 9.926554951547199e-06, + "loss": 22.3967, "step": 3860 }, { - "epoch": 0.23563437095669382, - "grad_norm": 515.5, - "learning_rate": 7.97054587176038e-06, - "loss": 23.4269, + "epoch": 0.47126874191338763, + "grad_norm": 380.75, + "learning_rate": 9.926364679400948e-06, + "loss": 22.576, "step": 3870 }, { - "epoch": 0.23624324530025115, - "grad_norm": 487.0, - "learning_rate": 7.970469762901877e-06, - "loss": 23.0315, + "epoch": 0.4724864906005023, + "grad_norm": 678.5, + "learning_rate": 9.926174407254697e-06, + "loss": 22.8256, "step": 3880 }, { - "epoch": 0.2368521196438085, - "grad_norm": 372.25, - "learning_rate": 7.970393654043378e-06, - "loss": 23.3436, + "epoch": 0.473704239287617, + "grad_norm": 692.5, + "learning_rate": 9.925984135108446e-06, + "loss": 22.5717, "step": 3890 }, { - "epoch": 0.23746099398736586, - "grad_norm": 640.0, - "learning_rate": 7.970317545184878e-06, - "loss": 23.0826, + "epoch": 0.47492198797473173, + "grad_norm": 263.5, + "learning_rate": 9.925793862962195e-06, + "loss": 22.6193, "step": 3900 }, { - "epoch": 0.2380698683309232, - "grad_norm": 474.25, - "learning_rate": 7.970241436326377e-06, - "loss": 23.1085, + "epoch": 0.4761397366618464, + "grad_norm": 335.0, + "learning_rate": 9.925603590815945e-06, + "loss": 22.4029, "step": 3910 }, { - "epoch": 0.23867874267448055, - "grad_norm": 407.75, - "learning_rate": 7.970165327467877e-06, - "loss": 23.3948, + "epoch": 0.4773574853489611, + "grad_norm": 347.0, + "learning_rate": 9.925413318669694e-06, + "loss": 22.3616, "step": 3920 }, { - "epoch": 0.2392876170180379, - "grad_norm": 364.5, - "learning_rate": 7.970089218609376e-06, - "loss": 23.6434, + "epoch": 0.4785752340360758, + "grad_norm": 319.0, + "learning_rate": 9.925223046523443e-06, + "loss": 22.3703, "step": 3930 }, { - "epoch": 0.23989649136159524, - "grad_norm": 483.75, - "learning_rate": 7.970013109750876e-06, - "loss": 23.1049, + "epoch": 0.4797929827231905, + "grad_norm": 419.0, + "learning_rate": 9.925032774377192e-06, + "loss": 22.7403, "step": 3940 }, { - "epoch": 0.2405053657051526, - "grad_norm": 547.5, - "learning_rate": 7.969937000892376e-06, - "loss": 23.1653, + "epoch": 0.4810107314103052, + "grad_norm": 435.5, + "learning_rate": 9.92484250223094e-06, + "loss": 22.5668, "step": 3950 }, { - "epoch": 0.24111424004870996, - "grad_norm": 612.0, - "learning_rate": 7.969860892033875e-06, - "loss": 23.2834, + "epoch": 0.4822284800974199, + "grad_norm": 308.5, + "learning_rate": 9.924652230084691e-06, + "loss": 22.4282, "step": 3960 }, { - "epoch": 0.2417231143922673, - "grad_norm": 510.75, - "learning_rate": 7.969784783175375e-06, - "loss": 23.5791, + "epoch": 0.4834462287845346, + "grad_norm": 765.5, + "learning_rate": 9.92446195793844e-06, + "loss": 22.7642, "step": 3970 }, { - "epoch": 0.24233198873582465, - "grad_norm": 417.25, - "learning_rate": 7.969708674316874e-06, - "loss": 23.2259, + "epoch": 0.4846639774716493, + "grad_norm": 405.0, + "learning_rate": 9.924271685792189e-06, + "loss": 22.6432, "step": 3980 }, { - "epoch": 0.24294086307938198, - "grad_norm": 454.0, - "learning_rate": 7.969632565458375e-06, - "loss": 23.3012, + "epoch": 0.48588172615876396, + "grad_norm": 371.75, + "learning_rate": 9.924081413645938e-06, + "loss": 22.7205, "step": 3990 }, { - "epoch": 0.24354973742293934, - "grad_norm": 502.5, - "learning_rate": 7.969556456599875e-06, - "loss": 23.4953, + "epoch": 0.4870994748458787, + "grad_norm": 363.0, + "learning_rate": 9.923891141499687e-06, + "loss": 22.3163, "step": 4000 }, { - "epoch": 0.2441586117664967, - "grad_norm": 484.5, - "learning_rate": 7.969480347741375e-06, - "loss": 23.1314, + "epoch": 0.4883172235329934, + "grad_norm": 505.25, + "learning_rate": 9.923700869353437e-06, + "loss": 22.7384, "step": 4010 }, { - "epoch": 0.24476748611005403, - "grad_norm": 449.25, - "learning_rate": 7.969404238882874e-06, - "loss": 23.573, + "epoch": 0.48953497222010806, + "grad_norm": 407.25, + "learning_rate": 9.923510597207186e-06, + "loss": 22.4285, "step": 4020 }, { - "epoch": 0.2453763604536114, - "grad_norm": 391.5, - "learning_rate": 7.969328130024374e-06, - "loss": 23.5152, + "epoch": 0.4907527209072228, + "grad_norm": 483.5, + "learning_rate": 9.923320325060935e-06, + "loss": 22.5651, "step": 4030 }, { - "epoch": 0.24598523479716874, - "grad_norm": 460.0, - "learning_rate": 7.969252021165873e-06, - "loss": 23.2815, + "epoch": 0.4919704695943375, + "grad_norm": 442.0, + "learning_rate": 9.923130052914684e-06, + "loss": 22.8026, "step": 4040 }, { - "epoch": 0.24659410914072608, - "grad_norm": 440.5, - "learning_rate": 7.969175912307373e-06, - "loss": 23.8162, + "epoch": 0.49318821828145215, + "grad_norm": 561.5, + "learning_rate": 9.922939780768433e-06, + "loss": 22.578, "step": 4050 }, { - "epoch": 0.24720298348428343, - "grad_norm": 495.5, - "learning_rate": 7.969099803448872e-06, - "loss": 23.1437, + "epoch": 0.49440596696856687, + "grad_norm": 375.5, + "learning_rate": 9.922749508622183e-06, + "loss": 22.505, "step": 4060 }, { - "epoch": 0.2478118578278408, - "grad_norm": 576.5, - "learning_rate": 7.969023694590372e-06, - "loss": 23.1463, + "epoch": 0.4956237156556816, + "grad_norm": 365.5, + "learning_rate": 9.922559236475932e-06, + "loss": 22.562, "step": 4070 }, { - "epoch": 0.24842073217139812, - "grad_norm": 423.75, - "learning_rate": 7.968947585731871e-06, - "loss": 23.3878, + "epoch": 0.49684146434279625, + "grad_norm": 306.0, + "learning_rate": 9.922368964329681e-06, + "loss": 22.6089, "step": 4080 }, { - "epoch": 0.24902960651495548, - "grad_norm": 465.25, - "learning_rate": 7.968871476873371e-06, - "loss": 23.4622, + "epoch": 0.49805921302991096, + "grad_norm": 342.25, + "learning_rate": 9.92217869218343e-06, + "loss": 22.2594, "step": 4090 }, { - "epoch": 0.2496384808585128, - "grad_norm": 444.0, - "learning_rate": 7.968795368014872e-06, - "loss": 23.1397, + "epoch": 0.4992769617170256, + "grad_norm": 264.75, + "learning_rate": 9.92198842003718e-06, + "loss": 22.5169, "step": 4100 }, { - "epoch": 0.2502473552020702, - "grad_norm": 484.5, - "learning_rate": 7.96871925915637e-06, - "loss": 23.2921, + "epoch": 0.5004947104041404, + "grad_norm": 355.75, + "learning_rate": 9.92179814789093e-06, + "loss": 22.2688, "step": 4110 }, { - "epoch": 0.25085622954562753, - "grad_norm": 427.5, - "learning_rate": 7.968643150297871e-06, - "loss": 23.2433, + "epoch": 0.5017124590912551, + "grad_norm": 298.75, + "learning_rate": 9.921607875744678e-06, + "loss": 22.6661, "step": 4120 }, { - "epoch": 0.25146510388918486, - "grad_norm": 377.0, - "learning_rate": 7.968567041439369e-06, - "loss": 22.8399, + "epoch": 0.5029302077783697, + "grad_norm": 327.0, + "learning_rate": 9.921417603598427e-06, + "loss": 22.6486, "step": 4130 }, { - "epoch": 0.2520739782327422, - "grad_norm": 557.5, - "learning_rate": 7.96849093258087e-06, - "loss": 23.5973, + "epoch": 0.5041479564654844, + "grad_norm": 316.5, + "learning_rate": 9.921227331452176e-06, + "loss": 22.554, "step": 4140 }, { - "epoch": 0.2526828525762996, - "grad_norm": 412.5, - "learning_rate": 7.96841482372237e-06, - "loss": 23.1057, + "epoch": 0.5053657051525992, + "grad_norm": 528.5, + "learning_rate": 9.921037059305927e-06, + "loss": 22.5602, "step": 4150 }, { - "epoch": 0.2532917269198569, - "grad_norm": 470.5, - "learning_rate": 7.96833871486387e-06, - "loss": 23.4342, + "epoch": 0.5065834538397138, + "grad_norm": 579.5, + "learning_rate": 9.920846787159675e-06, + "loss": 22.4661, "step": 4160 }, { - "epoch": 0.25390060126341424, - "grad_norm": 491.25, - "learning_rate": 7.968262606005369e-06, - "loss": 23.3106, + "epoch": 0.5078012025268285, + "grad_norm": 307.5, + "learning_rate": 9.920656515013424e-06, + "loss": 22.8724, "step": 4170 }, { - "epoch": 0.2545094756069716, - "grad_norm": 563.5, - "learning_rate": 7.968186497146869e-06, - "loss": 23.4464, + "epoch": 0.5090189512139432, + "grad_norm": 531.0, + "learning_rate": 9.920466242867173e-06, + "loss": 22.5746, "step": 4180 }, { - "epoch": 0.25511834995052896, - "grad_norm": 739.0, - "learning_rate": 7.968110388288368e-06, - "loss": 23.0911, + "epoch": 0.5102366999010579, + "grad_norm": 415.0, + "learning_rate": 9.920275970720922e-06, + "loss": 22.6794, "step": 4190 }, { - "epoch": 0.2557272242940863, - "grad_norm": 345.0, - "learning_rate": 7.968034279429868e-06, - "loss": 23.2429, + "epoch": 0.5114544485881726, + "grad_norm": 349.25, + "learning_rate": 9.920085698574673e-06, + "loss": 22.4776, "step": 4200 }, { - "epoch": 0.2563360986376437, - "grad_norm": 435.25, - "learning_rate": 7.967958170571367e-06, - "loss": 23.5266, + "epoch": 0.5126721972752873, + "grad_norm": 708.5, + "learning_rate": 9.919895426428421e-06, + "loss": 22.7027, "step": 4210 }, { - "epoch": 0.256944972981201, - "grad_norm": 575.5, - "learning_rate": 7.967882061712868e-06, - "loss": 23.529, + "epoch": 0.513889945962402, + "grad_norm": 647.0, + "learning_rate": 9.91970515428217e-06, + "loss": 22.1834, "step": 4220 }, { - "epoch": 0.25755384732475833, - "grad_norm": 465.75, - "learning_rate": 7.967805952854366e-06, - "loss": 23.1344, + "epoch": 0.5151076946495167, + "grad_norm": 248.375, + "learning_rate": 9.91951488213592e-06, + "loss": 22.3468, "step": 4230 }, { - "epoch": 0.2581627216683157, - "grad_norm": 512.5, - "learning_rate": 7.967729843995868e-06, - "loss": 23.103, + "epoch": 0.5163254433366314, + "grad_norm": 301.75, + "learning_rate": 9.91932460998967e-06, + "loss": 22.2697, "step": 4240 }, { - "epoch": 0.25877159601187305, - "grad_norm": 639.5, - "learning_rate": 7.967653735137367e-06, - "loss": 23.2826, + "epoch": 0.5175431920237461, + "grad_norm": 387.75, + "learning_rate": 9.919134337843419e-06, + "loss": 22.3615, "step": 4250 }, { - "epoch": 0.2593804703554304, - "grad_norm": 457.75, - "learning_rate": 7.967577626278867e-06, - "loss": 23.3039, + "epoch": 0.5187609407108608, + "grad_norm": 316.75, + "learning_rate": 9.918944065697168e-06, + "loss": 22.314, "step": 4260 }, { - "epoch": 0.25998934469898777, - "grad_norm": 405.25, - "learning_rate": 7.967501517420366e-06, - "loss": 23.2088, + "epoch": 0.5199786893979755, + "grad_norm": 310.0, + "learning_rate": 9.918753793550916e-06, + "loss": 22.5011, "step": 4270 }, { - "epoch": 0.2605982190425451, - "grad_norm": 672.5, - "learning_rate": 7.967425408561866e-06, - "loss": 23.4113, + "epoch": 0.5211964380850902, + "grad_norm": 431.5, + "learning_rate": 9.918563521404665e-06, + "loss": 22.4555, "step": 4280 }, { - "epoch": 0.26120709338610243, - "grad_norm": 788.0, - "learning_rate": 7.967349299703365e-06, - "loss": 23.2271, + "epoch": 0.5224141867722049, + "grad_norm": 377.0, + "learning_rate": 9.918373249258416e-06, + "loss": 22.8352, "step": 4290 }, { - "epoch": 0.2618159677296598, - "grad_norm": 1123.0, - "learning_rate": 7.967273190844865e-06, - "loss": 23.6025, + "epoch": 0.5236319354593196, + "grad_norm": 284.25, + "learning_rate": 9.918182977112165e-06, + "loss": 22.6719, "step": 4300 }, { - "epoch": 0.26242484207321715, - "grad_norm": 754.0, - "learning_rate": 7.967197081986364e-06, - "loss": 23.528, + "epoch": 0.5248496841464343, + "grad_norm": 281.75, + "learning_rate": 9.917992704965914e-06, + "loss": 22.5164, "step": 4310 }, { - "epoch": 0.2630337164167745, - "grad_norm": 577.0, - "learning_rate": 7.967120973127864e-06, - "loss": 22.7315, + "epoch": 0.526067432833549, + "grad_norm": 830.0, + "learning_rate": 9.917802432819662e-06, + "loss": 22.4646, "step": 4320 }, { - "epoch": 0.26364259076033186, - "grad_norm": 702.5, - "learning_rate": 7.967044864269363e-06, - "loss": 22.9606, + "epoch": 0.5272851815206637, + "grad_norm": 496.5, + "learning_rate": 9.917612160673413e-06, + "loss": 22.6187, "step": 4330 }, { - "epoch": 0.2642514651038892, - "grad_norm": 462.75, - "learning_rate": 7.966968755410863e-06, - "loss": 22.9621, + "epoch": 0.5285029302077784, + "grad_norm": 495.0, + "learning_rate": 9.917421888527162e-06, + "loss": 22.6751, "step": 4340 }, { - "epoch": 0.2648603394474465, - "grad_norm": 572.5, - "learning_rate": 7.966892646552364e-06, - "loss": 23.2063, + "epoch": 0.529720678894893, + "grad_norm": 428.0, + "learning_rate": 9.91723161638091e-06, + "loss": 22.1483, "step": 4350 }, { - "epoch": 0.26546921379100386, - "grad_norm": 578.5, - "learning_rate": 7.966816537693862e-06, - "loss": 23.5121, + "epoch": 0.5309384275820077, + "grad_norm": 286.5, + "learning_rate": 9.91704134423466e-06, + "loss": 22.5045, "step": 4360 }, { - "epoch": 0.26607808813456124, - "grad_norm": 470.5, - "learning_rate": 7.966740428835363e-06, - "loss": 23.4099, + "epoch": 0.5321561762691225, + "grad_norm": 601.0, + "learning_rate": 9.916851072088408e-06, + "loss": 22.6367, "step": 4370 }, { - "epoch": 0.26668696247811857, - "grad_norm": 445.0, - "learning_rate": 7.966664319976863e-06, - "loss": 23.2292, + "epoch": 0.5333739249562371, + "grad_norm": 403.0, + "learning_rate": 9.916660799942159e-06, + "loss": 22.7377, "step": 4380 }, { - "epoch": 0.2672958368216759, - "grad_norm": 450.0, - "learning_rate": 7.966588211118362e-06, - "loss": 23.3068, + "epoch": 0.5345916736433518, + "grad_norm": 295.75, + "learning_rate": 9.916470527795908e-06, + "loss": 22.26, "step": 4390 }, { - "epoch": 0.2679047111652333, - "grad_norm": 435.0, - "learning_rate": 7.966512102259862e-06, - "loss": 23.0844, + "epoch": 0.5358094223304666, + "grad_norm": 460.5, + "learning_rate": 9.916280255649657e-06, + "loss": 22.4242, "step": 4400 }, { - "epoch": 0.2685135855087906, - "grad_norm": 457.75, - "learning_rate": 7.966435993401362e-06, - "loss": 23.0199, + "epoch": 0.5370271710175812, + "grad_norm": 422.75, + "learning_rate": 9.916089983503406e-06, + "loss": 22.2663, "step": 4410 }, { - "epoch": 0.26912245985234795, - "grad_norm": 783.5, - "learning_rate": 7.966359884542861e-06, - "loss": 23.145, + "epoch": 0.5382449197046959, + "grad_norm": 498.0, + "learning_rate": 9.915899711357155e-06, + "loss": 22.7889, "step": 4420 }, { - "epoch": 0.26973133419590534, - "grad_norm": 460.75, - "learning_rate": 7.96628377568436e-06, - "loss": 23.0592, + "epoch": 0.5394626683918107, + "grad_norm": 316.75, + "learning_rate": 9.915709439210905e-06, + "loss": 22.488, "step": 4430 }, { - "epoch": 0.27034020853946267, - "grad_norm": 515.5, - "learning_rate": 7.96620766682586e-06, - "loss": 22.9736, + "epoch": 0.5406804170789253, + "grad_norm": 291.5, + "learning_rate": 9.915519167064654e-06, + "loss": 22.4644, "step": 4440 }, { - "epoch": 0.27094908288302, - "grad_norm": 446.0, - "learning_rate": 7.966131557967361e-06, - "loss": 23.3037, + "epoch": 0.54189816576604, + "grad_norm": 389.0, + "learning_rate": 9.915328894918403e-06, + "loss": 22.3433, "step": 4450 }, { - "epoch": 0.2715579572265774, - "grad_norm": 448.25, - "learning_rate": 7.96605544910886e-06, - "loss": 23.4446, + "epoch": 0.5431159144531548, + "grad_norm": 302.5, + "learning_rate": 9.915138622772152e-06, + "loss": 22.3713, "step": 4460 }, { - "epoch": 0.2721668315701347, - "grad_norm": 386.75, - "learning_rate": 7.96597934025036e-06, - "loss": 23.1174, + "epoch": 0.5443336631402694, + "grad_norm": 495.25, + "learning_rate": 9.914948350625902e-06, + "loss": 22.7835, "step": 4470 }, { - "epoch": 0.27277570591369205, - "grad_norm": 462.25, - "learning_rate": 7.965903231391858e-06, - "loss": 23.0895, + "epoch": 0.5455514118273841, + "grad_norm": 469.5, + "learning_rate": 9.914758078479651e-06, + "loss": 22.1895, "step": 4480 }, { - "epoch": 0.27338458025724943, - "grad_norm": 681.0, - "learning_rate": 7.96582712253336e-06, - "loss": 23.1573, + "epoch": 0.5467691605144989, + "grad_norm": 295.75, + "learning_rate": 9.9145678063334e-06, + "loss": 22.8347, "step": 4490 }, { - "epoch": 0.27399345460080676, - "grad_norm": 451.5, - "learning_rate": 7.96575101367486e-06, - "loss": 22.9773, + "epoch": 0.5479869092016135, + "grad_norm": 479.25, + "learning_rate": 9.914377534187149e-06, + "loss": 22.5627, "step": 4500 }, { - "epoch": 0.2746023289443641, - "grad_norm": 499.75, - "learning_rate": 7.965674904816359e-06, - "loss": 23.3175, + "epoch": 0.5492046578887282, + "grad_norm": 311.0, + "learning_rate": 9.914187262040898e-06, + "loss": 22.2682, "step": 4510 }, { - "epoch": 0.2752112032879215, - "grad_norm": 609.5, - "learning_rate": 7.965598795957858e-06, - "loss": 23.4505, + "epoch": 0.550422406575843, + "grad_norm": 896.5, + "learning_rate": 9.913996989894648e-06, + "loss": 22.2863, "step": 4520 }, { - "epoch": 0.2758200776314788, - "grad_norm": 423.25, - "learning_rate": 7.965522687099358e-06, - "loss": 23.1152, + "epoch": 0.5516401552629576, + "grad_norm": 618.5, + "learning_rate": 9.913806717748397e-06, + "loss": 22.7516, "step": 4530 }, { - "epoch": 0.27642895197503614, - "grad_norm": 360.5, - "learning_rate": 7.965446578240857e-06, - "loss": 23.0735, + "epoch": 0.5528579039500723, + "grad_norm": 747.0, + "learning_rate": 9.913616445602146e-06, + "loss": 22.3653, "step": 4540 }, { - "epoch": 0.2770378263185935, - "grad_norm": 491.25, - "learning_rate": 7.965370469382357e-06, - "loss": 23.3136, + "epoch": 0.554075652637187, + "grad_norm": 272.5, + "learning_rate": 9.913426173455895e-06, + "loss": 22.181, "step": 4550 }, { - "epoch": 0.27764670066215086, - "grad_norm": 464.0, - "learning_rate": 7.965294360523856e-06, - "loss": 23.1639, + "epoch": 0.5552934013243017, + "grad_norm": 505.5, + "learning_rate": 9.913235901309644e-06, + "loss": 22.4785, "step": 4560 }, { - "epoch": 0.2782555750057082, - "grad_norm": 521.5, - "learning_rate": 7.965218251665356e-06, - "loss": 23.3381, + "epoch": 0.5565111500114164, + "grad_norm": 636.0, + "learning_rate": 9.913045629163394e-06, + "loss": 22.508, "step": 4570 }, { - "epoch": 0.2788644493492655, - "grad_norm": 434.25, - "learning_rate": 7.965142142806856e-06, - "loss": 23.8722, + "epoch": 0.557728898698531, + "grad_norm": 339.0, + "learning_rate": 9.912855357017143e-06, + "loss": 22.3734, "step": 4580 }, { - "epoch": 0.2794733236928229, - "grad_norm": 613.5, - "learning_rate": 7.965066033948355e-06, - "loss": 22.7006, + "epoch": 0.5589466473856458, + "grad_norm": 447.5, + "learning_rate": 9.912665084870892e-06, + "loss": 22.115, "step": 4590 }, { - "epoch": 0.28008219803638024, - "grad_norm": 499.75, - "learning_rate": 7.964989925089856e-06, - "loss": 22.9187, + "epoch": 0.5601643960727605, + "grad_norm": 328.25, + "learning_rate": 9.912474812724641e-06, + "loss": 22.4416, "step": 4600 }, { - "epoch": 0.28069107237993757, - "grad_norm": 968.5, - "learning_rate": 7.964913816231356e-06, - "loss": 22.9807, + "epoch": 0.5613821447598751, + "grad_norm": 770.5, + "learning_rate": 9.91228454057839e-06, + "loss": 22.5458, "step": 4610 }, { - "epoch": 0.28129994672349495, - "grad_norm": 834.0, - "learning_rate": 7.964837707372855e-06, - "loss": 23.3458, + "epoch": 0.5625998934469899, + "grad_norm": 465.5, + "learning_rate": 9.91209426843214e-06, + "loss": 22.454, "step": 4620 }, { - "epoch": 0.2819088210670523, - "grad_norm": 442.75, - "learning_rate": 7.964761598514355e-06, - "loss": 23.0276, + "epoch": 0.5638176421341046, + "grad_norm": 348.0, + "learning_rate": 9.91190399628589e-06, + "loss": 22.1964, "step": 4630 }, { - "epoch": 0.2825176954106096, - "grad_norm": 648.5, - "learning_rate": 7.964685489655855e-06, - "loss": 23.0723, + "epoch": 0.5650353908212192, + "grad_norm": 997.5, + "learning_rate": 9.911713724139638e-06, + "loss": 22.3406, "step": 4640 }, { - "epoch": 0.283126569754167, - "grad_norm": 423.25, - "learning_rate": 7.964609380797354e-06, - "loss": 23.0752, + "epoch": 0.566253139508334, + "grad_norm": 500.75, + "learning_rate": 9.911523451993387e-06, + "loss": 22.1292, "step": 4650 }, { - "epoch": 0.28373544409772433, - "grad_norm": 472.5, - "learning_rate": 7.964533271938854e-06, - "loss": 23.1104, + "epoch": 0.5674708881954487, + "grad_norm": 355.0, + "learning_rate": 9.911333179847136e-06, + "loss": 22.3442, "step": 4660 }, { - "epoch": 0.28434431844128166, - "grad_norm": 595.5, - "learning_rate": 7.964457163080353e-06, - "loss": 23.3532, + "epoch": 0.5686886368825633, + "grad_norm": 433.75, + "learning_rate": 9.911142907700886e-06, + "loss": 22.1724, "step": 4670 }, { - "epoch": 0.28495319278483905, - "grad_norm": 347.0, - "learning_rate": 7.964381054221853e-06, - "loss": 23.0569, + "epoch": 0.5699063855696781, + "grad_norm": 310.0, + "learning_rate": 9.910952635554635e-06, + "loss": 22.4883, "step": 4680 }, { - "epoch": 0.2855620671283964, - "grad_norm": 417.75, - "learning_rate": 7.964304945363352e-06, - "loss": 23.4733, + "epoch": 0.5711241342567928, + "grad_norm": 398.0, + "learning_rate": 9.910762363408384e-06, + "loss": 22.3938, "step": 4690 }, { - "epoch": 0.2861709414719537, - "grad_norm": 461.5, - "learning_rate": 7.964228836504854e-06, - "loss": 23.3129, + "epoch": 0.5723418829439074, + "grad_norm": 308.75, + "learning_rate": 9.910572091262133e-06, + "loss": 22.2949, "step": 4700 }, { - "epoch": 0.2867798158155111, - "grad_norm": 498.25, - "learning_rate": 7.964152727646351e-06, - "loss": 23.4051, + "epoch": 0.5735596316310222, + "grad_norm": 715.0, + "learning_rate": 9.910381819115882e-06, + "loss": 22.4441, "step": 4710 }, { - "epoch": 0.2873886901590684, - "grad_norm": 506.5, - "learning_rate": 7.964076618787853e-06, - "loss": 23.1157, + "epoch": 0.5747773803181369, + "grad_norm": 526.0, + "learning_rate": 9.910191546969633e-06, + "loss": 22.2936, "step": 4720 }, { - "epoch": 0.28799756450262576, - "grad_norm": 687.5, - "learning_rate": 7.96400050992935e-06, - "loss": 23.1517, + "epoch": 0.5759951290052515, + "grad_norm": 285.5, + "learning_rate": 9.910001274823381e-06, + "loss": 22.4871, "step": 4730 }, { - "epoch": 0.28860643884618314, - "grad_norm": 490.25, - "learning_rate": 7.963924401070852e-06, - "loss": 22.461, + "epoch": 0.5772128776923663, + "grad_norm": 820.5, + "learning_rate": 9.90981100267713e-06, + "loss": 22.5226, "step": 4740 }, { - "epoch": 0.2892153131897405, - "grad_norm": 352.75, - "learning_rate": 7.963848292212351e-06, - "loss": 22.8928, + "epoch": 0.578430626379481, + "grad_norm": 763.5, + "learning_rate": 9.909620730530879e-06, + "loss": 22.6878, "step": 4750 }, { - "epoch": 0.2898241875332978, - "grad_norm": 523.5, - "learning_rate": 7.963772183353851e-06, - "loss": 23.0327, + "epoch": 0.5796483750665956, + "grad_norm": 271.0, + "learning_rate": 9.909430458384628e-06, + "loss": 22.1892, "step": 4760 }, { - "epoch": 0.29043306187685514, - "grad_norm": 426.5, - "learning_rate": 7.96369607449535e-06, - "loss": 22.9028, + "epoch": 0.5808661237537103, + "grad_norm": 344.5, + "learning_rate": 9.909240186238379e-06, + "loss": 22.5667, "step": 4770 }, { - "epoch": 0.2910419362204125, - "grad_norm": 664.0, - "learning_rate": 7.96361996563685e-06, - "loss": 23.0223, + "epoch": 0.582083872440825, + "grad_norm": 279.75, + "learning_rate": 9.909049914092127e-06, + "loss": 22.5652, "step": 4780 }, { - "epoch": 0.29165081056396985, - "grad_norm": 534.5, - "learning_rate": 7.96354385677835e-06, - "loss": 23.1204, + "epoch": 0.5833016211279397, + "grad_norm": 529.0, + "learning_rate": 9.908859641945876e-06, + "loss": 22.8018, "step": 4790 }, { - "epoch": 0.2922596849075272, - "grad_norm": 502.25, - "learning_rate": 7.963467747919849e-06, - "loss": 23.1276, + "epoch": 0.5845193698150544, + "grad_norm": 402.5, + "learning_rate": 9.908669369799625e-06, + "loss": 22.7684, "step": 4800 }, { - "epoch": 0.29286855925108457, - "grad_norm": 462.75, - "learning_rate": 7.963391639061349e-06, - "loss": 23.1368, + "epoch": 0.5857371185021691, + "grad_norm": 631.0, + "learning_rate": 9.908479097653374e-06, + "loss": 22.4754, "step": 4810 }, { - "epoch": 0.2934774335946419, - "grad_norm": 427.75, - "learning_rate": 7.963315530202848e-06, - "loss": 22.6855, + "epoch": 0.5869548671892838, + "grad_norm": 260.0, + "learning_rate": 9.908288825507125e-06, + "loss": 22.5717, "step": 4820 }, { - "epoch": 0.29408630793819923, - "grad_norm": 469.75, - "learning_rate": 7.963239421344348e-06, - "loss": 22.8931, + "epoch": 0.5881726158763985, + "grad_norm": 549.0, + "learning_rate": 9.908098553360874e-06, + "loss": 22.4253, "step": 4830 }, { - "epoch": 0.2946951822817566, - "grad_norm": 651.0, - "learning_rate": 7.963163312485849e-06, - "loss": 23.0368, + "epoch": 0.5893903645635132, + "grad_norm": 767.0, + "learning_rate": 9.907908281214622e-06, + "loss": 22.3855, "step": 4840 }, { - "epoch": 0.29530405662531395, - "grad_norm": 584.0, - "learning_rate": 7.963087203627348e-06, - "loss": 23.5478, + "epoch": 0.5906081132506279, + "grad_norm": 874.0, + "learning_rate": 9.907718009068371e-06, + "loss": 21.8794, "step": 4850 }, { - "epoch": 0.2959129309688713, - "grad_norm": 846.5, - "learning_rate": 7.963011094768848e-06, - "loss": 23.2312, + "epoch": 0.5918258619377426, + "grad_norm": 373.25, + "learning_rate": 9.90752773692212e-06, + "loss": 22.1089, "step": 4860 }, { - "epoch": 0.29652180531242867, - "grad_norm": 697.0, - "learning_rate": 7.962934985910348e-06, - "loss": 22.9755, + "epoch": 0.5930436106248573, + "grad_norm": 459.75, + "learning_rate": 9.90733746477587e-06, + "loss": 21.9316, "step": 4870 }, { - "epoch": 0.297130679655986, - "grad_norm": 686.5, - "learning_rate": 7.962858877051847e-06, - "loss": 23.1155, + "epoch": 0.594261359311972, + "grad_norm": 693.0, + "learning_rate": 9.90714719262962e-06, + "loss": 21.9847, "step": 4880 }, { - "epoch": 0.2977395539995433, - "grad_norm": 549.5, - "learning_rate": 7.962782768193347e-06, - "loss": 23.0959, + "epoch": 0.5954791079990867, + "grad_norm": 802.5, + "learning_rate": 9.906956920483368e-06, + "loss": 22.4072, "step": 4890 }, { - "epoch": 0.2983484283431007, - "grad_norm": 430.5, - "learning_rate": 7.962706659334846e-06, - "loss": 22.8863, + "epoch": 0.5966968566862014, + "grad_norm": 467.5, + "learning_rate": 9.906766648337117e-06, + "loss": 22.2952, "step": 4900 }, { - "epoch": 0.29895730268665804, - "grad_norm": 403.0, - "learning_rate": 7.962630550476346e-06, - "loss": 23.0113, + "epoch": 0.5979146053733161, + "grad_norm": 507.0, + "learning_rate": 9.906576376190866e-06, + "loss": 22.2678, "step": 4910 }, { - "epoch": 0.2995661770302154, - "grad_norm": 716.0, - "learning_rate": 7.962554441617845e-06, - "loss": 23.1476, + "epoch": 0.5991323540604307, + "grad_norm": 430.5, + "learning_rate": 9.906386104044617e-06, + "loss": 22.4595, "step": 4920 }, { - "epoch": 0.30017505137377276, - "grad_norm": 402.0, - "learning_rate": 7.962478332759345e-06, - "loss": 22.8651, + "epoch": 0.6003501027475455, + "grad_norm": 357.5, + "learning_rate": 9.906195831898366e-06, + "loss": 22.149, "step": 4930 }, { - "epoch": 0.3007839257173301, - "grad_norm": 439.0, - "learning_rate": 7.962402223900844e-06, - "loss": 22.6472, + "epoch": 0.6015678514346602, + "grad_norm": 518.0, + "learning_rate": 9.906005559752114e-06, + "loss": 22.2035, "step": 4940 }, { - "epoch": 0.3013928000608874, - "grad_norm": 448.0, - "learning_rate": 7.962326115042346e-06, - "loss": 23.111, + "epoch": 0.6027856001217748, + "grad_norm": 688.5, + "learning_rate": 9.905815287605863e-06, + "loss": 22.3291, "step": 4950 }, { - "epoch": 0.3020016744044448, - "grad_norm": 375.25, - "learning_rate": 7.962250006183844e-06, - "loss": 23.1311, + "epoch": 0.6040033488088896, + "grad_norm": 508.75, + "learning_rate": 9.905625015459612e-06, + "loss": 22.1793, "step": 4960 }, { - "epoch": 0.30261054874800214, - "grad_norm": 589.0, - "learning_rate": 7.962173897325345e-06, - "loss": 22.864, + "epoch": 0.6052210974960043, + "grad_norm": 575.0, + "learning_rate": 9.905434743313363e-06, + "loss": 22.5242, "step": 4970 }, { - "epoch": 0.30321942309155947, - "grad_norm": 468.75, - "learning_rate": 7.962097788466843e-06, - "loss": 22.9987, + "epoch": 0.6064388461831189, + "grad_norm": 760.5, + "learning_rate": 9.905244471167112e-06, + "loss": 22.1918, "step": 4980 }, { - "epoch": 0.3038282974351168, - "grad_norm": 608.5, - "learning_rate": 7.962021679608344e-06, - "loss": 23.2671, + "epoch": 0.6076565948702336, + "grad_norm": 335.75, + "learning_rate": 9.90505419902086e-06, + "loss": 22.12, "step": 4990 }, { - "epoch": 0.3044371717786742, - "grad_norm": 388.5, - "learning_rate": 7.961945570749843e-06, - "loss": 23.0071, + "epoch": 0.6088743435573484, + "grad_norm": 343.5, + "learning_rate": 9.90486392687461e-06, + "loss": 22.0701, "step": 5000 }, { - "epoch": 0.3044371717786742, - "eval_loss": 2.88149094581604, - "eval_runtime": 95.0877, - "eval_samples_per_second": 2267.548, - "eval_steps_per_second": 35.43, + "epoch": 0.6088743435573484, + "eval_loss": 2.798462390899658, + "eval_runtime": 95.0278, + "eval_samples_per_second": 2268.978, + "eval_steps_per_second": 35.453, "step": 5000 } ], "logging_steps": 10, - "max_steps": 16423, + "max_steps": 8211, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5000, @@ -3534,8 +3534,8 @@ "attributes": {} } }, - "total_flos": 8.72761379520512e+17, - "train_batch_size": 8, + "total_flos": 1.745522759041024e+18, + "train_batch_size": 16, "trial_name": null, "trial_params": null }