| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 6723, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.004462293618920125, |
| "grad_norm": 3.7051006087750524, |
| "learning_rate": 1.337295690936107e-07, |
| "loss": 0.5444, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.00892458723784025, |
| "grad_norm": 3.048784847442942, |
| "learning_rate": 2.823179791976226e-07, |
| "loss": 0.5572, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.013386880856760375, |
| "grad_norm": 2.048957705438653, |
| "learning_rate": 4.309063893016345e-07, |
| "loss": 0.5423, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0178491744756805, |
| "grad_norm": 1.461567031084094, |
| "learning_rate": 5.794947994056464e-07, |
| "loss": 0.5134, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.022311468094600623, |
| "grad_norm": 1.0103720435475612, |
| "learning_rate": 7.280832095096584e-07, |
| "loss": 0.4907, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.02677376171352075, |
| "grad_norm": 0.6889844403965782, |
| "learning_rate": 8.766716196136702e-07, |
| "loss": 0.4818, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.031236055332440876, |
| "grad_norm": 0.5268842063919165, |
| "learning_rate": 1.025260029717682e-06, |
| "loss": 0.4483, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.035698348951361, |
| "grad_norm": 0.46396807974468, |
| "learning_rate": 1.173848439821694e-06, |
| "loss": 0.4457, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.040160642570281124, |
| "grad_norm": 0.45432241367642123, |
| "learning_rate": 1.322436849925706e-06, |
| "loss": 0.4398, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.04462293618920125, |
| "grad_norm": 0.3566878144571345, |
| "learning_rate": 1.4710252600297179e-06, |
| "loss": 0.4295, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.049085229808121376, |
| "grad_norm": 0.3076647669838012, |
| "learning_rate": 1.6196136701337297e-06, |
| "loss": 0.4126, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0535475234270415, |
| "grad_norm": 0.30260053228577904, |
| "learning_rate": 1.7682020802377416e-06, |
| "loss": 0.4021, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.05800981704596162, |
| "grad_norm": 0.3122317267903143, |
| "learning_rate": 1.9167904903417537e-06, |
| "loss": 0.3946, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.06247211066488175, |
| "grad_norm": 0.3476751821982303, |
| "learning_rate": 2.0653789004457653e-06, |
| "loss": 0.3965, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.06693440428380187, |
| "grad_norm": 0.33812027755011803, |
| "learning_rate": 2.213967310549777e-06, |
| "loss": 0.4031, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.071396697902722, |
| "grad_norm": 0.3120124573653305, |
| "learning_rate": 2.362555720653789e-06, |
| "loss": 0.3984, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.07585899152164212, |
| "grad_norm": 0.32219654623219857, |
| "learning_rate": 2.511144130757801e-06, |
| "loss": 0.382, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.08032128514056225, |
| "grad_norm": 0.37321844110875607, |
| "learning_rate": 2.659732540861813e-06, |
| "loss": 0.397, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.08478357875948238, |
| "grad_norm": 0.34121006704676865, |
| "learning_rate": 2.808320950965825e-06, |
| "loss": 0.3877, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0892458723784025, |
| "grad_norm": 0.34259431719098377, |
| "learning_rate": 2.956909361069837e-06, |
| "loss": 0.3778, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.09370816599732262, |
| "grad_norm": 0.4634153760914143, |
| "learning_rate": 3.1054977711738487e-06, |
| "loss": 0.3838, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.09817045961624275, |
| "grad_norm": 0.34508537944318013, |
| "learning_rate": 3.2540861812778607e-06, |
| "loss": 0.3823, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.10263275323516287, |
| "grad_norm": 0.38159809656131194, |
| "learning_rate": 3.4026745913818724e-06, |
| "loss": 0.3641, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.107095046854083, |
| "grad_norm": 0.3204989890164747, |
| "learning_rate": 3.5512630014858845e-06, |
| "loss": 0.3819, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.11155734047300313, |
| "grad_norm": 0.3576572957193155, |
| "learning_rate": 3.6998514115898966e-06, |
| "loss": 0.3727, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.11601963409192324, |
| "grad_norm": 0.35700198129793526, |
| "learning_rate": 3.848439821693909e-06, |
| "loss": 0.3563, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.12048192771084337, |
| "grad_norm": 0.3192301931360082, |
| "learning_rate": 3.99702823179792e-06, |
| "loss": 0.3738, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.1249442213297635, |
| "grad_norm": 0.3196766826358136, |
| "learning_rate": 4.145616641901932e-06, |
| "loss": 0.3587, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.12940651494868363, |
| "grad_norm": 0.610281080707191, |
| "learning_rate": 4.294205052005943e-06, |
| "loss": 0.3664, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.13386880856760375, |
| "grad_norm": 0.35583167134748195, |
| "learning_rate": 4.442793462109955e-06, |
| "loss": 0.3631, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.13833110218652386, |
| "grad_norm": 0.3595334153498753, |
| "learning_rate": 4.591381872213967e-06, |
| "loss": 0.3764, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.142793395805444, |
| "grad_norm": 0.3524084490333156, |
| "learning_rate": 4.7399702823179795e-06, |
| "loss": 0.3644, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.14725568942436412, |
| "grad_norm": 0.3288515185706153, |
| "learning_rate": 4.8885586924219916e-06, |
| "loss": 0.3661, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.15171798304328424, |
| "grad_norm": 0.3351573861983467, |
| "learning_rate": 5.037147102526004e-06, |
| "loss": 0.3532, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.15618027666220438, |
| "grad_norm": 0.33700658750684853, |
| "learning_rate": 5.185735512630016e-06, |
| "loss": 0.3531, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.1606425702811245, |
| "grad_norm": 0.32516141156935247, |
| "learning_rate": 5.334323922734027e-06, |
| "loss": 0.3473, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.1651048639000446, |
| "grad_norm": 0.3337986135592338, |
| "learning_rate": 5.48291233283804e-06, |
| "loss": 0.3511, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.16956715751896476, |
| "grad_norm": 0.3323774922346478, |
| "learning_rate": 5.631500742942051e-06, |
| "loss": 0.3586, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.17402945113788487, |
| "grad_norm": 0.4792457547540971, |
| "learning_rate": 5.780089153046062e-06, |
| "loss": 0.3624, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.178491744756805, |
| "grad_norm": 0.42063892972841144, |
| "learning_rate": 5.9286775631500745e-06, |
| "loss": 0.3608, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.18295403837572513, |
| "grad_norm": 1.0840249935976123, |
| "learning_rate": 6.0772659732540865e-06, |
| "loss": 0.3602, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.18741633199464525, |
| "grad_norm": 0.3558589627850871, |
| "learning_rate": 6.225854383358099e-06, |
| "loss": 0.3514, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.19187862561356536, |
| "grad_norm": 0.40948391676162177, |
| "learning_rate": 6.37444279346211e-06, |
| "loss": 0.3568, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.1963409192324855, |
| "grad_norm": 0.33870866081421813, |
| "learning_rate": 6.523031203566123e-06, |
| "loss": 0.3475, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.20080321285140562, |
| "grad_norm": 0.3491791728647271, |
| "learning_rate": 6.671619613670134e-06, |
| "loss": 0.3455, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.20526550647032574, |
| "grad_norm": 0.35294486681555637, |
| "learning_rate": 6.820208023774146e-06, |
| "loss": 0.358, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.20972780008924588, |
| "grad_norm": 0.3561095555301197, |
| "learning_rate": 6.968796433878157e-06, |
| "loss": 0.3484, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.214190093708166, |
| "grad_norm": 0.37400205186538277, |
| "learning_rate": 7.11738484398217e-06, |
| "loss": 0.3512, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.2186523873270861, |
| "grad_norm": 0.368948524072989, |
| "learning_rate": 7.2659732540861815e-06, |
| "loss": 0.3564, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.22311468094600626, |
| "grad_norm": 0.31992893707470843, |
| "learning_rate": 7.4145616641901944e-06, |
| "loss": 0.3491, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.22757697456492637, |
| "grad_norm": 0.3196176367911485, |
| "learning_rate": 7.563150074294206e-06, |
| "loss": 0.3465, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.2320392681838465, |
| "grad_norm": 0.36129211410497347, |
| "learning_rate": 7.711738484398219e-06, |
| "loss": 0.3406, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.23650156180276663, |
| "grad_norm": 0.3545213437105017, |
| "learning_rate": 7.86032689450223e-06, |
| "loss": 0.3445, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.24096385542168675, |
| "grad_norm": 0.35389453665592974, |
| "learning_rate": 8.008915304606241e-06, |
| "loss": 0.3459, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.24542614904060686, |
| "grad_norm": 0.3543572102436423, |
| "learning_rate": 8.157503714710252e-06, |
| "loss": 0.3412, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.249888442659527, |
| "grad_norm": 0.36271347062518616, |
| "learning_rate": 8.306092124814265e-06, |
| "loss": 0.3387, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.2543507362784471, |
| "grad_norm": 0.4426754614509785, |
| "learning_rate": 8.454680534918276e-06, |
| "loss": 0.3491, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.25881302989736726, |
| "grad_norm": 0.39369474546630295, |
| "learning_rate": 8.60326894502229e-06, |
| "loss": 0.3384, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.26327532351628735, |
| "grad_norm": 0.3582656486286681, |
| "learning_rate": 8.7518573551263e-06, |
| "loss": 0.3544, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.2677376171352075, |
| "grad_norm": 0.34957569136942046, |
| "learning_rate": 8.900445765230312e-06, |
| "loss": 0.3429, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.27219991075412764, |
| "grad_norm": 0.3945306585524228, |
| "learning_rate": 9.049034175334325e-06, |
| "loss": 0.3478, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.2766622043730477, |
| "grad_norm": 0.3424763037614921, |
| "learning_rate": 9.197622585438336e-06, |
| "loss": 0.3312, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.28112449799196787, |
| "grad_norm": 0.3459851560845357, |
| "learning_rate": 9.346210995542349e-06, |
| "loss": 0.3495, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.285586791610888, |
| "grad_norm": 0.3744360789694989, |
| "learning_rate": 9.49479940564636e-06, |
| "loss": 0.3483, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.2900490852298081, |
| "grad_norm": 0.35814716631290894, |
| "learning_rate": 9.643387815750373e-06, |
| "loss": 0.341, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.29451137884872824, |
| "grad_norm": 0.5299368509807092, |
| "learning_rate": 9.791976225854384e-06, |
| "loss": 0.3365, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.2989736724676484, |
| "grad_norm": 0.38875418783791804, |
| "learning_rate": 9.940564635958396e-06, |
| "loss": 0.3424, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.3034359660865685, |
| "grad_norm": 0.45411665098963555, |
| "learning_rate": 9.999975732158253e-06, |
| "loss": 0.3368, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.3078982597054886, |
| "grad_norm": 0.3825559069839744, |
| "learning_rate": 9.999827429534007e-06, |
| "loss": 0.3389, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.31236055332440876, |
| "grad_norm": 0.36176618602239424, |
| "learning_rate": 9.999544310413833e-06, |
| "loss": 0.3395, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.31682284694332885, |
| "grad_norm": 0.40796102054124517, |
| "learning_rate": 9.999126382431823e-06, |
| "loss": 0.3435, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.321285140562249, |
| "grad_norm": 0.3829565760724817, |
| "learning_rate": 9.998573656857085e-06, |
| "loss": 0.3339, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.32574743418116914, |
| "grad_norm": 0.40664177051788203, |
| "learning_rate": 9.997886148593436e-06, |
| "loss": 0.3317, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.3302097278000892, |
| "grad_norm": 0.3640080288216767, |
| "learning_rate": 9.997063876179007e-06, |
| "loss": 0.3452, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.33467202141900937, |
| "grad_norm": 0.41068702259937845, |
| "learning_rate": 9.996106861785741e-06, |
| "loss": 0.3388, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.3391343150379295, |
| "grad_norm": 0.3410374798720669, |
| "learning_rate": 9.995015131218794e-06, |
| "loss": 0.349, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.3435966086568496, |
| "grad_norm": 0.37561698032089047, |
| "learning_rate": 9.99378871391584e-06, |
| "loss": 0.3375, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.34805890227576974, |
| "grad_norm": 0.3844118776195893, |
| "learning_rate": 9.992427642946278e-06, |
| "loss": 0.3339, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.3525211958946899, |
| "grad_norm": 0.35418994101086143, |
| "learning_rate": 9.990931955010335e-06, |
| "loss": 0.3373, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.35698348951361, |
| "grad_norm": 0.3109086430490103, |
| "learning_rate": 9.989301690438087e-06, |
| "loss": 0.3255, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.3614457831325301, |
| "grad_norm": 0.35125455281231016, |
| "learning_rate": 9.987536893188363e-06, |
| "loss": 0.3385, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.36590807675145026, |
| "grad_norm": 0.35539785853231953, |
| "learning_rate": 9.98563761084756e-06, |
| "loss": 0.3334, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.37037037037037035, |
| "grad_norm": 0.8732726071617729, |
| "learning_rate": 9.983603894628366e-06, |
| "loss": 0.3216, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.3748326639892905, |
| "grad_norm": 0.3239156286291644, |
| "learning_rate": 9.98143579936837e-06, |
| "loss": 0.3395, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.37929495760821064, |
| "grad_norm": 0.35416799423504325, |
| "learning_rate": 9.979133383528591e-06, |
| "loss": 0.3419, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.3837572512271307, |
| "grad_norm": 0.3697582867922802, |
| "learning_rate": 9.976696709191899e-06, |
| "loss": 0.3385, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.38821954484605087, |
| "grad_norm": 0.31652698452061007, |
| "learning_rate": 9.974125842061343e-06, |
| "loss": 0.3415, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.392681838464971, |
| "grad_norm": 0.3806737592473093, |
| "learning_rate": 9.971420851458373e-06, |
| "loss": 0.34, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.3971441320838911, |
| "grad_norm": 0.3493066568615837, |
| "learning_rate": 9.968581810320979e-06, |
| "loss": 0.3435, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.40160642570281124, |
| "grad_norm": 0.3441435892323655, |
| "learning_rate": 9.965608795201717e-06, |
| "loss": 0.327, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.4060687193217314, |
| "grad_norm": 0.3347599296485585, |
| "learning_rate": 9.96250188626565e-06, |
| "loss": 0.3356, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.4105310129406515, |
| "grad_norm": 0.30962222868754863, |
| "learning_rate": 9.959261167288185e-06, |
| "loss": 0.3279, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.4149933065595716, |
| "grad_norm": 0.48278711145118325, |
| "learning_rate": 9.955886725652815e-06, |
| "loss": 0.3358, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.41945560017849176, |
| "grad_norm": 0.33458745369628223, |
| "learning_rate": 9.952378652348758e-06, |
| "loss": 0.3359, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.42391789379741185, |
| "grad_norm": 0.36375248941574195, |
| "learning_rate": 9.948737041968509e-06, |
| "loss": 0.333, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.428380187416332, |
| "grad_norm": 0.38000071048511164, |
| "learning_rate": 9.944961992705288e-06, |
| "loss": 0.3393, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.43284248103525214, |
| "grad_norm": 0.3322040446425915, |
| "learning_rate": 9.941053606350389e-06, |
| "loss": 0.3289, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.4373047746541722, |
| "grad_norm": 0.31368274498497106, |
| "learning_rate": 9.937011988290443e-06, |
| "loss": 0.323, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.44176706827309237, |
| "grad_norm": 0.33379459029750647, |
| "learning_rate": 9.932837247504566e-06, |
| "loss": 0.3332, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.4462293618920125, |
| "grad_norm": 0.3481057681661188, |
| "learning_rate": 9.928529496561428e-06, |
| "loss": 0.3249, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.4506916555109326, |
| "grad_norm": 0.3805120805185267, |
| "learning_rate": 9.924088851616216e-06, |
| "loss": 0.3332, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.45515394912985274, |
| "grad_norm": 0.3514086594689373, |
| "learning_rate": 9.919515432407502e-06, |
| "loss": 0.3329, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.4596162427487729, |
| "grad_norm": 0.41618025357077787, |
| "learning_rate": 9.914809362254013e-06, |
| "loss": 0.3387, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.464078536367693, |
| "grad_norm": 0.3777422639135785, |
| "learning_rate": 9.909970768051302e-06, |
| "loss": 0.3359, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.4685408299866131, |
| "grad_norm": 0.32565404467366765, |
| "learning_rate": 9.904999780268341e-06, |
| "loss": 0.3239, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.47300312360553326, |
| "grad_norm": 0.33049786269193765, |
| "learning_rate": 9.899896532943983e-06, |
| "loss": 0.3347, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.47746541722445335, |
| "grad_norm": 0.2971213231433049, |
| "learning_rate": 9.894661163683361e-06, |
| "loss": 0.3256, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.4819277108433735, |
| "grad_norm": 0.3265643495101442, |
| "learning_rate": 9.889293813654179e-06, |
| "loss": 0.3325, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.48639000446229363, |
| "grad_norm": 0.3291514794248415, |
| "learning_rate": 9.883794627582893e-06, |
| "loss": 0.327, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.4908522980812137, |
| "grad_norm": 0.3243181563194639, |
| "learning_rate": 9.878163753750823e-06, |
| "loss": 0.3311, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.49531459170013387, |
| "grad_norm": 0.31360700475820674, |
| "learning_rate": 9.872401343990143e-06, |
| "loss": 0.3349, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.499776885319054, |
| "grad_norm": 0.3427769144056513, |
| "learning_rate": 9.866507553679797e-06, |
| "loss": 0.3302, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.5042391789379741, |
| "grad_norm": 0.3268750022359849, |
| "learning_rate": 9.860482541741298e-06, |
| "loss": 0.3387, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.5087014725568942, |
| "grad_norm": 0.36779760794001126, |
| "learning_rate": 9.854326470634454e-06, |
| "loss": 0.3204, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.5131637661758144, |
| "grad_norm": 0.3437763178921102, |
| "learning_rate": 9.848039506352982e-06, |
| "loss": 0.314, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.5176260597947345, |
| "grad_norm": 0.3146556406530087, |
| "learning_rate": 9.841621818420032e-06, |
| "loss": 0.3291, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.5220883534136547, |
| "grad_norm": 0.3372062278564171, |
| "learning_rate": 9.835073579883614e-06, |
| "loss": 0.3184, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.5265506470325747, |
| "grad_norm": 0.3279517665081858, |
| "learning_rate": 9.82839496731194e-06, |
| "loss": 0.3301, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.5310129406514948, |
| "grad_norm": 0.35435668550704597, |
| "learning_rate": 9.821586160788652e-06, |
| "loss": 0.3192, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.535475234270415, |
| "grad_norm": 0.39621154055386365, |
| "learning_rate": 9.814647343907975e-06, |
| "loss": 0.3237, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.5399375278893351, |
| "grad_norm": 0.314845881198322, |
| "learning_rate": 9.807578703769763e-06, |
| "loss": 0.317, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.5443998215082553, |
| "grad_norm": 0.30404528418981586, |
| "learning_rate": 9.80038043097445e-06, |
| "loss": 0.327, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.5488621151271754, |
| "grad_norm": 0.34277234804316314, |
| "learning_rate": 9.793052719617921e-06, |
| "loss": 0.3305, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.5533244087460955, |
| "grad_norm": 0.315701512000068, |
| "learning_rate": 9.78559576728627e-06, |
| "loss": 0.3159, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.5577867023650156, |
| "grad_norm": 0.4064220753807774, |
| "learning_rate": 9.77800977505047e-06, |
| "loss": 0.3222, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.5622489959839357, |
| "grad_norm": 0.38345533251016317, |
| "learning_rate": 9.770294947460964e-06, |
| "loss": 0.3155, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.5667112896028559, |
| "grad_norm": 0.3666235687309694, |
| "learning_rate": 9.76245149254213e-06, |
| "loss": 0.3223, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.571173583221776, |
| "grad_norm": 0.3254173561944929, |
| "learning_rate": 9.754479621786694e-06, |
| "loss": 0.3253, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.5756358768406962, |
| "grad_norm": 0.36867900317972835, |
| "learning_rate": 9.74637955015001e-06, |
| "loss": 0.3234, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.5800981704596162, |
| "grad_norm": 0.3205667116149489, |
| "learning_rate": 9.738151496044268e-06, |
| "loss": 0.3238, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.5845604640785363, |
| "grad_norm": 0.35181980934522605, |
| "learning_rate": 9.729795681332616e-06, |
| "loss": 0.3212, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.5890227576974565, |
| "grad_norm": 0.3157343074157218, |
| "learning_rate": 9.721312331323159e-06, |
| "loss": 0.324, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.5934850513163766, |
| "grad_norm": 0.3267527618647797, |
| "learning_rate": 9.712701674762894e-06, |
| "loss": 0.3293, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.5979473449352968, |
| "grad_norm": 0.3709092454966587, |
| "learning_rate": 9.703963943831548e-06, |
| "loss": 0.3195, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.6024096385542169, |
| "grad_norm": 0.3404725913270717, |
| "learning_rate": 9.695099374135304e-06, |
| "loss": 0.3261, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.606871932173137, |
| "grad_norm": 0.33688175782559104, |
| "learning_rate": 9.686108204700456e-06, |
| "loss": 0.322, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.6113342257920571, |
| "grad_norm": 0.3264040693280103, |
| "learning_rate": 9.676990677966965e-06, |
| "loss": 0.3262, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.6157965194109772, |
| "grad_norm": 0.2946497602423895, |
| "learning_rate": 9.667747039781915e-06, |
| "loss": 0.3204, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.6202588130298974, |
| "grad_norm": 0.3247321923979366, |
| "learning_rate": 9.65837753939289e-06, |
| "loss": 0.3115, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.6247211066488175, |
| "grad_norm": 0.3609340508110271, |
| "learning_rate": 9.648882429441258e-06, |
| "loss": 0.3299, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.6291834002677377, |
| "grad_norm": 0.3140886682745277, |
| "learning_rate": 9.639261965955339e-06, |
| "loss": 0.3204, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.6336456938866577, |
| "grad_norm": 0.30604068166540555, |
| "learning_rate": 9.629516408343524e-06, |
| "loss": 0.3296, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.6381079875055778, |
| "grad_norm": 0.34254988617650395, |
| "learning_rate": 9.619646019387269e-06, |
| "loss": 0.3291, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.642570281124498, |
| "grad_norm": 0.34269558101387265, |
| "learning_rate": 9.609651065234008e-06, |
| "loss": 0.3308, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.6470325747434181, |
| "grad_norm": 0.3110380628077838, |
| "learning_rate": 9.599531815389987e-06, |
| "loss": 0.3231, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.6514948683623383, |
| "grad_norm": 0.3543086604772253, |
| "learning_rate": 9.589288542712978e-06, |
| "loss": 0.315, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.6559571619812584, |
| "grad_norm": 0.29688329030679655, |
| "learning_rate": 9.578921523404943e-06, |
| "loss": 0.3212, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.6604194556001785, |
| "grad_norm": 0.6477411665067504, |
| "learning_rate": 9.568431037004574e-06, |
| "loss": 0.3184, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.6648817492190986, |
| "grad_norm": 0.4105291056831329, |
| "learning_rate": 9.557817366379756e-06, |
| "loss": 0.3316, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.6693440428380187, |
| "grad_norm": 0.3108965775539251, |
| "learning_rate": 9.547080797719943e-06, |
| "loss": 0.3292, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.6738063364569389, |
| "grad_norm": 0.2925176192434652, |
| "learning_rate": 9.536221620528442e-06, |
| "loss": 0.3188, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.678268630075859, |
| "grad_norm": 0.32680827418487335, |
| "learning_rate": 9.5252401276146e-06, |
| "loss": 0.3189, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.6827309236947792, |
| "grad_norm": 0.31001306605003415, |
| "learning_rate": 9.514136615085917e-06, |
| "loss": 0.3246, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.6871932173136992, |
| "grad_norm": 0.3232212027929326, |
| "learning_rate": 9.502911382340056e-06, |
| "loss": 0.3223, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.6916555109326193, |
| "grad_norm": 0.280224760234745, |
| "learning_rate": 9.491564732056775e-06, |
| "loss": 0.3144, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.6961178045515395, |
| "grad_norm": 0.3237056713223196, |
| "learning_rate": 9.480096970189756e-06, |
| "loss": 0.3182, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.7005800981704596, |
| "grad_norm": 0.31882269101613014, |
| "learning_rate": 9.46850840595837e-06, |
| "loss": 0.3223, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.7050423917893798, |
| "grad_norm": 0.32578619260595243, |
| "learning_rate": 9.456799351839324e-06, |
| "loss": 0.3176, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.7095046854082999, |
| "grad_norm": 0.3385848752838862, |
| "learning_rate": 9.44497012355824e-06, |
| "loss": 0.3284, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.71396697902722, |
| "grad_norm": 0.3130273460464875, |
| "learning_rate": 9.433021040081159e-06, |
| "loss": 0.3195, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.7184292726461401, |
| "grad_norm": 0.3516738754933172, |
| "learning_rate": 9.420952423605904e-06, |
| "loss": 0.3267, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.7228915662650602, |
| "grad_norm": 0.3251716278941906, |
| "learning_rate": 9.408764599553429e-06, |
| "loss": 0.318, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.7273538598839804, |
| "grad_norm": 0.32226390759863277, |
| "learning_rate": 9.396457896559021e-06, |
| "loss": 0.3201, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.7318161535029005, |
| "grad_norm": 0.3035427600136032, |
| "learning_rate": 9.384032646463448e-06, |
| "loss": 0.3176, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.7362784471218207, |
| "grad_norm": 0.35729461477248853, |
| "learning_rate": 9.37148918430401e-06, |
| "loss": 0.3191, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.7407407407407407, |
| "grad_norm": 0.3103181941339719, |
| "learning_rate": 9.358827848305502e-06, |
| "loss": 0.3218, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.7452030343596608, |
| "grad_norm": 0.34994784215316105, |
| "learning_rate": 9.346048979871098e-06, |
| "loss": 0.3212, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.749665327978581, |
| "grad_norm": 0.3270663588481017, |
| "learning_rate": 9.333152923573146e-06, |
| "loss": 0.3194, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.7541276215975011, |
| "grad_norm": 0.3017394614877857, |
| "learning_rate": 9.320140027143869e-06, |
| "loss": 0.3244, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.7585899152164213, |
| "grad_norm": 0.3371418446207733, |
| "learning_rate": 9.307010641466e-06, |
| "loss": 0.3223, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.7630522088353414, |
| "grad_norm": 0.36102711717253444, |
| "learning_rate": 9.293765120563309e-06, |
| "loss": 0.3235, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.7675145024542614, |
| "grad_norm": 0.3007012064707952, |
| "learning_rate": 9.280403821591066e-06, |
| "loss": 0.3208, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.7719767960731816, |
| "grad_norm": 0.2993075622208931, |
| "learning_rate": 9.26692710482641e-06, |
| "loss": 0.3242, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.7764390896921017, |
| "grad_norm": 0.31517853620369296, |
| "learning_rate": 9.253335333658627e-06, |
| "loss": 0.3209, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.7809013833110219, |
| "grad_norm": 0.30592785176098447, |
| "learning_rate": 9.239628874579359e-06, |
| "loss": 0.3211, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.785363676929942, |
| "grad_norm": 0.3511159488999007, |
| "learning_rate": 9.22580809717272e-06, |
| "loss": 0.3159, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.7898259705488622, |
| "grad_norm": 0.3207835963226278, |
| "learning_rate": 9.211873374105325e-06, |
| "loss": 0.323, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.7942882641677822, |
| "grad_norm": 0.3169720632937591, |
| "learning_rate": 9.197825081116255e-06, |
| "loss": 0.3165, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.7987505577867023, |
| "grad_norm": 0.32507773728896977, |
| "learning_rate": 9.183663597006904e-06, |
| "loss": 0.3154, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.8032128514056225, |
| "grad_norm": 0.29822880733968704, |
| "learning_rate": 9.169389303630792e-06, |
| "loss": 0.3202, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.8076751450245426, |
| "grad_norm": 0.27944537447466095, |
| "learning_rate": 9.155002585883238e-06, |
| "loss": 0.3194, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.8121374386434628, |
| "grad_norm": 0.3257811138318699, |
| "learning_rate": 9.140503831691014e-06, |
| "loss": 0.319, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.8165997322623829, |
| "grad_norm": 0.38751538240318106, |
| "learning_rate": 9.125893432001856e-06, |
| "loss": 0.324, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.821062025881303, |
| "grad_norm": 0.35676044959951964, |
| "learning_rate": 9.111171780773938e-06, |
| "loss": 0.3207, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.8255243195002231, |
| "grad_norm": 0.33066533272987186, |
| "learning_rate": 9.096339274965248e-06, |
| "loss": 0.3225, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.8299866131191432, |
| "grad_norm": 0.30077549406173915, |
| "learning_rate": 9.081396314522883e-06, |
| "loss": 0.3156, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.8344489067380634, |
| "grad_norm": 0.4455200746218136, |
| "learning_rate": 9.066343302372262e-06, |
| "loss": 0.3208, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.8389112003569835, |
| "grad_norm": 0.29588329863262136, |
| "learning_rate": 9.051180644406265e-06, |
| "loss": 0.3158, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.8433734939759037, |
| "grad_norm": 0.34195626405047574, |
| "learning_rate": 9.035908749474286e-06, |
| "loss": 0.3048, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.8478357875948237, |
| "grad_norm": 0.6420035384465244, |
| "learning_rate": 9.020528029371209e-06, |
| "loss": 0.3072, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.8522980812137438, |
| "grad_norm": 0.3087614249615553, |
| "learning_rate": 9.005038898826307e-06, |
| "loss": 0.3178, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.856760374832664, |
| "grad_norm": 0.31279375725653125, |
| "learning_rate": 8.989441775492054e-06, |
| "loss": 0.3187, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.8612226684515841, |
| "grad_norm": 0.31745786814951604, |
| "learning_rate": 8.97373707993287e-06, |
| "loss": 0.3138, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.8656849620705043, |
| "grad_norm": 0.32729085562805416, |
| "learning_rate": 8.957925235613774e-06, |
| "loss": 0.3186, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.8701472556894244, |
| "grad_norm": 0.29401213857632397, |
| "learning_rate": 8.942006668888972e-06, |
| "loss": 0.3209, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.8746095493083444, |
| "grad_norm": 0.35071321892553325, |
| "learning_rate": 8.925981808990353e-06, |
| "loss": 0.3067, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.8790718429272646, |
| "grad_norm": 0.3254090010272678, |
| "learning_rate": 8.909851088015929e-06, |
| "loss": 0.319, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.8835341365461847, |
| "grad_norm": 0.31143570006446, |
| "learning_rate": 8.89361494091816e-06, |
| "loss": 0.3173, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.8879964301651049, |
| "grad_norm": 0.3368629213736036, |
| "learning_rate": 8.877273805492251e-06, |
| "loss": 0.3142, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.892458723784025, |
| "grad_norm": 0.32123265502775333, |
| "learning_rate": 8.860828122364333e-06, |
| "loss": 0.3114, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.8969210174029452, |
| "grad_norm": 0.32040081950305627, |
| "learning_rate": 8.844278334979587e-06, |
| "loss": 0.3173, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.9013833110218652, |
| "grad_norm": 0.31575327422413973, |
| "learning_rate": 8.82762488959028e-06, |
| "loss": 0.3161, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.9058456046407853, |
| "grad_norm": 0.2756449995981234, |
| "learning_rate": 8.810868235243746e-06, |
| "loss": 0.3127, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.9103078982597055, |
| "grad_norm": 0.3264003117051716, |
| "learning_rate": 8.79400882377026e-06, |
| "loss": 0.3122, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.9147701918786256, |
| "grad_norm": 0.32023283877896064, |
| "learning_rate": 8.777047109770872e-06, |
| "loss": 0.3161, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.9192324854975458, |
| "grad_norm": 0.32023324438450435, |
| "learning_rate": 8.759983550605132e-06, |
| "loss": 0.3075, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.9236947791164659, |
| "grad_norm": 0.3495626563053784, |
| "learning_rate": 8.74281860637877e-06, |
| "loss": 0.321, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.928157072735386, |
| "grad_norm": 0.365734666051995, |
| "learning_rate": 8.725552739931295e-06, |
| "loss": 0.3172, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.9326193663543061, |
| "grad_norm": 0.5195262244868563, |
| "learning_rate": 8.70818641682349e-06, |
| "loss": 0.3114, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.9370816599732262, |
| "grad_norm": 0.31318241394583657, |
| "learning_rate": 8.690720105324887e-06, |
| "loss": 0.3145, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.9415439535921464, |
| "grad_norm": 0.30511794190937597, |
| "learning_rate": 8.673154276401123e-06, |
| "loss": 0.3107, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.9460062472110665, |
| "grad_norm": 0.33299848484970584, |
| "learning_rate": 8.655489403701244e-06, |
| "loss": 0.3199, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.9504685408299867, |
| "grad_norm": 0.2918060481120866, |
| "learning_rate": 8.63772596354494e-06, |
| "loss": 0.3241, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.9549308344489067, |
| "grad_norm": 0.3267067284562192, |
| "learning_rate": 8.619864434909692e-06, |
| "loss": 0.3144, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.9593931280678268, |
| "grad_norm": 0.3620884501681288, |
| "learning_rate": 8.601905299417865e-06, |
| "loss": 0.3064, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.963855421686747, |
| "grad_norm": 0.3385652298929547, |
| "learning_rate": 8.583849041323717e-06, |
| "loss": 0.3167, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.9683177153056671, |
| "grad_norm": 0.31358881851824566, |
| "learning_rate": 8.565696147500338e-06, |
| "loss": 0.3173, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.9727800089245873, |
| "grad_norm": 0.26819324932064814, |
| "learning_rate": 8.54744710742653e-06, |
| "loss": 0.3098, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.9772423025435074, |
| "grad_norm": 0.3193733001824146, |
| "learning_rate": 8.529102413173605e-06, |
| "loss": 0.3019, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.9817045961624274, |
| "grad_norm": 0.28919148613320567, |
| "learning_rate": 8.510662559392113e-06, |
| "loss": 0.313, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.9861668897813476, |
| "grad_norm": 0.30310308101898775, |
| "learning_rate": 8.492128043298511e-06, |
| "loss": 0.3229, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.9906291834002677, |
| "grad_norm": 0.31282347332567895, |
| "learning_rate": 8.47349936466175e-06, |
| "loss": 0.3159, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.9950914770191879, |
| "grad_norm": 0.28804399192631563, |
| "learning_rate": 8.454777025789805e-06, |
| "loss": 0.313, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.999553770638108, |
| "grad_norm": 0.2868177500862966, |
| "learning_rate": 8.435961531516119e-06, |
| "loss": 0.2984, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.0040160642570282, |
| "grad_norm": 0.3322502574109939, |
| "learning_rate": 8.417053389186009e-06, |
| "loss": 0.2982, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.0084783578759482, |
| "grad_norm": 0.31601076209209755, |
| "learning_rate": 8.398053108642966e-06, |
| "loss": 0.2938, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.0129406514948684, |
| "grad_norm": 0.3085084676907704, |
| "learning_rate": 8.378961202214927e-06, |
| "loss": 0.3022, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.0174029451137885, |
| "grad_norm": 0.3030562804942466, |
| "learning_rate": 8.35977818470044e-06, |
| "loss": 0.2854, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.0218652387327085, |
| "grad_norm": 0.30673490305127743, |
| "learning_rate": 8.3405045733548e-06, |
| "loss": 0.2937, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.0263275323516288, |
| "grad_norm": 0.3079905769883883, |
| "learning_rate": 8.321140887876093e-06, |
| "loss": 0.2937, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.0307898259705488, |
| "grad_norm": 0.2833185510901884, |
| "learning_rate": 8.301687650391184e-06, |
| "loss": 0.2911, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.035252119589469, |
| "grad_norm": 0.2928224557074968, |
| "learning_rate": 8.28214538544164e-06, |
| "loss": 0.2925, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.039714413208389, |
| "grad_norm": 0.3119622799871382, |
| "learning_rate": 8.262514619969583e-06, |
| "loss": 0.2928, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.0441767068273093, |
| "grad_norm": 0.2909193051250449, |
| "learning_rate": 8.242795883303489e-06, |
| "loss": 0.2913, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.0486390004462294, |
| "grad_norm": 0.2878712035948744, |
| "learning_rate": 8.222989707143903e-06, |
| "loss": 0.2963, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.0531012940651494, |
| "grad_norm": 0.31820465000033393, |
| "learning_rate": 8.203096625549109e-06, |
| "loss": 0.293, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.0575635876840697, |
| "grad_norm": 0.3374605509762875, |
| "learning_rate": 8.183117174920733e-06, |
| "loss": 0.2946, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.0620258813029897, |
| "grad_norm": 0.3020068367212028, |
| "learning_rate": 8.163051893989273e-06, |
| "loss": 0.2925, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.06648817492191, |
| "grad_norm": 0.31068593851856324, |
| "learning_rate": 8.142901323799578e-06, |
| "loss": 0.2981, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.07095046854083, |
| "grad_norm": 0.3066541366441537, |
| "learning_rate": 8.122666007696251e-06, |
| "loss": 0.2916, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.07541276215975, |
| "grad_norm": 0.2898134349503606, |
| "learning_rate": 8.102346491309007e-06, |
| "loss": 0.2887, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.0798750557786703, |
| "grad_norm": 0.3017984948209697, |
| "learning_rate": 8.081943322537958e-06, |
| "loss": 0.2975, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.0843373493975903, |
| "grad_norm": 0.3106616772867366, |
| "learning_rate": 8.061457051538832e-06, |
| "loss": 0.2934, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.0887996430165106, |
| "grad_norm": 0.3117195701092512, |
| "learning_rate": 8.040888230708153e-06, |
| "loss": 0.2887, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.0932619366354306, |
| "grad_norm": 0.324577741353284, |
| "learning_rate": 8.02023741466833e-06, |
| "loss": 0.2886, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.0977242302543506, |
| "grad_norm": 0.31435192682062346, |
| "learning_rate": 7.999505160252712e-06, |
| "loss": 0.2913, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.1021865238732709, |
| "grad_norm": 0.30511195151083603, |
| "learning_rate": 7.978692026490576e-06, |
| "loss": 0.2963, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.106648817492191, |
| "grad_norm": 0.30143500498716036, |
| "learning_rate": 7.957798574592042e-06, |
| "loss": 0.2904, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.1111111111111112, |
| "grad_norm": 0.3029934237549778, |
| "learning_rate": 7.936825367932947e-06, |
| "loss": 0.2874, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.1155734047300312, |
| "grad_norm": 0.310379596751207, |
| "learning_rate": 7.91577297203966e-06, |
| "loss": 0.2864, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.1200356983489514, |
| "grad_norm": 0.2756683563500154, |
| "learning_rate": 7.89464195457382e-06, |
| "loss": 0.2838, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.1244979919678715, |
| "grad_norm": 0.2886788009666996, |
| "learning_rate": 7.873432885317036e-06, |
| "loss": 0.2892, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.1289602855867917, |
| "grad_norm": 0.6175230364141268, |
| "learning_rate": 7.852146336155531e-06, |
| "loss": 0.2961, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.1334225792057118, |
| "grad_norm": 0.33735487775658257, |
| "learning_rate": 7.830782881064707e-06, |
| "loss": 0.2989, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.1378848728246318, |
| "grad_norm": 0.2815200857691379, |
| "learning_rate": 7.809343096093676e-06, |
| "loss": 0.2918, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.142347166443552, |
| "grad_norm": 0.3190460322083763, |
| "learning_rate": 7.787827559349727e-06, |
| "loss": 0.2992, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.146809460062472, |
| "grad_norm": 0.316056300209666, |
| "learning_rate": 7.766236850982739e-06, |
| "loss": 0.2882, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.1512717536813923, |
| "grad_norm": 0.30178501428282056, |
| "learning_rate": 7.744571553169534e-06, |
| "loss": 0.2925, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.1557340473003124, |
| "grad_norm": 0.3001665670771815, |
| "learning_rate": 7.722832250098183e-06, |
| "loss": 0.2901, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.1601963409192324, |
| "grad_norm": 0.29583953397424706, |
| "learning_rate": 7.701019527952248e-06, |
| "loss": 0.2971, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.1646586345381527, |
| "grad_norm": 0.3241701984511909, |
| "learning_rate": 7.679133974894984e-06, |
| "loss": 0.2951, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.1691209281570727, |
| "grad_norm": 0.31677851739186413, |
| "learning_rate": 7.657176181053472e-06, |
| "loss": 0.3, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.173583221775993, |
| "grad_norm": 0.3740238542554803, |
| "learning_rate": 7.635146738502714e-06, |
| "loss": 0.2934, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.178045515394913, |
| "grad_norm": 0.31202486519983047, |
| "learning_rate": 7.6130462412496605e-06, |
| "loss": 0.2999, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.182507809013833, |
| "grad_norm": 0.3488189130672866, |
| "learning_rate": 7.590875285217201e-06, |
| "loss": 0.2913, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.1869701026327533, |
| "grad_norm": 0.31070266182233724, |
| "learning_rate": 7.568634468228085e-06, |
| "loss": 0.2847, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.1914323962516733, |
| "grad_norm": 0.29933114524992155, |
| "learning_rate": 7.546324389988817e-06, |
| "loss": 0.3017, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.1958946898705936, |
| "grad_norm": 0.29798742777776227, |
| "learning_rate": 7.5239456520734726e-06, |
| "loss": 0.2972, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.2003569834895136, |
| "grad_norm": 0.3062412855044856, |
| "learning_rate": 7.501498857907485e-06, |
| "loss": 0.2845, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.2048192771084336, |
| "grad_norm": 0.27470746072655944, |
| "learning_rate": 7.478984612751371e-06, |
| "loss": 0.2863, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.2092815707273539, |
| "grad_norm": 0.27866953124323884, |
| "learning_rate": 7.456403523684412e-06, |
| "loss": 0.2863, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.213743864346274, |
| "grad_norm": 0.2909116045188707, |
| "learning_rate": 7.433756199588282e-06, |
| "loss": 0.2926, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.2182061579651942, |
| "grad_norm": 0.29453973806838746, |
| "learning_rate": 7.411043251130634e-06, |
| "loss": 0.3008, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.2226684515841142, |
| "grad_norm": 0.31106123745886194, |
| "learning_rate": 7.388265290748631e-06, |
| "loss": 0.2966, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.2271307452030344, |
| "grad_norm": 0.3011866488239261, |
| "learning_rate": 7.36542293263243e-06, |
| "loss": 0.2937, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.2315930388219545, |
| "grad_norm": 0.2940575796812627, |
| "learning_rate": 7.342516792708627e-06, |
| "loss": 0.2852, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.2360553324408747, |
| "grad_norm": 0.26936074456201886, |
| "learning_rate": 7.319547488623642e-06, |
| "loss": 0.2921, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.2405176260597948, |
| "grad_norm": 0.290558981897068, |
| "learning_rate": 7.296515639727071e-06, |
| "loss": 0.2951, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.2449799196787148, |
| "grad_norm": 0.2852457393680859, |
| "learning_rate": 7.273421867054979e-06, |
| "loss": 0.2883, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.249442213297635, |
| "grad_norm": 0.3014603513538629, |
| "learning_rate": 7.250266793313161e-06, |
| "loss": 0.2865, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.253904506916555, |
| "grad_norm": 0.31555167954226493, |
| "learning_rate": 7.2270510428603465e-06, |
| "loss": 0.2872, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.2583668005354753, |
| "grad_norm": 0.30225060098084444, |
| "learning_rate": 7.2037752416913684e-06, |
| "loss": 0.2917, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.2628290941543954, |
| "grad_norm": 0.3044780265386901, |
| "learning_rate": 7.180440017420277e-06, |
| "loss": 0.2928, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.2672913877733154, |
| "grad_norm": 0.28549429454817354, |
| "learning_rate": 7.157045999263423e-06, |
| "loss": 0.2966, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.2717536813922357, |
| "grad_norm": 0.3366282530568157, |
| "learning_rate": 7.13359381802249e-06, |
| "loss": 0.2934, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.2762159750111557, |
| "grad_norm": 0.37361922324450697, |
| "learning_rate": 7.110084106067483e-06, |
| "loss": 0.2838, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.280678268630076, |
| "grad_norm": 0.3167267835607769, |
| "learning_rate": 7.0865174973196746e-06, |
| "loss": 0.303, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.285140562248996, |
| "grad_norm": 0.3151077338868855, |
| "learning_rate": 7.062894627234525e-06, |
| "loss": 0.286, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.289602855867916, |
| "grad_norm": 0.2948165918607916, |
| "learning_rate": 7.039216132784528e-06, |
| "loss": 0.2823, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.2940651494868363, |
| "grad_norm": 0.34833417956591767, |
| "learning_rate": 7.0154826524420506e-06, |
| "loss": 0.282, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.2985274431057563, |
| "grad_norm": 0.29607288645591945, |
| "learning_rate": 6.9916948261621145e-06, |
| "loss": 0.2903, |
| "step": 2910 |
| }, |
| { |
| "epoch": 1.3029897367246766, |
| "grad_norm": 0.3141375337783596, |
| "learning_rate": 6.96785329536513e-06, |
| "loss": 0.3022, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.3074520303435966, |
| "grad_norm": 0.3094386182548052, |
| "learning_rate": 6.943958702919618e-06, |
| "loss": 0.2963, |
| "step": 2930 |
| }, |
| { |
| "epoch": 1.3119143239625166, |
| "grad_norm": 0.29702645469864675, |
| "learning_rate": 6.9200116931248575e-06, |
| "loss": 0.2925, |
| "step": 2940 |
| }, |
| { |
| "epoch": 1.3163766175814369, |
| "grad_norm": 0.3131467834081039, |
| "learning_rate": 6.896012911693527e-06, |
| "loss": 0.2926, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.320838911200357, |
| "grad_norm": 0.30420307046401684, |
| "learning_rate": 6.871963005734283e-06, |
| "loss": 0.2914, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.3253012048192772, |
| "grad_norm": 0.2874880602952481, |
| "learning_rate": 6.847862623734316e-06, |
| "loss": 0.2907, |
| "step": 2970 |
| }, |
| { |
| "epoch": 1.3297634984381972, |
| "grad_norm": 0.31117439728389396, |
| "learning_rate": 6.823712415541867e-06, |
| "loss": 0.2906, |
| "step": 2980 |
| }, |
| { |
| "epoch": 1.3342257920571172, |
| "grad_norm": 0.3154785866126041, |
| "learning_rate": 6.7995130323486995e-06, |
| "loss": 0.2905, |
| "step": 2990 |
| }, |
| { |
| "epoch": 1.3386880856760375, |
| "grad_norm": 0.3162007988121743, |
| "learning_rate": 6.775265126672544e-06, |
| "loss": 0.2944, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.3431503792949577, |
| "grad_norm": 0.2996244368117481, |
| "learning_rate": 6.750969352339503e-06, |
| "loss": 0.2791, |
| "step": 3010 |
| }, |
| { |
| "epoch": 1.3476126729138778, |
| "grad_norm": 0.31285203943822604, |
| "learning_rate": 6.726626364466418e-06, |
| "loss": 0.2978, |
| "step": 3020 |
| }, |
| { |
| "epoch": 1.3520749665327978, |
| "grad_norm": 0.3069565443579846, |
| "learning_rate": 6.70223681944321e-06, |
| "loss": 0.2887, |
| "step": 3030 |
| }, |
| { |
| "epoch": 1.356537260151718, |
| "grad_norm": 0.3555134690217621, |
| "learning_rate": 6.677801374915175e-06, |
| "loss": 0.2938, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.360999553770638, |
| "grad_norm": 0.32323874683363596, |
| "learning_rate": 6.653320689765257e-06, |
| "loss": 0.2878, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.3654618473895583, |
| "grad_norm": 0.2958320258483459, |
| "learning_rate": 6.628795424096276e-06, |
| "loss": 0.2846, |
| "step": 3060 |
| }, |
| { |
| "epoch": 1.3699241410084784, |
| "grad_norm": 0.29591479942269067, |
| "learning_rate": 6.604226239213131e-06, |
| "loss": 0.2912, |
| "step": 3070 |
| }, |
| { |
| "epoch": 1.3743864346273984, |
| "grad_norm": 0.275955956029984, |
| "learning_rate": 6.579613797604971e-06, |
| "loss": 0.2841, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.3788487282463187, |
| "grad_norm": 0.3169860752144058, |
| "learning_rate": 6.554958762927328e-06, |
| "loss": 0.2911, |
| "step": 3090 |
| }, |
| { |
| "epoch": 1.3833110218652387, |
| "grad_norm": 0.2965804636519538, |
| "learning_rate": 6.530261799984225e-06, |
| "loss": 0.289, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.387773315484159, |
| "grad_norm": 0.3240128684238396, |
| "learning_rate": 6.5055235747102456e-06, |
| "loss": 0.2914, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.392235609103079, |
| "grad_norm": 0.27893092037168543, |
| "learning_rate": 6.480744754152581e-06, |
| "loss": 0.2904, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.396697902721999, |
| "grad_norm": 0.30817444273855016, |
| "learning_rate": 6.455926006453045e-06, |
| "loss": 0.2888, |
| "step": 3130 |
| }, |
| { |
| "epoch": 1.4011601963409193, |
| "grad_norm": 0.29700021328473286, |
| "learning_rate": 6.431068000830054e-06, |
| "loss": 0.2892, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.4056224899598393, |
| "grad_norm": 0.31253083958098826, |
| "learning_rate": 6.406171407560587e-06, |
| "loss": 0.2939, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.4100847835787595, |
| "grad_norm": 0.2949725175395465, |
| "learning_rate": 6.381236897962102e-06, |
| "loss": 0.2918, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.4145470771976796, |
| "grad_norm": 0.2835765420987079, |
| "learning_rate": 6.356265144374451e-06, |
| "loss": 0.2912, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.4190093708165996, |
| "grad_norm": 0.2911266952336509, |
| "learning_rate": 6.3312568201417335e-06, |
| "loss": 0.2863, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.4234716644355199, |
| "grad_norm": 0.2975023838090645, |
| "learning_rate": 6.306212599594155e-06, |
| "loss": 0.287, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.42793395805444, |
| "grad_norm": 0.33944728741572633, |
| "learning_rate": 6.281133158029833e-06, |
| "loss": 0.28, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.4323962516733602, |
| "grad_norm": 0.29846101119632584, |
| "learning_rate": 6.256019171696595e-06, |
| "loss": 0.2804, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.4368585452922802, |
| "grad_norm": 0.28248180975947623, |
| "learning_rate": 6.230871317773737e-06, |
| "loss": 0.2926, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.4413208389112002, |
| "grad_norm": 0.2851276821074005, |
| "learning_rate": 6.205690274353775e-06, |
| "loss": 0.2922, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.4457831325301205, |
| "grad_norm": 0.39429209152368455, |
| "learning_rate": 6.1804767204241515e-06, |
| "loss": 0.2836, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.4502454261490407, |
| "grad_norm": 0.28036841762852593, |
| "learning_rate": 6.155231335848927e-06, |
| "loss": 0.297, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.4547077197679608, |
| "grad_norm": 0.31725786994860594, |
| "learning_rate": 6.129954801350455e-06, |
| "loss": 0.2912, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.4591700133868808, |
| "grad_norm": 0.3367577417882933, |
| "learning_rate": 6.1046477984910215e-06, |
| "loss": 0.2941, |
| "step": 3270 |
| }, |
| { |
| "epoch": 1.463632307005801, |
| "grad_norm": 0.3029711035622468, |
| "learning_rate": 6.079311009654466e-06, |
| "loss": 0.2714, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.468094600624721, |
| "grad_norm": 0.36320361509650795, |
| "learning_rate": 6.053945118027789e-06, |
| "loss": 0.2998, |
| "step": 3290 |
| }, |
| { |
| "epoch": 1.4725568942436413, |
| "grad_norm": 0.3004963431679098, |
| "learning_rate": 6.028550807582718e-06, |
| "loss": 0.2841, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.4770191878625614, |
| "grad_norm": 0.28832303322372754, |
| "learning_rate": 6.00312876305728e-06, |
| "loss": 0.2907, |
| "step": 3310 |
| }, |
| { |
| "epoch": 1.4814814814814814, |
| "grad_norm": 0.2738988460065302, |
| "learning_rate": 5.977679669937325e-06, |
| "loss": 0.2865, |
| "step": 3320 |
| }, |
| { |
| "epoch": 1.4859437751004017, |
| "grad_norm": 0.3055752634708775, |
| "learning_rate": 5.952204214438049e-06, |
| "loss": 0.2886, |
| "step": 3330 |
| }, |
| { |
| "epoch": 1.4904060687193217, |
| "grad_norm": 0.3716780982022642, |
| "learning_rate": 5.926703083485488e-06, |
| "loss": 0.2906, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.494868362338242, |
| "grad_norm": 0.324100431102628, |
| "learning_rate": 5.901176964698e-06, |
| "loss": 0.2857, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.499330655957162, |
| "grad_norm": 0.27889777787988906, |
| "learning_rate": 5.875626546367716e-06, |
| "loss": 0.292, |
| "step": 3360 |
| }, |
| { |
| "epoch": 1.503792949576082, |
| "grad_norm": 0.30807078683693484, |
| "learning_rate": 5.850052517441991e-06, |
| "loss": 0.2963, |
| "step": 3370 |
| }, |
| { |
| "epoch": 1.5082552431950023, |
| "grad_norm": 0.2762209648675528, |
| "learning_rate": 5.824455567504817e-06, |
| "loss": 0.286, |
| "step": 3380 |
| }, |
| { |
| "epoch": 1.5127175368139225, |
| "grad_norm": 0.33369974271939384, |
| "learning_rate": 5.798836386758235e-06, |
| "loss": 0.2954, |
| "step": 3390 |
| }, |
| { |
| "epoch": 1.5171798304328425, |
| "grad_norm": 0.2860459702457261, |
| "learning_rate": 5.773195666003724e-06, |
| "loss": 0.2938, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.5216421240517626, |
| "grad_norm": 0.2779676376622302, |
| "learning_rate": 5.747534096623569e-06, |
| "loss": 0.2876, |
| "step": 3410 |
| }, |
| { |
| "epoch": 1.5261044176706826, |
| "grad_norm": 0.2955504033746034, |
| "learning_rate": 5.7218523705622275e-06, |
| "loss": 0.2929, |
| "step": 3420 |
| }, |
| { |
| "epoch": 1.5305667112896029, |
| "grad_norm": 0.2980806596476541, |
| "learning_rate": 5.696151180307661e-06, |
| "loss": 0.2925, |
| "step": 3430 |
| }, |
| { |
| "epoch": 1.5350290049085231, |
| "grad_norm": 0.2794624988961113, |
| "learning_rate": 5.670431218872672e-06, |
| "loss": 0.2931, |
| "step": 3440 |
| }, |
| { |
| "epoch": 1.5394912985274432, |
| "grad_norm": 0.2786208372363059, |
| "learning_rate": 5.644693179776213e-06, |
| "loss": 0.2974, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.5439535921463632, |
| "grad_norm": 0.27275146705308617, |
| "learning_rate": 5.618937757024683e-06, |
| "loss": 0.2912, |
| "step": 3460 |
| }, |
| { |
| "epoch": 1.5484158857652832, |
| "grad_norm": 0.27976748800298595, |
| "learning_rate": 5.593165645093222e-06, |
| "loss": 0.2951, |
| "step": 3470 |
| }, |
| { |
| "epoch": 1.5528781793842035, |
| "grad_norm": 0.2818438825086793, |
| "learning_rate": 5.567377538906977e-06, |
| "loss": 0.2923, |
| "step": 3480 |
| }, |
| { |
| "epoch": 1.5573404730031237, |
| "grad_norm": 0.27434503383015063, |
| "learning_rate": 5.541574133822374e-06, |
| "loss": 0.2903, |
| "step": 3490 |
| }, |
| { |
| "epoch": 1.5618027666220438, |
| "grad_norm": 0.28657591757615636, |
| "learning_rate": 5.515756125608355e-06, |
| "loss": 0.288, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.5662650602409638, |
| "grad_norm": 0.27282164230782224, |
| "learning_rate": 5.489924210427628e-06, |
| "loss": 0.2896, |
| "step": 3510 |
| }, |
| { |
| "epoch": 1.5707273538598838, |
| "grad_norm": 0.417801968862151, |
| "learning_rate": 5.464079084817892e-06, |
| "loss": 0.284, |
| "step": 3520 |
| }, |
| { |
| "epoch": 1.575189647478804, |
| "grad_norm": 0.29465703843658075, |
| "learning_rate": 5.4382214456730546e-06, |
| "loss": 0.2918, |
| "step": 3530 |
| }, |
| { |
| "epoch": 1.5796519410977243, |
| "grad_norm": 0.28542382527132004, |
| "learning_rate": 5.412351990224438e-06, |
| "loss": 0.2857, |
| "step": 3540 |
| }, |
| { |
| "epoch": 1.5841142347166444, |
| "grad_norm": 0.2693415075013077, |
| "learning_rate": 5.386471416021987e-06, |
| "loss": 0.2833, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.5885765283355644, |
| "grad_norm": 0.28674507691911566, |
| "learning_rate": 5.36058042091545e-06, |
| "loss": 0.2788, |
| "step": 3560 |
| }, |
| { |
| "epoch": 1.5930388219544847, |
| "grad_norm": 0.28366032186224716, |
| "learning_rate": 5.33467970303557e-06, |
| "loss": 0.2867, |
| "step": 3570 |
| }, |
| { |
| "epoch": 1.5975011155734047, |
| "grad_norm": 0.2953521127402904, |
| "learning_rate": 5.308769960775257e-06, |
| "loss": 0.2939, |
| "step": 3580 |
| }, |
| { |
| "epoch": 1.601963409192325, |
| "grad_norm": 0.2888287000335896, |
| "learning_rate": 5.28285189277076e-06, |
| "loss": 0.2905, |
| "step": 3590 |
| }, |
| { |
| "epoch": 1.606425702811245, |
| "grad_norm": 0.27937721122117365, |
| "learning_rate": 5.2569261978828155e-06, |
| "loss": 0.2982, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.610887996430165, |
| "grad_norm": 0.28062178431057444, |
| "learning_rate": 5.230993575177823e-06, |
| "loss": 0.2925, |
| "step": 3610 |
| }, |
| { |
| "epoch": 1.6153502900490853, |
| "grad_norm": 0.2959051465736045, |
| "learning_rate": 5.2050547239089796e-06, |
| "loss": 0.2766, |
| "step": 3620 |
| }, |
| { |
| "epoch": 1.6198125836680055, |
| "grad_norm": 0.3167636433656721, |
| "learning_rate": 5.179110343497432e-06, |
| "loss": 0.2921, |
| "step": 3630 |
| }, |
| { |
| "epoch": 1.6242748772869255, |
| "grad_norm": 0.27553361706720186, |
| "learning_rate": 5.15316113351342e-06, |
| "loss": 0.2866, |
| "step": 3640 |
| }, |
| { |
| "epoch": 1.6287371709058456, |
| "grad_norm": 0.281198216094774, |
| "learning_rate": 5.1272077936574005e-06, |
| "loss": 0.2869, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.6331994645247656, |
| "grad_norm": 0.28566411066751285, |
| "learning_rate": 5.1012510237411975e-06, |
| "loss": 0.2874, |
| "step": 3660 |
| }, |
| { |
| "epoch": 1.6376617581436859, |
| "grad_norm": 0.27128449582175607, |
| "learning_rate": 5.075291523669118e-06, |
| "loss": 0.2771, |
| "step": 3670 |
| }, |
| { |
| "epoch": 1.6421240517626061, |
| "grad_norm": 0.25597160345393283, |
| "learning_rate": 5.049329993419092e-06, |
| "loss": 0.2882, |
| "step": 3680 |
| }, |
| { |
| "epoch": 1.6465863453815262, |
| "grad_norm": 0.27857161981462486, |
| "learning_rate": 5.023367133023784e-06, |
| "loss": 0.2942, |
| "step": 3690 |
| }, |
| { |
| "epoch": 1.6510486390004462, |
| "grad_norm": 0.2862312874917413, |
| "learning_rate": 4.997403642551733e-06, |
| "loss": 0.2949, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.6555109326193662, |
| "grad_norm": 0.2553590681517932, |
| "learning_rate": 4.971440222088459e-06, |
| "loss": 0.2823, |
| "step": 3710 |
| }, |
| { |
| "epoch": 1.6599732262382865, |
| "grad_norm": 0.2806614787948605, |
| "learning_rate": 4.945477571717602e-06, |
| "loss": 0.2946, |
| "step": 3720 |
| }, |
| { |
| "epoch": 1.6644355198572067, |
| "grad_norm": 0.3072907017218825, |
| "learning_rate": 4.91951639150203e-06, |
| "loss": 0.2814, |
| "step": 3730 |
| }, |
| { |
| "epoch": 1.6688978134761268, |
| "grad_norm": 0.2874398898980506, |
| "learning_rate": 4.8935573814649765e-06, |
| "loss": 0.2915, |
| "step": 3740 |
| }, |
| { |
| "epoch": 1.6733601070950468, |
| "grad_norm": 0.28754909498663717, |
| "learning_rate": 4.867601241571153e-06, |
| "loss": 0.2883, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.6778224007139668, |
| "grad_norm": 0.273658543396969, |
| "learning_rate": 4.841648671707881e-06, |
| "loss": 0.2829, |
| "step": 3760 |
| }, |
| { |
| "epoch": 1.682284694332887, |
| "grad_norm": 0.288492021314724, |
| "learning_rate": 4.815700371666219e-06, |
| "loss": 0.2897, |
| "step": 3770 |
| }, |
| { |
| "epoch": 1.6867469879518073, |
| "grad_norm": 0.302492958266678, |
| "learning_rate": 4.789757041122093e-06, |
| "loss": 0.2806, |
| "step": 3780 |
| }, |
| { |
| "epoch": 1.6912092815707274, |
| "grad_norm": 0.2995952463789012, |
| "learning_rate": 4.763819379617432e-06, |
| "loss": 0.2896, |
| "step": 3790 |
| }, |
| { |
| "epoch": 1.6956715751896474, |
| "grad_norm": 0.30875546003288895, |
| "learning_rate": 4.737888086541298e-06, |
| "loss": 0.2859, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.7001338688085676, |
| "grad_norm": 0.2944845035908801, |
| "learning_rate": 4.711963861111043e-06, |
| "loss": 0.3009, |
| "step": 3810 |
| }, |
| { |
| "epoch": 1.7045961624274877, |
| "grad_norm": 0.3619439737256338, |
| "learning_rate": 4.686047402353433e-06, |
| "loss": 0.2841, |
| "step": 3820 |
| }, |
| { |
| "epoch": 1.709058456046408, |
| "grad_norm": 0.29449858921532607, |
| "learning_rate": 4.660139409085825e-06, |
| "loss": 0.2935, |
| "step": 3830 |
| }, |
| { |
| "epoch": 1.713520749665328, |
| "grad_norm": 0.2829699337998857, |
| "learning_rate": 4.634240579897299e-06, |
| "loss": 0.2921, |
| "step": 3840 |
| }, |
| { |
| "epoch": 1.717983043284248, |
| "grad_norm": 0.2600468326834046, |
| "learning_rate": 4.608351613129841e-06, |
| "loss": 0.2835, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.7224453369031683, |
| "grad_norm": 0.2793462803832517, |
| "learning_rate": 4.582473206859498e-06, |
| "loss": 0.2882, |
| "step": 3860 |
| }, |
| { |
| "epoch": 1.7269076305220885, |
| "grad_norm": 0.2868869922030311, |
| "learning_rate": 4.556606058877567e-06, |
| "loss": 0.2883, |
| "step": 3870 |
| }, |
| { |
| "epoch": 1.7313699241410085, |
| "grad_norm": 0.3862045749220281, |
| "learning_rate": 4.530750866671769e-06, |
| "loss": 0.2924, |
| "step": 3880 |
| }, |
| { |
| "epoch": 1.7358322177599286, |
| "grad_norm": 0.2561819626783043, |
| "learning_rate": 4.504908327407452e-06, |
| "loss": 0.2902, |
| "step": 3890 |
| }, |
| { |
| "epoch": 1.7402945113788486, |
| "grad_norm": 0.26603791907774466, |
| "learning_rate": 4.479079137908781e-06, |
| "loss": 0.2753, |
| "step": 3900 |
| }, |
| { |
| "epoch": 1.7447568049977689, |
| "grad_norm": 0.30516815757249277, |
| "learning_rate": 4.453263994639959e-06, |
| "loss": 0.293, |
| "step": 3910 |
| }, |
| { |
| "epoch": 1.7492190986166891, |
| "grad_norm": 0.2689881363760943, |
| "learning_rate": 4.427463593686442e-06, |
| "loss": 0.2947, |
| "step": 3920 |
| }, |
| { |
| "epoch": 1.7536813922356091, |
| "grad_norm": 0.26851951669717095, |
| "learning_rate": 4.401678630736172e-06, |
| "loss": 0.2835, |
| "step": 3930 |
| }, |
| { |
| "epoch": 1.7581436858545292, |
| "grad_norm": 0.26883125019727905, |
| "learning_rate": 4.3759098010608155e-06, |
| "loss": 0.2782, |
| "step": 3940 |
| }, |
| { |
| "epoch": 1.7626059794734492, |
| "grad_norm": 0.25218594985078024, |
| "learning_rate": 4.350157799497017e-06, |
| "loss": 0.2893, |
| "step": 3950 |
| }, |
| { |
| "epoch": 1.7670682730923695, |
| "grad_norm": 0.26946851688893303, |
| "learning_rate": 4.324423320427669e-06, |
| "loss": 0.279, |
| "step": 3960 |
| }, |
| { |
| "epoch": 1.7715305667112897, |
| "grad_norm": 0.2974783320098152, |
| "learning_rate": 4.298707057763175e-06, |
| "loss": 0.2855, |
| "step": 3970 |
| }, |
| { |
| "epoch": 1.7759928603302098, |
| "grad_norm": 0.2966897122885392, |
| "learning_rate": 4.273009704922757e-06, |
| "loss": 0.2884, |
| "step": 3980 |
| }, |
| { |
| "epoch": 1.7804551539491298, |
| "grad_norm": 0.2902971297459419, |
| "learning_rate": 4.24733195481574e-06, |
| "loss": 0.2796, |
| "step": 3990 |
| }, |
| { |
| "epoch": 1.7849174475680498, |
| "grad_norm": 0.296879555221403, |
| "learning_rate": 4.221674499822878e-06, |
| "loss": 0.2941, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.78937974118697, |
| "grad_norm": 0.2772609554006714, |
| "learning_rate": 4.196038031777688e-06, |
| "loss": 0.2784, |
| "step": 4010 |
| }, |
| { |
| "epoch": 1.7938420348058903, |
| "grad_norm": 0.3728763097589947, |
| "learning_rate": 4.170423241947782e-06, |
| "loss": 0.2825, |
| "step": 4020 |
| }, |
| { |
| "epoch": 1.7983043284248104, |
| "grad_norm": 0.2690864141407203, |
| "learning_rate": 4.144830821016245e-06, |
| "loss": 0.2848, |
| "step": 4030 |
| }, |
| { |
| "epoch": 1.8027666220437304, |
| "grad_norm": 0.2945504927368404, |
| "learning_rate": 4.119261459062992e-06, |
| "loss": 0.2886, |
| "step": 4040 |
| }, |
| { |
| "epoch": 1.8072289156626506, |
| "grad_norm": 0.2817141780614377, |
| "learning_rate": 4.0937158455461805e-06, |
| "loss": 0.2861, |
| "step": 4050 |
| }, |
| { |
| "epoch": 1.8116912092815707, |
| "grad_norm": 0.2908042660570454, |
| "learning_rate": 4.068194669283599e-06, |
| "loss": 0.2855, |
| "step": 4060 |
| }, |
| { |
| "epoch": 1.816153502900491, |
| "grad_norm": 0.26095488586703197, |
| "learning_rate": 4.042698618434115e-06, |
| "loss": 0.2775, |
| "step": 4070 |
| }, |
| { |
| "epoch": 1.820615796519411, |
| "grad_norm": 0.26782163829467626, |
| "learning_rate": 4.017228380479099e-06, |
| "loss": 0.2902, |
| "step": 4080 |
| }, |
| { |
| "epoch": 1.825078090138331, |
| "grad_norm": 0.2893359193551187, |
| "learning_rate": 3.991784642203904e-06, |
| "loss": 0.286, |
| "step": 4090 |
| }, |
| { |
| "epoch": 1.8295403837572513, |
| "grad_norm": 0.28187305106972493, |
| "learning_rate": 3.966368089679337e-06, |
| "loss": 0.2951, |
| "step": 4100 |
| }, |
| { |
| "epoch": 1.8340026773761715, |
| "grad_norm": 0.7149499820103131, |
| "learning_rate": 3.9409794082431585e-06, |
| "loss": 0.278, |
| "step": 4110 |
| }, |
| { |
| "epoch": 1.8384649709950915, |
| "grad_norm": 0.2781228028139923, |
| "learning_rate": 3.915619282481613e-06, |
| "loss": 0.2804, |
| "step": 4120 |
| }, |
| { |
| "epoch": 1.8429272646140116, |
| "grad_norm": 0.27787506538029344, |
| "learning_rate": 3.890288396210958e-06, |
| "loss": 0.2773, |
| "step": 4130 |
| }, |
| { |
| "epoch": 1.8473895582329316, |
| "grad_norm": 0.2775837112621051, |
| "learning_rate": 3.8649874324590355e-06, |
| "loss": 0.2891, |
| "step": 4140 |
| }, |
| { |
| "epoch": 1.8518518518518519, |
| "grad_norm": 0.27038074332915685, |
| "learning_rate": 3.839717073446842e-06, |
| "loss": 0.2819, |
| "step": 4150 |
| }, |
| { |
| "epoch": 1.8563141454707721, |
| "grad_norm": 0.27153282107037896, |
| "learning_rate": 3.8144780005701526e-06, |
| "loss": 0.2808, |
| "step": 4160 |
| }, |
| { |
| "epoch": 1.8607764390896921, |
| "grad_norm": 0.27119597948809526, |
| "learning_rate": 3.7892708943811224e-06, |
| "loss": 0.2828, |
| "step": 4170 |
| }, |
| { |
| "epoch": 1.8652387327086122, |
| "grad_norm": 0.26751701443601517, |
| "learning_rate": 3.7640964345699613e-06, |
| "loss": 0.2872, |
| "step": 4180 |
| }, |
| { |
| "epoch": 1.8697010263275322, |
| "grad_norm": 0.2668805357678453, |
| "learning_rate": 3.738955299946588e-06, |
| "loss": 0.2808, |
| "step": 4190 |
| }, |
| { |
| "epoch": 1.8741633199464525, |
| "grad_norm": 0.26385620413020505, |
| "learning_rate": 3.7138481684223316e-06, |
| "loss": 0.284, |
| "step": 4200 |
| }, |
| { |
| "epoch": 1.8786256135653727, |
| "grad_norm": 0.27804749998996886, |
| "learning_rate": 3.688775716991661e-06, |
| "loss": 0.2901, |
| "step": 4210 |
| }, |
| { |
| "epoch": 1.8830879071842928, |
| "grad_norm": 0.26632920843385016, |
| "learning_rate": 3.6637386217139158e-06, |
| "loss": 0.2817, |
| "step": 4220 |
| }, |
| { |
| "epoch": 1.8875502008032128, |
| "grad_norm": 0.28107011263915094, |
| "learning_rate": 3.6387375576950902e-06, |
| "loss": 0.2875, |
| "step": 4230 |
| }, |
| { |
| "epoch": 1.8920124944221328, |
| "grad_norm": 0.27319254801532444, |
| "learning_rate": 3.613773199069618e-06, |
| "loss": 0.2893, |
| "step": 4240 |
| }, |
| { |
| "epoch": 1.896474788041053, |
| "grad_norm": 0.2895639115697261, |
| "learning_rate": 3.588846218982204e-06, |
| "loss": 0.2869, |
| "step": 4250 |
| }, |
| { |
| "epoch": 1.9009370816599733, |
| "grad_norm": 0.27271032068079937, |
| "learning_rate": 3.563957289569669e-06, |
| "loss": 0.2869, |
| "step": 4260 |
| }, |
| { |
| "epoch": 1.9053993752788934, |
| "grad_norm": 0.26855796153698186, |
| "learning_rate": 3.5391070819428246e-06, |
| "loss": 0.2857, |
| "step": 4270 |
| }, |
| { |
| "epoch": 1.9098616688978134, |
| "grad_norm": 0.28223224193709756, |
| "learning_rate": 3.514296266168381e-06, |
| "loss": 0.2915, |
| "step": 4280 |
| }, |
| { |
| "epoch": 1.9143239625167336, |
| "grad_norm": 0.2641850196296661, |
| "learning_rate": 3.4895255112508773e-06, |
| "loss": 0.2762, |
| "step": 4290 |
| }, |
| { |
| "epoch": 1.9187862561356537, |
| "grad_norm": 0.3420858629489308, |
| "learning_rate": 3.4647954851146437e-06, |
| "loss": 0.2925, |
| "step": 4300 |
| }, |
| { |
| "epoch": 1.923248549754574, |
| "grad_norm": 0.2604588316374171, |
| "learning_rate": 3.4401068545857843e-06, |
| "loss": 0.2822, |
| "step": 4310 |
| }, |
| { |
| "epoch": 1.927710843373494, |
| "grad_norm": 0.2715732112484208, |
| "learning_rate": 3.4154602853742115e-06, |
| "loss": 0.2764, |
| "step": 4320 |
| }, |
| { |
| "epoch": 1.932173136992414, |
| "grad_norm": 0.29731471914640445, |
| "learning_rate": 3.3908564420556778e-06, |
| "loss": 0.284, |
| "step": 4330 |
| }, |
| { |
| "epoch": 1.9366354306113343, |
| "grad_norm": 0.299436802757538, |
| "learning_rate": 3.3662959880538744e-06, |
| "loss": 0.2785, |
| "step": 4340 |
| }, |
| { |
| "epoch": 1.9410977242302545, |
| "grad_norm": 0.25848952586710894, |
| "learning_rate": 3.341779585622522e-06, |
| "loss": 0.2782, |
| "step": 4350 |
| }, |
| { |
| "epoch": 1.9455600178491745, |
| "grad_norm": 0.26318821809460174, |
| "learning_rate": 3.3173078958275355e-06, |
| "loss": 0.2772, |
| "step": 4360 |
| }, |
| { |
| "epoch": 1.9500223114680946, |
| "grad_norm": 0.2849012354934153, |
| "learning_rate": 3.292881578529179e-06, |
| "loss": 0.2878, |
| "step": 4370 |
| }, |
| { |
| "epoch": 1.9544846050870146, |
| "grad_norm": 0.28212180104540435, |
| "learning_rate": 3.268501292364289e-06, |
| "loss": 0.2765, |
| "step": 4380 |
| }, |
| { |
| "epoch": 1.9589468987059349, |
| "grad_norm": 0.28217071136679106, |
| "learning_rate": 3.2441676947285035e-06, |
| "loss": 0.2841, |
| "step": 4390 |
| }, |
| { |
| "epoch": 1.9634091923248551, |
| "grad_norm": 0.4014867685340475, |
| "learning_rate": 3.219881441758541e-06, |
| "loss": 0.2842, |
| "step": 4400 |
| }, |
| { |
| "epoch": 1.9678714859437751, |
| "grad_norm": 0.26510923757329297, |
| "learning_rate": 3.19564318831451e-06, |
| "loss": 0.282, |
| "step": 4410 |
| }, |
| { |
| "epoch": 1.9723337795626952, |
| "grad_norm": 0.26724901583056526, |
| "learning_rate": 3.171453587962246e-06, |
| "loss": 0.2829, |
| "step": 4420 |
| }, |
| { |
| "epoch": 1.9767960731816152, |
| "grad_norm": 0.25923632674916225, |
| "learning_rate": 3.1473132929556927e-06, |
| "loss": 0.2879, |
| "step": 4430 |
| }, |
| { |
| "epoch": 1.9812583668005355, |
| "grad_norm": 0.2819718374318917, |
| "learning_rate": 3.1232229542193126e-06, |
| "loss": 0.2887, |
| "step": 4440 |
| }, |
| { |
| "epoch": 1.9857206604194557, |
| "grad_norm": 0.27554261636665484, |
| "learning_rate": 3.0991832213305367e-06, |
| "loss": 0.2868, |
| "step": 4450 |
| }, |
| { |
| "epoch": 1.9901829540383758, |
| "grad_norm": 0.3200504664553153, |
| "learning_rate": 3.0751947425022465e-06, |
| "loss": 0.2796, |
| "step": 4460 |
| }, |
| { |
| "epoch": 1.9946452476572958, |
| "grad_norm": 0.2610653468140633, |
| "learning_rate": 3.0512581645653007e-06, |
| "loss": 0.2911, |
| "step": 4470 |
| }, |
| { |
| "epoch": 1.9991075412762158, |
| "grad_norm": 0.3126154839381511, |
| "learning_rate": 3.0273741329510852e-06, |
| "loss": 0.287, |
| "step": 4480 |
| }, |
| { |
| "epoch": 2.0035698348951363, |
| "grad_norm": 0.29397339441332226, |
| "learning_rate": 3.0035432916741215e-06, |
| "loss": 0.2665, |
| "step": 4490 |
| }, |
| { |
| "epoch": 2.0080321285140563, |
| "grad_norm": 0.275630796936842, |
| "learning_rate": 2.979766283314688e-06, |
| "loss": 0.2643, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.0124944221329764, |
| "grad_norm": 0.26340335226827566, |
| "learning_rate": 2.9560437490015013e-06, |
| "loss": 0.2727, |
| "step": 4510 |
| }, |
| { |
| "epoch": 2.0169567157518964, |
| "grad_norm": 0.30161717213091244, |
| "learning_rate": 2.9323763283944338e-06, |
| "loss": 0.2625, |
| "step": 4520 |
| }, |
| { |
| "epoch": 2.0214190093708164, |
| "grad_norm": 0.3171900997207839, |
| "learning_rate": 2.9087646596672487e-06, |
| "loss": 0.2598, |
| "step": 4530 |
| }, |
| { |
| "epoch": 2.025881302989737, |
| "grad_norm": 0.3222582665257153, |
| "learning_rate": 2.8852093794904136e-06, |
| "loss": 0.2652, |
| "step": 4540 |
| }, |
| { |
| "epoch": 2.030343596608657, |
| "grad_norm": 0.28771230139561266, |
| "learning_rate": 2.861711123013911e-06, |
| "loss": 0.2708, |
| "step": 4550 |
| }, |
| { |
| "epoch": 2.034805890227577, |
| "grad_norm": 0.26951340411915475, |
| "learning_rate": 2.838270523850135e-06, |
| "loss": 0.271, |
| "step": 4560 |
| }, |
| { |
| "epoch": 2.039268183846497, |
| "grad_norm": 0.26621140063523485, |
| "learning_rate": 2.8148882140567844e-06, |
| "loss": 0.2675, |
| "step": 4570 |
| }, |
| { |
| "epoch": 2.043730477465417, |
| "grad_norm": 0.30555915513232423, |
| "learning_rate": 2.7915648241198386e-06, |
| "loss": 0.263, |
| "step": 4580 |
| }, |
| { |
| "epoch": 2.0481927710843375, |
| "grad_norm": 0.2975295129021857, |
| "learning_rate": 2.7683009829365417e-06, |
| "loss": 0.2598, |
| "step": 4590 |
| }, |
| { |
| "epoch": 2.0526550647032575, |
| "grad_norm": 0.2734727711740158, |
| "learning_rate": 2.745097317798452e-06, |
| "loss": 0.268, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.0571173583221776, |
| "grad_norm": 0.29391413176160824, |
| "learning_rate": 2.7219544543745335e-06, |
| "loss": 0.2661, |
| "step": 4610 |
| }, |
| { |
| "epoch": 2.0615796519410976, |
| "grad_norm": 0.2840209475626469, |
| "learning_rate": 2.698873016694271e-06, |
| "loss": 0.2655, |
| "step": 4620 |
| }, |
| { |
| "epoch": 2.0660419455600176, |
| "grad_norm": 0.2600456780446525, |
| "learning_rate": 2.6758536271308582e-06, |
| "loss": 0.2588, |
| "step": 4630 |
| }, |
| { |
| "epoch": 2.070504239178938, |
| "grad_norm": 0.28816278556575226, |
| "learning_rate": 2.6528969063844022e-06, |
| "loss": 0.2708, |
| "step": 4640 |
| }, |
| { |
| "epoch": 2.074966532797858, |
| "grad_norm": 0.27537400614220536, |
| "learning_rate": 2.630003473465202e-06, |
| "loss": 0.2603, |
| "step": 4650 |
| }, |
| { |
| "epoch": 2.079428826416778, |
| "grad_norm": 0.2799988616872821, |
| "learning_rate": 2.6071739456770394e-06, |
| "loss": 0.265, |
| "step": 4660 |
| }, |
| { |
| "epoch": 2.083891120035698, |
| "grad_norm": 0.27519216667774027, |
| "learning_rate": 2.5844089386005512e-06, |
| "loss": 0.2615, |
| "step": 4670 |
| }, |
| { |
| "epoch": 2.0883534136546187, |
| "grad_norm": 0.2817941036734103, |
| "learning_rate": 2.5617090660766218e-06, |
| "loss": 0.2747, |
| "step": 4680 |
| }, |
| { |
| "epoch": 2.0928157072735387, |
| "grad_norm": 0.2538075181305876, |
| "learning_rate": 2.5390749401898274e-06, |
| "loss": 0.2705, |
| "step": 4690 |
| }, |
| { |
| "epoch": 2.0972780008924587, |
| "grad_norm": 0.30549102286988794, |
| "learning_rate": 2.5165071712519447e-06, |
| "loss": 0.2751, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.101740294511379, |
| "grad_norm": 0.2794818884211519, |
| "learning_rate": 2.4940063677854775e-06, |
| "loss": 0.2668, |
| "step": 4710 |
| }, |
| { |
| "epoch": 2.106202588130299, |
| "grad_norm": 0.27303724559190734, |
| "learning_rate": 2.4715731365072666e-06, |
| "loss": 0.2628, |
| "step": 4720 |
| }, |
| { |
| "epoch": 2.1106648817492193, |
| "grad_norm": 0.29272010242374324, |
| "learning_rate": 2.449208082312111e-06, |
| "loss": 0.2647, |
| "step": 4730 |
| }, |
| { |
| "epoch": 2.1151271753681393, |
| "grad_norm": 0.270738388759199, |
| "learning_rate": 2.4269118082564774e-06, |
| "loss": 0.2617, |
| "step": 4740 |
| }, |
| { |
| "epoch": 2.1195894689870594, |
| "grad_norm": 0.2840129241196674, |
| "learning_rate": 2.4046849155422193e-06, |
| "loss": 0.274, |
| "step": 4750 |
| }, |
| { |
| "epoch": 2.1240517626059794, |
| "grad_norm": 0.2645544390527427, |
| "learning_rate": 2.382528003500384e-06, |
| "loss": 0.2686, |
| "step": 4760 |
| }, |
| { |
| "epoch": 2.1285140562248994, |
| "grad_norm": 0.2865191645329176, |
| "learning_rate": 2.3604416695750364e-06, |
| "loss": 0.2601, |
| "step": 4770 |
| }, |
| { |
| "epoch": 2.13297634984382, |
| "grad_norm": 0.2656950110555082, |
| "learning_rate": 2.3384265093071645e-06, |
| "loss": 0.2652, |
| "step": 4780 |
| }, |
| { |
| "epoch": 2.13743864346274, |
| "grad_norm": 0.2912542173901888, |
| "learning_rate": 2.316483116318608e-06, |
| "loss": 0.2569, |
| "step": 4790 |
| }, |
| { |
| "epoch": 2.14190093708166, |
| "grad_norm": 0.2747221058539636, |
| "learning_rate": 2.2946120822960562e-06, |
| "loss": 0.2662, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.14636323070058, |
| "grad_norm": 0.27225697835899576, |
| "learning_rate": 2.2728139969751005e-06, |
| "loss": 0.267, |
| "step": 4810 |
| }, |
| { |
| "epoch": 2.1508255243195, |
| "grad_norm": 0.2526714399887326, |
| "learning_rate": 2.2510894481243205e-06, |
| "loss": 0.2659, |
| "step": 4820 |
| }, |
| { |
| "epoch": 2.1552878179384205, |
| "grad_norm": 0.2787198573344132, |
| "learning_rate": 2.2294390215294483e-06, |
| "loss": 0.2612, |
| "step": 4830 |
| }, |
| { |
| "epoch": 2.1597501115573405, |
| "grad_norm": 0.2881621459035689, |
| "learning_rate": 2.207863300977558e-06, |
| "loss": 0.2658, |
| "step": 4840 |
| }, |
| { |
| "epoch": 2.1642124051762606, |
| "grad_norm": 0.2651959620965174, |
| "learning_rate": 2.186362868241341e-06, |
| "loss": 0.2656, |
| "step": 4850 |
| }, |
| { |
| "epoch": 2.1686746987951806, |
| "grad_norm": 0.27079635036661953, |
| "learning_rate": 2.164938303063404e-06, |
| "loss": 0.2651, |
| "step": 4860 |
| }, |
| { |
| "epoch": 2.1731369924141006, |
| "grad_norm": 0.2627129405327371, |
| "learning_rate": 2.1435901831406504e-06, |
| "loss": 0.259, |
| "step": 4870 |
| }, |
| { |
| "epoch": 2.177599286033021, |
| "grad_norm": 0.28039269813095946, |
| "learning_rate": 2.1223190841086893e-06, |
| "loss": 0.2672, |
| "step": 4880 |
| }, |
| { |
| "epoch": 2.182061579651941, |
| "grad_norm": 0.2641978867337932, |
| "learning_rate": 2.1011255795263232e-06, |
| "loss": 0.2658, |
| "step": 4890 |
| }, |
| { |
| "epoch": 2.186523873270861, |
| "grad_norm": 0.26996809053622206, |
| "learning_rate": 2.080010240860083e-06, |
| "loss": 0.2714, |
| "step": 4900 |
| }, |
| { |
| "epoch": 2.190986166889781, |
| "grad_norm": 0.28824583701799006, |
| "learning_rate": 2.058973637468811e-06, |
| "loss": 0.2676, |
| "step": 4910 |
| }, |
| { |
| "epoch": 2.1954484605087012, |
| "grad_norm": 0.27005127612792346, |
| "learning_rate": 2.0380163365883188e-06, |
| "loss": 0.2738, |
| "step": 4920 |
| }, |
| { |
| "epoch": 2.1999107541276217, |
| "grad_norm": 0.2740103878032033, |
| "learning_rate": 2.01713890331608e-06, |
| "loss": 0.2579, |
| "step": 4930 |
| }, |
| { |
| "epoch": 2.2043730477465417, |
| "grad_norm": 0.2908767212364278, |
| "learning_rate": 1.996341900596008e-06, |
| "loss": 0.2696, |
| "step": 4940 |
| }, |
| { |
| "epoch": 2.208835341365462, |
| "grad_norm": 0.2950349079099773, |
| "learning_rate": 1.9756258892032604e-06, |
| "loss": 0.2645, |
| "step": 4950 |
| }, |
| { |
| "epoch": 2.213297634984382, |
| "grad_norm": 0.26050135319115303, |
| "learning_rate": 1.9549914277291326e-06, |
| "loss": 0.2642, |
| "step": 4960 |
| }, |
| { |
| "epoch": 2.2177599286033023, |
| "grad_norm": 0.26630446740688435, |
| "learning_rate": 1.9344390725659827e-06, |
| "loss": 0.2684, |
| "step": 4970 |
| }, |
| { |
| "epoch": 2.2222222222222223, |
| "grad_norm": 0.2743425147318156, |
| "learning_rate": 1.9139693778922437e-06, |
| "loss": 0.2667, |
| "step": 4980 |
| }, |
| { |
| "epoch": 2.2266845158411424, |
| "grad_norm": 0.2736976794496928, |
| "learning_rate": 1.8935828956574615e-06, |
| "loss": 0.2696, |
| "step": 4990 |
| }, |
| { |
| "epoch": 2.2311468094600624, |
| "grad_norm": 0.26998232015281187, |
| "learning_rate": 1.873280175567434e-06, |
| "loss": 0.2685, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.2356091030789824, |
| "grad_norm": 0.2668005078365468, |
| "learning_rate": 1.8530617650693671e-06, |
| "loss": 0.2658, |
| "step": 5010 |
| }, |
| { |
| "epoch": 2.240071396697903, |
| "grad_norm": 0.27297408370818904, |
| "learning_rate": 1.832928209337133e-06, |
| "loss": 0.2711, |
| "step": 5020 |
| }, |
| { |
| "epoch": 2.244533690316823, |
| "grad_norm": 0.27120763446153706, |
| "learning_rate": 1.8128800512565514e-06, |
| "loss": 0.2632, |
| "step": 5030 |
| }, |
| { |
| "epoch": 2.248995983935743, |
| "grad_norm": 0.3043844117742173, |
| "learning_rate": 1.792917831410767e-06, |
| "loss": 0.2646, |
| "step": 5040 |
| }, |
| { |
| "epoch": 2.253458277554663, |
| "grad_norm": 0.27555460116170827, |
| "learning_rate": 1.7730420880656641e-06, |
| "loss": 0.2627, |
| "step": 5050 |
| }, |
| { |
| "epoch": 2.2579205711735835, |
| "grad_norm": 0.25867849621197275, |
| "learning_rate": 1.7532533571553523e-06, |
| "loss": 0.2723, |
| "step": 5060 |
| }, |
| { |
| "epoch": 2.2623828647925035, |
| "grad_norm": 0.2696220165863831, |
| "learning_rate": 1.7335521722677223e-06, |
| "loss": 0.2567, |
| "step": 5070 |
| }, |
| { |
| "epoch": 2.2668451584114235, |
| "grad_norm": 0.2844615695492899, |
| "learning_rate": 1.7139390646300503e-06, |
| "loss": 0.2636, |
| "step": 5080 |
| }, |
| { |
| "epoch": 2.2713074520303436, |
| "grad_norm": 0.26100844869286605, |
| "learning_rate": 1.6944145630946757e-06, |
| "loss": 0.2547, |
| "step": 5090 |
| }, |
| { |
| "epoch": 2.2757697456492636, |
| "grad_norm": 0.2655209152406865, |
| "learning_rate": 1.6749791941247501e-06, |
| "loss": 0.2667, |
| "step": 5100 |
| }, |
| { |
| "epoch": 2.280232039268184, |
| "grad_norm": 0.2670441776783262, |
| "learning_rate": 1.6556334817800247e-06, |
| "loss": 0.2593, |
| "step": 5110 |
| }, |
| { |
| "epoch": 2.284694332887104, |
| "grad_norm": 0.2702953527750314, |
| "learning_rate": 1.636377947702737e-06, |
| "loss": 0.2668, |
| "step": 5120 |
| }, |
| { |
| "epoch": 2.289156626506024, |
| "grad_norm": 0.2801526427682019, |
| "learning_rate": 1.6172131111035305e-06, |
| "loss": 0.2593, |
| "step": 5130 |
| }, |
| { |
| "epoch": 2.293618920124944, |
| "grad_norm": 0.2664506709616671, |
| "learning_rate": 1.598139488747467e-06, |
| "loss": 0.2679, |
| "step": 5140 |
| }, |
| { |
| "epoch": 2.298081213743864, |
| "grad_norm": 0.2842866153483872, |
| "learning_rate": 1.5791575949400801e-06, |
| "loss": 0.2683, |
| "step": 5150 |
| }, |
| { |
| "epoch": 2.3025435073627847, |
| "grad_norm": 0.2713206331551484, |
| "learning_rate": 1.5602679415135203e-06, |
| "loss": 0.2672, |
| "step": 5160 |
| }, |
| { |
| "epoch": 2.3070058009817047, |
| "grad_norm": 0.2680243773548362, |
| "learning_rate": 1.5414710378127407e-06, |
| "loss": 0.2668, |
| "step": 5170 |
| }, |
| { |
| "epoch": 2.3114680946006247, |
| "grad_norm": 0.2937301531788135, |
| "learning_rate": 1.522767390681776e-06, |
| "loss": 0.2621, |
| "step": 5180 |
| }, |
| { |
| "epoch": 2.3159303882195448, |
| "grad_norm": 0.359404031790861, |
| "learning_rate": 1.5041575044500645e-06, |
| "loss": 0.2667, |
| "step": 5190 |
| }, |
| { |
| "epoch": 2.320392681838465, |
| "grad_norm": 0.26130760139495907, |
| "learning_rate": 1.4856418809188538e-06, |
| "loss": 0.2544, |
| "step": 5200 |
| }, |
| { |
| "epoch": 2.3248549754573853, |
| "grad_norm": 0.25686940370213246, |
| "learning_rate": 1.4672210193476766e-06, |
| "loss": 0.274, |
| "step": 5210 |
| }, |
| { |
| "epoch": 2.3293172690763053, |
| "grad_norm": 0.2939624963929075, |
| "learning_rate": 1.4488954164408736e-06, |
| "loss": 0.2701, |
| "step": 5220 |
| }, |
| { |
| "epoch": 2.3337795626952254, |
| "grad_norm": 0.2783635407103021, |
| "learning_rate": 1.4306655663342173e-06, |
| "loss": 0.2563, |
| "step": 5230 |
| }, |
| { |
| "epoch": 2.3382418563141454, |
| "grad_norm": 0.29226415842720005, |
| "learning_rate": 1.412531960581572e-06, |
| "loss": 0.2541, |
| "step": 5240 |
| }, |
| { |
| "epoch": 2.3427041499330654, |
| "grad_norm": 0.3552878374922974, |
| "learning_rate": 1.3944950881416541e-06, |
| "loss": 0.2645, |
| "step": 5250 |
| }, |
| { |
| "epoch": 2.347166443551986, |
| "grad_norm": 0.25303935909037156, |
| "learning_rate": 1.3765554353648348e-06, |
| "loss": 0.26, |
| "step": 5260 |
| }, |
| { |
| "epoch": 2.351628737170906, |
| "grad_norm": 0.26959464702039115, |
| "learning_rate": 1.3587134859800378e-06, |
| "loss": 0.2622, |
| "step": 5270 |
| }, |
| { |
| "epoch": 2.356091030789826, |
| "grad_norm": 0.2767449988752219, |
| "learning_rate": 1.3409697210816846e-06, |
| "loss": 0.2631, |
| "step": 5280 |
| }, |
| { |
| "epoch": 2.360553324408746, |
| "grad_norm": 0.2948939949561596, |
| "learning_rate": 1.3233246191167293e-06, |
| "loss": 0.2721, |
| "step": 5290 |
| }, |
| { |
| "epoch": 2.365015618027666, |
| "grad_norm": 0.2806355856045073, |
| "learning_rate": 1.3057786558717593e-06, |
| "loss": 0.2674, |
| "step": 5300 |
| }, |
| { |
| "epoch": 2.3694779116465865, |
| "grad_norm": 0.2738252090208611, |
| "learning_rate": 1.2883323044601575e-06, |
| "loss": 0.274, |
| "step": 5310 |
| }, |
| { |
| "epoch": 2.3739402052655065, |
| "grad_norm": 0.2577368049277014, |
| "learning_rate": 1.2709860353093555e-06, |
| "loss": 0.2668, |
| "step": 5320 |
| }, |
| { |
| "epoch": 2.3784024988844266, |
| "grad_norm": 0.29099029260946785, |
| "learning_rate": 1.2537403161481387e-06, |
| "loss": 0.2669, |
| "step": 5330 |
| }, |
| { |
| "epoch": 2.3828647925033466, |
| "grad_norm": 0.264039286727906, |
| "learning_rate": 1.2365956119940436e-06, |
| "loss": 0.2768, |
| "step": 5340 |
| }, |
| { |
| "epoch": 2.3873270861222666, |
| "grad_norm": 0.2686625114381172, |
| "learning_rate": 1.2195523851408153e-06, |
| "loss": 0.2735, |
| "step": 5350 |
| }, |
| { |
| "epoch": 2.391789379741187, |
| "grad_norm": 0.27917896436759787, |
| "learning_rate": 1.2026110951459364e-06, |
| "loss": 0.2709, |
| "step": 5360 |
| }, |
| { |
| "epoch": 2.396251673360107, |
| "grad_norm": 0.2759607663737845, |
| "learning_rate": 1.1857721988182468e-06, |
| "loss": 0.264, |
| "step": 5370 |
| }, |
| { |
| "epoch": 2.400713966979027, |
| "grad_norm": 0.2744901640260912, |
| "learning_rate": 1.169036150205614e-06, |
| "loss": 0.2638, |
| "step": 5380 |
| }, |
| { |
| "epoch": 2.405176260597947, |
| "grad_norm": 0.2639450101124726, |
| "learning_rate": 1.1524034005827028e-06, |
| "loss": 0.2609, |
| "step": 5390 |
| }, |
| { |
| "epoch": 2.4096385542168672, |
| "grad_norm": 0.26462733650289344, |
| "learning_rate": 1.1358743984387939e-06, |
| "loss": 0.2571, |
| "step": 5400 |
| }, |
| { |
| "epoch": 2.4141008478357877, |
| "grad_norm": 0.26467540440445797, |
| "learning_rate": 1.1194495894657021e-06, |
| "loss": 0.264, |
| "step": 5410 |
| }, |
| { |
| "epoch": 2.4185631414547077, |
| "grad_norm": 0.2541839967727397, |
| "learning_rate": 1.103129416545749e-06, |
| "loss": 0.2734, |
| "step": 5420 |
| }, |
| { |
| "epoch": 2.4230254350736278, |
| "grad_norm": 0.25980858502680365, |
| "learning_rate": 1.0869143197398313e-06, |
| "loss": 0.2711, |
| "step": 5430 |
| }, |
| { |
| "epoch": 2.427487728692548, |
| "grad_norm": 0.2749178435606242, |
| "learning_rate": 1.070804736275543e-06, |
| "loss": 0.2638, |
| "step": 5440 |
| }, |
| { |
| "epoch": 2.4319500223114683, |
| "grad_norm": 0.2507799683207913, |
| "learning_rate": 1.0548011005353975e-06, |
| "loss": 0.2639, |
| "step": 5450 |
| }, |
| { |
| "epoch": 2.4364123159303883, |
| "grad_norm": 0.27822037022845514, |
| "learning_rate": 1.0389038440451048e-06, |
| "loss": 0.2687, |
| "step": 5460 |
| }, |
| { |
| "epoch": 2.4408746095493083, |
| "grad_norm": 0.25912298984876, |
| "learning_rate": 1.0231133954619449e-06, |
| "loss": 0.2517, |
| "step": 5470 |
| }, |
| { |
| "epoch": 2.4453369031682284, |
| "grad_norm": 0.26989363458508225, |
| "learning_rate": 1.0074301805632014e-06, |
| "loss": 0.261, |
| "step": 5480 |
| }, |
| { |
| "epoch": 2.4497991967871484, |
| "grad_norm": 0.2611164113008286, |
| "learning_rate": 9.918546222346837e-07, |
| "loss": 0.2732, |
| "step": 5490 |
| }, |
| { |
| "epoch": 2.454261490406069, |
| "grad_norm": 0.26664227118678985, |
| "learning_rate": 9.763871404593295e-07, |
| "loss": 0.2635, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.458723784024989, |
| "grad_norm": 0.2845594958886921, |
| "learning_rate": 9.610281523058696e-07, |
| "loss": 0.2724, |
| "step": 5510 |
| }, |
| { |
| "epoch": 2.463186077643909, |
| "grad_norm": 0.26533071138557196, |
| "learning_rate": 9.457780719175924e-07, |
| "loss": 0.2594, |
| "step": 5520 |
| }, |
| { |
| "epoch": 2.467648371262829, |
| "grad_norm": 0.2931575613556212, |
| "learning_rate": 9.306373105011685e-07, |
| "loss": 0.2642, |
| "step": 5530 |
| }, |
| { |
| "epoch": 2.4721106648817495, |
| "grad_norm": 0.2579593656604341, |
| "learning_rate": 9.15606276315571e-07, |
| "loss": 0.2686, |
| "step": 5540 |
| }, |
| { |
| "epoch": 2.4765729585006695, |
| "grad_norm": 0.25308387491964407, |
| "learning_rate": 9.006853746610578e-07, |
| "loss": 0.2748, |
| "step": 5550 |
| }, |
| { |
| "epoch": 2.4810352521195895, |
| "grad_norm": 0.26649803073733885, |
| "learning_rate": 8.858750078682526e-07, |
| "loss": 0.2702, |
| "step": 5560 |
| }, |
| { |
| "epoch": 2.4854975457385096, |
| "grad_norm": 0.26710930733839655, |
| "learning_rate": 8.711755752872875e-07, |
| "loss": 0.2741, |
| "step": 5570 |
| }, |
| { |
| "epoch": 2.4899598393574296, |
| "grad_norm": 0.28129936297807595, |
| "learning_rate": 8.565874732770429e-07, |
| "loss": 0.2711, |
| "step": 5580 |
| }, |
| { |
| "epoch": 2.49442213297635, |
| "grad_norm": 0.2562852808371683, |
| "learning_rate": 8.421110951944533e-07, |
| "loss": 0.2729, |
| "step": 5590 |
| }, |
| { |
| "epoch": 2.49888442659527, |
| "grad_norm": 0.2705997291745019, |
| "learning_rate": 8.277468313839033e-07, |
| "loss": 0.266, |
| "step": 5600 |
| }, |
| { |
| "epoch": 2.50334672021419, |
| "grad_norm": 0.27415617746696014, |
| "learning_rate": 8.13495069166706e-07, |
| "loss": 0.2635, |
| "step": 5610 |
| }, |
| { |
| "epoch": 2.50780901383311, |
| "grad_norm": 0.268499538957006, |
| "learning_rate": 7.993561928306503e-07, |
| "loss": 0.2626, |
| "step": 5620 |
| }, |
| { |
| "epoch": 2.51227130745203, |
| "grad_norm": 0.2605445838763644, |
| "learning_rate": 7.853305836196507e-07, |
| "loss": 0.2684, |
| "step": 5630 |
| }, |
| { |
| "epoch": 2.5167336010709507, |
| "grad_norm": 0.259462323778387, |
| "learning_rate": 7.714186197234547e-07, |
| "loss": 0.2669, |
| "step": 5640 |
| }, |
| { |
| "epoch": 2.5211958946898707, |
| "grad_norm": 0.2737812098281487, |
| "learning_rate": 7.576206762674565e-07, |
| "loss": 0.2677, |
| "step": 5650 |
| }, |
| { |
| "epoch": 2.5256581883087907, |
| "grad_norm": 0.24457747335646154, |
| "learning_rate": 7.439371253025718e-07, |
| "loss": 0.2441, |
| "step": 5660 |
| }, |
| { |
| "epoch": 2.5301204819277108, |
| "grad_norm": 0.25318796192153853, |
| "learning_rate": 7.303683357952168e-07, |
| "loss": 0.2692, |
| "step": 5670 |
| }, |
| { |
| "epoch": 2.534582775546631, |
| "grad_norm": 0.25584827852861586, |
| "learning_rate": 7.169146736173477e-07, |
| "loss": 0.2696, |
| "step": 5680 |
| }, |
| { |
| "epoch": 2.5390450691655513, |
| "grad_norm": 0.2602088423877808, |
| "learning_rate": 7.035765015366047e-07, |
| "loss": 0.2668, |
| "step": 5690 |
| }, |
| { |
| "epoch": 2.5435073627844713, |
| "grad_norm": 0.2820757957669124, |
| "learning_rate": 6.903541792065265e-07, |
| "loss": 0.2771, |
| "step": 5700 |
| }, |
| { |
| "epoch": 2.5479696564033913, |
| "grad_norm": 0.289619323277333, |
| "learning_rate": 6.772480631568496e-07, |
| "loss": 0.2677, |
| "step": 5710 |
| }, |
| { |
| "epoch": 2.5524319500223114, |
| "grad_norm": 0.24262073349803384, |
| "learning_rate": 6.642585067839003e-07, |
| "loss": 0.2632, |
| "step": 5720 |
| }, |
| { |
| "epoch": 2.5568942436412314, |
| "grad_norm": 0.3063970631496104, |
| "learning_rate": 6.513858603410605e-07, |
| "loss": 0.2645, |
| "step": 5730 |
| }, |
| { |
| "epoch": 2.561356537260152, |
| "grad_norm": 0.24469985287690035, |
| "learning_rate": 6.386304709293295e-07, |
| "loss": 0.2674, |
| "step": 5740 |
| }, |
| { |
| "epoch": 2.565818830879072, |
| "grad_norm": 0.25778009113854466, |
| "learning_rate": 6.259926824879575e-07, |
| "loss": 0.2686, |
| "step": 5750 |
| }, |
| { |
| "epoch": 2.570281124497992, |
| "grad_norm": 0.2519725071505496, |
| "learning_rate": 6.134728357851777e-07, |
| "loss": 0.2614, |
| "step": 5760 |
| }, |
| { |
| "epoch": 2.574743418116912, |
| "grad_norm": 0.24559660905732697, |
| "learning_rate": 6.010712684090125e-07, |
| "loss": 0.2538, |
| "step": 5770 |
| }, |
| { |
| "epoch": 2.579205711735832, |
| "grad_norm": 0.2549259950940346, |
| "learning_rate": 5.887883147581769e-07, |
| "loss": 0.2669, |
| "step": 5780 |
| }, |
| { |
| "epoch": 2.5836680053547525, |
| "grad_norm": 0.26474210608850174, |
| "learning_rate": 5.766243060330551e-07, |
| "loss": 0.2645, |
| "step": 5790 |
| }, |
| { |
| "epoch": 2.5881302989736725, |
| "grad_norm": 0.2611149545019317, |
| "learning_rate": 5.645795702267731e-07, |
| "loss": 0.2713, |
| "step": 5800 |
| }, |
| { |
| "epoch": 2.5925925925925926, |
| "grad_norm": 0.27028998633235257, |
| "learning_rate": 5.526544321163573e-07, |
| "loss": 0.2765, |
| "step": 5810 |
| }, |
| { |
| "epoch": 2.5970548862115126, |
| "grad_norm": 0.26131292774366577, |
| "learning_rate": 5.408492132539705e-07, |
| "loss": 0.2601, |
| "step": 5820 |
| }, |
| { |
| "epoch": 2.6015171798304326, |
| "grad_norm": 0.25036551401682255, |
| "learning_rate": 5.29164231958249e-07, |
| "loss": 0.2667, |
| "step": 5830 |
| }, |
| { |
| "epoch": 2.605979473449353, |
| "grad_norm": 0.32252379358555433, |
| "learning_rate": 5.175998033057128e-07, |
| "loss": 0.2598, |
| "step": 5840 |
| }, |
| { |
| "epoch": 2.610441767068273, |
| "grad_norm": 0.26111392130818567, |
| "learning_rate": 5.061562391222752e-07, |
| "loss": 0.2708, |
| "step": 5850 |
| }, |
| { |
| "epoch": 2.614904060687193, |
| "grad_norm": 0.2610753618549455, |
| "learning_rate": 4.948338479748293e-07, |
| "loss": 0.264, |
| "step": 5860 |
| }, |
| { |
| "epoch": 2.619366354306113, |
| "grad_norm": 0.24433197279025629, |
| "learning_rate": 4.836329351629343e-07, |
| "loss": 0.2591, |
| "step": 5870 |
| }, |
| { |
| "epoch": 2.6238286479250332, |
| "grad_norm": 0.25566187611860886, |
| "learning_rate": 4.7255380271057637e-07, |
| "loss": 0.2709, |
| "step": 5880 |
| }, |
| { |
| "epoch": 2.6282909415439537, |
| "grad_norm": 0.25059259535846407, |
| "learning_rate": 4.6159674935802867e-07, |
| "loss": 0.2623, |
| "step": 5890 |
| }, |
| { |
| "epoch": 2.6327532351628737, |
| "grad_norm": 0.2623672779635096, |
| "learning_rate": 4.507620705537974e-07, |
| "loss": 0.259, |
| "step": 5900 |
| }, |
| { |
| "epoch": 2.6372155287817938, |
| "grad_norm": 0.2786335146735297, |
| "learning_rate": 4.400500584466505e-07, |
| "loss": 0.2676, |
| "step": 5910 |
| }, |
| { |
| "epoch": 2.641677822400714, |
| "grad_norm": 0.24900048144785913, |
| "learning_rate": 4.294610018777462e-07, |
| "loss": 0.263, |
| "step": 5920 |
| }, |
| { |
| "epoch": 2.646140116019634, |
| "grad_norm": 0.25847202609000797, |
| "learning_rate": 4.1899518637283753e-07, |
| "loss": 0.2677, |
| "step": 5930 |
| }, |
| { |
| "epoch": 2.6506024096385543, |
| "grad_norm": 0.2782722288055866, |
| "learning_rate": 4.0865289413458074e-07, |
| "loss": 0.2617, |
| "step": 5940 |
| }, |
| { |
| "epoch": 2.6550647032574743, |
| "grad_norm": 0.28648904043807316, |
| "learning_rate": 3.984344040349197e-07, |
| "loss": 0.2572, |
| "step": 5950 |
| }, |
| { |
| "epoch": 2.6595269968763944, |
| "grad_norm": 0.2636564981421794, |
| "learning_rate": 3.883399916075714e-07, |
| "loss": 0.2623, |
| "step": 5960 |
| }, |
| { |
| "epoch": 2.663989290495315, |
| "grad_norm": 0.2493631339678549, |
| "learning_rate": 3.783699290405901e-07, |
| "loss": 0.2649, |
| "step": 5970 |
| }, |
| { |
| "epoch": 2.6684515841142344, |
| "grad_norm": 0.2528637606092601, |
| "learning_rate": 3.6852448516903727e-07, |
| "loss": 0.2764, |
| "step": 5980 |
| }, |
| { |
| "epoch": 2.672913877733155, |
| "grad_norm": 0.2729817482325442, |
| "learning_rate": 3.588039254677211e-07, |
| "loss": 0.2622, |
| "step": 5990 |
| }, |
| { |
| "epoch": 2.677376171352075, |
| "grad_norm": 0.3108379178962195, |
| "learning_rate": 3.4920851204405026e-07, |
| "loss": 0.2614, |
| "step": 6000 |
| }, |
| { |
| "epoch": 2.681838464970995, |
| "grad_norm": 0.25516673857932876, |
| "learning_rate": 3.397385036309558e-07, |
| "loss": 0.2545, |
| "step": 6010 |
| }, |
| { |
| "epoch": 2.6863007585899155, |
| "grad_norm": 0.26499244030577884, |
| "learning_rate": 3.303941555799223e-07, |
| "loss": 0.269, |
| "step": 6020 |
| }, |
| { |
| "epoch": 2.6907630522088355, |
| "grad_norm": 0.2644403491762858, |
| "learning_rate": 3.211757198540971e-07, |
| "loss": 0.261, |
| "step": 6030 |
| }, |
| { |
| "epoch": 2.6952253458277555, |
| "grad_norm": 0.2512005184606446, |
| "learning_rate": 3.12083445021501e-07, |
| "loss": 0.2608, |
| "step": 6040 |
| }, |
| { |
| "epoch": 2.6996876394466756, |
| "grad_norm": 0.24284759524855465, |
| "learning_rate": 3.031175762483207e-07, |
| "loss": 0.2573, |
| "step": 6050 |
| }, |
| { |
| "epoch": 2.7041499330655956, |
| "grad_norm": 0.2588341633636346, |
| "learning_rate": 2.942783552923034e-07, |
| "loss": 0.2721, |
| "step": 6060 |
| }, |
| { |
| "epoch": 2.708612226684516, |
| "grad_norm": 0.27162185956855733, |
| "learning_rate": 2.8556602049623515e-07, |
| "loss": 0.2635, |
| "step": 6070 |
| }, |
| { |
| "epoch": 2.713074520303436, |
| "grad_norm": 0.24785721701569988, |
| "learning_rate": 2.769808067815127e-07, |
| "loss": 0.2654, |
| "step": 6080 |
| }, |
| { |
| "epoch": 2.717536813922356, |
| "grad_norm": 0.26379289564503067, |
| "learning_rate": 2.68522945641812e-07, |
| "loss": 0.2703, |
| "step": 6090 |
| }, |
| { |
| "epoch": 2.721999107541276, |
| "grad_norm": 0.25223927852747313, |
| "learning_rate": 2.6019266513684525e-07, |
| "loss": 0.2633, |
| "step": 6100 |
| }, |
| { |
| "epoch": 2.726461401160196, |
| "grad_norm": 0.26022587215739407, |
| "learning_rate": 2.5199018988620925e-07, |
| "loss": 0.2628, |
| "step": 6110 |
| }, |
| { |
| "epoch": 2.7309236947791167, |
| "grad_norm": 0.25618186440473806, |
| "learning_rate": 2.439157410633336e-07, |
| "loss": 0.2549, |
| "step": 6120 |
| }, |
| { |
| "epoch": 2.7353859883980367, |
| "grad_norm": 0.2709291288264617, |
| "learning_rate": 2.3596953638951093e-07, |
| "loss": 0.2673, |
| "step": 6130 |
| }, |
| { |
| "epoch": 2.7398482820169567, |
| "grad_norm": 0.3097723757526494, |
| "learning_rate": 2.2815179012803056e-07, |
| "loss": 0.2667, |
| "step": 6140 |
| }, |
| { |
| "epoch": 2.7443105756358768, |
| "grad_norm": 0.26215128271049865, |
| "learning_rate": 2.2046271307839928e-07, |
| "loss": 0.2659, |
| "step": 6150 |
| }, |
| { |
| "epoch": 2.748772869254797, |
| "grad_norm": 0.26937507169769476, |
| "learning_rate": 2.1290251257065852e-07, |
| "loss": 0.2647, |
| "step": 6160 |
| }, |
| { |
| "epoch": 2.7532351628737173, |
| "grad_norm": 0.28086965259228747, |
| "learning_rate": 2.054713924597923e-07, |
| "loss": 0.2596, |
| "step": 6170 |
| }, |
| { |
| "epoch": 2.7576974564926373, |
| "grad_norm": 0.25164611443705137, |
| "learning_rate": 1.981695531202299e-07, |
| "loss": 0.2613, |
| "step": 6180 |
| }, |
| { |
| "epoch": 2.7621597501115573, |
| "grad_norm": 0.26328055433222686, |
| "learning_rate": 1.9099719144044737e-07, |
| "loss": 0.2585, |
| "step": 6190 |
| }, |
| { |
| "epoch": 2.7666220437304774, |
| "grad_norm": 0.24268837305344704, |
| "learning_rate": 1.8395450081765133e-07, |
| "loss": 0.2594, |
| "step": 6200 |
| }, |
| { |
| "epoch": 2.7710843373493974, |
| "grad_norm": 0.27208555089507047, |
| "learning_rate": 1.7704167115257242e-07, |
| "loss": 0.2701, |
| "step": 6210 |
| }, |
| { |
| "epoch": 2.775546630968318, |
| "grad_norm": 0.2592924605339396, |
| "learning_rate": 1.7025888884433682e-07, |
| "loss": 0.258, |
| "step": 6220 |
| }, |
| { |
| "epoch": 2.780008924587238, |
| "grad_norm": 0.25268564338580296, |
| "learning_rate": 1.636063367854468e-07, |
| "loss": 0.2643, |
| "step": 6230 |
| }, |
| { |
| "epoch": 2.784471218206158, |
| "grad_norm": 0.25499560352534534, |
| "learning_rate": 1.5708419435684463e-07, |
| "loss": 0.2592, |
| "step": 6240 |
| }, |
| { |
| "epoch": 2.788933511825078, |
| "grad_norm": 0.2754254434988614, |
| "learning_rate": 1.506926374230777e-07, |
| "loss": 0.2685, |
| "step": 6250 |
| }, |
| { |
| "epoch": 2.793395805443998, |
| "grad_norm": 0.24677760129233578, |
| "learning_rate": 1.4443183832755558e-07, |
| "loss": 0.2668, |
| "step": 6260 |
| }, |
| { |
| "epoch": 2.7978580990629185, |
| "grad_norm": 0.2419766232491537, |
| "learning_rate": 1.3830196588790535e-07, |
| "loss": 0.2649, |
| "step": 6270 |
| }, |
| { |
| "epoch": 2.8023203926818385, |
| "grad_norm": 0.25179145421288907, |
| "learning_rate": 1.3230318539141586e-07, |
| "loss": 0.2613, |
| "step": 6280 |
| }, |
| { |
| "epoch": 2.8067826863007586, |
| "grad_norm": 0.24418794826639928, |
| "learning_rate": 1.2643565859058182e-07, |
| "loss": 0.2735, |
| "step": 6290 |
| }, |
| { |
| "epoch": 2.8112449799196786, |
| "grad_norm": 0.2484899106044706, |
| "learning_rate": 1.206995436987457e-07, |
| "loss": 0.2676, |
| "step": 6300 |
| }, |
| { |
| "epoch": 2.8157072735385986, |
| "grad_norm": 0.2542428658067045, |
| "learning_rate": 1.1509499538582768e-07, |
| "loss": 0.2634, |
| "step": 6310 |
| }, |
| { |
| "epoch": 2.820169567157519, |
| "grad_norm": 0.24625678831935105, |
| "learning_rate": 1.0962216477415632e-07, |
| "loss": 0.2644, |
| "step": 6320 |
| }, |
| { |
| "epoch": 2.824631860776439, |
| "grad_norm": 0.2557578526542899, |
| "learning_rate": 1.0428119943439396e-07, |
| "loss": 0.2697, |
| "step": 6330 |
| }, |
| { |
| "epoch": 2.829094154395359, |
| "grad_norm": 0.2626526421729072, |
| "learning_rate": 9.907224338155774e-08, |
| "loss": 0.2641, |
| "step": 6340 |
| }, |
| { |
| "epoch": 2.833556448014279, |
| "grad_norm": 0.24742251678799534, |
| "learning_rate": 9.399543707113601e-08, |
| "loss": 0.2672, |
| "step": 6350 |
| }, |
| { |
| "epoch": 2.8380187416331992, |
| "grad_norm": 0.24976844344830124, |
| "learning_rate": 8.905091739530026e-08, |
| "loss": 0.2642, |
| "step": 6360 |
| }, |
| { |
| "epoch": 2.8424810352521197, |
| "grad_norm": 0.2434658676513343, |
| "learning_rate": 8.423881767921637e-08, |
| "loss": 0.2666, |
| "step": 6370 |
| }, |
| { |
| "epoch": 2.8469433288710397, |
| "grad_norm": 0.2709418898144275, |
| "learning_rate": 7.955926767744649e-08, |
| "loss": 0.2678, |
| "step": 6380 |
| }, |
| { |
| "epoch": 2.8514056224899598, |
| "grad_norm": 0.2682012558379756, |
| "learning_rate": 7.501239357045275e-08, |
| "loss": 0.2599, |
| "step": 6390 |
| }, |
| { |
| "epoch": 2.85586791610888, |
| "grad_norm": 0.2541699897689904, |
| "learning_rate": 7.059831796119243e-08, |
| "loss": 0.2637, |
| "step": 6400 |
| }, |
| { |
| "epoch": 2.8603302097278, |
| "grad_norm": 0.2786116189525862, |
| "learning_rate": 6.631715987181653e-08, |
| "loss": 0.2633, |
| "step": 6410 |
| }, |
| { |
| "epoch": 2.8647925033467203, |
| "grad_norm": 0.2549013358787648, |
| "learning_rate": 6.216903474045411e-08, |
| "loss": 0.2675, |
| "step": 6420 |
| }, |
| { |
| "epoch": 2.8692547969656403, |
| "grad_norm": 0.24035403326033933, |
| "learning_rate": 5.815405441810584e-08, |
| "loss": 0.2704, |
| "step": 6430 |
| }, |
| { |
| "epoch": 2.8737170905845604, |
| "grad_norm": 0.24485345786981255, |
| "learning_rate": 5.427232716562314e-08, |
| "loss": 0.2654, |
| "step": 6440 |
| }, |
| { |
| "epoch": 2.878179384203481, |
| "grad_norm": 0.2509308995806849, |
| "learning_rate": 5.05239576507921e-08, |
| "loss": 0.2676, |
| "step": 6450 |
| }, |
| { |
| "epoch": 2.8826416778224004, |
| "grad_norm": 0.25125126609422216, |
| "learning_rate": 4.690904694550913e-08, |
| "loss": 0.2659, |
| "step": 6460 |
| }, |
| { |
| "epoch": 2.887103971441321, |
| "grad_norm": 0.29884930114175684, |
| "learning_rate": 4.342769252305867e-08, |
| "loss": 0.2659, |
| "step": 6470 |
| }, |
| { |
| "epoch": 2.891566265060241, |
| "grad_norm": 0.2621295720462137, |
| "learning_rate": 4.007998825548032e-08, |
| "loss": 0.2635, |
| "step": 6480 |
| }, |
| { |
| "epoch": 2.896028558679161, |
| "grad_norm": 0.2559890093701122, |
| "learning_rate": 3.686602441104137e-08, |
| "loss": 0.2579, |
| "step": 6490 |
| }, |
| { |
| "epoch": 2.9004908522980815, |
| "grad_norm": 0.294711369331024, |
| "learning_rate": 3.378588765180268e-08, |
| "loss": 0.2596, |
| "step": 6500 |
| }, |
| { |
| "epoch": 2.9049531459170015, |
| "grad_norm": 0.2406776827039642, |
| "learning_rate": 3.083966103127833e-08, |
| "loss": 0.2628, |
| "step": 6510 |
| }, |
| { |
| "epoch": 2.9094154395359215, |
| "grad_norm": 0.2592651922892189, |
| "learning_rate": 2.8027423992201265e-08, |
| "loss": 0.2586, |
| "step": 6520 |
| }, |
| { |
| "epoch": 2.9138777331548416, |
| "grad_norm": 0.2544628675769128, |
| "learning_rate": 2.5349252364376132e-08, |
| "loss": 0.2636, |
| "step": 6530 |
| }, |
| { |
| "epoch": 2.9183400267737616, |
| "grad_norm": 0.24915354540620344, |
| "learning_rate": 2.280521836263927e-08, |
| "loss": 0.2736, |
| "step": 6540 |
| }, |
| { |
| "epoch": 2.922802320392682, |
| "grad_norm": 0.25636383494451315, |
| "learning_rate": 2.0395390584908027e-08, |
| "loss": 0.2604, |
| "step": 6550 |
| }, |
| { |
| "epoch": 2.927264614011602, |
| "grad_norm": 0.23743990674304524, |
| "learning_rate": 1.8119834010332236e-08, |
| "loss": 0.2644, |
| "step": 6560 |
| }, |
| { |
| "epoch": 2.931726907630522, |
| "grad_norm": 0.2421382237738656, |
| "learning_rate": 1.5978609997542306e-08, |
| "loss": 0.2695, |
| "step": 6570 |
| }, |
| { |
| "epoch": 2.936189201249442, |
| "grad_norm": 0.25987130839419936, |
| "learning_rate": 1.3971776282994398e-08, |
| "loss": 0.2612, |
| "step": 6580 |
| }, |
| { |
| "epoch": 2.940651494868362, |
| "grad_norm": 0.28118171114124185, |
| "learning_rate": 1.2099386979414484e-08, |
| "loss": 0.2727, |
| "step": 6590 |
| }, |
| { |
| "epoch": 2.9451137884872827, |
| "grad_norm": 0.25875766974667813, |
| "learning_rate": 1.0361492574337827e-08, |
| "loss": 0.2599, |
| "step": 6600 |
| }, |
| { |
| "epoch": 2.9495760821062027, |
| "grad_norm": 0.24856208937215735, |
| "learning_rate": 8.758139928748966e-09, |
| "loss": 0.2585, |
| "step": 6610 |
| }, |
| { |
| "epoch": 2.9540383757251227, |
| "grad_norm": 0.2605005110493228, |
| "learning_rate": 7.289372275816608e-09, |
| "loss": 0.263, |
| "step": 6620 |
| }, |
| { |
| "epoch": 2.9585006693440428, |
| "grad_norm": 0.27730135009271706, |
| "learning_rate": 5.95522921973013e-09, |
| "loss": 0.267, |
| "step": 6630 |
| }, |
| { |
| "epoch": 2.962962962962963, |
| "grad_norm": 0.25897469150617725, |
| "learning_rate": 4.7557467346281975e-09, |
| "loss": 0.2682, |
| "step": 6640 |
| }, |
| { |
| "epoch": 2.9674252565818833, |
| "grad_norm": 0.240962238635422, |
| "learning_rate": 3.690957163633435e-09, |
| "loss": 0.2604, |
| "step": 6650 |
| }, |
| { |
| "epoch": 2.9718875502008033, |
| "grad_norm": 0.29524556685558273, |
| "learning_rate": 2.760889217976459e-09, |
| "loss": 0.2694, |
| "step": 6660 |
| }, |
| { |
| "epoch": 2.9763498438197233, |
| "grad_norm": 0.2399731201737577, |
| "learning_rate": 1.9655679762220494e-09, |
| "loss": 0.2615, |
| "step": 6670 |
| }, |
| { |
| "epoch": 2.9808121374386434, |
| "grad_norm": 0.26022562581894665, |
| "learning_rate": 1.305014883595801e-09, |
| "loss": 0.2624, |
| "step": 6680 |
| }, |
| { |
| "epoch": 2.9852744310575634, |
| "grad_norm": 0.25348317394477593, |
| "learning_rate": 7.792477514034779e-10, |
| "loss": 0.2586, |
| "step": 6690 |
| }, |
| { |
| "epoch": 2.989736724676484, |
| "grad_norm": 0.2596395509558711, |
| "learning_rate": 3.882807565502855e-10, |
| "loss": 0.2687, |
| "step": 6700 |
| }, |
| { |
| "epoch": 2.994199018295404, |
| "grad_norm": 0.25130314991574476, |
| "learning_rate": 1.3212444115950907e-10, |
| "loss": 0.2606, |
| "step": 6710 |
| }, |
| { |
| "epoch": 2.998661311914324, |
| "grad_norm": 0.27465854203632456, |
| "learning_rate": 1.0785712290517503e-11, |
| "loss": 0.26, |
| "step": 6720 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 6723, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 10000000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.6290612428931072e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |