wommr1_16 / trainer_state.json
LHL3341's picture
upload
5f25087 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 6723,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.004462293618920125,
"grad_norm": 3.7051006087750524,
"learning_rate": 1.337295690936107e-07,
"loss": 0.5444,
"step": 10
},
{
"epoch": 0.00892458723784025,
"grad_norm": 3.048784847442942,
"learning_rate": 2.823179791976226e-07,
"loss": 0.5572,
"step": 20
},
{
"epoch": 0.013386880856760375,
"grad_norm": 2.048957705438653,
"learning_rate": 4.309063893016345e-07,
"loss": 0.5423,
"step": 30
},
{
"epoch": 0.0178491744756805,
"grad_norm": 1.461567031084094,
"learning_rate": 5.794947994056464e-07,
"loss": 0.5134,
"step": 40
},
{
"epoch": 0.022311468094600623,
"grad_norm": 1.0103720435475612,
"learning_rate": 7.280832095096584e-07,
"loss": 0.4907,
"step": 50
},
{
"epoch": 0.02677376171352075,
"grad_norm": 0.6889844403965782,
"learning_rate": 8.766716196136702e-07,
"loss": 0.4818,
"step": 60
},
{
"epoch": 0.031236055332440876,
"grad_norm": 0.5268842063919165,
"learning_rate": 1.025260029717682e-06,
"loss": 0.4483,
"step": 70
},
{
"epoch": 0.035698348951361,
"grad_norm": 0.46396807974468,
"learning_rate": 1.173848439821694e-06,
"loss": 0.4457,
"step": 80
},
{
"epoch": 0.040160642570281124,
"grad_norm": 0.45432241367642123,
"learning_rate": 1.322436849925706e-06,
"loss": 0.4398,
"step": 90
},
{
"epoch": 0.04462293618920125,
"grad_norm": 0.3566878144571345,
"learning_rate": 1.4710252600297179e-06,
"loss": 0.4295,
"step": 100
},
{
"epoch": 0.049085229808121376,
"grad_norm": 0.3076647669838012,
"learning_rate": 1.6196136701337297e-06,
"loss": 0.4126,
"step": 110
},
{
"epoch": 0.0535475234270415,
"grad_norm": 0.30260053228577904,
"learning_rate": 1.7682020802377416e-06,
"loss": 0.4021,
"step": 120
},
{
"epoch": 0.05800981704596162,
"grad_norm": 0.3122317267903143,
"learning_rate": 1.9167904903417537e-06,
"loss": 0.3946,
"step": 130
},
{
"epoch": 0.06247211066488175,
"grad_norm": 0.3476751821982303,
"learning_rate": 2.0653789004457653e-06,
"loss": 0.3965,
"step": 140
},
{
"epoch": 0.06693440428380187,
"grad_norm": 0.33812027755011803,
"learning_rate": 2.213967310549777e-06,
"loss": 0.4031,
"step": 150
},
{
"epoch": 0.071396697902722,
"grad_norm": 0.3120124573653305,
"learning_rate": 2.362555720653789e-06,
"loss": 0.3984,
"step": 160
},
{
"epoch": 0.07585899152164212,
"grad_norm": 0.32219654623219857,
"learning_rate": 2.511144130757801e-06,
"loss": 0.382,
"step": 170
},
{
"epoch": 0.08032128514056225,
"grad_norm": 0.37321844110875607,
"learning_rate": 2.659732540861813e-06,
"loss": 0.397,
"step": 180
},
{
"epoch": 0.08478357875948238,
"grad_norm": 0.34121006704676865,
"learning_rate": 2.808320950965825e-06,
"loss": 0.3877,
"step": 190
},
{
"epoch": 0.0892458723784025,
"grad_norm": 0.34259431719098377,
"learning_rate": 2.956909361069837e-06,
"loss": 0.3778,
"step": 200
},
{
"epoch": 0.09370816599732262,
"grad_norm": 0.4634153760914143,
"learning_rate": 3.1054977711738487e-06,
"loss": 0.3838,
"step": 210
},
{
"epoch": 0.09817045961624275,
"grad_norm": 0.34508537944318013,
"learning_rate": 3.2540861812778607e-06,
"loss": 0.3823,
"step": 220
},
{
"epoch": 0.10263275323516287,
"grad_norm": 0.38159809656131194,
"learning_rate": 3.4026745913818724e-06,
"loss": 0.3641,
"step": 230
},
{
"epoch": 0.107095046854083,
"grad_norm": 0.3204989890164747,
"learning_rate": 3.5512630014858845e-06,
"loss": 0.3819,
"step": 240
},
{
"epoch": 0.11155734047300313,
"grad_norm": 0.3576572957193155,
"learning_rate": 3.6998514115898966e-06,
"loss": 0.3727,
"step": 250
},
{
"epoch": 0.11601963409192324,
"grad_norm": 0.35700198129793526,
"learning_rate": 3.848439821693909e-06,
"loss": 0.3563,
"step": 260
},
{
"epoch": 0.12048192771084337,
"grad_norm": 0.3192301931360082,
"learning_rate": 3.99702823179792e-06,
"loss": 0.3738,
"step": 270
},
{
"epoch": 0.1249442213297635,
"grad_norm": 0.3196766826358136,
"learning_rate": 4.145616641901932e-06,
"loss": 0.3587,
"step": 280
},
{
"epoch": 0.12940651494868363,
"grad_norm": 0.610281080707191,
"learning_rate": 4.294205052005943e-06,
"loss": 0.3664,
"step": 290
},
{
"epoch": 0.13386880856760375,
"grad_norm": 0.35583167134748195,
"learning_rate": 4.442793462109955e-06,
"loss": 0.3631,
"step": 300
},
{
"epoch": 0.13833110218652386,
"grad_norm": 0.3595334153498753,
"learning_rate": 4.591381872213967e-06,
"loss": 0.3764,
"step": 310
},
{
"epoch": 0.142793395805444,
"grad_norm": 0.3524084490333156,
"learning_rate": 4.7399702823179795e-06,
"loss": 0.3644,
"step": 320
},
{
"epoch": 0.14725568942436412,
"grad_norm": 0.3288515185706153,
"learning_rate": 4.8885586924219916e-06,
"loss": 0.3661,
"step": 330
},
{
"epoch": 0.15171798304328424,
"grad_norm": 0.3351573861983467,
"learning_rate": 5.037147102526004e-06,
"loss": 0.3532,
"step": 340
},
{
"epoch": 0.15618027666220438,
"grad_norm": 0.33700658750684853,
"learning_rate": 5.185735512630016e-06,
"loss": 0.3531,
"step": 350
},
{
"epoch": 0.1606425702811245,
"grad_norm": 0.32516141156935247,
"learning_rate": 5.334323922734027e-06,
"loss": 0.3473,
"step": 360
},
{
"epoch": 0.1651048639000446,
"grad_norm": 0.3337986135592338,
"learning_rate": 5.48291233283804e-06,
"loss": 0.3511,
"step": 370
},
{
"epoch": 0.16956715751896476,
"grad_norm": 0.3323774922346478,
"learning_rate": 5.631500742942051e-06,
"loss": 0.3586,
"step": 380
},
{
"epoch": 0.17402945113788487,
"grad_norm": 0.4792457547540971,
"learning_rate": 5.780089153046062e-06,
"loss": 0.3624,
"step": 390
},
{
"epoch": 0.178491744756805,
"grad_norm": 0.42063892972841144,
"learning_rate": 5.9286775631500745e-06,
"loss": 0.3608,
"step": 400
},
{
"epoch": 0.18295403837572513,
"grad_norm": 1.0840249935976123,
"learning_rate": 6.0772659732540865e-06,
"loss": 0.3602,
"step": 410
},
{
"epoch": 0.18741633199464525,
"grad_norm": 0.3558589627850871,
"learning_rate": 6.225854383358099e-06,
"loss": 0.3514,
"step": 420
},
{
"epoch": 0.19187862561356536,
"grad_norm": 0.40948391676162177,
"learning_rate": 6.37444279346211e-06,
"loss": 0.3568,
"step": 430
},
{
"epoch": 0.1963409192324855,
"grad_norm": 0.33870866081421813,
"learning_rate": 6.523031203566123e-06,
"loss": 0.3475,
"step": 440
},
{
"epoch": 0.20080321285140562,
"grad_norm": 0.3491791728647271,
"learning_rate": 6.671619613670134e-06,
"loss": 0.3455,
"step": 450
},
{
"epoch": 0.20526550647032574,
"grad_norm": 0.35294486681555637,
"learning_rate": 6.820208023774146e-06,
"loss": 0.358,
"step": 460
},
{
"epoch": 0.20972780008924588,
"grad_norm": 0.3561095555301197,
"learning_rate": 6.968796433878157e-06,
"loss": 0.3484,
"step": 470
},
{
"epoch": 0.214190093708166,
"grad_norm": 0.37400205186538277,
"learning_rate": 7.11738484398217e-06,
"loss": 0.3512,
"step": 480
},
{
"epoch": 0.2186523873270861,
"grad_norm": 0.368948524072989,
"learning_rate": 7.2659732540861815e-06,
"loss": 0.3564,
"step": 490
},
{
"epoch": 0.22311468094600626,
"grad_norm": 0.31992893707470843,
"learning_rate": 7.4145616641901944e-06,
"loss": 0.3491,
"step": 500
},
{
"epoch": 0.22757697456492637,
"grad_norm": 0.3196176367911485,
"learning_rate": 7.563150074294206e-06,
"loss": 0.3465,
"step": 510
},
{
"epoch": 0.2320392681838465,
"grad_norm": 0.36129211410497347,
"learning_rate": 7.711738484398219e-06,
"loss": 0.3406,
"step": 520
},
{
"epoch": 0.23650156180276663,
"grad_norm": 0.3545213437105017,
"learning_rate": 7.86032689450223e-06,
"loss": 0.3445,
"step": 530
},
{
"epoch": 0.24096385542168675,
"grad_norm": 0.35389453665592974,
"learning_rate": 8.008915304606241e-06,
"loss": 0.3459,
"step": 540
},
{
"epoch": 0.24542614904060686,
"grad_norm": 0.3543572102436423,
"learning_rate": 8.157503714710252e-06,
"loss": 0.3412,
"step": 550
},
{
"epoch": 0.249888442659527,
"grad_norm": 0.36271347062518616,
"learning_rate": 8.306092124814265e-06,
"loss": 0.3387,
"step": 560
},
{
"epoch": 0.2543507362784471,
"grad_norm": 0.4426754614509785,
"learning_rate": 8.454680534918276e-06,
"loss": 0.3491,
"step": 570
},
{
"epoch": 0.25881302989736726,
"grad_norm": 0.39369474546630295,
"learning_rate": 8.60326894502229e-06,
"loss": 0.3384,
"step": 580
},
{
"epoch": 0.26327532351628735,
"grad_norm": 0.3582656486286681,
"learning_rate": 8.7518573551263e-06,
"loss": 0.3544,
"step": 590
},
{
"epoch": 0.2677376171352075,
"grad_norm": 0.34957569136942046,
"learning_rate": 8.900445765230312e-06,
"loss": 0.3429,
"step": 600
},
{
"epoch": 0.27219991075412764,
"grad_norm": 0.3945306585524228,
"learning_rate": 9.049034175334325e-06,
"loss": 0.3478,
"step": 610
},
{
"epoch": 0.2766622043730477,
"grad_norm": 0.3424763037614921,
"learning_rate": 9.197622585438336e-06,
"loss": 0.3312,
"step": 620
},
{
"epoch": 0.28112449799196787,
"grad_norm": 0.3459851560845357,
"learning_rate": 9.346210995542349e-06,
"loss": 0.3495,
"step": 630
},
{
"epoch": 0.285586791610888,
"grad_norm": 0.3744360789694989,
"learning_rate": 9.49479940564636e-06,
"loss": 0.3483,
"step": 640
},
{
"epoch": 0.2900490852298081,
"grad_norm": 0.35814716631290894,
"learning_rate": 9.643387815750373e-06,
"loss": 0.341,
"step": 650
},
{
"epoch": 0.29451137884872824,
"grad_norm": 0.5299368509807092,
"learning_rate": 9.791976225854384e-06,
"loss": 0.3365,
"step": 660
},
{
"epoch": 0.2989736724676484,
"grad_norm": 0.38875418783791804,
"learning_rate": 9.940564635958396e-06,
"loss": 0.3424,
"step": 670
},
{
"epoch": 0.3034359660865685,
"grad_norm": 0.45411665098963555,
"learning_rate": 9.999975732158253e-06,
"loss": 0.3368,
"step": 680
},
{
"epoch": 0.3078982597054886,
"grad_norm": 0.3825559069839744,
"learning_rate": 9.999827429534007e-06,
"loss": 0.3389,
"step": 690
},
{
"epoch": 0.31236055332440876,
"grad_norm": 0.36176618602239424,
"learning_rate": 9.999544310413833e-06,
"loss": 0.3395,
"step": 700
},
{
"epoch": 0.31682284694332885,
"grad_norm": 0.40796102054124517,
"learning_rate": 9.999126382431823e-06,
"loss": 0.3435,
"step": 710
},
{
"epoch": 0.321285140562249,
"grad_norm": 0.3829565760724817,
"learning_rate": 9.998573656857085e-06,
"loss": 0.3339,
"step": 720
},
{
"epoch": 0.32574743418116914,
"grad_norm": 0.40664177051788203,
"learning_rate": 9.997886148593436e-06,
"loss": 0.3317,
"step": 730
},
{
"epoch": 0.3302097278000892,
"grad_norm": 0.3640080288216767,
"learning_rate": 9.997063876179007e-06,
"loss": 0.3452,
"step": 740
},
{
"epoch": 0.33467202141900937,
"grad_norm": 0.41068702259937845,
"learning_rate": 9.996106861785741e-06,
"loss": 0.3388,
"step": 750
},
{
"epoch": 0.3391343150379295,
"grad_norm": 0.3410374798720669,
"learning_rate": 9.995015131218794e-06,
"loss": 0.349,
"step": 760
},
{
"epoch": 0.3435966086568496,
"grad_norm": 0.37561698032089047,
"learning_rate": 9.99378871391584e-06,
"loss": 0.3375,
"step": 770
},
{
"epoch": 0.34805890227576974,
"grad_norm": 0.3844118776195893,
"learning_rate": 9.992427642946278e-06,
"loss": 0.3339,
"step": 780
},
{
"epoch": 0.3525211958946899,
"grad_norm": 0.35418994101086143,
"learning_rate": 9.990931955010335e-06,
"loss": 0.3373,
"step": 790
},
{
"epoch": 0.35698348951361,
"grad_norm": 0.3109086430490103,
"learning_rate": 9.989301690438087e-06,
"loss": 0.3255,
"step": 800
},
{
"epoch": 0.3614457831325301,
"grad_norm": 0.35125455281231016,
"learning_rate": 9.987536893188363e-06,
"loss": 0.3385,
"step": 810
},
{
"epoch": 0.36590807675145026,
"grad_norm": 0.35539785853231953,
"learning_rate": 9.98563761084756e-06,
"loss": 0.3334,
"step": 820
},
{
"epoch": 0.37037037037037035,
"grad_norm": 0.8732726071617729,
"learning_rate": 9.983603894628366e-06,
"loss": 0.3216,
"step": 830
},
{
"epoch": 0.3748326639892905,
"grad_norm": 0.3239156286291644,
"learning_rate": 9.98143579936837e-06,
"loss": 0.3395,
"step": 840
},
{
"epoch": 0.37929495760821064,
"grad_norm": 0.35416799423504325,
"learning_rate": 9.979133383528591e-06,
"loss": 0.3419,
"step": 850
},
{
"epoch": 0.3837572512271307,
"grad_norm": 0.3697582867922802,
"learning_rate": 9.976696709191899e-06,
"loss": 0.3385,
"step": 860
},
{
"epoch": 0.38821954484605087,
"grad_norm": 0.31652698452061007,
"learning_rate": 9.974125842061343e-06,
"loss": 0.3415,
"step": 870
},
{
"epoch": 0.392681838464971,
"grad_norm": 0.3806737592473093,
"learning_rate": 9.971420851458373e-06,
"loss": 0.34,
"step": 880
},
{
"epoch": 0.3971441320838911,
"grad_norm": 0.3493066568615837,
"learning_rate": 9.968581810320979e-06,
"loss": 0.3435,
"step": 890
},
{
"epoch": 0.40160642570281124,
"grad_norm": 0.3441435892323655,
"learning_rate": 9.965608795201717e-06,
"loss": 0.327,
"step": 900
},
{
"epoch": 0.4060687193217314,
"grad_norm": 0.3347599296485585,
"learning_rate": 9.96250188626565e-06,
"loss": 0.3356,
"step": 910
},
{
"epoch": 0.4105310129406515,
"grad_norm": 0.30962222868754863,
"learning_rate": 9.959261167288185e-06,
"loss": 0.3279,
"step": 920
},
{
"epoch": 0.4149933065595716,
"grad_norm": 0.48278711145118325,
"learning_rate": 9.955886725652815e-06,
"loss": 0.3358,
"step": 930
},
{
"epoch": 0.41945560017849176,
"grad_norm": 0.33458745369628223,
"learning_rate": 9.952378652348758e-06,
"loss": 0.3359,
"step": 940
},
{
"epoch": 0.42391789379741185,
"grad_norm": 0.36375248941574195,
"learning_rate": 9.948737041968509e-06,
"loss": 0.333,
"step": 950
},
{
"epoch": 0.428380187416332,
"grad_norm": 0.38000071048511164,
"learning_rate": 9.944961992705288e-06,
"loss": 0.3393,
"step": 960
},
{
"epoch": 0.43284248103525214,
"grad_norm": 0.3322040446425915,
"learning_rate": 9.941053606350389e-06,
"loss": 0.3289,
"step": 970
},
{
"epoch": 0.4373047746541722,
"grad_norm": 0.31368274498497106,
"learning_rate": 9.937011988290443e-06,
"loss": 0.323,
"step": 980
},
{
"epoch": 0.44176706827309237,
"grad_norm": 0.33379459029750647,
"learning_rate": 9.932837247504566e-06,
"loss": 0.3332,
"step": 990
},
{
"epoch": 0.4462293618920125,
"grad_norm": 0.3481057681661188,
"learning_rate": 9.928529496561428e-06,
"loss": 0.3249,
"step": 1000
},
{
"epoch": 0.4506916555109326,
"grad_norm": 0.3805120805185267,
"learning_rate": 9.924088851616216e-06,
"loss": 0.3332,
"step": 1010
},
{
"epoch": 0.45515394912985274,
"grad_norm": 0.3514086594689373,
"learning_rate": 9.919515432407502e-06,
"loss": 0.3329,
"step": 1020
},
{
"epoch": 0.4596162427487729,
"grad_norm": 0.41618025357077787,
"learning_rate": 9.914809362254013e-06,
"loss": 0.3387,
"step": 1030
},
{
"epoch": 0.464078536367693,
"grad_norm": 0.3777422639135785,
"learning_rate": 9.909970768051302e-06,
"loss": 0.3359,
"step": 1040
},
{
"epoch": 0.4685408299866131,
"grad_norm": 0.32565404467366765,
"learning_rate": 9.904999780268341e-06,
"loss": 0.3239,
"step": 1050
},
{
"epoch": 0.47300312360553326,
"grad_norm": 0.33049786269193765,
"learning_rate": 9.899896532943983e-06,
"loss": 0.3347,
"step": 1060
},
{
"epoch": 0.47746541722445335,
"grad_norm": 0.2971213231433049,
"learning_rate": 9.894661163683361e-06,
"loss": 0.3256,
"step": 1070
},
{
"epoch": 0.4819277108433735,
"grad_norm": 0.3265643495101442,
"learning_rate": 9.889293813654179e-06,
"loss": 0.3325,
"step": 1080
},
{
"epoch": 0.48639000446229363,
"grad_norm": 0.3291514794248415,
"learning_rate": 9.883794627582893e-06,
"loss": 0.327,
"step": 1090
},
{
"epoch": 0.4908522980812137,
"grad_norm": 0.3243181563194639,
"learning_rate": 9.878163753750823e-06,
"loss": 0.3311,
"step": 1100
},
{
"epoch": 0.49531459170013387,
"grad_norm": 0.31360700475820674,
"learning_rate": 9.872401343990143e-06,
"loss": 0.3349,
"step": 1110
},
{
"epoch": 0.499776885319054,
"grad_norm": 0.3427769144056513,
"learning_rate": 9.866507553679797e-06,
"loss": 0.3302,
"step": 1120
},
{
"epoch": 0.5042391789379741,
"grad_norm": 0.3268750022359849,
"learning_rate": 9.860482541741298e-06,
"loss": 0.3387,
"step": 1130
},
{
"epoch": 0.5087014725568942,
"grad_norm": 0.36779760794001126,
"learning_rate": 9.854326470634454e-06,
"loss": 0.3204,
"step": 1140
},
{
"epoch": 0.5131637661758144,
"grad_norm": 0.3437763178921102,
"learning_rate": 9.848039506352982e-06,
"loss": 0.314,
"step": 1150
},
{
"epoch": 0.5176260597947345,
"grad_norm": 0.3146556406530087,
"learning_rate": 9.841621818420032e-06,
"loss": 0.3291,
"step": 1160
},
{
"epoch": 0.5220883534136547,
"grad_norm": 0.3372062278564171,
"learning_rate": 9.835073579883614e-06,
"loss": 0.3184,
"step": 1170
},
{
"epoch": 0.5265506470325747,
"grad_norm": 0.3279517665081858,
"learning_rate": 9.82839496731194e-06,
"loss": 0.3301,
"step": 1180
},
{
"epoch": 0.5310129406514948,
"grad_norm": 0.35435668550704597,
"learning_rate": 9.821586160788652e-06,
"loss": 0.3192,
"step": 1190
},
{
"epoch": 0.535475234270415,
"grad_norm": 0.39621154055386365,
"learning_rate": 9.814647343907975e-06,
"loss": 0.3237,
"step": 1200
},
{
"epoch": 0.5399375278893351,
"grad_norm": 0.314845881198322,
"learning_rate": 9.807578703769763e-06,
"loss": 0.317,
"step": 1210
},
{
"epoch": 0.5443998215082553,
"grad_norm": 0.30404528418981586,
"learning_rate": 9.80038043097445e-06,
"loss": 0.327,
"step": 1220
},
{
"epoch": 0.5488621151271754,
"grad_norm": 0.34277234804316314,
"learning_rate": 9.793052719617921e-06,
"loss": 0.3305,
"step": 1230
},
{
"epoch": 0.5533244087460955,
"grad_norm": 0.315701512000068,
"learning_rate": 9.78559576728627e-06,
"loss": 0.3159,
"step": 1240
},
{
"epoch": 0.5577867023650156,
"grad_norm": 0.4064220753807774,
"learning_rate": 9.77800977505047e-06,
"loss": 0.3222,
"step": 1250
},
{
"epoch": 0.5622489959839357,
"grad_norm": 0.38345533251016317,
"learning_rate": 9.770294947460964e-06,
"loss": 0.3155,
"step": 1260
},
{
"epoch": 0.5667112896028559,
"grad_norm": 0.3666235687309694,
"learning_rate": 9.76245149254213e-06,
"loss": 0.3223,
"step": 1270
},
{
"epoch": 0.571173583221776,
"grad_norm": 0.3254173561944929,
"learning_rate": 9.754479621786694e-06,
"loss": 0.3253,
"step": 1280
},
{
"epoch": 0.5756358768406962,
"grad_norm": 0.36867900317972835,
"learning_rate": 9.74637955015001e-06,
"loss": 0.3234,
"step": 1290
},
{
"epoch": 0.5800981704596162,
"grad_norm": 0.3205667116149489,
"learning_rate": 9.738151496044268e-06,
"loss": 0.3238,
"step": 1300
},
{
"epoch": 0.5845604640785363,
"grad_norm": 0.35181980934522605,
"learning_rate": 9.729795681332616e-06,
"loss": 0.3212,
"step": 1310
},
{
"epoch": 0.5890227576974565,
"grad_norm": 0.3157343074157218,
"learning_rate": 9.721312331323159e-06,
"loss": 0.324,
"step": 1320
},
{
"epoch": 0.5934850513163766,
"grad_norm": 0.3267527618647797,
"learning_rate": 9.712701674762894e-06,
"loss": 0.3293,
"step": 1330
},
{
"epoch": 0.5979473449352968,
"grad_norm": 0.3709092454966587,
"learning_rate": 9.703963943831548e-06,
"loss": 0.3195,
"step": 1340
},
{
"epoch": 0.6024096385542169,
"grad_norm": 0.3404725913270717,
"learning_rate": 9.695099374135304e-06,
"loss": 0.3261,
"step": 1350
},
{
"epoch": 0.606871932173137,
"grad_norm": 0.33688175782559104,
"learning_rate": 9.686108204700456e-06,
"loss": 0.322,
"step": 1360
},
{
"epoch": 0.6113342257920571,
"grad_norm": 0.3264040693280103,
"learning_rate": 9.676990677966965e-06,
"loss": 0.3262,
"step": 1370
},
{
"epoch": 0.6157965194109772,
"grad_norm": 0.2946497602423895,
"learning_rate": 9.667747039781915e-06,
"loss": 0.3204,
"step": 1380
},
{
"epoch": 0.6202588130298974,
"grad_norm": 0.3247321923979366,
"learning_rate": 9.65837753939289e-06,
"loss": 0.3115,
"step": 1390
},
{
"epoch": 0.6247211066488175,
"grad_norm": 0.3609340508110271,
"learning_rate": 9.648882429441258e-06,
"loss": 0.3299,
"step": 1400
},
{
"epoch": 0.6291834002677377,
"grad_norm": 0.3140886682745277,
"learning_rate": 9.639261965955339e-06,
"loss": 0.3204,
"step": 1410
},
{
"epoch": 0.6336456938866577,
"grad_norm": 0.30604068166540555,
"learning_rate": 9.629516408343524e-06,
"loss": 0.3296,
"step": 1420
},
{
"epoch": 0.6381079875055778,
"grad_norm": 0.34254988617650395,
"learning_rate": 9.619646019387269e-06,
"loss": 0.3291,
"step": 1430
},
{
"epoch": 0.642570281124498,
"grad_norm": 0.34269558101387265,
"learning_rate": 9.609651065234008e-06,
"loss": 0.3308,
"step": 1440
},
{
"epoch": 0.6470325747434181,
"grad_norm": 0.3110380628077838,
"learning_rate": 9.599531815389987e-06,
"loss": 0.3231,
"step": 1450
},
{
"epoch": 0.6514948683623383,
"grad_norm": 0.3543086604772253,
"learning_rate": 9.589288542712978e-06,
"loss": 0.315,
"step": 1460
},
{
"epoch": 0.6559571619812584,
"grad_norm": 0.29688329030679655,
"learning_rate": 9.578921523404943e-06,
"loss": 0.3212,
"step": 1470
},
{
"epoch": 0.6604194556001785,
"grad_norm": 0.6477411665067504,
"learning_rate": 9.568431037004574e-06,
"loss": 0.3184,
"step": 1480
},
{
"epoch": 0.6648817492190986,
"grad_norm": 0.4105291056831329,
"learning_rate": 9.557817366379756e-06,
"loss": 0.3316,
"step": 1490
},
{
"epoch": 0.6693440428380187,
"grad_norm": 0.3108965775539251,
"learning_rate": 9.547080797719943e-06,
"loss": 0.3292,
"step": 1500
},
{
"epoch": 0.6738063364569389,
"grad_norm": 0.2925176192434652,
"learning_rate": 9.536221620528442e-06,
"loss": 0.3188,
"step": 1510
},
{
"epoch": 0.678268630075859,
"grad_norm": 0.32680827418487335,
"learning_rate": 9.5252401276146e-06,
"loss": 0.3189,
"step": 1520
},
{
"epoch": 0.6827309236947792,
"grad_norm": 0.31001306605003415,
"learning_rate": 9.514136615085917e-06,
"loss": 0.3246,
"step": 1530
},
{
"epoch": 0.6871932173136992,
"grad_norm": 0.3232212027929326,
"learning_rate": 9.502911382340056e-06,
"loss": 0.3223,
"step": 1540
},
{
"epoch": 0.6916555109326193,
"grad_norm": 0.280224760234745,
"learning_rate": 9.491564732056775e-06,
"loss": 0.3144,
"step": 1550
},
{
"epoch": 0.6961178045515395,
"grad_norm": 0.3237056713223196,
"learning_rate": 9.480096970189756e-06,
"loss": 0.3182,
"step": 1560
},
{
"epoch": 0.7005800981704596,
"grad_norm": 0.31882269101613014,
"learning_rate": 9.46850840595837e-06,
"loss": 0.3223,
"step": 1570
},
{
"epoch": 0.7050423917893798,
"grad_norm": 0.32578619260595243,
"learning_rate": 9.456799351839324e-06,
"loss": 0.3176,
"step": 1580
},
{
"epoch": 0.7095046854082999,
"grad_norm": 0.3385848752838862,
"learning_rate": 9.44497012355824e-06,
"loss": 0.3284,
"step": 1590
},
{
"epoch": 0.71396697902722,
"grad_norm": 0.3130273460464875,
"learning_rate": 9.433021040081159e-06,
"loss": 0.3195,
"step": 1600
},
{
"epoch": 0.7184292726461401,
"grad_norm": 0.3516738754933172,
"learning_rate": 9.420952423605904e-06,
"loss": 0.3267,
"step": 1610
},
{
"epoch": 0.7228915662650602,
"grad_norm": 0.3251716278941906,
"learning_rate": 9.408764599553429e-06,
"loss": 0.318,
"step": 1620
},
{
"epoch": 0.7273538598839804,
"grad_norm": 0.32226390759863277,
"learning_rate": 9.396457896559021e-06,
"loss": 0.3201,
"step": 1630
},
{
"epoch": 0.7318161535029005,
"grad_norm": 0.3035427600136032,
"learning_rate": 9.384032646463448e-06,
"loss": 0.3176,
"step": 1640
},
{
"epoch": 0.7362784471218207,
"grad_norm": 0.35729461477248853,
"learning_rate": 9.37148918430401e-06,
"loss": 0.3191,
"step": 1650
},
{
"epoch": 0.7407407407407407,
"grad_norm": 0.3103181941339719,
"learning_rate": 9.358827848305502e-06,
"loss": 0.3218,
"step": 1660
},
{
"epoch": 0.7452030343596608,
"grad_norm": 0.34994784215316105,
"learning_rate": 9.346048979871098e-06,
"loss": 0.3212,
"step": 1670
},
{
"epoch": 0.749665327978581,
"grad_norm": 0.3270663588481017,
"learning_rate": 9.333152923573146e-06,
"loss": 0.3194,
"step": 1680
},
{
"epoch": 0.7541276215975011,
"grad_norm": 0.3017394614877857,
"learning_rate": 9.320140027143869e-06,
"loss": 0.3244,
"step": 1690
},
{
"epoch": 0.7585899152164213,
"grad_norm": 0.3371418446207733,
"learning_rate": 9.307010641466e-06,
"loss": 0.3223,
"step": 1700
},
{
"epoch": 0.7630522088353414,
"grad_norm": 0.36102711717253444,
"learning_rate": 9.293765120563309e-06,
"loss": 0.3235,
"step": 1710
},
{
"epoch": 0.7675145024542614,
"grad_norm": 0.3007012064707952,
"learning_rate": 9.280403821591066e-06,
"loss": 0.3208,
"step": 1720
},
{
"epoch": 0.7719767960731816,
"grad_norm": 0.2993075622208931,
"learning_rate": 9.26692710482641e-06,
"loss": 0.3242,
"step": 1730
},
{
"epoch": 0.7764390896921017,
"grad_norm": 0.31517853620369296,
"learning_rate": 9.253335333658627e-06,
"loss": 0.3209,
"step": 1740
},
{
"epoch": 0.7809013833110219,
"grad_norm": 0.30592785176098447,
"learning_rate": 9.239628874579359e-06,
"loss": 0.3211,
"step": 1750
},
{
"epoch": 0.785363676929942,
"grad_norm": 0.3511159488999007,
"learning_rate": 9.22580809717272e-06,
"loss": 0.3159,
"step": 1760
},
{
"epoch": 0.7898259705488622,
"grad_norm": 0.3207835963226278,
"learning_rate": 9.211873374105325e-06,
"loss": 0.323,
"step": 1770
},
{
"epoch": 0.7942882641677822,
"grad_norm": 0.3169720632937591,
"learning_rate": 9.197825081116255e-06,
"loss": 0.3165,
"step": 1780
},
{
"epoch": 0.7987505577867023,
"grad_norm": 0.32507773728896977,
"learning_rate": 9.183663597006904e-06,
"loss": 0.3154,
"step": 1790
},
{
"epoch": 0.8032128514056225,
"grad_norm": 0.29822880733968704,
"learning_rate": 9.169389303630792e-06,
"loss": 0.3202,
"step": 1800
},
{
"epoch": 0.8076751450245426,
"grad_norm": 0.27944537447466095,
"learning_rate": 9.155002585883238e-06,
"loss": 0.3194,
"step": 1810
},
{
"epoch": 0.8121374386434628,
"grad_norm": 0.3257811138318699,
"learning_rate": 9.140503831691014e-06,
"loss": 0.319,
"step": 1820
},
{
"epoch": 0.8165997322623829,
"grad_norm": 0.38751538240318106,
"learning_rate": 9.125893432001856e-06,
"loss": 0.324,
"step": 1830
},
{
"epoch": 0.821062025881303,
"grad_norm": 0.35676044959951964,
"learning_rate": 9.111171780773938e-06,
"loss": 0.3207,
"step": 1840
},
{
"epoch": 0.8255243195002231,
"grad_norm": 0.33066533272987186,
"learning_rate": 9.096339274965248e-06,
"loss": 0.3225,
"step": 1850
},
{
"epoch": 0.8299866131191432,
"grad_norm": 0.30077549406173915,
"learning_rate": 9.081396314522883e-06,
"loss": 0.3156,
"step": 1860
},
{
"epoch": 0.8344489067380634,
"grad_norm": 0.4455200746218136,
"learning_rate": 9.066343302372262e-06,
"loss": 0.3208,
"step": 1870
},
{
"epoch": 0.8389112003569835,
"grad_norm": 0.29588329863262136,
"learning_rate": 9.051180644406265e-06,
"loss": 0.3158,
"step": 1880
},
{
"epoch": 0.8433734939759037,
"grad_norm": 0.34195626405047574,
"learning_rate": 9.035908749474286e-06,
"loss": 0.3048,
"step": 1890
},
{
"epoch": 0.8478357875948237,
"grad_norm": 0.6420035384465244,
"learning_rate": 9.020528029371209e-06,
"loss": 0.3072,
"step": 1900
},
{
"epoch": 0.8522980812137438,
"grad_norm": 0.3087614249615553,
"learning_rate": 9.005038898826307e-06,
"loss": 0.3178,
"step": 1910
},
{
"epoch": 0.856760374832664,
"grad_norm": 0.31279375725653125,
"learning_rate": 8.989441775492054e-06,
"loss": 0.3187,
"step": 1920
},
{
"epoch": 0.8612226684515841,
"grad_norm": 0.31745786814951604,
"learning_rate": 8.97373707993287e-06,
"loss": 0.3138,
"step": 1930
},
{
"epoch": 0.8656849620705043,
"grad_norm": 0.32729085562805416,
"learning_rate": 8.957925235613774e-06,
"loss": 0.3186,
"step": 1940
},
{
"epoch": 0.8701472556894244,
"grad_norm": 0.29401213857632397,
"learning_rate": 8.942006668888972e-06,
"loss": 0.3209,
"step": 1950
},
{
"epoch": 0.8746095493083444,
"grad_norm": 0.35071321892553325,
"learning_rate": 8.925981808990353e-06,
"loss": 0.3067,
"step": 1960
},
{
"epoch": 0.8790718429272646,
"grad_norm": 0.3254090010272678,
"learning_rate": 8.909851088015929e-06,
"loss": 0.319,
"step": 1970
},
{
"epoch": 0.8835341365461847,
"grad_norm": 0.31143570006446,
"learning_rate": 8.89361494091816e-06,
"loss": 0.3173,
"step": 1980
},
{
"epoch": 0.8879964301651049,
"grad_norm": 0.3368629213736036,
"learning_rate": 8.877273805492251e-06,
"loss": 0.3142,
"step": 1990
},
{
"epoch": 0.892458723784025,
"grad_norm": 0.32123265502775333,
"learning_rate": 8.860828122364333e-06,
"loss": 0.3114,
"step": 2000
},
{
"epoch": 0.8969210174029452,
"grad_norm": 0.32040081950305627,
"learning_rate": 8.844278334979587e-06,
"loss": 0.3173,
"step": 2010
},
{
"epoch": 0.9013833110218652,
"grad_norm": 0.31575327422413973,
"learning_rate": 8.82762488959028e-06,
"loss": 0.3161,
"step": 2020
},
{
"epoch": 0.9058456046407853,
"grad_norm": 0.2756449995981234,
"learning_rate": 8.810868235243746e-06,
"loss": 0.3127,
"step": 2030
},
{
"epoch": 0.9103078982597055,
"grad_norm": 0.3264003117051716,
"learning_rate": 8.79400882377026e-06,
"loss": 0.3122,
"step": 2040
},
{
"epoch": 0.9147701918786256,
"grad_norm": 0.32023283877896064,
"learning_rate": 8.777047109770872e-06,
"loss": 0.3161,
"step": 2050
},
{
"epoch": 0.9192324854975458,
"grad_norm": 0.32023324438450435,
"learning_rate": 8.759983550605132e-06,
"loss": 0.3075,
"step": 2060
},
{
"epoch": 0.9236947791164659,
"grad_norm": 0.3495626563053784,
"learning_rate": 8.74281860637877e-06,
"loss": 0.321,
"step": 2070
},
{
"epoch": 0.928157072735386,
"grad_norm": 0.365734666051995,
"learning_rate": 8.725552739931295e-06,
"loss": 0.3172,
"step": 2080
},
{
"epoch": 0.9326193663543061,
"grad_norm": 0.5195262244868563,
"learning_rate": 8.70818641682349e-06,
"loss": 0.3114,
"step": 2090
},
{
"epoch": 0.9370816599732262,
"grad_norm": 0.31318241394583657,
"learning_rate": 8.690720105324887e-06,
"loss": 0.3145,
"step": 2100
},
{
"epoch": 0.9415439535921464,
"grad_norm": 0.30511794190937597,
"learning_rate": 8.673154276401123e-06,
"loss": 0.3107,
"step": 2110
},
{
"epoch": 0.9460062472110665,
"grad_norm": 0.33299848484970584,
"learning_rate": 8.655489403701244e-06,
"loss": 0.3199,
"step": 2120
},
{
"epoch": 0.9504685408299867,
"grad_norm": 0.2918060481120866,
"learning_rate": 8.63772596354494e-06,
"loss": 0.3241,
"step": 2130
},
{
"epoch": 0.9549308344489067,
"grad_norm": 0.3267067284562192,
"learning_rate": 8.619864434909692e-06,
"loss": 0.3144,
"step": 2140
},
{
"epoch": 0.9593931280678268,
"grad_norm": 0.3620884501681288,
"learning_rate": 8.601905299417865e-06,
"loss": 0.3064,
"step": 2150
},
{
"epoch": 0.963855421686747,
"grad_norm": 0.3385652298929547,
"learning_rate": 8.583849041323717e-06,
"loss": 0.3167,
"step": 2160
},
{
"epoch": 0.9683177153056671,
"grad_norm": 0.31358881851824566,
"learning_rate": 8.565696147500338e-06,
"loss": 0.3173,
"step": 2170
},
{
"epoch": 0.9727800089245873,
"grad_norm": 0.26819324932064814,
"learning_rate": 8.54744710742653e-06,
"loss": 0.3098,
"step": 2180
},
{
"epoch": 0.9772423025435074,
"grad_norm": 0.3193733001824146,
"learning_rate": 8.529102413173605e-06,
"loss": 0.3019,
"step": 2190
},
{
"epoch": 0.9817045961624274,
"grad_norm": 0.28919148613320567,
"learning_rate": 8.510662559392113e-06,
"loss": 0.313,
"step": 2200
},
{
"epoch": 0.9861668897813476,
"grad_norm": 0.30310308101898775,
"learning_rate": 8.492128043298511e-06,
"loss": 0.3229,
"step": 2210
},
{
"epoch": 0.9906291834002677,
"grad_norm": 0.31282347332567895,
"learning_rate": 8.47349936466175e-06,
"loss": 0.3159,
"step": 2220
},
{
"epoch": 0.9950914770191879,
"grad_norm": 0.28804399192631563,
"learning_rate": 8.454777025789805e-06,
"loss": 0.313,
"step": 2230
},
{
"epoch": 0.999553770638108,
"grad_norm": 0.2868177500862966,
"learning_rate": 8.435961531516119e-06,
"loss": 0.2984,
"step": 2240
},
{
"epoch": 1.0040160642570282,
"grad_norm": 0.3322502574109939,
"learning_rate": 8.417053389186009e-06,
"loss": 0.2982,
"step": 2250
},
{
"epoch": 1.0084783578759482,
"grad_norm": 0.31601076209209755,
"learning_rate": 8.398053108642966e-06,
"loss": 0.2938,
"step": 2260
},
{
"epoch": 1.0129406514948684,
"grad_norm": 0.3085084676907704,
"learning_rate": 8.378961202214927e-06,
"loss": 0.3022,
"step": 2270
},
{
"epoch": 1.0174029451137885,
"grad_norm": 0.3030562804942466,
"learning_rate": 8.35977818470044e-06,
"loss": 0.2854,
"step": 2280
},
{
"epoch": 1.0218652387327085,
"grad_norm": 0.30673490305127743,
"learning_rate": 8.3405045733548e-06,
"loss": 0.2937,
"step": 2290
},
{
"epoch": 1.0263275323516288,
"grad_norm": 0.3079905769883883,
"learning_rate": 8.321140887876093e-06,
"loss": 0.2937,
"step": 2300
},
{
"epoch": 1.0307898259705488,
"grad_norm": 0.2833185510901884,
"learning_rate": 8.301687650391184e-06,
"loss": 0.2911,
"step": 2310
},
{
"epoch": 1.035252119589469,
"grad_norm": 0.2928224557074968,
"learning_rate": 8.28214538544164e-06,
"loss": 0.2925,
"step": 2320
},
{
"epoch": 1.039714413208389,
"grad_norm": 0.3119622799871382,
"learning_rate": 8.262514619969583e-06,
"loss": 0.2928,
"step": 2330
},
{
"epoch": 1.0441767068273093,
"grad_norm": 0.2909193051250449,
"learning_rate": 8.242795883303489e-06,
"loss": 0.2913,
"step": 2340
},
{
"epoch": 1.0486390004462294,
"grad_norm": 0.2878712035948744,
"learning_rate": 8.222989707143903e-06,
"loss": 0.2963,
"step": 2350
},
{
"epoch": 1.0531012940651494,
"grad_norm": 0.31820465000033393,
"learning_rate": 8.203096625549109e-06,
"loss": 0.293,
"step": 2360
},
{
"epoch": 1.0575635876840697,
"grad_norm": 0.3374605509762875,
"learning_rate": 8.183117174920733e-06,
"loss": 0.2946,
"step": 2370
},
{
"epoch": 1.0620258813029897,
"grad_norm": 0.3020068367212028,
"learning_rate": 8.163051893989273e-06,
"loss": 0.2925,
"step": 2380
},
{
"epoch": 1.06648817492191,
"grad_norm": 0.31068593851856324,
"learning_rate": 8.142901323799578e-06,
"loss": 0.2981,
"step": 2390
},
{
"epoch": 1.07095046854083,
"grad_norm": 0.3066541366441537,
"learning_rate": 8.122666007696251e-06,
"loss": 0.2916,
"step": 2400
},
{
"epoch": 1.07541276215975,
"grad_norm": 0.2898134349503606,
"learning_rate": 8.102346491309007e-06,
"loss": 0.2887,
"step": 2410
},
{
"epoch": 1.0798750557786703,
"grad_norm": 0.3017984948209697,
"learning_rate": 8.081943322537958e-06,
"loss": 0.2975,
"step": 2420
},
{
"epoch": 1.0843373493975903,
"grad_norm": 0.3106616772867366,
"learning_rate": 8.061457051538832e-06,
"loss": 0.2934,
"step": 2430
},
{
"epoch": 1.0887996430165106,
"grad_norm": 0.3117195701092512,
"learning_rate": 8.040888230708153e-06,
"loss": 0.2887,
"step": 2440
},
{
"epoch": 1.0932619366354306,
"grad_norm": 0.324577741353284,
"learning_rate": 8.02023741466833e-06,
"loss": 0.2886,
"step": 2450
},
{
"epoch": 1.0977242302543506,
"grad_norm": 0.31435192682062346,
"learning_rate": 7.999505160252712e-06,
"loss": 0.2913,
"step": 2460
},
{
"epoch": 1.1021865238732709,
"grad_norm": 0.30511195151083603,
"learning_rate": 7.978692026490576e-06,
"loss": 0.2963,
"step": 2470
},
{
"epoch": 1.106648817492191,
"grad_norm": 0.30143500498716036,
"learning_rate": 7.957798574592042e-06,
"loss": 0.2904,
"step": 2480
},
{
"epoch": 1.1111111111111112,
"grad_norm": 0.3029934237549778,
"learning_rate": 7.936825367932947e-06,
"loss": 0.2874,
"step": 2490
},
{
"epoch": 1.1155734047300312,
"grad_norm": 0.310379596751207,
"learning_rate": 7.91577297203966e-06,
"loss": 0.2864,
"step": 2500
},
{
"epoch": 1.1200356983489514,
"grad_norm": 0.2756683563500154,
"learning_rate": 7.89464195457382e-06,
"loss": 0.2838,
"step": 2510
},
{
"epoch": 1.1244979919678715,
"grad_norm": 0.2886788009666996,
"learning_rate": 7.873432885317036e-06,
"loss": 0.2892,
"step": 2520
},
{
"epoch": 1.1289602855867917,
"grad_norm": 0.6175230364141268,
"learning_rate": 7.852146336155531e-06,
"loss": 0.2961,
"step": 2530
},
{
"epoch": 1.1334225792057118,
"grad_norm": 0.33735487775658257,
"learning_rate": 7.830782881064707e-06,
"loss": 0.2989,
"step": 2540
},
{
"epoch": 1.1378848728246318,
"grad_norm": 0.2815200857691379,
"learning_rate": 7.809343096093676e-06,
"loss": 0.2918,
"step": 2550
},
{
"epoch": 1.142347166443552,
"grad_norm": 0.3190460322083763,
"learning_rate": 7.787827559349727e-06,
"loss": 0.2992,
"step": 2560
},
{
"epoch": 1.146809460062472,
"grad_norm": 0.316056300209666,
"learning_rate": 7.766236850982739e-06,
"loss": 0.2882,
"step": 2570
},
{
"epoch": 1.1512717536813923,
"grad_norm": 0.30178501428282056,
"learning_rate": 7.744571553169534e-06,
"loss": 0.2925,
"step": 2580
},
{
"epoch": 1.1557340473003124,
"grad_norm": 0.3001665670771815,
"learning_rate": 7.722832250098183e-06,
"loss": 0.2901,
"step": 2590
},
{
"epoch": 1.1601963409192324,
"grad_norm": 0.29583953397424706,
"learning_rate": 7.701019527952248e-06,
"loss": 0.2971,
"step": 2600
},
{
"epoch": 1.1646586345381527,
"grad_norm": 0.3241701984511909,
"learning_rate": 7.679133974894984e-06,
"loss": 0.2951,
"step": 2610
},
{
"epoch": 1.1691209281570727,
"grad_norm": 0.31677851739186413,
"learning_rate": 7.657176181053472e-06,
"loss": 0.3,
"step": 2620
},
{
"epoch": 1.173583221775993,
"grad_norm": 0.3740238542554803,
"learning_rate": 7.635146738502714e-06,
"loss": 0.2934,
"step": 2630
},
{
"epoch": 1.178045515394913,
"grad_norm": 0.31202486519983047,
"learning_rate": 7.6130462412496605e-06,
"loss": 0.2999,
"step": 2640
},
{
"epoch": 1.182507809013833,
"grad_norm": 0.3488189130672866,
"learning_rate": 7.590875285217201e-06,
"loss": 0.2913,
"step": 2650
},
{
"epoch": 1.1869701026327533,
"grad_norm": 0.31070266182233724,
"learning_rate": 7.568634468228085e-06,
"loss": 0.2847,
"step": 2660
},
{
"epoch": 1.1914323962516733,
"grad_norm": 0.29933114524992155,
"learning_rate": 7.546324389988817e-06,
"loss": 0.3017,
"step": 2670
},
{
"epoch": 1.1958946898705936,
"grad_norm": 0.29798742777776227,
"learning_rate": 7.5239456520734726e-06,
"loss": 0.2972,
"step": 2680
},
{
"epoch": 1.2003569834895136,
"grad_norm": 0.3062412855044856,
"learning_rate": 7.501498857907485e-06,
"loss": 0.2845,
"step": 2690
},
{
"epoch": 1.2048192771084336,
"grad_norm": 0.27470746072655944,
"learning_rate": 7.478984612751371e-06,
"loss": 0.2863,
"step": 2700
},
{
"epoch": 1.2092815707273539,
"grad_norm": 0.27866953124323884,
"learning_rate": 7.456403523684412e-06,
"loss": 0.2863,
"step": 2710
},
{
"epoch": 1.213743864346274,
"grad_norm": 0.2909116045188707,
"learning_rate": 7.433756199588282e-06,
"loss": 0.2926,
"step": 2720
},
{
"epoch": 1.2182061579651942,
"grad_norm": 0.29453973806838746,
"learning_rate": 7.411043251130634e-06,
"loss": 0.3008,
"step": 2730
},
{
"epoch": 1.2226684515841142,
"grad_norm": 0.31106123745886194,
"learning_rate": 7.388265290748631e-06,
"loss": 0.2966,
"step": 2740
},
{
"epoch": 1.2271307452030344,
"grad_norm": 0.3011866488239261,
"learning_rate": 7.36542293263243e-06,
"loss": 0.2937,
"step": 2750
},
{
"epoch": 1.2315930388219545,
"grad_norm": 0.2940575796812627,
"learning_rate": 7.342516792708627e-06,
"loss": 0.2852,
"step": 2760
},
{
"epoch": 1.2360553324408747,
"grad_norm": 0.26936074456201886,
"learning_rate": 7.319547488623642e-06,
"loss": 0.2921,
"step": 2770
},
{
"epoch": 1.2405176260597948,
"grad_norm": 0.290558981897068,
"learning_rate": 7.296515639727071e-06,
"loss": 0.2951,
"step": 2780
},
{
"epoch": 1.2449799196787148,
"grad_norm": 0.2852457393680859,
"learning_rate": 7.273421867054979e-06,
"loss": 0.2883,
"step": 2790
},
{
"epoch": 1.249442213297635,
"grad_norm": 0.3014603513538629,
"learning_rate": 7.250266793313161e-06,
"loss": 0.2865,
"step": 2800
},
{
"epoch": 1.253904506916555,
"grad_norm": 0.31555167954226493,
"learning_rate": 7.2270510428603465e-06,
"loss": 0.2872,
"step": 2810
},
{
"epoch": 1.2583668005354753,
"grad_norm": 0.30225060098084444,
"learning_rate": 7.2037752416913684e-06,
"loss": 0.2917,
"step": 2820
},
{
"epoch": 1.2628290941543954,
"grad_norm": 0.3044780265386901,
"learning_rate": 7.180440017420277e-06,
"loss": 0.2928,
"step": 2830
},
{
"epoch": 1.2672913877733154,
"grad_norm": 0.28549429454817354,
"learning_rate": 7.157045999263423e-06,
"loss": 0.2966,
"step": 2840
},
{
"epoch": 1.2717536813922357,
"grad_norm": 0.3366282530568157,
"learning_rate": 7.13359381802249e-06,
"loss": 0.2934,
"step": 2850
},
{
"epoch": 1.2762159750111557,
"grad_norm": 0.37361922324450697,
"learning_rate": 7.110084106067483e-06,
"loss": 0.2838,
"step": 2860
},
{
"epoch": 1.280678268630076,
"grad_norm": 0.3167267835607769,
"learning_rate": 7.0865174973196746e-06,
"loss": 0.303,
"step": 2870
},
{
"epoch": 1.285140562248996,
"grad_norm": 0.3151077338868855,
"learning_rate": 7.062894627234525e-06,
"loss": 0.286,
"step": 2880
},
{
"epoch": 1.289602855867916,
"grad_norm": 0.2948165918607916,
"learning_rate": 7.039216132784528e-06,
"loss": 0.2823,
"step": 2890
},
{
"epoch": 1.2940651494868363,
"grad_norm": 0.34833417956591767,
"learning_rate": 7.0154826524420506e-06,
"loss": 0.282,
"step": 2900
},
{
"epoch": 1.2985274431057563,
"grad_norm": 0.29607288645591945,
"learning_rate": 6.9916948261621145e-06,
"loss": 0.2903,
"step": 2910
},
{
"epoch": 1.3029897367246766,
"grad_norm": 0.3141375337783596,
"learning_rate": 6.96785329536513e-06,
"loss": 0.3022,
"step": 2920
},
{
"epoch": 1.3074520303435966,
"grad_norm": 0.3094386182548052,
"learning_rate": 6.943958702919618e-06,
"loss": 0.2963,
"step": 2930
},
{
"epoch": 1.3119143239625166,
"grad_norm": 0.29702645469864675,
"learning_rate": 6.9200116931248575e-06,
"loss": 0.2925,
"step": 2940
},
{
"epoch": 1.3163766175814369,
"grad_norm": 0.3131467834081039,
"learning_rate": 6.896012911693527e-06,
"loss": 0.2926,
"step": 2950
},
{
"epoch": 1.320838911200357,
"grad_norm": 0.30420307046401684,
"learning_rate": 6.871963005734283e-06,
"loss": 0.2914,
"step": 2960
},
{
"epoch": 1.3253012048192772,
"grad_norm": 0.2874880602952481,
"learning_rate": 6.847862623734316e-06,
"loss": 0.2907,
"step": 2970
},
{
"epoch": 1.3297634984381972,
"grad_norm": 0.31117439728389396,
"learning_rate": 6.823712415541867e-06,
"loss": 0.2906,
"step": 2980
},
{
"epoch": 1.3342257920571172,
"grad_norm": 0.3154785866126041,
"learning_rate": 6.7995130323486995e-06,
"loss": 0.2905,
"step": 2990
},
{
"epoch": 1.3386880856760375,
"grad_norm": 0.3162007988121743,
"learning_rate": 6.775265126672544e-06,
"loss": 0.2944,
"step": 3000
},
{
"epoch": 1.3431503792949577,
"grad_norm": 0.2996244368117481,
"learning_rate": 6.750969352339503e-06,
"loss": 0.2791,
"step": 3010
},
{
"epoch": 1.3476126729138778,
"grad_norm": 0.31285203943822604,
"learning_rate": 6.726626364466418e-06,
"loss": 0.2978,
"step": 3020
},
{
"epoch": 1.3520749665327978,
"grad_norm": 0.3069565443579846,
"learning_rate": 6.70223681944321e-06,
"loss": 0.2887,
"step": 3030
},
{
"epoch": 1.356537260151718,
"grad_norm": 0.3555134690217621,
"learning_rate": 6.677801374915175e-06,
"loss": 0.2938,
"step": 3040
},
{
"epoch": 1.360999553770638,
"grad_norm": 0.32323874683363596,
"learning_rate": 6.653320689765257e-06,
"loss": 0.2878,
"step": 3050
},
{
"epoch": 1.3654618473895583,
"grad_norm": 0.2958320258483459,
"learning_rate": 6.628795424096276e-06,
"loss": 0.2846,
"step": 3060
},
{
"epoch": 1.3699241410084784,
"grad_norm": 0.29591479942269067,
"learning_rate": 6.604226239213131e-06,
"loss": 0.2912,
"step": 3070
},
{
"epoch": 1.3743864346273984,
"grad_norm": 0.275955956029984,
"learning_rate": 6.579613797604971e-06,
"loss": 0.2841,
"step": 3080
},
{
"epoch": 1.3788487282463187,
"grad_norm": 0.3169860752144058,
"learning_rate": 6.554958762927328e-06,
"loss": 0.2911,
"step": 3090
},
{
"epoch": 1.3833110218652387,
"grad_norm": 0.2965804636519538,
"learning_rate": 6.530261799984225e-06,
"loss": 0.289,
"step": 3100
},
{
"epoch": 1.387773315484159,
"grad_norm": 0.3240128684238396,
"learning_rate": 6.5055235747102456e-06,
"loss": 0.2914,
"step": 3110
},
{
"epoch": 1.392235609103079,
"grad_norm": 0.27893092037168543,
"learning_rate": 6.480744754152581e-06,
"loss": 0.2904,
"step": 3120
},
{
"epoch": 1.396697902721999,
"grad_norm": 0.30817444273855016,
"learning_rate": 6.455926006453045e-06,
"loss": 0.2888,
"step": 3130
},
{
"epoch": 1.4011601963409193,
"grad_norm": 0.29700021328473286,
"learning_rate": 6.431068000830054e-06,
"loss": 0.2892,
"step": 3140
},
{
"epoch": 1.4056224899598393,
"grad_norm": 0.31253083958098826,
"learning_rate": 6.406171407560587e-06,
"loss": 0.2939,
"step": 3150
},
{
"epoch": 1.4100847835787595,
"grad_norm": 0.2949725175395465,
"learning_rate": 6.381236897962102e-06,
"loss": 0.2918,
"step": 3160
},
{
"epoch": 1.4145470771976796,
"grad_norm": 0.2835765420987079,
"learning_rate": 6.356265144374451e-06,
"loss": 0.2912,
"step": 3170
},
{
"epoch": 1.4190093708165996,
"grad_norm": 0.2911266952336509,
"learning_rate": 6.3312568201417335e-06,
"loss": 0.2863,
"step": 3180
},
{
"epoch": 1.4234716644355199,
"grad_norm": 0.2975023838090645,
"learning_rate": 6.306212599594155e-06,
"loss": 0.287,
"step": 3190
},
{
"epoch": 1.42793395805444,
"grad_norm": 0.33944728741572633,
"learning_rate": 6.281133158029833e-06,
"loss": 0.28,
"step": 3200
},
{
"epoch": 1.4323962516733602,
"grad_norm": 0.29846101119632584,
"learning_rate": 6.256019171696595e-06,
"loss": 0.2804,
"step": 3210
},
{
"epoch": 1.4368585452922802,
"grad_norm": 0.28248180975947623,
"learning_rate": 6.230871317773737e-06,
"loss": 0.2926,
"step": 3220
},
{
"epoch": 1.4413208389112002,
"grad_norm": 0.2851276821074005,
"learning_rate": 6.205690274353775e-06,
"loss": 0.2922,
"step": 3230
},
{
"epoch": 1.4457831325301205,
"grad_norm": 0.39429209152368455,
"learning_rate": 6.1804767204241515e-06,
"loss": 0.2836,
"step": 3240
},
{
"epoch": 1.4502454261490407,
"grad_norm": 0.28036841762852593,
"learning_rate": 6.155231335848927e-06,
"loss": 0.297,
"step": 3250
},
{
"epoch": 1.4547077197679608,
"grad_norm": 0.31725786994860594,
"learning_rate": 6.129954801350455e-06,
"loss": 0.2912,
"step": 3260
},
{
"epoch": 1.4591700133868808,
"grad_norm": 0.3367577417882933,
"learning_rate": 6.1046477984910215e-06,
"loss": 0.2941,
"step": 3270
},
{
"epoch": 1.463632307005801,
"grad_norm": 0.3029711035622468,
"learning_rate": 6.079311009654466e-06,
"loss": 0.2714,
"step": 3280
},
{
"epoch": 1.468094600624721,
"grad_norm": 0.36320361509650795,
"learning_rate": 6.053945118027789e-06,
"loss": 0.2998,
"step": 3290
},
{
"epoch": 1.4725568942436413,
"grad_norm": 0.3004963431679098,
"learning_rate": 6.028550807582718e-06,
"loss": 0.2841,
"step": 3300
},
{
"epoch": 1.4770191878625614,
"grad_norm": 0.28832303322372754,
"learning_rate": 6.00312876305728e-06,
"loss": 0.2907,
"step": 3310
},
{
"epoch": 1.4814814814814814,
"grad_norm": 0.2738988460065302,
"learning_rate": 5.977679669937325e-06,
"loss": 0.2865,
"step": 3320
},
{
"epoch": 1.4859437751004017,
"grad_norm": 0.3055752634708775,
"learning_rate": 5.952204214438049e-06,
"loss": 0.2886,
"step": 3330
},
{
"epoch": 1.4904060687193217,
"grad_norm": 0.3716780982022642,
"learning_rate": 5.926703083485488e-06,
"loss": 0.2906,
"step": 3340
},
{
"epoch": 1.494868362338242,
"grad_norm": 0.324100431102628,
"learning_rate": 5.901176964698e-06,
"loss": 0.2857,
"step": 3350
},
{
"epoch": 1.499330655957162,
"grad_norm": 0.27889777787988906,
"learning_rate": 5.875626546367716e-06,
"loss": 0.292,
"step": 3360
},
{
"epoch": 1.503792949576082,
"grad_norm": 0.30807078683693484,
"learning_rate": 5.850052517441991e-06,
"loss": 0.2963,
"step": 3370
},
{
"epoch": 1.5082552431950023,
"grad_norm": 0.2762209648675528,
"learning_rate": 5.824455567504817e-06,
"loss": 0.286,
"step": 3380
},
{
"epoch": 1.5127175368139225,
"grad_norm": 0.33369974271939384,
"learning_rate": 5.798836386758235e-06,
"loss": 0.2954,
"step": 3390
},
{
"epoch": 1.5171798304328425,
"grad_norm": 0.2860459702457261,
"learning_rate": 5.773195666003724e-06,
"loss": 0.2938,
"step": 3400
},
{
"epoch": 1.5216421240517626,
"grad_norm": 0.2779676376622302,
"learning_rate": 5.747534096623569e-06,
"loss": 0.2876,
"step": 3410
},
{
"epoch": 1.5261044176706826,
"grad_norm": 0.2955504033746034,
"learning_rate": 5.7218523705622275e-06,
"loss": 0.2929,
"step": 3420
},
{
"epoch": 1.5305667112896029,
"grad_norm": 0.2980806596476541,
"learning_rate": 5.696151180307661e-06,
"loss": 0.2925,
"step": 3430
},
{
"epoch": 1.5350290049085231,
"grad_norm": 0.2794624988961113,
"learning_rate": 5.670431218872672e-06,
"loss": 0.2931,
"step": 3440
},
{
"epoch": 1.5394912985274432,
"grad_norm": 0.2786208372363059,
"learning_rate": 5.644693179776213e-06,
"loss": 0.2974,
"step": 3450
},
{
"epoch": 1.5439535921463632,
"grad_norm": 0.27275146705308617,
"learning_rate": 5.618937757024683e-06,
"loss": 0.2912,
"step": 3460
},
{
"epoch": 1.5484158857652832,
"grad_norm": 0.27976748800298595,
"learning_rate": 5.593165645093222e-06,
"loss": 0.2951,
"step": 3470
},
{
"epoch": 1.5528781793842035,
"grad_norm": 0.2818438825086793,
"learning_rate": 5.567377538906977e-06,
"loss": 0.2923,
"step": 3480
},
{
"epoch": 1.5573404730031237,
"grad_norm": 0.27434503383015063,
"learning_rate": 5.541574133822374e-06,
"loss": 0.2903,
"step": 3490
},
{
"epoch": 1.5618027666220438,
"grad_norm": 0.28657591757615636,
"learning_rate": 5.515756125608355e-06,
"loss": 0.288,
"step": 3500
},
{
"epoch": 1.5662650602409638,
"grad_norm": 0.27282164230782224,
"learning_rate": 5.489924210427628e-06,
"loss": 0.2896,
"step": 3510
},
{
"epoch": 1.5707273538598838,
"grad_norm": 0.417801968862151,
"learning_rate": 5.464079084817892e-06,
"loss": 0.284,
"step": 3520
},
{
"epoch": 1.575189647478804,
"grad_norm": 0.29465703843658075,
"learning_rate": 5.4382214456730546e-06,
"loss": 0.2918,
"step": 3530
},
{
"epoch": 1.5796519410977243,
"grad_norm": 0.28542382527132004,
"learning_rate": 5.412351990224438e-06,
"loss": 0.2857,
"step": 3540
},
{
"epoch": 1.5841142347166444,
"grad_norm": 0.2693415075013077,
"learning_rate": 5.386471416021987e-06,
"loss": 0.2833,
"step": 3550
},
{
"epoch": 1.5885765283355644,
"grad_norm": 0.28674507691911566,
"learning_rate": 5.36058042091545e-06,
"loss": 0.2788,
"step": 3560
},
{
"epoch": 1.5930388219544847,
"grad_norm": 0.28366032186224716,
"learning_rate": 5.33467970303557e-06,
"loss": 0.2867,
"step": 3570
},
{
"epoch": 1.5975011155734047,
"grad_norm": 0.2953521127402904,
"learning_rate": 5.308769960775257e-06,
"loss": 0.2939,
"step": 3580
},
{
"epoch": 1.601963409192325,
"grad_norm": 0.2888287000335896,
"learning_rate": 5.28285189277076e-06,
"loss": 0.2905,
"step": 3590
},
{
"epoch": 1.606425702811245,
"grad_norm": 0.27937721122117365,
"learning_rate": 5.2569261978828155e-06,
"loss": 0.2982,
"step": 3600
},
{
"epoch": 1.610887996430165,
"grad_norm": 0.28062178431057444,
"learning_rate": 5.230993575177823e-06,
"loss": 0.2925,
"step": 3610
},
{
"epoch": 1.6153502900490853,
"grad_norm": 0.2959051465736045,
"learning_rate": 5.2050547239089796e-06,
"loss": 0.2766,
"step": 3620
},
{
"epoch": 1.6198125836680055,
"grad_norm": 0.3167636433656721,
"learning_rate": 5.179110343497432e-06,
"loss": 0.2921,
"step": 3630
},
{
"epoch": 1.6242748772869255,
"grad_norm": 0.27553361706720186,
"learning_rate": 5.15316113351342e-06,
"loss": 0.2866,
"step": 3640
},
{
"epoch": 1.6287371709058456,
"grad_norm": 0.281198216094774,
"learning_rate": 5.1272077936574005e-06,
"loss": 0.2869,
"step": 3650
},
{
"epoch": 1.6331994645247656,
"grad_norm": 0.28566411066751285,
"learning_rate": 5.1012510237411975e-06,
"loss": 0.2874,
"step": 3660
},
{
"epoch": 1.6376617581436859,
"grad_norm": 0.27128449582175607,
"learning_rate": 5.075291523669118e-06,
"loss": 0.2771,
"step": 3670
},
{
"epoch": 1.6421240517626061,
"grad_norm": 0.25597160345393283,
"learning_rate": 5.049329993419092e-06,
"loss": 0.2882,
"step": 3680
},
{
"epoch": 1.6465863453815262,
"grad_norm": 0.27857161981462486,
"learning_rate": 5.023367133023784e-06,
"loss": 0.2942,
"step": 3690
},
{
"epoch": 1.6510486390004462,
"grad_norm": 0.2862312874917413,
"learning_rate": 4.997403642551733e-06,
"loss": 0.2949,
"step": 3700
},
{
"epoch": 1.6555109326193662,
"grad_norm": 0.2553590681517932,
"learning_rate": 4.971440222088459e-06,
"loss": 0.2823,
"step": 3710
},
{
"epoch": 1.6599732262382865,
"grad_norm": 0.2806614787948605,
"learning_rate": 4.945477571717602e-06,
"loss": 0.2946,
"step": 3720
},
{
"epoch": 1.6644355198572067,
"grad_norm": 0.3072907017218825,
"learning_rate": 4.91951639150203e-06,
"loss": 0.2814,
"step": 3730
},
{
"epoch": 1.6688978134761268,
"grad_norm": 0.2874398898980506,
"learning_rate": 4.8935573814649765e-06,
"loss": 0.2915,
"step": 3740
},
{
"epoch": 1.6733601070950468,
"grad_norm": 0.28754909498663717,
"learning_rate": 4.867601241571153e-06,
"loss": 0.2883,
"step": 3750
},
{
"epoch": 1.6778224007139668,
"grad_norm": 0.273658543396969,
"learning_rate": 4.841648671707881e-06,
"loss": 0.2829,
"step": 3760
},
{
"epoch": 1.682284694332887,
"grad_norm": 0.288492021314724,
"learning_rate": 4.815700371666219e-06,
"loss": 0.2897,
"step": 3770
},
{
"epoch": 1.6867469879518073,
"grad_norm": 0.302492958266678,
"learning_rate": 4.789757041122093e-06,
"loss": 0.2806,
"step": 3780
},
{
"epoch": 1.6912092815707274,
"grad_norm": 0.2995952463789012,
"learning_rate": 4.763819379617432e-06,
"loss": 0.2896,
"step": 3790
},
{
"epoch": 1.6956715751896474,
"grad_norm": 0.30875546003288895,
"learning_rate": 4.737888086541298e-06,
"loss": 0.2859,
"step": 3800
},
{
"epoch": 1.7001338688085676,
"grad_norm": 0.2944845035908801,
"learning_rate": 4.711963861111043e-06,
"loss": 0.3009,
"step": 3810
},
{
"epoch": 1.7045961624274877,
"grad_norm": 0.3619439737256338,
"learning_rate": 4.686047402353433e-06,
"loss": 0.2841,
"step": 3820
},
{
"epoch": 1.709058456046408,
"grad_norm": 0.29449858921532607,
"learning_rate": 4.660139409085825e-06,
"loss": 0.2935,
"step": 3830
},
{
"epoch": 1.713520749665328,
"grad_norm": 0.2829699337998857,
"learning_rate": 4.634240579897299e-06,
"loss": 0.2921,
"step": 3840
},
{
"epoch": 1.717983043284248,
"grad_norm": 0.2600468326834046,
"learning_rate": 4.608351613129841e-06,
"loss": 0.2835,
"step": 3850
},
{
"epoch": 1.7224453369031683,
"grad_norm": 0.2793462803832517,
"learning_rate": 4.582473206859498e-06,
"loss": 0.2882,
"step": 3860
},
{
"epoch": 1.7269076305220885,
"grad_norm": 0.2868869922030311,
"learning_rate": 4.556606058877567e-06,
"loss": 0.2883,
"step": 3870
},
{
"epoch": 1.7313699241410085,
"grad_norm": 0.3862045749220281,
"learning_rate": 4.530750866671769e-06,
"loss": 0.2924,
"step": 3880
},
{
"epoch": 1.7358322177599286,
"grad_norm": 0.2561819626783043,
"learning_rate": 4.504908327407452e-06,
"loss": 0.2902,
"step": 3890
},
{
"epoch": 1.7402945113788486,
"grad_norm": 0.26603791907774466,
"learning_rate": 4.479079137908781e-06,
"loss": 0.2753,
"step": 3900
},
{
"epoch": 1.7447568049977689,
"grad_norm": 0.30516815757249277,
"learning_rate": 4.453263994639959e-06,
"loss": 0.293,
"step": 3910
},
{
"epoch": 1.7492190986166891,
"grad_norm": 0.2689881363760943,
"learning_rate": 4.427463593686442e-06,
"loss": 0.2947,
"step": 3920
},
{
"epoch": 1.7536813922356091,
"grad_norm": 0.26851951669717095,
"learning_rate": 4.401678630736172e-06,
"loss": 0.2835,
"step": 3930
},
{
"epoch": 1.7581436858545292,
"grad_norm": 0.26883125019727905,
"learning_rate": 4.3759098010608155e-06,
"loss": 0.2782,
"step": 3940
},
{
"epoch": 1.7626059794734492,
"grad_norm": 0.25218594985078024,
"learning_rate": 4.350157799497017e-06,
"loss": 0.2893,
"step": 3950
},
{
"epoch": 1.7670682730923695,
"grad_norm": 0.26946851688893303,
"learning_rate": 4.324423320427669e-06,
"loss": 0.279,
"step": 3960
},
{
"epoch": 1.7715305667112897,
"grad_norm": 0.2974783320098152,
"learning_rate": 4.298707057763175e-06,
"loss": 0.2855,
"step": 3970
},
{
"epoch": 1.7759928603302098,
"grad_norm": 0.2966897122885392,
"learning_rate": 4.273009704922757e-06,
"loss": 0.2884,
"step": 3980
},
{
"epoch": 1.7804551539491298,
"grad_norm": 0.2902971297459419,
"learning_rate": 4.24733195481574e-06,
"loss": 0.2796,
"step": 3990
},
{
"epoch": 1.7849174475680498,
"grad_norm": 0.296879555221403,
"learning_rate": 4.221674499822878e-06,
"loss": 0.2941,
"step": 4000
},
{
"epoch": 1.78937974118697,
"grad_norm": 0.2772609554006714,
"learning_rate": 4.196038031777688e-06,
"loss": 0.2784,
"step": 4010
},
{
"epoch": 1.7938420348058903,
"grad_norm": 0.3728763097589947,
"learning_rate": 4.170423241947782e-06,
"loss": 0.2825,
"step": 4020
},
{
"epoch": 1.7983043284248104,
"grad_norm": 0.2690864141407203,
"learning_rate": 4.144830821016245e-06,
"loss": 0.2848,
"step": 4030
},
{
"epoch": 1.8027666220437304,
"grad_norm": 0.2945504927368404,
"learning_rate": 4.119261459062992e-06,
"loss": 0.2886,
"step": 4040
},
{
"epoch": 1.8072289156626506,
"grad_norm": 0.2817141780614377,
"learning_rate": 4.0937158455461805e-06,
"loss": 0.2861,
"step": 4050
},
{
"epoch": 1.8116912092815707,
"grad_norm": 0.2908042660570454,
"learning_rate": 4.068194669283599e-06,
"loss": 0.2855,
"step": 4060
},
{
"epoch": 1.816153502900491,
"grad_norm": 0.26095488586703197,
"learning_rate": 4.042698618434115e-06,
"loss": 0.2775,
"step": 4070
},
{
"epoch": 1.820615796519411,
"grad_norm": 0.26782163829467626,
"learning_rate": 4.017228380479099e-06,
"loss": 0.2902,
"step": 4080
},
{
"epoch": 1.825078090138331,
"grad_norm": 0.2893359193551187,
"learning_rate": 3.991784642203904e-06,
"loss": 0.286,
"step": 4090
},
{
"epoch": 1.8295403837572513,
"grad_norm": 0.28187305106972493,
"learning_rate": 3.966368089679337e-06,
"loss": 0.2951,
"step": 4100
},
{
"epoch": 1.8340026773761715,
"grad_norm": 0.7149499820103131,
"learning_rate": 3.9409794082431585e-06,
"loss": 0.278,
"step": 4110
},
{
"epoch": 1.8384649709950915,
"grad_norm": 0.2781228028139923,
"learning_rate": 3.915619282481613e-06,
"loss": 0.2804,
"step": 4120
},
{
"epoch": 1.8429272646140116,
"grad_norm": 0.27787506538029344,
"learning_rate": 3.890288396210958e-06,
"loss": 0.2773,
"step": 4130
},
{
"epoch": 1.8473895582329316,
"grad_norm": 0.2775837112621051,
"learning_rate": 3.8649874324590355e-06,
"loss": 0.2891,
"step": 4140
},
{
"epoch": 1.8518518518518519,
"grad_norm": 0.27038074332915685,
"learning_rate": 3.839717073446842e-06,
"loss": 0.2819,
"step": 4150
},
{
"epoch": 1.8563141454707721,
"grad_norm": 0.27153282107037896,
"learning_rate": 3.8144780005701526e-06,
"loss": 0.2808,
"step": 4160
},
{
"epoch": 1.8607764390896921,
"grad_norm": 0.27119597948809526,
"learning_rate": 3.7892708943811224e-06,
"loss": 0.2828,
"step": 4170
},
{
"epoch": 1.8652387327086122,
"grad_norm": 0.26751701443601517,
"learning_rate": 3.7640964345699613e-06,
"loss": 0.2872,
"step": 4180
},
{
"epoch": 1.8697010263275322,
"grad_norm": 0.2668805357678453,
"learning_rate": 3.738955299946588e-06,
"loss": 0.2808,
"step": 4190
},
{
"epoch": 1.8741633199464525,
"grad_norm": 0.26385620413020505,
"learning_rate": 3.7138481684223316e-06,
"loss": 0.284,
"step": 4200
},
{
"epoch": 1.8786256135653727,
"grad_norm": 0.27804749998996886,
"learning_rate": 3.688775716991661e-06,
"loss": 0.2901,
"step": 4210
},
{
"epoch": 1.8830879071842928,
"grad_norm": 0.26632920843385016,
"learning_rate": 3.6637386217139158e-06,
"loss": 0.2817,
"step": 4220
},
{
"epoch": 1.8875502008032128,
"grad_norm": 0.28107011263915094,
"learning_rate": 3.6387375576950902e-06,
"loss": 0.2875,
"step": 4230
},
{
"epoch": 1.8920124944221328,
"grad_norm": 0.27319254801532444,
"learning_rate": 3.613773199069618e-06,
"loss": 0.2893,
"step": 4240
},
{
"epoch": 1.896474788041053,
"grad_norm": 0.2895639115697261,
"learning_rate": 3.588846218982204e-06,
"loss": 0.2869,
"step": 4250
},
{
"epoch": 1.9009370816599733,
"grad_norm": 0.27271032068079937,
"learning_rate": 3.563957289569669e-06,
"loss": 0.2869,
"step": 4260
},
{
"epoch": 1.9053993752788934,
"grad_norm": 0.26855796153698186,
"learning_rate": 3.5391070819428246e-06,
"loss": 0.2857,
"step": 4270
},
{
"epoch": 1.9098616688978134,
"grad_norm": 0.28223224193709756,
"learning_rate": 3.514296266168381e-06,
"loss": 0.2915,
"step": 4280
},
{
"epoch": 1.9143239625167336,
"grad_norm": 0.2641850196296661,
"learning_rate": 3.4895255112508773e-06,
"loss": 0.2762,
"step": 4290
},
{
"epoch": 1.9187862561356537,
"grad_norm": 0.3420858629489308,
"learning_rate": 3.4647954851146437e-06,
"loss": 0.2925,
"step": 4300
},
{
"epoch": 1.923248549754574,
"grad_norm": 0.2604588316374171,
"learning_rate": 3.4401068545857843e-06,
"loss": 0.2822,
"step": 4310
},
{
"epoch": 1.927710843373494,
"grad_norm": 0.2715732112484208,
"learning_rate": 3.4154602853742115e-06,
"loss": 0.2764,
"step": 4320
},
{
"epoch": 1.932173136992414,
"grad_norm": 0.29731471914640445,
"learning_rate": 3.3908564420556778e-06,
"loss": 0.284,
"step": 4330
},
{
"epoch": 1.9366354306113343,
"grad_norm": 0.299436802757538,
"learning_rate": 3.3662959880538744e-06,
"loss": 0.2785,
"step": 4340
},
{
"epoch": 1.9410977242302545,
"grad_norm": 0.25848952586710894,
"learning_rate": 3.341779585622522e-06,
"loss": 0.2782,
"step": 4350
},
{
"epoch": 1.9455600178491745,
"grad_norm": 0.26318821809460174,
"learning_rate": 3.3173078958275355e-06,
"loss": 0.2772,
"step": 4360
},
{
"epoch": 1.9500223114680946,
"grad_norm": 0.2849012354934153,
"learning_rate": 3.292881578529179e-06,
"loss": 0.2878,
"step": 4370
},
{
"epoch": 1.9544846050870146,
"grad_norm": 0.28212180104540435,
"learning_rate": 3.268501292364289e-06,
"loss": 0.2765,
"step": 4380
},
{
"epoch": 1.9589468987059349,
"grad_norm": 0.28217071136679106,
"learning_rate": 3.2441676947285035e-06,
"loss": 0.2841,
"step": 4390
},
{
"epoch": 1.9634091923248551,
"grad_norm": 0.4014867685340475,
"learning_rate": 3.219881441758541e-06,
"loss": 0.2842,
"step": 4400
},
{
"epoch": 1.9678714859437751,
"grad_norm": 0.26510923757329297,
"learning_rate": 3.19564318831451e-06,
"loss": 0.282,
"step": 4410
},
{
"epoch": 1.9723337795626952,
"grad_norm": 0.26724901583056526,
"learning_rate": 3.171453587962246e-06,
"loss": 0.2829,
"step": 4420
},
{
"epoch": 1.9767960731816152,
"grad_norm": 0.25923632674916225,
"learning_rate": 3.1473132929556927e-06,
"loss": 0.2879,
"step": 4430
},
{
"epoch": 1.9812583668005355,
"grad_norm": 0.2819718374318917,
"learning_rate": 3.1232229542193126e-06,
"loss": 0.2887,
"step": 4440
},
{
"epoch": 1.9857206604194557,
"grad_norm": 0.27554261636665484,
"learning_rate": 3.0991832213305367e-06,
"loss": 0.2868,
"step": 4450
},
{
"epoch": 1.9901829540383758,
"grad_norm": 0.3200504664553153,
"learning_rate": 3.0751947425022465e-06,
"loss": 0.2796,
"step": 4460
},
{
"epoch": 1.9946452476572958,
"grad_norm": 0.2610653468140633,
"learning_rate": 3.0512581645653007e-06,
"loss": 0.2911,
"step": 4470
},
{
"epoch": 1.9991075412762158,
"grad_norm": 0.3126154839381511,
"learning_rate": 3.0273741329510852e-06,
"loss": 0.287,
"step": 4480
},
{
"epoch": 2.0035698348951363,
"grad_norm": 0.29397339441332226,
"learning_rate": 3.0035432916741215e-06,
"loss": 0.2665,
"step": 4490
},
{
"epoch": 2.0080321285140563,
"grad_norm": 0.275630796936842,
"learning_rate": 2.979766283314688e-06,
"loss": 0.2643,
"step": 4500
},
{
"epoch": 2.0124944221329764,
"grad_norm": 0.26340335226827566,
"learning_rate": 2.9560437490015013e-06,
"loss": 0.2727,
"step": 4510
},
{
"epoch": 2.0169567157518964,
"grad_norm": 0.30161717213091244,
"learning_rate": 2.9323763283944338e-06,
"loss": 0.2625,
"step": 4520
},
{
"epoch": 2.0214190093708164,
"grad_norm": 0.3171900997207839,
"learning_rate": 2.9087646596672487e-06,
"loss": 0.2598,
"step": 4530
},
{
"epoch": 2.025881302989737,
"grad_norm": 0.3222582665257153,
"learning_rate": 2.8852093794904136e-06,
"loss": 0.2652,
"step": 4540
},
{
"epoch": 2.030343596608657,
"grad_norm": 0.28771230139561266,
"learning_rate": 2.861711123013911e-06,
"loss": 0.2708,
"step": 4550
},
{
"epoch": 2.034805890227577,
"grad_norm": 0.26951340411915475,
"learning_rate": 2.838270523850135e-06,
"loss": 0.271,
"step": 4560
},
{
"epoch": 2.039268183846497,
"grad_norm": 0.26621140063523485,
"learning_rate": 2.8148882140567844e-06,
"loss": 0.2675,
"step": 4570
},
{
"epoch": 2.043730477465417,
"grad_norm": 0.30555915513232423,
"learning_rate": 2.7915648241198386e-06,
"loss": 0.263,
"step": 4580
},
{
"epoch": 2.0481927710843375,
"grad_norm": 0.2975295129021857,
"learning_rate": 2.7683009829365417e-06,
"loss": 0.2598,
"step": 4590
},
{
"epoch": 2.0526550647032575,
"grad_norm": 0.2734727711740158,
"learning_rate": 2.745097317798452e-06,
"loss": 0.268,
"step": 4600
},
{
"epoch": 2.0571173583221776,
"grad_norm": 0.29391413176160824,
"learning_rate": 2.7219544543745335e-06,
"loss": 0.2661,
"step": 4610
},
{
"epoch": 2.0615796519410976,
"grad_norm": 0.2840209475626469,
"learning_rate": 2.698873016694271e-06,
"loss": 0.2655,
"step": 4620
},
{
"epoch": 2.0660419455600176,
"grad_norm": 0.2600456780446525,
"learning_rate": 2.6758536271308582e-06,
"loss": 0.2588,
"step": 4630
},
{
"epoch": 2.070504239178938,
"grad_norm": 0.28816278556575226,
"learning_rate": 2.6528969063844022e-06,
"loss": 0.2708,
"step": 4640
},
{
"epoch": 2.074966532797858,
"grad_norm": 0.27537400614220536,
"learning_rate": 2.630003473465202e-06,
"loss": 0.2603,
"step": 4650
},
{
"epoch": 2.079428826416778,
"grad_norm": 0.2799988616872821,
"learning_rate": 2.6071739456770394e-06,
"loss": 0.265,
"step": 4660
},
{
"epoch": 2.083891120035698,
"grad_norm": 0.27519216667774027,
"learning_rate": 2.5844089386005512e-06,
"loss": 0.2615,
"step": 4670
},
{
"epoch": 2.0883534136546187,
"grad_norm": 0.2817941036734103,
"learning_rate": 2.5617090660766218e-06,
"loss": 0.2747,
"step": 4680
},
{
"epoch": 2.0928157072735387,
"grad_norm": 0.2538075181305876,
"learning_rate": 2.5390749401898274e-06,
"loss": 0.2705,
"step": 4690
},
{
"epoch": 2.0972780008924587,
"grad_norm": 0.30549102286988794,
"learning_rate": 2.5165071712519447e-06,
"loss": 0.2751,
"step": 4700
},
{
"epoch": 2.101740294511379,
"grad_norm": 0.2794818884211519,
"learning_rate": 2.4940063677854775e-06,
"loss": 0.2668,
"step": 4710
},
{
"epoch": 2.106202588130299,
"grad_norm": 0.27303724559190734,
"learning_rate": 2.4715731365072666e-06,
"loss": 0.2628,
"step": 4720
},
{
"epoch": 2.1106648817492193,
"grad_norm": 0.29272010242374324,
"learning_rate": 2.449208082312111e-06,
"loss": 0.2647,
"step": 4730
},
{
"epoch": 2.1151271753681393,
"grad_norm": 0.270738388759199,
"learning_rate": 2.4269118082564774e-06,
"loss": 0.2617,
"step": 4740
},
{
"epoch": 2.1195894689870594,
"grad_norm": 0.2840129241196674,
"learning_rate": 2.4046849155422193e-06,
"loss": 0.274,
"step": 4750
},
{
"epoch": 2.1240517626059794,
"grad_norm": 0.2645544390527427,
"learning_rate": 2.382528003500384e-06,
"loss": 0.2686,
"step": 4760
},
{
"epoch": 2.1285140562248994,
"grad_norm": 0.2865191645329176,
"learning_rate": 2.3604416695750364e-06,
"loss": 0.2601,
"step": 4770
},
{
"epoch": 2.13297634984382,
"grad_norm": 0.2656950110555082,
"learning_rate": 2.3384265093071645e-06,
"loss": 0.2652,
"step": 4780
},
{
"epoch": 2.13743864346274,
"grad_norm": 0.2912542173901888,
"learning_rate": 2.316483116318608e-06,
"loss": 0.2569,
"step": 4790
},
{
"epoch": 2.14190093708166,
"grad_norm": 0.2747221058539636,
"learning_rate": 2.2946120822960562e-06,
"loss": 0.2662,
"step": 4800
},
{
"epoch": 2.14636323070058,
"grad_norm": 0.27225697835899576,
"learning_rate": 2.2728139969751005e-06,
"loss": 0.267,
"step": 4810
},
{
"epoch": 2.1508255243195,
"grad_norm": 0.2526714399887326,
"learning_rate": 2.2510894481243205e-06,
"loss": 0.2659,
"step": 4820
},
{
"epoch": 2.1552878179384205,
"grad_norm": 0.2787198573344132,
"learning_rate": 2.2294390215294483e-06,
"loss": 0.2612,
"step": 4830
},
{
"epoch": 2.1597501115573405,
"grad_norm": 0.2881621459035689,
"learning_rate": 2.207863300977558e-06,
"loss": 0.2658,
"step": 4840
},
{
"epoch": 2.1642124051762606,
"grad_norm": 0.2651959620965174,
"learning_rate": 2.186362868241341e-06,
"loss": 0.2656,
"step": 4850
},
{
"epoch": 2.1686746987951806,
"grad_norm": 0.27079635036661953,
"learning_rate": 2.164938303063404e-06,
"loss": 0.2651,
"step": 4860
},
{
"epoch": 2.1731369924141006,
"grad_norm": 0.2627129405327371,
"learning_rate": 2.1435901831406504e-06,
"loss": 0.259,
"step": 4870
},
{
"epoch": 2.177599286033021,
"grad_norm": 0.28039269813095946,
"learning_rate": 2.1223190841086893e-06,
"loss": 0.2672,
"step": 4880
},
{
"epoch": 2.182061579651941,
"grad_norm": 0.2641978867337932,
"learning_rate": 2.1011255795263232e-06,
"loss": 0.2658,
"step": 4890
},
{
"epoch": 2.186523873270861,
"grad_norm": 0.26996809053622206,
"learning_rate": 2.080010240860083e-06,
"loss": 0.2714,
"step": 4900
},
{
"epoch": 2.190986166889781,
"grad_norm": 0.28824583701799006,
"learning_rate": 2.058973637468811e-06,
"loss": 0.2676,
"step": 4910
},
{
"epoch": 2.1954484605087012,
"grad_norm": 0.27005127612792346,
"learning_rate": 2.0380163365883188e-06,
"loss": 0.2738,
"step": 4920
},
{
"epoch": 2.1999107541276217,
"grad_norm": 0.2740103878032033,
"learning_rate": 2.01713890331608e-06,
"loss": 0.2579,
"step": 4930
},
{
"epoch": 2.2043730477465417,
"grad_norm": 0.2908767212364278,
"learning_rate": 1.996341900596008e-06,
"loss": 0.2696,
"step": 4940
},
{
"epoch": 2.208835341365462,
"grad_norm": 0.2950349079099773,
"learning_rate": 1.9756258892032604e-06,
"loss": 0.2645,
"step": 4950
},
{
"epoch": 2.213297634984382,
"grad_norm": 0.26050135319115303,
"learning_rate": 1.9549914277291326e-06,
"loss": 0.2642,
"step": 4960
},
{
"epoch": 2.2177599286033023,
"grad_norm": 0.26630446740688435,
"learning_rate": 1.9344390725659827e-06,
"loss": 0.2684,
"step": 4970
},
{
"epoch": 2.2222222222222223,
"grad_norm": 0.2743425147318156,
"learning_rate": 1.9139693778922437e-06,
"loss": 0.2667,
"step": 4980
},
{
"epoch": 2.2266845158411424,
"grad_norm": 0.2736976794496928,
"learning_rate": 1.8935828956574615e-06,
"loss": 0.2696,
"step": 4990
},
{
"epoch": 2.2311468094600624,
"grad_norm": 0.26998232015281187,
"learning_rate": 1.873280175567434e-06,
"loss": 0.2685,
"step": 5000
},
{
"epoch": 2.2356091030789824,
"grad_norm": 0.2668005078365468,
"learning_rate": 1.8530617650693671e-06,
"loss": 0.2658,
"step": 5010
},
{
"epoch": 2.240071396697903,
"grad_norm": 0.27297408370818904,
"learning_rate": 1.832928209337133e-06,
"loss": 0.2711,
"step": 5020
},
{
"epoch": 2.244533690316823,
"grad_norm": 0.27120763446153706,
"learning_rate": 1.8128800512565514e-06,
"loss": 0.2632,
"step": 5030
},
{
"epoch": 2.248995983935743,
"grad_norm": 0.3043844117742173,
"learning_rate": 1.792917831410767e-06,
"loss": 0.2646,
"step": 5040
},
{
"epoch": 2.253458277554663,
"grad_norm": 0.27555460116170827,
"learning_rate": 1.7730420880656641e-06,
"loss": 0.2627,
"step": 5050
},
{
"epoch": 2.2579205711735835,
"grad_norm": 0.25867849621197275,
"learning_rate": 1.7532533571553523e-06,
"loss": 0.2723,
"step": 5060
},
{
"epoch": 2.2623828647925035,
"grad_norm": 0.2696220165863831,
"learning_rate": 1.7335521722677223e-06,
"loss": 0.2567,
"step": 5070
},
{
"epoch": 2.2668451584114235,
"grad_norm": 0.2844615695492899,
"learning_rate": 1.7139390646300503e-06,
"loss": 0.2636,
"step": 5080
},
{
"epoch": 2.2713074520303436,
"grad_norm": 0.26100844869286605,
"learning_rate": 1.6944145630946757e-06,
"loss": 0.2547,
"step": 5090
},
{
"epoch": 2.2757697456492636,
"grad_norm": 0.2655209152406865,
"learning_rate": 1.6749791941247501e-06,
"loss": 0.2667,
"step": 5100
},
{
"epoch": 2.280232039268184,
"grad_norm": 0.2670441776783262,
"learning_rate": 1.6556334817800247e-06,
"loss": 0.2593,
"step": 5110
},
{
"epoch": 2.284694332887104,
"grad_norm": 0.2702953527750314,
"learning_rate": 1.636377947702737e-06,
"loss": 0.2668,
"step": 5120
},
{
"epoch": 2.289156626506024,
"grad_norm": 0.2801526427682019,
"learning_rate": 1.6172131111035305e-06,
"loss": 0.2593,
"step": 5130
},
{
"epoch": 2.293618920124944,
"grad_norm": 0.2664506709616671,
"learning_rate": 1.598139488747467e-06,
"loss": 0.2679,
"step": 5140
},
{
"epoch": 2.298081213743864,
"grad_norm": 0.2842866153483872,
"learning_rate": 1.5791575949400801e-06,
"loss": 0.2683,
"step": 5150
},
{
"epoch": 2.3025435073627847,
"grad_norm": 0.2713206331551484,
"learning_rate": 1.5602679415135203e-06,
"loss": 0.2672,
"step": 5160
},
{
"epoch": 2.3070058009817047,
"grad_norm": 0.2680243773548362,
"learning_rate": 1.5414710378127407e-06,
"loss": 0.2668,
"step": 5170
},
{
"epoch": 2.3114680946006247,
"grad_norm": 0.2937301531788135,
"learning_rate": 1.522767390681776e-06,
"loss": 0.2621,
"step": 5180
},
{
"epoch": 2.3159303882195448,
"grad_norm": 0.359404031790861,
"learning_rate": 1.5041575044500645e-06,
"loss": 0.2667,
"step": 5190
},
{
"epoch": 2.320392681838465,
"grad_norm": 0.26130760139495907,
"learning_rate": 1.4856418809188538e-06,
"loss": 0.2544,
"step": 5200
},
{
"epoch": 2.3248549754573853,
"grad_norm": 0.25686940370213246,
"learning_rate": 1.4672210193476766e-06,
"loss": 0.274,
"step": 5210
},
{
"epoch": 2.3293172690763053,
"grad_norm": 0.2939624963929075,
"learning_rate": 1.4488954164408736e-06,
"loss": 0.2701,
"step": 5220
},
{
"epoch": 2.3337795626952254,
"grad_norm": 0.2783635407103021,
"learning_rate": 1.4306655663342173e-06,
"loss": 0.2563,
"step": 5230
},
{
"epoch": 2.3382418563141454,
"grad_norm": 0.29226415842720005,
"learning_rate": 1.412531960581572e-06,
"loss": 0.2541,
"step": 5240
},
{
"epoch": 2.3427041499330654,
"grad_norm": 0.3552878374922974,
"learning_rate": 1.3944950881416541e-06,
"loss": 0.2645,
"step": 5250
},
{
"epoch": 2.347166443551986,
"grad_norm": 0.25303935909037156,
"learning_rate": 1.3765554353648348e-06,
"loss": 0.26,
"step": 5260
},
{
"epoch": 2.351628737170906,
"grad_norm": 0.26959464702039115,
"learning_rate": 1.3587134859800378e-06,
"loss": 0.2622,
"step": 5270
},
{
"epoch": 2.356091030789826,
"grad_norm": 0.2767449988752219,
"learning_rate": 1.3409697210816846e-06,
"loss": 0.2631,
"step": 5280
},
{
"epoch": 2.360553324408746,
"grad_norm": 0.2948939949561596,
"learning_rate": 1.3233246191167293e-06,
"loss": 0.2721,
"step": 5290
},
{
"epoch": 2.365015618027666,
"grad_norm": 0.2806355856045073,
"learning_rate": 1.3057786558717593e-06,
"loss": 0.2674,
"step": 5300
},
{
"epoch": 2.3694779116465865,
"grad_norm": 0.2738252090208611,
"learning_rate": 1.2883323044601575e-06,
"loss": 0.274,
"step": 5310
},
{
"epoch": 2.3739402052655065,
"grad_norm": 0.2577368049277014,
"learning_rate": 1.2709860353093555e-06,
"loss": 0.2668,
"step": 5320
},
{
"epoch": 2.3784024988844266,
"grad_norm": 0.29099029260946785,
"learning_rate": 1.2537403161481387e-06,
"loss": 0.2669,
"step": 5330
},
{
"epoch": 2.3828647925033466,
"grad_norm": 0.264039286727906,
"learning_rate": 1.2365956119940436e-06,
"loss": 0.2768,
"step": 5340
},
{
"epoch": 2.3873270861222666,
"grad_norm": 0.2686625114381172,
"learning_rate": 1.2195523851408153e-06,
"loss": 0.2735,
"step": 5350
},
{
"epoch": 2.391789379741187,
"grad_norm": 0.27917896436759787,
"learning_rate": 1.2026110951459364e-06,
"loss": 0.2709,
"step": 5360
},
{
"epoch": 2.396251673360107,
"grad_norm": 0.2759607663737845,
"learning_rate": 1.1857721988182468e-06,
"loss": 0.264,
"step": 5370
},
{
"epoch": 2.400713966979027,
"grad_norm": 0.2744901640260912,
"learning_rate": 1.169036150205614e-06,
"loss": 0.2638,
"step": 5380
},
{
"epoch": 2.405176260597947,
"grad_norm": 0.2639450101124726,
"learning_rate": 1.1524034005827028e-06,
"loss": 0.2609,
"step": 5390
},
{
"epoch": 2.4096385542168672,
"grad_norm": 0.26462733650289344,
"learning_rate": 1.1358743984387939e-06,
"loss": 0.2571,
"step": 5400
},
{
"epoch": 2.4141008478357877,
"grad_norm": 0.26467540440445797,
"learning_rate": 1.1194495894657021e-06,
"loss": 0.264,
"step": 5410
},
{
"epoch": 2.4185631414547077,
"grad_norm": 0.2541839967727397,
"learning_rate": 1.103129416545749e-06,
"loss": 0.2734,
"step": 5420
},
{
"epoch": 2.4230254350736278,
"grad_norm": 0.25980858502680365,
"learning_rate": 1.0869143197398313e-06,
"loss": 0.2711,
"step": 5430
},
{
"epoch": 2.427487728692548,
"grad_norm": 0.2749178435606242,
"learning_rate": 1.070804736275543e-06,
"loss": 0.2638,
"step": 5440
},
{
"epoch": 2.4319500223114683,
"grad_norm": 0.2507799683207913,
"learning_rate": 1.0548011005353975e-06,
"loss": 0.2639,
"step": 5450
},
{
"epoch": 2.4364123159303883,
"grad_norm": 0.27822037022845514,
"learning_rate": 1.0389038440451048e-06,
"loss": 0.2687,
"step": 5460
},
{
"epoch": 2.4408746095493083,
"grad_norm": 0.25912298984876,
"learning_rate": 1.0231133954619449e-06,
"loss": 0.2517,
"step": 5470
},
{
"epoch": 2.4453369031682284,
"grad_norm": 0.26989363458508225,
"learning_rate": 1.0074301805632014e-06,
"loss": 0.261,
"step": 5480
},
{
"epoch": 2.4497991967871484,
"grad_norm": 0.2611164113008286,
"learning_rate": 9.918546222346837e-07,
"loss": 0.2732,
"step": 5490
},
{
"epoch": 2.454261490406069,
"grad_norm": 0.26664227118678985,
"learning_rate": 9.763871404593295e-07,
"loss": 0.2635,
"step": 5500
},
{
"epoch": 2.458723784024989,
"grad_norm": 0.2845594958886921,
"learning_rate": 9.610281523058696e-07,
"loss": 0.2724,
"step": 5510
},
{
"epoch": 2.463186077643909,
"grad_norm": 0.26533071138557196,
"learning_rate": 9.457780719175924e-07,
"loss": 0.2594,
"step": 5520
},
{
"epoch": 2.467648371262829,
"grad_norm": 0.2931575613556212,
"learning_rate": 9.306373105011685e-07,
"loss": 0.2642,
"step": 5530
},
{
"epoch": 2.4721106648817495,
"grad_norm": 0.2579593656604341,
"learning_rate": 9.15606276315571e-07,
"loss": 0.2686,
"step": 5540
},
{
"epoch": 2.4765729585006695,
"grad_norm": 0.25308387491964407,
"learning_rate": 9.006853746610578e-07,
"loss": 0.2748,
"step": 5550
},
{
"epoch": 2.4810352521195895,
"grad_norm": 0.26649803073733885,
"learning_rate": 8.858750078682526e-07,
"loss": 0.2702,
"step": 5560
},
{
"epoch": 2.4854975457385096,
"grad_norm": 0.26710930733839655,
"learning_rate": 8.711755752872875e-07,
"loss": 0.2741,
"step": 5570
},
{
"epoch": 2.4899598393574296,
"grad_norm": 0.28129936297807595,
"learning_rate": 8.565874732770429e-07,
"loss": 0.2711,
"step": 5580
},
{
"epoch": 2.49442213297635,
"grad_norm": 0.2562852808371683,
"learning_rate": 8.421110951944533e-07,
"loss": 0.2729,
"step": 5590
},
{
"epoch": 2.49888442659527,
"grad_norm": 0.2705997291745019,
"learning_rate": 8.277468313839033e-07,
"loss": 0.266,
"step": 5600
},
{
"epoch": 2.50334672021419,
"grad_norm": 0.27415617746696014,
"learning_rate": 8.13495069166706e-07,
"loss": 0.2635,
"step": 5610
},
{
"epoch": 2.50780901383311,
"grad_norm": 0.268499538957006,
"learning_rate": 7.993561928306503e-07,
"loss": 0.2626,
"step": 5620
},
{
"epoch": 2.51227130745203,
"grad_norm": 0.2605445838763644,
"learning_rate": 7.853305836196507e-07,
"loss": 0.2684,
"step": 5630
},
{
"epoch": 2.5167336010709507,
"grad_norm": 0.259462323778387,
"learning_rate": 7.714186197234547e-07,
"loss": 0.2669,
"step": 5640
},
{
"epoch": 2.5211958946898707,
"grad_norm": 0.2737812098281487,
"learning_rate": 7.576206762674565e-07,
"loss": 0.2677,
"step": 5650
},
{
"epoch": 2.5256581883087907,
"grad_norm": 0.24457747335646154,
"learning_rate": 7.439371253025718e-07,
"loss": 0.2441,
"step": 5660
},
{
"epoch": 2.5301204819277108,
"grad_norm": 0.25318796192153853,
"learning_rate": 7.303683357952168e-07,
"loss": 0.2692,
"step": 5670
},
{
"epoch": 2.534582775546631,
"grad_norm": 0.25584827852861586,
"learning_rate": 7.169146736173477e-07,
"loss": 0.2696,
"step": 5680
},
{
"epoch": 2.5390450691655513,
"grad_norm": 0.2602088423877808,
"learning_rate": 7.035765015366047e-07,
"loss": 0.2668,
"step": 5690
},
{
"epoch": 2.5435073627844713,
"grad_norm": 0.2820757957669124,
"learning_rate": 6.903541792065265e-07,
"loss": 0.2771,
"step": 5700
},
{
"epoch": 2.5479696564033913,
"grad_norm": 0.289619323277333,
"learning_rate": 6.772480631568496e-07,
"loss": 0.2677,
"step": 5710
},
{
"epoch": 2.5524319500223114,
"grad_norm": 0.24262073349803384,
"learning_rate": 6.642585067839003e-07,
"loss": 0.2632,
"step": 5720
},
{
"epoch": 2.5568942436412314,
"grad_norm": 0.3063970631496104,
"learning_rate": 6.513858603410605e-07,
"loss": 0.2645,
"step": 5730
},
{
"epoch": 2.561356537260152,
"grad_norm": 0.24469985287690035,
"learning_rate": 6.386304709293295e-07,
"loss": 0.2674,
"step": 5740
},
{
"epoch": 2.565818830879072,
"grad_norm": 0.25778009113854466,
"learning_rate": 6.259926824879575e-07,
"loss": 0.2686,
"step": 5750
},
{
"epoch": 2.570281124497992,
"grad_norm": 0.2519725071505496,
"learning_rate": 6.134728357851777e-07,
"loss": 0.2614,
"step": 5760
},
{
"epoch": 2.574743418116912,
"grad_norm": 0.24559660905732697,
"learning_rate": 6.010712684090125e-07,
"loss": 0.2538,
"step": 5770
},
{
"epoch": 2.579205711735832,
"grad_norm": 0.2549259950940346,
"learning_rate": 5.887883147581769e-07,
"loss": 0.2669,
"step": 5780
},
{
"epoch": 2.5836680053547525,
"grad_norm": 0.26474210608850174,
"learning_rate": 5.766243060330551e-07,
"loss": 0.2645,
"step": 5790
},
{
"epoch": 2.5881302989736725,
"grad_norm": 0.2611149545019317,
"learning_rate": 5.645795702267731e-07,
"loss": 0.2713,
"step": 5800
},
{
"epoch": 2.5925925925925926,
"grad_norm": 0.27028998633235257,
"learning_rate": 5.526544321163573e-07,
"loss": 0.2765,
"step": 5810
},
{
"epoch": 2.5970548862115126,
"grad_norm": 0.26131292774366577,
"learning_rate": 5.408492132539705e-07,
"loss": 0.2601,
"step": 5820
},
{
"epoch": 2.6015171798304326,
"grad_norm": 0.25036551401682255,
"learning_rate": 5.29164231958249e-07,
"loss": 0.2667,
"step": 5830
},
{
"epoch": 2.605979473449353,
"grad_norm": 0.32252379358555433,
"learning_rate": 5.175998033057128e-07,
"loss": 0.2598,
"step": 5840
},
{
"epoch": 2.610441767068273,
"grad_norm": 0.26111392130818567,
"learning_rate": 5.061562391222752e-07,
"loss": 0.2708,
"step": 5850
},
{
"epoch": 2.614904060687193,
"grad_norm": 0.2610753618549455,
"learning_rate": 4.948338479748293e-07,
"loss": 0.264,
"step": 5860
},
{
"epoch": 2.619366354306113,
"grad_norm": 0.24433197279025629,
"learning_rate": 4.836329351629343e-07,
"loss": 0.2591,
"step": 5870
},
{
"epoch": 2.6238286479250332,
"grad_norm": 0.25566187611860886,
"learning_rate": 4.7255380271057637e-07,
"loss": 0.2709,
"step": 5880
},
{
"epoch": 2.6282909415439537,
"grad_norm": 0.25059259535846407,
"learning_rate": 4.6159674935802867e-07,
"loss": 0.2623,
"step": 5890
},
{
"epoch": 2.6327532351628737,
"grad_norm": 0.2623672779635096,
"learning_rate": 4.507620705537974e-07,
"loss": 0.259,
"step": 5900
},
{
"epoch": 2.6372155287817938,
"grad_norm": 0.2786335146735297,
"learning_rate": 4.400500584466505e-07,
"loss": 0.2676,
"step": 5910
},
{
"epoch": 2.641677822400714,
"grad_norm": 0.24900048144785913,
"learning_rate": 4.294610018777462e-07,
"loss": 0.263,
"step": 5920
},
{
"epoch": 2.646140116019634,
"grad_norm": 0.25847202609000797,
"learning_rate": 4.1899518637283753e-07,
"loss": 0.2677,
"step": 5930
},
{
"epoch": 2.6506024096385543,
"grad_norm": 0.2782722288055866,
"learning_rate": 4.0865289413458074e-07,
"loss": 0.2617,
"step": 5940
},
{
"epoch": 2.6550647032574743,
"grad_norm": 0.28648904043807316,
"learning_rate": 3.984344040349197e-07,
"loss": 0.2572,
"step": 5950
},
{
"epoch": 2.6595269968763944,
"grad_norm": 0.2636564981421794,
"learning_rate": 3.883399916075714e-07,
"loss": 0.2623,
"step": 5960
},
{
"epoch": 2.663989290495315,
"grad_norm": 0.2493631339678549,
"learning_rate": 3.783699290405901e-07,
"loss": 0.2649,
"step": 5970
},
{
"epoch": 2.6684515841142344,
"grad_norm": 0.2528637606092601,
"learning_rate": 3.6852448516903727e-07,
"loss": 0.2764,
"step": 5980
},
{
"epoch": 2.672913877733155,
"grad_norm": 0.2729817482325442,
"learning_rate": 3.588039254677211e-07,
"loss": 0.2622,
"step": 5990
},
{
"epoch": 2.677376171352075,
"grad_norm": 0.3108379178962195,
"learning_rate": 3.4920851204405026e-07,
"loss": 0.2614,
"step": 6000
},
{
"epoch": 2.681838464970995,
"grad_norm": 0.25516673857932876,
"learning_rate": 3.397385036309558e-07,
"loss": 0.2545,
"step": 6010
},
{
"epoch": 2.6863007585899155,
"grad_norm": 0.26499244030577884,
"learning_rate": 3.303941555799223e-07,
"loss": 0.269,
"step": 6020
},
{
"epoch": 2.6907630522088355,
"grad_norm": 0.2644403491762858,
"learning_rate": 3.211757198540971e-07,
"loss": 0.261,
"step": 6030
},
{
"epoch": 2.6952253458277555,
"grad_norm": 0.2512005184606446,
"learning_rate": 3.12083445021501e-07,
"loss": 0.2608,
"step": 6040
},
{
"epoch": 2.6996876394466756,
"grad_norm": 0.24284759524855465,
"learning_rate": 3.031175762483207e-07,
"loss": 0.2573,
"step": 6050
},
{
"epoch": 2.7041499330655956,
"grad_norm": 0.2588341633636346,
"learning_rate": 2.942783552923034e-07,
"loss": 0.2721,
"step": 6060
},
{
"epoch": 2.708612226684516,
"grad_norm": 0.27162185956855733,
"learning_rate": 2.8556602049623515e-07,
"loss": 0.2635,
"step": 6070
},
{
"epoch": 2.713074520303436,
"grad_norm": 0.24785721701569988,
"learning_rate": 2.769808067815127e-07,
"loss": 0.2654,
"step": 6080
},
{
"epoch": 2.717536813922356,
"grad_norm": 0.26379289564503067,
"learning_rate": 2.68522945641812e-07,
"loss": 0.2703,
"step": 6090
},
{
"epoch": 2.721999107541276,
"grad_norm": 0.25223927852747313,
"learning_rate": 2.6019266513684525e-07,
"loss": 0.2633,
"step": 6100
},
{
"epoch": 2.726461401160196,
"grad_norm": 0.26022587215739407,
"learning_rate": 2.5199018988620925e-07,
"loss": 0.2628,
"step": 6110
},
{
"epoch": 2.7309236947791167,
"grad_norm": 0.25618186440473806,
"learning_rate": 2.439157410633336e-07,
"loss": 0.2549,
"step": 6120
},
{
"epoch": 2.7353859883980367,
"grad_norm": 0.2709291288264617,
"learning_rate": 2.3596953638951093e-07,
"loss": 0.2673,
"step": 6130
},
{
"epoch": 2.7398482820169567,
"grad_norm": 0.3097723757526494,
"learning_rate": 2.2815179012803056e-07,
"loss": 0.2667,
"step": 6140
},
{
"epoch": 2.7443105756358768,
"grad_norm": 0.26215128271049865,
"learning_rate": 2.2046271307839928e-07,
"loss": 0.2659,
"step": 6150
},
{
"epoch": 2.748772869254797,
"grad_norm": 0.26937507169769476,
"learning_rate": 2.1290251257065852e-07,
"loss": 0.2647,
"step": 6160
},
{
"epoch": 2.7532351628737173,
"grad_norm": 0.28086965259228747,
"learning_rate": 2.054713924597923e-07,
"loss": 0.2596,
"step": 6170
},
{
"epoch": 2.7576974564926373,
"grad_norm": 0.25164611443705137,
"learning_rate": 1.981695531202299e-07,
"loss": 0.2613,
"step": 6180
},
{
"epoch": 2.7621597501115573,
"grad_norm": 0.26328055433222686,
"learning_rate": 1.9099719144044737e-07,
"loss": 0.2585,
"step": 6190
},
{
"epoch": 2.7666220437304774,
"grad_norm": 0.24268837305344704,
"learning_rate": 1.8395450081765133e-07,
"loss": 0.2594,
"step": 6200
},
{
"epoch": 2.7710843373493974,
"grad_norm": 0.27208555089507047,
"learning_rate": 1.7704167115257242e-07,
"loss": 0.2701,
"step": 6210
},
{
"epoch": 2.775546630968318,
"grad_norm": 0.2592924605339396,
"learning_rate": 1.7025888884433682e-07,
"loss": 0.258,
"step": 6220
},
{
"epoch": 2.780008924587238,
"grad_norm": 0.25268564338580296,
"learning_rate": 1.636063367854468e-07,
"loss": 0.2643,
"step": 6230
},
{
"epoch": 2.784471218206158,
"grad_norm": 0.25499560352534534,
"learning_rate": 1.5708419435684463e-07,
"loss": 0.2592,
"step": 6240
},
{
"epoch": 2.788933511825078,
"grad_norm": 0.2754254434988614,
"learning_rate": 1.506926374230777e-07,
"loss": 0.2685,
"step": 6250
},
{
"epoch": 2.793395805443998,
"grad_norm": 0.24677760129233578,
"learning_rate": 1.4443183832755558e-07,
"loss": 0.2668,
"step": 6260
},
{
"epoch": 2.7978580990629185,
"grad_norm": 0.2419766232491537,
"learning_rate": 1.3830196588790535e-07,
"loss": 0.2649,
"step": 6270
},
{
"epoch": 2.8023203926818385,
"grad_norm": 0.25179145421288907,
"learning_rate": 1.3230318539141586e-07,
"loss": 0.2613,
"step": 6280
},
{
"epoch": 2.8067826863007586,
"grad_norm": 0.24418794826639928,
"learning_rate": 1.2643565859058182e-07,
"loss": 0.2735,
"step": 6290
},
{
"epoch": 2.8112449799196786,
"grad_norm": 0.2484899106044706,
"learning_rate": 1.206995436987457e-07,
"loss": 0.2676,
"step": 6300
},
{
"epoch": 2.8157072735385986,
"grad_norm": 0.2542428658067045,
"learning_rate": 1.1509499538582768e-07,
"loss": 0.2634,
"step": 6310
},
{
"epoch": 2.820169567157519,
"grad_norm": 0.24625678831935105,
"learning_rate": 1.0962216477415632e-07,
"loss": 0.2644,
"step": 6320
},
{
"epoch": 2.824631860776439,
"grad_norm": 0.2557578526542899,
"learning_rate": 1.0428119943439396e-07,
"loss": 0.2697,
"step": 6330
},
{
"epoch": 2.829094154395359,
"grad_norm": 0.2626526421729072,
"learning_rate": 9.907224338155774e-08,
"loss": 0.2641,
"step": 6340
},
{
"epoch": 2.833556448014279,
"grad_norm": 0.24742251678799534,
"learning_rate": 9.399543707113601e-08,
"loss": 0.2672,
"step": 6350
},
{
"epoch": 2.8380187416331992,
"grad_norm": 0.24976844344830124,
"learning_rate": 8.905091739530026e-08,
"loss": 0.2642,
"step": 6360
},
{
"epoch": 2.8424810352521197,
"grad_norm": 0.2434658676513343,
"learning_rate": 8.423881767921637e-08,
"loss": 0.2666,
"step": 6370
},
{
"epoch": 2.8469433288710397,
"grad_norm": 0.2709418898144275,
"learning_rate": 7.955926767744649e-08,
"loss": 0.2678,
"step": 6380
},
{
"epoch": 2.8514056224899598,
"grad_norm": 0.2682012558379756,
"learning_rate": 7.501239357045275e-08,
"loss": 0.2599,
"step": 6390
},
{
"epoch": 2.85586791610888,
"grad_norm": 0.2541699897689904,
"learning_rate": 7.059831796119243e-08,
"loss": 0.2637,
"step": 6400
},
{
"epoch": 2.8603302097278,
"grad_norm": 0.2786116189525862,
"learning_rate": 6.631715987181653e-08,
"loss": 0.2633,
"step": 6410
},
{
"epoch": 2.8647925033467203,
"grad_norm": 0.2549013358787648,
"learning_rate": 6.216903474045411e-08,
"loss": 0.2675,
"step": 6420
},
{
"epoch": 2.8692547969656403,
"grad_norm": 0.24035403326033933,
"learning_rate": 5.815405441810584e-08,
"loss": 0.2704,
"step": 6430
},
{
"epoch": 2.8737170905845604,
"grad_norm": 0.24485345786981255,
"learning_rate": 5.427232716562314e-08,
"loss": 0.2654,
"step": 6440
},
{
"epoch": 2.878179384203481,
"grad_norm": 0.2509308995806849,
"learning_rate": 5.05239576507921e-08,
"loss": 0.2676,
"step": 6450
},
{
"epoch": 2.8826416778224004,
"grad_norm": 0.25125126609422216,
"learning_rate": 4.690904694550913e-08,
"loss": 0.2659,
"step": 6460
},
{
"epoch": 2.887103971441321,
"grad_norm": 0.29884930114175684,
"learning_rate": 4.342769252305867e-08,
"loss": 0.2659,
"step": 6470
},
{
"epoch": 2.891566265060241,
"grad_norm": 0.2621295720462137,
"learning_rate": 4.007998825548032e-08,
"loss": 0.2635,
"step": 6480
},
{
"epoch": 2.896028558679161,
"grad_norm": 0.2559890093701122,
"learning_rate": 3.686602441104137e-08,
"loss": 0.2579,
"step": 6490
},
{
"epoch": 2.9004908522980815,
"grad_norm": 0.294711369331024,
"learning_rate": 3.378588765180268e-08,
"loss": 0.2596,
"step": 6500
},
{
"epoch": 2.9049531459170015,
"grad_norm": 0.2406776827039642,
"learning_rate": 3.083966103127833e-08,
"loss": 0.2628,
"step": 6510
},
{
"epoch": 2.9094154395359215,
"grad_norm": 0.2592651922892189,
"learning_rate": 2.8027423992201265e-08,
"loss": 0.2586,
"step": 6520
},
{
"epoch": 2.9138777331548416,
"grad_norm": 0.2544628675769128,
"learning_rate": 2.5349252364376132e-08,
"loss": 0.2636,
"step": 6530
},
{
"epoch": 2.9183400267737616,
"grad_norm": 0.24915354540620344,
"learning_rate": 2.280521836263927e-08,
"loss": 0.2736,
"step": 6540
},
{
"epoch": 2.922802320392682,
"grad_norm": 0.25636383494451315,
"learning_rate": 2.0395390584908027e-08,
"loss": 0.2604,
"step": 6550
},
{
"epoch": 2.927264614011602,
"grad_norm": 0.23743990674304524,
"learning_rate": 1.8119834010332236e-08,
"loss": 0.2644,
"step": 6560
},
{
"epoch": 2.931726907630522,
"grad_norm": 0.2421382237738656,
"learning_rate": 1.5978609997542306e-08,
"loss": 0.2695,
"step": 6570
},
{
"epoch": 2.936189201249442,
"grad_norm": 0.25987130839419936,
"learning_rate": 1.3971776282994398e-08,
"loss": 0.2612,
"step": 6580
},
{
"epoch": 2.940651494868362,
"grad_norm": 0.28118171114124185,
"learning_rate": 1.2099386979414484e-08,
"loss": 0.2727,
"step": 6590
},
{
"epoch": 2.9451137884872827,
"grad_norm": 0.25875766974667813,
"learning_rate": 1.0361492574337827e-08,
"loss": 0.2599,
"step": 6600
},
{
"epoch": 2.9495760821062027,
"grad_norm": 0.24856208937215735,
"learning_rate": 8.758139928748966e-09,
"loss": 0.2585,
"step": 6610
},
{
"epoch": 2.9540383757251227,
"grad_norm": 0.2605005110493228,
"learning_rate": 7.289372275816608e-09,
"loss": 0.263,
"step": 6620
},
{
"epoch": 2.9585006693440428,
"grad_norm": 0.27730135009271706,
"learning_rate": 5.95522921973013e-09,
"loss": 0.267,
"step": 6630
},
{
"epoch": 2.962962962962963,
"grad_norm": 0.25897469150617725,
"learning_rate": 4.7557467346281975e-09,
"loss": 0.2682,
"step": 6640
},
{
"epoch": 2.9674252565818833,
"grad_norm": 0.240962238635422,
"learning_rate": 3.690957163633435e-09,
"loss": 0.2604,
"step": 6650
},
{
"epoch": 2.9718875502008033,
"grad_norm": 0.29524556685558273,
"learning_rate": 2.760889217976459e-09,
"loss": 0.2694,
"step": 6660
},
{
"epoch": 2.9763498438197233,
"grad_norm": 0.2399731201737577,
"learning_rate": 1.9655679762220494e-09,
"loss": 0.2615,
"step": 6670
},
{
"epoch": 2.9808121374386434,
"grad_norm": 0.26022562581894665,
"learning_rate": 1.305014883595801e-09,
"loss": 0.2624,
"step": 6680
},
{
"epoch": 2.9852744310575634,
"grad_norm": 0.25348317394477593,
"learning_rate": 7.792477514034779e-10,
"loss": 0.2586,
"step": 6690
},
{
"epoch": 2.989736724676484,
"grad_norm": 0.2596395509558711,
"learning_rate": 3.882807565502855e-10,
"loss": 0.2687,
"step": 6700
},
{
"epoch": 2.994199018295404,
"grad_norm": 0.25130314991574476,
"learning_rate": 1.3212444115950907e-10,
"loss": 0.2606,
"step": 6710
},
{
"epoch": 2.998661311914324,
"grad_norm": 0.27465854203632456,
"learning_rate": 1.0785712290517503e-11,
"loss": 0.26,
"step": 6720
}
],
"logging_steps": 10,
"max_steps": 6723,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 10000000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.6290612428931072e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}