mat-policy-lora-15k / trainer_state.json
tyrleng's picture
Upload folder using huggingface_hub
8272c17 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 31.380753138075313,
"eval_steps": 500,
"global_step": 15000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02092050209205021,
"grad_norm": 2.293994665145874,
"learning_rate": 1.2000000000000002e-06,
"loss": 1.4456,
"step": 10
},
{
"epoch": 0.04184100418410042,
"grad_norm": 1.97152578830719,
"learning_rate": 2.5333333333333334e-06,
"loss": 1.502,
"step": 20
},
{
"epoch": 0.06276150627615062,
"grad_norm": 1.8830021619796753,
"learning_rate": 3.866666666666667e-06,
"loss": 1.5108,
"step": 30
},
{
"epoch": 0.08368200836820083,
"grad_norm": 1.9566497802734375,
"learning_rate": 5.2e-06,
"loss": 1.4048,
"step": 40
},
{
"epoch": 0.10460251046025104,
"grad_norm": 1.8011934757232666,
"learning_rate": 6.533333333333333e-06,
"loss": 1.2728,
"step": 50
},
{
"epoch": 0.12552301255230125,
"grad_norm": 1.3720723390579224,
"learning_rate": 7.866666666666667e-06,
"loss": 1.1924,
"step": 60
},
{
"epoch": 0.14644351464435146,
"grad_norm": 1.3044836521148682,
"learning_rate": 9.2e-06,
"loss": 0.9494,
"step": 70
},
{
"epoch": 0.16736401673640167,
"grad_norm": 0.8721851706504822,
"learning_rate": 1.0533333333333335e-05,
"loss": 0.8668,
"step": 80
},
{
"epoch": 0.18828451882845187,
"grad_norm": 0.5817344188690186,
"learning_rate": 1.1866666666666668e-05,
"loss": 0.7112,
"step": 90
},
{
"epoch": 0.20920502092050208,
"grad_norm": 0.8804004192352295,
"learning_rate": 1.32e-05,
"loss": 0.6325,
"step": 100
},
{
"epoch": 0.2301255230125523,
"grad_norm": 0.4721251428127289,
"learning_rate": 1.4533333333333335e-05,
"loss": 0.5486,
"step": 110
},
{
"epoch": 0.2510460251046025,
"grad_norm": 0.5858187675476074,
"learning_rate": 1.586666666666667e-05,
"loss": 0.4759,
"step": 120
},
{
"epoch": 0.2719665271966527,
"grad_norm": 0.8786276578903198,
"learning_rate": 1.7199999999999998e-05,
"loss": 0.4455,
"step": 130
},
{
"epoch": 0.2928870292887029,
"grad_norm": 0.581843912601471,
"learning_rate": 1.8533333333333334e-05,
"loss": 0.4113,
"step": 140
},
{
"epoch": 0.3138075313807531,
"grad_norm": 0.6839861273765564,
"learning_rate": 1.9866666666666667e-05,
"loss": 0.3713,
"step": 150
},
{
"epoch": 0.33472803347280333,
"grad_norm": 0.49527251720428467,
"learning_rate": 2.12e-05,
"loss": 0.3411,
"step": 160
},
{
"epoch": 0.35564853556485354,
"grad_norm": 0.3660479784011841,
"learning_rate": 2.2533333333333333e-05,
"loss": 0.3304,
"step": 170
},
{
"epoch": 0.37656903765690375,
"grad_norm": 0.3848160207271576,
"learning_rate": 2.3866666666666666e-05,
"loss": 0.2835,
"step": 180
},
{
"epoch": 0.39748953974895396,
"grad_norm": 0.4325522482395172,
"learning_rate": 2.5200000000000003e-05,
"loss": 0.2554,
"step": 190
},
{
"epoch": 0.41841004184100417,
"grad_norm": 0.6360965371131897,
"learning_rate": 2.6533333333333332e-05,
"loss": 0.2746,
"step": 200
},
{
"epoch": 0.4393305439330544,
"grad_norm": 0.5752055644989014,
"learning_rate": 2.786666666666667e-05,
"loss": 0.2489,
"step": 210
},
{
"epoch": 0.4602510460251046,
"grad_norm": 0.902524471282959,
"learning_rate": 2.9199999999999998e-05,
"loss": 0.2425,
"step": 220
},
{
"epoch": 0.4811715481171548,
"grad_norm": 0.39021795988082886,
"learning_rate": 3.0533333333333335e-05,
"loss": 0.2184,
"step": 230
},
{
"epoch": 0.502092050209205,
"grad_norm": 0.8243975639343262,
"learning_rate": 3.1866666666666664e-05,
"loss": 0.215,
"step": 240
},
{
"epoch": 0.5230125523012552,
"grad_norm": 0.6084154844284058,
"learning_rate": 3.32e-05,
"loss": 0.2175,
"step": 250
},
{
"epoch": 0.5439330543933054,
"grad_norm": 0.5785391330718994,
"learning_rate": 3.453333333333334e-05,
"loss": 0.2032,
"step": 260
},
{
"epoch": 0.5648535564853556,
"grad_norm": 0.9847169518470764,
"learning_rate": 3.586666666666667e-05,
"loss": 0.2173,
"step": 270
},
{
"epoch": 0.5857740585774058,
"grad_norm": 0.3699074983596802,
"learning_rate": 3.72e-05,
"loss": 0.1937,
"step": 280
},
{
"epoch": 0.606694560669456,
"grad_norm": 0.4798552095890045,
"learning_rate": 3.853333333333334e-05,
"loss": 0.1952,
"step": 290
},
{
"epoch": 0.6276150627615062,
"grad_norm": 0.5542566776275635,
"learning_rate": 3.986666666666667e-05,
"loss": 0.1954,
"step": 300
},
{
"epoch": 0.6485355648535565,
"grad_norm": 0.5909680724143982,
"learning_rate": 4.12e-05,
"loss": 0.1852,
"step": 310
},
{
"epoch": 0.6694560669456067,
"grad_norm": 0.6381629109382629,
"learning_rate": 4.2533333333333335e-05,
"loss": 0.1828,
"step": 320
},
{
"epoch": 0.6903765690376569,
"grad_norm": 0.4231165647506714,
"learning_rate": 4.3866666666666665e-05,
"loss": 0.1728,
"step": 330
},
{
"epoch": 0.7112970711297071,
"grad_norm": 0.5259320735931396,
"learning_rate": 4.52e-05,
"loss": 0.1665,
"step": 340
},
{
"epoch": 0.7322175732217573,
"grad_norm": 0.49583449959754944,
"learning_rate": 4.653333333333334e-05,
"loss": 0.1569,
"step": 350
},
{
"epoch": 0.7531380753138075,
"grad_norm": 0.7853049039840698,
"learning_rate": 4.7866666666666674e-05,
"loss": 0.1647,
"step": 360
},
{
"epoch": 0.7740585774058577,
"grad_norm": 0.49986371397972107,
"learning_rate": 4.92e-05,
"loss": 0.1587,
"step": 370
},
{
"epoch": 0.7949790794979079,
"grad_norm": 0.8034011125564575,
"learning_rate": 5.053333333333333e-05,
"loss": 0.1586,
"step": 380
},
{
"epoch": 0.8158995815899581,
"grad_norm": 0.48629796504974365,
"learning_rate": 5.1866666666666676e-05,
"loss": 0.1534,
"step": 390
},
{
"epoch": 0.8368200836820083,
"grad_norm": 0.4972072243690491,
"learning_rate": 5.3200000000000006e-05,
"loss": 0.1568,
"step": 400
},
{
"epoch": 0.8577405857740585,
"grad_norm": 0.8214737176895142,
"learning_rate": 5.4533333333333335e-05,
"loss": 0.1591,
"step": 410
},
{
"epoch": 0.8786610878661087,
"grad_norm": 0.6962748169898987,
"learning_rate": 5.5866666666666665e-05,
"loss": 0.1534,
"step": 420
},
{
"epoch": 0.899581589958159,
"grad_norm": 0.4874151647090912,
"learning_rate": 5.72e-05,
"loss": 0.1607,
"step": 430
},
{
"epoch": 0.9205020920502092,
"grad_norm": 0.8123170137405396,
"learning_rate": 5.853333333333334e-05,
"loss": 0.1557,
"step": 440
},
{
"epoch": 0.9414225941422594,
"grad_norm": 0.6196077466011047,
"learning_rate": 5.9866666666666674e-05,
"loss": 0.1487,
"step": 450
},
{
"epoch": 0.9623430962343096,
"grad_norm": 0.5131699442863464,
"learning_rate": 6.12e-05,
"loss": 0.1706,
"step": 460
},
{
"epoch": 0.9832635983263598,
"grad_norm": 0.4041496217250824,
"learning_rate": 6.253333333333333e-05,
"loss": 0.157,
"step": 470
},
{
"epoch": 1.00418410041841,
"grad_norm": 0.4113733172416687,
"learning_rate": 6.386666666666667e-05,
"loss": 0.143,
"step": 480
},
{
"epoch": 1.0251046025104602,
"grad_norm": 0.5570617914199829,
"learning_rate": 6.52e-05,
"loss": 0.1518,
"step": 490
},
{
"epoch": 1.0460251046025104,
"grad_norm": 0.47739163041114807,
"learning_rate": 6.653333333333334e-05,
"loss": 0.1356,
"step": 500
},
{
"epoch": 1.0669456066945606,
"grad_norm": 0.6506455540657043,
"learning_rate": 6.786666666666667e-05,
"loss": 0.1391,
"step": 510
},
{
"epoch": 1.0878661087866108,
"grad_norm": 0.597764790058136,
"learning_rate": 6.92e-05,
"loss": 0.1471,
"step": 520
},
{
"epoch": 1.108786610878661,
"grad_norm": 0.4553702771663666,
"learning_rate": 7.053333333333334e-05,
"loss": 0.1454,
"step": 530
},
{
"epoch": 1.1297071129707112,
"grad_norm": 0.47173255681991577,
"learning_rate": 7.186666666666667e-05,
"loss": 0.1429,
"step": 540
},
{
"epoch": 1.1506276150627615,
"grad_norm": 0.49343475699424744,
"learning_rate": 7.32e-05,
"loss": 0.1314,
"step": 550
},
{
"epoch": 1.1715481171548117,
"grad_norm": 0.39795058965682983,
"learning_rate": 7.453333333333333e-05,
"loss": 0.1262,
"step": 560
},
{
"epoch": 1.1924686192468619,
"grad_norm": 0.40215185284614563,
"learning_rate": 7.586666666666668e-05,
"loss": 0.1221,
"step": 570
},
{
"epoch": 1.213389121338912,
"grad_norm": 0.5300595760345459,
"learning_rate": 7.72e-05,
"loss": 0.139,
"step": 580
},
{
"epoch": 1.2343096234309623,
"grad_norm": 1.0076900720596313,
"learning_rate": 7.853333333333334e-05,
"loss": 0.1345,
"step": 590
},
{
"epoch": 1.2552301255230125,
"grad_norm": 0.5394683480262756,
"learning_rate": 7.986666666666667e-05,
"loss": 0.138,
"step": 600
},
{
"epoch": 1.2761506276150627,
"grad_norm": 0.5423910617828369,
"learning_rate": 8.120000000000001e-05,
"loss": 0.1392,
"step": 610
},
{
"epoch": 1.297071129707113,
"grad_norm": 0.35370203852653503,
"learning_rate": 8.253333333333334e-05,
"loss": 0.1206,
"step": 620
},
{
"epoch": 1.3179916317991631,
"grad_norm": 0.35600051283836365,
"learning_rate": 8.386666666666667e-05,
"loss": 0.1235,
"step": 630
},
{
"epoch": 1.3389121338912133,
"grad_norm": 0.43719682097435,
"learning_rate": 8.52e-05,
"loss": 0.1179,
"step": 640
},
{
"epoch": 1.3598326359832635,
"grad_norm": 0.4204005300998688,
"learning_rate": 8.653333333333333e-05,
"loss": 0.1307,
"step": 650
},
{
"epoch": 1.3807531380753137,
"grad_norm": 0.4343251883983612,
"learning_rate": 8.786666666666667e-05,
"loss": 0.1217,
"step": 660
},
{
"epoch": 1.401673640167364,
"grad_norm": 0.580028772354126,
"learning_rate": 8.92e-05,
"loss": 0.1355,
"step": 670
},
{
"epoch": 1.4225941422594142,
"grad_norm": 0.41393300890922546,
"learning_rate": 9.053333333333334e-05,
"loss": 0.1247,
"step": 680
},
{
"epoch": 1.4435146443514644,
"grad_norm": 0.40060940384864807,
"learning_rate": 9.186666666666667e-05,
"loss": 0.1258,
"step": 690
},
{
"epoch": 1.4644351464435146,
"grad_norm": 0.4414738714694977,
"learning_rate": 9.320000000000002e-05,
"loss": 0.1262,
"step": 700
},
{
"epoch": 1.4853556485355648,
"grad_norm": 0.444823682308197,
"learning_rate": 9.453333333333335e-05,
"loss": 0.1342,
"step": 710
},
{
"epoch": 1.506276150627615,
"grad_norm": 0.7578327655792236,
"learning_rate": 9.586666666666667e-05,
"loss": 0.1236,
"step": 720
},
{
"epoch": 1.5271966527196654,
"grad_norm": 0.3242191970348358,
"learning_rate": 9.72e-05,
"loss": 0.1155,
"step": 730
},
{
"epoch": 1.5481171548117154,
"grad_norm": 0.34916719794273376,
"learning_rate": 9.853333333333333e-05,
"loss": 0.1088,
"step": 740
},
{
"epoch": 1.5690376569037658,
"grad_norm": 0.34131109714508057,
"learning_rate": 9.986666666666668e-05,
"loss": 0.1152,
"step": 750
},
{
"epoch": 1.5899581589958158,
"grad_norm": 0.54949551820755,
"learning_rate": 9.999990157738453e-05,
"loss": 0.1084,
"step": 760
},
{
"epoch": 1.6108786610878663,
"grad_norm": 0.912358820438385,
"learning_rate": 9.999956135155687e-05,
"loss": 0.1198,
"step": 770
},
{
"epoch": 1.6317991631799162,
"grad_norm": 0.45504382252693176,
"learning_rate": 9.99989781090763e-05,
"loss": 0.125,
"step": 780
},
{
"epoch": 1.6527196652719667,
"grad_norm": 0.5051805377006531,
"learning_rate": 9.999815185277755e-05,
"loss": 0.1144,
"step": 790
},
{
"epoch": 1.6736401673640167,
"grad_norm": 0.36151519417762756,
"learning_rate": 9.999708258667652e-05,
"loss": 0.1073,
"step": 800
},
{
"epoch": 1.694560669456067,
"grad_norm": 0.4040098190307617,
"learning_rate": 9.999577031597029e-05,
"loss": 0.124,
"step": 810
},
{
"epoch": 1.715481171548117,
"grad_norm": 0.4677584767341614,
"learning_rate": 9.999421504703696e-05,
"loss": 0.1061,
"step": 820
},
{
"epoch": 1.7364016736401675,
"grad_norm": 0.4312772750854492,
"learning_rate": 9.999241678743574e-05,
"loss": 0.1088,
"step": 830
},
{
"epoch": 1.7573221757322175,
"grad_norm": 0.44205185770988464,
"learning_rate": 9.999037554590683e-05,
"loss": 0.107,
"step": 840
},
{
"epoch": 1.778242677824268,
"grad_norm": 0.34728363156318665,
"learning_rate": 9.998809133237143e-05,
"loss": 0.1246,
"step": 850
},
{
"epoch": 1.799163179916318,
"grad_norm": 0.3395802676677704,
"learning_rate": 9.998556415793169e-05,
"loss": 0.1001,
"step": 860
},
{
"epoch": 1.8200836820083683,
"grad_norm": 0.3991422951221466,
"learning_rate": 9.998279403487062e-05,
"loss": 0.109,
"step": 870
},
{
"epoch": 1.8410041841004183,
"grad_norm": 0.2969718277454376,
"learning_rate": 9.997978097665205e-05,
"loss": 0.1016,
"step": 880
},
{
"epoch": 1.8619246861924688,
"grad_norm": 0.3824094533920288,
"learning_rate": 9.99765249979206e-05,
"loss": 0.111,
"step": 890
},
{
"epoch": 1.8828451882845187,
"grad_norm": 0.32365161180496216,
"learning_rate": 9.997302611450154e-05,
"loss": 0.1102,
"step": 900
},
{
"epoch": 1.9037656903765692,
"grad_norm": 0.34358587861061096,
"learning_rate": 9.996928434340073e-05,
"loss": 0.1023,
"step": 910
},
{
"epoch": 1.9246861924686192,
"grad_norm": 0.36740976572036743,
"learning_rate": 9.996529970280462e-05,
"loss": 0.1098,
"step": 920
},
{
"epoch": 1.9456066945606696,
"grad_norm": 0.29451724886894226,
"learning_rate": 9.996107221208004e-05,
"loss": 0.1028,
"step": 930
},
{
"epoch": 1.9665271966527196,
"grad_norm": 0.37186339497566223,
"learning_rate": 9.995660189177419e-05,
"loss": 0.0943,
"step": 940
},
{
"epoch": 1.98744769874477,
"grad_norm": 0.3423303961753845,
"learning_rate": 9.995188876361451e-05,
"loss": 0.1076,
"step": 950
},
{
"epoch": 2.00836820083682,
"grad_norm": 0.3213839530944824,
"learning_rate": 9.994693285050857e-05,
"loss": 0.1159,
"step": 960
},
{
"epoch": 2.0292887029288704,
"grad_norm": 0.3634227216243744,
"learning_rate": 9.994173417654395e-05,
"loss": 0.1028,
"step": 970
},
{
"epoch": 2.0502092050209204,
"grad_norm": 0.5577953457832336,
"learning_rate": 9.993629276698821e-05,
"loss": 0.1018,
"step": 980
},
{
"epoch": 2.071129707112971,
"grad_norm": 0.5105161070823669,
"learning_rate": 9.993060864828858e-05,
"loss": 0.1103,
"step": 990
},
{
"epoch": 2.092050209205021,
"grad_norm": 0.458475798368454,
"learning_rate": 9.992468184807206e-05,
"loss": 0.1064,
"step": 1000
},
{
"epoch": 2.1129707112970713,
"grad_norm": 0.5843384265899658,
"learning_rate": 9.991851239514511e-05,
"loss": 0.1124,
"step": 1010
},
{
"epoch": 2.1338912133891212,
"grad_norm": 0.5279590487480164,
"learning_rate": 9.991210031949359e-05,
"loss": 0.1015,
"step": 1020
},
{
"epoch": 2.1548117154811717,
"grad_norm": 0.39147329330444336,
"learning_rate": 9.990544565228259e-05,
"loss": 0.1089,
"step": 1030
},
{
"epoch": 2.1757322175732217,
"grad_norm": 0.37064120173454285,
"learning_rate": 9.989854842585631e-05,
"loss": 0.1114,
"step": 1040
},
{
"epoch": 2.196652719665272,
"grad_norm": 0.3259464502334595,
"learning_rate": 9.989140867373783e-05,
"loss": 0.0913,
"step": 1050
},
{
"epoch": 2.217573221757322,
"grad_norm": 0.3360336124897003,
"learning_rate": 9.988402643062907e-05,
"loss": 0.1062,
"step": 1060
},
{
"epoch": 2.2384937238493725,
"grad_norm": 0.29747986793518066,
"learning_rate": 9.987640173241046e-05,
"loss": 0.111,
"step": 1070
},
{
"epoch": 2.2594142259414225,
"grad_norm": 0.4818101227283478,
"learning_rate": 9.986853461614093e-05,
"loss": 0.1023,
"step": 1080
},
{
"epoch": 2.280334728033473,
"grad_norm": 0.42483437061309814,
"learning_rate": 9.986042512005763e-05,
"loss": 0.0998,
"step": 1090
},
{
"epoch": 2.301255230125523,
"grad_norm": 0.3709806501865387,
"learning_rate": 9.985207328357573e-05,
"loss": 0.1011,
"step": 1100
},
{
"epoch": 2.3221757322175733,
"grad_norm": 0.2622065246105194,
"learning_rate": 9.984347914728829e-05,
"loss": 0.0996,
"step": 1110
},
{
"epoch": 2.3430962343096233,
"grad_norm": 0.37095901370048523,
"learning_rate": 9.983464275296605e-05,
"loss": 0.1018,
"step": 1120
},
{
"epoch": 2.3640167364016738,
"grad_norm": 0.28311190009117126,
"learning_rate": 9.982556414355724e-05,
"loss": 0.1063,
"step": 1130
},
{
"epoch": 2.3849372384937237,
"grad_norm": 0.372185081243515,
"learning_rate": 9.981624336318726e-05,
"loss": 0.1042,
"step": 1140
},
{
"epoch": 2.405857740585774,
"grad_norm": 0.2405327707529068,
"learning_rate": 9.980668045715864e-05,
"loss": 0.0931,
"step": 1150
},
{
"epoch": 2.426778242677824,
"grad_norm": 0.34624001383781433,
"learning_rate": 9.979687547195066e-05,
"loss": 0.0981,
"step": 1160
},
{
"epoch": 2.4476987447698746,
"grad_norm": 0.4329279363155365,
"learning_rate": 9.978682845521927e-05,
"loss": 0.108,
"step": 1170
},
{
"epoch": 2.4686192468619246,
"grad_norm": 0.5653864145278931,
"learning_rate": 9.977653945579673e-05,
"loss": 0.1026,
"step": 1180
},
{
"epoch": 2.489539748953975,
"grad_norm": 0.27248165011405945,
"learning_rate": 9.976600852369144e-05,
"loss": 0.0928,
"step": 1190
},
{
"epoch": 2.510460251046025,
"grad_norm": 0.3201051354408264,
"learning_rate": 9.975523571008769e-05,
"loss": 0.1024,
"step": 1200
},
{
"epoch": 2.5313807531380754,
"grad_norm": 0.3279741704463959,
"learning_rate": 9.97442210673454e-05,
"loss": 0.1023,
"step": 1210
},
{
"epoch": 2.5523012552301254,
"grad_norm": 0.2990843653678894,
"learning_rate": 9.973296464899988e-05,
"loss": 0.0938,
"step": 1220
},
{
"epoch": 2.573221757322176,
"grad_norm": 0.31866946816444397,
"learning_rate": 9.972146650976154e-05,
"loss": 0.1018,
"step": 1230
},
{
"epoch": 2.594142259414226,
"grad_norm": 0.2663305401802063,
"learning_rate": 9.970972670551566e-05,
"loss": 0.0984,
"step": 1240
},
{
"epoch": 2.6150627615062763,
"grad_norm": 0.33940839767456055,
"learning_rate": 9.969774529332212e-05,
"loss": 0.0988,
"step": 1250
},
{
"epoch": 2.6359832635983262,
"grad_norm": 0.2570817172527313,
"learning_rate": 9.968552233141504e-05,
"loss": 0.0978,
"step": 1260
},
{
"epoch": 2.6569037656903767,
"grad_norm": 0.3066076636314392,
"learning_rate": 9.967305787920264e-05,
"loss": 0.0952,
"step": 1270
},
{
"epoch": 2.6778242677824267,
"grad_norm": 0.2956596910953522,
"learning_rate": 9.966035199726684e-05,
"loss": 0.0925,
"step": 1280
},
{
"epoch": 2.698744769874477,
"grad_norm": 0.4384233057498932,
"learning_rate": 9.9647404747363e-05,
"loss": 0.1044,
"step": 1290
},
{
"epoch": 2.719665271966527,
"grad_norm": 0.3082908093929291,
"learning_rate": 9.96342161924196e-05,
"loss": 0.099,
"step": 1300
},
{
"epoch": 2.7405857740585775,
"grad_norm": 0.5076243877410889,
"learning_rate": 9.962078639653797e-05,
"loss": 0.0925,
"step": 1310
},
{
"epoch": 2.7615062761506275,
"grad_norm": 0.3102075159549713,
"learning_rate": 9.960711542499202e-05,
"loss": 0.0996,
"step": 1320
},
{
"epoch": 2.782426778242678,
"grad_norm": 0.31955546140670776,
"learning_rate": 9.959320334422772e-05,
"loss": 0.0889,
"step": 1330
},
{
"epoch": 2.803347280334728,
"grad_norm": 0.27515116333961487,
"learning_rate": 9.957905022186309e-05,
"loss": 0.0902,
"step": 1340
},
{
"epoch": 2.8242677824267783,
"grad_norm": 0.31273916363716125,
"learning_rate": 9.956465612668757e-05,
"loss": 0.0889,
"step": 1350
},
{
"epoch": 2.8451882845188283,
"grad_norm": 0.36738961935043335,
"learning_rate": 9.95500211286619e-05,
"loss": 0.0961,
"step": 1360
},
{
"epoch": 2.8661087866108788,
"grad_norm": 0.3078056275844574,
"learning_rate": 9.953514529891763e-05,
"loss": 0.0804,
"step": 1370
},
{
"epoch": 2.8870292887029287,
"grad_norm": 0.23031267523765564,
"learning_rate": 9.952002870975693e-05,
"loss": 0.0906,
"step": 1380
},
{
"epoch": 2.907949790794979,
"grad_norm": 0.2518852651119232,
"learning_rate": 9.950467143465207e-05,
"loss": 0.084,
"step": 1390
},
{
"epoch": 2.928870292887029,
"grad_norm": 0.4326651394367218,
"learning_rate": 9.94890735482452e-05,
"loss": 0.0828,
"step": 1400
},
{
"epoch": 2.9497907949790796,
"grad_norm": 0.26489028334617615,
"learning_rate": 9.947323512634788e-05,
"loss": 0.088,
"step": 1410
},
{
"epoch": 2.9707112970711296,
"grad_norm": 0.286268025636673,
"learning_rate": 9.945715624594081e-05,
"loss": 0.0901,
"step": 1420
},
{
"epoch": 2.99163179916318,
"grad_norm": 0.40001794695854187,
"learning_rate": 9.944083698517339e-05,
"loss": 0.0913,
"step": 1430
},
{
"epoch": 3.01255230125523,
"grad_norm": 0.26771172881126404,
"learning_rate": 9.942427742336334e-05,
"loss": 0.0883,
"step": 1440
},
{
"epoch": 3.0334728033472804,
"grad_norm": 0.3441821038722992,
"learning_rate": 9.940747764099638e-05,
"loss": 0.0867,
"step": 1450
},
{
"epoch": 3.0543933054393304,
"grad_norm": 0.23330751061439514,
"learning_rate": 9.939043771972574e-05,
"loss": 0.0899,
"step": 1460
},
{
"epoch": 3.075313807531381,
"grad_norm": 0.3174346685409546,
"learning_rate": 9.937315774237186e-05,
"loss": 0.0854,
"step": 1470
},
{
"epoch": 3.096234309623431,
"grad_norm": 0.2819070518016815,
"learning_rate": 9.93556377929219e-05,
"loss": 0.0945,
"step": 1480
},
{
"epoch": 3.1171548117154813,
"grad_norm": 0.2613016664981842,
"learning_rate": 9.933787795652942e-05,
"loss": 0.0963,
"step": 1490
},
{
"epoch": 3.1380753138075312,
"grad_norm": 0.32942673563957214,
"learning_rate": 9.931987831951386e-05,
"loss": 0.096,
"step": 1500
},
{
"epoch": 3.1589958158995817,
"grad_norm": 0.36867812275886536,
"learning_rate": 9.930163896936027e-05,
"loss": 0.1027,
"step": 1510
},
{
"epoch": 3.1799163179916317,
"grad_norm": 0.31028738617897034,
"learning_rate": 9.92831599947187e-05,
"loss": 0.0957,
"step": 1520
},
{
"epoch": 3.200836820083682,
"grad_norm": 0.23857052624225616,
"learning_rate": 9.926444148540393e-05,
"loss": 0.0869,
"step": 1530
},
{
"epoch": 3.221757322175732,
"grad_norm": 0.30246081948280334,
"learning_rate": 9.924548353239495e-05,
"loss": 0.0872,
"step": 1540
},
{
"epoch": 3.2426778242677825,
"grad_norm": 0.2901047170162201,
"learning_rate": 9.922628622783451e-05,
"loss": 0.1011,
"step": 1550
},
{
"epoch": 3.2635983263598325,
"grad_norm": 0.40179872512817383,
"learning_rate": 9.920684966502878e-05,
"loss": 0.0882,
"step": 1560
},
{
"epoch": 3.284518828451883,
"grad_norm": 0.29192686080932617,
"learning_rate": 9.918717393844669e-05,
"loss": 0.0835,
"step": 1570
},
{
"epoch": 3.305439330543933,
"grad_norm": 0.38771605491638184,
"learning_rate": 9.916725914371969e-05,
"loss": 0.0852,
"step": 1580
},
{
"epoch": 3.3263598326359833,
"grad_norm": 0.219853475689888,
"learning_rate": 9.914710537764117e-05,
"loss": 0.089,
"step": 1590
},
{
"epoch": 3.3472803347280333,
"grad_norm": 0.33001509308815,
"learning_rate": 9.912671273816601e-05,
"loss": 0.0893,
"step": 1600
},
{
"epoch": 3.3682008368200838,
"grad_norm": 0.23087120056152344,
"learning_rate": 9.910608132441008e-05,
"loss": 0.0847,
"step": 1610
},
{
"epoch": 3.3891213389121337,
"grad_norm": 0.2661932706832886,
"learning_rate": 9.908521123664981e-05,
"loss": 0.084,
"step": 1620
},
{
"epoch": 3.410041841004184,
"grad_norm": 0.2942771017551422,
"learning_rate": 9.906410257632168e-05,
"loss": 0.0785,
"step": 1630
},
{
"epoch": 3.430962343096234,
"grad_norm": 0.22297202050685883,
"learning_rate": 9.904275544602169e-05,
"loss": 0.0827,
"step": 1640
},
{
"epoch": 3.4518828451882846,
"grad_norm": 0.31877461075782776,
"learning_rate": 9.902116994950493e-05,
"loss": 0.0987,
"step": 1650
},
{
"epoch": 3.4728033472803346,
"grad_norm": 0.403152734041214,
"learning_rate": 9.899934619168501e-05,
"loss": 0.0859,
"step": 1660
},
{
"epoch": 3.493723849372385,
"grad_norm": 0.29642191529273987,
"learning_rate": 9.89772842786336e-05,
"loss": 0.0838,
"step": 1670
},
{
"epoch": 3.514644351464435,
"grad_norm": 0.3551464080810547,
"learning_rate": 9.895498431757989e-05,
"loss": 0.0903,
"step": 1680
},
{
"epoch": 3.5355648535564854,
"grad_norm": 0.3941759169101715,
"learning_rate": 9.893244641691006e-05,
"loss": 0.0916,
"step": 1690
},
{
"epoch": 3.5564853556485354,
"grad_norm": 0.2608686685562134,
"learning_rate": 9.890967068616677e-05,
"loss": 0.0832,
"step": 1700
},
{
"epoch": 3.577405857740586,
"grad_norm": 0.25500214099884033,
"learning_rate": 9.888665723604864e-05,
"loss": 0.0846,
"step": 1710
},
{
"epoch": 3.598326359832636,
"grad_norm": 0.2380712330341339,
"learning_rate": 9.886340617840968e-05,
"loss": 0.0902,
"step": 1720
},
{
"epoch": 3.6192468619246863,
"grad_norm": 0.2662540376186371,
"learning_rate": 9.883991762625876e-05,
"loss": 0.0843,
"step": 1730
},
{
"epoch": 3.6401673640167362,
"grad_norm": 0.33657851815223694,
"learning_rate": 9.881619169375908e-05,
"loss": 0.0852,
"step": 1740
},
{
"epoch": 3.6610878661087867,
"grad_norm": 0.25955089926719666,
"learning_rate": 9.879222849622758e-05,
"loss": 0.0827,
"step": 1750
},
{
"epoch": 3.6820083682008367,
"grad_norm": 0.2592358887195587,
"learning_rate": 9.876802815013439e-05,
"loss": 0.0943,
"step": 1760
},
{
"epoch": 3.702928870292887,
"grad_norm": 0.32307639718055725,
"learning_rate": 9.87435907731023e-05,
"loss": 0.0734,
"step": 1770
},
{
"epoch": 3.723849372384937,
"grad_norm": 0.22112824022769928,
"learning_rate": 9.871891648390614e-05,
"loss": 0.0886,
"step": 1780
},
{
"epoch": 3.7447698744769875,
"grad_norm": 0.32087430357933044,
"learning_rate": 9.869400540247223e-05,
"loss": 0.0882,
"step": 1790
},
{
"epoch": 3.7656903765690375,
"grad_norm": 0.30099472403526306,
"learning_rate": 9.866885764987776e-05,
"loss": 0.0871,
"step": 1800
},
{
"epoch": 3.786610878661088,
"grad_norm": 0.2685706913471222,
"learning_rate": 9.86434733483503e-05,
"loss": 0.0805,
"step": 1810
},
{
"epoch": 3.8075313807531384,
"grad_norm": 0.27751418948173523,
"learning_rate": 9.861785262126705e-05,
"loss": 0.0841,
"step": 1820
},
{
"epoch": 3.8284518828451883,
"grad_norm": 0.28366604447364807,
"learning_rate": 9.85919955931544e-05,
"loss": 0.0846,
"step": 1830
},
{
"epoch": 3.8493723849372383,
"grad_norm": 0.24972891807556152,
"learning_rate": 9.856590238968721e-05,
"loss": 0.0792,
"step": 1840
},
{
"epoch": 3.8702928870292888,
"grad_norm": 0.22181373834609985,
"learning_rate": 9.853957313768824e-05,
"loss": 0.0808,
"step": 1850
},
{
"epoch": 3.891213389121339,
"grad_norm": 0.36809661984443665,
"learning_rate": 9.851300796512755e-05,
"loss": 0.0895,
"step": 1860
},
{
"epoch": 3.912133891213389,
"grad_norm": 0.37876737117767334,
"learning_rate": 9.848620700112188e-05,
"loss": 0.0873,
"step": 1870
},
{
"epoch": 3.933054393305439,
"grad_norm": 0.2749776840209961,
"learning_rate": 9.845917037593396e-05,
"loss": 0.0798,
"step": 1880
},
{
"epoch": 3.9539748953974896,
"grad_norm": 0.28249871730804443,
"learning_rate": 9.843189822097196e-05,
"loss": 0.0772,
"step": 1890
},
{
"epoch": 3.97489539748954,
"grad_norm": 0.26489949226379395,
"learning_rate": 9.84043906687888e-05,
"loss": 0.0871,
"step": 1900
},
{
"epoch": 3.99581589958159,
"grad_norm": 0.2860475778579712,
"learning_rate": 9.837664785308149e-05,
"loss": 0.0934,
"step": 1910
},
{
"epoch": 4.01673640167364,
"grad_norm": 0.33550700545310974,
"learning_rate": 9.834866990869059e-05,
"loss": 0.0844,
"step": 1920
},
{
"epoch": 4.03765690376569,
"grad_norm": 0.2818957567214966,
"learning_rate": 9.832045697159938e-05,
"loss": 0.0852,
"step": 1930
},
{
"epoch": 4.058577405857741,
"grad_norm": 0.36980465054512024,
"learning_rate": 9.829200917893334e-05,
"loss": 0.0922,
"step": 1940
},
{
"epoch": 4.079497907949791,
"grad_norm": 0.3046676516532898,
"learning_rate": 9.826332666895944e-05,
"loss": 0.0867,
"step": 1950
},
{
"epoch": 4.100418410041841,
"grad_norm": 0.3276398777961731,
"learning_rate": 9.823440958108545e-05,
"loss": 0.0764,
"step": 1960
},
{
"epoch": 4.121338912133891,
"grad_norm": 0.2901808023452759,
"learning_rate": 9.820525805585927e-05,
"loss": 0.0796,
"step": 1970
},
{
"epoch": 4.142259414225942,
"grad_norm": 0.27789413928985596,
"learning_rate": 9.81758722349683e-05,
"loss": 0.0897,
"step": 1980
},
{
"epoch": 4.163179916317992,
"grad_norm": 0.23803479969501495,
"learning_rate": 9.814625226123862e-05,
"loss": 0.0905,
"step": 1990
},
{
"epoch": 4.184100418410042,
"grad_norm": 0.27403533458709717,
"learning_rate": 9.811639827863449e-05,
"loss": 0.0832,
"step": 2000
},
{
"epoch": 4.205020920502092,
"grad_norm": 0.2629660964012146,
"learning_rate": 9.808631043225741e-05,
"loss": 0.0755,
"step": 2010
},
{
"epoch": 4.2259414225941425,
"grad_norm": 0.34758999943733215,
"learning_rate": 9.805598886834567e-05,
"loss": 0.0803,
"step": 2020
},
{
"epoch": 4.2468619246861925,
"grad_norm": 0.23933303356170654,
"learning_rate": 9.802543373427344e-05,
"loss": 0.0889,
"step": 2030
},
{
"epoch": 4.2677824267782425,
"grad_norm": 0.24562481045722961,
"learning_rate": 9.799464517855018e-05,
"loss": 0.0824,
"step": 2040
},
{
"epoch": 4.2887029288702925,
"grad_norm": 0.31750568747520447,
"learning_rate": 9.79636233508198e-05,
"loss": 0.079,
"step": 2050
},
{
"epoch": 4.309623430962343,
"grad_norm": 0.2367181032896042,
"learning_rate": 9.793236840186005e-05,
"loss": 0.0757,
"step": 2060
},
{
"epoch": 4.330543933054393,
"grad_norm": 0.22896745800971985,
"learning_rate": 9.790088048358175e-05,
"loss": 0.0712,
"step": 2070
},
{
"epoch": 4.351464435146443,
"grad_norm": 0.26197147369384766,
"learning_rate": 9.786915974902798e-05,
"loss": 0.0812,
"step": 2080
},
{
"epoch": 4.372384937238493,
"grad_norm": 0.2804318964481354,
"learning_rate": 9.783720635237343e-05,
"loss": 0.067,
"step": 2090
},
{
"epoch": 4.393305439330544,
"grad_norm": 0.2822119891643524,
"learning_rate": 9.780502044892362e-05,
"loss": 0.0803,
"step": 2100
},
{
"epoch": 4.414225941422594,
"grad_norm": 0.2411976307630539,
"learning_rate": 9.777260219511415e-05,
"loss": 0.0721,
"step": 2110
},
{
"epoch": 4.435146443514644,
"grad_norm": 0.3058140277862549,
"learning_rate": 9.773995174850989e-05,
"loss": 0.0763,
"step": 2120
},
{
"epoch": 4.456066945606695,
"grad_norm": 0.34904828667640686,
"learning_rate": 9.770706926780428e-05,
"loss": 0.084,
"step": 2130
},
{
"epoch": 4.476987447698745,
"grad_norm": 0.21338161826133728,
"learning_rate": 9.767395491281855e-05,
"loss": 0.0714,
"step": 2140
},
{
"epoch": 4.497907949790795,
"grad_norm": 0.24260395765304565,
"learning_rate": 9.764060884450086e-05,
"loss": 0.0924,
"step": 2150
},
{
"epoch": 4.518828451882845,
"grad_norm": 0.3183063864707947,
"learning_rate": 9.76070312249257e-05,
"loss": 0.0752,
"step": 2160
},
{
"epoch": 4.539748953974895,
"grad_norm": 0.27661818265914917,
"learning_rate": 9.757322221729283e-05,
"loss": 0.0783,
"step": 2170
},
{
"epoch": 4.560669456066946,
"grad_norm": 0.284467488527298,
"learning_rate": 9.753918198592682e-05,
"loss": 0.072,
"step": 2180
},
{
"epoch": 4.581589958158996,
"grad_norm": 0.24314570426940918,
"learning_rate": 9.750491069627593e-05,
"loss": 0.0799,
"step": 2190
},
{
"epoch": 4.602510460251046,
"grad_norm": 0.23835724592208862,
"learning_rate": 9.747040851491149e-05,
"loss": 0.0711,
"step": 2200
},
{
"epoch": 4.623430962343097,
"grad_norm": 0.2222239226102829,
"learning_rate": 9.743567560952711e-05,
"loss": 0.071,
"step": 2210
},
{
"epoch": 4.644351464435147,
"grad_norm": 0.19406676292419434,
"learning_rate": 9.740071214893773e-05,
"loss": 0.0648,
"step": 2220
},
{
"epoch": 4.665271966527197,
"grad_norm": 0.3223171830177307,
"learning_rate": 9.736551830307892e-05,
"loss": 0.0707,
"step": 2230
},
{
"epoch": 4.686192468619247,
"grad_norm": 0.26071134209632874,
"learning_rate": 9.733009424300597e-05,
"loss": 0.0799,
"step": 2240
},
{
"epoch": 4.707112970711297,
"grad_norm": 0.28553199768066406,
"learning_rate": 9.729444014089314e-05,
"loss": 0.0743,
"step": 2250
},
{
"epoch": 4.7280334728033475,
"grad_norm": 0.3291798532009125,
"learning_rate": 9.725855617003275e-05,
"loss": 0.0789,
"step": 2260
},
{
"epoch": 4.7489539748953975,
"grad_norm": 0.20497122406959534,
"learning_rate": 9.72224425048344e-05,
"loss": 0.0833,
"step": 2270
},
{
"epoch": 4.7698744769874475,
"grad_norm": 0.2501441538333893,
"learning_rate": 9.718609932082405e-05,
"loss": 0.0778,
"step": 2280
},
{
"epoch": 4.790794979079498,
"grad_norm": 0.25415176153182983,
"learning_rate": 9.714952679464323e-05,
"loss": 0.0771,
"step": 2290
},
{
"epoch": 4.811715481171548,
"grad_norm": 0.22852188348770142,
"learning_rate": 9.711272510404816e-05,
"loss": 0.0677,
"step": 2300
},
{
"epoch": 4.832635983263598,
"grad_norm": 0.24375076591968536,
"learning_rate": 9.70756944279089e-05,
"loss": 0.0769,
"step": 2310
},
{
"epoch": 4.853556485355648,
"grad_norm": 0.2883508503437042,
"learning_rate": 9.70384349462084e-05,
"loss": 0.0756,
"step": 2320
},
{
"epoch": 4.874476987447698,
"grad_norm": 0.29686424136161804,
"learning_rate": 9.700094684004182e-05,
"loss": 0.0782,
"step": 2330
},
{
"epoch": 4.895397489539749,
"grad_norm": 0.22778484225273132,
"learning_rate": 9.696323029161535e-05,
"loss": 0.0822,
"step": 2340
},
{
"epoch": 4.916317991631799,
"grad_norm": 0.25161683559417725,
"learning_rate": 9.692528548424567e-05,
"loss": 0.0755,
"step": 2350
},
{
"epoch": 4.937238493723849,
"grad_norm": 0.20022211968898773,
"learning_rate": 9.688711260235872e-05,
"loss": 0.0795,
"step": 2360
},
{
"epoch": 4.9581589958159,
"grad_norm": 0.4135867953300476,
"learning_rate": 9.684871183148912e-05,
"loss": 0.0781,
"step": 2370
},
{
"epoch": 4.97907949790795,
"grad_norm": 0.19854891300201416,
"learning_rate": 9.681008335827898e-05,
"loss": 0.0729,
"step": 2380
},
{
"epoch": 5.0,
"grad_norm": 1.996017336845398,
"learning_rate": 9.677122737047724e-05,
"loss": 0.0775,
"step": 2390
},
{
"epoch": 5.02092050209205,
"grad_norm": 0.34404969215393066,
"learning_rate": 9.673214405693857e-05,
"loss": 0.0845,
"step": 2400
},
{
"epoch": 5.0418410041841,
"grad_norm": 0.27230435609817505,
"learning_rate": 9.669283360762258e-05,
"loss": 0.0745,
"step": 2410
},
{
"epoch": 5.062761506276151,
"grad_norm": 0.3299899995326996,
"learning_rate": 9.66532962135928e-05,
"loss": 0.0708,
"step": 2420
},
{
"epoch": 5.083682008368201,
"grad_norm": 0.23986473679542542,
"learning_rate": 9.661353206701582e-05,
"loss": 0.0707,
"step": 2430
},
{
"epoch": 5.104602510460251,
"grad_norm": 0.18780270218849182,
"learning_rate": 9.657354136116035e-05,
"loss": 0.0747,
"step": 2440
},
{
"epoch": 5.125523012552302,
"grad_norm": 0.24139155447483063,
"learning_rate": 9.653332429039625e-05,
"loss": 0.0766,
"step": 2450
},
{
"epoch": 5.146443514644352,
"grad_norm": 0.3180379867553711,
"learning_rate": 9.649288105019356e-05,
"loss": 0.0799,
"step": 2460
},
{
"epoch": 5.167364016736402,
"grad_norm": 0.3000459372997284,
"learning_rate": 9.645221183712165e-05,
"loss": 0.0802,
"step": 2470
},
{
"epoch": 5.188284518828452,
"grad_norm": 0.2193576991558075,
"learning_rate": 9.641131684884817e-05,
"loss": 0.0805,
"step": 2480
},
{
"epoch": 5.209205020920502,
"grad_norm": 0.2673724591732025,
"learning_rate": 9.637019628413813e-05,
"loss": 0.0816,
"step": 2490
},
{
"epoch": 5.2301255230125525,
"grad_norm": 0.2733951807022095,
"learning_rate": 9.632885034285291e-05,
"loss": 0.0731,
"step": 2500
},
{
"epoch": 5.2510460251046025,
"grad_norm": 0.3638957738876343,
"learning_rate": 9.628727922594931e-05,
"loss": 0.0817,
"step": 2510
},
{
"epoch": 5.2719665271966525,
"grad_norm": 0.26588529348373413,
"learning_rate": 9.624548313547862e-05,
"loss": 0.073,
"step": 2520
},
{
"epoch": 5.292887029288703,
"grad_norm": 0.27198341488838196,
"learning_rate": 9.620346227458547e-05,
"loss": 0.0691,
"step": 2530
},
{
"epoch": 5.313807531380753,
"grad_norm": 0.2196781486272812,
"learning_rate": 9.616121684750712e-05,
"loss": 0.0727,
"step": 2540
},
{
"epoch": 5.334728033472803,
"grad_norm": 0.24275390803813934,
"learning_rate": 9.611874705957215e-05,
"loss": 0.082,
"step": 2550
},
{
"epoch": 5.355648535564853,
"grad_norm": 0.18836568295955658,
"learning_rate": 9.607605311719972e-05,
"loss": 0.0833,
"step": 2560
},
{
"epoch": 5.376569037656903,
"grad_norm": 0.20861050486564636,
"learning_rate": 9.603313522789841e-05,
"loss": 0.075,
"step": 2570
},
{
"epoch": 5.397489539748954,
"grad_norm": 0.22330905497074127,
"learning_rate": 9.598999360026529e-05,
"loss": 0.0773,
"step": 2580
},
{
"epoch": 5.418410041841004,
"grad_norm": 0.2990499436855316,
"learning_rate": 9.59466284439849e-05,
"loss": 0.0759,
"step": 2590
},
{
"epoch": 5.439330543933054,
"grad_norm": 0.22060814499855042,
"learning_rate": 9.590303996982815e-05,
"loss": 0.0695,
"step": 2600
},
{
"epoch": 5.460251046025105,
"grad_norm": 0.26148760318756104,
"learning_rate": 9.585922838965145e-05,
"loss": 0.0722,
"step": 2610
},
{
"epoch": 5.481171548117155,
"grad_norm": 0.2634856402873993,
"learning_rate": 9.581519391639549e-05,
"loss": 0.0719,
"step": 2620
},
{
"epoch": 5.502092050209205,
"grad_norm": 0.26813215017318726,
"learning_rate": 9.577093676408439e-05,
"loss": 0.0709,
"step": 2630
},
{
"epoch": 5.523012552301255,
"grad_norm": 0.27922987937927246,
"learning_rate": 9.572645714782453e-05,
"loss": 0.0748,
"step": 2640
},
{
"epoch": 5.543933054393305,
"grad_norm": 0.29743626713752747,
"learning_rate": 9.568175528380354e-05,
"loss": 0.0702,
"step": 2650
},
{
"epoch": 5.564853556485356,
"grad_norm": 0.27912113070487976,
"learning_rate": 9.56368313892893e-05,
"loss": 0.088,
"step": 2660
},
{
"epoch": 5.585774058577406,
"grad_norm": 0.23611173033714294,
"learning_rate": 9.55916856826288e-05,
"loss": 0.0751,
"step": 2670
},
{
"epoch": 5.606694560669456,
"grad_norm": 0.19908355176448822,
"learning_rate": 9.554631838324713e-05,
"loss": 0.0765,
"step": 2680
},
{
"epoch": 5.627615062761507,
"grad_norm": 0.181325763463974,
"learning_rate": 9.55007297116464e-05,
"loss": 0.0723,
"step": 2690
},
{
"epoch": 5.648535564853557,
"grad_norm": 0.22809170186519623,
"learning_rate": 9.545491988940472e-05,
"loss": 0.0817,
"step": 2700
},
{
"epoch": 5.669456066945607,
"grad_norm": 0.24609525501728058,
"learning_rate": 9.540888913917501e-05,
"loss": 0.0729,
"step": 2710
},
{
"epoch": 5.690376569037657,
"grad_norm": 0.3130713105201721,
"learning_rate": 9.536263768468401e-05,
"loss": 0.0743,
"step": 2720
},
{
"epoch": 5.711297071129707,
"grad_norm": 0.27948838472366333,
"learning_rate": 9.531616575073117e-05,
"loss": 0.0759,
"step": 2730
},
{
"epoch": 5.7322175732217575,
"grad_norm": 0.20584532618522644,
"learning_rate": 9.526947356318754e-05,
"loss": 0.0747,
"step": 2740
},
{
"epoch": 5.7531380753138075,
"grad_norm": 0.27664265036582947,
"learning_rate": 9.52225613489947e-05,
"loss": 0.0679,
"step": 2750
},
{
"epoch": 5.7740585774058575,
"grad_norm": 0.24720332026481628,
"learning_rate": 9.517542933616365e-05,
"loss": 0.0736,
"step": 2760
},
{
"epoch": 5.794979079497908,
"grad_norm": 0.23605448007583618,
"learning_rate": 9.512807775377366e-05,
"loss": 0.0719,
"step": 2770
},
{
"epoch": 5.815899581589958,
"grad_norm": 0.25352632999420166,
"learning_rate": 9.508050683197121e-05,
"loss": 0.0706,
"step": 2780
},
{
"epoch": 5.836820083682008,
"grad_norm": 0.2646510899066925,
"learning_rate": 9.503271680196888e-05,
"loss": 0.079,
"step": 2790
},
{
"epoch": 5.857740585774058,
"grad_norm": 0.2195678949356079,
"learning_rate": 9.498470789604413e-05,
"loss": 0.0652,
"step": 2800
},
{
"epoch": 5.878661087866108,
"grad_norm": 0.23384016752243042,
"learning_rate": 9.49364803475383e-05,
"loss": 0.0813,
"step": 2810
},
{
"epoch": 5.899581589958159,
"grad_norm": 0.2790519595146179,
"learning_rate": 9.48880343908554e-05,
"loss": 0.0689,
"step": 2820
},
{
"epoch": 5.920502092050209,
"grad_norm": 0.2614765465259552,
"learning_rate": 9.4839370261461e-05,
"loss": 0.0739,
"step": 2830
},
{
"epoch": 5.941422594142259,
"grad_norm": 0.20076067745685577,
"learning_rate": 9.479048819588098e-05,
"loss": 0.064,
"step": 2840
},
{
"epoch": 5.96234309623431,
"grad_norm": 0.19265033304691315,
"learning_rate": 9.474138843170063e-05,
"loss": 0.0703,
"step": 2850
},
{
"epoch": 5.98326359832636,
"grad_norm": 0.2400965690612793,
"learning_rate": 9.46920712075632e-05,
"loss": 0.0774,
"step": 2860
},
{
"epoch": 6.00418410041841,
"grad_norm": 0.2949677109718323,
"learning_rate": 9.464253676316893e-05,
"loss": 0.0701,
"step": 2870
},
{
"epoch": 6.02510460251046,
"grad_norm": 0.1922580450773239,
"learning_rate": 9.459278533927384e-05,
"loss": 0.0676,
"step": 2880
},
{
"epoch": 6.046025104602511,
"grad_norm": 0.21086367964744568,
"learning_rate": 9.454281717768854e-05,
"loss": 0.0719,
"step": 2890
},
{
"epoch": 6.066945606694561,
"grad_norm": 0.2067558616399765,
"learning_rate": 9.449263252127708e-05,
"loss": 0.0748,
"step": 2900
},
{
"epoch": 6.087866108786611,
"grad_norm": 0.19907134771347046,
"learning_rate": 9.444223161395573e-05,
"loss": 0.0753,
"step": 2910
},
{
"epoch": 6.108786610878661,
"grad_norm": 0.2764277756214142,
"learning_rate": 9.439161470069184e-05,
"loss": 0.0814,
"step": 2920
},
{
"epoch": 6.129707112970712,
"grad_norm": 0.25734201073646545,
"learning_rate": 9.43407820275026e-05,
"loss": 0.0748,
"step": 2930
},
{
"epoch": 6.150627615062762,
"grad_norm": 0.26663076877593994,
"learning_rate": 9.428973384145396e-05,
"loss": 0.0719,
"step": 2940
},
{
"epoch": 6.171548117154812,
"grad_norm": 0.22700175642967224,
"learning_rate": 9.423847039065922e-05,
"loss": 0.0685,
"step": 2950
},
{
"epoch": 6.192468619246862,
"grad_norm": 0.28385862708091736,
"learning_rate": 9.418699192427805e-05,
"loss": 0.0745,
"step": 2960
},
{
"epoch": 6.2133891213389125,
"grad_norm": 0.28692567348480225,
"learning_rate": 9.41352986925151e-05,
"loss": 0.0772,
"step": 2970
},
{
"epoch": 6.2343096234309625,
"grad_norm": 0.2288256287574768,
"learning_rate": 9.408339094661895e-05,
"loss": 0.0706,
"step": 2980
},
{
"epoch": 6.2552301255230125,
"grad_norm": 0.38740843534469604,
"learning_rate": 9.40312689388807e-05,
"loss": 0.0696,
"step": 2990
},
{
"epoch": 6.2761506276150625,
"grad_norm": 0.23974934220314026,
"learning_rate": 9.397893292263292e-05,
"loss": 0.0675,
"step": 3000
},
{
"epoch": 6.297071129707113,
"grad_norm": 0.26130393147468567,
"learning_rate": 9.392638315224829e-05,
"loss": 0.0711,
"step": 3010
},
{
"epoch": 6.317991631799163,
"grad_norm": 0.17046813666820526,
"learning_rate": 9.387361988313846e-05,
"loss": 0.0676,
"step": 3020
},
{
"epoch": 6.338912133891213,
"grad_norm": 0.31989800930023193,
"learning_rate": 9.38206433717527e-05,
"loss": 0.0762,
"step": 3030
},
{
"epoch": 6.359832635983263,
"grad_norm": 0.18395087122917175,
"learning_rate": 9.376745387557681e-05,
"loss": 0.0645,
"step": 3040
},
{
"epoch": 6.380753138075314,
"grad_norm": 0.24593815207481384,
"learning_rate": 9.371405165313169e-05,
"loss": 0.0679,
"step": 3050
},
{
"epoch": 6.401673640167364,
"grad_norm": 0.26608437299728394,
"learning_rate": 9.366043696397222e-05,
"loss": 0.0795,
"step": 3060
},
{
"epoch": 6.422594142259414,
"grad_norm": 0.2634703516960144,
"learning_rate": 9.360661006868592e-05,
"loss": 0.0779,
"step": 3070
},
{
"epoch": 6.443514644351464,
"grad_norm": 0.22274665534496307,
"learning_rate": 9.355257122889173e-05,
"loss": 0.0784,
"step": 3080
},
{
"epoch": 6.464435146443515,
"grad_norm": 0.21002769470214844,
"learning_rate": 9.349832070723871e-05,
"loss": 0.0718,
"step": 3090
},
{
"epoch": 6.485355648535565,
"grad_norm": 0.20783476531505585,
"learning_rate": 9.34438587674048e-05,
"loss": 0.0749,
"step": 3100
},
{
"epoch": 6.506276150627615,
"grad_norm": 0.1763143390417099,
"learning_rate": 9.338918567409545e-05,
"loss": 0.0699,
"step": 3110
},
{
"epoch": 6.527196652719665,
"grad_norm": 0.25238746404647827,
"learning_rate": 9.333430169304247e-05,
"loss": 0.0735,
"step": 3120
},
{
"epoch": 6.548117154811716,
"grad_norm": 0.21795807778835297,
"learning_rate": 9.327920709100259e-05,
"loss": 0.0715,
"step": 3130
},
{
"epoch": 6.569037656903766,
"grad_norm": 0.26992955803871155,
"learning_rate": 9.322390213575631e-05,
"loss": 0.0792,
"step": 3140
},
{
"epoch": 6.589958158995816,
"grad_norm": 0.18660517036914825,
"learning_rate": 9.316838709610648e-05,
"loss": 0.0668,
"step": 3150
},
{
"epoch": 6.610878661087866,
"grad_norm": 0.30259087681770325,
"learning_rate": 9.311266224187706e-05,
"loss": 0.0686,
"step": 3160
},
{
"epoch": 6.631799163179917,
"grad_norm": 0.28926482796669006,
"learning_rate": 9.305672784391175e-05,
"loss": 0.069,
"step": 3170
},
{
"epoch": 6.652719665271967,
"grad_norm": 0.22975818812847137,
"learning_rate": 9.300058417407276e-05,
"loss": 0.0744,
"step": 3180
},
{
"epoch": 6.673640167364017,
"grad_norm": 0.248214989900589,
"learning_rate": 9.29442315052394e-05,
"loss": 0.0674,
"step": 3190
},
{
"epoch": 6.694560669456067,
"grad_norm": 0.23017367720603943,
"learning_rate": 9.288767011130684e-05,
"loss": 0.0767,
"step": 3200
},
{
"epoch": 6.7154811715481175,
"grad_norm": 0.30157166719436646,
"learning_rate": 9.283090026718466e-05,
"loss": 0.075,
"step": 3210
},
{
"epoch": 6.7364016736401675,
"grad_norm": 0.24175700545310974,
"learning_rate": 9.277392224879568e-05,
"loss": 0.0726,
"step": 3220
},
{
"epoch": 6.7573221757322175,
"grad_norm": 0.32189878821372986,
"learning_rate": 9.271673633307445e-05,
"loss": 0.0672,
"step": 3230
},
{
"epoch": 6.7782426778242675,
"grad_norm": 0.2515658736228943,
"learning_rate": 9.265934279796602e-05,
"loss": 0.057,
"step": 3240
},
{
"epoch": 6.799163179916318,
"grad_norm": 0.27900680899620056,
"learning_rate": 9.260174192242453e-05,
"loss": 0.0643,
"step": 3250
},
{
"epoch": 6.820083682008368,
"grad_norm": 0.20859810709953308,
"learning_rate": 9.254393398641185e-05,
"loss": 0.0694,
"step": 3260
},
{
"epoch": 6.841004184100418,
"grad_norm": 0.33800724148750305,
"learning_rate": 9.248591927089628e-05,
"loss": 0.0698,
"step": 3270
},
{
"epoch": 6.861924686192468,
"grad_norm": 0.27680280804634094,
"learning_rate": 9.242769805785115e-05,
"loss": 0.0683,
"step": 3280
},
{
"epoch": 6.882845188284519,
"grad_norm": 0.24083566665649414,
"learning_rate": 9.236927063025342e-05,
"loss": 0.0653,
"step": 3290
},
{
"epoch": 6.903765690376569,
"grad_norm": 0.2222203016281128,
"learning_rate": 9.231063727208234e-05,
"loss": 0.065,
"step": 3300
},
{
"epoch": 6.924686192468619,
"grad_norm": 0.18106094002723694,
"learning_rate": 9.225179826831807e-05,
"loss": 0.0591,
"step": 3310
},
{
"epoch": 6.945606694560669,
"grad_norm": 0.24536311626434326,
"learning_rate": 9.219275390494024e-05,
"loss": 0.0667,
"step": 3320
},
{
"epoch": 6.96652719665272,
"grad_norm": 0.23865914344787598,
"learning_rate": 9.213350446892668e-05,
"loss": 0.0623,
"step": 3330
},
{
"epoch": 6.98744769874477,
"grad_norm": 0.1846132129430771,
"learning_rate": 9.207405024825186e-05,
"loss": 0.0771,
"step": 3340
},
{
"epoch": 7.00836820083682,
"grad_norm": 0.2307969182729721,
"learning_rate": 9.201439153188569e-05,
"loss": 0.0994,
"step": 3350
},
{
"epoch": 7.02928870292887,
"grad_norm": 0.3087303936481476,
"learning_rate": 9.19545286097919e-05,
"loss": 0.0697,
"step": 3360
},
{
"epoch": 7.050209205020921,
"grad_norm": 0.21722181141376495,
"learning_rate": 9.189446177292679e-05,
"loss": 0.0752,
"step": 3370
},
{
"epoch": 7.071129707112971,
"grad_norm": 0.2604837715625763,
"learning_rate": 9.183419131323778e-05,
"loss": 0.0778,
"step": 3380
},
{
"epoch": 7.092050209205021,
"grad_norm": 0.2235862761735916,
"learning_rate": 9.177371752366191e-05,
"loss": 0.0753,
"step": 3390
},
{
"epoch": 7.112970711297071,
"grad_norm": 0.20492425560951233,
"learning_rate": 9.171304069812454e-05,
"loss": 0.0623,
"step": 3400
},
{
"epoch": 7.133891213389122,
"grad_norm": 0.26874053478240967,
"learning_rate": 9.165216113153782e-05,
"loss": 0.0762,
"step": 3410
},
{
"epoch": 7.154811715481172,
"grad_norm": 0.1912374496459961,
"learning_rate": 9.159107911979936e-05,
"loss": 0.0655,
"step": 3420
},
{
"epoch": 7.175732217573222,
"grad_norm": 0.208548441529274,
"learning_rate": 9.152979495979063e-05,
"loss": 0.0675,
"step": 3430
},
{
"epoch": 7.196652719665272,
"grad_norm": 0.26155465841293335,
"learning_rate": 9.146830894937571e-05,
"loss": 0.0692,
"step": 3440
},
{
"epoch": 7.2175732217573225,
"grad_norm": 0.3482346534729004,
"learning_rate": 9.140662138739969e-05,
"loss": 0.0723,
"step": 3450
},
{
"epoch": 7.2384937238493725,
"grad_norm": 0.2451719492673874,
"learning_rate": 9.134473257368732e-05,
"loss": 0.0691,
"step": 3460
},
{
"epoch": 7.2594142259414225,
"grad_norm": 0.2660260498523712,
"learning_rate": 9.128264280904145e-05,
"loss": 0.073,
"step": 3470
},
{
"epoch": 7.2803347280334725,
"grad_norm": 0.32091331481933594,
"learning_rate": 9.122035239524169e-05,
"loss": 0.071,
"step": 3480
},
{
"epoch": 7.301255230125523,
"grad_norm": 0.23416461050510406,
"learning_rate": 9.115786163504285e-05,
"loss": 0.0634,
"step": 3490
},
{
"epoch": 7.322175732217573,
"grad_norm": 0.19586139917373657,
"learning_rate": 9.10951708321735e-05,
"loss": 0.0646,
"step": 3500
},
{
"epoch": 7.343096234309623,
"grad_norm": 0.19783517718315125,
"learning_rate": 9.10322802913345e-05,
"loss": 0.0683,
"step": 3510
},
{
"epoch": 7.364016736401673,
"grad_norm": 0.24476327002048492,
"learning_rate": 9.096919031819751e-05,
"loss": 0.0642,
"step": 3520
},
{
"epoch": 7.384937238493724,
"grad_norm": 0.25556132197380066,
"learning_rate": 9.090590121940348e-05,
"loss": 0.0655,
"step": 3530
},
{
"epoch": 7.405857740585774,
"grad_norm": 0.26956650614738464,
"learning_rate": 9.084241330256121e-05,
"loss": 0.0743,
"step": 3540
},
{
"epoch": 7.426778242677824,
"grad_norm": 0.22216159105300903,
"learning_rate": 9.077872687624586e-05,
"loss": 0.0715,
"step": 3550
},
{
"epoch": 7.447698744769874,
"grad_norm": 0.27579641342163086,
"learning_rate": 9.071484224999735e-05,
"loss": 0.0731,
"step": 3560
},
{
"epoch": 7.468619246861925,
"grad_norm": 0.26512736082077026,
"learning_rate": 9.0650759734319e-05,
"loss": 0.0741,
"step": 3570
},
{
"epoch": 7.489539748953975,
"grad_norm": 0.2138041853904724,
"learning_rate": 9.05864796406759e-05,
"loss": 0.0698,
"step": 3580
},
{
"epoch": 7.510460251046025,
"grad_norm": 0.2592664062976837,
"learning_rate": 9.052200228149343e-05,
"loss": 0.0711,
"step": 3590
},
{
"epoch": 7.531380753138075,
"grad_norm": 0.27044913172721863,
"learning_rate": 9.04573279701558e-05,
"loss": 0.0715,
"step": 3600
},
{
"epoch": 7.552301255230126,
"grad_norm": 0.28558292984962463,
"learning_rate": 9.039245702100448e-05,
"loss": 0.0652,
"step": 3610
},
{
"epoch": 7.573221757322176,
"grad_norm": 0.37628281116485596,
"learning_rate": 9.032738974933664e-05,
"loss": 0.0685,
"step": 3620
},
{
"epoch": 7.594142259414226,
"grad_norm": 0.3403959572315216,
"learning_rate": 9.026212647140365e-05,
"loss": 0.0719,
"step": 3630
},
{
"epoch": 7.615062761506276,
"grad_norm": 0.24478821456432343,
"learning_rate": 9.019666750440956e-05,
"loss": 0.0683,
"step": 3640
},
{
"epoch": 7.635983263598327,
"grad_norm": 0.18002082407474518,
"learning_rate": 9.013101316650956e-05,
"loss": 0.0689,
"step": 3650
},
{
"epoch": 7.656903765690377,
"grad_norm": 0.22669140994548798,
"learning_rate": 9.00651637768084e-05,
"loss": 0.0668,
"step": 3660
},
{
"epoch": 7.677824267782427,
"grad_norm": 0.33720284700393677,
"learning_rate": 8.999911965535885e-05,
"loss": 0.0673,
"step": 3670
},
{
"epoch": 7.698744769874477,
"grad_norm": 0.2454947829246521,
"learning_rate": 8.993288112316014e-05,
"loss": 0.0651,
"step": 3680
},
{
"epoch": 7.7196652719665275,
"grad_norm": 0.24713526666164398,
"learning_rate": 8.986644850215644e-05,
"loss": 0.0731,
"step": 3690
},
{
"epoch": 7.7405857740585775,
"grad_norm": 0.16733984649181366,
"learning_rate": 8.979982211523523e-05,
"loss": 0.0693,
"step": 3700
},
{
"epoch": 7.7615062761506275,
"grad_norm": 0.19022060930728912,
"learning_rate": 8.97330022862258e-05,
"loss": 0.0626,
"step": 3710
},
{
"epoch": 7.7824267782426775,
"grad_norm": 0.18342478573322296,
"learning_rate": 8.96659893398976e-05,
"loss": 0.0762,
"step": 3720
},
{
"epoch": 7.803347280334728,
"grad_norm": 0.24058032035827637,
"learning_rate": 8.959878360195876e-05,
"loss": 0.0662,
"step": 3730
},
{
"epoch": 7.824267782426778,
"grad_norm": 0.22845084965229034,
"learning_rate": 8.953138539905438e-05,
"loss": 0.0641,
"step": 3740
},
{
"epoch": 7.845188284518828,
"grad_norm": 0.21459327638149261,
"learning_rate": 8.946379505876506e-05,
"loss": 0.0711,
"step": 3750
},
{
"epoch": 7.866108786610878,
"grad_norm": 0.19498105347156525,
"learning_rate": 8.939601290960527e-05,
"loss": 0.0649,
"step": 3760
},
{
"epoch": 7.887029288702929,
"grad_norm": 0.23208363354206085,
"learning_rate": 8.932803928102167e-05,
"loss": 0.0676,
"step": 3770
},
{
"epoch": 7.907949790794979,
"grad_norm": 0.2670055627822876,
"learning_rate": 8.925987450339168e-05,
"loss": 0.0719,
"step": 3780
},
{
"epoch": 7.928870292887029,
"grad_norm": 0.15933391451835632,
"learning_rate": 8.919151890802172e-05,
"loss": 0.057,
"step": 3790
},
{
"epoch": 7.949790794979079,
"grad_norm": 0.19486863911151886,
"learning_rate": 8.912297282714564e-05,
"loss": 0.0592,
"step": 3800
},
{
"epoch": 7.97071129707113,
"grad_norm": 0.20606879889965057,
"learning_rate": 8.905423659392316e-05,
"loss": 0.0658,
"step": 3810
},
{
"epoch": 7.99163179916318,
"grad_norm": 0.31158018112182617,
"learning_rate": 8.898531054243822e-05,
"loss": 0.0707,
"step": 3820
},
{
"epoch": 8.01255230125523,
"grad_norm": 0.2499541938304901,
"learning_rate": 8.891619500769729e-05,
"loss": 0.0704,
"step": 3830
},
{
"epoch": 8.03347280334728,
"grad_norm": 0.2598934769630432,
"learning_rate": 8.884689032562785e-05,
"loss": 0.0604,
"step": 3840
},
{
"epoch": 8.05439330543933,
"grad_norm": 0.16147422790527344,
"learning_rate": 8.87773968330767e-05,
"loss": 0.0667,
"step": 3850
},
{
"epoch": 8.07531380753138,
"grad_norm": 0.43483439087867737,
"learning_rate": 8.870771486780832e-05,
"loss": 0.0589,
"step": 3860
},
{
"epoch": 8.096234309623432,
"grad_norm": 0.20415087044239044,
"learning_rate": 8.863784476850322e-05,
"loss": 0.0801,
"step": 3870
},
{
"epoch": 8.117154811715482,
"grad_norm": 0.22121910750865936,
"learning_rate": 8.856778687475635e-05,
"loss": 0.0633,
"step": 3880
},
{
"epoch": 8.138075313807532,
"grad_norm": 0.19645985960960388,
"learning_rate": 8.849754152707541e-05,
"loss": 0.0695,
"step": 3890
},
{
"epoch": 8.158995815899582,
"grad_norm": 0.21949157118797302,
"learning_rate": 8.842710906687916e-05,
"loss": 0.0639,
"step": 3900
},
{
"epoch": 8.179916317991632,
"grad_norm": 0.288669615983963,
"learning_rate": 8.83564898364958e-05,
"loss": 0.0691,
"step": 3910
},
{
"epoch": 8.200836820083682,
"grad_norm": 0.2480822205543518,
"learning_rate": 8.828568417916136e-05,
"loss": 0.068,
"step": 3920
},
{
"epoch": 8.221757322175732,
"grad_norm": 0.2346440553665161,
"learning_rate": 8.821469243901794e-05,
"loss": 0.063,
"step": 3930
},
{
"epoch": 8.242677824267782,
"grad_norm": 0.1865086555480957,
"learning_rate": 8.814351496111201e-05,
"loss": 0.068,
"step": 3940
},
{
"epoch": 8.263598326359833,
"grad_norm": 0.21872031688690186,
"learning_rate": 8.807215209139293e-05,
"loss": 0.0697,
"step": 3950
},
{
"epoch": 8.284518828451883,
"grad_norm": 0.2085472047328949,
"learning_rate": 8.8000604176711e-05,
"loss": 0.0729,
"step": 3960
},
{
"epoch": 8.305439330543933,
"grad_norm": 0.18910904228687286,
"learning_rate": 8.792887156481598e-05,
"loss": 0.0678,
"step": 3970
},
{
"epoch": 8.326359832635983,
"grad_norm": 0.24901749193668365,
"learning_rate": 8.785695460435534e-05,
"loss": 0.0708,
"step": 3980
},
{
"epoch": 8.347280334728033,
"grad_norm": 0.2784505784511566,
"learning_rate": 8.778485364487248e-05,
"loss": 0.0592,
"step": 3990
},
{
"epoch": 8.368200836820083,
"grad_norm": 0.2604342997074127,
"learning_rate": 8.771256903680519e-05,
"loss": 0.0638,
"step": 4000
},
{
"epoch": 8.389121338912133,
"grad_norm": 0.16785018146038055,
"learning_rate": 8.764010113148382e-05,
"loss": 0.067,
"step": 4010
},
{
"epoch": 8.410041841004183,
"grad_norm": 0.18678627908229828,
"learning_rate": 8.756745028112959e-05,
"loss": 0.0595,
"step": 4020
},
{
"epoch": 8.430962343096235,
"grad_norm": 0.24131298065185547,
"learning_rate": 8.749461683885296e-05,
"loss": 0.0648,
"step": 4030
},
{
"epoch": 8.451882845188285,
"grad_norm": 0.2394871860742569,
"learning_rate": 8.742160115865179e-05,
"loss": 0.0648,
"step": 4040
},
{
"epoch": 8.472803347280335,
"grad_norm": 0.25061601400375366,
"learning_rate": 8.734840359540974e-05,
"loss": 0.071,
"step": 4050
},
{
"epoch": 8.493723849372385,
"grad_norm": 0.2007174789905548,
"learning_rate": 8.727502450489446e-05,
"loss": 0.0652,
"step": 4060
},
{
"epoch": 8.514644351464435,
"grad_norm": 0.27728864550590515,
"learning_rate": 8.720146424375591e-05,
"loss": 0.0708,
"step": 4070
},
{
"epoch": 8.535564853556485,
"grad_norm": 0.23519816994667053,
"learning_rate": 8.712772316952458e-05,
"loss": 0.0642,
"step": 4080
},
{
"epoch": 8.556485355648535,
"grad_norm": 0.2769893705844879,
"learning_rate": 8.705380164060982e-05,
"loss": 0.0643,
"step": 4090
},
{
"epoch": 8.577405857740585,
"grad_norm": 0.23661431670188904,
"learning_rate": 8.697970001629799e-05,
"loss": 0.0624,
"step": 4100
},
{
"epoch": 8.598326359832637,
"grad_norm": 0.30667105317115784,
"learning_rate": 8.690541865675084e-05,
"loss": 0.0739,
"step": 4110
},
{
"epoch": 8.619246861924687,
"grad_norm": 0.21583613753318787,
"learning_rate": 8.68309579230037e-05,
"loss": 0.0675,
"step": 4120
},
{
"epoch": 8.640167364016737,
"grad_norm": 0.3050605356693268,
"learning_rate": 8.675631817696372e-05,
"loss": 0.0675,
"step": 4130
},
{
"epoch": 8.661087866108787,
"grad_norm": 0.19947989284992218,
"learning_rate": 8.668149978140808e-05,
"loss": 0.0646,
"step": 4140
},
{
"epoch": 8.682008368200837,
"grad_norm": 0.25193777680397034,
"learning_rate": 8.66065030999823e-05,
"loss": 0.0655,
"step": 4150
},
{
"epoch": 8.702928870292887,
"grad_norm": 0.1913091540336609,
"learning_rate": 8.653132849719845e-05,
"loss": 0.0591,
"step": 4160
},
{
"epoch": 8.723849372384937,
"grad_norm": 0.23112277686595917,
"learning_rate": 8.64559763384333e-05,
"loss": 0.0597,
"step": 4170
},
{
"epoch": 8.744769874476987,
"grad_norm": 0.2377346009016037,
"learning_rate": 8.638044698992669e-05,
"loss": 0.0654,
"step": 4180
},
{
"epoch": 8.765690376569038,
"grad_norm": 0.2642386853694916,
"learning_rate": 8.630474081877959e-05,
"loss": 0.0664,
"step": 4190
},
{
"epoch": 8.786610878661088,
"grad_norm": 0.21720066666603088,
"learning_rate": 8.62288581929525e-05,
"loss": 0.0574,
"step": 4200
},
{
"epoch": 8.807531380753138,
"grad_norm": 0.1970827728509903,
"learning_rate": 8.615279948126343e-05,
"loss": 0.0601,
"step": 4210
},
{
"epoch": 8.828451882845188,
"grad_norm": 0.2270515263080597,
"learning_rate": 8.60765650533863e-05,
"loss": 0.0629,
"step": 4220
},
{
"epoch": 8.849372384937238,
"grad_norm": 0.31851762533187866,
"learning_rate": 8.60001552798491e-05,
"loss": 0.0605,
"step": 4230
},
{
"epoch": 8.870292887029288,
"grad_norm": 0.18598251044750214,
"learning_rate": 8.592357053203202e-05,
"loss": 0.0604,
"step": 4240
},
{
"epoch": 8.891213389121338,
"grad_norm": 0.2765622138977051,
"learning_rate": 8.58468111821657e-05,
"loss": 0.0719,
"step": 4250
},
{
"epoch": 8.91213389121339,
"grad_norm": 0.2147555947303772,
"learning_rate": 8.576987760332943e-05,
"loss": 0.0703,
"step": 4260
},
{
"epoch": 8.93305439330544,
"grad_norm": 0.21149249374866486,
"learning_rate": 8.56927701694493e-05,
"loss": 0.0743,
"step": 4270
},
{
"epoch": 8.95397489539749,
"grad_norm": 0.2396778017282486,
"learning_rate": 8.561548925529643e-05,
"loss": 0.0644,
"step": 4280
},
{
"epoch": 8.97489539748954,
"grad_norm": 0.21674704551696777,
"learning_rate": 8.553803523648506e-05,
"loss": 0.0596,
"step": 4290
},
{
"epoch": 8.99581589958159,
"grad_norm": 0.3351154923439026,
"learning_rate": 8.546040848947086e-05,
"loss": 0.0668,
"step": 4300
},
{
"epoch": 9.01673640167364,
"grad_norm": 0.2393759787082672,
"learning_rate": 8.538260939154894e-05,
"loss": 0.0682,
"step": 4310
},
{
"epoch": 9.03765690376569,
"grad_norm": 0.2055775374174118,
"learning_rate": 8.530463832085218e-05,
"loss": 0.0683,
"step": 4320
},
{
"epoch": 9.05857740585774,
"grad_norm": 0.23119060695171356,
"learning_rate": 8.522649565634927e-05,
"loss": 0.0596,
"step": 4330
},
{
"epoch": 9.07949790794979,
"grad_norm": 0.2209976464509964,
"learning_rate": 8.51481817778429e-05,
"loss": 0.0651,
"step": 4340
},
{
"epoch": 9.100418410041842,
"grad_norm": 0.2024475336074829,
"learning_rate": 8.506969706596797e-05,
"loss": 0.062,
"step": 4350
},
{
"epoch": 9.121338912133892,
"grad_norm": 0.18801653385162354,
"learning_rate": 8.499104190218964e-05,
"loss": 0.0673,
"step": 4360
},
{
"epoch": 9.142259414225942,
"grad_norm": 0.1942189484834671,
"learning_rate": 8.49122166688016e-05,
"loss": 0.0647,
"step": 4370
},
{
"epoch": 9.163179916317992,
"grad_norm": 0.2365776151418686,
"learning_rate": 8.483322174892404e-05,
"loss": 0.0631,
"step": 4380
},
{
"epoch": 9.184100418410042,
"grad_norm": 0.22303998470306396,
"learning_rate": 8.475405752650199e-05,
"loss": 0.0629,
"step": 4390
},
{
"epoch": 9.205020920502092,
"grad_norm": 0.2030114233493805,
"learning_rate": 8.467472438630328e-05,
"loss": 0.063,
"step": 4400
},
{
"epoch": 9.225941422594142,
"grad_norm": 0.20828013122081757,
"learning_rate": 8.459522271391682e-05,
"loss": 0.0733,
"step": 4410
},
{
"epoch": 9.246861924686192,
"grad_norm": 0.22515518963336945,
"learning_rate": 8.451555289575057e-05,
"loss": 0.0606,
"step": 4420
},
{
"epoch": 9.267782426778243,
"grad_norm": 0.20329605042934418,
"learning_rate": 8.443571531902981e-05,
"loss": 0.0634,
"step": 4430
},
{
"epoch": 9.288702928870293,
"grad_norm": 0.21866239607334137,
"learning_rate": 8.435571037179512e-05,
"loss": 0.0587,
"step": 4440
},
{
"epoch": 9.309623430962343,
"grad_norm": 0.2187184989452362,
"learning_rate": 8.427553844290062e-05,
"loss": 0.0731,
"step": 4450
},
{
"epoch": 9.330543933054393,
"grad_norm": 0.2076680064201355,
"learning_rate": 8.419519992201201e-05,
"loss": 0.0593,
"step": 4460
},
{
"epoch": 9.351464435146443,
"grad_norm": 0.29090818762779236,
"learning_rate": 8.411469519960469e-05,
"loss": 0.0691,
"step": 4470
},
{
"epoch": 9.372384937238493,
"grad_norm": 0.20809774100780487,
"learning_rate": 8.403402466696182e-05,
"loss": 0.0615,
"step": 4480
},
{
"epoch": 9.393305439330543,
"grad_norm": 0.23946796357631683,
"learning_rate": 8.395318871617255e-05,
"loss": 0.0616,
"step": 4490
},
{
"epoch": 9.414225941422593,
"grad_norm": 0.21378174424171448,
"learning_rate": 8.387218774012992e-05,
"loss": 0.0585,
"step": 4500
},
{
"epoch": 9.435146443514645,
"grad_norm": 0.21545960009098053,
"learning_rate": 8.379102213252915e-05,
"loss": 0.0641,
"step": 4510
},
{
"epoch": 9.456066945606695,
"grad_norm": 0.19115141034126282,
"learning_rate": 8.370969228786556e-05,
"loss": 0.0729,
"step": 4520
},
{
"epoch": 9.476987447698745,
"grad_norm": 0.21977156400680542,
"learning_rate": 8.362819860143275e-05,
"loss": 0.0664,
"step": 4530
},
{
"epoch": 9.497907949790795,
"grad_norm": 0.18935155868530273,
"learning_rate": 8.354654146932066e-05,
"loss": 0.0613,
"step": 4540
},
{
"epoch": 9.518828451882845,
"grad_norm": 0.20135313272476196,
"learning_rate": 8.346472128841364e-05,
"loss": 0.0619,
"step": 4550
},
{
"epoch": 9.539748953974895,
"grad_norm": 0.1766224056482315,
"learning_rate": 8.338273845638848e-05,
"loss": 0.0565,
"step": 4560
},
{
"epoch": 9.560669456066945,
"grad_norm": 0.18868570029735565,
"learning_rate": 8.330059337171258e-05,
"loss": 0.0633,
"step": 4570
},
{
"epoch": 9.581589958158997,
"grad_norm": 0.2175145000219345,
"learning_rate": 8.32182864336419e-05,
"loss": 0.0641,
"step": 4580
},
{
"epoch": 9.602510460251047,
"grad_norm": 0.3119242787361145,
"learning_rate": 8.313581804221908e-05,
"loss": 0.0681,
"step": 4590
},
{
"epoch": 9.623430962343097,
"grad_norm": 0.149024099111557,
"learning_rate": 8.305318859827147e-05,
"loss": 0.0601,
"step": 4600
},
{
"epoch": 9.644351464435147,
"grad_norm": 0.23879316449165344,
"learning_rate": 8.297039850340923e-05,
"loss": 0.0595,
"step": 4610
},
{
"epoch": 9.665271966527197,
"grad_norm": 0.21861664950847626,
"learning_rate": 8.288744816002331e-05,
"loss": 0.0653,
"step": 4620
},
{
"epoch": 9.686192468619247,
"grad_norm": 0.1617562174797058,
"learning_rate": 8.280433797128357e-05,
"loss": 0.0651,
"step": 4630
},
{
"epoch": 9.707112970711297,
"grad_norm": 0.23158331215381622,
"learning_rate": 8.272106834113674e-05,
"loss": 0.0617,
"step": 4640
},
{
"epoch": 9.728033472803347,
"grad_norm": 0.21476268768310547,
"learning_rate": 8.26376396743045e-05,
"loss": 0.0625,
"step": 4650
},
{
"epoch": 9.748953974895397,
"grad_norm": 0.21035324037075043,
"learning_rate": 8.25540523762815e-05,
"loss": 0.0626,
"step": 4660
},
{
"epoch": 9.769874476987448,
"grad_norm": 0.23255029320716858,
"learning_rate": 8.247030685333346e-05,
"loss": 0.0713,
"step": 4670
},
{
"epoch": 9.790794979079498,
"grad_norm": 0.16255882382392883,
"learning_rate": 8.238640351249503e-05,
"loss": 0.0643,
"step": 4680
},
{
"epoch": 9.811715481171548,
"grad_norm": 0.2551625669002533,
"learning_rate": 8.2302342761568e-05,
"loss": 0.0598,
"step": 4690
},
{
"epoch": 9.832635983263598,
"grad_norm": 0.2389603704214096,
"learning_rate": 8.221812500911919e-05,
"loss": 0.063,
"step": 4700
},
{
"epoch": 9.853556485355648,
"grad_norm": 0.22120417654514313,
"learning_rate": 8.213375066447853e-05,
"loss": 0.0684,
"step": 4710
},
{
"epoch": 9.874476987447698,
"grad_norm": 0.2859916090965271,
"learning_rate": 8.204922013773702e-05,
"loss": 0.0602,
"step": 4720
},
{
"epoch": 9.895397489539748,
"grad_norm": 0.23960047960281372,
"learning_rate": 8.196453383974478e-05,
"loss": 0.0684,
"step": 4730
},
{
"epoch": 9.9163179916318,
"grad_norm": 0.26067450642585754,
"learning_rate": 8.187969218210904e-05,
"loss": 0.0618,
"step": 4740
},
{
"epoch": 9.93723849372385,
"grad_norm": 0.30207592248916626,
"learning_rate": 8.179469557719213e-05,
"loss": 0.0587,
"step": 4750
},
{
"epoch": 9.9581589958159,
"grad_norm": 0.18259580433368683,
"learning_rate": 8.170954443810948e-05,
"loss": 0.0623,
"step": 4760
},
{
"epoch": 9.97907949790795,
"grad_norm": 0.18433575332164764,
"learning_rate": 8.162423917872764e-05,
"loss": 0.0649,
"step": 4770
},
{
"epoch": 10.0,
"grad_norm": 0.33734753727912903,
"learning_rate": 8.153878021366217e-05,
"loss": 0.0584,
"step": 4780
},
{
"epoch": 10.02092050209205,
"grad_norm": 0.21706482768058777,
"learning_rate": 8.14531679582758e-05,
"loss": 0.066,
"step": 4790
},
{
"epoch": 10.0418410041841,
"grad_norm": 0.24494396150112152,
"learning_rate": 8.136740282867621e-05,
"loss": 0.0621,
"step": 4800
},
{
"epoch": 10.06276150627615,
"grad_norm": 0.20361743867397308,
"learning_rate": 8.128148524171418e-05,
"loss": 0.0639,
"step": 4810
},
{
"epoch": 10.0836820083682,
"grad_norm": 0.2107476443052292,
"learning_rate": 8.119541561498146e-05,
"loss": 0.0621,
"step": 4820
},
{
"epoch": 10.104602510460252,
"grad_norm": 0.14518998563289642,
"learning_rate": 8.110919436680877e-05,
"loss": 0.0536,
"step": 4830
},
{
"epoch": 10.125523012552302,
"grad_norm": 0.2824230194091797,
"learning_rate": 8.102282191626378e-05,
"loss": 0.062,
"step": 4840
},
{
"epoch": 10.146443514644352,
"grad_norm": 0.21971572935581207,
"learning_rate": 8.0936298683149e-05,
"loss": 0.0766,
"step": 4850
},
{
"epoch": 10.167364016736402,
"grad_norm": 0.22919374704360962,
"learning_rate": 8.084962508799991e-05,
"loss": 0.0628,
"step": 4860
},
{
"epoch": 10.188284518828452,
"grad_norm": 0.29920902848243713,
"learning_rate": 8.076280155208273e-05,
"loss": 0.063,
"step": 4870
},
{
"epoch": 10.209205020920502,
"grad_norm": 0.20193646848201752,
"learning_rate": 8.067582849739245e-05,
"loss": 0.0556,
"step": 4880
},
{
"epoch": 10.230125523012552,
"grad_norm": 0.18968577682971954,
"learning_rate": 8.058870634665079e-05,
"loss": 0.0644,
"step": 4890
},
{
"epoch": 10.251046025104603,
"grad_norm": 0.18380005657672882,
"learning_rate": 8.050143552330414e-05,
"loss": 0.0629,
"step": 4900
},
{
"epoch": 10.271966527196653,
"grad_norm": 0.20432725548744202,
"learning_rate": 8.041401645152151e-05,
"loss": 0.0615,
"step": 4910
},
{
"epoch": 10.292887029288703,
"grad_norm": 0.20045466721057892,
"learning_rate": 8.032644955619239e-05,
"loss": 0.0566,
"step": 4920
},
{
"epoch": 10.313807531380753,
"grad_norm": 0.23264667391777039,
"learning_rate": 8.023873526292483e-05,
"loss": 0.0618,
"step": 4930
},
{
"epoch": 10.334728033472803,
"grad_norm": 0.1525534689426422,
"learning_rate": 8.015087399804322e-05,
"loss": 0.0619,
"step": 4940
},
{
"epoch": 10.355648535564853,
"grad_norm": 0.24940307438373566,
"learning_rate": 8.006286618858635e-05,
"loss": 0.057,
"step": 4950
},
{
"epoch": 10.376569037656903,
"grad_norm": 0.23391863703727722,
"learning_rate": 7.99747122623052e-05,
"loss": 0.063,
"step": 4960
},
{
"epoch": 10.397489539748953,
"grad_norm": 0.18223020434379578,
"learning_rate": 7.988641264766097e-05,
"loss": 0.0538,
"step": 4970
},
{
"epoch": 10.418410041841003,
"grad_norm": 0.19934974610805511,
"learning_rate": 7.9797967773823e-05,
"loss": 0.0677,
"step": 4980
},
{
"epoch": 10.439330543933055,
"grad_norm": 0.14090116322040558,
"learning_rate": 7.970937807066659e-05,
"loss": 0.0647,
"step": 4990
},
{
"epoch": 10.460251046025105,
"grad_norm": 0.18634556233882904,
"learning_rate": 7.962064396877098e-05,
"loss": 0.061,
"step": 5000
},
{
"epoch": 10.481171548117155,
"grad_norm": 0.2548336982727051,
"learning_rate": 7.953176589941722e-05,
"loss": 0.0643,
"step": 5010
},
{
"epoch": 10.502092050209205,
"grad_norm": 0.18743976950645447,
"learning_rate": 7.944274429458614e-05,
"loss": 0.063,
"step": 5020
},
{
"epoch": 10.523012552301255,
"grad_norm": 0.20314089953899384,
"learning_rate": 7.93535795869562e-05,
"loss": 0.0599,
"step": 5030
},
{
"epoch": 10.543933054393305,
"grad_norm": 0.21856661140918732,
"learning_rate": 7.926427220990134e-05,
"loss": 0.0594,
"step": 5040
},
{
"epoch": 10.564853556485355,
"grad_norm": 0.20998577773571014,
"learning_rate": 7.9174822597489e-05,
"loss": 0.0581,
"step": 5050
},
{
"epoch": 10.585774058577407,
"grad_norm": 0.19951078295707703,
"learning_rate": 7.908523118447789e-05,
"loss": 0.06,
"step": 5060
},
{
"epoch": 10.606694560669457,
"grad_norm": 0.19410060346126556,
"learning_rate": 7.89954984063159e-05,
"loss": 0.0634,
"step": 5070
},
{
"epoch": 10.627615062761507,
"grad_norm": 0.19956843554973602,
"learning_rate": 7.890562469913811e-05,
"loss": 0.0519,
"step": 5080
},
{
"epoch": 10.648535564853557,
"grad_norm": 0.17840741574764252,
"learning_rate": 7.881561049976447e-05,
"loss": 0.0625,
"step": 5090
},
{
"epoch": 10.669456066945607,
"grad_norm": 0.1961093693971634,
"learning_rate": 7.872545624569779e-05,
"loss": 0.0607,
"step": 5100
},
{
"epoch": 10.690376569037657,
"grad_norm": 0.2057664543390274,
"learning_rate": 7.863516237512164e-05,
"loss": 0.0654,
"step": 5110
},
{
"epoch": 10.711297071129707,
"grad_norm": 0.18237608671188354,
"learning_rate": 7.854472932689815e-05,
"loss": 0.0629,
"step": 5120
},
{
"epoch": 10.732217573221757,
"grad_norm": 0.166738823056221,
"learning_rate": 7.845415754056591e-05,
"loss": 0.0673,
"step": 5130
},
{
"epoch": 10.753138075313807,
"grad_norm": 0.21595996618270874,
"learning_rate": 7.836344745633783e-05,
"loss": 0.0549,
"step": 5140
},
{
"epoch": 10.774058577405858,
"grad_norm": 0.25722071528434753,
"learning_rate": 7.8272599515099e-05,
"loss": 0.0576,
"step": 5150
},
{
"epoch": 10.794979079497908,
"grad_norm": 0.19984610378742218,
"learning_rate": 7.818161415840453e-05,
"loss": 0.0617,
"step": 5160
},
{
"epoch": 10.815899581589958,
"grad_norm": 0.2560693323612213,
"learning_rate": 7.809049182847745e-05,
"loss": 0.0607,
"step": 5170
},
{
"epoch": 10.836820083682008,
"grad_norm": 0.18944580852985382,
"learning_rate": 7.799923296820653e-05,
"loss": 0.0489,
"step": 5180
},
{
"epoch": 10.857740585774058,
"grad_norm": 0.2388056218624115,
"learning_rate": 7.790783802114408e-05,
"loss": 0.0581,
"step": 5190
},
{
"epoch": 10.878661087866108,
"grad_norm": 0.18988242745399475,
"learning_rate": 7.781630743150392e-05,
"loss": 0.0619,
"step": 5200
},
{
"epoch": 10.899581589958158,
"grad_norm": 0.19641171395778656,
"learning_rate": 7.772464164415907e-05,
"loss": 0.0673,
"step": 5210
},
{
"epoch": 10.92050209205021,
"grad_norm": 0.24006357789039612,
"learning_rate": 7.763284110463973e-05,
"loss": 0.0651,
"step": 5220
},
{
"epoch": 10.94142259414226,
"grad_norm": 0.1970333606004715,
"learning_rate": 7.754090625913099e-05,
"loss": 0.0585,
"step": 5230
},
{
"epoch": 10.96234309623431,
"grad_norm": 0.26474833488464355,
"learning_rate": 7.744883755447075e-05,
"loss": 0.0637,
"step": 5240
},
{
"epoch": 10.98326359832636,
"grad_norm": 0.23458439111709595,
"learning_rate": 7.735663543814749e-05,
"loss": 0.0567,
"step": 5250
},
{
"epoch": 11.00418410041841,
"grad_norm": 0.17837220430374146,
"learning_rate": 7.726430035829813e-05,
"loss": 0.0551,
"step": 5260
},
{
"epoch": 11.02510460251046,
"grad_norm": 0.22072383761405945,
"learning_rate": 7.717183276370586e-05,
"loss": 0.0656,
"step": 5270
},
{
"epoch": 11.04602510460251,
"grad_norm": 0.1936493068933487,
"learning_rate": 7.707923310379794e-05,
"loss": 0.0636,
"step": 5280
},
{
"epoch": 11.06694560669456,
"grad_norm": 0.2801334261894226,
"learning_rate": 7.698650182864351e-05,
"loss": 0.0662,
"step": 5290
},
{
"epoch": 11.087866108786612,
"grad_norm": 0.21320787072181702,
"learning_rate": 7.689363938895138e-05,
"loss": 0.0628,
"step": 5300
},
{
"epoch": 11.108786610878662,
"grad_norm": 0.2542664110660553,
"learning_rate": 7.680064623606791e-05,
"loss": 0.0564,
"step": 5310
},
{
"epoch": 11.129707112970712,
"grad_norm": 0.19938422739505768,
"learning_rate": 7.670752282197476e-05,
"loss": 0.0603,
"step": 5320
},
{
"epoch": 11.150627615062762,
"grad_norm": 0.2444109469652176,
"learning_rate": 7.66142695992867e-05,
"loss": 0.0617,
"step": 5330
},
{
"epoch": 11.171548117154812,
"grad_norm": 0.17026054859161377,
"learning_rate": 7.652088702124944e-05,
"loss": 0.0552,
"step": 5340
},
{
"epoch": 11.192468619246862,
"grad_norm": 0.20425626635551453,
"learning_rate": 7.64273755417374e-05,
"loss": 0.0567,
"step": 5350
},
{
"epoch": 11.213389121338912,
"grad_norm": 0.2587556838989258,
"learning_rate": 7.633373561525148e-05,
"loss": 0.0628,
"step": 5360
},
{
"epoch": 11.234309623430962,
"grad_norm": 0.19820363819599152,
"learning_rate": 7.623996769691691e-05,
"loss": 0.0652,
"step": 5370
},
{
"epoch": 11.255230125523013,
"grad_norm": 0.216740220785141,
"learning_rate": 7.614607224248103e-05,
"loss": 0.0674,
"step": 5380
},
{
"epoch": 11.276150627615063,
"grad_norm": 0.22586363554000854,
"learning_rate": 7.605204970831096e-05,
"loss": 0.0602,
"step": 5390
},
{
"epoch": 11.297071129707113,
"grad_norm": 0.29689839482307434,
"learning_rate": 7.595790055139163e-05,
"loss": 0.0618,
"step": 5400
},
{
"epoch": 11.317991631799163,
"grad_norm": 0.3049317002296448,
"learning_rate": 7.586362522932323e-05,
"loss": 0.0603,
"step": 5410
},
{
"epoch": 11.338912133891213,
"grad_norm": 0.20710164308547974,
"learning_rate": 7.576922420031929e-05,
"loss": 0.0635,
"step": 5420
},
{
"epoch": 11.359832635983263,
"grad_norm": 0.23157654702663422,
"learning_rate": 7.567469792320428e-05,
"loss": 0.0598,
"step": 5430
},
{
"epoch": 11.380753138075313,
"grad_norm": 0.20519325137138367,
"learning_rate": 7.558004685741137e-05,
"loss": 0.0582,
"step": 5440
},
{
"epoch": 11.401673640167363,
"grad_norm": 0.29819655418395996,
"learning_rate": 7.548527146298036e-05,
"loss": 0.065,
"step": 5450
},
{
"epoch": 11.422594142259415,
"grad_norm": 0.16741812229156494,
"learning_rate": 7.539037220055527e-05,
"loss": 0.0594,
"step": 5460
},
{
"epoch": 11.443514644351465,
"grad_norm": 0.19318008422851562,
"learning_rate": 7.529534953138213e-05,
"loss": 0.061,
"step": 5470
},
{
"epoch": 11.464435146443515,
"grad_norm": 0.27596399188041687,
"learning_rate": 7.520020391730684e-05,
"loss": 0.0546,
"step": 5480
},
{
"epoch": 11.485355648535565,
"grad_norm": 0.1646844744682312,
"learning_rate": 7.510493582077281e-05,
"loss": 0.0554,
"step": 5490
},
{
"epoch": 11.506276150627615,
"grad_norm": 0.19541478157043457,
"learning_rate": 7.500954570481882e-05,
"loss": 0.0586,
"step": 5500
},
{
"epoch": 11.527196652719665,
"grad_norm": 0.23444367945194244,
"learning_rate": 7.491403403307662e-05,
"loss": 0.0538,
"step": 5510
},
{
"epoch": 11.548117154811715,
"grad_norm": 0.19553513824939728,
"learning_rate": 7.481840126976885e-05,
"loss": 0.0531,
"step": 5520
},
{
"epoch": 11.569037656903765,
"grad_norm": 0.22565273940563202,
"learning_rate": 7.472264787970666e-05,
"loss": 0.053,
"step": 5530
},
{
"epoch": 11.589958158995817,
"grad_norm": 0.20270612835884094,
"learning_rate": 7.462677432828751e-05,
"loss": 0.0698,
"step": 5540
},
{
"epoch": 11.610878661087867,
"grad_norm": 0.19266432523727417,
"learning_rate": 7.453078108149287e-05,
"loss": 0.0562,
"step": 5550
},
{
"epoch": 11.631799163179917,
"grad_norm": 0.22119610011577606,
"learning_rate": 7.443466860588599e-05,
"loss": 0.0561,
"step": 5560
},
{
"epoch": 11.652719665271967,
"grad_norm": 0.15727278590202332,
"learning_rate": 7.43384373686096e-05,
"loss": 0.0602,
"step": 5570
},
{
"epoch": 11.673640167364017,
"grad_norm": 0.26173195242881775,
"learning_rate": 7.424208783738367e-05,
"loss": 0.0555,
"step": 5580
},
{
"epoch": 11.694560669456067,
"grad_norm": 0.1976771056652069,
"learning_rate": 7.414562048050315e-05,
"loss": 0.0585,
"step": 5590
},
{
"epoch": 11.715481171548117,
"grad_norm": 0.16339626908302307,
"learning_rate": 7.404903576683559e-05,
"loss": 0.0628,
"step": 5600
},
{
"epoch": 11.736401673640167,
"grad_norm": 0.2986162602901459,
"learning_rate": 7.3952334165819e-05,
"loss": 0.0607,
"step": 5610
},
{
"epoch": 11.757322175732218,
"grad_norm": 0.1576758623123169,
"learning_rate": 7.385551614745952e-05,
"loss": 0.0641,
"step": 5620
},
{
"epoch": 11.778242677824268,
"grad_norm": 0.22546252608299255,
"learning_rate": 7.375858218232905e-05,
"loss": 0.0587,
"step": 5630
},
{
"epoch": 11.799163179916318,
"grad_norm": 0.22091388702392578,
"learning_rate": 7.366153274156312e-05,
"loss": 0.0519,
"step": 5640
},
{
"epoch": 11.820083682008368,
"grad_norm": 0.17188245058059692,
"learning_rate": 7.356436829685844e-05,
"loss": 0.0562,
"step": 5650
},
{
"epoch": 11.841004184100418,
"grad_norm": 0.1593337208032608,
"learning_rate": 7.346708932047074e-05,
"loss": 0.055,
"step": 5660
},
{
"epoch": 11.861924686192468,
"grad_norm": 0.30180731415748596,
"learning_rate": 7.336969628521237e-05,
"loss": 0.0586,
"step": 5670
},
{
"epoch": 11.882845188284518,
"grad_norm": 0.18105119466781616,
"learning_rate": 7.32721896644501e-05,
"loss": 0.0551,
"step": 5680
},
{
"epoch": 11.903765690376568,
"grad_norm": 0.14182309806346893,
"learning_rate": 7.317456993210272e-05,
"loss": 0.0523,
"step": 5690
},
{
"epoch": 11.92468619246862,
"grad_norm": 0.20939673483371735,
"learning_rate": 7.307683756263881e-05,
"loss": 0.067,
"step": 5700
},
{
"epoch": 11.94560669456067,
"grad_norm": 0.2384624034166336,
"learning_rate": 7.297899303107441e-05,
"loss": 0.0542,
"step": 5710
},
{
"epoch": 11.96652719665272,
"grad_norm": 0.21782812476158142,
"learning_rate": 7.288103681297068e-05,
"loss": 0.0597,
"step": 5720
},
{
"epoch": 11.98744769874477,
"grad_norm": 0.27633702754974365,
"learning_rate": 7.278296938443166e-05,
"loss": 0.0661,
"step": 5730
},
{
"epoch": 12.00836820083682,
"grad_norm": 0.21131427586078644,
"learning_rate": 7.26847912221019e-05,
"loss": 0.0601,
"step": 5740
},
{
"epoch": 12.02928870292887,
"grad_norm": 0.15685714781284332,
"learning_rate": 7.258650280316415e-05,
"loss": 0.0566,
"step": 5750
},
{
"epoch": 12.05020920502092,
"grad_norm": 0.20454737544059753,
"learning_rate": 7.248810460533706e-05,
"loss": 0.0563,
"step": 5760
},
{
"epoch": 12.07112970711297,
"grad_norm": 0.23846374452114105,
"learning_rate": 7.238959710687282e-05,
"loss": 0.058,
"step": 5770
},
{
"epoch": 12.092050209205022,
"grad_norm": 0.1912502944469452,
"learning_rate": 7.229098078655489e-05,
"loss": 0.0568,
"step": 5780
},
{
"epoch": 12.112970711297072,
"grad_norm": 0.16738390922546387,
"learning_rate": 7.219225612369565e-05,
"loss": 0.0621,
"step": 5790
},
{
"epoch": 12.133891213389122,
"grad_norm": 0.21919557452201843,
"learning_rate": 7.209342359813404e-05,
"loss": 0.0589,
"step": 5800
},
{
"epoch": 12.154811715481172,
"grad_norm": 0.21319399774074554,
"learning_rate": 7.199448369023327e-05,
"loss": 0.0539,
"step": 5810
},
{
"epoch": 12.175732217573222,
"grad_norm": 0.14287406206130981,
"learning_rate": 7.189543688087845e-05,
"loss": 0.0541,
"step": 5820
},
{
"epoch": 12.196652719665272,
"grad_norm": 0.15173019468784332,
"learning_rate": 7.17962836514743e-05,
"loss": 0.0537,
"step": 5830
},
{
"epoch": 12.217573221757322,
"grad_norm": 0.18111112713813782,
"learning_rate": 7.169702448394279e-05,
"loss": 0.0619,
"step": 5840
},
{
"epoch": 12.238493723849372,
"grad_norm": 0.21038256585597992,
"learning_rate": 7.159765986072071e-05,
"loss": 0.0606,
"step": 5850
},
{
"epoch": 12.259414225941423,
"grad_norm": 0.20237678289413452,
"learning_rate": 7.149819026475751e-05,
"loss": 0.0645,
"step": 5860
},
{
"epoch": 12.280334728033473,
"grad_norm": 0.23611639440059662,
"learning_rate": 7.139861617951275e-05,
"loss": 0.0623,
"step": 5870
},
{
"epoch": 12.301255230125523,
"grad_norm": 0.22483764588832855,
"learning_rate": 7.129893808895395e-05,
"loss": 0.0583,
"step": 5880
},
{
"epoch": 12.322175732217573,
"grad_norm": 0.25868797302246094,
"learning_rate": 7.119915647755404e-05,
"loss": 0.0678,
"step": 5890
},
{
"epoch": 12.343096234309623,
"grad_norm": 0.23106709122657776,
"learning_rate": 7.109927183028914e-05,
"loss": 0.0614,
"step": 5900
},
{
"epoch": 12.364016736401673,
"grad_norm": 0.23294997215270996,
"learning_rate": 7.099928463263619e-05,
"loss": 0.0569,
"step": 5910
},
{
"epoch": 12.384937238493723,
"grad_norm": 0.1758103370666504,
"learning_rate": 7.08991953705705e-05,
"loss": 0.052,
"step": 5920
},
{
"epoch": 12.405857740585773,
"grad_norm": 0.22264644503593445,
"learning_rate": 7.07990045305635e-05,
"loss": 0.0599,
"step": 5930
},
{
"epoch": 12.426778242677825,
"grad_norm": 0.21731536090373993,
"learning_rate": 7.069871259958034e-05,
"loss": 0.055,
"step": 5940
},
{
"epoch": 12.447698744769875,
"grad_norm": 0.15914727747440338,
"learning_rate": 7.059832006507745e-05,
"loss": 0.0544,
"step": 5950
},
{
"epoch": 12.468619246861925,
"grad_norm": 0.2970428764820099,
"learning_rate": 7.049782741500028e-05,
"loss": 0.0585,
"step": 5960
},
{
"epoch": 12.489539748953975,
"grad_norm": 0.24060587584972382,
"learning_rate": 7.039723513778087e-05,
"loss": 0.057,
"step": 5970
},
{
"epoch": 12.510460251046025,
"grad_norm": 0.27191680669784546,
"learning_rate": 7.029654372233544e-05,
"loss": 0.062,
"step": 5980
},
{
"epoch": 12.531380753138075,
"grad_norm": 0.1836891770362854,
"learning_rate": 7.019575365806215e-05,
"loss": 0.0581,
"step": 5990
},
{
"epoch": 12.552301255230125,
"grad_norm": 0.22609245777130127,
"learning_rate": 7.009486543483858e-05,
"loss": 0.0571,
"step": 6000
},
{
"epoch": 12.573221757322175,
"grad_norm": 0.23014889657497406,
"learning_rate": 6.999387954301934e-05,
"loss": 0.0641,
"step": 6010
},
{
"epoch": 12.594142259414227,
"grad_norm": 0.19445940852165222,
"learning_rate": 6.989279647343388e-05,
"loss": 0.0525,
"step": 6020
},
{
"epoch": 12.615062761506277,
"grad_norm": 0.17028450965881348,
"learning_rate": 6.979161671738382e-05,
"loss": 0.0571,
"step": 6030
},
{
"epoch": 12.635983263598327,
"grad_norm": 0.18240980803966522,
"learning_rate": 6.969034076664085e-05,
"loss": 0.053,
"step": 6040
},
{
"epoch": 12.656903765690377,
"grad_norm": 0.19363874197006226,
"learning_rate": 6.958896911344411e-05,
"loss": 0.0569,
"step": 6050
},
{
"epoch": 12.677824267782427,
"grad_norm": 0.19600875675678253,
"learning_rate": 6.948750225049791e-05,
"loss": 0.0483,
"step": 6060
},
{
"epoch": 12.698744769874477,
"grad_norm": 0.17009440064430237,
"learning_rate": 6.938594067096936e-05,
"loss": 0.0503,
"step": 6070
},
{
"epoch": 12.719665271966527,
"grad_norm": 0.23083089292049408,
"learning_rate": 6.928428486848587e-05,
"loss": 0.0562,
"step": 6080
},
{
"epoch": 12.740585774058577,
"grad_norm": 0.18276119232177734,
"learning_rate": 6.918253533713282e-05,
"loss": 0.0569,
"step": 6090
},
{
"epoch": 12.761506276150628,
"grad_norm": 0.19873914122581482,
"learning_rate": 6.908069257145118e-05,
"loss": 0.057,
"step": 6100
},
{
"epoch": 12.782426778242678,
"grad_norm": 0.21409256756305695,
"learning_rate": 6.897875706643506e-05,
"loss": 0.0528,
"step": 6110
},
{
"epoch": 12.803347280334728,
"grad_norm": 0.1738913506269455,
"learning_rate": 6.887672931752927e-05,
"loss": 0.0529,
"step": 6120
},
{
"epoch": 12.824267782426778,
"grad_norm": 0.20664235949516296,
"learning_rate": 6.877460982062706e-05,
"loss": 0.0551,
"step": 6130
},
{
"epoch": 12.845188284518828,
"grad_norm": 0.2231292873620987,
"learning_rate": 6.86723990720675e-05,
"loss": 0.06,
"step": 6140
},
{
"epoch": 12.866108786610878,
"grad_norm": 0.2546100914478302,
"learning_rate": 6.857009756863326e-05,
"loss": 0.0544,
"step": 6150
},
{
"epoch": 12.887029288702928,
"grad_norm": 0.18180951476097107,
"learning_rate": 6.846770580754807e-05,
"loss": 0.0524,
"step": 6160
},
{
"epoch": 12.907949790794978,
"grad_norm": 0.2514965534210205,
"learning_rate": 6.836522428647438e-05,
"loss": 0.0591,
"step": 6170
},
{
"epoch": 12.92887029288703,
"grad_norm": 0.21483202278614044,
"learning_rate": 6.826265350351083e-05,
"loss": 0.0542,
"step": 6180
},
{
"epoch": 12.94979079497908,
"grad_norm": 0.21657870709896088,
"learning_rate": 6.815999395719e-05,
"loss": 0.0586,
"step": 6190
},
{
"epoch": 12.97071129707113,
"grad_norm": 0.20378071069717407,
"learning_rate": 6.805724614647586e-05,
"loss": 0.0574,
"step": 6200
},
{
"epoch": 12.99163179916318,
"grad_norm": 0.11441192775964737,
"learning_rate": 6.795441057076136e-05,
"loss": 0.0634,
"step": 6210
},
{
"epoch": 13.01255230125523,
"grad_norm": 0.15164940059185028,
"learning_rate": 6.785148772986603e-05,
"loss": 0.0487,
"step": 6220
},
{
"epoch": 13.03347280334728,
"grad_norm": 0.23580867052078247,
"learning_rate": 6.774847812403355e-05,
"loss": 0.0535,
"step": 6230
},
{
"epoch": 13.05439330543933,
"grad_norm": 0.16759879887104034,
"learning_rate": 6.76453822539293e-05,
"loss": 0.0593,
"step": 6240
},
{
"epoch": 13.07531380753138,
"grad_norm": 0.37763532996177673,
"learning_rate": 6.754220062063793e-05,
"loss": 0.0569,
"step": 6250
},
{
"epoch": 13.096234309623432,
"grad_norm": 0.23050107061862946,
"learning_rate": 6.743893372566099e-05,
"loss": 0.0551,
"step": 6260
},
{
"epoch": 13.117154811715482,
"grad_norm": 0.18879632651805878,
"learning_rate": 6.733558207091434e-05,
"loss": 0.0571,
"step": 6270
},
{
"epoch": 13.138075313807532,
"grad_norm": 0.23209887742996216,
"learning_rate": 6.723214615872585e-05,
"loss": 0.0626,
"step": 6280
},
{
"epoch": 13.158995815899582,
"grad_norm": 0.16621044278144836,
"learning_rate": 6.712862649183295e-05,
"loss": 0.0537,
"step": 6290
},
{
"epoch": 13.179916317991632,
"grad_norm": 0.2773350477218628,
"learning_rate": 6.70250235733801e-05,
"loss": 0.0624,
"step": 6300
},
{
"epoch": 13.200836820083682,
"grad_norm": 0.17005692422389984,
"learning_rate": 6.692133790691639e-05,
"loss": 0.0564,
"step": 6310
},
{
"epoch": 13.221757322175732,
"grad_norm": 0.2646332085132599,
"learning_rate": 6.681756999639311e-05,
"loss": 0.0557,
"step": 6320
},
{
"epoch": 13.242677824267782,
"grad_norm": 0.25880181789398193,
"learning_rate": 6.671372034616132e-05,
"loss": 0.0492,
"step": 6330
},
{
"epoch": 13.263598326359833,
"grad_norm": 0.17128583788871765,
"learning_rate": 6.660978946096933e-05,
"loss": 0.0545,
"step": 6340
},
{
"epoch": 13.284518828451883,
"grad_norm": 0.2767598330974579,
"learning_rate": 6.650577784596026e-05,
"loss": 0.0612,
"step": 6350
},
{
"epoch": 13.305439330543933,
"grad_norm": 0.17852754890918732,
"learning_rate": 6.640168600666967e-05,
"loss": 0.0475,
"step": 6360
},
{
"epoch": 13.326359832635983,
"grad_norm": 0.2585950195789337,
"learning_rate": 6.629751444902299e-05,
"loss": 0.0569,
"step": 6370
},
{
"epoch": 13.347280334728033,
"grad_norm": 0.27310749888420105,
"learning_rate": 6.619326367933312e-05,
"loss": 0.0578,
"step": 6380
},
{
"epoch": 13.368200836820083,
"grad_norm": 0.1731962114572525,
"learning_rate": 6.608893420429798e-05,
"loss": 0.0568,
"step": 6390
},
{
"epoch": 13.389121338912133,
"grad_norm": 0.196332648396492,
"learning_rate": 6.598452653099803e-05,
"loss": 0.0524,
"step": 6400
},
{
"epoch": 13.410041841004183,
"grad_norm": 0.21636244654655457,
"learning_rate": 6.588004116689375e-05,
"loss": 0.0596,
"step": 6410
},
{
"epoch": 13.430962343096235,
"grad_norm": 0.24683816730976105,
"learning_rate": 6.57754786198233e-05,
"loss": 0.0622,
"step": 6420
},
{
"epoch": 13.451882845188285,
"grad_norm": 0.19918185472488403,
"learning_rate": 6.567083939799992e-05,
"loss": 0.0574,
"step": 6430
},
{
"epoch": 13.472803347280335,
"grad_norm": 0.18632373213768005,
"learning_rate": 6.556612401000954e-05,
"loss": 0.0493,
"step": 6440
},
{
"epoch": 13.493723849372385,
"grad_norm": 0.19650404155254364,
"learning_rate": 6.54613329648083e-05,
"loss": 0.0529,
"step": 6450
},
{
"epoch": 13.514644351464435,
"grad_norm": 0.2306489199399948,
"learning_rate": 6.535646677172005e-05,
"loss": 0.0507,
"step": 6460
},
{
"epoch": 13.535564853556485,
"grad_norm": 0.1978807896375656,
"learning_rate": 6.52515259404339e-05,
"loss": 0.048,
"step": 6470
},
{
"epoch": 13.556485355648535,
"grad_norm": 0.2691389322280884,
"learning_rate": 6.514651098100167e-05,
"loss": 0.0549,
"step": 6480
},
{
"epoch": 13.577405857740585,
"grad_norm": 0.18920831382274628,
"learning_rate": 6.504142240383555e-05,
"loss": 0.0586,
"step": 6490
},
{
"epoch": 13.598326359832637,
"grad_norm": 0.19168440997600555,
"learning_rate": 6.493626071970549e-05,
"loss": 0.0603,
"step": 6500
},
{
"epoch": 13.619246861924687,
"grad_norm": 0.22604981064796448,
"learning_rate": 6.483102643973682e-05,
"loss": 0.0622,
"step": 6510
},
{
"epoch": 13.640167364016737,
"grad_norm": 0.21754129230976105,
"learning_rate": 6.472572007540764e-05,
"loss": 0.0566,
"step": 6520
},
{
"epoch": 13.661087866108787,
"grad_norm": 0.17625564336776733,
"learning_rate": 6.462034213854645e-05,
"loss": 0.0575,
"step": 6530
},
{
"epoch": 13.682008368200837,
"grad_norm": 0.19063302874565125,
"learning_rate": 6.451489314132962e-05,
"loss": 0.0604,
"step": 6540
},
{
"epoch": 13.702928870292887,
"grad_norm": 0.2231472134590149,
"learning_rate": 6.440937359627893e-05,
"loss": 0.0515,
"step": 6550
},
{
"epoch": 13.723849372384937,
"grad_norm": 0.18337441980838776,
"learning_rate": 6.430378401625894e-05,
"loss": 0.054,
"step": 6560
},
{
"epoch": 13.744769874476987,
"grad_norm": 0.19114044308662415,
"learning_rate": 6.419812491447472e-05,
"loss": 0.0581,
"step": 6570
},
{
"epoch": 13.765690376569038,
"grad_norm": 0.22809480130672455,
"learning_rate": 6.409239680446919e-05,
"loss": 0.0529,
"step": 6580
},
{
"epoch": 13.786610878661088,
"grad_norm": 0.20257806777954102,
"learning_rate": 6.398660020012072e-05,
"loss": 0.066,
"step": 6590
},
{
"epoch": 13.807531380753138,
"grad_norm": 0.1861211657524109,
"learning_rate": 6.38807356156405e-05,
"loss": 0.0557,
"step": 6600
},
{
"epoch": 13.828451882845188,
"grad_norm": 0.2235741913318634,
"learning_rate": 6.377480356557022e-05,
"loss": 0.0541,
"step": 6610
},
{
"epoch": 13.849372384937238,
"grad_norm": 0.20858436822891235,
"learning_rate": 6.366880456477942e-05,
"loss": 0.0592,
"step": 6620
},
{
"epoch": 13.870292887029288,
"grad_norm": 0.19966097176074982,
"learning_rate": 6.356273912846312e-05,
"loss": 0.054,
"step": 6630
},
{
"epoch": 13.891213389121338,
"grad_norm": 0.24596375226974487,
"learning_rate": 6.34566077721391e-05,
"loss": 0.0649,
"step": 6640
},
{
"epoch": 13.91213389121339,
"grad_norm": 0.1986909806728363,
"learning_rate": 6.335041101164569e-05,
"loss": 0.0597,
"step": 6650
},
{
"epoch": 13.93305439330544,
"grad_norm": 0.21726685762405396,
"learning_rate": 6.324414936313904e-05,
"loss": 0.0525,
"step": 6660
},
{
"epoch": 13.95397489539749,
"grad_norm": 0.17825187742710114,
"learning_rate": 6.313782334309066e-05,
"loss": 0.0602,
"step": 6670
},
{
"epoch": 13.97489539748954,
"grad_norm": 0.16731053590774536,
"learning_rate": 6.303143346828499e-05,
"loss": 0.0541,
"step": 6680
},
{
"epoch": 13.99581589958159,
"grad_norm": 0.20779544115066528,
"learning_rate": 6.292498025581674e-05,
"loss": 0.0623,
"step": 6690
},
{
"epoch": 14.01673640167364,
"grad_norm": 0.1886224001646042,
"learning_rate": 6.281846422308857e-05,
"loss": 0.0496,
"step": 6700
},
{
"epoch": 14.03765690376569,
"grad_norm": 0.17492704093456268,
"learning_rate": 6.271188588780839e-05,
"loss": 0.0552,
"step": 6710
},
{
"epoch": 14.05857740585774,
"grad_norm": 0.1669052243232727,
"learning_rate": 6.260524576798694e-05,
"loss": 0.0471,
"step": 6720
},
{
"epoch": 14.07949790794979,
"grad_norm": 0.2435608208179474,
"learning_rate": 6.249854438193528e-05,
"loss": 0.0575,
"step": 6730
},
{
"epoch": 14.100418410041842,
"grad_norm": 0.19360196590423584,
"learning_rate": 6.239178224826224e-05,
"loss": 0.0492,
"step": 6740
},
{
"epoch": 14.121338912133892,
"grad_norm": 0.18208938837051392,
"learning_rate": 6.228495988587188e-05,
"loss": 0.0627,
"step": 6750
},
{
"epoch": 14.142259414225942,
"grad_norm": 0.16620376706123352,
"learning_rate": 6.217807781396106e-05,
"loss": 0.0515,
"step": 6760
},
{
"epoch": 14.163179916317992,
"grad_norm": 0.17440137267112732,
"learning_rate": 6.207113655201676e-05,
"loss": 0.0449,
"step": 6770
},
{
"epoch": 14.184100418410042,
"grad_norm": 0.20068810880184174,
"learning_rate": 6.196413661981368e-05,
"loss": 0.0513,
"step": 6780
},
{
"epoch": 14.205020920502092,
"grad_norm": 0.30458423495292664,
"learning_rate": 6.185707853741175e-05,
"loss": 0.0526,
"step": 6790
},
{
"epoch": 14.225941422594142,
"grad_norm": 0.20665733516216278,
"learning_rate": 6.174996282515344e-05,
"loss": 0.0581,
"step": 6800
},
{
"epoch": 14.246861924686192,
"grad_norm": 0.19424675405025482,
"learning_rate": 6.164279000366131e-05,
"loss": 0.058,
"step": 6810
},
{
"epoch": 14.267782426778243,
"grad_norm": 0.2010163515806198,
"learning_rate": 6.153556059383561e-05,
"loss": 0.0549,
"step": 6820
},
{
"epoch": 14.288702928870293,
"grad_norm": 0.18270374834537506,
"learning_rate": 6.142827511685152e-05,
"loss": 0.0517,
"step": 6830
},
{
"epoch": 14.309623430962343,
"grad_norm": 0.16748015582561493,
"learning_rate": 6.132093409415678e-05,
"loss": 0.0457,
"step": 6840
},
{
"epoch": 14.330543933054393,
"grad_norm": 0.20862819254398346,
"learning_rate": 6.121353804746907e-05,
"loss": 0.0528,
"step": 6850
},
{
"epoch": 14.351464435146443,
"grad_norm": 0.22936491668224335,
"learning_rate": 6.110608749877352e-05,
"loss": 0.0473,
"step": 6860
},
{
"epoch": 14.372384937238493,
"grad_norm": 0.21870329976081848,
"learning_rate": 6.0998582970320205e-05,
"loss": 0.0545,
"step": 6870
},
{
"epoch": 14.393305439330543,
"grad_norm": 0.22260119020938873,
"learning_rate": 6.0891024984621506e-05,
"loss": 0.0604,
"step": 6880
},
{
"epoch": 14.414225941422593,
"grad_norm": 0.19479791820049286,
"learning_rate": 6.078341406444961e-05,
"loss": 0.0588,
"step": 6890
},
{
"epoch": 14.435146443514645,
"grad_norm": 0.29949843883514404,
"learning_rate": 6.067575073283405e-05,
"loss": 0.0584,
"step": 6900
},
{
"epoch": 14.456066945606695,
"grad_norm": 0.19155101478099823,
"learning_rate": 6.0568035513059073e-05,
"loss": 0.0517,
"step": 6910
},
{
"epoch": 14.476987447698745,
"grad_norm": 0.24398870766162872,
"learning_rate": 6.046026892866109e-05,
"loss": 0.0557,
"step": 6920
},
{
"epoch": 14.497907949790795,
"grad_norm": 0.2503119707107544,
"learning_rate": 6.0352451503426214e-05,
"loss": 0.0623,
"step": 6930
},
{
"epoch": 14.518828451882845,
"grad_norm": 0.1765798181295395,
"learning_rate": 6.024458376138762e-05,
"loss": 0.0506,
"step": 6940
},
{
"epoch": 14.539748953974895,
"grad_norm": 0.1602436751127243,
"learning_rate": 6.013666622682306e-05,
"loss": 0.0493,
"step": 6950
},
{
"epoch": 14.560669456066945,
"grad_norm": 0.22358030080795288,
"learning_rate": 6.002869942425231e-05,
"loss": 0.0582,
"step": 6960
},
{
"epoch": 14.581589958158997,
"grad_norm": 0.23106183111667633,
"learning_rate": 5.992068387843459e-05,
"loss": 0.0544,
"step": 6970
},
{
"epoch": 14.602510460251047,
"grad_norm": 0.19091829657554626,
"learning_rate": 5.981262011436603e-05,
"loss": 0.0479,
"step": 6980
},
{
"epoch": 14.623430962343097,
"grad_norm": 0.16238416731357574,
"learning_rate": 5.970450865727712e-05,
"loss": 0.0513,
"step": 6990
},
{
"epoch": 14.644351464435147,
"grad_norm": 0.22124448418617249,
"learning_rate": 5.9596350032630156e-05,
"loss": 0.0516,
"step": 7000
},
{
"epoch": 14.665271966527197,
"grad_norm": 0.18922406435012817,
"learning_rate": 5.9488144766116714e-05,
"loss": 0.0504,
"step": 7010
},
{
"epoch": 14.686192468619247,
"grad_norm": 0.19591659307479858,
"learning_rate": 5.9379893383655006e-05,
"loss": 0.0571,
"step": 7020
},
{
"epoch": 14.707112970711297,
"grad_norm": 0.2549462914466858,
"learning_rate": 5.927159641138744e-05,
"loss": 0.0511,
"step": 7030
},
{
"epoch": 14.728033472803347,
"grad_norm": 0.24339456856250763,
"learning_rate": 5.916325437567799e-05,
"loss": 0.053,
"step": 7040
},
{
"epoch": 14.748953974895397,
"grad_norm": 0.2489190697669983,
"learning_rate": 5.905486780310966e-05,
"loss": 0.0517,
"step": 7050
},
{
"epoch": 14.769874476987448,
"grad_norm": 0.2775043249130249,
"learning_rate": 5.8946437220481887e-05,
"loss": 0.0662,
"step": 7060
},
{
"epoch": 14.790794979079498,
"grad_norm": 0.21394887566566467,
"learning_rate": 5.883796315480805e-05,
"loss": 0.0603,
"step": 7070
},
{
"epoch": 14.811715481171548,
"grad_norm": 0.17485828697681427,
"learning_rate": 5.872944613331288e-05,
"loss": 0.0559,
"step": 7080
},
{
"epoch": 14.832635983263598,
"grad_norm": 0.24739952385425568,
"learning_rate": 5.862088668342986e-05,
"loss": 0.052,
"step": 7090
},
{
"epoch": 14.853556485355648,
"grad_norm": 0.15297842025756836,
"learning_rate": 5.8512285332798714e-05,
"loss": 0.057,
"step": 7100
},
{
"epoch": 14.874476987447698,
"grad_norm": 0.18535470962524414,
"learning_rate": 5.840364260926277e-05,
"loss": 0.0526,
"step": 7110
},
{
"epoch": 14.895397489539748,
"grad_norm": 0.19784200191497803,
"learning_rate": 5.8294959040866505e-05,
"loss": 0.0558,
"step": 7120
},
{
"epoch": 14.9163179916318,
"grad_norm": 0.1543385535478592,
"learning_rate": 5.818623515585292e-05,
"loss": 0.049,
"step": 7130
},
{
"epoch": 14.93723849372385,
"grad_norm": 0.17051587998867035,
"learning_rate": 5.8077471482660896e-05,
"loss": 0.0537,
"step": 7140
},
{
"epoch": 14.9581589958159,
"grad_norm": 0.19550904631614685,
"learning_rate": 5.796866854992276e-05,
"loss": 0.0595,
"step": 7150
},
{
"epoch": 14.97907949790795,
"grad_norm": 0.2906009256839752,
"learning_rate": 5.7859826886461676e-05,
"loss": 0.058,
"step": 7160
},
{
"epoch": 15.0,
"grad_norm": 1.7434508800506592,
"learning_rate": 5.775094702128899e-05,
"loss": 0.0554,
"step": 7170
},
{
"epoch": 15.02092050209205,
"grad_norm": 0.23117059469223022,
"learning_rate": 5.7642029483601746e-05,
"loss": 0.056,
"step": 7180
},
{
"epoch": 15.0418410041841,
"grad_norm": 0.24025872349739075,
"learning_rate": 5.753307480278012e-05,
"loss": 0.0631,
"step": 7190
},
{
"epoch": 15.06276150627615,
"grad_norm": 0.31165027618408203,
"learning_rate": 5.742408350838478e-05,
"loss": 0.0571,
"step": 7200
},
{
"epoch": 15.0836820083682,
"grad_norm": 0.2065475434064865,
"learning_rate": 5.7315056130154374e-05,
"loss": 0.0567,
"step": 7210
},
{
"epoch": 15.104602510460252,
"grad_norm": 0.23966439068317413,
"learning_rate": 5.720599319800292e-05,
"loss": 0.0504,
"step": 7220
},
{
"epoch": 15.125523012552302,
"grad_norm": 0.16557417809963226,
"learning_rate": 5.709689524201722e-05,
"loss": 0.0554,
"step": 7230
},
{
"epoch": 15.146443514644352,
"grad_norm": 0.24131985008716583,
"learning_rate": 5.698776279245437e-05,
"loss": 0.0564,
"step": 7240
},
{
"epoch": 15.167364016736402,
"grad_norm": 0.22979691624641418,
"learning_rate": 5.6878596379739036e-05,
"loss": 0.0493,
"step": 7250
},
{
"epoch": 15.188284518828452,
"grad_norm": 0.1828659474849701,
"learning_rate": 5.676939653446103e-05,
"loss": 0.0509,
"step": 7260
},
{
"epoch": 15.209205020920502,
"grad_norm": 0.1639285683631897,
"learning_rate": 5.666016378737261e-05,
"loss": 0.0582,
"step": 7270
},
{
"epoch": 15.230125523012552,
"grad_norm": 0.22438766062259674,
"learning_rate": 5.655089866938596e-05,
"loss": 0.0541,
"step": 7280
},
{
"epoch": 15.251046025104603,
"grad_norm": 0.22549496591091156,
"learning_rate": 5.6441601711570615e-05,
"loss": 0.0532,
"step": 7290
},
{
"epoch": 15.271966527196653,
"grad_norm": 0.22666563093662262,
"learning_rate": 5.633227344515085e-05,
"loss": 0.0462,
"step": 7300
},
{
"epoch": 15.292887029288703,
"grad_norm": 0.1765870302915573,
"learning_rate": 5.6222914401503116e-05,
"loss": 0.0539,
"step": 7310
},
{
"epoch": 15.313807531380753,
"grad_norm": 0.20818251371383667,
"learning_rate": 5.611352511215343e-05,
"loss": 0.0544,
"step": 7320
},
{
"epoch": 15.334728033472803,
"grad_norm": 0.19694848358631134,
"learning_rate": 5.600410610877488e-05,
"loss": 0.0495,
"step": 7330
},
{
"epoch": 15.355648535564853,
"grad_norm": 0.17688891291618347,
"learning_rate": 5.58946579231849e-05,
"loss": 0.0547,
"step": 7340
},
{
"epoch": 15.376569037656903,
"grad_norm": 0.21549320220947266,
"learning_rate": 5.578518108734279e-05,
"loss": 0.0517,
"step": 7350
},
{
"epoch": 15.397489539748953,
"grad_norm": 0.21116769313812256,
"learning_rate": 5.5675676133347096e-05,
"loss": 0.0526,
"step": 7360
},
{
"epoch": 15.418410041841003,
"grad_norm": 0.17821955680847168,
"learning_rate": 5.556614359343307e-05,
"loss": 0.0483,
"step": 7370
},
{
"epoch": 15.439330543933055,
"grad_norm": 0.2199217528104782,
"learning_rate": 5.545658399996999e-05,
"loss": 0.0478,
"step": 7380
},
{
"epoch": 15.460251046025105,
"grad_norm": 0.21454869210720062,
"learning_rate": 5.534699788545862e-05,
"loss": 0.053,
"step": 7390
},
{
"epoch": 15.481171548117155,
"grad_norm": 0.2748502492904663,
"learning_rate": 5.523738578252867e-05,
"loss": 0.0524,
"step": 7400
},
{
"epoch": 15.502092050209205,
"grad_norm": 0.2580304443836212,
"learning_rate": 5.512774822393614e-05,
"loss": 0.054,
"step": 7410
},
{
"epoch": 15.523012552301255,
"grad_norm": 0.22264724969863892,
"learning_rate": 5.5018085742560744e-05,
"loss": 0.0442,
"step": 7420
},
{
"epoch": 15.543933054393305,
"grad_norm": 0.20303569734096527,
"learning_rate": 5.4908398871403365e-05,
"loss": 0.0557,
"step": 7430
},
{
"epoch": 15.564853556485355,
"grad_norm": 0.44104623794555664,
"learning_rate": 5.4798688143583375e-05,
"loss": 0.0549,
"step": 7440
},
{
"epoch": 15.585774058577407,
"grad_norm": 0.1938384473323822,
"learning_rate": 5.468895409233615e-05,
"loss": 0.0549,
"step": 7450
},
{
"epoch": 15.606694560669457,
"grad_norm": 0.15259292721748352,
"learning_rate": 5.4579197251010414e-05,
"loss": 0.0513,
"step": 7460
},
{
"epoch": 15.627615062761507,
"grad_norm": 0.15911541879177094,
"learning_rate": 5.446941815306563e-05,
"loss": 0.0434,
"step": 7470
},
{
"epoch": 15.648535564853557,
"grad_norm": 0.1852342039346695,
"learning_rate": 5.435961733206947e-05,
"loss": 0.0609,
"step": 7480
},
{
"epoch": 15.669456066945607,
"grad_norm": 0.25824370980262756,
"learning_rate": 5.424979532169516e-05,
"loss": 0.0506,
"step": 7490
},
{
"epoch": 15.690376569037657,
"grad_norm": 0.19190651178359985,
"learning_rate": 5.413995265571895e-05,
"loss": 0.054,
"step": 7500
},
{
"epoch": 15.711297071129707,
"grad_norm": 0.24761340022087097,
"learning_rate": 5.403008986801746e-05,
"loss": 0.0558,
"step": 7510
},
{
"epoch": 15.732217573221757,
"grad_norm": 0.21222639083862305,
"learning_rate": 5.3920207492565114e-05,
"loss": 0.0506,
"step": 7520
},
{
"epoch": 15.753138075313807,
"grad_norm": 0.18009130656719208,
"learning_rate": 5.381030606343154e-05,
"loss": 0.0531,
"step": 7530
},
{
"epoch": 15.774058577405858,
"grad_norm": 0.2761033773422241,
"learning_rate": 5.370038611477894e-05,
"loss": 0.0516,
"step": 7540
},
{
"epoch": 15.794979079497908,
"grad_norm": 0.194721981883049,
"learning_rate": 5.359044818085963e-05,
"loss": 0.057,
"step": 7550
},
{
"epoch": 15.815899581589958,
"grad_norm": 0.20458896458148956,
"learning_rate": 5.3480492796013214e-05,
"loss": 0.0477,
"step": 7560
},
{
"epoch": 15.836820083682008,
"grad_norm": 0.13182362914085388,
"learning_rate": 5.33705204946642e-05,
"loss": 0.0475,
"step": 7570
},
{
"epoch": 15.857740585774058,
"grad_norm": 0.14037500321865082,
"learning_rate": 5.326053181131927e-05,
"loss": 0.0521,
"step": 7580
},
{
"epoch": 15.878661087866108,
"grad_norm": 0.237847238779068,
"learning_rate": 5.3150527280564776e-05,
"loss": 0.0578,
"step": 7590
},
{
"epoch": 15.899581589958158,
"grad_norm": 0.24887144565582275,
"learning_rate": 5.3040507437064034e-05,
"loss": 0.0544,
"step": 7600
},
{
"epoch": 15.92050209205021,
"grad_norm": 0.22051165997982025,
"learning_rate": 5.293047281555482e-05,
"loss": 0.0545,
"step": 7610
},
{
"epoch": 15.94142259414226,
"grad_norm": 0.167110413312912,
"learning_rate": 5.2820423950846765e-05,
"loss": 0.047,
"step": 7620
},
{
"epoch": 15.96234309623431,
"grad_norm": 0.16554315388202667,
"learning_rate": 5.2710361377818696e-05,
"loss": 0.0513,
"step": 7630
},
{
"epoch": 15.98326359832636,
"grad_norm": 0.14443959295749664,
"learning_rate": 5.2600285631416026e-05,
"loss": 0.0498,
"step": 7640
},
{
"epoch": 16.00418410041841,
"grad_norm": 0.2642640471458435,
"learning_rate": 5.249019724664826e-05,
"loss": 0.048,
"step": 7650
},
{
"epoch": 16.02510460251046,
"grad_norm": 0.20373564958572388,
"learning_rate": 5.2380096758586315e-05,
"loss": 0.0602,
"step": 7660
},
{
"epoch": 16.04602510460251,
"grad_norm": 0.22573313117027283,
"learning_rate": 5.226998470235993e-05,
"loss": 0.0491,
"step": 7670
},
{
"epoch": 16.06694560669456,
"grad_norm": 0.2332305908203125,
"learning_rate": 5.215986161315507e-05,
"loss": 0.0527,
"step": 7680
},
{
"epoch": 16.08786610878661,
"grad_norm": 0.191607266664505,
"learning_rate": 5.20497280262113e-05,
"loss": 0.0467,
"step": 7690
},
{
"epoch": 16.10878661087866,
"grad_norm": 0.22203144431114197,
"learning_rate": 5.193958447681924e-05,
"loss": 0.051,
"step": 7700
},
{
"epoch": 16.12970711297071,
"grad_norm": 0.20125925540924072,
"learning_rate": 5.182943150031793e-05,
"loss": 0.0459,
"step": 7710
},
{
"epoch": 16.15062761506276,
"grad_norm": 0.2417091727256775,
"learning_rate": 5.1719269632092204e-05,
"loss": 0.0524,
"step": 7720
},
{
"epoch": 16.171548117154813,
"grad_norm": 0.20296020805835724,
"learning_rate": 5.160909940757015e-05,
"loss": 0.0496,
"step": 7730
},
{
"epoch": 16.192468619246863,
"grad_norm": 0.16926750540733337,
"learning_rate": 5.149892136222043e-05,
"loss": 0.0544,
"step": 7740
},
{
"epoch": 16.213389121338913,
"grad_norm": 0.1914985626935959,
"learning_rate": 5.1388736031549744e-05,
"loss": 0.0507,
"step": 7750
},
{
"epoch": 16.234309623430963,
"grad_norm": 0.2034175992012024,
"learning_rate": 5.127854395110021e-05,
"loss": 0.0459,
"step": 7760
},
{
"epoch": 16.255230125523013,
"grad_norm": 0.1641988456249237,
"learning_rate": 5.116834565644671e-05,
"loss": 0.0503,
"step": 7770
},
{
"epoch": 16.276150627615063,
"grad_norm": 0.17824122309684753,
"learning_rate": 5.10581416831944e-05,
"loss": 0.048,
"step": 7780
},
{
"epoch": 16.297071129707113,
"grad_norm": 0.15559978783130646,
"learning_rate": 5.094793256697593e-05,
"loss": 0.0543,
"step": 7790
},
{
"epoch": 16.317991631799163,
"grad_norm": 0.18097707629203796,
"learning_rate": 5.0837718843449075e-05,
"loss": 0.0522,
"step": 7800
},
{
"epoch": 16.338912133891213,
"grad_norm": 0.20327609777450562,
"learning_rate": 5.07275010482939e-05,
"loss": 0.0508,
"step": 7810
},
{
"epoch": 16.359832635983263,
"grad_norm": 0.15760394930839539,
"learning_rate": 5.061727971721032e-05,
"loss": 0.0426,
"step": 7820
},
{
"epoch": 16.380753138075313,
"grad_norm": 0.2155444324016571,
"learning_rate": 5.050705538591538e-05,
"loss": 0.0556,
"step": 7830
},
{
"epoch": 16.401673640167363,
"grad_norm": 0.1983986645936966,
"learning_rate": 5.0396828590140785e-05,
"loss": 0.0567,
"step": 7840
},
{
"epoch": 16.422594142259413,
"grad_norm": 0.19888253509998322,
"learning_rate": 5.0286599865630157e-05,
"loss": 0.0583,
"step": 7850
},
{
"epoch": 16.443514644351463,
"grad_norm": 0.14076486229896545,
"learning_rate": 5.017636974813649e-05,
"loss": 0.051,
"step": 7860
},
{
"epoch": 16.464435146443513,
"grad_norm": 0.22674958407878876,
"learning_rate": 5.006613877341959e-05,
"loss": 0.0479,
"step": 7870
},
{
"epoch": 16.485355648535563,
"grad_norm": 0.23650920391082764,
"learning_rate": 4.99559074772434e-05,
"loss": 0.0521,
"step": 7880
},
{
"epoch": 16.506276150627617,
"grad_norm": 0.2411746233701706,
"learning_rate": 4.9845676395373455e-05,
"loss": 0.0456,
"step": 7890
},
{
"epoch": 16.527196652719667,
"grad_norm": 0.2268081158399582,
"learning_rate": 4.9735446063574184e-05,
"loss": 0.0498,
"step": 7900
},
{
"epoch": 16.548117154811717,
"grad_norm": 0.1393486112356186,
"learning_rate": 4.962521701760645e-05,
"loss": 0.05,
"step": 7910
},
{
"epoch": 16.569037656903767,
"grad_norm": 0.16085295379161835,
"learning_rate": 4.951498979322482e-05,
"loss": 0.0528,
"step": 7920
},
{
"epoch": 16.589958158995817,
"grad_norm": 0.17427772283554077,
"learning_rate": 4.9404764926174996e-05,
"loss": 0.0558,
"step": 7930
},
{
"epoch": 16.610878661087867,
"grad_norm": 0.12604235112667084,
"learning_rate": 4.929454295219127e-05,
"loss": 0.0515,
"step": 7940
},
{
"epoch": 16.631799163179917,
"grad_norm": 0.19448940455913544,
"learning_rate": 4.9184324406993844e-05,
"loss": 0.0567,
"step": 7950
},
{
"epoch": 16.652719665271967,
"grad_norm": 0.24758629500865936,
"learning_rate": 4.907410982628623e-05,
"loss": 0.0572,
"step": 7960
},
{
"epoch": 16.673640167364017,
"grad_norm": 0.15390925109386444,
"learning_rate": 4.896389974575273e-05,
"loss": 0.0576,
"step": 7970
},
{
"epoch": 16.694560669456067,
"grad_norm": 0.18204046785831451,
"learning_rate": 4.885369470105571e-05,
"loss": 0.0569,
"step": 7980
},
{
"epoch": 16.715481171548117,
"grad_norm": 0.2037144899368286,
"learning_rate": 4.874349522783313e-05,
"loss": 0.0535,
"step": 7990
},
{
"epoch": 16.736401673640167,
"grad_norm": 0.20136842131614685,
"learning_rate": 4.863330186169581e-05,
"loss": 0.0605,
"step": 8000
},
{
"epoch": 16.757322175732217,
"grad_norm": 0.21489471197128296,
"learning_rate": 4.8523115138224885e-05,
"loss": 0.0516,
"step": 8010
},
{
"epoch": 16.778242677824267,
"grad_norm": 0.23964069783687592,
"learning_rate": 4.841293559296928e-05,
"loss": 0.0555,
"step": 8020
},
{
"epoch": 16.799163179916317,
"grad_norm": 0.21049214899539948,
"learning_rate": 4.830276376144295e-05,
"loss": 0.0567,
"step": 8030
},
{
"epoch": 16.820083682008367,
"grad_norm": 0.23198960721492767,
"learning_rate": 4.819260017912237e-05,
"loss": 0.0513,
"step": 8040
},
{
"epoch": 16.84100418410042,
"grad_norm": 0.2600829601287842,
"learning_rate": 4.808244538144396e-05,
"loss": 0.0511,
"step": 8050
},
{
"epoch": 16.86192468619247,
"grad_norm": 0.1669810265302658,
"learning_rate": 4.797229990380142e-05,
"loss": 0.0464,
"step": 8060
},
{
"epoch": 16.88284518828452,
"grad_norm": 0.1619091033935547,
"learning_rate": 4.786216428154317e-05,
"loss": 0.0509,
"step": 8070
},
{
"epoch": 16.90376569037657,
"grad_norm": 0.22189967334270477,
"learning_rate": 4.7752039049969685e-05,
"loss": 0.0536,
"step": 8080
},
{
"epoch": 16.92468619246862,
"grad_norm": 0.1866067796945572,
"learning_rate": 4.7641924744330956e-05,
"loss": 0.0437,
"step": 8090
},
{
"epoch": 16.94560669456067,
"grad_norm": 0.24444331228733063,
"learning_rate": 4.7531821899823925e-05,
"loss": 0.0506,
"step": 8100
},
{
"epoch": 16.96652719665272,
"grad_norm": 0.2442953884601593,
"learning_rate": 4.742173105158973e-05,
"loss": 0.0512,
"step": 8110
},
{
"epoch": 16.98744769874477,
"grad_norm": 0.2353048324584961,
"learning_rate": 4.731165273471129e-05,
"loss": 0.0481,
"step": 8120
},
{
"epoch": 17.00836820083682,
"grad_norm": 0.21988806128501892,
"learning_rate": 4.720158748421057e-05,
"loss": 0.0495,
"step": 8130
},
{
"epoch": 17.02928870292887,
"grad_norm": 0.26627087593078613,
"learning_rate": 4.709153583504602e-05,
"loss": 0.0517,
"step": 8140
},
{
"epoch": 17.05020920502092,
"grad_norm": 0.1413852423429489,
"learning_rate": 4.6981498322110027e-05,
"loss": 0.0395,
"step": 8150
},
{
"epoch": 17.07112970711297,
"grad_norm": 0.2996743619441986,
"learning_rate": 4.6871475480226256e-05,
"loss": 0.0568,
"step": 8160
},
{
"epoch": 17.09205020920502,
"grad_norm": 0.2565760016441345,
"learning_rate": 4.6761467844147004e-05,
"loss": 0.055,
"step": 8170
},
{
"epoch": 17.11297071129707,
"grad_norm": 0.23399613797664642,
"learning_rate": 4.665147594855076e-05,
"loss": 0.0467,
"step": 8180
},
{
"epoch": 17.13389121338912,
"grad_norm": 0.22994205355644226,
"learning_rate": 4.654150032803943e-05,
"loss": 0.0548,
"step": 8190
},
{
"epoch": 17.15481171548117,
"grad_norm": 0.24214763939380646,
"learning_rate": 4.643154151713588e-05,
"loss": 0.0531,
"step": 8200
},
{
"epoch": 17.175732217573223,
"grad_norm": 0.2184325009584427,
"learning_rate": 4.6321600050281225e-05,
"loss": 0.0543,
"step": 8210
},
{
"epoch": 17.196652719665273,
"grad_norm": 0.20963142812252045,
"learning_rate": 4.6211676461832264e-05,
"loss": 0.0468,
"step": 8220
},
{
"epoch": 17.217573221757323,
"grad_norm": 0.25760477781295776,
"learning_rate": 4.610177128605899e-05,
"loss": 0.056,
"step": 8230
},
{
"epoch": 17.238493723849373,
"grad_norm": 0.22882136702537537,
"learning_rate": 4.599188505714184e-05,
"loss": 0.0531,
"step": 8240
},
{
"epoch": 17.259414225941423,
"grad_norm": 0.16614781320095062,
"learning_rate": 4.588201830916912e-05,
"loss": 0.0514,
"step": 8250
},
{
"epoch": 17.280334728033473,
"grad_norm": 0.15788774192333221,
"learning_rate": 4.577217157613456e-05,
"loss": 0.0499,
"step": 8260
},
{
"epoch": 17.301255230125523,
"grad_norm": 0.17240086197853088,
"learning_rate": 4.566234539193452e-05,
"loss": 0.0485,
"step": 8270
},
{
"epoch": 17.322175732217573,
"grad_norm": 0.18178971111774445,
"learning_rate": 4.555254029036555e-05,
"loss": 0.0554,
"step": 8280
},
{
"epoch": 17.343096234309623,
"grad_norm": 0.22576147317886353,
"learning_rate": 4.544275680512165e-05,
"loss": 0.0546,
"step": 8290
},
{
"epoch": 17.364016736401673,
"grad_norm": 0.2796344459056854,
"learning_rate": 4.5332995469791836e-05,
"loss": 0.0481,
"step": 8300
},
{
"epoch": 17.384937238493723,
"grad_norm": 0.2606862485408783,
"learning_rate": 4.522325681785744e-05,
"loss": 0.0483,
"step": 8310
},
{
"epoch": 17.405857740585773,
"grad_norm": 0.18557748198509216,
"learning_rate": 4.511354138268952e-05,
"loss": 0.0472,
"step": 8320
},
{
"epoch": 17.426778242677823,
"grad_norm": 0.17844267189502716,
"learning_rate": 4.50038496975463e-05,
"loss": 0.0542,
"step": 8330
},
{
"epoch": 17.447698744769873,
"grad_norm": 0.22394883632659912,
"learning_rate": 4.489418229557063e-05,
"loss": 0.0492,
"step": 8340
},
{
"epoch": 17.468619246861923,
"grad_norm": 0.20772208273410797,
"learning_rate": 4.478453970978722e-05,
"loss": 0.0517,
"step": 8350
},
{
"epoch": 17.489539748953973,
"grad_norm": 0.21835976839065552,
"learning_rate": 4.4674922473100286e-05,
"loss": 0.0493,
"step": 8360
},
{
"epoch": 17.510460251046027,
"grad_norm": 0.20974472165107727,
"learning_rate": 4.4565331118290756e-05,
"loss": 0.0472,
"step": 8370
},
{
"epoch": 17.531380753138077,
"grad_norm": 0.24277760088443756,
"learning_rate": 4.4455766178013775e-05,
"loss": 0.0454,
"step": 8380
},
{
"epoch": 17.552301255230127,
"grad_norm": 0.23588554561138153,
"learning_rate": 4.434622818479615e-05,
"loss": 0.0456,
"step": 8390
},
{
"epoch": 17.573221757322177,
"grad_norm": 0.21761219203472137,
"learning_rate": 4.4236717671033646e-05,
"loss": 0.0524,
"step": 8400
},
{
"epoch": 17.594142259414227,
"grad_norm": 0.25820693373680115,
"learning_rate": 4.412723516898853e-05,
"loss": 0.0547,
"step": 8410
},
{
"epoch": 17.615062761506277,
"grad_norm": 0.21615050733089447,
"learning_rate": 4.40177812107869e-05,
"loss": 0.0465,
"step": 8420
},
{
"epoch": 17.635983263598327,
"grad_norm": 0.16582241654396057,
"learning_rate": 4.390835632841606e-05,
"loss": 0.0492,
"step": 8430
},
{
"epoch": 17.656903765690377,
"grad_norm": 0.17844611406326294,
"learning_rate": 4.3798961053722115e-05,
"loss": 0.0456,
"step": 8440
},
{
"epoch": 17.677824267782427,
"grad_norm": 0.1292414367198944,
"learning_rate": 4.368959591840718e-05,
"loss": 0.0434,
"step": 8450
},
{
"epoch": 17.698744769874477,
"grad_norm": 0.21596643328666687,
"learning_rate": 4.3580261454026865e-05,
"loss": 0.0507,
"step": 8460
},
{
"epoch": 17.719665271966527,
"grad_norm": 0.27433592081069946,
"learning_rate": 4.3470958191987786e-05,
"loss": 0.0484,
"step": 8470
},
{
"epoch": 17.740585774058577,
"grad_norm": 0.1953336000442505,
"learning_rate": 4.336168666354484e-05,
"loss": 0.0484,
"step": 8480
},
{
"epoch": 17.761506276150627,
"grad_norm": 0.16921213269233704,
"learning_rate": 4.325244739979873e-05,
"loss": 0.045,
"step": 8490
},
{
"epoch": 17.782426778242677,
"grad_norm": 0.14465534687042236,
"learning_rate": 4.314324093169332e-05,
"loss": 0.0534,
"step": 8500
},
{
"epoch": 17.803347280334727,
"grad_norm": 0.16474668681621552,
"learning_rate": 4.303406779001302e-05,
"loss": 0.0514,
"step": 8510
},
{
"epoch": 17.824267782426777,
"grad_norm": 0.1937585026025772,
"learning_rate": 4.292492850538038e-05,
"loss": 0.048,
"step": 8520
},
{
"epoch": 17.84518828451883,
"grad_norm": 0.1891181617975235,
"learning_rate": 4.28158236082533e-05,
"loss": 0.0527,
"step": 8530
},
{
"epoch": 17.86610878661088,
"grad_norm": 0.19222994148731232,
"learning_rate": 4.270675362892256e-05,
"loss": 0.0514,
"step": 8540
},
{
"epoch": 17.88702928870293,
"grad_norm": 0.17729121446609497,
"learning_rate": 4.2597719097509246e-05,
"loss": 0.0473,
"step": 8550
},
{
"epoch": 17.90794979079498,
"grad_norm": 0.18024882674217224,
"learning_rate": 4.2488720543962146e-05,
"loss": 0.0504,
"step": 8560
},
{
"epoch": 17.92887029288703,
"grad_norm": 0.19688746333122253,
"learning_rate": 4.23797584980552e-05,
"loss": 0.0478,
"step": 8570
},
{
"epoch": 17.94979079497908,
"grad_norm": 0.2507992088794708,
"learning_rate": 4.227083348938486e-05,
"loss": 0.0454,
"step": 8580
},
{
"epoch": 17.97071129707113,
"grad_norm": 0.19047075510025024,
"learning_rate": 4.2161946047367586e-05,
"loss": 0.0489,
"step": 8590
},
{
"epoch": 17.99163179916318,
"grad_norm": 0.17170806229114532,
"learning_rate": 4.2053096701237294e-05,
"loss": 0.0455,
"step": 8600
},
{
"epoch": 18.01255230125523,
"grad_norm": 0.21080999076366425,
"learning_rate": 4.1944285980042656e-05,
"loss": 0.0735,
"step": 8610
},
{
"epoch": 18.03347280334728,
"grad_norm": 0.22191490232944489,
"learning_rate": 4.183551441264469e-05,
"loss": 0.0493,
"step": 8620
},
{
"epoch": 18.05439330543933,
"grad_norm": 0.1833556592464447,
"learning_rate": 4.172678252771408e-05,
"loss": 0.0518,
"step": 8630
},
{
"epoch": 18.07531380753138,
"grad_norm": 0.1750289648771286,
"learning_rate": 4.16180908537286e-05,
"loss": 0.0508,
"step": 8640
},
{
"epoch": 18.09623430962343,
"grad_norm": 0.24376438558101654,
"learning_rate": 4.150943991897065e-05,
"loss": 0.0492,
"step": 8650
},
{
"epoch": 18.11715481171548,
"grad_norm": 0.19261358678340912,
"learning_rate": 4.1400830251524605e-05,
"loss": 0.0443,
"step": 8660
},
{
"epoch": 18.13807531380753,
"grad_norm": 0.17163701355457306,
"learning_rate": 4.1292262379274215e-05,
"loss": 0.0498,
"step": 8670
},
{
"epoch": 18.15899581589958,
"grad_norm": 0.20435048639774323,
"learning_rate": 4.118373682990016e-05,
"loss": 0.0469,
"step": 8680
},
{
"epoch": 18.179916317991633,
"grad_norm": 0.21016691625118256,
"learning_rate": 4.107525413087737e-05,
"loss": 0.0462,
"step": 8690
},
{
"epoch": 18.200836820083683,
"grad_norm": 0.21283622086048126,
"learning_rate": 4.096681480947252e-05,
"loss": 0.0533,
"step": 8700
},
{
"epoch": 18.221757322175733,
"grad_norm": 0.17881537973880768,
"learning_rate": 4.085841939274146e-05,
"loss": 0.0456,
"step": 8710
},
{
"epoch": 18.242677824267783,
"grad_norm": 0.1933610439300537,
"learning_rate": 4.075006840752662e-05,
"loss": 0.0488,
"step": 8720
},
{
"epoch": 18.263598326359833,
"grad_norm": 0.18333800137043,
"learning_rate": 4.0641762380454515e-05,
"loss": 0.043,
"step": 8730
},
{
"epoch": 18.284518828451883,
"grad_norm": 0.20655131340026855,
"learning_rate": 4.0533501837933134e-05,
"loss": 0.0533,
"step": 8740
},
{
"epoch": 18.305439330543933,
"grad_norm": 0.17780126631259918,
"learning_rate": 4.042528730614936e-05,
"loss": 0.0419,
"step": 8750
},
{
"epoch": 18.326359832635983,
"grad_norm": 0.3027488589286804,
"learning_rate": 4.0317119311066486e-05,
"loss": 0.0538,
"step": 8760
},
{
"epoch": 18.347280334728033,
"grad_norm": 0.19167320430278778,
"learning_rate": 4.02089983784216e-05,
"loss": 0.0499,
"step": 8770
},
{
"epoch": 18.368200836820083,
"grad_norm": 0.22469434142112732,
"learning_rate": 4.010092503372309e-05,
"loss": 0.0507,
"step": 8780
},
{
"epoch": 18.389121338912133,
"grad_norm": 0.19235819578170776,
"learning_rate": 3.999289980224797e-05,
"loss": 0.0463,
"step": 8790
},
{
"epoch": 18.410041841004183,
"grad_norm": 0.2260487973690033,
"learning_rate": 3.9884923209039455e-05,
"loss": 0.0528,
"step": 8800
},
{
"epoch": 18.430962343096233,
"grad_norm": 0.18136419355869293,
"learning_rate": 3.977699577890439e-05,
"loss": 0.0533,
"step": 8810
},
{
"epoch": 18.451882845188283,
"grad_norm": 0.17911958694458008,
"learning_rate": 3.96691180364106e-05,
"loss": 0.0545,
"step": 8820
},
{
"epoch": 18.472803347280333,
"grad_norm": 0.15562835335731506,
"learning_rate": 3.956129050588446e-05,
"loss": 0.0462,
"step": 8830
},
{
"epoch": 18.493723849372383,
"grad_norm": 0.18491879105567932,
"learning_rate": 3.9453513711408275e-05,
"loss": 0.0524,
"step": 8840
},
{
"epoch": 18.514644351464437,
"grad_norm": 0.20958520472049713,
"learning_rate": 3.934578817681774e-05,
"loss": 0.0524,
"step": 8850
},
{
"epoch": 18.535564853556487,
"grad_norm": 0.16132481396198273,
"learning_rate": 3.9238114425699465e-05,
"loss": 0.0457,
"step": 8860
},
{
"epoch": 18.556485355648537,
"grad_norm": 0.18827597796916962,
"learning_rate": 3.91304929813883e-05,
"loss": 0.0545,
"step": 8870
},
{
"epoch": 18.577405857740587,
"grad_norm": 0.24619817733764648,
"learning_rate": 3.902292436696489e-05,
"loss": 0.0514,
"step": 8880
},
{
"epoch": 18.598326359832637,
"grad_norm": 0.17363670468330383,
"learning_rate": 3.891540910525316e-05,
"loss": 0.0491,
"step": 8890
},
{
"epoch": 18.619246861924687,
"grad_norm": 0.1485545039176941,
"learning_rate": 3.8807947718817624e-05,
"loss": 0.0446,
"step": 8900
},
{
"epoch": 18.640167364016737,
"grad_norm": 0.19468659162521362,
"learning_rate": 3.870054072996103e-05,
"loss": 0.0496,
"step": 8910
},
{
"epoch": 18.661087866108787,
"grad_norm": 0.14619556069374084,
"learning_rate": 3.859318866072168e-05,
"loss": 0.0539,
"step": 8920
},
{
"epoch": 18.682008368200837,
"grad_norm": 0.16320869326591492,
"learning_rate": 3.8485892032870965e-05,
"loss": 0.0436,
"step": 8930
},
{
"epoch": 18.702928870292887,
"grad_norm": 0.19678333401679993,
"learning_rate": 3.83786513679108e-05,
"loss": 0.0452,
"step": 8940
},
{
"epoch": 18.723849372384937,
"grad_norm": 0.20504184067249298,
"learning_rate": 3.8271467187071134e-05,
"loss": 0.0458,
"step": 8950
},
{
"epoch": 18.744769874476987,
"grad_norm": 0.1663840264081955,
"learning_rate": 3.816434001130732e-05,
"loss": 0.0468,
"step": 8960
},
{
"epoch": 18.765690376569037,
"grad_norm": 0.1819252073764801,
"learning_rate": 3.8057270361297706e-05,
"loss": 0.0527,
"step": 8970
},
{
"epoch": 18.786610878661087,
"grad_norm": 0.2027643322944641,
"learning_rate": 3.7950258757440985e-05,
"loss": 0.0511,
"step": 8980
},
{
"epoch": 18.807531380753137,
"grad_norm": 0.1975032091140747,
"learning_rate": 3.78433057198538e-05,
"loss": 0.041,
"step": 8990
},
{
"epoch": 18.828451882845187,
"grad_norm": 0.2793610990047455,
"learning_rate": 3.773641176836807e-05,
"loss": 0.0514,
"step": 9000
},
{
"epoch": 18.84937238493724,
"grad_norm": 0.19008958339691162,
"learning_rate": 3.7629577422528555e-05,
"loss": 0.0518,
"step": 9010
},
{
"epoch": 18.87029288702929,
"grad_norm": 0.23253273963928223,
"learning_rate": 3.7522803201590325e-05,
"loss": 0.048,
"step": 9020
},
{
"epoch": 18.89121338912134,
"grad_norm": 0.1523078978061676,
"learning_rate": 3.741608962451621e-05,
"loss": 0.0458,
"step": 9030
},
{
"epoch": 18.91213389121339,
"grad_norm": 0.24275828897953033,
"learning_rate": 3.730943720997427e-05,
"loss": 0.0466,
"step": 9040
},
{
"epoch": 18.93305439330544,
"grad_norm": 0.19571851193904877,
"learning_rate": 3.720284647633532e-05,
"loss": 0.0446,
"step": 9050
},
{
"epoch": 18.95397489539749,
"grad_norm": 0.22840335965156555,
"learning_rate": 3.7096317941670365e-05,
"loss": 0.0438,
"step": 9060
},
{
"epoch": 18.97489539748954,
"grad_norm": 0.17244522273540497,
"learning_rate": 3.698985212374814e-05,
"loss": 0.0454,
"step": 9070
},
{
"epoch": 18.99581589958159,
"grad_norm": 0.1521451771259308,
"learning_rate": 3.6883449540032477e-05,
"loss": 0.0436,
"step": 9080
},
{
"epoch": 19.01673640167364,
"grad_norm": 0.2062705010175705,
"learning_rate": 3.6777110707679905e-05,
"loss": 0.0511,
"step": 9090
},
{
"epoch": 19.03765690376569,
"grad_norm": 0.20016764104366302,
"learning_rate": 3.667083614353715e-05,
"loss": 0.0438,
"step": 9100
},
{
"epoch": 19.05857740585774,
"grad_norm": 0.15178647637367249,
"learning_rate": 3.6564626364138465e-05,
"loss": 0.045,
"step": 9110
},
{
"epoch": 19.07949790794979,
"grad_norm": 0.19940511882305145,
"learning_rate": 3.645848188570331e-05,
"loss": 0.0492,
"step": 9120
},
{
"epoch": 19.10041841004184,
"grad_norm": 0.3522074818611145,
"learning_rate": 3.635240322413374e-05,
"loss": 0.0538,
"step": 9130
},
{
"epoch": 19.12133891213389,
"grad_norm": 0.28608760237693787,
"learning_rate": 3.624639089501187e-05,
"loss": 0.0447,
"step": 9140
},
{
"epoch": 19.14225941422594,
"grad_norm": 0.2104145586490631,
"learning_rate": 3.614044541359749e-05,
"loss": 0.0428,
"step": 9150
},
{
"epoch": 19.16317991631799,
"grad_norm": 0.25791677832603455,
"learning_rate": 3.603456729482541e-05,
"loss": 0.0462,
"step": 9160
},
{
"epoch": 19.184100418410043,
"grad_norm": 0.1877574324607849,
"learning_rate": 3.5928757053303055e-05,
"loss": 0.0548,
"step": 9170
},
{
"epoch": 19.205020920502093,
"grad_norm": 0.1788347214460373,
"learning_rate": 3.5823015203308e-05,
"loss": 0.0487,
"step": 9180
},
{
"epoch": 19.225941422594143,
"grad_norm": 0.21949069201946259,
"learning_rate": 3.57173422587853e-05,
"loss": 0.0529,
"step": 9190
},
{
"epoch": 19.246861924686193,
"grad_norm": 0.15112219750881195,
"learning_rate": 3.561173873334522e-05,
"loss": 0.0469,
"step": 9200
},
{
"epoch": 19.267782426778243,
"grad_norm": 0.18177688121795654,
"learning_rate": 3.550620514026056e-05,
"loss": 0.0446,
"step": 9210
},
{
"epoch": 19.288702928870293,
"grad_norm": 0.14711672067642212,
"learning_rate": 3.54007419924642e-05,
"loss": 0.0482,
"step": 9220
},
{
"epoch": 19.309623430962343,
"grad_norm": 0.20293840765953064,
"learning_rate": 3.52953498025467e-05,
"loss": 0.0491,
"step": 9230
},
{
"epoch": 19.330543933054393,
"grad_norm": 0.1724160760641098,
"learning_rate": 3.519002908275368e-05,
"loss": 0.0495,
"step": 9240
},
{
"epoch": 19.351464435146443,
"grad_norm": 0.1835453063249588,
"learning_rate": 3.508478034498339e-05,
"loss": 0.0471,
"step": 9250
},
{
"epoch": 19.372384937238493,
"grad_norm": 0.16504423320293427,
"learning_rate": 3.497960410078427e-05,
"loss": 0.0489,
"step": 9260
},
{
"epoch": 19.393305439330543,
"grad_norm": 0.19601042568683624,
"learning_rate": 3.487450086135236e-05,
"loss": 0.0494,
"step": 9270
},
{
"epoch": 19.414225941422593,
"grad_norm": 0.19500787556171417,
"learning_rate": 3.476947113752891e-05,
"loss": 0.0475,
"step": 9280
},
{
"epoch": 19.435146443514643,
"grad_norm": 0.15155597031116486,
"learning_rate": 3.4664515439797823e-05,
"loss": 0.0417,
"step": 9290
},
{
"epoch": 19.456066945606693,
"grad_norm": 0.23204268515110016,
"learning_rate": 3.45596342782832e-05,
"loss": 0.0466,
"step": 9300
},
{
"epoch": 19.476987447698743,
"grad_norm": 0.14878489077091217,
"learning_rate": 3.4454828162746936e-05,
"loss": 0.0441,
"step": 9310
},
{
"epoch": 19.497907949790793,
"grad_norm": 0.19891710579395294,
"learning_rate": 3.435009760258608e-05,
"loss": 0.0531,
"step": 9320
},
{
"epoch": 19.518828451882847,
"grad_norm": 0.21309182047843933,
"learning_rate": 3.424544310683057e-05,
"loss": 0.053,
"step": 9330
},
{
"epoch": 19.539748953974897,
"grad_norm": 0.15616843104362488,
"learning_rate": 3.41408651841405e-05,
"loss": 0.0463,
"step": 9340
},
{
"epoch": 19.560669456066947,
"grad_norm": 0.19337613880634308,
"learning_rate": 3.403636434280388e-05,
"loss": 0.05,
"step": 9350
},
{
"epoch": 19.581589958158997,
"grad_norm": 0.19758610427379608,
"learning_rate": 3.393194109073411e-05,
"loss": 0.0533,
"step": 9360
},
{
"epoch": 19.602510460251047,
"grad_norm": 0.21965035796165466,
"learning_rate": 3.3827595935467376e-05,
"loss": 0.0431,
"step": 9370
},
{
"epoch": 19.623430962343097,
"grad_norm": 0.19240938127040863,
"learning_rate": 3.3723329384160344e-05,
"loss": 0.0509,
"step": 9380
},
{
"epoch": 19.644351464435147,
"grad_norm": 0.2612786889076233,
"learning_rate": 3.3619141943587646e-05,
"loss": 0.0507,
"step": 9390
},
{
"epoch": 19.665271966527197,
"grad_norm": 0.1438675969839096,
"learning_rate": 3.351503412013935e-05,
"loss": 0.049,
"step": 9400
},
{
"epoch": 19.686192468619247,
"grad_norm": 0.17203199863433838,
"learning_rate": 3.341100641981863e-05,
"loss": 0.0504,
"step": 9410
},
{
"epoch": 19.707112970711297,
"grad_norm": 0.20994871854782104,
"learning_rate": 3.330705934823919e-05,
"loss": 0.0441,
"step": 9420
},
{
"epoch": 19.728033472803347,
"grad_norm": 0.18428127467632294,
"learning_rate": 3.3203193410622804e-05,
"loss": 0.0455,
"step": 9430
},
{
"epoch": 19.748953974895397,
"grad_norm": 0.22371669113636017,
"learning_rate": 3.309940911179701e-05,
"loss": 0.0475,
"step": 9440
},
{
"epoch": 19.769874476987447,
"grad_norm": 0.1059647798538208,
"learning_rate": 3.2995706956192465e-05,
"loss": 0.047,
"step": 9450
},
{
"epoch": 19.790794979079497,
"grad_norm": 0.18892386555671692,
"learning_rate": 3.289208744784059e-05,
"loss": 0.0565,
"step": 9460
},
{
"epoch": 19.811715481171547,
"grad_norm": 0.2226765900850296,
"learning_rate": 3.2788551090371164e-05,
"loss": 0.0428,
"step": 9470
},
{
"epoch": 19.8326359832636,
"grad_norm": 0.23749548196792603,
"learning_rate": 3.268509838700974e-05,
"loss": 0.0531,
"step": 9480
},
{
"epoch": 19.85355648535565,
"grad_norm": 0.17291848361492157,
"learning_rate": 3.258172984057535e-05,
"loss": 0.0521,
"step": 9490
},
{
"epoch": 19.8744769874477,
"grad_norm": 0.23719993233680725,
"learning_rate": 3.247844595347798e-05,
"loss": 0.0465,
"step": 9500
},
{
"epoch": 19.89539748953975,
"grad_norm": 0.19163990020751953,
"learning_rate": 3.2375247227716077e-05,
"loss": 0.0527,
"step": 9510
},
{
"epoch": 19.9163179916318,
"grad_norm": 0.20488514006137848,
"learning_rate": 3.2272134164874264e-05,
"loss": 0.053,
"step": 9520
},
{
"epoch": 19.93723849372385,
"grad_norm": 0.1729532778263092,
"learning_rate": 3.216910726612073e-05,
"loss": 0.0451,
"step": 9530
},
{
"epoch": 19.9581589958159,
"grad_norm": 0.20507602393627167,
"learning_rate": 3.2066167032204956e-05,
"loss": 0.0494,
"step": 9540
},
{
"epoch": 19.97907949790795,
"grad_norm": 0.20738042891025543,
"learning_rate": 3.196331396345512e-05,
"loss": 0.0478,
"step": 9550
},
{
"epoch": 20.0,
"grad_norm": 0.16895107924938202,
"learning_rate": 3.186054855977577e-05,
"loss": 0.0479,
"step": 9560
},
{
"epoch": 20.02092050209205,
"grad_norm": 0.1991293877363205,
"learning_rate": 3.175787132064542e-05,
"loss": 0.0488,
"step": 9570
},
{
"epoch": 20.0418410041841,
"grad_norm": 0.21318255364894867,
"learning_rate": 3.165528274511397e-05,
"loss": 0.0533,
"step": 9580
},
{
"epoch": 20.06276150627615,
"grad_norm": 0.2068648636341095,
"learning_rate": 3.155278333180047e-05,
"loss": 0.0509,
"step": 9590
},
{
"epoch": 20.0836820083682,
"grad_norm": 0.18824225664138794,
"learning_rate": 3.14503735788906e-05,
"loss": 0.0499,
"step": 9600
},
{
"epoch": 20.10460251046025,
"grad_norm": 0.2116539180278778,
"learning_rate": 3.134805398413419e-05,
"loss": 0.0426,
"step": 9610
},
{
"epoch": 20.1255230125523,
"grad_norm": 0.21344134211540222,
"learning_rate": 3.1245825044842954e-05,
"loss": 0.045,
"step": 9620
},
{
"epoch": 20.14644351464435,
"grad_norm": 0.1412409096956253,
"learning_rate": 3.114368725788791e-05,
"loss": 0.0534,
"step": 9630
},
{
"epoch": 20.1673640167364,
"grad_norm": 0.25958654284477234,
"learning_rate": 3.1041641119697075e-05,
"loss": 0.0496,
"step": 9640
},
{
"epoch": 20.188284518828453,
"grad_norm": 0.2157263457775116,
"learning_rate": 3.093968712625306e-05,
"loss": 0.054,
"step": 9650
},
{
"epoch": 20.209205020920503,
"grad_norm": 0.15186910331249237,
"learning_rate": 3.0837825773090535e-05,
"loss": 0.0419,
"step": 9660
},
{
"epoch": 20.230125523012553,
"grad_norm": 0.18805916607379913,
"learning_rate": 3.073605755529395e-05,
"loss": 0.0478,
"step": 9670
},
{
"epoch": 20.251046025104603,
"grad_norm": 0.2179500162601471,
"learning_rate": 3.063438296749511e-05,
"loss": 0.0468,
"step": 9680
},
{
"epoch": 20.271966527196653,
"grad_norm": 0.18823978304862976,
"learning_rate": 3.053280250387067e-05,
"loss": 0.0465,
"step": 9690
},
{
"epoch": 20.292887029288703,
"grad_norm": 0.20382045209407806,
"learning_rate": 3.043131665813988e-05,
"loss": 0.0499,
"step": 9700
},
{
"epoch": 20.313807531380753,
"grad_norm": 0.1649629771709442,
"learning_rate": 3.0329925923562073e-05,
"loss": 0.0428,
"step": 9710
},
{
"epoch": 20.334728033472803,
"grad_norm": 0.19102589786052704,
"learning_rate": 3.0228630792934277e-05,
"loss": 0.0444,
"step": 9720
},
{
"epoch": 20.355648535564853,
"grad_norm": 0.21547861397266388,
"learning_rate": 3.0127431758588918e-05,
"loss": 0.0547,
"step": 9730
},
{
"epoch": 20.376569037656903,
"grad_norm": 0.2640725374221802,
"learning_rate": 3.002632931239133e-05,
"loss": 0.0421,
"step": 9740
},
{
"epoch": 20.397489539748953,
"grad_norm": 0.19886314868927002,
"learning_rate": 2.992532394573735e-05,
"loss": 0.0492,
"step": 9750
},
{
"epoch": 20.418410041841003,
"grad_norm": 0.17059046030044556,
"learning_rate": 2.982441614955105e-05,
"loss": 0.0444,
"step": 9760
},
{
"epoch": 20.439330543933053,
"grad_norm": 0.20661625266075134,
"learning_rate": 2.972360641428218e-05,
"loss": 0.0457,
"step": 9770
},
{
"epoch": 20.460251046025103,
"grad_norm": 0.19825486838817596,
"learning_rate": 2.9622895229903973e-05,
"loss": 0.0438,
"step": 9780
},
{
"epoch": 20.481171548117153,
"grad_norm": 0.25336378812789917,
"learning_rate": 2.9522283085910612e-05,
"loss": 0.0461,
"step": 9790
},
{
"epoch": 20.502092050209207,
"grad_norm": 0.22274994850158691,
"learning_rate": 2.942177047131489e-05,
"loss": 0.0426,
"step": 9800
},
{
"epoch": 20.523012552301257,
"grad_norm": 0.1447426825761795,
"learning_rate": 2.9321357874645905e-05,
"loss": 0.0518,
"step": 9810
},
{
"epoch": 20.543933054393307,
"grad_norm": 0.23118385672569275,
"learning_rate": 2.9221045783946577e-05,
"loss": 0.049,
"step": 9820
},
{
"epoch": 20.564853556485357,
"grad_norm": 0.17135083675384521,
"learning_rate": 2.9120834686771394e-05,
"loss": 0.0423,
"step": 9830
},
{
"epoch": 20.585774058577407,
"grad_norm": 0.19719673693180084,
"learning_rate": 2.902072507018392e-05,
"loss": 0.0426,
"step": 9840
},
{
"epoch": 20.606694560669457,
"grad_norm": 0.1972011774778366,
"learning_rate": 2.892071742075446e-05,
"loss": 0.0558,
"step": 9850
},
{
"epoch": 20.627615062761507,
"grad_norm": 0.1997687965631485,
"learning_rate": 2.8820812224557812e-05,
"loss": 0.0511,
"step": 9860
},
{
"epoch": 20.648535564853557,
"grad_norm": 0.18193860352039337,
"learning_rate": 2.8721009967170764e-05,
"loss": 0.0521,
"step": 9870
},
{
"epoch": 20.669456066945607,
"grad_norm": 0.13904160261154175,
"learning_rate": 2.8621311133669748e-05,
"loss": 0.0476,
"step": 9880
},
{
"epoch": 20.690376569037657,
"grad_norm": 0.19542591273784637,
"learning_rate": 2.8521716208628595e-05,
"loss": 0.0433,
"step": 9890
},
{
"epoch": 20.711297071129707,
"grad_norm": 0.25673067569732666,
"learning_rate": 2.8422225676116015e-05,
"loss": 0.0482,
"step": 9900
},
{
"epoch": 20.732217573221757,
"grad_norm": 0.193390354514122,
"learning_rate": 2.832284001969342e-05,
"loss": 0.0483,
"step": 9910
},
{
"epoch": 20.753138075313807,
"grad_norm": 0.2549495995044708,
"learning_rate": 2.8223559722412408e-05,
"loss": 0.0438,
"step": 9920
},
{
"epoch": 20.774058577405857,
"grad_norm": 0.20687030255794525,
"learning_rate": 2.8124385266812516e-05,
"loss": 0.0497,
"step": 9930
},
{
"epoch": 20.794979079497907,
"grad_norm": 0.20353694260120392,
"learning_rate": 2.802531713491886e-05,
"loss": 0.0508,
"step": 9940
},
{
"epoch": 20.815899581589957,
"grad_norm": 0.1879122108221054,
"learning_rate": 2.7926355808239822e-05,
"loss": 0.0421,
"step": 9950
},
{
"epoch": 20.836820083682007,
"grad_norm": 0.2186966836452484,
"learning_rate": 2.782750176776458e-05,
"loss": 0.0539,
"step": 9960
},
{
"epoch": 20.85774058577406,
"grad_norm": 0.22850190103054047,
"learning_rate": 2.7728755493960946e-05,
"loss": 0.0525,
"step": 9970
},
{
"epoch": 20.87866108786611,
"grad_norm": 0.1749819964170456,
"learning_rate": 2.7630117466772876e-05,
"loss": 0.0436,
"step": 9980
},
{
"epoch": 20.89958158995816,
"grad_norm": 0.1613214612007141,
"learning_rate": 2.7531588165618278e-05,
"loss": 0.0492,
"step": 9990
},
{
"epoch": 20.92050209205021,
"grad_norm": 0.18235832452774048,
"learning_rate": 2.7433168069386533e-05,
"loss": 0.0448,
"step": 10000
},
{
"epoch": 20.94142259414226,
"grad_norm": 0.18606573343276978,
"learning_rate": 2.7334857656436308e-05,
"loss": 0.0483,
"step": 10010
},
{
"epoch": 20.96234309623431,
"grad_norm": 0.15410400927066803,
"learning_rate": 2.7236657404593157e-05,
"loss": 0.0471,
"step": 10020
},
{
"epoch": 20.98326359832636,
"grad_norm": 0.18072716891765594,
"learning_rate": 2.713856779114716e-05,
"loss": 0.0477,
"step": 10030
},
{
"epoch": 21.00418410041841,
"grad_norm": 0.21399426460266113,
"learning_rate": 2.704058929285074e-05,
"loss": 0.0455,
"step": 10040
},
{
"epoch": 21.02510460251046,
"grad_norm": 0.20552700757980347,
"learning_rate": 2.6942722385916175e-05,
"loss": 0.0476,
"step": 10050
},
{
"epoch": 21.04602510460251,
"grad_norm": 0.23852410912513733,
"learning_rate": 2.6844967546013394e-05,
"loss": 0.0514,
"step": 10060
},
{
"epoch": 21.06694560669456,
"grad_norm": 0.15994329750537872,
"learning_rate": 2.6747325248267673e-05,
"loss": 0.042,
"step": 10070
},
{
"epoch": 21.08786610878661,
"grad_norm": 0.1782161295413971,
"learning_rate": 2.664979596725724e-05,
"loss": 0.0485,
"step": 10080
},
{
"epoch": 21.10878661087866,
"grad_norm": 0.1394461840391159,
"learning_rate": 2.655238017701105e-05,
"loss": 0.0459,
"step": 10090
},
{
"epoch": 21.12970711297071,
"grad_norm": 0.20787139236927032,
"learning_rate": 2.6455078351006455e-05,
"loss": 0.0503,
"step": 10100
},
{
"epoch": 21.15062761506276,
"grad_norm": 0.15584444999694824,
"learning_rate": 2.6357890962166866e-05,
"loss": 0.0388,
"step": 10110
},
{
"epoch": 21.171548117154813,
"grad_norm": 0.24930870532989502,
"learning_rate": 2.6260818482859534e-05,
"loss": 0.0497,
"step": 10120
},
{
"epoch": 21.192468619246863,
"grad_norm": 0.19874218106269836,
"learning_rate": 2.6163861384893156e-05,
"loss": 0.0484,
"step": 10130
},
{
"epoch": 21.213389121338913,
"grad_norm": 0.2356872707605362,
"learning_rate": 2.606702013951564e-05,
"loss": 0.0449,
"step": 10140
},
{
"epoch": 21.234309623430963,
"grad_norm": 0.22813639044761658,
"learning_rate": 2.5970295217411844e-05,
"loss": 0.0461,
"step": 10150
},
{
"epoch": 21.255230125523013,
"grad_norm": 0.25937241315841675,
"learning_rate": 2.5873687088701236e-05,
"loss": 0.0437,
"step": 10160
},
{
"epoch": 21.276150627615063,
"grad_norm": 0.23558540642261505,
"learning_rate": 2.5777196222935596e-05,
"loss": 0.0445,
"step": 10170
},
{
"epoch": 21.297071129707113,
"grad_norm": 0.18446467816829681,
"learning_rate": 2.5680823089096807e-05,
"loss": 0.0441,
"step": 10180
},
{
"epoch": 21.317991631799163,
"grad_norm": 0.1600533127784729,
"learning_rate": 2.558456815559448e-05,
"loss": 0.0459,
"step": 10190
},
{
"epoch": 21.338912133891213,
"grad_norm": 0.18886277079582214,
"learning_rate": 2.548843189026378e-05,
"loss": 0.0468,
"step": 10200
},
{
"epoch": 21.359832635983263,
"grad_norm": 0.2278919667005539,
"learning_rate": 2.5392414760363048e-05,
"loss": 0.047,
"step": 10210
},
{
"epoch": 21.380753138075313,
"grad_norm": 0.23357948660850525,
"learning_rate": 2.529651723257162e-05,
"loss": 0.0468,
"step": 10220
},
{
"epoch": 21.401673640167363,
"grad_norm": 0.20221158862113953,
"learning_rate": 2.5200739772987537e-05,
"loss": 0.049,
"step": 10230
},
{
"epoch": 21.422594142259413,
"grad_norm": 0.18963316082954407,
"learning_rate": 2.5105082847125184e-05,
"loss": 0.0413,
"step": 10240
},
{
"epoch": 21.443514644351463,
"grad_norm": 0.17294231057167053,
"learning_rate": 2.5009546919913218e-05,
"loss": 0.0472,
"step": 10250
},
{
"epoch": 21.464435146443513,
"grad_norm": 0.21104329824447632,
"learning_rate": 2.4914132455692098e-05,
"loss": 0.0418,
"step": 10260
},
{
"epoch": 21.485355648535563,
"grad_norm": 0.17008601129055023,
"learning_rate": 2.4818839918211962e-05,
"loss": 0.0385,
"step": 10270
},
{
"epoch": 21.506276150627617,
"grad_norm": 0.1992831528186798,
"learning_rate": 2.4723669770630376e-05,
"loss": 0.0496,
"step": 10280
},
{
"epoch": 21.527196652719667,
"grad_norm": 0.2100939303636551,
"learning_rate": 2.4628622475509972e-05,
"loss": 0.0482,
"step": 10290
},
{
"epoch": 21.548117154811717,
"grad_norm": 0.21112582087516785,
"learning_rate": 2.4533698494816342e-05,
"loss": 0.0559,
"step": 10300
},
{
"epoch": 21.569037656903767,
"grad_norm": 0.21962668001651764,
"learning_rate": 2.44388982899157e-05,
"loss": 0.0542,
"step": 10310
},
{
"epoch": 21.589958158995817,
"grad_norm": 0.16883806884288788,
"learning_rate": 2.4344222321572636e-05,
"loss": 0.0461,
"step": 10320
},
{
"epoch": 21.610878661087867,
"grad_norm": 0.21127848327159882,
"learning_rate": 2.4249671049947954e-05,
"loss": 0.0448,
"step": 10330
},
{
"epoch": 21.631799163179917,
"grad_norm": 0.15680480003356934,
"learning_rate": 2.4155244934596333e-05,
"loss": 0.0465,
"step": 10340
},
{
"epoch": 21.652719665271967,
"grad_norm": 0.17642885446548462,
"learning_rate": 2.406094443446416e-05,
"loss": 0.0422,
"step": 10350
},
{
"epoch": 21.673640167364017,
"grad_norm": 0.1920289397239685,
"learning_rate": 2.3966770007887317e-05,
"loss": 0.0438,
"step": 10360
},
{
"epoch": 21.694560669456067,
"grad_norm": 0.15706102550029755,
"learning_rate": 2.3872722112588903e-05,
"loss": 0.0404,
"step": 10370
},
{
"epoch": 21.715481171548117,
"grad_norm": 0.2093636691570282,
"learning_rate": 2.3778801205676997e-05,
"loss": 0.0381,
"step": 10380
},
{
"epoch": 21.736401673640167,
"grad_norm": 0.2329166829586029,
"learning_rate": 2.3685007743642524e-05,
"loss": 0.0518,
"step": 10390
},
{
"epoch": 21.757322175732217,
"grad_norm": 0.1390232890844345,
"learning_rate": 2.3591342182356914e-05,
"loss": 0.0468,
"step": 10400
},
{
"epoch": 21.778242677824267,
"grad_norm": 0.32355770468711853,
"learning_rate": 2.3497804977070016e-05,
"loss": 0.0446,
"step": 10410
},
{
"epoch": 21.799163179916317,
"grad_norm": 0.16483452916145325,
"learning_rate": 2.3404396582407777e-05,
"loss": 0.0502,
"step": 10420
},
{
"epoch": 21.820083682008367,
"grad_norm": 0.17312510311603546,
"learning_rate": 2.331111745237007e-05,
"loss": 0.0416,
"step": 10430
},
{
"epoch": 21.84100418410042,
"grad_norm": 0.17626938223838806,
"learning_rate": 2.3217968040328526e-05,
"loss": 0.0499,
"step": 10440
},
{
"epoch": 21.86192468619247,
"grad_norm": 0.2215689867734909,
"learning_rate": 2.3124948799024286e-05,
"loss": 0.0413,
"step": 10450
},
{
"epoch": 21.88284518828452,
"grad_norm": 0.28537923097610474,
"learning_rate": 2.3032060180565828e-05,
"loss": 0.0505,
"step": 10460
},
{
"epoch": 21.90376569037657,
"grad_norm": 0.21920333802700043,
"learning_rate": 2.2939302636426724e-05,
"loss": 0.0424,
"step": 10470
},
{
"epoch": 21.92468619246862,
"grad_norm": 0.2144719809293747,
"learning_rate": 2.2846676617443458e-05,
"loss": 0.0403,
"step": 10480
},
{
"epoch": 21.94560669456067,
"grad_norm": 0.16767054796218872,
"learning_rate": 2.275418257381332e-05,
"loss": 0.0435,
"step": 10490
},
{
"epoch": 21.96652719665272,
"grad_norm": 0.1883849948644638,
"learning_rate": 2.2661820955092083e-05,
"loss": 0.0483,
"step": 10500
},
{
"epoch": 21.98744769874477,
"grad_norm": 0.19924281537532806,
"learning_rate": 2.256959221019193e-05,
"loss": 0.0445,
"step": 10510
},
{
"epoch": 22.00836820083682,
"grad_norm": 0.22506235539913177,
"learning_rate": 2.2477496787379227e-05,
"loss": 0.0548,
"step": 10520
},
{
"epoch": 22.02928870292887,
"grad_norm": 0.24542684853076935,
"learning_rate": 2.238553513427229e-05,
"loss": 0.0482,
"step": 10530
},
{
"epoch": 22.05020920502092,
"grad_norm": 0.22342945635318756,
"learning_rate": 2.2293707697839344e-05,
"loss": 0.0516,
"step": 10540
},
{
"epoch": 22.07112970711297,
"grad_norm": 0.2504042685031891,
"learning_rate": 2.2202014924396214e-05,
"loss": 0.0506,
"step": 10550
},
{
"epoch": 22.09205020920502,
"grad_norm": 0.1865241527557373,
"learning_rate": 2.21104572596042e-05,
"loss": 0.0399,
"step": 10560
},
{
"epoch": 22.11297071129707,
"grad_norm": 0.16631072759628296,
"learning_rate": 2.2019035148468e-05,
"loss": 0.0415,
"step": 10570
},
{
"epoch": 22.13389121338912,
"grad_norm": 0.21772721409797668,
"learning_rate": 2.1927749035333374e-05,
"loss": 0.0453,
"step": 10580
},
{
"epoch": 22.15481171548117,
"grad_norm": 0.18731866776943207,
"learning_rate": 2.1836599363885152e-05,
"loss": 0.0435,
"step": 10590
},
{
"epoch": 22.175732217573223,
"grad_norm": 0.21080322563648224,
"learning_rate": 2.1745586577144993e-05,
"loss": 0.0437,
"step": 10600
},
{
"epoch": 22.196652719665273,
"grad_norm": 0.18895407021045685,
"learning_rate": 2.1654711117469207e-05,
"loss": 0.0455,
"step": 10610
},
{
"epoch": 22.217573221757323,
"grad_norm": 0.2682255506515503,
"learning_rate": 2.1563973426546702e-05,
"loss": 0.0471,
"step": 10620
},
{
"epoch": 22.238493723849373,
"grad_norm": 0.2073366940021515,
"learning_rate": 2.1473373945396728e-05,
"loss": 0.0432,
"step": 10630
},
{
"epoch": 22.259414225941423,
"grad_norm": 0.1627686470746994,
"learning_rate": 2.138291311436679e-05,
"loss": 0.0451,
"step": 10640
},
{
"epoch": 22.280334728033473,
"grad_norm": 0.14305227994918823,
"learning_rate": 2.1292591373130518e-05,
"loss": 0.045,
"step": 10650
},
{
"epoch": 22.301255230125523,
"grad_norm": 0.18689580261707306,
"learning_rate": 2.1202409160685528e-05,
"loss": 0.0426,
"step": 10660
},
{
"epoch": 22.322175732217573,
"grad_norm": 0.16318003833293915,
"learning_rate": 2.1112366915351228e-05,
"loss": 0.0403,
"step": 10670
},
{
"epoch": 22.343096234309623,
"grad_norm": 0.1883779615163803,
"learning_rate": 2.102246507476679e-05,
"loss": 0.0459,
"step": 10680
},
{
"epoch": 22.364016736401673,
"grad_norm": 0.20092038810253143,
"learning_rate": 2.09327040758889e-05,
"loss": 0.0448,
"step": 10690
},
{
"epoch": 22.384937238493723,
"grad_norm": 0.19910837709903717,
"learning_rate": 2.0843084354989767e-05,
"loss": 0.0456,
"step": 10700
},
{
"epoch": 22.405857740585773,
"grad_norm": 0.1743541806936264,
"learning_rate": 2.0753606347654892e-05,
"loss": 0.0469,
"step": 10710
},
{
"epoch": 22.426778242677823,
"grad_norm": 0.21876223385334015,
"learning_rate": 2.0664270488780985e-05,
"loss": 0.0439,
"step": 10720
},
{
"epoch": 22.447698744769873,
"grad_norm": 0.2005428522825241,
"learning_rate": 2.0575077212573905e-05,
"loss": 0.0445,
"step": 10730
},
{
"epoch": 22.468619246861923,
"grad_norm": 0.158347949385643,
"learning_rate": 2.0486026952546484e-05,
"loss": 0.0465,
"step": 10740
},
{
"epoch": 22.489539748953973,
"grad_norm": 0.16677257418632507,
"learning_rate": 2.0397120141516457e-05,
"loss": 0.0402,
"step": 10750
},
{
"epoch": 22.510460251046027,
"grad_norm": 0.1984304040670395,
"learning_rate": 2.0308357211604313e-05,
"loss": 0.0451,
"step": 10760
},
{
"epoch": 22.531380753138077,
"grad_norm": 0.20393149554729462,
"learning_rate": 2.0219738594231224e-05,
"loss": 0.0436,
"step": 10770
},
{
"epoch": 22.552301255230127,
"grad_norm": 0.1970740109682083,
"learning_rate": 2.0131264720116993e-05,
"loss": 0.0432,
"step": 10780
},
{
"epoch": 22.573221757322177,
"grad_norm": 0.2895578444004059,
"learning_rate": 2.0042936019277853e-05,
"loss": 0.0475,
"step": 10790
},
{
"epoch": 22.594142259414227,
"grad_norm": 0.20197248458862305,
"learning_rate": 1.99547529210245e-05,
"loss": 0.0458,
"step": 10800
},
{
"epoch": 22.615062761506277,
"grad_norm": 0.22795496881008148,
"learning_rate": 1.9866715853959934e-05,
"loss": 0.0444,
"step": 10810
},
{
"epoch": 22.635983263598327,
"grad_norm": 0.18115581572055817,
"learning_rate": 1.977882524597734e-05,
"loss": 0.0434,
"step": 10820
},
{
"epoch": 22.656903765690377,
"grad_norm": 0.22426635026931763,
"learning_rate": 1.969108152425813e-05,
"loss": 0.0477,
"step": 10830
},
{
"epoch": 22.677824267782427,
"grad_norm": 0.20142389833927155,
"learning_rate": 1.9603485115269744e-05,
"loss": 0.0497,
"step": 10840
},
{
"epoch": 22.698744769874477,
"grad_norm": 0.19367477297782898,
"learning_rate": 1.9516036444763613e-05,
"loss": 0.0465,
"step": 10850
},
{
"epoch": 22.719665271966527,
"grad_norm": 0.29349711537361145,
"learning_rate": 1.9428735937773173e-05,
"loss": 0.0481,
"step": 10860
},
{
"epoch": 22.740585774058577,
"grad_norm": 0.11351630091667175,
"learning_rate": 1.9341584018611646e-05,
"loss": 0.0425,
"step": 10870
},
{
"epoch": 22.761506276150627,
"grad_norm": 0.18608035147190094,
"learning_rate": 1.9254581110870123e-05,
"loss": 0.0402,
"step": 10880
},
{
"epoch": 22.782426778242677,
"grad_norm": 0.14883318543434143,
"learning_rate": 1.916772763741544e-05,
"loss": 0.0396,
"step": 10890
},
{
"epoch": 22.803347280334727,
"grad_norm": 0.21729174256324768,
"learning_rate": 1.908102402038807e-05,
"loss": 0.041,
"step": 10900
},
{
"epoch": 22.824267782426777,
"grad_norm": 0.18119406700134277,
"learning_rate": 1.8994470681200204e-05,
"loss": 0.0388,
"step": 10910
},
{
"epoch": 22.84518828451883,
"grad_norm": 0.2152002453804016,
"learning_rate": 1.8908068040533578e-05,
"loss": 0.049,
"step": 10920
},
{
"epoch": 22.86610878661088,
"grad_norm": 0.25034478306770325,
"learning_rate": 1.8821816518337455e-05,
"loss": 0.048,
"step": 10930
},
{
"epoch": 22.88702928870293,
"grad_norm": 0.17609989643096924,
"learning_rate": 1.8735716533826663e-05,
"loss": 0.0415,
"step": 10940
},
{
"epoch": 22.90794979079498,
"grad_norm": 0.2574017643928528,
"learning_rate": 1.8649768505479476e-05,
"loss": 0.0494,
"step": 10950
},
{
"epoch": 22.92887029288703,
"grad_norm": 0.24520331621170044,
"learning_rate": 1.8563972851035616e-05,
"loss": 0.0454,
"step": 10960
},
{
"epoch": 22.94979079497908,
"grad_norm": 0.22302265465259552,
"learning_rate": 1.847832998749418e-05,
"loss": 0.0454,
"step": 10970
},
{
"epoch": 22.97071129707113,
"grad_norm": 0.2167111337184906,
"learning_rate": 1.8392840331111644e-05,
"loss": 0.0419,
"step": 10980
},
{
"epoch": 22.99163179916318,
"grad_norm": 0.1871531903743744,
"learning_rate": 1.830750429739989e-05,
"loss": 0.0439,
"step": 10990
},
{
"epoch": 23.01255230125523,
"grad_norm": 0.18190360069274902,
"learning_rate": 1.822232230112409e-05,
"loss": 0.0559,
"step": 11000
},
{
"epoch": 23.03347280334728,
"grad_norm": 0.16407927870750427,
"learning_rate": 1.813729475630071e-05,
"loss": 0.0465,
"step": 11010
},
{
"epoch": 23.05439330543933,
"grad_norm": 0.20566906034946442,
"learning_rate": 1.8052422076195635e-05,
"loss": 0.0553,
"step": 11020
},
{
"epoch": 23.07531380753138,
"grad_norm": 0.2527928054332733,
"learning_rate": 1.7967704673321918e-05,
"loss": 0.0456,
"step": 11030
},
{
"epoch": 23.09623430962343,
"grad_norm": 0.28300896286964417,
"learning_rate": 1.7883142959438004e-05,
"loss": 0.0472,
"step": 11040
},
{
"epoch": 23.11715481171548,
"grad_norm": 0.21645322442054749,
"learning_rate": 1.779873734554558e-05,
"loss": 0.0527,
"step": 11050
},
{
"epoch": 23.13807531380753,
"grad_norm": 0.2419598549604416,
"learning_rate": 1.771448824188761e-05,
"loss": 0.0416,
"step": 11060
},
{
"epoch": 23.15899581589958,
"grad_norm": 0.1742696762084961,
"learning_rate": 1.763039605794644e-05,
"loss": 0.042,
"step": 11070
},
{
"epoch": 23.179916317991633,
"grad_norm": 0.1963113397359848,
"learning_rate": 1.754646120244164e-05,
"loss": 0.0472,
"step": 11080
},
{
"epoch": 23.200836820083683,
"grad_norm": 0.14322315156459808,
"learning_rate": 1.7462684083328144e-05,
"loss": 0.0459,
"step": 11090
},
{
"epoch": 23.221757322175733,
"grad_norm": 0.1473892629146576,
"learning_rate": 1.7379065107794262e-05,
"loss": 0.0484,
"step": 11100
},
{
"epoch": 23.242677824267783,
"grad_norm": 0.20052359998226166,
"learning_rate": 1.7295604682259586e-05,
"loss": 0.0504,
"step": 11110
},
{
"epoch": 23.263598326359833,
"grad_norm": 0.20940597355365753,
"learning_rate": 1.7212303212373175e-05,
"loss": 0.0444,
"step": 11120
},
{
"epoch": 23.284518828451883,
"grad_norm": 0.30127179622650146,
"learning_rate": 1.712916110301146e-05,
"loss": 0.0535,
"step": 11130
},
{
"epoch": 23.305439330543933,
"grad_norm": 0.1848253309726715,
"learning_rate": 1.7046178758276298e-05,
"loss": 0.0456,
"step": 11140
},
{
"epoch": 23.326359832635983,
"grad_norm": 0.175484761595726,
"learning_rate": 1.696335658149309e-05,
"loss": 0.0427,
"step": 11150
},
{
"epoch": 23.347280334728033,
"grad_norm": 0.23676013946533203,
"learning_rate": 1.6880694975208727e-05,
"loss": 0.0479,
"step": 11160
},
{
"epoch": 23.368200836820083,
"grad_norm": 0.21369418501853943,
"learning_rate": 1.6798194341189687e-05,
"loss": 0.0547,
"step": 11170
},
{
"epoch": 23.389121338912133,
"grad_norm": 0.21497584879398346,
"learning_rate": 1.671585508042003e-05,
"loss": 0.0414,
"step": 11180
},
{
"epoch": 23.410041841004183,
"grad_norm": 0.1943253129720688,
"learning_rate": 1.6633677593099483e-05,
"loss": 0.0438,
"step": 11190
},
{
"epoch": 23.430962343096233,
"grad_norm": 0.12377497553825378,
"learning_rate": 1.655166227864154e-05,
"loss": 0.0447,
"step": 11200
},
{
"epoch": 23.451882845188283,
"grad_norm": 0.23389364778995514,
"learning_rate": 1.6469809535671426e-05,
"loss": 0.0461,
"step": 11210
},
{
"epoch": 23.472803347280333,
"grad_norm": 0.19530461728572845,
"learning_rate": 1.638811976202421e-05,
"loss": 0.053,
"step": 11220
},
{
"epoch": 23.493723849372383,
"grad_norm": 0.2097453773021698,
"learning_rate": 1.6306593354742895e-05,
"loss": 0.0517,
"step": 11230
},
{
"epoch": 23.514644351464437,
"grad_norm": 0.20635390281677246,
"learning_rate": 1.6225230710076455e-05,
"loss": 0.0462,
"step": 11240
},
{
"epoch": 23.535564853556487,
"grad_norm": 0.25905895233154297,
"learning_rate": 1.6144032223477924e-05,
"loss": 0.0495,
"step": 11250
},
{
"epoch": 23.556485355648537,
"grad_norm": 0.18913106620311737,
"learning_rate": 1.606299828960243e-05,
"loss": 0.0462,
"step": 11260
},
{
"epoch": 23.577405857740587,
"grad_norm": 0.1762065887451172,
"learning_rate": 1.5982129302305337e-05,
"loss": 0.0446,
"step": 11270
},
{
"epoch": 23.598326359832637,
"grad_norm": 0.20014171302318573,
"learning_rate": 1.590142565464032e-05,
"loss": 0.0425,
"step": 11280
},
{
"epoch": 23.619246861924687,
"grad_norm": 0.16639460623264313,
"learning_rate": 1.5820887738857408e-05,
"loss": 0.0454,
"step": 11290
},
{
"epoch": 23.640167364016737,
"grad_norm": 0.15281502902507782,
"learning_rate": 1.5740515946401134e-05,
"loss": 0.0423,
"step": 11300
},
{
"epoch": 23.661087866108787,
"grad_norm": 0.2168046087026596,
"learning_rate": 1.5660310667908634e-05,
"loss": 0.0448,
"step": 11310
},
{
"epoch": 23.682008368200837,
"grad_norm": 0.19726291298866272,
"learning_rate": 1.5580272293207655e-05,
"loss": 0.0497,
"step": 11320
},
{
"epoch": 23.702928870292887,
"grad_norm": 0.16284000873565674,
"learning_rate": 1.5500401211314796e-05,
"loss": 0.0424,
"step": 11330
},
{
"epoch": 23.723849372384937,
"grad_norm": 0.24472852051258087,
"learning_rate": 1.542069781043351e-05,
"loss": 0.0427,
"step": 11340
},
{
"epoch": 23.744769874476987,
"grad_norm": 0.23519930243492126,
"learning_rate": 1.534116247795226e-05,
"loss": 0.0475,
"step": 11350
},
{
"epoch": 23.765690376569037,
"grad_norm": 0.16218726336956024,
"learning_rate": 1.526179560044267e-05,
"loss": 0.0442,
"step": 11360
},
{
"epoch": 23.786610878661087,
"grad_norm": 0.21716220676898956,
"learning_rate": 1.5182597563657552e-05,
"loss": 0.0456,
"step": 11370
},
{
"epoch": 23.807531380753137,
"grad_norm": 0.2158024162054062,
"learning_rate": 1.5103568752529135e-05,
"loss": 0.0396,
"step": 11380
},
{
"epoch": 23.828451882845187,
"grad_norm": 0.1851579248905182,
"learning_rate": 1.5024709551167142e-05,
"loss": 0.0467,
"step": 11390
},
{
"epoch": 23.84937238493724,
"grad_norm": 0.27738890051841736,
"learning_rate": 1.4946020342856898e-05,
"loss": 0.0429,
"step": 11400
},
{
"epoch": 23.87029288702929,
"grad_norm": 0.22943255305290222,
"learning_rate": 1.4867501510057546e-05,
"loss": 0.0396,
"step": 11410
},
{
"epoch": 23.89121338912134,
"grad_norm": 0.19552598893642426,
"learning_rate": 1.4789153434400094e-05,
"loss": 0.0393,
"step": 11420
},
{
"epoch": 23.91213389121339,
"grad_norm": 0.28375789523124695,
"learning_rate": 1.4710976496685614e-05,
"loss": 0.0458,
"step": 11430
},
{
"epoch": 23.93305439330544,
"grad_norm": 0.23132510483264923,
"learning_rate": 1.4632971076883406e-05,
"loss": 0.0447,
"step": 11440
},
{
"epoch": 23.95397489539749,
"grad_norm": 0.16944925487041473,
"learning_rate": 1.4555137554129117e-05,
"loss": 0.0493,
"step": 11450
},
{
"epoch": 23.97489539748954,
"grad_norm": 0.17942452430725098,
"learning_rate": 1.4477476306722925e-05,
"loss": 0.0426,
"step": 11460
},
{
"epoch": 23.99581589958159,
"grad_norm": 0.18083634972572327,
"learning_rate": 1.439998771212766e-05,
"loss": 0.0475,
"step": 11470
},
{
"epoch": 24.01673640167364,
"grad_norm": 0.19945523142814636,
"learning_rate": 1.4322672146966982e-05,
"loss": 0.0463,
"step": 11480
},
{
"epoch": 24.03765690376569,
"grad_norm": 0.15182597935199738,
"learning_rate": 1.4245529987023621e-05,
"loss": 0.051,
"step": 11490
},
{
"epoch": 24.05857740585774,
"grad_norm": 0.23692505061626434,
"learning_rate": 1.4168561607237436e-05,
"loss": 0.0475,
"step": 11500
},
{
"epoch": 24.07949790794979,
"grad_norm": 0.25118017196655273,
"learning_rate": 1.4091767381703657e-05,
"loss": 0.0381,
"step": 11510
},
{
"epoch": 24.10041841004184,
"grad_norm": 0.19620022177696228,
"learning_rate": 1.4015147683671087e-05,
"loss": 0.0438,
"step": 11520
},
{
"epoch": 24.12133891213389,
"grad_norm": 0.20508503913879395,
"learning_rate": 1.3938702885540239e-05,
"loss": 0.0407,
"step": 11530
},
{
"epoch": 24.14225941422594,
"grad_norm": 0.16729703545570374,
"learning_rate": 1.3862433358861576e-05,
"loss": 0.0379,
"step": 11540
},
{
"epoch": 24.16317991631799,
"grad_norm": 0.18478308618068695,
"learning_rate": 1.3786339474333636e-05,
"loss": 0.0469,
"step": 11550
},
{
"epoch": 24.184100418410043,
"grad_norm": 0.1481492966413498,
"learning_rate": 1.3710421601801265e-05,
"loss": 0.0434,
"step": 11560
},
{
"epoch": 24.205020920502093,
"grad_norm": 0.21265815198421478,
"learning_rate": 1.3634680110253883e-05,
"loss": 0.0493,
"step": 11570
},
{
"epoch": 24.225941422594143,
"grad_norm": 0.25307002663612366,
"learning_rate": 1.3559115367823556e-05,
"loss": 0.0485,
"step": 11580
},
{
"epoch": 24.246861924686193,
"grad_norm": 0.1825999766588211,
"learning_rate": 1.3483727741783342e-05,
"loss": 0.0499,
"step": 11590
},
{
"epoch": 24.267782426778243,
"grad_norm": 0.1724889725446701,
"learning_rate": 1.3408517598545444e-05,
"loss": 0.0397,
"step": 11600
},
{
"epoch": 24.288702928870293,
"grad_norm": 0.18095286190509796,
"learning_rate": 1.3333485303659381e-05,
"loss": 0.0427,
"step": 11610
},
{
"epoch": 24.309623430962343,
"grad_norm": 0.17512592673301697,
"learning_rate": 1.3258631221810331e-05,
"loss": 0.0429,
"step": 11620
},
{
"epoch": 24.330543933054393,
"grad_norm": 0.18383602797985077,
"learning_rate": 1.3183955716817232e-05,
"loss": 0.0433,
"step": 11630
},
{
"epoch": 24.351464435146443,
"grad_norm": 0.19692164659500122,
"learning_rate": 1.3109459151631076e-05,
"loss": 0.0437,
"step": 11640
},
{
"epoch": 24.372384937238493,
"grad_norm": 0.15425680577754974,
"learning_rate": 1.3035141888333202e-05,
"loss": 0.0427,
"step": 11650
},
{
"epoch": 24.393305439330543,
"grad_norm": 0.12884384393692017,
"learning_rate": 1.2961004288133388e-05,
"loss": 0.0422,
"step": 11660
},
{
"epoch": 24.414225941422593,
"grad_norm": 0.1541462391614914,
"learning_rate": 1.2887046711368245e-05,
"loss": 0.0418,
"step": 11670
},
{
"epoch": 24.435146443514643,
"grad_norm": 0.20649564266204834,
"learning_rate": 1.2813269517499399e-05,
"loss": 0.044,
"step": 11680
},
{
"epoch": 24.456066945606693,
"grad_norm": 0.19635066390037537,
"learning_rate": 1.273967306511169e-05,
"loss": 0.0409,
"step": 11690
},
{
"epoch": 24.476987447698743,
"grad_norm": 0.2694770097732544,
"learning_rate": 1.2666257711911566e-05,
"loss": 0.044,
"step": 11700
},
{
"epoch": 24.497907949790793,
"grad_norm": 0.16663697361946106,
"learning_rate": 1.2593023814725214e-05,
"loss": 0.04,
"step": 11710
},
{
"epoch": 24.518828451882847,
"grad_norm": 0.24324466288089752,
"learning_rate": 1.251997172949686e-05,
"loss": 0.0476,
"step": 11720
},
{
"epoch": 24.539748953974897,
"grad_norm": 0.20587030053138733,
"learning_rate": 1.2447101811287109e-05,
"loss": 0.0533,
"step": 11730
},
{
"epoch": 24.560669456066947,
"grad_norm": 0.17844031751155853,
"learning_rate": 1.237441441427114e-05,
"loss": 0.0373,
"step": 11740
},
{
"epoch": 24.581589958158997,
"grad_norm": 0.22329042851924896,
"learning_rate": 1.2301909891737018e-05,
"loss": 0.0386,
"step": 11750
},
{
"epoch": 24.602510460251047,
"grad_norm": 0.20262449979782104,
"learning_rate": 1.2229588596083957e-05,
"loss": 0.0459,
"step": 11760
},
{
"epoch": 24.623430962343097,
"grad_norm": 0.14917254447937012,
"learning_rate": 1.2157450878820608e-05,
"loss": 0.0395,
"step": 11770
},
{
"epoch": 24.644351464435147,
"grad_norm": 0.23771631717681885,
"learning_rate": 1.2085497090563407e-05,
"loss": 0.0519,
"step": 11780
},
{
"epoch": 24.665271966527197,
"grad_norm": 0.20867544412612915,
"learning_rate": 1.2013727581034783e-05,
"loss": 0.0411,
"step": 11790
},
{
"epoch": 24.686192468619247,
"grad_norm": 0.23529614508152008,
"learning_rate": 1.1942142699061498e-05,
"loss": 0.0444,
"step": 11800
},
{
"epoch": 24.707112970711297,
"grad_norm": 0.22313570976257324,
"learning_rate": 1.1870742792572992e-05,
"loss": 0.0439,
"step": 11810
},
{
"epoch": 24.728033472803347,
"grad_norm": 0.16360808908939362,
"learning_rate": 1.1799528208599637e-05,
"loss": 0.0467,
"step": 11820
},
{
"epoch": 24.748953974895397,
"grad_norm": 0.14838069677352905,
"learning_rate": 1.1728499293271079e-05,
"loss": 0.0429,
"step": 11830
},
{
"epoch": 24.769874476987447,
"grad_norm": 0.2394830286502838,
"learning_rate": 1.1657656391814509e-05,
"loss": 0.0481,
"step": 11840
},
{
"epoch": 24.790794979079497,
"grad_norm": 0.27211302518844604,
"learning_rate": 1.1586999848553043e-05,
"loss": 0.0455,
"step": 11850
},
{
"epoch": 24.811715481171547,
"grad_norm": 0.18247251212596893,
"learning_rate": 1.1516530006904053e-05,
"loss": 0.0397,
"step": 11860
},
{
"epoch": 24.8326359832636,
"grad_norm": 0.1879725158214569,
"learning_rate": 1.1446247209377403e-05,
"loss": 0.045,
"step": 11870
},
{
"epoch": 24.85355648535565,
"grad_norm": 0.21421553194522858,
"learning_rate": 1.1376151797573925e-05,
"loss": 0.0434,
"step": 11880
},
{
"epoch": 24.8744769874477,
"grad_norm": 0.18666699528694153,
"learning_rate": 1.1306244112183662e-05,
"loss": 0.0449,
"step": 11890
},
{
"epoch": 24.89539748953975,
"grad_norm": 0.2565382122993469,
"learning_rate": 1.1236524492984203e-05,
"loss": 0.0457,
"step": 11900
},
{
"epoch": 24.9163179916318,
"grad_norm": 0.16528093814849854,
"learning_rate": 1.116699327883911e-05,
"loss": 0.0391,
"step": 11910
},
{
"epoch": 24.93723849372385,
"grad_norm": 0.13788525760173798,
"learning_rate": 1.1097650807696209e-05,
"loss": 0.0465,
"step": 11920
},
{
"epoch": 24.9581589958159,
"grad_norm": 0.20516015589237213,
"learning_rate": 1.1028497416585931e-05,
"loss": 0.0486,
"step": 11930
},
{
"epoch": 24.97907949790795,
"grad_norm": 0.20356875658035278,
"learning_rate": 1.0959533441619762e-05,
"loss": 0.0414,
"step": 11940
},
{
"epoch": 25.0,
"grad_norm": 0.44614553451538086,
"learning_rate": 1.0890759217988527e-05,
"loss": 0.0362,
"step": 11950
},
{
"epoch": 25.02092050209205,
"grad_norm": 0.19991981983184814,
"learning_rate": 1.0822175079960806e-05,
"loss": 0.0464,
"step": 11960
},
{
"epoch": 25.0418410041841,
"grad_norm": 0.1874738484621048,
"learning_rate": 1.0753781360881265e-05,
"loss": 0.0416,
"step": 11970
},
{
"epoch": 25.06276150627615,
"grad_norm": 0.21726657450199127,
"learning_rate": 1.0685578393169055e-05,
"loss": 0.0384,
"step": 11980
},
{
"epoch": 25.0836820083682,
"grad_norm": 0.18857204914093018,
"learning_rate": 1.061756650831625e-05,
"loss": 0.0466,
"step": 11990
},
{
"epoch": 25.10460251046025,
"grad_norm": 0.22070184350013733,
"learning_rate": 1.054974603688616e-05,
"loss": 0.0431,
"step": 12000
},
{
"epoch": 25.1255230125523,
"grad_norm": 0.24246034026145935,
"learning_rate": 1.048211730851173e-05,
"loss": 0.0428,
"step": 12010
},
{
"epoch": 25.14644351464435,
"grad_norm": 0.1892884373664856,
"learning_rate": 1.0414680651894004e-05,
"loss": 0.0421,
"step": 12020
},
{
"epoch": 25.1673640167364,
"grad_norm": 0.18135856091976166,
"learning_rate": 1.034743639480047e-05,
"loss": 0.0438,
"step": 12030
},
{
"epoch": 25.188284518828453,
"grad_norm": 0.2031504511833191,
"learning_rate": 1.0280384864063497e-05,
"loss": 0.0454,
"step": 12040
},
{
"epoch": 25.209205020920503,
"grad_norm": 0.24415917694568634,
"learning_rate": 1.0213526385578704e-05,
"loss": 0.0446,
"step": 12050
},
{
"epoch": 25.230125523012553,
"grad_norm": 0.2209267020225525,
"learning_rate": 1.0146861284303394e-05,
"loss": 0.0426,
"step": 12060
},
{
"epoch": 25.251046025104603,
"grad_norm": 0.1683529168367386,
"learning_rate": 1.0080389884255037e-05,
"loss": 0.0437,
"step": 12070
},
{
"epoch": 25.271966527196653,
"grad_norm": 0.23323380947113037,
"learning_rate": 1.0014112508509588e-05,
"loss": 0.0429,
"step": 12080
},
{
"epoch": 25.292887029288703,
"grad_norm": 0.1716199666261673,
"learning_rate": 9.948029479199994e-06,
"loss": 0.0536,
"step": 12090
},
{
"epoch": 25.313807531380753,
"grad_norm": 0.21185162663459778,
"learning_rate": 9.882141117514632e-06,
"loss": 0.0396,
"step": 12100
},
{
"epoch": 25.334728033472803,
"grad_norm": 0.17677056789398193,
"learning_rate": 9.816447743695656e-06,
"loss": 0.0416,
"step": 12110
},
{
"epoch": 25.355648535564853,
"grad_norm": 0.18352273106575012,
"learning_rate": 9.75094967703758e-06,
"loss": 0.0475,
"step": 12120
},
{
"epoch": 25.376569037656903,
"grad_norm": 0.17828936874866486,
"learning_rate": 9.685647235885597e-06,
"loss": 0.0417,
"step": 12130
},
{
"epoch": 25.397489539748953,
"grad_norm": 0.19843615591526031,
"learning_rate": 9.620540737634087e-06,
"loss": 0.043,
"step": 12140
},
{
"epoch": 25.418410041841003,
"grad_norm": 0.17798744142055511,
"learning_rate": 9.555630498725133e-06,
"loss": 0.0492,
"step": 12150
},
{
"epoch": 25.439330543933053,
"grad_norm": 0.19702628254890442,
"learning_rate": 9.49091683464684e-06,
"loss": 0.041,
"step": 12160
},
{
"epoch": 25.460251046025103,
"grad_norm": 0.16698186099529266,
"learning_rate": 9.426400059931955e-06,
"loss": 0.0475,
"step": 12170
},
{
"epoch": 25.481171548117153,
"grad_norm": 0.2713240087032318,
"learning_rate": 9.362080488156245e-06,
"loss": 0.0494,
"step": 12180
},
{
"epoch": 25.502092050209207,
"grad_norm": 0.1628587692975998,
"learning_rate": 9.29795843193697e-06,
"loss": 0.0414,
"step": 12190
},
{
"epoch": 25.523012552301257,
"grad_norm": 0.19244007766246796,
"learning_rate": 9.234034202931447e-06,
"loss": 0.0468,
"step": 12200
},
{
"epoch": 25.543933054393307,
"grad_norm": 0.27872234582901,
"learning_rate": 9.170308111835418e-06,
"loss": 0.047,
"step": 12210
},
{
"epoch": 25.564853556485357,
"grad_norm": 0.23407423496246338,
"learning_rate": 9.106780468381631e-06,
"loss": 0.0376,
"step": 12220
},
{
"epoch": 25.585774058577407,
"grad_norm": 0.1899377554655075,
"learning_rate": 9.043451581338302e-06,
"loss": 0.043,
"step": 12230
},
{
"epoch": 25.606694560669457,
"grad_norm": 0.3231353461742401,
"learning_rate": 8.980321758507615e-06,
"loss": 0.0488,
"step": 12240
},
{
"epoch": 25.627615062761507,
"grad_norm": 0.19835972785949707,
"learning_rate": 8.91739130672425e-06,
"loss": 0.0468,
"step": 12250
},
{
"epoch": 25.648535564853557,
"grad_norm": 0.12836074829101562,
"learning_rate": 8.85466053185382e-06,
"loss": 0.049,
"step": 12260
},
{
"epoch": 25.669456066945607,
"grad_norm": 0.18373306095600128,
"learning_rate": 8.792129738791455e-06,
"loss": 0.0403,
"step": 12270
},
{
"epoch": 25.690376569037657,
"grad_norm": 0.2383619099855423,
"learning_rate": 8.729799231460318e-06,
"loss": 0.0416,
"step": 12280
},
{
"epoch": 25.711297071129707,
"grad_norm": 0.17554903030395508,
"learning_rate": 8.66766931281009e-06,
"loss": 0.0471,
"step": 12290
},
{
"epoch": 25.732217573221757,
"grad_norm": 0.17318107187747955,
"learning_rate": 8.6057402848155e-06,
"loss": 0.0449,
"step": 12300
},
{
"epoch": 25.753138075313807,
"grad_norm": 0.18916559219360352,
"learning_rate": 8.544012448474904e-06,
"loss": 0.0448,
"step": 12310
},
{
"epoch": 25.774058577405857,
"grad_norm": 0.1694333255290985,
"learning_rate": 8.482486103808779e-06,
"loss": 0.0423,
"step": 12320
},
{
"epoch": 25.794979079497907,
"grad_norm": 0.22283673286437988,
"learning_rate": 8.42116154985828e-06,
"loss": 0.0395,
"step": 12330
},
{
"epoch": 25.815899581589957,
"grad_norm": 0.24180816113948822,
"learning_rate": 8.360039084683779e-06,
"loss": 0.0405,
"step": 12340
},
{
"epoch": 25.836820083682007,
"grad_norm": 0.11373143643140793,
"learning_rate": 8.299119005363404e-06,
"loss": 0.0381,
"step": 12350
},
{
"epoch": 25.85774058577406,
"grad_norm": 0.21663306653499603,
"learning_rate": 8.238401607991647e-06,
"loss": 0.0464,
"step": 12360
},
{
"epoch": 25.87866108786611,
"grad_norm": 0.16208253800868988,
"learning_rate": 8.177887187677847e-06,
"loss": 0.0382,
"step": 12370
},
{
"epoch": 25.89958158995816,
"grad_norm": 0.2553822696208954,
"learning_rate": 8.117576038544838e-06,
"loss": 0.0439,
"step": 12380
},
{
"epoch": 25.92050209205021,
"grad_norm": 0.1499353051185608,
"learning_rate": 8.057468453727479e-06,
"loss": 0.0344,
"step": 12390
},
{
"epoch": 25.94142259414226,
"grad_norm": 0.22214387357234955,
"learning_rate": 7.997564725371182e-06,
"loss": 0.0437,
"step": 12400
},
{
"epoch": 25.96234309623431,
"grad_norm": 0.20151209831237793,
"learning_rate": 7.937865144630601e-06,
"loss": 0.0455,
"step": 12410
},
{
"epoch": 25.98326359832636,
"grad_norm": 0.2003437876701355,
"learning_rate": 7.878370001668116e-06,
"loss": 0.0466,
"step": 12420
},
{
"epoch": 26.00418410041841,
"grad_norm": 0.17744222283363342,
"learning_rate": 7.819079585652461e-06,
"loss": 0.0438,
"step": 12430
},
{
"epoch": 26.02510460251046,
"grad_norm": 0.280471533536911,
"learning_rate": 7.759994184757358e-06,
"loss": 0.047,
"step": 12440
},
{
"epoch": 26.04602510460251,
"grad_norm": 0.19019319117069244,
"learning_rate": 7.701114086160027e-06,
"loss": 0.0452,
"step": 12450
},
{
"epoch": 26.06694560669456,
"grad_norm": 0.1652364730834961,
"learning_rate": 7.642439576039884e-06,
"loss": 0.0462,
"step": 12460
},
{
"epoch": 26.08786610878661,
"grad_norm": 0.2389875054359436,
"learning_rate": 7.583970939577101e-06,
"loss": 0.0437,
"step": 12470
},
{
"epoch": 26.10878661087866,
"grad_norm": 0.1723814159631729,
"learning_rate": 7.525708460951197e-06,
"loss": 0.0481,
"step": 12480
},
{
"epoch": 26.12970711297071,
"grad_norm": 0.21704067289829254,
"learning_rate": 7.467652423339733e-06,
"loss": 0.0464,
"step": 12490
},
{
"epoch": 26.15062761506276,
"grad_norm": 0.18124651908874512,
"learning_rate": 7.409803108916841e-06,
"loss": 0.0426,
"step": 12500
},
{
"epoch": 26.171548117154813,
"grad_norm": 0.1521020233631134,
"learning_rate": 7.35216079885192e-06,
"loss": 0.0414,
"step": 12510
},
{
"epoch": 26.192468619246863,
"grad_norm": 0.22082534432411194,
"learning_rate": 7.29472577330827e-06,
"loss": 0.0408,
"step": 12520
},
{
"epoch": 26.213389121338913,
"grad_norm": 0.19320394098758698,
"learning_rate": 7.237498311441676e-06,
"loss": 0.0411,
"step": 12530
},
{
"epoch": 26.234309623430963,
"grad_norm": 0.2235615849494934,
"learning_rate": 7.180478691399134e-06,
"loss": 0.0443,
"step": 12540
},
{
"epoch": 26.255230125523013,
"grad_norm": 0.15242235362529755,
"learning_rate": 7.123667190317396e-06,
"loss": 0.0377,
"step": 12550
},
{
"epoch": 26.276150627615063,
"grad_norm": 0.22409453988075256,
"learning_rate": 7.06706408432169e-06,
"loss": 0.043,
"step": 12560
},
{
"epoch": 26.297071129707113,
"grad_norm": 0.20269347727298737,
"learning_rate": 7.010669648524404e-06,
"loss": 0.0382,
"step": 12570
},
{
"epoch": 26.317991631799163,
"grad_norm": 0.25661835074424744,
"learning_rate": 6.954484157023661e-06,
"loss": 0.0475,
"step": 12580
},
{
"epoch": 26.338912133891213,
"grad_norm": 0.25946345925331116,
"learning_rate": 6.898507882902078e-06,
"loss": 0.0416,
"step": 12590
},
{
"epoch": 26.359832635983263,
"grad_norm": 0.15361930429935455,
"learning_rate": 6.842741098225358e-06,
"loss": 0.0477,
"step": 12600
},
{
"epoch": 26.380753138075313,
"grad_norm": 0.1839352697134018,
"learning_rate": 6.787184074041031e-06,
"loss": 0.0415,
"step": 12610
},
{
"epoch": 26.401673640167363,
"grad_norm": 0.19836997985839844,
"learning_rate": 6.731837080377129e-06,
"loss": 0.0391,
"step": 12620
},
{
"epoch": 26.422594142259413,
"grad_norm": 0.15911194682121277,
"learning_rate": 6.676700386240814e-06,
"loss": 0.0423,
"step": 12630
},
{
"epoch": 26.443514644351463,
"grad_norm": 0.18121299147605896,
"learning_rate": 6.621774259617125e-06,
"loss": 0.041,
"step": 12640
},
{
"epoch": 26.464435146443513,
"grad_norm": 0.21054553985595703,
"learning_rate": 6.567058967467704e-06,
"loss": 0.0467,
"step": 12650
},
{
"epoch": 26.485355648535563,
"grad_norm": 0.22585222125053406,
"learning_rate": 6.51255477572939e-06,
"loss": 0.0458,
"step": 12660
},
{
"epoch": 26.506276150627617,
"grad_norm": 0.23863200843334198,
"learning_rate": 6.45826194931306e-06,
"loss": 0.0387,
"step": 12670
},
{
"epoch": 26.527196652719667,
"grad_norm": 0.18971945345401764,
"learning_rate": 6.4041807521022454e-06,
"loss": 0.0426,
"step": 12680
},
{
"epoch": 26.548117154811717,
"grad_norm": 0.252937376499176,
"learning_rate": 6.350311446951868e-06,
"loss": 0.0456,
"step": 12690
},
{
"epoch": 26.569037656903767,
"grad_norm": 0.20583398640155792,
"learning_rate": 6.29665429568701e-06,
"loss": 0.041,
"step": 12700
},
{
"epoch": 26.589958158995817,
"grad_norm": 0.1962657868862152,
"learning_rate": 6.2432095591015705e-06,
"loss": 0.042,
"step": 12710
},
{
"epoch": 26.610878661087867,
"grad_norm": 0.15278510749340057,
"learning_rate": 6.1899774969570444e-06,
"loss": 0.048,
"step": 12720
},
{
"epoch": 26.631799163179917,
"grad_norm": 0.20337019860744476,
"learning_rate": 6.136958367981272e-06,
"loss": 0.0432,
"step": 12730
},
{
"epoch": 26.652719665271967,
"grad_norm": 0.19672217965126038,
"learning_rate": 6.084152429867113e-06,
"loss": 0.0404,
"step": 12740
},
{
"epoch": 26.673640167364017,
"grad_norm": 0.19690439105033875,
"learning_rate": 6.0315599392712865e-06,
"loss": 0.0424,
"step": 12750
},
{
"epoch": 26.694560669456067,
"grad_norm": 0.1715863049030304,
"learning_rate": 5.979181151813057e-06,
"loss": 0.0383,
"step": 12760
},
{
"epoch": 26.715481171548117,
"grad_norm": 0.17519311606884003,
"learning_rate": 5.927016322072992e-06,
"loss": 0.0422,
"step": 12770
},
{
"epoch": 26.736401673640167,
"grad_norm": 0.1893201619386673,
"learning_rate": 5.875065703591787e-06,
"loss": 0.0437,
"step": 12780
},
{
"epoch": 26.757322175732217,
"grad_norm": 0.26540607213974,
"learning_rate": 5.823329548868939e-06,
"loss": 0.0433,
"step": 12790
},
{
"epoch": 26.778242677824267,
"grad_norm": 0.23649485409259796,
"learning_rate": 5.77180810936162e-06,
"loss": 0.044,
"step": 12800
},
{
"epoch": 26.799163179916317,
"grad_norm": 0.20049796998500824,
"learning_rate": 5.720501635483366e-06,
"loss": 0.0444,
"step": 12810
},
{
"epoch": 26.820083682008367,
"grad_norm": 0.22236889600753784,
"learning_rate": 5.669410376602918e-06,
"loss": 0.0526,
"step": 12820
},
{
"epoch": 26.84100418410042,
"grad_norm": 0.2502869665622711,
"learning_rate": 5.618534581043011e-06,
"loss": 0.0497,
"step": 12830
},
{
"epoch": 26.86192468619247,
"grad_norm": 0.16508817672729492,
"learning_rate": 5.5678744960791005e-06,
"loss": 0.0423,
"step": 12840
},
{
"epoch": 26.88284518828452,
"grad_norm": 0.22233647108078003,
"learning_rate": 5.517430367938237e-06,
"loss": 0.0464,
"step": 12850
},
{
"epoch": 26.90376569037657,
"grad_norm": 0.20295816659927368,
"learning_rate": 5.467202441797842e-06,
"loss": 0.0446,
"step": 12860
},
{
"epoch": 26.92468619246862,
"grad_norm": 0.22901266813278198,
"learning_rate": 5.417190961784497e-06,
"loss": 0.0388,
"step": 12870
},
{
"epoch": 26.94560669456067,
"grad_norm": 0.22929273545742035,
"learning_rate": 5.3673961709727885e-06,
"loss": 0.0435,
"step": 12880
},
{
"epoch": 26.96652719665272,
"grad_norm": 0.30230170488357544,
"learning_rate": 5.317818311384115e-06,
"loss": 0.0434,
"step": 12890
},
{
"epoch": 26.98744769874477,
"grad_norm": 0.20971617102622986,
"learning_rate": 5.2684576239854895e-06,
"loss": 0.0476,
"step": 12900
},
{
"epoch": 27.00836820083682,
"grad_norm": 0.16513416171073914,
"learning_rate": 5.219314348688414e-06,
"loss": 0.0406,
"step": 12910
},
{
"epoch": 27.02928870292887,
"grad_norm": 0.16726276278495789,
"learning_rate": 5.170388724347658e-06,
"loss": 0.0427,
"step": 12920
},
{
"epoch": 27.05020920502092,
"grad_norm": 0.20750859379768372,
"learning_rate": 5.1216809887601245e-06,
"loss": 0.0412,
"step": 12930
},
{
"epoch": 27.07112970711297,
"grad_norm": 0.22323675453662872,
"learning_rate": 5.073191378663733e-06,
"loss": 0.0421,
"step": 12940
},
{
"epoch": 27.09205020920502,
"grad_norm": 0.24951502680778503,
"learning_rate": 5.024920129736188e-06,
"loss": 0.0439,
"step": 12950
},
{
"epoch": 27.11297071129707,
"grad_norm": 0.18910029530525208,
"learning_rate": 4.976867476593894e-06,
"loss": 0.0411,
"step": 12960
},
{
"epoch": 27.13389121338912,
"grad_norm": 0.17495794594287872,
"learning_rate": 4.929033652790821e-06,
"loss": 0.037,
"step": 12970
},
{
"epoch": 27.15481171548117,
"grad_norm": 0.15714290738105774,
"learning_rate": 4.881418890817296e-06,
"loss": 0.0385,
"step": 12980
},
{
"epoch": 27.175732217573223,
"grad_norm": 0.151872456073761,
"learning_rate": 4.834023422098971e-06,
"loss": 0.0409,
"step": 12990
},
{
"epoch": 27.196652719665273,
"grad_norm": 0.2499384582042694,
"learning_rate": 4.7868474769956266e-06,
"loss": 0.0466,
"step": 13000
},
{
"epoch": 27.217573221757323,
"grad_norm": 0.19462943077087402,
"learning_rate": 4.7398912848000636e-06,
"loss": 0.0421,
"step": 13010
},
{
"epoch": 27.238493723849373,
"grad_norm": 0.24162252247333527,
"learning_rate": 4.6931550737370264e-06,
"loss": 0.0457,
"step": 13020
},
{
"epoch": 27.259414225941423,
"grad_norm": 0.14873123168945312,
"learning_rate": 4.646639070962067e-06,
"loss": 0.0481,
"step": 13030
},
{
"epoch": 27.280334728033473,
"grad_norm": 0.278059184551239,
"learning_rate": 4.600343502560439e-06,
"loss": 0.0462,
"step": 13040
},
{
"epoch": 27.301255230125523,
"grad_norm": 0.24148941040039062,
"learning_rate": 4.55426859354599e-06,
"loss": 0.0485,
"step": 13050
},
{
"epoch": 27.322175732217573,
"grad_norm": 0.1880323588848114,
"learning_rate": 4.5084145678600805e-06,
"loss": 0.047,
"step": 13060
},
{
"epoch": 27.343096234309623,
"grad_norm": 0.20042458176612854,
"learning_rate": 4.462781648370518e-06,
"loss": 0.0354,
"step": 13070
},
{
"epoch": 27.364016736401673,
"grad_norm": 0.23684915900230408,
"learning_rate": 4.417370056870418e-06,
"loss": 0.0419,
"step": 13080
},
{
"epoch": 27.384937238493723,
"grad_norm": 0.18872502446174622,
"learning_rate": 4.372180014077193e-06,
"loss": 0.0452,
"step": 13090
},
{
"epoch": 27.405857740585773,
"grad_norm": 0.1701870709657669,
"learning_rate": 4.327211739631415e-06,
"loss": 0.0372,
"step": 13100
},
{
"epoch": 27.426778242677823,
"grad_norm": 0.21626059710979462,
"learning_rate": 4.282465452095802e-06,
"loss": 0.0432,
"step": 13110
},
{
"epoch": 27.447698744769873,
"grad_norm": 0.18621067702770233,
"learning_rate": 4.237941368954124e-06,
"loss": 0.0425,
"step": 13120
},
{
"epoch": 27.468619246861923,
"grad_norm": 0.17577169835567474,
"learning_rate": 4.193639706610147e-06,
"loss": 0.0449,
"step": 13130
},
{
"epoch": 27.489539748953973,
"grad_norm": 0.20410019159317017,
"learning_rate": 4.149560680386588e-06,
"loss": 0.0411,
"step": 13140
},
{
"epoch": 27.510460251046027,
"grad_norm": 0.13330447673797607,
"learning_rate": 4.105704504524094e-06,
"loss": 0.0478,
"step": 13150
},
{
"epoch": 27.531380753138077,
"grad_norm": 0.228925421833992,
"learning_rate": 4.0620713921801334e-06,
"loss": 0.0424,
"step": 13160
},
{
"epoch": 27.552301255230127,
"grad_norm": 0.2237783670425415,
"learning_rate": 4.0186615554280385e-06,
"loss": 0.0412,
"step": 13170
},
{
"epoch": 27.573221757322177,
"grad_norm": 0.17737041413784027,
"learning_rate": 3.975475205255929e-06,
"loss": 0.0386,
"step": 13180
},
{
"epoch": 27.594142259414227,
"grad_norm": 0.36772456765174866,
"learning_rate": 3.932512551565676e-06,
"loss": 0.0397,
"step": 13190
},
{
"epoch": 27.615062761506277,
"grad_norm": 0.22337375581264496,
"learning_rate": 3.889773803171936e-06,
"loss": 0.0453,
"step": 13200
},
{
"epoch": 27.635983263598327,
"grad_norm": 0.18996427953243256,
"learning_rate": 3.847259167801076e-06,
"loss": 0.037,
"step": 13210
},
{
"epoch": 27.656903765690377,
"grad_norm": 0.19474196434020996,
"learning_rate": 3.804968852090185e-06,
"loss": 0.0422,
"step": 13220
},
{
"epoch": 27.677824267782427,
"grad_norm": 0.19374209642410278,
"learning_rate": 3.762903061586104e-06,
"loss": 0.0537,
"step": 13230
},
{
"epoch": 27.698744769874477,
"grad_norm": 0.31516656279563904,
"learning_rate": 3.721062000744363e-06,
"loss": 0.0462,
"step": 13240
},
{
"epoch": 27.719665271966527,
"grad_norm": 0.18470466136932373,
"learning_rate": 3.679445872928244e-06,
"loss": 0.0417,
"step": 13250
},
{
"epoch": 27.740585774058577,
"grad_norm": 0.1719268262386322,
"learning_rate": 3.6380548804077707e-06,
"loss": 0.0478,
"step": 13260
},
{
"epoch": 27.761506276150627,
"grad_norm": 0.16592474281787872,
"learning_rate": 3.5968892243587016e-06,
"loss": 0.0448,
"step": 13270
},
{
"epoch": 27.782426778242677,
"grad_norm": 0.18621480464935303,
"learning_rate": 3.555949104861611e-06,
"loss": 0.0393,
"step": 13280
},
{
"epoch": 27.803347280334727,
"grad_norm": 0.17843583226203918,
"learning_rate": 3.5152347209008394e-06,
"loss": 0.0534,
"step": 13290
},
{
"epoch": 27.824267782426777,
"grad_norm": 0.1969691812992096,
"learning_rate": 3.4747462703636104e-06,
"loss": 0.0494,
"step": 13300
},
{
"epoch": 27.84518828451883,
"grad_norm": 0.16685451567173004,
"learning_rate": 3.434483950038986e-06,
"loss": 0.0403,
"step": 13310
},
{
"epoch": 27.86610878661088,
"grad_norm": 0.18303453922271729,
"learning_rate": 3.3944479556169806e-06,
"loss": 0.0503,
"step": 13320
},
{
"epoch": 27.88702928870293,
"grad_norm": 0.2683485448360443,
"learning_rate": 3.3546384816875665e-06,
"loss": 0.0422,
"step": 13330
},
{
"epoch": 27.90794979079498,
"grad_norm": 0.11434085667133331,
"learning_rate": 3.315055721739746e-06,
"loss": 0.0405,
"step": 13340
},
{
"epoch": 27.92887029288703,
"grad_norm": 0.1836562305688858,
"learning_rate": 3.275699868160592e-06,
"loss": 0.0416,
"step": 13350
},
{
"epoch": 27.94979079497908,
"grad_norm": 0.20131796598434448,
"learning_rate": 3.23657111223436e-06,
"loss": 0.0465,
"step": 13360
},
{
"epoch": 27.97071129707113,
"grad_norm": 0.20107074081897736,
"learning_rate": 3.1976696441414764e-06,
"loss": 0.0418,
"step": 13370
},
{
"epoch": 27.99163179916318,
"grad_norm": 0.19358371198177338,
"learning_rate": 3.158995652957719e-06,
"loss": 0.0394,
"step": 13380
},
{
"epoch": 28.01255230125523,
"grad_norm": 0.18561676144599915,
"learning_rate": 3.1205493266531937e-06,
"loss": 0.0425,
"step": 13390
},
{
"epoch": 28.03347280334728,
"grad_norm": 0.18126420676708221,
"learning_rate": 3.082330852091497e-06,
"loss": 0.0389,
"step": 13400
},
{
"epoch": 28.05439330543933,
"grad_norm": 0.2034623920917511,
"learning_rate": 3.0443404150287847e-06,
"loss": 0.0429,
"step": 13410
},
{
"epoch": 28.07531380753138,
"grad_norm": 0.16916005313396454,
"learning_rate": 3.0065782001128475e-06,
"loss": 0.037,
"step": 13420
},
{
"epoch": 28.09623430962343,
"grad_norm": 0.1833481341600418,
"learning_rate": 2.9690443908822252e-06,
"loss": 0.0399,
"step": 13430
},
{
"epoch": 28.11715481171548,
"grad_norm": 0.21004605293273926,
"learning_rate": 2.9317391697653518e-06,
"loss": 0.0473,
"step": 13440
},
{
"epoch": 28.13807531380753,
"grad_norm": 0.14262895286083221,
"learning_rate": 2.8946627180795936e-06,
"loss": 0.0443,
"step": 13450
},
{
"epoch": 28.15899581589958,
"grad_norm": 0.22318962216377258,
"learning_rate": 2.8578152160304573e-06,
"loss": 0.0491,
"step": 13460
},
{
"epoch": 28.179916317991633,
"grad_norm": 0.22127728164196014,
"learning_rate": 2.821196842710638e-06,
"loss": 0.0482,
"step": 13470
},
{
"epoch": 28.200836820083683,
"grad_norm": 0.18599243462085724,
"learning_rate": 2.7848077760991853e-06,
"loss": 0.0456,
"step": 13480
},
{
"epoch": 28.221757322175733,
"grad_norm": 0.2014002501964569,
"learning_rate": 2.7486481930606434e-06,
"loss": 0.0427,
"step": 13490
},
{
"epoch": 28.242677824267783,
"grad_norm": 0.15174347162246704,
"learning_rate": 2.712718269344161e-06,
"loss": 0.0421,
"step": 13500
},
{
"epoch": 28.263598326359833,
"grad_norm": 0.2831948697566986,
"learning_rate": 2.677018179582669e-06,
"loss": 0.0458,
"step": 13510
},
{
"epoch": 28.284518828451883,
"grad_norm": 0.16338075697422028,
"learning_rate": 2.641548097292024e-06,
"loss": 0.0468,
"step": 13520
},
{
"epoch": 28.305439330543933,
"grad_norm": 0.16803938150405884,
"learning_rate": 2.606308194870133e-06,
"loss": 0.0459,
"step": 13530
},
{
"epoch": 28.326359832635983,
"grad_norm": 0.17965620756149292,
"learning_rate": 2.5712986435961707e-06,
"loss": 0.0466,
"step": 13540
},
{
"epoch": 28.347280334728033,
"grad_norm": 0.19231706857681274,
"learning_rate": 2.536519613629723e-06,
"loss": 0.043,
"step": 13550
},
{
"epoch": 28.368200836820083,
"grad_norm": 0.22725863754749298,
"learning_rate": 2.501971274009923e-06,
"loss": 0.0432,
"step": 13560
},
{
"epoch": 28.389121338912133,
"grad_norm": 0.18293611705303192,
"learning_rate": 2.467653792654695e-06,
"loss": 0.0487,
"step": 13570
},
{
"epoch": 28.410041841004183,
"grad_norm": 0.22322218120098114,
"learning_rate": 2.4335673363598822e-06,
"loss": 0.0416,
"step": 13580
},
{
"epoch": 28.430962343096233,
"grad_norm": 0.19227294623851776,
"learning_rate": 2.399712070798471e-06,
"loss": 0.042,
"step": 13590
},
{
"epoch": 28.451882845188283,
"grad_norm": 0.23683269321918488,
"learning_rate": 2.3660881605197694e-06,
"loss": 0.0437,
"step": 13600
},
{
"epoch": 28.472803347280333,
"grad_norm": 0.1566876918077469,
"learning_rate": 2.332695768948617e-06,
"loss": 0.0448,
"step": 13610
},
{
"epoch": 28.493723849372383,
"grad_norm": 0.23689772188663483,
"learning_rate": 2.299535058384583e-06,
"loss": 0.0439,
"step": 13620
},
{
"epoch": 28.514644351464437,
"grad_norm": 0.20031625032424927,
"learning_rate": 2.266606190001186e-06,
"loss": 0.0439,
"step": 13630
},
{
"epoch": 28.535564853556487,
"grad_norm": 0.19360850751399994,
"learning_rate": 2.2339093238450737e-06,
"loss": 0.0414,
"step": 13640
},
{
"epoch": 28.556485355648537,
"grad_norm": 0.21083463728427887,
"learning_rate": 2.20144461883533e-06,
"loss": 0.0436,
"step": 13650
},
{
"epoch": 28.577405857740587,
"grad_norm": 0.19224782288074493,
"learning_rate": 2.1692122327625908e-06,
"loss": 0.0409,
"step": 13660
},
{
"epoch": 28.598326359832637,
"grad_norm": 0.20601852238178253,
"learning_rate": 2.137212322288379e-06,
"loss": 0.0462,
"step": 13670
},
{
"epoch": 28.619246861924687,
"grad_norm": 0.19003936648368835,
"learning_rate": 2.105445042944282e-06,
"loss": 0.0407,
"step": 13680
},
{
"epoch": 28.640167364016737,
"grad_norm": 0.15093214809894562,
"learning_rate": 2.0739105491312027e-06,
"loss": 0.0427,
"step": 13690
},
{
"epoch": 28.661087866108787,
"grad_norm": 0.2095935046672821,
"learning_rate": 2.0426089941186443e-06,
"loss": 0.042,
"step": 13700
},
{
"epoch": 28.682008368200837,
"grad_norm": 0.30906569957733154,
"learning_rate": 2.0115405300439093e-06,
"loss": 0.0479,
"step": 13710
},
{
"epoch": 28.702928870292887,
"grad_norm": 0.2684055268764496,
"learning_rate": 1.9807053079114013e-06,
"loss": 0.0445,
"step": 13720
},
{
"epoch": 28.723849372384937,
"grad_norm": 0.19294482469558716,
"learning_rate": 1.9501034775919024e-06,
"loss": 0.0411,
"step": 13730
},
{
"epoch": 28.744769874476987,
"grad_norm": 0.18607813119888306,
"learning_rate": 1.9197351878217917e-06,
"loss": 0.0422,
"step": 13740
},
{
"epoch": 28.765690376569037,
"grad_norm": 0.16477899253368378,
"learning_rate": 1.8896005862023669e-06,
"loss": 0.0398,
"step": 13750
},
{
"epoch": 28.786610878661087,
"grad_norm": 0.16216787695884705,
"learning_rate": 1.8596998191991288e-06,
"loss": 0.0425,
"step": 13760
},
{
"epoch": 28.807531380753137,
"grad_norm": 0.2556508779525757,
"learning_rate": 1.8300330321410208e-06,
"loss": 0.0521,
"step": 13770
},
{
"epoch": 28.828451882845187,
"grad_norm": 0.15799850225448608,
"learning_rate": 1.8006003692197794e-06,
"loss": 0.0432,
"step": 13780
},
{
"epoch": 28.84937238493724,
"grad_norm": 0.28834351897239685,
"learning_rate": 1.7714019734892062e-06,
"loss": 0.0423,
"step": 13790
},
{
"epoch": 28.87029288702929,
"grad_norm": 0.21049931645393372,
"learning_rate": 1.7424379868644759e-06,
"loss": 0.0412,
"step": 13800
},
{
"epoch": 28.89121338912134,
"grad_norm": 0.1965758353471756,
"learning_rate": 1.71370855012144e-06,
"loss": 0.0412,
"step": 13810
},
{
"epoch": 28.91213389121339,
"grad_norm": 0.15672869980335236,
"learning_rate": 1.6852138028959574e-06,
"loss": 0.0396,
"step": 13820
},
{
"epoch": 28.93305439330544,
"grad_norm": 0.21490143239498138,
"learning_rate": 1.6569538836832044e-06,
"loss": 0.0489,
"step": 13830
},
{
"epoch": 28.95397489539749,
"grad_norm": 0.2109925001859665,
"learning_rate": 1.6289289298370147e-06,
"loss": 0.0458,
"step": 13840
},
{
"epoch": 28.97489539748954,
"grad_norm": 0.1507532149553299,
"learning_rate": 1.6011390775691748e-06,
"loss": 0.0462,
"step": 13850
},
{
"epoch": 28.99581589958159,
"grad_norm": 0.17817844450473785,
"learning_rate": 1.5735844619488238e-06,
"loss": 0.0432,
"step": 13860
},
{
"epoch": 29.01673640167364,
"grad_norm": 0.22161857783794403,
"learning_rate": 1.5462652169017322e-06,
"loss": 0.0359,
"step": 13870
},
{
"epoch": 29.03765690376569,
"grad_norm": 0.2035890519618988,
"learning_rate": 1.5191814752097023e-06,
"loss": 0.0454,
"step": 13880
},
{
"epoch": 29.05857740585774,
"grad_norm": 0.1549598127603531,
"learning_rate": 1.492333368509896e-06,
"loss": 0.0434,
"step": 13890
},
{
"epoch": 29.07949790794979,
"grad_norm": 0.17815828323364258,
"learning_rate": 1.4657210272941923e-06,
"loss": 0.0404,
"step": 13900
},
{
"epoch": 29.10041841004184,
"grad_norm": 0.19060222804546356,
"learning_rate": 1.4393445809085748e-06,
"loss": 0.0414,
"step": 13910
},
{
"epoch": 29.12133891213389,
"grad_norm": 0.1861022710800171,
"learning_rate": 1.4132041575524834e-06,
"loss": 0.0447,
"step": 13920
},
{
"epoch": 29.14225941422594,
"grad_norm": 0.20010648667812347,
"learning_rate": 1.387299884278187e-06,
"loss": 0.0444,
"step": 13930
},
{
"epoch": 29.16317991631799,
"grad_norm": 0.3000384569168091,
"learning_rate": 1.3616318869901945e-06,
"loss": 0.0428,
"step": 13940
},
{
"epoch": 29.184100418410043,
"grad_norm": 0.17813272774219513,
"learning_rate": 1.336200290444606e-06,
"loss": 0.0478,
"step": 13950
},
{
"epoch": 29.205020920502093,
"grad_norm": 0.15890157222747803,
"learning_rate": 1.3110052182485454e-06,
"loss": 0.0389,
"step": 13960
},
{
"epoch": 29.225941422594143,
"grad_norm": 0.1963435560464859,
"learning_rate": 1.2860467928595298e-06,
"loss": 0.0456,
"step": 13970
},
{
"epoch": 29.246861924686193,
"grad_norm": 0.21045024693012238,
"learning_rate": 1.2613251355848732e-06,
"loss": 0.0433,
"step": 13980
},
{
"epoch": 29.267782426778243,
"grad_norm": 0.19739368557929993,
"learning_rate": 1.2368403665811324e-06,
"loss": 0.0414,
"step": 13990
},
{
"epoch": 29.288702928870293,
"grad_norm": 0.23470686376094818,
"learning_rate": 1.2125926048534686e-06,
"loss": 0.0424,
"step": 14000
},
{
"epoch": 29.309623430962343,
"grad_norm": 0.11903766542673111,
"learning_rate": 1.1885819682551259e-06,
"loss": 0.0374,
"step": 14010
},
{
"epoch": 29.330543933054393,
"grad_norm": 0.16985724866390228,
"learning_rate": 1.164808573486814e-06,
"loss": 0.036,
"step": 14020
},
{
"epoch": 29.351464435146443,
"grad_norm": 0.19907903671264648,
"learning_rate": 1.1412725360961608e-06,
"loss": 0.0404,
"step": 14030
},
{
"epoch": 29.372384937238493,
"grad_norm": 0.15983760356903076,
"learning_rate": 1.1179739704771486e-06,
"loss": 0.0447,
"step": 14040
},
{
"epoch": 29.393305439330543,
"grad_norm": 0.21767009794712067,
"learning_rate": 1.0949129898695675e-06,
"loss": 0.0498,
"step": 14050
},
{
"epoch": 29.414225941422593,
"grad_norm": 0.23430395126342773,
"learning_rate": 1.0720897063584423e-06,
"loss": 0.0426,
"step": 14060
},
{
"epoch": 29.435146443514643,
"grad_norm": 0.14188767969608307,
"learning_rate": 1.0495042308735103e-06,
"loss": 0.0408,
"step": 14070
},
{
"epoch": 29.456066945606693,
"grad_norm": 0.18734532594680786,
"learning_rate": 1.0271566731886617e-06,
"loss": 0.0402,
"step": 14080
},
{
"epoch": 29.476987447698743,
"grad_norm": 0.16820771992206573,
"learning_rate": 1.005047141921428e-06,
"loss": 0.0429,
"step": 14090
},
{
"epoch": 29.497907949790793,
"grad_norm": 0.1820981651544571,
"learning_rate": 9.831757445324274e-07,
"loss": 0.0488,
"step": 14100
},
{
"epoch": 29.518828451882847,
"grad_norm": 0.19326896965503693,
"learning_rate": 9.615425873248761e-07,
"loss": 0.039,
"step": 14110
},
{
"epoch": 29.539748953974897,
"grad_norm": 0.16431112587451935,
"learning_rate": 9.401477754440502e-07,
"loss": 0.0416,
"step": 14120
},
{
"epoch": 29.560669456066947,
"grad_norm": 0.2652297914028168,
"learning_rate": 9.189914128767684e-07,
"loss": 0.0412,
"step": 14130
},
{
"epoch": 29.581589958158997,
"grad_norm": 0.19096574187278748,
"learning_rate": 8.980736024508996e-07,
"loss": 0.0362,
"step": 14140
},
{
"epoch": 29.602510460251047,
"grad_norm": 0.31353315711021423,
"learning_rate": 8.77394445834867e-07,
"loss": 0.0421,
"step": 14150
},
{
"epoch": 29.623430962343097,
"grad_norm": 0.27862218022346497,
"learning_rate": 8.569540435371281e-07,
"loss": 0.0445,
"step": 14160
},
{
"epoch": 29.644351464435147,
"grad_norm": 0.21319982409477234,
"learning_rate": 8.367524949057348e-07,
"loss": 0.0451,
"step": 14170
},
{
"epoch": 29.665271966527197,
"grad_norm": 0.1328536719083786,
"learning_rate": 8.167898981277844e-07,
"loss": 0.0401,
"step": 14180
},
{
"epoch": 29.686192468619247,
"grad_norm": 0.21670937538146973,
"learning_rate": 7.970663502290143e-07,
"loss": 0.0375,
"step": 14190
},
{
"epoch": 29.707112970711297,
"grad_norm": 0.21872220933437347,
"learning_rate": 7.775819470732692e-07,
"loss": 0.0407,
"step": 14200
},
{
"epoch": 29.728033472803347,
"grad_norm": 0.16718120872974396,
"learning_rate": 7.583367833620681e-07,
"loss": 0.04,
"step": 14210
},
{
"epoch": 29.748953974895397,
"grad_norm": 0.1932561844587326,
"learning_rate": 7.39330952634143e-07,
"loss": 0.0482,
"step": 14220
},
{
"epoch": 29.769874476987447,
"grad_norm": 0.17794445157051086,
"learning_rate": 7.205645472649681e-07,
"loss": 0.0407,
"step": 14230
},
{
"epoch": 29.790794979079497,
"grad_norm": 0.19033575057983398,
"learning_rate": 7.020376584663202e-07,
"loss": 0.0446,
"step": 14240
},
{
"epoch": 29.811715481171547,
"grad_norm": 0.20763246715068817,
"learning_rate": 6.83750376285841e-07,
"loss": 0.0467,
"step": 14250
},
{
"epoch": 29.8326359832636,
"grad_norm": 0.15791551768779755,
"learning_rate": 6.657027896065982e-07,
"loss": 0.0462,
"step": 14260
},
{
"epoch": 29.85355648535565,
"grad_norm": 0.1774657666683197,
"learning_rate": 6.478949861466355e-07,
"loss": 0.0404,
"step": 14270
},
{
"epoch": 29.8744769874477,
"grad_norm": 0.17342764139175415,
"learning_rate": 6.303270524585736e-07,
"loss": 0.0456,
"step": 14280
},
{
"epoch": 29.89539748953975,
"grad_norm": 0.1711912751197815,
"learning_rate": 6.129990739291713e-07,
"loss": 0.0445,
"step": 14290
},
{
"epoch": 29.9163179916318,
"grad_norm": 0.19799375534057617,
"learning_rate": 5.959111347789093e-07,
"loss": 0.0364,
"step": 14300
},
{
"epoch": 29.93723849372385,
"grad_norm": 0.21139439940452576,
"learning_rate": 5.790633180615956e-07,
"loss": 0.0404,
"step": 14310
},
{
"epoch": 29.9581589958159,
"grad_norm": 0.2200784832239151,
"learning_rate": 5.624557056639446e-07,
"loss": 0.0377,
"step": 14320
},
{
"epoch": 29.97907949790795,
"grad_norm": 0.16954976320266724,
"learning_rate": 5.460883783051984e-07,
"loss": 0.0399,
"step": 14330
},
{
"epoch": 30.0,
"grad_norm": 0.5953750014305115,
"learning_rate": 5.299614155367171e-07,
"loss": 0.0394,
"step": 14340
},
{
"epoch": 30.02092050209205,
"grad_norm": 0.1778673529624939,
"learning_rate": 5.140748957415897e-07,
"loss": 0.0447,
"step": 14350
},
{
"epoch": 30.0418410041841,
"grad_norm": 0.2141769528388977,
"learning_rate": 4.984288961342787e-07,
"loss": 0.049,
"step": 14360
},
{
"epoch": 30.06276150627615,
"grad_norm": 0.1671600043773651,
"learning_rate": 4.830234927602206e-07,
"loss": 0.0447,
"step": 14370
},
{
"epoch": 30.0836820083682,
"grad_norm": 0.19291211664676666,
"learning_rate": 4.6785876049545986e-07,
"loss": 0.039,
"step": 14380
},
{
"epoch": 30.10460251046025,
"grad_norm": 0.20223639905452728,
"learning_rate": 4.5293477304629297e-07,
"loss": 0.0461,
"step": 14390
},
{
"epoch": 30.1255230125523,
"grad_norm": 0.22967307269573212,
"learning_rate": 4.382516029489081e-07,
"loss": 0.0443,
"step": 14400
},
{
"epoch": 30.14644351464435,
"grad_norm": 0.22878475487232208,
"learning_rate": 4.2380932156902975e-07,
"loss": 0.0441,
"step": 14410
},
{
"epoch": 30.1673640167364,
"grad_norm": 0.1665542870759964,
"learning_rate": 4.0960799910156335e-07,
"loss": 0.0394,
"step": 14420
},
{
"epoch": 30.188284518828453,
"grad_norm": 0.216718852519989,
"learning_rate": 3.956477045702844e-07,
"loss": 0.043,
"step": 14430
},
{
"epoch": 30.209205020920503,
"grad_norm": 0.20483459532260895,
"learning_rate": 3.819285058274613e-07,
"loss": 0.0477,
"step": 14440
},
{
"epoch": 30.230125523012553,
"grad_norm": 0.20638415217399597,
"learning_rate": 3.684504695535496e-07,
"loss": 0.036,
"step": 14450
},
{
"epoch": 30.251046025104603,
"grad_norm": 0.11912164092063904,
"learning_rate": 3.552136612568813e-07,
"loss": 0.0351,
"step": 14460
},
{
"epoch": 30.271966527196653,
"grad_norm": 0.20847609639167786,
"learning_rate": 3.422181452733042e-07,
"loss": 0.0406,
"step": 14470
},
{
"epoch": 30.292887029288703,
"grad_norm": 0.15011854469776154,
"learning_rate": 3.294639847659209e-07,
"loss": 0.0491,
"step": 14480
},
{
"epoch": 30.313807531380753,
"grad_norm": 0.21889813244342804,
"learning_rate": 3.169512417247389e-07,
"loss": 0.0359,
"step": 14490
},
{
"epoch": 30.334728033472803,
"grad_norm": 0.21624627709388733,
"learning_rate": 3.046799769663822e-07,
"loss": 0.0499,
"step": 14500
},
{
"epoch": 30.355648535564853,
"grad_norm": 0.209271639585495,
"learning_rate": 2.926502501338191e-07,
"loss": 0.046,
"step": 14510
},
{
"epoch": 30.376569037656903,
"grad_norm": 0.260744571685791,
"learning_rate": 2.808621196960404e-07,
"loss": 0.0428,
"step": 14520
},
{
"epoch": 30.397489539748953,
"grad_norm": 0.22880905866622925,
"learning_rate": 2.6931564294778164e-07,
"loss": 0.0351,
"step": 14530
},
{
"epoch": 30.418410041841003,
"grad_norm": 0.1641969531774521,
"learning_rate": 2.58010876009257e-07,
"loss": 0.0414,
"step": 14540
},
{
"epoch": 30.439330543933053,
"grad_norm": 0.15474116802215576,
"learning_rate": 2.4694787382589237e-07,
"loss": 0.0362,
"step": 14550
},
{
"epoch": 30.460251046025103,
"grad_norm": 0.23926879465579987,
"learning_rate": 2.3612669016802592e-07,
"loss": 0.0376,
"step": 14560
},
{
"epoch": 30.481171548117153,
"grad_norm": 0.19552695751190186,
"learning_rate": 2.2554737763068045e-07,
"loss": 0.0478,
"step": 14570
},
{
"epoch": 30.502092050209207,
"grad_norm": 0.17018336057662964,
"learning_rate": 2.152099876332858e-07,
"loss": 0.0455,
"step": 14580
},
{
"epoch": 30.523012552301257,
"grad_norm": 0.1531943380832672,
"learning_rate": 2.051145704194457e-07,
"loss": 0.0386,
"step": 14590
},
{
"epoch": 30.543933054393307,
"grad_norm": 0.17155568301677704,
"learning_rate": 1.9526117505667129e-07,
"loss": 0.0424,
"step": 14600
},
{
"epoch": 30.564853556485357,
"grad_norm": 0.1632601022720337,
"learning_rate": 1.856498494361758e-07,
"loss": 0.0376,
"step": 14610
},
{
"epoch": 30.585774058577407,
"grad_norm": 0.2161942720413208,
"learning_rate": 1.7628064027260803e-07,
"loss": 0.0402,
"step": 14620
},
{
"epoch": 30.606694560669457,
"grad_norm": 0.22608569264411926,
"learning_rate": 1.671535931038415e-07,
"loss": 0.0443,
"step": 14630
},
{
"epoch": 30.627615062761507,
"grad_norm": 0.15184949338436127,
"learning_rate": 1.5826875229076333e-07,
"loss": 0.0417,
"step": 14640
},
{
"epoch": 30.648535564853557,
"grad_norm": 0.1841401606798172,
"learning_rate": 1.496261610170302e-07,
"loss": 0.0397,
"step": 14650
},
{
"epoch": 30.669456066945607,
"grad_norm": 0.17780251801013947,
"learning_rate": 1.4122586128888503e-07,
"loss": 0.0545,
"step": 14660
},
{
"epoch": 30.690376569037657,
"grad_norm": 0.32660233974456787,
"learning_rate": 1.3306789393494612e-07,
"loss": 0.0437,
"step": 14670
},
{
"epoch": 30.711297071129707,
"grad_norm": 0.20620398223400116,
"learning_rate": 1.2515229860599054e-07,
"loss": 0.0414,
"step": 14680
},
{
"epoch": 30.732217573221757,
"grad_norm": 0.3603353798389435,
"learning_rate": 1.1747911377478771e-07,
"loss": 0.0465,
"step": 14690
},
{
"epoch": 30.753138075313807,
"grad_norm": 0.18610107898712158,
"learning_rate": 1.1004837673589952e-07,
"loss": 0.0385,
"step": 14700
},
{
"epoch": 30.774058577405857,
"grad_norm": 0.24775725603103638,
"learning_rate": 1.0286012360550267e-07,
"loss": 0.0507,
"step": 14710
},
{
"epoch": 30.794979079497907,
"grad_norm": 0.19726818799972534,
"learning_rate": 9.591438932121111e-08,
"loss": 0.0421,
"step": 14720
},
{
"epoch": 30.815899581589957,
"grad_norm": 0.2514331340789795,
"learning_rate": 8.921120764189272e-08,
"loss": 0.039,
"step": 14730
},
{
"epoch": 30.836820083682007,
"grad_norm": 0.21896202862262726,
"learning_rate": 8.275061114753068e-08,
"loss": 0.0551,
"step": 14740
},
{
"epoch": 30.85774058577406,
"grad_norm": 0.21271194517612457,
"learning_rate": 7.65326312390624e-08,
"loss": 0.0478,
"step": 14750
},
{
"epoch": 30.87866108786611,
"grad_norm": 0.20337381958961487,
"learning_rate": 7.055729813819079e-08,
"loss": 0.0449,
"step": 14760
},
{
"epoch": 30.89958158995816,
"grad_norm": 0.18087029457092285,
"learning_rate": 6.48246408872899e-08,
"loss": 0.044,
"step": 14770
},
{
"epoch": 30.92050209205021,
"grad_norm": 0.15678484737873077,
"learning_rate": 5.9334687349227314e-08,
"loss": 0.0434,
"step": 14780
},
{
"epoch": 30.94142259414226,
"grad_norm": 0.18289516866207123,
"learning_rate": 5.4087464207236426e-08,
"loss": 0.0419,
"step": 14790
},
{
"epoch": 30.96234309623431,
"grad_norm": 0.21055713295936584,
"learning_rate": 4.9082996964794345e-08,
"loss": 0.0502,
"step": 14800
},
{
"epoch": 30.98326359832636,
"grad_norm": 0.16997161507606506,
"learning_rate": 4.432130994548866e-08,
"loss": 0.045,
"step": 14810
},
{
"epoch": 31.00418410041841,
"grad_norm": 0.15064077079296112,
"learning_rate": 3.980242629291198e-08,
"loss": 0.0556,
"step": 14820
},
{
"epoch": 31.02510460251046,
"grad_norm": 0.23443807661533356,
"learning_rate": 3.5526367970539765e-08,
"loss": 0.0377,
"step": 14830
},
{
"epoch": 31.04602510460251,
"grad_norm": 0.24141825735569,
"learning_rate": 3.1493155761613826e-08,
"loss": 0.043,
"step": 14840
},
{
"epoch": 31.06694560669456,
"grad_norm": 0.1744871884584427,
"learning_rate": 2.7702809269058992e-08,
"loss": 0.0407,
"step": 14850
},
{
"epoch": 31.08786610878661,
"grad_norm": 0.18967264890670776,
"learning_rate": 2.4155346915394337e-08,
"loss": 0.0447,
"step": 14860
},
{
"epoch": 31.10878661087866,
"grad_norm": 0.19735538959503174,
"learning_rate": 2.085078594261103e-08,
"loss": 0.0426,
"step": 14870
},
{
"epoch": 31.12970711297071,
"grad_norm": 0.13576459884643555,
"learning_rate": 1.7789142412122372e-08,
"loss": 0.0398,
"step": 14880
},
{
"epoch": 31.15062761506276,
"grad_norm": 0.17847837507724762,
"learning_rate": 1.4970431204663905e-08,
"loss": 0.0403,
"step": 14890
},
{
"epoch": 31.171548117154813,
"grad_norm": 0.27608200907707214,
"learning_rate": 1.2394666020226764e-08,
"loss": 0.045,
"step": 14900
},
{
"epoch": 31.192468619246863,
"grad_norm": 0.246599480509758,
"learning_rate": 1.0061859378007743e-08,
"loss": 0.0404,
"step": 14910
},
{
"epoch": 31.213389121338913,
"grad_norm": 0.19810283184051514,
"learning_rate": 7.97202261630936e-09,
"loss": 0.0424,
"step": 14920
},
{
"epoch": 31.234309623430963,
"grad_norm": 0.14922457933425903,
"learning_rate": 6.125165892539863e-09,
"loss": 0.035,
"step": 14930
},
{
"epoch": 31.255230125523013,
"grad_norm": 0.1568155139684677,
"learning_rate": 4.5212981831022076e-09,
"loss": 0.0384,
"step": 14940
},
{
"epoch": 31.276150627615063,
"grad_norm": 0.20648132264614105,
"learning_rate": 3.1604272834051542e-09,
"loss": 0.0546,
"step": 14950
},
{
"epoch": 31.297071129707113,
"grad_norm": 0.14501909911632538,
"learning_rate": 2.04255980778556e-09,
"loss": 0.0369,
"step": 14960
},
{
"epoch": 31.317991631799163,
"grad_norm": 0.20130616426467896,
"learning_rate": 1.1677011895028234e-09,
"loss": 0.0464,
"step": 14970
},
{
"epoch": 31.338912133891213,
"grad_norm": 0.22078537940979004,
"learning_rate": 5.358556807000259e-10,
"loss": 0.0453,
"step": 14980
},
{
"epoch": 31.359832635983263,
"grad_norm": 0.20974136888980865,
"learning_rate": 1.4702635238728058e-10,
"loss": 0.0401,
"step": 14990
},
{
"epoch": 31.380753138075313,
"grad_norm": 0.31738701462745667,
"learning_rate": 1.2150944139754927e-12,
"loss": 0.0377,
"step": 15000
}
],
"logging_steps": 10,
"max_steps": 15000,
"num_input_tokens_seen": 0,
"num_train_epochs": 32,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}