task_bottle_0406_20k / trainer_state.json
Dongkkka's picture
Upload folder using huggingface_hub
c7849f1 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.25,
"eval_steps": 500,
"global_step": 20000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"grad_norm": 1.4432214498519897,
"learning_rate": 2.25e-07,
"loss": 1.2176,
"step": 10
},
{
"grad_norm": 1.5728261470794678,
"learning_rate": 4.75e-07,
"loss": 1.2201,
"step": 20
},
{
"grad_norm": 1.7649030685424805,
"learning_rate": 7.25e-07,
"loss": 1.2206,
"step": 30
},
{
"grad_norm": 1.3215194940567017,
"learning_rate": 9.75e-07,
"loss": 1.2173,
"step": 40
},
{
"grad_norm": 1.1600717306137085,
"learning_rate": 1.2250000000000001e-06,
"loss": 1.2025,
"step": 50
},
{
"grad_norm": 0.8358485698699951,
"learning_rate": 1.475e-06,
"loss": 1.1694,
"step": 60
},
{
"grad_norm": 0.7083495855331421,
"learning_rate": 1.7250000000000002e-06,
"loss": 1.1466,
"step": 70
},
{
"grad_norm": 0.4341243505477905,
"learning_rate": 1.975e-06,
"loss": 1.1362,
"step": 80
},
{
"grad_norm": 0.5044889450073242,
"learning_rate": 2.225e-06,
"loss": 1.1188,
"step": 90
},
{
"grad_norm": 0.47925513982772827,
"learning_rate": 2.4750000000000004e-06,
"loss": 1.1153,
"step": 100
},
{
"grad_norm": 0.38645315170288086,
"learning_rate": 2.725e-06,
"loss": 1.1092,
"step": 110
},
{
"grad_norm": 0.3141016662120819,
"learning_rate": 2.975e-06,
"loss": 1.1073,
"step": 120
},
{
"grad_norm": 0.24226568639278412,
"learning_rate": 3.225e-06,
"loss": 1.1137,
"step": 130
},
{
"grad_norm": 0.28608769178390503,
"learning_rate": 3.4750000000000006e-06,
"loss": 1.1113,
"step": 140
},
{
"grad_norm": 0.23423372209072113,
"learning_rate": 3.725e-06,
"loss": 1.109,
"step": 150
},
{
"grad_norm": 0.3078203499317169,
"learning_rate": 3.975e-06,
"loss": 1.0943,
"step": 160
},
{
"grad_norm": 0.28202998638153076,
"learning_rate": 4.225e-06,
"loss": 1.086,
"step": 170
},
{
"grad_norm": 0.2905002534389496,
"learning_rate": 4.475e-06,
"loss": 1.0802,
"step": 180
},
{
"grad_norm": 0.4020580053329468,
"learning_rate": 4.7250000000000005e-06,
"loss": 1.0717,
"step": 190
},
{
"grad_norm": 0.32478901743888855,
"learning_rate": 4.975000000000001e-06,
"loss": 1.057,
"step": 200
},
{
"grad_norm": 0.3583512306213379,
"learning_rate": 5.225e-06,
"loss": 1.0553,
"step": 210
},
{
"grad_norm": 0.45024362206459045,
"learning_rate": 5.475e-06,
"loss": 1.0609,
"step": 220
},
{
"grad_norm": 0.37869516015052795,
"learning_rate": 5.725e-06,
"loss": 1.0491,
"step": 230
},
{
"grad_norm": 0.41009441018104553,
"learning_rate": 5.975e-06,
"loss": 1.0478,
"step": 240
},
{
"grad_norm": 0.36011573672294617,
"learning_rate": 6.2250000000000005e-06,
"loss": 1.0444,
"step": 250
},
{
"grad_norm": 0.4219399094581604,
"learning_rate": 6.475000000000001e-06,
"loss": 1.0465,
"step": 260
},
{
"grad_norm": 0.38675457239151,
"learning_rate": 6.725000000000001e-06,
"loss": 1.0392,
"step": 270
},
{
"grad_norm": 0.548558235168457,
"learning_rate": 6.975000000000001e-06,
"loss": 1.0355,
"step": 280
},
{
"grad_norm": 0.6899825930595398,
"learning_rate": 7.2249999999999994e-06,
"loss": 1.0218,
"step": 290
},
{
"grad_norm": 1.5976816415786743,
"learning_rate": 7.4750000000000004e-06,
"loss": 1.0082,
"step": 300
},
{
"grad_norm": 1.2727433443069458,
"learning_rate": 7.725e-06,
"loss": 0.9888,
"step": 310
},
{
"grad_norm": 1.0883558988571167,
"learning_rate": 7.975e-06,
"loss": 0.9611,
"step": 320
},
{
"grad_norm": 1.0092294216156006,
"learning_rate": 8.225e-06,
"loss": 0.9325,
"step": 330
},
{
"grad_norm": 1.1305841207504272,
"learning_rate": 8.475000000000001e-06,
"loss": 0.9152,
"step": 340
},
{
"grad_norm": 0.8867524862289429,
"learning_rate": 8.725e-06,
"loss": 0.908,
"step": 350
},
{
"grad_norm": 1.259811520576477,
"learning_rate": 8.975e-06,
"loss": 0.8933,
"step": 360
},
{
"grad_norm": 1.06889009475708,
"learning_rate": 9.225e-06,
"loss": 0.8738,
"step": 370
},
{
"grad_norm": 1.2798677682876587,
"learning_rate": 9.475e-06,
"loss": 0.8585,
"step": 380
},
{
"grad_norm": 1.078529953956604,
"learning_rate": 9.725000000000001e-06,
"loss": 0.8468,
"step": 390
},
{
"grad_norm": 1.662562608718872,
"learning_rate": 9.975e-06,
"loss": 0.808,
"step": 400
},
{
"grad_norm": 1.4320045709609985,
"learning_rate": 1.0225e-05,
"loss": 0.7797,
"step": 410
},
{
"grad_norm": 1.7881171703338623,
"learning_rate": 1.0475e-05,
"loss": 0.7256,
"step": 420
},
{
"grad_norm": 1.8525526523590088,
"learning_rate": 1.0725e-05,
"loss": 0.7013,
"step": 430
},
{
"grad_norm": 2.307291269302368,
"learning_rate": 1.0975e-05,
"loss": 0.6795,
"step": 440
},
{
"grad_norm": 1.3419337272644043,
"learning_rate": 1.1225e-05,
"loss": 0.6455,
"step": 450
},
{
"grad_norm": 1.9353570938110352,
"learning_rate": 1.1475000000000001e-05,
"loss": 0.6027,
"step": 460
},
{
"grad_norm": 1.9074933528900146,
"learning_rate": 1.1725e-05,
"loss": 0.5679,
"step": 470
},
{
"grad_norm": 2.188081741333008,
"learning_rate": 1.1975e-05,
"loss": 0.5357,
"step": 480
},
{
"grad_norm": 2.6344354152679443,
"learning_rate": 1.2225e-05,
"loss": 0.4859,
"step": 490
},
{
"grad_norm": 2.0352654457092285,
"learning_rate": 1.2475e-05,
"loss": 0.4646,
"step": 500
},
{
"grad_norm": 2.3691606521606445,
"learning_rate": 1.2725000000000001e-05,
"loss": 0.4303,
"step": 510
},
{
"grad_norm": 1.9416195154190063,
"learning_rate": 1.2975e-05,
"loss": 0.4039,
"step": 520
},
{
"grad_norm": 2.360607862472534,
"learning_rate": 1.3225000000000001e-05,
"loss": 0.3815,
"step": 530
},
{
"grad_norm": 2.2228074073791504,
"learning_rate": 1.3475000000000002e-05,
"loss": 0.3611,
"step": 540
},
{
"grad_norm": 2.24345326423645,
"learning_rate": 1.3725000000000002e-05,
"loss": 0.326,
"step": 550
},
{
"grad_norm": 1.9834187030792236,
"learning_rate": 1.3975000000000003e-05,
"loss": 0.3165,
"step": 560
},
{
"grad_norm": 2.3031227588653564,
"learning_rate": 1.4225e-05,
"loss": 0.2947,
"step": 570
},
{
"grad_norm": 2.648381471633911,
"learning_rate": 1.4475e-05,
"loss": 0.2736,
"step": 580
},
{
"grad_norm": 2.803637981414795,
"learning_rate": 1.4725e-05,
"loss": 0.2526,
"step": 590
},
{
"grad_norm": 2.120967388153076,
"learning_rate": 1.4975e-05,
"loss": 0.2509,
"step": 600
},
{
"grad_norm": 2.4047417640686035,
"learning_rate": 1.5225e-05,
"loss": 0.2239,
"step": 610
},
{
"grad_norm": 1.69230055809021,
"learning_rate": 1.5475e-05,
"loss": 0.2176,
"step": 620
},
{
"grad_norm": 2.378695011138916,
"learning_rate": 1.5725e-05,
"loss": 0.2005,
"step": 630
},
{
"grad_norm": 1.6571719646453857,
"learning_rate": 1.5975000000000002e-05,
"loss": 0.1789,
"step": 640
},
{
"grad_norm": 2.107168197631836,
"learning_rate": 1.6225e-05,
"loss": 0.1756,
"step": 650
},
{
"grad_norm": 1.9598829746246338,
"learning_rate": 1.6475e-05,
"loss": 0.1759,
"step": 660
},
{
"grad_norm": 1.8880751132965088,
"learning_rate": 1.6725000000000003e-05,
"loss": 0.1627,
"step": 670
},
{
"grad_norm": 2.0237114429473877,
"learning_rate": 1.6975000000000003e-05,
"loss": 0.1573,
"step": 680
},
{
"grad_norm": 2.319857597351074,
"learning_rate": 1.7225e-05,
"loss": 0.1573,
"step": 690
},
{
"grad_norm": 2.4036319255828857,
"learning_rate": 1.7475e-05,
"loss": 0.1535,
"step": 700
},
{
"grad_norm": 2.049755334854126,
"learning_rate": 1.7725e-05,
"loss": 0.1671,
"step": 710
},
{
"grad_norm": 2.00295090675354,
"learning_rate": 1.7975e-05,
"loss": 0.1514,
"step": 720
},
{
"grad_norm": 2.2543041706085205,
"learning_rate": 1.8225e-05,
"loss": 0.1315,
"step": 730
},
{
"grad_norm": 2.125260591506958,
"learning_rate": 1.8475000000000002e-05,
"loss": 0.1482,
"step": 740
},
{
"grad_norm": 2.1975486278533936,
"learning_rate": 1.8725e-05,
"loss": 0.1441,
"step": 750
},
{
"grad_norm": 2.1511855125427246,
"learning_rate": 1.8975e-05,
"loss": 0.1447,
"step": 760
},
{
"grad_norm": 1.8105329275131226,
"learning_rate": 1.9225e-05,
"loss": 0.1388,
"step": 770
},
{
"grad_norm": 2.322596549987793,
"learning_rate": 1.9475000000000002e-05,
"loss": 0.1394,
"step": 780
},
{
"grad_norm": 1.4760518074035645,
"learning_rate": 1.9725000000000002e-05,
"loss": 0.1326,
"step": 790
},
{
"grad_norm": 1.71709144115448,
"learning_rate": 1.9975e-05,
"loss": 0.1343,
"step": 800
},
{
"grad_norm": 1.6595476865768433,
"learning_rate": 2.0225000000000004e-05,
"loss": 0.1289,
"step": 810
},
{
"grad_norm": 1.7283124923706055,
"learning_rate": 2.0475e-05,
"loss": 0.1229,
"step": 820
},
{
"grad_norm": 1.847590446472168,
"learning_rate": 2.0725e-05,
"loss": 0.1253,
"step": 830
},
{
"grad_norm": 1.5455881357192993,
"learning_rate": 2.0975e-05,
"loss": 0.1213,
"step": 840
},
{
"grad_norm": 1.900499701499939,
"learning_rate": 2.1225e-05,
"loss": 0.1255,
"step": 850
},
{
"grad_norm": 2.0430359840393066,
"learning_rate": 2.1475e-05,
"loss": 0.1285,
"step": 860
},
{
"grad_norm": 1.9488919973373413,
"learning_rate": 2.1725e-05,
"loss": 0.1202,
"step": 870
},
{
"grad_norm": 2.0237948894500732,
"learning_rate": 2.1975000000000002e-05,
"loss": 0.1242,
"step": 880
},
{
"grad_norm": 1.9511979818344116,
"learning_rate": 2.2225e-05,
"loss": 0.1149,
"step": 890
},
{
"grad_norm": 2.1467931270599365,
"learning_rate": 2.2475e-05,
"loss": 0.1124,
"step": 900
},
{
"grad_norm": 1.8478883504867554,
"learning_rate": 2.2725000000000003e-05,
"loss": 0.1128,
"step": 910
},
{
"grad_norm": 1.758474588394165,
"learning_rate": 2.2975000000000003e-05,
"loss": 0.1079,
"step": 920
},
{
"grad_norm": 1.6769682168960571,
"learning_rate": 2.3225000000000002e-05,
"loss": 0.1068,
"step": 930
},
{
"grad_norm": 1.5823380947113037,
"learning_rate": 2.3475e-05,
"loss": 0.1005,
"step": 940
},
{
"grad_norm": 1.388976812362671,
"learning_rate": 2.3725e-05,
"loss": 0.1069,
"step": 950
},
{
"grad_norm": 1.6260298490524292,
"learning_rate": 2.3975e-05,
"loss": 0.1084,
"step": 960
},
{
"grad_norm": 1.9050042629241943,
"learning_rate": 2.4225e-05,
"loss": 0.1075,
"step": 970
},
{
"grad_norm": 2.01832914352417,
"learning_rate": 2.4475000000000002e-05,
"loss": 0.1097,
"step": 980
},
{
"grad_norm": 1.7089976072311401,
"learning_rate": 2.4725e-05,
"loss": 0.106,
"step": 990
},
{
"grad_norm": 1.495911717414856,
"learning_rate": 2.4975e-05,
"loss": 0.1145,
"step": 1000
},
{
"grad_norm": 1.6462352275848389,
"learning_rate": 2.5225e-05,
"loss": 0.103,
"step": 1010
},
{
"grad_norm": 1.2390409708023071,
"learning_rate": 2.5475e-05,
"loss": 0.1083,
"step": 1020
},
{
"grad_norm": 1.2533819675445557,
"learning_rate": 2.5725e-05,
"loss": 0.1114,
"step": 1030
},
{
"grad_norm": 1.3296815156936646,
"learning_rate": 2.5974999999999998e-05,
"loss": 0.1002,
"step": 1040
},
{
"grad_norm": 1.5312912464141846,
"learning_rate": 2.6225e-05,
"loss": 0.0978,
"step": 1050
},
{
"grad_norm": 1.5894063711166382,
"learning_rate": 2.6475e-05,
"loss": 0.0967,
"step": 1060
},
{
"grad_norm": 1.6358612775802612,
"learning_rate": 2.6725e-05,
"loss": 0.0999,
"step": 1070
},
{
"grad_norm": 1.8921128511428833,
"learning_rate": 2.6975000000000002e-05,
"loss": 0.0922,
"step": 1080
},
{
"grad_norm": 1.783052682876587,
"learning_rate": 2.7225e-05,
"loss": 0.0923,
"step": 1090
},
{
"grad_norm": 1.5674539804458618,
"learning_rate": 2.7475e-05,
"loss": 0.1028,
"step": 1100
},
{
"grad_norm": 1.3623286485671997,
"learning_rate": 2.7725e-05,
"loss": 0.0875,
"step": 1110
},
{
"grad_norm": 1.5205024480819702,
"learning_rate": 2.7975000000000002e-05,
"loss": 0.0987,
"step": 1120
},
{
"grad_norm": 1.4222640991210938,
"learning_rate": 2.8225e-05,
"loss": 0.0977,
"step": 1130
},
{
"grad_norm": 1.4713680744171143,
"learning_rate": 2.8475e-05,
"loss": 0.0894,
"step": 1140
},
{
"grad_norm": 1.4934260845184326,
"learning_rate": 2.8725e-05,
"loss": 0.1008,
"step": 1150
},
{
"grad_norm": 1.3262085914611816,
"learning_rate": 2.8975000000000003e-05,
"loss": 0.0888,
"step": 1160
},
{
"grad_norm": 1.3066847324371338,
"learning_rate": 2.9225000000000002e-05,
"loss": 0.0931,
"step": 1170
},
{
"grad_norm": 1.570601224899292,
"learning_rate": 2.9475e-05,
"loss": 0.0938,
"step": 1180
},
{
"grad_norm": 1.1952320337295532,
"learning_rate": 2.9725000000000004e-05,
"loss": 0.0886,
"step": 1190
},
{
"grad_norm": 1.5063790082931519,
"learning_rate": 2.9975000000000004e-05,
"loss": 0.0947,
"step": 1200
},
{
"grad_norm": 1.5905829668045044,
"learning_rate": 3.0225000000000003e-05,
"loss": 0.0893,
"step": 1210
},
{
"grad_norm": 1.1922318935394287,
"learning_rate": 3.0475000000000002e-05,
"loss": 0.0784,
"step": 1220
},
{
"grad_norm": 0.9978857636451721,
"learning_rate": 3.0725e-05,
"loss": 0.0813,
"step": 1230
},
{
"grad_norm": 1.6586356163024902,
"learning_rate": 3.0975e-05,
"loss": 0.0908,
"step": 1240
},
{
"grad_norm": 1.605808973312378,
"learning_rate": 3.122500000000001e-05,
"loss": 0.0899,
"step": 1250
},
{
"grad_norm": 1.4405441284179688,
"learning_rate": 3.1475e-05,
"loss": 0.0785,
"step": 1260
},
{
"grad_norm": 1.1686655282974243,
"learning_rate": 3.1725e-05,
"loss": 0.0931,
"step": 1270
},
{
"grad_norm": 1.3049031496047974,
"learning_rate": 3.1975e-05,
"loss": 0.0878,
"step": 1280
},
{
"grad_norm": 1.106573462486267,
"learning_rate": 3.2225e-05,
"loss": 0.0873,
"step": 1290
},
{
"grad_norm": 1.315805435180664,
"learning_rate": 3.2474999999999997e-05,
"loss": 0.084,
"step": 1300
},
{
"grad_norm": 1.4285238981246948,
"learning_rate": 3.2725e-05,
"loss": 0.0799,
"step": 1310
},
{
"grad_norm": 1.3433620929718018,
"learning_rate": 3.2975e-05,
"loss": 0.0929,
"step": 1320
},
{
"grad_norm": 1.210469126701355,
"learning_rate": 3.3225e-05,
"loss": 0.0787,
"step": 1330
},
{
"grad_norm": 1.4694474935531616,
"learning_rate": 3.3475e-05,
"loss": 0.0779,
"step": 1340
},
{
"grad_norm": 1.232349157333374,
"learning_rate": 3.3725e-05,
"loss": 0.0889,
"step": 1350
},
{
"grad_norm": 1.6629468202590942,
"learning_rate": 3.3975e-05,
"loss": 0.0903,
"step": 1360
},
{
"grad_norm": 1.3241008520126343,
"learning_rate": 3.4225e-05,
"loss": 0.0797,
"step": 1370
},
{
"grad_norm": 1.1734715700149536,
"learning_rate": 3.4475000000000005e-05,
"loss": 0.0774,
"step": 1380
},
{
"grad_norm": 1.503920555114746,
"learning_rate": 3.4725000000000004e-05,
"loss": 0.0748,
"step": 1390
},
{
"grad_norm": 0.890848696231842,
"learning_rate": 3.4975e-05,
"loss": 0.075,
"step": 1400
},
{
"grad_norm": 1.2756930589675903,
"learning_rate": 3.5225e-05,
"loss": 0.0761,
"step": 1410
},
{
"grad_norm": 1.3272982835769653,
"learning_rate": 3.5475e-05,
"loss": 0.0757,
"step": 1420
},
{
"grad_norm": 1.2470251321792603,
"learning_rate": 3.5725e-05,
"loss": 0.0898,
"step": 1430
},
{
"grad_norm": 1.2361502647399902,
"learning_rate": 3.5975e-05,
"loss": 0.0766,
"step": 1440
},
{
"grad_norm": 1.2339922189712524,
"learning_rate": 3.6225000000000006e-05,
"loss": 0.0737,
"step": 1450
},
{
"grad_norm": 1.0657695531845093,
"learning_rate": 3.6475000000000006e-05,
"loss": 0.0836,
"step": 1460
},
{
"grad_norm": 1.262316346168518,
"learning_rate": 3.6725000000000005e-05,
"loss": 0.0724,
"step": 1470
},
{
"grad_norm": 1.256439447402954,
"learning_rate": 3.6975000000000004e-05,
"loss": 0.071,
"step": 1480
},
{
"grad_norm": 1.0629289150238037,
"learning_rate": 3.7225000000000004e-05,
"loss": 0.0801,
"step": 1490
},
{
"grad_norm": 0.9662951827049255,
"learning_rate": 3.7475e-05,
"loss": 0.0748,
"step": 1500
},
{
"grad_norm": 1.4304341077804565,
"learning_rate": 3.7725e-05,
"loss": 0.0784,
"step": 1510
},
{
"grad_norm": 1.2773442268371582,
"learning_rate": 3.7975e-05,
"loss": 0.0756,
"step": 1520
},
{
"grad_norm": 1.1348503828048706,
"learning_rate": 3.8225e-05,
"loss": 0.0736,
"step": 1530
},
{
"grad_norm": 1.0709927082061768,
"learning_rate": 3.8475e-05,
"loss": 0.0754,
"step": 1540
},
{
"grad_norm": 1.0270332098007202,
"learning_rate": 3.8725e-05,
"loss": 0.0751,
"step": 1550
},
{
"grad_norm": 1.0089706182479858,
"learning_rate": 3.8975e-05,
"loss": 0.0731,
"step": 1560
},
{
"grad_norm": 1.1657140254974365,
"learning_rate": 3.9225e-05,
"loss": 0.0794,
"step": 1570
},
{
"grad_norm": 1.2647517919540405,
"learning_rate": 3.9475000000000004e-05,
"loss": 0.0692,
"step": 1580
},
{
"grad_norm": 0.9981470704078674,
"learning_rate": 3.9725e-05,
"loss": 0.0725,
"step": 1590
},
{
"grad_norm": 0.9286271929740906,
"learning_rate": 3.9975e-05,
"loss": 0.0753,
"step": 1600
},
{
"grad_norm": 1.0005375146865845,
"learning_rate": 4.0225e-05,
"loss": 0.0707,
"step": 1610
},
{
"grad_norm": 1.1060173511505127,
"learning_rate": 4.0475e-05,
"loss": 0.0685,
"step": 1620
},
{
"grad_norm": 1.0261473655700684,
"learning_rate": 4.0725e-05,
"loss": 0.072,
"step": 1630
},
{
"grad_norm": 1.0329949855804443,
"learning_rate": 4.0975e-05,
"loss": 0.0743,
"step": 1640
},
{
"grad_norm": 0.9242092370986938,
"learning_rate": 4.1225e-05,
"loss": 0.0664,
"step": 1650
},
{
"grad_norm": 0.963623583316803,
"learning_rate": 4.1475000000000005e-05,
"loss": 0.0653,
"step": 1660
},
{
"grad_norm": 1.1713886260986328,
"learning_rate": 4.1725000000000005e-05,
"loss": 0.0734,
"step": 1670
},
{
"grad_norm": 1.1296294927597046,
"learning_rate": 4.1975000000000004e-05,
"loss": 0.0698,
"step": 1680
},
{
"grad_norm": 1.015258550643921,
"learning_rate": 4.2225e-05,
"loss": 0.065,
"step": 1690
},
{
"grad_norm": 0.919792115688324,
"learning_rate": 4.2475e-05,
"loss": 0.0759,
"step": 1700
},
{
"grad_norm": 0.8036102652549744,
"learning_rate": 4.2725e-05,
"loss": 0.0666,
"step": 1710
},
{
"grad_norm": 1.2176201343536377,
"learning_rate": 4.2975e-05,
"loss": 0.0713,
"step": 1720
},
{
"grad_norm": 0.8653284907341003,
"learning_rate": 4.322500000000001e-05,
"loss": 0.0675,
"step": 1730
},
{
"grad_norm": 1.0311731100082397,
"learning_rate": 4.3475000000000006e-05,
"loss": 0.0651,
"step": 1740
},
{
"grad_norm": 0.9236791729927063,
"learning_rate": 4.3725000000000006e-05,
"loss": 0.0619,
"step": 1750
},
{
"grad_norm": 0.8307299613952637,
"learning_rate": 4.3975e-05,
"loss": 0.0627,
"step": 1760
},
{
"grad_norm": 0.8774833083152771,
"learning_rate": 4.4225e-05,
"loss": 0.0761,
"step": 1770
},
{
"grad_norm": 0.7616822123527527,
"learning_rate": 4.4475e-05,
"loss": 0.0668,
"step": 1780
},
{
"grad_norm": 0.9956639409065247,
"learning_rate": 4.4725e-05,
"loss": 0.0644,
"step": 1790
},
{
"grad_norm": 0.8970800042152405,
"learning_rate": 4.4975e-05,
"loss": 0.0704,
"step": 1800
},
{
"grad_norm": 0.9267357587814331,
"learning_rate": 4.5225e-05,
"loss": 0.0656,
"step": 1810
},
{
"grad_norm": 1.125333309173584,
"learning_rate": 4.5475e-05,
"loss": 0.0678,
"step": 1820
},
{
"grad_norm": 0.8214185237884521,
"learning_rate": 4.5725e-05,
"loss": 0.0642,
"step": 1830
},
{
"grad_norm": 0.8470892906188965,
"learning_rate": 4.5975e-05,
"loss": 0.0596,
"step": 1840
},
{
"grad_norm": 0.9773886203765869,
"learning_rate": 4.6225e-05,
"loss": 0.0619,
"step": 1850
},
{
"grad_norm": 0.9647141695022583,
"learning_rate": 4.6475000000000005e-05,
"loss": 0.0638,
"step": 1860
},
{
"grad_norm": 0.7629884481430054,
"learning_rate": 4.6725000000000004e-05,
"loss": 0.0637,
"step": 1870
},
{
"grad_norm": 1.3230962753295898,
"learning_rate": 4.6975000000000003e-05,
"loss": 0.0603,
"step": 1880
},
{
"grad_norm": 0.6659095287322998,
"learning_rate": 4.7225e-05,
"loss": 0.0587,
"step": 1890
},
{
"grad_norm": 0.8982052803039551,
"learning_rate": 4.7475e-05,
"loss": 0.0638,
"step": 1900
},
{
"grad_norm": 1.0291091203689575,
"learning_rate": 4.7725e-05,
"loss": 0.0642,
"step": 1910
},
{
"grad_norm": 1.109850287437439,
"learning_rate": 4.7975e-05,
"loss": 0.0581,
"step": 1920
},
{
"grad_norm": 1.130811333656311,
"learning_rate": 4.822500000000001e-05,
"loss": 0.0597,
"step": 1930
},
{
"grad_norm": 0.752946138381958,
"learning_rate": 4.8475000000000006e-05,
"loss": 0.0733,
"step": 1940
},
{
"grad_norm": 0.8653437495231628,
"learning_rate": 4.8725000000000005e-05,
"loss": 0.0598,
"step": 1950
},
{
"grad_norm": 0.6604033708572388,
"learning_rate": 4.8975000000000005e-05,
"loss": 0.0582,
"step": 1960
},
{
"grad_norm": 0.7046467661857605,
"learning_rate": 4.9225000000000004e-05,
"loss": 0.0638,
"step": 1970
},
{
"grad_norm": 1.0231807231903076,
"learning_rate": 4.9475e-05,
"loss": 0.0688,
"step": 1980
},
{
"grad_norm": 0.8604083061218262,
"learning_rate": 4.9725e-05,
"loss": 0.0593,
"step": 1990
},
{
"grad_norm": 0.9460083246231079,
"learning_rate": 4.9975e-05,
"loss": 0.0627,
"step": 2000
},
{
"grad_norm": 0.9021515846252441,
"learning_rate": 5.0225e-05,
"loss": 0.0631,
"step": 2010
},
{
"grad_norm": 0.7849692106246948,
"learning_rate": 5.047500000000001e-05,
"loss": 0.0643,
"step": 2020
},
{
"grad_norm": 0.7976584434509277,
"learning_rate": 5.0725e-05,
"loss": 0.0562,
"step": 2030
},
{
"grad_norm": 0.9405306577682495,
"learning_rate": 5.0975000000000006e-05,
"loss": 0.0638,
"step": 2040
},
{
"grad_norm": 0.7782784700393677,
"learning_rate": 5.1225e-05,
"loss": 0.0645,
"step": 2050
},
{
"grad_norm": 0.9583297371864319,
"learning_rate": 5.1475000000000004e-05,
"loss": 0.0614,
"step": 2060
},
{
"grad_norm": 0.8113962411880493,
"learning_rate": 5.1725000000000004e-05,
"loss": 0.0611,
"step": 2070
},
{
"grad_norm": 1.0610644817352295,
"learning_rate": 5.197500000000001e-05,
"loss": 0.0622,
"step": 2080
},
{
"grad_norm": 0.7754601836204529,
"learning_rate": 5.2225e-05,
"loss": 0.0608,
"step": 2090
},
{
"grad_norm": 0.8049399256706238,
"learning_rate": 5.247500000000001e-05,
"loss": 0.0573,
"step": 2100
},
{
"grad_norm": 0.882763683795929,
"learning_rate": 5.2725e-05,
"loss": 0.0587,
"step": 2110
},
{
"grad_norm": 0.784490704536438,
"learning_rate": 5.297500000000001e-05,
"loss": 0.0545,
"step": 2120
},
{
"grad_norm": 0.8992010951042175,
"learning_rate": 5.3225e-05,
"loss": 0.0611,
"step": 2130
},
{
"grad_norm": 0.735640823841095,
"learning_rate": 5.3475e-05,
"loss": 0.0606,
"step": 2140
},
{
"grad_norm": 0.6198451519012451,
"learning_rate": 5.3725000000000005e-05,
"loss": 0.0598,
"step": 2150
},
{
"grad_norm": 0.7771381735801697,
"learning_rate": 5.3975e-05,
"loss": 0.05,
"step": 2160
},
{
"grad_norm": 0.738305389881134,
"learning_rate": 5.4225000000000003e-05,
"loss": 0.0574,
"step": 2170
},
{
"grad_norm": 0.794254720211029,
"learning_rate": 5.4474999999999996e-05,
"loss": 0.0596,
"step": 2180
},
{
"grad_norm": 0.732258677482605,
"learning_rate": 5.4725e-05,
"loss": 0.0573,
"step": 2190
},
{
"grad_norm": 0.918854832649231,
"learning_rate": 5.4975e-05,
"loss": 0.0533,
"step": 2200
},
{
"grad_norm": 0.7550817131996155,
"learning_rate": 5.522500000000001e-05,
"loss": 0.0548,
"step": 2210
},
{
"grad_norm": 0.6007593274116516,
"learning_rate": 5.5475e-05,
"loss": 0.0551,
"step": 2220
},
{
"grad_norm": 0.8961713910102844,
"learning_rate": 5.5725000000000006e-05,
"loss": 0.0547,
"step": 2230
},
{
"grad_norm": 0.8532932996749878,
"learning_rate": 5.5975e-05,
"loss": 0.059,
"step": 2240
},
{
"grad_norm": 0.736585259437561,
"learning_rate": 5.6225000000000005e-05,
"loss": 0.0621,
"step": 2250
},
{
"grad_norm": 0.9078760743141174,
"learning_rate": 5.6475e-05,
"loss": 0.0651,
"step": 2260
},
{
"grad_norm": 0.7268538475036621,
"learning_rate": 5.6725e-05,
"loss": 0.0632,
"step": 2270
},
{
"grad_norm": 0.7124704718589783,
"learning_rate": 5.6975e-05,
"loss": 0.0542,
"step": 2280
},
{
"grad_norm": 0.6639304757118225,
"learning_rate": 5.722500000000001e-05,
"loss": 0.0498,
"step": 2290
},
{
"grad_norm": 0.47446736693382263,
"learning_rate": 5.7475e-05,
"loss": 0.0536,
"step": 2300
},
{
"grad_norm": 0.6784394979476929,
"learning_rate": 5.772500000000001e-05,
"loss": 0.0565,
"step": 2310
},
{
"grad_norm": 0.826998770236969,
"learning_rate": 5.7975e-05,
"loss": 0.0549,
"step": 2320
},
{
"grad_norm": 0.6909579634666443,
"learning_rate": 5.8225000000000006e-05,
"loss": 0.0591,
"step": 2330
},
{
"grad_norm": 0.7020758986473083,
"learning_rate": 5.8475000000000005e-05,
"loss": 0.0558,
"step": 2340
},
{
"grad_norm": 0.9214292764663696,
"learning_rate": 5.8725000000000004e-05,
"loss": 0.0532,
"step": 2350
},
{
"grad_norm": 0.6545830965042114,
"learning_rate": 5.8975000000000004e-05,
"loss": 0.0611,
"step": 2360
},
{
"grad_norm": 0.5123686194419861,
"learning_rate": 5.922500000000001e-05,
"loss": 0.0569,
"step": 2370
},
{
"grad_norm": 0.9388223886489868,
"learning_rate": 5.9475e-05,
"loss": 0.0577,
"step": 2380
},
{
"grad_norm": 0.6229625940322876,
"learning_rate": 5.9724999999999995e-05,
"loss": 0.056,
"step": 2390
},
{
"grad_norm": 0.5820695757865906,
"learning_rate": 5.9975e-05,
"loss": 0.0562,
"step": 2400
},
{
"grad_norm": 0.7807344794273376,
"learning_rate": 6.0225e-05,
"loss": 0.0517,
"step": 2410
},
{
"grad_norm": 0.6235376596450806,
"learning_rate": 6.0475000000000006e-05,
"loss": 0.053,
"step": 2420
},
{
"grad_norm": 0.6682143807411194,
"learning_rate": 6.0725e-05,
"loss": 0.0538,
"step": 2430
},
{
"grad_norm": 0.625502347946167,
"learning_rate": 6.0975000000000005e-05,
"loss": 0.0503,
"step": 2440
},
{
"grad_norm": 0.7932114005088806,
"learning_rate": 6.1225e-05,
"loss": 0.0534,
"step": 2450
},
{
"grad_norm": 0.746182918548584,
"learning_rate": 6.1475e-05,
"loss": 0.0605,
"step": 2460
},
{
"grad_norm": 0.4769274890422821,
"learning_rate": 6.1725e-05,
"loss": 0.0489,
"step": 2470
},
{
"grad_norm": 0.7346721291542053,
"learning_rate": 6.1975e-05,
"loss": 0.0539,
"step": 2480
},
{
"grad_norm": 0.7812864780426025,
"learning_rate": 6.2225e-05,
"loss": 0.0498,
"step": 2490
},
{
"grad_norm": 0.7502540349960327,
"learning_rate": 6.2475e-05,
"loss": 0.0554,
"step": 2500
},
{
"grad_norm": 0.7815192937850952,
"learning_rate": 6.2725e-05,
"loss": 0.0573,
"step": 2510
},
{
"grad_norm": 0.647578775882721,
"learning_rate": 6.297500000000001e-05,
"loss": 0.0522,
"step": 2520
},
{
"grad_norm": 0.7370708584785461,
"learning_rate": 6.3225e-05,
"loss": 0.0492,
"step": 2530
},
{
"grad_norm": 0.5644780993461609,
"learning_rate": 6.347500000000001e-05,
"loss": 0.0487,
"step": 2540
},
{
"grad_norm": 0.7326799035072327,
"learning_rate": 6.3725e-05,
"loss": 0.0561,
"step": 2550
},
{
"grad_norm": 0.6633973717689514,
"learning_rate": 6.397500000000001e-05,
"loss": 0.0474,
"step": 2560
},
{
"grad_norm": 0.7116085290908813,
"learning_rate": 6.4225e-05,
"loss": 0.0482,
"step": 2570
},
{
"grad_norm": 0.6394766569137573,
"learning_rate": 6.447500000000001e-05,
"loss": 0.0578,
"step": 2580
},
{
"grad_norm": 0.7912008166313171,
"learning_rate": 6.4725e-05,
"loss": 0.0505,
"step": 2590
},
{
"grad_norm": 0.744045078754425,
"learning_rate": 6.497500000000001e-05,
"loss": 0.0521,
"step": 2600
},
{
"grad_norm": 0.8164146542549133,
"learning_rate": 6.5225e-05,
"loss": 0.0501,
"step": 2610
},
{
"grad_norm": 0.5666782259941101,
"learning_rate": 6.5475e-05,
"loss": 0.0499,
"step": 2620
},
{
"grad_norm": 0.8259328007698059,
"learning_rate": 6.5725e-05,
"loss": 0.0541,
"step": 2630
},
{
"grad_norm": 0.5527114868164062,
"learning_rate": 6.5975e-05,
"loss": 0.051,
"step": 2640
},
{
"grad_norm": 0.58927983045578,
"learning_rate": 6.6225e-05,
"loss": 0.0513,
"step": 2650
},
{
"grad_norm": 0.4701420068740845,
"learning_rate": 6.6475e-05,
"loss": 0.0476,
"step": 2660
},
{
"grad_norm": 0.4634196162223816,
"learning_rate": 6.672500000000001e-05,
"loss": 0.053,
"step": 2670
},
{
"grad_norm": 0.7381142973899841,
"learning_rate": 6.6975e-05,
"loss": 0.0475,
"step": 2680
},
{
"grad_norm": 0.635261595249176,
"learning_rate": 6.722500000000001e-05,
"loss": 0.0566,
"step": 2690
},
{
"grad_norm": 0.5740917921066284,
"learning_rate": 6.7475e-05,
"loss": 0.0509,
"step": 2700
},
{
"grad_norm": 0.591770350933075,
"learning_rate": 6.7725e-05,
"loss": 0.0562,
"step": 2710
},
{
"grad_norm": 0.771838366985321,
"learning_rate": 6.7975e-05,
"loss": 0.0502,
"step": 2720
},
{
"grad_norm": 0.7171633243560791,
"learning_rate": 6.8225e-05,
"loss": 0.0548,
"step": 2730
},
{
"grad_norm": 0.6417476534843445,
"learning_rate": 6.8475e-05,
"loss": 0.0557,
"step": 2740
},
{
"grad_norm": 0.7948347330093384,
"learning_rate": 6.8725e-05,
"loss": 0.0518,
"step": 2750
},
{
"grad_norm": 0.6874246001243591,
"learning_rate": 6.8975e-05,
"loss": 0.0529,
"step": 2760
},
{
"grad_norm": 0.5800514817237854,
"learning_rate": 6.9225e-05,
"loss": 0.0457,
"step": 2770
},
{
"grad_norm": 0.5260016322135925,
"learning_rate": 6.9475e-05,
"loss": 0.0521,
"step": 2780
},
{
"grad_norm": 0.5826354026794434,
"learning_rate": 6.9725e-05,
"loss": 0.0512,
"step": 2790
},
{
"grad_norm": 0.570827305316925,
"learning_rate": 6.997500000000001e-05,
"loss": 0.0488,
"step": 2800
},
{
"grad_norm": 0.6201433539390564,
"learning_rate": 7.022500000000001e-05,
"loss": 0.045,
"step": 2810
},
{
"grad_norm": 0.6378107070922852,
"learning_rate": 7.0475e-05,
"loss": 0.0432,
"step": 2820
},
{
"grad_norm": 0.7345712780952454,
"learning_rate": 7.072500000000001e-05,
"loss": 0.0533,
"step": 2830
},
{
"grad_norm": 0.5760173797607422,
"learning_rate": 7.0975e-05,
"loss": 0.0522,
"step": 2840
},
{
"grad_norm": 0.5737773180007935,
"learning_rate": 7.122500000000001e-05,
"loss": 0.0467,
"step": 2850
},
{
"grad_norm": 0.5438277125358582,
"learning_rate": 7.1475e-05,
"loss": 0.0486,
"step": 2860
},
{
"grad_norm": 0.6070303916931152,
"learning_rate": 7.172500000000001e-05,
"loss": 0.05,
"step": 2870
},
{
"grad_norm": 0.5815456509590149,
"learning_rate": 7.1975e-05,
"loss": 0.0462,
"step": 2880
},
{
"grad_norm": 0.6135198473930359,
"learning_rate": 7.2225e-05,
"loss": 0.0446,
"step": 2890
},
{
"grad_norm": 0.7103997468948364,
"learning_rate": 7.2475e-05,
"loss": 0.0457,
"step": 2900
},
{
"grad_norm": 0.4404158592224121,
"learning_rate": 7.272499999999999e-05,
"loss": 0.0449,
"step": 2910
},
{
"grad_norm": 0.5680925250053406,
"learning_rate": 7.2975e-05,
"loss": 0.0436,
"step": 2920
},
{
"grad_norm": 0.6294459104537964,
"learning_rate": 7.3225e-05,
"loss": 0.0523,
"step": 2930
},
{
"grad_norm": 0.6799387335777283,
"learning_rate": 7.347500000000001e-05,
"loss": 0.0542,
"step": 2940
},
{
"grad_norm": 0.5161760449409485,
"learning_rate": 7.3725e-05,
"loss": 0.0523,
"step": 2950
},
{
"grad_norm": 0.5557070970535278,
"learning_rate": 7.397500000000001e-05,
"loss": 0.0558,
"step": 2960
},
{
"grad_norm": 0.5801904201507568,
"learning_rate": 7.4225e-05,
"loss": 0.0482,
"step": 2970
},
{
"grad_norm": 0.5068460702896118,
"learning_rate": 7.447500000000001e-05,
"loss": 0.048,
"step": 2980
},
{
"grad_norm": 0.49014368653297424,
"learning_rate": 7.4725e-05,
"loss": 0.0463,
"step": 2990
},
{
"grad_norm": 0.47462910413742065,
"learning_rate": 7.4975e-05,
"loss": 0.0488,
"step": 3000
},
{
"grad_norm": 0.6453213691711426,
"learning_rate": 7.5225e-05,
"loss": 0.0541,
"step": 3010
},
{
"grad_norm": 0.6153588891029358,
"learning_rate": 7.5475e-05,
"loss": 0.0469,
"step": 3020
},
{
"grad_norm": 0.4591832458972931,
"learning_rate": 7.5725e-05,
"loss": 0.0445,
"step": 3030
},
{
"grad_norm": 0.6439560651779175,
"learning_rate": 7.5975e-05,
"loss": 0.0425,
"step": 3040
},
{
"grad_norm": 0.5622746348381042,
"learning_rate": 7.6225e-05,
"loss": 0.0477,
"step": 3050
},
{
"grad_norm": 0.47292307019233704,
"learning_rate": 7.6475e-05,
"loss": 0.0452,
"step": 3060
},
{
"grad_norm": 0.6778805255889893,
"learning_rate": 7.672500000000001e-05,
"loss": 0.0477,
"step": 3070
},
{
"grad_norm": 0.4940475821495056,
"learning_rate": 7.697500000000001e-05,
"loss": 0.0445,
"step": 3080
},
{
"grad_norm": 0.5741375088691711,
"learning_rate": 7.722500000000001e-05,
"loss": 0.0457,
"step": 3090
},
{
"grad_norm": 0.6149645447731018,
"learning_rate": 7.747500000000001e-05,
"loss": 0.0495,
"step": 3100
},
{
"grad_norm": 0.5375564694404602,
"learning_rate": 7.7725e-05,
"loss": 0.054,
"step": 3110
},
{
"grad_norm": 0.5478602051734924,
"learning_rate": 7.797500000000001e-05,
"loss": 0.0423,
"step": 3120
},
{
"grad_norm": 0.4536065459251404,
"learning_rate": 7.8225e-05,
"loss": 0.0524,
"step": 3130
},
{
"grad_norm": 0.5834420323371887,
"learning_rate": 7.8475e-05,
"loss": 0.0447,
"step": 3140
},
{
"grad_norm": 0.5539586544036865,
"learning_rate": 7.8725e-05,
"loss": 0.0481,
"step": 3150
},
{
"grad_norm": 0.6485045552253723,
"learning_rate": 7.8975e-05,
"loss": 0.0424,
"step": 3160
},
{
"grad_norm": 0.4400906264781952,
"learning_rate": 7.9225e-05,
"loss": 0.046,
"step": 3170
},
{
"grad_norm": 0.42406129837036133,
"learning_rate": 7.9475e-05,
"loss": 0.0487,
"step": 3180
},
{
"grad_norm": 0.43238261342048645,
"learning_rate": 7.9725e-05,
"loss": 0.0491,
"step": 3190
},
{
"grad_norm": 0.5488142967224121,
"learning_rate": 7.9975e-05,
"loss": 0.042,
"step": 3200
},
{
"grad_norm": 0.43411511182785034,
"learning_rate": 8.022500000000001e-05,
"loss": 0.043,
"step": 3210
},
{
"grad_norm": 0.5469537377357483,
"learning_rate": 8.0475e-05,
"loss": 0.0455,
"step": 3220
},
{
"grad_norm": 0.5347734689712524,
"learning_rate": 8.072500000000001e-05,
"loss": 0.0519,
"step": 3230
},
{
"grad_norm": 0.7352176308631897,
"learning_rate": 8.0975e-05,
"loss": 0.0482,
"step": 3240
},
{
"grad_norm": 0.45272985100746155,
"learning_rate": 8.122500000000001e-05,
"loss": 0.0487,
"step": 3250
},
{
"grad_norm": 0.5572296977043152,
"learning_rate": 8.1475e-05,
"loss": 0.0485,
"step": 3260
},
{
"grad_norm": 0.7316561937332153,
"learning_rate": 8.172500000000001e-05,
"loss": 0.0475,
"step": 3270
},
{
"grad_norm": 0.6347060203552246,
"learning_rate": 8.1975e-05,
"loss": 0.0438,
"step": 3280
},
{
"grad_norm": 0.6271874904632568,
"learning_rate": 8.2225e-05,
"loss": 0.0489,
"step": 3290
},
{
"grad_norm": 0.3748721778392792,
"learning_rate": 8.2475e-05,
"loss": 0.0476,
"step": 3300
},
{
"grad_norm": 0.4579646587371826,
"learning_rate": 8.2725e-05,
"loss": 0.0442,
"step": 3310
},
{
"grad_norm": 0.46649906039237976,
"learning_rate": 8.2975e-05,
"loss": 0.0495,
"step": 3320
},
{
"grad_norm": 0.5870574712753296,
"learning_rate": 8.3225e-05,
"loss": 0.0452,
"step": 3330
},
{
"grad_norm": 0.42938050627708435,
"learning_rate": 8.347500000000001e-05,
"loss": 0.0445,
"step": 3340
},
{
"grad_norm": 0.4771455228328705,
"learning_rate": 8.3725e-05,
"loss": 0.0452,
"step": 3350
},
{
"grad_norm": 0.46662503480911255,
"learning_rate": 8.397500000000001e-05,
"loss": 0.0445,
"step": 3360
},
{
"grad_norm": 0.43966543674468994,
"learning_rate": 8.422500000000001e-05,
"loss": 0.0427,
"step": 3370
},
{
"grad_norm": 0.5394494533538818,
"learning_rate": 8.447500000000001e-05,
"loss": 0.0453,
"step": 3380
},
{
"grad_norm": 0.4468139708042145,
"learning_rate": 8.4725e-05,
"loss": 0.0479,
"step": 3390
},
{
"grad_norm": 0.5478911399841309,
"learning_rate": 8.4975e-05,
"loss": 0.048,
"step": 3400
},
{
"grad_norm": 0.4762585759162903,
"learning_rate": 8.5225e-05,
"loss": 0.05,
"step": 3410
},
{
"grad_norm": 0.5173057913780212,
"learning_rate": 8.5475e-05,
"loss": 0.0442,
"step": 3420
},
{
"grad_norm": 0.5589162707328796,
"learning_rate": 8.5725e-05,
"loss": 0.0436,
"step": 3430
},
{
"grad_norm": 0.5997108817100525,
"learning_rate": 8.5975e-05,
"loss": 0.0467,
"step": 3440
},
{
"grad_norm": 0.4055095911026001,
"learning_rate": 8.6225e-05,
"loss": 0.0401,
"step": 3450
},
{
"grad_norm": 0.593838632106781,
"learning_rate": 8.6475e-05,
"loss": 0.0428,
"step": 3460
},
{
"grad_norm": 0.6099951863288879,
"learning_rate": 8.672500000000001e-05,
"loss": 0.0488,
"step": 3470
},
{
"grad_norm": 0.4896881580352783,
"learning_rate": 8.6975e-05,
"loss": 0.0454,
"step": 3480
},
{
"grad_norm": 0.4892227053642273,
"learning_rate": 8.7225e-05,
"loss": 0.043,
"step": 3490
},
{
"grad_norm": 0.4351995885372162,
"learning_rate": 8.747500000000001e-05,
"loss": 0.0423,
"step": 3500
},
{
"grad_norm": 0.6612355709075928,
"learning_rate": 8.7725e-05,
"loss": 0.0494,
"step": 3510
},
{
"grad_norm": 0.490257203578949,
"learning_rate": 8.797500000000001e-05,
"loss": 0.0442,
"step": 3520
},
{
"grad_norm": 0.4129466414451599,
"learning_rate": 8.8225e-05,
"loss": 0.0451,
"step": 3530
},
{
"grad_norm": 0.5617825984954834,
"learning_rate": 8.847500000000001e-05,
"loss": 0.0438,
"step": 3540
},
{
"grad_norm": 0.6246066093444824,
"learning_rate": 8.8725e-05,
"loss": 0.0445,
"step": 3550
},
{
"grad_norm": 0.40516310930252075,
"learning_rate": 8.897500000000001e-05,
"loss": 0.0458,
"step": 3560
},
{
"grad_norm": 0.5519590973854065,
"learning_rate": 8.9225e-05,
"loss": 0.0458,
"step": 3570
},
{
"grad_norm": 0.4888351261615753,
"learning_rate": 8.9475e-05,
"loss": 0.0475,
"step": 3580
},
{
"grad_norm": 0.39518243074417114,
"learning_rate": 8.9725e-05,
"loss": 0.043,
"step": 3590
},
{
"grad_norm": 0.45062577724456787,
"learning_rate": 8.9975e-05,
"loss": 0.0435,
"step": 3600
},
{
"grad_norm": 0.47647690773010254,
"learning_rate": 9.0225e-05,
"loss": 0.0432,
"step": 3610
},
{
"grad_norm": 0.5328305959701538,
"learning_rate": 9.0475e-05,
"loss": 0.0436,
"step": 3620
},
{
"grad_norm": 0.4746423661708832,
"learning_rate": 9.072500000000001e-05,
"loss": 0.0459,
"step": 3630
},
{
"grad_norm": 0.5875771045684814,
"learning_rate": 9.0975e-05,
"loss": 0.0427,
"step": 3640
},
{
"grad_norm": 0.4863992929458618,
"learning_rate": 9.122500000000001e-05,
"loss": 0.0405,
"step": 3650
},
{
"grad_norm": 0.5402488112449646,
"learning_rate": 9.1475e-05,
"loss": 0.0419,
"step": 3660
},
{
"grad_norm": 0.5397239327430725,
"learning_rate": 9.172500000000001e-05,
"loss": 0.0415,
"step": 3670
},
{
"grad_norm": 0.5445768237113953,
"learning_rate": 9.1975e-05,
"loss": 0.0401,
"step": 3680
},
{
"grad_norm": 0.5987370014190674,
"learning_rate": 9.2225e-05,
"loss": 0.0471,
"step": 3690
},
{
"grad_norm": 0.7309731841087341,
"learning_rate": 9.2475e-05,
"loss": 0.0383,
"step": 3700
},
{
"grad_norm": 0.5340412259101868,
"learning_rate": 9.2725e-05,
"loss": 0.0423,
"step": 3710
},
{
"grad_norm": 0.46930262446403503,
"learning_rate": 9.2975e-05,
"loss": 0.0427,
"step": 3720
},
{
"grad_norm": 0.4816801846027374,
"learning_rate": 9.3225e-05,
"loss": 0.0457,
"step": 3730
},
{
"grad_norm": 0.5450388193130493,
"learning_rate": 9.3475e-05,
"loss": 0.0465,
"step": 3740
},
{
"grad_norm": 0.48289021849632263,
"learning_rate": 9.3725e-05,
"loss": 0.0438,
"step": 3750
},
{
"grad_norm": 0.38312482833862305,
"learning_rate": 9.397500000000001e-05,
"loss": 0.0448,
"step": 3760
},
{
"grad_norm": 0.4564635455608368,
"learning_rate": 9.422500000000001e-05,
"loss": 0.0376,
"step": 3770
},
{
"grad_norm": 0.501518189907074,
"learning_rate": 9.4475e-05,
"loss": 0.0422,
"step": 3780
},
{
"grad_norm": 0.5535281896591187,
"learning_rate": 9.472500000000001e-05,
"loss": 0.0415,
"step": 3790
},
{
"grad_norm": 0.5068562626838684,
"learning_rate": 9.4975e-05,
"loss": 0.044,
"step": 3800
},
{
"grad_norm": 0.4238268733024597,
"learning_rate": 9.522500000000001e-05,
"loss": 0.0512,
"step": 3810
},
{
"grad_norm": 0.40982863306999207,
"learning_rate": 9.5475e-05,
"loss": 0.0514,
"step": 3820
},
{
"grad_norm": 0.4888747036457062,
"learning_rate": 9.572500000000001e-05,
"loss": 0.0417,
"step": 3830
},
{
"grad_norm": 0.36337292194366455,
"learning_rate": 9.5975e-05,
"loss": 0.0452,
"step": 3840
},
{
"grad_norm": 0.5338613986968994,
"learning_rate": 9.622500000000001e-05,
"loss": 0.0439,
"step": 3850
},
{
"grad_norm": 0.5002034306526184,
"learning_rate": 9.6475e-05,
"loss": 0.046,
"step": 3860
},
{
"grad_norm": 0.5638089179992676,
"learning_rate": 9.6725e-05,
"loss": 0.0414,
"step": 3870
},
{
"grad_norm": 0.4765036106109619,
"learning_rate": 9.6975e-05,
"loss": 0.042,
"step": 3880
},
{
"grad_norm": 0.443176805973053,
"learning_rate": 9.7225e-05,
"loss": 0.046,
"step": 3890
},
{
"grad_norm": 0.39989593625068665,
"learning_rate": 9.747500000000001e-05,
"loss": 0.0472,
"step": 3900
},
{
"grad_norm": 0.44405046105384827,
"learning_rate": 9.7725e-05,
"loss": 0.0426,
"step": 3910
},
{
"grad_norm": 0.5469679832458496,
"learning_rate": 9.797500000000001e-05,
"loss": 0.0494,
"step": 3920
},
{
"grad_norm": 0.5193902254104614,
"learning_rate": 9.8225e-05,
"loss": 0.0418,
"step": 3930
},
{
"grad_norm": 0.44379061460494995,
"learning_rate": 9.847500000000001e-05,
"loss": 0.0439,
"step": 3940
},
{
"grad_norm": 0.3936866819858551,
"learning_rate": 9.8725e-05,
"loss": 0.0427,
"step": 3950
},
{
"grad_norm": 0.3606123924255371,
"learning_rate": 9.897500000000001e-05,
"loss": 0.0414,
"step": 3960
},
{
"grad_norm": 0.4419628381729126,
"learning_rate": 9.9225e-05,
"loss": 0.0386,
"step": 3970
},
{
"grad_norm": 0.43262916803359985,
"learning_rate": 9.9475e-05,
"loss": 0.0397,
"step": 3980
},
{
"grad_norm": 0.35829615592956543,
"learning_rate": 9.9725e-05,
"loss": 0.0388,
"step": 3990
},
{
"grad_norm": 0.4421238899230957,
"learning_rate": 9.9975e-05,
"loss": 0.0407,
"step": 4000
},
{
"grad_norm": 0.3859304189682007,
"learning_rate": 9.999999653982884e-05,
"loss": 0.0399,
"step": 4010
},
{
"grad_norm": 0.31160181760787964,
"learning_rate": 9.999998457874392e-05,
"loss": 0.0379,
"step": 4020
},
{
"grad_norm": 0.43243879079818726,
"learning_rate": 9.999996407402913e-05,
"loss": 0.0423,
"step": 4030
},
{
"grad_norm": 0.4805966317653656,
"learning_rate": 9.999993502568801e-05,
"loss": 0.041,
"step": 4040
},
{
"grad_norm": 0.4580017626285553,
"learning_rate": 9.999989743372548e-05,
"loss": 0.0421,
"step": 4050
},
{
"grad_norm": 0.4651296138763428,
"learning_rate": 9.999985129814798e-05,
"loss": 0.0418,
"step": 4060
},
{
"grad_norm": 0.4289516508579254,
"learning_rate": 9.99997966189634e-05,
"loss": 0.0376,
"step": 4070
},
{
"grad_norm": 0.4172024130821228,
"learning_rate": 9.999973339618107e-05,
"loss": 0.0403,
"step": 4080
},
{
"grad_norm": 0.4561389088630676,
"learning_rate": 9.999966162981179e-05,
"loss": 0.0404,
"step": 4090
},
{
"grad_norm": 0.4419674575328827,
"learning_rate": 9.999958131986784e-05,
"loss": 0.0399,
"step": 4100
},
{
"grad_norm": 0.3951598107814789,
"learning_rate": 9.999949246636293e-05,
"loss": 0.0449,
"step": 4110
},
{
"grad_norm": 0.3458001911640167,
"learning_rate": 9.999939506931224e-05,
"loss": 0.0429,
"step": 4120
},
{
"grad_norm": 0.5925477147102356,
"learning_rate": 9.999928912873243e-05,
"loss": 0.046,
"step": 4130
},
{
"grad_norm": 0.35705140233039856,
"learning_rate": 9.999917464464159e-05,
"loss": 0.0417,
"step": 4140
},
{
"grad_norm": 0.39123034477233887,
"learning_rate": 9.999905161705929e-05,
"loss": 0.0412,
"step": 4150
},
{
"grad_norm": 0.4060784578323364,
"learning_rate": 9.999892004600653e-05,
"loss": 0.037,
"step": 4160
},
{
"grad_norm": 0.37971585988998413,
"learning_rate": 9.999877993150581e-05,
"loss": 0.0422,
"step": 4170
},
{
"grad_norm": 0.5471509695053101,
"learning_rate": 9.999863127358108e-05,
"loss": 0.0423,
"step": 4180
},
{
"grad_norm": 0.38467666506767273,
"learning_rate": 9.999847407225773e-05,
"loss": 0.0423,
"step": 4190
},
{
"grad_norm": 0.464873731136322,
"learning_rate": 9.999830832756262e-05,
"loss": 0.038,
"step": 4200
},
{
"grad_norm": 0.47094425559043884,
"learning_rate": 9.999813403952407e-05,
"loss": 0.0379,
"step": 4210
},
{
"grad_norm": 0.4815444052219391,
"learning_rate": 9.999795120817187e-05,
"loss": 0.0399,
"step": 4220
},
{
"grad_norm": 0.4033340513706207,
"learning_rate": 9.999775983353725e-05,
"loss": 0.037,
"step": 4230
},
{
"grad_norm": 0.3992239832878113,
"learning_rate": 9.999755991565292e-05,
"loss": 0.0361,
"step": 4240
},
{
"grad_norm": 0.4696410894393921,
"learning_rate": 9.999735145455303e-05,
"loss": 0.0386,
"step": 4250
},
{
"grad_norm": 0.32953622937202454,
"learning_rate": 9.99971344502732e-05,
"loss": 0.0376,
"step": 4260
},
{
"grad_norm": 0.3710175156593323,
"learning_rate": 9.999690890285053e-05,
"loss": 0.0389,
"step": 4270
},
{
"grad_norm": 0.4310072362422943,
"learning_rate": 9.999667481232356e-05,
"loss": 0.0414,
"step": 4280
},
{
"grad_norm": 0.386064738035202,
"learning_rate": 9.999643217873225e-05,
"loss": 0.0349,
"step": 4290
},
{
"grad_norm": 0.3644770085811615,
"learning_rate": 9.999618100211809e-05,
"loss": 0.0352,
"step": 4300
},
{
"grad_norm": 0.474679559469223,
"learning_rate": 9.999592128252402e-05,
"loss": 0.0373,
"step": 4310
},
{
"grad_norm": 0.3991343379020691,
"learning_rate": 9.999565301999437e-05,
"loss": 0.0398,
"step": 4320
},
{
"grad_norm": 0.35236024856567383,
"learning_rate": 9.999537621457502e-05,
"loss": 0.0366,
"step": 4330
},
{
"grad_norm": 0.3644237816333771,
"learning_rate": 9.999509086631323e-05,
"loss": 0.0398,
"step": 4340
},
{
"grad_norm": 0.4314301013946533,
"learning_rate": 9.99947969752578e-05,
"loss": 0.0443,
"step": 4350
},
{
"grad_norm": 0.3834511339664459,
"learning_rate": 9.999449454145891e-05,
"loss": 0.0382,
"step": 4360
},
{
"grad_norm": 0.347470760345459,
"learning_rate": 9.999418356496827e-05,
"loss": 0.0431,
"step": 4370
},
{
"grad_norm": 0.45474135875701904,
"learning_rate": 9.999386404583899e-05,
"loss": 0.0412,
"step": 4380
},
{
"grad_norm": 0.3765535056591034,
"learning_rate": 9.999353598412568e-05,
"loss": 0.0452,
"step": 4390
},
{
"grad_norm": 0.28268489241600037,
"learning_rate": 9.999319937988442e-05,
"loss": 0.0364,
"step": 4400
},
{
"grad_norm": 0.4939219057559967,
"learning_rate": 9.999285423317268e-05,
"loss": 0.0399,
"step": 4410
},
{
"grad_norm": 0.3784657418727875,
"learning_rate": 9.999250054404947e-05,
"loss": 0.0413,
"step": 4420
},
{
"grad_norm": 0.3653174638748169,
"learning_rate": 9.99921383125752e-05,
"loss": 0.0363,
"step": 4430
},
{
"grad_norm": 0.48910290002822876,
"learning_rate": 9.99917675388118e-05,
"loss": 0.0391,
"step": 4440
},
{
"grad_norm": 0.3770338296890259,
"learning_rate": 9.99913882228226e-05,
"loss": 0.0389,
"step": 4450
},
{
"grad_norm": 0.4055532217025757,
"learning_rate": 9.999100036467242e-05,
"loss": 0.037,
"step": 4460
},
{
"grad_norm": 0.26587072014808655,
"learning_rate": 9.999060396442753e-05,
"loss": 0.0356,
"step": 4470
},
{
"grad_norm": 0.41433122754096985,
"learning_rate": 9.999019902215566e-05,
"loss": 0.0379,
"step": 4480
},
{
"grad_norm": 0.4437147080898285,
"learning_rate": 9.998978553792602e-05,
"loss": 0.042,
"step": 4490
},
{
"grad_norm": 0.42392024397850037,
"learning_rate": 9.998936351180926e-05,
"loss": 0.0379,
"step": 4500
},
{
"grad_norm": 0.3875804841518402,
"learning_rate": 9.998893294387747e-05,
"loss": 0.0349,
"step": 4510
},
{
"grad_norm": 0.37056973576545715,
"learning_rate": 9.998849383420426e-05,
"loss": 0.036,
"step": 4520
},
{
"grad_norm": 0.3561025857925415,
"learning_rate": 9.998804618286465e-05,
"loss": 0.0396,
"step": 4530
},
{
"grad_norm": 0.5577656626701355,
"learning_rate": 9.99875899899351e-05,
"loss": 0.039,
"step": 4540
},
{
"grad_norm": 0.40716665983200073,
"learning_rate": 9.99871252554936e-05,
"loss": 0.034,
"step": 4550
},
{
"grad_norm": 0.39829781651496887,
"learning_rate": 9.998665197961955e-05,
"loss": 0.0351,
"step": 4560
},
{
"grad_norm": 0.33696404099464417,
"learning_rate": 9.998617016239379e-05,
"loss": 0.0346,
"step": 4570
},
{
"grad_norm": 0.38184264302253723,
"learning_rate": 9.998567980389869e-05,
"loss": 0.0409,
"step": 4580
},
{
"grad_norm": 0.4413798451423645,
"learning_rate": 9.998518090421802e-05,
"loss": 0.0407,
"step": 4590
},
{
"grad_norm": 0.3794148862361908,
"learning_rate": 9.998467346343703e-05,
"loss": 0.037,
"step": 4600
},
{
"grad_norm": 0.35736557841300964,
"learning_rate": 9.998415748164243e-05,
"loss": 0.0417,
"step": 4610
},
{
"grad_norm": 0.30888238549232483,
"learning_rate": 9.998363295892238e-05,
"loss": 0.0369,
"step": 4620
},
{
"grad_norm": 0.3827449083328247,
"learning_rate": 9.998309989536652e-05,
"loss": 0.0369,
"step": 4630
},
{
"grad_norm": 0.3302731513977051,
"learning_rate": 9.998255829106593e-05,
"loss": 0.0394,
"step": 4640
},
{
"grad_norm": 0.3657535910606384,
"learning_rate": 9.998200814611316e-05,
"loss": 0.0361,
"step": 4650
},
{
"grad_norm": 0.29982033371925354,
"learning_rate": 9.998144946060219e-05,
"loss": 0.0375,
"step": 4660
},
{
"grad_norm": 0.5621727705001831,
"learning_rate": 9.998088223462852e-05,
"loss": 0.0374,
"step": 4670
},
{
"grad_norm": 0.37400975823402405,
"learning_rate": 9.998030646828905e-05,
"loss": 0.0322,
"step": 4680
},
{
"grad_norm": 0.4110400676727295,
"learning_rate": 9.997972216168217e-05,
"loss": 0.0348,
"step": 4690
},
{
"grad_norm": 0.512689471244812,
"learning_rate": 9.997912931490771e-05,
"loss": 0.044,
"step": 4700
},
{
"grad_norm": 0.4697954058647156,
"learning_rate": 9.9978527928067e-05,
"loss": 0.0397,
"step": 4710
},
{
"grad_norm": 0.4623161256313324,
"learning_rate": 9.997791800126277e-05,
"loss": 0.0343,
"step": 4720
},
{
"grad_norm": 0.44245240092277527,
"learning_rate": 9.997729953459927e-05,
"loss": 0.037,
"step": 4730
},
{
"grad_norm": 0.35390836000442505,
"learning_rate": 9.997667252818214e-05,
"loss": 0.038,
"step": 4740
},
{
"grad_norm": 0.3483659029006958,
"learning_rate": 9.997603698211855e-05,
"loss": 0.0377,
"step": 4750
},
{
"grad_norm": 0.46905985474586487,
"learning_rate": 9.99753928965171e-05,
"loss": 0.0388,
"step": 4760
},
{
"grad_norm": 0.3583959937095642,
"learning_rate": 9.997474027148781e-05,
"loss": 0.0397,
"step": 4770
},
{
"grad_norm": 0.382405161857605,
"learning_rate": 9.997407910714223e-05,
"loss": 0.0363,
"step": 4780
},
{
"grad_norm": 0.36503803730010986,
"learning_rate": 9.997340940359332e-05,
"loss": 0.039,
"step": 4790
},
{
"grad_norm": 0.3273303508758545,
"learning_rate": 9.997273116095552e-05,
"loss": 0.0383,
"step": 4800
},
{
"grad_norm": 0.5083199739456177,
"learning_rate": 9.997204437934473e-05,
"loss": 0.0341,
"step": 4810
},
{
"grad_norm": 0.3080153167247772,
"learning_rate": 9.997134905887829e-05,
"loss": 0.0369,
"step": 4820
},
{
"grad_norm": 0.30154985189437866,
"learning_rate": 9.997064519967501e-05,
"loss": 0.0348,
"step": 4830
},
{
"grad_norm": 0.3803936839103699,
"learning_rate": 9.996993280185517e-05,
"loss": 0.0348,
"step": 4840
},
{
"grad_norm": 0.2470090538263321,
"learning_rate": 9.99692118655405e-05,
"loss": 0.0343,
"step": 4850
},
{
"grad_norm": 0.4055911600589752,
"learning_rate": 9.996848239085417e-05,
"loss": 0.0339,
"step": 4860
},
{
"grad_norm": 0.353442519903183,
"learning_rate": 9.996774437792085e-05,
"loss": 0.0386,
"step": 4870
},
{
"grad_norm": 0.4112813472747803,
"learning_rate": 9.996699782686664e-05,
"loss": 0.035,
"step": 4880
},
{
"grad_norm": 0.38305455446243286,
"learning_rate": 9.996624273781909e-05,
"loss": 0.0377,
"step": 4890
},
{
"grad_norm": 0.3009546995162964,
"learning_rate": 9.996547911090725e-05,
"loss": 0.0341,
"step": 4900
},
{
"grad_norm": 0.31697291135787964,
"learning_rate": 9.996470694626157e-05,
"loss": 0.0322,
"step": 4910
},
{
"grad_norm": 0.3814009130001068,
"learning_rate": 9.996392624401403e-05,
"loss": 0.0351,
"step": 4920
},
{
"grad_norm": 0.3119279742240906,
"learning_rate": 9.996313700429801e-05,
"loss": 0.0363,
"step": 4930
},
{
"grad_norm": 0.3278277516365051,
"learning_rate": 9.996233922724836e-05,
"loss": 0.0314,
"step": 4940
},
{
"grad_norm": 0.5751047134399414,
"learning_rate": 9.996153291300141e-05,
"loss": 0.0371,
"step": 4950
},
{
"grad_norm": 0.3887098431587219,
"learning_rate": 9.996071806169494e-05,
"loss": 0.0378,
"step": 4960
},
{
"grad_norm": 0.2754780948162079,
"learning_rate": 9.995989467346817e-05,
"loss": 0.0303,
"step": 4970
},
{
"grad_norm": 0.35232511162757874,
"learning_rate": 9.995906274846183e-05,
"loss": 0.0388,
"step": 4980
},
{
"grad_norm": 0.3542734980583191,
"learning_rate": 9.995822228681803e-05,
"loss": 0.0375,
"step": 4990
},
{
"grad_norm": 0.34408876299858093,
"learning_rate": 9.99573732886804e-05,
"loss": 0.0358,
"step": 5000
},
{
"grad_norm": 0.39004313945770264,
"learning_rate": 9.995651575419402e-05,
"loss": 0.0375,
"step": 5010
},
{
"grad_norm": 0.3584635257720947,
"learning_rate": 9.995564968350541e-05,
"loss": 0.0372,
"step": 5020
},
{
"grad_norm": 0.3550162613391876,
"learning_rate": 9.995477507676256e-05,
"loss": 0.032,
"step": 5030
},
{
"grad_norm": 0.37321898341178894,
"learning_rate": 9.995389193411493e-05,
"loss": 0.0311,
"step": 5040
},
{
"grad_norm": 0.38647428154945374,
"learning_rate": 9.995300025571339e-05,
"loss": 0.0357,
"step": 5050
},
{
"grad_norm": 0.3812742233276367,
"learning_rate": 9.995210004171034e-05,
"loss": 0.0381,
"step": 5060
},
{
"grad_norm": 0.408979207277298,
"learning_rate": 9.995119129225956e-05,
"loss": 0.0362,
"step": 5070
},
{
"grad_norm": 0.3359850347042084,
"learning_rate": 9.995027400751637e-05,
"loss": 0.0351,
"step": 5080
},
{
"grad_norm": 0.40490883588790894,
"learning_rate": 9.994934818763751e-05,
"loss": 0.0387,
"step": 5090
},
{
"grad_norm": 0.4428238868713379,
"learning_rate": 9.994841383278115e-05,
"loss": 0.0359,
"step": 5100
},
{
"grad_norm": 0.3872987926006317,
"learning_rate": 9.994747094310695e-05,
"loss": 0.0385,
"step": 5110
},
{
"grad_norm": 0.31335529685020447,
"learning_rate": 9.994651951877604e-05,
"loss": 0.0324,
"step": 5120
},
{
"grad_norm": 0.3368927538394928,
"learning_rate": 9.994555955995099e-05,
"loss": 0.0331,
"step": 5130
},
{
"grad_norm": 0.41906657814979553,
"learning_rate": 9.994459106679581e-05,
"loss": 0.0358,
"step": 5140
},
{
"grad_norm": 0.37263497710227966,
"learning_rate": 9.994361403947603e-05,
"loss": 0.0362,
"step": 5150
},
{
"grad_norm": 0.32935646176338196,
"learning_rate": 9.994262847815854e-05,
"loss": 0.0357,
"step": 5160
},
{
"grad_norm": 0.2794903516769409,
"learning_rate": 9.99416343830118e-05,
"loss": 0.0332,
"step": 5170
},
{
"grad_norm": 0.31271782517433167,
"learning_rate": 9.994063175420565e-05,
"loss": 0.03,
"step": 5180
},
{
"grad_norm": 0.3257259726524353,
"learning_rate": 9.99396205919114e-05,
"loss": 0.0372,
"step": 5190
},
{
"grad_norm": 0.3059450089931488,
"learning_rate": 9.993860089630185e-05,
"loss": 0.033,
"step": 5200
},
{
"grad_norm": 0.2800959646701813,
"learning_rate": 9.993757266755123e-05,
"loss": 0.0364,
"step": 5210
},
{
"grad_norm": 0.3736426532268524,
"learning_rate": 9.993653590583522e-05,
"loss": 0.0404,
"step": 5220
},
{
"grad_norm": 0.3520766794681549,
"learning_rate": 9.993549061133102e-05,
"loss": 0.0349,
"step": 5230
},
{
"grad_norm": 0.4305180311203003,
"learning_rate": 9.993443678421719e-05,
"loss": 0.031,
"step": 5240
},
{
"grad_norm": 0.30332329869270325,
"learning_rate": 9.993337442467384e-05,
"loss": 0.0354,
"step": 5250
},
{
"grad_norm": 0.357280433177948,
"learning_rate": 9.993230353288248e-05,
"loss": 0.0333,
"step": 5260
},
{
"grad_norm": 0.38651740550994873,
"learning_rate": 9.993122410902608e-05,
"loss": 0.0336,
"step": 5270
},
{
"grad_norm": 0.3957500457763672,
"learning_rate": 9.993013615328912e-05,
"loss": 0.0337,
"step": 5280
},
{
"grad_norm": 0.30943194031715393,
"learning_rate": 9.992903966585747e-05,
"loss": 0.0351,
"step": 5290
},
{
"grad_norm": 0.29888811707496643,
"learning_rate": 9.992793464691852e-05,
"loss": 0.0371,
"step": 5300
},
{
"grad_norm": 0.32478439807891846,
"learning_rate": 9.992682109666105e-05,
"loss": 0.0401,
"step": 5310
},
{
"grad_norm": 0.4232689142227173,
"learning_rate": 9.992569901527538e-05,
"loss": 0.0424,
"step": 5320
},
{
"grad_norm": 0.30550339818000793,
"learning_rate": 9.99245684029532e-05,
"loss": 0.0325,
"step": 5330
},
{
"grad_norm": 0.2808350622653961,
"learning_rate": 9.992342925988774e-05,
"loss": 0.0363,
"step": 5340
},
{
"grad_norm": 0.37217894196510315,
"learning_rate": 9.992228158627361e-05,
"loss": 0.0349,
"step": 5350
},
{
"grad_norm": 0.29954707622528076,
"learning_rate": 9.992112538230693e-05,
"loss": 0.0314,
"step": 5360
},
{
"grad_norm": 0.4495854675769806,
"learning_rate": 9.991996064818527e-05,
"loss": 0.0326,
"step": 5370
},
{
"grad_norm": 0.3042782247066498,
"learning_rate": 9.991878738410768e-05,
"loss": 0.0338,
"step": 5380
},
{
"grad_norm": 0.3876139521598816,
"learning_rate": 9.991760559027457e-05,
"loss": 0.0337,
"step": 5390
},
{
"grad_norm": 0.40060412883758545,
"learning_rate": 9.991641526688793e-05,
"loss": 0.03,
"step": 5400
},
{
"grad_norm": 0.3015538156032562,
"learning_rate": 9.991521641415113e-05,
"loss": 0.0322,
"step": 5410
},
{
"grad_norm": 0.28446367383003235,
"learning_rate": 9.991400903226904e-05,
"loss": 0.0318,
"step": 5420
},
{
"grad_norm": 0.42916229367256165,
"learning_rate": 9.991279312144794e-05,
"loss": 0.0368,
"step": 5430
},
{
"grad_norm": 0.32056114077568054,
"learning_rate": 9.991156868189564e-05,
"loss": 0.0331,
"step": 5440
},
{
"grad_norm": 0.4189446270465851,
"learning_rate": 9.991033571382131e-05,
"loss": 0.0354,
"step": 5450
},
{
"grad_norm": 0.3427908718585968,
"learning_rate": 9.990909421743569e-05,
"loss": 0.0322,
"step": 5460
},
{
"grad_norm": 0.29680436849594116,
"learning_rate": 9.990784419295085e-05,
"loss": 0.0315,
"step": 5470
},
{
"grad_norm": 0.3475859463214874,
"learning_rate": 9.990658564058044e-05,
"loss": 0.0356,
"step": 5480
},
{
"grad_norm": 0.2693791091442108,
"learning_rate": 9.990531856053948e-05,
"loss": 0.0341,
"step": 5490
},
{
"grad_norm": 0.38264793157577515,
"learning_rate": 9.99040429530445e-05,
"loss": 0.0305,
"step": 5500
},
{
"grad_norm": 0.4049840271472931,
"learning_rate": 9.990275881831346e-05,
"loss": 0.0345,
"step": 5510
},
{
"grad_norm": 0.4251091480255127,
"learning_rate": 9.990146615656577e-05,
"loss": 0.0366,
"step": 5520
},
{
"grad_norm": 0.28920596837997437,
"learning_rate": 9.990016496802233e-05,
"loss": 0.0328,
"step": 5530
},
{
"grad_norm": 0.3436765670776367,
"learning_rate": 9.989885525290548e-05,
"loss": 0.0313,
"step": 5540
},
{
"grad_norm": 0.29532015323638916,
"learning_rate": 9.989753701143897e-05,
"loss": 0.0341,
"step": 5550
},
{
"grad_norm": 0.3735540509223938,
"learning_rate": 9.989621024384812e-05,
"loss": 0.0316,
"step": 5560
},
{
"grad_norm": 0.4115545451641083,
"learning_rate": 9.989487495035959e-05,
"loss": 0.0323,
"step": 5570
},
{
"grad_norm": 0.3986823260784149,
"learning_rate": 9.989353113120156e-05,
"loss": 0.0314,
"step": 5580
},
{
"grad_norm": 0.28433433175086975,
"learning_rate": 9.989217878660366e-05,
"loss": 0.0315,
"step": 5590
},
{
"grad_norm": 0.2662765681743622,
"learning_rate": 9.989081791679695e-05,
"loss": 0.0303,
"step": 5600
},
{
"grad_norm": 0.30207517743110657,
"learning_rate": 9.988944852201397e-05,
"loss": 0.0311,
"step": 5610
},
{
"grad_norm": 0.33197832107543945,
"learning_rate": 9.988807060248873e-05,
"loss": 0.0304,
"step": 5620
},
{
"grad_norm": 0.2492564171552658,
"learning_rate": 9.988668415845665e-05,
"loss": 0.0287,
"step": 5630
},
{
"grad_norm": 0.43481743335723877,
"learning_rate": 9.988528919015466e-05,
"loss": 0.0294,
"step": 5640
},
{
"grad_norm": 0.3716481029987335,
"learning_rate": 9.988388569782112e-05,
"loss": 0.0306,
"step": 5650
},
{
"grad_norm": 0.2877008616924286,
"learning_rate": 9.988247368169583e-05,
"loss": 0.0408,
"step": 5660
},
{
"grad_norm": 0.31733956933021545,
"learning_rate": 9.988105314202007e-05,
"loss": 0.0334,
"step": 5670
},
{
"grad_norm": 0.42810311913490295,
"learning_rate": 9.987962407903659e-05,
"loss": 0.0319,
"step": 5680
},
{
"grad_norm": 0.3225559592247009,
"learning_rate": 9.987818649298957e-05,
"loss": 0.0293,
"step": 5690
},
{
"grad_norm": 0.3467288613319397,
"learning_rate": 9.987674038412465e-05,
"loss": 0.0339,
"step": 5700
},
{
"grad_norm": 0.45623427629470825,
"learning_rate": 9.987528575268891e-05,
"loss": 0.0319,
"step": 5710
},
{
"grad_norm": 0.2944740653038025,
"learning_rate": 9.987382259893095e-05,
"loss": 0.0318,
"step": 5720
},
{
"grad_norm": 0.3048604726791382,
"learning_rate": 9.987235092310074e-05,
"loss": 0.0331,
"step": 5730
},
{
"grad_norm": 0.3735939860343933,
"learning_rate": 9.987087072544978e-05,
"loss": 0.0327,
"step": 5740
},
{
"grad_norm": 0.3563518524169922,
"learning_rate": 9.9869382006231e-05,
"loss": 0.0321,
"step": 5750
},
{
"grad_norm": 0.3049486577510834,
"learning_rate": 9.986788476569875e-05,
"loss": 0.0295,
"step": 5760
},
{
"grad_norm": 0.31051209568977356,
"learning_rate": 9.986637900410887e-05,
"loss": 0.0349,
"step": 5770
},
{
"grad_norm": 0.3869427442550659,
"learning_rate": 9.986486472171869e-05,
"loss": 0.0322,
"step": 5780
},
{
"grad_norm": 0.4189186692237854,
"learning_rate": 9.986334191878692e-05,
"loss": 0.0342,
"step": 5790
},
{
"grad_norm": 0.43416473269462585,
"learning_rate": 9.986181059557378e-05,
"loss": 0.031,
"step": 5800
},
{
"grad_norm": 0.3650332987308502,
"learning_rate": 9.986027075234094e-05,
"loss": 0.0299,
"step": 5810
},
{
"grad_norm": 0.3248865306377411,
"learning_rate": 9.985872238935152e-05,
"loss": 0.0332,
"step": 5820
},
{
"grad_norm": 0.3286206126213074,
"learning_rate": 9.985716550687008e-05,
"loss": 0.0336,
"step": 5830
},
{
"grad_norm": 0.373761385679245,
"learning_rate": 9.985560010516264e-05,
"loss": 0.033,
"step": 5840
},
{
"grad_norm": 0.2942267954349518,
"learning_rate": 9.985402618449668e-05,
"loss": 0.0315,
"step": 5850
},
{
"grad_norm": 0.27267733216285706,
"learning_rate": 9.985244374514118e-05,
"loss": 0.0335,
"step": 5860
},
{
"grad_norm": 0.35840272903442383,
"learning_rate": 9.985085278736651e-05,
"loss": 0.0358,
"step": 5870
},
{
"grad_norm": 0.3712354004383087,
"learning_rate": 9.984925331144452e-05,
"loss": 0.0309,
"step": 5880
},
{
"grad_norm": 0.33825939893722534,
"learning_rate": 9.984764531764851e-05,
"loss": 0.0293,
"step": 5890
},
{
"grad_norm": 0.3744773864746094,
"learning_rate": 9.984602880625326e-05,
"loss": 0.0305,
"step": 5900
},
{
"grad_norm": 0.3779265582561493,
"learning_rate": 9.9844403777535e-05,
"loss": 0.0336,
"step": 5910
},
{
"grad_norm": 0.35232800245285034,
"learning_rate": 9.984277023177135e-05,
"loss": 0.03,
"step": 5920
},
{
"grad_norm": 0.24044311046600342,
"learning_rate": 9.984112816924148e-05,
"loss": 0.0277,
"step": 5930
},
{
"grad_norm": 0.31373876333236694,
"learning_rate": 9.983947759022596e-05,
"loss": 0.0338,
"step": 5940
},
{
"grad_norm": 0.3518969714641571,
"learning_rate": 9.983781849500682e-05,
"loss": 0.0295,
"step": 5950
},
{
"grad_norm": 0.3579181432723999,
"learning_rate": 9.98361508838676e-05,
"loss": 0.0298,
"step": 5960
},
{
"grad_norm": 0.2864387333393097,
"learning_rate": 9.98344747570932e-05,
"loss": 0.0316,
"step": 5970
},
{
"grad_norm": 0.3602316379547119,
"learning_rate": 9.983279011497004e-05,
"loss": 0.0311,
"step": 5980
},
{
"grad_norm": 0.2712448537349701,
"learning_rate": 9.983109695778596e-05,
"loss": 0.032,
"step": 5990
},
{
"grad_norm": 0.3071662187576294,
"learning_rate": 9.982939528583032e-05,
"loss": 0.0334,
"step": 6000
},
{
"grad_norm": 0.38609135150909424,
"learning_rate": 9.982768509939385e-05,
"loss": 0.0285,
"step": 6010
},
{
"grad_norm": 0.2968983054161072,
"learning_rate": 9.982596639876879e-05,
"loss": 0.0292,
"step": 6020
},
{
"grad_norm": 0.2913767695426941,
"learning_rate": 9.982423918424881e-05,
"loss": 0.0276,
"step": 6030
},
{
"grad_norm": 0.2720889151096344,
"learning_rate": 9.982250345612908e-05,
"loss": 0.0305,
"step": 6040
},
{
"grad_norm": 0.2988946735858917,
"learning_rate": 9.982075921470611e-05,
"loss": 0.0309,
"step": 6050
},
{
"grad_norm": 0.3016357421875,
"learning_rate": 9.981900646027802e-05,
"loss": 0.0284,
"step": 6060
},
{
"grad_norm": 0.3710649311542511,
"learning_rate": 9.981724519314425e-05,
"loss": 0.0321,
"step": 6070
},
{
"grad_norm": 0.32104140520095825,
"learning_rate": 9.981547541360581e-05,
"loss": 0.035,
"step": 6080
},
{
"grad_norm": 0.2984887361526489,
"learning_rate": 9.981369712196508e-05,
"loss": 0.0282,
"step": 6090
},
{
"grad_norm": 0.29977795481681824,
"learning_rate": 9.981191031852592e-05,
"loss": 0.0338,
"step": 6100
},
{
"grad_norm": 0.29678675532341003,
"learning_rate": 9.981011500359362e-05,
"loss": 0.0301,
"step": 6110
},
{
"grad_norm": 0.3343314230442047,
"learning_rate": 9.9808311177475e-05,
"loss": 0.0351,
"step": 6120
},
{
"grad_norm": 0.32088878750801086,
"learning_rate": 9.980649884047826e-05,
"loss": 0.0318,
"step": 6130
},
{
"grad_norm": 0.3055264949798584,
"learning_rate": 9.980467799291307e-05,
"loss": 0.0309,
"step": 6140
},
{
"grad_norm": 0.4143613874912262,
"learning_rate": 9.980284863509058e-05,
"loss": 0.0338,
"step": 6150
},
{
"grad_norm": 0.35827532410621643,
"learning_rate": 9.980101076732334e-05,
"loss": 0.0316,
"step": 6160
},
{
"grad_norm": 0.42603468894958496,
"learning_rate": 9.979916438992544e-05,
"loss": 0.0338,
"step": 6170
},
{
"grad_norm": 0.3776651620864868,
"learning_rate": 9.979730950321237e-05,
"loss": 0.0317,
"step": 6180
},
{
"grad_norm": 0.31897443532943726,
"learning_rate": 9.979544610750104e-05,
"loss": 0.0286,
"step": 6190
},
{
"grad_norm": 0.247903972864151,
"learning_rate": 9.97935742031099e-05,
"loss": 0.0309,
"step": 6200
},
{
"grad_norm": 0.2799277603626251,
"learning_rate": 9.979169379035878e-05,
"loss": 0.0306,
"step": 6210
},
{
"grad_norm": 0.455984503030777,
"learning_rate": 9.978980486956899e-05,
"loss": 0.0344,
"step": 6220
},
{
"grad_norm": 0.36091381311416626,
"learning_rate": 9.978790744106332e-05,
"loss": 0.034,
"step": 6230
},
{
"grad_norm": 0.3725851774215698,
"learning_rate": 9.978600150516594e-05,
"loss": 0.0315,
"step": 6240
},
{
"grad_norm": 0.3584500849246979,
"learning_rate": 9.978408706220259e-05,
"loss": 0.036,
"step": 6250
},
{
"grad_norm": 0.29220858216285706,
"learning_rate": 9.978216411250032e-05,
"loss": 0.0372,
"step": 6260
},
{
"grad_norm": 0.31616538763046265,
"learning_rate": 9.978023265638778e-05,
"loss": 0.0298,
"step": 6270
},
{
"grad_norm": 0.3402068614959717,
"learning_rate": 9.977829269419495e-05,
"loss": 0.0332,
"step": 6280
},
{
"grad_norm": 0.3208469748497009,
"learning_rate": 9.977634422625335e-05,
"loss": 0.0298,
"step": 6290
},
{
"grad_norm": 0.2783920168876648,
"learning_rate": 9.97743872528959e-05,
"loss": 0.0376,
"step": 6300
},
{
"grad_norm": 0.36577823758125305,
"learning_rate": 9.9772421774457e-05,
"loss": 0.0326,
"step": 6310
},
{
"grad_norm": 0.3835756182670593,
"learning_rate": 9.977044779127252e-05,
"loss": 0.0316,
"step": 6320
},
{
"grad_norm": 0.35437050461769104,
"learning_rate": 9.976846530367971e-05,
"loss": 0.0338,
"step": 6330
},
{
"grad_norm": 0.35576331615448,
"learning_rate": 9.976647431201735e-05,
"loss": 0.0337,
"step": 6340
},
{
"grad_norm": 0.3176501989364624,
"learning_rate": 9.976447481662568e-05,
"loss": 0.0349,
"step": 6350
},
{
"grad_norm": 0.3097964823246002,
"learning_rate": 9.976246681784629e-05,
"loss": 0.0326,
"step": 6360
},
{
"grad_norm": 0.2853200435638428,
"learning_rate": 9.976045031602234e-05,
"loss": 0.0339,
"step": 6370
},
{
"grad_norm": 0.4057390093803406,
"learning_rate": 9.975842531149837e-05,
"loss": 0.0358,
"step": 6380
},
{
"grad_norm": 0.26724973320961,
"learning_rate": 9.975639180462043e-05,
"loss": 0.0341,
"step": 6390
},
{
"grad_norm": 0.29897722601890564,
"learning_rate": 9.975434979573596e-05,
"loss": 0.0299,
"step": 6400
},
{
"grad_norm": 0.3819584846496582,
"learning_rate": 9.97522992851939e-05,
"loss": 0.0345,
"step": 6410
},
{
"grad_norm": 0.37220311164855957,
"learning_rate": 9.975024027334461e-05,
"loss": 0.0341,
"step": 6420
},
{
"grad_norm": 0.3132483959197998,
"learning_rate": 9.974817276053993e-05,
"loss": 0.0309,
"step": 6430
},
{
"grad_norm": 0.32484710216522217,
"learning_rate": 9.974609674713315e-05,
"loss": 0.0319,
"step": 6440
},
{
"grad_norm": 0.35498714447021484,
"learning_rate": 9.9744012233479e-05,
"loss": 0.0305,
"step": 6450
},
{
"grad_norm": 0.36161261796951294,
"learning_rate": 9.974191921993366e-05,
"loss": 0.0305,
"step": 6460
},
{
"grad_norm": 0.2131146341562271,
"learning_rate": 9.973981770685474e-05,
"loss": 0.0313,
"step": 6470
},
{
"grad_norm": 0.3179318308830261,
"learning_rate": 9.97377076946014e-05,
"loss": 0.0334,
"step": 6480
},
{
"grad_norm": 0.2530212998390198,
"learning_rate": 9.973558918353412e-05,
"loss": 0.0336,
"step": 6490
},
{
"grad_norm": 0.2359841912984848,
"learning_rate": 9.973346217401494e-05,
"loss": 0.0293,
"step": 6500
},
{
"grad_norm": 0.3290267288684845,
"learning_rate": 9.973132666640726e-05,
"loss": 0.0299,
"step": 6510
},
{
"grad_norm": 0.2806616425514221,
"learning_rate": 9.972918266107602e-05,
"loss": 0.0275,
"step": 6520
},
{
"grad_norm": 0.289767861366272,
"learning_rate": 9.972703015838756e-05,
"loss": 0.0315,
"step": 6530
},
{
"grad_norm": 0.30729788541793823,
"learning_rate": 9.97248691587097e-05,
"loss": 0.0297,
"step": 6540
},
{
"grad_norm": 0.29331719875335693,
"learning_rate": 9.972269966241166e-05,
"loss": 0.031,
"step": 6550
},
{
"grad_norm": 0.26936954259872437,
"learning_rate": 9.972052166986417e-05,
"loss": 0.0304,
"step": 6560
},
{
"grad_norm": 0.25787121057510376,
"learning_rate": 9.971833518143938e-05,
"loss": 0.0352,
"step": 6570
},
{
"grad_norm": 0.29834407567977905,
"learning_rate": 9.971614019751093e-05,
"loss": 0.0284,
"step": 6580
},
{
"grad_norm": 0.22129976749420166,
"learning_rate": 9.971393671845383e-05,
"loss": 0.0317,
"step": 6590
},
{
"grad_norm": 0.2899000942707062,
"learning_rate": 9.971172474464464e-05,
"loss": 0.0336,
"step": 6600
},
{
"grad_norm": 0.29629984498023987,
"learning_rate": 9.97095042764613e-05,
"loss": 0.0347,
"step": 6610
},
{
"grad_norm": 0.389089971780777,
"learning_rate": 9.970727531428324e-05,
"loss": 0.0263,
"step": 6620
},
{
"grad_norm": 0.24067829549312592,
"learning_rate": 9.970503785849132e-05,
"loss": 0.0354,
"step": 6630
},
{
"grad_norm": 0.3028007745742798,
"learning_rate": 9.970279190946788e-05,
"loss": 0.0309,
"step": 6640
},
{
"grad_norm": 0.305187463760376,
"learning_rate": 9.970053746759667e-05,
"loss": 0.0274,
"step": 6650
},
{
"grad_norm": 0.2956674098968506,
"learning_rate": 9.969827453326292e-05,
"loss": 0.0325,
"step": 6660
},
{
"grad_norm": 0.30844828486442566,
"learning_rate": 9.969600310685332e-05,
"loss": 0.0334,
"step": 6670
},
{
"grad_norm": 0.23201362788677216,
"learning_rate": 9.969372318875596e-05,
"loss": 0.0299,
"step": 6680
},
{
"grad_norm": 0.38267290592193604,
"learning_rate": 9.969143477936043e-05,
"loss": 0.0314,
"step": 6690
},
{
"grad_norm": 0.22565823793411255,
"learning_rate": 9.968913787905775e-05,
"loss": 0.0311,
"step": 6700
},
{
"grad_norm": 0.31415167450904846,
"learning_rate": 9.968683248824045e-05,
"loss": 0.0305,
"step": 6710
},
{
"grad_norm": 0.2609900236129761,
"learning_rate": 9.968451860730238e-05,
"loss": 0.0316,
"step": 6720
},
{
"grad_norm": 0.3864940404891968,
"learning_rate": 9.968219623663896e-05,
"loss": 0.0275,
"step": 6730
},
{
"grad_norm": 0.3017069101333618,
"learning_rate": 9.967986537664702e-05,
"loss": 0.0304,
"step": 6740
},
{
"grad_norm": 0.29363933205604553,
"learning_rate": 9.967752602772483e-05,
"loss": 0.0278,
"step": 6750
},
{
"grad_norm": 0.33982428908348083,
"learning_rate": 9.967517819027212e-05,
"loss": 0.0283,
"step": 6760
},
{
"grad_norm": 0.29549187421798706,
"learning_rate": 9.967282186469009e-05,
"loss": 0.0287,
"step": 6770
},
{
"grad_norm": 0.2883082926273346,
"learning_rate": 9.967045705138135e-05,
"loss": 0.0298,
"step": 6780
},
{
"grad_norm": 0.2381998896598816,
"learning_rate": 9.966808375074998e-05,
"loss": 0.0295,
"step": 6790
},
{
"grad_norm": 0.2355462908744812,
"learning_rate": 9.966570196320154e-05,
"loss": 0.03,
"step": 6800
},
{
"grad_norm": 0.31626275181770325,
"learning_rate": 9.966331168914299e-05,
"loss": 0.0305,
"step": 6810
},
{
"grad_norm": 0.2476751059293747,
"learning_rate": 9.966091292898277e-05,
"loss": 0.0328,
"step": 6820
},
{
"grad_norm": 0.27976781129837036,
"learning_rate": 9.965850568313076e-05,
"loss": 0.0269,
"step": 6830
},
{
"grad_norm": 0.23612117767333984,
"learning_rate": 9.965608995199827e-05,
"loss": 0.0301,
"step": 6840
},
{
"grad_norm": 0.3097231686115265,
"learning_rate": 9.965366573599812e-05,
"loss": 0.0331,
"step": 6850
},
{
"grad_norm": 0.3308660387992859,
"learning_rate": 9.965123303554453e-05,
"loss": 0.0336,
"step": 6860
},
{
"grad_norm": 0.22662802040576935,
"learning_rate": 9.964879185105317e-05,
"loss": 0.0256,
"step": 6870
},
{
"grad_norm": 0.3358778655529022,
"learning_rate": 9.964634218294119e-05,
"loss": 0.0339,
"step": 6880
},
{
"grad_norm": 0.2232239544391632,
"learning_rate": 9.964388403162714e-05,
"loss": 0.0264,
"step": 6890
},
{
"grad_norm": 0.2929636836051941,
"learning_rate": 9.96414173975311e-05,
"loss": 0.0277,
"step": 6900
},
{
"grad_norm": 0.29091522097587585,
"learning_rate": 9.963894228107451e-05,
"loss": 0.0263,
"step": 6910
},
{
"grad_norm": 0.27420079708099365,
"learning_rate": 9.963645868268032e-05,
"loss": 0.0307,
"step": 6920
},
{
"grad_norm": 0.34498360753059387,
"learning_rate": 9.963396660277289e-05,
"loss": 0.0304,
"step": 6930
},
{
"grad_norm": 0.22748416662216187,
"learning_rate": 9.963146604177807e-05,
"loss": 0.0274,
"step": 6940
},
{
"grad_norm": 0.27264806628227234,
"learning_rate": 9.962895700012311e-05,
"loss": 0.0276,
"step": 6950
},
{
"grad_norm": 0.2757764756679535,
"learning_rate": 9.962643947823677e-05,
"loss": 0.0317,
"step": 6960
},
{
"grad_norm": 0.27199292182922363,
"learning_rate": 9.962391347654921e-05,
"loss": 0.0285,
"step": 6970
},
{
"grad_norm": 0.2811877131462097,
"learning_rate": 9.962137899549204e-05,
"loss": 0.0302,
"step": 6980
},
{
"grad_norm": 0.3445790410041809,
"learning_rate": 9.961883603549835e-05,
"loss": 0.0288,
"step": 6990
},
{
"grad_norm": 0.2968343198299408,
"learning_rate": 9.961628459700267e-05,
"loss": 0.0311,
"step": 7000
},
{
"grad_norm": 0.21912340819835663,
"learning_rate": 9.961372468044095e-05,
"loss": 0.0309,
"step": 7010
},
{
"grad_norm": 0.23689767718315125,
"learning_rate": 9.961115628625062e-05,
"loss": 0.0297,
"step": 7020
},
{
"grad_norm": 0.23842456936836243,
"learning_rate": 9.960857941487056e-05,
"loss": 0.0249,
"step": 7030
},
{
"grad_norm": 0.39411240816116333,
"learning_rate": 9.960599406674106e-05,
"loss": 0.0277,
"step": 7040
},
{
"grad_norm": 0.38863542675971985,
"learning_rate": 9.960340024230393e-05,
"loss": 0.0269,
"step": 7050
},
{
"grad_norm": 0.3907487690448761,
"learning_rate": 9.960079794200232e-05,
"loss": 0.0295,
"step": 7060
},
{
"grad_norm": 0.2819722592830658,
"learning_rate": 9.959818716628096e-05,
"loss": 0.0263,
"step": 7070
},
{
"grad_norm": 0.2811400890350342,
"learning_rate": 9.95955679155859e-05,
"loss": 0.0271,
"step": 7080
},
{
"grad_norm": 0.28184035420417786,
"learning_rate": 9.959294019036472e-05,
"loss": 0.0289,
"step": 7090
},
{
"grad_norm": 0.3162672817707062,
"learning_rate": 9.959030399106646e-05,
"loss": 0.0248,
"step": 7100
},
{
"grad_norm": 0.28583642840385437,
"learning_rate": 9.958765931814153e-05,
"loss": 0.0307,
"step": 7110
},
{
"grad_norm": 0.2085208147764206,
"learning_rate": 9.958500617204184e-05,
"loss": 0.0301,
"step": 7120
},
{
"grad_norm": 0.32559746503829956,
"learning_rate": 9.958234455322075e-05,
"loss": 0.0267,
"step": 7130
},
{
"grad_norm": 0.4059063792228699,
"learning_rate": 9.957967446213308e-05,
"loss": 0.0299,
"step": 7140
},
{
"grad_norm": 0.32962876558303833,
"learning_rate": 9.957699589923501e-05,
"loss": 0.0265,
"step": 7150
},
{
"grad_norm": 0.2489539533853531,
"learning_rate": 9.957430886498431e-05,
"loss": 0.0266,
"step": 7160
},
{
"grad_norm": 0.26507315039634705,
"learning_rate": 9.957161335984008e-05,
"loss": 0.0251,
"step": 7170
},
{
"grad_norm": 0.31960949301719666,
"learning_rate": 9.956890938426291e-05,
"loss": 0.0297,
"step": 7180
},
{
"grad_norm": 0.39985284209251404,
"learning_rate": 9.956619693871482e-05,
"loss": 0.0291,
"step": 7190
},
{
"grad_norm": 0.3071722388267517,
"learning_rate": 9.956347602365934e-05,
"loss": 0.0299,
"step": 7200
},
{
"grad_norm": 0.2650475800037384,
"learning_rate": 9.956074663956135e-05,
"loss": 0.0296,
"step": 7210
},
{
"grad_norm": 0.380520761013031,
"learning_rate": 9.955800878688726e-05,
"loss": 0.0299,
"step": 7220
},
{
"grad_norm": 0.32119613885879517,
"learning_rate": 9.955526246610489e-05,
"loss": 0.0308,
"step": 7230
},
{
"grad_norm": 0.3378693461418152,
"learning_rate": 9.955250767768349e-05,
"loss": 0.0256,
"step": 7240
},
{
"grad_norm": 0.2946402132511139,
"learning_rate": 9.95497444220938e-05,
"loss": 0.0266,
"step": 7250
},
{
"grad_norm": 0.2820631265640259,
"learning_rate": 9.954697269980797e-05,
"loss": 0.026,
"step": 7260
},
{
"grad_norm": 0.30381670594215393,
"learning_rate": 9.954419251129962e-05,
"loss": 0.0263,
"step": 7270
},
{
"grad_norm": 0.2934808135032654,
"learning_rate": 9.95414038570438e-05,
"loss": 0.0247,
"step": 7280
},
{
"grad_norm": 0.24104171991348267,
"learning_rate": 9.953860673751703e-05,
"loss": 0.0273,
"step": 7290
},
{
"grad_norm": 0.24186210334300995,
"learning_rate": 9.953580115319725e-05,
"loss": 0.0267,
"step": 7300
},
{
"grad_norm": 0.34005916118621826,
"learning_rate": 9.953298710456387e-05,
"loss": 0.026,
"step": 7310
},
{
"grad_norm": 0.2795884609222412,
"learning_rate": 9.953016459209771e-05,
"loss": 0.0273,
"step": 7320
},
{
"grad_norm": 0.30493995547294617,
"learning_rate": 9.952733361628108e-05,
"loss": 0.0312,
"step": 7330
},
{
"grad_norm": 0.29594191908836365,
"learning_rate": 9.952449417759772e-05,
"loss": 0.0284,
"step": 7340
},
{
"grad_norm": 0.4051782190799713,
"learning_rate": 9.952164627653279e-05,
"loss": 0.0305,
"step": 7350
},
{
"grad_norm": 0.2247595191001892,
"learning_rate": 9.951878991357292e-05,
"loss": 0.026,
"step": 7360
},
{
"grad_norm": 0.2571432292461395,
"learning_rate": 9.951592508920622e-05,
"loss": 0.028,
"step": 7370
},
{
"grad_norm": 0.2743799090385437,
"learning_rate": 9.951305180392219e-05,
"loss": 0.0262,
"step": 7380
},
{
"grad_norm": 0.33545973896980286,
"learning_rate": 9.951017005821178e-05,
"loss": 0.0299,
"step": 7390
},
{
"grad_norm": 0.2926540970802307,
"learning_rate": 9.95072798525674e-05,
"loss": 0.0291,
"step": 7400
},
{
"grad_norm": 0.3546326458454132,
"learning_rate": 9.950438118748293e-05,
"loss": 0.0317,
"step": 7410
},
{
"grad_norm": 0.30050361156463623,
"learning_rate": 9.950147406345366e-05,
"loss": 0.0281,
"step": 7420
},
{
"grad_norm": 0.23915919661521912,
"learning_rate": 9.949855848097635e-05,
"loss": 0.0349,
"step": 7430
},
{
"grad_norm": 0.19533124566078186,
"learning_rate": 9.949563444054916e-05,
"loss": 0.0296,
"step": 7440
},
{
"grad_norm": 0.35138800740242004,
"learning_rate": 9.949270194267178e-05,
"loss": 0.0304,
"step": 7450
},
{
"grad_norm": 0.2618047893047333,
"learning_rate": 9.948976098784526e-05,
"loss": 0.0293,
"step": 7460
},
{
"grad_norm": 0.36352285742759705,
"learning_rate": 9.948681157657213e-05,
"loss": 0.0331,
"step": 7470
},
{
"grad_norm": 0.30749431252479553,
"learning_rate": 9.948385370935638e-05,
"loss": 0.0276,
"step": 7480
},
{
"grad_norm": 0.28992339968681335,
"learning_rate": 9.94808873867034e-05,
"loss": 0.0296,
"step": 7490
},
{
"grad_norm": 0.21981099247932434,
"learning_rate": 9.947791260912009e-05,
"loss": 0.0303,
"step": 7500
},
{
"grad_norm": 0.3701888918876648,
"learning_rate": 9.947492937711474e-05,
"loss": 0.0247,
"step": 7510
},
{
"grad_norm": 0.29776903986930847,
"learning_rate": 9.947193769119707e-05,
"loss": 0.0243,
"step": 7520
},
{
"grad_norm": 0.26625514030456543,
"learning_rate": 9.946893755187834e-05,
"loss": 0.0292,
"step": 7530
},
{
"grad_norm": 0.3425240218639374,
"learning_rate": 9.946592895967115e-05,
"loss": 0.0279,
"step": 7540
},
{
"grad_norm": 0.3067252039909363,
"learning_rate": 9.94629119150896e-05,
"loss": 0.0272,
"step": 7550
},
{
"grad_norm": 0.3026585578918457,
"learning_rate": 9.94598864186492e-05,
"loss": 0.0303,
"step": 7560
},
{
"grad_norm": 0.25410696864128113,
"learning_rate": 9.945685247086696e-05,
"loss": 0.0256,
"step": 7570
},
{
"grad_norm": 0.29221490025520325,
"learning_rate": 9.945381007226129e-05,
"loss": 0.0304,
"step": 7580
},
{
"grad_norm": 0.24581211805343628,
"learning_rate": 9.945075922335203e-05,
"loss": 0.0234,
"step": 7590
},
{
"grad_norm": 0.28238850831985474,
"learning_rate": 9.944769992466049e-05,
"loss": 0.0263,
"step": 7600
},
{
"grad_norm": 0.251505047082901,
"learning_rate": 9.944463217670945e-05,
"loss": 0.0291,
"step": 7610
},
{
"grad_norm": 0.2315089851617813,
"learning_rate": 9.944155598002307e-05,
"loss": 0.0281,
"step": 7620
},
{
"grad_norm": 0.26602116227149963,
"learning_rate": 9.943847133512701e-05,
"loss": 0.026,
"step": 7630
},
{
"grad_norm": 0.40367060899734497,
"learning_rate": 9.943537824254834e-05,
"loss": 0.034,
"step": 7640
},
{
"grad_norm": 0.26691651344299316,
"learning_rate": 9.943227670281559e-05,
"loss": 0.0315,
"step": 7650
},
{
"grad_norm": 0.24326331913471222,
"learning_rate": 9.942916671645873e-05,
"loss": 0.0243,
"step": 7660
},
{
"grad_norm": 0.3013291656970978,
"learning_rate": 9.942604828400916e-05,
"loss": 0.0277,
"step": 7670
},
{
"grad_norm": 0.3073980510234833,
"learning_rate": 9.942292140599975e-05,
"loss": 0.0278,
"step": 7680
},
{
"grad_norm": 0.2990018427371979,
"learning_rate": 9.94197860829648e-05,
"loss": 0.0284,
"step": 7690
},
{
"grad_norm": 0.273559033870697,
"learning_rate": 9.941664231544004e-05,
"loss": 0.0274,
"step": 7700
},
{
"grad_norm": 0.2964268624782562,
"learning_rate": 9.941349010396264e-05,
"loss": 0.0271,
"step": 7710
},
{
"grad_norm": 0.28047996759414673,
"learning_rate": 9.941032944907125e-05,
"loss": 0.0224,
"step": 7720
},
{
"grad_norm": 0.2647440433502197,
"learning_rate": 9.940716035130596e-05,
"loss": 0.0277,
"step": 7730
},
{
"grad_norm": 0.3013063073158264,
"learning_rate": 9.940398281120821e-05,
"loss": 0.0257,
"step": 7740
},
{
"grad_norm": 0.2597537338733673,
"learning_rate": 9.940079682932102e-05,
"loss": 0.0283,
"step": 7750
},
{
"grad_norm": 0.21689435839653015,
"learning_rate": 9.939760240618877e-05,
"loss": 0.0271,
"step": 7760
},
{
"grad_norm": 0.26306426525115967,
"learning_rate": 9.939439954235729e-05,
"loss": 0.025,
"step": 7770
},
{
"grad_norm": 0.2899826467037201,
"learning_rate": 9.939118823837387e-05,
"loss": 0.0245,
"step": 7780
},
{
"grad_norm": 0.19139663875102997,
"learning_rate": 9.938796849478725e-05,
"loss": 0.0226,
"step": 7790
},
{
"grad_norm": 0.1981378048658371,
"learning_rate": 9.938474031214755e-05,
"loss": 0.0254,
"step": 7800
},
{
"grad_norm": 0.2784959077835083,
"learning_rate": 9.938150369100643e-05,
"loss": 0.026,
"step": 7810
},
{
"grad_norm": 0.25330811738967896,
"learning_rate": 9.93782586319169e-05,
"loss": 0.0249,
"step": 7820
},
{
"grad_norm": 0.2706470191478729,
"learning_rate": 9.937500513543348e-05,
"loss": 0.0244,
"step": 7830
},
{
"grad_norm": 0.2947620153427124,
"learning_rate": 9.937174320211207e-05,
"loss": 0.0261,
"step": 7840
},
{
"grad_norm": 0.23056454956531525,
"learning_rate": 9.936847283251009e-05,
"loss": 0.0229,
"step": 7850
},
{
"grad_norm": 0.3381933271884918,
"learning_rate": 9.936519402718632e-05,
"loss": 0.0251,
"step": 7860
},
{
"grad_norm": 0.2765756845474243,
"learning_rate": 9.936190678670102e-05,
"loss": 0.0247,
"step": 7870
},
{
"grad_norm": 0.22710005939006805,
"learning_rate": 9.935861111161593e-05,
"loss": 0.0292,
"step": 7880
},
{
"grad_norm": 0.2768805921077728,
"learning_rate": 9.935530700249416e-05,
"loss": 0.023,
"step": 7890
},
{
"grad_norm": 0.207578644156456,
"learning_rate": 9.935199445990028e-05,
"loss": 0.0252,
"step": 7900
},
{
"grad_norm": 0.3019058406352997,
"learning_rate": 9.934867348440033e-05,
"loss": 0.0295,
"step": 7910
},
{
"grad_norm": 0.26228857040405273,
"learning_rate": 9.934534407656176e-05,
"loss": 0.0264,
"step": 7920
},
{
"grad_norm": 0.3489040434360504,
"learning_rate": 9.93420062369535e-05,
"loss": 0.0289,
"step": 7930
},
{
"grad_norm": 0.26589077711105347,
"learning_rate": 9.933865996614589e-05,
"loss": 0.029,
"step": 7940
},
{
"grad_norm": 0.24701766669750214,
"learning_rate": 9.933530526471068e-05,
"loss": 0.0236,
"step": 7950
},
{
"grad_norm": 0.28054970502853394,
"learning_rate": 9.933194213322114e-05,
"loss": 0.0269,
"step": 7960
},
{
"grad_norm": 0.25400447845458984,
"learning_rate": 9.932857057225192e-05,
"loss": 0.0274,
"step": 7970
},
{
"grad_norm": 0.2344646155834198,
"learning_rate": 9.932519058237912e-05,
"loss": 0.0283,
"step": 7980
},
{
"grad_norm": 0.2422647625207901,
"learning_rate": 9.932180216418032e-05,
"loss": 0.0237,
"step": 7990
},
{
"grad_norm": 0.23054273426532745,
"learning_rate": 9.931840531823446e-05,
"loss": 0.0243,
"step": 8000
},
{
"grad_norm": 0.2869581878185272,
"learning_rate": 9.9315000045122e-05,
"loss": 0.0262,
"step": 8010
},
{
"grad_norm": 0.30287492275238037,
"learning_rate": 9.931158634542481e-05,
"loss": 0.028,
"step": 8020
},
{
"grad_norm": 0.22392156720161438,
"learning_rate": 9.930816421972617e-05,
"loss": 0.0259,
"step": 8030
},
{
"grad_norm": 0.3009633421897888,
"learning_rate": 9.930473366861086e-05,
"loss": 0.0276,
"step": 8040
},
{
"grad_norm": 0.2293577939271927,
"learning_rate": 9.930129469266505e-05,
"loss": 0.0242,
"step": 8050
},
{
"grad_norm": 0.20792780816555023,
"learning_rate": 9.929784729247638e-05,
"loss": 0.0238,
"step": 8060
},
{
"grad_norm": 0.30487683415412903,
"learning_rate": 9.929439146863389e-05,
"loss": 0.0269,
"step": 8070
},
{
"grad_norm": 0.2773206830024719,
"learning_rate": 9.92909272217281e-05,
"loss": 0.0289,
"step": 8080
},
{
"grad_norm": 0.2209886908531189,
"learning_rate": 9.928745455235097e-05,
"loss": 0.0239,
"step": 8090
},
{
"grad_norm": 0.2363114356994629,
"learning_rate": 9.928397346109588e-05,
"loss": 0.0258,
"step": 8100
},
{
"grad_norm": 0.23011241853237152,
"learning_rate": 9.928048394855762e-05,
"loss": 0.0287,
"step": 8110
},
{
"grad_norm": 0.25448304414749146,
"learning_rate": 9.92769860153325e-05,
"loss": 0.0278,
"step": 8120
},
{
"grad_norm": 0.26419103145599365,
"learning_rate": 9.927347966201819e-05,
"loss": 0.0247,
"step": 8130
},
{
"grad_norm": 0.30355438590049744,
"learning_rate": 9.926996488921383e-05,
"loss": 0.0285,
"step": 8140
},
{
"grad_norm": 0.30782219767570496,
"learning_rate": 9.926644169752001e-05,
"loss": 0.029,
"step": 8150
},
{
"grad_norm": 0.25190019607543945,
"learning_rate": 9.926291008753875e-05,
"loss": 0.0275,
"step": 8160
},
{
"grad_norm": 0.2714674174785614,
"learning_rate": 9.92593700598735e-05,
"loss": 0.0272,
"step": 8170
},
{
"grad_norm": 0.29583925008773804,
"learning_rate": 9.925582161512915e-05,
"loss": 0.0261,
"step": 8180
},
{
"grad_norm": 0.2607842981815338,
"learning_rate": 9.925226475391205e-05,
"loss": 0.0279,
"step": 8190
},
{
"grad_norm": 0.23056012392044067,
"learning_rate": 9.924869947682993e-05,
"loss": 0.0231,
"step": 8200
},
{
"grad_norm": 0.2710048258304596,
"learning_rate": 9.924512578449204e-05,
"loss": 0.0262,
"step": 8210
},
{
"grad_norm": 0.3241911828517914,
"learning_rate": 9.924154367750901e-05,
"loss": 0.0224,
"step": 8220
},
{
"grad_norm": 0.28296151757240295,
"learning_rate": 9.923795315649293e-05,
"loss": 0.0245,
"step": 8230
},
{
"grad_norm": 0.2435009628534317,
"learning_rate": 9.92343542220573e-05,
"loss": 0.0241,
"step": 8240
},
{
"grad_norm": 0.2360232025384903,
"learning_rate": 9.92307468748171e-05,
"loss": 0.025,
"step": 8250
},
{
"grad_norm": 0.3190790116786957,
"learning_rate": 9.922713111538873e-05,
"loss": 0.027,
"step": 8260
},
{
"grad_norm": 0.20300662517547607,
"learning_rate": 9.922350694439003e-05,
"loss": 0.0284,
"step": 8270
},
{
"grad_norm": 0.2054835557937622,
"learning_rate": 9.921987436244024e-05,
"loss": 0.0276,
"step": 8280
},
{
"grad_norm": 0.2487817406654358,
"learning_rate": 9.921623337016008e-05,
"loss": 0.0255,
"step": 8290
},
{
"grad_norm": 0.24625623226165771,
"learning_rate": 9.921258396817172e-05,
"loss": 0.0239,
"step": 8300
},
{
"grad_norm": 0.3312872350215912,
"learning_rate": 9.920892615709874e-05,
"loss": 0.0272,
"step": 8310
},
{
"grad_norm": 0.24326156079769135,
"learning_rate": 9.920525993756612e-05,
"loss": 0.0272,
"step": 8320
},
{
"grad_norm": 0.26025789976119995,
"learning_rate": 9.920158531020036e-05,
"loss": 0.0247,
"step": 8330
},
{
"grad_norm": 0.2548008859157562,
"learning_rate": 9.919790227562933e-05,
"loss": 0.025,
"step": 8340
},
{
"grad_norm": 0.20518814027309418,
"learning_rate": 9.919421083448237e-05,
"loss": 0.0232,
"step": 8350
},
{
"grad_norm": 0.24752728641033173,
"learning_rate": 9.919051098739022e-05,
"loss": 0.0235,
"step": 8360
},
{
"grad_norm": 0.25243431329727173,
"learning_rate": 9.918680273498514e-05,
"loss": 0.0245,
"step": 8370
},
{
"grad_norm": 0.2901827096939087,
"learning_rate": 9.918308607790072e-05,
"loss": 0.0254,
"step": 8380
},
{
"grad_norm": 0.2677813470363617,
"learning_rate": 9.917936101677205e-05,
"loss": 0.023,
"step": 8390
},
{
"grad_norm": 0.3392478823661804,
"learning_rate": 9.917562755223564e-05,
"loss": 0.0223,
"step": 8400
},
{
"grad_norm": 0.23790103197097778,
"learning_rate": 9.917188568492944e-05,
"loss": 0.0227,
"step": 8410
},
{
"grad_norm": 0.34391242265701294,
"learning_rate": 9.916813541549283e-05,
"loss": 0.026,
"step": 8420
},
{
"grad_norm": 0.2735089957714081,
"learning_rate": 9.916437674456663e-05,
"loss": 0.0211,
"step": 8430
},
{
"grad_norm": 0.19652947783470154,
"learning_rate": 9.916060967279308e-05,
"loss": 0.0243,
"step": 8440
},
{
"grad_norm": 0.25613489747047424,
"learning_rate": 9.91568342008159e-05,
"loss": 0.0267,
"step": 8450
},
{
"grad_norm": 0.2633569538593292,
"learning_rate": 9.915305032928019e-05,
"loss": 0.0254,
"step": 8460
},
{
"grad_norm": 0.23426930606365204,
"learning_rate": 9.914925805883253e-05,
"loss": 0.0259,
"step": 8470
},
{
"grad_norm": 0.2730519771575928,
"learning_rate": 9.914545739012088e-05,
"loss": 0.0246,
"step": 8480
},
{
"grad_norm": 0.21916942298412323,
"learning_rate": 9.91416483237947e-05,
"loss": 0.023,
"step": 8490
},
{
"grad_norm": 0.2629518210887909,
"learning_rate": 9.913783086050485e-05,
"loss": 0.0233,
"step": 8500
},
{
"grad_norm": 0.2678612172603607,
"learning_rate": 9.913400500090364e-05,
"loss": 0.023,
"step": 8510
},
{
"grad_norm": 0.26758983731269836,
"learning_rate": 9.913017074564479e-05,
"loss": 0.0255,
"step": 8520
},
{
"grad_norm": 0.20346862077713013,
"learning_rate": 9.912632809538348e-05,
"loss": 0.0287,
"step": 8530
},
{
"grad_norm": 0.24763819575309753,
"learning_rate": 9.912247705077629e-05,
"loss": 0.0286,
"step": 8540
},
{
"grad_norm": 0.3322838544845581,
"learning_rate": 9.911861761248127e-05,
"loss": 0.0261,
"step": 8550
},
{
"grad_norm": 0.22574086487293243,
"learning_rate": 9.91147497811579e-05,
"loss": 0.0256,
"step": 8560
},
{
"grad_norm": 0.3162856698036194,
"learning_rate": 9.911087355746709e-05,
"loss": 0.0223,
"step": 8570
},
{
"grad_norm": 0.3085162043571472,
"learning_rate": 9.910698894207117e-05,
"loss": 0.0253,
"step": 8580
},
{
"grad_norm": 0.21497994661331177,
"learning_rate": 9.910309593563392e-05,
"loss": 0.023,
"step": 8590
},
{
"grad_norm": 0.19948522746562958,
"learning_rate": 9.909919453882057e-05,
"loss": 0.0223,
"step": 8600
},
{
"grad_norm": 0.2361244112253189,
"learning_rate": 9.90952847522977e-05,
"loss": 0.0241,
"step": 8610
},
{
"grad_norm": 0.29840973019599915,
"learning_rate": 9.909136657673346e-05,
"loss": 0.0258,
"step": 8620
},
{
"grad_norm": 0.25735464692115784,
"learning_rate": 9.908744001279731e-05,
"loss": 0.021,
"step": 8630
},
{
"grad_norm": 0.2598496079444885,
"learning_rate": 9.90835050611602e-05,
"loss": 0.0223,
"step": 8640
},
{
"grad_norm": 0.23943184316158295,
"learning_rate": 9.90795617224945e-05,
"loss": 0.0266,
"step": 8650
},
{
"grad_norm": 0.21757365763187408,
"learning_rate": 9.907560999747405e-05,
"loss": 0.0253,
"step": 8660
},
{
"grad_norm": 0.2191021740436554,
"learning_rate": 9.907164988677408e-05,
"loss": 0.0257,
"step": 8670
},
{
"grad_norm": 0.2412397861480713,
"learning_rate": 9.906768139107124e-05,
"loss": 0.0249,
"step": 8680
},
{
"grad_norm": 0.2552809417247772,
"learning_rate": 9.906370451104367e-05,
"loss": 0.0239,
"step": 8690
},
{
"grad_norm": 0.34299716353416443,
"learning_rate": 9.905971924737088e-05,
"loss": 0.0219,
"step": 8700
},
{
"grad_norm": 0.20640961825847626,
"learning_rate": 9.905572560073387e-05,
"loss": 0.0234,
"step": 8710
},
{
"grad_norm": 0.3168725371360779,
"learning_rate": 9.905172357181501e-05,
"loss": 0.0203,
"step": 8720
},
{
"grad_norm": 0.2035190910100937,
"learning_rate": 9.904771316129817e-05,
"loss": 0.0268,
"step": 8730
},
{
"grad_norm": 0.274885892868042,
"learning_rate": 9.904369436986862e-05,
"loss": 0.0227,
"step": 8740
},
{
"grad_norm": 0.3238915205001831,
"learning_rate": 9.903966719821303e-05,
"loss": 0.0274,
"step": 8750
},
{
"grad_norm": 0.31647101044654846,
"learning_rate": 9.903563164701956e-05,
"loss": 0.0248,
"step": 8760
},
{
"grad_norm": 0.25814852118492126,
"learning_rate": 9.903158771697778e-05,
"loss": 0.0212,
"step": 8770
},
{
"grad_norm": 0.28999999165534973,
"learning_rate": 9.902753540877867e-05,
"loss": 0.0215,
"step": 8780
},
{
"grad_norm": 0.23847296833992004,
"learning_rate": 9.902347472311466e-05,
"loss": 0.0233,
"step": 8790
},
{
"grad_norm": 0.21799112856388092,
"learning_rate": 9.901940566067962e-05,
"loss": 0.0281,
"step": 8800
},
{
"grad_norm": 0.2692367136478424,
"learning_rate": 9.901532822216883e-05,
"loss": 0.0255,
"step": 8810
},
{
"grad_norm": 0.20797976851463318,
"learning_rate": 9.901124240827904e-05,
"loss": 0.0267,
"step": 8820
},
{
"grad_norm": 0.16693612933158875,
"learning_rate": 9.900714821970835e-05,
"loss": 0.0285,
"step": 8830
},
{
"grad_norm": 0.23021844029426575,
"learning_rate": 9.900304565715641e-05,
"loss": 0.0265,
"step": 8840
},
{
"grad_norm": 0.2509411871433258,
"learning_rate": 9.899893472132419e-05,
"loss": 0.0263,
"step": 8850
},
{
"grad_norm": 0.24572598934173584,
"learning_rate": 9.899481541291415e-05,
"loss": 0.0237,
"step": 8860
},
{
"grad_norm": 0.2603510320186615,
"learning_rate": 9.899068773263016e-05,
"loss": 0.0254,
"step": 8870
},
{
"grad_norm": 0.24887840449810028,
"learning_rate": 9.898655168117754e-05,
"loss": 0.0237,
"step": 8880
},
{
"grad_norm": 0.261653333902359,
"learning_rate": 9.898240725926302e-05,
"loss": 0.0253,
"step": 8890
},
{
"grad_norm": 0.304040789604187,
"learning_rate": 9.897825446759478e-05,
"loss": 0.0206,
"step": 8900
},
{
"grad_norm": 0.26770681142807007,
"learning_rate": 9.897409330688241e-05,
"loss": 0.0246,
"step": 8910
},
{
"grad_norm": 0.2028878927230835,
"learning_rate": 9.896992377783692e-05,
"loss": 0.0224,
"step": 8920
},
{
"grad_norm": 0.20525990426540375,
"learning_rate": 9.89657458811708e-05,
"loss": 0.0224,
"step": 8930
},
{
"grad_norm": 0.2241036295890808,
"learning_rate": 9.896155961759792e-05,
"loss": 0.0221,
"step": 8940
},
{
"grad_norm": 0.29430943727493286,
"learning_rate": 9.895736498783361e-05,
"loss": 0.026,
"step": 8950
},
{
"grad_norm": 0.3020664155483246,
"learning_rate": 9.895316199259462e-05,
"loss": 0.0283,
"step": 8960
},
{
"grad_norm": 0.24055476486682892,
"learning_rate": 9.894895063259909e-05,
"loss": 0.0259,
"step": 8970
},
{
"grad_norm": 0.2667890191078186,
"learning_rate": 9.894473090856667e-05,
"loss": 0.0268,
"step": 8980
},
{
"grad_norm": 0.2760610580444336,
"learning_rate": 9.894050282121839e-05,
"loss": 0.0255,
"step": 8990
},
{
"grad_norm": 0.2546814978122711,
"learning_rate": 9.893626637127668e-05,
"loss": 0.0282,
"step": 9000
},
{
"grad_norm": 0.23294395208358765,
"learning_rate": 9.893202155946546e-05,
"loss": 0.0267,
"step": 9010
},
{
"grad_norm": 0.29643920063972473,
"learning_rate": 9.892776838651006e-05,
"loss": 0.021,
"step": 9020
},
{
"grad_norm": 0.2604086399078369,
"learning_rate": 9.892350685313722e-05,
"loss": 0.0266,
"step": 9030
},
{
"grad_norm": 0.2267618030309677,
"learning_rate": 9.891923696007513e-05,
"loss": 0.0242,
"step": 9040
},
{
"grad_norm": 0.22060492634773254,
"learning_rate": 9.891495870805336e-05,
"loss": 0.0212,
"step": 9050
},
{
"grad_norm": 0.230683833360672,
"learning_rate": 9.891067209780298e-05,
"loss": 0.0226,
"step": 9060
},
{
"grad_norm": 0.25970616936683655,
"learning_rate": 9.890637713005646e-05,
"loss": 0.024,
"step": 9070
},
{
"grad_norm": 0.3181430399417877,
"learning_rate": 9.890207380554767e-05,
"loss": 0.0244,
"step": 9080
},
{
"grad_norm": 0.30151650309562683,
"learning_rate": 9.889776212501196e-05,
"loss": 0.0254,
"step": 9090
},
{
"grad_norm": 0.20736676454544067,
"learning_rate": 9.889344208918605e-05,
"loss": 0.0228,
"step": 9100
},
{
"grad_norm": 0.17086367309093475,
"learning_rate": 9.888911369880812e-05,
"loss": 0.0225,
"step": 9110
},
{
"grad_norm": 0.24429209530353546,
"learning_rate": 9.888477695461777e-05,
"loss": 0.0252,
"step": 9120
},
{
"grad_norm": 0.2361510843038559,
"learning_rate": 9.888043185735607e-05,
"loss": 0.0237,
"step": 9130
},
{
"grad_norm": 0.25494277477264404,
"learning_rate": 9.887607840776542e-05,
"loss": 0.0253,
"step": 9140
},
{
"grad_norm": 0.21166761219501495,
"learning_rate": 9.887171660658975e-05,
"loss": 0.023,
"step": 9150
},
{
"grad_norm": 0.2803945541381836,
"learning_rate": 9.886734645457435e-05,
"loss": 0.0227,
"step": 9160
},
{
"grad_norm": 0.22324180603027344,
"learning_rate": 9.886296795246597e-05,
"loss": 0.0263,
"step": 9170
},
{
"grad_norm": 0.24454787373542786,
"learning_rate": 9.885858110101276e-05,
"loss": 0.0224,
"step": 9180
},
{
"grad_norm": 0.2701004147529602,
"learning_rate": 9.885418590096434e-05,
"loss": 0.0238,
"step": 9190
},
{
"grad_norm": 0.26043701171875,
"learning_rate": 9.88497823530717e-05,
"loss": 0.0245,
"step": 9200
},
{
"grad_norm": 0.31409865617752075,
"learning_rate": 9.884537045808732e-05,
"loss": 0.0254,
"step": 9210
},
{
"grad_norm": 0.30593937635421753,
"learning_rate": 9.884095021676502e-05,
"loss": 0.0282,
"step": 9220
},
{
"grad_norm": 0.2996588349342346,
"learning_rate": 9.883652162986017e-05,
"loss": 0.0239,
"step": 9230
},
{
"grad_norm": 0.31036072969436646,
"learning_rate": 9.883208469812943e-05,
"loss": 0.0242,
"step": 9240
},
{
"grad_norm": 0.2229168713092804,
"learning_rate": 9.882763942233098e-05,
"loss": 0.0227,
"step": 9250
},
{
"grad_norm": 0.21954940259456635,
"learning_rate": 9.882318580322441e-05,
"loss": 0.0228,
"step": 9260
},
{
"grad_norm": 0.20078396797180176,
"learning_rate": 9.881872384157067e-05,
"loss": 0.0238,
"step": 9270
},
{
"grad_norm": 0.35384637117385864,
"learning_rate": 9.881425353813225e-05,
"loss": 0.0232,
"step": 9280
},
{
"grad_norm": 0.23953738808631897,
"learning_rate": 9.880977489367296e-05,
"loss": 0.0242,
"step": 9290
},
{
"grad_norm": 0.31235888600349426,
"learning_rate": 9.88052879089581e-05,
"loss": 0.0248,
"step": 9300
},
{
"grad_norm": 0.29150423407554626,
"learning_rate": 9.880079258475434e-05,
"loss": 0.0228,
"step": 9310
},
{
"grad_norm": 0.21773847937583923,
"learning_rate": 9.879628892182985e-05,
"loss": 0.0205,
"step": 9320
},
{
"grad_norm": 0.21790239214897156,
"learning_rate": 9.879177692095416e-05,
"loss": 0.0227,
"step": 9330
},
{
"grad_norm": 0.22045554220676422,
"learning_rate": 9.878725658289825e-05,
"loss": 0.0215,
"step": 9340
},
{
"grad_norm": 0.2567881643772125,
"learning_rate": 9.878272790843454e-05,
"loss": 0.0222,
"step": 9350
},
{
"grad_norm": 0.27277886867523193,
"learning_rate": 9.877819089833682e-05,
"loss": 0.0208,
"step": 9360
},
{
"grad_norm": 0.2389393150806427,
"learning_rate": 9.877364555338038e-05,
"loss": 0.0201,
"step": 9370
},
{
"grad_norm": 0.22118143737316132,
"learning_rate": 9.876909187434186e-05,
"loss": 0.0223,
"step": 9380
},
{
"grad_norm": 0.23778778314590454,
"learning_rate": 9.876452986199939e-05,
"loss": 0.0198,
"step": 9390
},
{
"grad_norm": 0.23677578568458557,
"learning_rate": 9.875995951713248e-05,
"loss": 0.0228,
"step": 9400
},
{
"grad_norm": 0.2681795060634613,
"learning_rate": 9.875538084052207e-05,
"loss": 0.0191,
"step": 9410
},
{
"grad_norm": 0.26710137724876404,
"learning_rate": 9.875079383295053e-05,
"loss": 0.0224,
"step": 9420
},
{
"grad_norm": 0.27031534910202026,
"learning_rate": 9.874619849520167e-05,
"loss": 0.0243,
"step": 9430
},
{
"grad_norm": 0.2640977203845978,
"learning_rate": 9.874159482806069e-05,
"loss": 0.021,
"step": 9440
},
{
"grad_norm": 0.27031025290489197,
"learning_rate": 9.873698283231426e-05,
"loss": 0.0247,
"step": 9450
},
{
"grad_norm": 0.2868097424507141,
"learning_rate": 9.87323625087504e-05,
"loss": 0.0214,
"step": 9460
},
{
"grad_norm": 0.21061162650585175,
"learning_rate": 9.872773385815863e-05,
"loss": 0.0228,
"step": 9470
},
{
"grad_norm": 0.19812670350074768,
"learning_rate": 9.872309688132986e-05,
"loss": 0.0233,
"step": 9480
},
{
"grad_norm": 0.33431848883628845,
"learning_rate": 9.871845157905639e-05,
"loss": 0.0217,
"step": 9490
},
{
"grad_norm": 0.2857918441295624,
"learning_rate": 9.871379795213201e-05,
"loss": 0.0234,
"step": 9500
},
{
"grad_norm": 0.24880020320415497,
"learning_rate": 9.87091360013519e-05,
"loss": 0.0241,
"step": 9510
},
{
"grad_norm": 0.2224482148885727,
"learning_rate": 9.870446572751262e-05,
"loss": 0.022,
"step": 9520
},
{
"grad_norm": 0.2825033962726593,
"learning_rate": 9.869978713141224e-05,
"loss": 0.0186,
"step": 9530
},
{
"grad_norm": 0.2358483225107193,
"learning_rate": 9.869510021385016e-05,
"loss": 0.0211,
"step": 9540
},
{
"grad_norm": 0.21550311148166656,
"learning_rate": 9.869040497562727e-05,
"loss": 0.0267,
"step": 9550
},
{
"grad_norm": 0.29373157024383545,
"learning_rate": 9.868570141754587e-05,
"loss": 0.0225,
"step": 9560
},
{
"grad_norm": 0.32460886240005493,
"learning_rate": 9.868098954040965e-05,
"loss": 0.023,
"step": 9570
},
{
"grad_norm": 0.36684587597846985,
"learning_rate": 9.867626934502374e-05,
"loss": 0.0255,
"step": 9580
},
{
"grad_norm": 0.30278316140174866,
"learning_rate": 9.86715408321947e-05,
"loss": 0.026,
"step": 9590
},
{
"grad_norm": 0.25676724314689636,
"learning_rate": 9.86668040027305e-05,
"loss": 0.0205,
"step": 9600
},
{
"grad_norm": 0.23263034224510193,
"learning_rate": 9.866205885744053e-05,
"loss": 0.024,
"step": 9610
},
{
"grad_norm": 0.2575293481349945,
"learning_rate": 9.865730539713563e-05,
"loss": 0.0199,
"step": 9620
},
{
"grad_norm": 0.27320706844329834,
"learning_rate": 9.8652543622628e-05,
"loss": 0.0201,
"step": 9630
},
{
"grad_norm": 0.22485195100307465,
"learning_rate": 9.864777353473132e-05,
"loss": 0.0253,
"step": 9640
},
{
"grad_norm": 0.23003463447093964,
"learning_rate": 9.864299513426068e-05,
"loss": 0.0244,
"step": 9650
},
{
"grad_norm": 0.2642805278301239,
"learning_rate": 9.863820842203254e-05,
"loss": 0.0246,
"step": 9660
},
{
"grad_norm": 0.2115945667028427,
"learning_rate": 9.863341339886483e-05,
"loss": 0.0192,
"step": 9670
},
{
"grad_norm": 0.18153990805149078,
"learning_rate": 9.86286100655769e-05,
"loss": 0.0191,
"step": 9680
},
{
"grad_norm": 0.2407338172197342,
"learning_rate": 9.862379842298953e-05,
"loss": 0.0235,
"step": 9690
},
{
"grad_norm": 0.2723984122276306,
"learning_rate": 9.861897847192485e-05,
"loss": 0.0245,
"step": 9700
},
{
"grad_norm": 0.23615650832653046,
"learning_rate": 9.86141502132065e-05,
"loss": 0.0221,
"step": 9710
},
{
"grad_norm": 0.2216765582561493,
"learning_rate": 9.860931364765946e-05,
"loss": 0.0222,
"step": 9720
},
{
"grad_norm": 0.215382382273674,
"learning_rate": 9.860446877611021e-05,
"loss": 0.0197,
"step": 9730
},
{
"grad_norm": 0.17680546641349792,
"learning_rate": 9.859961559938655e-05,
"loss": 0.021,
"step": 9740
},
{
"grad_norm": 0.2677190601825714,
"learning_rate": 9.85947541183178e-05,
"loss": 0.0221,
"step": 9750
},
{
"grad_norm": 0.2742480933666229,
"learning_rate": 9.858988433373463e-05,
"loss": 0.0216,
"step": 9760
},
{
"grad_norm": 0.24318669736385345,
"learning_rate": 9.858500624646918e-05,
"loss": 0.0199,
"step": 9770
},
{
"grad_norm": 0.2163631170988083,
"learning_rate": 9.858011985735497e-05,
"loss": 0.0172,
"step": 9780
},
{
"grad_norm": 0.1700691431760788,
"learning_rate": 9.857522516722693e-05,
"loss": 0.019,
"step": 9790
},
{
"grad_norm": 0.2203397899866104,
"learning_rate": 9.857032217692145e-05,
"loss": 0.0205,
"step": 9800
},
{
"grad_norm": 0.1914152354001999,
"learning_rate": 9.856541088727631e-05,
"loss": 0.0277,
"step": 9810
},
{
"grad_norm": 0.35746708512306213,
"learning_rate": 9.856049129913072e-05,
"loss": 0.0248,
"step": 9820
},
{
"grad_norm": 0.2433345466852188,
"learning_rate": 9.85555634133253e-05,
"loss": 0.0247,
"step": 9830
},
{
"grad_norm": 0.22579282522201538,
"learning_rate": 9.855062723070208e-05,
"loss": 0.0256,
"step": 9840
},
{
"grad_norm": 0.2790820002555847,
"learning_rate": 9.854568275210454e-05,
"loss": 0.026,
"step": 9850
},
{
"grad_norm": 0.22638730704784393,
"learning_rate": 9.854072997837754e-05,
"loss": 0.0221,
"step": 9860
},
{
"grad_norm": 0.18334059417247772,
"learning_rate": 9.853576891036737e-05,
"loss": 0.0185,
"step": 9870
},
{
"grad_norm": 0.2652938961982727,
"learning_rate": 9.853079954892177e-05,
"loss": 0.02,
"step": 9880
},
{
"grad_norm": 0.21239930391311646,
"learning_rate": 9.852582189488983e-05,
"loss": 0.0206,
"step": 9890
},
{
"grad_norm": 0.26186928153038025,
"learning_rate": 9.852083594912212e-05,
"loss": 0.0218,
"step": 9900
},
{
"grad_norm": 0.23631031811237335,
"learning_rate": 9.851584171247058e-05,
"loss": 0.0203,
"step": 9910
},
{
"grad_norm": 0.28428515791893005,
"learning_rate": 9.851083918578863e-05,
"loss": 0.0198,
"step": 9920
},
{
"grad_norm": 0.1825859546661377,
"learning_rate": 9.850582836993103e-05,
"loss": 0.022,
"step": 9930
},
{
"grad_norm": 0.18512263894081116,
"learning_rate": 9.850080926575397e-05,
"loss": 0.0178,
"step": 9940
},
{
"grad_norm": 0.27902159094810486,
"learning_rate": 9.849578187411515e-05,
"loss": 0.0242,
"step": 9950
},
{
"grad_norm": 0.20011653006076813,
"learning_rate": 9.849074619587354e-05,
"loss": 0.0205,
"step": 9960
},
{
"grad_norm": 0.22842735052108765,
"learning_rate": 9.848570223188964e-05,
"loss": 0.0212,
"step": 9970
},
{
"grad_norm": 0.18112444877624512,
"learning_rate": 9.848064998302531e-05,
"loss": 0.0164,
"step": 9980
},
{
"grad_norm": 0.25175318121910095,
"learning_rate": 9.847558945014386e-05,
"loss": 0.0208,
"step": 9990
},
{
"grad_norm": 0.22751906514167786,
"learning_rate": 9.847052063410996e-05,
"loss": 0.0216,
"step": 10000
},
{
"grad_norm": 0.2501031756401062,
"learning_rate": 9.846544353578977e-05,
"loss": 0.0187,
"step": 10010
},
{
"grad_norm": 0.2205076813697815,
"learning_rate": 9.846035815605081e-05,
"loss": 0.0211,
"step": 10020
},
{
"grad_norm": 0.29377835988998413,
"learning_rate": 9.845526449576204e-05,
"loss": 0.0214,
"step": 10030
},
{
"grad_norm": 0.21597570180892944,
"learning_rate": 9.845016255579383e-05,
"loss": 0.0195,
"step": 10040
},
{
"grad_norm": 0.19214503467082977,
"learning_rate": 9.844505233701794e-05,
"loss": 0.0187,
"step": 10050
},
{
"grad_norm": 0.25303030014038086,
"learning_rate": 9.843993384030757e-05,
"loss": 0.0195,
"step": 10060
},
{
"grad_norm": 0.21829861402511597,
"learning_rate": 9.843480706653737e-05,
"loss": 0.0187,
"step": 10070
},
{
"grad_norm": 0.2369549721479416,
"learning_rate": 9.84296720165833e-05,
"loss": 0.0255,
"step": 10080
},
{
"grad_norm": 0.2949647903442383,
"learning_rate": 9.842452869132286e-05,
"loss": 0.0203,
"step": 10090
},
{
"grad_norm": 0.2276425063610077,
"learning_rate": 9.841937709163489e-05,
"loss": 0.0187,
"step": 10100
},
{
"grad_norm": 0.18210828304290771,
"learning_rate": 9.841421721839962e-05,
"loss": 0.0169,
"step": 10110
},
{
"grad_norm": 0.26168766617774963,
"learning_rate": 9.840904907249879e-05,
"loss": 0.0205,
"step": 10120
},
{
"grad_norm": 0.2785622775554657,
"learning_rate": 9.840387265481545e-05,
"loss": 0.0216,
"step": 10130
},
{
"grad_norm": 0.2521674931049347,
"learning_rate": 9.839868796623411e-05,
"loss": 0.0181,
"step": 10140
},
{
"grad_norm": 0.21578259766101837,
"learning_rate": 9.839349500764072e-05,
"loss": 0.0181,
"step": 10150
},
{
"grad_norm": 0.2497054636478424,
"learning_rate": 9.83882937799226e-05,
"loss": 0.0213,
"step": 10160
},
{
"grad_norm": 0.3409843444824219,
"learning_rate": 9.838308428396849e-05,
"loss": 0.0218,
"step": 10170
},
{
"grad_norm": 0.29681217670440674,
"learning_rate": 9.837786652066854e-05,
"loss": 0.0229,
"step": 10180
},
{
"grad_norm": 0.24830268323421478,
"learning_rate": 9.837264049091437e-05,
"loss": 0.0209,
"step": 10190
},
{
"grad_norm": 0.21985377371311188,
"learning_rate": 9.836740619559893e-05,
"loss": 0.02,
"step": 10200
},
{
"grad_norm": 0.2771212160587311,
"learning_rate": 9.836216363561659e-05,
"loss": 0.024,
"step": 10210
},
{
"grad_norm": 0.2835344970226288,
"learning_rate": 9.835691281186322e-05,
"loss": 0.0263,
"step": 10220
},
{
"grad_norm": 0.31411153078079224,
"learning_rate": 9.8351653725236e-05,
"loss": 0.0208,
"step": 10230
},
{
"grad_norm": 0.1784379929304123,
"learning_rate": 9.83463863766336e-05,
"loss": 0.0189,
"step": 10240
},
{
"grad_norm": 0.19949562847614288,
"learning_rate": 9.834111076695602e-05,
"loss": 0.0179,
"step": 10250
},
{
"grad_norm": 0.27524039149284363,
"learning_rate": 9.833582689710477e-05,
"loss": 0.0237,
"step": 10260
},
{
"grad_norm": 0.19682864844799042,
"learning_rate": 9.833053476798268e-05,
"loss": 0.0219,
"step": 10270
},
{
"grad_norm": 0.29202038049697876,
"learning_rate": 9.832523438049404e-05,
"loss": 0.0231,
"step": 10280
},
{
"grad_norm": 0.21768589317798615,
"learning_rate": 9.831992573554454e-05,
"loss": 0.018,
"step": 10290
},
{
"grad_norm": 0.1866806596517563,
"learning_rate": 9.831460883404128e-05,
"loss": 0.0191,
"step": 10300
},
{
"grad_norm": 0.27372342348098755,
"learning_rate": 9.830928367689278e-05,
"loss": 0.0188,
"step": 10310
},
{
"grad_norm": 0.2904968559741974,
"learning_rate": 9.830395026500896e-05,
"loss": 0.0204,
"step": 10320
},
{
"grad_norm": 0.26565808057785034,
"learning_rate": 9.829860859930115e-05,
"loss": 0.0219,
"step": 10330
},
{
"grad_norm": 0.18882012367248535,
"learning_rate": 9.829325868068212e-05,
"loss": 0.0192,
"step": 10340
},
{
"grad_norm": 0.2175988107919693,
"learning_rate": 9.8287900510066e-05,
"loss": 0.0252,
"step": 10350
},
{
"grad_norm": 0.26010170578956604,
"learning_rate": 9.828253408836834e-05,
"loss": 0.0241,
"step": 10360
},
{
"grad_norm": 0.27464449405670166,
"learning_rate": 9.827715941650615e-05,
"loss": 0.0203,
"step": 10370
},
{
"grad_norm": 0.22996774315834045,
"learning_rate": 9.82717764953978e-05,
"loss": 0.0177,
"step": 10380
},
{
"grad_norm": 0.16900886595249176,
"learning_rate": 9.826638532596308e-05,
"loss": 0.0182,
"step": 10390
},
{
"grad_norm": 0.2104736566543579,
"learning_rate": 9.82609859091232e-05,
"loss": 0.0186,
"step": 10400
},
{
"grad_norm": 0.20910122990608215,
"learning_rate": 9.825557824580076e-05,
"loss": 0.0189,
"step": 10410
},
{
"grad_norm": 0.22928059101104736,
"learning_rate": 9.82501623369198e-05,
"loss": 0.0183,
"step": 10420
},
{
"grad_norm": 0.3247082233428955,
"learning_rate": 9.824473818340574e-05,
"loss": 0.0212,
"step": 10430
},
{
"grad_norm": 0.1974494755268097,
"learning_rate": 9.823930578618541e-05,
"loss": 0.0196,
"step": 10440
},
{
"grad_norm": 0.2238084077835083,
"learning_rate": 9.823386514618709e-05,
"loss": 0.0186,
"step": 10450
},
{
"grad_norm": 0.17666709423065186,
"learning_rate": 9.82284162643404e-05,
"loss": 0.0191,
"step": 10460
},
{
"grad_norm": 0.1988755762577057,
"learning_rate": 9.822295914157642e-05,
"loss": 0.0216,
"step": 10470
},
{
"grad_norm": 0.19286410510540009,
"learning_rate": 9.821749377882763e-05,
"loss": 0.019,
"step": 10480
},
{
"grad_norm": 0.20021604001522064,
"learning_rate": 9.821202017702791e-05,
"loss": 0.0191,
"step": 10490
},
{
"grad_norm": 0.3215916156768799,
"learning_rate": 9.820653833711253e-05,
"loss": 0.0195,
"step": 10500
},
{
"grad_norm": 0.19581910967826843,
"learning_rate": 9.820104826001822e-05,
"loss": 0.0205,
"step": 10510
},
{
"grad_norm": 0.253947377204895,
"learning_rate": 9.819554994668305e-05,
"loss": 0.0213,
"step": 10520
},
{
"grad_norm": 0.18200746178627014,
"learning_rate": 9.819004339804654e-05,
"loss": 0.0209,
"step": 10530
},
{
"grad_norm": 0.27813297510147095,
"learning_rate": 9.818452861504961e-05,
"loss": 0.0207,
"step": 10540
},
{
"grad_norm": 0.24935060739517212,
"learning_rate": 9.81790055986346e-05,
"loss": 0.0223,
"step": 10550
},
{
"grad_norm": 0.20500843226909637,
"learning_rate": 9.817347434974523e-05,
"loss": 0.0194,
"step": 10560
},
{
"grad_norm": 0.2651246190071106,
"learning_rate": 9.816793486932664e-05,
"loss": 0.0209,
"step": 10570
},
{
"grad_norm": 0.17405372858047485,
"learning_rate": 9.816238715832538e-05,
"loss": 0.017,
"step": 10580
},
{
"grad_norm": 0.23822303116321564,
"learning_rate": 9.815683121768939e-05,
"loss": 0.0229,
"step": 10590
},
{
"grad_norm": 0.24041873216629028,
"learning_rate": 9.815126704836804e-05,
"loss": 0.0223,
"step": 10600
},
{
"grad_norm": 0.20693781971931458,
"learning_rate": 9.81456946513121e-05,
"loss": 0.0222,
"step": 10610
},
{
"grad_norm": 0.23794129490852356,
"learning_rate": 9.814011402747373e-05,
"loss": 0.0245,
"step": 10620
},
{
"grad_norm": 0.27533841133117676,
"learning_rate": 9.813452517780651e-05,
"loss": 0.0198,
"step": 10630
},
{
"grad_norm": 0.24632394313812256,
"learning_rate": 9.81289281032654e-05,
"loss": 0.0241,
"step": 10640
},
{
"grad_norm": 0.1977643370628357,
"learning_rate": 9.812332280480683e-05,
"loss": 0.0189,
"step": 10650
},
{
"grad_norm": 0.23927035927772522,
"learning_rate": 9.811770928338854e-05,
"loss": 0.0186,
"step": 10660
},
{
"grad_norm": 0.16118136048316956,
"learning_rate": 9.811208753996979e-05,
"loss": 0.0177,
"step": 10670
},
{
"grad_norm": 0.2045382857322693,
"learning_rate": 9.810645757551113e-05,
"loss": 0.0166,
"step": 10680
},
{
"grad_norm": 0.19995464384555817,
"learning_rate": 9.810081939097459e-05,
"loss": 0.0185,
"step": 10690
},
{
"grad_norm": 0.15094652771949768,
"learning_rate": 9.809517298732356e-05,
"loss": 0.0194,
"step": 10700
},
{
"grad_norm": 0.1950456202030182,
"learning_rate": 9.80895183655229e-05,
"loss": 0.0168,
"step": 10710
},
{
"grad_norm": 0.1724327653646469,
"learning_rate": 9.808385552653877e-05,
"loss": 0.0215,
"step": 10720
},
{
"grad_norm": 0.2382352352142334,
"learning_rate": 9.807818447133886e-05,
"loss": 0.0209,
"step": 10730
},
{
"grad_norm": 0.2521894872188568,
"learning_rate": 9.807250520089215e-05,
"loss": 0.0178,
"step": 10740
},
{
"grad_norm": 0.24485500156879425,
"learning_rate": 9.806681771616908e-05,
"loss": 0.018,
"step": 10750
},
{
"grad_norm": 0.21275334060192108,
"learning_rate": 9.80611220181415e-05,
"loss": 0.0207,
"step": 10760
},
{
"grad_norm": 0.17692400515079498,
"learning_rate": 9.805541810778264e-05,
"loss": 0.0212,
"step": 10770
},
{
"grad_norm": 0.2477423995733261,
"learning_rate": 9.804970598606716e-05,
"loss": 0.0188,
"step": 10780
},
{
"grad_norm": 0.288703054189682,
"learning_rate": 9.804398565397106e-05,
"loss": 0.0233,
"step": 10790
},
{
"grad_norm": 0.28948503732681274,
"learning_rate": 9.803825711247183e-05,
"loss": 0.0206,
"step": 10800
},
{
"grad_norm": 0.2287627011537552,
"learning_rate": 9.803252036254831e-05,
"loss": 0.0162,
"step": 10810
},
{
"grad_norm": 0.24518610537052155,
"learning_rate": 9.802677540518076e-05,
"loss": 0.021,
"step": 10820
},
{
"grad_norm": 0.1741969734430313,
"learning_rate": 9.802102224135081e-05,
"loss": 0.0203,
"step": 10830
},
{
"grad_norm": 0.20629048347473145,
"learning_rate": 9.801526087204155e-05,
"loss": 0.0184,
"step": 10840
},
{
"grad_norm": 0.21048341691493988,
"learning_rate": 9.800949129823743e-05,
"loss": 0.0166,
"step": 10850
},
{
"grad_norm": 0.16457433998584747,
"learning_rate": 9.80037135209243e-05,
"loss": 0.0206,
"step": 10860
},
{
"grad_norm": 0.20526692271232605,
"learning_rate": 9.799792754108946e-05,
"loss": 0.02,
"step": 10870
},
{
"grad_norm": 0.29450953006744385,
"learning_rate": 9.799213335972152e-05,
"loss": 0.0192,
"step": 10880
},
{
"grad_norm": 0.20133313536643982,
"learning_rate": 9.798633097781058e-05,
"loss": 0.0226,
"step": 10890
},
{
"grad_norm": 0.24203220009803772,
"learning_rate": 9.79805203963481e-05,
"loss": 0.0182,
"step": 10900
},
{
"grad_norm": 0.19610702991485596,
"learning_rate": 9.797470161632697e-05,
"loss": 0.0197,
"step": 10910
},
{
"grad_norm": 0.18819737434387207,
"learning_rate": 9.796887463874145e-05,
"loss": 0.0215,
"step": 10920
},
{
"grad_norm": 0.2768223285675049,
"learning_rate": 9.796303946458718e-05,
"loss": 0.0185,
"step": 10930
},
{
"grad_norm": 0.2169492244720459,
"learning_rate": 9.795719609486127e-05,
"loss": 0.0175,
"step": 10940
},
{
"grad_norm": 0.21109357476234436,
"learning_rate": 9.795134453056219e-05,
"loss": 0.0152,
"step": 10950
},
{
"grad_norm": 0.26809945702552795,
"learning_rate": 9.794548477268979e-05,
"loss": 0.0163,
"step": 10960
},
{
"grad_norm": 0.24783478677272797,
"learning_rate": 9.793961682224537e-05,
"loss": 0.0163,
"step": 10970
},
{
"grad_norm": 0.2160470336675644,
"learning_rate": 9.793374068023156e-05,
"loss": 0.0203,
"step": 10980
},
{
"grad_norm": 0.25220364332199097,
"learning_rate": 9.792785634765247e-05,
"loss": 0.0209,
"step": 10990
},
{
"grad_norm": 0.24036410450935364,
"learning_rate": 9.792196382551357e-05,
"loss": 0.0208,
"step": 11000
},
{
"grad_norm": 0.2242831289768219,
"learning_rate": 9.791606311482171e-05,
"loss": 0.018,
"step": 11010
},
{
"grad_norm": 0.25045308470726013,
"learning_rate": 9.791015421658518e-05,
"loss": 0.0186,
"step": 11020
},
{
"grad_norm": 0.21759934723377228,
"learning_rate": 9.790423713181362e-05,
"loss": 0.0199,
"step": 11030
},
{
"grad_norm": 0.22301211953163147,
"learning_rate": 9.789831186151814e-05,
"loss": 0.0174,
"step": 11040
},
{
"grad_norm": 0.24609437584877014,
"learning_rate": 9.789237840671118e-05,
"loss": 0.0198,
"step": 11050
},
{
"grad_norm": 0.23131372034549713,
"learning_rate": 9.78864367684066e-05,
"loss": 0.0181,
"step": 11060
},
{
"grad_norm": 0.18929408490657806,
"learning_rate": 9.788048694761968e-05,
"loss": 0.0182,
"step": 11070
},
{
"grad_norm": 0.21103902161121368,
"learning_rate": 9.787452894536709e-05,
"loss": 0.0189,
"step": 11080
},
{
"grad_norm": 0.22037075459957123,
"learning_rate": 9.786856276266685e-05,
"loss": 0.0175,
"step": 11090
},
{
"grad_norm": 0.2759108245372772,
"learning_rate": 9.786258840053845e-05,
"loss": 0.0199,
"step": 11100
},
{
"grad_norm": 0.24369940161705017,
"learning_rate": 9.785660586000273e-05,
"loss": 0.0178,
"step": 11110
},
{
"grad_norm": 0.2781594395637512,
"learning_rate": 9.785061514208196e-05,
"loss": 0.0181,
"step": 11120
},
{
"grad_norm": 0.26724958419799805,
"learning_rate": 9.784461624779977e-05,
"loss": 0.0191,
"step": 11130
},
{
"grad_norm": 0.2424626648426056,
"learning_rate": 9.783860917818123e-05,
"loss": 0.02,
"step": 11140
},
{
"grad_norm": 0.20541176199913025,
"learning_rate": 9.783259393425277e-05,
"loss": 0.0212,
"step": 11150
},
{
"grad_norm": 0.26179322600364685,
"learning_rate": 9.782657051704221e-05,
"loss": 0.0145,
"step": 11160
},
{
"grad_norm": 0.22024081647396088,
"learning_rate": 9.782053892757883e-05,
"loss": 0.0163,
"step": 11170
},
{
"grad_norm": 0.2568201720714569,
"learning_rate": 9.781449916689324e-05,
"loss": 0.021,
"step": 11180
},
{
"grad_norm": 0.24892078340053558,
"learning_rate": 9.780845123601746e-05,
"loss": 0.0183,
"step": 11190
},
{
"grad_norm": 0.18937984108924866,
"learning_rate": 9.780239513598492e-05,
"loss": 0.019,
"step": 11200
},
{
"grad_norm": 0.19871971011161804,
"learning_rate": 9.779633086783047e-05,
"loss": 0.0223,
"step": 11210
},
{
"grad_norm": 0.23312966525554657,
"learning_rate": 9.779025843259031e-05,
"loss": 0.0172,
"step": 11220
},
{
"grad_norm": 0.23910565674304962,
"learning_rate": 9.778417783130204e-05,
"loss": 0.0179,
"step": 11230
},
{
"grad_norm": 0.2221440076828003,
"learning_rate": 9.777808906500468e-05,
"loss": 0.0177,
"step": 11240
},
{
"grad_norm": 0.25122180581092834,
"learning_rate": 9.777199213473862e-05,
"loss": 0.0198,
"step": 11250
},
{
"grad_norm": 0.20925727486610413,
"learning_rate": 9.77658870415457e-05,
"loss": 0.0226,
"step": 11260
},
{
"grad_norm": 0.250315397977829,
"learning_rate": 9.775977378646906e-05,
"loss": 0.0245,
"step": 11270
},
{
"grad_norm": 0.15758101642131805,
"learning_rate": 9.775365237055331e-05,
"loss": 0.0177,
"step": 11280
},
{
"grad_norm": 0.27685776352882385,
"learning_rate": 9.774752279484445e-05,
"loss": 0.0182,
"step": 11290
},
{
"grad_norm": 0.19731733202934265,
"learning_rate": 9.774138506038984e-05,
"loss": 0.0192,
"step": 11300
},
{
"grad_norm": 0.20881041884422302,
"learning_rate": 9.773523916823826e-05,
"loss": 0.0217,
"step": 11310
},
{
"grad_norm": 0.20949961245059967,
"learning_rate": 9.772908511943986e-05,
"loss": 0.0168,
"step": 11320
},
{
"grad_norm": 0.15379177033901215,
"learning_rate": 9.77229229150462e-05,
"loss": 0.0187,
"step": 11330
},
{
"grad_norm": 0.19608713686466217,
"learning_rate": 9.771675255611024e-05,
"loss": 0.0217,
"step": 11340
},
{
"grad_norm": 0.19298717379570007,
"learning_rate": 9.771057404368632e-05,
"loss": 0.0215,
"step": 11350
},
{
"grad_norm": 0.2094731628894806,
"learning_rate": 9.770438737883018e-05,
"loss": 0.0183,
"step": 11360
},
{
"grad_norm": 0.26386967301368713,
"learning_rate": 9.769819256259898e-05,
"loss": 0.0181,
"step": 11370
},
{
"grad_norm": 0.31744125485420227,
"learning_rate": 9.769198959605119e-05,
"loss": 0.0212,
"step": 11380
},
{
"grad_norm": 0.234335258603096,
"learning_rate": 9.768577848024678e-05,
"loss": 0.0205,
"step": 11390
},
{
"grad_norm": 0.21479220688343048,
"learning_rate": 9.767955921624702e-05,
"loss": 0.0196,
"step": 11400
},
{
"grad_norm": 0.3026210069656372,
"learning_rate": 9.767333180511465e-05,
"loss": 0.0209,
"step": 11410
},
{
"grad_norm": 0.246705561876297,
"learning_rate": 9.766709624791373e-05,
"loss": 0.023,
"step": 11420
},
{
"grad_norm": 0.22756491601467133,
"learning_rate": 9.766085254570975e-05,
"loss": 0.0223,
"step": 11430
},
{
"grad_norm": 0.31085556745529175,
"learning_rate": 9.76546006995696e-05,
"loss": 0.022,
"step": 11440
},
{
"grad_norm": 0.2894473373889923,
"learning_rate": 9.764834071056155e-05,
"loss": 0.0185,
"step": 11450
},
{
"grad_norm": 0.18649722635746002,
"learning_rate": 9.764207257975526e-05,
"loss": 0.0204,
"step": 11460
},
{
"grad_norm": 0.2198372781276703,
"learning_rate": 9.763579630822179e-05,
"loss": 0.0205,
"step": 11470
},
{
"grad_norm": 0.21714451909065247,
"learning_rate": 9.762951189703356e-05,
"loss": 0.02,
"step": 11480
},
{
"grad_norm": 0.16019868850708008,
"learning_rate": 9.762321934726442e-05,
"loss": 0.0179,
"step": 11490
},
{
"grad_norm": 0.18601438403129578,
"learning_rate": 9.761691865998959e-05,
"loss": 0.0227,
"step": 11500
},
{
"grad_norm": 0.22601468861103058,
"learning_rate": 9.76106098362857e-05,
"loss": 0.0188,
"step": 11510
},
{
"grad_norm": 0.21661071479320526,
"learning_rate": 9.760429287723072e-05,
"loss": 0.0216,
"step": 11520
},
{
"grad_norm": 0.17467108368873596,
"learning_rate": 9.759796778390406e-05,
"loss": 0.0191,
"step": 11530
},
{
"grad_norm": 0.2121114879846573,
"learning_rate": 9.759163455738653e-05,
"loss": 0.0201,
"step": 11540
},
{
"grad_norm": 0.2389015406370163,
"learning_rate": 9.75852931987603e-05,
"loss": 0.0157,
"step": 11550
},
{
"grad_norm": 0.2745116651058197,
"learning_rate": 9.757894370910891e-05,
"loss": 0.0218,
"step": 11560
},
{
"grad_norm": 0.2628045082092285,
"learning_rate": 9.757258608951733e-05,
"loss": 0.0217,
"step": 11570
},
{
"grad_norm": 0.2073826789855957,
"learning_rate": 9.75662203410719e-05,
"loss": 0.0173,
"step": 11580
},
{
"grad_norm": 0.23417799174785614,
"learning_rate": 9.755984646486034e-05,
"loss": 0.0168,
"step": 11590
},
{
"grad_norm": 0.14510011672973633,
"learning_rate": 9.75534644619718e-05,
"loss": 0.0151,
"step": 11600
},
{
"grad_norm": 0.2490101009607315,
"learning_rate": 9.754707433349676e-05,
"loss": 0.0186,
"step": 11610
},
{
"grad_norm": 0.21024779975414276,
"learning_rate": 9.754067608052715e-05,
"loss": 0.0216,
"step": 11620
},
{
"grad_norm": 0.2229691743850708,
"learning_rate": 9.753426970415622e-05,
"loss": 0.0173,
"step": 11630
},
{
"grad_norm": 0.2373073399066925,
"learning_rate": 9.752785520547868e-05,
"loss": 0.0192,
"step": 11640
},
{
"grad_norm": 0.2037600576877594,
"learning_rate": 9.752143258559056e-05,
"loss": 0.0168,
"step": 11650
},
{
"grad_norm": 0.2464536875486374,
"learning_rate": 9.751500184558933e-05,
"loss": 0.0177,
"step": 11660
},
{
"grad_norm": 0.22050020098686218,
"learning_rate": 9.750856298657383e-05,
"loss": 0.0156,
"step": 11670
},
{
"grad_norm": 0.19472891092300415,
"learning_rate": 9.750211600964428e-05,
"loss": 0.0152,
"step": 11680
},
{
"grad_norm": 0.33395636081695557,
"learning_rate": 9.749566091590226e-05,
"loss": 0.0207,
"step": 11690
},
{
"grad_norm": 0.2324424535036087,
"learning_rate": 9.748919770645083e-05,
"loss": 0.0212,
"step": 11700
},
{
"grad_norm": 0.16833680868148804,
"learning_rate": 9.748272638239432e-05,
"loss": 0.0197,
"step": 11710
},
{
"grad_norm": 0.20751014351844788,
"learning_rate": 9.747624694483855e-05,
"loss": 0.0208,
"step": 11720
},
{
"grad_norm": 0.1955762505531311,
"learning_rate": 9.746975939489065e-05,
"loss": 0.0158,
"step": 11730
},
{
"grad_norm": 0.15604378283023834,
"learning_rate": 9.746326373365918e-05,
"loss": 0.0164,
"step": 11740
},
{
"grad_norm": 0.22884982824325562,
"learning_rate": 9.745675996225403e-05,
"loss": 0.0165,
"step": 11750
},
{
"grad_norm": 0.24646146595478058,
"learning_rate": 9.745024808178657e-05,
"loss": 0.0184,
"step": 11760
},
{
"grad_norm": 0.269307017326355,
"learning_rate": 9.744372809336947e-05,
"loss": 0.018,
"step": 11770
},
{
"grad_norm": 0.17529548704624176,
"learning_rate": 9.743719999811682e-05,
"loss": 0.0225,
"step": 11780
},
{
"grad_norm": 0.17929251492023468,
"learning_rate": 9.743066379714412e-05,
"loss": 0.0162,
"step": 11790
},
{
"grad_norm": 0.17738574743270874,
"learning_rate": 9.74241194915682e-05,
"loss": 0.0236,
"step": 11800
},
{
"grad_norm": 0.21286842226982117,
"learning_rate": 9.741756708250731e-05,
"loss": 0.0166,
"step": 11810
},
{
"grad_norm": 0.2095780074596405,
"learning_rate": 9.741100657108109e-05,
"loss": 0.0216,
"step": 11820
},
{
"grad_norm": 0.21511229872703552,
"learning_rate": 9.740443795841054e-05,
"loss": 0.0194,
"step": 11830
},
{
"grad_norm": 0.20982207357883453,
"learning_rate": 9.739786124561805e-05,
"loss": 0.0178,
"step": 11840
},
{
"grad_norm": 0.14046761393547058,
"learning_rate": 9.73912764338274e-05,
"loss": 0.0201,
"step": 11850
},
{
"grad_norm": 0.21997249126434326,
"learning_rate": 9.738468352416377e-05,
"loss": 0.0205,
"step": 11860
},
{
"grad_norm": 0.23343665897846222,
"learning_rate": 9.737808251775369e-05,
"loss": 0.0204,
"step": 11870
},
{
"grad_norm": 0.22911225259304047,
"learning_rate": 9.737147341572512e-05,
"loss": 0.0175,
"step": 11880
},
{
"grad_norm": 0.21971078217029572,
"learning_rate": 9.736485621920735e-05,
"loss": 0.0182,
"step": 11890
},
{
"grad_norm": 0.2601747512817383,
"learning_rate": 9.735823092933108e-05,
"loss": 0.0257,
"step": 11900
},
{
"grad_norm": 0.17831431329250336,
"learning_rate": 9.735159754722838e-05,
"loss": 0.0178,
"step": 11910
},
{
"grad_norm": 0.16906364262104034,
"learning_rate": 9.734495607403275e-05,
"loss": 0.0184,
"step": 11920
},
{
"grad_norm": 0.15772491693496704,
"learning_rate": 9.733830651087901e-05,
"loss": 0.019,
"step": 11930
},
{
"grad_norm": 0.19316568970680237,
"learning_rate": 9.733164885890338e-05,
"loss": 0.0152,
"step": 11940
},
{
"grad_norm": 0.20866607129573822,
"learning_rate": 9.732498311924349e-05,
"loss": 0.021,
"step": 11950
},
{
"grad_norm": 0.20008015632629395,
"learning_rate": 9.731830929303833e-05,
"loss": 0.0189,
"step": 11960
},
{
"grad_norm": 0.25079259276390076,
"learning_rate": 9.731162738142827e-05,
"loss": 0.0193,
"step": 11970
},
{
"grad_norm": 0.21481575071811676,
"learning_rate": 9.730493738555506e-05,
"loss": 0.0235,
"step": 11980
},
{
"grad_norm": 0.2959555983543396,
"learning_rate": 9.729823930656186e-05,
"loss": 0.0192,
"step": 11990
},
{
"grad_norm": 0.2065613716840744,
"learning_rate": 9.729153314559316e-05,
"loss": 0.0188,
"step": 12000
},
{
"grad_norm": 0.20867669582366943,
"learning_rate": 9.728481890379486e-05,
"loss": 0.0213,
"step": 12010
},
{
"grad_norm": 0.25730040669441223,
"learning_rate": 9.727809658231428e-05,
"loss": 0.0205,
"step": 12020
},
{
"grad_norm": 0.29852187633514404,
"learning_rate": 9.727136618230003e-05,
"loss": 0.0206,
"step": 12030
},
{
"grad_norm": 0.24638858437538147,
"learning_rate": 9.726462770490219e-05,
"loss": 0.0209,
"step": 12040
},
{
"grad_norm": 0.14810189604759216,
"learning_rate": 9.725788115127214e-05,
"loss": 0.0175,
"step": 12050
},
{
"grad_norm": 0.2689899504184723,
"learning_rate": 9.725112652256274e-05,
"loss": 0.0188,
"step": 12060
},
{
"grad_norm": 0.2269291877746582,
"learning_rate": 9.724436381992812e-05,
"loss": 0.018,
"step": 12070
},
{
"grad_norm": 0.23517954349517822,
"learning_rate": 9.723759304452387e-05,
"loss": 0.0185,
"step": 12080
},
{
"grad_norm": 0.2234342098236084,
"learning_rate": 9.72308141975069e-05,
"loss": 0.0168,
"step": 12090
},
{
"grad_norm": 0.25882044434547424,
"learning_rate": 9.722402728003557e-05,
"loss": 0.0182,
"step": 12100
},
{
"grad_norm": 0.1912042200565338,
"learning_rate": 9.721723229326953e-05,
"loss": 0.0177,
"step": 12110
},
{
"grad_norm": 0.20937858521938324,
"learning_rate": 9.721042923836992e-05,
"loss": 0.02,
"step": 12120
},
{
"grad_norm": 0.2105017900466919,
"learning_rate": 9.720361811649914e-05,
"loss": 0.0185,
"step": 12130
},
{
"grad_norm": 0.19680309295654297,
"learning_rate": 9.719679892882106e-05,
"loss": 0.0148,
"step": 12140
},
{
"grad_norm": 0.22222547233104706,
"learning_rate": 9.718997167650085e-05,
"loss": 0.0174,
"step": 12150
},
{
"grad_norm": 0.21008817851543427,
"learning_rate": 9.718313636070515e-05,
"loss": 0.0198,
"step": 12160
},
{
"grad_norm": 0.2227344512939453,
"learning_rate": 9.717629298260192e-05,
"loss": 0.0199,
"step": 12170
},
{
"grad_norm": 0.18639013171195984,
"learning_rate": 9.716944154336047e-05,
"loss": 0.0164,
"step": 12180
},
{
"grad_norm": 0.15603755414485931,
"learning_rate": 9.716258204415157e-05,
"loss": 0.0172,
"step": 12190
},
{
"grad_norm": 0.22610436379909515,
"learning_rate": 9.715571448614728e-05,
"loss": 0.0202,
"step": 12200
},
{
"grad_norm": 0.2384321540594101,
"learning_rate": 9.71488388705211e-05,
"loss": 0.0173,
"step": 12210
},
{
"grad_norm": 0.21900944411754608,
"learning_rate": 9.714195519844788e-05,
"loss": 0.018,
"step": 12220
},
{
"grad_norm": 0.23115262389183044,
"learning_rate": 9.713506347110386e-05,
"loss": 0.016,
"step": 12230
},
{
"grad_norm": 0.21534818410873413,
"learning_rate": 9.712816368966662e-05,
"loss": 0.0179,
"step": 12240
},
{
"grad_norm": 0.24454522132873535,
"learning_rate": 9.712125585531517e-05,
"loss": 0.0195,
"step": 12250
},
{
"grad_norm": 0.20938843488693237,
"learning_rate": 9.711433996922988e-05,
"loss": 0.0189,
"step": 12260
},
{
"grad_norm": 0.2134719341993332,
"learning_rate": 9.710741603259245e-05,
"loss": 0.022,
"step": 12270
},
{
"grad_norm": 0.16311052441596985,
"learning_rate": 9.710048404658603e-05,
"loss": 0.0195,
"step": 12280
},
{
"grad_norm": 0.1718016266822815,
"learning_rate": 9.709354401239508e-05,
"loss": 0.017,
"step": 12290
},
{
"grad_norm": 0.19942371547222137,
"learning_rate": 9.708659593120546e-05,
"loss": 0.0181,
"step": 12300
},
{
"grad_norm": 0.2483605444431305,
"learning_rate": 9.707963980420443e-05,
"loss": 0.0213,
"step": 12310
},
{
"grad_norm": 0.26829585433006287,
"learning_rate": 9.707267563258058e-05,
"loss": 0.0157,
"step": 12320
},
{
"grad_norm": 0.19812586903572083,
"learning_rate": 9.70657034175239e-05,
"loss": 0.0186,
"step": 12330
},
{
"grad_norm": 0.2553393840789795,
"learning_rate": 9.705872316022577e-05,
"loss": 0.0218,
"step": 12340
},
{
"grad_norm": 0.1981436163187027,
"learning_rate": 9.705173486187891e-05,
"loss": 0.018,
"step": 12350
},
{
"grad_norm": 0.23439428210258484,
"learning_rate": 9.704473852367741e-05,
"loss": 0.0229,
"step": 12360
},
{
"grad_norm": 0.2348204404115677,
"learning_rate": 9.70377341468168e-05,
"loss": 0.0184,
"step": 12370
},
{
"grad_norm": 0.23930864036083221,
"learning_rate": 9.703072173249389e-05,
"loss": 0.0186,
"step": 12380
},
{
"grad_norm": 0.2222331166267395,
"learning_rate": 9.702370128190693e-05,
"loss": 0.0182,
"step": 12390
},
{
"grad_norm": 0.1899869740009308,
"learning_rate": 9.701667279625552e-05,
"loss": 0.0215,
"step": 12400
},
{
"grad_norm": 0.18148602545261383,
"learning_rate": 9.700963627674065e-05,
"loss": 0.02,
"step": 12410
},
{
"grad_norm": 0.22572743892669678,
"learning_rate": 9.700259172456466e-05,
"loss": 0.017,
"step": 12420
},
{
"grad_norm": 0.21851639449596405,
"learning_rate": 9.699553914093124e-05,
"loss": 0.0156,
"step": 12430
},
{
"grad_norm": 0.18523038923740387,
"learning_rate": 9.698847852704553e-05,
"loss": 0.0169,
"step": 12440
},
{
"grad_norm": 0.20364411175251007,
"learning_rate": 9.6981409884114e-05,
"loss": 0.0254,
"step": 12450
},
{
"grad_norm": 0.18808870017528534,
"learning_rate": 9.697433321334443e-05,
"loss": 0.0187,
"step": 12460
},
{
"grad_norm": 0.21851050853729248,
"learning_rate": 9.696724851594607e-05,
"loss": 0.0218,
"step": 12470
},
{
"grad_norm": 0.18276174366474152,
"learning_rate": 9.696015579312952e-05,
"loss": 0.0166,
"step": 12480
},
{
"grad_norm": 0.2173735648393631,
"learning_rate": 9.695305504610668e-05,
"loss": 0.0176,
"step": 12490
},
{
"grad_norm": 0.25715184211730957,
"learning_rate": 9.694594627609092e-05,
"loss": 0.0155,
"step": 12500
},
{
"grad_norm": 0.15247225761413574,
"learning_rate": 9.693882948429691e-05,
"loss": 0.018,
"step": 12510
},
{
"grad_norm": 0.22634707391262054,
"learning_rate": 9.693170467194071e-05,
"loss": 0.0182,
"step": 12520
},
{
"grad_norm": 0.2305721938610077,
"learning_rate": 9.692457184023977e-05,
"loss": 0.0203,
"step": 12530
},
{
"grad_norm": 0.22422613203525543,
"learning_rate": 9.691743099041291e-05,
"loss": 0.0175,
"step": 12540
},
{
"grad_norm": 0.23468373715877533,
"learning_rate": 9.691028212368027e-05,
"loss": 0.0187,
"step": 12550
},
{
"grad_norm": 0.13476166129112244,
"learning_rate": 9.690312524126342e-05,
"loss": 0.0158,
"step": 12560
},
{
"grad_norm": 0.15493354201316833,
"learning_rate": 9.689596034438527e-05,
"loss": 0.0164,
"step": 12570
},
{
"grad_norm": 0.167372927069664,
"learning_rate": 9.688878743427012e-05,
"loss": 0.0163,
"step": 12580
},
{
"grad_norm": 0.2126246839761734,
"learning_rate": 9.688160651214359e-05,
"loss": 0.0172,
"step": 12590
},
{
"grad_norm": 0.2218184620141983,
"learning_rate": 9.687441757923273e-05,
"loss": 0.0179,
"step": 12600
},
{
"grad_norm": 0.21006833016872406,
"learning_rate": 9.68672206367659e-05,
"loss": 0.0188,
"step": 12610
},
{
"grad_norm": 0.17882493138313293,
"learning_rate": 9.686001568597291e-05,
"loss": 0.0194,
"step": 12620
},
{
"grad_norm": 0.21169313788414001,
"learning_rate": 9.685280272808486e-05,
"loss": 0.0167,
"step": 12630
},
{
"grad_norm": 0.2793411910533905,
"learning_rate": 9.684558176433424e-05,
"loss": 0.0178,
"step": 12640
},
{
"grad_norm": 0.2542753517627716,
"learning_rate": 9.683835279595495e-05,
"loss": 0.0167,
"step": 12650
},
{
"grad_norm": 0.20705217123031616,
"learning_rate": 9.683111582418216e-05,
"loss": 0.0139,
"step": 12660
},
{
"grad_norm": 0.2595694363117218,
"learning_rate": 9.682387085025254e-05,
"loss": 0.0148,
"step": 12670
},
{
"grad_norm": 0.2114218771457672,
"learning_rate": 9.681661787540401e-05,
"loss": 0.019,
"step": 12680
},
{
"grad_norm": 0.1812354326248169,
"learning_rate": 9.680935690087593e-05,
"loss": 0.0191,
"step": 12690
},
{
"grad_norm": 0.16314566135406494,
"learning_rate": 9.680208792790901e-05,
"loss": 0.0158,
"step": 12700
},
{
"grad_norm": 0.15389156341552734,
"learning_rate": 9.679481095774529e-05,
"loss": 0.015,
"step": 12710
},
{
"grad_norm": 0.17251567542552948,
"learning_rate": 9.678752599162822e-05,
"loss": 0.0176,
"step": 12720
},
{
"grad_norm": 0.24053889513015747,
"learning_rate": 9.678023303080259e-05,
"loss": 0.0163,
"step": 12730
},
{
"grad_norm": 0.19373218715190887,
"learning_rate": 9.677293207651459e-05,
"loss": 0.0164,
"step": 12740
},
{
"grad_norm": 0.18236075341701508,
"learning_rate": 9.676562313001173e-05,
"loss": 0.0168,
"step": 12750
},
{
"grad_norm": 0.19816508889198303,
"learning_rate": 9.675830619254293e-05,
"loss": 0.0181,
"step": 12760
},
{
"grad_norm": 0.2341136932373047,
"learning_rate": 9.675098126535843e-05,
"loss": 0.0195,
"step": 12770
},
{
"grad_norm": 0.22628170251846313,
"learning_rate": 9.674364834970988e-05,
"loss": 0.0215,
"step": 12780
},
{
"grad_norm": 0.22022584080696106,
"learning_rate": 9.673630744685028e-05,
"loss": 0.0161,
"step": 12790
},
{
"grad_norm": 0.2745243310928345,
"learning_rate": 9.672895855803397e-05,
"loss": 0.0179,
"step": 12800
},
{
"grad_norm": 0.25176534056663513,
"learning_rate": 9.672160168451667e-05,
"loss": 0.0155,
"step": 12810
},
{
"grad_norm": 0.25133299827575684,
"learning_rate": 9.671423682755549e-05,
"loss": 0.0185,
"step": 12820
},
{
"grad_norm": 0.1872323751449585,
"learning_rate": 9.670686398840888e-05,
"loss": 0.0175,
"step": 12830
},
{
"grad_norm": 0.16792120039463043,
"learning_rate": 9.669948316833664e-05,
"loss": 0.0166,
"step": 12840
},
{
"grad_norm": 0.22031652927398682,
"learning_rate": 9.669209436859997e-05,
"loss": 0.0178,
"step": 12850
},
{
"grad_norm": 0.27442485094070435,
"learning_rate": 9.66846975904614e-05,
"loss": 0.0192,
"step": 12860
},
{
"grad_norm": 0.2164306640625,
"learning_rate": 9.667729283518483e-05,
"loss": 0.016,
"step": 12870
},
{
"grad_norm": 0.17880965769290924,
"learning_rate": 9.666988010403557e-05,
"loss": 0.0156,
"step": 12880
},
{
"grad_norm": 0.21804316341876984,
"learning_rate": 9.66624593982802e-05,
"loss": 0.0167,
"step": 12890
},
{
"grad_norm": 0.20562773942947388,
"learning_rate": 9.665503071918675e-05,
"loss": 0.0172,
"step": 12900
},
{
"grad_norm": 0.22280491888523102,
"learning_rate": 9.664759406802456e-05,
"loss": 0.0158,
"step": 12910
},
{
"grad_norm": 0.20018264651298523,
"learning_rate": 9.664014944606437e-05,
"loss": 0.0187,
"step": 12920
},
{
"grad_norm": 0.20155006647109985,
"learning_rate": 9.663269685457822e-05,
"loss": 0.018,
"step": 12930
},
{
"grad_norm": 0.2240087389945984,
"learning_rate": 9.662523629483962e-05,
"loss": 0.0173,
"step": 12940
},
{
"grad_norm": 0.20313407480716705,
"learning_rate": 9.661776776812333e-05,
"loss": 0.0153,
"step": 12950
},
{
"grad_norm": 0.23612536489963531,
"learning_rate": 9.661029127570553e-05,
"loss": 0.0183,
"step": 12960
},
{
"grad_norm": 0.18177230656147003,
"learning_rate": 9.660280681886373e-05,
"loss": 0.0174,
"step": 12970
},
{
"grad_norm": 0.18900875747203827,
"learning_rate": 9.659531439887685e-05,
"loss": 0.017,
"step": 12980
},
{
"grad_norm": 0.2031058371067047,
"learning_rate": 9.658781401702511e-05,
"loss": 0.0238,
"step": 12990
},
{
"grad_norm": 0.17947645485401154,
"learning_rate": 9.658030567459015e-05,
"loss": 0.0147,
"step": 13000
},
{
"grad_norm": 0.19303488731384277,
"learning_rate": 9.65727893728549e-05,
"loss": 0.0179,
"step": 13010
},
{
"grad_norm": 0.2641489505767822,
"learning_rate": 9.656526511310375e-05,
"loss": 0.0171,
"step": 13020
},
{
"grad_norm": 0.19446289539337158,
"learning_rate": 9.655773289662233e-05,
"loss": 0.0168,
"step": 13030
},
{
"grad_norm": 0.22106243669986725,
"learning_rate": 9.655019272469772e-05,
"loss": 0.0212,
"step": 13040
},
{
"grad_norm": 0.20340028405189514,
"learning_rate": 9.654264459861832e-05,
"loss": 0.0192,
"step": 13050
},
{
"grad_norm": 0.16195544600486755,
"learning_rate": 9.653508851967391e-05,
"loss": 0.0167,
"step": 13060
},
{
"grad_norm": 0.17701230943202972,
"learning_rate": 9.65275244891556e-05,
"loss": 0.0155,
"step": 13070
},
{
"grad_norm": 0.2103540152311325,
"learning_rate": 9.651995250835591e-05,
"loss": 0.0155,
"step": 13080
},
{
"grad_norm": 0.14413878321647644,
"learning_rate": 9.651237257856862e-05,
"loss": 0.0149,
"step": 13090
},
{
"grad_norm": 0.22007222473621368,
"learning_rate": 9.6504784701089e-05,
"loss": 0.0148,
"step": 13100
},
{
"grad_norm": 0.205142542719841,
"learning_rate": 9.649718887721357e-05,
"loss": 0.0178,
"step": 13110
},
{
"grad_norm": 0.1939914971590042,
"learning_rate": 9.648958510824028e-05,
"loss": 0.016,
"step": 13120
},
{
"grad_norm": 0.18156073987483978,
"learning_rate": 9.648197339546837e-05,
"loss": 0.018,
"step": 13130
},
{
"grad_norm": 0.23905836045742035,
"learning_rate": 9.647435374019851e-05,
"loss": 0.0181,
"step": 13140
},
{
"grad_norm": 0.25276172161102295,
"learning_rate": 9.646672614373266e-05,
"loss": 0.0195,
"step": 13150
},
{
"grad_norm": 0.2888982892036438,
"learning_rate": 9.645909060737418e-05,
"loss": 0.0176,
"step": 13160
},
{
"grad_norm": 0.18068911135196686,
"learning_rate": 9.645144713242778e-05,
"loss": 0.0216,
"step": 13170
},
{
"grad_norm": 0.22713512182235718,
"learning_rate": 9.64437957201995e-05,
"loss": 0.0182,
"step": 13180
},
{
"grad_norm": 0.18977467715740204,
"learning_rate": 9.643613637199678e-05,
"loss": 0.0186,
"step": 13190
},
{
"grad_norm": 0.2133685052394867,
"learning_rate": 9.642846908912839e-05,
"loss": 0.0176,
"step": 13200
},
{
"grad_norm": 0.15161854028701782,
"learning_rate": 9.642079387290444e-05,
"loss": 0.0164,
"step": 13210
},
{
"grad_norm": 0.16772955656051636,
"learning_rate": 9.641311072463644e-05,
"loss": 0.0194,
"step": 13220
},
{
"grad_norm": 0.21082094311714172,
"learning_rate": 9.640541964563722e-05,
"loss": 0.0207,
"step": 13230
},
{
"grad_norm": 0.2545044720172882,
"learning_rate": 9.639772063722096e-05,
"loss": 0.0219,
"step": 13240
},
{
"grad_norm": 0.1294104903936386,
"learning_rate": 9.639001370070324e-05,
"loss": 0.0174,
"step": 13250
},
{
"grad_norm": 0.25759389996528625,
"learning_rate": 9.638229883740095e-05,
"loss": 0.017,
"step": 13260
},
{
"grad_norm": 0.1531188040971756,
"learning_rate": 9.637457604863233e-05,
"loss": 0.0155,
"step": 13270
},
{
"grad_norm": 0.1654675155878067,
"learning_rate": 9.636684533571703e-05,
"loss": 0.0179,
"step": 13280
},
{
"grad_norm": 0.18904055655002594,
"learning_rate": 9.635910669997599e-05,
"loss": 0.0146,
"step": 13290
},
{
"grad_norm": 0.22429679334163666,
"learning_rate": 9.635136014273154e-05,
"loss": 0.0181,
"step": 13300
},
{
"grad_norm": 0.23229560256004333,
"learning_rate": 9.634360566530735e-05,
"loss": 0.0167,
"step": 13310
},
{
"grad_norm": 0.24955442547798157,
"learning_rate": 9.633584326902845e-05,
"loss": 0.0177,
"step": 13320
},
{
"grad_norm": 0.22871516644954681,
"learning_rate": 9.632807295522124e-05,
"loss": 0.0175,
"step": 13330
},
{
"grad_norm": 0.20401830971240997,
"learning_rate": 9.632029472521342e-05,
"loss": 0.0165,
"step": 13340
},
{
"grad_norm": 0.1854197382926941,
"learning_rate": 9.631250858033409e-05,
"loss": 0.0152,
"step": 13350
},
{
"grad_norm": 0.21331296861171722,
"learning_rate": 9.630471452191371e-05,
"loss": 0.0162,
"step": 13360
},
{
"grad_norm": 0.22700276970863342,
"learning_rate": 9.629691255128405e-05,
"loss": 0.0161,
"step": 13370
},
{
"grad_norm": 0.1945546269416809,
"learning_rate": 9.628910266977825e-05,
"loss": 0.0175,
"step": 13380
},
{
"grad_norm": 0.17648547887802124,
"learning_rate": 9.628128487873083e-05,
"loss": 0.0141,
"step": 13390
},
{
"grad_norm": 0.13480976223945618,
"learning_rate": 9.627345917947761e-05,
"loss": 0.0157,
"step": 13400
},
{
"grad_norm": 0.19314318895339966,
"learning_rate": 9.626562557335579e-05,
"loss": 0.0147,
"step": 13410
},
{
"grad_norm": 0.17685508728027344,
"learning_rate": 9.625778406170393e-05,
"loss": 0.0141,
"step": 13420
},
{
"grad_norm": 0.1886247843503952,
"learning_rate": 9.624993464586193e-05,
"loss": 0.018,
"step": 13430
},
{
"grad_norm": 0.24076367914676666,
"learning_rate": 9.624207732717105e-05,
"loss": 0.0189,
"step": 13440
},
{
"grad_norm": 0.19189012050628662,
"learning_rate": 9.623421210697386e-05,
"loss": 0.0164,
"step": 13450
},
{
"grad_norm": 0.18599392473697662,
"learning_rate": 9.622633898661434e-05,
"loss": 0.0175,
"step": 13460
},
{
"grad_norm": 0.23745107650756836,
"learning_rate": 9.621845796743778e-05,
"loss": 0.0207,
"step": 13470
},
{
"grad_norm": 0.21209335327148438,
"learning_rate": 9.621056905079082e-05,
"loss": 0.017,
"step": 13480
},
{
"grad_norm": 0.19345171749591827,
"learning_rate": 9.620267223802149e-05,
"loss": 0.015,
"step": 13490
},
{
"grad_norm": 0.2062082141637802,
"learning_rate": 9.619476753047911e-05,
"loss": 0.0149,
"step": 13500
},
{
"grad_norm": 0.2011566311120987,
"learning_rate": 9.618685492951438e-05,
"loss": 0.0178,
"step": 13510
},
{
"grad_norm": 0.28119274973869324,
"learning_rate": 9.617893443647938e-05,
"loss": 0.0148,
"step": 13520
},
{
"grad_norm": 0.2512681484222412,
"learning_rate": 9.617100605272746e-05,
"loss": 0.0143,
"step": 13530
},
{
"grad_norm": 0.2108534872531891,
"learning_rate": 9.616306977961338e-05,
"loss": 0.015,
"step": 13540
},
{
"grad_norm": 0.1767207682132721,
"learning_rate": 9.615512561849326e-05,
"loss": 0.0153,
"step": 13550
},
{
"grad_norm": 0.20185871422290802,
"learning_rate": 9.61471735707245e-05,
"loss": 0.0196,
"step": 13560
},
{
"grad_norm": 0.1924266219139099,
"learning_rate": 9.613921363766592e-05,
"loss": 0.0158,
"step": 13570
},
{
"grad_norm": 0.14430062472820282,
"learning_rate": 9.613124582067763e-05,
"loss": 0.0144,
"step": 13580
},
{
"grad_norm": 0.21115557849407196,
"learning_rate": 9.612327012112112e-05,
"loss": 0.0166,
"step": 13590
},
{
"grad_norm": 0.2514290511608124,
"learning_rate": 9.611528654035921e-05,
"loss": 0.0146,
"step": 13600
},
{
"grad_norm": 0.20375177264213562,
"learning_rate": 9.610729507975611e-05,
"loss": 0.0177,
"step": 13610
},
{
"grad_norm": 0.19954927265644073,
"learning_rate": 9.609929574067731e-05,
"loss": 0.0162,
"step": 13620
},
{
"grad_norm": 0.1741858273744583,
"learning_rate": 9.609128852448967e-05,
"loss": 0.016,
"step": 13630
},
{
"grad_norm": 0.21210941672325134,
"learning_rate": 9.608327343256143e-05,
"loss": 0.0173,
"step": 13640
},
{
"grad_norm": 0.2422517091035843,
"learning_rate": 9.607525046626216e-05,
"loss": 0.0182,
"step": 13650
},
{
"grad_norm": 0.19217988848686218,
"learning_rate": 9.606721962696272e-05,
"loss": 0.0157,
"step": 13660
},
{
"grad_norm": 0.14938074350357056,
"learning_rate": 9.60591809160354e-05,
"loss": 0.0168,
"step": 13670
},
{
"grad_norm": 0.2352713644504547,
"learning_rate": 9.605113433485378e-05,
"loss": 0.0174,
"step": 13680
},
{
"grad_norm": 0.16971217095851898,
"learning_rate": 9.604307988479279e-05,
"loss": 0.0174,
"step": 13690
},
{
"grad_norm": 0.17832019925117493,
"learning_rate": 9.603501756722876e-05,
"loss": 0.0144,
"step": 13700
},
{
"grad_norm": 0.21248316764831543,
"learning_rate": 9.602694738353927e-05,
"loss": 0.0176,
"step": 13710
},
{
"grad_norm": 0.19290727376937866,
"learning_rate": 9.601886933510331e-05,
"loss": 0.0188,
"step": 13720
},
{
"grad_norm": 0.15208296477794647,
"learning_rate": 9.60107834233012e-05,
"loss": 0.0157,
"step": 13730
},
{
"grad_norm": 0.20310387015342712,
"learning_rate": 9.60026896495146e-05,
"loss": 0.0176,
"step": 13740
},
{
"grad_norm": 0.24995331466197968,
"learning_rate": 9.599458801512652e-05,
"loss": 0.0148,
"step": 13750
},
{
"grad_norm": 0.19693876802921295,
"learning_rate": 9.598647852152129e-05,
"loss": 0.0146,
"step": 13760
},
{
"grad_norm": 0.21272483468055725,
"learning_rate": 9.597836117008462e-05,
"loss": 0.0179,
"step": 13770
},
{
"grad_norm": 0.20627114176750183,
"learning_rate": 9.597023596220356e-05,
"loss": 0.0187,
"step": 13780
},
{
"grad_norm": 0.2480172961950302,
"learning_rate": 9.596210289926643e-05,
"loss": 0.0162,
"step": 13790
},
{
"grad_norm": 0.22674791514873505,
"learning_rate": 9.5953961982663e-05,
"loss": 0.0161,
"step": 13800
},
{
"grad_norm": 0.17927564680576324,
"learning_rate": 9.594581321378431e-05,
"loss": 0.0158,
"step": 13810
},
{
"grad_norm": 0.2015874683856964,
"learning_rate": 9.593765659402276e-05,
"loss": 0.0164,
"step": 13820
},
{
"grad_norm": 0.15319640934467316,
"learning_rate": 9.59294921247721e-05,
"loss": 0.0146,
"step": 13830
},
{
"grad_norm": 0.19437569379806519,
"learning_rate": 9.59213198074274e-05,
"loss": 0.0145,
"step": 13840
},
{
"grad_norm": 0.18472889065742493,
"learning_rate": 9.59131396433851e-05,
"loss": 0.0172,
"step": 13850
},
{
"grad_norm": 0.17919857800006866,
"learning_rate": 9.590495163404297e-05,
"loss": 0.0186,
"step": 13860
},
{
"grad_norm": 0.1990271508693695,
"learning_rate": 9.589675578080009e-05,
"loss": 0.0175,
"step": 13870
},
{
"grad_norm": 0.23030996322631836,
"learning_rate": 9.588855208505694e-05,
"loss": 0.0175,
"step": 13880
},
{
"grad_norm": 0.16449768841266632,
"learning_rate": 9.588034054821529e-05,
"loss": 0.0143,
"step": 13890
},
{
"grad_norm": 0.2096904218196869,
"learning_rate": 9.587212117167826e-05,
"loss": 0.0159,
"step": 13900
},
{
"grad_norm": 0.14766645431518555,
"learning_rate": 9.586389395685033e-05,
"loss": 0.0128,
"step": 13910
},
{
"grad_norm": 0.2344929277896881,
"learning_rate": 9.585565890513733e-05,
"loss": 0.0147,
"step": 13920
},
{
"grad_norm": 0.18312610685825348,
"learning_rate": 9.584741601794636e-05,
"loss": 0.0153,
"step": 13930
},
{
"grad_norm": 0.24041059613227844,
"learning_rate": 9.58391652966859e-05,
"loss": 0.0157,
"step": 13940
},
{
"grad_norm": 0.19608065485954285,
"learning_rate": 9.583090674276583e-05,
"loss": 0.017,
"step": 13950
},
{
"grad_norm": 0.27070385217666626,
"learning_rate": 9.582264035759726e-05,
"loss": 0.0161,
"step": 13960
},
{
"grad_norm": 0.1791030615568161,
"learning_rate": 9.58143661425927e-05,
"loss": 0.016,
"step": 13970
},
{
"grad_norm": 0.21301402151584625,
"learning_rate": 9.580608409916601e-05,
"loss": 0.0175,
"step": 13980
},
{
"grad_norm": 0.2352142035961151,
"learning_rate": 9.579779422873233e-05,
"loss": 0.017,
"step": 13990
},
{
"grad_norm": 0.22693519294261932,
"learning_rate": 9.578949653270819e-05,
"loss": 0.0211,
"step": 14000
},
{
"grad_norm": 0.18038628995418549,
"learning_rate": 9.578119101251144e-05,
"loss": 0.0144,
"step": 14010
},
{
"grad_norm": 0.2149980664253235,
"learning_rate": 9.577287766956127e-05,
"loss": 0.0171,
"step": 14020
},
{
"grad_norm": 0.22819343209266663,
"learning_rate": 9.57645565052782e-05,
"loss": 0.0149,
"step": 14030
},
{
"grad_norm": 0.22780239582061768,
"learning_rate": 9.575622752108407e-05,
"loss": 0.0172,
"step": 14040
},
{
"grad_norm": 0.2205226868391037,
"learning_rate": 9.57478907184021e-05,
"loss": 0.0179,
"step": 14050
},
{
"grad_norm": 0.20616720616817474,
"learning_rate": 9.573954609865681e-05,
"loss": 0.0158,
"step": 14060
},
{
"grad_norm": 0.16584700345993042,
"learning_rate": 9.573119366327408e-05,
"loss": 0.0173,
"step": 14070
},
{
"grad_norm": 0.2013131082057953,
"learning_rate": 9.57228334136811e-05,
"loss": 0.0153,
"step": 14080
},
{
"grad_norm": 0.35026389360427856,
"learning_rate": 9.571446535130641e-05,
"loss": 0.0165,
"step": 14090
},
{
"grad_norm": 0.17682762444019318,
"learning_rate": 9.570608947757988e-05,
"loss": 0.0161,
"step": 14100
},
{
"grad_norm": 0.25271111726760864,
"learning_rate": 9.569770579393274e-05,
"loss": 0.0162,
"step": 14110
},
{
"grad_norm": 0.1814207136631012,
"learning_rate": 9.56893143017975e-05,
"loss": 0.0157,
"step": 14120
},
{
"grad_norm": 0.20927149057388306,
"learning_rate": 9.568091500260806e-05,
"loss": 0.016,
"step": 14130
},
{
"grad_norm": 0.18713536858558655,
"learning_rate": 9.567250789779961e-05,
"loss": 0.0149,
"step": 14140
},
{
"grad_norm": 0.16728201508522034,
"learning_rate": 9.566409298880872e-05,
"loss": 0.0176,
"step": 14150
},
{
"grad_norm": 0.18786218762397766,
"learning_rate": 9.565567027707326e-05,
"loss": 0.0178,
"step": 14160
},
{
"grad_norm": 0.2244640588760376,
"learning_rate": 9.56472397640324e-05,
"loss": 0.0166,
"step": 14170
},
{
"grad_norm": 0.21699632704257965,
"learning_rate": 9.563880145112675e-05,
"loss": 0.0202,
"step": 14180
},
{
"grad_norm": 0.18745189905166626,
"learning_rate": 9.563035533979814e-05,
"loss": 0.0166,
"step": 14190
},
{
"grad_norm": 0.19325166940689087,
"learning_rate": 9.562190143148981e-05,
"loss": 0.0204,
"step": 14200
},
{
"grad_norm": 0.14940635859966278,
"learning_rate": 9.561343972764627e-05,
"loss": 0.0129,
"step": 14210
},
{
"grad_norm": 0.23209263384342194,
"learning_rate": 9.560497022971343e-05,
"loss": 0.0154,
"step": 14220
},
{
"grad_norm": 0.1903807520866394,
"learning_rate": 9.559649293913847e-05,
"loss": 0.015,
"step": 14230
},
{
"grad_norm": 0.1858266294002533,
"learning_rate": 9.558800785736993e-05,
"loss": 0.0179,
"step": 14240
},
{
"grad_norm": 0.24292738735675812,
"learning_rate": 9.557951498585767e-05,
"loss": 0.0188,
"step": 14250
},
{
"grad_norm": 0.2099161595106125,
"learning_rate": 9.557101432605293e-05,
"loss": 0.0163,
"step": 14260
},
{
"grad_norm": 0.23850341141223907,
"learning_rate": 9.556250587940818e-05,
"loss": 0.0177,
"step": 14270
},
{
"grad_norm": 0.22388815879821777,
"learning_rate": 9.555398964737734e-05,
"loss": 0.0166,
"step": 14280
},
{
"grad_norm": 0.2458370327949524,
"learning_rate": 9.554546563141555e-05,
"loss": 0.0168,
"step": 14290
},
{
"grad_norm": 0.21718600392341614,
"learning_rate": 9.553693383297937e-05,
"loss": 0.0188,
"step": 14300
},
{
"grad_norm": 0.21735943853855133,
"learning_rate": 9.552839425352663e-05,
"loss": 0.0162,
"step": 14310
},
{
"grad_norm": 0.15514010190963745,
"learning_rate": 9.551984689451652e-05,
"loss": 0.017,
"step": 14320
},
{
"grad_norm": 0.21300536394119263,
"learning_rate": 9.551129175740953e-05,
"loss": 0.0176,
"step": 14330
},
{
"grad_norm": 0.17090490460395813,
"learning_rate": 9.550272884366754e-05,
"loss": 0.0149,
"step": 14340
},
{
"grad_norm": 0.20311683416366577,
"learning_rate": 9.549415815475369e-05,
"loss": 0.0152,
"step": 14350
},
{
"grad_norm": 0.14933757483959198,
"learning_rate": 9.548557969213247e-05,
"loss": 0.0152,
"step": 14360
},
{
"grad_norm": 0.15693646669387817,
"learning_rate": 9.547699345726972e-05,
"loss": 0.0122,
"step": 14370
},
{
"grad_norm": 0.13444781303405762,
"learning_rate": 9.546839945163257e-05,
"loss": 0.0125,
"step": 14380
},
{
"grad_norm": 0.22073717415332794,
"learning_rate": 9.545979767668953e-05,
"loss": 0.0176,
"step": 14390
},
{
"grad_norm": 0.25846320390701294,
"learning_rate": 9.54511881339104e-05,
"loss": 0.016,
"step": 14400
},
{
"grad_norm": 0.2418988198041916,
"learning_rate": 9.54425708247663e-05,
"loss": 0.0196,
"step": 14410
},
{
"grad_norm": 0.2165713757276535,
"learning_rate": 9.543394575072972e-05,
"loss": 0.017,
"step": 14420
},
{
"grad_norm": 0.22406932711601257,
"learning_rate": 9.542531291327441e-05,
"loss": 0.0149,
"step": 14430
},
{
"grad_norm": 0.18911834061145782,
"learning_rate": 9.541667231387552e-05,
"loss": 0.0147,
"step": 14440
},
{
"grad_norm": 0.20237530767917633,
"learning_rate": 9.540802395400949e-05,
"loss": 0.0158,
"step": 14450
},
{
"grad_norm": 0.21332259476184845,
"learning_rate": 9.539936783515406e-05,
"loss": 0.016,
"step": 14460
},
{
"grad_norm": 0.16064292192459106,
"learning_rate": 9.539070395878835e-05,
"loss": 0.0169,
"step": 14470
},
{
"grad_norm": 0.19493581354618073,
"learning_rate": 9.538203232639277e-05,
"loss": 0.0154,
"step": 14480
},
{
"grad_norm": 0.19080771505832672,
"learning_rate": 9.537335293944907e-05,
"loss": 0.0144,
"step": 14490
},
{
"grad_norm": 0.20722368359565735,
"learning_rate": 9.536466579944032e-05,
"loss": 0.0158,
"step": 14500
},
{
"grad_norm": 0.18488697707653046,
"learning_rate": 9.535597090785091e-05,
"loss": 0.0169,
"step": 14510
},
{
"grad_norm": 0.19848453998565674,
"learning_rate": 9.534726826616656e-05,
"loss": 0.0143,
"step": 14520
},
{
"grad_norm": 0.18197226524353027,
"learning_rate": 9.53385578758743e-05,
"loss": 0.015,
"step": 14530
},
{
"grad_norm": 0.15892091393470764,
"learning_rate": 9.532983973846252e-05,
"loss": 0.0187,
"step": 14540
},
{
"grad_norm": 0.17897970974445343,
"learning_rate": 9.53211138554209e-05,
"loss": 0.0154,
"step": 14550
},
{
"grad_norm": 0.1671089380979538,
"learning_rate": 9.531238022824047e-05,
"loss": 0.0159,
"step": 14560
},
{
"grad_norm": 0.2073274850845337,
"learning_rate": 9.530363885841355e-05,
"loss": 0.0173,
"step": 14570
},
{
"grad_norm": 0.26559683680534363,
"learning_rate": 9.52948897474338e-05,
"loss": 0.0167,
"step": 14580
},
{
"grad_norm": 0.15245188772678375,
"learning_rate": 9.528613289679622e-05,
"loss": 0.015,
"step": 14590
},
{
"grad_norm": 0.17604784667491913,
"learning_rate": 9.52773683079971e-05,
"loss": 0.0143,
"step": 14600
},
{
"grad_norm": 0.16941019892692566,
"learning_rate": 9.526859598253407e-05,
"loss": 0.0145,
"step": 14610
},
{
"grad_norm": 0.20509329438209534,
"learning_rate": 9.525981592190609e-05,
"loss": 0.0168,
"step": 14620
},
{
"grad_norm": 0.21102458238601685,
"learning_rate": 9.525102812761342e-05,
"loss": 0.0171,
"step": 14630
},
{
"grad_norm": 0.1973593533039093,
"learning_rate": 9.524223260115768e-05,
"loss": 0.0157,
"step": 14640
},
{
"grad_norm": 0.2082795649766922,
"learning_rate": 9.523342934404175e-05,
"loss": 0.0142,
"step": 14650
},
{
"grad_norm": 0.1476413905620575,
"learning_rate": 9.522461835776989e-05,
"loss": 0.0182,
"step": 14660
},
{
"grad_norm": 0.16569702327251434,
"learning_rate": 9.521579964384764e-05,
"loss": 0.0143,
"step": 14670
},
{
"grad_norm": 0.14856582880020142,
"learning_rate": 9.52069732037819e-05,
"loss": 0.0157,
"step": 14680
},
{
"grad_norm": 0.25279876589775085,
"learning_rate": 9.519813903908083e-05,
"loss": 0.0166,
"step": 14690
},
{
"grad_norm": 0.19278138875961304,
"learning_rate": 9.5189297151254e-05,
"loss": 0.0163,
"step": 14700
},
{
"grad_norm": 0.126902237534523,
"learning_rate": 9.518044754181218e-05,
"loss": 0.0128,
"step": 14710
},
{
"grad_norm": 0.18358808755874634,
"learning_rate": 9.51715902122676e-05,
"loss": 0.0155,
"step": 14720
},
{
"grad_norm": 0.20050916075706482,
"learning_rate": 9.516272516413368e-05,
"loss": 0.0158,
"step": 14730
},
{
"grad_norm": 0.21295876801013947,
"learning_rate": 9.515385239892525e-05,
"loss": 0.0184,
"step": 14740
},
{
"grad_norm": 0.20115697383880615,
"learning_rate": 9.514497191815839e-05,
"loss": 0.016,
"step": 14750
},
{
"grad_norm": 0.17910780012607574,
"learning_rate": 9.513608372335055e-05,
"loss": 0.0138,
"step": 14760
},
{
"grad_norm": 0.1855442374944687,
"learning_rate": 9.512718781602045e-05,
"loss": 0.0155,
"step": 14770
},
{
"grad_norm": 0.20865614712238312,
"learning_rate": 9.511828419768823e-05,
"loss": 0.0149,
"step": 14780
},
{
"grad_norm": 0.20891661942005157,
"learning_rate": 9.510937286987521e-05,
"loss": 0.0175,
"step": 14790
},
{
"grad_norm": 0.14707662165164948,
"learning_rate": 9.510045383410408e-05,
"loss": 0.0142,
"step": 14800
},
{
"grad_norm": 0.29158416390419006,
"learning_rate": 9.509152709189892e-05,
"loss": 0.0147,
"step": 14810
},
{
"grad_norm": 0.17530637979507446,
"learning_rate": 9.508259264478504e-05,
"loss": 0.0172,
"step": 14820
},
{
"grad_norm": 0.1772986799478531,
"learning_rate": 9.507365049428909e-05,
"loss": 0.017,
"step": 14830
},
{
"grad_norm": 0.19525521993637085,
"learning_rate": 9.506470064193902e-05,
"loss": 0.0172,
"step": 14840
},
{
"grad_norm": 0.1994933784008026,
"learning_rate": 9.505574308926414e-05,
"loss": 0.0152,
"step": 14850
},
{
"grad_norm": 0.19826896488666534,
"learning_rate": 9.504677783779505e-05,
"loss": 0.0149,
"step": 14860
},
{
"grad_norm": 0.22154049575328827,
"learning_rate": 9.503780488906365e-05,
"loss": 0.0153,
"step": 14870
},
{
"grad_norm": 0.24092242121696472,
"learning_rate": 9.502882424460319e-05,
"loss": 0.0171,
"step": 14880
},
{
"grad_norm": 0.18074648082256317,
"learning_rate": 9.501983590594821e-05,
"loss": 0.017,
"step": 14890
},
{
"grad_norm": 0.21665740013122559,
"learning_rate": 9.501083987463455e-05,
"loss": 0.0161,
"step": 14900
},
{
"grad_norm": 0.22991301119327545,
"learning_rate": 9.500183615219942e-05,
"loss": 0.0147,
"step": 14910
},
{
"grad_norm": 0.1284361481666565,
"learning_rate": 9.49928247401813e-05,
"loss": 0.0196,
"step": 14920
},
{
"grad_norm": 0.17300018668174744,
"learning_rate": 9.498380564011997e-05,
"loss": 0.0173,
"step": 14930
},
{
"grad_norm": 0.24073739349842072,
"learning_rate": 9.497477885355656e-05,
"loss": 0.0183,
"step": 14940
},
{
"grad_norm": 0.18364053964614868,
"learning_rate": 9.496574438203353e-05,
"loss": 0.0146,
"step": 14950
},
{
"grad_norm": 0.1605290174484253,
"learning_rate": 9.495670222709459e-05,
"loss": 0.0136,
"step": 14960
},
{
"grad_norm": 0.2259046733379364,
"learning_rate": 9.494765239028483e-05,
"loss": 0.0205,
"step": 14970
},
{
"grad_norm": 0.18903137743473053,
"learning_rate": 9.493859487315057e-05,
"loss": 0.0152,
"step": 14980
},
{
"grad_norm": 0.20537538826465607,
"learning_rate": 9.492952967723953e-05,
"loss": 0.0152,
"step": 14990
},
{
"grad_norm": 0.15892380475997925,
"learning_rate": 9.492045680410068e-05,
"loss": 0.018,
"step": 15000
},
{
"grad_norm": 0.282776802778244,
"learning_rate": 9.491137625528436e-05,
"loss": 0.0158,
"step": 15010
},
{
"grad_norm": 0.15211687982082367,
"learning_rate": 9.490228803234215e-05,
"loss": 0.0181,
"step": 15020
},
{
"grad_norm": 0.2093910574913025,
"learning_rate": 9.489319213682701e-05,
"loss": 0.0158,
"step": 15030
},
{
"grad_norm": 0.18943658471107483,
"learning_rate": 9.488408857029316e-05,
"loss": 0.0153,
"step": 15040
},
{
"grad_norm": 0.19043107330799103,
"learning_rate": 9.487497733429616e-05,
"loss": 0.0161,
"step": 15050
},
{
"grad_norm": 0.20405618846416473,
"learning_rate": 9.486585843039286e-05,
"loss": 0.0133,
"step": 15060
},
{
"grad_norm": 0.1904863864183426,
"learning_rate": 9.485673186014143e-05,
"loss": 0.0143,
"step": 15070
},
{
"grad_norm": 0.17432111501693726,
"learning_rate": 9.484759762510137e-05,
"loss": 0.0148,
"step": 15080
},
{
"grad_norm": 0.22494779527187347,
"learning_rate": 9.483845572683346e-05,
"loss": 0.015,
"step": 15090
},
{
"grad_norm": 0.2297731190919876,
"learning_rate": 9.48293061668998e-05,
"loss": 0.0149,
"step": 15100
},
{
"grad_norm": 0.17565256357192993,
"learning_rate": 9.48201489468638e-05,
"loss": 0.015,
"step": 15110
},
{
"grad_norm": 0.17492714524269104,
"learning_rate": 9.481098406829016e-05,
"loss": 0.0152,
"step": 15120
},
{
"grad_norm": 0.2154332548379898,
"learning_rate": 9.480181153274495e-05,
"loss": 0.0165,
"step": 15130
},
{
"grad_norm": 0.1828521341085434,
"learning_rate": 9.479263134179548e-05,
"loss": 0.0199,
"step": 15140
},
{
"grad_norm": 0.17296741902828217,
"learning_rate": 9.478344349701039e-05,
"loss": 0.018,
"step": 15150
},
{
"grad_norm": 0.13950493931770325,
"learning_rate": 9.477424799995964e-05,
"loss": 0.0157,
"step": 15160
},
{
"grad_norm": 0.15247932076454163,
"learning_rate": 9.476504485221448e-05,
"loss": 0.0156,
"step": 15170
},
{
"grad_norm": 0.24140794575214386,
"learning_rate": 9.475583405534748e-05,
"loss": 0.0162,
"step": 15180
},
{
"grad_norm": 0.18423013389110565,
"learning_rate": 9.474661561093251e-05,
"loss": 0.0198,
"step": 15190
},
{
"grad_norm": 0.21005667746067047,
"learning_rate": 9.473738952054478e-05,
"loss": 0.0154,
"step": 15200
},
{
"grad_norm": 0.18604272603988647,
"learning_rate": 9.472815578576073e-05,
"loss": 0.0163,
"step": 15210
},
{
"grad_norm": 0.19538111984729767,
"learning_rate": 9.471891440815817e-05,
"loss": 0.0149,
"step": 15220
},
{
"grad_norm": 0.13286109268665314,
"learning_rate": 9.470966538931621e-05,
"loss": 0.0142,
"step": 15230
},
{
"grad_norm": 0.20065544545650482,
"learning_rate": 9.470040873081525e-05,
"loss": 0.0158,
"step": 15240
},
{
"grad_norm": 0.20127904415130615,
"learning_rate": 9.469114443423698e-05,
"loss": 0.0138,
"step": 15250
},
{
"grad_norm": 0.15562212467193604,
"learning_rate": 9.468187250116445e-05,
"loss": 0.0161,
"step": 15260
},
{
"grad_norm": 0.21185919642448425,
"learning_rate": 9.467259293318197e-05,
"loss": 0.0149,
"step": 15270
},
{
"grad_norm": 0.1858607530593872,
"learning_rate": 9.466330573187514e-05,
"loss": 0.0162,
"step": 15280
},
{
"grad_norm": 0.20158518850803375,
"learning_rate": 9.46540108988309e-05,
"loss": 0.0136,
"step": 15290
},
{
"grad_norm": 0.19349181652069092,
"learning_rate": 9.46447084356375e-05,
"loss": 0.0138,
"step": 15300
},
{
"grad_norm": 0.14437109231948853,
"learning_rate": 9.463539834388447e-05,
"loss": 0.0127,
"step": 15310
},
{
"grad_norm": 0.1690860241651535,
"learning_rate": 9.462608062516263e-05,
"loss": 0.0163,
"step": 15320
},
{
"grad_norm": 0.1996581107378006,
"learning_rate": 9.461675528106413e-05,
"loss": 0.014,
"step": 15330
},
{
"grad_norm": 0.20567628741264343,
"learning_rate": 9.460742231318244e-05,
"loss": 0.0163,
"step": 15340
},
{
"grad_norm": 0.1955721229314804,
"learning_rate": 9.459808172311229e-05,
"loss": 0.0161,
"step": 15350
},
{
"grad_norm": 0.18463757634162903,
"learning_rate": 9.458873351244972e-05,
"loss": 0.0164,
"step": 15360
},
{
"grad_norm": 0.16779930889606476,
"learning_rate": 9.457937768279211e-05,
"loss": 0.0188,
"step": 15370
},
{
"grad_norm": 0.21408873796463013,
"learning_rate": 9.45700142357381e-05,
"loss": 0.0183,
"step": 15380
},
{
"grad_norm": 0.2272716909646988,
"learning_rate": 9.456064317288765e-05,
"loss": 0.0153,
"step": 15390
},
{
"grad_norm": 0.18865090608596802,
"learning_rate": 9.455126449584201e-05,
"loss": 0.0167,
"step": 15400
},
{
"grad_norm": 0.18456076085567474,
"learning_rate": 9.454187820620375e-05,
"loss": 0.0158,
"step": 15410
},
{
"grad_norm": 0.2326492965221405,
"learning_rate": 9.453248430557673e-05,
"loss": 0.0161,
"step": 15420
},
{
"grad_norm": 0.19523780047893524,
"learning_rate": 9.452308279556611e-05,
"loss": 0.0166,
"step": 15430
},
{
"grad_norm": 0.17657940089702606,
"learning_rate": 9.451367367777835e-05,
"loss": 0.0163,
"step": 15440
},
{
"grad_norm": 0.18257446587085724,
"learning_rate": 9.450425695382122e-05,
"loss": 0.014,
"step": 15450
},
{
"grad_norm": 0.12774761021137238,
"learning_rate": 9.449483262530375e-05,
"loss": 0.016,
"step": 15460
},
{
"grad_norm": 0.20910178124904633,
"learning_rate": 9.448540069383633e-05,
"loss": 0.0166,
"step": 15470
},
{
"grad_norm": 0.23977041244506836,
"learning_rate": 9.447596116103061e-05,
"loss": 0.0176,
"step": 15480
},
{
"grad_norm": 0.15449866652488708,
"learning_rate": 9.446651402849955e-05,
"loss": 0.0205,
"step": 15490
},
{
"grad_norm": 0.18119966983795166,
"learning_rate": 9.44570592978574e-05,
"loss": 0.0177,
"step": 15500
},
{
"grad_norm": 0.30893462896347046,
"learning_rate": 9.444759697071972e-05,
"loss": 0.0178,
"step": 15510
},
{
"grad_norm": 0.16717420518398285,
"learning_rate": 9.443812704870336e-05,
"loss": 0.0152,
"step": 15520
},
{
"grad_norm": 0.1347653716802597,
"learning_rate": 9.442864953342649e-05,
"loss": 0.0149,
"step": 15530
},
{
"grad_norm": 0.12874631583690643,
"learning_rate": 9.441916442650852e-05,
"loss": 0.0156,
"step": 15540
},
{
"grad_norm": 0.22822712361812592,
"learning_rate": 9.440967172957023e-05,
"loss": 0.0176,
"step": 15550
},
{
"grad_norm": 0.24268729984760284,
"learning_rate": 9.440017144423364e-05,
"loss": 0.0147,
"step": 15560
},
{
"grad_norm": 0.16856762766838074,
"learning_rate": 9.439066357212209e-05,
"loss": 0.0143,
"step": 15570
},
{
"grad_norm": 0.21613584458827972,
"learning_rate": 9.438114811486022e-05,
"loss": 0.0125,
"step": 15580
},
{
"grad_norm": 0.1790010631084442,
"learning_rate": 9.4371625074074e-05,
"loss": 0.0158,
"step": 15590
},
{
"grad_norm": 0.15197868645191193,
"learning_rate": 9.436209445139059e-05,
"loss": 0.0159,
"step": 15600
},
{
"grad_norm": 0.1518256664276123,
"learning_rate": 9.435255624843855e-05,
"loss": 0.0146,
"step": 15610
},
{
"grad_norm": 0.16170205175876617,
"learning_rate": 9.43430104668477e-05,
"loss": 0.0136,
"step": 15620
},
{
"grad_norm": 0.12304233759641647,
"learning_rate": 9.433345710824914e-05,
"loss": 0.0149,
"step": 15630
},
{
"grad_norm": 0.14309190213680267,
"learning_rate": 9.432389617427529e-05,
"loss": 0.0151,
"step": 15640
},
{
"grad_norm": 0.18983034789562225,
"learning_rate": 9.431432766655984e-05,
"loss": 0.0156,
"step": 15650
},
{
"grad_norm": 0.23073400557041168,
"learning_rate": 9.430475158673778e-05,
"loss": 0.016,
"step": 15660
},
{
"grad_norm": 0.1544876992702484,
"learning_rate": 9.429516793644542e-05,
"loss": 0.0157,
"step": 15670
},
{
"grad_norm": 0.17597949504852295,
"learning_rate": 9.428557671732034e-05,
"loss": 0.0154,
"step": 15680
},
{
"grad_norm": 0.16778236627578735,
"learning_rate": 9.42759779310014e-05,
"loss": 0.0142,
"step": 15690
},
{
"grad_norm": 0.15050950646400452,
"learning_rate": 9.426637157912879e-05,
"loss": 0.0127,
"step": 15700
},
{
"grad_norm": 0.241947203874588,
"learning_rate": 9.425675766334397e-05,
"loss": 0.0149,
"step": 15710
},
{
"grad_norm": 0.2014119029045105,
"learning_rate": 9.424713618528968e-05,
"loss": 0.0148,
"step": 15720
},
{
"grad_norm": 0.18093015253543854,
"learning_rate": 9.423750714661e-05,
"loss": 0.0165,
"step": 15730
},
{
"grad_norm": 0.17958128452301025,
"learning_rate": 9.422787054895022e-05,
"loss": 0.0161,
"step": 15740
},
{
"grad_norm": 0.20761585235595703,
"learning_rate": 9.4218226393957e-05,
"loss": 0.0152,
"step": 15750
},
{
"grad_norm": 0.23117801547050476,
"learning_rate": 9.420857468327828e-05,
"loss": 0.0185,
"step": 15760
},
{
"grad_norm": 0.17901940643787384,
"learning_rate": 9.419891541856323e-05,
"loss": 0.0168,
"step": 15770
},
{
"grad_norm": 0.19466669857501984,
"learning_rate": 9.41892486014624e-05,
"loss": 0.0158,
"step": 15780
},
{
"grad_norm": 0.18765583634376526,
"learning_rate": 9.417957423362756e-05,
"loss": 0.0148,
"step": 15790
},
{
"grad_norm": 0.224447101354599,
"learning_rate": 9.416989231671178e-05,
"loss": 0.014,
"step": 15800
},
{
"grad_norm": 0.20261625945568085,
"learning_rate": 9.416020285236946e-05,
"loss": 0.0128,
"step": 15810
},
{
"grad_norm": 0.143612340092659,
"learning_rate": 9.415050584225626e-05,
"loss": 0.0192,
"step": 15820
},
{
"grad_norm": 0.22563102841377258,
"learning_rate": 9.414080128802914e-05,
"loss": 0.0137,
"step": 15830
},
{
"grad_norm": 0.1522049754858017,
"learning_rate": 9.413108919134632e-05,
"loss": 0.0188,
"step": 15840
},
{
"grad_norm": 0.1357893943786621,
"learning_rate": 9.412136955386734e-05,
"loss": 0.0163,
"step": 15850
},
{
"grad_norm": 0.1896251142024994,
"learning_rate": 9.411164237725303e-05,
"loss": 0.0155,
"step": 15860
},
{
"grad_norm": 0.18479351699352264,
"learning_rate": 9.41019076631655e-05,
"loss": 0.0142,
"step": 15870
},
{
"grad_norm": 0.15194737911224365,
"learning_rate": 9.409216541326815e-05,
"loss": 0.0157,
"step": 15880
},
{
"grad_norm": 0.21557243168354034,
"learning_rate": 9.408241562922564e-05,
"loss": 0.0152,
"step": 15890
},
{
"grad_norm": 0.17626039683818817,
"learning_rate": 9.407265831270395e-05,
"loss": 0.0181,
"step": 15900
},
{
"grad_norm": 0.23398922383785248,
"learning_rate": 9.406289346537035e-05,
"loss": 0.0158,
"step": 15910
},
{
"grad_norm": 0.1912570744752884,
"learning_rate": 9.405312108889339e-05,
"loss": 0.0197,
"step": 15920
},
{
"grad_norm": 0.17436890304088593,
"learning_rate": 9.404334118494288e-05,
"loss": 0.0143,
"step": 15930
},
{
"grad_norm": 0.1936892718076706,
"learning_rate": 9.403355375518995e-05,
"loss": 0.016,
"step": 15940
},
{
"grad_norm": 0.2042391300201416,
"learning_rate": 9.4023758801307e-05,
"loss": 0.0177,
"step": 15950
},
{
"grad_norm": 0.1821664571762085,
"learning_rate": 9.401395632496774e-05,
"loss": 0.0154,
"step": 15960
},
{
"grad_norm": 0.13864484429359436,
"learning_rate": 9.400414632784711e-05,
"loss": 0.0142,
"step": 15970
},
{
"grad_norm": 0.17017197608947754,
"learning_rate": 9.39943288116214e-05,
"loss": 0.014,
"step": 15980
},
{
"grad_norm": 0.13913613557815552,
"learning_rate": 9.398450377796815e-05,
"loss": 0.0135,
"step": 15990
},
{
"grad_norm": 0.19775359332561493,
"learning_rate": 9.397467122856616e-05,
"loss": 0.018,
"step": 16000
},
{
"grad_norm": 0.2045111358165741,
"learning_rate": 9.396483116509558e-05,
"loss": 0.0165,
"step": 16010
},
{
"grad_norm": 0.12941493093967438,
"learning_rate": 9.39549835892378e-05,
"loss": 0.017,
"step": 16020
},
{
"grad_norm": 0.2243223935365677,
"learning_rate": 9.39451285026755e-05,
"loss": 0.0154,
"step": 16030
},
{
"grad_norm": 0.13051633536815643,
"learning_rate": 9.393526590709262e-05,
"loss": 0.0151,
"step": 16040
},
{
"grad_norm": 0.1551182121038437,
"learning_rate": 9.392539580417444e-05,
"loss": 0.0154,
"step": 16050
},
{
"grad_norm": 0.19762742519378662,
"learning_rate": 9.391551819560747e-05,
"loss": 0.0162,
"step": 16060
},
{
"grad_norm": 0.15003331005573273,
"learning_rate": 9.390563308307955e-05,
"loss": 0.0137,
"step": 16070
},
{
"grad_norm": 0.17223556339740753,
"learning_rate": 9.389574046827974e-05,
"loss": 0.0164,
"step": 16080
},
{
"grad_norm": 0.17182190716266632,
"learning_rate": 9.388584035289845e-05,
"loss": 0.0139,
"step": 16090
},
{
"grad_norm": 0.16798833012580872,
"learning_rate": 9.387593273862732e-05,
"loss": 0.015,
"step": 16100
},
{
"grad_norm": 0.21085280179977417,
"learning_rate": 9.386601762715929e-05,
"loss": 0.0137,
"step": 16110
},
{
"grad_norm": 0.20345155894756317,
"learning_rate": 9.38560950201886e-05,
"loss": 0.0132,
"step": 16120
},
{
"grad_norm": 0.19954214990139008,
"learning_rate": 9.384616491941071e-05,
"loss": 0.0158,
"step": 16130
},
{
"grad_norm": 0.1865217238664627,
"learning_rate": 9.383622732652245e-05,
"loss": 0.0143,
"step": 16140
},
{
"grad_norm": 0.20265163481235504,
"learning_rate": 9.382628224322187e-05,
"loss": 0.0138,
"step": 16150
},
{
"grad_norm": 0.19903923571109772,
"learning_rate": 9.381632967120829e-05,
"loss": 0.0154,
"step": 16160
},
{
"grad_norm": 0.15950298309326172,
"learning_rate": 9.380636961218235e-05,
"loss": 0.0139,
"step": 16170
},
{
"grad_norm": 0.15036417543888092,
"learning_rate": 9.379640206784597e-05,
"loss": 0.0158,
"step": 16180
},
{
"grad_norm": 0.18149831891059875,
"learning_rate": 9.378642703990229e-05,
"loss": 0.0151,
"step": 16190
},
{
"grad_norm": 0.17164242267608643,
"learning_rate": 9.37764445300558e-05,
"loss": 0.0147,
"step": 16200
},
{
"grad_norm": 0.1989191621541977,
"learning_rate": 9.376645454001222e-05,
"loss": 0.0144,
"step": 16210
},
{
"grad_norm": 0.17502199113368988,
"learning_rate": 9.375645707147858e-05,
"loss": 0.0158,
"step": 16220
},
{
"grad_norm": 0.12843117117881775,
"learning_rate": 9.374645212616316e-05,
"loss": 0.0142,
"step": 16230
},
{
"grad_norm": 0.23635591566562653,
"learning_rate": 9.373643970577555e-05,
"loss": 0.016,
"step": 16240
},
{
"grad_norm": 0.1574121117591858,
"learning_rate": 9.372641981202659e-05,
"loss": 0.0124,
"step": 16250
},
{
"grad_norm": 0.19652670621871948,
"learning_rate": 9.37163924466284e-05,
"loss": 0.0184,
"step": 16260
},
{
"grad_norm": 0.1898224800825119,
"learning_rate": 9.370635761129438e-05,
"loss": 0.0157,
"step": 16270
},
{
"grad_norm": 0.19329339265823364,
"learning_rate": 9.36963153077392e-05,
"loss": 0.0181,
"step": 16280
},
{
"grad_norm": 0.14157329499721527,
"learning_rate": 9.368626553767888e-05,
"loss": 0.0131,
"step": 16290
},
{
"grad_norm": 0.21744973957538605,
"learning_rate": 9.367620830283057e-05,
"loss": 0.0157,
"step": 16300
},
{
"grad_norm": 0.17362214624881744,
"learning_rate": 9.366614360491281e-05,
"loss": 0.0155,
"step": 16310
},
{
"grad_norm": 0.1637934446334839,
"learning_rate": 9.365607144564539e-05,
"loss": 0.0142,
"step": 16320
},
{
"grad_norm": 0.19428077340126038,
"learning_rate": 9.364599182674934e-05,
"loss": 0.0139,
"step": 16330
},
{
"grad_norm": 0.1941431313753128,
"learning_rate": 9.3635904749947e-05,
"loss": 0.0128,
"step": 16340
},
{
"grad_norm": 0.1572149097919464,
"learning_rate": 9.362581021696202e-05,
"loss": 0.0155,
"step": 16350
},
{
"grad_norm": 0.1676313579082489,
"learning_rate": 9.361570822951921e-05,
"loss": 0.0172,
"step": 16360
},
{
"grad_norm": 0.1637125164270401,
"learning_rate": 9.360559878934476e-05,
"loss": 0.0126,
"step": 16370
},
{
"grad_norm": 0.23842716217041016,
"learning_rate": 9.359548189816611e-05,
"loss": 0.0131,
"step": 16380
},
{
"grad_norm": 0.27230238914489746,
"learning_rate": 9.358535755771193e-05,
"loss": 0.015,
"step": 16390
},
{
"grad_norm": 0.16588149964809418,
"learning_rate": 9.357522576971221e-05,
"loss": 0.0175,
"step": 16400
},
{
"grad_norm": 0.1918477714061737,
"learning_rate": 9.356508653589819e-05,
"loss": 0.0157,
"step": 16410
},
{
"grad_norm": 0.20776967704296112,
"learning_rate": 9.355493985800237e-05,
"loss": 0.0161,
"step": 16420
},
{
"grad_norm": 0.2630099356174469,
"learning_rate": 9.354478573775857e-05,
"loss": 0.0122,
"step": 16430
},
{
"grad_norm": 0.19244301319122314,
"learning_rate": 9.353462417690186e-05,
"loss": 0.0136,
"step": 16440
},
{
"grad_norm": 0.24064555764198303,
"learning_rate": 9.352445517716853e-05,
"loss": 0.0152,
"step": 16450
},
{
"grad_norm": 0.2524726688861847,
"learning_rate": 9.351427874029621e-05,
"loss": 0.0172,
"step": 16460
},
{
"grad_norm": 0.15851224958896637,
"learning_rate": 9.350409486802379e-05,
"loss": 0.0142,
"step": 16470
},
{
"grad_norm": 0.19096924364566803,
"learning_rate": 9.349390356209138e-05,
"loss": 0.014,
"step": 16480
},
{
"grad_norm": 0.1291278898715973,
"learning_rate": 9.348370482424042e-05,
"loss": 0.0134,
"step": 16490
},
{
"grad_norm": 0.1367338001728058,
"learning_rate": 9.347349865621357e-05,
"loss": 0.0133,
"step": 16500
},
{
"grad_norm": 0.201796293258667,
"learning_rate": 9.346328505975481e-05,
"loss": 0.0113,
"step": 16510
},
{
"grad_norm": 0.1760982722043991,
"learning_rate": 9.345306403660936e-05,
"loss": 0.0152,
"step": 16520
},
{
"grad_norm": 0.16534508764743805,
"learning_rate": 9.344283558852371e-05,
"loss": 0.0152,
"step": 16530
},
{
"grad_norm": 0.16075266897678375,
"learning_rate": 9.343259971724563e-05,
"loss": 0.0141,
"step": 16540
},
{
"grad_norm": 0.18174687027931213,
"learning_rate": 9.342235642452413e-05,
"loss": 0.0123,
"step": 16550
},
{
"grad_norm": 0.2342667430639267,
"learning_rate": 9.341210571210954e-05,
"loss": 0.0157,
"step": 16560
},
{
"grad_norm": 0.18194811046123505,
"learning_rate": 9.340184758175338e-05,
"loss": 0.0152,
"step": 16570
},
{
"grad_norm": 0.16675084829330444,
"learning_rate": 9.339158203520854e-05,
"loss": 0.0133,
"step": 16580
},
{
"grad_norm": 0.132276251912117,
"learning_rate": 9.338130907422908e-05,
"loss": 0.0143,
"step": 16590
},
{
"grad_norm": 0.15153101086616516,
"learning_rate": 9.337102870057037e-05,
"loss": 0.0141,
"step": 16600
},
{
"grad_norm": 0.1930789202451706,
"learning_rate": 9.336074091598907e-05,
"loss": 0.0142,
"step": 16610
},
{
"grad_norm": 0.22428837418556213,
"learning_rate": 9.335044572224306e-05,
"loss": 0.0136,
"step": 16620
},
{
"grad_norm": 0.198419988155365,
"learning_rate": 9.334014312109151e-05,
"loss": 0.0168,
"step": 16630
},
{
"grad_norm": 0.21213442087173462,
"learning_rate": 9.332983311429486e-05,
"loss": 0.0149,
"step": 16640
},
{
"grad_norm": 0.1644720733165741,
"learning_rate": 9.33195157036148e-05,
"loss": 0.0187,
"step": 16650
},
{
"grad_norm": 0.13675765693187714,
"learning_rate": 9.330919089081432e-05,
"loss": 0.0127,
"step": 16660
},
{
"grad_norm": 0.18107080459594727,
"learning_rate": 9.32988586776576e-05,
"loss": 0.0166,
"step": 16670
},
{
"grad_norm": 0.12249067425727844,
"learning_rate": 9.328851906591016e-05,
"loss": 0.0124,
"step": 16680
},
{
"grad_norm": 0.15982289612293243,
"learning_rate": 9.327817205733875e-05,
"loss": 0.0125,
"step": 16690
},
{
"grad_norm": 0.14900913834571838,
"learning_rate": 9.326781765371142e-05,
"loss": 0.013,
"step": 16700
},
{
"grad_norm": 0.10367929190397263,
"learning_rate": 9.325745585679741e-05,
"loss": 0.0142,
"step": 16710
},
{
"grad_norm": 0.14036177098751068,
"learning_rate": 9.32470866683673e-05,
"loss": 0.0121,
"step": 16720
},
{
"grad_norm": 0.2109682857990265,
"learning_rate": 9.323671009019288e-05,
"loss": 0.0128,
"step": 16730
},
{
"grad_norm": 0.1827767938375473,
"learning_rate": 9.322632612404725e-05,
"loss": 0.0167,
"step": 16740
},
{
"grad_norm": 0.2175469547510147,
"learning_rate": 9.321593477170471e-05,
"loss": 0.0156,
"step": 16750
},
{
"grad_norm": 0.16873490810394287,
"learning_rate": 9.320553603494088e-05,
"loss": 0.0153,
"step": 16760
},
{
"grad_norm": 0.206427663564682,
"learning_rate": 9.319512991553261e-05,
"loss": 0.0163,
"step": 16770
},
{
"grad_norm": 0.20298726856708527,
"learning_rate": 9.318471641525803e-05,
"loss": 0.0158,
"step": 16780
},
{
"grad_norm": 0.1599826216697693,
"learning_rate": 9.317429553589652e-05,
"loss": 0.0137,
"step": 16790
},
{
"grad_norm": 0.15837444365024567,
"learning_rate": 9.316386727922873e-05,
"loss": 0.0143,
"step": 16800
},
{
"grad_norm": 0.1554252803325653,
"learning_rate": 9.315343164703656e-05,
"loss": 0.0111,
"step": 16810
},
{
"grad_norm": 0.20519639551639557,
"learning_rate": 9.314298864110316e-05,
"loss": 0.014,
"step": 16820
},
{
"grad_norm": 0.19669020175933838,
"learning_rate": 9.313253826321295e-05,
"loss": 0.0145,
"step": 16830
},
{
"grad_norm": 0.20008881390094757,
"learning_rate": 9.312208051515165e-05,
"loss": 0.013,
"step": 16840
},
{
"grad_norm": 0.17566107213497162,
"learning_rate": 9.311161539870618e-05,
"loss": 0.0163,
"step": 16850
},
{
"grad_norm": 0.20041196048259735,
"learning_rate": 9.310114291566474e-05,
"loss": 0.0135,
"step": 16860
},
{
"grad_norm": 0.16520047187805176,
"learning_rate": 9.309066306781679e-05,
"loss": 0.014,
"step": 16870
},
{
"grad_norm": 0.18632209300994873,
"learning_rate": 9.308017585695306e-05,
"loss": 0.0148,
"step": 16880
},
{
"grad_norm": 0.22278279066085815,
"learning_rate": 9.306968128486552e-05,
"loss": 0.0133,
"step": 16890
},
{
"grad_norm": 0.1548001766204834,
"learning_rate": 9.30591793533474e-05,
"loss": 0.014,
"step": 16900
},
{
"grad_norm": 0.17120777070522308,
"learning_rate": 9.304867006419321e-05,
"loss": 0.0125,
"step": 16910
},
{
"grad_norm": 0.2500787079334259,
"learning_rate": 9.303815341919868e-05,
"loss": 0.0179,
"step": 16920
},
{
"grad_norm": 0.19493171572685242,
"learning_rate": 9.302762942016084e-05,
"loss": 0.0133,
"step": 16930
},
{
"grad_norm": 0.1815623939037323,
"learning_rate": 9.301709806887792e-05,
"loss": 0.0132,
"step": 16940
},
{
"grad_norm": 0.1313970983028412,
"learning_rate": 9.300655936714948e-05,
"loss": 0.0157,
"step": 16950
},
{
"grad_norm": 0.14121149480342865,
"learning_rate": 9.299601331677627e-05,
"loss": 0.0168,
"step": 16960
},
{
"grad_norm": 0.2180827260017395,
"learning_rate": 9.298545991956033e-05,
"loss": 0.0169,
"step": 16970
},
{
"grad_norm": 0.17011718451976776,
"learning_rate": 9.297489917730493e-05,
"loss": 0.0144,
"step": 16980
},
{
"grad_norm": 0.1423613578081131,
"learning_rate": 9.296433109181464e-05,
"loss": 0.0132,
"step": 16990
},
{
"grad_norm": 0.14495424926280975,
"learning_rate": 9.295375566489523e-05,
"loss": 0.0153,
"step": 17000
},
{
"grad_norm": 0.2507295608520508,
"learning_rate": 9.294317289835379e-05,
"loss": 0.0188,
"step": 17010
},
{
"grad_norm": 0.1876184493303299,
"learning_rate": 9.293258279399859e-05,
"loss": 0.0159,
"step": 17020
},
{
"grad_norm": 0.21413366496562958,
"learning_rate": 9.292198535363919e-05,
"loss": 0.0139,
"step": 17030
},
{
"grad_norm": 0.19295363128185272,
"learning_rate": 9.291138057908641e-05,
"loss": 0.0154,
"step": 17040
},
{
"grad_norm": 0.2160753458738327,
"learning_rate": 9.290076847215234e-05,
"loss": 0.0154,
"step": 17050
},
{
"grad_norm": 0.1723812073469162,
"learning_rate": 9.289014903465025e-05,
"loss": 0.0146,
"step": 17060
},
{
"grad_norm": 0.21360719203948975,
"learning_rate": 9.287952226839475e-05,
"loss": 0.0152,
"step": 17070
},
{
"grad_norm": 0.22365033626556396,
"learning_rate": 9.286888817520164e-05,
"loss": 0.0123,
"step": 17080
},
{
"grad_norm": 0.16959181427955627,
"learning_rate": 9.285824675688803e-05,
"loss": 0.0163,
"step": 17090
},
{
"grad_norm": 0.16515499353408813,
"learning_rate": 9.28475980152722e-05,
"loss": 0.0126,
"step": 17100
},
{
"grad_norm": 0.21997210383415222,
"learning_rate": 9.283694195217379e-05,
"loss": 0.0128,
"step": 17110
},
{
"grad_norm": 0.1831325888633728,
"learning_rate": 9.282627856941356e-05,
"loss": 0.0142,
"step": 17120
},
{
"grad_norm": 0.23816367983818054,
"learning_rate": 9.281560786881363e-05,
"loss": 0.0139,
"step": 17130
},
{
"grad_norm": 0.23735882341861725,
"learning_rate": 9.280492985219733e-05,
"loss": 0.0161,
"step": 17140
},
{
"grad_norm": 0.20103737711906433,
"learning_rate": 9.279424452138924e-05,
"loss": 0.0149,
"step": 17150
},
{
"grad_norm": 0.15094976127147675,
"learning_rate": 9.278355187821517e-05,
"loss": 0.0159,
"step": 17160
},
{
"grad_norm": 0.20321892201900482,
"learning_rate": 9.277285192450224e-05,
"loss": 0.0123,
"step": 17170
},
{
"grad_norm": 0.22879266738891602,
"learning_rate": 9.276214466207875e-05,
"loss": 0.0138,
"step": 17180
},
{
"grad_norm": 0.25783663988113403,
"learning_rate": 9.275143009277427e-05,
"loss": 0.0187,
"step": 17190
},
{
"grad_norm": 0.21469669044017792,
"learning_rate": 9.274070821841964e-05,
"loss": 0.0135,
"step": 17200
},
{
"grad_norm": 0.1551484763622284,
"learning_rate": 9.272997904084696e-05,
"loss": 0.0129,
"step": 17210
},
{
"grad_norm": 0.16763684153556824,
"learning_rate": 9.271924256188951e-05,
"loss": 0.0148,
"step": 17220
},
{
"grad_norm": 0.21348515152931213,
"learning_rate": 9.270849878338189e-05,
"loss": 0.0177,
"step": 17230
},
{
"grad_norm": 0.16984668374061584,
"learning_rate": 9.269774770715991e-05,
"loss": 0.0157,
"step": 17240
},
{
"grad_norm": 0.18757328391075134,
"learning_rate": 9.268698933506061e-05,
"loss": 0.0141,
"step": 17250
},
{
"grad_norm": 0.18285399675369263,
"learning_rate": 9.267622366892235e-05,
"loss": 0.0151,
"step": 17260
},
{
"grad_norm": 0.21176794171333313,
"learning_rate": 9.266545071058465e-05,
"loss": 0.0146,
"step": 17270
},
{
"grad_norm": 0.18544535338878632,
"learning_rate": 9.265467046188833e-05,
"loss": 0.0143,
"step": 17280
},
{
"grad_norm": 0.16830794513225555,
"learning_rate": 9.264388292467543e-05,
"loss": 0.0133,
"step": 17290
},
{
"grad_norm": 0.2336021363735199,
"learning_rate": 9.263308810078926e-05,
"loss": 0.0151,
"step": 17300
},
{
"grad_norm": 0.17986004054546356,
"learning_rate": 9.262228599207434e-05,
"loss": 0.0131,
"step": 17310
},
{
"grad_norm": 0.18517930805683136,
"learning_rate": 9.261147660037647e-05,
"loss": 0.0153,
"step": 17320
},
{
"grad_norm": 0.21676015853881836,
"learning_rate": 9.26006599275427e-05,
"loss": 0.0173,
"step": 17330
},
{
"grad_norm": 0.18562880158424377,
"learning_rate": 9.258983597542124e-05,
"loss": 0.0144,
"step": 17340
},
{
"grad_norm": 0.1738789826631546,
"learning_rate": 9.257900474586167e-05,
"loss": 0.0159,
"step": 17350
},
{
"grad_norm": 0.16082869470119476,
"learning_rate": 9.256816624071471e-05,
"loss": 0.014,
"step": 17360
},
{
"grad_norm": 0.16219903528690338,
"learning_rate": 9.25573204618324e-05,
"loss": 0.0126,
"step": 17370
},
{
"grad_norm": 0.1800624579191208,
"learning_rate": 9.254646741106796e-05,
"loss": 0.0147,
"step": 17380
},
{
"grad_norm": 0.13241958618164062,
"learning_rate": 9.253560709027589e-05,
"loss": 0.0142,
"step": 17390
},
{
"grad_norm": 0.19069504737854004,
"learning_rate": 9.252473950131192e-05,
"loss": 0.0154,
"step": 17400
},
{
"grad_norm": 0.19009700417518616,
"learning_rate": 9.251386464603302e-05,
"loss": 0.0152,
"step": 17410
},
{
"grad_norm": 0.18059396743774414,
"learning_rate": 9.250298252629741e-05,
"loss": 0.0133,
"step": 17420
},
{
"grad_norm": 0.2055145800113678,
"learning_rate": 9.249209314396454e-05,
"loss": 0.0146,
"step": 17430
},
{
"grad_norm": 0.1910519003868103,
"learning_rate": 9.248119650089513e-05,
"loss": 0.0146,
"step": 17440
},
{
"grad_norm": 0.18890254199504852,
"learning_rate": 9.247029259895108e-05,
"loss": 0.0169,
"step": 17450
},
{
"grad_norm": 0.1751728057861328,
"learning_rate": 9.24593814399956e-05,
"loss": 0.0135,
"step": 17460
},
{
"grad_norm": 0.14816506206989288,
"learning_rate": 9.244846302589309e-05,
"loss": 0.0158,
"step": 17470
},
{
"grad_norm": 0.18479114770889282,
"learning_rate": 9.243753735850923e-05,
"loss": 0.0144,
"step": 17480
},
{
"grad_norm": 0.11731395870447159,
"learning_rate": 9.24266044397109e-05,
"loss": 0.0155,
"step": 17490
},
{
"grad_norm": 0.18621566891670227,
"learning_rate": 9.241566427136624e-05,
"loss": 0.0148,
"step": 17500
},
{
"grad_norm": 0.1920589804649353,
"learning_rate": 9.240471685534463e-05,
"loss": 0.0157,
"step": 17510
},
{
"grad_norm": 0.13983657956123352,
"learning_rate": 9.239376219351667e-05,
"loss": 0.0123,
"step": 17520
},
{
"grad_norm": 0.2200879007577896,
"learning_rate": 9.238280028775425e-05,
"loss": 0.0144,
"step": 17530
},
{
"grad_norm": 0.18812744319438934,
"learning_rate": 9.237183113993041e-05,
"loss": 0.0133,
"step": 17540
},
{
"grad_norm": 0.16559596359729767,
"learning_rate": 9.236085475191952e-05,
"loss": 0.013,
"step": 17550
},
{
"grad_norm": 0.12157084792852402,
"learning_rate": 9.234987112559709e-05,
"loss": 0.0144,
"step": 17560
},
{
"grad_norm": 0.18532204627990723,
"learning_rate": 9.233888026283999e-05,
"loss": 0.0147,
"step": 17570
},
{
"grad_norm": 0.19556936621665955,
"learning_rate": 9.232788216552619e-05,
"loss": 0.0148,
"step": 17580
},
{
"grad_norm": 0.13391506671905518,
"learning_rate": 9.231687683553502e-05,
"loss": 0.0144,
"step": 17590
},
{
"grad_norm": 0.16114942729473114,
"learning_rate": 9.230586427474698e-05,
"loss": 0.0117,
"step": 17600
},
{
"grad_norm": 0.13999807834625244,
"learning_rate": 9.229484448504379e-05,
"loss": 0.0138,
"step": 17610
},
{
"grad_norm": 0.13610270619392395,
"learning_rate": 9.228381746830843e-05,
"loss": 0.0122,
"step": 17620
},
{
"grad_norm": 0.17827920615673065,
"learning_rate": 9.227278322642514e-05,
"loss": 0.0147,
"step": 17630
},
{
"grad_norm": 0.15096025168895721,
"learning_rate": 9.226174176127937e-05,
"loss": 0.0127,
"step": 17640
},
{
"grad_norm": 0.21146535873413086,
"learning_rate": 9.22506930747578e-05,
"loss": 0.0113,
"step": 17650
},
{
"grad_norm": 0.22395026683807373,
"learning_rate": 9.223963716874831e-05,
"loss": 0.011,
"step": 17660
},
{
"grad_norm": 0.14799124002456665,
"learning_rate": 9.222857404514012e-05,
"loss": 0.0135,
"step": 17670
},
{
"grad_norm": 0.17478370666503906,
"learning_rate": 9.221750370582355e-05,
"loss": 0.0163,
"step": 17680
},
{
"grad_norm": 0.17225725948810577,
"learning_rate": 9.220642615269028e-05,
"loss": 0.0139,
"step": 17690
},
{
"grad_norm": 0.1988813281059265,
"learning_rate": 9.219534138763311e-05,
"loss": 0.0137,
"step": 17700
},
{
"grad_norm": 0.16782264411449432,
"learning_rate": 9.218424941254613e-05,
"loss": 0.0131,
"step": 17710
},
{
"grad_norm": 0.19038794934749603,
"learning_rate": 9.217315022932468e-05,
"loss": 0.0156,
"step": 17720
},
{
"grad_norm": 0.15249748528003693,
"learning_rate": 9.216204383986528e-05,
"loss": 0.0139,
"step": 17730
},
{
"grad_norm": 0.14543838798999786,
"learning_rate": 9.215093024606574e-05,
"loss": 0.0163,
"step": 17740
},
{
"grad_norm": 0.1764708012342453,
"learning_rate": 9.213980944982506e-05,
"loss": 0.0124,
"step": 17750
},
{
"grad_norm": 0.15326310694217682,
"learning_rate": 9.212868145304346e-05,
"loss": 0.0179,
"step": 17760
},
{
"grad_norm": 0.1893349587917328,
"learning_rate": 9.211754625762241e-05,
"loss": 0.0135,
"step": 17770
},
{
"grad_norm": 0.13428562879562378,
"learning_rate": 9.210640386546463e-05,
"loss": 0.0138,
"step": 17780
},
{
"grad_norm": 0.14793631434440613,
"learning_rate": 9.209525427847405e-05,
"loss": 0.0146,
"step": 17790
},
{
"grad_norm": 0.16333283483982086,
"learning_rate": 9.208409749855583e-05,
"loss": 0.0128,
"step": 17800
},
{
"grad_norm": 0.2120400369167328,
"learning_rate": 9.207293352761633e-05,
"loss": 0.0181,
"step": 17810
},
{
"grad_norm": 0.15726876258850098,
"learning_rate": 9.206176236756319e-05,
"loss": 0.0151,
"step": 17820
},
{
"grad_norm": 0.16201718151569366,
"learning_rate": 9.205058402030525e-05,
"loss": 0.0126,
"step": 17830
},
{
"grad_norm": 0.13875119388103485,
"learning_rate": 9.203939848775259e-05,
"loss": 0.0134,
"step": 17840
},
{
"grad_norm": 0.12077285349369049,
"learning_rate": 9.202820577181652e-05,
"loss": 0.0134,
"step": 17850
},
{
"grad_norm": 0.25868210196495056,
"learning_rate": 9.201700587440953e-05,
"loss": 0.014,
"step": 17860
},
{
"grad_norm": 0.21567592024803162,
"learning_rate": 9.200579879744544e-05,
"loss": 0.0181,
"step": 17870
},
{
"grad_norm": 0.24580392241477966,
"learning_rate": 9.199458454283918e-05,
"loss": 0.0133,
"step": 17880
},
{
"grad_norm": 0.20059938728809357,
"learning_rate": 9.198336311250697e-05,
"loss": 0.0157,
"step": 17890
},
{
"grad_norm": 0.20259112119674683,
"learning_rate": 9.197213450836626e-05,
"loss": 0.0151,
"step": 17900
},
{
"grad_norm": 0.2202037274837494,
"learning_rate": 9.19608987323357e-05,
"loss": 0.0161,
"step": 17910
},
{
"grad_norm": 0.16498060524463654,
"learning_rate": 9.194965578633517e-05,
"loss": 0.0161,
"step": 17920
},
{
"grad_norm": 0.15870080888271332,
"learning_rate": 9.193840567228582e-05,
"loss": 0.0129,
"step": 17930
},
{
"grad_norm": 0.17275480926036835,
"learning_rate": 9.192714839210994e-05,
"loss": 0.0163,
"step": 17940
},
{
"grad_norm": 0.190365269780159,
"learning_rate": 9.19158839477311e-05,
"loss": 0.0121,
"step": 17950
},
{
"grad_norm": 0.2227722406387329,
"learning_rate": 9.190461234107411e-05,
"loss": 0.0174,
"step": 17960
},
{
"grad_norm": 0.21828261017799377,
"learning_rate": 9.189333357406496e-05,
"loss": 0.0152,
"step": 17970
},
{
"grad_norm": 0.16810466349124908,
"learning_rate": 9.188204764863089e-05,
"loss": 0.0145,
"step": 17980
},
{
"grad_norm": 0.180048868060112,
"learning_rate": 9.187075456670033e-05,
"loss": 0.0152,
"step": 17990
},
{
"grad_norm": 0.16667920351028442,
"learning_rate": 9.1859454330203e-05,
"loss": 0.0135,
"step": 18000
},
{
"grad_norm": 0.2036157101392746,
"learning_rate": 9.18481469410698e-05,
"loss": 0.0155,
"step": 18010
},
{
"grad_norm": 0.1677679866552353,
"learning_rate": 9.183683240123281e-05,
"loss": 0.0117,
"step": 18020
},
{
"grad_norm": 0.20066669583320618,
"learning_rate": 9.182551071262541e-05,
"loss": 0.013,
"step": 18030
},
{
"grad_norm": 0.17513668537139893,
"learning_rate": 9.181418187718218e-05,
"loss": 0.0131,
"step": 18040
},
{
"grad_norm": 0.16238491237163544,
"learning_rate": 9.180284589683888e-05,
"loss": 0.015,
"step": 18050
},
{
"grad_norm": 0.1820688396692276,
"learning_rate": 9.17915027735325e-05,
"loss": 0.0157,
"step": 18060
},
{
"grad_norm": 0.19918294250965118,
"learning_rate": 9.178015250920133e-05,
"loss": 0.0167,
"step": 18070
},
{
"grad_norm": 0.17456094920635223,
"learning_rate": 9.176879510578477e-05,
"loss": 0.0136,
"step": 18080
},
{
"grad_norm": 0.202936589717865,
"learning_rate": 9.17574305652235e-05,
"loss": 0.0157,
"step": 18090
},
{
"grad_norm": 0.16613461077213287,
"learning_rate": 9.174605888945942e-05,
"loss": 0.0153,
"step": 18100
},
{
"grad_norm": 0.1491008847951889,
"learning_rate": 9.173468008043564e-05,
"loss": 0.0132,
"step": 18110
},
{
"grad_norm": 0.189555361866951,
"learning_rate": 9.172329414009648e-05,
"loss": 0.0195,
"step": 18120
},
{
"grad_norm": 0.18217122554779053,
"learning_rate": 9.171190107038747e-05,
"loss": 0.0146,
"step": 18130
},
{
"grad_norm": 0.2065577208995819,
"learning_rate": 9.170050087325541e-05,
"loss": 0.0122,
"step": 18140
},
{
"grad_norm": 0.17024259269237518,
"learning_rate": 9.168909355064824e-05,
"loss": 0.0123,
"step": 18150
},
{
"grad_norm": 0.25083598494529724,
"learning_rate": 9.167767910451519e-05,
"loss": 0.0135,
"step": 18160
},
{
"grad_norm": 0.1365453153848648,
"learning_rate": 9.166625753680669e-05,
"loss": 0.0128,
"step": 18170
},
{
"grad_norm": 0.1414394974708557,
"learning_rate": 9.165482884947431e-05,
"loss": 0.0122,
"step": 18180
},
{
"grad_norm": 0.12690173089504242,
"learning_rate": 9.164339304447098e-05,
"loss": 0.014,
"step": 18190
},
{
"grad_norm": 0.17947471141815186,
"learning_rate": 9.163195012375072e-05,
"loss": 0.015,
"step": 18200
},
{
"grad_norm": 0.16831102967262268,
"learning_rate": 9.16205000892688e-05,
"loss": 0.0131,
"step": 18210
},
{
"grad_norm": 0.15917395055294037,
"learning_rate": 9.160904294298175e-05,
"loss": 0.0137,
"step": 18220
},
{
"grad_norm": 0.18532268702983856,
"learning_rate": 9.159757868684727e-05,
"loss": 0.0155,
"step": 18230
},
{
"grad_norm": 0.16364696621894836,
"learning_rate": 9.15861073228243e-05,
"loss": 0.0155,
"step": 18240
},
{
"grad_norm": 0.19000813364982605,
"learning_rate": 9.157462885287296e-05,
"loss": 0.0147,
"step": 18250
},
{
"grad_norm": 0.16765892505645752,
"learning_rate": 9.156314327895461e-05,
"loss": 0.0149,
"step": 18260
},
{
"grad_norm": 0.1955818086862564,
"learning_rate": 9.155165060303185e-05,
"loss": 0.0158,
"step": 18270
},
{
"grad_norm": 0.19901178777217865,
"learning_rate": 9.154015082706841e-05,
"loss": 0.0154,
"step": 18280
},
{
"grad_norm": 0.22178158164024353,
"learning_rate": 9.152864395302936e-05,
"loss": 0.013,
"step": 18290
},
{
"grad_norm": 0.13203920423984528,
"learning_rate": 9.151712998288085e-05,
"loss": 0.0122,
"step": 18300
},
{
"grad_norm": 0.20603708922863007,
"learning_rate": 9.150560891859031e-05,
"loss": 0.0184,
"step": 18310
},
{
"grad_norm": 0.13683779537677765,
"learning_rate": 9.14940807621264e-05,
"loss": 0.0126,
"step": 18320
},
{
"grad_norm": 0.2200845330953598,
"learning_rate": 9.148254551545894e-05,
"loss": 0.0177,
"step": 18330
},
{
"grad_norm": 0.1949702948331833,
"learning_rate": 9.147100318055901e-05,
"loss": 0.015,
"step": 18340
},
{
"grad_norm": 0.1641172617673874,
"learning_rate": 9.145945375939888e-05,
"loss": 0.0136,
"step": 18350
},
{
"grad_norm": 0.2542160451412201,
"learning_rate": 9.144789725395203e-05,
"loss": 0.0134,
"step": 18360
},
{
"grad_norm": 0.22699974477291107,
"learning_rate": 9.14363336661931e-05,
"loss": 0.0115,
"step": 18370
},
{
"grad_norm": 0.182538703083992,
"learning_rate": 9.142476299809806e-05,
"loss": 0.0147,
"step": 18380
},
{
"grad_norm": 0.1428644359111786,
"learning_rate": 9.1413185251644e-05,
"loss": 0.0119,
"step": 18390
},
{
"grad_norm": 0.1707461029291153,
"learning_rate": 9.140160042880923e-05,
"loss": 0.0123,
"step": 18400
},
{
"grad_norm": 0.21117953956127167,
"learning_rate": 9.139000853157327e-05,
"loss": 0.0131,
"step": 18410
},
{
"grad_norm": 0.18192517757415771,
"learning_rate": 9.137840956191688e-05,
"loss": 0.0136,
"step": 18420
},
{
"grad_norm": 0.17930957674980164,
"learning_rate": 9.136680352182199e-05,
"loss": 0.0144,
"step": 18430
},
{
"grad_norm": 0.19649390876293182,
"learning_rate": 9.135519041327177e-05,
"loss": 0.0154,
"step": 18440
},
{
"grad_norm": 0.23601211607456207,
"learning_rate": 9.134357023825058e-05,
"loss": 0.0163,
"step": 18450
},
{
"grad_norm": 0.19874411821365356,
"learning_rate": 9.133194299874398e-05,
"loss": 0.0121,
"step": 18460
},
{
"grad_norm": 0.15509752929210663,
"learning_rate": 9.132030869673876e-05,
"loss": 0.0124,
"step": 18470
},
{
"grad_norm": 0.1924155354499817,
"learning_rate": 9.130866733422288e-05,
"loss": 0.012,
"step": 18480
},
{
"grad_norm": 0.1527821570634842,
"learning_rate": 9.129701891318556e-05,
"loss": 0.0166,
"step": 18490
},
{
"grad_norm": 0.1380731761455536,
"learning_rate": 9.128536343561718e-05,
"loss": 0.0129,
"step": 18500
},
{
"grad_norm": 0.18899448215961456,
"learning_rate": 9.127370090350934e-05,
"loss": 0.0139,
"step": 18510
},
{
"grad_norm": 0.16718028485774994,
"learning_rate": 9.126203131885487e-05,
"loss": 0.0145,
"step": 18520
},
{
"grad_norm": 0.12178225070238113,
"learning_rate": 9.125035468364775e-05,
"loss": 0.0122,
"step": 18530
},
{
"grad_norm": 0.2568337321281433,
"learning_rate": 9.123867099988322e-05,
"loss": 0.0135,
"step": 18540
},
{
"grad_norm": 0.16624175012111664,
"learning_rate": 9.122698026955769e-05,
"loss": 0.0155,
"step": 18550
},
{
"grad_norm": 0.1602160781621933,
"learning_rate": 9.12152824946688e-05,
"loss": 0.0136,
"step": 18560
},
{
"grad_norm": 0.1256667822599411,
"learning_rate": 9.120357767721538e-05,
"loss": 0.0123,
"step": 18570
},
{
"grad_norm": 0.1104787215590477,
"learning_rate": 9.119186581919745e-05,
"loss": 0.012,
"step": 18580
},
{
"grad_norm": 0.2000863254070282,
"learning_rate": 9.118014692261624e-05,
"loss": 0.0132,
"step": 18590
},
{
"grad_norm": 0.14768925309181213,
"learning_rate": 9.116842098947422e-05,
"loss": 0.0131,
"step": 18600
},
{
"grad_norm": 0.2148488461971283,
"learning_rate": 9.115668802177499e-05,
"loss": 0.0161,
"step": 18610
},
{
"grad_norm": 0.18289022147655487,
"learning_rate": 9.114494802152342e-05,
"loss": 0.0156,
"step": 18620
},
{
"grad_norm": 0.2317921668291092,
"learning_rate": 9.113320099072555e-05,
"loss": 0.0159,
"step": 18630
},
{
"grad_norm": 0.14073435962200165,
"learning_rate": 9.112144693138864e-05,
"loss": 0.012,
"step": 18640
},
{
"grad_norm": 0.13443489372730255,
"learning_rate": 9.110968584552111e-05,
"loss": 0.0121,
"step": 18650
},
{
"grad_norm": 0.13705019652843475,
"learning_rate": 9.109791773513264e-05,
"loss": 0.0131,
"step": 18660
},
{
"grad_norm": 0.16150884330272675,
"learning_rate": 9.108614260223403e-05,
"loss": 0.0134,
"step": 18670
},
{
"grad_norm": 0.1957915872335434,
"learning_rate": 9.107436044883738e-05,
"loss": 0.0153,
"step": 18680
},
{
"grad_norm": 0.1729505956172943,
"learning_rate": 9.10625712769559e-05,
"loss": 0.0139,
"step": 18690
},
{
"grad_norm": 0.12898528575897217,
"learning_rate": 9.105077508860406e-05,
"loss": 0.0135,
"step": 18700
},
{
"grad_norm": 0.21195413172245026,
"learning_rate": 9.103897188579751e-05,
"loss": 0.0122,
"step": 18710
},
{
"grad_norm": 0.16921821236610413,
"learning_rate": 9.102716167055308e-05,
"loss": 0.0121,
"step": 18720
},
{
"grad_norm": 0.1766607016324997,
"learning_rate": 9.10153444448888e-05,
"loss": 0.015,
"step": 18730
},
{
"grad_norm": 0.18350866436958313,
"learning_rate": 9.100352021082393e-05,
"loss": 0.0124,
"step": 18740
},
{
"grad_norm": 0.1389177292585373,
"learning_rate": 9.099168897037891e-05,
"loss": 0.0111,
"step": 18750
},
{
"grad_norm": 0.12991195917129517,
"learning_rate": 9.097985072557538e-05,
"loss": 0.0099,
"step": 18760
},
{
"grad_norm": 0.15150626003742218,
"learning_rate": 9.096800547843615e-05,
"loss": 0.0114,
"step": 18770
},
{
"grad_norm": 0.12411829829216003,
"learning_rate": 9.095615323098526e-05,
"loss": 0.0139,
"step": 18780
},
{
"grad_norm": 0.13244572281837463,
"learning_rate": 9.094429398524795e-05,
"loss": 0.012,
"step": 18790
},
{
"grad_norm": 0.19732749462127686,
"learning_rate": 9.093242774325061e-05,
"loss": 0.0141,
"step": 18800
},
{
"grad_norm": 0.1617794632911682,
"learning_rate": 9.092055450702088e-05,
"loss": 0.0118,
"step": 18810
},
{
"grad_norm": 0.1828819066286087,
"learning_rate": 9.090867427858756e-05,
"loss": 0.0165,
"step": 18820
},
{
"grad_norm": 0.15517683327198029,
"learning_rate": 9.089678705998066e-05,
"loss": 0.0138,
"step": 18830
},
{
"grad_norm": 0.21763205528259277,
"learning_rate": 9.088489285323139e-05,
"loss": 0.0136,
"step": 18840
},
{
"grad_norm": 0.1687460094690323,
"learning_rate": 9.087299166037212e-05,
"loss": 0.013,
"step": 18850
},
{
"grad_norm": 0.19683127105236053,
"learning_rate": 9.086108348343647e-05,
"loss": 0.0119,
"step": 18860
},
{
"grad_norm": 0.20528091490268707,
"learning_rate": 9.08491683244592e-05,
"loss": 0.0116,
"step": 18870
},
{
"grad_norm": 0.15028712153434753,
"learning_rate": 9.08372461854763e-05,
"loss": 0.0135,
"step": 18880
},
{
"grad_norm": 0.18136392533779144,
"learning_rate": 9.082531706852492e-05,
"loss": 0.0164,
"step": 18890
},
{
"grad_norm": 0.14097478985786438,
"learning_rate": 9.081338097564342e-05,
"loss": 0.0121,
"step": 18900
},
{
"grad_norm": 0.18584544956684113,
"learning_rate": 9.080143790887137e-05,
"loss": 0.0136,
"step": 18910
},
{
"grad_norm": 0.17267796397209167,
"learning_rate": 9.07894878702495e-05,
"loss": 0.0137,
"step": 18920
},
{
"grad_norm": 0.15958671271800995,
"learning_rate": 9.077753086181974e-05,
"loss": 0.014,
"step": 18930
},
{
"grad_norm": 0.1633959263563156,
"learning_rate": 9.076556688562524e-05,
"loss": 0.0128,
"step": 18940
},
{
"grad_norm": 0.20178668200969696,
"learning_rate": 9.075359594371029e-05,
"loss": 0.014,
"step": 18950
},
{
"grad_norm": 0.17557993531227112,
"learning_rate": 9.07416180381204e-05,
"loss": 0.0143,
"step": 18960
},
{
"grad_norm": 0.15889889001846313,
"learning_rate": 9.072963317090228e-05,
"loss": 0.0134,
"step": 18970
},
{
"grad_norm": 0.17338280379772186,
"learning_rate": 9.071764134410382e-05,
"loss": 0.0134,
"step": 18980
},
{
"grad_norm": 0.169789120554924,
"learning_rate": 9.070564255977407e-05,
"loss": 0.015,
"step": 18990
},
{
"grad_norm": 0.19041848182678223,
"learning_rate": 9.06936368199633e-05,
"loss": 0.0156,
"step": 19000
},
{
"grad_norm": 0.19369029998779297,
"learning_rate": 9.0681624126723e-05,
"loss": 0.0143,
"step": 19010
},
{
"grad_norm": 0.16650448739528656,
"learning_rate": 9.066960448210576e-05,
"loss": 0.0127,
"step": 19020
},
{
"grad_norm": 0.20635902881622314,
"learning_rate": 9.065757788816543e-05,
"loss": 0.0147,
"step": 19030
},
{
"grad_norm": 0.1792377382516861,
"learning_rate": 9.064554434695705e-05,
"loss": 0.0115,
"step": 19040
},
{
"grad_norm": 0.17120027542114258,
"learning_rate": 9.063350386053677e-05,
"loss": 0.0128,
"step": 19050
},
{
"grad_norm": 0.16933444142341614,
"learning_rate": 9.062145643096202e-05,
"loss": 0.0132,
"step": 19060
},
{
"grad_norm": 0.19243763387203217,
"learning_rate": 9.060940206029136e-05,
"loss": 0.0143,
"step": 19070
},
{
"grad_norm": 0.19466683268547058,
"learning_rate": 9.059734075058457e-05,
"loss": 0.0173,
"step": 19080
},
{
"grad_norm": 0.1642509400844574,
"learning_rate": 9.058527250390257e-05,
"loss": 0.0132,
"step": 19090
},
{
"grad_norm": 0.20794539153575897,
"learning_rate": 9.057319732230752e-05,
"loss": 0.0145,
"step": 19100
},
{
"grad_norm": 0.1727791279554367,
"learning_rate": 9.056111520786273e-05,
"loss": 0.0139,
"step": 19110
},
{
"grad_norm": 0.20383897423744202,
"learning_rate": 9.054902616263268e-05,
"loss": 0.0149,
"step": 19120
},
{
"grad_norm": 0.14843112230300903,
"learning_rate": 9.05369301886831e-05,
"loss": 0.0132,
"step": 19130
},
{
"grad_norm": 0.14206208288669586,
"learning_rate": 9.052482728808083e-05,
"loss": 0.0124,
"step": 19140
},
{
"grad_norm": 0.17277280986309052,
"learning_rate": 9.051271746289391e-05,
"loss": 0.0129,
"step": 19150
},
{
"grad_norm": 0.17671369016170502,
"learning_rate": 9.050060071519162e-05,
"loss": 0.0115,
"step": 19160
},
{
"grad_norm": 0.1999529004096985,
"learning_rate": 9.048847704704437e-05,
"loss": 0.0128,
"step": 19170
},
{
"grad_norm": 0.22170741856098175,
"learning_rate": 9.047634646052376e-05,
"loss": 0.0153,
"step": 19180
},
{
"grad_norm": 0.17207320034503937,
"learning_rate": 9.046420895770256e-05,
"loss": 0.0139,
"step": 19190
},
{
"grad_norm": 0.17688094079494476,
"learning_rate": 9.045206454065473e-05,
"loss": 0.0121,
"step": 19200
},
{
"grad_norm": 0.1338597536087036,
"learning_rate": 9.043991321145546e-05,
"loss": 0.0118,
"step": 19210
},
{
"grad_norm": 0.14638498425483704,
"learning_rate": 9.042775497218105e-05,
"loss": 0.0113,
"step": 19220
},
{
"grad_norm": 0.1648397445678711,
"learning_rate": 9.041558982490901e-05,
"loss": 0.0132,
"step": 19230
},
{
"grad_norm": 0.11956263333559036,
"learning_rate": 9.040341777171805e-05,
"loss": 0.0123,
"step": 19240
},
{
"grad_norm": 0.19842317700386047,
"learning_rate": 9.039123881468802e-05,
"loss": 0.0134,
"step": 19250
},
{
"grad_norm": 0.14097820222377777,
"learning_rate": 9.037905295589998e-05,
"loss": 0.0118,
"step": 19260
},
{
"grad_norm": 0.1730496734380722,
"learning_rate": 9.036686019743617e-05,
"loss": 0.0112,
"step": 19270
},
{
"grad_norm": 0.23227731883525848,
"learning_rate": 9.035466054137997e-05,
"loss": 0.0139,
"step": 19280
},
{
"grad_norm": 0.19463399052619934,
"learning_rate": 9.0342453989816e-05,
"loss": 0.0144,
"step": 19290
},
{
"grad_norm": 0.16545595228672028,
"learning_rate": 9.033024054483e-05,
"loss": 0.0115,
"step": 19300
},
{
"grad_norm": 0.12306548655033112,
"learning_rate": 9.031802020850894e-05,
"loss": 0.0114,
"step": 19310
},
{
"grad_norm": 0.18252205848693848,
"learning_rate": 9.030579298294092e-05,
"loss": 0.0124,
"step": 19320
},
{
"grad_norm": 0.2004423439502716,
"learning_rate": 9.029355887021524e-05,
"loss": 0.0156,
"step": 19330
},
{
"grad_norm": 0.151464581489563,
"learning_rate": 9.028131787242238e-05,
"loss": 0.0135,
"step": 19340
},
{
"grad_norm": 0.13323767483234406,
"learning_rate": 9.026906999165399e-05,
"loss": 0.0124,
"step": 19350
},
{
"grad_norm": 0.16443386673927307,
"learning_rate": 9.025681523000291e-05,
"loss": 0.013,
"step": 19360
},
{
"grad_norm": 0.19285282492637634,
"learning_rate": 9.024455358956315e-05,
"loss": 0.0123,
"step": 19370
},
{
"grad_norm": 0.21474361419677734,
"learning_rate": 9.023228507242984e-05,
"loss": 0.0136,
"step": 19380
},
{
"grad_norm": 0.20101943612098694,
"learning_rate": 9.022000968069937e-05,
"loss": 0.0137,
"step": 19390
},
{
"grad_norm": 0.17402178049087524,
"learning_rate": 9.020772741646928e-05,
"loss": 0.0136,
"step": 19400
},
{
"grad_norm": 0.22035923600196838,
"learning_rate": 9.019543828183826e-05,
"loss": 0.017,
"step": 19410
},
{
"grad_norm": 0.12223270535469055,
"learning_rate": 9.018314227890616e-05,
"loss": 0.0126,
"step": 19420
},
{
"grad_norm": 0.1647353619337082,
"learning_rate": 9.017083940977408e-05,
"loss": 0.0132,
"step": 19430
},
{
"grad_norm": 0.17604686319828033,
"learning_rate": 9.015852967654422e-05,
"loss": 0.0137,
"step": 19440
},
{
"grad_norm": 0.2017330378293991,
"learning_rate": 9.014621308131996e-05,
"loss": 0.0141,
"step": 19450
},
{
"grad_norm": 0.2677697241306305,
"learning_rate": 9.01338896262059e-05,
"loss": 0.012,
"step": 19460
},
{
"grad_norm": 0.18286541104316711,
"learning_rate": 9.012155931330777e-05,
"loss": 0.0153,
"step": 19470
},
{
"grad_norm": 0.18797534704208374,
"learning_rate": 9.010922214473246e-05,
"loss": 0.0147,
"step": 19480
},
{
"grad_norm": 0.20319977402687073,
"learning_rate": 9.009687812258808e-05,
"loss": 0.0132,
"step": 19490
},
{
"grad_norm": 0.18280743062496185,
"learning_rate": 9.00845272489839e-05,
"loss": 0.0107,
"step": 19500
},
{
"grad_norm": 0.1853189617395401,
"learning_rate": 9.007216952603031e-05,
"loss": 0.0155,
"step": 19510
},
{
"grad_norm": 0.1349164843559265,
"learning_rate": 9.005980495583894e-05,
"loss": 0.0118,
"step": 19520
},
{
"grad_norm": 0.22038881480693817,
"learning_rate": 9.004743354052252e-05,
"loss": 0.0137,
"step": 19530
},
{
"grad_norm": 0.16567367315292358,
"learning_rate": 9.003505528219503e-05,
"loss": 0.0118,
"step": 19540
},
{
"grad_norm": 0.2835250794887543,
"learning_rate": 9.002267018297154e-05,
"loss": 0.0148,
"step": 19550
},
{
"grad_norm": 0.15941092371940613,
"learning_rate": 9.001027824496834e-05,
"loss": 0.013,
"step": 19560
},
{
"grad_norm": 0.1713016778230667,
"learning_rate": 8.999787947030287e-05,
"loss": 0.0141,
"step": 19570
},
{
"grad_norm": 0.18185533583164215,
"learning_rate": 8.998547386109376e-05,
"loss": 0.0112,
"step": 19580
},
{
"grad_norm": 0.18436452746391296,
"learning_rate": 8.997306141946073e-05,
"loss": 0.0141,
"step": 19590
},
{
"grad_norm": 0.17156757414340973,
"learning_rate": 8.996064214752481e-05,
"loss": 0.0153,
"step": 19600
},
{
"grad_norm": 0.1925446093082428,
"learning_rate": 8.994821604740806e-05,
"loss": 0.0125,
"step": 19610
},
{
"grad_norm": 0.1612347662448883,
"learning_rate": 8.993578312123377e-05,
"loss": 0.0147,
"step": 19620
},
{
"grad_norm": 0.20712831616401672,
"learning_rate": 8.992334337112639e-05,
"loss": 0.0127,
"step": 19630
},
{
"grad_norm": 0.18162360787391663,
"learning_rate": 8.991089679921154e-05,
"loss": 0.0151,
"step": 19640
},
{
"grad_norm": 0.15992426872253418,
"learning_rate": 8.989844340761599e-05,
"loss": 0.0125,
"step": 19650
},
{
"grad_norm": 0.15727925300598145,
"learning_rate": 8.988598319846768e-05,
"loss": 0.0129,
"step": 19660
},
{
"grad_norm": 0.17979225516319275,
"learning_rate": 8.987351617389574e-05,
"loss": 0.0133,
"step": 19670
},
{
"grad_norm": 0.15485969185829163,
"learning_rate": 8.98610423360304e-05,
"loss": 0.0144,
"step": 19680
},
{
"grad_norm": 0.1544332504272461,
"learning_rate": 8.984856168700317e-05,
"loss": 0.0129,
"step": 19690
},
{
"grad_norm": 0.1792757511138916,
"learning_rate": 8.983607422894658e-05,
"loss": 0.0114,
"step": 19700
},
{
"grad_norm": 0.15564578771591187,
"learning_rate": 8.982357996399442e-05,
"loss": 0.0145,
"step": 19710
},
{
"grad_norm": 0.14563113451004028,
"learning_rate": 8.981107889428164e-05,
"loss": 0.0118,
"step": 19720
},
{
"grad_norm": 0.20107707381248474,
"learning_rate": 8.979857102194428e-05,
"loss": 0.0152,
"step": 19730
},
{
"grad_norm": 0.16668701171875,
"learning_rate": 8.978605634911968e-05,
"loss": 0.0132,
"step": 19740
},
{
"grad_norm": 0.17781060934066772,
"learning_rate": 8.977353487794616e-05,
"loss": 0.0129,
"step": 19750
},
{
"grad_norm": 0.13641557097434998,
"learning_rate": 8.976100661056334e-05,
"loss": 0.0119,
"step": 19760
},
{
"grad_norm": 0.21125675737857819,
"learning_rate": 8.974847154911197e-05,
"loss": 0.0148,
"step": 19770
},
{
"grad_norm": 0.19573289155960083,
"learning_rate": 8.973592969573393e-05,
"loss": 0.012,
"step": 19780
},
{
"grad_norm": 0.15342311561107635,
"learning_rate": 8.972338105257228e-05,
"loss": 0.0137,
"step": 19790
},
{
"grad_norm": 0.13895206153392792,
"learning_rate": 8.971082562177125e-05,
"loss": 0.0112,
"step": 19800
},
{
"grad_norm": 0.13383953273296356,
"learning_rate": 8.96982634054762e-05,
"loss": 0.0141,
"step": 19810
},
{
"grad_norm": 0.17109906673431396,
"learning_rate": 8.96856944058337e-05,
"loss": 0.0135,
"step": 19820
},
{
"grad_norm": 0.18697112798690796,
"learning_rate": 8.967311862499144e-05,
"loss": 0.0146,
"step": 19830
},
{
"grad_norm": 0.1723792403936386,
"learning_rate": 8.966053606509825e-05,
"loss": 0.0135,
"step": 19840
},
{
"grad_norm": 0.15005964040756226,
"learning_rate": 8.964794672830417e-05,
"loss": 0.0119,
"step": 19850
},
{
"grad_norm": 0.15086112916469574,
"learning_rate": 8.963535061676038e-05,
"loss": 0.0151,
"step": 19860
},
{
"grad_norm": 0.11962204426527023,
"learning_rate": 8.962274773261918e-05,
"loss": 0.0137,
"step": 19870
},
{
"grad_norm": 0.19353535771369934,
"learning_rate": 8.961013807803409e-05,
"loss": 0.0148,
"step": 19880
},
{
"grad_norm": 0.20438653230667114,
"learning_rate": 8.959752165515973e-05,
"loss": 0.013,
"step": 19890
},
{
"grad_norm": 0.18479126691818237,
"learning_rate": 8.958489846615193e-05,
"loss": 0.0143,
"step": 19900
},
{
"grad_norm": 0.152145653963089,
"learning_rate": 8.957226851316762e-05,
"loss": 0.0167,
"step": 19910
},
{
"grad_norm": 0.18823210895061493,
"learning_rate": 8.955963179836493e-05,
"loss": 0.0135,
"step": 19920
},
{
"grad_norm": 0.1508171707391739,
"learning_rate": 8.954698832390312e-05,
"loss": 0.0125,
"step": 19930
},
{
"grad_norm": 0.12273351103067398,
"learning_rate": 8.953433809194263e-05,
"loss": 0.0126,
"step": 19940
},
{
"grad_norm": 0.15448608994483948,
"learning_rate": 8.9521681104645e-05,
"loss": 0.0112,
"step": 19950
},
{
"grad_norm": 0.16210980713367462,
"learning_rate": 8.9509017364173e-05,
"loss": 0.0142,
"step": 19960
},
{
"grad_norm": 0.16296792030334473,
"learning_rate": 8.949634687269052e-05,
"loss": 0.0133,
"step": 19970
},
{
"grad_norm": 0.22923751175403595,
"learning_rate": 8.948366963236259e-05,
"loss": 0.0109,
"step": 19980
},
{
"grad_norm": 0.180278941988945,
"learning_rate": 8.947098564535538e-05,
"loss": 0.0108,
"step": 19990
},
{
"grad_norm": 0.1744961142539978,
"learning_rate": 8.945829491383627e-05,
"loss": 0.0131,
"step": 20000
}
],
"logging_steps": 10,
"max_steps": 80000,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 20000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 48,
"trial_name": null,
"trial_params": null
}