bottomtest1 / trainer_state.json
eddierex's picture
Upload GR00T_N1_5 checkpoint 20000 for OpenWBC bottle task
08f6817 verified
raw
history blame
280 kB
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 20000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"grad_norm": 4.4084038734436035,
"learning_rate": 9e-07,
"loss": 1.3469,
"step": 10
},
{
"grad_norm": 2.854828119277954,
"learning_rate": 1.9e-06,
"loss": 1.3141,
"step": 20
},
{
"grad_norm": 1.38284432888031,
"learning_rate": 2.9e-06,
"loss": 1.2086,
"step": 30
},
{
"grad_norm": 0.6830054521560669,
"learning_rate": 3.9e-06,
"loss": 1.1273,
"step": 40
},
{
"grad_norm": 0.3296288847923279,
"learning_rate": 4.9000000000000005e-06,
"loss": 1.0875,
"step": 50
},
{
"grad_norm": 0.3340240716934204,
"learning_rate": 5.9e-06,
"loss": 1.075,
"step": 60
},
{
"grad_norm": 0.2849108874797821,
"learning_rate": 6.900000000000001e-06,
"loss": 1.0703,
"step": 70
},
{
"grad_norm": 0.31928372383117676,
"learning_rate": 7.9e-06,
"loss": 1.0582,
"step": 80
},
{
"grad_norm": 0.3880952298641205,
"learning_rate": 8.9e-06,
"loss": 1.0555,
"step": 90
},
{
"grad_norm": 0.30215585231781006,
"learning_rate": 9.900000000000002e-06,
"loss": 1.048,
"step": 100
},
{
"grad_norm": 0.43695512413978577,
"learning_rate": 1.09e-05,
"loss": 1.043,
"step": 110
},
{
"grad_norm": 0.4662553668022156,
"learning_rate": 1.19e-05,
"loss": 1.0406,
"step": 120
},
{
"grad_norm": 0.7485678195953369,
"learning_rate": 1.29e-05,
"loss": 1.0172,
"step": 130
},
{
"grad_norm": 0.7978800535202026,
"learning_rate": 1.3900000000000002e-05,
"loss": 1.0072,
"step": 140
},
{
"grad_norm": 0.7703062295913696,
"learning_rate": 1.49e-05,
"loss": 0.9766,
"step": 150
},
{
"grad_norm": 1.0931109189987183,
"learning_rate": 1.59e-05,
"loss": 0.9051,
"step": 160
},
{
"grad_norm": 1.3214606046676636,
"learning_rate": 1.69e-05,
"loss": 0.8164,
"step": 170
},
{
"grad_norm": 1.6509478092193604,
"learning_rate": 1.79e-05,
"loss": 0.7283,
"step": 180
},
{
"grad_norm": 1.649338722229004,
"learning_rate": 1.8900000000000002e-05,
"loss": 0.6459,
"step": 190
},
{
"grad_norm": 1.5137853622436523,
"learning_rate": 1.9900000000000003e-05,
"loss": 0.5609,
"step": 200
},
{
"grad_norm": 1.676445722579956,
"learning_rate": 2.09e-05,
"loss": 0.5043,
"step": 210
},
{
"grad_norm": 1.5472042560577393,
"learning_rate": 2.19e-05,
"loss": 0.4341,
"step": 220
},
{
"grad_norm": 2.0228428840637207,
"learning_rate": 2.29e-05,
"loss": 0.38,
"step": 230
},
{
"grad_norm": 2.0979020595550537,
"learning_rate": 2.39e-05,
"loss": 0.3448,
"step": 240
},
{
"grad_norm": 1.9566534757614136,
"learning_rate": 2.4900000000000002e-05,
"loss": 0.3183,
"step": 250
},
{
"grad_norm": 1.620675802230835,
"learning_rate": 2.5900000000000003e-05,
"loss": 0.2805,
"step": 260
},
{
"grad_norm": 2.3286774158477783,
"learning_rate": 2.6900000000000003e-05,
"loss": 0.2455,
"step": 270
},
{
"grad_norm": 1.9605939388275146,
"learning_rate": 2.7900000000000004e-05,
"loss": 0.2306,
"step": 280
},
{
"grad_norm": 2.058300733566284,
"learning_rate": 2.8899999999999998e-05,
"loss": 0.2136,
"step": 290
},
{
"grad_norm": 2.3196494579315186,
"learning_rate": 2.9900000000000002e-05,
"loss": 0.1968,
"step": 300
},
{
"grad_norm": 2.2974038124084473,
"learning_rate": 3.09e-05,
"loss": 0.1902,
"step": 310
},
{
"grad_norm": 2.038285732269287,
"learning_rate": 3.19e-05,
"loss": 0.161,
"step": 320
},
{
"grad_norm": 1.532688856124878,
"learning_rate": 3.29e-05,
"loss": 0.1528,
"step": 330
},
{
"grad_norm": 2.092379331588745,
"learning_rate": 3.3900000000000004e-05,
"loss": 0.143,
"step": 340
},
{
"grad_norm": 2.0367798805236816,
"learning_rate": 3.49e-05,
"loss": 0.1339,
"step": 350
},
{
"grad_norm": 2.3606696128845215,
"learning_rate": 3.59e-05,
"loss": 0.1321,
"step": 360
},
{
"grad_norm": 1.742854118347168,
"learning_rate": 3.69e-05,
"loss": 0.1226,
"step": 370
},
{
"grad_norm": 2.3252644538879395,
"learning_rate": 3.79e-05,
"loss": 0.1165,
"step": 380
},
{
"grad_norm": 2.4858038425445557,
"learning_rate": 3.8900000000000004e-05,
"loss": 0.1215,
"step": 390
},
{
"grad_norm": 2.094243049621582,
"learning_rate": 3.99e-05,
"loss": 0.1252,
"step": 400
},
{
"grad_norm": 1.9110815525054932,
"learning_rate": 4.09e-05,
"loss": 0.1219,
"step": 410
},
{
"grad_norm": 2.131819725036621,
"learning_rate": 4.19e-05,
"loss": 0.1087,
"step": 420
},
{
"grad_norm": 2.0112192630767822,
"learning_rate": 4.29e-05,
"loss": 0.1132,
"step": 430
},
{
"grad_norm": 2.2110557556152344,
"learning_rate": 4.39e-05,
"loss": 0.1113,
"step": 440
},
{
"grad_norm": 2.171856164932251,
"learning_rate": 4.49e-05,
"loss": 0.1198,
"step": 450
},
{
"grad_norm": 1.8911032676696777,
"learning_rate": 4.5900000000000004e-05,
"loss": 0.1119,
"step": 460
},
{
"grad_norm": 2.165710926055908,
"learning_rate": 4.69e-05,
"loss": 0.1167,
"step": 470
},
{
"grad_norm": 2.065707206726074,
"learning_rate": 4.79e-05,
"loss": 0.1074,
"step": 480
},
{
"grad_norm": 1.9315952062606812,
"learning_rate": 4.89e-05,
"loss": 0.1016,
"step": 490
},
{
"grad_norm": 2.0874738693237305,
"learning_rate": 4.99e-05,
"loss": 0.0999,
"step": 500
},
{
"grad_norm": 1.8715001344680786,
"learning_rate": 5.0900000000000004e-05,
"loss": 0.0998,
"step": 510
},
{
"grad_norm": 1.6038424968719482,
"learning_rate": 5.19e-05,
"loss": 0.0966,
"step": 520
},
{
"grad_norm": 1.9660725593566895,
"learning_rate": 5.2900000000000005e-05,
"loss": 0.0977,
"step": 530
},
{
"grad_norm": 1.8537325859069824,
"learning_rate": 5.390000000000001e-05,
"loss": 0.1056,
"step": 540
},
{
"grad_norm": 1.6274349689483643,
"learning_rate": 5.4900000000000006e-05,
"loss": 0.094,
"step": 550
},
{
"grad_norm": 1.6068252325057983,
"learning_rate": 5.590000000000001e-05,
"loss": 0.1028,
"step": 560
},
{
"grad_norm": 1.5526419878005981,
"learning_rate": 5.69e-05,
"loss": 0.1021,
"step": 570
},
{
"grad_norm": 1.9871398210525513,
"learning_rate": 5.79e-05,
"loss": 0.0982,
"step": 580
},
{
"grad_norm": 1.8751955032348633,
"learning_rate": 5.89e-05,
"loss": 0.0894,
"step": 590
},
{
"grad_norm": 1.8565336465835571,
"learning_rate": 5.99e-05,
"loss": 0.0947,
"step": 600
},
{
"grad_norm": 1.7967112064361572,
"learning_rate": 6.09e-05,
"loss": 0.0876,
"step": 610
},
{
"grad_norm": 1.6690943241119385,
"learning_rate": 6.19e-05,
"loss": 0.0907,
"step": 620
},
{
"grad_norm": 1.8858799934387207,
"learning_rate": 6.29e-05,
"loss": 0.0909,
"step": 630
},
{
"grad_norm": 1.6637696027755737,
"learning_rate": 6.390000000000001e-05,
"loss": 0.0874,
"step": 640
},
{
"grad_norm": 1.7460479736328125,
"learning_rate": 6.49e-05,
"loss": 0.09,
"step": 650
},
{
"grad_norm": 1.7727495431900024,
"learning_rate": 6.59e-05,
"loss": 0.092,
"step": 660
},
{
"grad_norm": 1.4950904846191406,
"learning_rate": 6.690000000000001e-05,
"loss": 0.0923,
"step": 670
},
{
"grad_norm": 1.44473397731781,
"learning_rate": 6.790000000000001e-05,
"loss": 0.0891,
"step": 680
},
{
"grad_norm": 1.5126936435699463,
"learning_rate": 6.89e-05,
"loss": 0.0874,
"step": 690
},
{
"grad_norm": 1.394876480102539,
"learning_rate": 6.99e-05,
"loss": 0.0849,
"step": 700
},
{
"grad_norm": 1.681939721107483,
"learning_rate": 7.09e-05,
"loss": 0.0966,
"step": 710
},
{
"grad_norm": 1.6944371461868286,
"learning_rate": 7.19e-05,
"loss": 0.0864,
"step": 720
},
{
"grad_norm": 1.6403831243515015,
"learning_rate": 7.29e-05,
"loss": 0.0764,
"step": 730
},
{
"grad_norm": 1.59284245967865,
"learning_rate": 7.390000000000001e-05,
"loss": 0.0874,
"step": 740
},
{
"grad_norm": 1.707362174987793,
"learning_rate": 7.49e-05,
"loss": 0.0837,
"step": 750
},
{
"grad_norm": 1.5193947553634644,
"learning_rate": 7.59e-05,
"loss": 0.0939,
"step": 760
},
{
"grad_norm": 1.5924936532974243,
"learning_rate": 7.69e-05,
"loss": 0.0903,
"step": 770
},
{
"grad_norm": 1.5755540132522583,
"learning_rate": 7.790000000000001e-05,
"loss": 0.0854,
"step": 780
},
{
"grad_norm": 1.28225576877594,
"learning_rate": 7.890000000000001e-05,
"loss": 0.0902,
"step": 790
},
{
"grad_norm": 1.3005540370941162,
"learning_rate": 7.99e-05,
"loss": 0.0885,
"step": 800
},
{
"grad_norm": 1.4294734001159668,
"learning_rate": 8.090000000000001e-05,
"loss": 0.084,
"step": 810
},
{
"grad_norm": 1.44032883644104,
"learning_rate": 8.19e-05,
"loss": 0.0816,
"step": 820
},
{
"grad_norm": 1.2144129276275635,
"learning_rate": 8.29e-05,
"loss": 0.0845,
"step": 830
},
{
"grad_norm": 1.4295352697372437,
"learning_rate": 8.39e-05,
"loss": 0.0838,
"step": 840
},
{
"grad_norm": 1.4729937314987183,
"learning_rate": 8.49e-05,
"loss": 0.0849,
"step": 850
},
{
"grad_norm": 1.2805378437042236,
"learning_rate": 8.59e-05,
"loss": 0.0899,
"step": 860
},
{
"grad_norm": 1.2634493112564087,
"learning_rate": 8.69e-05,
"loss": 0.0815,
"step": 870
},
{
"grad_norm": 1.3772571086883545,
"learning_rate": 8.790000000000001e-05,
"loss": 0.0818,
"step": 880
},
{
"grad_norm": 1.173386573791504,
"learning_rate": 8.89e-05,
"loss": 0.0792,
"step": 890
},
{
"grad_norm": 1.140397548675537,
"learning_rate": 8.99e-05,
"loss": 0.0738,
"step": 900
},
{
"grad_norm": 1.2523930072784424,
"learning_rate": 9.090000000000001e-05,
"loss": 0.0728,
"step": 910
},
{
"grad_norm": 1.2446190118789673,
"learning_rate": 9.190000000000001e-05,
"loss": 0.0733,
"step": 920
},
{
"grad_norm": 1.123772144317627,
"learning_rate": 9.290000000000001e-05,
"loss": 0.0752,
"step": 930
},
{
"grad_norm": 1.2798773050308228,
"learning_rate": 9.39e-05,
"loss": 0.0718,
"step": 940
},
{
"grad_norm": 1.1665390729904175,
"learning_rate": 9.49e-05,
"loss": 0.08,
"step": 950
},
{
"grad_norm": 1.1190357208251953,
"learning_rate": 9.59e-05,
"loss": 0.0757,
"step": 960
},
{
"grad_norm": 1.172494888305664,
"learning_rate": 9.69e-05,
"loss": 0.0768,
"step": 970
},
{
"grad_norm": 1.2636840343475342,
"learning_rate": 9.790000000000001e-05,
"loss": 0.0777,
"step": 980
},
{
"grad_norm": 1.0782413482666016,
"learning_rate": 9.89e-05,
"loss": 0.0781,
"step": 990
},
{
"grad_norm": 1.1838284730911255,
"learning_rate": 9.99e-05,
"loss": 0.0818,
"step": 1000
},
{
"grad_norm": 1.178709626197815,
"learning_rate": 9.999994463727085e-05,
"loss": 0.0745,
"step": 1010
},
{
"grad_norm": 1.0647282600402832,
"learning_rate": 9.999975326009292e-05,
"loss": 0.0764,
"step": 1020
},
{
"grad_norm": 1.127692461013794,
"learning_rate": 9.999942518549879e-05,
"loss": 0.0814,
"step": 1030
},
{
"grad_norm": 1.0108965635299683,
"learning_rate": 9.999896041438544e-05,
"loss": 0.0792,
"step": 1040
},
{
"grad_norm": 0.9025714993476868,
"learning_rate": 9.999835894802353e-05,
"loss": 0.0722,
"step": 1050
},
{
"grad_norm": 1.064939022064209,
"learning_rate": 9.999762078805743e-05,
"loss": 0.0717,
"step": 1060
},
{
"grad_norm": 1.1429728269577026,
"learning_rate": 9.999674593650526e-05,
"loss": 0.069,
"step": 1070
},
{
"grad_norm": 1.0148142576217651,
"learning_rate": 9.99957343957588e-05,
"loss": 0.0643,
"step": 1080
},
{
"grad_norm": 0.9264249205589294,
"learning_rate": 9.99945861685836e-05,
"loss": 0.0679,
"step": 1090
},
{
"grad_norm": 1.1930052042007446,
"learning_rate": 9.999330125811884e-05,
"loss": 0.0737,
"step": 1100
},
{
"grad_norm": 1.004508137702942,
"learning_rate": 9.999187966787744e-05,
"loss": 0.0643,
"step": 1110
},
{
"grad_norm": 0.9657332897186279,
"learning_rate": 9.999032140174595e-05,
"loss": 0.0749,
"step": 1120
},
{
"grad_norm": 0.8985906839370728,
"learning_rate": 9.998862646398464e-05,
"loss": 0.0746,
"step": 1130
},
{
"grad_norm": 1.174508810043335,
"learning_rate": 9.998679485922739e-05,
"loss": 0.068,
"step": 1140
},
{
"grad_norm": 1.1320881843566895,
"learning_rate": 9.998482659248174e-05,
"loss": 0.0749,
"step": 1150
},
{
"grad_norm": 0.8892739415168762,
"learning_rate": 9.998272166912883e-05,
"loss": 0.065,
"step": 1160
},
{
"grad_norm": 1.0125502347946167,
"learning_rate": 9.998048009492347e-05,
"loss": 0.0676,
"step": 1170
},
{
"grad_norm": 0.9409289956092834,
"learning_rate": 9.997810187599403e-05,
"loss": 0.0691,
"step": 1180
},
{
"grad_norm": 1.0810723304748535,
"learning_rate": 9.997558701884249e-05,
"loss": 0.0672,
"step": 1190
},
{
"grad_norm": 0.9851813316345215,
"learning_rate": 9.997293553034433e-05,
"loss": 0.0711,
"step": 1200
},
{
"grad_norm": 0.9922870993614197,
"learning_rate": 9.997014741774866e-05,
"loss": 0.0646,
"step": 1210
},
{
"grad_norm": 0.8688313364982605,
"learning_rate": 9.996722268867803e-05,
"loss": 0.0598,
"step": 1220
},
{
"grad_norm": 0.7204305529594421,
"learning_rate": 9.996416135112858e-05,
"loss": 0.061,
"step": 1230
},
{
"grad_norm": 0.9210626482963562,
"learning_rate": 9.996096341346988e-05,
"loss": 0.0651,
"step": 1240
},
{
"grad_norm": 0.894849419593811,
"learning_rate": 9.995762888444495e-05,
"loss": 0.0677,
"step": 1250
},
{
"grad_norm": 0.8269813656806946,
"learning_rate": 9.995415777317027e-05,
"loss": 0.0586,
"step": 1260
},
{
"grad_norm": 0.705187976360321,
"learning_rate": 9.995055008913574e-05,
"loss": 0.0716,
"step": 1270
},
{
"grad_norm": 0.9853078722953796,
"learning_rate": 9.994680584220463e-05,
"loss": 0.0643,
"step": 1280
},
{
"grad_norm": 0.8162582516670227,
"learning_rate": 9.994292504261355e-05,
"loss": 0.0662,
"step": 1290
},
{
"grad_norm": 0.975835919380188,
"learning_rate": 9.993890770097247e-05,
"loss": 0.0635,
"step": 1300
},
{
"grad_norm": 1.0302761793136597,
"learning_rate": 9.993475382826467e-05,
"loss": 0.0572,
"step": 1310
},
{
"grad_norm": 0.7187492251396179,
"learning_rate": 9.993046343584664e-05,
"loss": 0.0697,
"step": 1320
},
{
"grad_norm": 0.7942295670509338,
"learning_rate": 9.992603653544816e-05,
"loss": 0.0577,
"step": 1330
},
{
"grad_norm": 0.8199202418327332,
"learning_rate": 9.992147313917222e-05,
"loss": 0.0529,
"step": 1340
},
{
"grad_norm": 0.8115194439888,
"learning_rate": 9.991677325949497e-05,
"loss": 0.0624,
"step": 1350
},
{
"grad_norm": 0.6720649600028992,
"learning_rate": 9.991193690926568e-05,
"loss": 0.0646,
"step": 1360
},
{
"grad_norm": 0.9936930537223816,
"learning_rate": 9.990696410170678e-05,
"loss": 0.0564,
"step": 1370
},
{
"grad_norm": 0.7325035929679871,
"learning_rate": 9.990185485041371e-05,
"loss": 0.0564,
"step": 1380
},
{
"grad_norm": 0.7739620804786682,
"learning_rate": 9.989660916935498e-05,
"loss": 0.0522,
"step": 1390
},
{
"grad_norm": 0.6889415979385376,
"learning_rate": 9.989122707287208e-05,
"loss": 0.0494,
"step": 1400
},
{
"grad_norm": 0.7498676776885986,
"learning_rate": 9.988570857567945e-05,
"loss": 0.0511,
"step": 1410
},
{
"grad_norm": 0.7862585783004761,
"learning_rate": 9.988005369286446e-05,
"loss": 0.0514,
"step": 1420
},
{
"grad_norm": 0.86005699634552,
"learning_rate": 9.987426243988734e-05,
"loss": 0.0594,
"step": 1430
},
{
"grad_norm": 0.7341459393501282,
"learning_rate": 9.986833483258114e-05,
"loss": 0.0524,
"step": 1440
},
{
"grad_norm": 0.6387414336204529,
"learning_rate": 9.986227088715173e-05,
"loss": 0.0502,
"step": 1450
},
{
"grad_norm": 0.6452542543411255,
"learning_rate": 9.98560706201777e-05,
"loss": 0.0552,
"step": 1460
},
{
"grad_norm": 0.6964207291603088,
"learning_rate": 9.984973404861036e-05,
"loss": 0.053,
"step": 1470
},
{
"grad_norm": 0.9187918305397034,
"learning_rate": 9.984326118977361e-05,
"loss": 0.0523,
"step": 1480
},
{
"grad_norm": 0.5924795269966125,
"learning_rate": 9.983665206136406e-05,
"loss": 0.0572,
"step": 1490
},
{
"grad_norm": 0.7473819851875305,
"learning_rate": 9.982990668145075e-05,
"loss": 0.0536,
"step": 1500
},
{
"grad_norm": 0.7378273606300354,
"learning_rate": 9.982302506847534e-05,
"loss": 0.0563,
"step": 1510
},
{
"grad_norm": 0.7275499105453491,
"learning_rate": 9.981600724125189e-05,
"loss": 0.054,
"step": 1520
},
{
"grad_norm": 0.8484076261520386,
"learning_rate": 9.980885321896685e-05,
"loss": 0.0517,
"step": 1530
},
{
"grad_norm": 0.7533107995986938,
"learning_rate": 9.980156302117905e-05,
"loss": 0.0555,
"step": 1540
},
{
"grad_norm": 0.7630091905593872,
"learning_rate": 9.979413666781963e-05,
"loss": 0.0537,
"step": 1550
},
{
"grad_norm": 0.6897077560424805,
"learning_rate": 9.978657417919193e-05,
"loss": 0.0524,
"step": 1560
},
{
"grad_norm": 0.8400923013687134,
"learning_rate": 9.977887557597153e-05,
"loss": 0.0539,
"step": 1570
},
{
"grad_norm": 0.9766692519187927,
"learning_rate": 9.97710408792061e-05,
"loss": 0.0511,
"step": 1580
},
{
"grad_norm": 0.8205817341804504,
"learning_rate": 9.976307011031542e-05,
"loss": 0.0521,
"step": 1590
},
{
"grad_norm": 0.8941313624382019,
"learning_rate": 9.975496329109126e-05,
"loss": 0.0572,
"step": 1600
},
{
"grad_norm": 0.68316650390625,
"learning_rate": 9.974672044369732e-05,
"loss": 0.0495,
"step": 1610
},
{
"grad_norm": 0.7722634077072144,
"learning_rate": 9.97383415906693e-05,
"loss": 0.048,
"step": 1620
},
{
"grad_norm": 0.7082585692405701,
"learning_rate": 9.97298267549146e-05,
"loss": 0.0501,
"step": 1630
},
{
"grad_norm": 0.9164435863494873,
"learning_rate": 9.972117595971249e-05,
"loss": 0.0517,
"step": 1640
},
{
"grad_norm": 0.6809993386268616,
"learning_rate": 9.971238922871391e-05,
"loss": 0.0471,
"step": 1650
},
{
"grad_norm": 0.5702314376831055,
"learning_rate": 9.970346658594142e-05,
"loss": 0.0449,
"step": 1660
},
{
"grad_norm": 0.773526668548584,
"learning_rate": 9.969440805578923e-05,
"loss": 0.0498,
"step": 1670
},
{
"grad_norm": 0.7197460532188416,
"learning_rate": 9.968521366302298e-05,
"loss": 0.0467,
"step": 1680
},
{
"grad_norm": 0.6625840067863464,
"learning_rate": 9.967588343277981e-05,
"loss": 0.045,
"step": 1690
},
{
"grad_norm": 0.8736388087272644,
"learning_rate": 9.966641739056818e-05,
"loss": 0.0545,
"step": 1700
},
{
"grad_norm": 0.750792384147644,
"learning_rate": 9.965681556226793e-05,
"loss": 0.0473,
"step": 1710
},
{
"grad_norm": 0.786637008190155,
"learning_rate": 9.964707797413006e-05,
"loss": 0.0507,
"step": 1720
},
{
"grad_norm": 0.6412456631660461,
"learning_rate": 9.963720465277679e-05,
"loss": 0.0487,
"step": 1730
},
{
"grad_norm": 0.7418937087059021,
"learning_rate": 9.96271956252014e-05,
"loss": 0.0466,
"step": 1740
},
{
"grad_norm": 0.654487669467926,
"learning_rate": 9.961705091876816e-05,
"loss": 0.0437,
"step": 1750
},
{
"grad_norm": 0.5268855690956116,
"learning_rate": 9.960677056121235e-05,
"loss": 0.0425,
"step": 1760
},
{
"grad_norm": 0.6897957921028137,
"learning_rate": 9.959635458064005e-05,
"loss": 0.0524,
"step": 1770
},
{
"grad_norm": 0.7622124552726746,
"learning_rate": 9.958580300552815e-05,
"loss": 0.0486,
"step": 1780
},
{
"grad_norm": 0.6903027296066284,
"learning_rate": 9.957511586472426e-05,
"loss": 0.0484,
"step": 1790
},
{
"grad_norm": 0.6610746383666992,
"learning_rate": 9.956429318744662e-05,
"loss": 0.0501,
"step": 1800
},
{
"grad_norm": 0.6257076263427734,
"learning_rate": 9.955333500328404e-05,
"loss": 0.0461,
"step": 1810
},
{
"grad_norm": 0.6198020577430725,
"learning_rate": 9.95422413421957e-05,
"loss": 0.0471,
"step": 1820
},
{
"grad_norm": 0.5837280750274658,
"learning_rate": 9.953101223451133e-05,
"loss": 0.0458,
"step": 1830
},
{
"grad_norm": 0.5286070704460144,
"learning_rate": 9.951964771093085e-05,
"loss": 0.0439,
"step": 1840
},
{
"grad_norm": 0.700628399848938,
"learning_rate": 9.950814780252442e-05,
"loss": 0.0449,
"step": 1850
},
{
"grad_norm": 0.659744918346405,
"learning_rate": 9.949651254073236e-05,
"loss": 0.0422,
"step": 1860
},
{
"grad_norm": 0.6888746619224548,
"learning_rate": 9.948474195736504e-05,
"loss": 0.0437,
"step": 1870
},
{
"grad_norm": 0.7431710958480835,
"learning_rate": 9.947283608460277e-05,
"loss": 0.0444,
"step": 1880
},
{
"grad_norm": 0.5975768566131592,
"learning_rate": 9.946079495499577e-05,
"loss": 0.0428,
"step": 1890
},
{
"grad_norm": 0.7167385816574097,
"learning_rate": 9.944861860146401e-05,
"loss": 0.0478,
"step": 1900
},
{
"grad_norm": 0.8195943236351013,
"learning_rate": 9.943630705729719e-05,
"loss": 0.0462,
"step": 1910
},
{
"grad_norm": 0.6716237664222717,
"learning_rate": 9.942386035615459e-05,
"loss": 0.0403,
"step": 1920
},
{
"grad_norm": 0.609624981880188,
"learning_rate": 9.941127853206503e-05,
"loss": 0.0417,
"step": 1930
},
{
"grad_norm": 0.5453461408615112,
"learning_rate": 9.939856161942673e-05,
"loss": 0.0513,
"step": 1940
},
{
"grad_norm": 0.6557628512382507,
"learning_rate": 9.938570965300724e-05,
"loss": 0.0426,
"step": 1950
},
{
"grad_norm": 0.7052960991859436,
"learning_rate": 9.937272266794335e-05,
"loss": 0.0427,
"step": 1960
},
{
"grad_norm": 0.7427675724029541,
"learning_rate": 9.935960069974096e-05,
"loss": 0.0437,
"step": 1970
},
{
"grad_norm": 0.7904492616653442,
"learning_rate": 9.934634378427506e-05,
"loss": 0.0474,
"step": 1980
},
{
"grad_norm": 0.6603449583053589,
"learning_rate": 9.933295195778954e-05,
"loss": 0.041,
"step": 1990
},
{
"grad_norm": 0.6774188876152039,
"learning_rate": 9.931942525689715e-05,
"loss": 0.0419,
"step": 2000
},
{
"grad_norm": 0.8625200390815735,
"learning_rate": 9.930576371857936e-05,
"loss": 0.0433,
"step": 2010
},
{
"grad_norm": 0.6208633184432983,
"learning_rate": 9.929196738018629e-05,
"loss": 0.0434,
"step": 2020
},
{
"grad_norm": 0.7620671391487122,
"learning_rate": 9.927803627943662e-05,
"loss": 0.0396,
"step": 2030
},
{
"grad_norm": 0.7727156281471252,
"learning_rate": 9.926397045441744e-05,
"loss": 0.0438,
"step": 2040
},
{
"grad_norm": 0.7673093676567078,
"learning_rate": 9.924976994358417e-05,
"loss": 0.0463,
"step": 2050
},
{
"grad_norm": 0.6097151041030884,
"learning_rate": 9.923543478576048e-05,
"loss": 0.0424,
"step": 2060
},
{
"grad_norm": 0.8347402215003967,
"learning_rate": 9.922096502013813e-05,
"loss": 0.0433,
"step": 2070
},
{
"grad_norm": 0.7491519451141357,
"learning_rate": 9.92063606862769e-05,
"loss": 0.0463,
"step": 2080
},
{
"grad_norm": 0.7769274711608887,
"learning_rate": 9.919162182410453e-05,
"loss": 0.0468,
"step": 2090
},
{
"grad_norm": 0.5936280488967896,
"learning_rate": 9.917674847391645e-05,
"loss": 0.0429,
"step": 2100
},
{
"grad_norm": 0.6929172873497009,
"learning_rate": 9.916174067637584e-05,
"loss": 0.0431,
"step": 2110
},
{
"grad_norm": 0.6833594441413879,
"learning_rate": 9.914659847251348e-05,
"loss": 0.0417,
"step": 2120
},
{
"grad_norm": 0.7112385630607605,
"learning_rate": 9.913132190372753e-05,
"loss": 0.044,
"step": 2130
},
{
"grad_norm": 0.48244747519493103,
"learning_rate": 9.911591101178359e-05,
"loss": 0.0437,
"step": 2140
},
{
"grad_norm": 0.7037522792816162,
"learning_rate": 9.910036583881443e-05,
"loss": 0.0414,
"step": 2150
},
{
"grad_norm": 0.6704358458518982,
"learning_rate": 9.908468642731995e-05,
"loss": 0.0366,
"step": 2160
},
{
"grad_norm": 0.5901470184326172,
"learning_rate": 9.906887282016707e-05,
"loss": 0.0446,
"step": 2170
},
{
"grad_norm": 0.6559775471687317,
"learning_rate": 9.90529250605896e-05,
"loss": 0.0422,
"step": 2180
},
{
"grad_norm": 0.8128564357757568,
"learning_rate": 9.903684319218809e-05,
"loss": 0.0444,
"step": 2190
},
{
"grad_norm": 0.5437960624694824,
"learning_rate": 9.902062725892976e-05,
"loss": 0.044,
"step": 2200
},
{
"grad_norm": 0.8856530785560608,
"learning_rate": 9.900427730514834e-05,
"loss": 0.0466,
"step": 2210
},
{
"grad_norm": 0.5385216474533081,
"learning_rate": 9.8987793375544e-05,
"loss": 0.0451,
"step": 2220
},
{
"grad_norm": 0.6559721231460571,
"learning_rate": 9.897117551518318e-05,
"loss": 0.0437,
"step": 2230
},
{
"grad_norm": 0.8500446081161499,
"learning_rate": 9.895442376949844e-05,
"loss": 0.0429,
"step": 2240
},
{
"grad_norm": 0.538585364818573,
"learning_rate": 9.893753818428845e-05,
"loss": 0.0475,
"step": 2250
},
{
"grad_norm": 0.608948826789856,
"learning_rate": 9.892051880571773e-05,
"loss": 0.0468,
"step": 2260
},
{
"grad_norm": 0.6505542993545532,
"learning_rate": 9.890336568031663e-05,
"loss": 0.0467,
"step": 2270
},
{
"grad_norm": 0.5538124442100525,
"learning_rate": 9.888607885498113e-05,
"loss": 0.0409,
"step": 2280
},
{
"grad_norm": 0.6162983775138855,
"learning_rate": 9.886865837697275e-05,
"loss": 0.0385,
"step": 2290
},
{
"grad_norm": 0.5613241195678711,
"learning_rate": 9.88511042939184e-05,
"loss": 0.0403,
"step": 2300
},
{
"grad_norm": 0.6898803114891052,
"learning_rate": 9.883341665381028e-05,
"loss": 0.0406,
"step": 2310
},
{
"grad_norm": 0.7443089485168457,
"learning_rate": 9.881559550500575e-05,
"loss": 0.04,
"step": 2320
},
{
"grad_norm": 0.6974295377731323,
"learning_rate": 9.879764089622712e-05,
"loss": 0.0449,
"step": 2330
},
{
"grad_norm": 0.6060269474983215,
"learning_rate": 9.87795528765616e-05,
"loss": 0.0416,
"step": 2340
},
{
"grad_norm": 0.6247705221176147,
"learning_rate": 9.876133149546118e-05,
"loss": 0.0404,
"step": 2350
},
{
"grad_norm": 0.5524375438690186,
"learning_rate": 9.874297680274238e-05,
"loss": 0.0429,
"step": 2360
},
{
"grad_norm": 0.6544674634933472,
"learning_rate": 9.872448884858624e-05,
"loss": 0.0419,
"step": 2370
},
{
"grad_norm": 0.7922749519348145,
"learning_rate": 9.870586768353815e-05,
"loss": 0.0403,
"step": 2380
},
{
"grad_norm": 0.5878010392189026,
"learning_rate": 9.868711335850764e-05,
"loss": 0.0413,
"step": 2390
},
{
"grad_norm": 0.577335774898529,
"learning_rate": 9.866822592476833e-05,
"loss": 0.0403,
"step": 2400
},
{
"grad_norm": 0.6214291453361511,
"learning_rate": 9.86492054339577e-05,
"loss": 0.036,
"step": 2410
},
{
"grad_norm": 0.6292899250984192,
"learning_rate": 9.863005193807711e-05,
"loss": 0.0387,
"step": 2420
},
{
"grad_norm": 0.6217591166496277,
"learning_rate": 9.861076548949143e-05,
"loss": 0.0386,
"step": 2430
},
{
"grad_norm": 0.642356812953949,
"learning_rate": 9.859134614092912e-05,
"loss": 0.0381,
"step": 2440
},
{
"grad_norm": 0.615687906742096,
"learning_rate": 9.857179394548191e-05,
"loss": 0.0387,
"step": 2450
},
{
"grad_norm": 0.6285825967788696,
"learning_rate": 9.855210895660477e-05,
"loss": 0.0422,
"step": 2460
},
{
"grad_norm": 0.5457389950752258,
"learning_rate": 9.853229122811568e-05,
"loss": 0.036,
"step": 2470
},
{
"grad_norm": 0.7624576091766357,
"learning_rate": 9.851234081419559e-05,
"loss": 0.0456,
"step": 2480
},
{
"grad_norm": 0.6762944459915161,
"learning_rate": 9.849225776938814e-05,
"loss": 0.039,
"step": 2490
},
{
"grad_norm": 0.607412576675415,
"learning_rate": 9.847204214859964e-05,
"loss": 0.0407,
"step": 2500
},
{
"grad_norm": 0.5367524027824402,
"learning_rate": 9.845169400709879e-05,
"loss": 0.0409,
"step": 2510
},
{
"grad_norm": 0.6632624864578247,
"learning_rate": 9.843121340051664e-05,
"loss": 0.0377,
"step": 2520
},
{
"grad_norm": 0.6622194647789001,
"learning_rate": 9.841060038484641e-05,
"loss": 0.0369,
"step": 2530
},
{
"grad_norm": 0.4699934124946594,
"learning_rate": 9.838985501644328e-05,
"loss": 0.0346,
"step": 2540
},
{
"grad_norm": 0.7941675186157227,
"learning_rate": 9.83689773520243e-05,
"loss": 0.0417,
"step": 2550
},
{
"grad_norm": 0.5687143802642822,
"learning_rate": 9.834796744866819e-05,
"loss": 0.0358,
"step": 2560
},
{
"grad_norm": 0.5689958333969116,
"learning_rate": 9.832682536381525e-05,
"loss": 0.0353,
"step": 2570
},
{
"grad_norm": 0.7229174971580505,
"learning_rate": 9.830555115526711e-05,
"loss": 0.0398,
"step": 2580
},
{
"grad_norm": 0.6610782742500305,
"learning_rate": 9.828414488118667e-05,
"loss": 0.0373,
"step": 2590
},
{
"grad_norm": 0.6783902049064636,
"learning_rate": 9.826260660009785e-05,
"loss": 0.0378,
"step": 2600
},
{
"grad_norm": 0.5838411450386047,
"learning_rate": 9.824093637088547e-05,
"loss": 0.0374,
"step": 2610
},
{
"grad_norm": 0.5158525109291077,
"learning_rate": 9.821913425279514e-05,
"loss": 0.0346,
"step": 2620
},
{
"grad_norm": 0.5010576248168945,
"learning_rate": 9.8197200305433e-05,
"loss": 0.0367,
"step": 2630
},
{
"grad_norm": 0.5873668193817139,
"learning_rate": 9.817513458876564e-05,
"loss": 0.0389,
"step": 2640
},
{
"grad_norm": 0.669737696647644,
"learning_rate": 9.815293716311987e-05,
"loss": 0.0391,
"step": 2650
},
{
"grad_norm": 0.6051730513572693,
"learning_rate": 9.813060808918262e-05,
"loss": 0.0358,
"step": 2660
},
{
"grad_norm": 0.6769428253173828,
"learning_rate": 9.810814742800069e-05,
"loss": 0.0376,
"step": 2670
},
{
"grad_norm": 0.5549777150154114,
"learning_rate": 9.808555524098074e-05,
"loss": 0.035,
"step": 2680
},
{
"grad_norm": 0.6324145197868347,
"learning_rate": 9.806283158988887e-05,
"loss": 0.037,
"step": 2690
},
{
"grad_norm": 0.522504448890686,
"learning_rate": 9.803997653685072e-05,
"loss": 0.0353,
"step": 2700
},
{
"grad_norm": 0.6257413029670715,
"learning_rate": 9.801699014435112e-05,
"loss": 0.0398,
"step": 2710
},
{
"grad_norm": 0.6243777871131897,
"learning_rate": 9.799387247523398e-05,
"loss": 0.0329,
"step": 2720
},
{
"grad_norm": 0.5824418663978577,
"learning_rate": 9.797062359270215e-05,
"loss": 0.0368,
"step": 2730
},
{
"grad_norm": 0.5653295516967773,
"learning_rate": 9.794724356031715e-05,
"loss": 0.038,
"step": 2740
},
{
"grad_norm": 0.5860871076583862,
"learning_rate": 9.792373244199913e-05,
"loss": 0.0366,
"step": 2750
},
{
"grad_norm": 0.6080830693244934,
"learning_rate": 9.790009030202658e-05,
"loss": 0.0391,
"step": 2760
},
{
"grad_norm": 0.5480026602745056,
"learning_rate": 9.78763172050362e-05,
"loss": 0.0342,
"step": 2770
},
{
"grad_norm": 0.4915301203727722,
"learning_rate": 9.785241321602274e-05,
"loss": 0.0382,
"step": 2780
},
{
"grad_norm": 0.5721039772033691,
"learning_rate": 9.782837840033879e-05,
"loss": 0.0387,
"step": 2790
},
{
"grad_norm": 0.6181113123893738,
"learning_rate": 9.780421282369461e-05,
"loss": 0.0343,
"step": 2800
},
{
"grad_norm": 0.5936803817749023,
"learning_rate": 9.777991655215797e-05,
"loss": 0.0325,
"step": 2810
},
{
"grad_norm": 0.6910586357116699,
"learning_rate": 9.775548965215394e-05,
"loss": 0.0314,
"step": 2820
},
{
"grad_norm": 0.5371761918067932,
"learning_rate": 9.773093219046474e-05,
"loss": 0.036,
"step": 2830
},
{
"grad_norm": 0.4973508417606354,
"learning_rate": 9.770624423422954e-05,
"loss": 0.0376,
"step": 2840
},
{
"grad_norm": 0.6510023474693298,
"learning_rate": 9.768142585094426e-05,
"loss": 0.0353,
"step": 2850
},
{
"grad_norm": 0.6040503978729248,
"learning_rate": 9.765647710846142e-05,
"loss": 0.0347,
"step": 2860
},
{
"grad_norm": 0.5976170301437378,
"learning_rate": 9.763139807498991e-05,
"loss": 0.034,
"step": 2870
},
{
"grad_norm": 0.6797834038734436,
"learning_rate": 9.760618881909487e-05,
"loss": 0.0339,
"step": 2880
},
{
"grad_norm": 0.5329861044883728,
"learning_rate": 9.758084940969744e-05,
"loss": 0.0317,
"step": 2890
},
{
"grad_norm": 0.6726340055465698,
"learning_rate": 9.755537991607459e-05,
"loss": 0.0319,
"step": 2900
},
{
"grad_norm": 0.608772873878479,
"learning_rate": 9.752978040785895e-05,
"loss": 0.0313,
"step": 2910
},
{
"grad_norm": 0.7140541076660156,
"learning_rate": 9.750405095503859e-05,
"loss": 0.0311,
"step": 2920
},
{
"grad_norm": 0.5499521493911743,
"learning_rate": 9.747819162795686e-05,
"loss": 0.0367,
"step": 2930
},
{
"grad_norm": 0.6744058728218079,
"learning_rate": 9.745220249731217e-05,
"loss": 0.0407,
"step": 2940
},
{
"grad_norm": 0.7178553342819214,
"learning_rate": 9.742608363415781e-05,
"loss": 0.041,
"step": 2950
},
{
"grad_norm": 0.6183580160140991,
"learning_rate": 9.739983510990176e-05,
"loss": 0.0394,
"step": 2960
},
{
"grad_norm": 0.553803026676178,
"learning_rate": 9.737345699630647e-05,
"loss": 0.0329,
"step": 2970
},
{
"grad_norm": 0.4599069654941559,
"learning_rate": 9.734694936548869e-05,
"loss": 0.0313,
"step": 2980
},
{
"grad_norm": 0.5753762125968933,
"learning_rate": 9.732031228991932e-05,
"loss": 0.0304,
"step": 2990
},
{
"grad_norm": 0.6017200946807861,
"learning_rate": 9.729354584242302e-05,
"loss": 0.0343,
"step": 3000
},
{
"grad_norm": 0.6461474299430847,
"learning_rate": 9.726665009617832e-05,
"loss": 0.0379,
"step": 3010
},
{
"grad_norm": 0.5760048627853394,
"learning_rate": 9.723962512471714e-05,
"loss": 0.0342,
"step": 3020
},
{
"grad_norm": 0.621891438961029,
"learning_rate": 9.72124710019247e-05,
"loss": 0.0316,
"step": 3030
},
{
"grad_norm": 0.524480402469635,
"learning_rate": 9.718518780203934e-05,
"loss": 0.0292,
"step": 3040
},
{
"grad_norm": 0.6318420171737671,
"learning_rate": 9.715777559965228e-05,
"loss": 0.0316,
"step": 3050
},
{
"grad_norm": 0.566210150718689,
"learning_rate": 9.713023446970746e-05,
"loss": 0.0309,
"step": 3060
},
{
"grad_norm": 0.704804003238678,
"learning_rate": 9.710256448750126e-05,
"loss": 0.0332,
"step": 3070
},
{
"grad_norm": 0.7099645733833313,
"learning_rate": 9.707476572868235e-05,
"loss": 0.0333,
"step": 3080
},
{
"grad_norm": 0.6047917604446411,
"learning_rate": 9.704683826925149e-05,
"loss": 0.0314,
"step": 3090
},
{
"grad_norm": 0.7446325421333313,
"learning_rate": 9.701878218556129e-05,
"loss": 0.035,
"step": 3100
},
{
"grad_norm": 0.6377901434898376,
"learning_rate": 9.699059755431598e-05,
"loss": 0.0387,
"step": 3110
},
{
"grad_norm": 0.5415878295898438,
"learning_rate": 9.696228445257132e-05,
"loss": 0.0299,
"step": 3120
},
{
"grad_norm": 0.6148945093154907,
"learning_rate": 9.693384295773419e-05,
"loss": 0.0391,
"step": 3130
},
{
"grad_norm": 0.578273594379425,
"learning_rate": 9.690527314756259e-05,
"loss": 0.0305,
"step": 3140
},
{
"grad_norm": 0.5767846703529358,
"learning_rate": 9.687657510016527e-05,
"loss": 0.0327,
"step": 3150
},
{
"grad_norm": 0.532577633857727,
"learning_rate": 9.684774889400161e-05,
"loss": 0.0276,
"step": 3160
},
{
"grad_norm": 0.5682797431945801,
"learning_rate": 9.681879460788135e-05,
"loss": 0.0295,
"step": 3170
},
{
"grad_norm": 0.6185994744300842,
"learning_rate": 9.67897123209644e-05,
"loss": 0.0306,
"step": 3180
},
{
"grad_norm": 0.6122663617134094,
"learning_rate": 9.676050211276062e-05,
"loss": 0.0323,
"step": 3190
},
{
"grad_norm": 0.6145076751708984,
"learning_rate": 9.673116406312962e-05,
"loss": 0.0289,
"step": 3200
},
{
"grad_norm": 0.7365098595619202,
"learning_rate": 9.67016982522805e-05,
"loss": 0.0264,
"step": 3210
},
{
"grad_norm": 0.6660006642341614,
"learning_rate": 9.667210476077164e-05,
"loss": 0.0307,
"step": 3220
},
{
"grad_norm": 0.43615856766700745,
"learning_rate": 9.664238366951055e-05,
"loss": 0.0347,
"step": 3230
},
{
"grad_norm": 0.5423561334609985,
"learning_rate": 9.661253505975355e-05,
"loss": 0.0286,
"step": 3240
},
{
"grad_norm": 0.6123466491699219,
"learning_rate": 9.658255901310557e-05,
"loss": 0.0297,
"step": 3250
},
{
"grad_norm": 0.5747650861740112,
"learning_rate": 9.655245561152e-05,
"loss": 0.0311,
"step": 3260
},
{
"grad_norm": 0.552060604095459,
"learning_rate": 9.65222249372984e-05,
"loss": 0.0287,
"step": 3270
},
{
"grad_norm": 0.5556790828704834,
"learning_rate": 9.649186707309026e-05,
"loss": 0.0274,
"step": 3280
},
{
"grad_norm": 0.5429115295410156,
"learning_rate": 9.646138210189283e-05,
"loss": 0.0297,
"step": 3290
},
{
"grad_norm": 0.6687198281288147,
"learning_rate": 9.643077010705087e-05,
"loss": 0.029,
"step": 3300
},
{
"grad_norm": 0.5589491724967957,
"learning_rate": 9.640003117225637e-05,
"loss": 0.0296,
"step": 3310
},
{
"grad_norm": 0.5250815749168396,
"learning_rate": 9.636916538154846e-05,
"loss": 0.0313,
"step": 3320
},
{
"grad_norm": 0.5884456038475037,
"learning_rate": 9.633817281931296e-05,
"loss": 0.0279,
"step": 3330
},
{
"grad_norm": 0.5253452658653259,
"learning_rate": 9.630705357028242e-05,
"loss": 0.0297,
"step": 3340
},
{
"grad_norm": 0.44990262389183044,
"learning_rate": 9.627580771953563e-05,
"loss": 0.0293,
"step": 3350
},
{
"grad_norm": 0.5788272023200989,
"learning_rate": 9.624443535249759e-05,
"loss": 0.028,
"step": 3360
},
{
"grad_norm": 0.4931959807872772,
"learning_rate": 9.621293655493913e-05,
"loss": 0.0282,
"step": 3370
},
{
"grad_norm": 0.49001696705818176,
"learning_rate": 9.618131141297675e-05,
"loss": 0.025,
"step": 3380
},
{
"grad_norm": 0.6005767583847046,
"learning_rate": 9.614956001307242e-05,
"loss": 0.0282,
"step": 3390
},
{
"grad_norm": 0.5819389224052429,
"learning_rate": 9.611768244203321e-05,
"loss": 0.0287,
"step": 3400
},
{
"grad_norm": 0.5611231923103333,
"learning_rate": 9.60856787870112e-05,
"loss": 0.0315,
"step": 3410
},
{
"grad_norm": 0.49898526072502136,
"learning_rate": 9.605354913550318e-05,
"loss": 0.0267,
"step": 3420
},
{
"grad_norm": 0.5152934789657593,
"learning_rate": 9.602129357535037e-05,
"loss": 0.0248,
"step": 3430
},
{
"grad_norm": 0.4927721917629242,
"learning_rate": 9.598891219473825e-05,
"loss": 0.0272,
"step": 3440
},
{
"grad_norm": 0.5922147035598755,
"learning_rate": 9.595640508219625e-05,
"loss": 0.0225,
"step": 3450
},
{
"grad_norm": 0.6173586845397949,
"learning_rate": 9.592377232659761e-05,
"loss": 0.0269,
"step": 3460
},
{
"grad_norm": 0.6940552592277527,
"learning_rate": 9.589101401715904e-05,
"loss": 0.0314,
"step": 3470
},
{
"grad_norm": 0.5696406364440918,
"learning_rate": 9.585813024344045e-05,
"loss": 0.0271,
"step": 3480
},
{
"grad_norm": 0.5202314853668213,
"learning_rate": 9.58251210953449e-05,
"loss": 0.0247,
"step": 3490
},
{
"grad_norm": 0.5222875475883484,
"learning_rate": 9.579198666311809e-05,
"loss": 0.0238,
"step": 3500
},
{
"grad_norm": 0.6505278944969177,
"learning_rate": 9.575872703734832e-05,
"loss": 0.0295,
"step": 3510
},
{
"grad_norm": 0.619454026222229,
"learning_rate": 9.572534230896611e-05,
"loss": 0.0268,
"step": 3520
},
{
"grad_norm": 0.48343542218208313,
"learning_rate": 9.569183256924403e-05,
"loss": 0.0269,
"step": 3530
},
{
"grad_norm": 0.4461105763912201,
"learning_rate": 9.565819790979646e-05,
"loss": 0.0255,
"step": 3540
},
{
"grad_norm": 0.5143640637397766,
"learning_rate": 9.562443842257925e-05,
"loss": 0.0257,
"step": 3550
},
{
"grad_norm": 0.5106943249702454,
"learning_rate": 9.559055419988956e-05,
"loss": 0.0265,
"step": 3560
},
{
"grad_norm": 0.6030021905899048,
"learning_rate": 9.555654533436557e-05,
"loss": 0.0269,
"step": 3570
},
{
"grad_norm": 0.5566527843475342,
"learning_rate": 9.552241191898621e-05,
"loss": 0.0279,
"step": 3580
},
{
"grad_norm": 0.4313097894191742,
"learning_rate": 9.548815404707092e-05,
"loss": 0.0246,
"step": 3590
},
{
"grad_norm": 0.6710801720619202,
"learning_rate": 9.545377181227942e-05,
"loss": 0.0242,
"step": 3600
},
{
"grad_norm": 0.4369684159755707,
"learning_rate": 9.541926530861145e-05,
"loss": 0.0238,
"step": 3610
},
{
"grad_norm": 0.48853105306625366,
"learning_rate": 9.538463463040645e-05,
"loss": 0.0226,
"step": 3620
},
{
"grad_norm": 0.5221312642097473,
"learning_rate": 9.534987987234337e-05,
"loss": 0.0267,
"step": 3630
},
{
"grad_norm": 0.5919408798217773,
"learning_rate": 9.53150011294404e-05,
"loss": 0.0234,
"step": 3640
},
{
"grad_norm": 0.5544537305831909,
"learning_rate": 9.527999849705471e-05,
"loss": 0.0229,
"step": 3650
},
{
"grad_norm": 0.5028519034385681,
"learning_rate": 9.524487207088213e-05,
"loss": 0.0229,
"step": 3660
},
{
"grad_norm": 0.5428268313407898,
"learning_rate": 9.520962194695698e-05,
"loss": 0.0234,
"step": 3670
},
{
"grad_norm": 0.4685244858264923,
"learning_rate": 9.517424822165175e-05,
"loss": 0.023,
"step": 3680
},
{
"grad_norm": 0.6033672094345093,
"learning_rate": 9.513875099167685e-05,
"loss": 0.028,
"step": 3690
},
{
"grad_norm": 0.5230802297592163,
"learning_rate": 9.510313035408035e-05,
"loss": 0.0211,
"step": 3700
},
{
"grad_norm": 0.5911836624145508,
"learning_rate": 9.506738640624775e-05,
"loss": 0.0226,
"step": 3710
},
{
"grad_norm": 0.4973941445350647,
"learning_rate": 9.50315192459016e-05,
"loss": 0.0233,
"step": 3720
},
{
"grad_norm": 0.5748140811920166,
"learning_rate": 9.499552897110136e-05,
"loss": 0.0258,
"step": 3730
},
{
"grad_norm": 0.5241221785545349,
"learning_rate": 9.495941568024304e-05,
"loss": 0.025,
"step": 3740
},
{
"grad_norm": 0.42789241671562195,
"learning_rate": 9.492317947205904e-05,
"loss": 0.0231,
"step": 3750
},
{
"grad_norm": 0.6186679601669312,
"learning_rate": 9.488682044561775e-05,
"loss": 0.0259,
"step": 3760
},
{
"grad_norm": 0.6295249462127686,
"learning_rate": 9.485033870032335e-05,
"loss": 0.0204,
"step": 3770
},
{
"grad_norm": 0.5628687739372253,
"learning_rate": 9.481373433591556e-05,
"loss": 0.0252,
"step": 3780
},
{
"grad_norm": 0.4300912916660309,
"learning_rate": 9.47770074524693e-05,
"loss": 0.0224,
"step": 3790
},
{
"grad_norm": 0.5620883703231812,
"learning_rate": 9.474015815039446e-05,
"loss": 0.0224,
"step": 3800
},
{
"grad_norm": 0.4041639268398285,
"learning_rate": 9.470318653043565e-05,
"loss": 0.0275,
"step": 3810
},
{
"grad_norm": 0.6825946569442749,
"learning_rate": 9.466609269367185e-05,
"loss": 0.0288,
"step": 3820
},
{
"grad_norm": 0.528630256652832,
"learning_rate": 9.46288767415162e-05,
"loss": 0.0222,
"step": 3830
},
{
"grad_norm": 0.5332393646240234,
"learning_rate": 9.459153877571567e-05,
"loss": 0.0231,
"step": 3840
},
{
"grad_norm": 0.41178372502326965,
"learning_rate": 9.455407889835087e-05,
"loss": 0.0234,
"step": 3850
},
{
"grad_norm": 0.4013999402523041,
"learning_rate": 9.451649721183564e-05,
"loss": 0.026,
"step": 3860
},
{
"grad_norm": 0.5963751673698425,
"learning_rate": 9.447879381891692e-05,
"loss": 0.0207,
"step": 3870
},
{
"grad_norm": 0.46209827065467834,
"learning_rate": 9.444096882267428e-05,
"loss": 0.0217,
"step": 3880
},
{
"grad_norm": 0.5197754502296448,
"learning_rate": 9.440302232651988e-05,
"loss": 0.0265,
"step": 3890
},
{
"grad_norm": 0.6915350556373596,
"learning_rate": 9.436495443419795e-05,
"loss": 0.0297,
"step": 3900
},
{
"grad_norm": 0.48685312271118164,
"learning_rate": 9.432676524978466e-05,
"loss": 0.0233,
"step": 3910
},
{
"grad_norm": 0.47423553466796875,
"learning_rate": 9.42884548776878e-05,
"loss": 0.0288,
"step": 3920
},
{
"grad_norm": 0.4048287868499756,
"learning_rate": 9.425002342264646e-05,
"loss": 0.0211,
"step": 3930
},
{
"grad_norm": 0.44486644864082336,
"learning_rate": 9.421147098973077e-05,
"loss": 0.0238,
"step": 3940
},
{
"grad_norm": 0.5044620633125305,
"learning_rate": 9.41727976843416e-05,
"loss": 0.023,
"step": 3950
},
{
"grad_norm": 0.6554492712020874,
"learning_rate": 9.413400361221029e-05,
"loss": 0.0224,
"step": 3960
},
{
"grad_norm": 0.5063493251800537,
"learning_rate": 9.409508887939835e-05,
"loss": 0.02,
"step": 3970
},
{
"grad_norm": 0.5956079959869385,
"learning_rate": 9.40560535922972e-05,
"loss": 0.02,
"step": 3980
},
{
"grad_norm": 0.6585922837257385,
"learning_rate": 9.40168978576278e-05,
"loss": 0.0224,
"step": 3990
},
{
"grad_norm": 0.4550802707672119,
"learning_rate": 9.397762178244043e-05,
"loss": 0.0235,
"step": 4000
},
{
"grad_norm": 0.4042211174964905,
"learning_rate": 9.393822547411439e-05,
"loss": 0.0208,
"step": 4010
},
{
"grad_norm": 0.4356859028339386,
"learning_rate": 9.389870904035769e-05,
"loss": 0.0199,
"step": 4020
},
{
"grad_norm": 0.45227131247520447,
"learning_rate": 9.385907258920672e-05,
"loss": 0.0227,
"step": 4030
},
{
"grad_norm": 0.4879400134086609,
"learning_rate": 9.381931622902607e-05,
"loss": 0.0192,
"step": 4040
},
{
"grad_norm": 0.48629337549209595,
"learning_rate": 9.377944006850807e-05,
"loss": 0.0225,
"step": 4050
},
{
"grad_norm": 0.4865284562110901,
"learning_rate": 9.373944421667265e-05,
"loss": 0.0212,
"step": 4060
},
{
"grad_norm": 0.6001793742179871,
"learning_rate": 9.369932878286691e-05,
"loss": 0.0184,
"step": 4070
},
{
"grad_norm": 0.5513994097709656,
"learning_rate": 9.365909387676494e-05,
"loss": 0.021,
"step": 4080
},
{
"grad_norm": 0.6248122453689575,
"learning_rate": 9.361873960836744e-05,
"loss": 0.0208,
"step": 4090
},
{
"grad_norm": 0.4238356947898865,
"learning_rate": 9.357826608800142e-05,
"loss": 0.0182,
"step": 4100
},
{
"grad_norm": 0.4596106708049774,
"learning_rate": 9.353767342631994e-05,
"loss": 0.023,
"step": 4110
},
{
"grad_norm": 0.39670488238334656,
"learning_rate": 9.34969617343018e-05,
"loss": 0.023,
"step": 4120
},
{
"grad_norm": 0.5592398047447205,
"learning_rate": 9.345613112325122e-05,
"loss": 0.0237,
"step": 4130
},
{
"grad_norm": 0.5025811195373535,
"learning_rate": 9.34151817047975e-05,
"loss": 0.0207,
"step": 4140
},
{
"grad_norm": 0.5580839514732361,
"learning_rate": 9.33741135908948e-05,
"loss": 0.0224,
"step": 4150
},
{
"grad_norm": 0.5201109051704407,
"learning_rate": 9.33329268938218e-05,
"loss": 0.0206,
"step": 4160
},
{
"grad_norm": 0.513161838054657,
"learning_rate": 9.329162172618132e-05,
"loss": 0.0231,
"step": 4170
},
{
"grad_norm": 0.4309871792793274,
"learning_rate": 9.325019820090013e-05,
"loss": 0.022,
"step": 4180
},
{
"grad_norm": 0.43937987089157104,
"learning_rate": 9.320865643122855e-05,
"loss": 0.0233,
"step": 4190
},
{
"grad_norm": 0.4912075400352478,
"learning_rate": 9.316699653074023e-05,
"loss": 0.0182,
"step": 4200
},
{
"grad_norm": 0.4246988892555237,
"learning_rate": 9.312521861333172e-05,
"loss": 0.0192,
"step": 4210
},
{
"grad_norm": 0.39220497012138367,
"learning_rate": 9.308332279322224e-05,
"loss": 0.0194,
"step": 4220
},
{
"grad_norm": 0.5391877293586731,
"learning_rate": 9.304130918495338e-05,
"loss": 0.02,
"step": 4230
},
{
"grad_norm": 0.4661605954170227,
"learning_rate": 9.299917790338874e-05,
"loss": 0.0201,
"step": 4240
},
{
"grad_norm": 0.3973395824432373,
"learning_rate": 9.295692906371363e-05,
"loss": 0.0213,
"step": 4250
},
{
"grad_norm": 0.4798072278499603,
"learning_rate": 9.291456278143476e-05,
"loss": 0.02,
"step": 4260
},
{
"grad_norm": 0.6149327158927917,
"learning_rate": 9.287207917237994e-05,
"loss": 0.0203,
"step": 4270
},
{
"grad_norm": 0.5336242318153381,
"learning_rate": 9.282947835269773e-05,
"loss": 0.0215,
"step": 4280
},
{
"grad_norm": 0.5178649425506592,
"learning_rate": 9.278676043885715e-05,
"loss": 0.0194,
"step": 4290
},
{
"grad_norm": 0.4814247190952301,
"learning_rate": 9.274392554764733e-05,
"loss": 0.0171,
"step": 4300
},
{
"grad_norm": 0.32441624999046326,
"learning_rate": 9.270097379617723e-05,
"loss": 0.0184,
"step": 4310
},
{
"grad_norm": 0.5904157757759094,
"learning_rate": 9.26579053018753e-05,
"loss": 0.0204,
"step": 4320
},
{
"grad_norm": 0.45750078558921814,
"learning_rate": 9.261472018248918e-05,
"loss": 0.0191,
"step": 4330
},
{
"grad_norm": 0.5417255759239197,
"learning_rate": 9.25714185560853e-05,
"loss": 0.0208,
"step": 4340
},
{
"grad_norm": 0.41097697615623474,
"learning_rate": 9.252800054104868e-05,
"loss": 0.0238,
"step": 4350
},
{
"grad_norm": 0.42212045192718506,
"learning_rate": 9.248446625608252e-05,
"loss": 0.0196,
"step": 4360
},
{
"grad_norm": 0.5629220008850098,
"learning_rate": 9.244081582020789e-05,
"loss": 0.0208,
"step": 4370
},
{
"grad_norm": 0.4684229791164398,
"learning_rate": 9.239704935276339e-05,
"loss": 0.0226,
"step": 4380
},
{
"grad_norm": 0.43924668431282043,
"learning_rate": 9.235316697340489e-05,
"loss": 0.0247,
"step": 4390
},
{
"grad_norm": 0.5363799929618835,
"learning_rate": 9.230916880210512e-05,
"loss": 0.0193,
"step": 4400
},
{
"grad_norm": 0.4902143180370331,
"learning_rate": 9.226505495915342e-05,
"loss": 0.0204,
"step": 4410
},
{
"grad_norm": 0.4934154450893402,
"learning_rate": 9.222082556515536e-05,
"loss": 0.0204,
"step": 4420
},
{
"grad_norm": 0.5217421650886536,
"learning_rate": 9.217648074103242e-05,
"loss": 0.0187,
"step": 4430
},
{
"grad_norm": 0.5157290101051331,
"learning_rate": 9.213202060802161e-05,
"loss": 0.0193,
"step": 4440
},
{
"grad_norm": 0.48323342204093933,
"learning_rate": 9.208744528767528e-05,
"loss": 0.0222,
"step": 4450
},
{
"grad_norm": 0.46333587169647217,
"learning_rate": 9.204275490186064e-05,
"loss": 0.0188,
"step": 4460
},
{
"grad_norm": 0.48506951332092285,
"learning_rate": 9.199794957275949e-05,
"loss": 0.0181,
"step": 4470
},
{
"grad_norm": 0.547903835773468,
"learning_rate": 9.19530294228679e-05,
"loss": 0.0201,
"step": 4480
},
{
"grad_norm": 0.5428500771522522,
"learning_rate": 9.190799457499583e-05,
"loss": 0.023,
"step": 4490
},
{
"grad_norm": 0.4775592088699341,
"learning_rate": 9.186284515226686e-05,
"loss": 0.0187,
"step": 4500
},
{
"grad_norm": 0.515190839767456,
"learning_rate": 9.181758127811777e-05,
"loss": 0.0174,
"step": 4510
},
{
"grad_norm": 0.523912250995636,
"learning_rate": 9.177220307629825e-05,
"loss": 0.0183,
"step": 4520
},
{
"grad_norm": 0.46641191840171814,
"learning_rate": 9.172671067087059e-05,
"loss": 0.0218,
"step": 4530
},
{
"grad_norm": 0.4439499080181122,
"learning_rate": 9.16811041862093e-05,
"loss": 0.0194,
"step": 4540
},
{
"grad_norm": 0.39287662506103516,
"learning_rate": 9.163538374700076e-05,
"loss": 0.0174,
"step": 4550
},
{
"grad_norm": 0.49717986583709717,
"learning_rate": 9.158954947824287e-05,
"loss": 0.0192,
"step": 4560
},
{
"grad_norm": 0.4734252691268921,
"learning_rate": 9.154360150524482e-05,
"loss": 0.0177,
"step": 4570
},
{
"grad_norm": 0.37044256925582886,
"learning_rate": 9.14975399536266e-05,
"loss": 0.0216,
"step": 4580
},
{
"grad_norm": 0.36238211393356323,
"learning_rate": 9.14513649493187e-05,
"loss": 0.0241,
"step": 4590
},
{
"grad_norm": 0.3750474750995636,
"learning_rate": 9.140507661856187e-05,
"loss": 0.0188,
"step": 4600
},
{
"grad_norm": 0.6232521533966064,
"learning_rate": 9.135867508790661e-05,
"loss": 0.0232,
"step": 4610
},
{
"grad_norm": 0.40693479776382446,
"learning_rate": 9.131216048421291e-05,
"loss": 0.0191,
"step": 4620
},
{
"grad_norm": 0.46917590498924255,
"learning_rate": 9.126553293464998e-05,
"loss": 0.0192,
"step": 4630
},
{
"grad_norm": 0.6374298930168152,
"learning_rate": 9.121879256669572e-05,
"loss": 0.0209,
"step": 4640
},
{
"grad_norm": 0.49459344148635864,
"learning_rate": 9.117193950813652e-05,
"loss": 0.0191,
"step": 4650
},
{
"grad_norm": 0.3625936508178711,
"learning_rate": 9.112497388706685e-05,
"loss": 0.02,
"step": 4660
},
{
"grad_norm": 0.5517452955245972,
"learning_rate": 9.10778958318889e-05,
"loss": 0.0178,
"step": 4670
},
{
"grad_norm": 0.5170832276344299,
"learning_rate": 9.103070547131232e-05,
"loss": 0.0172,
"step": 4680
},
{
"grad_norm": 0.5125618577003479,
"learning_rate": 9.098340293435375e-05,
"loss": 0.0172,
"step": 4690
},
{
"grad_norm": 0.6093714833259583,
"learning_rate": 9.093598835033649e-05,
"loss": 0.0253,
"step": 4700
},
{
"grad_norm": 0.48880383372306824,
"learning_rate": 9.088846184889021e-05,
"loss": 0.02,
"step": 4710
},
{
"grad_norm": 0.3739651143550873,
"learning_rate": 9.084082355995057e-05,
"loss": 0.0172,
"step": 4720
},
{
"grad_norm": 0.41044220328330994,
"learning_rate": 9.079307361375882e-05,
"loss": 0.0178,
"step": 4730
},
{
"grad_norm": 0.42307567596435547,
"learning_rate": 9.074521214086149e-05,
"loss": 0.0184,
"step": 4740
},
{
"grad_norm": 0.39302605390548706,
"learning_rate": 9.069723927211001e-05,
"loss": 0.0175,
"step": 4750
},
{
"grad_norm": 0.4822540581226349,
"learning_rate": 9.064915513866037e-05,
"loss": 0.0205,
"step": 4760
},
{
"grad_norm": 0.4448596239089966,
"learning_rate": 9.060095987197279e-05,
"loss": 0.0246,
"step": 4770
},
{
"grad_norm": 0.3715053200721741,
"learning_rate": 9.055265360381126e-05,
"loss": 0.0182,
"step": 4780
},
{
"grad_norm": 0.3781905770301819,
"learning_rate": 9.050423646624326e-05,
"loss": 0.0218,
"step": 4790
},
{
"grad_norm": 0.4780808091163635,
"learning_rate": 9.045570859163943e-05,
"loss": 0.0203,
"step": 4800
},
{
"grad_norm": 0.45449933409690857,
"learning_rate": 9.04070701126731e-05,
"loss": 0.0168,
"step": 4810
},
{
"grad_norm": 0.455399751663208,
"learning_rate": 9.035832116232001e-05,
"loss": 0.0192,
"step": 4820
},
{
"grad_norm": 0.569148600101471,
"learning_rate": 9.030946187385796e-05,
"loss": 0.0191,
"step": 4830
},
{
"grad_norm": 0.39798951148986816,
"learning_rate": 9.026049238086635e-05,
"loss": 0.0183,
"step": 4840
},
{
"grad_norm": 0.4741237759590149,
"learning_rate": 9.021141281722591e-05,
"loss": 0.0185,
"step": 4850
},
{
"grad_norm": 0.44035014510154724,
"learning_rate": 9.01622233171183e-05,
"loss": 0.0169,
"step": 4860
},
{
"grad_norm": 0.4502478241920471,
"learning_rate": 9.011292401502574e-05,
"loss": 0.0207,
"step": 4870
},
{
"grad_norm": 0.38435491919517517,
"learning_rate": 9.006351504573063e-05,
"loss": 0.0183,
"step": 4880
},
{
"grad_norm": 0.36374086141586304,
"learning_rate": 9.001399654431519e-05,
"loss": 0.0195,
"step": 4890
},
{
"grad_norm": 0.4774911105632782,
"learning_rate": 8.996436864616116e-05,
"loss": 0.0177,
"step": 4900
},
{
"grad_norm": 0.3256385624408722,
"learning_rate": 8.991463148694925e-05,
"loss": 0.0166,
"step": 4910
},
{
"grad_norm": 0.5121222734451294,
"learning_rate": 8.986478520265902e-05,
"loss": 0.0191,
"step": 4920
},
{
"grad_norm": 0.5058743953704834,
"learning_rate": 8.981482992956827e-05,
"loss": 0.0202,
"step": 4930
},
{
"grad_norm": 0.436591774225235,
"learning_rate": 8.976476580425282e-05,
"loss": 0.0165,
"step": 4940
},
{
"grad_norm": 0.4705229699611664,
"learning_rate": 8.971459296358606e-05,
"loss": 0.0195,
"step": 4950
},
{
"grad_norm": 0.423905611038208,
"learning_rate": 8.966431154473864e-05,
"loss": 0.0201,
"step": 4960
},
{
"grad_norm": 0.4129910469055176,
"learning_rate": 8.961392168517803e-05,
"loss": 0.0149,
"step": 4970
},
{
"grad_norm": 0.45044130086898804,
"learning_rate": 8.956342352266821e-05,
"loss": 0.0209,
"step": 4980
},
{
"grad_norm": 0.43892666697502136,
"learning_rate": 8.95128171952692e-05,
"loss": 0.0204,
"step": 4990
},
{
"grad_norm": 0.4579017758369446,
"learning_rate": 8.946210284133676e-05,
"loss": 0.0172,
"step": 5000
},
{
"grad_norm": 0.38334211707115173,
"learning_rate": 8.941128059952201e-05,
"loss": 0.0176,
"step": 5010
},
{
"grad_norm": 0.4932248592376709,
"learning_rate": 8.936035060877102e-05,
"loss": 0.0197,
"step": 5020
},
{
"grad_norm": 0.4217004179954529,
"learning_rate": 8.930931300832443e-05,
"loss": 0.0159,
"step": 5030
},
{
"grad_norm": 0.5817981958389282,
"learning_rate": 8.925816793771711e-05,
"loss": 0.0162,
"step": 5040
},
{
"grad_norm": 0.43254992365837097,
"learning_rate": 8.92069155367777e-05,
"loss": 0.0193,
"step": 5050
},
{
"grad_norm": 0.5035724639892578,
"learning_rate": 8.915555594562834e-05,
"loss": 0.0208,
"step": 5060
},
{
"grad_norm": 0.4568447172641754,
"learning_rate": 8.910408930468416e-05,
"loss": 0.0165,
"step": 5070
},
{
"grad_norm": 0.43123510479927063,
"learning_rate": 8.905251575465303e-05,
"loss": 0.0158,
"step": 5080
},
{
"grad_norm": 0.4527429938316345,
"learning_rate": 8.900083543653502e-05,
"loss": 0.0169,
"step": 5090
},
{
"grad_norm": 0.41953304409980774,
"learning_rate": 8.894904849162218e-05,
"loss": 0.0184,
"step": 5100
},
{
"grad_norm": 0.415479838848114,
"learning_rate": 8.889715506149802e-05,
"loss": 0.0184,
"step": 5110
},
{
"grad_norm": 0.3731771409511566,
"learning_rate": 8.884515528803722e-05,
"loss": 0.0154,
"step": 5120
},
{
"grad_norm": 0.41724300384521484,
"learning_rate": 8.879304931340517e-05,
"loss": 0.0163,
"step": 5130
},
{
"grad_norm": 0.47969841957092285,
"learning_rate": 8.874083728005759e-05,
"loss": 0.0177,
"step": 5140
},
{
"grad_norm": 0.34264758229255676,
"learning_rate": 8.868851933074021e-05,
"loss": 0.0182,
"step": 5150
},
{
"grad_norm": 0.4063286781311035,
"learning_rate": 8.863609560848829e-05,
"loss": 0.0182,
"step": 5160
},
{
"grad_norm": 0.42877396941185,
"learning_rate": 8.85835662566263e-05,
"loss": 0.0165,
"step": 5170
},
{
"grad_norm": 0.4081937372684479,
"learning_rate": 8.853093141876747e-05,
"loss": 0.0148,
"step": 5180
},
{
"grad_norm": 0.40108522772789,
"learning_rate": 8.847819123881343e-05,
"loss": 0.0174,
"step": 5190
},
{
"grad_norm": 0.4415458142757416,
"learning_rate": 8.842534586095383e-05,
"loss": 0.0165,
"step": 5200
},
{
"grad_norm": 0.48253053426742554,
"learning_rate": 8.837239542966593e-05,
"loss": 0.0212,
"step": 5210
},
{
"grad_norm": 0.4505639970302582,
"learning_rate": 8.831934008971417e-05,
"loss": 0.0211,
"step": 5220
},
{
"grad_norm": 0.4596928656101227,
"learning_rate": 8.826617998614982e-05,
"loss": 0.0182,
"step": 5230
},
{
"grad_norm": 0.4037300646305084,
"learning_rate": 8.821291526431056e-05,
"loss": 0.0146,
"step": 5240
},
{
"grad_norm": 0.3879428505897522,
"learning_rate": 8.815954606982015e-05,
"loss": 0.0173,
"step": 5250
},
{
"grad_norm": 0.4592052102088928,
"learning_rate": 8.810607254858789e-05,
"loss": 0.0158,
"step": 5260
},
{
"grad_norm": 0.43258506059646606,
"learning_rate": 8.805249484680838e-05,
"loss": 0.0176,
"step": 5270
},
{
"grad_norm": 0.5278488993644714,
"learning_rate": 8.799881311096096e-05,
"loss": 0.0175,
"step": 5280
},
{
"grad_norm": 0.536786675453186,
"learning_rate": 8.794502748780949e-05,
"loss": 0.0191,
"step": 5290
},
{
"grad_norm": 0.39231422543525696,
"learning_rate": 8.78911381244018e-05,
"loss": 0.0213,
"step": 5300
},
{
"grad_norm": 0.3669500946998596,
"learning_rate": 8.783714516806933e-05,
"loss": 0.0227,
"step": 5310
},
{
"grad_norm": 0.4094989001750946,
"learning_rate": 8.77830487664268e-05,
"loss": 0.0239,
"step": 5320
},
{
"grad_norm": 0.3868635296821594,
"learning_rate": 8.772884906737167e-05,
"loss": 0.0174,
"step": 5330
},
{
"grad_norm": 0.4408529996871948,
"learning_rate": 8.767454621908387e-05,
"loss": 0.02,
"step": 5340
},
{
"grad_norm": 0.4143405258655548,
"learning_rate": 8.76201403700253e-05,
"loss": 0.0188,
"step": 5350
},
{
"grad_norm": 0.4451104402542114,
"learning_rate": 8.756563166893949e-05,
"loss": 0.0157,
"step": 5360
},
{
"grad_norm": 0.45531290769577026,
"learning_rate": 8.751102026485113e-05,
"loss": 0.0164,
"step": 5370
},
{
"grad_norm": 0.4830329418182373,
"learning_rate": 8.745630630706571e-05,
"loss": 0.0164,
"step": 5380
},
{
"grad_norm": 0.390863835811615,
"learning_rate": 8.740148994516912e-05,
"loss": 0.0157,
"step": 5390
},
{
"grad_norm": 0.32199040055274963,
"learning_rate": 8.73465713290272e-05,
"loss": 0.0147,
"step": 5400
},
{
"grad_norm": 0.4165448844432831,
"learning_rate": 8.729155060878533e-05,
"loss": 0.0162,
"step": 5410
},
{
"grad_norm": 0.3976683020591736,
"learning_rate": 8.723642793486809e-05,
"loss": 0.0155,
"step": 5420
},
{
"grad_norm": 0.38279062509536743,
"learning_rate": 8.718120345797873e-05,
"loss": 0.0187,
"step": 5430
},
{
"grad_norm": 0.3204955458641052,
"learning_rate": 8.712587732909889e-05,
"loss": 0.0173,
"step": 5440
},
{
"grad_norm": 0.4341621398925781,
"learning_rate": 8.707044969948806e-05,
"loss": 0.0164,
"step": 5450
},
{
"grad_norm": 0.38356325030326843,
"learning_rate": 8.701492072068329e-05,
"loss": 0.0171,
"step": 5460
},
{
"grad_norm": 0.45178765058517456,
"learning_rate": 8.695929054449869e-05,
"loss": 0.0159,
"step": 5470
},
{
"grad_norm": 0.34176012873649597,
"learning_rate": 8.690355932302501e-05,
"loss": 0.0191,
"step": 5480
},
{
"grad_norm": 0.4089793562889099,
"learning_rate": 8.684772720862931e-05,
"loss": 0.0171,
"step": 5490
},
{
"grad_norm": 0.48740607500076294,
"learning_rate": 8.679179435395446e-05,
"loss": 0.0156,
"step": 5500
},
{
"grad_norm": 0.45742067694664,
"learning_rate": 8.673576091191874e-05,
"loss": 0.018,
"step": 5510
},
{
"grad_norm": 0.334735244512558,
"learning_rate": 8.667962703571541e-05,
"loss": 0.0194,
"step": 5520
},
{
"grad_norm": 0.40391767024993896,
"learning_rate": 8.662339287881238e-05,
"loss": 0.0168,
"step": 5530
},
{
"grad_norm": 0.46624624729156494,
"learning_rate": 8.656705859495169e-05,
"loss": 0.0178,
"step": 5540
},
{
"grad_norm": 0.33784207701683044,
"learning_rate": 8.651062433814912e-05,
"loss": 0.0173,
"step": 5550
},
{
"grad_norm": 0.4161977469921112,
"learning_rate": 8.645409026269375e-05,
"loss": 0.0162,
"step": 5560
},
{
"grad_norm": 0.4053378701210022,
"learning_rate": 8.639745652314759e-05,
"loss": 0.0168,
"step": 5570
},
{
"grad_norm": 0.3393418490886688,
"learning_rate": 8.634072327434515e-05,
"loss": 0.0149,
"step": 5580
},
{
"grad_norm": 0.40152478218078613,
"learning_rate": 8.628389067139294e-05,
"loss": 0.0163,
"step": 5590
},
{
"grad_norm": 0.3247140347957611,
"learning_rate": 8.622695886966911e-05,
"loss": 0.0154,
"step": 5600
},
{
"grad_norm": 0.48909687995910645,
"learning_rate": 8.616992802482308e-05,
"loss": 0.0143,
"step": 5610
},
{
"grad_norm": 0.4496266841888428,
"learning_rate": 8.611279829277496e-05,
"loss": 0.0148,
"step": 5620
},
{
"grad_norm": 0.4135376513004303,
"learning_rate": 8.605556982971528e-05,
"loss": 0.0142,
"step": 5630
},
{
"grad_norm": 0.47809404134750366,
"learning_rate": 8.599824279210447e-05,
"loss": 0.0142,
"step": 5640
},
{
"grad_norm": 0.5461645126342773,
"learning_rate": 8.594081733667243e-05,
"loss": 0.0149,
"step": 5650
},
{
"grad_norm": 0.358334481716156,
"learning_rate": 8.58832936204182e-05,
"loss": 0.0227,
"step": 5660
},
{
"grad_norm": 0.5356438755989075,
"learning_rate": 8.582567180060942e-05,
"loss": 0.0153,
"step": 5670
},
{
"grad_norm": 0.5985286831855774,
"learning_rate": 8.576795203478194e-05,
"loss": 0.0173,
"step": 5680
},
{
"grad_norm": 0.44569990038871765,
"learning_rate": 8.571013448073939e-05,
"loss": 0.0193,
"step": 5690
},
{
"grad_norm": 0.4444361627101898,
"learning_rate": 8.565221929655275e-05,
"loss": 0.0216,
"step": 5700
},
{
"grad_norm": 0.3684118986129761,
"learning_rate": 8.559420664055992e-05,
"loss": 0.0207,
"step": 5710
},
{
"grad_norm": 0.45600274205207825,
"learning_rate": 8.553609667136532e-05,
"loss": 0.0202,
"step": 5720
},
{
"grad_norm": 0.45347586274147034,
"learning_rate": 8.547788954783936e-05,
"loss": 0.02,
"step": 5730
},
{
"grad_norm": 0.477226585149765,
"learning_rate": 8.541958542911808e-05,
"loss": 0.0232,
"step": 5740
},
{
"grad_norm": 0.4014797508716583,
"learning_rate": 8.536118447460275e-05,
"loss": 0.0206,
"step": 5750
},
{
"grad_norm": 0.3849544823169708,
"learning_rate": 8.530268684395932e-05,
"loss": 0.0188,
"step": 5760
},
{
"grad_norm": 0.43162935972213745,
"learning_rate": 8.524409269711807e-05,
"loss": 0.0216,
"step": 5770
},
{
"grad_norm": 0.3942539691925049,
"learning_rate": 8.51854021942732e-05,
"loss": 0.0196,
"step": 5780
},
{
"grad_norm": 0.4294084906578064,
"learning_rate": 8.512661549588227e-05,
"loss": 0.0192,
"step": 5790
},
{
"grad_norm": 0.45174744725227356,
"learning_rate": 8.506773276266588e-05,
"loss": 0.0182,
"step": 5800
},
{
"grad_norm": 0.39539965987205505,
"learning_rate": 8.500875415560721e-05,
"loss": 0.016,
"step": 5810
},
{
"grad_norm": 0.46286967396736145,
"learning_rate": 8.494967983595144e-05,
"loss": 0.0175,
"step": 5820
},
{
"grad_norm": 0.46430596709251404,
"learning_rate": 8.489050996520558e-05,
"loss": 0.0182,
"step": 5830
},
{
"grad_norm": 0.45418673753738403,
"learning_rate": 8.483124470513775e-05,
"loss": 0.0173,
"step": 5840
},
{
"grad_norm": 0.34853413701057434,
"learning_rate": 8.477188421777692e-05,
"loss": 0.0172,
"step": 5850
},
{
"grad_norm": 0.39042121171951294,
"learning_rate": 8.47124286654124e-05,
"loss": 0.018,
"step": 5860
},
{
"grad_norm": 0.40436068177223206,
"learning_rate": 8.465287821059341e-05,
"loss": 0.0186,
"step": 5870
},
{
"grad_norm": 0.46700358390808105,
"learning_rate": 8.45932330161286e-05,
"loss": 0.017,
"step": 5880
},
{
"grad_norm": 0.42868727445602417,
"learning_rate": 8.453349324508567e-05,
"loss": 0.015,
"step": 5890
},
{
"grad_norm": 0.49199414253234863,
"learning_rate": 8.447365906079088e-05,
"loss": 0.0168,
"step": 5900
},
{
"grad_norm": 0.35933196544647217,
"learning_rate": 8.441373062682856e-05,
"loss": 0.017,
"step": 5910
},
{
"grad_norm": 0.34801632165908813,
"learning_rate": 8.43537081070408e-05,
"loss": 0.0151,
"step": 5920
},
{
"grad_norm": 0.3106619715690613,
"learning_rate": 8.429359166552689e-05,
"loss": 0.0131,
"step": 5930
},
{
"grad_norm": 0.3804507255554199,
"learning_rate": 8.423338146664284e-05,
"loss": 0.0173,
"step": 5940
},
{
"grad_norm": 0.3459800183773041,
"learning_rate": 8.417307767500107e-05,
"loss": 0.0133,
"step": 5950
},
{
"grad_norm": 0.40529242157936096,
"learning_rate": 8.411268045546983e-05,
"loss": 0.0134,
"step": 5960
},
{
"grad_norm": 0.40981313586235046,
"learning_rate": 8.405218997317281e-05,
"loss": 0.0172,
"step": 5970
},
{
"grad_norm": 0.36945050954818726,
"learning_rate": 8.399160639348869e-05,
"loss": 0.0148,
"step": 5980
},
{
"grad_norm": 0.48703187704086304,
"learning_rate": 8.393092988205065e-05,
"loss": 0.0172,
"step": 5990
},
{
"grad_norm": 0.45176467299461365,
"learning_rate": 8.387016060474597e-05,
"loss": 0.0182,
"step": 6000
},
{
"grad_norm": 0.3710270822048187,
"learning_rate": 8.380929872771551e-05,
"loss": 0.0139,
"step": 6010
},
{
"grad_norm": 0.423194020986557,
"learning_rate": 8.374834441735335e-05,
"loss": 0.0141,
"step": 6020
},
{
"grad_norm": 0.40545573830604553,
"learning_rate": 8.368729784030622e-05,
"loss": 0.0136,
"step": 6030
},
{
"grad_norm": 0.2971954345703125,
"learning_rate": 8.362615916347315e-05,
"loss": 0.0158,
"step": 6040
},
{
"grad_norm": 0.3459785580635071,
"learning_rate": 8.356492855400493e-05,
"loss": 0.0151,
"step": 6050
},
{
"grad_norm": 0.3283204734325409,
"learning_rate": 8.350360617930371e-05,
"loss": 0.0135,
"step": 6060
},
{
"grad_norm": 0.4347304701805115,
"learning_rate": 8.344219220702255e-05,
"loss": 0.0155,
"step": 6070
},
{
"grad_norm": 0.4429846405982971,
"learning_rate": 8.338068680506485e-05,
"loss": 0.0165,
"step": 6080
},
{
"grad_norm": 0.45707541704177856,
"learning_rate": 8.33190901415841e-05,
"loss": 0.0132,
"step": 6090
},
{
"grad_norm": 0.4395197927951813,
"learning_rate": 8.325740238498317e-05,
"loss": 0.0168,
"step": 6100
},
{
"grad_norm": 0.4128543436527252,
"learning_rate": 8.319562370391406e-05,
"loss": 0.0157,
"step": 6110
},
{
"grad_norm": 0.4431177079677582,
"learning_rate": 8.31337542672773e-05,
"loss": 0.0195,
"step": 6120
},
{
"grad_norm": 0.4641300141811371,
"learning_rate": 8.307179424422158e-05,
"loss": 0.0164,
"step": 6130
},
{
"grad_norm": 0.4123786687850952,
"learning_rate": 8.300974380414327e-05,
"loss": 0.0147,
"step": 6140
},
{
"grad_norm": 0.42310845851898193,
"learning_rate": 8.294760311668586e-05,
"loss": 0.0184,
"step": 6150
},
{
"grad_norm": 0.4000084102153778,
"learning_rate": 8.288537235173961e-05,
"loss": 0.0149,
"step": 6160
},
{
"grad_norm": 0.4384673237800598,
"learning_rate": 8.282305167944108e-05,
"loss": 0.0174,
"step": 6170
},
{
"grad_norm": 0.2722078263759613,
"learning_rate": 8.276064127017262e-05,
"loss": 0.0154,
"step": 6180
},
{
"grad_norm": 0.31681233644485474,
"learning_rate": 8.269814129456189e-05,
"loss": 0.0139,
"step": 6190
},
{
"grad_norm": 0.3676801919937134,
"learning_rate": 8.263555192348143e-05,
"loss": 0.0145,
"step": 6200
},
{
"grad_norm": 0.38583555817604065,
"learning_rate": 8.257287332804819e-05,
"loss": 0.0148,
"step": 6210
},
{
"grad_norm": 0.38549646735191345,
"learning_rate": 8.251010567962307e-05,
"loss": 0.0157,
"step": 6220
},
{
"grad_norm": 0.38993483781814575,
"learning_rate": 8.244724914981041e-05,
"loss": 0.0151,
"step": 6230
},
{
"grad_norm": 0.4005882441997528,
"learning_rate": 8.238430391045757e-05,
"loss": 0.0148,
"step": 6240
},
{
"grad_norm": 0.34108078479766846,
"learning_rate": 8.232127013365445e-05,
"loss": 0.0176,
"step": 6250
},
{
"grad_norm": 0.4081633985042572,
"learning_rate": 8.225814799173295e-05,
"loss": 0.0181,
"step": 6260
},
{
"grad_norm": 0.24599479138851166,
"learning_rate": 8.219493765726663e-05,
"loss": 0.0136,
"step": 6270
},
{
"grad_norm": 0.4071696698665619,
"learning_rate": 8.21316393030701e-05,
"loss": 0.0159,
"step": 6280
},
{
"grad_norm": 0.41659772396087646,
"learning_rate": 8.206825310219865e-05,
"loss": 0.0133,
"step": 6290
},
{
"grad_norm": 0.4144860804080963,
"learning_rate": 8.200477922794776e-05,
"loss": 0.0178,
"step": 6300
},
{
"grad_norm": 0.3404088318347931,
"learning_rate": 8.194121785385256e-05,
"loss": 0.016,
"step": 6310
},
{
"grad_norm": 0.4269247055053711,
"learning_rate": 8.187756915368741e-05,
"loss": 0.0144,
"step": 6320
},
{
"grad_norm": 0.34299951791763306,
"learning_rate": 8.181383330146544e-05,
"loss": 0.0161,
"step": 6330
},
{
"grad_norm": 0.27657780051231384,
"learning_rate": 8.175001047143804e-05,
"loss": 0.0172,
"step": 6340
},
{
"grad_norm": 0.4395776391029358,
"learning_rate": 8.168610083809438e-05,
"loss": 0.0173,
"step": 6350
},
{
"grad_norm": 0.41414085030555725,
"learning_rate": 8.162210457616095e-05,
"loss": 0.0182,
"step": 6360
},
{
"grad_norm": 0.3814326822757721,
"learning_rate": 8.155802186060109e-05,
"loss": 0.0171,
"step": 6370
},
{
"grad_norm": 0.3817397654056549,
"learning_rate": 8.149385286661453e-05,
"loss": 0.0174,
"step": 6380
},
{
"grad_norm": 0.40936893224716187,
"learning_rate": 8.14295977696368e-05,
"loss": 0.0171,
"step": 6390
},
{
"grad_norm": 0.36244097352027893,
"learning_rate": 8.13652567453389e-05,
"loss": 0.0134,
"step": 6400
},
{
"grad_norm": 0.4146624803543091,
"learning_rate": 8.130082996962676e-05,
"loss": 0.0175,
"step": 6410
},
{
"grad_norm": 0.4154786169528961,
"learning_rate": 8.123631761864068e-05,
"loss": 0.0178,
"step": 6420
},
{
"grad_norm": 0.4006327986717224,
"learning_rate": 8.1171719868755e-05,
"loss": 0.0142,
"step": 6430
},
{
"grad_norm": 0.45199131965637207,
"learning_rate": 8.110703689657748e-05,
"loss": 0.0164,
"step": 6440
},
{
"grad_norm": 0.35770922899246216,
"learning_rate": 8.104226887894892e-05,
"loss": 0.0147,
"step": 6450
},
{
"grad_norm": 0.30330339074134827,
"learning_rate": 8.097741599294257e-05,
"loss": 0.0143,
"step": 6460
},
{
"grad_norm": 0.32697442173957825,
"learning_rate": 8.091247841586378e-05,
"loss": 0.0143,
"step": 6470
},
{
"grad_norm": 0.3217572867870331,
"learning_rate": 8.084745632524939e-05,
"loss": 0.016,
"step": 6480
},
{
"grad_norm": 0.42020222544670105,
"learning_rate": 8.07823498988673e-05,
"loss": 0.017,
"step": 6490
},
{
"grad_norm": 0.3531644642353058,
"learning_rate": 8.071715931471602e-05,
"loss": 0.0144,
"step": 6500
},
{
"grad_norm": 0.33470600843429565,
"learning_rate": 8.06518847510241e-05,
"loss": 0.0135,
"step": 6510
},
{
"grad_norm": 0.34689798951148987,
"learning_rate": 8.058652638624971e-05,
"loss": 0.0139,
"step": 6520
},
{
"grad_norm": 0.32701587677001953,
"learning_rate": 8.052108439908013e-05,
"loss": 0.0158,
"step": 6530
},
{
"grad_norm": 0.38156113028526306,
"learning_rate": 8.045555896843125e-05,
"loss": 0.0155,
"step": 6540
},
{
"grad_norm": 0.448223352432251,
"learning_rate": 8.03899502734471e-05,
"loss": 0.0157,
"step": 6550
},
{
"grad_norm": 0.5395865440368652,
"learning_rate": 8.032425849349931e-05,
"loss": 0.0158,
"step": 6560
},
{
"grad_norm": 0.5085408091545105,
"learning_rate": 8.025848380818674e-05,
"loss": 0.0194,
"step": 6570
},
{
"grad_norm": 0.3302910625934601,
"learning_rate": 8.019262639733487e-05,
"loss": 0.0141,
"step": 6580
},
{
"grad_norm": 0.5127670764923096,
"learning_rate": 8.012668644099531e-05,
"loss": 0.0159,
"step": 6590
},
{
"grad_norm": 0.36805611848831177,
"learning_rate": 8.006066411944542e-05,
"loss": 0.0183,
"step": 6600
},
{
"grad_norm": 0.35506105422973633,
"learning_rate": 7.999455961318769e-05,
"loss": 0.0196,
"step": 6610
},
{
"grad_norm": 0.3373585343360901,
"learning_rate": 7.992837310294932e-05,
"loss": 0.0125,
"step": 6620
},
{
"grad_norm": 0.4130808413028717,
"learning_rate": 7.986210476968167e-05,
"loss": 0.0174,
"step": 6630
},
{
"grad_norm": 0.455818772315979,
"learning_rate": 7.97957547945599e-05,
"loss": 0.0161,
"step": 6640
},
{
"grad_norm": 0.35132884979248047,
"learning_rate": 7.972932335898226e-05,
"loss": 0.0138,
"step": 6650
},
{
"grad_norm": 0.39049220085144043,
"learning_rate": 7.966281064456975e-05,
"loss": 0.0164,
"step": 6660
},
{
"grad_norm": 0.46879708766937256,
"learning_rate": 7.959621683316563e-05,
"loss": 0.0172,
"step": 6670
},
{
"grad_norm": 0.4622355103492737,
"learning_rate": 7.952954210683481e-05,
"loss": 0.0158,
"step": 6680
},
{
"grad_norm": 0.35040467977523804,
"learning_rate": 7.946278664786345e-05,
"loss": 0.015,
"step": 6690
},
{
"grad_norm": 0.36279842257499695,
"learning_rate": 7.939595063875842e-05,
"loss": 0.0144,
"step": 6700
},
{
"grad_norm": 0.3879755735397339,
"learning_rate": 7.932903426224683e-05,
"loss": 0.015,
"step": 6710
},
{
"grad_norm": 0.33389654755592346,
"learning_rate": 7.926203770127552e-05,
"loss": 0.0155,
"step": 6720
},
{
"grad_norm": 0.36863645911216736,
"learning_rate": 7.919496113901046e-05,
"loss": 0.0125,
"step": 6730
},
{
"grad_norm": 0.33035746216773987,
"learning_rate": 7.912780475883649e-05,
"loss": 0.0143,
"step": 6740
},
{
"grad_norm": 0.377729594707489,
"learning_rate": 7.906056874435652e-05,
"loss": 0.0125,
"step": 6750
},
{
"grad_norm": 0.311613529920578,
"learning_rate": 7.899325327939131e-05,
"loss": 0.0121,
"step": 6760
},
{
"grad_norm": 0.3309403955936432,
"learning_rate": 7.892585854797872e-05,
"loss": 0.0123,
"step": 6770
},
{
"grad_norm": 0.3694356083869934,
"learning_rate": 7.88583847343734e-05,
"loss": 0.0141,
"step": 6780
},
{
"grad_norm": 0.46677365899086,
"learning_rate": 7.879083202304616e-05,
"loss": 0.0134,
"step": 6790
},
{
"grad_norm": 0.3483867347240448,
"learning_rate": 7.872320059868355e-05,
"loss": 0.0143,
"step": 6800
},
{
"grad_norm": 0.30982255935668945,
"learning_rate": 7.865549064618729e-05,
"loss": 0.0135,
"step": 6810
},
{
"grad_norm": 0.3763626217842102,
"learning_rate": 7.858770235067381e-05,
"loss": 0.0155,
"step": 6820
},
{
"grad_norm": 0.43991050124168396,
"learning_rate": 7.851983589747374e-05,
"loss": 0.0126,
"step": 6830
},
{
"grad_norm": 0.4573172330856323,
"learning_rate": 7.845189147213133e-05,
"loss": 0.0154,
"step": 6840
},
{
"grad_norm": 0.5023182034492493,
"learning_rate": 7.838386926040407e-05,
"loss": 0.0163,
"step": 6850
},
{
"grad_norm": 0.4336654841899872,
"learning_rate": 7.83157694482621e-05,
"loss": 0.0173,
"step": 6860
},
{
"grad_norm": 0.2743140459060669,
"learning_rate": 7.824759222188768e-05,
"loss": 0.0125,
"step": 6870
},
{
"grad_norm": 0.3396323621273041,
"learning_rate": 7.817933776767478e-05,
"loss": 0.0165,
"step": 6880
},
{
"grad_norm": 0.35954833030700684,
"learning_rate": 7.811100627222842e-05,
"loss": 0.012,
"step": 6890
},
{
"grad_norm": 0.3257628381252289,
"learning_rate": 7.804259792236435e-05,
"loss": 0.0135,
"step": 6900
},
{
"grad_norm": 0.49320679903030396,
"learning_rate": 7.797411290510835e-05,
"loss": 0.0128,
"step": 6910
},
{
"grad_norm": 0.36475473642349243,
"learning_rate": 7.790555140769586e-05,
"loss": 0.0148,
"step": 6920
},
{
"grad_norm": 0.30804768204689026,
"learning_rate": 7.78369136175714e-05,
"loss": 0.0158,
"step": 6930
},
{
"grad_norm": 0.2891436219215393,
"learning_rate": 7.776819972238806e-05,
"loss": 0.0127,
"step": 6940
},
{
"grad_norm": 0.3640880584716797,
"learning_rate": 7.7699409910007e-05,
"loss": 0.0135,
"step": 6950
},
{
"grad_norm": 0.29762402176856995,
"learning_rate": 7.763054436849694e-05,
"loss": 0.0148,
"step": 6960
},
{
"grad_norm": 0.3800078332424164,
"learning_rate": 7.756160328613364e-05,
"loss": 0.0124,
"step": 6970
},
{
"grad_norm": 0.3265116810798645,
"learning_rate": 7.749258685139942e-05,
"loss": 0.0148,
"step": 6980
},
{
"grad_norm": 0.37737640738487244,
"learning_rate": 7.742349525298253e-05,
"loss": 0.0132,
"step": 6990
},
{
"grad_norm": 0.21071003377437592,
"learning_rate": 7.735432867977679e-05,
"loss": 0.0136,
"step": 7000
},
{
"grad_norm": 0.30626237392425537,
"learning_rate": 7.728508732088096e-05,
"loss": 0.0168,
"step": 7010
},
{
"grad_norm": 0.3753643035888672,
"learning_rate": 7.721577136559825e-05,
"loss": 0.0154,
"step": 7020
},
{
"grad_norm": 0.3369854986667633,
"learning_rate": 7.714638100343588e-05,
"loss": 0.0126,
"step": 7030
},
{
"grad_norm": 0.36830559372901917,
"learning_rate": 7.707691642410444e-05,
"loss": 0.014,
"step": 7040
},
{
"grad_norm": 0.4115355908870697,
"learning_rate": 7.70073778175174e-05,
"loss": 0.0148,
"step": 7050
},
{
"grad_norm": 0.2858520448207855,
"learning_rate": 7.69377653737907e-05,
"loss": 0.0139,
"step": 7060
},
{
"grad_norm": 0.38406574726104736,
"learning_rate": 7.686807928324209e-05,
"loss": 0.013,
"step": 7070
},
{
"grad_norm": 0.3661177456378937,
"learning_rate": 7.679831973639065e-05,
"loss": 0.0128,
"step": 7080
},
{
"grad_norm": 0.2673167586326599,
"learning_rate": 7.672848692395637e-05,
"loss": 0.0135,
"step": 7090
},
{
"grad_norm": 0.3895997703075409,
"learning_rate": 7.665858103685944e-05,
"loss": 0.0116,
"step": 7100
},
{
"grad_norm": 0.3720957934856415,
"learning_rate": 7.658860226621991e-05,
"loss": 0.0144,
"step": 7110
},
{
"grad_norm": 0.3137901723384857,
"learning_rate": 7.651855080335708e-05,
"loss": 0.0146,
"step": 7120
},
{
"grad_norm": 0.29563164710998535,
"learning_rate": 7.644842683978896e-05,
"loss": 0.0123,
"step": 7130
},
{
"grad_norm": 0.39437034726142883,
"learning_rate": 7.63782305672318e-05,
"loss": 0.0155,
"step": 7140
},
{
"grad_norm": 0.3885636627674103,
"learning_rate": 7.63079621775995e-05,
"loss": 0.0124,
"step": 7150
},
{
"grad_norm": 0.35217997431755066,
"learning_rate": 7.623762186300319e-05,
"loss": 0.0138,
"step": 7160
},
{
"grad_norm": 0.3057458698749542,
"learning_rate": 7.616720981575057e-05,
"loss": 0.0116,
"step": 7170
},
{
"grad_norm": 0.3529524505138397,
"learning_rate": 7.609672622834552e-05,
"loss": 0.0134,
"step": 7180
},
{
"grad_norm": 0.3752933442592621,
"learning_rate": 7.602617129348747e-05,
"loss": 0.0118,
"step": 7190
},
{
"grad_norm": 0.34898602962493896,
"learning_rate": 7.595554520407088e-05,
"loss": 0.0145,
"step": 7200
},
{
"grad_norm": 0.39017370343208313,
"learning_rate": 7.588484815318484e-05,
"loss": 0.0135,
"step": 7210
},
{
"grad_norm": 0.37441468238830566,
"learning_rate": 7.581408033411234e-05,
"loss": 0.0137,
"step": 7220
},
{
"grad_norm": 0.40523502230644226,
"learning_rate": 7.574324194032995e-05,
"loss": 0.0147,
"step": 7230
},
{
"grad_norm": 0.2746640145778656,
"learning_rate": 7.567233316550705e-05,
"loss": 0.0118,
"step": 7240
},
{
"grad_norm": 0.3495345115661621,
"learning_rate": 7.560135420350562e-05,
"loss": 0.0117,
"step": 7250
},
{
"grad_norm": 0.4313659965991974,
"learning_rate": 7.553030524837935e-05,
"loss": 0.0128,
"step": 7260
},
{
"grad_norm": 0.4218427538871765,
"learning_rate": 7.545918649437341e-05,
"loss": 0.0121,
"step": 7270
},
{
"grad_norm": 0.37825003266334534,
"learning_rate": 7.538799813592377e-05,
"loss": 0.0107,
"step": 7280
},
{
"grad_norm": 0.36705559492111206,
"learning_rate": 7.531674036765662e-05,
"loss": 0.0134,
"step": 7290
},
{
"grad_norm": 0.3276546001434326,
"learning_rate": 7.524541338438807e-05,
"loss": 0.0145,
"step": 7300
},
{
"grad_norm": 0.45257559418678284,
"learning_rate": 7.517401738112328e-05,
"loss": 0.0138,
"step": 7310
},
{
"grad_norm": 0.36200499534606934,
"learning_rate": 7.510255255305628e-05,
"loss": 0.0143,
"step": 7320
},
{
"grad_norm": 0.37191298604011536,
"learning_rate": 7.503101909556911e-05,
"loss": 0.0166,
"step": 7330
},
{
"grad_norm": 0.3438247740268707,
"learning_rate": 7.495941720423154e-05,
"loss": 0.0142,
"step": 7340
},
{
"grad_norm": 0.4581180810928345,
"learning_rate": 7.488774707480042e-05,
"loss": 0.0153,
"step": 7350
},
{
"grad_norm": 0.359670490026474,
"learning_rate": 7.481600890321911e-05,
"loss": 0.0122,
"step": 7360
},
{
"grad_norm": 0.46836525201797485,
"learning_rate": 7.474420288561708e-05,
"loss": 0.0135,
"step": 7370
},
{
"grad_norm": 0.33389872312545776,
"learning_rate": 7.467232921830921e-05,
"loss": 0.0134,
"step": 7380
},
{
"grad_norm": 0.33688122034072876,
"learning_rate": 7.460038809779537e-05,
"loss": 0.0136,
"step": 7390
},
{
"grad_norm": 0.41096070408821106,
"learning_rate": 7.452837972075983e-05,
"loss": 0.0127,
"step": 7400
},
{
"grad_norm": 0.41126012802124023,
"learning_rate": 7.445630428407074e-05,
"loss": 0.015,
"step": 7410
},
{
"grad_norm": 0.3702179789543152,
"learning_rate": 7.43841619847796e-05,
"loss": 0.0126,
"step": 7420
},
{
"grad_norm": 0.35743647813796997,
"learning_rate": 7.431195302012072e-05,
"loss": 0.0197,
"step": 7430
},
{
"grad_norm": 0.2985547184944153,
"learning_rate": 7.423967758751061e-05,
"loss": 0.0159,
"step": 7440
},
{
"grad_norm": 0.39072185754776,
"learning_rate": 7.416733588454758e-05,
"loss": 0.0154,
"step": 7450
},
{
"grad_norm": 0.4116431474685669,
"learning_rate": 7.409492810901106e-05,
"loss": 0.0163,
"step": 7460
},
{
"grad_norm": 0.4209757149219513,
"learning_rate": 7.402245445886116e-05,
"loss": 0.0197,
"step": 7470
},
{
"grad_norm": 0.348718523979187,
"learning_rate": 7.394991513223806e-05,
"loss": 0.0142,
"step": 7480
},
{
"grad_norm": 0.3451424837112427,
"learning_rate": 7.38773103274615e-05,
"loss": 0.0147,
"step": 7490
},
{
"grad_norm": 0.32023975253105164,
"learning_rate": 7.380464024303028e-05,
"loss": 0.0159,
"step": 7500
},
{
"grad_norm": 0.4120250344276428,
"learning_rate": 7.373190507762162e-05,
"loss": 0.0123,
"step": 7510
},
{
"grad_norm": 0.3678809106349945,
"learning_rate": 7.365910503009066e-05,
"loss": 0.0117,
"step": 7520
},
{
"grad_norm": 0.36750277876853943,
"learning_rate": 7.358624029946996e-05,
"loss": 0.0136,
"step": 7530
},
{
"grad_norm": 0.3163510262966156,
"learning_rate": 7.351331108496893e-05,
"loss": 0.014,
"step": 7540
},
{
"grad_norm": 0.4134257435798645,
"learning_rate": 7.344031758597325e-05,
"loss": 0.0133,
"step": 7550
},
{
"grad_norm": 0.392423540353775,
"learning_rate": 7.336726000204435e-05,
"loss": 0.0141,
"step": 7560
},
{
"grad_norm": 0.3356896638870239,
"learning_rate": 7.32941385329189e-05,
"loss": 0.0125,
"step": 7570
},
{
"grad_norm": 0.31061065196990967,
"learning_rate": 7.322095337850816e-05,
"loss": 0.0153,
"step": 7580
},
{
"grad_norm": 0.33278927206993103,
"learning_rate": 7.314770473889758e-05,
"loss": 0.0104,
"step": 7590
},
{
"grad_norm": 0.35223352909088135,
"learning_rate": 7.307439281434615e-05,
"loss": 0.0117,
"step": 7600
},
{
"grad_norm": 0.37879201769828796,
"learning_rate": 7.300101780528585e-05,
"loss": 0.0132,
"step": 7610
},
{
"grad_norm": 0.33135613799095154,
"learning_rate": 7.292757991232117e-05,
"loss": 0.0142,
"step": 7620
},
{
"grad_norm": 0.3964645564556122,
"learning_rate": 7.285407933622848e-05,
"loss": 0.012,
"step": 7630
},
{
"grad_norm": 0.3188134431838989,
"learning_rate": 7.278051627795557e-05,
"loss": 0.0182,
"step": 7640
},
{
"grad_norm": 0.36549797654151917,
"learning_rate": 7.270689093862105e-05,
"loss": 0.0146,
"step": 7650
},
{
"grad_norm": 0.3195458948612213,
"learning_rate": 7.263320351951374e-05,
"loss": 0.0105,
"step": 7660
},
{
"grad_norm": 0.3117104768753052,
"learning_rate": 7.255945422209227e-05,
"loss": 0.0127,
"step": 7670
},
{
"grad_norm": 0.31559836864471436,
"learning_rate": 7.248564324798437e-05,
"loss": 0.0139,
"step": 7680
},
{
"grad_norm": 0.2703145742416382,
"learning_rate": 7.241177079898644e-05,
"loss": 0.014,
"step": 7690
},
{
"grad_norm": 0.30897757411003113,
"learning_rate": 7.233783707706295e-05,
"loss": 0.0127,
"step": 7700
},
{
"grad_norm": 0.3173775374889374,
"learning_rate": 7.226384228434586e-05,
"loss": 0.0133,
"step": 7710
},
{
"grad_norm": 0.36799511313438416,
"learning_rate": 7.21897866231341e-05,
"loss": 0.0105,
"step": 7720
},
{
"grad_norm": 0.2746727764606476,
"learning_rate": 7.211567029589303e-05,
"loss": 0.0138,
"step": 7730
},
{
"grad_norm": 0.3362380266189575,
"learning_rate": 7.204149350525387e-05,
"loss": 0.0123,
"step": 7740
},
{
"grad_norm": 0.33995118737220764,
"learning_rate": 7.196725645401309e-05,
"loss": 0.0127,
"step": 7750
},
{
"grad_norm": 0.332607626914978,
"learning_rate": 7.1892959345132e-05,
"loss": 0.0139,
"step": 7760
},
{
"grad_norm": 0.2877747416496277,
"learning_rate": 7.181860238173605e-05,
"loss": 0.0111,
"step": 7770
},
{
"grad_norm": 0.30644649267196655,
"learning_rate": 7.174418576711432e-05,
"loss": 0.012,
"step": 7780
},
{
"grad_norm": 0.29927390813827515,
"learning_rate": 7.1669709704719e-05,
"loss": 0.0109,
"step": 7790
},
{
"grad_norm": 0.3413328528404236,
"learning_rate": 7.159517439816481e-05,
"loss": 0.0143,
"step": 7800
},
{
"grad_norm": 0.24794118106365204,
"learning_rate": 7.152058005122842e-05,
"loss": 0.0148,
"step": 7810
},
{
"grad_norm": 0.30149683356285095,
"learning_rate": 7.144592686784793e-05,
"loss": 0.0118,
"step": 7820
},
{
"grad_norm": 0.33201098442077637,
"learning_rate": 7.137121505212229e-05,
"loss": 0.0111,
"step": 7830
},
{
"grad_norm": 0.40392014384269714,
"learning_rate": 7.129644480831077e-05,
"loss": 0.0126,
"step": 7840
},
{
"grad_norm": 0.32815903425216675,
"learning_rate": 7.122161634083234e-05,
"loss": 0.0104,
"step": 7850
},
{
"grad_norm": 0.31929904222488403,
"learning_rate": 7.114672985426516e-05,
"loss": 0.0121,
"step": 7860
},
{
"grad_norm": 0.3497408926486969,
"learning_rate": 7.107178555334606e-05,
"loss": 0.0113,
"step": 7870
},
{
"grad_norm": 0.3377891778945923,
"learning_rate": 7.099678364296989e-05,
"loss": 0.0135,
"step": 7880
},
{
"grad_norm": 0.27795737981796265,
"learning_rate": 7.0921724328189e-05,
"loss": 0.0115,
"step": 7890
},
{
"grad_norm": 0.3064858615398407,
"learning_rate": 7.084660781421268e-05,
"loss": 0.0142,
"step": 7900
},
{
"grad_norm": 0.32354864478111267,
"learning_rate": 7.077143430640662e-05,
"loss": 0.0154,
"step": 7910
},
{
"grad_norm": 0.38219061493873596,
"learning_rate": 7.069620401029232e-05,
"loss": 0.0116,
"step": 7920
},
{
"grad_norm": 0.3809864819049835,
"learning_rate": 7.062091713154655e-05,
"loss": 0.0132,
"step": 7930
},
{
"grad_norm": 0.3125777542591095,
"learning_rate": 7.054557387600075e-05,
"loss": 0.0149,
"step": 7940
},
{
"grad_norm": 0.3452930152416229,
"learning_rate": 7.04701744496405e-05,
"loss": 0.0106,
"step": 7950
},
{
"grad_norm": 0.37026354670524597,
"learning_rate": 7.039471905860495e-05,
"loss": 0.0122,
"step": 7960
},
{
"grad_norm": 0.32580164074897766,
"learning_rate": 7.031920790918628e-05,
"loss": 0.013,
"step": 7970
},
{
"grad_norm": 0.288991242647171,
"learning_rate": 7.024364120782906e-05,
"loss": 0.0132,
"step": 7980
},
{
"grad_norm": 0.325033575296402,
"learning_rate": 7.016801916112978e-05,
"loss": 0.0102,
"step": 7990
},
{
"grad_norm": 0.27642202377319336,
"learning_rate": 7.009234197583623e-05,
"loss": 0.0111,
"step": 8000
},
{
"grad_norm": 0.3329485058784485,
"learning_rate": 7.001660985884692e-05,
"loss": 0.0123,
"step": 8010
},
{
"grad_norm": 0.3785879611968994,
"learning_rate": 6.994082301721063e-05,
"loss": 0.0151,
"step": 8020
},
{
"grad_norm": 0.34150007367134094,
"learning_rate": 6.986498165812563e-05,
"loss": 0.0117,
"step": 8030
},
{
"grad_norm": 0.439264178276062,
"learning_rate": 6.978908598893932e-05,
"loss": 0.0139,
"step": 8040
},
{
"grad_norm": 0.3500205874443054,
"learning_rate": 6.971313621714756e-05,
"loss": 0.0112,
"step": 8050
},
{
"grad_norm": 0.2777945399284363,
"learning_rate": 6.96371325503941e-05,
"loss": 0.011,
"step": 8060
},
{
"grad_norm": 0.382794052362442,
"learning_rate": 6.956107519647014e-05,
"loss": 0.0125,
"step": 8070
},
{
"grad_norm": 0.30284425616264343,
"learning_rate": 6.94849643633135e-05,
"loss": 0.0133,
"step": 8080
},
{
"grad_norm": 0.24802178144454956,
"learning_rate": 6.940880025900834e-05,
"loss": 0.0107,
"step": 8090
},
{
"grad_norm": 0.2981727719306946,
"learning_rate": 6.933258309178438e-05,
"loss": 0.0122,
"step": 8100
},
{
"grad_norm": 0.21943475306034088,
"learning_rate": 6.925631307001646e-05,
"loss": 0.0165,
"step": 8110
},
{
"grad_norm": 0.3769550621509552,
"learning_rate": 6.91799904022239e-05,
"loss": 0.0143,
"step": 8120
},
{
"grad_norm": 0.2876106798648834,
"learning_rate": 6.910361529706997e-05,
"loss": 0.0131,
"step": 8130
},
{
"grad_norm": 0.33470556139945984,
"learning_rate": 6.902718796336131e-05,
"loss": 0.0137,
"step": 8140
},
{
"grad_norm": 0.4283612370491028,
"learning_rate": 6.895070861004729e-05,
"loss": 0.0129,
"step": 8150
},
{
"grad_norm": 0.2765333652496338,
"learning_rate": 6.887417744621956e-05,
"loss": 0.0137,
"step": 8160
},
{
"grad_norm": 0.32283779978752136,
"learning_rate": 6.87975946811114e-05,
"loss": 0.0125,
"step": 8170
},
{
"grad_norm": 0.3863534927368164,
"learning_rate": 6.872096052409718e-05,
"loss": 0.0139,
"step": 8180
},
{
"grad_norm": 0.29665520787239075,
"learning_rate": 6.864427518469174e-05,
"loss": 0.0129,
"step": 8190
},
{
"grad_norm": 0.3554975986480713,
"learning_rate": 6.856753887254986e-05,
"loss": 0.0107,
"step": 8200
},
{
"grad_norm": 0.3163433074951172,
"learning_rate": 6.849075179746572e-05,
"loss": 0.0115,
"step": 8210
},
{
"grad_norm": 0.3252999782562256,
"learning_rate": 6.841391416937221e-05,
"loss": 0.01,
"step": 8220
},
{
"grad_norm": 0.38887593150138855,
"learning_rate": 6.833702619834053e-05,
"loss": 0.0107,
"step": 8230
},
{
"grad_norm": 0.3632328510284424,
"learning_rate": 6.82600880945794e-05,
"loss": 0.0106,
"step": 8240
},
{
"grad_norm": 0.2744136154651642,
"learning_rate": 6.818310006843468e-05,
"loss": 0.0108,
"step": 8250
},
{
"grad_norm": 0.3894250988960266,
"learning_rate": 6.810606233038868e-05,
"loss": 0.0117,
"step": 8260
},
{
"grad_norm": 0.3185424506664276,
"learning_rate": 6.802897509105966e-05,
"loss": 0.0136,
"step": 8270
},
{
"grad_norm": 0.28028440475463867,
"learning_rate": 6.79518385612012e-05,
"loss": 0.0129,
"step": 8280
},
{
"grad_norm": 0.25754812359809875,
"learning_rate": 6.787465295170157e-05,
"loss": 0.0121,
"step": 8290
},
{
"grad_norm": 0.2997465431690216,
"learning_rate": 6.779741847358332e-05,
"loss": 0.0108,
"step": 8300
},
{
"grad_norm": 0.3573647141456604,
"learning_rate": 6.772013533800256e-05,
"loss": 0.0133,
"step": 8310
},
{
"grad_norm": 0.3089340627193451,
"learning_rate": 6.764280375624843e-05,
"loss": 0.0116,
"step": 8320
},
{
"grad_norm": 0.31227874755859375,
"learning_rate": 6.756542393974252e-05,
"loss": 0.0108,
"step": 8330
},
{
"grad_norm": 0.381428986787796,
"learning_rate": 6.748799610003828e-05,
"loss": 0.0112,
"step": 8340
},
{
"grad_norm": 0.278055340051651,
"learning_rate": 6.741052044882048e-05,
"loss": 0.0117,
"step": 8350
},
{
"grad_norm": 0.2949462831020355,
"learning_rate": 6.73329971979046e-05,
"loss": 0.0108,
"step": 8360
},
{
"grad_norm": 0.3816727101802826,
"learning_rate": 6.725542655923625e-05,
"loss": 0.0116,
"step": 8370
},
{
"grad_norm": 0.2636021673679352,
"learning_rate": 6.717780874489057e-05,
"loss": 0.0128,
"step": 8380
},
{
"grad_norm": 0.38993480801582336,
"learning_rate": 6.710014396707172e-05,
"loss": 0.0119,
"step": 8390
},
{
"grad_norm": 0.3024043142795563,
"learning_rate": 6.702243243811221e-05,
"loss": 0.0109,
"step": 8400
},
{
"grad_norm": 0.36139100790023804,
"learning_rate": 6.694467437047244e-05,
"loss": 0.0118,
"step": 8410
},
{
"grad_norm": 0.34032484889030457,
"learning_rate": 6.686686997673997e-05,
"loss": 0.0122,
"step": 8420
},
{
"grad_norm": 0.3963174819946289,
"learning_rate": 6.678901946962903e-05,
"loss": 0.0104,
"step": 8430
},
{
"grad_norm": 0.2162044644355774,
"learning_rate": 6.671112306197996e-05,
"loss": 0.0124,
"step": 8440
},
{
"grad_norm": 0.2702636122703552,
"learning_rate": 6.663318096675854e-05,
"loss": 0.0125,
"step": 8450
},
{
"grad_norm": 0.42705151438713074,
"learning_rate": 6.655519339705552e-05,
"loss": 0.0118,
"step": 8460
},
{
"grad_norm": 0.29198169708251953,
"learning_rate": 6.647716056608588e-05,
"loss": 0.0132,
"step": 8470
},
{
"grad_norm": 0.24624188244342804,
"learning_rate": 6.639908268718843e-05,
"loss": 0.011,
"step": 8480
},
{
"grad_norm": 0.29472094774246216,
"learning_rate": 6.632095997382514e-05,
"loss": 0.0103,
"step": 8490
},
{
"grad_norm": 0.28369250893592834,
"learning_rate": 6.624279263958047e-05,
"loss": 0.0121,
"step": 8500
},
{
"grad_norm": 0.3822849690914154,
"learning_rate": 6.616458089816097e-05,
"loss": 0.0121,
"step": 8510
},
{
"grad_norm": 0.3733149766921997,
"learning_rate": 6.608632496339454e-05,
"loss": 0.0127,
"step": 8520
},
{
"grad_norm": 0.25586628913879395,
"learning_rate": 6.600802504922988e-05,
"loss": 0.0149,
"step": 8530
},
{
"grad_norm": 0.36027905344963074,
"learning_rate": 6.592968136973604e-05,
"loss": 0.0132,
"step": 8540
},
{
"grad_norm": 0.30015572905540466,
"learning_rate": 6.585129413910159e-05,
"loss": 0.0119,
"step": 8550
},
{
"grad_norm": 0.3715488016605377,
"learning_rate": 6.577286357163424e-05,
"loss": 0.0123,
"step": 8560
},
{
"grad_norm": 0.39481574296951294,
"learning_rate": 6.569438988176018e-05,
"loss": 0.0103,
"step": 8570
},
{
"grad_norm": 0.35289764404296875,
"learning_rate": 6.561587328402347e-05,
"loss": 0.0127,
"step": 8580
},
{
"grad_norm": 0.34876325726509094,
"learning_rate": 6.553731399308549e-05,
"loss": 0.0127,
"step": 8590
},
{
"grad_norm": 0.26030921936035156,
"learning_rate": 6.545871222372436e-05,
"loss": 0.011,
"step": 8600
},
{
"grad_norm": 0.3516961336135864,
"learning_rate": 6.538006819083426e-05,
"loss": 0.0115,
"step": 8610
},
{
"grad_norm": 0.3771360218524933,
"learning_rate": 6.530138210942505e-05,
"loss": 0.011,
"step": 8620
},
{
"grad_norm": 0.2810113728046417,
"learning_rate": 6.522265419462141e-05,
"loss": 0.0089,
"step": 8630
},
{
"grad_norm": 0.2637802064418793,
"learning_rate": 6.514388466166248e-05,
"loss": 0.0105,
"step": 8640
},
{
"grad_norm": 0.2962155342102051,
"learning_rate": 6.506507372590119e-05,
"loss": 0.0126,
"step": 8650
},
{
"grad_norm": 0.2789287269115448,
"learning_rate": 6.498622160280355e-05,
"loss": 0.0128,
"step": 8660
},
{
"grad_norm": 0.2849268317222595,
"learning_rate": 6.490732850794832e-05,
"loss": 0.0133,
"step": 8670
},
{
"grad_norm": 0.26055100560188293,
"learning_rate": 6.482839465702616e-05,
"loss": 0.0131,
"step": 8680
},
{
"grad_norm": 0.32526013255119324,
"learning_rate": 6.474942026583923e-05,
"loss": 0.0127,
"step": 8690
},
{
"grad_norm": 0.33485621213912964,
"learning_rate": 6.467040555030052e-05,
"loss": 0.0108,
"step": 8700
},
{
"grad_norm": 0.2516840994358063,
"learning_rate": 6.459135072643321e-05,
"loss": 0.0115,
"step": 8710
},
{
"grad_norm": 0.31756162643432617,
"learning_rate": 6.451225601037019e-05,
"loss": 0.0091,
"step": 8720
},
{
"grad_norm": 0.2860639691352844,
"learning_rate": 6.443312161835338e-05,
"loss": 0.0128,
"step": 8730
},
{
"grad_norm": 0.2940915822982788,
"learning_rate": 6.43539477667332e-05,
"loss": 0.0104,
"step": 8740
},
{
"grad_norm": 0.24016410112380981,
"learning_rate": 6.427473467196793e-05,
"loss": 0.0143,
"step": 8750
},
{
"grad_norm": 0.29369375109672546,
"learning_rate": 6.419548255062315e-05,
"loss": 0.0103,
"step": 8760
},
{
"grad_norm": 0.32075247168540955,
"learning_rate": 6.411619161937112e-05,
"loss": 0.0097,
"step": 8770
},
{
"grad_norm": 0.3262137174606323,
"learning_rate": 6.403686209499022e-05,
"loss": 0.0104,
"step": 8780
},
{
"grad_norm": 0.3471458852291107,
"learning_rate": 6.395749419436437e-05,
"loss": 0.0099,
"step": 8790
},
{
"grad_norm": 0.30499032139778137,
"learning_rate": 6.387808813448234e-05,
"loss": 0.0133,
"step": 8800
},
{
"grad_norm": 0.34709998965263367,
"learning_rate": 6.37986441324373e-05,
"loss": 0.0118,
"step": 8810
},
{
"grad_norm": 0.2934912443161011,
"learning_rate": 6.37191624054261e-05,
"loss": 0.0125,
"step": 8820
},
{
"grad_norm": 0.324332058429718,
"learning_rate": 6.363964317074872e-05,
"loss": 0.0154,
"step": 8830
},
{
"grad_norm": 0.2829475402832031,
"learning_rate": 6.356008664580776e-05,
"loss": 0.0134,
"step": 8840
},
{
"grad_norm": 0.2962392270565033,
"learning_rate": 6.348049304810771e-05,
"loss": 0.0142,
"step": 8850
},
{
"grad_norm": 0.3759327530860901,
"learning_rate": 6.340086259525442e-05,
"loss": 0.0129,
"step": 8860
},
{
"grad_norm": 0.36553898453712463,
"learning_rate": 6.332119550495448e-05,
"loss": 0.0131,
"step": 8870
},
{
"grad_norm": 0.33665499091148376,
"learning_rate": 6.324149199501473e-05,
"loss": 0.0113,
"step": 8880
},
{
"grad_norm": 0.3106566071510315,
"learning_rate": 6.316175228334146e-05,
"loss": 0.0128,
"step": 8890
},
{
"grad_norm": 0.3394717276096344,
"learning_rate": 6.308197658794003e-05,
"loss": 0.0089,
"step": 8900
},
{
"grad_norm": 0.30129188299179077,
"learning_rate": 6.300216512691417e-05,
"loss": 0.0114,
"step": 8910
},
{
"grad_norm": 0.3877800405025482,
"learning_rate": 6.292231811846532e-05,
"loss": 0.0112,
"step": 8920
},
{
"grad_norm": 0.31548699736595154,
"learning_rate": 6.284243578089217e-05,
"loss": 0.0124,
"step": 8930
},
{
"grad_norm": 0.286324143409729,
"learning_rate": 6.276251833258999e-05,
"loss": 0.0104,
"step": 8940
},
{
"grad_norm": 0.31045469641685486,
"learning_rate": 6.268256599205003e-05,
"loss": 0.0124,
"step": 8950
},
{
"grad_norm": 0.31697219610214233,
"learning_rate": 6.260257897785892e-05,
"loss": 0.0137,
"step": 8960
},
{
"grad_norm": 0.37782853841781616,
"learning_rate": 6.252255750869811e-05,
"loss": 0.0118,
"step": 8970
},
{
"grad_norm": 0.30432772636413574,
"learning_rate": 6.244250180334325e-05,
"loss": 0.0128,
"step": 8980
},
{
"grad_norm": 0.3652268052101135,
"learning_rate": 6.236241208066356e-05,
"loss": 0.0127,
"step": 8990
},
{
"grad_norm": 0.2324247658252716,
"learning_rate": 6.228228855962133e-05,
"loss": 0.0162,
"step": 9000
},
{
"grad_norm": 0.31128183007240295,
"learning_rate": 6.220213145927115e-05,
"loss": 0.0135,
"step": 9010
},
{
"grad_norm": 0.32616373896598816,
"learning_rate": 6.212194099875951e-05,
"loss": 0.0101,
"step": 9020
},
{
"grad_norm": 0.25326231122016907,
"learning_rate": 6.204171739732405e-05,
"loss": 0.0133,
"step": 9030
},
{
"grad_norm": 0.3350052237510681,
"learning_rate": 6.196146087429303e-05,
"loss": 0.0134,
"step": 9040
},
{
"grad_norm": 0.40826472640037537,
"learning_rate": 6.188117164908474e-05,
"loss": 0.0112,
"step": 9050
},
{
"grad_norm": 0.2690710127353668,
"learning_rate": 6.180084994120684e-05,
"loss": 0.0116,
"step": 9060
},
{
"grad_norm": 0.20586135983467102,
"learning_rate": 6.17204959702558e-05,
"loss": 0.012,
"step": 9070
},
{
"grad_norm": 0.30860739946365356,
"learning_rate": 6.164010995591635e-05,
"loss": 0.0128,
"step": 9080
},
{
"grad_norm": 0.36940038204193115,
"learning_rate": 6.155969211796076e-05,
"loss": 0.0118,
"step": 9090
},
{
"grad_norm": 0.32097283005714417,
"learning_rate": 6.147924267624829e-05,
"loss": 0.0122,
"step": 9100
},
{
"grad_norm": 0.3452341556549072,
"learning_rate": 6.13987618507247e-05,
"loss": 0.0124,
"step": 9110
},
{
"grad_norm": 0.25516965985298157,
"learning_rate": 6.131824986142147e-05,
"loss": 0.0123,
"step": 9120
},
{
"grad_norm": 0.3512190878391266,
"learning_rate": 6.123770692845529e-05,
"loss": 0.0108,
"step": 9130
},
{
"grad_norm": 0.3161102831363678,
"learning_rate": 6.11571332720275e-05,
"loss": 0.0121,
"step": 9140
},
{
"grad_norm": 0.2909663915634155,
"learning_rate": 6.107652911242336e-05,
"loss": 0.0099,
"step": 9150
},
{
"grad_norm": 0.3107944130897522,
"learning_rate": 6.0995894670011586e-05,
"loss": 0.0108,
"step": 9160
},
{
"grad_norm": 0.2862789034843445,
"learning_rate": 6.091523016524368e-05,
"loss": 0.0121,
"step": 9170
},
{
"grad_norm": 0.3161313235759735,
"learning_rate": 6.083453581865328e-05,
"loss": 0.0105,
"step": 9180
},
{
"grad_norm": 0.2773940861225128,
"learning_rate": 6.075381185085568e-05,
"loss": 0.0108,
"step": 9190
},
{
"grad_norm": 0.2752029299736023,
"learning_rate": 6.067305848254709e-05,
"loss": 0.0115,
"step": 9200
},
{
"grad_norm": 0.35372427105903625,
"learning_rate": 6.059227593450418e-05,
"loss": 0.0119,
"step": 9210
},
{
"grad_norm": 0.3142034411430359,
"learning_rate": 6.051146442758333e-05,
"loss": 0.0125,
"step": 9220
},
{
"grad_norm": 0.2529461979866028,
"learning_rate": 6.043062418272012e-05,
"loss": 0.0107,
"step": 9230
},
{
"grad_norm": 0.3262726962566376,
"learning_rate": 6.0349755420928666e-05,
"loss": 0.0127,
"step": 9240
},
{
"grad_norm": 0.26650935411453247,
"learning_rate": 6.0268858363301105e-05,
"loss": 0.0106,
"step": 9250
},
{
"grad_norm": 0.3812367916107178,
"learning_rate": 6.018793323100689e-05,
"loss": 0.0104,
"step": 9260
},
{
"grad_norm": 0.2846297025680542,
"learning_rate": 6.0106980245292255e-05,
"loss": 0.0116,
"step": 9270
},
{
"grad_norm": 0.3379952907562256,
"learning_rate": 6.002599962747957e-05,
"loss": 0.0131,
"step": 9280
},
{
"grad_norm": 0.28340259194374084,
"learning_rate": 5.994499159896673e-05,
"loss": 0.0109,
"step": 9290
},
{
"grad_norm": 0.32905933260917664,
"learning_rate": 5.9863956381226607e-05,
"loss": 0.0127,
"step": 9300
},
{
"grad_norm": 0.34781473875045776,
"learning_rate": 5.9782894195806394e-05,
"loss": 0.0097,
"step": 9310
},
{
"grad_norm": 0.3538030683994293,
"learning_rate": 5.9701805264327004e-05,
"loss": 0.0101,
"step": 9320
},
{
"grad_norm": 0.3605461120605469,
"learning_rate": 5.96206898084825e-05,
"loss": 0.0102,
"step": 9330
},
{
"grad_norm": 0.29366081953048706,
"learning_rate": 5.953954805003942e-05,
"loss": 0.0105,
"step": 9340
},
{
"grad_norm": 0.26942163705825806,
"learning_rate": 5.945838021083623e-05,
"loss": 0.0119,
"step": 9350
},
{
"grad_norm": 0.3048942983150482,
"learning_rate": 5.9377186512782714e-05,
"loss": 0.0104,
"step": 9360
},
{
"grad_norm": 0.3276144862174988,
"learning_rate": 5.929596717785935e-05,
"loss": 0.0092,
"step": 9370
},
{
"grad_norm": 0.327134370803833,
"learning_rate": 5.921472242811668e-05,
"loss": 0.0106,
"step": 9380
},
{
"grad_norm": 0.2601287066936493,
"learning_rate": 5.913345248567475e-05,
"loss": 0.0095,
"step": 9390
},
{
"grad_norm": 0.3127315044403076,
"learning_rate": 5.905215757272248e-05,
"loss": 0.0113,
"step": 9400
},
{
"grad_norm": 0.2669365406036377,
"learning_rate": 5.897083791151706e-05,
"loss": 0.0082,
"step": 9410
},
{
"grad_norm": 0.3623141348361969,
"learning_rate": 5.888949372438336e-05,
"loss": 0.0114,
"step": 9420
},
{
"grad_norm": 0.28753525018692017,
"learning_rate": 5.8808125233713255e-05,
"loss": 0.0123,
"step": 9430
},
{
"grad_norm": 0.2539946436882019,
"learning_rate": 5.872673266196509e-05,
"loss": 0.0113,
"step": 9440
},
{
"grad_norm": 0.30448299646377563,
"learning_rate": 5.864531623166305e-05,
"loss": 0.0131,
"step": 9450
},
{
"grad_norm": 0.35516154766082764,
"learning_rate": 5.856387616539656e-05,
"loss": 0.0103,
"step": 9460
},
{
"grad_norm": 0.2838006615638733,
"learning_rate": 5.848241268581967e-05,
"loss": 0.0099,
"step": 9470
},
{
"grad_norm": 0.2405579686164856,
"learning_rate": 5.840092601565037e-05,
"loss": 0.0117,
"step": 9480
},
{
"grad_norm": 0.2804074287414551,
"learning_rate": 5.8319416377670144e-05,
"loss": 0.0104,
"step": 9490
},
{
"grad_norm": 0.3132031559944153,
"learning_rate": 5.82378839947232e-05,
"loss": 0.0102,
"step": 9500
},
{
"grad_norm": 0.2594463527202606,
"learning_rate": 5.815632908971599e-05,
"loss": 0.0123,
"step": 9510
},
{
"grad_norm": 0.3764408230781555,
"learning_rate": 5.80747518856165e-05,
"loss": 0.0088,
"step": 9520
},
{
"grad_norm": 0.3109181821346283,
"learning_rate": 5.799315260545367e-05,
"loss": 0.0078,
"step": 9530
},
{
"grad_norm": 0.26161524653434753,
"learning_rate": 5.791153147231686e-05,
"loss": 0.01,
"step": 9540
},
{
"grad_norm": 0.38785234093666077,
"learning_rate": 5.782988870935509e-05,
"loss": 0.0135,
"step": 9550
},
{
"grad_norm": 0.3314606547355652,
"learning_rate": 5.774822453977657e-05,
"loss": 0.0114,
"step": 9560
},
{
"grad_norm": 0.2779804766178131,
"learning_rate": 5.7666539186848036e-05,
"loss": 0.011,
"step": 9570
},
{
"grad_norm": 0.2708481252193451,
"learning_rate": 5.758483287389411e-05,
"loss": 0.0128,
"step": 9580
},
{
"grad_norm": 0.27679356932640076,
"learning_rate": 5.7503105824296735e-05,
"loss": 0.0128,
"step": 9590
},
{
"grad_norm": 0.2817783057689667,
"learning_rate": 5.742135826149453e-05,
"loss": 0.0091,
"step": 9600
},
{
"grad_norm": 0.2382742166519165,
"learning_rate": 5.7339590408982223e-05,
"loss": 0.0115,
"step": 9610
},
{
"grad_norm": 0.23625558614730835,
"learning_rate": 5.725780249031e-05,
"loss": 0.0086,
"step": 9620
},
{
"grad_norm": 0.30190321803092957,
"learning_rate": 5.717599472908292e-05,
"loss": 0.0097,
"step": 9630
},
{
"grad_norm": 0.31747740507125854,
"learning_rate": 5.7094167348960237e-05,
"loss": 0.0129,
"step": 9640
},
{
"grad_norm": 0.27004364132881165,
"learning_rate": 5.7012320573654945e-05,
"loss": 0.0126,
"step": 9650
},
{
"grad_norm": 0.2586357593536377,
"learning_rate": 5.693045462693295e-05,
"loss": 0.0124,
"step": 9660
},
{
"grad_norm": 0.2702503502368927,
"learning_rate": 5.684856973261266e-05,
"loss": 0.009,
"step": 9670
},
{
"grad_norm": 0.33263716101646423,
"learning_rate": 5.6766666114564215e-05,
"loss": 0.0087,
"step": 9680
},
{
"grad_norm": 0.3644093871116638,
"learning_rate": 5.668474399670899e-05,
"loss": 0.0119,
"step": 9690
},
{
"grad_norm": 0.29676637053489685,
"learning_rate": 5.660280360301896e-05,
"loss": 0.0115,
"step": 9700
},
{
"grad_norm": 0.2950310707092285,
"learning_rate": 5.652084515751599e-05,
"loss": 0.0098,
"step": 9710
},
{
"grad_norm": 0.30780714750289917,
"learning_rate": 5.643886888427137e-05,
"loss": 0.0105,
"step": 9720
},
{
"grad_norm": 0.31481608748435974,
"learning_rate": 5.6356875007405074e-05,
"loss": 0.0101,
"step": 9730
},
{
"grad_norm": 0.2832598090171814,
"learning_rate": 5.627486375108525e-05,
"loss": 0.0113,
"step": 9740
},
{
"grad_norm": 0.3724987208843231,
"learning_rate": 5.619283533952754e-05,
"loss": 0.0122,
"step": 9750
},
{
"grad_norm": 0.47322213649749756,
"learning_rate": 5.6110789996994474e-05,
"loss": 0.0114,
"step": 9760
},
{
"grad_norm": 0.31387490034103394,
"learning_rate": 5.602872794779491e-05,
"loss": 0.0097,
"step": 9770
},
{
"grad_norm": 0.33728551864624023,
"learning_rate": 5.594664941628334e-05,
"loss": 0.0084,
"step": 9780
},
{
"grad_norm": 0.2974473237991333,
"learning_rate": 5.5864554626859324e-05,
"loss": 0.0096,
"step": 9790
},
{
"grad_norm": 0.2743162214756012,
"learning_rate": 5.578244380396691e-05,
"loss": 0.0102,
"step": 9800
},
{
"grad_norm": 0.2396203577518463,
"learning_rate": 5.570031717209394e-05,
"loss": 0.0172,
"step": 9810
},
{
"grad_norm": 0.3160618245601654,
"learning_rate": 5.561817495577147e-05,
"loss": 0.0117,
"step": 9820
},
{
"grad_norm": 0.31184670329093933,
"learning_rate": 5.5536017379573215e-05,
"loss": 0.0131,
"step": 9830
},
{
"grad_norm": 0.2700205445289612,
"learning_rate": 5.545384466811483e-05,
"loss": 0.0131,
"step": 9840
},
{
"grad_norm": 0.29386454820632935,
"learning_rate": 5.5371657046053384e-05,
"loss": 0.013,
"step": 9850
},
{
"grad_norm": 0.30857086181640625,
"learning_rate": 5.528945473808669e-05,
"loss": 0.0112,
"step": 9860
},
{
"grad_norm": 0.31214991211891174,
"learning_rate": 5.520723796895272e-05,
"loss": 0.0082,
"step": 9870
},
{
"grad_norm": 0.3384791910648346,
"learning_rate": 5.512500696342897e-05,
"loss": 0.0087,
"step": 9880
},
{
"grad_norm": 0.26187098026275635,
"learning_rate": 5.504276194633188e-05,
"loss": 0.0093,
"step": 9890
},
{
"grad_norm": 0.3037872314453125,
"learning_rate": 5.49605031425162e-05,
"loss": 0.0092,
"step": 9900
},
{
"grad_norm": 0.28506723046302795,
"learning_rate": 5.487823077687434e-05,
"loss": 0.0099,
"step": 9910
},
{
"grad_norm": 0.3275642693042755,
"learning_rate": 5.4795945074335806e-05,
"loss": 0.009,
"step": 9920
},
{
"grad_norm": 0.29687318205833435,
"learning_rate": 5.471364625986657e-05,
"loss": 0.0112,
"step": 9930
},
{
"grad_norm": 0.24183917045593262,
"learning_rate": 5.463133455846845e-05,
"loss": 0.0088,
"step": 9940
},
{
"grad_norm": 0.32168254256248474,
"learning_rate": 5.4549010195178505e-05,
"loss": 0.0141,
"step": 9950
},
{
"grad_norm": 0.2662130892276764,
"learning_rate": 5.446667339506838e-05,
"loss": 0.0104,
"step": 9960
},
{
"grad_norm": 0.32166045904159546,
"learning_rate": 5.4384324383243756e-05,
"loss": 0.0098,
"step": 9970
},
{
"grad_norm": 0.22883684933185577,
"learning_rate": 5.430196338484368e-05,
"loss": 0.0078,
"step": 9980
},
{
"grad_norm": 0.2235536277294159,
"learning_rate": 5.4219590625039975e-05,
"loss": 0.0097,
"step": 9990
},
{
"grad_norm": 0.2653367221355438,
"learning_rate": 5.413720632903664e-05,
"loss": 0.0103,
"step": 10000
},
{
"grad_norm": 0.25344082713127136,
"learning_rate": 5.405481072206917e-05,
"loss": 0.0089,
"step": 10010
},
{
"grad_norm": 0.335708886384964,
"learning_rate": 5.397240402940402e-05,
"loss": 0.0102,
"step": 10020
},
{
"grad_norm": 0.2804220914840698,
"learning_rate": 5.388998647633794e-05,
"loss": 0.0119,
"step": 10030
},
{
"grad_norm": 0.3300700783729553,
"learning_rate": 5.380755828819737e-05,
"loss": 0.009,
"step": 10040
},
{
"grad_norm": 0.3184926211833954,
"learning_rate": 5.3725119690337846e-05,
"loss": 0.01,
"step": 10050
},
{
"grad_norm": 0.2888861894607544,
"learning_rate": 5.3642670908143324e-05,
"loss": 0.0085,
"step": 10060
},
{
"grad_norm": 0.2378275841474533,
"learning_rate": 5.356021216702562e-05,
"loss": 0.0083,
"step": 10070
},
{
"grad_norm": 0.30194157361984253,
"learning_rate": 5.347774369242381e-05,
"loss": 0.0144,
"step": 10080
},
{
"grad_norm": 0.30574026703834534,
"learning_rate": 5.3395265709803545e-05,
"loss": 0.0106,
"step": 10090
},
{
"grad_norm": 0.2707749307155609,
"learning_rate": 5.331277844465647e-05,
"loss": 0.009,
"step": 10100
},
{
"grad_norm": 0.2563587427139282,
"learning_rate": 5.323028212249963e-05,
"loss": 0.0079,
"step": 10110
},
{
"grad_norm": 0.23816479742527008,
"learning_rate": 5.314777696887481e-05,
"loss": 0.0092,
"step": 10120
},
{
"grad_norm": 0.2609695494174957,
"learning_rate": 5.306526320934796e-05,
"loss": 0.0115,
"step": 10130
},
{
"grad_norm": 0.32091790437698364,
"learning_rate": 5.298274106950854e-05,
"loss": 0.009,
"step": 10140
},
{
"grad_norm": 0.352911114692688,
"learning_rate": 5.290021077496893e-05,
"loss": 0.0088,
"step": 10150
},
{
"grad_norm": 0.2584521174430847,
"learning_rate": 5.2817672551363816e-05,
"loss": 0.0098,
"step": 10160
},
{
"grad_norm": 0.25784599781036377,
"learning_rate": 5.273512662434952e-05,
"loss": 0.0107,
"step": 10170
},
{
"grad_norm": 0.3719595670700073,
"learning_rate": 5.265257321960349e-05,
"loss": 0.011,
"step": 10180
},
{
"grad_norm": 0.30774036049842834,
"learning_rate": 5.257001256282357e-05,
"loss": 0.0096,
"step": 10190
},
{
"grad_norm": 0.3356902599334717,
"learning_rate": 5.248744487972742e-05,
"loss": 0.0094,
"step": 10200
},
{
"grad_norm": 0.3237636685371399,
"learning_rate": 5.240487039605196e-05,
"loss": 0.0113,
"step": 10210
},
{
"grad_norm": 0.3064771890640259,
"learning_rate": 5.232228933755267e-05,
"loss": 0.0128,
"step": 10220
},
{
"grad_norm": 0.3693506419658661,
"learning_rate": 5.2239701930003006e-05,
"loss": 0.0099,
"step": 10230
},
{
"grad_norm": 0.2279125452041626,
"learning_rate": 5.215710839919379e-05,
"loss": 0.0092,
"step": 10240
},
{
"grad_norm": 0.2967457175254822,
"learning_rate": 5.207450897093257e-05,
"loss": 0.0082,
"step": 10250
},
{
"grad_norm": 0.2727642059326172,
"learning_rate": 5.1991903871043046e-05,
"loss": 0.0125,
"step": 10260
},
{
"grad_norm": 0.3166670501232147,
"learning_rate": 5.190929332536439e-05,
"loss": 0.0109,
"step": 10270
},
{
"grad_norm": 0.3417263329029083,
"learning_rate": 5.182667755975071e-05,
"loss": 0.0121,
"step": 10280
},
{
"grad_norm": 0.24792048335075378,
"learning_rate": 5.1744056800070315e-05,
"loss": 0.0089,
"step": 10290
},
{
"grad_norm": 0.28874701261520386,
"learning_rate": 5.166143127220524e-05,
"loss": 0.0099,
"step": 10300
},
{
"grad_norm": 0.33919915556907654,
"learning_rate": 5.1578801202050485e-05,
"loss": 0.0092,
"step": 10310
},
{
"grad_norm": 0.24734066426753998,
"learning_rate": 5.149616681551355e-05,
"loss": 0.0084,
"step": 10320
},
{
"grad_norm": 0.29600366950035095,
"learning_rate": 5.141352833851367e-05,
"loss": 0.011,
"step": 10330
},
{
"grad_norm": 0.26344043016433716,
"learning_rate": 5.1330885996981285e-05,
"loss": 0.0084,
"step": 10340
},
{
"grad_norm": 0.29486021399497986,
"learning_rate": 5.124824001685741e-05,
"loss": 0.0128,
"step": 10350
},
{
"grad_norm": 0.3321808874607086,
"learning_rate": 5.116559062409298e-05,
"loss": 0.0108,
"step": 10360
},
{
"grad_norm": 0.3323744535446167,
"learning_rate": 5.10829380446483e-05,
"loss": 0.0102,
"step": 10370
},
{
"grad_norm": 0.2918391227722168,
"learning_rate": 5.100028250449235e-05,
"loss": 0.0086,
"step": 10380
},
{
"grad_norm": 0.2917460501194,
"learning_rate": 5.0917624229602234e-05,
"loss": 0.0092,
"step": 10390
},
{
"grad_norm": 0.24862314760684967,
"learning_rate": 5.0834963445962524e-05,
"loss": 0.0092,
"step": 10400
},
{
"grad_norm": 0.3336447775363922,
"learning_rate": 5.075230037956461e-05,
"loss": 0.0089,
"step": 10410
},
{
"grad_norm": 0.2436840683221817,
"learning_rate": 5.0669635256406213e-05,
"loss": 0.0074,
"step": 10420
},
{
"grad_norm": 0.23710206151008606,
"learning_rate": 5.058696830249058e-05,
"loss": 0.0109,
"step": 10430
},
{
"grad_norm": 0.2947542071342468,
"learning_rate": 5.050429974382602e-05,
"loss": 0.0099,
"step": 10440
},
{
"grad_norm": 0.28282612562179565,
"learning_rate": 5.042162980642523e-05,
"loss": 0.0083,
"step": 10450
},
{
"grad_norm": 0.2697611451148987,
"learning_rate": 5.033895871630462e-05,
"loss": 0.0096,
"step": 10460
},
{
"grad_norm": 0.28840044140815735,
"learning_rate": 5.025628669948386e-05,
"loss": 0.0099,
"step": 10470
},
{
"grad_norm": 0.26551976799964905,
"learning_rate": 5.017361398198502e-05,
"loss": 0.0095,
"step": 10480
},
{
"grad_norm": 0.344992458820343,
"learning_rate": 5.009094078983221e-05,
"loss": 0.009,
"step": 10490
},
{
"grad_norm": 0.33870258927345276,
"learning_rate": 5.000826734905073e-05,
"loss": 0.0086,
"step": 10500
},
{
"grad_norm": 0.26343870162963867,
"learning_rate": 4.9925593885666645e-05,
"loss": 0.0095,
"step": 10510
},
{
"grad_norm": 0.30816709995269775,
"learning_rate": 4.984292062570602e-05,
"loss": 0.0095,
"step": 10520
},
{
"grad_norm": 0.24630755186080933,
"learning_rate": 4.976024779519442e-05,
"loss": 0.009,
"step": 10530
},
{
"grad_norm": 0.2802034616470337,
"learning_rate": 4.9677575620156194e-05,
"loss": 0.0094,
"step": 10540
},
{
"grad_norm": 0.41168075799942017,
"learning_rate": 4.959490432661391e-05,
"loss": 0.012,
"step": 10550
},
{
"grad_norm": 0.2359355241060257,
"learning_rate": 4.9512234140587726e-05,
"loss": 0.0089,
"step": 10560
},
{
"grad_norm": 0.26179102063179016,
"learning_rate": 4.942956528809477e-05,
"loss": 0.011,
"step": 10570
},
{
"grad_norm": 0.21987201273441315,
"learning_rate": 4.934689799514854e-05,
"loss": 0.008,
"step": 10580
},
{
"grad_norm": 0.2534409165382385,
"learning_rate": 4.926423248775827e-05,
"loss": 0.0107,
"step": 10590
},
{
"grad_norm": 0.32994967699050903,
"learning_rate": 4.918156899192826e-05,
"loss": 0.0106,
"step": 10600
},
{
"grad_norm": 0.19284388422966003,
"learning_rate": 4.909890773365738e-05,
"loss": 0.0117,
"step": 10610
},
{
"grad_norm": 0.2593536078929901,
"learning_rate": 4.9016248938938344e-05,
"loss": 0.0125,
"step": 10620
},
{
"grad_norm": 0.34668925404548645,
"learning_rate": 4.8933592833757156e-05,
"loss": 0.0087,
"step": 10630
},
{
"grad_norm": 0.30534157156944275,
"learning_rate": 4.8850939644092435e-05,
"loss": 0.0119,
"step": 10640
},
{
"grad_norm": 0.2502377927303314,
"learning_rate": 4.876828959591485e-05,
"loss": 0.0087,
"step": 10650
},
{
"grad_norm": 0.2559858560562134,
"learning_rate": 4.8685642915186474e-05,
"loss": 0.0098,
"step": 10660
},
{
"grad_norm": 0.17750589549541473,
"learning_rate": 4.860299982786018e-05,
"loss": 0.0084,
"step": 10670
},
{
"grad_norm": 0.29540231823921204,
"learning_rate": 4.852036055987901e-05,
"loss": 0.0069,
"step": 10680
},
{
"grad_norm": 0.20562411844730377,
"learning_rate": 4.843772533717558e-05,
"loss": 0.0083,
"step": 10690
},
{
"grad_norm": 0.25039413571357727,
"learning_rate": 4.835509438567142e-05,
"loss": 0.0092,
"step": 10700
},
{
"grad_norm": 0.2514692544937134,
"learning_rate": 4.827246793127639e-05,
"loss": 0.0077,
"step": 10710
},
{
"grad_norm": 0.30662453174591064,
"learning_rate": 4.818984619988807e-05,
"loss": 0.0107,
"step": 10720
},
{
"grad_norm": 0.26225242018699646,
"learning_rate": 4.810722941739115e-05,
"loss": 0.0104,
"step": 10730
},
{
"grad_norm": 0.3066365122795105,
"learning_rate": 4.8024617809656684e-05,
"loss": 0.0085,
"step": 10740
},
{
"grad_norm": 0.27893194556236267,
"learning_rate": 4.794201160254171e-05,
"loss": 0.0077,
"step": 10750
},
{
"grad_norm": 0.2807646095752716,
"learning_rate": 4.785941102188844e-05,
"loss": 0.0096,
"step": 10760
},
{
"grad_norm": 0.299655944108963,
"learning_rate": 4.7776816293523686e-05,
"loss": 0.0108,
"step": 10770
},
{
"grad_norm": 0.2901594638824463,
"learning_rate": 4.769422764325832e-05,
"loss": 0.0086,
"step": 10780
},
{
"grad_norm": 0.3350774943828583,
"learning_rate": 4.76116452968865e-05,
"loss": 0.0124,
"step": 10790
},
{
"grad_norm": 0.29051363468170166,
"learning_rate": 4.752906948018525e-05,
"loss": 0.0097,
"step": 10800
},
{
"grad_norm": 0.26909443736076355,
"learning_rate": 4.7446500418913684e-05,
"loss": 0.0076,
"step": 10810
},
{
"grad_norm": 0.28262028098106384,
"learning_rate": 4.736393833881247e-05,
"loss": 0.0097,
"step": 10820
},
{
"grad_norm": 0.22907830774784088,
"learning_rate": 4.7281383465603194e-05,
"loss": 0.0094,
"step": 10830
},
{
"grad_norm": 0.38901764154434204,
"learning_rate": 4.71988360249877e-05,
"loss": 0.0085,
"step": 10840
},
{
"grad_norm": 0.2833888828754425,
"learning_rate": 4.7116296242647554e-05,
"loss": 0.0076,
"step": 10850
},
{
"grad_norm": 0.2758328318595886,
"learning_rate": 4.703376434424336e-05,
"loss": 0.0106,
"step": 10860
},
{
"grad_norm": 0.26278847455978394,
"learning_rate": 4.695124055541421e-05,
"loss": 0.01,
"step": 10870
},
{
"grad_norm": 0.3171476125717163,
"learning_rate": 4.6868725101776934e-05,
"loss": 0.0086,
"step": 10880
},
{
"grad_norm": 0.3477509915828705,
"learning_rate": 4.678621820892567e-05,
"loss": 0.0101,
"step": 10890
},
{
"grad_norm": 0.30033016204833984,
"learning_rate": 4.670372010243111e-05,
"loss": 0.0088,
"step": 10900
},
{
"grad_norm": 0.3467949628829956,
"learning_rate": 4.662123100783992e-05,
"loss": 0.0095,
"step": 10910
},
{
"grad_norm": 0.24811244010925293,
"learning_rate": 4.653875115067415e-05,
"loss": 0.0102,
"step": 10920
},
{
"grad_norm": 0.33560365438461304,
"learning_rate": 4.6456280756430545e-05,
"loss": 0.0096,
"step": 10930
},
{
"grad_norm": 0.26739415526390076,
"learning_rate": 4.637382005058004e-05,
"loss": 0.01,
"step": 10940
},
{
"grad_norm": 0.2914515733718872,
"learning_rate": 4.629136925856705e-05,
"loss": 0.0068,
"step": 10950
},
{
"grad_norm": 0.256402850151062,
"learning_rate": 4.6208928605808895e-05,
"loss": 0.0079,
"step": 10960
},
{
"grad_norm": 0.19222067296504974,
"learning_rate": 4.612649831769519e-05,
"loss": 0.0082,
"step": 10970
},
{
"grad_norm": 0.33148807287216187,
"learning_rate": 4.604407861958715e-05,
"loss": 0.0094,
"step": 10980
},
{
"grad_norm": 0.35632526874542236,
"learning_rate": 4.5961669736817114e-05,
"loss": 0.0102,
"step": 10990
},
{
"grad_norm": 0.3180091381072998,
"learning_rate": 4.5879271894687814e-05,
"loss": 0.0096,
"step": 11000
},
{
"grad_norm": 0.3015517592430115,
"learning_rate": 4.5796885318471826e-05,
"loss": 0.0075,
"step": 11010
},
{
"grad_norm": 0.24222354590892792,
"learning_rate": 4.571451023341086e-05,
"loss": 0.0083,
"step": 11020
},
{
"grad_norm": 0.3168736696243286,
"learning_rate": 4.563214686471527e-05,
"loss": 0.0091,
"step": 11030
},
{
"grad_norm": 0.20201337337493896,
"learning_rate": 4.5549795437563365e-05,
"loss": 0.0082,
"step": 11040
},
{
"grad_norm": 0.3413758873939514,
"learning_rate": 4.546745617710081e-05,
"loss": 0.0091,
"step": 11050
},
{
"grad_norm": 0.19693852961063385,
"learning_rate": 4.5385129308440014e-05,
"loss": 0.0089,
"step": 11060
},
{
"grad_norm": 0.2525441646575928,
"learning_rate": 4.530281505665944e-05,
"loss": 0.0088,
"step": 11070
},
{
"grad_norm": 0.2551596462726593,
"learning_rate": 4.5220513646803134e-05,
"loss": 0.0105,
"step": 11080
},
{
"grad_norm": 0.1865827739238739,
"learning_rate": 4.513822530388003e-05,
"loss": 0.0077,
"step": 11090
},
{
"grad_norm": 0.25236818194389343,
"learning_rate": 4.5055950252863296e-05,
"loss": 0.0078,
"step": 11100
},
{
"grad_norm": 0.2237762063741684,
"learning_rate": 4.4973688718689803e-05,
"loss": 0.0081,
"step": 11110
},
{
"grad_norm": 0.22772948443889618,
"learning_rate": 4.4891440926259406e-05,
"loss": 0.0076,
"step": 11120
},
{
"grad_norm": 0.23078931868076324,
"learning_rate": 4.480920710043443e-05,
"loss": 0.0088,
"step": 11130
},
{
"grad_norm": 0.19781197607517242,
"learning_rate": 4.4726987466039044e-05,
"loss": 0.0098,
"step": 11140
},
{
"grad_norm": 0.24051713943481445,
"learning_rate": 4.46447822478586e-05,
"loss": 0.0118,
"step": 11150
},
{
"grad_norm": 0.23845115303993225,
"learning_rate": 4.4562591670638974e-05,
"loss": 0.0066,
"step": 11160
},
{
"grad_norm": 0.3238190710544586,
"learning_rate": 4.4480415959086105e-05,
"loss": 0.0073,
"step": 11170
},
{
"grad_norm": 0.2884441912174225,
"learning_rate": 4.439825533786522e-05,
"loss": 0.0105,
"step": 11180
},
{
"grad_norm": 0.3803757429122925,
"learning_rate": 4.431611003160035e-05,
"loss": 0.0086,
"step": 11190
},
{
"grad_norm": 0.17523354291915894,
"learning_rate": 4.4233980264873636e-05,
"loss": 0.0093,
"step": 11200
},
{
"grad_norm": 0.2824532687664032,
"learning_rate": 4.4151866262224684e-05,
"loss": 0.0108,
"step": 11210
},
{
"grad_norm": 0.3265339732170105,
"learning_rate": 4.406976824815006e-05,
"loss": 0.0079,
"step": 11220
},
{
"grad_norm": 0.24177534878253937,
"learning_rate": 4.3987686447102595e-05,
"loss": 0.0085,
"step": 11230
},
{
"grad_norm": 0.21874769032001495,
"learning_rate": 4.3905621083490804e-05,
"loss": 0.0076,
"step": 11240
},
{
"grad_norm": 0.2425031214952469,
"learning_rate": 4.3823572381678286e-05,
"loss": 0.0089,
"step": 11250
},
{
"grad_norm": 0.2610780894756317,
"learning_rate": 4.374154056598301e-05,
"loss": 0.0118,
"step": 11260
},
{
"grad_norm": 0.36238113045692444,
"learning_rate": 4.3659525860676845e-05,
"loss": 0.0131,
"step": 11270
},
{
"grad_norm": 0.25657838582992554,
"learning_rate": 4.3577528489984854e-05,
"loss": 0.0081,
"step": 11280
},
{
"grad_norm": 0.27643704414367676,
"learning_rate": 4.349554867808476e-05,
"loss": 0.008,
"step": 11290
},
{
"grad_norm": 0.2851879596710205,
"learning_rate": 4.34135866491062e-05,
"loss": 0.0084,
"step": 11300
},
{
"grad_norm": 0.3266191780567169,
"learning_rate": 4.333164262713022e-05,
"loss": 0.0119,
"step": 11310
},
{
"grad_norm": 0.3193758428096771,
"learning_rate": 4.324971683618868e-05,
"loss": 0.0075,
"step": 11320
},
{
"grad_norm": 0.32918694615364075,
"learning_rate": 4.316780950026354e-05,
"loss": 0.0098,
"step": 11330
},
{
"grad_norm": 0.2517104148864746,
"learning_rate": 4.308592084328637e-05,
"loss": 0.0098,
"step": 11340
},
{
"grad_norm": 0.31300199031829834,
"learning_rate": 4.3004051089137576e-05,
"loss": 0.0095,
"step": 11350
},
{
"grad_norm": 0.2529290020465851,
"learning_rate": 4.292220046164597e-05,
"loss": 0.0073,
"step": 11360
},
{
"grad_norm": 0.23040375113487244,
"learning_rate": 4.2840369184588035e-05,
"loss": 0.0077,
"step": 11370
},
{
"grad_norm": 0.30496788024902344,
"learning_rate": 4.2758557481687345e-05,
"loss": 0.0096,
"step": 11380
},
{
"grad_norm": 0.2186974138021469,
"learning_rate": 4.267676557661403e-05,
"loss": 0.0084,
"step": 11390
},
{
"grad_norm": 0.251447856426239,
"learning_rate": 4.2594993692983955e-05,
"loss": 0.0082,
"step": 11400
},
{
"grad_norm": 0.29543429613113403,
"learning_rate": 4.251324205435837e-05,
"loss": 0.0093,
"step": 11410
},
{
"grad_norm": 0.3692440688610077,
"learning_rate": 4.243151088424312e-05,
"loss": 0.0108,
"step": 11420
},
{
"grad_norm": 0.27703699469566345,
"learning_rate": 4.234980040608813e-05,
"loss": 0.0118,
"step": 11430
},
{
"grad_norm": 0.2971097528934479,
"learning_rate": 4.22681108432867e-05,
"loss": 0.0097,
"step": 11440
},
{
"grad_norm": 0.2624056041240692,
"learning_rate": 4.2186442419174984e-05,
"loss": 0.0085,
"step": 11450
},
{
"grad_norm": 0.2369023561477661,
"learning_rate": 4.210479535703133e-05,
"loss": 0.0081,
"step": 11460
},
{
"grad_norm": 0.18464338779449463,
"learning_rate": 4.202316988007567e-05,
"loss": 0.0095,
"step": 11470
},
{
"grad_norm": 0.21752451360225677,
"learning_rate": 4.194156621146901e-05,
"loss": 0.008,
"step": 11480
},
{
"grad_norm": 0.20453378558158875,
"learning_rate": 4.1859984574312596e-05,
"loss": 0.0069,
"step": 11490
},
{
"grad_norm": 0.21684104204177856,
"learning_rate": 4.177842519164752e-05,
"loss": 0.0117,
"step": 11500
},
{
"grad_norm": 0.22559356689453125,
"learning_rate": 4.169688828645404e-05,
"loss": 0.0082,
"step": 11510
},
{
"grad_norm": 0.31731340289115906,
"learning_rate": 4.161537408165092e-05,
"loss": 0.0098,
"step": 11520
},
{
"grad_norm": 0.2910335063934326,
"learning_rate": 4.1533882800094924e-05,
"loss": 0.0092,
"step": 11530
},
{
"grad_norm": 0.2638309597969055,
"learning_rate": 4.145241466458005e-05,
"loss": 0.0097,
"step": 11540
},
{
"grad_norm": 0.2704751193523407,
"learning_rate": 4.13709698978371e-05,
"loss": 0.0066,
"step": 11550
},
{
"grad_norm": 0.24327205121517181,
"learning_rate": 4.1289548722532944e-05,
"loss": 0.0101,
"step": 11560
},
{
"grad_norm": 0.2812352180480957,
"learning_rate": 4.120815136126999e-05,
"loss": 0.0096,
"step": 11570
},
{
"grad_norm": 0.1935969889163971,
"learning_rate": 4.112677803658548e-05,
"loss": 0.0073,
"step": 11580
},
{
"grad_norm": 0.2166028618812561,
"learning_rate": 4.1045428970951e-05,
"loss": 0.0073,
"step": 11590
},
{
"grad_norm": 0.20732693374156952,
"learning_rate": 4.0964104386771785e-05,
"loss": 0.0068,
"step": 11600
},
{
"grad_norm": 0.36443421244621277,
"learning_rate": 4.0882804506386144e-05,
"loss": 0.009,
"step": 11610
},
{
"grad_norm": 0.2749943733215332,
"learning_rate": 4.080152955206485e-05,
"loss": 0.011,
"step": 11620
},
{
"grad_norm": 0.34515616297721863,
"learning_rate": 4.0720279746010505e-05,
"loss": 0.0071,
"step": 11630
},
{
"grad_norm": 0.35089951753616333,
"learning_rate": 4.063905531035699e-05,
"loss": 0.0085,
"step": 11640
},
{
"grad_norm": 0.271034300327301,
"learning_rate": 4.055785646716882e-05,
"loss": 0.0083,
"step": 11650
},
{
"grad_norm": 0.3269774913787842,
"learning_rate": 4.047668343844051e-05,
"loss": 0.0079,
"step": 11660
},
{
"grad_norm": 0.2104874551296234,
"learning_rate": 4.039553644609604e-05,
"loss": 0.0066,
"step": 11670
},
{
"grad_norm": 0.23121942579746246,
"learning_rate": 4.0314415711988176e-05,
"loss": 0.0067,
"step": 11680
},
{
"grad_norm": 0.3726987838745117,
"learning_rate": 4.023332145789792e-05,
"loss": 0.0108,
"step": 11690
},
{
"grad_norm": 0.28051552176475525,
"learning_rate": 4.015225390553385e-05,
"loss": 0.0099,
"step": 11700
},
{
"grad_norm": 0.30161479115486145,
"learning_rate": 4.007121327653158e-05,
"loss": 0.0085,
"step": 11710
},
{
"grad_norm": 0.2895340919494629,
"learning_rate": 3.9990199792453064e-05,
"loss": 0.0101,
"step": 11720
},
{
"grad_norm": 0.20414268970489502,
"learning_rate": 3.9909213674786103e-05,
"loss": 0.0067,
"step": 11730
},
{
"grad_norm": 0.17530548572540283,
"learning_rate": 3.982825514494363e-05,
"loss": 0.0077,
"step": 11740
},
{
"grad_norm": 0.2952740490436554,
"learning_rate": 3.974732442426319e-05,
"loss": 0.0076,
"step": 11750
},
{
"grad_norm": 0.2763718068599701,
"learning_rate": 3.966642173400629e-05,
"loss": 0.0081,
"step": 11760
},
{
"grad_norm": 0.24036982655525208,
"learning_rate": 3.9585547295357764e-05,
"loss": 0.0078,
"step": 11770
},
{
"grad_norm": 0.1980796903371811,
"learning_rate": 3.950470132942526e-05,
"loss": 0.0107,
"step": 11780
},
{
"grad_norm": 0.22392122447490692,
"learning_rate": 3.942388405723856e-05,
"loss": 0.0072,
"step": 11790
},
{
"grad_norm": 0.3153569996356964,
"learning_rate": 3.9343095699749e-05,
"loss": 0.0118,
"step": 11800
},
{
"grad_norm": 0.2480199635028839,
"learning_rate": 3.9262336477828874e-05,
"loss": 0.008,
"step": 11810
},
{
"grad_norm": 0.33051490783691406,
"learning_rate": 3.9181606612270794e-05,
"loss": 0.0103,
"step": 11820
},
{
"grad_norm": 0.2385939210653305,
"learning_rate": 3.910090632378713e-05,
"loss": 0.0092,
"step": 11830
},
{
"grad_norm": 0.23588930070400238,
"learning_rate": 3.90202358330094e-05,
"loss": 0.0078,
"step": 11840
},
{
"grad_norm": 0.24540403485298157,
"learning_rate": 3.8939595360487656e-05,
"loss": 0.0098,
"step": 11850
},
{
"grad_norm": 0.2659599184989929,
"learning_rate": 3.885898512668984e-05,
"loss": 0.0099,
"step": 11860
},
{
"grad_norm": 0.2260056883096695,
"learning_rate": 3.877840535200127e-05,
"loss": 0.0111,
"step": 11870
},
{
"grad_norm": 0.2496788501739502,
"learning_rate": 3.869785625672397e-05,
"loss": 0.0078,
"step": 11880
},
{
"grad_norm": 0.24494805932044983,
"learning_rate": 3.8617338061076094e-05,
"loss": 0.0092,
"step": 11890
},
{
"grad_norm": 0.27489352226257324,
"learning_rate": 3.853685098519132e-05,
"loss": 0.0143,
"step": 11900
},
{
"grad_norm": 0.1963207870721817,
"learning_rate": 3.845639524911823e-05,
"loss": 0.0076,
"step": 11910
},
{
"grad_norm": 0.26988306641578674,
"learning_rate": 3.837597107281974e-05,
"loss": 0.0084,
"step": 11920
},
{
"grad_norm": 0.2814962565898895,
"learning_rate": 3.829557867617247e-05,
"loss": 0.0087,
"step": 11930
},
{
"grad_norm": 0.2576271891593933,
"learning_rate": 3.821521827896618e-05,
"loss": 0.0074,
"step": 11940
},
{
"grad_norm": 0.35358384251594543,
"learning_rate": 3.81348901009031e-05,
"loss": 0.0116,
"step": 11950
},
{
"grad_norm": 0.23834174871444702,
"learning_rate": 3.805459436159741e-05,
"loss": 0.0092,
"step": 11960
},
{
"grad_norm": 0.198640838265419,
"learning_rate": 3.797433128057461e-05,
"loss": 0.0081,
"step": 11970
},
{
"grad_norm": 0.2608760595321655,
"learning_rate": 3.789410107727089e-05,
"loss": 0.0118,
"step": 11980
},
{
"grad_norm": 0.3514273762702942,
"learning_rate": 3.781390397103257e-05,
"loss": 0.0086,
"step": 11990
},
{
"grad_norm": 0.2969302833080292,
"learning_rate": 3.7733740181115455e-05,
"loss": 0.0094,
"step": 12000
},
{
"grad_norm": 0.2919679880142212,
"learning_rate": 3.7653609926684306e-05,
"loss": 0.0109,
"step": 12010
},
{
"grad_norm": 0.25600647926330566,
"learning_rate": 3.757351342681217e-05,
"loss": 0.0099,
"step": 12020
},
{
"grad_norm": 0.3260495066642761,
"learning_rate": 3.749345090047982e-05,
"loss": 0.01,
"step": 12030
},
{
"grad_norm": 0.28559738397598267,
"learning_rate": 3.741342256657515e-05,
"loss": 0.0103,
"step": 12040
},
{
"grad_norm": 0.2582564055919647,
"learning_rate": 3.7333428643892567e-05,
"loss": 0.0075,
"step": 12050
},
{
"grad_norm": 0.21928203105926514,
"learning_rate": 3.725346935113239e-05,
"loss": 0.0077,
"step": 12060
},
{
"grad_norm": 0.2669205963611603,
"learning_rate": 3.717354490690029e-05,
"loss": 0.0084,
"step": 12070
},
{
"grad_norm": 0.21352745592594147,
"learning_rate": 3.709365552970664e-05,
"loss": 0.0086,
"step": 12080
},
{
"grad_norm": 0.25914400815963745,
"learning_rate": 3.7013801437965945e-05,
"loss": 0.0072,
"step": 12090
},
{
"grad_norm": 0.27507898211479187,
"learning_rate": 3.693398284999623e-05,
"loss": 0.0085,
"step": 12100
},
{
"grad_norm": 0.2519773840904236,
"learning_rate": 3.6854199984018484e-05,
"loss": 0.0083,
"step": 12110
},
{
"grad_norm": 0.2433474063873291,
"learning_rate": 3.677445305815601e-05,
"loss": 0.0098,
"step": 12120
},
{
"grad_norm": 0.22368106245994568,
"learning_rate": 3.669474229043387e-05,
"loss": 0.0079,
"step": 12130
},
{
"grad_norm": 0.22219599783420563,
"learning_rate": 3.6615067898778235e-05,
"loss": 0.0061,
"step": 12140
},
{
"grad_norm": 0.2246261090040207,
"learning_rate": 3.6535430101015866e-05,
"loss": 0.0076,
"step": 12150
},
{
"grad_norm": 0.24473929405212402,
"learning_rate": 3.645582911487345e-05,
"loss": 0.0088,
"step": 12160
},
{
"grad_norm": 0.28539374470710754,
"learning_rate": 3.637626515797706e-05,
"loss": 0.0088,
"step": 12170
},
{
"grad_norm": 0.25773289799690247,
"learning_rate": 3.629673844785152e-05,
"loss": 0.0067,
"step": 12180
},
{
"grad_norm": 0.21043069660663605,
"learning_rate": 3.621724920191979e-05,
"loss": 0.0074,
"step": 12190
},
{
"grad_norm": 0.2802955210208893,
"learning_rate": 3.6137797637502444e-05,
"loss": 0.0099,
"step": 12200
},
{
"grad_norm": 0.22656969726085663,
"learning_rate": 3.6058383971817035e-05,
"loss": 0.0068,
"step": 12210
},
{
"grad_norm": 0.33790233731269836,
"learning_rate": 3.59790084219775e-05,
"loss": 0.0084,
"step": 12220
},
{
"grad_norm": 0.29881054162979126,
"learning_rate": 3.589967120499353e-05,
"loss": 0.0069,
"step": 12230
},
{
"grad_norm": 0.3114658296108246,
"learning_rate": 3.5820372537770075e-05,
"loss": 0.0073,
"step": 12240
},
{
"grad_norm": 0.25627395510673523,
"learning_rate": 3.5741112637106655e-05,
"loss": 0.0089,
"step": 12250
},
{
"grad_norm": 0.20619019865989685,
"learning_rate": 3.5661891719696804e-05,
"loss": 0.008,
"step": 12260
},
{
"grad_norm": 0.24497458338737488,
"learning_rate": 3.5582710002127504e-05,
"loss": 0.011,
"step": 12270
},
{
"grad_norm": 0.23106102645397186,
"learning_rate": 3.550356770087853e-05,
"loss": 0.0086,
"step": 12280
},
{
"grad_norm": 0.2801083028316498,
"learning_rate": 3.5424465032321914e-05,
"loss": 0.0081,
"step": 12290
},
{
"grad_norm": 0.27025705575942993,
"learning_rate": 3.5345402212721335e-05,
"loss": 0.0082,
"step": 12300
},
{
"grad_norm": 0.2505914270877838,
"learning_rate": 3.526637945823152e-05,
"loss": 0.009,
"step": 12310
},
{
"grad_norm": 0.2746097445487976,
"learning_rate": 3.518739698489767e-05,
"loss": 0.0064,
"step": 12320
},
{
"grad_norm": 0.2840472459793091,
"learning_rate": 3.510845500865485e-05,
"loss": 0.0073,
"step": 12330
},
{
"grad_norm": 0.27252042293548584,
"learning_rate": 3.502955374532739e-05,
"loss": 0.011,
"step": 12340
},
{
"grad_norm": 0.21478882431983948,
"learning_rate": 3.495069341062836e-05,
"loss": 0.0082,
"step": 12350
},
{
"grad_norm": 0.30031028389930725,
"learning_rate": 3.4871874220158896e-05,
"loss": 0.0115,
"step": 12360
},
{
"grad_norm": 0.29065871238708496,
"learning_rate": 3.479309638940762e-05,
"loss": 0.009,
"step": 12370
},
{
"grad_norm": 0.3161035478115082,
"learning_rate": 3.4714360133750146e-05,
"loss": 0.0085,
"step": 12380
},
{
"grad_norm": 0.29907113313674927,
"learning_rate": 3.463566566844839e-05,
"loss": 0.0078,
"step": 12390
},
{
"grad_norm": 0.2610228359699249,
"learning_rate": 3.4557013208650016e-05,
"loss": 0.0093,
"step": 12400
},
{
"grad_norm": 0.19939985871315002,
"learning_rate": 3.4478402969387857e-05,
"loss": 0.0075,
"step": 12410
},
{
"grad_norm": 0.22081556916236877,
"learning_rate": 3.4399835165579266e-05,
"loss": 0.0075,
"step": 12420
},
{
"grad_norm": 0.20169289410114288,
"learning_rate": 3.4321310012025645e-05,
"loss": 0.0063,
"step": 12430
},
{
"grad_norm": 0.2489660531282425,
"learning_rate": 3.424282772341176e-05,
"loss": 0.0074,
"step": 12440
},
{
"grad_norm": 0.28628960251808167,
"learning_rate": 3.416438851430519e-05,
"loss": 0.013,
"step": 12450
},
{
"grad_norm": 0.29382604360580444,
"learning_rate": 3.408599259915577e-05,
"loss": 0.0077,
"step": 12460
},
{
"grad_norm": 0.3110322952270508,
"learning_rate": 3.400764019229487e-05,
"loss": 0.0108,
"step": 12470
},
{
"grad_norm": 0.23523154854774475,
"learning_rate": 3.3929331507935035e-05,
"loss": 0.0069,
"step": 12480
},
{
"grad_norm": 0.273687481880188,
"learning_rate": 3.3851066760169196e-05,
"loss": 0.0075,
"step": 12490
},
{
"grad_norm": 0.19932150840759277,
"learning_rate": 3.377284616297021e-05,
"loss": 0.007,
"step": 12500
},
{
"grad_norm": 0.24185536801815033,
"learning_rate": 3.3694669930190166e-05,
"loss": 0.0068,
"step": 12510
},
{
"grad_norm": 0.2853364646434784,
"learning_rate": 3.36165382755599e-05,
"loss": 0.0075,
"step": 12520
},
{
"grad_norm": 0.23251241445541382,
"learning_rate": 3.35384514126884e-05,
"loss": 0.009,
"step": 12530
},
{
"grad_norm": 0.2282250076532364,
"learning_rate": 3.3460409555062154e-05,
"loss": 0.007,
"step": 12540
},
{
"grad_norm": 0.2507518231868744,
"learning_rate": 3.3382412916044645e-05,
"loss": 0.0073,
"step": 12550
},
{
"grad_norm": 0.17678532004356384,
"learning_rate": 3.330446170887566e-05,
"loss": 0.006,
"step": 12560
},
{
"grad_norm": 0.22255797684192657,
"learning_rate": 3.3226556146670834e-05,
"loss": 0.0069,
"step": 12570
},
{
"grad_norm": 0.22277423739433289,
"learning_rate": 3.314869644242102e-05,
"loss": 0.0067,
"step": 12580
},
{
"grad_norm": 0.24665354192256927,
"learning_rate": 3.3070882808991674e-05,
"loss": 0.0077,
"step": 12590
},
{
"grad_norm": 0.2221645563840866,
"learning_rate": 3.2993115459122305e-05,
"loss": 0.008,
"step": 12600
},
{
"grad_norm": 0.25327399373054504,
"learning_rate": 3.2915394605425835e-05,
"loss": 0.007,
"step": 12610
},
{
"grad_norm": 0.2379884123802185,
"learning_rate": 3.283772046038816e-05,
"loss": 0.0084,
"step": 12620
},
{
"grad_norm": 0.2786004841327667,
"learning_rate": 3.276009323636739e-05,
"loss": 0.007,
"step": 12630
},
{
"grad_norm": 0.23235200345516205,
"learning_rate": 3.268251314559344e-05,
"loss": 0.0071,
"step": 12640
},
{
"grad_norm": 0.2734534740447998,
"learning_rate": 3.2604980400167254e-05,
"loss": 0.0065,
"step": 12650
},
{
"grad_norm": 0.20516322553157806,
"learning_rate": 3.252749521206042e-05,
"loss": 0.0057,
"step": 12660
},
{
"grad_norm": 0.2804936170578003,
"learning_rate": 3.2450057793114494e-05,
"loss": 0.0058,
"step": 12670
},
{
"grad_norm": 0.23371683061122894,
"learning_rate": 3.2372668355040435e-05,
"loss": 0.0072,
"step": 12680
},
{
"grad_norm": 0.24273928999900818,
"learning_rate": 3.2295327109418005e-05,
"loss": 0.0076,
"step": 12690
},
{
"grad_norm": 0.2837400734424591,
"learning_rate": 3.221803426769518e-05,
"loss": 0.0059,
"step": 12700
},
{
"grad_norm": 0.1821887344121933,
"learning_rate": 3.214079004118768e-05,
"loss": 0.0062,
"step": 12710
},
{
"grad_norm": 0.27825450897216797,
"learning_rate": 3.2063594641078234e-05,
"loss": 0.0075,
"step": 12720
},
{
"grad_norm": 0.30339550971984863,
"learning_rate": 3.198644827841616e-05,
"loss": 0.0066,
"step": 12730
},
{
"grad_norm": 0.23971441388130188,
"learning_rate": 3.1909351164116654e-05,
"loss": 0.0066,
"step": 12740
},
{
"grad_norm": 0.18594853579998016,
"learning_rate": 3.183230350896026e-05,
"loss": 0.0085,
"step": 12750
},
{
"grad_norm": 0.2139957696199417,
"learning_rate": 3.1755305523592337e-05,
"loss": 0.0084,
"step": 12760
},
{
"grad_norm": 0.282361775636673,
"learning_rate": 3.167835741852245e-05,
"loss": 0.0078,
"step": 12770
},
{
"grad_norm": 0.23739394545555115,
"learning_rate": 3.160145940412378e-05,
"loss": 0.0095,
"step": 12780
},
{
"grad_norm": 0.22370891273021698,
"learning_rate": 3.1524611690632545e-05,
"loss": 0.0059,
"step": 12790
},
{
"grad_norm": 0.2226971536874771,
"learning_rate": 3.144781448814746e-05,
"loss": 0.0074,
"step": 12800
},
{
"grad_norm": 0.27098506689071655,
"learning_rate": 3.1371068006629145e-05,
"loss": 0.006,
"step": 12810
},
{
"grad_norm": 0.23244546353816986,
"learning_rate": 3.129437245589956e-05,
"loss": 0.0071,
"step": 12820
},
{
"grad_norm": 0.23745259642601013,
"learning_rate": 3.121772804564143e-05,
"loss": 0.0066,
"step": 12830
},
{
"grad_norm": 0.25362375378608704,
"learning_rate": 3.11411349853976e-05,
"loss": 0.0064,
"step": 12840
},
{
"grad_norm": 0.2746460437774658,
"learning_rate": 3.10645934845706e-05,
"loss": 0.0064,
"step": 12850
},
{
"grad_norm": 0.2910458445549011,
"learning_rate": 3.098810375242196e-05,
"loss": 0.008,
"step": 12860
},
{
"grad_norm": 0.1930484175682068,
"learning_rate": 3.0911665998071704e-05,
"loss": 0.0058,
"step": 12870
},
{
"grad_norm": 0.26174429059028625,
"learning_rate": 3.083528043049774e-05,
"loss": 0.0061,
"step": 12880
},
{
"grad_norm": 0.237471342086792,
"learning_rate": 3.0758947258535255e-05,
"loss": 0.0082,
"step": 12890
},
{
"grad_norm": 0.2530456483364105,
"learning_rate": 3.068266669087625e-05,
"loss": 0.0078,
"step": 12900
},
{
"grad_norm": 0.21137702465057373,
"learning_rate": 3.060643893606887e-05,
"loss": 0.006,
"step": 12910
},
{
"grad_norm": 0.1681884080171585,
"learning_rate": 3.053026420251693e-05,
"loss": 0.0068,
"step": 12920
},
{
"grad_norm": 0.25592848658561707,
"learning_rate": 3.0454142698479183e-05,
"loss": 0.0069,
"step": 12930
},
{
"grad_norm": 0.2088608294725418,
"learning_rate": 3.0378074632068954e-05,
"loss": 0.0087,
"step": 12940
},
{
"grad_norm": 0.22707495093345642,
"learning_rate": 3.0302060211253408e-05,
"loss": 0.0066,
"step": 12950
},
{
"grad_norm": 0.2611274719238281,
"learning_rate": 3.0226099643853073e-05,
"loss": 0.0075,
"step": 12960
},
{
"grad_norm": 0.20526066422462463,
"learning_rate": 3.0150193137541283e-05,
"loss": 0.006,
"step": 12970
},
{
"grad_norm": 0.26528921723365784,
"learning_rate": 3.0074340899843467e-05,
"loss": 0.0074,
"step": 12980
},
{
"grad_norm": 0.294217973947525,
"learning_rate": 2.999854313813677e-05,
"loss": 0.0112,
"step": 12990
},
{
"grad_norm": 0.2798357307910919,
"learning_rate": 2.9922800059649382e-05,
"loss": 0.0074,
"step": 13000
},
{
"grad_norm": 0.25348061323165894,
"learning_rate": 2.9847111871459976e-05,
"loss": 0.008,
"step": 13010
},
{
"grad_norm": 0.25376802682876587,
"learning_rate": 2.977147878049721e-05,
"loss": 0.0072,
"step": 13020
},
{
"grad_norm": 0.23868699371814728,
"learning_rate": 2.9695900993539006e-05,
"loss": 0.0069,
"step": 13030
},
{
"grad_norm": 0.24584409594535828,
"learning_rate": 2.9620378717212183e-05,
"loss": 0.0113,
"step": 13040
},
{
"grad_norm": 0.2317948341369629,
"learning_rate": 2.9544912157991745e-05,
"loss": 0.0087,
"step": 13050
},
{
"grad_norm": 0.3443603217601776,
"learning_rate": 2.9469501522200405e-05,
"loss": 0.006,
"step": 13060
},
{
"grad_norm": 0.1847532093524933,
"learning_rate": 2.9394147016007946e-05,
"loss": 0.0064,
"step": 13070
},
{
"grad_norm": 0.2874523401260376,
"learning_rate": 2.9318848845430702e-05,
"loss": 0.0058,
"step": 13080
},
{
"grad_norm": 0.20815210044384003,
"learning_rate": 2.9243607216331013e-05,
"loss": 0.0061,
"step": 13090
},
{
"grad_norm": 0.1735462099313736,
"learning_rate": 2.916842233441661e-05,
"loss": 0.0049,
"step": 13100
},
{
"grad_norm": 0.2947513163089752,
"learning_rate": 2.90932944052401e-05,
"loss": 0.0078,
"step": 13110
},
{
"grad_norm": 0.185662180185318,
"learning_rate": 2.9018223634198354e-05,
"loss": 0.006,
"step": 13120
},
{
"grad_norm": 0.159962996840477,
"learning_rate": 2.8943210226532025e-05,
"loss": 0.0085,
"step": 13130
},
{
"grad_norm": 0.23721252381801605,
"learning_rate": 2.8868254387324857e-05,
"loss": 0.0082,
"step": 13140
},
{
"grad_norm": 0.2839058041572571,
"learning_rate": 2.8793356321503306e-05,
"loss": 0.0084,
"step": 13150
},
{
"grad_norm": 0.3259228765964508,
"learning_rate": 2.87185162338358e-05,
"loss": 0.0067,
"step": 13160
},
{
"grad_norm": 0.2640034258365631,
"learning_rate": 2.8643734328932253e-05,
"loss": 0.0087,
"step": 13170
},
{
"grad_norm": 0.2673685848712921,
"learning_rate": 2.856901081124359e-05,
"loss": 0.0068,
"step": 13180
},
{
"grad_norm": 0.2533598840236664,
"learning_rate": 2.8494345885061002e-05,
"loss": 0.0084,
"step": 13190
},
{
"grad_norm": 0.27771884202957153,
"learning_rate": 2.8419739754515616e-05,
"loss": 0.008,
"step": 13200
},
{
"grad_norm": 0.18158374726772308,
"learning_rate": 2.8345192623577666e-05,
"loss": 0.0066,
"step": 13210
},
{
"grad_norm": 0.20402629673480988,
"learning_rate": 2.8270704696056193e-05,
"loss": 0.0085,
"step": 13220
},
{
"grad_norm": 0.22918453812599182,
"learning_rate": 2.8196276175598367e-05,
"loss": 0.0089,
"step": 13230
},
{
"grad_norm": 0.26338842511177063,
"learning_rate": 2.8121907265688884e-05,
"loss": 0.0097,
"step": 13240
},
{
"grad_norm": 0.2552448511123657,
"learning_rate": 2.804759816964957e-05,
"loss": 0.007,
"step": 13250
},
{
"grad_norm": 0.2544209957122803,
"learning_rate": 2.797334909063857e-05,
"loss": 0.0068,
"step": 13260
},
{
"grad_norm": 0.20611265301704407,
"learning_rate": 2.7899160231650056e-05,
"loss": 0.0057,
"step": 13270
},
{
"grad_norm": 0.2455790638923645,
"learning_rate": 2.7825031795513585e-05,
"loss": 0.0064,
"step": 13280
},
{
"grad_norm": 0.16692101955413818,
"learning_rate": 2.775096398489341e-05,
"loss": 0.0054,
"step": 13290
},
{
"grad_norm": 0.19417795538902283,
"learning_rate": 2.7676957002288163e-05,
"loss": 0.0069,
"step": 13300
},
{
"grad_norm": 0.246934175491333,
"learning_rate": 2.760301105003003e-05,
"loss": 0.0067,
"step": 13310
},
{
"grad_norm": 0.18580402433872223,
"learning_rate": 2.752912633028446e-05,
"loss": 0.0065,
"step": 13320
},
{
"grad_norm": 0.18205900490283966,
"learning_rate": 2.7455303045049474e-05,
"loss": 0.0065,
"step": 13330
},
{
"grad_norm": 0.24550791084766388,
"learning_rate": 2.7381541396155098e-05,
"loss": 0.0069,
"step": 13340
},
{
"grad_norm": 0.2719643712043762,
"learning_rate": 2.730784158526286e-05,
"loss": 0.0059,
"step": 13350
},
{
"grad_norm": 0.19565212726593018,
"learning_rate": 2.723420381386521e-05,
"loss": 0.0072,
"step": 13360
},
{
"grad_norm": 0.2644125521183014,
"learning_rate": 2.7160628283285018e-05,
"loss": 0.0074,
"step": 13370
},
{
"grad_norm": 0.20331737399101257,
"learning_rate": 2.7087115194675007e-05,
"loss": 0.0059,
"step": 13380
},
{
"grad_norm": 0.18478238582611084,
"learning_rate": 2.701366474901712e-05,
"loss": 0.005,
"step": 13390
},
{
"grad_norm": 0.2070227712392807,
"learning_rate": 2.6940277147122085e-05,
"loss": 0.0063,
"step": 13400
},
{
"grad_norm": 0.24267740547657013,
"learning_rate": 2.686695258962878e-05,
"loss": 0.0061,
"step": 13410
},
{
"grad_norm": 0.1909894049167633,
"learning_rate": 2.679369127700375e-05,
"loss": 0.0056,
"step": 13420
},
{
"grad_norm": 0.19610315561294556,
"learning_rate": 2.672049340954067e-05,
"loss": 0.007,
"step": 13430
},
{
"grad_norm": 0.2082044929265976,
"learning_rate": 2.6647359187359676e-05,
"loss": 0.0086,
"step": 13440
},
{
"grad_norm": 0.22661364078521729,
"learning_rate": 2.6574288810406946e-05,
"loss": 0.0072,
"step": 13450
},
{
"grad_norm": 0.2202974408864975,
"learning_rate": 2.6501282478454083e-05,
"loss": 0.0077,
"step": 13460
},
{
"grad_norm": 0.2299632430076599,
"learning_rate": 2.6428340391097618e-05,
"loss": 0.0109,
"step": 13470
},
{
"grad_norm": 0.3141759932041168,
"learning_rate": 2.6355462747758485e-05,
"loss": 0.0065,
"step": 13480
},
{
"grad_norm": 0.23412247002124786,
"learning_rate": 2.6282649747681304e-05,
"loss": 0.0059,
"step": 13490
},
{
"grad_norm": 0.2105311006307602,
"learning_rate": 2.620990158993406e-05,
"loss": 0.0054,
"step": 13500
},
{
"grad_norm": 0.21451091766357422,
"learning_rate": 2.6137218473407477e-05,
"loss": 0.007,
"step": 13510
},
{
"grad_norm": 0.18160901963710785,
"learning_rate": 2.606460059681436e-05,
"loss": 0.0051,
"step": 13520
},
{
"grad_norm": 0.2187230885028839,
"learning_rate": 2.599204815868928e-05,
"loss": 0.0053,
"step": 13530
},
{
"grad_norm": 0.20273251831531525,
"learning_rate": 2.5919561357387756e-05,
"loss": 0.0054,
"step": 13540
},
{
"grad_norm": 0.23314417898654938,
"learning_rate": 2.5847140391085972e-05,
"loss": 0.006,
"step": 13550
},
{
"grad_norm": 0.2788357436656952,
"learning_rate": 2.5774785457780103e-05,
"loss": 0.0093,
"step": 13560
},
{
"grad_norm": 0.20386862754821777,
"learning_rate": 2.5702496755285753e-05,
"loss": 0.0064,
"step": 13570
},
{
"grad_norm": 0.21573026478290558,
"learning_rate": 2.5630274481237483e-05,
"loss": 0.0057,
"step": 13580
},
{
"grad_norm": 0.1928456872701645,
"learning_rate": 2.5558118833088197e-05,
"loss": 0.0065,
"step": 13590
},
{
"grad_norm": 0.23650315403938293,
"learning_rate": 2.548603000810872e-05,
"loss": 0.0049,
"step": 13600
},
{
"grad_norm": 0.19839148223400116,
"learning_rate": 2.5414008203387152e-05,
"loss": 0.0068,
"step": 13610
},
{
"grad_norm": 0.24050778150558472,
"learning_rate": 2.534205361582834e-05,
"loss": 0.006,
"step": 13620
},
{
"grad_norm": 0.18059442937374115,
"learning_rate": 2.527016644215338e-05,
"loss": 0.0067,
"step": 13630
},
{
"grad_norm": 0.18381164968013763,
"learning_rate": 2.519834687889905e-05,
"loss": 0.007,
"step": 13640
},
{
"grad_norm": 0.2277163565158844,
"learning_rate": 2.5126595122417295e-05,
"loss": 0.0074,
"step": 13650
},
{
"grad_norm": 0.1679578423500061,
"learning_rate": 2.5054911368874713e-05,
"loss": 0.0052,
"step": 13660
},
{
"grad_norm": 0.19412367045879364,
"learning_rate": 2.4983295814251916e-05,
"loss": 0.0077,
"step": 13670
},
{
"grad_norm": 0.22263267636299133,
"learning_rate": 2.4911748654343105e-05,
"loss": 0.006,
"step": 13680
},
{
"grad_norm": 0.23625227808952332,
"learning_rate": 2.4840270084755463e-05,
"loss": 0.0062,
"step": 13690
},
{
"grad_norm": 0.22303491830825806,
"learning_rate": 2.4768860300908685e-05,
"loss": 0.0059,
"step": 13700
},
{
"grad_norm": 0.20615343749523163,
"learning_rate": 2.469751949803443e-05,
"loss": 0.0069,
"step": 13710
},
{
"grad_norm": 0.22624443471431732,
"learning_rate": 2.4626247871175666e-05,
"loss": 0.008,
"step": 13720
},
{
"grad_norm": 0.25985249876976013,
"learning_rate": 2.4555045615186346e-05,
"loss": 0.0065,
"step": 13730
},
{
"grad_norm": 0.2458672672510147,
"learning_rate": 2.4483912924730677e-05,
"loss": 0.0072,
"step": 13740
},
{
"grad_norm": 0.21012453734874725,
"learning_rate": 2.4412849994282742e-05,
"loss": 0.0048,
"step": 13750
},
{
"grad_norm": 0.14065012335777283,
"learning_rate": 2.434185701812592e-05,
"loss": 0.0052,
"step": 13760
},
{
"grad_norm": 0.19618520140647888,
"learning_rate": 2.4270934190352218e-05,
"loss": 0.0071,
"step": 13770
},
{
"grad_norm": 0.1482352614402771,
"learning_rate": 2.4200081704861998e-05,
"loss": 0.0074,
"step": 13780
},
{
"grad_norm": 0.20872756838798523,
"learning_rate": 2.412929975536321e-05,
"loss": 0.0069,
"step": 13790
},
{
"grad_norm": 0.20830373466014862,
"learning_rate": 2.4058588535371017e-05,
"loss": 0.0062,
"step": 13800
},
{
"grad_norm": 0.2667636275291443,
"learning_rate": 2.3987948238207243e-05,
"loss": 0.0057,
"step": 13810
},
{
"grad_norm": 0.24201737344264984,
"learning_rate": 2.3917379056999678e-05,
"loss": 0.0067,
"step": 13820
},
{
"grad_norm": 0.17350678145885468,
"learning_rate": 2.3846881184681824e-05,
"loss": 0.0046,
"step": 13830
},
{
"grad_norm": 0.17855527997016907,
"learning_rate": 2.377645481399214e-05,
"loss": 0.0052,
"step": 13840
},
{
"grad_norm": 0.24410289525985718,
"learning_rate": 2.3706100137473667e-05,
"loss": 0.0069,
"step": 13850
},
{
"grad_norm": 0.17104095220565796,
"learning_rate": 2.3635817347473394e-05,
"loss": 0.0078,
"step": 13860
},
{
"grad_norm": 0.20315547287464142,
"learning_rate": 2.3565606636141757e-05,
"loss": 0.007,
"step": 13870
},
{
"grad_norm": 0.24569718539714813,
"learning_rate": 2.3495468195432203e-05,
"loss": 0.0071,
"step": 13880
},
{
"grad_norm": 0.1952163428068161,
"learning_rate": 2.3425402217100507e-05,
"loss": 0.0052,
"step": 13890
},
{
"grad_norm": 0.18253405392169952,
"learning_rate": 2.3355408892704424e-05,
"loss": 0.0068,
"step": 13900
},
{
"grad_norm": 0.16422688961029053,
"learning_rate": 2.3285488413603003e-05,
"loss": 0.0046,
"step": 13910
},
{
"grad_norm": 0.2229805588722229,
"learning_rate": 2.321564097095615e-05,
"loss": 0.0048,
"step": 13920
},
{
"grad_norm": 0.3146781921386719,
"learning_rate": 2.3145866755724142e-05,
"loss": 0.0062,
"step": 13930
},
{
"grad_norm": 0.24816946685314178,
"learning_rate": 2.307616595866699e-05,
"loss": 0.0063,
"step": 13940
},
{
"grad_norm": 0.20742735266685486,
"learning_rate": 2.3006538770344032e-05,
"loss": 0.0067,
"step": 13950
},
{
"grad_norm": 0.17962752282619476,
"learning_rate": 2.293698538111334e-05,
"loss": 0.0056,
"step": 13960
},
{
"grad_norm": 0.2705751359462738,
"learning_rate": 2.28675059811312e-05,
"loss": 0.0065,
"step": 13970
},
{
"grad_norm": 0.23835845291614532,
"learning_rate": 2.279810076035167e-05,
"loss": 0.0071,
"step": 13980
},
{
"grad_norm": 0.24078679084777832,
"learning_rate": 2.272876990852596e-05,
"loss": 0.0076,
"step": 13990
},
{
"grad_norm": 0.22596397995948792,
"learning_rate": 2.265951361520195e-05,
"loss": 0.0101,
"step": 14000
},
{
"grad_norm": 0.19356085360050201,
"learning_rate": 2.2590332069723748e-05,
"loss": 0.0048,
"step": 14010
},
{
"grad_norm": 0.16043022274971008,
"learning_rate": 2.2521225461231004e-05,
"loss": 0.0067,
"step": 14020
},
{
"grad_norm": 0.2451379895210266,
"learning_rate": 2.2452193978658597e-05,
"loss": 0.0049,
"step": 14030
},
{
"grad_norm": 0.19134803116321564,
"learning_rate": 2.238323781073594e-05,
"loss": 0.0071,
"step": 14040
},
{
"grad_norm": 0.1770501583814621,
"learning_rate": 2.2314357145986552e-05,
"loss": 0.007,
"step": 14050
},
{
"grad_norm": 0.19176732003688812,
"learning_rate": 2.224555217272757e-05,
"loss": 0.007,
"step": 14060
},
{
"grad_norm": 0.2289755940437317,
"learning_rate": 2.2176823079069127e-05,
"loss": 0.0065,
"step": 14070
},
{
"grad_norm": 0.27378588914871216,
"learning_rate": 2.210817005291398e-05,
"loss": 0.0058,
"step": 14080
},
{
"grad_norm": 0.2435673326253891,
"learning_rate": 2.203959328195686e-05,
"loss": 0.0069,
"step": 14090
},
{
"grad_norm": 0.22978395223617554,
"learning_rate": 2.1971092953684026e-05,
"loss": 0.006,
"step": 14100
},
{
"grad_norm": 0.19038143754005432,
"learning_rate": 2.1902669255372788e-05,
"loss": 0.0054,
"step": 14110
},
{
"grad_norm": 0.16993741691112518,
"learning_rate": 2.1834322374090897e-05,
"loss": 0.0064,
"step": 14120
},
{
"grad_norm": 0.22536249458789825,
"learning_rate": 2.1766052496696153e-05,
"loss": 0.0061,
"step": 14130
},
{
"grad_norm": 0.1695421189069748,
"learning_rate": 2.169785980983577e-05,
"loss": 0.0053,
"step": 14140
},
{
"grad_norm": 0.20765681564807892,
"learning_rate": 2.162974449994593e-05,
"loss": 0.0055,
"step": 14150
},
{
"grad_norm": 0.20967355370521545,
"learning_rate": 2.1561706753251337e-05,
"loss": 0.0071,
"step": 14160
},
{
"grad_norm": 0.24757470190525055,
"learning_rate": 2.1493746755764544e-05,
"loss": 0.006,
"step": 14170
},
{
"grad_norm": 0.17080722749233246,
"learning_rate": 2.1425864693285635e-05,
"loss": 0.0087,
"step": 14180
},
{
"grad_norm": 0.1654953807592392,
"learning_rate": 2.1358060751401547e-05,
"loss": 0.0062,
"step": 14190
},
{
"grad_norm": 0.23503398895263672,
"learning_rate": 2.129033511548566e-05,
"loss": 0.0078,
"step": 14200
},
{
"grad_norm": 0.19658979773521423,
"learning_rate": 2.1222687970697315e-05,
"loss": 0.0043,
"step": 14210
},
{
"grad_norm": 0.24506372213363647,
"learning_rate": 2.1155119501981173e-05,
"loss": 0.0062,
"step": 14220
},
{
"grad_norm": 0.23663446307182312,
"learning_rate": 2.1087629894066895e-05,
"loss": 0.0053,
"step": 14230
},
{
"grad_norm": 0.20866233110427856,
"learning_rate": 2.1020219331468473e-05,
"loss": 0.0076,
"step": 14240
},
{
"grad_norm": 0.2166251242160797,
"learning_rate": 2.095288799848379e-05,
"loss": 0.0069,
"step": 14250
},
{
"grad_norm": 0.17026379704475403,
"learning_rate": 2.088563607919417e-05,
"loss": 0.0052,
"step": 14260
},
{
"grad_norm": 0.26690876483917236,
"learning_rate": 2.0818463757463786e-05,
"loss": 0.0055,
"step": 14270
},
{
"grad_norm": 0.21364854276180267,
"learning_rate": 2.0751371216939175e-05,
"loss": 0.0062,
"step": 14280
},
{
"grad_norm": 0.25960573554039,
"learning_rate": 2.068435864104882e-05,
"loss": 0.0057,
"step": 14290
},
{
"grad_norm": 0.2138037234544754,
"learning_rate": 2.0617426213002506e-05,
"loss": 0.0089,
"step": 14300
},
{
"grad_norm": 0.20547112822532654,
"learning_rate": 2.055057411579097e-05,
"loss": 0.0059,
"step": 14310
},
{
"grad_norm": 0.32553014159202576,
"learning_rate": 2.0483802532185286e-05,
"loss": 0.0066,
"step": 14320
},
{
"grad_norm": 0.22871890664100647,
"learning_rate": 2.041711164473638e-05,
"loss": 0.0072,
"step": 14330
},
{
"grad_norm": 0.20716866850852966,
"learning_rate": 2.0350501635774637e-05,
"loss": 0.0055,
"step": 14340
},
{
"grad_norm": 0.17182990908622742,
"learning_rate": 2.0283972687409247e-05,
"loss": 0.0063,
"step": 14350
},
{
"grad_norm": 0.16304871439933777,
"learning_rate": 2.021752498152784e-05,
"loss": 0.0061,
"step": 14360
},
{
"grad_norm": 0.22496426105499268,
"learning_rate": 2.015115869979589e-05,
"loss": 0.0046,
"step": 14370
},
{
"grad_norm": 0.1372428685426712,
"learning_rate": 2.0084874023656265e-05,
"loss": 0.0049,
"step": 14380
},
{
"grad_norm": 0.22879809141159058,
"learning_rate": 2.001867113432877e-05,
"loss": 0.0075,
"step": 14390
},
{
"grad_norm": 0.18383388221263885,
"learning_rate": 1.995255021280954e-05,
"loss": 0.0059,
"step": 14400
},
{
"grad_norm": 0.1956668198108673,
"learning_rate": 1.9886511439870688e-05,
"loss": 0.007,
"step": 14410
},
{
"grad_norm": 0.2605588436126709,
"learning_rate": 1.9820554996059675e-05,
"loss": 0.0064,
"step": 14420
},
{
"grad_norm": 0.21275591850280762,
"learning_rate": 1.9754681061698893e-05,
"loss": 0.0047,
"step": 14430
},
{
"grad_norm": 0.15767215192317963,
"learning_rate": 1.9688889816885185e-05,
"loss": 0.0051,
"step": 14440
},
{
"grad_norm": 0.206788569688797,
"learning_rate": 1.962318144148928e-05,
"loss": 0.0053,
"step": 14450
},
{
"grad_norm": 0.21391050517559052,
"learning_rate": 1.955755611515539e-05,
"loss": 0.0054,
"step": 14460
},
{
"grad_norm": 0.15545658767223358,
"learning_rate": 1.9492014017300642e-05,
"loss": 0.0068,
"step": 14470
},
{
"grad_norm": 0.1992073655128479,
"learning_rate": 1.942655532711461e-05,
"loss": 0.005,
"step": 14480
},
{
"grad_norm": 0.17155469954013824,
"learning_rate": 1.9361180223558882e-05,
"loss": 0.0068,
"step": 14490
},
{
"grad_norm": 0.1756027191877365,
"learning_rate": 1.929588888536647e-05,
"loss": 0.0063,
"step": 14500
},
{
"grad_norm": 0.21194574236869812,
"learning_rate": 1.9230681491041425e-05,
"loss": 0.0057,
"step": 14510
},
{
"grad_norm": 0.18223026394844055,
"learning_rate": 1.9165558218858264e-05,
"loss": 0.0052,
"step": 14520
},
{
"grad_norm": 0.250331848859787,
"learning_rate": 1.9100519246861505e-05,
"loss": 0.0053,
"step": 14530
},
{
"grad_norm": 0.2652829885482788,
"learning_rate": 1.9035564752865248e-05,
"loss": 0.0076,
"step": 14540
},
{
"grad_norm": 0.23563922941684723,
"learning_rate": 1.897069491445258e-05,
"loss": 0.0057,
"step": 14550
},
{
"grad_norm": 0.21190163493156433,
"learning_rate": 1.890590990897515e-05,
"loss": 0.0057,
"step": 14560
},
{
"grad_norm": 0.19106173515319824,
"learning_rate": 1.884120991355272e-05,
"loss": 0.0059,
"step": 14570
},
{
"grad_norm": 0.19955740869045258,
"learning_rate": 1.8776595105072576e-05,
"loss": 0.0054,
"step": 14580
},
{
"grad_norm": 0.1958319991827011,
"learning_rate": 1.8712065660189166e-05,
"loss": 0.0054,
"step": 14590
},
{
"grad_norm": 0.20281006395816803,
"learning_rate": 1.8647621755323513e-05,
"loss": 0.0054,
"step": 14600
},
{
"grad_norm": 0.2089354693889618,
"learning_rate": 1.858326356666278e-05,
"loss": 0.0055,
"step": 14610
},
{
"grad_norm": 0.3210635781288147,
"learning_rate": 1.851899127015983e-05,
"loss": 0.0061,
"step": 14620
},
{
"grad_norm": 0.2990008592605591,
"learning_rate": 1.8454805041532626e-05,
"loss": 0.0067,
"step": 14630
},
{
"grad_norm": 0.2500159740447998,
"learning_rate": 1.8390705056263906e-05,
"loss": 0.0056,
"step": 14640
},
{
"grad_norm": 0.2781897485256195,
"learning_rate": 1.832669148960057e-05,
"loss": 0.0049,
"step": 14650
},
{
"grad_norm": 0.17028845846652985,
"learning_rate": 1.8262764516553233e-05,
"loss": 0.0085,
"step": 14660
},
{
"grad_norm": 0.2419242262840271,
"learning_rate": 1.8198924311895843e-05,
"loss": 0.0053,
"step": 14670
},
{
"grad_norm": 0.13097338378429413,
"learning_rate": 1.813517105016505e-05,
"loss": 0.0059,
"step": 14680
},
{
"grad_norm": 0.20473894476890564,
"learning_rate": 1.8071504905659888e-05,
"loss": 0.0055,
"step": 14690
},
{
"grad_norm": 0.1552768498659134,
"learning_rate": 1.800792605244109e-05,
"loss": 0.0053,
"step": 14700
},
{
"grad_norm": 0.14139609038829803,
"learning_rate": 1.7944434664330844e-05,
"loss": 0.0042,
"step": 14710
},
{
"grad_norm": 0.14989487826824188,
"learning_rate": 1.7881030914912212e-05,
"loss": 0.0051,
"step": 14720
},
{
"grad_norm": 0.2136131227016449,
"learning_rate": 1.7817714977528577e-05,
"loss": 0.0056,
"step": 14730
},
{
"grad_norm": 0.22079499065876007,
"learning_rate": 1.7754487025283332e-05,
"loss": 0.0078,
"step": 14740
},
{
"grad_norm": 0.18154866993427277,
"learning_rate": 1.7691347231039275e-05,
"loss": 0.0057,
"step": 14750
},
{
"grad_norm": 0.17814518511295319,
"learning_rate": 1.7628295767418164e-05,
"loss": 0.0047,
"step": 14760
},
{
"grad_norm": 0.17428627610206604,
"learning_rate": 1.7565332806800333e-05,
"loss": 0.0055,
"step": 14770
},
{
"grad_norm": 0.2982683777809143,
"learning_rate": 1.750245852132408e-05,
"loss": 0.0052,
"step": 14780
},
{
"grad_norm": 0.2404620200395584,
"learning_rate": 1.7439673082885323e-05,
"loss": 0.0057,
"step": 14790
},
{
"grad_norm": 0.14670974016189575,
"learning_rate": 1.7376976663137047e-05,
"loss": 0.0056,
"step": 14800
},
{
"grad_norm": 0.2664196193218231,
"learning_rate": 1.7314369433488853e-05,
"loss": 0.0051,
"step": 14810
},
{
"grad_norm": 0.19232824444770813,
"learning_rate": 1.7251851565106548e-05,
"loss": 0.0066,
"step": 14820
},
{
"grad_norm": 0.1771378368139267,
"learning_rate": 1.7189423228911574e-05,
"loss": 0.0058,
"step": 14830
},
{
"grad_norm": 0.17615680396556854,
"learning_rate": 1.7127084595580606e-05,
"loss": 0.0061,
"step": 14840
},
{
"grad_norm": 0.20852655172348022,
"learning_rate": 1.706483583554513e-05,
"loss": 0.0051,
"step": 14850
},
{
"grad_norm": 0.16545358300209045,
"learning_rate": 1.700267711899083e-05,
"loss": 0.0042,
"step": 14860
},
{
"grad_norm": 0.20421037077903748,
"learning_rate": 1.69406086158573e-05,
"loss": 0.0044,
"step": 14870
},
{
"grad_norm": 0.19449642300605774,
"learning_rate": 1.6878630495837455e-05,
"loss": 0.0061,
"step": 14880
},
{
"grad_norm": 0.23133471608161926,
"learning_rate": 1.681674292837707e-05,
"loss": 0.0064,
"step": 14890
},
{
"grad_norm": 0.14292384684085846,
"learning_rate": 1.6754946082674444e-05,
"loss": 0.0058,
"step": 14900
},
{
"grad_norm": 0.12971968948841095,
"learning_rate": 1.6693240127679748e-05,
"loss": 0.0044,
"step": 14910
},
{
"grad_norm": 0.1865580976009369,
"learning_rate": 1.663162523209475e-05,
"loss": 0.0071,
"step": 14920
},
{
"grad_norm": 0.18029199540615082,
"learning_rate": 1.6570101564372193e-05,
"loss": 0.007,
"step": 14930
},
{
"grad_norm": 0.24925662577152252,
"learning_rate": 1.650866929271543e-05,
"loss": 0.0062,
"step": 14940
},
{
"grad_norm": 0.17494648694992065,
"learning_rate": 1.644732858507797e-05,
"loss": 0.0055,
"step": 14950
},
{
"grad_norm": 0.15730054676532745,
"learning_rate": 1.6386079609162943e-05,
"loss": 0.0047,
"step": 14960
},
{
"grad_norm": 0.2508449852466583,
"learning_rate": 1.6324922532422742e-05,
"loss": 0.0098,
"step": 14970
},
{
"grad_norm": 0.190720796585083,
"learning_rate": 1.6263857522058434e-05,
"loss": 0.0054,
"step": 14980
},
{
"grad_norm": 0.1414359211921692,
"learning_rate": 1.6202884745019443e-05,
"loss": 0.0046,
"step": 14990
},
{
"grad_norm": 0.14060048758983612,
"learning_rate": 1.614200436800304e-05,
"loss": 0.0061,
"step": 15000
},
{
"grad_norm": 0.19313286244869232,
"learning_rate": 1.6081216557453814e-05,
"loss": 0.0049,
"step": 15010
},
{
"grad_norm": 0.17659884691238403,
"learning_rate": 1.6020521479563367e-05,
"loss": 0.0054,
"step": 15020
},
{
"grad_norm": 0.14806364476680756,
"learning_rate": 1.5959919300269654e-05,
"loss": 0.0041,
"step": 15030
},
{
"grad_norm": 0.15692979097366333,
"learning_rate": 1.5899410185256764e-05,
"loss": 0.0037,
"step": 15040
},
{
"grad_norm": 0.14825497567653656,
"learning_rate": 1.583899429995431e-05,
"loss": 0.0046,
"step": 15050
},
{
"grad_norm": 0.1588299572467804,
"learning_rate": 1.5778671809536993e-05,
"loss": 0.0043,
"step": 15060
},
{
"grad_norm": 0.13866159319877625,
"learning_rate": 1.5718442878924246e-05,
"loss": 0.0044,
"step": 15070
},
{
"grad_norm": 0.2178276777267456,
"learning_rate": 1.5658307672779593e-05,
"loss": 0.0055,
"step": 15080
},
{
"grad_norm": 0.1646864116191864,
"learning_rate": 1.5598266355510427e-05,
"loss": 0.0045,
"step": 15090
},
{
"grad_norm": 0.21160311996936798,
"learning_rate": 1.553831909126744e-05,
"loss": 0.0049,
"step": 15100
},
{
"grad_norm": 0.23382562398910522,
"learning_rate": 1.5478466043944135e-05,
"loss": 0.0066,
"step": 15110
},
{
"grad_norm": 0.18930543959140778,
"learning_rate": 1.5418707377176468e-05,
"loss": 0.0053,
"step": 15120
},
{
"grad_norm": 0.1883104145526886,
"learning_rate": 1.535904325434233e-05,
"loss": 0.0051,
"step": 15130
},
{
"grad_norm": 0.21340157091617584,
"learning_rate": 1.529947383856118e-05,
"loss": 0.0073,
"step": 15140
},
{
"grad_norm": 0.23821984231472015,
"learning_rate": 1.5239999292693524e-05,
"loss": 0.0067,
"step": 15150
},
{
"grad_norm": 0.14447997510433197,
"learning_rate": 1.5180619779340505e-05,
"loss": 0.0067,
"step": 15160
},
{
"grad_norm": 0.14597798883914948,
"learning_rate": 1.5121335460843428e-05,
"loss": 0.0051,
"step": 15170
},
{
"grad_norm": 0.19506734609603882,
"learning_rate": 1.5062146499283347e-05,
"loss": 0.0062,
"step": 15180
},
{
"grad_norm": 0.18002669513225555,
"learning_rate": 1.5003053056480643e-05,
"loss": 0.007,
"step": 15190
},
{
"grad_norm": 0.2782810926437378,
"learning_rate": 1.4944055293994551e-05,
"loss": 0.0051,
"step": 15200
},
{
"grad_norm": 0.2039763480424881,
"learning_rate": 1.4885153373122656e-05,
"loss": 0.006,
"step": 15210
},
{
"grad_norm": 0.1816200613975525,
"learning_rate": 1.482634745490059e-05,
"loss": 0.005,
"step": 15220
},
{
"grad_norm": 0.15879617631435394,
"learning_rate": 1.4767637700101466e-05,
"loss": 0.0047,
"step": 15230
},
{
"grad_norm": 0.19043923914432526,
"learning_rate": 1.4709024269235528e-05,
"loss": 0.0051,
"step": 15240
},
{
"grad_norm": 0.2104177474975586,
"learning_rate": 1.4650507322549684e-05,
"loss": 0.0047,
"step": 15250
},
{
"grad_norm": 0.22558549046516418,
"learning_rate": 1.4592087020026972e-05,
"loss": 0.0055,
"step": 15260
},
{
"grad_norm": 0.1903344690799713,
"learning_rate": 1.4533763521386318e-05,
"loss": 0.0051,
"step": 15270
},
{
"grad_norm": 0.21042105555534363,
"learning_rate": 1.44755369860819e-05,
"loss": 0.0059,
"step": 15280
},
{
"grad_norm": 0.2088128626346588,
"learning_rate": 1.441740757330287e-05,
"loss": 0.0048,
"step": 15290
},
{
"grad_norm": 0.2233678549528122,
"learning_rate": 1.4359375441972844e-05,
"loss": 0.0043,
"step": 15300
},
{
"grad_norm": 0.13520531356334686,
"learning_rate": 1.4301440750749395e-05,
"loss": 0.0038,
"step": 15310
},
{
"grad_norm": 0.21610109508037567,
"learning_rate": 1.4243603658023808e-05,
"loss": 0.0056,
"step": 15320
},
{
"grad_norm": 0.16219377517700195,
"learning_rate": 1.4185864321920444e-05,
"loss": 0.0043,
"step": 15330
},
{
"grad_norm": 0.2393598109483719,
"learning_rate": 1.4128222900296485e-05,
"loss": 0.0057,
"step": 15340
},
{
"grad_norm": 0.16220656037330627,
"learning_rate": 1.407067955074135e-05,
"loss": 0.0056,
"step": 15350
},
{
"grad_norm": 0.22726091742515564,
"learning_rate": 1.4013234430576356e-05,
"loss": 0.0054,
"step": 15360
},
{
"grad_norm": 0.1691230684518814,
"learning_rate": 1.3955887696854286e-05,
"loss": 0.0066,
"step": 15370
},
{
"grad_norm": 0.24777837097644806,
"learning_rate": 1.38986395063589e-05,
"loss": 0.0073,
"step": 15380
},
{
"grad_norm": 0.2199578583240509,
"learning_rate": 1.3841490015604597e-05,
"loss": 0.0059,
"step": 15390
},
{
"grad_norm": 0.22531665861606598,
"learning_rate": 1.3784439380835879e-05,
"loss": 0.0068,
"step": 15400
},
{
"grad_norm": 0.16674892604351044,
"learning_rate": 1.3727487758026986e-05,
"loss": 0.0055,
"step": 15410
},
{
"grad_norm": 0.19342897832393646,
"learning_rate": 1.3670635302881525e-05,
"loss": 0.0056,
"step": 15420
},
{
"grad_norm": 0.12783855199813843,
"learning_rate": 1.3613882170831888e-05,
"loss": 0.0066,
"step": 15430
},
{
"grad_norm": 0.17184005677700043,
"learning_rate": 1.355722851703901e-05,
"loss": 0.0069,
"step": 15440
},
{
"grad_norm": 0.20452845096588135,
"learning_rate": 1.3500674496391814e-05,
"loss": 0.0047,
"step": 15450
},
{
"grad_norm": 0.13389019668102264,
"learning_rate": 1.3444220263506795e-05,
"loss": 0.0066,
"step": 15460
},
{
"grad_norm": 0.24266892671585083,
"learning_rate": 1.3387865972727714e-05,
"loss": 0.0067,
"step": 15470
},
{
"grad_norm": 0.1648254692554474,
"learning_rate": 1.3331611778125036e-05,
"loss": 0.0065,
"step": 15480
},
{
"grad_norm": 0.20072923600673676,
"learning_rate": 1.3275457833495564e-05,
"loss": 0.0102,
"step": 15490
},
{
"grad_norm": 0.18705320358276367,
"learning_rate": 1.3219404292362065e-05,
"loss": 0.0077,
"step": 15500
},
{
"grad_norm": 0.1857132464647293,
"learning_rate": 1.3163451307972751e-05,
"loss": 0.0067,
"step": 15510
},
{
"grad_norm": 0.2010364979505539,
"learning_rate": 1.3107599033300977e-05,
"loss": 0.006,
"step": 15520
},
{
"grad_norm": 0.1669953614473343,
"learning_rate": 1.305184762104471e-05,
"loss": 0.006,
"step": 15530
},
{
"grad_norm": 0.16274145245552063,
"learning_rate": 1.2996197223626178e-05,
"loss": 0.0041,
"step": 15540
},
{
"grad_norm": 0.18755671381950378,
"learning_rate": 1.2940647993191457e-05,
"loss": 0.0053,
"step": 15550
},
{
"grad_norm": 0.20959579944610596,
"learning_rate": 1.2885200081610005e-05,
"loss": 0.005,
"step": 15560
},
{
"grad_norm": 0.14389164745807648,
"learning_rate": 1.2829853640474316e-05,
"loss": 0.0045,
"step": 15570
},
{
"grad_norm": 0.15106278657913208,
"learning_rate": 1.2774608821099438e-05,
"loss": 0.0044,
"step": 15580
},
{
"grad_norm": 0.1607772409915924,
"learning_rate": 1.2719465774522577e-05,
"loss": 0.0055,
"step": 15590
},
{
"grad_norm": 0.1950947344303131,
"learning_rate": 1.2664424651502755e-05,
"loss": 0.0064,
"step": 15600
},
{
"grad_norm": 0.20571626722812653,
"learning_rate": 1.260948560252026e-05,
"loss": 0.0054,
"step": 15610
},
{
"grad_norm": 0.1459115594625473,
"learning_rate": 1.2554648777776396e-05,
"loss": 0.0048,
"step": 15620
},
{
"grad_norm": 0.1424199789762497,
"learning_rate": 1.2499914327192919e-05,
"loss": 0.0056,
"step": 15630
},
{
"grad_norm": 0.13897298276424408,
"learning_rate": 1.2445282400411722e-05,
"loss": 0.0055,
"step": 15640
},
{
"grad_norm": 0.13337059319019318,
"learning_rate": 1.2390753146794437e-05,
"loss": 0.0052,
"step": 15650
},
{
"grad_norm": 0.22809644043445587,
"learning_rate": 1.2336326715421925e-05,
"loss": 0.0055,
"step": 15660
},
{
"grad_norm": 0.13422560691833496,
"learning_rate": 1.2282003255094005e-05,
"loss": 0.0051,
"step": 15670
},
{
"grad_norm": 0.18627096712589264,
"learning_rate": 1.2227782914328928e-05,
"loss": 0.0052,
"step": 15680
},
{
"grad_norm": 0.18976987898349762,
"learning_rate": 1.2173665841363018e-05,
"loss": 0.0044,
"step": 15690
},
{
"grad_norm": 0.16836141049861908,
"learning_rate": 1.211965218415032e-05,
"loss": 0.0041,
"step": 15700
},
{
"grad_norm": 0.14203710854053497,
"learning_rate": 1.2065742090362082e-05,
"loss": 0.0055,
"step": 15710
},
{
"grad_norm": 0.16482926905155182,
"learning_rate": 1.2011935707386457e-05,
"loss": 0.0055,
"step": 15720
},
{
"grad_norm": 0.23353098332881927,
"learning_rate": 1.1958233182328044e-05,
"loss": 0.0052,
"step": 15730
},
{
"grad_norm": 0.2196880280971527,
"learning_rate": 1.1904634662007474e-05,
"loss": 0.0052,
"step": 15740
},
{
"grad_norm": 0.20127354562282562,
"learning_rate": 1.1851140292961088e-05,
"loss": 0.0052,
"step": 15750
},
{
"grad_norm": 0.19632349908351898,
"learning_rate": 1.1797750221440424e-05,
"loss": 0.006,
"step": 15760
},
{
"grad_norm": 0.22671037912368774,
"learning_rate": 1.1744464593411897e-05,
"loss": 0.0061,
"step": 15770
},
{
"grad_norm": 0.1899847388267517,
"learning_rate": 1.1691283554556399e-05,
"loss": 0.005,
"step": 15780
},
{
"grad_norm": 0.2320375293493271,
"learning_rate": 1.1638207250268834e-05,
"loss": 0.0048,
"step": 15790
},
{
"grad_norm": 0.1803273856639862,
"learning_rate": 1.158523582565782e-05,
"loss": 0.0052,
"step": 15800
},
{
"grad_norm": 0.12426243722438812,
"learning_rate": 1.1532369425545192e-05,
"loss": 0.0037,
"step": 15810
},
{
"grad_norm": 0.15763874351978302,
"learning_rate": 1.1479608194465662e-05,
"loss": 0.0092,
"step": 15820
},
{
"grad_norm": 0.16793958842754364,
"learning_rate": 1.1426952276666442e-05,
"loss": 0.0044,
"step": 15830
},
{
"grad_norm": 0.18338541686534882,
"learning_rate": 1.1374401816106778e-05,
"loss": 0.0073,
"step": 15840
},
{
"grad_norm": 0.16605933010578156,
"learning_rate": 1.1321956956457646e-05,
"loss": 0.005,
"step": 15850
},
{
"grad_norm": 0.23071573674678802,
"learning_rate": 1.1269617841101277e-05,
"loss": 0.0058,
"step": 15860
},
{
"grad_norm": 0.16531772911548615,
"learning_rate": 1.1217384613130804e-05,
"loss": 0.0046,
"step": 15870
},
{
"grad_norm": 0.17097795009613037,
"learning_rate": 1.11652574153499e-05,
"loss": 0.0056,
"step": 15880
},
{
"grad_norm": 0.14396753907203674,
"learning_rate": 1.1113236390272303e-05,
"loss": 0.0052,
"step": 15890
},
{
"grad_norm": 0.19578398764133453,
"learning_rate": 1.106132168012155e-05,
"loss": 0.0074,
"step": 15900
},
{
"grad_norm": 0.15604498982429504,
"learning_rate": 1.1009513426830448e-05,
"loss": 0.0057,
"step": 15910
},
{
"grad_norm": 0.1655254364013672,
"learning_rate": 1.0957811772040777e-05,
"loss": 0.0081,
"step": 15920
},
{
"grad_norm": 0.20122294127941132,
"learning_rate": 1.0906216857102913e-05,
"loss": 0.005,
"step": 15930
},
{
"grad_norm": 0.1938413679599762,
"learning_rate": 1.0854728823075355e-05,
"loss": 0.0055,
"step": 15940
},
{
"grad_norm": 0.14376655220985413,
"learning_rate": 1.0803347810724452e-05,
"loss": 0.0068,
"step": 15950
},
{
"grad_norm": 0.14233747124671936,
"learning_rate": 1.0752073960523911e-05,
"loss": 0.0051,
"step": 15960
},
{
"grad_norm": 0.14280758798122406,
"learning_rate": 1.070090741265447e-05,
"loss": 0.0047,
"step": 15970
},
{
"grad_norm": 0.14656981825828552,
"learning_rate": 1.0649848307003547e-05,
"loss": 0.0051,
"step": 15980
},
{
"grad_norm": 0.13522081077098846,
"learning_rate": 1.0598896783164757e-05,
"loss": 0.0049,
"step": 15990
},
{
"grad_norm": 0.12241239100694656,
"learning_rate": 1.0548052980437645e-05,
"loss": 0.0073,
"step": 16000
},
{
"grad_norm": 0.1648959517478943,
"learning_rate": 1.049731703782722e-05,
"loss": 0.0055,
"step": 16010
},
{
"grad_norm": 0.13855500519275665,
"learning_rate": 1.0446689094043587e-05,
"loss": 0.0073,
"step": 16020
},
{
"grad_norm": 0.1769500970840454,
"learning_rate": 1.039616928750165e-05,
"loss": 0.0049,
"step": 16030
},
{
"grad_norm": 0.15365423262119293,
"learning_rate": 1.0345757756320612e-05,
"loss": 0.0043,
"step": 16040
},
{
"grad_norm": 0.1871398538351059,
"learning_rate": 1.0295454638323666e-05,
"loss": 0.0055,
"step": 16050
},
{
"grad_norm": 0.22193288803100586,
"learning_rate": 1.0245260071037632e-05,
"loss": 0.0053,
"step": 16060
},
{
"grad_norm": 0.18220891058444977,
"learning_rate": 1.0195174191692518e-05,
"loss": 0.0043,
"step": 16070
},
{
"grad_norm": 0.15648461878299713,
"learning_rate": 1.014519713722124e-05,
"loss": 0.0052,
"step": 16080
},
{
"grad_norm": 0.18102756142616272,
"learning_rate": 1.0095329044259132e-05,
"loss": 0.0043,
"step": 16090
},
{
"grad_norm": 0.14919497072696686,
"learning_rate": 1.004557004914365e-05,
"loss": 0.005,
"step": 16100
},
{
"grad_norm": 0.17376764118671417,
"learning_rate": 9.995920287914007e-06,
"loss": 0.0042,
"step": 16110
},
{
"grad_norm": 0.13198114931583405,
"learning_rate": 9.946379896310737e-06,
"loss": 0.004,
"step": 16120
},
{
"grad_norm": 0.16041940450668335,
"learning_rate": 9.896949009775396e-06,
"loss": 0.0048,
"step": 16130
},
{
"grad_norm": 0.20756278932094574,
"learning_rate": 9.847627763450134e-06,
"loss": 0.0049,
"step": 16140
},
{
"grad_norm": 0.22414100170135498,
"learning_rate": 9.798416292177337e-06,
"loss": 0.0045,
"step": 16150
},
{
"grad_norm": 0.22440825402736664,
"learning_rate": 9.74931473049932e-06,
"loss": 0.0054,
"step": 16160
},
{
"grad_norm": 0.1703796684741974,
"learning_rate": 9.700323212657847e-06,
"loss": 0.0042,
"step": 16170
},
{
"grad_norm": 0.15423569083213806,
"learning_rate": 9.65144187259388e-06,
"loss": 0.0053,
"step": 16180
},
{
"grad_norm": 0.16278226673603058,
"learning_rate": 9.602670843947132e-06,
"loss": 0.0049,
"step": 16190
},
{
"grad_norm": 0.1522931307554245,
"learning_rate": 9.554010260055713e-06,
"loss": 0.0046,
"step": 16200
},
{
"grad_norm": 0.1324155330657959,
"learning_rate": 9.505460253955834e-06,
"loss": 0.0044,
"step": 16210
},
{
"grad_norm": 0.12835343182086945,
"learning_rate": 9.457020958381324e-06,
"loss": 0.0055,
"step": 16220
},
{
"grad_norm": 0.13485650718212128,
"learning_rate": 9.408692505763395e-06,
"loss": 0.0048,
"step": 16230
},
{
"grad_norm": 0.17823177576065063,
"learning_rate": 9.360475028230181e-06,
"loss": 0.0051,
"step": 16240
},
{
"grad_norm": 0.13854046165943146,
"learning_rate": 9.312368657606412e-06,
"loss": 0.0038,
"step": 16250
},
{
"grad_norm": 0.20396800339221954,
"learning_rate": 9.264373525413096e-06,
"loss": 0.0072,
"step": 16260
},
{
"grad_norm": 0.13611064851284027,
"learning_rate": 9.216489762867058e-06,
"loss": 0.0042,
"step": 16270
},
{
"grad_norm": 0.23162850737571716,
"learning_rate": 9.168717500880708e-06,
"loss": 0.0068,
"step": 16280
},
{
"grad_norm": 0.14248082041740417,
"learning_rate": 9.121056870061574e-06,
"loss": 0.0035,
"step": 16290
},
{
"grad_norm": 0.20380237698554993,
"learning_rate": 9.073508000711983e-06,
"loss": 0.0048,
"step": 16300
},
{
"grad_norm": 0.18490472435951233,
"learning_rate": 9.026071022828758e-06,
"loss": 0.0062,
"step": 16310
},
{
"grad_norm": 0.22372393310070038,
"learning_rate": 8.978746066102771e-06,
"loss": 0.0049,
"step": 16320
},
{
"grad_norm": 0.15033039450645447,
"learning_rate": 8.931533259918634e-06,
"loss": 0.0037,
"step": 16330
},
{
"grad_norm": 0.13687333464622498,
"learning_rate": 8.884432733354382e-06,
"loss": 0.004,
"step": 16340
},
{
"grad_norm": 0.15738479793071747,
"learning_rate": 8.837444615181029e-06,
"loss": 0.005,
"step": 16350
},
{
"grad_norm": 0.2072679102420807,
"learning_rate": 8.790569033862323e-06,
"loss": 0.0062,
"step": 16360
},
{
"grad_norm": 0.19508348405361176,
"learning_rate": 8.7438061175543e-06,
"loss": 0.0039,
"step": 16370
},
{
"grad_norm": 0.18191103637218475,
"learning_rate": 8.697155994104978e-06,
"loss": 0.0037,
"step": 16380
},
{
"grad_norm": 0.16201141476631165,
"learning_rate": 8.650618791054033e-06,
"loss": 0.0046,
"step": 16390
},
{
"grad_norm": 0.1699906885623932,
"learning_rate": 8.604194635632373e-06,
"loss": 0.0056,
"step": 16400
},
{
"grad_norm": 0.14311489462852478,
"learning_rate": 8.557883654761906e-06,
"loss": 0.0051,
"step": 16410
},
{
"grad_norm": 0.17263031005859375,
"learning_rate": 8.511685975055061e-06,
"loss": 0.0052,
"step": 16420
},
{
"grad_norm": 0.15772181749343872,
"learning_rate": 8.46560172281452e-06,
"loss": 0.0038,
"step": 16430
},
{
"grad_norm": 0.1426163911819458,
"learning_rate": 8.419631024032893e-06,
"loss": 0.004,
"step": 16440
},
{
"grad_norm": 0.15554185211658478,
"learning_rate": 8.373774004392293e-06,
"loss": 0.0048,
"step": 16450
},
{
"grad_norm": 0.16801729798316956,
"learning_rate": 8.32803078926409e-06,
"loss": 0.0058,
"step": 16460
},
{
"grad_norm": 0.15965138375759125,
"learning_rate": 8.282401503708454e-06,
"loss": 0.0044,
"step": 16470
},
{
"grad_norm": 0.1287917047739029,
"learning_rate": 8.23688627247412e-06,
"loss": 0.0051,
"step": 16480
},
{
"grad_norm": 0.16028323769569397,
"learning_rate": 8.191485219998007e-06,
"loss": 0.0042,
"step": 16490
},
{
"grad_norm": 0.11672808229923248,
"learning_rate": 8.146198470404843e-06,
"loss": 0.0046,
"step": 16500
},
{
"grad_norm": 0.13132800161838531,
"learning_rate": 8.101026147506897e-06,
"loss": 0.003,
"step": 16510
},
{
"grad_norm": 0.1448666900396347,
"learning_rate": 8.05596837480353e-06,
"loss": 0.0053,
"step": 16520
},
{
"grad_norm": 0.13154615461826324,
"learning_rate": 8.011025275480998e-06,
"loss": 0.0051,
"step": 16530
},
{
"grad_norm": 0.1266883760690689,
"learning_rate": 7.966196972412027e-06,
"loss": 0.0037,
"step": 16540
},
{
"grad_norm": 0.1132773905992508,
"learning_rate": 7.92148358815547e-06,
"loss": 0.0034,
"step": 16550
},
{
"grad_norm": 0.16018636524677277,
"learning_rate": 7.87688524495604e-06,
"loss": 0.0049,
"step": 16560
},
{
"grad_norm": 0.1342909187078476,
"learning_rate": 7.83240206474386e-06,
"loss": 0.0054,
"step": 16570
},
{
"grad_norm": 0.12503987550735474,
"learning_rate": 7.788034169134272e-06,
"loss": 0.0043,
"step": 16580
},
{
"grad_norm": 0.11737470328807831,
"learning_rate": 7.743781679427414e-06,
"loss": 0.0043,
"step": 16590
},
{
"grad_norm": 0.11028105020523071,
"learning_rate": 7.699644716607895e-06,
"loss": 0.0038,
"step": 16600
},
{
"grad_norm": 0.09312699735164642,
"learning_rate": 7.655623401344486e-06,
"loss": 0.0036,
"step": 16610
},
{
"grad_norm": 0.1259354054927826,
"learning_rate": 7.611717853989775e-06,
"loss": 0.0045,
"step": 16620
},
{
"grad_norm": 0.14075908064842224,
"learning_rate": 7.567928194579854e-06,
"loss": 0.0058,
"step": 16630
},
{
"grad_norm": 0.16171559691429138,
"learning_rate": 7.524254542833997e-06,
"loss": 0.0049,
"step": 16640
},
{
"grad_norm": 0.1568921059370041,
"learning_rate": 7.480697018154286e-06,
"loss": 0.009,
"step": 16650
},
{
"grad_norm": 0.14268898963928223,
"learning_rate": 7.437255739625332e-06,
"loss": 0.0036,
"step": 16660
},
{
"grad_norm": 0.2208922803401947,
"learning_rate": 7.393930826013923e-06,
"loss": 0.0064,
"step": 16670
},
{
"grad_norm": 0.1548379510641098,
"learning_rate": 7.350722395768722e-06,
"loss": 0.0043,
"step": 16680
},
{
"grad_norm": 0.1384071558713913,
"learning_rate": 7.307630567019963e-06,
"loss": 0.0036,
"step": 16690
},
{
"grad_norm": 0.10652858018875122,
"learning_rate": 7.264655457579e-06,
"loss": 0.0041,
"step": 16700
},
{
"grad_norm": 0.10340376943349838,
"learning_rate": 7.221797184938184e-06,
"loss": 0.0059,
"step": 16710
},
{
"grad_norm": 0.11764318495988846,
"learning_rate": 7.179055866270373e-06,
"loss": 0.0036,
"step": 16720
},
{
"grad_norm": 0.21147853136062622,
"learning_rate": 7.136431618428707e-06,
"loss": 0.0045,
"step": 16730
},
{
"grad_norm": 0.19434772431850433,
"learning_rate": 7.09392455794628e-06,
"loss": 0.0062,
"step": 16740
},
{
"grad_norm": 0.14892810583114624,
"learning_rate": 7.051534801035725e-06,
"loss": 0.0053,
"step": 16750
},
{
"grad_norm": 0.17711889743804932,
"learning_rate": 7.00926246358905e-06,
"loss": 0.0055,
"step": 16760
},
{
"grad_norm": 0.13662099838256836,
"learning_rate": 6.967107661177191e-06,
"loss": 0.0056,
"step": 16770
},
{
"grad_norm": 0.13581331074237823,
"learning_rate": 6.925070509049786e-06,
"loss": 0.0053,
"step": 16780
},
{
"grad_norm": 0.09662957489490509,
"learning_rate": 6.883151122134812e-06,
"loss": 0.0037,
"step": 16790
},
{
"grad_norm": 0.14342249929904938,
"learning_rate": 6.8413496150382394e-06,
"loss": 0.0047,
"step": 16800
},
{
"grad_norm": 0.07608351111412048,
"learning_rate": 6.7996661020438165e-06,
"loss": 0.0029,
"step": 16810
},
{
"grad_norm": 0.14846809208393097,
"learning_rate": 6.758100697112662e-06,
"loss": 0.0038,
"step": 16820
},
{
"grad_norm": 0.18177205324172974,
"learning_rate": 6.716653513883026e-06,
"loss": 0.005,
"step": 16830
},
{
"grad_norm": 0.15042972564697266,
"learning_rate": 6.675324665669913e-06,
"loss": 0.0039,
"step": 16840
},
{
"grad_norm": 0.17908424139022827,
"learning_rate": 6.634114265464803e-06,
"loss": 0.0056,
"step": 16850
},
{
"grad_norm": 0.1749325394630432,
"learning_rate": 6.59302242593538e-06,
"loss": 0.0041,
"step": 16860
},
{
"grad_norm": 0.11305996030569077,
"learning_rate": 6.552049259425141e-06,
"loss": 0.0042,
"step": 16870
},
{
"grad_norm": 0.11477645486593246,
"learning_rate": 6.511194877953181e-06,
"loss": 0.0046,
"step": 16880
},
{
"grad_norm": 0.1085256040096283,
"learning_rate": 6.470459393213813e-06,
"loss": 0.0046,
"step": 16890
},
{
"grad_norm": 0.16111595928668976,
"learning_rate": 6.429842916576279e-06,
"loss": 0.0049,
"step": 16900
},
{
"grad_norm": 0.1468374878168106,
"learning_rate": 6.389345559084503e-06,
"loss": 0.0036,
"step": 16910
},
{
"grad_norm": 0.17139410972595215,
"learning_rate": 6.348967431456682e-06,
"loss": 0.0064,
"step": 16920
},
{
"grad_norm": 0.17251119017601013,
"learning_rate": 6.30870864408511e-06,
"loss": 0.0036,
"step": 16930
},
{
"grad_norm": 0.14736191928386688,
"learning_rate": 6.268569307035754e-06,
"loss": 0.0041,
"step": 16940
},
{
"grad_norm": 0.1500120311975479,
"learning_rate": 6.228549530048022e-06,
"loss": 0.0058,
"step": 16950
},
{
"grad_norm": 0.14884720742702484,
"learning_rate": 6.1886494225344814e-06,
"loss": 0.0064,
"step": 16960
},
{
"grad_norm": 0.15160712599754333,
"learning_rate": 6.148869093580479e-06,
"loss": 0.0056,
"step": 16970
},
{
"grad_norm": 0.1517210155725479,
"learning_rate": 6.109208651943921e-06,
"loss": 0.0047,
"step": 16980
},
{
"grad_norm": 0.155134379863739,
"learning_rate": 6.069668206054946e-06,
"loss": 0.0038,
"step": 16990
},
{
"grad_norm": 0.14380772411823273,
"learning_rate": 6.0302478640156145e-06,
"loss": 0.0053,
"step": 17000
},
{
"grad_norm": 0.16657549142837524,
"learning_rate": 5.990947733599644e-06,
"loss": 0.0068,
"step": 17010
},
{
"grad_norm": 0.16626840829849243,
"learning_rate": 5.951767922252105e-06,
"loss": 0.0059,
"step": 17020
},
{
"grad_norm": 0.14325283467769623,
"learning_rate": 5.912708537089068e-06,
"loss": 0.0039,
"step": 17030
},
{
"grad_norm": 0.15190692245960236,
"learning_rate": 5.873769684897434e-06,
"loss": 0.005,
"step": 17040
},
{
"grad_norm": 0.13617077469825745,
"learning_rate": 5.834951472134514e-06,
"loss": 0.0059,
"step": 17050
},
{
"grad_norm": 0.16800183057785034,
"learning_rate": 5.796254004927832e-06,
"loss": 0.0052,
"step": 17060
},
{
"grad_norm": 0.1636773645877838,
"learning_rate": 5.757677389074806e-06,
"loss": 0.0047,
"step": 17070
},
{
"grad_norm": 0.11355911195278168,
"learning_rate": 5.719221730042385e-06,
"loss": 0.0033,
"step": 17080
},
{
"grad_norm": 0.1289564073085785,
"learning_rate": 5.680887132966911e-06,
"loss": 0.0057,
"step": 17090
},
{
"grad_norm": 0.14525209367275238,
"learning_rate": 5.642673702653683e-06,
"loss": 0.004,
"step": 17100
},
{
"grad_norm": 0.18314318358898163,
"learning_rate": 5.604581543576781e-06,
"loss": 0.0041,
"step": 17110
},
{
"grad_norm": 0.17528216540813446,
"learning_rate": 5.566610759878704e-06,
"loss": 0.0052,
"step": 17120
},
{
"grad_norm": 0.1443951427936554,
"learning_rate": 5.528761455370119e-06,
"loss": 0.0041,
"step": 17130
},
{
"grad_norm": 0.20061659812927246,
"learning_rate": 5.491033733529594e-06,
"loss": 0.0051,
"step": 17140
},
{
"grad_norm": 0.12190601229667664,
"learning_rate": 5.453427697503255e-06,
"loss": 0.0049,
"step": 17150
},
{
"grad_norm": 0.13548479974269867,
"learning_rate": 5.415943450104599e-06,
"loss": 0.0047,
"step": 17160
},
{
"grad_norm": 0.15832222998142242,
"learning_rate": 5.378581093814111e-06,
"loss": 0.0037,
"step": 17170
},
{
"grad_norm": 0.10824442654848099,
"learning_rate": 5.3413407307790375e-06,
"loss": 0.0041,
"step": 17180
},
{
"grad_norm": 0.18595543503761292,
"learning_rate": 5.30422246281313e-06,
"loss": 0.0063,
"step": 17190
},
{
"grad_norm": 0.13622011244297028,
"learning_rate": 5.267226391396296e-06,
"loss": 0.0032,
"step": 17200
},
{
"grad_norm": 0.10188230872154236,
"learning_rate": 5.2303526176744e-06,
"loss": 0.003,
"step": 17210
},
{
"grad_norm": 0.13925378024578094,
"learning_rate": 5.193601242458929e-06,
"loss": 0.0043,
"step": 17220
},
{
"grad_norm": 0.11982742697000504,
"learning_rate": 5.156972366226714e-06,
"loss": 0.0058,
"step": 17230
},
{
"grad_norm": 0.1453980952501297,
"learning_rate": 5.120466089119735e-06,
"loss": 0.0052,
"step": 17240
},
{
"grad_norm": 0.12326829135417938,
"learning_rate": 5.084082510944749e-06,
"loss": 0.0039,
"step": 17250
},
{
"grad_norm": 0.17726564407348633,
"learning_rate": 5.047821731173058e-06,
"loss": 0.0047,
"step": 17260
},
{
"grad_norm": 0.15553253889083862,
"learning_rate": 5.011683848940274e-06,
"loss": 0.0042,
"step": 17270
},
{
"grad_norm": 0.11963417381048203,
"learning_rate": 4.975668963045954e-06,
"loss": 0.004,
"step": 17280
},
{
"grad_norm": 0.12937189638614655,
"learning_rate": 4.9397771719534525e-06,
"loss": 0.0032,
"step": 17290
},
{
"grad_norm": 0.12897104024887085,
"learning_rate": 4.904008573789548e-06,
"loss": 0.0045,
"step": 17300
},
{
"grad_norm": 0.09982036799192429,
"learning_rate": 4.8683632663442005e-06,
"loss": 0.0035,
"step": 17310
},
{
"grad_norm": 0.15100431442260742,
"learning_rate": 4.832841347070343e-06,
"loss": 0.0047,
"step": 17320
},
{
"grad_norm": 0.175477996468544,
"learning_rate": 4.797442913083539e-06,
"loss": 0.0059,
"step": 17330
},
{
"grad_norm": 0.14300963282585144,
"learning_rate": 4.7621680611617596e-06,
"loss": 0.0038,
"step": 17340
},
{
"grad_norm": 0.2541049122810364,
"learning_rate": 4.727016887745095e-06,
"loss": 0.0053,
"step": 17350
},
{
"grad_norm": 0.12595532834529877,
"learning_rate": 4.691989488935511e-06,
"loss": 0.0041,
"step": 17360
},
{
"grad_norm": 0.11065539717674255,
"learning_rate": 4.657085960496588e-06,
"loss": 0.0036,
"step": 17370
},
{
"grad_norm": 0.09352098405361176,
"learning_rate": 4.6223063978532265e-06,
"loss": 0.0052,
"step": 17380
},
{
"grad_norm": 0.09857888519763947,
"learning_rate": 4.587650896091439e-06,
"loss": 0.0046,
"step": 17390
},
{
"grad_norm": 0.1510113924741745,
"learning_rate": 4.553119549958035e-06,
"loss": 0.0059,
"step": 17400
},
{
"grad_norm": 0.14610646665096283,
"learning_rate": 4.518712453860385e-06,
"loss": 0.0047,
"step": 17410
},
{
"grad_norm": 0.10412902384996414,
"learning_rate": 4.484429701866205e-06,
"loss": 0.0038,
"step": 17420
},
{
"grad_norm": 0.1347024291753769,
"learning_rate": 4.4502713877031975e-06,
"loss": 0.004,
"step": 17430
},
{
"grad_norm": 0.11108066141605377,
"learning_rate": 4.416237604758911e-06,
"loss": 0.0044,
"step": 17440
},
{
"grad_norm": 0.17151008546352386,
"learning_rate": 4.3823284460804025e-06,
"loss": 0.0054,
"step": 17450
},
{
"grad_norm": 0.1379305124282837,
"learning_rate": 4.348544004374011e-06,
"loss": 0.0033,
"step": 17460
},
{
"grad_norm": 0.1281946748495102,
"learning_rate": 4.314884372005123e-06,
"loss": 0.0049,
"step": 17470
},
{
"grad_norm": 0.15030136704444885,
"learning_rate": 4.281349640997867e-06,
"loss": 0.0044,
"step": 17480
},
{
"grad_norm": 0.11368031054735184,
"learning_rate": 4.247939903034942e-06,
"loss": 0.0045,
"step": 17490
},
{
"grad_norm": 0.10988582670688629,
"learning_rate": 4.214655249457284e-06,
"loss": 0.0043,
"step": 17500
},
{
"grad_norm": 0.13686129450798035,
"learning_rate": 4.181495771263855e-06,
"loss": 0.0047,
"step": 17510
},
{
"grad_norm": 0.08308565616607666,
"learning_rate": 4.148461559111427e-06,
"loss": 0.003,
"step": 17520
},
{
"grad_norm": 0.10827736556529999,
"learning_rate": 4.115552703314252e-06,
"loss": 0.0044,
"step": 17530
},
{
"grad_norm": 0.10387372225522995,
"learning_rate": 4.082769293843886e-06,
"loss": 0.0036,
"step": 17540
},
{
"grad_norm": 0.06729380041360855,
"learning_rate": 4.050111420328939e-06,
"loss": 0.0034,
"step": 17550
},
{
"grad_norm": 0.1021871566772461,
"learning_rate": 4.017579172054764e-06,
"loss": 0.0039,
"step": 17560
},
{
"grad_norm": 0.1491686850786209,
"learning_rate": 3.985172637963308e-06,
"loss": 0.0043,
"step": 17570
},
{
"grad_norm": 0.14715075492858887,
"learning_rate": 3.952891906652784e-06,
"loss": 0.0036,
"step": 17580
},
{
"grad_norm": 0.10194284468889236,
"learning_rate": 3.920737066377478e-06,
"loss": 0.0051,
"step": 17590
},
{
"grad_norm": 0.09443909674882889,
"learning_rate": 3.888708205047509e-06,
"loss": 0.003,
"step": 17600
},
{
"grad_norm": 0.13160809874534607,
"learning_rate": 3.856805410228542e-06,
"loss": 0.0045,
"step": 17610
},
{
"grad_norm": 0.10030721873044968,
"learning_rate": 3.82502876914162e-06,
"loss": 0.0038,
"step": 17620
},
{
"grad_norm": 0.11953095346689224,
"learning_rate": 3.7933783686628586e-06,
"loss": 0.005,
"step": 17630
},
{
"grad_norm": 0.09695965051651001,
"learning_rate": 3.7618542953232306e-06,
"loss": 0.0046,
"step": 17640
},
{
"grad_norm": 0.07293650507926941,
"learning_rate": 3.7304566353083658e-06,
"loss": 0.0029,
"step": 17650
},
{
"grad_norm": 0.09447503089904785,
"learning_rate": 3.6991854744582555e-06,
"loss": 0.003,
"step": 17660
},
{
"grad_norm": 0.127981036901474,
"learning_rate": 3.6680408982670777e-06,
"loss": 0.0039,
"step": 17670
},
{
"grad_norm": 0.10943655669689178,
"learning_rate": 3.637022991882899e-06,
"loss": 0.0054,
"step": 17680
},
{
"grad_norm": 0.1618594229221344,
"learning_rate": 3.606131840107485e-06,
"loss": 0.0046,
"step": 17690
},
{
"grad_norm": 0.15760213136672974,
"learning_rate": 3.575367527396084e-06,
"loss": 0.0045,
"step": 17700
},
{
"grad_norm": 0.11885190010070801,
"learning_rate": 3.5447301378571386e-06,
"loss": 0.0034,
"step": 17710
},
{
"grad_norm": 0.10916793346405029,
"learning_rate": 3.514219755252113e-06,
"loss": 0.0054,
"step": 17720
},
{
"grad_norm": 0.11321078985929489,
"learning_rate": 3.4838364629952213e-06,
"loss": 0.0045,
"step": 17730
},
{
"grad_norm": 0.08594832569360733,
"learning_rate": 3.4535803441532123e-06,
"loss": 0.0066,
"step": 17740
},
{
"grad_norm": 0.13212276995182037,
"learning_rate": 3.4234514814451836e-06,
"loss": 0.0037,
"step": 17750
},
{
"grad_norm": 0.1763709932565689,
"learning_rate": 3.393449957242273e-06,
"loss": 0.0076,
"step": 17760
},
{
"grad_norm": 0.13847731053829193,
"learning_rate": 3.363575853567524e-06,
"loss": 0.0043,
"step": 17770
},
{
"grad_norm": 0.1049138754606247,
"learning_rate": 3.3338292520955826e-06,
"loss": 0.0049,
"step": 17780
},
{
"grad_norm": 0.11970741301774979,
"learning_rate": 3.304210234152516e-06,
"loss": 0.0046,
"step": 17790
},
{
"grad_norm": 0.1296226680278778,
"learning_rate": 3.2747188807155993e-06,
"loss": 0.0038,
"step": 17800
},
{
"grad_norm": 0.09861905127763748,
"learning_rate": 3.2453552724130643e-06,
"loss": 0.0053,
"step": 17810
},
{
"grad_norm": 0.12149041891098022,
"learning_rate": 3.216119489523889e-06,
"loss": 0.0047,
"step": 17820
},
{
"grad_norm": 0.12130892276763916,
"learning_rate": 3.1870116119775917e-06,
"loss": 0.0036,
"step": 17830
},
{
"grad_norm": 0.08184216916561127,
"learning_rate": 3.158031719353999e-06,
"loss": 0.0047,
"step": 17840
},
{
"grad_norm": 0.12849783897399902,
"learning_rate": 3.1291798908830273e-06,
"loss": 0.0037,
"step": 17850
},
{
"grad_norm": 0.0973939523100853,
"learning_rate": 3.1004562054444853e-06,
"loss": 0.004,
"step": 17860
},
{
"grad_norm": 0.10297653824090958,
"learning_rate": 3.071860741567806e-06,
"loss": 0.0056,
"step": 17870
},
{
"grad_norm": 0.1703691929578781,
"learning_rate": 3.04339357743193e-06,
"loss": 0.0036,
"step": 17880
},
{
"grad_norm": 0.09887855499982834,
"learning_rate": 3.0150547908649628e-06,
"loss": 0.0048,
"step": 17890
},
{
"grad_norm": 0.17256423830986023,
"learning_rate": 2.9868444593440957e-06,
"loss": 0.0044,
"step": 17900
},
{
"grad_norm": 0.14721833169460297,
"learning_rate": 2.9587626599952846e-06,
"loss": 0.0052,
"step": 17910
},
{
"grad_norm": 0.10446737706661224,
"learning_rate": 2.930809469593082e-06,
"loss": 0.0058,
"step": 17920
},
{
"grad_norm": 0.1434236317873001,
"learning_rate": 2.9029849645604733e-06,
"loss": 0.0038,
"step": 17930
},
{
"grad_norm": 0.1383984535932541,
"learning_rate": 2.8752892209685632e-06,
"loss": 0.0054,
"step": 17940
},
{
"grad_norm": 0.10035323351621628,
"learning_rate": 2.847722314536483e-06,
"loss": 0.0033,
"step": 17950
},
{
"grad_norm": 0.11949007958173752,
"learning_rate": 2.820284320631078e-06,
"loss": 0.0063,
"step": 17960
},
{
"grad_norm": 0.1266811639070511,
"learning_rate": 2.792975314266788e-06,
"loss": 0.0041,
"step": 17970
},
{
"grad_norm": 0.12894940376281738,
"learning_rate": 2.7657953701054007e-06,
"loss": 0.0037,
"step": 17980
},
{
"grad_norm": 0.08531206846237183,
"learning_rate": 2.7387445624558306e-06,
"loss": 0.0056,
"step": 17990
},
{
"grad_norm": 0.09096170216798782,
"learning_rate": 2.7118229652739747e-06,
"loss": 0.0036,
"step": 18000
},
{
"grad_norm": 0.13481144607067108,
"learning_rate": 2.6850306521624236e-06,
"loss": 0.0047,
"step": 18010
},
{
"grad_norm": 0.0847296267747879,
"learning_rate": 2.6583676963703507e-06,
"loss": 0.0034,
"step": 18020
},
{
"grad_norm": 0.07553955912590027,
"learning_rate": 2.631834170793268e-06,
"loss": 0.0039,
"step": 18030
},
{
"grad_norm": 0.1235010102391243,
"learning_rate": 2.6054301479728036e-06,
"loss": 0.0031,
"step": 18040
},
{
"grad_norm": 0.07297364622354507,
"learning_rate": 2.579155700096575e-06,
"loss": 0.0046,
"step": 18050
},
{
"grad_norm": 0.10736757516860962,
"learning_rate": 2.5530108989978873e-06,
"loss": 0.006,
"step": 18060
},
{
"grad_norm": 0.11740373820066452,
"learning_rate": 2.5269958161556416e-06,
"loss": 0.0064,
"step": 18070
},
{
"grad_norm": 0.0979749783873558,
"learning_rate": 2.5011105226940888e-06,
"loss": 0.0034,
"step": 18080
},
{
"grad_norm": 0.13769516348838806,
"learning_rate": 2.4753550893826248e-06,
"loss": 0.0048,
"step": 18090
},
{
"grad_norm": 0.08744233101606369,
"learning_rate": 2.4497295866356296e-06,
"loss": 0.004,
"step": 18100
},
{
"grad_norm": 0.09951108694076538,
"learning_rate": 2.424234084512228e-06,
"loss": 0.0036,
"step": 18110
},
{
"grad_norm": 0.1635640263557434,
"learning_rate": 2.3988686527161687e-06,
"loss": 0.0074,
"step": 18120
},
{
"grad_norm": 0.14396807551383972,
"learning_rate": 2.373633360595573e-06,
"loss": 0.005,
"step": 18130
},
{
"grad_norm": 0.14187557995319366,
"learning_rate": 2.3485282771427585e-06,
"loss": 0.0028,
"step": 18140
},
{
"grad_norm": 0.08442353457212448,
"learning_rate": 2.3235534709940665e-06,
"loss": 0.0031,
"step": 18150
},
{
"grad_norm": 0.1230187937617302,
"learning_rate": 2.2987090104296617e-06,
"loss": 0.004,
"step": 18160
},
{
"grad_norm": 0.10204652696847916,
"learning_rate": 2.273994963373355e-06,
"loss": 0.0038,
"step": 18170
},
{
"grad_norm": 0.07479498535394669,
"learning_rate": 2.249411397392409e-06,
"loss": 0.0045,
"step": 18180
},
{
"grad_norm": 0.10246019065380096,
"learning_rate": 2.2249583796973506e-06,
"loss": 0.0048,
"step": 18190
},
{
"grad_norm": 0.16304340958595276,
"learning_rate": 2.200635977141796e-06,
"loss": 0.0047,
"step": 18200
},
{
"grad_norm": 0.1268751174211502,
"learning_rate": 2.17644425622226e-06,
"loss": 0.0042,
"step": 18210
},
{
"grad_norm": 0.07579465210437775,
"learning_rate": 2.152383283077991e-06,
"loss": 0.0042,
"step": 18220
},
{
"grad_norm": 0.11345085501670837,
"learning_rate": 2.128453123490781e-06,
"loss": 0.0049,
"step": 18230
},
{
"grad_norm": 0.1875070482492447,
"learning_rate": 2.1046538428847462e-06,
"loss": 0.0053,
"step": 18240
},
{
"grad_norm": 0.09485717862844467,
"learning_rate": 2.0809855063262273e-06,
"loss": 0.0042,
"step": 18250
},
{
"grad_norm": 0.07932358235120773,
"learning_rate": 2.057448178523558e-06,
"loss": 0.0043,
"step": 18260
},
{
"grad_norm": 0.11418958753347397,
"learning_rate": 2.034041923826885e-06,
"loss": 0.0051,
"step": 18270
},
{
"grad_norm": 0.08102735131978989,
"learning_rate": 2.0107668062280204e-06,
"loss": 0.0048,
"step": 18280
},
{
"grad_norm": 0.08639351278543472,
"learning_rate": 1.9876228893602357e-06,
"loss": 0.0037,
"step": 18290
},
{
"grad_norm": 0.09763824194669724,
"learning_rate": 1.9646102364981266e-06,
"loss": 0.004,
"step": 18300
},
{
"grad_norm": 0.08055881410837173,
"learning_rate": 1.9417289105574053e-06,
"loss": 0.0072,
"step": 18310
},
{
"grad_norm": 0.09894809126853943,
"learning_rate": 1.9189789740947427e-06,
"loss": 0.0036,
"step": 18320
},
{
"grad_norm": 0.10058688372373581,
"learning_rate": 1.896360489307597e-06,
"loss": 0.0063,
"step": 18330
},
{
"grad_norm": 0.1028638482093811,
"learning_rate": 1.8738735180340362e-06,
"loss": 0.004,
"step": 18340
},
{
"grad_norm": 0.0817807987332344,
"learning_rate": 1.8515181217525824e-06,
"loss": 0.0038,
"step": 18350
},
{
"grad_norm": 0.07613148540258408,
"learning_rate": 1.8292943615820457e-06,
"loss": 0.0036,
"step": 18360
},
{
"grad_norm": 0.05594291910529137,
"learning_rate": 1.8072022982813296e-06,
"loss": 0.0034,
"step": 18370
},
{
"grad_norm": 0.10103534907102585,
"learning_rate": 1.7852419922492925e-06,
"loss": 0.005,
"step": 18380
},
{
"grad_norm": 0.06014038249850273,
"learning_rate": 1.763413503524569e-06,
"loss": 0.0031,
"step": 18390
},
{
"grad_norm": 0.11110295355319977,
"learning_rate": 1.7417168917854165e-06,
"loss": 0.0032,
"step": 18400
},
{
"grad_norm": 0.08939255774021149,
"learning_rate": 1.720152216349552e-06,
"loss": 0.0034,
"step": 18410
},
{
"grad_norm": 0.08942482620477676,
"learning_rate": 1.6987195361739595e-06,
"loss": 0.0036,
"step": 18420
},
{
"grad_norm": 0.1863851249217987,
"learning_rate": 1.6774189098547832e-06,
"loss": 0.0051,
"step": 18430
},
{
"grad_norm": 0.12386361509561539,
"learning_rate": 1.6562503956271069e-06,
"loss": 0.0047,
"step": 18440
},
{
"grad_norm": 0.12683850526809692,
"learning_rate": 1.6352140513648417e-06,
"loss": 0.0043,
"step": 18450
},
{
"grad_norm": 0.09401161968708038,
"learning_rate": 1.6143099345805712e-06,
"loss": 0.0029,
"step": 18460
},
{
"grad_norm": 0.08010539412498474,
"learning_rate": 1.5935381024253293e-06,
"loss": 0.0035,
"step": 18470
},
{
"grad_norm": 0.05355304852128029,
"learning_rate": 1.572898611688517e-06,
"loss": 0.0034,
"step": 18480
},
{
"grad_norm": 0.12306501716375351,
"learning_rate": 1.5523915187977133e-06,
"loss": 0.0056,
"step": 18490
},
{
"grad_norm": 0.10398924350738525,
"learning_rate": 1.532016879818532e-06,
"loss": 0.0039,
"step": 18500
},
{
"grad_norm": 0.07574418187141418,
"learning_rate": 1.51177475045447e-06,
"loss": 0.0037,
"step": 18510
},
{
"grad_norm": 0.0989949107170105,
"learning_rate": 1.4916651860467035e-06,
"loss": 0.0038,
"step": 18520
},
{
"grad_norm": 0.06689750403165817,
"learning_rate": 1.471688241574043e-06,
"loss": 0.0031,
"step": 18530
},
{
"grad_norm": 0.09178074449300766,
"learning_rate": 1.451843971652672e-06,
"loss": 0.0041,
"step": 18540
},
{
"grad_norm": 0.12429051101207733,
"learning_rate": 1.432132430536076e-06,
"loss": 0.0042,
"step": 18550
},
{
"grad_norm": 0.08976944535970688,
"learning_rate": 1.412553672114869e-06,
"loss": 0.0035,
"step": 18560
},
{
"grad_norm": 0.07835699617862701,
"learning_rate": 1.3931077499166056e-06,
"loss": 0.0036,
"step": 18570
},
{
"grad_norm": 0.07831160724163055,
"learning_rate": 1.3737947171057085e-06,
"loss": 0.0035,
"step": 18580
},
{
"grad_norm": 0.08674316853284836,
"learning_rate": 1.3546146264832582e-06,
"loss": 0.0038,
"step": 18590
},
{
"grad_norm": 0.06477545201778412,
"learning_rate": 1.3355675304869086e-06,
"loss": 0.0037,
"step": 18600
},
{
"grad_norm": 0.11489281058311462,
"learning_rate": 1.3166534811906827e-06,
"loss": 0.005,
"step": 18610
},
{
"grad_norm": 0.1530413031578064,
"learning_rate": 1.2978725303048666e-06,
"loss": 0.0049,
"step": 18620
},
{
"grad_norm": 0.12522374093532562,
"learning_rate": 1.2792247291758762e-06,
"loss": 0.0045,
"step": 18630
},
{
"grad_norm": 0.06969575583934784,
"learning_rate": 1.2607101287860635e-06,
"loss": 0.0036,
"step": 18640
},
{
"grad_norm": 0.08598778396844864,
"learning_rate": 1.2423287797536654e-06,
"loss": 0.0033,
"step": 18650
},
{
"grad_norm": 0.06391648203134537,
"learning_rate": 1.2240807323325776e-06,
"loss": 0.0037,
"step": 18660
},
{
"grad_norm": 0.10801202058792114,
"learning_rate": 1.205966036412254e-06,
"loss": 0.0033,
"step": 18670
},
{
"grad_norm": 0.11867705732584,
"learning_rate": 1.1879847415175949e-06,
"loss": 0.0051,
"step": 18680
},
{
"grad_norm": 0.11671072244644165,
"learning_rate": 1.1701368968087712e-06,
"loss": 0.0046,
"step": 18690
},
{
"grad_norm": 0.05049500986933708,
"learning_rate": 1.1524225510811116e-06,
"loss": 0.0036,
"step": 18700
},
{
"grad_norm": 0.07130085676908493,
"learning_rate": 1.1348417527649535e-06,
"loss": 0.0034,
"step": 18710
},
{
"grad_norm": 0.054281286895275116,
"learning_rate": 1.1173945499255268e-06,
"loss": 0.0031,
"step": 18720
},
{
"grad_norm": 0.07449881732463837,
"learning_rate": 1.1000809902628307e-06,
"loss": 0.0052,
"step": 18730
},
{
"grad_norm": 0.09705372154712677,
"learning_rate": 1.082901121111468e-06,
"loss": 0.0034,
"step": 18740
},
{
"grad_norm": 0.10779692232608795,
"learning_rate": 1.0658549894405456e-06,
"loss": 0.0032,
"step": 18750
},
{
"grad_norm": 0.050938431173563004,
"learning_rate": 1.0489426418535342e-06,
"loss": 0.0026,
"step": 18760
},
{
"grad_norm": 0.07344609498977661,
"learning_rate": 1.0321641245881474e-06,
"loss": 0.003,
"step": 18770
},
{
"grad_norm": 0.0742596834897995,
"learning_rate": 1.015519483516214e-06,
"loss": 0.0037,
"step": 18780
},
{
"grad_norm": 0.06837273389101028,
"learning_rate": 9.990087641435443e-07,
"loss": 0.003,
"step": 18790
},
{
"grad_norm": 0.08643893897533417,
"learning_rate": 9.826320116098132e-07,
"loss": 0.0044,
"step": 18800
},
{
"grad_norm": 0.04812360182404518,
"learning_rate": 9.663892706884447e-07,
"loss": 0.0031,
"step": 18810
},
{
"grad_norm": 0.08176058530807495,
"learning_rate": 9.502805857864616e-07,
"loss": 0.0061,
"step": 18820
},
{
"grad_norm": 0.08198662847280502,
"learning_rate": 9.34306000944396e-07,
"loss": 0.0039,
"step": 18830
},
{
"grad_norm": 0.07465307414531708,
"learning_rate": 9.184655598361624e-07,
"loss": 0.0034,
"step": 18840
},
{
"grad_norm": 0.07182762771844864,
"learning_rate": 9.027593057689076e-07,
"loss": 0.0038,
"step": 18850
},
{
"grad_norm": 0.0734962522983551,
"learning_rate": 8.871872816829441e-07,
"loss": 0.0031,
"step": 18860
},
{
"grad_norm": 0.06601841002702713,
"learning_rate": 8.717495301515777e-07,
"loss": 0.0028,
"step": 18870
},
{
"grad_norm": 0.10713382810354233,
"learning_rate": 8.564460933810415e-07,
"loss": 0.0044,
"step": 18880
},
{
"grad_norm": 0.09342646598815918,
"learning_rate": 8.412770132103453e-07,
"loss": 0.006,
"step": 18890
},
{
"grad_norm": 0.0634833425283432,
"learning_rate": 8.262423311111711e-07,
"loss": 0.0034,
"step": 18900
},
{
"grad_norm": 0.07692574709653854,
"learning_rate": 8.113420881877665e-07,
"loss": 0.0037,
"step": 18910
},
{
"grad_norm": 0.07447660714387894,
"learning_rate": 7.965763251768288e-07,
"loss": 0.004,
"step": 18920
},
{
"grad_norm": 0.1114739403128624,
"learning_rate": 7.819450824473995e-07,
"loss": 0.0046,
"step": 18930
},
{
"grad_norm": 0.07135684788227081,
"learning_rate": 7.674484000007198e-07,
"loss": 0.004,
"step": 18940
},
{
"grad_norm": 0.08017963916063309,
"learning_rate": 7.530863174701752e-07,
"loss": 0.0042,
"step": 18950
},
{
"grad_norm": 0.08928892016410828,
"learning_rate": 7.38858874121151e-07,
"loss": 0.0052,
"step": 18960
},
{
"grad_norm": 0.1515638679265976,
"learning_rate": 7.247661088509328e-07,
"loss": 0.0039,
"step": 18970
},
{
"grad_norm": 0.09839137643575668,
"learning_rate": 7.108080601886002e-07,
"loss": 0.0042,
"step": 18980
},
{
"grad_norm": 0.060578133910894394,
"learning_rate": 6.969847662949336e-07,
"loss": 0.0043,
"step": 18990
},
{
"grad_norm": 0.07045385986566544,
"learning_rate": 6.832962649622798e-07,
"loss": 0.0043,
"step": 19000
},
{
"grad_norm": 0.11689253896474838,
"learning_rate": 6.697425936144863e-07,
"loss": 0.0033,
"step": 19010
},
{
"grad_norm": 0.051963455975055695,
"learning_rate": 6.563237893067731e-07,
"loss": 0.0035,
"step": 19020
},
{
"grad_norm": 0.08776947855949402,
"learning_rate": 6.430398887256328e-07,
"loss": 0.0041,
"step": 19030
},
{
"grad_norm": 0.053857170045375824,
"learning_rate": 6.298909281887478e-07,
"loss": 0.0029,
"step": 19040
},
{
"grad_norm": 0.1286555677652359,
"learning_rate": 6.168769436448673e-07,
"loss": 0.0033,
"step": 19050
},
{
"grad_norm": 0.08176587522029877,
"learning_rate": 6.03997970673742e-07,
"loss": 0.003,
"step": 19060
},
{
"grad_norm": 0.09932407736778259,
"learning_rate": 5.912540444859782e-07,
"loss": 0.0044,
"step": 19070
},
{
"grad_norm": 0.0911400094628334,
"learning_rate": 5.786451999229837e-07,
"loss": 0.007,
"step": 19080
},
{
"grad_norm": 0.0852760598063469,
"learning_rate": 5.661714714568722e-07,
"loss": 0.004,
"step": 19090
},
{
"grad_norm": 0.08842559158802032,
"learning_rate": 5.538328931903259e-07,
"loss": 0.0036,
"step": 19100
},
{
"grad_norm": 0.07818997651338577,
"learning_rate": 5.416294988565551e-07,
"loss": 0.0033,
"step": 19110
},
{
"grad_norm": 0.059740930795669556,
"learning_rate": 5.29561321819172e-07,
"loss": 0.004,
"step": 19120
},
{
"grad_norm": 0.07137617468833923,
"learning_rate": 5.176283950721061e-07,
"loss": 0.0034,
"step": 19130
},
{
"grad_norm": 0.08400587737560272,
"learning_rate": 5.058307512395332e-07,
"loss": 0.0037,
"step": 19140
},
{
"grad_norm": 0.19625791907310486,
"learning_rate": 4.941684225757526e-07,
"loss": 0.0035,
"step": 19150
},
{
"grad_norm": 0.03737051039934158,
"learning_rate": 4.826414409651314e-07,
"loss": 0.0031,
"step": 19160
},
{
"grad_norm": 0.1145489290356636,
"learning_rate": 4.712498379219943e-07,
"loss": 0.0033,
"step": 19170
},
{
"grad_norm": 0.10778439044952393,
"learning_rate": 4.599936445905506e-07,
"loss": 0.0047,
"step": 19180
},
{
"grad_norm": 0.056398406624794006,
"learning_rate": 4.4887289174480594e-07,
"loss": 0.0035,
"step": 19190
},
{
"grad_norm": 0.05212453380227089,
"learning_rate": 4.378876097884621e-07,
"loss": 0.0036,
"step": 19200
},
{
"grad_norm": 0.0517825148999691,
"learning_rate": 4.2703782875487264e-07,
"loss": 0.0027,
"step": 19210
},
{
"grad_norm": 0.04213884100317955,
"learning_rate": 4.163235783069208e-07,
"loss": 0.0025,
"step": 19220
},
{
"grad_norm": 0.0969507098197937,
"learning_rate": 4.057448877369585e-07,
"loss": 0.0041,
"step": 19230
},
{
"grad_norm": 0.05608559772372246,
"learning_rate": 3.9530178596672295e-07,
"loss": 0.0037,
"step": 19240
},
{
"grad_norm": 0.056393902748823166,
"learning_rate": 3.849943015472479e-07,
"loss": 0.0047,
"step": 19250
},
{
"grad_norm": 0.10194101184606552,
"learning_rate": 3.748224626588137e-07,
"loss": 0.0031,
"step": 19260
},
{
"grad_norm": 0.0955602377653122,
"learning_rate": 3.647862971108307e-07,
"loss": 0.0035,
"step": 19270
},
{
"grad_norm": 0.06831927597522736,
"learning_rate": 3.5488583234179473e-07,
"loss": 0.0046,
"step": 19280
},
{
"grad_norm": 0.044883936643600464,
"learning_rate": 3.4512109541920413e-07,
"loss": 0.0041,
"step": 19290
},
{
"grad_norm": 0.046192340552806854,
"learning_rate": 3.354921130394706e-07,
"loss": 0.0026,
"step": 19300
},
{
"grad_norm": 0.04406873881816864,
"learning_rate": 3.259989115278639e-07,
"loss": 0.0029,
"step": 19310
},
{
"grad_norm": 0.07073112577199936,
"learning_rate": 3.1664151683843403e-07,
"loss": 0.0042,
"step": 19320
},
{
"grad_norm": 0.05474907159805298,
"learning_rate": 3.074199545539447e-07,
"loss": 0.0052,
"step": 19330
},
{
"grad_norm": 0.047355033457279205,
"learning_rate": 2.983342498857955e-07,
"loss": 0.0043,
"step": 19340
},
{
"grad_norm": 0.10388905555009842,
"learning_rate": 2.893844276739499e-07,
"loss": 0.003,
"step": 19350
},
{
"grad_norm": 0.10018634796142578,
"learning_rate": 2.8057051238688514e-07,
"loss": 0.0038,
"step": 19360
},
{
"grad_norm": 0.03913424164056778,
"learning_rate": 2.71892528121509e-07,
"loss": 0.0032,
"step": 19370
},
{
"grad_norm": 0.07924854755401611,
"learning_rate": 2.633504986030988e-07,
"loss": 0.0043,
"step": 19380
},
{
"grad_norm": 0.09704583883285522,
"learning_rate": 2.549444471852347e-07,
"loss": 0.0055,
"step": 19390
},
{
"grad_norm": 0.05486253276467323,
"learning_rate": 2.4667439684974423e-07,
"loss": 0.0038,
"step": 19400
},
{
"grad_norm": 0.12703447043895721,
"learning_rate": 2.3854037020662467e-07,
"loss": 0.0056,
"step": 19410
},
{
"grad_norm": 0.06610122323036194,
"learning_rate": 2.3054238949399288e-07,
"loss": 0.0027,
"step": 19420
},
{
"grad_norm": 0.08677727729082108,
"learning_rate": 2.2268047657802993e-07,
"loss": 0.0037,
"step": 19430
},
{
"grad_norm": 0.06518864631652832,
"learning_rate": 2.149546529529034e-07,
"loss": 0.0039,
"step": 19440
},
{
"grad_norm": 0.11082996428012848,
"learning_rate": 2.0736493974071736e-07,
"loss": 0.004,
"step": 19450
},
{
"grad_norm": 0.036419276148080826,
"learning_rate": 1.9991135769145686e-07,
"loss": 0.003,
"step": 19460
},
{
"grad_norm": 0.0653953105211258,
"learning_rate": 1.9259392718293245e-07,
"loss": 0.0042,
"step": 19470
},
{
"grad_norm": 0.09075991064310074,
"learning_rate": 1.8541266822072467e-07,
"loss": 0.0056,
"step": 19480
},
{
"grad_norm": 0.06219499185681343,
"learning_rate": 1.7836760043811184e-07,
"loss": 0.0033,
"step": 19490
},
{
"grad_norm": 0.11633436381816864,
"learning_rate": 1.7145874309604792e-07,
"loss": 0.0031,
"step": 19500
},
{
"grad_norm": 0.11897324025630951,
"learning_rate": 1.6468611508308474e-07,
"loss": 0.0054,
"step": 19510
},
{
"grad_norm": 0.055197566747665405,
"learning_rate": 1.5804973491532204e-07,
"loss": 0.0032,
"step": 19520
},
{
"grad_norm": 0.07045387476682663,
"learning_rate": 1.5154962073637424e-07,
"loss": 0.0043,
"step": 19530
},
{
"grad_norm": 0.06476310640573502,
"learning_rate": 1.4518579031730372e-07,
"loss": 0.0034,
"step": 19540
},
{
"grad_norm": 0.10052386671304703,
"learning_rate": 1.389582610565876e-07,
"loss": 0.0041,
"step": 19550
},
{
"grad_norm": 0.12584728002548218,
"learning_rate": 1.3286704998003995e-07,
"loss": 0.0034,
"step": 19560
},
{
"grad_norm": 0.04220246151089668,
"learning_rate": 1.2691217374080632e-07,
"loss": 0.005,
"step": 19570
},
{
"grad_norm": 0.0714455097913742,
"learning_rate": 1.2109364861929705e-07,
"loss": 0.0024,
"step": 19580
},
{
"grad_norm": 0.07059883326292038,
"learning_rate": 1.1541149052312628e-07,
"loss": 0.0045,
"step": 19590
},
{
"grad_norm": 0.05011598765850067,
"learning_rate": 1.0986571498710074e-07,
"loss": 0.0046,
"step": 19600
},
{
"grad_norm": 0.03681618720293045,
"learning_rate": 1.0445633717316438e-07,
"loss": 0.003,
"step": 19610
},
{
"grad_norm": 0.13256072998046875,
"learning_rate": 9.918337187034277e-08,
"loss": 0.0038,
"step": 19620
},
{
"grad_norm": 0.08195763826370239,
"learning_rate": 9.404683349472643e-08,
"loss": 0.0029,
"step": 19630
},
{
"grad_norm": 0.06682359427213669,
"learning_rate": 8.904673608940983e-08,
"loss": 0.005,
"step": 19640
},
{
"grad_norm": 0.11099401861429214,
"learning_rate": 8.418309332447471e-08,
"loss": 0.0044,
"step": 19650
},
{
"grad_norm": 0.06852664053440094,
"learning_rate": 7.945591849692902e-08,
"loss": 0.0033,
"step": 19660
},
{
"grad_norm": 0.10303331166505814,
"learning_rate": 7.486522453069578e-08,
"loss": 0.0044,
"step": 19670
},
{
"grad_norm": 0.11328206211328506,
"learning_rate": 7.041102397655208e-08,
"loss": 0.0048,
"step": 19680
},
{
"grad_norm": 0.053033504635095596,
"learning_rate": 6.609332901210685e-08,
"loss": 0.0031,
"step": 19690
},
{
"grad_norm": 0.06228000670671463,
"learning_rate": 6.191215144178419e-08,
"loss": 0.0028,
"step": 19700
},
{
"grad_norm": 0.06171086058020592,
"learning_rate": 5.786750269675678e-08,
"loss": 0.0041,
"step": 19710
},
{
"grad_norm": 0.11801750957965851,
"learning_rate": 5.395939383494031e-08,
"loss": 0.0035,
"step": 19720
},
{
"grad_norm": 0.0650201365351677,
"learning_rate": 5.018783554095463e-08,
"loss": 0.0061,
"step": 19730
},
{
"grad_norm": 0.06774918735027313,
"learning_rate": 4.655283812610156e-08,
"loss": 0.0037,
"step": 19740
},
{
"grad_norm": 0.05833545699715614,
"learning_rate": 4.305441152831491e-08,
"loss": 0.0036,
"step": 19750
},
{
"grad_norm": 0.061846163123846054,
"learning_rate": 3.9692565312171584e-08,
"loss": 0.0026,
"step": 19760
},
{
"grad_norm": 0.11921104788780212,
"learning_rate": 3.6467308668824975e-08,
"loss": 0.0042,
"step": 19770
},
{
"grad_norm": 0.0699174776673317,
"learning_rate": 3.3378650416004964e-08,
"loss": 0.0039,
"step": 19780
},
{
"grad_norm": 0.05499502271413803,
"learning_rate": 3.042659899797906e-08,
"loss": 0.0035,
"step": 19790
},
{
"grad_norm": 0.050804853439331055,
"learning_rate": 2.76111624855524e-08,
"loss": 0.003,
"step": 19800
},
{
"grad_norm": 0.08485697954893112,
"learning_rate": 2.4932348576017784e-08,
"loss": 0.0034,
"step": 19810
},
{
"grad_norm": 0.11028487235307693,
"learning_rate": 2.239016459314458e-08,
"loss": 0.0039,
"step": 19820
},
{
"grad_norm": 0.11637432128190994,
"learning_rate": 1.9984617487173174e-08,
"loss": 0.004,
"step": 19830
},
{
"grad_norm": 0.07264947891235352,
"learning_rate": 1.7715713834776105e-08,
"loss": 0.0038,
"step": 19840
},
{
"grad_norm": 0.0735667273402214,
"learning_rate": 1.5583459839046964e-08,
"loss": 0.0037,
"step": 19850
},
{
"grad_norm": 0.09691016376018524,
"learning_rate": 1.3587861329489304e-08,
"loss": 0.0048,
"step": 19860
},
{
"grad_norm": 0.08705077320337296,
"learning_rate": 1.1728923761994415e-08,
"loss": 0.0046,
"step": 19870
},
{
"grad_norm": 0.06617732346057892,
"learning_rate": 1.0006652218819135e-08,
"loss": 0.0049,
"step": 19880
},
{
"grad_norm": 0.060623060911893845,
"learning_rate": 8.421051408596947e-09,
"loss": 0.0052,
"step": 19890
},
{
"grad_norm": 0.10678309947252274,
"learning_rate": 6.972125666299123e-09,
"loss": 0.0043,
"step": 19900
},
{
"grad_norm": 0.11491920053958893,
"learning_rate": 5.659878953229169e-09,
"loss": 0.0058,
"step": 19910
},
{
"grad_norm": 0.0382302887737751,
"learning_rate": 4.48431485701728e-09,
"loss": 0.0035,
"step": 19920
},
{
"grad_norm": 0.10758187621831894,
"learning_rate": 3.4454365916203322e-09,
"loss": 0.0036,
"step": 19930
},
{
"grad_norm": 0.11459647119045258,
"learning_rate": 2.5432469972830332e-09,
"loss": 0.0041,
"step": 19940
},
{
"grad_norm": 0.10030755400657654,
"learning_rate": 1.7777485405601203e-09,
"loss": 0.0031,
"step": 19950
},
{
"grad_norm": 0.0468946136534214,
"learning_rate": 1.1489433142941597e-09,
"loss": 0.004,
"step": 19960
},
{
"grad_norm": 0.12162579596042633,
"learning_rate": 6.568330376210963e-10,
"loss": 0.0043,
"step": 19970
},
{
"grad_norm": 0.051232777535915375,
"learning_rate": 3.0141905594249787e-10,
"loss": 0.0031,
"step": 19980
},
{
"grad_norm": 0.05374916270375252,
"learning_rate": 8.270234094776008e-11,
"loss": 0.0028,
"step": 19990
},
{
"grad_norm": 0.07233867049217224,
"learning_rate": 6.834906085551041e-13,
"loss": 0.0039,
"step": 20000
}
],
"logging_steps": 10,
"max_steps": 20000,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 48,
"trial_name": null,
"trial_params": null
}