e-zorzi's picture
Add files using upload-large-folder tool
0957cba verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 20000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"grad_norm": 1.3594739437103271,
"learning_rate": 9e-07,
"loss": 1.1913,
"step": 10
},
{
"grad_norm": 1.0572824478149414,
"learning_rate": 1.9e-06,
"loss": 1.1841,
"step": 20
},
{
"grad_norm": 0.5717663764953613,
"learning_rate": 2.9e-06,
"loss": 1.1508,
"step": 30
},
{
"grad_norm": 0.3898443877696991,
"learning_rate": 3.9e-06,
"loss": 1.1205,
"step": 40
},
{
"grad_norm": 0.28664326667785645,
"learning_rate": 4.9000000000000005e-06,
"loss": 1.0888,
"step": 50
},
{
"grad_norm": 0.1729290783405304,
"learning_rate": 5.9e-06,
"loss": 1.0782,
"step": 60
},
{
"grad_norm": 0.17002208530902863,
"learning_rate": 6.900000000000001e-06,
"loss": 1.0691,
"step": 70
},
{
"grad_norm": 0.2152942717075348,
"learning_rate": 7.9e-06,
"loss": 1.0562,
"step": 80
},
{
"grad_norm": 0.19103780388832092,
"learning_rate": 8.9e-06,
"loss": 1.0479,
"step": 90
},
{
"grad_norm": 0.3243984878063202,
"learning_rate": 9.900000000000002e-06,
"loss": 1.0372,
"step": 100
},
{
"grad_norm": 0.1820673942565918,
"learning_rate": 1.09e-05,
"loss": 1.0272,
"step": 110
},
{
"grad_norm": 0.21819084882736206,
"learning_rate": 1.19e-05,
"loss": 1.0236,
"step": 120
},
{
"grad_norm": 0.20377595722675323,
"learning_rate": 1.29e-05,
"loss": 1.0237,
"step": 130
},
{
"grad_norm": 0.20572194457054138,
"learning_rate": 1.3900000000000002e-05,
"loss": 1.0228,
"step": 140
},
{
"grad_norm": 0.20157840847969055,
"learning_rate": 1.49e-05,
"loss": 1.0217,
"step": 150
},
{
"grad_norm": 0.23459017276763916,
"learning_rate": 1.59e-05,
"loss": 1.0192,
"step": 160
},
{
"grad_norm": 0.32469043135643005,
"learning_rate": 1.69e-05,
"loss": 1.0063,
"step": 170
},
{
"grad_norm": 0.36008527874946594,
"learning_rate": 1.79e-05,
"loss": 0.9873,
"step": 180
},
{
"grad_norm": 0.5633573532104492,
"learning_rate": 1.8900000000000002e-05,
"loss": 0.9672,
"step": 190
},
{
"grad_norm": 0.7019369006156921,
"learning_rate": 1.9900000000000003e-05,
"loss": 0.9315,
"step": 200
},
{
"grad_norm": 0.5538105964660645,
"learning_rate": 2.09e-05,
"loss": 0.8958,
"step": 210
},
{
"grad_norm": 0.5306029319763184,
"learning_rate": 2.19e-05,
"loss": 0.8707,
"step": 220
},
{
"grad_norm": 0.6606974005699158,
"learning_rate": 2.29e-05,
"loss": 0.8479,
"step": 230
},
{
"grad_norm": 0.8058410882949829,
"learning_rate": 2.39e-05,
"loss": 0.8169,
"step": 240
},
{
"grad_norm": 0.7277475595474243,
"learning_rate": 2.4900000000000002e-05,
"loss": 0.77,
"step": 250
},
{
"grad_norm": 0.6617355942726135,
"learning_rate": 2.5900000000000003e-05,
"loss": 0.7456,
"step": 260
},
{
"grad_norm": 0.8156651258468628,
"learning_rate": 2.6900000000000003e-05,
"loss": 0.6984,
"step": 270
},
{
"grad_norm": 0.7090954780578613,
"learning_rate": 2.7900000000000004e-05,
"loss": 0.6774,
"step": 280
},
{
"grad_norm": 0.8667084574699402,
"learning_rate": 2.8899999999999998e-05,
"loss": 0.6429,
"step": 290
},
{
"grad_norm": 0.946596622467041,
"learning_rate": 2.9900000000000002e-05,
"loss": 0.6052,
"step": 300
},
{
"grad_norm": 0.8120863437652588,
"learning_rate": 3.09e-05,
"loss": 0.5681,
"step": 310
},
{
"grad_norm": 0.9630921483039856,
"learning_rate": 3.19e-05,
"loss": 0.5267,
"step": 320
},
{
"grad_norm": 0.9185823798179626,
"learning_rate": 3.29e-05,
"loss": 0.497,
"step": 330
},
{
"grad_norm": 0.9909350872039795,
"learning_rate": 3.3900000000000004e-05,
"loss": 0.4704,
"step": 340
},
{
"grad_norm": 0.7408623695373535,
"learning_rate": 3.49e-05,
"loss": 0.4463,
"step": 350
},
{
"grad_norm": 0.8417967557907104,
"learning_rate": 3.59e-05,
"loss": 0.4515,
"step": 360
},
{
"grad_norm": 0.9200495481491089,
"learning_rate": 3.69e-05,
"loss": 0.417,
"step": 370
},
{
"grad_norm": 1.146302342414856,
"learning_rate": 3.79e-05,
"loss": 0.3937,
"step": 380
},
{
"grad_norm": 1.0057293176651,
"learning_rate": 3.8900000000000004e-05,
"loss": 0.3773,
"step": 390
},
{
"grad_norm": 1.112216591835022,
"learning_rate": 3.99e-05,
"loss": 0.348,
"step": 400
},
{
"grad_norm": 1.0176512002944946,
"learning_rate": 4.09e-05,
"loss": 0.3392,
"step": 410
},
{
"grad_norm": 1.0310163497924805,
"learning_rate": 4.19e-05,
"loss": 0.3065,
"step": 420
},
{
"grad_norm": 1.022374153137207,
"learning_rate": 4.29e-05,
"loss": 0.2808,
"step": 430
},
{
"grad_norm": 1.368080735206604,
"learning_rate": 4.39e-05,
"loss": 0.2624,
"step": 440
},
{
"grad_norm": 1.1092591285705566,
"learning_rate": 4.49e-05,
"loss": 0.2405,
"step": 450
},
{
"grad_norm": 0.9738430380821228,
"learning_rate": 4.5900000000000004e-05,
"loss": 0.2254,
"step": 460
},
{
"grad_norm": 1.033246636390686,
"learning_rate": 4.69e-05,
"loss": 0.2162,
"step": 470
},
{
"grad_norm": 0.9855560064315796,
"learning_rate": 4.79e-05,
"loss": 0.2088,
"step": 480
},
{
"grad_norm": 1.0313360691070557,
"learning_rate": 4.89e-05,
"loss": 0.2188,
"step": 490
},
{
"grad_norm": 1.100176215171814,
"learning_rate": 4.99e-05,
"loss": 0.2007,
"step": 500
},
{
"grad_norm": 1.0784265995025635,
"learning_rate": 5.0900000000000004e-05,
"loss": 0.2016,
"step": 510
},
{
"grad_norm": 1.0822303295135498,
"learning_rate": 5.19e-05,
"loss": 0.1961,
"step": 520
},
{
"grad_norm": 1.067589282989502,
"learning_rate": 5.2900000000000005e-05,
"loss": 0.1801,
"step": 530
},
{
"grad_norm": 1.1917147636413574,
"learning_rate": 5.390000000000001e-05,
"loss": 0.1705,
"step": 540
},
{
"grad_norm": 1.3141072988510132,
"learning_rate": 5.4900000000000006e-05,
"loss": 0.1851,
"step": 550
},
{
"grad_norm": 1.002855658531189,
"learning_rate": 5.590000000000001e-05,
"loss": 0.1663,
"step": 560
},
{
"grad_norm": 1.167011022567749,
"learning_rate": 5.69e-05,
"loss": 0.1741,
"step": 570
},
{
"grad_norm": 1.0936863422393799,
"learning_rate": 5.79e-05,
"loss": 0.1661,
"step": 580
},
{
"grad_norm": 0.9669778347015381,
"learning_rate": 5.89e-05,
"loss": 0.1648,
"step": 590
},
{
"grad_norm": 0.9405611753463745,
"learning_rate": 5.99e-05,
"loss": 0.1627,
"step": 600
},
{
"grad_norm": 1.0284767150878906,
"learning_rate": 6.09e-05,
"loss": 0.1496,
"step": 610
},
{
"grad_norm": 1.1097605228424072,
"learning_rate": 6.19e-05,
"loss": 0.1628,
"step": 620
},
{
"grad_norm": 0.9104214310646057,
"learning_rate": 6.29e-05,
"loss": 0.1302,
"step": 630
},
{
"grad_norm": 0.8578998446464539,
"learning_rate": 6.390000000000001e-05,
"loss": 0.1326,
"step": 640
},
{
"grad_norm": 1.1287304162979126,
"learning_rate": 6.49e-05,
"loss": 0.1127,
"step": 650
},
{
"grad_norm": 0.8655268549919128,
"learning_rate": 6.59e-05,
"loss": 0.1202,
"step": 660
},
{
"grad_norm": 0.9937160015106201,
"learning_rate": 6.690000000000001e-05,
"loss": 0.1198,
"step": 670
},
{
"grad_norm": 0.9691420197486877,
"learning_rate": 6.790000000000001e-05,
"loss": 0.1096,
"step": 680
},
{
"grad_norm": 1.0945252180099487,
"learning_rate": 6.89e-05,
"loss": 0.105,
"step": 690
},
{
"grad_norm": 1.0388752222061157,
"learning_rate": 6.99e-05,
"loss": 0.1027,
"step": 700
},
{
"grad_norm": 0.881949245929718,
"learning_rate": 7.09e-05,
"loss": 0.1044,
"step": 710
},
{
"grad_norm": 0.8678519129753113,
"learning_rate": 7.19e-05,
"loss": 0.0842,
"step": 720
},
{
"grad_norm": 1.2314260005950928,
"learning_rate": 7.29e-05,
"loss": 0.0841,
"step": 730
},
{
"grad_norm": 0.7337191700935364,
"learning_rate": 7.390000000000001e-05,
"loss": 0.0771,
"step": 740
},
{
"grad_norm": 1.194354772567749,
"learning_rate": 7.49e-05,
"loss": 0.0791,
"step": 750
},
{
"grad_norm": 1.0703870058059692,
"learning_rate": 7.59e-05,
"loss": 0.0697,
"step": 760
},
{
"grad_norm": 0.9820927977561951,
"learning_rate": 7.69e-05,
"loss": 0.0798,
"step": 770
},
{
"grad_norm": 1.099042534828186,
"learning_rate": 7.790000000000001e-05,
"loss": 0.0736,
"step": 780
},
{
"grad_norm": 0.9056155681610107,
"learning_rate": 7.890000000000001e-05,
"loss": 0.0756,
"step": 790
},
{
"grad_norm": 0.8292648792266846,
"learning_rate": 7.99e-05,
"loss": 0.0796,
"step": 800
},
{
"grad_norm": 0.9507290720939636,
"learning_rate": 8.090000000000001e-05,
"loss": 0.0829,
"step": 810
},
{
"grad_norm": 0.9466397762298584,
"learning_rate": 8.19e-05,
"loss": 0.0688,
"step": 820
},
{
"grad_norm": 0.7956731915473938,
"learning_rate": 8.29e-05,
"loss": 0.0747,
"step": 830
},
{
"grad_norm": 0.7995853424072266,
"learning_rate": 8.39e-05,
"loss": 0.0634,
"step": 840
},
{
"grad_norm": 0.7665478587150574,
"learning_rate": 8.49e-05,
"loss": 0.0661,
"step": 850
},
{
"grad_norm": 0.9283880591392517,
"learning_rate": 8.59e-05,
"loss": 0.0702,
"step": 860
},
{
"grad_norm": 1.126967191696167,
"learning_rate": 8.69e-05,
"loss": 0.0716,
"step": 870
},
{
"grad_norm": 0.8662194609642029,
"learning_rate": 8.790000000000001e-05,
"loss": 0.0667,
"step": 880
},
{
"grad_norm": 0.9572857022285461,
"learning_rate": 8.89e-05,
"loss": 0.0791,
"step": 890
},
{
"grad_norm": 0.9036967158317566,
"learning_rate": 8.99e-05,
"loss": 0.0745,
"step": 900
},
{
"grad_norm": 0.7550048828125,
"learning_rate": 9.090000000000001e-05,
"loss": 0.0746,
"step": 910
},
{
"grad_norm": 0.9990408420562744,
"learning_rate": 9.190000000000001e-05,
"loss": 0.0648,
"step": 920
},
{
"grad_norm": 0.8286410570144653,
"learning_rate": 9.290000000000001e-05,
"loss": 0.0697,
"step": 930
},
{
"grad_norm": 0.9783310890197754,
"learning_rate": 9.39e-05,
"loss": 0.0749,
"step": 940
},
{
"grad_norm": 0.9899768233299255,
"learning_rate": 9.49e-05,
"loss": 0.0722,
"step": 950
},
{
"grad_norm": 0.7450554370880127,
"learning_rate": 9.59e-05,
"loss": 0.0599,
"step": 960
},
{
"grad_norm": 0.7791635394096375,
"learning_rate": 9.69e-05,
"loss": 0.0654,
"step": 970
},
{
"grad_norm": 0.7614015340805054,
"learning_rate": 9.790000000000001e-05,
"loss": 0.0558,
"step": 980
},
{
"grad_norm": 0.9096309542655945,
"learning_rate": 9.89e-05,
"loss": 0.0581,
"step": 990
},
{
"grad_norm": 0.668950080871582,
"learning_rate": 9.99e-05,
"loss": 0.0652,
"step": 1000
},
{
"grad_norm": 0.8658283948898315,
"learning_rate": 9.999994463727085e-05,
"loss": 0.0529,
"step": 1010
},
{
"grad_norm": 0.7495288848876953,
"learning_rate": 9.999975326009292e-05,
"loss": 0.059,
"step": 1020
},
{
"grad_norm": 0.9980189204216003,
"learning_rate": 9.999942518549879e-05,
"loss": 0.0638,
"step": 1030
},
{
"grad_norm": 0.7826606035232544,
"learning_rate": 9.999896041438544e-05,
"loss": 0.0546,
"step": 1040
},
{
"grad_norm": 0.6360778212547302,
"learning_rate": 9.999835894802353e-05,
"loss": 0.054,
"step": 1050
},
{
"grad_norm": 0.7757160067558289,
"learning_rate": 9.999762078805743e-05,
"loss": 0.0591,
"step": 1060
},
{
"grad_norm": 0.7390689849853516,
"learning_rate": 9.999674593650526e-05,
"loss": 0.0595,
"step": 1070
},
{
"grad_norm": 0.6460424065589905,
"learning_rate": 9.99957343957588e-05,
"loss": 0.0658,
"step": 1080
},
{
"grad_norm": 0.8082983493804932,
"learning_rate": 9.99945861685836e-05,
"loss": 0.0596,
"step": 1090
},
{
"grad_norm": 0.7415626645088196,
"learning_rate": 9.999330125811884e-05,
"loss": 0.0483,
"step": 1100
},
{
"grad_norm": 0.8829818367958069,
"learning_rate": 9.999187966787744e-05,
"loss": 0.0619,
"step": 1110
},
{
"grad_norm": 0.8239393830299377,
"learning_rate": 9.999032140174595e-05,
"loss": 0.0528,
"step": 1120
},
{
"grad_norm": 0.8529507517814636,
"learning_rate": 9.998862646398464e-05,
"loss": 0.0654,
"step": 1130
},
{
"grad_norm": 0.7502208948135376,
"learning_rate": 9.998679485922739e-05,
"loss": 0.0526,
"step": 1140
},
{
"grad_norm": 0.6970030069351196,
"learning_rate": 9.998482659248174e-05,
"loss": 0.0547,
"step": 1150
},
{
"grad_norm": 0.9376399517059326,
"learning_rate": 9.998272166912883e-05,
"loss": 0.0557,
"step": 1160
},
{
"grad_norm": 0.7249330282211304,
"learning_rate": 9.998048009492347e-05,
"loss": 0.0504,
"step": 1170
},
{
"grad_norm": 0.8968970775604248,
"learning_rate": 9.997810187599403e-05,
"loss": 0.0526,
"step": 1180
},
{
"grad_norm": 0.7676458358764648,
"learning_rate": 9.997558701884249e-05,
"loss": 0.0506,
"step": 1190
},
{
"grad_norm": 0.6501711010932922,
"learning_rate": 9.997293553034433e-05,
"loss": 0.061,
"step": 1200
},
{
"grad_norm": 0.677116870880127,
"learning_rate": 9.997014741774866e-05,
"loss": 0.0462,
"step": 1210
},
{
"grad_norm": 0.8147766590118408,
"learning_rate": 9.996722268867803e-05,
"loss": 0.0486,
"step": 1220
},
{
"grad_norm": 0.706069827079773,
"learning_rate": 9.996416135112858e-05,
"loss": 0.0511,
"step": 1230
},
{
"grad_norm": 0.6159539818763733,
"learning_rate": 9.996096341346988e-05,
"loss": 0.0492,
"step": 1240
},
{
"grad_norm": 0.6369336843490601,
"learning_rate": 9.995762888444495e-05,
"loss": 0.0479,
"step": 1250
},
{
"grad_norm": 0.7543830275535583,
"learning_rate": 9.995415777317027e-05,
"loss": 0.0493,
"step": 1260
},
{
"grad_norm": 0.7505154609680176,
"learning_rate": 9.995055008913574e-05,
"loss": 0.053,
"step": 1270
},
{
"grad_norm": 0.5397493243217468,
"learning_rate": 9.994680584220463e-05,
"loss": 0.0432,
"step": 1280
},
{
"grad_norm": 0.6707198619842529,
"learning_rate": 9.994292504261355e-05,
"loss": 0.0472,
"step": 1290
},
{
"grad_norm": 0.8792182803153992,
"learning_rate": 9.993890770097247e-05,
"loss": 0.0453,
"step": 1300
},
{
"grad_norm": 0.7324561476707458,
"learning_rate": 9.993475382826467e-05,
"loss": 0.0479,
"step": 1310
},
{
"grad_norm": 0.8385289907455444,
"learning_rate": 9.993046343584664e-05,
"loss": 0.0549,
"step": 1320
},
{
"grad_norm": 0.5908923745155334,
"learning_rate": 9.992603653544816e-05,
"loss": 0.0483,
"step": 1330
},
{
"grad_norm": 0.63700932264328,
"learning_rate": 9.992147313917222e-05,
"loss": 0.0485,
"step": 1340
},
{
"grad_norm": 0.7525864839553833,
"learning_rate": 9.991677325949497e-05,
"loss": 0.0469,
"step": 1350
},
{
"grad_norm": 0.5628486275672913,
"learning_rate": 9.991193690926568e-05,
"loss": 0.0459,
"step": 1360
},
{
"grad_norm": 0.795554518699646,
"learning_rate": 9.990696410170678e-05,
"loss": 0.0467,
"step": 1370
},
{
"grad_norm": 0.7957155704498291,
"learning_rate": 9.990185485041371e-05,
"loss": 0.0481,
"step": 1380
},
{
"grad_norm": 0.5773254632949829,
"learning_rate": 9.989660916935498e-05,
"loss": 0.0471,
"step": 1390
},
{
"grad_norm": 0.6150880455970764,
"learning_rate": 9.989122707287208e-05,
"loss": 0.0426,
"step": 1400
},
{
"grad_norm": 0.7106145620346069,
"learning_rate": 9.988570857567945e-05,
"loss": 0.0537,
"step": 1410
},
{
"grad_norm": 0.9491516947746277,
"learning_rate": 9.988005369286446e-05,
"loss": 0.0525,
"step": 1420
},
{
"grad_norm": 0.6860232353210449,
"learning_rate": 9.987426243988734e-05,
"loss": 0.0429,
"step": 1430
},
{
"grad_norm": 0.7841853499412537,
"learning_rate": 9.986833483258114e-05,
"loss": 0.0524,
"step": 1440
},
{
"grad_norm": 0.6175568103790283,
"learning_rate": 9.986227088715173e-05,
"loss": 0.0385,
"step": 1450
},
{
"grad_norm": 0.5932314991950989,
"learning_rate": 9.98560706201777e-05,
"loss": 0.0408,
"step": 1460
},
{
"grad_norm": 0.7410153150558472,
"learning_rate": 9.984973404861036e-05,
"loss": 0.043,
"step": 1470
},
{
"grad_norm": 0.8330276608467102,
"learning_rate": 9.984326118977361e-05,
"loss": 0.051,
"step": 1480
},
{
"grad_norm": 0.7202706933021545,
"learning_rate": 9.983665206136406e-05,
"loss": 0.0493,
"step": 1490
},
{
"grad_norm": 0.574433445930481,
"learning_rate": 9.982990668145075e-05,
"loss": 0.0466,
"step": 1500
},
{
"grad_norm": 0.7351802587509155,
"learning_rate": 9.982302506847534e-05,
"loss": 0.057,
"step": 1510
},
{
"grad_norm": 0.819564163684845,
"learning_rate": 9.981600724125189e-05,
"loss": 0.0555,
"step": 1520
},
{
"grad_norm": 0.6065496206283569,
"learning_rate": 9.980885321896685e-05,
"loss": 0.0509,
"step": 1530
},
{
"grad_norm": 0.6572223901748657,
"learning_rate": 9.980156302117905e-05,
"loss": 0.044,
"step": 1540
},
{
"grad_norm": 0.6978927254676819,
"learning_rate": 9.979413666781963e-05,
"loss": 0.0465,
"step": 1550
},
{
"grad_norm": 0.5508580803871155,
"learning_rate": 9.978657417919193e-05,
"loss": 0.0452,
"step": 1560
},
{
"grad_norm": 0.5769541263580322,
"learning_rate": 9.977887557597153e-05,
"loss": 0.0475,
"step": 1570
},
{
"grad_norm": 0.5610742568969727,
"learning_rate": 9.97710408792061e-05,
"loss": 0.0469,
"step": 1580
},
{
"grad_norm": 0.5692776441574097,
"learning_rate": 9.976307011031542e-05,
"loss": 0.0449,
"step": 1590
},
{
"grad_norm": 0.5226185321807861,
"learning_rate": 9.975496329109126e-05,
"loss": 0.0476,
"step": 1600
},
{
"grad_norm": 0.7111744284629822,
"learning_rate": 9.974672044369732e-05,
"loss": 0.047,
"step": 1610
},
{
"grad_norm": 0.514858067035675,
"learning_rate": 9.97383415906693e-05,
"loss": 0.043,
"step": 1620
},
{
"grad_norm": 0.5856963396072388,
"learning_rate": 9.97298267549146e-05,
"loss": 0.0471,
"step": 1630
},
{
"grad_norm": 0.6191436052322388,
"learning_rate": 9.972117595971249e-05,
"loss": 0.0422,
"step": 1640
},
{
"grad_norm": 0.5670982599258423,
"learning_rate": 9.971238922871391e-05,
"loss": 0.0419,
"step": 1650
},
{
"grad_norm": 0.7190003991127014,
"learning_rate": 9.970346658594142e-05,
"loss": 0.0453,
"step": 1660
},
{
"grad_norm": 0.6552428007125854,
"learning_rate": 9.969440805578923e-05,
"loss": 0.046,
"step": 1670
},
{
"grad_norm": 0.578118622303009,
"learning_rate": 9.968521366302298e-05,
"loss": 0.0392,
"step": 1680
},
{
"grad_norm": 0.7054030895233154,
"learning_rate": 9.967588343277981e-05,
"loss": 0.0455,
"step": 1690
},
{
"grad_norm": 0.6531293392181396,
"learning_rate": 9.966641739056818e-05,
"loss": 0.0421,
"step": 1700
},
{
"grad_norm": 0.6111751198768616,
"learning_rate": 9.965681556226793e-05,
"loss": 0.0517,
"step": 1710
},
{
"grad_norm": 0.4928556978702545,
"learning_rate": 9.964707797413006e-05,
"loss": 0.044,
"step": 1720
},
{
"grad_norm": 0.6597058773040771,
"learning_rate": 9.963720465277679e-05,
"loss": 0.047,
"step": 1730
},
{
"grad_norm": 0.6202155351638794,
"learning_rate": 9.96271956252014e-05,
"loss": 0.0384,
"step": 1740
},
{
"grad_norm": 0.5262959599494934,
"learning_rate": 9.961705091876816e-05,
"loss": 0.0425,
"step": 1750
},
{
"grad_norm": 0.6935763955116272,
"learning_rate": 9.960677056121235e-05,
"loss": 0.0409,
"step": 1760
},
{
"grad_norm": 0.6149827837944031,
"learning_rate": 9.959635458064005e-05,
"loss": 0.0383,
"step": 1770
},
{
"grad_norm": 0.5901826024055481,
"learning_rate": 9.958580300552815e-05,
"loss": 0.0426,
"step": 1780
},
{
"grad_norm": 0.5597098469734192,
"learning_rate": 9.957511586472426e-05,
"loss": 0.0352,
"step": 1790
},
{
"grad_norm": 0.5581690073013306,
"learning_rate": 9.956429318744662e-05,
"loss": 0.0366,
"step": 1800
},
{
"grad_norm": 0.5969916582107544,
"learning_rate": 9.955333500328404e-05,
"loss": 0.0355,
"step": 1810
},
{
"grad_norm": 0.5474916696548462,
"learning_rate": 9.95422413421957e-05,
"loss": 0.0376,
"step": 1820
},
{
"grad_norm": 0.5651562809944153,
"learning_rate": 9.953101223451133e-05,
"loss": 0.0359,
"step": 1830
},
{
"grad_norm": 0.6243921518325806,
"learning_rate": 9.951964771093085e-05,
"loss": 0.0373,
"step": 1840
},
{
"grad_norm": 0.4624647796154022,
"learning_rate": 9.950814780252442e-05,
"loss": 0.0347,
"step": 1850
},
{
"grad_norm": 0.5893751382827759,
"learning_rate": 9.949651254073236e-05,
"loss": 0.0408,
"step": 1860
},
{
"grad_norm": 0.526287317276001,
"learning_rate": 9.948474195736504e-05,
"loss": 0.0388,
"step": 1870
},
{
"grad_norm": 0.6111840605735779,
"learning_rate": 9.947283608460277e-05,
"loss": 0.0346,
"step": 1880
},
{
"grad_norm": 0.46461328864097595,
"learning_rate": 9.946079495499577e-05,
"loss": 0.0411,
"step": 1890
},
{
"grad_norm": 0.610548734664917,
"learning_rate": 9.944861860146401e-05,
"loss": 0.0407,
"step": 1900
},
{
"grad_norm": 0.5339504480361938,
"learning_rate": 9.943630705729719e-05,
"loss": 0.0398,
"step": 1910
},
{
"grad_norm": 0.46559029817581177,
"learning_rate": 9.942386035615459e-05,
"loss": 0.039,
"step": 1920
},
{
"grad_norm": 0.7745798826217651,
"learning_rate": 9.941127853206503e-05,
"loss": 0.04,
"step": 1930
},
{
"grad_norm": 0.5811882019042969,
"learning_rate": 9.939856161942673e-05,
"loss": 0.0425,
"step": 1940
},
{
"grad_norm": 0.4856541156768799,
"learning_rate": 9.938570965300724e-05,
"loss": 0.0363,
"step": 1950
},
{
"grad_norm": 0.5952467918395996,
"learning_rate": 9.937272266794335e-05,
"loss": 0.0439,
"step": 1960
},
{
"grad_norm": 0.5669976472854614,
"learning_rate": 9.935960069974096e-05,
"loss": 0.05,
"step": 1970
},
{
"grad_norm": 0.5959198474884033,
"learning_rate": 9.934634378427506e-05,
"loss": 0.0382,
"step": 1980
},
{
"grad_norm": 0.520875096321106,
"learning_rate": 9.933295195778954e-05,
"loss": 0.0386,
"step": 1990
},
{
"grad_norm": 0.4351758360862732,
"learning_rate": 9.931942525689715e-05,
"loss": 0.0488,
"step": 2000
},
{
"grad_norm": 0.6345981359481812,
"learning_rate": 9.930576371857936e-05,
"loss": 0.0391,
"step": 2010
},
{
"grad_norm": 0.6230748295783997,
"learning_rate": 9.929196738018629e-05,
"loss": 0.0388,
"step": 2020
},
{
"grad_norm": 0.5425089001655579,
"learning_rate": 9.927803627943662e-05,
"loss": 0.0395,
"step": 2030
},
{
"grad_norm": 0.49332770705223083,
"learning_rate": 9.926397045441744e-05,
"loss": 0.039,
"step": 2040
},
{
"grad_norm": 0.6731558442115784,
"learning_rate": 9.924976994358417e-05,
"loss": 0.0427,
"step": 2050
},
{
"grad_norm": 0.5310463309288025,
"learning_rate": 9.923543478576048e-05,
"loss": 0.0474,
"step": 2060
},
{
"grad_norm": 0.548930823802948,
"learning_rate": 9.922096502013813e-05,
"loss": 0.0423,
"step": 2070
},
{
"grad_norm": 0.5744786262512207,
"learning_rate": 9.92063606862769e-05,
"loss": 0.0372,
"step": 2080
},
{
"grad_norm": 0.6390929222106934,
"learning_rate": 9.919162182410453e-05,
"loss": 0.0368,
"step": 2090
},
{
"grad_norm": 0.5252511501312256,
"learning_rate": 9.917674847391645e-05,
"loss": 0.038,
"step": 2100
},
{
"grad_norm": 0.5656434297561646,
"learning_rate": 9.916174067637584e-05,
"loss": 0.0333,
"step": 2110
},
{
"grad_norm": 0.5288258790969849,
"learning_rate": 9.914659847251348e-05,
"loss": 0.0406,
"step": 2120
},
{
"grad_norm": 0.5040147304534912,
"learning_rate": 9.913132190372753e-05,
"loss": 0.0369,
"step": 2130
},
{
"grad_norm": 0.5128138661384583,
"learning_rate": 9.911591101178359e-05,
"loss": 0.0368,
"step": 2140
},
{
"grad_norm": 0.4942684769630432,
"learning_rate": 9.910036583881443e-05,
"loss": 0.0334,
"step": 2150
},
{
"grad_norm": 0.5318565368652344,
"learning_rate": 9.908468642731995e-05,
"loss": 0.0325,
"step": 2160
},
{
"grad_norm": 0.5772367715835571,
"learning_rate": 9.906887282016707e-05,
"loss": 0.0344,
"step": 2170
},
{
"grad_norm": 0.5957911014556885,
"learning_rate": 9.90529250605896e-05,
"loss": 0.0368,
"step": 2180
},
{
"grad_norm": 0.6259480714797974,
"learning_rate": 9.903684319218809e-05,
"loss": 0.0375,
"step": 2190
},
{
"grad_norm": 0.691277801990509,
"learning_rate": 9.902062725892976e-05,
"loss": 0.0402,
"step": 2200
},
{
"grad_norm": 0.624859094619751,
"learning_rate": 9.900427730514834e-05,
"loss": 0.0316,
"step": 2210
},
{
"grad_norm": 0.46915674209594727,
"learning_rate": 9.8987793375544e-05,
"loss": 0.0352,
"step": 2220
},
{
"grad_norm": 0.5559591054916382,
"learning_rate": 9.897117551518318e-05,
"loss": 0.0353,
"step": 2230
},
{
"grad_norm": 0.47577548027038574,
"learning_rate": 9.895442376949844e-05,
"loss": 0.0395,
"step": 2240
},
{
"grad_norm": 0.7231595516204834,
"learning_rate": 9.893753818428845e-05,
"loss": 0.0442,
"step": 2250
},
{
"grad_norm": 0.4607575535774231,
"learning_rate": 9.892051880571773e-05,
"loss": 0.037,
"step": 2260
},
{
"grad_norm": 0.4901242256164551,
"learning_rate": 9.890336568031663e-05,
"loss": 0.0342,
"step": 2270
},
{
"grad_norm": 0.46413323283195496,
"learning_rate": 9.888607885498113e-05,
"loss": 0.0386,
"step": 2280
},
{
"grad_norm": 0.5028432607650757,
"learning_rate": 9.886865837697275e-05,
"loss": 0.0384,
"step": 2290
},
{
"grad_norm": 0.6079827547073364,
"learning_rate": 9.88511042939184e-05,
"loss": 0.0416,
"step": 2300
},
{
"grad_norm": 0.6189248561859131,
"learning_rate": 9.883341665381028e-05,
"loss": 0.0372,
"step": 2310
},
{
"grad_norm": 0.569456160068512,
"learning_rate": 9.881559550500575e-05,
"loss": 0.0317,
"step": 2320
},
{
"grad_norm": 0.5782006978988647,
"learning_rate": 9.879764089622712e-05,
"loss": 0.0363,
"step": 2330
},
{
"grad_norm": 0.6612024307250977,
"learning_rate": 9.87795528765616e-05,
"loss": 0.0386,
"step": 2340
},
{
"grad_norm": 0.45619797706604004,
"learning_rate": 9.876133149546118e-05,
"loss": 0.0385,
"step": 2350
},
{
"grad_norm": 0.4743977189064026,
"learning_rate": 9.874297680274238e-05,
"loss": 0.0384,
"step": 2360
},
{
"grad_norm": 0.5303918719291687,
"learning_rate": 9.872448884858624e-05,
"loss": 0.0364,
"step": 2370
},
{
"grad_norm": 0.5923212766647339,
"learning_rate": 9.870586768353815e-05,
"loss": 0.0366,
"step": 2380
},
{
"grad_norm": 0.5156052112579346,
"learning_rate": 9.868711335850764e-05,
"loss": 0.0412,
"step": 2390
},
{
"grad_norm": 0.4702778458595276,
"learning_rate": 9.866822592476833e-05,
"loss": 0.0353,
"step": 2400
},
{
"grad_norm": 0.4955006241798401,
"learning_rate": 9.86492054339577e-05,
"loss": 0.0356,
"step": 2410
},
{
"grad_norm": 0.4722374677658081,
"learning_rate": 9.863005193807711e-05,
"loss": 0.0328,
"step": 2420
},
{
"grad_norm": 0.5261074900627136,
"learning_rate": 9.861076548949143e-05,
"loss": 0.0314,
"step": 2430
},
{
"grad_norm": 0.43109720945358276,
"learning_rate": 9.859134614092912e-05,
"loss": 0.0306,
"step": 2440
},
{
"grad_norm": 0.5150691270828247,
"learning_rate": 9.857179394548191e-05,
"loss": 0.0331,
"step": 2450
},
{
"grad_norm": 0.413881778717041,
"learning_rate": 9.855210895660477e-05,
"loss": 0.0313,
"step": 2460
},
{
"grad_norm": 0.5778813362121582,
"learning_rate": 9.853229122811568e-05,
"loss": 0.0327,
"step": 2470
},
{
"grad_norm": 0.5499809980392456,
"learning_rate": 9.851234081419559e-05,
"loss": 0.0371,
"step": 2480
},
{
"grad_norm": 0.533755898475647,
"learning_rate": 9.849225776938814e-05,
"loss": 0.0347,
"step": 2490
},
{
"grad_norm": 0.5036794543266296,
"learning_rate": 9.847204214859964e-05,
"loss": 0.0365,
"step": 2500
},
{
"grad_norm": 0.4547636806964874,
"learning_rate": 9.845169400709879e-05,
"loss": 0.0284,
"step": 2510
},
{
"grad_norm": 0.4148177206516266,
"learning_rate": 9.843121340051664e-05,
"loss": 0.0338,
"step": 2520
},
{
"grad_norm": 0.4307814836502075,
"learning_rate": 9.841060038484641e-05,
"loss": 0.0401,
"step": 2530
},
{
"grad_norm": 0.5055217146873474,
"learning_rate": 9.838985501644328e-05,
"loss": 0.0413,
"step": 2540
},
{
"grad_norm": 0.5252987742424011,
"learning_rate": 9.83689773520243e-05,
"loss": 0.0334,
"step": 2550
},
{
"grad_norm": 0.5325053334236145,
"learning_rate": 9.834796744866819e-05,
"loss": 0.0339,
"step": 2560
},
{
"grad_norm": 0.5485632419586182,
"learning_rate": 9.832682536381525e-05,
"loss": 0.0354,
"step": 2570
},
{
"grad_norm": 0.5406777262687683,
"learning_rate": 9.830555115526711e-05,
"loss": 0.0368,
"step": 2580
},
{
"grad_norm": 0.37698280811309814,
"learning_rate": 9.828414488118667e-05,
"loss": 0.0336,
"step": 2590
},
{
"grad_norm": 0.5253736972808838,
"learning_rate": 9.826260660009785e-05,
"loss": 0.0337,
"step": 2600
},
{
"grad_norm": 0.482319176197052,
"learning_rate": 9.824093637088547e-05,
"loss": 0.0299,
"step": 2610
},
{
"grad_norm": 0.43845584988594055,
"learning_rate": 9.821913425279514e-05,
"loss": 0.032,
"step": 2620
},
{
"grad_norm": 0.4526597559452057,
"learning_rate": 9.8197200305433e-05,
"loss": 0.034,
"step": 2630
},
{
"grad_norm": 0.45589521527290344,
"learning_rate": 9.817513458876564e-05,
"loss": 0.0464,
"step": 2640
},
{
"grad_norm": 0.5381149649620056,
"learning_rate": 9.815293716311987e-05,
"loss": 0.0334,
"step": 2650
},
{
"grad_norm": 0.5279123187065125,
"learning_rate": 9.813060808918262e-05,
"loss": 0.0318,
"step": 2660
},
{
"grad_norm": 0.3532435894012451,
"learning_rate": 9.810814742800069e-05,
"loss": 0.0285,
"step": 2670
},
{
"grad_norm": 0.3765302896499634,
"learning_rate": 9.808555524098074e-05,
"loss": 0.0289,
"step": 2680
},
{
"grad_norm": 0.46037837862968445,
"learning_rate": 9.806283158988887e-05,
"loss": 0.0291,
"step": 2690
},
{
"grad_norm": 0.483735591173172,
"learning_rate": 9.803997653685072e-05,
"loss": 0.0392,
"step": 2700
},
{
"grad_norm": 0.45865148305892944,
"learning_rate": 9.801699014435112e-05,
"loss": 0.0393,
"step": 2710
},
{
"grad_norm": 0.4620376229286194,
"learning_rate": 9.799387247523398e-05,
"loss": 0.0352,
"step": 2720
},
{
"grad_norm": 0.41832435131073,
"learning_rate": 9.797062359270215e-05,
"loss": 0.0319,
"step": 2730
},
{
"grad_norm": 0.4439375400543213,
"learning_rate": 9.794724356031715e-05,
"loss": 0.0307,
"step": 2740
},
{
"grad_norm": 0.5037664771080017,
"learning_rate": 9.792373244199913e-05,
"loss": 0.0306,
"step": 2750
},
{
"grad_norm": 0.378164678812027,
"learning_rate": 9.790009030202658e-05,
"loss": 0.0313,
"step": 2760
},
{
"grad_norm": 0.5053073763847351,
"learning_rate": 9.78763172050362e-05,
"loss": 0.0295,
"step": 2770
},
{
"grad_norm": 0.4680381119251251,
"learning_rate": 9.785241321602274e-05,
"loss": 0.0277,
"step": 2780
},
{
"grad_norm": 0.4624013304710388,
"learning_rate": 9.782837840033879e-05,
"loss": 0.0288,
"step": 2790
},
{
"grad_norm": 0.5074241757392883,
"learning_rate": 9.780421282369461e-05,
"loss": 0.0292,
"step": 2800
},
{
"grad_norm": 0.4835506081581116,
"learning_rate": 9.777991655215797e-05,
"loss": 0.0294,
"step": 2810
},
{
"grad_norm": 0.5738292336463928,
"learning_rate": 9.775548965215394e-05,
"loss": 0.0295,
"step": 2820
},
{
"grad_norm": 0.5334445238113403,
"learning_rate": 9.773093219046474e-05,
"loss": 0.0293,
"step": 2830
},
{
"grad_norm": 0.4011390507221222,
"learning_rate": 9.770624423422954e-05,
"loss": 0.0291,
"step": 2840
},
{
"grad_norm": 0.41171419620513916,
"learning_rate": 9.768142585094426e-05,
"loss": 0.0302,
"step": 2850
},
{
"grad_norm": 0.46391263604164124,
"learning_rate": 9.765647710846142e-05,
"loss": 0.0405,
"step": 2860
},
{
"grad_norm": 0.5071845650672913,
"learning_rate": 9.763139807498991e-05,
"loss": 0.0285,
"step": 2870
},
{
"grad_norm": 0.4814237058162689,
"learning_rate": 9.760618881909487e-05,
"loss": 0.0317,
"step": 2880
},
{
"grad_norm": 0.5396919846534729,
"learning_rate": 9.758084940969744e-05,
"loss": 0.0316,
"step": 2890
},
{
"grad_norm": 0.5363779664039612,
"learning_rate": 9.755537991607459e-05,
"loss": 0.027,
"step": 2900
},
{
"grad_norm": 0.505138099193573,
"learning_rate": 9.752978040785895e-05,
"loss": 0.0354,
"step": 2910
},
{
"grad_norm": 0.5476271510124207,
"learning_rate": 9.750405095503859e-05,
"loss": 0.0299,
"step": 2920
},
{
"grad_norm": 0.5189036130905151,
"learning_rate": 9.747819162795686e-05,
"loss": 0.0331,
"step": 2930
},
{
"grad_norm": 0.45717042684555054,
"learning_rate": 9.745220249731217e-05,
"loss": 0.026,
"step": 2940
},
{
"grad_norm": 0.4337165355682373,
"learning_rate": 9.742608363415781e-05,
"loss": 0.0272,
"step": 2950
},
{
"grad_norm": 0.4811023771762848,
"learning_rate": 9.739983510990176e-05,
"loss": 0.0288,
"step": 2960
},
{
"grad_norm": 0.3455168902873993,
"learning_rate": 9.737345699630647e-05,
"loss": 0.0298,
"step": 2970
},
{
"grad_norm": 0.5057815313339233,
"learning_rate": 9.734694936548869e-05,
"loss": 0.0332,
"step": 2980
},
{
"grad_norm": 0.38619765639305115,
"learning_rate": 9.732031228991932e-05,
"loss": 0.0256,
"step": 2990
},
{
"grad_norm": 0.3297816514968872,
"learning_rate": 9.729354584242302e-05,
"loss": 0.0355,
"step": 3000
},
{
"grad_norm": 0.5174765586853027,
"learning_rate": 9.726665009617832e-05,
"loss": 0.0309,
"step": 3010
},
{
"grad_norm": 0.43245866894721985,
"learning_rate": 9.723962512471714e-05,
"loss": 0.033,
"step": 3020
},
{
"grad_norm": 0.516598105430603,
"learning_rate": 9.72124710019247e-05,
"loss": 0.03,
"step": 3030
},
{
"grad_norm": 0.48712822794914246,
"learning_rate": 9.718518780203934e-05,
"loss": 0.0322,
"step": 3040
},
{
"grad_norm": 0.3674415946006775,
"learning_rate": 9.715777559965228e-05,
"loss": 0.0319,
"step": 3050
},
{
"grad_norm": 0.4218079149723053,
"learning_rate": 9.713023446970746e-05,
"loss": 0.0255,
"step": 3060
},
{
"grad_norm": 0.4967867136001587,
"learning_rate": 9.710256448750126e-05,
"loss": 0.0311,
"step": 3070
},
{
"grad_norm": 0.497653067111969,
"learning_rate": 9.707476572868235e-05,
"loss": 0.0341,
"step": 3080
},
{
"grad_norm": 0.4222137928009033,
"learning_rate": 9.704683826925149e-05,
"loss": 0.0273,
"step": 3090
},
{
"grad_norm": 0.37705838680267334,
"learning_rate": 9.701878218556129e-05,
"loss": 0.036,
"step": 3100
},
{
"grad_norm": 0.5626199841499329,
"learning_rate": 9.699059755431598e-05,
"loss": 0.0331,
"step": 3110
},
{
"grad_norm": 0.46293774247169495,
"learning_rate": 9.696228445257132e-05,
"loss": 0.0277,
"step": 3120
},
{
"grad_norm": 0.42764750123023987,
"learning_rate": 9.693384295773419e-05,
"loss": 0.0327,
"step": 3130
},
{
"grad_norm": 0.4717363715171814,
"learning_rate": 9.690527314756259e-05,
"loss": 0.0339,
"step": 3140
},
{
"grad_norm": 0.458967387676239,
"learning_rate": 9.687657510016527e-05,
"loss": 0.0261,
"step": 3150
},
{
"grad_norm": 0.45871081948280334,
"learning_rate": 9.684774889400161e-05,
"loss": 0.0309,
"step": 3160
},
{
"grad_norm": 0.5132860541343689,
"learning_rate": 9.681879460788135e-05,
"loss": 0.0264,
"step": 3170
},
{
"grad_norm": 0.4729975461959839,
"learning_rate": 9.67897123209644e-05,
"loss": 0.0315,
"step": 3180
},
{
"grad_norm": 0.4921012818813324,
"learning_rate": 9.676050211276062e-05,
"loss": 0.035,
"step": 3190
},
{
"grad_norm": 0.4574073255062103,
"learning_rate": 9.673116406312962e-05,
"loss": 0.0284,
"step": 3200
},
{
"grad_norm": 0.48541590571403503,
"learning_rate": 9.67016982522805e-05,
"loss": 0.028,
"step": 3210
},
{
"grad_norm": 0.4924331307411194,
"learning_rate": 9.667210476077164e-05,
"loss": 0.028,
"step": 3220
},
{
"grad_norm": 0.5730510950088501,
"learning_rate": 9.664238366951055e-05,
"loss": 0.0288,
"step": 3230
},
{
"grad_norm": 0.5551027059555054,
"learning_rate": 9.661253505975355e-05,
"loss": 0.0269,
"step": 3240
},
{
"grad_norm": 0.4366356134414673,
"learning_rate": 9.658255901310557e-05,
"loss": 0.0301,
"step": 3250
},
{
"grad_norm": 0.5327138304710388,
"learning_rate": 9.655245561152e-05,
"loss": 0.0278,
"step": 3260
},
{
"grad_norm": 0.4516207277774811,
"learning_rate": 9.65222249372984e-05,
"loss": 0.0266,
"step": 3270
},
{
"grad_norm": 0.4709407687187195,
"learning_rate": 9.649186707309026e-05,
"loss": 0.0325,
"step": 3280
},
{
"grad_norm": 0.36673372983932495,
"learning_rate": 9.646138210189283e-05,
"loss": 0.0285,
"step": 3290
},
{
"grad_norm": 0.5308244824409485,
"learning_rate": 9.643077010705087e-05,
"loss": 0.0281,
"step": 3300
},
{
"grad_norm": 0.45568153262138367,
"learning_rate": 9.640003117225637e-05,
"loss": 0.0286,
"step": 3310
},
{
"grad_norm": 0.4082559049129486,
"learning_rate": 9.636916538154846e-05,
"loss": 0.0241,
"step": 3320
},
{
"grad_norm": 0.48012563586235046,
"learning_rate": 9.633817281931296e-05,
"loss": 0.0297,
"step": 3330
},
{
"grad_norm": 0.4177444875240326,
"learning_rate": 9.630705357028242e-05,
"loss": 0.032,
"step": 3340
},
{
"grad_norm": 0.48793429136276245,
"learning_rate": 9.627580771953563e-05,
"loss": 0.0285,
"step": 3350
},
{
"grad_norm": 0.4371464252471924,
"learning_rate": 9.624443535249759e-05,
"loss": 0.0275,
"step": 3360
},
{
"grad_norm": 0.4983312487602234,
"learning_rate": 9.621293655493913e-05,
"loss": 0.0254,
"step": 3370
},
{
"grad_norm": 0.5624396204948425,
"learning_rate": 9.618131141297675e-05,
"loss": 0.027,
"step": 3380
},
{
"grad_norm": 0.43570947647094727,
"learning_rate": 9.614956001307242e-05,
"loss": 0.0301,
"step": 3390
},
{
"grad_norm": 0.4448493719100952,
"learning_rate": 9.611768244203321e-05,
"loss": 0.0351,
"step": 3400
},
{
"grad_norm": 0.4213621914386749,
"learning_rate": 9.60856787870112e-05,
"loss": 0.0292,
"step": 3410
},
{
"grad_norm": 0.4154338836669922,
"learning_rate": 9.605354913550318e-05,
"loss": 0.0262,
"step": 3420
},
{
"grad_norm": 0.45102718472480774,
"learning_rate": 9.602129357535037e-05,
"loss": 0.0313,
"step": 3430
},
{
"grad_norm": 0.38145503401756287,
"learning_rate": 9.598891219473825e-05,
"loss": 0.027,
"step": 3440
},
{
"grad_norm": 0.41790488362312317,
"learning_rate": 9.595640508219625e-05,
"loss": 0.0291,
"step": 3450
},
{
"grad_norm": 0.4644753336906433,
"learning_rate": 9.592377232659761e-05,
"loss": 0.0249,
"step": 3460
},
{
"grad_norm": 0.4731713533401489,
"learning_rate": 9.589101401715904e-05,
"loss": 0.0263,
"step": 3470
},
{
"grad_norm": 0.42398542165756226,
"learning_rate": 9.585813024344045e-05,
"loss": 0.026,
"step": 3480
},
{
"grad_norm": 0.5419644117355347,
"learning_rate": 9.58251210953449e-05,
"loss": 0.0296,
"step": 3490
},
{
"grad_norm": 0.463670939207077,
"learning_rate": 9.579198666311809e-05,
"loss": 0.0238,
"step": 3500
},
{
"grad_norm": 0.39643239974975586,
"learning_rate": 9.575872703734832e-05,
"loss": 0.0292,
"step": 3510
},
{
"grad_norm": 0.3542700409889221,
"learning_rate": 9.572534230896611e-05,
"loss": 0.0231,
"step": 3520
},
{
"grad_norm": 0.43060752749443054,
"learning_rate": 9.569183256924403e-05,
"loss": 0.025,
"step": 3530
},
{
"grad_norm": 0.40233463048934937,
"learning_rate": 9.565819790979646e-05,
"loss": 0.0422,
"step": 3540
},
{
"grad_norm": 0.4497774839401245,
"learning_rate": 9.562443842257925e-05,
"loss": 0.029,
"step": 3550
},
{
"grad_norm": 0.5018470287322998,
"learning_rate": 9.559055419988956e-05,
"loss": 0.0283,
"step": 3560
},
{
"grad_norm": 0.47868454456329346,
"learning_rate": 9.555654533436557e-05,
"loss": 0.0349,
"step": 3570
},
{
"grad_norm": 0.4413691759109497,
"learning_rate": 9.552241191898621e-05,
"loss": 0.0238,
"step": 3580
},
{
"grad_norm": 0.40998080372810364,
"learning_rate": 9.548815404707092e-05,
"loss": 0.03,
"step": 3590
},
{
"grad_norm": 0.43824273347854614,
"learning_rate": 9.545377181227942e-05,
"loss": 0.0284,
"step": 3600
},
{
"grad_norm": 0.4570449888706207,
"learning_rate": 9.541926530861145e-05,
"loss": 0.0266,
"step": 3610
},
{
"grad_norm": 0.44766074419021606,
"learning_rate": 9.538463463040645e-05,
"loss": 0.0278,
"step": 3620
},
{
"grad_norm": 0.481611967086792,
"learning_rate": 9.534987987234337e-05,
"loss": 0.0277,
"step": 3630
},
{
"grad_norm": 0.4858357608318329,
"learning_rate": 9.53150011294404e-05,
"loss": 0.0265,
"step": 3640
},
{
"grad_norm": 0.40574368834495544,
"learning_rate": 9.527999849705471e-05,
"loss": 0.0297,
"step": 3650
},
{
"grad_norm": 0.4581122100353241,
"learning_rate": 9.524487207088213e-05,
"loss": 0.0224,
"step": 3660
},
{
"grad_norm": 0.4100882411003113,
"learning_rate": 9.520962194695698e-05,
"loss": 0.0239,
"step": 3670
},
{
"grad_norm": 0.40333643555641174,
"learning_rate": 9.517424822165175e-05,
"loss": 0.0238,
"step": 3680
},
{
"grad_norm": 0.5596145987510681,
"learning_rate": 9.513875099167685e-05,
"loss": 0.0245,
"step": 3690
},
{
"grad_norm": 0.5230712890625,
"learning_rate": 9.510313035408035e-05,
"loss": 0.0262,
"step": 3700
},
{
"grad_norm": 0.39155617356300354,
"learning_rate": 9.506738640624775e-05,
"loss": 0.0264,
"step": 3710
},
{
"grad_norm": 0.4129464328289032,
"learning_rate": 9.50315192459016e-05,
"loss": 0.0208,
"step": 3720
},
{
"grad_norm": 0.5159543752670288,
"learning_rate": 9.499552897110136e-05,
"loss": 0.0239,
"step": 3730
},
{
"grad_norm": 0.5178094506263733,
"learning_rate": 9.495941568024304e-05,
"loss": 0.0253,
"step": 3740
},
{
"grad_norm": 0.43580612540245056,
"learning_rate": 9.492317947205904e-05,
"loss": 0.0268,
"step": 3750
},
{
"grad_norm": 0.4596274495124817,
"learning_rate": 9.488682044561775e-05,
"loss": 0.0256,
"step": 3760
},
{
"grad_norm": 0.41573286056518555,
"learning_rate": 9.485033870032335e-05,
"loss": 0.0243,
"step": 3770
},
{
"grad_norm": 0.47876912355422974,
"learning_rate": 9.481373433591556e-05,
"loss": 0.0215,
"step": 3780
},
{
"grad_norm": 0.4741547703742981,
"learning_rate": 9.47770074524693e-05,
"loss": 0.027,
"step": 3790
},
{
"grad_norm": 0.4306631088256836,
"learning_rate": 9.474015815039446e-05,
"loss": 0.0277,
"step": 3800
},
{
"grad_norm": 0.46127429604530334,
"learning_rate": 9.470318653043565e-05,
"loss": 0.0273,
"step": 3810
},
{
"grad_norm": 0.5021414160728455,
"learning_rate": 9.466609269367185e-05,
"loss": 0.0263,
"step": 3820
},
{
"grad_norm": 0.5333779454231262,
"learning_rate": 9.46288767415162e-05,
"loss": 0.0234,
"step": 3830
},
{
"grad_norm": 0.4366990625858307,
"learning_rate": 9.459153877571567e-05,
"loss": 0.0225,
"step": 3840
},
{
"grad_norm": 0.4819251298904419,
"learning_rate": 9.455407889835087e-05,
"loss": 0.0238,
"step": 3850
},
{
"grad_norm": 0.3999616503715515,
"learning_rate": 9.451649721183564e-05,
"loss": 0.0234,
"step": 3860
},
{
"grad_norm": 0.37807697057724,
"learning_rate": 9.447879381891692e-05,
"loss": 0.0258,
"step": 3870
},
{
"grad_norm": 0.5266739130020142,
"learning_rate": 9.444096882267428e-05,
"loss": 0.0329,
"step": 3880
},
{
"grad_norm": 0.3961910903453827,
"learning_rate": 9.440302232651988e-05,
"loss": 0.0226,
"step": 3890
},
{
"grad_norm": 0.3786242604255676,
"learning_rate": 9.436495443419795e-05,
"loss": 0.024,
"step": 3900
},
{
"grad_norm": 0.4175941050052643,
"learning_rate": 9.432676524978466e-05,
"loss": 0.0219,
"step": 3910
},
{
"grad_norm": 0.44096827507019043,
"learning_rate": 9.42884548776878e-05,
"loss": 0.0253,
"step": 3920
},
{
"grad_norm": 0.41201087832450867,
"learning_rate": 9.425002342264646e-05,
"loss": 0.0223,
"step": 3930
},
{
"grad_norm": 0.5009353160858154,
"learning_rate": 9.421147098973077e-05,
"loss": 0.0266,
"step": 3940
},
{
"grad_norm": 0.5505723357200623,
"learning_rate": 9.41727976843416e-05,
"loss": 0.0258,
"step": 3950
},
{
"grad_norm": 0.45981982350349426,
"learning_rate": 9.413400361221029e-05,
"loss": 0.0279,
"step": 3960
},
{
"grad_norm": 0.4804719388484955,
"learning_rate": 9.409508887939835e-05,
"loss": 0.022,
"step": 3970
},
{
"grad_norm": 0.4238436222076416,
"learning_rate": 9.40560535922972e-05,
"loss": 0.0212,
"step": 3980
},
{
"grad_norm": 0.403974324464798,
"learning_rate": 9.40168978576278e-05,
"loss": 0.0189,
"step": 3990
},
{
"grad_norm": 0.48837044835090637,
"learning_rate": 9.397762178244043e-05,
"loss": 0.0244,
"step": 4000
},
{
"grad_norm": 0.48128196597099304,
"learning_rate": 9.393822547411439e-05,
"loss": 0.0217,
"step": 4010
},
{
"grad_norm": 0.3272818624973297,
"learning_rate": 9.389870904035769e-05,
"loss": 0.0242,
"step": 4020
},
{
"grad_norm": 0.36953118443489075,
"learning_rate": 9.385907258920672e-05,
"loss": 0.0246,
"step": 4030
},
{
"grad_norm": 0.41161492466926575,
"learning_rate": 9.381931622902607e-05,
"loss": 0.021,
"step": 4040
},
{
"grad_norm": 0.4544064998626709,
"learning_rate": 9.377944006850807e-05,
"loss": 0.0193,
"step": 4050
},
{
"grad_norm": 0.47396498918533325,
"learning_rate": 9.373944421667265e-05,
"loss": 0.0213,
"step": 4060
},
{
"grad_norm": 0.4621795117855072,
"learning_rate": 9.369932878286691e-05,
"loss": 0.0266,
"step": 4070
},
{
"grad_norm": 0.5184421539306641,
"learning_rate": 9.365909387676494e-05,
"loss": 0.0196,
"step": 4080
},
{
"grad_norm": 0.4004800319671631,
"learning_rate": 9.361873960836744e-05,
"loss": 0.0263,
"step": 4090
},
{
"grad_norm": 0.3737598657608032,
"learning_rate": 9.357826608800142e-05,
"loss": 0.0196,
"step": 4100
},
{
"grad_norm": 0.4000731110572815,
"learning_rate": 9.353767342631994e-05,
"loss": 0.0203,
"step": 4110
},
{
"grad_norm": 0.3826330006122589,
"learning_rate": 9.34969617343018e-05,
"loss": 0.0219,
"step": 4120
},
{
"grad_norm": 0.5988262891769409,
"learning_rate": 9.345613112325122e-05,
"loss": 0.0204,
"step": 4130
},
{
"grad_norm": 0.4280189275741577,
"learning_rate": 9.34151817047975e-05,
"loss": 0.0224,
"step": 4140
},
{
"grad_norm": 0.3716961145401001,
"learning_rate": 9.33741135908948e-05,
"loss": 0.0262,
"step": 4150
},
{
"grad_norm": 0.4295980930328369,
"learning_rate": 9.33329268938218e-05,
"loss": 0.0207,
"step": 4160
},
{
"grad_norm": 0.425942063331604,
"learning_rate": 9.329162172618132e-05,
"loss": 0.0238,
"step": 4170
},
{
"grad_norm": 0.416522741317749,
"learning_rate": 9.325019820090013e-05,
"loss": 0.0226,
"step": 4180
},
{
"grad_norm": 0.5610533952713013,
"learning_rate": 9.320865643122855e-05,
"loss": 0.0208,
"step": 4190
},
{
"grad_norm": 0.379802942276001,
"learning_rate": 9.316699653074023e-05,
"loss": 0.022,
"step": 4200
},
{
"grad_norm": 0.4576219618320465,
"learning_rate": 9.312521861333172e-05,
"loss": 0.0166,
"step": 4210
},
{
"grad_norm": 0.45310190320014954,
"learning_rate": 9.308332279322224e-05,
"loss": 0.0242,
"step": 4220
},
{
"grad_norm": 0.4080248177051544,
"learning_rate": 9.304130918495338e-05,
"loss": 0.0224,
"step": 4230
},
{
"grad_norm": 0.33399489521980286,
"learning_rate": 9.299917790338874e-05,
"loss": 0.0187,
"step": 4240
},
{
"grad_norm": 0.356057733297348,
"learning_rate": 9.295692906371363e-05,
"loss": 0.0173,
"step": 4250
},
{
"grad_norm": 0.42619287967681885,
"learning_rate": 9.291456278143476e-05,
"loss": 0.0264,
"step": 4260
},
{
"grad_norm": 0.3479536175727844,
"learning_rate": 9.287207917237994e-05,
"loss": 0.0213,
"step": 4270
},
{
"grad_norm": 0.3362795114517212,
"learning_rate": 9.282947835269773e-05,
"loss": 0.0206,
"step": 4280
},
{
"grad_norm": 0.43236204981803894,
"learning_rate": 9.278676043885715e-05,
"loss": 0.0191,
"step": 4290
},
{
"grad_norm": 0.32585880160331726,
"learning_rate": 9.274392554764733e-05,
"loss": 0.0194,
"step": 4300
},
{
"grad_norm": 0.4723697900772095,
"learning_rate": 9.270097379617723e-05,
"loss": 0.016,
"step": 4310
},
{
"grad_norm": 0.42713454365730286,
"learning_rate": 9.26579053018753e-05,
"loss": 0.0154,
"step": 4320
},
{
"grad_norm": 0.33830246329307556,
"learning_rate": 9.261472018248918e-05,
"loss": 0.0146,
"step": 4330
},
{
"grad_norm": 0.4066753387451172,
"learning_rate": 9.25714185560853e-05,
"loss": 0.0259,
"step": 4340
},
{
"grad_norm": 0.448772668838501,
"learning_rate": 9.252800054104868e-05,
"loss": 0.0187,
"step": 4350
},
{
"grad_norm": 0.4219300448894501,
"learning_rate": 9.248446625608252e-05,
"loss": 0.0208,
"step": 4360
},
{
"grad_norm": 0.39920371770858765,
"learning_rate": 9.244081582020789e-05,
"loss": 0.0175,
"step": 4370
},
{
"grad_norm": 0.42131638526916504,
"learning_rate": 9.239704935276339e-05,
"loss": 0.0182,
"step": 4380
},
{
"grad_norm": 0.45648935437202454,
"learning_rate": 9.235316697340489e-05,
"loss": 0.0158,
"step": 4390
},
{
"grad_norm": 0.42188429832458496,
"learning_rate": 9.230916880210512e-05,
"loss": 0.0183,
"step": 4400
},
{
"grad_norm": 0.36581969261169434,
"learning_rate": 9.226505495915342e-05,
"loss": 0.0147,
"step": 4410
},
{
"grad_norm": 0.42502549290657043,
"learning_rate": 9.222082556515536e-05,
"loss": 0.0198,
"step": 4420
},
{
"grad_norm": 0.35229989886283875,
"learning_rate": 9.217648074103242e-05,
"loss": 0.0153,
"step": 4430
},
{
"grad_norm": 0.4085313379764557,
"learning_rate": 9.213202060802161e-05,
"loss": 0.0192,
"step": 4440
},
{
"grad_norm": 0.4650028645992279,
"learning_rate": 9.208744528767528e-05,
"loss": 0.0173,
"step": 4450
},
{
"grad_norm": 0.4048616886138916,
"learning_rate": 9.204275490186064e-05,
"loss": 0.0204,
"step": 4460
},
{
"grad_norm": 0.4178619980812073,
"learning_rate": 9.199794957275949e-05,
"loss": 0.0204,
"step": 4470
},
{
"grad_norm": 0.46256691217422485,
"learning_rate": 9.19530294228679e-05,
"loss": 0.0177,
"step": 4480
},
{
"grad_norm": 0.35352519154548645,
"learning_rate": 9.190799457499583e-05,
"loss": 0.028,
"step": 4490
},
{
"grad_norm": 0.4470050632953644,
"learning_rate": 9.186284515226686e-05,
"loss": 0.0194,
"step": 4500
},
{
"grad_norm": 0.3508913815021515,
"learning_rate": 9.181758127811777e-05,
"loss": 0.0241,
"step": 4510
},
{
"grad_norm": 0.411702424287796,
"learning_rate": 9.177220307629825e-05,
"loss": 0.0204,
"step": 4520
},
{
"grad_norm": 0.4468960762023926,
"learning_rate": 9.172671067087059e-05,
"loss": 0.0194,
"step": 4530
},
{
"grad_norm": 0.4807928204536438,
"learning_rate": 9.16811041862093e-05,
"loss": 0.0256,
"step": 4540
},
{
"grad_norm": 0.39205247163772583,
"learning_rate": 9.163538374700076e-05,
"loss": 0.0185,
"step": 4550
},
{
"grad_norm": 0.44329723715782166,
"learning_rate": 9.158954947824287e-05,
"loss": 0.0178,
"step": 4560
},
{
"grad_norm": 0.47283023595809937,
"learning_rate": 9.154360150524482e-05,
"loss": 0.0174,
"step": 4570
},
{
"grad_norm": 0.38849857449531555,
"learning_rate": 9.14975399536266e-05,
"loss": 0.0143,
"step": 4580
},
{
"grad_norm": 0.3656264543533325,
"learning_rate": 9.14513649493187e-05,
"loss": 0.0212,
"step": 4590
},
{
"grad_norm": 0.4674840271472931,
"learning_rate": 9.140507661856187e-05,
"loss": 0.0153,
"step": 4600
},
{
"grad_norm": 0.4313472509384155,
"learning_rate": 9.135867508790661e-05,
"loss": 0.0214,
"step": 4610
},
{
"grad_norm": 0.3471619486808777,
"learning_rate": 9.131216048421291e-05,
"loss": 0.0165,
"step": 4620
},
{
"grad_norm": 0.4542539715766907,
"learning_rate": 9.126553293464998e-05,
"loss": 0.0189,
"step": 4630
},
{
"grad_norm": 0.47608688473701477,
"learning_rate": 9.121879256669572e-05,
"loss": 0.017,
"step": 4640
},
{
"grad_norm": 0.3959465026855469,
"learning_rate": 9.117193950813652e-05,
"loss": 0.0164,
"step": 4650
},
{
"grad_norm": 0.408431738615036,
"learning_rate": 9.112497388706685e-05,
"loss": 0.0255,
"step": 4660
},
{
"grad_norm": 0.4116475582122803,
"learning_rate": 9.10778958318889e-05,
"loss": 0.0174,
"step": 4670
},
{
"grad_norm": 0.3917919993400574,
"learning_rate": 9.103070547131232e-05,
"loss": 0.0199,
"step": 4680
},
{
"grad_norm": 0.3482106029987335,
"learning_rate": 9.098340293435375e-05,
"loss": 0.0179,
"step": 4690
},
{
"grad_norm": 0.34646838903427124,
"learning_rate": 9.093598835033649e-05,
"loss": 0.0174,
"step": 4700
},
{
"grad_norm": 0.39419376850128174,
"learning_rate": 9.088846184889021e-05,
"loss": 0.0191,
"step": 4710
},
{
"grad_norm": 0.4543268084526062,
"learning_rate": 9.084082355995057e-05,
"loss": 0.0213,
"step": 4720
},
{
"grad_norm": 0.4212946891784668,
"learning_rate": 9.079307361375882e-05,
"loss": 0.0181,
"step": 4730
},
{
"grad_norm": 0.3014923334121704,
"learning_rate": 9.074521214086149e-05,
"loss": 0.019,
"step": 4740
},
{
"grad_norm": 0.36527299880981445,
"learning_rate": 9.069723927211001e-05,
"loss": 0.0179,
"step": 4750
},
{
"grad_norm": 0.3752840757369995,
"learning_rate": 9.064915513866037e-05,
"loss": 0.0183,
"step": 4760
},
{
"grad_norm": 0.42201003432273865,
"learning_rate": 9.060095987197279e-05,
"loss": 0.0162,
"step": 4770
},
{
"grad_norm": 0.3307137191295624,
"learning_rate": 9.055265360381126e-05,
"loss": 0.0206,
"step": 4780
},
{
"grad_norm": 0.33322593569755554,
"learning_rate": 9.050423646624326e-05,
"loss": 0.016,
"step": 4790
},
{
"grad_norm": 0.35324618220329285,
"learning_rate": 9.045570859163943e-05,
"loss": 0.0194,
"step": 4800
},
{
"grad_norm": 0.427572637796402,
"learning_rate": 9.04070701126731e-05,
"loss": 0.015,
"step": 4810
},
{
"grad_norm": 0.3561609983444214,
"learning_rate": 9.035832116232001e-05,
"loss": 0.0145,
"step": 4820
},
{
"grad_norm": 0.37716561555862427,
"learning_rate": 9.030946187385796e-05,
"loss": 0.016,
"step": 4830
},
{
"grad_norm": 0.39859738945961,
"learning_rate": 9.026049238086635e-05,
"loss": 0.0178,
"step": 4840
},
{
"grad_norm": 0.4500395655632019,
"learning_rate": 9.021141281722591e-05,
"loss": 0.0202,
"step": 4850
},
{
"grad_norm": 0.34830138087272644,
"learning_rate": 9.01622233171183e-05,
"loss": 0.0169,
"step": 4860
},
{
"grad_norm": 0.3729107677936554,
"learning_rate": 9.011292401502574e-05,
"loss": 0.0212,
"step": 4870
},
{
"grad_norm": 0.3912448585033417,
"learning_rate": 9.006351504573063e-05,
"loss": 0.0146,
"step": 4880
},
{
"grad_norm": 0.4137353003025055,
"learning_rate": 9.001399654431519e-05,
"loss": 0.0171,
"step": 4890
},
{
"grad_norm": 0.4444160759449005,
"learning_rate": 8.996436864616116e-05,
"loss": 0.0162,
"step": 4900
},
{
"grad_norm": 0.3148241639137268,
"learning_rate": 8.991463148694925e-05,
"loss": 0.0191,
"step": 4910
},
{
"grad_norm": 0.4391416907310486,
"learning_rate": 8.986478520265902e-05,
"loss": 0.0187,
"step": 4920
},
{
"grad_norm": 0.4296688139438629,
"learning_rate": 8.981482992956827e-05,
"loss": 0.0143,
"step": 4930
},
{
"grad_norm": 0.29728299379348755,
"learning_rate": 8.976476580425282e-05,
"loss": 0.0148,
"step": 4940
},
{
"grad_norm": 0.4356195032596588,
"learning_rate": 8.971459296358606e-05,
"loss": 0.0287,
"step": 4950
},
{
"grad_norm": 0.4179481565952301,
"learning_rate": 8.966431154473864e-05,
"loss": 0.0157,
"step": 4960
},
{
"grad_norm": 0.3610477149486542,
"learning_rate": 8.961392168517803e-05,
"loss": 0.0159,
"step": 4970
},
{
"grad_norm": 0.34345686435699463,
"learning_rate": 8.956342352266821e-05,
"loss": 0.016,
"step": 4980
},
{
"grad_norm": 0.3698787987232208,
"learning_rate": 8.95128171952692e-05,
"loss": 0.0214,
"step": 4990
},
{
"grad_norm": 0.327648788690567,
"learning_rate": 8.946210284133676e-05,
"loss": 0.0173,
"step": 5000
},
{
"grad_norm": 0.2809329330921173,
"learning_rate": 8.941128059952201e-05,
"loss": 0.0132,
"step": 5010
},
{
"grad_norm": 0.31239569187164307,
"learning_rate": 8.936035060877102e-05,
"loss": 0.0244,
"step": 5020
},
{
"grad_norm": 0.40824398398399353,
"learning_rate": 8.930931300832443e-05,
"loss": 0.0181,
"step": 5030
},
{
"grad_norm": 0.36586353182792664,
"learning_rate": 8.925816793771711e-05,
"loss": 0.0148,
"step": 5040
},
{
"grad_norm": 0.2970711886882782,
"learning_rate": 8.92069155367777e-05,
"loss": 0.0139,
"step": 5050
},
{
"grad_norm": 0.3478129506111145,
"learning_rate": 8.915555594562834e-05,
"loss": 0.0172,
"step": 5060
},
{
"grad_norm": 0.3716062903404236,
"learning_rate": 8.910408930468416e-05,
"loss": 0.016,
"step": 5070
},
{
"grad_norm": 0.35524141788482666,
"learning_rate": 8.905251575465303e-05,
"loss": 0.0128,
"step": 5080
},
{
"grad_norm": 0.46534451842308044,
"learning_rate": 8.900083543653502e-05,
"loss": 0.0192,
"step": 5090
},
{
"grad_norm": 0.4329080283641815,
"learning_rate": 8.894904849162218e-05,
"loss": 0.0176,
"step": 5100
},
{
"grad_norm": 0.33941879868507385,
"learning_rate": 8.889715506149802e-05,
"loss": 0.0161,
"step": 5110
},
{
"grad_norm": 0.37832191586494446,
"learning_rate": 8.884515528803722e-05,
"loss": 0.0157,
"step": 5120
},
{
"grad_norm": 0.36182844638824463,
"learning_rate": 8.879304931340517e-05,
"loss": 0.0148,
"step": 5130
},
{
"grad_norm": 0.3270893096923828,
"learning_rate": 8.874083728005759e-05,
"loss": 0.014,
"step": 5140
},
{
"grad_norm": 0.3815666437149048,
"learning_rate": 8.868851933074021e-05,
"loss": 0.0207,
"step": 5150
},
{
"grad_norm": 0.4211690425872803,
"learning_rate": 8.863609560848829e-05,
"loss": 0.0147,
"step": 5160
},
{
"grad_norm": 0.38520973920822144,
"learning_rate": 8.85835662566263e-05,
"loss": 0.0167,
"step": 5170
},
{
"grad_norm": 0.4238542914390564,
"learning_rate": 8.853093141876747e-05,
"loss": 0.0123,
"step": 5180
},
{
"grad_norm": 0.4630364179611206,
"learning_rate": 8.847819123881343e-05,
"loss": 0.0157,
"step": 5190
},
{
"grad_norm": 0.3982260227203369,
"learning_rate": 8.842534586095383e-05,
"loss": 0.0156,
"step": 5200
},
{
"grad_norm": 0.418029248714447,
"learning_rate": 8.837239542966593e-05,
"loss": 0.0147,
"step": 5210
},
{
"grad_norm": 0.42096608877182007,
"learning_rate": 8.831934008971417e-05,
"loss": 0.0155,
"step": 5220
},
{
"grad_norm": 0.40651246905326843,
"learning_rate": 8.826617998614982e-05,
"loss": 0.0134,
"step": 5230
},
{
"grad_norm": 0.35442405939102173,
"learning_rate": 8.821291526431056e-05,
"loss": 0.0149,
"step": 5240
},
{
"grad_norm": 0.4477267563343048,
"learning_rate": 8.815954606982015e-05,
"loss": 0.017,
"step": 5250
},
{
"grad_norm": 0.42138704657554626,
"learning_rate": 8.810607254858789e-05,
"loss": 0.0161,
"step": 5260
},
{
"grad_norm": 0.3887590169906616,
"learning_rate": 8.805249484680838e-05,
"loss": 0.0177,
"step": 5270
},
{
"grad_norm": 0.3340283930301666,
"learning_rate": 8.799881311096096e-05,
"loss": 0.0132,
"step": 5280
},
{
"grad_norm": 0.37869396805763245,
"learning_rate": 8.794502748780949e-05,
"loss": 0.0158,
"step": 5290
},
{
"grad_norm": 0.38290390372276306,
"learning_rate": 8.78911381244018e-05,
"loss": 0.0142,
"step": 5300
},
{
"grad_norm": 0.3591060936450958,
"learning_rate": 8.783714516806933e-05,
"loss": 0.0151,
"step": 5310
},
{
"grad_norm": 0.4037642478942871,
"learning_rate": 8.77830487664268e-05,
"loss": 0.0128,
"step": 5320
},
{
"grad_norm": 0.25061705708503723,
"learning_rate": 8.772884906737167e-05,
"loss": 0.0162,
"step": 5330
},
{
"grad_norm": 0.3075624108314514,
"learning_rate": 8.767454621908387e-05,
"loss": 0.0136,
"step": 5340
},
{
"grad_norm": 0.38026294112205505,
"learning_rate": 8.76201403700253e-05,
"loss": 0.0136,
"step": 5350
},
{
"grad_norm": 0.48398804664611816,
"learning_rate": 8.756563166893949e-05,
"loss": 0.0132,
"step": 5360
},
{
"grad_norm": 0.3468417823314667,
"learning_rate": 8.751102026485113e-05,
"loss": 0.0151,
"step": 5370
},
{
"grad_norm": 0.3630031943321228,
"learning_rate": 8.745630630706571e-05,
"loss": 0.0158,
"step": 5380
},
{
"grad_norm": 0.5174519419670105,
"learning_rate": 8.740148994516912e-05,
"loss": 0.0165,
"step": 5390
},
{
"grad_norm": 0.3353045582771301,
"learning_rate": 8.73465713290272e-05,
"loss": 0.0133,
"step": 5400
},
{
"grad_norm": 0.3345337212085724,
"learning_rate": 8.729155060878533e-05,
"loss": 0.0186,
"step": 5410
},
{
"grad_norm": 0.3502795100212097,
"learning_rate": 8.723642793486809e-05,
"loss": 0.017,
"step": 5420
},
{
"grad_norm": 0.33117562532424927,
"learning_rate": 8.718120345797873e-05,
"loss": 0.0156,
"step": 5430
},
{
"grad_norm": 0.3348385989665985,
"learning_rate": 8.712587732909889e-05,
"loss": 0.0131,
"step": 5440
},
{
"grad_norm": 0.31218665838241577,
"learning_rate": 8.707044969948806e-05,
"loss": 0.0139,
"step": 5450
},
{
"grad_norm": 0.36143720149993896,
"learning_rate": 8.701492072068329e-05,
"loss": 0.0162,
"step": 5460
},
{
"grad_norm": 0.397625207901001,
"learning_rate": 8.695929054449869e-05,
"loss": 0.0162,
"step": 5470
},
{
"grad_norm": 0.35156044363975525,
"learning_rate": 8.690355932302501e-05,
"loss": 0.0149,
"step": 5480
},
{
"grad_norm": 0.3862064778804779,
"learning_rate": 8.684772720862931e-05,
"loss": 0.0134,
"step": 5490
},
{
"grad_norm": 0.33415740728378296,
"learning_rate": 8.679179435395446e-05,
"loss": 0.0156,
"step": 5500
},
{
"grad_norm": 0.33985161781311035,
"learning_rate": 8.673576091191874e-05,
"loss": 0.0152,
"step": 5510
},
{
"grad_norm": 0.43412765860557556,
"learning_rate": 8.667962703571541e-05,
"loss": 0.0153,
"step": 5520
},
{
"grad_norm": 0.29241663217544556,
"learning_rate": 8.662339287881238e-05,
"loss": 0.0145,
"step": 5530
},
{
"grad_norm": 0.26985955238342285,
"learning_rate": 8.656705859495169e-05,
"loss": 0.0095,
"step": 5540
},
{
"grad_norm": 0.3288934528827667,
"learning_rate": 8.651062433814912e-05,
"loss": 0.0148,
"step": 5550
},
{
"grad_norm": 0.32042691111564636,
"learning_rate": 8.645409026269375e-05,
"loss": 0.0178,
"step": 5560
},
{
"grad_norm": 0.29201775789260864,
"learning_rate": 8.639745652314759e-05,
"loss": 0.0136,
"step": 5570
},
{
"grad_norm": 0.33705347776412964,
"learning_rate": 8.634072327434515e-05,
"loss": 0.0199,
"step": 5580
},
{
"grad_norm": 0.43964189291000366,
"learning_rate": 8.628389067139294e-05,
"loss": 0.0153,
"step": 5590
},
{
"grad_norm": 0.3852575421333313,
"learning_rate": 8.622695886966911e-05,
"loss": 0.0124,
"step": 5600
},
{
"grad_norm": 0.3601333200931549,
"learning_rate": 8.616992802482308e-05,
"loss": 0.0115,
"step": 5610
},
{
"grad_norm": 0.3712993562221527,
"learning_rate": 8.611279829277496e-05,
"loss": 0.0129,
"step": 5620
},
{
"grad_norm": 0.3430801033973694,
"learning_rate": 8.605556982971528e-05,
"loss": 0.0119,
"step": 5630
},
{
"grad_norm": 0.2783951163291931,
"learning_rate": 8.599824279210447e-05,
"loss": 0.0113,
"step": 5640
},
{
"grad_norm": 0.3604603111743927,
"learning_rate": 8.594081733667243e-05,
"loss": 0.016,
"step": 5650
},
{
"grad_norm": 0.4052552282810211,
"learning_rate": 8.58832936204182e-05,
"loss": 0.0141,
"step": 5660
},
{
"grad_norm": 0.2946913242340088,
"learning_rate": 8.582567180060942e-05,
"loss": 0.0188,
"step": 5670
},
{
"grad_norm": 0.29554295539855957,
"learning_rate": 8.576795203478194e-05,
"loss": 0.0147,
"step": 5680
},
{
"grad_norm": 0.30913200974464417,
"learning_rate": 8.571013448073939e-05,
"loss": 0.0155,
"step": 5690
},
{
"grad_norm": 0.3153333067893982,
"learning_rate": 8.565221929655275e-05,
"loss": 0.0116,
"step": 5700
},
{
"grad_norm": 0.26914530992507935,
"learning_rate": 8.559420664055992e-05,
"loss": 0.0125,
"step": 5710
},
{
"grad_norm": 0.3266845941543579,
"learning_rate": 8.553609667136532e-05,
"loss": 0.0127,
"step": 5720
},
{
"grad_norm": 0.36770594120025635,
"learning_rate": 8.547788954783936e-05,
"loss": 0.0132,
"step": 5730
},
{
"grad_norm": 0.3868075907230377,
"learning_rate": 8.541958542911808e-05,
"loss": 0.0137,
"step": 5740
},
{
"grad_norm": 0.3873762786388397,
"learning_rate": 8.536118447460275e-05,
"loss": 0.016,
"step": 5750
},
{
"grad_norm": 0.34997740387916565,
"learning_rate": 8.530268684395932e-05,
"loss": 0.012,
"step": 5760
},
{
"grad_norm": 0.36314913630485535,
"learning_rate": 8.524409269711807e-05,
"loss": 0.014,
"step": 5770
},
{
"grad_norm": 0.2800992727279663,
"learning_rate": 8.51854021942732e-05,
"loss": 0.0111,
"step": 5780
},
{
"grad_norm": 0.3715326488018036,
"learning_rate": 8.512661549588227e-05,
"loss": 0.0128,
"step": 5790
},
{
"grad_norm": 0.3508760631084442,
"learning_rate": 8.506773276266588e-05,
"loss": 0.0123,
"step": 5800
},
{
"grad_norm": 0.31156125664711,
"learning_rate": 8.500875415560721e-05,
"loss": 0.0104,
"step": 5810
},
{
"grad_norm": 0.28672730922698975,
"learning_rate": 8.494967983595144e-05,
"loss": 0.0138,
"step": 5820
},
{
"grad_norm": 0.2949328124523163,
"learning_rate": 8.489050996520558e-05,
"loss": 0.0111,
"step": 5830
},
{
"grad_norm": 0.3339660167694092,
"learning_rate": 8.483124470513775e-05,
"loss": 0.0125,
"step": 5840
},
{
"grad_norm": 0.37675192952156067,
"learning_rate": 8.477188421777692e-05,
"loss": 0.013,
"step": 5850
},
{
"grad_norm": 0.4156615436077118,
"learning_rate": 8.47124286654124e-05,
"loss": 0.0188,
"step": 5860
},
{
"grad_norm": 0.3377411365509033,
"learning_rate": 8.465287821059341e-05,
"loss": 0.0197,
"step": 5870
},
{
"grad_norm": 0.31554165482521057,
"learning_rate": 8.45932330161286e-05,
"loss": 0.0125,
"step": 5880
},
{
"grad_norm": 0.3891998529434204,
"learning_rate": 8.453349324508567e-05,
"loss": 0.0169,
"step": 5890
},
{
"grad_norm": 0.2835284173488617,
"learning_rate": 8.447365906079088e-05,
"loss": 0.0172,
"step": 5900
},
{
"grad_norm": 0.3825901746749878,
"learning_rate": 8.441373062682856e-05,
"loss": 0.0146,
"step": 5910
},
{
"grad_norm": 0.3294428884983063,
"learning_rate": 8.43537081070408e-05,
"loss": 0.0218,
"step": 5920
},
{
"grad_norm": 0.3541003167629242,
"learning_rate": 8.429359166552689e-05,
"loss": 0.0132,
"step": 5930
},
{
"grad_norm": 0.3192877173423767,
"learning_rate": 8.423338146664284e-05,
"loss": 0.014,
"step": 5940
},
{
"grad_norm": 0.3500727713108063,
"learning_rate": 8.417307767500107e-05,
"loss": 0.0115,
"step": 5950
},
{
"grad_norm": 0.3229285478591919,
"learning_rate": 8.411268045546983e-05,
"loss": 0.0121,
"step": 5960
},
{
"grad_norm": 0.4392866790294647,
"learning_rate": 8.405218997317281e-05,
"loss": 0.0125,
"step": 5970
},
{
"grad_norm": 0.3409421145915985,
"learning_rate": 8.399160639348869e-05,
"loss": 0.0115,
"step": 5980
},
{
"grad_norm": 0.3397701680660248,
"learning_rate": 8.393092988205065e-05,
"loss": 0.0144,
"step": 5990
},
{
"grad_norm": 0.2932409346103668,
"learning_rate": 8.387016060474597e-05,
"loss": 0.0155,
"step": 6000
},
{
"grad_norm": 0.30588042736053467,
"learning_rate": 8.380929872771551e-05,
"loss": 0.018,
"step": 6010
},
{
"grad_norm": 0.2569223642349243,
"learning_rate": 8.374834441735335e-05,
"loss": 0.0139,
"step": 6020
},
{
"grad_norm": 0.37832796573638916,
"learning_rate": 8.368729784030622e-05,
"loss": 0.012,
"step": 6030
},
{
"grad_norm": 0.3126446604728699,
"learning_rate": 8.362615916347315e-05,
"loss": 0.0138,
"step": 6040
},
{
"grad_norm": 0.2546840310096741,
"learning_rate": 8.356492855400493e-05,
"loss": 0.0122,
"step": 6050
},
{
"grad_norm": 0.3276226818561554,
"learning_rate": 8.350360617930371e-05,
"loss": 0.0118,
"step": 6060
},
{
"grad_norm": 0.3676457405090332,
"learning_rate": 8.344219220702255e-05,
"loss": 0.0117,
"step": 6070
},
{
"grad_norm": 0.35417577624320984,
"learning_rate": 8.338068680506485e-05,
"loss": 0.0104,
"step": 6080
},
{
"grad_norm": 0.26148155331611633,
"learning_rate": 8.33190901415841e-05,
"loss": 0.0142,
"step": 6090
},
{
"grad_norm": 0.29308485984802246,
"learning_rate": 8.325740238498317e-05,
"loss": 0.0111,
"step": 6100
},
{
"grad_norm": 0.41223078966140747,
"learning_rate": 8.319562370391406e-05,
"loss": 0.0135,
"step": 6110
},
{
"grad_norm": 0.38607892394065857,
"learning_rate": 8.31337542672773e-05,
"loss": 0.0131,
"step": 6120
},
{
"grad_norm": 0.3013926148414612,
"learning_rate": 8.307179424422158e-05,
"loss": 0.0115,
"step": 6130
},
{
"grad_norm": 0.34792882204055786,
"learning_rate": 8.300974380414327e-05,
"loss": 0.0165,
"step": 6140
},
{
"grad_norm": 0.3160726726055145,
"learning_rate": 8.294760311668586e-05,
"loss": 0.0133,
"step": 6150
},
{
"grad_norm": 0.3607368469238281,
"learning_rate": 8.288537235173961e-05,
"loss": 0.0152,
"step": 6160
},
{
"grad_norm": 0.2962538003921509,
"learning_rate": 8.282305167944108e-05,
"loss": 0.0146,
"step": 6170
},
{
"grad_norm": 0.3576897978782654,
"learning_rate": 8.276064127017262e-05,
"loss": 0.0132,
"step": 6180
},
{
"grad_norm": 0.3158738911151886,
"learning_rate": 8.269814129456189e-05,
"loss": 0.0165,
"step": 6190
},
{
"grad_norm": 0.3886716663837433,
"learning_rate": 8.263555192348143e-05,
"loss": 0.0164,
"step": 6200
},
{
"grad_norm": 0.33075031638145447,
"learning_rate": 8.257287332804819e-05,
"loss": 0.024,
"step": 6210
},
{
"grad_norm": 0.40429654717445374,
"learning_rate": 8.251010567962307e-05,
"loss": 0.0142,
"step": 6220
},
{
"grad_norm": 0.26305797696113586,
"learning_rate": 8.244724914981041e-05,
"loss": 0.0127,
"step": 6230
},
{
"grad_norm": 0.26651525497436523,
"learning_rate": 8.238430391045757e-05,
"loss": 0.0099,
"step": 6240
},
{
"grad_norm": 0.22140610218048096,
"learning_rate": 8.232127013365445e-05,
"loss": 0.0162,
"step": 6250
},
{
"grad_norm": 0.3048286736011505,
"learning_rate": 8.225814799173295e-05,
"loss": 0.0219,
"step": 6260
},
{
"grad_norm": 0.27734512090682983,
"learning_rate": 8.219493765726663e-05,
"loss": 0.012,
"step": 6270
},
{
"grad_norm": 0.3474031090736389,
"learning_rate": 8.21316393030701e-05,
"loss": 0.0122,
"step": 6280
},
{
"grad_norm": 0.3461661636829376,
"learning_rate": 8.206825310219865e-05,
"loss": 0.0142,
"step": 6290
},
{
"grad_norm": 0.3480895161628723,
"learning_rate": 8.200477922794776e-05,
"loss": 0.0155,
"step": 6300
},
{
"grad_norm": 0.29770898818969727,
"learning_rate": 8.194121785385256e-05,
"loss": 0.0122,
"step": 6310
},
{
"grad_norm": 0.27077922224998474,
"learning_rate": 8.187756915368741e-05,
"loss": 0.0114,
"step": 6320
},
{
"grad_norm": 0.37504443526268005,
"learning_rate": 8.181383330146544e-05,
"loss": 0.0123,
"step": 6330
},
{
"grad_norm": 0.27733808755874634,
"learning_rate": 8.175001047143804e-05,
"loss": 0.0152,
"step": 6340
},
{
"grad_norm": 0.2905326187610626,
"learning_rate": 8.168610083809438e-05,
"loss": 0.014,
"step": 6350
},
{
"grad_norm": 0.3680465519428253,
"learning_rate": 8.162210457616095e-05,
"loss": 0.0128,
"step": 6360
},
{
"grad_norm": 0.41437506675720215,
"learning_rate": 8.155802186060109e-05,
"loss": 0.0168,
"step": 6370
},
{
"grad_norm": 0.2748274505138397,
"learning_rate": 8.149385286661453e-05,
"loss": 0.014,
"step": 6380
},
{
"grad_norm": 0.27940356731414795,
"learning_rate": 8.14295977696368e-05,
"loss": 0.0106,
"step": 6390
},
{
"grad_norm": 0.3001856505870819,
"learning_rate": 8.13652567453389e-05,
"loss": 0.0161,
"step": 6400
},
{
"grad_norm": 0.3228931725025177,
"learning_rate": 8.130082996962676e-05,
"loss": 0.0108,
"step": 6410
},
{
"grad_norm": 0.37547504901885986,
"learning_rate": 8.123631761864068e-05,
"loss": 0.0108,
"step": 6420
},
{
"grad_norm": 0.3306344449520111,
"learning_rate": 8.1171719868755e-05,
"loss": 0.012,
"step": 6430
},
{
"grad_norm": 0.30303868651390076,
"learning_rate": 8.110703689657748e-05,
"loss": 0.0132,
"step": 6440
},
{
"grad_norm": 0.3219710886478424,
"learning_rate": 8.104226887894892e-05,
"loss": 0.0247,
"step": 6450
},
{
"grad_norm": 0.25483033061027527,
"learning_rate": 8.097741599294257e-05,
"loss": 0.0137,
"step": 6460
},
{
"grad_norm": 0.34695756435394287,
"learning_rate": 8.091247841586378e-05,
"loss": 0.0118,
"step": 6470
},
{
"grad_norm": 0.31917816400527954,
"learning_rate": 8.084745632524939e-05,
"loss": 0.0153,
"step": 6480
},
{
"grad_norm": 0.298793762922287,
"learning_rate": 8.07823498988673e-05,
"loss": 0.013,
"step": 6490
},
{
"grad_norm": 0.29808494448661804,
"learning_rate": 8.071715931471602e-05,
"loss": 0.0141,
"step": 6500
},
{
"grad_norm": 0.2866530120372772,
"learning_rate": 8.06518847510241e-05,
"loss": 0.013,
"step": 6510
},
{
"grad_norm": 0.2647181451320648,
"learning_rate": 8.058652638624971e-05,
"loss": 0.013,
"step": 6520
},
{
"grad_norm": 0.2521056830883026,
"learning_rate": 8.052108439908013e-05,
"loss": 0.0117,
"step": 6530
},
{
"grad_norm": 0.30903002619743347,
"learning_rate": 8.045555896843125e-05,
"loss": 0.0127,
"step": 6540
},
{
"grad_norm": 0.3295891284942627,
"learning_rate": 8.03899502734471e-05,
"loss": 0.0116,
"step": 6550
},
{
"grad_norm": 0.3196203410625458,
"learning_rate": 8.032425849349931e-05,
"loss": 0.0096,
"step": 6560
},
{
"grad_norm": 0.2777307331562042,
"learning_rate": 8.025848380818674e-05,
"loss": 0.0137,
"step": 6570
},
{
"grad_norm": 0.2528461217880249,
"learning_rate": 8.019262639733487e-05,
"loss": 0.012,
"step": 6580
},
{
"grad_norm": 0.29351767897605896,
"learning_rate": 8.012668644099531e-05,
"loss": 0.0116,
"step": 6590
},
{
"grad_norm": 0.3499806821346283,
"learning_rate": 8.006066411944542e-05,
"loss": 0.0145,
"step": 6600
},
{
"grad_norm": 0.3001391589641571,
"learning_rate": 7.999455961318769e-05,
"loss": 0.0107,
"step": 6610
},
{
"grad_norm": 0.34684231877326965,
"learning_rate": 7.992837310294932e-05,
"loss": 0.0174,
"step": 6620
},
{
"grad_norm": 0.29393184185028076,
"learning_rate": 7.986210476968167e-05,
"loss": 0.0139,
"step": 6630
},
{
"grad_norm": 0.29339519143104553,
"learning_rate": 7.97957547945599e-05,
"loss": 0.0174,
"step": 6640
},
{
"grad_norm": 0.4209054112434387,
"learning_rate": 7.972932335898226e-05,
"loss": 0.0115,
"step": 6650
},
{
"grad_norm": 0.28844037652015686,
"learning_rate": 7.966281064456975e-05,
"loss": 0.0121,
"step": 6660
},
{
"grad_norm": 0.3526553511619568,
"learning_rate": 7.959621683316563e-05,
"loss": 0.0119,
"step": 6670
},
{
"grad_norm": 0.42336228489875793,
"learning_rate": 7.952954210683481e-05,
"loss": 0.016,
"step": 6680
},
{
"grad_norm": 0.3053176701068878,
"learning_rate": 7.946278664786345e-05,
"loss": 0.0143,
"step": 6690
},
{
"grad_norm": 0.25422704219818115,
"learning_rate": 7.939595063875842e-05,
"loss": 0.0129,
"step": 6700
},
{
"grad_norm": 0.2565918266773224,
"learning_rate": 7.932903426224683e-05,
"loss": 0.0118,
"step": 6710
},
{
"grad_norm": 0.29806122183799744,
"learning_rate": 7.926203770127552e-05,
"loss": 0.0112,
"step": 6720
},
{
"grad_norm": 0.29177016019821167,
"learning_rate": 7.919496113901046e-05,
"loss": 0.0138,
"step": 6730
},
{
"grad_norm": 0.24790076911449432,
"learning_rate": 7.912780475883649e-05,
"loss": 0.0104,
"step": 6740
},
{
"grad_norm": 0.3190149664878845,
"learning_rate": 7.906056874435652e-05,
"loss": 0.0127,
"step": 6750
},
{
"grad_norm": 0.28794440627098083,
"learning_rate": 7.899325327939131e-05,
"loss": 0.0094,
"step": 6760
},
{
"grad_norm": 0.24909764528274536,
"learning_rate": 7.892585854797872e-05,
"loss": 0.0135,
"step": 6770
},
{
"grad_norm": 0.358511745929718,
"learning_rate": 7.88583847343734e-05,
"loss": 0.0119,
"step": 6780
},
{
"grad_norm": 0.2664150893688202,
"learning_rate": 7.879083202304616e-05,
"loss": 0.0137,
"step": 6790
},
{
"grad_norm": 0.24875850975513458,
"learning_rate": 7.872320059868355e-05,
"loss": 0.0144,
"step": 6800
},
{
"grad_norm": 0.2646051347255707,
"learning_rate": 7.865549064618729e-05,
"loss": 0.0101,
"step": 6810
},
{
"grad_norm": 0.3005359172821045,
"learning_rate": 7.858770235067381e-05,
"loss": 0.0189,
"step": 6820
},
{
"grad_norm": 0.30753788352012634,
"learning_rate": 7.851983589747374e-05,
"loss": 0.0128,
"step": 6830
},
{
"grad_norm": 0.3018791675567627,
"learning_rate": 7.845189147213133e-05,
"loss": 0.0103,
"step": 6840
},
{
"grad_norm": 0.27601730823516846,
"learning_rate": 7.838386926040407e-05,
"loss": 0.012,
"step": 6850
},
{
"grad_norm": 0.18579219281673431,
"learning_rate": 7.83157694482621e-05,
"loss": 0.0088,
"step": 6860
},
{
"grad_norm": 0.3077498972415924,
"learning_rate": 7.824759222188768e-05,
"loss": 0.0135,
"step": 6870
},
{
"grad_norm": 0.3342336118221283,
"learning_rate": 7.817933776767478e-05,
"loss": 0.0107,
"step": 6880
},
{
"grad_norm": 0.2645319998264313,
"learning_rate": 7.811100627222842e-05,
"loss": 0.0079,
"step": 6890
},
{
"grad_norm": 0.34547173976898193,
"learning_rate": 7.804259792236435e-05,
"loss": 0.0106,
"step": 6900
},
{
"grad_norm": 0.3139336407184601,
"learning_rate": 7.797411290510835e-05,
"loss": 0.0143,
"step": 6910
},
{
"grad_norm": 0.32347217202186584,
"learning_rate": 7.790555140769586e-05,
"loss": 0.0116,
"step": 6920
},
{
"grad_norm": 0.2938658595085144,
"learning_rate": 7.78369136175714e-05,
"loss": 0.0151,
"step": 6930
},
{
"grad_norm": 0.3143678605556488,
"learning_rate": 7.776819972238806e-05,
"loss": 0.0157,
"step": 6940
},
{
"grad_norm": 0.3636862337589264,
"learning_rate": 7.7699409910007e-05,
"loss": 0.012,
"step": 6950
},
{
"grad_norm": 0.2957841157913208,
"learning_rate": 7.763054436849694e-05,
"loss": 0.0137,
"step": 6960
},
{
"grad_norm": 0.3176383078098297,
"learning_rate": 7.756160328613364e-05,
"loss": 0.0138,
"step": 6970
},
{
"grad_norm": 0.38332003355026245,
"learning_rate": 7.749258685139942e-05,
"loss": 0.0103,
"step": 6980
},
{
"grad_norm": 0.3329831659793854,
"learning_rate": 7.742349525298253e-05,
"loss": 0.0118,
"step": 6990
},
{
"grad_norm": 0.373206228017807,
"learning_rate": 7.735432867977679e-05,
"loss": 0.0096,
"step": 7000
},
{
"grad_norm": 0.22941261529922485,
"learning_rate": 7.728508732088096e-05,
"loss": 0.0141,
"step": 7010
},
{
"grad_norm": 0.3203655183315277,
"learning_rate": 7.721577136559825e-05,
"loss": 0.0142,
"step": 7020
},
{
"grad_norm": 0.2580220103263855,
"learning_rate": 7.714638100343588e-05,
"loss": 0.0119,
"step": 7030
},
{
"grad_norm": 0.23703204095363617,
"learning_rate": 7.707691642410444e-05,
"loss": 0.0114,
"step": 7040
},
{
"grad_norm": 0.2933865189552307,
"learning_rate": 7.70073778175174e-05,
"loss": 0.0125,
"step": 7050
},
{
"grad_norm": 0.3587990403175354,
"learning_rate": 7.69377653737907e-05,
"loss": 0.0103,
"step": 7060
},
{
"grad_norm": 0.3031073212623596,
"learning_rate": 7.686807928324209e-05,
"loss": 0.0119,
"step": 7070
},
{
"grad_norm": 0.24994587898254395,
"learning_rate": 7.679831973639065e-05,
"loss": 0.0108,
"step": 7080
},
{
"grad_norm": 0.2641933262348175,
"learning_rate": 7.672848692395637e-05,
"loss": 0.0124,
"step": 7090
},
{
"grad_norm": 0.3555925488471985,
"learning_rate": 7.665858103685944e-05,
"loss": 0.0106,
"step": 7100
},
{
"grad_norm": 0.32579174637794495,
"learning_rate": 7.658860226621991e-05,
"loss": 0.0129,
"step": 7110
},
{
"grad_norm": 0.2926287353038788,
"learning_rate": 7.651855080335708e-05,
"loss": 0.0127,
"step": 7120
},
{
"grad_norm": 0.32705703377723694,
"learning_rate": 7.644842683978896e-05,
"loss": 0.0103,
"step": 7130
},
{
"grad_norm": 0.3596729636192322,
"learning_rate": 7.63782305672318e-05,
"loss": 0.0153,
"step": 7140
},
{
"grad_norm": 0.3586880564689636,
"learning_rate": 7.63079621775995e-05,
"loss": 0.0132,
"step": 7150
},
{
"grad_norm": 0.23977535963058472,
"learning_rate": 7.623762186300319e-05,
"loss": 0.0125,
"step": 7160
},
{
"grad_norm": 0.3540763854980469,
"learning_rate": 7.616720981575057e-05,
"loss": 0.0133,
"step": 7170
},
{
"grad_norm": 0.33983471989631653,
"learning_rate": 7.609672622834552e-05,
"loss": 0.0102,
"step": 7180
},
{
"grad_norm": 0.30762892961502075,
"learning_rate": 7.602617129348747e-05,
"loss": 0.0108,
"step": 7190
},
{
"grad_norm": 0.3010900020599365,
"learning_rate": 7.595554520407088e-05,
"loss": 0.0111,
"step": 7200
},
{
"grad_norm": 0.2769547402858734,
"learning_rate": 7.588484815318484e-05,
"loss": 0.0126,
"step": 7210
},
{
"grad_norm": 0.28370919823646545,
"learning_rate": 7.581408033411234e-05,
"loss": 0.0101,
"step": 7220
},
{
"grad_norm": 0.32081568241119385,
"learning_rate": 7.574324194032995e-05,
"loss": 0.009,
"step": 7230
},
{
"grad_norm": 0.2977130711078644,
"learning_rate": 7.567233316550705e-05,
"loss": 0.012,
"step": 7240
},
{
"grad_norm": 0.22645479440689087,
"learning_rate": 7.560135420350562e-05,
"loss": 0.0087,
"step": 7250
},
{
"grad_norm": 0.3486950397491455,
"learning_rate": 7.553030524837935e-05,
"loss": 0.0184,
"step": 7260
},
{
"grad_norm": 0.3366019129753113,
"learning_rate": 7.545918649437341e-05,
"loss": 0.0109,
"step": 7270
},
{
"grad_norm": 0.3164430856704712,
"learning_rate": 7.538799813592377e-05,
"loss": 0.0121,
"step": 7280
},
{
"grad_norm": 0.30747735500335693,
"learning_rate": 7.531674036765662e-05,
"loss": 0.0127,
"step": 7290
},
{
"grad_norm": 0.2376401573419571,
"learning_rate": 7.524541338438807e-05,
"loss": 0.0085,
"step": 7300
},
{
"grad_norm": 0.263528436422348,
"learning_rate": 7.517401738112328e-05,
"loss": 0.0116,
"step": 7310
},
{
"grad_norm": 0.35192421078681946,
"learning_rate": 7.510255255305628e-05,
"loss": 0.0097,
"step": 7320
},
{
"grad_norm": 0.3433808386325836,
"learning_rate": 7.503101909556911e-05,
"loss": 0.0106,
"step": 7330
},
{
"grad_norm": 0.2994978129863739,
"learning_rate": 7.495941720423154e-05,
"loss": 0.0101,
"step": 7340
},
{
"grad_norm": 0.2600700557231903,
"learning_rate": 7.488774707480042e-05,
"loss": 0.0116,
"step": 7350
},
{
"grad_norm": 0.3085675537586212,
"learning_rate": 7.481600890321911e-05,
"loss": 0.0088,
"step": 7360
},
{
"grad_norm": 0.2523610591888428,
"learning_rate": 7.474420288561708e-05,
"loss": 0.0149,
"step": 7370
},
{
"grad_norm": 0.3608851432800293,
"learning_rate": 7.467232921830921e-05,
"loss": 0.015,
"step": 7380
},
{
"grad_norm": 0.27163684368133545,
"learning_rate": 7.460038809779537e-05,
"loss": 0.0092,
"step": 7390
},
{
"grad_norm": 0.3165534436702728,
"learning_rate": 7.452837972075983e-05,
"loss": 0.0094,
"step": 7400
},
{
"grad_norm": 0.2729543149471283,
"learning_rate": 7.445630428407074e-05,
"loss": 0.0093,
"step": 7410
},
{
"grad_norm": 0.35123759508132935,
"learning_rate": 7.43841619847796e-05,
"loss": 0.0106,
"step": 7420
},
{
"grad_norm": 0.2810732424259186,
"learning_rate": 7.431195302012072e-05,
"loss": 0.0097,
"step": 7430
},
{
"grad_norm": 0.3775671720504761,
"learning_rate": 7.423967758751061e-05,
"loss": 0.0119,
"step": 7440
},
{
"grad_norm": 0.2957786023616791,
"learning_rate": 7.416733588454758e-05,
"loss": 0.0129,
"step": 7450
},
{
"grad_norm": 0.3434288799762726,
"learning_rate": 7.409492810901106e-05,
"loss": 0.0132,
"step": 7460
},
{
"grad_norm": 0.35347649455070496,
"learning_rate": 7.402245445886116e-05,
"loss": 0.0147,
"step": 7470
},
{
"grad_norm": 0.3285079598426819,
"learning_rate": 7.394991513223806e-05,
"loss": 0.011,
"step": 7480
},
{
"grad_norm": 0.29947003722190857,
"learning_rate": 7.38773103274615e-05,
"loss": 0.0132,
"step": 7490
},
{
"grad_norm": 0.28526970744132996,
"learning_rate": 7.380464024303028e-05,
"loss": 0.0105,
"step": 7500
},
{
"grad_norm": 0.2930798828601837,
"learning_rate": 7.373190507762162e-05,
"loss": 0.0127,
"step": 7510
},
{
"grad_norm": 0.2921172082424164,
"learning_rate": 7.365910503009066e-05,
"loss": 0.0156,
"step": 7520
},
{
"grad_norm": 0.3323417007923126,
"learning_rate": 7.358624029946996e-05,
"loss": 0.0088,
"step": 7530
},
{
"grad_norm": 0.29670819640159607,
"learning_rate": 7.351331108496893e-05,
"loss": 0.0095,
"step": 7540
},
{
"grad_norm": 0.32244303822517395,
"learning_rate": 7.344031758597325e-05,
"loss": 0.0137,
"step": 7550
},
{
"grad_norm": 0.25546425580978394,
"learning_rate": 7.336726000204435e-05,
"loss": 0.0101,
"step": 7560
},
{
"grad_norm": 0.30756881833076477,
"learning_rate": 7.32941385329189e-05,
"loss": 0.0129,
"step": 7570
},
{
"grad_norm": 0.2749859690666199,
"learning_rate": 7.322095337850816e-05,
"loss": 0.0095,
"step": 7580
},
{
"grad_norm": 0.41934898495674133,
"learning_rate": 7.314770473889758e-05,
"loss": 0.0154,
"step": 7590
},
{
"grad_norm": 0.2750692665576935,
"learning_rate": 7.307439281434615e-05,
"loss": 0.0089,
"step": 7600
},
{
"grad_norm": 0.28263887763023376,
"learning_rate": 7.300101780528585e-05,
"loss": 0.0127,
"step": 7610
},
{
"grad_norm": 0.2647198438644409,
"learning_rate": 7.292757991232117e-05,
"loss": 0.0155,
"step": 7620
},
{
"grad_norm": 0.30357304215431213,
"learning_rate": 7.285407933622848e-05,
"loss": 0.0122,
"step": 7630
},
{
"grad_norm": 0.2601131796836853,
"learning_rate": 7.278051627795557e-05,
"loss": 0.0173,
"step": 7640
},
{
"grad_norm": 0.2693704664707184,
"learning_rate": 7.270689093862105e-05,
"loss": 0.0123,
"step": 7650
},
{
"grad_norm": 0.3310806453227997,
"learning_rate": 7.263320351951374e-05,
"loss": 0.009,
"step": 7660
},
{
"grad_norm": 0.2853841483592987,
"learning_rate": 7.255945422209227e-05,
"loss": 0.0104,
"step": 7670
},
{
"grad_norm": 0.19990304112434387,
"learning_rate": 7.248564324798437e-05,
"loss": 0.0105,
"step": 7680
},
{
"grad_norm": 0.20870745182037354,
"learning_rate": 7.241177079898644e-05,
"loss": 0.0126,
"step": 7690
},
{
"grad_norm": 0.29264724254608154,
"learning_rate": 7.233783707706295e-05,
"loss": 0.0108,
"step": 7700
},
{
"grad_norm": 0.26418036222457886,
"learning_rate": 7.226384228434586e-05,
"loss": 0.0121,
"step": 7710
},
{
"grad_norm": 0.19485041499137878,
"learning_rate": 7.21897866231341e-05,
"loss": 0.0136,
"step": 7720
},
{
"grad_norm": 0.21631906926631927,
"learning_rate": 7.211567029589303e-05,
"loss": 0.0103,
"step": 7730
},
{
"grad_norm": 0.2685507833957672,
"learning_rate": 7.204149350525387e-05,
"loss": 0.0088,
"step": 7740
},
{
"grad_norm": 0.21937017142772675,
"learning_rate": 7.196725645401309e-05,
"loss": 0.0118,
"step": 7750
},
{
"grad_norm": 0.40596023201942444,
"learning_rate": 7.1892959345132e-05,
"loss": 0.0106,
"step": 7760
},
{
"grad_norm": 0.3677843511104584,
"learning_rate": 7.181860238173605e-05,
"loss": 0.0155,
"step": 7770
},
{
"grad_norm": 0.3440069854259491,
"learning_rate": 7.174418576711432e-05,
"loss": 0.012,
"step": 7780
},
{
"grad_norm": 0.3115421533584595,
"learning_rate": 7.1669709704719e-05,
"loss": 0.0129,
"step": 7790
},
{
"grad_norm": 0.2567780315876007,
"learning_rate": 7.159517439816481e-05,
"loss": 0.0186,
"step": 7800
},
{
"grad_norm": 0.26565659046173096,
"learning_rate": 7.152058005122842e-05,
"loss": 0.0117,
"step": 7810
},
{
"grad_norm": 0.2598077654838562,
"learning_rate": 7.144592686784793e-05,
"loss": 0.0105,
"step": 7820
},
{
"grad_norm": 0.3031388819217682,
"learning_rate": 7.137121505212229e-05,
"loss": 0.0103,
"step": 7830
},
{
"grad_norm": 0.334942102432251,
"learning_rate": 7.129644480831077e-05,
"loss": 0.0198,
"step": 7840
},
{
"grad_norm": 0.28533506393432617,
"learning_rate": 7.122161634083234e-05,
"loss": 0.0103,
"step": 7850
},
{
"grad_norm": 0.27979883551597595,
"learning_rate": 7.114672985426516e-05,
"loss": 0.0097,
"step": 7860
},
{
"grad_norm": 0.21115346252918243,
"learning_rate": 7.107178555334606e-05,
"loss": 0.0151,
"step": 7870
},
{
"grad_norm": 0.36340436339378357,
"learning_rate": 7.099678364296989e-05,
"loss": 0.0115,
"step": 7880
},
{
"grad_norm": 0.21899573504924774,
"learning_rate": 7.0921724328189e-05,
"loss": 0.0102,
"step": 7890
},
{
"grad_norm": 0.3320227861404419,
"learning_rate": 7.084660781421268e-05,
"loss": 0.0132,
"step": 7900
},
{
"grad_norm": 0.27939140796661377,
"learning_rate": 7.077143430640662e-05,
"loss": 0.0119,
"step": 7910
},
{
"grad_norm": 0.36499500274658203,
"learning_rate": 7.069620401029232e-05,
"loss": 0.0099,
"step": 7920
},
{
"grad_norm": 0.3523150682449341,
"learning_rate": 7.062091713154655e-05,
"loss": 0.0093,
"step": 7930
},
{
"grad_norm": 0.2645350396633148,
"learning_rate": 7.054557387600075e-05,
"loss": 0.0097,
"step": 7940
},
{
"grad_norm": 0.20298346877098083,
"learning_rate": 7.04701744496405e-05,
"loss": 0.0086,
"step": 7950
},
{
"grad_norm": 0.20196221768856049,
"learning_rate": 7.039471905860495e-05,
"loss": 0.0096,
"step": 7960
},
{
"grad_norm": 0.25855553150177,
"learning_rate": 7.031920790918628e-05,
"loss": 0.0097,
"step": 7970
},
{
"grad_norm": 0.28350019454956055,
"learning_rate": 7.024364120782906e-05,
"loss": 0.0105,
"step": 7980
},
{
"grad_norm": 0.28542742133140564,
"learning_rate": 7.016801916112978e-05,
"loss": 0.0137,
"step": 7990
},
{
"grad_norm": 0.24646037817001343,
"learning_rate": 7.009234197583623e-05,
"loss": 0.0103,
"step": 8000
},
{
"grad_norm": 0.26659995317459106,
"learning_rate": 7.001660985884692e-05,
"loss": 0.0088,
"step": 8010
},
{
"grad_norm": 0.3342621624469757,
"learning_rate": 6.994082301721063e-05,
"loss": 0.0114,
"step": 8020
},
{
"grad_norm": 0.2519735097885132,
"learning_rate": 6.986498165812563e-05,
"loss": 0.009,
"step": 8030
},
{
"grad_norm": 0.25942492485046387,
"learning_rate": 6.978908598893932e-05,
"loss": 0.0089,
"step": 8040
},
{
"grad_norm": 0.2715606391429901,
"learning_rate": 6.971313621714756e-05,
"loss": 0.0129,
"step": 8050
},
{
"grad_norm": 0.28495460748672485,
"learning_rate": 6.96371325503941e-05,
"loss": 0.0086,
"step": 8060
},
{
"grad_norm": 0.2748803198337555,
"learning_rate": 6.956107519647014e-05,
"loss": 0.0079,
"step": 8070
},
{
"grad_norm": 0.19981186091899872,
"learning_rate": 6.94849643633135e-05,
"loss": 0.011,
"step": 8080
},
{
"grad_norm": 0.25709986686706543,
"learning_rate": 6.940880025900834e-05,
"loss": 0.0091,
"step": 8090
},
{
"grad_norm": 0.31318721175193787,
"learning_rate": 6.933258309178438e-05,
"loss": 0.0128,
"step": 8100
},
{
"grad_norm": 0.2338728904724121,
"learning_rate": 6.925631307001646e-05,
"loss": 0.01,
"step": 8110
},
{
"grad_norm": 0.2807973027229309,
"learning_rate": 6.91799904022239e-05,
"loss": 0.0117,
"step": 8120
},
{
"grad_norm": 0.16702820360660553,
"learning_rate": 6.910361529706997e-05,
"loss": 0.0081,
"step": 8130
},
{
"grad_norm": 0.2894277572631836,
"learning_rate": 6.902718796336131e-05,
"loss": 0.0086,
"step": 8140
},
{
"grad_norm": 0.3167431950569153,
"learning_rate": 6.895070861004729e-05,
"loss": 0.0099,
"step": 8150
},
{
"grad_norm": 0.35957372188568115,
"learning_rate": 6.887417744621956e-05,
"loss": 0.0103,
"step": 8160
},
{
"grad_norm": 0.2503855526447296,
"learning_rate": 6.87975946811114e-05,
"loss": 0.0088,
"step": 8170
},
{
"grad_norm": 0.24923115968704224,
"learning_rate": 6.872096052409718e-05,
"loss": 0.0101,
"step": 8180
},
{
"grad_norm": 0.27979594469070435,
"learning_rate": 6.864427518469174e-05,
"loss": 0.0088,
"step": 8190
},
{
"grad_norm": 0.22051957249641418,
"learning_rate": 6.856753887254986e-05,
"loss": 0.009,
"step": 8200
},
{
"grad_norm": 0.2485746592283249,
"learning_rate": 6.849075179746572e-05,
"loss": 0.0132,
"step": 8210
},
{
"grad_norm": 0.29662564396858215,
"learning_rate": 6.841391416937221e-05,
"loss": 0.0109,
"step": 8220
},
{
"grad_norm": 0.22791483998298645,
"learning_rate": 6.833702619834053e-05,
"loss": 0.0119,
"step": 8230
},
{
"grad_norm": 0.2398047000169754,
"learning_rate": 6.82600880945794e-05,
"loss": 0.0114,
"step": 8240
},
{
"grad_norm": 0.28467315435409546,
"learning_rate": 6.818310006843468e-05,
"loss": 0.0109,
"step": 8250
},
{
"grad_norm": 0.39719972014427185,
"learning_rate": 6.810606233038868e-05,
"loss": 0.0125,
"step": 8260
},
{
"grad_norm": 0.367841899394989,
"learning_rate": 6.802897509105966e-05,
"loss": 0.0093,
"step": 8270
},
{
"grad_norm": 0.28418073058128357,
"learning_rate": 6.79518385612012e-05,
"loss": 0.011,
"step": 8280
},
{
"grad_norm": 0.33649322390556335,
"learning_rate": 6.787465295170157e-05,
"loss": 0.0111,
"step": 8290
},
{
"grad_norm": 0.20563358068466187,
"learning_rate": 6.779741847358332e-05,
"loss": 0.0101,
"step": 8300
},
{
"grad_norm": 0.2554636299610138,
"learning_rate": 6.772013533800256e-05,
"loss": 0.0078,
"step": 8310
},
{
"grad_norm": 0.35546278953552246,
"learning_rate": 6.764280375624843e-05,
"loss": 0.0112,
"step": 8320
},
{
"grad_norm": 0.33014950156211853,
"learning_rate": 6.756542393974252e-05,
"loss": 0.0136,
"step": 8330
},
{
"grad_norm": 0.34563302993774414,
"learning_rate": 6.748799610003828e-05,
"loss": 0.0111,
"step": 8340
},
{
"grad_norm": 0.28476187586784363,
"learning_rate": 6.741052044882048e-05,
"loss": 0.0133,
"step": 8350
},
{
"grad_norm": 0.21680086851119995,
"learning_rate": 6.73329971979046e-05,
"loss": 0.01,
"step": 8360
},
{
"grad_norm": 0.24411574006080627,
"learning_rate": 6.725542655923625e-05,
"loss": 0.0101,
"step": 8370
},
{
"grad_norm": 0.2673936188220978,
"learning_rate": 6.717780874489057e-05,
"loss": 0.0142,
"step": 8380
},
{
"grad_norm": 0.22214150428771973,
"learning_rate": 6.710014396707172e-05,
"loss": 0.0081,
"step": 8390
},
{
"grad_norm": 0.21424426138401031,
"learning_rate": 6.702243243811221e-05,
"loss": 0.0094,
"step": 8400
},
{
"grad_norm": 0.326800137758255,
"learning_rate": 6.694467437047244e-05,
"loss": 0.0109,
"step": 8410
},
{
"grad_norm": 0.33343732357025146,
"learning_rate": 6.686686997673997e-05,
"loss": 0.0073,
"step": 8420
},
{
"grad_norm": 0.2529543936252594,
"learning_rate": 6.678901946962903e-05,
"loss": 0.0091,
"step": 8430
},
{
"grad_norm": 0.33415693044662476,
"learning_rate": 6.671112306197996e-05,
"loss": 0.0093,
"step": 8440
},
{
"grad_norm": 0.21618202328681946,
"learning_rate": 6.663318096675854e-05,
"loss": 0.0109,
"step": 8450
},
{
"grad_norm": 0.17679205536842346,
"learning_rate": 6.655519339705552e-05,
"loss": 0.0081,
"step": 8460
},
{
"grad_norm": 0.31180447340011597,
"learning_rate": 6.647716056608588e-05,
"loss": 0.0075,
"step": 8470
},
{
"grad_norm": 0.3059850335121155,
"learning_rate": 6.639908268718843e-05,
"loss": 0.0104,
"step": 8480
},
{
"grad_norm": 0.2953120768070221,
"learning_rate": 6.632095997382514e-05,
"loss": 0.0136,
"step": 8490
},
{
"grad_norm": 0.26256608963012695,
"learning_rate": 6.624279263958047e-05,
"loss": 0.0093,
"step": 8500
},
{
"grad_norm": 0.31494539976119995,
"learning_rate": 6.616458089816097e-05,
"loss": 0.0106,
"step": 8510
},
{
"grad_norm": 0.376949280500412,
"learning_rate": 6.608632496339454e-05,
"loss": 0.0099,
"step": 8520
},
{
"grad_norm": 0.2433367669582367,
"learning_rate": 6.600802504922988e-05,
"loss": 0.0102,
"step": 8530
},
{
"grad_norm": 0.37388041615486145,
"learning_rate": 6.592968136973604e-05,
"loss": 0.0105,
"step": 8540
},
{
"grad_norm": 0.28184112906455994,
"learning_rate": 6.585129413910159e-05,
"loss": 0.0097,
"step": 8550
},
{
"grad_norm": 0.2922300696372986,
"learning_rate": 6.577286357163424e-05,
"loss": 0.01,
"step": 8560
},
{
"grad_norm": 0.2782540023326874,
"learning_rate": 6.569438988176018e-05,
"loss": 0.0082,
"step": 8570
},
{
"grad_norm": 0.27248328924179077,
"learning_rate": 6.561587328402347e-05,
"loss": 0.0088,
"step": 8580
},
{
"grad_norm": 0.24880534410476685,
"learning_rate": 6.553731399308549e-05,
"loss": 0.0079,
"step": 8590
},
{
"grad_norm": 0.2515351176261902,
"learning_rate": 6.545871222372436e-05,
"loss": 0.0073,
"step": 8600
},
{
"grad_norm": 0.18304027616977692,
"learning_rate": 6.538006819083426e-05,
"loss": 0.0071,
"step": 8610
},
{
"grad_norm": 0.3344535231590271,
"learning_rate": 6.530138210942505e-05,
"loss": 0.0101,
"step": 8620
},
{
"grad_norm": 0.2146245390176773,
"learning_rate": 6.522265419462141e-05,
"loss": 0.0121,
"step": 8630
},
{
"grad_norm": 0.24218611419200897,
"learning_rate": 6.514388466166248e-05,
"loss": 0.0096,
"step": 8640
},
{
"grad_norm": 0.16334691643714905,
"learning_rate": 6.506507372590119e-05,
"loss": 0.0079,
"step": 8650
},
{
"grad_norm": 0.33363232016563416,
"learning_rate": 6.498622160280355e-05,
"loss": 0.0098,
"step": 8660
},
{
"grad_norm": 0.35225972533226013,
"learning_rate": 6.490732850794832e-05,
"loss": 0.0103,
"step": 8670
},
{
"grad_norm": 0.3227727711200714,
"learning_rate": 6.482839465702616e-05,
"loss": 0.0107,
"step": 8680
},
{
"grad_norm": 0.2620507776737213,
"learning_rate": 6.474942026583923e-05,
"loss": 0.0104,
"step": 8690
},
{
"grad_norm": 0.2854481041431427,
"learning_rate": 6.467040555030052e-05,
"loss": 0.0153,
"step": 8700
},
{
"grad_norm": 0.24487437307834625,
"learning_rate": 6.459135072643321e-05,
"loss": 0.0099,
"step": 8710
},
{
"grad_norm": 0.23461481928825378,
"learning_rate": 6.451225601037019e-05,
"loss": 0.0109,
"step": 8720
},
{
"grad_norm": 0.2263409048318863,
"learning_rate": 6.443312161835338e-05,
"loss": 0.0105,
"step": 8730
},
{
"grad_norm": 0.3112694025039673,
"learning_rate": 6.43539477667332e-05,
"loss": 0.0116,
"step": 8740
},
{
"grad_norm": 0.2899706959724426,
"learning_rate": 6.427473467196793e-05,
"loss": 0.0109,
"step": 8750
},
{
"grad_norm": 0.319865882396698,
"learning_rate": 6.419548255062315e-05,
"loss": 0.0142,
"step": 8760
},
{
"grad_norm": 0.2569391429424286,
"learning_rate": 6.411619161937112e-05,
"loss": 0.0185,
"step": 8770
},
{
"grad_norm": 0.2740200161933899,
"learning_rate": 6.403686209499022e-05,
"loss": 0.0101,
"step": 8780
},
{
"grad_norm": 0.29832252860069275,
"learning_rate": 6.395749419436437e-05,
"loss": 0.0094,
"step": 8790
},
{
"grad_norm": 0.275097519159317,
"learning_rate": 6.387808813448234e-05,
"loss": 0.0106,
"step": 8800
},
{
"grad_norm": 0.32685816287994385,
"learning_rate": 6.37986441324373e-05,
"loss": 0.0086,
"step": 8810
},
{
"grad_norm": 0.28648194670677185,
"learning_rate": 6.37191624054261e-05,
"loss": 0.0117,
"step": 8820
},
{
"grad_norm": 0.2401561737060547,
"learning_rate": 6.363964317074872e-05,
"loss": 0.0081,
"step": 8830
},
{
"grad_norm": 0.2832534909248352,
"learning_rate": 6.356008664580776e-05,
"loss": 0.0136,
"step": 8840
},
{
"grad_norm": 0.2052382379770279,
"learning_rate": 6.348049304810771e-05,
"loss": 0.0097,
"step": 8850
},
{
"grad_norm": 0.3278440833091736,
"learning_rate": 6.340086259525442e-05,
"loss": 0.0083,
"step": 8860
},
{
"grad_norm": 0.34554144740104675,
"learning_rate": 6.332119550495448e-05,
"loss": 0.0098,
"step": 8870
},
{
"grad_norm": 0.2610031068325043,
"learning_rate": 6.324149199501473e-05,
"loss": 0.01,
"step": 8880
},
{
"grad_norm": 0.22511707246303558,
"learning_rate": 6.316175228334146e-05,
"loss": 0.0092,
"step": 8890
},
{
"grad_norm": 0.2637081742286682,
"learning_rate": 6.308197658794003e-05,
"loss": 0.0128,
"step": 8900
},
{
"grad_norm": 0.31135818362236023,
"learning_rate": 6.300216512691417e-05,
"loss": 0.008,
"step": 8910
},
{
"grad_norm": 0.23880526423454285,
"learning_rate": 6.292231811846532e-05,
"loss": 0.0101,
"step": 8920
},
{
"grad_norm": 0.25867730379104614,
"learning_rate": 6.284243578089217e-05,
"loss": 0.0088,
"step": 8930
},
{
"grad_norm": 0.26295626163482666,
"learning_rate": 6.276251833258999e-05,
"loss": 0.0081,
"step": 8940
},
{
"grad_norm": 0.28615128993988037,
"learning_rate": 6.268256599205003e-05,
"loss": 0.0104,
"step": 8950
},
{
"grad_norm": 0.292758971452713,
"learning_rate": 6.260257897785892e-05,
"loss": 0.009,
"step": 8960
},
{
"grad_norm": 0.2305050492286682,
"learning_rate": 6.252255750869811e-05,
"loss": 0.01,
"step": 8970
},
{
"grad_norm": 0.2941057085990906,
"learning_rate": 6.244250180334325e-05,
"loss": 0.0136,
"step": 8980
},
{
"grad_norm": 0.3271690607070923,
"learning_rate": 6.236241208066356e-05,
"loss": 0.0111,
"step": 8990
},
{
"grad_norm": 0.2185642123222351,
"learning_rate": 6.228228855962133e-05,
"loss": 0.0074,
"step": 9000
},
{
"grad_norm": 0.26744088530540466,
"learning_rate": 6.220213145927115e-05,
"loss": 0.0067,
"step": 9010
},
{
"grad_norm": 0.20686663687229156,
"learning_rate": 6.212194099875951e-05,
"loss": 0.0094,
"step": 9020
},
{
"grad_norm": 0.2725589871406555,
"learning_rate": 6.204171739732405e-05,
"loss": 0.0083,
"step": 9030
},
{
"grad_norm": 0.3449211120605469,
"learning_rate": 6.196146087429303e-05,
"loss": 0.0085,
"step": 9040
},
{
"grad_norm": 0.24136734008789062,
"learning_rate": 6.188117164908474e-05,
"loss": 0.0094,
"step": 9050
},
{
"grad_norm": 0.2191270887851715,
"learning_rate": 6.180084994120684e-05,
"loss": 0.011,
"step": 9060
},
{
"grad_norm": 0.4127255082130432,
"learning_rate": 6.17204959702558e-05,
"loss": 0.0115,
"step": 9070
},
{
"grad_norm": 0.25778821110725403,
"learning_rate": 6.164010995591635e-05,
"loss": 0.0073,
"step": 9080
},
{
"grad_norm": 0.19585411250591278,
"learning_rate": 6.155969211796076e-05,
"loss": 0.0093,
"step": 9090
},
{
"grad_norm": 0.1971423476934433,
"learning_rate": 6.147924267624829e-05,
"loss": 0.0065,
"step": 9100
},
{
"grad_norm": 0.18513402342796326,
"learning_rate": 6.13987618507247e-05,
"loss": 0.0082,
"step": 9110
},
{
"grad_norm": 0.30468112230300903,
"learning_rate": 6.131824986142147e-05,
"loss": 0.0108,
"step": 9120
},
{
"grad_norm": 0.2643079161643982,
"learning_rate": 6.123770692845529e-05,
"loss": 0.0081,
"step": 9130
},
{
"grad_norm": 0.35840684175491333,
"learning_rate": 6.11571332720275e-05,
"loss": 0.0081,
"step": 9140
},
{
"grad_norm": 0.34239524602890015,
"learning_rate": 6.107652911242336e-05,
"loss": 0.0118,
"step": 9150
},
{
"grad_norm": 0.3136473596096039,
"learning_rate": 6.0995894670011586e-05,
"loss": 0.0128,
"step": 9160
},
{
"grad_norm": 0.3185141980648041,
"learning_rate": 6.091523016524368e-05,
"loss": 0.0133,
"step": 9170
},
{
"grad_norm": 0.2437521517276764,
"learning_rate": 6.083453581865328e-05,
"loss": 0.0116,
"step": 9180
},
{
"grad_norm": 0.24852575361728668,
"learning_rate": 6.075381185085568e-05,
"loss": 0.0101,
"step": 9190
},
{
"grad_norm": 0.21035079658031464,
"learning_rate": 6.067305848254709e-05,
"loss": 0.0089,
"step": 9200
},
{
"grad_norm": 0.19124074280261993,
"learning_rate": 6.059227593450418e-05,
"loss": 0.01,
"step": 9210
},
{
"grad_norm": 0.22028234601020813,
"learning_rate": 6.051146442758333e-05,
"loss": 0.0128,
"step": 9220
},
{
"grad_norm": 0.2855907082557678,
"learning_rate": 6.043062418272012e-05,
"loss": 0.0103,
"step": 9230
},
{
"grad_norm": 0.23253290355205536,
"learning_rate": 6.0349755420928666e-05,
"loss": 0.0075,
"step": 9240
},
{
"grad_norm": 0.22832125425338745,
"learning_rate": 6.0268858363301105e-05,
"loss": 0.0074,
"step": 9250
},
{
"grad_norm": 0.22071580588817596,
"learning_rate": 6.018793323100689e-05,
"loss": 0.0106,
"step": 9260
},
{
"grad_norm": 0.3454406261444092,
"learning_rate": 6.0106980245292255e-05,
"loss": 0.011,
"step": 9270
},
{
"grad_norm": 0.3467009663581848,
"learning_rate": 6.002599962747957e-05,
"loss": 0.0087,
"step": 9280
},
{
"grad_norm": 0.2289619743824005,
"learning_rate": 5.994499159896673e-05,
"loss": 0.0068,
"step": 9290
},
{
"grad_norm": 0.2502879202365875,
"learning_rate": 5.9863956381226607e-05,
"loss": 0.0138,
"step": 9300
},
{
"grad_norm": 0.23016954958438873,
"learning_rate": 5.9782894195806394e-05,
"loss": 0.0088,
"step": 9310
},
{
"grad_norm": 0.3265341520309448,
"learning_rate": 5.9701805264327004e-05,
"loss": 0.0122,
"step": 9320
},
{
"grad_norm": 0.2779223322868347,
"learning_rate": 5.96206898084825e-05,
"loss": 0.0073,
"step": 9330
},
{
"grad_norm": 0.20430560410022736,
"learning_rate": 5.953954805003942e-05,
"loss": 0.0106,
"step": 9340
},
{
"grad_norm": 0.22642415761947632,
"learning_rate": 5.945838021083623e-05,
"loss": 0.0083,
"step": 9350
},
{
"grad_norm": 0.22150662541389465,
"learning_rate": 5.9377186512782714e-05,
"loss": 0.0065,
"step": 9360
},
{
"grad_norm": 0.364218145608902,
"learning_rate": 5.929596717785935e-05,
"loss": 0.0089,
"step": 9370
},
{
"grad_norm": 0.24294275045394897,
"learning_rate": 5.921472242811668e-05,
"loss": 0.0079,
"step": 9380
},
{
"grad_norm": 0.23857471346855164,
"learning_rate": 5.913345248567475e-05,
"loss": 0.0125,
"step": 9390
},
{
"grad_norm": 0.17391999065876007,
"learning_rate": 5.905215757272248e-05,
"loss": 0.0162,
"step": 9400
},
{
"grad_norm": 0.2742446959018707,
"learning_rate": 5.897083791151706e-05,
"loss": 0.009,
"step": 9410
},
{
"grad_norm": 0.27407529950141907,
"learning_rate": 5.888949372438336e-05,
"loss": 0.0104,
"step": 9420
},
{
"grad_norm": 0.1971682459115982,
"learning_rate": 5.8808125233713255e-05,
"loss": 0.008,
"step": 9430
},
{
"grad_norm": 0.17385496199131012,
"learning_rate": 5.872673266196509e-05,
"loss": 0.007,
"step": 9440
},
{
"grad_norm": 0.2608735263347626,
"learning_rate": 5.864531623166305e-05,
"loss": 0.0083,
"step": 9450
},
{
"grad_norm": 0.2396305799484253,
"learning_rate": 5.856387616539656e-05,
"loss": 0.0082,
"step": 9460
},
{
"grad_norm": 0.26889148354530334,
"learning_rate": 5.848241268581967e-05,
"loss": 0.0085,
"step": 9470
},
{
"grad_norm": 0.26665395498275757,
"learning_rate": 5.840092601565037e-05,
"loss": 0.0094,
"step": 9480
},
{
"grad_norm": 0.23885580897331238,
"learning_rate": 5.8319416377670144e-05,
"loss": 0.008,
"step": 9490
},
{
"grad_norm": 0.2632520794868469,
"learning_rate": 5.82378839947232e-05,
"loss": 0.0098,
"step": 9500
},
{
"grad_norm": 0.3209339678287506,
"learning_rate": 5.815632908971599e-05,
"loss": 0.0106,
"step": 9510
},
{
"grad_norm": 0.282398521900177,
"learning_rate": 5.80747518856165e-05,
"loss": 0.0099,
"step": 9520
},
{
"grad_norm": 0.3100825250148773,
"learning_rate": 5.799315260545367e-05,
"loss": 0.0134,
"step": 9530
},
{
"grad_norm": 0.2550257444381714,
"learning_rate": 5.791153147231686e-05,
"loss": 0.0135,
"step": 9540
},
{
"grad_norm": 0.3137185275554657,
"learning_rate": 5.782988870935509e-05,
"loss": 0.008,
"step": 9550
},
{
"grad_norm": 0.23910042643547058,
"learning_rate": 5.774822453977657e-05,
"loss": 0.0087,
"step": 9560
},
{
"grad_norm": 0.21105986833572388,
"learning_rate": 5.7666539186848036e-05,
"loss": 0.009,
"step": 9570
},
{
"grad_norm": 0.2725152373313904,
"learning_rate": 5.758483287389411e-05,
"loss": 0.0143,
"step": 9580
},
{
"grad_norm": 0.2424250990152359,
"learning_rate": 5.7503105824296735e-05,
"loss": 0.0122,
"step": 9590
},
{
"grad_norm": 0.20699156820774078,
"learning_rate": 5.742135826149453e-05,
"loss": 0.0092,
"step": 9600
},
{
"grad_norm": 0.19423116743564606,
"learning_rate": 5.7339590408982223e-05,
"loss": 0.0065,
"step": 9610
},
{
"grad_norm": 0.2932196259498596,
"learning_rate": 5.725780249031e-05,
"loss": 0.0091,
"step": 9620
},
{
"grad_norm": 0.3803527057170868,
"learning_rate": 5.717599472908292e-05,
"loss": 0.0109,
"step": 9630
},
{
"grad_norm": 0.3079898953437805,
"learning_rate": 5.7094167348960237e-05,
"loss": 0.0084,
"step": 9640
},
{
"grad_norm": 0.2345152646303177,
"learning_rate": 5.7012320573654945e-05,
"loss": 0.0093,
"step": 9650
},
{
"grad_norm": 0.19556953012943268,
"learning_rate": 5.693045462693295e-05,
"loss": 0.008,
"step": 9660
},
{
"grad_norm": 0.22584684193134308,
"learning_rate": 5.684856973261266e-05,
"loss": 0.0073,
"step": 9670
},
{
"grad_norm": 0.2571251094341278,
"learning_rate": 5.6766666114564215e-05,
"loss": 0.0099,
"step": 9680
},
{
"grad_norm": 0.3253817856311798,
"learning_rate": 5.668474399670899e-05,
"loss": 0.0089,
"step": 9690
},
{
"grad_norm": 0.23601661622524261,
"learning_rate": 5.660280360301896e-05,
"loss": 0.0085,
"step": 9700
},
{
"grad_norm": 0.18850122392177582,
"learning_rate": 5.652084515751599e-05,
"loss": 0.0063,
"step": 9710
},
{
"grad_norm": 0.2345300316810608,
"learning_rate": 5.643886888427137e-05,
"loss": 0.009,
"step": 9720
},
{
"grad_norm": 0.21616721153259277,
"learning_rate": 5.6356875007405074e-05,
"loss": 0.0105,
"step": 9730
},
{
"grad_norm": 0.23095466196537018,
"learning_rate": 5.627486375108525e-05,
"loss": 0.0113,
"step": 9740
},
{
"grad_norm": 0.20307700335979462,
"learning_rate": 5.619283533952754e-05,
"loss": 0.0098,
"step": 9750
},
{
"grad_norm": 0.3022615313529968,
"learning_rate": 5.6110789996994474e-05,
"loss": 0.0087,
"step": 9760
},
{
"grad_norm": 0.2918921411037445,
"learning_rate": 5.602872794779491e-05,
"loss": 0.0115,
"step": 9770
},
{
"grad_norm": 0.3074280023574829,
"learning_rate": 5.594664941628334e-05,
"loss": 0.0087,
"step": 9780
},
{
"grad_norm": 0.2652987241744995,
"learning_rate": 5.5864554626859324e-05,
"loss": 0.0102,
"step": 9790
},
{
"grad_norm": 0.2364000827074051,
"learning_rate": 5.578244380396691e-05,
"loss": 0.0071,
"step": 9800
},
{
"grad_norm": 0.25912168622016907,
"learning_rate": 5.570031717209394e-05,
"loss": 0.0073,
"step": 9810
},
{
"grad_norm": 0.2132926732301712,
"learning_rate": 5.561817495577147e-05,
"loss": 0.0081,
"step": 9820
},
{
"grad_norm": 0.3217675983905792,
"learning_rate": 5.5536017379573215e-05,
"loss": 0.0095,
"step": 9830
},
{
"grad_norm": 0.21416251361370087,
"learning_rate": 5.545384466811483e-05,
"loss": 0.0088,
"step": 9840
},
{
"grad_norm": 0.30158206820487976,
"learning_rate": 5.5371657046053384e-05,
"loss": 0.0127,
"step": 9850
},
{
"grad_norm": 0.17979058623313904,
"learning_rate": 5.528945473808669e-05,
"loss": 0.0083,
"step": 9860
},
{
"grad_norm": 0.1995510756969452,
"learning_rate": 5.520723796895272e-05,
"loss": 0.0063,
"step": 9870
},
{
"grad_norm": 0.21220991015434265,
"learning_rate": 5.512500696342897e-05,
"loss": 0.0077,
"step": 9880
},
{
"grad_norm": 0.3290112316608429,
"learning_rate": 5.504276194633188e-05,
"loss": 0.0107,
"step": 9890
},
{
"grad_norm": 0.28488659858703613,
"learning_rate": 5.49605031425162e-05,
"loss": 0.0081,
"step": 9900
},
{
"grad_norm": 0.32763949036598206,
"learning_rate": 5.487823077687434e-05,
"loss": 0.0135,
"step": 9910
},
{
"grad_norm": 0.23580661416053772,
"learning_rate": 5.4795945074335806e-05,
"loss": 0.0094,
"step": 9920
},
{
"grad_norm": 0.22253672778606415,
"learning_rate": 5.471364625986657e-05,
"loss": 0.008,
"step": 9930
},
{
"grad_norm": 0.20649607479572296,
"learning_rate": 5.463133455846845e-05,
"loss": 0.0064,
"step": 9940
},
{
"grad_norm": 0.21485736966133118,
"learning_rate": 5.4549010195178505e-05,
"loss": 0.0142,
"step": 9950
},
{
"grad_norm": 0.22330300509929657,
"learning_rate": 5.446667339506838e-05,
"loss": 0.008,
"step": 9960
},
{
"grad_norm": 0.2618495523929596,
"learning_rate": 5.4384324383243756e-05,
"loss": 0.0099,
"step": 9970
},
{
"grad_norm": 0.28015658259391785,
"learning_rate": 5.430196338484368e-05,
"loss": 0.011,
"step": 9980
},
{
"grad_norm": 0.20648691058158875,
"learning_rate": 5.4219590625039975e-05,
"loss": 0.0083,
"step": 9990
},
{
"grad_norm": 0.22049671411514282,
"learning_rate": 5.413720632903664e-05,
"loss": 0.008,
"step": 10000
},
{
"grad_norm": 0.26092201471328735,
"learning_rate": 5.405481072206917e-05,
"loss": 0.0068,
"step": 10010
},
{
"grad_norm": 0.2266596406698227,
"learning_rate": 5.397240402940402e-05,
"loss": 0.0107,
"step": 10020
},
{
"grad_norm": 0.2024102360010147,
"learning_rate": 5.388998647633794e-05,
"loss": 0.0081,
"step": 10030
},
{
"grad_norm": 0.29881739616394043,
"learning_rate": 5.380755828819737e-05,
"loss": 0.0109,
"step": 10040
},
{
"grad_norm": 0.31106695532798767,
"learning_rate": 5.3725119690337846e-05,
"loss": 0.0079,
"step": 10050
},
{
"grad_norm": 0.22892136871814728,
"learning_rate": 5.3642670908143324e-05,
"loss": 0.007,
"step": 10060
},
{
"grad_norm": 0.30826979875564575,
"learning_rate": 5.356021216702562e-05,
"loss": 0.0073,
"step": 10070
},
{
"grad_norm": 0.27872055768966675,
"learning_rate": 5.347774369242381e-05,
"loss": 0.0075,
"step": 10080
},
{
"grad_norm": 0.3187792897224426,
"learning_rate": 5.3395265709803545e-05,
"loss": 0.0072,
"step": 10090
},
{
"grad_norm": 0.24945269525051117,
"learning_rate": 5.331277844465647e-05,
"loss": 0.0121,
"step": 10100
},
{
"grad_norm": 0.21182595193386078,
"learning_rate": 5.323028212249963e-05,
"loss": 0.012,
"step": 10110
},
{
"grad_norm": 0.28724876046180725,
"learning_rate": 5.314777696887481e-05,
"loss": 0.0088,
"step": 10120
},
{
"grad_norm": 0.2590867877006531,
"learning_rate": 5.306526320934796e-05,
"loss": 0.0099,
"step": 10130
},
{
"grad_norm": 0.2745634615421295,
"learning_rate": 5.298274106950854e-05,
"loss": 0.007,
"step": 10140
},
{
"grad_norm": 0.22778289020061493,
"learning_rate": 5.290021077496893e-05,
"loss": 0.0081,
"step": 10150
},
{
"grad_norm": 0.2155156284570694,
"learning_rate": 5.2817672551363816e-05,
"loss": 0.0073,
"step": 10160
},
{
"grad_norm": 0.2104647010564804,
"learning_rate": 5.273512662434952e-05,
"loss": 0.0085,
"step": 10170
},
{
"grad_norm": 0.2083871066570282,
"learning_rate": 5.265257321960349e-05,
"loss": 0.0093,
"step": 10180
},
{
"grad_norm": 0.25575950741767883,
"learning_rate": 5.257001256282357e-05,
"loss": 0.0086,
"step": 10190
},
{
"grad_norm": 0.2434515506029129,
"learning_rate": 5.248744487972742e-05,
"loss": 0.0094,
"step": 10200
},
{
"grad_norm": 0.28249382972717285,
"learning_rate": 5.240487039605196e-05,
"loss": 0.0087,
"step": 10210
},
{
"grad_norm": 0.20960280299186707,
"learning_rate": 5.232228933755267e-05,
"loss": 0.0097,
"step": 10220
},
{
"grad_norm": 0.17936058342456818,
"learning_rate": 5.2239701930003006e-05,
"loss": 0.0065,
"step": 10230
},
{
"grad_norm": 0.21344617009162903,
"learning_rate": 5.215710839919379e-05,
"loss": 0.0077,
"step": 10240
},
{
"grad_norm": 0.2035842388868332,
"learning_rate": 5.207450897093257e-05,
"loss": 0.0086,
"step": 10250
},
{
"grad_norm": 0.21530857682228088,
"learning_rate": 5.1991903871043046e-05,
"loss": 0.0071,
"step": 10260
},
{
"grad_norm": 0.21515516936779022,
"learning_rate": 5.190929332536439e-05,
"loss": 0.0084,
"step": 10270
},
{
"grad_norm": 0.30118030309677124,
"learning_rate": 5.182667755975071e-05,
"loss": 0.0092,
"step": 10280
},
{
"grad_norm": 0.2057802826166153,
"learning_rate": 5.1744056800070315e-05,
"loss": 0.0078,
"step": 10290
},
{
"grad_norm": 0.26955923438072205,
"learning_rate": 5.166143127220524e-05,
"loss": 0.0097,
"step": 10300
},
{
"grad_norm": 0.2716047763824463,
"learning_rate": 5.1578801202050485e-05,
"loss": 0.0088,
"step": 10310
},
{
"grad_norm": 0.2820281386375427,
"learning_rate": 5.149616681551355e-05,
"loss": 0.0088,
"step": 10320
},
{
"grad_norm": 0.25352105498313904,
"learning_rate": 5.141352833851367e-05,
"loss": 0.0073,
"step": 10330
},
{
"grad_norm": 0.15962743759155273,
"learning_rate": 5.1330885996981285e-05,
"loss": 0.0078,
"step": 10340
},
{
"grad_norm": 0.22716277837753296,
"learning_rate": 5.124824001685741e-05,
"loss": 0.0075,
"step": 10350
},
{
"grad_norm": 0.20729690790176392,
"learning_rate": 5.116559062409298e-05,
"loss": 0.0059,
"step": 10360
},
{
"grad_norm": 0.2034323364496231,
"learning_rate": 5.10829380446483e-05,
"loss": 0.0094,
"step": 10370
},
{
"grad_norm": 0.21604637801647186,
"learning_rate": 5.100028250449235e-05,
"loss": 0.008,
"step": 10380
},
{
"grad_norm": 0.2015807330608368,
"learning_rate": 5.0917624229602234e-05,
"loss": 0.0065,
"step": 10390
},
{
"grad_norm": 0.18266531825065613,
"learning_rate": 5.0834963445962524e-05,
"loss": 0.0089,
"step": 10400
},
{
"grad_norm": 0.252404123544693,
"learning_rate": 5.075230037956461e-05,
"loss": 0.0083,
"step": 10410
},
{
"grad_norm": 0.22738321125507355,
"learning_rate": 5.0669635256406213e-05,
"loss": 0.0145,
"step": 10420
},
{
"grad_norm": 0.3449110984802246,
"learning_rate": 5.058696830249058e-05,
"loss": 0.01,
"step": 10430
},
{
"grad_norm": 0.30087509751319885,
"learning_rate": 5.050429974382602e-05,
"loss": 0.0156,
"step": 10440
},
{
"grad_norm": 0.27621498703956604,
"learning_rate": 5.042162980642523e-05,
"loss": 0.0075,
"step": 10450
},
{
"grad_norm": 0.2740698754787445,
"learning_rate": 5.033895871630462e-05,
"loss": 0.011,
"step": 10460
},
{
"grad_norm": 0.23271113634109497,
"learning_rate": 5.025628669948386e-05,
"loss": 0.007,
"step": 10470
},
{
"grad_norm": 0.25937408208847046,
"learning_rate": 5.017361398198502e-05,
"loss": 0.0065,
"step": 10480
},
{
"grad_norm": 0.2318635731935501,
"learning_rate": 5.009094078983221e-05,
"loss": 0.0069,
"step": 10490
},
{
"grad_norm": 0.20766286551952362,
"learning_rate": 5.000826734905073e-05,
"loss": 0.0096,
"step": 10500
},
{
"grad_norm": 0.2584581971168518,
"learning_rate": 4.9925593885666645e-05,
"loss": 0.0086,
"step": 10510
},
{
"grad_norm": 0.2603772282600403,
"learning_rate": 4.984292062570602e-05,
"loss": 0.0063,
"step": 10520
},
{
"grad_norm": 0.23702503740787506,
"learning_rate": 4.976024779519442e-05,
"loss": 0.0079,
"step": 10530
},
{
"grad_norm": 0.2646651268005371,
"learning_rate": 4.9677575620156194e-05,
"loss": 0.0092,
"step": 10540
},
{
"grad_norm": 0.1895063817501068,
"learning_rate": 4.959490432661391e-05,
"loss": 0.0093,
"step": 10550
},
{
"grad_norm": 0.21291036903858185,
"learning_rate": 4.9512234140587726e-05,
"loss": 0.0075,
"step": 10560
},
{
"grad_norm": 0.20933032035827637,
"learning_rate": 4.942956528809477e-05,
"loss": 0.0077,
"step": 10570
},
{
"grad_norm": 0.20019420981407166,
"learning_rate": 4.934689799514854e-05,
"loss": 0.0097,
"step": 10580
},
{
"grad_norm": 0.26344871520996094,
"learning_rate": 4.926423248775827e-05,
"loss": 0.0096,
"step": 10590
},
{
"grad_norm": 0.20692236721515656,
"learning_rate": 4.918156899192826e-05,
"loss": 0.0131,
"step": 10600
},
{
"grad_norm": 0.23554596304893494,
"learning_rate": 4.909890773365738e-05,
"loss": 0.0137,
"step": 10610
},
{
"grad_norm": 0.20528851449489594,
"learning_rate": 4.9016248938938344e-05,
"loss": 0.0105,
"step": 10620
},
{
"grad_norm": 0.26955676078796387,
"learning_rate": 4.8933592833757156e-05,
"loss": 0.0132,
"step": 10630
},
{
"grad_norm": 0.24838532507419586,
"learning_rate": 4.8850939644092435e-05,
"loss": 0.0136,
"step": 10640
},
{
"grad_norm": 0.2874491214752197,
"learning_rate": 4.876828959591485e-05,
"loss": 0.0079,
"step": 10650
},
{
"grad_norm": 0.22860568761825562,
"learning_rate": 4.8685642915186474e-05,
"loss": 0.0089,
"step": 10660
},
{
"grad_norm": 0.23194189369678497,
"learning_rate": 4.860299982786018e-05,
"loss": 0.0108,
"step": 10670
},
{
"grad_norm": 0.23060914874076843,
"learning_rate": 4.852036055987901e-05,
"loss": 0.0097,
"step": 10680
},
{
"grad_norm": 0.385558545589447,
"learning_rate": 4.843772533717558e-05,
"loss": 0.0093,
"step": 10690
},
{
"grad_norm": 0.24155037105083466,
"learning_rate": 4.835509438567142e-05,
"loss": 0.009,
"step": 10700
},
{
"grad_norm": 0.25894564390182495,
"learning_rate": 4.827246793127639e-05,
"loss": 0.0079,
"step": 10710
},
{
"grad_norm": 0.22689871490001678,
"learning_rate": 4.818984619988807e-05,
"loss": 0.0059,
"step": 10720
},
{
"grad_norm": 0.17574647068977356,
"learning_rate": 4.810722941739115e-05,
"loss": 0.0056,
"step": 10730
},
{
"grad_norm": 0.2433113157749176,
"learning_rate": 4.8024617809656684e-05,
"loss": 0.0084,
"step": 10740
},
{
"grad_norm": 0.28003692626953125,
"learning_rate": 4.794201160254171e-05,
"loss": 0.0094,
"step": 10750
},
{
"grad_norm": 0.23488906025886536,
"learning_rate": 4.785941102188844e-05,
"loss": 0.006,
"step": 10760
},
{
"grad_norm": 0.2274412214756012,
"learning_rate": 4.7776816293523686e-05,
"loss": 0.0084,
"step": 10770
},
{
"grad_norm": 0.19838649034500122,
"learning_rate": 4.769422764325832e-05,
"loss": 0.0064,
"step": 10780
},
{
"grad_norm": 0.18760640919208527,
"learning_rate": 4.76116452968865e-05,
"loss": 0.0062,
"step": 10790
},
{
"grad_norm": 0.2381298542022705,
"learning_rate": 4.752906948018525e-05,
"loss": 0.0071,
"step": 10800
},
{
"grad_norm": 0.2588251829147339,
"learning_rate": 4.7446500418913684e-05,
"loss": 0.0095,
"step": 10810
},
{
"grad_norm": 0.2380070835351944,
"learning_rate": 4.736393833881247e-05,
"loss": 0.0068,
"step": 10820
},
{
"grad_norm": 0.23917271196842194,
"learning_rate": 4.7281383465603194e-05,
"loss": 0.0097,
"step": 10830
},
{
"grad_norm": 0.18200017511844635,
"learning_rate": 4.71988360249877e-05,
"loss": 0.006,
"step": 10840
},
{
"grad_norm": 0.23980382084846497,
"learning_rate": 4.7116296242647554e-05,
"loss": 0.0085,
"step": 10850
},
{
"grad_norm": 0.24523408710956573,
"learning_rate": 4.703376434424336e-05,
"loss": 0.0086,
"step": 10860
},
{
"grad_norm": 0.18079398572444916,
"learning_rate": 4.695124055541421e-05,
"loss": 0.0086,
"step": 10870
},
{
"grad_norm": 0.21983122825622559,
"learning_rate": 4.6868725101776934e-05,
"loss": 0.0076,
"step": 10880
},
{
"grad_norm": 0.22801896929740906,
"learning_rate": 4.678621820892567e-05,
"loss": 0.0077,
"step": 10890
},
{
"grad_norm": 0.23620279133319855,
"learning_rate": 4.670372010243111e-05,
"loss": 0.0085,
"step": 10900
},
{
"grad_norm": 0.1909436583518982,
"learning_rate": 4.662123100783992e-05,
"loss": 0.0094,
"step": 10910
},
{
"grad_norm": 0.22192321717739105,
"learning_rate": 4.653875115067415e-05,
"loss": 0.0081,
"step": 10920
},
{
"grad_norm": 0.17492982745170593,
"learning_rate": 4.6456280756430545e-05,
"loss": 0.0076,
"step": 10930
},
{
"grad_norm": 0.17999714612960815,
"learning_rate": 4.637382005058004e-05,
"loss": 0.0072,
"step": 10940
},
{
"grad_norm": 0.19913795590400696,
"learning_rate": 4.629136925856705e-05,
"loss": 0.0108,
"step": 10950
},
{
"grad_norm": 0.1705617606639862,
"learning_rate": 4.6208928605808895e-05,
"loss": 0.0086,
"step": 10960
},
{
"grad_norm": 0.2795408368110657,
"learning_rate": 4.612649831769519e-05,
"loss": 0.0093,
"step": 10970
},
{
"grad_norm": 0.2092956155538559,
"learning_rate": 4.604407861958715e-05,
"loss": 0.0077,
"step": 10980
},
{
"grad_norm": 0.2425389438867569,
"learning_rate": 4.5961669736817114e-05,
"loss": 0.0069,
"step": 10990
},
{
"grad_norm": 0.23790328204631805,
"learning_rate": 4.5879271894687814e-05,
"loss": 0.0064,
"step": 11000
},
{
"grad_norm": 0.20760825276374817,
"learning_rate": 4.5796885318471826e-05,
"loss": 0.008,
"step": 11010
},
{
"grad_norm": 0.24325376749038696,
"learning_rate": 4.571451023341086e-05,
"loss": 0.0097,
"step": 11020
},
{
"grad_norm": 0.24800147116184235,
"learning_rate": 4.563214686471527e-05,
"loss": 0.0113,
"step": 11030
},
{
"grad_norm": 0.31807875633239746,
"learning_rate": 4.5549795437563365e-05,
"loss": 0.0088,
"step": 11040
},
{
"grad_norm": 0.24229101836681366,
"learning_rate": 4.546745617710081e-05,
"loss": 0.009,
"step": 11050
},
{
"grad_norm": 0.22642278671264648,
"learning_rate": 4.5385129308440014e-05,
"loss": 0.0072,
"step": 11060
},
{
"grad_norm": 0.2451431155204773,
"learning_rate": 4.530281505665944e-05,
"loss": 0.0062,
"step": 11070
},
{
"grad_norm": 0.1859740912914276,
"learning_rate": 4.5220513646803134e-05,
"loss": 0.0119,
"step": 11080
},
{
"grad_norm": 0.24542033672332764,
"learning_rate": 4.513822530388003e-05,
"loss": 0.0085,
"step": 11090
},
{
"grad_norm": 0.28155162930488586,
"learning_rate": 4.5055950252863296e-05,
"loss": 0.0074,
"step": 11100
},
{
"grad_norm": 0.2138509750366211,
"learning_rate": 4.4973688718689803e-05,
"loss": 0.0079,
"step": 11110
},
{
"grad_norm": 0.2261638045310974,
"learning_rate": 4.4891440926259406e-05,
"loss": 0.0071,
"step": 11120
},
{
"grad_norm": 0.2542162835597992,
"learning_rate": 4.480920710043443e-05,
"loss": 0.0097,
"step": 11130
},
{
"grad_norm": 0.15774618089199066,
"learning_rate": 4.4726987466039044e-05,
"loss": 0.0062,
"step": 11140
},
{
"grad_norm": 0.23213805258274078,
"learning_rate": 4.46447822478586e-05,
"loss": 0.0063,
"step": 11150
},
{
"grad_norm": 0.25310662388801575,
"learning_rate": 4.4562591670638974e-05,
"loss": 0.0057,
"step": 11160
},
{
"grad_norm": 0.2297695130109787,
"learning_rate": 4.4480415959086105e-05,
"loss": 0.0093,
"step": 11170
},
{
"grad_norm": 0.2621958255767822,
"learning_rate": 4.439825533786522e-05,
"loss": 0.0071,
"step": 11180
},
{
"grad_norm": 0.22085212171077728,
"learning_rate": 4.431611003160035e-05,
"loss": 0.0098,
"step": 11190
},
{
"grad_norm": 0.18988831341266632,
"learning_rate": 4.4233980264873636e-05,
"loss": 0.0051,
"step": 11200
},
{
"grad_norm": 0.2241099625825882,
"learning_rate": 4.4151866262224684e-05,
"loss": 0.0067,
"step": 11210
},
{
"grad_norm": 0.27083107829093933,
"learning_rate": 4.406976824815006e-05,
"loss": 0.0068,
"step": 11220
},
{
"grad_norm": 0.26719561219215393,
"learning_rate": 4.3987686447102595e-05,
"loss": 0.0074,
"step": 11230
},
{
"grad_norm": 0.20571677386760712,
"learning_rate": 4.3905621083490804e-05,
"loss": 0.0068,
"step": 11240
},
{
"grad_norm": 0.15599504113197327,
"learning_rate": 4.3823572381678286e-05,
"loss": 0.006,
"step": 11250
},
{
"grad_norm": 0.21513454616069794,
"learning_rate": 4.374154056598301e-05,
"loss": 0.0073,
"step": 11260
},
{
"grad_norm": 0.22201840579509735,
"learning_rate": 4.3659525860676845e-05,
"loss": 0.0064,
"step": 11270
},
{
"grad_norm": 0.22313819825649261,
"learning_rate": 4.3577528489984854e-05,
"loss": 0.006,
"step": 11280
},
{
"grad_norm": 0.23398296535015106,
"learning_rate": 4.349554867808476e-05,
"loss": 0.0061,
"step": 11290
},
{
"grad_norm": 0.21648810803890228,
"learning_rate": 4.34135866491062e-05,
"loss": 0.0068,
"step": 11300
},
{
"grad_norm": 0.2085336595773697,
"learning_rate": 4.333164262713022e-05,
"loss": 0.0142,
"step": 11310
},
{
"grad_norm": 0.27589038014411926,
"learning_rate": 4.324971683618868e-05,
"loss": 0.0101,
"step": 11320
},
{
"grad_norm": 0.21786828339099884,
"learning_rate": 4.316780950026354e-05,
"loss": 0.0053,
"step": 11330
},
{
"grad_norm": 0.2073872834444046,
"learning_rate": 4.308592084328637e-05,
"loss": 0.009,
"step": 11340
},
{
"grad_norm": 0.22114667296409607,
"learning_rate": 4.3004051089137576e-05,
"loss": 0.0079,
"step": 11350
},
{
"grad_norm": 0.22930344939231873,
"learning_rate": 4.292220046164597e-05,
"loss": 0.0069,
"step": 11360
},
{
"grad_norm": 0.22985005378723145,
"learning_rate": 4.2840369184588035e-05,
"loss": 0.0063,
"step": 11370
},
{
"grad_norm": 0.18045322597026825,
"learning_rate": 4.2758557481687345e-05,
"loss": 0.0055,
"step": 11380
},
{
"grad_norm": 0.2663944661617279,
"learning_rate": 4.267676557661403e-05,
"loss": 0.0079,
"step": 11390
},
{
"grad_norm": 0.24303773045539856,
"learning_rate": 4.2594993692983955e-05,
"loss": 0.0065,
"step": 11400
},
{
"grad_norm": 0.15300486981868744,
"learning_rate": 4.251324205435837e-05,
"loss": 0.0067,
"step": 11410
},
{
"grad_norm": 0.22211404144763947,
"learning_rate": 4.243151088424312e-05,
"loss": 0.0069,
"step": 11420
},
{
"grad_norm": 0.21946389973163605,
"learning_rate": 4.234980040608813e-05,
"loss": 0.0084,
"step": 11430
},
{
"grad_norm": 0.21228325366973877,
"learning_rate": 4.22681108432867e-05,
"loss": 0.0063,
"step": 11440
},
{
"grad_norm": 0.23013344407081604,
"learning_rate": 4.2186442419174984e-05,
"loss": 0.0063,
"step": 11450
},
{
"grad_norm": 0.20971117913722992,
"learning_rate": 4.210479535703133e-05,
"loss": 0.0083,
"step": 11460
},
{
"grad_norm": 0.22944243252277374,
"learning_rate": 4.202316988007567e-05,
"loss": 0.0081,
"step": 11470
},
{
"grad_norm": 0.2118869572877884,
"learning_rate": 4.194156621146901e-05,
"loss": 0.0087,
"step": 11480
},
{
"grad_norm": 0.26815682649612427,
"learning_rate": 4.1859984574312596e-05,
"loss": 0.0093,
"step": 11490
},
{
"grad_norm": 0.21731431782245636,
"learning_rate": 4.177842519164752e-05,
"loss": 0.0065,
"step": 11500
},
{
"grad_norm": 0.2921445965766907,
"learning_rate": 4.169688828645404e-05,
"loss": 0.0073,
"step": 11510
},
{
"grad_norm": 0.1848747879266739,
"learning_rate": 4.161537408165092e-05,
"loss": 0.0056,
"step": 11520
},
{
"grad_norm": 0.18249960243701935,
"learning_rate": 4.1533882800094924e-05,
"loss": 0.0082,
"step": 11530
},
{
"grad_norm": 0.15499596297740936,
"learning_rate": 4.145241466458005e-05,
"loss": 0.0094,
"step": 11540
},
{
"grad_norm": 0.19080030918121338,
"learning_rate": 4.13709698978371e-05,
"loss": 0.0077,
"step": 11550
},
{
"grad_norm": 0.1779751032590866,
"learning_rate": 4.1289548722532944e-05,
"loss": 0.007,
"step": 11560
},
{
"grad_norm": 0.22897697985172272,
"learning_rate": 4.120815136126999e-05,
"loss": 0.0075,
"step": 11570
},
{
"grad_norm": 0.2294849157333374,
"learning_rate": 4.112677803658548e-05,
"loss": 0.0095,
"step": 11580
},
{
"grad_norm": 0.20816902816295624,
"learning_rate": 4.1045428970951e-05,
"loss": 0.0066,
"step": 11590
},
{
"grad_norm": 0.22296547889709473,
"learning_rate": 4.0964104386771785e-05,
"loss": 0.0088,
"step": 11600
},
{
"grad_norm": 0.15643584728240967,
"learning_rate": 4.0882804506386144e-05,
"loss": 0.0053,
"step": 11610
},
{
"grad_norm": 0.28590163588523865,
"learning_rate": 4.080152955206485e-05,
"loss": 0.0062,
"step": 11620
},
{
"grad_norm": 0.18030160665512085,
"learning_rate": 4.0720279746010505e-05,
"loss": 0.0082,
"step": 11630
},
{
"grad_norm": 0.19158947467803955,
"learning_rate": 4.063905531035699e-05,
"loss": 0.0068,
"step": 11640
},
{
"grad_norm": 0.2148633450269699,
"learning_rate": 4.055785646716882e-05,
"loss": 0.0067,
"step": 11650
},
{
"grad_norm": 0.2393154799938202,
"learning_rate": 4.047668343844051e-05,
"loss": 0.0101,
"step": 11660
},
{
"grad_norm": 0.31594404578208923,
"learning_rate": 4.039553644609604e-05,
"loss": 0.0075,
"step": 11670
},
{
"grad_norm": 0.3099832236766815,
"learning_rate": 4.0314415711988176e-05,
"loss": 0.0084,
"step": 11680
},
{
"grad_norm": 0.2928639054298401,
"learning_rate": 4.023332145789792e-05,
"loss": 0.0064,
"step": 11690
},
{
"grad_norm": 0.2324325442314148,
"learning_rate": 4.015225390553385e-05,
"loss": 0.0101,
"step": 11700
},
{
"grad_norm": 0.1799289435148239,
"learning_rate": 4.007121327653158e-05,
"loss": 0.0067,
"step": 11710
},
{
"grad_norm": 0.2345605045557022,
"learning_rate": 3.9990199792453064e-05,
"loss": 0.0104,
"step": 11720
},
{
"grad_norm": 0.22300177812576294,
"learning_rate": 3.9909213674786103e-05,
"loss": 0.0071,
"step": 11730
},
{
"grad_norm": 0.18316997587680817,
"learning_rate": 3.982825514494363e-05,
"loss": 0.0116,
"step": 11740
},
{
"grad_norm": 0.1820177286863327,
"learning_rate": 3.974732442426319e-05,
"loss": 0.0065,
"step": 11750
},
{
"grad_norm": 0.2372979074716568,
"learning_rate": 3.966642173400629e-05,
"loss": 0.0092,
"step": 11760
},
{
"grad_norm": 0.22073568403720856,
"learning_rate": 3.9585547295357764e-05,
"loss": 0.0073,
"step": 11770
},
{
"grad_norm": 0.18935738503932953,
"learning_rate": 3.950470132942526e-05,
"loss": 0.0062,
"step": 11780
},
{
"grad_norm": 0.1814570277929306,
"learning_rate": 3.942388405723856e-05,
"loss": 0.006,
"step": 11790
},
{
"grad_norm": 0.2537878751754761,
"learning_rate": 3.9343095699749e-05,
"loss": 0.0092,
"step": 11800
},
{
"grad_norm": 0.20250701904296875,
"learning_rate": 3.9262336477828874e-05,
"loss": 0.0086,
"step": 11810
},
{
"grad_norm": 0.20579147338867188,
"learning_rate": 3.9181606612270794e-05,
"loss": 0.0068,
"step": 11820
},
{
"grad_norm": 0.15340419113636017,
"learning_rate": 3.910090632378713e-05,
"loss": 0.0047,
"step": 11830
},
{
"grad_norm": 0.21056094765663147,
"learning_rate": 3.90202358330094e-05,
"loss": 0.0087,
"step": 11840
},
{
"grad_norm": 0.1647689789533615,
"learning_rate": 3.8939595360487656e-05,
"loss": 0.0061,
"step": 11850
},
{
"grad_norm": 0.17378567159175873,
"learning_rate": 3.885898512668984e-05,
"loss": 0.0055,
"step": 11860
},
{
"grad_norm": 0.179255411028862,
"learning_rate": 3.877840535200127e-05,
"loss": 0.0112,
"step": 11870
},
{
"grad_norm": 0.2528724670410156,
"learning_rate": 3.869785625672397e-05,
"loss": 0.0057,
"step": 11880
},
{
"grad_norm": 0.30834850668907166,
"learning_rate": 3.8617338061076094e-05,
"loss": 0.0065,
"step": 11890
},
{
"grad_norm": 0.21379484236240387,
"learning_rate": 3.853685098519132e-05,
"loss": 0.0061,
"step": 11900
},
{
"grad_norm": 0.21671782433986664,
"learning_rate": 3.845639524911823e-05,
"loss": 0.0058,
"step": 11910
},
{
"grad_norm": 0.1984700709581375,
"learning_rate": 3.837597107281974e-05,
"loss": 0.0061,
"step": 11920
},
{
"grad_norm": 0.13126368820667267,
"learning_rate": 3.829557867617247e-05,
"loss": 0.0076,
"step": 11930
},
{
"grad_norm": 0.22941109538078308,
"learning_rate": 3.821521827896618e-05,
"loss": 0.0089,
"step": 11940
},
{
"grad_norm": 0.20077037811279297,
"learning_rate": 3.81348901009031e-05,
"loss": 0.0079,
"step": 11950
},
{
"grad_norm": 0.27187028527259827,
"learning_rate": 3.805459436159741e-05,
"loss": 0.0054,
"step": 11960
},
{
"grad_norm": 0.250042200088501,
"learning_rate": 3.797433128057461e-05,
"loss": 0.0098,
"step": 11970
},
{
"grad_norm": 0.1824905276298523,
"learning_rate": 3.789410107727089e-05,
"loss": 0.008,
"step": 11980
},
{
"grad_norm": 0.1979016214609146,
"learning_rate": 3.781390397103257e-05,
"loss": 0.0078,
"step": 11990
},
{
"grad_norm": 0.18383477628231049,
"learning_rate": 3.7733740181115455e-05,
"loss": 0.0097,
"step": 12000
},
{
"grad_norm": 0.14712467789649963,
"learning_rate": 3.7653609926684306e-05,
"loss": 0.0064,
"step": 12010
},
{
"grad_norm": 0.2039942592382431,
"learning_rate": 3.757351342681217e-05,
"loss": 0.0064,
"step": 12020
},
{
"grad_norm": 0.19466397166252136,
"learning_rate": 3.749345090047982e-05,
"loss": 0.0064,
"step": 12030
},
{
"grad_norm": 0.24284909665584564,
"learning_rate": 3.741342256657515e-05,
"loss": 0.0065,
"step": 12040
},
{
"grad_norm": 0.21631167829036713,
"learning_rate": 3.7333428643892567e-05,
"loss": 0.0053,
"step": 12050
},
{
"grad_norm": 0.17816664278507233,
"learning_rate": 3.725346935113239e-05,
"loss": 0.0058,
"step": 12060
},
{
"grad_norm": 0.17582198977470398,
"learning_rate": 3.717354490690029e-05,
"loss": 0.0087,
"step": 12070
},
{
"grad_norm": 0.17063601315021515,
"learning_rate": 3.709365552970664e-05,
"loss": 0.0066,
"step": 12080
},
{
"grad_norm": 0.19023405015468597,
"learning_rate": 3.7013801437965945e-05,
"loss": 0.0062,
"step": 12090
},
{
"grad_norm": 0.17706483602523804,
"learning_rate": 3.693398284999623e-05,
"loss": 0.0084,
"step": 12100
},
{
"grad_norm": 0.20245179533958435,
"learning_rate": 3.6854199984018484e-05,
"loss": 0.0058,
"step": 12110
},
{
"grad_norm": 0.20159313082695007,
"learning_rate": 3.677445305815601e-05,
"loss": 0.0064,
"step": 12120
},
{
"grad_norm": 0.21612194180488586,
"learning_rate": 3.669474229043387e-05,
"loss": 0.0077,
"step": 12130
},
{
"grad_norm": 0.18706907331943512,
"learning_rate": 3.6615067898778235e-05,
"loss": 0.0077,
"step": 12140
},
{
"grad_norm": 0.24379310011863708,
"learning_rate": 3.6535430101015866e-05,
"loss": 0.0088,
"step": 12150
},
{
"grad_norm": 0.3636125922203064,
"learning_rate": 3.645582911487345e-05,
"loss": 0.0069,
"step": 12160
},
{
"grad_norm": 0.20372240245342255,
"learning_rate": 3.637626515797706e-05,
"loss": 0.0049,
"step": 12170
},
{
"grad_norm": 0.2063855528831482,
"learning_rate": 3.629673844785152e-05,
"loss": 0.0058,
"step": 12180
},
{
"grad_norm": 0.1806401014328003,
"learning_rate": 3.621724920191979e-05,
"loss": 0.0067,
"step": 12190
},
{
"grad_norm": 0.21033532917499542,
"learning_rate": 3.6137797637502444e-05,
"loss": 0.0111,
"step": 12200
},
{
"grad_norm": 0.24749749898910522,
"learning_rate": 3.6058383971817035e-05,
"loss": 0.0071,
"step": 12210
},
{
"grad_norm": 0.22450430691242218,
"learning_rate": 3.59790084219775e-05,
"loss": 0.0068,
"step": 12220
},
{
"grad_norm": 0.36298665404319763,
"learning_rate": 3.589967120499353e-05,
"loss": 0.0114,
"step": 12230
},
{
"grad_norm": 0.32570943236351013,
"learning_rate": 3.5820372537770075e-05,
"loss": 0.0074,
"step": 12240
},
{
"grad_norm": 0.2093043178319931,
"learning_rate": 3.5741112637106655e-05,
"loss": 0.0057,
"step": 12250
},
{
"grad_norm": 0.19811396300792694,
"learning_rate": 3.5661891719696804e-05,
"loss": 0.0094,
"step": 12260
},
{
"grad_norm": 0.15150699019432068,
"learning_rate": 3.5582710002127504e-05,
"loss": 0.0057,
"step": 12270
},
{
"grad_norm": 0.18276816606521606,
"learning_rate": 3.550356770087853e-05,
"loss": 0.0069,
"step": 12280
},
{
"grad_norm": 0.1798425316810608,
"learning_rate": 3.5424465032321914e-05,
"loss": 0.0088,
"step": 12290
},
{
"grad_norm": 0.22988200187683105,
"learning_rate": 3.5345402212721335e-05,
"loss": 0.0098,
"step": 12300
},
{
"grad_norm": 0.1798902451992035,
"learning_rate": 3.526637945823152e-05,
"loss": 0.0072,
"step": 12310
},
{
"grad_norm": 0.18393878638744354,
"learning_rate": 3.518739698489767e-05,
"loss": 0.0066,
"step": 12320
},
{
"grad_norm": 0.2207607924938202,
"learning_rate": 3.510845500865485e-05,
"loss": 0.0097,
"step": 12330
},
{
"grad_norm": 0.24832548201084137,
"learning_rate": 3.502955374532739e-05,
"loss": 0.0071,
"step": 12340
},
{
"grad_norm": 0.25870540738105774,
"learning_rate": 3.495069341062836e-05,
"loss": 0.0075,
"step": 12350
},
{
"grad_norm": 0.18368877470493317,
"learning_rate": 3.4871874220158896e-05,
"loss": 0.0068,
"step": 12360
},
{
"grad_norm": 0.18090079724788666,
"learning_rate": 3.479309638940762e-05,
"loss": 0.0052,
"step": 12370
},
{
"grad_norm": 0.16779513657093048,
"learning_rate": 3.4714360133750146e-05,
"loss": 0.0073,
"step": 12380
},
{
"grad_norm": 0.15940751135349274,
"learning_rate": 3.463566566844839e-05,
"loss": 0.0051,
"step": 12390
},
{
"grad_norm": 0.24262839555740356,
"learning_rate": 3.4557013208650016e-05,
"loss": 0.0094,
"step": 12400
},
{
"grad_norm": 0.24909386038780212,
"learning_rate": 3.4478402969387857e-05,
"loss": 0.0079,
"step": 12410
},
{
"grad_norm": 0.19093437492847443,
"learning_rate": 3.4399835165579266e-05,
"loss": 0.0071,
"step": 12420
},
{
"grad_norm": 0.25968360900878906,
"learning_rate": 3.4321310012025645e-05,
"loss": 0.0081,
"step": 12430
},
{
"grad_norm": 0.2364204376935959,
"learning_rate": 3.424282772341176e-05,
"loss": 0.0084,
"step": 12440
},
{
"grad_norm": 0.23931537568569183,
"learning_rate": 3.416438851430519e-05,
"loss": 0.0071,
"step": 12450
},
{
"grad_norm": 0.21466895937919617,
"learning_rate": 3.408599259915577e-05,
"loss": 0.0067,
"step": 12460
},
{
"grad_norm": 0.2476363629102707,
"learning_rate": 3.400764019229487e-05,
"loss": 0.0056,
"step": 12470
},
{
"grad_norm": 0.2308126986026764,
"learning_rate": 3.3929331507935035e-05,
"loss": 0.0091,
"step": 12480
},
{
"grad_norm": 0.222612202167511,
"learning_rate": 3.3851066760169196e-05,
"loss": 0.012,
"step": 12490
},
{
"grad_norm": 0.18202485144138336,
"learning_rate": 3.377284616297021e-05,
"loss": 0.0065,
"step": 12500
},
{
"grad_norm": 0.18596436083316803,
"learning_rate": 3.3694669930190166e-05,
"loss": 0.009,
"step": 12510
},
{
"grad_norm": 0.21673136949539185,
"learning_rate": 3.36165382755599e-05,
"loss": 0.0085,
"step": 12520
},
{
"grad_norm": 0.18619108200073242,
"learning_rate": 3.35384514126884e-05,
"loss": 0.0049,
"step": 12530
},
{
"grad_norm": 0.14304818212985992,
"learning_rate": 3.3460409555062154e-05,
"loss": 0.0058,
"step": 12540
},
{
"grad_norm": 0.188069149851799,
"learning_rate": 3.3382412916044645e-05,
"loss": 0.0062,
"step": 12550
},
{
"grad_norm": 0.2641826570034027,
"learning_rate": 3.330446170887566e-05,
"loss": 0.0083,
"step": 12560
},
{
"grad_norm": 0.20169927179813385,
"learning_rate": 3.3226556146670834e-05,
"loss": 0.0054,
"step": 12570
},
{
"grad_norm": 0.2031773030757904,
"learning_rate": 3.314869644242102e-05,
"loss": 0.0058,
"step": 12580
},
{
"grad_norm": 0.2010979801416397,
"learning_rate": 3.3070882808991674e-05,
"loss": 0.0126,
"step": 12590
},
{
"grad_norm": 0.2006341814994812,
"learning_rate": 3.2993115459122305e-05,
"loss": 0.0056,
"step": 12600
},
{
"grad_norm": 0.21729214489459991,
"learning_rate": 3.2915394605425835e-05,
"loss": 0.0081,
"step": 12610
},
{
"grad_norm": 0.18181125819683075,
"learning_rate": 3.283772046038816e-05,
"loss": 0.0066,
"step": 12620
},
{
"grad_norm": 0.1822119951248169,
"learning_rate": 3.276009323636739e-05,
"loss": 0.0072,
"step": 12630
},
{
"grad_norm": 0.2782512605190277,
"learning_rate": 3.268251314559344e-05,
"loss": 0.0063,
"step": 12640
},
{
"grad_norm": 0.3128264844417572,
"learning_rate": 3.2604980400167254e-05,
"loss": 0.0083,
"step": 12650
},
{
"grad_norm": 0.18374745547771454,
"learning_rate": 3.252749521206042e-05,
"loss": 0.0064,
"step": 12660
},
{
"grad_norm": 0.2119012176990509,
"learning_rate": 3.2450057793114494e-05,
"loss": 0.0134,
"step": 12670
},
{
"grad_norm": 0.1813078075647354,
"learning_rate": 3.2372668355040435e-05,
"loss": 0.0055,
"step": 12680
},
{
"grad_norm": 0.3598603308200836,
"learning_rate": 3.2295327109418005e-05,
"loss": 0.0101,
"step": 12690
},
{
"grad_norm": 0.2781860828399658,
"learning_rate": 3.221803426769518e-05,
"loss": 0.0087,
"step": 12700
},
{
"grad_norm": 0.19660301506519318,
"learning_rate": 3.214079004118768e-05,
"loss": 0.0088,
"step": 12710
},
{
"grad_norm": 0.18734294176101685,
"learning_rate": 3.2063594641078234e-05,
"loss": 0.0093,
"step": 12720
},
{
"grad_norm": 0.19372877478599548,
"learning_rate": 3.198644827841616e-05,
"loss": 0.0097,
"step": 12730
},
{
"grad_norm": 0.1916368454694748,
"learning_rate": 3.1909351164116654e-05,
"loss": 0.0062,
"step": 12740
},
{
"grad_norm": 0.1832991987466812,
"learning_rate": 3.183230350896026e-05,
"loss": 0.0084,
"step": 12750
},
{
"grad_norm": 0.2088966965675354,
"learning_rate": 3.1755305523592337e-05,
"loss": 0.0081,
"step": 12760
},
{
"grad_norm": 0.25852876901626587,
"learning_rate": 3.167835741852245e-05,
"loss": 0.0076,
"step": 12770
},
{
"grad_norm": 0.21350952982902527,
"learning_rate": 3.160145940412378e-05,
"loss": 0.0081,
"step": 12780
},
{
"grad_norm": 0.22510845959186554,
"learning_rate": 3.1524611690632545e-05,
"loss": 0.0044,
"step": 12790
},
{
"grad_norm": 0.1736249029636383,
"learning_rate": 3.144781448814746e-05,
"loss": 0.0083,
"step": 12800
},
{
"grad_norm": 0.21229802072048187,
"learning_rate": 3.1371068006629145e-05,
"loss": 0.0095,
"step": 12810
},
{
"grad_norm": 0.194345623254776,
"learning_rate": 3.129437245589956e-05,
"loss": 0.0084,
"step": 12820
},
{
"grad_norm": 0.2444475144147873,
"learning_rate": 3.121772804564143e-05,
"loss": 0.0103,
"step": 12830
},
{
"grad_norm": 0.23777633905410767,
"learning_rate": 3.11411349853976e-05,
"loss": 0.0071,
"step": 12840
},
{
"grad_norm": 0.19102996587753296,
"learning_rate": 3.10645934845706e-05,
"loss": 0.006,
"step": 12850
},
{
"grad_norm": 0.16968311369419098,
"learning_rate": 3.098810375242196e-05,
"loss": 0.0068,
"step": 12860
},
{
"grad_norm": 0.21856744587421417,
"learning_rate": 3.0911665998071704e-05,
"loss": 0.0082,
"step": 12870
},
{
"grad_norm": 0.14362917840480804,
"learning_rate": 3.083528043049774e-05,
"loss": 0.0067,
"step": 12880
},
{
"grad_norm": 0.18467217683792114,
"learning_rate": 3.0758947258535255e-05,
"loss": 0.0061,
"step": 12890
},
{
"grad_norm": 0.18513968586921692,
"learning_rate": 3.068266669087625e-05,
"loss": 0.0078,
"step": 12900
},
{
"grad_norm": 0.20277082920074463,
"learning_rate": 3.060643893606887e-05,
"loss": 0.0074,
"step": 12910
},
{
"grad_norm": 0.1835511475801468,
"learning_rate": 3.053026420251693e-05,
"loss": 0.0053,
"step": 12920
},
{
"grad_norm": 0.16996018588542938,
"learning_rate": 3.0454142698479183e-05,
"loss": 0.006,
"step": 12930
},
{
"grad_norm": 0.24543605744838715,
"learning_rate": 3.0378074632068954e-05,
"loss": 0.0066,
"step": 12940
},
{
"grad_norm": 0.1390630453824997,
"learning_rate": 3.0302060211253408e-05,
"loss": 0.0106,
"step": 12950
},
{
"grad_norm": 0.23156525194644928,
"learning_rate": 3.0226099643853073e-05,
"loss": 0.0066,
"step": 12960
},
{
"grad_norm": 0.150277242064476,
"learning_rate": 3.0150193137541283e-05,
"loss": 0.0075,
"step": 12970
},
{
"grad_norm": 0.1806921511888504,
"learning_rate": 3.0074340899843467e-05,
"loss": 0.0078,
"step": 12980
},
{
"grad_norm": 0.1816614270210266,
"learning_rate": 2.999854313813677e-05,
"loss": 0.0049,
"step": 12990
},
{
"grad_norm": 0.18437013030052185,
"learning_rate": 2.9922800059649382e-05,
"loss": 0.0071,
"step": 13000
},
{
"grad_norm": 0.20052628219127655,
"learning_rate": 2.9847111871459976e-05,
"loss": 0.0053,
"step": 13010
},
{
"grad_norm": 0.1503404974937439,
"learning_rate": 2.977147878049721e-05,
"loss": 0.0061,
"step": 13020
},
{
"grad_norm": 0.17255957424640656,
"learning_rate": 2.9695900993539006e-05,
"loss": 0.0054,
"step": 13030
},
{
"grad_norm": 0.17958895862102509,
"learning_rate": 2.9620378717212183e-05,
"loss": 0.0062,
"step": 13040
},
{
"grad_norm": 0.1710553616285324,
"learning_rate": 2.9544912157991745e-05,
"loss": 0.0088,
"step": 13050
},
{
"grad_norm": 0.2009718269109726,
"learning_rate": 2.9469501522200405e-05,
"loss": 0.0049,
"step": 13060
},
{
"grad_norm": 0.23603618144989014,
"learning_rate": 2.9394147016007946e-05,
"loss": 0.0062,
"step": 13070
},
{
"grad_norm": 0.18876326084136963,
"learning_rate": 2.9318848845430702e-05,
"loss": 0.0067,
"step": 13080
},
{
"grad_norm": 0.1263684630393982,
"learning_rate": 2.9243607216331013e-05,
"loss": 0.004,
"step": 13090
},
{
"grad_norm": 0.18033087253570557,
"learning_rate": 2.916842233441661e-05,
"loss": 0.0103,
"step": 13100
},
{
"grad_norm": 0.17048488557338715,
"learning_rate": 2.90932944052401e-05,
"loss": 0.0065,
"step": 13110
},
{
"grad_norm": 0.20619574189186096,
"learning_rate": 2.9018223634198354e-05,
"loss": 0.0088,
"step": 13120
},
{
"grad_norm": 0.32705435156822205,
"learning_rate": 2.8943210226532025e-05,
"loss": 0.0072,
"step": 13130
},
{
"grad_norm": 0.25954321026802063,
"learning_rate": 2.8868254387324857e-05,
"loss": 0.0123,
"step": 13140
},
{
"grad_norm": 0.12593084573745728,
"learning_rate": 2.8793356321503306e-05,
"loss": 0.0061,
"step": 13150
},
{
"grad_norm": 0.15029622614383698,
"learning_rate": 2.87185162338358e-05,
"loss": 0.0061,
"step": 13160
},
{
"grad_norm": 0.1333160251379013,
"learning_rate": 2.8643734328932253e-05,
"loss": 0.0081,
"step": 13170
},
{
"grad_norm": 0.14292830228805542,
"learning_rate": 2.856901081124359e-05,
"loss": 0.0063,
"step": 13180
},
{
"grad_norm": 0.17720255255699158,
"learning_rate": 2.8494345885061002e-05,
"loss": 0.0099,
"step": 13190
},
{
"grad_norm": 0.2270614057779312,
"learning_rate": 2.8419739754515616e-05,
"loss": 0.0094,
"step": 13200
},
{
"grad_norm": 0.20722974836826324,
"learning_rate": 2.8345192623577666e-05,
"loss": 0.0063,
"step": 13210
},
{
"grad_norm": 0.19621099531650543,
"learning_rate": 2.8270704696056193e-05,
"loss": 0.0136,
"step": 13220
},
{
"grad_norm": 0.16113749146461487,
"learning_rate": 2.8196276175598367e-05,
"loss": 0.005,
"step": 13230
},
{
"grad_norm": 0.1010737493634224,
"learning_rate": 2.8121907265688884e-05,
"loss": 0.0041,
"step": 13240
},
{
"grad_norm": 0.2032041996717453,
"learning_rate": 2.804759816964957e-05,
"loss": 0.0093,
"step": 13250
},
{
"grad_norm": 0.24636270105838776,
"learning_rate": 2.797334909063857e-05,
"loss": 0.008,
"step": 13260
},
{
"grad_norm": 0.23668859899044037,
"learning_rate": 2.7899160231650056e-05,
"loss": 0.0081,
"step": 13270
},
{
"grad_norm": 0.18992413580417633,
"learning_rate": 2.7825031795513585e-05,
"loss": 0.0067,
"step": 13280
},
{
"grad_norm": 0.23678800463676453,
"learning_rate": 2.775096398489341e-05,
"loss": 0.0054,
"step": 13290
},
{
"grad_norm": 0.18498824536800385,
"learning_rate": 2.7676957002288163e-05,
"loss": 0.0066,
"step": 13300
},
{
"grad_norm": 0.16668982803821564,
"learning_rate": 2.760301105003003e-05,
"loss": 0.0097,
"step": 13310
},
{
"grad_norm": 0.2797868847846985,
"learning_rate": 2.752912633028446e-05,
"loss": 0.0065,
"step": 13320
},
{
"grad_norm": 0.1960904598236084,
"learning_rate": 2.7455303045049474e-05,
"loss": 0.0087,
"step": 13330
},
{
"grad_norm": 0.17691780626773834,
"learning_rate": 2.7381541396155098e-05,
"loss": 0.008,
"step": 13340
},
{
"grad_norm": 0.18473778665065765,
"learning_rate": 2.730784158526286e-05,
"loss": 0.0078,
"step": 13350
},
{
"grad_norm": 0.20810966193675995,
"learning_rate": 2.723420381386521e-05,
"loss": 0.0069,
"step": 13360
},
{
"grad_norm": 0.15738220512866974,
"learning_rate": 2.7160628283285018e-05,
"loss": 0.0073,
"step": 13370
},
{
"grad_norm": 0.2037407010793686,
"learning_rate": 2.7087115194675007e-05,
"loss": 0.0079,
"step": 13380
},
{
"grad_norm": 0.2184944599866867,
"learning_rate": 2.701366474901712e-05,
"loss": 0.0082,
"step": 13390
},
{
"grad_norm": 0.15587963163852692,
"learning_rate": 2.6940277147122085e-05,
"loss": 0.0059,
"step": 13400
},
{
"grad_norm": 0.16783714294433594,
"learning_rate": 2.686695258962878e-05,
"loss": 0.007,
"step": 13410
},
{
"grad_norm": 0.17781268060207367,
"learning_rate": 2.679369127700375e-05,
"loss": 0.007,
"step": 13420
},
{
"grad_norm": 0.18915528059005737,
"learning_rate": 2.672049340954067e-05,
"loss": 0.0087,
"step": 13430
},
{
"grad_norm": 0.12304963916540146,
"learning_rate": 2.6647359187359676e-05,
"loss": 0.0064,
"step": 13440
},
{
"grad_norm": 0.14121095836162567,
"learning_rate": 2.6574288810406946e-05,
"loss": 0.0057,
"step": 13450
},
{
"grad_norm": 0.16274137794971466,
"learning_rate": 2.6501282478454083e-05,
"loss": 0.0051,
"step": 13460
},
{
"grad_norm": 0.10483109205961227,
"learning_rate": 2.6428340391097618e-05,
"loss": 0.0062,
"step": 13470
},
{
"grad_norm": 0.2279292345046997,
"learning_rate": 2.6355462747758485e-05,
"loss": 0.0073,
"step": 13480
},
{
"grad_norm": 0.18314263224601746,
"learning_rate": 2.6282649747681304e-05,
"loss": 0.0051,
"step": 13490
},
{
"grad_norm": 0.1461828052997589,
"learning_rate": 2.620990158993406e-05,
"loss": 0.0058,
"step": 13500
},
{
"grad_norm": 0.20838026702404022,
"learning_rate": 2.6137218473407477e-05,
"loss": 0.0061,
"step": 13510
},
{
"grad_norm": 0.19161008298397064,
"learning_rate": 2.606460059681436e-05,
"loss": 0.0057,
"step": 13520
},
{
"grad_norm": 0.1648252159357071,
"learning_rate": 2.599204815868928e-05,
"loss": 0.0057,
"step": 13530
},
{
"grad_norm": 0.13666298985481262,
"learning_rate": 2.5919561357387756e-05,
"loss": 0.0054,
"step": 13540
},
{
"grad_norm": 0.13384267687797546,
"learning_rate": 2.5847140391085972e-05,
"loss": 0.0048,
"step": 13550
},
{
"grad_norm": 0.11047980189323425,
"learning_rate": 2.5774785457780103e-05,
"loss": 0.0077,
"step": 13560
},
{
"grad_norm": 0.13502903282642365,
"learning_rate": 2.5702496755285753e-05,
"loss": 0.0051,
"step": 13570
},
{
"grad_norm": 0.1964835673570633,
"learning_rate": 2.5630274481237483e-05,
"loss": 0.0066,
"step": 13580
},
{
"grad_norm": 0.1709243208169937,
"learning_rate": 2.5558118833088197e-05,
"loss": 0.006,
"step": 13590
},
{
"grad_norm": 0.19570232927799225,
"learning_rate": 2.548603000810872e-05,
"loss": 0.0072,
"step": 13600
},
{
"grad_norm": 0.1558714509010315,
"learning_rate": 2.5414008203387152e-05,
"loss": 0.0076,
"step": 13610
},
{
"grad_norm": 0.18019653856754303,
"learning_rate": 2.534205361582834e-05,
"loss": 0.0053,
"step": 13620
},
{
"grad_norm": 0.16142991185188293,
"learning_rate": 2.527016644215338e-05,
"loss": 0.0088,
"step": 13630
},
{
"grad_norm": 0.13523423671722412,
"learning_rate": 2.519834687889905e-05,
"loss": 0.0062,
"step": 13640
},
{
"grad_norm": 0.15555405616760254,
"learning_rate": 2.5126595122417295e-05,
"loss": 0.0051,
"step": 13650
},
{
"grad_norm": 0.1433010846376419,
"learning_rate": 2.5054911368874713e-05,
"loss": 0.0054,
"step": 13660
},
{
"grad_norm": 0.1764252781867981,
"learning_rate": 2.4983295814251916e-05,
"loss": 0.0052,
"step": 13670
},
{
"grad_norm": 0.2680763006210327,
"learning_rate": 2.4911748654343105e-05,
"loss": 0.0092,
"step": 13680
},
{
"grad_norm": 0.2182915061712265,
"learning_rate": 2.4840270084755463e-05,
"loss": 0.0066,
"step": 13690
},
{
"grad_norm": 0.21932873129844666,
"learning_rate": 2.4768860300908685e-05,
"loss": 0.0081,
"step": 13700
},
{
"grad_norm": 0.16487343609333038,
"learning_rate": 2.469751949803443e-05,
"loss": 0.0094,
"step": 13710
},
{
"grad_norm": 0.1933409422636032,
"learning_rate": 2.4626247871175666e-05,
"loss": 0.0084,
"step": 13720
},
{
"grad_norm": 0.22529985010623932,
"learning_rate": 2.4555045615186346e-05,
"loss": 0.006,
"step": 13730
},
{
"grad_norm": 0.14443622529506683,
"learning_rate": 2.4483912924730677e-05,
"loss": 0.0048,
"step": 13740
},
{
"grad_norm": 0.29311496019363403,
"learning_rate": 2.4412849994282742e-05,
"loss": 0.0077,
"step": 13750
},
{
"grad_norm": 0.1520247906446457,
"learning_rate": 2.434185701812592e-05,
"loss": 0.0091,
"step": 13760
},
{
"grad_norm": 0.2153899073600769,
"learning_rate": 2.4270934190352218e-05,
"loss": 0.0052,
"step": 13770
},
{
"grad_norm": 0.18413366377353668,
"learning_rate": 2.4200081704861998e-05,
"loss": 0.0046,
"step": 13780
},
{
"grad_norm": 0.15907230973243713,
"learning_rate": 2.412929975536321e-05,
"loss": 0.0067,
"step": 13790
},
{
"grad_norm": 0.1581483632326126,
"learning_rate": 2.4058588535371017e-05,
"loss": 0.0048,
"step": 13800
},
{
"grad_norm": 0.11827662587165833,
"learning_rate": 2.3987948238207243e-05,
"loss": 0.0044,
"step": 13810
},
{
"grad_norm": 0.147860586643219,
"learning_rate": 2.3917379056999678e-05,
"loss": 0.0054,
"step": 13820
},
{
"grad_norm": 0.13074715435504913,
"learning_rate": 2.3846881184681824e-05,
"loss": 0.0064,
"step": 13830
},
{
"grad_norm": 0.23691308498382568,
"learning_rate": 2.377645481399214e-05,
"loss": 0.0063,
"step": 13840
},
{
"grad_norm": 0.1883758157491684,
"learning_rate": 2.3706100137473667e-05,
"loss": 0.005,
"step": 13850
},
{
"grad_norm": 0.1975412368774414,
"learning_rate": 2.3635817347473394e-05,
"loss": 0.0094,
"step": 13860
},
{
"grad_norm": 0.1934109926223755,
"learning_rate": 2.3565606636141757e-05,
"loss": 0.0054,
"step": 13870
},
{
"grad_norm": 0.21237199008464813,
"learning_rate": 2.3495468195432203e-05,
"loss": 0.0067,
"step": 13880
},
{
"grad_norm": 0.1800747960805893,
"learning_rate": 2.3425402217100507e-05,
"loss": 0.0072,
"step": 13890
},
{
"grad_norm": 0.16725149750709534,
"learning_rate": 2.3355408892704424e-05,
"loss": 0.0049,
"step": 13900
},
{
"grad_norm": 0.1488545536994934,
"learning_rate": 2.3285488413603003e-05,
"loss": 0.0084,
"step": 13910
},
{
"grad_norm": 0.18190857768058777,
"learning_rate": 2.321564097095615e-05,
"loss": 0.0054,
"step": 13920
},
{
"grad_norm": 0.18374863266944885,
"learning_rate": 2.3145866755724142e-05,
"loss": 0.006,
"step": 13930
},
{
"grad_norm": 0.11474452167749405,
"learning_rate": 2.307616595866699e-05,
"loss": 0.0044,
"step": 13940
},
{
"grad_norm": 0.16919605433940887,
"learning_rate": 2.3006538770344032e-05,
"loss": 0.0062,
"step": 13950
},
{
"grad_norm": 0.14991624653339386,
"learning_rate": 2.293698538111334e-05,
"loss": 0.0081,
"step": 13960
},
{
"grad_norm": 0.20199425518512726,
"learning_rate": 2.28675059811312e-05,
"loss": 0.0068,
"step": 13970
},
{
"grad_norm": 0.15437351167201996,
"learning_rate": 2.279810076035167e-05,
"loss": 0.0049,
"step": 13980
},
{
"grad_norm": 0.16406899690628052,
"learning_rate": 2.272876990852596e-05,
"loss": 0.0073,
"step": 13990
},
{
"grad_norm": 0.13990004360675812,
"learning_rate": 2.265951361520195e-05,
"loss": 0.0058,
"step": 14000
},
{
"grad_norm": 0.1541512906551361,
"learning_rate": 2.2590332069723748e-05,
"loss": 0.0059,
"step": 14010
},
{
"grad_norm": 0.15612509846687317,
"learning_rate": 2.2521225461231004e-05,
"loss": 0.0118,
"step": 14020
},
{
"grad_norm": 0.10496876388788223,
"learning_rate": 2.2452193978658597e-05,
"loss": 0.0068,
"step": 14030
},
{
"grad_norm": 0.2270757555961609,
"learning_rate": 2.238323781073594e-05,
"loss": 0.0076,
"step": 14040
},
{
"grad_norm": 0.18619896471500397,
"learning_rate": 2.2314357145986552e-05,
"loss": 0.0056,
"step": 14050
},
{
"grad_norm": 0.1321837157011032,
"learning_rate": 2.224555217272757e-05,
"loss": 0.0044,
"step": 14060
},
{
"grad_norm": 0.15203258395195007,
"learning_rate": 2.2176823079069127e-05,
"loss": 0.0055,
"step": 14070
},
{
"grad_norm": 0.20918844640254974,
"learning_rate": 2.210817005291398e-05,
"loss": 0.006,
"step": 14080
},
{
"grad_norm": 0.22175315022468567,
"learning_rate": 2.203959328195686e-05,
"loss": 0.0061,
"step": 14090
},
{
"grad_norm": 0.09628665447235107,
"learning_rate": 2.1971092953684026e-05,
"loss": 0.0048,
"step": 14100
},
{
"grad_norm": 0.20158182084560394,
"learning_rate": 2.1902669255372788e-05,
"loss": 0.0045,
"step": 14110
},
{
"grad_norm": 0.1197918951511383,
"learning_rate": 2.1834322374090897e-05,
"loss": 0.0077,
"step": 14120
},
{
"grad_norm": 0.16859853267669678,
"learning_rate": 2.1766052496696153e-05,
"loss": 0.0061,
"step": 14130
},
{
"grad_norm": 0.1983485370874405,
"learning_rate": 2.169785980983577e-05,
"loss": 0.0046,
"step": 14140
},
{
"grad_norm": 0.18621300160884857,
"learning_rate": 2.162974449994593e-05,
"loss": 0.0063,
"step": 14150
},
{
"grad_norm": 0.1051415279507637,
"learning_rate": 2.1561706753251337e-05,
"loss": 0.0048,
"step": 14160
},
{
"grad_norm": 0.18732018768787384,
"learning_rate": 2.1493746755764544e-05,
"loss": 0.0068,
"step": 14170
},
{
"grad_norm": 0.13659246265888214,
"learning_rate": 2.1425864693285635e-05,
"loss": 0.0051,
"step": 14180
},
{
"grad_norm": 0.14370456337928772,
"learning_rate": 2.1358060751401547e-05,
"loss": 0.0044,
"step": 14190
},
{
"grad_norm": 0.14926820993423462,
"learning_rate": 2.129033511548566e-05,
"loss": 0.0052,
"step": 14200
},
{
"grad_norm": 0.15225178003311157,
"learning_rate": 2.1222687970697315e-05,
"loss": 0.0071,
"step": 14210
},
{
"grad_norm": 0.1812531054019928,
"learning_rate": 2.1155119501981173e-05,
"loss": 0.0063,
"step": 14220
},
{
"grad_norm": 0.21825918555259705,
"learning_rate": 2.1087629894066895e-05,
"loss": 0.0065,
"step": 14230
},
{
"grad_norm": 0.2558552026748657,
"learning_rate": 2.1020219331468473e-05,
"loss": 0.0077,
"step": 14240
},
{
"grad_norm": 0.16796202957630157,
"learning_rate": 2.095288799848379e-05,
"loss": 0.0045,
"step": 14250
},
{
"grad_norm": 0.15485888719558716,
"learning_rate": 2.088563607919417e-05,
"loss": 0.005,
"step": 14260
},
{
"grad_norm": 0.16367082297801971,
"learning_rate": 2.0818463757463786e-05,
"loss": 0.005,
"step": 14270
},
{
"grad_norm": 0.23867768049240112,
"learning_rate": 2.0751371216939175e-05,
"loss": 0.0092,
"step": 14280
},
{
"grad_norm": 0.2102659046649933,
"learning_rate": 2.068435864104882e-05,
"loss": 0.0043,
"step": 14290
},
{
"grad_norm": 0.18887297809123993,
"learning_rate": 2.0617426213002506e-05,
"loss": 0.0056,
"step": 14300
},
{
"grad_norm": 0.18655873835086823,
"learning_rate": 2.055057411579097e-05,
"loss": 0.0061,
"step": 14310
},
{
"grad_norm": 0.20691068470478058,
"learning_rate": 2.0483802532185286e-05,
"loss": 0.0061,
"step": 14320
},
{
"grad_norm": 0.15043525397777557,
"learning_rate": 2.041711164473638e-05,
"loss": 0.0055,
"step": 14330
},
{
"grad_norm": 0.14773303270339966,
"learning_rate": 2.0350501635774637e-05,
"loss": 0.0043,
"step": 14340
},
{
"grad_norm": 0.16621702909469604,
"learning_rate": 2.0283972687409247e-05,
"loss": 0.0057,
"step": 14350
},
{
"grad_norm": 0.14620442688465118,
"learning_rate": 2.021752498152784e-05,
"loss": 0.0062,
"step": 14360
},
{
"grad_norm": 0.17155922949314117,
"learning_rate": 2.015115869979589e-05,
"loss": 0.0054,
"step": 14370
},
{
"grad_norm": 0.16646042466163635,
"learning_rate": 2.0084874023656265e-05,
"loss": 0.0045,
"step": 14380
},
{
"grad_norm": 0.15475299954414368,
"learning_rate": 2.001867113432877e-05,
"loss": 0.0092,
"step": 14390
},
{
"grad_norm": 0.1650644838809967,
"learning_rate": 1.995255021280954e-05,
"loss": 0.006,
"step": 14400
},
{
"grad_norm": 0.21478888392448425,
"learning_rate": 1.9886511439870688e-05,
"loss": 0.0055,
"step": 14410
},
{
"grad_norm": 0.1912056803703308,
"learning_rate": 1.9820554996059675e-05,
"loss": 0.008,
"step": 14420
},
{
"grad_norm": 0.16896963119506836,
"learning_rate": 1.9754681061698893e-05,
"loss": 0.0064,
"step": 14430
},
{
"grad_norm": 0.17615389823913574,
"learning_rate": 1.9688889816885185e-05,
"loss": 0.0059,
"step": 14440
},
{
"grad_norm": 0.17325952649116516,
"learning_rate": 1.962318144148928e-05,
"loss": 0.0061,
"step": 14450
},
{
"grad_norm": 0.16517771780490875,
"learning_rate": 1.955755611515539e-05,
"loss": 0.007,
"step": 14460
},
{
"grad_norm": 0.11359621584415436,
"learning_rate": 1.9492014017300642e-05,
"loss": 0.0038,
"step": 14470
},
{
"grad_norm": 0.1052914410829544,
"learning_rate": 1.942655532711461e-05,
"loss": 0.0066,
"step": 14480
},
{
"grad_norm": 0.119536854326725,
"learning_rate": 1.9361180223558882e-05,
"loss": 0.0055,
"step": 14490
},
{
"grad_norm": 0.11046026647090912,
"learning_rate": 1.929588888536647e-05,
"loss": 0.0047,
"step": 14500
},
{
"grad_norm": 0.13877028226852417,
"learning_rate": 1.9230681491041425e-05,
"loss": 0.006,
"step": 14510
},
{
"grad_norm": 0.18733717501163483,
"learning_rate": 1.9165558218858264e-05,
"loss": 0.005,
"step": 14520
},
{
"grad_norm": 0.12102721631526947,
"learning_rate": 1.9100519246861505e-05,
"loss": 0.0052,
"step": 14530
},
{
"grad_norm": 0.14464715123176575,
"learning_rate": 1.9035564752865248e-05,
"loss": 0.0048,
"step": 14540
},
{
"grad_norm": 0.15074194967746735,
"learning_rate": 1.897069491445258e-05,
"loss": 0.0045,
"step": 14550
},
{
"grad_norm": 0.15628309547901154,
"learning_rate": 1.890590990897515e-05,
"loss": 0.0041,
"step": 14560
},
{
"grad_norm": 0.1759437769651413,
"learning_rate": 1.884120991355272e-05,
"loss": 0.008,
"step": 14570
},
{
"grad_norm": 0.14988595247268677,
"learning_rate": 1.8776595105072576e-05,
"loss": 0.0044,
"step": 14580
},
{
"grad_norm": 0.15233299136161804,
"learning_rate": 1.8712065660189166e-05,
"loss": 0.0058,
"step": 14590
},
{
"grad_norm": 0.118324413895607,
"learning_rate": 1.8647621755323513e-05,
"loss": 0.0049,
"step": 14600
},
{
"grad_norm": 0.11879850178956985,
"learning_rate": 1.858326356666278e-05,
"loss": 0.0049,
"step": 14610
},
{
"grad_norm": 0.14314360916614532,
"learning_rate": 1.851899127015983e-05,
"loss": 0.0048,
"step": 14620
},
{
"grad_norm": 0.12158121913671494,
"learning_rate": 1.8454805041532626e-05,
"loss": 0.0048,
"step": 14630
},
{
"grad_norm": 0.1919155865907669,
"learning_rate": 1.8390705056263906e-05,
"loss": 0.0064,
"step": 14640
},
{
"grad_norm": 0.2054741382598877,
"learning_rate": 1.832669148960057e-05,
"loss": 0.0072,
"step": 14650
},
{
"grad_norm": 0.16221170127391815,
"learning_rate": 1.8262764516553233e-05,
"loss": 0.0078,
"step": 14660
},
{
"grad_norm": 0.17063555121421814,
"learning_rate": 1.8198924311895843e-05,
"loss": 0.0066,
"step": 14670
},
{
"grad_norm": 0.14749523997306824,
"learning_rate": 1.813517105016505e-05,
"loss": 0.0044,
"step": 14680
},
{
"grad_norm": 0.15508879721164703,
"learning_rate": 1.8071504905659888e-05,
"loss": 0.0049,
"step": 14690
},
{
"grad_norm": 0.1870722770690918,
"learning_rate": 1.800792605244109e-05,
"loss": 0.0075,
"step": 14700
},
{
"grad_norm": 0.11199451237916946,
"learning_rate": 1.7944434664330844e-05,
"loss": 0.0047,
"step": 14710
},
{
"grad_norm": 0.15122726559638977,
"learning_rate": 1.7881030914912212e-05,
"loss": 0.0053,
"step": 14720
},
{
"grad_norm": 0.18994122743606567,
"learning_rate": 1.7817714977528577e-05,
"loss": 0.0063,
"step": 14730
},
{
"grad_norm": 0.14027075469493866,
"learning_rate": 1.7754487025283332e-05,
"loss": 0.0047,
"step": 14740
},
{
"grad_norm": 0.15433025360107422,
"learning_rate": 1.7691347231039275e-05,
"loss": 0.0045,
"step": 14750
},
{
"grad_norm": 0.12539328634738922,
"learning_rate": 1.7628295767418164e-05,
"loss": 0.0066,
"step": 14760
},
{
"grad_norm": 0.16263306140899658,
"learning_rate": 1.7565332806800333e-05,
"loss": 0.0057,
"step": 14770
},
{
"grad_norm": 0.20516878366470337,
"learning_rate": 1.750245852132408e-05,
"loss": 0.0069,
"step": 14780
},
{
"grad_norm": 0.155752494931221,
"learning_rate": 1.7439673082885323e-05,
"loss": 0.0042,
"step": 14790
},
{
"grad_norm": 0.21590086817741394,
"learning_rate": 1.7376976663137047e-05,
"loss": 0.0068,
"step": 14800
},
{
"grad_norm": 0.1473667174577713,
"learning_rate": 1.7314369433488853e-05,
"loss": 0.0086,
"step": 14810
},
{
"grad_norm": 0.18347211182117462,
"learning_rate": 1.7251851565106548e-05,
"loss": 0.0054,
"step": 14820
},
{
"grad_norm": 0.1619826704263687,
"learning_rate": 1.7189423228911574e-05,
"loss": 0.0052,
"step": 14830
},
{
"grad_norm": 0.10061666369438171,
"learning_rate": 1.7127084595580606e-05,
"loss": 0.0056,
"step": 14840
},
{
"grad_norm": 0.16300608217716217,
"learning_rate": 1.706483583554513e-05,
"loss": 0.009,
"step": 14850
},
{
"grad_norm": 0.16306783258914948,
"learning_rate": 1.700267711899083e-05,
"loss": 0.0121,
"step": 14860
},
{
"grad_norm": 0.15848280489444733,
"learning_rate": 1.69406086158573e-05,
"loss": 0.0065,
"step": 14870
},
{
"grad_norm": 0.1942242980003357,
"learning_rate": 1.6878630495837455e-05,
"loss": 0.0087,
"step": 14880
},
{
"grad_norm": 0.262200266122818,
"learning_rate": 1.681674292837707e-05,
"loss": 0.0063,
"step": 14890
},
{
"grad_norm": 0.21699029207229614,
"learning_rate": 1.6754946082674444e-05,
"loss": 0.0072,
"step": 14900
},
{
"grad_norm": 0.22985686361789703,
"learning_rate": 1.6693240127679748e-05,
"loss": 0.0044,
"step": 14910
},
{
"grad_norm": 0.15517984330654144,
"learning_rate": 1.663162523209475e-05,
"loss": 0.0104,
"step": 14920
},
{
"grad_norm": 0.1608007550239563,
"learning_rate": 1.6570101564372193e-05,
"loss": 0.0105,
"step": 14930
},
{
"grad_norm": 0.23195034265518188,
"learning_rate": 1.650866929271543e-05,
"loss": 0.0086,
"step": 14940
},
{
"grad_norm": 0.1510450690984726,
"learning_rate": 1.644732858507797e-05,
"loss": 0.0067,
"step": 14950
},
{
"grad_norm": 0.16938410699367523,
"learning_rate": 1.6386079609162943e-05,
"loss": 0.0054,
"step": 14960
},
{
"grad_norm": 0.17797565460205078,
"learning_rate": 1.6324922532422742e-05,
"loss": 0.0052,
"step": 14970
},
{
"grad_norm": 0.1584869772195816,
"learning_rate": 1.6263857522058434e-05,
"loss": 0.0079,
"step": 14980
},
{
"grad_norm": 0.17295604944229126,
"learning_rate": 1.6202884745019443e-05,
"loss": 0.0069,
"step": 14990
},
{
"grad_norm": 0.15914912521839142,
"learning_rate": 1.614200436800304e-05,
"loss": 0.0075,
"step": 15000
},
{
"grad_norm": 0.1304100751876831,
"learning_rate": 1.6081216557453814e-05,
"loss": 0.0056,
"step": 15010
},
{
"grad_norm": 0.1199740543961525,
"learning_rate": 1.6020521479563367e-05,
"loss": 0.0043,
"step": 15020
},
{
"grad_norm": 0.14026935398578644,
"learning_rate": 1.5959919300269654e-05,
"loss": 0.0041,
"step": 15030
},
{
"grad_norm": 0.12699109315872192,
"learning_rate": 1.5899410185256764e-05,
"loss": 0.005,
"step": 15040
},
{
"grad_norm": 0.13951744139194489,
"learning_rate": 1.583899429995431e-05,
"loss": 0.0044,
"step": 15050
},
{
"grad_norm": 0.1004301905632019,
"learning_rate": 1.5778671809536993e-05,
"loss": 0.0072,
"step": 15060
},
{
"grad_norm": 0.1335887461900711,
"learning_rate": 1.5718442878924246e-05,
"loss": 0.005,
"step": 15070
},
{
"grad_norm": 0.17112179100513458,
"learning_rate": 1.5658307672779593e-05,
"loss": 0.0067,
"step": 15080
},
{
"grad_norm": 0.16543389856815338,
"learning_rate": 1.5598266355510427e-05,
"loss": 0.0046,
"step": 15090
},
{
"grad_norm": 0.13843472301959991,
"learning_rate": 1.553831909126744e-05,
"loss": 0.0074,
"step": 15100
},
{
"grad_norm": 0.15110492706298828,
"learning_rate": 1.5478466043944135e-05,
"loss": 0.0054,
"step": 15110
},
{
"grad_norm": 0.22796016931533813,
"learning_rate": 1.5418707377176468e-05,
"loss": 0.0078,
"step": 15120
},
{
"grad_norm": 0.14223039150238037,
"learning_rate": 1.535904325434233e-05,
"loss": 0.0054,
"step": 15130
},
{
"grad_norm": 0.09044605493545532,
"learning_rate": 1.529947383856118e-05,
"loss": 0.005,
"step": 15140
},
{
"grad_norm": 0.18607757985591888,
"learning_rate": 1.5239999292693524e-05,
"loss": 0.0067,
"step": 15150
},
{
"grad_norm": 0.12722477316856384,
"learning_rate": 1.5180619779340505e-05,
"loss": 0.0077,
"step": 15160
},
{
"grad_norm": 0.134136363863945,
"learning_rate": 1.5121335460843428e-05,
"loss": 0.0045,
"step": 15170
},
{
"grad_norm": 0.09909097850322723,
"learning_rate": 1.5062146499283347e-05,
"loss": 0.0036,
"step": 15180
},
{
"grad_norm": 0.1600300371646881,
"learning_rate": 1.5003053056480643e-05,
"loss": 0.005,
"step": 15190
},
{
"grad_norm": 0.0979742482304573,
"learning_rate": 1.4944055293994551e-05,
"loss": 0.0045,
"step": 15200
},
{
"grad_norm": 0.1393214762210846,
"learning_rate": 1.4885153373122656e-05,
"loss": 0.0059,
"step": 15210
},
{
"grad_norm": 0.16196110844612122,
"learning_rate": 1.482634745490059e-05,
"loss": 0.0047,
"step": 15220
},
{
"grad_norm": 0.1884429156780243,
"learning_rate": 1.4767637700101466e-05,
"loss": 0.0072,
"step": 15230
},
{
"grad_norm": 0.20021089911460876,
"learning_rate": 1.4709024269235528e-05,
"loss": 0.0057,
"step": 15240
},
{
"grad_norm": 0.12900827825069427,
"learning_rate": 1.4650507322549684e-05,
"loss": 0.0047,
"step": 15250
},
{
"grad_norm": 0.14503392577171326,
"learning_rate": 1.4592087020026972e-05,
"loss": 0.0055,
"step": 15260
},
{
"grad_norm": 0.14478757977485657,
"learning_rate": 1.4533763521386318e-05,
"loss": 0.0079,
"step": 15270
},
{
"grad_norm": 0.1581815630197525,
"learning_rate": 1.44755369860819e-05,
"loss": 0.0098,
"step": 15280
},
{
"grad_norm": 0.16933442652225494,
"learning_rate": 1.441740757330287e-05,
"loss": 0.0071,
"step": 15290
},
{
"grad_norm": 0.1398744434118271,
"learning_rate": 1.4359375441972844e-05,
"loss": 0.0077,
"step": 15300
},
{
"grad_norm": 0.11288022249937057,
"learning_rate": 1.4301440750749395e-05,
"loss": 0.0038,
"step": 15310
},
{
"grad_norm": 0.12249251455068588,
"learning_rate": 1.4243603658023808e-05,
"loss": 0.0039,
"step": 15320
},
{
"grad_norm": 0.14226596057415009,
"learning_rate": 1.4185864321920444e-05,
"loss": 0.0075,
"step": 15330
},
{
"grad_norm": 0.16929765045642853,
"learning_rate": 1.4128222900296485e-05,
"loss": 0.0054,
"step": 15340
},
{
"grad_norm": 0.12506860494613647,
"learning_rate": 1.407067955074135e-05,
"loss": 0.0052,
"step": 15350
},
{
"grad_norm": 0.13736629486083984,
"learning_rate": 1.4013234430576356e-05,
"loss": 0.0074,
"step": 15360
},
{
"grad_norm": 0.1849488615989685,
"learning_rate": 1.3955887696854286e-05,
"loss": 0.0081,
"step": 15370
},
{
"grad_norm": 0.10855989158153534,
"learning_rate": 1.38986395063589e-05,
"loss": 0.0045,
"step": 15380
},
{
"grad_norm": 0.1332855224609375,
"learning_rate": 1.3841490015604597e-05,
"loss": 0.0051,
"step": 15390
},
{
"grad_norm": 0.13144734501838684,
"learning_rate": 1.3784439380835879e-05,
"loss": 0.0057,
"step": 15400
},
{
"grad_norm": 0.1505713164806366,
"learning_rate": 1.3727487758026986e-05,
"loss": 0.0046,
"step": 15410
},
{
"grad_norm": 0.18800579011440277,
"learning_rate": 1.3670635302881525e-05,
"loss": 0.0073,
"step": 15420
},
{
"grad_norm": 0.09939403086900711,
"learning_rate": 1.3613882170831888e-05,
"loss": 0.0035,
"step": 15430
},
{
"grad_norm": 0.14586606621742249,
"learning_rate": 1.355722851703901e-05,
"loss": 0.0041,
"step": 15440
},
{
"grad_norm": 0.15744535624980927,
"learning_rate": 1.3500674496391814e-05,
"loss": 0.0076,
"step": 15450
},
{
"grad_norm": 0.11027566343545914,
"learning_rate": 1.3444220263506795e-05,
"loss": 0.0044,
"step": 15460
},
{
"grad_norm": 0.16130055487155914,
"learning_rate": 1.3387865972727714e-05,
"loss": 0.005,
"step": 15470
},
{
"grad_norm": 0.11999835073947906,
"learning_rate": 1.3331611778125036e-05,
"loss": 0.0055,
"step": 15480
},
{
"grad_norm": 0.11079049110412598,
"learning_rate": 1.3275457833495564e-05,
"loss": 0.0063,
"step": 15490
},
{
"grad_norm": 0.11384254693984985,
"learning_rate": 1.3219404292362065e-05,
"loss": 0.005,
"step": 15500
},
{
"grad_norm": 0.13722357153892517,
"learning_rate": 1.3163451307972751e-05,
"loss": 0.0099,
"step": 15510
},
{
"grad_norm": 0.11740435659885406,
"learning_rate": 1.3107599033300977e-05,
"loss": 0.0037,
"step": 15520
},
{
"grad_norm": 0.09946759045124054,
"learning_rate": 1.305184762104471e-05,
"loss": 0.0047,
"step": 15530
},
{
"grad_norm": 0.09446703642606735,
"learning_rate": 1.2996197223626178e-05,
"loss": 0.0042,
"step": 15540
},
{
"grad_norm": 0.14761124551296234,
"learning_rate": 1.2940647993191457e-05,
"loss": 0.0045,
"step": 15550
},
{
"grad_norm": 0.15722501277923584,
"learning_rate": 1.2885200081610005e-05,
"loss": 0.0068,
"step": 15560
},
{
"grad_norm": 0.15671776235103607,
"learning_rate": 1.2829853640474316e-05,
"loss": 0.0043,
"step": 15570
},
{
"grad_norm": 0.08559279143810272,
"learning_rate": 1.2774608821099438e-05,
"loss": 0.0047,
"step": 15580
},
{
"grad_norm": 0.13336020708084106,
"learning_rate": 1.2719465774522577e-05,
"loss": 0.0066,
"step": 15590
},
{
"grad_norm": 0.17194277048110962,
"learning_rate": 1.2664424651502755e-05,
"loss": 0.0066,
"step": 15600
},
{
"grad_norm": 0.17069080471992493,
"learning_rate": 1.260948560252026e-05,
"loss": 0.0097,
"step": 15610
},
{
"grad_norm": 0.15968455374240875,
"learning_rate": 1.2554648777776396e-05,
"loss": 0.0039,
"step": 15620
},
{
"grad_norm": 0.13884195685386658,
"learning_rate": 1.2499914327192919e-05,
"loss": 0.004,
"step": 15630
},
{
"grad_norm": 0.1465432047843933,
"learning_rate": 1.2445282400411722e-05,
"loss": 0.0055,
"step": 15640
},
{
"grad_norm": 0.13197597861289978,
"learning_rate": 1.2390753146794437e-05,
"loss": 0.0052,
"step": 15650
},
{
"grad_norm": 0.15008197724819183,
"learning_rate": 1.2336326715421925e-05,
"loss": 0.0046,
"step": 15660
},
{
"grad_norm": 0.13557665050029755,
"learning_rate": 1.2282003255094005e-05,
"loss": 0.0045,
"step": 15670
},
{
"grad_norm": 0.1551276594400406,
"learning_rate": 1.2227782914328928e-05,
"loss": 0.0065,
"step": 15680
},
{
"grad_norm": 0.14345529675483704,
"learning_rate": 1.2173665841363018e-05,
"loss": 0.0033,
"step": 15690
},
{
"grad_norm": 0.12947356700897217,
"learning_rate": 1.211965218415032e-05,
"loss": 0.0042,
"step": 15700
},
{
"grad_norm": 0.08433026820421219,
"learning_rate": 1.2065742090362082e-05,
"loss": 0.0041,
"step": 15710
},
{
"grad_norm": 0.16045640408992767,
"learning_rate": 1.2011935707386457e-05,
"loss": 0.0058,
"step": 15720
},
{
"grad_norm": 0.10582105070352554,
"learning_rate": 1.1958233182328044e-05,
"loss": 0.0053,
"step": 15730
},
{
"grad_norm": 0.0928897112607956,
"learning_rate": 1.1904634662007474e-05,
"loss": 0.0041,
"step": 15740
},
{
"grad_norm": 0.13550110161304474,
"learning_rate": 1.1851140292961088e-05,
"loss": 0.0058,
"step": 15750
},
{
"grad_norm": 0.1278456449508667,
"learning_rate": 1.1797750221440424e-05,
"loss": 0.0074,
"step": 15760
},
{
"grad_norm": 0.194366917014122,
"learning_rate": 1.1744464593411897e-05,
"loss": 0.0055,
"step": 15770
},
{
"grad_norm": 0.1663769781589508,
"learning_rate": 1.1691283554556399e-05,
"loss": 0.0037,
"step": 15780
},
{
"grad_norm": 0.13579991459846497,
"learning_rate": 1.1638207250268834e-05,
"loss": 0.0087,
"step": 15790
},
{
"grad_norm": 0.12101404368877411,
"learning_rate": 1.158523582565782e-05,
"loss": 0.0059,
"step": 15800
},
{
"grad_norm": 0.1226835548877716,
"learning_rate": 1.1532369425545192e-05,
"loss": 0.0057,
"step": 15810
},
{
"grad_norm": 0.10354090481996536,
"learning_rate": 1.1479608194465662e-05,
"loss": 0.0097,
"step": 15820
},
{
"grad_norm": 0.10664427280426025,
"learning_rate": 1.1426952276666442e-05,
"loss": 0.0054,
"step": 15830
},
{
"grad_norm": 0.15973882377147675,
"learning_rate": 1.1374401816106778e-05,
"loss": 0.005,
"step": 15840
},
{
"grad_norm": 0.12123812735080719,
"learning_rate": 1.1321956956457646e-05,
"loss": 0.0052,
"step": 15850
},
{
"grad_norm": 0.15946902334690094,
"learning_rate": 1.1269617841101277e-05,
"loss": 0.0069,
"step": 15860
},
{
"grad_norm": 0.10081616789102554,
"learning_rate": 1.1217384613130804e-05,
"loss": 0.0091,
"step": 15870
},
{
"grad_norm": 0.11432350426912308,
"learning_rate": 1.11652574153499e-05,
"loss": 0.0065,
"step": 15880
},
{
"grad_norm": 0.08289428800344467,
"learning_rate": 1.1113236390272303e-05,
"loss": 0.0035,
"step": 15890
},
{
"grad_norm": 0.08323133736848831,
"learning_rate": 1.106132168012155e-05,
"loss": 0.0047,
"step": 15900
},
{
"grad_norm": 0.12318079173564911,
"learning_rate": 1.1009513426830448e-05,
"loss": 0.0044,
"step": 15910
},
{
"grad_norm": 0.16217155754566193,
"learning_rate": 1.0957811772040777e-05,
"loss": 0.0064,
"step": 15920
},
{
"grad_norm": 0.1420840173959732,
"learning_rate": 1.0906216857102913e-05,
"loss": 0.0059,
"step": 15930
},
{
"grad_norm": 0.16004914045333862,
"learning_rate": 1.0854728823075355e-05,
"loss": 0.0054,
"step": 15940
},
{
"grad_norm": 0.15781958401203156,
"learning_rate": 1.0803347810724452e-05,
"loss": 0.0096,
"step": 15950
},
{
"grad_norm": 0.12208251655101776,
"learning_rate": 1.0752073960523911e-05,
"loss": 0.0034,
"step": 15960
},
{
"grad_norm": 0.13120897114276886,
"learning_rate": 1.070090741265447e-05,
"loss": 0.0044,
"step": 15970
},
{
"grad_norm": 0.10382191836833954,
"learning_rate": 1.0649848307003547e-05,
"loss": 0.0039,
"step": 15980
},
{
"grad_norm": 0.09419625997543335,
"learning_rate": 1.0598896783164757e-05,
"loss": 0.0051,
"step": 15990
},
{
"grad_norm": 0.10848691314458847,
"learning_rate": 1.0548052980437645e-05,
"loss": 0.0042,
"step": 16000
},
{
"grad_norm": 0.13647383451461792,
"learning_rate": 1.049731703782722e-05,
"loss": 0.0061,
"step": 16010
},
{
"grad_norm": 0.1732291877269745,
"learning_rate": 1.0446689094043587e-05,
"loss": 0.0047,
"step": 16020
},
{
"grad_norm": 0.14692017436027527,
"learning_rate": 1.039616928750165e-05,
"loss": 0.0096,
"step": 16030
},
{
"grad_norm": 0.14247895777225494,
"learning_rate": 1.0345757756320612e-05,
"loss": 0.0047,
"step": 16040
},
{
"grad_norm": 0.10627839714288712,
"learning_rate": 1.0295454638323666e-05,
"loss": 0.0036,
"step": 16050
},
{
"grad_norm": 0.124620720744133,
"learning_rate": 1.0245260071037632e-05,
"loss": 0.0058,
"step": 16060
},
{
"grad_norm": 0.1555354744195938,
"learning_rate": 1.0195174191692518e-05,
"loss": 0.0046,
"step": 16070
},
{
"grad_norm": 0.16408278048038483,
"learning_rate": 1.014519713722124e-05,
"loss": 0.0085,
"step": 16080
},
{
"grad_norm": 0.09864310175180435,
"learning_rate": 1.0095329044259132e-05,
"loss": 0.0055,
"step": 16090
},
{
"grad_norm": 0.08889558166265488,
"learning_rate": 1.004557004914365e-05,
"loss": 0.004,
"step": 16100
},
{
"grad_norm": 0.10330936312675476,
"learning_rate": 9.995920287914007e-06,
"loss": 0.0037,
"step": 16110
},
{
"grad_norm": 0.11861933022737503,
"learning_rate": 9.946379896310737e-06,
"loss": 0.0048,
"step": 16120
},
{
"grad_norm": 0.12653601169586182,
"learning_rate": 9.896949009775396e-06,
"loss": 0.0075,
"step": 16130
},
{
"grad_norm": 0.09070475399494171,
"learning_rate": 9.847627763450134e-06,
"loss": 0.0036,
"step": 16140
},
{
"grad_norm": 0.12315920740365982,
"learning_rate": 9.798416292177337e-06,
"loss": 0.0055,
"step": 16150
},
{
"grad_norm": 0.111320361495018,
"learning_rate": 9.74931473049932e-06,
"loss": 0.0061,
"step": 16160
},
{
"grad_norm": 0.15200473368167877,
"learning_rate": 9.700323212657847e-06,
"loss": 0.0047,
"step": 16170
},
{
"grad_norm": 0.13636194169521332,
"learning_rate": 9.65144187259388e-06,
"loss": 0.0046,
"step": 16180
},
{
"grad_norm": 0.1087312325835228,
"learning_rate": 9.602670843947132e-06,
"loss": 0.007,
"step": 16190
},
{
"grad_norm": 0.13450048863887787,
"learning_rate": 9.554010260055713e-06,
"loss": 0.0049,
"step": 16200
},
{
"grad_norm": 0.09377999603748322,
"learning_rate": 9.505460253955834e-06,
"loss": 0.0127,
"step": 16210
},
{
"grad_norm": 0.1321907937526703,
"learning_rate": 9.457020958381324e-06,
"loss": 0.004,
"step": 16220
},
{
"grad_norm": 0.1684373915195465,
"learning_rate": 9.408692505763395e-06,
"loss": 0.0062,
"step": 16230
},
{
"grad_norm": 0.1340956687927246,
"learning_rate": 9.360475028230181e-06,
"loss": 0.0069,
"step": 16240
},
{
"grad_norm": 0.11114493757486343,
"learning_rate": 9.312368657606412e-06,
"loss": 0.0066,
"step": 16250
},
{
"grad_norm": 0.08800835907459259,
"learning_rate": 9.264373525413096e-06,
"loss": 0.0041,
"step": 16260
},
{
"grad_norm": 0.1255185306072235,
"learning_rate": 9.216489762867058e-06,
"loss": 0.0051,
"step": 16270
},
{
"grad_norm": 0.13304315507411957,
"learning_rate": 9.168717500880708e-06,
"loss": 0.0069,
"step": 16280
},
{
"grad_norm": 0.13777510821819305,
"learning_rate": 9.121056870061574e-06,
"loss": 0.0048,
"step": 16290
},
{
"grad_norm": 0.1482883095741272,
"learning_rate": 9.073508000711983e-06,
"loss": 0.0063,
"step": 16300
},
{
"grad_norm": 0.12836310267448425,
"learning_rate": 9.026071022828758e-06,
"loss": 0.0068,
"step": 16310
},
{
"grad_norm": 0.13698478043079376,
"learning_rate": 8.978746066102771e-06,
"loss": 0.0049,
"step": 16320
},
{
"grad_norm": 0.139384463429451,
"learning_rate": 8.931533259918634e-06,
"loss": 0.005,
"step": 16330
},
{
"grad_norm": 0.08790681511163712,
"learning_rate": 8.884432733354382e-06,
"loss": 0.0062,
"step": 16340
},
{
"grad_norm": 0.1132570132613182,
"learning_rate": 8.837444615181029e-06,
"loss": 0.0055,
"step": 16350
},
{
"grad_norm": 0.15096323192119598,
"learning_rate": 8.790569033862323e-06,
"loss": 0.0061,
"step": 16360
},
{
"grad_norm": 0.12137813121080399,
"learning_rate": 8.7438061175543e-06,
"loss": 0.0058,
"step": 16370
},
{
"grad_norm": 0.15718196332454681,
"learning_rate": 8.697155994104978e-06,
"loss": 0.0059,
"step": 16380
},
{
"grad_norm": 0.12548978626728058,
"learning_rate": 8.650618791054033e-06,
"loss": 0.0041,
"step": 16390
},
{
"grad_norm": 0.09397879242897034,
"learning_rate": 8.604194635632373e-06,
"loss": 0.0035,
"step": 16400
},
{
"grad_norm": 0.12385103106498718,
"learning_rate": 8.557883654761906e-06,
"loss": 0.0046,
"step": 16410
},
{
"grad_norm": 0.12034054100513458,
"learning_rate": 8.511685975055061e-06,
"loss": 0.0049,
"step": 16420
},
{
"grad_norm": 0.07118933647871017,
"learning_rate": 8.46560172281452e-06,
"loss": 0.0037,
"step": 16430
},
{
"grad_norm": 0.09793756902217865,
"learning_rate": 8.419631024032893e-06,
"loss": 0.0037,
"step": 16440
},
{
"grad_norm": 0.08896943926811218,
"learning_rate": 8.373774004392293e-06,
"loss": 0.0042,
"step": 16450
},
{
"grad_norm": 0.09761831164360046,
"learning_rate": 8.32803078926409e-06,
"loss": 0.0042,
"step": 16460
},
{
"grad_norm": 0.1202096939086914,
"learning_rate": 8.282401503708454e-06,
"loss": 0.0047,
"step": 16470
},
{
"grad_norm": 0.08817831426858902,
"learning_rate": 8.23688627247412e-06,
"loss": 0.0038,
"step": 16480
},
{
"grad_norm": 0.05380531772971153,
"learning_rate": 8.191485219998007e-06,
"loss": 0.0031,
"step": 16490
},
{
"grad_norm": 0.11282724887132645,
"learning_rate": 8.146198470404843e-06,
"loss": 0.0063,
"step": 16500
},
{
"grad_norm": 0.13265129923820496,
"learning_rate": 8.101026147506897e-06,
"loss": 0.0062,
"step": 16510
},
{
"grad_norm": 0.09475216269493103,
"learning_rate": 8.05596837480353e-06,
"loss": 0.0048,
"step": 16520
},
{
"grad_norm": 0.11130519211292267,
"learning_rate": 8.011025275480998e-06,
"loss": 0.0063,
"step": 16530
},
{
"grad_norm": 0.09376892447471619,
"learning_rate": 7.966196972412027e-06,
"loss": 0.0057,
"step": 16540
},
{
"grad_norm": 0.12341774255037308,
"learning_rate": 7.92148358815547e-06,
"loss": 0.005,
"step": 16550
},
{
"grad_norm": 0.1459421068429947,
"learning_rate": 7.87688524495604e-06,
"loss": 0.006,
"step": 16560
},
{
"grad_norm": 0.12143165618181229,
"learning_rate": 7.83240206474386e-06,
"loss": 0.0045,
"step": 16570
},
{
"grad_norm": 0.0978287011384964,
"learning_rate": 7.788034169134272e-06,
"loss": 0.0059,
"step": 16580
},
{
"grad_norm": 0.10168706625699997,
"learning_rate": 7.743781679427414e-06,
"loss": 0.0044,
"step": 16590
},
{
"grad_norm": 0.08256179839372635,
"learning_rate": 7.699644716607895e-06,
"loss": 0.0036,
"step": 16600
},
{
"grad_norm": 0.059778597205877304,
"learning_rate": 7.655623401344486e-06,
"loss": 0.0056,
"step": 16610
},
{
"grad_norm": 0.11977694928646088,
"learning_rate": 7.611717853989775e-06,
"loss": 0.0059,
"step": 16620
},
{
"grad_norm": 0.13426047563552856,
"learning_rate": 7.567928194579854e-06,
"loss": 0.0067,
"step": 16630
},
{
"grad_norm": 0.12195713818073273,
"learning_rate": 7.524254542833997e-06,
"loss": 0.0042,
"step": 16640
},
{
"grad_norm": 0.1642487794160843,
"learning_rate": 7.480697018154286e-06,
"loss": 0.0061,
"step": 16650
},
{
"grad_norm": 0.08334190398454666,
"learning_rate": 7.437255739625332e-06,
"loss": 0.0034,
"step": 16660
},
{
"grad_norm": 0.09278412163257599,
"learning_rate": 7.393930826013923e-06,
"loss": 0.0049,
"step": 16670
},
{
"grad_norm": 0.08380815386772156,
"learning_rate": 7.350722395768722e-06,
"loss": 0.004,
"step": 16680
},
{
"grad_norm": 0.08418401330709457,
"learning_rate": 7.307630567019963e-06,
"loss": 0.0041,
"step": 16690
},
{
"grad_norm": 0.10579981654882431,
"learning_rate": 7.264655457579e-06,
"loss": 0.0047,
"step": 16700
},
{
"grad_norm": 0.09246329963207245,
"learning_rate": 7.221797184938184e-06,
"loss": 0.0043,
"step": 16710
},
{
"grad_norm": 0.10854664444923401,
"learning_rate": 7.179055866270373e-06,
"loss": 0.0037,
"step": 16720
},
{
"grad_norm": 0.09156115353107452,
"learning_rate": 7.136431618428707e-06,
"loss": 0.0038,
"step": 16730
},
{
"grad_norm": 0.056634001433849335,
"learning_rate": 7.09392455794628e-06,
"loss": 0.0059,
"step": 16740
},
{
"grad_norm": 0.06214802712202072,
"learning_rate": 7.051534801035725e-06,
"loss": 0.003,
"step": 16750
},
{
"grad_norm": 0.11566188186407089,
"learning_rate": 7.00926246358905e-06,
"loss": 0.0078,
"step": 16760
},
{
"grad_norm": 0.10004517436027527,
"learning_rate": 6.967107661177191e-06,
"loss": 0.0043,
"step": 16770
},
{
"grad_norm": 0.14847685396671295,
"learning_rate": 6.925070509049786e-06,
"loss": 0.0049,
"step": 16780
},
{
"grad_norm": 0.08145119249820709,
"learning_rate": 6.883151122134812e-06,
"loss": 0.0035,
"step": 16790
},
{
"grad_norm": 0.07774826884269714,
"learning_rate": 6.8413496150382394e-06,
"loss": 0.0051,
"step": 16800
},
{
"grad_norm": 0.09139399975538254,
"learning_rate": 6.7996661020438165e-06,
"loss": 0.0048,
"step": 16810
},
{
"grad_norm": 0.11714893579483032,
"learning_rate": 6.758100697112662e-06,
"loss": 0.0049,
"step": 16820
},
{
"grad_norm": 0.10748549550771713,
"learning_rate": 6.716653513883026e-06,
"loss": 0.0034,
"step": 16830
},
{
"grad_norm": 0.13849052786827087,
"learning_rate": 6.675324665669913e-06,
"loss": 0.0074,
"step": 16840
},
{
"grad_norm": 0.10507303476333618,
"learning_rate": 6.634114265464803e-06,
"loss": 0.0037,
"step": 16850
},
{
"grad_norm": 0.11181323230266571,
"learning_rate": 6.59302242593538e-06,
"loss": 0.0046,
"step": 16860
},
{
"grad_norm": 0.0852261483669281,
"learning_rate": 6.552049259425141e-06,
"loss": 0.0035,
"step": 16870
},
{
"grad_norm": 0.12089522182941437,
"learning_rate": 6.511194877953181e-06,
"loss": 0.0071,
"step": 16880
},
{
"grad_norm": 0.10575058311223984,
"learning_rate": 6.470459393213813e-06,
"loss": 0.0047,
"step": 16890
},
{
"grad_norm": 0.1121765598654747,
"learning_rate": 6.429842916576279e-06,
"loss": 0.0064,
"step": 16900
},
{
"grad_norm": 0.11497557908296585,
"learning_rate": 6.389345559084503e-06,
"loss": 0.0038,
"step": 16910
},
{
"grad_norm": 0.10686445236206055,
"learning_rate": 6.348967431456682e-06,
"loss": 0.0078,
"step": 16920
},
{
"grad_norm": 0.11743766814470291,
"learning_rate": 6.30870864408511e-06,
"loss": 0.007,
"step": 16930
},
{
"grad_norm": 0.08018866926431656,
"learning_rate": 6.268569307035754e-06,
"loss": 0.0058,
"step": 16940
},
{
"grad_norm": 0.0866788849234581,
"learning_rate": 6.228549530048022e-06,
"loss": 0.0043,
"step": 16950
},
{
"grad_norm": 0.12864826619625092,
"learning_rate": 6.1886494225344814e-06,
"loss": 0.0053,
"step": 16960
},
{
"grad_norm": 0.08638875186443329,
"learning_rate": 6.148869093580479e-06,
"loss": 0.0041,
"step": 16970
},
{
"grad_norm": 0.11416413635015488,
"learning_rate": 6.109208651943921e-06,
"loss": 0.0041,
"step": 16980
},
{
"grad_norm": 0.08974229544401169,
"learning_rate": 6.069668206054946e-06,
"loss": 0.0059,
"step": 16990
},
{
"grad_norm": 0.08993878960609436,
"learning_rate": 6.0302478640156145e-06,
"loss": 0.0047,
"step": 17000
},
{
"grad_norm": 0.06425460427999496,
"learning_rate": 5.990947733599644e-06,
"loss": 0.0041,
"step": 17010
},
{
"grad_norm": 0.15895497798919678,
"learning_rate": 5.951767922252105e-06,
"loss": 0.0066,
"step": 17020
},
{
"grad_norm": 0.12571494281291962,
"learning_rate": 5.912708537089068e-06,
"loss": 0.004,
"step": 17030
},
{
"grad_norm": 0.13179023563861847,
"learning_rate": 5.873769684897434e-06,
"loss": 0.0088,
"step": 17040
},
{
"grad_norm": 0.12288176268339157,
"learning_rate": 5.834951472134514e-06,
"loss": 0.0062,
"step": 17050
},
{
"grad_norm": 0.12752611935138702,
"learning_rate": 5.796254004927832e-06,
"loss": 0.0082,
"step": 17060
},
{
"grad_norm": 0.10792287439107895,
"learning_rate": 5.757677389074806e-06,
"loss": 0.0079,
"step": 17070
},
{
"grad_norm": 0.12195702642202377,
"learning_rate": 5.719221730042385e-06,
"loss": 0.0041,
"step": 17080
},
{
"grad_norm": 0.10768016427755356,
"learning_rate": 5.680887132966911e-06,
"loss": 0.006,
"step": 17090
},
{
"grad_norm": 0.09891042858362198,
"learning_rate": 5.642673702653683e-06,
"loss": 0.004,
"step": 17100
},
{
"grad_norm": 0.0941566675901413,
"learning_rate": 5.604581543576781e-06,
"loss": 0.0056,
"step": 17110
},
{
"grad_norm": 0.10946481674909592,
"learning_rate": 5.566610759878704e-06,
"loss": 0.0054,
"step": 17120
},
{
"grad_norm": 0.07776937633752823,
"learning_rate": 5.528761455370119e-06,
"loss": 0.0032,
"step": 17130
},
{
"grad_norm": 0.07869040220975876,
"learning_rate": 5.491033733529594e-06,
"loss": 0.0036,
"step": 17140
},
{
"grad_norm": 0.08763402700424194,
"learning_rate": 5.453427697503255e-06,
"loss": 0.0044,
"step": 17150
},
{
"grad_norm": 0.12128516286611557,
"learning_rate": 5.415943450104599e-06,
"loss": 0.0061,
"step": 17160
},
{
"grad_norm": 0.1140231043100357,
"learning_rate": 5.378581093814111e-06,
"loss": 0.0048,
"step": 17170
},
{
"grad_norm": 0.07997678965330124,
"learning_rate": 5.3413407307790375e-06,
"loss": 0.005,
"step": 17180
},
{
"grad_norm": 0.13127312064170837,
"learning_rate": 5.30422246281313e-06,
"loss": 0.0057,
"step": 17190
},
{
"grad_norm": 0.058916617184877396,
"learning_rate": 5.267226391396296e-06,
"loss": 0.0029,
"step": 17200
},
{
"grad_norm": 0.1319904774427414,
"learning_rate": 5.2303526176744e-06,
"loss": 0.0044,
"step": 17210
},
{
"grad_norm": 0.11650247871875763,
"learning_rate": 5.193601242458929e-06,
"loss": 0.0051,
"step": 17220
},
{
"grad_norm": 0.07709518074989319,
"learning_rate": 5.156972366226714e-06,
"loss": 0.0055,
"step": 17230
},
{
"grad_norm": 0.08256576210260391,
"learning_rate": 5.120466089119735e-06,
"loss": 0.0043,
"step": 17240
},
{
"grad_norm": 0.06434403359889984,
"learning_rate": 5.084082510944749e-06,
"loss": 0.0027,
"step": 17250
},
{
"grad_norm": 0.06728126853704453,
"learning_rate": 5.047821731173058e-06,
"loss": 0.0038,
"step": 17260
},
{
"grad_norm": 0.090157650411129,
"learning_rate": 5.011683848940274e-06,
"loss": 0.0051,
"step": 17270
},
{
"grad_norm": 0.07788987457752228,
"learning_rate": 4.975668963045954e-06,
"loss": 0.0045,
"step": 17280
},
{
"grad_norm": 0.1306411623954773,
"learning_rate": 4.9397771719534525e-06,
"loss": 0.0064,
"step": 17290
},
{
"grad_norm": 0.10654694586992264,
"learning_rate": 4.904008573789548e-06,
"loss": 0.0054,
"step": 17300
},
{
"grad_norm": 0.09149842709302902,
"learning_rate": 4.8683632663442005e-06,
"loss": 0.0065,
"step": 17310
},
{
"grad_norm": 0.1496347337961197,
"learning_rate": 4.832841347070343e-06,
"loss": 0.0057,
"step": 17320
},
{
"grad_norm": 0.08486293256282806,
"learning_rate": 4.797442913083539e-06,
"loss": 0.004,
"step": 17330
},
{
"grad_norm": 0.06311454623937607,
"learning_rate": 4.7621680611617596e-06,
"loss": 0.0079,
"step": 17340
},
{
"grad_norm": 0.0772392749786377,
"learning_rate": 4.727016887745095e-06,
"loss": 0.0059,
"step": 17350
},
{
"grad_norm": 0.08348176628351212,
"learning_rate": 4.691989488935511e-06,
"loss": 0.0056,
"step": 17360
},
{
"grad_norm": 0.08476629853248596,
"learning_rate": 4.657085960496588e-06,
"loss": 0.0053,
"step": 17370
},
{
"grad_norm": 0.08960162848234177,
"learning_rate": 4.6223063978532265e-06,
"loss": 0.0054,
"step": 17380
},
{
"grad_norm": 0.06759439408779144,
"learning_rate": 4.587650896091439e-06,
"loss": 0.0028,
"step": 17390
},
{
"grad_norm": 0.08276814222335815,
"learning_rate": 4.553119549958035e-06,
"loss": 0.0057,
"step": 17400
},
{
"grad_norm": 0.10904081165790558,
"learning_rate": 4.518712453860385e-06,
"loss": 0.0059,
"step": 17410
},
{
"grad_norm": 0.06815115362405777,
"learning_rate": 4.484429701866205e-06,
"loss": 0.0032,
"step": 17420
},
{
"grad_norm": 0.058988507837057114,
"learning_rate": 4.4502713877031975e-06,
"loss": 0.003,
"step": 17430
},
{
"grad_norm": 0.07423502206802368,
"learning_rate": 4.416237604758911e-06,
"loss": 0.0035,
"step": 17440
},
{
"grad_norm": 0.08950015157461166,
"learning_rate": 4.3823284460804025e-06,
"loss": 0.0067,
"step": 17450
},
{
"grad_norm": 0.12428207695484161,
"learning_rate": 4.348544004374011e-06,
"loss": 0.0047,
"step": 17460
},
{
"grad_norm": 0.101078100502491,
"learning_rate": 4.314884372005123e-06,
"loss": 0.0044,
"step": 17470
},
{
"grad_norm": 0.09248964488506317,
"learning_rate": 4.281349640997867e-06,
"loss": 0.0046,
"step": 17480
},
{
"grad_norm": 0.05857797712087631,
"learning_rate": 4.247939903034942e-06,
"loss": 0.0056,
"step": 17490
},
{
"grad_norm": 0.08585874736309052,
"learning_rate": 4.214655249457284e-06,
"loss": 0.0084,
"step": 17500
},
{
"grad_norm": 0.09325850009918213,
"learning_rate": 4.181495771263855e-06,
"loss": 0.0038,
"step": 17510
},
{
"grad_norm": 0.08757645636796951,
"learning_rate": 4.148461559111427e-06,
"loss": 0.0047,
"step": 17520
},
{
"grad_norm": 0.0964057445526123,
"learning_rate": 4.115552703314252e-06,
"loss": 0.0045,
"step": 17530
},
{
"grad_norm": 0.06798765808343887,
"learning_rate": 4.082769293843886e-06,
"loss": 0.0033,
"step": 17540
},
{
"grad_norm": 0.05117792263627052,
"learning_rate": 4.050111420328939e-06,
"loss": 0.0038,
"step": 17550
},
{
"grad_norm": 0.1059410348534584,
"learning_rate": 4.017579172054764e-06,
"loss": 0.0057,
"step": 17560
},
{
"grad_norm": 0.08445511013269424,
"learning_rate": 3.985172637963308e-06,
"loss": 0.0041,
"step": 17570
},
{
"grad_norm": 0.06766512989997864,
"learning_rate": 3.952891906652784e-06,
"loss": 0.0032,
"step": 17580
},
{
"grad_norm": 0.10279787331819534,
"learning_rate": 3.920737066377478e-06,
"loss": 0.005,
"step": 17590
},
{
"grad_norm": 0.10965480655431747,
"learning_rate": 3.888708205047509e-06,
"loss": 0.0077,
"step": 17600
},
{
"grad_norm": 0.09529787302017212,
"learning_rate": 3.856805410228542e-06,
"loss": 0.005,
"step": 17610
},
{
"grad_norm": 0.07802815735340118,
"learning_rate": 3.82502876914162e-06,
"loss": 0.0044,
"step": 17620
},
{
"grad_norm": 0.063714899122715,
"learning_rate": 3.7933783686628586e-06,
"loss": 0.0064,
"step": 17630
},
{
"grad_norm": 0.0940946713089943,
"learning_rate": 3.7618542953232306e-06,
"loss": 0.0048,
"step": 17640
},
{
"grad_norm": 0.10446714609861374,
"learning_rate": 3.7304566353083658e-06,
"loss": 0.0095,
"step": 17650
},
{
"grad_norm": 0.07436424493789673,
"learning_rate": 3.6991854744582555e-06,
"loss": 0.0061,
"step": 17660
},
{
"grad_norm": 0.0746212750673294,
"learning_rate": 3.6680408982670777e-06,
"loss": 0.0039,
"step": 17670
},
{
"grad_norm": 0.1222284585237503,
"learning_rate": 3.637022991882899e-06,
"loss": 0.0084,
"step": 17680
},
{
"grad_norm": 0.09644754230976105,
"learning_rate": 3.606131840107485e-06,
"loss": 0.0046,
"step": 17690
},
{
"grad_norm": 0.15303337574005127,
"learning_rate": 3.575367527396084e-06,
"loss": 0.0084,
"step": 17700
},
{
"grad_norm": 0.07879474014043808,
"learning_rate": 3.5447301378571386e-06,
"loss": 0.0037,
"step": 17710
},
{
"grad_norm": 0.07187385112047195,
"learning_rate": 3.514219755252113e-06,
"loss": 0.0065,
"step": 17720
},
{
"grad_norm": 0.09277717024087906,
"learning_rate": 3.4838364629952213e-06,
"loss": 0.0074,
"step": 17730
},
{
"grad_norm": 0.09220196306705475,
"learning_rate": 3.4535803441532123e-06,
"loss": 0.0038,
"step": 17740
},
{
"grad_norm": 0.08987642079591751,
"learning_rate": 3.4234514814451836e-06,
"loss": 0.0043,
"step": 17750
},
{
"grad_norm": 0.08693262934684753,
"learning_rate": 3.393449957242273e-06,
"loss": 0.0049,
"step": 17760
},
{
"grad_norm": 0.12766656279563904,
"learning_rate": 3.363575853567524e-06,
"loss": 0.0059,
"step": 17770
},
{
"grad_norm": 0.1169864758849144,
"learning_rate": 3.3338292520955826e-06,
"loss": 0.0055,
"step": 17780
},
{
"grad_norm": 0.09048964083194733,
"learning_rate": 3.304210234152516e-06,
"loss": 0.0034,
"step": 17790
},
{
"grad_norm": 0.06841456145048141,
"learning_rate": 3.2747188807155993e-06,
"loss": 0.0031,
"step": 17800
},
{
"grad_norm": 0.051626693457365036,
"learning_rate": 3.2453552724130643e-06,
"loss": 0.0054,
"step": 17810
},
{
"grad_norm": 0.06671755760908127,
"learning_rate": 3.216119489523889e-06,
"loss": 0.0049,
"step": 17820
},
{
"grad_norm": 0.09001406282186508,
"learning_rate": 3.1870116119775917e-06,
"loss": 0.005,
"step": 17830
},
{
"grad_norm": 0.05401616171002388,
"learning_rate": 3.158031719353999e-06,
"loss": 0.0043,
"step": 17840
},
{
"grad_norm": 0.09218642860651016,
"learning_rate": 3.1291798908830273e-06,
"loss": 0.0066,
"step": 17850
},
{
"grad_norm": 0.05930738151073456,
"learning_rate": 3.1004562054444853e-06,
"loss": 0.0037,
"step": 17860
},
{
"grad_norm": 0.060345377773046494,
"learning_rate": 3.071860741567806e-06,
"loss": 0.0041,
"step": 17870
},
{
"grad_norm": 0.04504251852631569,
"learning_rate": 3.04339357743193e-06,
"loss": 0.0041,
"step": 17880
},
{
"grad_norm": 0.0834588035941124,
"learning_rate": 3.0150547908649628e-06,
"loss": 0.0049,
"step": 17890
},
{
"grad_norm": 0.06421680748462677,
"learning_rate": 2.9868444593440957e-06,
"loss": 0.0055,
"step": 17900
},
{
"grad_norm": 0.10178251564502716,
"learning_rate": 2.9587626599952846e-06,
"loss": 0.0068,
"step": 17910
},
{
"grad_norm": 0.06935402005910873,
"learning_rate": 2.930809469593082e-06,
"loss": 0.0067,
"step": 17920
},
{
"grad_norm": 0.09632204473018646,
"learning_rate": 2.9029849645604733e-06,
"loss": 0.0046,
"step": 17930
},
{
"grad_norm": 0.0971589982509613,
"learning_rate": 2.8752892209685632e-06,
"loss": 0.0053,
"step": 17940
},
{
"grad_norm": 0.06326834857463837,
"learning_rate": 2.847722314536483e-06,
"loss": 0.0066,
"step": 17950
},
{
"grad_norm": 0.08715000748634338,
"learning_rate": 2.820284320631078e-06,
"loss": 0.0057,
"step": 17960
},
{
"grad_norm": 0.11452065408229828,
"learning_rate": 2.792975314266788e-06,
"loss": 0.0046,
"step": 17970
},
{
"grad_norm": 0.10101866722106934,
"learning_rate": 2.7657953701054007e-06,
"loss": 0.0077,
"step": 17980
},
{
"grad_norm": 0.06383367627859116,
"learning_rate": 2.7387445624558306e-06,
"loss": 0.0084,
"step": 17990
},
{
"grad_norm": 0.0860927402973175,
"learning_rate": 2.7118229652739747e-06,
"loss": 0.0068,
"step": 18000
},
{
"grad_norm": 0.09062928706407547,
"learning_rate": 2.6850306521624236e-06,
"loss": 0.0077,
"step": 18010
},
{
"grad_norm": 0.0975007563829422,
"learning_rate": 2.6583676963703507e-06,
"loss": 0.0053,
"step": 18020
},
{
"grad_norm": 0.056053951382637024,
"learning_rate": 2.631834170793268e-06,
"loss": 0.0037,
"step": 18030
},
{
"grad_norm": 0.09389690309762955,
"learning_rate": 2.6054301479728036e-06,
"loss": 0.0039,
"step": 18040
},
{
"grad_norm": 0.08401983976364136,
"learning_rate": 2.579155700096575e-06,
"loss": 0.0057,
"step": 18050
},
{
"grad_norm": 0.06630973517894745,
"learning_rate": 2.5530108989978873e-06,
"loss": 0.007,
"step": 18060
},
{
"grad_norm": 0.08125243335962296,
"learning_rate": 2.5269958161556416e-06,
"loss": 0.0059,
"step": 18070
},
{
"grad_norm": 0.0948253944516182,
"learning_rate": 2.5011105226940888e-06,
"loss": 0.0065,
"step": 18080
},
{
"grad_norm": 0.06006864085793495,
"learning_rate": 2.4753550893826248e-06,
"loss": 0.0025,
"step": 18090
},
{
"grad_norm": 0.06576282531023026,
"learning_rate": 2.4497295866356296e-06,
"loss": 0.0082,
"step": 18100
},
{
"grad_norm": 0.0732223391532898,
"learning_rate": 2.424234084512228e-06,
"loss": 0.0062,
"step": 18110
},
{
"grad_norm": 0.0863489881157875,
"learning_rate": 2.3988686527161687e-06,
"loss": 0.0033,
"step": 18120
},
{
"grad_norm": 0.05276194587349892,
"learning_rate": 2.373633360595573e-06,
"loss": 0.0056,
"step": 18130
},
{
"grad_norm": 0.06433786451816559,
"learning_rate": 2.3485282771427585e-06,
"loss": 0.0048,
"step": 18140
},
{
"grad_norm": 0.04832305759191513,
"learning_rate": 2.3235534709940665e-06,
"loss": 0.0062,
"step": 18150
},
{
"grad_norm": 0.05605657771229744,
"learning_rate": 2.2987090104296617e-06,
"loss": 0.0047,
"step": 18160
},
{
"grad_norm": 0.06013290956616402,
"learning_rate": 2.273994963373355e-06,
"loss": 0.0055,
"step": 18170
},
{
"grad_norm": 0.054950859397649765,
"learning_rate": 2.249411397392409e-06,
"loss": 0.0049,
"step": 18180
},
{
"grad_norm": 0.04367905482649803,
"learning_rate": 2.2249583796973506e-06,
"loss": 0.0067,
"step": 18190
},
{
"grad_norm": 0.06013049557805061,
"learning_rate": 2.200635977141796e-06,
"loss": 0.004,
"step": 18200
},
{
"grad_norm": 0.05286560580134392,
"learning_rate": 2.17644425622226e-06,
"loss": 0.0072,
"step": 18210
},
{
"grad_norm": 0.04693790152668953,
"learning_rate": 2.152383283077991e-06,
"loss": 0.0033,
"step": 18220
},
{
"grad_norm": 0.08133646100759506,
"learning_rate": 2.128453123490781e-06,
"loss": 0.0049,
"step": 18230
},
{
"grad_norm": 0.11808016151189804,
"learning_rate": 2.1046538428847462e-06,
"loss": 0.0075,
"step": 18240
},
{
"grad_norm": 0.13297493755817413,
"learning_rate": 2.0809855063262273e-06,
"loss": 0.0039,
"step": 18250
},
{
"grad_norm": 0.06416473537683487,
"learning_rate": 2.057448178523558e-06,
"loss": 0.0042,
"step": 18260
},
{
"grad_norm": 0.08213549107313156,
"learning_rate": 2.034041923826885e-06,
"loss": 0.004,
"step": 18270
},
{
"grad_norm": 0.08681398630142212,
"learning_rate": 2.0107668062280204e-06,
"loss": 0.0062,
"step": 18280
},
{
"grad_norm": 0.04406105354428291,
"learning_rate": 1.9876228893602357e-06,
"loss": 0.0026,
"step": 18290
},
{
"grad_norm": 0.09231877326965332,
"learning_rate": 1.9646102364981266e-06,
"loss": 0.0048,
"step": 18300
},
{
"grad_norm": 0.09094957262277603,
"learning_rate": 1.9417289105574053e-06,
"loss": 0.0045,
"step": 18310
},
{
"grad_norm": 0.07368715852499008,
"learning_rate": 1.9189789740947427e-06,
"loss": 0.0048,
"step": 18320
},
{
"grad_norm": 0.05653895437717438,
"learning_rate": 1.896360489307597e-06,
"loss": 0.0071,
"step": 18330
},
{
"grad_norm": 0.061923108994960785,
"learning_rate": 1.8738735180340362e-06,
"loss": 0.0049,
"step": 18340
},
{
"grad_norm": 0.06822076439857483,
"learning_rate": 1.8515181217525824e-06,
"loss": 0.0044,
"step": 18350
},
{
"grad_norm": 0.06153898686170578,
"learning_rate": 1.8292943615820457e-06,
"loss": 0.0038,
"step": 18360
},
{
"grad_norm": 0.0566900372505188,
"learning_rate": 1.8072022982813296e-06,
"loss": 0.0047,
"step": 18370
},
{
"grad_norm": 0.10939788818359375,
"learning_rate": 1.7852419922492925e-06,
"loss": 0.0052,
"step": 18380
},
{
"grad_norm": 0.06535179167985916,
"learning_rate": 1.763413503524569e-06,
"loss": 0.0033,
"step": 18390
},
{
"grad_norm": 0.11079724133014679,
"learning_rate": 1.7417168917854165e-06,
"loss": 0.0075,
"step": 18400
},
{
"grad_norm": 0.08610212802886963,
"learning_rate": 1.720152216349552e-06,
"loss": 0.0078,
"step": 18410
},
{
"grad_norm": 0.05728074163198471,
"learning_rate": 1.6987195361739595e-06,
"loss": 0.003,
"step": 18420
},
{
"grad_norm": 0.08600207418203354,
"learning_rate": 1.6774189098547832e-06,
"loss": 0.0081,
"step": 18430
},
{
"grad_norm": 0.08694013953208923,
"learning_rate": 1.6562503956271069e-06,
"loss": 0.0043,
"step": 18440
},
{
"grad_norm": 0.06410788744688034,
"learning_rate": 1.6352140513648417e-06,
"loss": 0.0041,
"step": 18450
},
{
"grad_norm": 0.035524092614650726,
"learning_rate": 1.6143099345805712e-06,
"loss": 0.0046,
"step": 18460
},
{
"grad_norm": 0.06390909850597382,
"learning_rate": 1.5935381024253293e-06,
"loss": 0.0055,
"step": 18470
},
{
"grad_norm": 0.05754880607128143,
"learning_rate": 1.572898611688517e-06,
"loss": 0.0066,
"step": 18480
},
{
"grad_norm": 0.048745110630989075,
"learning_rate": 1.5523915187977133e-06,
"loss": 0.0034,
"step": 18490
},
{
"grad_norm": 0.051464542746543884,
"learning_rate": 1.532016879818532e-06,
"loss": 0.0052,
"step": 18500
},
{
"grad_norm": 0.04558296129107475,
"learning_rate": 1.51177475045447e-06,
"loss": 0.0039,
"step": 18510
},
{
"grad_norm": 0.050785310566425323,
"learning_rate": 1.4916651860467035e-06,
"loss": 0.0044,
"step": 18520
},
{
"grad_norm": 0.07850246131420135,
"learning_rate": 1.471688241574043e-06,
"loss": 0.0051,
"step": 18530
},
{
"grad_norm": 0.043779075145721436,
"learning_rate": 1.451843971652672e-06,
"loss": 0.0056,
"step": 18540
},
{
"grad_norm": 0.09892037510871887,
"learning_rate": 1.432132430536076e-06,
"loss": 0.005,
"step": 18550
},
{
"grad_norm": 0.0711660087108612,
"learning_rate": 1.412553672114869e-06,
"loss": 0.0049,
"step": 18560
},
{
"grad_norm": 0.040360577404499054,
"learning_rate": 1.3931077499166056e-06,
"loss": 0.0041,
"step": 18570
},
{
"grad_norm": 0.10091473162174225,
"learning_rate": 1.3737947171057085e-06,
"loss": 0.0053,
"step": 18580
},
{
"grad_norm": 0.06029286980628967,
"learning_rate": 1.3546146264832582e-06,
"loss": 0.0051,
"step": 18590
},
{
"grad_norm": 0.06340325623750687,
"learning_rate": 1.3355675304869086e-06,
"loss": 0.0054,
"step": 18600
},
{
"grad_norm": 0.059224601835012436,
"learning_rate": 1.3166534811906827e-06,
"loss": 0.0037,
"step": 18610
},
{
"grad_norm": 0.06791342049837112,
"learning_rate": 1.2978725303048666e-06,
"loss": 0.0042,
"step": 18620
},
{
"grad_norm": 0.0800284743309021,
"learning_rate": 1.2792247291758762e-06,
"loss": 0.0041,
"step": 18630
},
{
"grad_norm": 0.08240664750337601,
"learning_rate": 1.2607101287860635e-06,
"loss": 0.0038,
"step": 18640
},
{
"grad_norm": 0.047691259533166885,
"learning_rate": 1.2423287797536654e-06,
"loss": 0.0047,
"step": 18650
},
{
"grad_norm": 0.06712372601032257,
"learning_rate": 1.2240807323325776e-06,
"loss": 0.004,
"step": 18660
},
{
"grad_norm": 0.049060530960559845,
"learning_rate": 1.205966036412254e-06,
"loss": 0.0031,
"step": 18670
},
{
"grad_norm": 0.03923507779836655,
"learning_rate": 1.1879847415175949e-06,
"loss": 0.0057,
"step": 18680
},
{
"grad_norm": 0.11399254202842712,
"learning_rate": 1.1701368968087712e-06,
"loss": 0.0053,
"step": 18690
},
{
"grad_norm": 0.0680961012840271,
"learning_rate": 1.1524225510811116e-06,
"loss": 0.0046,
"step": 18700
},
{
"grad_norm": 0.04934504255652428,
"learning_rate": 1.1348417527649535e-06,
"loss": 0.0041,
"step": 18710
},
{
"grad_norm": 0.0424002967774868,
"learning_rate": 1.1173945499255268e-06,
"loss": 0.0048,
"step": 18720
},
{
"grad_norm": 0.037728987634181976,
"learning_rate": 1.1000809902628307e-06,
"loss": 0.003,
"step": 18730
},
{
"grad_norm": 0.03390166163444519,
"learning_rate": 1.082901121111468e-06,
"loss": 0.0041,
"step": 18740
},
{
"grad_norm": 0.044707078486680984,
"learning_rate": 1.0658549894405456e-06,
"loss": 0.0039,
"step": 18750
},
{
"grad_norm": 0.0984845906496048,
"learning_rate": 1.0489426418535342e-06,
"loss": 0.0056,
"step": 18760
},
{
"grad_norm": 0.03741137310862541,
"learning_rate": 1.0321641245881474e-06,
"loss": 0.0042,
"step": 18770
},
{
"grad_norm": 0.03353625163435936,
"learning_rate": 1.015519483516214e-06,
"loss": 0.0029,
"step": 18780
},
{
"grad_norm": 0.03772426396608353,
"learning_rate": 9.990087641435443e-07,
"loss": 0.0035,
"step": 18790
},
{
"grad_norm": 0.10662414133548737,
"learning_rate": 9.826320116098132e-07,
"loss": 0.0068,
"step": 18800
},
{
"grad_norm": 0.03115723840892315,
"learning_rate": 9.663892706884447e-07,
"loss": 0.0024,
"step": 18810
},
{
"grad_norm": 0.042881883680820465,
"learning_rate": 9.502805857864616e-07,
"loss": 0.004,
"step": 18820
},
{
"grad_norm": 0.0726175531744957,
"learning_rate": 9.34306000944396e-07,
"loss": 0.0039,
"step": 18830
},
{
"grad_norm": 0.10121230781078339,
"learning_rate": 9.184655598361624e-07,
"loss": 0.0064,
"step": 18840
},
{
"grad_norm": 0.07452309876680374,
"learning_rate": 9.027593057689076e-07,
"loss": 0.0077,
"step": 18850
},
{
"grad_norm": 0.047714851796627045,
"learning_rate": 8.871872816829441e-07,
"loss": 0.0043,
"step": 18860
},
{
"grad_norm": 0.05202336609363556,
"learning_rate": 8.717495301515777e-07,
"loss": 0.0063,
"step": 18870
},
{
"grad_norm": 0.026929423213005066,
"learning_rate": 8.564460933810415e-07,
"loss": 0.0032,
"step": 18880
},
{
"grad_norm": 0.1266394555568695,
"learning_rate": 8.412770132103453e-07,
"loss": 0.0078,
"step": 18890
},
{
"grad_norm": 0.04397597163915634,
"learning_rate": 8.262423311111711e-07,
"loss": 0.0046,
"step": 18900
},
{
"grad_norm": 0.04359564557671547,
"learning_rate": 8.113420881877665e-07,
"loss": 0.0032,
"step": 18910
},
{
"grad_norm": 0.05606939271092415,
"learning_rate": 7.965763251768288e-07,
"loss": 0.0047,
"step": 18920
},
{
"grad_norm": 0.032073911279439926,
"learning_rate": 7.819450824473995e-07,
"loss": 0.0044,
"step": 18930
},
{
"grad_norm": 0.052406635135412216,
"learning_rate": 7.674484000007198e-07,
"loss": 0.0043,
"step": 18940
},
{
"grad_norm": 0.06158546730875969,
"learning_rate": 7.530863174701752e-07,
"loss": 0.0052,
"step": 18950
},
{
"grad_norm": 0.06597442924976349,
"learning_rate": 7.38858874121151e-07,
"loss": 0.0061,
"step": 18960
},
{
"grad_norm": 0.04494604840874672,
"learning_rate": 7.247661088509328e-07,
"loss": 0.0054,
"step": 18970
},
{
"grad_norm": 0.03968639299273491,
"learning_rate": 7.108080601886002e-07,
"loss": 0.0034,
"step": 18980
},
{
"grad_norm": 0.04516436532139778,
"learning_rate": 6.969847662949336e-07,
"loss": 0.0046,
"step": 18990
},
{
"grad_norm": 0.03492464870214462,
"learning_rate": 6.832962649622798e-07,
"loss": 0.0041,
"step": 19000
},
{
"grad_norm": 0.03456168621778488,
"learning_rate": 6.697425936144863e-07,
"loss": 0.007,
"step": 19010
},
{
"grad_norm": 0.059465594589710236,
"learning_rate": 6.563237893067731e-07,
"loss": 0.0047,
"step": 19020
},
{
"grad_norm": 0.08866643160581589,
"learning_rate": 6.430398887256328e-07,
"loss": 0.0032,
"step": 19030
},
{
"grad_norm": 0.037074267864227295,
"learning_rate": 6.298909281887478e-07,
"loss": 0.0043,
"step": 19040
},
{
"grad_norm": 0.03368697687983513,
"learning_rate": 6.168769436448673e-07,
"loss": 0.0033,
"step": 19050
},
{
"grad_norm": 0.058988627046346664,
"learning_rate": 6.03997970673742e-07,
"loss": 0.003,
"step": 19060
},
{
"grad_norm": 0.04054597020149231,
"learning_rate": 5.912540444859782e-07,
"loss": 0.0054,
"step": 19070
},
{
"grad_norm": 0.04888832941651344,
"learning_rate": 5.786451999229837e-07,
"loss": 0.0043,
"step": 19080
},
{
"grad_norm": 0.09000938385725021,
"learning_rate": 5.661714714568722e-07,
"loss": 0.0075,
"step": 19090
},
{
"grad_norm": 0.08032537251710892,
"learning_rate": 5.538328931903259e-07,
"loss": 0.0065,
"step": 19100
},
{
"grad_norm": 0.03778580576181412,
"learning_rate": 5.416294988565551e-07,
"loss": 0.0038,
"step": 19110
},
{
"grad_norm": 0.0301471296697855,
"learning_rate": 5.29561321819172e-07,
"loss": 0.0064,
"step": 19120
},
{
"grad_norm": 0.04304756596684456,
"learning_rate": 5.176283950721061e-07,
"loss": 0.0037,
"step": 19130
},
{
"grad_norm": 0.08168292790651321,
"learning_rate": 5.058307512395332e-07,
"loss": 0.006,
"step": 19140
},
{
"grad_norm": 0.03314193710684776,
"learning_rate": 4.941684225757526e-07,
"loss": 0.0072,
"step": 19150
},
{
"grad_norm": 0.03319231793284416,
"learning_rate": 4.826414409651314e-07,
"loss": 0.0051,
"step": 19160
},
{
"grad_norm": 0.04385793209075928,
"learning_rate": 4.712498379219943e-07,
"loss": 0.0063,
"step": 19170
},
{
"grad_norm": 0.08488308638334274,
"learning_rate": 4.599936445905506e-07,
"loss": 0.0073,
"step": 19180
},
{
"grad_norm": 0.026116928085684776,
"learning_rate": 4.4887289174480594e-07,
"loss": 0.0029,
"step": 19190
},
{
"grad_norm": 0.038966987282037735,
"learning_rate": 4.378876097884621e-07,
"loss": 0.005,
"step": 19200
},
{
"grad_norm": 0.055600617080926895,
"learning_rate": 4.2703782875487264e-07,
"loss": 0.0062,
"step": 19210
},
{
"grad_norm": 0.029152419418096542,
"learning_rate": 4.163235783069208e-07,
"loss": 0.0047,
"step": 19220
},
{
"grad_norm": 0.02960650622844696,
"learning_rate": 4.057448877369585e-07,
"loss": 0.0037,
"step": 19230
},
{
"grad_norm": 0.05459560081362724,
"learning_rate": 3.9530178596672295e-07,
"loss": 0.0035,
"step": 19240
},
{
"grad_norm": 0.0414460264146328,
"learning_rate": 3.849943015472479e-07,
"loss": 0.0036,
"step": 19250
},
{
"grad_norm": 0.05241796001791954,
"learning_rate": 3.748224626588137e-07,
"loss": 0.0052,
"step": 19260
},
{
"grad_norm": 0.06404601037502289,
"learning_rate": 3.647862971108307e-07,
"loss": 0.0092,
"step": 19270
},
{
"grad_norm": 0.03816540539264679,
"learning_rate": 3.5488583234179473e-07,
"loss": 0.0034,
"step": 19280
},
{
"grad_norm": 0.042802464216947556,
"learning_rate": 3.4512109541920413e-07,
"loss": 0.0067,
"step": 19290
},
{
"grad_norm": 0.0605001300573349,
"learning_rate": 3.354921130394706e-07,
"loss": 0.0053,
"step": 19300
},
{
"grad_norm": 0.08837027847766876,
"learning_rate": 3.259989115278639e-07,
"loss": 0.0087,
"step": 19310
},
{
"grad_norm": 0.10174474865198135,
"learning_rate": 3.1664151683843403e-07,
"loss": 0.0043,
"step": 19320
},
{
"grad_norm": 0.03807970881462097,
"learning_rate": 3.074199545539447e-07,
"loss": 0.0027,
"step": 19330
},
{
"grad_norm": 0.02359367161989212,
"learning_rate": 2.983342498857955e-07,
"loss": 0.0043,
"step": 19340
},
{
"grad_norm": 0.034910764545202255,
"learning_rate": 2.893844276739499e-07,
"loss": 0.0035,
"step": 19350
},
{
"grad_norm": 0.08157561719417572,
"learning_rate": 2.8057051238688514e-07,
"loss": 0.0036,
"step": 19360
},
{
"grad_norm": 0.03558242693543434,
"learning_rate": 2.71892528121509e-07,
"loss": 0.0056,
"step": 19370
},
{
"grad_norm": 0.04014763608574867,
"learning_rate": 2.633504986030988e-07,
"loss": 0.006,
"step": 19380
},
{
"grad_norm": 0.049834150820970535,
"learning_rate": 2.549444471852347e-07,
"loss": 0.0035,
"step": 19390
},
{
"grad_norm": 0.04006095975637436,
"learning_rate": 2.4667439684974423e-07,
"loss": 0.0067,
"step": 19400
},
{
"grad_norm": 0.034812238067388535,
"learning_rate": 2.3854037020662467e-07,
"loss": 0.0041,
"step": 19410
},
{
"grad_norm": 0.04246693477034569,
"learning_rate": 2.3054238949399288e-07,
"loss": 0.0056,
"step": 19420
},
{
"grad_norm": 0.061797671020030975,
"learning_rate": 2.2268047657802993e-07,
"loss": 0.005,
"step": 19430
},
{
"grad_norm": 0.029228825122117996,
"learning_rate": 2.149546529529034e-07,
"loss": 0.005,
"step": 19440
},
{
"grad_norm": 0.030314629897475243,
"learning_rate": 2.0736493974071736e-07,
"loss": 0.0042,
"step": 19450
},
{
"grad_norm": 0.054420553147792816,
"learning_rate": 1.9991135769145686e-07,
"loss": 0.0044,
"step": 19460
},
{
"grad_norm": 0.026409875601530075,
"learning_rate": 1.9259392718293245e-07,
"loss": 0.004,
"step": 19470
},
{
"grad_norm": 0.026196150109171867,
"learning_rate": 1.8541266822072467e-07,
"loss": 0.0058,
"step": 19480
},
{
"grad_norm": 0.07092944532632828,
"learning_rate": 1.7836760043811184e-07,
"loss": 0.0037,
"step": 19490
},
{
"grad_norm": 0.10308902710676193,
"learning_rate": 1.7145874309604792e-07,
"loss": 0.008,
"step": 19500
},
{
"grad_norm": 0.06316528469324112,
"learning_rate": 1.6468611508308474e-07,
"loss": 0.0056,
"step": 19510
},
{
"grad_norm": 0.04925818368792534,
"learning_rate": 1.5804973491532204e-07,
"loss": 0.0041,
"step": 19520
},
{
"grad_norm": 0.06427190452814102,
"learning_rate": 1.5154962073637424e-07,
"loss": 0.0044,
"step": 19530
},
{
"grad_norm": 0.027820497751235962,
"learning_rate": 1.4518579031730372e-07,
"loss": 0.0035,
"step": 19540
},
{
"grad_norm": 0.08228770643472672,
"learning_rate": 1.389582610565876e-07,
"loss": 0.0038,
"step": 19550
},
{
"grad_norm": 0.035630207508802414,
"learning_rate": 1.3286704998003995e-07,
"loss": 0.0029,
"step": 19560
},
{
"grad_norm": 0.06858760118484497,
"learning_rate": 1.2691217374080632e-07,
"loss": 0.0059,
"step": 19570
},
{
"grad_norm": 0.030942440032958984,
"learning_rate": 1.2109364861929705e-07,
"loss": 0.0028,
"step": 19580
},
{
"grad_norm": 0.03261679783463478,
"learning_rate": 1.1541149052312628e-07,
"loss": 0.0048,
"step": 19590
},
{
"grad_norm": 0.02540130726993084,
"learning_rate": 1.0986571498710074e-07,
"loss": 0.0046,
"step": 19600
},
{
"grad_norm": 0.023928454145789146,
"learning_rate": 1.0445633717316438e-07,
"loss": 0.0136,
"step": 19610
},
{
"grad_norm": 0.03369027003645897,
"learning_rate": 9.918337187034277e-08,
"loss": 0.0058,
"step": 19620
},
{
"grad_norm": 0.03948524594306946,
"learning_rate": 9.404683349472643e-08,
"loss": 0.0055,
"step": 19630
},
{
"grad_norm": 0.03470523655414581,
"learning_rate": 8.904673608940983e-08,
"loss": 0.0056,
"step": 19640
},
{
"grad_norm": 0.024040725082159042,
"learning_rate": 8.418309332447471e-08,
"loss": 0.0092,
"step": 19650
},
{
"grad_norm": 0.023585395887494087,
"learning_rate": 7.945591849692902e-08,
"loss": 0.0042,
"step": 19660
},
{
"grad_norm": 0.029787050560116768,
"learning_rate": 7.486522453069578e-08,
"loss": 0.0058,
"step": 19670
},
{
"grad_norm": 0.016384869813919067,
"learning_rate": 7.041102397655208e-08,
"loss": 0.0067,
"step": 19680
},
{
"grad_norm": 0.054265912622213364,
"learning_rate": 6.609332901210685e-08,
"loss": 0.0058,
"step": 19690
},
{
"grad_norm": 0.04399024322628975,
"learning_rate": 6.191215144178419e-08,
"loss": 0.0055,
"step": 19700
},
{
"grad_norm": 0.04143797978758812,
"learning_rate": 5.786750269675678e-08,
"loss": 0.0045,
"step": 19710
},
{
"grad_norm": 0.03326734900474548,
"learning_rate": 5.395939383494031e-08,
"loss": 0.0058,
"step": 19720
},
{
"grad_norm": 0.015909343957901,
"learning_rate": 5.018783554095463e-08,
"loss": 0.003,
"step": 19730
},
{
"grad_norm": 0.08597715944051743,
"learning_rate": 4.655283812610156e-08,
"loss": 0.0037,
"step": 19740
},
{
"grad_norm": 0.031575653702020645,
"learning_rate": 4.305441152831491e-08,
"loss": 0.0038,
"step": 19750
},
{
"grad_norm": 0.0561877079308033,
"learning_rate": 3.9692565312171584e-08,
"loss": 0.004,
"step": 19760
},
{
"grad_norm": 0.08059482276439667,
"learning_rate": 3.6467308668824975e-08,
"loss": 0.0045,
"step": 19770
},
{
"grad_norm": 0.02128739282488823,
"learning_rate": 3.3378650416004964e-08,
"loss": 0.0034,
"step": 19780
},
{
"grad_norm": 0.030546288937330246,
"learning_rate": 3.042659899797906e-08,
"loss": 0.0049,
"step": 19790
},
{
"grad_norm": 0.023038653656840324,
"learning_rate": 2.76111624855524e-08,
"loss": 0.0035,
"step": 19800
},
{
"grad_norm": 0.03481736406683922,
"learning_rate": 2.4932348576017784e-08,
"loss": 0.0049,
"step": 19810
},
{
"grad_norm": 0.028808515518903732,
"learning_rate": 2.239016459314458e-08,
"loss": 0.0041,
"step": 19820
},
{
"grad_norm": 0.06456957757472992,
"learning_rate": 1.9984617487173174e-08,
"loss": 0.004,
"step": 19830
},
{
"grad_norm": 0.027450889348983765,
"learning_rate": 1.7715713834776105e-08,
"loss": 0.0039,
"step": 19840
},
{
"grad_norm": 0.07811158150434494,
"learning_rate": 1.5583459839046964e-08,
"loss": 0.0059,
"step": 19850
},
{
"grad_norm": 0.053056903183460236,
"learning_rate": 1.3587861329489304e-08,
"loss": 0.0042,
"step": 19860
},
{
"grad_norm": 0.02446102909743786,
"learning_rate": 1.1728923761994415e-08,
"loss": 0.0023,
"step": 19870
},
{
"grad_norm": 0.047315411269664764,
"learning_rate": 1.0006652218819135e-08,
"loss": 0.0042,
"step": 19880
},
{
"grad_norm": 0.035163022577762604,
"learning_rate": 8.421051408596947e-09,
"loss": 0.0077,
"step": 19890
},
{
"grad_norm": 0.020078569650650024,
"learning_rate": 6.972125666299123e-09,
"loss": 0.0069,
"step": 19900
},
{
"grad_norm": 0.045009635388851166,
"learning_rate": 5.659878953229169e-09,
"loss": 0.0045,
"step": 19910
},
{
"grad_norm": 0.036355242133140564,
"learning_rate": 4.48431485701728e-09,
"loss": 0.0043,
"step": 19920
},
{
"grad_norm": 0.044731684029102325,
"learning_rate": 3.4454365916203322e-09,
"loss": 0.0055,
"step": 19930
},
{
"grad_norm": 0.02352859079837799,
"learning_rate": 2.5432469972830332e-09,
"loss": 0.0029,
"step": 19940
},
{
"grad_norm": 0.04830089956521988,
"learning_rate": 1.7777485405601203e-09,
"loss": 0.0031,
"step": 19950
},
{
"grad_norm": 0.01771678775548935,
"learning_rate": 1.1489433142941597e-09,
"loss": 0.0034,
"step": 19960
},
{
"grad_norm": 0.021128326654434204,
"learning_rate": 6.568330376210963e-10,
"loss": 0.0041,
"step": 19970
},
{
"grad_norm": 0.04460067301988602,
"learning_rate": 3.0141905594249787e-10,
"loss": 0.0051,
"step": 19980
},
{
"grad_norm": 0.021370982751250267,
"learning_rate": 8.270234094776008e-11,
"loss": 0.0049,
"step": 19990
},
{
"grad_norm": 0.022540045902132988,
"learning_rate": 6.834906085551041e-13,
"loss": 0.0044,
"step": 20000
}
],
"logging_steps": 10,
"max_steps": 20000,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 24,
"trial_name": null,
"trial_params": null
}