headtrack_30k / trainer_state.json
Dongkkka's picture
Upload folder using huggingface_hub
08e97a9 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.3,
"eval_steps": 500,
"global_step": 30000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"grad_norm": 2.47322154045105,
"learning_rate": 1.8e-07,
"loss": 1.3541,
"step": 10
},
{
"grad_norm": 2.554671049118042,
"learning_rate": 3.8e-07,
"loss": 1.3667,
"step": 20
},
{
"grad_norm": 2.3967344760894775,
"learning_rate": 5.8e-07,
"loss": 1.351,
"step": 30
},
{
"grad_norm": 2.0986547470092773,
"learning_rate": 7.8e-07,
"loss": 1.3457,
"step": 40
},
{
"grad_norm": 2.090355634689331,
"learning_rate": 9.8e-07,
"loss": 1.3238,
"step": 50
},
{
"grad_norm": 1.3943339586257935,
"learning_rate": 1.18e-06,
"loss": 1.3015,
"step": 60
},
{
"grad_norm": 1.176537036895752,
"learning_rate": 1.3800000000000001e-06,
"loss": 1.254,
"step": 70
},
{
"grad_norm": 0.8108309507369995,
"learning_rate": 1.5800000000000003e-06,
"loss": 1.2257,
"step": 80
},
{
"grad_norm": 0.7492016553878784,
"learning_rate": 1.7800000000000001e-06,
"loss": 1.1949,
"step": 90
},
{
"grad_norm": 0.5984260439872742,
"learning_rate": 1.98e-06,
"loss": 1.1803,
"step": 100
},
{
"grad_norm": 0.45398518443107605,
"learning_rate": 2.1800000000000003e-06,
"loss": 1.1486,
"step": 110
},
{
"grad_norm": 0.3421183228492737,
"learning_rate": 2.38e-06,
"loss": 1.1417,
"step": 120
},
{
"grad_norm": 0.453471839427948,
"learning_rate": 2.5800000000000003e-06,
"loss": 1.1415,
"step": 130
},
{
"grad_norm": 0.3436436653137207,
"learning_rate": 2.78e-06,
"loss": 1.1536,
"step": 140
},
{
"grad_norm": 0.2761518359184265,
"learning_rate": 2.9800000000000003e-06,
"loss": 1.151,
"step": 150
},
{
"grad_norm": 0.2847629189491272,
"learning_rate": 3.1800000000000005e-06,
"loss": 1.1371,
"step": 160
},
{
"grad_norm": 0.2499399036169052,
"learning_rate": 3.38e-06,
"loss": 1.1377,
"step": 170
},
{
"grad_norm": 0.3377789556980133,
"learning_rate": 3.58e-06,
"loss": 1.1361,
"step": 180
},
{
"grad_norm": 0.39922669529914856,
"learning_rate": 3.7800000000000002e-06,
"loss": 1.1307,
"step": 190
},
{
"grad_norm": 0.38898763060569763,
"learning_rate": 3.98e-06,
"loss": 1.1293,
"step": 200
},
{
"grad_norm": 0.37199997901916504,
"learning_rate": 4.18e-06,
"loss": 1.132,
"step": 210
},
{
"grad_norm": 0.3737226724624634,
"learning_rate": 4.38e-06,
"loss": 1.1173,
"step": 220
},
{
"grad_norm": 0.41701287031173706,
"learning_rate": 4.58e-06,
"loss": 1.1092,
"step": 230
},
{
"grad_norm": 0.37127238512039185,
"learning_rate": 4.780000000000001e-06,
"loss": 1.0874,
"step": 240
},
{
"grad_norm": 0.377769410610199,
"learning_rate": 4.98e-06,
"loss": 1.0752,
"step": 250
},
{
"grad_norm": 0.40639528632164,
"learning_rate": 5.18e-06,
"loss": 1.0763,
"step": 260
},
{
"grad_norm": 0.45707887411117554,
"learning_rate": 5.38e-06,
"loss": 1.0654,
"step": 270
},
{
"grad_norm": 0.3577777147293091,
"learning_rate": 5.580000000000001e-06,
"loss": 1.0626,
"step": 280
},
{
"grad_norm": 0.44091716408729553,
"learning_rate": 5.78e-06,
"loss": 1.0506,
"step": 290
},
{
"grad_norm": 0.5065949559211731,
"learning_rate": 5.98e-06,
"loss": 1.0486,
"step": 300
},
{
"grad_norm": 0.40768519043922424,
"learning_rate": 6.18e-06,
"loss": 1.0475,
"step": 310
},
{
"grad_norm": 0.3999238908290863,
"learning_rate": 6.38e-06,
"loss": 1.0496,
"step": 320
},
{
"grad_norm": 0.4539977014064789,
"learning_rate": 6.58e-06,
"loss": 1.0382,
"step": 330
},
{
"grad_norm": 0.4279543459415436,
"learning_rate": 6.78e-06,
"loss": 1.0321,
"step": 340
},
{
"grad_norm": 0.572868824005127,
"learning_rate": 6.98e-06,
"loss": 1.0361,
"step": 350
},
{
"grad_norm": 0.7423160076141357,
"learning_rate": 7.180000000000001e-06,
"loss": 1.0229,
"step": 360
},
{
"grad_norm": 0.6083391904830933,
"learning_rate": 7.3800000000000005e-06,
"loss": 1.0066,
"step": 370
},
{
"grad_norm": 0.7845941781997681,
"learning_rate": 7.580000000000001e-06,
"loss": 0.9973,
"step": 380
},
{
"grad_norm": 0.8202695846557617,
"learning_rate": 7.78e-06,
"loss": 0.9842,
"step": 390
},
{
"grad_norm": 1.003799319267273,
"learning_rate": 7.98e-06,
"loss": 0.9593,
"step": 400
},
{
"grad_norm": 1.0290818214416504,
"learning_rate": 8.18e-06,
"loss": 0.9326,
"step": 410
},
{
"grad_norm": 0.9908057451248169,
"learning_rate": 8.380000000000001e-06,
"loss": 0.8944,
"step": 420
},
{
"grad_norm": 1.069297194480896,
"learning_rate": 8.580000000000001e-06,
"loss": 0.8615,
"step": 430
},
{
"grad_norm": 1.4509676694869995,
"learning_rate": 8.78e-06,
"loss": 0.829,
"step": 440
},
{
"grad_norm": 1.1007381677627563,
"learning_rate": 8.98e-06,
"loss": 0.7987,
"step": 450
},
{
"grad_norm": 1.1426440477371216,
"learning_rate": 9.180000000000002e-06,
"loss": 0.7687,
"step": 460
},
{
"grad_norm": 1.2663944959640503,
"learning_rate": 9.38e-06,
"loss": 0.7481,
"step": 470
},
{
"grad_norm": 1.2125422954559326,
"learning_rate": 9.58e-06,
"loss": 0.7177,
"step": 480
},
{
"grad_norm": 1.4529684782028198,
"learning_rate": 9.78e-06,
"loss": 0.688,
"step": 490
},
{
"grad_norm": 1.2782946825027466,
"learning_rate": 9.980000000000001e-06,
"loss": 0.6622,
"step": 500
},
{
"grad_norm": 1.3107942342758179,
"learning_rate": 1.018e-05,
"loss": 0.632,
"step": 510
},
{
"grad_norm": 1.3640458583831787,
"learning_rate": 1.038e-05,
"loss": 0.6102,
"step": 520
},
{
"grad_norm": 1.5290552377700806,
"learning_rate": 1.058e-05,
"loss": 0.587,
"step": 530
},
{
"grad_norm": 1.355726718902588,
"learning_rate": 1.0780000000000002e-05,
"loss": 0.563,
"step": 540
},
{
"grad_norm": 1.5167773962020874,
"learning_rate": 1.098e-05,
"loss": 0.5297,
"step": 550
},
{
"grad_norm": 1.3675698041915894,
"learning_rate": 1.118e-05,
"loss": 0.5078,
"step": 560
},
{
"grad_norm": 1.5590559244155884,
"learning_rate": 1.1380000000000001e-05,
"loss": 0.4944,
"step": 570
},
{
"grad_norm": 1.3931667804718018,
"learning_rate": 1.1580000000000001e-05,
"loss": 0.476,
"step": 580
},
{
"grad_norm": 1.481071949005127,
"learning_rate": 1.178e-05,
"loss": 0.4501,
"step": 590
},
{
"grad_norm": 1.8843756914138794,
"learning_rate": 1.198e-05,
"loss": 0.4445,
"step": 600
},
{
"grad_norm": 1.8522933721542358,
"learning_rate": 1.2180000000000002e-05,
"loss": 0.4157,
"step": 610
},
{
"grad_norm": 1.8875459432601929,
"learning_rate": 1.238e-05,
"loss": 0.413,
"step": 620
},
{
"grad_norm": 1.7943925857543945,
"learning_rate": 1.258e-05,
"loss": 0.3932,
"step": 630
},
{
"grad_norm": 1.6024821996688843,
"learning_rate": 1.278e-05,
"loss": 0.3799,
"step": 640
},
{
"grad_norm": 1.91981041431427,
"learning_rate": 1.2980000000000001e-05,
"loss": 0.3688,
"step": 650
},
{
"grad_norm": 1.5477911233901978,
"learning_rate": 1.3180000000000001e-05,
"loss": 0.359,
"step": 660
},
{
"grad_norm": 1.8666833639144897,
"learning_rate": 1.338e-05,
"loss": 0.3447,
"step": 670
},
{
"grad_norm": 1.873641848564148,
"learning_rate": 1.358e-05,
"loss": 0.3336,
"step": 680
},
{
"grad_norm": 1.6930667161941528,
"learning_rate": 1.3780000000000002e-05,
"loss": 0.3242,
"step": 690
},
{
"grad_norm": 1.8604819774627686,
"learning_rate": 1.3980000000000002e-05,
"loss": 0.3085,
"step": 700
},
{
"grad_norm": 2.4730148315429688,
"learning_rate": 1.4180000000000001e-05,
"loss": 0.3181,
"step": 710
},
{
"grad_norm": 1.7881016731262207,
"learning_rate": 1.4380000000000001e-05,
"loss": 0.2946,
"step": 720
},
{
"grad_norm": 1.8197489976882935,
"learning_rate": 1.4580000000000003e-05,
"loss": 0.2729,
"step": 730
},
{
"grad_norm": 1.5902010202407837,
"learning_rate": 1.4779999999999999e-05,
"loss": 0.2768,
"step": 740
},
{
"grad_norm": 2.1884756088256836,
"learning_rate": 1.4979999999999999e-05,
"loss": 0.2703,
"step": 750
},
{
"grad_norm": 1.566630482673645,
"learning_rate": 1.518e-05,
"loss": 0.2704,
"step": 760
},
{
"grad_norm": 2.237656593322754,
"learning_rate": 1.538e-05,
"loss": 0.2548,
"step": 770
},
{
"grad_norm": 2.109281539916992,
"learning_rate": 1.558e-05,
"loss": 0.2497,
"step": 780
},
{
"grad_norm": 2.126896858215332,
"learning_rate": 1.578e-05,
"loss": 0.2333,
"step": 790
},
{
"grad_norm": 1.431854486465454,
"learning_rate": 1.598e-05,
"loss": 0.2318,
"step": 800
},
{
"grad_norm": 1.5760784149169922,
"learning_rate": 1.618e-05,
"loss": 0.2188,
"step": 810
},
{
"grad_norm": 2.163811683654785,
"learning_rate": 1.6380000000000002e-05,
"loss": 0.2161,
"step": 820
},
{
"grad_norm": 1.6761753559112549,
"learning_rate": 1.658e-05,
"loss": 0.2113,
"step": 830
},
{
"grad_norm": 1.8158925771713257,
"learning_rate": 1.6780000000000002e-05,
"loss": 0.2033,
"step": 840
},
{
"grad_norm": 2.6011154651641846,
"learning_rate": 1.698e-05,
"loss": 0.2014,
"step": 850
},
{
"grad_norm": 1.9855470657348633,
"learning_rate": 1.718e-05,
"loss": 0.2005,
"step": 860
},
{
"grad_norm": 2.884546995162964,
"learning_rate": 1.7380000000000003e-05,
"loss": 0.1839,
"step": 870
},
{
"grad_norm": 1.792996883392334,
"learning_rate": 1.758e-05,
"loss": 0.1782,
"step": 880
},
{
"grad_norm": 2.064910411834717,
"learning_rate": 1.7780000000000003e-05,
"loss": 0.1651,
"step": 890
},
{
"grad_norm": 2.0864901542663574,
"learning_rate": 1.798e-05,
"loss": 0.1519,
"step": 900
},
{
"grad_norm": 1.6949527263641357,
"learning_rate": 1.818e-05,
"loss": 0.1458,
"step": 910
},
{
"grad_norm": 1.77072012424469,
"learning_rate": 1.838e-05,
"loss": 0.1477,
"step": 920
},
{
"grad_norm": 1.7796998023986816,
"learning_rate": 1.858e-05,
"loss": 0.1508,
"step": 930
},
{
"grad_norm": 2.0563416481018066,
"learning_rate": 1.878e-05,
"loss": 0.1373,
"step": 940
},
{
"grad_norm": 1.5273425579071045,
"learning_rate": 1.898e-05,
"loss": 0.1417,
"step": 950
},
{
"grad_norm": 2.0723495483398438,
"learning_rate": 1.918e-05,
"loss": 0.1415,
"step": 960
},
{
"grad_norm": 1.857120156288147,
"learning_rate": 1.938e-05,
"loss": 0.1382,
"step": 970
},
{
"grad_norm": 2.464312791824341,
"learning_rate": 1.9580000000000002e-05,
"loss": 0.145,
"step": 980
},
{
"grad_norm": 2.2345287799835205,
"learning_rate": 1.978e-05,
"loss": 0.1388,
"step": 990
},
{
"grad_norm": 1.707922101020813,
"learning_rate": 1.9980000000000002e-05,
"loss": 0.1418,
"step": 1000
},
{
"grad_norm": 1.7697720527648926,
"learning_rate": 2.0180000000000003e-05,
"loss": 0.1341,
"step": 1010
},
{
"grad_norm": 1.818679690361023,
"learning_rate": 2.038e-05,
"loss": 0.1363,
"step": 1020
},
{
"grad_norm": 2.0030717849731445,
"learning_rate": 2.0580000000000003e-05,
"loss": 0.1364,
"step": 1030
},
{
"grad_norm": 1.891526222229004,
"learning_rate": 2.078e-05,
"loss": 0.1296,
"step": 1040
},
{
"grad_norm": 2.0210978984832764,
"learning_rate": 2.098e-05,
"loss": 0.1214,
"step": 1050
},
{
"grad_norm": 1.7460476160049438,
"learning_rate": 2.118e-05,
"loss": 0.1178,
"step": 1060
},
{
"grad_norm": 2.0199179649353027,
"learning_rate": 2.138e-05,
"loss": 0.1216,
"step": 1070
},
{
"grad_norm": 1.982852816581726,
"learning_rate": 2.158e-05,
"loss": 0.1176,
"step": 1080
},
{
"grad_norm": 2.053598403930664,
"learning_rate": 2.178e-05,
"loss": 0.1195,
"step": 1090
},
{
"grad_norm": 1.8797863721847534,
"learning_rate": 2.198e-05,
"loss": 0.1294,
"step": 1100
},
{
"grad_norm": 1.9531686305999756,
"learning_rate": 2.218e-05,
"loss": 0.1114,
"step": 1110
},
{
"grad_norm": 1.813335657119751,
"learning_rate": 2.2380000000000003e-05,
"loss": 0.1218,
"step": 1120
},
{
"grad_norm": 1.4582650661468506,
"learning_rate": 2.258e-05,
"loss": 0.1183,
"step": 1130
},
{
"grad_norm": 2.1429901123046875,
"learning_rate": 2.2780000000000002e-05,
"loss": 0.1156,
"step": 1140
},
{
"grad_norm": 1.813545823097229,
"learning_rate": 2.298e-05,
"loss": 0.1225,
"step": 1150
},
{
"grad_norm": 2.017956256866455,
"learning_rate": 2.318e-05,
"loss": 0.1151,
"step": 1160
},
{
"grad_norm": 1.8820877075195312,
"learning_rate": 2.3380000000000003e-05,
"loss": 0.1195,
"step": 1170
},
{
"grad_norm": 1.7476214170455933,
"learning_rate": 2.358e-05,
"loss": 0.1189,
"step": 1180
},
{
"grad_norm": 1.9252310991287231,
"learning_rate": 2.3780000000000003e-05,
"loss": 0.1126,
"step": 1190
},
{
"grad_norm": 1.512940526008606,
"learning_rate": 2.398e-05,
"loss": 0.1198,
"step": 1200
},
{
"grad_norm": 1.8000988960266113,
"learning_rate": 2.418e-05,
"loss": 0.1117,
"step": 1210
},
{
"grad_norm": 1.5912659168243408,
"learning_rate": 2.438e-05,
"loss": 0.105,
"step": 1220
},
{
"grad_norm": 2.5894980430603027,
"learning_rate": 2.4580000000000002e-05,
"loss": 0.1116,
"step": 1230
},
{
"grad_norm": 1.9173959493637085,
"learning_rate": 2.478e-05,
"loss": 0.1132,
"step": 1240
},
{
"grad_norm": 2.345515251159668,
"learning_rate": 2.498e-05,
"loss": 0.1153,
"step": 1250
},
{
"grad_norm": 2.1796514987945557,
"learning_rate": 2.5180000000000003e-05,
"loss": 0.0973,
"step": 1260
},
{
"grad_norm": 1.5757145881652832,
"learning_rate": 2.5380000000000004e-05,
"loss": 0.1166,
"step": 1270
},
{
"grad_norm": 1.9904712438583374,
"learning_rate": 2.5580000000000002e-05,
"loss": 0.1088,
"step": 1280
},
{
"grad_norm": 2.3049025535583496,
"learning_rate": 2.5779999999999997e-05,
"loss": 0.1079,
"step": 1290
},
{
"grad_norm": 1.8384699821472168,
"learning_rate": 2.598e-05,
"loss": 0.106,
"step": 1300
},
{
"grad_norm": 2.0108070373535156,
"learning_rate": 2.618e-05,
"loss": 0.1027,
"step": 1310
},
{
"grad_norm": 1.599073052406311,
"learning_rate": 2.6379999999999998e-05,
"loss": 0.1189,
"step": 1320
},
{
"grad_norm": 1.7642953395843506,
"learning_rate": 2.658e-05,
"loss": 0.1021,
"step": 1330
},
{
"grad_norm": 1.6248481273651123,
"learning_rate": 2.678e-05,
"loss": 0.0942,
"step": 1340
},
{
"grad_norm": 1.5131402015686035,
"learning_rate": 2.698e-05,
"loss": 0.1064,
"step": 1350
},
{
"grad_norm": 1.8669644594192505,
"learning_rate": 2.718e-05,
"loss": 0.1084,
"step": 1360
},
{
"grad_norm": 1.998981237411499,
"learning_rate": 2.738e-05,
"loss": 0.0964,
"step": 1370
},
{
"grad_norm": 1.9910788536071777,
"learning_rate": 2.758e-05,
"loss": 0.0933,
"step": 1380
},
{
"grad_norm": 1.8466424942016602,
"learning_rate": 2.778e-05,
"loss": 0.09,
"step": 1390
},
{
"grad_norm": 1.6654435396194458,
"learning_rate": 2.798e-05,
"loss": 0.0926,
"step": 1400
},
{
"grad_norm": 1.728222131729126,
"learning_rate": 2.818e-05,
"loss": 0.0945,
"step": 1410
},
{
"grad_norm": 1.7263716459274292,
"learning_rate": 2.8380000000000003e-05,
"loss": 0.0951,
"step": 1420
},
{
"grad_norm": 1.734931468963623,
"learning_rate": 2.858e-05,
"loss": 0.1099,
"step": 1430
},
{
"grad_norm": 1.6889578104019165,
"learning_rate": 2.8780000000000002e-05,
"loss": 0.0939,
"step": 1440
},
{
"grad_norm": 1.6777198314666748,
"learning_rate": 2.898e-05,
"loss": 0.087,
"step": 1450
},
{
"grad_norm": 1.748708963394165,
"learning_rate": 2.9180000000000002e-05,
"loss": 0.0993,
"step": 1460
},
{
"grad_norm": 1.6266566514968872,
"learning_rate": 2.9380000000000003e-05,
"loss": 0.0906,
"step": 1470
},
{
"grad_norm": 1.117978811264038,
"learning_rate": 2.958e-05,
"loss": 0.0901,
"step": 1480
},
{
"grad_norm": 1.562015175819397,
"learning_rate": 2.9780000000000003e-05,
"loss": 0.0984,
"step": 1490
},
{
"grad_norm": 1.7531830072402954,
"learning_rate": 2.998e-05,
"loss": 0.0924,
"step": 1500
},
{
"grad_norm": 1.4399970769882202,
"learning_rate": 3.0180000000000002e-05,
"loss": 0.0949,
"step": 1510
},
{
"grad_norm": 1.5905520915985107,
"learning_rate": 3.0380000000000004e-05,
"loss": 0.0951,
"step": 1520
},
{
"grad_norm": 1.9324289560317993,
"learning_rate": 3.058e-05,
"loss": 0.0875,
"step": 1530
},
{
"grad_norm": 1.8375229835510254,
"learning_rate": 3.078e-05,
"loss": 0.0925,
"step": 1540
},
{
"grad_norm": 1.8989778757095337,
"learning_rate": 3.0980000000000005e-05,
"loss": 0.0919,
"step": 1550
},
{
"grad_norm": 1.6824874877929688,
"learning_rate": 3.118e-05,
"loss": 0.0928,
"step": 1560
},
{
"grad_norm": 1.7934844493865967,
"learning_rate": 3.138e-05,
"loss": 0.0944,
"step": 1570
},
{
"grad_norm": 1.8108294010162354,
"learning_rate": 3.1580000000000006e-05,
"loss": 0.0895,
"step": 1580
},
{
"grad_norm": 1.4770379066467285,
"learning_rate": 3.1780000000000004e-05,
"loss": 0.085,
"step": 1590
},
{
"grad_norm": 1.576690673828125,
"learning_rate": 3.198e-05,
"loss": 0.0963,
"step": 1600
},
{
"grad_norm": 1.2559643983840942,
"learning_rate": 3.218e-05,
"loss": 0.0856,
"step": 1610
},
{
"grad_norm": 2.0229685306549072,
"learning_rate": 3.238e-05,
"loss": 0.0857,
"step": 1620
},
{
"grad_norm": 1.7076901197433472,
"learning_rate": 3.2579999999999996e-05,
"loss": 0.0954,
"step": 1630
},
{
"grad_norm": 2.049443244934082,
"learning_rate": 3.278e-05,
"loss": 0.0937,
"step": 1640
},
{
"grad_norm": 1.7413936853408813,
"learning_rate": 3.298e-05,
"loss": 0.0864,
"step": 1650
},
{
"grad_norm": 1.6512004137039185,
"learning_rate": 3.318e-05,
"loss": 0.086,
"step": 1660
},
{
"grad_norm": 1.6482833623886108,
"learning_rate": 3.338e-05,
"loss": 0.0921,
"step": 1670
},
{
"grad_norm": 1.646140694618225,
"learning_rate": 3.358e-05,
"loss": 0.0856,
"step": 1680
},
{
"grad_norm": 1.54812490940094,
"learning_rate": 3.378e-05,
"loss": 0.0819,
"step": 1690
},
{
"grad_norm": 1.646413803100586,
"learning_rate": 3.398e-05,
"loss": 0.0991,
"step": 1700
},
{
"grad_norm": 1.7182942628860474,
"learning_rate": 3.418e-05,
"loss": 0.0866,
"step": 1710
},
{
"grad_norm": 1.6291934251785278,
"learning_rate": 3.438e-05,
"loss": 0.0891,
"step": 1720
},
{
"grad_norm": 1.3367891311645508,
"learning_rate": 3.4580000000000004e-05,
"loss": 0.0864,
"step": 1730
},
{
"grad_norm": 1.670127511024475,
"learning_rate": 3.478e-05,
"loss": 0.0826,
"step": 1740
},
{
"grad_norm": 1.263794183731079,
"learning_rate": 3.498e-05,
"loss": 0.0823,
"step": 1750
},
{
"grad_norm": 1.2835052013397217,
"learning_rate": 3.518e-05,
"loss": 0.0789,
"step": 1760
},
{
"grad_norm": 1.7164093255996704,
"learning_rate": 3.5380000000000003e-05,
"loss": 0.0973,
"step": 1770
},
{
"grad_norm": 1.360939860343933,
"learning_rate": 3.558e-05,
"loss": 0.0882,
"step": 1780
},
{
"grad_norm": 1.6290953159332275,
"learning_rate": 3.578e-05,
"loss": 0.083,
"step": 1790
},
{
"grad_norm": 1.4504064321517944,
"learning_rate": 3.5980000000000004e-05,
"loss": 0.0926,
"step": 1800
},
{
"grad_norm": 1.495097041130066,
"learning_rate": 3.618e-05,
"loss": 0.0841,
"step": 1810
},
{
"grad_norm": 1.4847289323806763,
"learning_rate": 3.638e-05,
"loss": 0.0848,
"step": 1820
},
{
"grad_norm": 1.649829626083374,
"learning_rate": 3.6580000000000006e-05,
"loss": 0.0812,
"step": 1830
},
{
"grad_norm": 1.4198445081710815,
"learning_rate": 3.6780000000000004e-05,
"loss": 0.0798,
"step": 1840
},
{
"grad_norm": 1.7414052486419678,
"learning_rate": 3.698e-05,
"loss": 0.0799,
"step": 1850
},
{
"grad_norm": 1.5118916034698486,
"learning_rate": 3.7180000000000007e-05,
"loss": 0.0794,
"step": 1860
},
{
"grad_norm": 1.7728333473205566,
"learning_rate": 3.7380000000000005e-05,
"loss": 0.0816,
"step": 1870
},
{
"grad_norm": 1.4119099378585815,
"learning_rate": 3.758e-05,
"loss": 0.0795,
"step": 1880
},
{
"grad_norm": 1.4366050958633423,
"learning_rate": 3.778000000000001e-05,
"loss": 0.0771,
"step": 1890
},
{
"grad_norm": 1.3541799783706665,
"learning_rate": 3.7980000000000006e-05,
"loss": 0.0833,
"step": 1900
},
{
"grad_norm": 1.5392347574234009,
"learning_rate": 3.818e-05,
"loss": 0.085,
"step": 1910
},
{
"grad_norm": 1.500915765762329,
"learning_rate": 3.838e-05,
"loss": 0.0752,
"step": 1920
},
{
"grad_norm": 1.705000400543213,
"learning_rate": 3.858e-05,
"loss": 0.0816,
"step": 1930
},
{
"grad_norm": 1.6263823509216309,
"learning_rate": 3.878e-05,
"loss": 0.0925,
"step": 1940
},
{
"grad_norm": 1.429979681968689,
"learning_rate": 3.898e-05,
"loss": 0.0817,
"step": 1950
},
{
"grad_norm": 1.6098904609680176,
"learning_rate": 3.918e-05,
"loss": 0.0771,
"step": 1960
},
{
"grad_norm": 1.1158515214920044,
"learning_rate": 3.938e-05,
"loss": 0.0806,
"step": 1970
},
{
"grad_norm": 1.2952687740325928,
"learning_rate": 3.958e-05,
"loss": 0.0839,
"step": 1980
},
{
"grad_norm": 1.4825958013534546,
"learning_rate": 3.978e-05,
"loss": 0.0764,
"step": 1990
},
{
"grad_norm": 1.162972331047058,
"learning_rate": 3.998e-05,
"loss": 0.078,
"step": 2000
},
{
"grad_norm": 1.2341431379318237,
"learning_rate": 4.018e-05,
"loss": 0.0791,
"step": 2010
},
{
"grad_norm": 1.4787181615829468,
"learning_rate": 4.038e-05,
"loss": 0.0804,
"step": 2020
},
{
"grad_norm": 1.3419722318649292,
"learning_rate": 4.058e-05,
"loss": 0.0726,
"step": 2030
},
{
"grad_norm": 1.2373441457748413,
"learning_rate": 4.078e-05,
"loss": 0.0811,
"step": 2040
},
{
"grad_norm": 1.595677137374878,
"learning_rate": 4.0980000000000004e-05,
"loss": 0.084,
"step": 2050
},
{
"grad_norm": 1.3520156145095825,
"learning_rate": 4.118e-05,
"loss": 0.0811,
"step": 2060
},
{
"grad_norm": 1.1588202714920044,
"learning_rate": 4.138e-05,
"loss": 0.081,
"step": 2070
},
{
"grad_norm": 1.2145284414291382,
"learning_rate": 4.1580000000000005e-05,
"loss": 0.0783,
"step": 2080
},
{
"grad_norm": 1.0782665014266968,
"learning_rate": 4.178e-05,
"loss": 0.0804,
"step": 2090
},
{
"grad_norm": 1.3053752183914185,
"learning_rate": 4.198e-05,
"loss": 0.0785,
"step": 2100
},
{
"grad_norm": 1.4105626344680786,
"learning_rate": 4.2180000000000006e-05,
"loss": 0.0794,
"step": 2110
},
{
"grad_norm": 1.27247154712677,
"learning_rate": 4.2380000000000004e-05,
"loss": 0.0741,
"step": 2120
},
{
"grad_norm": 1.4848867654800415,
"learning_rate": 4.258e-05,
"loss": 0.0849,
"step": 2130
},
{
"grad_norm": 1.5192539691925049,
"learning_rate": 4.278e-05,
"loss": 0.0819,
"step": 2140
},
{
"grad_norm": 1.677297592163086,
"learning_rate": 4.2980000000000005e-05,
"loss": 0.0812,
"step": 2150
},
{
"grad_norm": 1.2418543100357056,
"learning_rate": 4.318e-05,
"loss": 0.0671,
"step": 2160
},
{
"grad_norm": 1.105080008506775,
"learning_rate": 4.338e-05,
"loss": 0.0777,
"step": 2170
},
{
"grad_norm": 1.1512064933776855,
"learning_rate": 4.3580000000000006e-05,
"loss": 0.0784,
"step": 2180
},
{
"grad_norm": 1.164589524269104,
"learning_rate": 4.3780000000000004e-05,
"loss": 0.0755,
"step": 2190
},
{
"grad_norm": 1.8655939102172852,
"learning_rate": 4.398e-05,
"loss": 0.0698,
"step": 2200
},
{
"grad_norm": 1.3707654476165771,
"learning_rate": 4.418000000000001e-05,
"loss": 0.0736,
"step": 2210
},
{
"grad_norm": 1.2294223308563232,
"learning_rate": 4.438e-05,
"loss": 0.0721,
"step": 2220
},
{
"grad_norm": 1.0154931545257568,
"learning_rate": 4.458e-05,
"loss": 0.0719,
"step": 2230
},
{
"grad_norm": 1.0982472896575928,
"learning_rate": 4.478e-05,
"loss": 0.0764,
"step": 2240
},
{
"grad_norm": 1.222943902015686,
"learning_rate": 4.498e-05,
"loss": 0.0828,
"step": 2250
},
{
"grad_norm": 1.0855426788330078,
"learning_rate": 4.518e-05,
"loss": 0.0834,
"step": 2260
},
{
"grad_norm": 1.4420665502548218,
"learning_rate": 4.538e-05,
"loss": 0.0811,
"step": 2270
},
{
"grad_norm": 0.8978270888328552,
"learning_rate": 4.558e-05,
"loss": 0.0743,
"step": 2280
},
{
"grad_norm": 0.984847366809845,
"learning_rate": 4.578e-05,
"loss": 0.0685,
"step": 2290
},
{
"grad_norm": 1.1125538349151611,
"learning_rate": 4.5980000000000004e-05,
"loss": 0.0728,
"step": 2300
},
{
"grad_norm": 1.0328806638717651,
"learning_rate": 4.618e-05,
"loss": 0.0748,
"step": 2310
},
{
"grad_norm": 0.9082860350608826,
"learning_rate": 4.638e-05,
"loss": 0.0753,
"step": 2320
},
{
"grad_norm": 1.2041865587234497,
"learning_rate": 4.6580000000000005e-05,
"loss": 0.0777,
"step": 2330
},
{
"grad_norm": 0.8943385481834412,
"learning_rate": 4.678e-05,
"loss": 0.0757,
"step": 2340
},
{
"grad_norm": 1.3722305297851562,
"learning_rate": 4.698e-05,
"loss": 0.0722,
"step": 2350
},
{
"grad_norm": 1.38225519657135,
"learning_rate": 4.718e-05,
"loss": 0.0782,
"step": 2360
},
{
"grad_norm": 1.0935956239700317,
"learning_rate": 4.7380000000000004e-05,
"loss": 0.0741,
"step": 2370
},
{
"grad_norm": 0.9566012024879456,
"learning_rate": 4.758e-05,
"loss": 0.0781,
"step": 2380
},
{
"grad_norm": 1.022346019744873,
"learning_rate": 4.778e-05,
"loss": 0.0712,
"step": 2390
},
{
"grad_norm": 0.8095312714576721,
"learning_rate": 4.7980000000000005e-05,
"loss": 0.0734,
"step": 2400
},
{
"grad_norm": 0.9501664638519287,
"learning_rate": 4.818e-05,
"loss": 0.069,
"step": 2410
},
{
"grad_norm": 1.172012209892273,
"learning_rate": 4.838e-05,
"loss": 0.0747,
"step": 2420
},
{
"grad_norm": 1.157981038093567,
"learning_rate": 4.8580000000000006e-05,
"loss": 0.0737,
"step": 2430
},
{
"grad_norm": 1.4430627822875977,
"learning_rate": 4.8780000000000004e-05,
"loss": 0.0732,
"step": 2440
},
{
"grad_norm": 1.2398930788040161,
"learning_rate": 4.898e-05,
"loss": 0.0734,
"step": 2450
},
{
"grad_norm": 1.2574845552444458,
"learning_rate": 4.918000000000001e-05,
"loss": 0.0814,
"step": 2460
},
{
"grad_norm": 1.1682604551315308,
"learning_rate": 4.9380000000000005e-05,
"loss": 0.07,
"step": 2470
},
{
"grad_norm": 1.03831946849823,
"learning_rate": 4.958e-05,
"loss": 0.076,
"step": 2480
},
{
"grad_norm": 1.021917462348938,
"learning_rate": 4.978e-05,
"loss": 0.071,
"step": 2490
},
{
"grad_norm": 0.910327136516571,
"learning_rate": 4.9980000000000006e-05,
"loss": 0.0737,
"step": 2500
},
{
"grad_norm": 0.8853943943977356,
"learning_rate": 5.0180000000000004e-05,
"loss": 0.0776,
"step": 2510
},
{
"grad_norm": 0.9305204749107361,
"learning_rate": 5.038e-05,
"loss": 0.0702,
"step": 2520
},
{
"grad_norm": 1.1481506824493408,
"learning_rate": 5.058000000000001e-05,
"loss": 0.0659,
"step": 2530
},
{
"grad_norm": 0.8893597722053528,
"learning_rate": 5.0780000000000005e-05,
"loss": 0.0664,
"step": 2540
},
{
"grad_norm": 1.3460512161254883,
"learning_rate": 5.098e-05,
"loss": 0.0759,
"step": 2550
},
{
"grad_norm": 1.0590441226959229,
"learning_rate": 5.118000000000001e-05,
"loss": 0.0679,
"step": 2560
},
{
"grad_norm": 0.8986999988555908,
"learning_rate": 5.1380000000000006e-05,
"loss": 0.0676,
"step": 2570
},
{
"grad_norm": 1.1072858572006226,
"learning_rate": 5.1580000000000004e-05,
"loss": 0.0754,
"step": 2580
},
{
"grad_norm": 1.118714451789856,
"learning_rate": 5.178000000000001e-05,
"loss": 0.0677,
"step": 2590
},
{
"grad_norm": 1.1817659139633179,
"learning_rate": 5.198000000000001e-05,
"loss": 0.0723,
"step": 2600
},
{
"grad_norm": 1.1865016222000122,
"learning_rate": 5.2180000000000005e-05,
"loss": 0.0714,
"step": 2610
},
{
"grad_norm": 1.191595435142517,
"learning_rate": 5.238000000000001e-05,
"loss": 0.0663,
"step": 2620
},
{
"grad_norm": 0.8069055676460266,
"learning_rate": 5.258000000000001e-05,
"loss": 0.0739,
"step": 2630
},
{
"grad_norm": 1.0193506479263306,
"learning_rate": 5.2780000000000006e-05,
"loss": 0.0694,
"step": 2640
},
{
"grad_norm": 0.9555947184562683,
"learning_rate": 5.2980000000000004e-05,
"loss": 0.0707,
"step": 2650
},
{
"grad_norm": 0.7661393284797668,
"learning_rate": 5.318000000000001e-05,
"loss": 0.0676,
"step": 2660
},
{
"grad_norm": 0.8302662968635559,
"learning_rate": 5.338000000000001e-05,
"loss": 0.0678,
"step": 2670
},
{
"grad_norm": 0.959805428981781,
"learning_rate": 5.3580000000000005e-05,
"loss": 0.0629,
"step": 2680
},
{
"grad_norm": 0.9672425985336304,
"learning_rate": 5.378e-05,
"loss": 0.0723,
"step": 2690
},
{
"grad_norm": 0.8574071526527405,
"learning_rate": 5.3979999999999995e-05,
"loss": 0.0697,
"step": 2700
},
{
"grad_norm": 1.0239274501800537,
"learning_rate": 5.418e-05,
"loss": 0.0719,
"step": 2710
},
{
"grad_norm": 1.321614384651184,
"learning_rate": 5.438e-05,
"loss": 0.0625,
"step": 2720
},
{
"grad_norm": 1.24257230758667,
"learning_rate": 5.4579999999999996e-05,
"loss": 0.0726,
"step": 2730
},
{
"grad_norm": 0.8882970809936523,
"learning_rate": 5.478e-05,
"loss": 0.0708,
"step": 2740
},
{
"grad_norm": 0.8559255003929138,
"learning_rate": 5.498e-05,
"loss": 0.0674,
"step": 2750
},
{
"grad_norm": 0.9045839309692383,
"learning_rate": 5.518e-05,
"loss": 0.0704,
"step": 2760
},
{
"grad_norm": 0.614384114742279,
"learning_rate": 5.538e-05,
"loss": 0.0623,
"step": 2770
},
{
"grad_norm": 0.795306384563446,
"learning_rate": 5.558e-05,
"loss": 0.0648,
"step": 2780
},
{
"grad_norm": 0.886047899723053,
"learning_rate": 5.578e-05,
"loss": 0.0651,
"step": 2790
},
{
"grad_norm": 0.6749942302703857,
"learning_rate": 5.5979999999999996e-05,
"loss": 0.0642,
"step": 2800
},
{
"grad_norm": 0.8019365072250366,
"learning_rate": 5.618e-05,
"loss": 0.0609,
"step": 2810
},
{
"grad_norm": 0.795505702495575,
"learning_rate": 5.638e-05,
"loss": 0.0545,
"step": 2820
},
{
"grad_norm": 0.960997998714447,
"learning_rate": 5.658e-05,
"loss": 0.0671,
"step": 2830
},
{
"grad_norm": 0.8016205430030823,
"learning_rate": 5.678e-05,
"loss": 0.0673,
"step": 2840
},
{
"grad_norm": 0.9191439747810364,
"learning_rate": 5.698e-05,
"loss": 0.0606,
"step": 2850
},
{
"grad_norm": 0.9075175523757935,
"learning_rate": 5.718e-05,
"loss": 0.0609,
"step": 2860
},
{
"grad_norm": 0.8477808833122253,
"learning_rate": 5.738e-05,
"loss": 0.0646,
"step": 2870
},
{
"grad_norm": 0.8051248788833618,
"learning_rate": 5.758e-05,
"loss": 0.0603,
"step": 2880
},
{
"grad_norm": 0.7463070750236511,
"learning_rate": 5.778e-05,
"loss": 0.0606,
"step": 2890
},
{
"grad_norm": 1.0391595363616943,
"learning_rate": 5.7980000000000004e-05,
"loss": 0.0625,
"step": 2900
},
{
"grad_norm": 1.04859459400177,
"learning_rate": 5.818e-05,
"loss": 0.0623,
"step": 2910
},
{
"grad_norm": 1.0546082258224487,
"learning_rate": 5.838e-05,
"loss": 0.0605,
"step": 2920
},
{
"grad_norm": 0.848749041557312,
"learning_rate": 5.858e-05,
"loss": 0.0707,
"step": 2930
},
{
"grad_norm": 0.9367526173591614,
"learning_rate": 5.878e-05,
"loss": 0.0692,
"step": 2940
},
{
"grad_norm": 0.7209922671318054,
"learning_rate": 5.898e-05,
"loss": 0.0655,
"step": 2950
},
{
"grad_norm": 0.8936607837677002,
"learning_rate": 5.918e-05,
"loss": 0.0668,
"step": 2960
},
{
"grad_norm": 0.8779922723770142,
"learning_rate": 5.9380000000000004e-05,
"loss": 0.0628,
"step": 2970
},
{
"grad_norm": 0.7889860272407532,
"learning_rate": 5.958e-05,
"loss": 0.0606,
"step": 2980
},
{
"grad_norm": 0.9793576598167419,
"learning_rate": 5.978e-05,
"loss": 0.0605,
"step": 2990
},
{
"grad_norm": 0.8431522250175476,
"learning_rate": 5.9980000000000005e-05,
"loss": 0.063,
"step": 3000
},
{
"grad_norm": 0.776141881942749,
"learning_rate": 6.018e-05,
"loss": 0.0714,
"step": 3010
},
{
"grad_norm": 1.036238193511963,
"learning_rate": 6.038e-05,
"loss": 0.0658,
"step": 3020
},
{
"grad_norm": 0.7154647707939148,
"learning_rate": 6.0580000000000006e-05,
"loss": 0.0628,
"step": 3030
},
{
"grad_norm": 0.7451269626617432,
"learning_rate": 6.0780000000000004e-05,
"loss": 0.0603,
"step": 3040
},
{
"grad_norm": 0.6692603826522827,
"learning_rate": 6.098e-05,
"loss": 0.0624,
"step": 3050
},
{
"grad_norm": 1.0105844736099243,
"learning_rate": 6.118000000000001e-05,
"loss": 0.0643,
"step": 3060
},
{
"grad_norm": 0.9448408484458923,
"learning_rate": 6.138e-05,
"loss": 0.0654,
"step": 3070
},
{
"grad_norm": 0.8452638983726501,
"learning_rate": 6.158e-05,
"loss": 0.0622,
"step": 3080
},
{
"grad_norm": 0.9193839430809021,
"learning_rate": 6.178000000000001e-05,
"loss": 0.0613,
"step": 3090
},
{
"grad_norm": 0.92941814661026,
"learning_rate": 6.198e-05,
"loss": 0.0639,
"step": 3100
},
{
"grad_norm": 0.8201425075531006,
"learning_rate": 6.218e-05,
"loss": 0.0711,
"step": 3110
},
{
"grad_norm": 0.7324569225311279,
"learning_rate": 6.238000000000001e-05,
"loss": 0.0567,
"step": 3120
},
{
"grad_norm": 0.8137140274047852,
"learning_rate": 6.258e-05,
"loss": 0.0673,
"step": 3130
},
{
"grad_norm": 0.8640687465667725,
"learning_rate": 6.278e-05,
"loss": 0.0591,
"step": 3140
},
{
"grad_norm": 0.9455040097236633,
"learning_rate": 6.298000000000001e-05,
"loss": 0.0665,
"step": 3150
},
{
"grad_norm": 0.84979248046875,
"learning_rate": 6.318e-05,
"loss": 0.0565,
"step": 3160
},
{
"grad_norm": 0.6208917498588562,
"learning_rate": 6.338e-05,
"loss": 0.0613,
"step": 3170
},
{
"grad_norm": 0.6841639280319214,
"learning_rate": 6.358000000000001e-05,
"loss": 0.0612,
"step": 3180
},
{
"grad_norm": 0.7783037424087524,
"learning_rate": 6.378e-05,
"loss": 0.0635,
"step": 3190
},
{
"grad_norm": 0.6852115392684937,
"learning_rate": 6.398000000000001e-05,
"loss": 0.0567,
"step": 3200
},
{
"grad_norm": 0.7794846296310425,
"learning_rate": 6.418000000000001e-05,
"loss": 0.0556,
"step": 3210
},
{
"grad_norm": 0.9447958469390869,
"learning_rate": 6.438e-05,
"loss": 0.0619,
"step": 3220
},
{
"grad_norm": 0.6817174553871155,
"learning_rate": 6.458000000000001e-05,
"loss": 0.0694,
"step": 3230
},
{
"grad_norm": 0.8658831119537354,
"learning_rate": 6.478000000000001e-05,
"loss": 0.0622,
"step": 3240
},
{
"grad_norm": 0.741489052772522,
"learning_rate": 6.498e-05,
"loss": 0.0619,
"step": 3250
},
{
"grad_norm": 0.7767813205718994,
"learning_rate": 6.518000000000001e-05,
"loss": 0.0618,
"step": 3260
},
{
"grad_norm": 0.8194187879562378,
"learning_rate": 6.538000000000001e-05,
"loss": 0.0651,
"step": 3270
},
{
"grad_norm": 0.6319522857666016,
"learning_rate": 6.558e-05,
"loss": 0.0569,
"step": 3280
},
{
"grad_norm": 0.7433358430862427,
"learning_rate": 6.578000000000001e-05,
"loss": 0.0605,
"step": 3290
},
{
"grad_norm": 0.7706665992736816,
"learning_rate": 6.598e-05,
"loss": 0.0611,
"step": 3300
},
{
"grad_norm": 0.6321914792060852,
"learning_rate": 6.618e-05,
"loss": 0.0599,
"step": 3310
},
{
"grad_norm": 0.6240274310112,
"learning_rate": 6.638e-05,
"loss": 0.0619,
"step": 3320
},
{
"grad_norm": 0.7866604328155518,
"learning_rate": 6.658e-05,
"loss": 0.0589,
"step": 3330
},
{
"grad_norm": 0.8589717745780945,
"learning_rate": 6.678e-05,
"loss": 0.0609,
"step": 3340
},
{
"grad_norm": 0.7344950437545776,
"learning_rate": 6.698e-05,
"loss": 0.0627,
"step": 3350
},
{
"grad_norm": 0.879833996295929,
"learning_rate": 6.718e-05,
"loss": 0.0586,
"step": 3360
},
{
"grad_norm": 0.5960578918457031,
"learning_rate": 6.738e-05,
"loss": 0.0574,
"step": 3370
},
{
"grad_norm": 0.8435735702514648,
"learning_rate": 6.758e-05,
"loss": 0.0576,
"step": 3380
},
{
"grad_norm": 0.6635606288909912,
"learning_rate": 6.778e-05,
"loss": 0.0615,
"step": 3390
},
{
"grad_norm": 0.656762421131134,
"learning_rate": 6.798e-05,
"loss": 0.0621,
"step": 3400
},
{
"grad_norm": 0.7073556780815125,
"learning_rate": 6.818e-05,
"loss": 0.0673,
"step": 3410
},
{
"grad_norm": 0.8063334226608276,
"learning_rate": 6.838e-05,
"loss": 0.0591,
"step": 3420
},
{
"grad_norm": 0.6699609160423279,
"learning_rate": 6.858e-05,
"loss": 0.0542,
"step": 3430
},
{
"grad_norm": 0.9240128993988037,
"learning_rate": 6.878e-05,
"loss": 0.0627,
"step": 3440
},
{
"grad_norm": 0.6918993592262268,
"learning_rate": 6.898e-05,
"loss": 0.051,
"step": 3450
},
{
"grad_norm": 0.5891866683959961,
"learning_rate": 6.918e-05,
"loss": 0.0568,
"step": 3460
},
{
"grad_norm": 0.8400129079818726,
"learning_rate": 6.938e-05,
"loss": 0.065,
"step": 3470
},
{
"grad_norm": 0.7416741251945496,
"learning_rate": 6.958e-05,
"loss": 0.0603,
"step": 3480
},
{
"grad_norm": 0.749572217464447,
"learning_rate": 6.978e-05,
"loss": 0.0595,
"step": 3490
},
{
"grad_norm": 0.8416393399238586,
"learning_rate": 6.998e-05,
"loss": 0.0554,
"step": 3500
},
{
"grad_norm": 0.8583645224571228,
"learning_rate": 7.018e-05,
"loss": 0.0632,
"step": 3510
},
{
"grad_norm": 0.5239070653915405,
"learning_rate": 7.038e-05,
"loss": 0.0579,
"step": 3520
},
{
"grad_norm": 0.6293121576309204,
"learning_rate": 7.058e-05,
"loss": 0.058,
"step": 3530
},
{
"grad_norm": 0.7208630442619324,
"learning_rate": 7.078e-05,
"loss": 0.0582,
"step": 3540
},
{
"grad_norm": 0.72175532579422,
"learning_rate": 7.098e-05,
"loss": 0.0573,
"step": 3550
},
{
"grad_norm": 0.8187160491943359,
"learning_rate": 7.118e-05,
"loss": 0.0594,
"step": 3560
},
{
"grad_norm": 0.7715169191360474,
"learning_rate": 7.138e-05,
"loss": 0.0611,
"step": 3570
},
{
"grad_norm": 0.5853291153907776,
"learning_rate": 7.158e-05,
"loss": 0.0615,
"step": 3580
},
{
"grad_norm": 0.6136788725852966,
"learning_rate": 7.178000000000001e-05,
"loss": 0.0528,
"step": 3590
},
{
"grad_norm": 0.74713134765625,
"learning_rate": 7.198e-05,
"loss": 0.0559,
"step": 3600
},
{
"grad_norm": 0.7303146123886108,
"learning_rate": 7.218e-05,
"loss": 0.0566,
"step": 3610
},
{
"grad_norm": 0.7068881988525391,
"learning_rate": 7.238000000000001e-05,
"loss": 0.0553,
"step": 3620
},
{
"grad_norm": 0.6625126600265503,
"learning_rate": 7.258e-05,
"loss": 0.056,
"step": 3630
},
{
"grad_norm": 0.7499129176139832,
"learning_rate": 7.278e-05,
"loss": 0.0511,
"step": 3640
},
{
"grad_norm": 0.8373027443885803,
"learning_rate": 7.298000000000001e-05,
"loss": 0.0519,
"step": 3650
},
{
"grad_norm": 0.6602651476860046,
"learning_rate": 7.318e-05,
"loss": 0.0558,
"step": 3660
},
{
"grad_norm": 0.7000454068183899,
"learning_rate": 7.338e-05,
"loss": 0.0535,
"step": 3670
},
{
"grad_norm": 0.6676427721977234,
"learning_rate": 7.358000000000001e-05,
"loss": 0.0499,
"step": 3680
},
{
"grad_norm": 0.6426597833633423,
"learning_rate": 7.378e-05,
"loss": 0.0617,
"step": 3690
},
{
"grad_norm": 0.830104649066925,
"learning_rate": 7.398e-05,
"loss": 0.0516,
"step": 3700
},
{
"grad_norm": 0.6411163210868835,
"learning_rate": 7.418000000000001e-05,
"loss": 0.0534,
"step": 3710
},
{
"grad_norm": 0.44452545046806335,
"learning_rate": 7.438e-05,
"loss": 0.0554,
"step": 3720
},
{
"grad_norm": 0.698541522026062,
"learning_rate": 7.458000000000001e-05,
"loss": 0.057,
"step": 3730
},
{
"grad_norm": 0.8207803964614868,
"learning_rate": 7.478e-05,
"loss": 0.0584,
"step": 3740
},
{
"grad_norm": 0.6441124081611633,
"learning_rate": 7.498e-05,
"loss": 0.0569,
"step": 3750
},
{
"grad_norm": 0.5994744896888733,
"learning_rate": 7.518000000000001e-05,
"loss": 0.0577,
"step": 3760
},
{
"grad_norm": 0.5829905867576599,
"learning_rate": 7.538e-05,
"loss": 0.0492,
"step": 3770
},
{
"grad_norm": 0.6664201021194458,
"learning_rate": 7.558e-05,
"loss": 0.0558,
"step": 3780
},
{
"grad_norm": 0.6259579062461853,
"learning_rate": 7.578000000000001e-05,
"loss": 0.0538,
"step": 3790
},
{
"grad_norm": 0.6106650233268738,
"learning_rate": 7.598e-05,
"loss": 0.0588,
"step": 3800
},
{
"grad_norm": 0.7316349744796753,
"learning_rate": 7.618e-05,
"loss": 0.0632,
"step": 3810
},
{
"grad_norm": 0.6063339114189148,
"learning_rate": 7.638000000000001e-05,
"loss": 0.0657,
"step": 3820
},
{
"grad_norm": 0.537717878818512,
"learning_rate": 7.658e-05,
"loss": 0.0518,
"step": 3830
},
{
"grad_norm": 0.717360258102417,
"learning_rate": 7.678000000000001e-05,
"loss": 0.0541,
"step": 3840
},
{
"grad_norm": 0.66494220495224,
"learning_rate": 7.698000000000001e-05,
"loss": 0.0578,
"step": 3850
},
{
"grad_norm": 0.5406110882759094,
"learning_rate": 7.718e-05,
"loss": 0.0558,
"step": 3860
},
{
"grad_norm": 0.529040515422821,
"learning_rate": 7.738000000000001e-05,
"loss": 0.0505,
"step": 3870
},
{
"grad_norm": 0.8478358387947083,
"learning_rate": 7.758000000000001e-05,
"loss": 0.0541,
"step": 3880
},
{
"grad_norm": 0.731211245059967,
"learning_rate": 7.778e-05,
"loss": 0.0602,
"step": 3890
},
{
"grad_norm": 0.5998793244361877,
"learning_rate": 7.798000000000001e-05,
"loss": 0.0605,
"step": 3900
},
{
"grad_norm": 0.6982648968696594,
"learning_rate": 7.818000000000001e-05,
"loss": 0.0567,
"step": 3910
},
{
"grad_norm": 0.8007875084877014,
"learning_rate": 7.838e-05,
"loss": 0.0661,
"step": 3920
},
{
"grad_norm": 0.5826359391212463,
"learning_rate": 7.858000000000001e-05,
"loss": 0.0528,
"step": 3930
},
{
"grad_norm": 0.6248698830604553,
"learning_rate": 7.878e-05,
"loss": 0.0547,
"step": 3940
},
{
"grad_norm": 0.659730851650238,
"learning_rate": 7.897999999999999e-05,
"loss": 0.0554,
"step": 3950
},
{
"grad_norm": 0.719498336315155,
"learning_rate": 7.918e-05,
"loss": 0.0524,
"step": 3960
},
{
"grad_norm": 0.6503873467445374,
"learning_rate": 7.938e-05,
"loss": 0.0508,
"step": 3970
},
{
"grad_norm": 0.47922801971435547,
"learning_rate": 7.958e-05,
"loss": 0.0496,
"step": 3980
},
{
"grad_norm": 0.44167473912239075,
"learning_rate": 7.978e-05,
"loss": 0.0525,
"step": 3990
},
{
"grad_norm": 0.5266247391700745,
"learning_rate": 7.998e-05,
"loss": 0.0565,
"step": 4000
},
{
"grad_norm": 0.45653995871543884,
"learning_rate": 8.018e-05,
"loss": 0.0508,
"step": 4010
},
{
"grad_norm": 0.48484233021736145,
"learning_rate": 8.038e-05,
"loss": 0.0498,
"step": 4020
},
{
"grad_norm": 0.6050201058387756,
"learning_rate": 8.058e-05,
"loss": 0.0563,
"step": 4030
},
{
"grad_norm": 0.4977985918521881,
"learning_rate": 8.078e-05,
"loss": 0.0504,
"step": 4040
},
{
"grad_norm": 0.5700753927230835,
"learning_rate": 8.098e-05,
"loss": 0.0541,
"step": 4050
},
{
"grad_norm": 0.6012455224990845,
"learning_rate": 8.118e-05,
"loss": 0.0541,
"step": 4060
},
{
"grad_norm": 0.642540693283081,
"learning_rate": 8.138e-05,
"loss": 0.0502,
"step": 4070
},
{
"grad_norm": 0.5907772779464722,
"learning_rate": 8.158e-05,
"loss": 0.0543,
"step": 4080
},
{
"grad_norm": 0.6549518704414368,
"learning_rate": 8.178e-05,
"loss": 0.0532,
"step": 4090
},
{
"grad_norm": 0.58327716588974,
"learning_rate": 8.198e-05,
"loss": 0.0501,
"step": 4100
},
{
"grad_norm": 0.5398632287979126,
"learning_rate": 8.218e-05,
"loss": 0.0569,
"step": 4110
},
{
"grad_norm": 0.3961046040058136,
"learning_rate": 8.238000000000001e-05,
"loss": 0.057,
"step": 4120
},
{
"grad_norm": 0.6423419117927551,
"learning_rate": 8.258e-05,
"loss": 0.06,
"step": 4130
},
{
"grad_norm": 0.4764283299446106,
"learning_rate": 8.278e-05,
"loss": 0.0525,
"step": 4140
},
{
"grad_norm": 0.6112470030784607,
"learning_rate": 8.298000000000001e-05,
"loss": 0.0551,
"step": 4150
},
{
"grad_norm": 0.6958449482917786,
"learning_rate": 8.318e-05,
"loss": 0.0519,
"step": 4160
},
{
"grad_norm": 0.5838958024978638,
"learning_rate": 8.338e-05,
"loss": 0.0556,
"step": 4170
},
{
"grad_norm": 0.5253514647483826,
"learning_rate": 8.358e-05,
"loss": 0.053,
"step": 4180
},
{
"grad_norm": 0.6679093837738037,
"learning_rate": 8.378e-05,
"loss": 0.0568,
"step": 4190
},
{
"grad_norm": 0.6899139285087585,
"learning_rate": 8.398e-05,
"loss": 0.0499,
"step": 4200
},
{
"grad_norm": 0.5431265234947205,
"learning_rate": 8.418e-05,
"loss": 0.0517,
"step": 4210
},
{
"grad_norm": 0.7031345963478088,
"learning_rate": 8.438e-05,
"loss": 0.0527,
"step": 4220
},
{
"grad_norm": 0.5682622790336609,
"learning_rate": 8.458e-05,
"loss": 0.0503,
"step": 4230
},
{
"grad_norm": 0.6090052127838135,
"learning_rate": 8.478e-05,
"loss": 0.0524,
"step": 4240
},
{
"grad_norm": 0.5961223840713501,
"learning_rate": 8.498e-05,
"loss": 0.0521,
"step": 4250
},
{
"grad_norm": 0.6355141401290894,
"learning_rate": 8.518000000000001e-05,
"loss": 0.0546,
"step": 4260
},
{
"grad_norm": 0.6242051124572754,
"learning_rate": 8.538e-05,
"loss": 0.0532,
"step": 4270
},
{
"grad_norm": 0.6339730024337769,
"learning_rate": 8.558e-05,
"loss": 0.0557,
"step": 4280
},
{
"grad_norm": 0.586755096912384,
"learning_rate": 8.578000000000001e-05,
"loss": 0.0502,
"step": 4290
},
{
"grad_norm": 0.5713655352592468,
"learning_rate": 8.598e-05,
"loss": 0.0472,
"step": 4300
},
{
"grad_norm": 0.8385711312294006,
"learning_rate": 8.618e-05,
"loss": 0.0515,
"step": 4310
},
{
"grad_norm": 0.5189787149429321,
"learning_rate": 8.638000000000001e-05,
"loss": 0.0553,
"step": 4320
},
{
"grad_norm": 0.5025926232337952,
"learning_rate": 8.658e-05,
"loss": 0.0519,
"step": 4330
},
{
"grad_norm": 0.6731868386268616,
"learning_rate": 8.678e-05,
"loss": 0.0538,
"step": 4340
},
{
"grad_norm": 0.48949819803237915,
"learning_rate": 8.698000000000001e-05,
"loss": 0.0584,
"step": 4350
},
{
"grad_norm": 0.5631033778190613,
"learning_rate": 8.718e-05,
"loss": 0.0483,
"step": 4360
},
{
"grad_norm": 0.5556790232658386,
"learning_rate": 8.738000000000001e-05,
"loss": 0.0541,
"step": 4370
},
{
"grad_norm": 0.48878926038742065,
"learning_rate": 8.758000000000001e-05,
"loss": 0.0544,
"step": 4380
},
{
"grad_norm": 0.5779238939285278,
"learning_rate": 8.778e-05,
"loss": 0.0595,
"step": 4390
},
{
"grad_norm": 0.6489167809486389,
"learning_rate": 8.798000000000001e-05,
"loss": 0.0514,
"step": 4400
},
{
"grad_norm": 0.5112762451171875,
"learning_rate": 8.818000000000001e-05,
"loss": 0.0519,
"step": 4410
},
{
"grad_norm": 0.4689539968967438,
"learning_rate": 8.838e-05,
"loss": 0.0521,
"step": 4420
},
{
"grad_norm": 0.681573212146759,
"learning_rate": 8.858000000000001e-05,
"loss": 0.0501,
"step": 4430
},
{
"grad_norm": 0.606069803237915,
"learning_rate": 8.878000000000001e-05,
"loss": 0.0523,
"step": 4440
},
{
"grad_norm": 0.5324500799179077,
"learning_rate": 8.898e-05,
"loss": 0.0527,
"step": 4450
},
{
"grad_norm": 0.6580777764320374,
"learning_rate": 8.918000000000001e-05,
"loss": 0.0505,
"step": 4460
},
{
"grad_norm": 0.527855396270752,
"learning_rate": 8.938e-05,
"loss": 0.0473,
"step": 4470
},
{
"grad_norm": 0.5778601765632629,
"learning_rate": 8.958e-05,
"loss": 0.0532,
"step": 4480
},
{
"grad_norm": 0.5860138535499573,
"learning_rate": 8.978000000000001e-05,
"loss": 0.058,
"step": 4490
},
{
"grad_norm": 0.4269658029079437,
"learning_rate": 8.998e-05,
"loss": 0.0491,
"step": 4500
},
{
"grad_norm": 0.5216055512428284,
"learning_rate": 9.018000000000001e-05,
"loss": 0.0465,
"step": 4510
},
{
"grad_norm": 0.4757327735424042,
"learning_rate": 9.038000000000001e-05,
"loss": 0.0482,
"step": 4520
},
{
"grad_norm": 0.6530254483222961,
"learning_rate": 9.058e-05,
"loss": 0.0531,
"step": 4530
},
{
"grad_norm": 0.6443282961845398,
"learning_rate": 9.078000000000001e-05,
"loss": 0.0527,
"step": 4540
},
{
"grad_norm": 0.3884166479110718,
"learning_rate": 9.098000000000001e-05,
"loss": 0.0453,
"step": 4550
},
{
"grad_norm": 0.4695942997932434,
"learning_rate": 9.118e-05,
"loss": 0.0469,
"step": 4560
},
{
"grad_norm": 0.515466034412384,
"learning_rate": 9.138e-05,
"loss": 0.0475,
"step": 4570
},
{
"grad_norm": 0.6207025647163391,
"learning_rate": 9.158e-05,
"loss": 0.0577,
"step": 4580
},
{
"grad_norm": 0.5003461837768555,
"learning_rate": 9.178e-05,
"loss": 0.0559,
"step": 4590
},
{
"grad_norm": 0.4500420093536377,
"learning_rate": 9.198e-05,
"loss": 0.0487,
"step": 4600
},
{
"grad_norm": 0.4946185350418091,
"learning_rate": 9.218e-05,
"loss": 0.0553,
"step": 4610
},
{
"grad_norm": 0.5304247736930847,
"learning_rate": 9.238e-05,
"loss": 0.0514,
"step": 4620
},
{
"grad_norm": 0.5968793630599976,
"learning_rate": 9.258e-05,
"loss": 0.0515,
"step": 4630
},
{
"grad_norm": 0.44556036591529846,
"learning_rate": 9.278e-05,
"loss": 0.0515,
"step": 4640
},
{
"grad_norm": 0.4487205445766449,
"learning_rate": 9.298e-05,
"loss": 0.0468,
"step": 4650
},
{
"grad_norm": 0.4759437143802643,
"learning_rate": 9.318e-05,
"loss": 0.0527,
"step": 4660
},
{
"grad_norm": 0.5389978885650635,
"learning_rate": 9.338e-05,
"loss": 0.0498,
"step": 4670
},
{
"grad_norm": 0.48506706953048706,
"learning_rate": 9.358e-05,
"loss": 0.0456,
"step": 4680
},
{
"grad_norm": 0.5717599391937256,
"learning_rate": 9.378e-05,
"loss": 0.0503,
"step": 4690
},
{
"grad_norm": 0.5160998702049255,
"learning_rate": 9.398e-05,
"loss": 0.0574,
"step": 4700
},
{
"grad_norm": 0.48355981707572937,
"learning_rate": 9.418e-05,
"loss": 0.0527,
"step": 4710
},
{
"grad_norm": 0.48949745297431946,
"learning_rate": 9.438e-05,
"loss": 0.0447,
"step": 4720
},
{
"grad_norm": 0.5389792323112488,
"learning_rate": 9.458e-05,
"loss": 0.0499,
"step": 4730
},
{
"grad_norm": 0.6133130192756653,
"learning_rate": 9.478e-05,
"loss": 0.0494,
"step": 4740
},
{
"grad_norm": 0.4897119998931885,
"learning_rate": 9.498e-05,
"loss": 0.0494,
"step": 4750
},
{
"grad_norm": 0.4778435230255127,
"learning_rate": 9.518000000000001e-05,
"loss": 0.0547,
"step": 4760
},
{
"grad_norm": 0.5236737728118896,
"learning_rate": 9.538e-05,
"loss": 0.0572,
"step": 4770
},
{
"grad_norm": 0.4555071294307709,
"learning_rate": 9.558e-05,
"loss": 0.0511,
"step": 4780
},
{
"grad_norm": 0.5759738087654114,
"learning_rate": 9.578000000000001e-05,
"loss": 0.054,
"step": 4790
},
{
"grad_norm": 0.48411187529563904,
"learning_rate": 9.598e-05,
"loss": 0.0526,
"step": 4800
},
{
"grad_norm": 0.38313448429107666,
"learning_rate": 9.618e-05,
"loss": 0.0474,
"step": 4810
},
{
"grad_norm": 0.5272445678710938,
"learning_rate": 9.638000000000001e-05,
"loss": 0.048,
"step": 4820
},
{
"grad_norm": 0.42718085646629333,
"learning_rate": 9.658e-05,
"loss": 0.0466,
"step": 4830
},
{
"grad_norm": 0.4786417484283447,
"learning_rate": 9.678e-05,
"loss": 0.0482,
"step": 4840
},
{
"grad_norm": 0.41158753633499146,
"learning_rate": 9.698000000000001e-05,
"loss": 0.0487,
"step": 4850
},
{
"grad_norm": 0.5833826661109924,
"learning_rate": 9.718e-05,
"loss": 0.0458,
"step": 4860
},
{
"grad_norm": 0.5385518074035645,
"learning_rate": 9.738e-05,
"loss": 0.0537,
"step": 4870
},
{
"grad_norm": 0.39039990305900574,
"learning_rate": 9.758000000000001e-05,
"loss": 0.0469,
"step": 4880
},
{
"grad_norm": 0.49037373065948486,
"learning_rate": 9.778e-05,
"loss": 0.0497,
"step": 4890
},
{
"grad_norm": 0.3821620047092438,
"learning_rate": 9.798000000000001e-05,
"loss": 0.0484,
"step": 4900
},
{
"grad_norm": 0.4072476327419281,
"learning_rate": 9.818000000000001e-05,
"loss": 0.0444,
"step": 4910
},
{
"grad_norm": 0.5461483001708984,
"learning_rate": 9.838e-05,
"loss": 0.0518,
"step": 4920
},
{
"grad_norm": 0.5809780359268188,
"learning_rate": 9.858000000000001e-05,
"loss": 0.052,
"step": 4930
},
{
"grad_norm": 0.459931880235672,
"learning_rate": 9.878e-05,
"loss": 0.0458,
"step": 4940
},
{
"grad_norm": 0.5790089964866638,
"learning_rate": 9.898e-05,
"loss": 0.0512,
"step": 4950
},
{
"grad_norm": 0.5393385291099548,
"learning_rate": 9.918000000000001e-05,
"loss": 0.0515,
"step": 4960
},
{
"grad_norm": 0.5185596942901611,
"learning_rate": 9.938e-05,
"loss": 0.0446,
"step": 4970
},
{
"grad_norm": 0.47808054089546204,
"learning_rate": 9.958e-05,
"loss": 0.0547,
"step": 4980
},
{
"grad_norm": 0.5483494400978088,
"learning_rate": 9.978000000000001e-05,
"loss": 0.0519,
"step": 4990
},
{
"grad_norm": 0.640078604221344,
"learning_rate": 9.998e-05,
"loss": 0.0492,
"step": 5000
},
{
"grad_norm": 0.5359305739402771,
"learning_rate": 9.999999778549045e-05,
"loss": 0.0478,
"step": 5010
},
{
"grad_norm": 0.5365594029426575,
"learning_rate": 9.999999013039593e-05,
"loss": 0.0537,
"step": 5020
},
{
"grad_norm": 0.44698792695999146,
"learning_rate": 9.999997700737766e-05,
"loss": 0.0479,
"step": 5030
},
{
"grad_norm": 0.5172343254089355,
"learning_rate": 9.999995841643709e-05,
"loss": 0.0456,
"step": 5040
},
{
"grad_norm": 0.6057367920875549,
"learning_rate": 9.999993435757623e-05,
"loss": 0.0479,
"step": 5050
},
{
"grad_norm": 0.6013140678405762,
"learning_rate": 9.999990483079773e-05,
"loss": 0.0512,
"step": 5060
},
{
"grad_norm": 0.6480153799057007,
"learning_rate": 9.999986983610481e-05,
"loss": 0.0488,
"step": 5070
},
{
"grad_norm": 0.5187014937400818,
"learning_rate": 9.99998293735013e-05,
"loss": 0.0468,
"step": 5080
},
{
"grad_norm": 0.6135046482086182,
"learning_rate": 9.999978344299161e-05,
"loss": 0.0499,
"step": 5090
},
{
"grad_norm": 0.5284314751625061,
"learning_rate": 9.99997320445808e-05,
"loss": 0.0486,
"step": 5100
},
{
"grad_norm": 0.4867863655090332,
"learning_rate": 9.999967517827444e-05,
"loss": 0.0497,
"step": 5110
},
{
"grad_norm": 0.4951011538505554,
"learning_rate": 9.999961284407879e-05,
"loss": 0.0454,
"step": 5120
},
{
"grad_norm": 0.38466599583625793,
"learning_rate": 9.999954504200067e-05,
"loss": 0.0481,
"step": 5130
},
{
"grad_norm": 0.4279499053955078,
"learning_rate": 9.999947177204744e-05,
"loss": 0.0498,
"step": 5140
},
{
"grad_norm": 0.5256782174110413,
"learning_rate": 9.999939303422718e-05,
"loss": 0.0485,
"step": 5150
},
{
"grad_norm": 0.5838196277618408,
"learning_rate": 9.999930882854847e-05,
"loss": 0.0484,
"step": 5160
},
{
"grad_norm": 0.4046095311641693,
"learning_rate": 9.999921915502051e-05,
"loss": 0.0445,
"step": 5170
},
{
"grad_norm": 0.47813159227371216,
"learning_rate": 9.99991240136531e-05,
"loss": 0.0416,
"step": 5180
},
{
"grad_norm": 0.38682985305786133,
"learning_rate": 9.999902340445668e-05,
"loss": 0.048,
"step": 5190
},
{
"grad_norm": 0.41376394033432007,
"learning_rate": 9.999891732744224e-05,
"loss": 0.0452,
"step": 5200
},
{
"grad_norm": 0.5256609320640564,
"learning_rate": 9.999880578262135e-05,
"loss": 0.0531,
"step": 5210
},
{
"grad_norm": 0.4892725944519043,
"learning_rate": 9.999868877000624e-05,
"loss": 0.055,
"step": 5220
},
{
"grad_norm": 0.4581248164176941,
"learning_rate": 9.99985662896097e-05,
"loss": 0.051,
"step": 5230
},
{
"grad_norm": 0.37346044182777405,
"learning_rate": 9.999843834144513e-05,
"loss": 0.0421,
"step": 5240
},
{
"grad_norm": 0.4182156026363373,
"learning_rate": 9.99983049255265e-05,
"loss": 0.0461,
"step": 5250
},
{
"grad_norm": 0.3648982048034668,
"learning_rate": 9.999816604186843e-05,
"loss": 0.0463,
"step": 5260
},
{
"grad_norm": 0.4943232834339142,
"learning_rate": 9.999802169048609e-05,
"loss": 0.0492,
"step": 5270
},
{
"grad_norm": 0.5068058967590332,
"learning_rate": 9.999787187139527e-05,
"loss": 0.0501,
"step": 5280
},
{
"grad_norm": 0.39948412775993347,
"learning_rate": 9.999771658461234e-05,
"loss": 0.0489,
"step": 5290
},
{
"grad_norm": 0.5398508310317993,
"learning_rate": 9.999755583015431e-05,
"loss": 0.0507,
"step": 5300
},
{
"grad_norm": 0.41905033588409424,
"learning_rate": 9.999738960803874e-05,
"loss": 0.0541,
"step": 5310
},
{
"grad_norm": 0.4965907037258148,
"learning_rate": 9.99972179182838e-05,
"loss": 0.0551,
"step": 5320
},
{
"grad_norm": 0.5021808743476868,
"learning_rate": 9.99970407609083e-05,
"loss": 0.0466,
"step": 5330
},
{
"grad_norm": 0.4862288534641266,
"learning_rate": 9.999685813593159e-05,
"loss": 0.0515,
"step": 5340
},
{
"grad_norm": 0.3867112398147583,
"learning_rate": 9.999667004337362e-05,
"loss": 0.049,
"step": 5350
},
{
"grad_norm": 0.3625693619251251,
"learning_rate": 9.9996476483255e-05,
"loss": 0.0423,
"step": 5360
},
{
"grad_norm": 0.5136606097221375,
"learning_rate": 9.999627745559688e-05,
"loss": 0.0441,
"step": 5370
},
{
"grad_norm": 0.3735745847225189,
"learning_rate": 9.999607296042101e-05,
"loss": 0.0439,
"step": 5380
},
{
"grad_norm": 0.4072813093662262,
"learning_rate": 9.99958629977498e-05,
"loss": 0.0452,
"step": 5390
},
{
"grad_norm": 0.49238812923431396,
"learning_rate": 9.999564756760615e-05,
"loss": 0.0409,
"step": 5400
},
{
"grad_norm": 0.5166733860969543,
"learning_rate": 9.999542667001366e-05,
"loss": 0.0443,
"step": 5410
},
{
"grad_norm": 0.3779316246509552,
"learning_rate": 9.999520030499647e-05,
"loss": 0.0444,
"step": 5420
},
{
"grad_norm": 0.5949298739433289,
"learning_rate": 9.999496847257936e-05,
"loss": 0.0479,
"step": 5430
},
{
"grad_norm": 0.4279668927192688,
"learning_rate": 9.999473117278764e-05,
"loss": 0.0463,
"step": 5440
},
{
"grad_norm": 0.48803475499153137,
"learning_rate": 9.999448840564731e-05,
"loss": 0.0488,
"step": 5450
},
{
"grad_norm": 0.4599801301956177,
"learning_rate": 9.999424017118488e-05,
"loss": 0.0448,
"step": 5460
},
{
"grad_norm": 0.5404091477394104,
"learning_rate": 9.999398646942751e-05,
"loss": 0.0452,
"step": 5470
},
{
"grad_norm": 0.3551173210144043,
"learning_rate": 9.999372730040296e-05,
"loss": 0.0464,
"step": 5480
},
{
"grad_norm": 0.4278314411640167,
"learning_rate": 9.999346266413953e-05,
"loss": 0.0439,
"step": 5490
},
{
"grad_norm": 0.38749974966049194,
"learning_rate": 9.99931925606662e-05,
"loss": 0.0411,
"step": 5500
},
{
"grad_norm": 0.4055821895599365,
"learning_rate": 9.99929169900125e-05,
"loss": 0.0475,
"step": 5510
},
{
"grad_norm": 0.4296989440917969,
"learning_rate": 9.999263595220855e-05,
"loss": 0.0484,
"step": 5520
},
{
"grad_norm": 0.42303040623664856,
"learning_rate": 9.99923494472851e-05,
"loss": 0.0465,
"step": 5530
},
{
"grad_norm": 0.37740418314933777,
"learning_rate": 9.999205747527348e-05,
"loss": 0.0429,
"step": 5540
},
{
"grad_norm": 0.4639616012573242,
"learning_rate": 9.999176003620561e-05,
"loss": 0.0454,
"step": 5550
},
{
"grad_norm": 0.4544775187969208,
"learning_rate": 9.999145713011405e-05,
"loss": 0.0461,
"step": 5560
},
{
"grad_norm": 0.4225658178329468,
"learning_rate": 9.999114875703186e-05,
"loss": 0.0466,
"step": 5570
},
{
"grad_norm": 0.41953185200691223,
"learning_rate": 9.999083491699281e-05,
"loss": 0.0415,
"step": 5580
},
{
"grad_norm": 0.4081561267375946,
"learning_rate": 9.999051561003123e-05,
"loss": 0.0452,
"step": 5590
},
{
"grad_norm": 0.4112689197063446,
"learning_rate": 9.999019083618202e-05,
"loss": 0.0415,
"step": 5600
},
{
"grad_norm": 0.3751114308834076,
"learning_rate": 9.99898605954807e-05,
"loss": 0.0431,
"step": 5610
},
{
"grad_norm": 0.5612055063247681,
"learning_rate": 9.998952488796338e-05,
"loss": 0.0435,
"step": 5620
},
{
"grad_norm": 0.44595974683761597,
"learning_rate": 9.998918371366676e-05,
"loss": 0.0427,
"step": 5630
},
{
"grad_norm": 0.4542756676673889,
"learning_rate": 9.99888370726282e-05,
"loss": 0.0431,
"step": 5640
},
{
"grad_norm": 0.4422266185283661,
"learning_rate": 9.998848496488556e-05,
"loss": 0.0425,
"step": 5650
},
{
"grad_norm": 0.5079705715179443,
"learning_rate": 9.998812739047736e-05,
"loss": 0.0509,
"step": 5660
},
{
"grad_norm": 0.37379220128059387,
"learning_rate": 9.99877643494427e-05,
"loss": 0.0435,
"step": 5670
},
{
"grad_norm": 0.5022728443145752,
"learning_rate": 9.998739584182128e-05,
"loss": 0.045,
"step": 5680
},
{
"grad_norm": 0.34019261598587036,
"learning_rate": 9.998702186765342e-05,
"loss": 0.0396,
"step": 5690
},
{
"grad_norm": 0.429571270942688,
"learning_rate": 9.998664242698e-05,
"loss": 0.0444,
"step": 5700
},
{
"grad_norm": 0.4040437638759613,
"learning_rate": 9.998625751984251e-05,
"loss": 0.0423,
"step": 5710
},
{
"grad_norm": 0.3098539710044861,
"learning_rate": 9.998586714628307e-05,
"loss": 0.0442,
"step": 5720
},
{
"grad_norm": 0.40934598445892334,
"learning_rate": 9.998547130634432e-05,
"loss": 0.0444,
"step": 5730
},
{
"grad_norm": 0.36247605085372925,
"learning_rate": 9.99850700000696e-05,
"loss": 0.046,
"step": 5740
},
{
"grad_norm": 0.3583070635795593,
"learning_rate": 9.998466322750278e-05,
"loss": 0.0446,
"step": 5750
},
{
"grad_norm": 0.4379226863384247,
"learning_rate": 9.998425098868834e-05,
"loss": 0.0421,
"step": 5760
},
{
"grad_norm": 0.44016969203948975,
"learning_rate": 9.998383328367136e-05,
"loss": 0.0451,
"step": 5770
},
{
"grad_norm": 0.36797237396240234,
"learning_rate": 9.99834101124975e-05,
"loss": 0.0432,
"step": 5780
},
{
"grad_norm": 0.5354440808296204,
"learning_rate": 9.998298147521309e-05,
"loss": 0.0471,
"step": 5790
},
{
"grad_norm": 0.42204543948173523,
"learning_rate": 9.998254737186496e-05,
"loss": 0.0437,
"step": 5800
},
{
"grad_norm": 0.37009891867637634,
"learning_rate": 9.99821078025006e-05,
"loss": 0.0418,
"step": 5810
},
{
"grad_norm": 0.3839789032936096,
"learning_rate": 9.998166276716807e-05,
"loss": 0.0461,
"step": 5820
},
{
"grad_norm": 0.3813186585903168,
"learning_rate": 9.998121226591606e-05,
"loss": 0.0453,
"step": 5830
},
{
"grad_norm": 0.41642969846725464,
"learning_rate": 9.998075629879382e-05,
"loss": 0.046,
"step": 5840
},
{
"grad_norm": 0.44692158699035645,
"learning_rate": 9.99802948658512e-05,
"loss": 0.0465,
"step": 5850
},
{
"grad_norm": 0.4168972373008728,
"learning_rate": 9.99798279671387e-05,
"loss": 0.0481,
"step": 5860
},
{
"grad_norm": 0.5036101341247559,
"learning_rate": 9.997935560270734e-05,
"loss": 0.0493,
"step": 5870
},
{
"grad_norm": 0.37841522693634033,
"learning_rate": 9.997887777260879e-05,
"loss": 0.0455,
"step": 5880
},
{
"grad_norm": 0.32745361328125,
"learning_rate": 9.997839447689532e-05,
"loss": 0.0411,
"step": 5890
},
{
"grad_norm": 0.6070501804351807,
"learning_rate": 9.997790571561978e-05,
"loss": 0.045,
"step": 5900
},
{
"grad_norm": 0.4841315448284149,
"learning_rate": 9.99774114888356e-05,
"loss": 0.0476,
"step": 5910
},
{
"grad_norm": 0.36092159152030945,
"learning_rate": 9.997691179659684e-05,
"loss": 0.0423,
"step": 5920
},
{
"grad_norm": 0.5659105777740479,
"learning_rate": 9.997640663895815e-05,
"loss": 0.0397,
"step": 5930
},
{
"grad_norm": 0.42008090019226074,
"learning_rate": 9.997589601597477e-05,
"loss": 0.0459,
"step": 5940
},
{
"grad_norm": 0.39080119132995605,
"learning_rate": 9.997537992770252e-05,
"loss": 0.0415,
"step": 5950
},
{
"grad_norm": 0.4096440374851227,
"learning_rate": 9.997485837419788e-05,
"loss": 0.0391,
"step": 5960
},
{
"grad_norm": 0.40421029925346375,
"learning_rate": 9.997433135551786e-05,
"loss": 0.0434,
"step": 5970
},
{
"grad_norm": 0.3745020925998688,
"learning_rate": 9.997379887172009e-05,
"loss": 0.0416,
"step": 5980
},
{
"grad_norm": 0.3250255584716797,
"learning_rate": 9.997326092286281e-05,
"loss": 0.0448,
"step": 5990
},
{
"grad_norm": 0.4551812708377838,
"learning_rate": 9.997271750900486e-05,
"loss": 0.0459,
"step": 6000
},
{
"grad_norm": 0.41356295347213745,
"learning_rate": 9.997216863020565e-05,
"loss": 0.0415,
"step": 6010
},
{
"grad_norm": 0.4534608721733093,
"learning_rate": 9.99716142865252e-05,
"loss": 0.0406,
"step": 6020
},
{
"grad_norm": 0.42121708393096924,
"learning_rate": 9.997105447802415e-05,
"loss": 0.0396,
"step": 6030
},
{
"grad_norm": 0.30873870849609375,
"learning_rate": 9.997048920476373e-05,
"loss": 0.0425,
"step": 6040
},
{
"grad_norm": 0.353882372379303,
"learning_rate": 9.996991846680572e-05,
"loss": 0.0438,
"step": 6050
},
{
"grad_norm": 0.3697453439235687,
"learning_rate": 9.996934226421257e-05,
"loss": 0.041,
"step": 6060
},
{
"grad_norm": 0.3074539005756378,
"learning_rate": 9.996876059704726e-05,
"loss": 0.0425,
"step": 6070
},
{
"grad_norm": 0.44980335235595703,
"learning_rate": 9.996817346537343e-05,
"loss": 0.0469,
"step": 6080
},
{
"grad_norm": 0.4123890697956085,
"learning_rate": 9.996758086925526e-05,
"loss": 0.0383,
"step": 6090
},
{
"grad_norm": 0.5198633074760437,
"learning_rate": 9.996698280875759e-05,
"loss": 0.0466,
"step": 6100
},
{
"grad_norm": 0.3925876319408417,
"learning_rate": 9.99663792839458e-05,
"loss": 0.043,
"step": 6110
},
{
"grad_norm": 0.46037840843200684,
"learning_rate": 9.99657702948859e-05,
"loss": 0.0502,
"step": 6120
},
{
"grad_norm": 0.3833174705505371,
"learning_rate": 9.996515584164448e-05,
"loss": 0.0428,
"step": 6130
},
{
"grad_norm": 0.4500272572040558,
"learning_rate": 9.996453592428873e-05,
"loss": 0.0433,
"step": 6140
},
{
"grad_norm": 0.5032421350479126,
"learning_rate": 9.996391054288646e-05,
"loss": 0.0468,
"step": 6150
},
{
"grad_norm": 0.5007117986679077,
"learning_rate": 9.996327969750605e-05,
"loss": 0.0425,
"step": 6160
},
{
"grad_norm": 0.4883004128932953,
"learning_rate": 9.996264338821649e-05,
"loss": 0.0464,
"step": 6170
},
{
"grad_norm": 0.43868330121040344,
"learning_rate": 9.996200161508735e-05,
"loss": 0.043,
"step": 6180
},
{
"grad_norm": 0.39309653639793396,
"learning_rate": 9.996135437818885e-05,
"loss": 0.0401,
"step": 6190
},
{
"grad_norm": 0.391730397939682,
"learning_rate": 9.996070167759175e-05,
"loss": 0.0423,
"step": 6200
},
{
"grad_norm": 0.45517247915267944,
"learning_rate": 9.996004351336743e-05,
"loss": 0.0434,
"step": 6210
},
{
"grad_norm": 0.4464564621448517,
"learning_rate": 9.995937988558785e-05,
"loss": 0.0466,
"step": 6220
},
{
"grad_norm": 0.4495352804660797,
"learning_rate": 9.995871079432561e-05,
"loss": 0.0432,
"step": 6230
},
{
"grad_norm": 0.45086175203323364,
"learning_rate": 9.995803623965389e-05,
"loss": 0.0415,
"step": 6240
},
{
"grad_norm": 0.37620794773101807,
"learning_rate": 9.995735622164641e-05,
"loss": 0.0448,
"step": 6250
},
{
"grad_norm": 0.42657265067100525,
"learning_rate": 9.995667074037758e-05,
"loss": 0.0432,
"step": 6260
},
{
"grad_norm": 0.3334987163543701,
"learning_rate": 9.995597979592232e-05,
"loss": 0.0379,
"step": 6270
},
{
"grad_norm": 0.44120296835899353,
"learning_rate": 9.995528338835625e-05,
"loss": 0.0416,
"step": 6280
},
{
"grad_norm": 0.32200494408607483,
"learning_rate": 9.995458151775547e-05,
"loss": 0.0378,
"step": 6290
},
{
"grad_norm": 0.3396989703178406,
"learning_rate": 9.995387418419677e-05,
"loss": 0.0466,
"step": 6300
},
{
"grad_norm": 0.42114686965942383,
"learning_rate": 9.99531613877575e-05,
"loss": 0.0425,
"step": 6310
},
{
"grad_norm": 0.474528044462204,
"learning_rate": 9.995244312851559e-05,
"loss": 0.041,
"step": 6320
},
{
"grad_norm": 0.46179601550102234,
"learning_rate": 9.995171940654961e-05,
"loss": 0.0438,
"step": 6330
},
{
"grad_norm": 0.3656163513660431,
"learning_rate": 9.995099022193871e-05,
"loss": 0.0433,
"step": 6340
},
{
"grad_norm": 0.4622880220413208,
"learning_rate": 9.995025557476261e-05,
"loss": 0.0438,
"step": 6350
},
{
"grad_norm": 0.3380992114543915,
"learning_rate": 9.994951546510165e-05,
"loss": 0.0455,
"step": 6360
},
{
"grad_norm": 0.4336497485637665,
"learning_rate": 9.994876989303679e-05,
"loss": 0.0434,
"step": 6370
},
{
"grad_norm": 0.4440676271915436,
"learning_rate": 9.994801885864955e-05,
"loss": 0.0469,
"step": 6380
},
{
"grad_norm": 0.40563204884529114,
"learning_rate": 9.994726236202205e-05,
"loss": 0.0437,
"step": 6390
},
{
"grad_norm": 0.4887060821056366,
"learning_rate": 9.994650040323704e-05,
"loss": 0.039,
"step": 6400
},
{
"grad_norm": 0.3582417964935303,
"learning_rate": 9.994573298237784e-05,
"loss": 0.0461,
"step": 6410
},
{
"grad_norm": 0.389259397983551,
"learning_rate": 9.994496009952837e-05,
"loss": 0.0454,
"step": 6420
},
{
"grad_norm": 0.4717545211315155,
"learning_rate": 9.994418175477316e-05,
"loss": 0.0409,
"step": 6430
},
{
"grad_norm": 0.4662460386753082,
"learning_rate": 9.994339794819733e-05,
"loss": 0.0395,
"step": 6440
},
{
"grad_norm": 0.4239952266216278,
"learning_rate": 9.994260867988658e-05,
"loss": 0.0421,
"step": 6450
},
{
"grad_norm": 0.3706813454627991,
"learning_rate": 9.994181394992723e-05,
"loss": 0.0409,
"step": 6460
},
{
"grad_norm": 0.311974436044693,
"learning_rate": 9.994101375840618e-05,
"loss": 0.0409,
"step": 6470
},
{
"grad_norm": 0.4108457565307617,
"learning_rate": 9.994020810541098e-05,
"loss": 0.0427,
"step": 6480
},
{
"grad_norm": 0.5299150347709656,
"learning_rate": 9.99393969910297e-05,
"loss": 0.0455,
"step": 6490
},
{
"grad_norm": 0.4159790873527527,
"learning_rate": 9.993858041535104e-05,
"loss": 0.0395,
"step": 6500
},
{
"grad_norm": 0.37181058526039124,
"learning_rate": 9.99377583784643e-05,
"loss": 0.0404,
"step": 6510
},
{
"grad_norm": 0.4369353950023651,
"learning_rate": 9.993693088045939e-05,
"loss": 0.04,
"step": 6520
},
{
"grad_norm": 0.31191983819007874,
"learning_rate": 9.99360979214268e-05,
"loss": 0.0392,
"step": 6530
},
{
"grad_norm": 0.41286006569862366,
"learning_rate": 9.99352595014576e-05,
"loss": 0.0401,
"step": 6540
},
{
"grad_norm": 0.5197806358337402,
"learning_rate": 9.993441562064354e-05,
"loss": 0.0429,
"step": 6550
},
{
"grad_norm": 0.4494584798812866,
"learning_rate": 9.993356627907685e-05,
"loss": 0.0439,
"step": 6560
},
{
"grad_norm": 0.3176094591617584,
"learning_rate": 9.99327114768504e-05,
"loss": 0.0449,
"step": 6570
},
{
"grad_norm": 0.3514400124549866,
"learning_rate": 9.99318512140577e-05,
"loss": 0.0378,
"step": 6580
},
{
"grad_norm": 0.3636268377304077,
"learning_rate": 9.993098549079284e-05,
"loss": 0.0407,
"step": 6590
},
{
"grad_norm": 0.3437356650829315,
"learning_rate": 9.993011430715047e-05,
"loss": 0.0458,
"step": 6600
},
{
"grad_norm": 0.3289712369441986,
"learning_rate": 9.992923766322586e-05,
"loss": 0.0434,
"step": 6610
},
{
"grad_norm": 0.4166223406791687,
"learning_rate": 9.99283555591149e-05,
"loss": 0.0348,
"step": 6620
},
{
"grad_norm": 0.48521310091018677,
"learning_rate": 9.992746799491404e-05,
"loss": 0.0458,
"step": 6630
},
{
"grad_norm": 0.3943791091442108,
"learning_rate": 9.992657497072033e-05,
"loss": 0.0432,
"step": 6640
},
{
"grad_norm": 0.4498193562030792,
"learning_rate": 9.992567648663147e-05,
"loss": 0.0384,
"step": 6650
},
{
"grad_norm": 0.37011364102363586,
"learning_rate": 9.992477254274568e-05,
"loss": 0.0437,
"step": 6660
},
{
"grad_norm": 0.39060381054878235,
"learning_rate": 9.992386313916183e-05,
"loss": 0.0447,
"step": 6670
},
{
"grad_norm": 0.3888266682624817,
"learning_rate": 9.992294827597934e-05,
"loss": 0.043,
"step": 6680
},
{
"grad_norm": 0.4197387099266052,
"learning_rate": 9.992202795329831e-05,
"loss": 0.0415,
"step": 6690
},
{
"grad_norm": 0.37308135628700256,
"learning_rate": 9.992110217121936e-05,
"loss": 0.0419,
"step": 6700
},
{
"grad_norm": 0.4515061378479004,
"learning_rate": 9.992017092984372e-05,
"loss": 0.0418,
"step": 6710
},
{
"grad_norm": 0.380657821893692,
"learning_rate": 9.991923422927326e-05,
"loss": 0.0416,
"step": 6720
},
{
"grad_norm": 0.34842175245285034,
"learning_rate": 9.991829206961037e-05,
"loss": 0.0394,
"step": 6730
},
{
"grad_norm": 0.4592163562774658,
"learning_rate": 9.991734445095813e-05,
"loss": 0.0411,
"step": 6740
},
{
"grad_norm": 0.35680124163627625,
"learning_rate": 9.991639137342015e-05,
"loss": 0.0383,
"step": 6750
},
{
"grad_norm": 0.408777117729187,
"learning_rate": 9.991543283710064e-05,
"loss": 0.0389,
"step": 6760
},
{
"grad_norm": 0.4205833077430725,
"learning_rate": 9.991446884210445e-05,
"loss": 0.0379,
"step": 6770
},
{
"grad_norm": 0.3167155385017395,
"learning_rate": 9.9913499388537e-05,
"loss": 0.0386,
"step": 6780
},
{
"grad_norm": 0.3687867522239685,
"learning_rate": 9.99125244765043e-05,
"loss": 0.0389,
"step": 6790
},
{
"grad_norm": 0.43708962202072144,
"learning_rate": 9.991154410611296e-05,
"loss": 0.0385,
"step": 6800
},
{
"grad_norm": 0.4002821743488312,
"learning_rate": 9.99105582774702e-05,
"loss": 0.0385,
"step": 6810
},
{
"grad_norm": 0.25123393535614014,
"learning_rate": 9.990956699068384e-05,
"loss": 0.04,
"step": 6820
},
{
"grad_norm": 0.33417415618896484,
"learning_rate": 9.990857024586224e-05,
"loss": 0.0389,
"step": 6830
},
{
"grad_norm": 0.36421480774879456,
"learning_rate": 9.990756804311446e-05,
"loss": 0.0389,
"step": 6840
},
{
"grad_norm": 0.4153655469417572,
"learning_rate": 9.990656038255006e-05,
"loss": 0.0422,
"step": 6850
},
{
"grad_norm": 0.3924720883369446,
"learning_rate": 9.990554726427926e-05,
"loss": 0.0441,
"step": 6860
},
{
"grad_norm": 0.26164504885673523,
"learning_rate": 9.990452868841284e-05,
"loss": 0.0374,
"step": 6870
},
{
"grad_norm": 0.3427872955799103,
"learning_rate": 9.99035046550622e-05,
"loss": 0.0431,
"step": 6880
},
{
"grad_norm": 0.3402078151702881,
"learning_rate": 9.99024751643393e-05,
"loss": 0.038,
"step": 6890
},
{
"grad_norm": 0.441245973110199,
"learning_rate": 9.990144021635677e-05,
"loss": 0.0382,
"step": 6900
},
{
"grad_norm": 0.5558918714523315,
"learning_rate": 9.990039981122775e-05,
"loss": 0.0391,
"step": 6910
},
{
"grad_norm": 0.3786897659301758,
"learning_rate": 9.989935394906602e-05,
"loss": 0.0416,
"step": 6920
},
{
"grad_norm": 0.4389326870441437,
"learning_rate": 9.989830262998598e-05,
"loss": 0.0405,
"step": 6930
},
{
"grad_norm": 0.3421044647693634,
"learning_rate": 9.989724585410259e-05,
"loss": 0.0387,
"step": 6940
},
{
"grad_norm": 0.4314367473125458,
"learning_rate": 9.989618362153139e-05,
"loss": 0.0367,
"step": 6950
},
{
"grad_norm": 0.4381873309612274,
"learning_rate": 9.989511593238859e-05,
"loss": 0.0407,
"step": 6960
},
{
"grad_norm": 0.317180335521698,
"learning_rate": 9.98940427867909e-05,
"loss": 0.0372,
"step": 6970
},
{
"grad_norm": 0.4186713695526123,
"learning_rate": 9.989296418485573e-05,
"loss": 0.0402,
"step": 6980
},
{
"grad_norm": 0.3646087050437927,
"learning_rate": 9.989188012670101e-05,
"loss": 0.0384,
"step": 6990
},
{
"grad_norm": 0.3898526430130005,
"learning_rate": 9.989079061244528e-05,
"loss": 0.0381,
"step": 7000
},
{
"grad_norm": 0.33809134364128113,
"learning_rate": 9.988969564220769e-05,
"loss": 0.0417,
"step": 7010
},
{
"grad_norm": 0.3403474986553192,
"learning_rate": 9.988859521610801e-05,
"loss": 0.04,
"step": 7020
},
{
"grad_norm": 0.33483290672302246,
"learning_rate": 9.988748933426656e-05,
"loss": 0.0357,
"step": 7030
},
{
"grad_norm": 0.4800526201725006,
"learning_rate": 9.988637799680428e-05,
"loss": 0.0388,
"step": 7040
},
{
"grad_norm": 0.4275050759315491,
"learning_rate": 9.98852612038427e-05,
"loss": 0.0383,
"step": 7050
},
{
"grad_norm": 0.3904567062854767,
"learning_rate": 9.988413895550397e-05,
"loss": 0.04,
"step": 7060
},
{
"grad_norm": 0.3881678581237793,
"learning_rate": 9.98830112519108e-05,
"loss": 0.0361,
"step": 7070
},
{
"grad_norm": 0.33301061391830444,
"learning_rate": 9.98818780931865e-05,
"loss": 0.0378,
"step": 7080
},
{
"grad_norm": 0.35649630427360535,
"learning_rate": 9.988073947945502e-05,
"loss": 0.039,
"step": 7090
},
{
"grad_norm": 0.29644250869750977,
"learning_rate": 9.987959541084087e-05,
"loss": 0.0332,
"step": 7100
},
{
"grad_norm": 0.34819456934928894,
"learning_rate": 9.987844588746915e-05,
"loss": 0.0385,
"step": 7110
},
{
"grad_norm": 0.35695385932922363,
"learning_rate": 9.987729090946558e-05,
"loss": 0.0385,
"step": 7120
},
{
"grad_norm": 0.3464387357234955,
"learning_rate": 9.987613047695647e-05,
"loss": 0.0336,
"step": 7130
},
{
"grad_norm": 0.4068809151649475,
"learning_rate": 9.987496459006871e-05,
"loss": 0.0396,
"step": 7140
},
{
"grad_norm": 0.3142383396625519,
"learning_rate": 9.987379324892982e-05,
"loss": 0.0344,
"step": 7150
},
{
"grad_norm": 0.5131341218948364,
"learning_rate": 9.987261645366788e-05,
"loss": 0.0361,
"step": 7160
},
{
"grad_norm": 0.33127930760383606,
"learning_rate": 9.987143420441158e-05,
"loss": 0.0347,
"step": 7170
},
{
"grad_norm": 0.2790936827659607,
"learning_rate": 9.987024650129022e-05,
"loss": 0.0388,
"step": 7180
},
{
"grad_norm": 0.4384543001651764,
"learning_rate": 9.986905334443368e-05,
"loss": 0.0365,
"step": 7190
},
{
"grad_norm": 0.35652369260787964,
"learning_rate": 9.986785473397245e-05,
"loss": 0.0385,
"step": 7200
},
{
"grad_norm": 0.29669007658958435,
"learning_rate": 9.98666506700376e-05,
"loss": 0.0389,
"step": 7210
},
{
"grad_norm": 0.3526216745376587,
"learning_rate": 9.986544115276081e-05,
"loss": 0.0387,
"step": 7220
},
{
"grad_norm": 0.34248629212379456,
"learning_rate": 9.986422618227433e-05,
"loss": 0.0401,
"step": 7230
},
{
"grad_norm": 0.36855003237724304,
"learning_rate": 9.986300575871106e-05,
"loss": 0.0343,
"step": 7240
},
{
"grad_norm": 0.4254695177078247,
"learning_rate": 9.986177988220444e-05,
"loss": 0.035,
"step": 7250
},
{
"grad_norm": 0.28067904710769653,
"learning_rate": 9.986054855288856e-05,
"loss": 0.0353,
"step": 7260
},
{
"grad_norm": 0.39105573296546936,
"learning_rate": 9.985931177089802e-05,
"loss": 0.0361,
"step": 7270
},
{
"grad_norm": 0.32079729437828064,
"learning_rate": 9.985806953636814e-05,
"loss": 0.032,
"step": 7280
},
{
"grad_norm": 0.318202406167984,
"learning_rate": 9.985682184943471e-05,
"loss": 0.0388,
"step": 7290
},
{
"grad_norm": 0.34409984946250916,
"learning_rate": 9.98555687102342e-05,
"loss": 0.0369,
"step": 7300
},
{
"grad_norm": 0.3259167969226837,
"learning_rate": 9.985431011890367e-05,
"loss": 0.0337,
"step": 7310
},
{
"grad_norm": 0.36389780044555664,
"learning_rate": 9.985304607558075e-05,
"loss": 0.0372,
"step": 7320
},
{
"grad_norm": 0.44165968894958496,
"learning_rate": 9.985177658040364e-05,
"loss": 0.041,
"step": 7330
},
{
"grad_norm": 0.3812031149864197,
"learning_rate": 9.985050163351119e-05,
"loss": 0.0391,
"step": 7340
},
{
"grad_norm": 0.411511093378067,
"learning_rate": 9.984922123504286e-05,
"loss": 0.0412,
"step": 7350
},
{
"grad_norm": 0.3777042031288147,
"learning_rate": 9.984793538513862e-05,
"loss": 0.0354,
"step": 7360
},
{
"grad_norm": 0.36922356486320496,
"learning_rate": 9.984664408393912e-05,
"loss": 0.0354,
"step": 7370
},
{
"grad_norm": 0.384548544883728,
"learning_rate": 9.984534733158556e-05,
"loss": 0.0361,
"step": 7380
},
{
"grad_norm": 0.3720257878303528,
"learning_rate": 9.984404512821977e-05,
"loss": 0.0386,
"step": 7390
},
{
"grad_norm": 0.41220420598983765,
"learning_rate": 9.984273747398411e-05,
"loss": 0.0366,
"step": 7400
},
{
"grad_norm": 0.40043506026268005,
"learning_rate": 9.984142436902165e-05,
"loss": 0.041,
"step": 7410
},
{
"grad_norm": 0.3745039701461792,
"learning_rate": 9.984010581347596e-05,
"loss": 0.0379,
"step": 7420
},
{
"grad_norm": 0.37869614362716675,
"learning_rate": 9.983878180749121e-05,
"loss": 0.0446,
"step": 7430
},
{
"grad_norm": 0.38546255230903625,
"learning_rate": 9.983745235121222e-05,
"loss": 0.0402,
"step": 7440
},
{
"grad_norm": 0.39511385560035706,
"learning_rate": 9.983611744478438e-05,
"loss": 0.0393,
"step": 7450
},
{
"grad_norm": 0.37394529581069946,
"learning_rate": 9.983477708835365e-05,
"loss": 0.0401,
"step": 7460
},
{
"grad_norm": 0.3911571204662323,
"learning_rate": 9.983343128206664e-05,
"loss": 0.0427,
"step": 7470
},
{
"grad_norm": 0.39284974336624146,
"learning_rate": 9.983208002607049e-05,
"loss": 0.0382,
"step": 7480
},
{
"grad_norm": 0.35614168643951416,
"learning_rate": 9.9830723320513e-05,
"loss": 0.038,
"step": 7490
},
{
"grad_norm": 0.24101810157299042,
"learning_rate": 9.982936116554254e-05,
"loss": 0.0384,
"step": 7500
},
{
"grad_norm": 0.3587195575237274,
"learning_rate": 9.982799356130803e-05,
"loss": 0.0343,
"step": 7510
},
{
"grad_norm": 0.32080957293510437,
"learning_rate": 9.982662050795908e-05,
"loss": 0.033,
"step": 7520
},
{
"grad_norm": 0.4107617437839508,
"learning_rate": 9.982524200564583e-05,
"loss": 0.0386,
"step": 7530
},
{
"grad_norm": 0.29533591866493225,
"learning_rate": 9.982385805451901e-05,
"loss": 0.0375,
"step": 7540
},
{
"grad_norm": 0.30470678210258484,
"learning_rate": 9.982246865472998e-05,
"loss": 0.037,
"step": 7550
},
{
"grad_norm": 0.27039673924446106,
"learning_rate": 9.982107380643069e-05,
"loss": 0.0391,
"step": 7560
},
{
"grad_norm": 0.3668907582759857,
"learning_rate": 9.981967350977368e-05,
"loss": 0.0332,
"step": 7570
},
{
"grad_norm": 0.3666737377643585,
"learning_rate": 9.981826776491208e-05,
"loss": 0.0375,
"step": 7580
},
{
"grad_norm": 0.2968786358833313,
"learning_rate": 9.98168565719996e-05,
"loss": 0.0314,
"step": 7590
},
{
"grad_norm": 0.32582610845565796,
"learning_rate": 9.98154399311906e-05,
"loss": 0.0342,
"step": 7600
},
{
"grad_norm": 0.39996621012687683,
"learning_rate": 9.981401784263997e-05,
"loss": 0.0355,
"step": 7610
},
{
"grad_norm": 0.43698522448539734,
"learning_rate": 9.981259030650326e-05,
"loss": 0.0395,
"step": 7620
},
{
"grad_norm": 0.3867236077785492,
"learning_rate": 9.981115732293655e-05,
"loss": 0.0346,
"step": 7630
},
{
"grad_norm": 0.37524399161338806,
"learning_rate": 9.980971889209659e-05,
"loss": 0.043,
"step": 7640
},
{
"grad_norm": 0.29783526062965393,
"learning_rate": 9.980827501414064e-05,
"loss": 0.0403,
"step": 7650
},
{
"grad_norm": 0.311921626329422,
"learning_rate": 9.980682568922663e-05,
"loss": 0.0316,
"step": 7660
},
{
"grad_norm": 0.287200003862381,
"learning_rate": 9.980537091751304e-05,
"loss": 0.0346,
"step": 7670
},
{
"grad_norm": 0.36688342690467834,
"learning_rate": 9.980391069915897e-05,
"loss": 0.0366,
"step": 7680
},
{
"grad_norm": 0.3305376470088959,
"learning_rate": 9.98024450343241e-05,
"loss": 0.0383,
"step": 7690
},
{
"grad_norm": 0.24818134307861328,
"learning_rate": 9.980097392316872e-05,
"loss": 0.0377,
"step": 7700
},
{
"grad_norm": 0.323808878660202,
"learning_rate": 9.97994973658537e-05,
"loss": 0.0384,
"step": 7710
},
{
"grad_norm": 0.3871549963951111,
"learning_rate": 9.979801536254054e-05,
"loss": 0.0311,
"step": 7720
},
{
"grad_norm": 0.34450316429138184,
"learning_rate": 9.979652791339127e-05,
"loss": 0.0367,
"step": 7730
},
{
"grad_norm": 0.3580043911933899,
"learning_rate": 9.97950350185686e-05,
"loss": 0.0344,
"step": 7740
},
{
"grad_norm": 0.3398709297180176,
"learning_rate": 9.979353667823574e-05,
"loss": 0.036,
"step": 7750
},
{
"grad_norm": 0.3904750943183899,
"learning_rate": 9.979203289255658e-05,
"loss": 0.0359,
"step": 7760
},
{
"grad_norm": 0.3727996051311493,
"learning_rate": 9.979052366169557e-05,
"loss": 0.0339,
"step": 7770
},
{
"grad_norm": 0.30604737997055054,
"learning_rate": 9.978900898581775e-05,
"loss": 0.0361,
"step": 7780
},
{
"grad_norm": 0.443472295999527,
"learning_rate": 9.978748886508875e-05,
"loss": 0.032,
"step": 7790
},
{
"grad_norm": 0.34036096930503845,
"learning_rate": 9.978596329967484e-05,
"loss": 0.0397,
"step": 7800
},
{
"grad_norm": 0.33206358551979065,
"learning_rate": 9.978443228974284e-05,
"loss": 0.0382,
"step": 7810
},
{
"grad_norm": 0.3829366862773895,
"learning_rate": 9.978289583546015e-05,
"loss": 0.0356,
"step": 7820
},
{
"grad_norm": 0.44401052594184875,
"learning_rate": 9.978135393699484e-05,
"loss": 0.0332,
"step": 7830
},
{
"grad_norm": 0.41893044114112854,
"learning_rate": 9.977980659451548e-05,
"loss": 0.036,
"step": 7840
},
{
"grad_norm": 0.35988131165504456,
"learning_rate": 9.977825380819135e-05,
"loss": 0.0312,
"step": 7850
},
{
"grad_norm": 0.41945135593414307,
"learning_rate": 9.97766955781922e-05,
"loss": 0.0346,
"step": 7860
},
{
"grad_norm": 0.2597813606262207,
"learning_rate": 9.977513190468848e-05,
"loss": 0.033,
"step": 7870
},
{
"grad_norm": 0.3056352734565735,
"learning_rate": 9.977356278785116e-05,
"loss": 0.0373,
"step": 7880
},
{
"grad_norm": 0.24130724370479584,
"learning_rate": 9.977198822785184e-05,
"loss": 0.0326,
"step": 7890
},
{
"grad_norm": 0.22455158829689026,
"learning_rate": 9.977040822486273e-05,
"loss": 0.0335,
"step": 7900
},
{
"grad_norm": 0.3280586302280426,
"learning_rate": 9.97688227790566e-05,
"loss": 0.0388,
"step": 7910
},
{
"grad_norm": 0.3441072702407837,
"learning_rate": 9.976723189060684e-05,
"loss": 0.0338,
"step": 7920
},
{
"grad_norm": 0.3744092583656311,
"learning_rate": 9.976563555968742e-05,
"loss": 0.0359,
"step": 7930
},
{
"grad_norm": 0.25684285163879395,
"learning_rate": 9.976403378647292e-05,
"loss": 0.0374,
"step": 7940
},
{
"grad_norm": 0.27023550868034363,
"learning_rate": 9.97624265711385e-05,
"loss": 0.0312,
"step": 7950
},
{
"grad_norm": 0.3883087635040283,
"learning_rate": 9.976081391385993e-05,
"loss": 0.0361,
"step": 7960
},
{
"grad_norm": 0.26038286089897156,
"learning_rate": 9.975919581481356e-05,
"loss": 0.0336,
"step": 7970
},
{
"grad_norm": 0.2714274227619171,
"learning_rate": 9.975757227417634e-05,
"loss": 0.0348,
"step": 7980
},
{
"grad_norm": 0.3606083393096924,
"learning_rate": 9.975594329212586e-05,
"loss": 0.0335,
"step": 7990
},
{
"grad_norm": 0.32764312624931335,
"learning_rate": 9.97543088688402e-05,
"loss": 0.0317,
"step": 8000
},
{
"grad_norm": 0.41286933422088623,
"learning_rate": 9.975266900449814e-05,
"loss": 0.0353,
"step": 8010
},
{
"grad_norm": 0.39461225271224976,
"learning_rate": 9.975102369927898e-05,
"loss": 0.0386,
"step": 8020
},
{
"grad_norm": 0.37906038761138916,
"learning_rate": 9.974937295336269e-05,
"loss": 0.0325,
"step": 8030
},
{
"grad_norm": 0.41981109976768494,
"learning_rate": 9.974771676692975e-05,
"loss": 0.0385,
"step": 8040
},
{
"grad_norm": 0.3024406433105469,
"learning_rate": 9.974605514016131e-05,
"loss": 0.0328,
"step": 8050
},
{
"grad_norm": 0.31535062193870544,
"learning_rate": 9.974438807323907e-05,
"loss": 0.0302,
"step": 8060
},
{
"grad_norm": 0.3659132421016693,
"learning_rate": 9.974271556634535e-05,
"loss": 0.0334,
"step": 8070
},
{
"grad_norm": 0.3096853792667389,
"learning_rate": 9.974103761966302e-05,
"loss": 0.0331,
"step": 8080
},
{
"grad_norm": 0.3210420310497284,
"learning_rate": 9.973935423337563e-05,
"loss": 0.0317,
"step": 8090
},
{
"grad_norm": 0.3665407598018646,
"learning_rate": 9.973766540766722e-05,
"loss": 0.0339,
"step": 8100
},
{
"grad_norm": 0.3262102007865906,
"learning_rate": 9.97359711427225e-05,
"loss": 0.037,
"step": 8110
},
{
"grad_norm": 0.33875203132629395,
"learning_rate": 9.973427143872677e-05,
"loss": 0.0361,
"step": 8120
},
{
"grad_norm": 0.33026519417762756,
"learning_rate": 9.973256629586589e-05,
"loss": 0.0323,
"step": 8130
},
{
"grad_norm": 0.3862609267234802,
"learning_rate": 9.973085571432632e-05,
"loss": 0.0366,
"step": 8140
},
{
"grad_norm": 0.3992314636707306,
"learning_rate": 9.972913969429513e-05,
"loss": 0.035,
"step": 8150
},
{
"grad_norm": 0.3993546962738037,
"learning_rate": 9.972741823596e-05,
"loss": 0.0344,
"step": 8160
},
{
"grad_norm": 0.38570424914360046,
"learning_rate": 9.972569133950917e-05,
"loss": 0.0351,
"step": 8170
},
{
"grad_norm": 0.29162174463272095,
"learning_rate": 9.972395900513151e-05,
"loss": 0.0344,
"step": 8180
},
{
"grad_norm": 0.3276876211166382,
"learning_rate": 9.972222123301645e-05,
"loss": 0.0333,
"step": 8190
},
{
"grad_norm": 0.3507404029369354,
"learning_rate": 9.972047802335403e-05,
"loss": 0.0307,
"step": 8200
},
{
"grad_norm": 0.32428833842277527,
"learning_rate": 9.971872937633488e-05,
"loss": 0.0345,
"step": 8210
},
{
"grad_norm": 0.3443835377693176,
"learning_rate": 9.971697529215024e-05,
"loss": 0.0325,
"step": 8220
},
{
"grad_norm": 0.3595477044582367,
"learning_rate": 9.971521577099192e-05,
"loss": 0.0326,
"step": 8230
},
{
"grad_norm": 0.3238294720649719,
"learning_rate": 9.971345081305236e-05,
"loss": 0.0294,
"step": 8240
},
{
"grad_norm": 0.29126474261283875,
"learning_rate": 9.971168041852456e-05,
"loss": 0.0315,
"step": 8250
},
{
"grad_norm": 0.29169949889183044,
"learning_rate": 9.970990458760215e-05,
"loss": 0.0345,
"step": 8260
},
{
"grad_norm": 0.3826935887336731,
"learning_rate": 9.970812332047929e-05,
"loss": 0.0365,
"step": 8270
},
{
"grad_norm": 0.30441099405288696,
"learning_rate": 9.97063366173508e-05,
"loss": 0.0358,
"step": 8280
},
{
"grad_norm": 0.37987688183784485,
"learning_rate": 9.970454447841207e-05,
"loss": 0.0335,
"step": 8290
},
{
"grad_norm": 0.31489792466163635,
"learning_rate": 9.970274690385909e-05,
"loss": 0.0302,
"step": 8300
},
{
"grad_norm": 0.3352809250354767,
"learning_rate": 9.970094389388844e-05,
"loss": 0.0357,
"step": 8310
},
{
"grad_norm": 0.2745957672595978,
"learning_rate": 9.969913544869728e-05,
"loss": 0.0333,
"step": 8320
},
{
"grad_norm": 0.3647013008594513,
"learning_rate": 9.96973215684834e-05,
"loss": 0.031,
"step": 8330
},
{
"grad_norm": 0.3371700644493103,
"learning_rate": 9.969550225344513e-05,
"loss": 0.032,
"step": 8340
},
{
"grad_norm": 0.3446926176548004,
"learning_rate": 9.969367750378147e-05,
"loss": 0.0308,
"step": 8350
},
{
"grad_norm": 0.2754344642162323,
"learning_rate": 9.969184731969194e-05,
"loss": 0.029,
"step": 8360
},
{
"grad_norm": 0.3323659300804138,
"learning_rate": 9.96900117013767e-05,
"loss": 0.0313,
"step": 8370
},
{
"grad_norm": 0.4204343557357788,
"learning_rate": 9.96881706490365e-05,
"loss": 0.0328,
"step": 8380
},
{
"grad_norm": 0.3801116943359375,
"learning_rate": 9.968632416287265e-05,
"loss": 0.0323,
"step": 8390
},
{
"grad_norm": 0.33352887630462646,
"learning_rate": 9.96844722430871e-05,
"loss": 0.0313,
"step": 8400
},
{
"grad_norm": 0.31583085656166077,
"learning_rate": 9.968261488988235e-05,
"loss": 0.0314,
"step": 8410
},
{
"grad_norm": 0.35499894618988037,
"learning_rate": 9.968075210346155e-05,
"loss": 0.0306,
"step": 8420
},
{
"grad_norm": 0.31201276183128357,
"learning_rate": 9.967888388402839e-05,
"loss": 0.0297,
"step": 8430
},
{
"grad_norm": 0.31827130913734436,
"learning_rate": 9.967701023178717e-05,
"loss": 0.0315,
"step": 8440
},
{
"grad_norm": 0.3984922170639038,
"learning_rate": 9.967513114694282e-05,
"loss": 0.0317,
"step": 8450
},
{
"grad_norm": 0.3843662738800049,
"learning_rate": 9.967324662970079e-05,
"loss": 0.0339,
"step": 8460
},
{
"grad_norm": 0.34678617119789124,
"learning_rate": 9.96713566802672e-05,
"loss": 0.0348,
"step": 8470
},
{
"grad_norm": 0.3988155424594879,
"learning_rate": 9.966946129884873e-05,
"loss": 0.0316,
"step": 8480
},
{
"grad_norm": 0.37476977705955505,
"learning_rate": 9.966756048565265e-05,
"loss": 0.0298,
"step": 8490
},
{
"grad_norm": 0.4248075783252716,
"learning_rate": 9.966565424088681e-05,
"loss": 0.0335,
"step": 8500
},
{
"grad_norm": 0.34596335887908936,
"learning_rate": 9.96637425647597e-05,
"loss": 0.0321,
"step": 8510
},
{
"grad_norm": 0.3652588129043579,
"learning_rate": 9.966182545748038e-05,
"loss": 0.0336,
"step": 8520
},
{
"grad_norm": 0.2632305920124054,
"learning_rate": 9.96599029192585e-05,
"loss": 0.0353,
"step": 8530
},
{
"grad_norm": 0.3150872588157654,
"learning_rate": 9.965797495030428e-05,
"loss": 0.0358,
"step": 8540
},
{
"grad_norm": 0.4308720529079437,
"learning_rate": 9.96560415508286e-05,
"loss": 0.0335,
"step": 8550
},
{
"grad_norm": 0.44409430027008057,
"learning_rate": 9.965410272104286e-05,
"loss": 0.0339,
"step": 8560
},
{
"grad_norm": 0.4050586223602295,
"learning_rate": 9.96521584611591e-05,
"loss": 0.0289,
"step": 8570
},
{
"grad_norm": 0.2942954897880554,
"learning_rate": 9.965020877138994e-05,
"loss": 0.0339,
"step": 8580
},
{
"grad_norm": 0.32356417179107666,
"learning_rate": 9.964825365194861e-05,
"loss": 0.0312,
"step": 8590
},
{
"grad_norm": 0.24951142072677612,
"learning_rate": 9.96462931030489e-05,
"loss": 0.0299,
"step": 8600
},
{
"grad_norm": 0.3212977349758148,
"learning_rate": 9.96443271249052e-05,
"loss": 0.0313,
"step": 8610
},
{
"grad_norm": 0.3393869996070862,
"learning_rate": 9.964235571773255e-05,
"loss": 0.0328,
"step": 8620
},
{
"grad_norm": 0.3633405566215515,
"learning_rate": 9.96403788817465e-05,
"loss": 0.0284,
"step": 8630
},
{
"grad_norm": 0.35588517785072327,
"learning_rate": 9.963839661716325e-05,
"loss": 0.0312,
"step": 8640
},
{
"grad_norm": 0.22534853219985962,
"learning_rate": 9.963640892419958e-05,
"loss": 0.0317,
"step": 8650
},
{
"grad_norm": 0.29241955280303955,
"learning_rate": 9.963441580307286e-05,
"loss": 0.0322,
"step": 8660
},
{
"grad_norm": 0.3198605477809906,
"learning_rate": 9.963241725400104e-05,
"loss": 0.0329,
"step": 8670
},
{
"grad_norm": 0.3798952102661133,
"learning_rate": 9.963041327720271e-05,
"loss": 0.0333,
"step": 8680
},
{
"grad_norm": 0.3572438955307007,
"learning_rate": 9.962840387289697e-05,
"loss": 0.033,
"step": 8690
},
{
"grad_norm": 0.37413084506988525,
"learning_rate": 9.962638904130363e-05,
"loss": 0.0292,
"step": 8700
},
{
"grad_norm": 0.2612970769405365,
"learning_rate": 9.962436878264298e-05,
"loss": 0.0287,
"step": 8710
},
{
"grad_norm": 0.30455777049064636,
"learning_rate": 9.962234309713598e-05,
"loss": 0.0266,
"step": 8720
},
{
"grad_norm": 0.37009328603744507,
"learning_rate": 9.962031198500414e-05,
"loss": 0.0335,
"step": 8730
},
{
"grad_norm": 0.3331073820590973,
"learning_rate": 9.961827544646958e-05,
"loss": 0.0295,
"step": 8740
},
{
"grad_norm": 0.3138063848018646,
"learning_rate": 9.961623348175501e-05,
"loss": 0.0334,
"step": 8750
},
{
"grad_norm": 0.2899344563484192,
"learning_rate": 9.961418609108377e-05,
"loss": 0.0301,
"step": 8760
},
{
"grad_norm": 0.23412205278873444,
"learning_rate": 9.961213327467971e-05,
"loss": 0.0255,
"step": 8770
},
{
"grad_norm": 0.2888118028640747,
"learning_rate": 9.961007503276736e-05,
"loss": 0.0287,
"step": 8780
},
{
"grad_norm": 0.3145732879638672,
"learning_rate": 9.960801136557179e-05,
"loss": 0.0281,
"step": 8790
},
{
"grad_norm": 0.34552791714668274,
"learning_rate": 9.960594227331866e-05,
"loss": 0.0339,
"step": 8800
},
{
"grad_norm": 0.43196219205856323,
"learning_rate": 9.960386775623429e-05,
"loss": 0.0325,
"step": 8810
},
{
"grad_norm": 0.39344289898872375,
"learning_rate": 9.96017878145455e-05,
"loss": 0.0336,
"step": 8820
},
{
"grad_norm": 0.37254130840301514,
"learning_rate": 9.959970244847977e-05,
"loss": 0.0358,
"step": 8830
},
{
"grad_norm": 0.3082648813724518,
"learning_rate": 9.959761165826518e-05,
"loss": 0.0328,
"step": 8840
},
{
"grad_norm": 0.342195063829422,
"learning_rate": 9.959551544413033e-05,
"loss": 0.0367,
"step": 8850
},
{
"grad_norm": 0.29704222083091736,
"learning_rate": 9.959341380630448e-05,
"loss": 0.0315,
"step": 8860
},
{
"grad_norm": 0.30029696226119995,
"learning_rate": 9.959130674501746e-05,
"loss": 0.0333,
"step": 8870
},
{
"grad_norm": 0.4003520607948303,
"learning_rate": 9.958919426049968e-05,
"loss": 0.0307,
"step": 8880
},
{
"grad_norm": 0.3678707778453827,
"learning_rate": 9.958707635298219e-05,
"loss": 0.0335,
"step": 8890
},
{
"grad_norm": 0.4012024402618408,
"learning_rate": 9.958495302269657e-05,
"loss": 0.0281,
"step": 8900
},
{
"grad_norm": 0.25086092948913574,
"learning_rate": 9.958282426987503e-05,
"loss": 0.0302,
"step": 8910
},
{
"grad_norm": 0.3453119695186615,
"learning_rate": 9.95806900947504e-05,
"loss": 0.028,
"step": 8920
},
{
"grad_norm": 0.31570178270339966,
"learning_rate": 9.957855049755604e-05,
"loss": 0.0293,
"step": 8930
},
{
"grad_norm": 0.292519211769104,
"learning_rate": 9.957640547852593e-05,
"loss": 0.0278,
"step": 8940
},
{
"grad_norm": 0.35711362957954407,
"learning_rate": 9.957425503789466e-05,
"loss": 0.0325,
"step": 8950
},
{
"grad_norm": 0.34886813163757324,
"learning_rate": 9.957209917589738e-05,
"loss": 0.035,
"step": 8960
},
{
"grad_norm": 0.3956030607223511,
"learning_rate": 9.956993789276987e-05,
"loss": 0.0332,
"step": 8970
},
{
"grad_norm": 0.32648199796676636,
"learning_rate": 9.956777118874847e-05,
"loss": 0.0334,
"step": 8980
},
{
"grad_norm": 0.36385777592658997,
"learning_rate": 9.956559906407016e-05,
"loss": 0.0314,
"step": 8990
},
{
"grad_norm": 0.30030733346939087,
"learning_rate": 9.956342151897245e-05,
"loss": 0.0369,
"step": 9000
},
{
"grad_norm": 0.2771800458431244,
"learning_rate": 9.956123855369346e-05,
"loss": 0.0342,
"step": 9010
},
{
"grad_norm": 0.2924581468105316,
"learning_rate": 9.955905016847196e-05,
"loss": 0.0265,
"step": 9020
},
{
"grad_norm": 0.2809629440307617,
"learning_rate": 9.955685636354723e-05,
"loss": 0.0337,
"step": 9030
},
{
"grad_norm": 0.32476192712783813,
"learning_rate": 9.95546571391592e-05,
"loss": 0.0329,
"step": 9040
},
{
"grad_norm": 0.4399145543575287,
"learning_rate": 9.955245249554837e-05,
"loss": 0.0286,
"step": 9050
},
{
"grad_norm": 0.277260422706604,
"learning_rate": 9.955024243295582e-05,
"loss": 0.0293,
"step": 9060
},
{
"grad_norm": 0.2997894883155823,
"learning_rate": 9.954802695162328e-05,
"loss": 0.0302,
"step": 9070
},
{
"grad_norm": 0.2895433306694031,
"learning_rate": 9.954580605179302e-05,
"loss": 0.0328,
"step": 9080
},
{
"grad_norm": 0.39543765783309937,
"learning_rate": 9.954357973370788e-05,
"loss": 0.0312,
"step": 9090
},
{
"grad_norm": 0.3004811704158783,
"learning_rate": 9.954134799761135e-05,
"loss": 0.0289,
"step": 9100
},
{
"grad_norm": 0.27135422825813293,
"learning_rate": 9.953911084374748e-05,
"loss": 0.031,
"step": 9110
},
{
"grad_norm": 0.280902236700058,
"learning_rate": 9.953686827236093e-05,
"loss": 0.0304,
"step": 9120
},
{
"grad_norm": 0.3989546597003937,
"learning_rate": 9.953462028369695e-05,
"loss": 0.029,
"step": 9130
},
{
"grad_norm": 0.30520907044410706,
"learning_rate": 9.953236687800136e-05,
"loss": 0.0299,
"step": 9140
},
{
"grad_norm": 0.2653387784957886,
"learning_rate": 9.95301080555206e-05,
"loss": 0.0273,
"step": 9150
},
{
"grad_norm": 0.2956410050392151,
"learning_rate": 9.952784381650171e-05,
"loss": 0.0287,
"step": 9160
},
{
"grad_norm": 0.3985331058502197,
"learning_rate": 9.952557416119226e-05,
"loss": 0.0313,
"step": 9170
},
{
"grad_norm": 0.33697912096977234,
"learning_rate": 9.95232990898405e-05,
"loss": 0.0273,
"step": 9180
},
{
"grad_norm": 0.3343970775604248,
"learning_rate": 9.95210186026952e-05,
"loss": 0.0291,
"step": 9190
},
{
"grad_norm": 0.30179843306541443,
"learning_rate": 9.951873270000576e-05,
"loss": 0.0308,
"step": 9200
},
{
"grad_norm": 0.33838656544685364,
"learning_rate": 9.951644138202216e-05,
"loss": 0.0314,
"step": 9210
},
{
"grad_norm": 0.44132405519485474,
"learning_rate": 9.951414464899498e-05,
"loss": 0.0324,
"step": 9220
},
{
"grad_norm": 0.3706565201282501,
"learning_rate": 9.951184250117538e-05,
"loss": 0.0286,
"step": 9230
},
{
"grad_norm": 0.321702778339386,
"learning_rate": 9.950953493881513e-05,
"loss": 0.0318,
"step": 9240
},
{
"grad_norm": 0.3702629506587982,
"learning_rate": 9.950722196216658e-05,
"loss": 0.0277,
"step": 9250
},
{
"grad_norm": 0.2816333472728729,
"learning_rate": 9.950490357148265e-05,
"loss": 0.0293,
"step": 9260
},
{
"grad_norm": 0.29560866951942444,
"learning_rate": 9.950257976701692e-05,
"loss": 0.0302,
"step": 9270
},
{
"grad_norm": 0.3872072696685791,
"learning_rate": 9.950025054902348e-05,
"loss": 0.0312,
"step": 9280
},
{
"grad_norm": 0.383853942155838,
"learning_rate": 9.949791591775706e-05,
"loss": 0.0296,
"step": 9290
},
{
"grad_norm": 0.2638419568538666,
"learning_rate": 9.949557587347298e-05,
"loss": 0.0312,
"step": 9300
},
{
"grad_norm": 0.2792360186576843,
"learning_rate": 9.949323041642713e-05,
"loss": 0.0266,
"step": 9310
},
{
"grad_norm": 0.34312698245048523,
"learning_rate": 9.949087954687602e-05,
"loss": 0.0274,
"step": 9320
},
{
"grad_norm": 0.2778584957122803,
"learning_rate": 9.948852326507672e-05,
"loss": 0.0274,
"step": 9330
},
{
"grad_norm": 0.35982704162597656,
"learning_rate": 9.948616157128694e-05,
"loss": 0.027,
"step": 9340
},
{
"grad_norm": 0.18458214402198792,
"learning_rate": 9.948379446576493e-05,
"loss": 0.0304,
"step": 9350
},
{
"grad_norm": 0.29817137122154236,
"learning_rate": 9.948142194876952e-05,
"loss": 0.0273,
"step": 9360
},
{
"grad_norm": 0.3318087160587311,
"learning_rate": 9.947904402056024e-05,
"loss": 0.0274,
"step": 9370
},
{
"grad_norm": 0.31133589148521423,
"learning_rate": 9.947666068139708e-05,
"loss": 0.0289,
"step": 9380
},
{
"grad_norm": 0.2739274799823761,
"learning_rate": 9.947427193154071e-05,
"loss": 0.0254,
"step": 9390
},
{
"grad_norm": 0.31195876002311707,
"learning_rate": 9.947187777125233e-05,
"loss": 0.029,
"step": 9400
},
{
"grad_norm": 0.3110937774181366,
"learning_rate": 9.946947820079377e-05,
"loss": 0.025,
"step": 9410
},
{
"grad_norm": 0.31233981251716614,
"learning_rate": 9.946707322042747e-05,
"loss": 0.0283,
"step": 9420
},
{
"grad_norm": 0.2944711148738861,
"learning_rate": 9.94646628304164e-05,
"loss": 0.0298,
"step": 9430
},
{
"grad_norm": 0.28662657737731934,
"learning_rate": 9.946224703102418e-05,
"loss": 0.0266,
"step": 9440
},
{
"grad_norm": 0.26988640427589417,
"learning_rate": 9.945982582251498e-05,
"loss": 0.0299,
"step": 9450
},
{
"grad_norm": 0.34679415822029114,
"learning_rate": 9.94573992051536e-05,
"loss": 0.0259,
"step": 9460
},
{
"grad_norm": 0.27944183349609375,
"learning_rate": 9.94549671792054e-05,
"loss": 0.0262,
"step": 9470
},
{
"grad_norm": 0.2616512179374695,
"learning_rate": 9.945252974493635e-05,
"loss": 0.0285,
"step": 9480
},
{
"grad_norm": 0.3048749566078186,
"learning_rate": 9.9450086902613e-05,
"loss": 0.028,
"step": 9490
},
{
"grad_norm": 0.27681034803390503,
"learning_rate": 9.944763865250248e-05,
"loss": 0.0289,
"step": 9500
},
{
"grad_norm": 0.34117570519447327,
"learning_rate": 9.944518499487254e-05,
"loss": 0.0303,
"step": 9510
},
{
"grad_norm": 0.30211806297302246,
"learning_rate": 9.944272592999151e-05,
"loss": 0.0261,
"step": 9520
},
{
"grad_norm": 0.2559513747692108,
"learning_rate": 9.94402614581283e-05,
"loss": 0.0234,
"step": 9530
},
{
"grad_norm": 0.2847338318824768,
"learning_rate": 9.943779157955244e-05,
"loss": 0.0251,
"step": 9540
},
{
"grad_norm": 0.27755439281463623,
"learning_rate": 9.943531629453403e-05,
"loss": 0.0325,
"step": 9550
},
{
"grad_norm": 0.2980590760707855,
"learning_rate": 9.943283560334375e-05,
"loss": 0.0292,
"step": 9560
},
{
"grad_norm": 0.3490363359451294,
"learning_rate": 9.943034950625288e-05,
"loss": 0.0281,
"step": 9570
},
{
"grad_norm": 0.3410196304321289,
"learning_rate": 9.942785800353332e-05,
"loss": 0.0326,
"step": 9580
},
{
"grad_norm": 0.28175175189971924,
"learning_rate": 9.942536109545751e-05,
"loss": 0.0313,
"step": 9590
},
{
"grad_norm": 0.310227632522583,
"learning_rate": 9.942285878229853e-05,
"loss": 0.0259,
"step": 9600
},
{
"grad_norm": 0.2682930827140808,
"learning_rate": 9.942035106433001e-05,
"loss": 0.0301,
"step": 9610
},
{
"grad_norm": 0.3313826620578766,
"learning_rate": 9.94178379418262e-05,
"loss": 0.0246,
"step": 9620
},
{
"grad_norm": 0.36503440141677856,
"learning_rate": 9.941531941506194e-05,
"loss": 0.0262,
"step": 9630
},
{
"grad_norm": 0.29071158170700073,
"learning_rate": 9.941279548431263e-05,
"loss": 0.0313,
"step": 9640
},
{
"grad_norm": 0.35149767994880676,
"learning_rate": 9.941026614985431e-05,
"loss": 0.0305,
"step": 9650
},
{
"grad_norm": 0.34794124960899353,
"learning_rate": 9.940773141196357e-05,
"loss": 0.0303,
"step": 9660
},
{
"grad_norm": 0.4192151427268982,
"learning_rate": 9.94051912709176e-05,
"loss": 0.0267,
"step": 9670
},
{
"grad_norm": 0.24447749555110931,
"learning_rate": 9.940264572699421e-05,
"loss": 0.0262,
"step": 9680
},
{
"grad_norm": 0.25707894563674927,
"learning_rate": 9.940009478047174e-05,
"loss": 0.0308,
"step": 9690
},
{
"grad_norm": 0.38117682933807373,
"learning_rate": 9.939753843162918e-05,
"loss": 0.0291,
"step": 9700
},
{
"grad_norm": 0.3183417022228241,
"learning_rate": 9.939497668074609e-05,
"loss": 0.0278,
"step": 9710
},
{
"grad_norm": 0.28810641169548035,
"learning_rate": 9.93924095281026e-05,
"loss": 0.0268,
"step": 9720
},
{
"grad_norm": 0.2791811525821686,
"learning_rate": 9.938983697397948e-05,
"loss": 0.0253,
"step": 9730
},
{
"grad_norm": 0.2798166573047638,
"learning_rate": 9.938725901865805e-05,
"loss": 0.0278,
"step": 9740
},
{
"grad_norm": 0.4268825054168701,
"learning_rate": 9.93846756624202e-05,
"loss": 0.0307,
"step": 9750
},
{
"grad_norm": 0.27204859256744385,
"learning_rate": 9.938208690554849e-05,
"loss": 0.0278,
"step": 9760
},
{
"grad_norm": 0.331249475479126,
"learning_rate": 9.9379492748326e-05,
"loss": 0.0262,
"step": 9770
},
{
"grad_norm": 0.2749841809272766,
"learning_rate": 9.937689319103641e-05,
"loss": 0.0246,
"step": 9780
},
{
"grad_norm": 0.2845693826675415,
"learning_rate": 9.937428823396404e-05,
"loss": 0.0256,
"step": 9790
},
{
"grad_norm": 0.39069777727127075,
"learning_rate": 9.937167787739372e-05,
"loss": 0.0265,
"step": 9800
},
{
"grad_norm": 0.3815779685974121,
"learning_rate": 9.936906212161095e-05,
"loss": 0.0361,
"step": 9810
},
{
"grad_norm": 0.3536510765552521,
"learning_rate": 9.936644096690176e-05,
"loss": 0.0292,
"step": 9820
},
{
"grad_norm": 0.24151362478733063,
"learning_rate": 9.936381441355282e-05,
"loss": 0.0307,
"step": 9830
},
{
"grad_norm": 0.2514011859893799,
"learning_rate": 9.936118246185136e-05,
"loss": 0.0286,
"step": 9840
},
{
"grad_norm": 0.2729114592075348,
"learning_rate": 9.935854511208518e-05,
"loss": 0.0331,
"step": 9850
},
{
"grad_norm": 0.24144065380096436,
"learning_rate": 9.935590236454272e-05,
"loss": 0.0271,
"step": 9860
},
{
"grad_norm": 0.24960418045520782,
"learning_rate": 9.935325421951298e-05,
"loss": 0.0226,
"step": 9870
},
{
"grad_norm": 0.33603090047836304,
"learning_rate": 9.935060067728557e-05,
"loss": 0.0251,
"step": 9880
},
{
"grad_norm": 0.2939727008342743,
"learning_rate": 9.934794173815067e-05,
"loss": 0.0264,
"step": 9890
},
{
"grad_norm": 0.3095412254333496,
"learning_rate": 9.934527740239906e-05,
"loss": 0.0253,
"step": 9900
},
{
"grad_norm": 0.31406790018081665,
"learning_rate": 9.934260767032209e-05,
"loss": 0.0276,
"step": 9910
},
{
"grad_norm": 0.43631061911582947,
"learning_rate": 9.933993254221172e-05,
"loss": 0.0264,
"step": 9920
},
{
"grad_norm": 0.3380442261695862,
"learning_rate": 9.933725201836053e-05,
"loss": 0.0258,
"step": 9930
},
{
"grad_norm": 0.32592689990997314,
"learning_rate": 9.933456609906162e-05,
"loss": 0.0232,
"step": 9940
},
{
"grad_norm": 0.36649060249328613,
"learning_rate": 9.933187478460875e-05,
"loss": 0.0318,
"step": 9950
},
{
"grad_norm": 0.34584930539131165,
"learning_rate": 9.93291780752962e-05,
"loss": 0.0272,
"step": 9960
},
{
"grad_norm": 0.3089320957660675,
"learning_rate": 9.932647597141893e-05,
"loss": 0.0261,
"step": 9970
},
{
"grad_norm": 0.262470006942749,
"learning_rate": 9.932376847327239e-05,
"loss": 0.0222,
"step": 9980
},
{
"grad_norm": 0.2766602337360382,
"learning_rate": 9.932105558115268e-05,
"loss": 0.0261,
"step": 9990
},
{
"grad_norm": 0.2999885678291321,
"learning_rate": 9.931833729535651e-05,
"loss": 0.027,
"step": 10000
},
{
"grad_norm": 0.24559569358825684,
"learning_rate": 9.931561361618111e-05,
"loss": 0.0225,
"step": 10010
},
{
"grad_norm": 0.32899388670921326,
"learning_rate": 9.931288454392435e-05,
"loss": 0.0259,
"step": 10020
},
{
"grad_norm": 0.28741541504859924,
"learning_rate": 9.931015007888467e-05,
"loss": 0.0287,
"step": 10030
},
{
"grad_norm": 0.3540486991405487,
"learning_rate": 9.930741022136112e-05,
"loss": 0.0253,
"step": 10040
},
{
"grad_norm": 0.29190653562545776,
"learning_rate": 9.930466497165333e-05,
"loss": 0.0243,
"step": 10050
},
{
"grad_norm": 0.3176731467247009,
"learning_rate": 9.93019143300615e-05,
"loss": 0.0232,
"step": 10060
},
{
"grad_norm": 0.3307768702507019,
"learning_rate": 9.929915829688644e-05,
"loss": 0.0255,
"step": 10070
},
{
"grad_norm": 0.2830311954021454,
"learning_rate": 9.929639687242955e-05,
"loss": 0.0346,
"step": 10080
},
{
"grad_norm": 0.27388209104537964,
"learning_rate": 9.929363005699281e-05,
"loss": 0.0266,
"step": 10090
},
{
"grad_norm": 0.2982178032398224,
"learning_rate": 9.92908578508788e-05,
"loss": 0.0247,
"step": 10100
},
{
"grad_norm": 0.2913467288017273,
"learning_rate": 9.928808025439069e-05,
"loss": 0.0226,
"step": 10110
},
{
"grad_norm": 0.22734889388084412,
"learning_rate": 9.928529726783223e-05,
"loss": 0.025,
"step": 10120
},
{
"grad_norm": 0.2596288323402405,
"learning_rate": 9.928250889150774e-05,
"loss": 0.0278,
"step": 10130
},
{
"grad_norm": 0.2922511100769043,
"learning_rate": 9.92797151257222e-05,
"loss": 0.0251,
"step": 10140
},
{
"grad_norm": 0.2565639615058899,
"learning_rate": 9.927691597078108e-05,
"loss": 0.0235,
"step": 10150
},
{
"grad_norm": 0.29596906900405884,
"learning_rate": 9.927411142699053e-05,
"loss": 0.0271,
"step": 10160
},
{
"grad_norm": 0.3740658760070801,
"learning_rate": 9.927130149465725e-05,
"loss": 0.0293,
"step": 10170
},
{
"grad_norm": 0.2871254086494446,
"learning_rate": 9.92684861740885e-05,
"loss": 0.0285,
"step": 10180
},
{
"grad_norm": 0.31287142634391785,
"learning_rate": 9.926566546559217e-05,
"loss": 0.0279,
"step": 10190
},
{
"grad_norm": 0.3119584619998932,
"learning_rate": 9.926283936947673e-05,
"loss": 0.0274,
"step": 10200
},
{
"grad_norm": 0.40003690123558044,
"learning_rate": 9.926000788605126e-05,
"loss": 0.0322,
"step": 10210
},
{
"grad_norm": 0.29922744631767273,
"learning_rate": 9.92571710156254e-05,
"loss": 0.0326,
"step": 10220
},
{
"grad_norm": 0.27242857217788696,
"learning_rate": 9.925432875850936e-05,
"loss": 0.0271,
"step": 10230
},
{
"grad_norm": 0.26456117630004883,
"learning_rate": 9.925148111501396e-05,
"loss": 0.0242,
"step": 10240
},
{
"grad_norm": 0.32744866609573364,
"learning_rate": 9.924862808545066e-05,
"loss": 0.0249,
"step": 10250
},
{
"grad_norm": 0.26610448956489563,
"learning_rate": 9.924576967013141e-05,
"loss": 0.0281,
"step": 10260
},
{
"grad_norm": 0.22870276868343353,
"learning_rate": 9.924290586936887e-05,
"loss": 0.0286,
"step": 10270
},
{
"grad_norm": 0.3537036180496216,
"learning_rate": 9.924003668347614e-05,
"loss": 0.0289,
"step": 10280
},
{
"grad_norm": 0.2901184856891632,
"learning_rate": 9.923716211276704e-05,
"loss": 0.0242,
"step": 10290
},
{
"grad_norm": 0.26121214032173157,
"learning_rate": 9.923428215755594e-05,
"loss": 0.0252,
"step": 10300
},
{
"grad_norm": 0.3089354634284973,
"learning_rate": 9.923139681815775e-05,
"loss": 0.0249,
"step": 10310
},
{
"grad_norm": 0.37658077478408813,
"learning_rate": 9.922850609488801e-05,
"loss": 0.0247,
"step": 10320
},
{
"grad_norm": 0.31644684076309204,
"learning_rate": 9.922560998806287e-05,
"loss": 0.026,
"step": 10330
},
{
"grad_norm": 0.26241806149482727,
"learning_rate": 9.922270849799905e-05,
"loss": 0.0233,
"step": 10340
},
{
"grad_norm": 0.2751232087612152,
"learning_rate": 9.92198016250138e-05,
"loss": 0.0298,
"step": 10350
},
{
"grad_norm": 0.38322126865386963,
"learning_rate": 9.921688936942506e-05,
"loss": 0.029,
"step": 10360
},
{
"grad_norm": 0.37642014026641846,
"learning_rate": 9.921397173155129e-05,
"loss": 0.0248,
"step": 10370
},
{
"grad_norm": 0.27151739597320557,
"learning_rate": 9.921104871171157e-05,
"loss": 0.0239,
"step": 10380
},
{
"grad_norm": 0.2456636130809784,
"learning_rate": 9.920812031022554e-05,
"loss": 0.0252,
"step": 10390
},
{
"grad_norm": 0.2831108570098877,
"learning_rate": 9.920518652741348e-05,
"loss": 0.024,
"step": 10400
},
{
"grad_norm": 0.3394615650177002,
"learning_rate": 9.920224736359618e-05,
"loss": 0.0237,
"step": 10410
},
{
"grad_norm": 0.2199944406747818,
"learning_rate": 9.91993028190951e-05,
"loss": 0.0221,
"step": 10420
},
{
"grad_norm": 0.2913322150707245,
"learning_rate": 9.919635289423222e-05,
"loss": 0.0263,
"step": 10430
},
{
"grad_norm": 0.2598559856414795,
"learning_rate": 9.919339758933015e-05,
"loss": 0.0258,
"step": 10440
},
{
"grad_norm": 0.26241859793663025,
"learning_rate": 9.919043690471209e-05,
"loss": 0.0234,
"step": 10450
},
{
"grad_norm": 0.20276913046836853,
"learning_rate": 9.91874708407018e-05,
"loss": 0.0238,
"step": 10460
},
{
"grad_norm": 0.3424587845802307,
"learning_rate": 9.918449939762367e-05,
"loss": 0.0263,
"step": 10470
},
{
"grad_norm": 0.3462485373020172,
"learning_rate": 9.91815225758026e-05,
"loss": 0.0266,
"step": 10480
},
{
"grad_norm": 0.2496924102306366,
"learning_rate": 9.917854037556419e-05,
"loss": 0.0247,
"step": 10490
},
{
"grad_norm": 0.32007890939712524,
"learning_rate": 9.917555279723454e-05,
"loss": 0.025,
"step": 10500
},
{
"grad_norm": 0.32690268754959106,
"learning_rate": 9.917255984114036e-05,
"loss": 0.0279,
"step": 10510
},
{
"grad_norm": 0.3497851490974426,
"learning_rate": 9.916956150760896e-05,
"loss": 0.0286,
"step": 10520
},
{
"grad_norm": 0.36993327736854553,
"learning_rate": 9.916655779696826e-05,
"loss": 0.0263,
"step": 10530
},
{
"grad_norm": 0.2767674922943115,
"learning_rate": 9.916354870954671e-05,
"loss": 0.026,
"step": 10540
},
{
"grad_norm": 0.30353987216949463,
"learning_rate": 9.91605342456734e-05,
"loss": 0.0291,
"step": 10550
},
{
"grad_norm": 0.2900550365447998,
"learning_rate": 9.915751440567795e-05,
"loss": 0.0236,
"step": 10560
},
{
"grad_norm": 0.28913983702659607,
"learning_rate": 9.915448918989066e-05,
"loss": 0.0262,
"step": 10570
},
{
"grad_norm": 0.25887352228164673,
"learning_rate": 9.915145859864232e-05,
"loss": 0.0227,
"step": 10580
},
{
"grad_norm": 0.3585141599178314,
"learning_rate": 9.914842263226437e-05,
"loss": 0.0285,
"step": 10590
},
{
"grad_norm": 0.2905959188938141,
"learning_rate": 9.914538129108882e-05,
"loss": 0.0272,
"step": 10600
},
{
"grad_norm": 0.3087858259677887,
"learning_rate": 9.914233457544825e-05,
"loss": 0.029,
"step": 10610
},
{
"grad_norm": 0.3118123710155487,
"learning_rate": 9.913928248567586e-05,
"loss": 0.032,
"step": 10620
},
{
"grad_norm": 0.28956368565559387,
"learning_rate": 9.913622502210542e-05,
"loss": 0.0248,
"step": 10630
},
{
"grad_norm": 0.2686956524848938,
"learning_rate": 9.913316218507128e-05,
"loss": 0.028,
"step": 10640
},
{
"grad_norm": 0.32353708148002625,
"learning_rate": 9.91300939749084e-05,
"loss": 0.0247,
"step": 10650
},
{
"grad_norm": 0.3663651645183563,
"learning_rate": 9.91270203919523e-05,
"loss": 0.0255,
"step": 10660
},
{
"grad_norm": 0.2726764678955078,
"learning_rate": 9.912394143653912e-05,
"loss": 0.0228,
"step": 10670
},
{
"grad_norm": 0.26007094979286194,
"learning_rate": 9.912085710900555e-05,
"loss": 0.0204,
"step": 10680
},
{
"grad_norm": 0.32784974575042725,
"learning_rate": 9.911776740968892e-05,
"loss": 0.022,
"step": 10690
},
{
"grad_norm": 0.2323165386915207,
"learning_rate": 9.911467233892709e-05,
"loss": 0.0245,
"step": 10700
},
{
"grad_norm": 0.32837867736816406,
"learning_rate": 9.911157189705853e-05,
"loss": 0.0219,
"step": 10710
},
{
"grad_norm": 0.31170499324798584,
"learning_rate": 9.910846608442229e-05,
"loss": 0.0269,
"step": 10720
},
{
"grad_norm": 0.315674751996994,
"learning_rate": 9.910535490135805e-05,
"loss": 0.0264,
"step": 10730
},
{
"grad_norm": 0.3200323283672333,
"learning_rate": 9.910223834820603e-05,
"loss": 0.0224,
"step": 10740
},
{
"grad_norm": 0.32182928919792175,
"learning_rate": 9.909911642530703e-05,
"loss": 0.0228,
"step": 10750
},
{
"grad_norm": 0.19040712714195251,
"learning_rate": 9.909598913300249e-05,
"loss": 0.024,
"step": 10760
},
{
"grad_norm": 0.25349172949790955,
"learning_rate": 9.909285647163438e-05,
"loss": 0.0248,
"step": 10770
},
{
"grad_norm": 0.2588328719139099,
"learning_rate": 9.908971844154531e-05,
"loss": 0.0226,
"step": 10780
},
{
"grad_norm": 0.38836732506752014,
"learning_rate": 9.908657504307843e-05,
"loss": 0.0284,
"step": 10790
},
{
"grad_norm": 0.29620569944381714,
"learning_rate": 9.908342627657751e-05,
"loss": 0.025,
"step": 10800
},
{
"grad_norm": 0.2874799072742462,
"learning_rate": 9.908027214238689e-05,
"loss": 0.0207,
"step": 10810
},
{
"grad_norm": 0.3038623332977295,
"learning_rate": 9.90771126408515e-05,
"loss": 0.0271,
"step": 10820
},
{
"grad_norm": 0.2247525006532669,
"learning_rate": 9.907394777231685e-05,
"loss": 0.0262,
"step": 10830
},
{
"grad_norm": 0.37762928009033203,
"learning_rate": 9.907077753712905e-05,
"loss": 0.0243,
"step": 10840
},
{
"grad_norm": 0.2622915506362915,
"learning_rate": 9.906760193563482e-05,
"loss": 0.0214,
"step": 10850
},
{
"grad_norm": 0.27154263854026794,
"learning_rate": 9.906442096818139e-05,
"loss": 0.0266,
"step": 10860
},
{
"grad_norm": 0.3040686845779419,
"learning_rate": 9.906123463511665e-05,
"loss": 0.025,
"step": 10870
},
{
"grad_norm": 0.3142922520637512,
"learning_rate": 9.905804293678907e-05,
"loss": 0.0245,
"step": 10880
},
{
"grad_norm": 0.34915608167648315,
"learning_rate": 9.905484587354766e-05,
"loss": 0.0277,
"step": 10890
},
{
"grad_norm": 0.259529709815979,
"learning_rate": 9.905164344574205e-05,
"loss": 0.0217,
"step": 10900
},
{
"grad_norm": 0.2816735506057739,
"learning_rate": 9.904843565372248e-05,
"loss": 0.0243,
"step": 10910
},
{
"grad_norm": 0.24057213962078094,
"learning_rate": 9.904522249783972e-05,
"loss": 0.027,
"step": 10920
},
{
"grad_norm": 0.32466766238212585,
"learning_rate": 9.904200397844517e-05,
"loss": 0.0263,
"step": 10930
},
{
"grad_norm": 0.23282542824745178,
"learning_rate": 9.903878009589078e-05,
"loss": 0.0251,
"step": 10940
},
{
"grad_norm": 0.21603845059871674,
"learning_rate": 9.903555085052915e-05,
"loss": 0.0208,
"step": 10950
},
{
"grad_norm": 0.2442980855703354,
"learning_rate": 9.903231624271338e-05,
"loss": 0.0225,
"step": 10960
},
{
"grad_norm": 0.22851979732513428,
"learning_rate": 9.902907627279724e-05,
"loss": 0.0222,
"step": 10970
},
{
"grad_norm": 0.24267442524433136,
"learning_rate": 9.902583094113504e-05,
"loss": 0.0256,
"step": 10980
},
{
"grad_norm": 0.31775084137916565,
"learning_rate": 9.902258024808168e-05,
"loss": 0.0291,
"step": 10990
},
{
"grad_norm": 0.2924094498157501,
"learning_rate": 9.901932419399264e-05,
"loss": 0.0257,
"step": 11000
},
{
"grad_norm": 0.2933579981327057,
"learning_rate": 9.9016062779224e-05,
"loss": 0.0229,
"step": 11010
},
{
"grad_norm": 0.3177044689655304,
"learning_rate": 9.901279600413242e-05,
"loss": 0.0245,
"step": 11020
},
{
"grad_norm": 0.28048941493034363,
"learning_rate": 9.900952386907518e-05,
"loss": 0.0262,
"step": 11030
},
{
"grad_norm": 0.2474532425403595,
"learning_rate": 9.90062463744101e-05,
"loss": 0.0233,
"step": 11040
},
{
"grad_norm": 0.28395146131515503,
"learning_rate": 9.900296352049558e-05,
"loss": 0.024,
"step": 11050
},
{
"grad_norm": 0.2619207203388214,
"learning_rate": 9.899967530769065e-05,
"loss": 0.0235,
"step": 11060
},
{
"grad_norm": 0.30180880427360535,
"learning_rate": 9.899638173635489e-05,
"loss": 0.0245,
"step": 11070
},
{
"grad_norm": 0.32666757702827454,
"learning_rate": 9.899308280684849e-05,
"loss": 0.0254,
"step": 11080
},
{
"grad_norm": 0.29050329327583313,
"learning_rate": 9.898977851953222e-05,
"loss": 0.0248,
"step": 11090
},
{
"grad_norm": 0.26069408655166626,
"learning_rate": 9.898646887476741e-05,
"loss": 0.0253,
"step": 11100
},
{
"grad_norm": 0.2715095281600952,
"learning_rate": 9.898315387291603e-05,
"loss": 0.024,
"step": 11110
},
{
"grad_norm": 0.30262792110443115,
"learning_rate": 9.89798335143406e-05,
"loss": 0.0229,
"step": 11120
},
{
"grad_norm": 0.250119149684906,
"learning_rate": 9.897650779940419e-05,
"loss": 0.0234,
"step": 11130
},
{
"grad_norm": 0.25867602229118347,
"learning_rate": 9.897317672847054e-05,
"loss": 0.0253,
"step": 11140
},
{
"grad_norm": 0.24908185005187988,
"learning_rate": 9.89698403019039e-05,
"loss": 0.0253,
"step": 11150
},
{
"grad_norm": 0.2526894509792328,
"learning_rate": 9.896649852006917e-05,
"loss": 0.0202,
"step": 11160
},
{
"grad_norm": 0.2598399519920349,
"learning_rate": 9.896315138333177e-05,
"loss": 0.0218,
"step": 11170
},
{
"grad_norm": 0.3139992356300354,
"learning_rate": 9.895979889205774e-05,
"loss": 0.0256,
"step": 11180
},
{
"grad_norm": 0.3360525071620941,
"learning_rate": 9.895644104661372e-05,
"loss": 0.024,
"step": 11190
},
{
"grad_norm": 0.29311561584472656,
"learning_rate": 9.895307784736691e-05,
"loss": 0.0242,
"step": 11200
},
{
"grad_norm": 0.28087183833122253,
"learning_rate": 9.894970929468512e-05,
"loss": 0.0274,
"step": 11210
},
{
"grad_norm": 0.3319748640060425,
"learning_rate": 9.89463353889367e-05,
"loss": 0.0225,
"step": 11220
},
{
"grad_norm": 0.24747245013713837,
"learning_rate": 9.894295613049065e-05,
"loss": 0.0242,
"step": 11230
},
{
"grad_norm": 0.24396617710590363,
"learning_rate": 9.893957151971649e-05,
"loss": 0.0213,
"step": 11240
},
{
"grad_norm": 0.3030615746974945,
"learning_rate": 9.893618155698436e-05,
"loss": 0.024,
"step": 11250
},
{
"grad_norm": 0.4588127136230469,
"learning_rate": 9.8932786242665e-05,
"loss": 0.0255,
"step": 11260
},
{
"grad_norm": 0.3490641415119171,
"learning_rate": 9.89293855771297e-05,
"loss": 0.0288,
"step": 11270
},
{
"grad_norm": 0.27173346281051636,
"learning_rate": 9.892597956075036e-05,
"loss": 0.0229,
"step": 11280
},
{
"grad_norm": 0.2763062119483948,
"learning_rate": 9.892256819389947e-05,
"loss": 0.021,
"step": 11290
},
{
"grad_norm": 0.28217893838882446,
"learning_rate": 9.891915147695006e-05,
"loss": 0.023,
"step": 11300
},
{
"grad_norm": 0.3276444375514984,
"learning_rate": 9.891572941027577e-05,
"loss": 0.028,
"step": 11310
},
{
"grad_norm": 0.32746949791908264,
"learning_rate": 9.89123019942509e-05,
"loss": 0.0216,
"step": 11320
},
{
"grad_norm": 0.24576155841350555,
"learning_rate": 9.89088692292502e-05,
"loss": 0.0238,
"step": 11330
},
{
"grad_norm": 0.25501197576522827,
"learning_rate": 9.89054311156491e-05,
"loss": 0.0261,
"step": 11340
},
{
"grad_norm": 0.2533109486103058,
"learning_rate": 9.890198765382357e-05,
"loss": 0.0262,
"step": 11350
},
{
"grad_norm": 0.2431899756193161,
"learning_rate": 9.889853884415021e-05,
"loss": 0.0209,
"step": 11360
},
{
"grad_norm": 0.36103177070617676,
"learning_rate": 9.889508468700614e-05,
"loss": 0.0231,
"step": 11370
},
{
"grad_norm": 0.33830827474594116,
"learning_rate": 9.889162518276915e-05,
"loss": 0.0259,
"step": 11380
},
{
"grad_norm": 0.23270663619041443,
"learning_rate": 9.888816033181752e-05,
"loss": 0.0244,
"step": 11390
},
{
"grad_norm": 0.20536769926548004,
"learning_rate": 9.888469013453018e-05,
"loss": 0.0219,
"step": 11400
},
{
"grad_norm": 0.3499191105365753,
"learning_rate": 9.888121459128663e-05,
"loss": 0.0264,
"step": 11410
},
{
"grad_norm": 0.30719149112701416,
"learning_rate": 9.887773370246693e-05,
"loss": 0.0293,
"step": 11420
},
{
"grad_norm": 0.251043438911438,
"learning_rate": 9.887424746845177e-05,
"loss": 0.0269,
"step": 11430
},
{
"grad_norm": 0.3854045271873474,
"learning_rate": 9.887075588962239e-05,
"loss": 0.0258,
"step": 11440
},
{
"grad_norm": 0.34220167994499207,
"learning_rate": 9.88672589663606e-05,
"loss": 0.0215,
"step": 11450
},
{
"grad_norm": 0.2944101393222809,
"learning_rate": 9.886375669904886e-05,
"loss": 0.0246,
"step": 11460
},
{
"grad_norm": 0.31172212958335876,
"learning_rate": 9.886024908807014e-05,
"loss": 0.0259,
"step": 11470
},
{
"grad_norm": 0.2905510663986206,
"learning_rate": 9.885673613380806e-05,
"loss": 0.0245,
"step": 11480
},
{
"grad_norm": 0.25882405042648315,
"learning_rate": 9.885321783664676e-05,
"loss": 0.0215,
"step": 11490
},
{
"grad_norm": 0.2980867922306061,
"learning_rate": 9.884969419697101e-05,
"loss": 0.0269,
"step": 11500
},
{
"grad_norm": 0.23805956542491913,
"learning_rate": 9.884616521516614e-05,
"loss": 0.0231,
"step": 11510
},
{
"grad_norm": 0.26967158913612366,
"learning_rate": 9.88426308916181e-05,
"loss": 0.0251,
"step": 11520
},
{
"grad_norm": 0.2954730987548828,
"learning_rate": 9.883909122671335e-05,
"loss": 0.0248,
"step": 11530
},
{
"grad_norm": 0.2668631076812744,
"learning_rate": 9.883554622083904e-05,
"loss": 0.0266,
"step": 11540
},
{
"grad_norm": 0.24218979477882385,
"learning_rate": 9.88319958743828e-05,
"loss": 0.0206,
"step": 11550
},
{
"grad_norm": 0.23225300014019012,
"learning_rate": 9.882844018773291e-05,
"loss": 0.026,
"step": 11560
},
{
"grad_norm": 0.37825363874435425,
"learning_rate": 9.882487916127823e-05,
"loss": 0.0251,
"step": 11570
},
{
"grad_norm": 0.25535348057746887,
"learning_rate": 9.882131279540815e-05,
"loss": 0.0218,
"step": 11580
},
{
"grad_norm": 0.29476526379585266,
"learning_rate": 9.881774109051271e-05,
"loss": 0.0217,
"step": 11590
},
{
"grad_norm": 0.24866236746311188,
"learning_rate": 9.881416404698252e-05,
"loss": 0.02,
"step": 11600
},
{
"grad_norm": 0.28169095516204834,
"learning_rate": 9.881058166520873e-05,
"loss": 0.0225,
"step": 11610
},
{
"grad_norm": 0.22448545694351196,
"learning_rate": 9.880699394558311e-05,
"loss": 0.026,
"step": 11620
},
{
"grad_norm": 0.2574785351753235,
"learning_rate": 9.880340088849801e-05,
"loss": 0.0212,
"step": 11630
},
{
"grad_norm": 0.2824767827987671,
"learning_rate": 9.879980249434637e-05,
"loss": 0.0229,
"step": 11640
},
{
"grad_norm": 0.30236056447029114,
"learning_rate": 9.879619876352168e-05,
"loss": 0.0235,
"step": 11650
},
{
"grad_norm": 0.2773892283439636,
"learning_rate": 9.879258969641809e-05,
"loss": 0.0226,
"step": 11660
},
{
"grad_norm": 0.2351124882698059,
"learning_rate": 9.878897529343023e-05,
"loss": 0.02,
"step": 11670
},
{
"grad_norm": 0.2677806317806244,
"learning_rate": 9.878535555495338e-05,
"loss": 0.0209,
"step": 11680
},
{
"grad_norm": 0.307692289352417,
"learning_rate": 9.87817304813834e-05,
"loss": 0.0264,
"step": 11690
},
{
"grad_norm": 0.32135266065597534,
"learning_rate": 9.877810007311671e-05,
"loss": 0.025,
"step": 11700
},
{
"grad_norm": 0.22899632155895233,
"learning_rate": 9.877446433055035e-05,
"loss": 0.0224,
"step": 11710
},
{
"grad_norm": 0.26118001341819763,
"learning_rate": 9.877082325408191e-05,
"loss": 0.0248,
"step": 11720
},
{
"grad_norm": 0.24061886966228485,
"learning_rate": 9.876717684410954e-05,
"loss": 0.0186,
"step": 11730
},
{
"grad_norm": 0.1753281205892563,
"learning_rate": 9.876352510103204e-05,
"loss": 0.0188,
"step": 11740
},
{
"grad_norm": 0.2684527635574341,
"learning_rate": 9.875986802524875e-05,
"loss": 0.0218,
"step": 11750
},
{
"grad_norm": 0.260416716337204,
"learning_rate": 9.87562056171596e-05,
"loss": 0.0236,
"step": 11760
},
{
"grad_norm": 0.33505305647850037,
"learning_rate": 9.875253787716511e-05,
"loss": 0.0225,
"step": 11770
},
{
"grad_norm": 0.27778613567352295,
"learning_rate": 9.874886480566637e-05,
"loss": 0.0264,
"step": 11780
},
{
"grad_norm": 0.28708258271217346,
"learning_rate": 9.874518640306507e-05,
"loss": 0.0206,
"step": 11790
},
{
"grad_norm": 0.28900209069252014,
"learning_rate": 9.874150266976347e-05,
"loss": 0.0277,
"step": 11800
},
{
"grad_norm": 0.29606348276138306,
"learning_rate": 9.873781360616443e-05,
"loss": 0.0209,
"step": 11810
},
{
"grad_norm": 0.29875603318214417,
"learning_rate": 9.873411921267137e-05,
"loss": 0.0256,
"step": 11820
},
{
"grad_norm": 0.2832305133342743,
"learning_rate": 9.873041948968829e-05,
"loss": 0.0248,
"step": 11830
},
{
"grad_norm": 0.2795998752117157,
"learning_rate": 9.872671443761981e-05,
"loss": 0.0199,
"step": 11840
},
{
"grad_norm": 0.2344684898853302,
"learning_rate": 9.872300405687109e-05,
"loss": 0.0247,
"step": 11850
},
{
"grad_norm": 0.25030753016471863,
"learning_rate": 9.871928834784792e-05,
"loss": 0.0247,
"step": 11860
},
{
"grad_norm": 0.24843540787696838,
"learning_rate": 9.871556731095661e-05,
"loss": 0.0262,
"step": 11870
},
{
"grad_norm": 0.2917851507663727,
"learning_rate": 9.871184094660411e-05,
"loss": 0.0229,
"step": 11880
},
{
"grad_norm": 0.30386418104171753,
"learning_rate": 9.870810925519791e-05,
"loss": 0.0224,
"step": 11890
},
{
"grad_norm": 0.29892122745513916,
"learning_rate": 9.870437223714612e-05,
"loss": 0.0303,
"step": 11900
},
{
"grad_norm": 0.2550772428512573,
"learning_rate": 9.87006298928574e-05,
"loss": 0.0224,
"step": 11910
},
{
"grad_norm": 0.24139836430549622,
"learning_rate": 9.869688222274103e-05,
"loss": 0.0229,
"step": 11920
},
{
"grad_norm": 0.25056102871894836,
"learning_rate": 9.869312922720681e-05,
"loss": 0.0223,
"step": 11930
},
{
"grad_norm": 0.27529701590538025,
"learning_rate": 9.868937090666521e-05,
"loss": 0.0203,
"step": 11940
},
{
"grad_norm": 0.23277588188648224,
"learning_rate": 9.86856072615272e-05,
"loss": 0.0252,
"step": 11950
},
{
"grad_norm": 0.22750338912010193,
"learning_rate": 9.868183829220438e-05,
"loss": 0.0237,
"step": 11960
},
{
"grad_norm": 0.32950612902641296,
"learning_rate": 9.867806399910893e-05,
"loss": 0.0232,
"step": 11970
},
{
"grad_norm": 0.2424909621477127,
"learning_rate": 9.867428438265356e-05,
"loss": 0.0271,
"step": 11980
},
{
"grad_norm": 0.31055083870887756,
"learning_rate": 9.867049944325165e-05,
"loss": 0.0237,
"step": 11990
},
{
"grad_norm": 0.2964540123939514,
"learning_rate": 9.86667091813171e-05,
"loss": 0.0242,
"step": 12000
},
{
"grad_norm": 0.22154490649700165,
"learning_rate": 9.866291359726438e-05,
"loss": 0.025,
"step": 12010
},
{
"grad_norm": 0.2891708016395569,
"learning_rate": 9.865911269150861e-05,
"loss": 0.0238,
"step": 12020
},
{
"grad_norm": 0.33079537749290466,
"learning_rate": 9.865530646446544e-05,
"loss": 0.0244,
"step": 12030
},
{
"grad_norm": 0.338015079498291,
"learning_rate": 9.86514949165511e-05,
"loss": 0.0257,
"step": 12040
},
{
"grad_norm": 0.3109557628631592,
"learning_rate": 9.864767804818243e-05,
"loss": 0.0217,
"step": 12050
},
{
"grad_norm": 0.25862064957618713,
"learning_rate": 9.86438558597768e-05,
"loss": 0.0228,
"step": 12060
},
{
"grad_norm": 0.2686013877391815,
"learning_rate": 9.864002835175225e-05,
"loss": 0.0225,
"step": 12070
},
{
"grad_norm": 0.29204437136650085,
"learning_rate": 9.863619552452734e-05,
"loss": 0.023,
"step": 12080
},
{
"grad_norm": 0.29866817593574524,
"learning_rate": 9.863235737852119e-05,
"loss": 0.0208,
"step": 12090
},
{
"grad_norm": 0.2732008695602417,
"learning_rate": 9.862851391415356e-05,
"loss": 0.0232,
"step": 12100
},
{
"grad_norm": 0.22158949077129364,
"learning_rate": 9.862466513184477e-05,
"loss": 0.0224,
"step": 12110
},
{
"grad_norm": 0.262530118227005,
"learning_rate": 9.86208110320157e-05,
"loss": 0.0254,
"step": 12120
},
{
"grad_norm": 0.22451408207416534,
"learning_rate": 9.861695161508784e-05,
"loss": 0.0225,
"step": 12130
},
{
"grad_norm": 0.2171703577041626,
"learning_rate": 9.861308688148324e-05,
"loss": 0.0193,
"step": 12140
},
{
"grad_norm": 0.22081957757472992,
"learning_rate": 9.860921683162455e-05,
"loss": 0.0214,
"step": 12150
},
{
"grad_norm": 0.26277512311935425,
"learning_rate": 9.860534146593499e-05,
"loss": 0.0245,
"step": 12160
},
{
"grad_norm": 0.23533903062343597,
"learning_rate": 9.860146078483836e-05,
"loss": 0.024,
"step": 12170
},
{
"grad_norm": 0.29710593819618225,
"learning_rate": 9.859757478875905e-05,
"loss": 0.021,
"step": 12180
},
{
"grad_norm": 0.2295854687690735,
"learning_rate": 9.859368347812204e-05,
"loss": 0.0201,
"step": 12190
},
{
"grad_norm": 0.30774781107902527,
"learning_rate": 9.858978685335285e-05,
"loss": 0.0264,
"step": 12200
},
{
"grad_norm": 0.20226168632507324,
"learning_rate": 9.858588491487763e-05,
"loss": 0.0215,
"step": 12210
},
{
"grad_norm": 0.27741652727127075,
"learning_rate": 9.858197766312308e-05,
"loss": 0.0216,
"step": 12220
},
{
"grad_norm": 0.24613800644874573,
"learning_rate": 9.857806509851649e-05,
"loss": 0.0205,
"step": 12230
},
{
"grad_norm": 0.3640504777431488,
"learning_rate": 9.857414722148574e-05,
"loss": 0.0211,
"step": 12240
},
{
"grad_norm": 0.28440797328948975,
"learning_rate": 9.857022403245928e-05,
"loss": 0.0239,
"step": 12250
},
{
"grad_norm": 0.29080355167388916,
"learning_rate": 9.856629553186615e-05,
"loss": 0.0227,
"step": 12260
},
{
"grad_norm": 0.27145934104919434,
"learning_rate": 9.856236172013595e-05,
"loss": 0.027,
"step": 12270
},
{
"grad_norm": 0.2906491458415985,
"learning_rate": 9.85584225976989e-05,
"loss": 0.0233,
"step": 12280
},
{
"grad_norm": 0.28224772214889526,
"learning_rate": 9.855447816498575e-05,
"loss": 0.0231,
"step": 12290
},
{
"grad_norm": 0.24714432656764984,
"learning_rate": 9.855052842242787e-05,
"loss": 0.0228,
"step": 12300
},
{
"grad_norm": 0.3262035846710205,
"learning_rate": 9.85465733704572e-05,
"loss": 0.0237,
"step": 12310
},
{
"grad_norm": 0.32095709443092346,
"learning_rate": 9.854261300950624e-05,
"loss": 0.0205,
"step": 12320
},
{
"grad_norm": 0.2512904107570648,
"learning_rate": 9.853864734000813e-05,
"loss": 0.0221,
"step": 12330
},
{
"grad_norm": 0.3358675241470337,
"learning_rate": 9.85346763623965e-05,
"loss": 0.0255,
"step": 12340
},
{
"grad_norm": 0.20079484581947327,
"learning_rate": 9.853070007710564e-05,
"loss": 0.0222,
"step": 12350
},
{
"grad_norm": 0.33502423763275146,
"learning_rate": 9.85267184845704e-05,
"loss": 0.0278,
"step": 12360
},
{
"grad_norm": 0.25794944167137146,
"learning_rate": 9.852273158522616e-05,
"loss": 0.0238,
"step": 12370
},
{
"grad_norm": 0.25033092498779297,
"learning_rate": 9.851873937950896e-05,
"loss": 0.0239,
"step": 12380
},
{
"grad_norm": 0.22771897912025452,
"learning_rate": 9.851474186785537e-05,
"loss": 0.022,
"step": 12390
},
{
"grad_norm": 0.2817453145980835,
"learning_rate": 9.851073905070254e-05,
"loss": 0.0249,
"step": 12400
},
{
"grad_norm": 0.2184731364250183,
"learning_rate": 9.850673092848824e-05,
"loss": 0.0216,
"step": 12410
},
{
"grad_norm": 0.3057407736778259,
"learning_rate": 9.850271750165077e-05,
"loss": 0.0209,
"step": 12420
},
{
"grad_norm": 0.23111845552921295,
"learning_rate": 9.849869877062902e-05,
"loss": 0.0185,
"step": 12430
},
{
"grad_norm": 0.3109472990036011,
"learning_rate": 9.849467473586252e-05,
"loss": 0.0206,
"step": 12440
},
{
"grad_norm": 0.2345299869775772,
"learning_rate": 9.849064539779127e-05,
"loss": 0.0296,
"step": 12450
},
{
"grad_norm": 0.2816222012042999,
"learning_rate": 9.848661075685594e-05,
"loss": 0.0216,
"step": 12460
},
{
"grad_norm": 0.27281153202056885,
"learning_rate": 9.848257081349778e-05,
"loss": 0.0256,
"step": 12470
},
{
"grad_norm": 0.24961847066879272,
"learning_rate": 9.847852556815856e-05,
"loss": 0.0202,
"step": 12480
},
{
"grad_norm": 0.24978555738925934,
"learning_rate": 9.847447502128067e-05,
"loss": 0.0194,
"step": 12490
},
{
"grad_norm": 0.2963050305843353,
"learning_rate": 9.847041917330708e-05,
"loss": 0.0207,
"step": 12500
},
{
"grad_norm": 0.19255203008651733,
"learning_rate": 9.846635802468132e-05,
"loss": 0.0203,
"step": 12510
},
{
"grad_norm": 0.2264157235622406,
"learning_rate": 9.84622915758475e-05,
"loss": 0.0219,
"step": 12520
},
{
"grad_norm": 0.2677379548549652,
"learning_rate": 9.845821982725034e-05,
"loss": 0.0236,
"step": 12530
},
{
"grad_norm": 0.2737414240837097,
"learning_rate": 9.845414277933514e-05,
"loss": 0.0224,
"step": 12540
},
{
"grad_norm": 0.2770121097564697,
"learning_rate": 9.845006043254771e-05,
"loss": 0.023,
"step": 12550
},
{
"grad_norm": 0.30070507526397705,
"learning_rate": 9.844597278733451e-05,
"loss": 0.0208,
"step": 12560
},
{
"grad_norm": 0.21809224784374237,
"learning_rate": 9.844187984414259e-05,
"loss": 0.0197,
"step": 12570
},
{
"grad_norm": 0.2893144190311432,
"learning_rate": 9.84377816034195e-05,
"loss": 0.0202,
"step": 12580
},
{
"grad_norm": 0.25457078218460083,
"learning_rate": 9.843367806561345e-05,
"loss": 0.0209,
"step": 12590
},
{
"grad_norm": 0.23254764080047607,
"learning_rate": 9.842956923117317e-05,
"loss": 0.0229,
"step": 12600
},
{
"grad_norm": 0.26205703616142273,
"learning_rate": 9.842545510054802e-05,
"loss": 0.0214,
"step": 12610
},
{
"grad_norm": 0.2194891721010208,
"learning_rate": 9.842133567418792e-05,
"loss": 0.0242,
"step": 12620
},
{
"grad_norm": 0.27267029881477356,
"learning_rate": 9.841721095254333e-05,
"loss": 0.0209,
"step": 12630
},
{
"grad_norm": 0.2827804982662201,
"learning_rate": 9.841308093606537e-05,
"loss": 0.0217,
"step": 12640
},
{
"grad_norm": 0.24511675536632538,
"learning_rate": 9.840894562520565e-05,
"loss": 0.0225,
"step": 12650
},
{
"grad_norm": 0.2633540630340576,
"learning_rate": 9.840480502041642e-05,
"loss": 0.018,
"step": 12660
},
{
"grad_norm": 0.3520581126213074,
"learning_rate": 9.840065912215049e-05,
"loss": 0.0199,
"step": 12670
},
{
"grad_norm": 0.29521462321281433,
"learning_rate": 9.839650793086124e-05,
"loss": 0.0224,
"step": 12680
},
{
"grad_norm": 0.22336840629577637,
"learning_rate": 9.839235144700265e-05,
"loss": 0.0224,
"step": 12690
},
{
"grad_norm": 0.2869412302970886,
"learning_rate": 9.838818967102926e-05,
"loss": 0.0204,
"step": 12700
},
{
"grad_norm": 0.26980113983154297,
"learning_rate": 9.83840226033962e-05,
"loss": 0.0191,
"step": 12710
},
{
"grad_norm": 0.2633763551712036,
"learning_rate": 9.837985024455918e-05,
"loss": 0.0218,
"step": 12720
},
{
"grad_norm": 0.26450225710868835,
"learning_rate": 9.837567259497447e-05,
"loss": 0.0211,
"step": 12730
},
{
"grad_norm": 0.3228972554206848,
"learning_rate": 9.837148965509894e-05,
"loss": 0.0217,
"step": 12740
},
{
"grad_norm": 0.25866827368736267,
"learning_rate": 9.836730142539001e-05,
"loss": 0.0219,
"step": 12750
},
{
"grad_norm": 0.1909390091896057,
"learning_rate": 9.836310790630574e-05,
"loss": 0.0206,
"step": 12760
},
{
"grad_norm": 0.282137393951416,
"learning_rate": 9.83589090983047e-05,
"loss": 0.0219,
"step": 12770
},
{
"grad_norm": 0.29815107583999634,
"learning_rate": 9.835470500184605e-05,
"loss": 0.0251,
"step": 12780
},
{
"grad_norm": 0.21475166082382202,
"learning_rate": 9.835049561738957e-05,
"loss": 0.0201,
"step": 12790
},
{
"grad_norm": 0.16367527842521667,
"learning_rate": 9.834628094539558e-05,
"loss": 0.0207,
"step": 12800
},
{
"grad_norm": 0.2078922986984253,
"learning_rate": 9.834206098632499e-05,
"loss": 0.0181,
"step": 12810
},
{
"grad_norm": 0.32321515679359436,
"learning_rate": 9.833783574063931e-05,
"loss": 0.0224,
"step": 12820
},
{
"grad_norm": 0.254509299993515,
"learning_rate": 9.833360520880058e-05,
"loss": 0.0215,
"step": 12830
},
{
"grad_norm": 0.20796674489974976,
"learning_rate": 9.832936939127144e-05,
"loss": 0.0218,
"step": 12840
},
{
"grad_norm": 0.2586333155632019,
"learning_rate": 9.832512828851515e-05,
"loss": 0.0213,
"step": 12850
},
{
"grad_norm": 0.3397057354450226,
"learning_rate": 9.832088190099546e-05,
"loss": 0.0229,
"step": 12860
},
{
"grad_norm": 0.2928526997566223,
"learning_rate": 9.831663022917679e-05,
"loss": 0.0208,
"step": 12870
},
{
"grad_norm": 0.2452399730682373,
"learning_rate": 9.831237327352407e-05,
"loss": 0.0205,
"step": 12880
},
{
"grad_norm": 0.2470838874578476,
"learning_rate": 9.830811103450286e-05,
"loss": 0.0225,
"step": 12890
},
{
"grad_norm": 0.23086868226528168,
"learning_rate": 9.830384351257924e-05,
"loss": 0.0215,
"step": 12900
},
{
"grad_norm": 0.2385035753250122,
"learning_rate": 9.829957070821993e-05,
"loss": 0.0201,
"step": 12910
},
{
"grad_norm": 0.27688005566596985,
"learning_rate": 9.829529262189218e-05,
"loss": 0.0217,
"step": 12920
},
{
"grad_norm": 0.22894538938999176,
"learning_rate": 9.829100925406385e-05,
"loss": 0.0222,
"step": 12930
},
{
"grad_norm": 0.24850626289844513,
"learning_rate": 9.828672060520333e-05,
"loss": 0.0228,
"step": 12940
},
{
"grad_norm": 0.2373901605606079,
"learning_rate": 9.828242667577966e-05,
"loss": 0.0195,
"step": 12950
},
{
"grad_norm": 0.3337841331958771,
"learning_rate": 9.82781274662624e-05,
"loss": 0.0224,
"step": 12960
},
{
"grad_norm": 0.2683013677597046,
"learning_rate": 9.82738229771217e-05,
"loss": 0.0201,
"step": 12970
},
{
"grad_norm": 0.22946369647979736,
"learning_rate": 9.826951320882829e-05,
"loss": 0.0232,
"step": 12980
},
{
"grad_norm": 0.3642555773258209,
"learning_rate": 9.826519816185351e-05,
"loss": 0.0295,
"step": 12990
},
{
"grad_norm": 0.2674192488193512,
"learning_rate": 9.826087783666921e-05,
"loss": 0.0197,
"step": 13000
},
{
"grad_norm": 0.2643831670284271,
"learning_rate": 9.825655223374787e-05,
"loss": 0.0213,
"step": 13010
},
{
"grad_norm": 0.2896205484867096,
"learning_rate": 9.825222135356253e-05,
"loss": 0.0219,
"step": 13020
},
{
"grad_norm": 0.2912057042121887,
"learning_rate": 9.82478851965868e-05,
"loss": 0.0198,
"step": 13030
},
{
"grad_norm": 0.2574427127838135,
"learning_rate": 9.82435437632949e-05,
"loss": 0.026,
"step": 13040
},
{
"grad_norm": 0.26649150252342224,
"learning_rate": 9.823919705416158e-05,
"loss": 0.0235,
"step": 13050
},
{
"grad_norm": 0.27899083495140076,
"learning_rate": 9.82348450696622e-05,
"loss": 0.0187,
"step": 13060
},
{
"grad_norm": 0.26895758509635925,
"learning_rate": 9.823048781027268e-05,
"loss": 0.0191,
"step": 13070
},
{
"grad_norm": 0.3179563879966736,
"learning_rate": 9.822612527646953e-05,
"loss": 0.0195,
"step": 13080
},
{
"grad_norm": 0.29025205969810486,
"learning_rate": 9.822175746872984e-05,
"loss": 0.0204,
"step": 13090
},
{
"grad_norm": 0.3235120177268982,
"learning_rate": 9.821738438753123e-05,
"loss": 0.0181,
"step": 13100
},
{
"grad_norm": 0.3427630364894867,
"learning_rate": 9.821300603335196e-05,
"loss": 0.023,
"step": 13110
},
{
"grad_norm": 0.2155032753944397,
"learning_rate": 9.820862240667085e-05,
"loss": 0.0194,
"step": 13120
},
{
"grad_norm": 0.1895771026611328,
"learning_rate": 9.820423350796726e-05,
"loss": 0.0222,
"step": 13130
},
{
"grad_norm": 0.21924172341823578,
"learning_rate": 9.819983933772118e-05,
"loss": 0.0222,
"step": 13140
},
{
"grad_norm": 0.28095996379852295,
"learning_rate": 9.819543989641314e-05,
"loss": 0.0241,
"step": 13150
},
{
"grad_norm": 0.30448660254478455,
"learning_rate": 9.819103518452423e-05,
"loss": 0.0216,
"step": 13160
},
{
"grad_norm": 0.22671166062355042,
"learning_rate": 9.818662520253618e-05,
"loss": 0.0239,
"step": 13170
},
{
"grad_norm": 0.26471537351608276,
"learning_rate": 9.818220995093126e-05,
"loss": 0.0208,
"step": 13180
},
{
"grad_norm": 0.2450067400932312,
"learning_rate": 9.817778943019228e-05,
"loss": 0.0224,
"step": 13190
},
{
"grad_norm": 0.2603633403778076,
"learning_rate": 9.81733636408027e-05,
"loss": 0.0212,
"step": 13200
},
{
"grad_norm": 0.21010948717594147,
"learning_rate": 9.816893258324649e-05,
"loss": 0.02,
"step": 13210
},
{
"grad_norm": 0.24279074370861053,
"learning_rate": 9.816449625800823e-05,
"loss": 0.0248,
"step": 13220
},
{
"grad_norm": 0.3085545301437378,
"learning_rate": 9.816005466557308e-05,
"loss": 0.0242,
"step": 13230
},
{
"grad_norm": 0.40831342339515686,
"learning_rate": 9.815560780642674e-05,
"loss": 0.0271,
"step": 13240
},
{
"grad_norm": 0.3233740031719208,
"learning_rate": 9.815115568105555e-05,
"loss": 0.0218,
"step": 13250
},
{
"grad_norm": 0.34060555696487427,
"learning_rate": 9.814669828994638e-05,
"loss": 0.0225,
"step": 13260
},
{
"grad_norm": 0.15952211618423462,
"learning_rate": 9.814223563358665e-05,
"loss": 0.0195,
"step": 13270
},
{
"grad_norm": 0.24137960374355316,
"learning_rate": 9.813776771246443e-05,
"loss": 0.0215,
"step": 13280
},
{
"grad_norm": 0.2525627613067627,
"learning_rate": 9.813329452706829e-05,
"loss": 0.0177,
"step": 13290
},
{
"grad_norm": 0.254814088344574,
"learning_rate": 9.812881607788744e-05,
"loss": 0.0225,
"step": 13300
},
{
"grad_norm": 0.23892925679683685,
"learning_rate": 9.812433236541163e-05,
"loss": 0.0202,
"step": 13310
},
{
"grad_norm": 0.23065580427646637,
"learning_rate": 9.811984339013116e-05,
"loss": 0.0205,
"step": 13320
},
{
"grad_norm": 0.22503262758255005,
"learning_rate": 9.811534915253698e-05,
"loss": 0.0201,
"step": 13330
},
{
"grad_norm": 0.2968944311141968,
"learning_rate": 9.811084965312056e-05,
"loss": 0.0197,
"step": 13340
},
{
"grad_norm": 0.24379391968250275,
"learning_rate": 9.810634489237396e-05,
"loss": 0.02,
"step": 13350
},
{
"grad_norm": 0.2536276876926422,
"learning_rate": 9.81018348707898e-05,
"loss": 0.0211,
"step": 13360
},
{
"grad_norm": 0.29255765676498413,
"learning_rate": 9.809731958886131e-05,
"loss": 0.022,
"step": 13370
},
{
"grad_norm": 0.2708306610584259,
"learning_rate": 9.809279904708224e-05,
"loss": 0.0215,
"step": 13380
},
{
"grad_norm": 0.28281521797180176,
"learning_rate": 9.808827324594699e-05,
"loss": 0.0184,
"step": 13390
},
{
"grad_norm": 0.21512441337108612,
"learning_rate": 9.808374218595046e-05,
"loss": 0.0206,
"step": 13400
},
{
"grad_norm": 0.24113260209560394,
"learning_rate": 9.80792058675882e-05,
"loss": 0.0177,
"step": 13410
},
{
"grad_norm": 0.24429504573345184,
"learning_rate": 9.807466429135627e-05,
"loss": 0.0184,
"step": 13420
},
{
"grad_norm": 0.2660101056098938,
"learning_rate": 9.807011745775132e-05,
"loss": 0.0217,
"step": 13430
},
{
"grad_norm": 0.30305543541908264,
"learning_rate": 9.806556536727061e-05,
"loss": 0.0245,
"step": 13440
},
{
"grad_norm": 0.19905737042427063,
"learning_rate": 9.806100802041193e-05,
"loss": 0.0191,
"step": 13450
},
{
"grad_norm": 0.3235015273094177,
"learning_rate": 9.805644541767368e-05,
"loss": 0.0216,
"step": 13460
},
{
"grad_norm": 0.21118152141571045,
"learning_rate": 9.805187755955478e-05,
"loss": 0.025,
"step": 13470
},
{
"grad_norm": 0.3211771845817566,
"learning_rate": 9.804730444655483e-05,
"loss": 0.0213,
"step": 13480
},
{
"grad_norm": 0.3155767619609833,
"learning_rate": 9.804272607917388e-05,
"loss": 0.0196,
"step": 13490
},
{
"grad_norm": 0.24246951937675476,
"learning_rate": 9.803814245791265e-05,
"loss": 0.0195,
"step": 13500
},
{
"grad_norm": 0.22159545123577118,
"learning_rate": 9.803355358327239e-05,
"loss": 0.0223,
"step": 13510
},
{
"grad_norm": 0.2195427268743515,
"learning_rate": 9.802895945575492e-05,
"loss": 0.0181,
"step": 13520
},
{
"grad_norm": 0.2641836702823639,
"learning_rate": 9.802436007586266e-05,
"loss": 0.0179,
"step": 13530
},
{
"grad_norm": 0.31182828545570374,
"learning_rate": 9.801975544409858e-05,
"loss": 0.0186,
"step": 13540
},
{
"grad_norm": 0.3050755262374878,
"learning_rate": 9.801514556096625e-05,
"loss": 0.0198,
"step": 13550
},
{
"grad_norm": 0.28792455792427063,
"learning_rate": 9.801053042696977e-05,
"loss": 0.0251,
"step": 13560
},
{
"grad_norm": 0.26547062397003174,
"learning_rate": 9.800591004261388e-05,
"loss": 0.0204,
"step": 13570
},
{
"grad_norm": 0.2921355962753296,
"learning_rate": 9.800128440840385e-05,
"loss": 0.0193,
"step": 13580
},
{
"grad_norm": 0.25927549600601196,
"learning_rate": 9.799665352484552e-05,
"loss": 0.0193,
"step": 13590
},
{
"grad_norm": 0.2548466622829437,
"learning_rate": 9.799201739244532e-05,
"loss": 0.0171,
"step": 13600
},
{
"grad_norm": 0.25240445137023926,
"learning_rate": 9.798737601171025e-05,
"loss": 0.0205,
"step": 13610
},
{
"grad_norm": 0.28021761775016785,
"learning_rate": 9.79827293831479e-05,
"loss": 0.0206,
"step": 13620
},
{
"grad_norm": 0.23501914739608765,
"learning_rate": 9.797807750726638e-05,
"loss": 0.0197,
"step": 13630
},
{
"grad_norm": 0.23027177155017853,
"learning_rate": 9.797342038457446e-05,
"loss": 0.02,
"step": 13640
},
{
"grad_norm": 0.2953159213066101,
"learning_rate": 9.796875801558141e-05,
"loss": 0.0209,
"step": 13650
},
{
"grad_norm": 0.28940150141716003,
"learning_rate": 9.79640904007971e-05,
"loss": 0.0185,
"step": 13660
},
{
"grad_norm": 0.2605695128440857,
"learning_rate": 9.795941754073199e-05,
"loss": 0.0204,
"step": 13670
},
{
"grad_norm": 0.2594354748725891,
"learning_rate": 9.795473943589705e-05,
"loss": 0.0198,
"step": 13680
},
{
"grad_norm": 0.3043878376483917,
"learning_rate": 9.795005608680394e-05,
"loss": 0.0205,
"step": 13690
},
{
"grad_norm": 0.2961346507072449,
"learning_rate": 9.794536749396477e-05,
"loss": 0.0207,
"step": 13700
},
{
"grad_norm": 0.25186237692832947,
"learning_rate": 9.79406736578923e-05,
"loss": 0.0224,
"step": 13710
},
{
"grad_norm": 0.2529714107513428,
"learning_rate": 9.793597457909984e-05,
"loss": 0.0231,
"step": 13720
},
{
"grad_norm": 0.2206045389175415,
"learning_rate": 9.793127025810127e-05,
"loss": 0.0197,
"step": 13730
},
{
"grad_norm": 0.24219457805156708,
"learning_rate": 9.792656069541104e-05,
"loss": 0.023,
"step": 13740
},
{
"grad_norm": 0.2709461450576782,
"learning_rate": 9.79218458915442e-05,
"loss": 0.0184,
"step": 13750
},
{
"grad_norm": 0.25277814269065857,
"learning_rate": 9.791712584701634e-05,
"loss": 0.0189,
"step": 13760
},
{
"grad_norm": 0.3072836399078369,
"learning_rate": 9.791240056234364e-05,
"loss": 0.0226,
"step": 13770
},
{
"grad_norm": 0.24425631761550903,
"learning_rate": 9.790767003804283e-05,
"loss": 0.0235,
"step": 13780
},
{
"grad_norm": 0.2431863695383072,
"learning_rate": 9.790293427463126e-05,
"loss": 0.0195,
"step": 13790
},
{
"grad_norm": 0.2647920548915863,
"learning_rate": 9.789819327262684e-05,
"loss": 0.0204,
"step": 13800
},
{
"grad_norm": 0.2761968672275543,
"learning_rate": 9.7893447032548e-05,
"loss": 0.0202,
"step": 13810
},
{
"grad_norm": 0.3204233944416046,
"learning_rate": 9.78886955549138e-05,
"loss": 0.0223,
"step": 13820
},
{
"grad_norm": 0.3227749466896057,
"learning_rate": 9.788393884024387e-05,
"loss": 0.0199,
"step": 13830
},
{
"grad_norm": 0.2341262400150299,
"learning_rate": 9.787917688905836e-05,
"loss": 0.0173,
"step": 13840
},
{
"grad_norm": 0.24751389026641846,
"learning_rate": 9.787440970187807e-05,
"loss": 0.0212,
"step": 13850
},
{
"grad_norm": 0.3391607105731964,
"learning_rate": 9.786963727922429e-05,
"loss": 0.0217,
"step": 13860
},
{
"grad_norm": 0.24142494797706604,
"learning_rate": 9.786485962161897e-05,
"loss": 0.0207,
"step": 13870
},
{
"grad_norm": 0.22415021061897278,
"learning_rate": 9.786007672958455e-05,
"loss": 0.0214,
"step": 13880
},
{
"grad_norm": 0.24598553776741028,
"learning_rate": 9.78552886036441e-05,
"loss": 0.0192,
"step": 13890
},
{
"grad_norm": 0.2527545988559723,
"learning_rate": 9.785049524432124e-05,
"loss": 0.0218,
"step": 13900
},
{
"grad_norm": 0.25652772188186646,
"learning_rate": 9.784569665214016e-05,
"loss": 0.0174,
"step": 13910
},
{
"grad_norm": 0.2599169611930847,
"learning_rate": 9.784089282762563e-05,
"loss": 0.0178,
"step": 13920
},
{
"grad_norm": 0.22017325460910797,
"learning_rate": 9.7836083771303e-05,
"loss": 0.0191,
"step": 13930
},
{
"grad_norm": 0.26483476161956787,
"learning_rate": 9.783126948369817e-05,
"loss": 0.02,
"step": 13940
},
{
"grad_norm": 0.24816817045211792,
"learning_rate": 9.78264499653376e-05,
"loss": 0.0225,
"step": 13950
},
{
"grad_norm": 0.301017701625824,
"learning_rate": 9.782162521674838e-05,
"loss": 0.0202,
"step": 13960
},
{
"grad_norm": 0.21281808614730835,
"learning_rate": 9.781679523845812e-05,
"loss": 0.0224,
"step": 13970
},
{
"grad_norm": 0.27746960520744324,
"learning_rate": 9.781196003099502e-05,
"loss": 0.0227,
"step": 13980
},
{
"grad_norm": 0.23323650658130646,
"learning_rate": 9.780711959488786e-05,
"loss": 0.024,
"step": 13990
},
{
"grad_norm": 0.23771823942661285,
"learning_rate": 9.780227393066599e-05,
"loss": 0.0253,
"step": 14000
},
{
"grad_norm": 0.2411212921142578,
"learning_rate": 9.77974230388593e-05,
"loss": 0.0171,
"step": 14010
},
{
"grad_norm": 0.23389559984207153,
"learning_rate": 9.779256691999829e-05,
"loss": 0.0201,
"step": 14020
},
{
"grad_norm": 0.21322768926620483,
"learning_rate": 9.778770557461403e-05,
"loss": 0.0196,
"step": 14030
},
{
"grad_norm": 0.37174296379089355,
"learning_rate": 9.778283900323812e-05,
"loss": 0.0225,
"step": 14040
},
{
"grad_norm": 0.3192085921764374,
"learning_rate": 9.777796720640277e-05,
"loss": 0.0217,
"step": 14050
},
{
"grad_norm": 0.21999326348304749,
"learning_rate": 9.777309018464078e-05,
"loss": 0.0206,
"step": 14060
},
{
"grad_norm": 0.24725963175296783,
"learning_rate": 9.776820793848547e-05,
"loss": 0.021,
"step": 14070
},
{
"grad_norm": 0.28076156973838806,
"learning_rate": 9.776332046847075e-05,
"loss": 0.0194,
"step": 14080
},
{
"grad_norm": 0.20957839488983154,
"learning_rate": 9.775842777513111e-05,
"loss": 0.0204,
"step": 14090
},
{
"grad_norm": 0.2436676174402237,
"learning_rate": 9.775352985900163e-05,
"loss": 0.0188,
"step": 14100
},
{
"grad_norm": 0.19794858992099762,
"learning_rate": 9.774862672061791e-05,
"loss": 0.0173,
"step": 14110
},
{
"grad_norm": 0.27602648735046387,
"learning_rate": 9.774371836051616e-05,
"loss": 0.0205,
"step": 14120
},
{
"grad_norm": 0.345101922750473,
"learning_rate": 9.773880477923315e-05,
"loss": 0.0188,
"step": 14130
},
{
"grad_norm": 0.23460453748703003,
"learning_rate": 9.773388597730623e-05,
"loss": 0.0171,
"step": 14140
},
{
"grad_norm": 0.29975447058677673,
"learning_rate": 9.77289619552733e-05,
"loss": 0.021,
"step": 14150
},
{
"grad_norm": 0.2128962129354477,
"learning_rate": 9.772403271367285e-05,
"loss": 0.0205,
"step": 14160
},
{
"grad_norm": 0.3548290729522705,
"learning_rate": 9.771909825304396e-05,
"loss": 0.0203,
"step": 14170
},
{
"grad_norm": 0.2556484043598175,
"learning_rate": 9.771415857392619e-05,
"loss": 0.0244,
"step": 14180
},
{
"grad_norm": 0.23943066596984863,
"learning_rate": 9.770921367685978e-05,
"loss": 0.0202,
"step": 14190
},
{
"grad_norm": 0.21408414840698242,
"learning_rate": 9.770426356238551e-05,
"loss": 0.0224,
"step": 14200
},
{
"grad_norm": 0.2104213684797287,
"learning_rate": 9.769930823104469e-05,
"loss": 0.0159,
"step": 14210
},
{
"grad_norm": 0.21027202904224396,
"learning_rate": 9.769434768337926e-05,
"loss": 0.0187,
"step": 14220
},
{
"grad_norm": 0.2199854850769043,
"learning_rate": 9.768938191993164e-05,
"loss": 0.0188,
"step": 14230
},
{
"grad_norm": 0.2762155532836914,
"learning_rate": 9.768441094124494e-05,
"loss": 0.0227,
"step": 14240
},
{
"grad_norm": 0.29159846901893616,
"learning_rate": 9.767943474786275e-05,
"loss": 0.0222,
"step": 14250
},
{
"grad_norm": 0.21874549984931946,
"learning_rate": 9.767445334032923e-05,
"loss": 0.0193,
"step": 14260
},
{
"grad_norm": 0.22311334311962128,
"learning_rate": 9.766946671918919e-05,
"loss": 0.022,
"step": 14270
},
{
"grad_norm": 0.22728633880615234,
"learning_rate": 9.766447488498796e-05,
"loss": 0.0197,
"step": 14280
},
{
"grad_norm": 0.3445588946342468,
"learning_rate": 9.765947783827139e-05,
"loss": 0.0204,
"step": 14290
},
{
"grad_norm": 0.2580435872077942,
"learning_rate": 9.765447557958599e-05,
"loss": 0.0235,
"step": 14300
},
{
"grad_norm": 0.2422706037759781,
"learning_rate": 9.764946810947879e-05,
"loss": 0.0199,
"step": 14310
},
{
"grad_norm": 0.3658464252948761,
"learning_rate": 9.764445542849738e-05,
"loss": 0.0219,
"step": 14320
},
{
"grad_norm": 0.2647552788257599,
"learning_rate": 9.763943753718998e-05,
"loss": 0.0218,
"step": 14330
},
{
"grad_norm": 0.22951330244541168,
"learning_rate": 9.76344144361053e-05,
"loss": 0.0195,
"step": 14340
},
{
"grad_norm": 0.20395617187023163,
"learning_rate": 9.762938612579269e-05,
"loss": 0.0202,
"step": 14350
},
{
"grad_norm": 0.2150609940290451,
"learning_rate": 9.762435260680202e-05,
"loss": 0.018,
"step": 14360
},
{
"grad_norm": 0.219925656914711,
"learning_rate": 9.761931387968373e-05,
"loss": 0.0175,
"step": 14370
},
{
"grad_norm": 0.21585464477539062,
"learning_rate": 9.76142699449889e-05,
"loss": 0.0183,
"step": 14380
},
{
"grad_norm": 0.21576769649982452,
"learning_rate": 9.760922080326908e-05,
"loss": 0.0203,
"step": 14390
},
{
"grad_norm": 0.23750628530979156,
"learning_rate": 9.760416645507644e-05,
"loss": 0.0202,
"step": 14400
},
{
"grad_norm": 0.255420982837677,
"learning_rate": 9.759910690096375e-05,
"loss": 0.0212,
"step": 14410
},
{
"grad_norm": 0.3221551179885864,
"learning_rate": 9.759404214148429e-05,
"loss": 0.0192,
"step": 14420
},
{
"grad_norm": 0.2521008551120758,
"learning_rate": 9.758897217719191e-05,
"loss": 0.0175,
"step": 14430
},
{
"grad_norm": 0.2588962912559509,
"learning_rate": 9.758389700864113e-05,
"loss": 0.0182,
"step": 14440
},
{
"grad_norm": 0.2366463840007782,
"learning_rate": 9.757881663638688e-05,
"loss": 0.0186,
"step": 14450
},
{
"grad_norm": 0.2948257327079773,
"learning_rate": 9.757373106098478e-05,
"loss": 0.02,
"step": 14460
},
{
"grad_norm": 0.30074384808540344,
"learning_rate": 9.756864028299097e-05,
"loss": 0.0215,
"step": 14470
},
{
"grad_norm": 0.28855177760124207,
"learning_rate": 9.75635443029622e-05,
"loss": 0.0196,
"step": 14480
},
{
"grad_norm": 0.1691199094057083,
"learning_rate": 9.755844312145572e-05,
"loss": 0.0191,
"step": 14490
},
{
"grad_norm": 0.27137795090675354,
"learning_rate": 9.755333673902941e-05,
"loss": 0.0181,
"step": 14500
},
{
"grad_norm": 0.24129636585712433,
"learning_rate": 9.75482251562417e-05,
"loss": 0.0214,
"step": 14510
},
{
"grad_norm": 0.2521432340145111,
"learning_rate": 9.754310837365155e-05,
"loss": 0.0181,
"step": 14520
},
{
"grad_norm": 0.28629830479621887,
"learning_rate": 9.753798639181856e-05,
"loss": 0.0185,
"step": 14530
},
{
"grad_norm": 0.25693315267562866,
"learning_rate": 9.753285921130286e-05,
"loss": 0.0224,
"step": 14540
},
{
"grad_norm": 0.23772841691970825,
"learning_rate": 9.752772683266512e-05,
"loss": 0.0178,
"step": 14550
},
{
"grad_norm": 0.20092158019542694,
"learning_rate": 9.752258925646665e-05,
"loss": 0.0188,
"step": 14560
},
{
"grad_norm": 0.25228646397590637,
"learning_rate": 9.751744648326926e-05,
"loss": 0.0195,
"step": 14570
},
{
"grad_norm": 0.2741185426712036,
"learning_rate": 9.751229851363536e-05,
"loss": 0.0203,
"step": 14580
},
{
"grad_norm": 0.3167235255241394,
"learning_rate": 9.750714534812793e-05,
"loss": 0.0201,
"step": 14590
},
{
"grad_norm": 0.2366606742143631,
"learning_rate": 9.750198698731053e-05,
"loss": 0.0186,
"step": 14600
},
{
"grad_norm": 0.29967039823532104,
"learning_rate": 9.749682343174722e-05,
"loss": 0.02,
"step": 14610
},
{
"grad_norm": 0.26064541935920715,
"learning_rate": 9.749165468200272e-05,
"loss": 0.0203,
"step": 14620
},
{
"grad_norm": 0.28458788990974426,
"learning_rate": 9.748648073864229e-05,
"loss": 0.022,
"step": 14630
},
{
"grad_norm": 0.18595005571842194,
"learning_rate": 9.748130160223168e-05,
"loss": 0.0183,
"step": 14640
},
{
"grad_norm": 0.266886830329895,
"learning_rate": 9.747611727333734e-05,
"loss": 0.0166,
"step": 14650
},
{
"grad_norm": 0.1858520209789276,
"learning_rate": 9.74709277525262e-05,
"loss": 0.0236,
"step": 14660
},
{
"grad_norm": 0.282135009765625,
"learning_rate": 9.746573304036576e-05,
"loss": 0.0191,
"step": 14670
},
{
"grad_norm": 0.22414512932300568,
"learning_rate": 9.746053313742412e-05,
"loss": 0.0198,
"step": 14680
},
{
"grad_norm": 0.2562432289123535,
"learning_rate": 9.745532804426994e-05,
"loss": 0.0192,
"step": 14690
},
{
"grad_norm": 0.25288599729537964,
"learning_rate": 9.745011776147242e-05,
"loss": 0.0191,
"step": 14700
},
{
"grad_norm": 0.19824698567390442,
"learning_rate": 9.744490228960138e-05,
"loss": 0.0171,
"step": 14710
},
{
"grad_norm": 0.256298303604126,
"learning_rate": 9.743968162922713e-05,
"loss": 0.0205,
"step": 14720
},
{
"grad_norm": 0.2583651542663574,
"learning_rate": 9.743445578092064e-05,
"loss": 0.02,
"step": 14730
},
{
"grad_norm": 0.3547195792198181,
"learning_rate": 9.742922474525338e-05,
"loss": 0.0244,
"step": 14740
},
{
"grad_norm": 0.22905513644218445,
"learning_rate": 9.742398852279741e-05,
"loss": 0.0196,
"step": 14750
},
{
"grad_norm": 0.24030999839305878,
"learning_rate": 9.741874711412535e-05,
"loss": 0.0184,
"step": 14760
},
{
"grad_norm": 0.19056032598018646,
"learning_rate": 9.741350051981042e-05,
"loss": 0.0209,
"step": 14770
},
{
"grad_norm": 0.22818054258823395,
"learning_rate": 9.740824874042633e-05,
"loss": 0.0197,
"step": 14780
},
{
"grad_norm": 0.2896696627140045,
"learning_rate": 9.740299177654746e-05,
"loss": 0.0205,
"step": 14790
},
{
"grad_norm": 0.23162594437599182,
"learning_rate": 9.739772962874867e-05,
"loss": 0.0192,
"step": 14800
},
{
"grad_norm": 0.23662790656089783,
"learning_rate": 9.739246229760541e-05,
"loss": 0.0195,
"step": 14810
},
{
"grad_norm": 0.30700597167015076,
"learning_rate": 9.738718978369376e-05,
"loss": 0.0207,
"step": 14820
},
{
"grad_norm": 0.17646534740924835,
"learning_rate": 9.738191208759025e-05,
"loss": 0.0201,
"step": 14830
},
{
"grad_norm": 0.2281806915998459,
"learning_rate": 9.73766292098721e-05,
"loss": 0.0204,
"step": 14840
},
{
"grad_norm": 0.25043541193008423,
"learning_rate": 9.737134115111699e-05,
"loss": 0.0196,
"step": 14850
},
{
"grad_norm": 0.278923362493515,
"learning_rate": 9.736604791190323e-05,
"loss": 0.0165,
"step": 14860
},
{
"grad_norm": 0.1961689442396164,
"learning_rate": 9.73607494928097e-05,
"loss": 0.0183,
"step": 14870
},
{
"grad_norm": 0.234564870595932,
"learning_rate": 9.735544589441581e-05,
"loss": 0.0206,
"step": 14880
},
{
"grad_norm": 0.23098857700824738,
"learning_rate": 9.735013711730154e-05,
"loss": 0.0198,
"step": 14890
},
{
"grad_norm": 0.17101159691810608,
"learning_rate": 9.734482316204747e-05,
"loss": 0.0186,
"step": 14900
},
{
"grad_norm": 0.2384091466665268,
"learning_rate": 9.733950402923473e-05,
"loss": 0.0169,
"step": 14910
},
{
"grad_norm": 0.2314639389514923,
"learning_rate": 9.7334179719445e-05,
"loss": 0.021,
"step": 14920
},
{
"grad_norm": 0.3325411081314087,
"learning_rate": 9.732885023326053e-05,
"loss": 0.0193,
"step": 14930
},
{
"grad_norm": 0.2880927324295044,
"learning_rate": 9.732351557126418e-05,
"loss": 0.0229,
"step": 14940
},
{
"grad_norm": 0.29757729172706604,
"learning_rate": 9.731817573403929e-05,
"loss": 0.019,
"step": 14950
},
{
"grad_norm": 0.2836707532405853,
"learning_rate": 9.731283072216985e-05,
"loss": 0.0185,
"step": 14960
},
{
"grad_norm": 0.2036943882703781,
"learning_rate": 9.730748053624039e-05,
"loss": 0.0246,
"step": 14970
},
{
"grad_norm": 0.23931244015693665,
"learning_rate": 9.730212517683598e-05,
"loss": 0.0187,
"step": 14980
},
{
"grad_norm": 0.23276709020137787,
"learning_rate": 9.729676464454228e-05,
"loss": 0.0173,
"step": 14990
},
{
"grad_norm": 0.25916847586631775,
"learning_rate": 9.72913989399455e-05,
"loss": 0.0206,
"step": 15000
},
{
"grad_norm": 0.2327517718076706,
"learning_rate": 9.728602806363242e-05,
"loss": 0.0175,
"step": 15010
},
{
"grad_norm": 0.2676856517791748,
"learning_rate": 9.728065201619043e-05,
"loss": 0.0195,
"step": 15020
},
{
"grad_norm": 0.26770108938217163,
"learning_rate": 9.727527079820742e-05,
"loss": 0.0171,
"step": 15030
},
{
"grad_norm": 0.2789733409881592,
"learning_rate": 9.726988441027186e-05,
"loss": 0.0159,
"step": 15040
},
{
"grad_norm": 0.19679932296276093,
"learning_rate": 9.726449285297281e-05,
"loss": 0.0177,
"step": 15050
},
{
"grad_norm": 0.2140314131975174,
"learning_rate": 9.72590961268999e-05,
"loss": 0.0168,
"step": 15060
},
{
"grad_norm": 0.2961437702178955,
"learning_rate": 9.725369423264328e-05,
"loss": 0.0168,
"step": 15070
},
{
"grad_norm": 0.23276185989379883,
"learning_rate": 9.72482871707937e-05,
"loss": 0.0189,
"step": 15080
},
{
"grad_norm": 0.2881372272968292,
"learning_rate": 9.724287494194247e-05,
"loss": 0.0176,
"step": 15090
},
{
"grad_norm": 0.3392719626426697,
"learning_rate": 9.723745754668147e-05,
"loss": 0.0186,
"step": 15100
},
{
"grad_norm": 0.28607383370399475,
"learning_rate": 9.723203498560313e-05,
"loss": 0.02,
"step": 15110
},
{
"grad_norm": 0.27815496921539307,
"learning_rate": 9.722660725930046e-05,
"loss": 0.0191,
"step": 15120
},
{
"grad_norm": 0.26892364025115967,
"learning_rate": 9.722117436836702e-05,
"loss": 0.0199,
"step": 15130
},
{
"grad_norm": 0.22978749871253967,
"learning_rate": 9.721573631339696e-05,
"loss": 0.0207,
"step": 15140
},
{
"grad_norm": 0.20402418076992035,
"learning_rate": 9.721029309498494e-05,
"loss": 0.0201,
"step": 15150
},
{
"grad_norm": 0.24020260572433472,
"learning_rate": 9.720484471372627e-05,
"loss": 0.0197,
"step": 15160
},
{
"grad_norm": 0.20848724246025085,
"learning_rate": 9.719939117021673e-05,
"loss": 0.0199,
"step": 15170
},
{
"grad_norm": 0.29797449707984924,
"learning_rate": 9.719393246505275e-05,
"loss": 0.0194,
"step": 15180
},
{
"grad_norm": 0.30025288462638855,
"learning_rate": 9.718846859883128e-05,
"loss": 0.0219,
"step": 15190
},
{
"grad_norm": 0.2575812339782715,
"learning_rate": 9.718299957214982e-05,
"loss": 0.0205,
"step": 15200
},
{
"grad_norm": 0.3019827902317047,
"learning_rate": 9.717752538560646e-05,
"loss": 0.0203,
"step": 15210
},
{
"grad_norm": 0.2757616639137268,
"learning_rate": 9.717204603979986e-05,
"loss": 0.0191,
"step": 15220
},
{
"grad_norm": 0.3016200363636017,
"learning_rate": 9.716656153532922e-05,
"loss": 0.0171,
"step": 15230
},
{
"grad_norm": 0.23993328213691711,
"learning_rate": 9.716107187279434e-05,
"loss": 0.0198,
"step": 15240
},
{
"grad_norm": 0.22839435935020447,
"learning_rate": 9.715557705279555e-05,
"loss": 0.0173,
"step": 15250
},
{
"grad_norm": 0.22564002871513367,
"learning_rate": 9.715007707593372e-05,
"loss": 0.0202,
"step": 15260
},
{
"grad_norm": 0.19124171137809753,
"learning_rate": 9.714457194281036e-05,
"loss": 0.0181,
"step": 15270
},
{
"grad_norm": 0.32519978284835815,
"learning_rate": 9.713906165402751e-05,
"loss": 0.0202,
"step": 15280
},
{
"grad_norm": 0.21475209295749664,
"learning_rate": 9.713354621018774e-05,
"loss": 0.0188,
"step": 15290
},
{
"grad_norm": 0.19821572303771973,
"learning_rate": 9.712802561189422e-05,
"loss": 0.0172,
"step": 15300
},
{
"grad_norm": 0.32506078481674194,
"learning_rate": 9.712249985975069e-05,
"loss": 0.0179,
"step": 15310
},
{
"grad_norm": 0.28665006160736084,
"learning_rate": 9.71169689543614e-05,
"loss": 0.0201,
"step": 15320
},
{
"grad_norm": 0.1634398251771927,
"learning_rate": 9.711143289633123e-05,
"loss": 0.0169,
"step": 15330
},
{
"grad_norm": 0.25451749563217163,
"learning_rate": 9.710589168626561e-05,
"loss": 0.0194,
"step": 15340
},
{
"grad_norm": 0.25250038504600525,
"learning_rate": 9.710034532477048e-05,
"loss": 0.0191,
"step": 15350
},
{
"grad_norm": 0.20981234312057495,
"learning_rate": 9.709479381245239e-05,
"loss": 0.0195,
"step": 15360
},
{
"grad_norm": 0.26432904601097107,
"learning_rate": 9.708923714991847e-05,
"loss": 0.0234,
"step": 15370
},
{
"grad_norm": 0.21719463169574738,
"learning_rate": 9.708367533777638e-05,
"loss": 0.0226,
"step": 15380
},
{
"grad_norm": 0.247014582157135,
"learning_rate": 9.707810837663431e-05,
"loss": 0.0195,
"step": 15390
},
{
"grad_norm": 0.22526317834854126,
"learning_rate": 9.707253626710113e-05,
"loss": 0.0221,
"step": 15400
},
{
"grad_norm": 0.2209337204694748,
"learning_rate": 9.706695900978613e-05,
"loss": 0.0197,
"step": 15410
},
{
"grad_norm": 0.24105608463287354,
"learning_rate": 9.706137660529926e-05,
"loss": 0.0196,
"step": 15420
},
{
"grad_norm": 0.25565317273139954,
"learning_rate": 9.705578905425101e-05,
"loss": 0.02,
"step": 15430
},
{
"grad_norm": 0.2159339338541031,
"learning_rate": 9.705019635725241e-05,
"loss": 0.0214,
"step": 15440
},
{
"grad_norm": 0.27560457587242126,
"learning_rate": 9.704459851491508e-05,
"loss": 0.0184,
"step": 15450
},
{
"grad_norm": 0.2387838512659073,
"learning_rate": 9.703899552785118e-05,
"loss": 0.0193,
"step": 15460
},
{
"grad_norm": 0.2243328094482422,
"learning_rate": 9.703338739667346e-05,
"loss": 0.0223,
"step": 15470
},
{
"grad_norm": 0.229559987783432,
"learning_rate": 9.70277741219952e-05,
"loss": 0.0208,
"step": 15480
},
{
"grad_norm": 0.28965842723846436,
"learning_rate": 9.702215570443027e-05,
"loss": 0.0257,
"step": 15490
},
{
"grad_norm": 0.3054785430431366,
"learning_rate": 9.701653214459309e-05,
"loss": 0.0241,
"step": 15500
},
{
"grad_norm": 0.34266725182533264,
"learning_rate": 9.701090344309865e-05,
"loss": 0.0211,
"step": 15510
},
{
"grad_norm": 0.2969988286495209,
"learning_rate": 9.700526960056247e-05,
"loss": 0.0204,
"step": 15520
},
{
"grad_norm": 0.2747593820095062,
"learning_rate": 9.699963061760068e-05,
"loss": 0.0197,
"step": 15530
},
{
"grad_norm": 0.16242137551307678,
"learning_rate": 9.699398649482997e-05,
"loss": 0.0195,
"step": 15540
},
{
"grad_norm": 0.395180881023407,
"learning_rate": 9.698833723286753e-05,
"loss": 0.0205,
"step": 15550
},
{
"grad_norm": 0.22436875104904175,
"learning_rate": 9.698268283233118e-05,
"loss": 0.0187,
"step": 15560
},
{
"grad_norm": 0.23885248601436615,
"learning_rate": 9.697702329383929e-05,
"loss": 0.0182,
"step": 15570
},
{
"grad_norm": 0.24845798313617706,
"learning_rate": 9.697135861801074e-05,
"loss": 0.0158,
"step": 15580
},
{
"grad_norm": 0.23030376434326172,
"learning_rate": 9.696568880546505e-05,
"loss": 0.0191,
"step": 15590
},
{
"grad_norm": 0.2087312787771225,
"learning_rate": 9.696001385682223e-05,
"loss": 0.0203,
"step": 15600
},
{
"grad_norm": 0.20425690710544586,
"learning_rate": 9.695433377270291e-05,
"loss": 0.0181,
"step": 15610
},
{
"grad_norm": 0.2104547619819641,
"learning_rate": 9.694864855372824e-05,
"loss": 0.0166,
"step": 15620
},
{
"grad_norm": 0.2342647910118103,
"learning_rate": 9.694295820051995e-05,
"loss": 0.017,
"step": 15630
},
{
"grad_norm": 0.27400583028793335,
"learning_rate": 9.693726271370032e-05,
"loss": 0.0192,
"step": 15640
},
{
"grad_norm": 0.19918233156204224,
"learning_rate": 9.693156209389221e-05,
"loss": 0.0183,
"step": 15650
},
{
"grad_norm": 0.2711070477962494,
"learning_rate": 9.692585634171905e-05,
"loss": 0.0209,
"step": 15660
},
{
"grad_norm": 0.24387693405151367,
"learning_rate": 9.692014545780476e-05,
"loss": 0.0195,
"step": 15670
},
{
"grad_norm": 0.2516650855541229,
"learning_rate": 9.691442944277393e-05,
"loss": 0.0206,
"step": 15680
},
{
"grad_norm": 0.22077317535877228,
"learning_rate": 9.690870829725162e-05,
"loss": 0.0171,
"step": 15690
},
{
"grad_norm": 0.19970019161701202,
"learning_rate": 9.69029820218635e-05,
"loss": 0.0153,
"step": 15700
},
{
"grad_norm": 0.2740638554096222,
"learning_rate": 9.689725061723579e-05,
"loss": 0.0195,
"step": 15710
},
{
"grad_norm": 0.2535877525806427,
"learning_rate": 9.689151408399527e-05,
"loss": 0.0195,
"step": 15720
},
{
"grad_norm": 0.3257971704006195,
"learning_rate": 9.688577242276924e-05,
"loss": 0.0208,
"step": 15730
},
{
"grad_norm": 0.28868475556373596,
"learning_rate": 9.688002563418566e-05,
"loss": 0.0195,
"step": 15740
},
{
"grad_norm": 0.2290724515914917,
"learning_rate": 9.687427371887293e-05,
"loss": 0.0195,
"step": 15750
},
{
"grad_norm": 0.2544545531272888,
"learning_rate": 9.686851667746012e-05,
"loss": 0.0211,
"step": 15760
},
{
"grad_norm": 0.17678581178188324,
"learning_rate": 9.686275451057677e-05,
"loss": 0.0197,
"step": 15770
},
{
"grad_norm": 0.33382585644721985,
"learning_rate": 9.685698721885308e-05,
"loss": 0.0191,
"step": 15780
},
{
"grad_norm": 0.24783273041248322,
"learning_rate": 9.68512148029197e-05,
"loss": 0.0183,
"step": 15790
},
{
"grad_norm": 0.2595932185649872,
"learning_rate": 9.684543726340791e-05,
"loss": 0.0195,
"step": 15800
},
{
"grad_norm": 0.23103168606758118,
"learning_rate": 9.683965460094952e-05,
"loss": 0.0173,
"step": 15810
},
{
"grad_norm": 0.19797135889530182,
"learning_rate": 9.683386681617694e-05,
"loss": 0.0212,
"step": 15820
},
{
"grad_norm": 0.24339400231838226,
"learning_rate": 9.68280739097231e-05,
"loss": 0.0171,
"step": 15830
},
{
"grad_norm": 0.23345626890659332,
"learning_rate": 9.682227588222148e-05,
"loss": 0.0234,
"step": 15840
},
{
"grad_norm": 0.221258282661438,
"learning_rate": 9.681647273430618e-05,
"loss": 0.0182,
"step": 15850
},
{
"grad_norm": 0.28538239002227783,
"learning_rate": 9.681066446661182e-05,
"loss": 0.0205,
"step": 15860
},
{
"grad_norm": 0.24031752347946167,
"learning_rate": 9.680485107977357e-05,
"loss": 0.0183,
"step": 15870
},
{
"grad_norm": 0.22477011382579803,
"learning_rate": 9.679903257442716e-05,
"loss": 0.0205,
"step": 15880
},
{
"grad_norm": 0.23822762072086334,
"learning_rate": 9.679320895120891e-05,
"loss": 0.0199,
"step": 15890
},
{
"grad_norm": 0.24312853813171387,
"learning_rate": 9.67873802107557e-05,
"loss": 0.0224,
"step": 15900
},
{
"grad_norm": 0.1952960044145584,
"learning_rate": 9.67815463537049e-05,
"loss": 0.0208,
"step": 15910
},
{
"grad_norm": 0.24588929116725922,
"learning_rate": 9.677570738069457e-05,
"loss": 0.0249,
"step": 15920
},
{
"grad_norm": 0.3298582434654236,
"learning_rate": 9.676986329236318e-05,
"loss": 0.0194,
"step": 15930
},
{
"grad_norm": 0.263412743806839,
"learning_rate": 9.676401408934987e-05,
"loss": 0.0193,
"step": 15940
},
{
"grad_norm": 0.21354520320892334,
"learning_rate": 9.675815977229428e-05,
"loss": 0.0223,
"step": 15950
},
{
"grad_norm": 0.22564445436000824,
"learning_rate": 9.675230034183664e-05,
"loss": 0.0194,
"step": 15960
},
{
"grad_norm": 0.216091126203537,
"learning_rate": 9.674643579861773e-05,
"loss": 0.0188,
"step": 15970
},
{
"grad_norm": 0.182041734457016,
"learning_rate": 9.674056614327886e-05,
"loss": 0.0174,
"step": 15980
},
{
"grad_norm": 0.2135583609342575,
"learning_rate": 9.673469137646198e-05,
"loss": 0.0172,
"step": 15990
},
{
"grad_norm": 0.2592829465866089,
"learning_rate": 9.67288114988095e-05,
"loss": 0.0224,
"step": 16000
},
{
"grad_norm": 0.30103909969329834,
"learning_rate": 9.672292651096447e-05,
"loss": 0.02,
"step": 16010
},
{
"grad_norm": 0.18954713642597198,
"learning_rate": 9.671703641357042e-05,
"loss": 0.0212,
"step": 16020
},
{
"grad_norm": 0.2024218887090683,
"learning_rate": 9.67111412072715e-05,
"loss": 0.0186,
"step": 16030
},
{
"grad_norm": 0.23591367900371552,
"learning_rate": 9.670524089271242e-05,
"loss": 0.0175,
"step": 16040
},
{
"grad_norm": 0.2588534951210022,
"learning_rate": 9.669933547053842e-05,
"loss": 0.0189,
"step": 16050
},
{
"grad_norm": 0.28336942195892334,
"learning_rate": 9.669342494139531e-05,
"loss": 0.0188,
"step": 16060
},
{
"grad_norm": 0.21044746041297913,
"learning_rate": 9.668750930592943e-05,
"loss": 0.0169,
"step": 16070
},
{
"grad_norm": 0.2560344934463501,
"learning_rate": 9.668158856478775e-05,
"loss": 0.019,
"step": 16080
},
{
"grad_norm": 0.2520523965358734,
"learning_rate": 9.66756627186177e-05,
"loss": 0.0174,
"step": 16090
},
{
"grad_norm": 0.27171728014945984,
"learning_rate": 9.666973176806737e-05,
"loss": 0.018,
"step": 16100
},
{
"grad_norm": 0.28915536403656006,
"learning_rate": 9.666379571378534e-05,
"loss": 0.0186,
"step": 16110
},
{
"grad_norm": 0.2225058674812317,
"learning_rate": 9.665785455642076e-05,
"loss": 0.0181,
"step": 16120
},
{
"grad_norm": 0.26176416873931885,
"learning_rate": 9.665190829662337e-05,
"loss": 0.0198,
"step": 16130
},
{
"grad_norm": 0.29473400115966797,
"learning_rate": 9.664595693504342e-05,
"loss": 0.0191,
"step": 16140
},
{
"grad_norm": 0.29856547713279724,
"learning_rate": 9.664000047233175e-05,
"loss": 0.0198,
"step": 16150
},
{
"grad_norm": 0.2818152904510498,
"learning_rate": 9.663403890913976e-05,
"loss": 0.0201,
"step": 16160
},
{
"grad_norm": 0.25227588415145874,
"learning_rate": 9.662807224611938e-05,
"loss": 0.0178,
"step": 16170
},
{
"grad_norm": 0.2221623659133911,
"learning_rate": 9.662210048392311e-05,
"loss": 0.0182,
"step": 16180
},
{
"grad_norm": 0.2256096452474594,
"learning_rate": 9.661612362320405e-05,
"loss": 0.0173,
"step": 16190
},
{
"grad_norm": 0.23687534034252167,
"learning_rate": 9.661014166461579e-05,
"loss": 0.0176,
"step": 16200
},
{
"grad_norm": 0.22864700853824615,
"learning_rate": 9.66041546088125e-05,
"loss": 0.0175,
"step": 16210
},
{
"grad_norm": 0.2747984528541565,
"learning_rate": 9.659816245644895e-05,
"loss": 0.0205,
"step": 16220
},
{
"grad_norm": 0.2141098976135254,
"learning_rate": 9.65921652081804e-05,
"loss": 0.0168,
"step": 16230
},
{
"grad_norm": 0.32376036047935486,
"learning_rate": 9.658616286466271e-05,
"loss": 0.0203,
"step": 16240
},
{
"grad_norm": 0.24155114591121674,
"learning_rate": 9.65801554265523e-05,
"loss": 0.0158,
"step": 16250
},
{
"grad_norm": 0.2217189073562622,
"learning_rate": 9.657414289450612e-05,
"loss": 0.0227,
"step": 16260
},
{
"grad_norm": 0.28855669498443604,
"learning_rate": 9.656812526918171e-05,
"loss": 0.0186,
"step": 16270
},
{
"grad_norm": 0.203464537858963,
"learning_rate": 9.656210255123712e-05,
"loss": 0.0219,
"step": 16280
},
{
"grad_norm": 0.18729417026042938,
"learning_rate": 9.6556074741331e-05,
"loss": 0.0162,
"step": 16290
},
{
"grad_norm": 0.26334065198898315,
"learning_rate": 9.655004184012256e-05,
"loss": 0.0197,
"step": 16300
},
{
"grad_norm": 0.2082899808883667,
"learning_rate": 9.654400384827152e-05,
"loss": 0.019,
"step": 16310
},
{
"grad_norm": 0.21090373396873474,
"learning_rate": 9.653796076643818e-05,
"loss": 0.0185,
"step": 16320
},
{
"grad_norm": 0.23750464618206024,
"learning_rate": 9.653191259528344e-05,
"loss": 0.0166,
"step": 16330
},
{
"grad_norm": 0.23250509798526764,
"learning_rate": 9.65258593354687e-05,
"loss": 0.0166,
"step": 16340
},
{
"grad_norm": 0.31120097637176514,
"learning_rate": 9.651980098765591e-05,
"loss": 0.0196,
"step": 16350
},
{
"grad_norm": 0.27687424421310425,
"learning_rate": 9.651373755250765e-05,
"loss": 0.0215,
"step": 16360
},
{
"grad_norm": 0.19686377048492432,
"learning_rate": 9.650766903068697e-05,
"loss": 0.0159,
"step": 16370
},
{
"grad_norm": 0.20663948357105255,
"learning_rate": 9.650159542285753e-05,
"loss": 0.0161,
"step": 16380
},
{
"grad_norm": 0.24702349305152893,
"learning_rate": 9.649551672968353e-05,
"loss": 0.0177,
"step": 16390
},
{
"grad_norm": 0.21502424776554108,
"learning_rate": 9.648943295182973e-05,
"loss": 0.0217,
"step": 16400
},
{
"grad_norm": 0.20918725430965424,
"learning_rate": 9.648334408996144e-05,
"loss": 0.0195,
"step": 16410
},
{
"grad_norm": 0.2542378306388855,
"learning_rate": 9.647725014474452e-05,
"loss": 0.0203,
"step": 16420
},
{
"grad_norm": 0.22581025958061218,
"learning_rate": 9.64711511168454e-05,
"loss": 0.0166,
"step": 16430
},
{
"grad_norm": 0.21214455366134644,
"learning_rate": 9.646504700693108e-05,
"loss": 0.0164,
"step": 16440
},
{
"grad_norm": 0.22136463224887848,
"learning_rate": 9.645893781566907e-05,
"loss": 0.0203,
"step": 16450
},
{
"grad_norm": 0.3346709907054901,
"learning_rate": 9.645282354372744e-05,
"loss": 0.0199,
"step": 16460
},
{
"grad_norm": 0.22477203607559204,
"learning_rate": 9.644670419177491e-05,
"loss": 0.0165,
"step": 16470
},
{
"grad_norm": 0.22344274818897247,
"learning_rate": 9.644057976048062e-05,
"loss": 0.0184,
"step": 16480
},
{
"grad_norm": 0.20499740540981293,
"learning_rate": 9.643445025051435e-05,
"loss": 0.0168,
"step": 16490
},
{
"grad_norm": 0.1834016740322113,
"learning_rate": 9.642831566254641e-05,
"loss": 0.0179,
"step": 16500
},
{
"grad_norm": 0.2168210744857788,
"learning_rate": 9.642217599724769e-05,
"loss": 0.0133,
"step": 16510
},
{
"grad_norm": 0.22100216150283813,
"learning_rate": 9.64160312552896e-05,
"loss": 0.0173,
"step": 16520
},
{
"grad_norm": 0.24819809198379517,
"learning_rate": 9.64098814373441e-05,
"loss": 0.0192,
"step": 16530
},
{
"grad_norm": 0.27600330114364624,
"learning_rate": 9.640372654408374e-05,
"loss": 0.0169,
"step": 16540
},
{
"grad_norm": 0.1754896193742752,
"learning_rate": 9.639756657618162e-05,
"loss": 0.0153,
"step": 16550
},
{
"grad_norm": 0.2038646787405014,
"learning_rate": 9.639140153431138e-05,
"loss": 0.0196,
"step": 16560
},
{
"grad_norm": 0.22871485352516174,
"learning_rate": 9.638523141914721e-05,
"loss": 0.0203,
"step": 16570
},
{
"grad_norm": 0.2602187991142273,
"learning_rate": 9.637905623136388e-05,
"loss": 0.0169,
"step": 16580
},
{
"grad_norm": 0.19301439821720123,
"learning_rate": 9.637287597163669e-05,
"loss": 0.0178,
"step": 16590
},
{
"grad_norm": 0.20549316704273224,
"learning_rate": 9.63666906406415e-05,
"loss": 0.0172,
"step": 16600
},
{
"grad_norm": 0.18847408890724182,
"learning_rate": 9.636050023905473e-05,
"loss": 0.0147,
"step": 16610
},
{
"grad_norm": 0.20408432185649872,
"learning_rate": 9.635430476755336e-05,
"loss": 0.0174,
"step": 16620
},
{
"grad_norm": 0.19497902691364288,
"learning_rate": 9.63481042268149e-05,
"loss": 0.0195,
"step": 16630
},
{
"grad_norm": 0.22574973106384277,
"learning_rate": 9.634189861751745e-05,
"loss": 0.0159,
"step": 16640
},
{
"grad_norm": 0.21444794535636902,
"learning_rate": 9.633568794033967e-05,
"loss": 0.0226,
"step": 16650
},
{
"grad_norm": 0.24873623251914978,
"learning_rate": 9.63294721959607e-05,
"loss": 0.0161,
"step": 16660
},
{
"grad_norm": 0.26096534729003906,
"learning_rate": 9.63232513850603e-05,
"loss": 0.0203,
"step": 16670
},
{
"grad_norm": 0.22449441254138947,
"learning_rate": 9.631702550831878e-05,
"loss": 0.0177,
"step": 16680
},
{
"grad_norm": 0.23263201117515564,
"learning_rate": 9.631079456641698e-05,
"loss": 0.0157,
"step": 16690
},
{
"grad_norm": 0.22929039597511292,
"learning_rate": 9.630455856003632e-05,
"loss": 0.0165,
"step": 16700
},
{
"grad_norm": 0.16631029546260834,
"learning_rate": 9.629831748985876e-05,
"loss": 0.0177,
"step": 16710
},
{
"grad_norm": 0.23983977735042572,
"learning_rate": 9.629207135656679e-05,
"loss": 0.015,
"step": 16720
},
{
"grad_norm": 0.25655442476272583,
"learning_rate": 9.628582016084353e-05,
"loss": 0.0165,
"step": 16730
},
{
"grad_norm": 0.29186007380485535,
"learning_rate": 9.627956390337254e-05,
"loss": 0.0199,
"step": 16740
},
{
"grad_norm": 0.27269017696380615,
"learning_rate": 9.627330258483802e-05,
"loss": 0.0191,
"step": 16750
},
{
"grad_norm": 0.34987184405326843,
"learning_rate": 9.62670362059247e-05,
"loss": 0.0193,
"step": 16760
},
{
"grad_norm": 0.32363274693489075,
"learning_rate": 9.626076476731786e-05,
"loss": 0.0201,
"step": 16770
},
{
"grad_norm": 0.2766304314136505,
"learning_rate": 9.625448826970336e-05,
"loss": 0.0181,
"step": 16780
},
{
"grad_norm": 0.27444687485694885,
"learning_rate": 9.624820671376755e-05,
"loss": 0.0158,
"step": 16790
},
{
"grad_norm": 0.22991421818733215,
"learning_rate": 9.62419201001974e-05,
"loss": 0.0168,
"step": 16800
},
{
"grad_norm": 0.26918646693229675,
"learning_rate": 9.623562842968037e-05,
"loss": 0.0143,
"step": 16810
},
{
"grad_norm": 0.29095256328582764,
"learning_rate": 9.622933170290454e-05,
"loss": 0.0172,
"step": 16820
},
{
"grad_norm": 0.21555963158607483,
"learning_rate": 9.622302992055849e-05,
"loss": 0.0183,
"step": 16830
},
{
"grad_norm": 0.2406519651412964,
"learning_rate": 9.62167230833314e-05,
"loss": 0.0159,
"step": 16840
},
{
"grad_norm": 0.26476937532424927,
"learning_rate": 9.621041119191295e-05,
"loss": 0.0196,
"step": 16850
},
{
"grad_norm": 0.22450637817382812,
"learning_rate": 9.620409424699342e-05,
"loss": 0.0169,
"step": 16860
},
{
"grad_norm": 0.2418346256017685,
"learning_rate": 9.619777224926359e-05,
"loss": 0.0173,
"step": 16870
},
{
"grad_norm": 0.254596084356308,
"learning_rate": 9.619144519941485e-05,
"loss": 0.0167,
"step": 16880
},
{
"grad_norm": 0.22944311797618866,
"learning_rate": 9.618511309813912e-05,
"loss": 0.0179,
"step": 16890
},
{
"grad_norm": 0.2638665735721588,
"learning_rate": 9.617877594612886e-05,
"loss": 0.0193,
"step": 16900
},
{
"grad_norm": 0.24032092094421387,
"learning_rate": 9.617243374407707e-05,
"loss": 0.0174,
"step": 16910
},
{
"grad_norm": 0.25759685039520264,
"learning_rate": 9.616608649267736e-05,
"loss": 0.0202,
"step": 16920
},
{
"grad_norm": 0.24273432791233063,
"learning_rate": 9.615973419262385e-05,
"loss": 0.0165,
"step": 16930
},
{
"grad_norm": 0.24848264455795288,
"learning_rate": 9.615337684461119e-05,
"loss": 0.0176,
"step": 16940
},
{
"grad_norm": 0.26598796248435974,
"learning_rate": 9.614701444933465e-05,
"loss": 0.0202,
"step": 16950
},
{
"grad_norm": 0.2356976568698883,
"learning_rate": 9.614064700748997e-05,
"loss": 0.0213,
"step": 16960
},
{
"grad_norm": 0.2209087312221527,
"learning_rate": 9.613427451977352e-05,
"loss": 0.0215,
"step": 16970
},
{
"grad_norm": 0.2411251813173294,
"learning_rate": 9.612789698688216e-05,
"loss": 0.0175,
"step": 16980
},
{
"grad_norm": 0.20533424615859985,
"learning_rate": 9.612151440951334e-05,
"loss": 0.015,
"step": 16990
},
{
"grad_norm": 0.2369515299797058,
"learning_rate": 9.611512678836506e-05,
"loss": 0.0191,
"step": 17000
},
{
"grad_norm": 0.22637242078781128,
"learning_rate": 9.610873412413584e-05,
"loss": 0.0241,
"step": 17010
},
{
"grad_norm": 0.2567742168903351,
"learning_rate": 9.610233641752476e-05,
"loss": 0.0204,
"step": 17020
},
{
"grad_norm": 0.23093710839748383,
"learning_rate": 9.609593366923151e-05,
"loss": 0.0183,
"step": 17030
},
{
"grad_norm": 0.38678979873657227,
"learning_rate": 9.608952587995625e-05,
"loss": 0.0214,
"step": 17040
},
{
"grad_norm": 0.2566768229007721,
"learning_rate": 9.608311305039972e-05,
"loss": 0.0234,
"step": 17050
},
{
"grad_norm": 0.22188995778560638,
"learning_rate": 9.607669518126326e-05,
"loss": 0.0201,
"step": 17060
},
{
"grad_norm": 0.2851111590862274,
"learning_rate": 9.607027227324866e-05,
"loss": 0.0207,
"step": 17070
},
{
"grad_norm": 0.25374385714530945,
"learning_rate": 9.606384432705837e-05,
"loss": 0.0166,
"step": 17080
},
{
"grad_norm": 0.20746755599975586,
"learning_rate": 9.60574113433953e-05,
"loss": 0.0203,
"step": 17090
},
{
"grad_norm": 0.25531473755836487,
"learning_rate": 9.6050973322963e-05,
"loss": 0.0159,
"step": 17100
},
{
"grad_norm": 0.2727971076965332,
"learning_rate": 9.604453026646547e-05,
"loss": 0.0167,
"step": 17110
},
{
"grad_norm": 0.24038639664649963,
"learning_rate": 9.603808217460735e-05,
"loss": 0.0183,
"step": 17120
},
{
"grad_norm": 0.25764548778533936,
"learning_rate": 9.603162904809377e-05,
"loss": 0.0181,
"step": 17130
},
{
"grad_norm": 0.19836722314357758,
"learning_rate": 9.602517088763045e-05,
"loss": 0.0191,
"step": 17140
},
{
"grad_norm": 0.275120347738266,
"learning_rate": 9.601870769392365e-05,
"loss": 0.0176,
"step": 17150
},
{
"grad_norm": 0.22377800941467285,
"learning_rate": 9.601223946768017e-05,
"loss": 0.0194,
"step": 17160
},
{
"grad_norm": 0.31283488869667053,
"learning_rate": 9.600576620960734e-05,
"loss": 0.0165,
"step": 17170
},
{
"grad_norm": 0.16364149749279022,
"learning_rate": 9.599928792041308e-05,
"loss": 0.0166,
"step": 17180
},
{
"grad_norm": 0.23527780175209045,
"learning_rate": 9.599280460080587e-05,
"loss": 0.0209,
"step": 17190
},
{
"grad_norm": 0.2357402741909027,
"learning_rate": 9.59863162514947e-05,
"loss": 0.0161,
"step": 17200
},
{
"grad_norm": 0.22466665506362915,
"learning_rate": 9.597982287318911e-05,
"loss": 0.0146,
"step": 17210
},
{
"grad_norm": 0.21593722701072693,
"learning_rate": 9.597332446659923e-05,
"loss": 0.0178,
"step": 17220
},
{
"grad_norm": 0.27758243680000305,
"learning_rate": 9.59668210324357e-05,
"loss": 0.0209,
"step": 17230
},
{
"grad_norm": 0.24260929226875305,
"learning_rate": 9.596031257140974e-05,
"loss": 0.0191,
"step": 17240
},
{
"grad_norm": 0.3037336468696594,
"learning_rate": 9.59537990842331e-05,
"loss": 0.0168,
"step": 17250
},
{
"grad_norm": 0.17514759302139282,
"learning_rate": 9.594728057161806e-05,
"loss": 0.0194,
"step": 17260
},
{
"grad_norm": 0.2643806040287018,
"learning_rate": 9.594075703427752e-05,
"loss": 0.0171,
"step": 17270
},
{
"grad_norm": 0.12826664745807648,
"learning_rate": 9.593422847292486e-05,
"loss": 0.0168,
"step": 17280
},
{
"grad_norm": 0.18956172466278076,
"learning_rate": 9.592769488827402e-05,
"loss": 0.0159,
"step": 17290
},
{
"grad_norm": 0.20451869070529938,
"learning_rate": 9.592115628103952e-05,
"loss": 0.0185,
"step": 17300
},
{
"grad_norm": 0.23827993869781494,
"learning_rate": 9.591461265193643e-05,
"loss": 0.0167,
"step": 17310
},
{
"grad_norm": 0.20346641540527344,
"learning_rate": 9.590806400168032e-05,
"loss": 0.018,
"step": 17320
},
{
"grad_norm": 0.20496748387813568,
"learning_rate": 9.590151033098735e-05,
"loss": 0.0199,
"step": 17330
},
{
"grad_norm": 0.24242371320724487,
"learning_rate": 9.589495164057423e-05,
"loss": 0.0171,
"step": 17340
},
{
"grad_norm": 0.24139827489852905,
"learning_rate": 9.58883879311582e-05,
"loss": 0.0193,
"step": 17350
},
{
"grad_norm": 0.20769353210926056,
"learning_rate": 9.588181920345705e-05,
"loss": 0.0172,
"step": 17360
},
{
"grad_norm": 0.19775322079658508,
"learning_rate": 9.587524545818913e-05,
"loss": 0.0153,
"step": 17370
},
{
"grad_norm": 0.2737868130207062,
"learning_rate": 9.586866669607335e-05,
"loss": 0.0168,
"step": 17380
},
{
"grad_norm": 0.19795580208301544,
"learning_rate": 9.586208291782915e-05,
"loss": 0.0178,
"step": 17390
},
{
"grad_norm": 0.2711477279663086,
"learning_rate": 9.58554941241765e-05,
"loss": 0.0201,
"step": 17400
},
{
"grad_norm": 0.24536965787410736,
"learning_rate": 9.584890031583596e-05,
"loss": 0.0183,
"step": 17410
},
{
"grad_norm": 0.34444183111190796,
"learning_rate": 9.584230149352861e-05,
"loss": 0.0177,
"step": 17420
},
{
"grad_norm": 0.27383139729499817,
"learning_rate": 9.58356976579761e-05,
"loss": 0.0176,
"step": 17430
},
{
"grad_norm": 0.2097596675157547,
"learning_rate": 9.58290888099006e-05,
"loss": 0.019,
"step": 17440
},
{
"grad_norm": 0.25391826033592224,
"learning_rate": 9.582247495002486e-05,
"loss": 0.0187,
"step": 17450
},
{
"grad_norm": 0.19036152958869934,
"learning_rate": 9.581585607907214e-05,
"loss": 0.0167,
"step": 17460
},
{
"grad_norm": 0.22091791033744812,
"learning_rate": 9.580923219776628e-05,
"loss": 0.0188,
"step": 17470
},
{
"grad_norm": 0.2458617240190506,
"learning_rate": 9.580260330683167e-05,
"loss": 0.0183,
"step": 17480
},
{
"grad_norm": 0.23816366493701935,
"learning_rate": 9.579596940699322e-05,
"loss": 0.0172,
"step": 17490
},
{
"grad_norm": 0.2567879855632782,
"learning_rate": 9.578933049897643e-05,
"loss": 0.0179,
"step": 17500
},
{
"grad_norm": 0.24667951464653015,
"learning_rate": 9.578268658350728e-05,
"loss": 0.0178,
"step": 17510
},
{
"grad_norm": 0.24973371624946594,
"learning_rate": 9.577603766131235e-05,
"loss": 0.0152,
"step": 17520
},
{
"grad_norm": 0.2715742886066437,
"learning_rate": 9.576938373311878e-05,
"loss": 0.0186,
"step": 17530
},
{
"grad_norm": 0.25942331552505493,
"learning_rate": 9.576272479965421e-05,
"loss": 0.0177,
"step": 17540
},
{
"grad_norm": 0.2839871346950531,
"learning_rate": 9.575606086164687e-05,
"loss": 0.0163,
"step": 17550
},
{
"grad_norm": 0.22360889613628387,
"learning_rate": 9.57493919198255e-05,
"loss": 0.0169,
"step": 17560
},
{
"grad_norm": 0.2493814378976822,
"learning_rate": 9.57427179749194e-05,
"loss": 0.0189,
"step": 17570
},
{
"grad_norm": 0.2246805876493454,
"learning_rate": 9.573603902765846e-05,
"loss": 0.0182,
"step": 17580
},
{
"grad_norm": 0.2053459733724594,
"learning_rate": 9.572935507877304e-05,
"loss": 0.0178,
"step": 17590
},
{
"grad_norm": 0.24530400335788727,
"learning_rate": 9.57226661289941e-05,
"loss": 0.0153,
"step": 17600
},
{
"grad_norm": 0.2642321288585663,
"learning_rate": 9.571597217905315e-05,
"loss": 0.017,
"step": 17610
},
{
"grad_norm": 0.18858280777931213,
"learning_rate": 9.57092732296822e-05,
"loss": 0.0162,
"step": 17620
},
{
"grad_norm": 0.24414698779582977,
"learning_rate": 9.570256928161385e-05,
"loss": 0.0184,
"step": 17630
},
{
"grad_norm": 0.25567445158958435,
"learning_rate": 9.569586033558126e-05,
"loss": 0.018,
"step": 17640
},
{
"grad_norm": 0.23591777682304382,
"learning_rate": 9.568914639231807e-05,
"loss": 0.0141,
"step": 17650
},
{
"grad_norm": 0.2599583864212036,
"learning_rate": 9.568242745255852e-05,
"loss": 0.0154,
"step": 17660
},
{
"grad_norm": 0.17688848078250885,
"learning_rate": 9.567570351703739e-05,
"loss": 0.0181,
"step": 17670
},
{
"grad_norm": 0.15748238563537598,
"learning_rate": 9.566897458649001e-05,
"loss": 0.0213,
"step": 17680
},
{
"grad_norm": 0.26078203320503235,
"learning_rate": 9.566224066165221e-05,
"loss": 0.0164,
"step": 17690
},
{
"grad_norm": 0.2592557966709137,
"learning_rate": 9.565550174326043e-05,
"loss": 0.0187,
"step": 17700
},
{
"grad_norm": 0.1889854073524475,
"learning_rate": 9.564875783205162e-05,
"loss": 0.0163,
"step": 17710
},
{
"grad_norm": 0.1754608303308487,
"learning_rate": 9.564200892876328e-05,
"loss": 0.019,
"step": 17720
},
{
"grad_norm": 0.18894585967063904,
"learning_rate": 9.563525503413348e-05,
"loss": 0.016,
"step": 17730
},
{
"grad_norm": 0.22855456173419952,
"learning_rate": 9.562849614890079e-05,
"loss": 0.0198,
"step": 17740
},
{
"grad_norm": 0.24553902447223663,
"learning_rate": 9.562173227380436e-05,
"loss": 0.0144,
"step": 17750
},
{
"grad_norm": 0.28026923537254333,
"learning_rate": 9.561496340958389e-05,
"loss": 0.0248,
"step": 17760
},
{
"grad_norm": 0.24462267756462097,
"learning_rate": 9.560818955697959e-05,
"loss": 0.0191,
"step": 17770
},
{
"grad_norm": 0.2001575380563736,
"learning_rate": 9.560141071673228e-05,
"loss": 0.017,
"step": 17780
},
{
"grad_norm": 0.22568632662296295,
"learning_rate": 9.559462688958323e-05,
"loss": 0.0188,
"step": 17790
},
{
"grad_norm": 0.26937243342399597,
"learning_rate": 9.558783807627434e-05,
"loss": 0.0172,
"step": 17800
},
{
"grad_norm": 0.27089959383010864,
"learning_rate": 9.558104427754801e-05,
"loss": 0.0209,
"step": 17810
},
{
"grad_norm": 0.2073383927345276,
"learning_rate": 9.557424549414722e-05,
"loss": 0.0177,
"step": 17820
},
{
"grad_norm": 0.21644099056720734,
"learning_rate": 9.556744172681546e-05,
"loss": 0.0152,
"step": 17830
},
{
"grad_norm": 0.21143971383571625,
"learning_rate": 9.556063297629677e-05,
"loss": 0.0176,
"step": 17840
},
{
"grad_norm": 0.20659197866916656,
"learning_rate": 9.555381924333578e-05,
"loss": 0.0164,
"step": 17850
},
{
"grad_norm": 0.24294114112854004,
"learning_rate": 9.554700052867758e-05,
"loss": 0.0173,
"step": 17860
},
{
"grad_norm": 0.27616581320762634,
"learning_rate": 9.554017683306789e-05,
"loss": 0.0211,
"step": 17870
},
{
"grad_norm": 0.224766805768013,
"learning_rate": 9.553334815725294e-05,
"loss": 0.0162,
"step": 17880
},
{
"grad_norm": 0.271637886762619,
"learning_rate": 9.552651450197949e-05,
"loss": 0.0193,
"step": 17890
},
{
"grad_norm": 0.2578735649585724,
"learning_rate": 9.551967586799486e-05,
"loss": 0.017,
"step": 17900
},
{
"grad_norm": 0.1847911775112152,
"learning_rate": 9.551283225604692e-05,
"loss": 0.0198,
"step": 17910
},
{
"grad_norm": 0.23736201226711273,
"learning_rate": 9.550598366688406e-05,
"loss": 0.0198,
"step": 17920
},
{
"grad_norm": 0.22950343787670135,
"learning_rate": 9.549913010125526e-05,
"loss": 0.0164,
"step": 17930
},
{
"grad_norm": 0.24767309427261353,
"learning_rate": 9.549227155990999e-05,
"loss": 0.0176,
"step": 17940
},
{
"grad_norm": 0.18506084382534027,
"learning_rate": 9.548540804359828e-05,
"loss": 0.0154,
"step": 17950
},
{
"grad_norm": 0.2415982335805893,
"learning_rate": 9.547853955307077e-05,
"loss": 0.0206,
"step": 17960
},
{
"grad_norm": 0.24404023587703705,
"learning_rate": 9.547166608907853e-05,
"loss": 0.0178,
"step": 17970
},
{
"grad_norm": 0.2148423045873642,
"learning_rate": 9.546478765237326e-05,
"loss": 0.0161,
"step": 17980
},
{
"grad_norm": 0.18518278002738953,
"learning_rate": 9.545790424370715e-05,
"loss": 0.0181,
"step": 17990
},
{
"grad_norm": 0.2136387825012207,
"learning_rate": 9.5451015863833e-05,
"loss": 0.0161,
"step": 18000
},
{
"grad_norm": 0.27653440833091736,
"learning_rate": 9.544412251350408e-05,
"loss": 0.0197,
"step": 18010
},
{
"grad_norm": 0.27941060066223145,
"learning_rate": 9.543722419347422e-05,
"loss": 0.0159,
"step": 18020
},
{
"grad_norm": 0.24115628004074097,
"learning_rate": 9.543032090449788e-05,
"loss": 0.0149,
"step": 18030
},
{
"grad_norm": 0.1998482495546341,
"learning_rate": 9.542341264732992e-05,
"loss": 0.0161,
"step": 18040
},
{
"grad_norm": 0.20984019339084625,
"learning_rate": 9.541649942272585e-05,
"loss": 0.0174,
"step": 18050
},
{
"grad_norm": 0.25274404883384705,
"learning_rate": 9.54095812314417e-05,
"loss": 0.0203,
"step": 18060
},
{
"grad_norm": 0.253617525100708,
"learning_rate": 9.540265807423401e-05,
"loss": 0.0189,
"step": 18070
},
{
"grad_norm": 0.24331985414028168,
"learning_rate": 9.53957299518599e-05,
"loss": 0.0161,
"step": 18080
},
{
"grad_norm": 0.2861975133419037,
"learning_rate": 9.5388796865077e-05,
"loss": 0.0181,
"step": 18090
},
{
"grad_norm": 0.21068565547466278,
"learning_rate": 9.538185881464353e-05,
"loss": 0.0184,
"step": 18100
},
{
"grad_norm": 0.23860912024974823,
"learning_rate": 9.537491580131821e-05,
"loss": 0.016,
"step": 18110
},
{
"grad_norm": 0.20223286747932434,
"learning_rate": 9.53679678258603e-05,
"loss": 0.0211,
"step": 18120
},
{
"grad_norm": 0.2065197378396988,
"learning_rate": 9.536101488902966e-05,
"loss": 0.0186,
"step": 18130
},
{
"grad_norm": 0.26642438769340515,
"learning_rate": 9.535405699158663e-05,
"loss": 0.0147,
"step": 18140
},
{
"grad_norm": 0.21723781526088715,
"learning_rate": 9.53470941342921e-05,
"loss": 0.0137,
"step": 18150
},
{
"grad_norm": 0.2028735876083374,
"learning_rate": 9.534012631790756e-05,
"loss": 0.0158,
"step": 18160
},
{
"grad_norm": 0.1708681732416153,
"learning_rate": 9.533315354319494e-05,
"loss": 0.0146,
"step": 18170
},
{
"grad_norm": 0.2970765233039856,
"learning_rate": 9.532617581091682e-05,
"loss": 0.0161,
"step": 18180
},
{
"grad_norm": 0.2009182721376419,
"learning_rate": 9.531919312183629e-05,
"loss": 0.0174,
"step": 18190
},
{
"grad_norm": 0.22930586338043213,
"learning_rate": 9.531220547671688e-05,
"loss": 0.018,
"step": 18200
},
{
"grad_norm": 0.220820352435112,
"learning_rate": 9.530521287632285e-05,
"loss": 0.0157,
"step": 18210
},
{
"grad_norm": 0.23223398625850677,
"learning_rate": 9.529821532141884e-05,
"loss": 0.0169,
"step": 18220
},
{
"grad_norm": 0.259183794260025,
"learning_rate": 9.52912128127701e-05,
"loss": 0.0199,
"step": 18230
},
{
"grad_norm": 0.2559663951396942,
"learning_rate": 9.528420535114244e-05,
"loss": 0.0207,
"step": 18240
},
{
"grad_norm": 0.22421777248382568,
"learning_rate": 9.527719293730215e-05,
"loss": 0.0186,
"step": 18250
},
{
"grad_norm": 0.2555665969848633,
"learning_rate": 9.527017557201611e-05,
"loss": 0.0175,
"step": 18260
},
{
"grad_norm": 0.2150653451681137,
"learning_rate": 9.526315325605176e-05,
"loss": 0.0191,
"step": 18270
},
{
"grad_norm": 0.18184694647789001,
"learning_rate": 9.525612599017699e-05,
"loss": 0.0182,
"step": 18280
},
{
"grad_norm": 0.21569839119911194,
"learning_rate": 9.524909377516033e-05,
"loss": 0.0163,
"step": 18290
},
{
"grad_norm": 0.22885504364967346,
"learning_rate": 9.524205661177081e-05,
"loss": 0.0146,
"step": 18300
},
{
"grad_norm": 0.2409166693687439,
"learning_rate": 9.523501450077801e-05,
"loss": 0.0205,
"step": 18310
},
{
"grad_norm": 0.2402997761964798,
"learning_rate": 9.522796744295202e-05,
"loss": 0.0156,
"step": 18320
},
{
"grad_norm": 0.24736376106739044,
"learning_rate": 9.522091543906352e-05,
"loss": 0.0213,
"step": 18330
},
{
"grad_norm": 0.2833060622215271,
"learning_rate": 9.521385848988369e-05,
"loss": 0.0181,
"step": 18340
},
{
"grad_norm": 0.2343091368675232,
"learning_rate": 9.520679659618428e-05,
"loss": 0.0175,
"step": 18350
},
{
"grad_norm": 0.212591752409935,
"learning_rate": 9.519972975873754e-05,
"loss": 0.0168,
"step": 18360
},
{
"grad_norm": 0.23697754740715027,
"learning_rate": 9.519265797831633e-05,
"loss": 0.0162,
"step": 18370
},
{
"grad_norm": 0.21745391190052032,
"learning_rate": 9.518558125569399e-05,
"loss": 0.0169,
"step": 18380
},
{
"grad_norm": 0.2738904058933258,
"learning_rate": 9.517849959164442e-05,
"loss": 0.0149,
"step": 18390
},
{
"grad_norm": 0.22212840616703033,
"learning_rate": 9.517141298694205e-05,
"loss": 0.0158,
"step": 18400
},
{
"grad_norm": 0.2424498349428177,
"learning_rate": 9.516432144236188e-05,
"loss": 0.0171,
"step": 18410
},
{
"grad_norm": 0.2065114676952362,
"learning_rate": 9.515722495867941e-05,
"loss": 0.0163,
"step": 18420
},
{
"grad_norm": 0.2845364809036255,
"learning_rate": 9.515012353667072e-05,
"loss": 0.0187,
"step": 18430
},
{
"grad_norm": 0.23354214429855347,
"learning_rate": 9.51430171771124e-05,
"loss": 0.0195,
"step": 18440
},
{
"grad_norm": 0.261211097240448,
"learning_rate": 9.513590588078159e-05,
"loss": 0.0187,
"step": 18450
},
{
"grad_norm": 0.17390166223049164,
"learning_rate": 9.512878964845597e-05,
"loss": 0.0149,
"step": 18460
},
{
"grad_norm": 0.2599153518676758,
"learning_rate": 9.512166848091377e-05,
"loss": 0.0149,
"step": 18470
},
{
"grad_norm": 0.20262955129146576,
"learning_rate": 9.511454237893376e-05,
"loss": 0.0161,
"step": 18480
},
{
"grad_norm": 0.2117965966463089,
"learning_rate": 9.51074113432952e-05,
"loss": 0.0207,
"step": 18490
},
{
"grad_norm": 0.2957056760787964,
"learning_rate": 9.510027537477797e-05,
"loss": 0.0169,
"step": 18500
},
{
"grad_norm": 0.2731700539588928,
"learning_rate": 9.509313447416242e-05,
"loss": 0.0176,
"step": 18510
},
{
"grad_norm": 0.26647451519966125,
"learning_rate": 9.508598864222949e-05,
"loss": 0.0186,
"step": 18520
},
{
"grad_norm": 0.1898316591978073,
"learning_rate": 9.507883787976062e-05,
"loss": 0.0171,
"step": 18530
},
{
"grad_norm": 0.26751694083213806,
"learning_rate": 9.507168218753781e-05,
"loss": 0.0188,
"step": 18540
},
{
"grad_norm": 0.21939140558242798,
"learning_rate": 9.506452156634362e-05,
"loss": 0.0183,
"step": 18550
},
{
"grad_norm": 0.19924594461917877,
"learning_rate": 9.505735601696109e-05,
"loss": 0.0163,
"step": 18560
},
{
"grad_norm": 0.19633571803569794,
"learning_rate": 9.505018554017385e-05,
"loss": 0.0145,
"step": 18570
},
{
"grad_norm": 0.214951291680336,
"learning_rate": 9.504301013676604e-05,
"loss": 0.0147,
"step": 18580
},
{
"grad_norm": 0.2576771080493927,
"learning_rate": 9.503582980752238e-05,
"loss": 0.0162,
"step": 18590
},
{
"grad_norm": 0.2438470870256424,
"learning_rate": 9.502864455322809e-05,
"loss": 0.0168,
"step": 18600
},
{
"grad_norm": 0.21890531480312347,
"learning_rate": 9.502145437466891e-05,
"loss": 0.0185,
"step": 18610
},
{
"grad_norm": 0.2068784534931183,
"learning_rate": 9.501425927263116e-05,
"loss": 0.0189,
"step": 18620
},
{
"grad_norm": 0.2585833668708801,
"learning_rate": 9.500705924790172e-05,
"loss": 0.0184,
"step": 18630
},
{
"grad_norm": 0.18611527979373932,
"learning_rate": 9.499985430126794e-05,
"loss": 0.0148,
"step": 18640
},
{
"grad_norm": 0.21463438868522644,
"learning_rate": 9.499264443351775e-05,
"loss": 0.0158,
"step": 18650
},
{
"grad_norm": 0.19403278827667236,
"learning_rate": 9.498542964543961e-05,
"loss": 0.0165,
"step": 18660
},
{
"grad_norm": 0.22292093932628632,
"learning_rate": 9.497820993782252e-05,
"loss": 0.0151,
"step": 18670
},
{
"grad_norm": 0.17556989192962646,
"learning_rate": 9.497098531145601e-05,
"loss": 0.0178,
"step": 18680
},
{
"grad_norm": 0.26126596331596375,
"learning_rate": 9.496375576713017e-05,
"loss": 0.0165,
"step": 18690
},
{
"grad_norm": 0.17960461974143982,
"learning_rate": 9.49565213056356e-05,
"loss": 0.0165,
"step": 18700
},
{
"grad_norm": 0.2099495530128479,
"learning_rate": 9.494928192776342e-05,
"loss": 0.0142,
"step": 18710
},
{
"grad_norm": 0.19007565081119537,
"learning_rate": 9.494203763430538e-05,
"loss": 0.0154,
"step": 18720
},
{
"grad_norm": 0.23223096132278442,
"learning_rate": 9.493478842605366e-05,
"loss": 0.0185,
"step": 18730
},
{
"grad_norm": 0.2481299638748169,
"learning_rate": 9.492753430380105e-05,
"loss": 0.016,
"step": 18740
},
{
"grad_norm": 0.2259487509727478,
"learning_rate": 9.492027526834083e-05,
"loss": 0.0161,
"step": 18750
},
{
"grad_norm": 0.17821355164051056,
"learning_rate": 9.491301132046684e-05,
"loss": 0.0129,
"step": 18760
},
{
"grad_norm": 0.18956951797008514,
"learning_rate": 9.490574246097345e-05,
"loss": 0.0143,
"step": 18770
},
{
"grad_norm": 0.1658146232366562,
"learning_rate": 9.48984686906556e-05,
"loss": 0.0161,
"step": 18780
},
{
"grad_norm": 0.1845589280128479,
"learning_rate": 9.489119001030871e-05,
"loss": 0.0144,
"step": 18790
},
{
"grad_norm": 0.22135934233665466,
"learning_rate": 9.488390642072878e-05,
"loss": 0.0166,
"step": 18800
},
{
"grad_norm": 0.23337242007255554,
"learning_rate": 9.48766179227123e-05,
"loss": 0.0157,
"step": 18810
},
{
"grad_norm": 0.24434435367584229,
"learning_rate": 9.486932451705636e-05,
"loss": 0.0209,
"step": 18820
},
{
"grad_norm": 0.21083883941173553,
"learning_rate": 9.486202620455857e-05,
"loss": 0.0167,
"step": 18830
},
{
"grad_norm": 0.26065054535865784,
"learning_rate": 9.485472298601704e-05,
"loss": 0.0165,
"step": 18840
},
{
"grad_norm": 0.2413932830095291,
"learning_rate": 9.484741486223043e-05,
"loss": 0.0166,
"step": 18850
},
{
"grad_norm": 0.21706706285476685,
"learning_rate": 9.484010183399797e-05,
"loss": 0.0148,
"step": 18860
},
{
"grad_norm": 0.19135063886642456,
"learning_rate": 9.483278390211938e-05,
"loss": 0.0148,
"step": 18870
},
{
"grad_norm": 0.19527922570705414,
"learning_rate": 9.482546106739496e-05,
"loss": 0.0176,
"step": 18880
},
{
"grad_norm": 0.24653393030166626,
"learning_rate": 9.48181333306255e-05,
"loss": 0.0193,
"step": 18890
},
{
"grad_norm": 0.26578599214553833,
"learning_rate": 9.481080069261237e-05,
"loss": 0.0156,
"step": 18900
},
{
"grad_norm": 0.2269006073474884,
"learning_rate": 9.480346315415745e-05,
"loss": 0.0173,
"step": 18910
},
{
"grad_norm": 0.20283561944961548,
"learning_rate": 9.479612071606314e-05,
"loss": 0.0167,
"step": 18920
},
{
"grad_norm": 0.21275149285793304,
"learning_rate": 9.478877337913244e-05,
"loss": 0.0185,
"step": 18930
},
{
"grad_norm": 0.23655854165554047,
"learning_rate": 9.478142114416881e-05,
"loss": 0.0166,
"step": 18940
},
{
"grad_norm": 0.27066153287887573,
"learning_rate": 9.47740640119763e-05,
"loss": 0.0173,
"step": 18950
},
{
"grad_norm": 0.22452524304389954,
"learning_rate": 9.476670198335947e-05,
"loss": 0.0188,
"step": 18960
},
{
"grad_norm": 0.21628102660179138,
"learning_rate": 9.47593350591234e-05,
"loss": 0.0164,
"step": 18970
},
{
"grad_norm": 0.22073863446712494,
"learning_rate": 9.475196324007376e-05,
"loss": 0.0164,
"step": 18980
},
{
"grad_norm": 0.1865048110485077,
"learning_rate": 9.474458652701669e-05,
"loss": 0.0176,
"step": 18990
},
{
"grad_norm": 0.2237597554922104,
"learning_rate": 9.473720492075892e-05,
"loss": 0.0184,
"step": 19000
},
{
"grad_norm": 0.21400532126426697,
"learning_rate": 9.472981842210768e-05,
"loss": 0.0159,
"step": 19010
},
{
"grad_norm": 0.1770801693201065,
"learning_rate": 9.472242703187074e-05,
"loss": 0.016,
"step": 19020
},
{
"grad_norm": 0.21562041342258453,
"learning_rate": 9.471503075085643e-05,
"loss": 0.0184,
"step": 19030
},
{
"grad_norm": 0.25867006182670593,
"learning_rate": 9.470762957987359e-05,
"loss": 0.0147,
"step": 19040
},
{
"grad_norm": 0.17780904471874237,
"learning_rate": 9.470022351973158e-05,
"loss": 0.0153,
"step": 19050
},
{
"grad_norm": 0.18168801069259644,
"learning_rate": 9.469281257124034e-05,
"loss": 0.015,
"step": 19060
},
{
"grad_norm": 0.21822762489318848,
"learning_rate": 9.46853967352103e-05,
"loss": 0.0173,
"step": 19070
},
{
"grad_norm": 0.22131718695163727,
"learning_rate": 9.467797601245246e-05,
"loss": 0.0203,
"step": 19080
},
{
"grad_norm": 0.18401233851909637,
"learning_rate": 9.467055040377834e-05,
"loss": 0.0177,
"step": 19090
},
{
"grad_norm": 0.2105451226234436,
"learning_rate": 9.466311990999999e-05,
"loss": 0.0165,
"step": 19100
},
{
"grad_norm": 0.2062365561723709,
"learning_rate": 9.465568453193e-05,
"loss": 0.0165,
"step": 19110
},
{
"grad_norm": 0.3424419164657593,
"learning_rate": 9.464824427038148e-05,
"loss": 0.0185,
"step": 19120
},
{
"grad_norm": 0.20042471587657928,
"learning_rate": 9.46407991261681e-05,
"loss": 0.0159,
"step": 19130
},
{
"grad_norm": 0.21363820135593414,
"learning_rate": 9.463334910010404e-05,
"loss": 0.0155,
"step": 19140
},
{
"grad_norm": 0.1989668905735016,
"learning_rate": 9.462589419300403e-05,
"loss": 0.0168,
"step": 19150
},
{
"grad_norm": 0.18865354359149933,
"learning_rate": 9.461843440568333e-05,
"loss": 0.0147,
"step": 19160
},
{
"grad_norm": 0.21299093961715698,
"learning_rate": 9.461096973895773e-05,
"loss": 0.0158,
"step": 19170
},
{
"grad_norm": 0.2013334184885025,
"learning_rate": 9.460350019364355e-05,
"loss": 0.0196,
"step": 19180
},
{
"grad_norm": 0.2315259426832199,
"learning_rate": 9.459602577055764e-05,
"loss": 0.0178,
"step": 19190
},
{
"grad_norm": 0.19243505597114563,
"learning_rate": 9.45885464705174e-05,
"loss": 0.0163,
"step": 19200
},
{
"grad_norm": 0.19622260332107544,
"learning_rate": 9.458106229434076e-05,
"loss": 0.014,
"step": 19210
},
{
"grad_norm": 0.20563949644565582,
"learning_rate": 9.457357324284617e-05,
"loss": 0.0133,
"step": 19220
},
{
"grad_norm": 0.20626185834407806,
"learning_rate": 9.456607931685262e-05,
"loss": 0.0149,
"step": 19230
},
{
"grad_norm": 0.20093387365341187,
"learning_rate": 9.455858051717965e-05,
"loss": 0.0149,
"step": 19240
},
{
"grad_norm": 0.20239786803722382,
"learning_rate": 9.45510768446473e-05,
"loss": 0.0179,
"step": 19250
},
{
"grad_norm": 0.23519350588321686,
"learning_rate": 9.454356830007618e-05,
"loss": 0.0136,
"step": 19260
},
{
"grad_norm": 0.19403128325939178,
"learning_rate": 9.45360548842874e-05,
"loss": 0.0149,
"step": 19270
},
{
"grad_norm": 0.25772014260292053,
"learning_rate": 9.452853659810261e-05,
"loss": 0.0171,
"step": 19280
},
{
"grad_norm": 0.18387538194656372,
"learning_rate": 9.452101344234401e-05,
"loss": 0.0165,
"step": 19290
},
{
"grad_norm": 0.2201598435640335,
"learning_rate": 9.451348541783431e-05,
"loss": 0.0125,
"step": 19300
},
{
"grad_norm": 0.17011705040931702,
"learning_rate": 9.450595252539678e-05,
"loss": 0.0136,
"step": 19310
},
{
"grad_norm": 0.16841693222522736,
"learning_rate": 9.449841476585518e-05,
"loss": 0.0151,
"step": 19320
},
{
"grad_norm": 0.17615078389644623,
"learning_rate": 9.449087214003384e-05,
"loss": 0.0186,
"step": 19330
},
{
"grad_norm": 0.2136228233575821,
"learning_rate": 9.448332464875765e-05,
"loss": 0.0178,
"step": 19340
},
{
"grad_norm": 0.17352472245693207,
"learning_rate": 9.447577229285192e-05,
"loss": 0.0152,
"step": 19350
},
{
"grad_norm": 0.2533184289932251,
"learning_rate": 9.446821507314261e-05,
"loss": 0.0161,
"step": 19360
},
{
"grad_norm": 0.2478211224079132,
"learning_rate": 9.446065299045617e-05,
"loss": 0.0158,
"step": 19370
},
{
"grad_norm": 0.184827521443367,
"learning_rate": 9.445308604561955e-05,
"loss": 0.016,
"step": 19380
},
{
"grad_norm": 0.19070421159267426,
"learning_rate": 9.444551423946028e-05,
"loss": 0.018,
"step": 19390
},
{
"grad_norm": 0.17417584359645844,
"learning_rate": 9.443793757280638e-05,
"loss": 0.0167,
"step": 19400
},
{
"grad_norm": 0.19516630470752716,
"learning_rate": 9.443035604648646e-05,
"loss": 0.0183,
"step": 19410
},
{
"grad_norm": 0.21007031202316284,
"learning_rate": 9.44227696613296e-05,
"loss": 0.0147,
"step": 19420
},
{
"grad_norm": 0.22228588163852692,
"learning_rate": 9.441517841816542e-05,
"loss": 0.0146,
"step": 19430
},
{
"grad_norm": 0.1996290683746338,
"learning_rate": 9.440758231782413e-05,
"loss": 0.0145,
"step": 19440
},
{
"grad_norm": 0.2389068454504013,
"learning_rate": 9.439998136113639e-05,
"loss": 0.0161,
"step": 19450
},
{
"grad_norm": 0.2581833302974701,
"learning_rate": 9.439237554893344e-05,
"loss": 0.0142,
"step": 19460
},
{
"grad_norm": 0.1934615671634674,
"learning_rate": 9.438476488204705e-05,
"loss": 0.0189,
"step": 19470
},
{
"grad_norm": 0.2351747751235962,
"learning_rate": 9.43771493613095e-05,
"loss": 0.0177,
"step": 19480
},
{
"grad_norm": 0.20744049549102783,
"learning_rate": 9.436952898755362e-05,
"loss": 0.0152,
"step": 19490
},
{
"grad_norm": 0.2298344522714615,
"learning_rate": 9.436190376161276e-05,
"loss": 0.0129,
"step": 19500
},
{
"grad_norm": 0.19509683549404144,
"learning_rate": 9.43542736843208e-05,
"loss": 0.0193,
"step": 19510
},
{
"grad_norm": 0.28534120321273804,
"learning_rate": 9.434663875651216e-05,
"loss": 0.0149,
"step": 19520
},
{
"grad_norm": 0.26728376746177673,
"learning_rate": 9.433899897902177e-05,
"loss": 0.0169,
"step": 19530
},
{
"grad_norm": 0.17947261035442352,
"learning_rate": 9.433135435268511e-05,
"loss": 0.0145,
"step": 19540
},
{
"grad_norm": 0.2602401673793793,
"learning_rate": 9.432370487833819e-05,
"loss": 0.0178,
"step": 19550
},
{
"grad_norm": 0.20871134102344513,
"learning_rate": 9.431605055681756e-05,
"loss": 0.0149,
"step": 19560
},
{
"grad_norm": 0.17870604991912842,
"learning_rate": 9.430839138896026e-05,
"loss": 0.0172,
"step": 19570
},
{
"grad_norm": 0.20555749535560608,
"learning_rate": 9.43007273756039e-05,
"loss": 0.0128,
"step": 19580
},
{
"grad_norm": 0.24009230732917786,
"learning_rate": 9.429305851758658e-05,
"loss": 0.0168,
"step": 19590
},
{
"grad_norm": 0.2157120704650879,
"learning_rate": 9.428538481574699e-05,
"loss": 0.0182,
"step": 19600
},
{
"grad_norm": 0.22232243418693542,
"learning_rate": 9.42777062709243e-05,
"loss": 0.0155,
"step": 19610
},
{
"grad_norm": 0.24278074502944946,
"learning_rate": 9.427002288395821e-05,
"loss": 0.0167,
"step": 19620
},
{
"grad_norm": 0.2559744715690613,
"learning_rate": 9.426233465568898e-05,
"loss": 0.015,
"step": 19630
},
{
"grad_norm": 0.22816607356071472,
"learning_rate": 9.42546415869574e-05,
"loss": 0.0185,
"step": 19640
},
{
"grad_norm": 0.2328685224056244,
"learning_rate": 9.424694367860473e-05,
"loss": 0.0162,
"step": 19650
},
{
"grad_norm": 0.20901796221733093,
"learning_rate": 9.423924093147284e-05,
"loss": 0.0152,
"step": 19660
},
{
"grad_norm": 0.2872743010520935,
"learning_rate": 9.423153334640407e-05,
"loss": 0.016,
"step": 19670
},
{
"grad_norm": 0.17910341918468475,
"learning_rate": 9.42238209242413e-05,
"loss": 0.0178,
"step": 19680
},
{
"grad_norm": 0.20844197273254395,
"learning_rate": 9.421610366582798e-05,
"loss": 0.0147,
"step": 19690
},
{
"grad_norm": 0.2056276798248291,
"learning_rate": 9.420838157200803e-05,
"loss": 0.0141,
"step": 19700
},
{
"grad_norm": 0.2005912810564041,
"learning_rate": 9.420065464362594e-05,
"loss": 0.0167,
"step": 19710
},
{
"grad_norm": 0.1822415292263031,
"learning_rate": 9.419292288152673e-05,
"loss": 0.0158,
"step": 19720
},
{
"grad_norm": 0.19640015065670013,
"learning_rate": 9.418518628655588e-05,
"loss": 0.019,
"step": 19730
},
{
"grad_norm": 0.20280957221984863,
"learning_rate": 9.417744485955951e-05,
"loss": 0.0165,
"step": 19740
},
{
"grad_norm": 0.21713192760944366,
"learning_rate": 9.41696986013842e-05,
"loss": 0.0148,
"step": 19750
},
{
"grad_norm": 0.1741454154253006,
"learning_rate": 9.416194751287705e-05,
"loss": 0.0139,
"step": 19760
},
{
"grad_norm": 0.22186775505542755,
"learning_rate": 9.415419159488572e-05,
"loss": 0.0176,
"step": 19770
},
{
"grad_norm": 0.2876559793949127,
"learning_rate": 9.414643084825837e-05,
"loss": 0.0153,
"step": 19780
},
{
"grad_norm": 0.17327694594860077,
"learning_rate": 9.413866527384372e-05,
"loss": 0.0155,
"step": 19790
},
{
"grad_norm": 0.20157267153263092,
"learning_rate": 9.4130894872491e-05,
"loss": 0.0129,
"step": 19800
},
{
"grad_norm": 0.22156211733818054,
"learning_rate": 9.412311964504998e-05,
"loss": 0.0155,
"step": 19810
},
{
"grad_norm": 0.23381781578063965,
"learning_rate": 9.411533959237091e-05,
"loss": 0.0162,
"step": 19820
},
{
"grad_norm": 0.23688384890556335,
"learning_rate": 9.410755471530464e-05,
"loss": 0.0175,
"step": 19830
},
{
"grad_norm": 0.15220996737480164,
"learning_rate": 9.40997650147025e-05,
"loss": 0.0164,
"step": 19840
},
{
"grad_norm": 0.1738215535879135,
"learning_rate": 9.409197049141637e-05,
"loss": 0.0143,
"step": 19850
},
{
"grad_norm": 0.16357380151748657,
"learning_rate": 9.408417114629863e-05,
"loss": 0.0173,
"step": 19860
},
{
"grad_norm": 0.17979925870895386,
"learning_rate": 9.40763669802022e-05,
"loss": 0.0184,
"step": 19870
},
{
"grad_norm": 0.19944514334201813,
"learning_rate": 9.406855799398056e-05,
"loss": 0.0182,
"step": 19880
},
{
"grad_norm": 0.2147817760705948,
"learning_rate": 9.406074418848767e-05,
"loss": 0.017,
"step": 19890
},
{
"grad_norm": 0.21749483048915863,
"learning_rate": 9.405292556457805e-05,
"loss": 0.018,
"step": 19900
},
{
"grad_norm": 0.26651832461357117,
"learning_rate": 9.404510212310671e-05,
"loss": 0.0201,
"step": 19910
},
{
"grad_norm": 0.1734253466129303,
"learning_rate": 9.403727386492924e-05,
"loss": 0.0163,
"step": 19920
},
{
"grad_norm": 0.22233816981315613,
"learning_rate": 9.40294407909017e-05,
"loss": 0.0161,
"step": 19930
},
{
"grad_norm": 0.17614853382110596,
"learning_rate": 9.40216029018807e-05,
"loss": 0.0161,
"step": 19940
},
{
"grad_norm": 0.20520544052124023,
"learning_rate": 9.401376019872338e-05,
"loss": 0.0134,
"step": 19950
},
{
"grad_norm": 0.22282147407531738,
"learning_rate": 9.400591268228746e-05,
"loss": 0.0168,
"step": 19960
},
{
"grad_norm": 0.22932884097099304,
"learning_rate": 9.399806035343106e-05,
"loss": 0.0194,
"step": 19970
},
{
"grad_norm": 0.25537630915641785,
"learning_rate": 9.399020321301294e-05,
"loss": 0.0155,
"step": 19980
},
{
"grad_norm": 0.16550827026367188,
"learning_rate": 9.398234126189234e-05,
"loss": 0.0154,
"step": 19990
},
{
"grad_norm": 0.21164970099925995,
"learning_rate": 9.397447450092902e-05,
"loss": 0.0173,
"step": 20000
},
{
"grad_norm": 0.24687914550304413,
"learning_rate": 9.39666029309833e-05,
"loss": 0.0201,
"step": 20010
},
{
"grad_norm": 0.2638387382030487,
"learning_rate": 9.395872655291596e-05,
"loss": 0.0164,
"step": 20020
},
{
"grad_norm": 0.21427874267101288,
"learning_rate": 9.395084536758838e-05,
"loss": 0.0159,
"step": 20030
},
{
"grad_norm": 0.2500334680080414,
"learning_rate": 9.394295937586243e-05,
"loss": 0.0156,
"step": 20040
},
{
"grad_norm": 0.3239850103855133,
"learning_rate": 9.393506857860052e-05,
"loss": 0.0148,
"step": 20050
},
{
"grad_norm": 0.17661415040493011,
"learning_rate": 9.392717297666555e-05,
"loss": 0.0161,
"step": 20060
},
{
"grad_norm": 0.19975602626800537,
"learning_rate": 9.391927257092101e-05,
"loss": 0.0179,
"step": 20070
},
{
"grad_norm": 0.23306478559970856,
"learning_rate": 9.391136736223085e-05,
"loss": 0.0145,
"step": 20080
},
{
"grad_norm": 0.2603534162044525,
"learning_rate": 9.390345735145956e-05,
"loss": 0.0186,
"step": 20090
},
{
"grad_norm": 0.3492478132247925,
"learning_rate": 9.389554253947219e-05,
"loss": 0.0209,
"step": 20100
},
{
"grad_norm": 0.2827115058898926,
"learning_rate": 9.388762292713428e-05,
"loss": 0.0203,
"step": 20110
},
{
"grad_norm": 0.2664572298526764,
"learning_rate": 9.38796985153119e-05,
"loss": 0.019,
"step": 20120
},
{
"grad_norm": 0.25753986835479736,
"learning_rate": 9.387176930487169e-05,
"loss": 0.018,
"step": 20130
},
{
"grad_norm": 0.17414173483848572,
"learning_rate": 9.386383529668072e-05,
"loss": 0.016,
"step": 20140
},
{
"grad_norm": 0.202108234167099,
"learning_rate": 9.385589649160669e-05,
"loss": 0.0186,
"step": 20150
},
{
"grad_norm": 0.23871807754039764,
"learning_rate": 9.384795289051775e-05,
"loss": 0.0151,
"step": 20160
},
{
"grad_norm": 0.22860831022262573,
"learning_rate": 9.384000449428261e-05,
"loss": 0.0141,
"step": 20170
},
{
"grad_norm": 0.1896970570087433,
"learning_rate": 9.383205130377048e-05,
"loss": 0.0151,
"step": 20180
},
{
"grad_norm": 0.20248743891716003,
"learning_rate": 9.382409331985114e-05,
"loss": 0.0172,
"step": 20190
},
{
"grad_norm": 0.18169482052326202,
"learning_rate": 9.381613054339482e-05,
"loss": 0.0146,
"step": 20200
},
{
"grad_norm": 0.23816770315170288,
"learning_rate": 9.380816297527235e-05,
"loss": 0.0159,
"step": 20210
},
{
"grad_norm": 0.23363809287548065,
"learning_rate": 9.380019061635506e-05,
"loss": 0.019,
"step": 20220
},
{
"grad_norm": 0.2147899717092514,
"learning_rate": 9.379221346751474e-05,
"loss": 0.0136,
"step": 20230
},
{
"grad_norm": 0.2629246413707733,
"learning_rate": 9.378423152962382e-05,
"loss": 0.0153,
"step": 20240
},
{
"grad_norm": 0.24167490005493164,
"learning_rate": 9.377624480355517e-05,
"loss": 0.0184,
"step": 20250
},
{
"grad_norm": 0.19889342784881592,
"learning_rate": 9.376825329018219e-05,
"loss": 0.019,
"step": 20260
},
{
"grad_norm": 0.2099040448665619,
"learning_rate": 9.376025699037884e-05,
"loss": 0.0143,
"step": 20270
},
{
"grad_norm": 0.25897860527038574,
"learning_rate": 9.37522559050196e-05,
"loss": 0.0153,
"step": 20280
},
{
"grad_norm": 0.1966579705476761,
"learning_rate": 9.37442500349794e-05,
"loss": 0.0164,
"step": 20290
},
{
"grad_norm": 0.26289817690849304,
"learning_rate": 9.373623938113381e-05,
"loss": 0.0154,
"step": 20300
},
{
"grad_norm": 0.19626647233963013,
"learning_rate": 9.372822394435883e-05,
"loss": 0.02,
"step": 20310
},
{
"grad_norm": 0.1939084380865097,
"learning_rate": 9.372020372553102e-05,
"loss": 0.016,
"step": 20320
},
{
"grad_norm": 0.1999606341123581,
"learning_rate": 9.371217872552746e-05,
"loss": 0.0154,
"step": 20330
},
{
"grad_norm": 0.19327686727046967,
"learning_rate": 9.370414894522576e-05,
"loss": 0.0153,
"step": 20340
},
{
"grad_norm": 0.22085945308208466,
"learning_rate": 9.369611438550406e-05,
"loss": 0.0177,
"step": 20350
},
{
"grad_norm": 0.26224273443222046,
"learning_rate": 9.368807504724095e-05,
"loss": 0.016,
"step": 20360
},
{
"grad_norm": 0.19646985828876495,
"learning_rate": 9.368003093131565e-05,
"loss": 0.0154,
"step": 20370
},
{
"grad_norm": 0.2008865922689438,
"learning_rate": 9.367198203860785e-05,
"loss": 0.018,
"step": 20380
},
{
"grad_norm": 0.19568470120429993,
"learning_rate": 9.366392836999774e-05,
"loss": 0.0181,
"step": 20390
},
{
"grad_norm": 0.18460358679294586,
"learning_rate": 9.365586992636607e-05,
"loss": 0.0168,
"step": 20400
},
{
"grad_norm": 0.25228118896484375,
"learning_rate": 9.364780670859412e-05,
"loss": 0.0164,
"step": 20410
},
{
"grad_norm": 0.23357588052749634,
"learning_rate": 9.363973871756364e-05,
"loss": 0.0129,
"step": 20420
},
{
"grad_norm": 0.2548837661743164,
"learning_rate": 9.363166595415696e-05,
"loss": 0.0121,
"step": 20430
},
{
"grad_norm": 0.16061502695083618,
"learning_rate": 9.362358841925686e-05,
"loss": 0.0157,
"step": 20440
},
{
"grad_norm": 0.2448650449514389,
"learning_rate": 9.361550611374674e-05,
"loss": 0.0148,
"step": 20450
},
{
"grad_norm": 0.21893996000289917,
"learning_rate": 9.360741903851043e-05,
"loss": 0.0165,
"step": 20460
},
{
"grad_norm": 0.22877977788448334,
"learning_rate": 9.359932719443236e-05,
"loss": 0.0164,
"step": 20470
},
{
"grad_norm": 0.22097840905189514,
"learning_rate": 9.35912305823974e-05,
"loss": 0.0175,
"step": 20480
},
{
"grad_norm": 0.2514427602291107,
"learning_rate": 9.358312920329101e-05,
"loss": 0.0193,
"step": 20490
},
{
"grad_norm": 0.17240166664123535,
"learning_rate": 9.357502305799914e-05,
"loss": 0.0176,
"step": 20500
},
{
"grad_norm": 0.21320685744285583,
"learning_rate": 9.356691214740824e-05,
"loss": 0.0142,
"step": 20510
},
{
"grad_norm": 0.21088501811027527,
"learning_rate": 9.355879647240535e-05,
"loss": 0.0198,
"step": 20520
},
{
"grad_norm": 0.21060901880264282,
"learning_rate": 9.355067603387798e-05,
"loss": 0.0175,
"step": 20530
},
{
"grad_norm": 0.19547854363918304,
"learning_rate": 9.354255083271412e-05,
"loss": 0.0131,
"step": 20540
},
{
"grad_norm": 0.17447546124458313,
"learning_rate": 9.353442086980239e-05,
"loss": 0.016,
"step": 20550
},
{
"grad_norm": 0.2414809912443161,
"learning_rate": 9.352628614603185e-05,
"loss": 0.0183,
"step": 20560
},
{
"grad_norm": 0.19553828239440918,
"learning_rate": 9.351814666229209e-05,
"loss": 0.0157,
"step": 20570
},
{
"grad_norm": 0.21528875827789307,
"learning_rate": 9.351000241947324e-05,
"loss": 0.0174,
"step": 20580
},
{
"grad_norm": 0.21160905063152313,
"learning_rate": 9.350185341846594e-05,
"loss": 0.0159,
"step": 20590
},
{
"grad_norm": 0.2174428254365921,
"learning_rate": 9.349369966016134e-05,
"loss": 0.014,
"step": 20600
},
{
"grad_norm": 0.192820206284523,
"learning_rate": 9.348554114545117e-05,
"loss": 0.015,
"step": 20610
},
{
"grad_norm": 0.2764177620410919,
"learning_rate": 9.347737787522758e-05,
"loss": 0.0136,
"step": 20620
},
{
"grad_norm": 0.20158930122852325,
"learning_rate": 9.346920985038332e-05,
"loss": 0.0162,
"step": 20630
},
{
"grad_norm": 0.1876758635044098,
"learning_rate": 9.346103707181162e-05,
"loss": 0.0164,
"step": 20640
},
{
"grad_norm": 0.1939225047826767,
"learning_rate": 9.345285954040626e-05,
"loss": 0.0176,
"step": 20650
},
{
"grad_norm": 0.20861920714378357,
"learning_rate": 9.34446772570615e-05,
"loss": 0.0179,
"step": 20660
},
{
"grad_norm": 0.23533208668231964,
"learning_rate": 9.343649022267214e-05,
"loss": 0.0174,
"step": 20670
},
{
"grad_norm": 0.24126026034355164,
"learning_rate": 9.342829843813353e-05,
"loss": 0.0163,
"step": 20680
},
{
"grad_norm": 0.21258068084716797,
"learning_rate": 9.342010190434149e-05,
"loss": 0.0159,
"step": 20690
},
{
"grad_norm": 0.20059002935886383,
"learning_rate": 9.34119006221924e-05,
"loss": 0.0173,
"step": 20700
},
{
"grad_norm": 0.23242899775505066,
"learning_rate": 9.340369459258313e-05,
"loss": 0.0161,
"step": 20710
},
{
"grad_norm": 0.2322058081626892,
"learning_rate": 9.339548381641106e-05,
"loss": 0.0163,
"step": 20720
},
{
"grad_norm": 0.21510280668735504,
"learning_rate": 9.338726829457413e-05,
"loss": 0.0174,
"step": 20730
},
{
"grad_norm": 0.24406039714813232,
"learning_rate": 9.337904802797078e-05,
"loss": 0.0188,
"step": 20740
},
{
"grad_norm": 0.21909946203231812,
"learning_rate": 9.337082301749993e-05,
"loss": 0.0155,
"step": 20750
},
{
"grad_norm": 0.17972180247306824,
"learning_rate": 9.336259326406109e-05,
"loss": 0.0171,
"step": 20760
},
{
"grad_norm": 0.2577114999294281,
"learning_rate": 9.335435876855427e-05,
"loss": 0.0193,
"step": 20770
},
{
"grad_norm": 0.1579129993915558,
"learning_rate": 9.334611953187994e-05,
"loss": 0.0139,
"step": 20780
},
{
"grad_norm": 0.25546857714653015,
"learning_rate": 9.333787555493914e-05,
"loss": 0.0153,
"step": 20790
},
{
"grad_norm": 0.2245105355978012,
"learning_rate": 9.332962683863345e-05,
"loss": 0.0156,
"step": 20800
},
{
"grad_norm": 0.28271248936653137,
"learning_rate": 9.332137338386489e-05,
"loss": 0.0147,
"step": 20810
},
{
"grad_norm": 0.19915854930877686,
"learning_rate": 9.33131151915361e-05,
"loss": 0.0153,
"step": 20820
},
{
"grad_norm": 0.18703337013721466,
"learning_rate": 9.330485226255012e-05,
"loss": 0.017,
"step": 20830
},
{
"grad_norm": 0.14349418878555298,
"learning_rate": 9.329658459781061e-05,
"loss": 0.014,
"step": 20840
},
{
"grad_norm": 0.2399742752313614,
"learning_rate": 9.328831219822172e-05,
"loss": 0.0151,
"step": 20850
},
{
"grad_norm": 0.20987749099731445,
"learning_rate": 9.328003506468808e-05,
"loss": 0.016,
"step": 20860
},
{
"grad_norm": 0.22967706620693207,
"learning_rate": 9.327175319811488e-05,
"loss": 0.0141,
"step": 20870
},
{
"grad_norm": 0.22494818270206451,
"learning_rate": 9.326346659940781e-05,
"loss": 0.0161,
"step": 20880
},
{
"grad_norm": 0.23504319787025452,
"learning_rate": 9.325517526947308e-05,
"loss": 0.0144,
"step": 20890
},
{
"grad_norm": 0.21370932459831238,
"learning_rate": 9.32468792092174e-05,
"loss": 0.0129,
"step": 20900
},
{
"grad_norm": 0.19011889398097992,
"learning_rate": 9.323857841954803e-05,
"loss": 0.0159,
"step": 20910
},
{
"grad_norm": 0.18483437597751617,
"learning_rate": 9.323027290137276e-05,
"loss": 0.0149,
"step": 20920
},
{
"grad_norm": 0.18339982628822327,
"learning_rate": 9.322196265559981e-05,
"loss": 0.0145,
"step": 20930
},
{
"grad_norm": 0.24630165100097656,
"learning_rate": 9.321364768313803e-05,
"loss": 0.0183,
"step": 20940
},
{
"grad_norm": 0.2203260064125061,
"learning_rate": 9.32053279848967e-05,
"loss": 0.0137,
"step": 20950
},
{
"grad_norm": 0.25817951560020447,
"learning_rate": 9.319700356178567e-05,
"loss": 0.0144,
"step": 20960
},
{
"grad_norm": 0.20414921641349792,
"learning_rate": 9.318867441471527e-05,
"loss": 0.0149,
"step": 20970
},
{
"grad_norm": 0.16952864825725555,
"learning_rate": 9.318034054459637e-05,
"loss": 0.0161,
"step": 20980
},
{
"grad_norm": 0.21634019911289215,
"learning_rate": 9.317200195234034e-05,
"loss": 0.0145,
"step": 20990
},
{
"grad_norm": 0.23945434391498566,
"learning_rate": 9.316365863885909e-05,
"loss": 0.0192,
"step": 21000
},
{
"grad_norm": 0.1654985100030899,
"learning_rate": 9.315531060506502e-05,
"loss": 0.0157,
"step": 21010
},
{
"grad_norm": 0.19484049081802368,
"learning_rate": 9.314695785187108e-05,
"loss": 0.0176,
"step": 21020
},
{
"grad_norm": 0.21691042184829712,
"learning_rate": 9.313860038019069e-05,
"loss": 0.0153,
"step": 21030
},
{
"grad_norm": 0.26378920674324036,
"learning_rate": 9.313023819093782e-05,
"loss": 0.0178,
"step": 21040
},
{
"grad_norm": 0.28963178396224976,
"learning_rate": 9.312187128502695e-05,
"loss": 0.0203,
"step": 21050
},
{
"grad_norm": 0.21935831010341644,
"learning_rate": 9.311349966337307e-05,
"loss": 0.0144,
"step": 21060
},
{
"grad_norm": 0.17089690268039703,
"learning_rate": 9.310512332689169e-05,
"loss": 0.0189,
"step": 21070
},
{
"grad_norm": 0.18906037509441376,
"learning_rate": 9.309674227649883e-05,
"loss": 0.0159,
"step": 21080
},
{
"grad_norm": 0.19601677358150482,
"learning_rate": 9.308835651311103e-05,
"loss": 0.0158,
"step": 21090
},
{
"grad_norm": 0.21335552632808685,
"learning_rate": 9.307996603764533e-05,
"loss": 0.0148,
"step": 21100
},
{
"grad_norm": 0.24205279350280762,
"learning_rate": 9.307157085101932e-05,
"loss": 0.0179,
"step": 21110
},
{
"grad_norm": 0.19505095481872559,
"learning_rate": 9.306317095415109e-05,
"loss": 0.0148,
"step": 21120
},
{
"grad_norm": 0.3368663787841797,
"learning_rate": 9.305476634795922e-05,
"loss": 0.0172,
"step": 21130
},
{
"grad_norm": 0.24451889097690582,
"learning_rate": 9.304635703336284e-05,
"loss": 0.0177,
"step": 21140
},
{
"grad_norm": 0.2271011620759964,
"learning_rate": 9.303794301128157e-05,
"loss": 0.0172,
"step": 21150
},
{
"grad_norm": 0.21570229530334473,
"learning_rate": 9.302952428263555e-05,
"loss": 0.016,
"step": 21160
},
{
"grad_norm": 0.2091713696718216,
"learning_rate": 9.302110084834545e-05,
"loss": 0.0152,
"step": 21170
},
{
"grad_norm": 0.2397819459438324,
"learning_rate": 9.301267270933245e-05,
"loss": 0.0167,
"step": 21180
},
{
"grad_norm": 0.23981717228889465,
"learning_rate": 9.300423986651823e-05,
"loss": 0.019,
"step": 21190
},
{
"grad_norm": 0.23592530190944672,
"learning_rate": 9.299580232082501e-05,
"loss": 0.0167,
"step": 21200
},
{
"grad_norm": 0.24473746120929718,
"learning_rate": 9.298736007317547e-05,
"loss": 0.0143,
"step": 21210
},
{
"grad_norm": 0.2057255357503891,
"learning_rate": 9.297891312449288e-05,
"loss": 0.0167,
"step": 21220
},
{
"grad_norm": 0.22062143683433533,
"learning_rate": 9.297046147570094e-05,
"loss": 0.0169,
"step": 21230
},
{
"grad_norm": 0.1654421091079712,
"learning_rate": 9.296200512772396e-05,
"loss": 0.0155,
"step": 21240
},
{
"grad_norm": 0.2394648939371109,
"learning_rate": 9.295354408148668e-05,
"loss": 0.014,
"step": 21250
},
{
"grad_norm": 0.20795094966888428,
"learning_rate": 9.294507833791441e-05,
"loss": 0.0152,
"step": 21260
},
{
"grad_norm": 0.2580958604812622,
"learning_rate": 9.293660789793295e-05,
"loss": 0.0172,
"step": 21270
},
{
"grad_norm": 0.18256065249443054,
"learning_rate": 9.292813276246858e-05,
"loss": 0.0161,
"step": 21280
},
{
"grad_norm": 0.19090405106544495,
"learning_rate": 9.291965293244816e-05,
"loss": 0.0166,
"step": 21290
},
{
"grad_norm": 0.26941341161727905,
"learning_rate": 9.291116840879904e-05,
"loss": 0.0145,
"step": 21300
},
{
"grad_norm": 0.18729177117347717,
"learning_rate": 9.290267919244904e-05,
"loss": 0.0184,
"step": 21310
},
{
"grad_norm": 0.15456588566303253,
"learning_rate": 9.289418528432655e-05,
"loss": 0.0165,
"step": 21320
},
{
"grad_norm": 0.20851314067840576,
"learning_rate": 9.288568668536045e-05,
"loss": 0.0123,
"step": 21330
},
{
"grad_norm": 0.2293507605791092,
"learning_rate": 9.287718339648013e-05,
"loss": 0.0162,
"step": 21340
},
{
"grad_norm": 0.2490718513727188,
"learning_rate": 9.28686754186155e-05,
"loss": 0.0163,
"step": 21350
},
{
"grad_norm": 0.1911923587322235,
"learning_rate": 9.286016275269698e-05,
"loss": 0.015,
"step": 21360
},
{
"grad_norm": 0.188841313123703,
"learning_rate": 9.285164539965551e-05,
"loss": 0.0139,
"step": 21370
},
{
"grad_norm": 0.26565125584602356,
"learning_rate": 9.284312336042251e-05,
"loss": 0.0132,
"step": 21380
},
{
"grad_norm": 0.2394629269838333,
"learning_rate": 9.283459663592996e-05,
"loss": 0.0168,
"step": 21390
},
{
"grad_norm": 0.19435308873653412,
"learning_rate": 9.282606522711033e-05,
"loss": 0.0146,
"step": 21400
},
{
"grad_norm": 0.19722171127796173,
"learning_rate": 9.281752913489657e-05,
"loss": 0.0147,
"step": 21410
},
{
"grad_norm": 0.16051748394966125,
"learning_rate": 9.280898836022222e-05,
"loss": 0.0151,
"step": 21420
},
{
"grad_norm": 0.22683489322662354,
"learning_rate": 9.280044290402126e-05,
"loss": 0.0157,
"step": 21430
},
{
"grad_norm": 0.20663219690322876,
"learning_rate": 9.279189276722821e-05,
"loss": 0.0171,
"step": 21440
},
{
"grad_norm": 0.2679346203804016,
"learning_rate": 9.278333795077812e-05,
"loss": 0.0195,
"step": 21450
},
{
"grad_norm": 0.21232666075229645,
"learning_rate": 9.27747784556065e-05,
"loss": 0.0136,
"step": 21460
},
{
"grad_norm": 0.25075575709342957,
"learning_rate": 9.276621428264942e-05,
"loss": 0.0127,
"step": 21470
},
{
"grad_norm": 0.21216872334480286,
"learning_rate": 9.275764543284345e-05,
"loss": 0.0142,
"step": 21480
},
{
"grad_norm": 0.2122514694929123,
"learning_rate": 9.274907190712566e-05,
"loss": 0.0154,
"step": 21490
},
{
"grad_norm": 0.19179250299930573,
"learning_rate": 9.274049370643363e-05,
"loss": 0.0169,
"step": 21500
},
{
"grad_norm": 0.21835725009441376,
"learning_rate": 9.273191083170547e-05,
"loss": 0.0127,
"step": 21510
},
{
"grad_norm": 0.2395332008600235,
"learning_rate": 9.27233232838798e-05,
"loss": 0.0161,
"step": 21520
},
{
"grad_norm": 0.22382347285747528,
"learning_rate": 9.27147310638957e-05,
"loss": 0.0155,
"step": 21530
},
{
"grad_norm": 0.21594788134098053,
"learning_rate": 9.270613417269286e-05,
"loss": 0.0169,
"step": 21540
},
{
"grad_norm": 0.2419319599866867,
"learning_rate": 9.269753261121138e-05,
"loss": 0.0154,
"step": 21550
},
{
"grad_norm": 0.2286730855703354,
"learning_rate": 9.268892638039194e-05,
"loss": 0.0159,
"step": 21560
},
{
"grad_norm": 0.1851305514574051,
"learning_rate": 9.268031548117569e-05,
"loss": 0.0144,
"step": 21570
},
{
"grad_norm": 0.16771528124809265,
"learning_rate": 9.26716999145043e-05,
"loss": 0.0185,
"step": 21580
},
{
"grad_norm": 0.24980562925338745,
"learning_rate": 9.266307968131998e-05,
"loss": 0.0168,
"step": 21590
},
{
"grad_norm": 0.2545185089111328,
"learning_rate": 9.26544547825654e-05,
"loss": 0.0174,
"step": 21600
},
{
"grad_norm": 0.20524127781391144,
"learning_rate": 9.264582521918376e-05,
"loss": 0.0157,
"step": 21610
},
{
"grad_norm": 0.18211904168128967,
"learning_rate": 9.263719099211881e-05,
"loss": 0.0166,
"step": 21620
},
{
"grad_norm": 0.15426014363765717,
"learning_rate": 9.262855210231476e-05,
"loss": 0.0166,
"step": 21630
},
{
"grad_norm": 0.15671566128730774,
"learning_rate": 9.261990855071633e-05,
"loss": 0.0161,
"step": 21640
},
{
"grad_norm": 0.2167554795742035,
"learning_rate": 9.261126033826878e-05,
"loss": 0.0141,
"step": 21650
},
{
"grad_norm": 0.19651272892951965,
"learning_rate": 9.260260746591786e-05,
"loss": 0.0159,
"step": 21660
},
{
"grad_norm": 0.21581247448921204,
"learning_rate": 9.259394993460985e-05,
"loss": 0.0159,
"step": 21670
},
{
"grad_norm": 0.22810497879981995,
"learning_rate": 9.258528774529151e-05,
"loss": 0.0161,
"step": 21680
},
{
"grad_norm": 0.22470343112945557,
"learning_rate": 9.257662089891013e-05,
"loss": 0.0166,
"step": 21690
},
{
"grad_norm": 0.19753903150558472,
"learning_rate": 9.25679493964135e-05,
"loss": 0.016,
"step": 21700
},
{
"grad_norm": 0.20362699031829834,
"learning_rate": 9.255927323874994e-05,
"loss": 0.016,
"step": 21710
},
{
"grad_norm": 0.24484552443027496,
"learning_rate": 9.255059242686822e-05,
"loss": 0.0218,
"step": 21720
},
{
"grad_norm": 0.28058183193206787,
"learning_rate": 9.254190696171769e-05,
"loss": 0.0186,
"step": 21730
},
{
"grad_norm": 0.2401166409254074,
"learning_rate": 9.25332168442482e-05,
"loss": 0.0153,
"step": 21740
},
{
"grad_norm": 0.18627475202083588,
"learning_rate": 9.252452207541004e-05,
"loss": 0.0151,
"step": 21750
},
{
"grad_norm": 0.18535223603248596,
"learning_rate": 9.251582265615409e-05,
"loss": 0.0155,
"step": 21760
},
{
"grad_norm": 0.17740526795387268,
"learning_rate": 9.250711858743169e-05,
"loss": 0.0175,
"step": 21770
},
{
"grad_norm": 0.26088157296180725,
"learning_rate": 9.24984098701947e-05,
"loss": 0.0155,
"step": 21780
},
{
"grad_norm": 0.21444660425186157,
"learning_rate": 9.248969650539552e-05,
"loss": 0.0142,
"step": 21790
},
{
"grad_norm": 0.23764176666736603,
"learning_rate": 9.2480978493987e-05,
"loss": 0.0146,
"step": 21800
},
{
"grad_norm": 0.24186286330223083,
"learning_rate": 9.247225583692256e-05,
"loss": 0.0163,
"step": 21810
},
{
"grad_norm": 0.26205840706825256,
"learning_rate": 9.246352853515607e-05,
"loss": 0.0134,
"step": 21820
},
{
"grad_norm": 0.22400116920471191,
"learning_rate": 9.245479658964194e-05,
"loss": 0.0151,
"step": 21830
},
{
"grad_norm": 0.20029689371585846,
"learning_rate": 9.244606000133507e-05,
"loss": 0.0155,
"step": 21840
},
{
"grad_norm": 0.2253129929304123,
"learning_rate": 9.24373187711909e-05,
"loss": 0.017,
"step": 21850
},
{
"grad_norm": 0.1444292813539505,
"learning_rate": 9.242857290016537e-05,
"loss": 0.0149,
"step": 21860
},
{
"grad_norm": 0.2714897394180298,
"learning_rate": 9.241982238921488e-05,
"loss": 0.0169,
"step": 21870
},
{
"grad_norm": 0.22298938035964966,
"learning_rate": 9.24110672392964e-05,
"loss": 0.0146,
"step": 21880
},
{
"grad_norm": 0.21249057352542877,
"learning_rate": 9.240230745136737e-05,
"loss": 0.0163,
"step": 21890
},
{
"grad_norm": 0.24658262729644775,
"learning_rate": 9.239354302638575e-05,
"loss": 0.0164,
"step": 21900
},
{
"grad_norm": 0.18486113846302032,
"learning_rate": 9.238477396531e-05,
"loss": 0.0148,
"step": 21910
},
{
"grad_norm": 0.17722158133983612,
"learning_rate": 9.23760002690991e-05,
"loss": 0.0147,
"step": 21920
},
{
"grad_norm": 0.2587215006351471,
"learning_rate": 9.236722193871252e-05,
"loss": 0.0155,
"step": 21930
},
{
"grad_norm": 0.16541887819766998,
"learning_rate": 9.235843897511023e-05,
"loss": 0.0153,
"step": 21940
},
{
"grad_norm": 0.15841276943683624,
"learning_rate": 9.234965137925276e-05,
"loss": 0.0129,
"step": 21950
},
{
"grad_norm": 0.23887085914611816,
"learning_rate": 9.234085915210108e-05,
"loss": 0.0146,
"step": 21960
},
{
"grad_norm": 0.181078240275383,
"learning_rate": 9.23320622946167e-05,
"loss": 0.0173,
"step": 21970
},
{
"grad_norm": 0.22938847541809082,
"learning_rate": 9.232326080776163e-05,
"loss": 0.0178,
"step": 21980
},
{
"grad_norm": 0.16398344933986664,
"learning_rate": 9.23144546924984e-05,
"loss": 0.0166,
"step": 21990
},
{
"grad_norm": 0.30050989985466003,
"learning_rate": 9.230564394979e-05,
"loss": 0.0135,
"step": 22000
},
{
"grad_norm": 0.1774117350578308,
"learning_rate": 9.22968285806e-05,
"loss": 0.0147,
"step": 22010
},
{
"grad_norm": 0.19958597421646118,
"learning_rate": 9.228800858589242e-05,
"loss": 0.0146,
"step": 22020
},
{
"grad_norm": 0.18918026983737946,
"learning_rate": 9.227918396663179e-05,
"loss": 0.0139,
"step": 22030
},
{
"grad_norm": 0.22280901670455933,
"learning_rate": 9.227035472378319e-05,
"loss": 0.0154,
"step": 22040
},
{
"grad_norm": 0.24022266268730164,
"learning_rate": 9.226152085831213e-05,
"loss": 0.013,
"step": 22050
},
{
"grad_norm": 0.22554033994674683,
"learning_rate": 9.22526823711847e-05,
"loss": 0.0131,
"step": 22060
},
{
"grad_norm": 0.18703500926494598,
"learning_rate": 9.224383926336745e-05,
"loss": 0.0154,
"step": 22070
},
{
"grad_norm": 0.22093725204467773,
"learning_rate": 9.223499153582744e-05,
"loss": 0.0143,
"step": 22080
},
{
"grad_norm": 0.21581970155239105,
"learning_rate": 9.222613918953226e-05,
"loss": 0.014,
"step": 22090
},
{
"grad_norm": 0.16765768826007843,
"learning_rate": 9.221728222544999e-05,
"loss": 0.0178,
"step": 22100
},
{
"grad_norm": 0.2585461139678955,
"learning_rate": 9.22084206445492e-05,
"loss": 0.014,
"step": 22110
},
{
"grad_norm": 0.21266622841358185,
"learning_rate": 9.2199554447799e-05,
"loss": 0.015,
"step": 22120
},
{
"grad_norm": 0.2095349133014679,
"learning_rate": 9.219068363616897e-05,
"loss": 0.018,
"step": 22130
},
{
"grad_norm": 0.1255059838294983,
"learning_rate": 9.218180821062919e-05,
"loss": 0.0152,
"step": 22140
},
{
"grad_norm": 0.20356354117393494,
"learning_rate": 9.21729281721503e-05,
"loss": 0.0142,
"step": 22150
},
{
"grad_norm": 0.19390271604061127,
"learning_rate": 9.216404352170339e-05,
"loss": 0.0139,
"step": 22160
},
{
"grad_norm": 0.29199182987213135,
"learning_rate": 9.215515426026007e-05,
"loss": 0.015,
"step": 22170
},
{
"grad_norm": 0.27190372347831726,
"learning_rate": 9.214626038879246e-05,
"loss": 0.016,
"step": 22180
},
{
"grad_norm": 0.21578331291675568,
"learning_rate": 9.21373619082732e-05,
"loss": 0.0151,
"step": 22190
},
{
"grad_norm": 0.2100040465593338,
"learning_rate": 9.212845881967535e-05,
"loss": 0.0161,
"step": 22200
},
{
"grad_norm": 0.18035203218460083,
"learning_rate": 9.211955112397262e-05,
"loss": 0.0151,
"step": 22210
},
{
"grad_norm": 0.21208736300468445,
"learning_rate": 9.211063882213909e-05,
"loss": 0.017,
"step": 22220
},
{
"grad_norm": 0.19938045740127563,
"learning_rate": 9.210172191514942e-05,
"loss": 0.0149,
"step": 22230
},
{
"grad_norm": 0.17603902518749237,
"learning_rate": 9.209280040397874e-05,
"loss": 0.0154,
"step": 22240
},
{
"grad_norm": 0.23635786771774292,
"learning_rate": 9.208387428960268e-05,
"loss": 0.0199,
"step": 22250
},
{
"grad_norm": 0.19247859716415405,
"learning_rate": 9.20749435729974e-05,
"loss": 0.0166,
"step": 22260
},
{
"grad_norm": 0.17111243307590485,
"learning_rate": 9.206600825513957e-05,
"loss": 0.0139,
"step": 22270
},
{
"grad_norm": 0.17398229241371155,
"learning_rate": 9.20570683370063e-05,
"loss": 0.0147,
"step": 22280
},
{
"grad_norm": 0.14633597433567047,
"learning_rate": 9.204812381957528e-05,
"loss": 0.0144,
"step": 22290
},
{
"grad_norm": 0.18436771631240845,
"learning_rate": 9.203917470382465e-05,
"loss": 0.0121,
"step": 22300
},
{
"grad_norm": 0.23613686859607697,
"learning_rate": 9.203022099073309e-05,
"loss": 0.0152,
"step": 22310
},
{
"grad_norm": 0.30081671476364136,
"learning_rate": 9.202126268127976e-05,
"loss": 0.0146,
"step": 22320
},
{
"grad_norm": 0.24067933857440948,
"learning_rate": 9.20122997764443e-05,
"loss": 0.0182,
"step": 22330
},
{
"grad_norm": 0.20103171467781067,
"learning_rate": 9.200333227720692e-05,
"loss": 0.016,
"step": 22340
},
{
"grad_norm": 0.19687743484973907,
"learning_rate": 9.199436018454826e-05,
"loss": 0.0149,
"step": 22350
},
{
"grad_norm": 0.25894713401794434,
"learning_rate": 9.198538349944952e-05,
"loss": 0.0147,
"step": 22360
},
{
"grad_norm": 0.1842220425605774,
"learning_rate": 9.197640222289234e-05,
"loss": 0.015,
"step": 22370
},
{
"grad_norm": 0.14746177196502686,
"learning_rate": 9.196741635585895e-05,
"loss": 0.0129,
"step": 22380
},
{
"grad_norm": 0.1876509040594101,
"learning_rate": 9.195842589933199e-05,
"loss": 0.0174,
"step": 22390
},
{
"grad_norm": 0.19653862714767456,
"learning_rate": 9.194943085429466e-05,
"loss": 0.0166,
"step": 22400
},
{
"grad_norm": 0.18987902998924255,
"learning_rate": 9.194043122173065e-05,
"loss": 0.014,
"step": 22410
},
{
"grad_norm": 0.16859357059001923,
"learning_rate": 9.193142700262413e-05,
"loss": 0.0173,
"step": 22420
},
{
"grad_norm": 0.2265775054693222,
"learning_rate": 9.192241819795979e-05,
"loss": 0.0159,
"step": 22430
},
{
"grad_norm": 0.2058074176311493,
"learning_rate": 9.191340480872284e-05,
"loss": 0.017,
"step": 22440
},
{
"grad_norm": 0.24698784947395325,
"learning_rate": 9.190438683589895e-05,
"loss": 0.0154,
"step": 22450
},
{
"grad_norm": 0.20847007632255554,
"learning_rate": 9.189536428047432e-05,
"loss": 0.0141,
"step": 22460
},
{
"grad_norm": 0.16210098564624786,
"learning_rate": 9.188633714343564e-05,
"loss": 0.0148,
"step": 22470
},
{
"grad_norm": 0.27115052938461304,
"learning_rate": 9.18773054257701e-05,
"loss": 0.0134,
"step": 22480
},
{
"grad_norm": 0.15684786438941956,
"learning_rate": 9.18682691284654e-05,
"loss": 0.0132,
"step": 22490
},
{
"grad_norm": 0.2694545090198517,
"learning_rate": 9.185922825250974e-05,
"loss": 0.0147,
"step": 22500
},
{
"grad_norm": 0.2178843766450882,
"learning_rate": 9.185018279889181e-05,
"loss": 0.0155,
"step": 22510
},
{
"grad_norm": 0.17947521805763245,
"learning_rate": 9.184113276860082e-05,
"loss": 0.014,
"step": 22520
},
{
"grad_norm": 0.16482184827327728,
"learning_rate": 9.183207816262645e-05,
"loss": 0.0153,
"step": 22530
},
{
"grad_norm": 0.2352295070886612,
"learning_rate": 9.182301898195891e-05,
"loss": 0.0163,
"step": 22540
},
{
"grad_norm": 0.2273463010787964,
"learning_rate": 9.181395522758889e-05,
"loss": 0.0145,
"step": 22550
},
{
"grad_norm": 0.18331757187843323,
"learning_rate": 9.180488690050759e-05,
"loss": 0.0158,
"step": 22560
},
{
"grad_norm": 0.30773892998695374,
"learning_rate": 9.179581400170671e-05,
"loss": 0.0143,
"step": 22570
},
{
"grad_norm": 0.23257498443126678,
"learning_rate": 9.178673653217845e-05,
"loss": 0.0124,
"step": 22580
},
{
"grad_norm": 0.19392426311969757,
"learning_rate": 9.177765449291551e-05,
"loss": 0.0141,
"step": 22590
},
{
"grad_norm": 0.220121830701828,
"learning_rate": 9.176856788491109e-05,
"loss": 0.0161,
"step": 22600
},
{
"grad_norm": 0.19694368541240692,
"learning_rate": 9.175947670915887e-05,
"loss": 0.015,
"step": 22610
},
{
"grad_norm": 0.1852843016386032,
"learning_rate": 9.175038096665309e-05,
"loss": 0.012,
"step": 22620
},
{
"grad_norm": 0.1881941854953766,
"learning_rate": 9.17412806583884e-05,
"loss": 0.0143,
"step": 22630
},
{
"grad_norm": 0.21659281849861145,
"learning_rate": 9.173217578536002e-05,
"loss": 0.0142,
"step": 22640
},
{
"grad_norm": 0.21505822241306305,
"learning_rate": 9.172306634856362e-05,
"loss": 0.0165,
"step": 22650
},
{
"grad_norm": 0.19481819868087769,
"learning_rate": 9.171395234899545e-05,
"loss": 0.0138,
"step": 22660
},
{
"grad_norm": 0.22649617493152618,
"learning_rate": 9.170483378765214e-05,
"loss": 0.0192,
"step": 22670
},
{
"grad_norm": 0.2129245400428772,
"learning_rate": 9.169571066553091e-05,
"loss": 0.0148,
"step": 22680
},
{
"grad_norm": 0.1935233324766159,
"learning_rate": 9.168658298362946e-05,
"loss": 0.0154,
"step": 22690
},
{
"grad_norm": 0.18985940515995026,
"learning_rate": 9.167745074294598e-05,
"loss": 0.0147,
"step": 22700
},
{
"grad_norm": 0.21253089606761932,
"learning_rate": 9.166831394447913e-05,
"loss": 0.0167,
"step": 22710
},
{
"grad_norm": 0.21559609472751617,
"learning_rate": 9.165917258922812e-05,
"loss": 0.0137,
"step": 22720
},
{
"grad_norm": 0.20335878431797028,
"learning_rate": 9.165002667819262e-05,
"loss": 0.0142,
"step": 22730
},
{
"grad_norm": 0.1861836463212967,
"learning_rate": 9.164087621237282e-05,
"loss": 0.0149,
"step": 22740
},
{
"grad_norm": 0.20750901103019714,
"learning_rate": 9.163172119276942e-05,
"loss": 0.0145,
"step": 22750
},
{
"grad_norm": 0.17172884941101074,
"learning_rate": 9.162256162038358e-05,
"loss": 0.0129,
"step": 22760
},
{
"grad_norm": 0.17812736332416534,
"learning_rate": 9.161339749621698e-05,
"loss": 0.016,
"step": 22770
},
{
"grad_norm": 0.1545380800962448,
"learning_rate": 9.160422882127177e-05,
"loss": 0.0142,
"step": 22780
},
{
"grad_norm": 0.20724381506443024,
"learning_rate": 9.159505559655069e-05,
"loss": 0.0145,
"step": 22790
},
{
"grad_norm": 0.21726366877555847,
"learning_rate": 9.158587782305684e-05,
"loss": 0.0148,
"step": 22800
},
{
"grad_norm": 0.1817689836025238,
"learning_rate": 9.157669550179391e-05,
"loss": 0.0172,
"step": 22810
},
{
"grad_norm": 0.20288459956645966,
"learning_rate": 9.156750863376609e-05,
"loss": 0.012,
"step": 22820
},
{
"grad_norm": 0.27603045105934143,
"learning_rate": 9.155831721997801e-05,
"loss": 0.016,
"step": 22830
},
{
"grad_norm": 0.1986236870288849,
"learning_rate": 9.154912126143484e-05,
"loss": 0.0142,
"step": 22840
},
{
"grad_norm": 0.26990512013435364,
"learning_rate": 9.153992075914224e-05,
"loss": 0.0169,
"step": 22850
},
{
"grad_norm": 0.19406422972679138,
"learning_rate": 9.153071571410635e-05,
"loss": 0.0154,
"step": 22860
},
{
"grad_norm": 0.20222462713718414,
"learning_rate": 9.152150612733384e-05,
"loss": 0.014,
"step": 22870
},
{
"grad_norm": 0.1865585893392563,
"learning_rate": 9.151229199983184e-05,
"loss": 0.0145,
"step": 22880
},
{
"grad_norm": 0.1840989887714386,
"learning_rate": 9.150307333260802e-05,
"loss": 0.0141,
"step": 22890
},
{
"grad_norm": 0.21253526210784912,
"learning_rate": 9.149385012667048e-05,
"loss": 0.0168,
"step": 22900
},
{
"grad_norm": 0.1894034892320633,
"learning_rate": 9.148462238302788e-05,
"loss": 0.0157,
"step": 22910
},
{
"grad_norm": 0.17287315428256989,
"learning_rate": 9.147539010268936e-05,
"loss": 0.0169,
"step": 22920
},
{
"grad_norm": 0.22205814719200134,
"learning_rate": 9.14661532866645e-05,
"loss": 0.0151,
"step": 22930
},
{
"grad_norm": 0.1727629154920578,
"learning_rate": 9.145691193596348e-05,
"loss": 0.0149,
"step": 22940
},
{
"grad_norm": 0.21813349425792694,
"learning_rate": 9.144766605159691e-05,
"loss": 0.0156,
"step": 22950
},
{
"grad_norm": 0.22872646152973175,
"learning_rate": 9.14384156345759e-05,
"loss": 0.016,
"step": 22960
},
{
"grad_norm": 0.18975701928138733,
"learning_rate": 9.142916068591204e-05,
"loss": 0.0163,
"step": 22970
},
{
"grad_norm": 0.2046053558588028,
"learning_rate": 9.141990120661746e-05,
"loss": 0.0141,
"step": 22980
},
{
"grad_norm": 0.2238152176141739,
"learning_rate": 9.141063719770475e-05,
"loss": 0.0128,
"step": 22990
},
{
"grad_norm": 0.20782746374607086,
"learning_rate": 9.140136866018704e-05,
"loss": 0.0167,
"step": 23000
},
{
"grad_norm": 0.19436068832874298,
"learning_rate": 9.139209559507788e-05,
"loss": 0.015,
"step": 23010
},
{
"grad_norm": 0.19544769823551178,
"learning_rate": 9.13828180033914e-05,
"loss": 0.0139,
"step": 23020
},
{
"grad_norm": 0.2249060720205307,
"learning_rate": 9.137353588614212e-05,
"loss": 0.0127,
"step": 23030
},
{
"grad_norm": 0.15047655999660492,
"learning_rate": 9.136424924434519e-05,
"loss": 0.0154,
"step": 23040
},
{
"grad_norm": 0.2030155062675476,
"learning_rate": 9.135495807901615e-05,
"loss": 0.0134,
"step": 23050
},
{
"grad_norm": 0.2389880269765854,
"learning_rate": 9.134566239117108e-05,
"loss": 0.0157,
"step": 23060
},
{
"grad_norm": 0.21059682965278625,
"learning_rate": 9.13363621818265e-05,
"loss": 0.0154,
"step": 23070
},
{
"grad_norm": 0.2781943678855896,
"learning_rate": 9.132705745199953e-05,
"loss": 0.0176,
"step": 23080
},
{
"grad_norm": 0.25491416454315186,
"learning_rate": 9.131774820270768e-05,
"loss": 0.0169,
"step": 23090
},
{
"grad_norm": 0.23441153764724731,
"learning_rate": 9.130843443496901e-05,
"loss": 0.0174,
"step": 23100
},
{
"grad_norm": 0.1828630417585373,
"learning_rate": 9.129911614980206e-05,
"loss": 0.0139,
"step": 23110
},
{
"grad_norm": 0.14871282875537872,
"learning_rate": 9.128979334822584e-05,
"loss": 0.0133,
"step": 23120
},
{
"grad_norm": 0.22658561170101166,
"learning_rate": 9.128046603125992e-05,
"loss": 0.0142,
"step": 23130
},
{
"grad_norm": 0.22888781130313873,
"learning_rate": 9.12711341999243e-05,
"loss": 0.0132,
"step": 23140
},
{
"grad_norm": 0.20940542221069336,
"learning_rate": 9.12617978552395e-05,
"loss": 0.0149,
"step": 23150
},
{
"grad_norm": 0.21227090060710907,
"learning_rate": 9.12524569982265e-05,
"loss": 0.0142,
"step": 23160
},
{
"grad_norm": 0.24097922444343567,
"learning_rate": 9.124311162990684e-05,
"loss": 0.0139,
"step": 23170
},
{
"grad_norm": 0.23145781457424164,
"learning_rate": 9.12337617513025e-05,
"loss": 0.0149,
"step": 23180
},
{
"grad_norm": 0.20231445133686066,
"learning_rate": 9.122440736343596e-05,
"loss": 0.0181,
"step": 23190
},
{
"grad_norm": 0.17131511867046356,
"learning_rate": 9.12150484673302e-05,
"loss": 0.0194,
"step": 23200
},
{
"grad_norm": 0.18629176914691925,
"learning_rate": 9.120568506400873e-05,
"loss": 0.0138,
"step": 23210
},
{
"grad_norm": 0.18614289164543152,
"learning_rate": 9.119631715449548e-05,
"loss": 0.0124,
"step": 23220
},
{
"grad_norm": 0.17157837748527527,
"learning_rate": 9.118694473981493e-05,
"loss": 0.0147,
"step": 23230
},
{
"grad_norm": 0.13805267214775085,
"learning_rate": 9.117756782099203e-05,
"loss": 0.0139,
"step": 23240
},
{
"grad_norm": 0.20274634659290314,
"learning_rate": 9.11681863990522e-05,
"loss": 0.0114,
"step": 23250
},
{
"grad_norm": 0.19627954065799713,
"learning_rate": 9.115880047502142e-05,
"loss": 0.0127,
"step": 23260
},
{
"grad_norm": 0.1853030025959015,
"learning_rate": 9.114941004992609e-05,
"loss": 0.0138,
"step": 23270
},
{
"grad_norm": 0.18201330304145813,
"learning_rate": 9.114001512479317e-05,
"loss": 0.0154,
"step": 23280
},
{
"grad_norm": 0.2335299700498581,
"learning_rate": 9.113061570065003e-05,
"loss": 0.0136,
"step": 23290
},
{
"grad_norm": 0.163480743765831,
"learning_rate": 9.112121177852459e-05,
"loss": 0.0139,
"step": 23300
},
{
"grad_norm": 0.22948700189590454,
"learning_rate": 9.111180335944527e-05,
"loss": 0.0139,
"step": 23310
},
{
"grad_norm": 0.1871514916419983,
"learning_rate": 9.110239044444093e-05,
"loss": 0.0152,
"step": 23320
},
{
"grad_norm": 0.1818545013666153,
"learning_rate": 9.109297303454099e-05,
"loss": 0.0131,
"step": 23330
},
{
"grad_norm": 0.16701292991638184,
"learning_rate": 9.108355113077526e-05,
"loss": 0.0123,
"step": 23340
},
{
"grad_norm": 0.28000277280807495,
"learning_rate": 9.107412473417419e-05,
"loss": 0.0144,
"step": 23350
},
{
"grad_norm": 0.2582252621650696,
"learning_rate": 9.106469384576858e-05,
"loss": 0.0158,
"step": 23360
},
{
"grad_norm": 0.17312873899936676,
"learning_rate": 9.105525846658978e-05,
"loss": 0.0158,
"step": 23370
},
{
"grad_norm": 0.2704509198665619,
"learning_rate": 9.104581859766965e-05,
"loss": 0.0177,
"step": 23380
},
{
"grad_norm": 0.23072083294391632,
"learning_rate": 9.10363742400405e-05,
"loss": 0.0152,
"step": 23390
},
{
"grad_norm": 0.16527651250362396,
"learning_rate": 9.102692539473518e-05,
"loss": 0.0144,
"step": 23400
},
{
"grad_norm": 0.20676404237747192,
"learning_rate": 9.101747206278697e-05,
"loss": 0.0159,
"step": 23410
},
{
"grad_norm": 0.2146616131067276,
"learning_rate": 9.100801424522968e-05,
"loss": 0.0178,
"step": 23420
},
{
"grad_norm": 0.2614075839519501,
"learning_rate": 9.099855194309762e-05,
"loss": 0.0173,
"step": 23430
},
{
"grad_norm": 0.17789052426815033,
"learning_rate": 9.098908515742554e-05,
"loss": 0.0135,
"step": 23440
},
{
"grad_norm": 0.2318316549062729,
"learning_rate": 9.097961388924873e-05,
"loss": 0.0162,
"step": 23450
},
{
"grad_norm": 0.20885102450847626,
"learning_rate": 9.097013813960298e-05,
"loss": 0.0165,
"step": 23460
},
{
"grad_norm": 0.2531566917896271,
"learning_rate": 9.09606579095245e-05,
"loss": 0.0157,
"step": 23470
},
{
"grad_norm": 0.17769184708595276,
"learning_rate": 9.095117320005008e-05,
"loss": 0.0137,
"step": 23480
},
{
"grad_norm": 0.19554953277111053,
"learning_rate": 9.094168401221691e-05,
"loss": 0.0119,
"step": 23490
},
{
"grad_norm": 0.22157321870326996,
"learning_rate": 9.093219034706273e-05,
"loss": 0.015,
"step": 23500
},
{
"grad_norm": 0.18708685040473938,
"learning_rate": 9.092269220562577e-05,
"loss": 0.0163,
"step": 23510
},
{
"grad_norm": 0.2525607943534851,
"learning_rate": 9.09131895889447e-05,
"loss": 0.0153,
"step": 23520
},
{
"grad_norm": 0.1881437450647354,
"learning_rate": 9.090368249805873e-05,
"loss": 0.0164,
"step": 23530
},
{
"grad_norm": 0.2073652744293213,
"learning_rate": 9.089417093400754e-05,
"loss": 0.0157,
"step": 23540
},
{
"grad_norm": 0.18113574385643005,
"learning_rate": 9.088465489783131e-05,
"loss": 0.014,
"step": 23550
},
{
"grad_norm": 0.2287263572216034,
"learning_rate": 9.087513439057068e-05,
"loss": 0.0116,
"step": 23560
},
{
"grad_norm": 0.23755474388599396,
"learning_rate": 9.08656094132668e-05,
"loss": 0.0138,
"step": 23570
},
{
"grad_norm": 0.24319809675216675,
"learning_rate": 9.085607996696134e-05,
"loss": 0.019,
"step": 23580
},
{
"grad_norm": 0.2438126504421234,
"learning_rate": 9.084654605269639e-05,
"loss": 0.0143,
"step": 23590
},
{
"grad_norm": 0.23479968309402466,
"learning_rate": 9.083700767151457e-05,
"loss": 0.014,
"step": 23600
},
{
"grad_norm": 0.19559985399246216,
"learning_rate": 9.082746482445898e-05,
"loss": 0.0138,
"step": 23610
},
{
"grad_norm": 0.1752758026123047,
"learning_rate": 9.081791751257325e-05,
"loss": 0.0132,
"step": 23620
},
{
"grad_norm": 0.2757525146007538,
"learning_rate": 9.080836573690142e-05,
"loss": 0.0144,
"step": 23630
},
{
"grad_norm": 0.23443222045898438,
"learning_rate": 9.079880949848805e-05,
"loss": 0.0145,
"step": 23640
},
{
"grad_norm": 0.21670211851596832,
"learning_rate": 9.078924879837822e-05,
"loss": 0.0123,
"step": 23650
},
{
"grad_norm": 0.21093955636024475,
"learning_rate": 9.077968363761747e-05,
"loss": 0.0133,
"step": 23660
},
{
"grad_norm": 0.22188355028629303,
"learning_rate": 9.077011401725182e-05,
"loss": 0.0149,
"step": 23670
},
{
"grad_norm": 0.1894949972629547,
"learning_rate": 9.07605399383278e-05,
"loss": 0.0124,
"step": 23680
},
{
"grad_norm": 0.22568151354789734,
"learning_rate": 9.075096140189243e-05,
"loss": 0.0126,
"step": 23690
},
{
"grad_norm": 0.20633463561534882,
"learning_rate": 9.074137840899318e-05,
"loss": 0.0143,
"step": 23700
},
{
"grad_norm": 0.20695094764232635,
"learning_rate": 9.073179096067804e-05,
"loss": 0.0128,
"step": 23710
},
{
"grad_norm": 0.177053764462471,
"learning_rate": 9.072219905799549e-05,
"loss": 0.0155,
"step": 23720
},
{
"grad_norm": 0.20552749931812286,
"learning_rate": 9.071260270199447e-05,
"loss": 0.0135,
"step": 23730
},
{
"grad_norm": 0.2141244113445282,
"learning_rate": 9.070300189372441e-05,
"loss": 0.0156,
"step": 23740
},
{
"grad_norm": 0.20599396526813507,
"learning_rate": 9.069339663423528e-05,
"loss": 0.0136,
"step": 23750
},
{
"grad_norm": 0.17279891669750214,
"learning_rate": 9.068378692457747e-05,
"loss": 0.0154,
"step": 23760
},
{
"grad_norm": 0.20797847211360931,
"learning_rate": 9.067417276580189e-05,
"loss": 0.014,
"step": 23770
},
{
"grad_norm": 0.17896415293216705,
"learning_rate": 9.066455415895993e-05,
"loss": 0.0144,
"step": 23780
},
{
"grad_norm": 0.18460604548454285,
"learning_rate": 9.065493110510346e-05,
"loss": 0.0137,
"step": 23790
},
{
"grad_norm": 0.17211906611919403,
"learning_rate": 9.064530360528484e-05,
"loss": 0.0142,
"step": 23800
},
{
"grad_norm": 0.27202412486076355,
"learning_rate": 9.063567166055695e-05,
"loss": 0.0139,
"step": 23810
},
{
"grad_norm": 0.2137269675731659,
"learning_rate": 9.062603527197308e-05,
"loss": 0.0172,
"step": 23820
},
{
"grad_norm": 0.21203388273715973,
"learning_rate": 9.06163944405871e-05,
"loss": 0.0124,
"step": 23830
},
{
"grad_norm": 0.20699001848697662,
"learning_rate": 9.060674916745327e-05,
"loss": 0.0143,
"step": 23840
},
{
"grad_norm": 0.18704131245613098,
"learning_rate": 9.05970994536264e-05,
"loss": 0.0133,
"step": 23850
},
{
"grad_norm": 0.18146492540836334,
"learning_rate": 9.05874453001618e-05,
"loss": 0.0129,
"step": 23860
},
{
"grad_norm": 0.17791873216629028,
"learning_rate": 9.057778670811517e-05,
"loss": 0.0135,
"step": 23870
},
{
"grad_norm": 0.16056740283966064,
"learning_rate": 9.056812367854281e-05,
"loss": 0.013,
"step": 23880
},
{
"grad_norm": 0.22617898881435394,
"learning_rate": 9.055845621250143e-05,
"loss": 0.0149,
"step": 23890
},
{
"grad_norm": 0.1643180102109909,
"learning_rate": 9.054878431104825e-05,
"loss": 0.0147,
"step": 23900
},
{
"grad_norm": 0.2128770351409912,
"learning_rate": 9.0539107975241e-05,
"loss": 0.0133,
"step": 23910
},
{
"grad_norm": 0.20491258800029755,
"learning_rate": 9.052942720613784e-05,
"loss": 0.0134,
"step": 23920
},
{
"grad_norm": 0.241663858294487,
"learning_rate": 9.051974200479745e-05,
"loss": 0.0122,
"step": 23930
},
{
"grad_norm": 0.23031777143478394,
"learning_rate": 9.051005237227901e-05,
"loss": 0.014,
"step": 23940
},
{
"grad_norm": 0.22506073117256165,
"learning_rate": 9.050035830964215e-05,
"loss": 0.014,
"step": 23950
},
{
"grad_norm": 0.19109565019607544,
"learning_rate": 9.049065981794698e-05,
"loss": 0.0157,
"step": 23960
},
{
"grad_norm": 0.15611767768859863,
"learning_rate": 9.048095689825414e-05,
"loss": 0.0114,
"step": 23970
},
{
"grad_norm": 0.1800951212644577,
"learning_rate": 9.047124955162472e-05,
"loss": 0.0122,
"step": 23980
},
{
"grad_norm": 0.1684034764766693,
"learning_rate": 9.046153777912028e-05,
"loss": 0.0136,
"step": 23990
},
{
"grad_norm": 0.20641621947288513,
"learning_rate": 9.045182158180292e-05,
"loss": 0.0139,
"step": 24000
},
{
"grad_norm": 0.23477306962013245,
"learning_rate": 9.044210096073516e-05,
"loss": 0.0131,
"step": 24010
},
{
"grad_norm": 0.21521511673927307,
"learning_rate": 9.043237591698004e-05,
"loss": 0.0154,
"step": 24020
},
{
"grad_norm": 0.19954660534858704,
"learning_rate": 9.04226464516011e-05,
"loss": 0.0134,
"step": 24030
},
{
"grad_norm": 0.27395713329315186,
"learning_rate": 9.041291256566229e-05,
"loss": 0.0164,
"step": 24040
},
{
"grad_norm": 0.23552460968494415,
"learning_rate": 9.040317426022814e-05,
"loss": 0.0159,
"step": 24050
},
{
"grad_norm": 0.17470824718475342,
"learning_rate": 9.03934315363636e-05,
"loss": 0.017,
"step": 24060
},
{
"grad_norm": 0.20106098055839539,
"learning_rate": 9.038368439513409e-05,
"loss": 0.0138,
"step": 24070
},
{
"grad_norm": 0.1621820032596588,
"learning_rate": 9.03739328376056e-05,
"loss": 0.0128,
"step": 24080
},
{
"grad_norm": 0.19426368176937103,
"learning_rate": 9.036417686484451e-05,
"loss": 0.0162,
"step": 24090
},
{
"grad_norm": 0.1972815990447998,
"learning_rate": 9.035441647791773e-05,
"loss": 0.0143,
"step": 24100
},
{
"grad_norm": 0.13212399184703827,
"learning_rate": 9.034465167789263e-05,
"loss": 0.0133,
"step": 24110
},
{
"grad_norm": 0.24731220304965973,
"learning_rate": 9.033488246583706e-05,
"loss": 0.0144,
"step": 24120
},
{
"grad_norm": 0.20434601604938507,
"learning_rate": 9.032510884281941e-05,
"loss": 0.0127,
"step": 24130
},
{
"grad_norm": 0.22800716757774353,
"learning_rate": 9.031533080990848e-05,
"loss": 0.0135,
"step": 24140
},
{
"grad_norm": 0.14135698974132538,
"learning_rate": 9.030554836817358e-05,
"loss": 0.0133,
"step": 24150
},
{
"grad_norm": 0.18815097212791443,
"learning_rate": 9.029576151868451e-05,
"loss": 0.014,
"step": 24160
},
{
"grad_norm": 0.1988460123538971,
"learning_rate": 9.028597026251155e-05,
"loss": 0.0121,
"step": 24170
},
{
"grad_norm": 0.22878068685531616,
"learning_rate": 9.027617460072547e-05,
"loss": 0.017,
"step": 24180
},
{
"grad_norm": 0.17404484748840332,
"learning_rate": 9.026637453439745e-05,
"loss": 0.0182,
"step": 24190
},
{
"grad_norm": 0.18336114287376404,
"learning_rate": 9.025657006459927e-05,
"loss": 0.0131,
"step": 24200
},
{
"grad_norm": 0.2783775329589844,
"learning_rate": 9.024676119240311e-05,
"loss": 0.0146,
"step": 24210
},
{
"grad_norm": 0.20794598758220673,
"learning_rate": 9.023694791888166e-05,
"loss": 0.017,
"step": 24220
},
{
"grad_norm": 0.17894412577152252,
"learning_rate": 9.022713024510808e-05,
"loss": 0.0145,
"step": 24230
},
{
"grad_norm": 0.2050257921218872,
"learning_rate": 9.021730817215601e-05,
"loss": 0.0136,
"step": 24240
},
{
"grad_norm": 0.20929528772830963,
"learning_rate": 9.02074817010996e-05,
"loss": 0.0151,
"step": 24250
},
{
"grad_norm": 0.2159581482410431,
"learning_rate": 9.019765083301342e-05,
"loss": 0.0139,
"step": 24260
},
{
"grad_norm": 0.18301215767860413,
"learning_rate": 9.01878155689726e-05,
"loss": 0.0122,
"step": 24270
},
{
"grad_norm": 0.2517687678337097,
"learning_rate": 9.017797591005268e-05,
"loss": 0.0137,
"step": 24280
},
{
"grad_norm": 0.23124970495700836,
"learning_rate": 9.016813185732972e-05,
"loss": 0.0178,
"step": 24290
},
{
"grad_norm": 0.14354126155376434,
"learning_rate": 9.015828341188027e-05,
"loss": 0.0143,
"step": 24300
},
{
"grad_norm": 0.18401013314723969,
"learning_rate": 9.01484305747813e-05,
"loss": 0.0153,
"step": 24310
},
{
"grad_norm": 0.20167894661426544,
"learning_rate": 9.013857334711033e-05,
"loss": 0.0132,
"step": 24320
},
{
"grad_norm": 0.15036405622959137,
"learning_rate": 9.012871172994534e-05,
"loss": 0.014,
"step": 24330
},
{
"grad_norm": 0.18220263719558716,
"learning_rate": 9.011884572436476e-05,
"loss": 0.0147,
"step": 24340
},
{
"grad_norm": 0.17381809651851654,
"learning_rate": 9.010897533144754e-05,
"loss": 0.013,
"step": 24350
},
{
"grad_norm": 0.1628270298242569,
"learning_rate": 9.009910055227306e-05,
"loss": 0.0151,
"step": 24360
},
{
"grad_norm": 0.20888406038284302,
"learning_rate": 9.008922138792124e-05,
"loss": 0.0154,
"step": 24370
},
{
"grad_norm": 0.1570347249507904,
"learning_rate": 9.007933783947244e-05,
"loss": 0.0137,
"step": 24380
},
{
"grad_norm": 0.1836668998003006,
"learning_rate": 9.006944990800752e-05,
"loss": 0.0151,
"step": 24390
},
{
"grad_norm": 0.20798632502555847,
"learning_rate": 9.005955759460779e-05,
"loss": 0.0139,
"step": 24400
},
{
"grad_norm": 0.17567555606365204,
"learning_rate": 9.004966090035508e-05,
"loss": 0.015,
"step": 24410
},
{
"grad_norm": 0.2556035816669464,
"learning_rate": 9.003975982633166e-05,
"loss": 0.0137,
"step": 24420
},
{
"grad_norm": 0.21839715540409088,
"learning_rate": 9.00298543736203e-05,
"loss": 0.0156,
"step": 24430
},
{
"grad_norm": 0.1876654326915741,
"learning_rate": 9.001994454330427e-05,
"loss": 0.0172,
"step": 24440
},
{
"grad_norm": 0.20417526364326477,
"learning_rate": 9.001003033646727e-05,
"loss": 0.0154,
"step": 24450
},
{
"grad_norm": 0.16093535721302032,
"learning_rate": 9.00001117541935e-05,
"loss": 0.0141,
"step": 24460
},
{
"grad_norm": 0.20546258985996246,
"learning_rate": 8.999018879756764e-05,
"loss": 0.0131,
"step": 24470
},
{
"grad_norm": 0.13915178179740906,
"learning_rate": 8.998026146767487e-05,
"loss": 0.0135,
"step": 24480
},
{
"grad_norm": 0.2658989429473877,
"learning_rate": 8.99703297656008e-05,
"loss": 0.0164,
"step": 24490
},
{
"grad_norm": 0.2229062020778656,
"learning_rate": 8.996039369243156e-05,
"loss": 0.0179,
"step": 24500
},
{
"grad_norm": 0.22348475456237793,
"learning_rate": 8.995045324925378e-05,
"loss": 0.0149,
"step": 24510
},
{
"grad_norm": 0.16096004843711853,
"learning_rate": 8.994050843715448e-05,
"loss": 0.0141,
"step": 24520
},
{
"grad_norm": 0.1520673632621765,
"learning_rate": 8.993055925722121e-05,
"loss": 0.0155,
"step": 24530
},
{
"grad_norm": 0.19288542866706848,
"learning_rate": 8.992060571054202e-05,
"loss": 0.0176,
"step": 24540
},
{
"grad_norm": 0.24025534093379974,
"learning_rate": 8.991064779820542e-05,
"loss": 0.0146,
"step": 24550
},
{
"grad_norm": 0.20292890071868896,
"learning_rate": 8.990068552130036e-05,
"loss": 0.0154,
"step": 24560
},
{
"grad_norm": 0.22602087259292603,
"learning_rate": 8.989071888091634e-05,
"loss": 0.0174,
"step": 24570
},
{
"grad_norm": 0.22065483033657074,
"learning_rate": 8.988074787814329e-05,
"loss": 0.013,
"step": 24580
},
{
"grad_norm": 0.22227178514003754,
"learning_rate": 8.987077251407158e-05,
"loss": 0.012,
"step": 24590
},
{
"grad_norm": 0.2217588573694229,
"learning_rate": 8.986079278979216e-05,
"loss": 0.0164,
"step": 24600
},
{
"grad_norm": 0.19181472063064575,
"learning_rate": 8.985080870639635e-05,
"loss": 0.0145,
"step": 24610
},
{
"grad_norm": 0.22570356726646423,
"learning_rate": 8.984082026497603e-05,
"loss": 0.015,
"step": 24620
},
{
"grad_norm": 0.17008230090141296,
"learning_rate": 8.98308274666235e-05,
"loss": 0.0126,
"step": 24630
},
{
"grad_norm": 0.20422600209712982,
"learning_rate": 8.982083031243155e-05,
"loss": 0.0127,
"step": 24640
},
{
"grad_norm": 0.1393459290266037,
"learning_rate": 8.98108288034935e-05,
"loss": 0.0128,
"step": 24650
},
{
"grad_norm": 0.24756769835948944,
"learning_rate": 8.980082294090305e-05,
"loss": 0.0148,
"step": 24660
},
{
"grad_norm": 0.170320063829422,
"learning_rate": 8.979081272575443e-05,
"loss": 0.0151,
"step": 24670
},
{
"grad_norm": 0.20775482058525085,
"learning_rate": 8.978079815914236e-05,
"loss": 0.0149,
"step": 24680
},
{
"grad_norm": 0.20452362298965454,
"learning_rate": 8.977077924216202e-05,
"loss": 0.0161,
"step": 24690
},
{
"grad_norm": 0.1973673403263092,
"learning_rate": 8.976075597590905e-05,
"loss": 0.0137,
"step": 24700
},
{
"grad_norm": 0.23274099826812744,
"learning_rate": 8.975072836147958e-05,
"loss": 0.0132,
"step": 24710
},
{
"grad_norm": 0.1778196394443512,
"learning_rate": 8.974069639997025e-05,
"loss": 0.0172,
"step": 24720
},
{
"grad_norm": 0.22104007005691528,
"learning_rate": 8.973066009247808e-05,
"loss": 0.0141,
"step": 24730
},
{
"grad_norm": 0.15248070657253265,
"learning_rate": 8.972061944010066e-05,
"loss": 0.0141,
"step": 24740
},
{
"grad_norm": 0.2708389163017273,
"learning_rate": 8.971057444393603e-05,
"loss": 0.0126,
"step": 24750
},
{
"grad_norm": 0.18148130178451538,
"learning_rate": 8.970052510508268e-05,
"loss": 0.0179,
"step": 24760
},
{
"grad_norm": 0.16653865575790405,
"learning_rate": 8.969047142463959e-05,
"loss": 0.0117,
"step": 24770
},
{
"grad_norm": 0.2350323647260666,
"learning_rate": 8.968041340370621e-05,
"loss": 0.0134,
"step": 24780
},
{
"grad_norm": 0.18603160977363586,
"learning_rate": 8.96703510433825e-05,
"loss": 0.016,
"step": 24790
},
{
"grad_norm": 0.2362491935491562,
"learning_rate": 8.966028434476883e-05,
"loss": 0.0146,
"step": 24800
},
{
"grad_norm": 0.2748473286628723,
"learning_rate": 8.96502133089661e-05,
"loss": 0.0163,
"step": 24810
},
{
"grad_norm": 0.216475710272789,
"learning_rate": 8.964013793707564e-05,
"loss": 0.0136,
"step": 24820
},
{
"grad_norm": 0.223602756857872,
"learning_rate": 8.963005823019932e-05,
"loss": 0.0135,
"step": 24830
},
{
"grad_norm": 0.21306408941745758,
"learning_rate": 8.961997418943939e-05,
"loss": 0.0124,
"step": 24840
},
{
"grad_norm": 0.159842848777771,
"learning_rate": 8.960988581589865e-05,
"loss": 0.0134,
"step": 24850
},
{
"grad_norm": 0.2029435783624649,
"learning_rate": 8.959979311068037e-05,
"loss": 0.0114,
"step": 24860
},
{
"grad_norm": 0.23242947459220886,
"learning_rate": 8.958969607488823e-05,
"loss": 0.0114,
"step": 24870
},
{
"grad_norm": 0.22794553637504578,
"learning_rate": 8.957959470962647e-05,
"loss": 0.0135,
"step": 24880
},
{
"grad_norm": 0.256059855222702,
"learning_rate": 8.956948901599971e-05,
"loss": 0.0117,
"step": 24890
},
{
"grad_norm": 0.20883019268512726,
"learning_rate": 8.955937899511315e-05,
"loss": 0.0143,
"step": 24900
},
{
"grad_norm": 0.15798425674438477,
"learning_rate": 8.954926464807238e-05,
"loss": 0.0135,
"step": 24910
},
{
"grad_norm": 0.19378982484340668,
"learning_rate": 8.953914597598347e-05,
"loss": 0.0165,
"step": 24920
},
{
"grad_norm": 0.22431515157222748,
"learning_rate": 8.952902297995303e-05,
"loss": 0.0145,
"step": 24930
},
{
"grad_norm": 0.20018118619918823,
"learning_rate": 8.951889566108804e-05,
"loss": 0.0126,
"step": 24940
},
{
"grad_norm": 0.1766684353351593,
"learning_rate": 8.950876402049606e-05,
"loss": 0.0136,
"step": 24950
},
{
"grad_norm": 0.17362891137599945,
"learning_rate": 8.949862805928504e-05,
"loss": 0.0143,
"step": 24960
},
{
"grad_norm": 0.2080824375152588,
"learning_rate": 8.948848777856343e-05,
"loss": 0.0123,
"step": 24970
},
{
"grad_norm": 0.25880157947540283,
"learning_rate": 8.947834317944017e-05,
"loss": 0.0168,
"step": 24980
},
{
"grad_norm": 0.14517757296562195,
"learning_rate": 8.946819426302466e-05,
"loss": 0.0147,
"step": 24990
},
{
"grad_norm": 0.23001737892627716,
"learning_rate": 8.945804103042676e-05,
"loss": 0.0145,
"step": 25000
},
{
"grad_norm": 0.16731934249401093,
"learning_rate": 8.944788348275681e-05,
"loss": 0.0138,
"step": 25010
},
{
"grad_norm": 0.2097262442111969,
"learning_rate": 8.943772162112565e-05,
"loss": 0.0133,
"step": 25020
},
{
"grad_norm": 0.21776002645492554,
"learning_rate": 8.942755544664454e-05,
"loss": 0.0144,
"step": 25030
},
{
"grad_norm": 0.21143537759780884,
"learning_rate": 8.941738496042525e-05,
"loss": 0.012,
"step": 25040
},
{
"grad_norm": 0.20147302746772766,
"learning_rate": 8.940721016357999e-05,
"loss": 0.014,
"step": 25050
},
{
"grad_norm": 0.17477922141551971,
"learning_rate": 8.939703105722148e-05,
"loss": 0.0149,
"step": 25060
},
{
"grad_norm": 0.23321568965911865,
"learning_rate": 8.93868476424629e-05,
"loss": 0.0179,
"step": 25070
},
{
"grad_norm": 0.19054847955703735,
"learning_rate": 8.937665992041786e-05,
"loss": 0.0132,
"step": 25080
},
{
"grad_norm": 0.19033202528953552,
"learning_rate": 8.93664678922005e-05,
"loss": 0.0138,
"step": 25090
},
{
"grad_norm": 0.21861092746257782,
"learning_rate": 8.93562715589254e-05,
"loss": 0.0152,
"step": 25100
},
{
"grad_norm": 0.1533270925283432,
"learning_rate": 8.934607092170762e-05,
"loss": 0.014,
"step": 25110
},
{
"grad_norm": 0.1720176637172699,
"learning_rate": 8.933586598166266e-05,
"loss": 0.0155,
"step": 25120
},
{
"grad_norm": 0.17896032333374023,
"learning_rate": 8.932565673990655e-05,
"loss": 0.0143,
"step": 25130
},
{
"grad_norm": 0.17787151038646698,
"learning_rate": 8.931544319755574e-05,
"loss": 0.0153,
"step": 25140
},
{
"grad_norm": 0.20071110129356384,
"learning_rate": 8.930522535572718e-05,
"loss": 0.0133,
"step": 25150
},
{
"grad_norm": 0.18091028928756714,
"learning_rate": 8.929500321553826e-05,
"loss": 0.0122,
"step": 25160
},
{
"grad_norm": 0.19485972821712494,
"learning_rate": 8.928477677810686e-05,
"loss": 0.0127,
"step": 25170
},
{
"grad_norm": 0.2231544554233551,
"learning_rate": 8.927454604455137e-05,
"loss": 0.0139,
"step": 25180
},
{
"grad_norm": 0.23672042787075043,
"learning_rate": 8.926431101599053e-05,
"loss": 0.014,
"step": 25190
},
{
"grad_norm": 0.18106228113174438,
"learning_rate": 8.925407169354369e-05,
"loss": 0.0116,
"step": 25200
},
{
"grad_norm": 0.16468533873558044,
"learning_rate": 8.92438280783306e-05,
"loss": 0.0113,
"step": 25210
},
{
"grad_norm": 0.2718585729598999,
"learning_rate": 8.923358017147146e-05,
"loss": 0.0163,
"step": 25220
},
{
"grad_norm": 0.2014102339744568,
"learning_rate": 8.922332797408697e-05,
"loss": 0.0144,
"step": 25230
},
{
"grad_norm": 0.2678363621234894,
"learning_rate": 8.921307148729831e-05,
"loss": 0.0137,
"step": 25240
},
{
"grad_norm": 0.17882150411605835,
"learning_rate": 8.920281071222712e-05,
"loss": 0.0121,
"step": 25250
},
{
"grad_norm": 0.24254170060157776,
"learning_rate": 8.919254564999548e-05,
"loss": 0.0122,
"step": 25260
},
{
"grad_norm": 0.22848983108997345,
"learning_rate": 8.918227630172598e-05,
"loss": 0.0148,
"step": 25270
},
{
"grad_norm": 0.2279837429523468,
"learning_rate": 8.917200266854165e-05,
"loss": 0.0133,
"step": 25280
},
{
"grad_norm": 0.2629374861717224,
"learning_rate": 8.9161724751566e-05,
"loss": 0.0159,
"step": 25290
},
{
"grad_norm": 0.18192367255687714,
"learning_rate": 8.915144255192302e-05,
"loss": 0.0143,
"step": 25300
},
{
"grad_norm": 0.2204187661409378,
"learning_rate": 8.914115607073714e-05,
"loss": 0.017,
"step": 25310
},
{
"grad_norm": 0.15653793513774872,
"learning_rate": 8.913086530913327e-05,
"loss": 0.0153,
"step": 25320
},
{
"grad_norm": 0.19547070562839508,
"learning_rate": 8.912057026823681e-05,
"loss": 0.0118,
"step": 25330
},
{
"grad_norm": 0.26632529497146606,
"learning_rate": 8.91102709491736e-05,
"loss": 0.013,
"step": 25340
},
{
"grad_norm": 0.23742526769638062,
"learning_rate": 8.909996735306996e-05,
"loss": 0.012,
"step": 25350
},
{
"grad_norm": 0.27239763736724854,
"learning_rate": 8.908965948105268e-05,
"loss": 0.0134,
"step": 25360
},
{
"grad_norm": 0.18642865121364594,
"learning_rate": 8.907934733424901e-05,
"loss": 0.0148,
"step": 25370
},
{
"grad_norm": 0.20425890386104584,
"learning_rate": 8.906903091378666e-05,
"loss": 0.0143,
"step": 25380
},
{
"grad_norm": 0.18161271512508392,
"learning_rate": 8.905871022079384e-05,
"loss": 0.0151,
"step": 25390
},
{
"grad_norm": 0.17329958081245422,
"learning_rate": 8.90483852563992e-05,
"loss": 0.0148,
"step": 25400
},
{
"grad_norm": 0.15716147422790527,
"learning_rate": 8.903805602173185e-05,
"loss": 0.0187,
"step": 25410
},
{
"grad_norm": 0.1766807734966278,
"learning_rate": 8.902772251792137e-05,
"loss": 0.0156,
"step": 25420
},
{
"grad_norm": 0.19636660814285278,
"learning_rate": 8.901738474609786e-05,
"loss": 0.0132,
"step": 25430
},
{
"grad_norm": 0.20554254949092865,
"learning_rate": 8.900704270739179e-05,
"loss": 0.016,
"step": 25440
},
{
"grad_norm": 0.14951704442501068,
"learning_rate": 8.89966964029342e-05,
"loss": 0.0111,
"step": 25450
},
{
"grad_norm": 0.21583913266658783,
"learning_rate": 8.898634583385652e-05,
"loss": 0.0156,
"step": 25460
},
{
"grad_norm": 0.2214123010635376,
"learning_rate": 8.897599100129065e-05,
"loss": 0.018,
"step": 25470
},
{
"grad_norm": 0.19157958030700684,
"learning_rate": 8.896563190636903e-05,
"loss": 0.0161,
"step": 25480
},
{
"grad_norm": 0.1713401824235916,
"learning_rate": 8.895526855022448e-05,
"loss": 0.0155,
"step": 25490
},
{
"grad_norm": 0.22314561903476715,
"learning_rate": 8.894490093399033e-05,
"loss": 0.014,
"step": 25500
},
{
"grad_norm": 0.1553162783384323,
"learning_rate": 8.893452905880035e-05,
"loss": 0.0143,
"step": 25510
},
{
"grad_norm": 0.19584983587265015,
"learning_rate": 8.892415292578883e-05,
"loss": 0.0177,
"step": 25520
},
{
"grad_norm": 0.20513120293617249,
"learning_rate": 8.891377253609046e-05,
"loss": 0.0163,
"step": 25530
},
{
"grad_norm": 0.1911878138780594,
"learning_rate": 8.890338789084043e-05,
"loss": 0.0161,
"step": 25540
},
{
"grad_norm": 0.18822386860847473,
"learning_rate": 8.88929989911744e-05,
"loss": 0.011,
"step": 25550
},
{
"grad_norm": 0.1524885892868042,
"learning_rate": 8.888260583822847e-05,
"loss": 0.0129,
"step": 25560
},
{
"grad_norm": 0.2081162929534912,
"learning_rate": 8.887220843313921e-05,
"loss": 0.0139,
"step": 25570
},
{
"grad_norm": 0.22650867700576782,
"learning_rate": 8.88618067770437e-05,
"loss": 0.0155,
"step": 25580
},
{
"grad_norm": 0.21249236166477203,
"learning_rate": 8.885140087107942e-05,
"loss": 0.0163,
"step": 25590
},
{
"grad_norm": 0.1845710575580597,
"learning_rate": 8.884099071638436e-05,
"loss": 0.0145,
"step": 25600
},
{
"grad_norm": 0.20314599573612213,
"learning_rate": 8.883057631409695e-05,
"loss": 0.012,
"step": 25610
},
{
"grad_norm": 0.22037655115127563,
"learning_rate": 8.882015766535608e-05,
"loss": 0.0141,
"step": 25620
},
{
"grad_norm": 0.22149570286273956,
"learning_rate": 8.880973477130115e-05,
"loss": 0.0156,
"step": 25630
},
{
"grad_norm": 0.19525587558746338,
"learning_rate": 8.879930763307197e-05,
"loss": 0.0133,
"step": 25640
},
{
"grad_norm": 0.17419621348381042,
"learning_rate": 8.878887625180884e-05,
"loss": 0.0137,
"step": 25650
},
{
"grad_norm": 0.25345706939697266,
"learning_rate": 8.877844062865253e-05,
"loss": 0.0144,
"step": 25660
},
{
"grad_norm": 0.2026941478252411,
"learning_rate": 8.876800076474424e-05,
"loss": 0.0191,
"step": 25670
},
{
"grad_norm": 0.19127890467643738,
"learning_rate": 8.875755666122568e-05,
"loss": 0.0156,
"step": 25680
},
{
"grad_norm": 0.22991883754730225,
"learning_rate": 8.8747108319239e-05,
"loss": 0.0115,
"step": 25690
},
{
"grad_norm": 0.18754807114601135,
"learning_rate": 8.87366557399268e-05,
"loss": 0.0151,
"step": 25700
},
{
"grad_norm": 0.18135304749011993,
"learning_rate": 8.872619892443217e-05,
"loss": 0.0151,
"step": 25710
},
{
"grad_norm": 0.16874587535858154,
"learning_rate": 8.871573787389865e-05,
"loss": 0.013,
"step": 25720
},
{
"grad_norm": 0.1952400803565979,
"learning_rate": 8.870527258947024e-05,
"loss": 0.0152,
"step": 25730
},
{
"grad_norm": 0.1622031182050705,
"learning_rate": 8.869480307229143e-05,
"loss": 0.0159,
"step": 25740
},
{
"grad_norm": 0.21519528329372406,
"learning_rate": 8.868432932350712e-05,
"loss": 0.0149,
"step": 25750
},
{
"grad_norm": 0.2549628019332886,
"learning_rate": 8.867385134426272e-05,
"loss": 0.0148,
"step": 25760
},
{
"grad_norm": 0.20081402361392975,
"learning_rate": 8.866336913570407e-05,
"loss": 0.0128,
"step": 25770
},
{
"grad_norm": 0.2072092890739441,
"learning_rate": 8.865288269897751e-05,
"loss": 0.0147,
"step": 25780
},
{
"grad_norm": 0.2012583464384079,
"learning_rate": 8.864239203522981e-05,
"loss": 0.015,
"step": 25790
},
{
"grad_norm": 0.2645418643951416,
"learning_rate": 8.863189714560822e-05,
"loss": 0.0119,
"step": 25800
},
{
"grad_norm": 0.1994146853685379,
"learning_rate": 8.862139803126043e-05,
"loss": 0.0136,
"step": 25810
},
{
"grad_norm": 0.16972285509109497,
"learning_rate": 8.861089469333463e-05,
"loss": 0.0137,
"step": 25820
},
{
"grad_norm": 0.18160787224769592,
"learning_rate": 8.860038713297944e-05,
"loss": 0.0157,
"step": 25830
},
{
"grad_norm": 0.15314006805419922,
"learning_rate": 8.858987535134394e-05,
"loss": 0.0137,
"step": 25840
},
{
"grad_norm": 0.12870992720127106,
"learning_rate": 8.857935934957769e-05,
"loss": 0.0137,
"step": 25850
},
{
"grad_norm": 0.19985873997211456,
"learning_rate": 8.856883912883071e-05,
"loss": 0.0175,
"step": 25860
},
{
"grad_norm": 0.1666407734155655,
"learning_rate": 8.855831469025346e-05,
"loss": 0.0122,
"step": 25870
},
{
"grad_norm": 0.14283858239650726,
"learning_rate": 8.854778603499689e-05,
"loss": 0.017,
"step": 25880
},
{
"grad_norm": 0.203786700963974,
"learning_rate": 8.85372531642124e-05,
"loss": 0.0135,
"step": 25890
},
{
"grad_norm": 0.20282699167728424,
"learning_rate": 8.852671607905185e-05,
"loss": 0.0134,
"step": 25900
},
{
"grad_norm": 0.22731496393680573,
"learning_rate": 8.851617478066754e-05,
"loss": 0.0158,
"step": 25910
},
{
"grad_norm": 0.19563531875610352,
"learning_rate": 8.850562927021227e-05,
"loss": 0.0149,
"step": 25920
},
{
"grad_norm": 0.1704259216785431,
"learning_rate": 8.849507954883928e-05,
"loss": 0.0113,
"step": 25930
},
{
"grad_norm": 0.1772235631942749,
"learning_rate": 8.848452561770226e-05,
"loss": 0.0113,
"step": 25940
},
{
"grad_norm": 0.19219110906124115,
"learning_rate": 8.847396747795538e-05,
"loss": 0.0138,
"step": 25950
},
{
"grad_norm": 0.16359460353851318,
"learning_rate": 8.846340513075327e-05,
"loss": 0.0125,
"step": 25960
},
{
"grad_norm": 0.15491141378879547,
"learning_rate": 8.845283857725099e-05,
"loss": 0.0109,
"step": 25970
},
{
"grad_norm": 0.1785901039838791,
"learning_rate": 8.844226781860409e-05,
"loss": 0.0134,
"step": 25980
},
{
"grad_norm": 0.21501228213310242,
"learning_rate": 8.84316928559686e-05,
"loss": 0.013,
"step": 25990
},
{
"grad_norm": 0.20008786022663116,
"learning_rate": 8.842111369050094e-05,
"loss": 0.012,
"step": 26000
},
{
"grad_norm": 0.21582910418510437,
"learning_rate": 8.841053032335808e-05,
"loss": 0.0189,
"step": 26010
},
{
"grad_norm": 0.24350406229496002,
"learning_rate": 8.839994275569735e-05,
"loss": 0.0133,
"step": 26020
},
{
"grad_norm": 0.20001371204853058,
"learning_rate": 8.838935098867662e-05,
"loss": 0.0166,
"step": 26030
},
{
"grad_norm": 0.27071765065193176,
"learning_rate": 8.837875502345418e-05,
"loss": 0.0133,
"step": 26040
},
{
"grad_norm": 0.22188922762870789,
"learning_rate": 8.83681548611888e-05,
"loss": 0.012,
"step": 26050
},
{
"grad_norm": 0.20551364123821259,
"learning_rate": 8.835755050303969e-05,
"loss": 0.0133,
"step": 26060
},
{
"grad_norm": 0.19251608848571777,
"learning_rate": 8.834694195016653e-05,
"loss": 0.0151,
"step": 26070
},
{
"grad_norm": 0.1723404973745346,
"learning_rate": 8.833632920372942e-05,
"loss": 0.0137,
"step": 26080
},
{
"grad_norm": 0.21637320518493652,
"learning_rate": 8.832571226488903e-05,
"loss": 0.0153,
"step": 26090
},
{
"grad_norm": 0.2538270056247711,
"learning_rate": 8.831509113480634e-05,
"loss": 0.0149,
"step": 26100
},
{
"grad_norm": 0.17992226779460907,
"learning_rate": 8.83044658146429e-05,
"loss": 0.0133,
"step": 26110
},
{
"grad_norm": 0.2082221955060959,
"learning_rate": 8.829383630556067e-05,
"loss": 0.0122,
"step": 26120
},
{
"grad_norm": 0.20343394577503204,
"learning_rate": 8.828320260872207e-05,
"loss": 0.0139,
"step": 26130
},
{
"grad_norm": 0.17990347743034363,
"learning_rate": 8.827256472529e-05,
"loss": 0.0128,
"step": 26140
},
{
"grad_norm": 0.2202722579240799,
"learning_rate": 8.826192265642778e-05,
"loss": 0.0157,
"step": 26150
},
{
"grad_norm": 0.18976671993732452,
"learning_rate": 8.825127640329923e-05,
"loss": 0.0119,
"step": 26160
},
{
"grad_norm": 0.21775297820568085,
"learning_rate": 8.824062596706861e-05,
"loss": 0.0164,
"step": 26170
},
{
"grad_norm": 0.19183680415153503,
"learning_rate": 8.822997134890062e-05,
"loss": 0.0122,
"step": 26180
},
{
"grad_norm": 0.1663697361946106,
"learning_rate": 8.821931254996044e-05,
"loss": 0.0133,
"step": 26190
},
{
"grad_norm": 0.1694713979959488,
"learning_rate": 8.82086495714137e-05,
"loss": 0.0154,
"step": 26200
},
{
"grad_norm": 0.18789827823638916,
"learning_rate": 8.81979824144265e-05,
"loss": 0.0162,
"step": 26210
},
{
"grad_norm": 0.21716323494911194,
"learning_rate": 8.818731108016536e-05,
"loss": 0.0142,
"step": 26220
},
{
"grad_norm": 0.20898616313934326,
"learning_rate": 8.81766355697973e-05,
"loss": 0.0176,
"step": 26230
},
{
"grad_norm": 0.20870767533779144,
"learning_rate": 8.816595588448977e-05,
"loss": 0.0129,
"step": 26240
},
{
"grad_norm": 0.18642646074295044,
"learning_rate": 8.81552720254107e-05,
"loss": 0.0113,
"step": 26250
},
{
"grad_norm": 0.20856960117816925,
"learning_rate": 8.814458399372842e-05,
"loss": 0.0135,
"step": 26260
},
{
"grad_norm": 0.16934159398078918,
"learning_rate": 8.813389179061181e-05,
"loss": 0.0123,
"step": 26270
},
{
"grad_norm": 0.19757625460624695,
"learning_rate": 8.812319541723012e-05,
"loss": 0.0105,
"step": 26280
},
{
"grad_norm": 0.16270165145397186,
"learning_rate": 8.811249487475309e-05,
"loss": 0.0122,
"step": 26290
},
{
"grad_norm": 0.2062361240386963,
"learning_rate": 8.810179016435092e-05,
"loss": 0.0152,
"step": 26300
},
{
"grad_norm": 0.19589756429195404,
"learning_rate": 8.809108128719428e-05,
"loss": 0.0121,
"step": 26310
},
{
"grad_norm": 0.12294069677591324,
"learning_rate": 8.808036824445424e-05,
"loss": 0.0146,
"step": 26320
},
{
"grad_norm": 0.17404891550540924,
"learning_rate": 8.806965103730238e-05,
"loss": 0.0153,
"step": 26330
},
{
"grad_norm": 0.2562452256679535,
"learning_rate": 8.805892966691074e-05,
"loss": 0.0143,
"step": 26340
},
{
"grad_norm": 0.23337846994400024,
"learning_rate": 8.804820413445175e-05,
"loss": 0.0164,
"step": 26350
},
{
"grad_norm": 0.2396707385778427,
"learning_rate": 8.803747444109837e-05,
"loss": 0.0145,
"step": 26360
},
{
"grad_norm": 0.1897064745426178,
"learning_rate": 8.802674058802399e-05,
"loss": 0.0126,
"step": 26370
},
{
"grad_norm": 0.19712260365486145,
"learning_rate": 8.801600257640241e-05,
"loss": 0.0157,
"step": 26380
},
{
"grad_norm": 0.17782928049564362,
"learning_rate": 8.800526040740795e-05,
"loss": 0.0155,
"step": 26390
},
{
"grad_norm": 0.1928723156452179,
"learning_rate": 8.799451408221535e-05,
"loss": 0.0127,
"step": 26400
},
{
"grad_norm": 0.17179611325263977,
"learning_rate": 8.798376360199982e-05,
"loss": 0.0134,
"step": 26410
},
{
"grad_norm": 0.17854979634284973,
"learning_rate": 8.797300896793701e-05,
"loss": 0.0144,
"step": 26420
},
{
"grad_norm": 0.18036052584648132,
"learning_rate": 8.796225018120302e-05,
"loss": 0.0126,
"step": 26430
},
{
"grad_norm": 0.15296590328216553,
"learning_rate": 8.795148724297444e-05,
"loss": 0.0144,
"step": 26440
},
{
"grad_norm": 0.14553052186965942,
"learning_rate": 8.794072015442825e-05,
"loss": 0.0122,
"step": 26450
},
{
"grad_norm": 0.2545054256916046,
"learning_rate": 8.792994891674198e-05,
"loss": 0.0134,
"step": 26460
},
{
"grad_norm": 0.2066628336906433,
"learning_rate": 8.79191735310935e-05,
"loss": 0.0135,
"step": 26470
},
{
"grad_norm": 0.20902188122272491,
"learning_rate": 8.790839399866122e-05,
"loss": 0.0142,
"step": 26480
},
{
"grad_norm": 0.1952153593301773,
"learning_rate": 8.789761032062397e-05,
"loss": 0.0151,
"step": 26490
},
{
"grad_norm": 0.1802971065044403,
"learning_rate": 8.788682249816103e-05,
"loss": 0.0128,
"step": 26500
},
{
"grad_norm": 0.16834264993667603,
"learning_rate": 8.787603053245215e-05,
"loss": 0.0138,
"step": 26510
},
{
"grad_norm": 0.22321510314941406,
"learning_rate": 8.78652344246775e-05,
"loss": 0.0133,
"step": 26520
},
{
"grad_norm": 0.18334518373012543,
"learning_rate": 8.785443417601776e-05,
"loss": 0.0137,
"step": 26530
},
{
"grad_norm": 0.18657852709293365,
"learning_rate": 8.784362978765401e-05,
"loss": 0.0159,
"step": 26540
},
{
"grad_norm": 0.2138308882713318,
"learning_rate": 8.783282126076779e-05,
"loss": 0.0142,
"step": 26550
},
{
"grad_norm": 0.154995858669281,
"learning_rate": 8.782200859654112e-05,
"loss": 0.0132,
"step": 26560
},
{
"grad_norm": 0.2004760503768921,
"learning_rate": 8.781119179615646e-05,
"loss": 0.0166,
"step": 26570
},
{
"grad_norm": 0.21278400719165802,
"learning_rate": 8.780037086079674e-05,
"loss": 0.016,
"step": 26580
},
{
"grad_norm": 0.18658854067325592,
"learning_rate": 8.778954579164527e-05,
"loss": 0.0118,
"step": 26590
},
{
"grad_norm": 0.17720405757427216,
"learning_rate": 8.777871658988588e-05,
"loss": 0.0127,
"step": 26600
},
{
"grad_norm": 0.2561233937740326,
"learning_rate": 8.776788325670285e-05,
"loss": 0.0164,
"step": 26610
},
{
"grad_norm": 0.17624244093894958,
"learning_rate": 8.775704579328089e-05,
"loss": 0.0156,
"step": 26620
},
{
"grad_norm": 0.3170694410800934,
"learning_rate": 8.774620420080517e-05,
"loss": 0.0142,
"step": 26630
},
{
"grad_norm": 0.1428108811378479,
"learning_rate": 8.773535848046131e-05,
"loss": 0.0129,
"step": 26640
},
{
"grad_norm": 0.15282820165157318,
"learning_rate": 8.772450863343538e-05,
"loss": 0.0122,
"step": 26650
},
{
"grad_norm": 0.1680191308259964,
"learning_rate": 8.77136546609139e-05,
"loss": 0.0136,
"step": 26660
},
{
"grad_norm": 0.1819058656692505,
"learning_rate": 8.770279656408385e-05,
"loss": 0.0131,
"step": 26670
},
{
"grad_norm": 0.15053746104240417,
"learning_rate": 8.769193434413265e-05,
"loss": 0.0121,
"step": 26680
},
{
"grad_norm": 0.15997923910617828,
"learning_rate": 8.76810680022482e-05,
"loss": 0.0128,
"step": 26690
},
{
"grad_norm": 0.19415059685707092,
"learning_rate": 8.767019753961878e-05,
"loss": 0.0122,
"step": 26700
},
{
"grad_norm": 0.17333634197711945,
"learning_rate": 8.765932295743321e-05,
"loss": 0.015,
"step": 26710
},
{
"grad_norm": 0.22342133522033691,
"learning_rate": 8.764844425688068e-05,
"loss": 0.0129,
"step": 26720
},
{
"grad_norm": 0.155137300491333,
"learning_rate": 8.763756143915092e-05,
"loss": 0.0121,
"step": 26730
},
{
"grad_norm": 0.16613472998142242,
"learning_rate": 8.7626674505434e-05,
"loss": 0.0107,
"step": 26740
},
{
"grad_norm": 0.16115880012512207,
"learning_rate": 8.761578345692053e-05,
"loss": 0.0126,
"step": 26750
},
{
"grad_norm": 0.21547505259513855,
"learning_rate": 8.760488829480156e-05,
"loss": 0.0128,
"step": 26760
},
{
"grad_norm": 0.23196235299110413,
"learning_rate": 8.759398902026854e-05,
"loss": 0.0133,
"step": 26770
},
{
"grad_norm": 0.21111854910850525,
"learning_rate": 8.758308563451339e-05,
"loss": 0.0149,
"step": 26780
},
{
"grad_norm": 0.19990575313568115,
"learning_rate": 8.75721781387285e-05,
"loss": 0.0101,
"step": 26790
},
{
"grad_norm": 0.1742956042289734,
"learning_rate": 8.75612665341067e-05,
"loss": 0.0134,
"step": 26800
},
{
"grad_norm": 0.17443598806858063,
"learning_rate": 8.755035082184126e-05,
"loss": 0.0131,
"step": 26810
},
{
"grad_norm": 0.1800273358821869,
"learning_rate": 8.753943100312592e-05,
"loss": 0.0126,
"step": 26820
},
{
"grad_norm": 0.16605105996131897,
"learning_rate": 8.752850707915484e-05,
"loss": 0.0148,
"step": 26830
},
{
"grad_norm": 0.18857042491436005,
"learning_rate": 8.751757905112264e-05,
"loss": 0.0121,
"step": 26840
},
{
"grad_norm": 0.1778421401977539,
"learning_rate": 8.75066469202244e-05,
"loss": 0.0129,
"step": 26850
},
{
"grad_norm": 0.1562901735305786,
"learning_rate": 8.749571068765567e-05,
"loss": 0.0114,
"step": 26860
},
{
"grad_norm": 0.22905878722667694,
"learning_rate": 8.748477035461238e-05,
"loss": 0.0145,
"step": 26870
},
{
"grad_norm": 0.3024975061416626,
"learning_rate": 8.747382592229095e-05,
"loss": 0.0147,
"step": 26880
},
{
"grad_norm": 0.16097016632556915,
"learning_rate": 8.746287739188828e-05,
"loss": 0.0167,
"step": 26890
},
{
"grad_norm": 0.13104328513145447,
"learning_rate": 8.745192476460165e-05,
"loss": 0.0127,
"step": 26900
},
{
"grad_norm": 0.15686337649822235,
"learning_rate": 8.744096804162882e-05,
"loss": 0.0132,
"step": 26910
},
{
"grad_norm": 0.17414389550685883,
"learning_rate": 8.743000722416804e-05,
"loss": 0.0131,
"step": 26920
},
{
"grad_norm": 0.2515980303287506,
"learning_rate": 8.741904231341793e-05,
"loss": 0.0135,
"step": 26930
},
{
"grad_norm": 0.1402544379234314,
"learning_rate": 8.740807331057762e-05,
"loss": 0.0142,
"step": 26940
},
{
"grad_norm": 0.20550718903541565,
"learning_rate": 8.739710021684667e-05,
"loss": 0.0169,
"step": 26950
},
{
"grad_norm": 0.21412119269371033,
"learning_rate": 8.738612303342503e-05,
"loss": 0.0153,
"step": 26960
},
{
"grad_norm": 0.23892386257648468,
"learning_rate": 8.73751417615132e-05,
"loss": 0.0136,
"step": 26970
},
{
"grad_norm": 0.20890675485134125,
"learning_rate": 8.736415640231208e-05,
"loss": 0.0131,
"step": 26980
},
{
"grad_norm": 0.1736547201871872,
"learning_rate": 8.735316695702297e-05,
"loss": 0.0144,
"step": 26990
},
{
"grad_norm": 0.17582859098911285,
"learning_rate": 8.734217342684769e-05,
"loss": 0.0126,
"step": 27000
},
{
"grad_norm": 0.2060476392507553,
"learning_rate": 8.733117581298847e-05,
"loss": 0.0145,
"step": 27010
},
{
"grad_norm": 0.20875637233257294,
"learning_rate": 8.732017411664796e-05,
"loss": 0.0127,
"step": 27020
},
{
"grad_norm": 0.16377364099025726,
"learning_rate": 8.730916833902936e-05,
"loss": 0.0132,
"step": 27030
},
{
"grad_norm": 0.21675199270248413,
"learning_rate": 8.729815848133618e-05,
"loss": 0.0117,
"step": 27040
},
{
"grad_norm": 0.172952339053154,
"learning_rate": 8.728714454477247e-05,
"loss": 0.0123,
"step": 27050
},
{
"grad_norm": 0.1896006464958191,
"learning_rate": 8.727612653054269e-05,
"loss": 0.0155,
"step": 27060
},
{
"grad_norm": 0.18309657275676727,
"learning_rate": 8.726510443985176e-05,
"loss": 0.0128,
"step": 27070
},
{
"grad_norm": 0.19412779808044434,
"learning_rate": 8.725407827390503e-05,
"loss": 0.0147,
"step": 27080
},
{
"grad_norm": 0.16986395418643951,
"learning_rate": 8.724304803390833e-05,
"loss": 0.0111,
"step": 27090
},
{
"grad_norm": 0.19317303597927094,
"learning_rate": 8.723201372106788e-05,
"loss": 0.0122,
"step": 27100
},
{
"grad_norm": 0.14348508417606354,
"learning_rate": 8.722097533659038e-05,
"loss": 0.012,
"step": 27110
},
{
"grad_norm": 0.18144506216049194,
"learning_rate": 8.720993288168299e-05,
"loss": 0.0138,
"step": 27120
},
{
"grad_norm": 0.1836540699005127,
"learning_rate": 8.719888635755327e-05,
"loss": 0.0131,
"step": 27130
},
{
"grad_norm": 0.1599224954843521,
"learning_rate": 8.718783576540928e-05,
"loss": 0.0116,
"step": 27140
},
{
"grad_norm": 0.21616582572460175,
"learning_rate": 8.717678110645948e-05,
"loss": 0.0151,
"step": 27150
},
{
"grad_norm": 0.17849023640155792,
"learning_rate": 8.716572238191279e-05,
"loss": 0.0159,
"step": 27160
},
{
"grad_norm": 0.2699224054813385,
"learning_rate": 8.715465959297857e-05,
"loss": 0.0168,
"step": 27170
},
{
"grad_norm": 0.19859661161899567,
"learning_rate": 8.714359274086665e-05,
"loss": 0.0131,
"step": 27180
},
{
"grad_norm": 0.19852979481220245,
"learning_rate": 8.713252182678726e-05,
"loss": 0.0157,
"step": 27190
},
{
"grad_norm": 0.17707674205303192,
"learning_rate": 8.712144685195112e-05,
"loss": 0.0126,
"step": 27200
},
{
"grad_norm": 0.21748629212379456,
"learning_rate": 8.711036781756936e-05,
"loss": 0.0141,
"step": 27210
},
{
"grad_norm": 0.17880921065807343,
"learning_rate": 8.709928472485357e-05,
"loss": 0.0111,
"step": 27220
},
{
"grad_norm": 0.24127905070781708,
"learning_rate": 8.708819757501579e-05,
"loss": 0.0127,
"step": 27230
},
{
"grad_norm": 0.22769764065742493,
"learning_rate": 8.707710636926846e-05,
"loss": 0.0131,
"step": 27240
},
{
"grad_norm": 0.24227939546108246,
"learning_rate": 8.706601110882455e-05,
"loss": 0.0149,
"step": 27250
},
{
"grad_norm": 0.19728736579418182,
"learning_rate": 8.705491179489738e-05,
"loss": 0.0129,
"step": 27260
},
{
"grad_norm": 0.17270460724830627,
"learning_rate": 8.704380842870077e-05,
"loss": 0.0122,
"step": 27270
},
{
"grad_norm": 0.1484808772802353,
"learning_rate": 8.703270101144895e-05,
"loss": 0.0117,
"step": 27280
},
{
"grad_norm": 0.19262616336345673,
"learning_rate": 8.702158954435664e-05,
"loss": 0.0135,
"step": 27290
},
{
"grad_norm": 0.23880822956562042,
"learning_rate": 8.701047402863896e-05,
"loss": 0.0159,
"step": 27300
},
{
"grad_norm": 0.18311969935894012,
"learning_rate": 8.699935446551148e-05,
"loss": 0.0144,
"step": 27310
},
{
"grad_norm": 0.18611888587474823,
"learning_rate": 8.698823085619022e-05,
"loss": 0.0138,
"step": 27320
},
{
"grad_norm": 0.17694741487503052,
"learning_rate": 8.697710320189166e-05,
"loss": 0.0112,
"step": 27330
},
{
"grad_norm": 0.1960524618625641,
"learning_rate": 8.696597150383268e-05,
"loss": 0.0118,
"step": 27340
},
{
"grad_norm": 0.2268739491701126,
"learning_rate": 8.695483576323063e-05,
"loss": 0.0126,
"step": 27350
},
{
"grad_norm": 0.14451560378074646,
"learning_rate": 8.69436959813033e-05,
"loss": 0.0112,
"step": 27360
},
{
"grad_norm": 0.1837204545736313,
"learning_rate": 8.693255215926892e-05,
"loss": 0.0135,
"step": 27370
},
{
"grad_norm": 0.22977836430072784,
"learning_rate": 8.692140429834617e-05,
"loss": 0.0137,
"step": 27380
},
{
"grad_norm": 0.18952079117298126,
"learning_rate": 8.691025239975415e-05,
"loss": 0.013,
"step": 27390
},
{
"grad_norm": 0.1871054768562317,
"learning_rate": 8.689909646471243e-05,
"loss": 0.0119,
"step": 27400
},
{
"grad_norm": 0.14948269724845886,
"learning_rate": 8.688793649444099e-05,
"loss": 0.0122,
"step": 27410
},
{
"grad_norm": 0.19388332962989807,
"learning_rate": 8.687677249016029e-05,
"loss": 0.0129,
"step": 27420
},
{
"grad_norm": 0.2243487536907196,
"learning_rate": 8.686560445309118e-05,
"loss": 0.0148,
"step": 27430
},
{
"grad_norm": 0.1868431121110916,
"learning_rate": 8.685443238445499e-05,
"loss": 0.0128,
"step": 27440
},
{
"grad_norm": 0.1388297975063324,
"learning_rate": 8.68432562854735e-05,
"loss": 0.0138,
"step": 27450
},
{
"grad_norm": 0.15557223558425903,
"learning_rate": 8.683207615736887e-05,
"loss": 0.0112,
"step": 27460
},
{
"grad_norm": 0.17421898245811462,
"learning_rate": 8.682089200136379e-05,
"loss": 0.0148,
"step": 27470
},
{
"grad_norm": 0.20171713829040527,
"learning_rate": 8.680970381868132e-05,
"loss": 0.0135,
"step": 27480
},
{
"grad_norm": 0.20627743005752563,
"learning_rate": 8.679851161054498e-05,
"loss": 0.0119,
"step": 27490
},
{
"grad_norm": 0.15651550889015198,
"learning_rate": 8.678731537817873e-05,
"loss": 0.0125,
"step": 27500
},
{
"grad_norm": 0.15714424848556519,
"learning_rate": 8.677611512280697e-05,
"loss": 0.0118,
"step": 27510
},
{
"grad_norm": 0.14530454576015472,
"learning_rate": 8.676491084565457e-05,
"loss": 0.0124,
"step": 27520
},
{
"grad_norm": 0.1886354237794876,
"learning_rate": 8.675370254794678e-05,
"loss": 0.0102,
"step": 27530
},
{
"grad_norm": 0.16549897193908691,
"learning_rate": 8.674249023090935e-05,
"loss": 0.0134,
"step": 27540
},
{
"grad_norm": 0.19222389161586761,
"learning_rate": 8.673127389576843e-05,
"loss": 0.0128,
"step": 27550
},
{
"grad_norm": 0.21641723811626434,
"learning_rate": 8.67200535437506e-05,
"loss": 0.0114,
"step": 27560
},
{
"grad_norm": 0.14683885872364044,
"learning_rate": 8.670882917608296e-05,
"loss": 0.0148,
"step": 27570
},
{
"grad_norm": 0.2236359715461731,
"learning_rate": 8.669760079399292e-05,
"loss": 0.0128,
"step": 27580
},
{
"grad_norm": 0.1656251698732376,
"learning_rate": 8.668636839870845e-05,
"loss": 0.0158,
"step": 27590
},
{
"grad_norm": 0.2022354155778885,
"learning_rate": 8.667513199145789e-05,
"loss": 0.0134,
"step": 27600
},
{
"grad_norm": 0.16965201497077942,
"learning_rate": 8.666389157347002e-05,
"loss": 0.0143,
"step": 27610
},
{
"grad_norm": 0.21600964665412903,
"learning_rate": 8.66526471459741e-05,
"loss": 0.0176,
"step": 27620
},
{
"grad_norm": 0.18543939292430878,
"learning_rate": 8.66413987101998e-05,
"loss": 0.0157,
"step": 27630
},
{
"grad_norm": 0.2895159125328064,
"learning_rate": 8.663014626737723e-05,
"loss": 0.0136,
"step": 27640
},
{
"grad_norm": 0.18034076690673828,
"learning_rate": 8.661888981873691e-05,
"loss": 0.0132,
"step": 27650
},
{
"grad_norm": 0.1948971003293991,
"learning_rate": 8.660762936550988e-05,
"loss": 0.0129,
"step": 27660
},
{
"grad_norm": 0.16037020087242126,
"learning_rate": 8.659636490892753e-05,
"loss": 0.0115,
"step": 27670
},
{
"grad_norm": 0.20323437452316284,
"learning_rate": 8.658509645022174e-05,
"loss": 0.011,
"step": 27680
},
{
"grad_norm": 0.2092437893152237,
"learning_rate": 8.657382399062481e-05,
"loss": 0.013,
"step": 27690
},
{
"grad_norm": 0.23704589903354645,
"learning_rate": 8.656254753136946e-05,
"loss": 0.0146,
"step": 27700
},
{
"grad_norm": 0.20448334515094757,
"learning_rate": 8.655126707368891e-05,
"loss": 0.0125,
"step": 27710
},
{
"grad_norm": 0.18248756229877472,
"learning_rate": 8.653998261881672e-05,
"loss": 0.0109,
"step": 27720
},
{
"grad_norm": 0.1941586583852768,
"learning_rate": 8.652869416798699e-05,
"loss": 0.0112,
"step": 27730
},
{
"grad_norm": 0.181466206908226,
"learning_rate": 8.651740172243417e-05,
"loss": 0.0128,
"step": 27740
},
{
"grad_norm": 0.18117640912532806,
"learning_rate": 8.65061052833932e-05,
"loss": 0.0137,
"step": 27750
},
{
"grad_norm": 0.15962450206279755,
"learning_rate": 8.649480485209945e-05,
"loss": 0.0118,
"step": 27760
},
{
"grad_norm": 0.17997871339321136,
"learning_rate": 8.64835004297887e-05,
"loss": 0.0143,
"step": 27770
},
{
"grad_norm": 0.20928233861923218,
"learning_rate": 8.64721920176972e-05,
"loss": 0.0106,
"step": 27780
},
{
"grad_norm": 0.18144932389259338,
"learning_rate": 8.646087961706164e-05,
"loss": 0.0135,
"step": 27790
},
{
"grad_norm": 0.17991112172603607,
"learning_rate": 8.644956322911908e-05,
"loss": 0.0116,
"step": 27800
},
{
"grad_norm": 0.17153599858283997,
"learning_rate": 8.643824285510709e-05,
"loss": 0.0122,
"step": 27810
},
{
"grad_norm": 0.23814387619495392,
"learning_rate": 8.642691849626364e-05,
"loss": 0.0153,
"step": 27820
},
{
"grad_norm": 0.18724940717220306,
"learning_rate": 8.641559015382717e-05,
"loss": 0.0118,
"step": 27830
},
{
"grad_norm": 0.15284952521324158,
"learning_rate": 8.640425782903649e-05,
"loss": 0.0116,
"step": 27840
},
{
"grad_norm": 0.18523651361465454,
"learning_rate": 8.639292152313091e-05,
"loss": 0.0126,
"step": 27850
},
{
"grad_norm": 0.17586593329906464,
"learning_rate": 8.638158123735015e-05,
"loss": 0.0134,
"step": 27860
},
{
"grad_norm": 0.17588144540786743,
"learning_rate": 8.637023697293436e-05,
"loss": 0.014,
"step": 27870
},
{
"grad_norm": 0.15913942456245422,
"learning_rate": 8.635888873112414e-05,
"loss": 0.0135,
"step": 27880
},
{
"grad_norm": 0.2409949004650116,
"learning_rate": 8.634753651316052e-05,
"loss": 0.0154,
"step": 27890
},
{
"grad_norm": 0.18579398095607758,
"learning_rate": 8.633618032028496e-05,
"loss": 0.0128,
"step": 27900
},
{
"grad_norm": 0.25115251541137695,
"learning_rate": 8.632482015373934e-05,
"loss": 0.0162,
"step": 27910
},
{
"grad_norm": 0.1500532329082489,
"learning_rate": 8.6313456014766e-05,
"loss": 0.0118,
"step": 27920
},
{
"grad_norm": 0.19730953872203827,
"learning_rate": 8.630208790460771e-05,
"loss": 0.0152,
"step": 27930
},
{
"grad_norm": 0.24407461285591125,
"learning_rate": 8.629071582450768e-05,
"loss": 0.0129,
"step": 27940
},
{
"grad_norm": 0.1834212690591812,
"learning_rate": 8.62793397757095e-05,
"loss": 0.0174,
"step": 27950
},
{
"grad_norm": 0.1906377226114273,
"learning_rate": 8.626795975945729e-05,
"loss": 0.0126,
"step": 27960
},
{
"grad_norm": 0.20611874759197235,
"learning_rate": 8.625657577699551e-05,
"loss": 0.0143,
"step": 27970
},
{
"grad_norm": 0.22802181541919708,
"learning_rate": 8.624518782956914e-05,
"loss": 0.0129,
"step": 27980
},
{
"grad_norm": 0.19341625273227692,
"learning_rate": 8.62337959184235e-05,
"loss": 0.0138,
"step": 27990
},
{
"grad_norm": 0.2044740617275238,
"learning_rate": 8.622240004480441e-05,
"loss": 0.0131,
"step": 28000
},
{
"grad_norm": 0.25316351652145386,
"learning_rate": 8.621100020995814e-05,
"loss": 0.0147,
"step": 28010
},
{
"grad_norm": 0.18751955032348633,
"learning_rate": 8.619959641513132e-05,
"loss": 0.0133,
"step": 28020
},
{
"grad_norm": 0.24041640758514404,
"learning_rate": 8.618818866157105e-05,
"loss": 0.0133,
"step": 28030
},
{
"grad_norm": 0.15245193243026733,
"learning_rate": 8.617677695052487e-05,
"loss": 0.0133,
"step": 28040
},
{
"grad_norm": 0.17806296050548553,
"learning_rate": 8.616536128324078e-05,
"loss": 0.0165,
"step": 28050
},
{
"grad_norm": 0.2666396200656891,
"learning_rate": 8.615394166096712e-05,
"loss": 0.0133,
"step": 28060
},
{
"grad_norm": 0.1795058250427246,
"learning_rate": 8.614251808495279e-05,
"loss": 0.012,
"step": 28070
},
{
"grad_norm": 0.1847299188375473,
"learning_rate": 8.6131090556447e-05,
"loss": 0.0121,
"step": 28080
},
{
"grad_norm": 0.16904352605342865,
"learning_rate": 8.611965907669947e-05,
"loss": 0.0101,
"step": 28090
},
{
"grad_norm": 0.2383386343717575,
"learning_rate": 8.610822364696034e-05,
"loss": 0.015,
"step": 28100
},
{
"grad_norm": 0.14433036744594574,
"learning_rate": 8.609678426848015e-05,
"loss": 0.0111,
"step": 28110
},
{
"grad_norm": 0.18644677102565765,
"learning_rate": 8.60853409425099e-05,
"loss": 0.0118,
"step": 28120
},
{
"grad_norm": 0.13823361694812775,
"learning_rate": 8.607389367030104e-05,
"loss": 0.0117,
"step": 28130
},
{
"grad_norm": 0.1907871514558792,
"learning_rate": 8.606244245310538e-05,
"loss": 0.013,
"step": 28140
},
{
"grad_norm": 0.18961817026138306,
"learning_rate": 8.605098729217525e-05,
"loss": 0.0102,
"step": 28150
},
{
"grad_norm": 0.1441236287355423,
"learning_rate": 8.603952818876335e-05,
"loss": 0.0134,
"step": 28160
},
{
"grad_norm": 0.16797539591789246,
"learning_rate": 8.602806514412281e-05,
"loss": 0.0152,
"step": 28170
},
{
"grad_norm": 0.18718941509723663,
"learning_rate": 8.601659815950726e-05,
"loss": 0.0121,
"step": 28180
},
{
"grad_norm": 0.19151407480239868,
"learning_rate": 8.600512723617067e-05,
"loss": 0.0144,
"step": 28190
},
{
"grad_norm": 0.1924273818731308,
"learning_rate": 8.59936523753675e-05,
"loss": 0.0126,
"step": 28200
},
{
"grad_norm": 0.12516771256923676,
"learning_rate": 8.598217357835264e-05,
"loss": 0.0115,
"step": 28210
},
{
"grad_norm": 0.1990862786769867,
"learning_rate": 8.597069084638135e-05,
"loss": 0.0136,
"step": 28220
},
{
"grad_norm": 0.1527869999408722,
"learning_rate": 8.595920418070939e-05,
"loss": 0.0115,
"step": 28230
},
{
"grad_norm": 0.147520512342453,
"learning_rate": 8.594771358259295e-05,
"loss": 0.0136,
"step": 28240
},
{
"grad_norm": 0.3083323836326599,
"learning_rate": 8.593621905328858e-05,
"loss": 0.0123,
"step": 28250
},
{
"grad_norm": 0.15598563849925995,
"learning_rate": 8.592472059405333e-05,
"loss": 0.0136,
"step": 28260
},
{
"grad_norm": 0.15811626613140106,
"learning_rate": 8.591321820614464e-05,
"loss": 0.0104,
"step": 28270
},
{
"grad_norm": 0.17856080830097198,
"learning_rate": 8.590171189082041e-05,
"loss": 0.0123,
"step": 28280
},
{
"grad_norm": 0.1795537769794464,
"learning_rate": 8.589020164933894e-05,
"loss": 0.0107,
"step": 28290
},
{
"grad_norm": 0.17818602919578552,
"learning_rate": 8.587868748295898e-05,
"loss": 0.0126,
"step": 28300
},
{
"grad_norm": 0.12199205905199051,
"learning_rate": 8.586716939293971e-05,
"loss": 0.0137,
"step": 28310
},
{
"grad_norm": 0.19500301778316498,
"learning_rate": 8.58556473805407e-05,
"loss": 0.0144,
"step": 28320
},
{
"grad_norm": 0.16873633861541748,
"learning_rate": 8.584412144702202e-05,
"loss": 0.0108,
"step": 28330
},
{
"grad_norm": 0.16769428551197052,
"learning_rate": 8.58325915936441e-05,
"loss": 0.0112,
"step": 28340
},
{
"grad_norm": 0.14065007865428925,
"learning_rate": 8.582105782166783e-05,
"loss": 0.0111,
"step": 28350
},
{
"grad_norm": 0.18229477107524872,
"learning_rate": 8.580952013235455e-05,
"loss": 0.0108,
"step": 28360
},
{
"grad_norm": 0.2582354247570038,
"learning_rate": 8.579797852696596e-05,
"loss": 0.0119,
"step": 28370
},
{
"grad_norm": 0.18101894855499268,
"learning_rate": 8.578643300676428e-05,
"loss": 0.0136,
"step": 28380
},
{
"grad_norm": 0.18537025153636932,
"learning_rate": 8.577488357301209e-05,
"loss": 0.0143,
"step": 28390
},
{
"grad_norm": 0.21140316128730774,
"learning_rate": 8.576333022697242e-05,
"loss": 0.0109,
"step": 28400
},
{
"grad_norm": 0.16614943742752075,
"learning_rate": 8.575177296990873e-05,
"loss": 0.0165,
"step": 28410
},
{
"grad_norm": 0.16962102055549622,
"learning_rate": 8.574021180308489e-05,
"loss": 0.0119,
"step": 28420
},
{
"grad_norm": 0.19473373889923096,
"learning_rate": 8.572864672776523e-05,
"loss": 0.013,
"step": 28430
},
{
"grad_norm": 0.19951479136943817,
"learning_rate": 8.571707774521447e-05,
"loss": 0.013,
"step": 28440
},
{
"grad_norm": 0.19348788261413574,
"learning_rate": 8.57055048566978e-05,
"loss": 0.0136,
"step": 28450
},
{
"grad_norm": 0.24966219067573547,
"learning_rate": 8.569392806348078e-05,
"loss": 0.0142,
"step": 28460
},
{
"grad_norm": 0.18688538670539856,
"learning_rate": 8.568234736682947e-05,
"loss": 0.0153,
"step": 28470
},
{
"grad_norm": 0.18647579848766327,
"learning_rate": 8.567076276801029e-05,
"loss": 0.0147,
"step": 28480
},
{
"grad_norm": 0.14750352501869202,
"learning_rate": 8.565917426829013e-05,
"loss": 0.0127,
"step": 28490
},
{
"grad_norm": 0.20846188068389893,
"learning_rate": 8.564758186893628e-05,
"loss": 0.0138,
"step": 28500
},
{
"grad_norm": 0.18948955833911896,
"learning_rate": 8.563598557121649e-05,
"loss": 0.0105,
"step": 28510
},
{
"grad_norm": 0.2050204575061798,
"learning_rate": 8.562438537639888e-05,
"loss": 0.0127,
"step": 28520
},
{
"grad_norm": 0.20523923635482788,
"learning_rate": 8.561278128575206e-05,
"loss": 0.0137,
"step": 28530
},
{
"grad_norm": 0.195975661277771,
"learning_rate": 8.5601173300545e-05,
"loss": 0.013,
"step": 28540
},
{
"grad_norm": 0.21493001282215118,
"learning_rate": 8.558956142204717e-05,
"loss": 0.0139,
"step": 28550
},
{
"grad_norm": 0.1399243324995041,
"learning_rate": 8.55779456515284e-05,
"loss": 0.0143,
"step": 28560
},
{
"grad_norm": 0.11191680282354355,
"learning_rate": 8.556632599025898e-05,
"loss": 0.0126,
"step": 28570
},
{
"grad_norm": 0.18659430742263794,
"learning_rate": 8.555470243950964e-05,
"loss": 0.0128,
"step": 28580
},
{
"grad_norm": 0.17905016243457794,
"learning_rate": 8.554307500055148e-05,
"loss": 0.0122,
"step": 28590
},
{
"grad_norm": 0.165314719080925,
"learning_rate": 8.553144367465609e-05,
"loss": 0.0117,
"step": 28600
},
{
"grad_norm": 0.1951138973236084,
"learning_rate": 8.551980846309544e-05,
"loss": 0.0119,
"step": 28610
},
{
"grad_norm": 0.17219603061676025,
"learning_rate": 8.550816936714193e-05,
"loss": 0.0122,
"step": 28620
},
{
"grad_norm": 0.18891964852809906,
"learning_rate": 8.549652638806841e-05,
"loss": 0.0155,
"step": 28630
},
{
"grad_norm": 0.15256862342357635,
"learning_rate": 8.548487952714812e-05,
"loss": 0.0119,
"step": 28640
},
{
"grad_norm": 0.18874719738960266,
"learning_rate": 8.547322878565478e-05,
"loss": 0.0142,
"step": 28650
},
{
"grad_norm": 0.2267162799835205,
"learning_rate": 8.546157416486245e-05,
"loss": 0.0111,
"step": 28660
},
{
"grad_norm": 0.18347524106502533,
"learning_rate": 8.54499156660457e-05,
"loss": 0.0123,
"step": 28670
},
{
"grad_norm": 0.2024136781692505,
"learning_rate": 8.543825329047947e-05,
"loss": 0.0141,
"step": 28680
},
{
"grad_norm": 0.19219805300235748,
"learning_rate": 8.542658703943913e-05,
"loss": 0.013,
"step": 28690
},
{
"grad_norm": 0.20163536071777344,
"learning_rate": 8.541491691420051e-05,
"loss": 0.0143,
"step": 28700
},
{
"grad_norm": 0.20172348618507385,
"learning_rate": 8.54032429160398e-05,
"loss": 0.0133,
"step": 28710
},
{
"grad_norm": 0.22644536197185516,
"learning_rate": 8.539156504623369e-05,
"loss": 0.0114,
"step": 28720
},
{
"grad_norm": 0.20980650186538696,
"learning_rate": 8.537988330605923e-05,
"loss": 0.014,
"step": 28730
},
{
"grad_norm": 0.18033821880817413,
"learning_rate": 8.536819769679393e-05,
"loss": 0.0115,
"step": 28740
},
{
"grad_norm": 0.15536117553710938,
"learning_rate": 8.53565082197157e-05,
"loss": 0.0141,
"step": 28750
},
{
"grad_norm": 0.21701377630233765,
"learning_rate": 8.534481487610289e-05,
"loss": 0.0132,
"step": 28760
},
{
"grad_norm": 0.19303607940673828,
"learning_rate": 8.533311766723428e-05,
"loss": 0.0143,
"step": 28770
},
{
"grad_norm": 0.16089658439159393,
"learning_rate": 8.532141659438901e-05,
"loss": 0.0136,
"step": 28780
},
{
"grad_norm": 0.16383571922779083,
"learning_rate": 8.530971165884675e-05,
"loss": 0.0113,
"step": 28790
},
{
"grad_norm": 0.175754114985466,
"learning_rate": 8.529800286188752e-05,
"loss": 0.0116,
"step": 28800
},
{
"grad_norm": 0.14809109270572662,
"learning_rate": 8.528629020479175e-05,
"loss": 0.0114,
"step": 28810
},
{
"grad_norm": 0.18954558670520782,
"learning_rate": 8.527457368884033e-05,
"loss": 0.0148,
"step": 28820
},
{
"grad_norm": 0.15618684887886047,
"learning_rate": 8.526285331531458e-05,
"loss": 0.0111,
"step": 28830
},
{
"grad_norm": 0.19574670493602753,
"learning_rate": 8.525112908549621e-05,
"loss": 0.0134,
"step": 28840
},
{
"grad_norm": 0.22770293056964874,
"learning_rate": 8.523940100066735e-05,
"loss": 0.0118,
"step": 28850
},
{
"grad_norm": 0.14471113681793213,
"learning_rate": 8.52276690621106e-05,
"loss": 0.0124,
"step": 28860
},
{
"grad_norm": 0.163731649518013,
"learning_rate": 8.521593327110889e-05,
"loss": 0.0125,
"step": 28870
},
{
"grad_norm": 0.20715269446372986,
"learning_rate": 8.520419362894569e-05,
"loss": 0.0117,
"step": 28880
},
{
"grad_norm": 0.2353220283985138,
"learning_rate": 8.51924501369048e-05,
"loss": 0.0112,
"step": 28890
},
{
"grad_norm": 0.16888241469860077,
"learning_rate": 8.518070279627047e-05,
"loss": 0.0131,
"step": 28900
},
{
"grad_norm": 0.18317660689353943,
"learning_rate": 8.516895160832737e-05,
"loss": 0.0113,
"step": 28910
},
{
"grad_norm": 0.1913095861673355,
"learning_rate": 8.515719657436061e-05,
"loss": 0.0106,
"step": 28920
},
{
"grad_norm": 0.16577529907226562,
"learning_rate": 8.514543769565568e-05,
"loss": 0.0152,
"step": 28930
},
{
"grad_norm": 0.13656282424926758,
"learning_rate": 8.513367497349853e-05,
"loss": 0.0114,
"step": 28940
},
{
"grad_norm": 0.19207116961479187,
"learning_rate": 8.51219084091755e-05,
"loss": 0.0126,
"step": 28950
},
{
"grad_norm": 0.1479605734348297,
"learning_rate": 8.511013800397338e-05,
"loss": 0.0153,
"step": 28960
},
{
"grad_norm": 0.17726215720176697,
"learning_rate": 8.509836375917937e-05,
"loss": 0.0126,
"step": 28970
},
{
"grad_norm": 0.24432331323623657,
"learning_rate": 8.508658567608104e-05,
"loss": 0.0125,
"step": 28980
},
{
"grad_norm": 0.1874416172504425,
"learning_rate": 8.507480375596647e-05,
"loss": 0.0148,
"step": 28990
},
{
"grad_norm": 0.23588506877422333,
"learning_rate": 8.506301800012408e-05,
"loss": 0.0134,
"step": 29000
},
{
"grad_norm": 0.19267885386943817,
"learning_rate": 8.505122840984278e-05,
"loss": 0.0123,
"step": 29010
},
{
"grad_norm": 0.14792896807193756,
"learning_rate": 8.503943498641182e-05,
"loss": 0.0133,
"step": 29020
},
{
"grad_norm": 0.21639281511306763,
"learning_rate": 8.502763773112095e-05,
"loss": 0.012,
"step": 29030
},
{
"grad_norm": 0.19488239288330078,
"learning_rate": 8.501583664526026e-05,
"loss": 0.0157,
"step": 29040
},
{
"grad_norm": 0.1856304556131363,
"learning_rate": 8.500403173012032e-05,
"loss": 0.0097,
"step": 29050
},
{
"grad_norm": 0.15881025791168213,
"learning_rate": 8.499222298699211e-05,
"loss": 0.0121,
"step": 29060
},
{
"grad_norm": 0.19815614819526672,
"learning_rate": 8.498041041716701e-05,
"loss": 0.0135,
"step": 29070
},
{
"grad_norm": 0.16614429652690887,
"learning_rate": 8.496859402193681e-05,
"loss": 0.013,
"step": 29080
},
{
"grad_norm": 0.2017555981874466,
"learning_rate": 8.495677380259374e-05,
"loss": 0.0121,
"step": 29090
},
{
"grad_norm": 0.21205055713653564,
"learning_rate": 8.494494976043045e-05,
"loss": 0.0129,
"step": 29100
},
{
"grad_norm": 0.15883712470531464,
"learning_rate": 8.493312189673998e-05,
"loss": 0.0117,
"step": 29110
},
{
"grad_norm": 0.22783473134040833,
"learning_rate": 8.492129021281584e-05,
"loss": 0.0133,
"step": 29120
},
{
"grad_norm": 0.2064107209444046,
"learning_rate": 8.490945470995188e-05,
"loss": 0.0136,
"step": 29130
},
{
"grad_norm": 0.14446167647838593,
"learning_rate": 8.489761538944247e-05,
"loss": 0.0147,
"step": 29140
},
{
"grad_norm": 0.2097829133272171,
"learning_rate": 8.48857722525823e-05,
"loss": 0.0138,
"step": 29150
},
{
"grad_norm": 0.19569893181324005,
"learning_rate": 8.487392530066652e-05,
"loss": 0.0148,
"step": 29160
},
{
"grad_norm": 0.20244553685188293,
"learning_rate": 8.486207453499069e-05,
"loss": 0.0129,
"step": 29170
},
{
"grad_norm": 0.1293058842420578,
"learning_rate": 8.485021995685082e-05,
"loss": 0.0132,
"step": 29180
},
{
"grad_norm": 0.19508272409439087,
"learning_rate": 8.483836156754328e-05,
"loss": 0.0129,
"step": 29190
},
{
"grad_norm": 0.13909363746643066,
"learning_rate": 8.482649936836491e-05,
"loss": 0.0104,
"step": 29200
},
{
"grad_norm": 0.16894637048244476,
"learning_rate": 8.481463336061293e-05,
"loss": 0.0118,
"step": 29210
},
{
"grad_norm": 0.17273752391338348,
"learning_rate": 8.480276354558496e-05,
"loss": 0.0119,
"step": 29220
},
{
"grad_norm": 0.21895958483219147,
"learning_rate": 8.479088992457913e-05,
"loss": 0.0139,
"step": 29230
},
{
"grad_norm": 0.17813675105571747,
"learning_rate": 8.477901249889387e-05,
"loss": 0.016,
"step": 29240
},
{
"grad_norm": 0.20593753457069397,
"learning_rate": 8.47671312698281e-05,
"loss": 0.0136,
"step": 29250
},
{
"grad_norm": 0.1907685250043869,
"learning_rate": 8.475524623868112e-05,
"loss": 0.0119,
"step": 29260
},
{
"grad_norm": 0.1305512636899948,
"learning_rate": 8.474335740675266e-05,
"loss": 0.0115,
"step": 29270
},
{
"grad_norm": 0.16224084794521332,
"learning_rate": 8.473146477534289e-05,
"loss": 0.0117,
"step": 29280
},
{
"grad_norm": 0.1430281549692154,
"learning_rate": 8.471956834575232e-05,
"loss": 0.0126,
"step": 29290
},
{
"grad_norm": 0.1667489856481552,
"learning_rate": 8.470766811928197e-05,
"loss": 0.0133,
"step": 29300
},
{
"grad_norm": 0.1585753858089447,
"learning_rate": 8.469576409723323e-05,
"loss": 0.0117,
"step": 29310
},
{
"grad_norm": 0.17134876549243927,
"learning_rate": 8.468385628090788e-05,
"loss": 0.0123,
"step": 29320
},
{
"grad_norm": 0.1318117082118988,
"learning_rate": 8.467194467160815e-05,
"loss": 0.0133,
"step": 29330
},
{
"grad_norm": 0.16312025487422943,
"learning_rate": 8.466002927063667e-05,
"loss": 0.0106,
"step": 29340
},
{
"grad_norm": 0.16416870057582855,
"learning_rate": 8.464811007929651e-05,
"loss": 0.013,
"step": 29350
},
{
"grad_norm": 0.19791357219219208,
"learning_rate": 8.463618709889114e-05,
"loss": 0.0133,
"step": 29360
},
{
"grad_norm": 0.14834952354431152,
"learning_rate": 8.462426033072442e-05,
"loss": 0.0131,
"step": 29370
},
{
"grad_norm": 0.20697247982025146,
"learning_rate": 8.461232977610061e-05,
"loss": 0.0122,
"step": 29380
},
{
"grad_norm": 0.187699556350708,
"learning_rate": 8.46003954363245e-05,
"loss": 0.0119,
"step": 29390
},
{
"grad_norm": 0.18760277330875397,
"learning_rate": 8.458845731270115e-05,
"loss": 0.012,
"step": 29400
},
{
"grad_norm": 0.17339487373828888,
"learning_rate": 8.45765154065361e-05,
"loss": 0.0099,
"step": 29410
},
{
"grad_norm": 0.1999693661928177,
"learning_rate": 8.456456971913532e-05,
"loss": 0.0098,
"step": 29420
},
{
"grad_norm": 0.15931078791618347,
"learning_rate": 8.455262025180517e-05,
"loss": 0.0121,
"step": 29430
},
{
"grad_norm": 0.15840600430965424,
"learning_rate": 8.454066700585242e-05,
"loss": 0.0114,
"step": 29440
},
{
"grad_norm": 0.17567718029022217,
"learning_rate": 8.452870998258423e-05,
"loss": 0.012,
"step": 29450
},
{
"grad_norm": 0.1787082403898239,
"learning_rate": 8.451674918330825e-05,
"loss": 0.0132,
"step": 29460
},
{
"grad_norm": 0.18018962442874908,
"learning_rate": 8.450478460933246e-05,
"loss": 0.0105,
"step": 29470
},
{
"grad_norm": 0.15651020407676697,
"learning_rate": 8.449281626196532e-05,
"loss": 0.0127,
"step": 29480
},
{
"grad_norm": 0.18134862184524536,
"learning_rate": 8.448084414251564e-05,
"loss": 0.01,
"step": 29490
},
{
"grad_norm": 0.2032192200422287,
"learning_rate": 8.446886825229271e-05,
"loss": 0.0121,
"step": 29500
},
{
"grad_norm": 0.14923639595508575,
"learning_rate": 8.445688859260615e-05,
"loss": 0.0132,
"step": 29510
},
{
"grad_norm": 0.23174427449703217,
"learning_rate": 8.444490516476606e-05,
"loss": 0.0137,
"step": 29520
},
{
"grad_norm": 0.1746220737695694,
"learning_rate": 8.443291797008293e-05,
"loss": 0.0108,
"step": 29530
},
{
"grad_norm": 0.177895188331604,
"learning_rate": 8.442092700986765e-05,
"loss": 0.0155,
"step": 29540
},
{
"grad_norm": 0.1803567260503769,
"learning_rate": 8.440893228543156e-05,
"loss": 0.0119,
"step": 29550
},
{
"grad_norm": 0.20342227816581726,
"learning_rate": 8.439693379808638e-05,
"loss": 0.0132,
"step": 29560
},
{
"grad_norm": 0.16396351158618927,
"learning_rate": 8.43849315491442e-05,
"loss": 0.0124,
"step": 29570
},
{
"grad_norm": 0.1352321207523346,
"learning_rate": 8.437292553991763e-05,
"loss": 0.0109,
"step": 29580
},
{
"grad_norm": 0.19388611614704132,
"learning_rate": 8.436091577171959e-05,
"loss": 0.0114,
"step": 29590
},
{
"grad_norm": 0.23504306375980377,
"learning_rate": 8.434890224586347e-05,
"loss": 0.0144,
"step": 29600
},
{
"grad_norm": 0.27649304270744324,
"learning_rate": 8.433688496366303e-05,
"loss": 0.0154,
"step": 29610
},
{
"grad_norm": 0.21790184080600739,
"learning_rate": 8.432486392643248e-05,
"loss": 0.0133,
"step": 29620
},
{
"grad_norm": 0.23398706316947937,
"learning_rate": 8.431283913548643e-05,
"loss": 0.0113,
"step": 29630
},
{
"grad_norm": 0.16118748486042023,
"learning_rate": 8.430081059213985e-05,
"loss": 0.0114,
"step": 29640
},
{
"grad_norm": 0.1283956617116928,
"learning_rate": 8.428877829770823e-05,
"loss": 0.0118,
"step": 29650
},
{
"grad_norm": 0.20732879638671875,
"learning_rate": 8.427674225350735e-05,
"loss": 0.0123,
"step": 29660
},
{
"grad_norm": 0.18760187923908234,
"learning_rate": 8.426470246085347e-05,
"loss": 0.011,
"step": 29670
},
{
"grad_norm": 0.14654579758644104,
"learning_rate": 8.425265892106324e-05,
"loss": 0.0108,
"step": 29680
},
{
"grad_norm": 0.15680843591690063,
"learning_rate": 8.424061163545374e-05,
"loss": 0.0117,
"step": 29690
},
{
"grad_norm": 0.1776723712682724,
"learning_rate": 8.422856060534243e-05,
"loss": 0.0121,
"step": 29700
},
{
"grad_norm": 0.1705937683582306,
"learning_rate": 8.421650583204718e-05,
"loss": 0.0122,
"step": 29710
},
{
"grad_norm": 0.17986483871936798,
"learning_rate": 8.420444731688633e-05,
"loss": 0.0116,
"step": 29720
},
{
"grad_norm": 0.17028284072875977,
"learning_rate": 8.419238506117852e-05,
"loss": 0.0155,
"step": 29730
},
{
"grad_norm": 0.18307262659072876,
"learning_rate": 8.418031906624289e-05,
"loss": 0.0107,
"step": 29740
},
{
"grad_norm": 0.18997417390346527,
"learning_rate": 8.416824933339898e-05,
"loss": 0.0136,
"step": 29750
},
{
"grad_norm": 0.14866510033607483,
"learning_rate": 8.415617586396667e-05,
"loss": 0.0131,
"step": 29760
},
{
"grad_norm": 0.1801193207502365,
"learning_rate": 8.414409865926632e-05,
"loss": 0.0149,
"step": 29770
},
{
"grad_norm": 0.18089812994003296,
"learning_rate": 8.413201772061867e-05,
"loss": 0.0122,
"step": 29780
},
{
"grad_norm": 0.20078693330287933,
"learning_rate": 8.411993304934488e-05,
"loss": 0.0129,
"step": 29790
},
{
"grad_norm": 0.2205827832221985,
"learning_rate": 8.410784464676654e-05,
"loss": 0.0155,
"step": 29800
},
{
"grad_norm": 0.16440054774284363,
"learning_rate": 8.409575251420556e-05,
"loss": 0.0104,
"step": 29810
},
{
"grad_norm": 0.14770565927028656,
"learning_rate": 8.408365665298435e-05,
"loss": 0.0112,
"step": 29820
},
{
"grad_norm": 0.1756041944026947,
"learning_rate": 8.40715570644257e-05,
"loss": 0.0138,
"step": 29830
},
{
"grad_norm": 0.19235102832317352,
"learning_rate": 8.40594537498528e-05,
"loss": 0.0115,
"step": 29840
},
{
"grad_norm": 0.24119681119918823,
"learning_rate": 8.404734671058924e-05,
"loss": 0.0149,
"step": 29850
},
{
"grad_norm": 0.19353336095809937,
"learning_rate": 8.403523594795902e-05,
"loss": 0.0138,
"step": 29860
},
{
"grad_norm": 0.16554079949855804,
"learning_rate": 8.402312146328659e-05,
"loss": 0.0137,
"step": 29870
},
{
"grad_norm": 0.18109650909900665,
"learning_rate": 8.401100325789675e-05,
"loss": 0.0143,
"step": 29880
},
{
"grad_norm": 0.22677451372146606,
"learning_rate": 8.399888133311472e-05,
"loss": 0.0156,
"step": 29890
},
{
"grad_norm": 0.14986349642276764,
"learning_rate": 8.398675569026613e-05,
"loss": 0.0118,
"step": 29900
},
{
"grad_norm": 0.18745896220207214,
"learning_rate": 8.397462633067705e-05,
"loss": 0.0133,
"step": 29910
},
{
"grad_norm": 0.16973541676998138,
"learning_rate": 8.396249325567392e-05,
"loss": 0.0125,
"step": 29920
},
{
"grad_norm": 0.16721414029598236,
"learning_rate": 8.395035646658357e-05,
"loss": 0.0151,
"step": 29930
},
{
"grad_norm": 0.15872403979301453,
"learning_rate": 8.39382159647333e-05,
"loss": 0.0137,
"step": 29940
},
{
"grad_norm": 0.20979169011116028,
"learning_rate": 8.392607175145075e-05,
"loss": 0.0135,
"step": 29950
},
{
"grad_norm": 0.1652858406305313,
"learning_rate": 8.3913923828064e-05,
"loss": 0.0106,
"step": 29960
},
{
"grad_norm": 0.179422065615654,
"learning_rate": 8.390177219590152e-05,
"loss": 0.0105,
"step": 29970
},
{
"grad_norm": 0.1856880486011505,
"learning_rate": 8.388961685629222e-05,
"loss": 0.011,
"step": 29980
},
{
"grad_norm": 0.2280905395746231,
"learning_rate": 8.387745781056536e-05,
"loss": 0.0114,
"step": 29990
},
{
"grad_norm": 0.16093143820762634,
"learning_rate": 8.386529506005065e-05,
"loss": 0.0122,
"step": 30000
}
],
"logging_steps": 10,
"max_steps": 100000,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 5000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 48,
"trial_name": null,
"trial_params": null
}