{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 88.49557522123894,
"eval_steps": 500,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08849557522123894,
"grad_norm": 6.029814720153809,
"learning_rate": 1.8e-07,
"loss": 0.8556,
"step": 10
},
{
"epoch": 0.17699115044247787,
"grad_norm": 5.713383674621582,
"learning_rate": 3.8e-07,
"loss": 0.8595,
"step": 20
},
{
"epoch": 0.26548672566371684,
"grad_norm": 5.548460960388184,
"learning_rate": 5.8e-07,
"loss": 0.8439,
"step": 30
},
{
"epoch": 0.35398230088495575,
"grad_norm": 4.973753929138184,
"learning_rate": 7.8e-07,
"loss": 0.8031,
"step": 40
},
{
"epoch": 0.4424778761061947,
"grad_norm": 4.316400051116943,
"learning_rate": 9.8e-07,
"loss": 0.7054,
"step": 50
},
{
"epoch": 0.5309734513274337,
"grad_norm": 2.757798194885254,
"learning_rate": 1.18e-06,
"loss": 0.6014,
"step": 60
},
{
"epoch": 0.6194690265486725,
"grad_norm": 2.4811291694641113,
"learning_rate": 1.3800000000000001e-06,
"loss": 0.4621,
"step": 70
},
{
"epoch": 0.7079646017699115,
"grad_norm": 1.1165071725845337,
"learning_rate": 1.5800000000000003e-06,
"loss": 0.3772,
"step": 80
},
{
"epoch": 0.7964601769911505,
"grad_norm": 0.6678406596183777,
"learning_rate": 1.7800000000000001e-06,
"loss": 0.2607,
"step": 90
},
{
"epoch": 0.8849557522123894,
"grad_norm": 0.5095370411872864,
"learning_rate": 1.98e-06,
"loss": 0.2246,
"step": 100
},
{
"epoch": 0.9734513274336283,
"grad_norm": 0.3825031518936157,
"learning_rate": 2.1800000000000003e-06,
"loss": 0.1869,
"step": 110
},
{
"epoch": 1.0619469026548674,
"grad_norm": 0.31909793615341187,
"learning_rate": 2.38e-06,
"loss": 0.1633,
"step": 120
},
{
"epoch": 1.1504424778761062,
"grad_norm": 0.27751412987709045,
"learning_rate": 2.5800000000000003e-06,
"loss": 0.1487,
"step": 130
},
{
"epoch": 1.238938053097345,
"grad_norm": 0.2241702675819397,
"learning_rate": 2.78e-06,
"loss": 0.1311,
"step": 140
},
{
"epoch": 1.3274336283185841,
"grad_norm": 0.29508182406425476,
"learning_rate": 2.9800000000000003e-06,
"loss": 0.1256,
"step": 150
},
{
"epoch": 1.415929203539823,
"grad_norm": 0.20176096260547638,
"learning_rate": 3.1800000000000005e-06,
"loss": 0.1161,
"step": 160
},
{
"epoch": 1.504424778761062,
"grad_norm": 0.23275262117385864,
"learning_rate": 3.38e-06,
"loss": 0.1121,
"step": 170
},
{
"epoch": 1.592920353982301,
"grad_norm": 0.21292689442634583,
"learning_rate": 3.58e-06,
"loss": 0.1022,
"step": 180
},
{
"epoch": 1.6814159292035398,
"grad_norm": 0.352565735578537,
"learning_rate": 3.7800000000000002e-06,
"loss": 0.0989,
"step": 190
},
{
"epoch": 1.7699115044247788,
"grad_norm": 0.1692201793193817,
"learning_rate": 3.98e-06,
"loss": 0.0942,
"step": 200
},
{
"epoch": 1.8584070796460177,
"grad_norm": 0.1878710687160492,
"learning_rate": 4.18e-06,
"loss": 0.0852,
"step": 210
},
{
"epoch": 1.9469026548672566,
"grad_norm": 0.18498444557189941,
"learning_rate": 4.38e-06,
"loss": 0.0827,
"step": 220
},
{
"epoch": 2.0353982300884956,
"grad_norm": 0.18947617709636688,
"learning_rate": 4.58e-06,
"loss": 0.0785,
"step": 230
},
{
"epoch": 2.1238938053097347,
"grad_norm": 0.18357056379318237,
"learning_rate": 4.780000000000001e-06,
"loss": 0.0751,
"step": 240
},
{
"epoch": 2.2123893805309733,
"grad_norm": 0.1946367770433426,
"learning_rate": 4.98e-06,
"loss": 0.0724,
"step": 250
},
{
"epoch": 2.3008849557522124,
"grad_norm": 0.17680567502975464,
"learning_rate": 5.18e-06,
"loss": 0.0673,
"step": 260
},
{
"epoch": 2.3893805309734515,
"grad_norm": 0.1475750356912613,
"learning_rate": 5.38e-06,
"loss": 0.0649,
"step": 270
},
{
"epoch": 2.47787610619469,
"grad_norm": 0.16969521343708038,
"learning_rate": 5.580000000000001e-06,
"loss": 0.0626,
"step": 280
},
{
"epoch": 2.566371681415929,
"grad_norm": 0.1681053340435028,
"learning_rate": 5.78e-06,
"loss": 0.0592,
"step": 290
},
{
"epoch": 2.6548672566371683,
"grad_norm": 0.1768426150083542,
"learning_rate": 5.98e-06,
"loss": 0.058,
"step": 300
},
{
"epoch": 2.7433628318584073,
"grad_norm": 0.21786381304264069,
"learning_rate": 6.18e-06,
"loss": 0.056,
"step": 310
},
{
"epoch": 2.831858407079646,
"grad_norm": 0.1621713936328888,
"learning_rate": 6.38e-06,
"loss": 0.0552,
"step": 320
},
{
"epoch": 2.920353982300885,
"grad_norm": 0.19948254525661469,
"learning_rate": 6.58e-06,
"loss": 0.0514,
"step": 330
},
{
"epoch": 3.0088495575221237,
"grad_norm": 0.2901497185230255,
"learning_rate": 6.78e-06,
"loss": 0.0499,
"step": 340
},
{
"epoch": 3.0973451327433628,
"grad_norm": 0.21824175119400024,
"learning_rate": 6.98e-06,
"loss": 0.0469,
"step": 350
},
{
"epoch": 3.185840707964602,
"grad_norm": 0.18174271285533905,
"learning_rate": 7.180000000000001e-06,
"loss": 0.0481,
"step": 360
},
{
"epoch": 3.274336283185841,
"grad_norm": 0.19068759679794312,
"learning_rate": 7.3800000000000005e-06,
"loss": 0.0461,
"step": 370
},
{
"epoch": 3.3628318584070795,
"grad_norm": 0.23360292613506317,
"learning_rate": 7.580000000000001e-06,
"loss": 0.0418,
"step": 380
},
{
"epoch": 3.4513274336283186,
"grad_norm": 0.1604355126619339,
"learning_rate": 7.78e-06,
"loss": 0.0404,
"step": 390
},
{
"epoch": 3.5398230088495577,
"grad_norm": 0.1779727339744568,
"learning_rate": 7.98e-06,
"loss": 0.0401,
"step": 400
},
{
"epoch": 3.6283185840707963,
"grad_norm": 0.15471251308918,
"learning_rate": 8.18e-06,
"loss": 0.0389,
"step": 410
},
{
"epoch": 3.7168141592920354,
"grad_norm": 0.1907050460577011,
"learning_rate": 8.380000000000001e-06,
"loss": 0.0383,
"step": 420
},
{
"epoch": 3.8053097345132745,
"grad_norm": 0.180426225066185,
"learning_rate": 8.580000000000001e-06,
"loss": 0.0357,
"step": 430
},
{
"epoch": 3.893805309734513,
"grad_norm": 0.16844674944877625,
"learning_rate": 8.78e-06,
"loss": 0.0359,
"step": 440
},
{
"epoch": 3.982300884955752,
"grad_norm": 0.17488037049770355,
"learning_rate": 8.98e-06,
"loss": 0.0353,
"step": 450
},
{
"epoch": 4.070796460176991,
"grad_norm": 0.1792651265859604,
"learning_rate": 9.180000000000002e-06,
"loss": 0.0327,
"step": 460
},
{
"epoch": 4.15929203539823,
"grad_norm": 0.19573146104812622,
"learning_rate": 9.38e-06,
"loss": 0.0326,
"step": 470
},
{
"epoch": 4.247787610619469,
"grad_norm": 0.26041167974472046,
"learning_rate": 9.58e-06,
"loss": 0.0321,
"step": 480
},
{
"epoch": 4.336283185840708,
"grad_norm": 0.18472440540790558,
"learning_rate": 9.78e-06,
"loss": 0.0304,
"step": 490
},
{
"epoch": 4.424778761061947,
"grad_norm": 0.14232812821865082,
"learning_rate": 9.980000000000001e-06,
"loss": 0.032,
"step": 500
},
{
"epoch": 4.513274336283186,
"grad_norm": 0.18009839951992035,
"learning_rate": 1.018e-05,
"loss": 0.0294,
"step": 510
},
{
"epoch": 4.601769911504425,
"grad_norm": 0.24435491859912872,
"learning_rate": 1.038e-05,
"loss": 0.0296,
"step": 520
},
{
"epoch": 4.6902654867256635,
"grad_norm": 0.2446989119052887,
"learning_rate": 1.058e-05,
"loss": 0.0278,
"step": 530
},
{
"epoch": 4.778761061946903,
"grad_norm": 0.18773755431175232,
"learning_rate": 1.0780000000000002e-05,
"loss": 0.0284,
"step": 540
},
{
"epoch": 4.867256637168142,
"grad_norm": 0.21854208409786224,
"learning_rate": 1.098e-05,
"loss": 0.0284,
"step": 550
},
{
"epoch": 4.95575221238938,
"grad_norm": 0.18152262270450592,
"learning_rate": 1.118e-05,
"loss": 0.0275,
"step": 560
},
{
"epoch": 5.04424778761062,
"grad_norm": 0.20031234622001648,
"learning_rate": 1.1380000000000001e-05,
"loss": 0.0261,
"step": 570
},
{
"epoch": 5.132743362831858,
"grad_norm": 0.19933964312076569,
"learning_rate": 1.1580000000000001e-05,
"loss": 0.0266,
"step": 580
},
{
"epoch": 5.221238938053097,
"grad_norm": 0.18228860199451447,
"learning_rate": 1.178e-05,
"loss": 0.0259,
"step": 590
},
{
"epoch": 5.3097345132743365,
"grad_norm": 0.224607452750206,
"learning_rate": 1.198e-05,
"loss": 0.0273,
"step": 600
},
{
"epoch": 5.398230088495575,
"grad_norm": 0.2106594741344452,
"learning_rate": 1.2180000000000002e-05,
"loss": 0.0261,
"step": 610
},
{
"epoch": 5.486725663716814,
"grad_norm": 0.28607967495918274,
"learning_rate": 1.238e-05,
"loss": 0.0258,
"step": 620
},
{
"epoch": 5.575221238938053,
"grad_norm": 0.20972339808940887,
"learning_rate": 1.258e-05,
"loss": 0.026,
"step": 630
},
{
"epoch": 5.663716814159292,
"grad_norm": 0.2404285967350006,
"learning_rate": 1.278e-05,
"loss": 0.024,
"step": 640
},
{
"epoch": 5.752212389380531,
"grad_norm": 0.24622325599193573,
"learning_rate": 1.2980000000000001e-05,
"loss": 0.0242,
"step": 650
},
{
"epoch": 5.84070796460177,
"grad_norm": 0.19178038835525513,
"learning_rate": 1.3180000000000001e-05,
"loss": 0.0238,
"step": 660
},
{
"epoch": 5.929203539823009,
"grad_norm": 0.26031139492988586,
"learning_rate": 1.338e-05,
"loss": 0.0233,
"step": 670
},
{
"epoch": 6.017699115044247,
"grad_norm": 0.2246655374765396,
"learning_rate": 1.358e-05,
"loss": 0.0236,
"step": 680
},
{
"epoch": 6.106194690265487,
"grad_norm": 0.2755095064640045,
"learning_rate": 1.3780000000000002e-05,
"loss": 0.0228,
"step": 690
},
{
"epoch": 6.1946902654867255,
"grad_norm": 0.17769227921962738,
"learning_rate": 1.3980000000000002e-05,
"loss": 0.024,
"step": 700
},
{
"epoch": 6.283185840707965,
"grad_norm": 0.21013042330741882,
"learning_rate": 1.4180000000000001e-05,
"loss": 0.0236,
"step": 710
},
{
"epoch": 6.371681415929204,
"grad_norm": 0.22938348352909088,
"learning_rate": 1.4380000000000001e-05,
"loss": 0.0228,
"step": 720
},
{
"epoch": 6.460176991150442,
"grad_norm": 0.18899478018283844,
"learning_rate": 1.4580000000000003e-05,
"loss": 0.0221,
"step": 730
},
{
"epoch": 6.548672566371682,
"grad_norm": 0.2541691064834595,
"learning_rate": 1.4779999999999999e-05,
"loss": 0.0215,
"step": 740
},
{
"epoch": 6.6371681415929205,
"grad_norm": 0.24163757264614105,
"learning_rate": 1.4979999999999999e-05,
"loss": 0.0225,
"step": 750
},
{
"epoch": 6.725663716814159,
"grad_norm": 0.1695130616426468,
"learning_rate": 1.518e-05,
"loss": 0.022,
"step": 760
},
{
"epoch": 6.814159292035399,
"grad_norm": 0.20755034685134888,
"learning_rate": 1.538e-05,
"loss": 0.0223,
"step": 770
},
{
"epoch": 6.902654867256637,
"grad_norm": 0.2123480886220932,
"learning_rate": 1.558e-05,
"loss": 0.021,
"step": 780
},
{
"epoch": 6.991150442477876,
"grad_norm": 0.1984192132949829,
"learning_rate": 1.578e-05,
"loss": 0.0204,
"step": 790
},
{
"epoch": 7.079646017699115,
"grad_norm": 0.20947502553462982,
"learning_rate": 1.598e-05,
"loss": 0.0208,
"step": 800
},
{
"epoch": 7.168141592920354,
"grad_norm": 0.18240587413311005,
"learning_rate": 1.618e-05,
"loss": 0.0208,
"step": 810
},
{
"epoch": 7.256637168141593,
"grad_norm": 0.2724882960319519,
"learning_rate": 1.6380000000000002e-05,
"loss": 0.0206,
"step": 820
},
{
"epoch": 7.345132743362832,
"grad_norm": 0.1758035570383072,
"learning_rate": 1.658e-05,
"loss": 0.0207,
"step": 830
},
{
"epoch": 7.433628318584071,
"grad_norm": 0.22411009669303894,
"learning_rate": 1.6780000000000002e-05,
"loss": 0.0208,
"step": 840
},
{
"epoch": 7.522123893805309,
"grad_norm": 0.23918718099594116,
"learning_rate": 1.698e-05,
"loss": 0.0217,
"step": 850
},
{
"epoch": 7.610619469026549,
"grad_norm": 0.17702209949493408,
"learning_rate": 1.718e-05,
"loss": 0.021,
"step": 860
},
{
"epoch": 7.699115044247788,
"grad_norm": 0.17671605944633484,
"learning_rate": 1.7380000000000003e-05,
"loss": 0.0201,
"step": 870
},
{
"epoch": 7.787610619469026,
"grad_norm": 0.24670277535915375,
"learning_rate": 1.758e-05,
"loss": 0.0195,
"step": 880
},
{
"epoch": 7.876106194690266,
"grad_norm": 0.21365919709205627,
"learning_rate": 1.7780000000000003e-05,
"loss": 0.0194,
"step": 890
},
{
"epoch": 7.964601769911504,
"grad_norm": 0.192793071269989,
"learning_rate": 1.798e-05,
"loss": 0.0197,
"step": 900
},
{
"epoch": 8.053097345132743,
"grad_norm": 0.21986353397369385,
"learning_rate": 1.818e-05,
"loss": 0.0192,
"step": 910
},
{
"epoch": 8.141592920353983,
"grad_norm": 0.22282905876636505,
"learning_rate": 1.838e-05,
"loss": 0.0198,
"step": 920
},
{
"epoch": 8.230088495575222,
"grad_norm": 0.21167707443237305,
"learning_rate": 1.858e-05,
"loss": 0.02,
"step": 930
},
{
"epoch": 8.31858407079646,
"grad_norm": 0.24645684659481049,
"learning_rate": 1.878e-05,
"loss": 0.0187,
"step": 940
},
{
"epoch": 8.4070796460177,
"grad_norm": 0.2618500888347626,
"learning_rate": 1.898e-05,
"loss": 0.0184,
"step": 950
},
{
"epoch": 8.495575221238939,
"grad_norm": 0.2853642404079437,
"learning_rate": 1.918e-05,
"loss": 0.02,
"step": 960
},
{
"epoch": 8.584070796460177,
"grad_norm": 0.22935572266578674,
"learning_rate": 1.938e-05,
"loss": 0.0187,
"step": 970
},
{
"epoch": 8.672566371681416,
"grad_norm": 0.18018950521945953,
"learning_rate": 1.9580000000000002e-05,
"loss": 0.0187,
"step": 980
},
{
"epoch": 8.761061946902656,
"grad_norm": 0.2860763669013977,
"learning_rate": 1.978e-05,
"loss": 0.019,
"step": 990
},
{
"epoch": 8.849557522123893,
"grad_norm": 0.19469276070594788,
"learning_rate": 1.9980000000000002e-05,
"loss": 0.0191,
"step": 1000
},
{
"epoch": 8.938053097345133,
"grad_norm": 0.2059198021888733,
"learning_rate": 2.0180000000000003e-05,
"loss": 0.0177,
"step": 1010
},
{
"epoch": 9.026548672566372,
"grad_norm": 0.22872787714004517,
"learning_rate": 2.038e-05,
"loss": 0.0187,
"step": 1020
},
{
"epoch": 9.11504424778761,
"grad_norm": 0.21850481629371643,
"learning_rate": 2.0580000000000003e-05,
"loss": 0.0177,
"step": 1030
},
{
"epoch": 9.20353982300885,
"grad_norm": 0.3010428249835968,
"learning_rate": 2.078e-05,
"loss": 0.0183,
"step": 1040
},
{
"epoch": 9.29203539823009,
"grad_norm": 0.30720841884613037,
"learning_rate": 2.098e-05,
"loss": 0.0175,
"step": 1050
},
{
"epoch": 9.380530973451327,
"grad_norm": 0.25102201104164124,
"learning_rate": 2.118e-05,
"loss": 0.0182,
"step": 1060
},
{
"epoch": 9.469026548672566,
"grad_norm": 0.22691412270069122,
"learning_rate": 2.138e-05,
"loss": 0.018,
"step": 1070
},
{
"epoch": 9.557522123893806,
"grad_norm": 0.25092026591300964,
"learning_rate": 2.158e-05,
"loss": 0.0187,
"step": 1080
},
{
"epoch": 9.646017699115044,
"grad_norm": 0.21945276856422424,
"learning_rate": 2.178e-05,
"loss": 0.0178,
"step": 1090
},
{
"epoch": 9.734513274336283,
"grad_norm": 0.2584983706474304,
"learning_rate": 2.198e-05,
"loss": 0.0178,
"step": 1100
},
{
"epoch": 9.823008849557523,
"grad_norm": 0.23842653632164001,
"learning_rate": 2.218e-05,
"loss": 0.0176,
"step": 1110
},
{
"epoch": 9.91150442477876,
"grad_norm": 0.17505544424057007,
"learning_rate": 2.2380000000000003e-05,
"loss": 0.0182,
"step": 1120
},
{
"epoch": 10.0,
"grad_norm": 0.19007155299186707,
"learning_rate": 2.258e-05,
"loss": 0.0187,
"step": 1130
},
{
"epoch": 10.08849557522124,
"grad_norm": 0.24683013558387756,
"learning_rate": 2.2780000000000002e-05,
"loss": 0.0169,
"step": 1140
},
{
"epoch": 10.176991150442477,
"grad_norm": 0.2595357298851013,
"learning_rate": 2.298e-05,
"loss": 0.0171,
"step": 1150
},
{
"epoch": 10.265486725663717,
"grad_norm": 0.2851751744747162,
"learning_rate": 2.318e-05,
"loss": 0.0176,
"step": 1160
},
{
"epoch": 10.353982300884956,
"grad_norm": 0.2224997580051422,
"learning_rate": 2.3380000000000003e-05,
"loss": 0.0175,
"step": 1170
},
{
"epoch": 10.442477876106194,
"grad_norm": 0.2187778502702713,
"learning_rate": 2.358e-05,
"loss": 0.0164,
"step": 1180
},
{
"epoch": 10.530973451327434,
"grad_norm": 0.21035735309123993,
"learning_rate": 2.3780000000000003e-05,
"loss": 0.0165,
"step": 1190
},
{
"epoch": 10.619469026548673,
"grad_norm": 0.23563535511493683,
"learning_rate": 2.398e-05,
"loss": 0.0165,
"step": 1200
},
{
"epoch": 10.70796460176991,
"grad_norm": 0.20609630644321442,
"learning_rate": 2.418e-05,
"loss": 0.0161,
"step": 1210
},
{
"epoch": 10.79646017699115,
"grad_norm": 0.17000283300876617,
"learning_rate": 2.438e-05,
"loss": 0.0168,
"step": 1220
},
{
"epoch": 10.88495575221239,
"grad_norm": 0.22076988220214844,
"learning_rate": 2.4580000000000002e-05,
"loss": 0.0163,
"step": 1230
},
{
"epoch": 10.973451327433628,
"grad_norm": 0.18816152215003967,
"learning_rate": 2.478e-05,
"loss": 0.0168,
"step": 1240
},
{
"epoch": 11.061946902654867,
"grad_norm": 0.19269531965255737,
"learning_rate": 2.498e-05,
"loss": 0.0167,
"step": 1250
},
{
"epoch": 11.150442477876107,
"grad_norm": 0.195516899228096,
"learning_rate": 2.5180000000000003e-05,
"loss": 0.0155,
"step": 1260
},
{
"epoch": 11.238938053097344,
"grad_norm": 0.19087128341197968,
"learning_rate": 2.5380000000000004e-05,
"loss": 0.0165,
"step": 1270
},
{
"epoch": 11.327433628318584,
"grad_norm": 0.21465033292770386,
"learning_rate": 2.5580000000000002e-05,
"loss": 0.0157,
"step": 1280
},
{
"epoch": 11.415929203539823,
"grad_norm": 0.21152853965759277,
"learning_rate": 2.5779999999999997e-05,
"loss": 0.016,
"step": 1290
},
{
"epoch": 11.504424778761061,
"grad_norm": 0.26900339126586914,
"learning_rate": 2.598e-05,
"loss": 0.0153,
"step": 1300
},
{
"epoch": 11.5929203539823,
"grad_norm": 0.23565775156021118,
"learning_rate": 2.618e-05,
"loss": 0.0154,
"step": 1310
},
{
"epoch": 11.68141592920354,
"grad_norm": 0.23392872512340546,
"learning_rate": 2.6379999999999998e-05,
"loss": 0.0162,
"step": 1320
},
{
"epoch": 11.769911504424778,
"grad_norm": 0.23122538626194,
"learning_rate": 2.658e-05,
"loss": 0.0156,
"step": 1330
},
{
"epoch": 11.858407079646017,
"grad_norm": 0.2104092538356781,
"learning_rate": 2.678e-05,
"loss": 0.0161,
"step": 1340
},
{
"epoch": 11.946902654867257,
"grad_norm": 0.16211697459220886,
"learning_rate": 2.698e-05,
"loss": 0.0151,
"step": 1350
},
{
"epoch": 12.035398230088495,
"grad_norm": 0.22679129242897034,
"learning_rate": 2.718e-05,
"loss": 0.0157,
"step": 1360
},
{
"epoch": 12.123893805309734,
"grad_norm": 0.2528381645679474,
"learning_rate": 2.738e-05,
"loss": 0.0152,
"step": 1370
},
{
"epoch": 12.212389380530974,
"grad_norm": 0.257403701543808,
"learning_rate": 2.758e-05,
"loss": 0.0154,
"step": 1380
},
{
"epoch": 12.300884955752213,
"grad_norm": 0.22304107248783112,
"learning_rate": 2.778e-05,
"loss": 0.0151,
"step": 1390
},
{
"epoch": 12.389380530973451,
"grad_norm": 0.23130843043327332,
"learning_rate": 2.798e-05,
"loss": 0.0149,
"step": 1400
},
{
"epoch": 12.47787610619469,
"grad_norm": 0.22789767384529114,
"learning_rate": 2.818e-05,
"loss": 0.0153,
"step": 1410
},
{
"epoch": 12.56637168141593,
"grad_norm": 0.24470973014831543,
"learning_rate": 2.8380000000000003e-05,
"loss": 0.0143,
"step": 1420
},
{
"epoch": 12.654867256637168,
"grad_norm": 0.14789143204689026,
"learning_rate": 2.858e-05,
"loss": 0.0151,
"step": 1430
},
{
"epoch": 12.743362831858407,
"grad_norm": 0.19800685346126556,
"learning_rate": 2.8780000000000002e-05,
"loss": 0.0144,
"step": 1440
},
{
"epoch": 12.831858407079647,
"grad_norm": 0.19041651487350464,
"learning_rate": 2.898e-05,
"loss": 0.0161,
"step": 1450
},
{
"epoch": 12.920353982300885,
"grad_norm": 0.20260532200336456,
"learning_rate": 2.9180000000000002e-05,
"loss": 0.016,
"step": 1460
},
{
"epoch": 13.008849557522124,
"grad_norm": 0.25979915261268616,
"learning_rate": 2.9380000000000003e-05,
"loss": 0.0152,
"step": 1470
},
{
"epoch": 13.097345132743364,
"grad_norm": 0.2235214114189148,
"learning_rate": 2.958e-05,
"loss": 0.0152,
"step": 1480
},
{
"epoch": 13.185840707964601,
"grad_norm": 0.2452046126127243,
"learning_rate": 2.9780000000000003e-05,
"loss": 0.0155,
"step": 1490
},
{
"epoch": 13.274336283185841,
"grad_norm": 0.18082256615161896,
"learning_rate": 2.998e-05,
"loss": 0.0156,
"step": 1500
},
{
"epoch": 13.36283185840708,
"grad_norm": 0.2241387665271759,
"learning_rate": 3.0180000000000002e-05,
"loss": 0.0152,
"step": 1510
},
{
"epoch": 13.451327433628318,
"grad_norm": 0.19304688274860382,
"learning_rate": 3.0380000000000004e-05,
"loss": 0.0145,
"step": 1520
},
{
"epoch": 13.539823008849558,
"grad_norm": 0.2725679874420166,
"learning_rate": 3.058e-05,
"loss": 0.0149,
"step": 1530
},
{
"epoch": 13.628318584070797,
"grad_norm": 0.22139526903629303,
"learning_rate": 3.078e-05,
"loss": 0.0141,
"step": 1540
},
{
"epoch": 13.716814159292035,
"grad_norm": 0.2034563273191452,
"learning_rate": 3.0980000000000005e-05,
"loss": 0.0138,
"step": 1550
},
{
"epoch": 13.805309734513274,
"grad_norm": 0.20761550962924957,
"learning_rate": 3.118e-05,
"loss": 0.014,
"step": 1560
},
{
"epoch": 13.893805309734514,
"grad_norm": 0.21596471965312958,
"learning_rate": 3.138e-05,
"loss": 0.0141,
"step": 1570
},
{
"epoch": 13.982300884955752,
"grad_norm": 0.19941861927509308,
"learning_rate": 3.1580000000000006e-05,
"loss": 0.0141,
"step": 1580
},
{
"epoch": 14.070796460176991,
"grad_norm": 0.17475123703479767,
"learning_rate": 3.1780000000000004e-05,
"loss": 0.0142,
"step": 1590
},
{
"epoch": 14.15929203539823,
"grad_norm": 0.2039012759923935,
"learning_rate": 3.198e-05,
"loss": 0.0147,
"step": 1600
},
{
"epoch": 14.247787610619469,
"grad_norm": 0.23782622814178467,
"learning_rate": 3.218e-05,
"loss": 0.0143,
"step": 1610
},
{
"epoch": 14.336283185840708,
"grad_norm": 0.2800450325012207,
"learning_rate": 3.238e-05,
"loss": 0.0141,
"step": 1620
},
{
"epoch": 14.424778761061948,
"grad_norm": 0.21049116551876068,
"learning_rate": 3.2579999999999996e-05,
"loss": 0.0135,
"step": 1630
},
{
"epoch": 14.513274336283185,
"grad_norm": 0.22005024552345276,
"learning_rate": 3.278e-05,
"loss": 0.0143,
"step": 1640
},
{
"epoch": 14.601769911504425,
"grad_norm": 0.22420290112495422,
"learning_rate": 3.298e-05,
"loss": 0.0139,
"step": 1650
},
{
"epoch": 14.690265486725664,
"grad_norm": 0.19930607080459595,
"learning_rate": 3.318e-05,
"loss": 0.0137,
"step": 1660
},
{
"epoch": 14.778761061946902,
"grad_norm": 0.19809278845787048,
"learning_rate": 3.338e-05,
"loss": 0.0141,
"step": 1670
},
{
"epoch": 14.867256637168142,
"grad_norm": 0.16252639889717102,
"learning_rate": 3.358e-05,
"loss": 0.0142,
"step": 1680
},
{
"epoch": 14.955752212389381,
"grad_norm": 0.18918722867965698,
"learning_rate": 3.378e-05,
"loss": 0.0143,
"step": 1690
},
{
"epoch": 15.044247787610619,
"grad_norm": 0.19039537012577057,
"learning_rate": 3.398e-05,
"loss": 0.0138,
"step": 1700
},
{
"epoch": 15.132743362831858,
"grad_norm": 0.14957837760448456,
"learning_rate": 3.418e-05,
"loss": 0.0137,
"step": 1710
},
{
"epoch": 15.221238938053098,
"grad_norm": 0.16923746466636658,
"learning_rate": 3.438e-05,
"loss": 0.0143,
"step": 1720
},
{
"epoch": 15.309734513274336,
"grad_norm": 0.16197200119495392,
"learning_rate": 3.4580000000000004e-05,
"loss": 0.0132,
"step": 1730
},
{
"epoch": 15.398230088495575,
"grad_norm": 0.1780109852552414,
"learning_rate": 3.478e-05,
"loss": 0.0134,
"step": 1740
},
{
"epoch": 15.486725663716815,
"grad_norm": 0.20671486854553223,
"learning_rate": 3.498e-05,
"loss": 0.0134,
"step": 1750
},
{
"epoch": 15.575221238938052,
"grad_norm": 0.2506687641143799,
"learning_rate": 3.518e-05,
"loss": 0.0127,
"step": 1760
},
{
"epoch": 15.663716814159292,
"grad_norm": 0.16043032705783844,
"learning_rate": 3.5380000000000003e-05,
"loss": 0.0128,
"step": 1770
},
{
"epoch": 15.752212389380531,
"grad_norm": 0.17977970838546753,
"learning_rate": 3.558e-05,
"loss": 0.013,
"step": 1780
},
{
"epoch": 15.84070796460177,
"grad_norm": 0.21391505002975464,
"learning_rate": 3.578e-05,
"loss": 0.0132,
"step": 1790
},
{
"epoch": 15.929203539823009,
"grad_norm": 0.1991083323955536,
"learning_rate": 3.5980000000000004e-05,
"loss": 0.0133,
"step": 1800
},
{
"epoch": 16.01769911504425,
"grad_norm": 0.25892403721809387,
"learning_rate": 3.618e-05,
"loss": 0.0126,
"step": 1810
},
{
"epoch": 16.106194690265486,
"grad_norm": 0.20870722830295563,
"learning_rate": 3.638e-05,
"loss": 0.0126,
"step": 1820
},
{
"epoch": 16.194690265486727,
"grad_norm": 0.18789322674274445,
"learning_rate": 3.6580000000000006e-05,
"loss": 0.0126,
"step": 1830
},
{
"epoch": 16.283185840707965,
"grad_norm": 0.2000027447938919,
"learning_rate": 3.6780000000000004e-05,
"loss": 0.013,
"step": 1840
},
{
"epoch": 16.371681415929203,
"grad_norm": 0.15799404680728912,
"learning_rate": 3.698e-05,
"loss": 0.0124,
"step": 1850
},
{
"epoch": 16.460176991150444,
"grad_norm": 0.22756226360797882,
"learning_rate": 3.7180000000000007e-05,
"loss": 0.0127,
"step": 1860
},
{
"epoch": 16.548672566371682,
"grad_norm": 0.21301455795764923,
"learning_rate": 3.7380000000000005e-05,
"loss": 0.0134,
"step": 1870
},
{
"epoch": 16.63716814159292,
"grad_norm": 0.1839960813522339,
"learning_rate": 3.758e-05,
"loss": 0.0128,
"step": 1880
},
{
"epoch": 16.72566371681416,
"grad_norm": 0.16055116057395935,
"learning_rate": 3.778000000000001e-05,
"loss": 0.0136,
"step": 1890
},
{
"epoch": 16.8141592920354,
"grad_norm": 0.30682259798049927,
"learning_rate": 3.7980000000000006e-05,
"loss": 0.0127,
"step": 1900
},
{
"epoch": 16.902654867256636,
"grad_norm": 0.22010301053524017,
"learning_rate": 3.818e-05,
"loss": 0.0134,
"step": 1910
},
{
"epoch": 16.991150442477878,
"grad_norm": 0.18450503051280975,
"learning_rate": 3.838e-05,
"loss": 0.0125,
"step": 1920
},
{
"epoch": 17.079646017699115,
"grad_norm": 0.14620986580848694,
"learning_rate": 3.858e-05,
"loss": 0.0133,
"step": 1930
},
{
"epoch": 17.168141592920353,
"grad_norm": 0.17002588510513306,
"learning_rate": 3.878e-05,
"loss": 0.0124,
"step": 1940
},
{
"epoch": 17.256637168141594,
"grad_norm": 0.17407143115997314,
"learning_rate": 3.898e-05,
"loss": 0.0131,
"step": 1950
},
{
"epoch": 17.345132743362832,
"grad_norm": 0.25174498558044434,
"learning_rate": 3.918e-05,
"loss": 0.0119,
"step": 1960
},
{
"epoch": 17.43362831858407,
"grad_norm": 0.2005157768726349,
"learning_rate": 3.938e-05,
"loss": 0.012,
"step": 1970
},
{
"epoch": 17.52212389380531,
"grad_norm": 0.19487272202968597,
"learning_rate": 3.958e-05,
"loss": 0.013,
"step": 1980
},
{
"epoch": 17.61061946902655,
"grad_norm": 0.2406047135591507,
"learning_rate": 3.978e-05,
"loss": 0.0134,
"step": 1990
},
{
"epoch": 17.699115044247787,
"grad_norm": 0.17815567553043365,
"learning_rate": 3.998e-05,
"loss": 0.0126,
"step": 2000
},
{
"epoch": 17.787610619469028,
"grad_norm": 0.20930008590221405,
"learning_rate": 4.018e-05,
"loss": 0.0123,
"step": 2010
},
{
"epoch": 17.876106194690266,
"grad_norm": 0.18442749977111816,
"learning_rate": 4.038e-05,
"loss": 0.0122,
"step": 2020
},
{
"epoch": 17.964601769911503,
"grad_norm": 0.16639980673789978,
"learning_rate": 4.058e-05,
"loss": 0.0129,
"step": 2030
},
{
"epoch": 18.053097345132745,
"grad_norm": 0.22504822909832,
"learning_rate": 4.078e-05,
"loss": 0.0131,
"step": 2040
},
{
"epoch": 18.141592920353983,
"grad_norm": 0.24628128111362457,
"learning_rate": 4.0980000000000004e-05,
"loss": 0.0118,
"step": 2050
},
{
"epoch": 18.23008849557522,
"grad_norm": 0.14474467933177948,
"learning_rate": 4.118e-05,
"loss": 0.0119,
"step": 2060
},
{
"epoch": 18.31858407079646,
"grad_norm": 0.19983842968940735,
"learning_rate": 4.138e-05,
"loss": 0.0119,
"step": 2070
},
{
"epoch": 18.4070796460177,
"grad_norm": 0.26629599928855896,
"learning_rate": 4.1580000000000005e-05,
"loss": 0.0126,
"step": 2080
},
{
"epoch": 18.495575221238937,
"grad_norm": 0.18293090164661407,
"learning_rate": 4.178e-05,
"loss": 0.0132,
"step": 2090
},
{
"epoch": 18.58407079646018,
"grad_norm": 0.22794148325920105,
"learning_rate": 4.198e-05,
"loss": 0.0117,
"step": 2100
},
{
"epoch": 18.672566371681416,
"grad_norm": 0.1965721845626831,
"learning_rate": 4.2180000000000006e-05,
"loss": 0.0126,
"step": 2110
},
{
"epoch": 18.761061946902654,
"grad_norm": 0.21172276139259338,
"learning_rate": 4.2380000000000004e-05,
"loss": 0.0118,
"step": 2120
},
{
"epoch": 18.849557522123895,
"grad_norm": 0.21038956940174103,
"learning_rate": 4.258e-05,
"loss": 0.0125,
"step": 2130
},
{
"epoch": 18.938053097345133,
"grad_norm": 0.23481352627277374,
"learning_rate": 4.278e-05,
"loss": 0.012,
"step": 2140
},
{
"epoch": 19.02654867256637,
"grad_norm": 0.18684646487236023,
"learning_rate": 4.2980000000000005e-05,
"loss": 0.0121,
"step": 2150
},
{
"epoch": 19.115044247787612,
"grad_norm": 0.23360024392604828,
"learning_rate": 4.318e-05,
"loss": 0.0119,
"step": 2160
},
{
"epoch": 19.20353982300885,
"grad_norm": 0.2147015929222107,
"learning_rate": 4.338e-05,
"loss": 0.0118,
"step": 2170
},
{
"epoch": 19.292035398230087,
"grad_norm": 0.20503421127796173,
"learning_rate": 4.3580000000000006e-05,
"loss": 0.0118,
"step": 2180
},
{
"epoch": 19.38053097345133,
"grad_norm": 0.19846957921981812,
"learning_rate": 4.3780000000000004e-05,
"loss": 0.0116,
"step": 2190
},
{
"epoch": 19.469026548672566,
"grad_norm": 0.22245913743972778,
"learning_rate": 4.398e-05,
"loss": 0.0127,
"step": 2200
},
{
"epoch": 19.557522123893804,
"grad_norm": 0.20002296566963196,
"learning_rate": 4.418000000000001e-05,
"loss": 0.0124,
"step": 2210
},
{
"epoch": 19.646017699115045,
"grad_norm": 0.22656002640724182,
"learning_rate": 4.438e-05,
"loss": 0.0111,
"step": 2220
},
{
"epoch": 19.734513274336283,
"grad_norm": 0.1582970917224884,
"learning_rate": 4.458e-05,
"loss": 0.0116,
"step": 2230
},
{
"epoch": 19.82300884955752,
"grad_norm": 0.18759259581565857,
"learning_rate": 4.478e-05,
"loss": 0.0114,
"step": 2240
},
{
"epoch": 19.911504424778762,
"grad_norm": 0.23023168742656708,
"learning_rate": 4.498e-05,
"loss": 0.0114,
"step": 2250
},
{
"epoch": 20.0,
"grad_norm": 0.23070260882377625,
"learning_rate": 4.518e-05,
"loss": 0.0113,
"step": 2260
},
{
"epoch": 20.088495575221238,
"grad_norm": 0.21114951372146606,
"learning_rate": 4.538e-05,
"loss": 0.0116,
"step": 2270
},
{
"epoch": 20.17699115044248,
"grad_norm": 0.21285273134708405,
"learning_rate": 4.558e-05,
"loss": 0.0116,
"step": 2280
},
{
"epoch": 20.265486725663717,
"grad_norm": 0.20174440741539001,
"learning_rate": 4.578e-05,
"loss": 0.0112,
"step": 2290
},
{
"epoch": 20.353982300884955,
"grad_norm": 0.22542431950569153,
"learning_rate": 4.5980000000000004e-05,
"loss": 0.0123,
"step": 2300
},
{
"epoch": 20.442477876106196,
"grad_norm": 0.1854224056005478,
"learning_rate": 4.618e-05,
"loss": 0.0121,
"step": 2310
},
{
"epoch": 20.530973451327434,
"grad_norm": 0.2263476550579071,
"learning_rate": 4.638e-05,
"loss": 0.0116,
"step": 2320
},
{
"epoch": 20.61946902654867,
"grad_norm": 0.18328975141048431,
"learning_rate": 4.6580000000000005e-05,
"loss": 0.0114,
"step": 2330
},
{
"epoch": 20.707964601769913,
"grad_norm": 0.18268951773643494,
"learning_rate": 4.678e-05,
"loss": 0.0108,
"step": 2340
},
{
"epoch": 20.79646017699115,
"grad_norm": 0.16564399003982544,
"learning_rate": 4.698e-05,
"loss": 0.0113,
"step": 2350
},
{
"epoch": 20.884955752212388,
"grad_norm": 0.19969241321086884,
"learning_rate": 4.718e-05,
"loss": 0.0117,
"step": 2360
},
{
"epoch": 20.97345132743363,
"grad_norm": 0.15618184208869934,
"learning_rate": 4.7380000000000004e-05,
"loss": 0.0108,
"step": 2370
},
{
"epoch": 21.061946902654867,
"grad_norm": 0.21858179569244385,
"learning_rate": 4.758e-05,
"loss": 0.0113,
"step": 2380
},
{
"epoch": 21.150442477876105,
"grad_norm": 0.17541509866714478,
"learning_rate": 4.778e-05,
"loss": 0.0125,
"step": 2390
},
{
"epoch": 21.238938053097346,
"grad_norm": 0.16505980491638184,
"learning_rate": 4.7980000000000005e-05,
"loss": 0.0119,
"step": 2400
},
{
"epoch": 21.327433628318584,
"grad_norm": 0.23706042766571045,
"learning_rate": 4.818e-05,
"loss": 0.0121,
"step": 2410
},
{
"epoch": 21.41592920353982,
"grad_norm": 0.16708803176879883,
"learning_rate": 4.838e-05,
"loss": 0.0114,
"step": 2420
},
{
"epoch": 21.504424778761063,
"grad_norm": 0.15749461948871613,
"learning_rate": 4.8580000000000006e-05,
"loss": 0.0119,
"step": 2430
},
{
"epoch": 21.5929203539823,
"grad_norm": 0.1923663467168808,
"learning_rate": 4.8780000000000004e-05,
"loss": 0.0106,
"step": 2440
},
{
"epoch": 21.68141592920354,
"grad_norm": 0.1585606038570404,
"learning_rate": 4.898e-05,
"loss": 0.0112,
"step": 2450
},
{
"epoch": 21.76991150442478,
"grad_norm": 0.19709140062332153,
"learning_rate": 4.918000000000001e-05,
"loss": 0.0111,
"step": 2460
},
{
"epoch": 21.858407079646017,
"grad_norm": 0.22520484030246735,
"learning_rate": 4.9380000000000005e-05,
"loss": 0.0105,
"step": 2470
},
{
"epoch": 21.946902654867255,
"grad_norm": 0.22094905376434326,
"learning_rate": 4.958e-05,
"loss": 0.0115,
"step": 2480
},
{
"epoch": 22.035398230088497,
"grad_norm": 0.184371218085289,
"learning_rate": 4.978e-05,
"loss": 0.011,
"step": 2490
},
{
"epoch": 22.123893805309734,
"grad_norm": 0.2175336331129074,
"learning_rate": 4.9980000000000006e-05,
"loss": 0.0115,
"step": 2500
},
{
"epoch": 22.212389380530972,
"grad_norm": 0.1742861568927765,
"learning_rate": 5.0180000000000004e-05,
"loss": 0.0106,
"step": 2510
},
{
"epoch": 22.300884955752213,
"grad_norm": 0.1842757761478424,
"learning_rate": 5.038e-05,
"loss": 0.0106,
"step": 2520
},
{
"epoch": 22.38938053097345,
"grad_norm": 0.1987450271844864,
"learning_rate": 5.058000000000001e-05,
"loss": 0.0114,
"step": 2530
},
{
"epoch": 22.47787610619469,
"grad_norm": 0.20004890859127045,
"learning_rate": 5.0780000000000005e-05,
"loss": 0.0112,
"step": 2540
},
{
"epoch": 22.56637168141593,
"grad_norm": 0.19499580562114716,
"learning_rate": 5.098e-05,
"loss": 0.011,
"step": 2550
},
{
"epoch": 22.654867256637168,
"grad_norm": 0.16874614357948303,
"learning_rate": 5.118000000000001e-05,
"loss": 0.0104,
"step": 2560
},
{
"epoch": 22.743362831858406,
"grad_norm": 0.19128884375095367,
"learning_rate": 5.1380000000000006e-05,
"loss": 0.0104,
"step": 2570
},
{
"epoch": 22.831858407079647,
"grad_norm": 0.18968653678894043,
"learning_rate": 5.1580000000000004e-05,
"loss": 0.0105,
"step": 2580
},
{
"epoch": 22.920353982300885,
"grad_norm": 0.21244198083877563,
"learning_rate": 5.178000000000001e-05,
"loss": 0.0106,
"step": 2590
},
{
"epoch": 23.008849557522122,
"grad_norm": 0.1860707700252533,
"learning_rate": 5.198000000000001e-05,
"loss": 0.0106,
"step": 2600
},
{
"epoch": 23.097345132743364,
"grad_norm": 0.17499375343322754,
"learning_rate": 5.2180000000000005e-05,
"loss": 0.0102,
"step": 2610
},
{
"epoch": 23.1858407079646,
"grad_norm": 0.18945537507534027,
"learning_rate": 5.238000000000001e-05,
"loss": 0.0109,
"step": 2620
},
{
"epoch": 23.27433628318584,
"grad_norm": 0.19200605154037476,
"learning_rate": 5.258000000000001e-05,
"loss": 0.0107,
"step": 2630
},
{
"epoch": 23.36283185840708,
"grad_norm": 0.2066267877817154,
"learning_rate": 5.2780000000000006e-05,
"loss": 0.0102,
"step": 2640
},
{
"epoch": 23.451327433628318,
"grad_norm": 0.21788930892944336,
"learning_rate": 5.2980000000000004e-05,
"loss": 0.0105,
"step": 2650
},
{
"epoch": 23.539823008849556,
"grad_norm": 0.21332311630249023,
"learning_rate": 5.318000000000001e-05,
"loss": 0.0104,
"step": 2660
},
{
"epoch": 23.628318584070797,
"grad_norm": 0.21296343207359314,
"learning_rate": 5.338000000000001e-05,
"loss": 0.0104,
"step": 2670
},
{
"epoch": 23.716814159292035,
"grad_norm": 0.18731948733329773,
"learning_rate": 5.3580000000000005e-05,
"loss": 0.0107,
"step": 2680
},
{
"epoch": 23.805309734513273,
"grad_norm": 0.20445139706134796,
"learning_rate": 5.378e-05,
"loss": 0.0105,
"step": 2690
},
{
"epoch": 23.893805309734514,
"grad_norm": 0.2132468819618225,
"learning_rate": 5.3979999999999995e-05,
"loss": 0.0106,
"step": 2700
},
{
"epoch": 23.98230088495575,
"grad_norm": 0.1869552731513977,
"learning_rate": 5.418e-05,
"loss": 0.01,
"step": 2710
},
{
"epoch": 24.07079646017699,
"grad_norm": 0.18613892793655396,
"learning_rate": 5.438e-05,
"loss": 0.0102,
"step": 2720
},
{
"epoch": 24.15929203539823,
"grad_norm": 0.18388274312019348,
"learning_rate": 5.4579999999999996e-05,
"loss": 0.0102,
"step": 2730
},
{
"epoch": 24.24778761061947,
"grad_norm": 0.20290076732635498,
"learning_rate": 5.478e-05,
"loss": 0.0102,
"step": 2740
},
{
"epoch": 24.336283185840706,
"grad_norm": 0.17434221506118774,
"learning_rate": 5.498e-05,
"loss": 0.0103,
"step": 2750
},
{
"epoch": 24.424778761061948,
"grad_norm": 0.16907170414924622,
"learning_rate": 5.518e-05,
"loss": 0.0097,
"step": 2760
},
{
"epoch": 24.513274336283185,
"grad_norm": 0.12880559265613556,
"learning_rate": 5.538e-05,
"loss": 0.0101,
"step": 2770
},
{
"epoch": 24.601769911504427,
"grad_norm": 0.19324956834316254,
"learning_rate": 5.558e-05,
"loss": 0.0106,
"step": 2780
},
{
"epoch": 24.690265486725664,
"grad_norm": 0.18656569719314575,
"learning_rate": 5.578e-05,
"loss": 0.0097,
"step": 2790
},
{
"epoch": 24.778761061946902,
"grad_norm": 0.15899035334587097,
"learning_rate": 5.5979999999999996e-05,
"loss": 0.01,
"step": 2800
},
{
"epoch": 24.86725663716814,
"grad_norm": 0.15733803808689117,
"learning_rate": 5.618e-05,
"loss": 0.0112,
"step": 2810
},
{
"epoch": 24.95575221238938,
"grad_norm": 0.17346592247486115,
"learning_rate": 5.638e-05,
"loss": 0.0099,
"step": 2820
},
{
"epoch": 25.04424778761062,
"grad_norm": 0.1815352588891983,
"learning_rate": 5.658e-05,
"loss": 0.0104,
"step": 2830
},
{
"epoch": 25.13274336283186,
"grad_norm": 0.198966845870018,
"learning_rate": 5.678e-05,
"loss": 0.0109,
"step": 2840
},
{
"epoch": 25.221238938053098,
"grad_norm": 0.17682309448719025,
"learning_rate": 5.698e-05,
"loss": 0.0106,
"step": 2850
},
{
"epoch": 25.309734513274336,
"grad_norm": 0.22696511447429657,
"learning_rate": 5.718e-05,
"loss": 0.0096,
"step": 2860
},
{
"epoch": 25.398230088495577,
"grad_norm": 0.16920962929725647,
"learning_rate": 5.738e-05,
"loss": 0.011,
"step": 2870
},
{
"epoch": 25.486725663716815,
"grad_norm": 0.2623240053653717,
"learning_rate": 5.758e-05,
"loss": 0.0098,
"step": 2880
},
{
"epoch": 25.575221238938052,
"grad_norm": 0.19749803841114044,
"learning_rate": 5.778e-05,
"loss": 0.0103,
"step": 2890
},
{
"epoch": 25.663716814159294,
"grad_norm": 0.17247773706912994,
"learning_rate": 5.7980000000000004e-05,
"loss": 0.0107,
"step": 2900
},
{
"epoch": 25.75221238938053,
"grad_norm": 0.20530232787132263,
"learning_rate": 5.818e-05,
"loss": 0.0095,
"step": 2910
},
{
"epoch": 25.84070796460177,
"grad_norm": 0.19921380281448364,
"learning_rate": 5.838e-05,
"loss": 0.0101,
"step": 2920
},
{
"epoch": 25.92920353982301,
"grad_norm": 0.16885505616664886,
"learning_rate": 5.858e-05,
"loss": 0.0099,
"step": 2930
},
{
"epoch": 26.01769911504425,
"grad_norm": 0.20461390912532806,
"learning_rate": 5.878e-05,
"loss": 0.0103,
"step": 2940
},
{
"epoch": 26.106194690265486,
"grad_norm": 0.16449180245399475,
"learning_rate": 5.898e-05,
"loss": 0.0107,
"step": 2950
},
{
"epoch": 26.194690265486727,
"grad_norm": 0.20002399384975433,
"learning_rate": 5.918e-05,
"loss": 0.0099,
"step": 2960
},
{
"epoch": 26.283185840707965,
"grad_norm": 0.189950630068779,
"learning_rate": 5.9380000000000004e-05,
"loss": 0.01,
"step": 2970
},
{
"epoch": 26.371681415929203,
"grad_norm": 0.1926291584968567,
"learning_rate": 5.958e-05,
"loss": 0.0106,
"step": 2980
},
{
"epoch": 26.460176991150444,
"grad_norm": 0.23954054713249207,
"learning_rate": 5.978e-05,
"loss": 0.0102,
"step": 2990
},
{
"epoch": 26.548672566371682,
"grad_norm": 0.21134018898010254,
"learning_rate": 5.9980000000000005e-05,
"loss": 0.0101,
"step": 3000
},
{
"epoch": 26.63716814159292,
"grad_norm": 0.18221573531627655,
"learning_rate": 6.018e-05,
"loss": 0.0095,
"step": 3010
},
{
"epoch": 26.72566371681416,
"grad_norm": 0.15541145205497742,
"learning_rate": 6.038e-05,
"loss": 0.0098,
"step": 3020
},
{
"epoch": 26.8141592920354,
"grad_norm": 0.20895129442214966,
"learning_rate": 6.0580000000000006e-05,
"loss": 0.0099,
"step": 3030
},
{
"epoch": 26.902654867256636,
"grad_norm": 0.24560369551181793,
"learning_rate": 6.0780000000000004e-05,
"loss": 0.0097,
"step": 3040
},
{
"epoch": 26.991150442477878,
"grad_norm": 0.209951251745224,
"learning_rate": 6.098e-05,
"loss": 0.0102,
"step": 3050
},
{
"epoch": 27.079646017699115,
"grad_norm": 0.23731729388237,
"learning_rate": 6.118000000000001e-05,
"loss": 0.0107,
"step": 3060
},
{
"epoch": 27.168141592920353,
"grad_norm": 0.21880139410495758,
"learning_rate": 6.138e-05,
"loss": 0.0095,
"step": 3070
},
{
"epoch": 27.256637168141594,
"grad_norm": 0.20155078172683716,
"learning_rate": 6.158e-05,
"loss": 0.0096,
"step": 3080
},
{
"epoch": 27.345132743362832,
"grad_norm": 0.18142688274383545,
"learning_rate": 6.178000000000001e-05,
"loss": 0.0094,
"step": 3090
},
{
"epoch": 27.43362831858407,
"grad_norm": 0.2185787856578827,
"learning_rate": 6.198e-05,
"loss": 0.0097,
"step": 3100
},
{
"epoch": 27.52212389380531,
"grad_norm": 0.19645309448242188,
"learning_rate": 6.218e-05,
"loss": 0.0102,
"step": 3110
},
{
"epoch": 27.61061946902655,
"grad_norm": 0.15861794352531433,
"learning_rate": 6.238000000000001e-05,
"loss": 0.01,
"step": 3120
},
{
"epoch": 27.699115044247787,
"grad_norm": 0.19991379976272583,
"learning_rate": 6.258e-05,
"loss": 0.0102,
"step": 3130
},
{
"epoch": 27.787610619469028,
"grad_norm": 0.1209021508693695,
"learning_rate": 6.278e-05,
"loss": 0.01,
"step": 3140
},
{
"epoch": 27.876106194690266,
"grad_norm": 0.16849768161773682,
"learning_rate": 6.298000000000001e-05,
"loss": 0.0098,
"step": 3150
},
{
"epoch": 27.964601769911503,
"grad_norm": 0.17233285307884216,
"learning_rate": 6.318e-05,
"loss": 0.0099,
"step": 3160
},
{
"epoch": 28.053097345132745,
"grad_norm": 0.1882430613040924,
"learning_rate": 6.338e-05,
"loss": 0.0091,
"step": 3170
},
{
"epoch": 28.141592920353983,
"grad_norm": 0.150844544172287,
"learning_rate": 6.358000000000001e-05,
"loss": 0.009,
"step": 3180
},
{
"epoch": 28.23008849557522,
"grad_norm": 0.16444620490074158,
"learning_rate": 6.378e-05,
"loss": 0.0096,
"step": 3190
},
{
"epoch": 28.31858407079646,
"grad_norm": 0.15817801654338837,
"learning_rate": 6.398000000000001e-05,
"loss": 0.0098,
"step": 3200
},
{
"epoch": 28.4070796460177,
"grad_norm": 0.15221363306045532,
"learning_rate": 6.418000000000001e-05,
"loss": 0.0093,
"step": 3210
},
{
"epoch": 28.495575221238937,
"grad_norm": 0.22582747042179108,
"learning_rate": 6.438e-05,
"loss": 0.0095,
"step": 3220
},
{
"epoch": 28.58407079646018,
"grad_norm": 0.2243175506591797,
"learning_rate": 6.458000000000001e-05,
"loss": 0.0099,
"step": 3230
},
{
"epoch": 28.672566371681416,
"grad_norm": 0.22440826892852783,
"learning_rate": 6.478000000000001e-05,
"loss": 0.0096,
"step": 3240
},
{
"epoch": 28.761061946902654,
"grad_norm": 0.18992920219898224,
"learning_rate": 6.498e-05,
"loss": 0.0094,
"step": 3250
},
{
"epoch": 28.849557522123895,
"grad_norm": 0.22126157581806183,
"learning_rate": 6.518000000000001e-05,
"loss": 0.0098,
"step": 3260
},
{
"epoch": 28.938053097345133,
"grad_norm": 0.17253299057483673,
"learning_rate": 6.538000000000001e-05,
"loss": 0.0093,
"step": 3270
},
{
"epoch": 29.02654867256637,
"grad_norm": 0.15734551846981049,
"learning_rate": 6.558e-05,
"loss": 0.0088,
"step": 3280
},
{
"epoch": 29.115044247787612,
"grad_norm": 0.18129082024097443,
"learning_rate": 6.578000000000001e-05,
"loss": 0.0095,
"step": 3290
},
{
"epoch": 29.20353982300885,
"grad_norm": 0.20513580739498138,
"learning_rate": 6.598e-05,
"loss": 0.0098,
"step": 3300
},
{
"epoch": 29.292035398230087,
"grad_norm": 0.15419155359268188,
"learning_rate": 6.618e-05,
"loss": 0.0098,
"step": 3310
},
{
"epoch": 29.38053097345133,
"grad_norm": 0.14091163873672485,
"learning_rate": 6.638e-05,
"loss": 0.01,
"step": 3320
},
{
"epoch": 29.469026548672566,
"grad_norm": 0.19755586981773376,
"learning_rate": 6.658e-05,
"loss": 0.0101,
"step": 3330
},
{
"epoch": 29.557522123893804,
"grad_norm": 0.16471098363399506,
"learning_rate": 6.678e-05,
"loss": 0.0094,
"step": 3340
},
{
"epoch": 29.646017699115045,
"grad_norm": 0.15273906290531158,
"learning_rate": 6.698e-05,
"loss": 0.0098,
"step": 3350
},
{
"epoch": 29.734513274336283,
"grad_norm": 0.18923990428447723,
"learning_rate": 6.718e-05,
"loss": 0.0096,
"step": 3360
},
{
"epoch": 29.82300884955752,
"grad_norm": 0.16429926455020905,
"learning_rate": 6.738e-05,
"loss": 0.0089,
"step": 3370
},
{
"epoch": 29.911504424778762,
"grad_norm": 0.18430180847644806,
"learning_rate": 6.758e-05,
"loss": 0.0088,
"step": 3380
},
{
"epoch": 30.0,
"grad_norm": 0.17353755235671997,
"learning_rate": 6.778e-05,
"loss": 0.0097,
"step": 3390
},
{
"epoch": 30.088495575221238,
"grad_norm": 0.22288672626018524,
"learning_rate": 6.798e-05,
"loss": 0.0083,
"step": 3400
},
{
"epoch": 30.17699115044248,
"grad_norm": 0.1450582891702652,
"learning_rate": 6.818e-05,
"loss": 0.0091,
"step": 3410
},
{
"epoch": 30.265486725663717,
"grad_norm": 0.20816563069820404,
"learning_rate": 6.838e-05,
"loss": 0.0089,
"step": 3420
},
{
"epoch": 30.353982300884955,
"grad_norm": 0.16875436902046204,
"learning_rate": 6.858e-05,
"loss": 0.0084,
"step": 3430
},
{
"epoch": 30.442477876106196,
"grad_norm": 0.14342516660690308,
"learning_rate": 6.878e-05,
"loss": 0.0097,
"step": 3440
},
{
"epoch": 30.530973451327434,
"grad_norm": 0.14947320520877838,
"learning_rate": 6.898e-05,
"loss": 0.009,
"step": 3450
},
{
"epoch": 30.61946902654867,
"grad_norm": 0.13617642223834991,
"learning_rate": 6.918e-05,
"loss": 0.0092,
"step": 3460
},
{
"epoch": 30.707964601769913,
"grad_norm": 0.12668970227241516,
"learning_rate": 6.938e-05,
"loss": 0.0098,
"step": 3470
},
{
"epoch": 30.79646017699115,
"grad_norm": 0.19894877076148987,
"learning_rate": 6.958e-05,
"loss": 0.0092,
"step": 3480
},
{
"epoch": 30.884955752212388,
"grad_norm": 0.1800524890422821,
"learning_rate": 6.978e-05,
"loss": 0.0092,
"step": 3490
},
{
"epoch": 30.97345132743363,
"grad_norm": 0.18502476811408997,
"learning_rate": 6.998e-05,
"loss": 0.0096,
"step": 3500
},
{
"epoch": 31.061946902654867,
"grad_norm": 0.17043296992778778,
"learning_rate": 7.018e-05,
"loss": 0.0097,
"step": 3510
},
{
"epoch": 31.150442477876105,
"grad_norm": 0.2069322019815445,
"learning_rate": 7.038e-05,
"loss": 0.0098,
"step": 3520
},
{
"epoch": 31.238938053097346,
"grad_norm": 0.2311137467622757,
"learning_rate": 7.058e-05,
"loss": 0.0094,
"step": 3530
},
{
"epoch": 31.327433628318584,
"grad_norm": 0.16992923617362976,
"learning_rate": 7.078e-05,
"loss": 0.0095,
"step": 3540
},
{
"epoch": 31.41592920353982,
"grad_norm": 0.16129949688911438,
"learning_rate": 7.098e-05,
"loss": 0.0089,
"step": 3550
},
{
"epoch": 31.504424778761063,
"grad_norm": 0.19930200278759003,
"learning_rate": 7.118e-05,
"loss": 0.0089,
"step": 3560
},
{
"epoch": 31.5929203539823,
"grad_norm": 0.1485586166381836,
"learning_rate": 7.138e-05,
"loss": 0.009,
"step": 3570
},
{
"epoch": 31.68141592920354,
"grad_norm": 0.2192714512348175,
"learning_rate": 7.158e-05,
"loss": 0.0094,
"step": 3580
},
{
"epoch": 31.76991150442478,
"grad_norm": 0.185531347990036,
"learning_rate": 7.178000000000001e-05,
"loss": 0.0093,
"step": 3590
},
{
"epoch": 31.858407079646017,
"grad_norm": 0.19907477498054504,
"learning_rate": 7.198e-05,
"loss": 0.0094,
"step": 3600
},
{
"epoch": 31.946902654867255,
"grad_norm": 0.1861497461795807,
"learning_rate": 7.218e-05,
"loss": 0.0096,
"step": 3610
},
{
"epoch": 32.0353982300885,
"grad_norm": 0.14380060136318207,
"learning_rate": 7.238000000000001e-05,
"loss": 0.0095,
"step": 3620
},
{
"epoch": 32.123893805309734,
"grad_norm": 0.16309955716133118,
"learning_rate": 7.258e-05,
"loss": 0.0093,
"step": 3630
},
{
"epoch": 32.21238938053097,
"grad_norm": 0.13984215259552002,
"learning_rate": 7.278e-05,
"loss": 0.0094,
"step": 3640
},
{
"epoch": 32.30088495575221,
"grad_norm": 0.1498541384935379,
"learning_rate": 7.298000000000001e-05,
"loss": 0.0097,
"step": 3650
},
{
"epoch": 32.389380530973455,
"grad_norm": 0.18653202056884766,
"learning_rate": 7.318e-05,
"loss": 0.0091,
"step": 3660
},
{
"epoch": 32.47787610619469,
"grad_norm": 0.1598045825958252,
"learning_rate": 7.338e-05,
"loss": 0.0094,
"step": 3670
},
{
"epoch": 32.56637168141593,
"grad_norm": 0.16379061341285706,
"learning_rate": 7.358000000000001e-05,
"loss": 0.0088,
"step": 3680
},
{
"epoch": 32.65486725663717,
"grad_norm": 0.1803268939256668,
"learning_rate": 7.378e-05,
"loss": 0.0093,
"step": 3690
},
{
"epoch": 32.743362831858406,
"grad_norm": 0.14064811170101166,
"learning_rate": 7.398e-05,
"loss": 0.009,
"step": 3700
},
{
"epoch": 32.83185840707964,
"grad_norm": 0.12818744778633118,
"learning_rate": 7.418000000000001e-05,
"loss": 0.0092,
"step": 3710
},
{
"epoch": 32.92035398230089,
"grad_norm": 0.1366705447435379,
"learning_rate": 7.438e-05,
"loss": 0.0085,
"step": 3720
},
{
"epoch": 33.008849557522126,
"grad_norm": 0.18504424393177032,
"learning_rate": 7.458000000000001e-05,
"loss": 0.0093,
"step": 3730
},
{
"epoch": 33.097345132743364,
"grad_norm": 0.14896735548973083,
"learning_rate": 7.478e-05,
"loss": 0.009,
"step": 3740
},
{
"epoch": 33.1858407079646,
"grad_norm": 0.13870835304260254,
"learning_rate": 7.498e-05,
"loss": 0.0093,
"step": 3750
},
{
"epoch": 33.27433628318584,
"grad_norm": 0.1251486837863922,
"learning_rate": 7.518000000000001e-05,
"loss": 0.0083,
"step": 3760
},
{
"epoch": 33.36283185840708,
"grad_norm": 0.2125852108001709,
"learning_rate": 7.538e-05,
"loss": 0.0094,
"step": 3770
},
{
"epoch": 33.45132743362832,
"grad_norm": 0.14534027874469757,
"learning_rate": 7.558e-05,
"loss": 0.0086,
"step": 3780
},
{
"epoch": 33.53982300884956,
"grad_norm": 0.1484474241733551,
"learning_rate": 7.578000000000001e-05,
"loss": 0.009,
"step": 3790
},
{
"epoch": 33.6283185840708,
"grad_norm": 0.1549052894115448,
"learning_rate": 7.598e-05,
"loss": 0.0091,
"step": 3800
},
{
"epoch": 33.716814159292035,
"grad_norm": 0.13797472417354584,
"learning_rate": 7.618e-05,
"loss": 0.0091,
"step": 3810
},
{
"epoch": 33.80530973451327,
"grad_norm": 0.15759176015853882,
"learning_rate": 7.638000000000001e-05,
"loss": 0.0078,
"step": 3820
},
{
"epoch": 33.89380530973451,
"grad_norm": 0.13823020458221436,
"learning_rate": 7.658e-05,
"loss": 0.009,
"step": 3830
},
{
"epoch": 33.982300884955755,
"grad_norm": 0.12496250122785568,
"learning_rate": 7.678000000000001e-05,
"loss": 0.0083,
"step": 3840
},
{
"epoch": 34.07079646017699,
"grad_norm": 0.13213366270065308,
"learning_rate": 7.698000000000001e-05,
"loss": 0.0091,
"step": 3850
},
{
"epoch": 34.15929203539823,
"grad_norm": 0.18073223531246185,
"learning_rate": 7.718e-05,
"loss": 0.0081,
"step": 3860
},
{
"epoch": 34.24778761061947,
"grad_norm": 0.1358853280544281,
"learning_rate": 7.738000000000001e-05,
"loss": 0.0086,
"step": 3870
},
{
"epoch": 34.336283185840706,
"grad_norm": 0.16672301292419434,
"learning_rate": 7.758000000000001e-05,
"loss": 0.0087,
"step": 3880
},
{
"epoch": 34.424778761061944,
"grad_norm": 0.11548765748739243,
"learning_rate": 7.778e-05,
"loss": 0.0087,
"step": 3890
},
{
"epoch": 34.51327433628319,
"grad_norm": 0.15918992459774017,
"learning_rate": 7.798000000000001e-05,
"loss": 0.0088,
"step": 3900
},
{
"epoch": 34.60176991150443,
"grad_norm": 0.17667222023010254,
"learning_rate": 7.818000000000001e-05,
"loss": 0.0088,
"step": 3910
},
{
"epoch": 34.690265486725664,
"grad_norm": 0.1465061604976654,
"learning_rate": 7.838e-05,
"loss": 0.0085,
"step": 3920
},
{
"epoch": 34.7787610619469,
"grad_norm": 0.18721789121627808,
"learning_rate": 7.858000000000001e-05,
"loss": 0.0087,
"step": 3930
},
{
"epoch": 34.86725663716814,
"grad_norm": 0.16090698540210724,
"learning_rate": 7.878e-05,
"loss": 0.0085,
"step": 3940
},
{
"epoch": 34.95575221238938,
"grad_norm": 0.16545279324054718,
"learning_rate": 7.897999999999999e-05,
"loss": 0.0082,
"step": 3950
},
{
"epoch": 35.04424778761062,
"grad_norm": 0.16116927564144135,
"learning_rate": 7.918e-05,
"loss": 0.0084,
"step": 3960
},
{
"epoch": 35.13274336283186,
"grad_norm": 0.19407112896442413,
"learning_rate": 7.938e-05,
"loss": 0.008,
"step": 3970
},
{
"epoch": 35.2212389380531,
"grad_norm": 0.13435247540473938,
"learning_rate": 7.958e-05,
"loss": 0.0088,
"step": 3980
},
{
"epoch": 35.309734513274336,
"grad_norm": 0.15754403173923492,
"learning_rate": 7.978e-05,
"loss": 0.0092,
"step": 3990
},
{
"epoch": 35.39823008849557,
"grad_norm": 0.18012726306915283,
"learning_rate": 7.998e-05,
"loss": 0.0089,
"step": 4000
},
{
"epoch": 35.48672566371681,
"grad_norm": 0.17743121087551117,
"learning_rate": 8.018e-05,
"loss": 0.0089,
"step": 4010
},
{
"epoch": 35.575221238938056,
"grad_norm": 0.1524660289287567,
"learning_rate": 8.038e-05,
"loss": 0.0086,
"step": 4020
},
{
"epoch": 35.663716814159294,
"grad_norm": 0.17956510186195374,
"learning_rate": 8.058e-05,
"loss": 0.0086,
"step": 4030
},
{
"epoch": 35.75221238938053,
"grad_norm": 0.17635688185691833,
"learning_rate": 8.078e-05,
"loss": 0.0086,
"step": 4040
},
{
"epoch": 35.84070796460177,
"grad_norm": 0.16003718972206116,
"learning_rate": 8.098e-05,
"loss": 0.0091,
"step": 4050
},
{
"epoch": 35.92920353982301,
"grad_norm": 0.14748983085155487,
"learning_rate": 8.118e-05,
"loss": 0.0096,
"step": 4060
},
{
"epoch": 36.017699115044245,
"grad_norm": 0.16579337418079376,
"learning_rate": 8.138e-05,
"loss": 0.0092,
"step": 4070
},
{
"epoch": 36.10619469026549,
"grad_norm": 0.15083903074264526,
"learning_rate": 8.158e-05,
"loss": 0.0091,
"step": 4080
},
{
"epoch": 36.19469026548673,
"grad_norm": 0.13731341063976288,
"learning_rate": 8.178e-05,
"loss": 0.0083,
"step": 4090
},
{
"epoch": 36.283185840707965,
"grad_norm": 0.164901465177536,
"learning_rate": 8.198e-05,
"loss": 0.0087,
"step": 4100
},
{
"epoch": 36.3716814159292,
"grad_norm": 0.1438867747783661,
"learning_rate": 8.218e-05,
"loss": 0.0082,
"step": 4110
},
{
"epoch": 36.46017699115044,
"grad_norm": 0.15548557043075562,
"learning_rate": 8.238000000000001e-05,
"loss": 0.0083,
"step": 4120
},
{
"epoch": 36.54867256637168,
"grad_norm": 0.14919929206371307,
"learning_rate": 8.258e-05,
"loss": 0.0083,
"step": 4130
},
{
"epoch": 36.63716814159292,
"grad_norm": 0.13696545362472534,
"learning_rate": 8.278e-05,
"loss": 0.0081,
"step": 4140
},
{
"epoch": 36.72566371681416,
"grad_norm": 0.15954339504241943,
"learning_rate": 8.298000000000001e-05,
"loss": 0.0078,
"step": 4150
},
{
"epoch": 36.8141592920354,
"grad_norm": 0.1508786976337433,
"learning_rate": 8.318e-05,
"loss": 0.0093,
"step": 4160
},
{
"epoch": 36.902654867256636,
"grad_norm": 0.1239413172006607,
"learning_rate": 8.338e-05,
"loss": 0.0086,
"step": 4170
},
{
"epoch": 36.991150442477874,
"grad_norm": 0.15482401847839355,
"learning_rate": 8.358e-05,
"loss": 0.0084,
"step": 4180
},
{
"epoch": 37.07964601769911,
"grad_norm": 0.16805481910705566,
"learning_rate": 8.378e-05,
"loss": 0.0091,
"step": 4190
},
{
"epoch": 37.16814159292036,
"grad_norm": 0.21241982281208038,
"learning_rate": 8.398e-05,
"loss": 0.009,
"step": 4200
},
{
"epoch": 37.256637168141594,
"grad_norm": 0.1799900233745575,
"learning_rate": 8.418e-05,
"loss": 0.0088,
"step": 4210
},
{
"epoch": 37.34513274336283,
"grad_norm": 0.1303999274969101,
"learning_rate": 8.438e-05,
"loss": 0.0086,
"step": 4220
},
{
"epoch": 37.43362831858407,
"grad_norm": 0.16632936894893646,
"learning_rate": 8.458e-05,
"loss": 0.0089,
"step": 4230
},
{
"epoch": 37.52212389380531,
"grad_norm": 0.16269199550151825,
"learning_rate": 8.478e-05,
"loss": 0.0088,
"step": 4240
},
{
"epoch": 37.610619469026545,
"grad_norm": 0.15422070026397705,
"learning_rate": 8.498e-05,
"loss": 0.0088,
"step": 4250
},
{
"epoch": 37.69911504424779,
"grad_norm": 0.14053797721862793,
"learning_rate": 8.518000000000001e-05,
"loss": 0.0086,
"step": 4260
},
{
"epoch": 37.78761061946903,
"grad_norm": 0.15649044513702393,
"learning_rate": 8.538e-05,
"loss": 0.0086,
"step": 4270
},
{
"epoch": 37.876106194690266,
"grad_norm": 0.16178472340106964,
"learning_rate": 8.558e-05,
"loss": 0.0092,
"step": 4280
},
{
"epoch": 37.9646017699115,
"grad_norm": 0.19397889077663422,
"learning_rate": 8.578000000000001e-05,
"loss": 0.0081,
"step": 4290
},
{
"epoch": 38.05309734513274,
"grad_norm": 0.18350431323051453,
"learning_rate": 8.598e-05,
"loss": 0.0098,
"step": 4300
},
{
"epoch": 38.14159292035398,
"grad_norm": 0.12597690522670746,
"learning_rate": 8.618e-05,
"loss": 0.0087,
"step": 4310
},
{
"epoch": 38.230088495575224,
"grad_norm": 0.15449459850788116,
"learning_rate": 8.638000000000001e-05,
"loss": 0.0083,
"step": 4320
},
{
"epoch": 38.31858407079646,
"grad_norm": 0.177949458360672,
"learning_rate": 8.658e-05,
"loss": 0.0098,
"step": 4330
},
{
"epoch": 38.4070796460177,
"grad_norm": 0.1619928777217865,
"learning_rate": 8.678e-05,
"loss": 0.0078,
"step": 4340
},
{
"epoch": 38.49557522123894,
"grad_norm": 0.15013732016086578,
"learning_rate": 8.698000000000001e-05,
"loss": 0.0083,
"step": 4350
},
{
"epoch": 38.584070796460175,
"grad_norm": 0.17102967202663422,
"learning_rate": 8.718e-05,
"loss": 0.0085,
"step": 4360
},
{
"epoch": 38.67256637168141,
"grad_norm": 0.14002537727355957,
"learning_rate": 8.738000000000001e-05,
"loss": 0.0083,
"step": 4370
},
{
"epoch": 38.76106194690266,
"grad_norm": 0.14207163453102112,
"learning_rate": 8.758000000000001e-05,
"loss": 0.0083,
"step": 4380
},
{
"epoch": 38.849557522123895,
"grad_norm": 0.16239385306835175,
"learning_rate": 8.778e-05,
"loss": 0.0085,
"step": 4390
},
{
"epoch": 38.93805309734513,
"grad_norm": 0.15160299837589264,
"learning_rate": 8.798000000000001e-05,
"loss": 0.0077,
"step": 4400
},
{
"epoch": 39.02654867256637,
"grad_norm": 0.20175650715827942,
"learning_rate": 8.818000000000001e-05,
"loss": 0.008,
"step": 4410
},
{
"epoch": 39.11504424778761,
"grad_norm": 0.15453539788722992,
"learning_rate": 8.838e-05,
"loss": 0.008,
"step": 4420
},
{
"epoch": 39.203539823008846,
"grad_norm": 0.18454639613628387,
"learning_rate": 8.858000000000001e-05,
"loss": 0.0088,
"step": 4430
},
{
"epoch": 39.29203539823009,
"grad_norm": 0.2145855724811554,
"learning_rate": 8.878000000000001e-05,
"loss": 0.0086,
"step": 4440
},
{
"epoch": 39.38053097345133,
"grad_norm": 0.24812659621238708,
"learning_rate": 8.898e-05,
"loss": 0.0086,
"step": 4450
},
{
"epoch": 39.469026548672566,
"grad_norm": 0.14961618185043335,
"learning_rate": 8.918000000000001e-05,
"loss": 0.0082,
"step": 4460
},
{
"epoch": 39.557522123893804,
"grad_norm": 0.1429668813943863,
"learning_rate": 8.938e-05,
"loss": 0.0083,
"step": 4470
},
{
"epoch": 39.64601769911504,
"grad_norm": 0.12803004682064056,
"learning_rate": 8.958e-05,
"loss": 0.0076,
"step": 4480
},
{
"epoch": 39.73451327433628,
"grad_norm": 0.13182350993156433,
"learning_rate": 8.978000000000001e-05,
"loss": 0.0079,
"step": 4490
},
{
"epoch": 39.823008849557525,
"grad_norm": 0.1274401843547821,
"learning_rate": 8.998e-05,
"loss": 0.0083,
"step": 4500
},
{
"epoch": 39.91150442477876,
"grad_norm": 0.13591696321964264,
"learning_rate": 9.018000000000001e-05,
"loss": 0.0084,
"step": 4510
},
{
"epoch": 40.0,
"grad_norm": 0.1654900312423706,
"learning_rate": 9.038000000000001e-05,
"loss": 0.0082,
"step": 4520
},
{
"epoch": 40.08849557522124,
"grad_norm": 0.1695890575647354,
"learning_rate": 9.058e-05,
"loss": 0.008,
"step": 4530
},
{
"epoch": 40.176991150442475,
"grad_norm": 0.15948446094989777,
"learning_rate": 9.078000000000001e-05,
"loss": 0.0088,
"step": 4540
},
{
"epoch": 40.26548672566372,
"grad_norm": 0.12526074051856995,
"learning_rate": 9.098000000000001e-05,
"loss": 0.0076,
"step": 4550
},
{
"epoch": 40.35398230088496,
"grad_norm": 0.1264461874961853,
"learning_rate": 9.118e-05,
"loss": 0.009,
"step": 4560
},
{
"epoch": 40.442477876106196,
"grad_norm": 0.1087394431233406,
"learning_rate": 9.138e-05,
"loss": 0.0082,
"step": 4570
},
{
"epoch": 40.530973451327434,
"grad_norm": 0.12300600856542587,
"learning_rate": 9.158e-05,
"loss": 0.0081,
"step": 4580
},
{
"epoch": 40.61946902654867,
"grad_norm": 0.2179492563009262,
"learning_rate": 9.178e-05,
"loss": 0.0085,
"step": 4590
},
{
"epoch": 40.70796460176991,
"grad_norm": 0.1449461728334427,
"learning_rate": 9.198e-05,
"loss": 0.0079,
"step": 4600
},
{
"epoch": 40.796460176991154,
"grad_norm": 0.16602952778339386,
"learning_rate": 9.218e-05,
"loss": 0.0083,
"step": 4610
},
{
"epoch": 40.88495575221239,
"grad_norm": 0.1600906252861023,
"learning_rate": 9.238e-05,
"loss": 0.0085,
"step": 4620
},
{
"epoch": 40.97345132743363,
"grad_norm": 0.1462162286043167,
"learning_rate": 9.258e-05,
"loss": 0.0081,
"step": 4630
},
{
"epoch": 41.06194690265487,
"grad_norm": 0.12794408202171326,
"learning_rate": 9.278e-05,
"loss": 0.0089,
"step": 4640
},
{
"epoch": 41.150442477876105,
"grad_norm": 0.1656036525964737,
"learning_rate": 9.298e-05,
"loss": 0.0079,
"step": 4650
},
{
"epoch": 41.23893805309734,
"grad_norm": 0.17827680706977844,
"learning_rate": 9.318e-05,
"loss": 0.0086,
"step": 4660
},
{
"epoch": 41.32743362831859,
"grad_norm": 0.18648305535316467,
"learning_rate": 9.338e-05,
"loss": 0.0088,
"step": 4670
},
{
"epoch": 41.415929203539825,
"grad_norm": 0.14734520018100739,
"learning_rate": 9.358e-05,
"loss": 0.0079,
"step": 4680
},
{
"epoch": 41.50442477876106,
"grad_norm": 0.148806631565094,
"learning_rate": 9.378e-05,
"loss": 0.0082,
"step": 4690
},
{
"epoch": 41.5929203539823,
"grad_norm": 0.17897126078605652,
"learning_rate": 9.398e-05,
"loss": 0.0093,
"step": 4700
},
{
"epoch": 41.68141592920354,
"grad_norm": 0.18038998544216156,
"learning_rate": 9.418e-05,
"loss": 0.0087,
"step": 4710
},
{
"epoch": 41.769911504424776,
"grad_norm": 0.12283733487129211,
"learning_rate": 9.438e-05,
"loss": 0.0078,
"step": 4720
},
{
"epoch": 41.85840707964602,
"grad_norm": 0.1232951357960701,
"learning_rate": 9.458e-05,
"loss": 0.0085,
"step": 4730
},
{
"epoch": 41.94690265486726,
"grad_norm": 0.13569188117980957,
"learning_rate": 9.478e-05,
"loss": 0.0087,
"step": 4740
},
{
"epoch": 42.0353982300885,
"grad_norm": 0.11398052424192429,
"learning_rate": 9.498e-05,
"loss": 0.0084,
"step": 4750
},
{
"epoch": 42.123893805309734,
"grad_norm": 0.13281014561653137,
"learning_rate": 9.518000000000001e-05,
"loss": 0.0076,
"step": 4760
},
{
"epoch": 42.21238938053097,
"grad_norm": 0.17270945012569427,
"learning_rate": 9.538e-05,
"loss": 0.0082,
"step": 4770
},
{
"epoch": 42.30088495575221,
"grad_norm": 0.15716494619846344,
"learning_rate": 9.558e-05,
"loss": 0.0086,
"step": 4780
},
{
"epoch": 42.389380530973455,
"grad_norm": 0.12443627417087555,
"learning_rate": 9.578000000000001e-05,
"loss": 0.0075,
"step": 4790
},
{
"epoch": 42.47787610619469,
"grad_norm": 0.12717044353485107,
"learning_rate": 9.598e-05,
"loss": 0.008,
"step": 4800
},
{
"epoch": 42.56637168141593,
"grad_norm": 0.14651887118816376,
"learning_rate": 9.618e-05,
"loss": 0.0076,
"step": 4810
},
{
"epoch": 42.65486725663717,
"grad_norm": 0.13384868204593658,
"learning_rate": 9.638000000000001e-05,
"loss": 0.0082,
"step": 4820
},
{
"epoch": 42.743362831858406,
"grad_norm": 0.16453304886817932,
"learning_rate": 9.658e-05,
"loss": 0.0083,
"step": 4830
},
{
"epoch": 42.83185840707964,
"grad_norm": 0.17644605040550232,
"learning_rate": 9.678e-05,
"loss": 0.0079,
"step": 4840
},
{
"epoch": 42.92035398230089,
"grad_norm": 0.18505771458148956,
"learning_rate": 9.698000000000001e-05,
"loss": 0.0082,
"step": 4850
},
{
"epoch": 43.008849557522126,
"grad_norm": 0.17535194754600525,
"learning_rate": 9.718e-05,
"loss": 0.0082,
"step": 4860
},
{
"epoch": 43.097345132743364,
"grad_norm": 0.15071967244148254,
"learning_rate": 9.738e-05,
"loss": 0.0081,
"step": 4870
},
{
"epoch": 43.1858407079646,
"grad_norm": 0.11829250305891037,
"learning_rate": 9.758000000000001e-05,
"loss": 0.0077,
"step": 4880
},
{
"epoch": 43.27433628318584,
"grad_norm": 0.1169343814253807,
"learning_rate": 9.778e-05,
"loss": 0.0075,
"step": 4890
},
{
"epoch": 43.36283185840708,
"grad_norm": 0.12891387939453125,
"learning_rate": 9.798000000000001e-05,
"loss": 0.0073,
"step": 4900
},
{
"epoch": 43.45132743362832,
"grad_norm": 0.1629272848367691,
"learning_rate": 9.818000000000001e-05,
"loss": 0.0081,
"step": 4910
},
{
"epoch": 43.53982300884956,
"grad_norm": 0.1464705765247345,
"learning_rate": 9.838e-05,
"loss": 0.0082,
"step": 4920
},
{
"epoch": 43.6283185840708,
"grad_norm": 0.13340625166893005,
"learning_rate": 9.858000000000001e-05,
"loss": 0.0091,
"step": 4930
},
{
"epoch": 43.716814159292035,
"grad_norm": 0.1393061727285385,
"learning_rate": 9.878e-05,
"loss": 0.008,
"step": 4940
},
{
"epoch": 43.80530973451327,
"grad_norm": 0.1344250589609146,
"learning_rate": 9.898e-05,
"loss": 0.0078,
"step": 4950
},
{
"epoch": 43.89380530973451,
"grad_norm": 0.12341362237930298,
"learning_rate": 9.918000000000001e-05,
"loss": 0.0083,
"step": 4960
},
{
"epoch": 43.982300884955755,
"grad_norm": 0.11524364352226257,
"learning_rate": 9.938e-05,
"loss": 0.0083,
"step": 4970
},
{
"epoch": 44.07079646017699,
"grad_norm": 0.1671265959739685,
"learning_rate": 9.958e-05,
"loss": 0.008,
"step": 4980
},
{
"epoch": 44.15929203539823,
"grad_norm": 0.12943553924560547,
"learning_rate": 9.978000000000001e-05,
"loss": 0.0084,
"step": 4990
},
{
"epoch": 44.24778761061947,
"grad_norm": 0.12595415115356445,
"learning_rate": 9.998e-05,
"loss": 0.0077,
"step": 5000
},
{
"epoch": 44.336283185840706,
"grad_norm": 0.1713840663433075,
"learning_rate": 9.999999778549045e-05,
"loss": 0.0087,
"step": 5010
},
{
"epoch": 44.424778761061944,
"grad_norm": 0.16503138840198517,
"learning_rate": 9.999999013039593e-05,
"loss": 0.0085,
"step": 5020
},
{
"epoch": 44.51327433628319,
"grad_norm": 0.14200608432292938,
"learning_rate": 9.999997700737766e-05,
"loss": 0.0082,
"step": 5030
},
{
"epoch": 44.60176991150443,
"grad_norm": 0.13645635545253754,
"learning_rate": 9.999995841643709e-05,
"loss": 0.0082,
"step": 5040
},
{
"epoch": 44.690265486725664,
"grad_norm": 0.12640200555324554,
"learning_rate": 9.999993435757623e-05,
"loss": 0.0085,
"step": 5050
},
{
"epoch": 44.7787610619469,
"grad_norm": 0.11563556641340256,
"learning_rate": 9.999990483079773e-05,
"loss": 0.0083,
"step": 5060
},
{
"epoch": 44.86725663716814,
"grad_norm": 0.1313731074333191,
"learning_rate": 9.999986983610481e-05,
"loss": 0.0075,
"step": 5070
},
{
"epoch": 44.95575221238938,
"grad_norm": 0.1426715850830078,
"learning_rate": 9.99998293735013e-05,
"loss": 0.0075,
"step": 5080
},
{
"epoch": 45.04424778761062,
"grad_norm": 0.1411140263080597,
"learning_rate": 9.999978344299161e-05,
"loss": 0.008,
"step": 5090
},
{
"epoch": 45.13274336283186,
"grad_norm": 0.1875256448984146,
"learning_rate": 9.99997320445808e-05,
"loss": 0.0083,
"step": 5100
},
{
"epoch": 45.2212389380531,
"grad_norm": 0.1731545478105545,
"learning_rate": 9.999967517827444e-05,
"loss": 0.0081,
"step": 5110
},
{
"epoch": 45.309734513274336,
"grad_norm": 0.12668772041797638,
"learning_rate": 9.999961284407879e-05,
"loss": 0.0079,
"step": 5120
},
{
"epoch": 45.39823008849557,
"grad_norm": 0.1660381704568863,
"learning_rate": 9.999954504200067e-05,
"loss": 0.0085,
"step": 5130
},
{
"epoch": 45.48672566371681,
"grad_norm": 0.159927099943161,
"learning_rate": 9.999947177204744e-05,
"loss": 0.0079,
"step": 5140
},
{
"epoch": 45.575221238938056,
"grad_norm": 0.08492934703826904,
"learning_rate": 9.999939303422718e-05,
"loss": 0.0078,
"step": 5150
},
{
"epoch": 45.663716814159294,
"grad_norm": 0.13286644220352173,
"learning_rate": 9.999930882854847e-05,
"loss": 0.0077,
"step": 5160
},
{
"epoch": 45.75221238938053,
"grad_norm": 0.165427565574646,
"learning_rate": 9.999921915502051e-05,
"loss": 0.0078,
"step": 5170
},
{
"epoch": 45.84070796460177,
"grad_norm": 0.1565057635307312,
"learning_rate": 9.99991240136531e-05,
"loss": 0.0084,
"step": 5180
},
{
"epoch": 45.92920353982301,
"grad_norm": 0.13537095487117767,
"learning_rate": 9.999902340445668e-05,
"loss": 0.0078,
"step": 5190
},
{
"epoch": 46.017699115044245,
"grad_norm": 0.1399421989917755,
"learning_rate": 9.999891732744224e-05,
"loss": 0.0076,
"step": 5200
},
{
"epoch": 46.10619469026549,
"grad_norm": 0.14480353891849518,
"learning_rate": 9.999880578262135e-05,
"loss": 0.0076,
"step": 5210
},
{
"epoch": 46.19469026548673,
"grad_norm": 0.15610076487064362,
"learning_rate": 9.999868877000624e-05,
"loss": 0.0076,
"step": 5220
},
{
"epoch": 46.283185840707965,
"grad_norm": 0.18092435598373413,
"learning_rate": 9.99985662896097e-05,
"loss": 0.0079,
"step": 5230
},
{
"epoch": 46.3716814159292,
"grad_norm": 0.12604007124900818,
"learning_rate": 9.999843834144513e-05,
"loss": 0.0078,
"step": 5240
},
{
"epoch": 46.46017699115044,
"grad_norm": 0.21595871448516846,
"learning_rate": 9.99983049255265e-05,
"loss": 0.008,
"step": 5250
},
{
"epoch": 46.54867256637168,
"grad_norm": 0.16414035856723785,
"learning_rate": 9.999816604186843e-05,
"loss": 0.0076,
"step": 5260
},
{
"epoch": 46.63716814159292,
"grad_norm": 0.14966443181037903,
"learning_rate": 9.999802169048609e-05,
"loss": 0.0085,
"step": 5270
},
{
"epoch": 46.72566371681416,
"grad_norm": 0.1894393116235733,
"learning_rate": 9.999787187139527e-05,
"loss": 0.0078,
"step": 5280
},
{
"epoch": 46.8141592920354,
"grad_norm": 0.1692032665014267,
"learning_rate": 9.999771658461234e-05,
"loss": 0.0079,
"step": 5290
},
{
"epoch": 46.902654867256636,
"grad_norm": 0.18700039386749268,
"learning_rate": 9.999755583015431e-05,
"loss": 0.008,
"step": 5300
},
{
"epoch": 46.991150442477874,
"grad_norm": 0.1777140349149704,
"learning_rate": 9.999738960803874e-05,
"loss": 0.0087,
"step": 5310
},
{
"epoch": 47.07964601769911,
"grad_norm": 0.1733047068119049,
"learning_rate": 9.99972179182838e-05,
"loss": 0.0079,
"step": 5320
},
{
"epoch": 47.16814159292036,
"grad_norm": 0.16074609756469727,
"learning_rate": 9.99970407609083e-05,
"loss": 0.0083,
"step": 5330
},
{
"epoch": 47.256637168141594,
"grad_norm": 0.11764027923345566,
"learning_rate": 9.999685813593159e-05,
"loss": 0.0079,
"step": 5340
},
{
"epoch": 47.34513274336283,
"grad_norm": 0.14405815303325653,
"learning_rate": 9.999667004337362e-05,
"loss": 0.0075,
"step": 5350
},
{
"epoch": 47.43362831858407,
"grad_norm": 0.13123449683189392,
"learning_rate": 9.9996476483255e-05,
"loss": 0.0071,
"step": 5360
},
{
"epoch": 47.52212389380531,
"grad_norm": 0.1139673963189125,
"learning_rate": 9.999627745559688e-05,
"loss": 0.0078,
"step": 5370
},
{
"epoch": 47.610619469026545,
"grad_norm": 0.14024882018566132,
"learning_rate": 9.999607296042101e-05,
"loss": 0.0072,
"step": 5380
},
{
"epoch": 47.69911504424779,
"grad_norm": 0.14336134493350983,
"learning_rate": 9.99958629977498e-05,
"loss": 0.0081,
"step": 5390
},
{
"epoch": 47.78761061946903,
"grad_norm": 0.15486717224121094,
"learning_rate": 9.999564756760615e-05,
"loss": 0.0078,
"step": 5400
},
{
"epoch": 47.876106194690266,
"grad_norm": 0.14090169966220856,
"learning_rate": 9.999542667001366e-05,
"loss": 0.0085,
"step": 5410
},
{
"epoch": 47.9646017699115,
"grad_norm": 0.13845594227313995,
"learning_rate": 9.999520030499647e-05,
"loss": 0.0078,
"step": 5420
},
{
"epoch": 48.05309734513274,
"grad_norm": 0.14902648329734802,
"learning_rate": 9.999496847257936e-05,
"loss": 0.0078,
"step": 5430
},
{
"epoch": 48.14159292035398,
"grad_norm": 0.13788892328739166,
"learning_rate": 9.999473117278764e-05,
"loss": 0.007,
"step": 5440
},
{
"epoch": 48.230088495575224,
"grad_norm": 0.16489292681217194,
"learning_rate": 9.999448840564731e-05,
"loss": 0.0078,
"step": 5450
},
{
"epoch": 48.31858407079646,
"grad_norm": 0.13535885512828827,
"learning_rate": 9.999424017118488e-05,
"loss": 0.0082,
"step": 5460
},
{
"epoch": 48.4070796460177,
"grad_norm": 0.14194975793361664,
"learning_rate": 9.999398646942751e-05,
"loss": 0.0075,
"step": 5470
},
{
"epoch": 48.49557522123894,
"grad_norm": 0.12652724981307983,
"learning_rate": 9.999372730040296e-05,
"loss": 0.0076,
"step": 5480
},
{
"epoch": 48.584070796460175,
"grad_norm": 0.17050671577453613,
"learning_rate": 9.999346266413953e-05,
"loss": 0.0081,
"step": 5490
},
{
"epoch": 48.67256637168141,
"grad_norm": 0.17964069545269012,
"learning_rate": 9.99931925606662e-05,
"loss": 0.008,
"step": 5500
},
{
"epoch": 48.76106194690266,
"grad_norm": 0.1285768449306488,
"learning_rate": 9.99929169900125e-05,
"loss": 0.0073,
"step": 5510
},
{
"epoch": 48.849557522123895,
"grad_norm": 0.12192533910274506,
"learning_rate": 9.999263595220855e-05,
"loss": 0.008,
"step": 5520
},
{
"epoch": 48.93805309734513,
"grad_norm": 0.14079219102859497,
"learning_rate": 9.99923494472851e-05,
"loss": 0.0081,
"step": 5530
},
{
"epoch": 49.02654867256637,
"grad_norm": 0.11866286396980286,
"learning_rate": 9.999205747527348e-05,
"loss": 0.0081,
"step": 5540
},
{
"epoch": 49.11504424778761,
"grad_norm": 0.17622506618499756,
"learning_rate": 9.999176003620561e-05,
"loss": 0.0074,
"step": 5550
},
{
"epoch": 49.203539823008846,
"grad_norm": 0.1561884731054306,
"learning_rate": 9.999145713011405e-05,
"loss": 0.0071,
"step": 5560
},
{
"epoch": 49.29203539823009,
"grad_norm": 0.15959735214710236,
"learning_rate": 9.999114875703186e-05,
"loss": 0.0072,
"step": 5570
},
{
"epoch": 49.38053097345133,
"grad_norm": 0.13429850339889526,
"learning_rate": 9.999083491699281e-05,
"loss": 0.0075,
"step": 5580
},
{
"epoch": 49.469026548672566,
"grad_norm": 0.13098657131195068,
"learning_rate": 9.999051561003123e-05,
"loss": 0.0068,
"step": 5590
},
{
"epoch": 49.557522123893804,
"grad_norm": 0.1348903477191925,
"learning_rate": 9.999019083618202e-05,
"loss": 0.008,
"step": 5600
},
{
"epoch": 49.64601769911504,
"grad_norm": 0.14096537232398987,
"learning_rate": 9.99898605954807e-05,
"loss": 0.0071,
"step": 5610
},
{
"epoch": 49.73451327433628,
"grad_norm": 0.09773388504981995,
"learning_rate": 9.998952488796338e-05,
"loss": 0.0071,
"step": 5620
},
{
"epoch": 49.823008849557525,
"grad_norm": 0.1066397875547409,
"learning_rate": 9.998918371366676e-05,
"loss": 0.007,
"step": 5630
},
{
"epoch": 49.91150442477876,
"grad_norm": 0.13253827393054962,
"learning_rate": 9.99888370726282e-05,
"loss": 0.0081,
"step": 5640
},
{
"epoch": 50.0,
"grad_norm": 0.10580658912658691,
"learning_rate": 9.998848496488556e-05,
"loss": 0.0074,
"step": 5650
},
{
"epoch": 50.08849557522124,
"grad_norm": 0.12896136939525604,
"learning_rate": 9.998812739047736e-05,
"loss": 0.0071,
"step": 5660
},
{
"epoch": 50.176991150442475,
"grad_norm": 0.12507116794586182,
"learning_rate": 9.99877643494427e-05,
"loss": 0.0081,
"step": 5670
},
{
"epoch": 50.26548672566372,
"grad_norm": 0.13797657191753387,
"learning_rate": 9.998739584182128e-05,
"loss": 0.0067,
"step": 5680
},
{
"epoch": 50.35398230088496,
"grad_norm": 0.1256704032421112,
"learning_rate": 9.998702186765342e-05,
"loss": 0.0073,
"step": 5690
},
{
"epoch": 50.442477876106196,
"grad_norm": 0.14041030406951904,
"learning_rate": 9.998664242698e-05,
"loss": 0.0078,
"step": 5700
},
{
"epoch": 50.530973451327434,
"grad_norm": 0.13407254219055176,
"learning_rate": 9.998625751984251e-05,
"loss": 0.0072,
"step": 5710
},
{
"epoch": 50.61946902654867,
"grad_norm": 0.11884211003780365,
"learning_rate": 9.998586714628307e-05,
"loss": 0.0073,
"step": 5720
},
{
"epoch": 50.70796460176991,
"grad_norm": 0.10131306946277618,
"learning_rate": 9.998547130634432e-05,
"loss": 0.0068,
"step": 5730
},
{
"epoch": 50.796460176991154,
"grad_norm": 0.12167004495859146,
"learning_rate": 9.99850700000696e-05,
"loss": 0.0066,
"step": 5740
},
{
"epoch": 50.88495575221239,
"grad_norm": 0.10352536290884018,
"learning_rate": 9.998466322750278e-05,
"loss": 0.0071,
"step": 5750
},
{
"epoch": 50.97345132743363,
"grad_norm": 0.10714670270681381,
"learning_rate": 9.998425098868834e-05,
"loss": 0.0075,
"step": 5760
},
{
"epoch": 51.06194690265487,
"grad_norm": 0.11024036258459091,
"learning_rate": 9.998383328367136e-05,
"loss": 0.0071,
"step": 5770
},
{
"epoch": 51.150442477876105,
"grad_norm": 0.10746333748102188,
"learning_rate": 9.99834101124975e-05,
"loss": 0.0071,
"step": 5780
},
{
"epoch": 51.23893805309734,
"grad_norm": 0.11430933326482773,
"learning_rate": 9.998298147521309e-05,
"loss": 0.0067,
"step": 5790
},
{
"epoch": 51.32743362831859,
"grad_norm": 0.13216055929660797,
"learning_rate": 9.998254737186496e-05,
"loss": 0.0068,
"step": 5800
},
{
"epoch": 51.415929203539825,
"grad_norm": 0.14904490113258362,
"learning_rate": 9.99821078025006e-05,
"loss": 0.0075,
"step": 5810
},
{
"epoch": 51.50442477876106,
"grad_norm": 0.1333301067352295,
"learning_rate": 9.998166276716807e-05,
"loss": 0.0075,
"step": 5820
},
{
"epoch": 51.5929203539823,
"grad_norm": 0.13811925053596497,
"learning_rate": 9.998121226591606e-05,
"loss": 0.0069,
"step": 5830
},
{
"epoch": 51.68141592920354,
"grad_norm": 0.1468496173620224,
"learning_rate": 9.998075629879382e-05,
"loss": 0.0072,
"step": 5840
},
{
"epoch": 51.769911504424776,
"grad_norm": 0.14314375817775726,
"learning_rate": 9.99802948658512e-05,
"loss": 0.0075,
"step": 5850
},
{
"epoch": 51.85840707964602,
"grad_norm": 0.13941283524036407,
"learning_rate": 9.99798279671387e-05,
"loss": 0.0077,
"step": 5860
},
{
"epoch": 51.94690265486726,
"grad_norm": 0.13589324057102203,
"learning_rate": 9.997935560270734e-05,
"loss": 0.0074,
"step": 5870
},
{
"epoch": 52.0353982300885,
"grad_norm": 0.14620272815227509,
"learning_rate": 9.997887777260879e-05,
"loss": 0.0075,
"step": 5880
},
{
"epoch": 52.123893805309734,
"grad_norm": 0.12211687862873077,
"learning_rate": 9.997839447689532e-05,
"loss": 0.0068,
"step": 5890
},
{
"epoch": 52.21238938053097,
"grad_norm": 0.14531292021274567,
"learning_rate": 9.997790571561978e-05,
"loss": 0.0073,
"step": 5900
},
{
"epoch": 52.30088495575221,
"grad_norm": 0.19999101758003235,
"learning_rate": 9.99774114888356e-05,
"loss": 0.0079,
"step": 5910
},
{
"epoch": 52.389380530973455,
"grad_norm": 0.12175704538822174,
"learning_rate": 9.997691179659684e-05,
"loss": 0.0069,
"step": 5920
},
{
"epoch": 52.47787610619469,
"grad_norm": 0.14504700899124146,
"learning_rate": 9.997640663895815e-05,
"loss": 0.0071,
"step": 5930
},
{
"epoch": 52.56637168141593,
"grad_norm": 0.11807917058467865,
"learning_rate": 9.997589601597477e-05,
"loss": 0.0071,
"step": 5940
},
{
"epoch": 52.65486725663717,
"grad_norm": 0.14919745922088623,
"learning_rate": 9.997537992770252e-05,
"loss": 0.0076,
"step": 5950
},
{
"epoch": 52.743362831858406,
"grad_norm": 0.16913174092769623,
"learning_rate": 9.997485837419788e-05,
"loss": 0.0079,
"step": 5960
},
{
"epoch": 52.83185840707964,
"grad_norm": 0.1366829127073288,
"learning_rate": 9.997433135551786e-05,
"loss": 0.007,
"step": 5970
},
{
"epoch": 52.92035398230089,
"grad_norm": 0.18243227899074554,
"learning_rate": 9.997379887172009e-05,
"loss": 0.0074,
"step": 5980
},
{
"epoch": 53.008849557522126,
"grad_norm": 0.14823484420776367,
"learning_rate": 9.997326092286281e-05,
"loss": 0.0073,
"step": 5990
},
{
"epoch": 53.097345132743364,
"grad_norm": 0.1373046487569809,
"learning_rate": 9.997271750900486e-05,
"loss": 0.0075,
"step": 6000
},
{
"epoch": 53.1858407079646,
"grad_norm": 0.1174549013376236,
"learning_rate": 9.997216863020565e-05,
"loss": 0.007,
"step": 6010
},
{
"epoch": 53.27433628318584,
"grad_norm": 0.14685441553592682,
"learning_rate": 9.99716142865252e-05,
"loss": 0.0072,
"step": 6020
},
{
"epoch": 53.36283185840708,
"grad_norm": 0.12311328202486038,
"learning_rate": 9.997105447802415e-05,
"loss": 0.007,
"step": 6030
},
{
"epoch": 53.45132743362832,
"grad_norm": 0.14539910852909088,
"learning_rate": 9.997048920476373e-05,
"loss": 0.007,
"step": 6040
},
{
"epoch": 53.53982300884956,
"grad_norm": 0.1585048884153366,
"learning_rate": 9.996991846680572e-05,
"loss": 0.007,
"step": 6050
},
{
"epoch": 53.6283185840708,
"grad_norm": 0.1574210524559021,
"learning_rate": 9.996934226421257e-05,
"loss": 0.0066,
"step": 6060
},
{
"epoch": 53.716814159292035,
"grad_norm": 0.13777823746204376,
"learning_rate": 9.996876059704726e-05,
"loss": 0.0073,
"step": 6070
},
{
"epoch": 53.80530973451327,
"grad_norm": 0.1269245743751526,
"learning_rate": 9.996817346537343e-05,
"loss": 0.0076,
"step": 6080
},
{
"epoch": 53.89380530973451,
"grad_norm": 0.12032314389944077,
"learning_rate": 9.996758086925526e-05,
"loss": 0.0077,
"step": 6090
},
{
"epoch": 53.982300884955755,
"grad_norm": 0.1316395401954651,
"learning_rate": 9.996698280875759e-05,
"loss": 0.007,
"step": 6100
},
{
"epoch": 54.07079646017699,
"grad_norm": 0.13027605414390564,
"learning_rate": 9.99663792839458e-05,
"loss": 0.0082,
"step": 6110
},
{
"epoch": 54.15929203539823,
"grad_norm": 0.09656640887260437,
"learning_rate": 9.99657702948859e-05,
"loss": 0.0066,
"step": 6120
},
{
"epoch": 54.24778761061947,
"grad_norm": 0.10608144849538803,
"learning_rate": 9.996515584164448e-05,
"loss": 0.0069,
"step": 6130
},
{
"epoch": 54.336283185840706,
"grad_norm": 0.13142161071300507,
"learning_rate": 9.996453592428873e-05,
"loss": 0.0078,
"step": 6140
},
{
"epoch": 54.424778761061944,
"grad_norm": 0.15804174542427063,
"learning_rate": 9.996391054288646e-05,
"loss": 0.0071,
"step": 6150
},
{
"epoch": 54.51327433628319,
"grad_norm": 0.13912999629974365,
"learning_rate": 9.996327969750605e-05,
"loss": 0.0079,
"step": 6160
},
{
"epoch": 54.60176991150443,
"grad_norm": 0.15766675770282745,
"learning_rate": 9.996264338821649e-05,
"loss": 0.0073,
"step": 6170
},
{
"epoch": 54.690265486725664,
"grad_norm": 0.11186890304088593,
"learning_rate": 9.996200161508735e-05,
"loss": 0.0069,
"step": 6180
},
{
"epoch": 54.7787610619469,
"grad_norm": 0.10806110501289368,
"learning_rate": 9.996135437818885e-05,
"loss": 0.007,
"step": 6190
},
{
"epoch": 54.86725663716814,
"grad_norm": 0.16035513579845428,
"learning_rate": 9.996070167759175e-05,
"loss": 0.007,
"step": 6200
},
{
"epoch": 54.95575221238938,
"grad_norm": 0.16528643667697906,
"learning_rate": 9.996004351336743e-05,
"loss": 0.0077,
"step": 6210
},
{
"epoch": 55.04424778761062,
"grad_norm": 0.15115606784820557,
"learning_rate": 9.995937988558785e-05,
"loss": 0.0075,
"step": 6220
},
{
"epoch": 55.13274336283186,
"grad_norm": 0.13698093593120575,
"learning_rate": 9.995871079432561e-05,
"loss": 0.0073,
"step": 6230
},
{
"epoch": 55.2212389380531,
"grad_norm": 0.13265031576156616,
"learning_rate": 9.995803623965389e-05,
"loss": 0.0081,
"step": 6240
},
{
"epoch": 55.309734513274336,
"grad_norm": 0.18100149929523468,
"learning_rate": 9.995735622164641e-05,
"loss": 0.0072,
"step": 6250
},
{
"epoch": 55.39823008849557,
"grad_norm": 0.1533287614583969,
"learning_rate": 9.995667074037758e-05,
"loss": 0.0067,
"step": 6260
},
{
"epoch": 55.48672566371681,
"grad_norm": 0.13572955131530762,
"learning_rate": 9.995597979592232e-05,
"loss": 0.0077,
"step": 6270
},
{
"epoch": 55.575221238938056,
"grad_norm": 0.10485535860061646,
"learning_rate": 9.995528338835625e-05,
"loss": 0.0071,
"step": 6280
},
{
"epoch": 55.663716814159294,
"grad_norm": 0.10920023918151855,
"learning_rate": 9.995458151775547e-05,
"loss": 0.0071,
"step": 6290
},
{
"epoch": 55.75221238938053,
"grad_norm": 0.10756077617406845,
"learning_rate": 9.995387418419677e-05,
"loss": 0.0067,
"step": 6300
},
{
"epoch": 55.84070796460177,
"grad_norm": 0.12476219236850739,
"learning_rate": 9.99531613877575e-05,
"loss": 0.007,
"step": 6310
},
{
"epoch": 55.92920353982301,
"grad_norm": 0.11445712298154831,
"learning_rate": 9.995244312851559e-05,
"loss": 0.007,
"step": 6320
},
{
"epoch": 56.017699115044245,
"grad_norm": 0.1060228943824768,
"learning_rate": 9.995171940654961e-05,
"loss": 0.007,
"step": 6330
},
{
"epoch": 56.10619469026549,
"grad_norm": 0.15434004366397858,
"learning_rate": 9.995099022193871e-05,
"loss": 0.0075,
"step": 6340
},
{
"epoch": 56.19469026548673,
"grad_norm": 0.10044840723276138,
"learning_rate": 9.995025557476261e-05,
"loss": 0.0065,
"step": 6350
},
{
"epoch": 56.283185840707965,
"grad_norm": 0.10664913058280945,
"learning_rate": 9.994951546510165e-05,
"loss": 0.0077,
"step": 6360
},
{
"epoch": 56.3716814159292,
"grad_norm": 0.10567296296358109,
"learning_rate": 9.994876989303679e-05,
"loss": 0.0067,
"step": 6370
},
{
"epoch": 56.46017699115044,
"grad_norm": 0.12378619611263275,
"learning_rate": 9.994801885864955e-05,
"loss": 0.0071,
"step": 6380
},
{
"epoch": 56.54867256637168,
"grad_norm": 0.1165599524974823,
"learning_rate": 9.994726236202205e-05,
"loss": 0.0072,
"step": 6390
},
{
"epoch": 56.63716814159292,
"grad_norm": 0.13224077224731445,
"learning_rate": 9.994650040323704e-05,
"loss": 0.0064,
"step": 6400
},
{
"epoch": 56.72566371681416,
"grad_norm": 0.12277481704950333,
"learning_rate": 9.994573298237784e-05,
"loss": 0.0072,
"step": 6410
},
{
"epoch": 56.8141592920354,
"grad_norm": 0.11215825378894806,
"learning_rate": 9.994496009952837e-05,
"loss": 0.0069,
"step": 6420
},
{
"epoch": 56.902654867256636,
"grad_norm": 0.13313978910446167,
"learning_rate": 9.994418175477316e-05,
"loss": 0.0068,
"step": 6430
},
{
"epoch": 56.991150442477874,
"grad_norm": 0.1526990532875061,
"learning_rate": 9.994339794819733e-05,
"loss": 0.0066,
"step": 6440
},
{
"epoch": 57.07964601769911,
"grad_norm": 0.11533765494823456,
"learning_rate": 9.994260867988658e-05,
"loss": 0.0069,
"step": 6450
},
{
"epoch": 57.16814159292036,
"grad_norm": 0.1275179386138916,
"learning_rate": 9.994181394992723e-05,
"loss": 0.0072,
"step": 6460
},
{
"epoch": 57.256637168141594,
"grad_norm": 0.12770402431488037,
"learning_rate": 9.994101375840618e-05,
"loss": 0.007,
"step": 6470
},
{
"epoch": 57.34513274336283,
"grad_norm": 0.13415737450122833,
"learning_rate": 9.994020810541098e-05,
"loss": 0.0069,
"step": 6480
},
{
"epoch": 57.43362831858407,
"grad_norm": 0.11421578377485275,
"learning_rate": 9.99393969910297e-05,
"loss": 0.0062,
"step": 6490
},
{
"epoch": 57.52212389380531,
"grad_norm": 0.1381864696741104,
"learning_rate": 9.993858041535104e-05,
"loss": 0.0067,
"step": 6500
},
{
"epoch": 57.610619469026545,
"grad_norm": 0.12277805805206299,
"learning_rate": 9.99377583784643e-05,
"loss": 0.0071,
"step": 6510
},
{
"epoch": 57.69911504424779,
"grad_norm": 0.11379420757293701,
"learning_rate": 9.993693088045939e-05,
"loss": 0.0067,
"step": 6520
},
{
"epoch": 57.78761061946903,
"grad_norm": 0.10273974388837814,
"learning_rate": 9.99360979214268e-05,
"loss": 0.006,
"step": 6530
},
{
"epoch": 57.876106194690266,
"grad_norm": 0.09692050516605377,
"learning_rate": 9.99352595014576e-05,
"loss": 0.0068,
"step": 6540
},
{
"epoch": 57.9646017699115,
"grad_norm": 0.10250181704759598,
"learning_rate": 9.993441562064354e-05,
"loss": 0.0064,
"step": 6550
},
{
"epoch": 58.05309734513274,
"grad_norm": 0.10750356316566467,
"learning_rate": 9.993356627907685e-05,
"loss": 0.0071,
"step": 6560
},
{
"epoch": 58.14159292035398,
"grad_norm": 0.13370372354984283,
"learning_rate": 9.99327114768504e-05,
"loss": 0.0067,
"step": 6570
},
{
"epoch": 58.230088495575224,
"grad_norm": 0.14788718521595,
"learning_rate": 9.99318512140577e-05,
"loss": 0.0067,
"step": 6580
},
{
"epoch": 58.31858407079646,
"grad_norm": 0.15469089150428772,
"learning_rate": 9.993098549079284e-05,
"loss": 0.0073,
"step": 6590
},
{
"epoch": 58.4070796460177,
"grad_norm": 0.16022972762584686,
"learning_rate": 9.993011430715047e-05,
"loss": 0.0073,
"step": 6600
},
{
"epoch": 58.49557522123894,
"grad_norm": 0.12116739153862,
"learning_rate": 9.992923766322586e-05,
"loss": 0.0068,
"step": 6610
},
{
"epoch": 58.584070796460175,
"grad_norm": 0.12725108861923218,
"learning_rate": 9.99283555591149e-05,
"loss": 0.0065,
"step": 6620
},
{
"epoch": 58.67256637168141,
"grad_norm": 0.11936835944652557,
"learning_rate": 9.992746799491404e-05,
"loss": 0.0065,
"step": 6630
},
{
"epoch": 58.76106194690266,
"grad_norm": 0.13782432675361633,
"learning_rate": 9.992657497072033e-05,
"loss": 0.0069,
"step": 6640
},
{
"epoch": 58.849557522123895,
"grad_norm": 0.12118148803710938,
"learning_rate": 9.992567648663147e-05,
"loss": 0.007,
"step": 6650
},
{
"epoch": 58.93805309734513,
"grad_norm": 0.12604814767837524,
"learning_rate": 9.992477254274568e-05,
"loss": 0.0061,
"step": 6660
},
{
"epoch": 59.02654867256637,
"grad_norm": 0.13500675559043884,
"learning_rate": 9.992386313916183e-05,
"loss": 0.0067,
"step": 6670
},
{
"epoch": 59.11504424778761,
"grad_norm": 0.12500520050525665,
"learning_rate": 9.992294827597934e-05,
"loss": 0.0072,
"step": 6680
},
{
"epoch": 59.203539823008846,
"grad_norm": 0.1168852224946022,
"learning_rate": 9.992202795329831e-05,
"loss": 0.0062,
"step": 6690
},
{
"epoch": 59.29203539823009,
"grad_norm": 0.12165257334709167,
"learning_rate": 9.992110217121936e-05,
"loss": 0.0065,
"step": 6700
},
{
"epoch": 59.38053097345133,
"grad_norm": 0.13444724678993225,
"learning_rate": 9.992017092984372e-05,
"loss": 0.0068,
"step": 6710
},
{
"epoch": 59.469026548672566,
"grad_norm": 0.15891699492931366,
"learning_rate": 9.991923422927326e-05,
"loss": 0.0071,
"step": 6720
},
{
"epoch": 59.557522123893804,
"grad_norm": 0.1080576628446579,
"learning_rate": 9.991829206961037e-05,
"loss": 0.0068,
"step": 6730
},
{
"epoch": 59.64601769911504,
"grad_norm": 0.10575453191995621,
"learning_rate": 9.991734445095813e-05,
"loss": 0.0066,
"step": 6740
},
{
"epoch": 59.73451327433628,
"grad_norm": 0.08989618718624115,
"learning_rate": 9.991639137342015e-05,
"loss": 0.007,
"step": 6750
},
{
"epoch": 59.823008849557525,
"grad_norm": 0.12456994503736496,
"learning_rate": 9.991543283710064e-05,
"loss": 0.007,
"step": 6760
},
{
"epoch": 59.91150442477876,
"grad_norm": 0.10034193098545074,
"learning_rate": 9.991446884210445e-05,
"loss": 0.007,
"step": 6770
},
{
"epoch": 60.0,
"grad_norm": 0.11604133248329163,
"learning_rate": 9.9913499388537e-05,
"loss": 0.0067,
"step": 6780
},
{
"epoch": 60.08849557522124,
"grad_norm": 0.13283856213092804,
"learning_rate": 9.99125244765043e-05,
"loss": 0.0067,
"step": 6790
},
{
"epoch": 60.176991150442475,
"grad_norm": 0.11176202446222305,
"learning_rate": 9.991154410611296e-05,
"loss": 0.0067,
"step": 6800
},
{
"epoch": 60.26548672566372,
"grad_norm": 0.11676081269979477,
"learning_rate": 9.99105582774702e-05,
"loss": 0.0068,
"step": 6810
},
{
"epoch": 60.35398230088496,
"grad_norm": 0.1053016260266304,
"learning_rate": 9.990956699068384e-05,
"loss": 0.0066,
"step": 6820
},
{
"epoch": 60.442477876106196,
"grad_norm": 0.08537508547306061,
"learning_rate": 9.990857024586224e-05,
"loss": 0.0061,
"step": 6830
},
{
"epoch": 60.530973451327434,
"grad_norm": 0.08726681023836136,
"learning_rate": 9.990756804311446e-05,
"loss": 0.0067,
"step": 6840
},
{
"epoch": 60.61946902654867,
"grad_norm": 0.13327188789844513,
"learning_rate": 9.990656038255006e-05,
"loss": 0.0065,
"step": 6850
},
{
"epoch": 60.70796460176991,
"grad_norm": 0.16907227039337158,
"learning_rate": 9.990554726427926e-05,
"loss": 0.0066,
"step": 6860
},
{
"epoch": 60.796460176991154,
"grad_norm": 0.12004328519105911,
"learning_rate": 9.990452868841284e-05,
"loss": 0.0069,
"step": 6870
},
{
"epoch": 60.88495575221239,
"grad_norm": 0.10381284356117249,
"learning_rate": 9.99035046550622e-05,
"loss": 0.0066,
"step": 6880
},
{
"epoch": 60.97345132743363,
"grad_norm": 0.11252574622631073,
"learning_rate": 9.99024751643393e-05,
"loss": 0.0065,
"step": 6890
},
{
"epoch": 61.06194690265487,
"grad_norm": 0.14611081779003143,
"learning_rate": 9.990144021635677e-05,
"loss": 0.0067,
"step": 6900
},
{
"epoch": 61.150442477876105,
"grad_norm": 0.09365980327129364,
"learning_rate": 9.990039981122775e-05,
"loss": 0.0068,
"step": 6910
},
{
"epoch": 61.23893805309734,
"grad_norm": 0.13181225955486298,
"learning_rate": 9.989935394906602e-05,
"loss": 0.0068,
"step": 6920
},
{
"epoch": 61.32743362831859,
"grad_norm": 0.1608838140964508,
"learning_rate": 9.989830262998598e-05,
"loss": 0.0067,
"step": 6930
},
{
"epoch": 61.415929203539825,
"grad_norm": 0.14466796815395355,
"learning_rate": 9.989724585410259e-05,
"loss": 0.0068,
"step": 6940
},
{
"epoch": 61.50442477876106,
"grad_norm": 0.15593942999839783,
"learning_rate": 9.989618362153139e-05,
"loss": 0.0067,
"step": 6950
},
{
"epoch": 61.5929203539823,
"grad_norm": 0.13513709604740143,
"learning_rate": 9.989511593238859e-05,
"loss": 0.0072,
"step": 6960
},
{
"epoch": 61.68141592920354,
"grad_norm": 0.10657244175672531,
"learning_rate": 9.98940427867909e-05,
"loss": 0.0067,
"step": 6970
},
{
"epoch": 61.769911504424776,
"grad_norm": 0.11525707691907883,
"learning_rate": 9.989296418485573e-05,
"loss": 0.0067,
"step": 6980
},
{
"epoch": 61.85840707964602,
"grad_norm": 0.10681252926588058,
"learning_rate": 9.989188012670101e-05,
"loss": 0.0063,
"step": 6990
},
{
"epoch": 61.94690265486726,
"grad_norm": 0.1459273248910904,
"learning_rate": 9.989079061244528e-05,
"loss": 0.0065,
"step": 7000
},
{
"epoch": 62.0353982300885,
"grad_norm": 0.1311957687139511,
"learning_rate": 9.988969564220769e-05,
"loss": 0.0068,
"step": 7010
},
{
"epoch": 62.123893805309734,
"grad_norm": 0.10953950136899948,
"learning_rate": 9.988859521610801e-05,
"loss": 0.0074,
"step": 7020
},
{
"epoch": 62.21238938053097,
"grad_norm": 0.13867156207561493,
"learning_rate": 9.988748933426656e-05,
"loss": 0.0069,
"step": 7030
},
{
"epoch": 62.30088495575221,
"grad_norm": 0.1676408052444458,
"learning_rate": 9.988637799680428e-05,
"loss": 0.007,
"step": 7040
},
{
"epoch": 62.389380530973455,
"grad_norm": 0.18464261293411255,
"learning_rate": 9.98852612038427e-05,
"loss": 0.0076,
"step": 7050
},
{
"epoch": 62.47787610619469,
"grad_norm": 0.13487857580184937,
"learning_rate": 9.988413895550397e-05,
"loss": 0.0076,
"step": 7060
},
{
"epoch": 62.56637168141593,
"grad_norm": 0.1034436747431755,
"learning_rate": 9.98830112519108e-05,
"loss": 0.0069,
"step": 7070
},
{
"epoch": 62.65486725663717,
"grad_norm": 0.09467529505491257,
"learning_rate": 9.98818780931865e-05,
"loss": 0.0067,
"step": 7080
},
{
"epoch": 62.743362831858406,
"grad_norm": 0.11496268212795258,
"learning_rate": 9.988073947945502e-05,
"loss": 0.0069,
"step": 7090
},
{
"epoch": 62.83185840707964,
"grad_norm": 0.13266722857952118,
"learning_rate": 9.987959541084087e-05,
"loss": 0.0066,
"step": 7100
},
{
"epoch": 62.92035398230089,
"grad_norm": 0.12566514313220978,
"learning_rate": 9.987844588746915e-05,
"loss": 0.0066,
"step": 7110
},
{
"epoch": 63.008849557522126,
"grad_norm": 0.1609344482421875,
"learning_rate": 9.987729090946558e-05,
"loss": 0.007,
"step": 7120
},
{
"epoch": 63.097345132743364,
"grad_norm": 0.15092191100120544,
"learning_rate": 9.987613047695647e-05,
"loss": 0.0067,
"step": 7130
},
{
"epoch": 63.1858407079646,
"grad_norm": 0.13600541651248932,
"learning_rate": 9.987496459006871e-05,
"loss": 0.0064,
"step": 7140
},
{
"epoch": 63.27433628318584,
"grad_norm": 0.12328796088695526,
"learning_rate": 9.987379324892982e-05,
"loss": 0.0065,
"step": 7150
},
{
"epoch": 63.36283185840708,
"grad_norm": 0.1525561362504959,
"learning_rate": 9.987261645366788e-05,
"loss": 0.0067,
"step": 7160
},
{
"epoch": 63.45132743362832,
"grad_norm": 0.12145034223794937,
"learning_rate": 9.987143420441158e-05,
"loss": 0.0067,
"step": 7170
},
{
"epoch": 63.53982300884956,
"grad_norm": 0.11069979518651962,
"learning_rate": 9.987024650129022e-05,
"loss": 0.0068,
"step": 7180
},
{
"epoch": 63.6283185840708,
"grad_norm": 0.11291412264108658,
"learning_rate": 9.986905334443368e-05,
"loss": 0.0069,
"step": 7190
},
{
"epoch": 63.716814159292035,
"grad_norm": 0.0968683585524559,
"learning_rate": 9.986785473397245e-05,
"loss": 0.0061,
"step": 7200
},
{
"epoch": 63.80530973451327,
"grad_norm": 0.10504832863807678,
"learning_rate": 9.98666506700376e-05,
"loss": 0.0061,
"step": 7210
},
{
"epoch": 63.89380530973451,
"grad_norm": 0.09243866056203842,
"learning_rate": 9.986544115276081e-05,
"loss": 0.0062,
"step": 7220
},
{
"epoch": 63.982300884955755,
"grad_norm": 0.08823885023593903,
"learning_rate": 9.986422618227433e-05,
"loss": 0.006,
"step": 7230
},
{
"epoch": 64.070796460177,
"grad_norm": 0.10525842010974884,
"learning_rate": 9.986300575871106e-05,
"loss": 0.0062,
"step": 7240
},
{
"epoch": 64.15929203539822,
"grad_norm": 0.11518089473247528,
"learning_rate": 9.986177988220444e-05,
"loss": 0.0058,
"step": 7250
},
{
"epoch": 64.24778761061947,
"grad_norm": 0.10956887900829315,
"learning_rate": 9.986054855288856e-05,
"loss": 0.0058,
"step": 7260
},
{
"epoch": 64.33628318584071,
"grad_norm": 0.1005554124712944,
"learning_rate": 9.985931177089802e-05,
"loss": 0.0064,
"step": 7270
},
{
"epoch": 64.42477876106194,
"grad_norm": 0.14025098085403442,
"learning_rate": 9.985806953636814e-05,
"loss": 0.0063,
"step": 7280
},
{
"epoch": 64.51327433628319,
"grad_norm": 0.11949311941862106,
"learning_rate": 9.985682184943471e-05,
"loss": 0.0071,
"step": 7290
},
{
"epoch": 64.60176991150442,
"grad_norm": 0.11547757685184479,
"learning_rate": 9.98555687102342e-05,
"loss": 0.0059,
"step": 7300
},
{
"epoch": 64.69026548672566,
"grad_norm": 0.10911315679550171,
"learning_rate": 9.985431011890367e-05,
"loss": 0.0064,
"step": 7310
},
{
"epoch": 64.77876106194691,
"grad_norm": 0.11663784086704254,
"learning_rate": 9.985304607558075e-05,
"loss": 0.0063,
"step": 7320
},
{
"epoch": 64.86725663716814,
"grad_norm": 0.12374185770750046,
"learning_rate": 9.985177658040364e-05,
"loss": 0.0067,
"step": 7330
},
{
"epoch": 64.95575221238938,
"grad_norm": 0.11252603679895401,
"learning_rate": 9.985050163351119e-05,
"loss": 0.006,
"step": 7340
},
{
"epoch": 65.04424778761062,
"grad_norm": 0.12345760315656662,
"learning_rate": 9.984922123504286e-05,
"loss": 0.0067,
"step": 7350
},
{
"epoch": 65.13274336283186,
"grad_norm": 0.10149062424898148,
"learning_rate": 9.984793538513862e-05,
"loss": 0.0072,
"step": 7360
},
{
"epoch": 65.22123893805309,
"grad_norm": 0.13657146692276,
"learning_rate": 9.984664408393912e-05,
"loss": 0.0063,
"step": 7370
},
{
"epoch": 65.30973451327434,
"grad_norm": 0.11091844737529755,
"learning_rate": 9.984534733158556e-05,
"loss": 0.0069,
"step": 7380
},
{
"epoch": 65.39823008849558,
"grad_norm": 0.09856518357992172,
"learning_rate": 9.984404512821977e-05,
"loss": 0.0065,
"step": 7390
},
{
"epoch": 65.48672566371681,
"grad_norm": 0.1332857757806778,
"learning_rate": 9.984273747398411e-05,
"loss": 0.0061,
"step": 7400
},
{
"epoch": 65.57522123893806,
"grad_norm": 0.11882749944925308,
"learning_rate": 9.984142436902165e-05,
"loss": 0.0072,
"step": 7410
},
{
"epoch": 65.66371681415929,
"grad_norm": 0.11369358748197556,
"learning_rate": 9.984010581347596e-05,
"loss": 0.0061,
"step": 7420
},
{
"epoch": 65.75221238938053,
"grad_norm": 0.13496249914169312,
"learning_rate": 9.983878180749121e-05,
"loss": 0.0063,
"step": 7430
},
{
"epoch": 65.84070796460178,
"grad_norm": 0.12470993399620056,
"learning_rate": 9.983745235121222e-05,
"loss": 0.0062,
"step": 7440
},
{
"epoch": 65.929203539823,
"grad_norm": 0.09955979138612747,
"learning_rate": 9.983611744478438e-05,
"loss": 0.0069,
"step": 7450
},
{
"epoch": 66.01769911504425,
"grad_norm": 0.13134808838367462,
"learning_rate": 9.983477708835365e-05,
"loss": 0.0066,
"step": 7460
},
{
"epoch": 66.10619469026548,
"grad_norm": 0.10295764356851578,
"learning_rate": 9.983343128206664e-05,
"loss": 0.0059,
"step": 7470
},
{
"epoch": 66.19469026548673,
"grad_norm": 0.12271054089069366,
"learning_rate": 9.983208002607049e-05,
"loss": 0.0062,
"step": 7480
},
{
"epoch": 66.28318584070796,
"grad_norm": 0.15199494361877441,
"learning_rate": 9.9830723320513e-05,
"loss": 0.0064,
"step": 7490
},
{
"epoch": 66.3716814159292,
"grad_norm": 0.10865994542837143,
"learning_rate": 9.982936116554254e-05,
"loss": 0.0065,
"step": 7500
},
{
"epoch": 66.46017699115045,
"grad_norm": 0.09021376818418503,
"learning_rate": 9.982799356130803e-05,
"loss": 0.0067,
"step": 7510
},
{
"epoch": 66.54867256637168,
"grad_norm": 0.16522467136383057,
"learning_rate": 9.982662050795908e-05,
"loss": 0.0059,
"step": 7520
},
{
"epoch": 66.63716814159292,
"grad_norm": 0.10417914390563965,
"learning_rate": 9.982524200564583e-05,
"loss": 0.0063,
"step": 7530
},
{
"epoch": 66.72566371681415,
"grad_norm": 0.1503831446170807,
"learning_rate": 9.982385805451901e-05,
"loss": 0.0065,
"step": 7540
},
{
"epoch": 66.8141592920354,
"grad_norm": 0.11018391698598862,
"learning_rate": 9.982246865472998e-05,
"loss": 0.0059,
"step": 7550
},
{
"epoch": 66.90265486725664,
"grad_norm": 0.11249592155218124,
"learning_rate": 9.982107380643069e-05,
"loss": 0.0065,
"step": 7560
},
{
"epoch": 66.99115044247787,
"grad_norm": 0.10396026074886322,
"learning_rate": 9.981967350977368e-05,
"loss": 0.0066,
"step": 7570
},
{
"epoch": 67.07964601769912,
"grad_norm": 0.13432832062244415,
"learning_rate": 9.981826776491208e-05,
"loss": 0.0062,
"step": 7580
},
{
"epoch": 67.16814159292035,
"grad_norm": 0.11561453342437744,
"learning_rate": 9.98168565719996e-05,
"loss": 0.0066,
"step": 7590
},
{
"epoch": 67.2566371681416,
"grad_norm": 0.10294978320598602,
"learning_rate": 9.98154399311906e-05,
"loss": 0.0061,
"step": 7600
},
{
"epoch": 67.34513274336283,
"grad_norm": 0.09747711569070816,
"learning_rate": 9.981401784263997e-05,
"loss": 0.0067,
"step": 7610
},
{
"epoch": 67.43362831858407,
"grad_norm": 0.10972411185503006,
"learning_rate": 9.981259030650326e-05,
"loss": 0.0063,
"step": 7620
},
{
"epoch": 67.52212389380531,
"grad_norm": 0.11169084161520004,
"learning_rate": 9.981115732293655e-05,
"loss": 0.006,
"step": 7630
},
{
"epoch": 67.61061946902655,
"grad_norm": 0.10598007589578629,
"learning_rate": 9.980971889209659e-05,
"loss": 0.0063,
"step": 7640
},
{
"epoch": 67.69911504424779,
"grad_norm": 0.11927840113639832,
"learning_rate": 9.980827501414064e-05,
"loss": 0.0059,
"step": 7650
},
{
"epoch": 67.78761061946902,
"grad_norm": 0.10992605984210968,
"learning_rate": 9.980682568922663e-05,
"loss": 0.0066,
"step": 7660
},
{
"epoch": 67.87610619469027,
"grad_norm": 0.13199156522750854,
"learning_rate": 9.980537091751304e-05,
"loss": 0.0066,
"step": 7670
},
{
"epoch": 67.96460176991151,
"grad_norm": 0.1461779773235321,
"learning_rate": 9.980391069915897e-05,
"loss": 0.0064,
"step": 7680
},
{
"epoch": 68.05309734513274,
"grad_norm": 0.14791648089885712,
"learning_rate": 9.98024450343241e-05,
"loss": 0.0064,
"step": 7690
},
{
"epoch": 68.14159292035399,
"grad_norm": 0.14366358518600464,
"learning_rate": 9.980097392316872e-05,
"loss": 0.0068,
"step": 7700
},
{
"epoch": 68.23008849557522,
"grad_norm": 0.1535428911447525,
"learning_rate": 9.97994973658537e-05,
"loss": 0.0059,
"step": 7710
},
{
"epoch": 68.31858407079646,
"grad_norm": 0.1114254891872406,
"learning_rate": 9.979801536254054e-05,
"loss": 0.0062,
"step": 7720
},
{
"epoch": 68.40707964601769,
"grad_norm": 0.10827835649251938,
"learning_rate": 9.979652791339127e-05,
"loss": 0.0064,
"step": 7730
},
{
"epoch": 68.49557522123894,
"grad_norm": 0.1307893693447113,
"learning_rate": 9.97950350185686e-05,
"loss": 0.007,
"step": 7740
},
{
"epoch": 68.58407079646018,
"grad_norm": 0.09643508493900299,
"learning_rate": 9.979353667823574e-05,
"loss": 0.0059,
"step": 7750
},
{
"epoch": 68.67256637168141,
"grad_norm": 0.1439676731824875,
"learning_rate": 9.979203289255658e-05,
"loss": 0.0056,
"step": 7760
},
{
"epoch": 68.76106194690266,
"grad_norm": 0.10104666650295258,
"learning_rate": 9.979052366169557e-05,
"loss": 0.006,
"step": 7770
},
{
"epoch": 68.84955752212389,
"grad_norm": 0.1201610267162323,
"learning_rate": 9.978900898581775e-05,
"loss": 0.0055,
"step": 7780
},
{
"epoch": 68.93805309734513,
"grad_norm": 0.11689028143882751,
"learning_rate": 9.978748886508875e-05,
"loss": 0.0062,
"step": 7790
},
{
"epoch": 69.02654867256638,
"grad_norm": 0.12445169687271118,
"learning_rate": 9.978596329967484e-05,
"loss": 0.0063,
"step": 7800
},
{
"epoch": 69.11504424778761,
"grad_norm": 0.10243967920541763,
"learning_rate": 9.978443228974284e-05,
"loss": 0.0066,
"step": 7810
},
{
"epoch": 69.20353982300885,
"grad_norm": 0.1291644275188446,
"learning_rate": 9.978289583546015e-05,
"loss": 0.007,
"step": 7820
},
{
"epoch": 69.29203539823008,
"grad_norm": 0.09051987528800964,
"learning_rate": 9.978135393699484e-05,
"loss": 0.0064,
"step": 7830
},
{
"epoch": 69.38053097345133,
"grad_norm": 0.10411614924669266,
"learning_rate": 9.977980659451548e-05,
"loss": 0.0066,
"step": 7840
},
{
"epoch": 69.46902654867256,
"grad_norm": 0.16520874202251434,
"learning_rate": 9.977825380819135e-05,
"loss": 0.007,
"step": 7850
},
{
"epoch": 69.5575221238938,
"grad_norm": 0.10322892665863037,
"learning_rate": 9.97766955781922e-05,
"loss": 0.007,
"step": 7860
},
{
"epoch": 69.64601769911505,
"grad_norm": 0.10508093237876892,
"learning_rate": 9.977513190468848e-05,
"loss": 0.0059,
"step": 7870
},
{
"epoch": 69.73451327433628,
"grad_norm": 0.1304185390472412,
"learning_rate": 9.977356278785116e-05,
"loss": 0.0061,
"step": 7880
},
{
"epoch": 69.82300884955752,
"grad_norm": 0.1335097700357437,
"learning_rate": 9.977198822785184e-05,
"loss": 0.0073,
"step": 7890
},
{
"epoch": 69.91150442477876,
"grad_norm": 0.10650800913572311,
"learning_rate": 9.977040822486273e-05,
"loss": 0.006,
"step": 7900
},
{
"epoch": 70.0,
"grad_norm": 0.12559041380882263,
"learning_rate": 9.97688227790566e-05,
"loss": 0.0066,
"step": 7910
},
{
"epoch": 70.08849557522124,
"grad_norm": 0.11108136922121048,
"learning_rate": 9.976723189060684e-05,
"loss": 0.0064,
"step": 7920
},
{
"epoch": 70.17699115044248,
"grad_norm": 0.11059076339006424,
"learning_rate": 9.976563555968742e-05,
"loss": 0.0063,
"step": 7930
},
{
"epoch": 70.26548672566372,
"grad_norm": 0.10653873533010483,
"learning_rate": 9.976403378647292e-05,
"loss": 0.0063,
"step": 7940
},
{
"epoch": 70.35398230088495,
"grad_norm": 0.09831434488296509,
"learning_rate": 9.97624265711385e-05,
"loss": 0.0058,
"step": 7950
},
{
"epoch": 70.4424778761062,
"grad_norm": 0.11119065433740616,
"learning_rate": 9.976081391385993e-05,
"loss": 0.0064,
"step": 7960
},
{
"epoch": 70.53097345132744,
"grad_norm": 0.09702420979738235,
"learning_rate": 9.975919581481356e-05,
"loss": 0.0065,
"step": 7970
},
{
"epoch": 70.61946902654867,
"grad_norm": 0.127532497048378,
"learning_rate": 9.975757227417634e-05,
"loss": 0.0067,
"step": 7980
},
{
"epoch": 70.70796460176992,
"grad_norm": 0.12315838038921356,
"learning_rate": 9.975594329212586e-05,
"loss": 0.0064,
"step": 7990
},
{
"epoch": 70.79646017699115,
"grad_norm": 0.09994253516197205,
"learning_rate": 9.97543088688402e-05,
"loss": 0.0062,
"step": 8000
},
{
"epoch": 70.88495575221239,
"grad_norm": 0.10381423681974411,
"learning_rate": 9.975266900449814e-05,
"loss": 0.0061,
"step": 8010
},
{
"epoch": 70.97345132743362,
"grad_norm": 0.13495273888111115,
"learning_rate": 9.975102369927898e-05,
"loss": 0.0066,
"step": 8020
},
{
"epoch": 71.06194690265487,
"grad_norm": 0.12326859682798386,
"learning_rate": 9.974937295336269e-05,
"loss": 0.0065,
"step": 8030
},
{
"epoch": 71.15044247787611,
"grad_norm": 0.09616561233997345,
"learning_rate": 9.974771676692975e-05,
"loss": 0.0061,
"step": 8040
},
{
"epoch": 71.23893805309734,
"grad_norm": 0.12078515440225601,
"learning_rate": 9.974605514016131e-05,
"loss": 0.0061,
"step": 8050
},
{
"epoch": 71.32743362831859,
"grad_norm": 0.11471603810787201,
"learning_rate": 9.974438807323907e-05,
"loss": 0.0064,
"step": 8060
},
{
"epoch": 71.41592920353982,
"grad_norm": 0.10006176680326462,
"learning_rate": 9.974271556634535e-05,
"loss": 0.0061,
"step": 8070
},
{
"epoch": 71.50442477876106,
"grad_norm": 0.08977751433849335,
"learning_rate": 9.974103761966302e-05,
"loss": 0.0056,
"step": 8080
},
{
"epoch": 71.59292035398231,
"grad_norm": 0.10365013033151627,
"learning_rate": 9.973935423337563e-05,
"loss": 0.0058,
"step": 8090
},
{
"epoch": 71.68141592920354,
"grad_norm": 0.11227709800004959,
"learning_rate": 9.973766540766722e-05,
"loss": 0.006,
"step": 8100
},
{
"epoch": 71.76991150442478,
"grad_norm": 0.1021062508225441,
"learning_rate": 9.97359711427225e-05,
"loss": 0.0057,
"step": 8110
},
{
"epoch": 71.85840707964601,
"grad_norm": 0.09944093972444534,
"learning_rate": 9.973427143872677e-05,
"loss": 0.0064,
"step": 8120
},
{
"epoch": 71.94690265486726,
"grad_norm": 0.10755596309900284,
"learning_rate": 9.973256629586589e-05,
"loss": 0.0052,
"step": 8130
},
{
"epoch": 72.03539823008849,
"grad_norm": 0.12734898924827576,
"learning_rate": 9.973085571432632e-05,
"loss": 0.0065,
"step": 8140
},
{
"epoch": 72.12389380530973,
"grad_norm": 0.12120959162712097,
"learning_rate": 9.972913969429513e-05,
"loss": 0.0057,
"step": 8150
},
{
"epoch": 72.21238938053098,
"grad_norm": 0.10789244621992111,
"learning_rate": 9.972741823596e-05,
"loss": 0.006,
"step": 8160
},
{
"epoch": 72.30088495575221,
"grad_norm": 0.09750421345233917,
"learning_rate": 9.972569133950917e-05,
"loss": 0.0063,
"step": 8170
},
{
"epoch": 72.38938053097345,
"grad_norm": 0.07650677859783173,
"learning_rate": 9.972395900513151e-05,
"loss": 0.0063,
"step": 8180
},
{
"epoch": 72.47787610619469,
"grad_norm": 0.08634928613901138,
"learning_rate": 9.972222123301645e-05,
"loss": 0.0062,
"step": 8190
},
{
"epoch": 72.56637168141593,
"grad_norm": 0.10441482812166214,
"learning_rate": 9.972047802335403e-05,
"loss": 0.0058,
"step": 8200
},
{
"epoch": 72.65486725663717,
"grad_norm": 0.10933029651641846,
"learning_rate": 9.971872937633488e-05,
"loss": 0.0055,
"step": 8210
},
{
"epoch": 72.7433628318584,
"grad_norm": 0.07779169827699661,
"learning_rate": 9.971697529215024e-05,
"loss": 0.006,
"step": 8220
},
{
"epoch": 72.83185840707965,
"grad_norm": 0.09137412160634995,
"learning_rate": 9.971521577099192e-05,
"loss": 0.0064,
"step": 8230
},
{
"epoch": 72.92035398230088,
"grad_norm": 0.10819313675165176,
"learning_rate": 9.971345081305236e-05,
"loss": 0.0062,
"step": 8240
},
{
"epoch": 73.00884955752213,
"grad_norm": 0.1135842502117157,
"learning_rate": 9.971168041852456e-05,
"loss": 0.0062,
"step": 8250
},
{
"epoch": 73.09734513274336,
"grad_norm": 0.11758588254451752,
"learning_rate": 9.970990458760215e-05,
"loss": 0.0061,
"step": 8260
},
{
"epoch": 73.1858407079646,
"grad_norm": 0.11972816288471222,
"learning_rate": 9.970812332047929e-05,
"loss": 0.0059,
"step": 8270
},
{
"epoch": 73.27433628318585,
"grad_norm": 0.1334552764892578,
"learning_rate": 9.97063366173508e-05,
"loss": 0.0063,
"step": 8280
},
{
"epoch": 73.36283185840708,
"grad_norm": 0.13754241168498993,
"learning_rate": 9.970454447841207e-05,
"loss": 0.006,
"step": 8290
},
{
"epoch": 73.45132743362832,
"grad_norm": 0.14387445151805878,
"learning_rate": 9.970274690385909e-05,
"loss": 0.0064,
"step": 8300
},
{
"epoch": 73.53982300884955,
"grad_norm": 0.1262485235929489,
"learning_rate": 9.970094389388844e-05,
"loss": 0.0061,
"step": 8310
},
{
"epoch": 73.6283185840708,
"grad_norm": 0.11576636880636215,
"learning_rate": 9.969913544869728e-05,
"loss": 0.0058,
"step": 8320
},
{
"epoch": 73.71681415929204,
"grad_norm": 0.1376902163028717,
"learning_rate": 9.96973215684834e-05,
"loss": 0.0062,
"step": 8330
},
{
"epoch": 73.80530973451327,
"grad_norm": 0.15589570999145508,
"learning_rate": 9.969550225344513e-05,
"loss": 0.0062,
"step": 8340
},
{
"epoch": 73.89380530973452,
"grad_norm": 0.10897907614707947,
"learning_rate": 9.969367750378147e-05,
"loss": 0.0059,
"step": 8350
},
{
"epoch": 73.98230088495575,
"grad_norm": 0.09046350419521332,
"learning_rate": 9.969184731969194e-05,
"loss": 0.0058,
"step": 8360
},
{
"epoch": 74.070796460177,
"grad_norm": 0.09479265660047531,
"learning_rate": 9.96900117013767e-05,
"loss": 0.0059,
"step": 8370
},
{
"epoch": 74.15929203539822,
"grad_norm": 0.1085423082113266,
"learning_rate": 9.96881706490365e-05,
"loss": 0.006,
"step": 8380
},
{
"epoch": 74.24778761061947,
"grad_norm": 0.12503911554813385,
"learning_rate": 9.968632416287265e-05,
"loss": 0.0058,
"step": 8390
},
{
"epoch": 74.33628318584071,
"grad_norm": 0.11865498870611191,
"learning_rate": 9.96844722430871e-05,
"loss": 0.0057,
"step": 8400
},
{
"epoch": 74.42477876106194,
"grad_norm": 0.1037907749414444,
"learning_rate": 9.968261488988235e-05,
"loss": 0.0064,
"step": 8410
},
{
"epoch": 74.51327433628319,
"grad_norm": 0.10178150236606598,
"learning_rate": 9.968075210346155e-05,
"loss": 0.0057,
"step": 8420
},
{
"epoch": 74.60176991150442,
"grad_norm": 0.1001485213637352,
"learning_rate": 9.967888388402839e-05,
"loss": 0.0061,
"step": 8430
},
{
"epoch": 74.69026548672566,
"grad_norm": 0.11811663210391998,
"learning_rate": 9.967701023178717e-05,
"loss": 0.006,
"step": 8440
},
{
"epoch": 74.77876106194691,
"grad_norm": 0.12607654929161072,
"learning_rate": 9.967513114694282e-05,
"loss": 0.0059,
"step": 8450
},
{
"epoch": 74.86725663716814,
"grad_norm": 0.1423610895872116,
"learning_rate": 9.967324662970079e-05,
"loss": 0.0059,
"step": 8460
},
{
"epoch": 74.95575221238938,
"grad_norm": 0.12265769392251968,
"learning_rate": 9.96713566802672e-05,
"loss": 0.0059,
"step": 8470
},
{
"epoch": 75.04424778761062,
"grad_norm": 0.08160112053155899,
"learning_rate": 9.966946129884873e-05,
"loss": 0.0061,
"step": 8480
},
{
"epoch": 75.13274336283186,
"grad_norm": 0.09482406079769135,
"learning_rate": 9.966756048565265e-05,
"loss": 0.0059,
"step": 8490
},
{
"epoch": 75.22123893805309,
"grad_norm": 0.10568396747112274,
"learning_rate": 9.966565424088681e-05,
"loss": 0.0061,
"step": 8500
},
{
"epoch": 75.30973451327434,
"grad_norm": 0.10592056810855865,
"learning_rate": 9.96637425647597e-05,
"loss": 0.0064,
"step": 8510
},
{
"epoch": 75.39823008849558,
"grad_norm": 0.09835847467184067,
"learning_rate": 9.966182545748038e-05,
"loss": 0.0057,
"step": 8520
},
{
"epoch": 75.48672566371681,
"grad_norm": 0.11802490055561066,
"learning_rate": 9.96599029192585e-05,
"loss": 0.0065,
"step": 8530
},
{
"epoch": 75.57522123893806,
"grad_norm": 0.12815123796463013,
"learning_rate": 9.965797495030428e-05,
"loss": 0.0064,
"step": 8540
},
{
"epoch": 75.66371681415929,
"grad_norm": 0.09517830610275269,
"learning_rate": 9.96560415508286e-05,
"loss": 0.0062,
"step": 8550
},
{
"epoch": 75.75221238938053,
"grad_norm": 0.08576002717018127,
"learning_rate": 9.965410272104286e-05,
"loss": 0.0059,
"step": 8560
},
{
"epoch": 75.84070796460178,
"grad_norm": 0.09037141501903534,
"learning_rate": 9.96521584611591e-05,
"loss": 0.0062,
"step": 8570
},
{
"epoch": 75.929203539823,
"grad_norm": 0.11598179489374161,
"learning_rate": 9.965020877138994e-05,
"loss": 0.0058,
"step": 8580
},
{
"epoch": 76.01769911504425,
"grad_norm": 0.1054382398724556,
"learning_rate": 9.964825365194861e-05,
"loss": 0.0064,
"step": 8590
},
{
"epoch": 76.10619469026548,
"grad_norm": 0.13920103013515472,
"learning_rate": 9.96462931030489e-05,
"loss": 0.006,
"step": 8600
},
{
"epoch": 76.19469026548673,
"grad_norm": 0.08667236566543579,
"learning_rate": 9.96443271249052e-05,
"loss": 0.0061,
"step": 8610
},
{
"epoch": 76.28318584070796,
"grad_norm": 0.09789692610502243,
"learning_rate": 9.964235571773255e-05,
"loss": 0.0064,
"step": 8620
},
{
"epoch": 76.3716814159292,
"grad_norm": 0.09337849915027618,
"learning_rate": 9.96403788817465e-05,
"loss": 0.0057,
"step": 8630
},
{
"epoch": 76.46017699115045,
"grad_norm": 0.08435383439064026,
"learning_rate": 9.963839661716325e-05,
"loss": 0.0062,
"step": 8640
},
{
"epoch": 76.54867256637168,
"grad_norm": 0.1057974249124527,
"learning_rate": 9.963640892419958e-05,
"loss": 0.0061,
"step": 8650
},
{
"epoch": 76.63716814159292,
"grad_norm": 0.11115610599517822,
"learning_rate": 9.963441580307286e-05,
"loss": 0.0064,
"step": 8660
},
{
"epoch": 76.72566371681415,
"grad_norm": 0.10767662525177002,
"learning_rate": 9.963241725400104e-05,
"loss": 0.0063,
"step": 8670
},
{
"epoch": 76.8141592920354,
"grad_norm": 0.12147220969200134,
"learning_rate": 9.963041327720271e-05,
"loss": 0.0059,
"step": 8680
},
{
"epoch": 76.90265486725664,
"grad_norm": 0.11704272031784058,
"learning_rate": 9.962840387289697e-05,
"loss": 0.0063,
"step": 8690
},
{
"epoch": 76.99115044247787,
"grad_norm": 0.10685360431671143,
"learning_rate": 9.962638904130363e-05,
"loss": 0.0063,
"step": 8700
},
{
"epoch": 77.07964601769912,
"grad_norm": 0.12996336817741394,
"learning_rate": 9.962436878264298e-05,
"loss": 0.0062,
"step": 8710
},
{
"epoch": 77.16814159292035,
"grad_norm": 0.10194937884807587,
"learning_rate": 9.962234309713598e-05,
"loss": 0.0061,
"step": 8720
},
{
"epoch": 77.2566371681416,
"grad_norm": 0.09662755578756332,
"learning_rate": 9.962031198500414e-05,
"loss": 0.0062,
"step": 8730
},
{
"epoch": 77.34513274336283,
"grad_norm": 0.09166355431079865,
"learning_rate": 9.961827544646958e-05,
"loss": 0.0061,
"step": 8740
},
{
"epoch": 77.43362831858407,
"grad_norm": 0.10736475884914398,
"learning_rate": 9.961623348175501e-05,
"loss": 0.0063,
"step": 8750
},
{
"epoch": 77.52212389380531,
"grad_norm": 0.0980466902256012,
"learning_rate": 9.961418609108377e-05,
"loss": 0.0059,
"step": 8760
},
{
"epoch": 77.61061946902655,
"grad_norm": 0.09984292089939117,
"learning_rate": 9.961213327467971e-05,
"loss": 0.0055,
"step": 8770
},
{
"epoch": 77.69911504424779,
"grad_norm": 0.13165168464183807,
"learning_rate": 9.961007503276736e-05,
"loss": 0.0059,
"step": 8780
},
{
"epoch": 77.78761061946902,
"grad_norm": 0.13526293635368347,
"learning_rate": 9.960801136557179e-05,
"loss": 0.0063,
"step": 8790
},
{
"epoch": 77.87610619469027,
"grad_norm": 0.12890978157520294,
"learning_rate": 9.960594227331866e-05,
"loss": 0.0058,
"step": 8800
},
{
"epoch": 77.96460176991151,
"grad_norm": 0.10762617737054825,
"learning_rate": 9.960386775623429e-05,
"loss": 0.006,
"step": 8810
},
{
"epoch": 78.05309734513274,
"grad_norm": 0.12391962856054306,
"learning_rate": 9.96017878145455e-05,
"loss": 0.0067,
"step": 8820
},
{
"epoch": 78.14159292035399,
"grad_norm": 0.11198613792657852,
"learning_rate": 9.959970244847977e-05,
"loss": 0.0057,
"step": 8830
},
{
"epoch": 78.23008849557522,
"grad_norm": 0.12982113659381866,
"learning_rate": 9.959761165826518e-05,
"loss": 0.0061,
"step": 8840
},
{
"epoch": 78.31858407079646,
"grad_norm": 0.12969951331615448,
"learning_rate": 9.959551544413033e-05,
"loss": 0.0062,
"step": 8850
},
{
"epoch": 78.40707964601769,
"grad_norm": 0.1363687664270401,
"learning_rate": 9.959341380630448e-05,
"loss": 0.0058,
"step": 8860
},
{
"epoch": 78.49557522123894,
"grad_norm": 0.1254795491695404,
"learning_rate": 9.959130674501746e-05,
"loss": 0.0058,
"step": 8870
},
{
"epoch": 78.58407079646018,
"grad_norm": 0.07944416999816895,
"learning_rate": 9.958919426049968e-05,
"loss": 0.0058,
"step": 8880
},
{
"epoch": 78.67256637168141,
"grad_norm": 0.10879120975732803,
"learning_rate": 9.958707635298219e-05,
"loss": 0.0056,
"step": 8890
},
{
"epoch": 78.76106194690266,
"grad_norm": 0.12182161211967468,
"learning_rate": 9.958495302269657e-05,
"loss": 0.0061,
"step": 8900
},
{
"epoch": 78.84955752212389,
"grad_norm": 0.1068938598036766,
"learning_rate": 9.958282426987503e-05,
"loss": 0.0062,
"step": 8910
},
{
"epoch": 78.93805309734513,
"grad_norm": 0.11309578269720078,
"learning_rate": 9.95806900947504e-05,
"loss": 0.0057,
"step": 8920
},
{
"epoch": 79.02654867256638,
"grad_norm": 0.09065814316272736,
"learning_rate": 9.957855049755604e-05,
"loss": 0.0055,
"step": 8930
},
{
"epoch": 79.11504424778761,
"grad_norm": 0.07722936570644379,
"learning_rate": 9.957640547852593e-05,
"loss": 0.006,
"step": 8940
},
{
"epoch": 79.20353982300885,
"grad_norm": 0.10858945548534393,
"learning_rate": 9.957425503789466e-05,
"loss": 0.006,
"step": 8950
},
{
"epoch": 79.29203539823008,
"grad_norm": 0.11869582533836365,
"learning_rate": 9.957209917589738e-05,
"loss": 0.0053,
"step": 8960
},
{
"epoch": 79.38053097345133,
"grad_norm": 0.1038854718208313,
"learning_rate": 9.956993789276987e-05,
"loss": 0.0053,
"step": 8970
},
{
"epoch": 79.46902654867256,
"grad_norm": 0.10900082439184189,
"learning_rate": 9.956777118874847e-05,
"loss": 0.0056,
"step": 8980
},
{
"epoch": 79.5575221238938,
"grad_norm": 0.0852367952466011,
"learning_rate": 9.956559906407016e-05,
"loss": 0.0058,
"step": 8990
},
{
"epoch": 79.64601769911505,
"grad_norm": 0.10679316520690918,
"learning_rate": 9.956342151897245e-05,
"loss": 0.0054,
"step": 9000
},
{
"epoch": 79.73451327433628,
"grad_norm": 0.08642175048589706,
"learning_rate": 9.956123855369346e-05,
"loss": 0.0062,
"step": 9010
},
{
"epoch": 79.82300884955752,
"grad_norm": 0.1008024737238884,
"learning_rate": 9.955905016847196e-05,
"loss": 0.006,
"step": 9020
},
{
"epoch": 79.91150442477876,
"grad_norm": 0.08043470978736877,
"learning_rate": 9.955685636354723e-05,
"loss": 0.0056,
"step": 9030
},
{
"epoch": 80.0,
"grad_norm": 0.08366107195615768,
"learning_rate": 9.95546571391592e-05,
"loss": 0.0057,
"step": 9040
},
{
"epoch": 80.08849557522124,
"grad_norm": 0.1145307868719101,
"learning_rate": 9.955245249554837e-05,
"loss": 0.0061,
"step": 9050
},
{
"epoch": 80.17699115044248,
"grad_norm": 0.09086012840270996,
"learning_rate": 9.955024243295582e-05,
"loss": 0.0057,
"step": 9060
},
{
"epoch": 80.26548672566372,
"grad_norm": 0.09050711244344711,
"learning_rate": 9.954802695162328e-05,
"loss": 0.0061,
"step": 9070
},
{
"epoch": 80.35398230088495,
"grad_norm": 0.1267324984073639,
"learning_rate": 9.954580605179302e-05,
"loss": 0.0057,
"step": 9080
},
{
"epoch": 80.4424778761062,
"grad_norm": 0.10124458372592926,
"learning_rate": 9.954357973370788e-05,
"loss": 0.006,
"step": 9090
},
{
"epoch": 80.53097345132744,
"grad_norm": 0.10128167271614075,
"learning_rate": 9.954134799761135e-05,
"loss": 0.0061,
"step": 9100
},
{
"epoch": 80.61946902654867,
"grad_norm": 0.10916615277528763,
"learning_rate": 9.953911084374748e-05,
"loss": 0.0064,
"step": 9110
},
{
"epoch": 80.70796460176992,
"grad_norm": 0.11554095894098282,
"learning_rate": 9.953686827236093e-05,
"loss": 0.0055,
"step": 9120
},
{
"epoch": 80.79646017699115,
"grad_norm": 0.1443350613117218,
"learning_rate": 9.953462028369695e-05,
"loss": 0.0062,
"step": 9130
},
{
"epoch": 80.88495575221239,
"grad_norm": 0.09688980877399445,
"learning_rate": 9.953236687800136e-05,
"loss": 0.0062,
"step": 9140
},
{
"epoch": 80.97345132743362,
"grad_norm": 0.08535933494567871,
"learning_rate": 9.95301080555206e-05,
"loss": 0.0066,
"step": 9150
},
{
"epoch": 81.06194690265487,
"grad_norm": 0.08409234136343002,
"learning_rate": 9.952784381650171e-05,
"loss": 0.006,
"step": 9160
},
{
"epoch": 81.15044247787611,
"grad_norm": 0.12123929709196091,
"learning_rate": 9.952557416119226e-05,
"loss": 0.0061,
"step": 9170
},
{
"epoch": 81.23893805309734,
"grad_norm": 0.08133456110954285,
"learning_rate": 9.95232990898405e-05,
"loss": 0.0053,
"step": 9180
},
{
"epoch": 81.32743362831859,
"grad_norm": 0.1189822256565094,
"learning_rate": 9.95210186026952e-05,
"loss": 0.006,
"step": 9190
},
{
"epoch": 81.41592920353982,
"grad_norm": 0.1182321310043335,
"learning_rate": 9.951873270000576e-05,
"loss": 0.0059,
"step": 9200
},
{
"epoch": 81.50442477876106,
"grad_norm": 0.16531887650489807,
"learning_rate": 9.951644138202216e-05,
"loss": 0.0059,
"step": 9210
},
{
"epoch": 81.59292035398231,
"grad_norm": 0.09983012825250626,
"learning_rate": 9.951414464899498e-05,
"loss": 0.0065,
"step": 9220
},
{
"epoch": 81.68141592920354,
"grad_norm": 0.11131998151540756,
"learning_rate": 9.951184250117538e-05,
"loss": 0.0056,
"step": 9230
},
{
"epoch": 81.76991150442478,
"grad_norm": 0.11074583977460861,
"learning_rate": 9.950953493881513e-05,
"loss": 0.0059,
"step": 9240
},
{
"epoch": 81.85840707964601,
"grad_norm": 0.11894248425960541,
"learning_rate": 9.950722196216658e-05,
"loss": 0.0052,
"step": 9250
},
{
"epoch": 81.94690265486726,
"grad_norm": 0.10545102506875992,
"learning_rate": 9.950490357148265e-05,
"loss": 0.0058,
"step": 9260
},
{
"epoch": 82.03539823008849,
"grad_norm": 0.1360078603029251,
"learning_rate": 9.950257976701692e-05,
"loss": 0.0061,
"step": 9270
},
{
"epoch": 82.12389380530973,
"grad_norm": 0.06910306215286255,
"learning_rate": 9.950025054902348e-05,
"loss": 0.0055,
"step": 9280
},
{
"epoch": 82.21238938053098,
"grad_norm": 0.09763657301664352,
"learning_rate": 9.949791591775706e-05,
"loss": 0.0056,
"step": 9290
},
{
"epoch": 82.30088495575221,
"grad_norm": 0.07559313625097275,
"learning_rate": 9.949557587347298e-05,
"loss": 0.0051,
"step": 9300
},
{
"epoch": 82.38938053097345,
"grad_norm": 0.07049795985221863,
"learning_rate": 9.949323041642713e-05,
"loss": 0.0057,
"step": 9310
},
{
"epoch": 82.47787610619469,
"grad_norm": 0.10614913702011108,
"learning_rate": 9.949087954687602e-05,
"loss": 0.0055,
"step": 9320
},
{
"epoch": 82.56637168141593,
"grad_norm": 0.10284052789211273,
"learning_rate": 9.948852326507672e-05,
"loss": 0.0059,
"step": 9330
},
{
"epoch": 82.65486725663717,
"grad_norm": 0.12747420370578766,
"learning_rate": 9.948616157128694e-05,
"loss": 0.0061,
"step": 9340
},
{
"epoch": 82.7433628318584,
"grad_norm": 0.10097663104534149,
"learning_rate": 9.948379446576493e-05,
"loss": 0.0065,
"step": 9350
},
{
"epoch": 82.83185840707965,
"grad_norm": 0.1118328720331192,
"learning_rate": 9.948142194876952e-05,
"loss": 0.0058,
"step": 9360
},
{
"epoch": 82.92035398230088,
"grad_norm": 0.10961694270372391,
"learning_rate": 9.947904402056024e-05,
"loss": 0.0054,
"step": 9370
},
{
"epoch": 83.00884955752213,
"grad_norm": 0.10540442168712616,
"learning_rate": 9.947666068139708e-05,
"loss": 0.0059,
"step": 9380
},
{
"epoch": 83.09734513274336,
"grad_norm": 0.08544182032346725,
"learning_rate": 9.947427193154071e-05,
"loss": 0.0056,
"step": 9390
},
{
"epoch": 83.1858407079646,
"grad_norm": 0.09622497111558914,
"learning_rate": 9.947187777125233e-05,
"loss": 0.006,
"step": 9400
},
{
"epoch": 83.27433628318585,
"grad_norm": 0.07578998804092407,
"learning_rate": 9.946947820079377e-05,
"loss": 0.0053,
"step": 9410
},
{
"epoch": 83.36283185840708,
"grad_norm": 0.09491860866546631,
"learning_rate": 9.946707322042747e-05,
"loss": 0.0051,
"step": 9420
},
{
"epoch": 83.45132743362832,
"grad_norm": 0.08115492761135101,
"learning_rate": 9.94646628304164e-05,
"loss": 0.0052,
"step": 9430
},
{
"epoch": 83.53982300884955,
"grad_norm": 0.1010664626955986,
"learning_rate": 9.946224703102418e-05,
"loss": 0.0057,
"step": 9440
},
{
"epoch": 83.6283185840708,
"grad_norm": 0.10234019160270691,
"learning_rate": 9.945982582251498e-05,
"loss": 0.0059,
"step": 9450
},
{
"epoch": 83.71681415929204,
"grad_norm": 0.1145630031824112,
"learning_rate": 9.94573992051536e-05,
"loss": 0.0057,
"step": 9460
},
{
"epoch": 83.80530973451327,
"grad_norm": 0.10085400193929672,
"learning_rate": 9.94549671792054e-05,
"loss": 0.0064,
"step": 9470
},
{
"epoch": 83.89380530973452,
"grad_norm": 0.11343911290168762,
"learning_rate": 9.945252974493635e-05,
"loss": 0.0056,
"step": 9480
},
{
"epoch": 83.98230088495575,
"grad_norm": 0.12302631139755249,
"learning_rate": 9.9450086902613e-05,
"loss": 0.006,
"step": 9490
},
{
"epoch": 84.070796460177,
"grad_norm": 0.10083619505167007,
"learning_rate": 9.944763865250248e-05,
"loss": 0.0053,
"step": 9500
},
{
"epoch": 84.15929203539822,
"grad_norm": 0.0939771980047226,
"learning_rate": 9.944518499487254e-05,
"loss": 0.0065,
"step": 9510
},
{
"epoch": 84.24778761061947,
"grad_norm": 0.09466631710529327,
"learning_rate": 9.944272592999151e-05,
"loss": 0.006,
"step": 9520
},
{
"epoch": 84.33628318584071,
"grad_norm": 0.10912278294563293,
"learning_rate": 9.94402614581283e-05,
"loss": 0.0068,
"step": 9530
},
{
"epoch": 84.42477876106194,
"grad_norm": 0.09015228599309921,
"learning_rate": 9.943779157955244e-05,
"loss": 0.0055,
"step": 9540
},
{
"epoch": 84.51327433628319,
"grad_norm": 0.08392563462257385,
"learning_rate": 9.943531629453403e-05,
"loss": 0.0055,
"step": 9550
},
{
"epoch": 84.60176991150442,
"grad_norm": 0.09547723829746246,
"learning_rate": 9.943283560334375e-05,
"loss": 0.0056,
"step": 9560
},
{
"epoch": 84.69026548672566,
"grad_norm": 0.10921365767717361,
"learning_rate": 9.943034950625288e-05,
"loss": 0.0051,
"step": 9570
},
{
"epoch": 84.77876106194691,
"grad_norm": 0.09915768355131149,
"learning_rate": 9.942785800353332e-05,
"loss": 0.0063,
"step": 9580
},
{
"epoch": 84.86725663716814,
"grad_norm": 0.127979576587677,
"learning_rate": 9.942536109545751e-05,
"loss": 0.0055,
"step": 9590
},
{
"epoch": 84.95575221238938,
"grad_norm": 0.12383809685707092,
"learning_rate": 9.942285878229853e-05,
"loss": 0.0054,
"step": 9600
},
{
"epoch": 85.04424778761062,
"grad_norm": 0.0929873138666153,
"learning_rate": 9.942035106433001e-05,
"loss": 0.0054,
"step": 9610
},
{
"epoch": 85.13274336283186,
"grad_norm": 0.10625676810741425,
"learning_rate": 9.94178379418262e-05,
"loss": 0.0053,
"step": 9620
},
{
"epoch": 85.22123893805309,
"grad_norm": 0.07674217224121094,
"learning_rate": 9.941531941506194e-05,
"loss": 0.0055,
"step": 9630
},
{
"epoch": 85.30973451327434,
"grad_norm": 0.1119031086564064,
"learning_rate": 9.941279548431263e-05,
"loss": 0.0062,
"step": 9640
},
{
"epoch": 85.39823008849558,
"grad_norm": 0.07825129479169846,
"learning_rate": 9.941026614985431e-05,
"loss": 0.0055,
"step": 9650
},
{
"epoch": 85.48672566371681,
"grad_norm": 0.09553010016679764,
"learning_rate": 9.940773141196357e-05,
"loss": 0.0055,
"step": 9660
},
{
"epoch": 85.57522123893806,
"grad_norm": 0.08578657358884811,
"learning_rate": 9.94051912709176e-05,
"loss": 0.0059,
"step": 9670
},
{
"epoch": 85.66371681415929,
"grad_norm": 0.13160476088523865,
"learning_rate": 9.940264572699421e-05,
"loss": 0.0055,
"step": 9680
},
{
"epoch": 85.75221238938053,
"grad_norm": 0.09317557513713837,
"learning_rate": 9.940009478047174e-05,
"loss": 0.0058,
"step": 9690
},
{
"epoch": 85.84070796460178,
"grad_norm": 0.10249894112348557,
"learning_rate": 9.939753843162918e-05,
"loss": 0.0052,
"step": 9700
},
{
"epoch": 85.929203539823,
"grad_norm": 0.12268459796905518,
"learning_rate": 9.939497668074609e-05,
"loss": 0.0063,
"step": 9710
},
{
"epoch": 86.01769911504425,
"grad_norm": 0.10745935142040253,
"learning_rate": 9.93924095281026e-05,
"loss": 0.006,
"step": 9720
},
{
"epoch": 86.10619469026548,
"grad_norm": 0.13324441015720367,
"learning_rate": 9.938983697397948e-05,
"loss": 0.0054,
"step": 9730
},
{
"epoch": 86.19469026548673,
"grad_norm": 0.12653110921382904,
"learning_rate": 9.938725901865805e-05,
"loss": 0.0057,
"step": 9740
},
{
"epoch": 86.28318584070796,
"grad_norm": 0.1267603188753128,
"learning_rate": 9.93846756624202e-05,
"loss": 0.0053,
"step": 9750
},
{
"epoch": 86.3716814159292,
"grad_norm": 0.1272428333759308,
"learning_rate": 9.938208690554849e-05,
"loss": 0.0062,
"step": 9760
},
{
"epoch": 86.46017699115045,
"grad_norm": 0.11038095504045486,
"learning_rate": 9.9379492748326e-05,
"loss": 0.006,
"step": 9770
},
{
"epoch": 86.54867256637168,
"grad_norm": 0.12999387085437775,
"learning_rate": 9.937689319103641e-05,
"loss": 0.006,
"step": 9780
},
{
"epoch": 86.63716814159292,
"grad_norm": 0.12017898261547089,
"learning_rate": 9.937428823396404e-05,
"loss": 0.0061,
"step": 9790
},
{
"epoch": 86.72566371681415,
"grad_norm": 0.09171116352081299,
"learning_rate": 9.937167787739372e-05,
"loss": 0.006,
"step": 9800
},
{
"epoch": 86.8141592920354,
"grad_norm": 0.12420973926782608,
"learning_rate": 9.936906212161095e-05,
"loss": 0.0053,
"step": 9810
},
{
"epoch": 86.90265486725664,
"grad_norm": 0.1406574696302414,
"learning_rate": 9.936644096690176e-05,
"loss": 0.0053,
"step": 9820
},
{
"epoch": 86.99115044247787,
"grad_norm": 0.1257728487253189,
"learning_rate": 9.936381441355282e-05,
"loss": 0.0056,
"step": 9830
},
{
"epoch": 87.07964601769912,
"grad_norm": 0.09478408843278885,
"learning_rate": 9.936118246185136e-05,
"loss": 0.0057,
"step": 9840
},
{
"epoch": 87.16814159292035,
"grad_norm": 0.1045784205198288,
"learning_rate": 9.935854511208518e-05,
"loss": 0.0055,
"step": 9850
},
{
"epoch": 87.2566371681416,
"grad_norm": 0.1083742156624794,
"learning_rate": 9.935590236454272e-05,
"loss": 0.0056,
"step": 9860
},
{
"epoch": 87.34513274336283,
"grad_norm": 0.12038854509592056,
"learning_rate": 9.935325421951298e-05,
"loss": 0.0058,
"step": 9870
},
{
"epoch": 87.43362831858407,
"grad_norm": 0.127515971660614,
"learning_rate": 9.935060067728557e-05,
"loss": 0.0053,
"step": 9880
},
{
"epoch": 87.52212389380531,
"grad_norm": 0.10194288939237595,
"learning_rate": 9.934794173815067e-05,
"loss": 0.0056,
"step": 9890
},
{
"epoch": 87.61061946902655,
"grad_norm": 0.11484695971012115,
"learning_rate": 9.934527740239906e-05,
"loss": 0.0053,
"step": 9900
},
{
"epoch": 87.69911504424779,
"grad_norm": 0.13571931421756744,
"learning_rate": 9.934260767032209e-05,
"loss": 0.0054,
"step": 9910
},
{
"epoch": 87.78761061946902,
"grad_norm": 0.09630174189805984,
"learning_rate": 9.933993254221172e-05,
"loss": 0.005,
"step": 9920
},
{
"epoch": 87.87610619469027,
"grad_norm": 0.11174452304840088,
"learning_rate": 9.933725201836053e-05,
"loss": 0.0061,
"step": 9930
},
{
"epoch": 87.96460176991151,
"grad_norm": 0.12019488960504532,
"learning_rate": 9.933456609906162e-05,
"loss": 0.0058,
"step": 9940
},
{
"epoch": 88.05309734513274,
"grad_norm": 0.09590188413858414,
"learning_rate": 9.933187478460875e-05,
"loss": 0.0055,
"step": 9950
},
{
"epoch": 88.14159292035399,
"grad_norm": 0.13018690049648285,
"learning_rate": 9.93291780752962e-05,
"loss": 0.0062,
"step": 9960
},
{
"epoch": 88.23008849557522,
"grad_norm": 0.12045933306217194,
"learning_rate": 9.932647597141893e-05,
"loss": 0.006,
"step": 9970
},
{
"epoch": 88.31858407079646,
"grad_norm": 0.09797903150320053,
"learning_rate": 9.932376847327239e-05,
"loss": 0.0058,
"step": 9980
},
{
"epoch": 88.40707964601769,
"grad_norm": 0.09055085480213165,
"learning_rate": 9.932105558115268e-05,
"loss": 0.0056,
"step": 9990
},
{
"epoch": 88.49557522123894,
"grad_norm": 0.07018600404262543,
"learning_rate": 9.931833729535651e-05,
"loss": 0.0064,
"step": 10000
}
],
"logging_steps": 10,
"max_steps": 100000,
"num_input_tokens_seen": 0,
"num_train_epochs": 885,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 512,
"trial_name": null,
"trial_params": null
}