SD1_100K_A_Inv / trainer_state.json
AusmitM's picture
Upload 31 files
8804a90 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 100.0,
"eval_steps": 20000,
"global_step": 309400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03232062055591468,
"grad_norm": 113.44369506835938,
"learning_rate": 9.900000000000002e-06,
"loss": 5.1994,
"step": 100
},
{
"epoch": 0.06464124111182935,
"grad_norm": 33.283287048339844,
"learning_rate": 1.9900000000000003e-05,
"loss": 4.5771,
"step": 200
},
{
"epoch": 0.09696186166774402,
"grad_norm": 26.25897789001465,
"learning_rate": 2.9900000000000002e-05,
"loss": 4.1609,
"step": 300
},
{
"epoch": 0.1292824822236587,
"grad_norm": 11.137129783630371,
"learning_rate": 3.99e-05,
"loss": 4.0668,
"step": 400
},
{
"epoch": 0.16160310277957338,
"grad_norm": 14.905611991882324,
"learning_rate": 4.99e-05,
"loss": 4.0021,
"step": 500
},
{
"epoch": 0.19392372333548805,
"grad_norm": 2.45811128616333,
"learning_rate": 5.9900000000000006e-05,
"loss": 3.9788,
"step": 600
},
{
"epoch": 0.22624434389140272,
"grad_norm": 3.4350666999816895,
"learning_rate": 6.99e-05,
"loss": 3.9595,
"step": 700
},
{
"epoch": 0.2585649644473174,
"grad_norm": 21.33148956298828,
"learning_rate": 7.99e-05,
"loss": 3.9243,
"step": 800
},
{
"epoch": 0.2908855850032321,
"grad_norm": 2.3939037322998047,
"learning_rate": 8.989999999999999e-05,
"loss": 3.8641,
"step": 900
},
{
"epoch": 0.32320620555914675,
"grad_norm": 2.719916343688965,
"learning_rate": 9.99e-05,
"loss": 3.8605,
"step": 1000
},
{
"epoch": 0.3555268261150614,
"grad_norm": 2.285353899002075,
"learning_rate": 0.0001099,
"loss": 3.8334,
"step": 1100
},
{
"epoch": 0.3878474466709761,
"grad_norm": 2.2743117809295654,
"learning_rate": 0.00011990000000000001,
"loss": 3.8158,
"step": 1200
},
{
"epoch": 0.42016806722689076,
"grad_norm": 2.1339423656463623,
"learning_rate": 0.00012989999999999999,
"loss": 3.7871,
"step": 1300
},
{
"epoch": 0.45248868778280543,
"grad_norm": 2.2670485973358154,
"learning_rate": 0.0001399,
"loss": 3.779,
"step": 1400
},
{
"epoch": 0.4848093083387201,
"grad_norm": 2.1925437450408936,
"learning_rate": 0.0001499,
"loss": 3.7351,
"step": 1500
},
{
"epoch": 0.5171299288946348,
"grad_norm": 2.1954660415649414,
"learning_rate": 0.00015989999999999998,
"loss": 3.7266,
"step": 1600
},
{
"epoch": 0.5494505494505495,
"grad_norm": 3.2180988788604736,
"learning_rate": 0.0001699,
"loss": 3.7417,
"step": 1700
},
{
"epoch": 0.5817711700064642,
"grad_norm": 2.362977981567383,
"learning_rate": 0.0001799,
"loss": 3.7068,
"step": 1800
},
{
"epoch": 0.6140917905623788,
"grad_norm": 2.1778812408447266,
"learning_rate": 0.0001899,
"loss": 3.7141,
"step": 1900
},
{
"epoch": 0.6464124111182935,
"grad_norm": 2.239959716796875,
"learning_rate": 0.0001999,
"loss": 3.6816,
"step": 2000
},
{
"epoch": 0.6787330316742082,
"grad_norm": 2.3324010372161865,
"learning_rate": 0.0002099,
"loss": 3.6757,
"step": 2100
},
{
"epoch": 0.7110536522301228,
"grad_norm": 2.222890853881836,
"learning_rate": 0.0002199,
"loss": 3.6602,
"step": 2200
},
{
"epoch": 0.7433742727860375,
"grad_norm": 2.055497407913208,
"learning_rate": 0.0002299,
"loss": 3.657,
"step": 2300
},
{
"epoch": 0.7756948933419522,
"grad_norm": 2.1699202060699463,
"learning_rate": 0.0002399,
"loss": 3.6371,
"step": 2400
},
{
"epoch": 0.8080155138978669,
"grad_norm": 2.2787368297576904,
"learning_rate": 0.0002499,
"loss": 3.6215,
"step": 2500
},
{
"epoch": 0.8403361344537815,
"grad_norm": 2.3619353771209717,
"learning_rate": 0.00025990000000000003,
"loss": 3.6471,
"step": 2600
},
{
"epoch": 0.8726567550096962,
"grad_norm": 2.054823160171509,
"learning_rate": 0.0002699,
"loss": 3.5954,
"step": 2700
},
{
"epoch": 0.9049773755656109,
"grad_norm": 2.221311092376709,
"learning_rate": 0.0002799,
"loss": 3.5998,
"step": 2800
},
{
"epoch": 0.9372979961215255,
"grad_norm": 2.4884724617004395,
"learning_rate": 0.0002899,
"loss": 3.5862,
"step": 2900
},
{
"epoch": 0.9696186166774402,
"grad_norm": 2.2477798461914062,
"learning_rate": 0.0002999,
"loss": 3.5723,
"step": 3000
},
{
"epoch": 1.0019392372333549,
"grad_norm": 2.155560255050659,
"learning_rate": 0.0003099,
"loss": 3.5919,
"step": 3100
},
{
"epoch": 1.0342598577892697,
"grad_norm": 1.4968713521957397,
"learning_rate": 0.0003199,
"loss": 3.5275,
"step": 3200
},
{
"epoch": 1.0665804783451842,
"grad_norm": 1.2746202945709229,
"learning_rate": 0.00032990000000000005,
"loss": 3.5395,
"step": 3300
},
{
"epoch": 1.098901098901099,
"grad_norm": 1.3098007440567017,
"learning_rate": 0.00033989999999999997,
"loss": 3.5354,
"step": 3400
},
{
"epoch": 1.1312217194570136,
"grad_norm": 1.745730996131897,
"learning_rate": 0.0003499,
"loss": 3.5099,
"step": 3500
},
{
"epoch": 1.1635423400129283,
"grad_norm": 1.8114116191864014,
"learning_rate": 0.0003599,
"loss": 3.5412,
"step": 3600
},
{
"epoch": 1.195862960568843,
"grad_norm": 2.3928165435791016,
"learning_rate": 0.0003699,
"loss": 3.5332,
"step": 3700
},
{
"epoch": 1.2281835811247577,
"grad_norm": 1.126379370689392,
"learning_rate": 0.0003799,
"loss": 3.5205,
"step": 3800
},
{
"epoch": 1.2605042016806722,
"grad_norm": 1.7738536596298218,
"learning_rate": 0.00038990000000000004,
"loss": 3.5306,
"step": 3900
},
{
"epoch": 1.292824822236587,
"grad_norm": 2.3915224075317383,
"learning_rate": 0.00039989999999999996,
"loss": 3.5113,
"step": 4000
},
{
"epoch": 1.3251454427925016,
"grad_norm": 1.2352187633514404,
"learning_rate": 0.0004099,
"loss": 3.4997,
"step": 4100
},
{
"epoch": 1.3574660633484164,
"grad_norm": 1.6244854927062988,
"learning_rate": 0.0004199,
"loss": 3.5079,
"step": 4200
},
{
"epoch": 1.389786683904331,
"grad_norm": 1.1132093667984009,
"learning_rate": 0.0004299,
"loss": 3.5052,
"step": 4300
},
{
"epoch": 1.4221073044602457,
"grad_norm": 1.9960055351257324,
"learning_rate": 0.0004399,
"loss": 3.5056,
"step": 4400
},
{
"epoch": 1.4544279250161603,
"grad_norm": 1.619606852531433,
"learning_rate": 0.00044990000000000004,
"loss": 3.4965,
"step": 4500
},
{
"epoch": 1.486748545572075,
"grad_norm": 1.8198740482330322,
"learning_rate": 0.0004599,
"loss": 3.4824,
"step": 4600
},
{
"epoch": 1.5190691661279896,
"grad_norm": 2.4110352993011475,
"learning_rate": 0.0004699,
"loss": 3.4957,
"step": 4700
},
{
"epoch": 1.5513897866839044,
"grad_norm": 1.0220048427581787,
"learning_rate": 0.0004799,
"loss": 3.4719,
"step": 4800
},
{
"epoch": 1.5837104072398192,
"grad_norm": 1.1443417072296143,
"learning_rate": 0.0004899,
"loss": 3.4902,
"step": 4900
},
{
"epoch": 1.6160310277957337,
"grad_norm": 1.0778316259384155,
"learning_rate": 0.0004999000000000001,
"loss": 3.4851,
"step": 5000
},
{
"epoch": 1.6483516483516483,
"grad_norm": 1.5312631130218506,
"learning_rate": 0.0005099,
"loss": 3.4704,
"step": 5100
},
{
"epoch": 1.680672268907563,
"grad_norm": 1.2108691930770874,
"learning_rate": 0.0005199,
"loss": 3.4674,
"step": 5200
},
{
"epoch": 1.7129928894634778,
"grad_norm": 1.3876620531082153,
"learning_rate": 0.0005299,
"loss": 3.4695,
"step": 5300
},
{
"epoch": 1.7453135100193924,
"grad_norm": 1.118224024772644,
"learning_rate": 0.0005399000000000001,
"loss": 3.4654,
"step": 5400
},
{
"epoch": 1.777634130575307,
"grad_norm": 1.0403189659118652,
"learning_rate": 0.0005499000000000001,
"loss": 3.4498,
"step": 5500
},
{
"epoch": 1.8099547511312217,
"grad_norm": 1.0136079788208008,
"learning_rate": 0.0005599,
"loss": 3.4364,
"step": 5600
},
{
"epoch": 1.8422753716871365,
"grad_norm": 1.3502916097640991,
"learning_rate": 0.0005698999999999999,
"loss": 3.4396,
"step": 5700
},
{
"epoch": 1.874595992243051,
"grad_norm": 1.0874897241592407,
"learning_rate": 0.0005799,
"loss": 3.4767,
"step": 5800
},
{
"epoch": 1.9069166127989656,
"grad_norm": 1.2160725593566895,
"learning_rate": 0.0005899,
"loss": 3.4555,
"step": 5900
},
{
"epoch": 1.9392372333548804,
"grad_norm": 0.9763041734695435,
"learning_rate": 0.0005999,
"loss": 3.4453,
"step": 6000
},
{
"epoch": 1.9715578539107952,
"grad_norm": 1.3038817644119263,
"learning_rate": 0.0006099,
"loss": 3.4526,
"step": 6100
},
{
"epoch": 2.0038784744667097,
"grad_norm": 1.0602381229400635,
"learning_rate": 0.0006199,
"loss": 3.4462,
"step": 6200
},
{
"epoch": 2.0361990950226243,
"grad_norm": 0.7091767191886902,
"learning_rate": 0.0006299000000000001,
"loss": 3.3692,
"step": 6300
},
{
"epoch": 2.0685197155785393,
"grad_norm": 0.9198008179664612,
"learning_rate": 0.0006399,
"loss": 3.3699,
"step": 6400
},
{
"epoch": 2.100840336134454,
"grad_norm": 0.7512125372886658,
"learning_rate": 0.0006499,
"loss": 3.3667,
"step": 6500
},
{
"epoch": 2.1331609566903684,
"grad_norm": 1.0284326076507568,
"learning_rate": 0.0006599,
"loss": 3.3685,
"step": 6600
},
{
"epoch": 2.165481577246283,
"grad_norm": 1.1990816593170166,
"learning_rate": 0.0006699000000000001,
"loss": 3.3774,
"step": 6700
},
{
"epoch": 2.197802197802198,
"grad_norm": 0.9412187933921814,
"learning_rate": 0.0006799,
"loss": 3.3603,
"step": 6800
},
{
"epoch": 2.2301228183581125,
"grad_norm": 1.1793667078018188,
"learning_rate": 0.0006899,
"loss": 3.3585,
"step": 6900
},
{
"epoch": 2.262443438914027,
"grad_norm": 1.0098427534103394,
"learning_rate": 0.0006998999999999999,
"loss": 3.3701,
"step": 7000
},
{
"epoch": 2.2947640594699417,
"grad_norm": 1.2947179079055786,
"learning_rate": 0.0007099,
"loss": 3.3552,
"step": 7100
},
{
"epoch": 2.3270846800258567,
"grad_norm": 1.0354315042495728,
"learning_rate": 0.0007199,
"loss": 3.3738,
"step": 7200
},
{
"epoch": 2.3594053005817712,
"grad_norm": 1.6114907264709473,
"learning_rate": 0.0007299,
"loss": 3.3601,
"step": 7300
},
{
"epoch": 2.391725921137686,
"grad_norm": 1.2489641904830933,
"learning_rate": 0.0007399,
"loss": 3.3666,
"step": 7400
},
{
"epoch": 2.4240465416936003,
"grad_norm": 1.2799603939056396,
"learning_rate": 0.0007499000000000001,
"loss": 3.3767,
"step": 7500
},
{
"epoch": 2.4563671622495153,
"grad_norm": 0.9481520652770996,
"learning_rate": 0.0007599,
"loss": 3.3799,
"step": 7600
},
{
"epoch": 2.48868778280543,
"grad_norm": 1.095807671546936,
"learning_rate": 0.0007699,
"loss": 3.3691,
"step": 7700
},
{
"epoch": 2.5210084033613445,
"grad_norm": 0.9669839143753052,
"learning_rate": 0.0007799,
"loss": 3.3424,
"step": 7800
},
{
"epoch": 2.553329023917259,
"grad_norm": 0.819404125213623,
"learning_rate": 0.0007899000000000001,
"loss": 3.3339,
"step": 7900
},
{
"epoch": 2.585649644473174,
"grad_norm": 0.797839879989624,
"learning_rate": 0.0007999000000000001,
"loss": 3.3667,
"step": 8000
},
{
"epoch": 2.6179702650290886,
"grad_norm": 1.1646702289581299,
"learning_rate": 0.0008099,
"loss": 3.37,
"step": 8100
},
{
"epoch": 2.650290885585003,
"grad_norm": 1.1598727703094482,
"learning_rate": 0.0008198999999999999,
"loss": 3.3375,
"step": 8200
},
{
"epoch": 2.682611506140918,
"grad_norm": 1.25111722946167,
"learning_rate": 0.0008299,
"loss": 3.3584,
"step": 8300
},
{
"epoch": 2.7149321266968327,
"grad_norm": 1.2236987352371216,
"learning_rate": 0.0008399,
"loss": 3.3714,
"step": 8400
},
{
"epoch": 2.7472527472527473,
"grad_norm": 0.9083935022354126,
"learning_rate": 0.0008499,
"loss": 3.3567,
"step": 8500
},
{
"epoch": 2.779573367808662,
"grad_norm": 1.0694997310638428,
"learning_rate": 0.0008599,
"loss": 3.3439,
"step": 8600
},
{
"epoch": 2.8118939883645764,
"grad_norm": 1.1277011632919312,
"learning_rate": 0.0008699000000000001,
"loss": 3.3346,
"step": 8700
},
{
"epoch": 2.8442146089204914,
"grad_norm": 1.3444178104400635,
"learning_rate": 0.0008799000000000001,
"loss": 3.3451,
"step": 8800
},
{
"epoch": 2.876535229476406,
"grad_norm": 0.9961609244346619,
"learning_rate": 0.0008899,
"loss": 3.3484,
"step": 8900
},
{
"epoch": 2.9088558500323205,
"grad_norm": 0.9413596391677856,
"learning_rate": 0.0008999,
"loss": 3.3289,
"step": 9000
},
{
"epoch": 2.9411764705882355,
"grad_norm": 1.064833164215088,
"learning_rate": 0.0009099,
"loss": 3.3546,
"step": 9100
},
{
"epoch": 2.97349709114415,
"grad_norm": 0.9576635360717773,
"learning_rate": 0.0009199000000000001,
"loss": 3.3515,
"step": 9200
},
{
"epoch": 3.0058177117000646,
"grad_norm": 0.8599377274513245,
"learning_rate": 0.0009299,
"loss": 3.3331,
"step": 9300
},
{
"epoch": 3.038138332255979,
"grad_norm": 0.8528821468353271,
"learning_rate": 0.0009399,
"loss": 3.2379,
"step": 9400
},
{
"epoch": 3.070458952811894,
"grad_norm": 1.010067105293274,
"learning_rate": 0.0009498999999999999,
"loss": 3.2548,
"step": 9500
},
{
"epoch": 3.1027795733678087,
"grad_norm": 1.0422749519348145,
"learning_rate": 0.0009599,
"loss": 3.2458,
"step": 9600
},
{
"epoch": 3.1351001939237233,
"grad_norm": 1.1780894994735718,
"learning_rate": 0.0009699,
"loss": 3.2479,
"step": 9700
},
{
"epoch": 3.167420814479638,
"grad_norm": 1.0699479579925537,
"learning_rate": 0.0009799,
"loss": 3.2466,
"step": 9800
},
{
"epoch": 3.199741435035553,
"grad_norm": 0.7220283150672913,
"learning_rate": 0.0009899,
"loss": 3.2282,
"step": 9900
},
{
"epoch": 3.2320620555914674,
"grad_norm": 0.9945827126502991,
"learning_rate": 0.0009999,
"loss": 3.2569,
"step": 10000
},
{
"epoch": 3.264382676147382,
"grad_norm": 0.9884164929389954,
"learning_rate": 0.001,
"loss": 3.2202,
"step": 10100
},
{
"epoch": 3.2967032967032965,
"grad_norm": 0.9047076106071472,
"learning_rate": 0.001,
"loss": 3.2181,
"step": 10200
},
{
"epoch": 3.3290239172592115,
"grad_norm": 1.3361272811889648,
"learning_rate": 0.001,
"loss": 3.2336,
"step": 10300
},
{
"epoch": 3.361344537815126,
"grad_norm": 1.0945632457733154,
"learning_rate": 0.001,
"loss": 3.2355,
"step": 10400
},
{
"epoch": 3.3936651583710407,
"grad_norm": 1.010493278503418,
"learning_rate": 0.001,
"loss": 3.2467,
"step": 10500
},
{
"epoch": 3.425985778926955,
"grad_norm": 0.941750705242157,
"learning_rate": 0.001,
"loss": 3.2268,
"step": 10600
},
{
"epoch": 3.45830639948287,
"grad_norm": 0.6709722876548767,
"learning_rate": 0.001,
"loss": 3.2282,
"step": 10700
},
{
"epoch": 3.490627020038785,
"grad_norm": 0.8207814693450928,
"learning_rate": 0.001,
"loss": 3.2243,
"step": 10800
},
{
"epoch": 3.5229476405946993,
"grad_norm": 0.9374675154685974,
"learning_rate": 0.001,
"loss": 3.2398,
"step": 10900
},
{
"epoch": 3.555268261150614,
"grad_norm": 1.0046817064285278,
"learning_rate": 0.001,
"loss": 3.2387,
"step": 11000
},
{
"epoch": 3.587588881706529,
"grad_norm": 1.0659905672073364,
"learning_rate": 0.001,
"loss": 3.2373,
"step": 11100
},
{
"epoch": 3.6199095022624435,
"grad_norm": 0.8840706944465637,
"learning_rate": 0.001,
"loss": 3.2419,
"step": 11200
},
{
"epoch": 3.652230122818358,
"grad_norm": 0.8998158574104309,
"learning_rate": 0.001,
"loss": 3.2298,
"step": 11300
},
{
"epoch": 3.684550743374273,
"grad_norm": 0.7526170015335083,
"learning_rate": 0.001,
"loss": 3.2266,
"step": 11400
},
{
"epoch": 3.7168713639301876,
"grad_norm": 1.02177095413208,
"learning_rate": 0.001,
"loss": 3.2299,
"step": 11500
},
{
"epoch": 3.749191984486102,
"grad_norm": 1.0600848197937012,
"learning_rate": 0.001,
"loss": 3.2301,
"step": 11600
},
{
"epoch": 3.7815126050420167,
"grad_norm": 1.2486552000045776,
"learning_rate": 0.001,
"loss": 3.2267,
"step": 11700
},
{
"epoch": 3.8138332255979313,
"grad_norm": 1.2336997985839844,
"learning_rate": 0.001,
"loss": 3.2322,
"step": 11800
},
{
"epoch": 3.8461538461538463,
"grad_norm": 1.1906139850616455,
"learning_rate": 0.001,
"loss": 3.2321,
"step": 11900
},
{
"epoch": 3.878474466709761,
"grad_norm": 0.8188307881355286,
"learning_rate": 0.001,
"loss": 3.2081,
"step": 12000
},
{
"epoch": 3.9107950872656754,
"grad_norm": 0.9523578882217407,
"learning_rate": 0.001,
"loss": 3.2217,
"step": 12100
},
{
"epoch": 3.9431157078215904,
"grad_norm": 1.0636179447174072,
"learning_rate": 0.001,
"loss": 3.2072,
"step": 12200
},
{
"epoch": 3.975436328377505,
"grad_norm": 1.092067003250122,
"learning_rate": 0.001,
"loss": 3.2053,
"step": 12300
},
{
"epoch": 4.0077569489334195,
"grad_norm": 0.7680415511131287,
"learning_rate": 0.001,
"loss": 3.2015,
"step": 12400
},
{
"epoch": 4.040077569489334,
"grad_norm": 1.0675336122512817,
"learning_rate": 0.001,
"loss": 3.1144,
"step": 12500
},
{
"epoch": 4.072398190045249,
"grad_norm": 1.0144174098968506,
"learning_rate": 0.001,
"loss": 3.1165,
"step": 12600
},
{
"epoch": 4.104718810601163,
"grad_norm": 1.1183968782424927,
"learning_rate": 0.001,
"loss": 3.131,
"step": 12700
},
{
"epoch": 4.137039431157079,
"grad_norm": 0.8900427222251892,
"learning_rate": 0.001,
"loss": 3.1315,
"step": 12800
},
{
"epoch": 4.169360051712993,
"grad_norm": 0.9315294027328491,
"learning_rate": 0.001,
"loss": 3.1344,
"step": 12900
},
{
"epoch": 4.201680672268908,
"grad_norm": 1.214819312095642,
"learning_rate": 0.001,
"loss": 3.132,
"step": 13000
},
{
"epoch": 4.234001292824822,
"grad_norm": 0.7954821586608887,
"learning_rate": 0.001,
"loss": 3.1417,
"step": 13100
},
{
"epoch": 4.266321913380737,
"grad_norm": 0.927521824836731,
"learning_rate": 0.001,
"loss": 3.1207,
"step": 13200
},
{
"epoch": 4.298642533936651,
"grad_norm": 0.9967598915100098,
"learning_rate": 0.001,
"loss": 3.1457,
"step": 13300
},
{
"epoch": 4.330963154492566,
"grad_norm": 0.8189947009086609,
"learning_rate": 0.001,
"loss": 3.1434,
"step": 13400
},
{
"epoch": 4.3632837750484805,
"grad_norm": 0.8321822285652161,
"learning_rate": 0.001,
"loss": 3.1667,
"step": 13500
},
{
"epoch": 4.395604395604396,
"grad_norm": 0.9342663884162903,
"learning_rate": 0.001,
"loss": 3.1384,
"step": 13600
},
{
"epoch": 4.4279250161603105,
"grad_norm": 1.075761079788208,
"learning_rate": 0.001,
"loss": 3.1525,
"step": 13700
},
{
"epoch": 4.460245636716225,
"grad_norm": 0.8055844902992249,
"learning_rate": 0.001,
"loss": 3.1573,
"step": 13800
},
{
"epoch": 4.49256625727214,
"grad_norm": 1.074677586555481,
"learning_rate": 0.001,
"loss": 3.1582,
"step": 13900
},
{
"epoch": 4.524886877828054,
"grad_norm": 0.9087541699409485,
"learning_rate": 0.001,
"loss": 3.1483,
"step": 14000
},
{
"epoch": 4.557207498383969,
"grad_norm": 0.9732751846313477,
"learning_rate": 0.001,
"loss": 3.1317,
"step": 14100
},
{
"epoch": 4.589528118939883,
"grad_norm": 1.3061445951461792,
"learning_rate": 0.001,
"loss": 3.1436,
"step": 14200
},
{
"epoch": 4.621848739495798,
"grad_norm": 1.0117545127868652,
"learning_rate": 0.001,
"loss": 3.1514,
"step": 14300
},
{
"epoch": 4.654169360051713,
"grad_norm": 0.9364200830459595,
"learning_rate": 0.001,
"loss": 3.1453,
"step": 14400
},
{
"epoch": 4.686489980607628,
"grad_norm": 0.9485110640525818,
"learning_rate": 0.001,
"loss": 3.1505,
"step": 14500
},
{
"epoch": 4.7188106011635425,
"grad_norm": 0.7017455101013184,
"learning_rate": 0.001,
"loss": 3.1556,
"step": 14600
},
{
"epoch": 4.751131221719457,
"grad_norm": 0.7182191610336304,
"learning_rate": 0.001,
"loss": 3.1469,
"step": 14700
},
{
"epoch": 4.783451842275372,
"grad_norm": 0.871917724609375,
"learning_rate": 0.001,
"loss": 3.1624,
"step": 14800
},
{
"epoch": 4.815772462831286,
"grad_norm": 0.9741417169570923,
"learning_rate": 0.001,
"loss": 3.151,
"step": 14900
},
{
"epoch": 4.848093083387201,
"grad_norm": 0.966870903968811,
"learning_rate": 0.001,
"loss": 3.1085,
"step": 15000
},
{
"epoch": 4.880413703943116,
"grad_norm": 0.8608819842338562,
"learning_rate": 0.001,
"loss": 3.1423,
"step": 15100
},
{
"epoch": 4.912734324499031,
"grad_norm": 0.8575353622436523,
"learning_rate": 0.001,
"loss": 3.1509,
"step": 15200
},
{
"epoch": 4.945054945054945,
"grad_norm": 0.7007643580436707,
"learning_rate": 0.001,
"loss": 3.1507,
"step": 15300
},
{
"epoch": 4.97737556561086,
"grad_norm": 0.8379340767860413,
"learning_rate": 0.001,
"loss": 3.1407,
"step": 15400
},
{
"epoch": 5.009696186166774,
"grad_norm": 0.9032299518585205,
"learning_rate": 0.001,
"loss": 3.1197,
"step": 15500
},
{
"epoch": 5.042016806722689,
"grad_norm": 0.922421932220459,
"learning_rate": 0.001,
"loss": 3.0103,
"step": 15600
},
{
"epoch": 5.0743374272786035,
"grad_norm": 0.8886153697967529,
"learning_rate": 0.001,
"loss": 3.0239,
"step": 15700
},
{
"epoch": 5.106658047834518,
"grad_norm": 1.210507869720459,
"learning_rate": 0.001,
"loss": 3.0331,
"step": 15800
},
{
"epoch": 5.1389786683904335,
"grad_norm": 0.9552029371261597,
"learning_rate": 0.001,
"loss": 3.0191,
"step": 15900
},
{
"epoch": 5.171299288946348,
"grad_norm": 0.9923421740531921,
"learning_rate": 0.001,
"loss": 3.0519,
"step": 16000
},
{
"epoch": 5.203619909502263,
"grad_norm": 0.646263062953949,
"learning_rate": 0.001,
"loss": 3.0327,
"step": 16100
},
{
"epoch": 5.235940530058177,
"grad_norm": 0.9746344685554504,
"learning_rate": 0.001,
"loss": 3.0388,
"step": 16200
},
{
"epoch": 5.268261150614092,
"grad_norm": 0.894690215587616,
"learning_rate": 0.001,
"loss": 3.0451,
"step": 16300
},
{
"epoch": 5.300581771170006,
"grad_norm": 1.1923508644104004,
"learning_rate": 0.001,
"loss": 3.039,
"step": 16400
},
{
"epoch": 5.332902391725921,
"grad_norm": 0.9015272259712219,
"learning_rate": 0.001,
"loss": 3.0746,
"step": 16500
},
{
"epoch": 5.365223012281835,
"grad_norm": 1.0535194873809814,
"learning_rate": 0.001,
"loss": 3.0663,
"step": 16600
},
{
"epoch": 5.397543632837751,
"grad_norm": 0.8790969848632812,
"learning_rate": 0.001,
"loss": 3.0455,
"step": 16700
},
{
"epoch": 5.429864253393665,
"grad_norm": 0.8354184627532959,
"learning_rate": 0.001,
"loss": 3.0653,
"step": 16800
},
{
"epoch": 5.46218487394958,
"grad_norm": 1.1250206232070923,
"learning_rate": 0.001,
"loss": 3.0639,
"step": 16900
},
{
"epoch": 5.4945054945054945,
"grad_norm": 0.9991565942764282,
"learning_rate": 0.001,
"loss": 3.0435,
"step": 17000
},
{
"epoch": 5.526826115061409,
"grad_norm": 0.757892370223999,
"learning_rate": 0.001,
"loss": 3.0669,
"step": 17100
},
{
"epoch": 5.559146735617324,
"grad_norm": 0.8190027475357056,
"learning_rate": 0.001,
"loss": 3.0609,
"step": 17200
},
{
"epoch": 5.591467356173238,
"grad_norm": 0.9603754878044128,
"learning_rate": 0.001,
"loss": 3.0439,
"step": 17300
},
{
"epoch": 5.623787976729153,
"grad_norm": 0.5893958806991577,
"learning_rate": 0.001,
"loss": 3.0683,
"step": 17400
},
{
"epoch": 5.656108597285068,
"grad_norm": 0.927288830280304,
"learning_rate": 0.001,
"loss": 3.0408,
"step": 17500
},
{
"epoch": 5.688429217840983,
"grad_norm": 0.90427565574646,
"learning_rate": 0.001,
"loss": 3.0687,
"step": 17600
},
{
"epoch": 5.720749838396897,
"grad_norm": 0.8372429013252258,
"learning_rate": 0.001,
"loss": 3.0582,
"step": 17700
},
{
"epoch": 5.753070458952812,
"grad_norm": 0.9206283092498779,
"learning_rate": 0.001,
"loss": 3.0717,
"step": 17800
},
{
"epoch": 5.785391079508726,
"grad_norm": 0.970551609992981,
"learning_rate": 0.001,
"loss": 3.0689,
"step": 17900
},
{
"epoch": 5.817711700064641,
"grad_norm": 0.9441208839416504,
"learning_rate": 0.001,
"loss": 3.0412,
"step": 18000
},
{
"epoch": 5.850032320620556,
"grad_norm": 0.869175910949707,
"learning_rate": 0.001,
"loss": 3.0553,
"step": 18100
},
{
"epoch": 5.882352941176471,
"grad_norm": 0.6702381372451782,
"learning_rate": 0.001,
"loss": 3.0553,
"step": 18200
},
{
"epoch": 5.914673561732386,
"grad_norm": 1.0862089395523071,
"learning_rate": 0.001,
"loss": 3.0741,
"step": 18300
},
{
"epoch": 5.9469941822883,
"grad_norm": 0.9103309512138367,
"learning_rate": 0.001,
"loss": 3.0748,
"step": 18400
},
{
"epoch": 5.979314802844215,
"grad_norm": 1.0982170104980469,
"learning_rate": 0.001,
"loss": 3.054,
"step": 18500
},
{
"epoch": 6.011635423400129,
"grad_norm": 1.0017380714416504,
"learning_rate": 0.001,
"loss": 3.0038,
"step": 18600
},
{
"epoch": 6.043956043956044,
"grad_norm": 1.0425845384597778,
"learning_rate": 0.001,
"loss": 2.9489,
"step": 18700
},
{
"epoch": 6.076276664511958,
"grad_norm": 1.030131220817566,
"learning_rate": 0.001,
"loss": 2.9291,
"step": 18800
},
{
"epoch": 6.108597285067873,
"grad_norm": 1.0179742574691772,
"learning_rate": 0.001,
"loss": 2.9501,
"step": 18900
},
{
"epoch": 6.140917905623788,
"grad_norm": 1.0169600248336792,
"learning_rate": 0.001,
"loss": 2.9339,
"step": 19000
},
{
"epoch": 6.173238526179703,
"grad_norm": 0.7874507308006287,
"learning_rate": 0.001,
"loss": 2.9623,
"step": 19100
},
{
"epoch": 6.2055591467356175,
"grad_norm": 0.7536994218826294,
"learning_rate": 0.001,
"loss": 2.9431,
"step": 19200
},
{
"epoch": 6.237879767291532,
"grad_norm": 1.3374706506729126,
"learning_rate": 0.001,
"loss": 2.9691,
"step": 19300
},
{
"epoch": 6.270200387847447,
"grad_norm": 0.9803719520568848,
"learning_rate": 0.001,
"loss": 2.9555,
"step": 19400
},
{
"epoch": 6.302521008403361,
"grad_norm": 0.898348331451416,
"learning_rate": 0.001,
"loss": 2.9691,
"step": 19500
},
{
"epoch": 6.334841628959276,
"grad_norm": 1.0121514797210693,
"learning_rate": 0.001,
"loss": 2.9758,
"step": 19600
},
{
"epoch": 6.36716224951519,
"grad_norm": 0.8829805850982666,
"learning_rate": 0.001,
"loss": 2.9724,
"step": 19700
},
{
"epoch": 6.399482870071106,
"grad_norm": 1.1278361082077026,
"learning_rate": 0.001,
"loss": 2.9596,
"step": 19800
},
{
"epoch": 6.43180349062702,
"grad_norm": 0.9255377650260925,
"learning_rate": 0.001,
"loss": 2.9845,
"step": 19900
},
{
"epoch": 6.464124111182935,
"grad_norm": 0.867389440536499,
"learning_rate": 0.001,
"loss": 2.9652,
"step": 20000
},
{
"epoch": 6.496444731738849,
"grad_norm": 0.8110164999961853,
"learning_rate": 0.001,
"loss": 2.9839,
"step": 20100
},
{
"epoch": 6.528765352294764,
"grad_norm": 1.071718454360962,
"learning_rate": 0.001,
"loss": 2.9835,
"step": 20200
},
{
"epoch": 6.5610859728506785,
"grad_norm": 1.1645890474319458,
"learning_rate": 0.001,
"loss": 2.9578,
"step": 20300
},
{
"epoch": 6.593406593406593,
"grad_norm": 0.9051645994186401,
"learning_rate": 0.001,
"loss": 2.9964,
"step": 20400
},
{
"epoch": 6.625727213962508,
"grad_norm": 1.0463429689407349,
"learning_rate": 0.001,
"loss": 2.9768,
"step": 20500
},
{
"epoch": 6.658047834518423,
"grad_norm": 1.1838918924331665,
"learning_rate": 0.001,
"loss": 2.9879,
"step": 20600
},
{
"epoch": 6.690368455074338,
"grad_norm": 0.7269249558448792,
"learning_rate": 0.001,
"loss": 2.9872,
"step": 20700
},
{
"epoch": 6.722689075630252,
"grad_norm": 0.8935712575912476,
"learning_rate": 0.001,
"loss": 2.978,
"step": 20800
},
{
"epoch": 6.755009696186167,
"grad_norm": 1.0895768404006958,
"learning_rate": 0.001,
"loss": 2.9891,
"step": 20900
},
{
"epoch": 6.787330316742081,
"grad_norm": 0.7446025609970093,
"learning_rate": 0.001,
"loss": 2.9789,
"step": 21000
},
{
"epoch": 6.819650937297996,
"grad_norm": 1.0650365352630615,
"learning_rate": 0.001,
"loss": 2.9689,
"step": 21100
},
{
"epoch": 6.85197155785391,
"grad_norm": 1.0426945686340332,
"learning_rate": 0.001,
"loss": 3.003,
"step": 21200
},
{
"epoch": 6.884292178409826,
"grad_norm": 0.9406901597976685,
"learning_rate": 0.001,
"loss": 2.9873,
"step": 21300
},
{
"epoch": 6.91661279896574,
"grad_norm": 0.8841379284858704,
"learning_rate": 0.001,
"loss": 2.9859,
"step": 21400
},
{
"epoch": 6.948933419521655,
"grad_norm": 0.9247796535491943,
"learning_rate": 0.001,
"loss": 2.9769,
"step": 21500
},
{
"epoch": 6.98125404007757,
"grad_norm": 0.9202895164489746,
"learning_rate": 0.001,
"loss": 2.9972,
"step": 21600
},
{
"epoch": 7.013574660633484,
"grad_norm": 1.2635228633880615,
"learning_rate": 0.001,
"loss": 2.9374,
"step": 21700
},
{
"epoch": 7.045895281189399,
"grad_norm": 1.269637107849121,
"learning_rate": 0.001,
"loss": 2.8665,
"step": 21800
},
{
"epoch": 7.078215901745313,
"grad_norm": 1.1895501613616943,
"learning_rate": 0.001,
"loss": 2.8506,
"step": 21900
},
{
"epoch": 7.110536522301228,
"grad_norm": 0.8704085350036621,
"learning_rate": 0.001,
"loss": 2.8815,
"step": 22000
},
{
"epoch": 7.142857142857143,
"grad_norm": 1.042540192604065,
"learning_rate": 0.001,
"loss": 2.8699,
"step": 22100
},
{
"epoch": 7.175177763413058,
"grad_norm": 0.9110554456710815,
"learning_rate": 0.001,
"loss": 2.8704,
"step": 22200
},
{
"epoch": 7.207498383968972,
"grad_norm": 0.951602578163147,
"learning_rate": 0.001,
"loss": 2.8674,
"step": 22300
},
{
"epoch": 7.239819004524887,
"grad_norm": 0.988113284111023,
"learning_rate": 0.001,
"loss": 2.8939,
"step": 22400
},
{
"epoch": 7.2721396250808015,
"grad_norm": 1.3134607076644897,
"learning_rate": 0.001,
"loss": 2.8967,
"step": 22500
},
{
"epoch": 7.304460245636716,
"grad_norm": 0.9608508348464966,
"learning_rate": 0.001,
"loss": 2.8733,
"step": 22600
},
{
"epoch": 7.336780866192631,
"grad_norm": 0.9588029384613037,
"learning_rate": 0.001,
"loss": 2.8962,
"step": 22700
},
{
"epoch": 7.369101486748546,
"grad_norm": 1.15349280834198,
"learning_rate": 0.001,
"loss": 2.8861,
"step": 22800
},
{
"epoch": 7.401422107304461,
"grad_norm": 1.198407530784607,
"learning_rate": 0.001,
"loss": 2.8782,
"step": 22900
},
{
"epoch": 7.433742727860375,
"grad_norm": 0.9935774803161621,
"learning_rate": 0.001,
"loss": 2.9192,
"step": 23000
},
{
"epoch": 7.46606334841629,
"grad_norm": 1.0621734857559204,
"learning_rate": 0.001,
"loss": 2.9049,
"step": 23100
},
{
"epoch": 7.498383968972204,
"grad_norm": 1.2372674942016602,
"learning_rate": 0.001,
"loss": 2.9289,
"step": 23200
},
{
"epoch": 7.530704589528119,
"grad_norm": 1.0882078409194946,
"learning_rate": 0.001,
"loss": 2.9114,
"step": 23300
},
{
"epoch": 7.563025210084033,
"grad_norm": 0.8493301272392273,
"learning_rate": 0.001,
"loss": 2.9269,
"step": 23400
},
{
"epoch": 7.595345830639948,
"grad_norm": 0.7285293340682983,
"learning_rate": 0.001,
"loss": 2.9255,
"step": 23500
},
{
"epoch": 7.6276664511958625,
"grad_norm": 1.0530344247817993,
"learning_rate": 0.001,
"loss": 2.9048,
"step": 23600
},
{
"epoch": 7.659987071751778,
"grad_norm": 1.0908581018447876,
"learning_rate": 0.001,
"loss": 2.9228,
"step": 23700
},
{
"epoch": 7.6923076923076925,
"grad_norm": 0.9890776872634888,
"learning_rate": 0.001,
"loss": 2.9278,
"step": 23800
},
{
"epoch": 7.724628312863607,
"grad_norm": 0.9740605354309082,
"learning_rate": 0.001,
"loss": 2.9043,
"step": 23900
},
{
"epoch": 7.756948933419522,
"grad_norm": 1.2037266492843628,
"learning_rate": 0.001,
"loss": 2.9253,
"step": 24000
},
{
"epoch": 7.789269553975436,
"grad_norm": 1.1273502111434937,
"learning_rate": 0.001,
"loss": 2.9193,
"step": 24100
},
{
"epoch": 7.821590174531351,
"grad_norm": 1.109514594078064,
"learning_rate": 0.001,
"loss": 2.9252,
"step": 24200
},
{
"epoch": 7.853910795087265,
"grad_norm": 1.082470178604126,
"learning_rate": 0.001,
"loss": 2.9334,
"step": 24300
},
{
"epoch": 7.886231415643181,
"grad_norm": 1.14096200466156,
"learning_rate": 0.001,
"loss": 2.9195,
"step": 24400
},
{
"epoch": 7.918552036199095,
"grad_norm": 1.1023954153060913,
"learning_rate": 0.001,
"loss": 2.9416,
"step": 24500
},
{
"epoch": 7.95087265675501,
"grad_norm": 0.8876301646232605,
"learning_rate": 0.001,
"loss": 2.9058,
"step": 24600
},
{
"epoch": 7.983193277310924,
"grad_norm": 1.178880214691162,
"learning_rate": 0.001,
"loss": 2.9248,
"step": 24700
},
{
"epoch": 8.015513897866839,
"grad_norm": 1.0578535795211792,
"learning_rate": 0.001,
"loss": 2.8523,
"step": 24800
},
{
"epoch": 8.047834518422754,
"grad_norm": 1.3258286714553833,
"learning_rate": 0.001,
"loss": 2.8026,
"step": 24900
},
{
"epoch": 8.080155138978668,
"grad_norm": 1.2805758714675903,
"learning_rate": 0.001,
"loss": 2.7986,
"step": 25000
},
{
"epoch": 8.112475759534583,
"grad_norm": 0.9195302128791809,
"learning_rate": 0.001,
"loss": 2.8141,
"step": 25100
},
{
"epoch": 8.144796380090497,
"grad_norm": 1.085827112197876,
"learning_rate": 0.001,
"loss": 2.813,
"step": 25200
},
{
"epoch": 8.177117000646412,
"grad_norm": 1.5205888748168945,
"learning_rate": 0.001,
"loss": 2.8136,
"step": 25300
},
{
"epoch": 8.209437621202326,
"grad_norm": 1.387506127357483,
"learning_rate": 0.001,
"loss": 2.809,
"step": 25400
},
{
"epoch": 8.241758241758241,
"grad_norm": 0.9745718240737915,
"learning_rate": 0.001,
"loss": 2.8106,
"step": 25500
},
{
"epoch": 8.274078862314157,
"grad_norm": 1.1379154920578003,
"learning_rate": 0.001,
"loss": 2.8474,
"step": 25600
},
{
"epoch": 8.306399482870072,
"grad_norm": 1.5041371583938599,
"learning_rate": 0.001,
"loss": 2.8457,
"step": 25700
},
{
"epoch": 8.338720103425986,
"grad_norm": 1.2022348642349243,
"learning_rate": 0.001,
"loss": 2.8391,
"step": 25800
},
{
"epoch": 8.371040723981901,
"grad_norm": 1.372659683227539,
"learning_rate": 0.001,
"loss": 2.8052,
"step": 25900
},
{
"epoch": 8.403361344537815,
"grad_norm": 1.4856975078582764,
"learning_rate": 0.001,
"loss": 2.8294,
"step": 26000
},
{
"epoch": 8.43568196509373,
"grad_norm": 1.080346703529358,
"learning_rate": 0.001,
"loss": 2.8348,
"step": 26100
},
{
"epoch": 8.468002585649645,
"grad_norm": 1.1464358568191528,
"learning_rate": 0.001,
"loss": 2.8426,
"step": 26200
},
{
"epoch": 8.50032320620556,
"grad_norm": 1.155603051185608,
"learning_rate": 0.001,
"loss": 2.8648,
"step": 26300
},
{
"epoch": 8.532643826761474,
"grad_norm": 1.1736012697219849,
"learning_rate": 0.001,
"loss": 2.8652,
"step": 26400
},
{
"epoch": 8.564964447317388,
"grad_norm": 1.1918305158615112,
"learning_rate": 0.001,
"loss": 2.8356,
"step": 26500
},
{
"epoch": 8.597285067873303,
"grad_norm": 0.9844455122947693,
"learning_rate": 0.001,
"loss": 2.8573,
"step": 26600
},
{
"epoch": 8.629605688429217,
"grad_norm": 1.0055450201034546,
"learning_rate": 0.001,
"loss": 2.8432,
"step": 26700
},
{
"epoch": 8.661926308985132,
"grad_norm": 1.429309368133545,
"learning_rate": 0.001,
"loss": 2.8591,
"step": 26800
},
{
"epoch": 8.694246929541046,
"grad_norm": 1.6253108978271484,
"learning_rate": 0.001,
"loss": 2.8641,
"step": 26900
},
{
"epoch": 8.726567550096961,
"grad_norm": 1.1094082593917847,
"learning_rate": 0.001,
"loss": 2.8623,
"step": 27000
},
{
"epoch": 8.758888170652877,
"grad_norm": 1.6157804727554321,
"learning_rate": 0.001,
"loss": 2.8444,
"step": 27100
},
{
"epoch": 8.791208791208792,
"grad_norm": 1.1055054664611816,
"learning_rate": 0.001,
"loss": 2.8503,
"step": 27200
},
{
"epoch": 8.823529411764707,
"grad_norm": 1.0860084295272827,
"learning_rate": 0.001,
"loss": 2.8627,
"step": 27300
},
{
"epoch": 8.855850032320621,
"grad_norm": 1.8660216331481934,
"learning_rate": 0.001,
"loss": 2.863,
"step": 27400
},
{
"epoch": 8.888170652876536,
"grad_norm": 0.9799934029579163,
"learning_rate": 0.001,
"loss": 2.8736,
"step": 27500
},
{
"epoch": 8.92049127343245,
"grad_norm": 1.2370038032531738,
"learning_rate": 0.001,
"loss": 2.8695,
"step": 27600
},
{
"epoch": 8.952811893988365,
"grad_norm": 1.547116994857788,
"learning_rate": 0.001,
"loss": 2.8767,
"step": 27700
},
{
"epoch": 8.98513251454428,
"grad_norm": 1.3884578943252563,
"learning_rate": 0.001,
"loss": 2.8653,
"step": 27800
},
{
"epoch": 9.017453135100194,
"grad_norm": 1.0243676900863647,
"learning_rate": 0.001,
"loss": 2.7814,
"step": 27900
},
{
"epoch": 9.049773755656108,
"grad_norm": 1.246034026145935,
"learning_rate": 0.001,
"loss": 2.7149,
"step": 28000
},
{
"epoch": 9.082094376212023,
"grad_norm": 1.4059250354766846,
"learning_rate": 0.001,
"loss": 2.7479,
"step": 28100
},
{
"epoch": 9.114414996767938,
"grad_norm": 1.1591936349868774,
"learning_rate": 0.001,
"loss": 2.7149,
"step": 28200
},
{
"epoch": 9.146735617323852,
"grad_norm": 1.3190242052078247,
"learning_rate": 0.001,
"loss": 2.7682,
"step": 28300
},
{
"epoch": 9.179056237879767,
"grad_norm": 1.0201836824417114,
"learning_rate": 0.001,
"loss": 2.7568,
"step": 28400
},
{
"epoch": 9.211376858435681,
"grad_norm": 1.3165394067764282,
"learning_rate": 0.001,
"loss": 2.761,
"step": 28500
},
{
"epoch": 9.243697478991596,
"grad_norm": 1.5911014080047607,
"learning_rate": 0.001,
"loss": 2.7625,
"step": 28600
},
{
"epoch": 9.276018099547512,
"grad_norm": 1.216652750968933,
"learning_rate": 0.001,
"loss": 2.7949,
"step": 28700
},
{
"epoch": 9.308338720103427,
"grad_norm": 1.302988886833191,
"learning_rate": 0.001,
"loss": 2.7728,
"step": 28800
},
{
"epoch": 9.340659340659341,
"grad_norm": 1.0725343227386475,
"learning_rate": 0.001,
"loss": 2.7737,
"step": 28900
},
{
"epoch": 9.372979961215256,
"grad_norm": 1.4870610237121582,
"learning_rate": 0.001,
"loss": 2.7814,
"step": 29000
},
{
"epoch": 9.40530058177117,
"grad_norm": 1.8429652452468872,
"learning_rate": 0.001,
"loss": 2.7755,
"step": 29100
},
{
"epoch": 9.437621202327085,
"grad_norm": 1.161829948425293,
"learning_rate": 0.001,
"loss": 2.7867,
"step": 29200
},
{
"epoch": 9.469941822883,
"grad_norm": 1.0724472999572754,
"learning_rate": 0.001,
"loss": 2.7889,
"step": 29300
},
{
"epoch": 9.502262443438914,
"grad_norm": 1.493570327758789,
"learning_rate": 0.001,
"loss": 2.7713,
"step": 29400
},
{
"epoch": 9.534583063994829,
"grad_norm": 1.497449278831482,
"learning_rate": 0.001,
"loss": 2.7983,
"step": 29500
},
{
"epoch": 9.566903684550743,
"grad_norm": 1.244484543800354,
"learning_rate": 0.001,
"loss": 2.7882,
"step": 29600
},
{
"epoch": 9.599224305106658,
"grad_norm": 1.44032621383667,
"learning_rate": 0.001,
"loss": 2.8069,
"step": 29700
},
{
"epoch": 9.631544925662572,
"grad_norm": 0.8440661430358887,
"learning_rate": 0.001,
"loss": 2.8004,
"step": 29800
},
{
"epoch": 9.663865546218487,
"grad_norm": 1.1102241277694702,
"learning_rate": 0.001,
"loss": 2.793,
"step": 29900
},
{
"epoch": 9.696186166774401,
"grad_norm": 0.9780630469322205,
"learning_rate": 0.001,
"loss": 2.791,
"step": 30000
},
{
"epoch": 9.728506787330316,
"grad_norm": 1.3218283653259277,
"learning_rate": 0.001,
"loss": 2.7984,
"step": 30100
},
{
"epoch": 9.760827407886232,
"grad_norm": 1.3901311159133911,
"learning_rate": 0.001,
"loss": 2.7985,
"step": 30200
},
{
"epoch": 9.793148028442147,
"grad_norm": 1.4005396366119385,
"learning_rate": 0.001,
"loss": 2.7985,
"step": 30300
},
{
"epoch": 9.825468648998061,
"grad_norm": 1.001150369644165,
"learning_rate": 0.001,
"loss": 2.807,
"step": 30400
},
{
"epoch": 9.857789269553976,
"grad_norm": 1.3119566440582275,
"learning_rate": 0.001,
"loss": 2.8018,
"step": 30500
},
{
"epoch": 9.89010989010989,
"grad_norm": 1.1732113361358643,
"learning_rate": 0.001,
"loss": 2.8034,
"step": 30600
},
{
"epoch": 9.922430510665805,
"grad_norm": 1.5517836809158325,
"learning_rate": 0.001,
"loss": 2.8192,
"step": 30700
},
{
"epoch": 9.95475113122172,
"grad_norm": 1.5819275379180908,
"learning_rate": 0.001,
"loss": 2.8089,
"step": 30800
},
{
"epoch": 9.987071751777634,
"grad_norm": 1.0927438735961914,
"learning_rate": 0.001,
"loss": 2.8199,
"step": 30900
},
{
"epoch": 10.019392372333549,
"grad_norm": 1.000216007232666,
"learning_rate": 0.001,
"loss": 2.7301,
"step": 31000
},
{
"epoch": 10.051712992889463,
"grad_norm": 1.1941349506378174,
"learning_rate": 0.001,
"loss": 2.6769,
"step": 31100
},
{
"epoch": 10.084033613445378,
"grad_norm": 1.313490629196167,
"learning_rate": 0.001,
"loss": 2.697,
"step": 31200
},
{
"epoch": 10.116354234001292,
"grad_norm": 0.816692590713501,
"learning_rate": 0.001,
"loss": 2.7027,
"step": 31300
},
{
"epoch": 10.148674854557207,
"grad_norm": 1.0504392385482788,
"learning_rate": 0.001,
"loss": 2.6844,
"step": 31400
},
{
"epoch": 10.180995475113122,
"grad_norm": 0.7781311869621277,
"learning_rate": 0.001,
"loss": 2.7091,
"step": 31500
},
{
"epoch": 10.213316095669036,
"grad_norm": 1.0855755805969238,
"learning_rate": 0.001,
"loss": 2.7003,
"step": 31600
},
{
"epoch": 10.24563671622495,
"grad_norm": 0.8881792426109314,
"learning_rate": 0.001,
"loss": 2.7052,
"step": 31700
},
{
"epoch": 10.277957336780867,
"grad_norm": 0.7997879385948181,
"learning_rate": 0.001,
"loss": 2.7191,
"step": 31800
},
{
"epoch": 10.310277957336782,
"grad_norm": 0.7601060271263123,
"learning_rate": 0.001,
"loss": 2.7178,
"step": 31900
},
{
"epoch": 10.342598577892696,
"grad_norm": 1.0980945825576782,
"learning_rate": 0.001,
"loss": 2.7123,
"step": 32000
},
{
"epoch": 10.37491919844861,
"grad_norm": 0.8539420366287231,
"learning_rate": 0.001,
"loss": 2.7201,
"step": 32100
},
{
"epoch": 10.407239819004525,
"grad_norm": 0.9446685314178467,
"learning_rate": 0.001,
"loss": 2.7538,
"step": 32200
},
{
"epoch": 10.43956043956044,
"grad_norm": 1.2443652153015137,
"learning_rate": 0.001,
"loss": 2.7351,
"step": 32300
},
{
"epoch": 10.471881060116354,
"grad_norm": 1.1149390935897827,
"learning_rate": 0.001,
"loss": 2.7451,
"step": 32400
},
{
"epoch": 10.504201680672269,
"grad_norm": 1.2788093090057373,
"learning_rate": 0.001,
"loss": 2.7387,
"step": 32500
},
{
"epoch": 10.536522301228183,
"grad_norm": 0.862935483455658,
"learning_rate": 0.001,
"loss": 2.7337,
"step": 32600
},
{
"epoch": 10.568842921784098,
"grad_norm": 1.0679367780685425,
"learning_rate": 0.001,
"loss": 2.7335,
"step": 32700
},
{
"epoch": 10.601163542340013,
"grad_norm": 1.15278160572052,
"learning_rate": 0.001,
"loss": 2.7268,
"step": 32800
},
{
"epoch": 10.633484162895927,
"grad_norm": 0.8131126165390015,
"learning_rate": 0.001,
"loss": 2.7518,
"step": 32900
},
{
"epoch": 10.665804783451842,
"grad_norm": 0.9842662811279297,
"learning_rate": 0.001,
"loss": 2.723,
"step": 33000
},
{
"epoch": 10.698125404007756,
"grad_norm": 1.0702661275863647,
"learning_rate": 0.001,
"loss": 2.7401,
"step": 33100
},
{
"epoch": 10.73044602456367,
"grad_norm": 1.0665313005447388,
"learning_rate": 0.001,
"loss": 2.7595,
"step": 33200
},
{
"epoch": 10.762766645119587,
"grad_norm": 0.853649914264679,
"learning_rate": 0.001,
"loss": 2.7734,
"step": 33300
},
{
"epoch": 10.795087265675502,
"grad_norm": 1.1607190370559692,
"learning_rate": 0.001,
"loss": 2.7514,
"step": 33400
},
{
"epoch": 10.827407886231416,
"grad_norm": 1.0019394159317017,
"learning_rate": 0.001,
"loss": 2.7674,
"step": 33500
},
{
"epoch": 10.85972850678733,
"grad_norm": 1.0300143957138062,
"learning_rate": 0.001,
"loss": 2.7567,
"step": 33600
},
{
"epoch": 10.892049127343245,
"grad_norm": 0.9865690469741821,
"learning_rate": 0.001,
"loss": 2.7641,
"step": 33700
},
{
"epoch": 10.92436974789916,
"grad_norm": 0.9428331255912781,
"learning_rate": 0.001,
"loss": 2.7485,
"step": 33800
},
{
"epoch": 10.956690368455074,
"grad_norm": 0.8437827229499817,
"learning_rate": 0.001,
"loss": 2.7633,
"step": 33900
},
{
"epoch": 10.989010989010989,
"grad_norm": 1.3695709705352783,
"learning_rate": 0.001,
"loss": 2.7574,
"step": 34000
},
{
"epoch": 11.021331609566904,
"grad_norm": 1.3747538328170776,
"learning_rate": 0.001,
"loss": 2.7082,
"step": 34100
},
{
"epoch": 11.053652230122818,
"grad_norm": 0.9120927453041077,
"learning_rate": 0.001,
"loss": 2.6415,
"step": 34200
},
{
"epoch": 11.085972850678733,
"grad_norm": 1.1730124950408936,
"learning_rate": 0.001,
"loss": 2.6401,
"step": 34300
},
{
"epoch": 11.118293471234647,
"grad_norm": 0.9366941452026367,
"learning_rate": 0.001,
"loss": 2.6628,
"step": 34400
},
{
"epoch": 11.150614091790562,
"grad_norm": 1.0878065824508667,
"learning_rate": 0.001,
"loss": 2.6478,
"step": 34500
},
{
"epoch": 11.182934712346476,
"grad_norm": 0.989831805229187,
"learning_rate": 0.001,
"loss": 2.6668,
"step": 34600
},
{
"epoch": 11.215255332902391,
"grad_norm": 1.0725752115249634,
"learning_rate": 0.001,
"loss": 2.6692,
"step": 34700
},
{
"epoch": 11.247575953458306,
"grad_norm": 1.2443684339523315,
"learning_rate": 0.001,
"loss": 2.6533,
"step": 34800
},
{
"epoch": 11.279896574014222,
"grad_norm": 0.7641411423683167,
"learning_rate": 0.001,
"loss": 2.67,
"step": 34900
},
{
"epoch": 11.312217194570136,
"grad_norm": 1.127281904220581,
"learning_rate": 0.001,
"loss": 2.6623,
"step": 35000
},
{
"epoch": 11.344537815126051,
"grad_norm": 1.0336980819702148,
"learning_rate": 0.001,
"loss": 2.6648,
"step": 35100
},
{
"epoch": 11.376858435681966,
"grad_norm": 0.899639368057251,
"learning_rate": 0.001,
"loss": 2.6684,
"step": 35200
},
{
"epoch": 11.40917905623788,
"grad_norm": 1.1687790155410767,
"learning_rate": 0.001,
"loss": 2.6739,
"step": 35300
},
{
"epoch": 11.441499676793795,
"grad_norm": 1.050125241279602,
"learning_rate": 0.001,
"loss": 2.6682,
"step": 35400
},
{
"epoch": 11.47382029734971,
"grad_norm": 0.8031840920448303,
"learning_rate": 0.001,
"loss": 2.6905,
"step": 35500
},
{
"epoch": 11.506140917905624,
"grad_norm": 1.2813186645507812,
"learning_rate": 0.001,
"loss": 2.7205,
"step": 35600
},
{
"epoch": 11.538461538461538,
"grad_norm": 0.9722304344177246,
"learning_rate": 0.001,
"loss": 2.6864,
"step": 35700
},
{
"epoch": 11.570782159017453,
"grad_norm": 0.9341808557510376,
"learning_rate": 0.001,
"loss": 2.6956,
"step": 35800
},
{
"epoch": 11.603102779573367,
"grad_norm": 1.0142689943313599,
"learning_rate": 0.001,
"loss": 2.6816,
"step": 35900
},
{
"epoch": 11.635423400129282,
"grad_norm": 0.813972532749176,
"learning_rate": 0.001,
"loss": 2.6859,
"step": 36000
},
{
"epoch": 11.667744020685197,
"grad_norm": 1.170541763305664,
"learning_rate": 0.001,
"loss": 2.596,
"step": 36100
},
{
"epoch": 11.700064641241111,
"grad_norm": 1.1519564390182495,
"learning_rate": 0.001,
"loss": 2.5998,
"step": 36200
},
{
"epoch": 11.732385261797026,
"grad_norm": 1.2155847549438477,
"learning_rate": 0.001,
"loss": 2.6024,
"step": 36300
},
{
"epoch": 11.764705882352942,
"grad_norm": 1.2150511741638184,
"learning_rate": 0.001,
"loss": 2.6089,
"step": 36400
},
{
"epoch": 11.797026502908857,
"grad_norm": 0.9761043787002563,
"learning_rate": 0.001,
"loss": 2.6097,
"step": 36500
},
{
"epoch": 11.829347123464771,
"grad_norm": 0.753853440284729,
"learning_rate": 0.001,
"loss": 2.6199,
"step": 36600
},
{
"epoch": 11.861667744020686,
"grad_norm": 1.1278780698776245,
"learning_rate": 0.001,
"loss": 2.6138,
"step": 36700
},
{
"epoch": 11.8939883645766,
"grad_norm": 0.9861252307891846,
"learning_rate": 0.001,
"loss": 2.6216,
"step": 36800
},
{
"epoch": 11.926308985132515,
"grad_norm": 0.7999703884124756,
"learning_rate": 0.001,
"loss": 2.6214,
"step": 36900
},
{
"epoch": 11.95862960568843,
"grad_norm": 0.8605625629425049,
"learning_rate": 0.001,
"loss": 2.6279,
"step": 37000
},
{
"epoch": 11.990950226244344,
"grad_norm": 1.095085620880127,
"learning_rate": 0.001,
"loss": 2.6221,
"step": 37100
},
{
"epoch": 12.023270846800258,
"grad_norm": 1.4295034408569336,
"learning_rate": 0.001,
"loss": 2.6083,
"step": 37200
},
{
"epoch": 12.055591467356173,
"grad_norm": 0.8960357308387756,
"learning_rate": 0.001,
"loss": 2.5875,
"step": 37300
},
{
"epoch": 12.087912087912088,
"grad_norm": 0.999830961227417,
"learning_rate": 0.001,
"loss": 2.5932,
"step": 37400
},
{
"epoch": 12.120232708468002,
"grad_norm": 1.110213041305542,
"learning_rate": 0.001,
"loss": 2.5877,
"step": 37500
},
{
"epoch": 12.152553329023917,
"grad_norm": 1.1710408926010132,
"learning_rate": 0.001,
"loss": 2.6005,
"step": 37600
},
{
"epoch": 12.184873949579831,
"grad_norm": 1.1342028379440308,
"learning_rate": 0.001,
"loss": 2.6232,
"step": 37700
},
{
"epoch": 12.217194570135746,
"grad_norm": 1.1005823612213135,
"learning_rate": 0.001,
"loss": 2.6374,
"step": 37800
},
{
"epoch": 12.24951519069166,
"grad_norm": 0.8393723964691162,
"learning_rate": 0.001,
"loss": 2.6276,
"step": 37900
},
{
"epoch": 12.281835811247577,
"grad_norm": 0.8750357031822205,
"learning_rate": 0.001,
"loss": 2.6316,
"step": 38000
},
{
"epoch": 12.314156431803491,
"grad_norm": 0.9854604601860046,
"learning_rate": 0.001,
"loss": 2.6355,
"step": 38100
},
{
"epoch": 12.346477052359406,
"grad_norm": 0.927906334400177,
"learning_rate": 0.001,
"loss": 2.617,
"step": 38200
},
{
"epoch": 12.37879767291532,
"grad_norm": 1.4400506019592285,
"learning_rate": 0.001,
"loss": 2.6094,
"step": 38300
},
{
"epoch": 12.411118293471235,
"grad_norm": 0.9851289987564087,
"learning_rate": 0.001,
"loss": 2.6556,
"step": 38400
},
{
"epoch": 12.44343891402715,
"grad_norm": 1.1679672002792358,
"learning_rate": 0.001,
"loss": 2.6526,
"step": 38500
},
{
"epoch": 12.475759534583064,
"grad_norm": 0.9106985330581665,
"learning_rate": 0.001,
"loss": 2.6378,
"step": 38600
},
{
"epoch": 12.508080155138979,
"grad_norm": 0.8666954636573792,
"learning_rate": 0.001,
"loss": 2.6454,
"step": 38700
},
{
"epoch": 12.540400775694893,
"grad_norm": 1.5032638311386108,
"learning_rate": 0.001,
"loss": 2.6457,
"step": 38800
},
{
"epoch": 12.572721396250808,
"grad_norm": 1.29007089138031,
"learning_rate": 0.001,
"loss": 2.6457,
"step": 38900
},
{
"epoch": 12.605042016806722,
"grad_norm": 1.187584400177002,
"learning_rate": 0.001,
"loss": 2.6636,
"step": 39000
},
{
"epoch": 12.637362637362637,
"grad_norm": 1.0897037982940674,
"learning_rate": 0.001,
"loss": 2.67,
"step": 39100
},
{
"epoch": 12.669683257918551,
"grad_norm": 1.2954078912734985,
"learning_rate": 0.001,
"loss": 2.6616,
"step": 39200
},
{
"epoch": 12.702003878474466,
"grad_norm": 0.8234089016914368,
"learning_rate": 0.001,
"loss": 2.6467,
"step": 39300
},
{
"epoch": 12.73432449903038,
"grad_norm": 0.9669334292411804,
"learning_rate": 0.001,
"loss": 2.6853,
"step": 39400
},
{
"epoch": 12.766645119586297,
"grad_norm": 0.931905210018158,
"learning_rate": 0.001,
"loss": 2.6756,
"step": 39500
},
{
"epoch": 12.798965740142211,
"grad_norm": 0.8956810235977173,
"learning_rate": 0.001,
"loss": 2.6557,
"step": 39600
},
{
"epoch": 12.831286360698126,
"grad_norm": 1.1757956743240356,
"learning_rate": 0.001,
"loss": 2.6833,
"step": 39700
},
{
"epoch": 12.86360698125404,
"grad_norm": 0.9770579934120178,
"learning_rate": 0.001,
"loss": 2.6633,
"step": 39800
},
{
"epoch": 12.895927601809955,
"grad_norm": 1.209007740020752,
"learning_rate": 0.001,
"loss": 2.6647,
"step": 39900
},
{
"epoch": 12.92824822236587,
"grad_norm": 1.2638388872146606,
"learning_rate": 0.001,
"loss": 2.673,
"step": 40000
},
{
"epoch": 12.960568842921784,
"grad_norm": 1.2949479818344116,
"learning_rate": 0.001,
"loss": 2.679,
"step": 40100
},
{
"epoch": 12.992889463477699,
"grad_norm": 1.257927656173706,
"learning_rate": 0.001,
"loss": 2.6769,
"step": 40200
},
{
"epoch": 13.025210084033613,
"grad_norm": 1.076108694076538,
"learning_rate": 0.001,
"loss": 2.5622,
"step": 40300
},
{
"epoch": 13.057530704589528,
"grad_norm": 1.1594815254211426,
"learning_rate": 0.001,
"loss": 2.5509,
"step": 40400
},
{
"epoch": 13.089851325145442,
"grad_norm": 0.9643914103507996,
"learning_rate": 0.001,
"loss": 2.5681,
"step": 40500
},
{
"epoch": 13.122171945701357,
"grad_norm": 1.0107759237289429,
"learning_rate": 0.001,
"loss": 2.5477,
"step": 40600
},
{
"epoch": 13.154492566257272,
"grad_norm": 0.7948814630508423,
"learning_rate": 0.001,
"loss": 2.5575,
"step": 40700
},
{
"epoch": 13.186813186813186,
"grad_norm": 1.3021531105041504,
"learning_rate": 0.001,
"loss": 2.5826,
"step": 40800
},
{
"epoch": 13.2191338073691,
"grad_norm": 0.9879547357559204,
"learning_rate": 0.001,
"loss": 2.5756,
"step": 40900
},
{
"epoch": 13.251454427925015,
"grad_norm": 0.8074339032173157,
"learning_rate": 0.001,
"loss": 2.558,
"step": 41000
},
{
"epoch": 13.283775048480932,
"grad_norm": 0.9623432159423828,
"learning_rate": 0.001,
"loss": 2.56,
"step": 41100
},
{
"epoch": 13.316095669036846,
"grad_norm": 1.2546730041503906,
"learning_rate": 0.001,
"loss": 2.5823,
"step": 41200
},
{
"epoch": 13.34841628959276,
"grad_norm": 1.051803469657898,
"learning_rate": 0.001,
"loss": 2.5743,
"step": 41300
},
{
"epoch": 13.380736910148675,
"grad_norm": 1.1379426717758179,
"learning_rate": 0.001,
"loss": 2.5904,
"step": 41400
},
{
"epoch": 13.41305753070459,
"grad_norm": 0.9064072370529175,
"learning_rate": 0.001,
"loss": 2.6003,
"step": 41500
},
{
"epoch": 13.445378151260504,
"grad_norm": 1.031472086906433,
"learning_rate": 0.001,
"loss": 2.6042,
"step": 41600
},
{
"epoch": 13.477698771816419,
"grad_norm": 1.0278077125549316,
"learning_rate": 0.001,
"loss": 2.5967,
"step": 41700
},
{
"epoch": 13.510019392372334,
"grad_norm": 0.9069276452064514,
"learning_rate": 0.001,
"loss": 2.6164,
"step": 41800
},
{
"epoch": 13.542340012928248,
"grad_norm": 1.1582199335098267,
"learning_rate": 0.001,
"loss": 2.596,
"step": 41900
},
{
"epoch": 13.574660633484163,
"grad_norm": 0.9175617694854736,
"learning_rate": 0.001,
"loss": 2.5997,
"step": 42000
},
{
"epoch": 13.606981254040077,
"grad_norm": 1.11009681224823,
"learning_rate": 0.001,
"loss": 2.6208,
"step": 42100
},
{
"epoch": 13.639301874595992,
"grad_norm": 1.0659406185150146,
"learning_rate": 0.001,
"loss": 2.6103,
"step": 42200
},
{
"epoch": 13.671622495151906,
"grad_norm": 1.2223795652389526,
"learning_rate": 0.001,
"loss": 2.6163,
"step": 42300
},
{
"epoch": 13.70394311570782,
"grad_norm": 0.9805840849876404,
"learning_rate": 0.001,
"loss": 2.6003,
"step": 42400
},
{
"epoch": 13.736263736263737,
"grad_norm": 1.130028247833252,
"learning_rate": 0.001,
"loss": 2.6155,
"step": 42500
},
{
"epoch": 13.768584356819652,
"grad_norm": 1.0690525770187378,
"learning_rate": 0.001,
"loss": 2.6287,
"step": 42600
},
{
"epoch": 13.800904977375566,
"grad_norm": 1.1252151727676392,
"learning_rate": 0.001,
"loss": 2.618,
"step": 42700
},
{
"epoch": 13.83322559793148,
"grad_norm": 1.2166658639907837,
"learning_rate": 0.001,
"loss": 2.6358,
"step": 42800
},
{
"epoch": 13.865546218487395,
"grad_norm": 1.0091367959976196,
"learning_rate": 0.001,
"loss": 2.6259,
"step": 42900
},
{
"epoch": 13.89786683904331,
"grad_norm": 1.2160910367965698,
"learning_rate": 0.001,
"loss": 2.6537,
"step": 43000
},
{
"epoch": 13.930187459599225,
"grad_norm": 1.0695080757141113,
"learning_rate": 0.001,
"loss": 2.6389,
"step": 43100
},
{
"epoch": 13.96250808015514,
"grad_norm": 0.9037491083145142,
"learning_rate": 0.001,
"loss": 2.6302,
"step": 43200
},
{
"epoch": 13.994828700711054,
"grad_norm": 1.2553186416625977,
"learning_rate": 0.001,
"loss": 2.6346,
"step": 43300
},
{
"epoch": 14.027149321266968,
"grad_norm": 1.0879606008529663,
"learning_rate": 0.001,
"loss": 2.5208,
"step": 43400
},
{
"epoch": 14.059469941822883,
"grad_norm": 1.118901252746582,
"learning_rate": 0.001,
"loss": 2.5002,
"step": 43500
},
{
"epoch": 14.091790562378797,
"grad_norm": 1.2253042459487915,
"learning_rate": 0.001,
"loss": 2.5058,
"step": 43600
},
{
"epoch": 14.124111182934712,
"grad_norm": 1.366318941116333,
"learning_rate": 0.001,
"loss": 2.5225,
"step": 43700
},
{
"epoch": 14.156431803490626,
"grad_norm": 1.1060069799423218,
"learning_rate": 0.001,
"loss": 2.5134,
"step": 43800
},
{
"epoch": 14.188752424046541,
"grad_norm": 1.1791584491729736,
"learning_rate": 0.001,
"loss": 2.541,
"step": 43900
},
{
"epoch": 14.221073044602456,
"grad_norm": 1.2990303039550781,
"learning_rate": 0.001,
"loss": 2.5358,
"step": 44000
},
{
"epoch": 14.25339366515837,
"grad_norm": 0.881264328956604,
"learning_rate": 0.001,
"loss": 2.5145,
"step": 44100
},
{
"epoch": 14.285714285714286,
"grad_norm": 0.991474449634552,
"learning_rate": 0.001,
"loss": 2.5161,
"step": 44200
},
{
"epoch": 14.318034906270201,
"grad_norm": 1.303600788116455,
"learning_rate": 0.001,
"loss": 2.5507,
"step": 44300
},
{
"epoch": 14.350355526826116,
"grad_norm": 1.3152434825897217,
"learning_rate": 0.001,
"loss": 2.5458,
"step": 44400
},
{
"epoch": 14.38267614738203,
"grad_norm": 1.014987826347351,
"learning_rate": 0.001,
"loss": 2.5469,
"step": 44500
},
{
"epoch": 14.414996767937945,
"grad_norm": 0.9973893165588379,
"learning_rate": 0.001,
"loss": 2.5583,
"step": 44600
},
{
"epoch": 14.44731738849386,
"grad_norm": 1.0743112564086914,
"learning_rate": 0.001,
"loss": 2.5419,
"step": 44700
},
{
"epoch": 14.479638009049774,
"grad_norm": 1.2921549081802368,
"learning_rate": 0.001,
"loss": 2.5509,
"step": 44800
},
{
"epoch": 14.511958629605688,
"grad_norm": 0.9177213907241821,
"learning_rate": 0.001,
"loss": 2.5699,
"step": 44900
},
{
"epoch": 14.544279250161603,
"grad_norm": 1.4217877388000488,
"learning_rate": 0.001,
"loss": 2.5507,
"step": 45000
},
{
"epoch": 14.576599870717518,
"grad_norm": 1.3486171960830688,
"learning_rate": 0.001,
"loss": 2.5519,
"step": 45100
},
{
"epoch": 14.608920491273432,
"grad_norm": 1.2178871631622314,
"learning_rate": 0.001,
"loss": 2.5565,
"step": 45200
},
{
"epoch": 14.641241111829347,
"grad_norm": 1.29816734790802,
"learning_rate": 0.001,
"loss": 2.5733,
"step": 45300
},
{
"epoch": 14.673561732385261,
"grad_norm": 1.0312505960464478,
"learning_rate": 0.001,
"loss": 2.5617,
"step": 45400
},
{
"epoch": 14.705882352941176,
"grad_norm": 1.1762622594833374,
"learning_rate": 0.001,
"loss": 2.5717,
"step": 45500
},
{
"epoch": 14.738202973497092,
"grad_norm": 1.0525074005126953,
"learning_rate": 0.001,
"loss": 2.5774,
"step": 45600
},
{
"epoch": 14.770523594053007,
"grad_norm": 1.54285728931427,
"learning_rate": 0.001,
"loss": 2.5915,
"step": 45700
},
{
"epoch": 14.802844214608921,
"grad_norm": 1.0963060855865479,
"learning_rate": 0.001,
"loss": 2.5708,
"step": 45800
},
{
"epoch": 14.835164835164836,
"grad_norm": 0.9922949075698853,
"learning_rate": 0.001,
"loss": 2.5994,
"step": 45900
},
{
"epoch": 14.86748545572075,
"grad_norm": 1.2420673370361328,
"learning_rate": 0.001,
"loss": 2.5955,
"step": 46000
},
{
"epoch": 14.899806076276665,
"grad_norm": 0.9536318778991699,
"learning_rate": 0.001,
"loss": 2.5818,
"step": 46100
},
{
"epoch": 14.93212669683258,
"grad_norm": 1.0575299263000488,
"learning_rate": 0.001,
"loss": 2.6045,
"step": 46200
},
{
"epoch": 14.964447317388494,
"grad_norm": 0.8865509629249573,
"learning_rate": 0.001,
"loss": 2.5853,
"step": 46300
},
{
"epoch": 14.996767937944409,
"grad_norm": 1.2456960678100586,
"learning_rate": 0.001,
"loss": 2.5969,
"step": 46400
},
{
"epoch": 15.029088558500323,
"grad_norm": 1.0670504570007324,
"learning_rate": 0.001,
"loss": 2.4761,
"step": 46500
},
{
"epoch": 15.061409179056238,
"grad_norm": 0.948182225227356,
"learning_rate": 0.001,
"loss": 2.4568,
"step": 46600
},
{
"epoch": 15.093729799612152,
"grad_norm": 1.298714280128479,
"learning_rate": 0.001,
"loss": 2.4873,
"step": 46700
},
{
"epoch": 15.126050420168067,
"grad_norm": 1.0651124715805054,
"learning_rate": 0.001,
"loss": 2.4539,
"step": 46800
},
{
"epoch": 15.158371040723981,
"grad_norm": 0.9363376498222351,
"learning_rate": 0.001,
"loss": 2.4645,
"step": 46900
},
{
"epoch": 15.190691661279896,
"grad_norm": 1.1798454523086548,
"learning_rate": 0.001,
"loss": 2.4686,
"step": 47000
},
{
"epoch": 15.22301228183581,
"grad_norm": 1.0877801179885864,
"learning_rate": 0.001,
"loss": 2.479,
"step": 47100
},
{
"epoch": 15.255332902391725,
"grad_norm": 1.2323899269104004,
"learning_rate": 0.001,
"loss": 2.5143,
"step": 47200
},
{
"epoch": 15.287653522947641,
"grad_norm": 1.1232088804244995,
"learning_rate": 0.001,
"loss": 2.4904,
"step": 47300
},
{
"epoch": 15.319974143503556,
"grad_norm": 1.3587777614593506,
"learning_rate": 0.001,
"loss": 2.499,
"step": 47400
},
{
"epoch": 15.35229476405947,
"grad_norm": 1.5187313556671143,
"learning_rate": 0.001,
"loss": 2.5202,
"step": 47500
},
{
"epoch": 15.384615384615385,
"grad_norm": 1.5214544534683228,
"learning_rate": 0.001,
"loss": 2.4968,
"step": 47600
},
{
"epoch": 15.4169360051713,
"grad_norm": 1.4066399335861206,
"learning_rate": 0.001,
"loss": 2.5064,
"step": 47700
},
{
"epoch": 15.449256625727214,
"grad_norm": 1.1718268394470215,
"learning_rate": 0.001,
"loss": 2.5161,
"step": 47800
},
{
"epoch": 15.481577246283129,
"grad_norm": 1.0770251750946045,
"learning_rate": 0.001,
"loss": 2.5122,
"step": 47900
},
{
"epoch": 15.513897866839043,
"grad_norm": 1.092182993888855,
"learning_rate": 0.001,
"loss": 2.5292,
"step": 48000
},
{
"epoch": 15.546218487394958,
"grad_norm": 1.0473302602767944,
"learning_rate": 0.001,
"loss": 2.5315,
"step": 48100
},
{
"epoch": 15.578539107950872,
"grad_norm": 1.0130027532577515,
"learning_rate": 0.001,
"loss": 2.5181,
"step": 48200
},
{
"epoch": 15.610859728506787,
"grad_norm": 1.1945054531097412,
"learning_rate": 0.001,
"loss": 2.5199,
"step": 48300
},
{
"epoch": 15.643180349062701,
"grad_norm": 1.765254259109497,
"learning_rate": 0.001,
"loss": 2.5338,
"step": 48400
},
{
"epoch": 15.675500969618616,
"grad_norm": 1.1778308153152466,
"learning_rate": 0.001,
"loss": 2.5448,
"step": 48500
},
{
"epoch": 15.70782159017453,
"grad_norm": 1.2698488235473633,
"learning_rate": 0.001,
"loss": 2.5294,
"step": 48600
},
{
"epoch": 15.740142210730447,
"grad_norm": 1.0903241634368896,
"learning_rate": 0.001,
"loss": 2.5441,
"step": 48700
},
{
"epoch": 15.772462831286362,
"grad_norm": 0.9908322691917419,
"learning_rate": 0.001,
"loss": 2.5434,
"step": 48800
},
{
"epoch": 15.804783451842276,
"grad_norm": 1.0519664287567139,
"learning_rate": 0.001,
"loss": 2.5406,
"step": 48900
},
{
"epoch": 15.83710407239819,
"grad_norm": 1.250427484512329,
"learning_rate": 0.001,
"loss": 2.5543,
"step": 49000
},
{
"epoch": 15.869424692954105,
"grad_norm": 1.1209453344345093,
"learning_rate": 0.001,
"loss": 2.5426,
"step": 49100
},
{
"epoch": 15.90174531351002,
"grad_norm": 1.2598960399627686,
"learning_rate": 0.001,
"loss": 2.5517,
"step": 49200
},
{
"epoch": 15.934065934065934,
"grad_norm": 1.168419599533081,
"learning_rate": 0.001,
"loss": 2.557,
"step": 49300
},
{
"epoch": 15.966386554621849,
"grad_norm": 1.2239935398101807,
"learning_rate": 0.001,
"loss": 2.5589,
"step": 49400
},
{
"epoch": 15.998707175177763,
"grad_norm": 1.3314663171768188,
"learning_rate": 0.001,
"loss": 2.5477,
"step": 49500
},
{
"epoch": 16.031027795733678,
"grad_norm": 1.2716763019561768,
"learning_rate": 0.001,
"loss": 2.4259,
"step": 49600
},
{
"epoch": 16.063348416289593,
"grad_norm": 1.186794400215149,
"learning_rate": 0.001,
"loss": 2.4269,
"step": 49700
},
{
"epoch": 16.095669036845507,
"grad_norm": 1.5052367448806763,
"learning_rate": 0.001,
"loss": 2.4288,
"step": 49800
},
{
"epoch": 16.12798965740142,
"grad_norm": 1.3346431255340576,
"learning_rate": 0.001,
"loss": 2.4482,
"step": 49900
},
{
"epoch": 16.160310277957336,
"grad_norm": 1.0178332328796387,
"learning_rate": 0.001,
"loss": 2.4402,
"step": 50000
},
{
"epoch": 16.19263089851325,
"grad_norm": 1.1986984014511108,
"learning_rate": 0.001,
"loss": 2.4467,
"step": 50100
},
{
"epoch": 16.224951519069165,
"grad_norm": 1.4956904649734497,
"learning_rate": 0.001,
"loss": 2.4564,
"step": 50200
},
{
"epoch": 16.25727213962508,
"grad_norm": 1.2278879880905151,
"learning_rate": 0.001,
"loss": 2.4519,
"step": 50300
},
{
"epoch": 16.289592760180994,
"grad_norm": 1.304221272468567,
"learning_rate": 0.001,
"loss": 2.4559,
"step": 50400
},
{
"epoch": 16.32191338073691,
"grad_norm": 1.3971552848815918,
"learning_rate": 0.001,
"loss": 2.4522,
"step": 50500
},
{
"epoch": 16.354234001292824,
"grad_norm": 1.3419325351715088,
"learning_rate": 0.001,
"loss": 2.4699,
"step": 50600
},
{
"epoch": 16.386554621848738,
"grad_norm": 1.2903776168823242,
"learning_rate": 0.001,
"loss": 2.4723,
"step": 50700
},
{
"epoch": 16.418875242404653,
"grad_norm": 1.5094326734542847,
"learning_rate": 0.001,
"loss": 2.4804,
"step": 50800
},
{
"epoch": 16.451195862960567,
"grad_norm": 1.5980035066604614,
"learning_rate": 0.001,
"loss": 2.483,
"step": 50900
},
{
"epoch": 16.483516483516482,
"grad_norm": 1.2993049621582031,
"learning_rate": 0.001,
"loss": 2.4846,
"step": 51000
},
{
"epoch": 16.5158371040724,
"grad_norm": 1.2067556381225586,
"learning_rate": 0.001,
"loss": 2.4769,
"step": 51100
},
{
"epoch": 16.548157724628314,
"grad_norm": 1.9052283763885498,
"learning_rate": 0.001,
"loss": 2.479,
"step": 51200
},
{
"epoch": 16.58047834518423,
"grad_norm": 1.3466124534606934,
"learning_rate": 0.001,
"loss": 2.5064,
"step": 51300
},
{
"epoch": 16.612798965740144,
"grad_norm": 1.563071846961975,
"learning_rate": 0.001,
"loss": 2.4885,
"step": 51400
},
{
"epoch": 16.645119586296058,
"grad_norm": 1.5232013463974,
"learning_rate": 0.001,
"loss": 2.489,
"step": 51500
},
{
"epoch": 16.677440206851973,
"grad_norm": 1.1990022659301758,
"learning_rate": 0.001,
"loss": 2.4958,
"step": 51600
},
{
"epoch": 16.709760827407887,
"grad_norm": 1.1869248151779175,
"learning_rate": 0.001,
"loss": 2.5014,
"step": 51700
},
{
"epoch": 16.742081447963802,
"grad_norm": 1.4302977323532104,
"learning_rate": 0.001,
"loss": 2.5028,
"step": 51800
},
{
"epoch": 16.774402068519716,
"grad_norm": 1.3441663980484009,
"learning_rate": 0.001,
"loss": 2.5031,
"step": 51900
},
{
"epoch": 16.80672268907563,
"grad_norm": 1.2463428974151611,
"learning_rate": 0.001,
"loss": 2.5206,
"step": 52000
},
{
"epoch": 16.839043309631545,
"grad_norm": 1.5516313314437866,
"learning_rate": 0.001,
"loss": 2.5044,
"step": 52100
},
{
"epoch": 16.87136393018746,
"grad_norm": 1.4924278259277344,
"learning_rate": 0.001,
"loss": 2.5015,
"step": 52200
},
{
"epoch": 16.903684550743375,
"grad_norm": 1.4840755462646484,
"learning_rate": 0.001,
"loss": 2.5115,
"step": 52300
},
{
"epoch": 16.93600517129929,
"grad_norm": 1.1688814163208008,
"learning_rate": 0.001,
"loss": 2.5057,
"step": 52400
},
{
"epoch": 16.968325791855204,
"grad_norm": 1.1291621923446655,
"learning_rate": 0.001,
"loss": 2.5316,
"step": 52500
},
{
"epoch": 17.00064641241112,
"grad_norm": 1.3141711950302124,
"learning_rate": 0.001,
"loss": 2.5096,
"step": 52600
},
{
"epoch": 17.032967032967033,
"grad_norm": 1.1805791854858398,
"learning_rate": 0.001,
"loss": 2.3768,
"step": 52700
},
{
"epoch": 17.065287653522947,
"grad_norm": 1.2402406930923462,
"learning_rate": 0.001,
"loss": 2.3891,
"step": 52800
},
{
"epoch": 17.097608274078862,
"grad_norm": 1.5982283353805542,
"learning_rate": 0.001,
"loss": 2.3722,
"step": 52900
},
{
"epoch": 17.129928894634777,
"grad_norm": 1.4602590799331665,
"learning_rate": 0.001,
"loss": 2.3982,
"step": 53000
},
{
"epoch": 17.16224951519069,
"grad_norm": 2.0189146995544434,
"learning_rate": 0.001,
"loss": 2.4175,
"step": 53100
},
{
"epoch": 17.194570135746606,
"grad_norm": 2.113309621810913,
"learning_rate": 0.001,
"loss": 2.4099,
"step": 53200
},
{
"epoch": 17.22689075630252,
"grad_norm": 1.5319947004318237,
"learning_rate": 0.001,
"loss": 2.4323,
"step": 53300
},
{
"epoch": 17.259211376858435,
"grad_norm": 1.7054084539413452,
"learning_rate": 0.001,
"loss": 2.4122,
"step": 53400
},
{
"epoch": 17.29153199741435,
"grad_norm": 2.107525110244751,
"learning_rate": 0.001,
"loss": 2.4175,
"step": 53500
},
{
"epoch": 17.323852617970264,
"grad_norm": 1.9685229063034058,
"learning_rate": 0.001,
"loss": 2.42,
"step": 53600
},
{
"epoch": 17.35617323852618,
"grad_norm": 1.7401131391525269,
"learning_rate": 0.001,
"loss": 2.4364,
"step": 53700
},
{
"epoch": 17.388493859082093,
"grad_norm": 2.035468101501465,
"learning_rate": 0.001,
"loss": 2.4363,
"step": 53800
},
{
"epoch": 17.420814479638008,
"grad_norm": 1.4379023313522339,
"learning_rate": 0.001,
"loss": 2.4419,
"step": 53900
},
{
"epoch": 17.453135100193922,
"grad_norm": 1.6685974597930908,
"learning_rate": 0.001,
"loss": 2.439,
"step": 54000
},
{
"epoch": 17.485455720749837,
"grad_norm": 1.5203348398208618,
"learning_rate": 0.001,
"loss": 2.4467,
"step": 54100
},
{
"epoch": 17.517776341305755,
"grad_norm": 1.2775732278823853,
"learning_rate": 0.001,
"loss": 2.4467,
"step": 54200
},
{
"epoch": 17.55009696186167,
"grad_norm": 1.7329598665237427,
"learning_rate": 0.001,
"loss": 2.4492,
"step": 54300
},
{
"epoch": 17.582417582417584,
"grad_norm": 1.3014206886291504,
"learning_rate": 0.001,
"loss": 2.4777,
"step": 54400
},
{
"epoch": 17.6147382029735,
"grad_norm": 1.263486623764038,
"learning_rate": 0.001,
"loss": 2.4548,
"step": 54500
},
{
"epoch": 17.647058823529413,
"grad_norm": 2.006847620010376,
"learning_rate": 0.001,
"loss": 2.4647,
"step": 54600
},
{
"epoch": 17.679379444085328,
"grad_norm": 2.0060877799987793,
"learning_rate": 0.001,
"loss": 2.4461,
"step": 54700
},
{
"epoch": 17.711700064641242,
"grad_norm": 1.688281774520874,
"learning_rate": 0.001,
"loss": 2.4669,
"step": 54800
},
{
"epoch": 17.744020685197157,
"grad_norm": 1.5485999584197998,
"learning_rate": 0.001,
"loss": 2.4972,
"step": 54900
},
{
"epoch": 17.77634130575307,
"grad_norm": 1.3471914529800415,
"learning_rate": 0.001,
"loss": 2.4706,
"step": 55000
},
{
"epoch": 17.808661926308986,
"grad_norm": 1.660112738609314,
"learning_rate": 0.001,
"loss": 2.4731,
"step": 55100
},
{
"epoch": 17.8409825468649,
"grad_norm": 1.4767402410507202,
"learning_rate": 0.001,
"loss": 2.468,
"step": 55200
},
{
"epoch": 17.873303167420815,
"grad_norm": 1.243491530418396,
"learning_rate": 0.001,
"loss": 2.4797,
"step": 55300
},
{
"epoch": 17.90562378797673,
"grad_norm": 1.5120997428894043,
"learning_rate": 0.001,
"loss": 2.4638,
"step": 55400
},
{
"epoch": 17.937944408532644,
"grad_norm": 1.463824987411499,
"learning_rate": 0.001,
"loss": 2.4688,
"step": 55500
},
{
"epoch": 17.97026502908856,
"grad_norm": 1.5150164365768433,
"learning_rate": 0.001,
"loss": 2.4871,
"step": 55600
},
{
"epoch": 18.002585649644473,
"grad_norm": 1.273226261138916,
"learning_rate": 0.001,
"loss": 2.5058,
"step": 55700
},
{
"epoch": 18.034906270200388,
"grad_norm": 1.1136760711669922,
"learning_rate": 0.001,
"loss": 2.3403,
"step": 55800
},
{
"epoch": 18.067226890756302,
"grad_norm": 1.2880661487579346,
"learning_rate": 0.001,
"loss": 2.3427,
"step": 55900
},
{
"epoch": 18.099547511312217,
"grad_norm": 1.3773696422576904,
"learning_rate": 0.001,
"loss": 2.3565,
"step": 56000
},
{
"epoch": 18.13186813186813,
"grad_norm": 0.9006345868110657,
"learning_rate": 0.001,
"loss": 2.3755,
"step": 56100
},
{
"epoch": 18.164188752424046,
"grad_norm": 1.2340940237045288,
"learning_rate": 0.001,
"loss": 2.3949,
"step": 56200
},
{
"epoch": 18.19650937297996,
"grad_norm": 1.297279953956604,
"learning_rate": 0.001,
"loss": 2.3835,
"step": 56300
},
{
"epoch": 18.228829993535875,
"grad_norm": 1.0448439121246338,
"learning_rate": 0.001,
"loss": 2.3986,
"step": 56400
},
{
"epoch": 18.26115061409179,
"grad_norm": 1.0477882623672485,
"learning_rate": 0.001,
"loss": 2.3906,
"step": 56500
},
{
"epoch": 18.293471234647704,
"grad_norm": 1.030461072921753,
"learning_rate": 0.001,
"loss": 2.3882,
"step": 56600
},
{
"epoch": 18.32579185520362,
"grad_norm": 0.8635056614875793,
"learning_rate": 0.001,
"loss": 2.4023,
"step": 56700
},
{
"epoch": 18.358112475759533,
"grad_norm": 1.3237051963806152,
"learning_rate": 0.001,
"loss": 2.369,
"step": 56800
},
{
"epoch": 18.390433096315448,
"grad_norm": 1.3418434858322144,
"learning_rate": 0.001,
"loss": 2.4058,
"step": 56900
},
{
"epoch": 18.422753716871362,
"grad_norm": 0.8876633644104004,
"learning_rate": 0.001,
"loss": 2.4028,
"step": 57000
},
{
"epoch": 18.455074337427277,
"grad_norm": 1.2339004278182983,
"learning_rate": 0.001,
"loss": 2.4131,
"step": 57100
},
{
"epoch": 18.48739495798319,
"grad_norm": 1.1238473653793335,
"learning_rate": 0.001,
"loss": 2.3856,
"step": 57200
},
{
"epoch": 18.51971557853911,
"grad_norm": 1.535725474357605,
"learning_rate": 0.001,
"loss": 2.4254,
"step": 57300
},
{
"epoch": 18.552036199095024,
"grad_norm": 0.8891725540161133,
"learning_rate": 0.001,
"loss": 2.4406,
"step": 57400
},
{
"epoch": 18.58435681965094,
"grad_norm": 1.3219631910324097,
"learning_rate": 0.001,
"loss": 2.4282,
"step": 57500
},
{
"epoch": 18.616677440206853,
"grad_norm": 1.4664770364761353,
"learning_rate": 0.001,
"loss": 2.4167,
"step": 57600
},
{
"epoch": 18.648998060762768,
"grad_norm": 1.4198930263519287,
"learning_rate": 0.001,
"loss": 2.4329,
"step": 57700
},
{
"epoch": 18.681318681318682,
"grad_norm": 1.31005859375,
"learning_rate": 0.001,
"loss": 2.4255,
"step": 57800
},
{
"epoch": 18.713639301874597,
"grad_norm": 1.3033839464187622,
"learning_rate": 0.001,
"loss": 2.4145,
"step": 57900
},
{
"epoch": 18.74595992243051,
"grad_norm": 1.0991932153701782,
"learning_rate": 0.001,
"loss": 2.4405,
"step": 58000
},
{
"epoch": 18.778280542986426,
"grad_norm": 1.0855860710144043,
"learning_rate": 0.001,
"loss": 2.4528,
"step": 58100
},
{
"epoch": 18.81060116354234,
"grad_norm": 1.543005347251892,
"learning_rate": 0.001,
"loss": 2.4497,
"step": 58200
},
{
"epoch": 18.842921784098255,
"grad_norm": 1.1537874937057495,
"learning_rate": 0.001,
"loss": 2.4345,
"step": 58300
},
{
"epoch": 18.87524240465417,
"grad_norm": 1.6207787990570068,
"learning_rate": 0.001,
"loss": 2.4552,
"step": 58400
},
{
"epoch": 18.907563025210084,
"grad_norm": 0.987137496471405,
"learning_rate": 0.001,
"loss": 2.4263,
"step": 58500
},
{
"epoch": 18.939883645766,
"grad_norm": 1.1179084777832031,
"learning_rate": 0.001,
"loss": 2.4583,
"step": 58600
},
{
"epoch": 18.972204266321913,
"grad_norm": 0.999499499797821,
"learning_rate": 0.001,
"loss": 2.4537,
"step": 58700
},
{
"epoch": 19.004524886877828,
"grad_norm": 1.0300583839416504,
"learning_rate": 0.001,
"loss": 2.4514,
"step": 58800
},
{
"epoch": 19.036845507433743,
"grad_norm": 0.9608945846557617,
"learning_rate": 0.001,
"loss": 2.326,
"step": 58900
},
{
"epoch": 19.069166127989657,
"grad_norm": 0.9459260702133179,
"learning_rate": 0.001,
"loss": 2.3132,
"step": 59000
},
{
"epoch": 19.10148674854557,
"grad_norm": 1.2012510299682617,
"learning_rate": 0.001,
"loss": 2.332,
"step": 59100
},
{
"epoch": 19.133807369101486,
"grad_norm": 1.1166940927505493,
"learning_rate": 0.001,
"loss": 2.3535,
"step": 59200
},
{
"epoch": 19.1661279896574,
"grad_norm": 1.3316816091537476,
"learning_rate": 0.001,
"loss": 2.3331,
"step": 59300
},
{
"epoch": 19.198448610213315,
"grad_norm": 1.1380234956741333,
"learning_rate": 0.001,
"loss": 2.3438,
"step": 59400
},
{
"epoch": 19.23076923076923,
"grad_norm": 1.0130378007888794,
"learning_rate": 0.001,
"loss": 2.3425,
"step": 59500
},
{
"epoch": 19.263089851325145,
"grad_norm": 1.0612099170684814,
"learning_rate": 0.001,
"loss": 2.3591,
"step": 59600
},
{
"epoch": 19.29541047188106,
"grad_norm": 1.0072628259658813,
"learning_rate": 0.001,
"loss": 2.3543,
"step": 59700
},
{
"epoch": 19.327731092436974,
"grad_norm": 1.1276565790176392,
"learning_rate": 0.001,
"loss": 2.3558,
"step": 59800
},
{
"epoch": 19.360051712992888,
"grad_norm": 1.0294878482818604,
"learning_rate": 0.001,
"loss": 2.3793,
"step": 59900
},
{
"epoch": 19.392372333548803,
"grad_norm": 1.3939874172210693,
"learning_rate": 0.001,
"loss": 2.3649,
"step": 60000
},
{
"epoch": 19.424692954104717,
"grad_norm": 1.295091986656189,
"learning_rate": 0.001,
"loss": 2.3757,
"step": 60100
},
{
"epoch": 19.457013574660632,
"grad_norm": 1.3220593929290771,
"learning_rate": 0.001,
"loss": 2.3635,
"step": 60200
},
{
"epoch": 19.489334195216546,
"grad_norm": 1.4696449041366577,
"learning_rate": 0.001,
"loss": 2.3927,
"step": 60300
},
{
"epoch": 19.521654815772465,
"grad_norm": 0.9687468409538269,
"learning_rate": 0.001,
"loss": 2.3806,
"step": 60400
},
{
"epoch": 19.55397543632838,
"grad_norm": 1.0483554601669312,
"learning_rate": 0.001,
"loss": 2.3805,
"step": 60500
},
{
"epoch": 19.586296056884294,
"grad_norm": 0.9296285510063171,
"learning_rate": 0.001,
"loss": 2.3839,
"step": 60600
},
{
"epoch": 19.618616677440208,
"grad_norm": 1.5007354021072388,
"learning_rate": 0.001,
"loss": 2.3968,
"step": 60700
},
{
"epoch": 19.650937297996123,
"grad_norm": 1.5555963516235352,
"learning_rate": 0.001,
"loss": 2.3942,
"step": 60800
},
{
"epoch": 19.683257918552037,
"grad_norm": 1.1963167190551758,
"learning_rate": 0.001,
"loss": 2.3854,
"step": 60900
},
{
"epoch": 19.715578539107952,
"grad_norm": 1.2514575719833374,
"learning_rate": 0.001,
"loss": 2.404,
"step": 61000
},
{
"epoch": 19.747899159663866,
"grad_norm": 1.4767061471939087,
"learning_rate": 0.001,
"loss": 2.3834,
"step": 61100
},
{
"epoch": 19.78021978021978,
"grad_norm": 0.9871048927307129,
"learning_rate": 0.001,
"loss": 2.3927,
"step": 61200
},
{
"epoch": 19.812540400775696,
"grad_norm": 1.4090756177902222,
"learning_rate": 0.001,
"loss": 2.4223,
"step": 61300
},
{
"epoch": 19.84486102133161,
"grad_norm": 1.3110271692276,
"learning_rate": 0.001,
"loss": 2.3889,
"step": 61400
},
{
"epoch": 19.877181641887525,
"grad_norm": 0.9297486543655396,
"learning_rate": 0.001,
"loss": 2.431,
"step": 61500
},
{
"epoch": 19.90950226244344,
"grad_norm": 1.3082588911056519,
"learning_rate": 0.001,
"loss": 2.4306,
"step": 61600
},
{
"epoch": 19.941822882999354,
"grad_norm": 0.9515864849090576,
"learning_rate": 0.001,
"loss": 2.4443,
"step": 61700
},
{
"epoch": 19.97414350355527,
"grad_norm": 1.187511920928955,
"learning_rate": 0.001,
"loss": 2.4338,
"step": 61800
},
{
"epoch": 20.006464124111183,
"grad_norm": 1.113945722579956,
"learning_rate": 0.001,
"loss": 2.4036,
"step": 61900
},
{
"epoch": 20.038784744667097,
"grad_norm": 0.9578619599342346,
"learning_rate": 0.001,
"loss": 2.3059,
"step": 62000
},
{
"epoch": 20.071105365223012,
"grad_norm": 1.0143957138061523,
"learning_rate": 0.001,
"loss": 2.2843,
"step": 62100
},
{
"epoch": 20.103425985778927,
"grad_norm": 1.097559928894043,
"learning_rate": 0.001,
"loss": 2.3054,
"step": 62200
},
{
"epoch": 20.13574660633484,
"grad_norm": 0.9272734522819519,
"learning_rate": 0.001,
"loss": 2.292,
"step": 62300
},
{
"epoch": 20.168067226890756,
"grad_norm": 1.0882327556610107,
"learning_rate": 0.001,
"loss": 2.3145,
"step": 62400
},
{
"epoch": 20.20038784744667,
"grad_norm": 1.3917256593704224,
"learning_rate": 0.001,
"loss": 2.3216,
"step": 62500
},
{
"epoch": 20.232708468002585,
"grad_norm": 1.114766001701355,
"learning_rate": 0.001,
"loss": 2.3222,
"step": 62600
},
{
"epoch": 20.2650290885585,
"grad_norm": 1.3342783451080322,
"learning_rate": 0.001,
"loss": 2.3123,
"step": 62700
},
{
"epoch": 20.297349709114414,
"grad_norm": 1.06283438205719,
"learning_rate": 0.001,
"loss": 2.3179,
"step": 62800
},
{
"epoch": 20.32967032967033,
"grad_norm": 0.93958979845047,
"learning_rate": 0.001,
"loss": 2.3198,
"step": 62900
},
{
"epoch": 20.361990950226243,
"grad_norm": 1.4212366342544556,
"learning_rate": 0.001,
"loss": 2.3144,
"step": 63000
},
{
"epoch": 20.394311570782158,
"grad_norm": 1.3134227991104126,
"learning_rate": 0.001,
"loss": 2.3527,
"step": 63100
},
{
"epoch": 20.426632191338072,
"grad_norm": 1.1288926601409912,
"learning_rate": 0.001,
"loss": 2.3462,
"step": 63200
},
{
"epoch": 20.458952811893987,
"grad_norm": 1.0301005840301514,
"learning_rate": 0.001,
"loss": 2.3592,
"step": 63300
},
{
"epoch": 20.4912734324499,
"grad_norm": 0.9638227820396423,
"learning_rate": 0.001,
"loss": 2.3568,
"step": 63400
},
{
"epoch": 20.52359405300582,
"grad_norm": 1.5197877883911133,
"learning_rate": 0.001,
"loss": 2.3549,
"step": 63500
},
{
"epoch": 20.555914673561734,
"grad_norm": 1.1526395082473755,
"learning_rate": 0.001,
"loss": 2.3606,
"step": 63600
},
{
"epoch": 20.58823529411765,
"grad_norm": 1.2824829816818237,
"learning_rate": 0.001,
"loss": 2.3527,
"step": 63700
},
{
"epoch": 20.620555914673563,
"grad_norm": 1.1107521057128906,
"learning_rate": 0.001,
"loss": 2.3621,
"step": 63800
},
{
"epoch": 20.652876535229478,
"grad_norm": 1.2559133768081665,
"learning_rate": 0.001,
"loss": 2.3635,
"step": 63900
},
{
"epoch": 20.685197155785392,
"grad_norm": 1.1576194763183594,
"learning_rate": 0.001,
"loss": 2.3666,
"step": 64000
},
{
"epoch": 20.717517776341307,
"grad_norm": 1.067619800567627,
"learning_rate": 0.001,
"loss": 2.3763,
"step": 64100
},
{
"epoch": 20.74983839689722,
"grad_norm": 1.0354868173599243,
"learning_rate": 0.001,
"loss": 2.3594,
"step": 64200
},
{
"epoch": 20.782159017453136,
"grad_norm": 1.3898648023605347,
"learning_rate": 0.001,
"loss": 2.3996,
"step": 64300
},
{
"epoch": 20.81447963800905,
"grad_norm": 1.0457971096038818,
"learning_rate": 0.001,
"loss": 2.3974,
"step": 64400
},
{
"epoch": 20.846800258564965,
"grad_norm": 1.394001841545105,
"learning_rate": 0.001,
"loss": 2.376,
"step": 64500
},
{
"epoch": 20.87912087912088,
"grad_norm": 1.2913107872009277,
"learning_rate": 0.001,
"loss": 2.3719,
"step": 64600
},
{
"epoch": 20.911441499676794,
"grad_norm": 0.8956575393676758,
"learning_rate": 0.001,
"loss": 2.3725,
"step": 64700
},
{
"epoch": 20.94376212023271,
"grad_norm": 0.9267522692680359,
"learning_rate": 0.001,
"loss": 2.3908,
"step": 64800
},
{
"epoch": 20.976082740788623,
"grad_norm": 1.1674199104309082,
"learning_rate": 0.001,
"loss": 2.3816,
"step": 64900
},
{
"epoch": 21.008403361344538,
"grad_norm": 1.149228811264038,
"learning_rate": 0.001,
"loss": 2.3687,
"step": 65000
},
{
"epoch": 21.040723981900452,
"grad_norm": 1.1210534572601318,
"learning_rate": 0.001,
"loss": 2.2492,
"step": 65100
},
{
"epoch": 21.073044602456367,
"grad_norm": 1.153964638710022,
"learning_rate": 0.001,
"loss": 2.2474,
"step": 65200
},
{
"epoch": 21.10536522301228,
"grad_norm": 1.2374367713928223,
"learning_rate": 0.001,
"loss": 2.2591,
"step": 65300
},
{
"epoch": 21.137685843568196,
"grad_norm": 1.231076955795288,
"learning_rate": 0.001,
"loss": 2.2824,
"step": 65400
},
{
"epoch": 21.17000646412411,
"grad_norm": 1.1518474817276,
"learning_rate": 0.001,
"loss": 2.2904,
"step": 65500
},
{
"epoch": 21.202327084680025,
"grad_norm": 1.1400446891784668,
"learning_rate": 0.001,
"loss": 2.2937,
"step": 65600
},
{
"epoch": 21.23464770523594,
"grad_norm": 1.0334277153015137,
"learning_rate": 0.001,
"loss": 2.291,
"step": 65700
},
{
"epoch": 21.266968325791854,
"grad_norm": 1.1228870153427124,
"learning_rate": 0.001,
"loss": 2.2916,
"step": 65800
},
{
"epoch": 21.29928894634777,
"grad_norm": 1.0719165802001953,
"learning_rate": 0.001,
"loss": 2.2946,
"step": 65900
},
{
"epoch": 21.331609566903683,
"grad_norm": 1.0845648050308228,
"learning_rate": 0.001,
"loss": 2.3075,
"step": 66000
},
{
"epoch": 21.363930187459598,
"grad_norm": 1.3496699333190918,
"learning_rate": 0.001,
"loss": 2.2954,
"step": 66100
},
{
"epoch": 21.396250808015512,
"grad_norm": 0.8247977495193481,
"learning_rate": 0.001,
"loss": 2.2924,
"step": 66200
},
{
"epoch": 21.428571428571427,
"grad_norm": 0.9616690278053284,
"learning_rate": 0.001,
"loss": 2.305,
"step": 66300
},
{
"epoch": 21.46089204912734,
"grad_norm": 1.060357689857483,
"learning_rate": 0.001,
"loss": 2.3309,
"step": 66400
},
{
"epoch": 21.49321266968326,
"grad_norm": 1.2382253408432007,
"learning_rate": 0.001,
"loss": 2.3049,
"step": 66500
},
{
"epoch": 21.525533290239174,
"grad_norm": 0.8520820736885071,
"learning_rate": 0.001,
"loss": 2.3077,
"step": 66600
},
{
"epoch": 21.55785391079509,
"grad_norm": 1.147915005683899,
"learning_rate": 0.001,
"loss": 2.3323,
"step": 66700
},
{
"epoch": 21.590174531351003,
"grad_norm": 1.319478154182434,
"learning_rate": 0.001,
"loss": 2.3191,
"step": 66800
},
{
"epoch": 21.622495151906918,
"grad_norm": 1.131990671157837,
"learning_rate": 0.001,
"loss": 2.323,
"step": 66900
},
{
"epoch": 21.654815772462833,
"grad_norm": 1.0251647233963013,
"learning_rate": 0.001,
"loss": 2.3486,
"step": 67000
},
{
"epoch": 21.687136393018747,
"grad_norm": 1.1523061990737915,
"learning_rate": 0.001,
"loss": 2.341,
"step": 67100
},
{
"epoch": 21.71945701357466,
"grad_norm": 1.333418607711792,
"learning_rate": 0.001,
"loss": 2.3534,
"step": 67200
},
{
"epoch": 21.751777634130576,
"grad_norm": 0.9779078364372253,
"learning_rate": 0.001,
"loss": 2.3478,
"step": 67300
},
{
"epoch": 21.78409825468649,
"grad_norm": 1.1211507320404053,
"learning_rate": 0.001,
"loss": 2.332,
"step": 67400
},
{
"epoch": 21.816418875242405,
"grad_norm": 1.406153678894043,
"learning_rate": 0.001,
"loss": 2.3506,
"step": 67500
},
{
"epoch": 21.84873949579832,
"grad_norm": 0.9729377031326294,
"learning_rate": 0.001,
"loss": 2.3466,
"step": 67600
},
{
"epoch": 21.881060116354234,
"grad_norm": 1.1170735359191895,
"learning_rate": 0.001,
"loss": 2.3591,
"step": 67700
},
{
"epoch": 21.91338073691015,
"grad_norm": 1.214516282081604,
"learning_rate": 0.001,
"loss": 2.3676,
"step": 67800
},
{
"epoch": 21.945701357466064,
"grad_norm": 1.2979365587234497,
"learning_rate": 0.001,
"loss": 2.3666,
"step": 67900
},
{
"epoch": 21.978021978021978,
"grad_norm": 1.5110282897949219,
"learning_rate": 0.001,
"loss": 2.3846,
"step": 68000
},
{
"epoch": 22.010342598577893,
"grad_norm": 1.2588374614715576,
"learning_rate": 0.001,
"loss": 2.323,
"step": 68100
},
{
"epoch": 22.042663219133807,
"grad_norm": 1.0757921934127808,
"learning_rate": 0.001,
"loss": 2.2411,
"step": 68200
},
{
"epoch": 22.07498383968972,
"grad_norm": 1.0231963396072388,
"learning_rate": 0.001,
"loss": 2.2505,
"step": 68300
},
{
"epoch": 22.107304460245636,
"grad_norm": 1.2770941257476807,
"learning_rate": 0.001,
"loss": 2.2263,
"step": 68400
},
{
"epoch": 22.13962508080155,
"grad_norm": 1.3017656803131104,
"learning_rate": 0.001,
"loss": 2.2273,
"step": 68500
},
{
"epoch": 22.171945701357465,
"grad_norm": 1.176435947418213,
"learning_rate": 0.001,
"loss": 2.2569,
"step": 68600
},
{
"epoch": 22.20426632191338,
"grad_norm": 1.3054310083389282,
"learning_rate": 0.001,
"loss": 2.2676,
"step": 68700
},
{
"epoch": 22.236586942469295,
"grad_norm": 1.0871057510375977,
"learning_rate": 0.001,
"loss": 2.268,
"step": 68800
},
{
"epoch": 22.26890756302521,
"grad_norm": 1.239288330078125,
"learning_rate": 0.001,
"loss": 2.2647,
"step": 68900
},
{
"epoch": 22.301228183581124,
"grad_norm": 1.0190906524658203,
"learning_rate": 0.001,
"loss": 2.2857,
"step": 69000
},
{
"epoch": 22.33354880413704,
"grad_norm": 1.2209452390670776,
"learning_rate": 0.001,
"loss": 2.2612,
"step": 69100
},
{
"epoch": 22.365869424692953,
"grad_norm": 1.2406755685806274,
"learning_rate": 0.001,
"loss": 2.2864,
"step": 69200
},
{
"epoch": 22.398190045248867,
"grad_norm": 1.1042966842651367,
"learning_rate": 0.001,
"loss": 2.2763,
"step": 69300
},
{
"epoch": 22.430510665804782,
"grad_norm": 1.0039794445037842,
"learning_rate": 0.001,
"loss": 2.2695,
"step": 69400
},
{
"epoch": 22.462831286360696,
"grad_norm": 1.4037723541259766,
"learning_rate": 0.001,
"loss": 2.2778,
"step": 69500
},
{
"epoch": 22.49515190691661,
"grad_norm": 1.3101537227630615,
"learning_rate": 0.001,
"loss": 2.2784,
"step": 69600
},
{
"epoch": 22.52747252747253,
"grad_norm": 1.0466303825378418,
"learning_rate": 0.001,
"loss": 2.2998,
"step": 69700
},
{
"epoch": 22.559793148028444,
"grad_norm": 1.2835876941680908,
"learning_rate": 0.001,
"loss": 2.2933,
"step": 69800
},
{
"epoch": 22.59211376858436,
"grad_norm": 1.040469765663147,
"learning_rate": 0.001,
"loss": 2.2951,
"step": 69900
},
{
"epoch": 22.624434389140273,
"grad_norm": 0.8917735815048218,
"learning_rate": 0.001,
"loss": 2.3084,
"step": 70000
},
{
"epoch": 22.656755009696187,
"grad_norm": 1.2921438217163086,
"learning_rate": 0.001,
"loss": 2.3097,
"step": 70100
},
{
"epoch": 22.689075630252102,
"grad_norm": 1.1070172786712646,
"learning_rate": 0.001,
"loss": 2.3189,
"step": 70200
},
{
"epoch": 22.721396250808017,
"grad_norm": 1.3595770597457886,
"learning_rate": 0.001,
"loss": 2.3035,
"step": 70300
},
{
"epoch": 22.75371687136393,
"grad_norm": 1.0014970302581787,
"learning_rate": 0.001,
"loss": 2.3059,
"step": 70400
},
{
"epoch": 22.786037491919846,
"grad_norm": 1.2341176271438599,
"learning_rate": 0.001,
"loss": 2.3161,
"step": 70500
},
{
"epoch": 22.81835811247576,
"grad_norm": 0.9569864273071289,
"learning_rate": 0.001,
"loss": 2.3215,
"step": 70600
},
{
"epoch": 22.850678733031675,
"grad_norm": 1.036069631576538,
"learning_rate": 0.001,
"loss": 2.335,
"step": 70700
},
{
"epoch": 22.88299935358759,
"grad_norm": 1.5049176216125488,
"learning_rate": 0.001,
"loss": 2.3246,
"step": 70800
},
{
"epoch": 22.915319974143504,
"grad_norm": 1.2657185792922974,
"learning_rate": 0.001,
"loss": 2.3259,
"step": 70900
},
{
"epoch": 22.94764059469942,
"grad_norm": 1.2280467748641968,
"learning_rate": 0.001,
"loss": 2.3253,
"step": 71000
},
{
"epoch": 22.979961215255333,
"grad_norm": 1.2914707660675049,
"learning_rate": 0.001,
"loss": 2.3534,
"step": 71100
},
{
"epoch": 23.012281835811248,
"grad_norm": 1.0989599227905273,
"learning_rate": 0.001,
"loss": 2.2715,
"step": 71200
},
{
"epoch": 23.044602456367162,
"grad_norm": 1.1108335256576538,
"learning_rate": 0.001,
"loss": 2.1853,
"step": 71300
},
{
"epoch": 23.076923076923077,
"grad_norm": 1.5226976871490479,
"learning_rate": 0.001,
"loss": 2.215,
"step": 71400
},
{
"epoch": 23.10924369747899,
"grad_norm": 1.087694525718689,
"learning_rate": 0.001,
"loss": 2.216,
"step": 71500
},
{
"epoch": 23.141564318034906,
"grad_norm": 1.6098570823669434,
"learning_rate": 0.001,
"loss": 2.2233,
"step": 71600
},
{
"epoch": 23.17388493859082,
"grad_norm": 1.3562836647033691,
"learning_rate": 0.001,
"loss": 2.2185,
"step": 71700
},
{
"epoch": 23.206205559146735,
"grad_norm": 1.253631353378296,
"learning_rate": 0.001,
"loss": 2.2379,
"step": 71800
},
{
"epoch": 23.23852617970265,
"grad_norm": 1.4562768936157227,
"learning_rate": 0.001,
"loss": 2.2423,
"step": 71900
},
{
"epoch": 23.270846800258564,
"grad_norm": 1.1356525421142578,
"learning_rate": 0.001,
"loss": 2.2214,
"step": 72000
},
{
"epoch": 23.30316742081448,
"grad_norm": 1.1421269178390503,
"learning_rate": 0.001,
"loss": 2.2432,
"step": 72100
},
{
"epoch": 23.335488041370393,
"grad_norm": 1.1426451206207275,
"learning_rate": 0.001,
"loss": 2.2497,
"step": 72200
},
{
"epoch": 23.367808661926308,
"grad_norm": 1.230992078781128,
"learning_rate": 0.001,
"loss": 2.249,
"step": 72300
},
{
"epoch": 23.400129282482222,
"grad_norm": 1.4944993257522583,
"learning_rate": 0.001,
"loss": 2.2725,
"step": 72400
},
{
"epoch": 23.432449903038137,
"grad_norm": 1.104407787322998,
"learning_rate": 0.001,
"loss": 2.24,
"step": 72500
},
{
"epoch": 23.46477052359405,
"grad_norm": 1.4007248878479004,
"learning_rate": 0.001,
"loss": 2.2582,
"step": 72600
},
{
"epoch": 23.49709114414997,
"grad_norm": 1.5281628370285034,
"learning_rate": 0.001,
"loss": 2.2614,
"step": 72700
},
{
"epoch": 23.529411764705884,
"grad_norm": 1.3542243242263794,
"learning_rate": 0.001,
"loss": 2.2539,
"step": 72800
},
{
"epoch": 23.5617323852618,
"grad_norm": 1.7197226285934448,
"learning_rate": 0.001,
"loss": 2.271,
"step": 72900
},
{
"epoch": 23.594053005817713,
"grad_norm": 1.0739434957504272,
"learning_rate": 0.001,
"loss": 2.2626,
"step": 73000
},
{
"epoch": 23.626373626373628,
"grad_norm": 1.0851984024047852,
"learning_rate": 0.001,
"loss": 2.2636,
"step": 73100
},
{
"epoch": 23.658694246929542,
"grad_norm": 1.173173189163208,
"learning_rate": 0.001,
"loss": 2.2793,
"step": 73200
},
{
"epoch": 23.691014867485457,
"grad_norm": 1.310870885848999,
"learning_rate": 0.001,
"loss": 2.2891,
"step": 73300
},
{
"epoch": 23.72333548804137,
"grad_norm": 1.3357535600662231,
"learning_rate": 0.001,
"loss": 2.289,
"step": 73400
},
{
"epoch": 23.755656108597286,
"grad_norm": 1.8570516109466553,
"learning_rate": 0.001,
"loss": 2.2864,
"step": 73500
},
{
"epoch": 23.7879767291532,
"grad_norm": 1.019691824913025,
"learning_rate": 0.001,
"loss": 2.3127,
"step": 73600
},
{
"epoch": 23.820297349709115,
"grad_norm": 1.171330213546753,
"learning_rate": 0.001,
"loss": 2.302,
"step": 73700
},
{
"epoch": 23.85261797026503,
"grad_norm": 1.340964913368225,
"learning_rate": 0.001,
"loss": 2.2851,
"step": 73800
},
{
"epoch": 23.884938590820944,
"grad_norm": 1.0584162473678589,
"learning_rate": 0.001,
"loss": 2.2952,
"step": 73900
},
{
"epoch": 23.91725921137686,
"grad_norm": 1.089120864868164,
"learning_rate": 0.001,
"loss": 2.2938,
"step": 74000
},
{
"epoch": 23.949579831932773,
"grad_norm": 0.9918783903121948,
"learning_rate": 0.001,
"loss": 2.3159,
"step": 74100
},
{
"epoch": 23.981900452488688,
"grad_norm": 1.2158150672912598,
"learning_rate": 0.001,
"loss": 2.3232,
"step": 74200
},
{
"epoch": 24.014221073044602,
"grad_norm": 1.7537885904312134,
"learning_rate": 0.001,
"loss": 2.2393,
"step": 74300
},
{
"epoch": 24.046541693600517,
"grad_norm": 1.2171244621276855,
"learning_rate": 0.001,
"loss": 2.1603,
"step": 74400
},
{
"epoch": 24.07886231415643,
"grad_norm": 1.923531413078308,
"learning_rate": 0.001,
"loss": 2.1792,
"step": 74500
},
{
"epoch": 24.111182934712346,
"grad_norm": 1.4310954809188843,
"learning_rate": 0.001,
"loss": 2.1884,
"step": 74600
},
{
"epoch": 24.14350355526826,
"grad_norm": 1.3417888879776,
"learning_rate": 0.001,
"loss": 2.2052,
"step": 74700
},
{
"epoch": 24.175824175824175,
"grad_norm": 1.4067453145980835,
"learning_rate": 0.001,
"loss": 2.1844,
"step": 74800
},
{
"epoch": 24.20814479638009,
"grad_norm": 1.3039509057998657,
"learning_rate": 0.001,
"loss": 2.1862,
"step": 74900
},
{
"epoch": 24.240465416936004,
"grad_norm": 1.3245545625686646,
"learning_rate": 0.001,
"loss": 2.2082,
"step": 75000
},
{
"epoch": 24.27278603749192,
"grad_norm": 1.1734683513641357,
"learning_rate": 0.001,
"loss": 2.1925,
"step": 75100
},
{
"epoch": 24.305106658047833,
"grad_norm": 1.2977125644683838,
"learning_rate": 0.001,
"loss": 2.2278,
"step": 75200
},
{
"epoch": 24.337427278603748,
"grad_norm": 1.3284844160079956,
"learning_rate": 0.001,
"loss": 2.2107,
"step": 75300
},
{
"epoch": 24.369747899159663,
"grad_norm": 1.7949256896972656,
"learning_rate": 0.001,
"loss": 2.2315,
"step": 75400
},
{
"epoch": 24.402068519715577,
"grad_norm": 1.2842888832092285,
"learning_rate": 0.001,
"loss": 2.2449,
"step": 75500
},
{
"epoch": 24.43438914027149,
"grad_norm": 1.664375901222229,
"learning_rate": 0.001,
"loss": 2.2103,
"step": 75600
},
{
"epoch": 24.466709760827406,
"grad_norm": 1.819553017616272,
"learning_rate": 0.001,
"loss": 2.2411,
"step": 75700
},
{
"epoch": 24.49903038138332,
"grad_norm": 1.7226625680923462,
"learning_rate": 0.001,
"loss": 2.2335,
"step": 75800
},
{
"epoch": 24.53135100193924,
"grad_norm": 1.389763355255127,
"learning_rate": 0.001,
"loss": 2.2298,
"step": 75900
},
{
"epoch": 24.563671622495153,
"grad_norm": 1.4180145263671875,
"learning_rate": 0.001,
"loss": 2.2553,
"step": 76000
},
{
"epoch": 24.595992243051068,
"grad_norm": 1.7032394409179688,
"learning_rate": 0.001,
"loss": 2.253,
"step": 76100
},
{
"epoch": 24.628312863606983,
"grad_norm": 1.2484254837036133,
"learning_rate": 0.001,
"loss": 2.2626,
"step": 76200
},
{
"epoch": 24.660633484162897,
"grad_norm": 1.4861352443695068,
"learning_rate": 0.001,
"loss": 2.2621,
"step": 76300
},
{
"epoch": 24.69295410471881,
"grad_norm": 1.2266833782196045,
"learning_rate": 0.001,
"loss": 2.2577,
"step": 76400
},
{
"epoch": 24.725274725274726,
"grad_norm": 1.5920053720474243,
"learning_rate": 0.001,
"loss": 2.2622,
"step": 76500
},
{
"epoch": 24.75759534583064,
"grad_norm": 1.5401145219802856,
"learning_rate": 0.001,
"loss": 2.2672,
"step": 76600
},
{
"epoch": 24.789915966386555,
"grad_norm": 1.6406484842300415,
"learning_rate": 0.001,
"loss": 2.2677,
"step": 76700
},
{
"epoch": 24.82223658694247,
"grad_norm": 1.2690526247024536,
"learning_rate": 0.001,
"loss": 2.2716,
"step": 76800
},
{
"epoch": 24.854557207498384,
"grad_norm": 1.3956623077392578,
"learning_rate": 0.001,
"loss": 2.2565,
"step": 76900
},
{
"epoch": 24.8868778280543,
"grad_norm": 1.2422696352005005,
"learning_rate": 0.001,
"loss": 2.2735,
"step": 77000
},
{
"epoch": 24.919198448610214,
"grad_norm": 1.3382925987243652,
"learning_rate": 0.001,
"loss": 2.2897,
"step": 77100
},
{
"epoch": 24.951519069166128,
"grad_norm": 1.3177549839019775,
"learning_rate": 0.001,
"loss": 2.2879,
"step": 77200
},
{
"epoch": 24.983839689722043,
"grad_norm": 1.4060649871826172,
"learning_rate": 0.001,
"loss": 2.2852,
"step": 77300
},
{
"epoch": 25.016160310277957,
"grad_norm": 2.507507801055908,
"learning_rate": 0.001,
"loss": 2.1969,
"step": 77400
},
{
"epoch": 25.048480930833872,
"grad_norm": 1.7902899980545044,
"learning_rate": 0.001,
"loss": 2.159,
"step": 77500
},
{
"epoch": 25.080801551389786,
"grad_norm": 2.543278455734253,
"learning_rate": 0.001,
"loss": 2.1503,
"step": 77600
},
{
"epoch": 25.1131221719457,
"grad_norm": 2.190962553024292,
"learning_rate": 0.001,
"loss": 2.1718,
"step": 77700
},
{
"epoch": 25.145442792501616,
"grad_norm": 2.3507230281829834,
"learning_rate": 0.001,
"loss": 2.1762,
"step": 77800
},
{
"epoch": 25.17776341305753,
"grad_norm": 2.4458060264587402,
"learning_rate": 0.001,
"loss": 2.1659,
"step": 77900
},
{
"epoch": 25.210084033613445,
"grad_norm": 1.6360588073730469,
"learning_rate": 0.001,
"loss": 2.1637,
"step": 78000
},
{
"epoch": 25.24240465416936,
"grad_norm": 2.420311212539673,
"learning_rate": 0.001,
"loss": 2.1622,
"step": 78100
},
{
"epoch": 25.274725274725274,
"grad_norm": 1.5954487323760986,
"learning_rate": 0.001,
"loss": 2.2073,
"step": 78200
},
{
"epoch": 25.30704589528119,
"grad_norm": 2.6514792442321777,
"learning_rate": 0.001,
"loss": 2.1782,
"step": 78300
},
{
"epoch": 25.339366515837103,
"grad_norm": 2.318582057952881,
"learning_rate": 0.001,
"loss": 2.1766,
"step": 78400
},
{
"epoch": 25.371687136393017,
"grad_norm": 2.036907434463501,
"learning_rate": 0.001,
"loss": 2.1729,
"step": 78500
},
{
"epoch": 25.404007756948932,
"grad_norm": 2.1478607654571533,
"learning_rate": 0.001,
"loss": 2.1956,
"step": 78600
},
{
"epoch": 25.436328377504847,
"grad_norm": 1.987168788909912,
"learning_rate": 0.001,
"loss": 2.1986,
"step": 78700
},
{
"epoch": 25.46864899806076,
"grad_norm": 1.57612144947052,
"learning_rate": 0.001,
"loss": 2.2138,
"step": 78800
},
{
"epoch": 25.50096961861668,
"grad_norm": 1.8580467700958252,
"learning_rate": 0.001,
"loss": 2.2137,
"step": 78900
},
{
"epoch": 25.533290239172594,
"grad_norm": 2.277358293533325,
"learning_rate": 0.001,
"loss": 2.2186,
"step": 79000
},
{
"epoch": 25.56561085972851,
"grad_norm": 2.014516592025757,
"learning_rate": 0.001,
"loss": 2.2176,
"step": 79100
},
{
"epoch": 25.597931480284423,
"grad_norm": 1.9379520416259766,
"learning_rate": 0.001,
"loss": 2.2346,
"step": 79200
},
{
"epoch": 25.630252100840337,
"grad_norm": 1.767410397529602,
"learning_rate": 0.001,
"loss": 2.225,
"step": 79300
},
{
"epoch": 25.662572721396252,
"grad_norm": 1.7645798921585083,
"learning_rate": 0.001,
"loss": 2.2424,
"step": 79400
},
{
"epoch": 25.694893341952167,
"grad_norm": 1.68009614944458,
"learning_rate": 0.001,
"loss": 2.2434,
"step": 79500
},
{
"epoch": 25.72721396250808,
"grad_norm": 1.9485399723052979,
"learning_rate": 0.001,
"loss": 2.2394,
"step": 79600
},
{
"epoch": 25.759534583063996,
"grad_norm": 2.297874927520752,
"learning_rate": 0.001,
"loss": 2.2385,
"step": 79700
},
{
"epoch": 25.79185520361991,
"grad_norm": 1.9281902313232422,
"learning_rate": 0.001,
"loss": 2.2504,
"step": 79800
},
{
"epoch": 25.824175824175825,
"grad_norm": 1.8228880167007446,
"learning_rate": 0.001,
"loss": 2.2296,
"step": 79900
},
{
"epoch": 25.85649644473174,
"grad_norm": 2.050082206726074,
"learning_rate": 0.001,
"loss": 2.2528,
"step": 80000
},
{
"epoch": 25.888817065287654,
"grad_norm": 2.1452155113220215,
"learning_rate": 0.001,
"loss": 2.2329,
"step": 80100
},
{
"epoch": 25.92113768584357,
"grad_norm": 1.783553957939148,
"learning_rate": 0.001,
"loss": 2.2716,
"step": 80200
},
{
"epoch": 25.953458306399483,
"grad_norm": 2.0124473571777344,
"learning_rate": 0.001,
"loss": 2.2467,
"step": 80300
},
{
"epoch": 25.985778926955398,
"grad_norm": 1.8177739381790161,
"learning_rate": 0.001,
"loss": 2.2752,
"step": 80400
},
{
"epoch": 26.018099547511312,
"grad_norm": 1.366523265838623,
"learning_rate": 0.001,
"loss": 2.1745,
"step": 80500
},
{
"epoch": 26.050420168067227,
"grad_norm": 1.7601717710494995,
"learning_rate": 0.001,
"loss": 2.1084,
"step": 80600
},
{
"epoch": 26.08274078862314,
"grad_norm": 1.2369937896728516,
"learning_rate": 0.001,
"loss": 2.1185,
"step": 80700
},
{
"epoch": 26.115061409179056,
"grad_norm": 1.2424700260162354,
"learning_rate": 0.001,
"loss": 2.1177,
"step": 80800
},
{
"epoch": 26.14738202973497,
"grad_norm": 1.8751611709594727,
"learning_rate": 0.001,
"loss": 2.152,
"step": 80900
},
{
"epoch": 26.179702650290885,
"grad_norm": 1.1152818202972412,
"learning_rate": 0.001,
"loss": 2.1489,
"step": 81000
},
{
"epoch": 26.2120232708468,
"grad_norm": 1.1283564567565918,
"learning_rate": 0.001,
"loss": 2.1572,
"step": 81100
},
{
"epoch": 26.244343891402714,
"grad_norm": 1.639378309249878,
"learning_rate": 0.001,
"loss": 2.1561,
"step": 81200
},
{
"epoch": 26.27666451195863,
"grad_norm": 1.1049447059631348,
"learning_rate": 0.001,
"loss": 2.1588,
"step": 81300
},
{
"epoch": 26.308985132514543,
"grad_norm": 1.0583877563476562,
"learning_rate": 0.001,
"loss": 2.1731,
"step": 81400
},
{
"epoch": 26.341305753070458,
"grad_norm": 1.3335222005844116,
"learning_rate": 0.001,
"loss": 2.1745,
"step": 81500
},
{
"epoch": 26.373626373626372,
"grad_norm": 1.6638619899749756,
"learning_rate": 0.001,
"loss": 2.1599,
"step": 81600
},
{
"epoch": 26.405946994182287,
"grad_norm": 1.8034693002700806,
"learning_rate": 0.001,
"loss": 2.1863,
"step": 81700
},
{
"epoch": 26.4382676147382,
"grad_norm": 1.0890341997146606,
"learning_rate": 0.001,
"loss": 2.1944,
"step": 81800
},
{
"epoch": 26.470588235294116,
"grad_norm": 1.7226943969726562,
"learning_rate": 0.001,
"loss": 2.1819,
"step": 81900
},
{
"epoch": 26.50290885585003,
"grad_norm": 1.294287085533142,
"learning_rate": 0.001,
"loss": 2.1832,
"step": 82000
},
{
"epoch": 26.53522947640595,
"grad_norm": 1.3550304174423218,
"learning_rate": 0.001,
"loss": 2.2035,
"step": 82100
},
{
"epoch": 26.567550096961863,
"grad_norm": 1.1257293224334717,
"learning_rate": 0.001,
"loss": 2.1898,
"step": 82200
},
{
"epoch": 26.599870717517778,
"grad_norm": 1.2646733522415161,
"learning_rate": 0.001,
"loss": 2.1997,
"step": 82300
},
{
"epoch": 26.632191338073692,
"grad_norm": 1.2385530471801758,
"learning_rate": 0.001,
"loss": 2.2071,
"step": 82400
},
{
"epoch": 26.664511958629607,
"grad_norm": 1.4374717473983765,
"learning_rate": 0.001,
"loss": 2.2082,
"step": 82500
},
{
"epoch": 26.69683257918552,
"grad_norm": 1.3186120986938477,
"learning_rate": 0.001,
"loss": 2.2149,
"step": 82600
},
{
"epoch": 26.729153199741436,
"grad_norm": 1.2617899179458618,
"learning_rate": 0.001,
"loss": 2.2039,
"step": 82700
},
{
"epoch": 26.76147382029735,
"grad_norm": 1.0441887378692627,
"learning_rate": 0.001,
"loss": 2.2324,
"step": 82800
},
{
"epoch": 26.793794440853265,
"grad_norm": 1.5578880310058594,
"learning_rate": 0.001,
"loss": 2.2155,
"step": 82900
},
{
"epoch": 26.82611506140918,
"grad_norm": 1.1477580070495605,
"learning_rate": 0.001,
"loss": 2.2219,
"step": 83000
},
{
"epoch": 26.858435681965094,
"grad_norm": 1.3140536546707153,
"learning_rate": 0.001,
"loss": 2.2309,
"step": 83100
},
{
"epoch": 26.89075630252101,
"grad_norm": 1.0781357288360596,
"learning_rate": 0.001,
"loss": 2.2302,
"step": 83200
},
{
"epoch": 26.923076923076923,
"grad_norm": 1.6410821676254272,
"learning_rate": 0.001,
"loss": 2.2319,
"step": 83300
},
{
"epoch": 26.955397543632838,
"grad_norm": 1.5108658075332642,
"learning_rate": 0.001,
"loss": 2.2258,
"step": 83400
},
{
"epoch": 26.987718164188752,
"grad_norm": 1.4185115098953247,
"learning_rate": 0.001,
"loss": 2.2219,
"step": 83500
},
{
"epoch": 27.020038784744667,
"grad_norm": 1.2338390350341797,
"learning_rate": 0.001,
"loss": 2.1582,
"step": 83600
},
{
"epoch": 27.05235940530058,
"grad_norm": 1.1917002201080322,
"learning_rate": 0.001,
"loss": 2.0996,
"step": 83700
},
{
"epoch": 27.084680025856496,
"grad_norm": 1.9231431484222412,
"learning_rate": 0.001,
"loss": 2.1101,
"step": 83800
},
{
"epoch": 27.11700064641241,
"grad_norm": 0.9797855615615845,
"learning_rate": 0.001,
"loss": 2.1199,
"step": 83900
},
{
"epoch": 27.149321266968325,
"grad_norm": 1.3979872465133667,
"learning_rate": 0.001,
"loss": 2.1181,
"step": 84000
},
{
"epoch": 27.18164188752424,
"grad_norm": 1.0003714561462402,
"learning_rate": 0.001,
"loss": 2.1446,
"step": 84100
},
{
"epoch": 27.213962508080154,
"grad_norm": 1.7641527652740479,
"learning_rate": 0.001,
"loss": 2.1024,
"step": 84200
},
{
"epoch": 27.24628312863607,
"grad_norm": 0.9522609114646912,
"learning_rate": 0.001,
"loss": 2.126,
"step": 84300
},
{
"epoch": 27.278603749191983,
"grad_norm": 1.028588891029358,
"learning_rate": 0.001,
"loss": 2.1288,
"step": 84400
},
{
"epoch": 27.310924369747898,
"grad_norm": 1.1693410873413086,
"learning_rate": 0.001,
"loss": 2.1359,
"step": 84500
},
{
"epoch": 27.343244990303813,
"grad_norm": 1.3971128463745117,
"learning_rate": 0.001,
"loss": 2.1631,
"step": 84600
},
{
"epoch": 27.375565610859727,
"grad_norm": 1.0558292865753174,
"learning_rate": 0.001,
"loss": 2.1556,
"step": 84700
},
{
"epoch": 27.40788623141564,
"grad_norm": 1.113844633102417,
"learning_rate": 0.001,
"loss": 2.1462,
"step": 84800
},
{
"epoch": 27.440206851971556,
"grad_norm": 1.251039981842041,
"learning_rate": 0.001,
"loss": 2.1483,
"step": 84900
},
{
"epoch": 27.47252747252747,
"grad_norm": 1.2447925806045532,
"learning_rate": 0.001,
"loss": 2.1503,
"step": 85000
},
{
"epoch": 27.50484809308339,
"grad_norm": 1.154356598854065,
"learning_rate": 0.001,
"loss": 2.1789,
"step": 85100
},
{
"epoch": 27.537168713639304,
"grad_norm": 1.1376807689666748,
"learning_rate": 0.001,
"loss": 2.1802,
"step": 85200
},
{
"epoch": 27.569489334195218,
"grad_norm": 1.3358043432235718,
"learning_rate": 0.001,
"loss": 2.1827,
"step": 85300
},
{
"epoch": 27.601809954751133,
"grad_norm": 1.147119402885437,
"learning_rate": 0.001,
"loss": 2.173,
"step": 85400
},
{
"epoch": 27.634130575307047,
"grad_norm": 1.3377326726913452,
"learning_rate": 0.001,
"loss": 2.1811,
"step": 85500
},
{
"epoch": 27.66645119586296,
"grad_norm": 1.1831611394882202,
"learning_rate": 0.001,
"loss": 2.1735,
"step": 85600
},
{
"epoch": 27.698771816418876,
"grad_norm": 1.2356758117675781,
"learning_rate": 0.001,
"loss": 2.1968,
"step": 85700
},
{
"epoch": 27.73109243697479,
"grad_norm": 1.4294730424880981,
"learning_rate": 0.001,
"loss": 2.178,
"step": 85800
},
{
"epoch": 27.763413057530705,
"grad_norm": 1.5439614057540894,
"learning_rate": 0.001,
"loss": 2.1808,
"step": 85900
},
{
"epoch": 27.79573367808662,
"grad_norm": 1.3789457082748413,
"learning_rate": 0.001,
"loss": 2.1897,
"step": 86000
},
{
"epoch": 27.828054298642535,
"grad_norm": 0.9521170258522034,
"learning_rate": 0.001,
"loss": 2.1824,
"step": 86100
},
{
"epoch": 27.86037491919845,
"grad_norm": 0.98322594165802,
"learning_rate": 0.001,
"loss": 2.2039,
"step": 86200
},
{
"epoch": 27.892695539754364,
"grad_norm": 1.7284862995147705,
"learning_rate": 0.001,
"loss": 2.1833,
"step": 86300
},
{
"epoch": 27.92501616031028,
"grad_norm": 1.160421371459961,
"learning_rate": 0.001,
"loss": 2.1943,
"step": 86400
},
{
"epoch": 27.957336780866193,
"grad_norm": 1.2484320402145386,
"learning_rate": 0.001,
"loss": 2.2137,
"step": 86500
},
{
"epoch": 27.989657401422107,
"grad_norm": 1.6111912727355957,
"learning_rate": 0.001,
"loss": 2.1995,
"step": 86600
},
{
"epoch": 28.021978021978022,
"grad_norm": 1.159625768661499,
"learning_rate": 0.001,
"loss": 2.1394,
"step": 86700
},
{
"epoch": 28.054298642533936,
"grad_norm": 1.2188056707382202,
"learning_rate": 0.001,
"loss": 2.0749,
"step": 86800
},
{
"epoch": 28.08661926308985,
"grad_norm": 1.3605949878692627,
"learning_rate": 0.001,
"loss": 2.0627,
"step": 86900
},
{
"epoch": 28.118939883645766,
"grad_norm": 1.0128086805343628,
"learning_rate": 0.001,
"loss": 2.0837,
"step": 87000
},
{
"epoch": 28.15126050420168,
"grad_norm": 1.420502781867981,
"learning_rate": 0.001,
"loss": 2.0804,
"step": 87100
},
{
"epoch": 28.183581124757595,
"grad_norm": 1.1222341060638428,
"learning_rate": 0.001,
"loss": 2.0978,
"step": 87200
},
{
"epoch": 28.21590174531351,
"grad_norm": 1.2059674263000488,
"learning_rate": 0.001,
"loss": 2.1132,
"step": 87300
},
{
"epoch": 28.248222365869424,
"grad_norm": 1.1298748254776,
"learning_rate": 0.001,
"loss": 2.1136,
"step": 87400
},
{
"epoch": 28.28054298642534,
"grad_norm": 1.2116446495056152,
"learning_rate": 0.001,
"loss": 2.1205,
"step": 87500
},
{
"epoch": 28.312863606981253,
"grad_norm": 1.4095208644866943,
"learning_rate": 0.001,
"loss": 2.1073,
"step": 87600
},
{
"epoch": 28.345184227537167,
"grad_norm": 1.0656698942184448,
"learning_rate": 0.001,
"loss": 2.1035,
"step": 87700
},
{
"epoch": 28.377504848093082,
"grad_norm": 1.166192650794983,
"learning_rate": 0.001,
"loss": 2.1497,
"step": 87800
},
{
"epoch": 28.409825468648997,
"grad_norm": 1.2223316431045532,
"learning_rate": 0.001,
"loss": 2.1285,
"step": 87900
},
{
"epoch": 28.44214608920491,
"grad_norm": 1.3476072549819946,
"learning_rate": 0.001,
"loss": 2.1229,
"step": 88000
},
{
"epoch": 28.474466709760826,
"grad_norm": 1.5452789068222046,
"learning_rate": 0.001,
"loss": 2.1377,
"step": 88100
},
{
"epoch": 28.50678733031674,
"grad_norm": 1.204521656036377,
"learning_rate": 0.001,
"loss": 2.1411,
"step": 88200
},
{
"epoch": 28.53910795087266,
"grad_norm": 1.2663288116455078,
"learning_rate": 0.001,
"loss": 2.156,
"step": 88300
},
{
"epoch": 28.571428571428573,
"grad_norm": 1.0826762914657593,
"learning_rate": 0.001,
"loss": 2.1547,
"step": 88400
},
{
"epoch": 28.603749191984488,
"grad_norm": 1.1127820014953613,
"learning_rate": 0.001,
"loss": 2.1708,
"step": 88500
},
{
"epoch": 28.636069812540402,
"grad_norm": 1.3881254196166992,
"learning_rate": 0.001,
"loss": 2.1594,
"step": 88600
},
{
"epoch": 28.668390433096317,
"grad_norm": 1.1154764890670776,
"learning_rate": 0.001,
"loss": 2.1618,
"step": 88700
},
{
"epoch": 28.70071105365223,
"grad_norm": 1.3774234056472778,
"learning_rate": 0.001,
"loss": 2.1696,
"step": 88800
},
{
"epoch": 28.733031674208146,
"grad_norm": 1.477734923362732,
"learning_rate": 0.001,
"loss": 2.1735,
"step": 88900
},
{
"epoch": 28.76535229476406,
"grad_norm": 1.1354868412017822,
"learning_rate": 0.001,
"loss": 2.1755,
"step": 89000
},
{
"epoch": 28.797672915319975,
"grad_norm": 1.2781658172607422,
"learning_rate": 0.001,
"loss": 2.1702,
"step": 89100
},
{
"epoch": 28.82999353587589,
"grad_norm": 0.9861193895339966,
"learning_rate": 0.001,
"loss": 2.1754,
"step": 89200
},
{
"epoch": 28.862314156431804,
"grad_norm": 1.2006617784500122,
"learning_rate": 0.001,
"loss": 2.1712,
"step": 89300
},
{
"epoch": 28.89463477698772,
"grad_norm": 1.0458413362503052,
"learning_rate": 0.001,
"loss": 2.1622,
"step": 89400
},
{
"epoch": 28.926955397543633,
"grad_norm": 1.6380434036254883,
"learning_rate": 0.001,
"loss": 2.1827,
"step": 89500
},
{
"epoch": 28.959276018099548,
"grad_norm": 1.4402202367782593,
"learning_rate": 0.001,
"loss": 2.1788,
"step": 89600
},
{
"epoch": 28.991596638655462,
"grad_norm": 1.1642043590545654,
"learning_rate": 0.001,
"loss": 2.1953,
"step": 89700
},
{
"epoch": 29.023917259211377,
"grad_norm": 1.3228559494018555,
"learning_rate": 0.001,
"loss": 2.097,
"step": 89800
},
{
"epoch": 29.05623787976729,
"grad_norm": 1.251704216003418,
"learning_rate": 0.001,
"loss": 2.042,
"step": 89900
},
{
"epoch": 29.088558500323206,
"grad_norm": 1.0149487257003784,
"learning_rate": 0.001,
"loss": 2.0505,
"step": 90000
},
{
"epoch": 29.12087912087912,
"grad_norm": 1.255851149559021,
"learning_rate": 0.001,
"loss": 2.0693,
"step": 90100
},
{
"epoch": 29.153199741435035,
"grad_norm": 1.5572729110717773,
"learning_rate": 0.001,
"loss": 2.0668,
"step": 90200
},
{
"epoch": 29.18552036199095,
"grad_norm": 1.1453217267990112,
"learning_rate": 0.001,
"loss": 2.0868,
"step": 90300
},
{
"epoch": 29.217840982546864,
"grad_norm": 1.1141669750213623,
"learning_rate": 0.001,
"loss": 2.0751,
"step": 90400
},
{
"epoch": 29.25016160310278,
"grad_norm": 1.3564319610595703,
"learning_rate": 0.001,
"loss": 2.0766,
"step": 90500
},
{
"epoch": 29.282482223658693,
"grad_norm": 1.0817420482635498,
"learning_rate": 0.001,
"loss": 2.0784,
"step": 90600
},
{
"epoch": 29.314802844214608,
"grad_norm": 1.1262016296386719,
"learning_rate": 0.001,
"loss": 2.0984,
"step": 90700
},
{
"epoch": 29.347123464770522,
"grad_norm": 0.9490855932235718,
"learning_rate": 0.001,
"loss": 2.1047,
"step": 90800
},
{
"epoch": 29.379444085326437,
"grad_norm": 1.128937005996704,
"learning_rate": 0.001,
"loss": 2.0962,
"step": 90900
},
{
"epoch": 29.41176470588235,
"grad_norm": 1.2524522542953491,
"learning_rate": 0.001,
"loss": 2.1144,
"step": 91000
},
{
"epoch": 29.444085326438266,
"grad_norm": 1.3518542051315308,
"learning_rate": 0.001,
"loss": 2.1241,
"step": 91100
},
{
"epoch": 29.47640594699418,
"grad_norm": 1.2834372520446777,
"learning_rate": 0.001,
"loss": 2.1124,
"step": 91200
},
{
"epoch": 29.5087265675501,
"grad_norm": 1.3267338275909424,
"learning_rate": 0.001,
"loss": 2.1215,
"step": 91300
},
{
"epoch": 29.541047188106013,
"grad_norm": 1.4632035493850708,
"learning_rate": 0.001,
"loss": 2.143,
"step": 91400
},
{
"epoch": 29.573367808661928,
"grad_norm": 1.2071197032928467,
"learning_rate": 0.001,
"loss": 2.1225,
"step": 91500
},
{
"epoch": 29.605688429217842,
"grad_norm": 1.339823842048645,
"learning_rate": 0.001,
"loss": 2.1094,
"step": 91600
},
{
"epoch": 29.638009049773757,
"grad_norm": 1.1348124742507935,
"learning_rate": 0.001,
"loss": 2.1373,
"step": 91700
},
{
"epoch": 29.67032967032967,
"grad_norm": 1.0786309242248535,
"learning_rate": 0.001,
"loss": 2.146,
"step": 91800
},
{
"epoch": 29.702650290885586,
"grad_norm": 1.2192673683166504,
"learning_rate": 0.001,
"loss": 2.1523,
"step": 91900
},
{
"epoch": 29.7349709114415,
"grad_norm": 0.9565330147743225,
"learning_rate": 0.001,
"loss": 2.1408,
"step": 92000
},
{
"epoch": 29.767291531997415,
"grad_norm": 1.2919212579727173,
"learning_rate": 0.001,
"loss": 2.1431,
"step": 92100
},
{
"epoch": 29.79961215255333,
"grad_norm": 1.1823819875717163,
"learning_rate": 0.001,
"loss": 2.1612,
"step": 92200
},
{
"epoch": 29.831932773109244,
"grad_norm": 1.1808539628982544,
"learning_rate": 0.001,
"loss": 2.1541,
"step": 92300
},
{
"epoch": 29.86425339366516,
"grad_norm": 0.9185066223144531,
"learning_rate": 0.001,
"loss": 2.1528,
"step": 92400
},
{
"epoch": 29.896574014221073,
"grad_norm": 1.387736439704895,
"learning_rate": 0.001,
"loss": 2.1577,
"step": 92500
},
{
"epoch": 29.928894634776988,
"grad_norm": 1.27926504611969,
"learning_rate": 0.001,
"loss": 2.1618,
"step": 92600
},
{
"epoch": 29.961215255332903,
"grad_norm": 1.438068151473999,
"learning_rate": 0.001,
"loss": 2.1501,
"step": 92700
},
{
"epoch": 29.993535875888817,
"grad_norm": 1.2931363582611084,
"learning_rate": 0.001,
"loss": 2.1733,
"step": 92800
},
{
"epoch": 30.02585649644473,
"grad_norm": 1.4011821746826172,
"learning_rate": 0.001,
"loss": 2.0494,
"step": 92900
},
{
"epoch": 30.058177117000646,
"grad_norm": 1.3266193866729736,
"learning_rate": 0.001,
"loss": 2.028,
"step": 93000
},
{
"epoch": 30.09049773755656,
"grad_norm": 1.4412564039230347,
"learning_rate": 0.001,
"loss": 2.0236,
"step": 93100
},
{
"epoch": 30.122818358112475,
"grad_norm": 1.2266614437103271,
"learning_rate": 0.001,
"loss": 2.0305,
"step": 93200
},
{
"epoch": 30.15513897866839,
"grad_norm": 1.2787266969680786,
"learning_rate": 0.001,
"loss": 2.0371,
"step": 93300
},
{
"epoch": 30.187459599224304,
"grad_norm": 1.3180530071258545,
"learning_rate": 0.001,
"loss": 2.0685,
"step": 93400
},
{
"epoch": 30.21978021978022,
"grad_norm": 0.980501115322113,
"learning_rate": 0.001,
"loss": 2.0615,
"step": 93500
},
{
"epoch": 30.252100840336134,
"grad_norm": 1.0844141244888306,
"learning_rate": 0.001,
"loss": 2.0757,
"step": 93600
},
{
"epoch": 30.284421460892048,
"grad_norm": 1.7597213983535767,
"learning_rate": 0.001,
"loss": 2.0663,
"step": 93700
},
{
"epoch": 30.316742081447963,
"grad_norm": 1.1686208248138428,
"learning_rate": 0.001,
"loss": 2.0461,
"step": 93800
},
{
"epoch": 30.349062702003877,
"grad_norm": 1.2640702724456787,
"learning_rate": 0.001,
"loss": 2.081,
"step": 93900
},
{
"epoch": 30.381383322559792,
"grad_norm": 1.1923021078109741,
"learning_rate": 0.001,
"loss": 2.0855,
"step": 94000
},
{
"epoch": 30.413703943115706,
"grad_norm": 1.25620698928833,
"learning_rate": 0.001,
"loss": 2.1024,
"step": 94100
},
{
"epoch": 30.44602456367162,
"grad_norm": 1.1417943239212036,
"learning_rate": 0.001,
"loss": 2.0802,
"step": 94200
},
{
"epoch": 30.478345184227535,
"grad_norm": 1.3786128759384155,
"learning_rate": 0.001,
"loss": 2.1161,
"step": 94300
},
{
"epoch": 30.51066580478345,
"grad_norm": 1.1850334405899048,
"learning_rate": 0.001,
"loss": 2.1171,
"step": 94400
},
{
"epoch": 30.542986425339368,
"grad_norm": 1.1608952283859253,
"learning_rate": 0.001,
"loss": 2.1074,
"step": 94500
},
{
"epoch": 30.575307045895283,
"grad_norm": 1.5039657354354858,
"learning_rate": 0.001,
"loss": 2.1199,
"step": 94600
},
{
"epoch": 30.607627666451197,
"grad_norm": 1.1668621301651,
"learning_rate": 0.001,
"loss": 2.1185,
"step": 94700
},
{
"epoch": 30.639948287007112,
"grad_norm": 1.6132876873016357,
"learning_rate": 0.001,
"loss": 2.0857,
"step": 94800
},
{
"epoch": 30.672268907563026,
"grad_norm": 1.538486123085022,
"learning_rate": 0.001,
"loss": 2.0991,
"step": 94900
},
{
"epoch": 30.70458952811894,
"grad_norm": 1.506345272064209,
"learning_rate": 0.001,
"loss": 2.1122,
"step": 95000
},
{
"epoch": 30.736910148674855,
"grad_norm": 1.0374579429626465,
"learning_rate": 0.001,
"loss": 2.1348,
"step": 95100
},
{
"epoch": 30.76923076923077,
"grad_norm": 1.2800602912902832,
"learning_rate": 0.001,
"loss": 2.1188,
"step": 95200
},
{
"epoch": 30.801551389786685,
"grad_norm": 1.1404392719268799,
"learning_rate": 0.001,
"loss": 2.1272,
"step": 95300
},
{
"epoch": 30.8338720103426,
"grad_norm": 1.3880510330200195,
"learning_rate": 0.001,
"loss": 2.1493,
"step": 95400
},
{
"epoch": 30.866192630898514,
"grad_norm": 1.391794204711914,
"learning_rate": 0.001,
"loss": 2.1378,
"step": 95500
},
{
"epoch": 30.89851325145443,
"grad_norm": 1.0275532007217407,
"learning_rate": 0.001,
"loss": 2.1227,
"step": 95600
},
{
"epoch": 30.930833872010343,
"grad_norm": 1.4005444049835205,
"learning_rate": 0.001,
"loss": 2.1576,
"step": 95700
},
{
"epoch": 30.963154492566257,
"grad_norm": 1.2828242778778076,
"learning_rate": 0.001,
"loss": 2.1411,
"step": 95800
},
{
"epoch": 30.995475113122172,
"grad_norm": 1.341699242591858,
"learning_rate": 0.001,
"loss": 2.1246,
"step": 95900
},
{
"epoch": 31.027795733678087,
"grad_norm": 1.4933757781982422,
"learning_rate": 0.001,
"loss": 2.0016,
"step": 96000
},
{
"epoch": 31.060116354234,
"grad_norm": 1.5427340269088745,
"learning_rate": 0.001,
"loss": 2.0132,
"step": 96100
},
{
"epoch": 31.092436974789916,
"grad_norm": 1.1902391910552979,
"learning_rate": 0.001,
"loss": 2.0301,
"step": 96200
},
{
"epoch": 31.12475759534583,
"grad_norm": 1.2572218179702759,
"learning_rate": 0.001,
"loss": 2.0339,
"step": 96300
},
{
"epoch": 31.157078215901745,
"grad_norm": 1.2706958055496216,
"learning_rate": 0.001,
"loss": 2.032,
"step": 96400
},
{
"epoch": 31.18939883645766,
"grad_norm": 1.4089903831481934,
"learning_rate": 0.001,
"loss": 2.0138,
"step": 96500
},
{
"epoch": 31.221719457013574,
"grad_norm": 1.3937467336654663,
"learning_rate": 0.001,
"loss": 2.0442,
"step": 96600
},
{
"epoch": 31.25404007756949,
"grad_norm": 1.353804588317871,
"learning_rate": 0.001,
"loss": 2.0317,
"step": 96700
},
{
"epoch": 31.286360698125403,
"grad_norm": 1.279462456703186,
"learning_rate": 0.001,
"loss": 2.057,
"step": 96800
},
{
"epoch": 31.318681318681318,
"grad_norm": 1.0817734003067017,
"learning_rate": 0.001,
"loss": 2.0597,
"step": 96900
},
{
"epoch": 31.351001939237232,
"grad_norm": 1.5528923273086548,
"learning_rate": 0.001,
"loss": 2.0555,
"step": 97000
},
{
"epoch": 31.383322559793147,
"grad_norm": 1.3770098686218262,
"learning_rate": 0.001,
"loss": 2.0565,
"step": 97100
},
{
"epoch": 31.41564318034906,
"grad_norm": 1.2134709358215332,
"learning_rate": 0.001,
"loss": 2.077,
"step": 97200
},
{
"epoch": 31.447963800904976,
"grad_norm": 1.6369162797927856,
"learning_rate": 0.001,
"loss": 2.0746,
"step": 97300
},
{
"epoch": 31.48028442146089,
"grad_norm": 1.389773964881897,
"learning_rate": 0.001,
"loss": 2.0696,
"step": 97400
},
{
"epoch": 31.51260504201681,
"grad_norm": 1.4730721712112427,
"learning_rate": 0.001,
"loss": 2.086,
"step": 97500
},
{
"epoch": 31.544925662572723,
"grad_norm": 1.5068094730377197,
"learning_rate": 0.001,
"loss": 2.0827,
"step": 97600
},
{
"epoch": 31.577246283128638,
"grad_norm": 1.334282636642456,
"learning_rate": 0.001,
"loss": 2.0834,
"step": 97700
},
{
"epoch": 31.609566903684552,
"grad_norm": 1.3712047338485718,
"learning_rate": 0.001,
"loss": 2.083,
"step": 97800
},
{
"epoch": 31.641887524240467,
"grad_norm": 1.0409879684448242,
"learning_rate": 0.001,
"loss": 2.0913,
"step": 97900
},
{
"epoch": 31.67420814479638,
"grad_norm": 1.559335708618164,
"learning_rate": 0.001,
"loss": 2.1005,
"step": 98000
},
{
"epoch": 31.706528765352296,
"grad_norm": 1.1505087614059448,
"learning_rate": 0.001,
"loss": 2.1066,
"step": 98100
},
{
"epoch": 31.73884938590821,
"grad_norm": 1.2901155948638916,
"learning_rate": 0.001,
"loss": 2.0871,
"step": 98200
},
{
"epoch": 31.771170006464125,
"grad_norm": 1.6031471490859985,
"learning_rate": 0.001,
"loss": 2.1081,
"step": 98300
},
{
"epoch": 31.80349062702004,
"grad_norm": 1.2014280557632446,
"learning_rate": 0.001,
"loss": 2.1044,
"step": 98400
},
{
"epoch": 31.835811247575954,
"grad_norm": 1.3353430032730103,
"learning_rate": 0.001,
"loss": 2.1145,
"step": 98500
},
{
"epoch": 31.86813186813187,
"grad_norm": 1.4211585521697998,
"learning_rate": 0.001,
"loss": 2.1067,
"step": 98600
},
{
"epoch": 31.900452488687783,
"grad_norm": 1.28533935546875,
"learning_rate": 0.001,
"loss": 2.1234,
"step": 98700
},
{
"epoch": 31.932773109243698,
"grad_norm": 1.4658890962600708,
"learning_rate": 0.001,
"loss": 2.1224,
"step": 98800
},
{
"epoch": 31.965093729799612,
"grad_norm": 1.3019647598266602,
"learning_rate": 0.001,
"loss": 2.1161,
"step": 98900
},
{
"epoch": 31.997414350355527,
"grad_norm": 1.580609679222107,
"learning_rate": 0.001,
"loss": 2.1106,
"step": 99000
},
{
"epoch": 32.02973497091144,
"grad_norm": 1.6000734567642212,
"learning_rate": 0.001,
"loss": 2.0183,
"step": 99100
},
{
"epoch": 32.062055591467356,
"grad_norm": 1.5266070365905762,
"learning_rate": 0.001,
"loss": 1.992,
"step": 99200
},
{
"epoch": 32.09437621202327,
"grad_norm": 1.4625951051712036,
"learning_rate": 0.001,
"loss": 1.9784,
"step": 99300
},
{
"epoch": 32.126696832579185,
"grad_norm": 1.5137872695922852,
"learning_rate": 0.001,
"loss": 1.9986,
"step": 99400
},
{
"epoch": 32.1590174531351,
"grad_norm": 1.565382719039917,
"learning_rate": 0.001,
"loss": 2.014,
"step": 99500
},
{
"epoch": 32.191338073691014,
"grad_norm": 1.1044955253601074,
"learning_rate": 0.001,
"loss": 2.0119,
"step": 99600
},
{
"epoch": 32.22365869424693,
"grad_norm": 1.5801650285720825,
"learning_rate": 0.001,
"loss": 2.0215,
"step": 99700
},
{
"epoch": 32.25597931480284,
"grad_norm": 1.502664566040039,
"learning_rate": 0.001,
"loss": 2.0201,
"step": 99800
},
{
"epoch": 32.28829993535876,
"grad_norm": 1.5750117301940918,
"learning_rate": 0.001,
"loss": 2.0281,
"step": 99900
},
{
"epoch": 32.32062055591467,
"grad_norm": 1.4533034563064575,
"learning_rate": 0.001,
"loss": 2.0448,
"step": 100000
},
{
"epoch": 32.35294117647059,
"grad_norm": 1.4341336488723755,
"learning_rate": 0.001,
"loss": 2.0461,
"step": 100100
},
{
"epoch": 32.3852617970265,
"grad_norm": 1.501747488975525,
"learning_rate": 0.001,
"loss": 2.0465,
"step": 100200
},
{
"epoch": 32.417582417582416,
"grad_norm": 1.3563640117645264,
"learning_rate": 0.001,
"loss": 2.0374,
"step": 100300
},
{
"epoch": 32.44990303813833,
"grad_norm": 1.6985423564910889,
"learning_rate": 0.001,
"loss": 2.042,
"step": 100400
},
{
"epoch": 32.482223658694245,
"grad_norm": 1.352845311164856,
"learning_rate": 0.001,
"loss": 2.0394,
"step": 100500
},
{
"epoch": 32.51454427925016,
"grad_norm": 1.6858887672424316,
"learning_rate": 0.001,
"loss": 2.0446,
"step": 100600
},
{
"epoch": 32.546864899806074,
"grad_norm": 1.6969914436340332,
"learning_rate": 0.001,
"loss": 2.0659,
"step": 100700
},
{
"epoch": 32.57918552036199,
"grad_norm": 1.2938587665557861,
"learning_rate": 0.001,
"loss": 2.0486,
"step": 100800
},
{
"epoch": 32.6115061409179,
"grad_norm": 1.2119784355163574,
"learning_rate": 0.001,
"loss": 2.065,
"step": 100900
},
{
"epoch": 32.64382676147382,
"grad_norm": 1.3899317979812622,
"learning_rate": 0.001,
"loss": 2.0688,
"step": 101000
},
{
"epoch": 32.67614738202973,
"grad_norm": 1.5170328617095947,
"learning_rate": 0.001,
"loss": 2.065,
"step": 101100
},
{
"epoch": 32.70846800258565,
"grad_norm": 1.3387643098831177,
"learning_rate": 0.001,
"loss": 2.0754,
"step": 101200
},
{
"epoch": 32.74078862314156,
"grad_norm": 2.1560583114624023,
"learning_rate": 0.001,
"loss": 2.0846,
"step": 101300
},
{
"epoch": 32.773109243697476,
"grad_norm": 1.1874232292175293,
"learning_rate": 0.001,
"loss": 2.0913,
"step": 101400
},
{
"epoch": 32.80542986425339,
"grad_norm": 1.5885916948318481,
"learning_rate": 0.001,
"loss": 2.0901,
"step": 101500
},
{
"epoch": 32.837750484809305,
"grad_norm": 1.571648359298706,
"learning_rate": 0.001,
"loss": 2.09,
"step": 101600
},
{
"epoch": 32.87007110536522,
"grad_norm": 1.2718168497085571,
"learning_rate": 0.001,
"loss": 2.086,
"step": 101700
},
{
"epoch": 32.902391725921134,
"grad_norm": 1.189794659614563,
"learning_rate": 0.001,
"loss": 2.0902,
"step": 101800
},
{
"epoch": 32.93471234647705,
"grad_norm": 1.6280981302261353,
"learning_rate": 0.001,
"loss": 2.0996,
"step": 101900
},
{
"epoch": 32.967032967032964,
"grad_norm": 1.2445026636123657,
"learning_rate": 0.001,
"loss": 2.0995,
"step": 102000
},
{
"epoch": 32.999353587588885,
"grad_norm": 1.5590155124664307,
"learning_rate": 0.001,
"loss": 2.0958,
"step": 102100
},
{
"epoch": 33.0316742081448,
"grad_norm": 1.9303966760635376,
"learning_rate": 0.001,
"loss": 1.9653,
"step": 102200
},
{
"epoch": 33.063994828700714,
"grad_norm": 1.5534723997116089,
"learning_rate": 0.001,
"loss": 1.9536,
"step": 102300
},
{
"epoch": 33.09631544925663,
"grad_norm": 1.7266992330551147,
"learning_rate": 0.001,
"loss": 1.9677,
"step": 102400
},
{
"epoch": 33.12863606981254,
"grad_norm": 2.0274415016174316,
"learning_rate": 0.001,
"loss": 1.9669,
"step": 102500
},
{
"epoch": 33.16095669036846,
"grad_norm": 1.6314760446548462,
"learning_rate": 0.001,
"loss": 1.9827,
"step": 102600
},
{
"epoch": 33.19327731092437,
"grad_norm": 1.4350862503051758,
"learning_rate": 0.001,
"loss": 1.9811,
"step": 102700
},
{
"epoch": 33.22559793148029,
"grad_norm": 1.9013689756393433,
"learning_rate": 0.001,
"loss": 1.9947,
"step": 102800
},
{
"epoch": 33.2579185520362,
"grad_norm": 1.85300874710083,
"learning_rate": 0.001,
"loss": 2.0024,
"step": 102900
},
{
"epoch": 33.290239172592116,
"grad_norm": 1.71381676197052,
"learning_rate": 0.001,
"loss": 2.0084,
"step": 103000
},
{
"epoch": 33.32255979314803,
"grad_norm": 1.7121025323867798,
"learning_rate": 0.001,
"loss": 2.0024,
"step": 103100
},
{
"epoch": 33.354880413703945,
"grad_norm": 1.8725587129592896,
"learning_rate": 0.001,
"loss": 2.0229,
"step": 103200
},
{
"epoch": 33.38720103425986,
"grad_norm": 1.6383904218673706,
"learning_rate": 0.001,
"loss": 2.0222,
"step": 103300
},
{
"epoch": 33.419521654815775,
"grad_norm": 1.5853757858276367,
"learning_rate": 0.001,
"loss": 2.0423,
"step": 103400
},
{
"epoch": 33.45184227537169,
"grad_norm": 1.7861037254333496,
"learning_rate": 0.001,
"loss": 2.0429,
"step": 103500
},
{
"epoch": 33.484162895927604,
"grad_norm": 1.4143872261047363,
"learning_rate": 0.001,
"loss": 2.0305,
"step": 103600
},
{
"epoch": 33.51648351648352,
"grad_norm": 1.3351759910583496,
"learning_rate": 0.001,
"loss": 2.0328,
"step": 103700
},
{
"epoch": 33.54880413703943,
"grad_norm": 1.6123368740081787,
"learning_rate": 0.001,
"loss": 2.0436,
"step": 103800
},
{
"epoch": 33.58112475759535,
"grad_norm": 1.5616430044174194,
"learning_rate": 0.001,
"loss": 2.0368,
"step": 103900
},
{
"epoch": 33.61344537815126,
"grad_norm": 1.4323886632919312,
"learning_rate": 0.001,
"loss": 2.0552,
"step": 104000
},
{
"epoch": 33.645765998707176,
"grad_norm": 1.7153393030166626,
"learning_rate": 0.001,
"loss": 2.0487,
"step": 104100
},
{
"epoch": 33.67808661926309,
"grad_norm": 1.5671979188919067,
"learning_rate": 0.001,
"loss": 2.0577,
"step": 104200
},
{
"epoch": 33.710407239819006,
"grad_norm": 1.7711577415466309,
"learning_rate": 0.001,
"loss": 2.0493,
"step": 104300
},
{
"epoch": 33.74272786037492,
"grad_norm": 1.6455186605453491,
"learning_rate": 0.001,
"loss": 2.0717,
"step": 104400
},
{
"epoch": 33.775048480930835,
"grad_norm": 1.6012450456619263,
"learning_rate": 0.001,
"loss": 2.0727,
"step": 104500
},
{
"epoch": 33.80736910148675,
"grad_norm": 2.0459935665130615,
"learning_rate": 0.001,
"loss": 2.0627,
"step": 104600
},
{
"epoch": 33.839689722042664,
"grad_norm": 1.5936315059661865,
"learning_rate": 0.001,
"loss": 2.0791,
"step": 104700
},
{
"epoch": 33.87201034259858,
"grad_norm": 2.3798508644104004,
"learning_rate": 0.001,
"loss": 2.0771,
"step": 104800
},
{
"epoch": 33.90433096315449,
"grad_norm": 1.5202367305755615,
"learning_rate": 0.001,
"loss": 2.0747,
"step": 104900
},
{
"epoch": 33.93665158371041,
"grad_norm": 1.810958743095398,
"learning_rate": 0.001,
"loss": 2.0842,
"step": 105000
},
{
"epoch": 33.96897220426632,
"grad_norm": 1.8689950704574585,
"learning_rate": 0.001,
"loss": 2.1037,
"step": 105100
},
{
"epoch": 34.00129282482224,
"grad_norm": 1.2684123516082764,
"learning_rate": 0.001,
"loss": 2.0769,
"step": 105200
},
{
"epoch": 34.03361344537815,
"grad_norm": 2.0106089115142822,
"learning_rate": 0.001,
"loss": 1.9514,
"step": 105300
},
{
"epoch": 34.065934065934066,
"grad_norm": 1.3193442821502686,
"learning_rate": 0.001,
"loss": 1.9505,
"step": 105400
},
{
"epoch": 34.09825468648998,
"grad_norm": 1.62157142162323,
"learning_rate": 0.001,
"loss": 1.9503,
"step": 105500
},
{
"epoch": 34.130575307045895,
"grad_norm": 1.8132089376449585,
"learning_rate": 0.001,
"loss": 1.9696,
"step": 105600
},
{
"epoch": 34.16289592760181,
"grad_norm": 1.5851075649261475,
"learning_rate": 0.001,
"loss": 1.9705,
"step": 105700
},
{
"epoch": 34.195216548157724,
"grad_norm": 1.1907926797866821,
"learning_rate": 0.001,
"loss": 1.9756,
"step": 105800
},
{
"epoch": 34.22753716871364,
"grad_norm": 1.9979790449142456,
"learning_rate": 0.001,
"loss": 1.9721,
"step": 105900
},
{
"epoch": 34.25985778926955,
"grad_norm": 1.5261240005493164,
"learning_rate": 0.001,
"loss": 1.9795,
"step": 106000
},
{
"epoch": 34.29217840982547,
"grad_norm": 1.6692026853561401,
"learning_rate": 0.001,
"loss": 1.9855,
"step": 106100
},
{
"epoch": 34.32449903038138,
"grad_norm": 2.0377280712127686,
"learning_rate": 0.001,
"loss": 1.9959,
"step": 106200
},
{
"epoch": 34.3568196509373,
"grad_norm": 1.3200879096984863,
"learning_rate": 0.001,
"loss": 2.0081,
"step": 106300
},
{
"epoch": 34.38914027149321,
"grad_norm": 2.1755311489105225,
"learning_rate": 0.001,
"loss": 1.9839,
"step": 106400
},
{
"epoch": 34.421460892049126,
"grad_norm": 1.659410834312439,
"learning_rate": 0.001,
"loss": 2.0166,
"step": 106500
},
{
"epoch": 34.45378151260504,
"grad_norm": 1.6596027612686157,
"learning_rate": 0.001,
"loss": 2.0029,
"step": 106600
},
{
"epoch": 34.486102133160955,
"grad_norm": 1.490046501159668,
"learning_rate": 0.001,
"loss": 1.9976,
"step": 106700
},
{
"epoch": 34.51842275371687,
"grad_norm": 1.6935290098190308,
"learning_rate": 0.001,
"loss": 2.0203,
"step": 106800
},
{
"epoch": 34.550743374272784,
"grad_norm": 1.5543889999389648,
"learning_rate": 0.001,
"loss": 2.0136,
"step": 106900
},
{
"epoch": 34.5830639948287,
"grad_norm": 1.4732424020767212,
"learning_rate": 0.001,
"loss": 2.017,
"step": 107000
},
{
"epoch": 34.61538461538461,
"grad_norm": 1.509547233581543,
"learning_rate": 0.001,
"loss": 2.0381,
"step": 107100
},
{
"epoch": 34.64770523594053,
"grad_norm": 2.1105563640594482,
"learning_rate": 0.001,
"loss": 2.037,
"step": 107200
},
{
"epoch": 34.68002585649644,
"grad_norm": 1.399300456047058,
"learning_rate": 0.001,
"loss": 2.0363,
"step": 107300
},
{
"epoch": 34.71234647705236,
"grad_norm": 1.9533933401107788,
"learning_rate": 0.001,
"loss": 2.0391,
"step": 107400
},
{
"epoch": 34.74466709760827,
"grad_norm": 1.9212638139724731,
"learning_rate": 0.001,
"loss": 2.0377,
"step": 107500
},
{
"epoch": 34.776987718164186,
"grad_norm": 1.9131107330322266,
"learning_rate": 0.001,
"loss": 2.0449,
"step": 107600
},
{
"epoch": 34.8093083387201,
"grad_norm": 1.5715868473052979,
"learning_rate": 0.001,
"loss": 2.0533,
"step": 107700
},
{
"epoch": 34.841628959276015,
"grad_norm": 1.5210678577423096,
"learning_rate": 0.001,
"loss": 2.0515,
"step": 107800
},
{
"epoch": 34.87394957983193,
"grad_norm": 1.4609169960021973,
"learning_rate": 0.001,
"loss": 2.0555,
"step": 107900
},
{
"epoch": 34.906270200387844,
"grad_norm": 1.274895191192627,
"learning_rate": 0.001,
"loss": 2.0561,
"step": 108000
},
{
"epoch": 34.93859082094376,
"grad_norm": 1.3937968015670776,
"learning_rate": 0.001,
"loss": 2.0723,
"step": 108100
},
{
"epoch": 34.97091144149967,
"grad_norm": 1.4828203916549683,
"learning_rate": 0.001,
"loss": 2.0713,
"step": 108200
},
{
"epoch": 35.003232062055595,
"grad_norm": 1.7049474716186523,
"learning_rate": 0.001,
"loss": 2.0587,
"step": 108300
},
{
"epoch": 35.03555268261151,
"grad_norm": 1.3542168140411377,
"learning_rate": 0.001,
"loss": 1.928,
"step": 108400
},
{
"epoch": 35.067873303167424,
"grad_norm": 2.243741750717163,
"learning_rate": 0.001,
"loss": 1.9264,
"step": 108500
},
{
"epoch": 35.10019392372334,
"grad_norm": 1.1347894668579102,
"learning_rate": 0.001,
"loss": 1.9211,
"step": 108600
},
{
"epoch": 35.13251454427925,
"grad_norm": 1.671585202217102,
"learning_rate": 0.001,
"loss": 1.9593,
"step": 108700
},
{
"epoch": 35.16483516483517,
"grad_norm": 1.1993159055709839,
"learning_rate": 0.001,
"loss": 1.9503,
"step": 108800
},
{
"epoch": 35.19715578539108,
"grad_norm": 1.3364758491516113,
"learning_rate": 0.001,
"loss": 1.9685,
"step": 108900
},
{
"epoch": 35.229476405947,
"grad_norm": 1.625673532485962,
"learning_rate": 0.001,
"loss": 1.9594,
"step": 109000
},
{
"epoch": 35.26179702650291,
"grad_norm": 1.8384968042373657,
"learning_rate": 0.001,
"loss": 1.9819,
"step": 109100
},
{
"epoch": 35.294117647058826,
"grad_norm": 1.3008297681808472,
"learning_rate": 0.001,
"loss": 1.9637,
"step": 109200
},
{
"epoch": 35.32643826761474,
"grad_norm": 1.42014741897583,
"learning_rate": 0.001,
"loss": 1.9671,
"step": 109300
},
{
"epoch": 35.358758888170655,
"grad_norm": 1.323197364807129,
"learning_rate": 0.001,
"loss": 1.9704,
"step": 109400
},
{
"epoch": 35.39107950872657,
"grad_norm": 1.5077106952667236,
"learning_rate": 0.001,
"loss": 1.9726,
"step": 109500
},
{
"epoch": 35.423400129282484,
"grad_norm": 2.083890199661255,
"learning_rate": 0.001,
"loss": 1.9629,
"step": 109600
},
{
"epoch": 35.4557207498384,
"grad_norm": 1.543499231338501,
"learning_rate": 0.001,
"loss": 1.9994,
"step": 109700
},
{
"epoch": 35.48804137039431,
"grad_norm": 1.34257972240448,
"learning_rate": 0.001,
"loss": 1.9949,
"step": 109800
},
{
"epoch": 35.52036199095023,
"grad_norm": 1.394116759300232,
"learning_rate": 0.001,
"loss": 2.0154,
"step": 109900
},
{
"epoch": 35.55268261150614,
"grad_norm": 1.224687933921814,
"learning_rate": 0.001,
"loss": 2.0221,
"step": 110000
},
{
"epoch": 35.58500323206206,
"grad_norm": 1.5279735326766968,
"learning_rate": 0.001,
"loss": 1.9958,
"step": 110100
},
{
"epoch": 35.61732385261797,
"grad_norm": 1.4509029388427734,
"learning_rate": 0.001,
"loss": 2.0004,
"step": 110200
},
{
"epoch": 35.649644473173886,
"grad_norm": 1.1077178716659546,
"learning_rate": 0.001,
"loss": 2.038,
"step": 110300
},
{
"epoch": 35.6819650937298,
"grad_norm": 1.495784044265747,
"learning_rate": 0.001,
"loss": 2.0186,
"step": 110400
},
{
"epoch": 35.714285714285715,
"grad_norm": 1.239295244216919,
"learning_rate": 0.001,
"loss": 2.0075,
"step": 110500
},
{
"epoch": 35.74660633484163,
"grad_norm": 1.6918079853057861,
"learning_rate": 0.001,
"loss": 2.0214,
"step": 110600
},
{
"epoch": 35.778926955397544,
"grad_norm": 1.210204005241394,
"learning_rate": 0.001,
"loss": 2.024,
"step": 110700
},
{
"epoch": 35.81124757595346,
"grad_norm": 1.7192320823669434,
"learning_rate": 0.001,
"loss": 2.0059,
"step": 110800
},
{
"epoch": 35.84356819650937,
"grad_norm": 1.3810573816299438,
"learning_rate": 0.001,
"loss": 2.0528,
"step": 110900
},
{
"epoch": 35.87588881706529,
"grad_norm": 1.758357048034668,
"learning_rate": 0.001,
"loss": 2.0309,
"step": 111000
},
{
"epoch": 35.9082094376212,
"grad_norm": 1.845845103263855,
"learning_rate": 0.001,
"loss": 2.0404,
"step": 111100
},
{
"epoch": 35.94053005817712,
"grad_norm": 1.2828718423843384,
"learning_rate": 0.001,
"loss": 2.0384,
"step": 111200
},
{
"epoch": 35.97285067873303,
"grad_norm": 1.4219506978988647,
"learning_rate": 0.001,
"loss": 2.0465,
"step": 111300
},
{
"epoch": 36.005171299288946,
"grad_norm": 1.2361496686935425,
"learning_rate": 0.001,
"loss": 2.0539,
"step": 111400
},
{
"epoch": 36.03749191984486,
"grad_norm": 1.4928691387176514,
"learning_rate": 0.001,
"loss": 1.9142,
"step": 111500
},
{
"epoch": 36.069812540400775,
"grad_norm": 1.2020481824874878,
"learning_rate": 0.001,
"loss": 1.9197,
"step": 111600
},
{
"epoch": 36.10213316095669,
"grad_norm": 1.018372893333435,
"learning_rate": 0.001,
"loss": 1.9154,
"step": 111700
},
{
"epoch": 36.134453781512605,
"grad_norm": 1.3498088121414185,
"learning_rate": 0.001,
"loss": 1.913,
"step": 111800
},
{
"epoch": 36.16677440206852,
"grad_norm": 1.097247838973999,
"learning_rate": 0.001,
"loss": 1.9381,
"step": 111900
},
{
"epoch": 36.199095022624434,
"grad_norm": 1.3852177858352661,
"learning_rate": 0.001,
"loss": 1.9463,
"step": 112000
},
{
"epoch": 36.23141564318035,
"grad_norm": 1.4728375673294067,
"learning_rate": 0.001,
"loss": 1.9594,
"step": 112100
},
{
"epoch": 36.26373626373626,
"grad_norm": 1.5016732215881348,
"learning_rate": 0.001,
"loss": 1.9393,
"step": 112200
},
{
"epoch": 36.29605688429218,
"grad_norm": 1.4927202463150024,
"learning_rate": 0.001,
"loss": 1.9546,
"step": 112300
},
{
"epoch": 36.32837750484809,
"grad_norm": 1.1831008195877075,
"learning_rate": 0.001,
"loss": 1.9703,
"step": 112400
},
{
"epoch": 36.36069812540401,
"grad_norm": 1.76808762550354,
"learning_rate": 0.001,
"loss": 1.9709,
"step": 112500
},
{
"epoch": 36.39301874595992,
"grad_norm": 1.2848598957061768,
"learning_rate": 0.001,
"loss": 1.9609,
"step": 112600
},
{
"epoch": 36.425339366515836,
"grad_norm": 1.5108144283294678,
"learning_rate": 0.001,
"loss": 1.9567,
"step": 112700
},
{
"epoch": 36.45765998707175,
"grad_norm": 1.2236725091934204,
"learning_rate": 0.001,
"loss": 1.9863,
"step": 112800
},
{
"epoch": 36.489980607627665,
"grad_norm": 1.0058413743972778,
"learning_rate": 0.001,
"loss": 1.9919,
"step": 112900
},
{
"epoch": 36.52230122818358,
"grad_norm": 1.4160171747207642,
"learning_rate": 0.001,
"loss": 1.9634,
"step": 113000
},
{
"epoch": 36.554621848739494,
"grad_norm": 1.1909221410751343,
"learning_rate": 0.001,
"loss": 1.9777,
"step": 113100
},
{
"epoch": 36.58694246929541,
"grad_norm": 1.2544959783554077,
"learning_rate": 0.001,
"loss": 1.9854,
"step": 113200
},
{
"epoch": 36.61926308985132,
"grad_norm": 1.5229424238204956,
"learning_rate": 0.001,
"loss": 2.0088,
"step": 113300
},
{
"epoch": 36.65158371040724,
"grad_norm": 1.0623260736465454,
"learning_rate": 0.001,
"loss": 1.9908,
"step": 113400
},
{
"epoch": 36.68390433096315,
"grad_norm": 1.053356409072876,
"learning_rate": 0.001,
"loss": 1.9932,
"step": 113500
},
{
"epoch": 36.71622495151907,
"grad_norm": 1.0189151763916016,
"learning_rate": 0.001,
"loss": 1.9961,
"step": 113600
},
{
"epoch": 36.74854557207498,
"grad_norm": 1.2587002515792847,
"learning_rate": 0.001,
"loss": 2.0122,
"step": 113700
},
{
"epoch": 36.780866192630896,
"grad_norm": 1.2830156087875366,
"learning_rate": 0.001,
"loss": 2.0072,
"step": 113800
},
{
"epoch": 36.81318681318681,
"grad_norm": 1.6737384796142578,
"learning_rate": 0.001,
"loss": 2.0168,
"step": 113900
},
{
"epoch": 36.845507433742725,
"grad_norm": 1.3077422380447388,
"learning_rate": 0.001,
"loss": 2.0139,
"step": 114000
},
{
"epoch": 36.87782805429864,
"grad_norm": 1.417799711227417,
"learning_rate": 0.001,
"loss": 2.0118,
"step": 114100
},
{
"epoch": 36.910148674854554,
"grad_norm": 1.3692338466644287,
"learning_rate": 0.001,
"loss": 2.0219,
"step": 114200
},
{
"epoch": 36.94246929541047,
"grad_norm": 1.2055531740188599,
"learning_rate": 0.001,
"loss": 2.0191,
"step": 114300
},
{
"epoch": 36.97478991596638,
"grad_norm": 1.2253512144088745,
"learning_rate": 0.001,
"loss": 2.0287,
"step": 114400
},
{
"epoch": 37.007110536522305,
"grad_norm": 1.2838011980056763,
"learning_rate": 0.001,
"loss": 1.9971,
"step": 114500
},
{
"epoch": 37.03943115707822,
"grad_norm": 1.2368170022964478,
"learning_rate": 0.001,
"loss": 1.8909,
"step": 114600
},
{
"epoch": 37.071751777634134,
"grad_norm": 1.07204270362854,
"learning_rate": 0.001,
"loss": 1.8982,
"step": 114700
},
{
"epoch": 37.10407239819005,
"grad_norm": 1.3839013576507568,
"learning_rate": 0.001,
"loss": 1.9024,
"step": 114800
},
{
"epoch": 37.13639301874596,
"grad_norm": 2.0179762840270996,
"learning_rate": 0.001,
"loss": 1.9065,
"step": 114900
},
{
"epoch": 37.16871363930188,
"grad_norm": 1.2924718856811523,
"learning_rate": 0.001,
"loss": 1.9107,
"step": 115000
},
{
"epoch": 37.20103425985779,
"grad_norm": 1.395383596420288,
"learning_rate": 0.001,
"loss": 1.9157,
"step": 115100
},
{
"epoch": 37.23335488041371,
"grad_norm": 1.0912832021713257,
"learning_rate": 0.001,
"loss": 1.9446,
"step": 115200
},
{
"epoch": 37.26567550096962,
"grad_norm": 1.6009647846221924,
"learning_rate": 0.001,
"loss": 1.927,
"step": 115300
},
{
"epoch": 37.297996121525536,
"grad_norm": 1.2845613956451416,
"learning_rate": 0.001,
"loss": 1.9379,
"step": 115400
},
{
"epoch": 37.33031674208145,
"grad_norm": 1.1216648817062378,
"learning_rate": 0.001,
"loss": 1.9436,
"step": 115500
},
{
"epoch": 37.362637362637365,
"grad_norm": 1.1084158420562744,
"learning_rate": 0.001,
"loss": 1.9347,
"step": 115600
},
{
"epoch": 37.39495798319328,
"grad_norm": 1.501246452331543,
"learning_rate": 0.001,
"loss": 1.9566,
"step": 115700
},
{
"epoch": 37.427278603749194,
"grad_norm": 1.1051548719406128,
"learning_rate": 0.001,
"loss": 1.9608,
"step": 115800
},
{
"epoch": 37.45959922430511,
"grad_norm": 1.2411729097366333,
"learning_rate": 0.001,
"loss": 1.9485,
"step": 115900
},
{
"epoch": 37.49191984486102,
"grad_norm": 1.4812703132629395,
"learning_rate": 0.001,
"loss": 1.9647,
"step": 116000
},
{
"epoch": 37.52424046541694,
"grad_norm": 1.242192029953003,
"learning_rate": 0.001,
"loss": 1.9573,
"step": 116100
},
{
"epoch": 37.55656108597285,
"grad_norm": 1.146277904510498,
"learning_rate": 0.001,
"loss": 1.9583,
"step": 116200
},
{
"epoch": 37.58888170652877,
"grad_norm": 1.420883297920227,
"learning_rate": 0.001,
"loss": 1.9733,
"step": 116300
},
{
"epoch": 37.62120232708468,
"grad_norm": 1.1877259016036987,
"learning_rate": 0.001,
"loss": 1.9787,
"step": 116400
},
{
"epoch": 37.653522947640596,
"grad_norm": 1.1888118982315063,
"learning_rate": 0.001,
"loss": 1.972,
"step": 116500
},
{
"epoch": 37.68584356819651,
"grad_norm": 1.042897343635559,
"learning_rate": 0.001,
"loss": 1.9742,
"step": 116600
},
{
"epoch": 37.718164188752425,
"grad_norm": 1.110247254371643,
"learning_rate": 0.001,
"loss": 1.9944,
"step": 116700
},
{
"epoch": 37.75048480930834,
"grad_norm": 0.9629383087158203,
"learning_rate": 0.001,
"loss": 1.9785,
"step": 116800
},
{
"epoch": 37.782805429864254,
"grad_norm": 1.3344351053237915,
"learning_rate": 0.001,
"loss": 2.0116,
"step": 116900
},
{
"epoch": 37.81512605042017,
"grad_norm": 1.3152480125427246,
"learning_rate": 0.001,
"loss": 1.9854,
"step": 117000
},
{
"epoch": 37.84744667097608,
"grad_norm": 1.245644211769104,
"learning_rate": 0.001,
"loss": 1.9971,
"step": 117100
},
{
"epoch": 37.879767291532,
"grad_norm": 1.1488375663757324,
"learning_rate": 0.001,
"loss": 1.9812,
"step": 117200
},
{
"epoch": 37.91208791208791,
"grad_norm": 1.0456823110580444,
"learning_rate": 0.001,
"loss": 2.0149,
"step": 117300
},
{
"epoch": 37.94440853264383,
"grad_norm": 1.369343638420105,
"learning_rate": 0.001,
"loss": 2.0129,
"step": 117400
},
{
"epoch": 37.97672915319974,
"grad_norm": 1.4738596677780151,
"learning_rate": 0.001,
"loss": 2.021,
"step": 117500
},
{
"epoch": 38.009049773755656,
"grad_norm": 1.3079040050506592,
"learning_rate": 0.001,
"loss": 1.9785,
"step": 117600
},
{
"epoch": 38.04137039431157,
"grad_norm": 1.6166199445724487,
"learning_rate": 0.001,
"loss": 1.883,
"step": 117700
},
{
"epoch": 38.073691014867485,
"grad_norm": 1.3175252676010132,
"learning_rate": 0.001,
"loss": 1.871,
"step": 117800
},
{
"epoch": 38.1060116354234,
"grad_norm": 1.0515940189361572,
"learning_rate": 0.001,
"loss": 1.8938,
"step": 117900
},
{
"epoch": 38.138332255979314,
"grad_norm": 1.0703885555267334,
"learning_rate": 0.001,
"loss": 1.9082,
"step": 118000
},
{
"epoch": 38.17065287653523,
"grad_norm": 1.1041367053985596,
"learning_rate": 0.001,
"loss": 1.9039,
"step": 118100
},
{
"epoch": 38.20297349709114,
"grad_norm": 1.2935791015625,
"learning_rate": 0.001,
"loss": 1.9165,
"step": 118200
},
{
"epoch": 38.23529411764706,
"grad_norm": 1.1646063327789307,
"learning_rate": 0.001,
"loss": 1.8891,
"step": 118300
},
{
"epoch": 38.26761473820297,
"grad_norm": 1.0428200960159302,
"learning_rate": 0.001,
"loss": 1.9169,
"step": 118400
},
{
"epoch": 38.29993535875889,
"grad_norm": 0.9607976078987122,
"learning_rate": 0.001,
"loss": 1.9173,
"step": 118500
},
{
"epoch": 38.3322559793148,
"grad_norm": 2.0434305667877197,
"learning_rate": 0.001,
"loss": 1.9229,
"step": 118600
},
{
"epoch": 38.364576599870716,
"grad_norm": 1.2759087085723877,
"learning_rate": 0.001,
"loss": 1.9232,
"step": 118700
},
{
"epoch": 38.39689722042663,
"grad_norm": 1.1599791049957275,
"learning_rate": 0.001,
"loss": 1.9373,
"step": 118800
},
{
"epoch": 38.429217840982545,
"grad_norm": 1.1653485298156738,
"learning_rate": 0.001,
"loss": 1.9279,
"step": 118900
},
{
"epoch": 38.46153846153846,
"grad_norm": 1.1542648077011108,
"learning_rate": 0.001,
"loss": 1.9215,
"step": 119000
},
{
"epoch": 38.493859082094374,
"grad_norm": 1.0891329050064087,
"learning_rate": 0.001,
"loss": 1.9305,
"step": 119100
},
{
"epoch": 38.52617970265029,
"grad_norm": 1.2211647033691406,
"learning_rate": 0.001,
"loss": 1.9546,
"step": 119200
},
{
"epoch": 38.558500323206204,
"grad_norm": 1.691691279411316,
"learning_rate": 0.001,
"loss": 1.9608,
"step": 119300
},
{
"epoch": 38.59082094376212,
"grad_norm": 1.301088571548462,
"learning_rate": 0.001,
"loss": 1.9468,
"step": 119400
},
{
"epoch": 38.62314156431803,
"grad_norm": 1.126369833946228,
"learning_rate": 0.001,
"loss": 1.9543,
"step": 119500
},
{
"epoch": 38.65546218487395,
"grad_norm": 1.0784813165664673,
"learning_rate": 0.001,
"loss": 1.9531,
"step": 119600
},
{
"epoch": 38.68778280542986,
"grad_norm": 1.1354749202728271,
"learning_rate": 0.001,
"loss": 1.9629,
"step": 119700
},
{
"epoch": 38.720103425985776,
"grad_norm": 1.3400801420211792,
"learning_rate": 0.001,
"loss": 1.9768,
"step": 119800
},
{
"epoch": 38.75242404654169,
"grad_norm": 1.5241564512252808,
"learning_rate": 0.001,
"loss": 1.9631,
"step": 119900
},
{
"epoch": 38.784744667097605,
"grad_norm": 1.2316350936889648,
"learning_rate": 0.001,
"loss": 1.9713,
"step": 120000
},
{
"epoch": 38.81706528765352,
"grad_norm": 1.0691965818405151,
"learning_rate": 0.001,
"loss": 1.9635,
"step": 120100
},
{
"epoch": 38.849385908209435,
"grad_norm": 1.1982426643371582,
"learning_rate": 0.001,
"loss": 1.996,
"step": 120200
},
{
"epoch": 38.88170652876535,
"grad_norm": 1.0837290287017822,
"learning_rate": 0.001,
"loss": 1.9862,
"step": 120300
},
{
"epoch": 38.914027149321264,
"grad_norm": 1.4738332033157349,
"learning_rate": 0.001,
"loss": 1.9908,
"step": 120400
},
{
"epoch": 38.94634776987718,
"grad_norm": 1.73765230178833,
"learning_rate": 0.001,
"loss": 1.9781,
"step": 120500
},
{
"epoch": 38.97866839043309,
"grad_norm": 1.286083459854126,
"learning_rate": 0.001,
"loss": 2.0017,
"step": 120600
},
{
"epoch": 39.010989010989015,
"grad_norm": 1.5349410772323608,
"learning_rate": 0.001,
"loss": 1.9367,
"step": 120700
},
{
"epoch": 39.04330963154493,
"grad_norm": 1.5377883911132812,
"learning_rate": 0.001,
"loss": 1.859,
"step": 120800
},
{
"epoch": 39.075630252100844,
"grad_norm": 1.2859725952148438,
"learning_rate": 0.001,
"loss": 1.8716,
"step": 120900
},
{
"epoch": 39.10795087265676,
"grad_norm": 1.770250916481018,
"learning_rate": 0.001,
"loss": 1.8772,
"step": 121000
},
{
"epoch": 39.14027149321267,
"grad_norm": 1.3946150541305542,
"learning_rate": 0.001,
"loss": 1.8759,
"step": 121100
},
{
"epoch": 39.17259211376859,
"grad_norm": 1.2503063678741455,
"learning_rate": 0.001,
"loss": 1.8674,
"step": 121200
},
{
"epoch": 39.2049127343245,
"grad_norm": 1.0887434482574463,
"learning_rate": 0.001,
"loss": 1.8681,
"step": 121300
},
{
"epoch": 39.237233354880416,
"grad_norm": 1.2380496263504028,
"learning_rate": 0.001,
"loss": 1.9177,
"step": 121400
},
{
"epoch": 39.26955397543633,
"grad_norm": 1.4318231344223022,
"learning_rate": 0.001,
"loss": 1.9009,
"step": 121500
},
{
"epoch": 39.301874595992246,
"grad_norm": 1.4162850379943848,
"learning_rate": 0.001,
"loss": 1.8917,
"step": 121600
},
{
"epoch": 39.33419521654816,
"grad_norm": 1.4503649473190308,
"learning_rate": 0.001,
"loss": 1.9165,
"step": 121700
},
{
"epoch": 39.366515837104075,
"grad_norm": 1.4559428691864014,
"learning_rate": 0.001,
"loss": 1.9064,
"step": 121800
},
{
"epoch": 39.39883645765999,
"grad_norm": 1.3657339811325073,
"learning_rate": 0.001,
"loss": 1.8925,
"step": 121900
},
{
"epoch": 39.431157078215904,
"grad_norm": 1.1839957237243652,
"learning_rate": 0.001,
"loss": 1.9145,
"step": 122000
},
{
"epoch": 39.46347769877182,
"grad_norm": 1.3001296520233154,
"learning_rate": 0.001,
"loss": 1.9223,
"step": 122100
},
{
"epoch": 39.49579831932773,
"grad_norm": 1.4840679168701172,
"learning_rate": 0.001,
"loss": 1.9204,
"step": 122200
},
{
"epoch": 39.52811893988365,
"grad_norm": 1.4856712818145752,
"learning_rate": 0.001,
"loss": 1.935,
"step": 122300
},
{
"epoch": 39.56043956043956,
"grad_norm": 1.253468632698059,
"learning_rate": 0.001,
"loss": 1.9202,
"step": 122400
},
{
"epoch": 39.59276018099548,
"grad_norm": 1.2646362781524658,
"learning_rate": 0.001,
"loss": 1.9373,
"step": 122500
},
{
"epoch": 39.62508080155139,
"grad_norm": 1.436712622642517,
"learning_rate": 0.001,
"loss": 1.9379,
"step": 122600
},
{
"epoch": 39.657401422107306,
"grad_norm": 1.2675755023956299,
"learning_rate": 0.001,
"loss": 1.9559,
"step": 122700
},
{
"epoch": 39.68972204266322,
"grad_norm": 1.567535400390625,
"learning_rate": 0.001,
"loss": 1.9614,
"step": 122800
},
{
"epoch": 39.722042663219135,
"grad_norm": 1.2379465103149414,
"learning_rate": 0.001,
"loss": 1.9453,
"step": 122900
},
{
"epoch": 39.75436328377505,
"grad_norm": 1.4940135478973389,
"learning_rate": 0.001,
"loss": 1.9551,
"step": 123000
},
{
"epoch": 39.786683904330964,
"grad_norm": 1.5482773780822754,
"learning_rate": 0.001,
"loss": 1.9755,
"step": 123100
},
{
"epoch": 39.81900452488688,
"grad_norm": 1.1395546197891235,
"learning_rate": 0.001,
"loss": 1.9733,
"step": 123200
},
{
"epoch": 39.85132514544279,
"grad_norm": 2.028623342514038,
"learning_rate": 0.001,
"loss": 1.9566,
"step": 123300
},
{
"epoch": 39.88364576599871,
"grad_norm": 1.3051038980484009,
"learning_rate": 0.001,
"loss": 1.9707,
"step": 123400
},
{
"epoch": 39.91596638655462,
"grad_norm": 1.2123631238937378,
"learning_rate": 0.001,
"loss": 1.972,
"step": 123500
},
{
"epoch": 39.94828700711054,
"grad_norm": 1.4840129613876343,
"learning_rate": 0.001,
"loss": 1.9554,
"step": 123600
},
{
"epoch": 39.98060762766645,
"grad_norm": 1.2305253744125366,
"learning_rate": 0.001,
"loss": 1.9722,
"step": 123700
},
{
"epoch": 40.012928248222366,
"grad_norm": 1.2095211744308472,
"learning_rate": 0.001,
"loss": 1.9293,
"step": 123800
},
{
"epoch": 40.04524886877828,
"grad_norm": 1.7045320272445679,
"learning_rate": 0.001,
"loss": 1.8624,
"step": 123900
},
{
"epoch": 40.077569489334195,
"grad_norm": 1.7265287637710571,
"learning_rate": 0.001,
"loss": 1.8376,
"step": 124000
},
{
"epoch": 40.10989010989011,
"grad_norm": 1.1483691930770874,
"learning_rate": 0.001,
"loss": 1.8393,
"step": 124100
},
{
"epoch": 40.142210730446024,
"grad_norm": 1.6978778839111328,
"learning_rate": 0.001,
"loss": 1.8468,
"step": 124200
},
{
"epoch": 40.17453135100194,
"grad_norm": 1.343893051147461,
"learning_rate": 0.001,
"loss": 1.8629,
"step": 124300
},
{
"epoch": 40.20685197155785,
"grad_norm": 1.4871058464050293,
"learning_rate": 0.001,
"loss": 1.8865,
"step": 124400
},
{
"epoch": 40.23917259211377,
"grad_norm": 1.637841820716858,
"learning_rate": 0.001,
"loss": 1.8659,
"step": 124500
},
{
"epoch": 40.27149321266968,
"grad_norm": 1.8821446895599365,
"learning_rate": 0.001,
"loss": 1.8728,
"step": 124600
},
{
"epoch": 40.3038138332256,
"grad_norm": 1.7137219905853271,
"learning_rate": 0.001,
"loss": 1.8914,
"step": 124700
},
{
"epoch": 40.33613445378151,
"grad_norm": 1.8872164487838745,
"learning_rate": 0.001,
"loss": 1.8869,
"step": 124800
},
{
"epoch": 40.368455074337426,
"grad_norm": 1.2478173971176147,
"learning_rate": 0.001,
"loss": 1.9071,
"step": 124900
},
{
"epoch": 40.40077569489334,
"grad_norm": 1.838136911392212,
"learning_rate": 0.001,
"loss": 1.9017,
"step": 125000
},
{
"epoch": 40.433096315449255,
"grad_norm": 1.546654462814331,
"learning_rate": 0.001,
"loss": 1.8874,
"step": 125100
},
{
"epoch": 40.46541693600517,
"grad_norm": 1.3680957555770874,
"learning_rate": 0.001,
"loss": 1.9164,
"step": 125200
},
{
"epoch": 40.497737556561084,
"grad_norm": 1.4279357194900513,
"learning_rate": 0.001,
"loss": 1.9252,
"step": 125300
},
{
"epoch": 40.530058177117,
"grad_norm": 1.331580400466919,
"learning_rate": 0.001,
"loss": 1.915,
"step": 125400
},
{
"epoch": 40.56237879767291,
"grad_norm": 1.5645660161972046,
"learning_rate": 0.001,
"loss": 1.9173,
"step": 125500
},
{
"epoch": 40.59469941822883,
"grad_norm": 1.7686409950256348,
"learning_rate": 0.001,
"loss": 1.9249,
"step": 125600
},
{
"epoch": 40.62702003878474,
"grad_norm": 1.4038866758346558,
"learning_rate": 0.001,
"loss": 1.9211,
"step": 125700
},
{
"epoch": 40.65934065934066,
"grad_norm": 1.7020624876022339,
"learning_rate": 0.001,
"loss": 1.9155,
"step": 125800
},
{
"epoch": 40.69166127989657,
"grad_norm": 1.450563907623291,
"learning_rate": 0.001,
"loss": 1.9234,
"step": 125900
},
{
"epoch": 40.723981900452486,
"grad_norm": 1.3181536197662354,
"learning_rate": 0.001,
"loss": 1.9412,
"step": 126000
},
{
"epoch": 40.7563025210084,
"grad_norm": 1.0498440265655518,
"learning_rate": 0.001,
"loss": 1.9281,
"step": 126100
},
{
"epoch": 40.788623141564315,
"grad_norm": 1.4465446472167969,
"learning_rate": 0.001,
"loss": 1.9506,
"step": 126200
},
{
"epoch": 40.82094376212023,
"grad_norm": 1.2336151599884033,
"learning_rate": 0.001,
"loss": 1.9491,
"step": 126300
},
{
"epoch": 40.853264382676144,
"grad_norm": 1.3242679834365845,
"learning_rate": 0.001,
"loss": 1.9514,
"step": 126400
},
{
"epoch": 40.88558500323206,
"grad_norm": 1.2799617052078247,
"learning_rate": 0.001,
"loss": 1.9405,
"step": 126500
},
{
"epoch": 40.91790562378797,
"grad_norm": 1.2991124391555786,
"learning_rate": 0.001,
"loss": 1.9466,
"step": 126600
},
{
"epoch": 40.95022624434389,
"grad_norm": 1.4150607585906982,
"learning_rate": 0.001,
"loss": 1.9604,
"step": 126700
},
{
"epoch": 40.9825468648998,
"grad_norm": 1.208999514579773,
"learning_rate": 0.001,
"loss": 1.9608,
"step": 126800
},
{
"epoch": 41.014867485455724,
"grad_norm": 1.668093204498291,
"learning_rate": 0.001,
"loss": 1.8746,
"step": 126900
},
{
"epoch": 41.04718810601164,
"grad_norm": 1.4752800464630127,
"learning_rate": 0.001,
"loss": 1.8344,
"step": 127000
},
{
"epoch": 41.07950872656755,
"grad_norm": 2.1280136108398438,
"learning_rate": 0.001,
"loss": 1.8427,
"step": 127100
},
{
"epoch": 41.11182934712347,
"grad_norm": 1.9451837539672852,
"learning_rate": 0.001,
"loss": 1.8396,
"step": 127200
},
{
"epoch": 41.14414996767938,
"grad_norm": 1.3411965370178223,
"learning_rate": 0.001,
"loss": 1.8439,
"step": 127300
},
{
"epoch": 41.1764705882353,
"grad_norm": 1.758414387702942,
"learning_rate": 0.001,
"loss": 1.8294,
"step": 127400
},
{
"epoch": 41.20879120879121,
"grad_norm": 1.796096682548523,
"learning_rate": 0.001,
"loss": 1.8607,
"step": 127500
},
{
"epoch": 41.241111829347126,
"grad_norm": 1.3038780689239502,
"learning_rate": 0.001,
"loss": 1.8796,
"step": 127600
},
{
"epoch": 41.27343244990304,
"grad_norm": 1.5630769729614258,
"learning_rate": 0.001,
"loss": 1.8847,
"step": 127700
},
{
"epoch": 41.305753070458955,
"grad_norm": 1.3789194822311401,
"learning_rate": 0.001,
"loss": 1.8714,
"step": 127800
},
{
"epoch": 41.33807369101487,
"grad_norm": 1.3722786903381348,
"learning_rate": 0.001,
"loss": 1.8615,
"step": 127900
},
{
"epoch": 41.370394311570784,
"grad_norm": 2.0524232387542725,
"learning_rate": 0.001,
"loss": 1.9055,
"step": 128000
},
{
"epoch": 41.4027149321267,
"grad_norm": 1.6414809226989746,
"learning_rate": 0.001,
"loss": 1.8853,
"step": 128100
},
{
"epoch": 41.43503555268261,
"grad_norm": 1.334133267402649,
"learning_rate": 0.001,
"loss": 1.874,
"step": 128200
},
{
"epoch": 41.46735617323853,
"grad_norm": 1.618760585784912,
"learning_rate": 0.001,
"loss": 1.8997,
"step": 128300
},
{
"epoch": 41.49967679379444,
"grad_norm": 1.7455990314483643,
"learning_rate": 0.001,
"loss": 1.8986,
"step": 128400
},
{
"epoch": 41.53199741435036,
"grad_norm": 2.0014233589172363,
"learning_rate": 0.001,
"loss": 1.8999,
"step": 128500
},
{
"epoch": 41.56431803490627,
"grad_norm": 1.647352695465088,
"learning_rate": 0.001,
"loss": 1.895,
"step": 128600
},
{
"epoch": 41.596638655462186,
"grad_norm": 1.543352723121643,
"learning_rate": 0.001,
"loss": 1.8822,
"step": 128700
},
{
"epoch": 41.6289592760181,
"grad_norm": 1.7632330656051636,
"learning_rate": 0.001,
"loss": 1.8975,
"step": 128800
},
{
"epoch": 41.661279896574015,
"grad_norm": 2.0121119022369385,
"learning_rate": 0.001,
"loss": 1.906,
"step": 128900
},
{
"epoch": 41.69360051712993,
"grad_norm": 1.5535619258880615,
"learning_rate": 0.001,
"loss": 1.9233,
"step": 129000
},
{
"epoch": 41.725921137685845,
"grad_norm": 1.4161769151687622,
"learning_rate": 0.001,
"loss": 1.9226,
"step": 129100
},
{
"epoch": 41.75824175824176,
"grad_norm": 1.93502676486969,
"learning_rate": 0.001,
"loss": 1.9377,
"step": 129200
},
{
"epoch": 41.790562378797674,
"grad_norm": 1.6511423587799072,
"learning_rate": 0.001,
"loss": 1.9139,
"step": 129300
},
{
"epoch": 41.82288299935359,
"grad_norm": 1.5430395603179932,
"learning_rate": 0.001,
"loss": 1.9264,
"step": 129400
},
{
"epoch": 41.8552036199095,
"grad_norm": 1.556210994720459,
"learning_rate": 0.001,
"loss": 1.9407,
"step": 129500
},
{
"epoch": 41.88752424046542,
"grad_norm": 2.07692551612854,
"learning_rate": 0.001,
"loss": 1.9227,
"step": 129600
},
{
"epoch": 41.91984486102133,
"grad_norm": 1.8000415563583374,
"learning_rate": 0.001,
"loss": 1.9491,
"step": 129700
},
{
"epoch": 41.95216548157725,
"grad_norm": 1.6233563423156738,
"learning_rate": 0.001,
"loss": 1.9245,
"step": 129800
},
{
"epoch": 41.98448610213316,
"grad_norm": 1.6756561994552612,
"learning_rate": 0.001,
"loss": 1.9551,
"step": 129900
},
{
"epoch": 42.016806722689076,
"grad_norm": 1.9280065298080444,
"learning_rate": 0.001,
"loss": 1.8467,
"step": 130000
},
{
"epoch": 42.04912734324499,
"grad_norm": 1.9981783628463745,
"learning_rate": 0.001,
"loss": 1.8106,
"step": 130100
},
{
"epoch": 42.081447963800905,
"grad_norm": 2.082932949066162,
"learning_rate": 0.001,
"loss": 1.8251,
"step": 130200
},
{
"epoch": 42.11376858435682,
"grad_norm": 2.3820343017578125,
"learning_rate": 0.001,
"loss": 1.8222,
"step": 130300
},
{
"epoch": 42.146089204912734,
"grad_norm": 1.403403401374817,
"learning_rate": 0.001,
"loss": 1.8253,
"step": 130400
},
{
"epoch": 42.17840982546865,
"grad_norm": 2.2364022731781006,
"learning_rate": 0.001,
"loss": 1.8458,
"step": 130500
},
{
"epoch": 42.21073044602456,
"grad_norm": 2.2380168437957764,
"learning_rate": 0.001,
"loss": 1.8439,
"step": 130600
},
{
"epoch": 42.24305106658048,
"grad_norm": 1.7103081941604614,
"learning_rate": 0.001,
"loss": 1.8403,
"step": 130700
},
{
"epoch": 42.27537168713639,
"grad_norm": 1.7879369258880615,
"learning_rate": 0.001,
"loss": 1.8448,
"step": 130800
},
{
"epoch": 42.30769230769231,
"grad_norm": 2.4792017936706543,
"learning_rate": 0.001,
"loss": 1.8527,
"step": 130900
},
{
"epoch": 42.34001292824822,
"grad_norm": 2.2171385288238525,
"learning_rate": 0.001,
"loss": 1.8467,
"step": 131000
},
{
"epoch": 42.372333548804136,
"grad_norm": 1.9090009927749634,
"learning_rate": 0.001,
"loss": 1.8737,
"step": 131100
},
{
"epoch": 42.40465416936005,
"grad_norm": 1.862734079360962,
"learning_rate": 0.001,
"loss": 1.8801,
"step": 131200
},
{
"epoch": 42.436974789915965,
"grad_norm": 1.944027066230774,
"learning_rate": 0.001,
"loss": 1.8575,
"step": 131300
},
{
"epoch": 42.46929541047188,
"grad_norm": 1.8651403188705444,
"learning_rate": 0.001,
"loss": 1.8745,
"step": 131400
},
{
"epoch": 42.501616031027794,
"grad_norm": 1.9831877946853638,
"learning_rate": 0.001,
"loss": 1.8786,
"step": 131500
},
{
"epoch": 42.53393665158371,
"grad_norm": 1.9882594347000122,
"learning_rate": 0.001,
"loss": 1.872,
"step": 131600
},
{
"epoch": 42.56625727213962,
"grad_norm": 2.680168867111206,
"learning_rate": 0.001,
"loss": 1.8877,
"step": 131700
},
{
"epoch": 42.59857789269554,
"grad_norm": 1.6203826665878296,
"learning_rate": 0.001,
"loss": 1.9111,
"step": 131800
},
{
"epoch": 42.63089851325145,
"grad_norm": 1.6531736850738525,
"learning_rate": 0.001,
"loss": 1.8921,
"step": 131900
},
{
"epoch": 42.66321913380737,
"grad_norm": 1.7691742181777954,
"learning_rate": 0.001,
"loss": 1.9,
"step": 132000
},
{
"epoch": 42.69553975436328,
"grad_norm": 2.291424036026001,
"learning_rate": 0.001,
"loss": 1.8941,
"step": 132100
},
{
"epoch": 42.727860374919196,
"grad_norm": 1.9580634832382202,
"learning_rate": 0.001,
"loss": 1.9028,
"step": 132200
},
{
"epoch": 42.76018099547511,
"grad_norm": 1.9733229875564575,
"learning_rate": 0.001,
"loss": 1.9039,
"step": 132300
},
{
"epoch": 42.792501616031025,
"grad_norm": 1.861606478691101,
"learning_rate": 0.001,
"loss": 1.9105,
"step": 132400
},
{
"epoch": 42.82482223658694,
"grad_norm": 1.7530721426010132,
"learning_rate": 0.001,
"loss": 1.916,
"step": 132500
},
{
"epoch": 42.857142857142854,
"grad_norm": 2.0133233070373535,
"learning_rate": 0.001,
"loss": 1.913,
"step": 132600
},
{
"epoch": 42.88946347769877,
"grad_norm": 1.8621476888656616,
"learning_rate": 0.001,
"loss": 1.9032,
"step": 132700
},
{
"epoch": 42.92178409825468,
"grad_norm": 2.1451456546783447,
"learning_rate": 0.001,
"loss": 1.9438,
"step": 132800
},
{
"epoch": 42.9541047188106,
"grad_norm": 1.651073694229126,
"learning_rate": 0.001,
"loss": 1.9355,
"step": 132900
},
{
"epoch": 42.98642533936652,
"grad_norm": 1.9844690561294556,
"learning_rate": 0.001,
"loss": 1.9233,
"step": 133000
},
{
"epoch": 43.018745959922434,
"grad_norm": 1.3748688697814941,
"learning_rate": 0.001,
"loss": 1.8581,
"step": 133100
},
{
"epoch": 43.05106658047835,
"grad_norm": 1.2402000427246094,
"learning_rate": 0.001,
"loss": 1.787,
"step": 133200
},
{
"epoch": 43.08338720103426,
"grad_norm": 1.3622288703918457,
"learning_rate": 0.001,
"loss": 1.7909,
"step": 133300
},
{
"epoch": 43.11570782159018,
"grad_norm": 1.5441625118255615,
"learning_rate": 0.001,
"loss": 1.8092,
"step": 133400
},
{
"epoch": 43.14802844214609,
"grad_norm": 1.443248987197876,
"learning_rate": 0.001,
"loss": 1.8175,
"step": 133500
},
{
"epoch": 43.18034906270201,
"grad_norm": 1.303268551826477,
"learning_rate": 0.001,
"loss": 1.839,
"step": 133600
},
{
"epoch": 43.21266968325792,
"grad_norm": 1.8648037910461426,
"learning_rate": 0.001,
"loss": 1.8477,
"step": 133700
},
{
"epoch": 43.244990303813836,
"grad_norm": 1.5793299674987793,
"learning_rate": 0.001,
"loss": 1.8161,
"step": 133800
},
{
"epoch": 43.27731092436975,
"grad_norm": 1.2750500440597534,
"learning_rate": 0.001,
"loss": 1.8347,
"step": 133900
},
{
"epoch": 43.309631544925665,
"grad_norm": 1.87288236618042,
"learning_rate": 0.001,
"loss": 1.851,
"step": 134000
},
{
"epoch": 43.34195216548158,
"grad_norm": 1.6583763360977173,
"learning_rate": 0.001,
"loss": 1.8523,
"step": 134100
},
{
"epoch": 43.374272786037494,
"grad_norm": 1.6558666229248047,
"learning_rate": 0.001,
"loss": 1.8563,
"step": 134200
},
{
"epoch": 43.40659340659341,
"grad_norm": 1.6735751628875732,
"learning_rate": 0.001,
"loss": 1.8612,
"step": 134300
},
{
"epoch": 43.43891402714932,
"grad_norm": 1.3858246803283691,
"learning_rate": 0.001,
"loss": 1.8426,
"step": 134400
},
{
"epoch": 43.47123464770524,
"grad_norm": 1.8875099420547485,
"learning_rate": 0.001,
"loss": 1.8739,
"step": 134500
},
{
"epoch": 43.50355526826115,
"grad_norm": 1.4078848361968994,
"learning_rate": 0.001,
"loss": 1.8585,
"step": 134600
},
{
"epoch": 43.53587588881707,
"grad_norm": 1.3911489248275757,
"learning_rate": 0.001,
"loss": 1.8548,
"step": 134700
},
{
"epoch": 43.56819650937298,
"grad_norm": 1.5766676664352417,
"learning_rate": 0.001,
"loss": 1.8771,
"step": 134800
},
{
"epoch": 43.600517129928896,
"grad_norm": 1.8566309213638306,
"learning_rate": 0.001,
"loss": 1.8583,
"step": 134900
},
{
"epoch": 43.63283775048481,
"grad_norm": 1.2162312269210815,
"learning_rate": 0.001,
"loss": 1.8837,
"step": 135000
},
{
"epoch": 43.665158371040725,
"grad_norm": 1.2028443813323975,
"learning_rate": 0.001,
"loss": 1.8684,
"step": 135100
},
{
"epoch": 43.69747899159664,
"grad_norm": 1.4986368417739868,
"learning_rate": 0.001,
"loss": 1.888,
"step": 135200
},
{
"epoch": 43.729799612152554,
"grad_norm": 1.400987148284912,
"learning_rate": 0.001,
"loss": 1.8848,
"step": 135300
},
{
"epoch": 43.76212023270847,
"grad_norm": 1.536422848701477,
"learning_rate": 0.001,
"loss": 1.8994,
"step": 135400
},
{
"epoch": 43.79444085326438,
"grad_norm": 1.6821508407592773,
"learning_rate": 0.001,
"loss": 1.9077,
"step": 135500
},
{
"epoch": 43.8267614738203,
"grad_norm": 1.1288377046585083,
"learning_rate": 0.001,
"loss": 1.8833,
"step": 135600
},
{
"epoch": 43.85908209437621,
"grad_norm": 1.2076668739318848,
"learning_rate": 0.001,
"loss": 1.8983,
"step": 135700
},
{
"epoch": 43.89140271493213,
"grad_norm": 1.4700038433074951,
"learning_rate": 0.001,
"loss": 1.906,
"step": 135800
},
{
"epoch": 43.92372333548804,
"grad_norm": 1.7205662727355957,
"learning_rate": 0.001,
"loss": 1.9095,
"step": 135900
},
{
"epoch": 43.956043956043956,
"grad_norm": 1.2482390403747559,
"learning_rate": 0.001,
"loss": 1.8971,
"step": 136000
},
{
"epoch": 43.98836457659987,
"grad_norm": 1.927675724029541,
"learning_rate": 0.001,
"loss": 1.9145,
"step": 136100
},
{
"epoch": 44.020685197155785,
"grad_norm": 1.3814749717712402,
"learning_rate": 0.001,
"loss": 1.8382,
"step": 136200
},
{
"epoch": 44.0530058177117,
"grad_norm": 1.3042851686477661,
"learning_rate": 0.001,
"loss": 1.796,
"step": 136300
},
{
"epoch": 44.085326438267614,
"grad_norm": 1.4995719194412231,
"learning_rate": 0.001,
"loss": 1.7771,
"step": 136400
},
{
"epoch": 44.11764705882353,
"grad_norm": 1.3295960426330566,
"learning_rate": 0.001,
"loss": 1.807,
"step": 136500
},
{
"epoch": 44.14996767937944,
"grad_norm": 1.2102105617523193,
"learning_rate": 0.001,
"loss": 1.7994,
"step": 136600
},
{
"epoch": 44.18228829993536,
"grad_norm": 1.1424363851547241,
"learning_rate": 0.001,
"loss": 1.8154,
"step": 136700
},
{
"epoch": 44.21460892049127,
"grad_norm": 1.3782585859298706,
"learning_rate": 0.001,
"loss": 1.8056,
"step": 136800
},
{
"epoch": 44.24692954104719,
"grad_norm": 1.4626226425170898,
"learning_rate": 0.001,
"loss": 1.8078,
"step": 136900
},
{
"epoch": 44.2792501616031,
"grad_norm": 1.2396538257598877,
"learning_rate": 0.001,
"loss": 1.8336,
"step": 137000
},
{
"epoch": 44.311570782159016,
"grad_norm": 1.3221837282180786,
"learning_rate": 0.001,
"loss": 1.8088,
"step": 137100
},
{
"epoch": 44.34389140271493,
"grad_norm": 1.2316101789474487,
"learning_rate": 0.001,
"loss": 1.8322,
"step": 137200
},
{
"epoch": 44.376212023270845,
"grad_norm": 1.264435052871704,
"learning_rate": 0.001,
"loss": 1.8232,
"step": 137300
},
{
"epoch": 44.40853264382676,
"grad_norm": 1.1061835289001465,
"learning_rate": 0.001,
"loss": 1.8281,
"step": 137400
},
{
"epoch": 44.440853264382675,
"grad_norm": 1.1366873979568481,
"learning_rate": 0.001,
"loss": 1.8283,
"step": 137500
},
{
"epoch": 44.47317388493859,
"grad_norm": 1.7174246311187744,
"learning_rate": 0.001,
"loss": 1.8599,
"step": 137600
},
{
"epoch": 44.505494505494504,
"grad_norm": 1.4456156492233276,
"learning_rate": 0.001,
"loss": 1.8474,
"step": 137700
},
{
"epoch": 44.53781512605042,
"grad_norm": 0.972943902015686,
"learning_rate": 0.001,
"loss": 1.8544,
"step": 137800
},
{
"epoch": 44.57013574660633,
"grad_norm": 1.6857820749282837,
"learning_rate": 0.001,
"loss": 1.8733,
"step": 137900
},
{
"epoch": 44.60245636716225,
"grad_norm": 1.701591968536377,
"learning_rate": 0.001,
"loss": 1.8474,
"step": 138000
},
{
"epoch": 44.63477698771816,
"grad_norm": 1.1282321214675903,
"learning_rate": 0.001,
"loss": 1.8592,
"step": 138100
},
{
"epoch": 44.66709760827408,
"grad_norm": 1.5330438613891602,
"learning_rate": 0.001,
"loss": 1.8709,
"step": 138200
},
{
"epoch": 44.69941822882999,
"grad_norm": 1.1971231698989868,
"learning_rate": 0.001,
"loss": 1.8878,
"step": 138300
},
{
"epoch": 44.731738849385906,
"grad_norm": 1.7220470905303955,
"learning_rate": 0.001,
"loss": 1.8817,
"step": 138400
},
{
"epoch": 44.76405946994182,
"grad_norm": 1.196541666984558,
"learning_rate": 0.001,
"loss": 1.8801,
"step": 138500
},
{
"epoch": 44.796380090497735,
"grad_norm": 1.4516090154647827,
"learning_rate": 0.001,
"loss": 1.8725,
"step": 138600
},
{
"epoch": 44.82870071105365,
"grad_norm": 1.449439287185669,
"learning_rate": 0.001,
"loss": 1.8915,
"step": 138700
},
{
"epoch": 44.861021331609564,
"grad_norm": 1.433020830154419,
"learning_rate": 0.001,
"loss": 1.8824,
"step": 138800
},
{
"epoch": 44.89334195216548,
"grad_norm": 1.412376046180725,
"learning_rate": 0.001,
"loss": 1.8862,
"step": 138900
},
{
"epoch": 44.92566257272139,
"grad_norm": 1.0682293176651,
"learning_rate": 0.001,
"loss": 1.892,
"step": 139000
},
{
"epoch": 44.95798319327731,
"grad_norm": 1.2839839458465576,
"learning_rate": 0.001,
"loss": 1.9081,
"step": 139100
},
{
"epoch": 44.99030381383322,
"grad_norm": 1.5696237087249756,
"learning_rate": 0.001,
"loss": 1.8943,
"step": 139200
},
{
"epoch": 45.022624434389144,
"grad_norm": 1.4640036821365356,
"learning_rate": 0.001,
"loss": 1.8173,
"step": 139300
},
{
"epoch": 45.05494505494506,
"grad_norm": 1.5870167016983032,
"learning_rate": 0.001,
"loss": 1.7714,
"step": 139400
},
{
"epoch": 45.08726567550097,
"grad_norm": 1.6109189987182617,
"learning_rate": 0.001,
"loss": 1.7732,
"step": 139500
},
{
"epoch": 45.11958629605689,
"grad_norm": 1.1997172832489014,
"learning_rate": 0.001,
"loss": 1.787,
"step": 139600
},
{
"epoch": 45.1519069166128,
"grad_norm": 1.2339918613433838,
"learning_rate": 0.001,
"loss": 1.7585,
"step": 139700
},
{
"epoch": 45.18422753716872,
"grad_norm": 1.179796814918518,
"learning_rate": 0.001,
"loss": 1.7848,
"step": 139800
},
{
"epoch": 45.21654815772463,
"grad_norm": 1.5013426542282104,
"learning_rate": 0.001,
"loss": 1.7957,
"step": 139900
},
{
"epoch": 45.248868778280546,
"grad_norm": 1.3376390933990479,
"learning_rate": 0.001,
"loss": 1.8045,
"step": 140000
},
{
"epoch": 45.28118939883646,
"grad_norm": 1.2788093090057373,
"learning_rate": 0.001,
"loss": 1.7864,
"step": 140100
},
{
"epoch": 45.313510019392375,
"grad_norm": 1.4019917249679565,
"learning_rate": 0.001,
"loss": 1.8004,
"step": 140200
},
{
"epoch": 45.34583063994829,
"grad_norm": 1.2221229076385498,
"learning_rate": 0.001,
"loss": 1.8066,
"step": 140300
},
{
"epoch": 45.378151260504204,
"grad_norm": 1.4707577228546143,
"learning_rate": 0.001,
"loss": 1.8257,
"step": 140400
},
{
"epoch": 45.41047188106012,
"grad_norm": 1.2390767335891724,
"learning_rate": 0.001,
"loss": 1.8273,
"step": 140500
},
{
"epoch": 45.44279250161603,
"grad_norm": 1.0136756896972656,
"learning_rate": 0.001,
"loss": 1.8305,
"step": 140600
},
{
"epoch": 45.47511312217195,
"grad_norm": 1.0152579545974731,
"learning_rate": 0.001,
"loss": 1.8296,
"step": 140700
},
{
"epoch": 45.50743374272786,
"grad_norm": 1.436432957649231,
"learning_rate": 0.001,
"loss": 1.8522,
"step": 140800
},
{
"epoch": 45.53975436328378,
"grad_norm": 1.0323009490966797,
"learning_rate": 0.001,
"loss": 1.8393,
"step": 140900
},
{
"epoch": 45.57207498383969,
"grad_norm": 1.4058171510696411,
"learning_rate": 0.001,
"loss": 1.8404,
"step": 141000
},
{
"epoch": 45.604395604395606,
"grad_norm": 1.2600674629211426,
"learning_rate": 0.001,
"loss": 1.8454,
"step": 141100
},
{
"epoch": 45.63671622495152,
"grad_norm": 1.2156469821929932,
"learning_rate": 0.001,
"loss": 1.8537,
"step": 141200
},
{
"epoch": 45.669036845507435,
"grad_norm": 1.3858039379119873,
"learning_rate": 0.001,
"loss": 1.8662,
"step": 141300
},
{
"epoch": 45.70135746606335,
"grad_norm": 1.4224052429199219,
"learning_rate": 0.001,
"loss": 1.8397,
"step": 141400
},
{
"epoch": 45.733678086619264,
"grad_norm": 1.3239479064941406,
"learning_rate": 0.001,
"loss": 1.8671,
"step": 141500
},
{
"epoch": 45.76599870717518,
"grad_norm": 1.519800066947937,
"learning_rate": 0.001,
"loss": 1.873,
"step": 141600
},
{
"epoch": 45.79831932773109,
"grad_norm": 1.158959150314331,
"learning_rate": 0.001,
"loss": 1.8647,
"step": 141700
},
{
"epoch": 45.83063994828701,
"grad_norm": 1.415377140045166,
"learning_rate": 0.001,
"loss": 1.8647,
"step": 141800
},
{
"epoch": 45.86296056884292,
"grad_norm": 1.214583396911621,
"learning_rate": 0.001,
"loss": 1.864,
"step": 141900
},
{
"epoch": 45.89528118939884,
"grad_norm": 1.3497668504714966,
"learning_rate": 0.001,
"loss": 1.8847,
"step": 142000
},
{
"epoch": 45.92760180995475,
"grad_norm": 1.2246413230895996,
"learning_rate": 0.001,
"loss": 1.899,
"step": 142100
},
{
"epoch": 45.959922430510666,
"grad_norm": 1.4457169771194458,
"learning_rate": 0.001,
"loss": 1.8664,
"step": 142200
},
{
"epoch": 45.99224305106658,
"grad_norm": 1.2997976541519165,
"learning_rate": 0.001,
"loss": 1.8957,
"step": 142300
},
{
"epoch": 46.024563671622495,
"grad_norm": 1.4182782173156738,
"learning_rate": 0.001,
"loss": 1.7877,
"step": 142400
},
{
"epoch": 46.05688429217841,
"grad_norm": 1.2993918657302856,
"learning_rate": 0.001,
"loss": 1.7651,
"step": 142500
},
{
"epoch": 46.089204912734324,
"grad_norm": 1.2173502445220947,
"learning_rate": 0.001,
"loss": 1.7561,
"step": 142600
},
{
"epoch": 46.12152553329024,
"grad_norm": 1.4112443923950195,
"learning_rate": 0.001,
"loss": 1.7623,
"step": 142700
},
{
"epoch": 46.15384615384615,
"grad_norm": 1.5435090065002441,
"learning_rate": 0.001,
"loss": 1.7749,
"step": 142800
},
{
"epoch": 46.18616677440207,
"grad_norm": 1.3198472261428833,
"learning_rate": 0.001,
"loss": 1.7711,
"step": 142900
},
{
"epoch": 46.21848739495798,
"grad_norm": 1.6420048475265503,
"learning_rate": 0.001,
"loss": 1.7993,
"step": 143000
},
{
"epoch": 46.2508080155139,
"grad_norm": 1.378956913948059,
"learning_rate": 0.001,
"loss": 1.7934,
"step": 143100
},
{
"epoch": 46.28312863606981,
"grad_norm": 1.5023648738861084,
"learning_rate": 0.001,
"loss": 1.7963,
"step": 143200
},
{
"epoch": 46.315449256625726,
"grad_norm": 1.281911849975586,
"learning_rate": 0.001,
"loss": 1.8086,
"step": 143300
},
{
"epoch": 46.34776987718164,
"grad_norm": 1.1474652290344238,
"learning_rate": 0.001,
"loss": 1.8174,
"step": 143400
},
{
"epoch": 46.380090497737555,
"grad_norm": 1.4720494747161865,
"learning_rate": 0.001,
"loss": 1.7957,
"step": 143500
},
{
"epoch": 46.41241111829347,
"grad_norm": 1.2694511413574219,
"learning_rate": 0.001,
"loss": 1.8011,
"step": 143600
},
{
"epoch": 46.444731738849384,
"grad_norm": 1.3873778581619263,
"learning_rate": 0.001,
"loss": 1.8205,
"step": 143700
},
{
"epoch": 46.4770523594053,
"grad_norm": 1.3449006080627441,
"learning_rate": 0.001,
"loss": 1.8294,
"step": 143800
},
{
"epoch": 46.50937297996121,
"grad_norm": 1.5486829280853271,
"learning_rate": 0.001,
"loss": 1.8243,
"step": 143900
},
{
"epoch": 46.54169360051713,
"grad_norm": 1.3362038135528564,
"learning_rate": 0.001,
"loss": 1.8159,
"step": 144000
},
{
"epoch": 46.57401422107304,
"grad_norm": 1.412407636642456,
"learning_rate": 0.001,
"loss": 1.8099,
"step": 144100
},
{
"epoch": 46.60633484162896,
"grad_norm": 1.3122761249542236,
"learning_rate": 0.001,
"loss": 1.8263,
"step": 144200
},
{
"epoch": 46.63865546218487,
"grad_norm": 1.3933433294296265,
"learning_rate": 0.001,
"loss": 1.8221,
"step": 144300
},
{
"epoch": 46.670976082740786,
"grad_norm": 1.0872950553894043,
"learning_rate": 0.001,
"loss": 1.8414,
"step": 144400
},
{
"epoch": 46.7032967032967,
"grad_norm": 1.0704154968261719,
"learning_rate": 0.001,
"loss": 1.8611,
"step": 144500
},
{
"epoch": 46.735617323852615,
"grad_norm": 1.1374051570892334,
"learning_rate": 0.001,
"loss": 1.8597,
"step": 144600
},
{
"epoch": 46.76793794440853,
"grad_norm": 1.6145614385604858,
"learning_rate": 0.001,
"loss": 1.8636,
"step": 144700
},
{
"epoch": 46.800258564964444,
"grad_norm": 1.250145673751831,
"learning_rate": 0.001,
"loss": 1.8532,
"step": 144800
},
{
"epoch": 46.83257918552036,
"grad_norm": 1.1978321075439453,
"learning_rate": 0.001,
"loss": 1.8524,
"step": 144900
},
{
"epoch": 46.864899806076274,
"grad_norm": 1.2241102457046509,
"learning_rate": 0.001,
"loss": 1.8546,
"step": 145000
},
{
"epoch": 46.89722042663219,
"grad_norm": 1.416428565979004,
"learning_rate": 0.001,
"loss": 1.8539,
"step": 145100
},
{
"epoch": 46.9295410471881,
"grad_norm": 1.2089383602142334,
"learning_rate": 0.001,
"loss": 1.8663,
"step": 145200
},
{
"epoch": 46.96186166774402,
"grad_norm": 1.3217616081237793,
"learning_rate": 0.001,
"loss": 1.8666,
"step": 145300
},
{
"epoch": 46.99418228829994,
"grad_norm": 1.6821898221969604,
"learning_rate": 0.001,
"loss": 1.8631,
"step": 145400
},
{
"epoch": 47.02650290885585,
"grad_norm": 1.6980481147766113,
"learning_rate": 0.001,
"loss": 1.7668,
"step": 145500
},
{
"epoch": 47.05882352941177,
"grad_norm": 1.5981022119522095,
"learning_rate": 0.001,
"loss": 1.7509,
"step": 145600
},
{
"epoch": 47.09114414996768,
"grad_norm": 1.54694664478302,
"learning_rate": 0.001,
"loss": 1.7445,
"step": 145700
},
{
"epoch": 47.1234647705236,
"grad_norm": 1.3953649997711182,
"learning_rate": 0.001,
"loss": 1.7486,
"step": 145800
},
{
"epoch": 47.15578539107951,
"grad_norm": 1.4519585371017456,
"learning_rate": 0.001,
"loss": 1.7573,
"step": 145900
},
{
"epoch": 47.188106011635426,
"grad_norm": 2.0267391204833984,
"learning_rate": 0.001,
"loss": 1.7892,
"step": 146000
},
{
"epoch": 47.22042663219134,
"grad_norm": 1.5844534635543823,
"learning_rate": 0.001,
"loss": 1.7733,
"step": 146100
},
{
"epoch": 47.252747252747255,
"grad_norm": 1.2251486778259277,
"learning_rate": 0.001,
"loss": 1.7747,
"step": 146200
},
{
"epoch": 47.28506787330317,
"grad_norm": 1.4506583213806152,
"learning_rate": 0.001,
"loss": 1.7746,
"step": 146300
},
{
"epoch": 47.317388493859085,
"grad_norm": 1.6739964485168457,
"learning_rate": 0.001,
"loss": 1.7853,
"step": 146400
},
{
"epoch": 47.349709114415,
"grad_norm": 1.7414036989212036,
"learning_rate": 0.001,
"loss": 1.7778,
"step": 146500
},
{
"epoch": 47.382029734970914,
"grad_norm": 1.4631189107894897,
"learning_rate": 0.001,
"loss": 1.7837,
"step": 146600
},
{
"epoch": 47.41435035552683,
"grad_norm": 1.6663905382156372,
"learning_rate": 0.001,
"loss": 1.7969,
"step": 146700
},
{
"epoch": 47.44667097608274,
"grad_norm": 1.7138595581054688,
"learning_rate": 0.001,
"loss": 1.7962,
"step": 146800
},
{
"epoch": 47.47899159663866,
"grad_norm": 1.4735912084579468,
"learning_rate": 0.001,
"loss": 1.8132,
"step": 146900
},
{
"epoch": 47.51131221719457,
"grad_norm": 1.72100031375885,
"learning_rate": 0.001,
"loss": 1.8061,
"step": 147000
},
{
"epoch": 47.543632837750486,
"grad_norm": 1.1838710308074951,
"learning_rate": 0.001,
"loss": 1.7897,
"step": 147100
},
{
"epoch": 47.5759534583064,
"grad_norm": 1.4770824909210205,
"learning_rate": 0.001,
"loss": 1.8138,
"step": 147200
},
{
"epoch": 47.608274078862316,
"grad_norm": 1.287657380104065,
"learning_rate": 0.001,
"loss": 1.8097,
"step": 147300
},
{
"epoch": 47.64059469941823,
"grad_norm": 1.8169690370559692,
"learning_rate": 0.001,
"loss": 1.8287,
"step": 147400
},
{
"epoch": 47.672915319974145,
"grad_norm": 0.9983140826225281,
"learning_rate": 0.001,
"loss": 1.8407,
"step": 147500
},
{
"epoch": 47.70523594053006,
"grad_norm": 1.3537484407424927,
"learning_rate": 0.001,
"loss": 1.8438,
"step": 147600
},
{
"epoch": 47.737556561085974,
"grad_norm": 1.5467236042022705,
"learning_rate": 0.001,
"loss": 1.8278,
"step": 147700
},
{
"epoch": 47.76987718164189,
"grad_norm": 1.1953139305114746,
"learning_rate": 0.001,
"loss": 1.8182,
"step": 147800
},
{
"epoch": 47.8021978021978,
"grad_norm": 1.4098021984100342,
"learning_rate": 0.001,
"loss": 1.8585,
"step": 147900
},
{
"epoch": 47.83451842275372,
"grad_norm": 1.4294242858886719,
"learning_rate": 0.001,
"loss": 1.8365,
"step": 148000
},
{
"epoch": 47.86683904330963,
"grad_norm": 1.4361600875854492,
"learning_rate": 0.001,
"loss": 1.8329,
"step": 148100
},
{
"epoch": 47.89915966386555,
"grad_norm": 1.189009428024292,
"learning_rate": 0.001,
"loss": 1.8502,
"step": 148200
},
{
"epoch": 47.93148028442146,
"grad_norm": 1.3974965810775757,
"learning_rate": 0.001,
"loss": 1.8439,
"step": 148300
},
{
"epoch": 47.963800904977376,
"grad_norm": 1.1778879165649414,
"learning_rate": 0.001,
"loss": 1.8449,
"step": 148400
},
{
"epoch": 47.99612152553329,
"grad_norm": 1.9312989711761475,
"learning_rate": 0.001,
"loss": 1.8532,
"step": 148500
},
{
"epoch": 48.028442146089205,
"grad_norm": 1.6134992837905884,
"learning_rate": 0.001,
"loss": 1.7465,
"step": 148600
},
{
"epoch": 48.06076276664512,
"grad_norm": 1.2701274156570435,
"learning_rate": 0.001,
"loss": 1.7405,
"step": 148700
},
{
"epoch": 48.093083387201034,
"grad_norm": 1.5127066373825073,
"learning_rate": 0.001,
"loss": 1.7266,
"step": 148800
},
{
"epoch": 48.12540400775695,
"grad_norm": 1.2889701128005981,
"learning_rate": 0.001,
"loss": 1.7455,
"step": 148900
},
{
"epoch": 48.15772462831286,
"grad_norm": 1.8702465295791626,
"learning_rate": 0.001,
"loss": 1.7568,
"step": 149000
},
{
"epoch": 48.19004524886878,
"grad_norm": 1.6072839498519897,
"learning_rate": 0.001,
"loss": 1.7385,
"step": 149100
},
{
"epoch": 48.22236586942469,
"grad_norm": 1.8324649333953857,
"learning_rate": 0.001,
"loss": 1.766,
"step": 149200
},
{
"epoch": 48.25468648998061,
"grad_norm": 1.686521053314209,
"learning_rate": 0.001,
"loss": 1.7666,
"step": 149300
},
{
"epoch": 48.28700711053652,
"grad_norm": 1.4718869924545288,
"learning_rate": 0.001,
"loss": 1.7506,
"step": 149400
},
{
"epoch": 48.319327731092436,
"grad_norm": 1.7188520431518555,
"learning_rate": 0.001,
"loss": 1.7695,
"step": 149500
},
{
"epoch": 48.35164835164835,
"grad_norm": 1.57921302318573,
"learning_rate": 0.001,
"loss": 1.7729,
"step": 149600
},
{
"epoch": 48.383968972204265,
"grad_norm": 1.2841829061508179,
"learning_rate": 0.001,
"loss": 1.7712,
"step": 149700
},
{
"epoch": 48.41628959276018,
"grad_norm": 1.3768117427825928,
"learning_rate": 0.001,
"loss": 1.7834,
"step": 149800
},
{
"epoch": 48.448610213316094,
"grad_norm": 1.1611604690551758,
"learning_rate": 0.001,
"loss": 1.788,
"step": 149900
},
{
"epoch": 48.48093083387201,
"grad_norm": 1.3717632293701172,
"learning_rate": 0.001,
"loss": 1.7862,
"step": 150000
},
{
"epoch": 48.51325145442792,
"grad_norm": 1.4986873865127563,
"learning_rate": 0.001,
"loss": 1.7847,
"step": 150100
},
{
"epoch": 48.54557207498384,
"grad_norm": 1.6464810371398926,
"learning_rate": 0.001,
"loss": 1.7995,
"step": 150200
},
{
"epoch": 48.57789269553975,
"grad_norm": 1.5459891557693481,
"learning_rate": 0.001,
"loss": 1.7868,
"step": 150300
},
{
"epoch": 48.61021331609567,
"grad_norm": 1.5244051218032837,
"learning_rate": 0.001,
"loss": 1.8098,
"step": 150400
},
{
"epoch": 48.64253393665158,
"grad_norm": 1.25544011592865,
"learning_rate": 0.001,
"loss": 1.815,
"step": 150500
},
{
"epoch": 48.674854557207496,
"grad_norm": 1.3828271627426147,
"learning_rate": 0.001,
"loss": 1.8117,
"step": 150600
},
{
"epoch": 48.70717517776341,
"grad_norm": 1.4629228115081787,
"learning_rate": 0.001,
"loss": 1.8145,
"step": 150700
},
{
"epoch": 48.739495798319325,
"grad_norm": 1.336828589439392,
"learning_rate": 0.001,
"loss": 1.8303,
"step": 150800
},
{
"epoch": 48.77181641887524,
"grad_norm": 1.5909345149993896,
"learning_rate": 0.001,
"loss": 1.8385,
"step": 150900
},
{
"epoch": 48.804137039431154,
"grad_norm": 1.407732605934143,
"learning_rate": 0.001,
"loss": 1.8142,
"step": 151000
},
{
"epoch": 48.83645765998707,
"grad_norm": 1.2567416429519653,
"learning_rate": 0.001,
"loss": 1.8275,
"step": 151100
},
{
"epoch": 48.86877828054298,
"grad_norm": 1.3103371858596802,
"learning_rate": 0.001,
"loss": 1.8312,
"step": 151200
},
{
"epoch": 48.9010989010989,
"grad_norm": 1.4088467359542847,
"learning_rate": 0.001,
"loss": 1.8218,
"step": 151300
},
{
"epoch": 48.93341952165481,
"grad_norm": 1.8668159246444702,
"learning_rate": 0.001,
"loss": 1.8424,
"step": 151400
},
{
"epoch": 48.96574014221073,
"grad_norm": 1.5781002044677734,
"learning_rate": 0.001,
"loss": 1.8504,
"step": 151500
},
{
"epoch": 48.99806076276664,
"grad_norm": 1.4598246812820435,
"learning_rate": 0.001,
"loss": 1.8163,
"step": 151600
},
{
"epoch": 49.03038138332256,
"grad_norm": 1.3983478546142578,
"learning_rate": 0.001,
"loss": 1.7089,
"step": 151700
},
{
"epoch": 49.06270200387848,
"grad_norm": 1.577358603477478,
"learning_rate": 0.001,
"loss": 1.7011,
"step": 151800
},
{
"epoch": 49.09502262443439,
"grad_norm": 2.1866214275360107,
"learning_rate": 0.001,
"loss": 1.715,
"step": 151900
},
{
"epoch": 49.12734324499031,
"grad_norm": 1.976177453994751,
"learning_rate": 0.001,
"loss": 1.731,
"step": 152000
},
{
"epoch": 49.15966386554622,
"grad_norm": 1.2965463399887085,
"learning_rate": 0.001,
"loss": 1.7506,
"step": 152100
},
{
"epoch": 49.191984486102136,
"grad_norm": 1.4683022499084473,
"learning_rate": 0.001,
"loss": 1.7458,
"step": 152200
},
{
"epoch": 49.22430510665805,
"grad_norm": 1.3126236200332642,
"learning_rate": 0.001,
"loss": 1.7486,
"step": 152300
},
{
"epoch": 49.256625727213965,
"grad_norm": 1.6978693008422852,
"learning_rate": 0.001,
"loss": 1.7525,
"step": 152400
},
{
"epoch": 49.28894634776988,
"grad_norm": 1.4591189622879028,
"learning_rate": 0.001,
"loss": 1.7625,
"step": 152500
},
{
"epoch": 49.321266968325794,
"grad_norm": 1.7097078561782837,
"learning_rate": 0.001,
"loss": 1.7696,
"step": 152600
},
{
"epoch": 49.35358758888171,
"grad_norm": 1.3662595748901367,
"learning_rate": 0.001,
"loss": 1.7443,
"step": 152700
},
{
"epoch": 49.38590820943762,
"grad_norm": 1.8225806951522827,
"learning_rate": 0.001,
"loss": 1.7724,
"step": 152800
},
{
"epoch": 49.41822882999354,
"grad_norm": 1.3173518180847168,
"learning_rate": 0.001,
"loss": 1.7782,
"step": 152900
},
{
"epoch": 49.45054945054945,
"grad_norm": 1.3523099422454834,
"learning_rate": 0.001,
"loss": 1.7784,
"step": 153000
},
{
"epoch": 49.48287007110537,
"grad_norm": 1.639769434928894,
"learning_rate": 0.001,
"loss": 1.7701,
"step": 153100
},
{
"epoch": 49.51519069166128,
"grad_norm": 1.6762608289718628,
"learning_rate": 0.001,
"loss": 1.7884,
"step": 153200
},
{
"epoch": 49.547511312217196,
"grad_norm": 1.9911901950836182,
"learning_rate": 0.001,
"loss": 1.785,
"step": 153300
},
{
"epoch": 49.57983193277311,
"grad_norm": 1.4912683963775635,
"learning_rate": 0.001,
"loss": 1.7702,
"step": 153400
},
{
"epoch": 49.612152553329025,
"grad_norm": 1.8878214359283447,
"learning_rate": 0.001,
"loss": 1.7945,
"step": 153500
},
{
"epoch": 49.64447317388494,
"grad_norm": 1.3476299047470093,
"learning_rate": 0.001,
"loss": 1.7988,
"step": 153600
},
{
"epoch": 49.676793794440854,
"grad_norm": 1.8301808834075928,
"learning_rate": 0.001,
"loss": 1.7997,
"step": 153700
},
{
"epoch": 49.70911441499677,
"grad_norm": 1.6317429542541504,
"learning_rate": 0.001,
"loss": 1.8126,
"step": 153800
},
{
"epoch": 49.74143503555268,
"grad_norm": 1.8560541868209839,
"learning_rate": 0.001,
"loss": 1.787,
"step": 153900
},
{
"epoch": 49.7737556561086,
"grad_norm": 1.715898036956787,
"learning_rate": 0.001,
"loss": 1.8086,
"step": 154000
},
{
"epoch": 49.80607627666451,
"grad_norm": 1.7181135416030884,
"learning_rate": 0.001,
"loss": 1.8184,
"step": 154100
},
{
"epoch": 49.83839689722043,
"grad_norm": 1.3545619249343872,
"learning_rate": 0.001,
"loss": 1.798,
"step": 154200
},
{
"epoch": 49.87071751777634,
"grad_norm": 1.5673587322235107,
"learning_rate": 0.001,
"loss": 1.8144,
"step": 154300
},
{
"epoch": 49.903038138332256,
"grad_norm": 1.5232983827590942,
"learning_rate": 0.001,
"loss": 1.83,
"step": 154400
},
{
"epoch": 49.93535875888817,
"grad_norm": 1.9223566055297852,
"learning_rate": 0.001,
"loss": 1.8302,
"step": 154500
},
{
"epoch": 49.967679379444085,
"grad_norm": 2.017540693283081,
"learning_rate": 0.001,
"loss": 1.8268,
"step": 154600
},
{
"epoch": 50.0,
"grad_norm": 2.689202308654785,
"learning_rate": 0.001,
"loss": 1.798,
"step": 154700
},
{
"epoch": 50.032320620555915,
"grad_norm": 2.791679859161377,
"learning_rate": 0.001,
"loss": 1.6968,
"step": 154800
},
{
"epoch": 50.06464124111183,
"grad_norm": 2.2272870540618896,
"learning_rate": 0.001,
"loss": 1.7063,
"step": 154900
},
{
"epoch": 50.096961861667744,
"grad_norm": 2.1595311164855957,
"learning_rate": 0.001,
"loss": 1.7114,
"step": 155000
},
{
"epoch": 50.12928248222366,
"grad_norm": 2.6563737392425537,
"learning_rate": 0.001,
"loss": 1.7146,
"step": 155100
},
{
"epoch": 50.16160310277957,
"grad_norm": 2.2539663314819336,
"learning_rate": 0.001,
"loss": 1.7092,
"step": 155200
},
{
"epoch": 50.19392372333549,
"grad_norm": 2.3137271404266357,
"learning_rate": 0.001,
"loss": 1.7244,
"step": 155300
},
{
"epoch": 50.2262443438914,
"grad_norm": 1.7513139247894287,
"learning_rate": 0.001,
"loss": 1.7423,
"step": 155400
},
{
"epoch": 50.25856496444732,
"grad_norm": 2.2095792293548584,
"learning_rate": 0.001,
"loss": 1.7539,
"step": 155500
},
{
"epoch": 50.29088558500323,
"grad_norm": 2.0682663917541504,
"learning_rate": 0.001,
"loss": 1.7331,
"step": 155600
},
{
"epoch": 50.323206205559146,
"grad_norm": 2.1839566230773926,
"learning_rate": 0.001,
"loss": 1.7547,
"step": 155700
},
{
"epoch": 50.35552682611506,
"grad_norm": 1.9047203063964844,
"learning_rate": 0.001,
"loss": 1.7365,
"step": 155800
},
{
"epoch": 50.387847446670975,
"grad_norm": 2.428255558013916,
"learning_rate": 0.001,
"loss": 1.7581,
"step": 155900
},
{
"epoch": 50.42016806722689,
"grad_norm": 2.507028102874756,
"learning_rate": 0.001,
"loss": 1.7549,
"step": 156000
},
{
"epoch": 50.452488687782804,
"grad_norm": 2.5041208267211914,
"learning_rate": 0.001,
"loss": 1.7576,
"step": 156100
},
{
"epoch": 50.48480930833872,
"grad_norm": 1.992263913154602,
"learning_rate": 0.001,
"loss": 1.7626,
"step": 156200
},
{
"epoch": 50.51712992889463,
"grad_norm": 2.5200653076171875,
"learning_rate": 0.001,
"loss": 1.7646,
"step": 156300
},
{
"epoch": 50.54945054945055,
"grad_norm": 1.9477589130401611,
"learning_rate": 0.001,
"loss": 1.7755,
"step": 156400
},
{
"epoch": 50.58177117000646,
"grad_norm": 2.513901710510254,
"learning_rate": 0.001,
"loss": 1.7761,
"step": 156500
},
{
"epoch": 50.61409179056238,
"grad_norm": 2.063380718231201,
"learning_rate": 0.001,
"loss": 1.7887,
"step": 156600
},
{
"epoch": 50.64641241111829,
"grad_norm": 2.3076422214508057,
"learning_rate": 0.001,
"loss": 1.7691,
"step": 156700
},
{
"epoch": 50.678733031674206,
"grad_norm": 2.060290813446045,
"learning_rate": 0.001,
"loss": 1.7846,
"step": 156800
},
{
"epoch": 50.71105365223012,
"grad_norm": 2.024672031402588,
"learning_rate": 0.001,
"loss": 1.8027,
"step": 156900
},
{
"epoch": 50.743374272786035,
"grad_norm": 1.648667335510254,
"learning_rate": 0.001,
"loss": 1.803,
"step": 157000
},
{
"epoch": 50.77569489334195,
"grad_norm": 1.7075327634811401,
"learning_rate": 0.001,
"loss": 1.8164,
"step": 157100
},
{
"epoch": 50.808015513897864,
"grad_norm": 2.504213809967041,
"learning_rate": 0.001,
"loss": 1.8026,
"step": 157200
},
{
"epoch": 50.84033613445378,
"grad_norm": 2.695814847946167,
"learning_rate": 0.001,
"loss": 1.8074,
"step": 157300
},
{
"epoch": 50.87265675500969,
"grad_norm": 2.0666091442108154,
"learning_rate": 0.001,
"loss": 1.8145,
"step": 157400
},
{
"epoch": 50.90497737556561,
"grad_norm": 2.5069732666015625,
"learning_rate": 0.001,
"loss": 1.8024,
"step": 157500
},
{
"epoch": 50.93729799612152,
"grad_norm": 2.342129945755005,
"learning_rate": 0.001,
"loss": 1.8082,
"step": 157600
},
{
"epoch": 50.96961861667744,
"grad_norm": 1.9542409181594849,
"learning_rate": 0.001,
"loss": 1.8148,
"step": 157700
},
{
"epoch": 51.00193923723336,
"grad_norm": 1.3802143335342407,
"learning_rate": 0.001,
"loss": 1.8217,
"step": 157800
},
{
"epoch": 51.03425985778927,
"grad_norm": 1.5042811632156372,
"learning_rate": 0.001,
"loss": 1.6777,
"step": 157900
},
{
"epoch": 51.06658047834519,
"grad_norm": 1.4837565422058105,
"learning_rate": 0.001,
"loss": 1.6913,
"step": 158000
},
{
"epoch": 51.0989010989011,
"grad_norm": 1.344022274017334,
"learning_rate": 0.001,
"loss": 1.7002,
"step": 158100
},
{
"epoch": 51.13122171945702,
"grad_norm": 1.92073655128479,
"learning_rate": 0.001,
"loss": 1.686,
"step": 158200
},
{
"epoch": 51.16354234001293,
"grad_norm": 1.4150066375732422,
"learning_rate": 0.001,
"loss": 1.7084,
"step": 158300
},
{
"epoch": 51.195862960568846,
"grad_norm": 1.204180359840393,
"learning_rate": 0.001,
"loss": 1.7351,
"step": 158400
},
{
"epoch": 51.22818358112476,
"grad_norm": 1.1352612972259521,
"learning_rate": 0.001,
"loss": 1.7265,
"step": 158500
},
{
"epoch": 51.260504201680675,
"grad_norm": 1.162936806678772,
"learning_rate": 0.001,
"loss": 1.725,
"step": 158600
},
{
"epoch": 51.29282482223659,
"grad_norm": 1.576052188873291,
"learning_rate": 0.001,
"loss": 1.7206,
"step": 158700
},
{
"epoch": 51.325145442792504,
"grad_norm": 1.7167997360229492,
"learning_rate": 0.001,
"loss": 1.7223,
"step": 158800
},
{
"epoch": 51.35746606334842,
"grad_norm": 1.7025160789489746,
"learning_rate": 0.001,
"loss": 1.7241,
"step": 158900
},
{
"epoch": 51.38978668390433,
"grad_norm": 1.5888192653656006,
"learning_rate": 0.001,
"loss": 1.7366,
"step": 159000
},
{
"epoch": 51.42210730446025,
"grad_norm": 1.263992190361023,
"learning_rate": 0.001,
"loss": 1.7557,
"step": 159100
},
{
"epoch": 51.45442792501616,
"grad_norm": 1.2514597177505493,
"learning_rate": 0.001,
"loss": 1.746,
"step": 159200
},
{
"epoch": 51.48674854557208,
"grad_norm": 1.5387557744979858,
"learning_rate": 0.001,
"loss": 1.7493,
"step": 159300
},
{
"epoch": 51.51906916612799,
"grad_norm": 1.4407093524932861,
"learning_rate": 0.001,
"loss": 1.7484,
"step": 159400
},
{
"epoch": 51.551389786683906,
"grad_norm": 1.0381523370742798,
"learning_rate": 0.001,
"loss": 1.7688,
"step": 159500
},
{
"epoch": 51.58371040723982,
"grad_norm": 1.8995901346206665,
"learning_rate": 0.001,
"loss": 1.7602,
"step": 159600
},
{
"epoch": 51.616031027795735,
"grad_norm": 1.7138015031814575,
"learning_rate": 0.001,
"loss": 1.7752,
"step": 159700
},
{
"epoch": 51.64835164835165,
"grad_norm": 1.5023847818374634,
"learning_rate": 0.001,
"loss": 1.7622,
"step": 159800
},
{
"epoch": 51.680672268907564,
"grad_norm": 1.310754656791687,
"learning_rate": 0.001,
"loss": 1.7766,
"step": 159900
},
{
"epoch": 51.71299288946348,
"grad_norm": 1.8020479679107666,
"learning_rate": 0.001,
"loss": 1.7752,
"step": 160000
},
{
"epoch": 51.74531351001939,
"grad_norm": 1.5402014255523682,
"learning_rate": 0.001,
"loss": 1.7729,
"step": 160100
},
{
"epoch": 51.77763413057531,
"grad_norm": 1.6112022399902344,
"learning_rate": 0.001,
"loss": 1.7996,
"step": 160200
},
{
"epoch": 51.80995475113122,
"grad_norm": 1.7574292421340942,
"learning_rate": 0.001,
"loss": 1.7823,
"step": 160300
},
{
"epoch": 51.84227537168714,
"grad_norm": 1.387109398841858,
"learning_rate": 0.001,
"loss": 1.7757,
"step": 160400
},
{
"epoch": 51.87459599224305,
"grad_norm": 1.5390779972076416,
"learning_rate": 0.001,
"loss": 1.8018,
"step": 160500
},
{
"epoch": 51.906916612798966,
"grad_norm": 1.2343939542770386,
"learning_rate": 0.001,
"loss": 1.7902,
"step": 160600
},
{
"epoch": 51.93923723335488,
"grad_norm": 1.5530798435211182,
"learning_rate": 0.001,
"loss": 1.794,
"step": 160700
},
{
"epoch": 51.971557853910795,
"grad_norm": 1.6617400646209717,
"learning_rate": 0.001,
"loss": 1.8228,
"step": 160800
},
{
"epoch": 52.00387847446671,
"grad_norm": 1.4502238035202026,
"learning_rate": 0.001,
"loss": 1.8277,
"step": 160900
},
{
"epoch": 52.036199095022624,
"grad_norm": 1.5478730201721191,
"learning_rate": 0.001,
"loss": 1.6583,
"step": 161000
},
{
"epoch": 52.06851971557854,
"grad_norm": 1.4118196964263916,
"learning_rate": 0.001,
"loss": 1.6602,
"step": 161100
},
{
"epoch": 52.10084033613445,
"grad_norm": 1.292994737625122,
"learning_rate": 0.001,
"loss": 1.7036,
"step": 161200
},
{
"epoch": 52.13316095669037,
"grad_norm": 1.2106192111968994,
"learning_rate": 0.001,
"loss": 1.7012,
"step": 161300
},
{
"epoch": 52.16548157724628,
"grad_norm": 1.943745493888855,
"learning_rate": 0.001,
"loss": 1.6879,
"step": 161400
},
{
"epoch": 52.1978021978022,
"grad_norm": 1.3990156650543213,
"learning_rate": 0.001,
"loss": 1.7025,
"step": 161500
},
{
"epoch": 52.23012281835811,
"grad_norm": 1.3725271224975586,
"learning_rate": 0.001,
"loss": 1.7142,
"step": 161600
},
{
"epoch": 52.262443438914026,
"grad_norm": 1.4279320240020752,
"learning_rate": 0.001,
"loss": 1.7224,
"step": 161700
},
{
"epoch": 52.29476405946994,
"grad_norm": 1.4990894794464111,
"learning_rate": 0.001,
"loss": 1.7182,
"step": 161800
},
{
"epoch": 52.327084680025855,
"grad_norm": 1.810198426246643,
"learning_rate": 0.001,
"loss": 1.7324,
"step": 161900
},
{
"epoch": 52.35940530058177,
"grad_norm": 1.5801573991775513,
"learning_rate": 0.001,
"loss": 1.7286,
"step": 162000
},
{
"epoch": 52.391725921137684,
"grad_norm": 1.611038088798523,
"learning_rate": 0.001,
"loss": 1.7272,
"step": 162100
},
{
"epoch": 52.4240465416936,
"grad_norm": 1.4676355123519897,
"learning_rate": 0.001,
"loss": 1.728,
"step": 162200
},
{
"epoch": 52.456367162249514,
"grad_norm": 1.0789649486541748,
"learning_rate": 0.001,
"loss": 1.7314,
"step": 162300
},
{
"epoch": 52.48868778280543,
"grad_norm": 1.4516468048095703,
"learning_rate": 0.001,
"loss": 1.7339,
"step": 162400
},
{
"epoch": 52.52100840336134,
"grad_norm": 1.4005082845687866,
"learning_rate": 0.001,
"loss": 1.7325,
"step": 162500
},
{
"epoch": 52.55332902391726,
"grad_norm": 1.4502547979354858,
"learning_rate": 0.001,
"loss": 1.7495,
"step": 162600
},
{
"epoch": 52.58564964447317,
"grad_norm": 1.5237691402435303,
"learning_rate": 0.001,
"loss": 1.7472,
"step": 162700
},
{
"epoch": 52.617970265029086,
"grad_norm": 1.3226033449172974,
"learning_rate": 0.001,
"loss": 1.7567,
"step": 162800
},
{
"epoch": 52.650290885585,
"grad_norm": 1.2818242311477661,
"learning_rate": 0.001,
"loss": 1.7681,
"step": 162900
},
{
"epoch": 52.682611506140915,
"grad_norm": 1.0916748046875,
"learning_rate": 0.001,
"loss": 1.7691,
"step": 163000
},
{
"epoch": 52.71493212669683,
"grad_norm": 1.3001798391342163,
"learning_rate": 0.001,
"loss": 1.7714,
"step": 163100
},
{
"epoch": 52.747252747252745,
"grad_norm": 1.2963659763336182,
"learning_rate": 0.001,
"loss": 1.7761,
"step": 163200
},
{
"epoch": 52.77957336780866,
"grad_norm": 1.3647947311401367,
"learning_rate": 0.001,
"loss": 1.7662,
"step": 163300
},
{
"epoch": 52.811893988364574,
"grad_norm": 0.9298973679542542,
"learning_rate": 0.001,
"loss": 1.7631,
"step": 163400
},
{
"epoch": 52.84421460892049,
"grad_norm": 1.4588956832885742,
"learning_rate": 0.001,
"loss": 1.7802,
"step": 163500
},
{
"epoch": 52.8765352294764,
"grad_norm": 1.4117836952209473,
"learning_rate": 0.001,
"loss": 1.7943,
"step": 163600
},
{
"epoch": 52.90885585003232,
"grad_norm": 1.3966608047485352,
"learning_rate": 0.001,
"loss": 1.7876,
"step": 163700
},
{
"epoch": 52.94117647058823,
"grad_norm": 1.2959389686584473,
"learning_rate": 0.001,
"loss": 1.7929,
"step": 163800
},
{
"epoch": 52.97349709114415,
"grad_norm": 1.3205302953720093,
"learning_rate": 0.001,
"loss": 1.8011,
"step": 163900
},
{
"epoch": 53.00581771170007,
"grad_norm": 1.3091896772384644,
"learning_rate": 0.001,
"loss": 1.7603,
"step": 164000
},
{
"epoch": 53.03813833225598,
"grad_norm": 1.2790523767471313,
"learning_rate": 0.001,
"loss": 1.6707,
"step": 164100
},
{
"epoch": 53.0704589528119,
"grad_norm": 1.5966098308563232,
"learning_rate": 0.001,
"loss": 1.6345,
"step": 164200
},
{
"epoch": 53.10277957336781,
"grad_norm": 1.3328355550765991,
"learning_rate": 0.001,
"loss": 1.6752,
"step": 164300
},
{
"epoch": 53.135100193923726,
"grad_norm": 1.163203239440918,
"learning_rate": 0.001,
"loss": 1.6842,
"step": 164400
},
{
"epoch": 53.16742081447964,
"grad_norm": 1.2743290662765503,
"learning_rate": 0.001,
"loss": 1.6861,
"step": 164500
},
{
"epoch": 53.199741435035556,
"grad_norm": 1.0141232013702393,
"learning_rate": 0.001,
"loss": 1.6939,
"step": 164600
},
{
"epoch": 53.23206205559147,
"grad_norm": 1.482993483543396,
"learning_rate": 0.001,
"loss": 1.6931,
"step": 164700
},
{
"epoch": 53.264382676147385,
"grad_norm": 1.7234373092651367,
"learning_rate": 0.001,
"loss": 1.7039,
"step": 164800
},
{
"epoch": 53.2967032967033,
"grad_norm": 1.4995521306991577,
"learning_rate": 0.001,
"loss": 1.7132,
"step": 164900
},
{
"epoch": 53.329023917259214,
"grad_norm": 1.4815672636032104,
"learning_rate": 0.001,
"loss": 1.724,
"step": 165000
},
{
"epoch": 53.36134453781513,
"grad_norm": 1.2232056856155396,
"learning_rate": 0.001,
"loss": 1.7088,
"step": 165100
},
{
"epoch": 53.39366515837104,
"grad_norm": 1.2571676969528198,
"learning_rate": 0.001,
"loss": 1.7178,
"step": 165200
},
{
"epoch": 53.42598577892696,
"grad_norm": 1.4687373638153076,
"learning_rate": 0.001,
"loss": 1.7219,
"step": 165300
},
{
"epoch": 53.45830639948287,
"grad_norm": 1.3377008438110352,
"learning_rate": 0.001,
"loss": 1.7198,
"step": 165400
},
{
"epoch": 53.49062702003879,
"grad_norm": 1.3167778253555298,
"learning_rate": 0.001,
"loss": 1.7242,
"step": 165500
},
{
"epoch": 53.5229476405947,
"grad_norm": 1.419568419456482,
"learning_rate": 0.001,
"loss": 1.7394,
"step": 165600
},
{
"epoch": 53.555268261150616,
"grad_norm": 1.1394786834716797,
"learning_rate": 0.001,
"loss": 1.7384,
"step": 165700
},
{
"epoch": 53.58758888170653,
"grad_norm": 1.2440487146377563,
"learning_rate": 0.001,
"loss": 1.7182,
"step": 165800
},
{
"epoch": 53.619909502262445,
"grad_norm": 1.3777581453323364,
"learning_rate": 0.001,
"loss": 1.7425,
"step": 165900
},
{
"epoch": 53.65223012281836,
"grad_norm": 1.3118562698364258,
"learning_rate": 0.001,
"loss": 1.7385,
"step": 166000
},
{
"epoch": 53.684550743374274,
"grad_norm": 1.4936604499816895,
"learning_rate": 0.001,
"loss": 1.7678,
"step": 166100
},
{
"epoch": 53.71687136393019,
"grad_norm": 1.2285600900650024,
"learning_rate": 0.001,
"loss": 1.7554,
"step": 166200
},
{
"epoch": 53.7491919844861,
"grad_norm": 1.1528136730194092,
"learning_rate": 0.001,
"loss": 1.7527,
"step": 166300
},
{
"epoch": 53.78151260504202,
"grad_norm": 1.2130924463272095,
"learning_rate": 0.001,
"loss": 1.7759,
"step": 166400
},
{
"epoch": 53.81383322559793,
"grad_norm": 1.3952350616455078,
"learning_rate": 0.001,
"loss": 1.7483,
"step": 166500
},
{
"epoch": 53.84615384615385,
"grad_norm": 1.3838261365890503,
"learning_rate": 0.001,
"loss": 1.7562,
"step": 166600
},
{
"epoch": 53.87847446670976,
"grad_norm": 1.4428045749664307,
"learning_rate": 0.001,
"loss": 1.7718,
"step": 166700
},
{
"epoch": 53.910795087265676,
"grad_norm": 0.8531396389007568,
"learning_rate": 0.001,
"loss": 1.7756,
"step": 166800
},
{
"epoch": 53.94311570782159,
"grad_norm": 1.3588244915008545,
"learning_rate": 0.001,
"loss": 1.7818,
"step": 166900
},
{
"epoch": 53.975436328377505,
"grad_norm": 1.445267915725708,
"learning_rate": 0.001,
"loss": 1.786,
"step": 167000
},
{
"epoch": 54.00775694893342,
"grad_norm": 1.230377435684204,
"learning_rate": 0.001,
"loss": 1.7735,
"step": 167100
},
{
"epoch": 54.040077569489334,
"grad_norm": 1.4522416591644287,
"learning_rate": 0.001,
"loss": 1.6596,
"step": 167200
},
{
"epoch": 54.07239819004525,
"grad_norm": 1.166033148765564,
"learning_rate": 0.001,
"loss": 1.6483,
"step": 167300
},
{
"epoch": 54.10471881060116,
"grad_norm": 1.9019384384155273,
"learning_rate": 0.001,
"loss": 1.6614,
"step": 167400
},
{
"epoch": 54.13703943115708,
"grad_norm": 1.1690678596496582,
"learning_rate": 0.001,
"loss": 1.6648,
"step": 167500
},
{
"epoch": 54.16936005171299,
"grad_norm": 1.9268842935562134,
"learning_rate": 0.001,
"loss": 1.6621,
"step": 167600
},
{
"epoch": 54.20168067226891,
"grad_norm": 1.696664810180664,
"learning_rate": 0.001,
"loss": 1.6677,
"step": 167700
},
{
"epoch": 54.23400129282482,
"grad_norm": 1.063439965248108,
"learning_rate": 0.001,
"loss": 1.6917,
"step": 167800
},
{
"epoch": 54.266321913380736,
"grad_norm": 1.134600281715393,
"learning_rate": 0.001,
"loss": 1.6965,
"step": 167900
},
{
"epoch": 54.29864253393665,
"grad_norm": 1.2934255599975586,
"learning_rate": 0.001,
"loss": 1.689,
"step": 168000
},
{
"epoch": 54.330963154492565,
"grad_norm": 2.243715524673462,
"learning_rate": 0.001,
"loss": 1.6987,
"step": 168100
},
{
"epoch": 54.36328377504848,
"grad_norm": 1.3866597414016724,
"learning_rate": 0.001,
"loss": 1.702,
"step": 168200
},
{
"epoch": 54.395604395604394,
"grad_norm": 1.2409111261367798,
"learning_rate": 0.001,
"loss": 1.7087,
"step": 168300
},
{
"epoch": 54.42792501616031,
"grad_norm": 1.5567268133163452,
"learning_rate": 0.001,
"loss": 1.7126,
"step": 168400
},
{
"epoch": 54.46024563671622,
"grad_norm": 1.2952624559402466,
"learning_rate": 0.001,
"loss": 1.7203,
"step": 168500
},
{
"epoch": 54.49256625727214,
"grad_norm": 1.1444261074066162,
"learning_rate": 0.001,
"loss": 1.708,
"step": 168600
},
{
"epoch": 54.52488687782805,
"grad_norm": 1.1505794525146484,
"learning_rate": 0.001,
"loss": 1.7202,
"step": 168700
},
{
"epoch": 54.55720749838397,
"grad_norm": 1.1038155555725098,
"learning_rate": 0.001,
"loss": 1.7309,
"step": 168800
},
{
"epoch": 54.58952811893988,
"grad_norm": 1.279555320739746,
"learning_rate": 0.001,
"loss": 1.7425,
"step": 168900
},
{
"epoch": 54.621848739495796,
"grad_norm": 1.2210204601287842,
"learning_rate": 0.001,
"loss": 1.7291,
"step": 169000
},
{
"epoch": 54.65416936005171,
"grad_norm": 1.162758231163025,
"learning_rate": 0.001,
"loss": 1.7489,
"step": 169100
},
{
"epoch": 54.686489980607625,
"grad_norm": 1.2171443700790405,
"learning_rate": 0.001,
"loss": 1.7409,
"step": 169200
},
{
"epoch": 54.71881060116354,
"grad_norm": 1.1131047010421753,
"learning_rate": 0.001,
"loss": 1.7244,
"step": 169300
},
{
"epoch": 54.751131221719454,
"grad_norm": 1.2578123807907104,
"learning_rate": 0.001,
"loss": 1.7508,
"step": 169400
},
{
"epoch": 54.78345184227537,
"grad_norm": 1.4146089553833008,
"learning_rate": 0.001,
"loss": 1.7498,
"step": 169500
},
{
"epoch": 54.81577246283128,
"grad_norm": 1.197304129600525,
"learning_rate": 0.001,
"loss": 1.756,
"step": 169600
},
{
"epoch": 54.8480930833872,
"grad_norm": 1.5083588361740112,
"learning_rate": 0.001,
"loss": 1.7443,
"step": 169700
},
{
"epoch": 54.88041370394311,
"grad_norm": 1.3559470176696777,
"learning_rate": 0.001,
"loss": 1.7423,
"step": 169800
},
{
"epoch": 54.91273432449903,
"grad_norm": 1.4131273031234741,
"learning_rate": 0.001,
"loss": 1.7629,
"step": 169900
},
{
"epoch": 54.94505494505494,
"grad_norm": 1.3871897459030151,
"learning_rate": 0.001,
"loss": 1.774,
"step": 170000
},
{
"epoch": 54.977375565610856,
"grad_norm": 1.2662379741668701,
"learning_rate": 0.001,
"loss": 1.7679,
"step": 170100
},
{
"epoch": 55.00969618616678,
"grad_norm": 1.1516029834747314,
"learning_rate": 0.001,
"loss": 1.7404,
"step": 170200
},
{
"epoch": 55.04201680672269,
"grad_norm": 1.3849647045135498,
"learning_rate": 0.001,
"loss": 1.6419,
"step": 170300
},
{
"epoch": 55.07433742727861,
"grad_norm": 1.3158921003341675,
"learning_rate": 0.001,
"loss": 1.6436,
"step": 170400
},
{
"epoch": 55.10665804783452,
"grad_norm": 1.2415001392364502,
"learning_rate": 0.001,
"loss": 1.6498,
"step": 170500
},
{
"epoch": 55.138978668390436,
"grad_norm": 1.701054334640503,
"learning_rate": 0.001,
"loss": 1.6463,
"step": 170600
},
{
"epoch": 55.17129928894635,
"grad_norm": 1.5335909128189087,
"learning_rate": 0.001,
"loss": 1.6358,
"step": 170700
},
{
"epoch": 55.203619909502265,
"grad_norm": 1.6681565046310425,
"learning_rate": 0.001,
"loss": 1.6698,
"step": 170800
},
{
"epoch": 55.23594053005818,
"grad_norm": 1.212498426437378,
"learning_rate": 0.001,
"loss": 1.6935,
"step": 170900
},
{
"epoch": 55.268261150614094,
"grad_norm": 1.3680095672607422,
"learning_rate": 0.001,
"loss": 1.6751,
"step": 171000
},
{
"epoch": 55.30058177117001,
"grad_norm": 1.35792076587677,
"learning_rate": 0.001,
"loss": 1.6836,
"step": 171100
},
{
"epoch": 55.33290239172592,
"grad_norm": 1.379514217376709,
"learning_rate": 0.001,
"loss": 1.6869,
"step": 171200
},
{
"epoch": 55.36522301228184,
"grad_norm": 1.1992942094802856,
"learning_rate": 0.001,
"loss": 1.679,
"step": 171300
},
{
"epoch": 55.39754363283775,
"grad_norm": 1.0317083597183228,
"learning_rate": 0.001,
"loss": 1.7091,
"step": 171400
},
{
"epoch": 55.42986425339367,
"grad_norm": 1.0589741468429565,
"learning_rate": 0.001,
"loss": 1.7057,
"step": 171500
},
{
"epoch": 55.46218487394958,
"grad_norm": 1.4371219873428345,
"learning_rate": 0.001,
"loss": 1.706,
"step": 171600
},
{
"epoch": 55.494505494505496,
"grad_norm": 1.698289155960083,
"learning_rate": 0.001,
"loss": 1.7089,
"step": 171700
},
{
"epoch": 55.52682611506141,
"grad_norm": 1.3261172771453857,
"learning_rate": 0.001,
"loss": 1.6997,
"step": 171800
},
{
"epoch": 55.559146735617325,
"grad_norm": 1.3861745595932007,
"learning_rate": 0.001,
"loss": 1.718,
"step": 171900
},
{
"epoch": 55.59146735617324,
"grad_norm": 1.6267752647399902,
"learning_rate": 0.001,
"loss": 1.7208,
"step": 172000
},
{
"epoch": 55.623787976729155,
"grad_norm": 1.1328678131103516,
"learning_rate": 0.001,
"loss": 1.7223,
"step": 172100
},
{
"epoch": 55.65610859728507,
"grad_norm": 1.0877766609191895,
"learning_rate": 0.001,
"loss": 1.7178,
"step": 172200
},
{
"epoch": 55.688429217840984,
"grad_norm": 1.3555140495300293,
"learning_rate": 0.001,
"loss": 1.7219,
"step": 172300
},
{
"epoch": 55.7207498383969,
"grad_norm": 1.6600762605667114,
"learning_rate": 0.001,
"loss": 1.7403,
"step": 172400
},
{
"epoch": 55.75307045895281,
"grad_norm": 1.2952337265014648,
"learning_rate": 0.001,
"loss": 1.7195,
"step": 172500
},
{
"epoch": 55.78539107950873,
"grad_norm": 1.478588342666626,
"learning_rate": 0.001,
"loss": 1.7345,
"step": 172600
},
{
"epoch": 55.81771170006464,
"grad_norm": 1.2930961847305298,
"learning_rate": 0.001,
"loss": 1.7363,
"step": 172700
},
{
"epoch": 55.85003232062056,
"grad_norm": 1.9322788715362549,
"learning_rate": 0.001,
"loss": 1.7384,
"step": 172800
},
{
"epoch": 55.88235294117647,
"grad_norm": 1.3968391418457031,
"learning_rate": 0.001,
"loss": 1.7524,
"step": 172900
},
{
"epoch": 55.914673561732386,
"grad_norm": 1.6979691982269287,
"learning_rate": 0.001,
"loss": 1.7424,
"step": 173000
},
{
"epoch": 55.9469941822883,
"grad_norm": 1.0870234966278076,
"learning_rate": 0.001,
"loss": 1.7632,
"step": 173100
},
{
"epoch": 55.979314802844215,
"grad_norm": 1.294993281364441,
"learning_rate": 0.001,
"loss": 1.7644,
"step": 173200
},
{
"epoch": 56.01163542340013,
"grad_norm": 1.5237852334976196,
"learning_rate": 0.001,
"loss": 1.7015,
"step": 173300
},
{
"epoch": 56.043956043956044,
"grad_norm": 1.554025650024414,
"learning_rate": 0.001,
"loss": 1.6182,
"step": 173400
},
{
"epoch": 56.07627666451196,
"grad_norm": 1.3839563131332397,
"learning_rate": 0.001,
"loss": 1.6317,
"step": 173500
},
{
"epoch": 56.10859728506787,
"grad_norm": 1.6912566423416138,
"learning_rate": 0.001,
"loss": 1.6477,
"step": 173600
},
{
"epoch": 56.14091790562379,
"grad_norm": 1.4600452184677124,
"learning_rate": 0.001,
"loss": 1.6555,
"step": 173700
},
{
"epoch": 56.1732385261797,
"grad_norm": 1.976299524307251,
"learning_rate": 0.001,
"loss": 1.6481,
"step": 173800
},
{
"epoch": 56.20555914673562,
"grad_norm": 1.5101046562194824,
"learning_rate": 0.001,
"loss": 1.6432,
"step": 173900
},
{
"epoch": 56.23787976729153,
"grad_norm": 1.807742953300476,
"learning_rate": 0.001,
"loss": 1.6469,
"step": 174000
},
{
"epoch": 56.270200387847446,
"grad_norm": 1.5435492992401123,
"learning_rate": 0.001,
"loss": 1.66,
"step": 174100
},
{
"epoch": 56.30252100840336,
"grad_norm": 1.4853410720825195,
"learning_rate": 0.001,
"loss": 1.6784,
"step": 174200
},
{
"epoch": 56.334841628959275,
"grad_norm": 1.39516019821167,
"learning_rate": 0.001,
"loss": 1.6785,
"step": 174300
},
{
"epoch": 56.36716224951519,
"grad_norm": 1.7564011812210083,
"learning_rate": 0.001,
"loss": 1.6742,
"step": 174400
},
{
"epoch": 56.399482870071104,
"grad_norm": 1.4495413303375244,
"learning_rate": 0.001,
"loss": 1.6787,
"step": 174500
},
{
"epoch": 56.43180349062702,
"grad_norm": 1.738829493522644,
"learning_rate": 0.001,
"loss": 1.6904,
"step": 174600
},
{
"epoch": 56.46412411118293,
"grad_norm": 1.2273107767105103,
"learning_rate": 0.001,
"loss": 1.6761,
"step": 174700
},
{
"epoch": 56.49644473173885,
"grad_norm": 1.4229944944381714,
"learning_rate": 0.001,
"loss": 1.6872,
"step": 174800
},
{
"epoch": 56.52876535229476,
"grad_norm": 1.1950942277908325,
"learning_rate": 0.001,
"loss": 1.6886,
"step": 174900
},
{
"epoch": 56.56108597285068,
"grad_norm": 1.4346133470535278,
"learning_rate": 0.001,
"loss": 1.7026,
"step": 175000
},
{
"epoch": 56.59340659340659,
"grad_norm": 1.2384557723999023,
"learning_rate": 0.001,
"loss": 1.7176,
"step": 175100
},
{
"epoch": 56.625727213962506,
"grad_norm": 1.3948698043823242,
"learning_rate": 0.001,
"loss": 1.7093,
"step": 175200
},
{
"epoch": 56.65804783451842,
"grad_norm": 1.547871708869934,
"learning_rate": 0.001,
"loss": 1.7273,
"step": 175300
},
{
"epoch": 56.690368455074335,
"grad_norm": 1.5837923288345337,
"learning_rate": 0.001,
"loss": 1.706,
"step": 175400
},
{
"epoch": 56.72268907563025,
"grad_norm": 1.3171043395996094,
"learning_rate": 0.001,
"loss": 1.7245,
"step": 175500
},
{
"epoch": 56.755009696186164,
"grad_norm": 1.7027974128723145,
"learning_rate": 0.001,
"loss": 1.72,
"step": 175600
},
{
"epoch": 56.78733031674208,
"grad_norm": 1.5932236909866333,
"learning_rate": 0.001,
"loss": 1.731,
"step": 175700
},
{
"epoch": 56.81965093729799,
"grad_norm": 1.2864384651184082,
"learning_rate": 0.001,
"loss": 1.732,
"step": 175800
},
{
"epoch": 56.85197155785391,
"grad_norm": 1.7263994216918945,
"learning_rate": 0.001,
"loss": 1.7377,
"step": 175900
},
{
"epoch": 56.88429217840982,
"grad_norm": 1.3134857416152954,
"learning_rate": 0.001,
"loss": 1.7407,
"step": 176000
},
{
"epoch": 56.91661279896574,
"grad_norm": 1.2284256219863892,
"learning_rate": 0.001,
"loss": 1.7405,
"step": 176100
},
{
"epoch": 56.94893341952165,
"grad_norm": 1.7951955795288086,
"learning_rate": 0.001,
"loss": 1.7399,
"step": 176200
},
{
"epoch": 56.981254040077566,
"grad_norm": 1.8334953784942627,
"learning_rate": 0.001,
"loss": 1.7412,
"step": 176300
},
{
"epoch": 57.01357466063349,
"grad_norm": 1.490244746208191,
"learning_rate": 0.001,
"loss": 1.6897,
"step": 176400
},
{
"epoch": 57.0458952811894,
"grad_norm": 1.785170316696167,
"learning_rate": 0.001,
"loss": 1.6153,
"step": 176500
},
{
"epoch": 57.07821590174532,
"grad_norm": 2.492884635925293,
"learning_rate": 0.001,
"loss": 1.6217,
"step": 176600
},
{
"epoch": 57.11053652230123,
"grad_norm": 2.1509578227996826,
"learning_rate": 0.001,
"loss": 1.6313,
"step": 176700
},
{
"epoch": 57.142857142857146,
"grad_norm": 1.789116382598877,
"learning_rate": 0.001,
"loss": 1.6308,
"step": 176800
},
{
"epoch": 57.17517776341306,
"grad_norm": 1.6912237405776978,
"learning_rate": 0.001,
"loss": 1.6443,
"step": 176900
},
{
"epoch": 57.207498383968975,
"grad_norm": 1.6607928276062012,
"learning_rate": 0.001,
"loss": 1.6519,
"step": 177000
},
{
"epoch": 57.23981900452489,
"grad_norm": 1.7817693948745728,
"learning_rate": 0.001,
"loss": 1.6476,
"step": 177100
},
{
"epoch": 57.272139625080804,
"grad_norm": 1.3845174312591553,
"learning_rate": 0.001,
"loss": 1.6536,
"step": 177200
},
{
"epoch": 57.30446024563672,
"grad_norm": 1.780415415763855,
"learning_rate": 0.001,
"loss": 1.6431,
"step": 177300
},
{
"epoch": 57.33678086619263,
"grad_norm": 1.345583438873291,
"learning_rate": 0.001,
"loss": 1.6722,
"step": 177400
},
{
"epoch": 57.36910148674855,
"grad_norm": 2.1150074005126953,
"learning_rate": 0.001,
"loss": 1.6676,
"step": 177500
},
{
"epoch": 57.40142210730446,
"grad_norm": 1.7680250406265259,
"learning_rate": 0.001,
"loss": 1.6723,
"step": 177600
},
{
"epoch": 57.43374272786038,
"grad_norm": 1.5597569942474365,
"learning_rate": 0.001,
"loss": 1.6801,
"step": 177700
},
{
"epoch": 57.46606334841629,
"grad_norm": 1.5379387140274048,
"learning_rate": 0.001,
"loss": 1.6728,
"step": 177800
},
{
"epoch": 57.498383968972206,
"grad_norm": 1.645971655845642,
"learning_rate": 0.001,
"loss": 1.6907,
"step": 177900
},
{
"epoch": 57.53070458952812,
"grad_norm": 1.28190016746521,
"learning_rate": 0.001,
"loss": 1.6902,
"step": 178000
},
{
"epoch": 57.563025210084035,
"grad_norm": 2.0813686847686768,
"learning_rate": 0.001,
"loss": 1.6944,
"step": 178100
},
{
"epoch": 57.59534583063995,
"grad_norm": 1.533876895904541,
"learning_rate": 0.001,
"loss": 1.6836,
"step": 178200
},
{
"epoch": 57.627666451195864,
"grad_norm": 1.5302281379699707,
"learning_rate": 0.001,
"loss": 1.7007,
"step": 178300
},
{
"epoch": 57.65998707175178,
"grad_norm": 1.3833650350570679,
"learning_rate": 0.001,
"loss": 1.6855,
"step": 178400
},
{
"epoch": 57.69230769230769,
"grad_norm": 1.61258065700531,
"learning_rate": 0.001,
"loss": 1.7141,
"step": 178500
},
{
"epoch": 57.72462831286361,
"grad_norm": 1.4593497514724731,
"learning_rate": 0.001,
"loss": 1.6981,
"step": 178600
},
{
"epoch": 57.75694893341952,
"grad_norm": 1.5033830404281616,
"learning_rate": 0.001,
"loss": 1.7213,
"step": 178700
},
{
"epoch": 57.78926955397544,
"grad_norm": 1.7307718992233276,
"learning_rate": 0.001,
"loss": 1.7133,
"step": 178800
},
{
"epoch": 57.82159017453135,
"grad_norm": 1.674391746520996,
"learning_rate": 0.001,
"loss": 1.7114,
"step": 178900
},
{
"epoch": 57.853910795087266,
"grad_norm": 1.642048716545105,
"learning_rate": 0.001,
"loss": 1.7125,
"step": 179000
},
{
"epoch": 57.88623141564318,
"grad_norm": 1.9265817403793335,
"learning_rate": 0.001,
"loss": 1.7243,
"step": 179100
},
{
"epoch": 57.918552036199095,
"grad_norm": 1.534619927406311,
"learning_rate": 0.001,
"loss": 1.7354,
"step": 179200
},
{
"epoch": 57.95087265675501,
"grad_norm": 1.3853662014007568,
"learning_rate": 0.001,
"loss": 1.7486,
"step": 179300
},
{
"epoch": 57.983193277310924,
"grad_norm": 1.3320893049240112,
"learning_rate": 0.001,
"loss": 1.7432,
"step": 179400
},
{
"epoch": 58.01551389786684,
"grad_norm": 1.929592490196228,
"learning_rate": 0.001,
"loss": 1.6612,
"step": 179500
},
{
"epoch": 58.04783451842275,
"grad_norm": 1.2655434608459473,
"learning_rate": 0.001,
"loss": 1.6113,
"step": 179600
},
{
"epoch": 58.08015513897867,
"grad_norm": 1.8946388959884644,
"learning_rate": 0.001,
"loss": 1.6196,
"step": 179700
},
{
"epoch": 58.11247575953458,
"grad_norm": 1.6740760803222656,
"learning_rate": 0.001,
"loss": 1.5944,
"step": 179800
},
{
"epoch": 58.1447963800905,
"grad_norm": 1.9058560132980347,
"learning_rate": 0.001,
"loss": 1.6275,
"step": 179900
},
{
"epoch": 58.17711700064641,
"grad_norm": 1.8207988739013672,
"learning_rate": 0.001,
"loss": 1.6079,
"step": 180000
},
{
"epoch": 58.209437621202326,
"grad_norm": 1.7238407135009766,
"learning_rate": 0.001,
"loss": 1.6269,
"step": 180100
},
{
"epoch": 58.24175824175824,
"grad_norm": 1.809914231300354,
"learning_rate": 0.001,
"loss": 1.6507,
"step": 180200
},
{
"epoch": 58.274078862314155,
"grad_norm": 1.8771950006484985,
"learning_rate": 0.001,
"loss": 1.6316,
"step": 180300
},
{
"epoch": 58.30639948287007,
"grad_norm": 2.1245312690734863,
"learning_rate": 0.001,
"loss": 1.6455,
"step": 180400
},
{
"epoch": 58.338720103425985,
"grad_norm": 1.507102608680725,
"learning_rate": 0.001,
"loss": 1.662,
"step": 180500
},
{
"epoch": 58.3710407239819,
"grad_norm": 1.8217893838882446,
"learning_rate": 0.001,
"loss": 1.6656,
"step": 180600
},
{
"epoch": 58.403361344537814,
"grad_norm": 1.8754676580429077,
"learning_rate": 0.001,
"loss": 1.6481,
"step": 180700
},
{
"epoch": 58.43568196509373,
"grad_norm": 1.8230457305908203,
"learning_rate": 0.001,
"loss": 1.6674,
"step": 180800
},
{
"epoch": 58.46800258564964,
"grad_norm": 2.2292656898498535,
"learning_rate": 0.001,
"loss": 1.6704,
"step": 180900
},
{
"epoch": 58.50032320620556,
"grad_norm": 1.9867464303970337,
"learning_rate": 0.001,
"loss": 1.669,
"step": 181000
},
{
"epoch": 58.53264382676147,
"grad_norm": 1.8236154317855835,
"learning_rate": 0.001,
"loss": 1.6707,
"step": 181100
},
{
"epoch": 58.56496444731739,
"grad_norm": 1.3619102239608765,
"learning_rate": 0.001,
"loss": 1.6735,
"step": 181200
},
{
"epoch": 58.5972850678733,
"grad_norm": 1.4769443273544312,
"learning_rate": 0.001,
"loss": 1.6882,
"step": 181300
},
{
"epoch": 58.629605688429216,
"grad_norm": 2.5227391719818115,
"learning_rate": 0.001,
"loss": 1.6807,
"step": 181400
},
{
"epoch": 58.66192630898513,
"grad_norm": 1.9169458150863647,
"learning_rate": 0.001,
"loss": 1.6877,
"step": 181500
},
{
"epoch": 58.694246929541045,
"grad_norm": 1.9800595045089722,
"learning_rate": 0.001,
"loss": 1.6846,
"step": 181600
},
{
"epoch": 58.72656755009696,
"grad_norm": 1.5677974224090576,
"learning_rate": 0.001,
"loss": 1.6981,
"step": 181700
},
{
"epoch": 58.758888170652874,
"grad_norm": 1.9665014743804932,
"learning_rate": 0.001,
"loss": 1.6963,
"step": 181800
},
{
"epoch": 58.79120879120879,
"grad_norm": 2.1127920150756836,
"learning_rate": 0.001,
"loss": 1.7346,
"step": 181900
},
{
"epoch": 58.8235294117647,
"grad_norm": 1.8396202325820923,
"learning_rate": 0.001,
"loss": 1.7076,
"step": 182000
},
{
"epoch": 58.85585003232062,
"grad_norm": 2.098335027694702,
"learning_rate": 0.001,
"loss": 1.7025,
"step": 182100
},
{
"epoch": 58.88817065287653,
"grad_norm": 1.597739338874817,
"learning_rate": 0.001,
"loss": 1.7145,
"step": 182200
},
{
"epoch": 58.92049127343245,
"grad_norm": 1.9156646728515625,
"learning_rate": 0.001,
"loss": 1.7235,
"step": 182300
},
{
"epoch": 58.95281189398836,
"grad_norm": 2.0628559589385986,
"learning_rate": 0.001,
"loss": 1.7255,
"step": 182400
},
{
"epoch": 58.985132514544276,
"grad_norm": 1.4248301982879639,
"learning_rate": 0.001,
"loss": 1.7222,
"step": 182500
},
{
"epoch": 59.0174531351002,
"grad_norm": 1.9326345920562744,
"learning_rate": 0.001,
"loss": 1.652,
"step": 182600
},
{
"epoch": 59.04977375565611,
"grad_norm": 1.6768263578414917,
"learning_rate": 0.001,
"loss": 1.5961,
"step": 182700
},
{
"epoch": 59.08209437621203,
"grad_norm": 2.4055697917938232,
"learning_rate": 0.001,
"loss": 1.5885,
"step": 182800
},
{
"epoch": 59.11441499676794,
"grad_norm": 2.089221954345703,
"learning_rate": 0.001,
"loss": 1.6047,
"step": 182900
},
{
"epoch": 59.146735617323856,
"grad_norm": 2.2153096199035645,
"learning_rate": 0.001,
"loss": 1.5914,
"step": 183000
},
{
"epoch": 59.17905623787977,
"grad_norm": 1.6174402236938477,
"learning_rate": 0.001,
"loss": 1.6094,
"step": 183100
},
{
"epoch": 59.211376858435685,
"grad_norm": 1.7545989751815796,
"learning_rate": 0.001,
"loss": 1.6415,
"step": 183200
},
{
"epoch": 59.2436974789916,
"grad_norm": 1.6180590391159058,
"learning_rate": 0.001,
"loss": 1.6332,
"step": 183300
},
{
"epoch": 59.276018099547514,
"grad_norm": 1.4139974117279053,
"learning_rate": 0.001,
"loss": 1.6373,
"step": 183400
},
{
"epoch": 59.30833872010343,
"grad_norm": 2.0663397312164307,
"learning_rate": 0.001,
"loss": 1.6387,
"step": 183500
},
{
"epoch": 59.34065934065934,
"grad_norm": 1.6867034435272217,
"learning_rate": 0.001,
"loss": 1.6548,
"step": 183600
},
{
"epoch": 59.37297996121526,
"grad_norm": 1.6726882457733154,
"learning_rate": 0.001,
"loss": 1.6555,
"step": 183700
},
{
"epoch": 59.40530058177117,
"grad_norm": 1.7690714597702026,
"learning_rate": 0.001,
"loss": 1.6526,
"step": 183800
},
{
"epoch": 59.43762120232709,
"grad_norm": 1.4407790899276733,
"learning_rate": 0.001,
"loss": 1.6619,
"step": 183900
},
{
"epoch": 59.469941822883,
"grad_norm": 1.3931773900985718,
"learning_rate": 0.001,
"loss": 1.6647,
"step": 184000
},
{
"epoch": 59.502262443438916,
"grad_norm": 1.713707447052002,
"learning_rate": 0.001,
"loss": 1.6664,
"step": 184100
},
{
"epoch": 59.53458306399483,
"grad_norm": 1.4662772417068481,
"learning_rate": 0.001,
"loss": 1.6614,
"step": 184200
},
{
"epoch": 59.566903684550745,
"grad_norm": 1.6400699615478516,
"learning_rate": 0.001,
"loss": 1.6717,
"step": 184300
},
{
"epoch": 59.59922430510666,
"grad_norm": 1.428658366203308,
"learning_rate": 0.001,
"loss": 1.6698,
"step": 184400
},
{
"epoch": 59.631544925662574,
"grad_norm": 1.601353645324707,
"learning_rate": 0.001,
"loss": 1.6647,
"step": 184500
},
{
"epoch": 59.66386554621849,
"grad_norm": 1.7401574850082397,
"learning_rate": 0.001,
"loss": 1.6853,
"step": 184600
},
{
"epoch": 59.6961861667744,
"grad_norm": 1.6120574474334717,
"learning_rate": 0.001,
"loss": 1.6766,
"step": 184700
},
{
"epoch": 59.72850678733032,
"grad_norm": 1.4168685674667358,
"learning_rate": 0.001,
"loss": 1.6836,
"step": 184800
},
{
"epoch": 59.76082740788623,
"grad_norm": 1.7387588024139404,
"learning_rate": 0.001,
"loss": 1.6833,
"step": 184900
},
{
"epoch": 59.79314802844215,
"grad_norm": 1.6142842769622803,
"learning_rate": 0.001,
"loss": 1.7001,
"step": 185000
},
{
"epoch": 59.82546864899806,
"grad_norm": 1.520094394683838,
"learning_rate": 0.001,
"loss": 1.7084,
"step": 185100
},
{
"epoch": 59.857789269553976,
"grad_norm": 1.6498136520385742,
"learning_rate": 0.001,
"loss": 1.7055,
"step": 185200
},
{
"epoch": 59.89010989010989,
"grad_norm": 1.3799690008163452,
"learning_rate": 0.001,
"loss": 1.6949,
"step": 185300
},
{
"epoch": 59.922430510665805,
"grad_norm": 1.5039490461349487,
"learning_rate": 0.001,
"loss": 1.7057,
"step": 185400
},
{
"epoch": 59.95475113122172,
"grad_norm": 2.032569408416748,
"learning_rate": 0.001,
"loss": 1.7085,
"step": 185500
},
{
"epoch": 59.987071751777634,
"grad_norm": 1.669844388961792,
"learning_rate": 0.001,
"loss": 1.7086,
"step": 185600
},
{
"epoch": 60.01939237233355,
"grad_norm": 1.638071060180664,
"learning_rate": 0.001,
"loss": 1.6557,
"step": 185700
},
{
"epoch": 60.05171299288946,
"grad_norm": 2.303903102874756,
"learning_rate": 0.001,
"loss": 1.5925,
"step": 185800
},
{
"epoch": 60.08403361344538,
"grad_norm": 1.742456316947937,
"learning_rate": 0.001,
"loss": 1.587,
"step": 185900
},
{
"epoch": 60.11635423400129,
"grad_norm": 1.2366214990615845,
"learning_rate": 0.001,
"loss": 1.6073,
"step": 186000
},
{
"epoch": 60.14867485455721,
"grad_norm": 1.7127535343170166,
"learning_rate": 0.001,
"loss": 1.6022,
"step": 186100
},
{
"epoch": 60.18099547511312,
"grad_norm": 1.181023359298706,
"learning_rate": 0.001,
"loss": 1.6293,
"step": 186200
},
{
"epoch": 60.213316095669036,
"grad_norm": 1.2192198038101196,
"learning_rate": 0.001,
"loss": 1.6217,
"step": 186300
},
{
"epoch": 60.24563671622495,
"grad_norm": 1.5543640851974487,
"learning_rate": 0.001,
"loss": 1.6107,
"step": 186400
},
{
"epoch": 60.277957336780865,
"grad_norm": 1.1227211952209473,
"learning_rate": 0.001,
"loss": 1.6365,
"step": 186500
},
{
"epoch": 60.31027795733678,
"grad_norm": 1.0597680807113647,
"learning_rate": 0.001,
"loss": 1.6166,
"step": 186600
},
{
"epoch": 60.342598577892694,
"grad_norm": 1.4818602800369263,
"learning_rate": 0.001,
"loss": 1.6446,
"step": 186700
},
{
"epoch": 60.37491919844861,
"grad_norm": 1.3480159044265747,
"learning_rate": 0.001,
"loss": 1.6248,
"step": 186800
},
{
"epoch": 60.40723981900452,
"grad_norm": 1.5055954456329346,
"learning_rate": 0.001,
"loss": 1.6269,
"step": 186900
},
{
"epoch": 60.43956043956044,
"grad_norm": 1.6578614711761475,
"learning_rate": 0.001,
"loss": 1.6322,
"step": 187000
},
{
"epoch": 60.47188106011635,
"grad_norm": 1.2708501815795898,
"learning_rate": 0.001,
"loss": 1.6435,
"step": 187100
},
{
"epoch": 60.50420168067227,
"grad_norm": 1.213304877281189,
"learning_rate": 0.001,
"loss": 1.6495,
"step": 187200
},
{
"epoch": 60.53652230122818,
"grad_norm": 1.300215721130371,
"learning_rate": 0.001,
"loss": 1.6774,
"step": 187300
},
{
"epoch": 60.568842921784096,
"grad_norm": 1.3916350603103638,
"learning_rate": 0.001,
"loss": 1.6679,
"step": 187400
},
{
"epoch": 60.60116354234001,
"grad_norm": 1.5513008832931519,
"learning_rate": 0.001,
"loss": 1.6526,
"step": 187500
},
{
"epoch": 60.633484162895925,
"grad_norm": 1.6689372062683105,
"learning_rate": 0.001,
"loss": 1.656,
"step": 187600
},
{
"epoch": 60.66580478345184,
"grad_norm": 1.6529994010925293,
"learning_rate": 0.001,
"loss": 1.6587,
"step": 187700
},
{
"epoch": 60.698125404007754,
"grad_norm": 1.4164494276046753,
"learning_rate": 0.001,
"loss": 1.6801,
"step": 187800
},
{
"epoch": 60.73044602456367,
"grad_norm": 1.7202731370925903,
"learning_rate": 0.001,
"loss": 1.674,
"step": 187900
},
{
"epoch": 60.762766645119584,
"grad_norm": 1.2557040452957153,
"learning_rate": 0.001,
"loss": 1.6891,
"step": 188000
},
{
"epoch": 60.7950872656755,
"grad_norm": 1.393248200416565,
"learning_rate": 0.001,
"loss": 1.6826,
"step": 188100
},
{
"epoch": 60.82740788623141,
"grad_norm": 1.4197131395339966,
"learning_rate": 0.001,
"loss": 1.7021,
"step": 188200
},
{
"epoch": 60.85972850678733,
"grad_norm": 1.243842363357544,
"learning_rate": 0.001,
"loss": 1.6776,
"step": 188300
},
{
"epoch": 60.89204912734324,
"grad_norm": 1.5487949848175049,
"learning_rate": 0.001,
"loss": 1.7033,
"step": 188400
},
{
"epoch": 60.924369747899156,
"grad_norm": 1.9102970361709595,
"learning_rate": 0.001,
"loss": 1.6951,
"step": 188500
},
{
"epoch": 60.95669036845507,
"grad_norm": 1.1745511293411255,
"learning_rate": 0.001,
"loss": 1.6816,
"step": 188600
},
{
"epoch": 60.98901098901099,
"grad_norm": 1.4164996147155762,
"learning_rate": 0.001,
"loss": 1.7036,
"step": 188700
},
{
"epoch": 61.02133160956691,
"grad_norm": 1.3019585609436035,
"learning_rate": 0.001,
"loss": 1.6329,
"step": 188800
},
{
"epoch": 61.05365223012282,
"grad_norm": 1.4903287887573242,
"learning_rate": 0.001,
"loss": 1.5689,
"step": 188900
},
{
"epoch": 61.085972850678736,
"grad_norm": 1.4800775051116943,
"learning_rate": 0.001,
"loss": 1.5883,
"step": 189000
},
{
"epoch": 61.11829347123465,
"grad_norm": 1.1266968250274658,
"learning_rate": 0.001,
"loss": 1.5951,
"step": 189100
},
{
"epoch": 61.150614091790565,
"grad_norm": 1.840097188949585,
"learning_rate": 0.001,
"loss": 1.6005,
"step": 189200
},
{
"epoch": 61.18293471234648,
"grad_norm": 1.932331919670105,
"learning_rate": 0.001,
"loss": 1.6065,
"step": 189300
},
{
"epoch": 61.215255332902395,
"grad_norm": 1.543431282043457,
"learning_rate": 0.001,
"loss": 1.6173,
"step": 189400
},
{
"epoch": 61.24757595345831,
"grad_norm": 1.4862014055252075,
"learning_rate": 0.001,
"loss": 1.6023,
"step": 189500
},
{
"epoch": 61.279896574014224,
"grad_norm": 1.5869604349136353,
"learning_rate": 0.001,
"loss": 1.6039,
"step": 189600
},
{
"epoch": 61.31221719457014,
"grad_norm": 1.3288321495056152,
"learning_rate": 0.001,
"loss": 1.6332,
"step": 189700
},
{
"epoch": 61.34453781512605,
"grad_norm": 1.0867267847061157,
"learning_rate": 0.001,
"loss": 1.6153,
"step": 189800
},
{
"epoch": 61.37685843568197,
"grad_norm": 1.447813630104065,
"learning_rate": 0.001,
"loss": 1.6349,
"step": 189900
},
{
"epoch": 61.40917905623788,
"grad_norm": 1.2495040893554688,
"learning_rate": 0.001,
"loss": 1.6337,
"step": 190000
},
{
"epoch": 61.441499676793796,
"grad_norm": 1.3944239616394043,
"learning_rate": 0.001,
"loss": 1.6448,
"step": 190100
},
{
"epoch": 61.47382029734971,
"grad_norm": 1.4838260412216187,
"learning_rate": 0.001,
"loss": 1.6196,
"step": 190200
},
{
"epoch": 61.506140917905626,
"grad_norm": 1.3022147417068481,
"learning_rate": 0.001,
"loss": 1.6427,
"step": 190300
},
{
"epoch": 61.53846153846154,
"grad_norm": 1.3423457145690918,
"learning_rate": 0.001,
"loss": 1.6558,
"step": 190400
},
{
"epoch": 61.570782159017455,
"grad_norm": 1.270648717880249,
"learning_rate": 0.001,
"loss": 1.6486,
"step": 190500
},
{
"epoch": 61.60310277957337,
"grad_norm": 1.671781301498413,
"learning_rate": 0.001,
"loss": 1.647,
"step": 190600
},
{
"epoch": 61.635423400129284,
"grad_norm": 1.0591039657592773,
"learning_rate": 0.001,
"loss": 1.6451,
"step": 190700
},
{
"epoch": 61.6677440206852,
"grad_norm": 1.2032123804092407,
"learning_rate": 0.001,
"loss": 1.6495,
"step": 190800
},
{
"epoch": 61.70006464124111,
"grad_norm": 1.9419327974319458,
"learning_rate": 0.001,
"loss": 1.6573,
"step": 190900
},
{
"epoch": 61.73238526179703,
"grad_norm": 1.8041101694107056,
"learning_rate": 0.001,
"loss": 1.6546,
"step": 191000
},
{
"epoch": 61.76470588235294,
"grad_norm": 1.3328889608383179,
"learning_rate": 0.001,
"loss": 1.6769,
"step": 191100
},
{
"epoch": 61.79702650290886,
"grad_norm": 1.5163036584854126,
"learning_rate": 0.001,
"loss": 1.6669,
"step": 191200
},
{
"epoch": 61.82934712346477,
"grad_norm": 1.349266767501831,
"learning_rate": 0.001,
"loss": 1.6793,
"step": 191300
},
{
"epoch": 61.861667744020686,
"grad_norm": 1.4275840520858765,
"learning_rate": 0.001,
"loss": 1.6818,
"step": 191400
},
{
"epoch": 61.8939883645766,
"grad_norm": 1.475961685180664,
"learning_rate": 0.001,
"loss": 1.6815,
"step": 191500
},
{
"epoch": 61.926308985132515,
"grad_norm": 1.5726628303527832,
"learning_rate": 0.001,
"loss": 1.6682,
"step": 191600
},
{
"epoch": 61.95862960568843,
"grad_norm": 1.38363778591156,
"learning_rate": 0.001,
"loss": 1.6794,
"step": 191700
},
{
"epoch": 61.990950226244344,
"grad_norm": 1.2430803775787354,
"learning_rate": 0.001,
"loss": 1.702,
"step": 191800
},
{
"epoch": 62.02327084680026,
"grad_norm": 1.368446707725525,
"learning_rate": 0.001,
"loss": 1.6099,
"step": 191900
},
{
"epoch": 62.05559146735617,
"grad_norm": 1.095283031463623,
"learning_rate": 0.001,
"loss": 1.5752,
"step": 192000
},
{
"epoch": 62.08791208791209,
"grad_norm": 1.393159031867981,
"learning_rate": 0.001,
"loss": 1.5772,
"step": 192100
},
{
"epoch": 62.120232708468,
"grad_norm": 1.2382171154022217,
"learning_rate": 0.001,
"loss": 1.5867,
"step": 192200
},
{
"epoch": 62.15255332902392,
"grad_norm": 1.3655904531478882,
"learning_rate": 0.001,
"loss": 1.5889,
"step": 192300
},
{
"epoch": 62.18487394957983,
"grad_norm": 1.4175256490707397,
"learning_rate": 0.001,
"loss": 1.5975,
"step": 192400
},
{
"epoch": 62.217194570135746,
"grad_norm": 1.2587052583694458,
"learning_rate": 0.001,
"loss": 1.6041,
"step": 192500
},
{
"epoch": 62.24951519069166,
"grad_norm": 1.8340458869934082,
"learning_rate": 0.001,
"loss": 1.5917,
"step": 192600
},
{
"epoch": 62.281835811247575,
"grad_norm": 1.2407337427139282,
"learning_rate": 0.001,
"loss": 1.604,
"step": 192700
},
{
"epoch": 62.31415643180349,
"grad_norm": 1.2123467922210693,
"learning_rate": 0.001,
"loss": 1.6059,
"step": 192800
},
{
"epoch": 62.346477052359404,
"grad_norm": 1.3114657402038574,
"learning_rate": 0.001,
"loss": 1.6083,
"step": 192900
},
{
"epoch": 62.37879767291532,
"grad_norm": 1.7987451553344727,
"learning_rate": 0.001,
"loss": 1.6117,
"step": 193000
},
{
"epoch": 62.41111829347123,
"grad_norm": 1.8280270099639893,
"learning_rate": 0.001,
"loss": 1.6151,
"step": 193100
},
{
"epoch": 62.44343891402715,
"grad_norm": 1.516326904296875,
"learning_rate": 0.001,
"loss": 1.63,
"step": 193200
},
{
"epoch": 62.47575953458306,
"grad_norm": 1.1470481157302856,
"learning_rate": 0.001,
"loss": 1.6373,
"step": 193300
},
{
"epoch": 62.50808015513898,
"grad_norm": 1.4997236728668213,
"learning_rate": 0.001,
"loss": 1.618,
"step": 193400
},
{
"epoch": 62.54040077569489,
"grad_norm": 1.1891118288040161,
"learning_rate": 0.001,
"loss": 1.64,
"step": 193500
},
{
"epoch": 62.572721396250806,
"grad_norm": 1.4351845979690552,
"learning_rate": 0.001,
"loss": 1.639,
"step": 193600
},
{
"epoch": 62.60504201680672,
"grad_norm": 1.5203590393066406,
"learning_rate": 0.001,
"loss": 1.6484,
"step": 193700
},
{
"epoch": 62.637362637362635,
"grad_norm": 1.3365706205368042,
"learning_rate": 0.001,
"loss": 1.66,
"step": 193800
},
{
"epoch": 62.66968325791855,
"grad_norm": 1.2810354232788086,
"learning_rate": 0.001,
"loss": 1.6524,
"step": 193900
},
{
"epoch": 62.702003878474464,
"grad_norm": 1.2262632846832275,
"learning_rate": 0.001,
"loss": 1.6403,
"step": 194000
},
{
"epoch": 62.73432449903038,
"grad_norm": 1.404079794883728,
"learning_rate": 0.001,
"loss": 1.6542,
"step": 194100
},
{
"epoch": 62.76664511958629,
"grad_norm": 0.9942919015884399,
"learning_rate": 0.001,
"loss": 1.6668,
"step": 194200
},
{
"epoch": 62.79896574014221,
"grad_norm": 0.9326017498970032,
"learning_rate": 0.001,
"loss": 1.6636,
"step": 194300
},
{
"epoch": 62.83128636069812,
"grad_norm": 1.221508264541626,
"learning_rate": 0.001,
"loss": 1.6628,
"step": 194400
},
{
"epoch": 62.86360698125404,
"grad_norm": 1.3761545419692993,
"learning_rate": 0.001,
"loss": 1.6593,
"step": 194500
},
{
"epoch": 62.89592760180995,
"grad_norm": 1.4750157594680786,
"learning_rate": 0.001,
"loss": 1.6685,
"step": 194600
},
{
"epoch": 62.928248222365866,
"grad_norm": 1.1970282793045044,
"learning_rate": 0.001,
"loss": 1.6811,
"step": 194700
},
{
"epoch": 62.96056884292178,
"grad_norm": 1.3890151977539062,
"learning_rate": 0.001,
"loss": 1.678,
"step": 194800
},
{
"epoch": 62.992889463477695,
"grad_norm": 1.246107578277588,
"learning_rate": 0.001,
"loss": 1.6759,
"step": 194900
},
{
"epoch": 63.02521008403362,
"grad_norm": 1.2954941987991333,
"learning_rate": 0.001,
"loss": 1.6029,
"step": 195000
},
{
"epoch": 63.05753070458953,
"grad_norm": 1.8841441869735718,
"learning_rate": 0.001,
"loss": 1.5563,
"step": 195100
},
{
"epoch": 63.089851325145446,
"grad_norm": 1.5582014322280884,
"learning_rate": 0.001,
"loss": 1.5777,
"step": 195200
},
{
"epoch": 63.12217194570136,
"grad_norm": 1.4565472602844238,
"learning_rate": 0.001,
"loss": 1.5718,
"step": 195300
},
{
"epoch": 63.154492566257275,
"grad_norm": 1.4416550397872925,
"learning_rate": 0.001,
"loss": 1.5738,
"step": 195400
},
{
"epoch": 63.18681318681319,
"grad_norm": 1.4541130065917969,
"learning_rate": 0.001,
"loss": 1.5861,
"step": 195500
},
{
"epoch": 63.219133807369104,
"grad_norm": 1.5667710304260254,
"learning_rate": 0.001,
"loss": 1.5822,
"step": 195600
},
{
"epoch": 63.25145442792502,
"grad_norm": 1.415602445602417,
"learning_rate": 0.001,
"loss": 1.5934,
"step": 195700
},
{
"epoch": 63.28377504848093,
"grad_norm": 1.4232977628707886,
"learning_rate": 0.001,
"loss": 1.6013,
"step": 195800
},
{
"epoch": 63.31609566903685,
"grad_norm": 1.4085842370986938,
"learning_rate": 0.001,
"loss": 1.5978,
"step": 195900
},
{
"epoch": 63.34841628959276,
"grad_norm": 1.247901439666748,
"learning_rate": 0.001,
"loss": 1.6006,
"step": 196000
},
{
"epoch": 63.38073691014868,
"grad_norm": 1.6161179542541504,
"learning_rate": 0.001,
"loss": 1.5963,
"step": 196100
},
{
"epoch": 63.41305753070459,
"grad_norm": 1.4580583572387695,
"learning_rate": 0.001,
"loss": 1.6122,
"step": 196200
},
{
"epoch": 63.445378151260506,
"grad_norm": 1.6338632106781006,
"learning_rate": 0.001,
"loss": 1.6249,
"step": 196300
},
{
"epoch": 63.47769877181642,
"grad_norm": 1.6578707695007324,
"learning_rate": 0.001,
"loss": 1.6097,
"step": 196400
},
{
"epoch": 63.510019392372335,
"grad_norm": 1.5980095863342285,
"learning_rate": 0.001,
"loss": 1.617,
"step": 196500
},
{
"epoch": 63.54234001292825,
"grad_norm": 1.0995980501174927,
"learning_rate": 0.001,
"loss": 1.6297,
"step": 196600
},
{
"epoch": 63.574660633484164,
"grad_norm": 1.4633560180664062,
"learning_rate": 0.001,
"loss": 1.6169,
"step": 196700
},
{
"epoch": 63.60698125404008,
"grad_norm": 1.428837537765503,
"learning_rate": 0.001,
"loss": 1.6364,
"step": 196800
},
{
"epoch": 63.63930187459599,
"grad_norm": 1.0487279891967773,
"learning_rate": 0.001,
"loss": 1.6567,
"step": 196900
},
{
"epoch": 63.67162249515191,
"grad_norm": 1.2603679895401,
"learning_rate": 0.001,
"loss": 1.646,
"step": 197000
},
{
"epoch": 63.70394311570782,
"grad_norm": 1.264657974243164,
"learning_rate": 0.001,
"loss": 1.6411,
"step": 197100
},
{
"epoch": 63.73626373626374,
"grad_norm": 1.2870080471038818,
"learning_rate": 0.001,
"loss": 1.6392,
"step": 197200
},
{
"epoch": 63.76858435681965,
"grad_norm": 1.1723002195358276,
"learning_rate": 0.001,
"loss": 1.6656,
"step": 197300
},
{
"epoch": 63.800904977375566,
"grad_norm": 1.2961125373840332,
"learning_rate": 0.001,
"loss": 1.6515,
"step": 197400
},
{
"epoch": 63.83322559793148,
"grad_norm": 1.5006647109985352,
"learning_rate": 0.001,
"loss": 1.6418,
"step": 197500
},
{
"epoch": 63.865546218487395,
"grad_norm": 1.268092393875122,
"learning_rate": 0.001,
"loss": 1.6733,
"step": 197600
},
{
"epoch": 63.89786683904331,
"grad_norm": 1.032017469406128,
"learning_rate": 0.001,
"loss": 1.6558,
"step": 197700
},
{
"epoch": 63.930187459599225,
"grad_norm": 1.181327223777771,
"learning_rate": 0.001,
"loss": 1.6631,
"step": 197800
},
{
"epoch": 63.96250808015514,
"grad_norm": 1.2115685939788818,
"learning_rate": 0.001,
"loss": 1.6711,
"step": 197900
},
{
"epoch": 63.994828700711054,
"grad_norm": 1.368562936782837,
"learning_rate": 0.001,
"loss": 1.6624,
"step": 198000
},
{
"epoch": 64.02714932126698,
"grad_norm": 1.4267082214355469,
"learning_rate": 0.001,
"loss": 1.582,
"step": 198100
},
{
"epoch": 64.05946994182288,
"grad_norm": 1.3782719373703003,
"learning_rate": 0.001,
"loss": 1.5388,
"step": 198200
},
{
"epoch": 64.0917905623788,
"grad_norm": 1.500325083732605,
"learning_rate": 0.001,
"loss": 1.555,
"step": 198300
},
{
"epoch": 64.12411118293471,
"grad_norm": 1.6411162614822388,
"learning_rate": 0.001,
"loss": 1.5433,
"step": 198400
},
{
"epoch": 64.15643180349063,
"grad_norm": 1.4161418676376343,
"learning_rate": 0.001,
"loss": 1.5618,
"step": 198500
},
{
"epoch": 64.18875242404654,
"grad_norm": 1.2888981103897095,
"learning_rate": 0.001,
"loss": 1.5708,
"step": 198600
},
{
"epoch": 64.22107304460246,
"grad_norm": 1.5764001607894897,
"learning_rate": 0.001,
"loss": 1.578,
"step": 198700
},
{
"epoch": 64.25339366515837,
"grad_norm": 1.4483191967010498,
"learning_rate": 0.001,
"loss": 1.5664,
"step": 198800
},
{
"epoch": 64.28571428571429,
"grad_norm": 1.844832181930542,
"learning_rate": 0.001,
"loss": 1.6023,
"step": 198900
},
{
"epoch": 64.3180349062702,
"grad_norm": 1.3805255889892578,
"learning_rate": 0.001,
"loss": 1.5934,
"step": 199000
},
{
"epoch": 64.35035552682612,
"grad_norm": 1.3433549404144287,
"learning_rate": 0.001,
"loss": 1.5915,
"step": 199100
},
{
"epoch": 64.38267614738203,
"grad_norm": 1.7375874519348145,
"learning_rate": 0.001,
"loss": 1.5885,
"step": 199200
},
{
"epoch": 64.41499676793795,
"grad_norm": 1.5128262042999268,
"learning_rate": 0.001,
"loss": 1.5863,
"step": 199300
},
{
"epoch": 64.44731738849386,
"grad_norm": 1.608688235282898,
"learning_rate": 0.001,
"loss": 1.6058,
"step": 199400
},
{
"epoch": 64.47963800904978,
"grad_norm": 1.6296998262405396,
"learning_rate": 0.001,
"loss": 1.6096,
"step": 199500
},
{
"epoch": 64.51195862960569,
"grad_norm": 1.5886939764022827,
"learning_rate": 0.001,
"loss": 1.6216,
"step": 199600
},
{
"epoch": 64.54427925016161,
"grad_norm": 1.421675443649292,
"learning_rate": 0.001,
"loss": 1.6242,
"step": 199700
},
{
"epoch": 64.57659987071752,
"grad_norm": 1.5409778356552124,
"learning_rate": 0.001,
"loss": 1.624,
"step": 199800
},
{
"epoch": 64.60892049127344,
"grad_norm": 1.9098420143127441,
"learning_rate": 0.001,
"loss": 1.645,
"step": 199900
},
{
"epoch": 64.64124111182934,
"grad_norm": 1.2659186124801636,
"learning_rate": 0.001,
"loss": 1.632,
"step": 200000
},
{
"epoch": 64.67356173238527,
"grad_norm": 1.7314198017120361,
"learning_rate": 0.001,
"loss": 1.6317,
"step": 200100
},
{
"epoch": 64.70588235294117,
"grad_norm": 1.4244946241378784,
"learning_rate": 0.001,
"loss": 1.647,
"step": 200200
},
{
"epoch": 64.7382029734971,
"grad_norm": 1.355060338973999,
"learning_rate": 0.001,
"loss": 1.6356,
"step": 200300
},
{
"epoch": 64.770523594053,
"grad_norm": 1.4022140502929688,
"learning_rate": 0.001,
"loss": 1.6212,
"step": 200400
},
{
"epoch": 64.80284421460892,
"grad_norm": 1.4149821996688843,
"learning_rate": 0.001,
"loss": 1.6613,
"step": 200500
},
{
"epoch": 64.83516483516483,
"grad_norm": 1.4350502490997314,
"learning_rate": 0.001,
"loss": 1.6431,
"step": 200600
},
{
"epoch": 64.86748545572075,
"grad_norm": 1.5340101718902588,
"learning_rate": 0.001,
"loss": 1.6499,
"step": 200700
},
{
"epoch": 64.89980607627666,
"grad_norm": 1.2688623666763306,
"learning_rate": 0.001,
"loss": 1.6539,
"step": 200800
},
{
"epoch": 64.93212669683258,
"grad_norm": 1.8362963199615479,
"learning_rate": 0.001,
"loss": 1.6486,
"step": 200900
},
{
"epoch": 64.96444731738849,
"grad_norm": 1.0354406833648682,
"learning_rate": 0.001,
"loss": 1.6534,
"step": 201000
},
{
"epoch": 64.99676793794441,
"grad_norm": 1.8147214651107788,
"learning_rate": 0.001,
"loss": 1.6395,
"step": 201100
},
{
"epoch": 65.02908855850032,
"grad_norm": 1.3064634799957275,
"learning_rate": 0.001,
"loss": 1.5475,
"step": 201200
},
{
"epoch": 65.06140917905624,
"grad_norm": 1.3053911924362183,
"learning_rate": 0.001,
"loss": 1.525,
"step": 201300
},
{
"epoch": 65.09372979961215,
"grad_norm": 1.8994085788726807,
"learning_rate": 0.001,
"loss": 1.5361,
"step": 201400
},
{
"epoch": 65.12605042016807,
"grad_norm": 1.4329079389572144,
"learning_rate": 0.001,
"loss": 1.5706,
"step": 201500
},
{
"epoch": 65.15837104072398,
"grad_norm": 1.4145162105560303,
"learning_rate": 0.001,
"loss": 1.5803,
"step": 201600
},
{
"epoch": 65.1906916612799,
"grad_norm": 1.7784736156463623,
"learning_rate": 0.001,
"loss": 1.5627,
"step": 201700
},
{
"epoch": 65.2230122818358,
"grad_norm": 1.8180545568466187,
"learning_rate": 0.001,
"loss": 1.5645,
"step": 201800
},
{
"epoch": 65.25533290239173,
"grad_norm": 1.5060557126998901,
"learning_rate": 0.001,
"loss": 1.5773,
"step": 201900
},
{
"epoch": 65.28765352294764,
"grad_norm": 1.6624622344970703,
"learning_rate": 0.001,
"loss": 1.5682,
"step": 202000
},
{
"epoch": 65.31997414350356,
"grad_norm": 1.513641119003296,
"learning_rate": 0.001,
"loss": 1.5841,
"step": 202100
},
{
"epoch": 65.35229476405947,
"grad_norm": 1.789554476737976,
"learning_rate": 0.001,
"loss": 1.6033,
"step": 202200
},
{
"epoch": 65.38461538461539,
"grad_norm": 2.5331172943115234,
"learning_rate": 0.001,
"loss": 1.5851,
"step": 202300
},
{
"epoch": 65.4169360051713,
"grad_norm": 1.5825227499008179,
"learning_rate": 0.001,
"loss": 1.5732,
"step": 202400
},
{
"epoch": 65.44925662572722,
"grad_norm": 1.7947232723236084,
"learning_rate": 0.001,
"loss": 1.5976,
"step": 202500
},
{
"epoch": 65.48157724628312,
"grad_norm": 1.7513070106506348,
"learning_rate": 0.001,
"loss": 1.5981,
"step": 202600
},
{
"epoch": 65.51389786683905,
"grad_norm": 1.9662882089614868,
"learning_rate": 0.001,
"loss": 1.622,
"step": 202700
},
{
"epoch": 65.54621848739495,
"grad_norm": 1.6715610027313232,
"learning_rate": 0.001,
"loss": 1.6087,
"step": 202800
},
{
"epoch": 65.57853910795087,
"grad_norm": 1.2199307680130005,
"learning_rate": 0.001,
"loss": 1.6237,
"step": 202900
},
{
"epoch": 65.61085972850678,
"grad_norm": 1.4250069856643677,
"learning_rate": 0.001,
"loss": 1.6225,
"step": 203000
},
{
"epoch": 65.6431803490627,
"grad_norm": 1.7255460023880005,
"learning_rate": 0.001,
"loss": 1.6139,
"step": 203100
},
{
"epoch": 65.67550096961861,
"grad_norm": 1.2933099269866943,
"learning_rate": 0.001,
"loss": 1.6239,
"step": 203200
},
{
"epoch": 65.70782159017453,
"grad_norm": 1.560722827911377,
"learning_rate": 0.001,
"loss": 1.6286,
"step": 203300
},
{
"epoch": 65.74014221073044,
"grad_norm": 1.5156254768371582,
"learning_rate": 0.001,
"loss": 1.6239,
"step": 203400
},
{
"epoch": 65.77246283128636,
"grad_norm": 1.7756282091140747,
"learning_rate": 0.001,
"loss": 1.629,
"step": 203500
},
{
"epoch": 65.80478345184227,
"grad_norm": 1.8276307582855225,
"learning_rate": 0.001,
"loss": 1.6207,
"step": 203600
},
{
"epoch": 65.83710407239819,
"grad_norm": 1.3041716814041138,
"learning_rate": 0.001,
"loss": 1.6341,
"step": 203700
},
{
"epoch": 65.8694246929541,
"grad_norm": 1.604331374168396,
"learning_rate": 0.001,
"loss": 1.629,
"step": 203800
},
{
"epoch": 65.90174531351002,
"grad_norm": 1.3833218812942505,
"learning_rate": 0.001,
"loss": 1.6493,
"step": 203900
},
{
"epoch": 65.93406593406593,
"grad_norm": 1.6882765293121338,
"learning_rate": 0.001,
"loss": 1.6559,
"step": 204000
},
{
"epoch": 65.96638655462185,
"grad_norm": 1.5404090881347656,
"learning_rate": 0.001,
"loss": 1.639,
"step": 204100
},
{
"epoch": 65.99870717517777,
"grad_norm": 1.791722059249878,
"learning_rate": 0.001,
"loss": 1.6264,
"step": 204200
},
{
"epoch": 66.03102779573368,
"grad_norm": 1.9372780323028564,
"learning_rate": 0.001,
"loss": 1.5299,
"step": 204300
},
{
"epoch": 66.0633484162896,
"grad_norm": 1.8623173236846924,
"learning_rate": 0.001,
"loss": 1.5294,
"step": 204400
},
{
"epoch": 66.0956690368455,
"grad_norm": 2.164478063583374,
"learning_rate": 0.001,
"loss": 1.554,
"step": 204500
},
{
"epoch": 66.12798965740143,
"grad_norm": 1.894229531288147,
"learning_rate": 0.001,
"loss": 1.55,
"step": 204600
},
{
"epoch": 66.16031027795734,
"grad_norm": 1.493912935256958,
"learning_rate": 0.001,
"loss": 1.56,
"step": 204700
},
{
"epoch": 66.19263089851326,
"grad_norm": 1.607630968093872,
"learning_rate": 0.001,
"loss": 1.5709,
"step": 204800
},
{
"epoch": 66.22495151906917,
"grad_norm": 1.5320664644241333,
"learning_rate": 0.001,
"loss": 1.5523,
"step": 204900
},
{
"epoch": 66.25727213962509,
"grad_norm": 1.4517817497253418,
"learning_rate": 0.001,
"loss": 1.5756,
"step": 205000
},
{
"epoch": 66.289592760181,
"grad_norm": 1.4102898836135864,
"learning_rate": 0.001,
"loss": 1.5582,
"step": 205100
},
{
"epoch": 66.32191338073692,
"grad_norm": 1.6045602560043335,
"learning_rate": 0.001,
"loss": 1.5812,
"step": 205200
},
{
"epoch": 66.35423400129282,
"grad_norm": 2.0872433185577393,
"learning_rate": 0.001,
"loss": 1.5771,
"step": 205300
},
{
"epoch": 66.38655462184875,
"grad_norm": 1.7898145914077759,
"learning_rate": 0.001,
"loss": 1.5632,
"step": 205400
},
{
"epoch": 66.41887524240465,
"grad_norm": 1.5753815174102783,
"learning_rate": 0.001,
"loss": 1.5897,
"step": 205500
},
{
"epoch": 66.45119586296057,
"grad_norm": 1.8376567363739014,
"learning_rate": 0.001,
"loss": 1.5991,
"step": 205600
},
{
"epoch": 66.48351648351648,
"grad_norm": 1.246621012687683,
"learning_rate": 0.001,
"loss": 1.5738,
"step": 205700
},
{
"epoch": 66.5158371040724,
"grad_norm": 1.9372217655181885,
"learning_rate": 0.001,
"loss": 1.5895,
"step": 205800
},
{
"epoch": 66.54815772462831,
"grad_norm": 1.6086442470550537,
"learning_rate": 0.001,
"loss": 1.5999,
"step": 205900
},
{
"epoch": 66.58047834518423,
"grad_norm": 1.5425879955291748,
"learning_rate": 0.001,
"loss": 1.6072,
"step": 206000
},
{
"epoch": 66.61279896574014,
"grad_norm": 1.7193233966827393,
"learning_rate": 0.001,
"loss": 1.6019,
"step": 206100
},
{
"epoch": 66.64511958629606,
"grad_norm": 2.3257246017456055,
"learning_rate": 0.001,
"loss": 1.6003,
"step": 206200
},
{
"epoch": 66.67744020685197,
"grad_norm": 1.939846396446228,
"learning_rate": 0.001,
"loss": 1.6202,
"step": 206300
},
{
"epoch": 66.70976082740789,
"grad_norm": 1.9590939283370972,
"learning_rate": 0.001,
"loss": 1.6085,
"step": 206400
},
{
"epoch": 66.7420814479638,
"grad_norm": 1.5885615348815918,
"learning_rate": 0.001,
"loss": 1.6189,
"step": 206500
},
{
"epoch": 66.77440206851972,
"grad_norm": 1.4376970529556274,
"learning_rate": 0.001,
"loss": 1.6185,
"step": 206600
},
{
"epoch": 66.80672268907563,
"grad_norm": 1.5070077180862427,
"learning_rate": 0.001,
"loss": 1.6083,
"step": 206700
},
{
"epoch": 66.83904330963155,
"grad_norm": 1.8947584629058838,
"learning_rate": 0.001,
"loss": 1.6184,
"step": 206800
},
{
"epoch": 66.87136393018746,
"grad_norm": 1.4360790252685547,
"learning_rate": 0.001,
"loss": 1.6363,
"step": 206900
},
{
"epoch": 66.90368455074338,
"grad_norm": 1.6498112678527832,
"learning_rate": 0.001,
"loss": 1.6361,
"step": 207000
},
{
"epoch": 66.93600517129929,
"grad_norm": 1.7067687511444092,
"learning_rate": 0.001,
"loss": 1.6187,
"step": 207100
},
{
"epoch": 66.96832579185521,
"grad_norm": 1.905935287475586,
"learning_rate": 0.001,
"loss": 1.6511,
"step": 207200
},
{
"epoch": 67.00064641241111,
"grad_norm": 2.8338687419891357,
"learning_rate": 0.001,
"loss": 1.6223,
"step": 207300
},
{
"epoch": 67.03296703296704,
"grad_norm": 1.7403825521469116,
"learning_rate": 0.001,
"loss": 1.5059,
"step": 207400
},
{
"epoch": 67.06528765352294,
"grad_norm": 1.6658779382705688,
"learning_rate": 0.001,
"loss": 1.5186,
"step": 207500
},
{
"epoch": 67.09760827407887,
"grad_norm": 1.9711863994598389,
"learning_rate": 0.001,
"loss": 1.5343,
"step": 207600
},
{
"epoch": 67.12992889463477,
"grad_norm": 1.5921905040740967,
"learning_rate": 0.001,
"loss": 1.5336,
"step": 207700
},
{
"epoch": 67.1622495151907,
"grad_norm": 1.9078835248947144,
"learning_rate": 0.001,
"loss": 1.5284,
"step": 207800
},
{
"epoch": 67.1945701357466,
"grad_norm": 2.686221122741699,
"learning_rate": 0.001,
"loss": 1.5466,
"step": 207900
},
{
"epoch": 67.22689075630252,
"grad_norm": 1.8036460876464844,
"learning_rate": 0.001,
"loss": 1.5566,
"step": 208000
},
{
"epoch": 67.25921137685843,
"grad_norm": 1.9834649562835693,
"learning_rate": 0.001,
"loss": 1.5432,
"step": 208100
},
{
"epoch": 67.29153199741435,
"grad_norm": 2.482227087020874,
"learning_rate": 0.001,
"loss": 1.5466,
"step": 208200
},
{
"epoch": 67.32385261797026,
"grad_norm": 1.6784040927886963,
"learning_rate": 0.001,
"loss": 1.5603,
"step": 208300
},
{
"epoch": 67.35617323852618,
"grad_norm": 1.9004853963851929,
"learning_rate": 0.001,
"loss": 1.5535,
"step": 208400
},
{
"epoch": 67.38849385908209,
"grad_norm": 2.2348580360412598,
"learning_rate": 0.001,
"loss": 1.5645,
"step": 208500
},
{
"epoch": 67.42081447963801,
"grad_norm": 1.5861483812332153,
"learning_rate": 0.001,
"loss": 1.5818,
"step": 208600
},
{
"epoch": 67.45313510019392,
"grad_norm": 2.3207311630249023,
"learning_rate": 0.001,
"loss": 1.5708,
"step": 208700
},
{
"epoch": 67.48545572074984,
"grad_norm": 3.079047918319702,
"learning_rate": 0.001,
"loss": 1.5933,
"step": 208800
},
{
"epoch": 67.51777634130575,
"grad_norm": 1.9215753078460693,
"learning_rate": 0.001,
"loss": 1.5945,
"step": 208900
},
{
"epoch": 67.55009696186167,
"grad_norm": 1.2818607091903687,
"learning_rate": 0.001,
"loss": 1.5983,
"step": 209000
},
{
"epoch": 67.58241758241758,
"grad_norm": 1.4433931112289429,
"learning_rate": 0.001,
"loss": 1.608,
"step": 209100
},
{
"epoch": 67.6147382029735,
"grad_norm": 1.771817922592163,
"learning_rate": 0.001,
"loss": 1.5869,
"step": 209200
},
{
"epoch": 67.6470588235294,
"grad_norm": 2.404127597808838,
"learning_rate": 0.001,
"loss": 1.5879,
"step": 209300
},
{
"epoch": 67.67937944408533,
"grad_norm": 1.46668541431427,
"learning_rate": 0.001,
"loss": 1.6071,
"step": 209400
},
{
"epoch": 67.71170006464124,
"grad_norm": 2.0058186054229736,
"learning_rate": 0.001,
"loss": 1.5945,
"step": 209500
},
{
"epoch": 67.74402068519716,
"grad_norm": 1.8062503337860107,
"learning_rate": 0.001,
"loss": 1.6054,
"step": 209600
},
{
"epoch": 67.77634130575306,
"grad_norm": 2.3619446754455566,
"learning_rate": 0.001,
"loss": 1.6172,
"step": 209700
},
{
"epoch": 67.80866192630899,
"grad_norm": 1.6072543859481812,
"learning_rate": 0.001,
"loss": 1.5929,
"step": 209800
},
{
"epoch": 67.8409825468649,
"grad_norm": 1.9901092052459717,
"learning_rate": 0.001,
"loss": 1.6103,
"step": 209900
},
{
"epoch": 67.87330316742081,
"grad_norm": 1.511841893196106,
"learning_rate": 0.001,
"loss": 1.6441,
"step": 210000
},
{
"epoch": 67.90562378797672,
"grad_norm": 1.9327521324157715,
"learning_rate": 0.001,
"loss": 1.6185,
"step": 210100
},
{
"epoch": 67.93794440853264,
"grad_norm": 1.9379057884216309,
"learning_rate": 0.001,
"loss": 1.6331,
"step": 210200
},
{
"epoch": 67.97026502908855,
"grad_norm": 2.142944097518921,
"learning_rate": 0.001,
"loss": 1.6382,
"step": 210300
},
{
"epoch": 68.00258564964447,
"grad_norm": 1.401830792427063,
"learning_rate": 0.001,
"loss": 1.6396,
"step": 210400
},
{
"epoch": 68.0349062702004,
"grad_norm": 1.776995301246643,
"learning_rate": 0.001,
"loss": 1.4978,
"step": 210500
},
{
"epoch": 68.0672268907563,
"grad_norm": 1.409732699394226,
"learning_rate": 0.001,
"loss": 1.5218,
"step": 210600
},
{
"epoch": 68.09954751131222,
"grad_norm": 2.3525753021240234,
"learning_rate": 0.001,
"loss": 1.5259,
"step": 210700
},
{
"epoch": 68.13186813186813,
"grad_norm": 1.5566622018814087,
"learning_rate": 0.001,
"loss": 1.5528,
"step": 210800
},
{
"epoch": 68.16418875242405,
"grad_norm": 1.6221765279769897,
"learning_rate": 0.001,
"loss": 1.5335,
"step": 210900
},
{
"epoch": 68.19650937297996,
"grad_norm": 1.8986177444458008,
"learning_rate": 0.001,
"loss": 1.546,
"step": 211000
},
{
"epoch": 68.22882999353588,
"grad_norm": 1.4100441932678223,
"learning_rate": 0.001,
"loss": 1.5445,
"step": 211100
},
{
"epoch": 68.26115061409179,
"grad_norm": 1.8427026271820068,
"learning_rate": 0.001,
"loss": 1.5541,
"step": 211200
},
{
"epoch": 68.29347123464771,
"grad_norm": 2.2507922649383545,
"learning_rate": 0.001,
"loss": 1.5481,
"step": 211300
},
{
"epoch": 68.32579185520362,
"grad_norm": 1.769182801246643,
"learning_rate": 0.001,
"loss": 1.5397,
"step": 211400
},
{
"epoch": 68.35811247575954,
"grad_norm": 1.8999907970428467,
"learning_rate": 0.001,
"loss": 1.5467,
"step": 211500
},
{
"epoch": 68.39043309631545,
"grad_norm": 1.141658067703247,
"learning_rate": 0.001,
"loss": 1.5666,
"step": 211600
},
{
"epoch": 68.42275371687137,
"grad_norm": 1.5240708589553833,
"learning_rate": 0.001,
"loss": 1.5606,
"step": 211700
},
{
"epoch": 68.45507433742728,
"grad_norm": 1.4288796186447144,
"learning_rate": 0.001,
"loss": 1.5732,
"step": 211800
},
{
"epoch": 68.4873949579832,
"grad_norm": 1.3086295127868652,
"learning_rate": 0.001,
"loss": 1.5758,
"step": 211900
},
{
"epoch": 68.5197155785391,
"grad_norm": 1.2653098106384277,
"learning_rate": 0.001,
"loss": 1.5713,
"step": 212000
},
{
"epoch": 68.55203619909503,
"grad_norm": 1.4599053859710693,
"learning_rate": 0.001,
"loss": 1.5817,
"step": 212100
},
{
"epoch": 68.58435681965094,
"grad_norm": 1.642386794090271,
"learning_rate": 0.001,
"loss": 1.5719,
"step": 212200
},
{
"epoch": 68.61667744020686,
"grad_norm": 1.565251350402832,
"learning_rate": 0.001,
"loss": 1.5889,
"step": 212300
},
{
"epoch": 68.64899806076276,
"grad_norm": 1.2315536737442017,
"learning_rate": 0.001,
"loss": 1.5831,
"step": 212400
},
{
"epoch": 68.68131868131869,
"grad_norm": 2.1367335319519043,
"learning_rate": 0.001,
"loss": 1.5915,
"step": 212500
},
{
"epoch": 68.7136393018746,
"grad_norm": 1.3036410808563232,
"learning_rate": 0.001,
"loss": 1.5882,
"step": 212600
},
{
"epoch": 68.74595992243052,
"grad_norm": 1.7766282558441162,
"learning_rate": 0.001,
"loss": 1.5965,
"step": 212700
},
{
"epoch": 68.77828054298642,
"grad_norm": 1.840104579925537,
"learning_rate": 0.001,
"loss": 1.5873,
"step": 212800
},
{
"epoch": 68.81060116354234,
"grad_norm": 1.8919962644577026,
"learning_rate": 0.001,
"loss": 1.6055,
"step": 212900
},
{
"epoch": 68.84292178409825,
"grad_norm": 1.4418390989303589,
"learning_rate": 0.001,
"loss": 1.626,
"step": 213000
},
{
"epoch": 68.87524240465417,
"grad_norm": 1.54845130443573,
"learning_rate": 0.001,
"loss": 1.6256,
"step": 213100
},
{
"epoch": 68.90756302521008,
"grad_norm": 1.4561160802841187,
"learning_rate": 0.001,
"loss": 1.6176,
"step": 213200
},
{
"epoch": 68.939883645766,
"grad_norm": 1.3307286500930786,
"learning_rate": 0.001,
"loss": 1.6251,
"step": 213300
},
{
"epoch": 68.97220426632191,
"grad_norm": 1.0837733745574951,
"learning_rate": 0.001,
"loss": 1.6189,
"step": 213400
},
{
"epoch": 69.00452488687783,
"grad_norm": 1.1446514129638672,
"learning_rate": 0.001,
"loss": 1.61,
"step": 213500
},
{
"epoch": 69.03684550743374,
"grad_norm": 1.1614508628845215,
"learning_rate": 0.001,
"loss": 1.5068,
"step": 213600
},
{
"epoch": 69.06916612798966,
"grad_norm": 1.7956161499023438,
"learning_rate": 0.001,
"loss": 1.5062,
"step": 213700
},
{
"epoch": 69.10148674854557,
"grad_norm": 1.4920622110366821,
"learning_rate": 0.001,
"loss": 1.5255,
"step": 213800
},
{
"epoch": 69.13380736910149,
"grad_norm": 1.180904507637024,
"learning_rate": 0.001,
"loss": 1.5239,
"step": 213900
},
{
"epoch": 69.1661279896574,
"grad_norm": 1.4228894710540771,
"learning_rate": 0.001,
"loss": 1.5292,
"step": 214000
},
{
"epoch": 69.19844861021332,
"grad_norm": 1.5594555139541626,
"learning_rate": 0.001,
"loss": 1.5186,
"step": 214100
},
{
"epoch": 69.23076923076923,
"grad_norm": 1.0493789911270142,
"learning_rate": 0.001,
"loss": 1.5329,
"step": 214200
},
{
"epoch": 69.26308985132515,
"grad_norm": 1.764050841331482,
"learning_rate": 0.001,
"loss": 1.542,
"step": 214300
},
{
"epoch": 69.29541047188106,
"grad_norm": 1.9596831798553467,
"learning_rate": 0.001,
"loss": 1.5347,
"step": 214400
},
{
"epoch": 69.32773109243698,
"grad_norm": 1.615365982055664,
"learning_rate": 0.001,
"loss": 1.5616,
"step": 214500
},
{
"epoch": 69.36005171299288,
"grad_norm": 1.2732131481170654,
"learning_rate": 0.001,
"loss": 1.5411,
"step": 214600
},
{
"epoch": 69.3923723335488,
"grad_norm": 1.531451940536499,
"learning_rate": 0.001,
"loss": 1.5435,
"step": 214700
},
{
"epoch": 69.42469295410471,
"grad_norm": 1.7909483909606934,
"learning_rate": 0.001,
"loss": 1.543,
"step": 214800
},
{
"epoch": 69.45701357466064,
"grad_norm": 0.9809910655021667,
"learning_rate": 0.001,
"loss": 1.5491,
"step": 214900
},
{
"epoch": 69.48933419521654,
"grad_norm": 1.3432776927947998,
"learning_rate": 0.001,
"loss": 1.5731,
"step": 215000
},
{
"epoch": 69.52165481577246,
"grad_norm": 1.2507835626602173,
"learning_rate": 0.001,
"loss": 1.5713,
"step": 215100
},
{
"epoch": 69.55397543632837,
"grad_norm": 1.3285642862319946,
"learning_rate": 0.001,
"loss": 1.5724,
"step": 215200
},
{
"epoch": 69.5862960568843,
"grad_norm": 1.9070652723312378,
"learning_rate": 0.001,
"loss": 1.5737,
"step": 215300
},
{
"epoch": 69.6186166774402,
"grad_norm": 1.5597374439239502,
"learning_rate": 0.001,
"loss": 1.5839,
"step": 215400
},
{
"epoch": 69.65093729799612,
"grad_norm": 1.8160336017608643,
"learning_rate": 0.001,
"loss": 1.589,
"step": 215500
},
{
"epoch": 69.68325791855203,
"grad_norm": 1.1347708702087402,
"learning_rate": 0.001,
"loss": 1.5807,
"step": 215600
},
{
"epoch": 69.71557853910795,
"grad_norm": 1.2475730180740356,
"learning_rate": 0.001,
"loss": 1.5937,
"step": 215700
},
{
"epoch": 69.74789915966386,
"grad_norm": 1.8533042669296265,
"learning_rate": 0.001,
"loss": 1.5843,
"step": 215800
},
{
"epoch": 69.78021978021978,
"grad_norm": 1.7402023077011108,
"learning_rate": 0.001,
"loss": 1.5811,
"step": 215900
},
{
"epoch": 69.81254040077569,
"grad_norm": 1.4845796823501587,
"learning_rate": 0.001,
"loss": 1.5987,
"step": 216000
},
{
"epoch": 69.84486102133161,
"grad_norm": 1.5384836196899414,
"learning_rate": 0.001,
"loss": 1.6039,
"step": 216100
},
{
"epoch": 69.87718164188752,
"grad_norm": 1.451719045639038,
"learning_rate": 0.001,
"loss": 1.5907,
"step": 216200
},
{
"epoch": 69.90950226244344,
"grad_norm": 1.1956146955490112,
"learning_rate": 0.001,
"loss": 1.5954,
"step": 216300
},
{
"epoch": 69.94182288299935,
"grad_norm": 1.8268797397613525,
"learning_rate": 0.001,
"loss": 1.619,
"step": 216400
},
{
"epoch": 69.97414350355527,
"grad_norm": 1.3545068502426147,
"learning_rate": 0.001,
"loss": 1.6064,
"step": 216500
},
{
"epoch": 70.00646412411119,
"grad_norm": 1.1828221082687378,
"learning_rate": 0.001,
"loss": 1.6031,
"step": 216600
},
{
"epoch": 70.0387847446671,
"grad_norm": 1.4522184133529663,
"learning_rate": 0.001,
"loss": 1.4794,
"step": 216700
},
{
"epoch": 70.07110536522302,
"grad_norm": 1.7155815362930298,
"learning_rate": 0.001,
"loss": 1.4898,
"step": 216800
},
{
"epoch": 70.10342598577893,
"grad_norm": 1.4515830278396606,
"learning_rate": 0.001,
"loss": 1.5079,
"step": 216900
},
{
"epoch": 70.13574660633485,
"grad_norm": 1.3109537363052368,
"learning_rate": 0.001,
"loss": 1.5131,
"step": 217000
},
{
"epoch": 70.16806722689076,
"grad_norm": 1.8338853120803833,
"learning_rate": 0.001,
"loss": 1.5118,
"step": 217100
},
{
"epoch": 70.20038784744668,
"grad_norm": 1.5463823080062866,
"learning_rate": 0.001,
"loss": 1.512,
"step": 217200
},
{
"epoch": 70.23270846800258,
"grad_norm": 1.2792097330093384,
"learning_rate": 0.001,
"loss": 1.5257,
"step": 217300
},
{
"epoch": 70.2650290885585,
"grad_norm": 1.662739872932434,
"learning_rate": 0.001,
"loss": 1.5272,
"step": 217400
},
{
"epoch": 70.29734970911441,
"grad_norm": 1.3703052997589111,
"learning_rate": 0.001,
"loss": 1.5373,
"step": 217500
},
{
"epoch": 70.32967032967034,
"grad_norm": 1.3139020204544067,
"learning_rate": 0.001,
"loss": 1.5459,
"step": 217600
},
{
"epoch": 70.36199095022624,
"grad_norm": 1.8041199445724487,
"learning_rate": 0.001,
"loss": 1.5372,
"step": 217700
},
{
"epoch": 70.39431157078216,
"grad_norm": 1.9823251962661743,
"learning_rate": 0.001,
"loss": 1.5454,
"step": 217800
},
{
"epoch": 70.42663219133807,
"grad_norm": 1.5236910581588745,
"learning_rate": 0.001,
"loss": 1.5506,
"step": 217900
},
{
"epoch": 70.458952811894,
"grad_norm": 1.4975502490997314,
"learning_rate": 0.001,
"loss": 1.5514,
"step": 218000
},
{
"epoch": 70.4912734324499,
"grad_norm": 0.9770181179046631,
"learning_rate": 0.001,
"loss": 1.5633,
"step": 218100
},
{
"epoch": 70.52359405300582,
"grad_norm": 1.632096767425537,
"learning_rate": 0.001,
"loss": 1.5659,
"step": 218200
},
{
"epoch": 70.55591467356173,
"grad_norm": 1.3791656494140625,
"learning_rate": 0.001,
"loss": 1.5788,
"step": 218300
},
{
"epoch": 70.58823529411765,
"grad_norm": 1.3625118732452393,
"learning_rate": 0.001,
"loss": 1.5848,
"step": 218400
},
{
"epoch": 70.62055591467356,
"grad_norm": 1.355443000793457,
"learning_rate": 0.001,
"loss": 1.5531,
"step": 218500
},
{
"epoch": 70.65287653522948,
"grad_norm": 1.3710728883743286,
"learning_rate": 0.001,
"loss": 1.5684,
"step": 218600
},
{
"epoch": 70.68519715578539,
"grad_norm": 1.2288410663604736,
"learning_rate": 0.001,
"loss": 1.56,
"step": 218700
},
{
"epoch": 70.71751777634131,
"grad_norm": 1.7254223823547363,
"learning_rate": 0.001,
"loss": 1.5803,
"step": 218800
},
{
"epoch": 70.74983839689722,
"grad_norm": 1.6320266723632812,
"learning_rate": 0.001,
"loss": 1.5777,
"step": 218900
},
{
"epoch": 70.78215901745314,
"grad_norm": 1.209186315536499,
"learning_rate": 0.001,
"loss": 1.5888,
"step": 219000
},
{
"epoch": 70.81447963800905,
"grad_norm": 1.2234702110290527,
"learning_rate": 0.001,
"loss": 1.5897,
"step": 219100
},
{
"epoch": 70.84680025856497,
"grad_norm": 1.058451771736145,
"learning_rate": 0.001,
"loss": 1.6031,
"step": 219200
},
{
"epoch": 70.87912087912088,
"grad_norm": 1.3177103996276855,
"learning_rate": 0.001,
"loss": 1.5897,
"step": 219300
},
{
"epoch": 70.9114414996768,
"grad_norm": 1.2803094387054443,
"learning_rate": 0.001,
"loss": 1.6078,
"step": 219400
},
{
"epoch": 70.9437621202327,
"grad_norm": 1.410070776939392,
"learning_rate": 0.001,
"loss": 1.595,
"step": 219500
},
{
"epoch": 70.97608274078863,
"grad_norm": 2.0982401371002197,
"learning_rate": 0.001,
"loss": 1.5901,
"step": 219600
},
{
"epoch": 71.00840336134453,
"grad_norm": 1.0597091913223267,
"learning_rate": 0.001,
"loss": 1.582,
"step": 219700
},
{
"epoch": 71.04072398190046,
"grad_norm": 2.105226755142212,
"learning_rate": 0.001,
"loss": 1.4687,
"step": 219800
},
{
"epoch": 71.07304460245636,
"grad_norm": 1.2170047760009766,
"learning_rate": 0.001,
"loss": 1.4939,
"step": 219900
},
{
"epoch": 71.10536522301229,
"grad_norm": 1.4154855012893677,
"learning_rate": 0.001,
"loss": 1.4787,
"step": 220000
},
{
"epoch": 71.13768584356819,
"grad_norm": 1.7579776048660278,
"learning_rate": 0.001,
"loss": 1.499,
"step": 220100
},
{
"epoch": 71.17000646412411,
"grad_norm": 1.5543571710586548,
"learning_rate": 0.001,
"loss": 1.4995,
"step": 220200
},
{
"epoch": 71.20232708468002,
"grad_norm": 1.4604703187942505,
"learning_rate": 0.001,
"loss": 1.5156,
"step": 220300
},
{
"epoch": 71.23464770523594,
"grad_norm": 1.5122308731079102,
"learning_rate": 0.001,
"loss": 1.5249,
"step": 220400
},
{
"epoch": 71.26696832579185,
"grad_norm": 1.566140055656433,
"learning_rate": 0.001,
"loss": 1.4966,
"step": 220500
},
{
"epoch": 71.29928894634777,
"grad_norm": 1.962449550628662,
"learning_rate": 0.001,
"loss": 1.5303,
"step": 220600
},
{
"epoch": 71.33160956690368,
"grad_norm": 1.3906453847885132,
"learning_rate": 0.001,
"loss": 1.5293,
"step": 220700
},
{
"epoch": 71.3639301874596,
"grad_norm": 1.6068905591964722,
"learning_rate": 0.001,
"loss": 1.5271,
"step": 220800
},
{
"epoch": 71.39625080801551,
"grad_norm": 1.3614650964736938,
"learning_rate": 0.001,
"loss": 1.5508,
"step": 220900
},
{
"epoch": 71.42857142857143,
"grad_norm": 1.6165804862976074,
"learning_rate": 0.001,
"loss": 1.5458,
"step": 221000
},
{
"epoch": 71.46089204912734,
"grad_norm": 1.724016785621643,
"learning_rate": 0.001,
"loss": 1.5337,
"step": 221100
},
{
"epoch": 71.49321266968326,
"grad_norm": 1.3093929290771484,
"learning_rate": 0.001,
"loss": 1.5535,
"step": 221200
},
{
"epoch": 71.52553329023917,
"grad_norm": 1.233283281326294,
"learning_rate": 0.001,
"loss": 1.545,
"step": 221300
},
{
"epoch": 71.55785391079509,
"grad_norm": 1.4728742837905884,
"learning_rate": 0.001,
"loss": 1.5582,
"step": 221400
},
{
"epoch": 71.590174531351,
"grad_norm": 1.3168997764587402,
"learning_rate": 0.001,
"loss": 1.5633,
"step": 221500
},
{
"epoch": 71.62249515190692,
"grad_norm": 1.5132778882980347,
"learning_rate": 0.001,
"loss": 1.5636,
"step": 221600
},
{
"epoch": 71.65481577246283,
"grad_norm": 1.6981767416000366,
"learning_rate": 0.001,
"loss": 1.5719,
"step": 221700
},
{
"epoch": 71.68713639301875,
"grad_norm": 1.4577381610870361,
"learning_rate": 0.001,
"loss": 1.5592,
"step": 221800
},
{
"epoch": 71.71945701357465,
"grad_norm": 1.7084128856658936,
"learning_rate": 0.001,
"loss": 1.5581,
"step": 221900
},
{
"epoch": 71.75177763413058,
"grad_norm": 1.0470802783966064,
"learning_rate": 0.001,
"loss": 1.5716,
"step": 222000
},
{
"epoch": 71.78409825468648,
"grad_norm": 1.6497925519943237,
"learning_rate": 0.001,
"loss": 1.5946,
"step": 222100
},
{
"epoch": 71.8164188752424,
"grad_norm": 1.3580806255340576,
"learning_rate": 0.001,
"loss": 1.5868,
"step": 222200
},
{
"epoch": 71.84873949579831,
"grad_norm": 1.7498301267623901,
"learning_rate": 0.001,
"loss": 1.5963,
"step": 222300
},
{
"epoch": 71.88106011635423,
"grad_norm": 1.8118958473205566,
"learning_rate": 0.001,
"loss": 1.5845,
"step": 222400
},
{
"epoch": 71.91338073691014,
"grad_norm": 1.2654248476028442,
"learning_rate": 0.001,
"loss": 1.5879,
"step": 222500
},
{
"epoch": 71.94570135746606,
"grad_norm": 1.555355429649353,
"learning_rate": 0.001,
"loss": 1.5891,
"step": 222600
},
{
"epoch": 71.97802197802197,
"grad_norm": 1.1465693712234497,
"learning_rate": 0.001,
"loss": 1.6082,
"step": 222700
},
{
"epoch": 72.01034259857789,
"grad_norm": 1.6664994955062866,
"learning_rate": 0.001,
"loss": 1.5414,
"step": 222800
},
{
"epoch": 72.04266321913381,
"grad_norm": 1.639346957206726,
"learning_rate": 0.001,
"loss": 1.4734,
"step": 222900
},
{
"epoch": 72.07498383968972,
"grad_norm": 1.4530357122421265,
"learning_rate": 0.001,
"loss": 1.4757,
"step": 223000
},
{
"epoch": 72.10730446024564,
"grad_norm": 1.1979373693466187,
"learning_rate": 0.001,
"loss": 1.4811,
"step": 223100
},
{
"epoch": 72.13962508080155,
"grad_norm": 1.339179277420044,
"learning_rate": 0.001,
"loss": 1.4943,
"step": 223200
},
{
"epoch": 72.17194570135747,
"grad_norm": 1.2545099258422852,
"learning_rate": 0.001,
"loss": 1.4921,
"step": 223300
},
{
"epoch": 72.20426632191338,
"grad_norm": 1.280022382736206,
"learning_rate": 0.001,
"loss": 1.5095,
"step": 223400
},
{
"epoch": 72.2365869424693,
"grad_norm": 1.3083187341690063,
"learning_rate": 0.001,
"loss": 1.5142,
"step": 223500
},
{
"epoch": 72.26890756302521,
"grad_norm": 1.3355222940444946,
"learning_rate": 0.001,
"loss": 1.5164,
"step": 223600
},
{
"epoch": 72.30122818358113,
"grad_norm": 1.3752424716949463,
"learning_rate": 0.001,
"loss": 1.5246,
"step": 223700
},
{
"epoch": 72.33354880413704,
"grad_norm": 1.200433611869812,
"learning_rate": 0.001,
"loss": 1.5067,
"step": 223800
},
{
"epoch": 72.36586942469296,
"grad_norm": 1.5312621593475342,
"learning_rate": 0.001,
"loss": 1.5259,
"step": 223900
},
{
"epoch": 72.39819004524887,
"grad_norm": 1.517161250114441,
"learning_rate": 0.001,
"loss": 1.5268,
"step": 224000
},
{
"epoch": 72.43051066580479,
"grad_norm": 1.5134165287017822,
"learning_rate": 0.001,
"loss": 1.5434,
"step": 224100
},
{
"epoch": 72.4628312863607,
"grad_norm": 2.0942986011505127,
"learning_rate": 0.001,
"loss": 1.5519,
"step": 224200
},
{
"epoch": 72.49515190691662,
"grad_norm": 1.622260332107544,
"learning_rate": 0.001,
"loss": 1.541,
"step": 224300
},
{
"epoch": 72.52747252747253,
"grad_norm": 1.4113531112670898,
"learning_rate": 0.001,
"loss": 1.5439,
"step": 224400
},
{
"epoch": 72.55979314802845,
"grad_norm": 1.36627197265625,
"learning_rate": 0.001,
"loss": 1.5327,
"step": 224500
},
{
"epoch": 72.59211376858435,
"grad_norm": 1.434559941291809,
"learning_rate": 0.001,
"loss": 1.5554,
"step": 224600
},
{
"epoch": 72.62443438914028,
"grad_norm": 1.475899338722229,
"learning_rate": 0.001,
"loss": 1.5536,
"step": 224700
},
{
"epoch": 72.65675500969618,
"grad_norm": 1.4711800813674927,
"learning_rate": 0.001,
"loss": 1.5546,
"step": 224800
},
{
"epoch": 72.6890756302521,
"grad_norm": 1.2975748777389526,
"learning_rate": 0.001,
"loss": 1.5602,
"step": 224900
},
{
"epoch": 72.72139625080801,
"grad_norm": 1.4378626346588135,
"learning_rate": 0.001,
"loss": 1.568,
"step": 225000
},
{
"epoch": 72.75371687136393,
"grad_norm": 1.3611150979995728,
"learning_rate": 0.001,
"loss": 1.5704,
"step": 225100
},
{
"epoch": 72.78603749191984,
"grad_norm": 1.6263785362243652,
"learning_rate": 0.001,
"loss": 1.5769,
"step": 225200
},
{
"epoch": 72.81835811247576,
"grad_norm": 1.392359733581543,
"learning_rate": 0.001,
"loss": 1.5909,
"step": 225300
},
{
"epoch": 72.85067873303167,
"grad_norm": 1.764510989189148,
"learning_rate": 0.001,
"loss": 1.582,
"step": 225400
},
{
"epoch": 72.88299935358759,
"grad_norm": 1.4342442750930786,
"learning_rate": 0.001,
"loss": 1.5679,
"step": 225500
},
{
"epoch": 72.9153199741435,
"grad_norm": 1.422317385673523,
"learning_rate": 0.001,
"loss": 1.5692,
"step": 225600
},
{
"epoch": 72.94764059469942,
"grad_norm": 1.768009901046753,
"learning_rate": 0.001,
"loss": 1.5812,
"step": 225700
},
{
"epoch": 72.97996121525533,
"grad_norm": 1.7353980541229248,
"learning_rate": 0.001,
"loss": 1.5985,
"step": 225800
},
{
"epoch": 73.01228183581125,
"grad_norm": 1.470423698425293,
"learning_rate": 0.001,
"loss": 1.5372,
"step": 225900
},
{
"epoch": 73.04460245636716,
"grad_norm": 1.3820226192474365,
"learning_rate": 0.001,
"loss": 1.4558,
"step": 226000
},
{
"epoch": 73.07692307692308,
"grad_norm": 1.4334419965744019,
"learning_rate": 0.001,
"loss": 1.4857,
"step": 226100
},
{
"epoch": 73.10924369747899,
"grad_norm": 1.552611231803894,
"learning_rate": 0.001,
"loss": 1.4796,
"step": 226200
},
{
"epoch": 73.14156431803491,
"grad_norm": 1.4738566875457764,
"learning_rate": 0.001,
"loss": 1.4877,
"step": 226300
},
{
"epoch": 73.17388493859082,
"grad_norm": 1.2708693742752075,
"learning_rate": 0.001,
"loss": 1.4875,
"step": 226400
},
{
"epoch": 73.20620555914674,
"grad_norm": 1.2074936628341675,
"learning_rate": 0.001,
"loss": 1.5055,
"step": 226500
},
{
"epoch": 73.23852617970265,
"grad_norm": 1.4904911518096924,
"learning_rate": 0.001,
"loss": 1.4873,
"step": 226600
},
{
"epoch": 73.27084680025857,
"grad_norm": 1.4835336208343506,
"learning_rate": 0.001,
"loss": 1.5047,
"step": 226700
},
{
"epoch": 73.30316742081448,
"grad_norm": 1.8338134288787842,
"learning_rate": 0.001,
"loss": 1.5133,
"step": 226800
},
{
"epoch": 73.3354880413704,
"grad_norm": 1.603265643119812,
"learning_rate": 0.001,
"loss": 1.5093,
"step": 226900
},
{
"epoch": 73.3678086619263,
"grad_norm": 1.803918719291687,
"learning_rate": 0.001,
"loss": 1.5151,
"step": 227000
},
{
"epoch": 73.40012928248223,
"grad_norm": 1.5095746517181396,
"learning_rate": 0.001,
"loss": 1.5201,
"step": 227100
},
{
"epoch": 73.43244990303813,
"grad_norm": 1.6134722232818604,
"learning_rate": 0.001,
"loss": 1.5281,
"step": 227200
},
{
"epoch": 73.46477052359405,
"grad_norm": 1.5454301834106445,
"learning_rate": 0.001,
"loss": 1.5203,
"step": 227300
},
{
"epoch": 73.49709114414996,
"grad_norm": 1.8803797960281372,
"learning_rate": 0.001,
"loss": 1.5316,
"step": 227400
},
{
"epoch": 73.52941176470588,
"grad_norm": 1.5477262735366821,
"learning_rate": 0.001,
"loss": 1.5481,
"step": 227500
},
{
"epoch": 73.56173238526179,
"grad_norm": 1.8320611715316772,
"learning_rate": 0.001,
"loss": 1.5553,
"step": 227600
},
{
"epoch": 73.59405300581771,
"grad_norm": 2.3626441955566406,
"learning_rate": 0.001,
"loss": 1.541,
"step": 227700
},
{
"epoch": 73.62637362637362,
"grad_norm": 1.5265443325042725,
"learning_rate": 0.001,
"loss": 1.5509,
"step": 227800
},
{
"epoch": 73.65869424692954,
"grad_norm": 1.6350661516189575,
"learning_rate": 0.001,
"loss": 1.5515,
"step": 227900
},
{
"epoch": 73.69101486748545,
"grad_norm": 1.40945303440094,
"learning_rate": 0.001,
"loss": 1.5467,
"step": 228000
},
{
"epoch": 73.72333548804137,
"grad_norm": 1.6472688913345337,
"learning_rate": 0.001,
"loss": 1.5666,
"step": 228100
},
{
"epoch": 73.75565610859728,
"grad_norm": 1.753916621208191,
"learning_rate": 0.001,
"loss": 1.5645,
"step": 228200
},
{
"epoch": 73.7879767291532,
"grad_norm": 1.5692429542541504,
"learning_rate": 0.001,
"loss": 1.5616,
"step": 228300
},
{
"epoch": 73.82029734970911,
"grad_norm": 1.4695786237716675,
"learning_rate": 0.001,
"loss": 1.563,
"step": 228400
},
{
"epoch": 73.85261797026503,
"grad_norm": 1.5109291076660156,
"learning_rate": 0.001,
"loss": 1.5702,
"step": 228500
},
{
"epoch": 73.88493859082094,
"grad_norm": 1.3853473663330078,
"learning_rate": 0.001,
"loss": 1.5721,
"step": 228600
},
{
"epoch": 73.91725921137686,
"grad_norm": 1.181089162826538,
"learning_rate": 0.001,
"loss": 1.5778,
"step": 228700
},
{
"epoch": 73.94957983193277,
"grad_norm": 1.3903465270996094,
"learning_rate": 0.001,
"loss": 1.5663,
"step": 228800
},
{
"epoch": 73.98190045248869,
"grad_norm": 1.7607699632644653,
"learning_rate": 0.001,
"loss": 1.5841,
"step": 228900
},
{
"epoch": 74.01422107304461,
"grad_norm": 1.933079481124878,
"learning_rate": 0.001,
"loss": 1.4981,
"step": 229000
},
{
"epoch": 74.04654169360052,
"grad_norm": 1.6522470712661743,
"learning_rate": 0.001,
"loss": 1.4555,
"step": 229100
},
{
"epoch": 74.07886231415644,
"grad_norm": 1.9484916925430298,
"learning_rate": 0.001,
"loss": 1.4581,
"step": 229200
},
{
"epoch": 74.11118293471235,
"grad_norm": 1.5861504077911377,
"learning_rate": 0.001,
"loss": 1.4818,
"step": 229300
},
{
"epoch": 74.14350355526827,
"grad_norm": 1.9591395854949951,
"learning_rate": 0.001,
"loss": 1.47,
"step": 229400
},
{
"epoch": 74.17582417582418,
"grad_norm": 1.5973981618881226,
"learning_rate": 0.001,
"loss": 1.4859,
"step": 229500
},
{
"epoch": 74.2081447963801,
"grad_norm": 1.7444751262664795,
"learning_rate": 0.001,
"loss": 1.4906,
"step": 229600
},
{
"epoch": 74.240465416936,
"grad_norm": 2.131800889968872,
"learning_rate": 0.001,
"loss": 1.4872,
"step": 229700
},
{
"epoch": 74.27278603749193,
"grad_norm": 1.7644002437591553,
"learning_rate": 0.001,
"loss": 1.4969,
"step": 229800
},
{
"epoch": 74.30510665804783,
"grad_norm": 1.646075963973999,
"learning_rate": 0.001,
"loss": 1.494,
"step": 229900
},
{
"epoch": 74.33742727860376,
"grad_norm": 1.8765732049942017,
"learning_rate": 0.001,
"loss": 1.51,
"step": 230000
},
{
"epoch": 74.36974789915966,
"grad_norm": 1.7023745775222778,
"learning_rate": 0.001,
"loss": 1.5053,
"step": 230100
},
{
"epoch": 74.40206851971558,
"grad_norm": 1.703138828277588,
"learning_rate": 0.001,
"loss": 1.5195,
"step": 230200
},
{
"epoch": 74.43438914027149,
"grad_norm": 1.6692290306091309,
"learning_rate": 0.001,
"loss": 1.5259,
"step": 230300
},
{
"epoch": 74.46670976082741,
"grad_norm": 1.418053388595581,
"learning_rate": 0.001,
"loss": 1.5187,
"step": 230400
},
{
"epoch": 74.49903038138332,
"grad_norm": 1.8168318271636963,
"learning_rate": 0.001,
"loss": 1.535,
"step": 230500
},
{
"epoch": 74.53135100193924,
"grad_norm": 1.8448301553726196,
"learning_rate": 0.001,
"loss": 1.5297,
"step": 230600
},
{
"epoch": 74.56367162249515,
"grad_norm": 1.8441119194030762,
"learning_rate": 0.001,
"loss": 1.538,
"step": 230700
},
{
"epoch": 74.59599224305107,
"grad_norm": 1.601678490638733,
"learning_rate": 0.001,
"loss": 1.5382,
"step": 230800
},
{
"epoch": 74.62831286360698,
"grad_norm": 1.40133535861969,
"learning_rate": 0.001,
"loss": 1.5457,
"step": 230900
},
{
"epoch": 74.6606334841629,
"grad_norm": 1.3961271047592163,
"learning_rate": 0.001,
"loss": 1.5447,
"step": 231000
},
{
"epoch": 74.69295410471881,
"grad_norm": 1.5519992113113403,
"learning_rate": 0.001,
"loss": 1.5392,
"step": 231100
},
{
"epoch": 74.72527472527473,
"grad_norm": 1.3843839168548584,
"learning_rate": 0.001,
"loss": 1.5474,
"step": 231200
},
{
"epoch": 74.75759534583064,
"grad_norm": 1.7330049276351929,
"learning_rate": 0.001,
"loss": 1.5534,
"step": 231300
},
{
"epoch": 74.78991596638656,
"grad_norm": 1.7051910161972046,
"learning_rate": 0.001,
"loss": 1.5532,
"step": 231400
},
{
"epoch": 74.82223658694247,
"grad_norm": 1.8478976488113403,
"learning_rate": 0.001,
"loss": 1.5618,
"step": 231500
},
{
"epoch": 74.85455720749839,
"grad_norm": 1.7964320182800293,
"learning_rate": 0.001,
"loss": 1.5505,
"step": 231600
},
{
"epoch": 74.8868778280543,
"grad_norm": 2.2320656776428223,
"learning_rate": 0.001,
"loss": 1.5654,
"step": 231700
},
{
"epoch": 74.91919844861022,
"grad_norm": 1.874264121055603,
"learning_rate": 0.001,
"loss": 1.5705,
"step": 231800
},
{
"epoch": 74.95151906916612,
"grad_norm": 1.4276374578475952,
"learning_rate": 0.001,
"loss": 1.5734,
"step": 231900
},
{
"epoch": 74.98383968972205,
"grad_norm": 1.7904925346374512,
"learning_rate": 0.001,
"loss": 1.5732,
"step": 232000
},
{
"epoch": 75.01616031027795,
"grad_norm": 1.8452962636947632,
"learning_rate": 0.001,
"loss": 1.4718,
"step": 232100
},
{
"epoch": 75.04848093083388,
"grad_norm": 2.6169495582580566,
"learning_rate": 0.001,
"loss": 1.4575,
"step": 232200
},
{
"epoch": 75.08080155138978,
"grad_norm": 1.707862377166748,
"learning_rate": 0.001,
"loss": 1.4386,
"step": 232300
},
{
"epoch": 75.1131221719457,
"grad_norm": 1.8327504396438599,
"learning_rate": 0.001,
"loss": 1.4743,
"step": 232400
},
{
"epoch": 75.14544279250161,
"grad_norm": 2.308570384979248,
"learning_rate": 0.001,
"loss": 1.4821,
"step": 232500
},
{
"epoch": 75.17776341305753,
"grad_norm": 2.6504032611846924,
"learning_rate": 0.001,
"loss": 1.4765,
"step": 232600
},
{
"epoch": 75.21008403361344,
"grad_norm": 2.242478132247925,
"learning_rate": 0.001,
"loss": 1.4719,
"step": 232700
},
{
"epoch": 75.24240465416936,
"grad_norm": 2.303994655609131,
"learning_rate": 0.001,
"loss": 1.4753,
"step": 232800
},
{
"epoch": 75.27472527472527,
"grad_norm": 2.3544199466705322,
"learning_rate": 0.001,
"loss": 1.5065,
"step": 232900
},
{
"epoch": 75.30704589528119,
"grad_norm": 1.9674110412597656,
"learning_rate": 0.001,
"loss": 1.5036,
"step": 233000
},
{
"epoch": 75.3393665158371,
"grad_norm": 1.8192665576934814,
"learning_rate": 0.001,
"loss": 1.495,
"step": 233100
},
{
"epoch": 75.37168713639302,
"grad_norm": 2.133833408355713,
"learning_rate": 0.001,
"loss": 1.4959,
"step": 233200
},
{
"epoch": 75.40400775694893,
"grad_norm": 2.5970709323883057,
"learning_rate": 0.001,
"loss": 1.5083,
"step": 233300
},
{
"epoch": 75.43632837750485,
"grad_norm": 2.0291748046875,
"learning_rate": 0.001,
"loss": 1.5127,
"step": 233400
},
{
"epoch": 75.46864899806076,
"grad_norm": 1.7293260097503662,
"learning_rate": 0.001,
"loss": 1.5247,
"step": 233500
},
{
"epoch": 75.50096961861668,
"grad_norm": 2.267519235610962,
"learning_rate": 0.001,
"loss": 1.5094,
"step": 233600
},
{
"epoch": 75.53329023917259,
"grad_norm": 1.6786974668502808,
"learning_rate": 0.001,
"loss": 1.5213,
"step": 233700
},
{
"epoch": 75.56561085972851,
"grad_norm": 1.8709112405776978,
"learning_rate": 0.001,
"loss": 1.5404,
"step": 233800
},
{
"epoch": 75.59793148028442,
"grad_norm": 1.9633324146270752,
"learning_rate": 0.001,
"loss": 1.5208,
"step": 233900
},
{
"epoch": 75.63025210084034,
"grad_norm": 1.9440094232559204,
"learning_rate": 0.001,
"loss": 1.5271,
"step": 234000
},
{
"epoch": 75.66257272139624,
"grad_norm": 1.935779333114624,
"learning_rate": 0.001,
"loss": 1.5306,
"step": 234100
},
{
"epoch": 75.69489334195217,
"grad_norm": 2.249645948410034,
"learning_rate": 0.001,
"loss": 1.5325,
"step": 234200
},
{
"epoch": 75.72721396250807,
"grad_norm": 2.0431575775146484,
"learning_rate": 0.001,
"loss": 1.5446,
"step": 234300
},
{
"epoch": 75.759534583064,
"grad_norm": 2.305968999862671,
"learning_rate": 0.001,
"loss": 1.5575,
"step": 234400
},
{
"epoch": 75.7918552036199,
"grad_norm": 1.8218010663986206,
"learning_rate": 0.001,
"loss": 1.5389,
"step": 234500
},
{
"epoch": 75.82417582417582,
"grad_norm": 2.15047025680542,
"learning_rate": 0.001,
"loss": 1.5369,
"step": 234600
},
{
"epoch": 75.85649644473173,
"grad_norm": 2.2102482318878174,
"learning_rate": 0.001,
"loss": 1.5545,
"step": 234700
},
{
"epoch": 75.88881706528765,
"grad_norm": 2.4484729766845703,
"learning_rate": 0.001,
"loss": 1.5664,
"step": 234800
},
{
"epoch": 75.92113768584356,
"grad_norm": 2.1117191314697266,
"learning_rate": 0.001,
"loss": 1.5666,
"step": 234900
},
{
"epoch": 75.95345830639948,
"grad_norm": 2.8818562030792236,
"learning_rate": 0.001,
"loss": 1.5675,
"step": 235000
},
{
"epoch": 75.98577892695539,
"grad_norm": 2.7250523567199707,
"learning_rate": 0.001,
"loss": 1.5668,
"step": 235100
},
{
"epoch": 76.01809954751131,
"grad_norm": 2.0469698905944824,
"learning_rate": 0.001,
"loss": 1.5111,
"step": 235200
},
{
"epoch": 76.05042016806723,
"grad_norm": 1.6739189624786377,
"learning_rate": 0.001,
"loss": 1.4421,
"step": 235300
},
{
"epoch": 76.08274078862314,
"grad_norm": 1.7519482374191284,
"learning_rate": 0.001,
"loss": 1.4533,
"step": 235400
},
{
"epoch": 76.11506140917906,
"grad_norm": 1.2195477485656738,
"learning_rate": 0.001,
"loss": 1.4447,
"step": 235500
},
{
"epoch": 76.14738202973497,
"grad_norm": 1.7513339519500732,
"learning_rate": 0.001,
"loss": 1.4578,
"step": 235600
},
{
"epoch": 76.17970265029089,
"grad_norm": 1.6105413436889648,
"learning_rate": 0.001,
"loss": 1.4734,
"step": 235700
},
{
"epoch": 76.2120232708468,
"grad_norm": 1.4778788089752197,
"learning_rate": 0.001,
"loss": 1.4741,
"step": 235800
},
{
"epoch": 76.24434389140272,
"grad_norm": 1.4792375564575195,
"learning_rate": 0.001,
"loss": 1.4773,
"step": 235900
},
{
"epoch": 76.27666451195863,
"grad_norm": 1.8309617042541504,
"learning_rate": 0.001,
"loss": 1.4803,
"step": 236000
},
{
"epoch": 76.30898513251455,
"grad_norm": 1.7483227252960205,
"learning_rate": 0.001,
"loss": 1.4851,
"step": 236100
},
{
"epoch": 76.34130575307046,
"grad_norm": 1.1991394758224487,
"learning_rate": 0.001,
"loss": 1.4842,
"step": 236200
},
{
"epoch": 76.37362637362638,
"grad_norm": 1.6364467144012451,
"learning_rate": 0.001,
"loss": 1.5046,
"step": 236300
},
{
"epoch": 76.40594699418229,
"grad_norm": 1.481478214263916,
"learning_rate": 0.001,
"loss": 1.5063,
"step": 236400
},
{
"epoch": 76.43826761473821,
"grad_norm": 1.7369900941848755,
"learning_rate": 0.001,
"loss": 1.5113,
"step": 236500
},
{
"epoch": 76.47058823529412,
"grad_norm": 1.894484043121338,
"learning_rate": 0.001,
"loss": 1.501,
"step": 236600
},
{
"epoch": 76.50290885585004,
"grad_norm": 1.4116283655166626,
"learning_rate": 0.001,
"loss": 1.5078,
"step": 236700
},
{
"epoch": 76.53522947640595,
"grad_norm": 1.3731389045715332,
"learning_rate": 0.001,
"loss": 1.5154,
"step": 236800
},
{
"epoch": 76.56755009696187,
"grad_norm": 2.088660478591919,
"learning_rate": 0.001,
"loss": 1.5316,
"step": 236900
},
{
"epoch": 76.59987071751777,
"grad_norm": 1.4767850637435913,
"learning_rate": 0.001,
"loss": 1.5417,
"step": 237000
},
{
"epoch": 76.6321913380737,
"grad_norm": 1.584747076034546,
"learning_rate": 0.001,
"loss": 1.5225,
"step": 237100
},
{
"epoch": 76.6645119586296,
"grad_norm": 1.8084192276000977,
"learning_rate": 0.001,
"loss": 1.5176,
"step": 237200
},
{
"epoch": 76.69683257918552,
"grad_norm": 1.3601627349853516,
"learning_rate": 0.001,
"loss": 1.5344,
"step": 237300
},
{
"epoch": 76.72915319974143,
"grad_norm": 1.4814226627349854,
"learning_rate": 0.001,
"loss": 1.5454,
"step": 237400
},
{
"epoch": 76.76147382029735,
"grad_norm": 1.7931079864501953,
"learning_rate": 0.001,
"loss": 1.5104,
"step": 237500
},
{
"epoch": 76.79379444085326,
"grad_norm": 1.7696950435638428,
"learning_rate": 0.001,
"loss": 1.547,
"step": 237600
},
{
"epoch": 76.82611506140918,
"grad_norm": 1.9078216552734375,
"learning_rate": 0.001,
"loss": 1.5307,
"step": 237700
},
{
"epoch": 76.85843568196509,
"grad_norm": 1.6045186519622803,
"learning_rate": 0.001,
"loss": 1.5539,
"step": 237800
},
{
"epoch": 76.89075630252101,
"grad_norm": 1.2418279647827148,
"learning_rate": 0.001,
"loss": 1.5479,
"step": 237900
},
{
"epoch": 76.92307692307692,
"grad_norm": 1.5800580978393555,
"learning_rate": 0.001,
"loss": 1.5538,
"step": 238000
},
{
"epoch": 76.95539754363284,
"grad_norm": 1.7912734746932983,
"learning_rate": 0.001,
"loss": 1.5633,
"step": 238100
},
{
"epoch": 76.98771816418875,
"grad_norm": 1.3797253370285034,
"learning_rate": 0.001,
"loss": 1.5384,
"step": 238200
},
{
"epoch": 77.02003878474467,
"grad_norm": 1.6609480381011963,
"learning_rate": 0.001,
"loss": 1.4973,
"step": 238300
},
{
"epoch": 77.05235940530058,
"grad_norm": 1.7042680978775024,
"learning_rate": 0.001,
"loss": 1.4333,
"step": 238400
},
{
"epoch": 77.0846800258565,
"grad_norm": 1.32135009765625,
"learning_rate": 0.001,
"loss": 1.4579,
"step": 238500
},
{
"epoch": 77.11700064641241,
"grad_norm": 1.302638053894043,
"learning_rate": 0.001,
"loss": 1.4579,
"step": 238600
},
{
"epoch": 77.14932126696833,
"grad_norm": 1.463110089302063,
"learning_rate": 0.001,
"loss": 1.4563,
"step": 238700
},
{
"epoch": 77.18164188752424,
"grad_norm": 1.5074396133422852,
"learning_rate": 0.001,
"loss": 1.4651,
"step": 238800
},
{
"epoch": 77.21396250808016,
"grad_norm": 1.4624207019805908,
"learning_rate": 0.001,
"loss": 1.4642,
"step": 238900
},
{
"epoch": 77.24628312863607,
"grad_norm": 1.4944427013397217,
"learning_rate": 0.001,
"loss": 1.4593,
"step": 239000
},
{
"epoch": 77.27860374919199,
"grad_norm": 2.2866570949554443,
"learning_rate": 0.001,
"loss": 1.4749,
"step": 239100
},
{
"epoch": 77.3109243697479,
"grad_norm": 2.0275280475616455,
"learning_rate": 0.001,
"loss": 1.487,
"step": 239200
},
{
"epoch": 77.34324499030382,
"grad_norm": 1.6738691329956055,
"learning_rate": 0.001,
"loss": 1.4711,
"step": 239300
},
{
"epoch": 77.37556561085972,
"grad_norm": 1.8057903051376343,
"learning_rate": 0.001,
"loss": 1.5001,
"step": 239400
},
{
"epoch": 77.40788623141565,
"grad_norm": 1.3166172504425049,
"learning_rate": 0.001,
"loss": 1.4909,
"step": 239500
},
{
"epoch": 77.44020685197155,
"grad_norm": 1.8812742233276367,
"learning_rate": 0.001,
"loss": 1.4962,
"step": 239600
},
{
"epoch": 77.47252747252747,
"grad_norm": 1.6621681451797485,
"learning_rate": 0.001,
"loss": 1.5079,
"step": 239700
},
{
"epoch": 77.50484809308338,
"grad_norm": 1.238905668258667,
"learning_rate": 0.001,
"loss": 1.4916,
"step": 239800
},
{
"epoch": 77.5371687136393,
"grad_norm": 1.3346996307373047,
"learning_rate": 0.001,
"loss": 1.5014,
"step": 239900
},
{
"epoch": 77.56948933419521,
"grad_norm": 1.3015086650848389,
"learning_rate": 0.001,
"loss": 1.5032,
"step": 240000
},
{
"epoch": 77.60180995475113,
"grad_norm": 1.2718425989151,
"learning_rate": 0.001,
"loss": 1.498,
"step": 240100
},
{
"epoch": 77.63413057530704,
"grad_norm": 1.5175855159759521,
"learning_rate": 0.001,
"loss": 1.5267,
"step": 240200
},
{
"epoch": 77.66645119586296,
"grad_norm": 1.2660694122314453,
"learning_rate": 0.001,
"loss": 1.5257,
"step": 240300
},
{
"epoch": 77.69877181641887,
"grad_norm": 1.3283731937408447,
"learning_rate": 0.001,
"loss": 1.5231,
"step": 240400
},
{
"epoch": 77.73109243697479,
"grad_norm": 1.6311471462249756,
"learning_rate": 0.001,
"loss": 1.5289,
"step": 240500
},
{
"epoch": 77.7634130575307,
"grad_norm": 1.6861320734024048,
"learning_rate": 0.001,
"loss": 1.5224,
"step": 240600
},
{
"epoch": 77.79573367808662,
"grad_norm": 1.559901237487793,
"learning_rate": 0.001,
"loss": 1.5299,
"step": 240700
},
{
"epoch": 77.82805429864253,
"grad_norm": 1.2827755212783813,
"learning_rate": 0.001,
"loss": 1.5427,
"step": 240800
},
{
"epoch": 77.86037491919845,
"grad_norm": 1.285235047340393,
"learning_rate": 0.001,
"loss": 1.5308,
"step": 240900
},
{
"epoch": 77.89269553975436,
"grad_norm": 1.173821210861206,
"learning_rate": 0.001,
"loss": 1.5255,
"step": 241000
},
{
"epoch": 77.92501616031028,
"grad_norm": 1.1771650314331055,
"learning_rate": 0.001,
"loss": 1.5458,
"step": 241100
},
{
"epoch": 77.95733678086619,
"grad_norm": 1.4045838117599487,
"learning_rate": 0.001,
"loss": 1.5496,
"step": 241200
},
{
"epoch": 77.98965740142211,
"grad_norm": 1.7626705169677734,
"learning_rate": 0.001,
"loss": 1.5402,
"step": 241300
},
{
"epoch": 78.02197802197803,
"grad_norm": 1.3559398651123047,
"learning_rate": 0.001,
"loss": 1.4808,
"step": 241400
},
{
"epoch": 78.05429864253394,
"grad_norm": 1.2719939947128296,
"learning_rate": 0.001,
"loss": 1.4271,
"step": 241500
},
{
"epoch": 78.08661926308986,
"grad_norm": 1.233944296836853,
"learning_rate": 0.001,
"loss": 1.4339,
"step": 241600
},
{
"epoch": 78.11893988364577,
"grad_norm": 1.182702660560608,
"learning_rate": 0.001,
"loss": 1.4389,
"step": 241700
},
{
"epoch": 78.15126050420169,
"grad_norm": 1.380983829498291,
"learning_rate": 0.001,
"loss": 1.4442,
"step": 241800
},
{
"epoch": 78.1835811247576,
"grad_norm": 1.6417757272720337,
"learning_rate": 0.001,
"loss": 1.4632,
"step": 241900
},
{
"epoch": 78.21590174531352,
"grad_norm": 1.3505266904830933,
"learning_rate": 0.001,
"loss": 1.4413,
"step": 242000
},
{
"epoch": 78.24822236586942,
"grad_norm": 1.4652029275894165,
"learning_rate": 0.001,
"loss": 1.4585,
"step": 242100
},
{
"epoch": 78.28054298642535,
"grad_norm": 1.0839201211929321,
"learning_rate": 0.001,
"loss": 1.4762,
"step": 242200
},
{
"epoch": 78.31286360698125,
"grad_norm": 2.6254734992980957,
"learning_rate": 0.001,
"loss": 1.4708,
"step": 242300
},
{
"epoch": 78.34518422753717,
"grad_norm": 1.1155922412872314,
"learning_rate": 0.001,
"loss": 1.4752,
"step": 242400
},
{
"epoch": 78.37750484809308,
"grad_norm": 1.1666878461837769,
"learning_rate": 0.001,
"loss": 1.4932,
"step": 242500
},
{
"epoch": 78.409825468649,
"grad_norm": 1.032177448272705,
"learning_rate": 0.001,
"loss": 1.4745,
"step": 242600
},
{
"epoch": 78.44214608920491,
"grad_norm": 1.311974287033081,
"learning_rate": 0.001,
"loss": 1.4818,
"step": 242700
},
{
"epoch": 78.47446670976083,
"grad_norm": 1.2213555574417114,
"learning_rate": 0.001,
"loss": 1.4865,
"step": 242800
},
{
"epoch": 78.50678733031674,
"grad_norm": 1.4418728351593018,
"learning_rate": 0.001,
"loss": 1.4842,
"step": 242900
},
{
"epoch": 78.53910795087266,
"grad_norm": 1.4388240575790405,
"learning_rate": 0.001,
"loss": 1.514,
"step": 243000
},
{
"epoch": 78.57142857142857,
"grad_norm": 1.6182565689086914,
"learning_rate": 0.001,
"loss": 1.5051,
"step": 243100
},
{
"epoch": 78.60374919198449,
"grad_norm": 1.4511289596557617,
"learning_rate": 0.001,
"loss": 1.5017,
"step": 243200
},
{
"epoch": 78.6360698125404,
"grad_norm": 1.4314979314804077,
"learning_rate": 0.001,
"loss": 1.5093,
"step": 243300
},
{
"epoch": 78.66839043309632,
"grad_norm": 1.3016643524169922,
"learning_rate": 0.001,
"loss": 1.5105,
"step": 243400
},
{
"epoch": 78.70071105365223,
"grad_norm": 1.3273727893829346,
"learning_rate": 0.001,
"loss": 1.5116,
"step": 243500
},
{
"epoch": 78.73303167420815,
"grad_norm": 1.3437339067459106,
"learning_rate": 0.001,
"loss": 1.5217,
"step": 243600
},
{
"epoch": 78.76535229476406,
"grad_norm": 1.8093808889389038,
"learning_rate": 0.001,
"loss": 1.523,
"step": 243700
},
{
"epoch": 78.79767291531998,
"grad_norm": 1.3972852230072021,
"learning_rate": 0.001,
"loss": 1.5323,
"step": 243800
},
{
"epoch": 78.82999353587589,
"grad_norm": 1.1998374462127686,
"learning_rate": 0.001,
"loss": 1.5329,
"step": 243900
},
{
"epoch": 78.86231415643181,
"grad_norm": 1.5502599477767944,
"learning_rate": 0.001,
"loss": 1.5409,
"step": 244000
},
{
"epoch": 78.89463477698771,
"grad_norm": 1.1933624744415283,
"learning_rate": 0.001,
"loss": 1.5201,
"step": 244100
},
{
"epoch": 78.92695539754364,
"grad_norm": 1.1095892190933228,
"learning_rate": 0.001,
"loss": 1.539,
"step": 244200
},
{
"epoch": 78.95927601809954,
"grad_norm": 1.1915327310562134,
"learning_rate": 0.001,
"loss": 1.5508,
"step": 244300
},
{
"epoch": 78.99159663865547,
"grad_norm": 0.9810858368873596,
"learning_rate": 0.001,
"loss": 1.5415,
"step": 244400
},
{
"epoch": 79.02391725921137,
"grad_norm": 1.1638388633728027,
"learning_rate": 0.001,
"loss": 1.4601,
"step": 244500
},
{
"epoch": 79.0562378797673,
"grad_norm": 1.7552450895309448,
"learning_rate": 0.001,
"loss": 1.4161,
"step": 244600
},
{
"epoch": 79.0885585003232,
"grad_norm": 1.2643893957138062,
"learning_rate": 0.001,
"loss": 1.4078,
"step": 244700
},
{
"epoch": 79.12087912087912,
"grad_norm": 1.566015601158142,
"learning_rate": 0.001,
"loss": 1.4305,
"step": 244800
},
{
"epoch": 79.15319974143503,
"grad_norm": 1.5437681674957275,
"learning_rate": 0.001,
"loss": 1.4536,
"step": 244900
},
{
"epoch": 79.18552036199095,
"grad_norm": 1.9942206144332886,
"learning_rate": 0.001,
"loss": 1.4516,
"step": 245000
},
{
"epoch": 79.21784098254686,
"grad_norm": 1.1073012351989746,
"learning_rate": 0.001,
"loss": 1.4669,
"step": 245100
},
{
"epoch": 79.25016160310278,
"grad_norm": 1.2855706214904785,
"learning_rate": 0.001,
"loss": 1.4514,
"step": 245200
},
{
"epoch": 79.28248222365869,
"grad_norm": 1.5170331001281738,
"learning_rate": 0.001,
"loss": 1.4588,
"step": 245300
},
{
"epoch": 79.31480284421461,
"grad_norm": 1.9636250734329224,
"learning_rate": 0.001,
"loss": 1.4642,
"step": 245400
},
{
"epoch": 79.34712346477052,
"grad_norm": 1.4256482124328613,
"learning_rate": 0.001,
"loss": 1.4727,
"step": 245500
},
{
"epoch": 79.37944408532644,
"grad_norm": 1.501046895980835,
"learning_rate": 0.001,
"loss": 1.4727,
"step": 245600
},
{
"epoch": 79.41176470588235,
"grad_norm": 1.8273255825042725,
"learning_rate": 0.001,
"loss": 1.4813,
"step": 245700
},
{
"epoch": 79.44408532643827,
"grad_norm": 1.8996268510818481,
"learning_rate": 0.001,
"loss": 1.4857,
"step": 245800
},
{
"epoch": 79.47640594699418,
"grad_norm": 1.5301989316940308,
"learning_rate": 0.001,
"loss": 1.476,
"step": 245900
},
{
"epoch": 79.5087265675501,
"grad_norm": 1.4433398246765137,
"learning_rate": 0.001,
"loss": 1.4826,
"step": 246000
},
{
"epoch": 79.541047188106,
"grad_norm": 1.4371416568756104,
"learning_rate": 0.001,
"loss": 1.4859,
"step": 246100
},
{
"epoch": 79.57336780866193,
"grad_norm": 1.288563847541809,
"learning_rate": 0.001,
"loss": 1.4988,
"step": 246200
},
{
"epoch": 79.60568842921784,
"grad_norm": 1.3247302770614624,
"learning_rate": 0.001,
"loss": 1.4985,
"step": 246300
},
{
"epoch": 79.63800904977376,
"grad_norm": 1.4887481927871704,
"learning_rate": 0.001,
"loss": 1.497,
"step": 246400
},
{
"epoch": 79.67032967032966,
"grad_norm": 1.3735618591308594,
"learning_rate": 0.001,
"loss": 1.5049,
"step": 246500
},
{
"epoch": 79.70265029088559,
"grad_norm": 1.4121897220611572,
"learning_rate": 0.001,
"loss": 1.5014,
"step": 246600
},
{
"epoch": 79.7349709114415,
"grad_norm": 1.568111777305603,
"learning_rate": 0.001,
"loss": 1.5004,
"step": 246700
},
{
"epoch": 79.76729153199742,
"grad_norm": 1.5190857648849487,
"learning_rate": 0.001,
"loss": 1.5016,
"step": 246800
},
{
"epoch": 79.79961215255332,
"grad_norm": 1.287047028541565,
"learning_rate": 0.001,
"loss": 1.5353,
"step": 246900
},
{
"epoch": 79.83193277310924,
"grad_norm": 1.403714656829834,
"learning_rate": 0.001,
"loss": 1.5267,
"step": 247000
},
{
"epoch": 79.86425339366515,
"grad_norm": 1.180440068244934,
"learning_rate": 0.001,
"loss": 1.5313,
"step": 247100
},
{
"epoch": 79.89657401422107,
"grad_norm": 1.1998322010040283,
"learning_rate": 0.001,
"loss": 1.5293,
"step": 247200
},
{
"epoch": 79.92889463477698,
"grad_norm": 1.2172197103500366,
"learning_rate": 0.001,
"loss": 1.5364,
"step": 247300
},
{
"epoch": 79.9612152553329,
"grad_norm": 1.5113470554351807,
"learning_rate": 0.001,
"loss": 1.5482,
"step": 247400
},
{
"epoch": 79.99353587588882,
"grad_norm": 1.7513564825057983,
"learning_rate": 0.001,
"loss": 1.531,
"step": 247500
},
{
"epoch": 80.02585649644473,
"grad_norm": 1.1894452571868896,
"learning_rate": 0.001,
"loss": 1.4383,
"step": 247600
},
{
"epoch": 80.05817711700065,
"grad_norm": 1.2569270133972168,
"learning_rate": 0.001,
"loss": 1.416,
"step": 247700
},
{
"epoch": 80.09049773755656,
"grad_norm": 1.8621593713760376,
"learning_rate": 0.001,
"loss": 1.4129,
"step": 247800
},
{
"epoch": 80.12281835811248,
"grad_norm": 1.2772419452667236,
"learning_rate": 0.001,
"loss": 1.429,
"step": 247900
},
{
"epoch": 80.15513897866839,
"grad_norm": 1.5088549852371216,
"learning_rate": 0.001,
"loss": 1.4407,
"step": 248000
},
{
"epoch": 80.18745959922431,
"grad_norm": 1.2808599472045898,
"learning_rate": 0.001,
"loss": 1.4328,
"step": 248100
},
{
"epoch": 80.21978021978022,
"grad_norm": 1.5279263257980347,
"learning_rate": 0.001,
"loss": 1.4476,
"step": 248200
},
{
"epoch": 80.25210084033614,
"grad_norm": 1.5252046585083008,
"learning_rate": 0.001,
"loss": 1.4425,
"step": 248300
},
{
"epoch": 80.28442146089205,
"grad_norm": 1.759626030921936,
"learning_rate": 0.001,
"loss": 1.4678,
"step": 248400
},
{
"epoch": 80.31674208144797,
"grad_norm": 1.4535683393478394,
"learning_rate": 0.001,
"loss": 1.4482,
"step": 248500
},
{
"epoch": 80.34906270200388,
"grad_norm": 1.4096509218215942,
"learning_rate": 0.001,
"loss": 1.4594,
"step": 248600
},
{
"epoch": 80.3813833225598,
"grad_norm": 1.2100415229797363,
"learning_rate": 0.001,
"loss": 1.4807,
"step": 248700
},
{
"epoch": 80.4137039431157,
"grad_norm": 1.5168434381484985,
"learning_rate": 0.001,
"loss": 1.4731,
"step": 248800
},
{
"epoch": 80.44602456367163,
"grad_norm": 1.6755900382995605,
"learning_rate": 0.001,
"loss": 1.4677,
"step": 248900
},
{
"epoch": 80.47834518422754,
"grad_norm": 1.6561522483825684,
"learning_rate": 0.001,
"loss": 1.4783,
"step": 249000
},
{
"epoch": 80.51066580478346,
"grad_norm": 1.4312539100646973,
"learning_rate": 0.001,
"loss": 1.4754,
"step": 249100
},
{
"epoch": 80.54298642533936,
"grad_norm": 1.3774590492248535,
"learning_rate": 0.001,
"loss": 1.4781,
"step": 249200
},
{
"epoch": 80.57530704589529,
"grad_norm": 1.4128397703170776,
"learning_rate": 0.001,
"loss": 1.4897,
"step": 249300
},
{
"epoch": 80.6076276664512,
"grad_norm": 1.261557698249817,
"learning_rate": 0.001,
"loss": 1.4966,
"step": 249400
},
{
"epoch": 80.63994828700712,
"grad_norm": 1.7835651636123657,
"learning_rate": 0.001,
"loss": 1.4864,
"step": 249500
},
{
"epoch": 80.67226890756302,
"grad_norm": 1.3453576564788818,
"learning_rate": 0.001,
"loss": 1.4925,
"step": 249600
},
{
"epoch": 80.70458952811894,
"grad_norm": 1.269067406654358,
"learning_rate": 0.001,
"loss": 1.4857,
"step": 249700
},
{
"epoch": 80.73691014867485,
"grad_norm": 2.2933781147003174,
"learning_rate": 0.001,
"loss": 1.51,
"step": 249800
},
{
"epoch": 80.76923076923077,
"grad_norm": 1.3368571996688843,
"learning_rate": 0.001,
"loss": 1.496,
"step": 249900
},
{
"epoch": 80.80155138978668,
"grad_norm": 1.5495718717575073,
"learning_rate": 0.001,
"loss": 1.5008,
"step": 250000
},
{
"epoch": 80.8338720103426,
"grad_norm": 1.5590369701385498,
"learning_rate": 0.001,
"loss": 1.5045,
"step": 250100
},
{
"epoch": 80.86619263089851,
"grad_norm": 1.4353771209716797,
"learning_rate": 0.001,
"loss": 1.5153,
"step": 250200
},
{
"epoch": 80.89851325145443,
"grad_norm": 1.0782513618469238,
"learning_rate": 0.001,
"loss": 1.5366,
"step": 250300
},
{
"epoch": 80.93083387201034,
"grad_norm": 1.3433213233947754,
"learning_rate": 0.001,
"loss": 1.5223,
"step": 250400
},
{
"epoch": 80.96315449256626,
"grad_norm": 1.6848622560501099,
"learning_rate": 0.001,
"loss": 1.5359,
"step": 250500
},
{
"epoch": 80.99547511312217,
"grad_norm": 1.3130286931991577,
"learning_rate": 0.001,
"loss": 1.5322,
"step": 250600
},
{
"epoch": 81.02779573367809,
"grad_norm": 1.2962126731872559,
"learning_rate": 0.001,
"loss": 1.4071,
"step": 250700
},
{
"epoch": 81.060116354234,
"grad_norm": 1.4461277723312378,
"learning_rate": 0.001,
"loss": 1.4242,
"step": 250800
},
{
"epoch": 81.09243697478992,
"grad_norm": 1.6120673418045044,
"learning_rate": 0.001,
"loss": 1.4029,
"step": 250900
},
{
"epoch": 81.12475759534583,
"grad_norm": 1.6296929121017456,
"learning_rate": 0.001,
"loss": 1.4263,
"step": 251000
},
{
"epoch": 81.15707821590175,
"grad_norm": 1.6019946336746216,
"learning_rate": 0.001,
"loss": 1.4285,
"step": 251100
},
{
"epoch": 81.18939883645766,
"grad_norm": 1.4875445365905762,
"learning_rate": 0.001,
"loss": 1.4387,
"step": 251200
},
{
"epoch": 81.22171945701358,
"grad_norm": 1.2833311557769775,
"learning_rate": 0.001,
"loss": 1.4167,
"step": 251300
},
{
"epoch": 81.25404007756948,
"grad_norm": 1.8684656620025635,
"learning_rate": 0.001,
"loss": 1.4276,
"step": 251400
},
{
"epoch": 81.2863606981254,
"grad_norm": 1.2863260507583618,
"learning_rate": 0.001,
"loss": 1.4624,
"step": 251500
},
{
"epoch": 81.31868131868131,
"grad_norm": 1.9092422723770142,
"learning_rate": 0.001,
"loss": 1.4576,
"step": 251600
},
{
"epoch": 81.35100193923724,
"grad_norm": 1.2243609428405762,
"learning_rate": 0.001,
"loss": 1.4517,
"step": 251700
},
{
"epoch": 81.38332255979314,
"grad_norm": 1.2571626901626587,
"learning_rate": 0.001,
"loss": 1.4541,
"step": 251800
},
{
"epoch": 81.41564318034906,
"grad_norm": 1.7485051155090332,
"learning_rate": 0.001,
"loss": 1.4439,
"step": 251900
},
{
"epoch": 81.44796380090497,
"grad_norm": 2.036057710647583,
"learning_rate": 0.001,
"loss": 1.4817,
"step": 252000
},
{
"epoch": 81.4802844214609,
"grad_norm": 1.6407066583633423,
"learning_rate": 0.001,
"loss": 1.4732,
"step": 252100
},
{
"epoch": 81.5126050420168,
"grad_norm": 1.3501890897750854,
"learning_rate": 0.001,
"loss": 1.4662,
"step": 252200
},
{
"epoch": 81.54492566257272,
"grad_norm": 1.4880703687667847,
"learning_rate": 0.001,
"loss": 1.4659,
"step": 252300
},
{
"epoch": 81.57724628312863,
"grad_norm": 1.2374626398086548,
"learning_rate": 0.001,
"loss": 1.4867,
"step": 252400
},
{
"epoch": 81.60956690368455,
"grad_norm": 1.559194803237915,
"learning_rate": 0.001,
"loss": 1.4935,
"step": 252500
},
{
"epoch": 81.64188752424046,
"grad_norm": 1.5244473218917847,
"learning_rate": 0.001,
"loss": 1.4731,
"step": 252600
},
{
"epoch": 81.67420814479638,
"grad_norm": 1.5921167135238647,
"learning_rate": 0.001,
"loss": 1.4795,
"step": 252700
},
{
"epoch": 81.70652876535229,
"grad_norm": 1.672304630279541,
"learning_rate": 0.001,
"loss": 1.4872,
"step": 252800
},
{
"epoch": 81.73884938590821,
"grad_norm": 1.6150751113891602,
"learning_rate": 0.001,
"loss": 1.5039,
"step": 252900
},
{
"epoch": 81.77117000646412,
"grad_norm": 1.2743347883224487,
"learning_rate": 0.001,
"loss": 1.4856,
"step": 253000
},
{
"epoch": 81.80349062702004,
"grad_norm": 1.7275660037994385,
"learning_rate": 0.001,
"loss": 1.5095,
"step": 253100
},
{
"epoch": 81.83581124757595,
"grad_norm": 1.778511643409729,
"learning_rate": 0.001,
"loss": 1.5192,
"step": 253200
},
{
"epoch": 81.86813186813187,
"grad_norm": 1.483957052230835,
"learning_rate": 0.001,
"loss": 1.5149,
"step": 253300
},
{
"epoch": 81.90045248868778,
"grad_norm": 1.586846113204956,
"learning_rate": 0.001,
"loss": 1.5079,
"step": 253400
},
{
"epoch": 81.9327731092437,
"grad_norm": 2.0547091960906982,
"learning_rate": 0.001,
"loss": 1.5322,
"step": 253500
},
{
"epoch": 81.9650937297996,
"grad_norm": 1.5635367631912231,
"learning_rate": 0.001,
"loss": 1.5191,
"step": 253600
},
{
"epoch": 81.99741435035553,
"grad_norm": 1.4796775579452515,
"learning_rate": 0.001,
"loss": 1.5176,
"step": 253700
},
{
"epoch": 82.02973497091145,
"grad_norm": 1.479210615158081,
"learning_rate": 0.001,
"loss": 1.4066,
"step": 253800
},
{
"epoch": 82.06205559146736,
"grad_norm": 1.9869581460952759,
"learning_rate": 0.001,
"loss": 1.404,
"step": 253900
},
{
"epoch": 82.09437621202328,
"grad_norm": 1.41694176197052,
"learning_rate": 0.001,
"loss": 1.4167,
"step": 254000
},
{
"epoch": 82.12669683257919,
"grad_norm": 2.2012135982513428,
"learning_rate": 0.001,
"loss": 1.4148,
"step": 254100
},
{
"epoch": 82.1590174531351,
"grad_norm": 1.582972526550293,
"learning_rate": 0.001,
"loss": 1.425,
"step": 254200
},
{
"epoch": 82.19133807369101,
"grad_norm": 1.4067134857177734,
"learning_rate": 0.001,
"loss": 1.4294,
"step": 254300
},
{
"epoch": 82.22365869424694,
"grad_norm": 1.7295600175857544,
"learning_rate": 0.001,
"loss": 1.4232,
"step": 254400
},
{
"epoch": 82.25597931480284,
"grad_norm": 1.199151635169983,
"learning_rate": 0.001,
"loss": 1.4323,
"step": 254500
},
{
"epoch": 82.28829993535876,
"grad_norm": 1.210278034210205,
"learning_rate": 0.001,
"loss": 1.4329,
"step": 254600
},
{
"epoch": 82.32062055591467,
"grad_norm": 1.3613992929458618,
"learning_rate": 0.001,
"loss": 1.4363,
"step": 254700
},
{
"epoch": 82.3529411764706,
"grad_norm": 2.0656394958496094,
"learning_rate": 0.001,
"loss": 1.4495,
"step": 254800
},
{
"epoch": 82.3852617970265,
"grad_norm": 1.796547293663025,
"learning_rate": 0.001,
"loss": 1.4605,
"step": 254900
},
{
"epoch": 82.41758241758242,
"grad_norm": 1.6070237159729004,
"learning_rate": 0.001,
"loss": 1.4455,
"step": 255000
},
{
"epoch": 82.44990303813833,
"grad_norm": 1.657780408859253,
"learning_rate": 0.001,
"loss": 1.4424,
"step": 255100
},
{
"epoch": 82.48222365869425,
"grad_norm": 1.9245177507400513,
"learning_rate": 0.001,
"loss": 1.4549,
"step": 255200
},
{
"epoch": 82.51454427925016,
"grad_norm": 2.279123544692993,
"learning_rate": 0.001,
"loss": 1.468,
"step": 255300
},
{
"epoch": 82.54686489980608,
"grad_norm": 1.8537899255752563,
"learning_rate": 0.001,
"loss": 1.4766,
"step": 255400
},
{
"epoch": 82.57918552036199,
"grad_norm": 1.2981986999511719,
"learning_rate": 0.001,
"loss": 1.4577,
"step": 255500
},
{
"epoch": 82.61150614091791,
"grad_norm": 1.694447636604309,
"learning_rate": 0.001,
"loss": 1.4843,
"step": 255600
},
{
"epoch": 82.64382676147382,
"grad_norm": 2.3249664306640625,
"learning_rate": 0.001,
"loss": 1.4704,
"step": 255700
},
{
"epoch": 82.67614738202974,
"grad_norm": 1.9651449918746948,
"learning_rate": 0.001,
"loss": 1.4804,
"step": 255800
},
{
"epoch": 82.70846800258565,
"grad_norm": 1.6320222616195679,
"learning_rate": 0.001,
"loss": 1.4993,
"step": 255900
},
{
"epoch": 82.74078862314157,
"grad_norm": 1.9455903768539429,
"learning_rate": 0.001,
"loss": 1.4862,
"step": 256000
},
{
"epoch": 82.77310924369748,
"grad_norm": 1.7584723234176636,
"learning_rate": 0.001,
"loss": 1.4873,
"step": 256100
},
{
"epoch": 82.8054298642534,
"grad_norm": 1.4015384912490845,
"learning_rate": 0.001,
"loss": 1.4884,
"step": 256200
},
{
"epoch": 82.8377504848093,
"grad_norm": 1.6554213762283325,
"learning_rate": 0.001,
"loss": 1.4922,
"step": 256300
},
{
"epoch": 82.87007110536523,
"grad_norm": 2.2378883361816406,
"learning_rate": 0.001,
"loss": 1.4975,
"step": 256400
},
{
"epoch": 82.90239172592113,
"grad_norm": 1.633915662765503,
"learning_rate": 0.001,
"loss": 1.5221,
"step": 256500
},
{
"epoch": 82.93471234647706,
"grad_norm": 1.896621823310852,
"learning_rate": 0.001,
"loss": 1.5058,
"step": 256600
},
{
"epoch": 82.96703296703296,
"grad_norm": 1.408779263496399,
"learning_rate": 0.001,
"loss": 1.5066,
"step": 256700
},
{
"epoch": 82.99935358758889,
"grad_norm": 1.6087642908096313,
"learning_rate": 0.001,
"loss": 1.4986,
"step": 256800
},
{
"epoch": 83.03167420814479,
"grad_norm": 1.8113195896148682,
"learning_rate": 0.001,
"loss": 1.3956,
"step": 256900
},
{
"epoch": 83.06399482870071,
"grad_norm": 1.6769685745239258,
"learning_rate": 0.001,
"loss": 1.3916,
"step": 257000
},
{
"epoch": 83.09631544925662,
"grad_norm": 1.9285463094711304,
"learning_rate": 0.001,
"loss": 1.4024,
"step": 257100
},
{
"epoch": 83.12863606981254,
"grad_norm": 1.453759789466858,
"learning_rate": 0.001,
"loss": 1.4032,
"step": 257200
},
{
"epoch": 83.16095669036845,
"grad_norm": 1.9637484550476074,
"learning_rate": 0.001,
"loss": 1.4126,
"step": 257300
},
{
"epoch": 83.19327731092437,
"grad_norm": 2.4130241870880127,
"learning_rate": 0.001,
"loss": 1.4154,
"step": 257400
},
{
"epoch": 83.22559793148028,
"grad_norm": 1.6804895401000977,
"learning_rate": 0.001,
"loss": 1.4111,
"step": 257500
},
{
"epoch": 83.2579185520362,
"grad_norm": 1.816179871559143,
"learning_rate": 0.001,
"loss": 1.4297,
"step": 257600
},
{
"epoch": 83.29023917259211,
"grad_norm": 2.7931365966796875,
"learning_rate": 0.001,
"loss": 1.4333,
"step": 257700
},
{
"epoch": 83.32255979314803,
"grad_norm": 2.2509469985961914,
"learning_rate": 0.001,
"loss": 1.4247,
"step": 257800
},
{
"epoch": 83.35488041370394,
"grad_norm": 1.733638882637024,
"learning_rate": 0.001,
"loss": 1.4293,
"step": 257900
},
{
"epoch": 83.38720103425986,
"grad_norm": 2.0615646839141846,
"learning_rate": 0.001,
"loss": 1.4304,
"step": 258000
},
{
"epoch": 83.41952165481577,
"grad_norm": 1.8422954082489014,
"learning_rate": 0.001,
"loss": 1.4391,
"step": 258100
},
{
"epoch": 83.45184227537169,
"grad_norm": 2.084277391433716,
"learning_rate": 0.001,
"loss": 1.455,
"step": 258200
},
{
"epoch": 83.4841628959276,
"grad_norm": 1.990598440170288,
"learning_rate": 0.001,
"loss": 1.4575,
"step": 258300
},
{
"epoch": 83.51648351648352,
"grad_norm": 1.9928582906723022,
"learning_rate": 0.001,
"loss": 1.4622,
"step": 258400
},
{
"epoch": 83.54880413703943,
"grad_norm": 1.6820632219314575,
"learning_rate": 0.001,
"loss": 1.4477,
"step": 258500
},
{
"epoch": 83.58112475759535,
"grad_norm": 1.4622422456741333,
"learning_rate": 0.001,
"loss": 1.4602,
"step": 258600
},
{
"epoch": 83.61344537815125,
"grad_norm": 1.9164870977401733,
"learning_rate": 0.001,
"loss": 1.4846,
"step": 258700
},
{
"epoch": 83.64576599870718,
"grad_norm": 2.162792921066284,
"learning_rate": 0.001,
"loss": 1.48,
"step": 258800
},
{
"epoch": 83.67808661926308,
"grad_norm": 2.626990795135498,
"learning_rate": 0.001,
"loss": 1.4729,
"step": 258900
},
{
"epoch": 83.710407239819,
"grad_norm": 1.8493727445602417,
"learning_rate": 0.001,
"loss": 1.4754,
"step": 259000
},
{
"epoch": 83.74272786037491,
"grad_norm": 2.3368582725524902,
"learning_rate": 0.001,
"loss": 1.4843,
"step": 259100
},
{
"epoch": 83.77504848093083,
"grad_norm": 2.14288067817688,
"learning_rate": 0.001,
"loss": 1.4815,
"step": 259200
},
{
"epoch": 83.80736910148674,
"grad_norm": 2.0774693489074707,
"learning_rate": 0.001,
"loss": 1.4933,
"step": 259300
},
{
"epoch": 83.83968972204266,
"grad_norm": 2.4030613899230957,
"learning_rate": 0.001,
"loss": 1.4974,
"step": 259400
},
{
"epoch": 83.87201034259857,
"grad_norm": 1.7850096225738525,
"learning_rate": 0.001,
"loss": 1.4967,
"step": 259500
},
{
"epoch": 83.9043309631545,
"grad_norm": 1.7380095720291138,
"learning_rate": 0.001,
"loss": 1.4988,
"step": 259600
},
{
"epoch": 83.9366515837104,
"grad_norm": 1.620004653930664,
"learning_rate": 0.001,
"loss": 1.5072,
"step": 259700
},
{
"epoch": 83.96897220426632,
"grad_norm": 2.2414209842681885,
"learning_rate": 0.001,
"loss": 1.5217,
"step": 259800
},
{
"epoch": 84.00129282482224,
"grad_norm": 1.7334810495376587,
"learning_rate": 0.001,
"loss": 1.4948,
"step": 259900
},
{
"epoch": 84.03361344537815,
"grad_norm": 1.9263193607330322,
"learning_rate": 0.001,
"loss": 1.3888,
"step": 260000
},
{
"epoch": 84.06593406593407,
"grad_norm": 1.596864104270935,
"learning_rate": 0.001,
"loss": 1.3919,
"step": 260100
},
{
"epoch": 84.09825468648998,
"grad_norm": 1.529685378074646,
"learning_rate": 0.001,
"loss": 1.3771,
"step": 260200
},
{
"epoch": 84.1305753070459,
"grad_norm": 1.3261326551437378,
"learning_rate": 0.001,
"loss": 1.4221,
"step": 260300
},
{
"epoch": 84.16289592760181,
"grad_norm": 1.412488579750061,
"learning_rate": 0.001,
"loss": 1.4084,
"step": 260400
},
{
"epoch": 84.19521654815773,
"grad_norm": 1.5648341178894043,
"learning_rate": 0.001,
"loss": 1.4197,
"step": 260500
},
{
"epoch": 84.22753716871364,
"grad_norm": 1.4410159587860107,
"learning_rate": 0.001,
"loss": 1.4181,
"step": 260600
},
{
"epoch": 84.25985778926956,
"grad_norm": 2.8147940635681152,
"learning_rate": 0.001,
"loss": 1.4123,
"step": 260700
},
{
"epoch": 84.29217840982547,
"grad_norm": 1.6910955905914307,
"learning_rate": 0.001,
"loss": 1.4099,
"step": 260800
},
{
"epoch": 84.32449903038139,
"grad_norm": 1.6648961305618286,
"learning_rate": 0.001,
"loss": 1.4261,
"step": 260900
},
{
"epoch": 84.3568196509373,
"grad_norm": 3.2388651371002197,
"learning_rate": 0.001,
"loss": 1.449,
"step": 261000
},
{
"epoch": 84.38914027149322,
"grad_norm": 1.9411053657531738,
"learning_rate": 0.001,
"loss": 1.4306,
"step": 261100
},
{
"epoch": 84.42146089204913,
"grad_norm": 1.7887365818023682,
"learning_rate": 0.001,
"loss": 1.4554,
"step": 261200
},
{
"epoch": 84.45378151260505,
"grad_norm": 2.1713664531707764,
"learning_rate": 0.001,
"loss": 1.4414,
"step": 261300
},
{
"epoch": 84.48610213316095,
"grad_norm": 1.6247268915176392,
"learning_rate": 0.001,
"loss": 1.4438,
"step": 261400
},
{
"epoch": 84.51842275371688,
"grad_norm": 1.6007441282272339,
"learning_rate": 0.001,
"loss": 1.4546,
"step": 261500
},
{
"epoch": 84.55074337427278,
"grad_norm": 1.7280800342559814,
"learning_rate": 0.001,
"loss": 1.4641,
"step": 261600
},
{
"epoch": 84.5830639948287,
"grad_norm": 1.4836840629577637,
"learning_rate": 0.001,
"loss": 1.4621,
"step": 261700
},
{
"epoch": 84.61538461538461,
"grad_norm": 1.3228708505630493,
"learning_rate": 0.001,
"loss": 1.4535,
"step": 261800
},
{
"epoch": 84.64770523594053,
"grad_norm": 1.2039247751235962,
"learning_rate": 0.001,
"loss": 1.4504,
"step": 261900
},
{
"epoch": 84.68002585649644,
"grad_norm": 1.3510963916778564,
"learning_rate": 0.001,
"loss": 1.4611,
"step": 262000
},
{
"epoch": 84.71234647705236,
"grad_norm": 1.8786565065383911,
"learning_rate": 0.001,
"loss": 1.4809,
"step": 262100
},
{
"epoch": 84.74466709760827,
"grad_norm": 2.283278226852417,
"learning_rate": 0.001,
"loss": 1.4757,
"step": 262200
},
{
"epoch": 84.7769877181642,
"grad_norm": 1.2619625329971313,
"learning_rate": 0.001,
"loss": 1.4844,
"step": 262300
},
{
"epoch": 84.8093083387201,
"grad_norm": 1.4168131351470947,
"learning_rate": 0.001,
"loss": 1.4884,
"step": 262400
},
{
"epoch": 84.84162895927602,
"grad_norm": 1.4280415773391724,
"learning_rate": 0.001,
"loss": 1.4846,
"step": 262500
},
{
"epoch": 84.87394957983193,
"grad_norm": 2.1055214405059814,
"learning_rate": 0.001,
"loss": 1.4845,
"step": 262600
},
{
"epoch": 84.90627020038785,
"grad_norm": 2.787269353866577,
"learning_rate": 0.001,
"loss": 1.4872,
"step": 262700
},
{
"epoch": 84.93859082094376,
"grad_norm": 1.399965524673462,
"learning_rate": 0.001,
"loss": 1.5049,
"step": 262800
},
{
"epoch": 84.97091144149968,
"grad_norm": 1.5080381631851196,
"learning_rate": 0.001,
"loss": 1.5069,
"step": 262900
},
{
"epoch": 85.00323206205559,
"grad_norm": 2.1978836059570312,
"learning_rate": 0.001,
"loss": 1.4949,
"step": 263000
},
{
"epoch": 85.03555268261151,
"grad_norm": 1.3249891996383667,
"learning_rate": 0.001,
"loss": 1.37,
"step": 263100
},
{
"epoch": 85.06787330316742,
"grad_norm": 1.6576632261276245,
"learning_rate": 0.001,
"loss": 1.3804,
"step": 263200
},
{
"epoch": 85.10019392372334,
"grad_norm": 1.4127508401870728,
"learning_rate": 0.001,
"loss": 1.3937,
"step": 263300
},
{
"epoch": 85.13251454427925,
"grad_norm": 1.4045134782791138,
"learning_rate": 0.001,
"loss": 1.3971,
"step": 263400
},
{
"epoch": 85.16483516483517,
"grad_norm": 1.5815248489379883,
"learning_rate": 0.001,
"loss": 1.4044,
"step": 263500
},
{
"epoch": 85.19715578539108,
"grad_norm": 1.9947961568832397,
"learning_rate": 0.001,
"loss": 1.3958,
"step": 263600
},
{
"epoch": 85.229476405947,
"grad_norm": 1.3920848369598389,
"learning_rate": 0.001,
"loss": 1.401,
"step": 263700
},
{
"epoch": 85.2617970265029,
"grad_norm": 1.7426060438156128,
"learning_rate": 0.001,
"loss": 1.4183,
"step": 263800
},
{
"epoch": 85.29411764705883,
"grad_norm": 1.9438815116882324,
"learning_rate": 0.001,
"loss": 1.4087,
"step": 263900
},
{
"epoch": 85.32643826761473,
"grad_norm": 3.285012722015381,
"learning_rate": 0.001,
"loss": 1.4252,
"step": 264000
},
{
"epoch": 85.35875888817066,
"grad_norm": 1.551334261894226,
"learning_rate": 0.001,
"loss": 1.4356,
"step": 264100
},
{
"epoch": 85.39107950872656,
"grad_norm": 1.8503215312957764,
"learning_rate": 0.001,
"loss": 1.4199,
"step": 264200
},
{
"epoch": 85.42340012928248,
"grad_norm": 1.434235692024231,
"learning_rate": 0.001,
"loss": 1.4396,
"step": 264300
},
{
"epoch": 85.45572074983839,
"grad_norm": 1.1429784297943115,
"learning_rate": 0.001,
"loss": 1.4341,
"step": 264400
},
{
"epoch": 85.48804137039431,
"grad_norm": 1.928701639175415,
"learning_rate": 0.001,
"loss": 1.453,
"step": 264500
},
{
"epoch": 85.52036199095022,
"grad_norm": 1.24684476852417,
"learning_rate": 0.001,
"loss": 1.4297,
"step": 264600
},
{
"epoch": 85.55268261150614,
"grad_norm": 1.9832147359848022,
"learning_rate": 0.001,
"loss": 1.4598,
"step": 264700
},
{
"epoch": 85.58500323206205,
"grad_norm": 1.3334790468215942,
"learning_rate": 0.001,
"loss": 1.4518,
"step": 264800
},
{
"epoch": 85.61732385261797,
"grad_norm": 1.4837751388549805,
"learning_rate": 0.001,
"loss": 1.4636,
"step": 264900
},
{
"epoch": 85.64964447317388,
"grad_norm": 1.547782063484192,
"learning_rate": 0.001,
"loss": 1.4695,
"step": 265000
},
{
"epoch": 85.6819650937298,
"grad_norm": 1.674836277961731,
"learning_rate": 0.001,
"loss": 1.4553,
"step": 265100
},
{
"epoch": 85.71428571428571,
"grad_norm": 2.6639299392700195,
"learning_rate": 0.001,
"loss": 1.4768,
"step": 265200
},
{
"epoch": 85.74660633484163,
"grad_norm": 1.4166233539581299,
"learning_rate": 0.001,
"loss": 1.4565,
"step": 265300
},
{
"epoch": 85.77892695539754,
"grad_norm": 1.1681371927261353,
"learning_rate": 0.001,
"loss": 1.46,
"step": 265400
},
{
"epoch": 85.81124757595346,
"grad_norm": 2.1296963691711426,
"learning_rate": 0.001,
"loss": 1.4889,
"step": 265500
},
{
"epoch": 85.84356819650937,
"grad_norm": 1.5797781944274902,
"learning_rate": 0.001,
"loss": 1.4638,
"step": 265600
},
{
"epoch": 85.87588881706529,
"grad_norm": 1.7881137132644653,
"learning_rate": 0.001,
"loss": 1.4752,
"step": 265700
},
{
"epoch": 85.9082094376212,
"grad_norm": 1.2332497835159302,
"learning_rate": 0.001,
"loss": 1.4873,
"step": 265800
},
{
"epoch": 85.94053005817712,
"grad_norm": 1.7854421138763428,
"learning_rate": 0.001,
"loss": 1.4971,
"step": 265900
},
{
"epoch": 85.97285067873302,
"grad_norm": 1.4832534790039062,
"learning_rate": 0.001,
"loss": 1.4837,
"step": 266000
},
{
"epoch": 86.00517129928895,
"grad_norm": 1.3114742040634155,
"learning_rate": 0.001,
"loss": 1.4811,
"step": 266100
},
{
"epoch": 86.03749191984487,
"grad_norm": 1.5295010805130005,
"learning_rate": 0.001,
"loss": 1.3754,
"step": 266200
},
{
"epoch": 86.06981254040078,
"grad_norm": 1.905587077140808,
"learning_rate": 0.001,
"loss": 1.3851,
"step": 266300
},
{
"epoch": 86.1021331609567,
"grad_norm": 1.2385417222976685,
"learning_rate": 0.001,
"loss": 1.3739,
"step": 266400
},
{
"epoch": 86.1344537815126,
"grad_norm": 2.0491955280303955,
"learning_rate": 0.001,
"loss": 1.3927,
"step": 266500
},
{
"epoch": 86.16677440206853,
"grad_norm": 1.4316233396530151,
"learning_rate": 0.001,
"loss": 1.3902,
"step": 266600
},
{
"epoch": 86.19909502262443,
"grad_norm": 2.0543875694274902,
"learning_rate": 0.001,
"loss": 1.3802,
"step": 266700
},
{
"epoch": 86.23141564318036,
"grad_norm": 1.4741928577423096,
"learning_rate": 0.001,
"loss": 1.4082,
"step": 266800
},
{
"epoch": 86.26373626373626,
"grad_norm": 1.246631383895874,
"learning_rate": 0.001,
"loss": 1.4175,
"step": 266900
},
{
"epoch": 86.29605688429218,
"grad_norm": 1.2465566396713257,
"learning_rate": 0.001,
"loss": 1.4193,
"step": 267000
},
{
"epoch": 86.32837750484809,
"grad_norm": 2.4174885749816895,
"learning_rate": 0.001,
"loss": 1.4122,
"step": 267100
},
{
"epoch": 86.36069812540401,
"grad_norm": 2.284865140914917,
"learning_rate": 0.001,
"loss": 1.4077,
"step": 267200
},
{
"epoch": 86.39301874595992,
"grad_norm": 1.5470408201217651,
"learning_rate": 0.001,
"loss": 1.4288,
"step": 267300
},
{
"epoch": 86.42533936651584,
"grad_norm": 1.2510464191436768,
"learning_rate": 0.001,
"loss": 1.4268,
"step": 267400
},
{
"epoch": 86.45765998707175,
"grad_norm": 1.6958582401275635,
"learning_rate": 0.001,
"loss": 1.4252,
"step": 267500
},
{
"epoch": 86.48998060762767,
"grad_norm": 1.337631106376648,
"learning_rate": 0.001,
"loss": 1.4409,
"step": 267600
},
{
"epoch": 86.52230122818358,
"grad_norm": 2.136993646621704,
"learning_rate": 0.001,
"loss": 1.4443,
"step": 267700
},
{
"epoch": 86.5546218487395,
"grad_norm": 1.556563377380371,
"learning_rate": 0.001,
"loss": 1.4458,
"step": 267800
},
{
"epoch": 86.58694246929541,
"grad_norm": 1.3264302015304565,
"learning_rate": 0.001,
"loss": 1.4506,
"step": 267900
},
{
"epoch": 86.61926308985133,
"grad_norm": 1.9455862045288086,
"learning_rate": 0.001,
"loss": 1.4437,
"step": 268000
},
{
"epoch": 86.65158371040724,
"grad_norm": 1.4218875169754028,
"learning_rate": 0.001,
"loss": 1.4613,
"step": 268100
},
{
"epoch": 86.68390433096316,
"grad_norm": 1.6667139530181885,
"learning_rate": 0.001,
"loss": 1.4553,
"step": 268200
},
{
"epoch": 86.71622495151907,
"grad_norm": 1.3904166221618652,
"learning_rate": 0.001,
"loss": 1.4655,
"step": 268300
},
{
"epoch": 86.74854557207499,
"grad_norm": 1.9042881727218628,
"learning_rate": 0.001,
"loss": 1.4657,
"step": 268400
},
{
"epoch": 86.7808661926309,
"grad_norm": 1.1454037427902222,
"learning_rate": 0.001,
"loss": 1.4521,
"step": 268500
},
{
"epoch": 86.81318681318682,
"grad_norm": 1.6706260442733765,
"learning_rate": 0.001,
"loss": 1.4817,
"step": 268600
},
{
"epoch": 86.84550743374272,
"grad_norm": 1.468371033668518,
"learning_rate": 0.001,
"loss": 1.4599,
"step": 268700
},
{
"epoch": 86.87782805429865,
"grad_norm": 1.1414262056350708,
"learning_rate": 0.001,
"loss": 1.4702,
"step": 268800
},
{
"epoch": 86.91014867485455,
"grad_norm": 1.4835313558578491,
"learning_rate": 0.001,
"loss": 1.4802,
"step": 268900
},
{
"epoch": 86.94246929541048,
"grad_norm": 1.785538673400879,
"learning_rate": 0.001,
"loss": 1.4688,
"step": 269000
},
{
"epoch": 86.97478991596638,
"grad_norm": 1.1166988611221313,
"learning_rate": 0.001,
"loss": 1.4886,
"step": 269100
},
{
"epoch": 87.0071105365223,
"grad_norm": 1.2215816974639893,
"learning_rate": 0.001,
"loss": 1.4617,
"step": 269200
},
{
"epoch": 87.03943115707821,
"grad_norm": 1.267557978630066,
"learning_rate": 0.001,
"loss": 1.3595,
"step": 269300
},
{
"epoch": 87.07175177763413,
"grad_norm": 1.3655972480773926,
"learning_rate": 0.001,
"loss": 1.3799,
"step": 269400
},
{
"epoch": 87.10407239819004,
"grad_norm": 1.4063516855239868,
"learning_rate": 0.001,
"loss": 1.3841,
"step": 269500
},
{
"epoch": 87.13639301874596,
"grad_norm": 1.688720941543579,
"learning_rate": 0.001,
"loss": 1.3949,
"step": 269600
},
{
"epoch": 87.16871363930187,
"grad_norm": 2.142454147338867,
"learning_rate": 0.001,
"loss": 1.372,
"step": 269700
},
{
"epoch": 87.20103425985779,
"grad_norm": 1.5391592979431152,
"learning_rate": 0.001,
"loss": 1.3894,
"step": 269800
},
{
"epoch": 87.2333548804137,
"grad_norm": 1.9259381294250488,
"learning_rate": 0.001,
"loss": 1.393,
"step": 269900
},
{
"epoch": 87.26567550096962,
"grad_norm": 1.3825037479400635,
"learning_rate": 0.001,
"loss": 1.3998,
"step": 270000
},
{
"epoch": 87.29799612152553,
"grad_norm": 1.6389166116714478,
"learning_rate": 0.001,
"loss": 1.4141,
"step": 270100
},
{
"epoch": 87.33031674208145,
"grad_norm": 1.6546417474746704,
"learning_rate": 0.001,
"loss": 1.4115,
"step": 270200
},
{
"epoch": 87.36263736263736,
"grad_norm": 1.4968101978302002,
"learning_rate": 0.001,
"loss": 1.4083,
"step": 270300
},
{
"epoch": 87.39495798319328,
"grad_norm": 1.302238941192627,
"learning_rate": 0.001,
"loss": 1.4143,
"step": 270400
},
{
"epoch": 87.42727860374919,
"grad_norm": 1.899917721748352,
"learning_rate": 0.001,
"loss": 1.4216,
"step": 270500
},
{
"epoch": 87.45959922430511,
"grad_norm": 1.4192540645599365,
"learning_rate": 0.001,
"loss": 1.4287,
"step": 270600
},
{
"epoch": 87.49191984486102,
"grad_norm": 1.4450047016143799,
"learning_rate": 0.001,
"loss": 1.4465,
"step": 270700
},
{
"epoch": 87.52424046541694,
"grad_norm": 1.1823523044586182,
"learning_rate": 0.001,
"loss": 1.4158,
"step": 270800
},
{
"epoch": 87.55656108597285,
"grad_norm": 1.489056944847107,
"learning_rate": 0.001,
"loss": 1.4336,
"step": 270900
},
{
"epoch": 87.58888170652877,
"grad_norm": 1.1466692686080933,
"learning_rate": 0.001,
"loss": 1.4453,
"step": 271000
},
{
"epoch": 87.62120232708467,
"grad_norm": 1.4787135124206543,
"learning_rate": 0.001,
"loss": 1.447,
"step": 271100
},
{
"epoch": 87.6535229476406,
"grad_norm": 1.5493061542510986,
"learning_rate": 0.001,
"loss": 1.4309,
"step": 271200
},
{
"epoch": 87.6858435681965,
"grad_norm": 1.8036699295043945,
"learning_rate": 0.001,
"loss": 1.449,
"step": 271300
},
{
"epoch": 87.71816418875243,
"grad_norm": 1.628659725189209,
"learning_rate": 0.001,
"loss": 1.4558,
"step": 271400
},
{
"epoch": 87.75048480930833,
"grad_norm": 2.0171215534210205,
"learning_rate": 0.001,
"loss": 1.4451,
"step": 271500
},
{
"epoch": 87.78280542986425,
"grad_norm": 1.3943383693695068,
"learning_rate": 0.001,
"loss": 1.4518,
"step": 271600
},
{
"epoch": 87.81512605042016,
"grad_norm": 1.1058683395385742,
"learning_rate": 0.001,
"loss": 1.4634,
"step": 271700
},
{
"epoch": 87.84744667097608,
"grad_norm": 1.244297981262207,
"learning_rate": 0.001,
"loss": 1.4672,
"step": 271800
},
{
"epoch": 87.87976729153199,
"grad_norm": 1.8520456552505493,
"learning_rate": 0.001,
"loss": 1.4765,
"step": 271900
},
{
"epoch": 87.91208791208791,
"grad_norm": 1.0921056270599365,
"learning_rate": 0.001,
"loss": 1.4854,
"step": 272000
},
{
"epoch": 87.94440853264382,
"grad_norm": 1.8864128589630127,
"learning_rate": 0.001,
"loss": 1.4814,
"step": 272100
},
{
"epoch": 87.97672915319974,
"grad_norm": 1.423569679260254,
"learning_rate": 0.001,
"loss": 1.4885,
"step": 272200
},
{
"epoch": 88.00904977375566,
"grad_norm": 1.6555652618408203,
"learning_rate": 0.001,
"loss": 1.4544,
"step": 272300
},
{
"epoch": 88.04137039431157,
"grad_norm": 1.9587502479553223,
"learning_rate": 0.001,
"loss": 1.3591,
"step": 272400
},
{
"epoch": 88.07369101486749,
"grad_norm": 1.5171904563903809,
"learning_rate": 0.001,
"loss": 1.3578,
"step": 272500
},
{
"epoch": 88.1060116354234,
"grad_norm": 1.5223625898361206,
"learning_rate": 0.001,
"loss": 1.3711,
"step": 272600
},
{
"epoch": 88.13833225597932,
"grad_norm": 1.2753205299377441,
"learning_rate": 0.001,
"loss": 1.3702,
"step": 272700
},
{
"epoch": 88.17065287653523,
"grad_norm": 1.760709285736084,
"learning_rate": 0.001,
"loss": 1.3807,
"step": 272800
},
{
"epoch": 88.20297349709115,
"grad_norm": 1.6494214534759521,
"learning_rate": 0.001,
"loss": 1.3763,
"step": 272900
},
{
"epoch": 88.23529411764706,
"grad_norm": 1.3722658157348633,
"learning_rate": 0.001,
"loss": 1.3851,
"step": 273000
},
{
"epoch": 88.26761473820298,
"grad_norm": 1.349332571029663,
"learning_rate": 0.001,
"loss": 1.4007,
"step": 273100
},
{
"epoch": 88.29993535875889,
"grad_norm": 1.384621262550354,
"learning_rate": 0.001,
"loss": 1.4118,
"step": 273200
},
{
"epoch": 88.33225597931481,
"grad_norm": 1.5978822708129883,
"learning_rate": 0.001,
"loss": 1.4077,
"step": 273300
},
{
"epoch": 88.36457659987072,
"grad_norm": 1.614707350730896,
"learning_rate": 0.001,
"loss": 1.4044,
"step": 273400
},
{
"epoch": 88.39689722042664,
"grad_norm": 1.2889578342437744,
"learning_rate": 0.001,
"loss": 1.4159,
"step": 273500
},
{
"epoch": 88.42921784098255,
"grad_norm": 1.2051821947097778,
"learning_rate": 0.001,
"loss": 1.4153,
"step": 273600
},
{
"epoch": 88.46153846153847,
"grad_norm": 1.505126953125,
"learning_rate": 0.001,
"loss": 1.4146,
"step": 273700
},
{
"epoch": 88.49385908209437,
"grad_norm": 1.3672128915786743,
"learning_rate": 0.001,
"loss": 1.4233,
"step": 273800
},
{
"epoch": 88.5261797026503,
"grad_norm": 1.5844954252243042,
"learning_rate": 0.001,
"loss": 1.4354,
"step": 273900
},
{
"epoch": 88.5585003232062,
"grad_norm": 1.1671549081802368,
"learning_rate": 0.001,
"loss": 1.4261,
"step": 274000
},
{
"epoch": 88.59082094376213,
"grad_norm": 1.7736015319824219,
"learning_rate": 0.001,
"loss": 1.4343,
"step": 274100
},
{
"epoch": 88.62314156431803,
"grad_norm": 1.6177502870559692,
"learning_rate": 0.001,
"loss": 1.4385,
"step": 274200
},
{
"epoch": 88.65546218487395,
"grad_norm": 1.4046214818954468,
"learning_rate": 0.001,
"loss": 1.4426,
"step": 274300
},
{
"epoch": 88.68778280542986,
"grad_norm": 1.4468072652816772,
"learning_rate": 0.001,
"loss": 1.4563,
"step": 274400
},
{
"epoch": 88.72010342598578,
"grad_norm": 1.2877278327941895,
"learning_rate": 0.001,
"loss": 1.4573,
"step": 274500
},
{
"epoch": 88.75242404654169,
"grad_norm": 1.874245285987854,
"learning_rate": 0.001,
"loss": 1.437,
"step": 274600
},
{
"epoch": 88.78474466709761,
"grad_norm": 1.740172266960144,
"learning_rate": 0.001,
"loss": 1.4548,
"step": 274700
},
{
"epoch": 88.81706528765352,
"grad_norm": 2.0147006511688232,
"learning_rate": 0.001,
"loss": 1.4542,
"step": 274800
},
{
"epoch": 88.84938590820944,
"grad_norm": 2.030195713043213,
"learning_rate": 0.001,
"loss": 1.4624,
"step": 274900
},
{
"epoch": 88.88170652876535,
"grad_norm": 1.3581018447875977,
"learning_rate": 0.001,
"loss": 1.4679,
"step": 275000
},
{
"epoch": 88.91402714932127,
"grad_norm": 1.2543132305145264,
"learning_rate": 0.001,
"loss": 1.4649,
"step": 275100
},
{
"epoch": 88.94634776987718,
"grad_norm": 1.4189682006835938,
"learning_rate": 0.001,
"loss": 1.4535,
"step": 275200
},
{
"epoch": 88.9786683904331,
"grad_norm": 2.033797264099121,
"learning_rate": 0.001,
"loss": 1.4647,
"step": 275300
},
{
"epoch": 89.01098901098901,
"grad_norm": 1.2707444429397583,
"learning_rate": 0.001,
"loss": 1.4318,
"step": 275400
},
{
"epoch": 89.04330963154493,
"grad_norm": 1.6407828330993652,
"learning_rate": 0.001,
"loss": 1.3663,
"step": 275500
},
{
"epoch": 89.07563025210084,
"grad_norm": 1.7446123361587524,
"learning_rate": 0.001,
"loss": 1.3561,
"step": 275600
},
{
"epoch": 89.10795087265676,
"grad_norm": 1.7070893049240112,
"learning_rate": 0.001,
"loss": 1.3785,
"step": 275700
},
{
"epoch": 89.14027149321267,
"grad_norm": 1.4096546173095703,
"learning_rate": 0.001,
"loss": 1.3707,
"step": 275800
},
{
"epoch": 89.17259211376859,
"grad_norm": 1.5572234392166138,
"learning_rate": 0.001,
"loss": 1.3817,
"step": 275900
},
{
"epoch": 89.2049127343245,
"grad_norm": 1.4868851900100708,
"learning_rate": 0.001,
"loss": 1.387,
"step": 276000
},
{
"epoch": 89.23723335488042,
"grad_norm": 1.7338111400604248,
"learning_rate": 0.001,
"loss": 1.3882,
"step": 276100
},
{
"epoch": 89.26955397543632,
"grad_norm": 1.4389114379882812,
"learning_rate": 0.001,
"loss": 1.3906,
"step": 276200
},
{
"epoch": 89.30187459599225,
"grad_norm": 1.546191692352295,
"learning_rate": 0.001,
"loss": 1.3786,
"step": 276300
},
{
"epoch": 89.33419521654815,
"grad_norm": 1.2728110551834106,
"learning_rate": 0.001,
"loss": 1.4024,
"step": 276400
},
{
"epoch": 89.36651583710407,
"grad_norm": 1.728053092956543,
"learning_rate": 0.001,
"loss": 1.4192,
"step": 276500
},
{
"epoch": 89.39883645765998,
"grad_norm": 1.7624201774597168,
"learning_rate": 0.001,
"loss": 1.4122,
"step": 276600
},
{
"epoch": 89.4311570782159,
"grad_norm": 1.3859518766403198,
"learning_rate": 0.001,
"loss": 1.4031,
"step": 276700
},
{
"epoch": 89.46347769877181,
"grad_norm": 1.5684387683868408,
"learning_rate": 0.001,
"loss": 1.4146,
"step": 276800
},
{
"epoch": 89.49579831932773,
"grad_norm": 1.493183970451355,
"learning_rate": 0.001,
"loss": 1.4187,
"step": 276900
},
{
"epoch": 89.52811893988364,
"grad_norm": 1.4836277961730957,
"learning_rate": 0.001,
"loss": 1.4199,
"step": 277000
},
{
"epoch": 89.56043956043956,
"grad_norm": 1.4376671314239502,
"learning_rate": 0.001,
"loss": 1.4178,
"step": 277100
},
{
"epoch": 89.59276018099547,
"grad_norm": 1.6110737323760986,
"learning_rate": 0.001,
"loss": 1.4165,
"step": 277200
},
{
"epoch": 89.62508080155139,
"grad_norm": 1.447060227394104,
"learning_rate": 0.001,
"loss": 1.4457,
"step": 277300
},
{
"epoch": 89.6574014221073,
"grad_norm": 1.2219505310058594,
"learning_rate": 0.001,
"loss": 1.4378,
"step": 277400
},
{
"epoch": 89.68972204266322,
"grad_norm": 2.083582878112793,
"learning_rate": 0.001,
"loss": 1.4247,
"step": 277500
},
{
"epoch": 89.72204266321913,
"grad_norm": 1.6131013631820679,
"learning_rate": 0.001,
"loss": 1.4497,
"step": 277600
},
{
"epoch": 89.75436328377505,
"grad_norm": 1.2412773370742798,
"learning_rate": 0.001,
"loss": 1.4375,
"step": 277700
},
{
"epoch": 89.78668390433096,
"grad_norm": 1.619038701057434,
"learning_rate": 0.001,
"loss": 1.4347,
"step": 277800
},
{
"epoch": 89.81900452488688,
"grad_norm": 1.4823715686798096,
"learning_rate": 0.001,
"loss": 1.4662,
"step": 277900
},
{
"epoch": 89.85132514544279,
"grad_norm": 2.2037734985351562,
"learning_rate": 0.001,
"loss": 1.4334,
"step": 278000
},
{
"epoch": 89.88364576599871,
"grad_norm": 1.715324878692627,
"learning_rate": 0.001,
"loss": 1.4549,
"step": 278100
},
{
"epoch": 89.91596638655462,
"grad_norm": 1.643113374710083,
"learning_rate": 0.001,
"loss": 1.4477,
"step": 278200
},
{
"epoch": 89.94828700711054,
"grad_norm": 1.3043150901794434,
"learning_rate": 0.001,
"loss": 1.4623,
"step": 278300
},
{
"epoch": 89.98060762766644,
"grad_norm": 2.1376888751983643,
"learning_rate": 0.001,
"loss": 1.4497,
"step": 278400
},
{
"epoch": 90.01292824822237,
"grad_norm": 2.751443862915039,
"learning_rate": 0.001,
"loss": 1.4045,
"step": 278500
},
{
"epoch": 90.04524886877829,
"grad_norm": 1.382408857345581,
"learning_rate": 0.001,
"loss": 1.3524,
"step": 278600
},
{
"epoch": 90.0775694893342,
"grad_norm": 1.7884782552719116,
"learning_rate": 0.001,
"loss": 1.357,
"step": 278700
},
{
"epoch": 90.10989010989012,
"grad_norm": 1.7769843339920044,
"learning_rate": 0.001,
"loss": 1.352,
"step": 278800
},
{
"epoch": 90.14221073044602,
"grad_norm": 1.3789559602737427,
"learning_rate": 0.001,
"loss": 1.3698,
"step": 278900
},
{
"epoch": 90.17453135100195,
"grad_norm": 1.8564437627792358,
"learning_rate": 0.001,
"loss": 1.3642,
"step": 279000
},
{
"epoch": 90.20685197155785,
"grad_norm": 1.7183071374893188,
"learning_rate": 0.001,
"loss": 1.3561,
"step": 279100
},
{
"epoch": 90.23917259211377,
"grad_norm": 1.4420616626739502,
"learning_rate": 0.001,
"loss": 1.3924,
"step": 279200
},
{
"epoch": 90.27149321266968,
"grad_norm": 1.652576208114624,
"learning_rate": 0.001,
"loss": 1.3891,
"step": 279300
},
{
"epoch": 90.3038138332256,
"grad_norm": 1.4430058002471924,
"learning_rate": 0.001,
"loss": 1.3758,
"step": 279400
},
{
"epoch": 90.33613445378151,
"grad_norm": 1.5071574449539185,
"learning_rate": 0.001,
"loss": 1.3871,
"step": 279500
},
{
"epoch": 90.36845507433743,
"grad_norm": 1.622592568397522,
"learning_rate": 0.001,
"loss": 1.3958,
"step": 279600
},
{
"epoch": 90.40077569489334,
"grad_norm": 1.8727059364318848,
"learning_rate": 0.001,
"loss": 1.3956,
"step": 279700
},
{
"epoch": 90.43309631544926,
"grad_norm": 1.5023539066314697,
"learning_rate": 0.001,
"loss": 1.3878,
"step": 279800
},
{
"epoch": 90.46541693600517,
"grad_norm": 1.6353498697280884,
"learning_rate": 0.001,
"loss": 1.4131,
"step": 279900
},
{
"epoch": 90.49773755656109,
"grad_norm": 1.4944193363189697,
"learning_rate": 0.001,
"loss": 1.4118,
"step": 280000
},
{
"epoch": 90.530058177117,
"grad_norm": 1.550230622291565,
"learning_rate": 0.001,
"loss": 1.4177,
"step": 280100
},
{
"epoch": 90.56237879767292,
"grad_norm": 1.6260823011398315,
"learning_rate": 0.001,
"loss": 1.4105,
"step": 280200
},
{
"epoch": 90.59469941822883,
"grad_norm": 1.7901148796081543,
"learning_rate": 0.001,
"loss": 1.4237,
"step": 280300
},
{
"epoch": 90.62702003878475,
"grad_norm": 1.7166590690612793,
"learning_rate": 0.001,
"loss": 1.4241,
"step": 280400
},
{
"epoch": 90.65934065934066,
"grad_norm": 1.6885669231414795,
"learning_rate": 0.001,
"loss": 1.4078,
"step": 280500
},
{
"epoch": 90.69166127989658,
"grad_norm": 2.0084264278411865,
"learning_rate": 0.001,
"loss": 1.4381,
"step": 280600
},
{
"epoch": 90.72398190045249,
"grad_norm": 2.050732135772705,
"learning_rate": 0.001,
"loss": 1.4323,
"step": 280700
},
{
"epoch": 90.75630252100841,
"grad_norm": 1.9119924306869507,
"learning_rate": 0.001,
"loss": 1.4294,
"step": 280800
},
{
"epoch": 90.78862314156432,
"grad_norm": 1.8119657039642334,
"learning_rate": 0.001,
"loss": 1.4261,
"step": 280900
},
{
"epoch": 90.82094376212024,
"grad_norm": 1.9015934467315674,
"learning_rate": 0.001,
"loss": 1.4534,
"step": 281000
},
{
"epoch": 90.85326438267614,
"grad_norm": 1.812975287437439,
"learning_rate": 0.001,
"loss": 1.4645,
"step": 281100
},
{
"epoch": 90.88558500323207,
"grad_norm": 1.9151250123977661,
"learning_rate": 0.001,
"loss": 1.4598,
"step": 281200
},
{
"epoch": 90.91790562378797,
"grad_norm": 1.9323469400405884,
"learning_rate": 0.001,
"loss": 1.4657,
"step": 281300
},
{
"epoch": 90.9502262443439,
"grad_norm": 1.3795750141143799,
"learning_rate": 0.001,
"loss": 1.4455,
"step": 281400
},
{
"epoch": 90.9825468648998,
"grad_norm": 1.4775996208190918,
"learning_rate": 0.001,
"loss": 1.4577,
"step": 281500
},
{
"epoch": 91.01486748545572,
"grad_norm": 1.7066103219985962,
"learning_rate": 0.001,
"loss": 1.3822,
"step": 281600
},
{
"epoch": 91.04718810601163,
"grad_norm": 1.2869503498077393,
"learning_rate": 0.001,
"loss": 1.3376,
"step": 281700
},
{
"epoch": 91.07950872656755,
"grad_norm": 1.8983968496322632,
"learning_rate": 0.001,
"loss": 1.3431,
"step": 281800
},
{
"epoch": 91.11182934712346,
"grad_norm": 1.9564945697784424,
"learning_rate": 0.001,
"loss": 1.3623,
"step": 281900
},
{
"epoch": 91.14414996767938,
"grad_norm": 1.5554038286209106,
"learning_rate": 0.001,
"loss": 1.369,
"step": 282000
},
{
"epoch": 91.17647058823529,
"grad_norm": 1.8451589345932007,
"learning_rate": 0.001,
"loss": 1.3661,
"step": 282100
},
{
"epoch": 91.20879120879121,
"grad_norm": 1.8111636638641357,
"learning_rate": 0.001,
"loss": 1.3709,
"step": 282200
},
{
"epoch": 91.24111182934712,
"grad_norm": 1.7260061502456665,
"learning_rate": 0.001,
"loss": 1.3703,
"step": 282300
},
{
"epoch": 91.27343244990304,
"grad_norm": 2.1305065155029297,
"learning_rate": 0.001,
"loss": 1.3697,
"step": 282400
},
{
"epoch": 91.30575307045895,
"grad_norm": 1.5473634004592896,
"learning_rate": 0.001,
"loss": 1.3866,
"step": 282500
},
{
"epoch": 91.33807369101487,
"grad_norm": 1.3852416276931763,
"learning_rate": 0.001,
"loss": 1.383,
"step": 282600
},
{
"epoch": 91.37039431157078,
"grad_norm": 1.899824857711792,
"learning_rate": 0.001,
"loss": 1.3934,
"step": 282700
},
{
"epoch": 91.4027149321267,
"grad_norm": 1.6592535972595215,
"learning_rate": 0.001,
"loss": 1.4009,
"step": 282800
},
{
"epoch": 91.4350355526826,
"grad_norm": 1.8192658424377441,
"learning_rate": 0.001,
"loss": 1.3915,
"step": 282900
},
{
"epoch": 91.46735617323853,
"grad_norm": 1.6893870830535889,
"learning_rate": 0.001,
"loss": 1.3993,
"step": 283000
},
{
"epoch": 91.49967679379444,
"grad_norm": 1.8176219463348389,
"learning_rate": 0.001,
"loss": 1.4131,
"step": 283100
},
{
"epoch": 91.53199741435036,
"grad_norm": 1.5680086612701416,
"learning_rate": 0.001,
"loss": 1.4065,
"step": 283200
},
{
"epoch": 91.56431803490626,
"grad_norm": 1.828738808631897,
"learning_rate": 0.001,
"loss": 1.4054,
"step": 283300
},
{
"epoch": 91.59663865546219,
"grad_norm": 1.3039504289627075,
"learning_rate": 0.001,
"loss": 1.4171,
"step": 283400
},
{
"epoch": 91.6289592760181,
"grad_norm": 1.849231243133545,
"learning_rate": 0.001,
"loss": 1.4133,
"step": 283500
},
{
"epoch": 91.66127989657402,
"grad_norm": 1.7335554361343384,
"learning_rate": 0.001,
"loss": 1.4347,
"step": 283600
},
{
"epoch": 91.69360051712992,
"grad_norm": 1.6021090745925903,
"learning_rate": 0.001,
"loss": 1.4229,
"step": 283700
},
{
"epoch": 91.72592113768584,
"grad_norm": 1.8389188051223755,
"learning_rate": 0.001,
"loss": 1.4254,
"step": 283800
},
{
"epoch": 91.75824175824175,
"grad_norm": 1.4818696975708008,
"learning_rate": 0.001,
"loss": 1.4276,
"step": 283900
},
{
"epoch": 91.79056237879767,
"grad_norm": 2.0308756828308105,
"learning_rate": 0.001,
"loss": 1.4423,
"step": 284000
},
{
"epoch": 91.82288299935358,
"grad_norm": 1.9242323637008667,
"learning_rate": 0.001,
"loss": 1.4399,
"step": 284100
},
{
"epoch": 91.8552036199095,
"grad_norm": 1.9083664417266846,
"learning_rate": 0.001,
"loss": 1.4535,
"step": 284200
},
{
"epoch": 91.88752424046541,
"grad_norm": 2.0432400703430176,
"learning_rate": 0.001,
"loss": 1.4408,
"step": 284300
},
{
"epoch": 91.91984486102133,
"grad_norm": 1.565138339996338,
"learning_rate": 0.001,
"loss": 1.4351,
"step": 284400
},
{
"epoch": 91.95216548157724,
"grad_norm": 2.194920063018799,
"learning_rate": 0.001,
"loss": 1.4471,
"step": 284500
},
{
"epoch": 91.98448610213316,
"grad_norm": 1.545792579650879,
"learning_rate": 0.001,
"loss": 1.4397,
"step": 284600
},
{
"epoch": 92.01680672268908,
"grad_norm": 2.3659920692443848,
"learning_rate": 0.001,
"loss": 1.3656,
"step": 284700
},
{
"epoch": 92.04912734324499,
"grad_norm": 3.0130562782287598,
"learning_rate": 0.001,
"loss": 1.3451,
"step": 284800
},
{
"epoch": 92.08144796380091,
"grad_norm": 1.6045893430709839,
"learning_rate": 0.001,
"loss": 1.3519,
"step": 284900
},
{
"epoch": 92.11376858435682,
"grad_norm": 1.9951777458190918,
"learning_rate": 0.001,
"loss": 1.3523,
"step": 285000
},
{
"epoch": 92.14608920491274,
"grad_norm": 1.7994177341461182,
"learning_rate": 0.001,
"loss": 1.3532,
"step": 285100
},
{
"epoch": 92.17840982546865,
"grad_norm": 1.8829383850097656,
"learning_rate": 0.001,
"loss": 1.3646,
"step": 285200
},
{
"epoch": 92.21073044602457,
"grad_norm": 1.5709656476974487,
"learning_rate": 0.001,
"loss": 1.3591,
"step": 285300
},
{
"epoch": 92.24305106658048,
"grad_norm": 1.4151982069015503,
"learning_rate": 0.001,
"loss": 1.3666,
"step": 285400
},
{
"epoch": 92.2753716871364,
"grad_norm": 2.813156843185425,
"learning_rate": 0.001,
"loss": 1.388,
"step": 285500
},
{
"epoch": 92.3076923076923,
"grad_norm": 2.141181707382202,
"learning_rate": 0.001,
"loss": 1.3718,
"step": 285600
},
{
"epoch": 92.34001292824823,
"grad_norm": 2.4286043643951416,
"learning_rate": 0.001,
"loss": 1.388,
"step": 285700
},
{
"epoch": 92.37233354880414,
"grad_norm": 2.7005491256713867,
"learning_rate": 0.001,
"loss": 1.3813,
"step": 285800
},
{
"epoch": 92.40465416936006,
"grad_norm": 1.7971080541610718,
"learning_rate": 0.001,
"loss": 1.3825,
"step": 285900
},
{
"epoch": 92.43697478991596,
"grad_norm": 1.8688595294952393,
"learning_rate": 0.001,
"loss": 1.3938,
"step": 286000
},
{
"epoch": 92.46929541047189,
"grad_norm": 2.3091495037078857,
"learning_rate": 0.001,
"loss": 1.3983,
"step": 286100
},
{
"epoch": 92.5016160310278,
"grad_norm": 1.972983479499817,
"learning_rate": 0.001,
"loss": 1.385,
"step": 286200
},
{
"epoch": 92.53393665158372,
"grad_norm": 2.646979570388794,
"learning_rate": 0.001,
"loss": 1.4115,
"step": 286300
},
{
"epoch": 92.56625727213962,
"grad_norm": 1.8654998540878296,
"learning_rate": 0.001,
"loss": 1.3997,
"step": 286400
},
{
"epoch": 92.59857789269554,
"grad_norm": 1.8353911638259888,
"learning_rate": 0.001,
"loss": 1.4048,
"step": 286500
},
{
"epoch": 92.63089851325145,
"grad_norm": 1.9789825677871704,
"learning_rate": 0.001,
"loss": 1.4107,
"step": 286600
},
{
"epoch": 92.66321913380737,
"grad_norm": 2.219052791595459,
"learning_rate": 0.001,
"loss": 1.4268,
"step": 286700
},
{
"epoch": 92.69553975436328,
"grad_norm": 2.2139179706573486,
"learning_rate": 0.001,
"loss": 1.4118,
"step": 286800
},
{
"epoch": 92.7278603749192,
"grad_norm": 2.3915624618530273,
"learning_rate": 0.001,
"loss": 1.4126,
"step": 286900
},
{
"epoch": 92.76018099547511,
"grad_norm": 3.5310840606689453,
"learning_rate": 0.001,
"loss": 1.4344,
"step": 287000
},
{
"epoch": 92.79250161603103,
"grad_norm": 3.0643367767333984,
"learning_rate": 0.001,
"loss": 1.4189,
"step": 287100
},
{
"epoch": 92.82482223658694,
"grad_norm": 3.3198719024658203,
"learning_rate": 0.001,
"loss": 1.441,
"step": 287200
},
{
"epoch": 92.85714285714286,
"grad_norm": 1.597151279449463,
"learning_rate": 0.001,
"loss": 1.4311,
"step": 287300
},
{
"epoch": 92.88946347769877,
"grad_norm": 1.5445133447647095,
"learning_rate": 0.001,
"loss": 1.4289,
"step": 287400
},
{
"epoch": 92.92178409825469,
"grad_norm": 2.8522164821624756,
"learning_rate": 0.001,
"loss": 1.4284,
"step": 287500
},
{
"epoch": 92.9541047188106,
"grad_norm": 2.093144178390503,
"learning_rate": 0.001,
"loss": 1.4327,
"step": 287600
},
{
"epoch": 92.98642533936652,
"grad_norm": 1.3801674842834473,
"learning_rate": 0.001,
"loss": 1.4522,
"step": 287700
},
{
"epoch": 93.01874595992243,
"grad_norm": 2.2896697521209717,
"learning_rate": 0.001,
"loss": 1.3783,
"step": 287800
},
{
"epoch": 93.05106658047835,
"grad_norm": 2.3245482444763184,
"learning_rate": 0.001,
"loss": 1.3282,
"step": 287900
},
{
"epoch": 93.08338720103426,
"grad_norm": 1.4789022207260132,
"learning_rate": 0.001,
"loss": 1.329,
"step": 288000
},
{
"epoch": 93.11570782159018,
"grad_norm": 1.8415120840072632,
"learning_rate": 0.001,
"loss": 1.3544,
"step": 288100
},
{
"epoch": 93.14802844214609,
"grad_norm": 1.6748054027557373,
"learning_rate": 0.001,
"loss": 1.3446,
"step": 288200
},
{
"epoch": 93.180349062702,
"grad_norm": 1.9157007932662964,
"learning_rate": 0.001,
"loss": 1.3454,
"step": 288300
},
{
"epoch": 93.21266968325791,
"grad_norm": 2.0357260704040527,
"learning_rate": 0.001,
"loss": 1.3463,
"step": 288400
},
{
"epoch": 93.24499030381384,
"grad_norm": 1.5724585056304932,
"learning_rate": 0.001,
"loss": 1.3515,
"step": 288500
},
{
"epoch": 93.27731092436974,
"grad_norm": 1.6937826871871948,
"learning_rate": 0.001,
"loss": 1.3722,
"step": 288600
},
{
"epoch": 93.30963154492567,
"grad_norm": 1.246242642402649,
"learning_rate": 0.001,
"loss": 1.3608,
"step": 288700
},
{
"epoch": 93.34195216548157,
"grad_norm": 1.6182515621185303,
"learning_rate": 0.001,
"loss": 1.3816,
"step": 288800
},
{
"epoch": 93.3742727860375,
"grad_norm": 2.0054874420166016,
"learning_rate": 0.001,
"loss": 1.3722,
"step": 288900
},
{
"epoch": 93.4065934065934,
"grad_norm": 2.126291275024414,
"learning_rate": 0.001,
"loss": 1.3721,
"step": 289000
},
{
"epoch": 93.43891402714932,
"grad_norm": 1.4355053901672363,
"learning_rate": 0.001,
"loss": 1.3871,
"step": 289100
},
{
"epoch": 93.47123464770523,
"grad_norm": 2.010754108428955,
"learning_rate": 0.001,
"loss": 1.3885,
"step": 289200
},
{
"epoch": 93.50355526826115,
"grad_norm": 1.3670854568481445,
"learning_rate": 0.001,
"loss": 1.3948,
"step": 289300
},
{
"epoch": 93.53587588881706,
"grad_norm": 1.4175864458084106,
"learning_rate": 0.001,
"loss": 1.3982,
"step": 289400
},
{
"epoch": 93.56819650937298,
"grad_norm": 2.6126651763916016,
"learning_rate": 0.001,
"loss": 1.4164,
"step": 289500
},
{
"epoch": 93.60051712992889,
"grad_norm": 1.7548456192016602,
"learning_rate": 0.001,
"loss": 1.3967,
"step": 289600
},
{
"epoch": 93.63283775048481,
"grad_norm": 1.2660231590270996,
"learning_rate": 0.001,
"loss": 1.394,
"step": 289700
},
{
"epoch": 93.66515837104072,
"grad_norm": 1.2814079523086548,
"learning_rate": 0.001,
"loss": 1.4106,
"step": 289800
},
{
"epoch": 93.69747899159664,
"grad_norm": 1.2180604934692383,
"learning_rate": 0.001,
"loss": 1.4285,
"step": 289900
},
{
"epoch": 93.72979961215255,
"grad_norm": 2.0289041996002197,
"learning_rate": 0.001,
"loss": 1.4151,
"step": 290000
},
{
"epoch": 93.76212023270847,
"grad_norm": 1.379878282546997,
"learning_rate": 0.001,
"loss": 1.4212,
"step": 290100
},
{
"epoch": 93.79444085326438,
"grad_norm": 1.6776106357574463,
"learning_rate": 0.001,
"loss": 1.4362,
"step": 290200
},
{
"epoch": 93.8267614738203,
"grad_norm": 1.7783920764923096,
"learning_rate": 0.001,
"loss": 1.4347,
"step": 290300
},
{
"epoch": 93.8590820943762,
"grad_norm": 2.01831316947937,
"learning_rate": 0.001,
"loss": 1.4237,
"step": 290400
},
{
"epoch": 93.89140271493213,
"grad_norm": 2.4345901012420654,
"learning_rate": 0.001,
"loss": 1.4323,
"step": 290500
},
{
"epoch": 93.92372333548803,
"grad_norm": 1.4624724388122559,
"learning_rate": 0.001,
"loss": 1.4257,
"step": 290600
},
{
"epoch": 93.95604395604396,
"grad_norm": 1.7283406257629395,
"learning_rate": 0.001,
"loss": 1.4538,
"step": 290700
},
{
"epoch": 93.98836457659988,
"grad_norm": 2.014925241470337,
"learning_rate": 0.001,
"loss": 1.4296,
"step": 290800
},
{
"epoch": 94.02068519715579,
"grad_norm": 1.3549288511276245,
"learning_rate": 0.001,
"loss": 1.3744,
"step": 290900
},
{
"epoch": 94.0530058177117,
"grad_norm": 1.424655556678772,
"learning_rate": 0.001,
"loss": 1.3224,
"step": 291000
},
{
"epoch": 94.08532643826761,
"grad_norm": 1.3687740564346313,
"learning_rate": 0.001,
"loss": 1.3355,
"step": 291100
},
{
"epoch": 94.11764705882354,
"grad_norm": 1.4774508476257324,
"learning_rate": 0.001,
"loss": 1.3339,
"step": 291200
},
{
"epoch": 94.14996767937944,
"grad_norm": 1.6459767818450928,
"learning_rate": 0.001,
"loss": 1.3499,
"step": 291300
},
{
"epoch": 94.18228829993537,
"grad_norm": 1.4307767152786255,
"learning_rate": 0.001,
"loss": 1.3354,
"step": 291400
},
{
"epoch": 94.21460892049127,
"grad_norm": 1.650320053100586,
"learning_rate": 0.001,
"loss": 1.3508,
"step": 291500
},
{
"epoch": 94.2469295410472,
"grad_norm": 1.236221432685852,
"learning_rate": 0.001,
"loss": 1.3495,
"step": 291600
},
{
"epoch": 94.2792501616031,
"grad_norm": 2.103527545928955,
"learning_rate": 0.001,
"loss": 1.3587,
"step": 291700
},
{
"epoch": 94.31157078215902,
"grad_norm": 1.647996425628662,
"learning_rate": 0.001,
"loss": 1.3607,
"step": 291800
},
{
"epoch": 94.34389140271493,
"grad_norm": 2.096320867538452,
"learning_rate": 0.001,
"loss": 1.361,
"step": 291900
},
{
"epoch": 94.37621202327085,
"grad_norm": 2.003998041152954,
"learning_rate": 0.001,
"loss": 1.3591,
"step": 292000
},
{
"epoch": 94.40853264382676,
"grad_norm": 1.35490882396698,
"learning_rate": 0.001,
"loss": 1.3731,
"step": 292100
},
{
"epoch": 94.44085326438268,
"grad_norm": 1.3877946138381958,
"learning_rate": 0.001,
"loss": 1.3887,
"step": 292200
},
{
"epoch": 94.47317388493859,
"grad_norm": 1.6850929260253906,
"learning_rate": 0.001,
"loss": 1.3902,
"step": 292300
},
{
"epoch": 94.50549450549451,
"grad_norm": 1.870916724205017,
"learning_rate": 0.001,
"loss": 1.3924,
"step": 292400
},
{
"epoch": 94.53781512605042,
"grad_norm": 1.4658701419830322,
"learning_rate": 0.001,
"loss": 1.3969,
"step": 292500
},
{
"epoch": 94.57013574660634,
"grad_norm": 1.3580251932144165,
"learning_rate": 0.001,
"loss": 1.3774,
"step": 292600
},
{
"epoch": 94.60245636716225,
"grad_norm": 1.2977242469787598,
"learning_rate": 0.001,
"loss": 1.3918,
"step": 292700
},
{
"epoch": 94.63477698771817,
"grad_norm": 1.645932912826538,
"learning_rate": 0.001,
"loss": 1.3933,
"step": 292800
},
{
"epoch": 94.66709760827408,
"grad_norm": 2.167123794555664,
"learning_rate": 0.001,
"loss": 1.4075,
"step": 292900
},
{
"epoch": 94.69941822883,
"grad_norm": 1.1674609184265137,
"learning_rate": 0.001,
"loss": 1.4102,
"step": 293000
},
{
"epoch": 94.7317388493859,
"grad_norm": 1.6270956993103027,
"learning_rate": 0.001,
"loss": 1.4173,
"step": 293100
},
{
"epoch": 94.76405946994183,
"grad_norm": 1.2232261896133423,
"learning_rate": 0.001,
"loss": 1.4259,
"step": 293200
},
{
"epoch": 94.79638009049773,
"grad_norm": 1.3110573291778564,
"learning_rate": 0.001,
"loss": 1.427,
"step": 293300
},
{
"epoch": 94.82870071105366,
"grad_norm": 1.368855595588684,
"learning_rate": 0.001,
"loss": 1.4135,
"step": 293400
},
{
"epoch": 94.86102133160956,
"grad_norm": 1.7230993509292603,
"learning_rate": 0.001,
"loss": 1.4043,
"step": 293500
},
{
"epoch": 94.89334195216549,
"grad_norm": 1.5591554641723633,
"learning_rate": 0.001,
"loss": 1.4271,
"step": 293600
},
{
"epoch": 94.9256625727214,
"grad_norm": 1.8541635274887085,
"learning_rate": 0.001,
"loss": 1.4325,
"step": 293700
},
{
"epoch": 94.95798319327731,
"grad_norm": 1.5044015645980835,
"learning_rate": 0.001,
"loss": 1.4358,
"step": 293800
},
{
"epoch": 94.99030381383322,
"grad_norm": 1.679853081703186,
"learning_rate": 0.001,
"loss": 1.431,
"step": 293900
},
{
"epoch": 95.02262443438914,
"grad_norm": 1.3926348686218262,
"learning_rate": 0.001,
"loss": 1.363,
"step": 294000
},
{
"epoch": 95.05494505494505,
"grad_norm": 1.3716071844100952,
"learning_rate": 0.001,
"loss": 1.3219,
"step": 294100
},
{
"epoch": 95.08726567550097,
"grad_norm": 1.6231685876846313,
"learning_rate": 0.001,
"loss": 1.3236,
"step": 294200
},
{
"epoch": 95.11958629605688,
"grad_norm": 1.5676648616790771,
"learning_rate": 0.001,
"loss": 1.3203,
"step": 294300
},
{
"epoch": 95.1519069166128,
"grad_norm": 1.576737880706787,
"learning_rate": 0.001,
"loss": 1.3477,
"step": 294400
},
{
"epoch": 95.18422753716871,
"grad_norm": 1.466307282447815,
"learning_rate": 0.001,
"loss": 1.3387,
"step": 294500
},
{
"epoch": 95.21654815772463,
"grad_norm": 1.8757154941558838,
"learning_rate": 0.001,
"loss": 1.3486,
"step": 294600
},
{
"epoch": 95.24886877828054,
"grad_norm": 1.3650315999984741,
"learning_rate": 0.001,
"loss": 1.3346,
"step": 294700
},
{
"epoch": 95.28118939883646,
"grad_norm": 1.3587350845336914,
"learning_rate": 0.001,
"loss": 1.3518,
"step": 294800
},
{
"epoch": 95.31351001939237,
"grad_norm": 1.2925024032592773,
"learning_rate": 0.001,
"loss": 1.3627,
"step": 294900
},
{
"epoch": 95.34583063994829,
"grad_norm": 1.84067964553833,
"learning_rate": 0.001,
"loss": 1.3586,
"step": 295000
},
{
"epoch": 95.3781512605042,
"grad_norm": 2.0140841007232666,
"learning_rate": 0.001,
"loss": 1.375,
"step": 295100
},
{
"epoch": 95.41047188106012,
"grad_norm": 1.6531531810760498,
"learning_rate": 0.001,
"loss": 1.3599,
"step": 295200
},
{
"epoch": 95.44279250161603,
"grad_norm": 1.875063419342041,
"learning_rate": 0.001,
"loss": 1.3817,
"step": 295300
},
{
"epoch": 95.47511312217195,
"grad_norm": 1.1672395467758179,
"learning_rate": 0.001,
"loss": 1.383,
"step": 295400
},
{
"epoch": 95.50743374272786,
"grad_norm": 1.1308726072311401,
"learning_rate": 0.001,
"loss": 1.3782,
"step": 295500
},
{
"epoch": 95.53975436328378,
"grad_norm": 1.5952845811843872,
"learning_rate": 0.001,
"loss": 1.3883,
"step": 295600
},
{
"epoch": 95.57207498383968,
"grad_norm": 1.3622543811798096,
"learning_rate": 0.001,
"loss": 1.3841,
"step": 295700
},
{
"epoch": 95.6043956043956,
"grad_norm": 1.927077054977417,
"learning_rate": 0.001,
"loss": 1.3853,
"step": 295800
},
{
"epoch": 95.63671622495151,
"grad_norm": 1.565975308418274,
"learning_rate": 0.001,
"loss": 1.3973,
"step": 295900
},
{
"epoch": 95.66903684550743,
"grad_norm": 1.1944808959960938,
"learning_rate": 0.001,
"loss": 1.3878,
"step": 296000
},
{
"epoch": 95.70135746606334,
"grad_norm": 1.8510597944259644,
"learning_rate": 0.001,
"loss": 1.3971,
"step": 296100
},
{
"epoch": 95.73367808661926,
"grad_norm": 1.73808753490448,
"learning_rate": 0.001,
"loss": 1.4215,
"step": 296200
},
{
"epoch": 95.76599870717517,
"grad_norm": 1.3161669969558716,
"learning_rate": 0.001,
"loss": 1.3952,
"step": 296300
},
{
"epoch": 95.7983193277311,
"grad_norm": 1.5162066221237183,
"learning_rate": 0.001,
"loss": 1.4164,
"step": 296400
},
{
"epoch": 95.830639948287,
"grad_norm": 1.520003318786621,
"learning_rate": 0.001,
"loss": 1.4094,
"step": 296500
},
{
"epoch": 95.86296056884292,
"grad_norm": 1.3342549800872803,
"learning_rate": 0.001,
"loss": 1.4156,
"step": 296600
},
{
"epoch": 95.89528118939883,
"grad_norm": 1.4298732280731201,
"learning_rate": 0.001,
"loss": 1.4212,
"step": 296700
},
{
"epoch": 95.92760180995475,
"grad_norm": 1.7872447967529297,
"learning_rate": 0.001,
"loss": 1.4259,
"step": 296800
},
{
"epoch": 95.95992243051066,
"grad_norm": 1.2885979413986206,
"learning_rate": 0.001,
"loss": 1.4186,
"step": 296900
},
{
"epoch": 95.99224305106658,
"grad_norm": 1.2090197801589966,
"learning_rate": 0.001,
"loss": 1.4237,
"step": 297000
},
{
"epoch": 96.0245636716225,
"grad_norm": 1.53062105178833,
"learning_rate": 0.001,
"loss": 1.3642,
"step": 297100
},
{
"epoch": 96.05688429217841,
"grad_norm": 1.6323614120483398,
"learning_rate": 0.001,
"loss": 1.3123,
"step": 297200
},
{
"epoch": 96.08920491273433,
"grad_norm": 1.32908296585083,
"learning_rate": 0.001,
"loss": 1.3278,
"step": 297300
},
{
"epoch": 96.12152553329024,
"grad_norm": 1.26341712474823,
"learning_rate": 0.001,
"loss": 1.3218,
"step": 297400
},
{
"epoch": 96.15384615384616,
"grad_norm": 1.4131050109863281,
"learning_rate": 0.001,
"loss": 1.3322,
"step": 297500
},
{
"epoch": 96.18616677440207,
"grad_norm": 1.8546245098114014,
"learning_rate": 0.001,
"loss": 1.3301,
"step": 297600
},
{
"epoch": 96.21848739495799,
"grad_norm": 1.5039724111557007,
"learning_rate": 0.001,
"loss": 1.3453,
"step": 297700
},
{
"epoch": 96.2508080155139,
"grad_norm": 1.752182960510254,
"learning_rate": 0.001,
"loss": 1.3376,
"step": 297800
},
{
"epoch": 96.28312863606982,
"grad_norm": 1.3808088302612305,
"learning_rate": 0.001,
"loss": 1.3277,
"step": 297900
},
{
"epoch": 96.31544925662573,
"grad_norm": 1.6190468072891235,
"learning_rate": 0.001,
"loss": 1.3542,
"step": 298000
},
{
"epoch": 96.34776987718165,
"grad_norm": 1.9144762754440308,
"learning_rate": 0.001,
"loss": 1.3687,
"step": 298100
},
{
"epoch": 96.38009049773756,
"grad_norm": 1.2472035884857178,
"learning_rate": 0.001,
"loss": 1.3788,
"step": 298200
},
{
"epoch": 96.41241111829348,
"grad_norm": 1.2860407829284668,
"learning_rate": 0.001,
"loss": 1.3554,
"step": 298300
},
{
"epoch": 96.44473173884938,
"grad_norm": 1.6460773944854736,
"learning_rate": 0.001,
"loss": 1.3596,
"step": 298400
},
{
"epoch": 96.4770523594053,
"grad_norm": 1.6944340467453003,
"learning_rate": 0.001,
"loss": 1.3742,
"step": 298500
},
{
"epoch": 96.50937297996121,
"grad_norm": 2.076917886734009,
"learning_rate": 0.001,
"loss": 1.3831,
"step": 298600
},
{
"epoch": 96.54169360051714,
"grad_norm": 1.1751590967178345,
"learning_rate": 0.001,
"loss": 1.385,
"step": 298700
},
{
"epoch": 96.57401422107304,
"grad_norm": 1.7542001008987427,
"learning_rate": 0.001,
"loss": 1.3931,
"step": 298800
},
{
"epoch": 96.60633484162896,
"grad_norm": 1.7114346027374268,
"learning_rate": 0.001,
"loss": 1.3904,
"step": 298900
},
{
"epoch": 96.63865546218487,
"grad_norm": 1.9079195261001587,
"learning_rate": 0.001,
"loss": 1.3692,
"step": 299000
},
{
"epoch": 96.6709760827408,
"grad_norm": 1.387211799621582,
"learning_rate": 0.001,
"loss": 1.3851,
"step": 299100
},
{
"epoch": 96.7032967032967,
"grad_norm": 1.264164924621582,
"learning_rate": 0.001,
"loss": 1.3984,
"step": 299200
},
{
"epoch": 96.73561732385262,
"grad_norm": 1.5514090061187744,
"learning_rate": 0.001,
"loss": 1.3975,
"step": 299300
},
{
"epoch": 96.76793794440853,
"grad_norm": 1.4951378107070923,
"learning_rate": 0.001,
"loss": 1.4159,
"step": 299400
},
{
"epoch": 96.80025856496445,
"grad_norm": 1.847012996673584,
"learning_rate": 0.001,
"loss": 1.4089,
"step": 299500
},
{
"epoch": 96.83257918552036,
"grad_norm": 1.565603494644165,
"learning_rate": 0.001,
"loss": 1.4111,
"step": 299600
},
{
"epoch": 96.86489980607628,
"grad_norm": 1.2334107160568237,
"learning_rate": 0.001,
"loss": 1.4101,
"step": 299700
},
{
"epoch": 96.89722042663219,
"grad_norm": 1.2206110954284668,
"learning_rate": 0.001,
"loss": 1.4177,
"step": 299800
},
{
"epoch": 96.92954104718811,
"grad_norm": 1.3281985521316528,
"learning_rate": 0.001,
"loss": 1.4061,
"step": 299900
},
{
"epoch": 96.96186166774402,
"grad_norm": 1.514419436454773,
"learning_rate": 0.001,
"loss": 1.4247,
"step": 300000
},
{
"epoch": 96.99418228829994,
"grad_norm": 1.4396289587020874,
"learning_rate": 0.001,
"loss": 1.4064,
"step": 300100
},
{
"epoch": 97.02650290885585,
"grad_norm": 1.2071473598480225,
"learning_rate": 0.001,
"loss": 1.3271,
"step": 300200
},
{
"epoch": 97.05882352941177,
"grad_norm": 1.5665583610534668,
"learning_rate": 0.001,
"loss": 1.3159,
"step": 300300
},
{
"epoch": 97.09114414996768,
"grad_norm": 1.9283638000488281,
"learning_rate": 0.001,
"loss": 1.3176,
"step": 300400
},
{
"epoch": 97.1234647705236,
"grad_norm": 1.1790565252304077,
"learning_rate": 0.001,
"loss": 1.3134,
"step": 300500
},
{
"epoch": 97.1557853910795,
"grad_norm": 1.4134197235107422,
"learning_rate": 0.001,
"loss": 1.3288,
"step": 300600
},
{
"epoch": 97.18810601163543,
"grad_norm": 1.5686752796173096,
"learning_rate": 0.001,
"loss": 1.3287,
"step": 300700
},
{
"epoch": 97.22042663219133,
"grad_norm": 1.3202617168426514,
"learning_rate": 0.001,
"loss": 1.3291,
"step": 300800
},
{
"epoch": 97.25274725274726,
"grad_norm": 1.5901153087615967,
"learning_rate": 0.001,
"loss": 1.3391,
"step": 300900
},
{
"epoch": 97.28506787330316,
"grad_norm": 1.9898815155029297,
"learning_rate": 0.001,
"loss": 1.3483,
"step": 301000
},
{
"epoch": 97.31738849385908,
"grad_norm": 1.3884061574935913,
"learning_rate": 0.001,
"loss": 1.3591,
"step": 301100
},
{
"epoch": 97.34970911441499,
"grad_norm": 1.2868521213531494,
"learning_rate": 0.001,
"loss": 1.3553,
"step": 301200
},
{
"epoch": 97.38202973497091,
"grad_norm": 1.1923433542251587,
"learning_rate": 0.001,
"loss": 1.3638,
"step": 301300
},
{
"epoch": 97.41435035552682,
"grad_norm": 1.232994556427002,
"learning_rate": 0.001,
"loss": 1.3487,
"step": 301400
},
{
"epoch": 97.44667097608274,
"grad_norm": 1.6176105737686157,
"learning_rate": 0.001,
"loss": 1.3713,
"step": 301500
},
{
"epoch": 97.47899159663865,
"grad_norm": 1.3008984327316284,
"learning_rate": 0.001,
"loss": 1.3635,
"step": 301600
},
{
"epoch": 97.51131221719457,
"grad_norm": 1.520272135734558,
"learning_rate": 0.001,
"loss": 1.3869,
"step": 301700
},
{
"epoch": 97.54363283775048,
"grad_norm": 1.3893316984176636,
"learning_rate": 0.001,
"loss": 1.3769,
"step": 301800
},
{
"epoch": 97.5759534583064,
"grad_norm": 1.3842289447784424,
"learning_rate": 0.001,
"loss": 1.3699,
"step": 301900
},
{
"epoch": 97.60827407886231,
"grad_norm": 1.4758777618408203,
"learning_rate": 0.001,
"loss": 1.3931,
"step": 302000
},
{
"epoch": 97.64059469941823,
"grad_norm": 1.738865852355957,
"learning_rate": 0.001,
"loss": 1.3826,
"step": 302100
},
{
"epoch": 97.67291531997414,
"grad_norm": 2.019986867904663,
"learning_rate": 0.001,
"loss": 1.3695,
"step": 302200
},
{
"epoch": 97.70523594053006,
"grad_norm": 1.6837821006774902,
"learning_rate": 0.001,
"loss": 1.3765,
"step": 302300
},
{
"epoch": 97.73755656108597,
"grad_norm": 1.2452991008758545,
"learning_rate": 0.001,
"loss": 1.3805,
"step": 302400
},
{
"epoch": 97.76987718164189,
"grad_norm": 1.489229440689087,
"learning_rate": 0.001,
"loss": 1.394,
"step": 302500
},
{
"epoch": 97.8021978021978,
"grad_norm": 1.229744791984558,
"learning_rate": 0.001,
"loss": 1.4094,
"step": 302600
},
{
"epoch": 97.83451842275372,
"grad_norm": 1.2610152959823608,
"learning_rate": 0.001,
"loss": 1.4014,
"step": 302700
},
{
"epoch": 97.86683904330962,
"grad_norm": 1.180010199546814,
"learning_rate": 0.001,
"loss": 1.3983,
"step": 302800
},
{
"epoch": 97.89915966386555,
"grad_norm": 1.1906663179397583,
"learning_rate": 0.001,
"loss": 1.3998,
"step": 302900
},
{
"epoch": 97.93148028442145,
"grad_norm": 1.3478801250457764,
"learning_rate": 0.001,
"loss": 1.4172,
"step": 303000
},
{
"epoch": 97.96380090497738,
"grad_norm": 2.488024950027466,
"learning_rate": 0.001,
"loss": 1.4144,
"step": 303100
},
{
"epoch": 97.99612152553328,
"grad_norm": 1.6468634605407715,
"learning_rate": 0.001,
"loss": 1.4048,
"step": 303200
},
{
"epoch": 98.0284421460892,
"grad_norm": 1.5927938222885132,
"learning_rate": 0.001,
"loss": 1.297,
"step": 303300
},
{
"epoch": 98.06076276664513,
"grad_norm": 1.2960963249206543,
"learning_rate": 0.001,
"loss": 1.2978,
"step": 303400
},
{
"epoch": 98.09308338720103,
"grad_norm": 1.713445782661438,
"learning_rate": 0.001,
"loss": 1.3146,
"step": 303500
},
{
"epoch": 98.12540400775696,
"grad_norm": 1.6118299961090088,
"learning_rate": 0.001,
"loss": 1.3146,
"step": 303600
},
{
"epoch": 98.15772462831286,
"grad_norm": 1.4335243701934814,
"learning_rate": 0.001,
"loss": 1.3206,
"step": 303700
},
{
"epoch": 98.19004524886878,
"grad_norm": 1.9087425470352173,
"learning_rate": 0.001,
"loss": 1.3339,
"step": 303800
},
{
"epoch": 98.22236586942469,
"grad_norm": 1.7270716428756714,
"learning_rate": 0.001,
"loss": 1.3316,
"step": 303900
},
{
"epoch": 98.25468648998061,
"grad_norm": 1.5996944904327393,
"learning_rate": 0.001,
"loss": 1.3386,
"step": 304000
},
{
"epoch": 98.28700711053652,
"grad_norm": 1.5665717124938965,
"learning_rate": 0.001,
"loss": 1.3378,
"step": 304100
},
{
"epoch": 98.31932773109244,
"grad_norm": 1.7300177812576294,
"learning_rate": 0.001,
"loss": 1.3368,
"step": 304200
},
{
"epoch": 98.35164835164835,
"grad_norm": 1.506001591682434,
"learning_rate": 0.001,
"loss": 1.3383,
"step": 304300
},
{
"epoch": 98.38396897220427,
"grad_norm": 1.8939203023910522,
"learning_rate": 0.001,
"loss": 1.3454,
"step": 304400
},
{
"epoch": 98.41628959276018,
"grad_norm": 2.432687759399414,
"learning_rate": 0.001,
"loss": 1.3542,
"step": 304500
},
{
"epoch": 98.4486102133161,
"grad_norm": 1.764639139175415,
"learning_rate": 0.001,
"loss": 1.363,
"step": 304600
},
{
"epoch": 98.48093083387201,
"grad_norm": 1.528296709060669,
"learning_rate": 0.001,
"loss": 1.3519,
"step": 304700
},
{
"epoch": 98.51325145442793,
"grad_norm": 1.5113193988800049,
"learning_rate": 0.001,
"loss": 1.3514,
"step": 304800
},
{
"epoch": 98.54557207498384,
"grad_norm": 1.426236629486084,
"learning_rate": 0.001,
"loss": 1.3889,
"step": 304900
},
{
"epoch": 98.57789269553976,
"grad_norm": 1.766783356666565,
"learning_rate": 0.001,
"loss": 1.3707,
"step": 305000
},
{
"epoch": 98.61021331609567,
"grad_norm": 1.1614407300949097,
"learning_rate": 0.001,
"loss": 1.3697,
"step": 305100
},
{
"epoch": 98.64253393665159,
"grad_norm": 1.2347285747528076,
"learning_rate": 0.001,
"loss": 1.3776,
"step": 305200
},
{
"epoch": 98.6748545572075,
"grad_norm": 1.4319281578063965,
"learning_rate": 0.001,
"loss": 1.3862,
"step": 305300
},
{
"epoch": 98.70717517776342,
"grad_norm": 1.8043638467788696,
"learning_rate": 0.001,
"loss": 1.377,
"step": 305400
},
{
"epoch": 98.73949579831933,
"grad_norm": 1.7270032167434692,
"learning_rate": 0.001,
"loss": 1.3851,
"step": 305500
},
{
"epoch": 98.77181641887525,
"grad_norm": 1.621086597442627,
"learning_rate": 0.001,
"loss": 1.3959,
"step": 305600
},
{
"epoch": 98.80413703943115,
"grad_norm": 1.4632642269134521,
"learning_rate": 0.001,
"loss": 1.3906,
"step": 305700
},
{
"epoch": 98.83645765998708,
"grad_norm": 1.3503309488296509,
"learning_rate": 0.001,
"loss": 1.3863,
"step": 305800
},
{
"epoch": 98.86877828054298,
"grad_norm": 1.5197378396987915,
"learning_rate": 0.001,
"loss": 1.3895,
"step": 305900
},
{
"epoch": 98.9010989010989,
"grad_norm": 1.3938710689544678,
"learning_rate": 0.001,
"loss": 1.3891,
"step": 306000
},
{
"epoch": 98.93341952165481,
"grad_norm": 1.2191146612167358,
"learning_rate": 0.001,
"loss": 1.406,
"step": 306100
},
{
"epoch": 98.96574014221073,
"grad_norm": 1.2720669507980347,
"learning_rate": 0.001,
"loss": 1.4065,
"step": 306200
},
{
"epoch": 98.99806076276664,
"grad_norm": 2.131666898727417,
"learning_rate": 0.001,
"loss": 1.3987,
"step": 306300
},
{
"epoch": 99.03038138332256,
"grad_norm": 1.6609143018722534,
"learning_rate": 0.001,
"loss": 1.2968,
"step": 306400
},
{
"epoch": 99.06270200387847,
"grad_norm": 1.4163116216659546,
"learning_rate": 0.001,
"loss": 1.2919,
"step": 306500
},
{
"epoch": 99.09502262443439,
"grad_norm": 1.4382485151290894,
"learning_rate": 0.001,
"loss": 1.3104,
"step": 306600
},
{
"epoch": 99.1273432449903,
"grad_norm": 1.3695274591445923,
"learning_rate": 0.001,
"loss": 1.3187,
"step": 306700
},
{
"epoch": 99.15966386554622,
"grad_norm": 2.090116500854492,
"learning_rate": 0.001,
"loss": 1.3037,
"step": 306800
},
{
"epoch": 99.19198448610213,
"grad_norm": 1.7685065269470215,
"learning_rate": 0.001,
"loss": 1.3185,
"step": 306900
},
{
"epoch": 99.22430510665805,
"grad_norm": 1.4047720432281494,
"learning_rate": 0.001,
"loss": 1.3167,
"step": 307000
},
{
"epoch": 99.25662572721396,
"grad_norm": 1.699637532234192,
"learning_rate": 0.001,
"loss": 1.3277,
"step": 307100
},
{
"epoch": 99.28894634776988,
"grad_norm": 1.614418625831604,
"learning_rate": 0.001,
"loss": 1.3475,
"step": 307200
},
{
"epoch": 99.32126696832579,
"grad_norm": 1.5218583345413208,
"learning_rate": 0.001,
"loss": 1.3379,
"step": 307300
},
{
"epoch": 99.35358758888171,
"grad_norm": 1.8946406841278076,
"learning_rate": 0.001,
"loss": 1.3394,
"step": 307400
},
{
"epoch": 99.38590820943762,
"grad_norm": 2.1262855529785156,
"learning_rate": 0.001,
"loss": 1.3493,
"step": 307500
},
{
"epoch": 99.41822882999354,
"grad_norm": 1.8946560621261597,
"learning_rate": 0.001,
"loss": 1.3528,
"step": 307600
},
{
"epoch": 99.45054945054945,
"grad_norm": 1.5395762920379639,
"learning_rate": 0.001,
"loss": 1.3461,
"step": 307700
},
{
"epoch": 99.48287007110537,
"grad_norm": 2.264472723007202,
"learning_rate": 0.001,
"loss": 1.3557,
"step": 307800
},
{
"epoch": 99.51519069166127,
"grad_norm": 1.5566797256469727,
"learning_rate": 0.001,
"loss": 1.373,
"step": 307900
},
{
"epoch": 99.5475113122172,
"grad_norm": 2.553978204727173,
"learning_rate": 0.001,
"loss": 1.3613,
"step": 308000
},
{
"epoch": 99.5798319327731,
"grad_norm": 1.5590407848358154,
"learning_rate": 0.001,
"loss": 1.3815,
"step": 308100
},
{
"epoch": 99.61215255332903,
"grad_norm": 1.2775064706802368,
"learning_rate": 0.001,
"loss": 1.3751,
"step": 308200
},
{
"epoch": 99.64447317388493,
"grad_norm": 1.6800312995910645,
"learning_rate": 0.001,
"loss": 1.3617,
"step": 308300
},
{
"epoch": 99.67679379444085,
"grad_norm": 1.5396053791046143,
"learning_rate": 0.001,
"loss": 1.3789,
"step": 308400
},
{
"epoch": 99.70911441499676,
"grad_norm": 2.490694284439087,
"learning_rate": 0.001,
"loss": 1.3785,
"step": 308500
},
{
"epoch": 99.74143503555268,
"grad_norm": 1.9777220487594604,
"learning_rate": 0.001,
"loss": 1.3768,
"step": 308600
},
{
"epoch": 99.77375565610859,
"grad_norm": 2.8109829425811768,
"learning_rate": 0.001,
"loss": 1.3853,
"step": 308700
},
{
"epoch": 99.80607627666451,
"grad_norm": 1.528751254081726,
"learning_rate": 0.001,
"loss": 1.3772,
"step": 308800
},
{
"epoch": 99.83839689722042,
"grad_norm": 1.5922292470932007,
"learning_rate": 0.001,
"loss": 1.3837,
"step": 308900
},
{
"epoch": 99.87071751777634,
"grad_norm": 1.7995340824127197,
"learning_rate": 0.001,
"loss": 1.399,
"step": 309000
},
{
"epoch": 99.90303813833225,
"grad_norm": 1.7829585075378418,
"learning_rate": 0.001,
"loss": 1.4036,
"step": 309100
},
{
"epoch": 99.93535875888817,
"grad_norm": 1.6563310623168945,
"learning_rate": 0.001,
"loss": 1.3975,
"step": 309200
},
{
"epoch": 99.96767937944408,
"grad_norm": 1.8799587488174438,
"learning_rate": 0.001,
"loss": 1.4018,
"step": 309300
},
{
"epoch": 100.0,
"grad_norm": 1.8644742965698242,
"learning_rate": 0.001,
"loss": 1.3632,
"step": 309400
},
{
"epoch": 100.0,
"step": 309400,
"total_flos": 6.973099854336e+17,
"train_loss": 1.5836482462177142,
"train_runtime": 25792.9375,
"train_samples_per_second": 383.826,
"train_steps_per_second": 11.996
}
],
"logging_steps": 100,
"max_steps": 309400,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.973099854336e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}