{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 99.5475113122172,
"eval_steps": 20000,
"global_step": 308000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03232062055591468,
"grad_norm": 13.562658309936523,
"learning_rate": 9.900000000000002e-06,
"loss": 4.2119,
"step": 100
},
{
"epoch": 0.06464124111182935,
"grad_norm": 17.95138931274414,
"learning_rate": 1.9900000000000003e-05,
"loss": 4.0516,
"step": 200
},
{
"epoch": 0.09696186166774402,
"grad_norm": 11.31137752532959,
"learning_rate": 2.9900000000000002e-05,
"loss": 3.9654,
"step": 300
},
{
"epoch": 0.1292824822236587,
"grad_norm": 17.110918045043945,
"learning_rate": 3.99e-05,
"loss": 3.7782,
"step": 400
},
{
"epoch": 0.16160310277957338,
"grad_norm": 11.72215461730957,
"learning_rate": 4.99e-05,
"loss": 3.4622,
"step": 500
},
{
"epoch": 0.19392372333548805,
"grad_norm": 7.6839447021484375,
"learning_rate": 5.9900000000000006e-05,
"loss": 3.0781,
"step": 600
},
{
"epoch": 0.22624434389140272,
"grad_norm": 9.97439193725586,
"learning_rate": 6.99e-05,
"loss": 2.8028,
"step": 700
},
{
"epoch": 0.2585649644473174,
"grad_norm": 7.437990665435791,
"learning_rate": 7.99e-05,
"loss": 2.6209,
"step": 800
},
{
"epoch": 0.2908855850032321,
"grad_norm": 5.37725830078125,
"learning_rate": 8.989999999999999e-05,
"loss": 2.4793,
"step": 900
},
{
"epoch": 0.32320620555914675,
"grad_norm": 3.4234330654144287,
"learning_rate": 9.99e-05,
"loss": 2.4139,
"step": 1000
},
{
"epoch": 0.3555268261150614,
"grad_norm": 3.771662473678589,
"learning_rate": 0.0001099,
"loss": 2.3273,
"step": 1100
},
{
"epoch": 0.3878474466709761,
"grad_norm": 3.9465994834899902,
"learning_rate": 0.00011990000000000001,
"loss": 2.3011,
"step": 1200
},
{
"epoch": 0.42016806722689076,
"grad_norm": 3.0348823070526123,
"learning_rate": 0.00012989999999999999,
"loss": 2.2512,
"step": 1300
},
{
"epoch": 0.45248868778280543,
"grad_norm": 2.580744504928589,
"learning_rate": 0.0001399,
"loss": 2.2338,
"step": 1400
},
{
"epoch": 0.4848093083387201,
"grad_norm": 2.7396256923675537,
"learning_rate": 0.0001499,
"loss": 2.2165,
"step": 1500
},
{
"epoch": 0.5171299288946348,
"grad_norm": 2.427551507949829,
"learning_rate": 0.00015989999999999998,
"loss": 2.1936,
"step": 1600
},
{
"epoch": 0.5494505494505495,
"grad_norm": 4.367044448852539,
"learning_rate": 0.0001699,
"loss": 2.1448,
"step": 1700
},
{
"epoch": 0.5817711700064642,
"grad_norm": 3.5235722064971924,
"learning_rate": 0.0001799,
"loss": 2.1785,
"step": 1800
},
{
"epoch": 0.6140917905623788,
"grad_norm": 1.780901551246643,
"learning_rate": 0.0001899,
"loss": 2.1839,
"step": 1900
},
{
"epoch": 0.6464124111182935,
"grad_norm": 1.8098434209823608,
"learning_rate": 0.0001999,
"loss": 2.1043,
"step": 2000
},
{
"epoch": 0.6787330316742082,
"grad_norm": 2.296581268310547,
"learning_rate": 0.0002099,
"loss": 2.1415,
"step": 2100
},
{
"epoch": 0.7110536522301228,
"grad_norm": 2.007957696914673,
"learning_rate": 0.0002199,
"loss": 2.1096,
"step": 2200
},
{
"epoch": 0.7433742727860375,
"grad_norm": 3.1119742393493652,
"learning_rate": 0.0002299,
"loss": 2.1146,
"step": 2300
},
{
"epoch": 0.7756948933419522,
"grad_norm": 1.6894687414169312,
"learning_rate": 0.0002399,
"loss": 2.0824,
"step": 2400
},
{
"epoch": 0.8080155138978669,
"grad_norm": 1.2437957525253296,
"learning_rate": 0.0002499,
"loss": 2.0671,
"step": 2500
},
{
"epoch": 0.8403361344537815,
"grad_norm": 2.4846837520599365,
"learning_rate": 0.00025990000000000003,
"loss": 2.1032,
"step": 2600
},
{
"epoch": 0.8726567550096962,
"grad_norm": 1.7763832807540894,
"learning_rate": 0.0002699,
"loss": 2.1034,
"step": 2700
},
{
"epoch": 0.9049773755656109,
"grad_norm": 2.0607247352600098,
"learning_rate": 0.0002799,
"loss": 2.0898,
"step": 2800
},
{
"epoch": 0.9372979961215255,
"grad_norm": 1.1315553188323975,
"learning_rate": 0.0002899,
"loss": 2.0906,
"step": 2900
},
{
"epoch": 0.9696186166774402,
"grad_norm": 1.3324776887893677,
"learning_rate": 0.0002999,
"loss": 2.065,
"step": 3000
},
{
"epoch": 1.0019392372333549,
"grad_norm": 1.5390121936798096,
"learning_rate": 0.0003099,
"loss": 2.0899,
"step": 3100
},
{
"epoch": 1.0342598577892697,
"grad_norm": 3.3471102714538574,
"learning_rate": 0.0003199,
"loss": 1.986,
"step": 3200
},
{
"epoch": 1.0665804783451842,
"grad_norm": 2.176851749420166,
"learning_rate": 0.00032990000000000005,
"loss": 2.0288,
"step": 3300
},
{
"epoch": 1.098901098901099,
"grad_norm": 1.6547337770462036,
"learning_rate": 0.00033989999999999997,
"loss": 2.0137,
"step": 3400
},
{
"epoch": 1.1312217194570136,
"grad_norm": 2.7004611492156982,
"learning_rate": 0.0003499,
"loss": 2.0361,
"step": 3500
},
{
"epoch": 1.1635423400129283,
"grad_norm": 9.764275550842285,
"learning_rate": 0.0003599,
"loss": 2.0461,
"step": 3600
},
{
"epoch": 1.195862960568843,
"grad_norm": 2.408719062805176,
"learning_rate": 0.0003699,
"loss": 2.013,
"step": 3700
},
{
"epoch": 1.2281835811247577,
"grad_norm": 1.4798204898834229,
"learning_rate": 0.0003799,
"loss": 2.0373,
"step": 3800
},
{
"epoch": 1.2605042016806722,
"grad_norm": 1.6942540407180786,
"learning_rate": 0.00038990000000000004,
"loss": 2.0252,
"step": 3900
},
{
"epoch": 1.292824822236587,
"grad_norm": 1.9354304075241089,
"learning_rate": 0.00039989999999999996,
"loss": 1.968,
"step": 4000
},
{
"epoch": 1.3251454427925016,
"grad_norm": 1.3031123876571655,
"learning_rate": 0.0004099,
"loss": 1.9987,
"step": 4100
},
{
"epoch": 1.3574660633484164,
"grad_norm": 2.0432260036468506,
"learning_rate": 0.0004199,
"loss": 2.0022,
"step": 4200
},
{
"epoch": 1.389786683904331,
"grad_norm": 1.3270591497421265,
"learning_rate": 0.0004299,
"loss": 1.9947,
"step": 4300
},
{
"epoch": 1.4221073044602457,
"grad_norm": 2.2205493450164795,
"learning_rate": 0.0004399,
"loss": 2.0379,
"step": 4400
},
{
"epoch": 1.4544279250161603,
"grad_norm": 1.547240138053894,
"learning_rate": 0.00044990000000000004,
"loss": 1.9993,
"step": 4500
},
{
"epoch": 1.486748545572075,
"grad_norm": 1.6051744222640991,
"learning_rate": 0.0004599,
"loss": 1.9934,
"step": 4600
},
{
"epoch": 1.5190691661279896,
"grad_norm": 1.6245908737182617,
"learning_rate": 0.0004699,
"loss": 2.0052,
"step": 4700
},
{
"epoch": 1.5513897866839044,
"grad_norm": 1.2813587188720703,
"learning_rate": 0.0004799,
"loss": 2.0224,
"step": 4800
},
{
"epoch": 1.5837104072398192,
"grad_norm": 1.2587623596191406,
"learning_rate": 0.0004899,
"loss": 2.0377,
"step": 4900
},
{
"epoch": 1.6160310277957337,
"grad_norm": 1.2050402164459229,
"learning_rate": 0.0004999000000000001,
"loss": 2.0159,
"step": 5000
},
{
"epoch": 1.6483516483516483,
"grad_norm": 1.2096720933914185,
"learning_rate": 0.0005099,
"loss": 1.9913,
"step": 5100
},
{
"epoch": 1.680672268907563,
"grad_norm": 1.371106743812561,
"learning_rate": 0.0005199,
"loss": 1.9823,
"step": 5200
},
{
"epoch": 1.7129928894634778,
"grad_norm": 1.6344568729400635,
"learning_rate": 0.0005299,
"loss": 1.9601,
"step": 5300
},
{
"epoch": 1.7453135100193924,
"grad_norm": 1.475213646888733,
"learning_rate": 0.0005399000000000001,
"loss": 2.0069,
"step": 5400
},
{
"epoch": 1.777634130575307,
"grad_norm": 1.147307276725769,
"learning_rate": 0.0005499000000000001,
"loss": 1.9979,
"step": 5500
},
{
"epoch": 1.8099547511312217,
"grad_norm": 1.5867249965667725,
"learning_rate": 0.0005599,
"loss": 1.9819,
"step": 5600
},
{
"epoch": 1.8422753716871365,
"grad_norm": 1.2033599615097046,
"learning_rate": 0.0005698999999999999,
"loss": 1.9972,
"step": 5700
},
{
"epoch": 1.874595992243051,
"grad_norm": 1.5934315919876099,
"learning_rate": 0.0005799,
"loss": 1.9636,
"step": 5800
},
{
"epoch": 1.9069166127989656,
"grad_norm": 1.7510161399841309,
"learning_rate": 0.0005899,
"loss": 1.9905,
"step": 5900
},
{
"epoch": 1.9392372333548804,
"grad_norm": 1.3442281484603882,
"learning_rate": 0.0005999,
"loss": 2.0058,
"step": 6000
},
{
"epoch": 1.9715578539107952,
"grad_norm": 1.8174034357070923,
"learning_rate": 0.0006099,
"loss": 1.9725,
"step": 6100
},
{
"epoch": 2.0038784744667097,
"grad_norm": 1.3350085020065308,
"learning_rate": 0.0006199,
"loss": 2.0277,
"step": 6200
},
{
"epoch": 2.0361990950226243,
"grad_norm": 1.1999424695968628,
"learning_rate": 0.0006299000000000001,
"loss": 1.9098,
"step": 6300
},
{
"epoch": 2.0685197155785393,
"grad_norm": 1.2381353378295898,
"learning_rate": 0.0006399,
"loss": 1.9531,
"step": 6400
},
{
"epoch": 2.100840336134454,
"grad_norm": 1.414468765258789,
"learning_rate": 0.0006499,
"loss": 1.9276,
"step": 6500
},
{
"epoch": 2.1331609566903684,
"grad_norm": 1.050279140472412,
"learning_rate": 0.0006599,
"loss": 1.9068,
"step": 6600
},
{
"epoch": 2.165481577246283,
"grad_norm": 1.3866891860961914,
"learning_rate": 0.0006699000000000001,
"loss": 1.9283,
"step": 6700
},
{
"epoch": 2.197802197802198,
"grad_norm": 1.0788445472717285,
"learning_rate": 0.0006799,
"loss": 1.9478,
"step": 6800
},
{
"epoch": 2.2301228183581125,
"grad_norm": 1.032123327255249,
"learning_rate": 0.0006899,
"loss": 1.9247,
"step": 6900
},
{
"epoch": 2.262443438914027,
"grad_norm": 1.3964852094650269,
"learning_rate": 0.0006998999999999999,
"loss": 1.9256,
"step": 7000
},
{
"epoch": 2.2947640594699417,
"grad_norm": 1.1780961751937866,
"learning_rate": 0.0007099,
"loss": 1.9308,
"step": 7100
},
{
"epoch": 2.3270846800258567,
"grad_norm": 1.3616302013397217,
"learning_rate": 0.0007199,
"loss": 1.9456,
"step": 7200
},
{
"epoch": 2.3594053005817712,
"grad_norm": 1.266806960105896,
"learning_rate": 0.0007299,
"loss": 1.9409,
"step": 7300
},
{
"epoch": 2.391725921137686,
"grad_norm": 1.1740697622299194,
"learning_rate": 0.0007399,
"loss": 1.9325,
"step": 7400
},
{
"epoch": 2.4240465416936003,
"grad_norm": 1.1045465469360352,
"learning_rate": 0.0007499000000000001,
"loss": 1.9336,
"step": 7500
},
{
"epoch": 2.4563671622495153,
"grad_norm": 1.4932281970977783,
"learning_rate": 0.0007599,
"loss": 1.9374,
"step": 7600
},
{
"epoch": 2.48868778280543,
"grad_norm": 1.0262266397476196,
"learning_rate": 0.0007699,
"loss": 1.9295,
"step": 7700
},
{
"epoch": 2.5210084033613445,
"grad_norm": 1.2064828872680664,
"learning_rate": 0.0007799,
"loss": 1.9178,
"step": 7800
},
{
"epoch": 2.553329023917259,
"grad_norm": 1.2155147790908813,
"learning_rate": 0.0007899000000000001,
"loss": 1.9659,
"step": 7900
},
{
"epoch": 2.585649644473174,
"grad_norm": 1.2359004020690918,
"learning_rate": 0.0007999000000000001,
"loss": 1.943,
"step": 8000
},
{
"epoch": 2.6179702650290886,
"grad_norm": 1.1473153829574585,
"learning_rate": 0.0008099,
"loss": 1.9309,
"step": 8100
},
{
"epoch": 2.650290885585003,
"grad_norm": 1.13937246799469,
"learning_rate": 0.0008198999999999999,
"loss": 1.9133,
"step": 8200
},
{
"epoch": 2.682611506140918,
"grad_norm": 1.46646249294281,
"learning_rate": 0.0008299,
"loss": 1.9746,
"step": 8300
},
{
"epoch": 2.7149321266968327,
"grad_norm": 1.3891866207122803,
"learning_rate": 0.0008399,
"loss": 1.9353,
"step": 8400
},
{
"epoch": 2.7472527472527473,
"grad_norm": 1.0530701875686646,
"learning_rate": 0.0008499,
"loss": 1.9152,
"step": 8500
},
{
"epoch": 2.779573367808662,
"grad_norm": 1.3477870225906372,
"learning_rate": 0.0008599,
"loss": 1.9334,
"step": 8600
},
{
"epoch": 2.8118939883645764,
"grad_norm": 1.0107545852661133,
"learning_rate": 0.0008699000000000001,
"loss": 1.9586,
"step": 8700
},
{
"epoch": 2.8442146089204914,
"grad_norm": 1.2573806047439575,
"learning_rate": 0.0008799000000000001,
"loss": 1.9526,
"step": 8800
},
{
"epoch": 2.876535229476406,
"grad_norm": 1.7003774642944336,
"learning_rate": 0.0008899,
"loss": 1.933,
"step": 8900
},
{
"epoch": 2.9088558500323205,
"grad_norm": 0.9627268314361572,
"learning_rate": 0.0008999,
"loss": 1.9588,
"step": 9000
},
{
"epoch": 2.9411764705882355,
"grad_norm": 1.1297580003738403,
"learning_rate": 0.0009099,
"loss": 1.961,
"step": 9100
},
{
"epoch": 2.97349709114415,
"grad_norm": 1.230385184288025,
"learning_rate": 0.0009199000000000001,
"loss": 1.9194,
"step": 9200
},
{
"epoch": 3.0058177117000646,
"grad_norm": 0.9041159152984619,
"learning_rate": 0.0009299,
"loss": 1.9607,
"step": 9300
},
{
"epoch": 3.038138332255979,
"grad_norm": 1.1520296335220337,
"learning_rate": 0.0009399,
"loss": 1.8419,
"step": 9400
},
{
"epoch": 3.070458952811894,
"grad_norm": 0.8856098651885986,
"learning_rate": 0.0009498999999999999,
"loss": 1.865,
"step": 9500
},
{
"epoch": 3.1027795733678087,
"grad_norm": 1.082818627357483,
"learning_rate": 0.0009599,
"loss": 1.8975,
"step": 9600
},
{
"epoch": 3.1351001939237233,
"grad_norm": 1.1733167171478271,
"learning_rate": 0.0009699,
"loss": 1.8623,
"step": 9700
},
{
"epoch": 3.167420814479638,
"grad_norm": 1.0028611421585083,
"learning_rate": 0.0009799,
"loss": 1.8665,
"step": 9800
},
{
"epoch": 3.199741435035553,
"grad_norm": 1.318609356880188,
"learning_rate": 0.0009899,
"loss": 1.8917,
"step": 9900
},
{
"epoch": 3.2320620555914674,
"grad_norm": 1.452332854270935,
"learning_rate": 0.0009999,
"loss": 1.8757,
"step": 10000
},
{
"epoch": 3.264382676147382,
"grad_norm": 1.2408781051635742,
"learning_rate": 0.001,
"loss": 1.825,
"step": 10100
},
{
"epoch": 3.2967032967032965,
"grad_norm": 0.9759476780891418,
"learning_rate": 0.001,
"loss": 1.8435,
"step": 10200
},
{
"epoch": 3.3290239172592115,
"grad_norm": 1.2983310222625732,
"learning_rate": 0.001,
"loss": 1.8448,
"step": 10300
},
{
"epoch": 3.361344537815126,
"grad_norm": 1.7093514204025269,
"learning_rate": 0.001,
"loss": 1.8535,
"step": 10400
},
{
"epoch": 3.3936651583710407,
"grad_norm": 0.990372359752655,
"learning_rate": 0.001,
"loss": 1.8681,
"step": 10500
},
{
"epoch": 3.425985778926955,
"grad_norm": 30.755449295043945,
"learning_rate": 0.001,
"loss": 1.8494,
"step": 10600
},
{
"epoch": 3.45830639948287,
"grad_norm": 1.0039843320846558,
"learning_rate": 0.001,
"loss": 1.8672,
"step": 10700
},
{
"epoch": 3.490627020038785,
"grad_norm": 1.0978604555130005,
"learning_rate": 0.001,
"loss": 1.8695,
"step": 10800
},
{
"epoch": 3.5229476405946993,
"grad_norm": 1.1592726707458496,
"learning_rate": 0.001,
"loss": 1.8769,
"step": 10900
},
{
"epoch": 3.555268261150614,
"grad_norm": 1.8122249841690063,
"learning_rate": 0.001,
"loss": 1.8361,
"step": 11000
},
{
"epoch": 3.587588881706529,
"grad_norm": 1.182767391204834,
"learning_rate": 0.001,
"loss": 1.866,
"step": 11100
},
{
"epoch": 3.6199095022624435,
"grad_norm": 1.7454653978347778,
"learning_rate": 0.001,
"loss": 1.8712,
"step": 11200
},
{
"epoch": 3.652230122818358,
"grad_norm": 1.3222342729568481,
"learning_rate": 0.001,
"loss": 1.8495,
"step": 11300
},
{
"epoch": 3.684550743374273,
"grad_norm": 1.1197530031204224,
"learning_rate": 0.001,
"loss": 1.8636,
"step": 11400
},
{
"epoch": 3.7168713639301876,
"grad_norm": 1.0089489221572876,
"learning_rate": 0.001,
"loss": 1.8698,
"step": 11500
},
{
"epoch": 3.749191984486102,
"grad_norm": 1.1160805225372314,
"learning_rate": 0.001,
"loss": 1.8626,
"step": 11600
},
{
"epoch": 3.7815126050420167,
"grad_norm": 1.6491235494613647,
"learning_rate": 0.001,
"loss": 1.8567,
"step": 11700
},
{
"epoch": 3.8138332255979313,
"grad_norm": 1.2563141584396362,
"learning_rate": 0.001,
"loss": 1.8366,
"step": 11800
},
{
"epoch": 3.8461538461538463,
"grad_norm": 1.25520658493042,
"learning_rate": 0.001,
"loss": 1.8873,
"step": 11900
},
{
"epoch": 3.878474466709761,
"grad_norm": 0.7658872604370117,
"learning_rate": 0.001,
"loss": 1.8845,
"step": 12000
},
{
"epoch": 3.9107950872656754,
"grad_norm": 1.1348446607589722,
"learning_rate": 0.001,
"loss": 1.8875,
"step": 12100
},
{
"epoch": 3.9431157078215904,
"grad_norm": 1.0871976613998413,
"learning_rate": 0.001,
"loss": 1.8727,
"step": 12200
},
{
"epoch": 3.975436328377505,
"grad_norm": 1.2896612882614136,
"learning_rate": 0.001,
"loss": 1.865,
"step": 12300
},
{
"epoch": 4.0077569489334195,
"grad_norm": 1.233090877532959,
"learning_rate": 0.001,
"loss": 1.8577,
"step": 12400
},
{
"epoch": 4.040077569489334,
"grad_norm": 1.2701627016067505,
"learning_rate": 0.001,
"loss": 1.7695,
"step": 12500
},
{
"epoch": 4.072398190045249,
"grad_norm": 0.9071168899536133,
"learning_rate": 0.001,
"loss": 1.7788,
"step": 12600
},
{
"epoch": 4.104718810601163,
"grad_norm": 0.851148247718811,
"learning_rate": 0.001,
"loss": 1.8106,
"step": 12700
},
{
"epoch": 4.137039431157079,
"grad_norm": 1.1509605646133423,
"learning_rate": 0.001,
"loss": 1.7861,
"step": 12800
},
{
"epoch": 4.169360051712993,
"grad_norm": 1.0977354049682617,
"learning_rate": 0.001,
"loss": 1.8142,
"step": 12900
},
{
"epoch": 4.201680672268908,
"grad_norm": 1.4111992120742798,
"learning_rate": 0.001,
"loss": 1.8264,
"step": 13000
},
{
"epoch": 4.234001292824822,
"grad_norm": 0.900395929813385,
"learning_rate": 0.001,
"loss": 1.8613,
"step": 13100
},
{
"epoch": 4.266321913380737,
"grad_norm": 1.2737363576889038,
"learning_rate": 0.001,
"loss": 1.7886,
"step": 13200
},
{
"epoch": 4.298642533936651,
"grad_norm": 1.435176968574524,
"learning_rate": 0.001,
"loss": 1.8192,
"step": 13300
},
{
"epoch": 4.330963154492566,
"grad_norm": 0.7658246755599976,
"learning_rate": 0.001,
"loss": 1.8316,
"step": 13400
},
{
"epoch": 4.3632837750484805,
"grad_norm": 1.1032602787017822,
"learning_rate": 0.001,
"loss": 1.8161,
"step": 13500
},
{
"epoch": 4.395604395604396,
"grad_norm": 1.1794319152832031,
"learning_rate": 0.001,
"loss": 1.7846,
"step": 13600
},
{
"epoch": 4.4279250161603105,
"grad_norm": 0.8004180192947388,
"learning_rate": 0.001,
"loss": 1.7987,
"step": 13700
},
{
"epoch": 4.460245636716225,
"grad_norm": 1.0232980251312256,
"learning_rate": 0.001,
"loss": 1.7836,
"step": 13800
},
{
"epoch": 4.49256625727214,
"grad_norm": 1.3699309825897217,
"learning_rate": 0.001,
"loss": 1.8018,
"step": 13900
},
{
"epoch": 4.524886877828054,
"grad_norm": 1.0403003692626953,
"learning_rate": 0.001,
"loss": 1.7962,
"step": 14000
},
{
"epoch": 4.557207498383969,
"grad_norm": 1.176257610321045,
"learning_rate": 0.001,
"loss": 1.8054,
"step": 14100
},
{
"epoch": 4.589528118939883,
"grad_norm": 0.7099631428718567,
"learning_rate": 0.001,
"loss": 1.8299,
"step": 14200
},
{
"epoch": 4.621848739495798,
"grad_norm": 0.7507422566413879,
"learning_rate": 0.001,
"loss": 1.8118,
"step": 14300
},
{
"epoch": 4.654169360051713,
"grad_norm": 0.9232106804847717,
"learning_rate": 0.001,
"loss": 1.8049,
"step": 14400
},
{
"epoch": 4.686489980607628,
"grad_norm": 1.074141263961792,
"learning_rate": 0.001,
"loss": 1.8488,
"step": 14500
},
{
"epoch": 4.7188106011635425,
"grad_norm": 2.9028780460357666,
"learning_rate": 0.001,
"loss": 1.8251,
"step": 14600
},
{
"epoch": 4.751131221719457,
"grad_norm": 0.8118910789489746,
"learning_rate": 0.001,
"loss": 1.8049,
"step": 14700
},
{
"epoch": 4.783451842275372,
"grad_norm": 0.8545799255371094,
"learning_rate": 0.001,
"loss": 1.8078,
"step": 14800
},
{
"epoch": 4.815772462831286,
"grad_norm": 1.1466169357299805,
"learning_rate": 0.001,
"loss": 1.7987,
"step": 14900
},
{
"epoch": 4.848093083387201,
"grad_norm": 1.126753568649292,
"learning_rate": 0.001,
"loss": 1.7949,
"step": 15000
},
{
"epoch": 4.880413703943116,
"grad_norm": 0.7958624362945557,
"learning_rate": 0.001,
"loss": 1.7877,
"step": 15100
},
{
"epoch": 4.912734324499031,
"grad_norm": 1.2399948835372925,
"learning_rate": 0.001,
"loss": 1.8136,
"step": 15200
},
{
"epoch": 4.945054945054945,
"grad_norm": 1.478546380996704,
"learning_rate": 0.001,
"loss": 1.8386,
"step": 15300
},
{
"epoch": 4.97737556561086,
"grad_norm": 1.4831817150115967,
"learning_rate": 0.001,
"loss": 1.828,
"step": 15400
},
{
"epoch": 5.009696186166774,
"grad_norm": 1.126725196838379,
"learning_rate": 0.001,
"loss": 1.7936,
"step": 15500
},
{
"epoch": 5.042016806722689,
"grad_norm": 0.9983710646629333,
"learning_rate": 0.001,
"loss": 1.6942,
"step": 15600
},
{
"epoch": 5.0743374272786035,
"grad_norm": 1.1261931657791138,
"learning_rate": 0.001,
"loss": 1.7046,
"step": 15700
},
{
"epoch": 5.106658047834518,
"grad_norm": 0.8670969605445862,
"learning_rate": 0.001,
"loss": 1.7256,
"step": 15800
},
{
"epoch": 5.1389786683904335,
"grad_norm": 1.0237650871276855,
"learning_rate": 0.001,
"loss": 1.7319,
"step": 15900
},
{
"epoch": 5.171299288946348,
"grad_norm": 0.8006519079208374,
"learning_rate": 0.001,
"loss": 1.7529,
"step": 16000
},
{
"epoch": 5.203619909502263,
"grad_norm": 0.9771319627761841,
"learning_rate": 0.001,
"loss": 1.7032,
"step": 16100
},
{
"epoch": 5.235940530058177,
"grad_norm": 0.9730778336524963,
"learning_rate": 0.001,
"loss": 1.7376,
"step": 16200
},
{
"epoch": 5.268261150614092,
"grad_norm": 0.8748031854629517,
"learning_rate": 0.001,
"loss": 1.7531,
"step": 16300
},
{
"epoch": 5.300581771170006,
"grad_norm": 0.7881172895431519,
"learning_rate": 0.001,
"loss": 1.7506,
"step": 16400
},
{
"epoch": 5.332902391725921,
"grad_norm": 2.841874599456787,
"learning_rate": 0.001,
"loss": 1.7433,
"step": 16500
},
{
"epoch": 5.365223012281835,
"grad_norm": 0.8375452756881714,
"learning_rate": 0.001,
"loss": 1.7232,
"step": 16600
},
{
"epoch": 5.397543632837751,
"grad_norm": 1.30898916721344,
"learning_rate": 0.001,
"loss": 1.7317,
"step": 16700
},
{
"epoch": 5.429864253393665,
"grad_norm": 0.9335517883300781,
"learning_rate": 0.001,
"loss": 1.7807,
"step": 16800
},
{
"epoch": 5.46218487394958,
"grad_norm": 1.097780704498291,
"learning_rate": 0.001,
"loss": 1.7398,
"step": 16900
},
{
"epoch": 5.4945054945054945,
"grad_norm": 0.9672789573669434,
"learning_rate": 0.001,
"loss": 1.7514,
"step": 17000
},
{
"epoch": 5.526826115061409,
"grad_norm": 0.6645662784576416,
"learning_rate": 0.001,
"loss": 1.7464,
"step": 17100
},
{
"epoch": 5.559146735617324,
"grad_norm": 0.8699595928192139,
"learning_rate": 0.001,
"loss": 1.7568,
"step": 17200
},
{
"epoch": 5.591467356173238,
"grad_norm": 0.897167980670929,
"learning_rate": 0.001,
"loss": 1.7206,
"step": 17300
},
{
"epoch": 5.623787976729153,
"grad_norm": 0.8125122785568237,
"learning_rate": 0.001,
"loss": 1.731,
"step": 17400
},
{
"epoch": 5.656108597285068,
"grad_norm": 1.1313683986663818,
"learning_rate": 0.001,
"loss": 1.7657,
"step": 17500
},
{
"epoch": 5.688429217840983,
"grad_norm": 0.751796543598175,
"learning_rate": 0.001,
"loss": 1.7696,
"step": 17600
},
{
"epoch": 5.720749838396897,
"grad_norm": 0.7507619857788086,
"learning_rate": 0.001,
"loss": 1.7562,
"step": 17700
},
{
"epoch": 5.753070458952812,
"grad_norm": 0.7618012428283691,
"learning_rate": 0.001,
"loss": 1.7589,
"step": 17800
},
{
"epoch": 5.785391079508726,
"grad_norm": 1.1747350692749023,
"learning_rate": 0.001,
"loss": 1.7558,
"step": 17900
},
{
"epoch": 5.817711700064641,
"grad_norm": 0.8985292911529541,
"learning_rate": 0.001,
"loss": 1.7461,
"step": 18000
},
{
"epoch": 5.850032320620556,
"grad_norm": 0.7552205324172974,
"learning_rate": 0.001,
"loss": 1.7702,
"step": 18100
},
{
"epoch": 5.882352941176471,
"grad_norm": 0.8019037246704102,
"learning_rate": 0.001,
"loss": 1.7779,
"step": 18200
},
{
"epoch": 5.914673561732386,
"grad_norm": 0.9410333037376404,
"learning_rate": 0.001,
"loss": 1.7969,
"step": 18300
},
{
"epoch": 5.9469941822883,
"grad_norm": 1.1927149295806885,
"learning_rate": 0.001,
"loss": 1.7573,
"step": 18400
},
{
"epoch": 5.979314802844215,
"grad_norm": 1.060541033744812,
"learning_rate": 0.001,
"loss": 1.7674,
"step": 18500
},
{
"epoch": 6.011635423400129,
"grad_norm": 1.0835535526275635,
"learning_rate": 0.001,
"loss": 1.6796,
"step": 18600
},
{
"epoch": 6.043956043956044,
"grad_norm": 0.9343781471252441,
"learning_rate": 0.001,
"loss": 1.6642,
"step": 18700
},
{
"epoch": 6.076276664511958,
"grad_norm": 0.9074199795722961,
"learning_rate": 0.001,
"loss": 1.6493,
"step": 18800
},
{
"epoch": 6.108597285067873,
"grad_norm": 0.9927520155906677,
"learning_rate": 0.001,
"loss": 1.6346,
"step": 18900
},
{
"epoch": 6.140917905623788,
"grad_norm": 0.7810798287391663,
"learning_rate": 0.001,
"loss": 1.6908,
"step": 19000
},
{
"epoch": 6.173238526179703,
"grad_norm": 0.6857879757881165,
"learning_rate": 0.001,
"loss": 1.6729,
"step": 19100
},
{
"epoch": 6.2055591467356175,
"grad_norm": 0.8595390915870667,
"learning_rate": 0.001,
"loss": 1.6679,
"step": 19200
},
{
"epoch": 6.237879767291532,
"grad_norm": 0.9147341847419739,
"learning_rate": 0.001,
"loss": 1.7043,
"step": 19300
},
{
"epoch": 6.270200387847447,
"grad_norm": 0.9340370893478394,
"learning_rate": 0.001,
"loss": 1.7241,
"step": 19400
},
{
"epoch": 6.302521008403361,
"grad_norm": 1.0489479303359985,
"learning_rate": 0.001,
"loss": 1.6671,
"step": 19500
},
{
"epoch": 6.334841628959276,
"grad_norm": 0.7707657814025879,
"learning_rate": 0.001,
"loss": 1.6772,
"step": 19600
},
{
"epoch": 6.36716224951519,
"grad_norm": 0.748698353767395,
"learning_rate": 0.001,
"loss": 1.7033,
"step": 19700
},
{
"epoch": 6.399482870071106,
"grad_norm": 0.7804653644561768,
"learning_rate": 0.001,
"loss": 1.7091,
"step": 19800
},
{
"epoch": 6.43180349062702,
"grad_norm": 1.2131924629211426,
"learning_rate": 0.001,
"loss": 1.6997,
"step": 19900
},
{
"epoch": 6.464124111182935,
"grad_norm": 0.9598875641822815,
"learning_rate": 0.001,
"loss": 1.676,
"step": 20000
},
{
"epoch": 6.496444731738849,
"grad_norm": 1.1061313152313232,
"learning_rate": 0.001,
"loss": 1.7048,
"step": 20100
},
{
"epoch": 6.528765352294764,
"grad_norm": 0.8944084644317627,
"learning_rate": 0.001,
"loss": 1.6876,
"step": 20200
},
{
"epoch": 6.5610859728506785,
"grad_norm": 1.101098895072937,
"learning_rate": 0.001,
"loss": 1.6805,
"step": 20300
},
{
"epoch": 6.593406593406593,
"grad_norm": 0.9273419380187988,
"learning_rate": 0.001,
"loss": 1.6776,
"step": 20400
},
{
"epoch": 6.625727213962508,
"grad_norm": 1.4434058666229248,
"learning_rate": 0.001,
"loss": 1.6771,
"step": 20500
},
{
"epoch": 6.658047834518423,
"grad_norm": 0.9152409434318542,
"learning_rate": 0.001,
"loss": 1.685,
"step": 20600
},
{
"epoch": 6.690368455074338,
"grad_norm": 1.1112756729125977,
"learning_rate": 0.001,
"loss": 1.6881,
"step": 20700
},
{
"epoch": 6.722689075630252,
"grad_norm": 0.9489529132843018,
"learning_rate": 0.001,
"loss": 1.6868,
"step": 20800
},
{
"epoch": 6.755009696186167,
"grad_norm": 0.7932117581367493,
"learning_rate": 0.001,
"loss": 1.6925,
"step": 20900
},
{
"epoch": 6.787330316742081,
"grad_norm": 0.893706202507019,
"learning_rate": 0.001,
"loss": 1.7308,
"step": 21000
},
{
"epoch": 6.819650937297996,
"grad_norm": 1.0104913711547852,
"learning_rate": 0.001,
"loss": 1.7075,
"step": 21100
},
{
"epoch": 6.85197155785391,
"grad_norm": 0.8653020858764648,
"learning_rate": 0.001,
"loss": 1.7352,
"step": 21200
},
{
"epoch": 6.884292178409826,
"grad_norm": 0.8224719166755676,
"learning_rate": 0.001,
"loss": 1.7129,
"step": 21300
},
{
"epoch": 6.91661279896574,
"grad_norm": 1.0622234344482422,
"learning_rate": 0.001,
"loss": 1.7153,
"step": 21400
},
{
"epoch": 6.948933419521655,
"grad_norm": 1.1205211877822876,
"learning_rate": 0.001,
"loss": 1.741,
"step": 21500
},
{
"epoch": 6.98125404007757,
"grad_norm": 1.1996233463287354,
"learning_rate": 0.001,
"loss": 1.7005,
"step": 21600
},
{
"epoch": 7.013574660633484,
"grad_norm": 1.043961763381958,
"learning_rate": 0.001,
"loss": 1.6296,
"step": 21700
},
{
"epoch": 7.045895281189399,
"grad_norm": 0.9840327501296997,
"learning_rate": 0.001,
"loss": 1.5904,
"step": 21800
},
{
"epoch": 7.078215901745313,
"grad_norm": 1.1553939580917358,
"learning_rate": 0.001,
"loss": 1.5837,
"step": 21900
},
{
"epoch": 7.110536522301228,
"grad_norm": 0.9404619336128235,
"learning_rate": 0.001,
"loss": 1.6182,
"step": 22000
},
{
"epoch": 7.142857142857143,
"grad_norm": 0.7575173377990723,
"learning_rate": 0.001,
"loss": 1.6091,
"step": 22100
},
{
"epoch": 7.175177763413058,
"grad_norm": 0.8651929497718811,
"learning_rate": 0.001,
"loss": 1.6097,
"step": 22200
},
{
"epoch": 7.207498383968972,
"grad_norm": 0.7588925361633301,
"learning_rate": 0.001,
"loss": 1.6596,
"step": 22300
},
{
"epoch": 7.239819004524887,
"grad_norm": 0.9948194026947021,
"learning_rate": 0.001,
"loss": 1.6307,
"step": 22400
},
{
"epoch": 7.2721396250808015,
"grad_norm": 0.7452443242073059,
"learning_rate": 0.001,
"loss": 1.6545,
"step": 22500
},
{
"epoch": 7.304460245636716,
"grad_norm": 0.7591370940208435,
"learning_rate": 0.001,
"loss": 1.6257,
"step": 22600
},
{
"epoch": 7.336780866192631,
"grad_norm": 1.1810401678085327,
"learning_rate": 0.001,
"loss": 1.6381,
"step": 22700
},
{
"epoch": 7.369101486748546,
"grad_norm": 0.785140335559845,
"learning_rate": 0.001,
"loss": 1.6453,
"step": 22800
},
{
"epoch": 7.401422107304461,
"grad_norm": 1.0665206909179688,
"learning_rate": 0.001,
"loss": 1.656,
"step": 22900
},
{
"epoch": 7.433742727860375,
"grad_norm": 1.0718743801116943,
"learning_rate": 0.001,
"loss": 1.6676,
"step": 23000
},
{
"epoch": 7.46606334841629,
"grad_norm": 0.8834295868873596,
"learning_rate": 0.001,
"loss": 1.6464,
"step": 23100
},
{
"epoch": 7.498383968972204,
"grad_norm": 0.9542046189308167,
"learning_rate": 0.001,
"loss": 1.6544,
"step": 23200
},
{
"epoch": 7.530704589528119,
"grad_norm": 0.7912190556526184,
"learning_rate": 0.001,
"loss": 1.6348,
"step": 23300
},
{
"epoch": 7.563025210084033,
"grad_norm": 46.410430908203125,
"learning_rate": 0.001,
"loss": 1.6686,
"step": 23400
},
{
"epoch": 7.595345830639948,
"grad_norm": 0.779313325881958,
"learning_rate": 0.001,
"loss": 1.6581,
"step": 23500
},
{
"epoch": 7.6276664511958625,
"grad_norm": 1.3436274528503418,
"learning_rate": 0.001,
"loss": 1.6553,
"step": 23600
},
{
"epoch": 7.659987071751778,
"grad_norm": 0.8226367831230164,
"learning_rate": 0.001,
"loss": 1.6682,
"step": 23700
},
{
"epoch": 7.6923076923076925,
"grad_norm": 0.8935831189155579,
"learning_rate": 0.001,
"loss": 1.6315,
"step": 23800
},
{
"epoch": 7.724628312863607,
"grad_norm": 1.181753158569336,
"learning_rate": 0.001,
"loss": 1.6274,
"step": 23900
},
{
"epoch": 7.756948933419522,
"grad_norm": 0.674148440361023,
"learning_rate": 0.001,
"loss": 1.6386,
"step": 24000
},
{
"epoch": 7.789269553975436,
"grad_norm": 1.0500868558883667,
"learning_rate": 0.001,
"loss": 1.6534,
"step": 24100
},
{
"epoch": 7.821590174531351,
"grad_norm": 0.9021979570388794,
"learning_rate": 0.001,
"loss": 1.6714,
"step": 24200
},
{
"epoch": 7.853910795087265,
"grad_norm": 1.0396559238433838,
"learning_rate": 0.001,
"loss": 1.6539,
"step": 24300
},
{
"epoch": 7.886231415643181,
"grad_norm": 0.9892787337303162,
"learning_rate": 0.001,
"loss": 1.6734,
"step": 24400
},
{
"epoch": 7.918552036199095,
"grad_norm": 0.8489758968353271,
"learning_rate": 0.001,
"loss": 1.6485,
"step": 24500
},
{
"epoch": 7.95087265675501,
"grad_norm": 1.0946519374847412,
"learning_rate": 0.001,
"loss": 1.6992,
"step": 24600
},
{
"epoch": 7.983193277310924,
"grad_norm": 0.9034314155578613,
"learning_rate": 0.001,
"loss": 1.6708,
"step": 24700
},
{
"epoch": 8.015513897866839,
"grad_norm": 0.7086787819862366,
"learning_rate": 0.001,
"loss": 1.548,
"step": 24800
},
{
"epoch": 8.047834518422754,
"grad_norm": 0.42088550329208374,
"learning_rate": 0.001,
"loss": 1.5713,
"step": 24900
},
{
"epoch": 8.080155138978668,
"grad_norm": 0.5306487083435059,
"learning_rate": 0.001,
"loss": 1.568,
"step": 25000
},
{
"epoch": 8.112475759534583,
"grad_norm": 0.7567152976989746,
"learning_rate": 0.001,
"loss": 1.5902,
"step": 25100
},
{
"epoch": 8.144796380090497,
"grad_norm": 0.8452807664871216,
"learning_rate": 0.001,
"loss": 1.6055,
"step": 25200
},
{
"epoch": 8.177117000646412,
"grad_norm": 0.7313001751899719,
"learning_rate": 0.001,
"loss": 1.5539,
"step": 25300
},
{
"epoch": 8.209437621202326,
"grad_norm": 0.8463912606239319,
"learning_rate": 0.001,
"loss": 1.6055,
"step": 25400
},
{
"epoch": 8.241758241758241,
"grad_norm": 0.7595904469490051,
"learning_rate": 0.001,
"loss": 1.5831,
"step": 25500
},
{
"epoch": 8.274078862314157,
"grad_norm": 0.8072571158409119,
"learning_rate": 0.001,
"loss": 1.5837,
"step": 25600
},
{
"epoch": 8.306399482870072,
"grad_norm": 0.5768963694572449,
"learning_rate": 0.001,
"loss": 1.589,
"step": 25700
},
{
"epoch": 8.338720103425986,
"grad_norm": 0.7995852828025818,
"learning_rate": 0.001,
"loss": 1.5792,
"step": 25800
},
{
"epoch": 8.371040723981901,
"grad_norm": 0.8774004578590393,
"learning_rate": 0.001,
"loss": 1.5867,
"step": 25900
},
{
"epoch": 8.403361344537815,
"grad_norm": 1.035139560699463,
"learning_rate": 0.001,
"loss": 1.5771,
"step": 26000
},
{
"epoch": 8.43568196509373,
"grad_norm": 0.6128062605857849,
"learning_rate": 0.001,
"loss": 1.573,
"step": 26100
},
{
"epoch": 8.468002585649645,
"grad_norm": 0.6861528158187866,
"learning_rate": 0.001,
"loss": 1.5973,
"step": 26200
},
{
"epoch": 8.50032320620556,
"grad_norm": 0.6741127371788025,
"learning_rate": 0.001,
"loss": 1.6055,
"step": 26300
},
{
"epoch": 8.532643826761474,
"grad_norm": 0.7249501943588257,
"learning_rate": 0.001,
"loss": 1.6032,
"step": 26400
},
{
"epoch": 8.564964447317388,
"grad_norm": 0.5071559548377991,
"learning_rate": 0.001,
"loss": 1.6135,
"step": 26500
},
{
"epoch": 8.597285067873303,
"grad_norm": 0.6695942282676697,
"learning_rate": 0.001,
"loss": 1.6317,
"step": 26600
},
{
"epoch": 8.629605688429217,
"grad_norm": 0.8934532999992371,
"learning_rate": 0.001,
"loss": 1.6195,
"step": 26700
},
{
"epoch": 8.661926308985132,
"grad_norm": 0.6500742435455322,
"learning_rate": 0.001,
"loss": 1.5722,
"step": 26800
},
{
"epoch": 8.694246929541046,
"grad_norm": 0.6032618880271912,
"learning_rate": 0.001,
"loss": 1.5891,
"step": 26900
},
{
"epoch": 8.726567550096961,
"grad_norm": 0.8172997236251831,
"learning_rate": 0.001,
"loss": 1.6224,
"step": 27000
},
{
"epoch": 8.758888170652877,
"grad_norm": 0.7582260370254517,
"learning_rate": 0.001,
"loss": 1.6274,
"step": 27100
},
{
"epoch": 8.791208791208792,
"grad_norm": 0.7059329152107239,
"learning_rate": 0.001,
"loss": 1.6109,
"step": 27200
},
{
"epoch": 8.823529411764707,
"grad_norm": 0.8909659385681152,
"learning_rate": 0.001,
"loss": 1.6163,
"step": 27300
},
{
"epoch": 8.855850032320621,
"grad_norm": 0.6082348227500916,
"learning_rate": 0.001,
"loss": 1.6392,
"step": 27400
},
{
"epoch": 8.888170652876536,
"grad_norm": 0.6296877861022949,
"learning_rate": 0.001,
"loss": 1.6235,
"step": 27500
},
{
"epoch": 8.92049127343245,
"grad_norm": 0.5239854454994202,
"learning_rate": 0.001,
"loss": 1.5809,
"step": 27600
},
{
"epoch": 8.952811893988365,
"grad_norm": 0.5151014924049377,
"learning_rate": 0.001,
"loss": 1.6231,
"step": 27700
},
{
"epoch": 8.98513251454428,
"grad_norm": 0.6107050180435181,
"learning_rate": 0.001,
"loss": 1.6294,
"step": 27800
},
{
"epoch": 9.017453135100194,
"grad_norm": 1.571207880973816,
"learning_rate": 0.001,
"loss": 1.5311,
"step": 27900
},
{
"epoch": 9.049773755656108,
"grad_norm": 1.3317564725875854,
"learning_rate": 0.001,
"loss": 1.5232,
"step": 28000
},
{
"epoch": 9.082094376212023,
"grad_norm": 0.9770411252975464,
"learning_rate": 0.001,
"loss": 1.5323,
"step": 28100
},
{
"epoch": 9.114414996767938,
"grad_norm": 1.5269267559051514,
"learning_rate": 0.001,
"loss": 1.538,
"step": 28200
},
{
"epoch": 9.146735617323852,
"grad_norm": 1.4465818405151367,
"learning_rate": 0.001,
"loss": 1.5356,
"step": 28300
},
{
"epoch": 9.179056237879767,
"grad_norm": 1.4123685359954834,
"learning_rate": 0.001,
"loss": 1.5354,
"step": 28400
},
{
"epoch": 9.211376858435681,
"grad_norm": 1.1594544649124146,
"learning_rate": 0.001,
"loss": 1.4929,
"step": 28500
},
{
"epoch": 9.243697478991596,
"grad_norm": 1.5278046131134033,
"learning_rate": 0.001,
"loss": 1.5353,
"step": 28600
},
{
"epoch": 9.276018099547512,
"grad_norm": 1.3489573001861572,
"learning_rate": 0.001,
"loss": 1.5223,
"step": 28700
},
{
"epoch": 9.308338720103427,
"grad_norm": 0.9945598244667053,
"learning_rate": 0.001,
"loss": 1.565,
"step": 28800
},
{
"epoch": 9.340659340659341,
"grad_norm": 1.3939306735992432,
"learning_rate": 0.001,
"loss": 1.5747,
"step": 28900
},
{
"epoch": 9.372979961215256,
"grad_norm": 1.0500473976135254,
"learning_rate": 0.001,
"loss": 1.5677,
"step": 29000
},
{
"epoch": 9.40530058177117,
"grad_norm": 1.1538209915161133,
"learning_rate": 0.001,
"loss": 1.5602,
"step": 29100
},
{
"epoch": 9.437621202327085,
"grad_norm": 1.2732844352722168,
"learning_rate": 0.001,
"loss": 1.5582,
"step": 29200
},
{
"epoch": 9.469941822883,
"grad_norm": 1.4475446939468384,
"learning_rate": 0.001,
"loss": 1.5542,
"step": 29300
},
{
"epoch": 9.502262443438914,
"grad_norm": 1.5488345623016357,
"learning_rate": 0.001,
"loss": 1.5589,
"step": 29400
},
{
"epoch": 9.534583063994829,
"grad_norm": 0.9935588836669922,
"learning_rate": 0.001,
"loss": 1.5645,
"step": 29500
},
{
"epoch": 9.566903684550743,
"grad_norm": 1.3134706020355225,
"learning_rate": 0.001,
"loss": 1.5479,
"step": 29600
},
{
"epoch": 9.599224305106658,
"grad_norm": 1.4117892980575562,
"learning_rate": 0.001,
"loss": 1.5534,
"step": 29700
},
{
"epoch": 9.631544925662572,
"grad_norm": 1.0989189147949219,
"learning_rate": 0.001,
"loss": 1.5924,
"step": 29800
},
{
"epoch": 9.663865546218487,
"grad_norm": 0.9962360858917236,
"learning_rate": 0.001,
"loss": 1.5521,
"step": 29900
},
{
"epoch": 9.696186166774401,
"grad_norm": 1.0200791358947754,
"learning_rate": 0.001,
"loss": 1.5421,
"step": 30000
},
{
"epoch": 9.728506787330316,
"grad_norm": 1.2421667575836182,
"learning_rate": 0.001,
"loss": 1.5971,
"step": 30100
},
{
"epoch": 9.760827407886232,
"grad_norm": 1.2540074586868286,
"learning_rate": 0.001,
"loss": 1.562,
"step": 30200
},
{
"epoch": 9.793148028442147,
"grad_norm": 1.1804842948913574,
"learning_rate": 0.001,
"loss": 1.5673,
"step": 30300
},
{
"epoch": 9.825468648998061,
"grad_norm": 1.2499170303344727,
"learning_rate": 0.001,
"loss": 1.5783,
"step": 30400
},
{
"epoch": 9.857789269553976,
"grad_norm": 1.1028128862380981,
"learning_rate": 0.001,
"loss": 1.5888,
"step": 30500
},
{
"epoch": 9.89010989010989,
"grad_norm": 1.251753330230713,
"learning_rate": 0.001,
"loss": 1.5749,
"step": 30600
},
{
"epoch": 9.922430510665805,
"grad_norm": 1.2316412925720215,
"learning_rate": 0.001,
"loss": 1.5755,
"step": 30700
},
{
"epoch": 9.95475113122172,
"grad_norm": 0.9406437873840332,
"learning_rate": 0.001,
"loss": 1.5741,
"step": 30800
},
{
"epoch": 9.987071751777634,
"grad_norm": 1.3652178049087524,
"learning_rate": 0.001,
"loss": 1.5867,
"step": 30900
},
{
"epoch": 10.019392372333549,
"grad_norm": 1.1299371719360352,
"learning_rate": 0.001,
"loss": 1.5492,
"step": 31000
},
{
"epoch": 10.051712992889463,
"grad_norm": 1.1812490224838257,
"learning_rate": 0.001,
"loss": 1.4886,
"step": 31100
},
{
"epoch": 10.084033613445378,
"grad_norm": 1.179519534111023,
"learning_rate": 0.001,
"loss": 1.4934,
"step": 31200
},
{
"epoch": 10.116354234001292,
"grad_norm": 1.120004653930664,
"learning_rate": 0.001,
"loss": 1.4879,
"step": 31300
},
{
"epoch": 10.148674854557207,
"grad_norm": 1.0279499292373657,
"learning_rate": 0.001,
"loss": 1.4963,
"step": 31400
},
{
"epoch": 10.180995475113122,
"grad_norm": 1.1619842052459717,
"learning_rate": 0.001,
"loss": 1.5114,
"step": 31500
},
{
"epoch": 10.213316095669036,
"grad_norm": 1.1536353826522827,
"learning_rate": 0.001,
"loss": 1.4935,
"step": 31600
},
{
"epoch": 10.24563671622495,
"grad_norm": 1.0345031023025513,
"learning_rate": 0.001,
"loss": 1.506,
"step": 31700
},
{
"epoch": 10.277957336780867,
"grad_norm": 1.0397800207138062,
"learning_rate": 0.001,
"loss": 1.5026,
"step": 31800
},
{
"epoch": 10.310277957336782,
"grad_norm": 0.8596133589744568,
"learning_rate": 0.001,
"loss": 1.5256,
"step": 31900
},
{
"epoch": 10.342598577892696,
"grad_norm": 1.1593185663223267,
"learning_rate": 0.001,
"loss": 1.5055,
"step": 32000
},
{
"epoch": 10.37491919844861,
"grad_norm": 1.0919194221496582,
"learning_rate": 0.001,
"loss": 1.4956,
"step": 32100
},
{
"epoch": 10.407239819004525,
"grad_norm": 1.1217265129089355,
"learning_rate": 0.001,
"loss": 1.5337,
"step": 32200
},
{
"epoch": 10.43956043956044,
"grad_norm": 0.9850316643714905,
"learning_rate": 0.001,
"loss": 1.516,
"step": 32300
},
{
"epoch": 10.471881060116354,
"grad_norm": 1.0271371603012085,
"learning_rate": 0.001,
"loss": 1.5346,
"step": 32400
},
{
"epoch": 10.504201680672269,
"grad_norm": 1.0458983182907104,
"learning_rate": 0.001,
"loss": 1.5167,
"step": 32500
},
{
"epoch": 10.536522301228183,
"grad_norm": 1.0349985361099243,
"learning_rate": 0.001,
"loss": 1.5203,
"step": 32600
},
{
"epoch": 10.568842921784098,
"grad_norm": 1.091556191444397,
"learning_rate": 0.001,
"loss": 1.5097,
"step": 32700
},
{
"epoch": 10.601163542340013,
"grad_norm": 1.1958649158477783,
"learning_rate": 0.001,
"loss": 1.5038,
"step": 32800
},
{
"epoch": 10.633484162895927,
"grad_norm": 1.1409047842025757,
"learning_rate": 0.001,
"loss": 1.4975,
"step": 32900
},
{
"epoch": 10.665804783451842,
"grad_norm": 1.205556035041809,
"learning_rate": 0.001,
"loss": 1.5596,
"step": 33000
},
{
"epoch": 10.698125404007756,
"grad_norm": 1.0502017736434937,
"learning_rate": 0.001,
"loss": 1.5111,
"step": 33100
},
{
"epoch": 10.73044602456367,
"grad_norm": 1.3859450817108154,
"learning_rate": 0.001,
"loss": 1.508,
"step": 33200
},
{
"epoch": 10.762766645119587,
"grad_norm": 1.0951238870620728,
"learning_rate": 0.001,
"loss": 1.5417,
"step": 33300
},
{
"epoch": 10.795087265675502,
"grad_norm": 2.344174385070801,
"learning_rate": 0.001,
"loss": 1.5599,
"step": 33400
},
{
"epoch": 10.827407886231416,
"grad_norm": 0.9748075604438782,
"learning_rate": 0.001,
"loss": 1.5558,
"step": 33500
},
{
"epoch": 10.85972850678733,
"grad_norm": 0.9509823322296143,
"learning_rate": 0.001,
"loss": 1.5253,
"step": 33600
},
{
"epoch": 10.892049127343245,
"grad_norm": 0.9078757166862488,
"learning_rate": 0.001,
"loss": 1.543,
"step": 33700
},
{
"epoch": 10.92436974789916,
"grad_norm": 0.9902774691581726,
"learning_rate": 0.001,
"loss": 1.5438,
"step": 33800
},
{
"epoch": 10.956690368455074,
"grad_norm": 1.2797644138336182,
"learning_rate": 0.001,
"loss": 1.5345,
"step": 33900
},
{
"epoch": 10.989010989010989,
"grad_norm": 1.1902170181274414,
"learning_rate": 0.001,
"loss": 1.5337,
"step": 34000
},
{
"epoch": 11.021331609566904,
"grad_norm": 1.0825999975204468,
"learning_rate": 0.001,
"loss": 1.4888,
"step": 34100
},
{
"epoch": 11.053652230122818,
"grad_norm": 1.1632471084594727,
"learning_rate": 0.001,
"loss": 1.4294,
"step": 34200
},
{
"epoch": 11.085972850678733,
"grad_norm": 1.212774634361267,
"learning_rate": 0.001,
"loss": 1.4312,
"step": 34300
},
{
"epoch": 11.118293471234647,
"grad_norm": 1.018050193786621,
"learning_rate": 0.001,
"loss": 1.4696,
"step": 34400
},
{
"epoch": 11.150614091790562,
"grad_norm": 1.0512523651123047,
"learning_rate": 0.001,
"loss": 1.4331,
"step": 34500
},
{
"epoch": 11.182934712346476,
"grad_norm": 0.9138154983520508,
"learning_rate": 0.001,
"loss": 1.4497,
"step": 34600
},
{
"epoch": 11.215255332902391,
"grad_norm": 1.0956708192825317,
"learning_rate": 0.001,
"loss": 1.4666,
"step": 34700
},
{
"epoch": 11.247575953458306,
"grad_norm": 1.0326133966445923,
"learning_rate": 0.001,
"loss": 1.4538,
"step": 34800
},
{
"epoch": 11.279896574014222,
"grad_norm": 0.9127147793769836,
"learning_rate": 0.001,
"loss": 1.4677,
"step": 34900
},
{
"epoch": 11.312217194570136,
"grad_norm": 0.9023076295852661,
"learning_rate": 0.001,
"loss": 1.4532,
"step": 35000
},
{
"epoch": 11.344537815126051,
"grad_norm": 1.0806233882904053,
"learning_rate": 0.001,
"loss": 1.464,
"step": 35100
},
{
"epoch": 11.376858435681966,
"grad_norm": 1.0325735807418823,
"learning_rate": 0.001,
"loss": 1.4889,
"step": 35200
},
{
"epoch": 11.40917905623788,
"grad_norm": 0.9904654026031494,
"learning_rate": 0.001,
"loss": 1.4935,
"step": 35300
},
{
"epoch": 11.441499676793795,
"grad_norm": 1.2254970073699951,
"learning_rate": 0.001,
"loss": 1.491,
"step": 35400
},
{
"epoch": 11.47382029734971,
"grad_norm": 1.5757197141647339,
"learning_rate": 0.001,
"loss": 1.4954,
"step": 35500
},
{
"epoch": 11.506140917905624,
"grad_norm": 0.9046617746353149,
"learning_rate": 0.001,
"loss": 1.4765,
"step": 35600
},
{
"epoch": 11.538461538461538,
"grad_norm": 0.987343430519104,
"learning_rate": 0.001,
"loss": 1.5013,
"step": 35700
},
{
"epoch": 11.570782159017453,
"grad_norm": 1.0150471925735474,
"learning_rate": 0.001,
"loss": 1.4783,
"step": 35800
},
{
"epoch": 11.603102779573367,
"grad_norm": 1.2784874439239502,
"learning_rate": 0.001,
"loss": 1.4966,
"step": 35900
},
{
"epoch": 11.635423400129282,
"grad_norm": 1.0940210819244385,
"learning_rate": 0.001,
"loss": 1.5001,
"step": 36000
},
{
"epoch": 11.667744020685197,
"grad_norm": 1.2780746221542358,
"learning_rate": 0.001,
"loss": 1.4962,
"step": 36100
},
{
"epoch": 11.700064641241111,
"grad_norm": 0.9342361688613892,
"learning_rate": 0.001,
"loss": 1.5119,
"step": 36200
},
{
"epoch": 11.732385261797026,
"grad_norm": 1.034030556678772,
"learning_rate": 0.001,
"loss": 1.4994,
"step": 36300
},
{
"epoch": 11.764705882352942,
"grad_norm": 1.0301884412765503,
"learning_rate": 0.001,
"loss": 1.5109,
"step": 36400
},
{
"epoch": 11.797026502908857,
"grad_norm": 1.0798345804214478,
"learning_rate": 0.001,
"loss": 1.4931,
"step": 36500
},
{
"epoch": 11.829347123464771,
"grad_norm": 0.9824477434158325,
"learning_rate": 0.001,
"loss": 1.4757,
"step": 36600
},
{
"epoch": 11.861667744020686,
"grad_norm": 0.970503568649292,
"learning_rate": 0.001,
"loss": 1.5167,
"step": 36700
},
{
"epoch": 11.8939883645766,
"grad_norm": 1.0813010931015015,
"learning_rate": 0.001,
"loss": 1.497,
"step": 36800
},
{
"epoch": 11.926308985132515,
"grad_norm": 1.0717248916625977,
"learning_rate": 0.001,
"loss": 1.5072,
"step": 36900
},
{
"epoch": 11.95862960568843,
"grad_norm": 0.9872753024101257,
"learning_rate": 0.001,
"loss": 1.5011,
"step": 37000
},
{
"epoch": 11.990950226244344,
"grad_norm": 0.9820966124534607,
"learning_rate": 0.001,
"loss": 1.5042,
"step": 37100
},
{
"epoch": 12.023270846800258,
"grad_norm": 0.968085527420044,
"learning_rate": 0.001,
"loss": 1.4501,
"step": 37200
},
{
"epoch": 12.055591467356173,
"grad_norm": 0.980858325958252,
"learning_rate": 0.001,
"loss": 1.4073,
"step": 37300
},
{
"epoch": 12.087912087912088,
"grad_norm": 1.4215143918991089,
"learning_rate": 0.001,
"loss": 1.4116,
"step": 37400
},
{
"epoch": 12.120232708468002,
"grad_norm": 0.9262951612472534,
"learning_rate": 0.001,
"loss": 1.4064,
"step": 37500
},
{
"epoch": 12.152553329023917,
"grad_norm": 1.0522440671920776,
"learning_rate": 0.001,
"loss": 1.4034,
"step": 37600
},
{
"epoch": 12.184873949579831,
"grad_norm": 1.0187525749206543,
"learning_rate": 0.001,
"loss": 1.4318,
"step": 37700
},
{
"epoch": 12.217194570135746,
"grad_norm": 0.9597002863883972,
"learning_rate": 0.001,
"loss": 1.4324,
"step": 37800
},
{
"epoch": 12.24951519069166,
"grad_norm": 1.0441052913665771,
"learning_rate": 0.001,
"loss": 1.4142,
"step": 37900
},
{
"epoch": 12.281835811247577,
"grad_norm": 0.8995744585990906,
"learning_rate": 0.001,
"loss": 1.432,
"step": 38000
},
{
"epoch": 12.314156431803491,
"grad_norm": 1.096145510673523,
"learning_rate": 0.001,
"loss": 1.4187,
"step": 38100
},
{
"epoch": 12.346477052359406,
"grad_norm": 0.9527760148048401,
"learning_rate": 0.001,
"loss": 1.4671,
"step": 38200
},
{
"epoch": 12.37879767291532,
"grad_norm": 1.1196210384368896,
"learning_rate": 0.001,
"loss": 1.4443,
"step": 38300
},
{
"epoch": 12.411118293471235,
"grad_norm": 0.9554662108421326,
"learning_rate": 0.001,
"loss": 1.4614,
"step": 38400
},
{
"epoch": 12.44343891402715,
"grad_norm": 0.9521270394325256,
"learning_rate": 0.001,
"loss": 1.4679,
"step": 38500
},
{
"epoch": 12.475759534583064,
"grad_norm": 1.0394660234451294,
"learning_rate": 0.001,
"loss": 1.4449,
"step": 38600
},
{
"epoch": 12.508080155138979,
"grad_norm": 1.0146692991256714,
"learning_rate": 0.001,
"loss": 1.4368,
"step": 38700
},
{
"epoch": 12.540400775694893,
"grad_norm": 1.3197181224822998,
"learning_rate": 0.001,
"loss": 1.4654,
"step": 38800
},
{
"epoch": 12.572721396250808,
"grad_norm": 1.0358250141143799,
"learning_rate": 0.001,
"loss": 1.4495,
"step": 38900
},
{
"epoch": 12.605042016806722,
"grad_norm": 1.08975088596344,
"learning_rate": 0.001,
"loss": 1.4504,
"step": 39000
},
{
"epoch": 12.637362637362637,
"grad_norm": 0.9866904020309448,
"learning_rate": 0.001,
"loss": 1.435,
"step": 39100
},
{
"epoch": 12.669683257918551,
"grad_norm": 1.085909366607666,
"learning_rate": 0.001,
"loss": 1.4448,
"step": 39200
},
{
"epoch": 12.702003878474466,
"grad_norm": 0.9458845257759094,
"learning_rate": 0.001,
"loss": 1.4559,
"step": 39300
},
{
"epoch": 12.73432449903038,
"grad_norm": 1.0192725658416748,
"learning_rate": 0.001,
"loss": 1.458,
"step": 39400
},
{
"epoch": 12.766645119586297,
"grad_norm": 1.2316535711288452,
"learning_rate": 0.001,
"loss": 1.4697,
"step": 39500
},
{
"epoch": 12.798965740142211,
"grad_norm": 0.9104325771331787,
"learning_rate": 0.001,
"loss": 1.5081,
"step": 39600
},
{
"epoch": 12.831286360698126,
"grad_norm": 1.11668860912323,
"learning_rate": 0.001,
"loss": 1.4984,
"step": 39700
},
{
"epoch": 12.86360698125404,
"grad_norm": 0.9999051690101624,
"learning_rate": 0.001,
"loss": 1.4485,
"step": 39800
},
{
"epoch": 12.895927601809955,
"grad_norm": 1.0887517929077148,
"learning_rate": 0.001,
"loss": 1.4528,
"step": 39900
},
{
"epoch": 12.92824822236587,
"grad_norm": 1.1782604455947876,
"learning_rate": 0.001,
"loss": 1.4756,
"step": 40000
},
{
"epoch": 12.960568842921784,
"grad_norm": 0.8981242775917053,
"learning_rate": 0.001,
"loss": 1.4514,
"step": 40100
},
{
"epoch": 12.992889463477699,
"grad_norm": 0.875098705291748,
"learning_rate": 0.001,
"loss": 1.4544,
"step": 40200
},
{
"epoch": 13.025210084033613,
"grad_norm": 1.2988208532333374,
"learning_rate": 0.001,
"loss": 1.3966,
"step": 40300
},
{
"epoch": 13.057530704589528,
"grad_norm": 1.8159008026123047,
"learning_rate": 0.001,
"loss": 1.3734,
"step": 40400
},
{
"epoch": 13.089851325145442,
"grad_norm": 0.9084588289260864,
"learning_rate": 0.001,
"loss": 1.369,
"step": 40500
},
{
"epoch": 13.122171945701357,
"grad_norm": 0.8925555944442749,
"learning_rate": 0.001,
"loss": 1.3684,
"step": 40600
},
{
"epoch": 13.154492566257272,
"grad_norm": 0.8304650187492371,
"learning_rate": 0.001,
"loss": 1.3887,
"step": 40700
},
{
"epoch": 13.186813186813186,
"grad_norm": 1.0589717626571655,
"learning_rate": 0.001,
"loss": 1.3834,
"step": 40800
},
{
"epoch": 13.2191338073691,
"grad_norm": 0.9229005575180054,
"learning_rate": 0.001,
"loss": 1.3972,
"step": 40900
},
{
"epoch": 13.251454427925015,
"grad_norm": 0.9296209216117859,
"learning_rate": 0.001,
"loss": 1.3909,
"step": 41000
},
{
"epoch": 13.283775048480932,
"grad_norm": 0.8965741991996765,
"learning_rate": 0.001,
"loss": 1.3834,
"step": 41100
},
{
"epoch": 13.316095669036846,
"grad_norm": 0.9377841353416443,
"learning_rate": 0.001,
"loss": 1.4088,
"step": 41200
},
{
"epoch": 13.34841628959276,
"grad_norm": 1.0339752435684204,
"learning_rate": 0.001,
"loss": 1.4098,
"step": 41300
},
{
"epoch": 13.380736910148675,
"grad_norm": 0.8679028749465942,
"learning_rate": 0.001,
"loss": 1.3976,
"step": 41400
},
{
"epoch": 13.41305753070459,
"grad_norm": 0.9596776366233826,
"learning_rate": 0.001,
"loss": 1.3852,
"step": 41500
},
{
"epoch": 13.445378151260504,
"grad_norm": 1.042893886566162,
"learning_rate": 0.001,
"loss": 1.4066,
"step": 41600
},
{
"epoch": 13.477698771816419,
"grad_norm": 1.0267068147659302,
"learning_rate": 0.001,
"loss": 1.4032,
"step": 41700
},
{
"epoch": 13.510019392372334,
"grad_norm": 1.0795466899871826,
"learning_rate": 0.001,
"loss": 1.4233,
"step": 41800
},
{
"epoch": 13.542340012928248,
"grad_norm": 0.9310310482978821,
"learning_rate": 0.001,
"loss": 1.4176,
"step": 41900
},
{
"epoch": 13.574660633484163,
"grad_norm": 0.7695964574813843,
"learning_rate": 0.001,
"loss": 1.4068,
"step": 42000
},
{
"epoch": 13.606981254040077,
"grad_norm": 0.9554638266563416,
"learning_rate": 0.001,
"loss": 1.4462,
"step": 42100
},
{
"epoch": 13.639301874595992,
"grad_norm": 0.963115394115448,
"learning_rate": 0.001,
"loss": 1.4159,
"step": 42200
},
{
"epoch": 13.671622495151906,
"grad_norm": 0.9717909693717957,
"learning_rate": 0.001,
"loss": 1.4081,
"step": 42300
},
{
"epoch": 13.70394311570782,
"grad_norm": 1.1150710582733154,
"learning_rate": 0.001,
"loss": 1.4377,
"step": 42400
},
{
"epoch": 13.736263736263737,
"grad_norm": 1.0072553157806396,
"learning_rate": 0.001,
"loss": 1.4576,
"step": 42500
},
{
"epoch": 13.768584356819652,
"grad_norm": 0.9450471997261047,
"learning_rate": 0.001,
"loss": 1.4349,
"step": 42600
},
{
"epoch": 13.800904977375566,
"grad_norm": 0.8816408514976501,
"learning_rate": 0.001,
"loss": 1.4237,
"step": 42700
},
{
"epoch": 13.83322559793148,
"grad_norm": 0.9339333772659302,
"learning_rate": 0.001,
"loss": 1.4434,
"step": 42800
},
{
"epoch": 13.865546218487395,
"grad_norm": 1.1679960489273071,
"learning_rate": 0.001,
"loss": 1.4531,
"step": 42900
},
{
"epoch": 13.89786683904331,
"grad_norm": 1.0511928796768188,
"learning_rate": 0.001,
"loss": 1.4101,
"step": 43000
},
{
"epoch": 13.930187459599225,
"grad_norm": 0.8638760447502136,
"learning_rate": 0.001,
"loss": 1.4366,
"step": 43100
},
{
"epoch": 13.96250808015514,
"grad_norm": 1.1316864490509033,
"learning_rate": 0.001,
"loss": 1.434,
"step": 43200
},
{
"epoch": 13.994828700711054,
"grad_norm": 1.0850309133529663,
"learning_rate": 0.001,
"loss": 1.4175,
"step": 43300
},
{
"epoch": 14.027149321266968,
"grad_norm": 1.1530749797821045,
"learning_rate": 0.001,
"loss": 1.3574,
"step": 43400
},
{
"epoch": 14.059469941822883,
"grad_norm": 1.2573904991149902,
"learning_rate": 0.001,
"loss": 1.3626,
"step": 43500
},
{
"epoch": 14.091790562378797,
"grad_norm": 1.0096023082733154,
"learning_rate": 0.001,
"loss": 1.3593,
"step": 43600
},
{
"epoch": 14.124111182934712,
"grad_norm": 1.0445042848587036,
"learning_rate": 0.001,
"loss": 1.337,
"step": 43700
},
{
"epoch": 14.156431803490626,
"grad_norm": 0.976283609867096,
"learning_rate": 0.001,
"loss": 1.3559,
"step": 43800
},
{
"epoch": 14.188752424046541,
"grad_norm": 0.9800626635551453,
"learning_rate": 0.001,
"loss": 1.3801,
"step": 43900
},
{
"epoch": 14.221073044602456,
"grad_norm": 1.07820725440979,
"learning_rate": 0.001,
"loss": 1.3517,
"step": 44000
},
{
"epoch": 14.25339366515837,
"grad_norm": 1.0982788801193237,
"learning_rate": 0.001,
"loss": 1.3222,
"step": 44100
},
{
"epoch": 14.285714285714286,
"grad_norm": 1.013915777206421,
"learning_rate": 0.001,
"loss": 1.3454,
"step": 44200
},
{
"epoch": 14.318034906270201,
"grad_norm": 0.9462135434150696,
"learning_rate": 0.001,
"loss": 1.3472,
"step": 44300
},
{
"epoch": 14.350355526826116,
"grad_norm": 1.1129679679870605,
"learning_rate": 0.001,
"loss": 1.3625,
"step": 44400
},
{
"epoch": 14.38267614738203,
"grad_norm": 1.1635687351226807,
"learning_rate": 0.001,
"loss": 1.3752,
"step": 44500
},
{
"epoch": 14.414996767937945,
"grad_norm": 0.9367783665657043,
"learning_rate": 0.001,
"loss": 1.398,
"step": 44600
},
{
"epoch": 14.44731738849386,
"grad_norm": 4.812443256378174,
"learning_rate": 0.001,
"loss": 1.3708,
"step": 44700
},
{
"epoch": 14.479638009049774,
"grad_norm": 1.1130398511886597,
"learning_rate": 0.001,
"loss": 1.3928,
"step": 44800
},
{
"epoch": 14.511958629605688,
"grad_norm": 1.0559415817260742,
"learning_rate": 0.001,
"loss": 1.367,
"step": 44900
},
{
"epoch": 14.544279250161603,
"grad_norm": 1.052778959274292,
"learning_rate": 0.001,
"loss": 1.3692,
"step": 45000
},
{
"epoch": 14.576599870717518,
"grad_norm": 1.0826404094696045,
"learning_rate": 0.001,
"loss": 1.3857,
"step": 45100
},
{
"epoch": 14.608920491273432,
"grad_norm": 0.9068247675895691,
"learning_rate": 0.001,
"loss": 1.3805,
"step": 45200
},
{
"epoch": 14.641241111829347,
"grad_norm": 1.2357306480407715,
"learning_rate": 0.001,
"loss": 1.4152,
"step": 45300
},
{
"epoch": 14.673561732385261,
"grad_norm": 0.9457151293754578,
"learning_rate": 0.001,
"loss": 1.4111,
"step": 45400
},
{
"epoch": 14.705882352941176,
"grad_norm": 0.8796570301055908,
"learning_rate": 0.001,
"loss": 1.3951,
"step": 45500
},
{
"epoch": 14.738202973497092,
"grad_norm": 0.9057885408401489,
"learning_rate": 0.001,
"loss": 1.4123,
"step": 45600
},
{
"epoch": 14.770523594053007,
"grad_norm": 1.1413110494613647,
"learning_rate": 0.001,
"loss": 1.3942,
"step": 45700
},
{
"epoch": 14.802844214608921,
"grad_norm": 0.8065590262413025,
"learning_rate": 0.001,
"loss": 1.4036,
"step": 45800
},
{
"epoch": 14.835164835164836,
"grad_norm": 0.9088504314422607,
"learning_rate": 0.001,
"loss": 1.3785,
"step": 45900
},
{
"epoch": 14.86748545572075,
"grad_norm": 1.1512057781219482,
"learning_rate": 0.001,
"loss": 1.411,
"step": 46000
},
{
"epoch": 14.899806076276665,
"grad_norm": 0.9676141142845154,
"learning_rate": 0.001,
"loss": 1.3749,
"step": 46100
},
{
"epoch": 14.93212669683258,
"grad_norm": 1.2688740491867065,
"learning_rate": 0.001,
"loss": 1.4197,
"step": 46200
},
{
"epoch": 14.964447317388494,
"grad_norm": 0.9541943669319153,
"learning_rate": 0.001,
"loss": 1.4337,
"step": 46300
},
{
"epoch": 14.996767937944409,
"grad_norm": 1.5543314218521118,
"learning_rate": 0.001,
"loss": 1.3808,
"step": 46400
},
{
"epoch": 15.029088558500323,
"grad_norm": 1.0888712406158447,
"learning_rate": 0.001,
"loss": 1.3064,
"step": 46500
},
{
"epoch": 15.061409179056238,
"grad_norm": 1.151442527770996,
"learning_rate": 0.001,
"loss": 1.3064,
"step": 46600
},
{
"epoch": 15.093729799612152,
"grad_norm": 1.339379072189331,
"learning_rate": 0.001,
"loss": 1.3393,
"step": 46700
},
{
"epoch": 15.126050420168067,
"grad_norm": 1.1313862800598145,
"learning_rate": 0.001,
"loss": 1.313,
"step": 46800
},
{
"epoch": 15.158371040723981,
"grad_norm": 0.9059498906135559,
"learning_rate": 0.001,
"loss": 1.3322,
"step": 46900
},
{
"epoch": 15.190691661279896,
"grad_norm": 0.9920981526374817,
"learning_rate": 0.001,
"loss": 1.325,
"step": 47000
},
{
"epoch": 15.22301228183581,
"grad_norm": 0.7398461103439331,
"learning_rate": 0.001,
"loss": 1.3382,
"step": 47100
},
{
"epoch": 15.255332902391725,
"grad_norm": 1.0884451866149902,
"learning_rate": 0.001,
"loss": 1.3054,
"step": 47200
},
{
"epoch": 15.287653522947641,
"grad_norm": 1.2823668718338013,
"learning_rate": 0.001,
"loss": 1.3264,
"step": 47300
},
{
"epoch": 15.319974143503556,
"grad_norm": 1.3318175077438354,
"learning_rate": 0.001,
"loss": 1.3511,
"step": 47400
},
{
"epoch": 15.35229476405947,
"grad_norm": 1.1119569540023804,
"learning_rate": 0.001,
"loss": 1.3449,
"step": 47500
},
{
"epoch": 15.384615384615385,
"grad_norm": 1.1583483219146729,
"learning_rate": 0.001,
"loss": 1.3145,
"step": 47600
},
{
"epoch": 15.4169360051713,
"grad_norm": 1.323159098625183,
"learning_rate": 0.001,
"loss": 1.3609,
"step": 47700
},
{
"epoch": 15.449256625727214,
"grad_norm": 1.1489546298980713,
"learning_rate": 0.001,
"loss": 1.3374,
"step": 47800
},
{
"epoch": 15.481577246283129,
"grad_norm": 1.1882226467132568,
"learning_rate": 0.001,
"loss": 1.353,
"step": 47900
},
{
"epoch": 15.513897866839043,
"grad_norm": 1.6996192932128906,
"learning_rate": 0.001,
"loss": 1.3764,
"step": 48000
},
{
"epoch": 15.546218487394958,
"grad_norm": 1.079559564590454,
"learning_rate": 0.001,
"loss": 1.3654,
"step": 48100
},
{
"epoch": 15.578539107950872,
"grad_norm": 0.9709523916244507,
"learning_rate": 0.001,
"loss": 1.361,
"step": 48200
},
{
"epoch": 15.610859728506787,
"grad_norm": 1.1908799409866333,
"learning_rate": 0.001,
"loss": 1.3668,
"step": 48300
},
{
"epoch": 15.643180349062701,
"grad_norm": 0.8918905854225159,
"learning_rate": 0.001,
"loss": 1.3524,
"step": 48400
},
{
"epoch": 15.675500969618616,
"grad_norm": 0.880649983882904,
"learning_rate": 0.001,
"loss": 1.3517,
"step": 48500
},
{
"epoch": 15.70782159017453,
"grad_norm": 1.1815990209579468,
"learning_rate": 0.001,
"loss": 1.349,
"step": 48600
},
{
"epoch": 15.740142210730447,
"grad_norm": 1.071019172668457,
"learning_rate": 0.001,
"loss": 1.3596,
"step": 48700
},
{
"epoch": 15.772462831286362,
"grad_norm": 1.1914271116256714,
"learning_rate": 0.001,
"loss": 1.3663,
"step": 48800
},
{
"epoch": 15.804783451842276,
"grad_norm": 1.4547832012176514,
"learning_rate": 0.001,
"loss": 1.3611,
"step": 48900
},
{
"epoch": 15.83710407239819,
"grad_norm": 1.1460895538330078,
"learning_rate": 0.001,
"loss": 1.3605,
"step": 49000
},
{
"epoch": 15.869424692954105,
"grad_norm": 1.155902624130249,
"learning_rate": 0.001,
"loss": 1.3833,
"step": 49100
},
{
"epoch": 15.90174531351002,
"grad_norm": 1.1332170963287354,
"learning_rate": 0.001,
"loss": 1.366,
"step": 49200
},
{
"epoch": 15.934065934065934,
"grad_norm": 1.009746789932251,
"learning_rate": 0.001,
"loss": 1.3741,
"step": 49300
},
{
"epoch": 15.966386554621849,
"grad_norm": 1.1160213947296143,
"learning_rate": 0.001,
"loss": 1.3727,
"step": 49400
},
{
"epoch": 15.998707175177763,
"grad_norm": 0.9431664347648621,
"learning_rate": 0.001,
"loss": 1.3603,
"step": 49500
},
{
"epoch": 16.031027795733678,
"grad_norm": 1.071413516998291,
"learning_rate": 0.001,
"loss": 1.2621,
"step": 49600
},
{
"epoch": 16.063348416289593,
"grad_norm": 1.035800576210022,
"learning_rate": 0.001,
"loss": 1.2914,
"step": 49700
},
{
"epoch": 16.095669036845507,
"grad_norm": 1.001190423965454,
"learning_rate": 0.001,
"loss": 1.2734,
"step": 49800
},
{
"epoch": 16.12798965740142,
"grad_norm": 0.9272027611732483,
"learning_rate": 0.001,
"loss": 1.2658,
"step": 49900
},
{
"epoch": 16.160310277957336,
"grad_norm": 0.8124894499778748,
"learning_rate": 0.001,
"loss": 1.2945,
"step": 50000
},
{
"epoch": 16.19263089851325,
"grad_norm": 1.0019245147705078,
"learning_rate": 0.001,
"loss": 1.3106,
"step": 50100
},
{
"epoch": 16.224951519069165,
"grad_norm": 1.071997880935669,
"learning_rate": 0.001,
"loss": 1.3253,
"step": 50200
},
{
"epoch": 16.25727213962508,
"grad_norm": 0.8945184946060181,
"learning_rate": 0.001,
"loss": 1.2997,
"step": 50300
},
{
"epoch": 16.289592760180994,
"grad_norm": 0.8286392092704773,
"learning_rate": 0.001,
"loss": 1.3153,
"step": 50400
},
{
"epoch": 16.32191338073691,
"grad_norm": 1.137093424797058,
"learning_rate": 0.001,
"loss": 1.3035,
"step": 50500
},
{
"epoch": 16.354234001292824,
"grad_norm": 1.379814863204956,
"learning_rate": 0.001,
"loss": 1.2933,
"step": 50600
},
{
"epoch": 16.386554621848738,
"grad_norm": 0.8075309991836548,
"learning_rate": 0.001,
"loss": 1.3175,
"step": 50700
},
{
"epoch": 16.418875242404653,
"grad_norm": 1.3296177387237549,
"learning_rate": 0.001,
"loss": 1.3143,
"step": 50800
},
{
"epoch": 16.451195862960567,
"grad_norm": 1.5358154773712158,
"learning_rate": 0.001,
"loss": 1.3137,
"step": 50900
},
{
"epoch": 16.483516483516482,
"grad_norm": 1.2911955118179321,
"learning_rate": 0.001,
"loss": 1.3108,
"step": 51000
},
{
"epoch": 16.5158371040724,
"grad_norm": 0.6920326352119446,
"learning_rate": 0.001,
"loss": 1.3177,
"step": 51100
},
{
"epoch": 16.548157724628314,
"grad_norm": 1.075358510017395,
"learning_rate": 0.001,
"loss": 1.3267,
"step": 51200
},
{
"epoch": 16.58047834518423,
"grad_norm": 0.9144567251205444,
"learning_rate": 0.001,
"loss": 1.3318,
"step": 51300
},
{
"epoch": 16.612798965740144,
"grad_norm": 1.1483515501022339,
"learning_rate": 0.001,
"loss": 1.3312,
"step": 51400
},
{
"epoch": 16.645119586296058,
"grad_norm": 1.050889253616333,
"learning_rate": 0.001,
"loss": 1.3428,
"step": 51500
},
{
"epoch": 16.677440206851973,
"grad_norm": 1.0265402793884277,
"learning_rate": 0.001,
"loss": 1.3533,
"step": 51600
},
{
"epoch": 16.709760827407887,
"grad_norm": 0.9257956743240356,
"learning_rate": 0.001,
"loss": 1.3497,
"step": 51700
},
{
"epoch": 16.742081447963802,
"grad_norm": 1.9063913822174072,
"learning_rate": 0.001,
"loss": 1.3191,
"step": 51800
},
{
"epoch": 16.774402068519716,
"grad_norm": 1.0440740585327148,
"learning_rate": 0.001,
"loss": 1.3378,
"step": 51900
},
{
"epoch": 16.80672268907563,
"grad_norm": 1.0758187770843506,
"learning_rate": 0.001,
"loss": 1.3278,
"step": 52000
},
{
"epoch": 16.839043309631545,
"grad_norm": 1.1654984951019287,
"learning_rate": 0.001,
"loss": 1.3627,
"step": 52100
},
{
"epoch": 16.87136393018746,
"grad_norm": 0.9538053870201111,
"learning_rate": 0.001,
"loss": 1.3415,
"step": 52200
},
{
"epoch": 16.903684550743375,
"grad_norm": 1.0862343311309814,
"learning_rate": 0.001,
"loss": 1.351,
"step": 52300
},
{
"epoch": 16.93600517129929,
"grad_norm": 1.0913732051849365,
"learning_rate": 0.001,
"loss": 1.3614,
"step": 52400
},
{
"epoch": 16.968325791855204,
"grad_norm": 1.0344431400299072,
"learning_rate": 0.001,
"loss": 1.3385,
"step": 52500
},
{
"epoch": 17.00064641241112,
"grad_norm": 1.0967899560928345,
"learning_rate": 0.001,
"loss": 1.3154,
"step": 52600
},
{
"epoch": 17.032967032967033,
"grad_norm": 1.7565858364105225,
"learning_rate": 0.001,
"loss": 1.2238,
"step": 52700
},
{
"epoch": 17.065287653522947,
"grad_norm": 1.8335599899291992,
"learning_rate": 0.001,
"loss": 1.2362,
"step": 52800
},
{
"epoch": 17.097608274078862,
"grad_norm": 1.951370358467102,
"learning_rate": 0.001,
"loss": 1.2447,
"step": 52900
},
{
"epoch": 17.129928894634777,
"grad_norm": 1.7428096532821655,
"learning_rate": 0.001,
"loss": 1.273,
"step": 53000
},
{
"epoch": 17.16224951519069,
"grad_norm": 1.5402082204818726,
"learning_rate": 0.001,
"loss": 1.2413,
"step": 53100
},
{
"epoch": 17.194570135746606,
"grad_norm": 1.3417208194732666,
"learning_rate": 0.001,
"loss": 1.2456,
"step": 53200
},
{
"epoch": 17.22689075630252,
"grad_norm": 1.6393389701843262,
"learning_rate": 0.001,
"loss": 1.2635,
"step": 53300
},
{
"epoch": 17.259211376858435,
"grad_norm": 1.5430525541305542,
"learning_rate": 0.001,
"loss": 1.2979,
"step": 53400
},
{
"epoch": 17.29153199741435,
"grad_norm": 1.640655517578125,
"learning_rate": 0.001,
"loss": 1.2704,
"step": 53500
},
{
"epoch": 17.323852617970264,
"grad_norm": 1.7041141986846924,
"learning_rate": 0.001,
"loss": 1.3015,
"step": 53600
},
{
"epoch": 17.35617323852618,
"grad_norm": 1.5950192213058472,
"learning_rate": 0.001,
"loss": 1.2574,
"step": 53700
},
{
"epoch": 17.388493859082093,
"grad_norm": 1.8514941930770874,
"learning_rate": 0.001,
"loss": 1.2826,
"step": 53800
},
{
"epoch": 17.420814479638008,
"grad_norm": 1.5289716720581055,
"learning_rate": 0.001,
"loss": 1.2847,
"step": 53900
},
{
"epoch": 17.453135100193922,
"grad_norm": 1.5072901248931885,
"learning_rate": 0.001,
"loss": 1.3026,
"step": 54000
},
{
"epoch": 17.485455720749837,
"grad_norm": 1.9386134147644043,
"learning_rate": 0.001,
"loss": 1.3189,
"step": 54100
},
{
"epoch": 17.517776341305755,
"grad_norm": 1.3212685585021973,
"learning_rate": 0.001,
"loss": 1.2961,
"step": 54200
},
{
"epoch": 17.55009696186167,
"grad_norm": 1.8733723163604736,
"learning_rate": 0.001,
"loss": 1.2841,
"step": 54300
},
{
"epoch": 17.582417582417584,
"grad_norm": 1.5845545530319214,
"learning_rate": 0.001,
"loss": 1.2776,
"step": 54400
},
{
"epoch": 17.6147382029735,
"grad_norm": 1.2121365070343018,
"learning_rate": 0.001,
"loss": 1.3145,
"step": 54500
},
{
"epoch": 17.647058823529413,
"grad_norm": 1.1913347244262695,
"learning_rate": 0.001,
"loss": 1.3027,
"step": 54600
},
{
"epoch": 17.679379444085328,
"grad_norm": 1.4041755199432373,
"learning_rate": 0.001,
"loss": 1.3157,
"step": 54700
},
{
"epoch": 17.711700064641242,
"grad_norm": 1.5565708875656128,
"learning_rate": 0.001,
"loss": 1.3094,
"step": 54800
},
{
"epoch": 17.744020685197157,
"grad_norm": 1.6354928016662598,
"learning_rate": 0.001,
"loss": 1.3378,
"step": 54900
},
{
"epoch": 17.77634130575307,
"grad_norm": 1.461625099182129,
"learning_rate": 0.001,
"loss": 1.3012,
"step": 55000
},
{
"epoch": 17.808661926308986,
"grad_norm": 1.9901479482650757,
"learning_rate": 0.001,
"loss": 1.3125,
"step": 55100
},
{
"epoch": 17.8409825468649,
"grad_norm": 1.8469276428222656,
"learning_rate": 0.001,
"loss": 1.3086,
"step": 55200
},
{
"epoch": 17.873303167420815,
"grad_norm": 1.725543737411499,
"learning_rate": 0.001,
"loss": 1.351,
"step": 55300
},
{
"epoch": 17.90562378797673,
"grad_norm": 1.799232006072998,
"learning_rate": 0.001,
"loss": 1.3276,
"step": 55400
},
{
"epoch": 17.937944408532644,
"grad_norm": 1.447696566581726,
"learning_rate": 0.001,
"loss": 1.3347,
"step": 55500
},
{
"epoch": 17.97026502908856,
"grad_norm": 1.706591010093689,
"learning_rate": 0.001,
"loss": 1.3436,
"step": 55600
},
{
"epoch": 18.002585649644473,
"grad_norm": 1.0814664363861084,
"learning_rate": 0.001,
"loss": 1.3459,
"step": 55700
},
{
"epoch": 18.034906270200388,
"grad_norm": 1.1114346981048584,
"learning_rate": 0.001,
"loss": 1.1934,
"step": 55800
},
{
"epoch": 18.067226890756302,
"grad_norm": 1.211247205734253,
"learning_rate": 0.001,
"loss": 1.2291,
"step": 55900
},
{
"epoch": 18.099547511312217,
"grad_norm": 1.1112585067749023,
"learning_rate": 0.001,
"loss": 1.2196,
"step": 56000
},
{
"epoch": 18.13186813186813,
"grad_norm": 1.2427830696105957,
"learning_rate": 0.001,
"loss": 1.2401,
"step": 56100
},
{
"epoch": 18.164188752424046,
"grad_norm": 1.1477326154708862,
"learning_rate": 0.001,
"loss": 1.2263,
"step": 56200
},
{
"epoch": 18.19650937297996,
"grad_norm": 1.2850887775421143,
"learning_rate": 0.001,
"loss": 1.2521,
"step": 56300
},
{
"epoch": 18.228829993535875,
"grad_norm": 1.1225168704986572,
"learning_rate": 0.001,
"loss": 1.2548,
"step": 56400
},
{
"epoch": 18.26115061409179,
"grad_norm": 1.1634271144866943,
"learning_rate": 0.001,
"loss": 1.2274,
"step": 56500
},
{
"epoch": 18.293471234647704,
"grad_norm": 1.162834644317627,
"learning_rate": 0.001,
"loss": 1.2562,
"step": 56600
},
{
"epoch": 18.32579185520362,
"grad_norm": 1.3091973066329956,
"learning_rate": 0.001,
"loss": 1.2844,
"step": 56700
},
{
"epoch": 18.358112475759533,
"grad_norm": 1.1031546592712402,
"learning_rate": 0.001,
"loss": 1.2595,
"step": 56800
},
{
"epoch": 18.390433096315448,
"grad_norm": 1.2217282056808472,
"learning_rate": 0.001,
"loss": 1.2533,
"step": 56900
},
{
"epoch": 18.422753716871362,
"grad_norm": 1.2840886116027832,
"learning_rate": 0.001,
"loss": 1.2639,
"step": 57000
},
{
"epoch": 18.455074337427277,
"grad_norm": 0.935218870639801,
"learning_rate": 0.001,
"loss": 1.2536,
"step": 57100
},
{
"epoch": 18.48739495798319,
"grad_norm": 1.423843502998352,
"learning_rate": 0.001,
"loss": 1.2741,
"step": 57200
},
{
"epoch": 18.51971557853911,
"grad_norm": 1.232347011566162,
"learning_rate": 0.001,
"loss": 1.2671,
"step": 57300
},
{
"epoch": 18.552036199095024,
"grad_norm": 1.2617510557174683,
"learning_rate": 0.001,
"loss": 1.2726,
"step": 57400
},
{
"epoch": 18.58435681965094,
"grad_norm": 1.3618507385253906,
"learning_rate": 0.001,
"loss": 1.2602,
"step": 57500
},
{
"epoch": 18.616677440206853,
"grad_norm": 1.2484835386276245,
"learning_rate": 0.001,
"loss": 1.2583,
"step": 57600
},
{
"epoch": 18.648998060762768,
"grad_norm": 1.2037937641143799,
"learning_rate": 0.001,
"loss": 1.2757,
"step": 57700
},
{
"epoch": 18.681318681318682,
"grad_norm": 1.1551131010055542,
"learning_rate": 0.001,
"loss": 1.2876,
"step": 57800
},
{
"epoch": 18.713639301874597,
"grad_norm": 1.0815056562423706,
"learning_rate": 0.001,
"loss": 1.3025,
"step": 57900
},
{
"epoch": 18.74595992243051,
"grad_norm": 1.0868639945983887,
"learning_rate": 0.001,
"loss": 1.3064,
"step": 58000
},
{
"epoch": 18.778280542986426,
"grad_norm": 1.2222222089767456,
"learning_rate": 0.001,
"loss": 1.2787,
"step": 58100
},
{
"epoch": 18.81060116354234,
"grad_norm": 1.2130047082901,
"learning_rate": 0.001,
"loss": 1.2716,
"step": 58200
},
{
"epoch": 18.842921784098255,
"grad_norm": 1.2509404420852661,
"learning_rate": 0.001,
"loss": 1.2875,
"step": 58300
},
{
"epoch": 18.87524240465417,
"grad_norm": 1.3974509239196777,
"learning_rate": 0.001,
"loss": 1.3006,
"step": 58400
},
{
"epoch": 18.907563025210084,
"grad_norm": 1.0603617429733276,
"learning_rate": 0.001,
"loss": 1.3321,
"step": 58500
},
{
"epoch": 18.939883645766,
"grad_norm": 1.0816590785980225,
"learning_rate": 0.001,
"loss": 1.2933,
"step": 58600
},
{
"epoch": 18.972204266321913,
"grad_norm": 1.3309355974197388,
"learning_rate": 0.001,
"loss": 1.2893,
"step": 58700
},
{
"epoch": 19.004524886877828,
"grad_norm": 1.2768296003341675,
"learning_rate": 0.001,
"loss": 1.3172,
"step": 58800
},
{
"epoch": 19.036845507433743,
"grad_norm": 1.4616053104400635,
"learning_rate": 0.001,
"loss": 1.1704,
"step": 58900
},
{
"epoch": 19.069166127989657,
"grad_norm": 1.417331337928772,
"learning_rate": 0.001,
"loss": 1.1887,
"step": 59000
},
{
"epoch": 19.10148674854557,
"grad_norm": 1.2152198553085327,
"learning_rate": 0.001,
"loss": 1.1897,
"step": 59100
},
{
"epoch": 19.133807369101486,
"grad_norm": 1.1913012266159058,
"learning_rate": 0.001,
"loss": 1.2236,
"step": 59200
},
{
"epoch": 19.1661279896574,
"grad_norm": 0.9392966628074646,
"learning_rate": 0.001,
"loss": 1.226,
"step": 59300
},
{
"epoch": 19.198448610213315,
"grad_norm": 1.0119884014129639,
"learning_rate": 0.001,
"loss": 1.2228,
"step": 59400
},
{
"epoch": 19.23076923076923,
"grad_norm": 1.276890754699707,
"learning_rate": 0.001,
"loss": 1.2287,
"step": 59500
},
{
"epoch": 19.263089851325145,
"grad_norm": 1.1096450090408325,
"learning_rate": 0.001,
"loss": 1.2371,
"step": 59600
},
{
"epoch": 19.29541047188106,
"grad_norm": 1.1702587604522705,
"learning_rate": 0.001,
"loss": 1.2268,
"step": 59700
},
{
"epoch": 19.327731092436974,
"grad_norm": 1.0416254997253418,
"learning_rate": 0.001,
"loss": 1.2284,
"step": 59800
},
{
"epoch": 19.360051712992888,
"grad_norm": 1.093361735343933,
"learning_rate": 0.001,
"loss": 1.2461,
"step": 59900
},
{
"epoch": 19.392372333548803,
"grad_norm": 1.0348377227783203,
"learning_rate": 0.001,
"loss": 1.2525,
"step": 60000
},
{
"epoch": 19.424692954104717,
"grad_norm": 1.9038110971450806,
"learning_rate": 0.001,
"loss": 1.2481,
"step": 60100
},
{
"epoch": 19.457013574660632,
"grad_norm": 1.2229783535003662,
"learning_rate": 0.001,
"loss": 1.2186,
"step": 60200
},
{
"epoch": 19.489334195216546,
"grad_norm": 1.1477720737457275,
"learning_rate": 0.001,
"loss": 1.2482,
"step": 60300
},
{
"epoch": 19.521654815772465,
"grad_norm": 1.255200743675232,
"learning_rate": 0.001,
"loss": 1.2552,
"step": 60400
},
{
"epoch": 19.55397543632838,
"grad_norm": 1.0543900728225708,
"learning_rate": 0.001,
"loss": 1.2373,
"step": 60500
},
{
"epoch": 19.586296056884294,
"grad_norm": 1.2552344799041748,
"learning_rate": 0.001,
"loss": 1.2511,
"step": 60600
},
{
"epoch": 19.618616677440208,
"grad_norm": 1.2354882955551147,
"learning_rate": 0.001,
"loss": 1.2509,
"step": 60700
},
{
"epoch": 19.650937297996123,
"grad_norm": 1.1915310621261597,
"learning_rate": 0.001,
"loss": 1.2541,
"step": 60800
},
{
"epoch": 19.683257918552037,
"grad_norm": 1.3169018030166626,
"learning_rate": 0.001,
"loss": 1.246,
"step": 60900
},
{
"epoch": 19.715578539107952,
"grad_norm": 1.34446120262146,
"learning_rate": 0.001,
"loss": 1.2651,
"step": 61000
},
{
"epoch": 19.747899159663866,
"grad_norm": 1.141524076461792,
"learning_rate": 0.001,
"loss": 1.2607,
"step": 61100
},
{
"epoch": 19.78021978021978,
"grad_norm": 1.2178977727890015,
"learning_rate": 0.001,
"loss": 1.2887,
"step": 61200
},
{
"epoch": 19.812540400775696,
"grad_norm": 0.9515364766120911,
"learning_rate": 0.001,
"loss": 1.2445,
"step": 61300
},
{
"epoch": 19.84486102133161,
"grad_norm": 1.2744394540786743,
"learning_rate": 0.001,
"loss": 1.2612,
"step": 61400
},
{
"epoch": 19.877181641887525,
"grad_norm": 1.0756638050079346,
"learning_rate": 0.001,
"loss": 1.2727,
"step": 61500
},
{
"epoch": 19.90950226244344,
"grad_norm": 1.0071052312850952,
"learning_rate": 0.001,
"loss": 1.2548,
"step": 61600
},
{
"epoch": 19.941822882999354,
"grad_norm": 1.349400520324707,
"learning_rate": 0.001,
"loss": 1.2679,
"step": 61700
},
{
"epoch": 19.97414350355527,
"grad_norm": 3.5351197719573975,
"learning_rate": 0.001,
"loss": 1.2702,
"step": 61800
},
{
"epoch": 20.006464124111183,
"grad_norm": 1.235960602760315,
"learning_rate": 0.001,
"loss": 1.2695,
"step": 61900
},
{
"epoch": 20.038784744667097,
"grad_norm": 1.0976554155349731,
"learning_rate": 0.001,
"loss": 1.1755,
"step": 62000
},
{
"epoch": 20.071105365223012,
"grad_norm": 1.267794132232666,
"learning_rate": 0.001,
"loss": 1.1877,
"step": 62100
},
{
"epoch": 20.103425985778927,
"grad_norm": 1.089699625968933,
"learning_rate": 0.001,
"loss": 1.1617,
"step": 62200
},
{
"epoch": 20.13574660633484,
"grad_norm": 1.0829448699951172,
"learning_rate": 0.001,
"loss": 1.1832,
"step": 62300
},
{
"epoch": 20.168067226890756,
"grad_norm": 1.2309054136276245,
"learning_rate": 0.001,
"loss": 1.2009,
"step": 62400
},
{
"epoch": 20.20038784744667,
"grad_norm": 1.1939396858215332,
"learning_rate": 0.001,
"loss": 1.1831,
"step": 62500
},
{
"epoch": 20.232708468002585,
"grad_norm": 1.3018850088119507,
"learning_rate": 0.001,
"loss": 1.192,
"step": 62600
},
{
"epoch": 20.2650290885585,
"grad_norm": 0.9886336922645569,
"learning_rate": 0.001,
"loss": 1.2002,
"step": 62700
},
{
"epoch": 20.297349709114414,
"grad_norm": 1.4016402959823608,
"learning_rate": 0.001,
"loss": 1.2176,
"step": 62800
},
{
"epoch": 20.32967032967033,
"grad_norm": 1.0506421327590942,
"learning_rate": 0.001,
"loss": 1.2063,
"step": 62900
},
{
"epoch": 20.361990950226243,
"grad_norm": 1.0052201747894287,
"learning_rate": 0.001,
"loss": 1.2195,
"step": 63000
},
{
"epoch": 20.394311570782158,
"grad_norm": 1.0880728960037231,
"learning_rate": 0.001,
"loss": 1.2034,
"step": 63100
},
{
"epoch": 20.426632191338072,
"grad_norm": 1.157288670539856,
"learning_rate": 0.001,
"loss": 1.2042,
"step": 63200
},
{
"epoch": 20.458952811893987,
"grad_norm": 0.956786036491394,
"learning_rate": 0.001,
"loss": 1.1999,
"step": 63300
},
{
"epoch": 20.4912734324499,
"grad_norm": 1.3962632417678833,
"learning_rate": 0.001,
"loss": 1.2276,
"step": 63400
},
{
"epoch": 20.52359405300582,
"grad_norm": 1.192475438117981,
"learning_rate": 0.001,
"loss": 1.196,
"step": 63500
},
{
"epoch": 20.555914673561734,
"grad_norm": 1.085652470588684,
"learning_rate": 0.001,
"loss": 1.2409,
"step": 63600
},
{
"epoch": 20.58823529411765,
"grad_norm": 1.0996172428131104,
"learning_rate": 0.001,
"loss": 1.2265,
"step": 63700
},
{
"epoch": 20.620555914673563,
"grad_norm": 1.1674541234970093,
"learning_rate": 0.001,
"loss": 1.2195,
"step": 63800
},
{
"epoch": 20.652876535229478,
"grad_norm": 1.3376907110214233,
"learning_rate": 0.001,
"loss": 1.2255,
"step": 63900
},
{
"epoch": 20.685197155785392,
"grad_norm": 1.0617411136627197,
"learning_rate": 0.001,
"loss": 1.2072,
"step": 64000
},
{
"epoch": 20.717517776341307,
"grad_norm": 0.935806930065155,
"learning_rate": 0.001,
"loss": 1.2313,
"step": 64100
},
{
"epoch": 20.74983839689722,
"grad_norm": 1.2648018598556519,
"learning_rate": 0.001,
"loss": 1.2438,
"step": 64200
},
{
"epoch": 20.782159017453136,
"grad_norm": 1.2684612274169922,
"learning_rate": 0.001,
"loss": 1.2323,
"step": 64300
},
{
"epoch": 20.81447963800905,
"grad_norm": 1.0561871528625488,
"learning_rate": 0.001,
"loss": 1.2478,
"step": 64400
},
{
"epoch": 20.846800258564965,
"grad_norm": 1.1016511917114258,
"learning_rate": 0.001,
"loss": 1.2459,
"step": 64500
},
{
"epoch": 20.87912087912088,
"grad_norm": 1.1570508480072021,
"learning_rate": 0.001,
"loss": 1.272,
"step": 64600
},
{
"epoch": 20.911441499676794,
"grad_norm": 1.2147791385650635,
"learning_rate": 0.001,
"loss": 1.2439,
"step": 64700
},
{
"epoch": 20.94376212023271,
"grad_norm": 1.2510056495666504,
"learning_rate": 0.001,
"loss": 1.2652,
"step": 64800
},
{
"epoch": 20.976082740788623,
"grad_norm": 0.9977579116821289,
"learning_rate": 0.001,
"loss": 1.2608,
"step": 64900
},
{
"epoch": 21.008403361344538,
"grad_norm": 1.4413050413131714,
"learning_rate": 0.001,
"loss": 1.2153,
"step": 65000
},
{
"epoch": 21.040723981900452,
"grad_norm": 1.1104098558425903,
"learning_rate": 0.001,
"loss": 1.1585,
"step": 65100
},
{
"epoch": 21.073044602456367,
"grad_norm": 1.1735037565231323,
"learning_rate": 0.001,
"loss": 1.1601,
"step": 65200
},
{
"epoch": 21.10536522301228,
"grad_norm": 1.2895740270614624,
"learning_rate": 0.001,
"loss": 1.1557,
"step": 65300
},
{
"epoch": 21.137685843568196,
"grad_norm": 1.7517699003219604,
"learning_rate": 0.001,
"loss": 1.1728,
"step": 65400
},
{
"epoch": 21.17000646412411,
"grad_norm": 1.2219549417495728,
"learning_rate": 0.001,
"loss": 1.1527,
"step": 65500
},
{
"epoch": 21.202327084680025,
"grad_norm": 1.2364577054977417,
"learning_rate": 0.001,
"loss": 1.1771,
"step": 65600
},
{
"epoch": 21.23464770523594,
"grad_norm": 1.2639278173446655,
"learning_rate": 0.001,
"loss": 1.1716,
"step": 65700
},
{
"epoch": 21.266968325791854,
"grad_norm": 1.2081694602966309,
"learning_rate": 0.001,
"loss": 1.1708,
"step": 65800
},
{
"epoch": 21.29928894634777,
"grad_norm": 1.219407558441162,
"learning_rate": 0.001,
"loss": 1.1861,
"step": 65900
},
{
"epoch": 21.331609566903683,
"grad_norm": 1.067844271659851,
"learning_rate": 0.001,
"loss": 1.1979,
"step": 66000
},
{
"epoch": 21.363930187459598,
"grad_norm": 1.2782212495803833,
"learning_rate": 0.001,
"loss": 1.1796,
"step": 66100
},
{
"epoch": 21.396250808015512,
"grad_norm": 1.3282757997512817,
"learning_rate": 0.001,
"loss": 1.186,
"step": 66200
},
{
"epoch": 21.428571428571427,
"grad_norm": 1.018190860748291,
"learning_rate": 0.001,
"loss": 1.242,
"step": 66300
},
{
"epoch": 21.46089204912734,
"grad_norm": 1.1868315935134888,
"learning_rate": 0.001,
"loss": 1.2631,
"step": 66400
},
{
"epoch": 21.49321266968326,
"grad_norm": 1.3459444046020508,
"learning_rate": 0.001,
"loss": 1.2075,
"step": 66500
},
{
"epoch": 21.525533290239174,
"grad_norm": 0.9880927801132202,
"learning_rate": 0.001,
"loss": 1.1989,
"step": 66600
},
{
"epoch": 21.55785391079509,
"grad_norm": 1.5468779802322388,
"learning_rate": 0.001,
"loss": 1.2444,
"step": 66700
},
{
"epoch": 21.590174531351003,
"grad_norm": 0.9835416674613953,
"learning_rate": 0.001,
"loss": 1.2407,
"step": 66800
},
{
"epoch": 21.622495151906918,
"grad_norm": 1.1654820442199707,
"learning_rate": 0.001,
"loss": 1.2372,
"step": 66900
},
{
"epoch": 21.654815772462833,
"grad_norm": 1.2552804946899414,
"learning_rate": 0.001,
"loss": 1.2291,
"step": 67000
},
{
"epoch": 21.687136393018747,
"grad_norm": 3.475341796875,
"learning_rate": 0.001,
"loss": 1.2355,
"step": 67100
},
{
"epoch": 21.71945701357466,
"grad_norm": 1.0410614013671875,
"learning_rate": 0.001,
"loss": 1.234,
"step": 67200
},
{
"epoch": 21.751777634130576,
"grad_norm": 1.168955683708191,
"learning_rate": 0.001,
"loss": 1.2406,
"step": 67300
},
{
"epoch": 21.78409825468649,
"grad_norm": 1.7832200527191162,
"learning_rate": 0.001,
"loss": 1.2689,
"step": 67400
},
{
"epoch": 21.816418875242405,
"grad_norm": 1.3156075477600098,
"learning_rate": 0.001,
"loss": 1.2598,
"step": 67500
},
{
"epoch": 21.84873949579832,
"grad_norm": 1.005856990814209,
"learning_rate": 0.001,
"loss": 1.2477,
"step": 67600
},
{
"epoch": 21.881060116354234,
"grad_norm": 0.9797137379646301,
"learning_rate": 0.001,
"loss": 1.2534,
"step": 67700
},
{
"epoch": 21.91338073691015,
"grad_norm": 0.9590547680854797,
"learning_rate": 0.001,
"loss": 1.2328,
"step": 67800
},
{
"epoch": 21.945701357466064,
"grad_norm": 1.1797035932540894,
"learning_rate": 0.001,
"loss": 1.2347,
"step": 67900
},
{
"epoch": 21.978021978021978,
"grad_norm": 1.065169334411621,
"learning_rate": 0.001,
"loss": 1.2465,
"step": 68000
},
{
"epoch": 22.010342598577893,
"grad_norm": 1.0149396657943726,
"learning_rate": 0.001,
"loss": 1.1883,
"step": 68100
},
{
"epoch": 22.042663219133807,
"grad_norm": 1.1508020162582397,
"learning_rate": 0.001,
"loss": 1.156,
"step": 68200
},
{
"epoch": 22.07498383968972,
"grad_norm": 3.199270725250244,
"learning_rate": 0.001,
"loss": 1.1385,
"step": 68300
},
{
"epoch": 22.107304460245636,
"grad_norm": 1.0568350553512573,
"learning_rate": 0.001,
"loss": 1.1544,
"step": 68400
},
{
"epoch": 22.13962508080155,
"grad_norm": 5.397552013397217,
"learning_rate": 0.001,
"loss": 1.149,
"step": 68500
},
{
"epoch": 22.171945701357465,
"grad_norm": 1.1484830379486084,
"learning_rate": 0.001,
"loss": 1.1669,
"step": 68600
},
{
"epoch": 22.20426632191338,
"grad_norm": 1.3436305522918701,
"learning_rate": 0.001,
"loss": 1.1669,
"step": 68700
},
{
"epoch": 22.236586942469295,
"grad_norm": 1.3443758487701416,
"learning_rate": 0.001,
"loss": 1.1538,
"step": 68800
},
{
"epoch": 22.26890756302521,
"grad_norm": 1.4839012622833252,
"learning_rate": 0.001,
"loss": 1.1557,
"step": 68900
},
{
"epoch": 22.301228183581124,
"grad_norm": 1.2252734899520874,
"learning_rate": 0.001,
"loss": 1.1756,
"step": 69000
},
{
"epoch": 22.33354880413704,
"grad_norm": 1.0860698223114014,
"learning_rate": 0.001,
"loss": 1.161,
"step": 69100
},
{
"epoch": 22.365869424692953,
"grad_norm": 0.9651658535003662,
"learning_rate": 0.001,
"loss": 1.2006,
"step": 69200
},
{
"epoch": 22.398190045248867,
"grad_norm": 1.0372250080108643,
"learning_rate": 0.001,
"loss": 1.1829,
"step": 69300
},
{
"epoch": 22.430510665804782,
"grad_norm": 1.0921285152435303,
"learning_rate": 0.001,
"loss": 1.1988,
"step": 69400
},
{
"epoch": 22.462831286360696,
"grad_norm": 1.0957082509994507,
"learning_rate": 0.001,
"loss": 1.1702,
"step": 69500
},
{
"epoch": 22.49515190691661,
"grad_norm": 0.9881604909896851,
"learning_rate": 0.001,
"loss": 1.1969,
"step": 69600
},
{
"epoch": 22.52747252747253,
"grad_norm": 1.0762852430343628,
"learning_rate": 0.001,
"loss": 1.1866,
"step": 69700
},
{
"epoch": 22.559793148028444,
"grad_norm": 1.1281818151474,
"learning_rate": 0.001,
"loss": 1.1812,
"step": 69800
},
{
"epoch": 22.59211376858436,
"grad_norm": 1.1947038173675537,
"learning_rate": 0.001,
"loss": 1.1999,
"step": 69900
},
{
"epoch": 22.624434389140273,
"grad_norm": 1.0397061109542847,
"learning_rate": 0.001,
"loss": 1.1993,
"step": 70000
},
{
"epoch": 22.656755009696187,
"grad_norm": 0.9127522110939026,
"learning_rate": 0.001,
"loss": 1.1945,
"step": 70100
},
{
"epoch": 22.689075630252102,
"grad_norm": 1.0395044088363647,
"learning_rate": 0.001,
"loss": 1.2217,
"step": 70200
},
{
"epoch": 22.721396250808017,
"grad_norm": 1.1878328323364258,
"learning_rate": 0.001,
"loss": 1.211,
"step": 70300
},
{
"epoch": 22.75371687136393,
"grad_norm": 1.0908920764923096,
"learning_rate": 0.001,
"loss": 1.1992,
"step": 70400
},
{
"epoch": 22.786037491919846,
"grad_norm": 1.1181707382202148,
"learning_rate": 0.001,
"loss": 1.1994,
"step": 70500
},
{
"epoch": 22.81835811247576,
"grad_norm": 0.9277530312538147,
"learning_rate": 0.001,
"loss": 1.2147,
"step": 70600
},
{
"epoch": 22.850678733031675,
"grad_norm": 1.4166144132614136,
"learning_rate": 0.001,
"loss": 1.1931,
"step": 70700
},
{
"epoch": 22.88299935358759,
"grad_norm": 1.0426658391952515,
"learning_rate": 0.001,
"loss": 1.2126,
"step": 70800
},
{
"epoch": 22.915319974143504,
"grad_norm": 1.1179332733154297,
"learning_rate": 0.001,
"loss": 1.2002,
"step": 70900
},
{
"epoch": 22.94764059469942,
"grad_norm": 1.1418015956878662,
"learning_rate": 0.001,
"loss": 1.206,
"step": 71000
},
{
"epoch": 22.979961215255333,
"grad_norm": 1.1119362115859985,
"learning_rate": 0.001,
"loss": 1.2136,
"step": 71100
},
{
"epoch": 23.012281835811248,
"grad_norm": 1.1023640632629395,
"learning_rate": 0.001,
"loss": 1.133,
"step": 71200
},
{
"epoch": 23.044602456367162,
"grad_norm": 1.1139122247695923,
"learning_rate": 0.001,
"loss": 1.1339,
"step": 71300
},
{
"epoch": 23.076923076923077,
"grad_norm": 1.0510680675506592,
"learning_rate": 0.001,
"loss": 1.0907,
"step": 71400
},
{
"epoch": 23.10924369747899,
"grad_norm": 1.1501266956329346,
"learning_rate": 0.001,
"loss": 1.1127,
"step": 71500
},
{
"epoch": 23.141564318034906,
"grad_norm": 1.1204190254211426,
"learning_rate": 0.001,
"loss": 1.1262,
"step": 71600
},
{
"epoch": 23.17388493859082,
"grad_norm": 1.0314933061599731,
"learning_rate": 0.001,
"loss": 1.1214,
"step": 71700
},
{
"epoch": 23.206205559146735,
"grad_norm": 1.4279398918151855,
"learning_rate": 0.001,
"loss": 1.1146,
"step": 71800
},
{
"epoch": 23.23852617970265,
"grad_norm": 0.9070366621017456,
"learning_rate": 0.001,
"loss": 1.1344,
"step": 71900
},
{
"epoch": 23.270846800258564,
"grad_norm": 1.1681766510009766,
"learning_rate": 0.001,
"loss": 1.1446,
"step": 72000
},
{
"epoch": 23.30316742081448,
"grad_norm": 1.0091142654418945,
"learning_rate": 0.001,
"loss": 1.1534,
"step": 72100
},
{
"epoch": 23.335488041370393,
"grad_norm": 1.163992166519165,
"learning_rate": 0.001,
"loss": 1.1348,
"step": 72200
},
{
"epoch": 23.367808661926308,
"grad_norm": 1.4471473693847656,
"learning_rate": 0.001,
"loss": 1.1372,
"step": 72300
},
{
"epoch": 23.400129282482222,
"grad_norm": 1.072084665298462,
"learning_rate": 0.001,
"loss": 1.1513,
"step": 72400
},
{
"epoch": 23.432449903038137,
"grad_norm": 1.074723720550537,
"learning_rate": 0.001,
"loss": 1.1345,
"step": 72500
},
{
"epoch": 23.46477052359405,
"grad_norm": 1.3358465433120728,
"learning_rate": 0.001,
"loss": 1.148,
"step": 72600
},
{
"epoch": 23.49709114414997,
"grad_norm": 1.1754719018936157,
"learning_rate": 0.001,
"loss": 1.1531,
"step": 72700
},
{
"epoch": 23.529411764705884,
"grad_norm": 1.1433322429656982,
"learning_rate": 0.001,
"loss": 1.1684,
"step": 72800
},
{
"epoch": 23.5617323852618,
"grad_norm": 1.201206922531128,
"learning_rate": 0.001,
"loss": 1.1648,
"step": 72900
},
{
"epoch": 23.594053005817713,
"grad_norm": 1.3061528205871582,
"learning_rate": 0.001,
"loss": 1.177,
"step": 73000
},
{
"epoch": 23.626373626373628,
"grad_norm": 1.055266261100769,
"learning_rate": 0.001,
"loss": 1.1715,
"step": 73100
},
{
"epoch": 23.658694246929542,
"grad_norm": 1.279536247253418,
"learning_rate": 0.001,
"loss": 1.1822,
"step": 73200
},
{
"epoch": 23.691014867485457,
"grad_norm": 1.0013713836669922,
"learning_rate": 0.001,
"loss": 1.182,
"step": 73300
},
{
"epoch": 23.72333548804137,
"grad_norm": 1.2121005058288574,
"learning_rate": 0.001,
"loss": 1.1937,
"step": 73400
},
{
"epoch": 23.755656108597286,
"grad_norm": 1.1226108074188232,
"learning_rate": 0.001,
"loss": 1.1872,
"step": 73500
},
{
"epoch": 23.7879767291532,
"grad_norm": 1.16231369972229,
"learning_rate": 0.001,
"loss": 1.2158,
"step": 73600
},
{
"epoch": 23.820297349709115,
"grad_norm": 0.9862212538719177,
"learning_rate": 0.001,
"loss": 1.1814,
"step": 73700
},
{
"epoch": 23.85261797026503,
"grad_norm": 1.0801526308059692,
"learning_rate": 0.001,
"loss": 1.1786,
"step": 73800
},
{
"epoch": 23.884938590820944,
"grad_norm": 1.2229857444763184,
"learning_rate": 0.001,
"loss": 1.1822,
"step": 73900
},
{
"epoch": 23.91725921137686,
"grad_norm": 1.4451088905334473,
"learning_rate": 0.001,
"loss": 1.1959,
"step": 74000
},
{
"epoch": 23.949579831932773,
"grad_norm": 0.986847460269928,
"learning_rate": 0.001,
"loss": 1.1885,
"step": 74100
},
{
"epoch": 23.981900452488688,
"grad_norm": 1.1042050123214722,
"learning_rate": 0.001,
"loss": 1.1995,
"step": 74200
},
{
"epoch": 24.014221073044602,
"grad_norm": 0.8930547833442688,
"learning_rate": 0.001,
"loss": 1.1226,
"step": 74300
},
{
"epoch": 24.046541693600517,
"grad_norm": 1.0590393543243408,
"learning_rate": 0.001,
"loss": 1.0869,
"step": 74400
},
{
"epoch": 24.07886231415643,
"grad_norm": 1.775429368019104,
"learning_rate": 0.001,
"loss": 1.0912,
"step": 74500
},
{
"epoch": 24.111182934712346,
"grad_norm": 1.147539734840393,
"learning_rate": 0.001,
"loss": 1.088,
"step": 74600
},
{
"epoch": 24.14350355526826,
"grad_norm": 0.8840130567550659,
"learning_rate": 0.001,
"loss": 1.1231,
"step": 74700
},
{
"epoch": 24.175824175824175,
"grad_norm": 1.4045928716659546,
"learning_rate": 0.001,
"loss": 1.1347,
"step": 74800
},
{
"epoch": 24.20814479638009,
"grad_norm": 1.090484857559204,
"learning_rate": 0.001,
"loss": 1.0942,
"step": 74900
},
{
"epoch": 24.240465416936004,
"grad_norm": 1.1950263977050781,
"learning_rate": 0.001,
"loss": 1.1123,
"step": 75000
},
{
"epoch": 24.27278603749192,
"grad_norm": 2.7916829586029053,
"learning_rate": 0.001,
"loss": 1.113,
"step": 75100
},
{
"epoch": 24.305106658047833,
"grad_norm": 1.1619802713394165,
"learning_rate": 0.001,
"loss": 1.1258,
"step": 75200
},
{
"epoch": 24.337427278603748,
"grad_norm": 0.9073593616485596,
"learning_rate": 0.001,
"loss": 1.1345,
"step": 75300
},
{
"epoch": 24.369747899159663,
"grad_norm": 0.8575296401977539,
"learning_rate": 0.001,
"loss": 1.1108,
"step": 75400
},
{
"epoch": 24.402068519715577,
"grad_norm": 1.386518955230713,
"learning_rate": 0.001,
"loss": 1.1076,
"step": 75500
},
{
"epoch": 24.43438914027149,
"grad_norm": 76.49897003173828,
"learning_rate": 0.001,
"loss": 1.1144,
"step": 75600
},
{
"epoch": 24.466709760827406,
"grad_norm": 0.9959169626235962,
"learning_rate": 0.001,
"loss": 1.1121,
"step": 75700
},
{
"epoch": 24.49903038138332,
"grad_norm": 0.896315336227417,
"learning_rate": 0.001,
"loss": 1.1449,
"step": 75800
},
{
"epoch": 24.53135100193924,
"grad_norm": 0.8647759556770325,
"learning_rate": 0.001,
"loss": 1.1313,
"step": 75900
},
{
"epoch": 24.563671622495153,
"grad_norm": 1.080623984336853,
"learning_rate": 0.001,
"loss": 1.1644,
"step": 76000
},
{
"epoch": 24.595992243051068,
"grad_norm": 1.1319677829742432,
"learning_rate": 0.001,
"loss": 1.1349,
"step": 76100
},
{
"epoch": 24.628312863606983,
"grad_norm": 1.0553691387176514,
"learning_rate": 0.001,
"loss": 1.1447,
"step": 76200
},
{
"epoch": 24.660633484162897,
"grad_norm": 1.051260232925415,
"learning_rate": 0.001,
"loss": 1.1257,
"step": 76300
},
{
"epoch": 24.69295410471881,
"grad_norm": 1.0142831802368164,
"learning_rate": 0.001,
"loss": 1.161,
"step": 76400
},
{
"epoch": 24.725274725274726,
"grad_norm": 1.0020849704742432,
"learning_rate": 0.001,
"loss": 1.1579,
"step": 76500
},
{
"epoch": 24.75759534583064,
"grad_norm": 0.996508002281189,
"learning_rate": 0.001,
"loss": 1.1621,
"step": 76600
},
{
"epoch": 24.789915966386555,
"grad_norm": 1.1945658922195435,
"learning_rate": 0.001,
"loss": 1.1346,
"step": 76700
},
{
"epoch": 24.82223658694247,
"grad_norm": 0.9318341612815857,
"learning_rate": 0.001,
"loss": 1.1534,
"step": 76800
},
{
"epoch": 24.854557207498384,
"grad_norm": 1.0282987356185913,
"learning_rate": 0.001,
"loss": 1.1679,
"step": 76900
},
{
"epoch": 24.8868778280543,
"grad_norm": 0.9928200840950012,
"learning_rate": 0.001,
"loss": 1.1851,
"step": 77000
},
{
"epoch": 24.919198448610214,
"grad_norm": 1.1055632829666138,
"learning_rate": 0.001,
"loss": 1.1779,
"step": 77100
},
{
"epoch": 24.951519069166128,
"grad_norm": 0.9744527339935303,
"learning_rate": 0.001,
"loss": 1.1775,
"step": 77200
},
{
"epoch": 24.983839689722043,
"grad_norm": 1.0894049406051636,
"learning_rate": 0.001,
"loss": 1.1786,
"step": 77300
},
{
"epoch": 25.016160310277957,
"grad_norm": 0.2896379232406616,
"learning_rate": 0.001,
"loss": 1.0473,
"step": 77400
},
{
"epoch": 25.048480930833872,
"grad_norm": 0.6147540211677551,
"learning_rate": 0.001,
"loss": 1.1036,
"step": 77500
},
{
"epoch": 25.080801551389786,
"grad_norm": 0.5519897937774658,
"learning_rate": 0.001,
"loss": 1.0645,
"step": 77600
},
{
"epoch": 25.1131221719457,
"grad_norm": 0.30935660004615784,
"learning_rate": 0.001,
"loss": 1.0736,
"step": 77700
},
{
"epoch": 25.145442792501616,
"grad_norm": 1.2920175790786743,
"learning_rate": 0.001,
"loss": 1.0879,
"step": 77800
},
{
"epoch": 25.17776341305753,
"grad_norm": 0.775704562664032,
"learning_rate": 0.001,
"loss": 1.106,
"step": 77900
},
{
"epoch": 25.210084033613445,
"grad_norm": 0.5069411993026733,
"learning_rate": 0.001,
"loss": 1.096,
"step": 78000
},
{
"epoch": 25.24240465416936,
"grad_norm": 1.0527359247207642,
"learning_rate": 0.001,
"loss": 1.0937,
"step": 78100
},
{
"epoch": 25.274725274725274,
"grad_norm": 1.1386781930923462,
"learning_rate": 0.001,
"loss": 1.0713,
"step": 78200
},
{
"epoch": 25.30704589528119,
"grad_norm": 0.48567479848861694,
"learning_rate": 0.001,
"loss": 1.1076,
"step": 78300
},
{
"epoch": 25.339366515837103,
"grad_norm": 0.3815343379974365,
"learning_rate": 0.001,
"loss": 1.1034,
"step": 78400
},
{
"epoch": 25.371687136393017,
"grad_norm": 0.11455032974481583,
"learning_rate": 0.001,
"loss": 1.1048,
"step": 78500
},
{
"epoch": 25.404007756948932,
"grad_norm": 0.6132304072380066,
"learning_rate": 0.001,
"loss": 1.1235,
"step": 78600
},
{
"epoch": 25.436328377504847,
"grad_norm": 0.564409613609314,
"learning_rate": 0.001,
"loss": 1.1046,
"step": 78700
},
{
"epoch": 25.46864899806076,
"grad_norm": 0.3146139085292816,
"learning_rate": 0.001,
"loss": 1.1204,
"step": 78800
},
{
"epoch": 25.50096961861668,
"grad_norm": 0.21083228290081024,
"learning_rate": 0.001,
"loss": 1.0924,
"step": 78900
},
{
"epoch": 25.533290239172594,
"grad_norm": 0.9692633152008057,
"learning_rate": 0.001,
"loss": 1.1529,
"step": 79000
},
{
"epoch": 25.56561085972851,
"grad_norm": 0.813957929611206,
"learning_rate": 0.001,
"loss": 1.1256,
"step": 79100
},
{
"epoch": 25.597931480284423,
"grad_norm": 1.2155555486679077,
"learning_rate": 0.001,
"loss": 1.1369,
"step": 79200
},
{
"epoch": 25.630252100840337,
"grad_norm": 0.21164648234844208,
"learning_rate": 0.001,
"loss": 1.1224,
"step": 79300
},
{
"epoch": 25.662572721396252,
"grad_norm": 1.0323749780654907,
"learning_rate": 0.001,
"loss": 1.1336,
"step": 79400
},
{
"epoch": 25.694893341952167,
"grad_norm": 0.2119108885526657,
"learning_rate": 0.001,
"loss": 1.1362,
"step": 79500
},
{
"epoch": 25.72721396250808,
"grad_norm": 0.8170724511146545,
"learning_rate": 0.001,
"loss": 1.125,
"step": 79600
},
{
"epoch": 25.759534583063996,
"grad_norm": 0.5977357625961304,
"learning_rate": 0.001,
"loss": 1.1174,
"step": 79700
},
{
"epoch": 25.79185520361991,
"grad_norm": 0.6432121396064758,
"learning_rate": 0.001,
"loss": 1.1451,
"step": 79800
},
{
"epoch": 25.824175824175825,
"grad_norm": 0.4329550862312317,
"learning_rate": 0.001,
"loss": 1.1333,
"step": 79900
},
{
"epoch": 25.85649644473174,
"grad_norm": 0.66637122631073,
"learning_rate": 0.001,
"loss": 1.1565,
"step": 80000
},
{
"epoch": 25.888817065287654,
"grad_norm": 0.4904533922672272,
"learning_rate": 0.001,
"loss": 1.1281,
"step": 80100
},
{
"epoch": 25.92113768584357,
"grad_norm": 0.5748351812362671,
"learning_rate": 0.001,
"loss": 1.1416,
"step": 80200
},
{
"epoch": 25.953458306399483,
"grad_norm": 0.5132641792297363,
"learning_rate": 0.001,
"loss": 1.1453,
"step": 80300
},
{
"epoch": 25.985778926955398,
"grad_norm": 0.8233006596565247,
"learning_rate": 0.001,
"loss": 1.142,
"step": 80400
},
{
"epoch": 26.018099547511312,
"grad_norm": 1.450419306755066,
"learning_rate": 0.001,
"loss": 1.1022,
"step": 80500
},
{
"epoch": 26.050420168067227,
"grad_norm": 0.9929932951927185,
"learning_rate": 0.001,
"loss": 1.0498,
"step": 80600
},
{
"epoch": 26.08274078862314,
"grad_norm": 1.1767561435699463,
"learning_rate": 0.001,
"loss": 1.0544,
"step": 80700
},
{
"epoch": 26.115061409179056,
"grad_norm": 1.4456899166107178,
"learning_rate": 0.001,
"loss": 1.0704,
"step": 80800
},
{
"epoch": 26.14738202973497,
"grad_norm": 1.3941863775253296,
"learning_rate": 0.001,
"loss": 1.0555,
"step": 80900
},
{
"epoch": 26.179702650290885,
"grad_norm": 1.3099164962768555,
"learning_rate": 0.001,
"loss": 1.0503,
"step": 81000
},
{
"epoch": 26.2120232708468,
"grad_norm": 1.0749870538711548,
"learning_rate": 0.001,
"loss": 1.0674,
"step": 81100
},
{
"epoch": 26.244343891402714,
"grad_norm": 1.2630252838134766,
"learning_rate": 0.001,
"loss": 1.069,
"step": 81200
},
{
"epoch": 26.27666451195863,
"grad_norm": 1.1559174060821533,
"learning_rate": 0.001,
"loss": 1.0838,
"step": 81300
},
{
"epoch": 26.308985132514543,
"grad_norm": 1.3933382034301758,
"learning_rate": 0.001,
"loss": 1.0832,
"step": 81400
},
{
"epoch": 26.341305753070458,
"grad_norm": 1.12375807762146,
"learning_rate": 0.001,
"loss": 1.1035,
"step": 81500
},
{
"epoch": 26.373626373626372,
"grad_norm": 1.1551575660705566,
"learning_rate": 0.001,
"loss": 1.0778,
"step": 81600
},
{
"epoch": 26.405946994182287,
"grad_norm": 1.2731724977493286,
"learning_rate": 0.001,
"loss": 1.0918,
"step": 81700
},
{
"epoch": 26.4382676147382,
"grad_norm": 1.1205143928527832,
"learning_rate": 0.001,
"loss": 1.0749,
"step": 81800
},
{
"epoch": 26.470588235294116,
"grad_norm": 1.176575779914856,
"learning_rate": 0.001,
"loss": 1.079,
"step": 81900
},
{
"epoch": 26.50290885585003,
"grad_norm": 1.095289707183838,
"learning_rate": 0.001,
"loss": 1.0939,
"step": 82000
},
{
"epoch": 26.53522947640595,
"grad_norm": 1.2680126428604126,
"learning_rate": 0.001,
"loss": 1.0829,
"step": 82100
},
{
"epoch": 26.567550096961863,
"grad_norm": 1.43263578414917,
"learning_rate": 0.001,
"loss": 1.101,
"step": 82200
},
{
"epoch": 26.599870717517778,
"grad_norm": 1.2441238164901733,
"learning_rate": 0.001,
"loss": 1.107,
"step": 82300
},
{
"epoch": 26.632191338073692,
"grad_norm": 1.180927038192749,
"learning_rate": 0.001,
"loss": 1.0939,
"step": 82400
},
{
"epoch": 26.664511958629607,
"grad_norm": 1.0788034200668335,
"learning_rate": 0.001,
"loss": 1.1133,
"step": 82500
},
{
"epoch": 26.69683257918552,
"grad_norm": 1.4446241855621338,
"learning_rate": 0.001,
"loss": 1.1139,
"step": 82600
},
{
"epoch": 26.729153199741436,
"grad_norm": 1.2069514989852905,
"learning_rate": 0.001,
"loss": 1.1228,
"step": 82700
},
{
"epoch": 26.76147382029735,
"grad_norm": 1.1585434675216675,
"learning_rate": 0.001,
"loss": 1.0998,
"step": 82800
},
{
"epoch": 26.793794440853265,
"grad_norm": 1.341698169708252,
"learning_rate": 0.001,
"loss": 1.1213,
"step": 82900
},
{
"epoch": 26.82611506140918,
"grad_norm": 1.0288625955581665,
"learning_rate": 0.001,
"loss": 1.1245,
"step": 83000
},
{
"epoch": 26.858435681965094,
"grad_norm": 1.1505529880523682,
"learning_rate": 0.001,
"loss": 1.1668,
"step": 83100
},
{
"epoch": 26.89075630252101,
"grad_norm": 1.243490219116211,
"learning_rate": 0.001,
"loss": 1.1389,
"step": 83200
},
{
"epoch": 26.923076923076923,
"grad_norm": 1.4656898975372314,
"learning_rate": 0.001,
"loss": 1.1215,
"step": 83300
},
{
"epoch": 26.955397543632838,
"grad_norm": 1.286278247833252,
"learning_rate": 0.001,
"loss": 1.1255,
"step": 83400
},
{
"epoch": 26.987718164188752,
"grad_norm": 1.291344404220581,
"learning_rate": 0.001,
"loss": 1.1213,
"step": 83500
},
{
"epoch": 27.020038784744667,
"grad_norm": 1.1009141206741333,
"learning_rate": 0.001,
"loss": 1.0931,
"step": 83600
},
{
"epoch": 27.05235940530058,
"grad_norm": 1.21869695186615,
"learning_rate": 0.001,
"loss": 1.0301,
"step": 83700
},
{
"epoch": 27.084680025856496,
"grad_norm": 1.084806203842163,
"learning_rate": 0.001,
"loss": 1.0152,
"step": 83800
},
{
"epoch": 27.11700064641241,
"grad_norm": 1.2538260221481323,
"learning_rate": 0.001,
"loss": 1.0652,
"step": 83900
},
{
"epoch": 27.149321266968325,
"grad_norm": 1.1370879411697388,
"learning_rate": 0.001,
"loss": 1.0567,
"step": 84000
},
{
"epoch": 27.18164188752424,
"grad_norm": 1.0101512670516968,
"learning_rate": 0.001,
"loss": 1.0426,
"step": 84100
},
{
"epoch": 27.213962508080154,
"grad_norm": 1.0626932382583618,
"learning_rate": 0.001,
"loss": 1.0708,
"step": 84200
},
{
"epoch": 27.24628312863607,
"grad_norm": 1.3760011196136475,
"learning_rate": 0.001,
"loss": 1.0445,
"step": 84300
},
{
"epoch": 27.278603749191983,
"grad_norm": 6.08766508102417,
"learning_rate": 0.001,
"loss": 1.0801,
"step": 84400
},
{
"epoch": 27.310924369747898,
"grad_norm": 1.370469570159912,
"learning_rate": 0.001,
"loss": 1.0628,
"step": 84500
},
{
"epoch": 27.343244990303813,
"grad_norm": 1.2071462869644165,
"learning_rate": 0.001,
"loss": 1.091,
"step": 84600
},
{
"epoch": 27.375565610859727,
"grad_norm": 1.372673511505127,
"learning_rate": 0.001,
"loss": 1.0651,
"step": 84700
},
{
"epoch": 27.40788623141564,
"grad_norm": 1.2071818113327026,
"learning_rate": 0.001,
"loss": 1.0622,
"step": 84800
},
{
"epoch": 27.440206851971556,
"grad_norm": 1.0134035348892212,
"learning_rate": 0.001,
"loss": 1.0684,
"step": 84900
},
{
"epoch": 27.47252747252747,
"grad_norm": 1.3614178895950317,
"learning_rate": 0.001,
"loss": 1.0721,
"step": 85000
},
{
"epoch": 27.50484809308339,
"grad_norm": 1.2039440870285034,
"learning_rate": 0.001,
"loss": 1.0645,
"step": 85100
},
{
"epoch": 27.537168713639304,
"grad_norm": 1.2189968824386597,
"learning_rate": 0.001,
"loss": 1.0833,
"step": 85200
},
{
"epoch": 27.569489334195218,
"grad_norm": 1.1108815670013428,
"learning_rate": 0.001,
"loss": 1.0945,
"step": 85300
},
{
"epoch": 27.601809954751133,
"grad_norm": 1.3825979232788086,
"learning_rate": 0.001,
"loss": 1.0647,
"step": 85400
},
{
"epoch": 27.634130575307047,
"grad_norm": 1.2646015882492065,
"learning_rate": 0.001,
"loss": 1.0858,
"step": 85500
},
{
"epoch": 27.66645119586296,
"grad_norm": 1.1898399591445923,
"learning_rate": 0.001,
"loss": 1.081,
"step": 85600
},
{
"epoch": 27.698771816418876,
"grad_norm": 1.1825064420700073,
"learning_rate": 0.001,
"loss": 1.0948,
"step": 85700
},
{
"epoch": 27.73109243697479,
"grad_norm": 1.337872862815857,
"learning_rate": 0.001,
"loss": 1.0779,
"step": 85800
},
{
"epoch": 27.763413057530705,
"grad_norm": 1.146846890449524,
"learning_rate": 0.001,
"loss": 1.0819,
"step": 85900
},
{
"epoch": 27.79573367808662,
"grad_norm": 1.468003511428833,
"learning_rate": 0.001,
"loss": 1.0798,
"step": 86000
},
{
"epoch": 27.828054298642535,
"grad_norm": 1.125246286392212,
"learning_rate": 0.001,
"loss": 1.0924,
"step": 86100
},
{
"epoch": 27.86037491919845,
"grad_norm": 1.1755690574645996,
"learning_rate": 0.001,
"loss": 1.0965,
"step": 86200
},
{
"epoch": 27.892695539754364,
"grad_norm": 1.1506797075271606,
"learning_rate": 0.001,
"loss": 1.1024,
"step": 86300
},
{
"epoch": 27.92501616031028,
"grad_norm": 1.084913730621338,
"learning_rate": 0.001,
"loss": 1.0961,
"step": 86400
},
{
"epoch": 27.957336780866193,
"grad_norm": 1.0553350448608398,
"learning_rate": 0.001,
"loss": 1.1164,
"step": 86500
},
{
"epoch": 27.989657401422107,
"grad_norm": 1.1251089572906494,
"learning_rate": 0.001,
"loss": 1.1268,
"step": 86600
},
{
"epoch": 28.021978021978022,
"grad_norm": 1.3656169176101685,
"learning_rate": 0.001,
"loss": 1.0753,
"step": 86700
},
{
"epoch": 28.054298642533936,
"grad_norm": 1.0576167106628418,
"learning_rate": 0.001,
"loss": 0.9991,
"step": 86800
},
{
"epoch": 28.08661926308985,
"grad_norm": 1.0733096599578857,
"learning_rate": 0.001,
"loss": 1.0239,
"step": 86900
},
{
"epoch": 28.118939883645766,
"grad_norm": 1.265825629234314,
"learning_rate": 0.001,
"loss": 1.0298,
"step": 87000
},
{
"epoch": 28.15126050420168,
"grad_norm": 1.655713438987732,
"learning_rate": 0.001,
"loss": 1.016,
"step": 87100
},
{
"epoch": 28.183581124757595,
"grad_norm": 1.3085395097732544,
"learning_rate": 0.001,
"loss": 1.0343,
"step": 87200
},
{
"epoch": 28.21590174531351,
"grad_norm": 1.0143513679504395,
"learning_rate": 0.001,
"loss": 1.0293,
"step": 87300
},
{
"epoch": 28.248222365869424,
"grad_norm": 1.4002749919891357,
"learning_rate": 0.001,
"loss": 1.0416,
"step": 87400
},
{
"epoch": 28.28054298642534,
"grad_norm": 1.082223892211914,
"learning_rate": 0.001,
"loss": 1.0368,
"step": 87500
},
{
"epoch": 28.312863606981253,
"grad_norm": 1.3155796527862549,
"learning_rate": 0.001,
"loss": 1.051,
"step": 87600
},
{
"epoch": 28.345184227537167,
"grad_norm": 1.3327791690826416,
"learning_rate": 0.001,
"loss": 1.0128,
"step": 87700
},
{
"epoch": 28.377504848093082,
"grad_norm": 0.9752927422523499,
"learning_rate": 0.001,
"loss": 1.0389,
"step": 87800
},
{
"epoch": 28.409825468648997,
"grad_norm": 1.3138093948364258,
"learning_rate": 0.001,
"loss": 1.0696,
"step": 87900
},
{
"epoch": 28.44214608920491,
"grad_norm": 0.9144354462623596,
"learning_rate": 0.001,
"loss": 1.047,
"step": 88000
},
{
"epoch": 28.474466709760826,
"grad_norm": 1.2346453666687012,
"learning_rate": 0.001,
"loss": 1.0398,
"step": 88100
},
{
"epoch": 28.50678733031674,
"grad_norm": 1.703855037689209,
"learning_rate": 0.001,
"loss": 1.0524,
"step": 88200
},
{
"epoch": 28.53910795087266,
"grad_norm": 1.0008935928344727,
"learning_rate": 0.001,
"loss": 1.0512,
"step": 88300
},
{
"epoch": 28.571428571428573,
"grad_norm": 1.1595375537872314,
"learning_rate": 0.001,
"loss": 1.0562,
"step": 88400
},
{
"epoch": 28.603749191984488,
"grad_norm": 0.9839758276939392,
"learning_rate": 0.001,
"loss": 1.0767,
"step": 88500
},
{
"epoch": 28.636069812540402,
"grad_norm": 0.9803606271743774,
"learning_rate": 0.001,
"loss": 1.0523,
"step": 88600
},
{
"epoch": 28.668390433096317,
"grad_norm": 1.0053520202636719,
"learning_rate": 0.001,
"loss": 1.069,
"step": 88700
},
{
"epoch": 28.70071105365223,
"grad_norm": 1.0004795789718628,
"learning_rate": 0.001,
"loss": 1.0402,
"step": 88800
},
{
"epoch": 28.733031674208146,
"grad_norm": 1.1498825550079346,
"learning_rate": 0.001,
"loss": 1.0616,
"step": 88900
},
{
"epoch": 28.76535229476406,
"grad_norm": 1.0905274152755737,
"learning_rate": 0.001,
"loss": 1.0813,
"step": 89000
},
{
"epoch": 28.797672915319975,
"grad_norm": 0.9474394917488098,
"learning_rate": 0.001,
"loss": 1.0742,
"step": 89100
},
{
"epoch": 28.82999353587589,
"grad_norm": 1.319617509841919,
"learning_rate": 0.001,
"loss": 1.0922,
"step": 89200
},
{
"epoch": 28.862314156431804,
"grad_norm": 1.1449226140975952,
"learning_rate": 0.001,
"loss": 1.1161,
"step": 89300
},
{
"epoch": 28.89463477698772,
"grad_norm": 1.1758530139923096,
"learning_rate": 0.001,
"loss": 1.0891,
"step": 89400
},
{
"epoch": 28.926955397543633,
"grad_norm": 1.2047131061553955,
"learning_rate": 0.001,
"loss": 1.0833,
"step": 89500
},
{
"epoch": 28.959276018099548,
"grad_norm": 1.0821508169174194,
"learning_rate": 0.001,
"loss": 1.0829,
"step": 89600
},
{
"epoch": 28.991596638655462,
"grad_norm": 1.1592580080032349,
"learning_rate": 0.001,
"loss": 1.0958,
"step": 89700
},
{
"epoch": 29.023917259211377,
"grad_norm": 1.1290960311889648,
"learning_rate": 0.001,
"loss": 1.0154,
"step": 89800
},
{
"epoch": 29.05623787976729,
"grad_norm": 1.4548909664154053,
"learning_rate": 0.001,
"loss": 0.9883,
"step": 89900
},
{
"epoch": 29.088558500323206,
"grad_norm": 1.4414570331573486,
"learning_rate": 0.001,
"loss": 0.9885,
"step": 90000
},
{
"epoch": 29.12087912087912,
"grad_norm": 1.0668739080429077,
"learning_rate": 0.001,
"loss": 1.0079,
"step": 90100
},
{
"epoch": 29.153199741435035,
"grad_norm": 1.2436254024505615,
"learning_rate": 0.001,
"loss": 1.0177,
"step": 90200
},
{
"epoch": 29.18552036199095,
"grad_norm": 1.1421066522598267,
"learning_rate": 0.001,
"loss": 1.0225,
"step": 90300
},
{
"epoch": 29.217840982546864,
"grad_norm": 0.9146871566772461,
"learning_rate": 0.001,
"loss": 1.0021,
"step": 90400
},
{
"epoch": 29.25016160310278,
"grad_norm": 1.0086992979049683,
"learning_rate": 0.001,
"loss": 1.0366,
"step": 90500
},
{
"epoch": 29.282482223658693,
"grad_norm": 0.9744951128959656,
"learning_rate": 0.001,
"loss": 1.0344,
"step": 90600
},
{
"epoch": 29.314802844214608,
"grad_norm": 1.1832053661346436,
"learning_rate": 0.001,
"loss": 1.0138,
"step": 90700
},
{
"epoch": 29.347123464770522,
"grad_norm": 1.3362852334976196,
"learning_rate": 0.001,
"loss": 1.0176,
"step": 90800
},
{
"epoch": 29.379444085326437,
"grad_norm": 1.1680186986923218,
"learning_rate": 0.001,
"loss": 1.0305,
"step": 90900
},
{
"epoch": 29.41176470588235,
"grad_norm": 1.1634215116500854,
"learning_rate": 0.001,
"loss": 1.0087,
"step": 91000
},
{
"epoch": 29.444085326438266,
"grad_norm": 1.0064040422439575,
"learning_rate": 0.001,
"loss": 1.0162,
"step": 91100
},
{
"epoch": 29.47640594699418,
"grad_norm": 1.3981666564941406,
"learning_rate": 0.001,
"loss": 1.0381,
"step": 91200
},
{
"epoch": 29.5087265675501,
"grad_norm": 1.1178312301635742,
"learning_rate": 0.001,
"loss": 1.0377,
"step": 91300
},
{
"epoch": 29.541047188106013,
"grad_norm": 1.013622522354126,
"learning_rate": 0.001,
"loss": 1.0503,
"step": 91400
},
{
"epoch": 29.573367808661928,
"grad_norm": 1.1112215518951416,
"learning_rate": 0.001,
"loss": 1.0493,
"step": 91500
},
{
"epoch": 29.605688429217842,
"grad_norm": 1.1293482780456543,
"learning_rate": 0.001,
"loss": 1.043,
"step": 91600
},
{
"epoch": 29.638009049773757,
"grad_norm": 1.1543068885803223,
"learning_rate": 0.001,
"loss": 1.0458,
"step": 91700
},
{
"epoch": 29.67032967032967,
"grad_norm": 0.9590059518814087,
"learning_rate": 0.001,
"loss": 1.0692,
"step": 91800
},
{
"epoch": 29.702650290885586,
"grad_norm": 0.9611120820045471,
"learning_rate": 0.001,
"loss": 1.0507,
"step": 91900
},
{
"epoch": 29.7349709114415,
"grad_norm": 1.105459451675415,
"learning_rate": 0.001,
"loss": 1.0477,
"step": 92000
},
{
"epoch": 29.767291531997415,
"grad_norm": 1.2844051122665405,
"learning_rate": 0.001,
"loss": 1.0547,
"step": 92100
},
{
"epoch": 29.79961215255333,
"grad_norm": 1.194689154624939,
"learning_rate": 0.001,
"loss": 1.0415,
"step": 92200
},
{
"epoch": 29.831932773109244,
"grad_norm": 1.2303766012191772,
"learning_rate": 0.001,
"loss": 1.0577,
"step": 92300
},
{
"epoch": 29.86425339366516,
"grad_norm": 1.0361992120742798,
"learning_rate": 0.001,
"loss": 1.0653,
"step": 92400
},
{
"epoch": 29.896574014221073,
"grad_norm": 1.1771466732025146,
"learning_rate": 0.001,
"loss": 1.0595,
"step": 92500
},
{
"epoch": 29.928894634776988,
"grad_norm": 1.1949156522750854,
"learning_rate": 0.001,
"loss": 1.061,
"step": 92600
},
{
"epoch": 29.961215255332903,
"grad_norm": 1.147527813911438,
"learning_rate": 0.001,
"loss": 1.0707,
"step": 92700
},
{
"epoch": 29.993535875888817,
"grad_norm": 1.1559338569641113,
"learning_rate": 0.001,
"loss": 1.0782,
"step": 92800
},
{
"epoch": 30.02585649644473,
"grad_norm": 1.3547965288162231,
"learning_rate": 0.001,
"loss": 1.0003,
"step": 92900
},
{
"epoch": 30.058177117000646,
"grad_norm": 1.4437899589538574,
"learning_rate": 0.001,
"loss": 0.966,
"step": 93000
},
{
"epoch": 30.09049773755656,
"grad_norm": 1.0247992277145386,
"learning_rate": 0.001,
"loss": 0.9779,
"step": 93100
},
{
"epoch": 30.122818358112475,
"grad_norm": 1.0951383113861084,
"learning_rate": 0.001,
"loss": 0.9782,
"step": 93200
},
{
"epoch": 30.15513897866839,
"grad_norm": 1.2668837308883667,
"learning_rate": 0.001,
"loss": 0.977,
"step": 93300
},
{
"epoch": 30.187459599224304,
"grad_norm": 1.2851572036743164,
"learning_rate": 0.001,
"loss": 0.9819,
"step": 93400
},
{
"epoch": 30.21978021978022,
"grad_norm": 1.1531295776367188,
"learning_rate": 0.001,
"loss": 0.9862,
"step": 93500
},
{
"epoch": 30.252100840336134,
"grad_norm": 1.1562260389328003,
"learning_rate": 0.001,
"loss": 0.9874,
"step": 93600
},
{
"epoch": 30.284421460892048,
"grad_norm": 1.5142194032669067,
"learning_rate": 0.001,
"loss": 1.0211,
"step": 93700
},
{
"epoch": 30.316742081447963,
"grad_norm": 1.2942471504211426,
"learning_rate": 0.001,
"loss": 0.9893,
"step": 93800
},
{
"epoch": 30.349062702003877,
"grad_norm": 1.273345708847046,
"learning_rate": 0.001,
"loss": 1.0102,
"step": 93900
},
{
"epoch": 30.381383322559792,
"grad_norm": 1.251236915588379,
"learning_rate": 0.001,
"loss": 1.0103,
"step": 94000
},
{
"epoch": 30.413703943115706,
"grad_norm": 1.4119136333465576,
"learning_rate": 0.001,
"loss": 0.9897,
"step": 94100
},
{
"epoch": 30.44602456367162,
"grad_norm": 1.2509639263153076,
"learning_rate": 0.001,
"loss": 1.0158,
"step": 94200
},
{
"epoch": 30.478345184227535,
"grad_norm": 1.3147063255310059,
"learning_rate": 0.001,
"loss": 1.0271,
"step": 94300
},
{
"epoch": 30.51066580478345,
"grad_norm": 1.2182319164276123,
"learning_rate": 0.001,
"loss": 1.0446,
"step": 94400
},
{
"epoch": 30.542986425339368,
"grad_norm": 3.014864206314087,
"learning_rate": 0.001,
"loss": 1.0268,
"step": 94500
},
{
"epoch": 30.575307045895283,
"grad_norm": 1.1657381057739258,
"learning_rate": 0.001,
"loss": 1.0381,
"step": 94600
},
{
"epoch": 30.607627666451197,
"grad_norm": 1.2702171802520752,
"learning_rate": 0.001,
"loss": 1.022,
"step": 94700
},
{
"epoch": 30.639948287007112,
"grad_norm": 1.3262053728103638,
"learning_rate": 0.001,
"loss": 1.0328,
"step": 94800
},
{
"epoch": 30.672268907563026,
"grad_norm": 1.1520822048187256,
"learning_rate": 0.001,
"loss": 1.0414,
"step": 94900
},
{
"epoch": 30.70458952811894,
"grad_norm": 1.637629747390747,
"learning_rate": 0.001,
"loss": 1.0357,
"step": 95000
},
{
"epoch": 30.736910148674855,
"grad_norm": 1.3296183347702026,
"learning_rate": 0.001,
"loss": 1.0383,
"step": 95100
},
{
"epoch": 30.76923076923077,
"grad_norm": 1.197227120399475,
"learning_rate": 0.001,
"loss": 1.0412,
"step": 95200
},
{
"epoch": 30.801551389786685,
"grad_norm": 1.2573148012161255,
"learning_rate": 0.001,
"loss": 1.0402,
"step": 95300
},
{
"epoch": 30.8338720103426,
"grad_norm": 1.1908835172653198,
"learning_rate": 0.001,
"loss": 1.0467,
"step": 95400
},
{
"epoch": 30.866192630898514,
"grad_norm": 1.181947946548462,
"learning_rate": 0.001,
"loss": 1.0488,
"step": 95500
},
{
"epoch": 30.89851325145443,
"grad_norm": 1.127081036567688,
"learning_rate": 0.001,
"loss": 1.028,
"step": 95600
},
{
"epoch": 30.930833872010343,
"grad_norm": 1.8955832719802856,
"learning_rate": 0.001,
"loss": 1.0532,
"step": 95700
},
{
"epoch": 30.963154492566257,
"grad_norm": 1.178209900856018,
"learning_rate": 0.001,
"loss": 1.0188,
"step": 95800
},
{
"epoch": 30.995475113122172,
"grad_norm": 1.3171616792678833,
"learning_rate": 0.001,
"loss": 1.019,
"step": 95900
},
{
"epoch": 31.027795733678087,
"grad_norm": 0.9879239201545715,
"learning_rate": 0.001,
"loss": 0.9763,
"step": 96000
},
{
"epoch": 31.060116354234,
"grad_norm": 0.9748243093490601,
"learning_rate": 0.001,
"loss": 0.9399,
"step": 96100
},
{
"epoch": 31.092436974789916,
"grad_norm": 1.102300763130188,
"learning_rate": 0.001,
"loss": 0.9548,
"step": 96200
},
{
"epoch": 31.12475759534583,
"grad_norm": 1.0693408250808716,
"learning_rate": 0.001,
"loss": 0.964,
"step": 96300
},
{
"epoch": 31.157078215901745,
"grad_norm": 1.0911110639572144,
"learning_rate": 0.001,
"loss": 0.9647,
"step": 96400
},
{
"epoch": 31.18939883645766,
"grad_norm": 1.2917101383209229,
"learning_rate": 0.001,
"loss": 0.9822,
"step": 96500
},
{
"epoch": 31.221719457013574,
"grad_norm": 1.0852818489074707,
"learning_rate": 0.001,
"loss": 0.9762,
"step": 96600
},
{
"epoch": 31.25404007756949,
"grad_norm": 1.5321288108825684,
"learning_rate": 0.001,
"loss": 0.9643,
"step": 96700
},
{
"epoch": 31.286360698125403,
"grad_norm": 1.1907495260238647,
"learning_rate": 0.001,
"loss": 0.9786,
"step": 96800
},
{
"epoch": 31.318681318681318,
"grad_norm": 1.236419439315796,
"learning_rate": 0.001,
"loss": 1.0093,
"step": 96900
},
{
"epoch": 31.351001939237232,
"grad_norm": 1.4393523931503296,
"learning_rate": 0.001,
"loss": 0.9868,
"step": 97000
},
{
"epoch": 31.383322559793147,
"grad_norm": 1.1615034341812134,
"learning_rate": 0.001,
"loss": 0.9932,
"step": 97100
},
{
"epoch": 31.41564318034906,
"grad_norm": 1.4453788995742798,
"learning_rate": 0.001,
"loss": 0.9853,
"step": 97200
},
{
"epoch": 31.447963800904976,
"grad_norm": 1.1026290655136108,
"learning_rate": 0.001,
"loss": 0.9995,
"step": 97300
},
{
"epoch": 31.48028442146089,
"grad_norm": 1.1974660158157349,
"learning_rate": 0.001,
"loss": 0.9895,
"step": 97400
},
{
"epoch": 31.51260504201681,
"grad_norm": 1.2898255586624146,
"learning_rate": 0.001,
"loss": 1.0166,
"step": 97500
},
{
"epoch": 31.544925662572723,
"grad_norm": 1.1666028499603271,
"learning_rate": 0.001,
"loss": 1.0055,
"step": 97600
},
{
"epoch": 31.577246283128638,
"grad_norm": 1.4688661098480225,
"learning_rate": 0.001,
"loss": 0.9944,
"step": 97700
},
{
"epoch": 31.609566903684552,
"grad_norm": 1.2777717113494873,
"learning_rate": 0.001,
"loss": 1.0156,
"step": 97800
},
{
"epoch": 31.641887524240467,
"grad_norm": 1.1962950229644775,
"learning_rate": 0.001,
"loss": 1.0111,
"step": 97900
},
{
"epoch": 31.67420814479638,
"grad_norm": 1.0366114377975464,
"learning_rate": 0.001,
"loss": 1.012,
"step": 98000
},
{
"epoch": 31.706528765352296,
"grad_norm": 1.0451583862304688,
"learning_rate": 0.001,
"loss": 1.0146,
"step": 98100
},
{
"epoch": 31.73884938590821,
"grad_norm": 1.020317554473877,
"learning_rate": 0.001,
"loss": 1.0263,
"step": 98200
},
{
"epoch": 31.771170006464125,
"grad_norm": 1.1409897804260254,
"learning_rate": 0.001,
"loss": 1.0167,
"step": 98300
},
{
"epoch": 31.80349062702004,
"grad_norm": 1.5148199796676636,
"learning_rate": 0.001,
"loss": 1.0359,
"step": 98400
},
{
"epoch": 31.835811247575954,
"grad_norm": 1.0931096076965332,
"learning_rate": 0.001,
"loss": 1.0176,
"step": 98500
},
{
"epoch": 31.86813186813187,
"grad_norm": 1.3822332620620728,
"learning_rate": 0.001,
"loss": 1.0394,
"step": 98600
},
{
"epoch": 31.900452488687783,
"grad_norm": 1.0372686386108398,
"learning_rate": 0.001,
"loss": 1.0125,
"step": 98700
},
{
"epoch": 31.932773109243698,
"grad_norm": 1.1177948713302612,
"learning_rate": 0.001,
"loss": 1.0355,
"step": 98800
},
{
"epoch": 31.965093729799612,
"grad_norm": 22.84385108947754,
"learning_rate": 0.001,
"loss": 1.0112,
"step": 98900
},
{
"epoch": 31.997414350355527,
"grad_norm": 1.431148648262024,
"learning_rate": 0.001,
"loss": 1.0157,
"step": 99000
},
{
"epoch": 32.02973497091144,
"grad_norm": 1.1543240547180176,
"learning_rate": 0.001,
"loss": 0.9505,
"step": 99100
},
{
"epoch": 32.062055591467356,
"grad_norm": 1.2091968059539795,
"learning_rate": 0.001,
"loss": 0.9292,
"step": 99200
},
{
"epoch": 32.09437621202327,
"grad_norm": 1.3215097188949585,
"learning_rate": 0.001,
"loss": 0.9619,
"step": 99300
},
{
"epoch": 32.126696832579185,
"grad_norm": 1.0096403360366821,
"learning_rate": 0.001,
"loss": 0.9312,
"step": 99400
},
{
"epoch": 32.1590174531351,
"grad_norm": 1.2567592859268188,
"learning_rate": 0.001,
"loss": 0.9507,
"step": 99500
},
{
"epoch": 32.191338073691014,
"grad_norm": 1.2970302104949951,
"learning_rate": 0.001,
"loss": 0.9646,
"step": 99600
},
{
"epoch": 32.22365869424693,
"grad_norm": 1.2226184606552124,
"learning_rate": 0.001,
"loss": 0.9644,
"step": 99700
},
{
"epoch": 32.25597931480284,
"grad_norm": 1.4690126180648804,
"learning_rate": 0.001,
"loss": 0.9591,
"step": 99800
},
{
"epoch": 32.28829993535876,
"grad_norm": 5.503747463226318,
"learning_rate": 0.001,
"loss": 0.9596,
"step": 99900
},
{
"epoch": 32.32062055591467,
"grad_norm": 1.5522727966308594,
"learning_rate": 0.001,
"loss": 0.9552,
"step": 100000
},
{
"epoch": 32.35294117647059,
"grad_norm": 1.183016061782837,
"learning_rate": 0.001,
"loss": 0.9799,
"step": 100100
},
{
"epoch": 32.3852617970265,
"grad_norm": 1.1440593004226685,
"learning_rate": 0.001,
"loss": 0.9745,
"step": 100200
},
{
"epoch": 32.417582417582416,
"grad_norm": 1.1371045112609863,
"learning_rate": 0.001,
"loss": 0.9723,
"step": 100300
},
{
"epoch": 32.44990303813833,
"grad_norm": 1.2485049962997437,
"learning_rate": 0.001,
"loss": 0.9701,
"step": 100400
},
{
"epoch": 32.482223658694245,
"grad_norm": 1.0270694494247437,
"learning_rate": 0.001,
"loss": 0.9945,
"step": 100500
},
{
"epoch": 32.51454427925016,
"grad_norm": 1.3093137741088867,
"learning_rate": 0.001,
"loss": 0.9781,
"step": 100600
},
{
"epoch": 32.546864899806074,
"grad_norm": 0.982565701007843,
"learning_rate": 0.001,
"loss": 0.9586,
"step": 100700
},
{
"epoch": 32.57918552036199,
"grad_norm": 1.180829644203186,
"learning_rate": 0.001,
"loss": 1.0096,
"step": 100800
},
{
"epoch": 32.6115061409179,
"grad_norm": 1.8596967458724976,
"learning_rate": 0.001,
"loss": 1.0101,
"step": 100900
},
{
"epoch": 32.64382676147382,
"grad_norm": 1.1227686405181885,
"learning_rate": 0.001,
"loss": 0.9968,
"step": 101000
},
{
"epoch": 32.67614738202973,
"grad_norm": 1.3988664150238037,
"learning_rate": 0.001,
"loss": 0.9934,
"step": 101100
},
{
"epoch": 32.70846800258565,
"grad_norm": 1.0867431163787842,
"learning_rate": 0.001,
"loss": 1.0071,
"step": 101200
},
{
"epoch": 32.74078862314156,
"grad_norm": 1.2575784921646118,
"learning_rate": 0.001,
"loss": 1.0145,
"step": 101300
},
{
"epoch": 32.773109243697476,
"grad_norm": 1.1998168230056763,
"learning_rate": 0.001,
"loss": 0.9904,
"step": 101400
},
{
"epoch": 32.80542986425339,
"grad_norm": 0.9470672011375427,
"learning_rate": 0.001,
"loss": 1.0114,
"step": 101500
},
{
"epoch": 32.837750484809305,
"grad_norm": 1.0151934623718262,
"learning_rate": 0.001,
"loss": 1.0152,
"step": 101600
},
{
"epoch": 32.87007110536522,
"grad_norm": 1.1765100955963135,
"learning_rate": 0.001,
"loss": 1.023,
"step": 101700
},
{
"epoch": 32.902391725921134,
"grad_norm": 1.2009962797164917,
"learning_rate": 0.001,
"loss": 0.9987,
"step": 101800
},
{
"epoch": 32.93471234647705,
"grad_norm": 1.1376994848251343,
"learning_rate": 0.001,
"loss": 0.9883,
"step": 101900
},
{
"epoch": 32.967032967032964,
"grad_norm": 0.9074110984802246,
"learning_rate": 0.001,
"loss": 1.0078,
"step": 102000
},
{
"epoch": 32.999353587588885,
"grad_norm": 0.8598896861076355,
"learning_rate": 0.001,
"loss": 0.9777,
"step": 102100
},
{
"epoch": 33.0316742081448,
"grad_norm": 0.7127558588981628,
"learning_rate": 0.001,
"loss": 0.9143,
"step": 102200
},
{
"epoch": 33.063994828700714,
"grad_norm": 0.7595661878585815,
"learning_rate": 0.001,
"loss": 0.9234,
"step": 102300
},
{
"epoch": 33.09631544925663,
"grad_norm": 0.8373937010765076,
"learning_rate": 0.001,
"loss": 0.9316,
"step": 102400
},
{
"epoch": 33.12863606981254,
"grad_norm": 1.197033166885376,
"learning_rate": 0.001,
"loss": 0.949,
"step": 102500
},
{
"epoch": 33.16095669036846,
"grad_norm": 0.6342483758926392,
"learning_rate": 0.001,
"loss": 0.9397,
"step": 102600
},
{
"epoch": 33.19327731092437,
"grad_norm": 1.198174238204956,
"learning_rate": 0.001,
"loss": 0.9458,
"step": 102700
},
{
"epoch": 33.22559793148029,
"grad_norm": 0.8614729642868042,
"learning_rate": 0.001,
"loss": 0.9565,
"step": 102800
},
{
"epoch": 33.2579185520362,
"grad_norm": 0.9009416699409485,
"learning_rate": 0.001,
"loss": 0.9174,
"step": 102900
},
{
"epoch": 33.290239172592116,
"grad_norm": 1.1099083423614502,
"learning_rate": 0.001,
"loss": 0.9521,
"step": 103000
},
{
"epoch": 33.32255979314803,
"grad_norm": 0.8563976883888245,
"learning_rate": 0.001,
"loss": 0.9295,
"step": 103100
},
{
"epoch": 33.354880413703945,
"grad_norm": 0.8638460040092468,
"learning_rate": 0.001,
"loss": 0.9511,
"step": 103200
},
{
"epoch": 33.38720103425986,
"grad_norm": 0.7926732301712036,
"learning_rate": 0.001,
"loss": 0.9582,
"step": 103300
},
{
"epoch": 33.419521654815775,
"grad_norm": 0.6091955900192261,
"learning_rate": 0.001,
"loss": 0.948,
"step": 103400
},
{
"epoch": 33.45184227537169,
"grad_norm": 1.9351041316986084,
"learning_rate": 0.001,
"loss": 0.9555,
"step": 103500
},
{
"epoch": 33.484162895927604,
"grad_norm": 0.9850326776504517,
"learning_rate": 0.001,
"loss": 0.945,
"step": 103600
},
{
"epoch": 33.51648351648352,
"grad_norm": 1.0357599258422852,
"learning_rate": 0.001,
"loss": 0.975,
"step": 103700
},
{
"epoch": 33.54880413703943,
"grad_norm": 0.6513636112213135,
"learning_rate": 0.001,
"loss": 0.9802,
"step": 103800
},
{
"epoch": 33.58112475759535,
"grad_norm": 1.0478070974349976,
"learning_rate": 0.001,
"loss": 0.9791,
"step": 103900
},
{
"epoch": 33.61344537815126,
"grad_norm": 0.8873677253723145,
"learning_rate": 0.001,
"loss": 0.975,
"step": 104000
},
{
"epoch": 33.645765998707176,
"grad_norm": 0.8857697248458862,
"learning_rate": 0.001,
"loss": 0.9868,
"step": 104100
},
{
"epoch": 33.67808661926309,
"grad_norm": 0.8290280699729919,
"learning_rate": 0.001,
"loss": 0.9874,
"step": 104200
},
{
"epoch": 33.710407239819006,
"grad_norm": 1.063020944595337,
"learning_rate": 0.001,
"loss": 0.9847,
"step": 104300
},
{
"epoch": 33.74272786037492,
"grad_norm": 0.8536366820335388,
"learning_rate": 0.001,
"loss": 0.9826,
"step": 104400
},
{
"epoch": 33.775048480930835,
"grad_norm": 1.7910808324813843,
"learning_rate": 0.001,
"loss": 1.0049,
"step": 104500
},
{
"epoch": 33.80736910148675,
"grad_norm": 0.9663560390472412,
"learning_rate": 0.001,
"loss": 0.9915,
"step": 104600
},
{
"epoch": 33.839689722042664,
"grad_norm": 0.8727264404296875,
"learning_rate": 0.001,
"loss": 1.0047,
"step": 104700
},
{
"epoch": 33.87201034259858,
"grad_norm": 1.2965424060821533,
"learning_rate": 0.001,
"loss": 0.9907,
"step": 104800
},
{
"epoch": 33.90433096315449,
"grad_norm": 1.0414011478424072,
"learning_rate": 0.001,
"loss": 1.0034,
"step": 104900
},
{
"epoch": 33.93665158371041,
"grad_norm": 0.7418168783187866,
"learning_rate": 0.001,
"loss": 0.9997,
"step": 105000
},
{
"epoch": 33.96897220426632,
"grad_norm": 0.7445783615112305,
"learning_rate": 0.001,
"loss": 0.9919,
"step": 105100
},
{
"epoch": 34.00129282482224,
"grad_norm": 1.2291535139083862,
"learning_rate": 0.001,
"loss": 0.9792,
"step": 105200
},
{
"epoch": 34.03361344537815,
"grad_norm": 1.6661193370819092,
"learning_rate": 0.001,
"loss": 0.9099,
"step": 105300
},
{
"epoch": 34.065934065934066,
"grad_norm": 1.3818449974060059,
"learning_rate": 0.001,
"loss": 0.9103,
"step": 105400
},
{
"epoch": 34.09825468648998,
"grad_norm": 1.2478028535842896,
"learning_rate": 0.001,
"loss": 0.9196,
"step": 105500
},
{
"epoch": 34.130575307045895,
"grad_norm": 1.4901819229125977,
"learning_rate": 0.001,
"loss": 0.9162,
"step": 105600
},
{
"epoch": 34.16289592760181,
"grad_norm": 1.3518427610397339,
"learning_rate": 0.001,
"loss": 0.9214,
"step": 105700
},
{
"epoch": 34.195216548157724,
"grad_norm": 1.1966758966445923,
"learning_rate": 0.001,
"loss": 0.921,
"step": 105800
},
{
"epoch": 34.22753716871364,
"grad_norm": 1.0546095371246338,
"learning_rate": 0.001,
"loss": 0.9203,
"step": 105900
},
{
"epoch": 34.25985778926955,
"grad_norm": 1.6791573762893677,
"learning_rate": 0.001,
"loss": 0.9263,
"step": 106000
},
{
"epoch": 34.29217840982547,
"grad_norm": 1.7650243043899536,
"learning_rate": 0.001,
"loss": 0.9263,
"step": 106100
},
{
"epoch": 34.32449903038138,
"grad_norm": 1.4087214469909668,
"learning_rate": 0.001,
"loss": 0.9372,
"step": 106200
},
{
"epoch": 34.3568196509373,
"grad_norm": 1.335076928138733,
"learning_rate": 0.001,
"loss": 0.9456,
"step": 106300
},
{
"epoch": 34.38914027149321,
"grad_norm": 1.5549242496490479,
"learning_rate": 0.001,
"loss": 0.9418,
"step": 106400
},
{
"epoch": 34.421460892049126,
"grad_norm": 2.1881766319274902,
"learning_rate": 0.001,
"loss": 0.9424,
"step": 106500
},
{
"epoch": 34.45378151260504,
"grad_norm": 1.419062614440918,
"learning_rate": 0.001,
"loss": 0.9557,
"step": 106600
},
{
"epoch": 34.486102133160955,
"grad_norm": 1.5608348846435547,
"learning_rate": 0.001,
"loss": 0.9554,
"step": 106700
},
{
"epoch": 34.51842275371687,
"grad_norm": 1.1787161827087402,
"learning_rate": 0.001,
"loss": 0.934,
"step": 106800
},
{
"epoch": 34.550743374272784,
"grad_norm": 1.2317980527877808,
"learning_rate": 0.001,
"loss": 0.9724,
"step": 106900
},
{
"epoch": 34.5830639948287,
"grad_norm": 1.6141093969345093,
"learning_rate": 0.001,
"loss": 0.9421,
"step": 107000
},
{
"epoch": 34.61538461538461,
"grad_norm": 1.3813108205795288,
"learning_rate": 0.001,
"loss": 0.9603,
"step": 107100
},
{
"epoch": 34.64770523594053,
"grad_norm": 1.9711265563964844,
"learning_rate": 0.001,
"loss": 0.9639,
"step": 107200
},
{
"epoch": 34.68002585649644,
"grad_norm": 1.3804035186767578,
"learning_rate": 0.001,
"loss": 0.9641,
"step": 107300
},
{
"epoch": 34.71234647705236,
"grad_norm": 1.8484543561935425,
"learning_rate": 0.001,
"loss": 0.9766,
"step": 107400
},
{
"epoch": 34.74466709760827,
"grad_norm": 1.755317211151123,
"learning_rate": 0.001,
"loss": 0.9875,
"step": 107500
},
{
"epoch": 34.776987718164186,
"grad_norm": 1.6666924953460693,
"learning_rate": 0.001,
"loss": 0.9852,
"step": 107600
},
{
"epoch": 34.8093083387201,
"grad_norm": 1.394085168838501,
"learning_rate": 0.001,
"loss": 0.9903,
"step": 107700
},
{
"epoch": 34.841628959276015,
"grad_norm": 1.8693289756774902,
"learning_rate": 0.001,
"loss": 0.9803,
"step": 107800
},
{
"epoch": 34.87394957983193,
"grad_norm": 1.5679796934127808,
"learning_rate": 0.001,
"loss": 0.989,
"step": 107900
},
{
"epoch": 34.906270200387844,
"grad_norm": 1.6920912265777588,
"learning_rate": 0.001,
"loss": 0.9789,
"step": 108000
},
{
"epoch": 34.93859082094376,
"grad_norm": 1.1828668117523193,
"learning_rate": 0.001,
"loss": 0.9783,
"step": 108100
},
{
"epoch": 34.97091144149967,
"grad_norm": 1.4724658727645874,
"learning_rate": 0.001,
"loss": 0.9847,
"step": 108200
},
{
"epoch": 35.003232062055595,
"grad_norm": 1.3865439891815186,
"learning_rate": 0.001,
"loss": 0.9699,
"step": 108300
},
{
"epoch": 35.03555268261151,
"grad_norm": 1.377656102180481,
"learning_rate": 0.001,
"loss": 0.8899,
"step": 108400
},
{
"epoch": 35.067873303167424,
"grad_norm": 1.2798742055892944,
"learning_rate": 0.001,
"loss": 0.9105,
"step": 108500
},
{
"epoch": 35.10019392372334,
"grad_norm": 1.2324934005737305,
"learning_rate": 0.001,
"loss": 0.8821,
"step": 108600
},
{
"epoch": 35.13251454427925,
"grad_norm": 1.3357598781585693,
"learning_rate": 0.001,
"loss": 0.8884,
"step": 108700
},
{
"epoch": 35.16483516483517,
"grad_norm": 1.5128265619277954,
"learning_rate": 0.001,
"loss": 0.9217,
"step": 108800
},
{
"epoch": 35.19715578539108,
"grad_norm": 1.328216314315796,
"learning_rate": 0.001,
"loss": 0.9182,
"step": 108900
},
{
"epoch": 35.229476405947,
"grad_norm": 1.1615173816680908,
"learning_rate": 0.001,
"loss": 0.8841,
"step": 109000
},
{
"epoch": 35.26179702650291,
"grad_norm": 1.154482364654541,
"learning_rate": 0.001,
"loss": 0.9154,
"step": 109100
},
{
"epoch": 35.294117647058826,
"grad_norm": 1.2462170124053955,
"learning_rate": 0.001,
"loss": 0.9134,
"step": 109200
},
{
"epoch": 35.32643826761474,
"grad_norm": 1.3430410623550415,
"learning_rate": 0.001,
"loss": 0.9442,
"step": 109300
},
{
"epoch": 35.358758888170655,
"grad_norm": 1.0239200592041016,
"learning_rate": 0.001,
"loss": 0.9379,
"step": 109400
},
{
"epoch": 35.39107950872657,
"grad_norm": 1.3149348497390747,
"learning_rate": 0.001,
"loss": 0.9362,
"step": 109500
},
{
"epoch": 35.423400129282484,
"grad_norm": 1.2154160737991333,
"learning_rate": 0.001,
"loss": 0.9421,
"step": 109600
},
{
"epoch": 35.4557207498384,
"grad_norm": 1.1552187204360962,
"learning_rate": 0.001,
"loss": 0.9314,
"step": 109700
},
{
"epoch": 35.48804137039431,
"grad_norm": 1.5669306516647339,
"learning_rate": 0.001,
"loss": 0.9438,
"step": 109800
},
{
"epoch": 35.52036199095023,
"grad_norm": 1.1329694986343384,
"learning_rate": 0.001,
"loss": 0.9255,
"step": 109900
},
{
"epoch": 35.55268261150614,
"grad_norm": 1.1270161867141724,
"learning_rate": 0.001,
"loss": 0.9267,
"step": 110000
},
{
"epoch": 35.58500323206206,
"grad_norm": 1.4306011199951172,
"learning_rate": 0.001,
"loss": 0.9465,
"step": 110100
},
{
"epoch": 35.61732385261797,
"grad_norm": 1.260940670967102,
"learning_rate": 0.001,
"loss": 0.9495,
"step": 110200
},
{
"epoch": 35.649644473173886,
"grad_norm": 1.591579794883728,
"learning_rate": 0.001,
"loss": 0.9466,
"step": 110300
},
{
"epoch": 35.6819650937298,
"grad_norm": 1.207124948501587,
"learning_rate": 0.001,
"loss": 0.9448,
"step": 110400
},
{
"epoch": 35.714285714285715,
"grad_norm": 1.2340648174285889,
"learning_rate": 0.001,
"loss": 0.9448,
"step": 110500
},
{
"epoch": 35.74660633484163,
"grad_norm": 1.2188169956207275,
"learning_rate": 0.001,
"loss": 0.962,
"step": 110600
},
{
"epoch": 35.778926955397544,
"grad_norm": 1.1369333267211914,
"learning_rate": 0.001,
"loss": 0.9493,
"step": 110700
},
{
"epoch": 35.81124757595346,
"grad_norm": 1.1521050930023193,
"learning_rate": 0.001,
"loss": 0.9591,
"step": 110800
},
{
"epoch": 35.84356819650937,
"grad_norm": 1.3265663385391235,
"learning_rate": 0.001,
"loss": 0.962,
"step": 110900
},
{
"epoch": 35.87588881706529,
"grad_norm": 1.1715490818023682,
"learning_rate": 0.001,
"loss": 0.9482,
"step": 111000
},
{
"epoch": 35.9082094376212,
"grad_norm": 1.5694321393966675,
"learning_rate": 0.001,
"loss": 0.9593,
"step": 111100
},
{
"epoch": 35.94053005817712,
"grad_norm": 1.4879381656646729,
"learning_rate": 0.001,
"loss": 0.9633,
"step": 111200
},
{
"epoch": 35.97285067873303,
"grad_norm": 1.109749674797058,
"learning_rate": 0.001,
"loss": 0.9738,
"step": 111300
},
{
"epoch": 36.005171299288946,
"grad_norm": 1.32253098487854,
"learning_rate": 0.001,
"loss": 0.974,
"step": 111400
},
{
"epoch": 36.03749191984486,
"grad_norm": 1.4396809339523315,
"learning_rate": 0.001,
"loss": 0.8579,
"step": 111500
},
{
"epoch": 36.069812540400775,
"grad_norm": 1.4428057670593262,
"learning_rate": 0.001,
"loss": 0.8854,
"step": 111600
},
{
"epoch": 36.10213316095669,
"grad_norm": 1.3725905418395996,
"learning_rate": 0.001,
"loss": 0.9104,
"step": 111700
},
{
"epoch": 36.134453781512605,
"grad_norm": 1.1569488048553467,
"learning_rate": 0.001,
"loss": 0.8795,
"step": 111800
},
{
"epoch": 36.16677440206852,
"grad_norm": 1.3119192123413086,
"learning_rate": 0.001,
"loss": 0.8881,
"step": 111900
},
{
"epoch": 36.199095022624434,
"grad_norm": 1.108713984489441,
"learning_rate": 0.001,
"loss": 0.8895,
"step": 112000
},
{
"epoch": 36.23141564318035,
"grad_norm": 1.3103444576263428,
"learning_rate": 0.001,
"loss": 0.884,
"step": 112100
},
{
"epoch": 36.26373626373626,
"grad_norm": 1.0280040502548218,
"learning_rate": 0.001,
"loss": 0.8962,
"step": 112200
},
{
"epoch": 36.29605688429218,
"grad_norm": 1.3514018058776855,
"learning_rate": 0.001,
"loss": 0.8936,
"step": 112300
},
{
"epoch": 36.32837750484809,
"grad_norm": 1.2298413515090942,
"learning_rate": 0.001,
"loss": 0.8838,
"step": 112400
},
{
"epoch": 36.36069812540401,
"grad_norm": 1.0672255754470825,
"learning_rate": 0.001,
"loss": 0.9085,
"step": 112500
},
{
"epoch": 36.39301874595992,
"grad_norm": 1.4790087938308716,
"learning_rate": 0.001,
"loss": 0.9262,
"step": 112600
},
{
"epoch": 36.425339366515836,
"grad_norm": 1.1427301168441772,
"learning_rate": 0.001,
"loss": 0.9084,
"step": 112700
},
{
"epoch": 36.45765998707175,
"grad_norm": 1.0541187524795532,
"learning_rate": 0.001,
"loss": 0.9191,
"step": 112800
},
{
"epoch": 36.489980607627665,
"grad_norm": 1.4663690328598022,
"learning_rate": 0.001,
"loss": 0.9199,
"step": 112900
},
{
"epoch": 36.52230122818358,
"grad_norm": 20.26365089416504,
"learning_rate": 0.001,
"loss": 0.943,
"step": 113000
},
{
"epoch": 36.554621848739494,
"grad_norm": 1.370599389076233,
"learning_rate": 0.001,
"loss": 0.9374,
"step": 113100
},
{
"epoch": 36.58694246929541,
"grad_norm": 1.2954767942428589,
"learning_rate": 0.001,
"loss": 0.9299,
"step": 113200
},
{
"epoch": 36.61926308985132,
"grad_norm": 1.1597362756729126,
"learning_rate": 0.001,
"loss": 0.9112,
"step": 113300
},
{
"epoch": 36.65158371040724,
"grad_norm": 1.1877658367156982,
"learning_rate": 0.001,
"loss": 0.9379,
"step": 113400
},
{
"epoch": 36.68390433096315,
"grad_norm": 1.0057965517044067,
"learning_rate": 0.001,
"loss": 0.9497,
"step": 113500
},
{
"epoch": 36.71622495151907,
"grad_norm": 1.238929033279419,
"learning_rate": 0.001,
"loss": 0.9472,
"step": 113600
},
{
"epoch": 36.74854557207498,
"grad_norm": 1.2580220699310303,
"learning_rate": 0.001,
"loss": 0.9394,
"step": 113700
},
{
"epoch": 36.780866192630896,
"grad_norm": 1.4125406742095947,
"learning_rate": 0.001,
"loss": 0.9507,
"step": 113800
},
{
"epoch": 36.81318681318681,
"grad_norm": 1.0683133602142334,
"learning_rate": 0.001,
"loss": 0.9365,
"step": 113900
},
{
"epoch": 36.845507433742725,
"grad_norm": 1.269522786140442,
"learning_rate": 0.001,
"loss": 0.9508,
"step": 114000
},
{
"epoch": 36.87782805429864,
"grad_norm": 1.5544761419296265,
"learning_rate": 0.001,
"loss": 0.9311,
"step": 114100
},
{
"epoch": 36.910148674854554,
"grad_norm": 1.7500112056732178,
"learning_rate": 0.001,
"loss": 0.9612,
"step": 114200
},
{
"epoch": 36.94246929541047,
"grad_norm": 1.395186185836792,
"learning_rate": 0.001,
"loss": 0.9531,
"step": 114300
},
{
"epoch": 36.97478991596638,
"grad_norm": 1.5248706340789795,
"learning_rate": 0.001,
"loss": 0.9398,
"step": 114400
},
{
"epoch": 37.007110536522305,
"grad_norm": 1.1058337688446045,
"learning_rate": 0.001,
"loss": 0.9588,
"step": 114500
},
{
"epoch": 37.03943115707822,
"grad_norm": 1.2561908960342407,
"learning_rate": 0.001,
"loss": 0.8687,
"step": 114600
},
{
"epoch": 37.071751777634134,
"grad_norm": 1.2578023672103882,
"learning_rate": 0.001,
"loss": 0.8513,
"step": 114700
},
{
"epoch": 37.10407239819005,
"grad_norm": 1.7073860168457031,
"learning_rate": 0.001,
"loss": 0.8689,
"step": 114800
},
{
"epoch": 37.13639301874596,
"grad_norm": 1.2335535287857056,
"learning_rate": 0.001,
"loss": 0.8638,
"step": 114900
},
{
"epoch": 37.16871363930188,
"grad_norm": 1.389021396636963,
"learning_rate": 0.001,
"loss": 0.8734,
"step": 115000
},
{
"epoch": 37.20103425985779,
"grad_norm": 1.0842416286468506,
"learning_rate": 0.001,
"loss": 0.8801,
"step": 115100
},
{
"epoch": 37.23335488041371,
"grad_norm": 1.200750708580017,
"learning_rate": 0.001,
"loss": 0.8774,
"step": 115200
},
{
"epoch": 37.26567550096962,
"grad_norm": 1.2029190063476562,
"learning_rate": 0.001,
"loss": 0.893,
"step": 115300
},
{
"epoch": 37.297996121525536,
"grad_norm": 1.0285815000534058,
"learning_rate": 0.001,
"loss": 0.9137,
"step": 115400
},
{
"epoch": 37.33031674208145,
"grad_norm": 1.4431654214859009,
"learning_rate": 0.001,
"loss": 0.9109,
"step": 115500
},
{
"epoch": 37.362637362637365,
"grad_norm": 1.1851094961166382,
"learning_rate": 0.001,
"loss": 0.8865,
"step": 115600
},
{
"epoch": 37.39495798319328,
"grad_norm": 1.4423298835754395,
"learning_rate": 0.001,
"loss": 0.9152,
"step": 115700
},
{
"epoch": 37.427278603749194,
"grad_norm": 1.3550646305084229,
"learning_rate": 0.001,
"loss": 0.9074,
"step": 115800
},
{
"epoch": 37.45959922430511,
"grad_norm": 1.1404973268508911,
"learning_rate": 0.001,
"loss": 0.9146,
"step": 115900
},
{
"epoch": 37.49191984486102,
"grad_norm": 1.348008632659912,
"learning_rate": 0.001,
"loss": 0.9181,
"step": 116000
},
{
"epoch": 37.52424046541694,
"grad_norm": 1.3652067184448242,
"learning_rate": 0.001,
"loss": 0.9212,
"step": 116100
},
{
"epoch": 37.55656108597285,
"grad_norm": 1.0638148784637451,
"learning_rate": 0.001,
"loss": 0.9181,
"step": 116200
},
{
"epoch": 37.58888170652877,
"grad_norm": 1.0108212232589722,
"learning_rate": 0.001,
"loss": 0.9068,
"step": 116300
},
{
"epoch": 37.62120232708468,
"grad_norm": 1.4408948421478271,
"learning_rate": 0.001,
"loss": 0.9113,
"step": 116400
},
{
"epoch": 37.653522947640596,
"grad_norm": 1.2225804328918457,
"learning_rate": 0.001,
"loss": 0.9123,
"step": 116500
},
{
"epoch": 37.68584356819651,
"grad_norm": 2.1255993843078613,
"learning_rate": 0.001,
"loss": 0.9122,
"step": 116600
},
{
"epoch": 37.718164188752425,
"grad_norm": 1.3163739442825317,
"learning_rate": 0.001,
"loss": 0.9171,
"step": 116700
},
{
"epoch": 37.75048480930834,
"grad_norm": 1.3287923336029053,
"learning_rate": 0.001,
"loss": 0.9176,
"step": 116800
},
{
"epoch": 37.782805429864254,
"grad_norm": 1.0648704767227173,
"learning_rate": 0.001,
"loss": 0.9277,
"step": 116900
},
{
"epoch": 37.81512605042017,
"grad_norm": 1.2207541465759277,
"learning_rate": 0.001,
"loss": 0.9486,
"step": 117000
},
{
"epoch": 37.84744667097608,
"grad_norm": 1.2167179584503174,
"learning_rate": 0.001,
"loss": 0.9341,
"step": 117100
},
{
"epoch": 37.879767291532,
"grad_norm": 1.1062008142471313,
"learning_rate": 0.001,
"loss": 0.9155,
"step": 117200
},
{
"epoch": 37.91208791208791,
"grad_norm": 1.135310173034668,
"learning_rate": 0.001,
"loss": 0.9465,
"step": 117300
},
{
"epoch": 37.94440853264383,
"grad_norm": 1.182563304901123,
"learning_rate": 0.001,
"loss": 0.9311,
"step": 117400
},
{
"epoch": 37.97672915319974,
"grad_norm": 1.289273977279663,
"learning_rate": 0.001,
"loss": 0.9395,
"step": 117500
},
{
"epoch": 38.009049773755656,
"grad_norm": 2.1775407791137695,
"learning_rate": 0.001,
"loss": 0.912,
"step": 117600
},
{
"epoch": 38.04137039431157,
"grad_norm": 1.0216083526611328,
"learning_rate": 0.001,
"loss": 0.8395,
"step": 117700
},
{
"epoch": 38.073691014867485,
"grad_norm": 1.5370399951934814,
"learning_rate": 0.001,
"loss": 0.8599,
"step": 117800
},
{
"epoch": 38.1060116354234,
"grad_norm": 1.2524052858352661,
"learning_rate": 0.001,
"loss": 0.8611,
"step": 117900
},
{
"epoch": 38.138332255979314,
"grad_norm": 1.2202684879302979,
"learning_rate": 0.001,
"loss": 0.8669,
"step": 118000
},
{
"epoch": 38.17065287653523,
"grad_norm": 1.1983212232589722,
"learning_rate": 0.001,
"loss": 0.859,
"step": 118100
},
{
"epoch": 38.20297349709114,
"grad_norm": 3.6025729179382324,
"learning_rate": 0.001,
"loss": 0.8675,
"step": 118200
},
{
"epoch": 38.23529411764706,
"grad_norm": 1.2796443700790405,
"learning_rate": 0.001,
"loss": 0.8688,
"step": 118300
},
{
"epoch": 38.26761473820297,
"grad_norm": 1.3514477014541626,
"learning_rate": 0.001,
"loss": 0.8642,
"step": 118400
},
{
"epoch": 38.29993535875889,
"grad_norm": 1.2543727159500122,
"learning_rate": 0.001,
"loss": 0.892,
"step": 118500
},
{
"epoch": 38.3322559793148,
"grad_norm": 2.0149078369140625,
"learning_rate": 0.001,
"loss": 0.875,
"step": 118600
},
{
"epoch": 38.364576599870716,
"grad_norm": 1.5488308668136597,
"learning_rate": 0.001,
"loss": 0.9035,
"step": 118700
},
{
"epoch": 38.39689722042663,
"grad_norm": 1.1685779094696045,
"learning_rate": 0.001,
"loss": 0.8959,
"step": 118800
},
{
"epoch": 38.429217840982545,
"grad_norm": 1.3017526865005493,
"learning_rate": 0.001,
"loss": 0.8843,
"step": 118900
},
{
"epoch": 38.46153846153846,
"grad_norm": 1.4337364435195923,
"learning_rate": 0.001,
"loss": 0.8842,
"step": 119000
},
{
"epoch": 38.493859082094374,
"grad_norm": 1.4739630222320557,
"learning_rate": 0.001,
"loss": 0.8781,
"step": 119100
},
{
"epoch": 38.52617970265029,
"grad_norm": 1.5166560411453247,
"learning_rate": 0.001,
"loss": 0.9092,
"step": 119200
},
{
"epoch": 38.558500323206204,
"grad_norm": 1.242997407913208,
"learning_rate": 0.001,
"loss": 0.8854,
"step": 119300
},
{
"epoch": 38.59082094376212,
"grad_norm": 1.5462573766708374,
"learning_rate": 0.001,
"loss": 0.9008,
"step": 119400
},
{
"epoch": 38.62314156431803,
"grad_norm": 1.318841576576233,
"learning_rate": 0.001,
"loss": 0.9186,
"step": 119500
},
{
"epoch": 38.65546218487395,
"grad_norm": 1.2831882238388062,
"learning_rate": 0.001,
"loss": 0.9134,
"step": 119600
},
{
"epoch": 38.68778280542986,
"grad_norm": 1.7237813472747803,
"learning_rate": 0.001,
"loss": 0.9139,
"step": 119700
},
{
"epoch": 38.720103425985776,
"grad_norm": 1.2951987981796265,
"learning_rate": 0.001,
"loss": 0.9191,
"step": 119800
},
{
"epoch": 38.75242404654169,
"grad_norm": 1.2112561464309692,
"learning_rate": 0.001,
"loss": 0.8893,
"step": 119900
},
{
"epoch": 38.784744667097605,
"grad_norm": 1.510880708694458,
"learning_rate": 0.001,
"loss": 0.9243,
"step": 120000
},
{
"epoch": 38.81706528765352,
"grad_norm": 1.351163387298584,
"learning_rate": 0.001,
"loss": 0.9163,
"step": 120100
},
{
"epoch": 38.849385908209435,
"grad_norm": 1.3247283697128296,
"learning_rate": 0.001,
"loss": 0.9262,
"step": 120200
},
{
"epoch": 38.88170652876535,
"grad_norm": 1.0785566568374634,
"learning_rate": 0.001,
"loss": 0.9076,
"step": 120300
},
{
"epoch": 38.914027149321264,
"grad_norm": 1.0955142974853516,
"learning_rate": 0.001,
"loss": 0.9009,
"step": 120400
},
{
"epoch": 38.94634776987718,
"grad_norm": 1.0944831371307373,
"learning_rate": 0.001,
"loss": 0.9169,
"step": 120500
},
{
"epoch": 38.97866839043309,
"grad_norm": 2.2278664112091064,
"learning_rate": 0.001,
"loss": 0.9378,
"step": 120600
},
{
"epoch": 39.010989010989015,
"grad_norm": 1.430474042892456,
"learning_rate": 0.001,
"loss": 0.8951,
"step": 120700
},
{
"epoch": 39.04330963154493,
"grad_norm": 1.299006462097168,
"learning_rate": 0.001,
"loss": 0.82,
"step": 120800
},
{
"epoch": 39.075630252100844,
"grad_norm": 1.3967899084091187,
"learning_rate": 0.001,
"loss": 0.8329,
"step": 120900
},
{
"epoch": 39.10795087265676,
"grad_norm": 1.8231886625289917,
"learning_rate": 0.001,
"loss": 0.8339,
"step": 121000
},
{
"epoch": 39.14027149321267,
"grad_norm": 2.1227855682373047,
"learning_rate": 0.001,
"loss": 0.8417,
"step": 121100
},
{
"epoch": 39.17259211376859,
"grad_norm": 1.2603408098220825,
"learning_rate": 0.001,
"loss": 0.8658,
"step": 121200
},
{
"epoch": 39.2049127343245,
"grad_norm": 1.3786275386810303,
"learning_rate": 0.001,
"loss": 0.8835,
"step": 121300
},
{
"epoch": 39.237233354880416,
"grad_norm": 1.3917183876037598,
"learning_rate": 0.001,
"loss": 0.8726,
"step": 121400
},
{
"epoch": 39.26955397543633,
"grad_norm": 1.4549587965011597,
"learning_rate": 0.001,
"loss": 0.8656,
"step": 121500
},
{
"epoch": 39.301874595992246,
"grad_norm": 1.4152559041976929,
"learning_rate": 0.001,
"loss": 0.8748,
"step": 121600
},
{
"epoch": 39.33419521654816,
"grad_norm": 2.8188393115997314,
"learning_rate": 0.001,
"loss": 0.8734,
"step": 121700
},
{
"epoch": 39.366515837104075,
"grad_norm": 1.3342585563659668,
"learning_rate": 0.001,
"loss": 0.847,
"step": 121800
},
{
"epoch": 39.39883645765999,
"grad_norm": 1.2291045188903809,
"learning_rate": 0.001,
"loss": 0.8779,
"step": 121900
},
{
"epoch": 39.431157078215904,
"grad_norm": 1.130303978919983,
"learning_rate": 0.001,
"loss": 0.8561,
"step": 122000
},
{
"epoch": 39.46347769877182,
"grad_norm": 1.1709375381469727,
"learning_rate": 0.001,
"loss": 0.8792,
"step": 122100
},
{
"epoch": 39.49579831932773,
"grad_norm": 1.5080801248550415,
"learning_rate": 0.001,
"loss": 0.8826,
"step": 122200
},
{
"epoch": 39.52811893988365,
"grad_norm": 1.1423321962356567,
"learning_rate": 0.001,
"loss": 0.9035,
"step": 122300
},
{
"epoch": 39.56043956043956,
"grad_norm": 1.049439549446106,
"learning_rate": 0.001,
"loss": 0.8805,
"step": 122400
},
{
"epoch": 39.59276018099548,
"grad_norm": 1.460243821144104,
"learning_rate": 0.001,
"loss": 0.9,
"step": 122500
},
{
"epoch": 39.62508080155139,
"grad_norm": 1.9269883632659912,
"learning_rate": 0.001,
"loss": 0.9058,
"step": 122600
},
{
"epoch": 39.657401422107306,
"grad_norm": 1.2040832042694092,
"learning_rate": 0.001,
"loss": 0.8709,
"step": 122700
},
{
"epoch": 39.68972204266322,
"grad_norm": 1.3963993787765503,
"learning_rate": 0.001,
"loss": 0.9073,
"step": 122800
},
{
"epoch": 39.722042663219135,
"grad_norm": 1.2941703796386719,
"learning_rate": 0.001,
"loss": 0.8873,
"step": 122900
},
{
"epoch": 39.75436328377505,
"grad_norm": 1.2239911556243896,
"learning_rate": 0.001,
"loss": 0.8998,
"step": 123000
},
{
"epoch": 39.786683904330964,
"grad_norm": 1.0870931148529053,
"learning_rate": 0.001,
"loss": 0.8863,
"step": 123100
},
{
"epoch": 39.81900452488688,
"grad_norm": 1.0407912731170654,
"learning_rate": 0.001,
"loss": 0.8919,
"step": 123200
},
{
"epoch": 39.85132514544279,
"grad_norm": 1.204813838005066,
"learning_rate": 0.001,
"loss": 0.8963,
"step": 123300
},
{
"epoch": 39.88364576599871,
"grad_norm": 1.4278241395950317,
"learning_rate": 0.001,
"loss": 0.9084,
"step": 123400
},
{
"epoch": 39.91596638655462,
"grad_norm": 1.1825990676879883,
"learning_rate": 0.001,
"loss": 0.9032,
"step": 123500
},
{
"epoch": 39.94828700711054,
"grad_norm": 1.0813344717025757,
"learning_rate": 0.001,
"loss": 0.9231,
"step": 123600
},
{
"epoch": 39.98060762766645,
"grad_norm": 1.409714937210083,
"learning_rate": 0.001,
"loss": 0.9177,
"step": 123700
},
{
"epoch": 40.012928248222366,
"grad_norm": 1.474061131477356,
"learning_rate": 0.001,
"loss": 0.8616,
"step": 123800
},
{
"epoch": 40.04524886877828,
"grad_norm": 1.4782670736312866,
"learning_rate": 0.001,
"loss": 0.8286,
"step": 123900
},
{
"epoch": 40.077569489334195,
"grad_norm": 1.4716808795928955,
"learning_rate": 0.001,
"loss": 0.8311,
"step": 124000
},
{
"epoch": 40.10989010989011,
"grad_norm": 1.3071645498275757,
"learning_rate": 0.001,
"loss": 0.8232,
"step": 124100
},
{
"epoch": 40.142210730446024,
"grad_norm": 1.4670727252960205,
"learning_rate": 0.001,
"loss": 0.8244,
"step": 124200
},
{
"epoch": 40.17453135100194,
"grad_norm": 1.10783851146698,
"learning_rate": 0.001,
"loss": 0.8399,
"step": 124300
},
{
"epoch": 40.20685197155785,
"grad_norm": 1.0578640699386597,
"learning_rate": 0.001,
"loss": 0.8455,
"step": 124400
},
{
"epoch": 40.23917259211377,
"grad_norm": 1.236038327217102,
"learning_rate": 0.001,
"loss": 0.842,
"step": 124500
},
{
"epoch": 40.27149321266968,
"grad_norm": 1.1498534679412842,
"learning_rate": 0.001,
"loss": 0.8523,
"step": 124600
},
{
"epoch": 40.3038138332256,
"grad_norm": 1.199738621711731,
"learning_rate": 0.001,
"loss": 0.8457,
"step": 124700
},
{
"epoch": 40.33613445378151,
"grad_norm": 1.2088783979415894,
"learning_rate": 0.001,
"loss": 0.8572,
"step": 124800
},
{
"epoch": 40.368455074337426,
"grad_norm": 1.3873686790466309,
"learning_rate": 0.001,
"loss": 0.8561,
"step": 124900
},
{
"epoch": 40.40077569489334,
"grad_norm": 1.2533921003341675,
"learning_rate": 0.001,
"loss": 0.8655,
"step": 125000
},
{
"epoch": 40.433096315449255,
"grad_norm": 1.8301284313201904,
"learning_rate": 0.001,
"loss": 0.8748,
"step": 125100
},
{
"epoch": 40.46541693600517,
"grad_norm": 1.1949044466018677,
"learning_rate": 0.001,
"loss": 0.859,
"step": 125200
},
{
"epoch": 40.497737556561084,
"grad_norm": 2.156513214111328,
"learning_rate": 0.001,
"loss": 0.8567,
"step": 125300
},
{
"epoch": 40.530058177117,
"grad_norm": 1.2791069746017456,
"learning_rate": 0.001,
"loss": 0.8792,
"step": 125400
},
{
"epoch": 40.56237879767291,
"grad_norm": 1.3833438158035278,
"learning_rate": 0.001,
"loss": 0.8753,
"step": 125500
},
{
"epoch": 40.59469941822883,
"grad_norm": 1.301124095916748,
"learning_rate": 0.001,
"loss": 0.8718,
"step": 125600
},
{
"epoch": 40.62702003878474,
"grad_norm": 1.559802770614624,
"learning_rate": 0.001,
"loss": 0.8723,
"step": 125700
},
{
"epoch": 40.65934065934066,
"grad_norm": 1.1438794136047363,
"learning_rate": 0.001,
"loss": 0.8669,
"step": 125800
},
{
"epoch": 40.69166127989657,
"grad_norm": 1.6619253158569336,
"learning_rate": 0.001,
"loss": 0.8853,
"step": 125900
},
{
"epoch": 40.723981900452486,
"grad_norm": 1.3953354358673096,
"learning_rate": 0.001,
"loss": 0.8997,
"step": 126000
},
{
"epoch": 40.7563025210084,
"grad_norm": 1.3383327722549438,
"learning_rate": 0.001,
"loss": 0.8962,
"step": 126100
},
{
"epoch": 40.788623141564315,
"grad_norm": 1.0945528745651245,
"learning_rate": 0.001,
"loss": 0.8861,
"step": 126200
},
{
"epoch": 40.82094376212023,
"grad_norm": 1.4700912237167358,
"learning_rate": 0.001,
"loss": 0.899,
"step": 126300
},
{
"epoch": 40.853264382676144,
"grad_norm": 1.1272400617599487,
"learning_rate": 0.001,
"loss": 0.9029,
"step": 126400
},
{
"epoch": 40.88558500323206,
"grad_norm": 1.222775936126709,
"learning_rate": 0.001,
"loss": 0.8966,
"step": 126500
},
{
"epoch": 40.91790562378797,
"grad_norm": 1.9689314365386963,
"learning_rate": 0.001,
"loss": 0.894,
"step": 126600
},
{
"epoch": 40.95022624434389,
"grad_norm": 1.0816248655319214,
"learning_rate": 0.001,
"loss": 0.8955,
"step": 126700
},
{
"epoch": 40.9825468648998,
"grad_norm": 7.644120216369629,
"learning_rate": 0.001,
"loss": 0.8911,
"step": 126800
},
{
"epoch": 41.014867485455724,
"grad_norm": 1.3644909858703613,
"learning_rate": 0.001,
"loss": 0.8324,
"step": 126900
},
{
"epoch": 41.04718810601164,
"grad_norm": 1.2451497316360474,
"learning_rate": 0.001,
"loss": 0.8106,
"step": 127000
},
{
"epoch": 41.07950872656755,
"grad_norm": 2.127196788787842,
"learning_rate": 0.001,
"loss": 0.8169,
"step": 127100
},
{
"epoch": 41.11182934712347,
"grad_norm": 1.1432043313980103,
"learning_rate": 0.001,
"loss": 0.8184,
"step": 127200
},
{
"epoch": 41.14414996767938,
"grad_norm": 1.2829303741455078,
"learning_rate": 0.001,
"loss": 0.8124,
"step": 127300
},
{
"epoch": 41.1764705882353,
"grad_norm": 0.8562968373298645,
"learning_rate": 0.001,
"loss": 0.8184,
"step": 127400
},
{
"epoch": 41.20879120879121,
"grad_norm": 0.9684380888938904,
"learning_rate": 0.001,
"loss": 0.828,
"step": 127500
},
{
"epoch": 41.241111829347126,
"grad_norm": 1.0784997940063477,
"learning_rate": 0.001,
"loss": 0.836,
"step": 127600
},
{
"epoch": 41.27343244990304,
"grad_norm": 1.046213984489441,
"learning_rate": 0.001,
"loss": 0.844,
"step": 127700
},
{
"epoch": 41.305753070458955,
"grad_norm": 0.8843748569488525,
"learning_rate": 0.001,
"loss": 0.84,
"step": 127800
},
{
"epoch": 41.33807369101487,
"grad_norm": 0.7278721332550049,
"learning_rate": 0.001,
"loss": 0.8598,
"step": 127900
},
{
"epoch": 41.370394311570784,
"grad_norm": 0.9991313815116882,
"learning_rate": 0.001,
"loss": 0.8365,
"step": 128000
},
{
"epoch": 41.4027149321267,
"grad_norm": 0.8424844145774841,
"learning_rate": 0.001,
"loss": 0.8411,
"step": 128100
},
{
"epoch": 41.43503555268261,
"grad_norm": 1.4285770654678345,
"learning_rate": 0.001,
"loss": 0.8432,
"step": 128200
},
{
"epoch": 41.46735617323853,
"grad_norm": 1.2143181562423706,
"learning_rate": 0.001,
"loss": 0.8543,
"step": 128300
},
{
"epoch": 41.49967679379444,
"grad_norm": 1.2977038621902466,
"learning_rate": 0.001,
"loss": 0.8526,
"step": 128400
},
{
"epoch": 41.53199741435036,
"grad_norm": 1.0228110551834106,
"learning_rate": 0.001,
"loss": 0.8542,
"step": 128500
},
{
"epoch": 41.56431803490627,
"grad_norm": 1.2492506504058838,
"learning_rate": 0.001,
"loss": 0.8704,
"step": 128600
},
{
"epoch": 41.596638655462186,
"grad_norm": 1.5715211629867554,
"learning_rate": 0.001,
"loss": 0.8777,
"step": 128700
},
{
"epoch": 41.6289592760181,
"grad_norm": 1.181208610534668,
"learning_rate": 0.001,
"loss": 0.8637,
"step": 128800
},
{
"epoch": 41.661279896574015,
"grad_norm": 0.986804187297821,
"learning_rate": 0.001,
"loss": 0.8951,
"step": 128900
},
{
"epoch": 41.69360051712993,
"grad_norm": 1.0209100246429443,
"learning_rate": 0.001,
"loss": 0.8649,
"step": 129000
},
{
"epoch": 41.725921137685845,
"grad_norm": 1.1363537311553955,
"learning_rate": 0.001,
"loss": 0.8669,
"step": 129100
},
{
"epoch": 41.75824175824176,
"grad_norm": 1.1821092367172241,
"learning_rate": 0.001,
"loss": 0.8749,
"step": 129200
},
{
"epoch": 41.790562378797674,
"grad_norm": 0.9846389293670654,
"learning_rate": 0.001,
"loss": 0.8755,
"step": 129300
},
{
"epoch": 41.82288299935359,
"grad_norm": 1.3120102882385254,
"learning_rate": 0.001,
"loss": 0.8847,
"step": 129400
},
{
"epoch": 41.8552036199095,
"grad_norm": 1.2054803371429443,
"learning_rate": 0.001,
"loss": 0.8704,
"step": 129500
},
{
"epoch": 41.88752424046542,
"grad_norm": 1.1038309335708618,
"learning_rate": 0.001,
"loss": 0.8747,
"step": 129600
},
{
"epoch": 41.91984486102133,
"grad_norm": 3.2323157787323,
"learning_rate": 0.001,
"loss": 0.8803,
"step": 129700
},
{
"epoch": 41.95216548157725,
"grad_norm": 1.0762603282928467,
"learning_rate": 0.001,
"loss": 0.8818,
"step": 129800
},
{
"epoch": 41.98448610213316,
"grad_norm": 1.0352551937103271,
"learning_rate": 0.001,
"loss": 0.8977,
"step": 129900
},
{
"epoch": 42.016806722689076,
"grad_norm": 1.9383422136306763,
"learning_rate": 0.001,
"loss": 0.8207,
"step": 130000
},
{
"epoch": 42.04912734324499,
"grad_norm": 1.456878900527954,
"learning_rate": 0.001,
"loss": 0.7909,
"step": 130100
},
{
"epoch": 42.081447963800905,
"grad_norm": 2.3285322189331055,
"learning_rate": 0.001,
"loss": 0.8194,
"step": 130200
},
{
"epoch": 42.11376858435682,
"grad_norm": 1.8079019784927368,
"learning_rate": 0.001,
"loss": 0.8073,
"step": 130300
},
{
"epoch": 42.146089204912734,
"grad_norm": 1.3452057838439941,
"learning_rate": 0.001,
"loss": 0.7993,
"step": 130400
},
{
"epoch": 42.17840982546865,
"grad_norm": 1.915332555770874,
"learning_rate": 0.001,
"loss": 0.8246,
"step": 130500
},
{
"epoch": 42.21073044602456,
"grad_norm": 1.9111815690994263,
"learning_rate": 0.001,
"loss": 0.8088,
"step": 130600
},
{
"epoch": 42.24305106658048,
"grad_norm": 1.762703537940979,
"learning_rate": 0.001,
"loss": 0.8243,
"step": 130700
},
{
"epoch": 42.27537168713639,
"grad_norm": 1.3808751106262207,
"learning_rate": 0.001,
"loss": 0.8224,
"step": 130800
},
{
"epoch": 42.30769230769231,
"grad_norm": 1.7224359512329102,
"learning_rate": 0.001,
"loss": 0.8173,
"step": 130900
},
{
"epoch": 42.34001292824822,
"grad_norm": 2.0048210620880127,
"learning_rate": 0.001,
"loss": 0.833,
"step": 131000
},
{
"epoch": 42.372333548804136,
"grad_norm": 1.6992894411087036,
"learning_rate": 0.001,
"loss": 0.8386,
"step": 131100
},
{
"epoch": 42.40465416936005,
"grad_norm": 1.8275269269943237,
"learning_rate": 0.001,
"loss": 0.8428,
"step": 131200
},
{
"epoch": 42.436974789915965,
"grad_norm": 2.066981554031372,
"learning_rate": 0.001,
"loss": 0.818,
"step": 131300
},
{
"epoch": 42.46929541047188,
"grad_norm": 1.7945083379745483,
"learning_rate": 0.001,
"loss": 0.8342,
"step": 131400
},
{
"epoch": 42.501616031027794,
"grad_norm": 2.0542707443237305,
"learning_rate": 0.001,
"loss": 0.8374,
"step": 131500
},
{
"epoch": 42.53393665158371,
"grad_norm": 1.5740739107131958,
"learning_rate": 0.001,
"loss": 0.8487,
"step": 131600
},
{
"epoch": 42.56625727213962,
"grad_norm": 2.1072022914886475,
"learning_rate": 0.001,
"loss": 0.8606,
"step": 131700
},
{
"epoch": 42.59857789269554,
"grad_norm": 2.105353593826294,
"learning_rate": 0.001,
"loss": 0.8614,
"step": 131800
},
{
"epoch": 42.63089851325145,
"grad_norm": 1.8877348899841309,
"learning_rate": 0.001,
"loss": 0.8343,
"step": 131900
},
{
"epoch": 42.66321913380737,
"grad_norm": 1.740787148475647,
"learning_rate": 0.001,
"loss": 0.8556,
"step": 132000
},
{
"epoch": 42.69553975436328,
"grad_norm": 1.4763180017471313,
"learning_rate": 0.001,
"loss": 0.8485,
"step": 132100
},
{
"epoch": 42.727860374919196,
"grad_norm": 1.9027022123336792,
"learning_rate": 0.001,
"loss": 0.8436,
"step": 132200
},
{
"epoch": 42.76018099547511,
"grad_norm": 1.3983405828475952,
"learning_rate": 0.001,
"loss": 0.8714,
"step": 132300
},
{
"epoch": 42.792501616031025,
"grad_norm": 2.2473886013031006,
"learning_rate": 0.001,
"loss": 0.8717,
"step": 132400
},
{
"epoch": 42.82482223658694,
"grad_norm": 1.3791003227233887,
"learning_rate": 0.001,
"loss": 0.8692,
"step": 132500
},
{
"epoch": 42.857142857142854,
"grad_norm": 1.7160332202911377,
"learning_rate": 0.001,
"loss": 0.8712,
"step": 132600
},
{
"epoch": 42.88946347769877,
"grad_norm": 1.4543373584747314,
"learning_rate": 0.001,
"loss": 0.8719,
"step": 132700
},
{
"epoch": 42.92178409825468,
"grad_norm": 1.5968679189682007,
"learning_rate": 0.001,
"loss": 0.8879,
"step": 132800
},
{
"epoch": 42.9541047188106,
"grad_norm": 1.9997758865356445,
"learning_rate": 0.001,
"loss": 0.8759,
"step": 132900
},
{
"epoch": 42.98642533936652,
"grad_norm": 1.7506520748138428,
"learning_rate": 0.001,
"loss": 0.8855,
"step": 133000
},
{
"epoch": 43.018745959922434,
"grad_norm": 1.527773380279541,
"learning_rate": 0.001,
"loss": 0.8467,
"step": 133100
},
{
"epoch": 43.05106658047835,
"grad_norm": 1.4628387689590454,
"learning_rate": 0.001,
"loss": 0.7821,
"step": 133200
},
{
"epoch": 43.08338720103426,
"grad_norm": 1.3667227029800415,
"learning_rate": 0.001,
"loss": 0.7919,
"step": 133300
},
{
"epoch": 43.11570782159018,
"grad_norm": 1.2891846895217896,
"learning_rate": 0.001,
"loss": 0.8055,
"step": 133400
},
{
"epoch": 43.14802844214609,
"grad_norm": 1.349412441253662,
"learning_rate": 0.001,
"loss": 0.7955,
"step": 133500
},
{
"epoch": 43.18034906270201,
"grad_norm": 1.5655378103256226,
"learning_rate": 0.001,
"loss": 0.7913,
"step": 133600
},
{
"epoch": 43.21266968325792,
"grad_norm": 1.305897831916809,
"learning_rate": 0.001,
"loss": 0.8004,
"step": 133700
},
{
"epoch": 43.244990303813836,
"grad_norm": 1.294546365737915,
"learning_rate": 0.001,
"loss": 0.8053,
"step": 133800
},
{
"epoch": 43.27731092436975,
"grad_norm": 1.4070929288864136,
"learning_rate": 0.001,
"loss": 0.8101,
"step": 133900
},
{
"epoch": 43.309631544925665,
"grad_norm": 1.0786323547363281,
"learning_rate": 0.001,
"loss": 0.805,
"step": 134000
},
{
"epoch": 43.34195216548158,
"grad_norm": 1.484237551689148,
"learning_rate": 0.001,
"loss": 0.8018,
"step": 134100
},
{
"epoch": 43.374272786037494,
"grad_norm": 1.4975357055664062,
"learning_rate": 0.001,
"loss": 0.8563,
"step": 134200
},
{
"epoch": 43.40659340659341,
"grad_norm": 1.1751554012298584,
"learning_rate": 0.001,
"loss": 0.8259,
"step": 134300
},
{
"epoch": 43.43891402714932,
"grad_norm": 1.3102048635482788,
"learning_rate": 0.001,
"loss": 0.796,
"step": 134400
},
{
"epoch": 43.47123464770524,
"grad_norm": 1.3700244426727295,
"learning_rate": 0.001,
"loss": 0.8382,
"step": 134500
},
{
"epoch": 43.50355526826115,
"grad_norm": 1.8419667482376099,
"learning_rate": 0.001,
"loss": 0.8244,
"step": 134600
},
{
"epoch": 43.53587588881707,
"grad_norm": 1.279415249824524,
"learning_rate": 0.001,
"loss": 0.8414,
"step": 134700
},
{
"epoch": 43.56819650937298,
"grad_norm": 1.1568162441253662,
"learning_rate": 0.001,
"loss": 0.8424,
"step": 134800
},
{
"epoch": 43.600517129928896,
"grad_norm": 1.2953382730484009,
"learning_rate": 0.001,
"loss": 0.8303,
"step": 134900
},
{
"epoch": 43.63283775048481,
"grad_norm": 1.2967185974121094,
"learning_rate": 0.001,
"loss": 0.8394,
"step": 135000
},
{
"epoch": 43.665158371040725,
"grad_norm": 1.3641395568847656,
"learning_rate": 0.001,
"loss": 0.8408,
"step": 135100
},
{
"epoch": 43.69747899159664,
"grad_norm": 1.2159711122512817,
"learning_rate": 0.001,
"loss": 0.8462,
"step": 135200
},
{
"epoch": 43.729799612152554,
"grad_norm": 1.5351017713546753,
"learning_rate": 0.001,
"loss": 0.8459,
"step": 135300
},
{
"epoch": 43.76212023270847,
"grad_norm": 1.5198405981063843,
"learning_rate": 0.001,
"loss": 0.8499,
"step": 135400
},
{
"epoch": 43.79444085326438,
"grad_norm": 1.2137715816497803,
"learning_rate": 0.001,
"loss": 0.8526,
"step": 135500
},
{
"epoch": 43.8267614738203,
"grad_norm": 1.2001396417617798,
"learning_rate": 0.001,
"loss": 0.8715,
"step": 135600
},
{
"epoch": 43.85908209437621,
"grad_norm": 1.7482579946517944,
"learning_rate": 0.001,
"loss": 0.8561,
"step": 135700
},
{
"epoch": 43.89140271493213,
"grad_norm": 1.476863980293274,
"learning_rate": 0.001,
"loss": 0.86,
"step": 135800
},
{
"epoch": 43.92372333548804,
"grad_norm": 1.6021820306777954,
"learning_rate": 0.001,
"loss": 0.881,
"step": 135900
},
{
"epoch": 43.956043956043956,
"grad_norm": 1.5128792524337769,
"learning_rate": 0.001,
"loss": 0.873,
"step": 136000
},
{
"epoch": 43.98836457659987,
"grad_norm": 1.4070549011230469,
"learning_rate": 0.001,
"loss": 0.8732,
"step": 136100
},
{
"epoch": 44.020685197155785,
"grad_norm": 1.2981127500534058,
"learning_rate": 0.001,
"loss": 0.8268,
"step": 136200
},
{
"epoch": 44.0530058177117,
"grad_norm": 1.4790092706680298,
"learning_rate": 0.001,
"loss": 0.7825,
"step": 136300
},
{
"epoch": 44.085326438267614,
"grad_norm": 1.3770111799240112,
"learning_rate": 0.001,
"loss": 0.7824,
"step": 136400
},
{
"epoch": 44.11764705882353,
"grad_norm": 9.711091041564941,
"learning_rate": 0.001,
"loss": 0.7852,
"step": 136500
},
{
"epoch": 44.14996767937944,
"grad_norm": 1.4254145622253418,
"learning_rate": 0.001,
"loss": 0.7864,
"step": 136600
},
{
"epoch": 44.18228829993536,
"grad_norm": 1.2787805795669556,
"learning_rate": 0.001,
"loss": 0.79,
"step": 136700
},
{
"epoch": 44.21460892049127,
"grad_norm": 1.3041934967041016,
"learning_rate": 0.001,
"loss": 0.8038,
"step": 136800
},
{
"epoch": 44.24692954104719,
"grad_norm": 1.2634992599487305,
"learning_rate": 0.001,
"loss": 0.8016,
"step": 136900
},
{
"epoch": 44.2792501616031,
"grad_norm": 1.237308144569397,
"learning_rate": 0.001,
"loss": 0.8099,
"step": 137000
},
{
"epoch": 44.311570782159016,
"grad_norm": 1.2396942377090454,
"learning_rate": 0.001,
"loss": 0.8169,
"step": 137100
},
{
"epoch": 44.34389140271493,
"grad_norm": 1.2848567962646484,
"learning_rate": 0.001,
"loss": 0.8059,
"step": 137200
},
{
"epoch": 44.376212023270845,
"grad_norm": 1.5477381944656372,
"learning_rate": 0.001,
"loss": 0.8163,
"step": 137300
},
{
"epoch": 44.40853264382676,
"grad_norm": 1.2960271835327148,
"learning_rate": 0.001,
"loss": 0.8032,
"step": 137400
},
{
"epoch": 44.440853264382675,
"grad_norm": 1.273160696029663,
"learning_rate": 0.001,
"loss": 0.8081,
"step": 137500
},
{
"epoch": 44.47317388493859,
"grad_norm": 1.191059947013855,
"learning_rate": 0.001,
"loss": 0.8189,
"step": 137600
},
{
"epoch": 44.505494505494504,
"grad_norm": 1.1510519981384277,
"learning_rate": 0.001,
"loss": 0.807,
"step": 137700
},
{
"epoch": 44.53781512605042,
"grad_norm": 1.358547329902649,
"learning_rate": 0.001,
"loss": 0.8193,
"step": 137800
},
{
"epoch": 44.57013574660633,
"grad_norm": 1.4065147638320923,
"learning_rate": 0.001,
"loss": 0.8166,
"step": 137900
},
{
"epoch": 44.60245636716225,
"grad_norm": 1.5476964712142944,
"learning_rate": 0.001,
"loss": 0.8333,
"step": 138000
},
{
"epoch": 44.63477698771816,
"grad_norm": 1.3824409246444702,
"learning_rate": 0.001,
"loss": 0.8273,
"step": 138100
},
{
"epoch": 44.66709760827408,
"grad_norm": 1.1956334114074707,
"learning_rate": 0.001,
"loss": 0.8241,
"step": 138200
},
{
"epoch": 44.69941822882999,
"grad_norm": 1.1970595121383667,
"learning_rate": 0.001,
"loss": 0.8477,
"step": 138300
},
{
"epoch": 44.731738849385906,
"grad_norm": 1.162197470664978,
"learning_rate": 0.001,
"loss": 0.8377,
"step": 138400
},
{
"epoch": 44.76405946994182,
"grad_norm": 1.2741224765777588,
"learning_rate": 0.001,
"loss": 0.8509,
"step": 138500
},
{
"epoch": 44.796380090497735,
"grad_norm": 1.1326532363891602,
"learning_rate": 0.001,
"loss": 0.8413,
"step": 138600
},
{
"epoch": 44.82870071105365,
"grad_norm": 1.5357139110565186,
"learning_rate": 0.001,
"loss": 0.8304,
"step": 138700
},
{
"epoch": 44.861021331609564,
"grad_norm": 1.355280876159668,
"learning_rate": 0.001,
"loss": 0.8572,
"step": 138800
},
{
"epoch": 44.89334195216548,
"grad_norm": 1.6483911275863647,
"learning_rate": 0.001,
"loss": 0.8489,
"step": 138900
},
{
"epoch": 44.92566257272139,
"grad_norm": 1.3355098962783813,
"learning_rate": 0.001,
"loss": 0.8559,
"step": 139000
},
{
"epoch": 44.95798319327731,
"grad_norm": 1.3055047988891602,
"learning_rate": 0.001,
"loss": 0.8465,
"step": 139100
},
{
"epoch": 44.99030381383322,
"grad_norm": 1.4414533376693726,
"learning_rate": 0.001,
"loss": 0.8137,
"step": 139200
},
{
"epoch": 45.022624434389144,
"grad_norm": 1.346257209777832,
"learning_rate": 0.001,
"loss": 0.8097,
"step": 139300
},
{
"epoch": 45.05494505494506,
"grad_norm": 1.8192956447601318,
"learning_rate": 0.001,
"loss": 0.7631,
"step": 139400
},
{
"epoch": 45.08726567550097,
"grad_norm": 1.2960363626480103,
"learning_rate": 0.001,
"loss": 0.7741,
"step": 139500
},
{
"epoch": 45.11958629605689,
"grad_norm": 2.104506731033325,
"learning_rate": 0.001,
"loss": 0.7705,
"step": 139600
},
{
"epoch": 45.1519069166128,
"grad_norm": 1.1508853435516357,
"learning_rate": 0.001,
"loss": 0.7728,
"step": 139700
},
{
"epoch": 45.18422753716872,
"grad_norm": 1.65923011302948,
"learning_rate": 0.001,
"loss": 0.7694,
"step": 139800
},
{
"epoch": 45.21654815772463,
"grad_norm": 1.5621492862701416,
"learning_rate": 0.001,
"loss": 0.7782,
"step": 139900
},
{
"epoch": 45.248868778280546,
"grad_norm": 1.388525366783142,
"learning_rate": 0.001,
"loss": 0.7728,
"step": 140000
},
{
"epoch": 45.28118939883646,
"grad_norm": 1.4508206844329834,
"learning_rate": 0.001,
"loss": 0.7853,
"step": 140100
},
{
"epoch": 45.313510019392375,
"grad_norm": 1.1052316427230835,
"learning_rate": 0.001,
"loss": 0.7995,
"step": 140200
},
{
"epoch": 45.34583063994829,
"grad_norm": 1.0586109161376953,
"learning_rate": 0.001,
"loss": 0.7858,
"step": 140300
},
{
"epoch": 45.378151260504204,
"grad_norm": 1.1809642314910889,
"learning_rate": 0.001,
"loss": 0.7982,
"step": 140400
},
{
"epoch": 45.41047188106012,
"grad_norm": 1.301119327545166,
"learning_rate": 0.001,
"loss": 0.7995,
"step": 140500
},
{
"epoch": 45.44279250161603,
"grad_norm": 1.2782262563705444,
"learning_rate": 0.001,
"loss": 0.8058,
"step": 140600
},
{
"epoch": 45.47511312217195,
"grad_norm": 1.2796794176101685,
"learning_rate": 0.001,
"loss": 0.7985,
"step": 140700
},
{
"epoch": 45.50743374272786,
"grad_norm": 1.1781928539276123,
"learning_rate": 0.001,
"loss": 0.7911,
"step": 140800
},
{
"epoch": 45.53975436328378,
"grad_norm": 1.1269559860229492,
"learning_rate": 0.001,
"loss": 0.8117,
"step": 140900
},
{
"epoch": 45.57207498383969,
"grad_norm": 1.3490080833435059,
"learning_rate": 0.001,
"loss": 0.793,
"step": 141000
},
{
"epoch": 45.604395604395606,
"grad_norm": 1.1441093683242798,
"learning_rate": 0.001,
"loss": 0.8239,
"step": 141100
},
{
"epoch": 45.63671622495152,
"grad_norm": 1.4738757610321045,
"learning_rate": 0.001,
"loss": 0.8282,
"step": 141200
},
{
"epoch": 45.669036845507435,
"grad_norm": 1.5204062461853027,
"learning_rate": 0.001,
"loss": 0.8204,
"step": 141300
},
{
"epoch": 45.70135746606335,
"grad_norm": 1.3744752407073975,
"learning_rate": 0.001,
"loss": 0.8146,
"step": 141400
},
{
"epoch": 45.733678086619264,
"grad_norm": 1.4345048666000366,
"learning_rate": 0.001,
"loss": 0.8251,
"step": 141500
},
{
"epoch": 45.76599870717518,
"grad_norm": 1.3285446166992188,
"learning_rate": 0.001,
"loss": 0.8376,
"step": 141600
},
{
"epoch": 45.79831932773109,
"grad_norm": 1.2864034175872803,
"learning_rate": 0.001,
"loss": 0.84,
"step": 141700
},
{
"epoch": 45.83063994828701,
"grad_norm": 1.5362489223480225,
"learning_rate": 0.001,
"loss": 0.8346,
"step": 141800
},
{
"epoch": 45.86296056884292,
"grad_norm": 1.177847981452942,
"learning_rate": 0.001,
"loss": 0.8402,
"step": 141900
},
{
"epoch": 45.89528118939884,
"grad_norm": 1.168405294418335,
"learning_rate": 0.001,
"loss": 0.8333,
"step": 142000
},
{
"epoch": 45.92760180995475,
"grad_norm": 1.4306167364120483,
"learning_rate": 0.001,
"loss": 0.8347,
"step": 142100
},
{
"epoch": 45.959922430510666,
"grad_norm": 1.3686728477478027,
"learning_rate": 0.001,
"loss": 0.8238,
"step": 142200
},
{
"epoch": 45.99224305106658,
"grad_norm": 1.2857394218444824,
"learning_rate": 0.001,
"loss": 0.8655,
"step": 142300
},
{
"epoch": 46.024563671622495,
"grad_norm": 1.2607849836349487,
"learning_rate": 0.001,
"loss": 0.7783,
"step": 142400
},
{
"epoch": 46.05688429217841,
"grad_norm": 1.5633095502853394,
"learning_rate": 0.001,
"loss": 0.7446,
"step": 142500
},
{
"epoch": 46.089204912734324,
"grad_norm": 4.660933494567871,
"learning_rate": 0.001,
"loss": 0.7425,
"step": 142600
},
{
"epoch": 46.12152553329024,
"grad_norm": 1.7380452156066895,
"learning_rate": 0.001,
"loss": 0.7674,
"step": 142700
},
{
"epoch": 46.15384615384615,
"grad_norm": 1.2123302221298218,
"learning_rate": 0.001,
"loss": 0.775,
"step": 142800
},
{
"epoch": 46.18616677440207,
"grad_norm": 1.2319421768188477,
"learning_rate": 0.001,
"loss": 0.7753,
"step": 142900
},
{
"epoch": 46.21848739495798,
"grad_norm": 1.5815069675445557,
"learning_rate": 0.001,
"loss": 0.7775,
"step": 143000
},
{
"epoch": 46.2508080155139,
"grad_norm": 1.0354673862457275,
"learning_rate": 0.001,
"loss": 0.7743,
"step": 143100
},
{
"epoch": 46.28312863606981,
"grad_norm": 1.3230652809143066,
"learning_rate": 0.001,
"loss": 0.7778,
"step": 143200
},
{
"epoch": 46.315449256625726,
"grad_norm": 1.2702628374099731,
"learning_rate": 0.001,
"loss": 0.7708,
"step": 143300
},
{
"epoch": 46.34776987718164,
"grad_norm": 1.2347975969314575,
"learning_rate": 0.001,
"loss": 0.7792,
"step": 143400
},
{
"epoch": 46.380090497737555,
"grad_norm": 1.1771498918533325,
"learning_rate": 0.001,
"loss": 0.7826,
"step": 143500
},
{
"epoch": 46.41241111829347,
"grad_norm": 1.1414570808410645,
"learning_rate": 0.001,
"loss": 0.7848,
"step": 143600
},
{
"epoch": 46.444731738849384,
"grad_norm": 1.1657198667526245,
"learning_rate": 0.001,
"loss": 0.7975,
"step": 143700
},
{
"epoch": 46.4770523594053,
"grad_norm": 1.267530083656311,
"learning_rate": 0.001,
"loss": 0.8029,
"step": 143800
},
{
"epoch": 46.50937297996121,
"grad_norm": 1.1892708539962769,
"learning_rate": 0.001,
"loss": 0.798,
"step": 143900
},
{
"epoch": 46.54169360051713,
"grad_norm": 1.89950692653656,
"learning_rate": 0.001,
"loss": 0.7923,
"step": 144000
},
{
"epoch": 46.57401422107304,
"grad_norm": 1.305152177810669,
"learning_rate": 0.001,
"loss": 0.8014,
"step": 144100
},
{
"epoch": 46.60633484162896,
"grad_norm": 1.5587385892868042,
"learning_rate": 0.001,
"loss": 0.8099,
"step": 144200
},
{
"epoch": 46.63865546218487,
"grad_norm": 1.152372121810913,
"learning_rate": 0.001,
"loss": 0.8062,
"step": 144300
},
{
"epoch": 46.670976082740786,
"grad_norm": 1.1674656867980957,
"learning_rate": 0.001,
"loss": 0.8167,
"step": 144400
},
{
"epoch": 46.7032967032967,
"grad_norm": 1.259466290473938,
"learning_rate": 0.001,
"loss": 0.8222,
"step": 144500
},
{
"epoch": 46.735617323852615,
"grad_norm": 1.4729558229446411,
"learning_rate": 0.001,
"loss": 0.8043,
"step": 144600
},
{
"epoch": 46.76793794440853,
"grad_norm": 1.0603488683700562,
"learning_rate": 0.001,
"loss": 0.8125,
"step": 144700
},
{
"epoch": 46.800258564964444,
"grad_norm": 3.8644630908966064,
"learning_rate": 0.001,
"loss": 0.8291,
"step": 144800
},
{
"epoch": 46.83257918552036,
"grad_norm": 1.261315107345581,
"learning_rate": 0.001,
"loss": 0.8183,
"step": 144900
},
{
"epoch": 46.864899806076274,
"grad_norm": 1.1630548238754272,
"learning_rate": 0.001,
"loss": 0.8281,
"step": 145000
},
{
"epoch": 46.89722042663219,
"grad_norm": 1.2682583332061768,
"learning_rate": 0.001,
"loss": 0.8263,
"step": 145100
},
{
"epoch": 46.9295410471881,
"grad_norm": 1.2569491863250732,
"learning_rate": 0.001,
"loss": 0.8012,
"step": 145200
},
{
"epoch": 46.96186166774402,
"grad_norm": 1.174126148223877,
"learning_rate": 0.001,
"loss": 0.8269,
"step": 145300
},
{
"epoch": 46.99418228829994,
"grad_norm": 1.253170371055603,
"learning_rate": 0.001,
"loss": 0.826,
"step": 145400
},
{
"epoch": 47.02650290885585,
"grad_norm": 1.2278573513031006,
"learning_rate": 0.001,
"loss": 0.7577,
"step": 145500
},
{
"epoch": 47.05882352941177,
"grad_norm": 1.2207344770431519,
"learning_rate": 0.001,
"loss": 0.7376,
"step": 145600
},
{
"epoch": 47.09114414996768,
"grad_norm": 1.3616275787353516,
"learning_rate": 0.001,
"loss": 0.745,
"step": 145700
},
{
"epoch": 47.1234647705236,
"grad_norm": 1.287418007850647,
"learning_rate": 0.001,
"loss": 0.7458,
"step": 145800
},
{
"epoch": 47.15578539107951,
"grad_norm": 1.1178083419799805,
"learning_rate": 0.001,
"loss": 0.7562,
"step": 145900
},
{
"epoch": 47.188106011635426,
"grad_norm": 1.5619690418243408,
"learning_rate": 0.001,
"loss": 0.7389,
"step": 146000
},
{
"epoch": 47.22042663219134,
"grad_norm": 1.4271554946899414,
"learning_rate": 0.001,
"loss": 0.7723,
"step": 146100
},
{
"epoch": 47.252747252747255,
"grad_norm": 1.7732999324798584,
"learning_rate": 0.001,
"loss": 0.7538,
"step": 146200
},
{
"epoch": 47.28506787330317,
"grad_norm": 1.4282053709030151,
"learning_rate": 0.001,
"loss": 0.7821,
"step": 146300
},
{
"epoch": 47.317388493859085,
"grad_norm": 1.228203535079956,
"learning_rate": 0.001,
"loss": 0.7722,
"step": 146400
},
{
"epoch": 47.349709114415,
"grad_norm": 1.3013367652893066,
"learning_rate": 0.001,
"loss": 0.7652,
"step": 146500
},
{
"epoch": 47.382029734970914,
"grad_norm": 1.2661182880401611,
"learning_rate": 0.001,
"loss": 0.7777,
"step": 146600
},
{
"epoch": 47.41435035552683,
"grad_norm": 1.3260958194732666,
"learning_rate": 0.001,
"loss": 0.7804,
"step": 146700
},
{
"epoch": 47.44667097608274,
"grad_norm": 1.212944507598877,
"learning_rate": 0.001,
"loss": 0.7679,
"step": 146800
},
{
"epoch": 47.47899159663866,
"grad_norm": 1.4435935020446777,
"learning_rate": 0.001,
"loss": 0.765,
"step": 146900
},
{
"epoch": 47.51131221719457,
"grad_norm": 1.4578791856765747,
"learning_rate": 0.001,
"loss": 0.7912,
"step": 147000
},
{
"epoch": 47.543632837750486,
"grad_norm": 1.462074637413025,
"learning_rate": 0.001,
"loss": 0.7974,
"step": 147100
},
{
"epoch": 47.5759534583064,
"grad_norm": 1.403319001197815,
"learning_rate": 0.001,
"loss": 0.7922,
"step": 147200
},
{
"epoch": 47.608274078862316,
"grad_norm": 1.2772393226623535,
"learning_rate": 0.001,
"loss": 0.8032,
"step": 147300
},
{
"epoch": 47.64059469941823,
"grad_norm": 1.217108964920044,
"learning_rate": 0.001,
"loss": 0.8016,
"step": 147400
},
{
"epoch": 47.672915319974145,
"grad_norm": 1.2560069561004639,
"learning_rate": 0.001,
"loss": 0.802,
"step": 147500
},
{
"epoch": 47.70523594053006,
"grad_norm": 1.161948800086975,
"learning_rate": 0.001,
"loss": 0.7941,
"step": 147600
},
{
"epoch": 47.737556561085974,
"grad_norm": 1.2663328647613525,
"learning_rate": 0.001,
"loss": 0.8108,
"step": 147700
},
{
"epoch": 47.76987718164189,
"grad_norm": 1.628860592842102,
"learning_rate": 0.001,
"loss": 0.8044,
"step": 147800
},
{
"epoch": 47.8021978021978,
"grad_norm": 1.2811692953109741,
"learning_rate": 0.001,
"loss": 0.8118,
"step": 147900
},
{
"epoch": 47.83451842275372,
"grad_norm": 1.2758411169052124,
"learning_rate": 0.001,
"loss": 0.8147,
"step": 148000
},
{
"epoch": 47.86683904330963,
"grad_norm": 1.3008402585983276,
"learning_rate": 0.001,
"loss": 0.8128,
"step": 148100
},
{
"epoch": 47.89915966386555,
"grad_norm": 1.4310789108276367,
"learning_rate": 0.001,
"loss": 0.8126,
"step": 148200
},
{
"epoch": 47.93148028442146,
"grad_norm": 1.4643484354019165,
"learning_rate": 0.001,
"loss": 0.8037,
"step": 148300
},
{
"epoch": 47.963800904977376,
"grad_norm": 1.2317537069320679,
"learning_rate": 0.001,
"loss": 0.8193,
"step": 148400
},
{
"epoch": 47.99612152553329,
"grad_norm": 1.2515267133712769,
"learning_rate": 0.001,
"loss": 0.8045,
"step": 148500
},
{
"epoch": 48.028442146089205,
"grad_norm": 1.475560188293457,
"learning_rate": 0.001,
"loss": 0.7236,
"step": 148600
},
{
"epoch": 48.06076276664512,
"grad_norm": 1.2725452184677124,
"learning_rate": 0.001,
"loss": 0.7288,
"step": 148700
},
{
"epoch": 48.093083387201034,
"grad_norm": 1.295411467552185,
"learning_rate": 0.001,
"loss": 0.7375,
"step": 148800
},
{
"epoch": 48.12540400775695,
"grad_norm": 1.436627984046936,
"learning_rate": 0.001,
"loss": 0.7454,
"step": 148900
},
{
"epoch": 48.15772462831286,
"grad_norm": 1.3891310691833496,
"learning_rate": 0.001,
"loss": 0.744,
"step": 149000
},
{
"epoch": 48.19004524886878,
"grad_norm": 1.15779447555542,
"learning_rate": 0.001,
"loss": 0.7465,
"step": 149100
},
{
"epoch": 48.22236586942469,
"grad_norm": 1.1223961114883423,
"learning_rate": 0.001,
"loss": 0.7363,
"step": 149200
},
{
"epoch": 48.25468648998061,
"grad_norm": 1.1627105474472046,
"learning_rate": 0.001,
"loss": 0.7456,
"step": 149300
},
{
"epoch": 48.28700711053652,
"grad_norm": 1.5019142627716064,
"learning_rate": 0.001,
"loss": 0.7612,
"step": 149400
},
{
"epoch": 48.319327731092436,
"grad_norm": 1.1844847202301025,
"learning_rate": 0.001,
"loss": 0.7567,
"step": 149500
},
{
"epoch": 48.35164835164835,
"grad_norm": 1.4008020162582397,
"learning_rate": 0.001,
"loss": 0.7587,
"step": 149600
},
{
"epoch": 48.383968972204265,
"grad_norm": 1.299863338470459,
"learning_rate": 0.001,
"loss": 0.7637,
"step": 149700
},
{
"epoch": 48.41628959276018,
"grad_norm": 1.1990106105804443,
"learning_rate": 0.001,
"loss": 0.7528,
"step": 149800
},
{
"epoch": 48.448610213316094,
"grad_norm": 1.5277669429779053,
"learning_rate": 0.001,
"loss": 0.7751,
"step": 149900
},
{
"epoch": 48.48093083387201,
"grad_norm": 1.361127495765686,
"learning_rate": 0.001,
"loss": 0.7523,
"step": 150000
},
{
"epoch": 48.51325145442792,
"grad_norm": 1.2485038042068481,
"learning_rate": 0.001,
"loss": 0.7826,
"step": 150100
},
{
"epoch": 48.54557207498384,
"grad_norm": 1.1917405128479004,
"learning_rate": 0.001,
"loss": 0.7906,
"step": 150200
},
{
"epoch": 48.57789269553975,
"grad_norm": 1.3229522705078125,
"learning_rate": 0.001,
"loss": 0.7801,
"step": 150300
},
{
"epoch": 48.61021331609567,
"grad_norm": 1.2076053619384766,
"learning_rate": 0.001,
"loss": 0.7907,
"step": 150400
},
{
"epoch": 48.64253393665158,
"grad_norm": 1.409424066543579,
"learning_rate": 0.001,
"loss": 0.7948,
"step": 150500
},
{
"epoch": 48.674854557207496,
"grad_norm": 1.1984336376190186,
"learning_rate": 0.001,
"loss": 0.7876,
"step": 150600
},
{
"epoch": 48.70717517776341,
"grad_norm": 14.626358985900879,
"learning_rate": 0.001,
"loss": 0.7982,
"step": 150700
},
{
"epoch": 48.739495798319325,
"grad_norm": 1.6344068050384521,
"learning_rate": 0.001,
"loss": 0.7986,
"step": 150800
},
{
"epoch": 48.77181641887524,
"grad_norm": 1.367846965789795,
"learning_rate": 0.001,
"loss": 0.791,
"step": 150900
},
{
"epoch": 48.804137039431154,
"grad_norm": 1.1952319145202637,
"learning_rate": 0.001,
"loss": 0.7965,
"step": 151000
},
{
"epoch": 48.83645765998707,
"grad_norm": 1.4131128787994385,
"learning_rate": 0.001,
"loss": 0.8009,
"step": 151100
},
{
"epoch": 48.86877828054298,
"grad_norm": 1.2616978883743286,
"learning_rate": 0.001,
"loss": 0.8031,
"step": 151200
},
{
"epoch": 48.9010989010989,
"grad_norm": 1.5032106637954712,
"learning_rate": 0.001,
"loss": 0.8055,
"step": 151300
},
{
"epoch": 48.93341952165481,
"grad_norm": 1.5496946573257446,
"learning_rate": 0.001,
"loss": 0.801,
"step": 151400
},
{
"epoch": 48.96574014221073,
"grad_norm": 1.0915486812591553,
"learning_rate": 0.001,
"loss": 0.8008,
"step": 151500
},
{
"epoch": 48.99806076276664,
"grad_norm": 1.169753074645996,
"learning_rate": 0.001,
"loss": 0.7731,
"step": 151600
},
{
"epoch": 49.03038138332256,
"grad_norm": 1.0841970443725586,
"learning_rate": 0.001,
"loss": 0.7138,
"step": 151700
},
{
"epoch": 49.06270200387848,
"grad_norm": 1.2232255935668945,
"learning_rate": 0.001,
"loss": 0.7295,
"step": 151800
},
{
"epoch": 49.09502262443439,
"grad_norm": 1.101244330406189,
"learning_rate": 0.001,
"loss": 0.7295,
"step": 151900
},
{
"epoch": 49.12734324499031,
"grad_norm": 1.1868951320648193,
"learning_rate": 0.001,
"loss": 0.7322,
"step": 152000
},
{
"epoch": 49.15966386554622,
"grad_norm": 1.4374502897262573,
"learning_rate": 0.001,
"loss": 0.7394,
"step": 152100
},
{
"epoch": 49.191984486102136,
"grad_norm": 1.1606899499893188,
"learning_rate": 0.001,
"loss": 0.7298,
"step": 152200
},
{
"epoch": 49.22430510665805,
"grad_norm": 1.3472959995269775,
"learning_rate": 0.001,
"loss": 0.7425,
"step": 152300
},
{
"epoch": 49.256625727213965,
"grad_norm": 1.0685003995895386,
"learning_rate": 0.001,
"loss": 0.7441,
"step": 152400
},
{
"epoch": 49.28894634776988,
"grad_norm": 0.9984011650085449,
"learning_rate": 0.001,
"loss": 0.7484,
"step": 152500
},
{
"epoch": 49.321266968325794,
"grad_norm": 1.1586939096450806,
"learning_rate": 0.001,
"loss": 0.7544,
"step": 152600
},
{
"epoch": 49.35358758888171,
"grad_norm": 1.0123095512390137,
"learning_rate": 0.001,
"loss": 0.771,
"step": 152700
},
{
"epoch": 49.38590820943762,
"grad_norm": 1.0273991823196411,
"learning_rate": 0.001,
"loss": 0.7457,
"step": 152800
},
{
"epoch": 49.41822882999354,
"grad_norm": 1.0201725959777832,
"learning_rate": 0.001,
"loss": 0.7645,
"step": 152900
},
{
"epoch": 49.45054945054945,
"grad_norm": 1.0155104398727417,
"learning_rate": 0.001,
"loss": 0.7649,
"step": 153000
},
{
"epoch": 49.48287007110537,
"grad_norm": 1.6812835931777954,
"learning_rate": 0.001,
"loss": 0.7636,
"step": 153100
},
{
"epoch": 49.51519069166128,
"grad_norm": 1.271406888961792,
"learning_rate": 0.001,
"loss": 0.7741,
"step": 153200
},
{
"epoch": 49.547511312217196,
"grad_norm": 1.2045003175735474,
"learning_rate": 0.001,
"loss": 0.769,
"step": 153300
},
{
"epoch": 49.57983193277311,
"grad_norm": 1.3944015502929688,
"learning_rate": 0.001,
"loss": 0.7857,
"step": 153400
},
{
"epoch": 49.612152553329025,
"grad_norm": 1.1044825315475464,
"learning_rate": 0.001,
"loss": 0.7744,
"step": 153500
},
{
"epoch": 49.64447317388494,
"grad_norm": 1.2409112453460693,
"learning_rate": 0.001,
"loss": 0.7744,
"step": 153600
},
{
"epoch": 49.676793794440854,
"grad_norm": 1.0952699184417725,
"learning_rate": 0.001,
"loss": 0.7621,
"step": 153700
},
{
"epoch": 49.70911441499677,
"grad_norm": 1.224043607711792,
"learning_rate": 0.001,
"loss": 0.7822,
"step": 153800
},
{
"epoch": 49.74143503555268,
"grad_norm": 1.085472822189331,
"learning_rate": 0.001,
"loss": 0.7815,
"step": 153900
},
{
"epoch": 49.7737556561086,
"grad_norm": 1.2062139511108398,
"learning_rate": 0.001,
"loss": 0.7739,
"step": 154000
},
{
"epoch": 49.80607627666451,
"grad_norm": 1.1115362644195557,
"learning_rate": 0.001,
"loss": 0.775,
"step": 154100
},
{
"epoch": 49.83839689722043,
"grad_norm": 1.2838987112045288,
"learning_rate": 0.001,
"loss": 0.7823,
"step": 154200
},
{
"epoch": 49.87071751777634,
"grad_norm": 1.238246202468872,
"learning_rate": 0.001,
"loss": 0.7866,
"step": 154300
},
{
"epoch": 49.903038138332256,
"grad_norm": 1.235521912574768,
"learning_rate": 0.001,
"loss": 0.7761,
"step": 154400
},
{
"epoch": 49.93535875888817,
"grad_norm": 1.0825852155685425,
"learning_rate": 0.001,
"loss": 0.7827,
"step": 154500
},
{
"epoch": 49.967679379444085,
"grad_norm": 1.4009476900100708,
"learning_rate": 0.001,
"loss": 0.7995,
"step": 154600
},
{
"epoch": 50.0,
"grad_norm": 0.06359585374593735,
"learning_rate": 0.001,
"loss": 0.7531,
"step": 154700
},
{
"epoch": 50.032320620555915,
"grad_norm": 0.6125797033309937,
"learning_rate": 0.001,
"loss": 0.6978,
"step": 154800
},
{
"epoch": 50.06464124111183,
"grad_norm": 0.09692316502332687,
"learning_rate": 0.001,
"loss": 0.7195,
"step": 154900
},
{
"epoch": 50.096961861667744,
"grad_norm": 0.6651096343994141,
"learning_rate": 0.001,
"loss": 0.7354,
"step": 155000
},
{
"epoch": 50.12928248222366,
"grad_norm": 0.3355295658111572,
"learning_rate": 0.001,
"loss": 0.7262,
"step": 155100
},
{
"epoch": 50.16160310277957,
"grad_norm": 0.36281466484069824,
"learning_rate": 0.001,
"loss": 0.7209,
"step": 155200
},
{
"epoch": 50.19392372333549,
"grad_norm": 0.5302878022193909,
"learning_rate": 0.001,
"loss": 0.7309,
"step": 155300
},
{
"epoch": 50.2262443438914,
"grad_norm": 0.20198680460453033,
"learning_rate": 0.001,
"loss": 0.7168,
"step": 155400
},
{
"epoch": 50.25856496444732,
"grad_norm": 0.5641872882843018,
"learning_rate": 0.001,
"loss": 0.7314,
"step": 155500
},
{
"epoch": 50.29088558500323,
"grad_norm": 0.1572069376707077,
"learning_rate": 0.001,
"loss": 0.7431,
"step": 155600
},
{
"epoch": 50.323206205559146,
"grad_norm": 1.641300082206726,
"learning_rate": 0.001,
"loss": 0.7391,
"step": 155700
},
{
"epoch": 50.35552682611506,
"grad_norm": 0.5732482671737671,
"learning_rate": 0.001,
"loss": 0.7487,
"step": 155800
},
{
"epoch": 50.387847446670975,
"grad_norm": 0.4709911644458771,
"learning_rate": 0.001,
"loss": 0.7426,
"step": 155900
},
{
"epoch": 50.42016806722689,
"grad_norm": 0.3790436387062073,
"learning_rate": 0.001,
"loss": 0.7581,
"step": 156000
},
{
"epoch": 50.452488687782804,
"grad_norm": 0.8265621662139893,
"learning_rate": 0.001,
"loss": 0.7376,
"step": 156100
},
{
"epoch": 50.48480930833872,
"grad_norm": 0.5220683813095093,
"learning_rate": 0.001,
"loss": 0.7474,
"step": 156200
},
{
"epoch": 50.51712992889463,
"grad_norm": 0.22513864934444427,
"learning_rate": 0.001,
"loss": 0.7492,
"step": 156300
},
{
"epoch": 50.54945054945055,
"grad_norm": 0.5701922178268433,
"learning_rate": 0.001,
"loss": 0.7517,
"step": 156400
},
{
"epoch": 50.58177117000646,
"grad_norm": 0.5959166288375854,
"learning_rate": 0.001,
"loss": 0.7521,
"step": 156500
},
{
"epoch": 50.61409179056238,
"grad_norm": 0.5421650409698486,
"learning_rate": 0.001,
"loss": 0.7633,
"step": 156600
},
{
"epoch": 50.64641241111829,
"grad_norm": 0.604699969291687,
"learning_rate": 0.001,
"loss": 0.7506,
"step": 156700
},
{
"epoch": 50.678733031674206,
"grad_norm": 0.7056938409805298,
"learning_rate": 0.001,
"loss": 0.7583,
"step": 156800
},
{
"epoch": 50.71105365223012,
"grad_norm": 0.36048054695129395,
"learning_rate": 0.001,
"loss": 0.7446,
"step": 156900
},
{
"epoch": 50.743374272786035,
"grad_norm": 1.0495373010635376,
"learning_rate": 0.001,
"loss": 0.791,
"step": 157000
},
{
"epoch": 50.77569489334195,
"grad_norm": 0.38334110379219055,
"learning_rate": 0.001,
"loss": 0.7661,
"step": 157100
},
{
"epoch": 50.808015513897864,
"grad_norm": 0.34685152769088745,
"learning_rate": 0.001,
"loss": 0.7642,
"step": 157200
},
{
"epoch": 50.84033613445378,
"grad_norm": 6.124505043029785,
"learning_rate": 0.001,
"loss": 0.7831,
"step": 157300
},
{
"epoch": 50.87265675500969,
"grad_norm": 0.3870513439178467,
"learning_rate": 0.001,
"loss": 0.7879,
"step": 157400
},
{
"epoch": 50.90497737556561,
"grad_norm": 0.3915799558162689,
"learning_rate": 0.001,
"loss": 0.7751,
"step": 157500
},
{
"epoch": 50.93729799612152,
"grad_norm": 0.4591212868690491,
"learning_rate": 0.001,
"loss": 0.7986,
"step": 157600
},
{
"epoch": 50.96961861667744,
"grad_norm": 0.3180168867111206,
"learning_rate": 0.001,
"loss": 0.7874,
"step": 157700
},
{
"epoch": 51.00193923723336,
"grad_norm": 1.6160231828689575,
"learning_rate": 0.001,
"loss": 0.804,
"step": 157800
},
{
"epoch": 51.03425985778927,
"grad_norm": 1.5989055633544922,
"learning_rate": 0.001,
"loss": 0.6946,
"step": 157900
},
{
"epoch": 51.06658047834519,
"grad_norm": 1.248809576034546,
"learning_rate": 0.001,
"loss": 0.6941,
"step": 158000
},
{
"epoch": 51.0989010989011,
"grad_norm": 1.428173303604126,
"learning_rate": 0.001,
"loss": 0.717,
"step": 158100
},
{
"epoch": 51.13122171945702,
"grad_norm": 1.5812784433364868,
"learning_rate": 0.001,
"loss": 0.7142,
"step": 158200
},
{
"epoch": 51.16354234001293,
"grad_norm": 1.3164476156234741,
"learning_rate": 0.001,
"loss": 0.6995,
"step": 158300
},
{
"epoch": 51.195862960568846,
"grad_norm": 1.545201301574707,
"learning_rate": 0.001,
"loss": 0.7093,
"step": 158400
},
{
"epoch": 51.22818358112476,
"grad_norm": 1.6235681772232056,
"learning_rate": 0.001,
"loss": 0.728,
"step": 158500
},
{
"epoch": 51.260504201680675,
"grad_norm": 1.6064990758895874,
"learning_rate": 0.001,
"loss": 0.7269,
"step": 158600
},
{
"epoch": 51.29282482223659,
"grad_norm": 1.4662895202636719,
"learning_rate": 0.001,
"loss": 0.741,
"step": 158700
},
{
"epoch": 51.325145442792504,
"grad_norm": 1.4283791780471802,
"learning_rate": 0.001,
"loss": 0.7171,
"step": 158800
},
{
"epoch": 51.35746606334842,
"grad_norm": 1.3249914646148682,
"learning_rate": 0.001,
"loss": 0.7335,
"step": 158900
},
{
"epoch": 51.38978668390433,
"grad_norm": 1.3508305549621582,
"learning_rate": 0.001,
"loss": 0.7484,
"step": 159000
},
{
"epoch": 51.42210730446025,
"grad_norm": 1.402336597442627,
"learning_rate": 0.001,
"loss": 0.7242,
"step": 159100
},
{
"epoch": 51.45442792501616,
"grad_norm": 1.5387367010116577,
"learning_rate": 0.001,
"loss": 0.7565,
"step": 159200
},
{
"epoch": 51.48674854557208,
"grad_norm": 1.280126690864563,
"learning_rate": 0.001,
"loss": 0.7443,
"step": 159300
},
{
"epoch": 51.51906916612799,
"grad_norm": 1.4007776975631714,
"learning_rate": 0.001,
"loss": 0.73,
"step": 159400
},
{
"epoch": 51.551389786683906,
"grad_norm": 1.4511324167251587,
"learning_rate": 0.001,
"loss": 0.7601,
"step": 159500
},
{
"epoch": 51.58371040723982,
"grad_norm": 1.6074110269546509,
"learning_rate": 0.001,
"loss": 0.7468,
"step": 159600
},
{
"epoch": 51.616031027795735,
"grad_norm": 1.6182364225387573,
"learning_rate": 0.001,
"loss": 0.7409,
"step": 159700
},
{
"epoch": 51.64835164835165,
"grad_norm": 1.3085405826568604,
"learning_rate": 0.001,
"loss": 0.7545,
"step": 159800
},
{
"epoch": 51.680672268907564,
"grad_norm": 1.2141468524932861,
"learning_rate": 0.001,
"loss": 0.7507,
"step": 159900
},
{
"epoch": 51.71299288946348,
"grad_norm": 1.3837734460830688,
"learning_rate": 0.001,
"loss": 0.7613,
"step": 160000
},
{
"epoch": 51.74531351001939,
"grad_norm": 1.4727295637130737,
"learning_rate": 0.001,
"loss": 0.7631,
"step": 160100
},
{
"epoch": 51.77763413057531,
"grad_norm": 1.4606034755706787,
"learning_rate": 0.001,
"loss": 0.7705,
"step": 160200
},
{
"epoch": 51.80995475113122,
"grad_norm": 1.729146957397461,
"learning_rate": 0.001,
"loss": 0.7593,
"step": 160300
},
{
"epoch": 51.84227537168714,
"grad_norm": 1.6483460664749146,
"learning_rate": 0.001,
"loss": 0.7622,
"step": 160400
},
{
"epoch": 51.87459599224305,
"grad_norm": 1.164381980895996,
"learning_rate": 0.001,
"loss": 0.7678,
"step": 160500
},
{
"epoch": 51.906916612798966,
"grad_norm": 1.6500674486160278,
"learning_rate": 0.001,
"loss": 0.7659,
"step": 160600
},
{
"epoch": 51.93923723335488,
"grad_norm": 1.311309814453125,
"learning_rate": 0.001,
"loss": 0.7597,
"step": 160700
},
{
"epoch": 51.971557853910795,
"grad_norm": 1.169773817062378,
"learning_rate": 0.001,
"loss": 0.7661,
"step": 160800
},
{
"epoch": 52.00387847446671,
"grad_norm": 1.237779140472412,
"learning_rate": 0.001,
"loss": 0.7649,
"step": 160900
},
{
"epoch": 52.036199095022624,
"grad_norm": 1.669989824295044,
"learning_rate": 0.001,
"loss": 0.697,
"step": 161000
},
{
"epoch": 52.06851971557854,
"grad_norm": 1.4660130739212036,
"learning_rate": 0.001,
"loss": 0.7028,
"step": 161100
},
{
"epoch": 52.10084033613445,
"grad_norm": 1.304290533065796,
"learning_rate": 0.001,
"loss": 0.7115,
"step": 161200
},
{
"epoch": 52.13316095669037,
"grad_norm": 1.3688267469406128,
"learning_rate": 0.001,
"loss": 0.7015,
"step": 161300
},
{
"epoch": 52.16548157724628,
"grad_norm": 1.2158695459365845,
"learning_rate": 0.001,
"loss": 0.701,
"step": 161400
},
{
"epoch": 52.1978021978022,
"grad_norm": 1.2809381484985352,
"learning_rate": 0.001,
"loss": 0.703,
"step": 161500
},
{
"epoch": 52.23012281835811,
"grad_norm": 1.359277367591858,
"learning_rate": 0.001,
"loss": 0.7179,
"step": 161600
},
{
"epoch": 52.262443438914026,
"grad_norm": 1.4113330841064453,
"learning_rate": 0.001,
"loss": 0.7142,
"step": 161700
},
{
"epoch": 52.29476405946994,
"grad_norm": 1.4368807077407837,
"learning_rate": 0.001,
"loss": 0.7208,
"step": 161800
},
{
"epoch": 52.327084680025855,
"grad_norm": 1.1833715438842773,
"learning_rate": 0.001,
"loss": 0.715,
"step": 161900
},
{
"epoch": 52.35940530058177,
"grad_norm": 1.5551220178604126,
"learning_rate": 0.001,
"loss": 0.7246,
"step": 162000
},
{
"epoch": 52.391725921137684,
"grad_norm": 1.8916435241699219,
"learning_rate": 0.001,
"loss": 0.7346,
"step": 162100
},
{
"epoch": 52.4240465416936,
"grad_norm": 1.30422842502594,
"learning_rate": 0.001,
"loss": 0.7308,
"step": 162200
},
{
"epoch": 52.456367162249514,
"grad_norm": 1.31717050075531,
"learning_rate": 0.001,
"loss": 0.7259,
"step": 162300
},
{
"epoch": 52.48868778280543,
"grad_norm": 1.4147814512252808,
"learning_rate": 0.001,
"loss": 0.742,
"step": 162400
},
{
"epoch": 52.52100840336134,
"grad_norm": 1.0690675973892212,
"learning_rate": 0.001,
"loss": 0.7292,
"step": 162500
},
{
"epoch": 52.55332902391726,
"grad_norm": 1.655393123626709,
"learning_rate": 0.001,
"loss": 0.7393,
"step": 162600
},
{
"epoch": 52.58564964447317,
"grad_norm": 1.5793589353561401,
"learning_rate": 0.001,
"loss": 0.7345,
"step": 162700
},
{
"epoch": 52.617970265029086,
"grad_norm": 1.1870840787887573,
"learning_rate": 0.001,
"loss": 0.7449,
"step": 162800
},
{
"epoch": 52.650290885585,
"grad_norm": 1.2543737888336182,
"learning_rate": 0.001,
"loss": 0.7454,
"step": 162900
},
{
"epoch": 52.682611506140915,
"grad_norm": 1.7071529626846313,
"learning_rate": 0.001,
"loss": 0.7386,
"step": 163000
},
{
"epoch": 52.71493212669683,
"grad_norm": 1.2244914770126343,
"learning_rate": 0.001,
"loss": 0.7432,
"step": 163100
},
{
"epoch": 52.747252747252745,
"grad_norm": 1.2786270380020142,
"learning_rate": 0.001,
"loss": 0.7358,
"step": 163200
},
{
"epoch": 52.77957336780866,
"grad_norm": 1.573927640914917,
"learning_rate": 0.001,
"loss": 0.7321,
"step": 163300
},
{
"epoch": 52.811893988364574,
"grad_norm": 1.7538331747055054,
"learning_rate": 0.001,
"loss": 0.7337,
"step": 163400
},
{
"epoch": 52.84421460892049,
"grad_norm": 1.616270661354065,
"learning_rate": 0.001,
"loss": 0.7482,
"step": 163500
},
{
"epoch": 52.8765352294764,
"grad_norm": 1.253475546836853,
"learning_rate": 0.001,
"loss": 0.7635,
"step": 163600
},
{
"epoch": 52.90885585003232,
"grad_norm": 1.7537295818328857,
"learning_rate": 0.001,
"loss": 0.751,
"step": 163700
},
{
"epoch": 52.94117647058823,
"grad_norm": 1.3528518676757812,
"learning_rate": 0.001,
"loss": 0.7697,
"step": 163800
},
{
"epoch": 52.97349709114415,
"grad_norm": 1.401299238204956,
"learning_rate": 0.001,
"loss": 0.7577,
"step": 163900
},
{
"epoch": 53.00581771170007,
"grad_norm": 1.3519210815429688,
"learning_rate": 0.001,
"loss": 0.7499,
"step": 164000
},
{
"epoch": 53.03813833225598,
"grad_norm": 1.4237704277038574,
"learning_rate": 0.001,
"loss": 0.6907,
"step": 164100
},
{
"epoch": 53.0704589528119,
"grad_norm": 1.6554957628250122,
"learning_rate": 0.001,
"loss": 0.6931,
"step": 164200
},
{
"epoch": 53.10277957336781,
"grad_norm": 1.322135090827942,
"learning_rate": 0.001,
"loss": 0.6639,
"step": 164300
},
{
"epoch": 53.135100193923726,
"grad_norm": 1.5357197523117065,
"learning_rate": 0.001,
"loss": 0.6789,
"step": 164400
},
{
"epoch": 53.16742081447964,
"grad_norm": 1.1936781406402588,
"learning_rate": 0.001,
"loss": 0.69,
"step": 164500
},
{
"epoch": 53.199741435035556,
"grad_norm": 1.3940258026123047,
"learning_rate": 0.001,
"loss": 0.6974,
"step": 164600
},
{
"epoch": 53.23206205559147,
"grad_norm": 1.251088261604309,
"learning_rate": 0.001,
"loss": 0.7037,
"step": 164700
},
{
"epoch": 53.264382676147385,
"grad_norm": 1.4187980890274048,
"learning_rate": 0.001,
"loss": 0.6951,
"step": 164800
},
{
"epoch": 53.2967032967033,
"grad_norm": 1.410966396331787,
"learning_rate": 0.001,
"loss": 0.7132,
"step": 164900
},
{
"epoch": 53.329023917259214,
"grad_norm": 1.2439744472503662,
"learning_rate": 0.001,
"loss": 0.6969,
"step": 165000
},
{
"epoch": 53.36134453781513,
"grad_norm": 1.6391518115997314,
"learning_rate": 0.001,
"loss": 0.6999,
"step": 165100
},
{
"epoch": 53.39366515837104,
"grad_norm": 1.2765934467315674,
"learning_rate": 0.001,
"loss": 0.7341,
"step": 165200
},
{
"epoch": 53.42598577892696,
"grad_norm": 1.314164400100708,
"learning_rate": 0.001,
"loss": 0.7119,
"step": 165300
},
{
"epoch": 53.45830639948287,
"grad_norm": 1.3228839635849,
"learning_rate": 0.001,
"loss": 0.7288,
"step": 165400
},
{
"epoch": 53.49062702003879,
"grad_norm": 1.2543978691101074,
"learning_rate": 0.001,
"loss": 0.7222,
"step": 165500
},
{
"epoch": 53.5229476405947,
"grad_norm": 1.3691924810409546,
"learning_rate": 0.001,
"loss": 0.7289,
"step": 165600
},
{
"epoch": 53.555268261150616,
"grad_norm": 1.47661292552948,
"learning_rate": 0.001,
"loss": 0.7187,
"step": 165700
},
{
"epoch": 53.58758888170653,
"grad_norm": 1.5954946279525757,
"learning_rate": 0.001,
"loss": 0.7064,
"step": 165800
},
{
"epoch": 53.619909502262445,
"grad_norm": 1.302594780921936,
"learning_rate": 0.001,
"loss": 0.7357,
"step": 165900
},
{
"epoch": 53.65223012281836,
"grad_norm": 1.3874233961105347,
"learning_rate": 0.001,
"loss": 0.7342,
"step": 166000
},
{
"epoch": 53.684550743374274,
"grad_norm": 1.2953951358795166,
"learning_rate": 0.001,
"loss": 0.7399,
"step": 166100
},
{
"epoch": 53.71687136393019,
"grad_norm": 1.5059915781021118,
"learning_rate": 0.001,
"loss": 0.7226,
"step": 166200
},
{
"epoch": 53.7491919844861,
"grad_norm": 1.6534998416900635,
"learning_rate": 0.001,
"loss": 0.7478,
"step": 166300
},
{
"epoch": 53.78151260504202,
"grad_norm": 1.4183323383331299,
"learning_rate": 0.001,
"loss": 0.744,
"step": 166400
},
{
"epoch": 53.81383322559793,
"grad_norm": 1.4309442043304443,
"learning_rate": 0.001,
"loss": 0.7421,
"step": 166500
},
{
"epoch": 53.84615384615385,
"grad_norm": 1.4040199518203735,
"learning_rate": 0.001,
"loss": 0.7511,
"step": 166600
},
{
"epoch": 53.87847446670976,
"grad_norm": 1.5059336423873901,
"learning_rate": 0.001,
"loss": 0.763,
"step": 166700
},
{
"epoch": 53.910795087265676,
"grad_norm": 1.2663861513137817,
"learning_rate": 0.001,
"loss": 0.7299,
"step": 166800
},
{
"epoch": 53.94311570782159,
"grad_norm": 1.3244277238845825,
"learning_rate": 0.001,
"loss": 0.7438,
"step": 166900
},
{
"epoch": 53.975436328377505,
"grad_norm": 1.1952475309371948,
"learning_rate": 0.001,
"loss": 0.7579,
"step": 167000
},
{
"epoch": 54.00775694893342,
"grad_norm": 1.4403356313705444,
"learning_rate": 0.001,
"loss": 0.7466,
"step": 167100
},
{
"epoch": 54.040077569489334,
"grad_norm": 1.3646568059921265,
"learning_rate": 0.001,
"loss": 0.665,
"step": 167200
},
{
"epoch": 54.07239819004525,
"grad_norm": 1.3861289024353027,
"learning_rate": 0.001,
"loss": 0.6708,
"step": 167300
},
{
"epoch": 54.10471881060116,
"grad_norm": 1.2403484582901,
"learning_rate": 0.001,
"loss": 0.6773,
"step": 167400
},
{
"epoch": 54.13703943115708,
"grad_norm": 1.4412761926651,
"learning_rate": 0.001,
"loss": 0.6714,
"step": 167500
},
{
"epoch": 54.16936005171299,
"grad_norm": 1.5241587162017822,
"learning_rate": 0.001,
"loss": 0.6872,
"step": 167600
},
{
"epoch": 54.20168067226891,
"grad_norm": 1.2935175895690918,
"learning_rate": 0.001,
"loss": 0.6931,
"step": 167700
},
{
"epoch": 54.23400129282482,
"grad_norm": 1.2425872087478638,
"learning_rate": 0.001,
"loss": 0.6821,
"step": 167800
},
{
"epoch": 54.266321913380736,
"grad_norm": 1.1844414472579956,
"learning_rate": 0.001,
"loss": 0.6979,
"step": 167900
},
{
"epoch": 54.29864253393665,
"grad_norm": 1.5796948671340942,
"learning_rate": 0.001,
"loss": 0.7203,
"step": 168000
},
{
"epoch": 54.330963154492565,
"grad_norm": 1.386918306350708,
"learning_rate": 0.001,
"loss": 0.7002,
"step": 168100
},
{
"epoch": 54.36328377504848,
"grad_norm": 1.1819859743118286,
"learning_rate": 0.001,
"loss": 0.7158,
"step": 168200
},
{
"epoch": 54.395604395604394,
"grad_norm": 1.2906848192214966,
"learning_rate": 0.001,
"loss": 0.7115,
"step": 168300
},
{
"epoch": 54.42792501616031,
"grad_norm": 1.1121623516082764,
"learning_rate": 0.001,
"loss": 0.6974,
"step": 168400
},
{
"epoch": 54.46024563671622,
"grad_norm": 1.2486132383346558,
"learning_rate": 0.001,
"loss": 0.7067,
"step": 168500
},
{
"epoch": 54.49256625727214,
"grad_norm": 1.1886978149414062,
"learning_rate": 0.001,
"loss": 0.7072,
"step": 168600
},
{
"epoch": 54.52488687782805,
"grad_norm": 1.0904563665390015,
"learning_rate": 0.001,
"loss": 0.7235,
"step": 168700
},
{
"epoch": 54.55720749838397,
"grad_norm": 1.4579226970672607,
"learning_rate": 0.001,
"loss": 0.7086,
"step": 168800
},
{
"epoch": 54.58952811893988,
"grad_norm": 1.0935107469558716,
"learning_rate": 0.001,
"loss": 0.7086,
"step": 168900
},
{
"epoch": 54.621848739495796,
"grad_norm": 1.4180753231048584,
"learning_rate": 0.001,
"loss": 0.7231,
"step": 169000
},
{
"epoch": 54.65416936005171,
"grad_norm": 1.4720892906188965,
"learning_rate": 0.001,
"loss": 0.7279,
"step": 169100
},
{
"epoch": 54.686489980607625,
"grad_norm": 1.350489616394043,
"learning_rate": 0.001,
"loss": 0.7294,
"step": 169200
},
{
"epoch": 54.71881060116354,
"grad_norm": 1.259083867073059,
"learning_rate": 0.001,
"loss": 0.7361,
"step": 169300
},
{
"epoch": 54.751131221719454,
"grad_norm": 1.6136354207992554,
"learning_rate": 0.001,
"loss": 0.7294,
"step": 169400
},
{
"epoch": 54.78345184227537,
"grad_norm": 1.4642295837402344,
"learning_rate": 0.001,
"loss": 0.7428,
"step": 169500
},
{
"epoch": 54.81577246283128,
"grad_norm": 1.4074591398239136,
"learning_rate": 0.001,
"loss": 0.7217,
"step": 169600
},
{
"epoch": 54.8480930833872,
"grad_norm": 1.2914643287658691,
"learning_rate": 0.001,
"loss": 0.7551,
"step": 169700
},
{
"epoch": 54.88041370394311,
"grad_norm": 1.2137229442596436,
"learning_rate": 0.001,
"loss": 0.7368,
"step": 169800
},
{
"epoch": 54.91273432449903,
"grad_norm": 1.177645206451416,
"learning_rate": 0.001,
"loss": 0.7293,
"step": 169900
},
{
"epoch": 54.94505494505494,
"grad_norm": 1.1865633726119995,
"learning_rate": 0.001,
"loss": 0.7241,
"step": 170000
},
{
"epoch": 54.977375565610856,
"grad_norm": 1.1472890377044678,
"learning_rate": 0.001,
"loss": 0.7466,
"step": 170100
},
{
"epoch": 55.00969618616678,
"grad_norm": 1.3177831172943115,
"learning_rate": 0.001,
"loss": 0.7035,
"step": 170200
},
{
"epoch": 55.04201680672269,
"grad_norm": 2.511430263519287,
"learning_rate": 0.001,
"loss": 0.6627,
"step": 170300
},
{
"epoch": 55.07433742727861,
"grad_norm": 1.3882372379302979,
"learning_rate": 0.001,
"loss": 0.6713,
"step": 170400
},
{
"epoch": 55.10665804783452,
"grad_norm": 1.1807273626327515,
"learning_rate": 0.001,
"loss": 0.6763,
"step": 170500
},
{
"epoch": 55.138978668390436,
"grad_norm": 1.6859544515609741,
"learning_rate": 0.001,
"loss": 0.6848,
"step": 170600
},
{
"epoch": 55.17129928894635,
"grad_norm": 1.858846664428711,
"learning_rate": 0.001,
"loss": 0.6797,
"step": 170700
},
{
"epoch": 55.203619909502265,
"grad_norm": 1.37615168094635,
"learning_rate": 0.001,
"loss": 0.6874,
"step": 170800
},
{
"epoch": 55.23594053005818,
"grad_norm": 1.270747184753418,
"learning_rate": 0.001,
"loss": 0.6824,
"step": 170900
},
{
"epoch": 55.268261150614094,
"grad_norm": 1.7557049989700317,
"learning_rate": 0.001,
"loss": 0.6876,
"step": 171000
},
{
"epoch": 55.30058177117001,
"grad_norm": 1.3425112962722778,
"learning_rate": 0.001,
"loss": 0.674,
"step": 171100
},
{
"epoch": 55.33290239172592,
"grad_norm": 1.377942681312561,
"learning_rate": 0.001,
"loss": 0.691,
"step": 171200
},
{
"epoch": 55.36522301228184,
"grad_norm": 1.1815950870513916,
"learning_rate": 0.001,
"loss": 0.6899,
"step": 171300
},
{
"epoch": 55.39754363283775,
"grad_norm": 1.3967550992965698,
"learning_rate": 0.001,
"loss": 0.6935,
"step": 171400
},
{
"epoch": 55.42986425339367,
"grad_norm": 1.5351048707962036,
"learning_rate": 0.001,
"loss": 0.6875,
"step": 171500
},
{
"epoch": 55.46218487394958,
"grad_norm": 1.2867332696914673,
"learning_rate": 0.001,
"loss": 0.6963,
"step": 171600
},
{
"epoch": 55.494505494505496,
"grad_norm": 1.6633144617080688,
"learning_rate": 0.001,
"loss": 0.707,
"step": 171700
},
{
"epoch": 55.52682611506141,
"grad_norm": 1.3871257305145264,
"learning_rate": 0.001,
"loss": 0.7089,
"step": 171800
},
{
"epoch": 55.559146735617325,
"grad_norm": 1.155198097229004,
"learning_rate": 0.001,
"loss": 0.6981,
"step": 171900
},
{
"epoch": 55.59146735617324,
"grad_norm": 1.0028072595596313,
"learning_rate": 0.001,
"loss": 0.7082,
"step": 172000
},
{
"epoch": 55.623787976729155,
"grad_norm": 1.5060408115386963,
"learning_rate": 0.001,
"loss": 0.7144,
"step": 172100
},
{
"epoch": 55.65610859728507,
"grad_norm": 1.0690950155258179,
"learning_rate": 0.001,
"loss": 0.697,
"step": 172200
},
{
"epoch": 55.688429217840984,
"grad_norm": 1.4234610795974731,
"learning_rate": 0.001,
"loss": 0.7196,
"step": 172300
},
{
"epoch": 55.7207498383969,
"grad_norm": 1.2625986337661743,
"learning_rate": 0.001,
"loss": 0.7008,
"step": 172400
},
{
"epoch": 55.75307045895281,
"grad_norm": 1.2988468408584595,
"learning_rate": 0.001,
"loss": 0.7296,
"step": 172500
},
{
"epoch": 55.78539107950873,
"grad_norm": 1.5457489490509033,
"learning_rate": 0.001,
"loss": 0.7056,
"step": 172600
},
{
"epoch": 55.81771170006464,
"grad_norm": 1.2999827861785889,
"learning_rate": 0.001,
"loss": 0.7393,
"step": 172700
},
{
"epoch": 55.85003232062056,
"grad_norm": 1.3677887916564941,
"learning_rate": 0.001,
"loss": 0.7106,
"step": 172800
},
{
"epoch": 55.88235294117647,
"grad_norm": 1.438559889793396,
"learning_rate": 0.001,
"loss": 0.7275,
"step": 172900
},
{
"epoch": 55.914673561732386,
"grad_norm": 1.15455162525177,
"learning_rate": 0.001,
"loss": 0.7265,
"step": 173000
},
{
"epoch": 55.9469941822883,
"grad_norm": 1.3578943014144897,
"learning_rate": 0.001,
"loss": 0.734,
"step": 173100
},
{
"epoch": 55.979314802844215,
"grad_norm": 1.8947703838348389,
"learning_rate": 0.001,
"loss": 0.7342,
"step": 173200
},
{
"epoch": 56.01163542340013,
"grad_norm": 1.7013978958129883,
"learning_rate": 0.001,
"loss": 0.6952,
"step": 173300
},
{
"epoch": 56.043956043956044,
"grad_norm": 1.2173410654067993,
"learning_rate": 0.001,
"loss": 0.6489,
"step": 173400
},
{
"epoch": 56.07627666451196,
"grad_norm": 1.3756160736083984,
"learning_rate": 0.001,
"loss": 0.6525,
"step": 173500
},
{
"epoch": 56.10859728506787,
"grad_norm": 1.1060224771499634,
"learning_rate": 0.001,
"loss": 0.6633,
"step": 173600
},
{
"epoch": 56.14091790562379,
"grad_norm": 1.318628191947937,
"learning_rate": 0.001,
"loss": 0.6619,
"step": 173700
},
{
"epoch": 56.1732385261797,
"grad_norm": 1.0686438083648682,
"learning_rate": 0.001,
"loss": 0.6631,
"step": 173800
},
{
"epoch": 56.20555914673562,
"grad_norm": 1.1766424179077148,
"learning_rate": 0.001,
"loss": 0.6583,
"step": 173900
},
{
"epoch": 56.23787976729153,
"grad_norm": 1.3714375495910645,
"learning_rate": 0.001,
"loss": 0.7061,
"step": 174000
},
{
"epoch": 56.270200387847446,
"grad_norm": 1.0865048170089722,
"learning_rate": 0.001,
"loss": 0.6906,
"step": 174100
},
{
"epoch": 56.30252100840336,
"grad_norm": 1.3435384035110474,
"learning_rate": 0.001,
"loss": 0.6932,
"step": 174200
},
{
"epoch": 56.334841628959275,
"grad_norm": 1.2520074844360352,
"learning_rate": 0.001,
"loss": 0.6673,
"step": 174300
},
{
"epoch": 56.36716224951519,
"grad_norm": 1.3908581733703613,
"learning_rate": 0.001,
"loss": 0.6668,
"step": 174400
},
{
"epoch": 56.399482870071104,
"grad_norm": 1.0985134840011597,
"learning_rate": 0.001,
"loss": 0.6863,
"step": 174500
},
{
"epoch": 56.43180349062702,
"grad_norm": 1.246180772781372,
"learning_rate": 0.001,
"loss": 0.6887,
"step": 174600
},
{
"epoch": 56.46412411118293,
"grad_norm": 1.1640903949737549,
"learning_rate": 0.001,
"loss": 0.6972,
"step": 174700
},
{
"epoch": 56.49644473173885,
"grad_norm": 1.4162952899932861,
"learning_rate": 0.001,
"loss": 0.7061,
"step": 174800
},
{
"epoch": 56.52876535229476,
"grad_norm": 1.4577606916427612,
"learning_rate": 0.001,
"loss": 0.6915,
"step": 174900
},
{
"epoch": 56.56108597285068,
"grad_norm": 1.6326485872268677,
"learning_rate": 0.001,
"loss": 0.69,
"step": 175000
},
{
"epoch": 56.59340659340659,
"grad_norm": 1.6330126523971558,
"learning_rate": 0.001,
"loss": 0.698,
"step": 175100
},
{
"epoch": 56.625727213962506,
"grad_norm": 1.5364458560943604,
"learning_rate": 0.001,
"loss": 0.6766,
"step": 175200
},
{
"epoch": 56.65804783451842,
"grad_norm": 1.5034723281860352,
"learning_rate": 0.001,
"loss": 0.702,
"step": 175300
},
{
"epoch": 56.690368455074335,
"grad_norm": 1.1095960140228271,
"learning_rate": 0.001,
"loss": 0.7051,
"step": 175400
},
{
"epoch": 56.72268907563025,
"grad_norm": 1.692766785621643,
"learning_rate": 0.001,
"loss": 0.711,
"step": 175500
},
{
"epoch": 56.755009696186164,
"grad_norm": 1.2210887670516968,
"learning_rate": 0.001,
"loss": 0.6972,
"step": 175600
},
{
"epoch": 56.78733031674208,
"grad_norm": 1.6064265966415405,
"learning_rate": 0.001,
"loss": 0.7076,
"step": 175700
},
{
"epoch": 56.81965093729799,
"grad_norm": 1.4132839441299438,
"learning_rate": 0.001,
"loss": 0.7206,
"step": 175800
},
{
"epoch": 56.85197155785391,
"grad_norm": 1.3864809274673462,
"learning_rate": 0.001,
"loss": 0.7339,
"step": 175900
},
{
"epoch": 56.88429217840982,
"grad_norm": 1.4417805671691895,
"learning_rate": 0.001,
"loss": 0.7262,
"step": 176000
},
{
"epoch": 56.91661279896574,
"grad_norm": 1.3630573749542236,
"learning_rate": 0.001,
"loss": 0.7211,
"step": 176100
},
{
"epoch": 56.94893341952165,
"grad_norm": 1.3025026321411133,
"learning_rate": 0.001,
"loss": 0.7153,
"step": 176200
},
{
"epoch": 56.981254040077566,
"grad_norm": 1.1699614524841309,
"learning_rate": 0.001,
"loss": 0.7156,
"step": 176300
},
{
"epoch": 57.01357466063349,
"grad_norm": 1.3091782331466675,
"learning_rate": 0.001,
"loss": 0.6699,
"step": 176400
},
{
"epoch": 57.0458952811894,
"grad_norm": 1.0730043649673462,
"learning_rate": 0.001,
"loss": 0.6543,
"step": 176500
},
{
"epoch": 57.07821590174532,
"grad_norm": 1.6480448246002197,
"learning_rate": 0.001,
"loss": 0.6499,
"step": 176600
},
{
"epoch": 57.11053652230123,
"grad_norm": 1.1549612283706665,
"learning_rate": 0.001,
"loss": 0.6457,
"step": 176700
},
{
"epoch": 57.142857142857146,
"grad_norm": 0.9740864634513855,
"learning_rate": 0.001,
"loss": 0.6583,
"step": 176800
},
{
"epoch": 57.17517776341306,
"grad_norm": 1.0623377561569214,
"learning_rate": 0.001,
"loss": 0.6563,
"step": 176900
},
{
"epoch": 57.207498383968975,
"grad_norm": 1.1579992771148682,
"learning_rate": 0.001,
"loss": 0.6723,
"step": 177000
},
{
"epoch": 57.23981900452489,
"grad_norm": 1.4379750490188599,
"learning_rate": 0.001,
"loss": 0.6672,
"step": 177100
},
{
"epoch": 57.272139625080804,
"grad_norm": 1.191053032875061,
"learning_rate": 0.001,
"loss": 0.6594,
"step": 177200
},
{
"epoch": 57.30446024563672,
"grad_norm": 1.3507729768753052,
"learning_rate": 0.001,
"loss": 0.6797,
"step": 177300
},
{
"epoch": 57.33678086619263,
"grad_norm": 1.240713119506836,
"learning_rate": 0.001,
"loss": 0.6605,
"step": 177400
},
{
"epoch": 57.36910148674855,
"grad_norm": 1.1125223636627197,
"learning_rate": 0.001,
"loss": 0.6766,
"step": 177500
},
{
"epoch": 57.40142210730446,
"grad_norm": 1.2132062911987305,
"learning_rate": 0.001,
"loss": 0.6815,
"step": 177600
},
{
"epoch": 57.43374272786038,
"grad_norm": 1.2277776002883911,
"learning_rate": 0.001,
"loss": 0.684,
"step": 177700
},
{
"epoch": 57.46606334841629,
"grad_norm": 1.0552458763122559,
"learning_rate": 0.001,
"loss": 0.6758,
"step": 177800
},
{
"epoch": 57.498383968972206,
"grad_norm": 1.1401628255844116,
"learning_rate": 0.001,
"loss": 0.685,
"step": 177900
},
{
"epoch": 57.53070458952812,
"grad_norm": 1.225094199180603,
"learning_rate": 0.001,
"loss": 0.6842,
"step": 178000
},
{
"epoch": 57.563025210084035,
"grad_norm": 1.2406818866729736,
"learning_rate": 0.001,
"loss": 0.695,
"step": 178100
},
{
"epoch": 57.59534583063995,
"grad_norm": 0.9722842574119568,
"learning_rate": 0.001,
"loss": 0.6877,
"step": 178200
},
{
"epoch": 57.627666451195864,
"grad_norm": 1.3157597780227661,
"learning_rate": 0.001,
"loss": 0.7033,
"step": 178300
},
{
"epoch": 57.65998707175178,
"grad_norm": 1.02400803565979,
"learning_rate": 0.001,
"loss": 0.6946,
"step": 178400
},
{
"epoch": 57.69230769230769,
"grad_norm": 1.0910509824752808,
"learning_rate": 0.001,
"loss": 0.7019,
"step": 178500
},
{
"epoch": 57.72462831286361,
"grad_norm": 1.3276338577270508,
"learning_rate": 0.001,
"loss": 0.7062,
"step": 178600
},
{
"epoch": 57.75694893341952,
"grad_norm": 1.2191648483276367,
"learning_rate": 0.001,
"loss": 0.6972,
"step": 178700
},
{
"epoch": 57.78926955397544,
"grad_norm": 1.43450129032135,
"learning_rate": 0.001,
"loss": 0.706,
"step": 178800
},
{
"epoch": 57.82159017453135,
"grad_norm": 1.2280073165893555,
"learning_rate": 0.001,
"loss": 0.6939,
"step": 178900
},
{
"epoch": 57.853910795087266,
"grad_norm": 1.187094807624817,
"learning_rate": 0.001,
"loss": 0.6895,
"step": 179000
},
{
"epoch": 57.88623141564318,
"grad_norm": 1.1127578020095825,
"learning_rate": 0.001,
"loss": 0.709,
"step": 179100
},
{
"epoch": 57.918552036199095,
"grad_norm": 1.1391856670379639,
"learning_rate": 0.001,
"loss": 0.7105,
"step": 179200
},
{
"epoch": 57.95087265675501,
"grad_norm": 1.3863335847854614,
"learning_rate": 0.001,
"loss": 0.721,
"step": 179300
},
{
"epoch": 57.983193277310924,
"grad_norm": 0.9819924235343933,
"learning_rate": 0.001,
"loss": 0.7063,
"step": 179400
},
{
"epoch": 58.01551389786684,
"grad_norm": 0.818321168422699,
"learning_rate": 0.001,
"loss": 0.6418,
"step": 179500
},
{
"epoch": 58.04783451842275,
"grad_norm": 0.7625473737716675,
"learning_rate": 0.001,
"loss": 0.6364,
"step": 179600
},
{
"epoch": 58.08015513897867,
"grad_norm": 0.5951278209686279,
"learning_rate": 0.001,
"loss": 0.6252,
"step": 179700
},
{
"epoch": 58.11247575953458,
"grad_norm": 0.5998912453651428,
"learning_rate": 0.001,
"loss": 0.6378,
"step": 179800
},
{
"epoch": 58.1447963800905,
"grad_norm": 1.1031502485275269,
"learning_rate": 0.001,
"loss": 0.6487,
"step": 179900
},
{
"epoch": 58.17711700064641,
"grad_norm": 0.686004102230072,
"learning_rate": 0.001,
"loss": 0.6581,
"step": 180000
},
{
"epoch": 58.209437621202326,
"grad_norm": 0.9831950068473816,
"learning_rate": 0.001,
"loss": 0.6453,
"step": 180100
},
{
"epoch": 58.24175824175824,
"grad_norm": 0.9751262664794922,
"learning_rate": 0.001,
"loss": 0.6528,
"step": 180200
},
{
"epoch": 58.274078862314155,
"grad_norm": 0.9873791337013245,
"learning_rate": 0.001,
"loss": 0.6597,
"step": 180300
},
{
"epoch": 58.30639948287007,
"grad_norm": 0.6696386933326721,
"learning_rate": 0.001,
"loss": 0.6569,
"step": 180400
},
{
"epoch": 58.338720103425985,
"grad_norm": 0.8646842837333679,
"learning_rate": 0.001,
"loss": 0.6658,
"step": 180500
},
{
"epoch": 58.3710407239819,
"grad_norm": 0.4439694881439209,
"learning_rate": 0.001,
"loss": 0.6511,
"step": 180600
},
{
"epoch": 58.403361344537814,
"grad_norm": 0.6436874270439148,
"learning_rate": 0.001,
"loss": 0.6722,
"step": 180700
},
{
"epoch": 58.43568196509373,
"grad_norm": 1.4539772272109985,
"learning_rate": 0.001,
"loss": 0.6739,
"step": 180800
},
{
"epoch": 58.46800258564964,
"grad_norm": 1.248949408531189,
"learning_rate": 0.001,
"loss": 0.6787,
"step": 180900
},
{
"epoch": 58.50032320620556,
"grad_norm": 0.9955918788909912,
"learning_rate": 0.001,
"loss": 0.6826,
"step": 181000
},
{
"epoch": 58.53264382676147,
"grad_norm": 1.4825260639190674,
"learning_rate": 0.001,
"loss": 0.68,
"step": 181100
},
{
"epoch": 58.56496444731739,
"grad_norm": 1.0058106184005737,
"learning_rate": 0.001,
"loss": 0.6706,
"step": 181200
},
{
"epoch": 58.5972850678733,
"grad_norm": 1.0586559772491455,
"learning_rate": 0.001,
"loss": 0.6747,
"step": 181300
},
{
"epoch": 58.629605688429216,
"grad_norm": 0.8397805094718933,
"learning_rate": 0.001,
"loss": 0.6897,
"step": 181400
},
{
"epoch": 58.66192630898513,
"grad_norm": 0.8589861392974854,
"learning_rate": 0.001,
"loss": 0.6802,
"step": 181500
},
{
"epoch": 58.694246929541045,
"grad_norm": 0.6333179473876953,
"learning_rate": 0.001,
"loss": 0.7055,
"step": 181600
},
{
"epoch": 58.72656755009696,
"grad_norm": 0.7798647284507751,
"learning_rate": 0.001,
"loss": 0.6974,
"step": 181700
},
{
"epoch": 58.758888170652874,
"grad_norm": 1.4674975872039795,
"learning_rate": 0.001,
"loss": 0.6912,
"step": 181800
},
{
"epoch": 58.79120879120879,
"grad_norm": 0.7736033201217651,
"learning_rate": 0.001,
"loss": 0.6948,
"step": 181900
},
{
"epoch": 58.8235294117647,
"grad_norm": 0.7464256882667542,
"learning_rate": 0.001,
"loss": 0.6924,
"step": 182000
},
{
"epoch": 58.85585003232062,
"grad_norm": 0.839506983757019,
"learning_rate": 0.001,
"loss": 0.6994,
"step": 182100
},
{
"epoch": 58.88817065287653,
"grad_norm": 0.6937113404273987,
"learning_rate": 0.001,
"loss": 0.6914,
"step": 182200
},
{
"epoch": 58.92049127343245,
"grad_norm": 0.8933976888656616,
"learning_rate": 0.001,
"loss": 0.715,
"step": 182300
},
{
"epoch": 58.95281189398836,
"grad_norm": 0.8206430673599243,
"learning_rate": 0.001,
"loss": 0.707,
"step": 182400
},
{
"epoch": 58.985132514544276,
"grad_norm": 0.863739013671875,
"learning_rate": 0.001,
"loss": 0.7061,
"step": 182500
},
{
"epoch": 59.0174531351002,
"grad_norm": 1.6361074447631836,
"learning_rate": 0.001,
"loss": 0.6643,
"step": 182600
},
{
"epoch": 59.04977375565611,
"grad_norm": 1.581034779548645,
"learning_rate": 0.001,
"loss": 0.6275,
"step": 182700
},
{
"epoch": 59.08209437621203,
"grad_norm": 1.680957317352295,
"learning_rate": 0.001,
"loss": 0.6349,
"step": 182800
},
{
"epoch": 59.11441499676794,
"grad_norm": 1.7037603855133057,
"learning_rate": 0.001,
"loss": 0.634,
"step": 182900
},
{
"epoch": 59.146735617323856,
"grad_norm": 1.5957320928573608,
"learning_rate": 0.001,
"loss": 0.6395,
"step": 183000
},
{
"epoch": 59.17905623787977,
"grad_norm": 1.8327863216400146,
"learning_rate": 0.001,
"loss": 0.6531,
"step": 183100
},
{
"epoch": 59.211376858435685,
"grad_norm": 1.845932960510254,
"learning_rate": 0.001,
"loss": 0.6402,
"step": 183200
},
{
"epoch": 59.2436974789916,
"grad_norm": 1.7632895708084106,
"learning_rate": 0.001,
"loss": 0.6557,
"step": 183300
},
{
"epoch": 59.276018099547514,
"grad_norm": 1.6277376413345337,
"learning_rate": 0.001,
"loss": 0.6491,
"step": 183400
},
{
"epoch": 59.30833872010343,
"grad_norm": 1.7220267057418823,
"learning_rate": 0.001,
"loss": 0.6503,
"step": 183500
},
{
"epoch": 59.34065934065934,
"grad_norm": 1.6623505353927612,
"learning_rate": 0.001,
"loss": 0.6621,
"step": 183600
},
{
"epoch": 59.37297996121526,
"grad_norm": 1.4514002799987793,
"learning_rate": 0.001,
"loss": 0.6541,
"step": 183700
},
{
"epoch": 59.40530058177117,
"grad_norm": 1.77423095703125,
"learning_rate": 0.001,
"loss": 0.6583,
"step": 183800
},
{
"epoch": 59.43762120232709,
"grad_norm": 1.649661898612976,
"learning_rate": 0.001,
"loss": 0.6752,
"step": 183900
},
{
"epoch": 59.469941822883,
"grad_norm": 1.8120254278182983,
"learning_rate": 0.001,
"loss": 0.6613,
"step": 184000
},
{
"epoch": 59.502262443438916,
"grad_norm": 1.68980872631073,
"learning_rate": 0.001,
"loss": 0.6742,
"step": 184100
},
{
"epoch": 59.53458306399483,
"grad_norm": 1.6394644975662231,
"learning_rate": 0.001,
"loss": 0.6718,
"step": 184200
},
{
"epoch": 59.566903684550745,
"grad_norm": 1.7204967737197876,
"learning_rate": 0.001,
"loss": 0.6731,
"step": 184300
},
{
"epoch": 59.59922430510666,
"grad_norm": 1.4989378452301025,
"learning_rate": 0.001,
"loss": 0.6842,
"step": 184400
},
{
"epoch": 59.631544925662574,
"grad_norm": 1.6579501628875732,
"learning_rate": 0.001,
"loss": 0.6813,
"step": 184500
},
{
"epoch": 59.66386554621849,
"grad_norm": 1.2888191938400269,
"learning_rate": 0.001,
"loss": 0.6843,
"step": 184600
},
{
"epoch": 59.6961861667744,
"grad_norm": 1.838328242301941,
"learning_rate": 0.001,
"loss": 0.6749,
"step": 184700
},
{
"epoch": 59.72850678733032,
"grad_norm": 1.8516156673431396,
"learning_rate": 0.001,
"loss": 0.6793,
"step": 184800
},
{
"epoch": 59.76082740788623,
"grad_norm": 1.3773984909057617,
"learning_rate": 0.001,
"loss": 0.6932,
"step": 184900
},
{
"epoch": 59.79314802844215,
"grad_norm": 1.4964762926101685,
"learning_rate": 0.001,
"loss": 0.6755,
"step": 185000
},
{
"epoch": 59.82546864899806,
"grad_norm": 1.436951994895935,
"learning_rate": 0.001,
"loss": 0.6887,
"step": 185100
},
{
"epoch": 59.857789269553976,
"grad_norm": 1.999688982963562,
"learning_rate": 0.001,
"loss": 0.6755,
"step": 185200
},
{
"epoch": 59.89010989010989,
"grad_norm": 2.138962507247925,
"learning_rate": 0.001,
"loss": 0.6728,
"step": 185300
},
{
"epoch": 59.922430510665805,
"grad_norm": 1.625344157218933,
"learning_rate": 0.001,
"loss": 0.6871,
"step": 185400
},
{
"epoch": 59.95475113122172,
"grad_norm": 1.556227445602417,
"learning_rate": 0.001,
"loss": 0.692,
"step": 185500
},
{
"epoch": 59.987071751777634,
"grad_norm": 1.5213489532470703,
"learning_rate": 0.001,
"loss": 0.6942,
"step": 185600
},
{
"epoch": 60.01939237233355,
"grad_norm": 1.5821621417999268,
"learning_rate": 0.001,
"loss": 0.6547,
"step": 185700
},
{
"epoch": 60.05171299288946,
"grad_norm": 1.2159498929977417,
"learning_rate": 0.001,
"loss": 0.6171,
"step": 185800
},
{
"epoch": 60.08403361344538,
"grad_norm": 1.3063063621520996,
"learning_rate": 0.001,
"loss": 0.6401,
"step": 185900
},
{
"epoch": 60.11635423400129,
"grad_norm": 1.501042127609253,
"learning_rate": 0.001,
"loss": 0.6274,
"step": 186000
},
{
"epoch": 60.14867485455721,
"grad_norm": 1.4862663745880127,
"learning_rate": 0.001,
"loss": 0.6215,
"step": 186100
},
{
"epoch": 60.18099547511312,
"grad_norm": 1.3393819332122803,
"learning_rate": 0.001,
"loss": 0.6366,
"step": 186200
},
{
"epoch": 60.213316095669036,
"grad_norm": 1.5676498413085938,
"learning_rate": 0.001,
"loss": 0.6364,
"step": 186300
},
{
"epoch": 60.24563671622495,
"grad_norm": 1.6434075832366943,
"learning_rate": 0.001,
"loss": 0.6319,
"step": 186400
},
{
"epoch": 60.277957336780865,
"grad_norm": 1.2878286838531494,
"learning_rate": 0.001,
"loss": 0.6592,
"step": 186500
},
{
"epoch": 60.31027795733678,
"grad_norm": 1.3613319396972656,
"learning_rate": 0.001,
"loss": 0.647,
"step": 186600
},
{
"epoch": 60.342598577892694,
"grad_norm": 1.654203176498413,
"learning_rate": 0.001,
"loss": 0.6508,
"step": 186700
},
{
"epoch": 60.37491919844861,
"grad_norm": 1.6589421033859253,
"learning_rate": 0.001,
"loss": 0.6401,
"step": 186800
},
{
"epoch": 60.40723981900452,
"grad_norm": 1.7555818557739258,
"learning_rate": 0.001,
"loss": 0.6528,
"step": 186900
},
{
"epoch": 60.43956043956044,
"grad_norm": 1.5702903270721436,
"learning_rate": 0.001,
"loss": 0.6553,
"step": 187000
},
{
"epoch": 60.47188106011635,
"grad_norm": 1.3548332452774048,
"learning_rate": 0.001,
"loss": 0.6411,
"step": 187100
},
{
"epoch": 60.50420168067227,
"grad_norm": 1.6495980024337769,
"learning_rate": 0.001,
"loss": 0.6673,
"step": 187200
},
{
"epoch": 60.53652230122818,
"grad_norm": 1.319981336593628,
"learning_rate": 0.001,
"loss": 0.6493,
"step": 187300
},
{
"epoch": 60.568842921784096,
"grad_norm": 1.2345103025436401,
"learning_rate": 0.001,
"loss": 0.6848,
"step": 187400
},
{
"epoch": 60.60116354234001,
"grad_norm": 1.2018239498138428,
"learning_rate": 0.001,
"loss": 0.6668,
"step": 187500
},
{
"epoch": 60.633484162895925,
"grad_norm": 1.442687749862671,
"learning_rate": 0.001,
"loss": 0.6462,
"step": 187600
},
{
"epoch": 60.66580478345184,
"grad_norm": 1.4780453443527222,
"learning_rate": 0.001,
"loss": 0.6695,
"step": 187700
},
{
"epoch": 60.698125404007754,
"grad_norm": 1.3161441087722778,
"learning_rate": 0.001,
"loss": 0.6759,
"step": 187800
},
{
"epoch": 60.73044602456367,
"grad_norm": 1.5663788318634033,
"learning_rate": 0.001,
"loss": 0.6783,
"step": 187900
},
{
"epoch": 60.762766645119584,
"grad_norm": 1.2903250455856323,
"learning_rate": 0.001,
"loss": 0.6802,
"step": 188000
},
{
"epoch": 60.7950872656755,
"grad_norm": 1.2310444116592407,
"learning_rate": 0.001,
"loss": 0.6798,
"step": 188100
},
{
"epoch": 60.82740788623141,
"grad_norm": 1.6267993450164795,
"learning_rate": 0.001,
"loss": 0.6747,
"step": 188200
},
{
"epoch": 60.85972850678733,
"grad_norm": 1.3514257669448853,
"learning_rate": 0.001,
"loss": 0.6954,
"step": 188300
},
{
"epoch": 60.89204912734324,
"grad_norm": 1.3700529336929321,
"learning_rate": 0.001,
"loss": 0.6921,
"step": 188400
},
{
"epoch": 60.924369747899156,
"grad_norm": 1.3592313528060913,
"learning_rate": 0.001,
"loss": 0.6949,
"step": 188500
},
{
"epoch": 60.95669036845507,
"grad_norm": 1.6864216327667236,
"learning_rate": 0.001,
"loss": 0.693,
"step": 188600
},
{
"epoch": 60.98901098901099,
"grad_norm": 1.476345181465149,
"learning_rate": 0.001,
"loss": 0.679,
"step": 188700
},
{
"epoch": 61.02133160956691,
"grad_norm": 1.2383800745010376,
"learning_rate": 0.001,
"loss": 0.6467,
"step": 188800
},
{
"epoch": 61.05365223012282,
"grad_norm": 1.6404695510864258,
"learning_rate": 0.001,
"loss": 0.617,
"step": 188900
},
{
"epoch": 61.085972850678736,
"grad_norm": 1.4813312292099,
"learning_rate": 0.001,
"loss": 0.612,
"step": 189000
},
{
"epoch": 61.11829347123465,
"grad_norm": 1.823527455329895,
"learning_rate": 0.001,
"loss": 0.6325,
"step": 189100
},
{
"epoch": 61.150614091790565,
"grad_norm": 1.2506108283996582,
"learning_rate": 0.001,
"loss": 0.6095,
"step": 189200
},
{
"epoch": 61.18293471234648,
"grad_norm": 1.376138687133789,
"learning_rate": 0.001,
"loss": 0.6224,
"step": 189300
},
{
"epoch": 61.215255332902395,
"grad_norm": 1.1488109827041626,
"learning_rate": 0.001,
"loss": 0.6249,
"step": 189400
},
{
"epoch": 61.24757595345831,
"grad_norm": 1.6473678350448608,
"learning_rate": 0.001,
"loss": 0.6276,
"step": 189500
},
{
"epoch": 61.279896574014224,
"grad_norm": 1.2410812377929688,
"learning_rate": 0.001,
"loss": 0.6226,
"step": 189600
},
{
"epoch": 61.31221719457014,
"grad_norm": 10.854134559631348,
"learning_rate": 0.001,
"loss": 0.652,
"step": 189700
},
{
"epoch": 61.34453781512605,
"grad_norm": 2.2806782722473145,
"learning_rate": 0.001,
"loss": 0.6421,
"step": 189800
},
{
"epoch": 61.37685843568197,
"grad_norm": 1.4156832695007324,
"learning_rate": 0.001,
"loss": 0.6338,
"step": 189900
},
{
"epoch": 61.40917905623788,
"grad_norm": 1.337144374847412,
"learning_rate": 0.001,
"loss": 0.65,
"step": 190000
},
{
"epoch": 61.441499676793796,
"grad_norm": 1.3378078937530518,
"learning_rate": 0.001,
"loss": 0.6391,
"step": 190100
},
{
"epoch": 61.47382029734971,
"grad_norm": 1.5442296266555786,
"learning_rate": 0.001,
"loss": 0.6588,
"step": 190200
},
{
"epoch": 61.506140917905626,
"grad_norm": 1.5696725845336914,
"learning_rate": 0.001,
"loss": 0.6621,
"step": 190300
},
{
"epoch": 61.53846153846154,
"grad_norm": 1.3986225128173828,
"learning_rate": 0.001,
"loss": 0.6463,
"step": 190400
},
{
"epoch": 61.570782159017455,
"grad_norm": 1.4197642803192139,
"learning_rate": 0.001,
"loss": 0.6643,
"step": 190500
},
{
"epoch": 61.60310277957337,
"grad_norm": 1.4512290954589844,
"learning_rate": 0.001,
"loss": 0.6533,
"step": 190600
},
{
"epoch": 61.635423400129284,
"grad_norm": 1.717221975326538,
"learning_rate": 0.001,
"loss": 0.6638,
"step": 190700
},
{
"epoch": 61.6677440206852,
"grad_norm": 1.4332921504974365,
"learning_rate": 0.001,
"loss": 0.6626,
"step": 190800
},
{
"epoch": 61.70006464124111,
"grad_norm": 1.781604290008545,
"learning_rate": 0.001,
"loss": 0.6637,
"step": 190900
},
{
"epoch": 61.73238526179703,
"grad_norm": 1.6162261962890625,
"learning_rate": 0.001,
"loss": 0.6542,
"step": 191000
},
{
"epoch": 61.76470588235294,
"grad_norm": 1.3693976402282715,
"learning_rate": 0.001,
"loss": 0.6713,
"step": 191100
},
{
"epoch": 61.79702650290886,
"grad_norm": 1.339870810508728,
"learning_rate": 0.001,
"loss": 0.667,
"step": 191200
},
{
"epoch": 61.82934712346477,
"grad_norm": 1.6282362937927246,
"learning_rate": 0.001,
"loss": 0.677,
"step": 191300
},
{
"epoch": 61.861667744020686,
"grad_norm": 1.3749769926071167,
"learning_rate": 0.001,
"loss": 0.6788,
"step": 191400
},
{
"epoch": 61.8939883645766,
"grad_norm": 1.6163561344146729,
"learning_rate": 0.001,
"loss": 0.6856,
"step": 191500
},
{
"epoch": 61.926308985132515,
"grad_norm": 1.7528187036514282,
"learning_rate": 0.001,
"loss": 0.6811,
"step": 191600
},
{
"epoch": 61.95862960568843,
"grad_norm": 1.3337574005126953,
"learning_rate": 0.001,
"loss": 0.6813,
"step": 191700
},
{
"epoch": 61.990950226244344,
"grad_norm": 1.4761725664138794,
"learning_rate": 0.001,
"loss": 0.688,
"step": 191800
},
{
"epoch": 62.02327084680026,
"grad_norm": 1.1570292711257935,
"learning_rate": 0.001,
"loss": 0.634,
"step": 191900
},
{
"epoch": 62.05559146735617,
"grad_norm": 1.2674442529678345,
"learning_rate": 0.001,
"loss": 0.6063,
"step": 192000
},
{
"epoch": 62.08791208791209,
"grad_norm": 1.3792743682861328,
"learning_rate": 0.001,
"loss": 0.6035,
"step": 192100
},
{
"epoch": 62.120232708468,
"grad_norm": 1.5314244031906128,
"learning_rate": 0.001,
"loss": 0.6136,
"step": 192200
},
{
"epoch": 62.15255332902392,
"grad_norm": 1.6034588813781738,
"learning_rate": 0.001,
"loss": 0.6097,
"step": 192300
},
{
"epoch": 62.18487394957983,
"grad_norm": 1.53525972366333,
"learning_rate": 0.001,
"loss": 0.6319,
"step": 192400
},
{
"epoch": 62.217194570135746,
"grad_norm": 1.1694210767745972,
"learning_rate": 0.001,
"loss": 0.6177,
"step": 192500
},
{
"epoch": 62.24951519069166,
"grad_norm": 1.1147392988204956,
"learning_rate": 0.001,
"loss": 0.6296,
"step": 192600
},
{
"epoch": 62.281835811247575,
"grad_norm": 2.554081916809082,
"learning_rate": 0.001,
"loss": 0.6303,
"step": 192700
},
{
"epoch": 62.31415643180349,
"grad_norm": 1.8635549545288086,
"learning_rate": 0.001,
"loss": 0.6389,
"step": 192800
},
{
"epoch": 62.346477052359404,
"grad_norm": 1.2993345260620117,
"learning_rate": 0.001,
"loss": 0.6266,
"step": 192900
},
{
"epoch": 62.37879767291532,
"grad_norm": 1.4036643505096436,
"learning_rate": 0.001,
"loss": 0.6352,
"step": 193000
},
{
"epoch": 62.41111829347123,
"grad_norm": 1.7309150695800781,
"learning_rate": 0.001,
"loss": 0.6386,
"step": 193100
},
{
"epoch": 62.44343891402715,
"grad_norm": 1.4345393180847168,
"learning_rate": 0.001,
"loss": 0.6329,
"step": 193200
},
{
"epoch": 62.47575953458306,
"grad_norm": 1.1931675672531128,
"learning_rate": 0.001,
"loss": 0.6399,
"step": 193300
},
{
"epoch": 62.50808015513898,
"grad_norm": 1.2835602760314941,
"learning_rate": 0.001,
"loss": 0.636,
"step": 193400
},
{
"epoch": 62.54040077569489,
"grad_norm": 1.2109442949295044,
"learning_rate": 0.001,
"loss": 0.6593,
"step": 193500
},
{
"epoch": 62.572721396250806,
"grad_norm": 1.3101767301559448,
"learning_rate": 0.001,
"loss": 0.6553,
"step": 193600
},
{
"epoch": 62.60504201680672,
"grad_norm": 1.3763705492019653,
"learning_rate": 0.001,
"loss": 0.6493,
"step": 193700
},
{
"epoch": 62.637362637362635,
"grad_norm": 1.2936313152313232,
"learning_rate": 0.001,
"loss": 0.6668,
"step": 193800
},
{
"epoch": 62.66968325791855,
"grad_norm": 1.3182551860809326,
"learning_rate": 0.001,
"loss": 0.6578,
"step": 193900
},
{
"epoch": 62.702003878474464,
"grad_norm": 1.171347737312317,
"learning_rate": 0.001,
"loss": 0.6648,
"step": 194000
},
{
"epoch": 62.73432449903038,
"grad_norm": 1.108046293258667,
"learning_rate": 0.001,
"loss": 0.6601,
"step": 194100
},
{
"epoch": 62.76664511958629,
"grad_norm": 1.2180066108703613,
"learning_rate": 0.001,
"loss": 0.662,
"step": 194200
},
{
"epoch": 62.79896574014221,
"grad_norm": 1.196394443511963,
"learning_rate": 0.001,
"loss": 0.6833,
"step": 194300
},
{
"epoch": 62.83128636069812,
"grad_norm": 1.340371012687683,
"learning_rate": 0.001,
"loss": 0.6584,
"step": 194400
},
{
"epoch": 62.86360698125404,
"grad_norm": 1.432305932044983,
"learning_rate": 0.001,
"loss": 0.6577,
"step": 194500
},
{
"epoch": 62.89592760180995,
"grad_norm": 1.041042447090149,
"learning_rate": 0.001,
"loss": 0.6583,
"step": 194600
},
{
"epoch": 62.928248222365866,
"grad_norm": 1.2910453081130981,
"learning_rate": 0.001,
"loss": 0.6918,
"step": 194700
},
{
"epoch": 62.96056884292178,
"grad_norm": 1.4799001216888428,
"learning_rate": 0.001,
"loss": 0.6755,
"step": 194800
},
{
"epoch": 62.992889463477695,
"grad_norm": 1.722066879272461,
"learning_rate": 0.001,
"loss": 0.6661,
"step": 194900
},
{
"epoch": 63.02521008403362,
"grad_norm": 1.3788399696350098,
"learning_rate": 0.001,
"loss": 0.6136,
"step": 195000
},
{
"epoch": 63.05753070458953,
"grad_norm": 1.6480493545532227,
"learning_rate": 0.001,
"loss": 0.608,
"step": 195100
},
{
"epoch": 63.089851325145446,
"grad_norm": 1.5709948539733887,
"learning_rate": 0.001,
"loss": 0.6036,
"step": 195200
},
{
"epoch": 63.12217194570136,
"grad_norm": 1.4029523134231567,
"learning_rate": 0.001,
"loss": 0.6171,
"step": 195300
},
{
"epoch": 63.154492566257275,
"grad_norm": 1.2338638305664062,
"learning_rate": 0.001,
"loss": 0.6006,
"step": 195400
},
{
"epoch": 63.18681318681319,
"grad_norm": 1.1037108898162842,
"learning_rate": 0.001,
"loss": 0.6153,
"step": 195500
},
{
"epoch": 63.219133807369104,
"grad_norm": 1.4087430238723755,
"learning_rate": 0.001,
"loss": 0.6155,
"step": 195600
},
{
"epoch": 63.25145442792502,
"grad_norm": 1.2912625074386597,
"learning_rate": 0.001,
"loss": 0.6161,
"step": 195700
},
{
"epoch": 63.28377504848093,
"grad_norm": 1.2002849578857422,
"learning_rate": 0.001,
"loss": 0.611,
"step": 195800
},
{
"epoch": 63.31609566903685,
"grad_norm": 1.1420046091079712,
"learning_rate": 0.001,
"loss": 0.6373,
"step": 195900
},
{
"epoch": 63.34841628959276,
"grad_norm": 1.29826819896698,
"learning_rate": 0.001,
"loss": 0.6353,
"step": 196000
},
{
"epoch": 63.38073691014868,
"grad_norm": 1.15346360206604,
"learning_rate": 0.001,
"loss": 0.6186,
"step": 196100
},
{
"epoch": 63.41305753070459,
"grad_norm": 1.4631860256195068,
"learning_rate": 0.001,
"loss": 0.633,
"step": 196200
},
{
"epoch": 63.445378151260506,
"grad_norm": 1.5273650884628296,
"learning_rate": 0.001,
"loss": 0.6317,
"step": 196300
},
{
"epoch": 63.47769877181642,
"grad_norm": 1.2980990409851074,
"learning_rate": 0.001,
"loss": 0.6256,
"step": 196400
},
{
"epoch": 63.510019392372335,
"grad_norm": 4.956090927124023,
"learning_rate": 0.001,
"loss": 0.6367,
"step": 196500
},
{
"epoch": 63.54234001292825,
"grad_norm": 1.180713176727295,
"learning_rate": 0.001,
"loss": 0.6315,
"step": 196600
},
{
"epoch": 63.574660633484164,
"grad_norm": 1.9830008745193481,
"learning_rate": 0.001,
"loss": 0.6323,
"step": 196700
},
{
"epoch": 63.60698125404008,
"grad_norm": 1.653559923171997,
"learning_rate": 0.001,
"loss": 0.6339,
"step": 196800
},
{
"epoch": 63.63930187459599,
"grad_norm": 1.3870145082473755,
"learning_rate": 0.001,
"loss": 0.6497,
"step": 196900
},
{
"epoch": 63.67162249515191,
"grad_norm": 1.1964478492736816,
"learning_rate": 0.001,
"loss": 0.6457,
"step": 197000
},
{
"epoch": 63.70394311570782,
"grad_norm": 1.1363627910614014,
"learning_rate": 0.001,
"loss": 0.6566,
"step": 197100
},
{
"epoch": 63.73626373626374,
"grad_norm": 1.4076826572418213,
"learning_rate": 0.001,
"loss": 0.6512,
"step": 197200
},
{
"epoch": 63.76858435681965,
"grad_norm": 1.313567876815796,
"learning_rate": 0.001,
"loss": 0.6445,
"step": 197300
},
{
"epoch": 63.800904977375566,
"grad_norm": 1.2769737243652344,
"learning_rate": 0.001,
"loss": 0.651,
"step": 197400
},
{
"epoch": 63.83322559793148,
"grad_norm": 1.3601458072662354,
"learning_rate": 0.001,
"loss": 0.6534,
"step": 197500
},
{
"epoch": 63.865546218487395,
"grad_norm": 1.2652206420898438,
"learning_rate": 0.001,
"loss": 0.6746,
"step": 197600
},
{
"epoch": 63.89786683904331,
"grad_norm": 1.2474119663238525,
"learning_rate": 0.001,
"loss": 0.6498,
"step": 197700
},
{
"epoch": 63.930187459599225,
"grad_norm": 1.1776601076126099,
"learning_rate": 0.001,
"loss": 0.6568,
"step": 197800
},
{
"epoch": 63.96250808015514,
"grad_norm": 1.3926881551742554,
"learning_rate": 0.001,
"loss": 0.6834,
"step": 197900
},
{
"epoch": 63.994828700711054,
"grad_norm": 1.4210342168807983,
"learning_rate": 0.001,
"loss": 0.6644,
"step": 198000
},
{
"epoch": 64.02714932126698,
"grad_norm": 1.1131622791290283,
"learning_rate": 0.001,
"loss": 0.5911,
"step": 198100
},
{
"epoch": 64.05946994182288,
"grad_norm": 1.1499837636947632,
"learning_rate": 0.001,
"loss": 0.5899,
"step": 198200
},
{
"epoch": 64.0917905623788,
"grad_norm": 1.542824625968933,
"learning_rate": 0.001,
"loss": 0.5949,
"step": 198300
},
{
"epoch": 64.12411118293471,
"grad_norm": 0.9843227863311768,
"learning_rate": 0.001,
"loss": 0.6054,
"step": 198400
},
{
"epoch": 64.15643180349063,
"grad_norm": 1.3814574480056763,
"learning_rate": 0.001,
"loss": 0.6014,
"step": 198500
},
{
"epoch": 64.18875242404654,
"grad_norm": 1.4337519407272339,
"learning_rate": 0.001,
"loss": 0.6009,
"step": 198600
},
{
"epoch": 64.22107304460246,
"grad_norm": 1.3197928667068481,
"learning_rate": 0.001,
"loss": 0.6177,
"step": 198700
},
{
"epoch": 64.25339366515837,
"grad_norm": 1.289289116859436,
"learning_rate": 0.001,
"loss": 0.6107,
"step": 198800
},
{
"epoch": 64.28571428571429,
"grad_norm": 1.8015815019607544,
"learning_rate": 0.001,
"loss": 0.6158,
"step": 198900
},
{
"epoch": 64.3180349062702,
"grad_norm": 1.9143645763397217,
"learning_rate": 0.001,
"loss": 0.6172,
"step": 199000
},
{
"epoch": 64.35035552682612,
"grad_norm": 1.310492753982544,
"learning_rate": 0.001,
"loss": 0.6177,
"step": 199100
},
{
"epoch": 64.38267614738203,
"grad_norm": 1.1836750507354736,
"learning_rate": 0.001,
"loss": 0.6297,
"step": 199200
},
{
"epoch": 64.41499676793795,
"grad_norm": 1.273499846458435,
"learning_rate": 0.001,
"loss": 0.6168,
"step": 199300
},
{
"epoch": 64.44731738849386,
"grad_norm": 1.3184856176376343,
"learning_rate": 0.001,
"loss": 0.6205,
"step": 199400
},
{
"epoch": 64.47963800904978,
"grad_norm": 1.370647668838501,
"learning_rate": 0.001,
"loss": 0.6336,
"step": 199500
},
{
"epoch": 64.51195862960569,
"grad_norm": 1.4278833866119385,
"learning_rate": 0.001,
"loss": 0.6315,
"step": 199600
},
{
"epoch": 64.54427925016161,
"grad_norm": 1.2103545665740967,
"learning_rate": 0.001,
"loss": 0.6377,
"step": 199700
},
{
"epoch": 64.57659987071752,
"grad_norm": 1.6035828590393066,
"learning_rate": 0.001,
"loss": 0.6336,
"step": 199800
},
{
"epoch": 64.60892049127344,
"grad_norm": 1.029595971107483,
"learning_rate": 0.001,
"loss": 0.6382,
"step": 199900
},
{
"epoch": 64.64124111182934,
"grad_norm": 1.3785440921783447,
"learning_rate": 0.001,
"loss": 0.6392,
"step": 200000
},
{
"epoch": 64.67356173238527,
"grad_norm": 1.218138337135315,
"learning_rate": 0.001,
"loss": 0.6394,
"step": 200100
},
{
"epoch": 64.70588235294117,
"grad_norm": 1.326785922050476,
"learning_rate": 0.001,
"loss": 0.6552,
"step": 200200
},
{
"epoch": 64.7382029734971,
"grad_norm": 1.4117070436477661,
"learning_rate": 0.001,
"loss": 0.6476,
"step": 200300
},
{
"epoch": 64.770523594053,
"grad_norm": 1.4691327810287476,
"learning_rate": 0.001,
"loss": 0.6415,
"step": 200400
},
{
"epoch": 64.80284421460892,
"grad_norm": 1.4258376359939575,
"learning_rate": 0.001,
"loss": 0.6622,
"step": 200500
},
{
"epoch": 64.83516483516483,
"grad_norm": 1.3137234449386597,
"learning_rate": 0.001,
"loss": 0.6443,
"step": 200600
},
{
"epoch": 64.86748545572075,
"grad_norm": 1.0861976146697998,
"learning_rate": 0.001,
"loss": 0.6563,
"step": 200700
},
{
"epoch": 64.89980607627666,
"grad_norm": 1.148963212966919,
"learning_rate": 0.001,
"loss": 0.6483,
"step": 200800
},
{
"epoch": 64.93212669683258,
"grad_norm": 1.3883782625198364,
"learning_rate": 0.001,
"loss": 0.648,
"step": 200900
},
{
"epoch": 64.96444731738849,
"grad_norm": 1.6844929456710815,
"learning_rate": 0.001,
"loss": 0.6661,
"step": 201000
},
{
"epoch": 64.99676793794441,
"grad_norm": 1.6211166381835938,
"learning_rate": 0.001,
"loss": 0.6372,
"step": 201100
},
{
"epoch": 65.02908855850032,
"grad_norm": 1.3715167045593262,
"learning_rate": 0.001,
"loss": 0.5833,
"step": 201200
},
{
"epoch": 65.06140917905624,
"grad_norm": 1.0815142393112183,
"learning_rate": 0.001,
"loss": 0.5804,
"step": 201300
},
{
"epoch": 65.09372979961215,
"grad_norm": 1.040615439414978,
"learning_rate": 0.001,
"loss": 0.5859,
"step": 201400
},
{
"epoch": 65.12605042016807,
"grad_norm": 1.36244797706604,
"learning_rate": 0.001,
"loss": 0.5935,
"step": 201500
},
{
"epoch": 65.15837104072398,
"grad_norm": 1.1301552057266235,
"learning_rate": 0.001,
"loss": 0.5946,
"step": 201600
},
{
"epoch": 65.1906916612799,
"grad_norm": 1.00641930103302,
"learning_rate": 0.001,
"loss": 0.5962,
"step": 201700
},
{
"epoch": 65.2230122818358,
"grad_norm": 1.5438146591186523,
"learning_rate": 0.001,
"loss": 0.6009,
"step": 201800
},
{
"epoch": 65.25533290239173,
"grad_norm": 1.7968852519989014,
"learning_rate": 0.001,
"loss": 0.61,
"step": 201900
},
{
"epoch": 65.28765352294764,
"grad_norm": 1.2997229099273682,
"learning_rate": 0.001,
"loss": 0.6055,
"step": 202000
},
{
"epoch": 65.31997414350356,
"grad_norm": 1.2967760562896729,
"learning_rate": 0.001,
"loss": 0.5919,
"step": 202100
},
{
"epoch": 65.35229476405947,
"grad_norm": 1.2664779424667358,
"learning_rate": 0.001,
"loss": 0.6116,
"step": 202200
},
{
"epoch": 65.38461538461539,
"grad_norm": 1.5638916492462158,
"learning_rate": 0.001,
"loss": 0.6073,
"step": 202300
},
{
"epoch": 65.4169360051713,
"grad_norm": 1.400978922843933,
"learning_rate": 0.001,
"loss": 0.6127,
"step": 202400
},
{
"epoch": 65.44925662572722,
"grad_norm": 1.990863561630249,
"learning_rate": 0.001,
"loss": 0.6285,
"step": 202500
},
{
"epoch": 65.48157724628312,
"grad_norm": 1.062779188156128,
"learning_rate": 0.001,
"loss": 0.6176,
"step": 202600
},
{
"epoch": 65.51389786683905,
"grad_norm": 1.3369722366333008,
"learning_rate": 0.001,
"loss": 0.6216,
"step": 202700
},
{
"epoch": 65.54621848739495,
"grad_norm": 1.756347417831421,
"learning_rate": 0.001,
"loss": 0.6291,
"step": 202800
},
{
"epoch": 65.57853910795087,
"grad_norm": 1.065873384475708,
"learning_rate": 0.001,
"loss": 0.6181,
"step": 202900
},
{
"epoch": 65.61085972850678,
"grad_norm": 1.2089930772781372,
"learning_rate": 0.001,
"loss": 0.6189,
"step": 203000
},
{
"epoch": 65.6431803490627,
"grad_norm": 1.3150242567062378,
"learning_rate": 0.001,
"loss": 0.6453,
"step": 203100
},
{
"epoch": 65.67550096961861,
"grad_norm": 1.3111896514892578,
"learning_rate": 0.001,
"loss": 0.6317,
"step": 203200
},
{
"epoch": 65.70782159017453,
"grad_norm": 1.1152726411819458,
"learning_rate": 0.001,
"loss": 0.6405,
"step": 203300
},
{
"epoch": 65.74014221073044,
"grad_norm": 1.3868318796157837,
"learning_rate": 0.001,
"loss": 0.6381,
"step": 203400
},
{
"epoch": 65.77246283128636,
"grad_norm": 1.0794872045516968,
"learning_rate": 0.001,
"loss": 0.6367,
"step": 203500
},
{
"epoch": 65.80478345184227,
"grad_norm": 0.9957221746444702,
"learning_rate": 0.001,
"loss": 0.6376,
"step": 203600
},
{
"epoch": 65.83710407239819,
"grad_norm": 1.7803281545639038,
"learning_rate": 0.001,
"loss": 0.6441,
"step": 203700
},
{
"epoch": 65.8694246929541,
"grad_norm": 1.3243619203567505,
"learning_rate": 0.001,
"loss": 0.6486,
"step": 203800
},
{
"epoch": 65.90174531351002,
"grad_norm": 1.8025758266448975,
"learning_rate": 0.001,
"loss": 0.6266,
"step": 203900
},
{
"epoch": 65.93406593406593,
"grad_norm": 1.5865435600280762,
"learning_rate": 0.001,
"loss": 0.6534,
"step": 204000
},
{
"epoch": 65.96638655462185,
"grad_norm": 1.2580561637878418,
"learning_rate": 0.001,
"loss": 0.6571,
"step": 204100
},
{
"epoch": 65.99870717517777,
"grad_norm": 1.0204856395721436,
"learning_rate": 0.001,
"loss": 0.6378,
"step": 204200
},
{
"epoch": 66.03102779573368,
"grad_norm": 0.7897757887840271,
"learning_rate": 0.001,
"loss": 0.5688,
"step": 204300
},
{
"epoch": 66.0633484162896,
"grad_norm": 0.9957269430160522,
"learning_rate": 0.001,
"loss": 0.5762,
"step": 204400
},
{
"epoch": 66.0956690368455,
"grad_norm": 0.931441605091095,
"learning_rate": 0.001,
"loss": 0.5709,
"step": 204500
},
{
"epoch": 66.12798965740143,
"grad_norm": 1.0603188276290894,
"learning_rate": 0.001,
"loss": 0.6035,
"step": 204600
},
{
"epoch": 66.16031027795734,
"grad_norm": 0.9763708114624023,
"learning_rate": 0.001,
"loss": 0.5772,
"step": 204700
},
{
"epoch": 66.19263089851326,
"grad_norm": 0.8747771382331848,
"learning_rate": 0.001,
"loss": 0.5889,
"step": 204800
},
{
"epoch": 66.22495151906917,
"grad_norm": 0.968600869178772,
"learning_rate": 0.001,
"loss": 0.6063,
"step": 204900
},
{
"epoch": 66.25727213962509,
"grad_norm": 0.8659926652908325,
"learning_rate": 0.001,
"loss": 0.5979,
"step": 205000
},
{
"epoch": 66.289592760181,
"grad_norm": 0.9334861040115356,
"learning_rate": 0.001,
"loss": 0.5813,
"step": 205100
},
{
"epoch": 66.32191338073692,
"grad_norm": 1.1221340894699097,
"learning_rate": 0.001,
"loss": 0.6035,
"step": 205200
},
{
"epoch": 66.35423400129282,
"grad_norm": 1.2959271669387817,
"learning_rate": 0.001,
"loss": 0.5931,
"step": 205300
},
{
"epoch": 66.38655462184875,
"grad_norm": 0.9807857275009155,
"learning_rate": 0.001,
"loss": 0.6045,
"step": 205400
},
{
"epoch": 66.41887524240465,
"grad_norm": 1.0263525247573853,
"learning_rate": 0.001,
"loss": 0.604,
"step": 205500
},
{
"epoch": 66.45119586296057,
"grad_norm": 0.9026124477386475,
"learning_rate": 0.001,
"loss": 0.6266,
"step": 205600
},
{
"epoch": 66.48351648351648,
"grad_norm": 1.1074974536895752,
"learning_rate": 0.001,
"loss": 0.6292,
"step": 205700
},
{
"epoch": 66.5158371040724,
"grad_norm": 0.889342725276947,
"learning_rate": 0.001,
"loss": 0.6134,
"step": 205800
},
{
"epoch": 66.54815772462831,
"grad_norm": 1.0654977560043335,
"learning_rate": 0.001,
"loss": 0.6055,
"step": 205900
},
{
"epoch": 66.58047834518423,
"grad_norm": 1.0482105016708374,
"learning_rate": 0.001,
"loss": 0.6176,
"step": 206000
},
{
"epoch": 66.61279896574014,
"grad_norm": 1.1681993007659912,
"learning_rate": 0.001,
"loss": 0.6305,
"step": 206100
},
{
"epoch": 66.64511958629606,
"grad_norm": 1.0052601099014282,
"learning_rate": 0.001,
"loss": 0.6273,
"step": 206200
},
{
"epoch": 66.67744020685197,
"grad_norm": 0.967351496219635,
"learning_rate": 0.001,
"loss": 0.6226,
"step": 206300
},
{
"epoch": 66.70976082740789,
"grad_norm": 0.9480918049812317,
"learning_rate": 0.001,
"loss": 0.6232,
"step": 206400
},
{
"epoch": 66.7420814479638,
"grad_norm": 1.2429511547088623,
"learning_rate": 0.001,
"loss": 0.6479,
"step": 206500
},
{
"epoch": 66.77440206851972,
"grad_norm": 0.8635256290435791,
"learning_rate": 0.001,
"loss": 0.6254,
"step": 206600
},
{
"epoch": 66.80672268907563,
"grad_norm": 1.0228506326675415,
"learning_rate": 0.001,
"loss": 0.6335,
"step": 206700
},
{
"epoch": 66.83904330963155,
"grad_norm": 1.1268036365509033,
"learning_rate": 0.001,
"loss": 0.6306,
"step": 206800
},
{
"epoch": 66.87136393018746,
"grad_norm": 0.8553975224494934,
"learning_rate": 0.001,
"loss": 0.6328,
"step": 206900
},
{
"epoch": 66.90368455074338,
"grad_norm": 1.1963768005371094,
"learning_rate": 0.001,
"loss": 0.6224,
"step": 207000
},
{
"epoch": 66.93600517129929,
"grad_norm": 1.0734686851501465,
"learning_rate": 0.001,
"loss": 0.6444,
"step": 207100
},
{
"epoch": 66.96832579185521,
"grad_norm": 1.1173734664916992,
"learning_rate": 0.001,
"loss": 0.6419,
"step": 207200
},
{
"epoch": 67.00064641241111,
"grad_norm": 2.1012940406799316,
"learning_rate": 0.001,
"loss": 0.6206,
"step": 207300
},
{
"epoch": 67.03296703296704,
"grad_norm": 1.714339017868042,
"learning_rate": 0.001,
"loss": 0.5702,
"step": 207400
},
{
"epoch": 67.06528765352294,
"grad_norm": 1.949273705482483,
"learning_rate": 0.001,
"loss": 0.5714,
"step": 207500
},
{
"epoch": 67.09760827407887,
"grad_norm": 1.5728001594543457,
"learning_rate": 0.001,
"loss": 0.5665,
"step": 207600
},
{
"epoch": 67.12992889463477,
"grad_norm": 1.9394041299819946,
"learning_rate": 0.001,
"loss": 0.5756,
"step": 207700
},
{
"epoch": 67.1622495151907,
"grad_norm": 2.8248255252838135,
"learning_rate": 0.001,
"loss": 0.5865,
"step": 207800
},
{
"epoch": 67.1945701357466,
"grad_norm": 2.1231889724731445,
"learning_rate": 0.001,
"loss": 0.5867,
"step": 207900
},
{
"epoch": 67.22689075630252,
"grad_norm": 1.68788480758667,
"learning_rate": 0.001,
"loss": 0.5956,
"step": 208000
},
{
"epoch": 67.25921137685843,
"grad_norm": 2.0351736545562744,
"learning_rate": 0.001,
"loss": 0.5791,
"step": 208100
},
{
"epoch": 67.29153199741435,
"grad_norm": 39.95174789428711,
"learning_rate": 0.001,
"loss": 0.5842,
"step": 208200
},
{
"epoch": 67.32385261797026,
"grad_norm": 1.5130423307418823,
"learning_rate": 0.001,
"loss": 0.6005,
"step": 208300
},
{
"epoch": 67.35617323852618,
"grad_norm": 1.4413126707077026,
"learning_rate": 0.001,
"loss": 0.6138,
"step": 208400
},
{
"epoch": 67.38849385908209,
"grad_norm": 1.9939687252044678,
"learning_rate": 0.001,
"loss": 0.5946,
"step": 208500
},
{
"epoch": 67.42081447963801,
"grad_norm": 2.0666182041168213,
"learning_rate": 0.001,
"loss": 0.5939,
"step": 208600
},
{
"epoch": 67.45313510019392,
"grad_norm": 1.77826726436615,
"learning_rate": 0.001,
"loss": 0.6037,
"step": 208700
},
{
"epoch": 67.48545572074984,
"grad_norm": 1.637294888496399,
"learning_rate": 0.001,
"loss": 0.5942,
"step": 208800
},
{
"epoch": 67.51777634130575,
"grad_norm": 1.2931591272354126,
"learning_rate": 0.001,
"loss": 0.6037,
"step": 208900
},
{
"epoch": 67.55009696186167,
"grad_norm": 1.5712636709213257,
"learning_rate": 0.001,
"loss": 0.604,
"step": 209000
},
{
"epoch": 67.58241758241758,
"grad_norm": 2.2092108726501465,
"learning_rate": 0.001,
"loss": 0.6223,
"step": 209100
},
{
"epoch": 67.6147382029735,
"grad_norm": 1.896464228630066,
"learning_rate": 0.001,
"loss": 0.6131,
"step": 209200
},
{
"epoch": 67.6470588235294,
"grad_norm": 1.501253366470337,
"learning_rate": 0.001,
"loss": 0.6169,
"step": 209300
},
{
"epoch": 67.67937944408533,
"grad_norm": 2.152764081954956,
"learning_rate": 0.001,
"loss": 0.6189,
"step": 209400
},
{
"epoch": 67.71170006464124,
"grad_norm": 1.7892820835113525,
"learning_rate": 0.001,
"loss": 0.626,
"step": 209500
},
{
"epoch": 67.74402068519716,
"grad_norm": 1.704662799835205,
"learning_rate": 0.001,
"loss": 0.6233,
"step": 209600
},
{
"epoch": 67.77634130575306,
"grad_norm": 1.6158897876739502,
"learning_rate": 0.001,
"loss": 0.6373,
"step": 209700
},
{
"epoch": 67.80866192630899,
"grad_norm": 2.0007736682891846,
"learning_rate": 0.001,
"loss": 0.6404,
"step": 209800
},
{
"epoch": 67.8409825468649,
"grad_norm": 2.2105071544647217,
"learning_rate": 0.001,
"loss": 0.6201,
"step": 209900
},
{
"epoch": 67.87330316742081,
"grad_norm": 2.1892433166503906,
"learning_rate": 0.001,
"loss": 0.6174,
"step": 210000
},
{
"epoch": 67.90562378797672,
"grad_norm": 2.033268451690674,
"learning_rate": 0.001,
"loss": 0.6263,
"step": 210100
},
{
"epoch": 67.93794440853264,
"grad_norm": 1.7615630626678467,
"learning_rate": 0.001,
"loss": 0.6335,
"step": 210200
},
{
"epoch": 67.97026502908855,
"grad_norm": 2.064373254776001,
"learning_rate": 0.001,
"loss": 0.6224,
"step": 210300
},
{
"epoch": 68.00258564964447,
"grad_norm": 1.2215957641601562,
"learning_rate": 0.001,
"loss": 0.6624,
"step": 210400
},
{
"epoch": 68.0349062702004,
"grad_norm": 1.3218648433685303,
"learning_rate": 0.001,
"loss": 0.5683,
"step": 210500
},
{
"epoch": 68.0672268907563,
"grad_norm": 1.6484256982803345,
"learning_rate": 0.001,
"loss": 0.5646,
"step": 210600
},
{
"epoch": 68.09954751131222,
"grad_norm": 2.0356903076171875,
"learning_rate": 0.001,
"loss": 0.5728,
"step": 210700
},
{
"epoch": 68.13186813186813,
"grad_norm": 1.6391444206237793,
"learning_rate": 0.001,
"loss": 0.5682,
"step": 210800
},
{
"epoch": 68.16418875242405,
"grad_norm": 1.3358436822891235,
"learning_rate": 0.001,
"loss": 0.572,
"step": 210900
},
{
"epoch": 68.19650937297996,
"grad_norm": 1.5110633373260498,
"learning_rate": 0.001,
"loss": 0.5841,
"step": 211000
},
{
"epoch": 68.22882999353588,
"grad_norm": 1.7556782960891724,
"learning_rate": 0.001,
"loss": 0.579,
"step": 211100
},
{
"epoch": 68.26115061409179,
"grad_norm": 1.377893090248108,
"learning_rate": 0.001,
"loss": 0.5891,
"step": 211200
},
{
"epoch": 68.29347123464771,
"grad_norm": 1.45765221118927,
"learning_rate": 0.001,
"loss": 0.5824,
"step": 211300
},
{
"epoch": 68.32579185520362,
"grad_norm": 1.6839174032211304,
"learning_rate": 0.001,
"loss": 0.5854,
"step": 211400
},
{
"epoch": 68.35811247575954,
"grad_norm": 1.3309268951416016,
"learning_rate": 0.001,
"loss": 0.585,
"step": 211500
},
{
"epoch": 68.39043309631545,
"grad_norm": 1.6310690641403198,
"learning_rate": 0.001,
"loss": 0.592,
"step": 211600
},
{
"epoch": 68.42275371687137,
"grad_norm": 1.7203037738800049,
"learning_rate": 0.001,
"loss": 0.5917,
"step": 211700
},
{
"epoch": 68.45507433742728,
"grad_norm": 1.5274381637573242,
"learning_rate": 0.001,
"loss": 0.6048,
"step": 211800
},
{
"epoch": 68.4873949579832,
"grad_norm": 1.3097580671310425,
"learning_rate": 0.001,
"loss": 0.5978,
"step": 211900
},
{
"epoch": 68.5197155785391,
"grad_norm": 1.3245912790298462,
"learning_rate": 0.001,
"loss": 0.6101,
"step": 212000
},
{
"epoch": 68.55203619909503,
"grad_norm": 1.7545794248580933,
"learning_rate": 0.001,
"loss": 0.6098,
"step": 212100
},
{
"epoch": 68.58435681965094,
"grad_norm": 1.5376828908920288,
"learning_rate": 0.001,
"loss": 0.6023,
"step": 212200
},
{
"epoch": 68.61667744020686,
"grad_norm": 1.4824035167694092,
"learning_rate": 0.001,
"loss": 0.6075,
"step": 212300
},
{
"epoch": 68.64899806076276,
"grad_norm": 1.7261457443237305,
"learning_rate": 0.001,
"loss": 0.62,
"step": 212400
},
{
"epoch": 68.68131868131869,
"grad_norm": 1.4751150608062744,
"learning_rate": 0.001,
"loss": 0.595,
"step": 212500
},
{
"epoch": 68.7136393018746,
"grad_norm": 1.6435458660125732,
"learning_rate": 0.001,
"loss": 0.6185,
"step": 212600
},
{
"epoch": 68.74595992243052,
"grad_norm": 1.823643445968628,
"learning_rate": 0.001,
"loss": 0.6149,
"step": 212700
},
{
"epoch": 68.77828054298642,
"grad_norm": 1.5763131380081177,
"learning_rate": 0.001,
"loss": 0.6125,
"step": 212800
},
{
"epoch": 68.81060116354234,
"grad_norm": 2.119630813598633,
"learning_rate": 0.001,
"loss": 0.6197,
"step": 212900
},
{
"epoch": 68.84292178409825,
"grad_norm": 1.5855299234390259,
"learning_rate": 0.001,
"loss": 0.6174,
"step": 213000
},
{
"epoch": 68.87524240465417,
"grad_norm": 1.9166241884231567,
"learning_rate": 0.001,
"loss": 0.6214,
"step": 213100
},
{
"epoch": 68.90756302521008,
"grad_norm": 1.4405461549758911,
"learning_rate": 0.001,
"loss": 0.6125,
"step": 213200
},
{
"epoch": 68.939883645766,
"grad_norm": 1.537550449371338,
"learning_rate": 0.001,
"loss": 0.6363,
"step": 213300
},
{
"epoch": 68.97220426632191,
"grad_norm": 1.497914433479309,
"learning_rate": 0.001,
"loss": 0.6071,
"step": 213400
},
{
"epoch": 69.00452488687783,
"grad_norm": 1.598833441734314,
"learning_rate": 0.001,
"loss": 0.6439,
"step": 213500
},
{
"epoch": 69.03684550743374,
"grad_norm": 1.656296968460083,
"learning_rate": 0.001,
"loss": 0.5466,
"step": 213600
},
{
"epoch": 69.06916612798966,
"grad_norm": 1.80128812789917,
"learning_rate": 0.001,
"loss": 0.5598,
"step": 213700
},
{
"epoch": 69.10148674854557,
"grad_norm": 1.3732154369354248,
"learning_rate": 0.001,
"loss": 0.5704,
"step": 213800
},
{
"epoch": 69.13380736910149,
"grad_norm": 1.4523626565933228,
"learning_rate": 0.001,
"loss": 0.5613,
"step": 213900
},
{
"epoch": 69.1661279896574,
"grad_norm": 1.4673432111740112,
"learning_rate": 0.001,
"loss": 0.592,
"step": 214000
},
{
"epoch": 69.19844861021332,
"grad_norm": 1.9327133893966675,
"learning_rate": 0.001,
"loss": 0.5726,
"step": 214100
},
{
"epoch": 69.23076923076923,
"grad_norm": 1.9804271459579468,
"learning_rate": 0.001,
"loss": 0.563,
"step": 214200
},
{
"epoch": 69.26308985132515,
"grad_norm": 1.4549953937530518,
"learning_rate": 0.001,
"loss": 0.5724,
"step": 214300
},
{
"epoch": 69.29541047188106,
"grad_norm": 1.4232009649276733,
"learning_rate": 0.001,
"loss": 0.5779,
"step": 214400
},
{
"epoch": 69.32773109243698,
"grad_norm": 1.3493443727493286,
"learning_rate": 0.001,
"loss": 0.5708,
"step": 214500
},
{
"epoch": 69.36005171299288,
"grad_norm": 1.3888554573059082,
"learning_rate": 0.001,
"loss": 0.5761,
"step": 214600
},
{
"epoch": 69.3923723335488,
"grad_norm": 1.9216724634170532,
"learning_rate": 0.001,
"loss": 0.5856,
"step": 214700
},
{
"epoch": 69.42469295410471,
"grad_norm": 1.405328631401062,
"learning_rate": 0.001,
"loss": 0.5776,
"step": 214800
},
{
"epoch": 69.45701357466064,
"grad_norm": 6.138822078704834,
"learning_rate": 0.001,
"loss": 0.5921,
"step": 214900
},
{
"epoch": 69.48933419521654,
"grad_norm": 1.3978585004806519,
"learning_rate": 0.001,
"loss": 0.6057,
"step": 215000
},
{
"epoch": 69.52165481577246,
"grad_norm": 1.506648302078247,
"learning_rate": 0.001,
"loss": 0.6013,
"step": 215100
},
{
"epoch": 69.55397543632837,
"grad_norm": 1.3363564014434814,
"learning_rate": 0.001,
"loss": 0.6018,
"step": 215200
},
{
"epoch": 69.5862960568843,
"grad_norm": 1.5143280029296875,
"learning_rate": 0.001,
"loss": 0.6043,
"step": 215300
},
{
"epoch": 69.6186166774402,
"grad_norm": 1.5944569110870361,
"learning_rate": 0.001,
"loss": 0.6056,
"step": 215400
},
{
"epoch": 69.65093729799612,
"grad_norm": 1.5265306234359741,
"learning_rate": 0.001,
"loss": 0.6095,
"step": 215500
},
{
"epoch": 69.68325791855203,
"grad_norm": 1.4436867237091064,
"learning_rate": 0.001,
"loss": 0.6039,
"step": 215600
},
{
"epoch": 69.71557853910795,
"grad_norm": 1.7266173362731934,
"learning_rate": 0.001,
"loss": 0.6178,
"step": 215700
},
{
"epoch": 69.74789915966386,
"grad_norm": 1.4980125427246094,
"learning_rate": 0.001,
"loss": 0.6012,
"step": 215800
},
{
"epoch": 69.78021978021978,
"grad_norm": 1.6730942726135254,
"learning_rate": 0.001,
"loss": 0.5914,
"step": 215900
},
{
"epoch": 69.81254040077569,
"grad_norm": 1.3241289854049683,
"learning_rate": 0.001,
"loss": 0.6106,
"step": 216000
},
{
"epoch": 69.84486102133161,
"grad_norm": 1.7309767007827759,
"learning_rate": 0.001,
"loss": 0.6057,
"step": 216100
},
{
"epoch": 69.87718164188752,
"grad_norm": 1.775754451751709,
"learning_rate": 0.001,
"loss": 0.6051,
"step": 216200
},
{
"epoch": 69.90950226244344,
"grad_norm": 1.5161246061325073,
"learning_rate": 0.001,
"loss": 0.6182,
"step": 216300
},
{
"epoch": 69.94182288299935,
"grad_norm": 1.2103925943374634,
"learning_rate": 0.001,
"loss": 0.6103,
"step": 216400
},
{
"epoch": 69.97414350355527,
"grad_norm": 1.3319581747055054,
"learning_rate": 0.001,
"loss": 0.6308,
"step": 216500
},
{
"epoch": 70.00646412411119,
"grad_norm": 1.5204486846923828,
"learning_rate": 0.001,
"loss": 0.6271,
"step": 216600
},
{
"epoch": 70.0387847446671,
"grad_norm": 1.5178135633468628,
"learning_rate": 0.001,
"loss": 0.56,
"step": 216700
},
{
"epoch": 70.07110536522302,
"grad_norm": 2.1006743907928467,
"learning_rate": 0.001,
"loss": 0.5537,
"step": 216800
},
{
"epoch": 70.10342598577893,
"grad_norm": 1.3505820035934448,
"learning_rate": 0.001,
"loss": 0.552,
"step": 216900
},
{
"epoch": 70.13574660633485,
"grad_norm": 1.6325558423995972,
"learning_rate": 0.001,
"loss": 0.5586,
"step": 217000
},
{
"epoch": 70.16806722689076,
"grad_norm": 1.2984336614608765,
"learning_rate": 0.001,
"loss": 0.575,
"step": 217100
},
{
"epoch": 70.20038784744668,
"grad_norm": 1.3796709775924683,
"learning_rate": 0.001,
"loss": 0.5591,
"step": 217200
},
{
"epoch": 70.23270846800258,
"grad_norm": 1.330457091331482,
"learning_rate": 0.001,
"loss": 0.5662,
"step": 217300
},
{
"epoch": 70.2650290885585,
"grad_norm": 1.3719303607940674,
"learning_rate": 0.001,
"loss": 0.5662,
"step": 217400
},
{
"epoch": 70.29734970911441,
"grad_norm": 1.4298548698425293,
"learning_rate": 0.001,
"loss": 0.5764,
"step": 217500
},
{
"epoch": 70.32967032967034,
"grad_norm": 1.5424270629882812,
"learning_rate": 0.001,
"loss": 0.5796,
"step": 217600
},
{
"epoch": 70.36199095022624,
"grad_norm": 2.652244806289673,
"learning_rate": 0.001,
"loss": 0.5843,
"step": 217700
},
{
"epoch": 70.39431157078216,
"grad_norm": 1.5292531251907349,
"learning_rate": 0.001,
"loss": 0.5734,
"step": 217800
},
{
"epoch": 70.42663219133807,
"grad_norm": 1.3583437204360962,
"learning_rate": 0.001,
"loss": 0.5722,
"step": 217900
},
{
"epoch": 70.458952811894,
"grad_norm": 1.53679358959198,
"learning_rate": 0.001,
"loss": 0.5855,
"step": 218000
},
{
"epoch": 70.4912734324499,
"grad_norm": 1.4344574213027954,
"learning_rate": 0.001,
"loss": 0.5878,
"step": 218100
},
{
"epoch": 70.52359405300582,
"grad_norm": 1.6726317405700684,
"learning_rate": 0.001,
"loss": 0.6018,
"step": 218200
},
{
"epoch": 70.55591467356173,
"grad_norm": 1.8626093864440918,
"learning_rate": 0.001,
"loss": 0.5872,
"step": 218300
},
{
"epoch": 70.58823529411765,
"grad_norm": 1.1871416568756104,
"learning_rate": 0.001,
"loss": 0.5868,
"step": 218400
},
{
"epoch": 70.62055591467356,
"grad_norm": 1.326026439666748,
"learning_rate": 0.001,
"loss": 0.5924,
"step": 218500
},
{
"epoch": 70.65287653522948,
"grad_norm": 1.5485585927963257,
"learning_rate": 0.001,
"loss": 0.5912,
"step": 218600
},
{
"epoch": 70.68519715578539,
"grad_norm": 1.8677207231521606,
"learning_rate": 0.001,
"loss": 0.5959,
"step": 218700
},
{
"epoch": 70.71751777634131,
"grad_norm": 1.7053265571594238,
"learning_rate": 0.001,
"loss": 0.5952,
"step": 218800
},
{
"epoch": 70.74983839689722,
"grad_norm": 1.3536996841430664,
"learning_rate": 0.001,
"loss": 0.5907,
"step": 218900
},
{
"epoch": 70.78215901745314,
"grad_norm": 2.0897066593170166,
"learning_rate": 0.001,
"loss": 0.6039,
"step": 219000
},
{
"epoch": 70.81447963800905,
"grad_norm": 1.4968990087509155,
"learning_rate": 0.001,
"loss": 0.5972,
"step": 219100
},
{
"epoch": 70.84680025856497,
"grad_norm": 1.3414818048477173,
"learning_rate": 0.001,
"loss": 0.6101,
"step": 219200
},
{
"epoch": 70.87912087912088,
"grad_norm": 1.3500386476516724,
"learning_rate": 0.001,
"loss": 0.6112,
"step": 219300
},
{
"epoch": 70.9114414996768,
"grad_norm": 1.1630859375,
"learning_rate": 0.001,
"loss": 0.6123,
"step": 219400
},
{
"epoch": 70.9437621202327,
"grad_norm": 1.5307071208953857,
"learning_rate": 0.001,
"loss": 0.6161,
"step": 219500
},
{
"epoch": 70.97608274078863,
"grad_norm": 1.4781556129455566,
"learning_rate": 0.001,
"loss": 0.6133,
"step": 219600
},
{
"epoch": 71.00840336134453,
"grad_norm": 1.4278903007507324,
"learning_rate": 0.001,
"loss": 0.5894,
"step": 219700
},
{
"epoch": 71.04072398190046,
"grad_norm": 1.442977786064148,
"learning_rate": 0.001,
"loss": 0.534,
"step": 219800
},
{
"epoch": 71.07304460245636,
"grad_norm": 1.3905531167984009,
"learning_rate": 0.001,
"loss": 0.561,
"step": 219900
},
{
"epoch": 71.10536522301229,
"grad_norm": 1.9299741983413696,
"learning_rate": 0.001,
"loss": 0.5489,
"step": 220000
},
{
"epoch": 71.13768584356819,
"grad_norm": 1.498746633529663,
"learning_rate": 0.001,
"loss": 0.5542,
"step": 220100
},
{
"epoch": 71.17000646412411,
"grad_norm": 1.8542300462722778,
"learning_rate": 0.001,
"loss": 0.5478,
"step": 220200
},
{
"epoch": 71.20232708468002,
"grad_norm": 1.0949512720108032,
"learning_rate": 0.001,
"loss": 0.5619,
"step": 220300
},
{
"epoch": 71.23464770523594,
"grad_norm": 1.170140027999878,
"learning_rate": 0.001,
"loss": 0.5533,
"step": 220400
},
{
"epoch": 71.26696832579185,
"grad_norm": 1.5471538305282593,
"learning_rate": 0.001,
"loss": 0.5618,
"step": 220500
},
{
"epoch": 71.29928894634777,
"grad_norm": 1.3798595666885376,
"learning_rate": 0.001,
"loss": 0.5587,
"step": 220600
},
{
"epoch": 71.33160956690368,
"grad_norm": 1.5981390476226807,
"learning_rate": 0.001,
"loss": 0.5715,
"step": 220700
},
{
"epoch": 71.3639301874596,
"grad_norm": 1.3824917078018188,
"learning_rate": 0.001,
"loss": 0.5809,
"step": 220800
},
{
"epoch": 71.39625080801551,
"grad_norm": 1.6975314617156982,
"learning_rate": 0.001,
"loss": 0.5744,
"step": 220900
},
{
"epoch": 71.42857142857143,
"grad_norm": 1.794413685798645,
"learning_rate": 0.001,
"loss": 0.5806,
"step": 221000
},
{
"epoch": 71.46089204912734,
"grad_norm": 1.2590088844299316,
"learning_rate": 0.001,
"loss": 0.5779,
"step": 221100
},
{
"epoch": 71.49321266968326,
"grad_norm": 1.3916282653808594,
"learning_rate": 0.001,
"loss": 0.5704,
"step": 221200
},
{
"epoch": 71.52553329023917,
"grad_norm": 1.3690452575683594,
"learning_rate": 0.001,
"loss": 0.5878,
"step": 221300
},
{
"epoch": 71.55785391079509,
"grad_norm": 1.6590235233306885,
"learning_rate": 0.001,
"loss": 0.5892,
"step": 221400
},
{
"epoch": 71.590174531351,
"grad_norm": 1.3054158687591553,
"learning_rate": 0.001,
"loss": 0.5834,
"step": 221500
},
{
"epoch": 71.62249515190692,
"grad_norm": 1.5784341096878052,
"learning_rate": 0.001,
"loss": 0.5712,
"step": 221600
},
{
"epoch": 71.65481577246283,
"grad_norm": 1.4556595087051392,
"learning_rate": 0.001,
"loss": 0.5901,
"step": 221700
},
{
"epoch": 71.68713639301875,
"grad_norm": 1.4017647504806519,
"learning_rate": 0.001,
"loss": 0.5984,
"step": 221800
},
{
"epoch": 71.71945701357465,
"grad_norm": 1.2876161336898804,
"learning_rate": 0.001,
"loss": 0.5852,
"step": 221900
},
{
"epoch": 71.75177763413058,
"grad_norm": 1.0554882287979126,
"learning_rate": 0.001,
"loss": 0.6103,
"step": 222000
},
{
"epoch": 71.78409825468648,
"grad_norm": 1.4979000091552734,
"learning_rate": 0.001,
"loss": 0.6002,
"step": 222100
},
{
"epoch": 71.8164188752424,
"grad_norm": 1.4148439168930054,
"learning_rate": 0.001,
"loss": 0.5982,
"step": 222200
},
{
"epoch": 71.84873949579831,
"grad_norm": 1.569904088973999,
"learning_rate": 0.001,
"loss": 0.6052,
"step": 222300
},
{
"epoch": 71.88106011635423,
"grad_norm": 1.2948265075683594,
"learning_rate": 0.001,
"loss": 0.6032,
"step": 222400
},
{
"epoch": 71.91338073691014,
"grad_norm": 1.949517011642456,
"learning_rate": 0.001,
"loss": 0.5991,
"step": 222500
},
{
"epoch": 71.94570135746606,
"grad_norm": 1.2780336141586304,
"learning_rate": 0.001,
"loss": 0.6034,
"step": 222600
},
{
"epoch": 71.97802197802197,
"grad_norm": 2.284240245819092,
"learning_rate": 0.001,
"loss": 0.6101,
"step": 222700
},
{
"epoch": 72.01034259857789,
"grad_norm": 1.7869925498962402,
"learning_rate": 0.001,
"loss": 0.5696,
"step": 222800
},
{
"epoch": 72.04266321913381,
"grad_norm": 1.3875548839569092,
"learning_rate": 0.001,
"loss": 0.545,
"step": 222900
},
{
"epoch": 72.07498383968972,
"grad_norm": 1.3865928649902344,
"learning_rate": 0.001,
"loss": 0.5482,
"step": 223000
},
{
"epoch": 72.10730446024564,
"grad_norm": 1.2828845977783203,
"learning_rate": 0.001,
"loss": 0.552,
"step": 223100
},
{
"epoch": 72.13962508080155,
"grad_norm": 1.516929030418396,
"learning_rate": 0.001,
"loss": 0.5515,
"step": 223200
},
{
"epoch": 72.17194570135747,
"grad_norm": 1.5746568441390991,
"learning_rate": 0.001,
"loss": 0.5452,
"step": 223300
},
{
"epoch": 72.20426632191338,
"grad_norm": 1.2963000535964966,
"learning_rate": 0.001,
"loss": 0.5619,
"step": 223400
},
{
"epoch": 72.2365869424693,
"grad_norm": 1.3902140855789185,
"learning_rate": 0.001,
"loss": 0.5516,
"step": 223500
},
{
"epoch": 72.26890756302521,
"grad_norm": 1.8248586654663086,
"learning_rate": 0.001,
"loss": 0.5513,
"step": 223600
},
{
"epoch": 72.30122818358113,
"grad_norm": 1.8046656847000122,
"learning_rate": 0.001,
"loss": 0.569,
"step": 223700
},
{
"epoch": 72.33354880413704,
"grad_norm": 1.6079381704330444,
"learning_rate": 0.001,
"loss": 0.5721,
"step": 223800
},
{
"epoch": 72.36586942469296,
"grad_norm": 1.4093081951141357,
"learning_rate": 0.001,
"loss": 0.5692,
"step": 223900
},
{
"epoch": 72.39819004524887,
"grad_norm": 1.3620543479919434,
"learning_rate": 0.001,
"loss": 0.5683,
"step": 224000
},
{
"epoch": 72.43051066580479,
"grad_norm": 1.451627254486084,
"learning_rate": 0.001,
"loss": 0.565,
"step": 224100
},
{
"epoch": 72.4628312863607,
"grad_norm": 1.3643581867218018,
"learning_rate": 0.001,
"loss": 0.5579,
"step": 224200
},
{
"epoch": 72.49515190691662,
"grad_norm": 1.289871096611023,
"learning_rate": 0.001,
"loss": 0.5703,
"step": 224300
},
{
"epoch": 72.52747252747253,
"grad_norm": 1.699236512184143,
"learning_rate": 0.001,
"loss": 0.5651,
"step": 224400
},
{
"epoch": 72.55979314802845,
"grad_norm": 1.1216737031936646,
"learning_rate": 0.001,
"loss": 0.5717,
"step": 224500
},
{
"epoch": 72.59211376858435,
"grad_norm": 1.4215786457061768,
"learning_rate": 0.001,
"loss": 0.5855,
"step": 224600
},
{
"epoch": 72.62443438914028,
"grad_norm": 1.5842723846435547,
"learning_rate": 0.001,
"loss": 0.5796,
"step": 224700
},
{
"epoch": 72.65675500969618,
"grad_norm": 1.7459555864334106,
"learning_rate": 0.001,
"loss": 0.5807,
"step": 224800
},
{
"epoch": 72.6890756302521,
"grad_norm": 1.1991825103759766,
"learning_rate": 0.001,
"loss": 0.5856,
"step": 224900
},
{
"epoch": 72.72139625080801,
"grad_norm": 1.3949995040893555,
"learning_rate": 0.001,
"loss": 0.5847,
"step": 225000
},
{
"epoch": 72.75371687136393,
"grad_norm": 1.186496376991272,
"learning_rate": 0.001,
"loss": 0.5825,
"step": 225100
},
{
"epoch": 72.78603749191984,
"grad_norm": 1.3610190153121948,
"learning_rate": 0.001,
"loss": 0.5915,
"step": 225200
},
{
"epoch": 72.81835811247576,
"grad_norm": 1.4443519115447998,
"learning_rate": 0.001,
"loss": 0.5822,
"step": 225300
},
{
"epoch": 72.85067873303167,
"grad_norm": 1.031691074371338,
"learning_rate": 0.001,
"loss": 0.5897,
"step": 225400
},
{
"epoch": 72.88299935358759,
"grad_norm": 1.4198780059814453,
"learning_rate": 0.001,
"loss": 0.6026,
"step": 225500
},
{
"epoch": 72.9153199741435,
"grad_norm": 1.3856195211410522,
"learning_rate": 0.001,
"loss": 0.58,
"step": 225600
},
{
"epoch": 72.94764059469942,
"grad_norm": 1.4383305311203003,
"learning_rate": 0.001,
"loss": 0.5963,
"step": 225700
},
{
"epoch": 72.97996121525533,
"grad_norm": 1.4304252862930298,
"learning_rate": 0.001,
"loss": 0.6073,
"step": 225800
},
{
"epoch": 73.01228183581125,
"grad_norm": 1.6289730072021484,
"learning_rate": 0.001,
"loss": 0.5603,
"step": 225900
},
{
"epoch": 73.04460245636716,
"grad_norm": 1.623422622680664,
"learning_rate": 0.001,
"loss": 0.5222,
"step": 226000
},
{
"epoch": 73.07692307692308,
"grad_norm": 1.2384452819824219,
"learning_rate": 0.001,
"loss": 0.5311,
"step": 226100
},
{
"epoch": 73.10924369747899,
"grad_norm": 1.1145490407943726,
"learning_rate": 0.001,
"loss": 0.5302,
"step": 226200
},
{
"epoch": 73.14156431803491,
"grad_norm": 1.1442946195602417,
"learning_rate": 0.001,
"loss": 0.5457,
"step": 226300
},
{
"epoch": 73.17388493859082,
"grad_norm": 1.4592894315719604,
"learning_rate": 0.001,
"loss": 0.5369,
"step": 226400
},
{
"epoch": 73.20620555914674,
"grad_norm": 0.9766262173652649,
"learning_rate": 0.001,
"loss": 0.5423,
"step": 226500
},
{
"epoch": 73.23852617970265,
"grad_norm": 1.052048683166504,
"learning_rate": 0.001,
"loss": 0.5464,
"step": 226600
},
{
"epoch": 73.27084680025857,
"grad_norm": 1.2609376907348633,
"learning_rate": 0.001,
"loss": 0.5612,
"step": 226700
},
{
"epoch": 73.30316742081448,
"grad_norm": 1.1084191799163818,
"learning_rate": 0.001,
"loss": 0.5519,
"step": 226800
},
{
"epoch": 73.3354880413704,
"grad_norm": 1.3045639991760254,
"learning_rate": 0.001,
"loss": 0.5564,
"step": 226900
},
{
"epoch": 73.3678086619263,
"grad_norm": 1.4363510608673096,
"learning_rate": 0.001,
"loss": 0.5715,
"step": 227000
},
{
"epoch": 73.40012928248223,
"grad_norm": 1.3683849573135376,
"learning_rate": 0.001,
"loss": 0.5619,
"step": 227100
},
{
"epoch": 73.43244990303813,
"grad_norm": 1.4905529022216797,
"learning_rate": 0.001,
"loss": 0.5613,
"step": 227200
},
{
"epoch": 73.46477052359405,
"grad_norm": 1.3460173606872559,
"learning_rate": 0.001,
"loss": 0.5746,
"step": 227300
},
{
"epoch": 73.49709114414996,
"grad_norm": 1.5590981245040894,
"learning_rate": 0.001,
"loss": 0.5597,
"step": 227400
},
{
"epoch": 73.52941176470588,
"grad_norm": 1.2848957777023315,
"learning_rate": 0.001,
"loss": 0.5638,
"step": 227500
},
{
"epoch": 73.56173238526179,
"grad_norm": 1.3358205556869507,
"learning_rate": 0.001,
"loss": 0.5602,
"step": 227600
},
{
"epoch": 73.59405300581771,
"grad_norm": 1.6583843231201172,
"learning_rate": 0.001,
"loss": 0.5654,
"step": 227700
},
{
"epoch": 73.62637362637362,
"grad_norm": 1.0921430587768555,
"learning_rate": 0.001,
"loss": 0.5805,
"step": 227800
},
{
"epoch": 73.65869424692954,
"grad_norm": 1.0443511009216309,
"learning_rate": 0.001,
"loss": 0.5708,
"step": 227900
},
{
"epoch": 73.69101486748545,
"grad_norm": 1.2265650033950806,
"learning_rate": 0.001,
"loss": 0.5858,
"step": 228000
},
{
"epoch": 73.72333548804137,
"grad_norm": 1.4629805088043213,
"learning_rate": 0.001,
"loss": 0.5847,
"step": 228100
},
{
"epoch": 73.75565610859728,
"grad_norm": 1.3702300786972046,
"learning_rate": 0.001,
"loss": 0.5832,
"step": 228200
},
{
"epoch": 73.7879767291532,
"grad_norm": 1.6281440258026123,
"learning_rate": 0.001,
"loss": 0.5861,
"step": 228300
},
{
"epoch": 73.82029734970911,
"grad_norm": 1.4549918174743652,
"learning_rate": 0.001,
"loss": 0.5823,
"step": 228400
},
{
"epoch": 73.85261797026503,
"grad_norm": 1.3140660524368286,
"learning_rate": 0.001,
"loss": 0.5759,
"step": 228500
},
{
"epoch": 73.88493859082094,
"grad_norm": 1.5533117055892944,
"learning_rate": 0.001,
"loss": 0.586,
"step": 228600
},
{
"epoch": 73.91725921137686,
"grad_norm": 1.5729376077651978,
"learning_rate": 0.001,
"loss": 0.5965,
"step": 228700
},
{
"epoch": 73.94957983193277,
"grad_norm": 1.4772778749465942,
"learning_rate": 0.001,
"loss": 0.5939,
"step": 228800
},
{
"epoch": 73.98190045248869,
"grad_norm": 1.5759507417678833,
"learning_rate": 0.001,
"loss": 0.5919,
"step": 228900
},
{
"epoch": 74.01422107304461,
"grad_norm": 0.9670353531837463,
"learning_rate": 0.001,
"loss": 0.5414,
"step": 229000
},
{
"epoch": 74.04654169360052,
"grad_norm": 1.0269129276275635,
"learning_rate": 0.001,
"loss": 0.5299,
"step": 229100
},
{
"epoch": 74.07886231415644,
"grad_norm": 1.191085696220398,
"learning_rate": 0.001,
"loss": 0.5286,
"step": 229200
},
{
"epoch": 74.11118293471235,
"grad_norm": 1.1801611185073853,
"learning_rate": 0.001,
"loss": 0.549,
"step": 229300
},
{
"epoch": 74.14350355526827,
"grad_norm": 0.983653724193573,
"learning_rate": 0.001,
"loss": 0.5277,
"step": 229400
},
{
"epoch": 74.17582417582418,
"grad_norm": 1.0740876197814941,
"learning_rate": 0.001,
"loss": 0.5339,
"step": 229500
},
{
"epoch": 74.2081447963801,
"grad_norm": 1.0492289066314697,
"learning_rate": 0.001,
"loss": 0.5553,
"step": 229600
},
{
"epoch": 74.240465416936,
"grad_norm": 1.4024004936218262,
"learning_rate": 0.001,
"loss": 0.5512,
"step": 229700
},
{
"epoch": 74.27278603749193,
"grad_norm": 0.8431639671325684,
"learning_rate": 0.001,
"loss": 0.538,
"step": 229800
},
{
"epoch": 74.30510665804783,
"grad_norm": 0.8813731074333191,
"learning_rate": 0.001,
"loss": 0.5517,
"step": 229900
},
{
"epoch": 74.33742727860376,
"grad_norm": 1.1411322355270386,
"learning_rate": 0.001,
"loss": 0.5547,
"step": 230000
},
{
"epoch": 74.36974789915966,
"grad_norm": 1.0830614566802979,
"learning_rate": 0.001,
"loss": 0.5547,
"step": 230100
},
{
"epoch": 74.40206851971558,
"grad_norm": 1.2333221435546875,
"learning_rate": 0.001,
"loss": 0.5565,
"step": 230200
},
{
"epoch": 74.43438914027149,
"grad_norm": 1.199706792831421,
"learning_rate": 0.001,
"loss": 0.5625,
"step": 230300
},
{
"epoch": 74.46670976082741,
"grad_norm": 1.2191143035888672,
"learning_rate": 0.001,
"loss": 0.5647,
"step": 230400
},
{
"epoch": 74.49903038138332,
"grad_norm": 0.8158851861953735,
"learning_rate": 0.001,
"loss": 0.5592,
"step": 230500
},
{
"epoch": 74.53135100193924,
"grad_norm": 0.9935479760169983,
"learning_rate": 0.001,
"loss": 0.558,
"step": 230600
},
{
"epoch": 74.56367162249515,
"grad_norm": 1.1733251810073853,
"learning_rate": 0.001,
"loss": 0.5539,
"step": 230700
},
{
"epoch": 74.59599224305107,
"grad_norm": 1.2278839349746704,
"learning_rate": 0.001,
"loss": 0.5626,
"step": 230800
},
{
"epoch": 74.62831286360698,
"grad_norm": 3.4744834899902344,
"learning_rate": 0.001,
"loss": 0.5678,
"step": 230900
},
{
"epoch": 74.6606334841629,
"grad_norm": 0.8104879856109619,
"learning_rate": 0.001,
"loss": 0.5721,
"step": 231000
},
{
"epoch": 74.69295410471881,
"grad_norm": 0.9081707000732422,
"learning_rate": 0.001,
"loss": 0.571,
"step": 231100
},
{
"epoch": 74.72527472527473,
"grad_norm": 1.0820246934890747,
"learning_rate": 0.001,
"loss": 0.5718,
"step": 231200
},
{
"epoch": 74.75759534583064,
"grad_norm": 1.142275333404541,
"learning_rate": 0.001,
"loss": 0.5665,
"step": 231300
},
{
"epoch": 74.78991596638656,
"grad_norm": 0.747945249080658,
"learning_rate": 0.001,
"loss": 0.5796,
"step": 231400
},
{
"epoch": 74.82223658694247,
"grad_norm": 1.0675921440124512,
"learning_rate": 0.001,
"loss": 0.5735,
"step": 231500
},
{
"epoch": 74.85455720749839,
"grad_norm": 1.234141230583191,
"learning_rate": 0.001,
"loss": 0.5672,
"step": 231600
},
{
"epoch": 74.8868778280543,
"grad_norm": 1.0947504043579102,
"learning_rate": 0.001,
"loss": 0.5764,
"step": 231700
},
{
"epoch": 74.91919844861022,
"grad_norm": 1.1369717121124268,
"learning_rate": 0.001,
"loss": 0.5814,
"step": 231800
},
{
"epoch": 74.95151906916612,
"grad_norm": 1.0353525876998901,
"learning_rate": 0.001,
"loss": 0.5731,
"step": 231900
},
{
"epoch": 74.98383968972205,
"grad_norm": 1.0561206340789795,
"learning_rate": 0.001,
"loss": 0.5865,
"step": 232000
},
{
"epoch": 75.01616031027795,
"grad_norm": 1.0930769443511963,
"learning_rate": 0.001,
"loss": 0.5193,
"step": 232100
},
{
"epoch": 75.04848093083388,
"grad_norm": 0.1771896481513977,
"learning_rate": 0.001,
"loss": 0.5157,
"step": 232200
},
{
"epoch": 75.08080155138978,
"grad_norm": 1.141357660293579,
"learning_rate": 0.001,
"loss": 0.5361,
"step": 232300
},
{
"epoch": 75.1131221719457,
"grad_norm": 0.5655999779701233,
"learning_rate": 0.001,
"loss": 0.5407,
"step": 232400
},
{
"epoch": 75.14544279250161,
"grad_norm": 0.7469679713249207,
"learning_rate": 0.001,
"loss": 0.5313,
"step": 232500
},
{
"epoch": 75.17776341305753,
"grad_norm": 0.29743582010269165,
"learning_rate": 0.001,
"loss": 0.5311,
"step": 232600
},
{
"epoch": 75.21008403361344,
"grad_norm": 0.19023145735263824,
"learning_rate": 0.001,
"loss": 0.5385,
"step": 232700
},
{
"epoch": 75.24240465416936,
"grad_norm": 0.530585765838623,
"learning_rate": 0.001,
"loss": 0.5263,
"step": 232800
},
{
"epoch": 75.27472527472527,
"grad_norm": 0.21099801361560822,
"learning_rate": 0.001,
"loss": 0.5281,
"step": 232900
},
{
"epoch": 75.30704589528119,
"grad_norm": 0.460519015789032,
"learning_rate": 0.001,
"loss": 0.5299,
"step": 233000
},
{
"epoch": 75.3393665158371,
"grad_norm": 0.435160756111145,
"learning_rate": 0.001,
"loss": 0.5394,
"step": 233100
},
{
"epoch": 75.37168713639302,
"grad_norm": 0.1967736929655075,
"learning_rate": 0.001,
"loss": 0.5525,
"step": 233200
},
{
"epoch": 75.40400775694893,
"grad_norm": 1.248533844947815,
"learning_rate": 0.001,
"loss": 0.5491,
"step": 233300
},
{
"epoch": 75.43632837750485,
"grad_norm": 0.20162171125411987,
"learning_rate": 0.001,
"loss": 0.5436,
"step": 233400
},
{
"epoch": 75.46864899806076,
"grad_norm": 0.5278852581977844,
"learning_rate": 0.001,
"loss": 0.5521,
"step": 233500
},
{
"epoch": 75.50096961861668,
"grad_norm": 0.3178282380104065,
"learning_rate": 0.001,
"loss": 0.5498,
"step": 233600
},
{
"epoch": 75.53329023917259,
"grad_norm": 0.2286689579486847,
"learning_rate": 0.001,
"loss": 0.5596,
"step": 233700
},
{
"epoch": 75.56561085972851,
"grad_norm": 0.18539604544639587,
"learning_rate": 0.001,
"loss": 0.5556,
"step": 233800
},
{
"epoch": 75.59793148028442,
"grad_norm": 0.07810671627521515,
"learning_rate": 0.001,
"loss": 0.5644,
"step": 233900
},
{
"epoch": 75.63025210084034,
"grad_norm": 0.5242074131965637,
"learning_rate": 0.001,
"loss": 0.5799,
"step": 234000
},
{
"epoch": 75.66257272139624,
"grad_norm": 0.18300612270832062,
"learning_rate": 0.001,
"loss": 0.5625,
"step": 234100
},
{
"epoch": 75.69489334195217,
"grad_norm": 0.3278989791870117,
"learning_rate": 0.001,
"loss": 0.5732,
"step": 234200
},
{
"epoch": 75.72721396250807,
"grad_norm": 0.913730800151825,
"learning_rate": 0.001,
"loss": 0.5817,
"step": 234300
},
{
"epoch": 75.759534583064,
"grad_norm": 0.3457682132720947,
"learning_rate": 0.001,
"loss": 0.5654,
"step": 234400
},
{
"epoch": 75.7918552036199,
"grad_norm": 0.430754154920578,
"learning_rate": 0.001,
"loss": 0.5754,
"step": 234500
},
{
"epoch": 75.82417582417582,
"grad_norm": 0.433608740568161,
"learning_rate": 0.001,
"loss": 0.5754,
"step": 234600
},
{
"epoch": 75.85649644473173,
"grad_norm": 0.19627642631530762,
"learning_rate": 0.001,
"loss": 0.5867,
"step": 234700
},
{
"epoch": 75.88881706528765,
"grad_norm": 0.11609924584627151,
"learning_rate": 0.001,
"loss": 0.5757,
"step": 234800
},
{
"epoch": 75.92113768584356,
"grad_norm": 0.7167121171951294,
"learning_rate": 0.001,
"loss": 0.5651,
"step": 234900
},
{
"epoch": 75.95345830639948,
"grad_norm": 0.4575307369232178,
"learning_rate": 0.001,
"loss": 0.5579,
"step": 235000
},
{
"epoch": 75.98577892695539,
"grad_norm": 0.15593650937080383,
"learning_rate": 0.001,
"loss": 0.5828,
"step": 235100
},
{
"epoch": 76.01809954751131,
"grad_norm": 1.413036584854126,
"learning_rate": 0.001,
"loss": 0.5612,
"step": 235200
},
{
"epoch": 76.05042016806723,
"grad_norm": 1.6599562168121338,
"learning_rate": 0.001,
"loss": 0.5069,
"step": 235300
},
{
"epoch": 76.08274078862314,
"grad_norm": 1.707543969154358,
"learning_rate": 0.001,
"loss": 0.5186,
"step": 235400
},
{
"epoch": 76.11506140917906,
"grad_norm": 1.6075223684310913,
"learning_rate": 0.001,
"loss": 0.5235,
"step": 235500
},
{
"epoch": 76.14738202973497,
"grad_norm": 1.6342514753341675,
"learning_rate": 0.001,
"loss": 0.5195,
"step": 235600
},
{
"epoch": 76.17970265029089,
"grad_norm": 2.0759618282318115,
"learning_rate": 0.001,
"loss": 0.5269,
"step": 235700
},
{
"epoch": 76.2120232708468,
"grad_norm": 1.9232611656188965,
"learning_rate": 0.001,
"loss": 0.5439,
"step": 235800
},
{
"epoch": 76.24434389140272,
"grad_norm": 1.413246989250183,
"learning_rate": 0.001,
"loss": 0.5142,
"step": 235900
},
{
"epoch": 76.27666451195863,
"grad_norm": 1.647636890411377,
"learning_rate": 0.001,
"loss": 0.5399,
"step": 236000
},
{
"epoch": 76.30898513251455,
"grad_norm": 1.4785175323486328,
"learning_rate": 0.001,
"loss": 0.5389,
"step": 236100
},
{
"epoch": 76.34130575307046,
"grad_norm": 1.6953210830688477,
"learning_rate": 0.001,
"loss": 0.5401,
"step": 236200
},
{
"epoch": 76.37362637362638,
"grad_norm": 1.4326900243759155,
"learning_rate": 0.001,
"loss": 0.5289,
"step": 236300
},
{
"epoch": 76.40594699418229,
"grad_norm": 1.54912531375885,
"learning_rate": 0.001,
"loss": 0.5453,
"step": 236400
},
{
"epoch": 76.43826761473821,
"grad_norm": 1.8962860107421875,
"learning_rate": 0.001,
"loss": 0.5377,
"step": 236500
},
{
"epoch": 76.47058823529412,
"grad_norm": 1.2893810272216797,
"learning_rate": 0.001,
"loss": 0.5392,
"step": 236600
},
{
"epoch": 76.50290885585004,
"grad_norm": 1.5049962997436523,
"learning_rate": 0.001,
"loss": 0.5516,
"step": 236700
},
{
"epoch": 76.53522947640595,
"grad_norm": 1.5820329189300537,
"learning_rate": 0.001,
"loss": 0.5614,
"step": 236800
},
{
"epoch": 76.56755009696187,
"grad_norm": 1.3666566610336304,
"learning_rate": 0.001,
"loss": 0.5431,
"step": 236900
},
{
"epoch": 76.59987071751777,
"grad_norm": 1.2955440282821655,
"learning_rate": 0.001,
"loss": 0.5587,
"step": 237000
},
{
"epoch": 76.6321913380737,
"grad_norm": 1.5790258646011353,
"learning_rate": 0.001,
"loss": 0.5448,
"step": 237100
},
{
"epoch": 76.6645119586296,
"grad_norm": 1.5983870029449463,
"learning_rate": 0.001,
"loss": 0.5588,
"step": 237200
},
{
"epoch": 76.69683257918552,
"grad_norm": 1.6741951704025269,
"learning_rate": 0.001,
"loss": 0.5467,
"step": 237300
},
{
"epoch": 76.72915319974143,
"grad_norm": 1.8480662107467651,
"learning_rate": 0.001,
"loss": 0.5635,
"step": 237400
},
{
"epoch": 76.76147382029735,
"grad_norm": 1.6002708673477173,
"learning_rate": 0.001,
"loss": 0.5654,
"step": 237500
},
{
"epoch": 76.79379444085326,
"grad_norm": 1.3160319328308105,
"learning_rate": 0.001,
"loss": 0.561,
"step": 237600
},
{
"epoch": 76.82611506140918,
"grad_norm": 1.7721960544586182,
"learning_rate": 0.001,
"loss": 0.5754,
"step": 237700
},
{
"epoch": 76.85843568196509,
"grad_norm": 1.5011405944824219,
"learning_rate": 0.001,
"loss": 0.5592,
"step": 237800
},
{
"epoch": 76.89075630252101,
"grad_norm": 1.771674633026123,
"learning_rate": 0.001,
"loss": 0.5749,
"step": 237900
},
{
"epoch": 76.92307692307692,
"grad_norm": 1.3502850532531738,
"learning_rate": 0.001,
"loss": 0.5739,
"step": 238000
},
{
"epoch": 76.95539754363284,
"grad_norm": 1.7754833698272705,
"learning_rate": 0.001,
"loss": 0.5752,
"step": 238100
},
{
"epoch": 76.98771816418875,
"grad_norm": 1.694458246231079,
"learning_rate": 0.001,
"loss": 0.5826,
"step": 238200
},
{
"epoch": 77.02003878474467,
"grad_norm": 1.396079421043396,
"learning_rate": 0.001,
"loss": 0.5537,
"step": 238300
},
{
"epoch": 77.05235940530058,
"grad_norm": 1.532308578491211,
"learning_rate": 0.001,
"loss": 0.5143,
"step": 238400
},
{
"epoch": 77.0846800258565,
"grad_norm": 1.5896692276000977,
"learning_rate": 0.001,
"loss": 0.5114,
"step": 238500
},
{
"epoch": 77.11700064641241,
"grad_norm": 1.4605082273483276,
"learning_rate": 0.001,
"loss": 0.512,
"step": 238600
},
{
"epoch": 77.14932126696833,
"grad_norm": 1.233646035194397,
"learning_rate": 0.001,
"loss": 0.515,
"step": 238700
},
{
"epoch": 77.18164188752424,
"grad_norm": 1.7335985898971558,
"learning_rate": 0.001,
"loss": 0.5205,
"step": 238800
},
{
"epoch": 77.21396250808016,
"grad_norm": 1.4268879890441895,
"learning_rate": 0.001,
"loss": 0.5347,
"step": 238900
},
{
"epoch": 77.24628312863607,
"grad_norm": 1.8350880146026611,
"learning_rate": 0.001,
"loss": 0.5227,
"step": 239000
},
{
"epoch": 77.27860374919199,
"grad_norm": 1.3111212253570557,
"learning_rate": 0.001,
"loss": 0.512,
"step": 239100
},
{
"epoch": 77.3109243697479,
"grad_norm": 1.2222663164138794,
"learning_rate": 0.001,
"loss": 0.5264,
"step": 239200
},
{
"epoch": 77.34324499030382,
"grad_norm": 1.6778181791305542,
"learning_rate": 0.001,
"loss": 0.525,
"step": 239300
},
{
"epoch": 77.37556561085972,
"grad_norm": 1.714513897895813,
"learning_rate": 0.001,
"loss": 0.53,
"step": 239400
},
{
"epoch": 77.40788623141565,
"grad_norm": 1.4410483837127686,
"learning_rate": 0.001,
"loss": 0.5352,
"step": 239500
},
{
"epoch": 77.44020685197155,
"grad_norm": 1.4495421648025513,
"learning_rate": 0.001,
"loss": 0.5429,
"step": 239600
},
{
"epoch": 77.47252747252747,
"grad_norm": 1.4672760963439941,
"learning_rate": 0.001,
"loss": 0.5369,
"step": 239700
},
{
"epoch": 77.50484809308338,
"grad_norm": 1.186777114868164,
"learning_rate": 0.001,
"loss": 0.5525,
"step": 239800
},
{
"epoch": 77.5371687136393,
"grad_norm": 1.2865841388702393,
"learning_rate": 0.001,
"loss": 0.5406,
"step": 239900
},
{
"epoch": 77.56948933419521,
"grad_norm": 1.964085340499878,
"learning_rate": 0.001,
"loss": 0.5471,
"step": 240000
},
{
"epoch": 77.60180995475113,
"grad_norm": 1.2692760229110718,
"learning_rate": 0.001,
"loss": 0.5511,
"step": 240100
},
{
"epoch": 77.63413057530704,
"grad_norm": 1.427521824836731,
"learning_rate": 0.001,
"loss": 0.5463,
"step": 240200
},
{
"epoch": 77.66645119586296,
"grad_norm": 1.5266354084014893,
"learning_rate": 0.001,
"loss": 0.5682,
"step": 240300
},
{
"epoch": 77.69877181641887,
"grad_norm": 1.3747498989105225,
"learning_rate": 0.001,
"loss": 0.5447,
"step": 240400
},
{
"epoch": 77.73109243697479,
"grad_norm": 1.8401381969451904,
"learning_rate": 0.001,
"loss": 0.5509,
"step": 240500
},
{
"epoch": 77.7634130575307,
"grad_norm": 1.593461513519287,
"learning_rate": 0.001,
"loss": 0.569,
"step": 240600
},
{
"epoch": 77.79573367808662,
"grad_norm": 1.2960352897644043,
"learning_rate": 0.001,
"loss": 0.5557,
"step": 240700
},
{
"epoch": 77.82805429864253,
"grad_norm": 1.2029883861541748,
"learning_rate": 0.001,
"loss": 0.5646,
"step": 240800
},
{
"epoch": 77.86037491919845,
"grad_norm": 1.672709584236145,
"learning_rate": 0.001,
"loss": 0.5518,
"step": 240900
},
{
"epoch": 77.89269553975436,
"grad_norm": 1.5223016738891602,
"learning_rate": 0.001,
"loss": 0.5597,
"step": 241000
},
{
"epoch": 77.92501616031028,
"grad_norm": 1.2413277626037598,
"learning_rate": 0.001,
"loss": 0.5648,
"step": 241100
},
{
"epoch": 77.95733678086619,
"grad_norm": 1.4993832111358643,
"learning_rate": 0.001,
"loss": 0.571,
"step": 241200
},
{
"epoch": 77.98965740142211,
"grad_norm": 1.328671932220459,
"learning_rate": 0.001,
"loss": 0.5648,
"step": 241300
},
{
"epoch": 78.02197802197803,
"grad_norm": 1.6180659532546997,
"learning_rate": 0.001,
"loss": 0.5366,
"step": 241400
},
{
"epoch": 78.05429864253394,
"grad_norm": 1.5706161260604858,
"learning_rate": 0.001,
"loss": 0.5075,
"step": 241500
},
{
"epoch": 78.08661926308986,
"grad_norm": 1.5466357469558716,
"learning_rate": 0.001,
"loss": 0.4995,
"step": 241600
},
{
"epoch": 78.11893988364577,
"grad_norm": 1.4233200550079346,
"learning_rate": 0.001,
"loss": 0.507,
"step": 241700
},
{
"epoch": 78.15126050420169,
"grad_norm": 3.640650749206543,
"learning_rate": 0.001,
"loss": 0.5213,
"step": 241800
},
{
"epoch": 78.1835811247576,
"grad_norm": 1.5065131187438965,
"learning_rate": 0.001,
"loss": 0.5084,
"step": 241900
},
{
"epoch": 78.21590174531352,
"grad_norm": 1.3846442699432373,
"learning_rate": 0.001,
"loss": 0.5188,
"step": 242000
},
{
"epoch": 78.24822236586942,
"grad_norm": 1.592302918434143,
"learning_rate": 0.001,
"loss": 0.5206,
"step": 242100
},
{
"epoch": 78.28054298642535,
"grad_norm": 1.4627457857131958,
"learning_rate": 0.001,
"loss": 0.5306,
"step": 242200
},
{
"epoch": 78.31286360698125,
"grad_norm": 1.4785966873168945,
"learning_rate": 0.001,
"loss": 0.5259,
"step": 242300
},
{
"epoch": 78.34518422753717,
"grad_norm": 1.6304181814193726,
"learning_rate": 0.001,
"loss": 0.5293,
"step": 242400
},
{
"epoch": 78.37750484809308,
"grad_norm": 1.4955110549926758,
"learning_rate": 0.001,
"loss": 0.5267,
"step": 242500
},
{
"epoch": 78.409825468649,
"grad_norm": 1.3782621622085571,
"learning_rate": 0.001,
"loss": 0.5309,
"step": 242600
},
{
"epoch": 78.44214608920491,
"grad_norm": 1.5828373432159424,
"learning_rate": 0.001,
"loss": 0.5314,
"step": 242700
},
{
"epoch": 78.47446670976083,
"grad_norm": 2.112483024597168,
"learning_rate": 0.001,
"loss": 0.5381,
"step": 242800
},
{
"epoch": 78.50678733031674,
"grad_norm": 1.565582036972046,
"learning_rate": 0.001,
"loss": 0.5339,
"step": 242900
},
{
"epoch": 78.53910795087266,
"grad_norm": 1.747968316078186,
"learning_rate": 0.001,
"loss": 0.5372,
"step": 243000
},
{
"epoch": 78.57142857142857,
"grad_norm": 1.6491109132766724,
"learning_rate": 0.001,
"loss": 0.5333,
"step": 243100
},
{
"epoch": 78.60374919198449,
"grad_norm": 1.4201022386550903,
"learning_rate": 0.001,
"loss": 0.5317,
"step": 243200
},
{
"epoch": 78.6360698125404,
"grad_norm": 1.7670608758926392,
"learning_rate": 0.001,
"loss": 0.5339,
"step": 243300
},
{
"epoch": 78.66839043309632,
"grad_norm": 1.3140902519226074,
"learning_rate": 0.001,
"loss": 0.539,
"step": 243400
},
{
"epoch": 78.70071105365223,
"grad_norm": 1.4319005012512207,
"learning_rate": 0.001,
"loss": 0.5504,
"step": 243500
},
{
"epoch": 78.73303167420815,
"grad_norm": 1.4455088376998901,
"learning_rate": 0.001,
"loss": 0.5456,
"step": 243600
},
{
"epoch": 78.76535229476406,
"grad_norm": 1.309688687324524,
"learning_rate": 0.001,
"loss": 0.557,
"step": 243700
},
{
"epoch": 78.79767291531998,
"grad_norm": 1.356952428817749,
"learning_rate": 0.001,
"loss": 0.5575,
"step": 243800
},
{
"epoch": 78.82999353587589,
"grad_norm": 1.2032933235168457,
"learning_rate": 0.001,
"loss": 0.5477,
"step": 243900
},
{
"epoch": 78.86231415643181,
"grad_norm": 1.314985990524292,
"learning_rate": 0.001,
"loss": 0.554,
"step": 244000
},
{
"epoch": 78.89463477698771,
"grad_norm": 1.465287685394287,
"learning_rate": 0.001,
"loss": 0.5621,
"step": 244100
},
{
"epoch": 78.92695539754364,
"grad_norm": 1.4359557628631592,
"learning_rate": 0.001,
"loss": 0.5535,
"step": 244200
},
{
"epoch": 78.95927601809954,
"grad_norm": 1.966088056564331,
"learning_rate": 0.001,
"loss": 0.5639,
"step": 244300
},
{
"epoch": 78.99159663865547,
"grad_norm": 1.3312709331512451,
"learning_rate": 0.001,
"loss": 0.5657,
"step": 244400
},
{
"epoch": 79.02391725921137,
"grad_norm": 1.2351542711257935,
"learning_rate": 0.001,
"loss": 0.5102,
"step": 244500
},
{
"epoch": 79.0562378797673,
"grad_norm": 1.573776364326477,
"learning_rate": 0.001,
"loss": 0.4925,
"step": 244600
},
{
"epoch": 79.0885585003232,
"grad_norm": 1.472129225730896,
"learning_rate": 0.001,
"loss": 0.4985,
"step": 244700
},
{
"epoch": 79.12087912087912,
"grad_norm": 1.5297859907150269,
"learning_rate": 0.001,
"loss": 0.5091,
"step": 244800
},
{
"epoch": 79.15319974143503,
"grad_norm": 1.306641697883606,
"learning_rate": 0.001,
"loss": 0.5116,
"step": 244900
},
{
"epoch": 79.18552036199095,
"grad_norm": 1.8457226753234863,
"learning_rate": 0.001,
"loss": 0.5082,
"step": 245000
},
{
"epoch": 79.21784098254686,
"grad_norm": 1.9189332723617554,
"learning_rate": 0.001,
"loss": 0.5053,
"step": 245100
},
{
"epoch": 79.25016160310278,
"grad_norm": 1.3402392864227295,
"learning_rate": 0.001,
"loss": 0.5216,
"step": 245200
},
{
"epoch": 79.28248222365869,
"grad_norm": 1.5878645181655884,
"learning_rate": 0.001,
"loss": 0.5042,
"step": 245300
},
{
"epoch": 79.31480284421461,
"grad_norm": 1.192588448524475,
"learning_rate": 0.001,
"loss": 0.5133,
"step": 245400
},
{
"epoch": 79.34712346477052,
"grad_norm": 1.310906171798706,
"learning_rate": 0.001,
"loss": 0.522,
"step": 245500
},
{
"epoch": 79.37944408532644,
"grad_norm": 1.6999276876449585,
"learning_rate": 0.001,
"loss": 0.5244,
"step": 245600
},
{
"epoch": 79.41176470588235,
"grad_norm": 1.4118130207061768,
"learning_rate": 0.001,
"loss": 0.5215,
"step": 245700
},
{
"epoch": 79.44408532643827,
"grad_norm": 1.5291193723678589,
"learning_rate": 0.001,
"loss": 0.5291,
"step": 245800
},
{
"epoch": 79.47640594699418,
"grad_norm": 1.4727531671524048,
"learning_rate": 0.001,
"loss": 0.5372,
"step": 245900
},
{
"epoch": 79.5087265675501,
"grad_norm": 1.576590657234192,
"learning_rate": 0.001,
"loss": 0.5243,
"step": 246000
},
{
"epoch": 79.541047188106,
"grad_norm": 1.39322030544281,
"learning_rate": 0.001,
"loss": 0.5249,
"step": 246100
},
{
"epoch": 79.57336780866193,
"grad_norm": 1.2230325937271118,
"learning_rate": 0.001,
"loss": 0.5388,
"step": 246200
},
{
"epoch": 79.60568842921784,
"grad_norm": 1.5058960914611816,
"learning_rate": 0.001,
"loss": 0.5271,
"step": 246300
},
{
"epoch": 79.63800904977376,
"grad_norm": 1.4781763553619385,
"learning_rate": 0.001,
"loss": 0.5506,
"step": 246400
},
{
"epoch": 79.67032967032966,
"grad_norm": 1.458723783493042,
"learning_rate": 0.001,
"loss": 0.5491,
"step": 246500
},
{
"epoch": 79.70265029088559,
"grad_norm": 1.423794150352478,
"learning_rate": 0.001,
"loss": 0.5513,
"step": 246600
},
{
"epoch": 79.7349709114415,
"grad_norm": 1.9729347229003906,
"learning_rate": 0.001,
"loss": 0.5496,
"step": 246700
},
{
"epoch": 79.76729153199742,
"grad_norm": 1.5807349681854248,
"learning_rate": 0.001,
"loss": 0.5364,
"step": 246800
},
{
"epoch": 79.79961215255332,
"grad_norm": 1.4756208658218384,
"learning_rate": 0.001,
"loss": 0.5361,
"step": 246900
},
{
"epoch": 79.83193277310924,
"grad_norm": 2.0747814178466797,
"learning_rate": 0.001,
"loss": 0.5479,
"step": 247000
},
{
"epoch": 79.86425339366515,
"grad_norm": 1.1575489044189453,
"learning_rate": 0.001,
"loss": 0.5413,
"step": 247100
},
{
"epoch": 79.89657401422107,
"grad_norm": 1.3140865564346313,
"learning_rate": 0.001,
"loss": 0.561,
"step": 247200
},
{
"epoch": 79.92889463477698,
"grad_norm": 1.5108518600463867,
"learning_rate": 0.001,
"loss": 0.5559,
"step": 247300
},
{
"epoch": 79.9612152553329,
"grad_norm": 1.3009579181671143,
"learning_rate": 0.001,
"loss": 0.5639,
"step": 247400
},
{
"epoch": 79.99353587588882,
"grad_norm": 1.4407790899276733,
"learning_rate": 0.001,
"loss": 0.5556,
"step": 247500
},
{
"epoch": 80.02585649644473,
"grad_norm": 4.573031902313232,
"learning_rate": 0.001,
"loss": 0.5021,
"step": 247600
},
{
"epoch": 80.05817711700065,
"grad_norm": 1.7188622951507568,
"learning_rate": 0.001,
"loss": 0.4948,
"step": 247700
},
{
"epoch": 80.09049773755656,
"grad_norm": 1.1180098056793213,
"learning_rate": 0.001,
"loss": 0.4918,
"step": 247800
},
{
"epoch": 80.12281835811248,
"grad_norm": 1.9229891300201416,
"learning_rate": 0.001,
"loss": 0.5011,
"step": 247900
},
{
"epoch": 80.15513897866839,
"grad_norm": 1.5299087762832642,
"learning_rate": 0.001,
"loss": 0.4973,
"step": 248000
},
{
"epoch": 80.18745959922431,
"grad_norm": 1.6426825523376465,
"learning_rate": 0.001,
"loss": 0.5023,
"step": 248100
},
{
"epoch": 80.21978021978022,
"grad_norm": 1.409515142440796,
"learning_rate": 0.001,
"loss": 0.5165,
"step": 248200
},
{
"epoch": 80.25210084033614,
"grad_norm": 1.6629232168197632,
"learning_rate": 0.001,
"loss": 0.5056,
"step": 248300
},
{
"epoch": 80.28442146089205,
"grad_norm": 1.4850993156433105,
"learning_rate": 0.001,
"loss": 0.5186,
"step": 248400
},
{
"epoch": 80.31674208144797,
"grad_norm": 1.3737995624542236,
"learning_rate": 0.001,
"loss": 0.52,
"step": 248500
},
{
"epoch": 80.34906270200388,
"grad_norm": 1.550413966178894,
"learning_rate": 0.001,
"loss": 0.5109,
"step": 248600
},
{
"epoch": 80.3813833225598,
"grad_norm": 1.4993878602981567,
"learning_rate": 0.001,
"loss": 0.521,
"step": 248700
},
{
"epoch": 80.4137039431157,
"grad_norm": 1.1930394172668457,
"learning_rate": 0.001,
"loss": 0.5255,
"step": 248800
},
{
"epoch": 80.44602456367163,
"grad_norm": 1.3800441026687622,
"learning_rate": 0.001,
"loss": 0.525,
"step": 248900
},
{
"epoch": 80.47834518422754,
"grad_norm": 1.4232189655303955,
"learning_rate": 0.001,
"loss": 0.522,
"step": 249000
},
{
"epoch": 80.51066580478346,
"grad_norm": 1.3573640584945679,
"learning_rate": 0.001,
"loss": 0.524,
"step": 249100
},
{
"epoch": 80.54298642533936,
"grad_norm": 1.6436176300048828,
"learning_rate": 0.001,
"loss": 0.5277,
"step": 249200
},
{
"epoch": 80.57530704589529,
"grad_norm": 1.6628504991531372,
"learning_rate": 0.001,
"loss": 0.5448,
"step": 249300
},
{
"epoch": 80.6076276664512,
"grad_norm": 1.403755784034729,
"learning_rate": 0.001,
"loss": 0.5198,
"step": 249400
},
{
"epoch": 80.63994828700712,
"grad_norm": 1.6308602094650269,
"learning_rate": 0.001,
"loss": 0.5291,
"step": 249500
},
{
"epoch": 80.67226890756302,
"grad_norm": 1.2523367404937744,
"learning_rate": 0.001,
"loss": 0.5306,
"step": 249600
},
{
"epoch": 80.70458952811894,
"grad_norm": 1.445035457611084,
"learning_rate": 0.001,
"loss": 0.542,
"step": 249700
},
{
"epoch": 80.73691014867485,
"grad_norm": 1.7217998504638672,
"learning_rate": 0.001,
"loss": 0.5308,
"step": 249800
},
{
"epoch": 80.76923076923077,
"grad_norm": 1.1785924434661865,
"learning_rate": 0.001,
"loss": 0.5486,
"step": 249900
},
{
"epoch": 80.80155138978668,
"grad_norm": 1.978576898574829,
"learning_rate": 0.001,
"loss": 0.5426,
"step": 250000
},
{
"epoch": 80.8338720103426,
"grad_norm": 1.4352853298187256,
"learning_rate": 0.001,
"loss": 0.5462,
"step": 250100
},
{
"epoch": 80.86619263089851,
"grad_norm": 1.4232949018478394,
"learning_rate": 0.001,
"loss": 0.5443,
"step": 250200
},
{
"epoch": 80.89851325145443,
"grad_norm": 2.1108646392822266,
"learning_rate": 0.001,
"loss": 0.5451,
"step": 250300
},
{
"epoch": 80.93083387201034,
"grad_norm": 1.9700771570205688,
"learning_rate": 0.001,
"loss": 0.5406,
"step": 250400
},
{
"epoch": 80.96315449256626,
"grad_norm": 1.5097615718841553,
"learning_rate": 0.001,
"loss": 0.5569,
"step": 250500
},
{
"epoch": 80.99547511312217,
"grad_norm": 1.2994375228881836,
"learning_rate": 0.001,
"loss": 0.5285,
"step": 250600
},
{
"epoch": 81.02779573367809,
"grad_norm": 1.177480697631836,
"learning_rate": 0.001,
"loss": 0.4835,
"step": 250700
},
{
"epoch": 81.060116354234,
"grad_norm": 1.5064599514007568,
"learning_rate": 0.001,
"loss": 0.488,
"step": 250800
},
{
"epoch": 81.09243697478992,
"grad_norm": 1.557698369026184,
"learning_rate": 0.001,
"loss": 0.49,
"step": 250900
},
{
"epoch": 81.12475759534583,
"grad_norm": 1.3125524520874023,
"learning_rate": 0.001,
"loss": 0.5003,
"step": 251000
},
{
"epoch": 81.15707821590175,
"grad_norm": 1.453548550605774,
"learning_rate": 0.001,
"loss": 0.5017,
"step": 251100
},
{
"epoch": 81.18939883645766,
"grad_norm": 1.3550654649734497,
"learning_rate": 0.001,
"loss": 0.5051,
"step": 251200
},
{
"epoch": 81.22171945701358,
"grad_norm": 1.2782098054885864,
"learning_rate": 0.001,
"loss": 0.4999,
"step": 251300
},
{
"epoch": 81.25404007756948,
"grad_norm": 1.5780470371246338,
"learning_rate": 0.001,
"loss": 0.5014,
"step": 251400
},
{
"epoch": 81.2863606981254,
"grad_norm": 1.6237266063690186,
"learning_rate": 0.001,
"loss": 0.5,
"step": 251500
},
{
"epoch": 81.31868131868131,
"grad_norm": 1.1814064979553223,
"learning_rate": 0.001,
"loss": 0.5206,
"step": 251600
},
{
"epoch": 81.35100193923724,
"grad_norm": 1.1381033658981323,
"learning_rate": 0.001,
"loss": 0.4945,
"step": 251700
},
{
"epoch": 81.38332255979314,
"grad_norm": 1.3088628053665161,
"learning_rate": 0.001,
"loss": 0.505,
"step": 251800
},
{
"epoch": 81.41564318034906,
"grad_norm": 1.3342161178588867,
"learning_rate": 0.001,
"loss": 0.5175,
"step": 251900
},
{
"epoch": 81.44796380090497,
"grad_norm": 1.3151096105575562,
"learning_rate": 0.001,
"loss": 0.5176,
"step": 252000
},
{
"epoch": 81.4802844214609,
"grad_norm": 1.4550706148147583,
"learning_rate": 0.001,
"loss": 0.5305,
"step": 252100
},
{
"epoch": 81.5126050420168,
"grad_norm": 1.0697120428085327,
"learning_rate": 0.001,
"loss": 0.5091,
"step": 252200
},
{
"epoch": 81.54492566257272,
"grad_norm": 1.358964443206787,
"learning_rate": 0.001,
"loss": 0.5143,
"step": 252300
},
{
"epoch": 81.57724628312863,
"grad_norm": 1.603599190711975,
"learning_rate": 0.001,
"loss": 0.5222,
"step": 252400
},
{
"epoch": 81.60956690368455,
"grad_norm": 1.2112364768981934,
"learning_rate": 0.001,
"loss": 0.5381,
"step": 252500
},
{
"epoch": 81.64188752424046,
"grad_norm": 1.5382639169692993,
"learning_rate": 0.001,
"loss": 0.52,
"step": 252600
},
{
"epoch": 81.67420814479638,
"grad_norm": 1.3610308170318604,
"learning_rate": 0.001,
"loss": 0.5281,
"step": 252700
},
{
"epoch": 81.70652876535229,
"grad_norm": 1.3466453552246094,
"learning_rate": 0.001,
"loss": 0.5268,
"step": 252800
},
{
"epoch": 81.73884938590821,
"grad_norm": 1.0718330144882202,
"learning_rate": 0.001,
"loss": 0.539,
"step": 252900
},
{
"epoch": 81.77117000646412,
"grad_norm": 1.501024603843689,
"learning_rate": 0.001,
"loss": 0.5298,
"step": 253000
},
{
"epoch": 81.80349062702004,
"grad_norm": 1.89411461353302,
"learning_rate": 0.001,
"loss": 0.5463,
"step": 253100
},
{
"epoch": 81.83581124757595,
"grad_norm": 1.5171462297439575,
"learning_rate": 0.001,
"loss": 0.5408,
"step": 253200
},
{
"epoch": 81.86813186813187,
"grad_norm": 1.353501796722412,
"learning_rate": 0.001,
"loss": 0.5524,
"step": 253300
},
{
"epoch": 81.90045248868778,
"grad_norm": 1.2344218492507935,
"learning_rate": 0.001,
"loss": 0.5385,
"step": 253400
},
{
"epoch": 81.9327731092437,
"grad_norm": 1.0975984334945679,
"learning_rate": 0.001,
"loss": 0.5289,
"step": 253500
},
{
"epoch": 81.9650937297996,
"grad_norm": 1.1510794162750244,
"learning_rate": 0.001,
"loss": 0.5447,
"step": 253600
},
{
"epoch": 81.99741435035553,
"grad_norm": 1.4088082313537598,
"learning_rate": 0.001,
"loss": 0.5271,
"step": 253700
},
{
"epoch": 82.02973497091145,
"grad_norm": 1.0249762535095215,
"learning_rate": 0.001,
"loss": 0.4862,
"step": 253800
},
{
"epoch": 82.06205559146736,
"grad_norm": 1.7136085033416748,
"learning_rate": 0.001,
"loss": 0.4803,
"step": 253900
},
{
"epoch": 82.09437621202328,
"grad_norm": 1.3353995084762573,
"learning_rate": 0.001,
"loss": 0.4792,
"step": 254000
},
{
"epoch": 82.12669683257919,
"grad_norm": 1.2580580711364746,
"learning_rate": 0.001,
"loss": 0.4859,
"step": 254100
},
{
"epoch": 82.1590174531351,
"grad_norm": 1.9082170724868774,
"learning_rate": 0.001,
"loss": 0.4976,
"step": 254200
},
{
"epoch": 82.19133807369101,
"grad_norm": 0.9331086277961731,
"learning_rate": 0.001,
"loss": 0.4908,
"step": 254300
},
{
"epoch": 82.22365869424694,
"grad_norm": 1.1894890069961548,
"learning_rate": 0.001,
"loss": 0.5025,
"step": 254400
},
{
"epoch": 82.25597931480284,
"grad_norm": 2.082413673400879,
"learning_rate": 0.001,
"loss": 0.4951,
"step": 254500
},
{
"epoch": 82.28829993535876,
"grad_norm": 1.085007905960083,
"learning_rate": 0.001,
"loss": 0.5051,
"step": 254600
},
{
"epoch": 82.32062055591467,
"grad_norm": 4.038010120391846,
"learning_rate": 0.001,
"loss": 0.4978,
"step": 254700
},
{
"epoch": 82.3529411764706,
"grad_norm": 0.947473406791687,
"learning_rate": 0.001,
"loss": 0.504,
"step": 254800
},
{
"epoch": 82.3852617970265,
"grad_norm": 0.9688652157783508,
"learning_rate": 0.001,
"loss": 0.515,
"step": 254900
},
{
"epoch": 82.41758241758242,
"grad_norm": 1.1633764505386353,
"learning_rate": 0.001,
"loss": 0.516,
"step": 255000
},
{
"epoch": 82.44990303813833,
"grad_norm": 1.1820343732833862,
"learning_rate": 0.001,
"loss": 0.5087,
"step": 255100
},
{
"epoch": 82.48222365869425,
"grad_norm": 0.9853097796440125,
"learning_rate": 0.001,
"loss": 0.5236,
"step": 255200
},
{
"epoch": 82.51454427925016,
"grad_norm": 1.0773764848709106,
"learning_rate": 0.001,
"loss": 0.5222,
"step": 255300
},
{
"epoch": 82.54686489980608,
"grad_norm": 1.3068816661834717,
"learning_rate": 0.001,
"loss": 0.5314,
"step": 255400
},
{
"epoch": 82.57918552036199,
"grad_norm": 1.1652721166610718,
"learning_rate": 0.001,
"loss": 0.5161,
"step": 255500
},
{
"epoch": 82.61150614091791,
"grad_norm": 1.4878023862838745,
"learning_rate": 0.001,
"loss": 0.5206,
"step": 255600
},
{
"epoch": 82.64382676147382,
"grad_norm": 0.9717551469802856,
"learning_rate": 0.001,
"loss": 0.5143,
"step": 255700
},
{
"epoch": 82.67614738202974,
"grad_norm": 0.9180939793586731,
"learning_rate": 0.001,
"loss": 0.5245,
"step": 255800
},
{
"epoch": 82.70846800258565,
"grad_norm": 1.2813619375228882,
"learning_rate": 0.001,
"loss": 0.5479,
"step": 255900
},
{
"epoch": 82.74078862314157,
"grad_norm": 1.2449421882629395,
"learning_rate": 0.001,
"loss": 0.5229,
"step": 256000
},
{
"epoch": 82.77310924369748,
"grad_norm": 1.3184751272201538,
"learning_rate": 0.001,
"loss": 0.5303,
"step": 256100
},
{
"epoch": 82.8054298642534,
"grad_norm": 0.9855522513389587,
"learning_rate": 0.001,
"loss": 0.5216,
"step": 256200
},
{
"epoch": 82.8377504848093,
"grad_norm": 1.078248143196106,
"learning_rate": 0.001,
"loss": 0.5323,
"step": 256300
},
{
"epoch": 82.87007110536523,
"grad_norm": 1.2535841464996338,
"learning_rate": 0.001,
"loss": 0.5222,
"step": 256400
},
{
"epoch": 82.90239172592113,
"grad_norm": 1.2945135831832886,
"learning_rate": 0.001,
"loss": 0.5212,
"step": 256500
},
{
"epoch": 82.93471234647706,
"grad_norm": 1.130628228187561,
"learning_rate": 0.001,
"loss": 0.5398,
"step": 256600
},
{
"epoch": 82.96703296703296,
"grad_norm": 1.1353565454483032,
"learning_rate": 0.001,
"loss": 0.5363,
"step": 256700
},
{
"epoch": 82.99935358758889,
"grad_norm": 0.877821683883667,
"learning_rate": 0.001,
"loss": 0.5173,
"step": 256800
},
{
"epoch": 83.03167420814479,
"grad_norm": 1.0781742334365845,
"learning_rate": 0.001,
"loss": 0.4592,
"step": 256900
},
{
"epoch": 83.06399482870071,
"grad_norm": 0.8791617751121521,
"learning_rate": 0.001,
"loss": 0.4768,
"step": 257000
},
{
"epoch": 83.09631544925662,
"grad_norm": 0.7136475443840027,
"learning_rate": 0.001,
"loss": 0.4807,
"step": 257100
},
{
"epoch": 83.12863606981254,
"grad_norm": 1.038556456565857,
"learning_rate": 0.001,
"loss": 0.4869,
"step": 257200
},
{
"epoch": 83.16095669036845,
"grad_norm": 0.8827602863311768,
"learning_rate": 0.001,
"loss": 0.4861,
"step": 257300
},
{
"epoch": 83.19327731092437,
"grad_norm": 0.8706760406494141,
"learning_rate": 0.001,
"loss": 0.5013,
"step": 257400
},
{
"epoch": 83.22559793148028,
"grad_norm": 0.7551383376121521,
"learning_rate": 0.001,
"loss": 0.4835,
"step": 257500
},
{
"epoch": 83.2579185520362,
"grad_norm": 0.6588653922080994,
"learning_rate": 0.001,
"loss": 0.4866,
"step": 257600
},
{
"epoch": 83.29023917259211,
"grad_norm": 0.7341609597206116,
"learning_rate": 0.001,
"loss": 0.4937,
"step": 257700
},
{
"epoch": 83.32255979314803,
"grad_norm": 0.7104642391204834,
"learning_rate": 0.001,
"loss": 0.4991,
"step": 257800
},
{
"epoch": 83.35488041370394,
"grad_norm": 1.0989702939987183,
"learning_rate": 0.001,
"loss": 0.4976,
"step": 257900
},
{
"epoch": 83.38720103425986,
"grad_norm": 0.7935035228729248,
"learning_rate": 0.001,
"loss": 0.5084,
"step": 258000
},
{
"epoch": 83.41952165481577,
"grad_norm": 0.5782608985900879,
"learning_rate": 0.001,
"loss": 0.5151,
"step": 258100
},
{
"epoch": 83.45184227537169,
"grad_norm": 0.8032583594322205,
"learning_rate": 0.001,
"loss": 0.5042,
"step": 258200
},
{
"epoch": 83.4841628959276,
"grad_norm": 0.7582470774650574,
"learning_rate": 0.001,
"loss": 0.514,
"step": 258300
},
{
"epoch": 83.51648351648352,
"grad_norm": 0.8967429995536804,
"learning_rate": 0.001,
"loss": 0.5185,
"step": 258400
},
{
"epoch": 83.54880413703943,
"grad_norm": 0.7805576920509338,
"learning_rate": 0.001,
"loss": 0.5115,
"step": 258500
},
{
"epoch": 83.58112475759535,
"grad_norm": 1.0321794748306274,
"learning_rate": 0.001,
"loss": 0.5181,
"step": 258600
},
{
"epoch": 83.61344537815125,
"grad_norm": 0.6753956079483032,
"learning_rate": 0.001,
"loss": 0.514,
"step": 258700
},
{
"epoch": 83.64576599870718,
"grad_norm": 0.8881549835205078,
"learning_rate": 0.001,
"loss": 0.5151,
"step": 258800
},
{
"epoch": 83.67808661926308,
"grad_norm": 0.6611385941505432,
"learning_rate": 0.001,
"loss": 0.5282,
"step": 258900
},
{
"epoch": 83.710407239819,
"grad_norm": 0.7606088519096375,
"learning_rate": 0.001,
"loss": 0.5317,
"step": 259000
},
{
"epoch": 83.74272786037491,
"grad_norm": 0.8321859836578369,
"learning_rate": 0.001,
"loss": 0.5248,
"step": 259100
},
{
"epoch": 83.77504848093083,
"grad_norm": 0.7359297275543213,
"learning_rate": 0.001,
"loss": 0.5207,
"step": 259200
},
{
"epoch": 83.80736910148674,
"grad_norm": 0.6839292049407959,
"learning_rate": 0.001,
"loss": 0.5175,
"step": 259300
},
{
"epoch": 83.83968972204266,
"grad_norm": 0.5971709489822388,
"learning_rate": 0.001,
"loss": 0.5355,
"step": 259400
},
{
"epoch": 83.87201034259857,
"grad_norm": 0.5360345244407654,
"learning_rate": 0.001,
"loss": 0.5233,
"step": 259500
},
{
"epoch": 83.9043309631545,
"grad_norm": 0.551607608795166,
"learning_rate": 0.001,
"loss": 0.5298,
"step": 259600
},
{
"epoch": 83.9366515837104,
"grad_norm": 0.7951961755752563,
"learning_rate": 0.001,
"loss": 0.5336,
"step": 259700
},
{
"epoch": 83.96897220426632,
"grad_norm": 0.7808154821395874,
"learning_rate": 0.001,
"loss": 0.5337,
"step": 259800
},
{
"epoch": 84.00129282482224,
"grad_norm": 1.4088801145553589,
"learning_rate": 0.001,
"loss": 0.5438,
"step": 259900
},
{
"epoch": 84.03361344537815,
"grad_norm": 1.5833579301834106,
"learning_rate": 0.001,
"loss": 0.4798,
"step": 260000
},
{
"epoch": 84.06593406593407,
"grad_norm": 1.3921915292739868,
"learning_rate": 0.001,
"loss": 0.4727,
"step": 260100
},
{
"epoch": 84.09825468648998,
"grad_norm": 1.3180691003799438,
"learning_rate": 0.001,
"loss": 0.4633,
"step": 260200
},
{
"epoch": 84.1305753070459,
"grad_norm": 1.685084581375122,
"learning_rate": 0.001,
"loss": 0.4799,
"step": 260300
},
{
"epoch": 84.16289592760181,
"grad_norm": 1.4940513372421265,
"learning_rate": 0.001,
"loss": 0.4854,
"step": 260400
},
{
"epoch": 84.19521654815773,
"grad_norm": 1.7479921579360962,
"learning_rate": 0.001,
"loss": 0.4912,
"step": 260500
},
{
"epoch": 84.22753716871364,
"grad_norm": 2.0624806880950928,
"learning_rate": 0.001,
"loss": 0.4887,
"step": 260600
},
{
"epoch": 84.25985778926956,
"grad_norm": 1.9271317720413208,
"learning_rate": 0.001,
"loss": 0.4773,
"step": 260700
},
{
"epoch": 84.29217840982547,
"grad_norm": 4.249568939208984,
"learning_rate": 0.001,
"loss": 0.4867,
"step": 260800
},
{
"epoch": 84.32449903038139,
"grad_norm": 1.7943352460861206,
"learning_rate": 0.001,
"loss": 0.5005,
"step": 260900
},
{
"epoch": 84.3568196509373,
"grad_norm": 2.092435359954834,
"learning_rate": 0.001,
"loss": 0.5074,
"step": 261000
},
{
"epoch": 84.38914027149322,
"grad_norm": 1.368276834487915,
"learning_rate": 0.001,
"loss": 0.4962,
"step": 261100
},
{
"epoch": 84.42146089204913,
"grad_norm": 1.6893104314804077,
"learning_rate": 0.001,
"loss": 0.503,
"step": 261200
},
{
"epoch": 84.45378151260505,
"grad_norm": 1.8402940034866333,
"learning_rate": 0.001,
"loss": 0.5013,
"step": 261300
},
{
"epoch": 84.48610213316095,
"grad_norm": 1.8994182348251343,
"learning_rate": 0.001,
"loss": 0.5002,
"step": 261400
},
{
"epoch": 84.51842275371688,
"grad_norm": 1.7885549068450928,
"learning_rate": 0.001,
"loss": 0.5087,
"step": 261500
},
{
"epoch": 84.55074337427278,
"grad_norm": 2.087735652923584,
"learning_rate": 0.001,
"loss": 0.4951,
"step": 261600
},
{
"epoch": 84.5830639948287,
"grad_norm": 1.4527853727340698,
"learning_rate": 0.001,
"loss": 0.5014,
"step": 261700
},
{
"epoch": 84.61538461538461,
"grad_norm": 1.629301905632019,
"learning_rate": 0.001,
"loss": 0.5102,
"step": 261800
},
{
"epoch": 84.64770523594053,
"grad_norm": 1.9578490257263184,
"learning_rate": 0.001,
"loss": 0.5024,
"step": 261900
},
{
"epoch": 84.68002585649644,
"grad_norm": 2.0275087356567383,
"learning_rate": 0.001,
"loss": 0.5099,
"step": 262000
},
{
"epoch": 84.71234647705236,
"grad_norm": 1.6318986415863037,
"learning_rate": 0.001,
"loss": 0.5113,
"step": 262100
},
{
"epoch": 84.74466709760827,
"grad_norm": 1.8131710290908813,
"learning_rate": 0.001,
"loss": 0.5119,
"step": 262200
},
{
"epoch": 84.7769877181642,
"grad_norm": 1.5382099151611328,
"learning_rate": 0.001,
"loss": 0.5303,
"step": 262300
},
{
"epoch": 84.8093083387201,
"grad_norm": 2.2519476413726807,
"learning_rate": 0.001,
"loss": 0.5204,
"step": 262400
},
{
"epoch": 84.84162895927602,
"grad_norm": 1.4845337867736816,
"learning_rate": 0.001,
"loss": 0.5277,
"step": 262500
},
{
"epoch": 84.87394957983193,
"grad_norm": 1.4768366813659668,
"learning_rate": 0.001,
"loss": 0.5232,
"step": 262600
},
{
"epoch": 84.90627020038785,
"grad_norm": 1.7839581966400146,
"learning_rate": 0.001,
"loss": 0.5319,
"step": 262700
},
{
"epoch": 84.93859082094376,
"grad_norm": 1.6787962913513184,
"learning_rate": 0.001,
"loss": 0.5212,
"step": 262800
},
{
"epoch": 84.97091144149968,
"grad_norm": 1.7544941902160645,
"learning_rate": 0.001,
"loss": 0.5338,
"step": 262900
},
{
"epoch": 85.00323206205559,
"grad_norm": 1.494774580001831,
"learning_rate": 0.001,
"loss": 0.545,
"step": 263000
},
{
"epoch": 85.03555268261151,
"grad_norm": 1.3960219621658325,
"learning_rate": 0.001,
"loss": 0.4712,
"step": 263100
},
{
"epoch": 85.06787330316742,
"grad_norm": 1.553849220275879,
"learning_rate": 0.001,
"loss": 0.4741,
"step": 263200
},
{
"epoch": 85.10019392372334,
"grad_norm": 1.8072491884231567,
"learning_rate": 0.001,
"loss": 0.4699,
"step": 263300
},
{
"epoch": 85.13251454427925,
"grad_norm": 1.5213582515716553,
"learning_rate": 0.001,
"loss": 0.4671,
"step": 263400
},
{
"epoch": 85.16483516483517,
"grad_norm": 1.9114036560058594,
"learning_rate": 0.001,
"loss": 0.4783,
"step": 263500
},
{
"epoch": 85.19715578539108,
"grad_norm": 1.4559093713760376,
"learning_rate": 0.001,
"loss": 0.4804,
"step": 263600
},
{
"epoch": 85.229476405947,
"grad_norm": 1.2537195682525635,
"learning_rate": 0.001,
"loss": 0.4838,
"step": 263700
},
{
"epoch": 85.2617970265029,
"grad_norm": 1.655987024307251,
"learning_rate": 0.001,
"loss": 0.5079,
"step": 263800
},
{
"epoch": 85.29411764705883,
"grad_norm": 1.7664257287979126,
"learning_rate": 0.001,
"loss": 0.4813,
"step": 263900
},
{
"epoch": 85.32643826761473,
"grad_norm": 1.795916199684143,
"learning_rate": 0.001,
"loss": 0.4974,
"step": 264000
},
{
"epoch": 85.35875888817066,
"grad_norm": 1.5820717811584473,
"learning_rate": 0.001,
"loss": 0.5014,
"step": 264100
},
{
"epoch": 85.39107950872656,
"grad_norm": 1.4357985258102417,
"learning_rate": 0.001,
"loss": 0.496,
"step": 264200
},
{
"epoch": 85.42340012928248,
"grad_norm": 1.449780821800232,
"learning_rate": 0.001,
"loss": 0.4962,
"step": 264300
},
{
"epoch": 85.45572074983839,
"grad_norm": 1.4229458570480347,
"learning_rate": 0.001,
"loss": 0.5044,
"step": 264400
},
{
"epoch": 85.48804137039431,
"grad_norm": 1.443252682685852,
"learning_rate": 0.001,
"loss": 0.4841,
"step": 264500
},
{
"epoch": 85.52036199095022,
"grad_norm": 1.1594971418380737,
"learning_rate": 0.001,
"loss": 0.5087,
"step": 264600
},
{
"epoch": 85.55268261150614,
"grad_norm": 1.7895407676696777,
"learning_rate": 0.001,
"loss": 0.5107,
"step": 264700
},
{
"epoch": 85.58500323206205,
"grad_norm": 1.5300350189208984,
"learning_rate": 0.001,
"loss": 0.5063,
"step": 264800
},
{
"epoch": 85.61732385261797,
"grad_norm": 1.501360297203064,
"learning_rate": 0.001,
"loss": 0.5023,
"step": 264900
},
{
"epoch": 85.64964447317388,
"grad_norm": 1.5320264101028442,
"learning_rate": 0.001,
"loss": 0.4965,
"step": 265000
},
{
"epoch": 85.6819650937298,
"grad_norm": 1.4581434726715088,
"learning_rate": 0.001,
"loss": 0.5048,
"step": 265100
},
{
"epoch": 85.71428571428571,
"grad_norm": 1.3853094577789307,
"learning_rate": 0.001,
"loss": 0.4939,
"step": 265200
},
{
"epoch": 85.74660633484163,
"grad_norm": 1.4571151733398438,
"learning_rate": 0.001,
"loss": 0.5211,
"step": 265300
},
{
"epoch": 85.77892695539754,
"grad_norm": 1.6544904708862305,
"learning_rate": 0.001,
"loss": 0.51,
"step": 265400
},
{
"epoch": 85.81124757595346,
"grad_norm": 1.4322007894515991,
"learning_rate": 0.001,
"loss": 0.521,
"step": 265500
},
{
"epoch": 85.84356819650937,
"grad_norm": 1.7621874809265137,
"learning_rate": 0.001,
"loss": 0.5152,
"step": 265600
},
{
"epoch": 85.87588881706529,
"grad_norm": 1.492745041847229,
"learning_rate": 0.001,
"loss": 0.5191,
"step": 265700
},
{
"epoch": 85.9082094376212,
"grad_norm": 1.6130776405334473,
"learning_rate": 0.001,
"loss": 0.5192,
"step": 265800
},
{
"epoch": 85.94053005817712,
"grad_norm": 1.2372713088989258,
"learning_rate": 0.001,
"loss": 0.524,
"step": 265900
},
{
"epoch": 85.97285067873302,
"grad_norm": 1.493804693222046,
"learning_rate": 0.001,
"loss": 0.5237,
"step": 266000
},
{
"epoch": 86.00517129928895,
"grad_norm": 1.6226726770401,
"learning_rate": 0.001,
"loss": 0.5214,
"step": 266100
},
{
"epoch": 86.03749191984487,
"grad_norm": 1.511785864830017,
"learning_rate": 0.001,
"loss": 0.4658,
"step": 266200
},
{
"epoch": 86.06981254040078,
"grad_norm": 1.5849074125289917,
"learning_rate": 0.001,
"loss": 0.4684,
"step": 266300
},
{
"epoch": 86.1021331609567,
"grad_norm": 1.510827898979187,
"learning_rate": 0.001,
"loss": 0.4636,
"step": 266400
},
{
"epoch": 86.1344537815126,
"grad_norm": 1.6693753004074097,
"learning_rate": 0.001,
"loss": 0.4764,
"step": 266500
},
{
"epoch": 86.16677440206853,
"grad_norm": 1.2760614156723022,
"learning_rate": 0.001,
"loss": 0.4754,
"step": 266600
},
{
"epoch": 86.19909502262443,
"grad_norm": 1.7338852882385254,
"learning_rate": 0.001,
"loss": 0.4715,
"step": 266700
},
{
"epoch": 86.23141564318036,
"grad_norm": 1.388390302658081,
"learning_rate": 0.001,
"loss": 0.4788,
"step": 266800
},
{
"epoch": 86.26373626373626,
"grad_norm": 1.734383225440979,
"learning_rate": 0.001,
"loss": 0.4847,
"step": 266900
},
{
"epoch": 86.29605688429218,
"grad_norm": 1.4146814346313477,
"learning_rate": 0.001,
"loss": 0.4865,
"step": 267000
},
{
"epoch": 86.32837750484809,
"grad_norm": 1.3522993326187134,
"learning_rate": 0.001,
"loss": 0.4895,
"step": 267100
},
{
"epoch": 86.36069812540401,
"grad_norm": 1.469128131866455,
"learning_rate": 0.001,
"loss": 0.4917,
"step": 267200
},
{
"epoch": 86.39301874595992,
"grad_norm": 1.8461087942123413,
"learning_rate": 0.001,
"loss": 0.4969,
"step": 267300
},
{
"epoch": 86.42533936651584,
"grad_norm": 1.894525170326233,
"learning_rate": 0.001,
"loss": 0.4877,
"step": 267400
},
{
"epoch": 86.45765998707175,
"grad_norm": 2.118194341659546,
"learning_rate": 0.001,
"loss": 0.48,
"step": 267500
},
{
"epoch": 86.48998060762767,
"grad_norm": 1.5822690725326538,
"learning_rate": 0.001,
"loss": 0.4902,
"step": 267600
},
{
"epoch": 86.52230122818358,
"grad_norm": 1.3895262479782104,
"learning_rate": 0.001,
"loss": 0.4972,
"step": 267700
},
{
"epoch": 86.5546218487395,
"grad_norm": 1.4661526679992676,
"learning_rate": 0.001,
"loss": 0.4966,
"step": 267800
},
{
"epoch": 86.58694246929541,
"grad_norm": 1.634231448173523,
"learning_rate": 0.001,
"loss": 0.5081,
"step": 267900
},
{
"epoch": 86.61926308985133,
"grad_norm": 1.520247459411621,
"learning_rate": 0.001,
"loss": 0.4971,
"step": 268000
},
{
"epoch": 86.65158371040724,
"grad_norm": 1.878395438194275,
"learning_rate": 0.001,
"loss": 0.5079,
"step": 268100
},
{
"epoch": 86.68390433096316,
"grad_norm": 1.7085822820663452,
"learning_rate": 0.001,
"loss": 0.5052,
"step": 268200
},
{
"epoch": 86.71622495151907,
"grad_norm": 1.3339471817016602,
"learning_rate": 0.001,
"loss": 0.4948,
"step": 268300
},
{
"epoch": 86.74854557207499,
"grad_norm": 1.9712716341018677,
"learning_rate": 0.001,
"loss": 0.5098,
"step": 268400
},
{
"epoch": 86.7808661926309,
"grad_norm": 2.210573434829712,
"learning_rate": 0.001,
"loss": 0.5105,
"step": 268500
},
{
"epoch": 86.81318681318682,
"grad_norm": 1.4131146669387817,
"learning_rate": 0.001,
"loss": 0.5096,
"step": 268600
},
{
"epoch": 86.84550743374272,
"grad_norm": 1.391532063484192,
"learning_rate": 0.001,
"loss": 0.5151,
"step": 268700
},
{
"epoch": 86.87782805429865,
"grad_norm": 1.9204423427581787,
"learning_rate": 0.001,
"loss": 0.5061,
"step": 268800
},
{
"epoch": 86.91014867485455,
"grad_norm": 2.0518362522125244,
"learning_rate": 0.001,
"loss": 0.5058,
"step": 268900
},
{
"epoch": 86.94246929541048,
"grad_norm": 1.5138390064239502,
"learning_rate": 0.001,
"loss": 0.5072,
"step": 269000
},
{
"epoch": 86.97478991596638,
"grad_norm": 1.3717081546783447,
"learning_rate": 0.001,
"loss": 0.5203,
"step": 269100
},
{
"epoch": 87.0071105365223,
"grad_norm": 1.4673140048980713,
"learning_rate": 0.001,
"loss": 0.5119,
"step": 269200
},
{
"epoch": 87.03943115707821,
"grad_norm": 1.371171474456787,
"learning_rate": 0.001,
"loss": 0.4482,
"step": 269300
},
{
"epoch": 87.07175177763413,
"grad_norm": 1.397260308265686,
"learning_rate": 0.001,
"loss": 0.4578,
"step": 269400
},
{
"epoch": 87.10407239819004,
"grad_norm": 1.6206611394882202,
"learning_rate": 0.001,
"loss": 0.4609,
"step": 269500
},
{
"epoch": 87.13639301874596,
"grad_norm": 1.3245476484298706,
"learning_rate": 0.001,
"loss": 0.4701,
"step": 269600
},
{
"epoch": 87.16871363930187,
"grad_norm": 1.6914180517196655,
"learning_rate": 0.001,
"loss": 0.4584,
"step": 269700
},
{
"epoch": 87.20103425985779,
"grad_norm": 2.0249228477478027,
"learning_rate": 0.001,
"loss": 0.4669,
"step": 269800
},
{
"epoch": 87.2333548804137,
"grad_norm": 1.3308566808700562,
"learning_rate": 0.001,
"loss": 0.4724,
"step": 269900
},
{
"epoch": 87.26567550096962,
"grad_norm": 1.2931392192840576,
"learning_rate": 0.001,
"loss": 0.4759,
"step": 270000
},
{
"epoch": 87.29799612152553,
"grad_norm": 1.290625810623169,
"learning_rate": 0.001,
"loss": 0.4803,
"step": 270100
},
{
"epoch": 87.33031674208145,
"grad_norm": 1.3367546796798706,
"learning_rate": 0.001,
"loss": 0.4847,
"step": 270200
},
{
"epoch": 87.36263736263736,
"grad_norm": 1.2759326696395874,
"learning_rate": 0.001,
"loss": 0.4872,
"step": 270300
},
{
"epoch": 87.39495798319328,
"grad_norm": 1.3992459774017334,
"learning_rate": 0.001,
"loss": 0.4876,
"step": 270400
},
{
"epoch": 87.42727860374919,
"grad_norm": 1.4173572063446045,
"learning_rate": 0.001,
"loss": 0.4712,
"step": 270500
},
{
"epoch": 87.45959922430511,
"grad_norm": 1.8669795989990234,
"learning_rate": 0.001,
"loss": 0.4922,
"step": 270600
},
{
"epoch": 87.49191984486102,
"grad_norm": 1.6839419603347778,
"learning_rate": 0.001,
"loss": 0.495,
"step": 270700
},
{
"epoch": 87.52424046541694,
"grad_norm": 1.2317043542861938,
"learning_rate": 0.001,
"loss": 0.4771,
"step": 270800
},
{
"epoch": 87.55656108597285,
"grad_norm": 1.3039339780807495,
"learning_rate": 0.001,
"loss": 0.4837,
"step": 270900
},
{
"epoch": 87.58888170652877,
"grad_norm": 1.291479229927063,
"learning_rate": 0.001,
"loss": 0.4902,
"step": 271000
},
{
"epoch": 87.62120232708467,
"grad_norm": 1.3181146383285522,
"learning_rate": 0.001,
"loss": 0.4975,
"step": 271100
},
{
"epoch": 87.6535229476406,
"grad_norm": 1.7635709047317505,
"learning_rate": 0.001,
"loss": 0.4954,
"step": 271200
},
{
"epoch": 87.6858435681965,
"grad_norm": 1.4242125749588013,
"learning_rate": 0.001,
"loss": 0.509,
"step": 271300
},
{
"epoch": 87.71816418875243,
"grad_norm": 1.4490323066711426,
"learning_rate": 0.001,
"loss": 0.5068,
"step": 271400
},
{
"epoch": 87.75048480930833,
"grad_norm": 1.5760105848312378,
"learning_rate": 0.001,
"loss": 0.4997,
"step": 271500
},
{
"epoch": 87.78280542986425,
"grad_norm": 1.3230277299880981,
"learning_rate": 0.001,
"loss": 0.5041,
"step": 271600
},
{
"epoch": 87.81512605042016,
"grad_norm": 1.6378422975540161,
"learning_rate": 0.001,
"loss": 0.5138,
"step": 271700
},
{
"epoch": 87.84744667097608,
"grad_norm": 1.4305038452148438,
"learning_rate": 0.001,
"loss": 0.5073,
"step": 271800
},
{
"epoch": 87.87976729153199,
"grad_norm": 1.6552515029907227,
"learning_rate": 0.001,
"loss": 0.4978,
"step": 271900
},
{
"epoch": 87.91208791208791,
"grad_norm": 1.555861473083496,
"learning_rate": 0.001,
"loss": 0.511,
"step": 272000
},
{
"epoch": 87.94440853264382,
"grad_norm": 1.9281134605407715,
"learning_rate": 0.001,
"loss": 0.5197,
"step": 272100
},
{
"epoch": 87.97672915319974,
"grad_norm": 2.146212100982666,
"learning_rate": 0.001,
"loss": 0.5133,
"step": 272200
},
{
"epoch": 88.00904977375566,
"grad_norm": 1.4571073055267334,
"learning_rate": 0.001,
"loss": 0.4881,
"step": 272300
},
{
"epoch": 88.04137039431157,
"grad_norm": 1.42752206325531,
"learning_rate": 0.001,
"loss": 0.4532,
"step": 272400
},
{
"epoch": 88.07369101486749,
"grad_norm": 1.7515978813171387,
"learning_rate": 0.001,
"loss": 0.4482,
"step": 272500
},
{
"epoch": 88.1060116354234,
"grad_norm": 1.3939858675003052,
"learning_rate": 0.001,
"loss": 0.4564,
"step": 272600
},
{
"epoch": 88.13833225597932,
"grad_norm": 1.2333893775939941,
"learning_rate": 0.001,
"loss": 0.4716,
"step": 272700
},
{
"epoch": 88.17065287653523,
"grad_norm": 1.5755928754806519,
"learning_rate": 0.001,
"loss": 0.4726,
"step": 272800
},
{
"epoch": 88.20297349709115,
"grad_norm": 2.824388265609741,
"learning_rate": 0.001,
"loss": 0.4669,
"step": 272900
},
{
"epoch": 88.23529411764706,
"grad_norm": 1.1696257591247559,
"learning_rate": 0.001,
"loss": 0.4707,
"step": 273000
},
{
"epoch": 88.26761473820298,
"grad_norm": 1.5280286073684692,
"learning_rate": 0.001,
"loss": 0.4658,
"step": 273100
},
{
"epoch": 88.29993535875889,
"grad_norm": 1.493591070175171,
"learning_rate": 0.001,
"loss": 0.4746,
"step": 273200
},
{
"epoch": 88.33225597931481,
"grad_norm": 1.619083046913147,
"learning_rate": 0.001,
"loss": 0.4755,
"step": 273300
},
{
"epoch": 88.36457659987072,
"grad_norm": 3.416677236557007,
"learning_rate": 0.001,
"loss": 0.4689,
"step": 273400
},
{
"epoch": 88.39689722042664,
"grad_norm": 1.3980712890625,
"learning_rate": 0.001,
"loss": 0.4713,
"step": 273500
},
{
"epoch": 88.42921784098255,
"grad_norm": 1.4884861707687378,
"learning_rate": 0.001,
"loss": 0.4793,
"step": 273600
},
{
"epoch": 88.46153846153847,
"grad_norm": 1.4330198764801025,
"learning_rate": 0.001,
"loss": 0.4845,
"step": 273700
},
{
"epoch": 88.49385908209437,
"grad_norm": 1.7243083715438843,
"learning_rate": 0.001,
"loss": 0.4794,
"step": 273800
},
{
"epoch": 88.5261797026503,
"grad_norm": 1.2517062425613403,
"learning_rate": 0.001,
"loss": 0.4865,
"step": 273900
},
{
"epoch": 88.5585003232062,
"grad_norm": 1.5816173553466797,
"learning_rate": 0.001,
"loss": 0.4873,
"step": 274000
},
{
"epoch": 88.59082094376213,
"grad_norm": 1.3292566537857056,
"learning_rate": 0.001,
"loss": 0.4888,
"step": 274100
},
{
"epoch": 88.62314156431803,
"grad_norm": 1.2126435041427612,
"learning_rate": 0.001,
"loss": 0.4922,
"step": 274200
},
{
"epoch": 88.65546218487395,
"grad_norm": 1.7256187200546265,
"learning_rate": 0.001,
"loss": 0.4931,
"step": 274300
},
{
"epoch": 88.68778280542986,
"grad_norm": 1.8610934019088745,
"learning_rate": 0.001,
"loss": 0.5008,
"step": 274400
},
{
"epoch": 88.72010342598578,
"grad_norm": 1.6567224264144897,
"learning_rate": 0.001,
"loss": 0.4895,
"step": 274500
},
{
"epoch": 88.75242404654169,
"grad_norm": 1.3405829668045044,
"learning_rate": 0.001,
"loss": 0.5039,
"step": 274600
},
{
"epoch": 88.78474466709761,
"grad_norm": 1.4917628765106201,
"learning_rate": 0.001,
"loss": 0.4968,
"step": 274700
},
{
"epoch": 88.81706528765352,
"grad_norm": 1.517630696296692,
"learning_rate": 0.001,
"loss": 0.4977,
"step": 274800
},
{
"epoch": 88.84938590820944,
"grad_norm": 1.293230652809143,
"learning_rate": 0.001,
"loss": 0.4992,
"step": 274900
},
{
"epoch": 88.88170652876535,
"grad_norm": 1.6211521625518799,
"learning_rate": 0.001,
"loss": 0.5038,
"step": 275000
},
{
"epoch": 88.91402714932127,
"grad_norm": 1.3522111177444458,
"learning_rate": 0.001,
"loss": 0.5073,
"step": 275100
},
{
"epoch": 88.94634776987718,
"grad_norm": 1.6885173320770264,
"learning_rate": 0.001,
"loss": 0.5128,
"step": 275200
},
{
"epoch": 88.9786683904331,
"grad_norm": 1.1733167171478271,
"learning_rate": 0.001,
"loss": 0.5152,
"step": 275300
},
{
"epoch": 89.01098901098901,
"grad_norm": 1.6227352619171143,
"learning_rate": 0.001,
"loss": 0.4826,
"step": 275400
},
{
"epoch": 89.04330963154493,
"grad_norm": 1.5030772686004639,
"learning_rate": 0.001,
"loss": 0.4584,
"step": 275500
},
{
"epoch": 89.07563025210084,
"grad_norm": 1.3889706134796143,
"learning_rate": 0.001,
"loss": 0.4535,
"step": 275600
},
{
"epoch": 89.10795087265676,
"grad_norm": 2.0631189346313477,
"learning_rate": 0.001,
"loss": 0.4577,
"step": 275700
},
{
"epoch": 89.14027149321267,
"grad_norm": 1.3928669691085815,
"learning_rate": 0.001,
"loss": 0.4504,
"step": 275800
},
{
"epoch": 89.17259211376859,
"grad_norm": 1.517021656036377,
"learning_rate": 0.001,
"loss": 0.4644,
"step": 275900
},
{
"epoch": 89.2049127343245,
"grad_norm": 1.4841409921646118,
"learning_rate": 0.001,
"loss": 0.4523,
"step": 276000
},
{
"epoch": 89.23723335488042,
"grad_norm": 1.5518596172332764,
"learning_rate": 0.001,
"loss": 0.4628,
"step": 276100
},
{
"epoch": 89.26955397543632,
"grad_norm": 1.3948451280593872,
"learning_rate": 0.001,
"loss": 0.4553,
"step": 276200
},
{
"epoch": 89.30187459599225,
"grad_norm": 1.4886418581008911,
"learning_rate": 0.001,
"loss": 0.4834,
"step": 276300
},
{
"epoch": 89.33419521654815,
"grad_norm": 1.31479012966156,
"learning_rate": 0.001,
"loss": 0.4627,
"step": 276400
},
{
"epoch": 89.36651583710407,
"grad_norm": 1.0634099245071411,
"learning_rate": 0.001,
"loss": 0.4594,
"step": 276500
},
{
"epoch": 89.39883645765998,
"grad_norm": 1.2459696531295776,
"learning_rate": 0.001,
"loss": 0.479,
"step": 276600
},
{
"epoch": 89.4311570782159,
"grad_norm": 1.0318691730499268,
"learning_rate": 0.001,
"loss": 0.4676,
"step": 276700
},
{
"epoch": 89.46347769877181,
"grad_norm": 1.7067400217056274,
"learning_rate": 0.001,
"loss": 0.4723,
"step": 276800
},
{
"epoch": 89.49579831932773,
"grad_norm": 1.4222896099090576,
"learning_rate": 0.001,
"loss": 0.4702,
"step": 276900
},
{
"epoch": 89.52811893988364,
"grad_norm": 1.3543707132339478,
"learning_rate": 0.001,
"loss": 0.4773,
"step": 277000
},
{
"epoch": 89.56043956043956,
"grad_norm": 1.1513952016830444,
"learning_rate": 0.001,
"loss": 0.4819,
"step": 277100
},
{
"epoch": 89.59276018099547,
"grad_norm": 1.3153300285339355,
"learning_rate": 0.001,
"loss": 0.4861,
"step": 277200
},
{
"epoch": 89.62508080155139,
"grad_norm": 1.3194316625595093,
"learning_rate": 0.001,
"loss": 0.4933,
"step": 277300
},
{
"epoch": 89.6574014221073,
"grad_norm": 1.3721164464950562,
"learning_rate": 0.001,
"loss": 0.49,
"step": 277400
},
{
"epoch": 89.68972204266322,
"grad_norm": 1.4286446571350098,
"learning_rate": 0.001,
"loss": 0.4892,
"step": 277500
},
{
"epoch": 89.72204266321913,
"grad_norm": 1.2708386182785034,
"learning_rate": 0.001,
"loss": 0.4953,
"step": 277600
},
{
"epoch": 89.75436328377505,
"grad_norm": 1.0783087015151978,
"learning_rate": 0.001,
"loss": 0.4883,
"step": 277700
},
{
"epoch": 89.78668390433096,
"grad_norm": 1.4168130159378052,
"learning_rate": 0.001,
"loss": 0.4907,
"step": 277800
},
{
"epoch": 89.81900452488688,
"grad_norm": 1.315295934677124,
"learning_rate": 0.001,
"loss": 0.498,
"step": 277900
},
{
"epoch": 89.85132514544279,
"grad_norm": 1.4645394086837769,
"learning_rate": 0.001,
"loss": 0.5051,
"step": 278000
},
{
"epoch": 89.88364576599871,
"grad_norm": 1.6038243770599365,
"learning_rate": 0.001,
"loss": 0.5035,
"step": 278100
},
{
"epoch": 89.91596638655462,
"grad_norm": 1.3226242065429688,
"learning_rate": 0.001,
"loss": 0.5009,
"step": 278200
},
{
"epoch": 89.94828700711054,
"grad_norm": 1.4041608572006226,
"learning_rate": 0.001,
"loss": 0.5034,
"step": 278300
},
{
"epoch": 89.98060762766644,
"grad_norm": 1.4587429761886597,
"learning_rate": 0.001,
"loss": 0.5081,
"step": 278400
},
{
"epoch": 90.01292824822237,
"grad_norm": 1.2513530254364014,
"learning_rate": 0.001,
"loss": 0.4572,
"step": 278500
},
{
"epoch": 90.04524886877829,
"grad_norm": 1.0929393768310547,
"learning_rate": 0.001,
"loss": 0.4435,
"step": 278600
},
{
"epoch": 90.0775694893342,
"grad_norm": 1.1157305240631104,
"learning_rate": 0.001,
"loss": 0.4525,
"step": 278700
},
{
"epoch": 90.10989010989012,
"grad_norm": 1.1245415210723877,
"learning_rate": 0.001,
"loss": 0.452,
"step": 278800
},
{
"epoch": 90.14221073044602,
"grad_norm": 1.493154764175415,
"learning_rate": 0.001,
"loss": 0.4605,
"step": 278900
},
{
"epoch": 90.17453135100195,
"grad_norm": 0.9956198334693909,
"learning_rate": 0.001,
"loss": 0.4653,
"step": 279000
},
{
"epoch": 90.20685197155785,
"grad_norm": 1.3177974224090576,
"learning_rate": 0.001,
"loss": 0.4651,
"step": 279100
},
{
"epoch": 90.23917259211377,
"grad_norm": 1.2235926389694214,
"learning_rate": 0.001,
"loss": 0.46,
"step": 279200
},
{
"epoch": 90.27149321266968,
"grad_norm": 1.260908603668213,
"learning_rate": 0.001,
"loss": 0.4564,
"step": 279300
},
{
"epoch": 90.3038138332256,
"grad_norm": 1.13411545753479,
"learning_rate": 0.001,
"loss": 0.4617,
"step": 279400
},
{
"epoch": 90.33613445378151,
"grad_norm": 1.1787828207015991,
"learning_rate": 0.001,
"loss": 0.4699,
"step": 279500
},
{
"epoch": 90.36845507433743,
"grad_norm": 1.1601965427398682,
"learning_rate": 0.001,
"loss": 0.4677,
"step": 279600
},
{
"epoch": 90.40077569489334,
"grad_norm": 1.2311824560165405,
"learning_rate": 0.001,
"loss": 0.4751,
"step": 279700
},
{
"epoch": 90.43309631544926,
"grad_norm": 1.132232427597046,
"learning_rate": 0.001,
"loss": 0.472,
"step": 279800
},
{
"epoch": 90.46541693600517,
"grad_norm": 0.876586377620697,
"learning_rate": 0.001,
"loss": 0.4705,
"step": 279900
},
{
"epoch": 90.49773755656109,
"grad_norm": 1.2030634880065918,
"learning_rate": 0.001,
"loss": 0.477,
"step": 280000
},
{
"epoch": 90.530058177117,
"grad_norm": 0.9745743274688721,
"learning_rate": 0.001,
"loss": 0.4403,
"step": 280100
},
{
"epoch": 90.56237879767292,
"grad_norm": 1.3243677616119385,
"learning_rate": 0.001,
"loss": 0.4505,
"step": 280200
},
{
"epoch": 90.59469941822883,
"grad_norm": 1.1608860492706299,
"learning_rate": 0.001,
"loss": 0.4497,
"step": 280300
},
{
"epoch": 90.62702003878475,
"grad_norm": 1.1231688261032104,
"learning_rate": 0.001,
"loss": 0.4549,
"step": 280400
},
{
"epoch": 90.65934065934066,
"grad_norm": 1.5799304246902466,
"learning_rate": 0.001,
"loss": 0.4487,
"step": 280500
},
{
"epoch": 90.69166127989658,
"grad_norm": 1.247395396232605,
"learning_rate": 0.001,
"loss": 0.4572,
"step": 280600
},
{
"epoch": 90.72398190045249,
"grad_norm": 1.3597042560577393,
"learning_rate": 0.001,
"loss": 0.4627,
"step": 280700
},
{
"epoch": 90.75630252100841,
"grad_norm": 1.445366621017456,
"learning_rate": 0.001,
"loss": 0.4587,
"step": 280800
},
{
"epoch": 90.78862314156432,
"grad_norm": 1.2129086256027222,
"learning_rate": 0.001,
"loss": 0.4598,
"step": 280900
},
{
"epoch": 90.82094376212024,
"grad_norm": 1.0306206941604614,
"learning_rate": 0.001,
"loss": 0.4561,
"step": 281000
},
{
"epoch": 90.85326438267614,
"grad_norm": 1.1839573383331299,
"learning_rate": 0.001,
"loss": 0.4791,
"step": 281100
},
{
"epoch": 90.88558500323207,
"grad_norm": 0.9946609735488892,
"learning_rate": 0.001,
"loss": 0.4646,
"step": 281200
},
{
"epoch": 90.91790562378797,
"grad_norm": 1.1721656322479248,
"learning_rate": 0.001,
"loss": 0.4819,
"step": 281300
},
{
"epoch": 90.9502262443439,
"grad_norm": 1.1597821712493896,
"learning_rate": 0.001,
"loss": 0.4703,
"step": 281400
},
{
"epoch": 90.9825468648998,
"grad_norm": 1.4225759506225586,
"learning_rate": 0.001,
"loss": 0.4751,
"step": 281500
},
{
"epoch": 91.01486748545572,
"grad_norm": 9.793893814086914,
"learning_rate": 0.001,
"loss": 0.434,
"step": 281600
},
{
"epoch": 91.04718810601163,
"grad_norm": 1.1725051403045654,
"learning_rate": 0.001,
"loss": 0.4464,
"step": 281700
},
{
"epoch": 91.07950872656755,
"grad_norm": 0.8536701202392578,
"learning_rate": 0.001,
"loss": 0.439,
"step": 281800
},
{
"epoch": 91.11182934712346,
"grad_norm": 0.939294695854187,
"learning_rate": 0.001,
"loss": 0.4568,
"step": 281900
},
{
"epoch": 91.14414996767938,
"grad_norm": 1.0028142929077148,
"learning_rate": 0.001,
"loss": 0.452,
"step": 282000
},
{
"epoch": 91.17647058823529,
"grad_norm": 0.818120539188385,
"learning_rate": 0.001,
"loss": 0.4583,
"step": 282100
},
{
"epoch": 91.20879120879121,
"grad_norm": 0.8494078516960144,
"learning_rate": 0.001,
"loss": 0.4485,
"step": 282200
},
{
"epoch": 91.24111182934712,
"grad_norm": 0.7314313054084778,
"learning_rate": 0.001,
"loss": 0.4577,
"step": 282300
},
{
"epoch": 91.27343244990304,
"grad_norm": 0.6735559105873108,
"learning_rate": 0.001,
"loss": 0.465,
"step": 282400
},
{
"epoch": 91.30575307045895,
"grad_norm": 0.8712020516395569,
"learning_rate": 0.001,
"loss": 0.4652,
"step": 282500
},
{
"epoch": 91.33807369101487,
"grad_norm": 1.028043270111084,
"learning_rate": 0.001,
"loss": 0.4593,
"step": 282600
},
{
"epoch": 91.37039431157078,
"grad_norm": 0.8021206855773926,
"learning_rate": 0.001,
"loss": 0.4723,
"step": 282700
},
{
"epoch": 91.4027149321267,
"grad_norm": 0.8332772850990295,
"learning_rate": 0.001,
"loss": 0.47,
"step": 282800
},
{
"epoch": 91.4350355526826,
"grad_norm": 0.8631690740585327,
"learning_rate": 0.001,
"loss": 0.4683,
"step": 282900
},
{
"epoch": 91.46735617323853,
"grad_norm": 0.806066632270813,
"learning_rate": 0.001,
"loss": 0.47,
"step": 283000
},
{
"epoch": 91.49967679379444,
"grad_norm": 0.9106870889663696,
"learning_rate": 0.001,
"loss": 0.4687,
"step": 283100
},
{
"epoch": 91.53199741435036,
"grad_norm": 0.8315029740333557,
"learning_rate": 0.001,
"loss": 0.4618,
"step": 283200
},
{
"epoch": 91.56431803490626,
"grad_norm": 0.8321149945259094,
"learning_rate": 0.001,
"loss": 0.4793,
"step": 283300
},
{
"epoch": 91.59663865546219,
"grad_norm": 0.6962825059890747,
"learning_rate": 0.001,
"loss": 0.4825,
"step": 283400
},
{
"epoch": 91.6289592760181,
"grad_norm": 1.0555658340454102,
"learning_rate": 0.001,
"loss": 0.4724,
"step": 283500
},
{
"epoch": 91.66127989657402,
"grad_norm": 0.9735150337219238,
"learning_rate": 0.001,
"loss": 0.51,
"step": 283600
},
{
"epoch": 91.69360051712992,
"grad_norm": 1.2564635276794434,
"learning_rate": 0.001,
"loss": 0.4918,
"step": 283700
},
{
"epoch": 91.72592113768584,
"grad_norm": 0.7897902131080627,
"learning_rate": 0.001,
"loss": 0.486,
"step": 283800
},
{
"epoch": 91.75824175824175,
"grad_norm": 1.0546153783798218,
"learning_rate": 0.001,
"loss": 0.4935,
"step": 283900
},
{
"epoch": 91.79056237879767,
"grad_norm": 0.8487304449081421,
"learning_rate": 0.001,
"loss": 0.4855,
"step": 284000
},
{
"epoch": 91.82288299935358,
"grad_norm": 0.8980190753936768,
"learning_rate": 0.001,
"loss": 0.4905,
"step": 284100
},
{
"epoch": 91.8552036199095,
"grad_norm": 1.1748449802398682,
"learning_rate": 0.001,
"loss": 0.4818,
"step": 284200
},
{
"epoch": 91.88752424046541,
"grad_norm": 0.9486263394355774,
"learning_rate": 0.001,
"loss": 0.4965,
"step": 284300
},
{
"epoch": 91.91984486102133,
"grad_norm": 1.0960006713867188,
"learning_rate": 0.001,
"loss": 0.502,
"step": 284400
},
{
"epoch": 91.95216548157724,
"grad_norm": 1.006560206413269,
"learning_rate": 0.001,
"loss": 0.4868,
"step": 284500
},
{
"epoch": 91.98448610213316,
"grad_norm": 1.080833911895752,
"learning_rate": 0.001,
"loss": 0.502,
"step": 284600
},
{
"epoch": 92.01680672268908,
"grad_norm": 2.0345189571380615,
"learning_rate": 0.001,
"loss": 0.4498,
"step": 284700
},
{
"epoch": 92.04912734324499,
"grad_norm": 1.6295733451843262,
"learning_rate": 0.001,
"loss": 0.4319,
"step": 284800
},
{
"epoch": 92.08144796380091,
"grad_norm": 1.82840096950531,
"learning_rate": 0.001,
"loss": 0.4405,
"step": 284900
},
{
"epoch": 92.11376858435682,
"grad_norm": 3.0892772674560547,
"learning_rate": 0.001,
"loss": 0.4414,
"step": 285000
},
{
"epoch": 92.14608920491274,
"grad_norm": 2.1895627975463867,
"learning_rate": 0.001,
"loss": 0.4482,
"step": 285100
},
{
"epoch": 92.17840982546865,
"grad_norm": 1.6839579343795776,
"learning_rate": 0.001,
"loss": 0.4534,
"step": 285200
},
{
"epoch": 92.21073044602457,
"grad_norm": 2.260793924331665,
"learning_rate": 0.001,
"loss": 0.453,
"step": 285300
},
{
"epoch": 92.24305106658048,
"grad_norm": 42.66197967529297,
"learning_rate": 0.001,
"loss": 0.4458,
"step": 285400
},
{
"epoch": 92.2753716871364,
"grad_norm": 1.6944698095321655,
"learning_rate": 0.001,
"loss": 0.4503,
"step": 285500
},
{
"epoch": 92.3076923076923,
"grad_norm": 1.6756781339645386,
"learning_rate": 0.001,
"loss": 0.4563,
"step": 285600
},
{
"epoch": 92.34001292824823,
"grad_norm": 1.8374137878417969,
"learning_rate": 0.001,
"loss": 0.4468,
"step": 285700
},
{
"epoch": 92.37233354880414,
"grad_norm": 2.041393518447876,
"learning_rate": 0.001,
"loss": 0.4505,
"step": 285800
},
{
"epoch": 92.40465416936006,
"grad_norm": 1.8268532752990723,
"learning_rate": 0.001,
"loss": 0.4608,
"step": 285900
},
{
"epoch": 92.43697478991596,
"grad_norm": 1.7450902462005615,
"learning_rate": 0.001,
"loss": 0.4634,
"step": 286000
},
{
"epoch": 92.46929541047189,
"grad_norm": 1.7353445291519165,
"learning_rate": 0.001,
"loss": 0.4794,
"step": 286100
},
{
"epoch": 92.5016160310278,
"grad_norm": 2.340031385421753,
"learning_rate": 0.001,
"loss": 0.471,
"step": 286200
},
{
"epoch": 92.53393665158372,
"grad_norm": 1.8648983240127563,
"learning_rate": 0.001,
"loss": 0.4723,
"step": 286300
},
{
"epoch": 92.56625727213962,
"grad_norm": 2.6980106830596924,
"learning_rate": 0.001,
"loss": 0.4644,
"step": 286400
},
{
"epoch": 92.59857789269554,
"grad_norm": 1.8055412769317627,
"learning_rate": 0.001,
"loss": 0.468,
"step": 286500
},
{
"epoch": 92.63089851325145,
"grad_norm": 2.3196511268615723,
"learning_rate": 0.001,
"loss": 0.4766,
"step": 286600
},
{
"epoch": 92.66321913380737,
"grad_norm": 1.4777075052261353,
"learning_rate": 0.001,
"loss": 0.4736,
"step": 286700
},
{
"epoch": 92.69553975436328,
"grad_norm": 2.1524486541748047,
"learning_rate": 0.001,
"loss": 0.4834,
"step": 286800
},
{
"epoch": 92.7278603749192,
"grad_norm": 2.242614507675171,
"learning_rate": 0.001,
"loss": 0.4772,
"step": 286900
},
{
"epoch": 92.76018099547511,
"grad_norm": 1.7869904041290283,
"learning_rate": 0.001,
"loss": 0.4739,
"step": 287000
},
{
"epoch": 92.79250161603103,
"grad_norm": 2.129345417022705,
"learning_rate": 0.001,
"loss": 0.4887,
"step": 287100
},
{
"epoch": 92.82482223658694,
"grad_norm": 1.5137324333190918,
"learning_rate": 0.001,
"loss": 0.4807,
"step": 287200
},
{
"epoch": 92.85714285714286,
"grad_norm": 2.0685720443725586,
"learning_rate": 0.001,
"loss": 0.487,
"step": 287300
},
{
"epoch": 92.88946347769877,
"grad_norm": 1.8368549346923828,
"learning_rate": 0.001,
"loss": 0.5025,
"step": 287400
},
{
"epoch": 92.92178409825469,
"grad_norm": 1.7226216793060303,
"learning_rate": 0.001,
"loss": 0.4917,
"step": 287500
},
{
"epoch": 92.9541047188106,
"grad_norm": 2.0484440326690674,
"learning_rate": 0.001,
"loss": 0.4885,
"step": 287600
},
{
"epoch": 92.98642533936652,
"grad_norm": 1.8843029737472534,
"learning_rate": 0.001,
"loss": 0.4872,
"step": 287700
},
{
"epoch": 93.01874595992243,
"grad_norm": 2.280056953430176,
"learning_rate": 0.001,
"loss": 0.4684,
"step": 287800
},
{
"epoch": 93.05106658047835,
"grad_norm": 1.6946696043014526,
"learning_rate": 0.001,
"loss": 0.4274,
"step": 287900
},
{
"epoch": 93.08338720103426,
"grad_norm": 2.300701856613159,
"learning_rate": 0.001,
"loss": 0.4325,
"step": 288000
},
{
"epoch": 93.11570782159018,
"grad_norm": 1.729552984237671,
"learning_rate": 0.001,
"loss": 0.4413,
"step": 288100
},
{
"epoch": 93.14802844214609,
"grad_norm": 2.0106143951416016,
"learning_rate": 0.001,
"loss": 0.4507,
"step": 288200
},
{
"epoch": 93.180349062702,
"grad_norm": 1.5920679569244385,
"learning_rate": 0.001,
"loss": 0.4397,
"step": 288300
},
{
"epoch": 93.21266968325791,
"grad_norm": 1.4083905220031738,
"learning_rate": 0.001,
"loss": 0.4442,
"step": 288400
},
{
"epoch": 93.24499030381384,
"grad_norm": 1.5656646490097046,
"learning_rate": 0.001,
"loss": 0.444,
"step": 288500
},
{
"epoch": 93.27731092436974,
"grad_norm": 1.4155176877975464,
"learning_rate": 0.001,
"loss": 0.4594,
"step": 288600
},
{
"epoch": 93.30963154492567,
"grad_norm": 1.781264305114746,
"learning_rate": 0.001,
"loss": 0.4541,
"step": 288700
},
{
"epoch": 93.34195216548157,
"grad_norm": 1.7372766733169556,
"learning_rate": 0.001,
"loss": 0.4577,
"step": 288800
},
{
"epoch": 93.3742727860375,
"grad_norm": 1.7941893339157104,
"learning_rate": 0.001,
"loss": 0.4646,
"step": 288900
},
{
"epoch": 93.4065934065934,
"grad_norm": 1.5748519897460938,
"learning_rate": 0.001,
"loss": 0.4587,
"step": 289000
},
{
"epoch": 93.43891402714932,
"grad_norm": 2.3406195640563965,
"learning_rate": 0.001,
"loss": 0.4608,
"step": 289100
},
{
"epoch": 93.47123464770523,
"grad_norm": 1.817068099975586,
"learning_rate": 0.001,
"loss": 0.4598,
"step": 289200
},
{
"epoch": 93.50355526826115,
"grad_norm": 1.4823451042175293,
"learning_rate": 0.001,
"loss": 0.4561,
"step": 289300
},
{
"epoch": 93.53587588881706,
"grad_norm": 1.5961811542510986,
"learning_rate": 0.001,
"loss": 0.4662,
"step": 289400
},
{
"epoch": 93.56819650937298,
"grad_norm": 1.4638841152191162,
"learning_rate": 0.001,
"loss": 0.4713,
"step": 289500
},
{
"epoch": 93.60051712992889,
"grad_norm": 2.1063473224639893,
"learning_rate": 0.001,
"loss": 0.4644,
"step": 289600
},
{
"epoch": 93.63283775048481,
"grad_norm": 1.989016056060791,
"learning_rate": 0.001,
"loss": 0.4611,
"step": 289700
},
{
"epoch": 93.66515837104072,
"grad_norm": 1.9754928350448608,
"learning_rate": 0.001,
"loss": 0.4624,
"step": 289800
},
{
"epoch": 93.69747899159664,
"grad_norm": 1.4447102546691895,
"learning_rate": 0.001,
"loss": 0.4636,
"step": 289900
},
{
"epoch": 93.72979961215255,
"grad_norm": 1.5371551513671875,
"learning_rate": 0.001,
"loss": 0.4768,
"step": 290000
},
{
"epoch": 93.76212023270847,
"grad_norm": 1.5299981832504272,
"learning_rate": 0.001,
"loss": 0.4783,
"step": 290100
},
{
"epoch": 93.79444085326438,
"grad_norm": 1.286318063735962,
"learning_rate": 0.001,
"loss": 0.4764,
"step": 290200
},
{
"epoch": 93.8267614738203,
"grad_norm": 2.0626156330108643,
"learning_rate": 0.001,
"loss": 0.4776,
"step": 290300
},
{
"epoch": 93.8590820943762,
"grad_norm": 1.6997871398925781,
"learning_rate": 0.001,
"loss": 0.4695,
"step": 290400
},
{
"epoch": 93.89140271493213,
"grad_norm": 3.9364075660705566,
"learning_rate": 0.001,
"loss": 0.4801,
"step": 290500
},
{
"epoch": 93.92372333548803,
"grad_norm": 2.015624523162842,
"learning_rate": 0.001,
"loss": 0.4962,
"step": 290600
},
{
"epoch": 93.95604395604396,
"grad_norm": 344.4596862792969,
"learning_rate": 0.001,
"loss": 0.4812,
"step": 290700
},
{
"epoch": 93.98836457659988,
"grad_norm": 1.8616305589675903,
"learning_rate": 0.001,
"loss": 0.4814,
"step": 290800
},
{
"epoch": 94.02068519715579,
"grad_norm": 1.8940582275390625,
"learning_rate": 0.001,
"loss": 0.464,
"step": 290900
},
{
"epoch": 94.0530058177117,
"grad_norm": 1.938902735710144,
"learning_rate": 0.001,
"loss": 0.4213,
"step": 291000
},
{
"epoch": 94.08532643826761,
"grad_norm": 1.4678521156311035,
"learning_rate": 0.001,
"loss": 0.4337,
"step": 291100
},
{
"epoch": 94.11764705882354,
"grad_norm": 1.5415394306182861,
"learning_rate": 0.001,
"loss": 0.4445,
"step": 291200
},
{
"epoch": 94.14996767937944,
"grad_norm": 1.8836512565612793,
"learning_rate": 0.001,
"loss": 0.4274,
"step": 291300
},
{
"epoch": 94.18228829993537,
"grad_norm": 1.2818994522094727,
"learning_rate": 0.001,
"loss": 0.4448,
"step": 291400
},
{
"epoch": 94.21460892049127,
"grad_norm": 2.0979812145233154,
"learning_rate": 0.001,
"loss": 0.4396,
"step": 291500
},
{
"epoch": 94.2469295410472,
"grad_norm": 1.6995420455932617,
"learning_rate": 0.001,
"loss": 0.4488,
"step": 291600
},
{
"epoch": 94.2792501616031,
"grad_norm": 1.7920165061950684,
"learning_rate": 0.001,
"loss": 0.448,
"step": 291700
},
{
"epoch": 94.31157078215902,
"grad_norm": 1.6216713190078735,
"learning_rate": 0.001,
"loss": 0.452,
"step": 291800
},
{
"epoch": 94.34389140271493,
"grad_norm": 1.4908044338226318,
"learning_rate": 0.001,
"loss": 0.4468,
"step": 291900
},
{
"epoch": 94.37621202327085,
"grad_norm": 1.6962876319885254,
"learning_rate": 0.001,
"loss": 0.45,
"step": 292000
},
{
"epoch": 94.40853264382676,
"grad_norm": 1.460558533668518,
"learning_rate": 0.001,
"loss": 0.446,
"step": 292100
},
{
"epoch": 94.44085326438268,
"grad_norm": 1.4219996929168701,
"learning_rate": 0.001,
"loss": 0.4533,
"step": 292200
},
{
"epoch": 94.47317388493859,
"grad_norm": 1.758070468902588,
"learning_rate": 0.001,
"loss": 0.4511,
"step": 292300
},
{
"epoch": 94.50549450549451,
"grad_norm": 1.518767237663269,
"learning_rate": 0.001,
"loss": 0.4601,
"step": 292400
},
{
"epoch": 94.53781512605042,
"grad_norm": 1.4330819845199585,
"learning_rate": 0.001,
"loss": 0.464,
"step": 292500
},
{
"epoch": 94.57013574660634,
"grad_norm": 1.7060296535491943,
"learning_rate": 0.001,
"loss": 0.4593,
"step": 292600
},
{
"epoch": 94.60245636716225,
"grad_norm": 1.3387072086334229,
"learning_rate": 0.001,
"loss": 0.4582,
"step": 292700
},
{
"epoch": 94.63477698771817,
"grad_norm": 1.6963450908660889,
"learning_rate": 0.001,
"loss": 0.4619,
"step": 292800
},
{
"epoch": 94.66709760827408,
"grad_norm": 2.0844035148620605,
"learning_rate": 0.001,
"loss": 0.4665,
"step": 292900
},
{
"epoch": 94.69941822883,
"grad_norm": 1.3902792930603027,
"learning_rate": 0.001,
"loss": 0.4783,
"step": 293000
},
{
"epoch": 94.7317388493859,
"grad_norm": 1.3973846435546875,
"learning_rate": 0.001,
"loss": 0.4714,
"step": 293100
},
{
"epoch": 94.76405946994183,
"grad_norm": 1.479785442352295,
"learning_rate": 0.001,
"loss": 0.471,
"step": 293200
},
{
"epoch": 94.79638009049773,
"grad_norm": 1.6167840957641602,
"learning_rate": 0.001,
"loss": 0.4683,
"step": 293300
},
{
"epoch": 94.82870071105366,
"grad_norm": 1.5192022323608398,
"learning_rate": 0.001,
"loss": 0.484,
"step": 293400
},
{
"epoch": 94.86102133160956,
"grad_norm": 1.438111424446106,
"learning_rate": 0.001,
"loss": 0.4732,
"step": 293500
},
{
"epoch": 94.89334195216549,
"grad_norm": 1.3876760005950928,
"learning_rate": 0.001,
"loss": 0.4824,
"step": 293600
},
{
"epoch": 94.9256625727214,
"grad_norm": 1.9509048461914062,
"learning_rate": 0.001,
"loss": 0.4725,
"step": 293700
},
{
"epoch": 94.95798319327731,
"grad_norm": 1.4013237953186035,
"learning_rate": 0.001,
"loss": 0.4802,
"step": 293800
},
{
"epoch": 94.99030381383322,
"grad_norm": 1.737876534461975,
"learning_rate": 0.001,
"loss": 0.4793,
"step": 293900
},
{
"epoch": 95.02262443438914,
"grad_norm": 1.3785439729690552,
"learning_rate": 0.001,
"loss": 0.4504,
"step": 294000
},
{
"epoch": 95.05494505494505,
"grad_norm": 1.9236104488372803,
"learning_rate": 0.001,
"loss": 0.4349,
"step": 294100
},
{
"epoch": 95.08726567550097,
"grad_norm": 1.619763970375061,
"learning_rate": 0.001,
"loss": 0.4403,
"step": 294200
},
{
"epoch": 95.11958629605688,
"grad_norm": 1.5868083238601685,
"learning_rate": 0.001,
"loss": 0.4352,
"step": 294300
},
{
"epoch": 95.1519069166128,
"grad_norm": 1.8232312202453613,
"learning_rate": 0.001,
"loss": 0.4315,
"step": 294400
},
{
"epoch": 95.18422753716871,
"grad_norm": 1.5176035165786743,
"learning_rate": 0.001,
"loss": 0.434,
"step": 294500
},
{
"epoch": 95.21654815772463,
"grad_norm": 1.5614092350006104,
"learning_rate": 0.001,
"loss": 0.4418,
"step": 294600
},
{
"epoch": 95.24886877828054,
"grad_norm": 1.5651224851608276,
"learning_rate": 0.001,
"loss": 0.434,
"step": 294700
},
{
"epoch": 95.28118939883646,
"grad_norm": 1.3997880220413208,
"learning_rate": 0.001,
"loss": 0.4466,
"step": 294800
},
{
"epoch": 95.31351001939237,
"grad_norm": 1.8400006294250488,
"learning_rate": 0.001,
"loss": 0.4402,
"step": 294900
},
{
"epoch": 95.34583063994829,
"grad_norm": 1.32888662815094,
"learning_rate": 0.001,
"loss": 0.4404,
"step": 295000
},
{
"epoch": 95.3781512605042,
"grad_norm": 1.6747912168502808,
"learning_rate": 0.001,
"loss": 0.4522,
"step": 295100
},
{
"epoch": 95.41047188106012,
"grad_norm": 1.6857593059539795,
"learning_rate": 0.001,
"loss": 0.444,
"step": 295200
},
{
"epoch": 95.44279250161603,
"grad_norm": 1.3832392692565918,
"learning_rate": 0.001,
"loss": 0.4434,
"step": 295300
},
{
"epoch": 95.47511312217195,
"grad_norm": 1.914587140083313,
"learning_rate": 0.001,
"loss": 0.454,
"step": 295400
},
{
"epoch": 95.50743374272786,
"grad_norm": 1.6839224100112915,
"learning_rate": 0.001,
"loss": 0.4537,
"step": 295500
},
{
"epoch": 95.53975436328378,
"grad_norm": 1.4552383422851562,
"learning_rate": 0.001,
"loss": 0.4591,
"step": 295600
},
{
"epoch": 95.57207498383968,
"grad_norm": 1.5288134813308716,
"learning_rate": 0.001,
"loss": 0.4533,
"step": 295700
},
{
"epoch": 95.6043956043956,
"grad_norm": 1.8305370807647705,
"learning_rate": 0.001,
"loss": 0.4655,
"step": 295800
},
{
"epoch": 95.63671622495151,
"grad_norm": 1.2696152925491333,
"learning_rate": 0.001,
"loss": 0.4595,
"step": 295900
},
{
"epoch": 95.66903684550743,
"grad_norm": 1.5350581407546997,
"learning_rate": 0.001,
"loss": 0.4685,
"step": 296000
},
{
"epoch": 95.70135746606334,
"grad_norm": 1.1999725103378296,
"learning_rate": 0.001,
"loss": 0.4595,
"step": 296100
},
{
"epoch": 95.73367808661926,
"grad_norm": 1.2505929470062256,
"learning_rate": 0.001,
"loss": 0.4678,
"step": 296200
},
{
"epoch": 95.76599870717517,
"grad_norm": 1.3994829654693604,
"learning_rate": 0.001,
"loss": 0.4646,
"step": 296300
},
{
"epoch": 95.7983193277311,
"grad_norm": 1.6253557205200195,
"learning_rate": 0.001,
"loss": 0.4723,
"step": 296400
},
{
"epoch": 95.830639948287,
"grad_norm": 1.6554275751113892,
"learning_rate": 0.001,
"loss": 0.4714,
"step": 296500
},
{
"epoch": 95.86296056884292,
"grad_norm": 1.4614795446395874,
"learning_rate": 0.001,
"loss": 0.4793,
"step": 296600
},
{
"epoch": 95.89528118939883,
"grad_norm": 1.41798996925354,
"learning_rate": 0.001,
"loss": 0.464,
"step": 296700
},
{
"epoch": 95.92760180995475,
"grad_norm": 1.7458993196487427,
"learning_rate": 0.001,
"loss": 0.4661,
"step": 296800
},
{
"epoch": 95.95992243051066,
"grad_norm": 2.5169103145599365,
"learning_rate": 0.001,
"loss": 0.4737,
"step": 296900
},
{
"epoch": 95.99224305106658,
"grad_norm": 1.4173320531845093,
"learning_rate": 0.001,
"loss": 0.4853,
"step": 297000
},
{
"epoch": 96.0245636716225,
"grad_norm": 1.596639633178711,
"learning_rate": 0.001,
"loss": 0.4336,
"step": 297100
},
{
"epoch": 96.05688429217841,
"grad_norm": 1.4442991018295288,
"learning_rate": 0.001,
"loss": 0.4185,
"step": 297200
},
{
"epoch": 96.08920491273433,
"grad_norm": 1.3611177206039429,
"learning_rate": 0.001,
"loss": 0.4274,
"step": 297300
},
{
"epoch": 96.12152553329024,
"grad_norm": 1.1800782680511475,
"learning_rate": 0.001,
"loss": 0.4365,
"step": 297400
},
{
"epoch": 96.15384615384616,
"grad_norm": 1.8172197341918945,
"learning_rate": 0.001,
"loss": 0.4372,
"step": 297500
},
{
"epoch": 96.18616677440207,
"grad_norm": 1.4317479133605957,
"learning_rate": 0.001,
"loss": 0.4323,
"step": 297600
},
{
"epoch": 96.21848739495799,
"grad_norm": 1.2062143087387085,
"learning_rate": 0.001,
"loss": 0.4232,
"step": 297700
},
{
"epoch": 96.2508080155139,
"grad_norm": 1.6967133283615112,
"learning_rate": 0.001,
"loss": 0.4363,
"step": 297800
},
{
"epoch": 96.28312863606982,
"grad_norm": 1.3945610523223877,
"learning_rate": 0.001,
"loss": 0.4354,
"step": 297900
},
{
"epoch": 96.31544925662573,
"grad_norm": 1.4094641208648682,
"learning_rate": 0.001,
"loss": 0.4441,
"step": 298000
},
{
"epoch": 96.34776987718165,
"grad_norm": 1.2168558835983276,
"learning_rate": 0.001,
"loss": 0.4382,
"step": 298100
},
{
"epoch": 96.38009049773756,
"grad_norm": 1.9572267532348633,
"learning_rate": 0.001,
"loss": 0.4441,
"step": 298200
},
{
"epoch": 96.41241111829348,
"grad_norm": 1.3450571298599243,
"learning_rate": 0.001,
"loss": 0.439,
"step": 298300
},
{
"epoch": 96.44473173884938,
"grad_norm": 1.5765080451965332,
"learning_rate": 0.001,
"loss": 0.4468,
"step": 298400
},
{
"epoch": 96.4770523594053,
"grad_norm": 1.4704385995864868,
"learning_rate": 0.001,
"loss": 0.4472,
"step": 298500
},
{
"epoch": 96.50937297996121,
"grad_norm": 1.5044299364089966,
"learning_rate": 0.001,
"loss": 0.4453,
"step": 298600
},
{
"epoch": 96.54169360051714,
"grad_norm": 1.373509407043457,
"learning_rate": 0.001,
"loss": 0.4599,
"step": 298700
},
{
"epoch": 96.57401422107304,
"grad_norm": 1.180299162864685,
"learning_rate": 0.001,
"loss": 0.4476,
"step": 298800
},
{
"epoch": 96.60633484162896,
"grad_norm": 2.22157621383667,
"learning_rate": 0.001,
"loss": 0.451,
"step": 298900
},
{
"epoch": 96.63865546218487,
"grad_norm": 1.5506231784820557,
"learning_rate": 0.001,
"loss": 0.461,
"step": 299000
},
{
"epoch": 96.6709760827408,
"grad_norm": 1.7377060651779175,
"learning_rate": 0.001,
"loss": 0.4565,
"step": 299100
},
{
"epoch": 96.7032967032967,
"grad_norm": 1.1429818868637085,
"learning_rate": 0.001,
"loss": 0.4613,
"step": 299200
},
{
"epoch": 96.73561732385262,
"grad_norm": 1.4461809396743774,
"learning_rate": 0.001,
"loss": 0.4636,
"step": 299300
},
{
"epoch": 96.76793794440853,
"grad_norm": 1.6371991634368896,
"learning_rate": 0.001,
"loss": 0.4725,
"step": 299400
},
{
"epoch": 96.80025856496445,
"grad_norm": 2.10194993019104,
"learning_rate": 0.001,
"loss": 0.4548,
"step": 299500
},
{
"epoch": 96.83257918552036,
"grad_norm": 1.3943737745285034,
"learning_rate": 0.001,
"loss": 0.4732,
"step": 299600
},
{
"epoch": 96.86489980607628,
"grad_norm": 1.4672203063964844,
"learning_rate": 0.001,
"loss": 0.465,
"step": 299700
},
{
"epoch": 96.89722042663219,
"grad_norm": 1.6431360244750977,
"learning_rate": 0.001,
"loss": 0.4576,
"step": 299800
},
{
"epoch": 96.92954104718811,
"grad_norm": 1.731074333190918,
"learning_rate": 0.001,
"loss": 0.4686,
"step": 299900
},
{
"epoch": 96.96186166774402,
"grad_norm": 1.5479676723480225,
"learning_rate": 0.001,
"loss": 0.4716,
"step": 300000
},
{
"epoch": 96.99418228829994,
"grad_norm": 1.5700945854187012,
"learning_rate": 0.001,
"loss": 0.4677,
"step": 300100
},
{
"epoch": 97.02650290885585,
"grad_norm": 1.269909381866455,
"learning_rate": 0.001,
"loss": 0.4298,
"step": 300200
},
{
"epoch": 97.05882352941177,
"grad_norm": 1.5230191946029663,
"learning_rate": 0.001,
"loss": 0.4174,
"step": 300300
},
{
"epoch": 97.09114414996768,
"grad_norm": 1.2741731405258179,
"learning_rate": 0.001,
"loss": 0.4206,
"step": 300400
},
{
"epoch": 97.1234647705236,
"grad_norm": 1.440308690071106,
"learning_rate": 0.001,
"loss": 0.4147,
"step": 300500
},
{
"epoch": 97.1557853910795,
"grad_norm": 1.1169484853744507,
"learning_rate": 0.001,
"loss": 0.4224,
"step": 300600
},
{
"epoch": 97.18810601163543,
"grad_norm": 1.6060082912445068,
"learning_rate": 0.001,
"loss": 0.4322,
"step": 300700
},
{
"epoch": 97.22042663219133,
"grad_norm": 1.4612475633621216,
"learning_rate": 0.001,
"loss": 0.4353,
"step": 300800
},
{
"epoch": 97.25274725274726,
"grad_norm": 1.2991474866867065,
"learning_rate": 0.001,
"loss": 0.4241,
"step": 300900
},
{
"epoch": 97.28506787330316,
"grad_norm": 1.3116704225540161,
"learning_rate": 0.001,
"loss": 0.4366,
"step": 301000
},
{
"epoch": 97.31738849385908,
"grad_norm": 1.2389549016952515,
"learning_rate": 0.001,
"loss": 0.4384,
"step": 301100
},
{
"epoch": 97.34970911441499,
"grad_norm": 1.2728149890899658,
"learning_rate": 0.001,
"loss": 0.4372,
"step": 301200
},
{
"epoch": 97.38202973497091,
"grad_norm": 1.3004136085510254,
"learning_rate": 0.001,
"loss": 0.4373,
"step": 301300
},
{
"epoch": 97.41435035552682,
"grad_norm": 1.227472186088562,
"learning_rate": 0.001,
"loss": 0.4422,
"step": 301400
},
{
"epoch": 97.44667097608274,
"grad_norm": 1.1583950519561768,
"learning_rate": 0.001,
"loss": 0.4505,
"step": 301500
},
{
"epoch": 97.47899159663865,
"grad_norm": 1.5739110708236694,
"learning_rate": 0.001,
"loss": 0.4521,
"step": 301600
},
{
"epoch": 97.51131221719457,
"grad_norm": 1.3445355892181396,
"learning_rate": 0.001,
"loss": 0.4532,
"step": 301700
},
{
"epoch": 97.54363283775048,
"grad_norm": 1.2330920696258545,
"learning_rate": 0.001,
"loss": 0.4477,
"step": 301800
},
{
"epoch": 97.5759534583064,
"grad_norm": 1.5380816459655762,
"learning_rate": 0.001,
"loss": 0.4513,
"step": 301900
},
{
"epoch": 97.60827407886231,
"grad_norm": 1.2839477062225342,
"learning_rate": 0.001,
"loss": 0.4455,
"step": 302000
},
{
"epoch": 97.64059469941823,
"grad_norm": 1.4159269332885742,
"learning_rate": 0.001,
"loss": 0.4491,
"step": 302100
},
{
"epoch": 97.67291531997414,
"grad_norm": 1.401963472366333,
"learning_rate": 0.001,
"loss": 0.4503,
"step": 302200
},
{
"epoch": 97.70523594053006,
"grad_norm": 1.7864422798156738,
"learning_rate": 0.001,
"loss": 0.4675,
"step": 302300
},
{
"epoch": 97.73755656108597,
"grad_norm": 1.2578034400939941,
"learning_rate": 0.001,
"loss": 0.4583,
"step": 302400
},
{
"epoch": 97.76987718164189,
"grad_norm": 1.2712514400482178,
"learning_rate": 0.001,
"loss": 0.4646,
"step": 302500
},
{
"epoch": 97.8021978021978,
"grad_norm": 1.465354561805725,
"learning_rate": 0.001,
"loss": 0.4548,
"step": 302600
},
{
"epoch": 97.83451842275372,
"grad_norm": 1.1005356311798096,
"learning_rate": 0.001,
"loss": 0.4517,
"step": 302700
},
{
"epoch": 97.86683904330962,
"grad_norm": 1.7632066011428833,
"learning_rate": 0.001,
"loss": 0.4521,
"step": 302800
},
{
"epoch": 97.89915966386555,
"grad_norm": 1.5967885255813599,
"learning_rate": 0.001,
"loss": 0.473,
"step": 302900
},
{
"epoch": 97.93148028442145,
"grad_norm": 1.3136255741119385,
"learning_rate": 0.001,
"loss": 0.4713,
"step": 303000
},
{
"epoch": 97.96380090497738,
"grad_norm": 1.0595479011535645,
"learning_rate": 0.001,
"loss": 0.4598,
"step": 303100
},
{
"epoch": 97.99612152553328,
"grad_norm": 1.0992523431777954,
"learning_rate": 0.001,
"loss": 0.464,
"step": 303200
},
{
"epoch": 98.0284421460892,
"grad_norm": 1.4200688600540161,
"learning_rate": 0.001,
"loss": 0.4209,
"step": 303300
},
{
"epoch": 98.06076276664513,
"grad_norm": 1.107372760772705,
"learning_rate": 0.001,
"loss": 0.418,
"step": 303400
},
{
"epoch": 98.09308338720103,
"grad_norm": 0.9656426310539246,
"learning_rate": 0.001,
"loss": 0.4113,
"step": 303500
},
{
"epoch": 98.12540400775696,
"grad_norm": 0.9549182057380676,
"learning_rate": 0.001,
"loss": 0.4301,
"step": 303600
},
{
"epoch": 98.15772462831286,
"grad_norm": 1.4307749271392822,
"learning_rate": 0.001,
"loss": 0.4327,
"step": 303700
},
{
"epoch": 98.19004524886878,
"grad_norm": 1.2919740676879883,
"learning_rate": 0.001,
"loss": 0.4276,
"step": 303800
},
{
"epoch": 98.22236586942469,
"grad_norm": 1.20173978805542,
"learning_rate": 0.001,
"loss": 0.4228,
"step": 303900
},
{
"epoch": 98.25468648998061,
"grad_norm": 1.2167638540267944,
"learning_rate": 0.001,
"loss": 0.4178,
"step": 304000
},
{
"epoch": 98.28700711053652,
"grad_norm": 1.0864397287368774,
"learning_rate": 0.001,
"loss": 0.4273,
"step": 304100
},
{
"epoch": 98.31932773109244,
"grad_norm": 1.4295344352722168,
"learning_rate": 0.001,
"loss": 0.4379,
"step": 304200
},
{
"epoch": 98.35164835164835,
"grad_norm": 1.2204785346984863,
"learning_rate": 0.001,
"loss": 0.4396,
"step": 304300
},
{
"epoch": 98.38396897220427,
"grad_norm": 1.4378937482833862,
"learning_rate": 0.001,
"loss": 0.4384,
"step": 304400
},
{
"epoch": 98.41628959276018,
"grad_norm": 1.0496668815612793,
"learning_rate": 0.001,
"loss": 0.4438,
"step": 304500
},
{
"epoch": 98.4486102133161,
"grad_norm": 1.357386827468872,
"learning_rate": 0.001,
"loss": 0.4592,
"step": 304600
},
{
"epoch": 98.48093083387201,
"grad_norm": 1.6617597341537476,
"learning_rate": 0.001,
"loss": 0.4409,
"step": 304700
},
{
"epoch": 98.51325145442793,
"grad_norm": 1.351397156715393,
"learning_rate": 0.001,
"loss": 0.4471,
"step": 304800
},
{
"epoch": 98.54557207498384,
"grad_norm": 1.240196704864502,
"learning_rate": 0.001,
"loss": 0.4375,
"step": 304900
},
{
"epoch": 98.57789269553976,
"grad_norm": 1.221144437789917,
"learning_rate": 0.001,
"loss": 0.4476,
"step": 305000
},
{
"epoch": 98.61021331609567,
"grad_norm": 1.5508735179901123,
"learning_rate": 0.001,
"loss": 0.4431,
"step": 305100
},
{
"epoch": 98.64253393665159,
"grad_norm": 1.0978624820709229,
"learning_rate": 0.001,
"loss": 0.4461,
"step": 305200
},
{
"epoch": 98.6748545572075,
"grad_norm": 1.2614808082580566,
"learning_rate": 0.001,
"loss": 0.4397,
"step": 305300
},
{
"epoch": 98.70717517776342,
"grad_norm": 1.1213574409484863,
"learning_rate": 0.001,
"loss": 0.439,
"step": 305400
},
{
"epoch": 98.73949579831933,
"grad_norm": 1.5465046167373657,
"learning_rate": 0.001,
"loss": 0.4592,
"step": 305500
},
{
"epoch": 98.77181641887525,
"grad_norm": 1.1399887800216675,
"learning_rate": 0.001,
"loss": 0.4663,
"step": 305600
},
{
"epoch": 98.80413703943115,
"grad_norm": 1.1044340133666992,
"learning_rate": 0.001,
"loss": 0.4492,
"step": 305700
},
{
"epoch": 98.83645765998708,
"grad_norm": 1.1779178380966187,
"learning_rate": 0.001,
"loss": 0.4516,
"step": 305800
},
{
"epoch": 98.86877828054298,
"grad_norm": 1.3684269189834595,
"learning_rate": 0.001,
"loss": 0.4532,
"step": 305900
},
{
"epoch": 98.9010989010989,
"grad_norm": 1.3492311239242554,
"learning_rate": 0.001,
"loss": 0.4511,
"step": 306000
},
{
"epoch": 98.93341952165481,
"grad_norm": 1.1889764070510864,
"learning_rate": 0.001,
"loss": 0.4664,
"step": 306100
},
{
"epoch": 98.96574014221073,
"grad_norm": 0.8841199278831482,
"learning_rate": 0.001,
"loss": 0.4649,
"step": 306200
},
{
"epoch": 98.99806076276664,
"grad_norm": 0.9290673136711121,
"learning_rate": 0.001,
"loss": 0.4531,
"step": 306300
},
{
"epoch": 99.03038138332256,
"grad_norm": 1.022675633430481,
"learning_rate": 0.001,
"loss": 0.42,
"step": 306400
},
{
"epoch": 99.06270200387847,
"grad_norm": 1.138304352760315,
"learning_rate": 0.001,
"loss": 0.4125,
"step": 306500
},
{
"epoch": 99.09502262443439,
"grad_norm": 1.1132817268371582,
"learning_rate": 0.001,
"loss": 0.4205,
"step": 306600
},
{
"epoch": 99.1273432449903,
"grad_norm": 1.1500136852264404,
"learning_rate": 0.001,
"loss": 0.4136,
"step": 306700
},
{
"epoch": 99.15966386554622,
"grad_norm": 1.1773744821548462,
"learning_rate": 0.001,
"loss": 0.4212,
"step": 306800
},
{
"epoch": 99.19198448610213,
"grad_norm": 1.4039117097854614,
"learning_rate": 0.001,
"loss": 0.4338,
"step": 306900
},
{
"epoch": 99.22430510665805,
"grad_norm": 1.0985853672027588,
"learning_rate": 0.001,
"loss": 0.4278,
"step": 307000
},
{
"epoch": 99.25662572721396,
"grad_norm": 0.7000985741615295,
"learning_rate": 0.001,
"loss": 0.424,
"step": 307100
},
{
"epoch": 99.28894634776988,
"grad_norm": 1.5359601974487305,
"learning_rate": 0.001,
"loss": 0.4353,
"step": 307200
},
{
"epoch": 99.32126696832579,
"grad_norm": 0.8994166254997253,
"learning_rate": 0.001,
"loss": 0.4382,
"step": 307300
},
{
"epoch": 99.35358758888171,
"grad_norm": 0.724768340587616,
"learning_rate": 0.001,
"loss": 0.4355,
"step": 307400
},
{
"epoch": 99.38590820943762,
"grad_norm": 0.8465492129325867,
"learning_rate": 0.001,
"loss": 0.444,
"step": 307500
},
{
"epoch": 99.41822882999354,
"grad_norm": 1.830216884613037,
"learning_rate": 0.001,
"loss": 0.4336,
"step": 307600
},
{
"epoch": 99.45054945054945,
"grad_norm": 1.0542271137237549,
"learning_rate": 0.001,
"loss": 0.4336,
"step": 307700
},
{
"epoch": 99.48287007110537,
"grad_norm": 0.8846477270126343,
"learning_rate": 0.001,
"loss": 0.4516,
"step": 307800
},
{
"epoch": 99.51519069166127,
"grad_norm": 0.8210715055465698,
"learning_rate": 0.001,
"loss": 0.4412,
"step": 307900
},
{
"epoch": 99.5475113122172,
"grad_norm": 0.9523438215255737,
"learning_rate": 0.001,
"loss": 0.4341,
"step": 308000
}
],
"logging_steps": 100,
"max_steps": 309400,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 2000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.553808005867438e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}