Balcony-Model22 / trainer_state.json
adpretko's picture
Upload folder using huggingface_hub
64c3c64 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"ce_loss_10": 5.479339599609375,
"ce_loss_13": 3.4827667474746704,
"ce_loss_2": 13.979248523712158,
"ce_loss_3": 13.771953105926514,
"ce_loss_7": 7.430000305175781,
"epoch": 0.0001,
"grad_norm": 81408.0,
"kl_loss_10": 4489.56494140625,
"kl_loss_2": 22049.2119140625,
"kl_loss_3": 21566.693359375,
"kl_loss_7": 7499.004150390625,
"learning_rate": 1e-05,
"loss": 14123.4883,
"step": 1
},
{
"ce_loss_10": 5.119714260101318,
"ce_loss_13": 3.53999932607015,
"ce_loss_2": 11.240631209479439,
"ce_loss_3": 10.95585854848226,
"ce_loss_7": 6.906492206785414,
"epoch": 0.001,
"grad_norm": 37888.0,
"kl_loss_10": 3271.7383083767363,
"kl_loss_2": 15601.7939453125,
"kl_loss_3": 14746.902018229166,
"kl_loss_7": 6279.269124348958,
"learning_rate": 0.0001,
"loss": 9990.5972,
"step": 10
},
{
"ce_loss_10": 4.4336272239685055,
"ce_loss_13": 3.5471752166748045,
"ce_loss_2": 7.957242059707641,
"ce_loss_3": 7.463982796669006,
"ce_loss_7": 5.84502854347229,
"epoch": 0.002,
"grad_norm": 9216.0,
"kl_loss_10": 1664.693670654297,
"kl_loss_2": 8140.2906494140625,
"kl_loss_3": 7229.052661132812,
"kl_loss_7": 4268.229284667968,
"learning_rate": 0.0002,
"loss": 5408.8828,
"step": 20
},
{
"ce_loss_10": 3.938058543205261,
"ce_loss_13": 3.333306384086609,
"ce_loss_2": 6.773653674125671,
"ce_loss_3": 6.433317041397094,
"ce_loss_7": 5.134593844413757,
"epoch": 0.003,
"grad_norm": 3248.0,
"kl_loss_10": 1150.4377380371093,
"kl_loss_2": 6385.490258789063,
"kl_loss_3": 5771.109155273438,
"kl_loss_7": 3370.8108642578127,
"learning_rate": 0.0003,
"loss": 4090.45,
"step": 30
},
{
"ce_loss_10": 3.9883425116539,
"ce_loss_13": 3.5081888794898988,
"ce_loss_2": 6.318326234817505,
"ce_loss_3": 6.045081973075867,
"ce_loss_7": 4.969816541671753,
"epoch": 0.004,
"grad_norm": 6240.0,
"kl_loss_10": 955.3555145263672,
"kl_loss_2": 5198.101293945312,
"kl_loss_3": 4741.139855957032,
"kl_loss_7": 2762.6712036132812,
"learning_rate": 0.0004,
"loss": 3452.1297,
"step": 40
},
{
"ce_loss_10": 3.94427330493927,
"ce_loss_13": 3.4815958857536318,
"ce_loss_2": 6.088573956489563,
"ce_loss_3": 5.815527606010437,
"ce_loss_7": 4.780157661437988,
"epoch": 0.005,
"grad_norm": 4768.0,
"kl_loss_10": 882.2040557861328,
"kl_loss_2": 4868.76328125,
"kl_loss_3": 4416.1666259765625,
"kl_loss_7": 2467.264501953125,
"learning_rate": 0.0005,
"loss": 3156.5504,
"step": 50
},
{
"ce_loss_10": 3.877071762084961,
"ce_loss_13": 3.493886411190033,
"ce_loss_2": 5.86357319355011,
"ce_loss_3": 5.616779232025147,
"ce_loss_7": 4.6486598491668705,
"epoch": 0.006,
"grad_norm": 4768.0,
"kl_loss_10": 781.2706634521485,
"kl_loss_2": 4452.719311523438,
"kl_loss_3": 4034.9894165039063,
"kl_loss_7": 2237.3867736816405,
"learning_rate": 0.0006,
"loss": 2874.2984,
"step": 60
},
{
"ce_loss_10": 3.7734105229377746,
"ce_loss_13": 3.408212423324585,
"ce_loss_2": 5.7149782419204715,
"ce_loss_3": 5.489736318588257,
"ce_loss_7": 4.5027463555336,
"epoch": 0.007,
"grad_norm": 2896.0,
"kl_loss_10": 747.7415161132812,
"kl_loss_2": 4370.160473632813,
"kl_loss_3": 3989.598352050781,
"kl_loss_7": 2142.413153076172,
"learning_rate": 0.0007,
"loss": 2776.958,
"step": 70
},
{
"ce_loss_10": 3.7659215092658997,
"ce_loss_13": 3.409997522830963,
"ce_loss_2": 5.646710276603699,
"ce_loss_3": 5.404528284072876,
"ce_loss_7": 4.458205795288086,
"epoch": 0.008,
"grad_norm": 2256.0,
"kl_loss_10": 717.0317596435547,
"kl_loss_2": 4256.561865234375,
"kl_loss_3": 3846.635607910156,
"kl_loss_7": 2051.1767517089843,
"learning_rate": 0.0008,
"loss": 2710.7359,
"step": 80
},
{
"ce_loss_10": 3.6950827717781065,
"ce_loss_13": 3.3681079149246216,
"ce_loss_2": 5.57774977684021,
"ce_loss_3": 5.358799338340759,
"ce_loss_7": 4.384551775455475,
"epoch": 0.009,
"grad_norm": 2784.0,
"kl_loss_10": 674.085107421875,
"kl_loss_2": 4234.844958496094,
"kl_loss_3": 3849.5977783203125,
"kl_loss_7": 2047.007257080078,
"learning_rate": 0.0009000000000000001,
"loss": 2670.6148,
"step": 90
},
{
"ce_loss_10": 3.831665110588074,
"ce_loss_13": 3.493525803089142,
"ce_loss_2": 5.601197052001953,
"ce_loss_3": 5.398815608024597,
"ce_loss_7": 4.526931476593018,
"epoch": 0.01,
"grad_norm": 2752.0,
"kl_loss_10": 672.117709350586,
"kl_loss_2": 4021.284912109375,
"kl_loss_3": 3663.468957519531,
"kl_loss_7": 2026.185284423828,
"learning_rate": 0.001,
"loss": 2598.8504,
"step": 100
},
{
"ce_loss_10": 3.7488776206970216,
"ce_loss_13": 3.4458404183387756,
"ce_loss_2": 5.519205498695373,
"ce_loss_3": 5.296326518058777,
"ce_loss_7": 4.427326142787933,
"epoch": 0.011,
"grad_norm": 1728.0,
"kl_loss_10": 619.106655883789,
"kl_loss_2": 3960.856494140625,
"kl_loss_3": 3572.990026855469,
"kl_loss_7": 1934.6764282226563,
"learning_rate": 0.0009999974825027757,
"loss": 2513.0074,
"step": 110
},
{
"ce_loss_10": 3.805395770072937,
"ce_loss_13": 3.503445029258728,
"ce_loss_2": 5.498133254051209,
"ce_loss_3": 5.255028605461121,
"ce_loss_7": 4.415169513225555,
"epoch": 0.012,
"grad_norm": 2040.0,
"kl_loss_10": 603.263638305664,
"kl_loss_2": 3843.529895019531,
"kl_loss_3": 3407.914306640625,
"kl_loss_7": 1813.6988830566406,
"learning_rate": 0.0009999899300364532,
"loss": 2390.1301,
"step": 120
},
{
"ce_loss_10": 3.7707916855812074,
"ce_loss_13": 3.475922393798828,
"ce_loss_2": 5.500737500190735,
"ce_loss_3": 5.266239738464355,
"ce_loss_7": 4.383851003646851,
"epoch": 0.013,
"grad_norm": 2624.0,
"kl_loss_10": 587.465219116211,
"kl_loss_2": 3864.1528442382814,
"kl_loss_3": 3434.2073608398437,
"kl_loss_7": 1783.62373046875,
"learning_rate": 0.0009999773426770863,
"loss": 2449.8629,
"step": 130
},
{
"ce_loss_10": 3.867912781238556,
"ce_loss_13": 3.509277641773224,
"ce_loss_2": 5.469627714157104,
"ce_loss_3": 5.194933176040649,
"ce_loss_7": 4.379399788379669,
"epoch": 0.014,
"grad_norm": 1992.0,
"kl_loss_10": 725.8971160888672,
"kl_loss_2": 3778.5274291992187,
"kl_loss_3": 3296.354528808594,
"kl_loss_7": 1732.4250732421874,
"learning_rate": 0.0009999597205514296,
"loss": 2405.5832,
"step": 140
},
{
"ce_loss_10": 3.77968590259552,
"ce_loss_13": 3.471135640144348,
"ce_loss_2": 5.36739604473114,
"ce_loss_3": 5.106346774101257,
"ce_loss_7": 4.320439124107361,
"epoch": 0.015,
"grad_norm": 1360.0,
"kl_loss_10": 624.2580291748047,
"kl_loss_2": 3632.6698486328123,
"kl_loss_3": 3176.562145996094,
"kl_loss_7": 1695.959698486328,
"learning_rate": 0.0009999370638369377,
"loss": 2293.6836,
"step": 150
},
{
"ce_loss_10": 3.8028363585472107,
"ce_loss_13": 3.5099030256271364,
"ce_loss_2": 5.394668865203857,
"ce_loss_3": 5.231132960319519,
"ce_loss_7": 4.338041806221009,
"epoch": 0.016,
"grad_norm": 3296.0,
"kl_loss_10": 591.1725463867188,
"kl_loss_2": 3644.2818969726563,
"kl_loss_3": 3364.4876342773437,
"kl_loss_7": 1640.8858154296875,
"learning_rate": 0.000999909372761763,
"loss": 2313.8473,
"step": 160
},
{
"ce_loss_10": 3.715697240829468,
"ce_loss_13": 3.4447871685028075,
"ce_loss_2": 5.341588139533997,
"ce_loss_3": 5.263697862625122,
"ce_loss_7": 4.2574918389320375,
"epoch": 0.017,
"grad_norm": 3008.0,
"kl_loss_10": 555.6443099975586,
"kl_loss_2": 3670.3303466796874,
"kl_loss_3": 3553.7742431640627,
"kl_loss_7": 1629.572772216797,
"learning_rate": 0.0009998766476047546,
"loss": 2372.3059,
"step": 170
},
{
"ce_loss_10": 3.7622690200805664,
"ce_loss_13": 3.4889180302619933,
"ce_loss_2": 5.369840741157532,
"ce_loss_3": 5.276954698562622,
"ce_loss_7": 4.275516867637634,
"epoch": 0.018,
"grad_norm": 2040.0,
"kl_loss_10": 565.5127258300781,
"kl_loss_2": 3642.269982910156,
"kl_loss_3": 3495.3918212890626,
"kl_loss_7": 1571.4158569335937,
"learning_rate": 0.0009998388886954545,
"loss": 2349.4688,
"step": 180
},
{
"ce_loss_10": 3.712801456451416,
"ce_loss_13": 3.4555400371551515,
"ce_loss_2": 5.312930059432984,
"ce_loss_3": 5.148007488250732,
"ce_loss_7": 4.23377754688263,
"epoch": 0.019,
"grad_norm": 1328.0,
"kl_loss_10": 534.2748489379883,
"kl_loss_2": 3599.4434326171877,
"kl_loss_3": 3312.2544921875,
"kl_loss_7": 1559.4685668945312,
"learning_rate": 0.0009997960964140947,
"loss": 2241.091,
"step": 190
},
{
"ce_loss_10": 3.6890772104263307,
"ce_loss_13": 3.4474449634552,
"ce_loss_2": 5.328355288505554,
"ce_loss_3": 5.103678369522095,
"ce_loss_7": 4.223123550415039,
"epoch": 0.02,
"grad_norm": 1584.0,
"kl_loss_10": 498.3182800292969,
"kl_loss_2": 3626.2685302734376,
"kl_loss_3": 3230.7111083984373,
"kl_loss_7": 1544.03125,
"learning_rate": 0.0009997482711915926,
"loss": 2212.8523,
"step": 200
},
{
"ce_loss_10": 3.643280267715454,
"ce_loss_13": 3.4110453128814697,
"ce_loss_2": 5.262782073020935,
"ce_loss_3": 5.006648206710816,
"ce_loss_7": 4.161720204353332,
"epoch": 0.021,
"grad_norm": 1320.0,
"kl_loss_10": 468.05088500976564,
"kl_loss_2": 3600.3508911132812,
"kl_loss_3": 3146.037072753906,
"kl_loss_7": 1514.3593139648438,
"learning_rate": 0.0009996954135095479,
"loss": 2163.3328,
"step": 210
},
{
"ce_loss_10": 3.743840980529785,
"ce_loss_13": 3.495615518093109,
"ce_loss_2": 5.276893544197082,
"ce_loss_3": 5.026828193664551,
"ce_loss_7": 4.215770494937897,
"epoch": 0.022,
"grad_norm": 952.0,
"kl_loss_10": 494.9872482299805,
"kl_loss_2": 3434.308557128906,
"kl_loss_3": 2996.470593261719,
"kl_loss_7": 1447.3476196289062,
"learning_rate": 0.0009996375239002368,
"loss": 2094.248,
"step": 220
},
{
"ce_loss_10": 3.8117304921150206,
"ce_loss_13": 3.5717169761657717,
"ce_loss_2": 5.300674176216125,
"ce_loss_3": 5.045718550682068,
"ce_loss_7": 4.271833729743958,
"epoch": 0.023,
"grad_norm": 1064.0,
"kl_loss_10": 491.1131820678711,
"kl_loss_2": 3352.0796875,
"kl_loss_3": 2909.836950683594,
"kl_loss_7": 1405.5986450195312,
"learning_rate": 0.0009995746029466072,
"loss": 2050.6086,
"step": 230
},
{
"ce_loss_10": 3.6075421810150146,
"ce_loss_13": 3.3550766468048097,
"ce_loss_2": 5.39588577747345,
"ce_loss_3": 4.985904622077942,
"ce_loss_7": 4.14452086687088,
"epoch": 0.024,
"grad_norm": 1496.0,
"kl_loss_10": 521.684194946289,
"kl_loss_2": 3944.3539916992186,
"kl_loss_3": 3201.487194824219,
"kl_loss_7": 1583.0420288085938,
"learning_rate": 0.0009995066512822719,
"loss": 2234.7746,
"step": 240
},
{
"ce_loss_10": 3.6849907636642456,
"ce_loss_13": 3.461445081233978,
"ce_loss_2": 5.414009666442871,
"ce_loss_3": 5.085514402389526,
"ce_loss_7": 4.184376835823059,
"epoch": 0.025,
"grad_norm": 1800.0,
"kl_loss_10": 465.4432067871094,
"kl_loss_2": 3782.4762451171873,
"kl_loss_3": 3206.6178466796873,
"kl_loss_7": 1450.9975891113281,
"learning_rate": 0.000999433669591504,
"loss": 2142.3535,
"step": 250
},
{
"ce_loss_10": 3.6025625109672545,
"ce_loss_13": 3.360257649421692,
"ce_loss_2": 5.237245011329651,
"ce_loss_3": 4.9437507629394535,
"ce_loss_7": 4.080421531200409,
"epoch": 0.026,
"grad_norm": 1408.0,
"kl_loss_10": 503.2585876464844,
"kl_loss_2": 3655.8213134765624,
"kl_loss_3": 3140.7313842773438,
"kl_loss_7": 1460.5682739257813,
"learning_rate": 0.000999355658609228,
"loss": 2133.6004,
"step": 260
},
{
"ce_loss_10": 3.6813029885292052,
"ce_loss_13": 3.395027530193329,
"ce_loss_2": 5.295657467842102,
"ce_loss_3": 5.023426985740661,
"ce_loss_7": 4.133508098125458,
"epoch": 0.027,
"grad_norm": 1416.0,
"kl_loss_10": 572.3903137207031,
"kl_loss_2": 3669.314978027344,
"kl_loss_3": 3183.432019042969,
"kl_loss_7": 1464.0882202148437,
"learning_rate": 0.0009992726191210138,
"loss": 2179.2967,
"step": 270
},
{
"ce_loss_10": 3.696367251873016,
"ce_loss_13": 3.433962404727936,
"ce_loss_2": 5.21666829586029,
"ce_loss_3": 4.99695348739624,
"ce_loss_7": 4.169408094882965,
"epoch": 0.028,
"grad_norm": 1880.0,
"kl_loss_10": 529.3393615722656,
"kl_loss_2": 3457.8086547851562,
"kl_loss_3": 3089.980187988281,
"kl_loss_7": 1482.3380798339845,
"learning_rate": 0.0009991845519630679,
"loss": 2115.8172,
"step": 280
},
{
"ce_loss_10": 3.556672739982605,
"ce_loss_13": 3.3172685623168947,
"ce_loss_2": 5.112358474731446,
"ce_loss_3": 4.917420530319214,
"ce_loss_7": 4.036571848392486,
"epoch": 0.029,
"grad_norm": 2000.0,
"kl_loss_10": 477.5372833251953,
"kl_loss_2": 3475.2529418945314,
"kl_loss_3": 3146.291943359375,
"kl_loss_7": 1444.9558898925782,
"learning_rate": 0.0009990914580222257,
"loss": 2130.9104,
"step": 290
},
{
"ce_loss_10": 3.6650490760803223,
"ce_loss_13": 3.455529069900513,
"ce_loss_2": 5.149494194984436,
"ce_loss_3": 4.940038657188415,
"ce_loss_7": 4.130682170391083,
"epoch": 0.03,
"grad_norm": 1560.0,
"kl_loss_10": 441.9183044433594,
"kl_loss_2": 3299.1113159179686,
"kl_loss_3": 2933.9962768554688,
"kl_loss_7": 1369.8260986328125,
"learning_rate": 0.0009989933382359422,
"loss": 2069.7893,
"step": 300
},
{
"ce_loss_10": 3.6985942125320435,
"ce_loss_13": 3.465806806087494,
"ce_loss_2": 5.143499898910522,
"ce_loss_3": 4.909449362754822,
"ce_loss_7": 4.1382394433021545,
"epoch": 0.031,
"grad_norm": 1120.0,
"kl_loss_10": 486.39428558349607,
"kl_loss_2": 3301.6126342773437,
"kl_loss_3": 2880.7590454101564,
"kl_loss_7": 1392.4209594726562,
"learning_rate": 0.0009988901935922825,
"loss": 2022.2506,
"step": 310
},
{
"ce_loss_10": 3.544218695163727,
"ce_loss_13": 3.314011883735657,
"ce_loss_2": 5.10150101184845,
"ce_loss_3": 4.842008900642395,
"ce_loss_7": 4.024464392662049,
"epoch": 0.032,
"grad_norm": 1472.0,
"kl_loss_10": 469.5253311157227,
"kl_loss_2": 3487.786022949219,
"kl_loss_3": 3028.9456176757812,
"kl_loss_7": 1444.1999450683593,
"learning_rate": 0.0009987820251299122,
"loss": 2047.4186,
"step": 320
},
{
"ce_loss_10": 3.67177551984787,
"ce_loss_13": 3.4466672420501707,
"ce_loss_2": 5.135675239562988,
"ce_loss_3": 4.8595945835113525,
"ce_loss_7": 4.121568500995636,
"epoch": 0.033,
"grad_norm": 1012.0,
"kl_loss_10": 450.39354553222654,
"kl_loss_2": 3306.36376953125,
"kl_loss_3": 2827.274365234375,
"kl_loss_7": 1387.9264587402345,
"learning_rate": 0.0009986688339380862,
"loss": 1975.759,
"step": 330
},
{
"ce_loss_10": 3.6029638409614564,
"ce_loss_13": 3.397365379333496,
"ce_loss_2": 5.057221698760986,
"ce_loss_3": 4.7936498641967775,
"ce_loss_7": 4.015895903110504,
"epoch": 0.034,
"grad_norm": 964.0,
"kl_loss_10": 436.06551971435545,
"kl_loss_2": 3221.0221435546873,
"kl_loss_3": 2750.7028198242188,
"kl_loss_7": 1266.2399963378907,
"learning_rate": 0.0009985506211566387,
"loss": 1936.1705,
"step": 340
},
{
"ce_loss_10": 3.6370130658149717,
"ce_loss_13": 3.4315125226974486,
"ce_loss_2": 5.061046314239502,
"ce_loss_3": 4.7848950862884525,
"ce_loss_7": 4.02301949262619,
"epoch": 0.035,
"grad_norm": 908.0,
"kl_loss_10": 422.44232482910155,
"kl_loss_2": 3177.237683105469,
"kl_loss_3": 2690.365087890625,
"kl_loss_7": 1217.3221435546875,
"learning_rate": 0.0009984273879759713,
"loss": 1896.4475,
"step": 350
},
{
"ce_loss_10": 3.656745362281799,
"ce_loss_13": 3.4570682406425477,
"ce_loss_2": 5.137815022468567,
"ce_loss_3": 4.873619461059571,
"ce_loss_7": 4.083463799953461,
"epoch": 0.036,
"grad_norm": 860.0,
"kl_loss_10": 423.36531524658204,
"kl_loss_2": 3273.7410522460937,
"kl_loss_3": 2798.973498535156,
"kl_loss_7": 1268.806689453125,
"learning_rate": 0.0009982991356370402,
"loss": 1973.0059,
"step": 360
},
{
"ce_loss_10": 3.631060302257538,
"ce_loss_13": 3.4341819286346436,
"ce_loss_2": 5.11769585609436,
"ce_loss_3": 4.834220147132873,
"ce_loss_7": 4.047011601924896,
"epoch": 0.037,
"grad_norm": 908.0,
"kl_loss_10": 402.81113891601564,
"kl_loss_2": 3276.052880859375,
"kl_loss_3": 2791.4117065429687,
"kl_loss_7": 1245.3732849121093,
"learning_rate": 0.0009981658654313456,
"loss": 1941.0666,
"step": 370
},
{
"ce_loss_10": 3.7020971179008484,
"ce_loss_13": 3.5137467861175535,
"ce_loss_2": 5.156114864349365,
"ce_loss_3": 4.874492716789246,
"ce_loss_7": 4.092896187305451,
"epoch": 0.038,
"grad_norm": 744.0,
"kl_loss_10": 382.19567413330077,
"kl_loss_2": 3216.3584228515624,
"kl_loss_3": 2713.040576171875,
"kl_loss_7": 1200.0062133789063,
"learning_rate": 0.000998027578700917,
"loss": 1916.7457,
"step": 380
},
{
"ce_loss_10": 3.629340207576752,
"ce_loss_13": 3.4466560959815977,
"ce_loss_2": 5.104858756065369,
"ce_loss_3": 4.827202153205872,
"ce_loss_7": 4.051906526088715,
"epoch": 0.039,
"grad_norm": 768.0,
"kl_loss_10": 387.5618530273438,
"kl_loss_2": 3239.9035766601564,
"kl_loss_3": 2754.5089477539063,
"kl_loss_7": 1245.49443359375,
"learning_rate": 0.0009978842768382998,
"loss": 1919.6182,
"step": 390
},
{
"ce_loss_10": 3.6458646416664124,
"ce_loss_13": 3.4677427411079407,
"ce_loss_2": 5.076069569587707,
"ce_loss_3": 4.798897671699524,
"ce_loss_7": 4.036490082740784,
"epoch": 0.04,
"grad_norm": 820.0,
"kl_loss_10": 365.37960052490234,
"kl_loss_2": 3139.388073730469,
"kl_loss_3": 2645.03125,
"kl_loss_7": 1170.367169189453,
"learning_rate": 0.0009977359612865424,
"loss": 1848.2086,
"step": 400
},
{
"ce_loss_10": 3.6510703682899477,
"ce_loss_13": 3.472544801235199,
"ce_loss_2": 5.0927152872085575,
"ce_loss_3": 4.818557095527649,
"ce_loss_7": 4.048879408836365,
"epoch": 0.041,
"grad_norm": 752.0,
"kl_loss_10": 376.4607299804687,
"kl_loss_2": 3183.3623168945314,
"kl_loss_3": 2696.904638671875,
"kl_loss_7": 1198.10205078125,
"learning_rate": 0.0009975826335391806,
"loss": 1850.6066,
"step": 410
},
{
"ce_loss_10": 3.664944088459015,
"ce_loss_13": 3.4915570259094237,
"ce_loss_2": 5.092843031883239,
"ce_loss_3": 4.81715497970581,
"ce_loss_7": 4.06898148059845,
"epoch": 0.042,
"grad_norm": 1072.0,
"kl_loss_10": 367.5922546386719,
"kl_loss_2": 3121.2123901367186,
"kl_loss_3": 2637.7326782226564,
"kl_loss_7": 1178.5750122070312,
"learning_rate": 0.0009974242951402235,
"loss": 1847.4906,
"step": 420
},
{
"ce_loss_10": 3.6901652693748472,
"ce_loss_13": 3.5015287518501284,
"ce_loss_2": 5.113956260681152,
"ce_loss_3": 4.831623649597168,
"ce_loss_7": 4.073742997646332,
"epoch": 0.043,
"grad_norm": 932.0,
"kl_loss_10": 391.2719299316406,
"kl_loss_2": 3171.550817871094,
"kl_loss_3": 2672.8351318359373,
"kl_loss_7": 1198.8372039794922,
"learning_rate": 0.0009972609476841367,
"loss": 1839.4168,
"step": 430
},
{
"ce_loss_10": 3.592795264720917,
"ce_loss_13": 3.407615542411804,
"ce_loss_2": 5.048645877838135,
"ce_loss_3": 4.779121279716492,
"ce_loss_7": 3.982761597633362,
"epoch": 0.044,
"grad_norm": 928.0,
"kl_loss_10": 377.92359313964846,
"kl_loss_2": 3195.713342285156,
"kl_loss_3": 2713.7881591796877,
"kl_loss_7": 1185.5625,
"learning_rate": 0.0009970925928158272,
"loss": 1868.092,
"step": 440
},
{
"ce_loss_10": 3.542843294143677,
"ce_loss_13": 3.354374420642853,
"ce_loss_2": 5.013250637054443,
"ce_loss_3": 4.739123964309693,
"ce_loss_7": 3.935924601554871,
"epoch": 0.045,
"grad_norm": 740.0,
"kl_loss_10": 385.2865692138672,
"kl_loss_2": 3278.8071044921876,
"kl_loss_3": 2790.4721435546876,
"kl_loss_7": 1226.6742797851562,
"learning_rate": 0.000996919232230627,
"loss": 1885.8758,
"step": 450
},
{
"ce_loss_10": 3.609917199611664,
"ce_loss_13": 3.4386712551116942,
"ce_loss_2": 5.020998239517212,
"ce_loss_3": 4.756829810142517,
"ce_loss_7": 4.001234555244446,
"epoch": 0.046,
"grad_norm": 872.0,
"kl_loss_10": 358.4470748901367,
"kl_loss_2": 3100.1795166015627,
"kl_loss_3": 2620.8273803710936,
"kl_loss_7": 1157.8196044921874,
"learning_rate": 0.0009967408676742752,
"loss": 1772.8766,
"step": 460
},
{
"ce_loss_10": 3.7562451124191285,
"ce_loss_13": 3.5811493396759033,
"ce_loss_2": 5.11839497089386,
"ce_loss_3": 4.844864320755005,
"ce_loss_7": 4.1195793628692625,
"epoch": 0.047,
"grad_norm": 968.0,
"kl_loss_10": 364.69328155517576,
"kl_loss_2": 3032.6340087890626,
"kl_loss_3": 2548.6266967773436,
"kl_loss_7": 1130.8773040771484,
"learning_rate": 0.0009965575009429006,
"loss": 1825.8629,
"step": 470
},
{
"ce_loss_10": 3.542626643180847,
"ce_loss_13": 3.364771544933319,
"ce_loss_2": 4.9806403636932375,
"ce_loss_3": 4.703183531761169,
"ce_loss_7": 3.9297071576118467,
"epoch": 0.048,
"grad_norm": 772.0,
"kl_loss_10": 368.38177795410155,
"kl_loss_2": 3172.022900390625,
"kl_loss_3": 2678.579626464844,
"kl_loss_7": 1172.0243133544923,
"learning_rate": 0.0009963691338830043,
"loss": 1818.5924,
"step": 480
},
{
"ce_loss_10": 3.6282991647720335,
"ce_loss_13": 3.4611623764038084,
"ce_loss_2": 5.030923771858215,
"ce_loss_3": 4.765255475044251,
"ce_loss_7": 3.995365762710571,
"epoch": 0.049,
"grad_norm": 944.0,
"kl_loss_10": 346.68406372070314,
"kl_loss_2": 3111.6420288085938,
"kl_loss_3": 2633.9958740234374,
"kl_loss_7": 1125.4209197998048,
"learning_rate": 0.0009961757683914405,
"loss": 1782.6619,
"step": 490
},
{
"ce_loss_10": 3.6188631772994997,
"ce_loss_13": 3.450295829772949,
"ce_loss_2": 4.988259315490723,
"ce_loss_3": 4.726764726638794,
"ce_loss_7": 4.00168125629425,
"epoch": 0.05,
"grad_norm": 1184.0,
"kl_loss_10": 362.3049346923828,
"kl_loss_2": 3035.001806640625,
"kl_loss_3": 2588.0874145507814,
"kl_loss_7": 1166.9710693359375,
"learning_rate": 0.0009959774064153978,
"loss": 1805.0438,
"step": 500
},
{
"ce_loss_10": 3.623943197727203,
"ce_loss_13": 3.4620243430137636,
"ce_loss_2": 4.959137892723083,
"ce_loss_3": 4.687646722793579,
"ce_loss_7": 3.976128029823303,
"epoch": 0.051,
"grad_norm": 856.0,
"kl_loss_10": 343.2813385009766,
"kl_loss_2": 2963.6288208007813,
"kl_loss_3": 2485.8935424804686,
"kl_loss_7": 1086.964697265625,
"learning_rate": 0.0009957740499523787,
"loss": 1751.4643,
"step": 510
},
{
"ce_loss_10": 3.6490553617477417,
"ce_loss_13": 3.476555550098419,
"ce_loss_2": 4.994476556777954,
"ce_loss_3": 4.725382924079895,
"ce_loss_7": 4.001345467567444,
"epoch": 0.052,
"grad_norm": 808.0,
"kl_loss_10": 347.32325134277346,
"kl_loss_2": 2968.236572265625,
"kl_loss_3": 2495.7832275390624,
"kl_loss_7": 1099.3744354248047,
"learning_rate": 0.0009955657010501807,
"loss": 1740.4176,
"step": 520
},
{
"ce_loss_10": 3.6094146251678465,
"ce_loss_13": 3.4360305190086367,
"ce_loss_2": 4.987359571456909,
"ce_loss_3": 4.711909174919128,
"ce_loss_7": 3.96878160238266,
"epoch": 0.053,
"grad_norm": 732.0,
"kl_loss_10": 356.96947326660154,
"kl_loss_2": 3066.1166015625,
"kl_loss_3": 2574.2064819335938,
"kl_loss_7": 1113.183071899414,
"learning_rate": 0.000995352361806875,
"loss": 1757.3914,
"step": 530
},
{
"ce_loss_10": 3.6483884930610655,
"ce_loss_13": 3.4761168599128722,
"ce_loss_2": 5.01164448261261,
"ce_loss_3": 4.73403651714325,
"ce_loss_7": 4.005823755264283,
"epoch": 0.054,
"grad_norm": 868.0,
"kl_loss_10": 358.3400619506836,
"kl_loss_2": 3025.010693359375,
"kl_loss_3": 2540.0701538085937,
"kl_loss_7": 1117.8957305908202,
"learning_rate": 0.0009951340343707852,
"loss": 1783.3418,
"step": 540
},
{
"ce_loss_10": 3.693763518333435,
"ce_loss_13": 3.5300124645233155,
"ce_loss_2": 5.04529185295105,
"ce_loss_3": 4.776751947402954,
"ce_loss_7": 4.050560343265533,
"epoch": 0.055,
"grad_norm": 580.0,
"kl_loss_10": 343.201188659668,
"kl_loss_2": 2966.6511840820312,
"kl_loss_3": 2491.584606933594,
"kl_loss_7": 1070.658499145508,
"learning_rate": 0.0009949107209404665,
"loss": 1740.307,
"step": 550
},
{
"ce_loss_10": 3.618695652484894,
"ce_loss_13": 3.4460346341133117,
"ce_loss_2": 4.953143644332886,
"ce_loss_3": 4.67682032585144,
"ce_loss_7": 3.9601072311401366,
"epoch": 0.056,
"grad_norm": 972.0,
"kl_loss_10": 355.8962005615234,
"kl_loss_2": 2990.009143066406,
"kl_loss_3": 2495.9183959960938,
"kl_loss_7": 1092.0468170166016,
"learning_rate": 0.0009946824237646824,
"loss": 1737.0576,
"step": 560
},
{
"ce_loss_10": 3.5657299041748045,
"ce_loss_13": 3.3921077370643617,
"ce_loss_2": 4.9473305463790895,
"ce_loss_3": 4.655314612388611,
"ce_loss_7": 3.9485832929611204,
"epoch": 0.057,
"grad_norm": 1232.0,
"kl_loss_10": 368.3774078369141,
"kl_loss_2": 3077.5546997070314,
"kl_loss_3": 2563.977990722656,
"kl_loss_7": 1171.9384887695312,
"learning_rate": 0.0009944491451423828,
"loss": 1812.8215,
"step": 570
},
{
"ce_loss_10": 3.5597246408462526,
"ce_loss_13": 3.38997106552124,
"ce_loss_2": 4.957224941253662,
"ce_loss_3": 4.669384074211121,
"ce_loss_7": 3.9783090591430663,
"epoch": 0.058,
"grad_norm": 1048.0,
"kl_loss_10": 352.9766845703125,
"kl_loss_2": 3080.3538452148437,
"kl_loss_3": 2573.69345703125,
"kl_loss_7": 1221.7482543945312,
"learning_rate": 0.0009942108874226813,
"loss": 1775.8918,
"step": 580
},
{
"ce_loss_10": 3.667470908164978,
"ce_loss_13": 3.5143657088279725,
"ce_loss_2": 4.977405524253845,
"ce_loss_3": 4.70070378780365,
"ce_loss_7": 4.062089693546295,
"epoch": 0.059,
"grad_norm": 1160.0,
"kl_loss_10": 326.54786376953126,
"kl_loss_2": 2889.81787109375,
"kl_loss_3": 2394.497277832031,
"kl_loss_7": 1154.6502380371094,
"learning_rate": 0.00099396765300483,
"loss": 1684.8838,
"step": 590
},
{
"ce_loss_10": 3.65077520608902,
"ce_loss_13": 3.4909046292304993,
"ce_loss_2": 4.953103184700012,
"ce_loss_3": 4.675009846687317,
"ce_loss_7": 4.037787747383118,
"epoch": 0.06,
"grad_norm": 728.0,
"kl_loss_10": 333.6824432373047,
"kl_loss_2": 2888.043603515625,
"kl_loss_3": 2401.467254638672,
"kl_loss_7": 1146.9622497558594,
"learning_rate": 0.0009937194443381972,
"loss": 1692.9094,
"step": 600
},
{
"ce_loss_10": 3.6720112562179565,
"ce_loss_13": 3.5144667506217955,
"ce_loss_2": 4.945521140098572,
"ce_loss_3": 4.670798707008362,
"ce_loss_7": 4.003339779376984,
"epoch": 0.061,
"grad_norm": 728.0,
"kl_loss_10": 340.24414978027346,
"kl_loss_2": 2848.255480957031,
"kl_loss_3": 2358.6506469726564,
"kl_loss_7": 1042.5767547607422,
"learning_rate": 0.0009934662639222412,
"loss": 1695.9006,
"step": 610
},
{
"ce_loss_10": 3.6284273624420167,
"ce_loss_13": 3.466042399406433,
"ce_loss_2": 4.974099659919739,
"ce_loss_3": 4.698220872879029,
"ce_loss_7": 3.9703264474868774,
"epoch": 0.062,
"grad_norm": 708.0,
"kl_loss_10": 346.28453369140624,
"kl_loss_2": 2978.781689453125,
"kl_loss_3": 2496.677685546875,
"kl_loss_7": 1062.910955810547,
"learning_rate": 0.000993208114306486,
"loss": 1704.2672,
"step": 620
},
{
"ce_loss_10": 3.5462576508522035,
"ce_loss_13": 3.380946898460388,
"ce_loss_2": 4.922283387184143,
"ce_loss_3": 4.633153581619263,
"ce_loss_7": 3.890825295448303,
"epoch": 0.063,
"grad_norm": 924.0,
"kl_loss_10": 358.3551940917969,
"kl_loss_2": 3032.9190673828125,
"kl_loss_3": 2531.955603027344,
"kl_loss_7": 1071.5194458007813,
"learning_rate": 0.0009929449980904952,
"loss": 1693.2549,
"step": 630
},
{
"ce_loss_10": 3.6085665225982666,
"ce_loss_13": 3.444735288619995,
"ce_loss_2": 4.934487676620483,
"ce_loss_3": 4.655823493003846,
"ce_loss_7": 3.935356545448303,
"epoch": 0.064,
"grad_norm": 676.0,
"kl_loss_10": 344.3735855102539,
"kl_loss_2": 2962.493859863281,
"kl_loss_3": 2465.4102416992187,
"kl_loss_7": 1045.9244415283204,
"learning_rate": 0.0009926769179238466,
"loss": 1690.2553,
"step": 640
},
{
"ce_loss_10": 3.657176661491394,
"ce_loss_13": 3.4894155979156496,
"ce_loss_2": 4.984645247459412,
"ce_loss_3": 4.697536993026733,
"ce_loss_7": 3.984337937831879,
"epoch": 0.065,
"grad_norm": 812.0,
"kl_loss_10": 351.49694671630857,
"kl_loss_2": 2961.2925659179687,
"kl_loss_3": 2455.3551025390625,
"kl_loss_7": 1056.1930267333985,
"learning_rate": 0.000992403876506104,
"loss": 1699.9176,
"step": 650
},
{
"ce_loss_10": 3.5853109121322633,
"ce_loss_13": 3.4265636444091796,
"ce_loss_2": 4.949072217941284,
"ce_loss_3": 4.657009506225586,
"ce_loss_7": 3.9192400932312013,
"epoch": 0.066,
"grad_norm": 772.0,
"kl_loss_10": 332.7637084960937,
"kl_loss_2": 3005.3072998046873,
"kl_loss_3": 2488.590020751953,
"kl_loss_7": 1034.6812866210937,
"learning_rate": 0.0009921258765867918,
"loss": 1712.7359,
"step": 660
},
{
"ce_loss_10": 3.543238043785095,
"ce_loss_13": 3.392865073680878,
"ce_loss_2": 4.929511904716492,
"ce_loss_3": 4.673877739906311,
"ce_loss_7": 3.8979400753974915,
"epoch": 0.067,
"grad_norm": 1216.0,
"kl_loss_10": 326.31287689208983,
"kl_loss_2": 3073.3475952148438,
"kl_loss_3": 2606.592980957031,
"kl_loss_7": 1089.1828704833983,
"learning_rate": 0.0009918429209653662,
"loss": 1742.882,
"step": 670
},
{
"ce_loss_10": 3.60556218624115,
"ce_loss_13": 3.451234769821167,
"ce_loss_2": 4.9643912553787235,
"ce_loss_3": 4.685172462463379,
"ce_loss_7": 3.9489428043365478,
"epoch": 0.068,
"grad_norm": 700.0,
"kl_loss_10": 326.2720092773437,
"kl_loss_2": 2991.5349365234374,
"kl_loss_3": 2499.9562133789063,
"kl_loss_7": 1058.8525512695312,
"learning_rate": 0.0009915550124911866,
"loss": 1675.9207,
"step": 680
},
{
"ce_loss_10": 3.6152788639068603,
"ce_loss_13": 3.463881015777588,
"ce_loss_2": 4.9310142517089846,
"ce_loss_3": 4.651708984375,
"ce_loss_7": 3.939838695526123,
"epoch": 0.069,
"grad_norm": 716.0,
"kl_loss_10": 321.7038208007813,
"kl_loss_2": 2904.1300415039063,
"kl_loss_3": 2416.9381103515625,
"kl_loss_7": 1006.4677703857421,
"learning_rate": 0.0009912621540634887,
"loss": 1665.2684,
"step": 690
},
{
"ce_loss_10": 3.6430228471755983,
"ce_loss_13": 3.4952101826667787,
"ce_loss_2": 4.929437565803528,
"ce_loss_3": 4.6475961923599245,
"ce_loss_7": 3.9429776191711428,
"epoch": 0.07,
"grad_norm": 676.0,
"kl_loss_10": 309.61268615722656,
"kl_loss_2": 2848.814501953125,
"kl_loss_3": 2359.46318359375,
"kl_loss_7": 970.4827117919922,
"learning_rate": 0.0009909643486313534,
"loss": 1639.2395,
"step": 700
},
{
"ce_loss_10": 3.526335525512695,
"ce_loss_13": 3.3703501343727114,
"ce_loss_2": 4.889838075637817,
"ce_loss_3": 4.6030642032623295,
"ce_loss_7": 3.8521526575088503,
"epoch": 0.071,
"grad_norm": 744.0,
"kl_loss_10": 340.5390853881836,
"kl_loss_2": 3011.11240234375,
"kl_loss_3": 2515.4811889648436,
"kl_loss_7": 1017.710009765625,
"learning_rate": 0.000990661599193678,
"loss": 1737.2715,
"step": 710
},
{
"ce_loss_10": 3.6673552870750425,
"ce_loss_13": 3.5033403754234316,
"ce_loss_2": 4.93988311290741,
"ce_loss_3": 4.67398898601532,
"ce_loss_7": 3.9688475489616395,
"epoch": 0.072,
"grad_norm": 796.0,
"kl_loss_10": 340.83275604248047,
"kl_loss_2": 2865.5041381835936,
"kl_loss_3": 2386.658837890625,
"kl_loss_7": 996.4190338134765,
"learning_rate": 0.0009903539087991462,
"loss": 1651.048,
"step": 720
},
{
"ce_loss_10": 3.6324875712394715,
"ce_loss_13": 3.4752389669418333,
"ce_loss_2": 4.927369832992554,
"ce_loss_3": 4.656826686859131,
"ce_loss_7": 3.941362977027893,
"epoch": 0.073,
"grad_norm": 672.0,
"kl_loss_10": 338.573225402832,
"kl_loss_2": 2878.319189453125,
"kl_loss_3": 2403.4671142578127,
"kl_loss_7": 991.7197357177735,
"learning_rate": 0.0009900412805461966,
"loss": 1664.0748,
"step": 730
},
{
"ce_loss_10": 3.697860896587372,
"ce_loss_13": 3.5502901554107664,
"ce_loss_2": 4.959825038909912,
"ce_loss_3": 4.680054187774658,
"ce_loss_7": 4.008367860317231,
"epoch": 0.074,
"grad_norm": 796.0,
"kl_loss_10": 322.8813171386719,
"kl_loss_2": 2810.9089233398436,
"kl_loss_3": 2318.1740844726564,
"kl_loss_7": 980.3480072021484,
"learning_rate": 0.0009897237175829927,
"loss": 1630.2344,
"step": 740
},
{
"ce_loss_10": 3.5930413126945497,
"ce_loss_13": 3.43618665933609,
"ce_loss_2": 4.910944557189941,
"ce_loss_3": 4.628472471237183,
"ce_loss_7": 3.9170363903045655,
"epoch": 0.075,
"grad_norm": 720.0,
"kl_loss_10": 332.21988067626955,
"kl_loss_2": 2928.557727050781,
"kl_loss_3": 2429.9159301757813,
"kl_loss_7": 1037.6262634277343,
"learning_rate": 0.0009894012231073895,
"loss": 1665.4367,
"step": 750
},
{
"ce_loss_10": 3.6464996695518495,
"ce_loss_13": 3.4838218331336974,
"ce_loss_2": 4.924402260780335,
"ce_loss_3": 4.645452523231507,
"ce_loss_7": 3.9448330640792846,
"epoch": 0.076,
"grad_norm": 812.0,
"kl_loss_10": 338.6822082519531,
"kl_loss_2": 2855.4515014648437,
"kl_loss_3": 2358.476416015625,
"kl_loss_7": 978.1411010742188,
"learning_rate": 0.0009890738003669028,
"loss": 1654.1621,
"step": 760
},
{
"ce_loss_10": 3.617565965652466,
"ce_loss_13": 3.455268681049347,
"ce_loss_2": 4.933386254310608,
"ce_loss_3": 4.651405668258667,
"ce_loss_7": 3.9341206789016723,
"epoch": 0.077,
"grad_norm": 756.0,
"kl_loss_10": 337.93136138916014,
"kl_loss_2": 2949.602490234375,
"kl_loss_3": 2451.218469238281,
"kl_loss_7": 1020.4571960449218,
"learning_rate": 0.0009887414526586764,
"loss": 1640.4555,
"step": 770
},
{
"ce_loss_10": 3.6583608746528626,
"ce_loss_13": 3.512969744205475,
"ce_loss_2": 4.9441753149032595,
"ce_loss_3": 4.656214547157288,
"ce_loss_7": 3.964318811893463,
"epoch": 0.078,
"grad_norm": 720.0,
"kl_loss_10": 313.43713836669923,
"kl_loss_2": 2854.152880859375,
"kl_loss_3": 2348.5727111816404,
"kl_loss_7": 969.1142120361328,
"learning_rate": 0.0009884041833294476,
"loss": 1599.7842,
"step": 780
},
{
"ce_loss_10": 3.6560466647148133,
"ce_loss_13": 3.508514332771301,
"ce_loss_2": 4.940361285209656,
"ce_loss_3": 4.645708775520324,
"ce_loss_7": 3.958513784408569,
"epoch": 0.079,
"grad_norm": 832.0,
"kl_loss_10": 319.23270416259766,
"kl_loss_2": 2852.032861328125,
"kl_loss_3": 2330.51533203125,
"kl_loss_7": 969.9107818603516,
"learning_rate": 0.000988061995775515,
"loss": 1668.3449,
"step": 790
},
{
"ce_loss_10": 3.5980430364608766,
"ce_loss_13": 3.440366840362549,
"ce_loss_2": 4.8732929706573485,
"ce_loss_3": 4.587121820449829,
"ce_loss_7": 3.9043478846549986,
"epoch": 0.08,
"grad_norm": 752.0,
"kl_loss_10": 323.7010192871094,
"kl_loss_2": 2868.414514160156,
"kl_loss_3": 2356.581396484375,
"kl_loss_7": 987.0360656738281,
"learning_rate": 0.0009877148934427035,
"loss": 1633.2111,
"step": 800
},
{
"ce_loss_10": 3.633367455005646,
"ce_loss_13": 3.4834325551986693,
"ce_loss_2": 4.935962653160095,
"ce_loss_3": 4.627329421043396,
"ce_loss_7": 3.925890827178955,
"epoch": 0.081,
"grad_norm": 820.0,
"kl_loss_10": 330.4556167602539,
"kl_loss_2": 2885.1009033203127,
"kl_loss_3": 2351.8309020996094,
"kl_loss_7": 957.707730102539,
"learning_rate": 0.0009873628798263297,
"loss": 1611.097,
"step": 810
},
{
"ce_loss_10": 3.605324161052704,
"ce_loss_13": 3.438004171848297,
"ce_loss_2": 4.856884765625,
"ce_loss_3": 4.56407413482666,
"ce_loss_7": 3.8718148946762083,
"epoch": 0.082,
"grad_norm": 840.0,
"kl_loss_10": 339.57317504882815,
"kl_loss_2": 2826.0930053710936,
"kl_loss_3": 2305.033795166016,
"kl_loss_7": 931.82373046875,
"learning_rate": 0.0009870059584711668,
"loss": 1639.3607,
"step": 820
},
{
"ce_loss_10": 3.60188170671463,
"ce_loss_13": 3.455841100215912,
"ce_loss_2": 4.85420286655426,
"ce_loss_3": 4.581924772262573,
"ce_loss_7": 3.8951406598091127,
"epoch": 0.083,
"grad_norm": 720.0,
"kl_loss_10": 317.57149810791014,
"kl_loss_2": 2801.2140380859373,
"kl_loss_3": 2316.871270751953,
"kl_loss_7": 949.605337524414,
"learning_rate": 0.000986644132971409,
"loss": 1599.6842,
"step": 830
},
{
"ce_loss_10": 3.5939020037651064,
"ce_loss_13": 3.4429898500442504,
"ce_loss_2": 4.88135507106781,
"ce_loss_3": 4.604088640213012,
"ce_loss_7": 3.9158664107322694,
"epoch": 0.084,
"grad_norm": 932.0,
"kl_loss_10": 322.8277191162109,
"kl_loss_2": 2865.847692871094,
"kl_loss_3": 2367.4215576171873,
"kl_loss_7": 996.9576171875,
"learning_rate": 0.0009862774069706345,
"loss": 1629.1093,
"step": 840
},
{
"ce_loss_10": 3.710948944091797,
"ce_loss_13": 3.5685924649238587,
"ce_loss_2": 4.930621600151062,
"ce_loss_3": 4.65263340473175,
"ce_loss_7": 3.9990792274475098,
"epoch": 0.085,
"grad_norm": 684.0,
"kl_loss_10": 304.0562255859375,
"kl_loss_2": 2742.24169921875,
"kl_loss_3": 2253.91962890625,
"kl_loss_7": 950.4928100585937,
"learning_rate": 0.000985905784161771,
"loss": 1590.0119,
"step": 850
},
{
"ce_loss_10": 3.63605819940567,
"ce_loss_13": 3.4998138546943665,
"ce_loss_2": 4.900371265411377,
"ce_loss_3": 4.62078812122345,
"ce_loss_7": 3.934238409996033,
"epoch": 0.086,
"grad_norm": 748.0,
"kl_loss_10": 294.4667907714844,
"kl_loss_2": 2800.617395019531,
"kl_loss_3": 2314.4944458007812,
"kl_loss_7": 955.0795837402344,
"learning_rate": 0.000985529268287055,
"loss": 1585.186,
"step": 860
},
{
"ce_loss_10": 3.5651148438453673,
"ce_loss_13": 3.4233306527137755,
"ce_loss_2": 4.871410083770752,
"ce_loss_3": 4.5925886869430546,
"ce_loss_7": 3.877922761440277,
"epoch": 0.087,
"grad_norm": 796.0,
"kl_loss_10": 301.2444900512695,
"kl_loss_2": 2878.2498046875,
"kl_loss_3": 2387.6543212890624,
"kl_loss_7": 975.5103942871094,
"learning_rate": 0.0009851478631379982,
"loss": 1626.462,
"step": 870
},
{
"ce_loss_10": 3.6220229983329775,
"ce_loss_13": 3.4835654973983763,
"ce_loss_2": 4.903548383712769,
"ce_loss_3": 4.61605658531189,
"ce_loss_7": 3.9362378478050233,
"epoch": 0.088,
"grad_norm": 844.0,
"kl_loss_10": 293.3538963317871,
"kl_loss_2": 2833.7354125976562,
"kl_loss_3": 2335.5184326171875,
"kl_loss_7": 967.1238098144531,
"learning_rate": 0.0009847615725553456,
"loss": 1597.0803,
"step": 880
},
{
"ce_loss_10": 3.671082556247711,
"ce_loss_13": 3.542756676673889,
"ce_loss_2": 4.8840786695480345,
"ce_loss_3": 4.608758640289307,
"ce_loss_7": 3.9651415824890135,
"epoch": 0.089,
"grad_norm": 676.0,
"kl_loss_10": 274.7398094177246,
"kl_loss_2": 2672.2400390625,
"kl_loss_3": 2185.940838623047,
"kl_loss_7": 914.7755340576172,
"learning_rate": 0.0009843704004290394,
"loss": 1572.2007,
"step": 890
},
{
"ce_loss_10": 3.5845912218093874,
"ce_loss_13": 3.4463690519332886,
"ce_loss_2": 4.845745182037353,
"ce_loss_3": 4.566518807411194,
"ce_loss_7": 3.8977394104003906,
"epoch": 0.09,
"grad_norm": 800.0,
"kl_loss_10": 293.04640731811526,
"kl_loss_2": 2812.2204833984374,
"kl_loss_3": 2313.156042480469,
"kl_loss_7": 966.190869140625,
"learning_rate": 0.0009839743506981783,
"loss": 1597.2805,
"step": 900
},
{
"ce_loss_10": 3.5071211099624633,
"ce_loss_13": 3.369294321537018,
"ce_loss_2": 4.836311769485474,
"ce_loss_3": 4.550878620147705,
"ce_loss_7": 3.8309507608413695,
"epoch": 0.091,
"grad_norm": 716.0,
"kl_loss_10": 298.81206665039065,
"kl_loss_2": 2958.2573974609377,
"kl_loss_3": 2443.9187561035155,
"kl_loss_7": 1005.0242462158203,
"learning_rate": 0.0009835734273509786,
"loss": 1627.2797,
"step": 910
},
{
"ce_loss_10": 3.6050177574157716,
"ce_loss_13": 3.4665517807006836,
"ce_loss_2": 4.881958699226379,
"ce_loss_3": 4.6013915777206424,
"ce_loss_7": 3.9145362257957457,
"epoch": 0.092,
"grad_norm": 720.0,
"kl_loss_10": 288.0885604858398,
"kl_loss_2": 2799.756945800781,
"kl_loss_3": 2307.6742553710938,
"kl_loss_7": 959.5810729980469,
"learning_rate": 0.0009831676344247342,
"loss": 1585.5819,
"step": 920
},
{
"ce_loss_10": 3.615782046318054,
"ce_loss_13": 3.484424388408661,
"ce_loss_2": 4.840068244934082,
"ce_loss_3": 4.566077804565429,
"ce_loss_7": 3.905368459224701,
"epoch": 0.093,
"grad_norm": 592.0,
"kl_loss_10": 284.13806304931643,
"kl_loss_2": 2716.098291015625,
"kl_loss_3": 2237.568524169922,
"kl_loss_7": 925.932373046875,
"learning_rate": 0.0009827569760057755,
"loss": 1574.975,
"step": 930
},
{
"ce_loss_10": 3.5478480219841004,
"ce_loss_13": 3.4008304595947267,
"ce_loss_2": 4.878832292556763,
"ce_loss_3": 4.597835183143616,
"ce_loss_7": 3.860486149787903,
"epoch": 0.094,
"grad_norm": 812.0,
"kl_loss_10": 311.2947525024414,
"kl_loss_2": 2955.33916015625,
"kl_loss_3": 2458.781884765625,
"kl_loss_7": 985.0075500488281,
"learning_rate": 0.000982341456229428,
"loss": 1619.0104,
"step": 940
},
{
"ce_loss_10": 3.6401113510131835,
"ce_loss_13": 3.4997127175331117,
"ce_loss_2": 4.909311819076538,
"ce_loss_3": 4.633120918273926,
"ce_loss_7": 3.936661887168884,
"epoch": 0.095,
"grad_norm": 768.0,
"kl_loss_10": 304.94605484008787,
"kl_loss_2": 2847.3047485351562,
"kl_loss_3": 2358.746990966797,
"kl_loss_7": 958.3424041748046,
"learning_rate": 0.000981921079279971,
"loss": 1575.8767,
"step": 950
},
{
"ce_loss_10": 3.6493973970413207,
"ce_loss_13": 3.5170445680618285,
"ce_loss_2": 4.842743754386902,
"ce_loss_3": 4.559553527832032,
"ce_loss_7": 3.913013446331024,
"epoch": 0.096,
"grad_norm": 632.0,
"kl_loss_10": 287.80171127319335,
"kl_loss_2": 2681.031005859375,
"kl_loss_3": 2186.1464904785157,
"kl_loss_7": 891.4322113037109,
"learning_rate": 0.0009814958493905962,
"loss": 1541.8673,
"step": 960
},
{
"ce_loss_10": 3.6059035897254943,
"ce_loss_13": 3.464053213596344,
"ce_loss_2": 4.885409092903137,
"ce_loss_3": 4.605575942993164,
"ce_loss_7": 3.901495134830475,
"epoch": 0.097,
"grad_norm": 644.0,
"kl_loss_10": 302.9938400268555,
"kl_loss_2": 2842.060888671875,
"kl_loss_3": 2348.8412109375,
"kl_loss_7": 943.344677734375,
"learning_rate": 0.0009810657708433637,
"loss": 1620.3537,
"step": 970
},
{
"ce_loss_10": 3.6700100898742676,
"ce_loss_13": 3.538521420955658,
"ce_loss_2": 4.868229222297669,
"ce_loss_3": 4.590689539909363,
"ce_loss_7": 3.9474687933921815,
"epoch": 0.098,
"grad_norm": 808.0,
"kl_loss_10": 283.2241409301758,
"kl_loss_2": 2674.522265625,
"kl_loss_3": 2192.326556396484,
"kl_loss_7": 894.1458190917969,
"learning_rate": 0.0009806308479691594,
"loss": 1528.2636,
"step": 980
},
{
"ce_loss_10": 3.691223752498627,
"ce_loss_13": 3.55548814535141,
"ce_loss_2": 4.925488543510437,
"ce_loss_3": 4.648779034614563,
"ce_loss_7": 3.9924039959907534,
"epoch": 0.099,
"grad_norm": 740.0,
"kl_loss_10": 294.3150146484375,
"kl_loss_2": 2748.0041381835936,
"kl_loss_3": 2268.979638671875,
"kl_loss_7": 946.8526397705078,
"learning_rate": 0.0009801910851476522,
"loss": 1554.0744,
"step": 990
},
{
"ce_loss_10": 3.6008501768112184,
"ce_loss_13": 3.465990114212036,
"ce_loss_2": 4.890150642395019,
"ce_loss_3": 4.609904193878174,
"ce_loss_7": 3.9068346500396727,
"epoch": 0.1,
"grad_norm": 736.0,
"kl_loss_10": 294.7660331726074,
"kl_loss_2": 2875.2068603515627,
"kl_loss_3": 2379.5891052246093,
"kl_loss_7": 970.1351318359375,
"learning_rate": 0.0009797464868072487,
"loss": 1582.4648,
"step": 1000
},
{
"ce_loss_10": 3.5892885446548464,
"ce_loss_13": 3.454503262042999,
"ce_loss_2": 4.837452292442322,
"ce_loss_3": 4.55982882976532,
"ce_loss_7": 3.887318527698517,
"epoch": 0.101,
"grad_norm": 724.0,
"kl_loss_10": 288.82502670288085,
"kl_loss_2": 2762.65830078125,
"kl_loss_3": 2282.756170654297,
"kl_loss_7": 944.8302276611328,
"learning_rate": 0.0009792970574250492,
"loss": 1564.9662,
"step": 1010
},
{
"ce_loss_10": 3.6221608400344847,
"ce_loss_13": 3.482994794845581,
"ce_loss_2": 4.848793458938599,
"ce_loss_3": 4.575083756446839,
"ce_loss_7": 3.914657413959503,
"epoch": 0.102,
"grad_norm": 612.0,
"kl_loss_10": 290.8812942504883,
"kl_loss_2": 2743.8400146484373,
"kl_loss_3": 2261.9089599609374,
"kl_loss_7": 937.5250091552734,
"learning_rate": 0.0009788428015268028,
"loss": 1536.8119,
"step": 1020
},
{
"ce_loss_10": 3.6110181331634523,
"ce_loss_13": 3.47798638343811,
"ce_loss_2": 4.840990829467773,
"ce_loss_3": 4.55189163684845,
"ce_loss_7": 3.9010056853294373,
"epoch": 0.103,
"grad_norm": 616.0,
"kl_loss_10": 281.37939529418946,
"kl_loss_2": 2739.4623291015623,
"kl_loss_3": 2238.093048095703,
"kl_loss_7": 923.4858306884765,
"learning_rate": 0.0009783837236868609,
"loss": 1534.7721,
"step": 1030
},
{
"ce_loss_10": 3.5802615523338317,
"ce_loss_13": 3.4459127306938173,
"ce_loss_2": 4.818247056007385,
"ce_loss_3": 4.546270060539245,
"ce_loss_7": 3.8740112662315367,
"epoch": 0.104,
"grad_norm": 696.0,
"kl_loss_10": 281.4418014526367,
"kl_loss_2": 2719.910290527344,
"kl_loss_3": 2248.530157470703,
"kl_loss_7": 921.926953125,
"learning_rate": 0.0009779198285281327,
"loss": 1537.119,
"step": 1040
},
{
"ce_loss_10": 3.577412283420563,
"ce_loss_13": 3.4400023460388183,
"ce_loss_2": 4.825755000114441,
"ce_loss_3": 4.554906344413757,
"ce_loss_7": 3.8695693135261537,
"epoch": 0.105,
"grad_norm": 784.0,
"kl_loss_10": 293.84764709472654,
"kl_loss_2": 2770.2111328125,
"kl_loss_3": 2280.982073974609,
"kl_loss_7": 916.6518432617188,
"learning_rate": 0.0009774511207220368,
"loss": 1562.095,
"step": 1050
},
{
"ce_loss_10": 3.621231746673584,
"ce_loss_13": 3.4823400259017943,
"ce_loss_2": 4.867471241950989,
"ce_loss_3": 4.584862947463989,
"ce_loss_7": 3.895237350463867,
"epoch": 0.106,
"grad_norm": 588.0,
"kl_loss_10": 306.07321014404295,
"kl_loss_2": 2785.361218261719,
"kl_loss_3": 2286.776574707031,
"kl_loss_7": 918.4756744384765,
"learning_rate": 0.0009769776049884564,
"loss": 1554.5619,
"step": 1060
},
{
"ce_loss_10": 3.5330086588859557,
"ce_loss_13": 3.387469935417175,
"ce_loss_2": 4.804182314872742,
"ce_loss_3": 4.539949297904968,
"ce_loss_7": 3.8264609456062315,
"epoch": 0.107,
"grad_norm": 1184.0,
"kl_loss_10": 307.66697082519534,
"kl_loss_2": 2836.2517578125,
"kl_loss_3": 2373.5376220703124,
"kl_loss_7": 943.6192169189453,
"learning_rate": 0.0009764992860956889,
"loss": 1622.7785,
"step": 1070
},
{
"ce_loss_10": 3.677293050289154,
"ce_loss_13": 3.5469510316848756,
"ce_loss_2": 4.837077927589417,
"ce_loss_3": 4.588571333885193,
"ce_loss_7": 3.9465363740921022,
"epoch": 0.108,
"grad_norm": 816.0,
"kl_loss_10": 286.8066802978516,
"kl_loss_2": 2605.4248657226562,
"kl_loss_3": 2175.9279296875,
"kl_loss_7": 899.353060913086,
"learning_rate": 0.0009760161688604008,
"loss": 1520.9383,
"step": 1080
},
{
"ce_loss_10": 3.6768419981002807,
"ce_loss_13": 3.54748477935791,
"ce_loss_2": 4.881722617149353,
"ce_loss_3": 4.620517659187317,
"ce_loss_7": 3.9953080892562864,
"epoch": 0.109,
"grad_norm": 840.0,
"kl_loss_10": 283.82303619384766,
"kl_loss_2": 2660.0453125,
"kl_loss_3": 2210.3756591796873,
"kl_loss_7": 954.3282287597656,
"learning_rate": 0.0009755282581475768,
"loss": 1552.3523,
"step": 1090
},
{
"ce_loss_10": 3.742873156070709,
"ce_loss_13": 3.60170716047287,
"ce_loss_2": 4.9219562292099,
"ce_loss_3": 4.631097722053528,
"ce_loss_7": 4.016275346279144,
"epoch": 0.11,
"grad_norm": 792.0,
"kl_loss_10": 295.95645599365236,
"kl_loss_2": 2660.5046997070312,
"kl_loss_3": 2150.144982910156,
"kl_loss_7": 938.1217224121094,
"learning_rate": 0.0009750355588704727,
"loss": 1496.9391,
"step": 1100
},
{
"ce_loss_10": 3.5732216477394103,
"ce_loss_13": 3.427997899055481,
"ce_loss_2": 4.788290286064148,
"ce_loss_3": 4.501250839233398,
"ce_loss_7": 3.849083948135376,
"epoch": 0.111,
"grad_norm": 644.0,
"kl_loss_10": 301.9219177246094,
"kl_loss_2": 2692.5292846679686,
"kl_loss_3": 2192.219659423828,
"kl_loss_7": 902.1104858398437,
"learning_rate": 0.0009745380759905647,
"loss": 1547.9881,
"step": 1110
},
{
"ce_loss_10": 3.525436317920685,
"ce_loss_13": 3.388839864730835,
"ce_loss_2": 4.766349339485169,
"ce_loss_3": 4.478921818733215,
"ce_loss_7": 3.8117297768592833,
"epoch": 0.112,
"grad_norm": 636.0,
"kl_loss_10": 288.7658378601074,
"kl_loss_2": 2767.7005126953127,
"kl_loss_3": 2266.3693115234373,
"kl_loss_7": 916.3693817138671,
"learning_rate": 0.0009740358145174998,
"loss": 1582.2694,
"step": 1120
},
{
"ce_loss_10": 3.674707901477814,
"ce_loss_13": 3.541641688346863,
"ce_loss_2": 4.839509201049805,
"ce_loss_3": 4.554335117340088,
"ce_loss_7": 3.9309728384017943,
"epoch": 0.113,
"grad_norm": 740.0,
"kl_loss_10": 293.9353363037109,
"kl_loss_2": 2627.9318603515626,
"kl_loss_3": 2118.4319458007812,
"kl_loss_7": 883.5943176269532,
"learning_rate": 0.0009735287795090455,
"loss": 1505.1257,
"step": 1130
},
{
"ce_loss_10": 3.5646776437759398,
"ce_loss_13": 3.4284933686256407,
"ce_loss_2": 4.8010115146636965,
"ce_loss_3": 4.510753107070923,
"ce_loss_7": 3.839044988155365,
"epoch": 0.114,
"grad_norm": 692.0,
"kl_loss_10": 289.08748931884764,
"kl_loss_2": 2724.9734130859374,
"kl_loss_3": 2216.0496459960937,
"kl_loss_7": 891.9239013671875,
"learning_rate": 0.0009730169760710386,
"loss": 1526.1704,
"step": 1140
},
{
"ce_loss_10": 3.647395300865173,
"ce_loss_13": 3.51713547706604,
"ce_loss_2": 4.854923152923584,
"ce_loss_3": 4.577234363555908,
"ce_loss_7": 3.928617572784424,
"epoch": 0.115,
"grad_norm": 800.0,
"kl_loss_10": 280.8671928405762,
"kl_loss_2": 2669.748742675781,
"kl_loss_3": 2182.849468994141,
"kl_loss_7": 895.4142913818359,
"learning_rate": 0.0009725004093573342,
"loss": 1526.191,
"step": 1150
},
{
"ce_loss_10": 3.5862129092216493,
"ce_loss_13": 3.4506229400634765,
"ce_loss_2": 4.798703122138977,
"ce_loss_3": 4.520225930213928,
"ce_loss_7": 3.877876043319702,
"epoch": 0.116,
"grad_norm": 840.0,
"kl_loss_10": 283.1919075012207,
"kl_loss_2": 2672.7715087890624,
"kl_loss_3": 2193.748876953125,
"kl_loss_7": 903.5404602050781,
"learning_rate": 0.0009719790845697534,
"loss": 1504.2701,
"step": 1160
},
{
"ce_loss_10": 3.5309566259384155,
"ce_loss_13": 3.4061906576156615,
"ce_loss_2": 4.704360723495483,
"ce_loss_3": 4.450148797035217,
"ce_loss_7": 3.803053593635559,
"epoch": 0.117,
"grad_norm": 696.0,
"kl_loss_10": 271.77204208374025,
"kl_loss_2": 2620.2907592773436,
"kl_loss_3": 2176.4858520507814,
"kl_loss_7": 863.540869140625,
"learning_rate": 0.0009714530069580309,
"loss": 1485.2044,
"step": 1170
},
{
"ce_loss_10": 3.640796720981598,
"ce_loss_13": 3.507249903678894,
"ce_loss_2": 4.853176116943359,
"ce_loss_3": 4.5715264797210695,
"ce_loss_7": 3.914932680130005,
"epoch": 0.118,
"grad_norm": 716.0,
"kl_loss_10": 285.63293685913084,
"kl_loss_2": 2675.1877319335936,
"kl_loss_3": 2189.7057678222654,
"kl_loss_7": 884.98447265625,
"learning_rate": 0.0009709221818197624,
"loss": 1502.0164,
"step": 1180
},
{
"ce_loss_10": 3.6675962805747986,
"ce_loss_13": 3.534939968585968,
"ce_loss_2": 4.88215401172638,
"ce_loss_3": 4.607950353622437,
"ce_loss_7": 3.9379210352897642,
"epoch": 0.119,
"grad_norm": 596.0,
"kl_loss_10": 288.61556854248045,
"kl_loss_2": 2711.667822265625,
"kl_loss_3": 2227.545977783203,
"kl_loss_7": 887.7295013427735,
"learning_rate": 0.0009703866145003512,
"loss": 1525.4232,
"step": 1190
},
{
"ce_loss_10": 3.6349379420280457,
"ce_loss_13": 3.5029913663864134,
"ce_loss_2": 4.829423713684082,
"ce_loss_3": 4.558488368988037,
"ce_loss_7": 3.908590841293335,
"epoch": 0.12,
"grad_norm": 660.0,
"kl_loss_10": 279.50138397216796,
"kl_loss_2": 2676.350244140625,
"kl_loss_3": 2191.2725830078125,
"kl_loss_7": 882.7566497802734,
"learning_rate": 0.0009698463103929542,
"loss": 1529.4317,
"step": 1200
},
{
"ce_loss_10": 3.605515944957733,
"ce_loss_13": 3.472998011112213,
"ce_loss_2": 4.827000212669373,
"ce_loss_3": 4.540698933601379,
"ce_loss_7": 3.879436028003693,
"epoch": 0.121,
"grad_norm": 652.0,
"kl_loss_10": 281.2242576599121,
"kl_loss_2": 2695.142529296875,
"kl_loss_3": 2191.8710388183595,
"kl_loss_7": 882.5638031005859,
"learning_rate": 0.0009693012749384279,
"loss": 1527.1828,
"step": 1210
},
{
"ce_loss_10": 3.617890453338623,
"ce_loss_13": 3.4903839349746706,
"ce_loss_2": 4.823957228660584,
"ce_loss_3": 4.546852803230285,
"ce_loss_7": 3.8918931126594543,
"epoch": 0.122,
"grad_norm": 596.0,
"kl_loss_10": 274.6055084228516,
"kl_loss_2": 2677.51435546875,
"kl_loss_3": 2182.2475463867186,
"kl_loss_7": 884.2765747070313,
"learning_rate": 0.0009687515136252732,
"loss": 1502.8832,
"step": 1220
},
{
"ce_loss_10": 3.571158289909363,
"ce_loss_13": 3.4428164839744566,
"ce_loss_2": 4.832195687294006,
"ce_loss_3": 4.558122348785401,
"ce_loss_7": 3.866991031169891,
"epoch": 0.123,
"grad_norm": 656.0,
"kl_loss_10": 285.63698654174806,
"kl_loss_2": 2814.325549316406,
"kl_loss_3": 2321.4359924316404,
"kl_loss_7": 924.2180969238282,
"learning_rate": 0.0009681970319895803,
"loss": 1610.0467,
"step": 1230
},
{
"ce_loss_10": 3.6617783904075623,
"ce_loss_13": 3.5239094376564024,
"ce_loss_2": 4.840570569038391,
"ce_loss_3": 4.5621686458587645,
"ce_loss_7": 3.9261529445648193,
"epoch": 0.124,
"grad_norm": 660.0,
"kl_loss_10": 282.9206481933594,
"kl_loss_2": 2658.744873046875,
"kl_loss_3": 2162.91650390625,
"kl_loss_7": 894.8567260742187,
"learning_rate": 0.0009676378356149733,
"loss": 1510.0703,
"step": 1240
},
{
"ce_loss_10": 3.632222390174866,
"ce_loss_13": 3.49722797870636,
"ce_loss_2": 4.803181719779968,
"ce_loss_3": 4.527125644683838,
"ce_loss_7": 3.893145501613617,
"epoch": 0.125,
"grad_norm": 676.0,
"kl_loss_10": 306.93408966064453,
"kl_loss_2": 2618.3517578125,
"kl_loss_3": 2135.0671936035155,
"kl_loss_7": 870.7611785888672,
"learning_rate": 0.0009670739301325534,
"loss": 1495.915,
"step": 1250
},
{
"ce_loss_10": 3.5965846180915833,
"ce_loss_13": 3.461331534385681,
"ce_loss_2": 4.77229871749878,
"ce_loss_3": 4.488967990875244,
"ce_loss_7": 3.870732378959656,
"epoch": 0.126,
"grad_norm": 824.0,
"kl_loss_10": 288.87402572631834,
"kl_loss_2": 2631.6656005859377,
"kl_loss_3": 2132.2338745117186,
"kl_loss_7": 890.5492980957031,
"learning_rate": 0.0009665053212208426,
"loss": 1507.3391,
"step": 1260
},
{
"ce_loss_10": 3.6325414419174193,
"ce_loss_13": 3.5006507635116577,
"ce_loss_2": 4.82985291481018,
"ce_loss_3": 4.53967547416687,
"ce_loss_7": 3.907087206840515,
"epoch": 0.127,
"grad_norm": 824.0,
"kl_loss_10": 289.66627197265626,
"kl_loss_2": 2682.7635498046875,
"kl_loss_3": 2171.759143066406,
"kl_loss_7": 897.6279174804688,
"learning_rate": 0.0009659320146057262,
"loss": 1515.1299,
"step": 1270
},
{
"ce_loss_10": 3.6294240951538086,
"ce_loss_13": 3.5012729167938232,
"ce_loss_2": 4.802068519592285,
"ce_loss_3": 4.516877055168152,
"ce_loss_7": 3.912596344947815,
"epoch": 0.128,
"grad_norm": 1040.0,
"kl_loss_10": 274.3899444580078,
"kl_loss_2": 2616.2175048828126,
"kl_loss_3": 2113.874139404297,
"kl_loss_7": 894.8648956298828,
"learning_rate": 0.0009653540160603955,
"loss": 1485.5743,
"step": 1280
},
{
"ce_loss_10": 3.631951367855072,
"ce_loss_13": 3.5082743883132936,
"ce_loss_2": 4.7942791938781735,
"ce_loss_3": 4.533441662788391,
"ce_loss_7": 3.911020016670227,
"epoch": 0.129,
"grad_norm": 980.0,
"kl_loss_10": 277.70714950561523,
"kl_loss_2": 2607.6315795898436,
"kl_loss_3": 2154.8384338378905,
"kl_loss_7": 902.8254302978515,
"learning_rate": 0.0009647713314052896,
"loss": 1475.7309,
"step": 1290
},
{
"ce_loss_10": 3.5910762190818786,
"ce_loss_13": 3.4583710193634034,
"ce_loss_2": 4.806964182853699,
"ce_loss_3": 4.536605000495911,
"ce_loss_7": 3.892735993862152,
"epoch": 0.13,
"grad_norm": 1032.0,
"kl_loss_10": 281.282731628418,
"kl_loss_2": 2713.5140380859375,
"kl_loss_3": 2245.954937744141,
"kl_loss_7": 924.882958984375,
"learning_rate": 0.0009641839665080363,
"loss": 1529.1627,
"step": 1300
},
{
"ce_loss_10": 3.5369811177253725,
"ce_loss_13": 3.4184723615646364,
"ce_loss_2": 4.746987700462341,
"ce_loss_3": 4.47196786403656,
"ce_loss_7": 3.8142111539840697,
"epoch": 0.131,
"grad_norm": 708.0,
"kl_loss_10": 267.0766883850098,
"kl_loss_2": 2651.264123535156,
"kl_loss_3": 2169.8530151367186,
"kl_loss_7": 874.0395812988281,
"learning_rate": 0.0009635919272833937,
"loss": 1472.4912,
"step": 1310
},
{
"ce_loss_10": 3.582905340194702,
"ce_loss_13": 3.4547547817230226,
"ce_loss_2": 4.782030344009399,
"ce_loss_3": 4.50511953830719,
"ce_loss_7": 3.8575597286224363,
"epoch": 0.132,
"grad_norm": 640.0,
"kl_loss_10": 274.49700088500975,
"kl_loss_2": 2645.7089721679686,
"kl_loss_3": 2148.3076110839843,
"kl_loss_7": 865.2912628173829,
"learning_rate": 0.0009629952196931902,
"loss": 1461.5725,
"step": 1320
},
{
"ce_loss_10": 3.560918188095093,
"ce_loss_13": 3.4357552766799926,
"ce_loss_2": 4.777603983879089,
"ce_loss_3": 4.497129726409912,
"ce_loss_7": 3.8250754475593567,
"epoch": 0.133,
"grad_norm": 612.0,
"kl_loss_10": 266.5273551940918,
"kl_loss_2": 2692.33935546875,
"kl_loss_3": 2197.4263916015625,
"kl_loss_7": 846.3100128173828,
"learning_rate": 0.0009623938497462645,
"loss": 1482.4779,
"step": 1330
},
{
"ce_loss_10": 3.559932196140289,
"ce_loss_13": 3.4353162169456484,
"ce_loss_2": 4.754807543754578,
"ce_loss_3": 4.478498530387879,
"ce_loss_7": 3.8313623666763306,
"epoch": 0.134,
"grad_norm": 564.0,
"kl_loss_10": 268.2800895690918,
"kl_loss_2": 2653.6271240234373,
"kl_loss_3": 2162.7194641113283,
"kl_loss_7": 859.2419372558594,
"learning_rate": 0.0009617878234984055,
"loss": 1499.2066,
"step": 1340
},
{
"ce_loss_10": 3.651080513000488,
"ce_loss_13": 3.533881187438965,
"ce_loss_2": 4.8088576078414915,
"ce_loss_3": 4.535065650939941,
"ce_loss_7": 3.9042758703231812,
"epoch": 0.135,
"grad_norm": 712.0,
"kl_loss_10": 256.59825744628904,
"kl_loss_2": 2581.625207519531,
"kl_loss_3": 2098.4682495117186,
"kl_loss_7": 828.9938018798828,
"learning_rate": 0.0009611771470522907,
"loss": 1464.5767,
"step": 1350
},
{
"ce_loss_10": 3.5779558777809144,
"ce_loss_13": 3.457493555545807,
"ce_loss_2": 4.792022109031677,
"ce_loss_3": 4.514930057525635,
"ce_loss_7": 3.8448525190353395,
"epoch": 0.136,
"grad_norm": 616.0,
"kl_loss_10": 259.41123428344724,
"kl_loss_2": 2657.6331420898437,
"kl_loss_3": 2171.1466857910154,
"kl_loss_7": 847.0537750244141,
"learning_rate": 0.0009605618265574251,
"loss": 1459.6229,
"step": 1360
},
{
"ce_loss_10": 3.5429495334625245,
"ce_loss_13": 3.4162652492523193,
"ce_loss_2": 4.794952082633972,
"ce_loss_3": 4.535301685333252,
"ce_loss_7": 3.8165592908859254,
"epoch": 0.137,
"grad_norm": 620.0,
"kl_loss_10": 271.0598449707031,
"kl_loss_2": 2776.145849609375,
"kl_loss_3": 2325.675885009766,
"kl_loss_7": 881.587744140625,
"learning_rate": 0.0009599418682100792,
"loss": 1522.4414,
"step": 1370
},
{
"ce_loss_10": 3.58179566860199,
"ce_loss_13": 3.459395945072174,
"ce_loss_2": 4.792193937301636,
"ce_loss_3": 4.521099305152893,
"ce_loss_7": 3.84169602394104,
"epoch": 0.138,
"grad_norm": 724.0,
"kl_loss_10": 257.83258666992185,
"kl_loss_2": 2672.4068237304687,
"kl_loss_3": 2198.559918212891,
"kl_loss_7": 850.8091857910156,
"learning_rate": 0.0009593172782532268,
"loss": 1496.2724,
"step": 1380
},
{
"ce_loss_10": 3.622367191314697,
"ce_loss_13": 3.506042146682739,
"ce_loss_2": 4.801430583000183,
"ce_loss_3": 4.530508184432984,
"ce_loss_7": 3.888216722011566,
"epoch": 0.139,
"grad_norm": 672.0,
"kl_loss_10": 260.9531532287598,
"kl_loss_2": 2599.6354858398436,
"kl_loss_3": 2121.937152099609,
"kl_loss_7": 852.8548278808594,
"learning_rate": 0.0009586880629764817,
"loss": 1464.8023,
"step": 1390
},
{
"ce_loss_10": 3.546726655960083,
"ce_loss_13": 3.428490459918976,
"ce_loss_2": 4.748290467262268,
"ce_loss_3": 4.471861267089844,
"ce_loss_7": 3.824984240531921,
"epoch": 0.14,
"grad_norm": 620.0,
"kl_loss_10": 260.18620986938475,
"kl_loss_2": 2649.2240234375,
"kl_loss_3": 2164.870428466797,
"kl_loss_7": 870.0703582763672,
"learning_rate": 0.0009580542287160348,
"loss": 1462.9275,
"step": 1400
},
{
"ce_loss_10": 3.5134201645851135,
"ce_loss_13": 3.396924638748169,
"ce_loss_2": 4.727832221984864,
"ce_loss_3": 4.457144689559937,
"ce_loss_7": 3.781324291229248,
"epoch": 0.141,
"grad_norm": 724.0,
"kl_loss_10": 257.8106407165527,
"kl_loss_2": 2672.565283203125,
"kl_loss_3": 2194.398052978516,
"kl_loss_7": 841.9467041015625,
"learning_rate": 0.0009574157818545901,
"loss": 1469.0121,
"step": 1410
},
{
"ce_loss_10": 3.583372378349304,
"ce_loss_13": 3.4670314311981203,
"ce_loss_2": 4.753075981140137,
"ce_loss_3": 4.488786149024963,
"ce_loss_7": 3.8414045095443727,
"epoch": 0.142,
"grad_norm": 768.0,
"kl_loss_10": 250.4652572631836,
"kl_loss_2": 2575.260546875,
"kl_loss_3": 2109.250030517578,
"kl_loss_7": 815.4136535644532,
"learning_rate": 0.0009567727288213005,
"loss": 1470.4241,
"step": 1420
},
{
"ce_loss_10": 3.5615610837936402,
"ce_loss_13": 3.4428680539131165,
"ce_loss_2": 4.766120481491089,
"ce_loss_3": 4.489290237426758,
"ce_loss_7": 3.8387726664543154,
"epoch": 0.143,
"grad_norm": 680.0,
"kl_loss_10": 259.5032684326172,
"kl_loss_2": 2652.6231079101562,
"kl_loss_3": 2168.8318054199217,
"kl_loss_7": 872.5292297363281,
"learning_rate": 0.0009561250760917027,
"loss": 1465.2545,
"step": 1430
},
{
"ce_loss_10": 3.5825438022613527,
"ce_loss_13": 3.4635141372680662,
"ce_loss_2": 4.774414443969727,
"ce_loss_3": 4.498082184791565,
"ce_loss_7": 3.8522005438804627,
"epoch": 0.144,
"grad_norm": 656.0,
"kl_loss_10": 263.3311599731445,
"kl_loss_2": 2662.4484375,
"kl_loss_3": 2176.186492919922,
"kl_loss_7": 865.9247039794922,
"learning_rate": 0.0009554728301876525,
"loss": 1454.278,
"step": 1440
},
{
"ce_loss_10": 3.6376792669296263,
"ce_loss_13": 3.515091061592102,
"ce_loss_2": 4.810996460914612,
"ce_loss_3": 4.536413979530335,
"ce_loss_7": 3.9078781604766846,
"epoch": 0.145,
"grad_norm": 616.0,
"kl_loss_10": 259.68054962158203,
"kl_loss_2": 2600.8175415039063,
"kl_loss_3": 2120.5454040527343,
"kl_loss_7": 864.2900634765625,
"learning_rate": 0.0009548159976772592,
"loss": 1508.1567,
"step": 1450
},
{
"ce_loss_10": 3.5796504259109496,
"ce_loss_13": 3.456580376625061,
"ce_loss_2": 4.787333536148071,
"ce_loss_3": 4.520044946670533,
"ce_loss_7": 3.8587978959083555,
"epoch": 0.146,
"grad_norm": 624.0,
"kl_loss_10": 265.1648490905762,
"kl_loss_2": 2666.8885864257813,
"kl_loss_3": 2195.818231201172,
"kl_loss_7": 871.2362884521484,
"learning_rate": 0.0009541545851748186,
"loss": 1477.8201,
"step": 1460
},
{
"ce_loss_10": 3.4508144855499268,
"ce_loss_13": 3.3300524830818174,
"ce_loss_2": 4.699088287353516,
"ce_loss_3": 4.421405148506165,
"ce_loss_7": 3.735712671279907,
"epoch": 0.147,
"grad_norm": 844.0,
"kl_loss_10": 262.5924041748047,
"kl_loss_2": 2730.21630859375,
"kl_loss_3": 2243.504345703125,
"kl_loss_7": 878.0860382080078,
"learning_rate": 0.0009534885993407473,
"loss": 1496.8188,
"step": 1470
},
{
"ce_loss_10": 3.611809027194977,
"ce_loss_13": 3.4930022716522218,
"ce_loss_2": 4.806360912322998,
"ce_loss_3": 4.5402860879898075,
"ce_loss_7": 3.8858142852783204,
"epoch": 0.148,
"grad_norm": 740.0,
"kl_loss_10": 256.4318244934082,
"kl_loss_2": 2655.588269042969,
"kl_loss_3": 2175.6559936523436,
"kl_loss_7": 861.8565673828125,
"learning_rate": 0.0009528180468815154,
"loss": 1488.9336,
"step": 1480
},
{
"ce_loss_10": 3.6558565139770507,
"ce_loss_13": 3.538043713569641,
"ce_loss_2": 4.811466526985169,
"ce_loss_3": 4.544855618476868,
"ce_loss_7": 3.9390755891799927,
"epoch": 0.149,
"grad_norm": 844.0,
"kl_loss_10": 264.13821868896486,
"kl_loss_2": 2565.1232788085936,
"kl_loss_3": 2095.556463623047,
"kl_loss_7": 897.2646911621093,
"learning_rate": 0.0009521429345495787,
"loss": 1465.2869,
"step": 1490
},
{
"ce_loss_10": 3.646085023880005,
"ce_loss_13": 3.5196659207344054,
"ce_loss_2": 4.780038499832154,
"ce_loss_3": 4.50464768409729,
"ce_loss_7": 3.927055561542511,
"epoch": 0.15,
"grad_norm": 980.0,
"kl_loss_10": 266.5307144165039,
"kl_loss_2": 2540.4637084960937,
"kl_loss_3": 2068.8232849121096,
"kl_loss_7": 888.35068359375,
"learning_rate": 0.0009514632691433108,
"loss": 1455.9041,
"step": 1500
},
{
"ce_loss_10": 3.5988011956214905,
"ce_loss_13": 3.482589673995972,
"ce_loss_2": 4.76681923866272,
"ce_loss_3": 4.485762524604797,
"ce_loss_7": 3.8728180885314942,
"epoch": 0.151,
"grad_norm": 600.0,
"kl_loss_10": 260.8206298828125,
"kl_loss_2": 2587.971142578125,
"kl_loss_3": 2094.4052795410157,
"kl_loss_7": 863.3080963134765,
"learning_rate": 0.0009507790575069346,
"loss": 1457.9502,
"step": 1510
},
{
"ce_loss_10": 3.5764056205749513,
"ce_loss_13": 3.453061044216156,
"ce_loss_2": 4.775901889801025,
"ce_loss_3": 4.500339031219482,
"ce_loss_7": 3.849775242805481,
"epoch": 0.152,
"grad_norm": 672.0,
"kl_loss_10": 258.1785354614258,
"kl_loss_2": 2655.3977172851564,
"kl_loss_3": 2164.0363708496093,
"kl_loss_7": 857.1902496337891,
"learning_rate": 0.0009500903065304539,
"loss": 1495.6711,
"step": 1520
},
{
"ce_loss_10": 3.608113396167755,
"ce_loss_13": 3.498811888694763,
"ce_loss_2": 4.760950970649719,
"ce_loss_3": 4.486514663696289,
"ce_loss_7": 3.8602269887924194,
"epoch": 0.153,
"grad_norm": 664.0,
"kl_loss_10": 245.0189353942871,
"kl_loss_2": 2552.576379394531,
"kl_loss_3": 2060.3522766113283,
"kl_loss_7": 806.0807342529297,
"learning_rate": 0.0009493970231495835,
"loss": 1444.8406,
"step": 1530
},
{
"ce_loss_10": 3.547162938117981,
"ce_loss_13": 3.44165985584259,
"ce_loss_2": 4.701804065704346,
"ce_loss_3": 4.424288666248321,
"ce_loss_7": 3.8007919073104857,
"epoch": 0.154,
"grad_norm": 648.0,
"kl_loss_10": 241.08162002563478,
"kl_loss_2": 2573.9149780273438,
"kl_loss_3": 2088.7893615722655,
"kl_loss_7": 812.2064361572266,
"learning_rate": 0.0009486992143456792,
"loss": 1427.6314,
"step": 1540
},
{
"ce_loss_10": 3.5828447937965393,
"ce_loss_13": 3.4581031084060667,
"ce_loss_2": 4.834084248542785,
"ce_loss_3": 4.553447818756103,
"ce_loss_7": 3.8656217455863953,
"epoch": 0.155,
"grad_norm": 660.0,
"kl_loss_10": 263.4285690307617,
"kl_loss_2": 2765.8754150390623,
"kl_loss_3": 2266.0636291503906,
"kl_loss_7": 882.9553771972656,
"learning_rate": 0.0009479968871456679,
"loss": 1498.7352,
"step": 1550
},
{
"ce_loss_10": 3.547222447395325,
"ce_loss_13": 3.4320276618003844,
"ce_loss_2": 4.768963408470154,
"ce_loss_3": 4.480878567695617,
"ce_loss_7": 3.828988194465637,
"epoch": 0.156,
"grad_norm": 760.0,
"kl_loss_10": 259.2473831176758,
"kl_loss_2": 2697.095703125,
"kl_loss_3": 2199.6418579101564,
"kl_loss_7": 874.4399932861328,
"learning_rate": 0.0009472900486219768,
"loss": 1467.8941,
"step": 1560
},
{
"ce_loss_10": 3.54234699010849,
"ce_loss_13": 3.416030561923981,
"ce_loss_2": 4.702804708480835,
"ce_loss_3": 4.435993790626526,
"ce_loss_7": 3.811834490299225,
"epoch": 0.157,
"grad_norm": 996.0,
"kl_loss_10": 266.18832244873045,
"kl_loss_2": 2600.0114868164064,
"kl_loss_3": 2127.867303466797,
"kl_loss_7": 872.285043334961,
"learning_rate": 0.000946578705892462,
"loss": 1470.9803,
"step": 1570
},
{
"ce_loss_10": 3.5780028820037844,
"ce_loss_13": 3.457060468196869,
"ce_loss_2": 4.7225889444351195,
"ce_loss_3": 4.4475972890853885,
"ce_loss_7": 3.8437977194786073,
"epoch": 0.158,
"grad_norm": 804.0,
"kl_loss_10": 277.74141387939454,
"kl_loss_2": 2544.743029785156,
"kl_loss_3": 2066.6833618164064,
"kl_loss_7": 835.531689453125,
"learning_rate": 0.0009458628661203367,
"loss": 1460.073,
"step": 1580
},
{
"ce_loss_10": 3.5895689606666563,
"ce_loss_13": 3.4543488025665283,
"ce_loss_2": 4.777545094490051,
"ce_loss_3": 4.501162362098694,
"ce_loss_7": 3.846569240093231,
"epoch": 0.159,
"grad_norm": 748.0,
"kl_loss_10": 280.14837341308595,
"kl_loss_2": 2651.9770629882814,
"kl_loss_3": 2168.784558105469,
"kl_loss_7": 873.0370697021484,
"learning_rate": 0.0009451425365140996,
"loss": 1445.3969,
"step": 1590
},
{
"ce_loss_10": 3.6579004883766175,
"ce_loss_13": 3.5379099249839783,
"ce_loss_2": 4.773951435089112,
"ce_loss_3": 4.50466411113739,
"ce_loss_7": 3.922029638290405,
"epoch": 0.16,
"grad_norm": 728.0,
"kl_loss_10": 273.34312896728517,
"kl_loss_2": 2508.3281860351562,
"kl_loss_3": 2021.1453735351563,
"kl_loss_7": 841.1831604003906,
"learning_rate": 0.0009444177243274617,
"loss": 1408.8492,
"step": 1600
},
{
"ce_loss_10": 3.514503800868988,
"ce_loss_13": 3.388037991523743,
"ce_loss_2": 4.701039886474609,
"ce_loss_3": 4.418904185295105,
"ce_loss_7": 3.7763099312782287,
"epoch": 0.161,
"grad_norm": 704.0,
"kl_loss_10": 268.4227348327637,
"kl_loss_2": 2642.4529418945312,
"kl_loss_3": 2156.727893066406,
"kl_loss_7": 856.544287109375,
"learning_rate": 0.0009436884368592739,
"loss": 1462.7545,
"step": 1610
},
{
"ce_loss_10": 3.55902304649353,
"ce_loss_13": 3.441978645324707,
"ce_loss_2": 4.705282998085022,
"ce_loss_3": 4.427343964576721,
"ce_loss_7": 3.810055124759674,
"epoch": 0.162,
"grad_norm": 692.0,
"kl_loss_10": 253.71325302124023,
"kl_loss_2": 2545.9316528320314,
"kl_loss_3": 2055.34326171875,
"kl_loss_7": 814.7054443359375,
"learning_rate": 0.0009429546814534529,
"loss": 1452.6556,
"step": 1620
},
{
"ce_loss_10": 3.567894661426544,
"ce_loss_13": 3.4576117157936097,
"ce_loss_2": 4.725762176513672,
"ce_loss_3": 4.453288149833679,
"ce_loss_7": 3.8241241455078123,
"epoch": 0.163,
"grad_norm": 600.0,
"kl_loss_10": 249.5528419494629,
"kl_loss_2": 2561.039794921875,
"kl_loss_3": 2084.6285034179687,
"kl_loss_7": 811.9796569824218,
"learning_rate": 0.0009422164654989072,
"loss": 1405.3155,
"step": 1630
},
{
"ce_loss_10": 3.682257628440857,
"ce_loss_13": 3.57005797624588,
"ce_loss_2": 4.807721471786499,
"ce_loss_3": 4.5406172513961796,
"ce_loss_7": 3.9288353323936462,
"epoch": 0.164,
"grad_norm": 632.0,
"kl_loss_10": 249.95079803466797,
"kl_loss_2": 2525.5760131835937,
"kl_loss_3": 2063.9668884277344,
"kl_loss_7": 811.2918426513672,
"learning_rate": 0.0009414737964294635,
"loss": 1427.1312,
"step": 1640
},
{
"ce_loss_10": 3.6101224184036256,
"ce_loss_13": 3.5010381817817686,
"ce_loss_2": 4.720621514320373,
"ce_loss_3": 4.4590880393981935,
"ce_loss_7": 3.8465168356895445,
"epoch": 0.165,
"grad_norm": 592.0,
"kl_loss_10": 244.4941291809082,
"kl_loss_2": 2467.5499755859373,
"kl_loss_3": 2009.9993835449218,
"kl_loss_7": 785.2798095703125,
"learning_rate": 0.000940726681723791,
"loss": 1420.5047,
"step": 1650
},
{
"ce_loss_10": 3.4529421091079713,
"ce_loss_13": 3.3362591743469237,
"ce_loss_2": 4.67095410823822,
"ce_loss_3": 4.395775043964386,
"ce_loss_7": 3.7144131302833556,
"epoch": 0.166,
"grad_norm": 760.0,
"kl_loss_10": 256.26583633422854,
"kl_loss_2": 2688.0345825195313,
"kl_loss_3": 2212.7634155273436,
"kl_loss_7": 836.4253662109375,
"learning_rate": 0.0009399751289053266,
"loss": 1423.8466,
"step": 1660
},
{
"ce_loss_10": 3.667633831501007,
"ce_loss_13": 3.557128643989563,
"ce_loss_2": 4.805055928230286,
"ce_loss_3": 4.532746481895447,
"ce_loss_7": 3.911760663986206,
"epoch": 0.167,
"grad_norm": 700.0,
"kl_loss_10": 250.78092575073242,
"kl_loss_2": 2539.268176269531,
"kl_loss_3": 2059.193713378906,
"kl_loss_7": 797.4180786132813,
"learning_rate": 0.0009392191455421988,
"loss": 1439.8459,
"step": 1670
},
{
"ce_loss_10": 3.6344913125038145,
"ce_loss_13": 3.5230419993400575,
"ce_loss_2": 4.79052848815918,
"ce_loss_3": 4.512491989135742,
"ce_loss_7": 3.8782394886016847,
"epoch": 0.168,
"grad_norm": 672.0,
"kl_loss_10": 262.5000991821289,
"kl_loss_2": 2580.610949707031,
"kl_loss_3": 2102.571502685547,
"kl_loss_7": 817.8095031738281,
"learning_rate": 0.0009384587392471515,
"loss": 1409.023,
"step": 1680
},
{
"ce_loss_10": 3.6255006551742555,
"ce_loss_13": 3.514340567588806,
"ce_loss_2": 4.734428143501281,
"ce_loss_3": 4.468456673622131,
"ce_loss_7": 3.8644169330596925,
"epoch": 0.169,
"grad_norm": 628.0,
"kl_loss_10": 242.1350540161133,
"kl_loss_2": 2468.8160400390625,
"kl_loss_3": 2004.5263061523438,
"kl_loss_7": 785.5388061523438,
"learning_rate": 0.0009376939176774678,
"loss": 1384.7148,
"step": 1690
},
{
"ce_loss_10": 3.601682686805725,
"ce_loss_13": 3.4858548164367678,
"ce_loss_2": 4.752888894081115,
"ce_loss_3": 4.474552822113037,
"ce_loss_7": 3.842711091041565,
"epoch": 0.17,
"grad_norm": 636.0,
"kl_loss_10": 245.69830017089845,
"kl_loss_2": 2544.683557128906,
"kl_loss_3": 2064.160784912109,
"kl_loss_7": 792.8873626708985,
"learning_rate": 0.0009369246885348925,
"loss": 1434.5433,
"step": 1700
},
{
"ce_loss_10": 3.5952138662338258,
"ce_loss_13": 3.4776424884796144,
"ce_loss_2": 4.792232918739319,
"ce_loss_3": 4.513515877723694,
"ce_loss_7": 3.8616483092308043,
"epoch": 0.171,
"grad_norm": 644.0,
"kl_loss_10": 250.0074020385742,
"kl_loss_2": 2643.389611816406,
"kl_loss_3": 2155.037109375,
"kl_loss_7": 835.121694946289,
"learning_rate": 0.0009361510595655545,
"loss": 1446.8347,
"step": 1710
},
{
"ce_loss_10": 3.558023285865784,
"ce_loss_13": 3.438031017780304,
"ce_loss_2": 4.730398392677307,
"ce_loss_3": 4.452085471153259,
"ce_loss_7": 3.815502095222473,
"epoch": 0.172,
"grad_norm": 672.0,
"kl_loss_10": 260.42660064697264,
"kl_loss_2": 2622.6736572265627,
"kl_loss_3": 2126.2764099121096,
"kl_loss_7": 833.0841033935546,
"learning_rate": 0.0009353730385598887,
"loss": 1443.5211,
"step": 1720
},
{
"ce_loss_10": 3.4771748185157776,
"ce_loss_13": 3.364219045639038,
"ce_loss_2": 4.693475008010864,
"ce_loss_3": 4.410137629508972,
"ce_loss_7": 3.7466461181640627,
"epoch": 0.173,
"grad_norm": 576.0,
"kl_loss_10": 244.67605361938476,
"kl_loss_2": 2652.6466064453125,
"kl_loss_3": 2161.871044921875,
"kl_loss_7": 827.6846221923828,
"learning_rate": 0.0009345906333525581,
"loss": 1466.803,
"step": 1730
},
{
"ce_loss_10": 3.515894877910614,
"ce_loss_13": 3.403614568710327,
"ce_loss_2": 4.707322573661804,
"ce_loss_3": 4.422236812114716,
"ce_loss_7": 3.7741501927375793,
"epoch": 0.174,
"grad_norm": 608.0,
"kl_loss_10": 250.64810333251953,
"kl_loss_2": 2637.284143066406,
"kl_loss_3": 2135.0481140136717,
"kl_loss_7": 835.5378570556641,
"learning_rate": 0.0009338038518223745,
"loss": 1437.4744,
"step": 1740
},
{
"ce_loss_10": 3.5834938049316407,
"ce_loss_13": 3.468910980224609,
"ce_loss_2": 4.762004089355469,
"ce_loss_3": 4.486204957962036,
"ce_loss_7": 3.8505713820457457,
"epoch": 0.175,
"grad_norm": 652.0,
"kl_loss_10": 254.51539306640626,
"kl_loss_2": 2618.7681884765625,
"kl_loss_3": 2135.773858642578,
"kl_loss_7": 849.8012634277344,
"learning_rate": 0.0009330127018922195,
"loss": 1479.132,
"step": 1750
},
{
"ce_loss_10": 3.538338470458984,
"ce_loss_13": 3.4237607955932616,
"ce_loss_2": 4.7127416133880615,
"ce_loss_3": 4.443085932731629,
"ce_loss_7": 3.794516444206238,
"epoch": 0.176,
"grad_norm": 628.0,
"kl_loss_10": 245.44887084960936,
"kl_loss_2": 2605.848291015625,
"kl_loss_3": 2119.558026123047,
"kl_loss_7": 818.0517547607421,
"learning_rate": 0.0009322171915289634,
"loss": 1443.3754,
"step": 1760
},
{
"ce_loss_10": 3.5648101806640624,
"ce_loss_13": 3.459370458126068,
"ce_loss_2": 4.707282447814942,
"ce_loss_3": 4.433714365959167,
"ce_loss_7": 3.812247085571289,
"epoch": 0.177,
"grad_norm": 576.0,
"kl_loss_10": 245.77867431640624,
"kl_loss_2": 2558.2099365234376,
"kl_loss_3": 2069.932727050781,
"kl_loss_7": 809.8514526367187,
"learning_rate": 0.0009314173287433873,
"loss": 1402.6621,
"step": 1770
},
{
"ce_loss_10": 3.5681435227394105,
"ce_loss_13": 3.4554543256759644,
"ce_loss_2": 4.716624093055725,
"ce_loss_3": 4.441683101654053,
"ce_loss_7": 3.8178189396858215,
"epoch": 0.178,
"grad_norm": 704.0,
"kl_loss_10": 250.21724319458008,
"kl_loss_2": 2566.923547363281,
"kl_loss_3": 2076.703576660156,
"kl_loss_7": 808.6476196289062,
"learning_rate": 0.0009306131215901003,
"loss": 1403.6738,
"step": 1780
},
{
"ce_loss_10": 3.60051885843277,
"ce_loss_13": 3.4851089835166933,
"ce_loss_2": 4.74112594127655,
"ce_loss_3": 4.468067002296448,
"ce_loss_7": 3.8350677728652953,
"epoch": 0.179,
"grad_norm": 656.0,
"kl_loss_10": 247.00397262573242,
"kl_loss_2": 2550.1098876953124,
"kl_loss_3": 2071.478955078125,
"kl_loss_7": 797.3671112060547,
"learning_rate": 0.0009298045781674596,
"loss": 1386.7528,
"step": 1790
},
{
"ce_loss_10": 3.576521575450897,
"ce_loss_13": 3.465667748451233,
"ce_loss_2": 4.70545973777771,
"ce_loss_3": 4.437681531906128,
"ce_loss_7": 3.823224997520447,
"epoch": 0.18,
"grad_norm": 640.0,
"kl_loss_10": 245.9371192932129,
"kl_loss_2": 2516.936376953125,
"kl_loss_3": 2031.5426147460937,
"kl_loss_7": 793.7673767089843,
"learning_rate": 0.0009289917066174886,
"loss": 1415.4195,
"step": 1800
},
{
"ce_loss_10": 3.568215787410736,
"ce_loss_13": 3.465099549293518,
"ce_loss_2": 4.663200092315674,
"ce_loss_3": 4.39816825389862,
"ce_loss_7": 3.797432005405426,
"epoch": 0.181,
"grad_norm": 596.0,
"kl_loss_10": 236.99261932373048,
"kl_loss_2": 2444.4304931640627,
"kl_loss_3": 1977.5733642578125,
"kl_loss_7": 762.0940002441406,
"learning_rate": 0.0009281745151257945,
"loss": 1372.7959,
"step": 1810
},
{
"ce_loss_10": 3.589988100528717,
"ce_loss_13": 3.4779568314552307,
"ce_loss_2": 4.741122603416443,
"ce_loss_3": 4.463086032867432,
"ce_loss_7": 3.8317890048027037,
"epoch": 0.182,
"grad_norm": 576.0,
"kl_loss_10": 245.52628021240236,
"kl_loss_2": 2546.031115722656,
"kl_loss_3": 2057.9885314941407,
"kl_loss_7": 789.636849975586,
"learning_rate": 0.0009273530119214868,
"loss": 1414.9602,
"step": 1820
},
{
"ce_loss_10": 3.6874829173088073,
"ce_loss_13": 3.5830198526382446,
"ce_loss_2": 4.805440378189087,
"ce_loss_3": 4.542007470130921,
"ce_loss_7": 3.922217535972595,
"epoch": 0.183,
"grad_norm": 668.0,
"kl_loss_10": 240.62074966430663,
"kl_loss_2": 2477.285852050781,
"kl_loss_3": 2025.298876953125,
"kl_loss_7": 778.8258850097657,
"learning_rate": 0.0009265272052770935,
"loss": 1365.1876,
"step": 1830
},
{
"ce_loss_10": 3.5063879013061525,
"ce_loss_13": 3.3919414281845093,
"ce_loss_2": 4.691436982154846,
"ce_loss_3": 4.40977828502655,
"ce_loss_7": 3.7569626212120055,
"epoch": 0.184,
"grad_norm": 672.0,
"kl_loss_10": 245.37701873779298,
"kl_loss_2": 2600.8256103515623,
"kl_loss_3": 2103.731726074219,
"kl_loss_7": 796.2739471435547,
"learning_rate": 0.0009256971035084784,
"loss": 1423.7646,
"step": 1840
},
{
"ce_loss_10": 3.4534160137176513,
"ce_loss_13": 3.337074172496796,
"ce_loss_2": 4.650833582878112,
"ce_loss_3": 4.375414204597473,
"ce_loss_7": 3.7153374671936037,
"epoch": 0.185,
"grad_norm": 872.0,
"kl_loss_10": 253.35809020996095,
"kl_loss_2": 2636.6057983398437,
"kl_loss_3": 2149.6516052246093,
"kl_loss_7": 833.7322570800782,
"learning_rate": 0.0009248627149747573,
"loss": 1433.1182,
"step": 1850
},
{
"ce_loss_10": 3.6552318572998046,
"ce_loss_13": 3.5436174392700197,
"ce_loss_2": 4.771462321281433,
"ce_loss_3": 4.505353546142578,
"ce_loss_7": 3.8980504512786864,
"epoch": 0.186,
"grad_norm": 628.0,
"kl_loss_10": 244.59689865112304,
"kl_loss_2": 2502.3949462890623,
"kl_loss_3": 2027.7750183105468,
"kl_loss_7": 792.985708618164,
"learning_rate": 0.0009240240480782129,
"loss": 1402.8563,
"step": 1860
},
{
"ce_loss_10": 3.559772253036499,
"ce_loss_13": 3.444066059589386,
"ce_loss_2": 4.714746379852295,
"ce_loss_3": 4.439750409126281,
"ce_loss_7": 3.8083682656288147,
"epoch": 0.187,
"grad_norm": 696.0,
"kl_loss_10": 248.7159553527832,
"kl_loss_2": 2569.0499755859373,
"kl_loss_3": 2081.2441528320314,
"kl_loss_7": 799.65380859375,
"learning_rate": 0.0009231811112642122,
"loss": 1391.885,
"step": 1870
},
{
"ce_loss_10": 3.603023958206177,
"ce_loss_13": 3.4911489486694336,
"ce_loss_2": 4.7107093334198,
"ce_loss_3": 4.44477071762085,
"ce_loss_7": 3.8424261450767516,
"epoch": 0.188,
"grad_norm": 756.0,
"kl_loss_10": 245.3149284362793,
"kl_loss_2": 2484.4417358398437,
"kl_loss_3": 2013.6257080078126,
"kl_loss_7": 788.6129425048828,
"learning_rate": 0.0009223339130211192,
"loss": 1382.5715,
"step": 1880
},
{
"ce_loss_10": 3.451503300666809,
"ce_loss_13": 3.3456833481788637,
"ce_loss_2": 4.6354892492294315,
"ce_loss_3": 4.368392133712769,
"ce_loss_7": 3.7025105237960814,
"epoch": 0.189,
"grad_norm": 796.0,
"kl_loss_10": 235.8703857421875,
"kl_loss_2": 2606.5722534179686,
"kl_loss_3": 2137.981573486328,
"kl_loss_7": 795.582894897461,
"learning_rate": 0.0009214824618802108,
"loss": 1426.9203,
"step": 1890
},
{
"ce_loss_10": 3.633524978160858,
"ce_loss_13": 3.5242482304573057,
"ce_loss_2": 4.770471715927124,
"ce_loss_3": 4.501252269744873,
"ce_loss_7": 3.883507859706879,
"epoch": 0.19,
"grad_norm": 652.0,
"kl_loss_10": 237.73654251098634,
"kl_loss_2": 2486.365759277344,
"kl_loss_3": 2019.5263671875,
"kl_loss_7": 793.6973388671875,
"learning_rate": 0.0009206267664155906,
"loss": 1428.9256,
"step": 1900
},
{
"ce_loss_10": 3.5532122611999513,
"ce_loss_13": 3.443064200878143,
"ce_loss_2": 4.697825288772583,
"ce_loss_3": 4.427114844322205,
"ce_loss_7": 3.799003005027771,
"epoch": 0.191,
"grad_norm": 636.0,
"kl_loss_10": 241.10890350341796,
"kl_loss_2": 2548.555554199219,
"kl_loss_3": 2061.9953002929688,
"kl_loss_7": 794.9706726074219,
"learning_rate": 0.0009197668352441024,
"loss": 1417.5695,
"step": 1910
},
{
"ce_loss_10": 3.608381187915802,
"ce_loss_13": 3.4997890830039977,
"ce_loss_2": 4.748308372497559,
"ce_loss_3": 4.471417784690857,
"ce_loss_7": 3.851922130584717,
"epoch": 0.192,
"grad_norm": 636.0,
"kl_loss_10": 242.21438293457032,
"kl_loss_2": 2509.6267700195312,
"kl_loss_3": 2027.6890441894532,
"kl_loss_7": 779.7179229736328,
"learning_rate": 0.0009189026770252437,
"loss": 1396.1437,
"step": 1920
},
{
"ce_loss_10": 3.6384175658226012,
"ce_loss_13": 3.5275412440299987,
"ce_loss_2": 4.762041211128235,
"ce_loss_3": 4.48351948261261,
"ce_loss_7": 3.8741258502006533,
"epoch": 0.193,
"grad_norm": 688.0,
"kl_loss_10": 250.4880401611328,
"kl_loss_2": 2491.730749511719,
"kl_loss_3": 2004.1307067871094,
"kl_loss_7": 785.3524200439454,
"learning_rate": 0.000918034300461078,
"loss": 1438.3092,
"step": 1930
},
{
"ce_loss_10": 3.675648069381714,
"ce_loss_13": 3.555274224281311,
"ce_loss_2": 4.77588381767273,
"ce_loss_3": 4.506980061531067,
"ce_loss_7": 3.9165189027786256,
"epoch": 0.194,
"grad_norm": 1048.0,
"kl_loss_10": 251.8736488342285,
"kl_loss_2": 2458.3200805664064,
"kl_loss_3": 1995.1934143066405,
"kl_loss_7": 806.3202392578125,
"learning_rate": 0.0009171617142961477,
"loss": 1389.0176,
"step": 1940
},
{
"ce_loss_10": 3.623457467556,
"ce_loss_13": 3.512966477870941,
"ce_loss_2": 4.729074192047119,
"ce_loss_3": 4.464083290100097,
"ce_loss_7": 3.8867802739143373,
"epoch": 0.195,
"grad_norm": 688.0,
"kl_loss_10": 255.58710021972655,
"kl_loss_2": 2479.066796875,
"kl_loss_3": 2001.9678588867187,
"kl_loss_7": 833.6603210449218,
"learning_rate": 0.0009162849273173857,
"loss": 1403.0846,
"step": 1950
},
{
"ce_loss_10": 3.5657452821731566,
"ce_loss_13": 3.457024359703064,
"ce_loss_2": 4.679703283309936,
"ce_loss_3": 4.409625816345215,
"ce_loss_7": 3.8033991694450378,
"epoch": 0.196,
"grad_norm": 656.0,
"kl_loss_10": 242.9659797668457,
"kl_loss_2": 2473.7406372070313,
"kl_loss_3": 2000.534735107422,
"kl_loss_7": 783.4248046875,
"learning_rate": 0.0009154039483540273,
"loss": 1391.609,
"step": 1960
},
{
"ce_loss_10": 3.5444339156150817,
"ce_loss_13": 3.433286416530609,
"ce_loss_2": 4.677814674377442,
"ce_loss_3": 4.395683622360229,
"ce_loss_7": 3.784545695781708,
"epoch": 0.197,
"grad_norm": 608.0,
"kl_loss_10": 239.23334732055665,
"kl_loss_2": 2520.074182128906,
"kl_loss_3": 2031.6637634277345,
"kl_loss_7": 792.3442230224609,
"learning_rate": 0.0009145187862775209,
"loss": 1388.6972,
"step": 1970
},
{
"ce_loss_10": 3.572359085083008,
"ce_loss_13": 3.466273844242096,
"ce_loss_2": 4.692143273353577,
"ce_loss_3": 4.418303954601288,
"ce_loss_7": 3.8197664856910705,
"epoch": 0.198,
"grad_norm": 660.0,
"kl_loss_10": 241.7268035888672,
"kl_loss_2": 2492.987420654297,
"kl_loss_3": 2004.3476135253907,
"kl_loss_7": 794.6048614501954,
"learning_rate": 0.0009136294500014386,
"loss": 1377.9902,
"step": 1980
},
{
"ce_loss_10": 3.52831609249115,
"ce_loss_13": 3.4167757987976075,
"ce_loss_2": 4.705040359497071,
"ce_loss_3": 4.434882926940918,
"ce_loss_7": 3.7779016494750977,
"epoch": 0.199,
"grad_norm": 684.0,
"kl_loss_10": 242.86552047729492,
"kl_loss_2": 2578.6255493164062,
"kl_loss_3": 2108.4060180664064,
"kl_loss_7": 798.0517791748047,
"learning_rate": 0.000912735948481387,
"loss": 1426.8047,
"step": 1990
},
{
"ce_loss_10": 3.5601553082466126,
"ce_loss_13": 3.449883997440338,
"ce_loss_2": 4.691212105751037,
"ce_loss_3": 4.414692604541779,
"ce_loss_7": 3.8016102075576783,
"epoch": 0.2,
"grad_norm": 684.0,
"kl_loss_10": 242.28478622436523,
"kl_loss_2": 2530.514270019531,
"kl_loss_3": 2040.9486206054687,
"kl_loss_7": 800.2102844238282,
"learning_rate": 0.0009118382907149164,
"loss": 1370.7061,
"step": 2000
},
{
"ce_loss_10": 3.5833643674850464,
"ce_loss_13": 3.4740814447402952,
"ce_loss_2": 4.70447518825531,
"ce_loss_3": 4.429442811012268,
"ce_loss_7": 3.8237846970558165,
"epoch": 0.201,
"grad_norm": 612.0,
"kl_loss_10": 244.51040420532226,
"kl_loss_2": 2494.5580932617186,
"kl_loss_3": 2005.631494140625,
"kl_loss_7": 779.4999328613281,
"learning_rate": 0.0009109364857414306,
"loss": 1380.7336,
"step": 2010
},
{
"ce_loss_10": 3.5532099485397337,
"ce_loss_13": 3.4470490336418154,
"ce_loss_2": 4.681869411468506,
"ce_loss_3": 4.40200264453888,
"ce_loss_7": 3.790750026702881,
"epoch": 0.202,
"grad_norm": 608.0,
"kl_loss_10": 240.87973327636718,
"kl_loss_2": 2528.7482421875,
"kl_loss_3": 2036.1677551269531,
"kl_loss_7": 777.9466033935547,
"learning_rate": 0.0009100305426420956,
"loss": 1419.7547,
"step": 2020
},
{
"ce_loss_10": 3.5112710118293764,
"ce_loss_13": 3.404540646076202,
"ce_loss_2": 4.711292386054993,
"ce_loss_3": 4.432630777359009,
"ce_loss_7": 3.757065165042877,
"epoch": 0.203,
"grad_norm": 664.0,
"kl_loss_10": 238.4617919921875,
"kl_loss_2": 2652.4912963867187,
"kl_loss_3": 2152.2258422851564,
"kl_loss_7": 790.063916015625,
"learning_rate": 0.0009091204705397484,
"loss": 1413.6135,
"step": 2030
},
{
"ce_loss_10": 3.508105480670929,
"ce_loss_13": 3.399987006187439,
"ce_loss_2": 4.703747749328613,
"ce_loss_3": 4.428358674049377,
"ce_loss_7": 3.7540559649467466,
"epoch": 0.204,
"grad_norm": 700.0,
"kl_loss_10": 242.5270248413086,
"kl_loss_2": 2644.1144165039063,
"kl_loss_3": 2155.070489501953,
"kl_loss_7": 790.7262329101562,
"learning_rate": 0.0009082062785988049,
"loss": 1424.9719,
"step": 2040
},
{
"ce_loss_10": 3.638819897174835,
"ce_loss_13": 3.5337455749511717,
"ce_loss_2": 4.727799487113953,
"ce_loss_3": 4.457953143119812,
"ce_loss_7": 3.8601122856140138,
"epoch": 0.205,
"grad_norm": 668.0,
"kl_loss_10": 235.8659812927246,
"kl_loss_2": 2476.5026977539064,
"kl_loss_3": 1996.3927185058594,
"kl_loss_7": 769.8516876220704,
"learning_rate": 0.0009072879760251679,
"loss": 1387.9949,
"step": 2050
},
{
"ce_loss_10": 3.5858229279518126,
"ce_loss_13": 3.475198233127594,
"ce_loss_2": 4.739975643157959,
"ce_loss_3": 4.475312519073486,
"ce_loss_7": 3.834290158748627,
"epoch": 0.206,
"grad_norm": 700.0,
"kl_loss_10": 239.9431396484375,
"kl_loss_2": 2570.9485107421874,
"kl_loss_3": 2100.634240722656,
"kl_loss_7": 789.2198791503906,
"learning_rate": 0.0009063655720661341,
"loss": 1402.2605,
"step": 2060
},
{
"ce_loss_10": 3.6313581228256226,
"ce_loss_13": 3.5262081384658814,
"ce_loss_2": 4.7349327325820925,
"ce_loss_3": 4.470538520812989,
"ce_loss_7": 3.864632213115692,
"epoch": 0.207,
"grad_norm": 580.0,
"kl_loss_10": 238.97062911987305,
"kl_loss_2": 2454.8896240234376,
"kl_loss_3": 1987.1748107910157,
"kl_loss_7": 776.5097869873047,
"learning_rate": 0.000905439076010301,
"loss": 1376.7035,
"step": 2070
},
{
"ce_loss_10": 3.5894328594207763,
"ce_loss_13": 3.4751851201057433,
"ce_loss_2": 4.723314690589905,
"ce_loss_3": 4.451727390289307,
"ce_loss_7": 3.830363655090332,
"epoch": 0.208,
"grad_norm": 620.0,
"kl_loss_10": 243.43872604370117,
"kl_loss_2": 2525.0844848632814,
"kl_loss_3": 2046.1018615722655,
"kl_loss_7": 793.8133911132812,
"learning_rate": 0.0009045084971874737,
"loss": 1367.5893,
"step": 2080
},
{
"ce_loss_10": 3.5676583290100097,
"ce_loss_13": 3.452693998813629,
"ce_loss_2": 4.699956917762757,
"ce_loss_3": 4.424242115020752,
"ce_loss_7": 3.806007480621338,
"epoch": 0.209,
"grad_norm": 688.0,
"kl_loss_10": 249.41274871826172,
"kl_loss_2": 2529.7607299804686,
"kl_loss_3": 2042.927227783203,
"kl_loss_7": 789.6784515380859,
"learning_rate": 0.0009035738449685707,
"loss": 1418.6186,
"step": 2090
},
{
"ce_loss_10": 3.510753297805786,
"ce_loss_13": 3.3990254640579223,
"ce_loss_2": 4.691071200370788,
"ce_loss_3": 4.41790235042572,
"ce_loss_7": 3.7591400265693666,
"epoch": 0.21,
"grad_norm": 600.0,
"kl_loss_10": 248.95298919677734,
"kl_loss_2": 2601.993273925781,
"kl_loss_3": 2124.828485107422,
"kl_loss_7": 799.376498413086,
"learning_rate": 0.0009026351287655293,
"loss": 1399.0971,
"step": 2100
},
{
"ce_loss_10": 3.697406494617462,
"ce_loss_13": 3.5970078110694885,
"ce_loss_2": 4.7389151096344,
"ce_loss_3": 4.481091260910034,
"ce_loss_7": 3.9209010362625123,
"epoch": 0.211,
"grad_norm": 600.0,
"kl_loss_10": 229.3176498413086,
"kl_loss_2": 2353.4455688476564,
"kl_loss_3": 1885.3362854003906,
"kl_loss_7": 749.6781646728516,
"learning_rate": 0.0009016923580312113,
"loss": 1321.2097,
"step": 2110
},
{
"ce_loss_10": 3.565862810611725,
"ce_loss_13": 3.4591265320777893,
"ce_loss_2": 4.665999031066894,
"ce_loss_3": 4.391572332382202,
"ce_loss_7": 3.7967191696166993,
"epoch": 0.212,
"grad_norm": 732.0,
"kl_loss_10": 243.075350189209,
"kl_loss_2": 2458.2255859375,
"kl_loss_3": 1975.4440124511718,
"kl_loss_7": 771.7640777587891,
"learning_rate": 0.0009007455422593077,
"loss": 1392.0321,
"step": 2120
},
{
"ce_loss_10": 3.574350452423096,
"ce_loss_13": 3.4604103803634643,
"ce_loss_2": 4.7152410507202145,
"ce_loss_3": 4.439797115325928,
"ce_loss_7": 3.8057913303375246,
"epoch": 0.213,
"grad_norm": 652.0,
"kl_loss_10": 251.99988250732423,
"kl_loss_2": 2551.55615234375,
"kl_loss_3": 2068.113671875,
"kl_loss_7": 789.3677795410156,
"learning_rate": 0.0008997946909842425,
"loss": 1402.5921,
"step": 2130
},
{
"ce_loss_10": 3.592576038837433,
"ce_loss_13": 3.476356315612793,
"ce_loss_2": 4.7715356826782225,
"ce_loss_3": 4.504428267478943,
"ce_loss_7": 3.843649852275848,
"epoch": 0.214,
"grad_norm": 660.0,
"kl_loss_10": 255.3404312133789,
"kl_loss_2": 2625.214599609375,
"kl_loss_3": 2155.4658203125,
"kl_loss_7": 813.4428436279297,
"learning_rate": 0.0008988398137810777,
"loss": 1403.5207,
"step": 2140
},
{
"ce_loss_10": 3.620520067214966,
"ce_loss_13": 3.513581109046936,
"ce_loss_2": 4.717863583564759,
"ce_loss_3": 4.442376029491425,
"ce_loss_7": 3.8534181356430053,
"epoch": 0.215,
"grad_norm": 700.0,
"kl_loss_10": 239.26677551269532,
"kl_loss_2": 2448.3839477539063,
"kl_loss_3": 1962.8316284179687,
"kl_loss_7": 763.2356109619141,
"learning_rate": 0.0008978809202654162,
"loss": 1354.8944,
"step": 2150
},
{
"ce_loss_10": 3.593782067298889,
"ce_loss_13": 3.4892191767692564,
"ce_loss_2": 4.713660454750061,
"ce_loss_3": 4.43155483007431,
"ce_loss_7": 3.8341444969177245,
"epoch": 0.216,
"grad_norm": 640.0,
"kl_loss_10": 237.50842971801757,
"kl_loss_2": 2454.586071777344,
"kl_loss_3": 1970.583270263672,
"kl_loss_7": 773.5592163085937,
"learning_rate": 0.0008969180200933046,
"loss": 1383.4818,
"step": 2160
},
{
"ce_loss_10": 3.56014689207077,
"ce_loss_13": 3.4516719341278077,
"ce_loss_2": 4.715594840049744,
"ce_loss_3": 4.431590890884399,
"ce_loss_7": 3.8131863117218017,
"epoch": 0.217,
"grad_norm": 712.0,
"kl_loss_10": 241.1098258972168,
"kl_loss_2": 2533.49033203125,
"kl_loss_3": 2041.2841003417968,
"kl_loss_7": 799.241552734375,
"learning_rate": 0.0008959511229611376,
"loss": 1406.9449,
"step": 2170
},
{
"ce_loss_10": 3.634247362613678,
"ce_loss_13": 3.529753494262695,
"ce_loss_2": 4.747422552108764,
"ce_loss_3": 4.480298018455505,
"ce_loss_7": 3.8834722995758058,
"epoch": 0.218,
"grad_norm": 744.0,
"kl_loss_10": 231.06951522827148,
"kl_loss_2": 2480.240673828125,
"kl_loss_3": 2003.8335388183593,
"kl_loss_7": 794.5106719970703,
"learning_rate": 0.0008949802386055581,
"loss": 1379.2705,
"step": 2180
},
{
"ce_loss_10": 3.4931302070617676,
"ce_loss_13": 3.3903717041015624,
"ce_loss_2": 4.634695625305175,
"ce_loss_3": 4.343023872375488,
"ce_loss_7": 3.735668647289276,
"epoch": 0.219,
"grad_norm": 704.0,
"kl_loss_10": 229.31054229736327,
"kl_loss_2": 2487.9470336914064,
"kl_loss_3": 1978.2749877929687,
"kl_loss_7": 772.9935424804687,
"learning_rate": 0.0008940053768033609,
"loss": 1398.8061,
"step": 2190
},
{
"ce_loss_10": 3.579288733005524,
"ce_loss_13": 3.476969850063324,
"ce_loss_2": 4.679602265357971,
"ce_loss_3": 4.408792352676391,
"ce_loss_7": 3.818285346031189,
"epoch": 0.22,
"grad_norm": 648.0,
"kl_loss_10": 225.21361923217773,
"kl_loss_2": 2457.1766845703123,
"kl_loss_3": 1985.3013549804687,
"kl_loss_7": 762.693115234375,
"learning_rate": 0.0008930265473713938,
"loss": 1358.0689,
"step": 2200
},
{
"ce_loss_10": 3.5425936341285706,
"ce_loss_13": 3.437610614299774,
"ce_loss_2": 4.679268145561219,
"ce_loss_3": 4.395039463043213,
"ce_loss_7": 3.7786786198616027,
"epoch": 0.221,
"grad_norm": 624.0,
"kl_loss_10": 227.02418670654296,
"kl_loss_2": 2514.80498046875,
"kl_loss_3": 2012.999462890625,
"kl_loss_7": 766.7205108642578,
"learning_rate": 0.0008920437601664579,
"loss": 1344.9316,
"step": 2210
},
{
"ce_loss_10": 3.5330151677131654,
"ce_loss_13": 3.4283509850502014,
"ce_loss_2": 4.65971040725708,
"ce_loss_3": 4.389861440658569,
"ce_loss_7": 3.7775445342063905,
"epoch": 0.222,
"grad_norm": 728.0,
"kl_loss_10": 231.53972396850585,
"kl_loss_2": 2495.336804199219,
"kl_loss_3": 2020.2352600097656,
"kl_loss_7": 785.6470977783204,
"learning_rate": 0.0008910570250852097,
"loss": 1358.0102,
"step": 2220
},
{
"ce_loss_10": 3.6386430144309996,
"ce_loss_13": 3.5394553184509276,
"ce_loss_2": 4.721383547782898,
"ce_loss_3": 4.441709399223328,
"ce_loss_7": 3.8573225855827333,
"epoch": 0.223,
"grad_norm": 656.0,
"kl_loss_10": 222.80670547485352,
"kl_loss_2": 2415.298693847656,
"kl_loss_3": 1914.3474975585937,
"kl_loss_7": 735.9223663330079,
"learning_rate": 0.0008900663520640604,
"loss": 1330.9881,
"step": 2230
},
{
"ce_loss_10": 3.5963090658187866,
"ce_loss_13": 3.4863692045211794,
"ce_loss_2": 4.697564601898193,
"ce_loss_3": 4.4291857242584225,
"ce_loss_7": 3.8206969499588013,
"epoch": 0.224,
"grad_norm": 616.0,
"kl_loss_10": 232.82473220825196,
"kl_loss_2": 2436.1601440429686,
"kl_loss_3": 1975.5118774414063,
"kl_loss_7": 746.4637390136719,
"learning_rate": 0.0008890717510790764,
"loss": 1355.2247,
"step": 2240
},
{
"ce_loss_10": 3.550048661231995,
"ce_loss_13": 3.444666588306427,
"ce_loss_2": 4.6846558332443236,
"ce_loss_3": 4.415020489692688,
"ce_loss_7": 3.7784482836723328,
"epoch": 0.225,
"grad_norm": 748.0,
"kl_loss_10": 234.0259765625,
"kl_loss_2": 2511.7267456054688,
"kl_loss_3": 2033.7661254882812,
"kl_loss_7": 757.7471649169922,
"learning_rate": 0.0008880732321458784,
"loss": 1391.5023,
"step": 2250
},
{
"ce_loss_10": 3.5846696734428405,
"ce_loss_13": 3.475912594795227,
"ce_loss_2": 4.6859821557998655,
"ce_loss_3": 4.403112530708313,
"ce_loss_7": 3.8075541138648985,
"epoch": 0.226,
"grad_norm": 768.0,
"kl_loss_10": 241.0058906555176,
"kl_loss_2": 2434.0427978515627,
"kl_loss_3": 1946.942852783203,
"kl_loss_7": 750.951953125,
"learning_rate": 0.0008870708053195413,
"loss": 1371.0441,
"step": 2260
},
{
"ce_loss_10": 3.6066513299942016,
"ce_loss_13": 3.5011353135108947,
"ce_loss_2": 4.688438081741333,
"ce_loss_3": 4.419037127494812,
"ce_loss_7": 3.8243068933486937,
"epoch": 0.227,
"grad_norm": 612.0,
"kl_loss_10": 236.37487716674804,
"kl_loss_2": 2419.1595703125,
"kl_loss_3": 1947.7892822265626,
"kl_loss_7": 736.9884735107422,
"learning_rate": 0.0008860644806944918,
"loss": 1346.316,
"step": 2270
},
{
"ce_loss_10": 3.5470305681228638,
"ce_loss_13": 3.4408384203910827,
"ce_loss_2": 4.675415754318237,
"ce_loss_3": 4.405515837669372,
"ce_loss_7": 3.7811434388160707,
"epoch": 0.228,
"grad_norm": 712.0,
"kl_loss_10": 236.5175895690918,
"kl_loss_2": 2511.8415283203126,
"kl_loss_3": 2041.5828552246094,
"kl_loss_7": 773.2159851074218,
"learning_rate": 0.0008850542684044079,
"loss": 1347.2301,
"step": 2280
},
{
"ce_loss_10": 3.525200033187866,
"ce_loss_13": 3.4121009707450867,
"ce_loss_2": 4.704805684089661,
"ce_loss_3": 4.428252863883972,
"ce_loss_7": 3.7681017994880674,
"epoch": 0.229,
"grad_norm": 744.0,
"kl_loss_10": 243.2204231262207,
"kl_loss_2": 2609.259875488281,
"kl_loss_3": 2137.3057250976562,
"kl_loss_7": 781.1770416259766,
"learning_rate": 0.0008840401786221159,
"loss": 1392.1494,
"step": 2290
},
{
"ce_loss_10": 3.644639456272125,
"ce_loss_13": 3.546596646308899,
"ce_loss_2": 4.720036673545837,
"ce_loss_3": 4.461656093597412,
"ce_loss_7": 3.8639742493629456,
"epoch": 0.23,
"grad_norm": 736.0,
"kl_loss_10": 221.5949806213379,
"kl_loss_2": 2383.692004394531,
"kl_loss_3": 1920.7404052734375,
"kl_loss_7": 726.6697357177734,
"learning_rate": 0.000883022221559489,
"loss": 1309.8631,
"step": 2300
},
{
"ce_loss_10": 3.6106560468673705,
"ce_loss_13": 3.5103928685188293,
"ce_loss_2": 4.718800568580628,
"ce_loss_3": 4.453631711006165,
"ce_loss_7": 3.833037328720093,
"epoch": 0.231,
"grad_norm": 668.0,
"kl_loss_10": 224.89765014648438,
"kl_loss_2": 2469.4252197265623,
"kl_loss_3": 2018.495166015625,
"kl_loss_7": 748.8079467773438,
"learning_rate": 0.0008820004074673434,
"loss": 1405.4977,
"step": 2310
},
{
"ce_loss_10": 3.509887623786926,
"ce_loss_13": 3.4120625376701357,
"ce_loss_2": 4.630102276802063,
"ce_loss_3": 4.358427214622497,
"ce_loss_7": 3.748315227031708,
"epoch": 0.232,
"grad_norm": 604.0,
"kl_loss_10": 223.46416931152345,
"kl_loss_2": 2484.790771484375,
"kl_loss_3": 2005.2869995117187,
"kl_loss_7": 761.2884399414063,
"learning_rate": 0.0008809747466353355,
"loss": 1341.5085,
"step": 2320
},
{
"ce_loss_10": 3.522110950946808,
"ce_loss_13": 3.4228403091430666,
"ce_loss_2": 4.653229188919068,
"ce_loss_3": 4.378945517539978,
"ce_loss_7": 3.7502294540405274,
"epoch": 0.233,
"grad_norm": 744.0,
"kl_loss_10": 224.23116912841797,
"kl_loss_2": 2499.1381958007814,
"kl_loss_3": 2020.5157836914063,
"kl_loss_7": 752.2868743896485,
"learning_rate": 0.0008799452493918585,
"loss": 1366.2092,
"step": 2330
},
{
"ce_loss_10": 3.600525939464569,
"ce_loss_13": 3.501133692264557,
"ce_loss_2": 4.698138499259949,
"ce_loss_3": 4.4309428334236145,
"ce_loss_7": 3.8393119096755983,
"epoch": 0.234,
"grad_norm": 656.0,
"kl_loss_10": 221.8571762084961,
"kl_loss_2": 2452.500280761719,
"kl_loss_3": 1976.1439636230468,
"kl_loss_7": 759.1389068603515,
"learning_rate": 0.0008789119261039385,
"loss": 1400.5569,
"step": 2340
},
{
"ce_loss_10": 3.5126537322998046,
"ce_loss_13": 3.412049424648285,
"ce_loss_2": 4.627605974674225,
"ce_loss_3": 4.359820437431336,
"ce_loss_7": 3.747655713558197,
"epoch": 0.235,
"grad_norm": 584.0,
"kl_loss_10": 220.69495086669923,
"kl_loss_2": 2450.3417724609376,
"kl_loss_3": 1979.037158203125,
"kl_loss_7": 752.3414123535156,
"learning_rate": 0.0008778747871771292,
"loss": 1338.277,
"step": 2350
},
{
"ce_loss_10": 3.5650462746620177,
"ce_loss_13": 3.4650426387786863,
"ce_loss_2": 4.640904521942138,
"ce_loss_3": 4.3729163646698,
"ce_loss_7": 3.78610600233078,
"epoch": 0.236,
"grad_norm": 628.0,
"kl_loss_10": 215.22831954956055,
"kl_loss_2": 2399.6547119140623,
"kl_loss_3": 1925.4503356933594,
"kl_loss_7": 727.8779388427735,
"learning_rate": 0.0008768338430554083,
"loss": 1316.2055,
"step": 2360
},
{
"ce_loss_10": 3.572676420211792,
"ce_loss_13": 3.4714962005615235,
"ce_loss_2": 4.678735136985779,
"ce_loss_3": 4.39429270029068,
"ce_loss_7": 3.8077693939208985,
"epoch": 0.237,
"grad_norm": 688.0,
"kl_loss_10": 226.92397766113282,
"kl_loss_2": 2426.2300659179687,
"kl_loss_3": 1939.4405090332032,
"kl_loss_7": 752.637564086914,
"learning_rate": 0.0008757891042210713,
"loss": 1346.3338,
"step": 2370
},
{
"ce_loss_10": 3.592969560623169,
"ce_loss_13": 3.493350553512573,
"ce_loss_2": 4.688189601898193,
"ce_loss_3": 4.413512086868286,
"ce_loss_7": 3.821557307243347,
"epoch": 0.238,
"grad_norm": 656.0,
"kl_loss_10": 225.66336822509766,
"kl_loss_2": 2421.9510131835937,
"kl_loss_3": 1946.20556640625,
"kl_loss_7": 745.2722961425782,
"learning_rate": 0.0008747405811946271,
"loss": 1343.8345,
"step": 2380
},
{
"ce_loss_10": 3.49123694896698,
"ce_loss_13": 3.389770042896271,
"ce_loss_2": 4.654137110710144,
"ce_loss_3": 4.386571860313415,
"ce_loss_7": 3.731127667427063,
"epoch": 0.239,
"grad_norm": 616.0,
"kl_loss_10": 230.47370223999025,
"kl_loss_2": 2561.850231933594,
"kl_loss_3": 2084.1000549316404,
"kl_loss_7": 769.9209930419922,
"learning_rate": 0.0008736882845346905,
"loss": 1355.4398,
"step": 2390
},
{
"ce_loss_10": 3.5909661054611206,
"ce_loss_13": 3.4839738249778747,
"ce_loss_2": 4.705090403556824,
"ce_loss_3": 4.426928949356079,
"ce_loss_7": 3.8166149973869326,
"epoch": 0.24,
"grad_norm": 652.0,
"kl_loss_10": 232.27595291137695,
"kl_loss_2": 2463.9607543945312,
"kl_loss_3": 1976.524102783203,
"kl_loss_7": 748.5501831054687,
"learning_rate": 0.0008726322248378774,
"loss": 1350.1158,
"step": 2400
},
{
"ce_loss_10": 3.5857128262519837,
"ce_loss_13": 3.485344707965851,
"ce_loss_2": 4.720745325088501,
"ce_loss_3": 4.446980690956115,
"ce_loss_7": 3.815141475200653,
"epoch": 0.241,
"grad_norm": 620.0,
"kl_loss_10": 225.08902893066406,
"kl_loss_2": 2502.8332275390626,
"kl_loss_3": 2020.9147888183593,
"kl_loss_7": 748.0698608398437,
"learning_rate": 0.0008715724127386971,
"loss": 1388.577,
"step": 2410
},
{
"ce_loss_10": 3.656253182888031,
"ce_loss_13": 3.5548863530159,
"ce_loss_2": 4.740737318992615,
"ce_loss_3": 4.4647379398345945,
"ce_loss_7": 3.869425129890442,
"epoch": 0.242,
"grad_norm": 656.0,
"kl_loss_10": 233.72190628051757,
"kl_loss_2": 2420.5750244140627,
"kl_loss_3": 1941.4000915527345,
"kl_loss_7": 733.7942932128906,
"learning_rate": 0.0008705088589094458,
"loss": 1349.3883,
"step": 2420
},
{
"ce_loss_10": 3.6831162333488465,
"ce_loss_13": 3.5650919318199157,
"ce_loss_2": 4.759288740158081,
"ce_loss_3": 4.490408134460449,
"ce_loss_7": 3.8880489230155946,
"epoch": 0.243,
"grad_norm": 640.0,
"kl_loss_10": 258.1027114868164,
"kl_loss_2": 2453.8090209960938,
"kl_loss_3": 1977.7547729492187,
"kl_loss_7": 746.0192138671875,
"learning_rate": 0.0008694415740600988,
"loss": 1371.979,
"step": 2430
},
{
"ce_loss_10": 3.539147210121155,
"ce_loss_13": 3.418752908706665,
"ce_loss_2": 4.6640907526016235,
"ce_loss_3": 4.396868014335633,
"ce_loss_7": 3.753141713142395,
"epoch": 0.244,
"grad_norm": 720.0,
"kl_loss_10": 272.4710403442383,
"kl_loss_2": 2511.5777099609377,
"kl_loss_3": 2045.4482543945312,
"kl_loss_7": 744.3600494384766,
"learning_rate": 0.0008683705689382025,
"loss": 1374.2081,
"step": 2440
},
{
"ce_loss_10": 3.614233338832855,
"ce_loss_13": 3.502686250209808,
"ce_loss_2": 4.680193209648133,
"ce_loss_3": 4.409785914421081,
"ce_loss_7": 3.81562682390213,
"epoch": 0.245,
"grad_norm": 680.0,
"kl_loss_10": 242.92661514282227,
"kl_loss_2": 2418.696484375,
"kl_loss_3": 1945.9917602539062,
"kl_loss_7": 727.0407897949219,
"learning_rate": 0.0008672958543287666,
"loss": 1361.5771,
"step": 2450
},
{
"ce_loss_10": 3.6207616090774537,
"ce_loss_13": 3.5146057486534117,
"ce_loss_2": 4.6799437522888185,
"ce_loss_3": 4.408400678634644,
"ce_loss_7": 3.8393305063247682,
"epoch": 0.246,
"grad_norm": 640.0,
"kl_loss_10": 233.26868438720703,
"kl_loss_2": 2373.7197509765624,
"kl_loss_3": 1900.9493347167968,
"kl_loss_7": 737.9279724121094,
"learning_rate": 0.0008662174410541554,
"loss": 1323.3875,
"step": 2460
},
{
"ce_loss_10": 3.5795403718948364,
"ce_loss_13": 3.4791687726974487,
"ce_loss_2": 4.657073163986206,
"ce_loss_3": 4.389124321937561,
"ce_loss_7": 3.797624135017395,
"epoch": 0.247,
"grad_norm": 688.0,
"kl_loss_10": 228.68382720947267,
"kl_loss_2": 2405.7741943359374,
"kl_loss_3": 1929.0893249511719,
"kl_loss_7": 730.4046020507812,
"learning_rate": 0.0008651353399739787,
"loss": 1361.2713,
"step": 2470
},
{
"ce_loss_10": 3.6015311241149903,
"ce_loss_13": 3.5007805585861207,
"ce_loss_2": 4.693076491355896,
"ce_loss_3": 4.420244932174683,
"ce_loss_7": 3.8255343675613402,
"epoch": 0.248,
"grad_norm": 628.0,
"kl_loss_10": 225.77268676757814,
"kl_loss_2": 2413.6783447265625,
"kl_loss_3": 1937.1076232910157,
"kl_loss_7": 735.3206512451172,
"learning_rate": 0.0008640495619849821,
"loss": 1345.3404,
"step": 2480
},
{
"ce_loss_10": 3.5668503522872923,
"ce_loss_13": 3.4637187004089354,
"ce_loss_2": 4.644854807853699,
"ce_loss_3": 4.374481606483459,
"ce_loss_7": 3.791785490512848,
"epoch": 0.249,
"grad_norm": 616.0,
"kl_loss_10": 223.47670059204103,
"kl_loss_2": 2406.82578125,
"kl_loss_3": 1930.5429321289062,
"kl_loss_7": 738.2828582763672,
"learning_rate": 0.0008629601180209381,
"loss": 1326.733,
"step": 2490
},
{
"ce_loss_10": 3.5605925559997558,
"ce_loss_13": 3.4623565435409547,
"ce_loss_2": 4.648912143707276,
"ce_loss_3": 4.37358832359314,
"ce_loss_7": 3.7822588205337526,
"epoch": 0.25,
"grad_norm": 588.0,
"kl_loss_10": 221.60515823364258,
"kl_loss_2": 2408.634729003906,
"kl_loss_3": 1918.1406311035157,
"kl_loss_7": 733.2383361816406,
"learning_rate": 0.000861867019052535,
"loss": 1350.9314,
"step": 2500
},
{
"ce_loss_10": 3.4750850677490233,
"ce_loss_13": 3.3757749915122988,
"ce_loss_2": 4.618335509300232,
"ce_loss_3": 4.344382691383362,
"ce_loss_7": 3.7118528127670287,
"epoch": 0.251,
"grad_norm": 664.0,
"kl_loss_10": 225.6886344909668,
"kl_loss_2": 2520.0691040039064,
"kl_loss_3": 2028.4780883789062,
"kl_loss_7": 750.8930267333984,
"learning_rate": 0.0008607702760872678,
"loss": 1377.2211,
"step": 2510
},
{
"ce_loss_10": 3.5948320031166077,
"ce_loss_13": 3.493862783908844,
"ce_loss_2": 4.663220858573913,
"ce_loss_3": 4.39898452758789,
"ce_loss_7": 3.8143251180648803,
"epoch": 0.252,
"grad_norm": 736.0,
"kl_loss_10": 220.9385528564453,
"kl_loss_2": 2382.33095703125,
"kl_loss_3": 1919.1317260742187,
"kl_loss_7": 728.4733703613281,
"learning_rate": 0.0008596699001693256,
"loss": 1356.6151,
"step": 2520
},
{
"ce_loss_10": 3.6045937299728394,
"ce_loss_13": 3.5089424014091493,
"ce_loss_2": 4.674148344993592,
"ce_loss_3": 4.401587581634521,
"ce_loss_7": 3.8156301379203796,
"epoch": 0.253,
"grad_norm": 664.0,
"kl_loss_10": 222.60222702026368,
"kl_loss_2": 2399.647021484375,
"kl_loss_3": 1923.217791748047,
"kl_loss_7": 722.3135375976562,
"learning_rate": 0.0008585659023794818,
"loss": 1357.2354,
"step": 2530
},
{
"ce_loss_10": 3.5605056166648863,
"ce_loss_13": 3.458607590198517,
"ce_loss_2": 4.6924147605896,
"ce_loss_3": 4.421391654014587,
"ce_loss_7": 3.799249517917633,
"epoch": 0.254,
"grad_norm": 660.0,
"kl_loss_10": 233.0737617492676,
"kl_loss_2": 2499.324670410156,
"kl_loss_3": 2030.4549194335937,
"kl_loss_7": 761.279296875,
"learning_rate": 0.0008574582938349817,
"loss": 1364.7606,
"step": 2540
},
{
"ce_loss_10": 3.5620136737823485,
"ce_loss_13": 3.450424087047577,
"ce_loss_2": 4.679884123802185,
"ce_loss_3": 4.403433465957642,
"ce_loss_7": 3.8059414982795716,
"epoch": 0.255,
"grad_norm": 648.0,
"kl_loss_10": 238.74318084716796,
"kl_loss_2": 2486.331640625,
"kl_loss_3": 1999.8115600585938,
"kl_loss_7": 776.2368225097656,
"learning_rate": 0.0008563470856894315,
"loss": 1329.6849,
"step": 2550
},
{
"ce_loss_10": 3.540405642986298,
"ce_loss_13": 3.4457826972007752,
"ce_loss_2": 4.656697821617127,
"ce_loss_3": 4.386443245410919,
"ce_loss_7": 3.772416353225708,
"epoch": 0.256,
"grad_norm": 760.0,
"kl_loss_10": 221.72702865600587,
"kl_loss_2": 2443.3952514648436,
"kl_loss_3": 1969.1475952148437,
"kl_loss_7": 745.7592987060547,
"learning_rate": 0.0008552322891326845,
"loss": 1346.8541,
"step": 2560
},
{
"ce_loss_10": 3.5136868953704834,
"ce_loss_13": 3.415074276924133,
"ce_loss_2": 4.637244987487793,
"ce_loss_3": 4.365770423412323,
"ce_loss_7": 3.741610062122345,
"epoch": 0.257,
"grad_norm": 788.0,
"kl_loss_10": 218.68516159057617,
"kl_loss_2": 2477.789599609375,
"kl_loss_3": 2001.3069702148437,
"kl_loss_7": 743.3714080810547,
"learning_rate": 0.0008541139153907296,
"loss": 1329.1979,
"step": 2570
},
{
"ce_loss_10": 3.472187507152557,
"ce_loss_13": 3.3729379415512084,
"ce_loss_2": 4.581104445457458,
"ce_loss_3": 4.308674609661102,
"ce_loss_7": 3.69760080575943,
"epoch": 0.258,
"grad_norm": 636.0,
"kl_loss_10": 213.4689498901367,
"kl_loss_2": 2453.299768066406,
"kl_loss_3": 1976.8992919921875,
"kl_loss_7": 745.6326965332031,
"learning_rate": 0.0008529919757255782,
"loss": 1354.7893,
"step": 2580
},
{
"ce_loss_10": 3.500008797645569,
"ce_loss_13": 3.408738708496094,
"ce_loss_2": 4.560009336471557,
"ce_loss_3": 4.2931175351142885,
"ce_loss_7": 3.716734218597412,
"epoch": 0.259,
"grad_norm": 624.0,
"kl_loss_10": 208.80025100708008,
"kl_loss_2": 2371.1708251953123,
"kl_loss_3": 1897.6802124023438,
"kl_loss_7": 721.6227478027344,
"learning_rate": 0.0008518664814351503,
"loss": 1306.301,
"step": 2590
},
{
"ce_loss_10": 3.472637712955475,
"ce_loss_13": 3.37472482919693,
"ce_loss_2": 4.598471093177795,
"ce_loss_3": 4.321799778938294,
"ce_loss_7": 3.7131651520729063,
"epoch": 0.26,
"grad_norm": 644.0,
"kl_loss_10": 222.20911254882813,
"kl_loss_2": 2491.116162109375,
"kl_loss_3": 2007.4335876464843,
"kl_loss_7": 764.1704193115235,
"learning_rate": 0.0008507374438531607,
"loss": 1407.2535,
"step": 2600
},
{
"ce_loss_10": 3.447394275665283,
"ce_loss_13": 3.3539512395858764,
"ce_loss_2": 4.5548292875289915,
"ce_loss_3": 4.286789774894714,
"ce_loss_7": 3.6768516659736634,
"epoch": 0.261,
"grad_norm": 676.0,
"kl_loss_10": 214.65092697143555,
"kl_loss_2": 2437.03447265625,
"kl_loss_3": 1973.9089477539062,
"kl_loss_7": 738.8113952636719,
"learning_rate": 0.0008496048743490053,
"loss": 1332.7279,
"step": 2610
},
{
"ce_loss_10": 3.597834813594818,
"ce_loss_13": 3.5061428785324096,
"ce_loss_2": 4.655121803283691,
"ce_loss_3": 4.391561770439148,
"ce_loss_7": 3.814839816093445,
"epoch": 0.262,
"grad_norm": 564.0,
"kl_loss_10": 212.99711074829102,
"kl_loss_2": 2362.529577636719,
"kl_loss_3": 1891.9757995605469,
"kl_loss_7": 720.1662811279297,
"learning_rate": 0.0008484687843276469,
"loss": 1316.5832,
"step": 2620
},
{
"ce_loss_10": 3.533200740814209,
"ce_loss_13": 3.4373727798461915,
"ce_loss_2": 4.636826205253601,
"ce_loss_3": 4.3528993129730225,
"ce_loss_7": 3.7636064171791075,
"epoch": 0.263,
"grad_norm": 688.0,
"kl_loss_10": 217.95888977050782,
"kl_loss_2": 2432.091143798828,
"kl_loss_3": 1936.0632568359374,
"kl_loss_7": 738.968881225586,
"learning_rate": 0.0008473291852294987,
"loss": 1361.4943,
"step": 2630
},
{
"ce_loss_10": 3.5451728224754335,
"ce_loss_13": 3.446604347229004,
"ce_loss_2": 4.630346298217773,
"ce_loss_3": 4.3619812488555905,
"ce_loss_7": 3.7699208855628967,
"epoch": 0.264,
"grad_norm": 672.0,
"kl_loss_10": 220.66769561767578,
"kl_loss_2": 2436.2069458007813,
"kl_loss_3": 1956.8639526367188,
"kl_loss_7": 742.7248840332031,
"learning_rate": 0.0008461860885303114,
"loss": 1327.3721,
"step": 2640
},
{
"ce_loss_10": 3.5666414141654967,
"ce_loss_13": 3.4715107679367065,
"ce_loss_2": 4.639662265777588,
"ce_loss_3": 4.371685028076172,
"ce_loss_7": 3.788040292263031,
"epoch": 0.265,
"grad_norm": 656.0,
"kl_loss_10": 216.69636611938478,
"kl_loss_2": 2373.723107910156,
"kl_loss_3": 1899.1220764160157,
"kl_loss_7": 725.1952423095703,
"learning_rate": 0.000845039505741056,
"loss": 1327.8555,
"step": 2650
},
{
"ce_loss_10": 3.5541250467300416,
"ce_loss_13": 3.4555353045463564,
"ce_loss_2": 4.645513963699341,
"ce_loss_3": 4.378093981742859,
"ce_loss_7": 3.7833709001541136,
"epoch": 0.266,
"grad_norm": 668.0,
"kl_loss_10": 224.05798721313477,
"kl_loss_2": 2449.707385253906,
"kl_loss_3": 1967.4787109375,
"kl_loss_7": 750.5478302001953,
"learning_rate": 0.0008438894484078086,
"loss": 1378.657,
"step": 2660
},
{
"ce_loss_10": 3.557729125022888,
"ce_loss_13": 3.4628395080566405,
"ce_loss_2": 4.638984179496765,
"ce_loss_3": 4.374520492553711,
"ce_loss_7": 3.7801038026809692,
"epoch": 0.267,
"grad_norm": 796.0,
"kl_loss_10": 218.22870254516602,
"kl_loss_2": 2393.3899047851564,
"kl_loss_3": 1931.0333312988282,
"kl_loss_7": 732.3969909667969,
"learning_rate": 0.0008427359281116334,
"loss": 1329.4188,
"step": 2670
},
{
"ce_loss_10": 3.4619020819664,
"ce_loss_13": 3.3649930715560914,
"ce_loss_2": 4.586506628990174,
"ce_loss_3": 4.3114288449287415,
"ce_loss_7": 3.6977506399154665,
"epoch": 0.268,
"grad_norm": 560.0,
"kl_loss_10": 218.7227699279785,
"kl_loss_2": 2471.7220703125,
"kl_loss_3": 1986.8973815917968,
"kl_loss_7": 744.8811431884766,
"learning_rate": 0.0008415789564684673,
"loss": 1344.4947,
"step": 2680
},
{
"ce_loss_10": 3.7084735155105593,
"ce_loss_13": 3.610187065601349,
"ce_loss_2": 4.759761667251587,
"ce_loss_3": 4.487373423576355,
"ce_loss_7": 3.9243152022361754,
"epoch": 0.269,
"grad_norm": 756.0,
"kl_loss_10": 223.18955688476564,
"kl_loss_2": 2329.3449951171874,
"kl_loss_3": 1847.8426208496094,
"kl_loss_7": 721.1707153320312,
"learning_rate": 0.0008404185451290017,
"loss": 1296.1146,
"step": 2690
},
{
"ce_loss_10": 3.578732097148895,
"ce_loss_13": 3.4770421504974367,
"ce_loss_2": 4.659151983261109,
"ce_loss_3": 4.38085663318634,
"ce_loss_7": 3.7948765754699707,
"epoch": 0.27,
"grad_norm": 692.0,
"kl_loss_10": 224.61487731933593,
"kl_loss_2": 2417.559912109375,
"kl_loss_3": 1939.3710815429688,
"kl_loss_7": 727.4687561035156,
"learning_rate": 0.0008392547057785661,
"loss": 1317.3512,
"step": 2700
},
{
"ce_loss_10": 3.5002851486206055,
"ce_loss_13": 3.396597516536713,
"ce_loss_2": 4.633592844009399,
"ce_loss_3": 4.365511727333069,
"ce_loss_7": 3.738453209400177,
"epoch": 0.271,
"grad_norm": 732.0,
"kl_loss_10": 231.73975296020507,
"kl_loss_2": 2517.132354736328,
"kl_loss_3": 2044.1573425292968,
"kl_loss_7": 768.5197204589844,
"learning_rate": 0.0008380874501370098,
"loss": 1329.0642,
"step": 2710
},
{
"ce_loss_10": 3.5027819752693174,
"ce_loss_13": 3.4010127544403077,
"ce_loss_2": 4.628546047210693,
"ce_loss_3": 4.359855842590332,
"ce_loss_7": 3.7310682773590087,
"epoch": 0.272,
"grad_norm": 628.0,
"kl_loss_10": 236.13679275512695,
"kl_loss_2": 2503.883825683594,
"kl_loss_3": 2020.1560424804688,
"kl_loss_7": 758.8711700439453,
"learning_rate": 0.0008369167899585841,
"loss": 1363.7068,
"step": 2720
},
{
"ce_loss_10": 3.6181455850601196,
"ce_loss_13": 3.521961879730225,
"ce_loss_2": 4.664963984489441,
"ce_loss_3": 4.396141123771668,
"ce_loss_7": 3.839101779460907,
"epoch": 0.273,
"grad_norm": 636.0,
"kl_loss_10": 223.16615371704103,
"kl_loss_2": 2348.37099609375,
"kl_loss_3": 1879.9346130371093,
"kl_loss_7": 730.2560852050781,
"learning_rate": 0.0008357427370318238,
"loss": 1337.943,
"step": 2730
},
{
"ce_loss_10": 3.571904718875885,
"ce_loss_13": 3.4762736320495606,
"ce_loss_2": 4.677034759521485,
"ce_loss_3": 4.40289398431778,
"ce_loss_7": 3.7918145298957824,
"epoch": 0.274,
"grad_norm": 772.0,
"kl_loss_10": 222.57760772705078,
"kl_loss_2": 2451.346435546875,
"kl_loss_3": 1973.4313354492188,
"kl_loss_7": 730.7371429443359,
"learning_rate": 0.0008345653031794292,
"loss": 1347.6243,
"step": 2740
},
{
"ce_loss_10": 3.5737530469894407,
"ce_loss_13": 3.4740692615509032,
"ce_loss_2": 4.659031462669373,
"ce_loss_3": 4.387771344184875,
"ce_loss_7": 3.792672348022461,
"epoch": 0.275,
"grad_norm": 672.0,
"kl_loss_10": 222.67840805053712,
"kl_loss_2": 2406.277941894531,
"kl_loss_3": 1924.3234985351562,
"kl_loss_7": 730.7620574951172,
"learning_rate": 0.0008333845002581458,
"loss": 1320.2523,
"step": 2750
},
{
"ce_loss_10": 3.498860251903534,
"ce_loss_13": 3.400104033946991,
"ce_loss_2": 4.611243772506714,
"ce_loss_3": 4.342458128929138,
"ce_loss_7": 3.733369469642639,
"epoch": 0.276,
"grad_norm": 644.0,
"kl_loss_10": 224.65963973999024,
"kl_loss_2": 2495.7015869140623,
"kl_loss_3": 2015.1633422851562,
"kl_loss_7": 762.1438781738282,
"learning_rate": 0.0008322003401586462,
"loss": 1364.4495,
"step": 2760
},
{
"ce_loss_10": 3.532784569263458,
"ce_loss_13": 3.440683197975159,
"ce_loss_2": 4.59234881401062,
"ce_loss_3": 4.320498394966125,
"ce_loss_7": 3.7502055525779725,
"epoch": 0.277,
"grad_norm": 724.0,
"kl_loss_10": 211.5718635559082,
"kl_loss_2": 2343.010675048828,
"kl_loss_3": 1873.985821533203,
"kl_loss_7": 709.5305114746094,
"learning_rate": 0.0008310128348054094,
"loss": 1276.2701,
"step": 2770
},
{
"ce_loss_10": 3.5014058470726015,
"ce_loss_13": 3.406921911239624,
"ce_loss_2": 4.603280448913575,
"ce_loss_3": 4.329492771625519,
"ce_loss_7": 3.7248639822006226,
"epoch": 0.278,
"grad_norm": 652.0,
"kl_loss_10": 214.84819107055665,
"kl_loss_2": 2431.7943481445313,
"kl_loss_3": 1951.13515625,
"kl_loss_7": 731.5488677978516,
"learning_rate": 0.0008298219961566008,
"loss": 1329.707,
"step": 2780
},
{
"ce_loss_10": 3.4713513970375063,
"ce_loss_13": 3.3771822571754457,
"ce_loss_2": 4.587963104248047,
"ce_loss_3": 4.32047404050827,
"ce_loss_7": 3.711584746837616,
"epoch": 0.279,
"grad_norm": 644.0,
"kl_loss_10": 217.99566726684571,
"kl_loss_2": 2492.9334106445312,
"kl_loss_3": 2016.429022216797,
"kl_loss_7": 761.9394226074219,
"learning_rate": 0.0008286278362039527,
"loss": 1336.5162,
"step": 2790
},
{
"ce_loss_10": 3.496282184123993,
"ce_loss_13": 3.3998995065689086,
"ce_loss_2": 4.622646689414978,
"ce_loss_3": 4.352741932868957,
"ce_loss_7": 3.7300979018211367,
"epoch": 0.28,
"grad_norm": 592.0,
"kl_loss_10": 216.96264114379883,
"kl_loss_2": 2489.9998046875,
"kl_loss_3": 2008.0425537109375,
"kl_loss_7": 746.8909149169922,
"learning_rate": 0.0008274303669726426,
"loss": 1325.7328,
"step": 2800
},
{
"ce_loss_10": 3.4048958301544188,
"ce_loss_13": 3.3045366764068604,
"ce_loss_2": 4.5690556287765505,
"ce_loss_3": 4.298348617553711,
"ce_loss_7": 3.6378442645072937,
"epoch": 0.281,
"grad_norm": 684.0,
"kl_loss_10": 218.18540115356444,
"kl_loss_2": 2561.6716186523436,
"kl_loss_3": 2080.7119262695314,
"kl_loss_7": 743.8994750976562,
"learning_rate": 0.0008262296005211721,
"loss": 1337.6219,
"step": 2810
},
{
"ce_loss_10": 3.5260050296783447,
"ce_loss_13": 3.428924763202667,
"ce_loss_2": 4.642134022712708,
"ce_loss_3": 4.368475294113159,
"ce_loss_7": 3.7550152063369753,
"epoch": 0.282,
"grad_norm": 600.0,
"kl_loss_10": 216.54320907592773,
"kl_loss_2": 2444.2397338867186,
"kl_loss_3": 1975.6794677734374,
"kl_loss_7": 734.2523712158203,
"learning_rate": 0.0008250255489412463,
"loss": 1322.247,
"step": 2820
},
{
"ce_loss_10": 3.629942464828491,
"ce_loss_13": 3.532360863685608,
"ce_loss_2": 4.7163821935653685,
"ce_loss_3": 4.444535660743713,
"ce_loss_7": 3.846136474609375,
"epoch": 0.283,
"grad_norm": 628.0,
"kl_loss_10": 214.22548904418946,
"kl_loss_2": 2410.5466918945312,
"kl_loss_3": 1930.2673034667969,
"kl_loss_7": 714.048681640625,
"learning_rate": 0.0008238182243576511,
"loss": 1325.0883,
"step": 2830
},
{
"ce_loss_10": 3.5913167357444764,
"ce_loss_13": 3.5031124353408813,
"ce_loss_2": 4.611292886734009,
"ce_loss_3": 4.339277529716492,
"ce_loss_7": 3.796242094039917,
"epoch": 0.284,
"grad_norm": 620.0,
"kl_loss_10": 208.4808135986328,
"kl_loss_2": 2294.337286376953,
"kl_loss_3": 1814.4247924804688,
"kl_loss_7": 695.5996673583984,
"learning_rate": 0.0008226076389281315,
"loss": 1277.3086,
"step": 2840
},
{
"ce_loss_10": 3.632950210571289,
"ce_loss_13": 3.542364180088043,
"ce_loss_2": 4.697378945350647,
"ce_loss_3": 4.428278470039368,
"ce_loss_7": 3.8434852004051208,
"epoch": 0.285,
"grad_norm": 592.0,
"kl_loss_10": 210.92243499755858,
"kl_loss_2": 2375.7556274414064,
"kl_loss_3": 1902.3470825195313,
"kl_loss_7": 701.8125823974609,
"learning_rate": 0.0008213938048432696,
"loss": 1285.7082,
"step": 2850
},
{
"ce_loss_10": 3.561896014213562,
"ce_loss_13": 3.4673075318336486,
"ce_loss_2": 4.635823488235474,
"ce_loss_3": 4.3728371381759645,
"ce_loss_7": 3.780589020252228,
"epoch": 0.286,
"grad_norm": 616.0,
"kl_loss_10": 216.6977653503418,
"kl_loss_2": 2390.834924316406,
"kl_loss_3": 1924.6818054199218,
"kl_loss_7": 726.8750396728516,
"learning_rate": 0.0008201767343263612,
"loss": 1324.6124,
"step": 2860
},
{
"ce_loss_10": 3.4997401237487793,
"ce_loss_13": 3.4044744968414307,
"ce_loss_2": 4.604890465736389,
"ce_loss_3": 4.338030159473419,
"ce_loss_7": 3.7291186928749083,
"epoch": 0.287,
"grad_norm": 616.0,
"kl_loss_10": 213.92771530151367,
"kl_loss_2": 2444.1182250976562,
"kl_loss_3": 1971.163818359375,
"kl_loss_7": 731.3478240966797,
"learning_rate": 0.0008189564396332927,
"loss": 1291.9086,
"step": 2870
},
{
"ce_loss_10": 3.480617916584015,
"ce_loss_13": 3.388473629951477,
"ce_loss_2": 4.600887513160705,
"ce_loss_3": 4.323178672790528,
"ce_loss_7": 3.7104127168655396,
"epoch": 0.288,
"grad_norm": 668.0,
"kl_loss_10": 212.88904190063477,
"kl_loss_2": 2441.765899658203,
"kl_loss_3": 1961.8893615722657,
"kl_loss_7": 728.4373413085938,
"learning_rate": 0.0008177329330524181,
"loss": 1342.4608,
"step": 2880
},
{
"ce_loss_10": 3.5435534834861757,
"ce_loss_13": 3.4502355217933656,
"ce_loss_2": 4.6120285987854,
"ce_loss_3": 4.346097040176391,
"ce_loss_7": 3.762561321258545,
"epoch": 0.289,
"grad_norm": 648.0,
"kl_loss_10": 212.22290649414063,
"kl_loss_2": 2358.1793823242188,
"kl_loss_3": 1890.9413208007813,
"kl_loss_7": 714.5174743652344,
"learning_rate": 0.0008165062269044352,
"loss": 1305.3231,
"step": 2890
},
{
"ce_loss_10": 3.4996484994888304,
"ce_loss_13": 3.401354455947876,
"ce_loss_2": 4.609268927574158,
"ce_loss_3": 4.3294067740440365,
"ce_loss_7": 3.723408377170563,
"epoch": 0.29,
"grad_norm": 660.0,
"kl_loss_10": 216.81241302490236,
"kl_loss_2": 2451.4824340820314,
"kl_loss_3": 1968.3146179199218,
"kl_loss_7": 729.5468353271484,
"learning_rate": 0.0008152763335422613,
"loss": 1337.7896,
"step": 2900
},
{
"ce_loss_10": 3.4890666246414184,
"ce_loss_13": 3.392501711845398,
"ce_loss_2": 4.58982219696045,
"ce_loss_3": 4.312074947357178,
"ce_loss_7": 3.713588225841522,
"epoch": 0.291,
"grad_norm": 664.0,
"kl_loss_10": 218.38675384521486,
"kl_loss_2": 2445.5037841796875,
"kl_loss_3": 1949.8568176269532,
"kl_loss_7": 729.6879028320312,
"learning_rate": 0.0008140432653509088,
"loss": 1317.595,
"step": 2910
},
{
"ce_loss_10": 3.538894033432007,
"ce_loss_13": 3.4391178250312806,
"ce_loss_2": 4.60951418876648,
"ce_loss_3": 4.337265026569367,
"ce_loss_7": 3.7542282700538636,
"epoch": 0.292,
"grad_norm": 576.0,
"kl_loss_10": 218.85857162475585,
"kl_loss_2": 2397.1072692871094,
"kl_loss_3": 1916.8259216308593,
"kl_loss_7": 718.4374481201172,
"learning_rate": 0.0008128070347473608,
"loss": 1302.2107,
"step": 2920
},
{
"ce_loss_10": 3.5429399847984313,
"ce_loss_13": 3.447796130180359,
"ce_loss_2": 4.665868854522705,
"ce_loss_3": 4.389448404312134,
"ce_loss_7": 3.7667205929756165,
"epoch": 0.293,
"grad_norm": 664.0,
"kl_loss_10": 216.54725646972656,
"kl_loss_2": 2487.7160583496093,
"kl_loss_3": 2004.9421325683593,
"kl_loss_7": 736.1060913085937,
"learning_rate": 0.0008115676541804455,
"loss": 1333.5637,
"step": 2930
},
{
"ce_loss_10": 3.5453550815582275,
"ce_loss_13": 3.4535977363586428,
"ce_loss_2": 4.623500943183899,
"ce_loss_3": 4.348728823661804,
"ce_loss_7": 3.760838878154755,
"epoch": 0.294,
"grad_norm": 580.0,
"kl_loss_10": 209.94191284179686,
"kl_loss_2": 2400.48662109375,
"kl_loss_3": 1909.5526062011718,
"kl_loss_7": 710.1752807617188,
"learning_rate": 0.0008103251361307119,
"loss": 1325.5172,
"step": 2940
},
{
"ce_loss_10": 3.578377163410187,
"ce_loss_13": 3.4808244347572326,
"ce_loss_2": 4.6591003894805905,
"ce_loss_3": 4.395820617675781,
"ce_loss_7": 3.793817377090454,
"epoch": 0.295,
"grad_norm": 616.0,
"kl_loss_10": 214.81473617553712,
"kl_loss_2": 2396.3223205566405,
"kl_loss_3": 1926.4922485351562,
"kl_loss_7": 722.0272766113281,
"learning_rate": 0.0008090794931103026,
"loss": 1300.3234,
"step": 2950
},
{
"ce_loss_10": 3.566417765617371,
"ce_loss_13": 3.475232172012329,
"ce_loss_2": 4.628555154800415,
"ce_loss_3": 4.358175444602966,
"ce_loss_7": 3.7831589698791506,
"epoch": 0.296,
"grad_norm": 692.0,
"kl_loss_10": 209.84390869140626,
"kl_loss_2": 2350.2305419921877,
"kl_loss_3": 1877.9652465820313,
"kl_loss_7": 713.7039794921875,
"learning_rate": 0.0008078307376628291,
"loss": 1303.6331,
"step": 2960
},
{
"ce_loss_10": 3.6232991099357603,
"ce_loss_13": 3.534627139568329,
"ce_loss_2": 4.6475036382675174,
"ce_loss_3": 4.389086437225342,
"ce_loss_7": 3.83059047460556,
"epoch": 0.297,
"grad_norm": 644.0,
"kl_loss_10": 205.1537940979004,
"kl_loss_2": 2274.82734375,
"kl_loss_3": 1823.2497436523438,
"kl_loss_7": 686.9072265625,
"learning_rate": 0.000806578882363245,
"loss": 1259.2264,
"step": 2970
},
{
"ce_loss_10": 3.536562275886536,
"ce_loss_13": 3.447048234939575,
"ce_loss_2": 4.597748541831971,
"ce_loss_3": 4.3311933994293215,
"ce_loss_7": 3.7559500217437742,
"epoch": 0.298,
"grad_norm": 736.0,
"kl_loss_10": 208.43729248046876,
"kl_loss_2": 2344.390216064453,
"kl_loss_3": 1878.6112243652344,
"kl_loss_7": 714.4485260009766,
"learning_rate": 0.0008053239398177191,
"loss": 1329.3172,
"step": 2980
},
{
"ce_loss_10": 3.524178981781006,
"ce_loss_13": 3.4312392354011534,
"ce_loss_2": 4.604809284210205,
"ce_loss_3": 4.337883043289184,
"ce_loss_7": 3.7429209470748903,
"epoch": 0.299,
"grad_norm": 684.0,
"kl_loss_10": 211.32650604248047,
"kl_loss_2": 2394.308056640625,
"kl_loss_3": 1917.52822265625,
"kl_loss_7": 709.9231262207031,
"learning_rate": 0.0008040659226635089,
"loss": 1341.8297,
"step": 2990
},
{
"ce_loss_10": 3.65326806306839,
"ce_loss_13": 3.555258011817932,
"ce_loss_2": 4.710744786262512,
"ce_loss_3": 4.444170761108398,
"ce_loss_7": 3.8668533086776735,
"epoch": 0.3,
"grad_norm": 640.0,
"kl_loss_10": 219.24570388793944,
"kl_loss_2": 2376.9404907226562,
"kl_loss_3": 1902.857159423828,
"kl_loss_7": 725.9926879882812,
"learning_rate": 0.0008028048435688333,
"loss": 1298.4502,
"step": 3000
},
{
"ce_loss_10": 3.521394634246826,
"ce_loss_13": 3.4270112991333006,
"ce_loss_2": 4.624356460571289,
"ce_loss_3": 4.355751609802246,
"ce_loss_7": 3.7494575500488283,
"epoch": 0.301,
"grad_norm": 716.0,
"kl_loss_10": 217.2972724914551,
"kl_loss_2": 2452.999304199219,
"kl_loss_3": 1985.1250549316405,
"kl_loss_7": 732.1629119873047,
"learning_rate": 0.0008015407152327448,
"loss": 1335.19,
"step": 3010
},
{
"ce_loss_10": 3.5699279427528383,
"ce_loss_13": 3.475005257129669,
"ce_loss_2": 4.65969865322113,
"ce_loss_3": 4.38304386138916,
"ce_loss_7": 3.784406042098999,
"epoch": 0.302,
"grad_norm": 620.0,
"kl_loss_10": 215.99359130859375,
"kl_loss_2": 2432.162463378906,
"kl_loss_3": 1951.8839721679688,
"kl_loss_7": 718.2368713378906,
"learning_rate": 0.0008002735503850016,
"loss": 1332.6505,
"step": 3020
},
{
"ce_loss_10": 3.4684691429138184,
"ce_loss_13": 3.367643666267395,
"ce_loss_2": 4.5924430847167965,
"ce_loss_3": 4.30932047367096,
"ce_loss_7": 3.6915883660316466,
"epoch": 0.303,
"grad_norm": 636.0,
"kl_loss_10": 224.01161422729493,
"kl_loss_2": 2494.453234863281,
"kl_loss_3": 2004.73359375,
"kl_loss_7": 736.367529296875,
"learning_rate": 0.0007990033617859396,
"loss": 1348.4062,
"step": 3030
},
{
"ce_loss_10": 3.5133005499839784,
"ce_loss_13": 3.417665791511536,
"ce_loss_2": 4.581400918960571,
"ce_loss_3": 4.318250679969788,
"ce_loss_7": 3.734131360054016,
"epoch": 0.304,
"grad_norm": 692.0,
"kl_loss_10": 218.55305099487305,
"kl_loss_2": 2367.1648193359374,
"kl_loss_3": 1894.6960754394531,
"kl_loss_7": 712.4279693603515,
"learning_rate": 0.000797730162226344,
"loss": 1274.1975,
"step": 3040
},
{
"ce_loss_10": 3.540754234790802,
"ce_loss_13": 3.4410573482513427,
"ce_loss_2": 4.607666325569153,
"ce_loss_3": 4.33906877040863,
"ce_loss_7": 3.76459002494812,
"epoch": 0.305,
"grad_norm": 692.0,
"kl_loss_10": 221.26933517456055,
"kl_loss_2": 2377.095458984375,
"kl_loss_3": 1910.9453735351562,
"kl_loss_7": 729.3416778564454,
"learning_rate": 0.0007964539645273203,
"loss": 1293.3233,
"step": 3050
},
{
"ce_loss_10": 3.549929714202881,
"ce_loss_13": 3.4547195076942443,
"ce_loss_2": 4.595946025848389,
"ce_loss_3": 4.332681286334991,
"ce_loss_7": 3.7608805656433106,
"epoch": 0.306,
"grad_norm": 608.0,
"kl_loss_10": 214.02068862915038,
"kl_loss_2": 2324.1172485351562,
"kl_loss_3": 1866.7198425292968,
"kl_loss_7": 705.0489013671875,
"learning_rate": 0.000795174781540165,
"loss": 1301.7614,
"step": 3060
},
{
"ce_loss_10": 3.626460921764374,
"ce_loss_13": 3.5295538663864137,
"ce_loss_2": 4.639704465866089,
"ce_loss_3": 4.383927941322327,
"ce_loss_7": 3.8362658858299254,
"epoch": 0.307,
"grad_norm": 644.0,
"kl_loss_10": 215.03676071166993,
"kl_loss_2": 2264.9541259765624,
"kl_loss_3": 1824.0037841796875,
"kl_loss_7": 696.3071411132812,
"learning_rate": 0.0007938926261462366,
"loss": 1288.9521,
"step": 3070
},
{
"ce_loss_10": 3.5775561928749084,
"ce_loss_13": 3.480459380149841,
"ce_loss_2": 4.618080592155456,
"ce_loss_3": 4.350315952301026,
"ce_loss_7": 3.7854344248771667,
"epoch": 0.308,
"grad_norm": 648.0,
"kl_loss_10": 216.656893157959,
"kl_loss_2": 2357.475067138672,
"kl_loss_3": 1888.0174133300782,
"kl_loss_7": 712.7872009277344,
"learning_rate": 0.0007926075112568258,
"loss": 1316.9054,
"step": 3080
},
{
"ce_loss_10": 3.5692449688911436,
"ce_loss_13": 3.4759126543998717,
"ce_loss_2": 4.623606491088867,
"ce_loss_3": 4.366301465034485,
"ce_loss_7": 3.78162659406662,
"epoch": 0.309,
"grad_norm": 560.0,
"kl_loss_10": 213.1074462890625,
"kl_loss_2": 2357.0720764160155,
"kl_loss_3": 1902.1767517089843,
"kl_loss_7": 709.6952423095703,
"learning_rate": 0.0007913194498130252,
"loss": 1281.0172,
"step": 3090
},
{
"ce_loss_10": 3.494074010848999,
"ce_loss_13": 3.400245749950409,
"ce_loss_2": 4.5784650325775145,
"ce_loss_3": 4.316486406326294,
"ce_loss_7": 3.7143809318542482,
"epoch": 0.31,
"grad_norm": 736.0,
"kl_loss_10": 216.9530891418457,
"kl_loss_2": 2388.186309814453,
"kl_loss_3": 1924.5094665527345,
"kl_loss_7": 718.4751098632812,
"learning_rate": 0.0007900284547855992,
"loss": 1312.7211,
"step": 3100
},
{
"ce_loss_10": 3.5040755391120912,
"ce_loss_13": 3.409269428253174,
"ce_loss_2": 4.549410009384156,
"ce_loss_3": 4.294600343704223,
"ce_loss_7": 3.7213049054145815,
"epoch": 0.311,
"grad_norm": 800.0,
"kl_loss_10": 210.81134338378905,
"kl_loss_2": 2329.0393676757812,
"kl_loss_3": 1876.9636657714843,
"kl_loss_7": 708.2128143310547,
"learning_rate": 0.0007887345391748532,
"loss": 1312.8156,
"step": 3110
},
{
"ce_loss_10": 3.634432864189148,
"ce_loss_13": 3.543325686454773,
"ce_loss_2": 4.651146030426025,
"ce_loss_3": 4.387193036079407,
"ce_loss_7": 3.8459346532821654,
"epoch": 0.312,
"grad_norm": 1168.0,
"kl_loss_10": 212.2933433532715,
"kl_loss_2": 2284.2329711914062,
"kl_loss_3": 1829.6757873535157,
"kl_loss_7": 706.4437377929687,
"learning_rate": 0.0007874377160105036,
"loss": 1259.3671,
"step": 3120
},
{
"ce_loss_10": 3.530054819583893,
"ce_loss_13": 3.4342761754989626,
"ce_loss_2": 4.628887629508972,
"ce_loss_3": 4.362399673461914,
"ce_loss_7": 3.7490867018699645,
"epoch": 0.313,
"grad_norm": 608.0,
"kl_loss_10": 212.55482711791993,
"kl_loss_2": 2429.394366455078,
"kl_loss_3": 1971.4875915527343,
"kl_loss_7": 728.8083862304687,
"learning_rate": 0.0007861379983515449,
"loss": 1354.4891,
"step": 3130
},
{
"ce_loss_10": 3.6109140157699584,
"ce_loss_13": 3.5200807809829713,
"ce_loss_2": 4.655977535247803,
"ce_loss_3": 4.39032473564148,
"ce_loss_7": 3.831193280220032,
"epoch": 0.314,
"grad_norm": 592.0,
"kl_loss_10": 209.2868881225586,
"kl_loss_2": 2336.8374755859377,
"kl_loss_3": 1868.733642578125,
"kl_loss_7": 717.5943817138672,
"learning_rate": 0.0007848353992861195,
"loss": 1273.946,
"step": 3140
},
{
"ce_loss_10": 3.6957940101623534,
"ce_loss_13": 3.595130515098572,
"ce_loss_2": 4.7389120101928714,
"ce_loss_3": 4.469233250617981,
"ce_loss_7": 3.926785933971405,
"epoch": 0.315,
"grad_norm": 888.0,
"kl_loss_10": 223.79472427368165,
"kl_loss_2": 2334.7629638671874,
"kl_loss_3": 1867.655010986328,
"kl_loss_7": 743.88798828125,
"learning_rate": 0.0007835299319313853,
"loss": 1303.1903,
"step": 3150
},
{
"ce_loss_10": 3.5704684495925902,
"ce_loss_13": 3.476880931854248,
"ce_loss_2": 4.606448101997375,
"ce_loss_3": 4.3400969982147215,
"ce_loss_7": 3.7886768341064454,
"epoch": 0.316,
"grad_norm": 700.0,
"kl_loss_10": 211.18966979980468,
"kl_loss_2": 2323.449572753906,
"kl_loss_3": 1851.584783935547,
"kl_loss_7": 721.3533996582031,
"learning_rate": 0.0007822216094333848,
"loss": 1322.3376,
"step": 3160
},
{
"ce_loss_10": 3.5810484290122986,
"ce_loss_13": 3.4873368740081787,
"ce_loss_2": 4.65300440788269,
"ce_loss_3": 4.387210464477539,
"ce_loss_7": 3.807372546195984,
"epoch": 0.317,
"grad_norm": 752.0,
"kl_loss_10": 212.44315567016602,
"kl_loss_2": 2384.089465332031,
"kl_loss_3": 1914.7229309082031,
"kl_loss_7": 731.2730682373046,
"learning_rate": 0.0007809104449669101,
"loss": 1294.9703,
"step": 3170
},
{
"ce_loss_10": 3.532199835777283,
"ce_loss_13": 3.4395654439926147,
"ce_loss_2": 4.584282898902893,
"ce_loss_3": 4.30876350402832,
"ce_loss_7": 3.7615070223808287,
"epoch": 0.318,
"grad_norm": 916.0,
"kl_loss_10": 207.75176467895508,
"kl_loss_2": 2339.527239990234,
"kl_loss_3": 1858.5612731933593,
"kl_loss_7": 730.9345184326172,
"learning_rate": 0.0007795964517353734,
"loss": 1278.7686,
"step": 3180
},
{
"ce_loss_10": 3.518466317653656,
"ce_loss_13": 3.426977741718292,
"ce_loss_2": 4.596842670440674,
"ce_loss_3": 4.325531184673309,
"ce_loss_7": 3.750142526626587,
"epoch": 0.319,
"grad_norm": 648.0,
"kl_loss_10": 211.74872894287108,
"kl_loss_2": 2403.7151733398437,
"kl_loss_3": 1931.1865478515624,
"kl_loss_7": 753.2544128417969,
"learning_rate": 0.000778279642970672,
"loss": 1282.6858,
"step": 3190
},
{
"ce_loss_10": 3.5179845094680786,
"ce_loss_13": 3.428935539722443,
"ce_loss_2": 4.562283158302307,
"ce_loss_3": 4.295236802101135,
"ce_loss_7": 3.7340755701065063,
"epoch": 0.32,
"grad_norm": 904.0,
"kl_loss_10": 205.840421295166,
"kl_loss_2": 2345.048876953125,
"kl_loss_3": 1866.877911376953,
"kl_loss_7": 720.2934661865235,
"learning_rate": 0.0007769600319330552,
"loss": 1264.9217,
"step": 3200
},
{
"ce_loss_10": 3.554915177822113,
"ce_loss_13": 3.466732156276703,
"ce_loss_2": 4.653983449935913,
"ce_loss_3": 4.384042191505432,
"ce_loss_7": 3.7919634103775026,
"epoch": 0.321,
"grad_norm": 708.0,
"kl_loss_10": 205.94034423828126,
"kl_loss_2": 2414.151336669922,
"kl_loss_3": 1938.7024047851562,
"kl_loss_7": 735.4687530517579,
"learning_rate": 0.0007756376319109917,
"loss": 1299.3125,
"step": 3210
},
{
"ce_loss_10": 3.601811099052429,
"ce_loss_13": 3.513204276561737,
"ce_loss_2": 4.643903732299805,
"ce_loss_3": 4.372084999084473,
"ce_loss_7": 3.82387717962265,
"epoch": 0.322,
"grad_norm": 856.0,
"kl_loss_10": 205.92393646240234,
"kl_loss_2": 2310.6433349609374,
"kl_loss_3": 1837.0882263183594,
"kl_loss_7": 727.0832824707031,
"learning_rate": 0.0007743124562210351,
"loss": 1252.0768,
"step": 3220
},
{
"ce_loss_10": 3.613737678527832,
"ce_loss_13": 3.5243070006370543,
"ce_loss_2": 4.6432843685150145,
"ce_loss_3": 4.373880839347839,
"ce_loss_7": 3.835604417324066,
"epoch": 0.323,
"grad_norm": 804.0,
"kl_loss_10": 206.8451774597168,
"kl_loss_2": 2302.7849548339846,
"kl_loss_3": 1831.265850830078,
"kl_loss_7": 718.6182281494141,
"learning_rate": 0.0007729845182076895,
"loss": 1281.717,
"step": 3230
},
{
"ce_loss_10": 3.54460072517395,
"ce_loss_13": 3.458022344112396,
"ce_loss_2": 4.567643523216248,
"ce_loss_3": 4.304308319091797,
"ce_loss_7": 3.7557874441146852,
"epoch": 0.324,
"grad_norm": 780.0,
"kl_loss_10": 202.53059158325195,
"kl_loss_2": 2275.4922607421877,
"kl_loss_3": 1814.183154296875,
"kl_loss_7": 706.1778442382813,
"learning_rate": 0.0007716538312432765,
"loss": 1299.5142,
"step": 3240
},
{
"ce_loss_10": 3.5034128069877624,
"ce_loss_13": 3.4109013199806215,
"ce_loss_2": 4.59195454120636,
"ce_loss_3": 4.316867542266846,
"ce_loss_7": 3.7340264201164244,
"epoch": 0.325,
"grad_norm": 620.0,
"kl_loss_10": 212.30709838867188,
"kl_loss_2": 2399.0862243652346,
"kl_loss_3": 1912.8944152832032,
"kl_loss_7": 738.4792083740234,
"learning_rate": 0.0007703204087277988,
"loss": 1308.0572,
"step": 3250
},
{
"ce_loss_10": 3.60279586315155,
"ce_loss_13": 3.5141580939292907,
"ce_loss_2": 4.619040894508362,
"ce_loss_3": 4.348745739459991,
"ce_loss_7": 3.81355699300766,
"epoch": 0.326,
"grad_norm": 728.0,
"kl_loss_10": 202.46756286621093,
"kl_loss_2": 2248.315087890625,
"kl_loss_3": 1773.8885681152344,
"kl_loss_7": 686.2587005615235,
"learning_rate": 0.0007689842640888063,
"loss": 1245.8748,
"step": 3260
},
{
"ce_loss_10": 3.6051684260368346,
"ce_loss_13": 3.5150891542434692,
"ce_loss_2": 4.619964861869812,
"ce_loss_3": 4.360685467720032,
"ce_loss_7": 3.8154699206352234,
"epoch": 0.327,
"grad_norm": 684.0,
"kl_loss_10": 208.96502685546875,
"kl_loss_2": 2265.3287658691406,
"kl_loss_3": 1811.821759033203,
"kl_loss_7": 703.9406219482422,
"learning_rate": 0.0007676454107812607,
"loss": 1264.3093,
"step": 3270
},
{
"ce_loss_10": 3.537815499305725,
"ce_loss_13": 3.444960331916809,
"ce_loss_2": 4.608094549179077,
"ce_loss_3": 4.3397119522094725,
"ce_loss_7": 3.7506736159324645,
"epoch": 0.328,
"grad_norm": 616.0,
"kl_loss_10": 211.32426528930665,
"kl_loss_2": 2388.8077392578125,
"kl_loss_3": 1915.7237182617187,
"kl_loss_7": 707.672705078125,
"learning_rate": 0.0007663038622873999,
"loss": 1279.8335,
"step": 3280
},
{
"ce_loss_10": 3.574945878982544,
"ce_loss_13": 3.4833101868629455,
"ce_loss_2": 4.628302264213562,
"ce_loss_3": 4.366952037811279,
"ce_loss_7": 3.784594464302063,
"epoch": 0.329,
"grad_norm": 596.0,
"kl_loss_10": 211.56422576904296,
"kl_loss_2": 2351.5840576171877,
"kl_loss_3": 1879.0997131347656,
"kl_loss_7": 694.7927307128906,
"learning_rate": 0.0007649596321166025,
"loss": 1256.8023,
"step": 3290
},
{
"ce_loss_10": 3.4788912653923036,
"ce_loss_13": 3.3914729714393617,
"ce_loss_2": 4.513116896152496,
"ce_loss_3": 4.253373873233795,
"ce_loss_7": 3.6928447008132936,
"epoch": 0.33,
"grad_norm": 600.0,
"kl_loss_10": 203.03155212402345,
"kl_loss_2": 2285.9035522460936,
"kl_loss_3": 1828.6068603515625,
"kl_loss_7": 691.3944427490235,
"learning_rate": 0.0007636127338052513,
"loss": 1273.8033,
"step": 3300
},
{
"ce_loss_10": 3.5868964433670043,
"ce_loss_13": 3.49528044462204,
"ce_loss_2": 4.663711452484131,
"ce_loss_3": 4.39831657409668,
"ce_loss_7": 3.804142189025879,
"epoch": 0.331,
"grad_norm": 624.0,
"kl_loss_10": 211.30522232055665,
"kl_loss_2": 2397.5427856445312,
"kl_loss_3": 1927.950506591797,
"kl_loss_7": 706.7799133300781,
"learning_rate": 0.0007622631809165971,
"loss": 1277.9496,
"step": 3310
},
{
"ce_loss_10": 3.582921600341797,
"ce_loss_13": 3.4965414881706236,
"ce_loss_2": 4.58232958316803,
"ce_loss_3": 4.323147928714752,
"ce_loss_7": 3.783066177368164,
"epoch": 0.332,
"grad_norm": 688.0,
"kl_loss_10": 197.47354049682616,
"kl_loss_2": 2216.269598388672,
"kl_loss_3": 1760.1223999023437,
"kl_loss_7": 664.7514923095703,
"learning_rate": 0.000760910987040623,
"loss": 1245.9068,
"step": 3320
},
{
"ce_loss_10": 3.5663990497589113,
"ce_loss_13": 3.474509632587433,
"ce_loss_2": 4.641107606887817,
"ce_loss_3": 4.369283008575439,
"ce_loss_7": 3.78259996175766,
"epoch": 0.333,
"grad_norm": 616.0,
"kl_loss_10": 210.06242904663085,
"kl_loss_2": 2402.0831298828125,
"kl_loss_3": 1926.8569946289062,
"kl_loss_7": 714.4321014404297,
"learning_rate": 0.000759556165793906,
"loss": 1272.2351,
"step": 3330
},
{
"ce_loss_10": 3.5859936118125915,
"ce_loss_13": 3.4947105884552,
"ce_loss_2": 4.635438013076782,
"ce_loss_3": 4.3713214635849,
"ce_loss_7": 3.79847708940506,
"epoch": 0.334,
"grad_norm": 600.0,
"kl_loss_10": 207.2735107421875,
"kl_loss_2": 2336.860705566406,
"kl_loss_3": 1864.5109497070312,
"kl_loss_7": 698.3573028564454,
"learning_rate": 0.000758198730819481,
"loss": 1291.4691,
"step": 3340
},
{
"ce_loss_10": 3.530641829967499,
"ce_loss_13": 3.44451619386673,
"ce_loss_2": 4.589142799377441,
"ce_loss_3": 4.3217404961586,
"ce_loss_7": 3.7362788200378416,
"epoch": 0.335,
"grad_norm": 624.0,
"kl_loss_10": 202.07082290649413,
"kl_loss_2": 2360.776690673828,
"kl_loss_3": 1886.3787536621094,
"kl_loss_7": 695.4340698242188,
"learning_rate": 0.0007568386957867032,
"loss": 1283.006,
"step": 3350
},
{
"ce_loss_10": 3.6058520078659058,
"ce_loss_13": 3.5129651188850404,
"ce_loss_2": 4.643417167663574,
"ce_loss_3": 4.37364354133606,
"ce_loss_7": 3.813083219528198,
"epoch": 0.336,
"grad_norm": 784.0,
"kl_loss_10": 207.97874145507814,
"kl_loss_2": 2295.3684020996093,
"kl_loss_3": 1825.3164672851562,
"kl_loss_7": 687.7690948486328,
"learning_rate": 0.0007554760743911103,
"loss": 1276.5395,
"step": 3360
},
{
"ce_loss_10": 3.5018799662590028,
"ce_loss_13": 3.4133763194084166,
"ce_loss_2": 4.551569533348084,
"ce_loss_3": 4.283941590785981,
"ce_loss_7": 3.704894995689392,
"epoch": 0.337,
"grad_norm": 644.0,
"kl_loss_10": 201.99492797851562,
"kl_loss_2": 2352.8791320800783,
"kl_loss_3": 1880.3864440917969,
"kl_loss_7": 682.2205291748047,
"learning_rate": 0.0007541108803542846,
"loss": 1306.1851,
"step": 3370
},
{
"ce_loss_10": 3.5562949419021606,
"ce_loss_13": 3.467681646347046,
"ce_loss_2": 4.61066963672638,
"ce_loss_3": 4.3377085566520694,
"ce_loss_7": 3.7622047662734985,
"epoch": 0.338,
"grad_norm": 632.0,
"kl_loss_10": 205.39780044555664,
"kl_loss_2": 2363.417413330078,
"kl_loss_3": 1877.7716186523437,
"kl_loss_7": 681.8937408447266,
"learning_rate": 0.0007527431274237149,
"loss": 1343.544,
"step": 3380
},
{
"ce_loss_10": 3.5283817052841187,
"ce_loss_13": 3.4397483229637147,
"ce_loss_2": 4.570840525627136,
"ce_loss_3": 4.304589962959289,
"ce_loss_7": 3.7283903479576113,
"epoch": 0.339,
"grad_norm": 572.0,
"kl_loss_10": 203.6351058959961,
"kl_loss_2": 2336.9517456054687,
"kl_loss_3": 1865.9661682128906,
"kl_loss_7": 677.4322174072265,
"learning_rate": 0.0007513728293726579,
"loss": 1277.8105,
"step": 3390
},
{
"ce_loss_10": 3.644584619998932,
"ce_loss_13": 3.556074547767639,
"ce_loss_2": 4.664924669265747,
"ce_loss_3": 4.399448752403259,
"ce_loss_7": 3.8509042620658875,
"epoch": 0.34,
"grad_norm": 644.0,
"kl_loss_10": 203.6712448120117,
"kl_loss_2": 2293.6278381347656,
"kl_loss_3": 1826.5482238769532,
"kl_loss_7": 683.2333282470703,
"learning_rate": 0.00075,
"loss": 1246.8078,
"step": 3400
},
{
"ce_loss_10": 3.6313098788261415,
"ce_loss_13": 3.5418556571006774,
"ce_loss_2": 4.690052318572998,
"ce_loss_3": 4.416153597831726,
"ce_loss_7": 3.843977117538452,
"epoch": 0.341,
"grad_norm": 644.0,
"kl_loss_10": 205.15843811035157,
"kl_loss_2": 2335.1500244140625,
"kl_loss_3": 1861.3693969726562,
"kl_loss_7": 693.7598510742188,
"learning_rate": 0.0007486246531301177,
"loss": 1258.7575,
"step": 3410
},
{
"ce_loss_10": 3.443863534927368,
"ce_loss_13": 3.3510751008987425,
"ce_loss_2": 4.5022605657577515,
"ce_loss_3": 4.230472648143769,
"ce_loss_7": 3.6559174418449403,
"epoch": 0.342,
"grad_norm": 664.0,
"kl_loss_10": 202.74062805175782,
"kl_loss_2": 2345.9876220703127,
"kl_loss_3": 1864.947119140625,
"kl_loss_7": 688.1812042236328,
"learning_rate": 0.0007472468026127384,
"loss": 1260.6121,
"step": 3420
},
{
"ce_loss_10": 3.5721543431282043,
"ce_loss_13": 3.4770930409431458,
"ce_loss_2": 4.665278792381287,
"ce_loss_3": 4.404071187973022,
"ce_loss_7": 3.788026750087738,
"epoch": 0.343,
"grad_norm": 712.0,
"kl_loss_10": 214.17584533691405,
"kl_loss_2": 2439.352404785156,
"kl_loss_3": 1972.9944885253906,
"kl_loss_7": 720.6741455078125,
"learning_rate": 0.000745866462322802,
"loss": 1320.8363,
"step": 3430
},
{
"ce_loss_10": 3.560039293766022,
"ce_loss_13": 3.4741207122802735,
"ce_loss_2": 4.592786359786987,
"ce_loss_3": 4.334179782867432,
"ce_loss_7": 3.7690793752670286,
"epoch": 0.344,
"grad_norm": 700.0,
"kl_loss_10": 200.53601684570313,
"kl_loss_2": 2283.709338378906,
"kl_loss_3": 1835.7159240722656,
"kl_loss_7": 673.5906463623047,
"learning_rate": 0.0007444836461603195,
"loss": 1261.9196,
"step": 3440
},
{
"ce_loss_10": 3.6245110511779783,
"ce_loss_13": 3.5312341451644897,
"ce_loss_2": 4.6719811201095585,
"ce_loss_3": 4.406914234161377,
"ce_loss_7": 3.8266146540641786,
"epoch": 0.345,
"grad_norm": 648.0,
"kl_loss_10": 214.16654891967772,
"kl_loss_2": 2362.7242797851563,
"kl_loss_3": 1898.2087890625,
"kl_loss_7": 704.3192626953125,
"learning_rate": 0.0007430983680502344,
"loss": 1301.0338,
"step": 3450
},
{
"ce_loss_10": 3.4667457938194275,
"ce_loss_13": 3.377876877784729,
"ce_loss_2": 4.545617830753327,
"ce_loss_3": 4.272857880592346,
"ce_loss_7": 3.6741854548454285,
"epoch": 0.346,
"grad_norm": 608.0,
"kl_loss_10": 206.17358779907227,
"kl_loss_2": 2388.203955078125,
"kl_loss_3": 1909.6425415039062,
"kl_loss_7": 697.4611602783203,
"learning_rate": 0.0007417106419422819,
"loss": 1290.2338,
"step": 3460
},
{
"ce_loss_10": 3.571521496772766,
"ce_loss_13": 3.4770392775535583,
"ce_loss_2": 4.614993333816528,
"ce_loss_3": 4.345528078079224,
"ce_loss_7": 3.7798440217971803,
"epoch": 0.347,
"grad_norm": 656.0,
"kl_loss_10": 204.45724334716797,
"kl_loss_2": 2308.5895385742188,
"kl_loss_3": 1833.5083312988281,
"kl_loss_7": 683.0788604736329,
"learning_rate": 0.0007403204818108486,
"loss": 1275.3799,
"step": 3470
},
{
"ce_loss_10": 3.5445627093315126,
"ce_loss_13": 3.4533625841140747,
"ce_loss_2": 4.6031595230102536,
"ce_loss_3": 4.338364768028259,
"ce_loss_7": 3.746653878688812,
"epoch": 0.348,
"grad_norm": 576.0,
"kl_loss_10": 208.29350357055665,
"kl_loss_2": 2371.4946899414062,
"kl_loss_3": 1909.6997863769532,
"kl_loss_7": 686.1026062011719,
"learning_rate": 0.0007389279016548316,
"loss": 1247.1171,
"step": 3480
},
{
"ce_loss_10": 3.553533661365509,
"ce_loss_13": 3.458456254005432,
"ce_loss_2": 4.6566637516021725,
"ce_loss_3": 4.37568781375885,
"ce_loss_7": 3.7642048597335815,
"epoch": 0.349,
"grad_norm": 684.0,
"kl_loss_10": 212.6401054382324,
"kl_loss_2": 2451.175671386719,
"kl_loss_3": 1951.6068969726562,
"kl_loss_7": 702.7821899414063,
"learning_rate": 0.0007375329154974975,
"loss": 1307.9424,
"step": 3490
},
{
"ce_loss_10": 3.5084131717681886,
"ce_loss_13": 3.4206284284591675,
"ce_loss_2": 4.546359324455262,
"ce_loss_3": 4.28377673625946,
"ce_loss_7": 3.7158578753471376,
"epoch": 0.35,
"grad_norm": 676.0,
"kl_loss_10": 208.94808502197264,
"kl_loss_2": 2307.420263671875,
"kl_loss_3": 1848.263897705078,
"kl_loss_7": 683.7157653808594,
"learning_rate": 0.0007361355373863414,
"loss": 1294.9244,
"step": 3500
},
{
"ce_loss_10": 3.563704586029053,
"ce_loss_13": 3.471196401119232,
"ce_loss_2": 4.5938108444213865,
"ce_loss_3": 4.332852721214294,
"ce_loss_7": 3.7703267097473145,
"epoch": 0.351,
"grad_norm": 580.0,
"kl_loss_10": 208.09024658203126,
"kl_loss_2": 2287.9739318847655,
"kl_loss_3": 1828.5689758300782,
"kl_loss_7": 673.030307006836,
"learning_rate": 0.0007347357813929454,
"loss": 1287.6393,
"step": 3510
},
{
"ce_loss_10": 3.5099044919013975,
"ce_loss_13": 3.419321870803833,
"ce_loss_2": 4.543364262580871,
"ce_loss_3": 4.27359983921051,
"ce_loss_7": 3.7119598269462584,
"epoch": 0.352,
"grad_norm": 620.0,
"kl_loss_10": 207.49536819458007,
"kl_loss_2": 2274.5087951660157,
"kl_loss_3": 1817.6334594726563,
"kl_loss_7": 673.7456726074219,
"learning_rate": 0.0007333336616128369,
"loss": 1275.3445,
"step": 3520
},
{
"ce_loss_10": 3.4893477082252504,
"ce_loss_13": 3.394596815109253,
"ce_loss_2": 4.558405804634094,
"ce_loss_3": 4.2951094031333925,
"ce_loss_7": 3.69784619808197,
"epoch": 0.353,
"grad_norm": 636.0,
"kl_loss_10": 211.34491577148438,
"kl_loss_2": 2368.5742614746096,
"kl_loss_3": 1904.588153076172,
"kl_loss_7": 699.6206512451172,
"learning_rate": 0.0007319291921653463,
"loss": 1290.8219,
"step": 3530
},
{
"ce_loss_10": 3.5741103887557983,
"ce_loss_13": 3.4787994265556335,
"ce_loss_2": 4.633104467391968,
"ce_loss_3": 4.363708543777466,
"ce_loss_7": 3.7883455634117125,
"epoch": 0.354,
"grad_norm": 688.0,
"kl_loss_10": 211.75357818603516,
"kl_loss_2": 2353.7052368164063,
"kl_loss_3": 1872.2497680664062,
"kl_loss_7": 696.9233825683593,
"learning_rate": 0.0007305223871934656,
"loss": 1261.161,
"step": 3540
},
{
"ce_loss_10": 3.53648921251297,
"ce_loss_13": 3.4479789614677427,
"ce_loss_2": 4.58404312133789,
"ce_loss_3": 4.315784668922424,
"ce_loss_7": 3.7411927938461305,
"epoch": 0.355,
"grad_norm": 644.0,
"kl_loss_10": 204.81159896850585,
"kl_loss_2": 2318.000451660156,
"kl_loss_3": 1841.0889282226562,
"kl_loss_7": 674.2556213378906,
"learning_rate": 0.0007291132608637052,
"loss": 1261.7902,
"step": 3550
},
{
"ce_loss_10": 3.4981685996055605,
"ce_loss_13": 3.4104817390441893,
"ce_loss_2": 4.630524325370788,
"ce_loss_3": 4.356097209453583,
"ce_loss_7": 3.7044720530509947,
"epoch": 0.356,
"grad_norm": 596.0,
"kl_loss_10": 201.21338653564453,
"kl_loss_2": 2484.7927124023436,
"kl_loss_3": 2010.760675048828,
"kl_loss_7": 676.6305114746094,
"learning_rate": 0.0007277018273659516,
"loss": 1327.9727,
"step": 3560
},
{
"ce_loss_10": 3.625146007537842,
"ce_loss_13": 3.531074047088623,
"ce_loss_2": 4.6699333667755125,
"ce_loss_3": 4.4035911679267885,
"ce_loss_7": 3.836405646800995,
"epoch": 0.357,
"grad_norm": 620.0,
"kl_loss_10": 209.5035614013672,
"kl_loss_2": 2341.261669921875,
"kl_loss_3": 1873.83955078125,
"kl_loss_7": 701.3334136962891,
"learning_rate": 0.0007262881009133242,
"loss": 1275.0637,
"step": 3570
},
{
"ce_loss_10": 3.5401904344558717,
"ce_loss_13": 3.4541720032691954,
"ce_loss_2": 4.572478699684143,
"ce_loss_3": 4.315898811817169,
"ce_loss_7": 3.7435639023780825,
"epoch": 0.358,
"grad_norm": 616.0,
"kl_loss_10": 201.09010009765626,
"kl_loss_2": 2313.080432128906,
"kl_loss_3": 1855.7528381347656,
"kl_loss_7": 673.6273254394531,
"learning_rate": 0.0007248720957420329,
"loss": 1252.028,
"step": 3580
},
{
"ce_loss_10": 3.55083909034729,
"ce_loss_13": 3.466167140007019,
"ce_loss_2": 4.594191384315491,
"ce_loss_3": 4.320683646202087,
"ce_loss_7": 3.750420665740967,
"epoch": 0.359,
"grad_norm": 592.0,
"kl_loss_10": 201.2973388671875,
"kl_loss_2": 2304.7897521972654,
"kl_loss_3": 1830.9858337402343,
"kl_loss_7": 667.1753356933593,
"learning_rate": 0.0007234538261112341,
"loss": 1305.9864,
"step": 3590
},
{
"ce_loss_10": 3.5870068073272705,
"ce_loss_13": 3.4963618993759153,
"ce_loss_2": 4.64858865737915,
"ce_loss_3": 4.383180546760559,
"ce_loss_7": 3.7978907346725466,
"epoch": 0.36,
"grad_norm": 580.0,
"kl_loss_10": 206.438224029541,
"kl_loss_2": 2350.6687255859374,
"kl_loss_3": 1879.8920471191407,
"kl_loss_7": 691.6597686767578,
"learning_rate": 0.0007220333063028871,
"loss": 1262.9199,
"step": 3600
},
{
"ce_loss_10": 3.617115843296051,
"ce_loss_13": 3.5226447105407717,
"ce_loss_2": 4.7090448379516605,
"ce_loss_3": 4.440777349472046,
"ce_loss_7": 3.927542436122894,
"epoch": 0.361,
"grad_norm": 700.0,
"kl_loss_10": 215.82505798339844,
"kl_loss_2": 2451.148791503906,
"kl_loss_3": 1982.2255798339843,
"kl_loss_7": 911.9942932128906,
"learning_rate": 0.0007206105506216106,
"loss": 1351.6553,
"step": 3610
},
{
"ce_loss_10": 3.4991318702697756,
"ce_loss_13": 3.4086315035820007,
"ce_loss_2": 4.517360043525696,
"ce_loss_3": 4.2627614617347716,
"ce_loss_7": 3.7236221551895143,
"epoch": 0.362,
"grad_norm": 836.0,
"kl_loss_10": 210.4427146911621,
"kl_loss_2": 2266.5427673339846,
"kl_loss_3": 1820.741632080078,
"kl_loss_7": 721.7343872070312,
"learning_rate": 0.0007191855733945387,
"loss": 1249.7904,
"step": 3620
},
{
"ce_loss_10": 3.5907591581344604,
"ce_loss_13": 3.5010143160820006,
"ce_loss_2": 4.622481203079223,
"ce_loss_3": 4.356601357460022,
"ce_loss_7": 3.79650160074234,
"epoch": 0.363,
"grad_norm": 572.0,
"kl_loss_10": 204.04148864746094,
"kl_loss_2": 2289.9740478515623,
"kl_loss_3": 1818.431005859375,
"kl_loss_7": 692.2773986816406,
"learning_rate": 0.0007177583889711762,
"loss": 1250.2074,
"step": 3630
},
{
"ce_loss_10": 3.5057953119277956,
"ce_loss_13": 3.411814069747925,
"ce_loss_2": 4.563321113586426,
"ce_loss_3": 4.290196192264557,
"ce_loss_7": 3.7388221859931945,
"epoch": 0.364,
"grad_norm": 952.0,
"kl_loss_10": 206.33124389648438,
"kl_loss_2": 2346.8802978515623,
"kl_loss_3": 1869.4090270996094,
"kl_loss_7": 731.2307403564453,
"learning_rate": 0.0007163290117232541,
"loss": 1286.6971,
"step": 3640
},
{
"ce_loss_10": 3.625288701057434,
"ce_loss_13": 3.5375028014183045,
"ce_loss_2": 4.612985825538635,
"ce_loss_3": 4.35078866481781,
"ce_loss_7": 3.820189893245697,
"epoch": 0.365,
"grad_norm": 676.0,
"kl_loss_10": 199.10545043945314,
"kl_loss_2": 2225.872174072266,
"kl_loss_3": 1769.0750122070312,
"kl_loss_7": 679.4105773925781,
"learning_rate": 0.0007148974560445859,
"loss": 1248.8154,
"step": 3650
},
{
"ce_loss_10": 3.5454740643501284,
"ce_loss_13": 3.458422267436981,
"ce_loss_2": 4.550081968307495,
"ce_loss_3": 4.287899553775787,
"ce_loss_7": 3.7501559376716616,
"epoch": 0.366,
"grad_norm": 588.0,
"kl_loss_10": 198.9521499633789,
"kl_loss_2": 2238.7438537597654,
"kl_loss_3": 1775.3712280273437,
"kl_loss_7": 679.886441040039,
"learning_rate": 0.0007134637363509209,
"loss": 1224.5007,
"step": 3660
},
{
"ce_loss_10": 3.6524737238883973,
"ce_loss_13": 3.566513454914093,
"ce_loss_2": 4.64628803730011,
"ce_loss_3": 4.383014440536499,
"ce_loss_7": 3.85452960729599,
"epoch": 0.367,
"grad_norm": 804.0,
"kl_loss_10": 195.8438636779785,
"kl_loss_2": 2202.7541381835936,
"kl_loss_3": 1744.0477172851563,
"kl_loss_7": 669.0114837646485,
"learning_rate": 0.0007120278670798009,
"loss": 1241.0041,
"step": 3670
},
{
"ce_loss_10": 3.451411759853363,
"ce_loss_13": 3.362742209434509,
"ce_loss_2": 4.563682770729065,
"ce_loss_3": 4.3000654697418215,
"ce_loss_7": 3.6735877275466917,
"epoch": 0.368,
"grad_norm": 692.0,
"kl_loss_10": 207.53860931396486,
"kl_loss_2": 2452.550360107422,
"kl_loss_3": 1973.4927856445313,
"kl_loss_7": 716.8897491455078,
"learning_rate": 0.0007105898626904133,
"loss": 1338.2093,
"step": 3680
},
{
"ce_loss_10": 3.5554641008377077,
"ce_loss_13": 3.4662737131118773,
"ce_loss_2": 4.611227035522461,
"ce_loss_3": 4.340918231010437,
"ce_loss_7": 3.768160092830658,
"epoch": 0.369,
"grad_norm": 564.0,
"kl_loss_10": 202.72654418945314,
"kl_loss_2": 2339.7893005371093,
"kl_loss_3": 1860.5950439453125,
"kl_loss_7": 686.1352569580079,
"learning_rate": 0.0007091497376634463,
"loss": 1252.1551,
"step": 3690
},
{
"ce_loss_10": 3.5008182168006896,
"ce_loss_13": 3.412228453159332,
"ce_loss_2": 4.538051557540894,
"ce_loss_3": 4.271669220924378,
"ce_loss_7": 3.707795190811157,
"epoch": 0.37,
"grad_norm": 688.0,
"kl_loss_10": 203.25593032836915,
"kl_loss_2": 2313.7531005859373,
"kl_loss_3": 1845.0465942382812,
"kl_loss_7": 679.24736328125,
"learning_rate": 0.0007077075065009433,
"loss": 1276.0328,
"step": 3700
},
{
"ce_loss_10": 3.6071534514427186,
"ce_loss_13": 3.5158491373062133,
"ce_loss_2": 4.6654202222824095,
"ce_loss_3": 4.392651915550232,
"ce_loss_7": 3.819450116157532,
"epoch": 0.371,
"grad_norm": 752.0,
"kl_loss_10": 208.91845016479493,
"kl_loss_2": 2358.2886352539062,
"kl_loss_3": 1879.4206665039062,
"kl_loss_7": 699.9392761230469,
"learning_rate": 0.0007062631837261557,
"loss": 1268.6693,
"step": 3710
},
{
"ce_loss_10": 3.476445233821869,
"ce_loss_13": 3.390165627002716,
"ce_loss_2": 4.5276483535766605,
"ce_loss_3": 4.25641827583313,
"ce_loss_7": 3.683011364936829,
"epoch": 0.372,
"grad_norm": 640.0,
"kl_loss_10": 202.1516098022461,
"kl_loss_2": 2314.015344238281,
"kl_loss_3": 1841.6598022460937,
"kl_loss_7": 682.8849884033203,
"learning_rate": 0.0007048167838833977,
"loss": 1289.0859,
"step": 3720
},
{
"ce_loss_10": 3.5699679255485535,
"ce_loss_13": 3.480803608894348,
"ce_loss_2": 4.588373041152954,
"ce_loss_3": 4.323996567726136,
"ce_loss_7": 3.7677942156791686,
"epoch": 0.373,
"grad_norm": 744.0,
"kl_loss_10": 203.9791275024414,
"kl_loss_2": 2290.5110717773437,
"kl_loss_3": 1815.935235595703,
"kl_loss_7": 672.9903137207032,
"learning_rate": 0.0007033683215379002,
"loss": 1247.4349,
"step": 3730
},
{
"ce_loss_10": 3.556042289733887,
"ce_loss_13": 3.4659453988075257,
"ce_loss_2": 4.594552016258239,
"ce_loss_3": 4.319927525520325,
"ce_loss_7": 3.7590227007865904,
"epoch": 0.374,
"grad_norm": 608.0,
"kl_loss_10": 199.66127700805663,
"kl_loss_2": 2281.638720703125,
"kl_loss_3": 1802.195782470703,
"kl_loss_7": 667.0344909667969,
"learning_rate": 0.0007019178112756625,
"loss": 1258.8061,
"step": 3740
},
{
"ce_loss_10": 3.518285346031189,
"ce_loss_13": 3.432125985622406,
"ce_loss_2": 4.562115430831909,
"ce_loss_3": 4.29533269405365,
"ce_loss_7": 3.7262615442276,
"epoch": 0.375,
"grad_norm": 640.0,
"kl_loss_10": 200.82229309082032,
"kl_loss_2": 2292.846240234375,
"kl_loss_3": 1823.6997009277343,
"kl_loss_7": 673.9724395751953,
"learning_rate": 0.0007004652677033068,
"loss": 1263.2482,
"step": 3750
},
{
"ce_loss_10": 3.5903055548667906,
"ce_loss_13": 3.506292223930359,
"ce_loss_2": 4.5962906837463375,
"ce_loss_3": 4.3357175350189205,
"ce_loss_7": 3.786800575256348,
"epoch": 0.376,
"grad_norm": 656.0,
"kl_loss_10": 200.60662612915038,
"kl_loss_2": 2252.357458496094,
"kl_loss_3": 1790.429217529297,
"kl_loss_7": 660.8626007080078,
"learning_rate": 0.0006990107054479312,
"loss": 1245.262,
"step": 3760
},
{
"ce_loss_10": 3.5865476965904235,
"ce_loss_13": 3.4931369185447694,
"ce_loss_2": 4.599129343032837,
"ce_loss_3": 4.335857176780701,
"ce_loss_7": 3.77964334487915,
"epoch": 0.377,
"grad_norm": 700.0,
"kl_loss_10": 209.85385513305664,
"kl_loss_2": 2261.3722229003906,
"kl_loss_3": 1806.916485595703,
"kl_loss_7": 670.6618469238281,
"learning_rate": 0.000697554139156961,
"loss": 1247.3398,
"step": 3770
},
{
"ce_loss_10": 3.572757053375244,
"ce_loss_13": 3.477368426322937,
"ce_loss_2": 4.60923056602478,
"ce_loss_3": 4.342034792900085,
"ce_loss_7": 3.766658973693848,
"epoch": 0.378,
"grad_norm": 628.0,
"kl_loss_10": 218.62799072265625,
"kl_loss_2": 2331.5685607910154,
"kl_loss_3": 1853.7978393554688,
"kl_loss_7": 681.1488891601563,
"learning_rate": 0.0006960955834980027,
"loss": 1246.4775,
"step": 3780
},
{
"ce_loss_10": 3.5454328536987303,
"ce_loss_13": 3.449174666404724,
"ce_loss_2": 4.563591694831848,
"ce_loss_3": 4.298812806606293,
"ce_loss_7": 3.7385509848594665,
"epoch": 0.379,
"grad_norm": 740.0,
"kl_loss_10": 215.9669273376465,
"kl_loss_2": 2275.3556274414063,
"kl_loss_3": 1807.9537841796875,
"kl_loss_7": 673.0077819824219,
"learning_rate": 0.0006946350531586958,
"loss": 1251.4496,
"step": 3790
},
{
"ce_loss_10": 3.5613739252090455,
"ce_loss_13": 3.4710352540016176,
"ce_loss_2": 4.586881446838379,
"ce_loss_3": 4.326097631454468,
"ce_loss_7": 3.7597612500190736,
"epoch": 0.38,
"grad_norm": 636.0,
"kl_loss_10": 210.37978897094726,
"kl_loss_2": 2278.926678466797,
"kl_loss_3": 1818.876287841797,
"kl_loss_7": 669.4569915771484,
"learning_rate": 0.0006931725628465643,
"loss": 1275.2133,
"step": 3800
},
{
"ce_loss_10": 3.590466618537903,
"ce_loss_13": 3.4937587857246397,
"ce_loss_2": 4.623274827003479,
"ce_loss_3": 4.352746081352234,
"ce_loss_7": 3.7943554282188416,
"epoch": 0.381,
"grad_norm": 628.0,
"kl_loss_10": 211.29082336425782,
"kl_loss_2": 2296.4768188476564,
"kl_loss_3": 1818.6126953125,
"kl_loss_7": 678.5069641113281,
"learning_rate": 0.0006917081272888696,
"loss": 1259.3377,
"step": 3810
},
{
"ce_loss_10": 3.487190854549408,
"ce_loss_13": 3.393886852264404,
"ce_loss_2": 4.553677868843079,
"ce_loss_3": 4.300019836425781,
"ce_loss_7": 3.6916916847229,
"epoch": 0.382,
"grad_norm": 596.0,
"kl_loss_10": 216.8355224609375,
"kl_loss_2": 2365.8853271484377,
"kl_loss_3": 1922.5055114746094,
"kl_loss_7": 684.1588439941406,
"learning_rate": 0.0006902417612324615,
"loss": 1266.6071,
"step": 3820
},
{
"ce_loss_10": 3.6190301895141603,
"ce_loss_13": 3.5245797634124756,
"ce_loss_2": 4.67730553150177,
"ce_loss_3": 4.405901682376862,
"ce_loss_7": 3.8294657945632933,
"epoch": 0.383,
"grad_norm": 680.0,
"kl_loss_10": 219.55593795776366,
"kl_loss_2": 2360.2657958984373,
"kl_loss_3": 1879.3323364257812,
"kl_loss_7": 698.4299591064453,
"learning_rate": 0.00068877347944363,
"loss": 1281.5383,
"step": 3830
},
{
"ce_loss_10": 3.612284016609192,
"ce_loss_13": 3.522170841693878,
"ce_loss_2": 4.627012848854065,
"ce_loss_3": 4.361323833465576,
"ce_loss_7": 3.8073740243911742,
"epoch": 0.384,
"grad_norm": 852.0,
"kl_loss_10": 211.08247299194335,
"kl_loss_2": 2264.5133850097654,
"kl_loss_3": 1800.5201843261718,
"kl_loss_7": 672.6636840820313,
"learning_rate": 0.0006873032967079561,
"loss": 1258.6725,
"step": 3840
},
{
"ce_loss_10": 3.5931047439575194,
"ce_loss_13": 3.5063655853271483,
"ce_loss_2": 4.588197422027588,
"ce_loss_3": 4.324539279937744,
"ce_loss_7": 3.7907418251037597,
"epoch": 0.385,
"grad_norm": 664.0,
"kl_loss_10": 203.42158584594728,
"kl_loss_2": 2234.902947998047,
"kl_loss_3": 1772.472021484375,
"kl_loss_7": 662.3596588134766,
"learning_rate": 0.0006858312278301637,
"loss": 1226.7012,
"step": 3850
},
{
"ce_loss_10": 3.635795843601227,
"ce_loss_13": 3.549472713470459,
"ce_loss_2": 4.623842811584472,
"ce_loss_3": 4.353901195526123,
"ce_loss_7": 3.825099301338196,
"epoch": 0.386,
"grad_norm": 736.0,
"kl_loss_10": 204.89519424438475,
"kl_loss_2": 2217.719934082031,
"kl_loss_3": 1747.3561584472657,
"kl_loss_7": 659.4771606445313,
"learning_rate": 0.0006843572876339704,
"loss": 1225.6961,
"step": 3860
},
{
"ce_loss_10": 3.5519859790802,
"ce_loss_13": 3.466093647480011,
"ce_loss_2": 4.525204968452454,
"ce_loss_3": 4.264074110984803,
"ce_loss_7": 3.742415523529053,
"epoch": 0.387,
"grad_norm": 668.0,
"kl_loss_10": 199.43942337036134,
"kl_loss_2": 2183.9968811035155,
"kl_loss_3": 1725.1913513183595,
"kl_loss_7": 644.7796203613282,
"learning_rate": 0.0006828814909619373,
"loss": 1252.2885,
"step": 3870
},
{
"ce_loss_10": 3.674282944202423,
"ce_loss_13": 3.5820161938667296,
"ce_loss_2": 4.6895040512084964,
"ce_loss_3": 4.4149659156799315,
"ce_loss_7": 3.86486736536026,
"epoch": 0.388,
"grad_norm": 576.0,
"kl_loss_10": 211.43887939453126,
"kl_loss_2": 2266.184930419922,
"kl_loss_3": 1785.1635192871095,
"kl_loss_7": 661.8903228759766,
"learning_rate": 0.0006814038526753205,
"loss": 1223.6402,
"step": 3880
},
{
"ce_loss_10": 3.5698843955993653,
"ce_loss_13": 3.479625034332275,
"ce_loss_2": 4.587342977523804,
"ce_loss_3": 4.3197312474250795,
"ce_loss_7": 3.766351842880249,
"epoch": 0.389,
"grad_norm": 616.0,
"kl_loss_10": 208.68895874023437,
"kl_loss_2": 2258.2895751953124,
"kl_loss_3": 1782.5825317382812,
"kl_loss_7": 664.0186126708984,
"learning_rate": 0.0006799243876539213,
"loss": 1238.0235,
"step": 3890
},
{
"ce_loss_10": 3.500353288650513,
"ce_loss_13": 3.408971738815308,
"ce_loss_2": 4.572002196311951,
"ce_loss_3": 4.29817762374878,
"ce_loss_7": 3.699842798709869,
"epoch": 0.39,
"grad_norm": 836.0,
"kl_loss_10": 206.20438537597656,
"kl_loss_2": 2378.092236328125,
"kl_loss_3": 1891.477294921875,
"kl_loss_7": 671.2416046142578,
"learning_rate": 0.0006784431107959359,
"loss": 1281.9199,
"step": 3900
},
{
"ce_loss_10": 3.5523509979248047,
"ce_loss_13": 3.460574519634247,
"ce_loss_2": 4.626534819602966,
"ce_loss_3": 4.3451399326324465,
"ce_loss_7": 3.7586856484413147,
"epoch": 0.391,
"grad_norm": 668.0,
"kl_loss_10": 206.77191925048828,
"kl_loss_2": 2369.177069091797,
"kl_loss_3": 1882.2445129394532,
"kl_loss_7": 681.9191589355469,
"learning_rate": 0.0006769600370178059,
"loss": 1269.1178,
"step": 3910
},
{
"ce_loss_10": 3.5188350439071656,
"ce_loss_13": 3.4273067116737366,
"ce_loss_2": 4.554541206359863,
"ce_loss_3": 4.289613115787506,
"ce_loss_7": 3.724477529525757,
"epoch": 0.392,
"grad_norm": 560.0,
"kl_loss_10": 199.67687606811523,
"kl_loss_2": 2289.5328735351563,
"kl_loss_3": 1825.1418823242188,
"kl_loss_7": 674.4119354248047,
"learning_rate": 0.0006754751812540679,
"loss": 1229.9105,
"step": 3920
},
{
"ce_loss_10": 3.5683494329452516,
"ce_loss_13": 3.482330596446991,
"ce_loss_2": 4.621720671653748,
"ce_loss_3": 4.348645758628845,
"ce_loss_7": 3.7689119219779967,
"epoch": 0.393,
"grad_norm": 644.0,
"kl_loss_10": 204.25539169311523,
"kl_loss_2": 2339.84892578125,
"kl_loss_3": 1860.5805419921876,
"kl_loss_7": 678.0737976074219,
"learning_rate": 0.0006739885584572025,
"loss": 1265.1324,
"step": 3930
},
{
"ce_loss_10": 3.596517300605774,
"ce_loss_13": 3.5048423767089845,
"ce_loss_2": 4.657581090927124,
"ce_loss_3": 4.393209981918335,
"ce_loss_7": 3.8048507928848267,
"epoch": 0.394,
"grad_norm": 740.0,
"kl_loss_10": 206.0053512573242,
"kl_loss_2": 2374.587469482422,
"kl_loss_3": 1914.2769775390625,
"kl_loss_7": 691.7073638916015,
"learning_rate": 0.0006725001835974853,
"loss": 1262.3447,
"step": 3940
},
{
"ce_loss_10": 3.5837875604629517,
"ce_loss_13": 3.49317661523819,
"ce_loss_2": 4.625016355514527,
"ce_loss_3": 4.360765337944031,
"ce_loss_7": 3.795220899581909,
"epoch": 0.395,
"grad_norm": 624.0,
"kl_loss_10": 202.6530014038086,
"kl_loss_2": 2326.028674316406,
"kl_loss_3": 1851.947198486328,
"kl_loss_7": 690.550015258789,
"learning_rate": 0.0006710100716628344,
"loss": 1233.6354,
"step": 3950
},
{
"ce_loss_10": 3.566684401035309,
"ce_loss_13": 3.4755138635635374,
"ce_loss_2": 4.594186568260193,
"ce_loss_3": 4.3349669694900514,
"ce_loss_7": 3.7691023349761963,
"epoch": 0.396,
"grad_norm": 612.0,
"kl_loss_10": 198.98139343261718,
"kl_loss_2": 2292.616107177734,
"kl_loss_3": 1833.538885498047,
"kl_loss_7": 679.8143615722656,
"learning_rate": 0.0006695182376586602,
"loss": 1262.3783,
"step": 3960
},
{
"ce_loss_10": 3.596343123912811,
"ce_loss_13": 3.512529468536377,
"ce_loss_2": 4.574345445632934,
"ce_loss_3": 4.312320637702942,
"ce_loss_7": 3.790241527557373,
"epoch": 0.397,
"grad_norm": 708.0,
"kl_loss_10": 191.98949813842773,
"kl_loss_2": 2169.2684020996094,
"kl_loss_3": 1705.870343017578,
"kl_loss_7": 641.6268707275391,
"learning_rate": 0.000668024696607715,
"loss": 1235.5194,
"step": 3970
},
{
"ce_loss_10": 3.555418300628662,
"ce_loss_13": 3.470967173576355,
"ce_loss_2": 4.566342353820801,
"ce_loss_3": 4.313628911972046,
"ce_loss_7": 3.7538477182388306,
"epoch": 0.398,
"grad_norm": 636.0,
"kl_loss_10": 198.03487548828124,
"kl_loss_2": 2281.672326660156,
"kl_loss_3": 1825.4962097167968,
"kl_loss_7": 667.3125,
"learning_rate": 0.0006665294635499404,
"loss": 1243.9658,
"step": 3980
},
{
"ce_loss_10": 3.561533272266388,
"ce_loss_13": 3.4714276075363157,
"ce_loss_2": 4.635219573974609,
"ce_loss_3": 4.372889280319214,
"ce_loss_7": 3.771253454685211,
"epoch": 0.399,
"grad_norm": 876.0,
"kl_loss_10": 208.96954040527345,
"kl_loss_2": 2390.9282470703124,
"kl_loss_3": 1928.2362426757813,
"kl_loss_7": 697.694384765625,
"learning_rate": 0.0006650325535423167,
"loss": 1276.8535,
"step": 3990
},
{
"ce_loss_10": 3.5832207202911377,
"ce_loss_13": 3.4972815036773683,
"ce_loss_2": 4.57238998413086,
"ce_loss_3": 4.3085246801376345,
"ce_loss_7": 3.7773876547813416,
"epoch": 0.4,
"grad_norm": 680.0,
"kl_loss_10": 194.11105422973634,
"kl_loss_2": 2185.9527893066406,
"kl_loss_3": 1735.314678955078,
"kl_loss_7": 647.3017364501953,
"learning_rate": 0.0006635339816587109,
"loss": 1234.4078,
"step": 4000
},
{
"ce_loss_10": 3.524456286430359,
"ce_loss_13": 3.4352723956108093,
"ce_loss_2": 4.582833385467529,
"ce_loss_3": 4.318940043449402,
"ce_loss_7": 3.7211544632911684,
"epoch": 0.401,
"grad_norm": 624.0,
"kl_loss_10": 200.03394927978516,
"kl_loss_2": 2362.4151245117187,
"kl_loss_3": 1889.6026794433594,
"kl_loss_7": 668.6993530273437,
"learning_rate": 0.0006620337629897252,
"loss": 1251.5271,
"step": 4010
},
{
"ce_loss_10": 3.531862771511078,
"ce_loss_13": 3.4413245558738708,
"ce_loss_2": 4.564659547805786,
"ce_loss_3": 4.295144200325012,
"ce_loss_7": 3.728400182723999,
"epoch": 0.402,
"grad_norm": 556.0,
"kl_loss_10": 199.88810348510742,
"kl_loss_2": 2302.102014160156,
"kl_loss_3": 1823.07431640625,
"kl_loss_7": 668.4421752929687,
"learning_rate": 0.0006605319126425454,
"loss": 1275.6262,
"step": 4020
},
{
"ce_loss_10": 3.4339096665382387,
"ce_loss_13": 3.350968396663666,
"ce_loss_2": 4.514854836463928,
"ce_loss_3": 4.238226985931396,
"ce_loss_7": 3.644399344921112,
"epoch": 0.403,
"grad_norm": 644.0,
"kl_loss_10": 199.70583419799806,
"kl_loss_2": 2387.631066894531,
"kl_loss_3": 1906.55048828125,
"kl_loss_7": 681.1330749511719,
"learning_rate": 0.0006590284457407876,
"loss": 1275.1312,
"step": 4030
},
{
"ce_loss_10": 3.5380223751068116,
"ce_loss_13": 3.448465049266815,
"ce_loss_2": 4.57396821975708,
"ce_loss_3": 4.313019490242004,
"ce_loss_7": 3.7362441062927245,
"epoch": 0.404,
"grad_norm": 688.0,
"kl_loss_10": 198.1963623046875,
"kl_loss_2": 2292.718151855469,
"kl_loss_3": 1830.845147705078,
"kl_loss_7": 661.0607788085938,
"learning_rate": 0.0006575233774243465,
"loss": 1249.6318,
"step": 4040
},
{
"ce_loss_10": 3.528366136550903,
"ce_loss_13": 3.4398876786231996,
"ce_loss_2": 4.5835960626602175,
"ce_loss_3": 4.318015563488006,
"ce_loss_7": 3.734170937538147,
"epoch": 0.405,
"grad_norm": 744.0,
"kl_loss_10": 199.81770248413085,
"kl_loss_2": 2371.9314208984374,
"kl_loss_3": 1896.1641540527344,
"kl_loss_7": 680.1676055908204,
"learning_rate": 0.0006560167228492435,
"loss": 1274.3472,
"step": 4050
},
{
"ce_loss_10": 3.5713927507400514,
"ce_loss_13": 3.4897979974746702,
"ce_loss_2": 4.564716410636902,
"ce_loss_3": 4.304848039150238,
"ce_loss_7": 3.764770579338074,
"epoch": 0.406,
"grad_norm": 632.0,
"kl_loss_10": 190.77299575805665,
"kl_loss_2": 2213.3808837890624,
"kl_loss_3": 1758.434912109375,
"kl_loss_7": 651.0917877197265,
"learning_rate": 0.0006545084971874737,
"loss": 1244.4535,
"step": 4060
},
{
"ce_loss_10": 3.5361163854599,
"ce_loss_13": 3.446142256259918,
"ce_loss_2": 4.613076639175415,
"ce_loss_3": 4.341050863265991,
"ce_loss_7": 3.7489752054214476,
"epoch": 0.407,
"grad_norm": 724.0,
"kl_loss_10": 204.6956573486328,
"kl_loss_2": 2384.869763183594,
"kl_loss_3": 1900.13232421875,
"kl_loss_7": 693.6003051757813,
"learning_rate": 0.0006529987156268526,
"loss": 1264.1867,
"step": 4070
},
{
"ce_loss_10": 3.4604807257652284,
"ce_loss_13": 3.3677136301994324,
"ce_loss_2": 4.529002094268799,
"ce_loss_3": 4.2508728981018065,
"ce_loss_7": 3.671179461479187,
"epoch": 0.408,
"grad_norm": 652.0,
"kl_loss_10": 200.91606369018555,
"kl_loss_2": 2349.1596496582033,
"kl_loss_3": 1858.6745910644531,
"kl_loss_7": 678.4067779541016,
"learning_rate": 0.0006514873933708637,
"loss": 1288.6936,
"step": 4080
},
{
"ce_loss_10": 3.5669307708740234,
"ce_loss_13": 3.4793569445610046,
"ce_loss_2": 4.594972729682922,
"ce_loss_3": 4.326959013938904,
"ce_loss_7": 3.7646772980690004,
"epoch": 0.409,
"grad_norm": 624.0,
"kl_loss_10": 195.33749084472657,
"kl_loss_2": 2285.8158325195313,
"kl_loss_3": 1812.939276123047,
"kl_loss_7": 660.7087280273438,
"learning_rate": 0.0006499745456385053,
"loss": 1246.0525,
"step": 4090
},
{
"ce_loss_10": 3.532058572769165,
"ce_loss_13": 3.446662437915802,
"ce_loss_2": 4.566872882843017,
"ce_loss_3": 4.295732653141021,
"ce_loss_7": 3.7351000905036926,
"epoch": 0.41,
"grad_norm": 592.0,
"kl_loss_10": 197.3603828430176,
"kl_loss_2": 2284.4489013671873,
"kl_loss_3": 1809.6247680664062,
"kl_loss_7": 669.4995666503906,
"learning_rate": 0.0006484601876641375,
"loss": 1259.2045,
"step": 4100
},
{
"ce_loss_10": 3.523475456237793,
"ce_loss_13": 3.4387876391410828,
"ce_loss_2": 4.524111318588257,
"ce_loss_3": 4.253742909431457,
"ce_loss_7": 3.7196569561958315,
"epoch": 0.411,
"grad_norm": 608.0,
"kl_loss_10": 194.7115333557129,
"kl_loss_2": 2231.6944580078125,
"kl_loss_3": 1757.1336669921875,
"kl_loss_7": 654.9919372558594,
"learning_rate": 0.000646944334697328,
"loss": 1224.3209,
"step": 4110
},
{
"ce_loss_10": 3.6390475988388062,
"ce_loss_13": 3.55245840549469,
"ce_loss_2": 4.6237300157547,
"ce_loss_3": 4.356840944290161,
"ce_loss_7": 3.83481205701828,
"epoch": 0.412,
"grad_norm": 564.0,
"kl_loss_10": 194.73427352905273,
"kl_loss_2": 2191.641564941406,
"kl_loss_3": 1726.6775146484374,
"kl_loss_7": 650.4818603515625,
"learning_rate": 0.0006454270020026995,
"loss": 1203.4656,
"step": 4120
},
{
"ce_loss_10": 3.6017306566238405,
"ce_loss_13": 3.520050418376923,
"ce_loss_2": 4.577455329895019,
"ce_loss_3": 4.320154881477356,
"ce_loss_7": 3.792659568786621,
"epoch": 0.413,
"grad_norm": 576.0,
"kl_loss_10": 189.64769592285157,
"kl_loss_2": 2176.6477478027346,
"kl_loss_3": 1722.2450378417968,
"kl_loss_7": 643.5274627685546,
"learning_rate": 0.0006439082048597755,
"loss": 1192.4902,
"step": 4130
},
{
"ce_loss_10": 3.5903451800346375,
"ce_loss_13": 3.507824885845184,
"ce_loss_2": 4.608788180351257,
"ce_loss_3": 4.346335411071777,
"ce_loss_7": 3.7915576100349426,
"epoch": 0.414,
"grad_norm": 580.0,
"kl_loss_10": 197.56676864624023,
"kl_loss_2": 2267.8357421875,
"kl_loss_3": 1807.2357055664063,
"kl_loss_7": 666.154751586914,
"learning_rate": 0.0006423879585628261,
"loss": 1240.4789,
"step": 4140
},
{
"ce_loss_10": 3.55734179019928,
"ce_loss_13": 3.467788887023926,
"ce_loss_2": 4.6172443151474,
"ce_loss_3": 4.339134466648102,
"ce_loss_7": 3.7582768201828003,
"epoch": 0.415,
"grad_norm": 688.0,
"kl_loss_10": 201.20330123901368,
"kl_loss_2": 2351.881707763672,
"kl_loss_3": 1863.0321411132813,
"kl_loss_7": 675.1522674560547,
"learning_rate": 0.0006408662784207149,
"loss": 1267.4067,
"step": 4150
},
{
"ce_loss_10": 3.5083068370819093,
"ce_loss_13": 3.421711838245392,
"ce_loss_2": 4.537308859825134,
"ce_loss_3": 4.2711180448532104,
"ce_loss_7": 3.708827292919159,
"epoch": 0.416,
"grad_norm": 696.0,
"kl_loss_10": 195.25809020996093,
"kl_loss_2": 2288.984796142578,
"kl_loss_3": 1822.1060485839844,
"kl_loss_7": 665.5764404296875,
"learning_rate": 0.0006393431797567439,
"loss": 1250.1072,
"step": 4160
},
{
"ce_loss_10": 3.5913987278938295,
"ce_loss_13": 3.509873795509338,
"ce_loss_2": 4.573607659339904,
"ce_loss_3": 4.318917143344879,
"ce_loss_7": 3.7776596546173096,
"epoch": 0.417,
"grad_norm": 596.0,
"kl_loss_10": 194.73566436767578,
"kl_loss_2": 2221.0649841308596,
"kl_loss_3": 1766.04775390625,
"kl_loss_7": 648.3369201660156,
"learning_rate": 0.0006378186779084996,
"loss": 1190.9323,
"step": 4170
},
{
"ce_loss_10": 3.4334940314292908,
"ce_loss_13": 3.3453728437423704,
"ce_loss_2": 4.485861015319824,
"ce_loss_3": 4.215418803691864,
"ce_loss_7": 3.6394405364990234,
"epoch": 0.418,
"grad_norm": 676.0,
"kl_loss_10": 196.98247756958008,
"kl_loss_2": 2312.9372680664064,
"kl_loss_3": 1836.5678649902343,
"kl_loss_7": 670.5612365722657,
"learning_rate": 0.0006362927882276989,
"loss": 1261.1342,
"step": 4180
},
{
"ce_loss_10": 3.622100257873535,
"ce_loss_13": 3.5377141356468202,
"ce_loss_2": 4.620994114875794,
"ce_loss_3": 4.349101042747497,
"ce_loss_7": 3.81082307100296,
"epoch": 0.419,
"grad_norm": 620.0,
"kl_loss_10": 192.66633071899415,
"kl_loss_2": 2225.1201049804686,
"kl_loss_3": 1751.446875,
"kl_loss_7": 636.4902648925781,
"learning_rate": 0.000634765526080034,
"loss": 1194.2031,
"step": 4190
},
{
"ce_loss_10": 3.626552712917328,
"ce_loss_13": 3.5395890951156614,
"ce_loss_2": 4.6221943378448485,
"ce_loss_3": 4.355586886405945,
"ce_loss_7": 3.8178786516189573,
"epoch": 0.42,
"grad_norm": 612.0,
"kl_loss_10": 198.29063568115234,
"kl_loss_2": 2233.3197509765623,
"kl_loss_3": 1764.3755920410156,
"kl_loss_7": 656.7103454589844,
"learning_rate": 0.0006332369068450174,
"loss": 1207.3598,
"step": 4200
},
{
"ce_loss_10": 3.5582772374153135,
"ce_loss_13": 3.4749309182167054,
"ce_loss_2": 4.574929785728455,
"ce_loss_3": 4.314627623558044,
"ce_loss_7": 3.7553325653076173,
"epoch": 0.421,
"grad_norm": 588.0,
"kl_loss_10": 195.14224548339843,
"kl_loss_2": 2252.938916015625,
"kl_loss_3": 1800.94208984375,
"kl_loss_7": 656.2003112792969,
"learning_rate": 0.0006317069459158283,
"loss": 1220.2742,
"step": 4210
},
{
"ce_loss_10": 3.66640442609787,
"ce_loss_13": 3.584319996833801,
"ce_loss_2": 4.649453711509705,
"ce_loss_3": 4.3819632768630985,
"ce_loss_7": 3.8596243381500246,
"epoch": 0.422,
"grad_norm": 592.0,
"kl_loss_10": 193.5803665161133,
"kl_loss_2": 2195.1602294921877,
"kl_loss_3": 1735.2393310546875,
"kl_loss_7": 647.3203796386719,
"learning_rate": 0.0006301756586991561,
"loss": 1218.0184,
"step": 4220
},
{
"ce_loss_10": 3.452160143852234,
"ce_loss_13": 3.3657590985298156,
"ce_loss_2": 4.505249190330505,
"ce_loss_3": 4.242624092102051,
"ce_loss_7": 3.6535696148872376,
"epoch": 0.423,
"grad_norm": 764.0,
"kl_loss_10": 198.62798614501952,
"kl_loss_2": 2358.790142822266,
"kl_loss_3": 1893.541082763672,
"kl_loss_7": 677.0058868408203,
"learning_rate": 0.0006286430606150459,
"loss": 1264.3267,
"step": 4230
},
{
"ce_loss_10": 3.6465710401535034,
"ce_loss_13": 3.563078057765961,
"ce_loss_2": 4.641510224342346,
"ce_loss_3": 4.377968907356262,
"ce_loss_7": 3.843237745761871,
"epoch": 0.424,
"grad_norm": 752.0,
"kl_loss_10": 199.9485771179199,
"kl_loss_2": 2240.6987426757814,
"kl_loss_3": 1778.9172729492188,
"kl_loss_7": 666.6387664794922,
"learning_rate": 0.0006271091670967436,
"loss": 1223.7141,
"step": 4240
},
{
"ce_loss_10": 3.570220148563385,
"ce_loss_13": 3.474740993976593,
"ce_loss_2": 4.616126585006714,
"ce_loss_3": 4.345292592048645,
"ce_loss_7": 3.7782423973083494,
"epoch": 0.425,
"grad_norm": 604.0,
"kl_loss_10": 206.9665100097656,
"kl_loss_2": 2359.6162658691405,
"kl_loss_3": 1878.0119995117188,
"kl_loss_7": 699.4417297363282,
"learning_rate": 0.0006255739935905395,
"loss": 1260.2877,
"step": 4250
},
{
"ce_loss_10": 3.6002479434013366,
"ce_loss_13": 3.5161559224128722,
"ce_loss_2": 4.592222595214844,
"ce_loss_3": 4.328677630424499,
"ce_loss_7": 3.790011668205261,
"epoch": 0.426,
"grad_norm": 688.0,
"kl_loss_10": 196.72698440551758,
"kl_loss_2": 2221.3829833984373,
"kl_loss_3": 1756.8525756835938,
"kl_loss_7": 652.2526733398438,
"learning_rate": 0.0006240375555556145,
"loss": 1261.0352,
"step": 4260
},
{
"ce_loss_10": 3.6026462078094483,
"ce_loss_13": 3.5168575167655947,
"ce_loss_2": 4.6545480489730835,
"ce_loss_3": 4.386264157295227,
"ce_loss_7": 3.8051093459129333,
"epoch": 0.427,
"grad_norm": 580.0,
"kl_loss_10": 197.9854705810547,
"kl_loss_2": 2316.6946411132812,
"kl_loss_3": 1838.7398193359375,
"kl_loss_7": 667.8762664794922,
"learning_rate": 0.000622499868463882,
"loss": 1243.157,
"step": 4270
},
{
"ce_loss_10": 3.574436700344086,
"ce_loss_13": 3.4921345114707947,
"ce_loss_2": 4.568159365653992,
"ce_loss_3": 4.298846364021301,
"ce_loss_7": 3.765162992477417,
"epoch": 0.428,
"grad_norm": 620.0,
"kl_loss_10": 194.46619186401367,
"kl_loss_2": 2240.061688232422,
"kl_loss_3": 1767.6112670898438,
"kl_loss_7": 647.0378051757813,
"learning_rate": 0.0006209609477998338,
"loss": 1226.4191,
"step": 4280
},
{
"ce_loss_10": 3.6271798372268678,
"ce_loss_13": 3.5434911131858824,
"ce_loss_2": 4.628253221511841,
"ce_loss_3": 4.367080307006836,
"ce_loss_7": 3.823750925064087,
"epoch": 0.429,
"grad_norm": 596.0,
"kl_loss_10": 199.08662948608398,
"kl_loss_2": 2248.870013427734,
"kl_loss_3": 1777.940362548828,
"kl_loss_7": 666.140267944336,
"learning_rate": 0.0006194208090603844,
"loss": 1245.6613,
"step": 4290
},
{
"ce_loss_10": 3.550025999546051,
"ce_loss_13": 3.4652276039123535,
"ce_loss_2": 4.55794665813446,
"ce_loss_3": 4.292889666557312,
"ce_loss_7": 3.7512326836586,
"epoch": 0.43,
"grad_norm": 696.0,
"kl_loss_10": 194.7114356994629,
"kl_loss_2": 2238.0842895507812,
"kl_loss_3": 1764.372296142578,
"kl_loss_7": 659.5233337402344,
"learning_rate": 0.0006178794677547138,
"loss": 1204.7698,
"step": 4300
},
{
"ce_loss_10": 3.5732582211494446,
"ce_loss_13": 3.4901079893112184,
"ce_loss_2": 4.593334412574768,
"ce_loss_3": 4.330301976203918,
"ce_loss_7": 3.7785701513290406,
"epoch": 0.431,
"grad_norm": 716.0,
"kl_loss_10": 204.0959487915039,
"kl_loss_2": 2270.5096435546875,
"kl_loss_3": 1810.6998352050782,
"kl_loss_7": 680.2683868408203,
"learning_rate": 0.0006163369394041111,
"loss": 1234.0865,
"step": 4310
},
{
"ce_loss_10": 3.522435462474823,
"ce_loss_13": 3.427918314933777,
"ce_loss_2": 4.549873030185699,
"ce_loss_3": 4.289526271820068,
"ce_loss_7": 3.7253190755844114,
"epoch": 0.432,
"grad_norm": 800.0,
"kl_loss_10": 208.77886199951172,
"kl_loss_2": 2301.5377990722654,
"kl_loss_3": 1836.5447998046875,
"kl_loss_7": 679.7338165283203,
"learning_rate": 0.0006147932395418205,
"loss": 1277.3873,
"step": 4320
},
{
"ce_loss_10": 3.5494457960128782,
"ce_loss_13": 3.4614178776741027,
"ce_loss_2": 4.544875764846802,
"ce_loss_3": 4.281032645702362,
"ce_loss_7": 3.7431655168533324,
"epoch": 0.433,
"grad_norm": 576.0,
"kl_loss_10": 207.28576126098633,
"kl_loss_2": 2223.6774475097654,
"kl_loss_3": 1762.4080688476563,
"kl_loss_7": 660.7338745117188,
"learning_rate": 0.0006132483837128823,
"loss": 1209.1447,
"step": 4330
},
{
"ce_loss_10": 3.5334264755249025,
"ce_loss_13": 3.4463666915893554,
"ce_loss_2": 4.564454817771912,
"ce_loss_3": 4.294960129261017,
"ce_loss_7": 3.73456689119339,
"epoch": 0.434,
"grad_norm": 772.0,
"kl_loss_10": 203.00026016235353,
"kl_loss_2": 2313.798968505859,
"kl_loss_3": 1837.2741760253907,
"kl_loss_7": 664.0175872802735,
"learning_rate": 0.0006117023874739772,
"loss": 1240.8283,
"step": 4340
},
{
"ce_loss_10": 3.5215348839759826,
"ce_loss_13": 3.4345417499542235,
"ce_loss_2": 4.554822826385498,
"ce_loss_3": 4.285668563842774,
"ce_loss_7": 3.7274380683898927,
"epoch": 0.435,
"grad_norm": 600.0,
"kl_loss_10": 199.90404205322267,
"kl_loss_2": 2303.0773803710936,
"kl_loss_3": 1827.208251953125,
"kl_loss_7": 672.8900817871094,
"learning_rate": 0.0006101552663932703,
"loss": 1260.7756,
"step": 4350
},
{
"ce_loss_10": 3.554371106624603,
"ce_loss_13": 3.4667278289794923,
"ce_loss_2": 4.5602539539337155,
"ce_loss_3": 4.297711455821991,
"ce_loss_7": 3.74671790599823,
"epoch": 0.436,
"grad_norm": 664.0,
"kl_loss_10": 201.51847763061522,
"kl_loss_2": 2254.3937927246093,
"kl_loss_3": 1790.253662109375,
"kl_loss_7": 662.9543731689453,
"learning_rate": 0.0006086070360502539,
"loss": 1241.8814,
"step": 4360
},
{
"ce_loss_10": 3.5543460965156557,
"ce_loss_13": 3.470974051952362,
"ce_loss_2": 4.571776509284973,
"ce_loss_3": 4.305854046344757,
"ce_loss_7": 3.7547166466712953,
"epoch": 0.437,
"grad_norm": 608.0,
"kl_loss_10": 196.51984329223632,
"kl_loss_2": 2276.0406494140625,
"kl_loss_3": 1801.6470031738281,
"kl_loss_7": 659.9133270263671,
"learning_rate": 0.0006070577120355903,
"loss": 1236.9521,
"step": 4370
},
{
"ce_loss_10": 3.5628577947616575,
"ce_loss_13": 3.47283878326416,
"ce_loss_2": 4.547589898109436,
"ce_loss_3": 4.279985129833221,
"ce_loss_7": 3.7624007940292357,
"epoch": 0.438,
"grad_norm": 700.0,
"kl_loss_10": 200.08970794677734,
"kl_loss_2": 2194.6991455078123,
"kl_loss_3": 1728.643865966797,
"kl_loss_7": 657.7827362060547,
"learning_rate": 0.0006055073099509549,
"loss": 1218.3828,
"step": 4380
},
{
"ce_loss_10": 3.6181469678878786,
"ce_loss_13": 3.531364715099335,
"ce_loss_2": 4.607781720161438,
"ce_loss_3": 4.3447977781295775,
"ce_loss_7": 3.8072004199028013,
"epoch": 0.439,
"grad_norm": 616.0,
"kl_loss_10": 200.97432174682618,
"kl_loss_2": 2223.8163146972656,
"kl_loss_3": 1756.1592224121093,
"kl_loss_7": 652.6859497070312,
"learning_rate": 0.0006039558454088796,
"loss": 1239.9502,
"step": 4390
},
{
"ce_loss_10": 3.598993420600891,
"ce_loss_13": 3.508393979072571,
"ce_loss_2": 4.611153769493103,
"ce_loss_3": 4.343827414512634,
"ce_loss_7": 3.798681151866913,
"epoch": 0.44,
"grad_norm": 636.0,
"kl_loss_10": 207.16089324951173,
"kl_loss_2": 2267.215954589844,
"kl_loss_3": 1798.3621337890625,
"kl_loss_7": 665.1716247558594,
"learning_rate": 0.0006024033340325954,
"loss": 1210.7668,
"step": 4400
},
{
"ce_loss_10": 3.6592474579811096,
"ce_loss_13": 3.575475811958313,
"ce_loss_2": 4.615442514419556,
"ce_loss_3": 4.356250524520874,
"ce_loss_7": 3.841563415527344,
"epoch": 0.441,
"grad_norm": 564.0,
"kl_loss_10": 192.91486740112305,
"kl_loss_2": 2138.478411865234,
"kl_loss_3": 1682.6779296875,
"kl_loss_7": 628.4862640380859,
"learning_rate": 0.0006008497914558743,
"loss": 1188.8043,
"step": 4410
},
{
"ce_loss_10": 3.603752911090851,
"ce_loss_13": 3.514535641670227,
"ce_loss_2": 4.619881939888001,
"ce_loss_3": 4.351648759841919,
"ce_loss_7": 3.8029965996742248,
"epoch": 0.442,
"grad_norm": 680.0,
"kl_loss_10": 203.31059799194335,
"kl_loss_2": 2279.580682373047,
"kl_loss_3": 1800.9931640625,
"kl_loss_7": 667.106298828125,
"learning_rate": 0.0005992952333228728,
"loss": 1234.8536,
"step": 4420
},
{
"ce_loss_10": 3.5360623002052307,
"ce_loss_13": 3.452274763584137,
"ce_loss_2": 4.555125761032104,
"ce_loss_3": 4.292830312252045,
"ce_loss_7": 3.7339815139770507,
"epoch": 0.443,
"grad_norm": 660.0,
"kl_loss_10": 193.53399200439452,
"kl_loss_2": 2284.7425048828127,
"kl_loss_3": 1820.065606689453,
"kl_loss_7": 662.5294036865234,
"learning_rate": 0.0005977396752879741,
"loss": 1233.2003,
"step": 4430
},
{
"ce_loss_10": 3.4606794357299804,
"ce_loss_13": 3.377534472942352,
"ce_loss_2": 4.48543610572815,
"ce_loss_3": 4.220411324501038,
"ce_loss_7": 3.6664485812187193,
"epoch": 0.444,
"grad_norm": 580.0,
"kl_loss_10": 191.26479797363282,
"kl_loss_2": 2280.07265625,
"kl_loss_3": 1810.239013671875,
"kl_loss_7": 656.8585327148437,
"learning_rate": 0.0005961831330156305,
"loss": 1222.7674,
"step": 4440
},
{
"ce_loss_10": 3.603837263584137,
"ce_loss_13": 3.5208237767219543,
"ce_loss_2": 4.638635230064392,
"ce_loss_3": 4.366818988323212,
"ce_loss_7": 3.8011784672737123,
"epoch": 0.445,
"grad_norm": 652.0,
"kl_loss_10": 193.8144203186035,
"kl_loss_2": 2316.0056640625,
"kl_loss_3": 1833.1619812011718,
"kl_loss_7": 659.0817749023438,
"learning_rate": 0.0005946256221802051,
"loss": 1263.1171,
"step": 4450
},
{
"ce_loss_10": 3.5832170486450194,
"ce_loss_13": 3.5048667788505554,
"ce_loss_2": 4.5584005355834964,
"ce_loss_3": 4.296856260299682,
"ce_loss_7": 3.7672229290008543,
"epoch": 0.446,
"grad_norm": 700.0,
"kl_loss_10": 189.50232849121093,
"kl_loss_2": 2181.445458984375,
"kl_loss_3": 1725.645037841797,
"kl_loss_7": 639.0539123535157,
"learning_rate": 0.0005930671584658151,
"loss": 1259.6497,
"step": 4460
},
{
"ce_loss_10": 3.5820990085601805,
"ce_loss_13": 3.4986414194107054,
"ce_loss_2": 4.585966444015503,
"ce_loss_3": 4.327683901786804,
"ce_loss_7": 3.778271293640137,
"epoch": 0.447,
"grad_norm": 624.0,
"kl_loss_10": 192.16329650878907,
"kl_loss_2": 2241.6711364746093,
"kl_loss_3": 1786.2928161621094,
"kl_loss_7": 656.8107452392578,
"learning_rate": 0.0005915077575661722,
"loss": 1237.7033,
"step": 4470
},
{
"ce_loss_10": 3.601723861694336,
"ce_loss_13": 3.5175135850906374,
"ce_loss_2": 4.623058772087097,
"ce_loss_3": 4.352630817890168,
"ce_loss_7": 3.801686096191406,
"epoch": 0.448,
"grad_norm": 576.0,
"kl_loss_10": 197.77509002685548,
"kl_loss_2": 2287.8472229003905,
"kl_loss_3": 1814.064471435547,
"kl_loss_7": 669.7168884277344,
"learning_rate": 0.000589947435184427,
"loss": 1221.476,
"step": 4480
},
{
"ce_loss_10": 3.667625939846039,
"ce_loss_13": 3.5879098773002625,
"ce_loss_2": 4.623752212524414,
"ce_loss_3": 4.3639614343643185,
"ce_loss_7": 3.854480040073395,
"epoch": 0.449,
"grad_norm": 676.0,
"kl_loss_10": 191.73966369628906,
"kl_loss_2": 2169.5962097167967,
"kl_loss_3": 1711.8840759277343,
"kl_loss_7": 644.2419403076171,
"learning_rate": 0.0005883862070330078,
"loss": 1205.0104,
"step": 4490
},
{
"ce_loss_10": 3.5975982904434205,
"ce_loss_13": 3.5138633131980894,
"ce_loss_2": 4.596203637123108,
"ce_loss_3": 4.342746245861053,
"ce_loss_7": 3.7984990000724794,
"epoch": 0.45,
"grad_norm": 680.0,
"kl_loss_10": 192.790771484375,
"kl_loss_2": 2245.34130859375,
"kl_loss_3": 1787.1853515625,
"kl_loss_7": 655.1883148193359,
"learning_rate": 0.0005868240888334653,
"loss": 1211.5924,
"step": 4500
},
{
"ce_loss_10": 3.484956693649292,
"ce_loss_13": 3.3994694352149963,
"ce_loss_2": 4.541581082344055,
"ce_loss_3": 4.265275609493256,
"ce_loss_7": 3.685898816585541,
"epoch": 0.451,
"grad_norm": 664.0,
"kl_loss_10": 197.9249183654785,
"kl_loss_2": 2329.778839111328,
"kl_loss_3": 1847.1219604492187,
"kl_loss_7": 669.9408386230468,
"learning_rate": 0.0005852610963163119,
"loss": 1246.2838,
"step": 4510
},
{
"ce_loss_10": 3.506740427017212,
"ce_loss_13": 3.425083673000336,
"ce_loss_2": 4.510421705245972,
"ce_loss_3": 4.246485769748688,
"ce_loss_7": 3.6969696044921876,
"epoch": 0.452,
"grad_norm": 600.0,
"kl_loss_10": 188.6581832885742,
"kl_loss_2": 2238.5858459472656,
"kl_loss_3": 1770.7893127441407,
"kl_loss_7": 646.1408782958985,
"learning_rate": 0.0005836972452208654,
"loss": 1201.6553,
"step": 4520
},
{
"ce_loss_10": 3.505844843387604,
"ce_loss_13": 3.4249507427215575,
"ce_loss_2": 4.529585886001587,
"ce_loss_3": 4.277110803127289,
"ce_loss_7": 3.708256196975708,
"epoch": 0.453,
"grad_norm": 668.0,
"kl_loss_10": 193.22399291992187,
"kl_loss_2": 2265.8468383789063,
"kl_loss_3": 1815.4268432617187,
"kl_loss_7": 656.5519592285157,
"learning_rate": 0.0005821325512950885,
"loss": 1236.8736,
"step": 4530
},
{
"ce_loss_10": 3.5389772057533264,
"ce_loss_13": 3.4585880279541015,
"ce_loss_2": 4.540098547935486,
"ce_loss_3": 4.2798211693763735,
"ce_loss_7": 3.7288518071174623,
"epoch": 0.454,
"grad_norm": 592.0,
"kl_loss_10": 187.7821243286133,
"kl_loss_2": 2205.198779296875,
"kl_loss_3": 1748.45341796875,
"kl_loss_7": 639.7683776855469,
"learning_rate": 0.0005805670302954321,
"loss": 1221.9566,
"step": 4540
},
{
"ce_loss_10": 3.544492793083191,
"ce_loss_13": 3.4652194142341615,
"ce_loss_2": 4.541022229194641,
"ce_loss_3": 4.279520082473755,
"ce_loss_7": 3.7328044533729554,
"epoch": 0.455,
"grad_norm": 656.0,
"kl_loss_10": 186.06844177246094,
"kl_loss_2": 2226.980224609375,
"kl_loss_3": 1765.562744140625,
"kl_loss_7": 639.6060729980469,
"learning_rate": 0.000579000697986675,
"loss": 1199.4846,
"step": 4550
},
{
"ce_loss_10": 3.5037956118583677,
"ce_loss_13": 3.4134857773780825,
"ce_loss_2": 4.544147634506226,
"ce_loss_3": 4.274082601070404,
"ce_loss_7": 3.707910752296448,
"epoch": 0.456,
"grad_norm": 664.0,
"kl_loss_10": 200.43186416625977,
"kl_loss_2": 2315.5464111328124,
"kl_loss_3": 1832.4145568847657,
"kl_loss_7": 672.0404296875,
"learning_rate": 0.0005774335701417662,
"loss": 1229.2445,
"step": 4560
},
{
"ce_loss_10": 3.4942433714866636,
"ce_loss_13": 3.4095874786376954,
"ce_loss_2": 4.549353170394897,
"ce_loss_3": 4.279058015346527,
"ce_loss_7": 3.693026268482208,
"epoch": 0.457,
"grad_norm": 608.0,
"kl_loss_10": 192.1516143798828,
"kl_loss_2": 2342.186248779297,
"kl_loss_3": 1864.820654296875,
"kl_loss_7": 655.2123260498047,
"learning_rate": 0.0005758656625416658,
"loss": 1241.1571,
"step": 4570
},
{
"ce_loss_10": 3.5480048656463623,
"ce_loss_13": 3.4622543811798097,
"ce_loss_2": 4.561884355545044,
"ce_loss_3": 4.293217277526855,
"ce_loss_7": 3.743514323234558,
"epoch": 0.458,
"grad_norm": 616.0,
"kl_loss_10": 194.93896102905273,
"kl_loss_2": 2260.48984375,
"kl_loss_3": 1786.8557556152343,
"kl_loss_7": 654.7685607910156,
"learning_rate": 0.0005742969909751859,
"loss": 1199.7715,
"step": 4580
},
{
"ce_loss_10": 3.558157193660736,
"ce_loss_13": 3.4740110039711,
"ce_loss_2": 4.582901740074158,
"ce_loss_3": 4.310809695720673,
"ce_loss_7": 3.7480292677879334,
"epoch": 0.459,
"grad_norm": 636.0,
"kl_loss_10": 193.16277923583985,
"kl_loss_2": 2285.9891052246094,
"kl_loss_3": 1800.92705078125,
"kl_loss_7": 648.6197357177734,
"learning_rate": 0.0005727275712388318,
"loss": 1238.3732,
"step": 4590
},
{
"ce_loss_10": 3.5862102270126344,
"ce_loss_13": 3.509055662155151,
"ce_loss_2": 4.560896277427673,
"ce_loss_3": 4.298801875114441,
"ce_loss_7": 3.773897314071655,
"epoch": 0.46,
"grad_norm": 768.0,
"kl_loss_10": 186.60687026977538,
"kl_loss_2": 2190.591516113281,
"kl_loss_3": 1728.75283203125,
"kl_loss_7": 633.5937683105469,
"learning_rate": 0.0005711574191366427,
"loss": 1204.0141,
"step": 4600
},
{
"ce_loss_10": 3.537917101383209,
"ce_loss_13": 3.456929898262024,
"ce_loss_2": 4.532997250556946,
"ce_loss_3": 4.271801400184631,
"ce_loss_7": 3.7239314556121825,
"epoch": 0.461,
"grad_norm": 544.0,
"kl_loss_10": 188.38971405029298,
"kl_loss_2": 2244.5726928710938,
"kl_loss_3": 1779.3487548828125,
"kl_loss_7": 643.0867309570312,
"learning_rate": 0.0005695865504800327,
"loss": 1208.6229,
"step": 4610
},
{
"ce_loss_10": 3.475814175605774,
"ce_loss_13": 3.3895989418029786,
"ce_loss_2": 4.570864033699036,
"ce_loss_3": 4.298530387878418,
"ce_loss_7": 3.6918977737426757,
"epoch": 0.462,
"grad_norm": 688.0,
"kl_loss_10": 199.44021301269532,
"kl_loss_2": 2396.831396484375,
"kl_loss_3": 1919.1037109375,
"kl_loss_7": 685.7258270263671,
"learning_rate": 0.0005680149810876322,
"loss": 1259.1618,
"step": 4620
},
{
"ce_loss_10": 3.5307737231254577,
"ce_loss_13": 3.448805606365204,
"ce_loss_2": 4.553147649765014,
"ce_loss_3": 4.283793473243714,
"ce_loss_7": 3.720176661014557,
"epoch": 0.463,
"grad_norm": 632.0,
"kl_loss_10": 191.36487274169923,
"kl_loss_2": 2267.567822265625,
"kl_loss_3": 1802.5030578613282,
"kl_loss_7": 648.5958099365234,
"learning_rate": 0.0005664427267851271,
"loss": 1217.3594,
"step": 4630
},
{
"ce_loss_10": 3.4447478532791136,
"ce_loss_13": 3.362277901172638,
"ce_loss_2": 4.474937617778778,
"ce_loss_3": 4.203511357307434,
"ce_loss_7": 3.640981078147888,
"epoch": 0.464,
"grad_norm": 616.0,
"kl_loss_10": 189.61345367431642,
"kl_loss_2": 2284.305810546875,
"kl_loss_3": 1801.5720520019531,
"kl_loss_7": 647.2827972412109,
"learning_rate": 0.0005648698034051009,
"loss": 1216.2738,
"step": 4640
},
{
"ce_loss_10": 3.5612680554389953,
"ce_loss_13": 3.479226899147034,
"ce_loss_2": 4.606190347671509,
"ce_loss_3": 4.343288254737854,
"ce_loss_7": 3.7559192776679993,
"epoch": 0.465,
"grad_norm": 680.0,
"kl_loss_10": 189.31488800048828,
"kl_loss_2": 2300.2595642089846,
"kl_loss_3": 1835.63125,
"kl_loss_7": 647.0011413574218,
"learning_rate": 0.0005632962267868747,
"loss": 1204.3232,
"step": 4650
},
{
"ce_loss_10": 3.504312825202942,
"ce_loss_13": 3.4246782064437866,
"ce_loss_2": 4.501714015007019,
"ce_loss_3": 4.243509244918823,
"ce_loss_7": 3.6963974952697756,
"epoch": 0.466,
"grad_norm": 656.0,
"kl_loss_10": 184.82376022338866,
"kl_loss_2": 2221.081378173828,
"kl_loss_3": 1770.11845703125,
"kl_loss_7": 636.5809020996094,
"learning_rate": 0.0005617220127763474,
"loss": 1219.1382,
"step": 4660
},
{
"ce_loss_10": 3.578074049949646,
"ce_loss_13": 3.497161865234375,
"ce_loss_2": 4.561417579650879,
"ce_loss_3": 4.303230881690979,
"ce_loss_7": 3.7666924834251403,
"epoch": 0.467,
"grad_norm": 592.0,
"kl_loss_10": 188.17724151611327,
"kl_loss_2": 2198.6551513671875,
"kl_loss_3": 1739.3878234863282,
"kl_loss_7": 638.6716613769531,
"learning_rate": 0.0005601471772258368,
"loss": 1209.8152,
"step": 4670
},
{
"ce_loss_10": 3.5602858781814577,
"ce_loss_13": 3.4812931418418884,
"ce_loss_2": 4.544128322601319,
"ce_loss_3": 4.283940744400025,
"ce_loss_7": 3.750748324394226,
"epoch": 0.468,
"grad_norm": 684.0,
"kl_loss_10": 186.29373779296876,
"kl_loss_2": 2186.011083984375,
"kl_loss_3": 1724.9905029296874,
"kl_loss_7": 634.5341583251953,
"learning_rate": 0.0005585717359939192,
"loss": 1216.8666,
"step": 4680
},
{
"ce_loss_10": 3.47387490272522,
"ce_loss_13": 3.3916377425193787,
"ce_loss_2": 4.47896523475647,
"ce_loss_3": 4.213813447952271,
"ce_loss_7": 3.6638592004776003,
"epoch": 0.469,
"grad_norm": 736.0,
"kl_loss_10": 187.3494743347168,
"kl_loss_2": 2222.502734375,
"kl_loss_3": 1755.6538635253905,
"kl_loss_7": 638.4273468017578,
"learning_rate": 0.0005569957049452703,
"loss": 1235.6265,
"step": 4690
},
{
"ce_loss_10": 3.530002760887146,
"ce_loss_13": 3.4474871516227723,
"ce_loss_2": 4.558400893211365,
"ce_loss_3": 4.2877805709838865,
"ce_loss_7": 3.7245721340179445,
"epoch": 0.47,
"grad_norm": 704.0,
"kl_loss_10": 192.37612838745116,
"kl_loss_2": 2285.8403198242186,
"kl_loss_3": 1808.7154968261718,
"kl_loss_7": 653.8845581054687,
"learning_rate": 0.0005554190999505056,
"loss": 1234.8666,
"step": 4700
},
{
"ce_loss_10": 3.655286133289337,
"ce_loss_13": 3.5717312812805178,
"ce_loss_2": 4.666804194450378,
"ce_loss_3": 4.405120444297791,
"ce_loss_7": 3.852936863899231,
"epoch": 0.471,
"grad_norm": 612.0,
"kl_loss_10": 194.36407165527345,
"kl_loss_2": 2267.82900390625,
"kl_loss_3": 1798.198681640625,
"kl_loss_7": 661.5685516357422,
"learning_rate": 0.0005538419368860196,
"loss": 1183.023,
"step": 4710
},
{
"ce_loss_10": 3.5788578033447265,
"ce_loss_13": 3.498483991622925,
"ce_loss_2": 4.574405527114868,
"ce_loss_3": 4.313237249851227,
"ce_loss_7": 3.768782043457031,
"epoch": 0.472,
"grad_norm": 600.0,
"kl_loss_10": 190.92964248657228,
"kl_loss_2": 2231.130651855469,
"kl_loss_3": 1765.2990478515626,
"kl_loss_7": 643.7991912841796,
"learning_rate": 0.0005522642316338268,
"loss": 1233.693,
"step": 4720
},
{
"ce_loss_10": 3.581640887260437,
"ce_loss_13": 3.5026119351387024,
"ce_loss_2": 4.585021948814392,
"ce_loss_3": 4.325532901287079,
"ce_loss_7": 3.7722782731056212,
"epoch": 0.473,
"grad_norm": 608.0,
"kl_loss_10": 190.94201431274413,
"kl_loss_2": 2235.0365600585938,
"kl_loss_3": 1776.091015625,
"kl_loss_7": 644.9752258300781,
"learning_rate": 0.0005506860000814017,
"loss": 1245.2729,
"step": 4730
},
{
"ce_loss_10": 3.609380042552948,
"ce_loss_13": 3.5285964608192444,
"ce_loss_2": 4.5732040166854855,
"ce_loss_3": 4.316051697731018,
"ce_loss_7": 3.7950204849243163,
"epoch": 0.474,
"grad_norm": 624.0,
"kl_loss_10": 185.59933853149414,
"kl_loss_2": 2152.8808044433595,
"kl_loss_3": 1698.98193359375,
"kl_loss_7": 630.6379913330078,
"learning_rate": 0.0005491072581215186,
"loss": 1197.5367,
"step": 4740
},
{
"ce_loss_10": 3.6150610566139223,
"ce_loss_13": 3.5275300979614257,
"ce_loss_2": 4.606984066963196,
"ce_loss_3": 4.331383717060089,
"ce_loss_7": 3.8067922830581664,
"epoch": 0.475,
"grad_norm": 636.0,
"kl_loss_10": 196.42518692016603,
"kl_loss_2": 2246.8279663085937,
"kl_loss_3": 1758.9309448242188,
"kl_loss_7": 653.222998046875,
"learning_rate": 0.0005475280216520913,
"loss": 1187.7061,
"step": 4750
},
{
"ce_loss_10": 3.5246535778045653,
"ce_loss_13": 3.4453783988952638,
"ce_loss_2": 4.515398740768433,
"ce_loss_3": 4.251828491687775,
"ce_loss_7": 3.7125940799713133,
"epoch": 0.476,
"grad_norm": 660.0,
"kl_loss_10": 186.9199966430664,
"kl_loss_2": 2199.839562988281,
"kl_loss_3": 1734.392041015625,
"kl_loss_7": 632.1179809570312,
"learning_rate": 0.0005459483065760138,
"loss": 1229.7142,
"step": 4760
},
{
"ce_loss_10": 3.4620707392692567,
"ce_loss_13": 3.379168164730072,
"ce_loss_2": 4.535378384590149,
"ce_loss_3": 4.269984316825867,
"ce_loss_7": 3.66820707321167,
"epoch": 0.477,
"grad_norm": 668.0,
"kl_loss_10": 189.84198379516602,
"kl_loss_2": 2346.7093200683594,
"kl_loss_3": 1881.5502380371095,
"kl_loss_7": 655.4429504394532,
"learning_rate": 0.0005443681288009991,
"loss": 1231.516,
"step": 4770
},
{
"ce_loss_10": 3.5201017260551453,
"ce_loss_13": 3.4394583463668824,
"ce_loss_2": 4.551519656181336,
"ce_loss_3": 4.275486898422241,
"ce_loss_7": 3.712023985385895,
"epoch": 0.478,
"grad_norm": 560.0,
"kl_loss_10": 188.47934265136718,
"kl_loss_2": 2298.261828613281,
"kl_loss_3": 1807.319403076172,
"kl_loss_7": 646.2581420898438,
"learning_rate": 0.0005427875042394199,
"loss": 1231.2074,
"step": 4780
},
{
"ce_loss_10": 3.5546525955200194,
"ce_loss_13": 3.4689871072769165,
"ce_loss_2": 4.55171308517456,
"ce_loss_3": 4.2830651044845585,
"ce_loss_7": 3.7494895100593566,
"epoch": 0.479,
"grad_norm": 568.0,
"kl_loss_10": 193.1684341430664,
"kl_loss_2": 2223.558709716797,
"kl_loss_3": 1744.958233642578,
"kl_loss_7": 652.0952423095703,
"learning_rate": 0.0005412064488081482,
"loss": 1232.2334,
"step": 4790
},
{
"ce_loss_10": 3.560468685626984,
"ce_loss_13": 3.4794244885444643,
"ce_loss_2": 4.549170875549317,
"ce_loss_3": 4.280433797836304,
"ce_loss_7": 3.744898808002472,
"epoch": 0.48,
"grad_norm": 548.0,
"kl_loss_10": 188.24676589965821,
"kl_loss_2": 2217.6575561523437,
"kl_loss_3": 1743.2784423828125,
"kl_loss_7": 636.4865295410157,
"learning_rate": 0.0005396249784283942,
"loss": 1197.0651,
"step": 4800
},
{
"ce_loss_10": 3.575687527656555,
"ce_loss_13": 3.4918730735778807,
"ce_loss_2": 4.614717435836792,
"ce_loss_3": 4.347899675369263,
"ce_loss_7": 3.7766286730766296,
"epoch": 0.481,
"grad_norm": 592.0,
"kl_loss_10": 195.0629508972168,
"kl_loss_2": 2307.5621826171873,
"kl_loss_3": 1836.4153686523437,
"kl_loss_7": 665.144580078125,
"learning_rate": 0.0005380431090255476,
"loss": 1235.3045,
"step": 4810
},
{
"ce_loss_10": 3.565406787395477,
"ce_loss_13": 3.487363612651825,
"ce_loss_2": 4.546458888053894,
"ce_loss_3": 4.2899955153465275,
"ce_loss_7": 3.747445857524872,
"epoch": 0.482,
"grad_norm": 608.0,
"kl_loss_10": 183.49071578979493,
"kl_loss_2": 2200.6481811523436,
"kl_loss_3": 1737.9393493652344,
"kl_loss_7": 622.3307556152344,
"learning_rate": 0.0005364608565290155,
"loss": 1189.2841,
"step": 4820
},
{
"ce_loss_10": 3.5748016953468325,
"ce_loss_13": 3.493953990936279,
"ce_loss_2": 4.58906729221344,
"ce_loss_3": 4.324404489994049,
"ce_loss_7": 3.7643205761909484,
"epoch": 0.483,
"grad_norm": 640.0,
"kl_loss_10": 190.96404800415038,
"kl_loss_2": 2251.1127075195313,
"kl_loss_3": 1785.181817626953,
"kl_loss_7": 641.9654663085937,
"learning_rate": 0.0005348782368720626,
"loss": 1217.6031,
"step": 4830
},
{
"ce_loss_10": 3.5082598328590393,
"ce_loss_13": 3.427862787246704,
"ce_loss_2": 4.508589172363282,
"ce_loss_3": 4.243466067314148,
"ce_loss_7": 3.6949036836624147,
"epoch": 0.484,
"grad_norm": 560.0,
"kl_loss_10": 186.74840545654297,
"kl_loss_2": 2224.133184814453,
"kl_loss_3": 1753.7203369140625,
"kl_loss_7": 630.4135833740235,
"learning_rate": 0.000533295265991652,
"loss": 1216.8205,
"step": 4840
},
{
"ce_loss_10": 3.5815645456314087,
"ce_loss_13": 3.4982495427131655,
"ce_loss_2": 4.554982018470764,
"ce_loss_3": 4.299691355228424,
"ce_loss_7": 3.7735238790512087,
"epoch": 0.485,
"grad_norm": 584.0,
"kl_loss_10": 187.77717666625978,
"kl_loss_2": 2175.573095703125,
"kl_loss_3": 1715.7588928222656,
"kl_loss_7": 631.838656616211,
"learning_rate": 0.0005317119598282822,
"loss": 1183.9046,
"step": 4850
},
{
"ce_loss_10": 3.586243951320648,
"ce_loss_13": 3.5034523725509645,
"ce_loss_2": 4.583103036880493,
"ce_loss_3": 4.312316799163819,
"ce_loss_7": 3.777419722080231,
"epoch": 0.486,
"grad_norm": 648.0,
"kl_loss_10": 189.01727676391602,
"kl_loss_2": 2203.9095703125,
"kl_loss_3": 1739.787420654297,
"kl_loss_7": 638.8051147460938,
"learning_rate": 0.0005301283343258293,
"loss": 1199.0793,
"step": 4860
},
{
"ce_loss_10": 3.644785749912262,
"ce_loss_13": 3.563555288314819,
"ce_loss_2": 4.610913848876953,
"ce_loss_3": 4.34997011423111,
"ce_loss_7": 3.832633006572723,
"epoch": 0.487,
"grad_norm": 648.0,
"kl_loss_10": 187.26018371582032,
"kl_loss_2": 2164.9867431640623,
"kl_loss_3": 1703.8299499511718,
"kl_loss_7": 629.719970703125,
"learning_rate": 0.000528544405431384,
"loss": 1174.2795,
"step": 4870
},
{
"ce_loss_10": 3.5308486342430117,
"ce_loss_13": 3.4465184569358827,
"ce_loss_2": 4.54223735332489,
"ce_loss_3": 4.275557327270508,
"ce_loss_7": 3.728735053539276,
"epoch": 0.488,
"grad_norm": 692.0,
"kl_loss_10": 194.1014518737793,
"kl_loss_2": 2267.4865783691407,
"kl_loss_3": 1794.8980224609375,
"kl_loss_7": 653.2649841308594,
"learning_rate": 0.000526960189095093,
"loss": 1222.7201,
"step": 4880
},
{
"ce_loss_10": 3.5016911029815674,
"ce_loss_13": 3.422479736804962,
"ce_loss_2": 4.5065477132797245,
"ce_loss_3": 4.244668066501617,
"ce_loss_7": 3.6958303570747377,
"epoch": 0.489,
"grad_norm": 624.0,
"kl_loss_10": 185.53594131469725,
"kl_loss_2": 2219.627575683594,
"kl_loss_3": 1760.8566650390626,
"kl_loss_7": 633.8929748535156,
"learning_rate": 0.0005253757012699972,
"loss": 1199.7284,
"step": 4890
},
{
"ce_loss_10": 3.592365336418152,
"ce_loss_13": 3.5133956909179687,
"ce_loss_2": 4.582755160331726,
"ce_loss_3": 4.310234916210175,
"ce_loss_7": 3.779422330856323,
"epoch": 0.49,
"grad_norm": 608.0,
"kl_loss_10": 188.63387451171874,
"kl_loss_2": 2196.80634765625,
"kl_loss_3": 1721.877880859375,
"kl_loss_7": 628.9239440917969,
"learning_rate": 0.0005237909579118712,
"loss": 1209.9893,
"step": 4900
},
{
"ce_loss_10": 3.5542251110076903,
"ce_loss_13": 3.470878171920776,
"ce_loss_2": 4.575217127799988,
"ce_loss_3": 4.311789894104004,
"ce_loss_7": 3.748956894874573,
"epoch": 0.491,
"grad_norm": 688.0,
"kl_loss_10": 192.452791595459,
"kl_loss_2": 2289.5204833984376,
"kl_loss_3": 1820.7321044921875,
"kl_loss_7": 654.6072784423828,
"learning_rate": 0.0005222059749790631,
"loss": 1232.3309,
"step": 4910
},
{
"ce_loss_10": 3.6172361254692076,
"ce_loss_13": 3.538671875,
"ce_loss_2": 4.572561645507813,
"ce_loss_3": 4.3095086216926575,
"ce_loss_7": 3.7990201711654663,
"epoch": 0.492,
"grad_norm": 580.0,
"kl_loss_10": 186.14958953857422,
"kl_loss_2": 2152.0723571777344,
"kl_loss_3": 1686.5931701660156,
"kl_loss_7": 627.8851165771484,
"learning_rate": 0.0005206207684323337,
"loss": 1161.1154,
"step": 4920
},
{
"ce_loss_10": 3.597834038734436,
"ce_loss_13": 3.5186328291893005,
"ce_loss_2": 4.576415348052978,
"ce_loss_3": 4.318524956703186,
"ce_loss_7": 3.7833918571472167,
"epoch": 0.493,
"grad_norm": 680.0,
"kl_loss_10": 190.28093643188475,
"kl_loss_2": 2205.189178466797,
"kl_loss_3": 1744.895166015625,
"kl_loss_7": 637.9018249511719,
"learning_rate": 0.000519035354234695,
"loss": 1221.5055,
"step": 4930
},
{
"ce_loss_10": 3.5777213335037232,
"ce_loss_13": 3.4926177620887757,
"ce_loss_2": 4.569476509094239,
"ce_loss_3": 4.300305211544037,
"ce_loss_7": 3.7719446539878847,
"epoch": 0.494,
"grad_norm": 652.0,
"kl_loss_10": 191.98795700073242,
"kl_loss_2": 2217.8314697265623,
"kl_loss_3": 1735.119366455078,
"kl_loss_7": 648.0345703125,
"learning_rate": 0.0005174497483512506,
"loss": 1188.0275,
"step": 4940
},
{
"ce_loss_10": 3.617672252655029,
"ce_loss_13": 3.5411610841751098,
"ce_loss_2": 4.595355463027954,
"ce_loss_3": 4.32707976102829,
"ce_loss_7": 3.8022171378135683,
"epoch": 0.495,
"grad_norm": 704.0,
"kl_loss_10": 185.97076797485352,
"kl_loss_2": 2193.0468505859376,
"kl_loss_3": 1726.8812622070313,
"kl_loss_7": 638.2306701660157,
"learning_rate": 0.0005158639667490339,
"loss": 1220.6553,
"step": 4950
},
{
"ce_loss_10": 3.5151694416999817,
"ce_loss_13": 3.4326966643333434,
"ce_loss_2": 4.5227725267410275,
"ce_loss_3": 4.255635476112365,
"ce_loss_7": 3.710303211212158,
"epoch": 0.496,
"grad_norm": 632.0,
"kl_loss_10": 189.1722068786621,
"kl_loss_2": 2227.908306884766,
"kl_loss_3": 1751.7201293945313,
"kl_loss_7": 643.16630859375,
"learning_rate": 0.0005142780253968481,
"loss": 1203.2568,
"step": 4960
},
{
"ce_loss_10": 3.4694177746772765,
"ce_loss_13": 3.3919921875,
"ce_loss_2": 4.455039215087891,
"ce_loss_3": 4.192563462257385,
"ce_loss_7": 3.6608409881591797,
"epoch": 0.497,
"grad_norm": 672.0,
"kl_loss_10": 182.45398559570313,
"kl_loss_2": 2196.9568115234374,
"kl_loss_3": 1734.4176452636718,
"kl_loss_7": 624.9595611572265,
"learning_rate": 0.0005126919402651053,
"loss": 1165.1617,
"step": 4970
},
{
"ce_loss_10": 3.5411869525909423,
"ce_loss_13": 3.4560129284858703,
"ce_loss_2": 4.551884508132934,
"ce_loss_3": 4.285728931427002,
"ce_loss_7": 3.730460357666016,
"epoch": 0.498,
"grad_norm": 612.0,
"kl_loss_10": 190.1128143310547,
"kl_loss_2": 2234.540148925781,
"kl_loss_3": 1768.0274841308594,
"kl_loss_7": 642.9938751220703,
"learning_rate": 0.0005111057273256647,
"loss": 1218.0719,
"step": 4980
},
{
"ce_loss_10": 3.640482187271118,
"ce_loss_13": 3.563759708404541,
"ce_loss_2": 4.559627389907837,
"ce_loss_3": 4.304729497432708,
"ce_loss_7": 3.809755790233612,
"epoch": 0.499,
"grad_norm": 600.0,
"kl_loss_10": 181.2877067565918,
"kl_loss_2": 2076.4399169921876,
"kl_loss_3": 1633.4311096191407,
"kl_loss_7": 606.3493682861329,
"learning_rate": 0.0005095194025516733,
"loss": 1149.4782,
"step": 4990
},
{
"ce_loss_10": 3.561459171772003,
"ce_loss_13": 3.485899102687836,
"ce_loss_2": 4.532869434356689,
"ce_loss_3": 4.273192000389099,
"ce_loss_7": 3.7427910447120665,
"epoch": 0.5,
"grad_norm": 612.0,
"kl_loss_10": 182.62270965576172,
"kl_loss_2": 2161.449591064453,
"kl_loss_3": 1706.75859375,
"kl_loss_7": 617.4605316162109,
"learning_rate": 0.000507932981917404,
"loss": 1217.3309,
"step": 5000
},
{
"ce_loss_10": 3.518963348865509,
"ce_loss_13": 3.4364115476608275,
"ce_loss_2": 4.566620469093323,
"ce_loss_3": 4.296507096290588,
"ce_loss_7": 3.7167017698287963,
"epoch": 0.501,
"grad_norm": 604.0,
"kl_loss_10": 191.43318862915038,
"kl_loss_2": 2312.919299316406,
"kl_loss_3": 1835.5362060546875,
"kl_loss_7": 654.5825164794921,
"learning_rate": 0.0005063464813980949,
"loss": 1243.5809,
"step": 5010
},
{
"ce_loss_10": 3.503278911113739,
"ce_loss_13": 3.423468828201294,
"ce_loss_2": 4.508650994300842,
"ce_loss_3": 4.244243478775024,
"ce_loss_7": 3.6842400670051574,
"epoch": 0.502,
"grad_norm": 616.0,
"kl_loss_10": 187.45429153442382,
"kl_loss_2": 2242.6967956542967,
"kl_loss_3": 1780.9238586425781,
"kl_loss_7": 636.0354858398438,
"learning_rate": 0.0005047599169697884,
"loss": 1195.7843,
"step": 5020
},
{
"ce_loss_10": 3.4397648930549622,
"ce_loss_13": 3.357620894908905,
"ce_loss_2": 4.463168692588806,
"ce_loss_3": 4.195060646533966,
"ce_loss_7": 3.633397877216339,
"epoch": 0.503,
"grad_norm": 604.0,
"kl_loss_10": 185.41551361083984,
"kl_loss_2": 2258.051135253906,
"kl_loss_3": 1778.3387390136718,
"kl_loss_7": 635.239013671875,
"learning_rate": 0.000503173304609171,
"loss": 1183.8663,
"step": 5030
},
{
"ce_loss_10": 3.5603776931762696,
"ce_loss_13": 3.4799546360969544,
"ce_loss_2": 4.5456082105636595,
"ce_loss_3": 4.285192847251892,
"ce_loss_7": 3.7480576753616335,
"epoch": 0.504,
"grad_norm": 656.0,
"kl_loss_10": 184.81720504760742,
"kl_loss_2": 2170.377197265625,
"kl_loss_3": 1713.3125,
"kl_loss_7": 627.2051513671875,
"learning_rate": 0.0005015866602934111,
"loss": 1173.4953,
"step": 5040
},
{
"ce_loss_10": 3.5348097562789915,
"ce_loss_13": 3.4481786727905273,
"ce_loss_2": 4.561786007881165,
"ce_loss_3": 4.291987287998199,
"ce_loss_7": 3.732908022403717,
"epoch": 0.505,
"grad_norm": 584.0,
"kl_loss_10": 195.19094161987306,
"kl_loss_2": 2283.529962158203,
"kl_loss_3": 1808.7241088867188,
"kl_loss_7": 661.38330078125,
"learning_rate": 0.0005,
"loss": 1216.3971,
"step": 5050
},
{
"ce_loss_10": 3.5199029207229615,
"ce_loss_13": 3.4400954723358153,
"ce_loss_2": 4.522394108772278,
"ce_loss_3": 4.258548331260681,
"ce_loss_7": 3.7067763924598696,
"epoch": 0.506,
"grad_norm": 632.0,
"kl_loss_10": 190.6364860534668,
"kl_loss_2": 2246.9065795898437,
"kl_loss_3": 1774.8018188476562,
"kl_loss_7": 642.158203125,
"learning_rate": 0.0004984133397065889,
"loss": 1187.0591,
"step": 5060
},
{
"ce_loss_10": 3.529603970050812,
"ce_loss_13": 3.448684501647949,
"ce_loss_2": 4.540892434120178,
"ce_loss_3": 4.281192362308502,
"ce_loss_7": 3.727533829212189,
"epoch": 0.507,
"grad_norm": 572.0,
"kl_loss_10": 189.2146110534668,
"kl_loss_2": 2238.786248779297,
"kl_loss_3": 1779.2802124023438,
"kl_loss_7": 641.3925506591797,
"learning_rate": 0.0004968266953908291,
"loss": 1190.1465,
"step": 5070
},
{
"ce_loss_10": 3.5666260600090025,
"ce_loss_13": 3.486749768257141,
"ce_loss_2": 4.580948376655579,
"ce_loss_3": 4.316030120849609,
"ce_loss_7": 3.7590123891830443,
"epoch": 0.508,
"grad_norm": 608.0,
"kl_loss_10": 183.170157623291,
"kl_loss_2": 2245.8196716308594,
"kl_loss_3": 1773.8548583984375,
"kl_loss_7": 630.8032928466797,
"learning_rate": 0.0004952400830302117,
"loss": 1205.3312,
"step": 5080
},
{
"ce_loss_10": 3.4943687319755554,
"ce_loss_13": 3.4131668329238893,
"ce_loss_2": 4.523447823524475,
"ce_loss_3": 4.255696547031403,
"ce_loss_7": 3.686564898490906,
"epoch": 0.509,
"grad_norm": 624.0,
"kl_loss_10": 190.01820449829103,
"kl_loss_2": 2279.890344238281,
"kl_loss_3": 1807.6574096679688,
"kl_loss_7": 647.3827026367187,
"learning_rate": 0.0004936535186019053,
"loss": 1207.5289,
"step": 5090
},
{
"ce_loss_10": 3.5966561436653137,
"ce_loss_13": 3.5205499291419984,
"ce_loss_2": 4.557806515693665,
"ce_loss_3": 4.297008419036866,
"ce_loss_7": 3.777693784236908,
"epoch": 0.51,
"grad_norm": 572.0,
"kl_loss_10": 181.29688186645507,
"kl_loss_2": 2148.9375854492187,
"kl_loss_3": 1687.6425170898438,
"kl_loss_7": 609.7850677490235,
"learning_rate": 0.000492067018082596,
"loss": 1180.1517,
"step": 5100
},
{
"ce_loss_10": 3.5341065168380736,
"ce_loss_13": 3.448958945274353,
"ce_loss_2": 4.584142446517944,
"ce_loss_3": 4.311909413337707,
"ce_loss_7": 3.7378315210342405,
"epoch": 0.511,
"grad_norm": 580.0,
"kl_loss_10": 191.75616531372071,
"kl_loss_2": 2313.8392028808594,
"kl_loss_3": 1838.1626281738281,
"kl_loss_7": 657.9944549560547,
"learning_rate": 0.0004904805974483267,
"loss": 1252.0359,
"step": 5110
},
{
"ce_loss_10": 3.6478444814682005,
"ce_loss_13": 3.5622426509857177,
"ce_loss_2": 4.652135348320007,
"ce_loss_3": 4.385519003868103,
"ce_loss_7": 3.8461916565895082,
"epoch": 0.512,
"grad_norm": 620.0,
"kl_loss_10": 196.4123405456543,
"kl_loss_2": 2261.5567626953125,
"kl_loss_3": 1794.3322509765626,
"kl_loss_7": 663.2056915283204,
"learning_rate": 0.0004888942726743353,
"loss": 1254.773,
"step": 5120
},
{
"ce_loss_10": 3.5161622405052184,
"ce_loss_13": 3.435041069984436,
"ce_loss_2": 4.527030563354492,
"ce_loss_3": 4.273452854156494,
"ce_loss_7": 3.7132395029067995,
"epoch": 0.513,
"grad_norm": 612.0,
"kl_loss_10": 189.22552337646485,
"kl_loss_2": 2261.9498779296873,
"kl_loss_3": 1801.7636291503907,
"kl_loss_7": 649.0409454345703,
"learning_rate": 0.0004873080597348947,
"loss": 1220.4549,
"step": 5130
},
{
"ce_loss_10": 3.4059476256370544,
"ce_loss_13": 3.325529730319977,
"ce_loss_2": 4.472175240516663,
"ce_loss_3": 4.212775444984436,
"ce_loss_7": 3.6100828886032104,
"epoch": 0.514,
"grad_norm": 576.0,
"kl_loss_10": 188.01781005859374,
"kl_loss_2": 2348.922229003906,
"kl_loss_3": 1884.4989135742187,
"kl_loss_7": 653.7215118408203,
"learning_rate": 0.0004857219746031519,
"loss": 1228.3554,
"step": 5140
},
{
"ce_loss_10": 3.5706036925315856,
"ce_loss_13": 3.4925912499427794,
"ce_loss_2": 4.564787793159485,
"ce_loss_3": 4.288926684856415,
"ce_loss_7": 3.7588186025619508,
"epoch": 0.515,
"grad_norm": 564.0,
"kl_loss_10": 187.27239913940429,
"kl_loss_2": 2197.20703125,
"kl_loss_3": 1721.3864501953126,
"kl_loss_7": 633.3875030517578,
"learning_rate": 0.0004841360332509663,
"loss": 1198.5317,
"step": 5150
},
{
"ce_loss_10": 3.5291930079460143,
"ce_loss_13": 3.451045203208923,
"ce_loss_2": 4.509535562992096,
"ce_loss_3": 4.244547712802887,
"ce_loss_7": 3.7146947622299193,
"epoch": 0.516,
"grad_norm": 640.0,
"kl_loss_10": 182.93116302490233,
"kl_loss_2": 2188.2712890625,
"kl_loss_3": 1720.3389709472656,
"kl_loss_7": 621.1425567626953,
"learning_rate": 0.0004825502516487497,
"loss": 1155.4164,
"step": 5160
},
{
"ce_loss_10": 3.494162142276764,
"ce_loss_13": 3.410848069190979,
"ce_loss_2": 4.509466361999512,
"ce_loss_3": 4.249596023559571,
"ce_loss_7": 3.689158225059509,
"epoch": 0.517,
"grad_norm": 776.0,
"kl_loss_10": 188.6508804321289,
"kl_loss_2": 2267.020611572266,
"kl_loss_3": 1803.9957458496094,
"kl_loss_7": 643.8313232421875,
"learning_rate": 0.00048096464576530507,
"loss": 1222.8347,
"step": 5170
},
{
"ce_loss_10": 3.5969889640808104,
"ce_loss_13": 3.5190072774887087,
"ce_loss_2": 4.547854423522949,
"ce_loss_3": 4.293277430534363,
"ce_loss_7": 3.7762330770492554,
"epoch": 0.518,
"grad_norm": 620.0,
"kl_loss_10": 184.8033348083496,
"kl_loss_2": 2134.698907470703,
"kl_loss_3": 1683.638848876953,
"kl_loss_7": 620.0321014404296,
"learning_rate": 0.00047937923156766646,
"loss": 1168.0455,
"step": 5180
},
{
"ce_loss_10": 3.6420543789863586,
"ce_loss_13": 3.5626509547233582,
"ce_loss_2": 4.591393780708313,
"ce_loss_3": 4.326744735240936,
"ce_loss_7": 3.8211991429328918,
"epoch": 0.519,
"grad_norm": 560.0,
"kl_loss_10": 186.71140975952147,
"kl_loss_2": 2131.37041015625,
"kl_loss_3": 1673.548291015625,
"kl_loss_7": 620.3797760009766,
"learning_rate": 0.00047779402502093696,
"loss": 1176.4619,
"step": 5190
},
{
"ce_loss_10": 3.6047547817230225,
"ce_loss_13": 3.5276923775672913,
"ce_loss_2": 4.578773355484008,
"ce_loss_3": 4.314329183101654,
"ce_loss_7": 3.7894126772880554,
"epoch": 0.52,
"grad_norm": 572.0,
"kl_loss_10": 184.64933090209962,
"kl_loss_2": 2171.194439697266,
"kl_loss_3": 1701.0345581054687,
"kl_loss_7": 621.1681274414062,
"learning_rate": 0.0004762090420881289,
"loss": 1192.2752,
"step": 5200
},
{
"ce_loss_10": 3.524991714954376,
"ce_loss_13": 3.449671447277069,
"ce_loss_2": 4.498011994361877,
"ce_loss_3": 4.23529599905014,
"ce_loss_7": 3.705312669277191,
"epoch": 0.521,
"grad_norm": 608.0,
"kl_loss_10": 186.35540390014648,
"kl_loss_2": 2183.97861328125,
"kl_loss_3": 1723.4856018066407,
"kl_loss_7": 620.2787628173828,
"learning_rate": 0.00047462429873000296,
"loss": 1166.6783,
"step": 5210
},
{
"ce_loss_10": 3.610927963256836,
"ce_loss_13": 3.5292730212211607,
"ce_loss_2": 4.586357808113098,
"ce_loss_3": 4.316179418563843,
"ce_loss_7": 3.7876657485961913,
"epoch": 0.522,
"grad_norm": 572.0,
"kl_loss_10": 187.58379135131835,
"kl_loss_2": 2205.0898559570314,
"kl_loss_3": 1728.4106079101562,
"kl_loss_7": 624.2286590576172,
"learning_rate": 0.0004730398109049071,
"loss": 1181.2787,
"step": 5220
},
{
"ce_loss_10": 3.542900788784027,
"ce_loss_13": 3.4592981576919555,
"ce_loss_2": 4.5604215383529665,
"ce_loss_3": 4.294589376449585,
"ce_loss_7": 3.7346125721931456,
"epoch": 0.523,
"grad_norm": 632.0,
"kl_loss_10": 192.22620544433593,
"kl_loss_2": 2275.4386779785154,
"kl_loss_3": 1804.1369689941407,
"kl_loss_7": 648.4010925292969,
"learning_rate": 0.000471455594568616,
"loss": 1206.5586,
"step": 5230
},
{
"ce_loss_10": 3.612694036960602,
"ce_loss_13": 3.5346154451370237,
"ce_loss_2": 4.571756148338318,
"ce_loss_3": 4.30354597568512,
"ce_loss_7": 3.7924134016036986,
"epoch": 0.524,
"grad_norm": 584.0,
"kl_loss_10": 184.57232360839845,
"kl_loss_2": 2148.5399963378904,
"kl_loss_3": 1679.7537841796875,
"kl_loss_7": 619.590966796875,
"learning_rate": 0.00046987166567417086,
"loss": 1185.6557,
"step": 5240
},
{
"ce_loss_10": 3.5288819313049316,
"ce_loss_13": 3.452391028404236,
"ce_loss_2": 4.524345111846924,
"ce_loss_3": 4.255153965950012,
"ce_loss_7": 3.7151949644088744,
"epoch": 0.525,
"grad_norm": 640.0,
"kl_loss_10": 184.01749114990236,
"kl_loss_2": 2198.7858947753907,
"kl_loss_3": 1730.872314453125,
"kl_loss_7": 629.3970092773437,
"learning_rate": 0.00046828804017171776,
"loss": 1156.5996,
"step": 5250
},
{
"ce_loss_10": 3.5754063725471497,
"ce_loss_13": 3.4882086515426636,
"ce_loss_2": 4.589629459381103,
"ce_loss_3": 4.328830146789551,
"ce_loss_7": 3.7704445004463194,
"epoch": 0.526,
"grad_norm": 640.0,
"kl_loss_10": 189.38601303100586,
"kl_loss_2": 2242.6748046875,
"kl_loss_3": 1771.9065246582031,
"kl_loss_7": 637.7771270751953,
"learning_rate": 0.00046670473400834805,
"loss": 1218.8605,
"step": 5260
},
{
"ce_loss_10": 3.5049550890922547,
"ce_loss_13": 3.428373408317566,
"ce_loss_2": 4.489202237129211,
"ce_loss_3": 4.228802132606506,
"ce_loss_7": 3.686991608142853,
"epoch": 0.527,
"grad_norm": 580.0,
"kl_loss_10": 181.47291641235353,
"kl_loss_2": 2184.507427978516,
"kl_loss_3": 1721.9762329101563,
"kl_loss_7": 614.4228942871093,
"learning_rate": 0.00046512176312793734,
"loss": 1216.9304,
"step": 5270
},
{
"ce_loss_10": 3.497020888328552,
"ce_loss_13": 3.415910315513611,
"ce_loss_2": 4.500096344947815,
"ce_loss_3": 4.221375334262848,
"ce_loss_7": 3.6874555468559267,
"epoch": 0.528,
"grad_norm": 608.0,
"kl_loss_10": 183.7262046813965,
"kl_loss_2": 2223.9841369628907,
"kl_loss_3": 1744.9040588378907,
"kl_loss_7": 628.2290283203125,
"learning_rate": 0.00046353914347098467,
"loss": 1206.4577,
"step": 5280
},
{
"ce_loss_10": 3.5970619559288024,
"ce_loss_13": 3.5186134576797485,
"ce_loss_2": 4.588784885406494,
"ce_loss_3": 4.328100037574768,
"ce_loss_7": 3.7806106090545653,
"epoch": 0.529,
"grad_norm": 608.0,
"kl_loss_10": 183.81845779418944,
"kl_loss_2": 2204.89072265625,
"kl_loss_3": 1738.6330078125,
"kl_loss_7": 622.7282592773438,
"learning_rate": 0.0004619568909744524,
"loss": 1214.3289,
"step": 5290
},
{
"ce_loss_10": 3.5965808272361754,
"ce_loss_13": 3.519477891921997,
"ce_loss_2": 4.575191998481751,
"ce_loss_3": 4.308115267753601,
"ce_loss_7": 3.779456090927124,
"epoch": 0.53,
"grad_norm": 624.0,
"kl_loss_10": 185.90534057617188,
"kl_loss_2": 2166.9622314453127,
"kl_loss_3": 1701.3540832519532,
"kl_loss_7": 623.1412811279297,
"learning_rate": 0.00046037502157160573,
"loss": 1194.0631,
"step": 5300
},
{
"ce_loss_10": 3.475346398353577,
"ce_loss_13": 3.3953770637512206,
"ce_loss_2": 4.472927665710449,
"ce_loss_3": 4.211131680011749,
"ce_loss_7": 3.672848129272461,
"epoch": 0.531,
"grad_norm": 608.0,
"kl_loss_10": 188.33962783813476,
"kl_loss_2": 2232.2967163085937,
"kl_loss_3": 1767.3839172363282,
"kl_loss_7": 649.8835662841797,
"learning_rate": 0.00045879355119185207,
"loss": 1212.3993,
"step": 5310
},
{
"ce_loss_10": 3.555951988697052,
"ce_loss_13": 3.474162495136261,
"ce_loss_2": 4.560364985466004,
"ce_loss_3": 4.293745231628418,
"ce_loss_7": 3.751049613952637,
"epoch": 0.532,
"grad_norm": 672.0,
"kl_loss_10": 190.96983184814454,
"kl_loss_2": 2257.3020629882812,
"kl_loss_3": 1780.9093383789063,
"kl_loss_7": 650.6417663574218,
"learning_rate": 0.0004572124957605803,
"loss": 1223.1152,
"step": 5320
},
{
"ce_loss_10": 3.5723905324935914,
"ce_loss_13": 3.492247462272644,
"ce_loss_2": 4.554775309562683,
"ce_loss_3": 4.289375352859497,
"ce_loss_7": 3.7621920228004457,
"epoch": 0.533,
"grad_norm": 584.0,
"kl_loss_10": 185.00704040527344,
"kl_loss_2": 2210.7857055664062,
"kl_loss_3": 1738.2788696289062,
"kl_loss_7": 631.7678100585938,
"learning_rate": 0.00045563187119900103,
"loss": 1171.3742,
"step": 5330
},
{
"ce_loss_10": 3.4156481266021728,
"ce_loss_13": 3.338289904594421,
"ce_loss_2": 4.459405374526978,
"ce_loss_3": 4.184291207790375,
"ce_loss_7": 3.612143576145172,
"epoch": 0.534,
"grad_norm": 668.0,
"kl_loss_10": 185.77383117675782,
"kl_loss_2": 2280.9642578125,
"kl_loss_3": 1801.5166015625,
"kl_loss_7": 637.4973205566406,
"learning_rate": 0.00045405169342398633,
"loss": 1214.5622,
"step": 5340
},
{
"ce_loss_10": 3.5048020482063293,
"ce_loss_13": 3.422155427932739,
"ce_loss_2": 4.527113747596741,
"ce_loss_3": 4.256860768795013,
"ce_loss_7": 3.6958253622055053,
"epoch": 0.535,
"grad_norm": 580.0,
"kl_loss_10": 188.63988189697267,
"kl_loss_2": 2252.499432373047,
"kl_loss_3": 1773.2778076171876,
"kl_loss_7": 633.0957061767579,
"learning_rate": 0.0004524719783479088,
"loss": 1187.9953,
"step": 5350
},
{
"ce_loss_10": 3.460780155658722,
"ce_loss_13": 3.378307545185089,
"ce_loss_2": 4.497902464866638,
"ce_loss_3": 4.2276026725769045,
"ce_loss_7": 3.6559959650039673,
"epoch": 0.536,
"grad_norm": 580.0,
"kl_loss_10": 189.0280532836914,
"kl_loss_2": 2293.6262939453127,
"kl_loss_3": 1820.3978698730468,
"kl_loss_7": 642.6393402099609,
"learning_rate": 0.00045089274187848144,
"loss": 1197.8392,
"step": 5360
},
{
"ce_loss_10": 3.5799126744270326,
"ce_loss_13": 3.501321530342102,
"ce_loss_2": 4.5603124618530275,
"ce_loss_3": 4.297963404655457,
"ce_loss_7": 3.7619481921195983,
"epoch": 0.537,
"grad_norm": 672.0,
"kl_loss_10": 183.09423599243163,
"kl_loss_2": 2192.1404357910155,
"kl_loss_3": 1730.206787109375,
"kl_loss_7": 620.7777648925781,
"learning_rate": 0.00044931399991859835,
"loss": 1181.3807,
"step": 5370
},
{
"ce_loss_10": 3.4432420253753664,
"ce_loss_13": 3.364873206615448,
"ce_loss_2": 4.452599573135376,
"ce_loss_3": 4.183995950222015,
"ce_loss_7": 3.6285991072654724,
"epoch": 0.538,
"grad_norm": 600.0,
"kl_loss_10": 182.95552597045898,
"kl_loss_2": 2236.559704589844,
"kl_loss_3": 1765.850408935547,
"kl_loss_7": 629.1190887451172,
"learning_rate": 0.00044773576836617336,
"loss": 1181.7396,
"step": 5380
},
{
"ce_loss_10": 3.537210750579834,
"ce_loss_13": 3.4561371922492983,
"ce_loss_2": 4.546432638168335,
"ce_loss_3": 4.281138265132904,
"ce_loss_7": 3.7339015364646913,
"epoch": 0.539,
"grad_norm": 612.0,
"kl_loss_10": 189.98071517944337,
"kl_loss_2": 2253.4164794921876,
"kl_loss_3": 1781.9518432617188,
"kl_loss_7": 650.4307464599609,
"learning_rate": 0.00044615806311398056,
"loss": 1232.9109,
"step": 5390
},
{
"ce_loss_10": 3.6113093972206114,
"ce_loss_13": 3.5354915499687194,
"ce_loss_2": 4.540320181846619,
"ce_loss_3": 4.277068996429444,
"ce_loss_7": 3.787187647819519,
"epoch": 0.54,
"grad_norm": 580.0,
"kl_loss_10": 181.3637908935547,
"kl_loss_2": 2094.3728942871094,
"kl_loss_3": 1633.9456298828125,
"kl_loss_7": 605.8723022460938,
"learning_rate": 0.00044458090004949454,
"loss": 1175.0439,
"step": 5400
},
{
"ce_loss_10": 3.47382390499115,
"ce_loss_13": 3.39083354473114,
"ce_loss_2": 4.532833766937256,
"ce_loss_3": 4.262583804130554,
"ce_loss_7": 3.6737227201461793,
"epoch": 0.541,
"grad_norm": 620.0,
"kl_loss_10": 194.1818962097168,
"kl_loss_2": 2371.5374450683594,
"kl_loss_3": 1877.3123657226563,
"kl_loss_7": 665.8367095947266,
"learning_rate": 0.0004430042950547297,
"loss": 1218.705,
"step": 5410
},
{
"ce_loss_10": 3.5697335839271545,
"ce_loss_13": 3.483165454864502,
"ce_loss_2": 4.578557109832763,
"ce_loss_3": 4.31482458114624,
"ce_loss_7": 3.763143301010132,
"epoch": 0.542,
"grad_norm": 572.0,
"kl_loss_10": 191.8735610961914,
"kl_loss_2": 2253.524365234375,
"kl_loss_3": 1779.8523681640625,
"kl_loss_7": 645.0971527099609,
"learning_rate": 0.0004414282640060809,
"loss": 1200.7552,
"step": 5420
},
{
"ce_loss_10": 3.656325376033783,
"ce_loss_13": 3.575901198387146,
"ce_loss_2": 4.611030888557434,
"ce_loss_3": 4.35529580116272,
"ce_loss_7": 3.8402703166007996,
"epoch": 0.543,
"grad_norm": 672.0,
"kl_loss_10": 186.09361267089844,
"kl_loss_2": 2127.538677978516,
"kl_loss_3": 1677.6395080566406,
"kl_loss_7": 622.3258697509766,
"learning_rate": 0.0004398528227741633,
"loss": 1179.4629,
"step": 5430
},
{
"ce_loss_10": 3.5199654936790465,
"ce_loss_13": 3.442525625228882,
"ce_loss_2": 4.519460201263428,
"ce_loss_3": 4.247548985481262,
"ce_loss_7": 3.7133419036865236,
"epoch": 0.544,
"grad_norm": 656.0,
"kl_loss_10": 186.4021957397461,
"kl_loss_2": 2206.209338378906,
"kl_loss_3": 1726.7237670898437,
"kl_loss_7": 636.2167572021484,
"learning_rate": 0.00043827798722365264,
"loss": 1202.1797,
"step": 5440
},
{
"ce_loss_10": 3.6471530318260195,
"ce_loss_13": 3.566143047809601,
"ce_loss_2": 4.5952486276626585,
"ce_loss_3": 4.333053851127625,
"ce_loss_7": 3.8201894760131836,
"epoch": 0.545,
"grad_norm": 592.0,
"kl_loss_10": 185.36949920654297,
"kl_loss_2": 2143.129284667969,
"kl_loss_3": 1675.2244567871094,
"kl_loss_7": 617.6786651611328,
"learning_rate": 0.00043670377321312535,
"loss": 1164.6765,
"step": 5450
},
{
"ce_loss_10": 3.6508351445198057,
"ce_loss_13": 3.574675273895264,
"ce_loss_2": 4.5991229772567745,
"ce_loss_3": 4.339683651924133,
"ce_loss_7": 3.8303612232208253,
"epoch": 0.546,
"grad_norm": 700.0,
"kl_loss_10": 183.1472366333008,
"kl_loss_2": 2130.7037048339844,
"kl_loss_3": 1667.0253173828125,
"kl_loss_7": 613.02021484375,
"learning_rate": 0.0004351301965948991,
"loss": 1168.8242,
"step": 5460
},
{
"ce_loss_10": 3.559572923183441,
"ce_loss_13": 3.478611421585083,
"ce_loss_2": 4.511995816230774,
"ce_loss_3": 4.249083304405213,
"ce_loss_7": 3.7354837536811827,
"epoch": 0.547,
"grad_norm": 636.0,
"kl_loss_10": 181.6176902770996,
"kl_loss_2": 2130.2896118164062,
"kl_loss_3": 1667.8863098144532,
"kl_loss_7": 614.7661987304688,
"learning_rate": 0.000433557273214873,
"loss": 1176.8127,
"step": 5470
},
{
"ce_loss_10": 3.545152747631073,
"ce_loss_13": 3.4662238121032716,
"ce_loss_2": 4.518579649925232,
"ce_loss_3": 4.245905971527099,
"ce_loss_7": 3.7270439863204956,
"epoch": 0.548,
"grad_norm": 608.0,
"kl_loss_10": 184.05833053588867,
"kl_loss_2": 2168.807977294922,
"kl_loss_3": 1696.2628234863282,
"kl_loss_7": 616.8240295410156,
"learning_rate": 0.000431985018912368,
"loss": 1150.4518,
"step": 5480
},
{
"ce_loss_10": 3.514492917060852,
"ce_loss_13": 3.4341874718666077,
"ce_loss_2": 4.534255909919739,
"ce_loss_3": 4.270003151893616,
"ce_loss_7": 3.7031027913093566,
"epoch": 0.549,
"grad_norm": 600.0,
"kl_loss_10": 189.14087448120117,
"kl_loss_2": 2268.4575805664062,
"kl_loss_3": 1800.3566040039063,
"kl_loss_7": 639.125277709961,
"learning_rate": 0.0004304134495199674,
"loss": 1178.9426,
"step": 5490
},
{
"ce_loss_10": 3.538786220550537,
"ce_loss_13": 3.4557671666145326,
"ce_loss_2": 4.5282275676727295,
"ce_loss_3": 4.265328872203827,
"ce_loss_7": 3.731334662437439,
"epoch": 0.55,
"grad_norm": 604.0,
"kl_loss_10": 188.5583984375,
"kl_loss_2": 2236.901904296875,
"kl_loss_3": 1761.930596923828,
"kl_loss_7": 644.1045196533203,
"learning_rate": 0.0004288425808633575,
"loss": 1185.0572,
"step": 5500
},
{
"ce_loss_10": 3.514096534252167,
"ce_loss_13": 3.435099017620087,
"ce_loss_2": 4.509266877174378,
"ce_loss_3": 4.252711880207062,
"ce_loss_7": 3.6944369435310365,
"epoch": 0.551,
"grad_norm": 664.0,
"kl_loss_10": 184.48614044189452,
"kl_loss_2": 2223.2010803222656,
"kl_loss_3": 1765.0783264160157,
"kl_loss_7": 630.5097595214844,
"learning_rate": 0.0004272724287611684,
"loss": 1201.5842,
"step": 5510
},
{
"ce_loss_10": 3.490022134780884,
"ce_loss_13": 3.4118714332580566,
"ce_loss_2": 4.514768314361572,
"ce_loss_3": 4.2408933401107785,
"ce_loss_7": 3.680497145652771,
"epoch": 0.552,
"grad_norm": 652.0,
"kl_loss_10": 185.8211784362793,
"kl_loss_2": 2267.8012084960938,
"kl_loss_3": 1790.0665649414063,
"kl_loss_7": 633.0888458251953,
"learning_rate": 0.00042570300902481425,
"loss": 1202.0281,
"step": 5520
},
{
"ce_loss_10": 3.523720991611481,
"ce_loss_13": 3.448110568523407,
"ce_loss_2": 4.499163627624512,
"ce_loss_3": 4.242252886295319,
"ce_loss_7": 3.704312777519226,
"epoch": 0.553,
"grad_norm": 608.0,
"kl_loss_10": 183.24146575927733,
"kl_loss_2": 2193.4769287109375,
"kl_loss_3": 1734.7890686035157,
"kl_loss_7": 623.5370147705078,
"learning_rate": 0.00042413433745833423,
"loss": 1179.776,
"step": 5530
},
{
"ce_loss_10": 3.5270172238349913,
"ce_loss_13": 3.448072147369385,
"ce_loss_2": 4.536388492584228,
"ce_loss_3": 4.260759913921357,
"ce_loss_7": 3.715148115158081,
"epoch": 0.554,
"grad_norm": 556.0,
"kl_loss_10": 183.68499755859375,
"kl_loss_2": 2226.1301025390626,
"kl_loss_3": 1743.2217163085938,
"kl_loss_7": 626.4557403564453,
"learning_rate": 0.0004225664298582339,
"loss": 1157.0496,
"step": 5540
},
{
"ce_loss_10": 3.6083423376083372,
"ce_loss_13": 3.530562436580658,
"ce_loss_2": 4.568499255180359,
"ce_loss_3": 4.307069134712219,
"ce_loss_7": 3.7862043499946596,
"epoch": 0.555,
"grad_norm": 548.0,
"kl_loss_10": 182.37268829345703,
"kl_loss_2": 2137.707696533203,
"kl_loss_3": 1673.7792663574219,
"kl_loss_7": 611.6491577148438,
"learning_rate": 0.000420999302013325,
"loss": 1149.7553,
"step": 5550
},
{
"ce_loss_10": 3.5049922823905946,
"ce_loss_13": 3.420680546760559,
"ce_loss_2": 4.553832268714904,
"ce_loss_3": 4.279029071331024,
"ce_loss_7": 3.700891983509064,
"epoch": 0.556,
"grad_norm": 572.0,
"kl_loss_10": 190.98652191162108,
"kl_loss_2": 2305.641845703125,
"kl_loss_3": 1822.5648254394532,
"kl_loss_7": 641.2451202392579,
"learning_rate": 0.000419432969704568,
"loss": 1204.391,
"step": 5560
},
{
"ce_loss_10": 3.548888063430786,
"ce_loss_13": 3.4704429507255554,
"ce_loss_2": 4.518404316902161,
"ce_loss_3": 4.257292962074279,
"ce_loss_7": 3.735389542579651,
"epoch": 0.557,
"grad_norm": 564.0,
"kl_loss_10": 182.6816421508789,
"kl_loss_2": 2144.968542480469,
"kl_loss_3": 1682.6951538085937,
"kl_loss_7": 617.6556762695312,
"learning_rate": 0.00041786744870491154,
"loss": 1202.9963,
"step": 5570
},
{
"ce_loss_10": 3.491339087486267,
"ce_loss_13": 3.412715029716492,
"ce_loss_2": 4.4881198644638065,
"ce_loss_3": 4.219742333889007,
"ce_loss_7": 3.679445171356201,
"epoch": 0.558,
"grad_norm": 576.0,
"kl_loss_10": 189.42200622558593,
"kl_loss_2": 2234.280969238281,
"kl_loss_3": 1757.53349609375,
"kl_loss_7": 641.0087585449219,
"learning_rate": 0.0004163027547791347,
"loss": 1192.3963,
"step": 5580
},
{
"ce_loss_10": 3.4689704895019533,
"ce_loss_13": 3.3872820258140566,
"ce_loss_2": 4.518157267570496,
"ce_loss_3": 4.244075846672058,
"ce_loss_7": 3.6619726419448853,
"epoch": 0.559,
"grad_norm": 688.0,
"kl_loss_10": 188.0017578125,
"kl_loss_2": 2320.9525756835938,
"kl_loss_3": 1834.1813659667969,
"kl_loss_7": 642.0279479980469,
"learning_rate": 0.0004147389036836881,
"loss": 1210.1521,
"step": 5590
},
{
"ce_loss_10": 3.5183377385139467,
"ce_loss_13": 3.4371410965919496,
"ce_loss_2": 4.522028660774231,
"ce_loss_3": 4.258878147602081,
"ce_loss_7": 3.706261694431305,
"epoch": 0.56,
"grad_norm": 652.0,
"kl_loss_10": 185.66660232543944,
"kl_loss_2": 2233.013397216797,
"kl_loss_3": 1764.0713806152344,
"kl_loss_7": 637.6866302490234,
"learning_rate": 0.00041317591116653486,
"loss": 1219.6441,
"step": 5600
},
{
"ce_loss_10": 3.558071720600128,
"ce_loss_13": 3.474745440483093,
"ce_loss_2": 4.558679819107056,
"ce_loss_3": 4.291901731491089,
"ce_loss_7": 3.746951687335968,
"epoch": 0.561,
"grad_norm": 592.0,
"kl_loss_10": 189.82635574340821,
"kl_loss_2": 2230.9510803222656,
"kl_loss_3": 1759.6529296875,
"kl_loss_7": 636.9456726074219,
"learning_rate": 0.0004116137929669921,
"loss": 1188.2356,
"step": 5610
},
{
"ce_loss_10": 3.544596457481384,
"ce_loss_13": 3.465434396266937,
"ce_loss_2": 4.526343536376953,
"ce_loss_3": 4.262159049510956,
"ce_loss_7": 3.7297433972358705,
"epoch": 0.562,
"grad_norm": 700.0,
"kl_loss_10": 184.16798706054686,
"kl_loss_2": 2204.5443481445313,
"kl_loss_3": 1738.5609375,
"kl_loss_7": 629.1714752197265,
"learning_rate": 0.00041005256481557305,
"loss": 1174.8596,
"step": 5620
},
{
"ce_loss_10": 3.6428149700164796,
"ce_loss_13": 3.568005383014679,
"ce_loss_2": 4.574557089805603,
"ce_loss_3": 4.320431900024414,
"ce_loss_7": 3.8154868602752687,
"epoch": 0.563,
"grad_norm": 580.0,
"kl_loss_10": 178.43261108398437,
"kl_loss_2": 2081.9929809570312,
"kl_loss_3": 1633.8047790527344,
"kl_loss_7": 600.1017929077149,
"learning_rate": 0.00040849224243382767,
"loss": 1150.8125,
"step": 5630
},
{
"ce_loss_10": 3.4989004015922545,
"ce_loss_13": 3.4218288540840147,
"ce_loss_2": 4.497757744789124,
"ce_loss_3": 4.228800570964813,
"ce_loss_7": 3.6881244659423826,
"epoch": 0.564,
"grad_norm": 576.0,
"kl_loss_10": 184.93341827392578,
"kl_loss_2": 2224.632287597656,
"kl_loss_3": 1749.1263427734375,
"kl_loss_7": 632.0666015625,
"learning_rate": 0.000406932841534185,
"loss": 1173.0332,
"step": 5640
},
{
"ce_loss_10": 3.453734540939331,
"ce_loss_13": 3.372727131843567,
"ce_loss_2": 4.460113084316253,
"ce_loss_3": 4.19973611831665,
"ce_loss_7": 3.6455657839775086,
"epoch": 0.565,
"grad_norm": 708.0,
"kl_loss_10": 186.30313568115236,
"kl_loss_2": 2260.893664550781,
"kl_loss_3": 1792.1846252441405,
"kl_loss_7": 638.3344879150391,
"learning_rate": 0.0004053743778197951,
"loss": 1219.3186,
"step": 5650
},
{
"ce_loss_10": 3.565755784511566,
"ce_loss_13": 3.481943702697754,
"ce_loss_2": 4.545414447784424,
"ce_loss_3": 4.281696927547455,
"ce_loss_7": 3.7513938307762147,
"epoch": 0.566,
"grad_norm": 584.0,
"kl_loss_10": 188.62994842529298,
"kl_loss_2": 2184.7360778808593,
"kl_loss_3": 1721.9289123535157,
"kl_loss_7": 628.1358184814453,
"learning_rate": 0.0004038168669843697,
"loss": 1209.3523,
"step": 5660
},
{
"ce_loss_10": 3.532804882526398,
"ce_loss_13": 3.4522215127944946,
"ce_loss_2": 4.494965553283691,
"ce_loss_3": 4.231216824054718,
"ce_loss_7": 3.7118934392929077,
"epoch": 0.567,
"grad_norm": 620.0,
"kl_loss_10": 183.03904342651367,
"kl_loss_2": 2154.956463623047,
"kl_loss_3": 1695.0998046875,
"kl_loss_7": 613.3763107299804,
"learning_rate": 0.000402260324712026,
"loss": 1195.8986,
"step": 5670
},
{
"ce_loss_10": 3.5749718070030214,
"ce_loss_13": 3.497403085231781,
"ce_loss_2": 4.588955020904541,
"ce_loss_3": 4.319999086856842,
"ce_loss_7": 3.7625349521636964,
"epoch": 0.568,
"grad_norm": 616.0,
"kl_loss_10": 184.26412506103514,
"kl_loss_2": 2236.5206665039063,
"kl_loss_3": 1760.365301513672,
"kl_loss_7": 624.1568267822265,
"learning_rate": 0.00040070476667712743,
"loss": 1174.4818,
"step": 5680
},
{
"ce_loss_10": 3.595443320274353,
"ce_loss_13": 3.5173869848251345,
"ce_loss_2": 4.573628330230713,
"ce_loss_3": 4.3121489644050595,
"ce_loss_7": 3.7780985593795777,
"epoch": 0.569,
"grad_norm": 540.0,
"kl_loss_10": 184.3900894165039,
"kl_loss_2": 2190.797717285156,
"kl_loss_3": 1726.8204223632813,
"kl_loss_7": 618.142544555664,
"learning_rate": 0.0003991502085441259,
"loss": 1191.0875,
"step": 5690
},
{
"ce_loss_10": 3.6352679252624513,
"ce_loss_13": 3.556475079059601,
"ce_loss_2": 4.568906188011169,
"ce_loss_3": 4.311613416671753,
"ce_loss_7": 3.8102620005607606,
"epoch": 0.57,
"grad_norm": 616.0,
"kl_loss_10": 180.942374420166,
"kl_loss_2": 2084.3558349609375,
"kl_loss_3": 1627.6179626464843,
"kl_loss_7": 599.5358856201171,
"learning_rate": 0.0003975966659674047,
"loss": 1160.7822,
"step": 5700
},
{
"ce_loss_10": 3.5962194561958314,
"ce_loss_13": 3.517608177661896,
"ce_loss_2": 4.578224086761475,
"ce_loss_3": 4.314012908935547,
"ce_loss_7": 3.7789862513542176,
"epoch": 0.571,
"grad_norm": 644.0,
"kl_loss_10": 182.5239112854004,
"kl_loss_2": 2180.907177734375,
"kl_loss_3": 1721.9898742675782,
"kl_loss_7": 614.957388305664,
"learning_rate": 0.0003960441545911204,
"loss": 1160.7484,
"step": 5710
},
{
"ce_loss_10": 3.5932918190956116,
"ce_loss_13": 3.5129475712776186,
"ce_loss_2": 4.558131432533264,
"ce_loss_3": 4.293534338474274,
"ce_loss_7": 3.7742814660072326,
"epoch": 0.572,
"grad_norm": 604.0,
"kl_loss_10": 183.15422897338868,
"kl_loss_2": 2156.431115722656,
"kl_loss_3": 1695.9377807617188,
"kl_loss_7": 619.908480834961,
"learning_rate": 0.0003944926900490452,
"loss": 1164.068,
"step": 5720
},
{
"ce_loss_10": 3.5127488017082213,
"ce_loss_13": 3.430432641506195,
"ce_loss_2": 4.5248651027679445,
"ce_loss_3": 4.258909869194031,
"ce_loss_7": 3.709870958328247,
"epoch": 0.573,
"grad_norm": 564.0,
"kl_loss_10": 186.0645439147949,
"kl_loss_2": 2235.3706176757814,
"kl_loss_3": 1765.8205688476562,
"kl_loss_7": 637.5899017333984,
"learning_rate": 0.0003929422879644099,
"loss": 1176.3957,
"step": 5730
},
{
"ce_loss_10": 3.510514330863953,
"ce_loss_13": 3.436869239807129,
"ce_loss_2": 4.478006148338318,
"ce_loss_3": 4.212211620807648,
"ce_loss_7": 3.6878655314445496,
"epoch": 0.574,
"grad_norm": 608.0,
"kl_loss_10": 179.26688079833986,
"kl_loss_2": 2168.3131591796873,
"kl_loss_3": 1699.4741943359375,
"kl_loss_7": 606.771630859375,
"learning_rate": 0.0003913929639497462,
"loss": 1141.8648,
"step": 5740
},
{
"ce_loss_10": 3.468266797065735,
"ce_loss_13": 3.3873007535934447,
"ce_loss_2": 4.490426182746887,
"ce_loss_3": 4.221552240848541,
"ce_loss_7": 3.6532665491104126,
"epoch": 0.575,
"grad_norm": 600.0,
"kl_loss_10": 182.11020889282227,
"kl_loss_2": 2259.020544433594,
"kl_loss_3": 1779.8084838867187,
"kl_loss_7": 622.9515014648438,
"learning_rate": 0.00038984473360672965,
"loss": 1169.1125,
"step": 5750
},
{
"ce_loss_10": 3.4774887681007387,
"ce_loss_13": 3.3949706315994264,
"ce_loss_2": 4.497473883628845,
"ce_loss_3": 4.2249194264411924,
"ce_loss_7": 3.664697051048279,
"epoch": 0.576,
"grad_norm": 596.0,
"kl_loss_10": 181.4011428833008,
"kl_loss_2": 2244.824786376953,
"kl_loss_3": 1764.2872131347656,
"kl_loss_7": 621.9651702880859,
"learning_rate": 0.0003882976125260229,
"loss": 1170.2874,
"step": 5760
},
{
"ce_loss_10": 3.5439630150794983,
"ce_loss_13": 3.4651756167411802,
"ce_loss_2": 4.539518880844116,
"ce_loss_3": 4.274804329872131,
"ce_loss_7": 3.728801262378693,
"epoch": 0.577,
"grad_norm": 592.0,
"kl_loss_10": 183.33100814819335,
"kl_loss_2": 2204.0270751953126,
"kl_loss_3": 1723.9171936035157,
"kl_loss_7": 615.5777862548828,
"learning_rate": 0.00038675161628711776,
"loss": 1179.8893,
"step": 5770
},
{
"ce_loss_10": 3.5816867470741274,
"ce_loss_13": 3.5046088337898254,
"ce_loss_2": 4.544821619987488,
"ce_loss_3": 4.285539746284485,
"ce_loss_7": 3.761784756183624,
"epoch": 0.578,
"grad_norm": 620.0,
"kl_loss_10": 181.6286849975586,
"kl_loss_2": 2136.3880615234375,
"kl_loss_3": 1677.4017333984375,
"kl_loss_7": 610.2154174804688,
"learning_rate": 0.0003852067604581794,
"loss": 1194.1891,
"step": 5780
},
{
"ce_loss_10": 3.523706150054932,
"ce_loss_13": 3.448537766933441,
"ce_loss_2": 4.533637523651123,
"ce_loss_3": 4.265281748771668,
"ce_loss_7": 3.709212040901184,
"epoch": 0.579,
"grad_norm": 676.0,
"kl_loss_10": 181.67257690429688,
"kl_loss_2": 2230.821612548828,
"kl_loss_3": 1755.830584716797,
"kl_loss_7": 620.3396881103515,
"learning_rate": 0.0003836630605958888,
"loss": 1177.6782,
"step": 5790
},
{
"ce_loss_10": 3.583223593235016,
"ce_loss_13": 3.503636026382446,
"ce_loss_2": 4.566303539276123,
"ce_loss_3": 4.3056800127029415,
"ce_loss_7": 3.76351158618927,
"epoch": 0.58,
"grad_norm": 708.0,
"kl_loss_10": 183.71082077026367,
"kl_loss_2": 2228.4331115722657,
"kl_loss_3": 1769.3681579589843,
"kl_loss_7": 628.4158660888672,
"learning_rate": 0.0003821205322452863,
"loss": 1235.8768,
"step": 5800
},
{
"ce_loss_10": 3.563581478595734,
"ce_loss_13": 3.488909196853638,
"ce_loss_2": 4.543065023422241,
"ce_loss_3": 4.286456656455994,
"ce_loss_7": 3.7441120743751526,
"epoch": 0.581,
"grad_norm": 584.0,
"kl_loss_10": 180.5809585571289,
"kl_loss_2": 2191.5135986328123,
"kl_loss_3": 1729.1048767089844,
"kl_loss_7": 608.2429626464843,
"learning_rate": 0.0003805791909396155,
"loss": 1179.2295,
"step": 5810
},
{
"ce_loss_10": 3.5160235166549683,
"ce_loss_13": 3.43984659910202,
"ce_loss_2": 4.500444793701172,
"ce_loss_3": 4.2373772144317625,
"ce_loss_7": 3.6964723467826843,
"epoch": 0.582,
"grad_norm": 652.0,
"kl_loss_10": 180.02818908691407,
"kl_loss_2": 2186.5078369140624,
"kl_loss_3": 1730.8345642089844,
"kl_loss_7": 613.5680450439453,
"learning_rate": 0.0003790390522001662,
"loss": 1191.4708,
"step": 5820
},
{
"ce_loss_10": 3.447020876407623,
"ce_loss_13": 3.3710612773895265,
"ce_loss_2": 4.448183393478393,
"ce_loss_3": 4.185709154605865,
"ce_loss_7": 3.6283095359802244,
"epoch": 0.583,
"grad_norm": 620.0,
"kl_loss_10": 180.24705505371094,
"kl_loss_2": 2242.3388488769533,
"kl_loss_3": 1776.8985290527344,
"kl_loss_7": 620.0051086425781,
"learning_rate": 0.0003775001315361183,
"loss": 1173.2469,
"step": 5830
},
{
"ce_loss_10": 3.560646951198578,
"ce_loss_13": 3.481656861305237,
"ce_loss_2": 4.561934852600098,
"ce_loss_3": 4.297064936161041,
"ce_loss_7": 3.746256446838379,
"epoch": 0.584,
"grad_norm": 560.0,
"kl_loss_10": 183.9656074523926,
"kl_loss_2": 2215.3773864746095,
"kl_loss_3": 1750.5085021972657,
"kl_loss_7": 621.9390472412109,
"learning_rate": 0.0003759624444443858,
"loss": 1186.5547,
"step": 5840
},
{
"ce_loss_10": 3.592632758617401,
"ce_loss_13": 3.520240008831024,
"ce_loss_2": 4.567729663848877,
"ce_loss_3": 4.300854158401489,
"ce_loss_7": 3.769944798946381,
"epoch": 0.585,
"grad_norm": 568.0,
"kl_loss_10": 180.2906066894531,
"kl_loss_2": 2170.1985412597655,
"kl_loss_3": 1706.62548828125,
"kl_loss_7": 608.6328552246093,
"learning_rate": 0.00037442600640946044,
"loss": 1155.9348,
"step": 5850
},
{
"ce_loss_10": 3.550674855709076,
"ce_loss_13": 3.475678253173828,
"ce_loss_2": 4.5188051700592045,
"ce_loss_3": 4.257573843002319,
"ce_loss_7": 3.733881187438965,
"epoch": 0.586,
"grad_norm": 624.0,
"kl_loss_10": 180.34449844360353,
"kl_loss_2": 2161.917333984375,
"kl_loss_3": 1700.5603820800782,
"kl_loss_7": 615.4381408691406,
"learning_rate": 0.00037289083290325663,
"loss": 1151.5385,
"step": 5860
},
{
"ce_loss_10": 3.5404091477394104,
"ce_loss_13": 3.4616484522819517,
"ce_loss_2": 4.5070148229599,
"ce_loss_3": 4.242105662822723,
"ce_loss_7": 3.7187010407447816,
"epoch": 0.587,
"grad_norm": 592.0,
"kl_loss_10": 183.17743911743165,
"kl_loss_2": 2149.7393432617187,
"kl_loss_3": 1683.0787292480468,
"kl_loss_7": 610.0913803100586,
"learning_rate": 0.0003713569393849543,
"loss": 1154.5703,
"step": 5870
},
{
"ce_loss_10": 3.5839020013809204,
"ce_loss_13": 3.5078009486198427,
"ce_loss_2": 4.56416871547699,
"ce_loss_3": 4.296731424331665,
"ce_loss_7": 3.767895996570587,
"epoch": 0.588,
"grad_norm": 592.0,
"kl_loss_10": 183.36542816162108,
"kl_loss_2": 2186.738494873047,
"kl_loss_3": 1717.5487915039062,
"kl_loss_7": 612.2841430664063,
"learning_rate": 0.00036982434130084397,
"loss": 1179.8928,
"step": 5880
},
{
"ce_loss_10": 3.4997439622879027,
"ce_loss_13": 3.4187664270401,
"ce_loss_2": 4.478350329399109,
"ce_loss_3": 4.210885548591614,
"ce_loss_7": 3.6801365852355956,
"epoch": 0.589,
"grad_norm": 664.0,
"kl_loss_10": 186.01408843994142,
"kl_loss_2": 2192.050701904297,
"kl_loss_3": 1713.8697509765625,
"kl_loss_7": 622.2605224609375,
"learning_rate": 0.00036829305408417166,
"loss": 1185.5467,
"step": 5890
},
{
"ce_loss_10": 3.4883674502372743,
"ce_loss_13": 3.4076414942741393,
"ce_loss_2": 4.51081612110138,
"ce_loss_3": 4.233860373497009,
"ce_loss_7": 3.68140949010849,
"epoch": 0.59,
"grad_norm": 632.0,
"kl_loss_10": 185.69306488037108,
"kl_loss_2": 2265.8583251953123,
"kl_loss_3": 1770.2322631835937,
"kl_loss_7": 633.7182983398437,
"learning_rate": 0.0003667630931549826,
"loss": 1189.5502,
"step": 5900
},
{
"ce_loss_10": 3.454320323467255,
"ce_loss_13": 3.376146912574768,
"ce_loss_2": 4.510071706771851,
"ce_loss_3": 4.2408855676651,
"ce_loss_7": 3.649706947803497,
"epoch": 0.591,
"grad_norm": 728.0,
"kl_loss_10": 185.1581298828125,
"kl_loss_2": 2343.439013671875,
"kl_loss_3": 1859.8356567382812,
"kl_loss_7": 639.2615692138672,
"learning_rate": 0.00036523447391996613,
"loss": 1217.3514,
"step": 5910
},
{
"ce_loss_10": 3.549302911758423,
"ce_loss_13": 3.4722840428352355,
"ce_loss_2": 4.514612603187561,
"ce_loss_3": 4.256480038166046,
"ce_loss_7": 3.727895641326904,
"epoch": 0.592,
"grad_norm": 580.0,
"kl_loss_10": 181.60699539184571,
"kl_loss_2": 2162.6364685058593,
"kl_loss_3": 1701.0076782226563,
"kl_loss_7": 610.4459930419922,
"learning_rate": 0.00036370721177230114,
"loss": 1162.5948,
"step": 5920
},
{
"ce_loss_10": 3.543530595302582,
"ce_loss_13": 3.4660569787025453,
"ce_loss_2": 4.543927192687988,
"ce_loss_3": 4.277280712127686,
"ce_loss_7": 3.728453516960144,
"epoch": 0.593,
"grad_norm": 628.0,
"kl_loss_10": 184.26243515014647,
"kl_loss_2": 2218.4042541503904,
"kl_loss_3": 1743.315625,
"kl_loss_7": 620.730111694336,
"learning_rate": 0.00036218132209150044,
"loss": 1186.6707,
"step": 5930
},
{
"ce_loss_10": 3.497697722911835,
"ce_loss_13": 3.4142557263374327,
"ce_loss_2": 4.5388647556304935,
"ce_loss_3": 4.264691114425659,
"ce_loss_7": 3.6943756103515626,
"epoch": 0.594,
"grad_norm": 524.0,
"kl_loss_10": 188.87873077392578,
"kl_loss_2": 2304.517468261719,
"kl_loss_3": 1814.6093872070312,
"kl_loss_7": 639.0129974365234,
"learning_rate": 0.0003606568202432562,
"loss": 1197.9809,
"step": 5940
},
{
"ce_loss_10": 3.565451109409332,
"ce_loss_13": 3.4856663823127745,
"ce_loss_2": 4.5841080904006954,
"ce_loss_3": 4.317169034481049,
"ce_loss_7": 3.754825806617737,
"epoch": 0.595,
"grad_norm": 696.0,
"kl_loss_10": 187.19320907592774,
"kl_loss_2": 2274.0406982421873,
"kl_loss_3": 1793.8463073730468,
"kl_loss_7": 630.620458984375,
"learning_rate": 0.0003591337215792851,
"loss": 1177.4938,
"step": 5950
},
{
"ce_loss_10": 3.611758494377136,
"ce_loss_13": 3.5361703038215637,
"ce_loss_2": 4.54854645729065,
"ce_loss_3": 4.2874367237091064,
"ce_loss_7": 3.781387460231781,
"epoch": 0.596,
"grad_norm": 536.0,
"kl_loss_10": 179.44385452270507,
"kl_loss_2": 2134.903210449219,
"kl_loss_3": 1672.3198852539062,
"kl_loss_7": 603.0327301025391,
"learning_rate": 0.00035761204143717383,
"loss": 1174.0895,
"step": 5960
},
{
"ce_loss_10": 3.564636397361755,
"ce_loss_13": 3.4857504963874817,
"ce_loss_2": 4.562372779846191,
"ce_loss_3": 4.294865238666534,
"ce_loss_7": 3.747916042804718,
"epoch": 0.597,
"grad_norm": 616.0,
"kl_loss_10": 181.63295822143556,
"kl_loss_2": 2217.5136901855467,
"kl_loss_3": 1751.903790283203,
"kl_loss_7": 618.9376495361328,
"learning_rate": 0.0003560917951402245,
"loss": 1215.2734,
"step": 5970
},
{
"ce_loss_10": 3.5358213543891908,
"ce_loss_13": 3.461250603199005,
"ce_loss_2": 4.515460109710693,
"ce_loss_3": 4.252109396457672,
"ce_loss_7": 3.720645487308502,
"epoch": 0.598,
"grad_norm": 616.0,
"kl_loss_10": 180.68030853271483,
"kl_loss_2": 2199.883331298828,
"kl_loss_3": 1727.857843017578,
"kl_loss_7": 614.7005615234375,
"learning_rate": 0.00035457299799730046,
"loss": 1174.0783,
"step": 5980
},
{
"ce_loss_10": 3.6016149520874023,
"ce_loss_13": 3.523995506763458,
"ce_loss_2": 4.564206576347351,
"ce_loss_3": 4.302748084068298,
"ce_loss_7": 3.7862717866897584,
"epoch": 0.599,
"grad_norm": 600.0,
"kl_loss_10": 181.36301651000977,
"kl_loss_2": 2153.0896545410155,
"kl_loss_3": 1694.4290161132812,
"kl_loss_7": 614.9286560058594,
"learning_rate": 0.0003530556653026721,
"loss": 1181.7495,
"step": 5990
},
{
"ce_loss_10": 3.5210883378982545,
"ce_loss_13": 3.4458776116371155,
"ce_loss_2": 4.520641088485718,
"ce_loss_3": 4.254351568222046,
"ce_loss_7": 3.699181377887726,
"epoch": 0.6,
"grad_norm": 764.0,
"kl_loss_10": 179.21529235839844,
"kl_loss_2": 2227.2805419921874,
"kl_loss_3": 1758.5499328613282,
"kl_loss_7": 610.0478576660156,
"learning_rate": 0.00035153981233586274,
"loss": 1193.8637,
"step": 6000
},
{
"ce_loss_10": 3.499428999423981,
"ce_loss_13": 3.422479748725891,
"ce_loss_2": 4.4867565631866455,
"ce_loss_3": 4.227682662010193,
"ce_loss_7": 3.6805691361427306,
"epoch": 0.601,
"grad_norm": 584.0,
"kl_loss_10": 179.26205139160157,
"kl_loss_2": 2193.6119079589844,
"kl_loss_3": 1731.6024475097656,
"kl_loss_7": 612.7285736083984,
"learning_rate": 0.00035002545436149473,
"loss": 1214.442,
"step": 6010
},
{
"ce_loss_10": 3.507369041442871,
"ce_loss_13": 3.427609443664551,
"ce_loss_2": 4.515847969055176,
"ce_loss_3": 4.248699688911438,
"ce_loss_7": 3.6938512086868287,
"epoch": 0.602,
"grad_norm": 592.0,
"kl_loss_10": 187.4394386291504,
"kl_loss_2": 2240.724530029297,
"kl_loss_3": 1766.6628112792969,
"kl_loss_7": 629.6498748779297,
"learning_rate": 0.0003485126066291364,
"loss": 1169.8236,
"step": 6020
},
{
"ce_loss_10": 3.5554185032844545,
"ce_loss_13": 3.4788596630096436,
"ce_loss_2": 4.540017461776733,
"ce_loss_3": 4.2838677883148195,
"ce_loss_7": 3.736110508441925,
"epoch": 0.603,
"grad_norm": 520.0,
"kl_loss_10": 179.3347900390625,
"kl_loss_2": 2192.1767639160157,
"kl_loss_3": 1731.594775390625,
"kl_loss_7": 613.0785980224609,
"learning_rate": 0.0003470012843731476,
"loss": 1185.9094,
"step": 6030
},
{
"ce_loss_10": 3.494213032722473,
"ce_loss_13": 3.41587815284729,
"ce_loss_2": 4.493516874313355,
"ce_loss_3": 4.230558323860168,
"ce_loss_7": 3.6748696088790895,
"epoch": 0.604,
"grad_norm": 604.0,
"kl_loss_10": 180.02317504882814,
"kl_loss_2": 2220.4429626464844,
"kl_loss_3": 1750.9172302246093,
"kl_loss_7": 613.3353332519531,
"learning_rate": 0.00034549150281252633,
"loss": 1207.7186,
"step": 6040
},
{
"ce_loss_10": 3.4735769987106324,
"ce_loss_13": 3.398567247390747,
"ce_loss_2": 4.450454211235046,
"ce_loss_3": 4.185557043552398,
"ce_loss_7": 3.660480535030365,
"epoch": 0.605,
"grad_norm": 608.0,
"kl_loss_10": 181.83876571655273,
"kl_loss_2": 2163.210076904297,
"kl_loss_3": 1694.9721801757812,
"kl_loss_7": 612.4143432617187,
"learning_rate": 0.0003439832771507565,
"loss": 1157.9707,
"step": 6050
},
{
"ce_loss_10": 3.4816818594932557,
"ce_loss_13": 3.4034390568733217,
"ce_loss_2": 4.478318929672241,
"ce_loss_3": 4.211991810798645,
"ce_loss_7": 3.6656970381736755,
"epoch": 0.606,
"grad_norm": 560.0,
"kl_loss_10": 181.10105361938477,
"kl_loss_2": 2226.4850891113283,
"kl_loss_3": 1757.37236328125,
"kl_loss_7": 619.9399398803711,
"learning_rate": 0.0003424766225756537,
"loss": 1172.4078,
"step": 6060
},
{
"ce_loss_10": 3.5375612139701844,
"ce_loss_13": 3.4606423020362853,
"ce_loss_2": 4.53115668296814,
"ce_loss_3": 4.261643159389496,
"ce_loss_7": 3.7194941639900208,
"epoch": 0.607,
"grad_norm": 600.0,
"kl_loss_10": 181.42390975952148,
"kl_loss_2": 2202.2797973632814,
"kl_loss_3": 1733.2598999023437,
"kl_loss_7": 615.9942810058594,
"learning_rate": 0.00034097155425921255,
"loss": 1158.2284,
"step": 6070
},
{
"ce_loss_10": 3.433805537223816,
"ce_loss_13": 3.354471778869629,
"ce_loss_2": 4.449812698364258,
"ce_loss_3": 4.179146933555603,
"ce_loss_7": 3.6204983830451964,
"epoch": 0.608,
"grad_norm": 592.0,
"kl_loss_10": 183.06991577148438,
"kl_loss_2": 2273.455847167969,
"kl_loss_3": 1787.318505859375,
"kl_loss_7": 624.6353576660156,
"learning_rate": 0.0003394680873574546,
"loss": 1187.3987,
"step": 6080
},
{
"ce_loss_10": 3.54138503074646,
"ce_loss_13": 3.4626068115234374,
"ce_loss_2": 4.556825470924378,
"ce_loss_3": 4.281811666488648,
"ce_loss_7": 3.7267327547073363,
"epoch": 0.609,
"grad_norm": 620.0,
"kl_loss_10": 183.72728881835937,
"kl_loss_2": 2232.6337280273438,
"kl_loss_3": 1752.9180786132813,
"kl_loss_7": 617.8084594726563,
"learning_rate": 0.0003379662370102747,
"loss": 1176.7848,
"step": 6090
},
{
"ce_loss_10": 3.5495489597320558,
"ce_loss_13": 3.4742938756942747,
"ce_loss_2": 4.515744471549988,
"ce_loss_3": 4.251201486587524,
"ce_loss_7": 3.726244103908539,
"epoch": 0.61,
"grad_norm": 640.0,
"kl_loss_10": 179.96657028198243,
"kl_loss_2": 2182.172985839844,
"kl_loss_3": 1717.0841491699218,
"kl_loss_7": 617.4093353271485,
"learning_rate": 0.0003364660183412892,
"loss": 1176.2052,
"step": 6100
},
{
"ce_loss_10": 3.5306557536125185,
"ce_loss_13": 3.4546700954437255,
"ce_loss_2": 4.500067496299744,
"ce_loss_3": 4.235128319263458,
"ce_loss_7": 3.7075342297554017,
"epoch": 0.611,
"grad_norm": 592.0,
"kl_loss_10": 182.79292755126954,
"kl_loss_2": 2182.2781616210937,
"kl_loss_3": 1714.5931213378906,
"kl_loss_7": 613.8878936767578,
"learning_rate": 0.0003349674464576834,
"loss": 1190.8153,
"step": 6110
},
{
"ce_loss_10": 3.477449345588684,
"ce_loss_13": 3.3995738983154298,
"ce_loss_2": 4.485787630081177,
"ce_loss_3": 4.219619536399842,
"ce_loss_7": 3.6623815417289736,
"epoch": 0.612,
"grad_norm": 628.0,
"kl_loss_10": 181.76175689697266,
"kl_loss_2": 2235.75986328125,
"kl_loss_3": 1763.9286254882813,
"kl_loss_7": 619.9945251464844,
"learning_rate": 0.00033347053645005966,
"loss": 1163.8981,
"step": 6120
},
{
"ce_loss_10": 3.5906055331230164,
"ce_loss_13": 3.514803075790405,
"ce_loss_2": 4.5458073854446415,
"ce_loss_3": 4.283458161354065,
"ce_loss_7": 3.772923803329468,
"epoch": 0.613,
"grad_norm": 644.0,
"kl_loss_10": 178.51968688964843,
"kl_loss_2": 2116.6735778808593,
"kl_loss_3": 1659.320733642578,
"kl_loss_7": 606.7959167480469,
"learning_rate": 0.00033197530339228485,
"loss": 1170.5501,
"step": 6130
},
{
"ce_loss_10": 3.5471089243888856,
"ce_loss_13": 3.468013954162598,
"ce_loss_2": 4.5254878282546995,
"ce_loss_3": 4.254842627048492,
"ce_loss_7": 3.73079137802124,
"epoch": 0.614,
"grad_norm": 532.0,
"kl_loss_10": 183.3593994140625,
"kl_loss_2": 2176.643206787109,
"kl_loss_3": 1701.0747802734375,
"kl_loss_7": 619.2012481689453,
"learning_rate": 0.00033048176234133967,
"loss": 1166.8168,
"step": 6140
},
{
"ce_loss_10": 3.5306158542633055,
"ce_loss_13": 3.453017568588257,
"ce_loss_2": 4.494013047218322,
"ce_loss_3": 4.233083915710449,
"ce_loss_7": 3.7115015268325804,
"epoch": 0.615,
"grad_norm": 592.0,
"kl_loss_10": 183.00715713500978,
"kl_loss_2": 2175.1028686523437,
"kl_loss_3": 1702.7456420898438,
"kl_loss_7": 619.7471405029297,
"learning_rate": 0.0003289899283371657,
"loss": 1181.7955,
"step": 6150
},
{
"ce_loss_10": 3.5544473528862,
"ce_loss_13": 3.4786699175834657,
"ce_loss_2": 4.547568416595459,
"ce_loss_3": 4.281970739364624,
"ce_loss_7": 3.7363924741744996,
"epoch": 0.616,
"grad_norm": 600.0,
"kl_loss_10": 178.17992782592773,
"kl_loss_2": 2185.039025878906,
"kl_loss_3": 1723.0535766601563,
"kl_loss_7": 600.4897644042969,
"learning_rate": 0.0003274998164025148,
"loss": 1196.8087,
"step": 6160
},
{
"ce_loss_10": 3.586019229888916,
"ce_loss_13": 3.509108769893646,
"ce_loss_2": 4.5615111827850345,
"ce_loss_3": 4.2898026466369625,
"ce_loss_7": 3.76910115480423,
"epoch": 0.617,
"grad_norm": 596.0,
"kl_loss_10": 183.4706718444824,
"kl_loss_2": 2168.4442443847656,
"kl_loss_3": 1695.5099731445312,
"kl_loss_7": 616.1861694335937,
"learning_rate": 0.0003260114415427975,
"loss": 1190.7359,
"step": 6170
},
{
"ce_loss_10": 3.5073242664337156,
"ce_loss_13": 3.4292925119400026,
"ce_loss_2": 4.523944449424744,
"ce_loss_3": 4.251231408119201,
"ce_loss_7": 3.6900092363357544,
"epoch": 0.618,
"grad_norm": 612.0,
"kl_loss_10": 180.3868850708008,
"kl_loss_2": 2258.1235778808596,
"kl_loss_3": 1773.0142578125,
"kl_loss_7": 615.9339263916015,
"learning_rate": 0.0003245248187459323,
"loss": 1218.0189,
"step": 6180
},
{
"ce_loss_10": 3.4972055196762084,
"ce_loss_13": 3.4217321276664734,
"ce_loss_2": 4.4563206195831295,
"ce_loss_3": 4.195159709453582,
"ce_loss_7": 3.6716169476509095,
"epoch": 0.619,
"grad_norm": 596.0,
"kl_loss_10": 176.01737060546876,
"kl_loss_2": 2149.769183349609,
"kl_loss_3": 1675.0436157226563,
"kl_loss_7": 597.3845794677734,
"learning_rate": 0.00032303996298219416,
"loss": 1151.9841,
"step": 6190
},
{
"ce_loss_10": 3.5777448058128356,
"ce_loss_13": 3.500323712825775,
"ce_loss_2": 4.53541202545166,
"ce_loss_3": 4.266927003860474,
"ce_loss_7": 3.755027210712433,
"epoch": 0.62,
"grad_norm": 540.0,
"kl_loss_10": 178.23485260009767,
"kl_loss_2": 2112.595593261719,
"kl_loss_3": 1646.6524230957032,
"kl_loss_7": 602.416943359375,
"learning_rate": 0.00032155688920406414,
"loss": 1145.6068,
"step": 6200
},
{
"ce_loss_10": 3.489628565311432,
"ce_loss_13": 3.408998668193817,
"ce_loss_2": 4.5190582275390625,
"ce_loss_3": 4.245685923099518,
"ce_loss_7": 3.671571230888367,
"epoch": 0.621,
"grad_norm": 652.0,
"kl_loss_10": 183.76829681396484,
"kl_loss_2": 2272.4242309570313,
"kl_loss_3": 1788.1328247070312,
"kl_loss_7": 627.5300720214843,
"learning_rate": 0.0003200756123460788,
"loss": 1224.8912,
"step": 6210
},
{
"ce_loss_10": 3.5219372153282165,
"ce_loss_13": 3.4430843591690063,
"ce_loss_2": 4.530118870735168,
"ce_loss_3": 4.26385805606842,
"ce_loss_7": 3.708560848236084,
"epoch": 0.622,
"grad_norm": 684.0,
"kl_loss_10": 185.2090690612793,
"kl_loss_2": 2254.1378173828125,
"kl_loss_3": 1774.3692199707032,
"kl_loss_7": 633.037890625,
"learning_rate": 0.00031859614732467957,
"loss": 1207.0312,
"step": 6220
},
{
"ce_loss_10": 3.5700612902641295,
"ce_loss_13": 3.4917181968688964,
"ce_loss_2": 4.540509462356567,
"ce_loss_3": 4.275044929981232,
"ce_loss_7": 3.7488471269607544,
"epoch": 0.623,
"grad_norm": 564.0,
"kl_loss_10": 178.5159034729004,
"kl_loss_2": 2155.8026611328123,
"kl_loss_3": 1685.940985107422,
"kl_loss_7": 600.1484497070312,
"learning_rate": 0.00031711850903806275,
"loss": 1157.7447,
"step": 6230
},
{
"ce_loss_10": 3.479930281639099,
"ce_loss_13": 3.39938303232193,
"ce_loss_2": 4.482881689071656,
"ce_loss_3": 4.214280414581299,
"ce_loss_7": 3.666577732563019,
"epoch": 0.624,
"grad_norm": 528.0,
"kl_loss_10": 185.9188034057617,
"kl_loss_2": 2243.2543823242186,
"kl_loss_3": 1758.6941833496094,
"kl_loss_7": 628.0701446533203,
"learning_rate": 0.0003156427123660297,
"loss": 1172.3383,
"step": 6240
},
{
"ce_loss_10": 3.5643810868263244,
"ce_loss_13": 3.4881609320640563,
"ce_loss_2": 4.518170762062073,
"ce_loss_3": 4.258860862255096,
"ce_loss_7": 3.745578372478485,
"epoch": 0.625,
"grad_norm": 596.0,
"kl_loss_10": 180.73046417236327,
"kl_loss_2": 2135.2883361816407,
"kl_loss_3": 1669.8626892089844,
"kl_loss_7": 610.9410751342773,
"learning_rate": 0.0003141687721698363,
"loss": 1172.6947,
"step": 6250
},
{
"ce_loss_10": 3.536016345024109,
"ce_loss_13": 3.4606189489364625,
"ce_loss_2": 4.476572108268738,
"ce_loss_3": 4.211447751522064,
"ce_loss_7": 3.7014155983924866,
"epoch": 0.626,
"grad_norm": 616.0,
"kl_loss_10": 175.8163749694824,
"kl_loss_2": 2105.446813964844,
"kl_loss_3": 1637.2767333984375,
"kl_loss_7": 587.5009735107421,
"learning_rate": 0.00031269670329204396,
"loss": 1155.6384,
"step": 6260
},
{
"ce_loss_10": 3.5712973356246946,
"ce_loss_13": 3.4947034239768984,
"ce_loss_2": 4.515408515930176,
"ce_loss_3": 4.251049220561981,
"ce_loss_7": 3.7454182147979735,
"epoch": 0.627,
"grad_norm": 644.0,
"kl_loss_10": 181.6370933532715,
"kl_loss_2": 2120.7030395507813,
"kl_loss_3": 1650.25341796875,
"kl_loss_7": 607.5851348876953,
"learning_rate": 0.00031122652055637015,
"loss": 1169.2292,
"step": 6270
},
{
"ce_loss_10": 3.536707639694214,
"ce_loss_13": 3.460920011997223,
"ce_loss_2": 4.534442710876465,
"ce_loss_3": 4.263644289970398,
"ce_loss_7": 3.7196394085884092,
"epoch": 0.628,
"grad_norm": 556.0,
"kl_loss_10": 181.97393569946288,
"kl_loss_2": 2233.067547607422,
"kl_loss_3": 1750.9132995605469,
"kl_loss_7": 618.4156631469726,
"learning_rate": 0.0003097582387675385,
"loss": 1169.3315,
"step": 6280
},
{
"ce_loss_10": 3.5805759191513062,
"ce_loss_13": 3.503600060939789,
"ce_loss_2": 4.546688604354858,
"ce_loss_3": 4.285043132305145,
"ce_loss_7": 3.7596506476402283,
"epoch": 0.629,
"grad_norm": 536.0,
"kl_loss_10": 181.50545425415038,
"kl_loss_2": 2176.2076171875,
"kl_loss_3": 1706.1532836914062,
"kl_loss_7": 611.5946624755859,
"learning_rate": 0.00030829187271113034,
"loss": 1162.2808,
"step": 6290
},
{
"ce_loss_10": 3.5692893385887148,
"ce_loss_13": 3.49398432970047,
"ce_loss_2": 4.5324320793151855,
"ce_loss_3": 4.271732580661774,
"ce_loss_7": 3.738038659095764,
"epoch": 0.63,
"grad_norm": 660.0,
"kl_loss_10": 176.80067443847656,
"kl_loss_2": 2142.2837646484377,
"kl_loss_3": 1672.4158203125,
"kl_loss_7": 598.4024932861328,
"learning_rate": 0.00030682743715343565,
"loss": 1178.4112,
"step": 6300
},
{
"ce_loss_10": 3.5165117979049683,
"ce_loss_13": 3.4367071866989134,
"ce_loss_2": 4.5106003999710085,
"ce_loss_3": 4.248035335540772,
"ce_loss_7": 3.709369492530823,
"epoch": 0.631,
"grad_norm": 624.0,
"kl_loss_10": 185.85676803588868,
"kl_loss_2": 2187.9409912109377,
"kl_loss_3": 1716.0930541992188,
"kl_loss_7": 624.2622802734375,
"learning_rate": 0.0003053649468413043,
"loss": 1194.6155,
"step": 6310
},
{
"ce_loss_10": 3.6293103814125063,
"ce_loss_13": 3.5522167325019836,
"ce_loss_2": 4.589023590087891,
"ce_loss_3": 4.323796653747559,
"ce_loss_7": 3.8087464213371276,
"epoch": 0.632,
"grad_norm": 664.0,
"kl_loss_10": 183.21706161499023,
"kl_loss_2": 2147.7636474609376,
"kl_loss_3": 1686.63291015625,
"kl_loss_7": 615.6221435546875,
"learning_rate": 0.00030390441650199725,
"loss": 1158.5613,
"step": 6320
},
{
"ce_loss_10": 3.528099310398102,
"ce_loss_13": 3.4539591908454894,
"ce_loss_2": 4.50098488330841,
"ce_loss_3": 4.2318372368812565,
"ce_loss_7": 3.70650874376297,
"epoch": 0.633,
"grad_norm": 676.0,
"kl_loss_10": 181.68777465820312,
"kl_loss_2": 2164.3626098632812,
"kl_loss_3": 1687.4978088378907,
"kl_loss_7": 610.1166748046875,
"learning_rate": 0.00030244586084303903,
"loss": 1154.3,
"step": 6330
},
{
"ce_loss_10": 3.4934327363967896,
"ce_loss_13": 3.416351318359375,
"ce_loss_2": 4.505590105056763,
"ce_loss_3": 4.235206222534179,
"ce_loss_7": 3.6859657049179075,
"epoch": 0.634,
"grad_norm": 564.0,
"kl_loss_10": 183.83423309326173,
"kl_loss_2": 2252.3034423828126,
"kl_loss_3": 1765.4126586914062,
"kl_loss_7": 627.5603424072266,
"learning_rate": 0.00030098929455206903,
"loss": 1173.0053,
"step": 6340
},
{
"ce_loss_10": 3.5009153842926026,
"ce_loss_13": 3.4256786108016968,
"ce_loss_2": 4.492053604125976,
"ce_loss_3": 4.224474251270294,
"ce_loss_7": 3.6754886388778685,
"epoch": 0.635,
"grad_norm": 592.0,
"kl_loss_10": 180.03737106323243,
"kl_loss_2": 2236.1428771972655,
"kl_loss_3": 1754.843768310547,
"kl_loss_7": 615.7969268798828,
"learning_rate": 0.00029953473229669324,
"loss": 1215.3177,
"step": 6350
},
{
"ce_loss_10": 3.5320404410362243,
"ce_loss_13": 3.4564929485321043,
"ce_loss_2": 4.505661821365356,
"ce_loss_3": 4.2487224817276,
"ce_loss_7": 3.717836594581604,
"epoch": 0.636,
"grad_norm": 560.0,
"kl_loss_10": 180.38322067260742,
"kl_loss_2": 2164.6011291503905,
"kl_loss_3": 1703.929705810547,
"kl_loss_7": 616.6229248046875,
"learning_rate": 0.00029808218872433767,
"loss": 1152.0346,
"step": 6360
},
{
"ce_loss_10": 3.5955461502075194,
"ce_loss_13": 3.521399176120758,
"ce_loss_2": 4.553752660751343,
"ce_loss_3": 4.287336015701294,
"ce_loss_7": 3.7661701798439027,
"epoch": 0.637,
"grad_norm": 584.0,
"kl_loss_10": 178.29160919189454,
"kl_loss_2": 2154.0076416015627,
"kl_loss_3": 1683.874932861328,
"kl_loss_7": 604.1974517822266,
"learning_rate": 0.0002966316784621,
"loss": 1148.5613,
"step": 6370
},
{
"ce_loss_10": 3.509734773635864,
"ce_loss_13": 3.4283226490020753,
"ce_loss_2": 4.500501930713654,
"ce_loss_3": 4.237296044826508,
"ce_loss_7": 3.697099339962006,
"epoch": 0.638,
"grad_norm": 572.0,
"kl_loss_10": 183.85193252563477,
"kl_loss_2": 2219.202685546875,
"kl_loss_3": 1744.0482971191407,
"kl_loss_7": 628.3488616943359,
"learning_rate": 0.0002951832161166024,
"loss": 1161.3599,
"step": 6380
},
{
"ce_loss_10": 3.5833853006362917,
"ce_loss_13": 3.5059871673583984,
"ce_loss_2": 4.560363245010376,
"ce_loss_3": 4.295464622974396,
"ce_loss_7": 3.7690476536750794,
"epoch": 0.639,
"grad_norm": 524.0,
"kl_loss_10": 182.52049560546874,
"kl_loss_2": 2159.0023681640623,
"kl_loss_3": 1692.8950073242188,
"kl_loss_7": 613.0403747558594,
"learning_rate": 0.0002937368162738445,
"loss": 1138.2498,
"step": 6390
},
{
"ce_loss_10": 3.5200544476509092,
"ce_loss_13": 3.4506627917289734,
"ce_loss_2": 4.487471246719361,
"ce_loss_3": 4.225354993343354,
"ce_loss_7": 3.695161283016205,
"epoch": 0.64,
"grad_norm": 648.0,
"kl_loss_10": 174.7782325744629,
"kl_loss_2": 2168.6176025390623,
"kl_loss_3": 1700.3829406738282,
"kl_loss_7": 598.6395080566406,
"learning_rate": 0.0002922924934990568,
"loss": 1174.9205,
"step": 6400
},
{
"ce_loss_10": 3.460334539413452,
"ce_loss_13": 3.3851306796073914,
"ce_loss_2": 4.486385345458984,
"ce_loss_3": 4.209315371513367,
"ce_loss_7": 3.646379458904266,
"epoch": 0.641,
"grad_norm": 592.0,
"kl_loss_10": 181.4815986633301,
"kl_loss_2": 2269.7528442382813,
"kl_loss_3": 1780.3827819824219,
"kl_loss_7": 623.2667114257813,
"learning_rate": 0.0002908502623365536,
"loss": 1180.7166,
"step": 6410
},
{
"ce_loss_10": 3.400831735134125,
"ce_loss_13": 3.323111522197723,
"ce_loss_2": 4.43465530872345,
"ce_loss_3": 4.168019390106201,
"ce_loss_7": 3.5887860655784607,
"epoch": 0.642,
"grad_norm": 584.0,
"kl_loss_10": 180.2845359802246,
"kl_loss_2": 2285.9919677734374,
"kl_loss_3": 1807.420263671875,
"kl_loss_7": 623.3097045898437,
"learning_rate": 0.0002894101373095867,
"loss": 1196.7524,
"step": 6420
},
{
"ce_loss_10": 3.610305404663086,
"ce_loss_13": 3.5335601687431337,
"ce_loss_2": 4.569022560119629,
"ce_loss_3": 4.3065975427627565,
"ce_loss_7": 3.788155424594879,
"epoch": 0.643,
"grad_norm": 656.0,
"kl_loss_10": 185.8272720336914,
"kl_loss_2": 2151.861962890625,
"kl_loss_3": 1684.6893188476563,
"kl_loss_7": 614.9325622558594,
"learning_rate": 0.00028797213292019926,
"loss": 1162.4543,
"step": 6430
},
{
"ce_loss_10": 3.5838815212249755,
"ce_loss_13": 3.5060059309005736,
"ce_loss_2": 4.542932081222534,
"ce_loss_3": 4.284253716468811,
"ce_loss_7": 3.7631338119506834,
"epoch": 0.644,
"grad_norm": 536.0,
"kl_loss_10": 182.34116134643554,
"kl_loss_2": 2161.139373779297,
"kl_loss_3": 1697.281298828125,
"kl_loss_7": 612.268832397461,
"learning_rate": 0.0002865362636490791,
"loss": 1187.0314,
"step": 6440
},
{
"ce_loss_10": 3.598045587539673,
"ce_loss_13": 3.524975371360779,
"ce_loss_2": 4.552241158485413,
"ce_loss_3": 4.294051146507263,
"ce_loss_7": 3.7727458000183107,
"epoch": 0.645,
"grad_norm": 536.0,
"kl_loss_10": 178.94673080444335,
"kl_loss_2": 2142.5753845214845,
"kl_loss_3": 1685.976092529297,
"kl_loss_7": 604.8534698486328,
"learning_rate": 0.0002851025439554142,
"loss": 1148.6578,
"step": 6450
},
{
"ce_loss_10": 3.5864033341407775,
"ce_loss_13": 3.5102365136146547,
"ce_loss_2": 4.530459260940551,
"ce_loss_3": 4.2697702050209045,
"ce_loss_7": 3.77256600856781,
"epoch": 0.646,
"grad_norm": 552.0,
"kl_loss_10": 180.96249084472657,
"kl_loss_2": 2094.927349853516,
"kl_loss_3": 1631.7639465332031,
"kl_loss_7": 608.3878204345704,
"learning_rate": 0.00028367098827674573,
"loss": 1141.2359,
"step": 6460
},
{
"ce_loss_10": 3.5153084993362427,
"ce_loss_13": 3.4397502303123475,
"ce_loss_2": 4.504270768165588,
"ce_loss_3": 4.232501339912415,
"ce_loss_7": 3.69371120929718,
"epoch": 0.647,
"grad_norm": 588.0,
"kl_loss_10": 178.14280624389647,
"kl_loss_2": 2186.196148681641,
"kl_loss_3": 1706.314013671875,
"kl_loss_7": 600.8890106201172,
"learning_rate": 0.00028224161102882397,
"loss": 1170.0225,
"step": 6470
},
{
"ce_loss_10": 3.494782865047455,
"ce_loss_13": 3.418469178676605,
"ce_loss_2": 4.45595076084137,
"ce_loss_3": 4.1893230199813845,
"ce_loss_7": 3.6707924604415894,
"epoch": 0.648,
"grad_norm": 644.0,
"kl_loss_10": 177.30072097778321,
"kl_loss_2": 2145.1575622558594,
"kl_loss_3": 1676.7467163085937,
"kl_loss_7": 599.7084075927735,
"learning_rate": 0.00028081442660546124,
"loss": 1164.476,
"step": 6480
},
{
"ce_loss_10": 3.5571305990219115,
"ce_loss_13": 3.4820198893547056,
"ce_loss_2": 4.520304107666016,
"ce_loss_3": 4.250013303756714,
"ce_loss_7": 3.7307825326919555,
"epoch": 0.649,
"grad_norm": 708.0,
"kl_loss_10": 180.5020294189453,
"kl_loss_2": 2162.021893310547,
"kl_loss_3": 1681.335223388672,
"kl_loss_7": 604.5856201171875,
"learning_rate": 0.0002793894493783892,
"loss": 1161.7205,
"step": 6490
},
{
"ce_loss_10": 3.5730626702308657,
"ce_loss_13": 3.4996850967407225,
"ce_loss_2": 4.535577750205993,
"ce_loss_3": 4.2806238532066345,
"ce_loss_7": 3.746683120727539,
"epoch": 0.65,
"grad_norm": 532.0,
"kl_loss_10": 175.4969383239746,
"kl_loss_2": 2147.980224609375,
"kl_loss_3": 1685.3772094726562,
"kl_loss_7": 592.4264343261718,
"learning_rate": 0.0002779666936971129,
"loss": 1147.2826,
"step": 6500
},
{
"ce_loss_10": 3.579540717601776,
"ce_loss_13": 3.503932845592499,
"ce_loss_2": 4.570328307151795,
"ce_loss_3": 4.304692578315735,
"ce_loss_7": 3.760816919803619,
"epoch": 0.651,
"grad_norm": 560.0,
"kl_loss_10": 180.5074890136719,
"kl_loss_2": 2190.618371582031,
"kl_loss_3": 1722.614599609375,
"kl_loss_7": 613.0008575439454,
"learning_rate": 0.00027654617388876614,
"loss": 1176.0404,
"step": 6510
},
{
"ce_loss_10": 3.6101376891136168,
"ce_loss_13": 3.5372079849243163,
"ce_loss_2": 4.574031090736389,
"ce_loss_3": 4.305119824409485,
"ce_loss_7": 3.7854838371276855,
"epoch": 0.652,
"grad_norm": 506.0,
"kl_loss_10": 179.79571838378905,
"kl_loss_2": 2171.3123046875,
"kl_loss_3": 1687.553973388672,
"kl_loss_7": 603.6635681152344,
"learning_rate": 0.0002751279042579672,
"loss": 1161.0621,
"step": 6520
},
{
"ce_loss_10": 3.5500629782676696,
"ce_loss_13": 3.475240981578827,
"ce_loss_2": 4.515746712684631,
"ce_loss_3": 4.248768877983093,
"ce_loss_7": 3.726365828514099,
"epoch": 0.653,
"grad_norm": 520.0,
"kl_loss_10": 175.72006454467774,
"kl_loss_2": 2132.0258361816404,
"kl_loss_3": 1663.1835388183595,
"kl_loss_7": 593.7185913085938,
"learning_rate": 0.00027371189908667604,
"loss": 1173.0754,
"step": 6530
},
{
"ce_loss_10": 3.6066598892211914,
"ce_loss_13": 3.5256664633750914,
"ce_loss_2": 4.603748297691345,
"ce_loss_3": 4.334264886379242,
"ce_loss_7": 3.7890505313873293,
"epoch": 0.654,
"grad_norm": 556.0,
"kl_loss_10": 184.21709976196288,
"kl_loss_2": 2224.855090332031,
"kl_loss_3": 1750.1156860351562,
"kl_loss_7": 618.167140197754,
"learning_rate": 0.00027229817263404863,
"loss": 1200.1538,
"step": 6540
},
{
"ce_loss_10": 3.579574966430664,
"ce_loss_13": 3.502782142162323,
"ce_loss_2": 4.505041122436523,
"ce_loss_3": 4.243296790122986,
"ce_loss_7": 3.751440441608429,
"epoch": 0.655,
"grad_norm": 552.0,
"kl_loss_10": 178.47067565917968,
"kl_loss_2": 2091.8775146484377,
"kl_loss_3": 1621.7340759277345,
"kl_loss_7": 596.6207824707031,
"learning_rate": 0.0002708867391362948,
"loss": 1145.7798,
"step": 6550
},
{
"ce_loss_10": 3.5594303607940674,
"ce_loss_13": 3.4848424673080443,
"ce_loss_2": 4.510186004638672,
"ce_loss_3": 4.239946413040161,
"ce_loss_7": 3.729544758796692,
"epoch": 0.656,
"grad_norm": 600.0,
"kl_loss_10": 174.02187423706056,
"kl_loss_2": 2098.8442565917967,
"kl_loss_3": 1625.1830017089844,
"kl_loss_7": 579.9141540527344,
"learning_rate": 0.0002694776128065345,
"loss": 1152.9096,
"step": 6560
},
{
"ce_loss_10": 3.500006926059723,
"ce_loss_13": 3.4239490151405336,
"ce_loss_2": 4.463183629512787,
"ce_loss_3": 4.198473536968232,
"ce_loss_7": 3.6787616848945617,
"epoch": 0.657,
"grad_norm": 524.0,
"kl_loss_10": 181.49803161621094,
"kl_loss_2": 2175.4376220703125,
"kl_loss_3": 1692.8995849609375,
"kl_loss_7": 616.3900787353516,
"learning_rate": 0.00026807080783465374,
"loss": 1144.908,
"step": 6570
},
{
"ce_loss_10": 3.609026849269867,
"ce_loss_13": 3.5301132678985594,
"ce_loss_2": 4.5842578411102295,
"ce_loss_3": 4.322691702842713,
"ce_loss_7": 3.7907386422157288,
"epoch": 0.658,
"grad_norm": 540.0,
"kl_loss_10": 181.2076759338379,
"kl_loss_2": 2170.1943603515624,
"kl_loss_3": 1703.6680847167968,
"kl_loss_7": 614.0023223876954,
"learning_rate": 0.00026666633838716316,
"loss": 1180.9756,
"step": 6580
},
{
"ce_loss_10": 3.505808639526367,
"ce_loss_13": 3.424725067615509,
"ce_loss_2": 4.498762392997742,
"ce_loss_3": 4.228746104240417,
"ce_loss_7": 3.6866647005081177,
"epoch": 0.659,
"grad_norm": 660.0,
"kl_loss_10": 183.16211471557617,
"kl_loss_2": 2205.399041748047,
"kl_loss_3": 1729.7577697753907,
"kl_loss_7": 619.1098449707031,
"learning_rate": 0.00026526421860705474,
"loss": 1196.5574,
"step": 6590
},
{
"ce_loss_10": 3.5278443932533263,
"ce_loss_13": 3.451081359386444,
"ce_loss_2": 4.514556968212128,
"ce_loss_3": 4.246424973011017,
"ce_loss_7": 3.7130470991134645,
"epoch": 0.66,
"grad_norm": 604.0,
"kl_loss_10": 183.11859054565429,
"kl_loss_2": 2195.080352783203,
"kl_loss_3": 1720.6291870117188,
"kl_loss_7": 617.9165832519532,
"learning_rate": 0.0002638644626136587,
"loss": 1167.115,
"step": 6600
},
{
"ce_loss_10": 3.5388341546058655,
"ce_loss_13": 3.4648394107818605,
"ce_loss_2": 4.518757033348083,
"ce_loss_3": 4.251232302188873,
"ce_loss_7": 3.7166133403778074,
"epoch": 0.661,
"grad_norm": 600.0,
"kl_loss_10": 177.9744987487793,
"kl_loss_2": 2169.1557434082033,
"kl_loss_3": 1695.717547607422,
"kl_loss_7": 605.9922027587891,
"learning_rate": 0.00026246708450250255,
"loss": 1163.9504,
"step": 6610
},
{
"ce_loss_10": 3.536445343494415,
"ce_loss_13": 3.4618695259094237,
"ce_loss_2": 4.500265717506409,
"ce_loss_3": 4.239484262466431,
"ce_loss_7": 3.7084587097167967,
"epoch": 0.662,
"grad_norm": 624.0,
"kl_loss_10": 177.32165069580077,
"kl_loss_2": 2153.5470336914063,
"kl_loss_3": 1682.642596435547,
"kl_loss_7": 596.0139556884766,
"learning_rate": 0.00026107209834516854,
"loss": 1159.9879,
"step": 6620
},
{
"ce_loss_10": 3.4876843810081484,
"ce_loss_13": 3.4082067966461183,
"ce_loss_2": 4.498864269256591,
"ce_loss_3": 4.235283279418946,
"ce_loss_7": 3.666292154788971,
"epoch": 0.663,
"grad_norm": 620.0,
"kl_loss_10": 180.81258544921874,
"kl_loss_2": 2256.257257080078,
"kl_loss_3": 1782.119805908203,
"kl_loss_7": 615.3174774169922,
"learning_rate": 0.0002596795181891514,
"loss": 1197.8284,
"step": 6630
},
{
"ce_loss_10": 3.4956326842308045,
"ce_loss_13": 3.414097845554352,
"ce_loss_2": 4.498321509361267,
"ce_loss_3": 4.228005886077881,
"ce_loss_7": 3.6832273960113526,
"epoch": 0.664,
"grad_norm": 676.0,
"kl_loss_10": 186.22876663208007,
"kl_loss_2": 2223.420690917969,
"kl_loss_3": 1743.7244079589843,
"kl_loss_7": 627.1363647460937,
"learning_rate": 0.000258289358057718,
"loss": 1222.5622,
"step": 6640
},
{
"ce_loss_10": 3.5669368505477905,
"ce_loss_13": 3.4856945157051085,
"ce_loss_2": 4.551595258712768,
"ce_loss_3": 4.286670958995819,
"ce_loss_7": 3.751522934436798,
"epoch": 0.665,
"grad_norm": 556.0,
"kl_loss_10": 185.22528228759765,
"kl_loss_2": 2211.366003417969,
"kl_loss_3": 1740.3289489746094,
"kl_loss_7": 619.9797149658203,
"learning_rate": 0.0002569016319497657,
"loss": 1184.505,
"step": 6650
},
{
"ce_loss_10": 3.5523419260978697,
"ce_loss_13": 3.4712039113044737,
"ce_loss_2": 4.537931609153747,
"ce_loss_3": 4.279768109321594,
"ce_loss_7": 3.7336499214172365,
"epoch": 0.666,
"grad_norm": 544.0,
"kl_loss_10": 186.748779296875,
"kl_loss_2": 2205.344372558594,
"kl_loss_3": 1734.7506713867188,
"kl_loss_7": 622.0667907714844,
"learning_rate": 0.00025551635383968066,
"loss": 1198.5273,
"step": 6660
},
{
"ce_loss_10": 3.463807392120361,
"ce_loss_13": 3.3866657257080077,
"ce_loss_2": 4.469345259666443,
"ce_loss_3": 4.193110883235931,
"ce_loss_7": 3.6494885683059692,
"epoch": 0.667,
"grad_norm": 576.0,
"kl_loss_10": 184.71422729492187,
"kl_loss_2": 2248.0073059082033,
"kl_loss_3": 1753.0323059082032,
"kl_loss_7": 619.0755401611328,
"learning_rate": 0.00025413353767719804,
"loss": 1195.2947,
"step": 6670
},
{
"ce_loss_10": 3.5190200567245484,
"ce_loss_13": 3.4452382922172546,
"ce_loss_2": 4.497325706481933,
"ce_loss_3": 4.232502174377442,
"ce_loss_7": 3.694538187980652,
"epoch": 0.668,
"grad_norm": 568.0,
"kl_loss_10": 177.03625259399413,
"kl_loss_2": 2189.713330078125,
"kl_loss_3": 1718.3252258300781,
"kl_loss_7": 606.7687957763671,
"learning_rate": 0.0002527531973872617,
"loss": 1177.4366,
"step": 6680
},
{
"ce_loss_10": 3.538633036613464,
"ce_loss_13": 3.4624911904335023,
"ce_loss_2": 4.504513430595398,
"ce_loss_3": 4.237690329551697,
"ce_loss_7": 3.7170337319374083,
"epoch": 0.669,
"grad_norm": 592.0,
"kl_loss_10": 178.7047462463379,
"kl_loss_2": 2166.141973876953,
"kl_loss_3": 1687.0463806152343,
"kl_loss_7": 609.9779602050781,
"learning_rate": 0.0002513753468698826,
"loss": 1160.7738,
"step": 6690
},
{
"ce_loss_10": 3.510761630535126,
"ce_loss_13": 3.4321574330329896,
"ce_loss_2": 4.506561207771301,
"ce_loss_3": 4.238252663612366,
"ce_loss_7": 3.6953013062477114,
"epoch": 0.67,
"grad_norm": 544.0,
"kl_loss_10": 182.07874755859376,
"kl_loss_2": 2231.308416748047,
"kl_loss_3": 1749.7796997070313,
"kl_loss_7": 618.383251953125,
"learning_rate": 0.0002500000000000001,
"loss": 1185.6723,
"step": 6700
},
{
"ce_loss_10": 3.62176308631897,
"ce_loss_13": 3.547257494926453,
"ce_loss_2": 4.5295734882354735,
"ce_loss_3": 4.277957272529602,
"ce_loss_7": 3.788572609424591,
"epoch": 0.671,
"grad_norm": 548.0,
"kl_loss_10": 173.84563446044922,
"kl_loss_2": 2050.3306396484377,
"kl_loss_3": 1604.508935546875,
"kl_loss_7": 584.9092193603516,
"learning_rate": 0.0002486271706273421,
"loss": 1168.4034,
"step": 6710
},
{
"ce_loss_10": 3.557868146896362,
"ce_loss_13": 3.485461747646332,
"ce_loss_2": 4.488811063766479,
"ce_loss_3": 4.22833331823349,
"ce_loss_7": 3.7254873156547545,
"epoch": 0.672,
"grad_norm": 644.0,
"kl_loss_10": 175.32781143188475,
"kl_loss_2": 2091.0470642089845,
"kl_loss_3": 1627.7723266601563,
"kl_loss_7": 589.4509521484375,
"learning_rate": 0.0002472568725762853,
"loss": 1154.7741,
"step": 6720
},
{
"ce_loss_10": 3.5483877897262572,
"ce_loss_13": 3.4755659341812133,
"ce_loss_2": 4.477482891082763,
"ce_loss_3": 4.2254945039749146,
"ce_loss_7": 3.717711102962494,
"epoch": 0.673,
"grad_norm": 564.0,
"kl_loss_10": 173.398193359375,
"kl_loss_2": 2110.752795410156,
"kl_loss_3": 1653.0817565917969,
"kl_loss_7": 586.6092742919922,
"learning_rate": 0.00024588911964571554,
"loss": 1145.849,
"step": 6730
},
{
"ce_loss_10": 3.5657129168510435,
"ce_loss_13": 3.4857802987098694,
"ce_loss_2": 4.5576330661773685,
"ce_loss_3": 4.288707995414734,
"ce_loss_7": 3.753812789916992,
"epoch": 0.674,
"grad_norm": 524.0,
"kl_loss_10": 187.46376113891603,
"kl_loss_2": 2205.4671936035156,
"kl_loss_3": 1727.960107421875,
"kl_loss_7": 626.6214233398438,
"learning_rate": 0.00024452392560888974,
"loss": 1167.7188,
"step": 6740
},
{
"ce_loss_10": 3.455358147621155,
"ce_loss_13": 3.378260016441345,
"ce_loss_2": 4.419786167144776,
"ce_loss_3": 4.155612635612488,
"ce_loss_7": 3.6355133295059203,
"epoch": 0.675,
"grad_norm": 532.0,
"kl_loss_10": 177.83211364746094,
"kl_loss_2": 2172.1373962402345,
"kl_loss_3": 1704.7519836425781,
"kl_loss_7": 606.2507995605469,
"learning_rate": 0.00024316130421329695,
"loss": 1157.1621,
"step": 6750
},
{
"ce_loss_10": 3.535005438327789,
"ce_loss_13": 3.4575978398323057,
"ce_loss_2": 4.505799317359925,
"ce_loss_3": 4.232890093326569,
"ce_loss_7": 3.7101247310638428,
"epoch": 0.676,
"grad_norm": 564.0,
"kl_loss_10": 177.2459358215332,
"kl_loss_2": 2136.964288330078,
"kl_loss_3": 1660.3365295410156,
"kl_loss_7": 591.8486877441406,
"learning_rate": 0.00024180126918051909,
"loss": 1154.5281,
"step": 6760
},
{
"ce_loss_10": 3.577043890953064,
"ce_loss_13": 3.5019183993339538,
"ce_loss_2": 4.527614569664001,
"ce_loss_3": 4.265857553482055,
"ce_loss_7": 3.7534381628036497,
"epoch": 0.677,
"grad_norm": 604.0,
"kl_loss_10": 178.1947784423828,
"kl_loss_2": 2127.7522155761717,
"kl_loss_3": 1659.7866516113281,
"kl_loss_7": 597.4583068847656,
"learning_rate": 0.00024044383420609406,
"loss": 1141.1451,
"step": 6770
},
{
"ce_loss_10": 3.589032161235809,
"ce_loss_13": 3.514654505252838,
"ce_loss_2": 4.520573258399963,
"ce_loss_3": 4.2588379859924315,
"ce_loss_7": 3.7536001801490784,
"epoch": 0.678,
"grad_norm": 552.0,
"kl_loss_10": 175.52578201293946,
"kl_loss_2": 2107.4931701660157,
"kl_loss_3": 1641.6564514160157,
"kl_loss_7": 591.3939788818359,
"learning_rate": 0.00023908901295937712,
"loss": 1175.0256,
"step": 6780
},
{
"ce_loss_10": 3.5837427616119384,
"ce_loss_13": 3.505910849571228,
"ce_loss_2": 4.535600376129151,
"ce_loss_3": 4.271885943412781,
"ce_loss_7": 3.755298101902008,
"epoch": 0.679,
"grad_norm": 596.0,
"kl_loss_10": 177.23758392333986,
"kl_loss_2": 2111.0602905273436,
"kl_loss_3": 1645.3706420898438,
"kl_loss_7": 592.0553497314453,
"learning_rate": 0.00023773681908340283,
"loss": 1169.8496,
"step": 6790
},
{
"ce_loss_10": 3.5592074632644652,
"ce_loss_13": 3.4772790670394897,
"ce_loss_2": 4.548656535148621,
"ce_loss_3": 4.2832125425338745,
"ce_loss_7": 3.7424607038497926,
"epoch": 0.68,
"grad_norm": 600.0,
"kl_loss_10": 187.7086395263672,
"kl_loss_2": 2222.717413330078,
"kl_loss_3": 1751.2139282226562,
"kl_loss_7": 621.8694488525391,
"learning_rate": 0.00023638726619474876,
"loss": 1203.8379,
"step": 6800
},
{
"ce_loss_10": 3.5476158022880555,
"ce_loss_13": 3.467449462413788,
"ce_loss_2": 4.563005781173706,
"ce_loss_3": 4.29602427482605,
"ce_loss_7": 3.7365992784500124,
"epoch": 0.681,
"grad_norm": 580.0,
"kl_loss_10": 183.3829345703125,
"kl_loss_2": 2228.727575683594,
"kl_loss_3": 1760.1514953613282,
"kl_loss_7": 626.0395812988281,
"learning_rate": 0.0002350403678833976,
"loss": 1182.506,
"step": 6810
},
{
"ce_loss_10": 3.4750794649124144,
"ce_loss_13": 3.39876846075058,
"ce_loss_2": 4.465261030197143,
"ce_loss_3": 4.1875766038894655,
"ce_loss_7": 3.6560620784759523,
"epoch": 0.682,
"grad_norm": 490.0,
"kl_loss_10": 178.67537307739258,
"kl_loss_2": 2200.3225830078127,
"kl_loss_3": 1714.0246276855469,
"kl_loss_7": 608.0651733398438,
"learning_rate": 0.00023369613771260007,
"loss": 1160.444,
"step": 6820
},
{
"ce_loss_10": 3.5863471627235413,
"ce_loss_13": 3.5096321582794188,
"ce_loss_2": 4.573717498779297,
"ce_loss_3": 4.300703597068787,
"ce_loss_7": 3.767488884925842,
"epoch": 0.683,
"grad_norm": 608.0,
"kl_loss_10": 181.20342712402345,
"kl_loss_2": 2206.5099487304688,
"kl_loss_3": 1721.635076904297,
"kl_loss_7": 610.5273590087891,
"learning_rate": 0.00023235458921873925,
"loss": 1187.8242,
"step": 6830
},
{
"ce_loss_10": 3.5434704184532166,
"ce_loss_13": 3.4598939180374146,
"ce_loss_2": 4.561892867088318,
"ce_loss_3": 4.300772976875305,
"ce_loss_7": 3.73870370388031,
"epoch": 0.684,
"grad_norm": 640.0,
"kl_loss_10": 187.79423599243165,
"kl_loss_2": 2268.489392089844,
"kl_loss_3": 1798.0396545410156,
"kl_loss_7": 637.83154296875,
"learning_rate": 0.0002310157359111938,
"loss": 1215.3348,
"step": 6840
},
{
"ce_loss_10": 3.4310184836387636,
"ce_loss_13": 3.3527446746826173,
"ce_loss_2": 4.52064049243927,
"ce_loss_3": 4.243770575523376,
"ce_loss_7": 3.6270575404167174,
"epoch": 0.685,
"grad_norm": 656.0,
"kl_loss_10": 183.79262008666993,
"kl_loss_2": 2376.693957519531,
"kl_loss_3": 1883.7591125488282,
"kl_loss_7": 632.1836151123047,
"learning_rate": 0.0002296795912722014,
"loss": 1227.4164,
"step": 6850
},
{
"ce_loss_10": 3.570713925361633,
"ce_loss_13": 3.494589388370514,
"ce_loss_2": 4.519134759902954,
"ce_loss_3": 4.253862988948822,
"ce_loss_7": 3.7498515605926515,
"epoch": 0.686,
"grad_norm": 576.0,
"kl_loss_10": 179.78029174804686,
"kl_loss_2": 2128.572625732422,
"kl_loss_3": 1654.8151062011718,
"kl_loss_7": 601.8367279052734,
"learning_rate": 0.0002283461687567236,
"loss": 1133.3289,
"step": 6860
},
{
"ce_loss_10": 3.6324430108070374,
"ce_loss_13": 3.5565361857414244,
"ce_loss_2": 4.557056021690369,
"ce_loss_3": 4.298560571670532,
"ce_loss_7": 3.8067931652069094,
"epoch": 0.687,
"grad_norm": 506.0,
"kl_loss_10": 176.90674362182617,
"kl_loss_2": 2057.1298095703123,
"kl_loss_3": 1601.75263671875,
"kl_loss_7": 589.7931121826172,
"learning_rate": 0.00022701548179231045,
"loss": 1148.6605,
"step": 6870
},
{
"ce_loss_10": 3.582988679409027,
"ce_loss_13": 3.5054625153541563,
"ce_loss_2": 4.558988261222839,
"ce_loss_3": 4.300906538963318,
"ce_loss_7": 3.7617339849472047,
"epoch": 0.688,
"grad_norm": 628.0,
"kl_loss_10": 181.7885940551758,
"kl_loss_2": 2183.6723815917967,
"kl_loss_3": 1725.755859375,
"kl_loss_7": 613.1118804931641,
"learning_rate": 0.00022568754377896516,
"loss": 1157.5781,
"step": 6880
},
{
"ce_loss_10": 3.5760830521583555,
"ce_loss_13": 3.499359941482544,
"ce_loss_2": 4.526648283004761,
"ce_loss_3": 4.2548288941383365,
"ce_loss_7": 3.746409332752228,
"epoch": 0.689,
"grad_norm": 596.0,
"kl_loss_10": 180.4591537475586,
"kl_loss_2": 2140.339678955078,
"kl_loss_3": 1666.426806640625,
"kl_loss_7": 608.2543426513672,
"learning_rate": 0.00022436236808900844,
"loss": 1146.7832,
"step": 6890
},
{
"ce_loss_10": 3.4679219722747803,
"ce_loss_13": 3.3943055748939512,
"ce_loss_2": 4.462708353996277,
"ce_loss_3": 4.191701900959015,
"ce_loss_7": 3.6564658761024473,
"epoch": 0.69,
"grad_norm": 576.0,
"kl_loss_10": 181.4543029785156,
"kl_loss_2": 2231.3488586425783,
"kl_loss_3": 1747.2512329101562,
"kl_loss_7": 621.3712341308594,
"learning_rate": 0.00022303996806694487,
"loss": 1171.5013,
"step": 6900
},
{
"ce_loss_10": 3.5484726190567017,
"ce_loss_13": 3.4742958664894106,
"ce_loss_2": 4.519419646263122,
"ce_loss_3": 4.2546670794487,
"ce_loss_7": 3.7258636236190794,
"epoch": 0.691,
"grad_norm": 628.0,
"kl_loss_10": 177.98818740844726,
"kl_loss_2": 2169.697412109375,
"kl_loss_3": 1700.1277648925782,
"kl_loss_7": 608.3069030761719,
"learning_rate": 0.00022172035702932823,
"loss": 1158.7983,
"step": 6910
},
{
"ce_loss_10": 3.5924888372421266,
"ce_loss_13": 3.52042818069458,
"ce_loss_2": 4.517103600502014,
"ce_loss_3": 4.2600155711174015,
"ce_loss_7": 3.7615610361099243,
"epoch": 0.692,
"grad_norm": 644.0,
"kl_loss_10": 178.67746124267578,
"kl_loss_2": 2075.089074707031,
"kl_loss_3": 1619.080419921875,
"kl_loss_7": 597.07578125,
"learning_rate": 0.00022040354826462666,
"loss": 1140.3766,
"step": 6920
},
{
"ce_loss_10": 3.5235054731369018,
"ce_loss_13": 3.4497315883636475,
"ce_loss_2": 4.493763208389282,
"ce_loss_3": 4.228443372249603,
"ce_loss_7": 3.696590280532837,
"epoch": 0.693,
"grad_norm": 608.0,
"kl_loss_10": 176.88443298339843,
"kl_loss_2": 2155.86865234375,
"kl_loss_3": 1688.133123779297,
"kl_loss_7": 594.834016418457,
"learning_rate": 0.0002190895550330899,
"loss": 1170.6351,
"step": 6930
},
{
"ce_loss_10": 3.4576660275459288,
"ce_loss_13": 3.3801838874816896,
"ce_loss_2": 4.465155124664307,
"ce_loss_3": 4.190037369728088,
"ce_loss_7": 3.644961953163147,
"epoch": 0.694,
"grad_norm": 596.0,
"kl_loss_10": 183.47678833007814,
"kl_loss_2": 2243.730157470703,
"kl_loss_3": 1750.4187255859374,
"kl_loss_7": 620.829443359375,
"learning_rate": 0.00021777839056661552,
"loss": 1165.1125,
"step": 6940
},
{
"ce_loss_10": 3.5390109062194823,
"ce_loss_13": 3.464726150035858,
"ce_loss_2": 4.509364485740662,
"ce_loss_3": 4.234912276268005,
"ce_loss_7": 3.7123560190200804,
"epoch": 0.695,
"grad_norm": 544.0,
"kl_loss_10": 176.9818588256836,
"kl_loss_2": 2161.626544189453,
"kl_loss_3": 1678.7994750976563,
"kl_loss_7": 599.7094299316407,
"learning_rate": 0.0002164700680686147,
"loss": 1138.0607,
"step": 6950
},
{
"ce_loss_10": 3.584149193763733,
"ce_loss_13": 3.509235203266144,
"ce_loss_2": 4.522939825057984,
"ce_loss_3": 4.255844712257385,
"ce_loss_7": 3.757488739490509,
"epoch": 0.696,
"grad_norm": 520.0,
"kl_loss_10": 178.28938369750978,
"kl_loss_2": 2107.5391052246096,
"kl_loss_3": 1637.0810913085938,
"kl_loss_7": 596.8087493896485,
"learning_rate": 0.0002151646007138806,
"loss": 1144.8846,
"step": 6960
},
{
"ce_loss_10": 3.463143539428711,
"ce_loss_13": 3.386814093589783,
"ce_loss_2": 4.468677043914795,
"ce_loss_3": 4.195722925662994,
"ce_loss_7": 3.644878602027893,
"epoch": 0.697,
"grad_norm": 592.0,
"kl_loss_10": 182.87069091796874,
"kl_loss_2": 2238.085968017578,
"kl_loss_3": 1753.8017456054688,
"kl_loss_7": 618.0010162353516,
"learning_rate": 0.00021386200164845526,
"loss": 1174.7895,
"step": 6970
},
{
"ce_loss_10": 3.646360158920288,
"ce_loss_13": 3.5726787090301513,
"ce_loss_2": 4.5610116720199585,
"ce_loss_3": 4.303556060791015,
"ce_loss_7": 3.814376199245453,
"epoch": 0.698,
"grad_norm": 564.0,
"kl_loss_10": 176.60812377929688,
"kl_loss_2": 2073.2183532714844,
"kl_loss_3": 1616.5314147949218,
"kl_loss_7": 592.3170806884766,
"learning_rate": 0.0002125622839894964,
"loss": 1126.8248,
"step": 6980
},
{
"ce_loss_10": 3.5844451546669007,
"ce_loss_13": 3.5105634808540342,
"ce_loss_2": 4.530939984321594,
"ce_loss_3": 4.263714623451233,
"ce_loss_7": 3.7546409368515015,
"epoch": 0.699,
"grad_norm": 580.0,
"kl_loss_10": 177.07121353149415,
"kl_loss_2": 2114.079455566406,
"kl_loss_3": 1646.3038818359375,
"kl_loss_7": 590.21640625,
"learning_rate": 0.00021126546082514663,
"loss": 1144.4324,
"step": 6990
},
{
"ce_loss_10": 3.6092105984687803,
"ce_loss_13": 3.533507966995239,
"ce_loss_2": 4.539715147018432,
"ce_loss_3": 4.274128103256226,
"ce_loss_7": 3.7783223032951354,
"epoch": 0.7,
"grad_norm": 576.0,
"kl_loss_10": 177.3388931274414,
"kl_loss_2": 2107.8220703125,
"kl_loss_3": 1636.2730224609375,
"kl_loss_7": 594.1880798339844,
"learning_rate": 0.00020997154521440098,
"loss": 1131.7685,
"step": 7000
},
{
"ce_loss_10": 3.5483237147331237,
"ce_loss_13": 3.476468551158905,
"ce_loss_2": 4.5004148244857785,
"ce_loss_3": 4.238211619853973,
"ce_loss_7": 3.722394573688507,
"epoch": 0.701,
"grad_norm": 556.0,
"kl_loss_10": 174.87986907958984,
"kl_loss_2": 2127.186975097656,
"kl_loss_3": 1661.8602966308595,
"kl_loss_7": 600.6610717773438,
"learning_rate": 0.0002086805501869749,
"loss": 1133.7422,
"step": 7010
},
{
"ce_loss_10": 3.5188135743141173,
"ce_loss_13": 3.441002869606018,
"ce_loss_2": 4.517698335647583,
"ce_loss_3": 4.247731244564056,
"ce_loss_7": 3.704049062728882,
"epoch": 0.702,
"grad_norm": 616.0,
"kl_loss_10": 182.97085342407226,
"kl_loss_2": 2238.2483459472655,
"kl_loss_3": 1746.861260986328,
"kl_loss_7": 621.9453765869141,
"learning_rate": 0.0002073924887431744,
"loss": 1180.4881,
"step": 7020
},
{
"ce_loss_10": 3.5274356603622437,
"ce_loss_13": 3.45092910528183,
"ce_loss_2": 4.4901411771774296,
"ce_loss_3": 4.230588483810425,
"ce_loss_7": 3.706618547439575,
"epoch": 0.703,
"grad_norm": 568.0,
"kl_loss_10": 179.11029281616212,
"kl_loss_2": 2178.3450439453127,
"kl_loss_3": 1711.4957885742188,
"kl_loss_7": 605.4426422119141,
"learning_rate": 0.00020610737385376348,
"loss": 1200.9115,
"step": 7030
},
{
"ce_loss_10": 3.5887810468673704,
"ce_loss_13": 3.5163929224014283,
"ce_loss_2": 4.518351888656616,
"ce_loss_3": 4.254893863201142,
"ce_loss_7": 3.7612039923667906,
"epoch": 0.704,
"grad_norm": 628.0,
"kl_loss_10": 176.6663619995117,
"kl_loss_2": 2075.716662597656,
"kl_loss_3": 1610.9020690917969,
"kl_loss_7": 588.8746612548828,
"learning_rate": 0.00020482521845983521,
"loss": 1151.7219,
"step": 7040
},
{
"ce_loss_10": 3.5866637587547303,
"ce_loss_13": 3.5072137475013734,
"ce_loss_2": 4.558261132240295,
"ce_loss_3": 4.291126704216003,
"ce_loss_7": 3.7625884056091308,
"epoch": 0.705,
"grad_norm": 600.0,
"kl_loss_10": 182.52303237915038,
"kl_loss_2": 2193.1544799804688,
"kl_loss_3": 1715.2766052246093,
"kl_loss_7": 612.0993133544922,
"learning_rate": 0.00020354603547267987,
"loss": 1187.2512,
"step": 7050
},
{
"ce_loss_10": 3.56976774930954,
"ce_loss_13": 3.488901746273041,
"ce_loss_2": 4.5605854988098145,
"ce_loss_3": 4.2862097263336185,
"ce_loss_7": 3.7558568716049194,
"epoch": 0.706,
"grad_norm": 504.0,
"kl_loss_10": 182.46872100830078,
"kl_loss_2": 2185.692938232422,
"kl_loss_3": 1703.4005493164063,
"kl_loss_7": 615.3342132568359,
"learning_rate": 0.00020226983777365604,
"loss": 1201.599,
"step": 7060
},
{
"ce_loss_10": 3.46960107088089,
"ce_loss_13": 3.394390141963959,
"ce_loss_2": 4.4708491563797,
"ce_loss_3": 4.21563994884491,
"ce_loss_7": 3.6478799104690554,
"epoch": 0.707,
"grad_norm": 548.0,
"kl_loss_10": 174.23039703369142,
"kl_loss_2": 2219.3698486328126,
"kl_loss_3": 1767.6679748535157,
"kl_loss_7": 596.5048126220703,
"learning_rate": 0.00020099663821406056,
"loss": 1167.8441,
"step": 7070
},
{
"ce_loss_10": 3.573564553260803,
"ce_loss_13": 3.4988652229309083,
"ce_loss_2": 4.518075895309448,
"ce_loss_3": 4.2526293873786924,
"ce_loss_7": 3.74619642496109,
"epoch": 0.708,
"grad_norm": 688.0,
"kl_loss_10": 173.7955307006836,
"kl_loss_2": 2112.61328125,
"kl_loss_3": 1644.760516357422,
"kl_loss_7": 588.589468383789,
"learning_rate": 0.00019972644961499853,
"loss": 1168.0168,
"step": 7080
},
{
"ce_loss_10": 3.5425114035606384,
"ce_loss_13": 3.4652504205703734,
"ce_loss_2": 4.536031889915466,
"ce_loss_3": 4.265958952903747,
"ce_loss_7": 3.7277685403823853,
"epoch": 0.709,
"grad_norm": 544.0,
"kl_loss_10": 181.94257354736328,
"kl_loss_2": 2208.387451171875,
"kl_loss_3": 1727.9980712890624,
"kl_loss_7": 619.6463317871094,
"learning_rate": 0.00019845928476725522,
"loss": 1173.2897,
"step": 7090
},
{
"ce_loss_10": 3.6211097598075868,
"ce_loss_13": 3.542751681804657,
"ce_loss_2": 4.576697874069214,
"ce_loss_3": 4.307754421234131,
"ce_loss_7": 3.794824481010437,
"epoch": 0.71,
"grad_norm": 524.0,
"kl_loss_10": 179.40447082519532,
"kl_loss_2": 2133.6560546875,
"kl_loss_3": 1661.1115417480469,
"kl_loss_7": 603.4232849121094,
"learning_rate": 0.00019719515643116677,
"loss": 1187.0576,
"step": 7100
},
{
"ce_loss_10": 3.563658607006073,
"ce_loss_13": 3.486394798755646,
"ce_loss_2": 4.523072552680969,
"ce_loss_3": 4.254948425292969,
"ce_loss_7": 3.7338495373725893,
"epoch": 0.711,
"grad_norm": 560.0,
"kl_loss_10": 177.84368362426758,
"kl_loss_2": 2144.635882568359,
"kl_loss_3": 1666.16875,
"kl_loss_7": 594.3132598876953,
"learning_rate": 0.0001959340773364911,
"loss": 1165.8826,
"step": 7110
},
{
"ce_loss_10": 3.5770322680473328,
"ce_loss_13": 3.5012174606323243,
"ce_loss_2": 4.550109481811523,
"ce_loss_3": 4.284217190742493,
"ce_loss_7": 3.7552335023880006,
"epoch": 0.712,
"grad_norm": 482.0,
"kl_loss_10": 179.49577865600585,
"kl_loss_2": 2181.1701049804688,
"kl_loss_3": 1700.0443542480468,
"kl_loss_7": 603.1331329345703,
"learning_rate": 0.0001946760601822809,
"loss": 1144.9554,
"step": 7120
},
{
"ce_loss_10": 3.6210792899131774,
"ce_loss_13": 3.549504554271698,
"ce_loss_2": 4.563032126426696,
"ce_loss_3": 4.2925217628479,
"ce_loss_7": 3.7989898562431335,
"epoch": 0.713,
"grad_norm": 592.0,
"kl_loss_10": 177.09535369873046,
"kl_loss_2": 2104.0981018066404,
"kl_loss_3": 1631.4184448242188,
"kl_loss_7": 592.4103118896485,
"learning_rate": 0.00019342111763675512,
"loss": 1123.9035,
"step": 7130
},
{
"ce_loss_10": 3.624540627002716,
"ce_loss_13": 3.5509743094444275,
"ce_loss_2": 4.5522850275039675,
"ce_loss_3": 4.289403009414673,
"ce_loss_7": 3.7917919158935547,
"epoch": 0.714,
"grad_norm": 588.0,
"kl_loss_10": 179.54557189941406,
"kl_loss_2": 2098.2009887695312,
"kl_loss_3": 1627.7805236816407,
"kl_loss_7": 597.2573303222656,
"learning_rate": 0.00019216926233717085,
"loss": 1127.0122,
"step": 7140
},
{
"ce_loss_10": 3.5141358375549316,
"ce_loss_13": 3.439559853076935,
"ce_loss_2": 4.534635162353515,
"ce_loss_3": 4.271865749359131,
"ce_loss_7": 3.6872041702270506,
"epoch": 0.715,
"grad_norm": 660.0,
"kl_loss_10": 176.31234970092774,
"kl_loss_2": 2255.184912109375,
"kl_loss_3": 1791.5307861328124,
"kl_loss_7": 594.7268737792969,
"learning_rate": 0.00019092050688969737,
"loss": 1192.3771,
"step": 7150
},
{
"ce_loss_10": 3.586177408695221,
"ce_loss_13": 3.5133618116378784,
"ce_loss_2": 4.527247905731201,
"ce_loss_3": 4.265925621986389,
"ce_loss_7": 3.7605576038360597,
"epoch": 0.716,
"grad_norm": 644.0,
"kl_loss_10": 177.39978713989257,
"kl_loss_2": 2138.382684326172,
"kl_loss_3": 1670.822119140625,
"kl_loss_7": 599.2600921630859,
"learning_rate": 0.00018967486386928817,
"loss": 1143.1982,
"step": 7160
},
{
"ce_loss_10": 3.4582155346870422,
"ce_loss_13": 3.3820405125617983,
"ce_loss_2": 4.456401991844177,
"ce_loss_3": 4.1904214262962345,
"ce_loss_7": 3.640235483646393,
"epoch": 0.717,
"grad_norm": 644.0,
"kl_loss_10": 181.15178756713868,
"kl_loss_2": 2234.275775146484,
"kl_loss_3": 1755.7729919433593,
"kl_loss_7": 621.9208374023438,
"learning_rate": 0.00018843234581955443,
"loss": 1211.3026,
"step": 7170
},
{
"ce_loss_10": 3.4746442079544066,
"ce_loss_13": 3.3969290494918822,
"ce_loss_2": 4.4550795435905455,
"ce_loss_3": 4.190334832668304,
"ce_loss_7": 3.6564103603363036,
"epoch": 0.718,
"grad_norm": 552.0,
"kl_loss_10": 182.11315155029297,
"kl_loss_2": 2189.7255920410157,
"kl_loss_3": 1717.2798217773438,
"kl_loss_7": 618.1327026367187,
"learning_rate": 0.00018719296525263924,
"loss": 1174.7828,
"step": 7180
},
{
"ce_loss_10": 3.571851980686188,
"ce_loss_13": 3.4972564935684205,
"ce_loss_2": 4.505244612693787,
"ce_loss_3": 4.243821203708649,
"ce_loss_7": 3.744515597820282,
"epoch": 0.719,
"grad_norm": 616.0,
"kl_loss_10": 176.35762710571288,
"kl_loss_2": 2085.3956665039063,
"kl_loss_3": 1620.6713073730468,
"kl_loss_7": 587.7710571289062,
"learning_rate": 0.0001859567346490913,
"loss": 1127.6644,
"step": 7190
},
{
"ce_loss_10": 3.5473140597343447,
"ce_loss_13": 3.469071900844574,
"ce_loss_2": 4.532921981811524,
"ce_loss_3": 4.260496711730957,
"ce_loss_7": 3.727588391304016,
"epoch": 0.72,
"grad_norm": 576.0,
"kl_loss_10": 181.04826431274415,
"kl_loss_2": 2198.079150390625,
"kl_loss_3": 1714.5421325683594,
"kl_loss_7": 608.8879028320313,
"learning_rate": 0.0001847236664577389,
"loss": 1142.0284,
"step": 7200
},
{
"ce_loss_10": 3.5739798665046694,
"ce_loss_13": 3.498915135860443,
"ce_loss_2": 4.512744069099426,
"ce_loss_3": 4.2453584432601925,
"ce_loss_7": 3.7430235743522644,
"epoch": 0.721,
"grad_norm": 560.0,
"kl_loss_10": 177.07028579711914,
"kl_loss_2": 2100.2286865234373,
"kl_loss_3": 1626.5753784179688,
"kl_loss_7": 587.8365112304688,
"learning_rate": 0.00018349377309556487,
"loss": 1123.1494,
"step": 7210
},
{
"ce_loss_10": 3.5153507471084593,
"ce_loss_13": 3.438252806663513,
"ce_loss_2": 4.529551863670349,
"ce_loss_3": 4.264591979980469,
"ce_loss_7": 3.6999141216278075,
"epoch": 0.722,
"grad_norm": 576.0,
"kl_loss_10": 181.94500274658202,
"kl_loss_2": 2259.3618774414062,
"kl_loss_3": 1782.3347534179688,
"kl_loss_7": 618.6104766845704,
"learning_rate": 0.00018226706694758193,
"loss": 1192.0385,
"step": 7220
},
{
"ce_loss_10": 3.589731001853943,
"ce_loss_13": 3.5162469148635864,
"ce_loss_2": 4.535777926445007,
"ce_loss_3": 4.275981712341308,
"ce_loss_7": 3.758218777179718,
"epoch": 0.723,
"grad_norm": 536.0,
"kl_loss_10": 176.7706611633301,
"kl_loss_2": 2136.6498046875,
"kl_loss_3": 1678.7979248046875,
"kl_loss_7": 600.344839477539,
"learning_rate": 0.0001810435603667075,
"loss": 1186.8562,
"step": 7230
},
{
"ce_loss_10": 3.4363317847251893,
"ce_loss_13": 3.3615066409111023,
"ce_loss_2": 4.428185939788818,
"ce_loss_3": 4.15840493440628,
"ce_loss_7": 3.6154449939727784,
"epoch": 0.724,
"grad_norm": 568.0,
"kl_loss_10": 175.6705749511719,
"kl_loss_2": 2191.837860107422,
"kl_loss_3": 1708.5897644042968,
"kl_loss_7": 600.9333648681641,
"learning_rate": 0.0001798232656736389,
"loss": 1187.3889,
"step": 7240
},
{
"ce_loss_10": 3.6142520189285277,
"ce_loss_13": 3.539129304885864,
"ce_loss_2": 4.541441655158996,
"ce_loss_3": 4.278818452358246,
"ce_loss_7": 3.7878984928131105,
"epoch": 0.725,
"grad_norm": 548.0,
"kl_loss_10": 176.2219985961914,
"kl_loss_2": 2082.9966674804687,
"kl_loss_3": 1618.1460876464844,
"kl_loss_7": 589.8986907958985,
"learning_rate": 0.0001786061951567303,
"loss": 1139.4487,
"step": 7250
},
{
"ce_loss_10": 3.528095841407776,
"ce_loss_13": 3.449831175804138,
"ce_loss_2": 4.499278616905213,
"ce_loss_3": 4.2353353023529055,
"ce_loss_7": 3.7135850310325624,
"epoch": 0.726,
"grad_norm": 564.0,
"kl_loss_10": 179.76034393310547,
"kl_loss_2": 2139.9875549316407,
"kl_loss_3": 1671.8400817871093,
"kl_loss_7": 601.5716674804687,
"learning_rate": 0.00017739236107186857,
"loss": 1166.0127,
"step": 7260
},
{
"ce_loss_10": 3.6185179114341737,
"ce_loss_13": 3.5442421674728393,
"ce_loss_2": 4.529335474967956,
"ce_loss_3": 4.268719971179962,
"ce_loss_7": 3.782019078731537,
"epoch": 0.727,
"grad_norm": 506.0,
"kl_loss_10": 174.4645896911621,
"kl_loss_2": 2059.7219299316407,
"kl_loss_3": 1594.1942993164062,
"kl_loss_7": 584.5985778808594,
"learning_rate": 0.00017618177564234904,
"loss": 1131.8243,
"step": 7270
},
{
"ce_loss_10": 3.5931476950645447,
"ce_loss_13": 3.5195810914039614,
"ce_loss_2": 4.50758855342865,
"ce_loss_3": 4.243484151363373,
"ce_loss_7": 3.7607154488563537,
"epoch": 0.728,
"grad_norm": 560.0,
"kl_loss_10": 172.751806640625,
"kl_loss_2": 2033.7148681640624,
"kl_loss_3": 1570.946112060547,
"kl_loss_7": 576.0696563720703,
"learning_rate": 0.00017497445105875377,
"loss": 1116.918,
"step": 7280
},
{
"ce_loss_10": 3.5072262048721314,
"ce_loss_13": 3.429379200935364,
"ce_loss_2": 4.499281525611877,
"ce_loss_3": 4.232627415657044,
"ce_loss_7": 3.695177102088928,
"epoch": 0.729,
"grad_norm": 552.0,
"kl_loss_10": 181.318611907959,
"kl_loss_2": 2210.19443359375,
"kl_loss_3": 1730.8836486816406,
"kl_loss_7": 613.139291381836,
"learning_rate": 0.000173770399478828,
"loss": 1168.2677,
"step": 7290
},
{
"ce_loss_10": 3.422491526603699,
"ce_loss_13": 3.347836971282959,
"ce_loss_2": 4.407784128189087,
"ce_loss_3": 4.131522953510284,
"ce_loss_7": 3.6013160228729246,
"epoch": 0.73,
"grad_norm": 560.0,
"kl_loss_10": 176.02440795898437,
"kl_loss_2": 2191.964385986328,
"kl_loss_3": 1698.5522827148438,
"kl_loss_7": 599.1639129638672,
"learning_rate": 0.0001725696330273575,
"loss": 1197.3154,
"step": 7300
},
{
"ce_loss_10": 3.611281132698059,
"ce_loss_13": 3.536842370033264,
"ce_loss_2": 4.535079216957092,
"ce_loss_3": 4.276083791255951,
"ce_loss_7": 3.782840621471405,
"epoch": 0.731,
"grad_norm": 608.0,
"kl_loss_10": 174.63313903808594,
"kl_loss_2": 2067.6716491699217,
"kl_loss_3": 1609.9422119140625,
"kl_loss_7": 585.3752410888671,
"learning_rate": 0.00017137216379604724,
"loss": 1120.0867,
"step": 7310
},
{
"ce_loss_10": 3.491976761817932,
"ce_loss_13": 3.4171910762786863,
"ce_loss_2": 4.477530479431152,
"ce_loss_3": 4.205724453926086,
"ce_loss_7": 3.667802131175995,
"epoch": 0.732,
"grad_norm": 588.0,
"kl_loss_10": 177.26437683105468,
"kl_loss_2": 2177.8865478515627,
"kl_loss_3": 1690.7372009277344,
"kl_loss_7": 596.9918426513672,
"learning_rate": 0.00017017800384339925,
"loss": 1158.1862,
"step": 7320
},
{
"ce_loss_10": 3.446759831905365,
"ce_loss_13": 3.3701040625572203,
"ce_loss_2": 4.4656068086624146,
"ce_loss_3": 4.189201056957245,
"ce_loss_7": 3.63215457201004,
"epoch": 0.733,
"grad_norm": 608.0,
"kl_loss_10": 179.6235023498535,
"kl_loss_2": 2245.6789367675783,
"kl_loss_3": 1758.8689514160155,
"kl_loss_7": 611.1446807861328,
"learning_rate": 0.00016898716519459073,
"loss": 1147.9725,
"step": 7330
},
{
"ce_loss_10": 3.5716673254966738,
"ce_loss_13": 3.4945391058921813,
"ce_loss_2": 4.573297142982483,
"ce_loss_3": 4.307599520683288,
"ce_loss_7": 3.7545908093452454,
"epoch": 0.734,
"grad_norm": 564.0,
"kl_loss_10": 182.98650054931642,
"kl_loss_2": 2208.1832763671873,
"kl_loss_3": 1733.5484619140625,
"kl_loss_7": 619.9228546142579,
"learning_rate": 0.00016779965984135375,
"loss": 1166.6811,
"step": 7340
},
{
"ce_loss_10": 3.478439450263977,
"ce_loss_13": 3.4015959978103636,
"ce_loss_2": 4.458614790439606,
"ce_loss_3": 4.194760942459107,
"ce_loss_7": 3.6524015784263613,
"epoch": 0.735,
"grad_norm": 612.0,
"kl_loss_10": 173.75391540527343,
"kl_loss_2": 2180.093780517578,
"kl_loss_3": 1698.9966857910156,
"kl_loss_7": 593.231803894043,
"learning_rate": 0.00016661549974185424,
"loss": 1159.2525,
"step": 7350
},
{
"ce_loss_10": 3.51222710609436,
"ce_loss_13": 3.4394211292266847,
"ce_loss_2": 4.489507508277893,
"ce_loss_3": 4.216231632232666,
"ce_loss_7": 3.6876235485076903,
"epoch": 0.736,
"grad_norm": 604.0,
"kl_loss_10": 179.0154716491699,
"kl_loss_2": 2169.4521362304686,
"kl_loss_3": 1690.815203857422,
"kl_loss_7": 602.3167053222656,
"learning_rate": 0.00016543469682057105,
"loss": 1143.9477,
"step": 7360
},
{
"ce_loss_10": 3.5415560364723206,
"ce_loss_13": 3.465597319602966,
"ce_loss_2": 4.508477449417114,
"ce_loss_3": 4.240069580078125,
"ce_loss_7": 3.7229697704315186,
"epoch": 0.737,
"grad_norm": 564.0,
"kl_loss_10": 181.52649993896483,
"kl_loss_2": 2153.332647705078,
"kl_loss_3": 1671.2495178222657,
"kl_loss_7": 610.9849151611328,
"learning_rate": 0.00016425726296817632,
"loss": 1153.5225,
"step": 7370
},
{
"ce_loss_10": 3.5615882515907287,
"ce_loss_13": 3.4901331782341005,
"ce_loss_2": 4.51513102054596,
"ce_loss_3": 4.248232364654541,
"ce_loss_7": 3.7354116439819336,
"epoch": 0.738,
"grad_norm": 544.0,
"kl_loss_10": 174.93305130004882,
"kl_loss_2": 2115.262805175781,
"kl_loss_3": 1640.2427490234375,
"kl_loss_7": 589.8659851074219,
"learning_rate": 0.00016308321004141607,
"loss": 1140.3666,
"step": 7380
},
{
"ce_loss_10": 3.518048846721649,
"ce_loss_13": 3.438374364376068,
"ce_loss_2": 4.499938416481018,
"ce_loss_3": 4.236223828792572,
"ce_loss_7": 3.695832920074463,
"epoch": 0.739,
"grad_norm": 548.0,
"kl_loss_10": 181.39317779541017,
"kl_loss_2": 2175.456677246094,
"kl_loss_3": 1701.57724609375,
"kl_loss_7": 609.4653137207031,
"learning_rate": 0.00016191254986299043,
"loss": 1150.5328,
"step": 7390
},
{
"ce_loss_10": 3.5613037228584288,
"ce_loss_13": 3.4887098908424377,
"ce_loss_2": 4.503171324729919,
"ce_loss_3": 4.245256781578064,
"ce_loss_7": 3.7236536622047423,
"epoch": 0.74,
"grad_norm": 680.0,
"kl_loss_10": 174.15130844116212,
"kl_loss_2": 2131.4445068359373,
"kl_loss_3": 1674.7688537597655,
"kl_loss_7": 591.5977661132813,
"learning_rate": 0.00016074529422143398,
"loss": 1164.3935,
"step": 7400
},
{
"ce_loss_10": 3.5027013421058655,
"ce_loss_13": 3.429375433921814,
"ce_loss_2": 4.4999552249908445,
"ce_loss_3": 4.231460630893707,
"ce_loss_7": 3.6830108165740967,
"epoch": 0.741,
"grad_norm": 736.0,
"kl_loss_10": 175.83671493530272,
"kl_loss_2": 2196.9726989746096,
"kl_loss_3": 1720.8603271484376,
"kl_loss_7": 599.7947540283203,
"learning_rate": 0.0001595814548709983,
"loss": 1180.4217,
"step": 7410
},
{
"ce_loss_10": 3.576788854598999,
"ce_loss_13": 3.498660683631897,
"ce_loss_2": 4.549895691871643,
"ce_loss_3": 4.287308168411255,
"ce_loss_7": 3.7568582773208616,
"epoch": 0.742,
"grad_norm": 556.0,
"kl_loss_10": 181.97546997070313,
"kl_loss_2": 2178.1142333984376,
"kl_loss_3": 1714.0122802734375,
"kl_loss_7": 610.0974487304687,
"learning_rate": 0.00015842104353153285,
"loss": 1164.6248,
"step": 7420
},
{
"ce_loss_10": 3.5943754434585573,
"ce_loss_13": 3.5180631637573243,
"ce_loss_2": 4.549882531166077,
"ce_loss_3": 4.288905811309815,
"ce_loss_7": 3.7695993304252626,
"epoch": 0.743,
"grad_norm": 548.0,
"kl_loss_10": 179.57282943725585,
"kl_loss_2": 2154.6554992675783,
"kl_loss_3": 1684.7554626464844,
"kl_loss_7": 607.9901489257812,
"learning_rate": 0.0001572640718883667,
"loss": 1181.4139,
"step": 7430
},
{
"ce_loss_10": 3.5268728017807005,
"ce_loss_13": 3.454422962665558,
"ce_loss_2": 4.4702025055885315,
"ce_loss_3": 4.211061191558838,
"ce_loss_7": 3.699466872215271,
"epoch": 0.744,
"grad_norm": 544.0,
"kl_loss_10": 173.9086715698242,
"kl_loss_2": 2107.2433349609373,
"kl_loss_3": 1643.049658203125,
"kl_loss_7": 587.0272888183594,
"learning_rate": 0.0001561105515921915,
"loss": 1164.3465,
"step": 7440
},
{
"ce_loss_10": 3.376306939125061,
"ce_loss_13": 3.3052693247795104,
"ce_loss_2": 4.399094796180725,
"ce_loss_3": 4.130729305744171,
"ce_loss_7": 3.5687609910964966,
"epoch": 0.745,
"grad_norm": 540.0,
"kl_loss_10": 174.5767349243164,
"kl_loss_2": 2266.337322998047,
"kl_loss_3": 1780.6517333984375,
"kl_loss_7": 616.0360229492187,
"learning_rate": 0.0001549604942589441,
"loss": 1163.9994,
"step": 7450
},
{
"ce_loss_10": 3.5653053879737855,
"ce_loss_13": 3.493623507022858,
"ce_loss_2": 4.478042149543763,
"ce_loss_3": 4.218795919418335,
"ce_loss_7": 3.731026256084442,
"epoch": 0.746,
"grad_norm": 580.0,
"kl_loss_10": 170.22484588623047,
"kl_loss_2": 2039.5635498046875,
"kl_loss_3": 1579.8051452636719,
"kl_loss_7": 567.1303924560547,
"learning_rate": 0.00015381391146968864,
"loss": 1115.5928,
"step": 7460
},
{
"ce_loss_10": 3.5406330108642576,
"ce_loss_13": 3.4665817737579347,
"ce_loss_2": 4.507574367523193,
"ce_loss_3": 4.2422141313552855,
"ce_loss_7": 3.711947810649872,
"epoch": 0.747,
"grad_norm": 576.0,
"kl_loss_10": 173.49912338256837,
"kl_loss_2": 2137.4108154296873,
"kl_loss_3": 1666.1288146972656,
"kl_loss_7": 586.0349029541015,
"learning_rate": 0.00015267081477050133,
"loss": 1153.2315,
"step": 7470
},
{
"ce_loss_10": 3.6397757053375246,
"ce_loss_13": 3.565910828113556,
"ce_loss_2": 4.558345174789428,
"ce_loss_3": 4.3014825820922855,
"ce_loss_7": 3.813869845867157,
"epoch": 0.748,
"grad_norm": 524.0,
"kl_loss_10": 179.69472961425782,
"kl_loss_2": 2081.7364990234373,
"kl_loss_3": 1619.6140869140625,
"kl_loss_7": 597.4245025634766,
"learning_rate": 0.00015153121567235335,
"loss": 1120.7676,
"step": 7480
},
{
"ce_loss_10": 3.529373216629028,
"ce_loss_13": 3.454616332054138,
"ce_loss_2": 4.507159662246704,
"ce_loss_3": 4.2369110703468325,
"ce_loss_7": 3.7003498554229735,
"epoch": 0.749,
"grad_norm": 596.0,
"kl_loss_10": 178.19662170410157,
"kl_loss_2": 2201.308489990234,
"kl_loss_3": 1718.668115234375,
"kl_loss_7": 600.4444549560546,
"learning_rate": 0.00015039512565099468,
"loss": 1130.487,
"step": 7490
},
{
"ce_loss_10": 3.59435373544693,
"ce_loss_13": 3.5217554926872254,
"ce_loss_2": 4.542114019393921,
"ce_loss_3": 4.2746872186660765,
"ce_loss_7": 3.768599247932434,
"epoch": 0.75,
"grad_norm": 532.0,
"kl_loss_10": 177.41806030273438,
"kl_loss_2": 2130.0947509765624,
"kl_loss_3": 1653.6429382324218,
"kl_loss_7": 598.9670806884766,
"learning_rate": 0.00014926255614683932,
"loss": 1188.0775,
"step": 7500
},
{
"ce_loss_10": 3.5343728065490723,
"ce_loss_13": 3.462270963191986,
"ce_loss_2": 4.491153955459595,
"ce_loss_3": 4.2244093179702755,
"ce_loss_7": 3.70688259601593,
"epoch": 0.751,
"grad_norm": 584.0,
"kl_loss_10": 175.2909957885742,
"kl_loss_2": 2134.823455810547,
"kl_loss_3": 1661.539990234375,
"kl_loss_7": 592.2256774902344,
"learning_rate": 0.0001481335185648498,
"loss": 1152.3602,
"step": 7510
},
{
"ce_loss_10": 3.5509208917617796,
"ce_loss_13": 3.4760316491127012,
"ce_loss_2": 4.4910846710205075,
"ce_loss_3": 4.236609256267547,
"ce_loss_7": 3.7286911368370057,
"epoch": 0.752,
"grad_norm": 560.0,
"kl_loss_10": 175.903653717041,
"kl_loss_2": 2132.4962768554688,
"kl_loss_3": 1669.2187805175781,
"kl_loss_7": 598.009976196289,
"learning_rate": 0.0001470080242744218,
"loss": 1135.5451,
"step": 7520
},
{
"ce_loss_10": 3.5404749631881716,
"ce_loss_13": 3.4668622732162477,
"ce_loss_2": 4.505393123626709,
"ce_loss_3": 4.248504590988159,
"ce_loss_7": 3.7097239255905152,
"epoch": 0.753,
"grad_norm": 600.0,
"kl_loss_10": 172.68473205566406,
"kl_loss_2": 2143.0695861816407,
"kl_loss_3": 1687.700439453125,
"kl_loss_7": 591.5756866455079,
"learning_rate": 0.0001458860846092705,
"loss": 1151.0906,
"step": 7530
},
{
"ce_loss_10": 3.578909718990326,
"ce_loss_13": 3.503495466709137,
"ce_loss_2": 4.502352619171143,
"ce_loss_3": 4.240141928195953,
"ce_loss_7": 3.750500977039337,
"epoch": 0.754,
"grad_norm": 604.0,
"kl_loss_10": 174.89483642578125,
"kl_loss_2": 2075.617956542969,
"kl_loss_3": 1612.4501708984376,
"kl_loss_7": 588.5097457885743,
"learning_rate": 0.00014476771086731566,
"loss": 1116.6235,
"step": 7540
},
{
"ce_loss_10": 3.688204324245453,
"ce_loss_13": 3.610430431365967,
"ce_loss_2": 4.621451306343078,
"ce_loss_3": 4.3530316829681395,
"ce_loss_7": 3.8562689661979674,
"epoch": 0.755,
"grad_norm": 572.0,
"kl_loss_10": 181.31634902954102,
"kl_loss_2": 2096.732080078125,
"kl_loss_3": 1625.2518310546875,
"kl_loss_7": 592.3898040771485,
"learning_rate": 0.00014365291431056872,
"loss": 1170.6359,
"step": 7550
},
{
"ce_loss_10": 3.513639771938324,
"ce_loss_13": 3.43876428604126,
"ce_loss_2": 4.494768452644348,
"ce_loss_3": 4.226865899562836,
"ce_loss_7": 3.6938853025436402,
"epoch": 0.756,
"grad_norm": 648.0,
"kl_loss_10": 182.01916885375977,
"kl_loss_2": 2211.1534912109373,
"kl_loss_3": 1723.5334899902343,
"kl_loss_7": 617.1691345214844,
"learning_rate": 0.00014254170616501827,
"loss": 1163.1255,
"step": 7560
},
{
"ce_loss_10": 3.4477534770965574,
"ce_loss_13": 3.3702123761177063,
"ce_loss_2": 4.465814185142517,
"ce_loss_3": 4.193384432792664,
"ce_loss_7": 3.6376350045204164,
"epoch": 0.757,
"grad_norm": 652.0,
"kl_loss_10": 181.91958312988282,
"kl_loss_2": 2272.9578369140627,
"kl_loss_3": 1780.6268127441406,
"kl_loss_7": 631.3283477783203,
"learning_rate": 0.0001414340976205183,
"loss": 1210.6553,
"step": 7570
},
{
"ce_loss_10": 3.4623551964759827,
"ce_loss_13": 3.386858320236206,
"ce_loss_2": 4.47217173576355,
"ce_loss_3": 4.196212124824524,
"ce_loss_7": 3.6454687833786013,
"epoch": 0.758,
"grad_norm": 652.0,
"kl_loss_10": 175.49118347167968,
"kl_loss_2": 2225.9182312011717,
"kl_loss_3": 1743.3149719238281,
"kl_loss_7": 604.4145355224609,
"learning_rate": 0.00014033009983067452,
"loss": 1165.3377,
"step": 7580
},
{
"ce_loss_10": 3.625165855884552,
"ce_loss_13": 3.553388500213623,
"ce_loss_2": 4.5477535963058475,
"ce_loss_3": 4.282978129386902,
"ce_loss_7": 3.790937566757202,
"epoch": 0.759,
"grad_norm": 540.0,
"kl_loss_10": 173.22186889648438,
"kl_loss_2": 2076.229632568359,
"kl_loss_3": 1605.808331298828,
"kl_loss_7": 578.3152954101563,
"learning_rate": 0.00013922972391273224,
"loss": 1124.4209,
"step": 7590
},
{
"ce_loss_10": 3.624656689167023,
"ce_loss_13": 3.5520288705825807,
"ce_loss_2": 4.581440138816833,
"ce_loss_3": 4.323860204219818,
"ce_loss_7": 3.799424684047699,
"epoch": 0.76,
"grad_norm": 604.0,
"kl_loss_10": 176.8631507873535,
"kl_loss_2": 2111.316943359375,
"kl_loss_3": 1657.235821533203,
"kl_loss_7": 591.5200927734375,
"learning_rate": 0.0001381329809474649,
"loss": 1146.3586,
"step": 7600
},
{
"ce_loss_10": 3.532001996040344,
"ce_loss_13": 3.4530585527420046,
"ce_loss_2": 4.544336724281311,
"ce_loss_3": 4.269702458381653,
"ce_loss_7": 3.7165846705436705,
"epoch": 0.761,
"grad_norm": 632.0,
"kl_loss_10": 181.38144760131837,
"kl_loss_2": 2247.282580566406,
"kl_loss_3": 1759.4795043945312,
"kl_loss_7": 616.4709213256835,
"learning_rate": 0.0001370398819790621,
"loss": 1186.2754,
"step": 7610
},
{
"ce_loss_10": 3.6697842359542845,
"ce_loss_13": 3.5929376244544984,
"ce_loss_2": 4.604382491111755,
"ce_loss_3": 4.336557102203369,
"ce_loss_7": 3.8371459245681763,
"epoch": 0.762,
"grad_norm": 612.0,
"kl_loss_10": 176.96341781616212,
"kl_loss_2": 2080.2568908691405,
"kl_loss_3": 1604.4097290039062,
"kl_loss_7": 582.9634078979492,
"learning_rate": 0.00013595043801501794,
"loss": 1108.4416,
"step": 7620
},
{
"ce_loss_10": 3.4644748091697695,
"ce_loss_13": 3.386727011203766,
"ce_loss_2": 4.503179264068604,
"ce_loss_3": 4.235963094234466,
"ce_loss_7": 3.650843346118927,
"epoch": 0.763,
"grad_norm": 664.0,
"kl_loss_10": 180.12555694580078,
"kl_loss_2": 2289.173895263672,
"kl_loss_3": 1815.476171875,
"kl_loss_7": 622.1785308837891,
"learning_rate": 0.00013486466002602133,
"loss": 1194.0496,
"step": 7630
},
{
"ce_loss_10": 3.577344560623169,
"ce_loss_13": 3.503310763835907,
"ce_loss_2": 4.512240695953369,
"ce_loss_3": 4.2521095991134645,
"ce_loss_7": 3.7476378440856934,
"epoch": 0.764,
"grad_norm": 556.0,
"kl_loss_10": 175.91430206298827,
"kl_loss_2": 2097.193493652344,
"kl_loss_3": 1632.0237731933594,
"kl_loss_7": 587.4400573730469,
"learning_rate": 0.00013378255894584462,
"loss": 1166.6646,
"step": 7640
},
{
"ce_loss_10": 3.5123034000396727,
"ce_loss_13": 3.433635425567627,
"ce_loss_2": 4.500353503227234,
"ce_loss_3": 4.2323464274406435,
"ce_loss_7": 3.6943077445030212,
"epoch": 0.765,
"grad_norm": 560.0,
"kl_loss_10": 181.23019485473634,
"kl_loss_2": 2206.8176452636717,
"kl_loss_3": 1726.0316467285156,
"kl_loss_7": 608.9391540527344,
"learning_rate": 0.0001327041456712334,
"loss": 1171.7679,
"step": 7650
},
{
"ce_loss_10": 3.55541011095047,
"ce_loss_13": 3.477762734889984,
"ce_loss_2": 4.513465809822082,
"ce_loss_3": 4.241680002212524,
"ce_loss_7": 3.7298651814460753,
"epoch": 0.766,
"grad_norm": 544.0,
"kl_loss_10": 180.71754302978516,
"kl_loss_2": 2169.4558044433593,
"kl_loss_3": 1686.6506469726562,
"kl_loss_7": 611.8586975097656,
"learning_rate": 0.00013162943106179747,
"loss": 1171.1721,
"step": 7660
},
{
"ce_loss_10": 3.5293742179870606,
"ce_loss_13": 3.456415057182312,
"ce_loss_2": 4.477925181388855,
"ce_loss_3": 4.21549437046051,
"ce_loss_7": 3.7069293022155763,
"epoch": 0.767,
"grad_norm": 588.0,
"kl_loss_10": 176.59339599609376,
"kl_loss_2": 2121.027722167969,
"kl_loss_3": 1652.0191345214844,
"kl_loss_7": 595.7734832763672,
"learning_rate": 0.00013055842593990132,
"loss": 1142.6258,
"step": 7670
},
{
"ce_loss_10": 3.4710524678230286,
"ce_loss_13": 3.399113714694977,
"ce_loss_2": 4.434552907943726,
"ce_loss_3": 4.163622748851776,
"ce_loss_7": 3.6474674701690675,
"epoch": 0.768,
"grad_norm": 540.0,
"kl_loss_10": 174.6668930053711,
"kl_loss_2": 2126.9706176757813,
"kl_loss_3": 1655.3524780273438,
"kl_loss_7": 590.6725830078125,
"learning_rate": 0.00012949114109055414,
"loss": 1168.1568,
"step": 7680
},
{
"ce_loss_10": 3.519006776809692,
"ce_loss_13": 3.4431997537612915,
"ce_loss_2": 4.4897076964378355,
"ce_loss_3": 4.226867043972016,
"ce_loss_7": 3.6994680523872376,
"epoch": 0.769,
"grad_norm": 584.0,
"kl_loss_10": 177.6523193359375,
"kl_loss_2": 2161.8881958007814,
"kl_loss_3": 1689.2801330566406,
"kl_loss_7": 607.9943145751953,
"learning_rate": 0.00012842758726130281,
"loss": 1170.0649,
"step": 7690
},
{
"ce_loss_10": 3.5628538727760315,
"ce_loss_13": 3.485966920852661,
"ce_loss_2": 4.55888135433197,
"ce_loss_3": 4.292485213279724,
"ce_loss_7": 3.7444689750671385,
"epoch": 0.77,
"grad_norm": 580.0,
"kl_loss_10": 179.38818130493163,
"kl_loss_2": 2210.8425048828126,
"kl_loss_3": 1733.9449951171875,
"kl_loss_7": 610.3931243896484,
"learning_rate": 0.00012736777516212267,
"loss": 1160.5377,
"step": 7700
},
{
"ce_loss_10": 3.557508039474487,
"ce_loss_13": 3.4799222111701966,
"ce_loss_2": 4.522961139678955,
"ce_loss_3": 4.253420984745025,
"ce_loss_7": 3.736038076877594,
"epoch": 0.771,
"grad_norm": 548.0,
"kl_loss_10": 181.55507125854493,
"kl_loss_2": 2158.760382080078,
"kl_loss_3": 1679.4241821289063,
"kl_loss_7": 612.0054595947265,
"learning_rate": 0.00012631171546530968,
"loss": 1138.0437,
"step": 7710
},
{
"ce_loss_10": 3.573415291309357,
"ce_loss_13": 3.4920427322387697,
"ce_loss_2": 4.5341356039047245,
"ce_loss_3": 4.2658631801605225,
"ce_loss_7": 3.752500355243683,
"epoch": 0.772,
"grad_norm": 568.0,
"kl_loss_10": 181.86062927246093,
"kl_loss_2": 2149.480059814453,
"kl_loss_3": 1673.6519470214844,
"kl_loss_7": 603.6808334350586,
"learning_rate": 0.00012525941880537307,
"loss": 1168.6842,
"step": 7720
},
{
"ce_loss_10": 3.6038484454154966,
"ce_loss_13": 3.528382158279419,
"ce_loss_2": 4.546409988403321,
"ce_loss_3": 4.28290638923645,
"ce_loss_7": 3.774893271923065,
"epoch": 0.773,
"grad_norm": 648.0,
"kl_loss_10": 176.13294677734376,
"kl_loss_2": 2093.1892028808593,
"kl_loss_3": 1628.6803955078126,
"kl_loss_7": 588.4353439331055,
"learning_rate": 0.00012421089577892869,
"loss": 1139.2071,
"step": 7730
},
{
"ce_loss_10": 3.555491530895233,
"ce_loss_13": 3.4761422514915465,
"ce_loss_2": 4.545820116996765,
"ce_loss_3": 4.266150867938995,
"ce_loss_7": 3.7351402401924134,
"epoch": 0.774,
"grad_norm": 668.0,
"kl_loss_10": 179.60176849365234,
"kl_loss_2": 2216.0720031738283,
"kl_loss_3": 1715.4457092285156,
"kl_loss_7": 609.4783508300782,
"learning_rate": 0.0001231661569445919,
"loss": 1172.4699,
"step": 7740
},
{
"ce_loss_10": 3.410160577297211,
"ce_loss_13": 3.3377888798713684,
"ce_loss_2": 4.401880002021789,
"ce_loss_3": 4.1333277225494385,
"ce_loss_7": 3.589407229423523,
"epoch": 0.775,
"grad_norm": 560.0,
"kl_loss_10": 176.03026962280273,
"kl_loss_2": 2206.2500732421877,
"kl_loss_3": 1718.6787414550781,
"kl_loss_7": 601.0735870361328,
"learning_rate": 0.00012212521282287093,
"loss": 1191.8578,
"step": 7750
},
{
"ce_loss_10": 3.5700145840644835,
"ce_loss_13": 3.493156003952026,
"ce_loss_2": 4.517843317985535,
"ce_loss_3": 4.254901158809662,
"ce_loss_7": 3.748375141620636,
"epoch": 0.776,
"grad_norm": 536.0,
"kl_loss_10": 180.4297233581543,
"kl_loss_2": 2117.2120727539063,
"kl_loss_3": 1651.861083984375,
"kl_loss_7": 599.0601287841797,
"learning_rate": 0.00012108807389606158,
"loss": 1171.4985,
"step": 7760
},
{
"ce_loss_10": 3.5604520797729493,
"ce_loss_13": 3.4879041433334352,
"ce_loss_2": 4.51350736618042,
"ce_loss_3": 4.255460405349732,
"ce_loss_7": 3.737185871601105,
"epoch": 0.777,
"grad_norm": 624.0,
"kl_loss_10": 173.82694396972656,
"kl_loss_2": 2134.6752502441404,
"kl_loss_3": 1670.9033142089843,
"kl_loss_7": 592.1039428710938,
"learning_rate": 0.00012005475060814159,
"loss": 1139.6322,
"step": 7770
},
{
"ce_loss_10": 3.5012547731399537,
"ce_loss_13": 3.4265154361724854,
"ce_loss_2": 4.493270707130432,
"ce_loss_3": 4.232757782936096,
"ce_loss_7": 3.676969814300537,
"epoch": 0.778,
"grad_norm": 592.0,
"kl_loss_10": 178.45665435791017,
"kl_loss_2": 2218.041455078125,
"kl_loss_3": 1749.4460815429688,
"kl_loss_7": 609.0319793701171,
"learning_rate": 0.00011902525336466464,
"loss": 1173.4994,
"step": 7780
},
{
"ce_loss_10": 3.487755036354065,
"ce_loss_13": 3.40771107673645,
"ce_loss_2": 4.503837430477143,
"ce_loss_3": 4.227473521232605,
"ce_loss_7": 3.6715272665023804,
"epoch": 0.779,
"grad_norm": 556.0,
"kl_loss_10": 182.97367095947266,
"kl_loss_2": 2253.76220703125,
"kl_loss_3": 1756.6077819824218,
"kl_loss_7": 618.8783203125,
"learning_rate": 0.00011799959253265668,
"loss": 1168.3436,
"step": 7790
},
{
"ce_loss_10": 3.548134469985962,
"ce_loss_13": 3.4717200636863708,
"ce_loss_2": 4.531218719482422,
"ce_loss_3": 4.259068071842194,
"ce_loss_7": 3.725462853908539,
"epoch": 0.78,
"grad_norm": 588.0,
"kl_loss_10": 179.5894790649414,
"kl_loss_2": 2197.6992370605467,
"kl_loss_3": 1714.0868286132813,
"kl_loss_7": 606.9559936523438,
"learning_rate": 0.00011697777844051105,
"loss": 1168.1586,
"step": 7800
},
{
"ce_loss_10": 3.5325579047203064,
"ce_loss_13": 3.4524773359298706,
"ce_loss_2": 4.540277624130249,
"ce_loss_3": 4.275496506690979,
"ce_loss_7": 3.709948194026947,
"epoch": 0.781,
"grad_norm": 600.0,
"kl_loss_10": 182.08444366455078,
"kl_loss_2": 2253.191998291016,
"kl_loss_3": 1783.90888671875,
"kl_loss_7": 609.2252471923828,
"learning_rate": 0.00011595982137788402,
"loss": 1182.0791,
"step": 7810
},
{
"ce_loss_10": 3.507384693622589,
"ce_loss_13": 3.433124232292175,
"ce_loss_2": 4.452573490142822,
"ce_loss_3": 4.191938650608063,
"ce_loss_7": 3.6804782152175903,
"epoch": 0.782,
"grad_norm": 552.0,
"kl_loss_10": 174.53733520507814,
"kl_loss_2": 2103.1436462402344,
"kl_loss_3": 1636.1021728515625,
"kl_loss_7": 594.1032348632813,
"learning_rate": 0.00011494573159559212,
"loss": 1150.1953,
"step": 7820
},
{
"ce_loss_10": 3.495812237262726,
"ce_loss_13": 3.4193639039993284,
"ce_loss_2": 4.4669132947921755,
"ce_loss_3": 4.2113652467727665,
"ce_loss_7": 3.67316712141037,
"epoch": 0.783,
"grad_norm": 572.0,
"kl_loss_10": 178.65593719482422,
"kl_loss_2": 2173.239221191406,
"kl_loss_3": 1708.3340942382813,
"kl_loss_7": 603.083627319336,
"learning_rate": 0.00011393551930550828,
"loss": 1187.9246,
"step": 7830
},
{
"ce_loss_10": 3.6368354201316833,
"ce_loss_13": 3.559674692153931,
"ce_loss_2": 4.571843910217285,
"ce_loss_3": 4.303619515895844,
"ce_loss_7": 3.8069366455078124,
"epoch": 0.784,
"grad_norm": 588.0,
"kl_loss_10": 179.06233749389648,
"kl_loss_2": 2120.6896240234373,
"kl_loss_3": 1638.9197570800782,
"kl_loss_7": 595.7463287353515,
"learning_rate": 0.00011292919468045875,
"loss": 1145.6585,
"step": 7840
},
{
"ce_loss_10": 3.584019410610199,
"ce_loss_13": 3.5086099743843078,
"ce_loss_2": 4.53436963558197,
"ce_loss_3": 4.271309959888458,
"ce_loss_7": 3.7602915167808533,
"epoch": 0.785,
"grad_norm": 528.0,
"kl_loss_10": 177.57644500732422,
"kl_loss_2": 2126.9706481933595,
"kl_loss_3": 1654.4735168457032,
"kl_loss_7": 600.0492980957031,
"learning_rate": 0.00011192676785412154,
"loss": 1144.0532,
"step": 7850
},
{
"ce_loss_10": 3.522589087486267,
"ce_loss_13": 3.4456050395965576,
"ce_loss_2": 4.529689431190491,
"ce_loss_3": 4.258461606502533,
"ce_loss_7": 3.704596519470215,
"epoch": 0.786,
"grad_norm": 624.0,
"kl_loss_10": 178.9210517883301,
"kl_loss_2": 2216.430499267578,
"kl_loss_3": 1733.1487976074218,
"kl_loss_7": 602.0237121582031,
"learning_rate": 0.00011092824892092374,
"loss": 1161.7434,
"step": 7860
},
{
"ce_loss_10": 3.454429876804352,
"ce_loss_13": 3.376889729499817,
"ce_loss_2": 4.473304414749146,
"ce_loss_3": 4.201044774055481,
"ce_loss_7": 3.6403449535369874,
"epoch": 0.787,
"grad_norm": 544.0,
"kl_loss_10": 178.50691452026368,
"kl_loss_2": 2241.591131591797,
"kl_loss_3": 1762.5004089355468,
"kl_loss_7": 614.9870758056641,
"learning_rate": 0.0001099336479359398,
"loss": 1163.7643,
"step": 7870
},
{
"ce_loss_10": 3.5764689803123475,
"ce_loss_13": 3.507636034488678,
"ce_loss_2": 4.512799096107483,
"ce_loss_3": 4.25046044588089,
"ce_loss_7": 3.746009385585785,
"epoch": 0.788,
"grad_norm": 564.0,
"kl_loss_10": 175.3071716308594,
"kl_loss_2": 2102.777294921875,
"kl_loss_3": 1634.8632263183595,
"kl_loss_7": 592.2164337158204,
"learning_rate": 0.00010894297491479043,
"loss": 1142.6834,
"step": 7880
},
{
"ce_loss_10": 3.575552821159363,
"ce_loss_13": 3.5023175954818724,
"ce_loss_2": 4.539198517799377,
"ce_loss_3": 4.279193782806397,
"ce_loss_7": 3.750091075897217,
"epoch": 0.789,
"grad_norm": 576.0,
"kl_loss_10": 176.76428680419923,
"kl_loss_2": 2146.3808166503904,
"kl_loss_3": 1681.0488159179688,
"kl_loss_7": 595.56142578125,
"learning_rate": 0.00010795623983354214,
"loss": 1139.8293,
"step": 7890
},
{
"ce_loss_10": 3.4591768264770506,
"ce_loss_13": 3.3825891733169557,
"ce_loss_2": 4.4514943838119505,
"ce_loss_3": 4.181638932228088,
"ce_loss_7": 3.643087315559387,
"epoch": 0.79,
"grad_norm": 580.0,
"kl_loss_10": 182.44262008666993,
"kl_loss_2": 2230.9637817382813,
"kl_loss_3": 1740.3924072265625,
"kl_loss_7": 621.2922943115234,
"learning_rate": 0.00010697345262860636,
"loss": 1171.6089,
"step": 7900
},
{
"ce_loss_10": 3.600342130661011,
"ce_loss_13": 3.5264546155929564,
"ce_loss_2": 4.545495390892029,
"ce_loss_3": 4.278535521030426,
"ce_loss_7": 3.771434724330902,
"epoch": 0.791,
"grad_norm": 736.0,
"kl_loss_10": 177.22287063598634,
"kl_loss_2": 2132.298791503906,
"kl_loss_3": 1650.1429077148437,
"kl_loss_7": 593.3167419433594,
"learning_rate": 0.00010599462319663906,
"loss": 1136.3734,
"step": 7910
},
{
"ce_loss_10": 3.5746383547782896,
"ce_loss_13": 3.4998196601867675,
"ce_loss_2": 4.493499338626862,
"ce_loss_3": 4.230222713947296,
"ce_loss_7": 3.7425215244293213,
"epoch": 0.792,
"grad_norm": 520.0,
"kl_loss_10": 174.49715042114258,
"kl_loss_2": 2051.4084716796874,
"kl_loss_3": 1592.7509643554688,
"kl_loss_7": 582.2202606201172,
"learning_rate": 0.00010501976139444191,
"loss": 1118.4902,
"step": 7920
},
{
"ce_loss_10": 3.6047690868377686,
"ce_loss_13": 3.5289911150932314,
"ce_loss_2": 4.545255088806153,
"ce_loss_3": 4.2847788572311405,
"ce_loss_7": 3.7748185992240906,
"epoch": 0.793,
"grad_norm": 604.0,
"kl_loss_10": 176.07794952392578,
"kl_loss_2": 2104.0453186035156,
"kl_loss_3": 1645.7491271972656,
"kl_loss_7": 587.9952331542969,
"learning_rate": 0.0001040488770388625,
"loss": 1154.1295,
"step": 7930
},
{
"ce_loss_10": 3.548888790607452,
"ce_loss_13": 3.4759244203567503,
"ce_loss_2": 4.515872287750244,
"ce_loss_3": 4.250580382347107,
"ce_loss_7": 3.7205033540725707,
"epoch": 0.794,
"grad_norm": 680.0,
"kl_loss_10": 177.23135299682616,
"kl_loss_2": 2173.47548828125,
"kl_loss_3": 1700.3454467773438,
"kl_loss_7": 599.390249633789,
"learning_rate": 0.00010308197990669538,
"loss": 1149.7575,
"step": 7940
},
{
"ce_loss_10": 3.664888024330139,
"ce_loss_13": 3.5850353479385375,
"ce_loss_2": 4.610143923759461,
"ce_loss_3": 4.346996653079986,
"ce_loss_7": 3.83613098859787,
"epoch": 0.795,
"grad_norm": 540.0,
"kl_loss_10": 179.47011337280273,
"kl_loss_2": 2129.81064453125,
"kl_loss_3": 1662.4896118164063,
"kl_loss_7": 599.4926940917969,
"learning_rate": 0.0001021190797345839,
"loss": 1140.0146,
"step": 7950
},
{
"ce_loss_10": 3.3896429777145385,
"ce_loss_13": 3.3098750829696657,
"ce_loss_2": 4.413335740566254,
"ce_loss_3": 4.134794509410858,
"ce_loss_7": 3.57609201669693,
"epoch": 0.796,
"grad_norm": 580.0,
"kl_loss_10": 185.28996887207032,
"kl_loss_2": 2269.882763671875,
"kl_loss_3": 1772.13056640625,
"kl_loss_7": 628.5536560058594,
"learning_rate": 0.00010116018621892236,
"loss": 1175.2182,
"step": 7960
},
{
"ce_loss_10": 3.603187918663025,
"ce_loss_13": 3.5232182860374452,
"ce_loss_2": 4.567694234848022,
"ce_loss_3": 4.309406304359436,
"ce_loss_7": 3.779837393760681,
"epoch": 0.797,
"grad_norm": 608.0,
"kl_loss_10": 186.1454734802246,
"kl_loss_2": 2165.1848999023437,
"kl_loss_3": 1705.360009765625,
"kl_loss_7": 616.0535675048828,
"learning_rate": 0.00010020530901575753,
"loss": 1136.0533,
"step": 7970
},
{
"ce_loss_10": 3.625728499889374,
"ce_loss_13": 3.5490816116333006,
"ce_loss_2": 4.573475480079651,
"ce_loss_3": 4.304804050922394,
"ce_loss_7": 3.799483132362366,
"epoch": 0.798,
"grad_norm": 520.0,
"kl_loss_10": 180.7791946411133,
"kl_loss_2": 2134.7111938476564,
"kl_loss_3": 1658.3630126953126,
"kl_loss_7": 601.8699676513672,
"learning_rate": 9.925445774069231e-05,
"loss": 1126.8894,
"step": 7980
},
{
"ce_loss_10": 3.5760633826255797,
"ce_loss_13": 3.500509262084961,
"ce_loss_2": 4.527716112136841,
"ce_loss_3": 4.2627903580665585,
"ce_loss_7": 3.754010498523712,
"epoch": 0.799,
"grad_norm": 728.0,
"kl_loss_10": 177.96156311035156,
"kl_loss_2": 2117.894659423828,
"kl_loss_3": 1646.0127319335938,
"kl_loss_7": 595.1834564208984,
"learning_rate": 9.830764196878872e-05,
"loss": 1125.6953,
"step": 7990
},
{
"ce_loss_10": 3.5167272210121157,
"ce_loss_13": 3.443312036991119,
"ce_loss_2": 4.485463619232178,
"ce_loss_3": 4.227659916877746,
"ce_loss_7": 3.6942790031433104,
"epoch": 0.8,
"grad_norm": 480.0,
"kl_loss_10": 175.60029678344728,
"kl_loss_2": 2190.2330810546873,
"kl_loss_3": 1721.7053161621093,
"kl_loss_7": 603.3801788330078,
"learning_rate": 9.736487123447069e-05,
"loss": 1159.6166,
"step": 8000
},
{
"ce_loss_10": 3.4639697551727293,
"ce_loss_13": 3.389075720310211,
"ce_loss_2": 4.485336112976074,
"ce_loss_3": 4.228267467021942,
"ce_loss_7": 3.6402989268302917,
"epoch": 0.801,
"grad_norm": 600.0,
"kl_loss_10": 179.87705307006837,
"kl_loss_2": 2294.8387084960937,
"kl_loss_3": 1823.8187927246095,
"kl_loss_7": 608.9864349365234,
"learning_rate": 9.642615503142926e-05,
"loss": 1194.0703,
"step": 8010
},
{
"ce_loss_10": 3.5347692489624025,
"ce_loss_13": 3.4572302103042603,
"ce_loss_2": 4.520514702796936,
"ce_loss_3": 4.254623317718506,
"ce_loss_7": 3.7080873131752012,
"epoch": 0.802,
"grad_norm": 572.0,
"kl_loss_10": 175.5455764770508,
"kl_loss_2": 2196.378839111328,
"kl_loss_3": 1715.636651611328,
"kl_loss_7": 596.8724548339844,
"learning_rate": 9.549150281252633e-05,
"loss": 1151.6992,
"step": 8020
},
{
"ce_loss_10": 3.563262867927551,
"ce_loss_13": 3.486225724220276,
"ce_loss_2": 4.527439785003662,
"ce_loss_3": 4.256748235225677,
"ce_loss_7": 3.7390462875366213,
"epoch": 0.803,
"grad_norm": 520.0,
"kl_loss_10": 179.0418388366699,
"kl_loss_2": 2160.1055908203125,
"kl_loss_3": 1681.3586303710938,
"kl_loss_7": 596.7608947753906,
"learning_rate": 9.4560923989699e-05,
"loss": 1169.5601,
"step": 8030
},
{
"ce_loss_10": 3.549173581600189,
"ce_loss_13": 3.4747613072395325,
"ce_loss_2": 4.515510749816895,
"ce_loss_3": 4.245619797706604,
"ce_loss_7": 3.7281826019287108,
"epoch": 0.804,
"grad_norm": 552.0,
"kl_loss_10": 177.9036865234375,
"kl_loss_2": 2149.996447753906,
"kl_loss_3": 1673.0265747070312,
"kl_loss_7": 598.6687103271485,
"learning_rate": 9.363442793386607e-05,
"loss": 1174.7094,
"step": 8040
},
{
"ce_loss_10": 3.5321462750434875,
"ce_loss_13": 3.453168177604675,
"ce_loss_2": 4.5332019329071045,
"ce_loss_3": 4.265519142150879,
"ce_loss_7": 3.7162665724754333,
"epoch": 0.805,
"grad_norm": 592.0,
"kl_loss_10": 181.04829177856445,
"kl_loss_2": 2218.4782836914064,
"kl_loss_3": 1732.6318908691405,
"kl_loss_7": 617.0633758544922,
"learning_rate": 9.271202397483213e-05,
"loss": 1149.8916,
"step": 8050
},
{
"ce_loss_10": 3.547755253314972,
"ce_loss_13": 3.474861478805542,
"ce_loss_2": 4.498465514183044,
"ce_loss_3": 4.235939025878906,
"ce_loss_7": 3.7163867115974427,
"epoch": 0.806,
"grad_norm": 572.0,
"kl_loss_10": 175.92396697998046,
"kl_loss_2": 2136.2159729003906,
"kl_loss_3": 1668.2355712890626,
"kl_loss_7": 590.7317443847656,
"learning_rate": 9.179372140119524e-05,
"loss": 1168.4604,
"step": 8060
},
{
"ce_loss_10": 3.494523513317108,
"ce_loss_13": 3.4206513285636904,
"ce_loss_2": 4.459244108200073,
"ce_loss_3": 4.188582479953766,
"ce_loss_7": 3.6680249691009523,
"epoch": 0.807,
"grad_norm": 564.0,
"kl_loss_10": 176.53147811889647,
"kl_loss_2": 2154.551867675781,
"kl_loss_3": 1677.4455200195312,
"kl_loss_7": 596.7226654052735,
"learning_rate": 9.087952946025175e-05,
"loss": 1164.4886,
"step": 8070
},
{
"ce_loss_10": 3.6058158397674562,
"ce_loss_13": 3.5339553594589233,
"ce_loss_2": 4.533062171936035,
"ce_loss_3": 4.26933354139328,
"ce_loss_7": 3.768651068210602,
"epoch": 0.808,
"grad_norm": 592.0,
"kl_loss_10": 173.78207092285157,
"kl_loss_2": 2082.071905517578,
"kl_loss_3": 1614.2910522460938,
"kl_loss_7": 576.9482543945312,
"learning_rate": 8.996945735790446e-05,
"loss": 1146.8303,
"step": 8080
},
{
"ce_loss_10": 3.50276095867157,
"ce_loss_13": 3.428582501411438,
"ce_loss_2": 4.457551169395447,
"ce_loss_3": 4.193507122993469,
"ce_loss_7": 3.672742247581482,
"epoch": 0.809,
"grad_norm": 608.0,
"kl_loss_10": 175.95007400512696,
"kl_loss_2": 2152.8104553222656,
"kl_loss_3": 1678.2301330566406,
"kl_loss_7": 594.3645935058594,
"learning_rate": 8.906351425856951e-05,
"loss": 1158.1713,
"step": 8090
},
{
"ce_loss_10": 3.4856011509895324,
"ce_loss_13": 3.412043738365173,
"ce_loss_2": 4.477128624916077,
"ce_loss_3": 4.2102068901062015,
"ce_loss_7": 3.663009238243103,
"epoch": 0.81,
"grad_norm": 588.0,
"kl_loss_10": 178.9893539428711,
"kl_loss_2": 2217.222705078125,
"kl_loss_3": 1734.5539611816407,
"kl_loss_7": 606.3814819335937,
"learning_rate": 8.816170928508365e-05,
"loss": 1174.1137,
"step": 8100
},
{
"ce_loss_10": 3.456532561779022,
"ce_loss_13": 3.380963850021362,
"ce_loss_2": 4.470215916633606,
"ce_loss_3": 4.203172373771667,
"ce_loss_7": 3.637952506542206,
"epoch": 0.811,
"grad_norm": 564.0,
"kl_loss_10": 181.76464233398437,
"kl_loss_2": 2271.6646240234377,
"kl_loss_3": 1789.1731323242188,
"kl_loss_7": 618.0579010009766,
"learning_rate": 8.7264051518613e-05,
"loss": 1188.6756,
"step": 8110
},
{
"ce_loss_10": 3.5451604604721068,
"ce_loss_13": 3.4716222047805787,
"ce_loss_2": 4.482484936714172,
"ce_loss_3": 4.218600440025329,
"ce_loss_7": 3.7114962100982667,
"epoch": 0.812,
"grad_norm": 572.0,
"kl_loss_10": 174.23039016723632,
"kl_loss_2": 2104.2210205078127,
"kl_loss_3": 1635.3122253417969,
"kl_loss_7": 586.6013153076171,
"learning_rate": 8.637054999856148e-05,
"loss": 1140.2461,
"step": 8120
},
{
"ce_loss_10": 3.5334657073020934,
"ce_loss_13": 3.4553168177604676,
"ce_loss_2": 4.5001609325408936,
"ce_loss_3": 4.233760714530945,
"ce_loss_7": 3.71214896440506,
"epoch": 0.813,
"grad_norm": 572.0,
"kl_loss_10": 179.94693908691406,
"kl_loss_2": 2168.638543701172,
"kl_loss_3": 1690.7197631835938,
"kl_loss_7": 602.7170349121094,
"learning_rate": 8.548121372247918e-05,
"loss": 1176.2271,
"step": 8130
},
{
"ce_loss_10": 3.6031864166259764,
"ce_loss_13": 3.5284059882164,
"ce_loss_2": 4.540188145637512,
"ce_loss_3": 4.276613438129425,
"ce_loss_7": 3.7673808455467226,
"epoch": 0.814,
"grad_norm": 576.0,
"kl_loss_10": 175.132576751709,
"kl_loss_2": 2116.421954345703,
"kl_loss_3": 1652.4540222167968,
"kl_loss_7": 584.193586730957,
"learning_rate": 8.459605164597267e-05,
"loss": 1140.1102,
"step": 8140
},
{
"ce_loss_10": 3.4851497173309327,
"ce_loss_13": 3.4121100902557373,
"ce_loss_2": 4.4567595481872555,
"ce_loss_3": 4.188820004463196,
"ce_loss_7": 3.6609971284866334,
"epoch": 0.815,
"grad_norm": 516.0,
"kl_loss_10": 176.46202392578124,
"kl_loss_2": 2164.97646484375,
"kl_loss_3": 1690.654833984375,
"kl_loss_7": 595.82392578125,
"learning_rate": 8.371507268261436e-05,
"loss": 1160.1355,
"step": 8150
},
{
"ce_loss_10": 3.5612674951553345,
"ce_loss_13": 3.486202526092529,
"ce_loss_2": 4.5333171606063845,
"ce_loss_3": 4.264222574234009,
"ce_loss_7": 3.7375367999076845,
"epoch": 0.816,
"grad_norm": 536.0,
"kl_loss_10": 178.54783096313477,
"kl_loss_2": 2174.8032287597657,
"kl_loss_3": 1693.2032836914063,
"kl_loss_7": 601.639274597168,
"learning_rate": 8.283828570385238e-05,
"loss": 1135.0793,
"step": 8160
},
{
"ce_loss_10": 3.566178250312805,
"ce_loss_13": 3.4907922625541685,
"ce_loss_2": 4.535942006111145,
"ce_loss_3": 4.269787204265595,
"ce_loss_7": 3.745415151119232,
"epoch": 0.817,
"grad_norm": 596.0,
"kl_loss_10": 175.95259857177734,
"kl_loss_2": 2127.893908691406,
"kl_loss_3": 1655.5139526367188,
"kl_loss_7": 597.2055206298828,
"learning_rate": 8.196569953892202e-05,
"loss": 1147.5553,
"step": 8170
},
{
"ce_loss_10": 3.485050618648529,
"ce_loss_13": 3.410007894039154,
"ce_loss_2": 4.454114603996277,
"ce_loss_3": 4.187376809120178,
"ce_loss_7": 3.6601618766784667,
"epoch": 0.818,
"grad_norm": 640.0,
"kl_loss_10": 177.51841354370117,
"kl_loss_2": 2140.9811096191406,
"kl_loss_3": 1670.5337829589844,
"kl_loss_7": 598.4014099121093,
"learning_rate": 8.109732297475635e-05,
"loss": 1142.2542,
"step": 8180
},
{
"ce_loss_10": 3.4574038982391357,
"ce_loss_13": 3.376466763019562,
"ce_loss_2": 4.488081407546997,
"ce_loss_3": 4.2140247344970705,
"ce_loss_7": 3.65143164396286,
"epoch": 0.819,
"grad_norm": 588.0,
"kl_loss_10": 184.73964157104493,
"kl_loss_2": 2257.5185974121096,
"kl_loss_3": 1764.1057739257812,
"kl_loss_7": 626.5130950927735,
"learning_rate": 8.023316475589754e-05,
"loss": 1190.8261,
"step": 8190
},
{
"ce_loss_10": 3.4220961928367615,
"ce_loss_13": 3.338273751735687,
"ce_loss_2": 4.495982336997986,
"ce_loss_3": 4.211030387878418,
"ce_loss_7": 3.615649092197418,
"epoch": 0.82,
"grad_norm": 680.0,
"kl_loss_10": 185.78453063964844,
"kl_loss_2": 2349.4278076171877,
"kl_loss_3": 1849.0640197753905,
"kl_loss_7": 637.3896453857421,
"learning_rate": 7.937323358440934e-05,
"loss": 1214.0248,
"step": 8200
},
{
"ce_loss_10": 3.541324031352997,
"ce_loss_13": 3.4684749960899355,
"ce_loss_2": 4.468911576271057,
"ce_loss_3": 4.20685533285141,
"ce_loss_7": 3.709389495849609,
"epoch": 0.821,
"grad_norm": 584.0,
"kl_loss_10": 174.97513656616212,
"kl_loss_2": 2090.5933227539062,
"kl_loss_3": 1628.1781616210938,
"kl_loss_7": 589.6404022216797,
"learning_rate": 7.851753811978923e-05,
"loss": 1140.9928,
"step": 8210
},
{
"ce_loss_10": 3.5598355412483214,
"ce_loss_13": 3.4843420505523683,
"ce_loss_2": 4.541475534439087,
"ce_loss_3": 4.275748348236084,
"ce_loss_7": 3.735049307346344,
"epoch": 0.822,
"grad_norm": 604.0,
"kl_loss_10": 177.00316925048827,
"kl_loss_2": 2174.6779052734373,
"kl_loss_3": 1702.5414611816407,
"kl_loss_7": 595.3967559814453,
"learning_rate": 7.766608697888095e-05,
"loss": 1150.2977,
"step": 8220
},
{
"ce_loss_10": 3.57365106344223,
"ce_loss_13": 3.498138427734375,
"ce_loss_2": 4.5424954175949095,
"ce_loss_3": 4.2809364080429075,
"ce_loss_7": 3.7457818508148195,
"epoch": 0.823,
"grad_norm": 576.0,
"kl_loss_10": 180.9578956604004,
"kl_loss_2": 2174.856481933594,
"kl_loss_3": 1712.8867797851562,
"kl_loss_7": 606.9090911865235,
"learning_rate": 7.681888873578785e-05,
"loss": 1172.8941,
"step": 8230
},
{
"ce_loss_10": 3.5023999333381655,
"ce_loss_13": 3.423751747608185,
"ce_loss_2": 4.507507848739624,
"ce_loss_3": 4.228441286087036,
"ce_loss_7": 3.6896154999732973,
"epoch": 0.824,
"grad_norm": 556.0,
"kl_loss_10": 182.18136978149414,
"kl_loss_2": 2222.666143798828,
"kl_loss_3": 1725.1198669433593,
"kl_loss_7": 612.9842071533203,
"learning_rate": 7.597595192178702e-05,
"loss": 1157.2363,
"step": 8240
},
{
"ce_loss_10": 3.501276743412018,
"ce_loss_13": 3.422858786582947,
"ce_loss_2": 4.514269304275513,
"ce_loss_3": 4.2400298595428465,
"ce_loss_7": 3.6824575424194337,
"epoch": 0.825,
"grad_norm": 588.0,
"kl_loss_10": 181.8477668762207,
"kl_loss_2": 2277.1808898925783,
"kl_loss_3": 1781.4539184570312,
"kl_loss_7": 622.9125793457031,
"learning_rate": 7.513728502524286e-05,
"loss": 1187.7779,
"step": 8250
},
{
"ce_loss_10": 3.5026116013526916,
"ce_loss_13": 3.428036153316498,
"ce_loss_2": 4.455591607093811,
"ce_loss_3": 4.192325818538666,
"ce_loss_7": 3.6698171854019166,
"epoch": 0.826,
"grad_norm": 540.0,
"kl_loss_10": 170.86422119140624,
"kl_loss_2": 2124.475665283203,
"kl_loss_3": 1660.3611083984374,
"kl_loss_7": 580.019741821289,
"learning_rate": 7.430289649152156e-05,
"loss": 1161.5576,
"step": 8260
},
{
"ce_loss_10": 3.404016637802124,
"ce_loss_13": 3.3291639566421507,
"ce_loss_2": 4.41770989894867,
"ce_loss_3": 4.153000998497009,
"ce_loss_7": 3.591460871696472,
"epoch": 0.827,
"grad_norm": 564.0,
"kl_loss_10": 179.06679763793946,
"kl_loss_2": 2271.2553527832033,
"kl_loss_3": 1785.926287841797,
"kl_loss_7": 620.811245727539,
"learning_rate": 7.347279472290646e-05,
"loss": 1175.1479,
"step": 8270
},
{
"ce_loss_10": 3.5473936796188354,
"ce_loss_13": 3.4728646278381348,
"ce_loss_2": 4.529109454154968,
"ce_loss_3": 4.2634922623634335,
"ce_loss_7": 3.7243195176124573,
"epoch": 0.828,
"grad_norm": 516.0,
"kl_loss_10": 176.9839729309082,
"kl_loss_2": 2192.3595458984373,
"kl_loss_3": 1719.4920288085937,
"kl_loss_7": 599.4450622558594,
"learning_rate": 7.264698807851328e-05,
"loss": 1170.4515,
"step": 8280
},
{
"ce_loss_10": 3.511405515670776,
"ce_loss_13": 3.4420324087142946,
"ce_loss_2": 4.462396240234375,
"ce_loss_3": 4.196757709980011,
"ce_loss_7": 3.678698420524597,
"epoch": 0.829,
"grad_norm": 520.0,
"kl_loss_10": 173.053133392334,
"kl_loss_2": 2122.6127746582033,
"kl_loss_3": 1647.0698120117188,
"kl_loss_7": 586.3415740966797,
"learning_rate": 7.182548487420554e-05,
"loss": 1152.9492,
"step": 8290
},
{
"ce_loss_10": 3.56430242061615,
"ce_loss_13": 3.4877224922180177,
"ce_loss_2": 4.515660381317138,
"ce_loss_3": 4.256710803508758,
"ce_loss_7": 3.7377355813980104,
"epoch": 0.83,
"grad_norm": 552.0,
"kl_loss_10": 178.47344284057618,
"kl_loss_2": 2141.9302307128905,
"kl_loss_3": 1673.7640502929687,
"kl_loss_7": 594.9386505126953,
"learning_rate": 7.100829338251146e-05,
"loss": 1142.0348,
"step": 8300
},
{
"ce_loss_10": 3.500733995437622,
"ce_loss_13": 3.420394682884216,
"ce_loss_2": 4.495737314224243,
"ce_loss_3": 4.226100885868073,
"ce_loss_7": 3.6811896324157716,
"epoch": 0.831,
"grad_norm": 664.0,
"kl_loss_10": 181.1860824584961,
"kl_loss_2": 2206.595349121094,
"kl_loss_3": 1725.6842834472657,
"kl_loss_7": 613.2857147216797,
"learning_rate": 7.019542183254046e-05,
"loss": 1155.5816,
"step": 8310
},
{
"ce_loss_10": 3.542024350166321,
"ce_loss_13": 3.46200088262558,
"ce_loss_2": 4.494865345954895,
"ce_loss_3": 4.2283999681472775,
"ce_loss_7": 3.7202147483825683,
"epoch": 0.832,
"grad_norm": 700.0,
"kl_loss_10": 184.21017608642578,
"kl_loss_2": 2155.698864746094,
"kl_loss_3": 1680.3289855957032,
"kl_loss_7": 609.4942947387696,
"learning_rate": 6.938687840989971e-05,
"loss": 1152.3119,
"step": 8320
},
{
"ce_loss_10": 3.475117301940918,
"ce_loss_13": 3.396902585029602,
"ce_loss_2": 4.447841107845306,
"ce_loss_3": 4.1811567902565,
"ce_loss_7": 3.65566908121109,
"epoch": 0.833,
"grad_norm": 644.0,
"kl_loss_10": 180.3916358947754,
"kl_loss_2": 2151.0271850585937,
"kl_loss_3": 1678.1678161621094,
"kl_loss_7": 600.837434387207,
"learning_rate": 6.858267125661271e-05,
"loss": 1171.0359,
"step": 8330
},
{
"ce_loss_10": 3.5341761112213135,
"ce_loss_13": 3.4623092293739317,
"ce_loss_2": 4.504952430725098,
"ce_loss_3": 4.235734903812409,
"ce_loss_7": 3.7161438941955565,
"epoch": 0.834,
"grad_norm": 652.0,
"kl_loss_10": 176.11358489990235,
"kl_loss_2": 2152.3007080078123,
"kl_loss_3": 1674.9229736328125,
"kl_loss_7": 599.1006164550781,
"learning_rate": 6.778280847103668e-05,
"loss": 1187.2795,
"step": 8340
},
{
"ce_loss_10": 3.5474065065383913,
"ce_loss_13": 3.4685073494911194,
"ce_loss_2": 4.511071228981018,
"ce_loss_3": 4.243249070644379,
"ce_loss_7": 3.7250569343566893,
"epoch": 0.835,
"grad_norm": 544.0,
"kl_loss_10": 179.0617935180664,
"kl_loss_2": 2161.984759521484,
"kl_loss_3": 1690.6224731445313,
"kl_loss_7": 606.1362609863281,
"learning_rate": 6.698729810778065e-05,
"loss": 1153.2188,
"step": 8350
},
{
"ce_loss_10": 3.4568483591079713,
"ce_loss_13": 3.3825206756591797,
"ce_loss_2": 4.450219774246216,
"ce_loss_3": 4.178645396232605,
"ce_loss_7": 3.6368404626846313,
"epoch": 0.836,
"grad_norm": 628.0,
"kl_loss_10": 176.9057861328125,
"kl_loss_2": 2207.3519287109375,
"kl_loss_3": 1716.1288146972656,
"kl_loss_7": 600.357534790039,
"learning_rate": 6.619614817762538e-05,
"loss": 1175.9664,
"step": 8360
},
{
"ce_loss_10": 3.421834397315979,
"ce_loss_13": 3.3451184391975404,
"ce_loss_2": 4.458368134498596,
"ce_loss_3": 4.186334764957428,
"ce_loss_7": 3.611315131187439,
"epoch": 0.837,
"grad_norm": 524.0,
"kl_loss_10": 179.41786346435546,
"kl_loss_2": 2288.001843261719,
"kl_loss_3": 1799.511328125,
"kl_loss_7": 622.7222686767578,
"learning_rate": 6.540936664744196e-05,
"loss": 1185.6504,
"step": 8370
},
{
"ce_loss_10": 3.5705604910850526,
"ce_loss_13": 3.495253837108612,
"ce_loss_2": 4.550519323348999,
"ce_loss_3": 4.287250196933746,
"ce_loss_7": 3.7482882261276247,
"epoch": 0.838,
"grad_norm": 556.0,
"kl_loss_10": 178.4987823486328,
"kl_loss_2": 2165.8636779785156,
"kl_loss_3": 1697.199853515625,
"kl_loss_7": 600.5514739990234,
"learning_rate": 6.462696144011149e-05,
"loss": 1144.948,
"step": 8380
},
{
"ce_loss_10": 3.521126616001129,
"ce_loss_13": 3.44657279253006,
"ce_loss_2": 4.47376012802124,
"ce_loss_3": 4.215767812728882,
"ce_loss_7": 3.700752067565918,
"epoch": 0.839,
"grad_norm": 552.0,
"kl_loss_10": 181.0975456237793,
"kl_loss_2": 2145.9520629882813,
"kl_loss_3": 1683.1618103027345,
"kl_loss_7": 606.7530914306641,
"learning_rate": 6.384894043444567e-05,
"loss": 1140.7508,
"step": 8390
},
{
"ce_loss_10": 3.5482063770294188,
"ce_loss_13": 3.4719661116600036,
"ce_loss_2": 4.523540115356445,
"ce_loss_3": 4.260622024536133,
"ce_loss_7": 3.7237455368041994,
"epoch": 0.84,
"grad_norm": 540.0,
"kl_loss_10": 178.300057220459,
"kl_loss_2": 2181.8050231933594,
"kl_loss_3": 1707.3205505371093,
"kl_loss_7": 602.2170059204102,
"learning_rate": 6.307531146510753e-05,
"loss": 1150.0869,
"step": 8400
},
{
"ce_loss_10": 3.526041495800018,
"ce_loss_13": 3.4509783387184143,
"ce_loss_2": 4.471963119506836,
"ce_loss_3": 4.206485414505005,
"ce_loss_7": 3.701741063594818,
"epoch": 0.841,
"grad_norm": 560.0,
"kl_loss_10": 177.0880439758301,
"kl_loss_2": 2118.260693359375,
"kl_loss_3": 1641.845037841797,
"kl_loss_7": 595.3580291748046,
"learning_rate": 6.230608232253226e-05,
"loss": 1129.8508,
"step": 8410
},
{
"ce_loss_10": 3.4824550271034242,
"ce_loss_13": 3.405357301235199,
"ce_loss_2": 4.482615494728089,
"ce_loss_3": 4.2257519364356995,
"ce_loss_7": 3.6680721044540405,
"epoch": 0.842,
"grad_norm": 512.0,
"kl_loss_10": 179.48506774902344,
"kl_loss_2": 2227.8513061523436,
"kl_loss_3": 1761.4332275390625,
"kl_loss_7": 616.7242858886718,
"learning_rate": 6.154126075284855e-05,
"loss": 1155.2555,
"step": 8420
},
{
"ce_loss_10": 3.577245807647705,
"ce_loss_13": 3.5012118101119993,
"ce_loss_2": 4.512918734550476,
"ce_loss_3": 4.249192714691162,
"ce_loss_7": 3.7460012435913086,
"epoch": 0.843,
"grad_norm": 704.0,
"kl_loss_10": 174.01815643310547,
"kl_loss_2": 2066.7827331542967,
"kl_loss_3": 1608.2534240722657,
"kl_loss_7": 586.0584747314454,
"learning_rate": 6.078085445780129e-05,
"loss": 1117.5314,
"step": 8430
},
{
"ce_loss_10": 3.584468650817871,
"ce_loss_13": 3.5081024169921875,
"ce_loss_2": 4.569616174697876,
"ce_loss_3": 4.300150573253632,
"ce_loss_7": 3.7608886480331423,
"epoch": 0.844,
"grad_norm": 576.0,
"kl_loss_10": 177.62249679565429,
"kl_loss_2": 2185.7296508789063,
"kl_loss_3": 1710.7079772949219,
"kl_loss_7": 599.2171966552735,
"learning_rate": 6.002487109467347e-05,
"loss": 1141.6974,
"step": 8440
},
{
"ce_loss_10": 3.587876856327057,
"ce_loss_13": 3.5108195781707763,
"ce_loss_2": 4.524105596542358,
"ce_loss_3": 4.2644576787948605,
"ce_loss_7": 3.756032574176788,
"epoch": 0.845,
"grad_norm": 592.0,
"kl_loss_10": 181.40281448364257,
"kl_loss_2": 2131.273962402344,
"kl_loss_3": 1667.7534301757812,
"kl_loss_7": 605.4166229248046,
"learning_rate": 5.927331827620902e-05,
"loss": 1141.2443,
"step": 8450
},
{
"ce_loss_10": 3.573608911037445,
"ce_loss_13": 3.499223828315735,
"ce_loss_2": 4.488192296028137,
"ce_loss_3": 4.230596256256104,
"ce_loss_7": 3.7483445525169374,
"epoch": 0.846,
"grad_norm": 552.0,
"kl_loss_10": 175.3866973876953,
"kl_loss_2": 2041.3302062988282,
"kl_loss_3": 1588.8981994628907,
"kl_loss_7": 591.4093719482422,
"learning_rate": 5.852620357053651e-05,
"loss": 1132.2791,
"step": 8460
},
{
"ce_loss_10": 3.6111098527908325,
"ce_loss_13": 3.536815571784973,
"ce_loss_2": 4.544794130325317,
"ce_loss_3": 4.2818133473396305,
"ce_loss_7": 3.780075490474701,
"epoch": 0.847,
"grad_norm": 596.0,
"kl_loss_10": 174.2255989074707,
"kl_loss_2": 2095.0241271972654,
"kl_loss_3": 1629.0387634277345,
"kl_loss_7": 588.6748046875,
"learning_rate": 5.778353450109286e-05,
"loss": 1140.0846,
"step": 8470
},
{
"ce_loss_10": 3.648575019836426,
"ce_loss_13": 3.5720423340797423,
"ce_loss_2": 4.605719590187073,
"ce_loss_3": 4.344667458534241,
"ce_loss_7": 3.8233685731887816,
"epoch": 0.848,
"grad_norm": 486.0,
"kl_loss_10": 179.81479415893554,
"kl_loss_2": 2138.249481201172,
"kl_loss_3": 1668.5974670410155,
"kl_loss_7": 599.209912109375,
"learning_rate": 5.7045318546547206e-05,
"loss": 1146.8947,
"step": 8480
},
{
"ce_loss_10": 3.5448459148406983,
"ce_loss_13": 3.468916821479797,
"ce_loss_2": 4.523812007904053,
"ce_loss_3": 4.25856339931488,
"ce_loss_7": 3.7180402636528016,
"epoch": 0.849,
"grad_norm": 556.0,
"kl_loss_10": 176.35201721191407,
"kl_loss_2": 2188.633197021484,
"kl_loss_3": 1710.0829528808595,
"kl_loss_7": 595.4919921875,
"learning_rate": 5.631156314072605e-05,
"loss": 1145.8699,
"step": 8490
},
{
"ce_loss_10": 3.559221601486206,
"ce_loss_13": 3.4834945678710936,
"ce_loss_2": 4.495839285850525,
"ce_loss_3": 4.22744711637497,
"ce_loss_7": 3.7315674662590026,
"epoch": 0.85,
"grad_norm": 536.0,
"kl_loss_10": 176.7962844848633,
"kl_loss_2": 2110.8933044433593,
"kl_loss_3": 1632.140673828125,
"kl_loss_7": 588.6574279785157,
"learning_rate": 5.5582275672538315e-05,
"loss": 1128.7181,
"step": 8500
},
{
"ce_loss_10": 3.4811159491539003,
"ce_loss_13": 3.403310573101044,
"ce_loss_2": 4.505023097991943,
"ce_loss_3": 4.238485896587372,
"ce_loss_7": 3.6721346259117125,
"epoch": 0.851,
"grad_norm": 608.0,
"kl_loss_10": 182.8627899169922,
"kl_loss_2": 2282.4833068847656,
"kl_loss_3": 1798.2781005859374,
"kl_loss_7": 625.0588409423829,
"learning_rate": 5.4857463485900484e-05,
"loss": 1192.5725,
"step": 8510
},
{
"ce_loss_10": 3.5297972202301025,
"ce_loss_13": 3.454351043701172,
"ce_loss_2": 4.489051342010498,
"ce_loss_3": 4.219796097278595,
"ce_loss_7": 3.7097468852996824,
"epoch": 0.852,
"grad_norm": 592.0,
"kl_loss_10": 178.4038864135742,
"kl_loss_2": 2146.418908691406,
"kl_loss_3": 1667.9574584960938,
"kl_loss_7": 602.1778778076172,
"learning_rate": 5.413713387966329e-05,
"loss": 1150.9164,
"step": 8520
},
{
"ce_loss_10": 3.558277463912964,
"ce_loss_13": 3.480745458602905,
"ce_loss_2": 4.530928635597229,
"ce_loss_3": 4.266195034980774,
"ce_loss_7": 3.7276942253112795,
"epoch": 0.853,
"grad_norm": 620.0,
"kl_loss_10": 178.86384658813478,
"kl_loss_2": 2174.0791748046877,
"kl_loss_3": 1703.99384765625,
"kl_loss_7": 598.962451171875,
"learning_rate": 5.34212941075381e-05,
"loss": 1160.2438,
"step": 8530
},
{
"ce_loss_10": 3.559523808956146,
"ce_loss_13": 3.4899546623229982,
"ce_loss_2": 4.511786758899689,
"ce_loss_3": 4.244810569286346,
"ce_loss_7": 3.7282875180244446,
"epoch": 0.854,
"grad_norm": 544.0,
"kl_loss_10": 173.61905364990236,
"kl_loss_2": 2125.2814514160154,
"kl_loss_3": 1651.0782043457032,
"kl_loss_7": 580.7644989013672,
"learning_rate": 5.270995137802315e-05,
"loss": 1139.1208,
"step": 8540
},
{
"ce_loss_10": 3.4913312673568724,
"ce_loss_13": 3.4199952483177185,
"ce_loss_2": 4.46144163608551,
"ce_loss_3": 4.1969265818595884,
"ce_loss_7": 3.6703786849975586,
"epoch": 0.855,
"grad_norm": 596.0,
"kl_loss_10": 176.40887756347655,
"kl_loss_2": 2170.294287109375,
"kl_loss_3": 1693.5492065429687,
"kl_loss_7": 604.686279296875,
"learning_rate": 5.2003112854332125e-05,
"loss": 1161.1432,
"step": 8550
},
{
"ce_loss_10": 3.495318293571472,
"ce_loss_13": 3.421377086639404,
"ce_loss_2": 4.460399007797241,
"ce_loss_3": 4.191486561298371,
"ce_loss_7": 3.666605508327484,
"epoch": 0.856,
"grad_norm": 624.0,
"kl_loss_10": 174.107186126709,
"kl_loss_2": 2159.1057250976564,
"kl_loss_3": 1686.5014953613281,
"kl_loss_7": 595.0416564941406,
"learning_rate": 5.130078565432089e-05,
"loss": 1138.6503,
"step": 8560
},
{
"ce_loss_10": 3.5646494030952454,
"ce_loss_13": 3.4924421072006226,
"ce_loss_2": 4.498122811317444,
"ce_loss_3": 4.236236476898194,
"ce_loss_7": 3.731403958797455,
"epoch": 0.857,
"grad_norm": 548.0,
"kl_loss_10": 173.7933578491211,
"kl_loss_2": 2107.9238403320314,
"kl_loss_3": 1647.1588623046875,
"kl_loss_7": 586.2743255615235,
"learning_rate": 5.060297685041659e-05,
"loss": 1120.3278,
"step": 8570
},
{
"ce_loss_10": 3.498642110824585,
"ce_loss_13": 3.423696291446686,
"ce_loss_2": 4.494770348072052,
"ce_loss_3": 4.218523621559143,
"ce_loss_7": 3.6762722969055175,
"epoch": 0.858,
"grad_norm": 548.0,
"kl_loss_10": 180.7342544555664,
"kl_loss_2": 2221.7594360351563,
"kl_loss_3": 1733.2135437011718,
"kl_loss_7": 609.6224884033203,
"learning_rate": 4.99096934695461e-05,
"loss": 1183.0167,
"step": 8580
},
{
"ce_loss_10": 3.55733345746994,
"ce_loss_13": 3.4829642295837404,
"ce_loss_2": 4.523844695091247,
"ce_loss_3": 4.2578066945075985,
"ce_loss_7": 3.7367467999458315,
"epoch": 0.859,
"grad_norm": 544.0,
"kl_loss_10": 175.85337829589844,
"kl_loss_2": 2157.883190917969,
"kl_loss_3": 1681.7412048339843,
"kl_loss_7": 598.1185791015625,
"learning_rate": 4.922094249306558e-05,
"loss": 1131.2188,
"step": 8590
},
{
"ce_loss_10": 3.5841567873954774,
"ce_loss_13": 3.509797990322113,
"ce_loss_2": 4.5469663619995115,
"ce_loss_3": 4.2871175646781925,
"ce_loss_7": 3.7628376722335815,
"epoch": 0.86,
"grad_norm": 604.0,
"kl_loss_10": 179.83917465209962,
"kl_loss_2": 2154.5059204101562,
"kl_loss_3": 1690.2081298828125,
"kl_loss_7": 604.0245941162109,
"learning_rate": 4.853673085668947e-05,
"loss": 1135.9622,
"step": 8600
},
{
"ce_loss_10": 3.60320885181427,
"ce_loss_13": 3.529503357410431,
"ce_loss_2": 4.566924571990967,
"ce_loss_3": 4.302417039871216,
"ce_loss_7": 3.7819976687431334,
"epoch": 0.861,
"grad_norm": 596.0,
"kl_loss_10": 177.36936798095704,
"kl_loss_2": 2148.3923461914064,
"kl_loss_3": 1680.8319580078125,
"kl_loss_7": 597.4716522216797,
"learning_rate": 4.78570654504214e-05,
"loss": 1156.3883,
"step": 8610
},
{
"ce_loss_10": 3.5473016500473022,
"ce_loss_13": 3.4740379452705383,
"ce_loss_2": 4.5167618751525875,
"ce_loss_3": 4.248826539516449,
"ce_loss_7": 3.7283095955848693,
"epoch": 0.862,
"grad_norm": 512.0,
"kl_loss_10": 175.62952041625977,
"kl_loss_2": 2176.8274475097655,
"kl_loss_3": 1698.4800903320313,
"kl_loss_7": 603.3765747070313,
"learning_rate": 4.7181953118484556e-05,
"loss": 1157.7057,
"step": 8620
},
{
"ce_loss_10": 3.5743127822875977,
"ce_loss_13": 3.49841423034668,
"ce_loss_2": 4.522394800186158,
"ce_loss_3": 4.2583330273628235,
"ce_loss_7": 3.751324450969696,
"epoch": 0.863,
"grad_norm": 604.0,
"kl_loss_10": 175.00742568969727,
"kl_loss_2": 2093.126568603516,
"kl_loss_3": 1630.9019409179687,
"kl_loss_7": 592.01396484375,
"learning_rate": 4.651140065925269e-05,
"loss": 1159.3387,
"step": 8630
},
{
"ce_loss_10": 3.507640373706818,
"ce_loss_13": 3.434223484992981,
"ce_loss_2": 4.487589573860168,
"ce_loss_3": 4.2153548240661625,
"ce_loss_7": 3.6856843709945677,
"epoch": 0.864,
"grad_norm": 588.0,
"kl_loss_10": 177.25660781860353,
"kl_loss_2": 2189.234814453125,
"kl_loss_3": 1705.8089416503906,
"kl_loss_7": 594.7530731201172,
"learning_rate": 4.58454148251814e-05,
"loss": 1175.3236,
"step": 8640
},
{
"ce_loss_10": 3.5295264959335326,
"ce_loss_13": 3.451808476448059,
"ce_loss_2": 4.534165596961975,
"ce_loss_3": 4.261823272705078,
"ce_loss_7": 3.7138017773628236,
"epoch": 0.865,
"grad_norm": 568.0,
"kl_loss_10": 177.66054000854493,
"kl_loss_2": 2227.2971801757812,
"kl_loss_3": 1739.9827575683594,
"kl_loss_7": 610.7921539306641,
"learning_rate": 4.518400232274078e-05,
"loss": 1162.0056,
"step": 8650
},
{
"ce_loss_10": 3.5479356169700624,
"ce_loss_13": 3.4702929258346558,
"ce_loss_2": 4.501330161094666,
"ce_loss_3": 4.237502670288086,
"ce_loss_7": 3.723937380313873,
"epoch": 0.866,
"grad_norm": 524.0,
"kl_loss_10": 179.25594482421874,
"kl_loss_2": 2137.7675231933595,
"kl_loss_3": 1671.597607421875,
"kl_loss_7": 602.4135375976563,
"learning_rate": 4.452716981234745e-05,
"loss": 1122.9633,
"step": 8660
},
{
"ce_loss_10": 3.5203991651535036,
"ce_loss_13": 3.447481095790863,
"ce_loss_2": 4.47524061203003,
"ce_loss_3": 4.205355083942413,
"ce_loss_7": 3.695466148853302,
"epoch": 0.867,
"grad_norm": 568.0,
"kl_loss_10": 174.45485000610353,
"kl_loss_2": 2135.713586425781,
"kl_loss_3": 1657.6833251953126,
"kl_loss_7": 594.2672515869141,
"learning_rate": 4.3874923908297335e-05,
"loss": 1125.4834,
"step": 8670
},
{
"ce_loss_10": 3.575284111499786,
"ce_loss_13": 3.498721444606781,
"ce_loss_2": 4.54740161895752,
"ce_loss_3": 4.281282663345337,
"ce_loss_7": 3.7500776290893554,
"epoch": 0.868,
"grad_norm": 596.0,
"kl_loss_10": 178.63047256469727,
"kl_loss_2": 2175.332312011719,
"kl_loss_3": 1702.8009948730469,
"kl_loss_7": 597.87509765625,
"learning_rate": 4.322727117869951e-05,
"loss": 1149.3994,
"step": 8680
},
{
"ce_loss_10": 3.575519359111786,
"ce_loss_13": 3.4998192310333254,
"ce_loss_2": 4.55748233795166,
"ce_loss_3": 4.284105372428894,
"ce_loss_7": 3.752281701564789,
"epoch": 0.869,
"grad_norm": 584.0,
"kl_loss_10": 179.47224349975585,
"kl_loss_2": 2189.6538024902343,
"kl_loss_3": 1698.4593017578125,
"kl_loss_7": 604.034390258789,
"learning_rate": 4.2584218145409916e-05,
"loss": 1151.0721,
"step": 8690
},
{
"ce_loss_10": 3.6216215252876283,
"ce_loss_13": 3.551214134693146,
"ce_loss_2": 4.543716049194336,
"ce_loss_3": 4.272314977645874,
"ce_loss_7": 3.786858594417572,
"epoch": 0.87,
"grad_norm": 600.0,
"kl_loss_10": 174.5644790649414,
"kl_loss_2": 2072.967736816406,
"kl_loss_3": 1598.7917419433593,
"kl_loss_7": 582.653207397461,
"learning_rate": 4.194577128396521e-05,
"loss": 1108.3934,
"step": 8700
},
{
"ce_loss_10": 3.498377776145935,
"ce_loss_13": 3.425851809978485,
"ce_loss_2": 4.466418659687042,
"ce_loss_3": 4.194790709018707,
"ce_loss_7": 3.670235824584961,
"epoch": 0.871,
"grad_norm": 506.0,
"kl_loss_10": 174.37066497802735,
"kl_loss_2": 2168.9560546875,
"kl_loss_3": 1689.01513671875,
"kl_loss_7": 590.7626220703125,
"learning_rate": 4.1311937023518264e-05,
"loss": 1166.4488,
"step": 8710
},
{
"ce_loss_10": 3.5134344696998596,
"ce_loss_13": 3.4397946119308473,
"ce_loss_2": 4.529711484909058,
"ce_loss_3": 4.263941979408264,
"ce_loss_7": 3.682065725326538,
"epoch": 0.872,
"grad_norm": 460.0,
"kl_loss_10": 171.77398529052735,
"kl_loss_2": 2246.7038696289064,
"kl_loss_3": 1774.6010986328124,
"kl_loss_7": 576.3054107666015,
"learning_rate": 4.0682721746773344e-05,
"loss": 1163.771,
"step": 8720
},
{
"ce_loss_10": 3.3905357241630556,
"ce_loss_13": 3.314958465099335,
"ce_loss_2": 4.396602368354797,
"ce_loss_3": 4.132218360900879,
"ce_loss_7": 3.5750641107559202,
"epoch": 0.873,
"grad_norm": 552.0,
"kl_loss_10": 175.90054779052736,
"kl_loss_2": 2222.7579711914063,
"kl_loss_3": 1749.4481201171875,
"kl_loss_7": 613.7805450439453,
"learning_rate": 4.0058131789920904e-05,
"loss": 1143.7059,
"step": 8730
},
{
"ce_loss_10": 3.5397099256515503,
"ce_loss_13": 3.4643173098564146,
"ce_loss_2": 4.497500014305115,
"ce_loss_3": 4.226579332351685,
"ce_loss_7": 3.709259867668152,
"epoch": 0.874,
"grad_norm": 572.0,
"kl_loss_10": 176.51957778930665,
"kl_loss_2": 2162.519940185547,
"kl_loss_3": 1680.782940673828,
"kl_loss_7": 600.4374298095703,
"learning_rate": 3.9438173442575e-05,
"loss": 1188.067,
"step": 8740
},
{
"ce_loss_10": 3.5728036165237427,
"ce_loss_13": 3.4973131656646728,
"ce_loss_2": 4.514467573165893,
"ce_loss_3": 4.250711810588837,
"ce_loss_7": 3.740223217010498,
"epoch": 0.875,
"grad_norm": 524.0,
"kl_loss_10": 175.66529846191406,
"kl_loss_2": 2112.5622009277345,
"kl_loss_3": 1651.0288818359375,
"kl_loss_7": 594.9720977783203,
"learning_rate": 3.882285294770937e-05,
"loss": 1137.6895,
"step": 8750
},
{
"ce_loss_10": 3.5377378940582274,
"ce_loss_13": 3.4619855165481566,
"ce_loss_2": 4.4779297590255736,
"ce_loss_3": 4.2127908825874325,
"ce_loss_7": 3.7088746547698976,
"epoch": 0.876,
"grad_norm": 600.0,
"kl_loss_10": 178.39902954101564,
"kl_loss_2": 2127.570068359375,
"kl_loss_3": 1648.2625427246094,
"kl_loss_7": 594.3859985351562,
"learning_rate": 3.821217650159453e-05,
"loss": 1155.4234,
"step": 8760
},
{
"ce_loss_10": 3.4084259629249574,
"ce_loss_13": 3.332693111896515,
"ce_loss_2": 4.428424310684204,
"ce_loss_3": 4.158231461048127,
"ce_loss_7": 3.6029205918312073,
"epoch": 0.877,
"grad_norm": 548.0,
"kl_loss_10": 180.0270034790039,
"kl_loss_2": 2236.819873046875,
"kl_loss_3": 1758.5952392578124,
"kl_loss_7": 625.4339080810547,
"learning_rate": 3.760615025373543e-05,
"loss": 1171.5936,
"step": 8770
},
{
"ce_loss_10": 3.595931589603424,
"ce_loss_13": 3.5179906845092774,
"ce_loss_2": 4.587141966819763,
"ce_loss_3": 4.309424257278442,
"ce_loss_7": 3.7760006308555605,
"epoch": 0.878,
"grad_norm": 660.0,
"kl_loss_10": 183.31888961791992,
"kl_loss_2": 2207.746258544922,
"kl_loss_3": 1714.543280029297,
"kl_loss_7": 607.3442749023437,
"learning_rate": 3.700478030680987e-05,
"loss": 1181.1754,
"step": 8780
},
{
"ce_loss_10": 3.5762731194496156,
"ce_loss_13": 3.5029969453811645,
"ce_loss_2": 4.536650991439819,
"ce_loss_3": 4.271095442771911,
"ce_loss_7": 3.748454582691193,
"epoch": 0.879,
"grad_norm": 502.0,
"kl_loss_10": 176.4349349975586,
"kl_loss_2": 2141.4830688476563,
"kl_loss_3": 1675.0247314453125,
"kl_loss_7": 590.1913375854492,
"learning_rate": 3.6408072716606344e-05,
"loss": 1149.3131,
"step": 8790
},
{
"ce_loss_10": 3.50073447227478,
"ce_loss_13": 3.4274021863937376,
"ce_loss_2": 4.501238942146301,
"ce_loss_3": 4.239216554164886,
"ce_loss_7": 3.683646392822266,
"epoch": 0.88,
"grad_norm": 600.0,
"kl_loss_10": 180.50948486328124,
"kl_loss_2": 2229.279052734375,
"kl_loss_3": 1758.1964477539063,
"kl_loss_7": 612.8555847167969,
"learning_rate": 3.5816033491963716e-05,
"loss": 1204.1847,
"step": 8800
},
{
"ce_loss_10": 3.3653410911560058,
"ce_loss_13": 3.289612293243408,
"ce_loss_2": 4.399015557765961,
"ce_loss_3": 4.129138934612274,
"ce_loss_7": 3.5484530568122863,
"epoch": 0.881,
"grad_norm": 696.0,
"kl_loss_10": 176.9532043457031,
"kl_loss_2": 2282.183734130859,
"kl_loss_3": 1791.626202392578,
"kl_loss_7": 607.9001495361329,
"learning_rate": 3.522866859471047e-05,
"loss": 1184.3774,
"step": 8810
},
{
"ce_loss_10": 3.597711908817291,
"ce_loss_13": 3.5282416582107543,
"ce_loss_2": 4.506113409996033,
"ce_loss_3": 4.251258683204651,
"ce_loss_7": 3.7641051173210145,
"epoch": 0.882,
"grad_norm": 636.0,
"kl_loss_10": 169.83995971679687,
"kl_loss_2": 2038.54423828125,
"kl_loss_3": 1585.0056030273438,
"kl_loss_7": 568.1876190185546,
"learning_rate": 3.46459839396045e-05,
"loss": 1125.9656,
"step": 8820
},
{
"ce_loss_10": 3.529653000831604,
"ce_loss_13": 3.449263334274292,
"ce_loss_2": 4.503918576240539,
"ce_loss_3": 4.2343867182731625,
"ce_loss_7": 3.712590277194977,
"epoch": 0.883,
"grad_norm": 576.0,
"kl_loss_10": 178.9514488220215,
"kl_loss_2": 2152.299346923828,
"kl_loss_3": 1671.0650329589844,
"kl_loss_7": 603.799105834961,
"learning_rate": 3.406798539427386e-05,
"loss": 1176.0018,
"step": 8830
},
{
"ce_loss_10": 3.5842846632003784,
"ce_loss_13": 3.510246682167053,
"ce_loss_2": 4.541045117378235,
"ce_loss_3": 4.27923276424408,
"ce_loss_7": 3.7581582188606264,
"epoch": 0.884,
"grad_norm": 576.0,
"kl_loss_10": 176.05337142944336,
"kl_loss_2": 2155.0933471679687,
"kl_loss_3": 1681.9300415039063,
"kl_loss_7": 595.8100616455079,
"learning_rate": 3.349467877915746e-05,
"loss": 1155.9855,
"step": 8840
},
{
"ce_loss_10": 3.5404534935951233,
"ce_loss_13": 3.4635657548904417,
"ce_loss_2": 4.524973630905151,
"ce_loss_3": 4.259057784080506,
"ce_loss_7": 3.7238620042800905,
"epoch": 0.885,
"grad_norm": 604.0,
"kl_loss_10": 178.34489822387695,
"kl_loss_2": 2212.789306640625,
"kl_loss_3": 1740.2167602539062,
"kl_loss_7": 609.8538391113282,
"learning_rate": 3.292606986744667e-05,
"loss": 1199.5514,
"step": 8850
},
{
"ce_loss_10": 3.4956598401069643,
"ce_loss_13": 3.4253405332565308,
"ce_loss_2": 4.470083999633789,
"ce_loss_3": 4.206580317020416,
"ce_loss_7": 3.6703789830207825,
"epoch": 0.886,
"grad_norm": 580.0,
"kl_loss_10": 174.30244827270508,
"kl_loss_2": 2159.222479248047,
"kl_loss_3": 1693.7933227539063,
"kl_loss_7": 599.4409484863281,
"learning_rate": 3.23621643850267e-05,
"loss": 1154.7352,
"step": 8860
},
{
"ce_loss_10": 3.5727248191833496,
"ce_loss_13": 3.496046614646912,
"ce_loss_2": 4.52955162525177,
"ce_loss_3": 4.25726010799408,
"ce_loss_7": 3.7465644001960756,
"epoch": 0.887,
"grad_norm": 552.0,
"kl_loss_10": 179.55787811279296,
"kl_loss_2": 2159.213214111328,
"kl_loss_3": 1673.28759765625,
"kl_loss_7": 605.2644119262695,
"learning_rate": 3.180296801041971e-05,
"loss": 1139.4904,
"step": 8870
},
{
"ce_loss_10": 3.595055866241455,
"ce_loss_13": 3.5219205260276794,
"ce_loss_2": 4.565229892730713,
"ce_loss_3": 4.302072286605835,
"ce_loss_7": 3.7657612919807435,
"epoch": 0.888,
"grad_norm": 488.0,
"kl_loss_10": 174.51052551269532,
"kl_loss_2": 2168.450274658203,
"kl_loss_3": 1696.4366394042968,
"kl_loss_7": 592.0915069580078,
"learning_rate": 3.124848637472688e-05,
"loss": 1132.4514,
"step": 8880
},
{
"ce_loss_10": 3.420267331600189,
"ce_loss_13": 3.346735382080078,
"ce_loss_2": 4.411080622673035,
"ce_loss_3": 4.143524849414826,
"ce_loss_7": 3.6017141342163086,
"epoch": 0.889,
"grad_norm": 600.0,
"kl_loss_10": 174.23086242675782,
"kl_loss_2": 2199.3413024902343,
"kl_loss_3": 1717.92958984375,
"kl_loss_7": 596.7423614501953,
"learning_rate": 3.069872506157212e-05,
"loss": 1155.8682,
"step": 8890
},
{
"ce_loss_10": 3.5183400988578795,
"ce_loss_13": 3.4446550846099853,
"ce_loss_2": 4.4763764381408695,
"ce_loss_3": 4.213694953918457,
"ce_loss_7": 3.693737256526947,
"epoch": 0.89,
"grad_norm": 544.0,
"kl_loss_10": 176.23825073242188,
"kl_loss_2": 2152.81796875,
"kl_loss_3": 1686.6082885742187,
"kl_loss_7": 599.2687866210938,
"learning_rate": 3.0153689607045842e-05,
"loss": 1144.8437,
"step": 8900
},
{
"ce_loss_10": 3.4148733854293822,
"ce_loss_13": 3.3367454648017882,
"ce_loss_2": 4.46618926525116,
"ce_loss_3": 4.190043389797211,
"ce_loss_7": 3.606413686275482,
"epoch": 0.891,
"grad_norm": 556.0,
"kl_loss_10": 181.56116943359376,
"kl_loss_2": 2316.6614379882812,
"kl_loss_3": 1823.7576782226563,
"kl_loss_7": 624.2273193359375,
"learning_rate": 2.9613385499648926e-05,
"loss": 1174.4811,
"step": 8910
},
{
"ce_loss_10": 3.472314703464508,
"ce_loss_13": 3.3953917384147645,
"ce_loss_2": 4.439359056949615,
"ce_loss_3": 4.1694392442703245,
"ce_loss_7": 3.6529108047485352,
"epoch": 0.892,
"grad_norm": 632.0,
"kl_loss_10": 176.6066520690918,
"kl_loss_2": 2142.8492736816406,
"kl_loss_3": 1665.7835510253906,
"kl_loss_7": 596.6286926269531,
"learning_rate": 2.9077818180237692e-05,
"loss": 1160.4215,
"step": 8920
},
{
"ce_loss_10": 3.5216124176979067,
"ce_loss_13": 3.444805955886841,
"ce_loss_2": 4.504645991325378,
"ce_loss_3": 4.23671303987503,
"ce_loss_7": 3.703485441207886,
"epoch": 0.893,
"grad_norm": 584.0,
"kl_loss_10": 176.44115447998047,
"kl_loss_2": 2174.382470703125,
"kl_loss_3": 1696.403564453125,
"kl_loss_7": 600.8687164306641,
"learning_rate": 2.8546993041969172e-05,
"loss": 1152.6479,
"step": 8930
},
{
"ce_loss_10": 3.5529621839523315,
"ce_loss_13": 3.4791373729705812,
"ce_loss_2": 4.487787294387817,
"ce_loss_3": 4.227098524570465,
"ce_loss_7": 3.7245055556297304,
"epoch": 0.894,
"grad_norm": 506.0,
"kl_loss_10": 174.32415542602538,
"kl_loss_2": 2110.938995361328,
"kl_loss_3": 1637.950372314453,
"kl_loss_7": 590.5497222900391,
"learning_rate": 2.802091543024671e-05,
"loss": 1153.5114,
"step": 8940
},
{
"ce_loss_10": 3.5515737652778627,
"ce_loss_13": 3.4770275354385376,
"ce_loss_2": 4.526445126533508,
"ce_loss_3": 4.267046928405762,
"ce_loss_7": 3.728025937080383,
"epoch": 0.895,
"grad_norm": 612.0,
"kl_loss_10": 177.65689697265626,
"kl_loss_2": 2195.3055725097656,
"kl_loss_3": 1727.4093139648437,
"kl_loss_7": 604.4041717529296,
"learning_rate": 2.7499590642665774e-05,
"loss": 1190.9908,
"step": 8950
},
{
"ce_loss_10": 3.5625943899154664,
"ce_loss_13": 3.4850521326065063,
"ce_loss_2": 4.553565168380738,
"ce_loss_3": 4.279750061035156,
"ce_loss_7": 3.739047312736511,
"epoch": 0.896,
"grad_norm": 512.0,
"kl_loss_10": 178.24014129638672,
"kl_loss_2": 2193.0329833984374,
"kl_loss_3": 1709.0227905273437,
"kl_loss_7": 602.5221313476562,
"learning_rate": 2.6983023928961405e-05,
"loss": 1147.626,
"step": 8960
},
{
"ce_loss_10": 3.532795751094818,
"ce_loss_13": 3.4568071961402893,
"ce_loss_2": 4.499462056159973,
"ce_loss_3": 4.242076885700226,
"ce_loss_7": 3.709202516078949,
"epoch": 0.897,
"grad_norm": 616.0,
"kl_loss_10": 177.7622848510742,
"kl_loss_2": 2147.4717651367187,
"kl_loss_3": 1687.235498046875,
"kl_loss_7": 597.6563537597656,
"learning_rate": 2.6471220490954628e-05,
"loss": 1172.1214,
"step": 8970
},
{
"ce_loss_10": 3.5174603939056395,
"ce_loss_13": 3.4463194727897646,
"ce_loss_2": 4.477302503585816,
"ce_loss_3": 4.214387357234955,
"ce_loss_7": 3.6873306155204775,
"epoch": 0.898,
"grad_norm": 592.0,
"kl_loss_10": 174.26932220458986,
"kl_loss_2": 2152.279821777344,
"kl_loss_3": 1683.4832763671875,
"kl_loss_7": 590.3468292236328,
"learning_rate": 2.596418548250029e-05,
"loss": 1156.035,
"step": 8980
},
{
"ce_loss_10": 3.5602595686912535,
"ce_loss_13": 3.485771131515503,
"ce_loss_2": 4.5211225032806395,
"ce_loss_3": 4.257622516155243,
"ce_loss_7": 3.7368709683418273,
"epoch": 0.899,
"grad_norm": 524.0,
"kl_loss_10": 179.2437530517578,
"kl_loss_2": 2158.524066162109,
"kl_loss_3": 1691.2223327636718,
"kl_loss_7": 601.7962463378906,
"learning_rate": 2.5461924009435368e-05,
"loss": 1142.976,
"step": 8990
},
{
"ce_loss_10": 3.5547463297843933,
"ce_loss_13": 3.479547905921936,
"ce_loss_2": 4.515283250808716,
"ce_loss_3": 4.250737547874451,
"ce_loss_7": 3.732912743091583,
"epoch": 0.9,
"grad_norm": 572.0,
"kl_loss_10": 177.4375427246094,
"kl_loss_2": 2139.4888916015625,
"kl_loss_3": 1664.3239318847657,
"kl_loss_7": 599.9839569091797,
"learning_rate": 2.4964441129527336e-05,
"loss": 1166.3201,
"step": 9000
},
{
"ce_loss_10": 3.553958511352539,
"ce_loss_13": 3.476763606071472,
"ce_loss_2": 4.496755647659302,
"ce_loss_3": 4.227215158939361,
"ce_loss_7": 3.7206253528594972,
"epoch": 0.901,
"grad_norm": 540.0,
"kl_loss_10": 174.2966407775879,
"kl_loss_2": 2111.5487548828123,
"kl_loss_3": 1639.27841796875,
"kl_loss_7": 584.4350708007812,
"learning_rate": 2.4471741852423235e-05,
"loss": 1125.0274,
"step": 9010
},
{
"ce_loss_10": 3.600440430641174,
"ce_loss_13": 3.522721529006958,
"ce_loss_2": 4.542108774185181,
"ce_loss_3": 4.282799339294433,
"ce_loss_7": 3.7742578268051146,
"epoch": 0.902,
"grad_norm": 524.0,
"kl_loss_10": 175.27555160522462,
"kl_loss_2": 2098.4271484375,
"kl_loss_3": 1637.176934814453,
"kl_loss_7": 587.0385147094727,
"learning_rate": 2.3983831139599287e-05,
"loss": 1139.7687,
"step": 9020
},
{
"ce_loss_10": 3.519875633716583,
"ce_loss_13": 3.446099603176117,
"ce_loss_2": 4.47457070350647,
"ce_loss_3": 4.212863862514496,
"ce_loss_7": 3.68597651720047,
"epoch": 0.903,
"grad_norm": 508.0,
"kl_loss_10": 174.20441055297852,
"kl_loss_2": 2129.7295166015624,
"kl_loss_3": 1661.6650817871093,
"kl_loss_7": 579.8323806762695,
"learning_rate": 2.3500713904311022e-05,
"loss": 1116.7298,
"step": 9030
},
{
"ce_loss_10": 3.5635103940963746,
"ce_loss_13": 3.4901776790618895,
"ce_loss_2": 4.492858815193176,
"ce_loss_3": 4.233377468585968,
"ce_loss_7": 3.7266834378242493,
"epoch": 0.904,
"grad_norm": 568.0,
"kl_loss_10": 172.6203300476074,
"kl_loss_2": 2067.9869079589844,
"kl_loss_3": 1612.4867309570313,
"kl_loss_7": 575.5206619262696,
"learning_rate": 2.3022395011543685e-05,
"loss": 1119.9885,
"step": 9040
},
{
"ce_loss_10": 3.592438757419586,
"ce_loss_13": 3.515104389190674,
"ce_loss_2": 4.541802954673767,
"ce_loss_3": 4.281132400035858,
"ce_loss_7": 3.7722238898277283,
"epoch": 0.905,
"grad_norm": 572.0,
"kl_loss_10": 180.47207794189453,
"kl_loss_2": 2144.9797241210936,
"kl_loss_3": 1672.9977722167969,
"kl_loss_7": 611.6667938232422,
"learning_rate": 2.2548879277963063e-05,
"loss": 1176.2332,
"step": 9050
},
{
"ce_loss_10": 3.5052724361419676,
"ce_loss_13": 3.43240772485733,
"ce_loss_2": 4.459889388084411,
"ce_loss_3": 4.187293374538422,
"ce_loss_7": 3.677665722370148,
"epoch": 0.906,
"grad_norm": 536.0,
"kl_loss_10": 175.61516647338868,
"kl_loss_2": 2136.0928955078125,
"kl_loss_3": 1652.1388549804688,
"kl_loss_7": 587.7709274291992,
"learning_rate": 2.208017147186736e-05,
"loss": 1112.7982,
"step": 9060
},
{
"ce_loss_10": 3.5033626675605776,
"ce_loss_13": 3.4270050883293153,
"ce_loss_2": 4.460723853111267,
"ce_loss_3": 4.200226056575775,
"ce_loss_7": 3.675853359699249,
"epoch": 0.907,
"grad_norm": 532.0,
"kl_loss_10": 175.80412521362305,
"kl_loss_2": 2135.014111328125,
"kl_loss_3": 1673.9324096679688,
"kl_loss_7": 594.3165740966797,
"learning_rate": 2.1616276313139227e-05,
"loss": 1130.9125,
"step": 9070
},
{
"ce_loss_10": 3.540289318561554,
"ce_loss_13": 3.4620949029922485,
"ce_loss_2": 4.504480719566345,
"ce_loss_3": 4.243532609939575,
"ce_loss_7": 3.7148184418678283,
"epoch": 0.908,
"grad_norm": 564.0,
"kl_loss_10": 176.60092849731444,
"kl_loss_2": 2138.388262939453,
"kl_loss_3": 1670.2328552246095,
"kl_loss_7": 593.4554962158203,
"learning_rate": 2.1157198473197415e-05,
"loss": 1155.7779,
"step": 9080
},
{
"ce_loss_10": 3.608424699306488,
"ce_loss_13": 3.5318522691726684,
"ce_loss_2": 4.569310665130615,
"ce_loss_3": 4.307307338714599,
"ce_loss_7": 3.7887983441352846,
"epoch": 0.909,
"grad_norm": 532.0,
"kl_loss_10": 179.34665451049804,
"kl_loss_2": 2147.910009765625,
"kl_loss_3": 1676.3675476074218,
"kl_loss_7": 609.1422882080078,
"learning_rate": 2.0702942574950812e-05,
"loss": 1150.5193,
"step": 9090
},
{
"ce_loss_10": 3.531160354614258,
"ce_loss_13": 3.4534537196159363,
"ce_loss_2": 4.502471184730529,
"ce_loss_3": 4.2366371870040895,
"ce_loss_7": 3.7111218690872194,
"epoch": 0.91,
"grad_norm": 576.0,
"kl_loss_10": 178.72406845092775,
"kl_loss_2": 2166.001983642578,
"kl_loss_3": 1694.5049133300781,
"kl_loss_7": 603.0742904663086,
"learning_rate": 2.025351319275137e-05,
"loss": 1154.2008,
"step": 9100
},
{
"ce_loss_10": 3.657759261131287,
"ce_loss_13": 3.5795228123664855,
"ce_loss_2": 4.611237382888794,
"ce_loss_3": 4.346826362609863,
"ce_loss_7": 3.829502213001251,
"epoch": 0.911,
"grad_norm": 568.0,
"kl_loss_10": 182.78317489624024,
"kl_loss_2": 2152.8356689453126,
"kl_loss_3": 1681.0592834472657,
"kl_loss_7": 612.198373413086,
"learning_rate": 1.9808914852347816e-05,
"loss": 1183.935,
"step": 9110
},
{
"ce_loss_10": 3.5076343536376955,
"ce_loss_13": 3.4301217675209044,
"ce_loss_2": 4.475468993186951,
"ce_loss_3": 4.200416827201844,
"ce_loss_7": 3.690832221508026,
"epoch": 0.912,
"grad_norm": 520.0,
"kl_loss_10": 177.99471740722657,
"kl_loss_2": 2138.6880798339844,
"kl_loss_3": 1648.8575317382813,
"kl_loss_7": 603.4748657226562,
"learning_rate": 1.9369152030840554e-05,
"loss": 1133.9587,
"step": 9120
},
{
"ce_loss_10": 3.5838579297065736,
"ce_loss_13": 3.5108367919921877,
"ce_loss_2": 4.5474550247192385,
"ce_loss_3": 4.282457900047302,
"ce_loss_7": 3.752550458908081,
"epoch": 0.913,
"grad_norm": 592.0,
"kl_loss_10": 175.99298171997071,
"kl_loss_2": 2175.4973083496093,
"kl_loss_3": 1708.1368530273437,
"kl_loss_7": 595.3381591796875,
"learning_rate": 1.893422915663645e-05,
"loss": 1154.3063,
"step": 9130
},
{
"ce_loss_10": 3.4526755094528196,
"ce_loss_13": 3.376223611831665,
"ce_loss_2": 4.463929057121277,
"ce_loss_3": 4.188671815395355,
"ce_loss_7": 3.6436040878295897,
"epoch": 0.914,
"grad_norm": 528.0,
"kl_loss_10": 178.96754608154296,
"kl_loss_2": 2226.8522216796873,
"kl_loss_3": 1741.089337158203,
"kl_loss_7": 614.4321655273437,
"learning_rate": 1.850415060940386e-05,
"loss": 1177.2793,
"step": 9140
},
{
"ce_loss_10": 3.577260196208954,
"ce_loss_13": 3.4996687054634092,
"ce_loss_2": 4.506159293651581,
"ce_loss_3": 4.247144281864166,
"ce_loss_7": 3.7475706696510316,
"epoch": 0.915,
"grad_norm": 576.0,
"kl_loss_10": 176.05650253295897,
"kl_loss_2": 2092.3647644042967,
"kl_loss_3": 1626.7418640136718,
"kl_loss_7": 590.4974914550781,
"learning_rate": 1.8078920720028978e-05,
"loss": 1136.8029,
"step": 9150
},
{
"ce_loss_10": 3.5006513595581055,
"ce_loss_13": 3.4293729782104494,
"ce_loss_2": 4.446831393241882,
"ce_loss_3": 4.180594873428345,
"ce_loss_7": 3.670674538612366,
"epoch": 0.916,
"grad_norm": 584.0,
"kl_loss_10": 173.59793243408203,
"kl_loss_2": 2105.566021728516,
"kl_loss_3": 1637.2901428222656,
"kl_loss_7": 585.1143585205078,
"learning_rate": 1.765854377057219e-05,
"loss": 1156.8438,
"step": 9160
},
{
"ce_loss_10": 3.4831743359565737,
"ce_loss_13": 3.410732936859131,
"ce_loss_2": 4.439985752105713,
"ce_loss_3": 4.173957622051239,
"ce_loss_7": 3.652401328086853,
"epoch": 0.917,
"grad_norm": 552.0,
"kl_loss_10": 172.13598022460937,
"kl_loss_2": 2136.6128540039062,
"kl_loss_3": 1663.3346557617188,
"kl_loss_7": 585.5476837158203,
"learning_rate": 1.724302399422456e-05,
"loss": 1148.3008,
"step": 9170
},
{
"ce_loss_10": 3.4418306827545164,
"ce_loss_13": 3.365187871456146,
"ce_loss_2": 4.424201607704163,
"ce_loss_3": 4.15188490152359,
"ce_loss_7": 3.617412793636322,
"epoch": 0.918,
"grad_norm": 540.0,
"kl_loss_10": 181.21381607055665,
"kl_loss_2": 2192.6884948730467,
"kl_loss_3": 1711.2826416015625,
"kl_loss_7": 608.8610565185547,
"learning_rate": 1.683236557526574e-05,
"loss": 1171.7086,
"step": 9180
},
{
"ce_loss_10": 3.5525336861610413,
"ce_loss_13": 3.479281461238861,
"ce_loss_2": 4.4706899404525755,
"ce_loss_3": 4.209463405609131,
"ce_loss_7": 3.7199216723442077,
"epoch": 0.919,
"grad_norm": 552.0,
"kl_loss_10": 172.36394195556642,
"kl_loss_2": 2051.388214111328,
"kl_loss_3": 1592.7936218261718,
"kl_loss_7": 577.4165252685547,
"learning_rate": 1.6426572649021475e-05,
"loss": 1138.484,
"step": 9190
},
{
"ce_loss_10": 3.5873886704444886,
"ce_loss_13": 3.515676808357239,
"ce_loss_2": 4.504871940612793,
"ce_loss_3": 4.245928645133972,
"ce_loss_7": 3.7530444860458374,
"epoch": 0.92,
"grad_norm": 560.0,
"kl_loss_10": 175.5730728149414,
"kl_loss_2": 2067.4602783203127,
"kl_loss_3": 1611.9698425292968,
"kl_loss_7": 583.8149566650391,
"learning_rate": 1.6025649301821876e-05,
"loss": 1125.4826,
"step": 9200
},
{
"ce_loss_10": 3.579323208332062,
"ce_loss_13": 3.50703387260437,
"ce_loss_2": 4.493245768547058,
"ce_loss_3": 4.232775616645813,
"ce_loss_7": 3.7471976399421694,
"epoch": 0.921,
"grad_norm": 620.0,
"kl_loss_10": 177.73348236083984,
"kl_loss_2": 2084.493505859375,
"kl_loss_3": 1628.828369140625,
"kl_loss_7": 594.5904907226562,
"learning_rate": 1.5629599570960716e-05,
"loss": 1123.5703,
"step": 9210
},
{
"ce_loss_10": 3.482688879966736,
"ce_loss_13": 3.4090826153755187,
"ce_loss_2": 4.466846561431884,
"ce_loss_3": 4.195373678207398,
"ce_loss_7": 3.658596193790436,
"epoch": 0.922,
"grad_norm": 588.0,
"kl_loss_10": 175.80986251831055,
"kl_loss_2": 2196.6754943847654,
"kl_loss_3": 1711.6648010253907,
"kl_loss_7": 598.8134338378907,
"learning_rate": 1.5238427444654367e-05,
"loss": 1155.2326,
"step": 9220
},
{
"ce_loss_10": 3.543232810497284,
"ce_loss_13": 3.467606770992279,
"ce_loss_2": 4.491106653213501,
"ce_loss_3": 4.219079720973968,
"ce_loss_7": 3.710583233833313,
"epoch": 0.923,
"grad_norm": 548.0,
"kl_loss_10": 174.20623474121095,
"kl_loss_2": 2120.3849365234373,
"kl_loss_3": 1639.1658142089843,
"kl_loss_7": 584.3361358642578,
"learning_rate": 1.4852136862001764e-05,
"loss": 1130.8816,
"step": 9230
},
{
"ce_loss_10": 3.5088143348693848,
"ce_loss_13": 3.435594344139099,
"ce_loss_2": 4.446617817878723,
"ce_loss_3": 4.185671401023865,
"ce_loss_7": 3.681156051158905,
"epoch": 0.924,
"grad_norm": 584.0,
"kl_loss_10": 172.72551879882812,
"kl_loss_2": 2097.9775573730467,
"kl_loss_3": 1637.147705078125,
"kl_loss_7": 588.3473190307617,
"learning_rate": 1.4470731712944884e-05,
"loss": 1146.0963,
"step": 9240
},
{
"ce_loss_10": 3.5362769246101378,
"ce_loss_13": 3.461189365386963,
"ce_loss_2": 4.501055717468262,
"ce_loss_3": 4.224046432971955,
"ce_loss_7": 3.714902651309967,
"epoch": 0.925,
"grad_norm": 548.0,
"kl_loss_10": 178.0212043762207,
"kl_loss_2": 2145.880969238281,
"kl_loss_3": 1660.7241577148438,
"kl_loss_7": 597.9191680908203,
"learning_rate": 1.4094215838229174e-05,
"loss": 1173.0712,
"step": 9250
},
{
"ce_loss_10": 3.498918581008911,
"ce_loss_13": 3.4251022219657896,
"ce_loss_2": 4.4930708646774296,
"ce_loss_3": 4.217134141921997,
"ce_loss_7": 3.675217390060425,
"epoch": 0.926,
"grad_norm": 628.0,
"kl_loss_10": 177.476513671875,
"kl_loss_2": 2207.0631896972654,
"kl_loss_3": 1717.4895629882812,
"kl_loss_7": 603.3580856323242,
"learning_rate": 1.372259302936546e-05,
"loss": 1205.574,
"step": 9260
},
{
"ce_loss_10": 3.6163718700408936,
"ce_loss_13": 3.537315881252289,
"ce_loss_2": 4.571975326538086,
"ce_loss_3": 4.302231848239899,
"ce_loss_7": 3.783820962905884,
"epoch": 0.927,
"grad_norm": 576.0,
"kl_loss_10": 181.85763092041014,
"kl_loss_2": 2140.0186767578125,
"kl_loss_3": 1662.9603210449218,
"kl_loss_7": 600.1376983642579,
"learning_rate": 1.3355867028591206e-05,
"loss": 1136.2721,
"step": 9270
},
{
"ce_loss_10": 3.514796030521393,
"ce_loss_13": 3.440743112564087,
"ce_loss_2": 4.449939727783203,
"ce_loss_3": 4.1810842752456665,
"ce_loss_7": 3.683949875831604,
"epoch": 0.928,
"grad_norm": 564.0,
"kl_loss_10": 175.13030853271485,
"kl_loss_2": 2107.975048828125,
"kl_loss_3": 1633.0731262207032,
"kl_loss_7": 589.7343017578125,
"learning_rate": 1.2994041528833267e-05,
"loss": 1127.6617,
"step": 9280
},
{
"ce_loss_10": 3.5171499490737914,
"ce_loss_13": 3.440678071975708,
"ce_loss_2": 4.470655179023742,
"ce_loss_3": 4.200739192962646,
"ce_loss_7": 3.690466821193695,
"epoch": 0.929,
"grad_norm": 584.0,
"kl_loss_10": 174.82103958129883,
"kl_loss_2": 2150.8757751464846,
"kl_loss_3": 1677.1400146484375,
"kl_loss_7": 592.8601806640625,
"learning_rate": 1.2637120173670358e-05,
"loss": 1145.5388,
"step": 9290
},
{
"ce_loss_10": 3.5360547065734864,
"ce_loss_13": 3.459415102005005,
"ce_loss_2": 4.509140729904175,
"ce_loss_3": 4.243091595172882,
"ce_loss_7": 3.717022383213043,
"epoch": 0.93,
"grad_norm": 616.0,
"kl_loss_10": 177.1988067626953,
"kl_loss_2": 2160.6812377929687,
"kl_loss_3": 1681.2325256347656,
"kl_loss_7": 601.0645294189453,
"learning_rate": 1.2285106557296478e-05,
"loss": 1155.5311,
"step": 9300
},
{
"ce_loss_10": 3.4133058071136473,
"ce_loss_13": 3.340100085735321,
"ce_loss_2": 4.45502986907959,
"ce_loss_3": 4.176646625995636,
"ce_loss_7": 3.593162167072296,
"epoch": 0.931,
"grad_norm": 684.0,
"kl_loss_10": 177.049959564209,
"kl_loss_2": 2280.7558776855467,
"kl_loss_3": 1784.5312927246093,
"kl_loss_7": 606.032177734375,
"learning_rate": 1.1938004224484989e-05,
"loss": 1177.6771,
"step": 9310
},
{
"ce_loss_10": 3.6532346606254578,
"ce_loss_13": 3.5762033224105836,
"ce_loss_2": 4.59397509098053,
"ce_loss_3": 4.32731124162674,
"ce_loss_7": 3.8223699569702148,
"epoch": 0.932,
"grad_norm": 552.0,
"kl_loss_10": 179.57099151611328,
"kl_loss_2": 2131.067413330078,
"kl_loss_3": 1656.3818481445312,
"kl_loss_7": 601.6865631103516,
"learning_rate": 1.1595816670552429e-05,
"loss": 1167.2541,
"step": 9320
},
{
"ce_loss_10": 3.582180309295654,
"ce_loss_13": 3.5061400294303895,
"ce_loss_2": 4.524780786037445,
"ce_loss_3": 4.259365129470825,
"ce_loss_7": 3.747998225688934,
"epoch": 0.933,
"grad_norm": 568.0,
"kl_loss_10": 175.25698852539062,
"kl_loss_2": 2109.6074279785157,
"kl_loss_3": 1638.4892211914062,
"kl_loss_7": 583.7247375488281,
"learning_rate": 1.1258547341323699e-05,
"loss": 1126.3885,
"step": 9330
},
{
"ce_loss_10": 3.6068438053131104,
"ce_loss_13": 3.5321076273918153,
"ce_loss_2": 4.546216082572937,
"ce_loss_3": 4.2822174549102785,
"ce_loss_7": 3.7781530022621155,
"epoch": 0.934,
"grad_norm": 584.0,
"kl_loss_10": 177.80956649780273,
"kl_loss_2": 2142.086737060547,
"kl_loss_3": 1668.28828125,
"kl_loss_7": 595.8422210693359,
"learning_rate": 1.0926199633097156e-05,
"loss": 1139.4527,
"step": 9340
},
{
"ce_loss_10": 3.610630822181702,
"ce_loss_13": 3.540286922454834,
"ce_loss_2": 4.522486686706543,
"ce_loss_3": 4.263027024269104,
"ce_loss_7": 3.7751463413238526,
"epoch": 0.935,
"grad_norm": 568.0,
"kl_loss_10": 172.0374610900879,
"kl_loss_2": 2078.3720947265624,
"kl_loss_3": 1611.571875,
"kl_loss_7": 582.5087860107421,
"learning_rate": 1.0598776892610684e-05,
"loss": 1147.9141,
"step": 9350
},
{
"ce_loss_10": 3.4225520491600037,
"ce_loss_13": 3.350025403499603,
"ce_loss_2": 4.415711855888366,
"ce_loss_3": 4.137125706672668,
"ce_loss_7": 3.5997640252113343,
"epoch": 0.936,
"grad_norm": 552.0,
"kl_loss_10": 173.3252960205078,
"kl_loss_2": 2195.4003051757813,
"kl_loss_3": 1704.4373046875,
"kl_loss_7": 593.9061553955078,
"learning_rate": 1.0276282417007399e-05,
"loss": 1147.3932,
"step": 9360
},
{
"ce_loss_10": 3.581203269958496,
"ce_loss_13": 3.5097854137420654,
"ce_loss_2": 4.501006007194519,
"ce_loss_3": 4.243209981918335,
"ce_loss_7": 3.747337484359741,
"epoch": 0.937,
"grad_norm": 596.0,
"kl_loss_10": 171.74332122802736,
"kl_loss_2": 2068.851727294922,
"kl_loss_3": 1615.05986328125,
"kl_loss_7": 580.8333526611328,
"learning_rate": 9.958719453803277e-06,
"loss": 1127.7196,
"step": 9370
},
{
"ce_loss_10": 3.578359854221344,
"ce_loss_13": 3.504493975639343,
"ce_loss_2": 4.52911410331726,
"ce_loss_3": 4.265734839439392,
"ce_loss_7": 3.757120943069458,
"epoch": 0.938,
"grad_norm": 568.0,
"kl_loss_10": 176.9942184448242,
"kl_loss_2": 2134.3438110351562,
"kl_loss_3": 1664.9087463378905,
"kl_loss_7": 601.7982177734375,
"learning_rate": 9.646091200853802e-06,
"loss": 1132.6439,
"step": 9380
},
{
"ce_loss_10": 3.5366848587989805,
"ce_loss_13": 3.4644816398620604,
"ce_loss_2": 4.483027625083923,
"ce_loss_3": 4.215219330787659,
"ce_loss_7": 3.704088735580444,
"epoch": 0.939,
"grad_norm": 536.0,
"kl_loss_10": 172.74244918823243,
"kl_loss_2": 2099.378448486328,
"kl_loss_3": 1622.1510986328126,
"kl_loss_7": 583.5708526611328,
"learning_rate": 9.338400806321978e-06,
"loss": 1100.1545,
"step": 9390
},
{
"ce_loss_10": 3.571504032611847,
"ce_loss_13": 3.493305134773254,
"ce_loss_2": 4.510696125030518,
"ce_loss_3": 4.24888288974762,
"ce_loss_7": 3.742924678325653,
"epoch": 0.94,
"grad_norm": 516.0,
"kl_loss_10": 177.7894515991211,
"kl_loss_2": 2105.6502197265627,
"kl_loss_3": 1641.3450012207031,
"kl_loss_7": 590.5869018554688,
"learning_rate": 9.035651368646646e-06,
"loss": 1131.6762,
"step": 9400
},
{
"ce_loss_10": 3.572381889820099,
"ce_loss_13": 3.5001948475837708,
"ce_loss_2": 4.5045966625213625,
"ce_loss_3": 4.241596531867981,
"ce_loss_7": 3.742561626434326,
"epoch": 0.941,
"grad_norm": 612.0,
"kl_loss_10": 173.46388778686523,
"kl_loss_2": 2088.4161376953125,
"kl_loss_3": 1622.1473693847656,
"kl_loss_7": 583.22626953125,
"learning_rate": 8.737845936511335e-06,
"loss": 1133.2381,
"step": 9410
},
{
"ce_loss_10": 3.522591459751129,
"ce_loss_13": 3.446700024604797,
"ce_loss_2": 4.507854294776917,
"ce_loss_3": 4.236737239360809,
"ce_loss_7": 3.698591649532318,
"epoch": 0.942,
"grad_norm": 572.0,
"kl_loss_10": 178.92413635253905,
"kl_loss_2": 2186.715954589844,
"kl_loss_3": 1705.135888671875,
"kl_loss_7": 600.7923004150391,
"learning_rate": 8.444987508813451e-06,
"loss": 1149.7434,
"step": 9420
},
{
"ce_loss_10": 3.475817048549652,
"ce_loss_13": 3.3989389657974245,
"ce_loss_2": 4.473474383354187,
"ce_loss_3": 4.199087584018708,
"ce_loss_7": 3.6562391996383665,
"epoch": 0.943,
"grad_norm": 628.0,
"kl_loss_10": 179.55375061035156,
"kl_loss_2": 2263.5999450683594,
"kl_loss_3": 1768.8511352539062,
"kl_loss_7": 615.3503479003906,
"learning_rate": 8.157079034633974e-06,
"loss": 1178.9379,
"step": 9430
},
{
"ce_loss_10": 3.473416876792908,
"ce_loss_13": 3.40051189661026,
"ce_loss_2": 4.446794199943542,
"ce_loss_3": 4.178017342090607,
"ce_loss_7": 3.6506085276603697,
"epoch": 0.944,
"grad_norm": 552.0,
"kl_loss_10": 174.8175079345703,
"kl_loss_2": 2187.4151916503906,
"kl_loss_3": 1711.5777709960937,
"kl_loss_7": 600.643310546875,
"learning_rate": 7.874123413208145e-06,
"loss": 1147.171,
"step": 9440
},
{
"ce_loss_10": 3.445332610607147,
"ce_loss_13": 3.369913935661316,
"ce_loss_2": 4.434185910224914,
"ce_loss_3": 4.161804282665253,
"ce_loss_7": 3.6255853891372682,
"epoch": 0.945,
"grad_norm": 572.0,
"kl_loss_10": 175.52419662475586,
"kl_loss_2": 2184.3239013671873,
"kl_loss_3": 1698.89501953125,
"kl_loss_7": 598.6286346435547,
"learning_rate": 7.59612349389599e-06,
"loss": 1155.226,
"step": 9450
},
{
"ce_loss_10": 3.534553039073944,
"ce_loss_13": 3.462403440475464,
"ce_loss_2": 4.465369653701782,
"ce_loss_3": 4.198646211624146,
"ce_loss_7": 3.708206284046173,
"epoch": 0.946,
"grad_norm": 580.0,
"kl_loss_10": 172.39571685791014,
"kl_loss_2": 2074.147509765625,
"kl_loss_3": 1600.0551879882812,
"kl_loss_7": 581.5289535522461,
"learning_rate": 7.323082076153509e-06,
"loss": 1126.5964,
"step": 9460
},
{
"ce_loss_10": 3.576580452919006,
"ce_loss_13": 3.503724229335785,
"ce_loss_2": 4.510397839546203,
"ce_loss_3": 4.244443106651306,
"ce_loss_7": 3.7473479986190794,
"epoch": 0.947,
"grad_norm": 572.0,
"kl_loss_10": 179.0567657470703,
"kl_loss_2": 2087.8162841796875,
"kl_loss_3": 1617.9851928710937,
"kl_loss_7": 593.7520263671875,
"learning_rate": 7.055001909504755e-06,
"loss": 1153.5377,
"step": 9470
},
{
"ce_loss_10": 3.6078381657600405,
"ce_loss_13": 3.5327057957649233,
"ce_loss_2": 4.550836896896362,
"ce_loss_3": 4.287693047523499,
"ce_loss_7": 3.7842918038368225,
"epoch": 0.948,
"grad_norm": 616.0,
"kl_loss_10": 177.3563217163086,
"kl_loss_2": 2119.7028747558593,
"kl_loss_3": 1649.5453063964844,
"kl_loss_7": 593.8896911621093,
"learning_rate": 6.791885693514133e-06,
"loss": 1138.076,
"step": 9480
},
{
"ce_loss_10": 3.5228418946266173,
"ce_loss_13": 3.4471074819564818,
"ce_loss_2": 4.505965852737427,
"ce_loss_3": 4.226520001888275,
"ce_loss_7": 3.6976125478744506,
"epoch": 0.949,
"grad_norm": 544.0,
"kl_loss_10": 179.4360237121582,
"kl_loss_2": 2204.2863159179688,
"kl_loss_3": 1709.0836547851563,
"kl_loss_7": 603.0492492675781,
"learning_rate": 6.533736077758867e-06,
"loss": 1164.073,
"step": 9490
},
{
"ce_loss_10": 3.480616366863251,
"ce_loss_13": 3.4073901891708376,
"ce_loss_2": 4.4987491250038145,
"ce_loss_3": 4.22996586561203,
"ce_loss_7": 3.6651357769966126,
"epoch": 0.95,
"grad_norm": 596.0,
"kl_loss_10": 179.7611167907715,
"kl_loss_2": 2254.7767639160156,
"kl_loss_3": 1772.1949829101563,
"kl_loss_7": 613.6569396972657,
"learning_rate": 6.2805556618028556e-06,
"loss": 1174.4523,
"step": 9500
},
{
"ce_loss_10": 3.5753507733345034,
"ce_loss_13": 3.5015788078308105,
"ce_loss_2": 4.508717775344849,
"ce_loss_3": 4.236187517642975,
"ce_loss_7": 3.735841393470764,
"epoch": 0.951,
"grad_norm": 600.0,
"kl_loss_10": 171.3624740600586,
"kl_loss_2": 2070.820721435547,
"kl_loss_3": 1594.0807983398438,
"kl_loss_7": 569.2584503173828,
"learning_rate": 6.032346995169968e-06,
"loss": 1091.2504,
"step": 9510
},
{
"ce_loss_10": 3.5802130341529845,
"ce_loss_13": 3.505545949935913,
"ce_loss_2": 4.52126247882843,
"ce_loss_3": 4.2531510353088375,
"ce_loss_7": 3.7508664727211,
"epoch": 0.952,
"grad_norm": 572.0,
"kl_loss_10": 175.58048248291016,
"kl_loss_2": 2115.77294921875,
"kl_loss_3": 1640.845928955078,
"kl_loss_7": 590.4191436767578,
"learning_rate": 5.789112577318789e-06,
"loss": 1125.406,
"step": 9520
},
{
"ce_loss_10": 3.5525818467140198,
"ce_loss_13": 3.474942719936371,
"ce_loss_2": 4.529764556884766,
"ce_loss_3": 4.262361979484558,
"ce_loss_7": 3.725843298435211,
"epoch": 0.953,
"grad_norm": 560.0,
"kl_loss_10": 178.63978881835936,
"kl_loss_2": 2187.347344970703,
"kl_loss_3": 1713.5655639648437,
"kl_loss_7": 604.028662109375,
"learning_rate": 5.550854857617194e-06,
"loss": 1138.3246,
"step": 9530
},
{
"ce_loss_10": 3.5410927653312685,
"ce_loss_13": 3.464462494850159,
"ce_loss_2": 4.532546710968018,
"ce_loss_3": 4.2611222743988035,
"ce_loss_7": 3.717101490497589,
"epoch": 0.954,
"grad_norm": 596.0,
"kl_loss_10": 179.71324462890624,
"kl_loss_2": 2218.49970703125,
"kl_loss_3": 1729.5723754882813,
"kl_loss_7": 606.5572113037109,
"learning_rate": 5.317576235317756e-06,
"loss": 1164.7152,
"step": 9540
},
{
"ce_loss_10": 3.567497718334198,
"ce_loss_13": 3.4959131717681884,
"ce_loss_2": 4.49013991355896,
"ce_loss_3": 4.226358330249786,
"ce_loss_7": 3.7339030742645263,
"epoch": 0.955,
"grad_norm": 580.0,
"kl_loss_10": 171.57061843872071,
"kl_loss_2": 2045.03349609375,
"kl_loss_3": 1581.6937194824218,
"kl_loss_7": 573.2978149414063,
"learning_rate": 5.089279059533658e-06,
"loss": 1144.5578,
"step": 9550
},
{
"ce_loss_10": 3.6264307737350463,
"ce_loss_13": 3.549366092681885,
"ce_loss_2": 4.555849361419678,
"ce_loss_3": 4.291936588287354,
"ce_loss_7": 3.796077787876129,
"epoch": 0.956,
"grad_norm": 532.0,
"kl_loss_10": 180.97408142089844,
"kl_loss_2": 2101.3386962890627,
"kl_loss_3": 1636.1171264648438,
"kl_loss_7": 603.2738311767578,
"learning_rate": 4.865965629214819e-06,
"loss": 1128.9252,
"step": 9560
},
{
"ce_loss_10": 3.5740526914596558,
"ce_loss_13": 3.4976862549781798,
"ce_loss_2": 4.531564974784851,
"ce_loss_3": 4.273219418525696,
"ce_loss_7": 3.7471871614456176,
"epoch": 0.957,
"grad_norm": 496.0,
"kl_loss_10": 178.81691131591796,
"kl_loss_2": 2162.70166015625,
"kl_loss_3": 1696.7497314453126,
"kl_loss_7": 603.7053924560547,
"learning_rate": 4.6476381931251366e-06,
"loss": 1126.9263,
"step": 9570
},
{
"ce_loss_10": 3.5494153618812563,
"ce_loss_13": 3.475415658950806,
"ce_loss_2": 4.496997284889221,
"ce_loss_3": 4.230398142337799,
"ce_loss_7": 3.728412318229675,
"epoch": 0.958,
"grad_norm": 496.0,
"kl_loss_10": 176.03445205688476,
"kl_loss_2": 2117.532794189453,
"kl_loss_3": 1643.0480895996093,
"kl_loss_7": 594.0362182617188,
"learning_rate": 4.434298949819449e-06,
"loss": 1135.89,
"step": 9580
},
{
"ce_loss_10": 3.5075241327285767,
"ce_loss_13": 3.4301467418670653,
"ce_loss_2": 4.514026093482971,
"ce_loss_3": 4.236743009090423,
"ce_loss_7": 3.6883852958679197,
"epoch": 0.959,
"grad_norm": 584.0,
"kl_loss_10": 182.3026496887207,
"kl_loss_2": 2271.594354248047,
"kl_loss_3": 1773.125311279297,
"kl_loss_7": 624.3575988769531,
"learning_rate": 4.2259500476214406e-06,
"loss": 1183.4918,
"step": 9590
},
{
"ce_loss_10": 3.491668391227722,
"ce_loss_13": 3.4157654523849486,
"ce_loss_2": 4.465724205970764,
"ce_loss_3": 4.201405656337738,
"ce_loss_7": 3.665540862083435,
"epoch": 0.96,
"grad_norm": 556.0,
"kl_loss_10": 177.03348541259766,
"kl_loss_2": 2184.085583496094,
"kl_loss_3": 1717.5306762695313,
"kl_loss_7": 602.626220703125,
"learning_rate": 4.02259358460233e-06,
"loss": 1148.7472,
"step": 9600
},
{
"ce_loss_10": 3.5573193550109865,
"ce_loss_13": 3.4829213500022886,
"ce_loss_2": 4.506733560562134,
"ce_loss_3": 4.238800776004791,
"ce_loss_7": 3.7266565203666686,
"epoch": 0.961,
"grad_norm": 580.0,
"kl_loss_10": 176.3166290283203,
"kl_loss_2": 2114.0912475585938,
"kl_loss_3": 1637.3499328613282,
"kl_loss_7": 588.5086486816406,
"learning_rate": 3.8242316085594916e-06,
"loss": 1126.8451,
"step": 9610
},
{
"ce_loss_10": 3.447552573680878,
"ce_loss_13": 3.369247031211853,
"ce_loss_2": 4.473224306106568,
"ce_loss_3": 4.197393763065338,
"ce_loss_7": 3.6313952803611755,
"epoch": 0.962,
"grad_norm": 556.0,
"kl_loss_10": 180.42641220092773,
"kl_loss_2": 2280.7411865234376,
"kl_loss_3": 1787.7846984863281,
"kl_loss_7": 615.477572631836,
"learning_rate": 3.630866116995757e-06,
"loss": 1194.5547,
"step": 9620
},
{
"ce_loss_10": 3.5979113578796387,
"ce_loss_13": 3.5257344841957092,
"ce_loss_2": 4.537094449996948,
"ce_loss_3": 4.265922880172729,
"ce_loss_7": 3.7622151970863342,
"epoch": 0.963,
"grad_norm": 572.0,
"kl_loss_10": 174.69513320922852,
"kl_loss_2": 2105.5745544433594,
"kl_loss_3": 1622.0366455078124,
"kl_loss_7": 578.8991943359375,
"learning_rate": 3.4424990570994797e-06,
"loss": 1156.3669,
"step": 9630
},
{
"ce_loss_10": 3.585505282878876,
"ce_loss_13": 3.5114797711372376,
"ce_loss_2": 4.518644833564759,
"ce_loss_3": 4.256495106220245,
"ce_loss_7": 3.7576936960220335,
"epoch": 0.964,
"grad_norm": 482.0,
"kl_loss_10": 175.59745712280272,
"kl_loss_2": 2103.631524658203,
"kl_loss_3": 1631.6069641113281,
"kl_loss_7": 588.9240661621094,
"learning_rate": 3.2591323257248896e-06,
"loss": 1134.1978,
"step": 9640
},
{
"ce_loss_10": 3.437925660610199,
"ce_loss_13": 3.3662607192993166,
"ce_loss_2": 4.409651112556458,
"ce_loss_3": 4.150672721862793,
"ce_loss_7": 3.6122069478034975,
"epoch": 0.965,
"grad_norm": 556.0,
"kl_loss_10": 174.7218978881836,
"kl_loss_2": 2173.464489746094,
"kl_loss_3": 1704.9824951171875,
"kl_loss_7": 600.8370666503906,
"learning_rate": 3.0807677693729385e-06,
"loss": 1163.455,
"step": 9650
},
{
"ce_loss_10": 3.623323905467987,
"ce_loss_13": 3.55154949426651,
"ce_loss_2": 4.551669549942017,
"ce_loss_3": 4.290165424346924,
"ce_loss_7": 3.794328248500824,
"epoch": 0.966,
"grad_norm": 544.0,
"kl_loss_10": 174.09824600219727,
"kl_loss_2": 2080.361853027344,
"kl_loss_3": 1626.3007873535157,
"kl_loss_7": 584.5892837524414,
"learning_rate": 2.9074071841727055e-06,
"loss": 1115.8137,
"step": 9660
},
{
"ce_loss_10": 3.548972153663635,
"ce_loss_13": 3.4729049801826477,
"ce_loss_2": 4.494955968856812,
"ce_loss_3": 4.230167889595032,
"ce_loss_7": 3.730636739730835,
"epoch": 0.967,
"grad_norm": 632.0,
"kl_loss_10": 177.06267852783202,
"kl_loss_2": 2105.5685302734373,
"kl_loss_3": 1641.2366760253906,
"kl_loss_7": 599.4572601318359,
"learning_rate": 2.739052315863355e-06,
"loss": 1112.1609,
"step": 9670
},
{
"ce_loss_10": 3.5363902688026427,
"ce_loss_13": 3.4610472440719606,
"ce_loss_2": 4.502471828460694,
"ce_loss_3": 4.230240440368652,
"ce_loss_7": 3.7059998750686645,
"epoch": 0.968,
"grad_norm": 560.0,
"kl_loss_10": 176.56764450073243,
"kl_loss_2": 2152.0122924804687,
"kl_loss_3": 1676.3801025390626,
"kl_loss_7": 591.2061340332032,
"learning_rate": 2.5757048597765396e-06,
"loss": 1135.4543,
"step": 9680
},
{
"ce_loss_10": 3.5459084630012514,
"ce_loss_13": 3.4721821188926696,
"ce_loss_2": 4.505685806274414,
"ce_loss_3": 4.235912537574768,
"ce_loss_7": 3.722131609916687,
"epoch": 0.969,
"grad_norm": 560.0,
"kl_loss_10": 176.31484680175782,
"kl_loss_2": 2142.29345703125,
"kl_loss_3": 1672.873828125,
"kl_loss_7": 599.0281616210938,
"learning_rate": 2.417366460819359e-06,
"loss": 1141.189,
"step": 9690
},
{
"ce_loss_10": 3.5568428516387938,
"ce_loss_13": 3.47944039106369,
"ce_loss_2": 4.546383309364319,
"ce_loss_3": 4.280533790588379,
"ce_loss_7": 3.73818119764328,
"epoch": 0.97,
"grad_norm": 592.0,
"kl_loss_10": 181.22289581298827,
"kl_loss_2": 2223.5619262695313,
"kl_loss_3": 1743.523046875,
"kl_loss_7": 618.0255676269531,
"learning_rate": 2.2640387134577057e-06,
"loss": 1150.9949,
"step": 9700
},
{
"ce_loss_10": 3.4835644006729125,
"ce_loss_13": 3.409128963947296,
"ce_loss_2": 4.400413775444031,
"ce_loss_3": 4.1408212065696715,
"ce_loss_7": 3.6511133790016173,
"epoch": 0.971,
"grad_norm": 584.0,
"kl_loss_10": 169.66612396240234,
"kl_loss_2": 2037.37294921875,
"kl_loss_3": 1580.9468200683593,
"kl_loss_7": 575.112336730957,
"learning_rate": 2.115723161700278e-06,
"loss": 1111.2564,
"step": 9710
},
{
"ce_loss_10": 3.462701106071472,
"ce_loss_13": 3.383505952358246,
"ce_loss_2": 4.462756657600403,
"ce_loss_3": 4.1902328610420225,
"ce_loss_7": 3.6453136444091796,
"epoch": 0.972,
"grad_norm": 676.0,
"kl_loss_10": 180.0776268005371,
"kl_loss_2": 2223.634521484375,
"kl_loss_3": 1740.0434143066407,
"kl_loss_7": 612.3085083007812,
"learning_rate": 1.9724212990830937e-06,
"loss": 1170.462,
"step": 9720
},
{
"ce_loss_10": 3.6076322913169863,
"ce_loss_13": 3.532732355594635,
"ce_loss_2": 4.577161026000977,
"ce_loss_3": 4.311069667339325,
"ce_loss_7": 3.7834354996681214,
"epoch": 0.973,
"grad_norm": 488.0,
"kl_loss_10": 178.08698196411132,
"kl_loss_2": 2168.2475769042967,
"kl_loss_3": 1699.6189208984374,
"kl_loss_7": 598.9332580566406,
"learning_rate": 1.8341345686543331e-06,
"loss": 1146.8779,
"step": 9730
},
{
"ce_loss_10": 3.5909879326820375,
"ce_loss_13": 3.5183821320533752,
"ce_loss_2": 4.50426287651062,
"ce_loss_3": 4.235173010826111,
"ce_loss_7": 3.757961595058441,
"epoch": 0.974,
"grad_norm": 548.0,
"kl_loss_10": 174.61135635375976,
"kl_loss_2": 2063.9743225097654,
"kl_loss_3": 1591.8313293457031,
"kl_loss_7": 585.6029083251954,
"learning_rate": 1.7008643629596864e-06,
"loss": 1145.0081,
"step": 9740
},
{
"ce_loss_10": 3.5759197235107423,
"ce_loss_13": 3.4986127734184267,
"ce_loss_2": 4.5397637486457825,
"ce_loss_3": 4.2685352802276615,
"ce_loss_7": 3.7446988224983215,
"epoch": 0.975,
"grad_norm": 552.0,
"kl_loss_10": 176.2814811706543,
"kl_loss_2": 2161.20859375,
"kl_loss_3": 1678.5180541992188,
"kl_loss_7": 590.2671813964844,
"learning_rate": 1.5726120240288633e-06,
"loss": 1164.5706,
"step": 9750
},
{
"ce_loss_10": 3.4757342100143434,
"ce_loss_13": 3.4012367367744445,
"ce_loss_2": 4.433041834831238,
"ce_loss_3": 4.165659952163696,
"ce_loss_7": 3.6462602019309998,
"epoch": 0.976,
"grad_norm": 572.0,
"kl_loss_10": 174.65177154541016,
"kl_loss_2": 2138.293341064453,
"kl_loss_3": 1655.4461547851563,
"kl_loss_7": 589.6313079833984,
"learning_rate": 1.4493788433612708e-06,
"loss": 1134.1515,
"step": 9760
},
{
"ce_loss_10": 3.5877037525177,
"ce_loss_13": 3.5132053971290587,
"ce_loss_2": 4.55386061668396,
"ce_loss_3": 4.287087714672088,
"ce_loss_7": 3.7638731479644774,
"epoch": 0.977,
"grad_norm": 536.0,
"kl_loss_10": 177.9455436706543,
"kl_loss_2": 2173.966436767578,
"kl_loss_3": 1692.082745361328,
"kl_loss_7": 599.7038208007813,
"learning_rate": 1.3311660619138578e-06,
"loss": 1161.4269,
"step": 9770
},
{
"ce_loss_10": 3.584187960624695,
"ce_loss_13": 3.510979926586151,
"ce_loss_2": 4.489086222648621,
"ce_loss_3": 4.228979337215423,
"ce_loss_7": 3.748577618598938,
"epoch": 0.978,
"grad_norm": 516.0,
"kl_loss_10": 176.20037689208985,
"kl_loss_2": 2033.9857421875,
"kl_loss_3": 1575.569403076172,
"kl_loss_7": 583.5555999755859,
"learning_rate": 1.2179748700879012e-06,
"loss": 1135.4594,
"step": 9780
},
{
"ce_loss_10": 3.516654706001282,
"ce_loss_13": 3.442041552066803,
"ce_loss_2": 4.460341954231263,
"ce_loss_3": 4.201344418525696,
"ce_loss_7": 3.6880866169929503,
"epoch": 0.979,
"grad_norm": 648.0,
"kl_loss_10": 175.97493591308594,
"kl_loss_2": 2106.5852966308594,
"kl_loss_3": 1640.5349182128907,
"kl_loss_7": 589.2118927001953,
"learning_rate": 1.1098064077174619e-06,
"loss": 1139.4391,
"step": 9790
},
{
"ce_loss_10": 3.548008131980896,
"ce_loss_13": 3.470580744743347,
"ce_loss_2": 4.531388640403748,
"ce_loss_3": 4.256609618663788,
"ce_loss_7": 3.7258023023605347,
"epoch": 0.98,
"grad_norm": 660.0,
"kl_loss_10": 175.85005264282228,
"kl_loss_2": 2184.833563232422,
"kl_loss_3": 1695.749658203125,
"kl_loss_7": 597.7893035888671,
"learning_rate": 1.006661764057837e-06,
"loss": 1144.1424,
"step": 9800
},
{
"ce_loss_10": 3.5516860127449035,
"ce_loss_13": 3.479186308383942,
"ce_loss_2": 4.507886123657227,
"ce_loss_3": 4.23874124288559,
"ce_loss_7": 3.7239818572998047,
"epoch": 0.981,
"grad_norm": 548.0,
"kl_loss_10": 174.7688331604004,
"kl_loss_2": 2140.183038330078,
"kl_loss_3": 1663.4087707519532,
"kl_loss_7": 592.6127227783203,
"learning_rate": 9.085419777743465e-07,
"loss": 1136.217,
"step": 9810
},
{
"ce_loss_10": 3.4896764159202576,
"ce_loss_13": 3.4188039541244506,
"ce_loss_2": 4.450884318351745,
"ce_loss_3": 4.184124147891998,
"ce_loss_7": 3.6670993685722353,
"epoch": 0.982,
"grad_norm": 476.0,
"kl_loss_10": 171.6952751159668,
"kl_loss_2": 2127.2390258789064,
"kl_loss_3": 1658.6653991699218,
"kl_loss_7": 588.117578125,
"learning_rate": 8.15448036932176e-07,
"loss": 1121.8644,
"step": 9820
},
{
"ce_loss_10": 3.542994940280914,
"ce_loss_13": 3.471325635910034,
"ce_loss_2": 4.491614294052124,
"ce_loss_3": 4.226279616355896,
"ce_loss_7": 3.716835379600525,
"epoch": 0.983,
"grad_norm": 580.0,
"kl_loss_10": 175.40776138305665,
"kl_loss_2": 2138.5871826171874,
"kl_loss_3": 1668.4995056152343,
"kl_loss_7": 599.0077606201172,
"learning_rate": 7.273808789862724e-07,
"loss": 1157.4876,
"step": 9830
},
{
"ce_loss_10": 3.62471262216568,
"ce_loss_13": 3.552128314971924,
"ce_loss_2": 4.560764002799988,
"ce_loss_3": 4.2987874269485475,
"ce_loss_7": 3.7973197221755983,
"epoch": 0.984,
"grad_norm": 536.0,
"kl_loss_10": 177.9404067993164,
"kl_loss_2": 2121.9407958984375,
"kl_loss_3": 1649.4432312011718,
"kl_loss_7": 593.6075317382813,
"learning_rate": 6.443413907720186e-07,
"loss": 1128.3074,
"step": 9840
},
{
"ce_loss_10": 3.553659164905548,
"ce_loss_13": 3.479843807220459,
"ce_loss_2": 4.502509045600891,
"ce_loss_3": 4.239866006374359,
"ce_loss_7": 3.7261658310890198,
"epoch": 0.985,
"grad_norm": 612.0,
"kl_loss_10": 175.90703582763672,
"kl_loss_2": 2105.8654174804688,
"kl_loss_3": 1643.8707397460937,
"kl_loss_7": 589.3217529296875,
"learning_rate": 5.663304084960185e-07,
"loss": 1125.6893,
"step": 9850
},
{
"ce_loss_10": 3.4857439756393434,
"ce_loss_13": 3.40972044467926,
"ce_loss_2": 4.458719778060913,
"ce_loss_3": 4.193828642368317,
"ce_loss_7": 3.661728310585022,
"epoch": 0.986,
"grad_norm": 544.0,
"kl_loss_10": 175.7668014526367,
"kl_loss_2": 2168.083819580078,
"kl_loss_3": 1695.883349609375,
"kl_loss_7": 599.47685546875,
"learning_rate": 4.933487177280482e-07,
"loss": 1132.0084,
"step": 9860
},
{
"ce_loss_10": 3.577410614490509,
"ce_loss_13": 3.50371458530426,
"ce_loss_2": 4.517120695114135,
"ce_loss_3": 4.256355273723602,
"ce_loss_7": 3.745650053024292,
"epoch": 0.987,
"grad_norm": 580.0,
"kl_loss_10": 172.69470291137696,
"kl_loss_2": 2116.2484741210938,
"kl_loss_3": 1646.7192932128905,
"kl_loss_7": 586.3196258544922,
"learning_rate": 4.2539705339295075e-07,
"loss": 1129.2027,
"step": 9870
},
{
"ce_loss_10": 3.4351974010467528,
"ce_loss_13": 3.359704864025116,
"ce_loss_2": 4.414662563800812,
"ce_loss_3": 4.1485153317451475,
"ce_loss_7": 3.614791524410248,
"epoch": 0.988,
"grad_norm": 624.0,
"kl_loss_10": 176.81834564208984,
"kl_loss_2": 2189.1237670898436,
"kl_loss_3": 1714.1568420410156,
"kl_loss_7": 602.8686370849609,
"learning_rate": 3.6247609976319816e-07,
"loss": 1142.2324,
"step": 9880
},
{
"ce_loss_10": 3.5325068116188048,
"ce_loss_13": 3.4560230016708373,
"ce_loss_2": 4.515677762031555,
"ce_loss_3": 4.241289448738098,
"ce_loss_7": 3.7140289902687074,
"epoch": 0.989,
"grad_norm": 644.0,
"kl_loss_10": 178.62700347900392,
"kl_loss_2": 2181.043316650391,
"kl_loss_3": 1701.3241455078125,
"kl_loss_7": 601.8407318115235,
"learning_rate": 3.0458649045211895e-07,
"loss": 1177.6322,
"step": 9890
},
{
"ce_loss_10": 3.505313539505005,
"ce_loss_13": 3.4275246262550354,
"ce_loss_2": 4.470349764823913,
"ce_loss_3": 4.199915885925293,
"ce_loss_7": 3.687111556529999,
"epoch": 0.99,
"grad_norm": 628.0,
"kl_loss_10": 179.9844207763672,
"kl_loss_2": 2144.779681396484,
"kl_loss_3": 1664.5868041992187,
"kl_loss_7": 610.2965026855469,
"learning_rate": 2.517288084074587e-07,
"loss": 1173.5785,
"step": 9900
},
{
"ce_loss_10": 3.541435408592224,
"ce_loss_13": 3.4641653418540956,
"ce_loss_2": 4.540918755531311,
"ce_loss_3": 4.268487918376922,
"ce_loss_7": 3.728367364406586,
"epoch": 0.991,
"grad_norm": 544.0,
"kl_loss_10": 181.58360061645507,
"kl_loss_2": 2223.09423828125,
"kl_loss_3": 1733.728173828125,
"kl_loss_7": 618.8083801269531,
"learning_rate": 2.0390358590538505e-07,
"loss": 1164.2306,
"step": 9910
},
{
"ce_loss_10": 3.5465844750404356,
"ce_loss_13": 3.4692795395851137,
"ce_loss_2": 4.505300617218017,
"ce_loss_3": 4.238151812553406,
"ce_loss_7": 3.7215544462203978,
"epoch": 0.992,
"grad_norm": 516.0,
"kl_loss_10": 178.79893417358397,
"kl_loss_2": 2149.3738037109374,
"kl_loss_3": 1683.4728881835938,
"kl_loss_7": 602.8562408447266,
"learning_rate": 1.61111304545436e-07,
"loss": 1139.9141,
"step": 9920
},
{
"ce_loss_10": 3.5144612431526183,
"ce_loss_13": 3.439807415008545,
"ce_loss_2": 4.468925881385803,
"ce_loss_3": 4.204157900810242,
"ce_loss_7": 3.68552029132843,
"epoch": 0.993,
"grad_norm": 524.0,
"kl_loss_10": 174.9011474609375,
"kl_loss_2": 2131.701556396484,
"kl_loss_3": 1667.3637084960938,
"kl_loss_7": 591.8195831298829,
"learning_rate": 1.2335239524541298e-07,
"loss": 1123.1069,
"step": 9930
},
{
"ce_loss_10": 3.485284912586212,
"ce_loss_13": 3.4107711553573608,
"ce_loss_2": 4.4413145065307615,
"ce_loss_3": 4.1761764764785765,
"ce_loss_7": 3.658044862747192,
"epoch": 0.994,
"grad_norm": 552.0,
"kl_loss_10": 174.74987030029297,
"kl_loss_2": 2137.9515625,
"kl_loss_3": 1659.8411071777343,
"kl_loss_7": 590.9619750976562,
"learning_rate": 9.06272382371065e-08,
"loss": 1140.1338,
"step": 9940
},
{
"ce_loss_10": 3.5549147844314577,
"ce_loss_13": 3.482628679275513,
"ce_loss_2": 4.527088284492493,
"ce_loss_3": 4.2653639078140255,
"ce_loss_7": 3.7300615668296815,
"epoch": 0.995,
"grad_norm": 540.0,
"kl_loss_10": 177.89019927978515,
"kl_loss_2": 2179.192108154297,
"kl_loss_3": 1710.0057861328125,
"kl_loss_7": 601.8619506835937,
"learning_rate": 6.293616306246586e-08,
"loss": 1148.1468,
"step": 9950
},
{
"ce_loss_10": 3.5492191195487974,
"ce_loss_13": 3.4784142851829527,
"ce_loss_2": 4.47113618850708,
"ce_loss_3": 4.207776916027069,
"ce_loss_7": 3.7163458704948424,
"epoch": 0.996,
"grad_norm": 568.0,
"kl_loss_10": 171.20833358764648,
"kl_loss_2": 2067.156182861328,
"kl_loss_3": 1607.7954223632812,
"kl_loss_7": 575.8024002075196,
"learning_rate": 4.027944857032395e-08,
"loss": 1102.1236,
"step": 9960
},
{
"ce_loss_10": 3.5417333483695983,
"ce_loss_13": 3.4737359166145323,
"ce_loss_2": 4.454948210716248,
"ce_loss_3": 4.189112281799316,
"ce_loss_7": 3.7030033111572265,
"epoch": 0.997,
"grad_norm": 564.0,
"kl_loss_10": 169.30588455200194,
"kl_loss_2": 2030.570849609375,
"kl_loss_3": 1568.8222778320312,
"kl_loss_7": 562.5833770751954,
"learning_rate": 2.265732291356626e-08,
"loss": 1096.3691,
"step": 9970
},
{
"ce_loss_10": 3.5887541651725767,
"ce_loss_13": 3.5155721068382264,
"ce_loss_2": 4.518339204788208,
"ce_loss_3": 4.2516262292861935,
"ce_loss_7": 3.7596161723136903,
"epoch": 0.998,
"grad_norm": 516.0,
"kl_loss_10": 174.8034523010254,
"kl_loss_2": 2081.462506103516,
"kl_loss_3": 1607.8740112304688,
"kl_loss_7": 584.5911361694336,
"learning_rate": 1.0069963546743833e-08,
"loss": 1138.0035,
"step": 9980
},
{
"ce_loss_10": 3.567852771282196,
"ce_loss_13": 3.4926111340522765,
"ce_loss_2": 4.526482367515564,
"ce_loss_3": 4.2642577409744264,
"ce_loss_7": 3.741330122947693,
"epoch": 0.999,
"grad_norm": 504.0,
"kl_loss_10": 177.2101951599121,
"kl_loss_2": 2140.8559020996095,
"kl_loss_3": 1666.8830322265626,
"kl_loss_7": 597.1717834472656,
"learning_rate": 2.517497224463483e-09,
"loss": 1140.1191,
"step": 9990
},
{
"ce_loss_10": 3.5264371991157533,
"ce_loss_13": 3.450861382484436,
"ce_loss_2": 4.53892297744751,
"ce_loss_3": 4.266285753250122,
"ce_loss_7": 3.7066094994544985,
"epoch": 1.0,
"grad_norm": 580.0,
"kl_loss_10": 180.3290283203125,
"kl_loss_2": 2255.579718017578,
"kl_loss_3": 1769.6880432128905,
"kl_loss_7": 615.3499603271484,
"learning_rate": 0.0,
"loss": 1181.1314,
"step": 10000
}
],
"logging_steps": 10,
"max_steps": 10000,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 250,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.177819035608023e+17,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}