| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 10000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "ce_loss_10": 5.479339599609375, |
| "ce_loss_13": 3.4827667474746704, |
| "ce_loss_2": 13.979248523712158, |
| "ce_loss_3": 13.771953105926514, |
| "ce_loss_7": 7.430000305175781, |
| "epoch": 0.0001, |
| "grad_norm": 81408.0, |
| "kl_loss_10": 4489.56494140625, |
| "kl_loss_2": 22049.2119140625, |
| "kl_loss_3": 21566.693359375, |
| "kl_loss_7": 7499.004150390625, |
| "learning_rate": 1e-05, |
| "loss": 14123.4883, |
| "step": 1 |
| }, |
| { |
| "ce_loss_10": 5.119714260101318, |
| "ce_loss_13": 3.53999932607015, |
| "ce_loss_2": 11.240631209479439, |
| "ce_loss_3": 10.95585854848226, |
| "ce_loss_7": 6.906492206785414, |
| "epoch": 0.001, |
| "grad_norm": 37888.0, |
| "kl_loss_10": 3271.7383083767363, |
| "kl_loss_2": 15601.7939453125, |
| "kl_loss_3": 14746.902018229166, |
| "kl_loss_7": 6279.269124348958, |
| "learning_rate": 0.0001, |
| "loss": 9990.5972, |
| "step": 10 |
| }, |
| { |
| "ce_loss_10": 4.4336272239685055, |
| "ce_loss_13": 3.5471752166748045, |
| "ce_loss_2": 7.957242059707641, |
| "ce_loss_3": 7.463982796669006, |
| "ce_loss_7": 5.84502854347229, |
| "epoch": 0.002, |
| "grad_norm": 9216.0, |
| "kl_loss_10": 1664.693670654297, |
| "kl_loss_2": 8140.2906494140625, |
| "kl_loss_3": 7229.052661132812, |
| "kl_loss_7": 4268.229284667968, |
| "learning_rate": 0.0002, |
| "loss": 5408.8828, |
| "step": 20 |
| }, |
| { |
| "ce_loss_10": 3.938058543205261, |
| "ce_loss_13": 3.333306384086609, |
| "ce_loss_2": 6.773653674125671, |
| "ce_loss_3": 6.433317041397094, |
| "ce_loss_7": 5.134593844413757, |
| "epoch": 0.003, |
| "grad_norm": 3248.0, |
| "kl_loss_10": 1150.4377380371093, |
| "kl_loss_2": 6385.490258789063, |
| "kl_loss_3": 5771.109155273438, |
| "kl_loss_7": 3370.8108642578127, |
| "learning_rate": 0.0003, |
| "loss": 4090.45, |
| "step": 30 |
| }, |
| { |
| "ce_loss_10": 3.9883425116539, |
| "ce_loss_13": 3.5081888794898988, |
| "ce_loss_2": 6.318326234817505, |
| "ce_loss_3": 6.045081973075867, |
| "ce_loss_7": 4.969816541671753, |
| "epoch": 0.004, |
| "grad_norm": 6240.0, |
| "kl_loss_10": 955.3555145263672, |
| "kl_loss_2": 5198.101293945312, |
| "kl_loss_3": 4741.139855957032, |
| "kl_loss_7": 2762.6712036132812, |
| "learning_rate": 0.0004, |
| "loss": 3452.1297, |
| "step": 40 |
| }, |
| { |
| "ce_loss_10": 3.94427330493927, |
| "ce_loss_13": 3.4815958857536318, |
| "ce_loss_2": 6.088573956489563, |
| "ce_loss_3": 5.815527606010437, |
| "ce_loss_7": 4.780157661437988, |
| "epoch": 0.005, |
| "grad_norm": 4768.0, |
| "kl_loss_10": 882.2040557861328, |
| "kl_loss_2": 4868.76328125, |
| "kl_loss_3": 4416.1666259765625, |
| "kl_loss_7": 2467.264501953125, |
| "learning_rate": 0.0005, |
| "loss": 3156.5504, |
| "step": 50 |
| }, |
| { |
| "ce_loss_10": 3.877071762084961, |
| "ce_loss_13": 3.493886411190033, |
| "ce_loss_2": 5.86357319355011, |
| "ce_loss_3": 5.616779232025147, |
| "ce_loss_7": 4.6486598491668705, |
| "epoch": 0.006, |
| "grad_norm": 4768.0, |
| "kl_loss_10": 781.2706634521485, |
| "kl_loss_2": 4452.719311523438, |
| "kl_loss_3": 4034.9894165039063, |
| "kl_loss_7": 2237.3867736816405, |
| "learning_rate": 0.0006, |
| "loss": 2874.2984, |
| "step": 60 |
| }, |
| { |
| "ce_loss_10": 3.7734105229377746, |
| "ce_loss_13": 3.408212423324585, |
| "ce_loss_2": 5.7149782419204715, |
| "ce_loss_3": 5.489736318588257, |
| "ce_loss_7": 4.5027463555336, |
| "epoch": 0.007, |
| "grad_norm": 2896.0, |
| "kl_loss_10": 747.7415161132812, |
| "kl_loss_2": 4370.160473632813, |
| "kl_loss_3": 3989.598352050781, |
| "kl_loss_7": 2142.413153076172, |
| "learning_rate": 0.0007, |
| "loss": 2776.958, |
| "step": 70 |
| }, |
| { |
| "ce_loss_10": 3.7659215092658997, |
| "ce_loss_13": 3.409997522830963, |
| "ce_loss_2": 5.646710276603699, |
| "ce_loss_3": 5.404528284072876, |
| "ce_loss_7": 4.458205795288086, |
| "epoch": 0.008, |
| "grad_norm": 2256.0, |
| "kl_loss_10": 717.0317596435547, |
| "kl_loss_2": 4256.561865234375, |
| "kl_loss_3": 3846.635607910156, |
| "kl_loss_7": 2051.1767517089843, |
| "learning_rate": 0.0008, |
| "loss": 2710.7359, |
| "step": 80 |
| }, |
| { |
| "ce_loss_10": 3.6950827717781065, |
| "ce_loss_13": 3.3681079149246216, |
| "ce_loss_2": 5.57774977684021, |
| "ce_loss_3": 5.358799338340759, |
| "ce_loss_7": 4.384551775455475, |
| "epoch": 0.009, |
| "grad_norm": 2784.0, |
| "kl_loss_10": 674.085107421875, |
| "kl_loss_2": 4234.844958496094, |
| "kl_loss_3": 3849.5977783203125, |
| "kl_loss_7": 2047.007257080078, |
| "learning_rate": 0.0009000000000000001, |
| "loss": 2670.6148, |
| "step": 90 |
| }, |
| { |
| "ce_loss_10": 3.831665110588074, |
| "ce_loss_13": 3.493525803089142, |
| "ce_loss_2": 5.601197052001953, |
| "ce_loss_3": 5.398815608024597, |
| "ce_loss_7": 4.526931476593018, |
| "epoch": 0.01, |
| "grad_norm": 2752.0, |
| "kl_loss_10": 672.117709350586, |
| "kl_loss_2": 4021.284912109375, |
| "kl_loss_3": 3663.468957519531, |
| "kl_loss_7": 2026.185284423828, |
| "learning_rate": 0.001, |
| "loss": 2598.8504, |
| "step": 100 |
| }, |
| { |
| "ce_loss_10": 3.7488776206970216, |
| "ce_loss_13": 3.4458404183387756, |
| "ce_loss_2": 5.519205498695373, |
| "ce_loss_3": 5.296326518058777, |
| "ce_loss_7": 4.427326142787933, |
| "epoch": 0.011, |
| "grad_norm": 1728.0, |
| "kl_loss_10": 619.106655883789, |
| "kl_loss_2": 3960.856494140625, |
| "kl_loss_3": 3572.990026855469, |
| "kl_loss_7": 1934.6764282226563, |
| "learning_rate": 0.0009999974825027757, |
| "loss": 2513.0074, |
| "step": 110 |
| }, |
| { |
| "ce_loss_10": 3.805395770072937, |
| "ce_loss_13": 3.503445029258728, |
| "ce_loss_2": 5.498133254051209, |
| "ce_loss_3": 5.255028605461121, |
| "ce_loss_7": 4.415169513225555, |
| "epoch": 0.012, |
| "grad_norm": 2040.0, |
| "kl_loss_10": 603.263638305664, |
| "kl_loss_2": 3843.529895019531, |
| "kl_loss_3": 3407.914306640625, |
| "kl_loss_7": 1813.6988830566406, |
| "learning_rate": 0.0009999899300364532, |
| "loss": 2390.1301, |
| "step": 120 |
| }, |
| { |
| "ce_loss_10": 3.7707916855812074, |
| "ce_loss_13": 3.475922393798828, |
| "ce_loss_2": 5.500737500190735, |
| "ce_loss_3": 5.266239738464355, |
| "ce_loss_7": 4.383851003646851, |
| "epoch": 0.013, |
| "grad_norm": 2624.0, |
| "kl_loss_10": 587.465219116211, |
| "kl_loss_2": 3864.1528442382814, |
| "kl_loss_3": 3434.2073608398437, |
| "kl_loss_7": 1783.62373046875, |
| "learning_rate": 0.0009999773426770863, |
| "loss": 2449.8629, |
| "step": 130 |
| }, |
| { |
| "ce_loss_10": 3.867912781238556, |
| "ce_loss_13": 3.509277641773224, |
| "ce_loss_2": 5.469627714157104, |
| "ce_loss_3": 5.194933176040649, |
| "ce_loss_7": 4.379399788379669, |
| "epoch": 0.014, |
| "grad_norm": 1992.0, |
| "kl_loss_10": 725.8971160888672, |
| "kl_loss_2": 3778.5274291992187, |
| "kl_loss_3": 3296.354528808594, |
| "kl_loss_7": 1732.4250732421874, |
| "learning_rate": 0.0009999597205514296, |
| "loss": 2405.5832, |
| "step": 140 |
| }, |
| { |
| "ce_loss_10": 3.77968590259552, |
| "ce_loss_13": 3.471135640144348, |
| "ce_loss_2": 5.36739604473114, |
| "ce_loss_3": 5.106346774101257, |
| "ce_loss_7": 4.320439124107361, |
| "epoch": 0.015, |
| "grad_norm": 1360.0, |
| "kl_loss_10": 624.2580291748047, |
| "kl_loss_2": 3632.6698486328123, |
| "kl_loss_3": 3176.562145996094, |
| "kl_loss_7": 1695.959698486328, |
| "learning_rate": 0.0009999370638369377, |
| "loss": 2293.6836, |
| "step": 150 |
| }, |
| { |
| "ce_loss_10": 3.8028363585472107, |
| "ce_loss_13": 3.5099030256271364, |
| "ce_loss_2": 5.394668865203857, |
| "ce_loss_3": 5.231132960319519, |
| "ce_loss_7": 4.338041806221009, |
| "epoch": 0.016, |
| "grad_norm": 3296.0, |
| "kl_loss_10": 591.1725463867188, |
| "kl_loss_2": 3644.2818969726563, |
| "kl_loss_3": 3364.4876342773437, |
| "kl_loss_7": 1640.8858154296875, |
| "learning_rate": 0.000999909372761763, |
| "loss": 2313.8473, |
| "step": 160 |
| }, |
| { |
| "ce_loss_10": 3.715697240829468, |
| "ce_loss_13": 3.4447871685028075, |
| "ce_loss_2": 5.341588139533997, |
| "ce_loss_3": 5.263697862625122, |
| "ce_loss_7": 4.2574918389320375, |
| "epoch": 0.017, |
| "grad_norm": 3008.0, |
| "kl_loss_10": 555.6443099975586, |
| "kl_loss_2": 3670.3303466796874, |
| "kl_loss_3": 3553.7742431640627, |
| "kl_loss_7": 1629.572772216797, |
| "learning_rate": 0.0009998766476047546, |
| "loss": 2372.3059, |
| "step": 170 |
| }, |
| { |
| "ce_loss_10": 3.7622690200805664, |
| "ce_loss_13": 3.4889180302619933, |
| "ce_loss_2": 5.369840741157532, |
| "ce_loss_3": 5.276954698562622, |
| "ce_loss_7": 4.275516867637634, |
| "epoch": 0.018, |
| "grad_norm": 2040.0, |
| "kl_loss_10": 565.5127258300781, |
| "kl_loss_2": 3642.269982910156, |
| "kl_loss_3": 3495.3918212890626, |
| "kl_loss_7": 1571.4158569335937, |
| "learning_rate": 0.0009998388886954545, |
| "loss": 2349.4688, |
| "step": 180 |
| }, |
| { |
| "ce_loss_10": 3.712801456451416, |
| "ce_loss_13": 3.4555400371551515, |
| "ce_loss_2": 5.312930059432984, |
| "ce_loss_3": 5.148007488250732, |
| "ce_loss_7": 4.23377754688263, |
| "epoch": 0.019, |
| "grad_norm": 1328.0, |
| "kl_loss_10": 534.2748489379883, |
| "kl_loss_2": 3599.4434326171877, |
| "kl_loss_3": 3312.2544921875, |
| "kl_loss_7": 1559.4685668945312, |
| "learning_rate": 0.0009997960964140947, |
| "loss": 2241.091, |
| "step": 190 |
| }, |
| { |
| "ce_loss_10": 3.6890772104263307, |
| "ce_loss_13": 3.4474449634552, |
| "ce_loss_2": 5.328355288505554, |
| "ce_loss_3": 5.103678369522095, |
| "ce_loss_7": 4.223123550415039, |
| "epoch": 0.02, |
| "grad_norm": 1584.0, |
| "kl_loss_10": 498.3182800292969, |
| "kl_loss_2": 3626.2685302734376, |
| "kl_loss_3": 3230.7111083984373, |
| "kl_loss_7": 1544.03125, |
| "learning_rate": 0.0009997482711915926, |
| "loss": 2212.8523, |
| "step": 200 |
| }, |
| { |
| "ce_loss_10": 3.643280267715454, |
| "ce_loss_13": 3.4110453128814697, |
| "ce_loss_2": 5.262782073020935, |
| "ce_loss_3": 5.006648206710816, |
| "ce_loss_7": 4.161720204353332, |
| "epoch": 0.021, |
| "grad_norm": 1320.0, |
| "kl_loss_10": 468.05088500976564, |
| "kl_loss_2": 3600.3508911132812, |
| "kl_loss_3": 3146.037072753906, |
| "kl_loss_7": 1514.3593139648438, |
| "learning_rate": 0.0009996954135095479, |
| "loss": 2163.3328, |
| "step": 210 |
| }, |
| { |
| "ce_loss_10": 3.743840980529785, |
| "ce_loss_13": 3.495615518093109, |
| "ce_loss_2": 5.276893544197082, |
| "ce_loss_3": 5.026828193664551, |
| "ce_loss_7": 4.215770494937897, |
| "epoch": 0.022, |
| "grad_norm": 952.0, |
| "kl_loss_10": 494.9872482299805, |
| "kl_loss_2": 3434.308557128906, |
| "kl_loss_3": 2996.470593261719, |
| "kl_loss_7": 1447.3476196289062, |
| "learning_rate": 0.0009996375239002368, |
| "loss": 2094.248, |
| "step": 220 |
| }, |
| { |
| "ce_loss_10": 3.8117304921150206, |
| "ce_loss_13": 3.5717169761657717, |
| "ce_loss_2": 5.300674176216125, |
| "ce_loss_3": 5.045718550682068, |
| "ce_loss_7": 4.271833729743958, |
| "epoch": 0.023, |
| "grad_norm": 1064.0, |
| "kl_loss_10": 491.1131820678711, |
| "kl_loss_2": 3352.0796875, |
| "kl_loss_3": 2909.836950683594, |
| "kl_loss_7": 1405.5986450195312, |
| "learning_rate": 0.0009995746029466072, |
| "loss": 2050.6086, |
| "step": 230 |
| }, |
| { |
| "ce_loss_10": 3.6075421810150146, |
| "ce_loss_13": 3.3550766468048097, |
| "ce_loss_2": 5.39588577747345, |
| "ce_loss_3": 4.985904622077942, |
| "ce_loss_7": 4.14452086687088, |
| "epoch": 0.024, |
| "grad_norm": 1496.0, |
| "kl_loss_10": 521.684194946289, |
| "kl_loss_2": 3944.3539916992186, |
| "kl_loss_3": 3201.487194824219, |
| "kl_loss_7": 1583.0420288085938, |
| "learning_rate": 0.0009995066512822719, |
| "loss": 2234.7746, |
| "step": 240 |
| }, |
| { |
| "ce_loss_10": 3.6849907636642456, |
| "ce_loss_13": 3.461445081233978, |
| "ce_loss_2": 5.414009666442871, |
| "ce_loss_3": 5.085514402389526, |
| "ce_loss_7": 4.184376835823059, |
| "epoch": 0.025, |
| "grad_norm": 1800.0, |
| "kl_loss_10": 465.4432067871094, |
| "kl_loss_2": 3782.4762451171873, |
| "kl_loss_3": 3206.6178466796873, |
| "kl_loss_7": 1450.9975891113281, |
| "learning_rate": 0.000999433669591504, |
| "loss": 2142.3535, |
| "step": 250 |
| }, |
| { |
| "ce_loss_10": 3.6025625109672545, |
| "ce_loss_13": 3.360257649421692, |
| "ce_loss_2": 5.237245011329651, |
| "ce_loss_3": 4.9437507629394535, |
| "ce_loss_7": 4.080421531200409, |
| "epoch": 0.026, |
| "grad_norm": 1408.0, |
| "kl_loss_10": 503.2585876464844, |
| "kl_loss_2": 3655.8213134765624, |
| "kl_loss_3": 3140.7313842773438, |
| "kl_loss_7": 1460.5682739257813, |
| "learning_rate": 0.000999355658609228, |
| "loss": 2133.6004, |
| "step": 260 |
| }, |
| { |
| "ce_loss_10": 3.6813029885292052, |
| "ce_loss_13": 3.395027530193329, |
| "ce_loss_2": 5.295657467842102, |
| "ce_loss_3": 5.023426985740661, |
| "ce_loss_7": 4.133508098125458, |
| "epoch": 0.027, |
| "grad_norm": 1416.0, |
| "kl_loss_10": 572.3903137207031, |
| "kl_loss_2": 3669.314978027344, |
| "kl_loss_3": 3183.432019042969, |
| "kl_loss_7": 1464.0882202148437, |
| "learning_rate": 0.0009992726191210138, |
| "loss": 2179.2967, |
| "step": 270 |
| }, |
| { |
| "ce_loss_10": 3.696367251873016, |
| "ce_loss_13": 3.433962404727936, |
| "ce_loss_2": 5.21666829586029, |
| "ce_loss_3": 4.99695348739624, |
| "ce_loss_7": 4.169408094882965, |
| "epoch": 0.028, |
| "grad_norm": 1880.0, |
| "kl_loss_10": 529.3393615722656, |
| "kl_loss_2": 3457.8086547851562, |
| "kl_loss_3": 3089.980187988281, |
| "kl_loss_7": 1482.3380798339845, |
| "learning_rate": 0.0009991845519630679, |
| "loss": 2115.8172, |
| "step": 280 |
| }, |
| { |
| "ce_loss_10": 3.556672739982605, |
| "ce_loss_13": 3.3172685623168947, |
| "ce_loss_2": 5.112358474731446, |
| "ce_loss_3": 4.917420530319214, |
| "ce_loss_7": 4.036571848392486, |
| "epoch": 0.029, |
| "grad_norm": 2000.0, |
| "kl_loss_10": 477.5372833251953, |
| "kl_loss_2": 3475.2529418945314, |
| "kl_loss_3": 3146.291943359375, |
| "kl_loss_7": 1444.9558898925782, |
| "learning_rate": 0.0009990914580222257, |
| "loss": 2130.9104, |
| "step": 290 |
| }, |
| { |
| "ce_loss_10": 3.6650490760803223, |
| "ce_loss_13": 3.455529069900513, |
| "ce_loss_2": 5.149494194984436, |
| "ce_loss_3": 4.940038657188415, |
| "ce_loss_7": 4.130682170391083, |
| "epoch": 0.03, |
| "grad_norm": 1560.0, |
| "kl_loss_10": 441.9183044433594, |
| "kl_loss_2": 3299.1113159179686, |
| "kl_loss_3": 2933.9962768554688, |
| "kl_loss_7": 1369.8260986328125, |
| "learning_rate": 0.0009989933382359422, |
| "loss": 2069.7893, |
| "step": 300 |
| }, |
| { |
| "ce_loss_10": 3.6985942125320435, |
| "ce_loss_13": 3.465806806087494, |
| "ce_loss_2": 5.143499898910522, |
| "ce_loss_3": 4.909449362754822, |
| "ce_loss_7": 4.1382394433021545, |
| "epoch": 0.031, |
| "grad_norm": 1120.0, |
| "kl_loss_10": 486.39428558349607, |
| "kl_loss_2": 3301.6126342773437, |
| "kl_loss_3": 2880.7590454101564, |
| "kl_loss_7": 1392.4209594726562, |
| "learning_rate": 0.0009988901935922825, |
| "loss": 2022.2506, |
| "step": 310 |
| }, |
| { |
| "ce_loss_10": 3.544218695163727, |
| "ce_loss_13": 3.314011883735657, |
| "ce_loss_2": 5.10150101184845, |
| "ce_loss_3": 4.842008900642395, |
| "ce_loss_7": 4.024464392662049, |
| "epoch": 0.032, |
| "grad_norm": 1472.0, |
| "kl_loss_10": 469.5253311157227, |
| "kl_loss_2": 3487.786022949219, |
| "kl_loss_3": 3028.9456176757812, |
| "kl_loss_7": 1444.1999450683593, |
| "learning_rate": 0.0009987820251299122, |
| "loss": 2047.4186, |
| "step": 320 |
| }, |
| { |
| "ce_loss_10": 3.67177551984787, |
| "ce_loss_13": 3.4466672420501707, |
| "ce_loss_2": 5.135675239562988, |
| "ce_loss_3": 4.8595945835113525, |
| "ce_loss_7": 4.121568500995636, |
| "epoch": 0.033, |
| "grad_norm": 1012.0, |
| "kl_loss_10": 450.39354553222654, |
| "kl_loss_2": 3306.36376953125, |
| "kl_loss_3": 2827.274365234375, |
| "kl_loss_7": 1387.9264587402345, |
| "learning_rate": 0.0009986688339380862, |
| "loss": 1975.759, |
| "step": 330 |
| }, |
| { |
| "ce_loss_10": 3.6029638409614564, |
| "ce_loss_13": 3.397365379333496, |
| "ce_loss_2": 5.057221698760986, |
| "ce_loss_3": 4.7936498641967775, |
| "ce_loss_7": 4.015895903110504, |
| "epoch": 0.034, |
| "grad_norm": 964.0, |
| "kl_loss_10": 436.06551971435545, |
| "kl_loss_2": 3221.0221435546873, |
| "kl_loss_3": 2750.7028198242188, |
| "kl_loss_7": 1266.2399963378907, |
| "learning_rate": 0.0009985506211566387, |
| "loss": 1936.1705, |
| "step": 340 |
| }, |
| { |
| "ce_loss_10": 3.6370130658149717, |
| "ce_loss_13": 3.4315125226974486, |
| "ce_loss_2": 5.061046314239502, |
| "ce_loss_3": 4.7848950862884525, |
| "ce_loss_7": 4.02301949262619, |
| "epoch": 0.035, |
| "grad_norm": 908.0, |
| "kl_loss_10": 422.44232482910155, |
| "kl_loss_2": 3177.237683105469, |
| "kl_loss_3": 2690.365087890625, |
| "kl_loss_7": 1217.3221435546875, |
| "learning_rate": 0.0009984273879759713, |
| "loss": 1896.4475, |
| "step": 350 |
| }, |
| { |
| "ce_loss_10": 3.656745362281799, |
| "ce_loss_13": 3.4570682406425477, |
| "ce_loss_2": 5.137815022468567, |
| "ce_loss_3": 4.873619461059571, |
| "ce_loss_7": 4.083463799953461, |
| "epoch": 0.036, |
| "grad_norm": 860.0, |
| "kl_loss_10": 423.36531524658204, |
| "kl_loss_2": 3273.7410522460937, |
| "kl_loss_3": 2798.973498535156, |
| "kl_loss_7": 1268.806689453125, |
| "learning_rate": 0.0009982991356370402, |
| "loss": 1973.0059, |
| "step": 360 |
| }, |
| { |
| "ce_loss_10": 3.631060302257538, |
| "ce_loss_13": 3.4341819286346436, |
| "ce_loss_2": 5.11769585609436, |
| "ce_loss_3": 4.834220147132873, |
| "ce_loss_7": 4.047011601924896, |
| "epoch": 0.037, |
| "grad_norm": 908.0, |
| "kl_loss_10": 402.81113891601564, |
| "kl_loss_2": 3276.052880859375, |
| "kl_loss_3": 2791.4117065429687, |
| "kl_loss_7": 1245.3732849121093, |
| "learning_rate": 0.0009981658654313456, |
| "loss": 1941.0666, |
| "step": 370 |
| }, |
| { |
| "ce_loss_10": 3.7020971179008484, |
| "ce_loss_13": 3.5137467861175535, |
| "ce_loss_2": 5.156114864349365, |
| "ce_loss_3": 4.874492716789246, |
| "ce_loss_7": 4.092896187305451, |
| "epoch": 0.038, |
| "grad_norm": 744.0, |
| "kl_loss_10": 382.19567413330077, |
| "kl_loss_2": 3216.3584228515624, |
| "kl_loss_3": 2713.040576171875, |
| "kl_loss_7": 1200.0062133789063, |
| "learning_rate": 0.000998027578700917, |
| "loss": 1916.7457, |
| "step": 380 |
| }, |
| { |
| "ce_loss_10": 3.629340207576752, |
| "ce_loss_13": 3.4466560959815977, |
| "ce_loss_2": 5.104858756065369, |
| "ce_loss_3": 4.827202153205872, |
| "ce_loss_7": 4.051906526088715, |
| "epoch": 0.039, |
| "grad_norm": 768.0, |
| "kl_loss_10": 387.5618530273438, |
| "kl_loss_2": 3239.9035766601564, |
| "kl_loss_3": 2754.5089477539063, |
| "kl_loss_7": 1245.49443359375, |
| "learning_rate": 0.0009978842768382998, |
| "loss": 1919.6182, |
| "step": 390 |
| }, |
| { |
| "ce_loss_10": 3.6458646416664124, |
| "ce_loss_13": 3.4677427411079407, |
| "ce_loss_2": 5.076069569587707, |
| "ce_loss_3": 4.798897671699524, |
| "ce_loss_7": 4.036490082740784, |
| "epoch": 0.04, |
| "grad_norm": 820.0, |
| "kl_loss_10": 365.37960052490234, |
| "kl_loss_2": 3139.388073730469, |
| "kl_loss_3": 2645.03125, |
| "kl_loss_7": 1170.367169189453, |
| "learning_rate": 0.0009977359612865424, |
| "loss": 1848.2086, |
| "step": 400 |
| }, |
| { |
| "ce_loss_10": 3.6510703682899477, |
| "ce_loss_13": 3.472544801235199, |
| "ce_loss_2": 5.0927152872085575, |
| "ce_loss_3": 4.818557095527649, |
| "ce_loss_7": 4.048879408836365, |
| "epoch": 0.041, |
| "grad_norm": 752.0, |
| "kl_loss_10": 376.4607299804687, |
| "kl_loss_2": 3183.3623168945314, |
| "kl_loss_3": 2696.904638671875, |
| "kl_loss_7": 1198.10205078125, |
| "learning_rate": 0.0009975826335391806, |
| "loss": 1850.6066, |
| "step": 410 |
| }, |
| { |
| "ce_loss_10": 3.664944088459015, |
| "ce_loss_13": 3.4915570259094237, |
| "ce_loss_2": 5.092843031883239, |
| "ce_loss_3": 4.81715497970581, |
| "ce_loss_7": 4.06898148059845, |
| "epoch": 0.042, |
| "grad_norm": 1072.0, |
| "kl_loss_10": 367.5922546386719, |
| "kl_loss_2": 3121.2123901367186, |
| "kl_loss_3": 2637.7326782226564, |
| "kl_loss_7": 1178.5750122070312, |
| "learning_rate": 0.0009974242951402235, |
| "loss": 1847.4906, |
| "step": 420 |
| }, |
| { |
| "ce_loss_10": 3.6901652693748472, |
| "ce_loss_13": 3.5015287518501284, |
| "ce_loss_2": 5.113956260681152, |
| "ce_loss_3": 4.831623649597168, |
| "ce_loss_7": 4.073742997646332, |
| "epoch": 0.043, |
| "grad_norm": 932.0, |
| "kl_loss_10": 391.2719299316406, |
| "kl_loss_2": 3171.550817871094, |
| "kl_loss_3": 2672.8351318359373, |
| "kl_loss_7": 1198.8372039794922, |
| "learning_rate": 0.0009972609476841367, |
| "loss": 1839.4168, |
| "step": 430 |
| }, |
| { |
| "ce_loss_10": 3.592795264720917, |
| "ce_loss_13": 3.407615542411804, |
| "ce_loss_2": 5.048645877838135, |
| "ce_loss_3": 4.779121279716492, |
| "ce_loss_7": 3.982761597633362, |
| "epoch": 0.044, |
| "grad_norm": 928.0, |
| "kl_loss_10": 377.92359313964846, |
| "kl_loss_2": 3195.713342285156, |
| "kl_loss_3": 2713.7881591796877, |
| "kl_loss_7": 1185.5625, |
| "learning_rate": 0.0009970925928158272, |
| "loss": 1868.092, |
| "step": 440 |
| }, |
| { |
| "ce_loss_10": 3.542843294143677, |
| "ce_loss_13": 3.354374420642853, |
| "ce_loss_2": 5.013250637054443, |
| "ce_loss_3": 4.739123964309693, |
| "ce_loss_7": 3.935924601554871, |
| "epoch": 0.045, |
| "grad_norm": 740.0, |
| "kl_loss_10": 385.2865692138672, |
| "kl_loss_2": 3278.8071044921876, |
| "kl_loss_3": 2790.4721435546876, |
| "kl_loss_7": 1226.6742797851562, |
| "learning_rate": 0.000996919232230627, |
| "loss": 1885.8758, |
| "step": 450 |
| }, |
| { |
| "ce_loss_10": 3.609917199611664, |
| "ce_loss_13": 3.4386712551116942, |
| "ce_loss_2": 5.020998239517212, |
| "ce_loss_3": 4.756829810142517, |
| "ce_loss_7": 4.001234555244446, |
| "epoch": 0.046, |
| "grad_norm": 872.0, |
| "kl_loss_10": 358.4470748901367, |
| "kl_loss_2": 3100.1795166015627, |
| "kl_loss_3": 2620.8273803710936, |
| "kl_loss_7": 1157.8196044921874, |
| "learning_rate": 0.0009967408676742752, |
| "loss": 1772.8766, |
| "step": 460 |
| }, |
| { |
| "ce_loss_10": 3.7562451124191285, |
| "ce_loss_13": 3.5811493396759033, |
| "ce_loss_2": 5.11839497089386, |
| "ce_loss_3": 4.844864320755005, |
| "ce_loss_7": 4.1195793628692625, |
| "epoch": 0.047, |
| "grad_norm": 968.0, |
| "kl_loss_10": 364.69328155517576, |
| "kl_loss_2": 3032.6340087890626, |
| "kl_loss_3": 2548.6266967773436, |
| "kl_loss_7": 1130.8773040771484, |
| "learning_rate": 0.0009965575009429006, |
| "loss": 1825.8629, |
| "step": 470 |
| }, |
| { |
| "ce_loss_10": 3.542626643180847, |
| "ce_loss_13": 3.364771544933319, |
| "ce_loss_2": 4.9806403636932375, |
| "ce_loss_3": 4.703183531761169, |
| "ce_loss_7": 3.9297071576118467, |
| "epoch": 0.048, |
| "grad_norm": 772.0, |
| "kl_loss_10": 368.38177795410155, |
| "kl_loss_2": 3172.022900390625, |
| "kl_loss_3": 2678.579626464844, |
| "kl_loss_7": 1172.0243133544923, |
| "learning_rate": 0.0009963691338830043, |
| "loss": 1818.5924, |
| "step": 480 |
| }, |
| { |
| "ce_loss_10": 3.6282991647720335, |
| "ce_loss_13": 3.4611623764038084, |
| "ce_loss_2": 5.030923771858215, |
| "ce_loss_3": 4.765255475044251, |
| "ce_loss_7": 3.995365762710571, |
| "epoch": 0.049, |
| "grad_norm": 944.0, |
| "kl_loss_10": 346.68406372070314, |
| "kl_loss_2": 3111.6420288085938, |
| "kl_loss_3": 2633.9958740234374, |
| "kl_loss_7": 1125.4209197998048, |
| "learning_rate": 0.0009961757683914405, |
| "loss": 1782.6619, |
| "step": 490 |
| }, |
| { |
| "ce_loss_10": 3.6188631772994997, |
| "ce_loss_13": 3.450295829772949, |
| "ce_loss_2": 4.988259315490723, |
| "ce_loss_3": 4.726764726638794, |
| "ce_loss_7": 4.00168125629425, |
| "epoch": 0.05, |
| "grad_norm": 1184.0, |
| "kl_loss_10": 362.3049346923828, |
| "kl_loss_2": 3035.001806640625, |
| "kl_loss_3": 2588.0874145507814, |
| "kl_loss_7": 1166.9710693359375, |
| "learning_rate": 0.0009959774064153978, |
| "loss": 1805.0438, |
| "step": 500 |
| }, |
| { |
| "ce_loss_10": 3.623943197727203, |
| "ce_loss_13": 3.4620243430137636, |
| "ce_loss_2": 4.959137892723083, |
| "ce_loss_3": 4.687646722793579, |
| "ce_loss_7": 3.976128029823303, |
| "epoch": 0.051, |
| "grad_norm": 856.0, |
| "kl_loss_10": 343.2813385009766, |
| "kl_loss_2": 2963.6288208007813, |
| "kl_loss_3": 2485.8935424804686, |
| "kl_loss_7": 1086.964697265625, |
| "learning_rate": 0.0009957740499523787, |
| "loss": 1751.4643, |
| "step": 510 |
| }, |
| { |
| "ce_loss_10": 3.6490553617477417, |
| "ce_loss_13": 3.476555550098419, |
| "ce_loss_2": 4.994476556777954, |
| "ce_loss_3": 4.725382924079895, |
| "ce_loss_7": 4.001345467567444, |
| "epoch": 0.052, |
| "grad_norm": 808.0, |
| "kl_loss_10": 347.32325134277346, |
| "kl_loss_2": 2968.236572265625, |
| "kl_loss_3": 2495.7832275390624, |
| "kl_loss_7": 1099.3744354248047, |
| "learning_rate": 0.0009955657010501807, |
| "loss": 1740.4176, |
| "step": 520 |
| }, |
| { |
| "ce_loss_10": 3.6094146251678465, |
| "ce_loss_13": 3.4360305190086367, |
| "ce_loss_2": 4.987359571456909, |
| "ce_loss_3": 4.711909174919128, |
| "ce_loss_7": 3.96878160238266, |
| "epoch": 0.053, |
| "grad_norm": 732.0, |
| "kl_loss_10": 356.96947326660154, |
| "kl_loss_2": 3066.1166015625, |
| "kl_loss_3": 2574.2064819335938, |
| "kl_loss_7": 1113.183071899414, |
| "learning_rate": 0.000995352361806875, |
| "loss": 1757.3914, |
| "step": 530 |
| }, |
| { |
| "ce_loss_10": 3.6483884930610655, |
| "ce_loss_13": 3.4761168599128722, |
| "ce_loss_2": 5.01164448261261, |
| "ce_loss_3": 4.73403651714325, |
| "ce_loss_7": 4.005823755264283, |
| "epoch": 0.054, |
| "grad_norm": 868.0, |
| "kl_loss_10": 358.3400619506836, |
| "kl_loss_2": 3025.010693359375, |
| "kl_loss_3": 2540.0701538085937, |
| "kl_loss_7": 1117.8957305908202, |
| "learning_rate": 0.0009951340343707852, |
| "loss": 1783.3418, |
| "step": 540 |
| }, |
| { |
| "ce_loss_10": 3.693763518333435, |
| "ce_loss_13": 3.5300124645233155, |
| "ce_loss_2": 5.04529185295105, |
| "ce_loss_3": 4.776751947402954, |
| "ce_loss_7": 4.050560343265533, |
| "epoch": 0.055, |
| "grad_norm": 580.0, |
| "kl_loss_10": 343.201188659668, |
| "kl_loss_2": 2966.6511840820312, |
| "kl_loss_3": 2491.584606933594, |
| "kl_loss_7": 1070.658499145508, |
| "learning_rate": 0.0009949107209404665, |
| "loss": 1740.307, |
| "step": 550 |
| }, |
| { |
| "ce_loss_10": 3.618695652484894, |
| "ce_loss_13": 3.4460346341133117, |
| "ce_loss_2": 4.953143644332886, |
| "ce_loss_3": 4.67682032585144, |
| "ce_loss_7": 3.9601072311401366, |
| "epoch": 0.056, |
| "grad_norm": 972.0, |
| "kl_loss_10": 355.8962005615234, |
| "kl_loss_2": 2990.009143066406, |
| "kl_loss_3": 2495.9183959960938, |
| "kl_loss_7": 1092.0468170166016, |
| "learning_rate": 0.0009946824237646824, |
| "loss": 1737.0576, |
| "step": 560 |
| }, |
| { |
| "ce_loss_10": 3.5657299041748045, |
| "ce_loss_13": 3.3921077370643617, |
| "ce_loss_2": 4.9473305463790895, |
| "ce_loss_3": 4.655314612388611, |
| "ce_loss_7": 3.9485832929611204, |
| "epoch": 0.057, |
| "grad_norm": 1232.0, |
| "kl_loss_10": 368.3774078369141, |
| "kl_loss_2": 3077.5546997070314, |
| "kl_loss_3": 2563.977990722656, |
| "kl_loss_7": 1171.9384887695312, |
| "learning_rate": 0.0009944491451423828, |
| "loss": 1812.8215, |
| "step": 570 |
| }, |
| { |
| "ce_loss_10": 3.5597246408462526, |
| "ce_loss_13": 3.38997106552124, |
| "ce_loss_2": 4.957224941253662, |
| "ce_loss_3": 4.669384074211121, |
| "ce_loss_7": 3.9783090591430663, |
| "epoch": 0.058, |
| "grad_norm": 1048.0, |
| "kl_loss_10": 352.9766845703125, |
| "kl_loss_2": 3080.3538452148437, |
| "kl_loss_3": 2573.69345703125, |
| "kl_loss_7": 1221.7482543945312, |
| "learning_rate": 0.0009942108874226813, |
| "loss": 1775.8918, |
| "step": 580 |
| }, |
| { |
| "ce_loss_10": 3.667470908164978, |
| "ce_loss_13": 3.5143657088279725, |
| "ce_loss_2": 4.977405524253845, |
| "ce_loss_3": 4.70070378780365, |
| "ce_loss_7": 4.062089693546295, |
| "epoch": 0.059, |
| "grad_norm": 1160.0, |
| "kl_loss_10": 326.54786376953126, |
| "kl_loss_2": 2889.81787109375, |
| "kl_loss_3": 2394.497277832031, |
| "kl_loss_7": 1154.6502380371094, |
| "learning_rate": 0.00099396765300483, |
| "loss": 1684.8838, |
| "step": 590 |
| }, |
| { |
| "ce_loss_10": 3.65077520608902, |
| "ce_loss_13": 3.4909046292304993, |
| "ce_loss_2": 4.953103184700012, |
| "ce_loss_3": 4.675009846687317, |
| "ce_loss_7": 4.037787747383118, |
| "epoch": 0.06, |
| "grad_norm": 728.0, |
| "kl_loss_10": 333.6824432373047, |
| "kl_loss_2": 2888.043603515625, |
| "kl_loss_3": 2401.467254638672, |
| "kl_loss_7": 1146.9622497558594, |
| "learning_rate": 0.0009937194443381972, |
| "loss": 1692.9094, |
| "step": 600 |
| }, |
| { |
| "ce_loss_10": 3.6720112562179565, |
| "ce_loss_13": 3.5144667506217955, |
| "ce_loss_2": 4.945521140098572, |
| "ce_loss_3": 4.670798707008362, |
| "ce_loss_7": 4.003339779376984, |
| "epoch": 0.061, |
| "grad_norm": 728.0, |
| "kl_loss_10": 340.24414978027346, |
| "kl_loss_2": 2848.255480957031, |
| "kl_loss_3": 2358.6506469726564, |
| "kl_loss_7": 1042.5767547607422, |
| "learning_rate": 0.0009934662639222412, |
| "loss": 1695.9006, |
| "step": 610 |
| }, |
| { |
| "ce_loss_10": 3.6284273624420167, |
| "ce_loss_13": 3.466042399406433, |
| "ce_loss_2": 4.974099659919739, |
| "ce_loss_3": 4.698220872879029, |
| "ce_loss_7": 3.9703264474868774, |
| "epoch": 0.062, |
| "grad_norm": 708.0, |
| "kl_loss_10": 346.28453369140624, |
| "kl_loss_2": 2978.781689453125, |
| "kl_loss_3": 2496.677685546875, |
| "kl_loss_7": 1062.910955810547, |
| "learning_rate": 0.000993208114306486, |
| "loss": 1704.2672, |
| "step": 620 |
| }, |
| { |
| "ce_loss_10": 3.5462576508522035, |
| "ce_loss_13": 3.380946898460388, |
| "ce_loss_2": 4.922283387184143, |
| "ce_loss_3": 4.633153581619263, |
| "ce_loss_7": 3.890825295448303, |
| "epoch": 0.063, |
| "grad_norm": 924.0, |
| "kl_loss_10": 358.3551940917969, |
| "kl_loss_2": 3032.9190673828125, |
| "kl_loss_3": 2531.955603027344, |
| "kl_loss_7": 1071.5194458007813, |
| "learning_rate": 0.0009929449980904952, |
| "loss": 1693.2549, |
| "step": 630 |
| }, |
| { |
| "ce_loss_10": 3.6085665225982666, |
| "ce_loss_13": 3.444735288619995, |
| "ce_loss_2": 4.934487676620483, |
| "ce_loss_3": 4.655823493003846, |
| "ce_loss_7": 3.935356545448303, |
| "epoch": 0.064, |
| "grad_norm": 676.0, |
| "kl_loss_10": 344.3735855102539, |
| "kl_loss_2": 2962.493859863281, |
| "kl_loss_3": 2465.4102416992187, |
| "kl_loss_7": 1045.9244415283204, |
| "learning_rate": 0.0009926769179238466, |
| "loss": 1690.2553, |
| "step": 640 |
| }, |
| { |
| "ce_loss_10": 3.657176661491394, |
| "ce_loss_13": 3.4894155979156496, |
| "ce_loss_2": 4.984645247459412, |
| "ce_loss_3": 4.697536993026733, |
| "ce_loss_7": 3.984337937831879, |
| "epoch": 0.065, |
| "grad_norm": 812.0, |
| "kl_loss_10": 351.49694671630857, |
| "kl_loss_2": 2961.2925659179687, |
| "kl_loss_3": 2455.3551025390625, |
| "kl_loss_7": 1056.1930267333985, |
| "learning_rate": 0.000992403876506104, |
| "loss": 1699.9176, |
| "step": 650 |
| }, |
| { |
| "ce_loss_10": 3.5853109121322633, |
| "ce_loss_13": 3.4265636444091796, |
| "ce_loss_2": 4.949072217941284, |
| "ce_loss_3": 4.657009506225586, |
| "ce_loss_7": 3.9192400932312013, |
| "epoch": 0.066, |
| "grad_norm": 772.0, |
| "kl_loss_10": 332.7637084960937, |
| "kl_loss_2": 3005.3072998046873, |
| "kl_loss_3": 2488.590020751953, |
| "kl_loss_7": 1034.6812866210937, |
| "learning_rate": 0.0009921258765867918, |
| "loss": 1712.7359, |
| "step": 660 |
| }, |
| { |
| "ce_loss_10": 3.543238043785095, |
| "ce_loss_13": 3.392865073680878, |
| "ce_loss_2": 4.929511904716492, |
| "ce_loss_3": 4.673877739906311, |
| "ce_loss_7": 3.8979400753974915, |
| "epoch": 0.067, |
| "grad_norm": 1216.0, |
| "kl_loss_10": 326.31287689208983, |
| "kl_loss_2": 3073.3475952148438, |
| "kl_loss_3": 2606.592980957031, |
| "kl_loss_7": 1089.1828704833983, |
| "learning_rate": 0.0009918429209653662, |
| "loss": 1742.882, |
| "step": 670 |
| }, |
| { |
| "ce_loss_10": 3.60556218624115, |
| "ce_loss_13": 3.451234769821167, |
| "ce_loss_2": 4.9643912553787235, |
| "ce_loss_3": 4.685172462463379, |
| "ce_loss_7": 3.9489428043365478, |
| "epoch": 0.068, |
| "grad_norm": 700.0, |
| "kl_loss_10": 326.2720092773437, |
| "kl_loss_2": 2991.5349365234374, |
| "kl_loss_3": 2499.9562133789063, |
| "kl_loss_7": 1058.8525512695312, |
| "learning_rate": 0.0009915550124911866, |
| "loss": 1675.9207, |
| "step": 680 |
| }, |
| { |
| "ce_loss_10": 3.6152788639068603, |
| "ce_loss_13": 3.463881015777588, |
| "ce_loss_2": 4.9310142517089846, |
| "ce_loss_3": 4.651708984375, |
| "ce_loss_7": 3.939838695526123, |
| "epoch": 0.069, |
| "grad_norm": 716.0, |
| "kl_loss_10": 321.7038208007813, |
| "kl_loss_2": 2904.1300415039063, |
| "kl_loss_3": 2416.9381103515625, |
| "kl_loss_7": 1006.4677703857421, |
| "learning_rate": 0.0009912621540634887, |
| "loss": 1665.2684, |
| "step": 690 |
| }, |
| { |
| "ce_loss_10": 3.6430228471755983, |
| "ce_loss_13": 3.4952101826667787, |
| "ce_loss_2": 4.929437565803528, |
| "ce_loss_3": 4.6475961923599245, |
| "ce_loss_7": 3.9429776191711428, |
| "epoch": 0.07, |
| "grad_norm": 676.0, |
| "kl_loss_10": 309.61268615722656, |
| "kl_loss_2": 2848.814501953125, |
| "kl_loss_3": 2359.46318359375, |
| "kl_loss_7": 970.4827117919922, |
| "learning_rate": 0.0009909643486313534, |
| "loss": 1639.2395, |
| "step": 700 |
| }, |
| { |
| "ce_loss_10": 3.526335525512695, |
| "ce_loss_13": 3.3703501343727114, |
| "ce_loss_2": 4.889838075637817, |
| "ce_loss_3": 4.6030642032623295, |
| "ce_loss_7": 3.8521526575088503, |
| "epoch": 0.071, |
| "grad_norm": 744.0, |
| "kl_loss_10": 340.5390853881836, |
| "kl_loss_2": 3011.11240234375, |
| "kl_loss_3": 2515.4811889648436, |
| "kl_loss_7": 1017.710009765625, |
| "learning_rate": 0.000990661599193678, |
| "loss": 1737.2715, |
| "step": 710 |
| }, |
| { |
| "ce_loss_10": 3.6673552870750425, |
| "ce_loss_13": 3.5033403754234316, |
| "ce_loss_2": 4.93988311290741, |
| "ce_loss_3": 4.67398898601532, |
| "ce_loss_7": 3.9688475489616395, |
| "epoch": 0.072, |
| "grad_norm": 796.0, |
| "kl_loss_10": 340.83275604248047, |
| "kl_loss_2": 2865.5041381835936, |
| "kl_loss_3": 2386.658837890625, |
| "kl_loss_7": 996.4190338134765, |
| "learning_rate": 0.0009903539087991462, |
| "loss": 1651.048, |
| "step": 720 |
| }, |
| { |
| "ce_loss_10": 3.6324875712394715, |
| "ce_loss_13": 3.4752389669418333, |
| "ce_loss_2": 4.927369832992554, |
| "ce_loss_3": 4.656826686859131, |
| "ce_loss_7": 3.941362977027893, |
| "epoch": 0.073, |
| "grad_norm": 672.0, |
| "kl_loss_10": 338.573225402832, |
| "kl_loss_2": 2878.319189453125, |
| "kl_loss_3": 2403.4671142578127, |
| "kl_loss_7": 991.7197357177735, |
| "learning_rate": 0.0009900412805461966, |
| "loss": 1664.0748, |
| "step": 730 |
| }, |
| { |
| "ce_loss_10": 3.697860896587372, |
| "ce_loss_13": 3.5502901554107664, |
| "ce_loss_2": 4.959825038909912, |
| "ce_loss_3": 4.680054187774658, |
| "ce_loss_7": 4.008367860317231, |
| "epoch": 0.074, |
| "grad_norm": 796.0, |
| "kl_loss_10": 322.8813171386719, |
| "kl_loss_2": 2810.9089233398436, |
| "kl_loss_3": 2318.1740844726564, |
| "kl_loss_7": 980.3480072021484, |
| "learning_rate": 0.0009897237175829927, |
| "loss": 1630.2344, |
| "step": 740 |
| }, |
| { |
| "ce_loss_10": 3.5930413126945497, |
| "ce_loss_13": 3.43618665933609, |
| "ce_loss_2": 4.910944557189941, |
| "ce_loss_3": 4.628472471237183, |
| "ce_loss_7": 3.9170363903045655, |
| "epoch": 0.075, |
| "grad_norm": 720.0, |
| "kl_loss_10": 332.21988067626955, |
| "kl_loss_2": 2928.557727050781, |
| "kl_loss_3": 2429.9159301757813, |
| "kl_loss_7": 1037.6262634277343, |
| "learning_rate": 0.0009894012231073895, |
| "loss": 1665.4367, |
| "step": 750 |
| }, |
| { |
| "ce_loss_10": 3.6464996695518495, |
| "ce_loss_13": 3.4838218331336974, |
| "ce_loss_2": 4.924402260780335, |
| "ce_loss_3": 4.645452523231507, |
| "ce_loss_7": 3.9448330640792846, |
| "epoch": 0.076, |
| "grad_norm": 812.0, |
| "kl_loss_10": 338.6822082519531, |
| "kl_loss_2": 2855.4515014648437, |
| "kl_loss_3": 2358.476416015625, |
| "kl_loss_7": 978.1411010742188, |
| "learning_rate": 0.0009890738003669028, |
| "loss": 1654.1621, |
| "step": 760 |
| }, |
| { |
| "ce_loss_10": 3.617565965652466, |
| "ce_loss_13": 3.455268681049347, |
| "ce_loss_2": 4.933386254310608, |
| "ce_loss_3": 4.651405668258667, |
| "ce_loss_7": 3.9341206789016723, |
| "epoch": 0.077, |
| "grad_norm": 756.0, |
| "kl_loss_10": 337.93136138916014, |
| "kl_loss_2": 2949.602490234375, |
| "kl_loss_3": 2451.218469238281, |
| "kl_loss_7": 1020.4571960449218, |
| "learning_rate": 0.0009887414526586764, |
| "loss": 1640.4555, |
| "step": 770 |
| }, |
| { |
| "ce_loss_10": 3.6583608746528626, |
| "ce_loss_13": 3.512969744205475, |
| "ce_loss_2": 4.9441753149032595, |
| "ce_loss_3": 4.656214547157288, |
| "ce_loss_7": 3.964318811893463, |
| "epoch": 0.078, |
| "grad_norm": 720.0, |
| "kl_loss_10": 313.43713836669923, |
| "kl_loss_2": 2854.152880859375, |
| "kl_loss_3": 2348.5727111816404, |
| "kl_loss_7": 969.1142120361328, |
| "learning_rate": 0.0009884041833294476, |
| "loss": 1599.7842, |
| "step": 780 |
| }, |
| { |
| "ce_loss_10": 3.6560466647148133, |
| "ce_loss_13": 3.508514332771301, |
| "ce_loss_2": 4.940361285209656, |
| "ce_loss_3": 4.645708775520324, |
| "ce_loss_7": 3.958513784408569, |
| "epoch": 0.079, |
| "grad_norm": 832.0, |
| "kl_loss_10": 319.23270416259766, |
| "kl_loss_2": 2852.032861328125, |
| "kl_loss_3": 2330.51533203125, |
| "kl_loss_7": 969.9107818603516, |
| "learning_rate": 0.000988061995775515, |
| "loss": 1668.3449, |
| "step": 790 |
| }, |
| { |
| "ce_loss_10": 3.5980430364608766, |
| "ce_loss_13": 3.440366840362549, |
| "ce_loss_2": 4.8732929706573485, |
| "ce_loss_3": 4.587121820449829, |
| "ce_loss_7": 3.9043478846549986, |
| "epoch": 0.08, |
| "grad_norm": 752.0, |
| "kl_loss_10": 323.7010192871094, |
| "kl_loss_2": 2868.414514160156, |
| "kl_loss_3": 2356.581396484375, |
| "kl_loss_7": 987.0360656738281, |
| "learning_rate": 0.0009877148934427035, |
| "loss": 1633.2111, |
| "step": 800 |
| }, |
| { |
| "ce_loss_10": 3.633367455005646, |
| "ce_loss_13": 3.4834325551986693, |
| "ce_loss_2": 4.935962653160095, |
| "ce_loss_3": 4.627329421043396, |
| "ce_loss_7": 3.925890827178955, |
| "epoch": 0.081, |
| "grad_norm": 820.0, |
| "kl_loss_10": 330.4556167602539, |
| "kl_loss_2": 2885.1009033203127, |
| "kl_loss_3": 2351.8309020996094, |
| "kl_loss_7": 957.707730102539, |
| "learning_rate": 0.0009873628798263297, |
| "loss": 1611.097, |
| "step": 810 |
| }, |
| { |
| "ce_loss_10": 3.605324161052704, |
| "ce_loss_13": 3.438004171848297, |
| "ce_loss_2": 4.856884765625, |
| "ce_loss_3": 4.56407413482666, |
| "ce_loss_7": 3.8718148946762083, |
| "epoch": 0.082, |
| "grad_norm": 840.0, |
| "kl_loss_10": 339.57317504882815, |
| "kl_loss_2": 2826.0930053710936, |
| "kl_loss_3": 2305.033795166016, |
| "kl_loss_7": 931.82373046875, |
| "learning_rate": 0.0009870059584711668, |
| "loss": 1639.3607, |
| "step": 820 |
| }, |
| { |
| "ce_loss_10": 3.60188170671463, |
| "ce_loss_13": 3.455841100215912, |
| "ce_loss_2": 4.85420286655426, |
| "ce_loss_3": 4.581924772262573, |
| "ce_loss_7": 3.8951406598091127, |
| "epoch": 0.083, |
| "grad_norm": 720.0, |
| "kl_loss_10": 317.57149810791014, |
| "kl_loss_2": 2801.2140380859373, |
| "kl_loss_3": 2316.871270751953, |
| "kl_loss_7": 949.605337524414, |
| "learning_rate": 0.000986644132971409, |
| "loss": 1599.6842, |
| "step": 830 |
| }, |
| { |
| "ce_loss_10": 3.5939020037651064, |
| "ce_loss_13": 3.4429898500442504, |
| "ce_loss_2": 4.88135507106781, |
| "ce_loss_3": 4.604088640213012, |
| "ce_loss_7": 3.9158664107322694, |
| "epoch": 0.084, |
| "grad_norm": 932.0, |
| "kl_loss_10": 322.8277191162109, |
| "kl_loss_2": 2865.847692871094, |
| "kl_loss_3": 2367.4215576171873, |
| "kl_loss_7": 996.9576171875, |
| "learning_rate": 0.0009862774069706345, |
| "loss": 1629.1093, |
| "step": 840 |
| }, |
| { |
| "ce_loss_10": 3.710948944091797, |
| "ce_loss_13": 3.5685924649238587, |
| "ce_loss_2": 4.930621600151062, |
| "ce_loss_3": 4.65263340473175, |
| "ce_loss_7": 3.9990792274475098, |
| "epoch": 0.085, |
| "grad_norm": 684.0, |
| "kl_loss_10": 304.0562255859375, |
| "kl_loss_2": 2742.24169921875, |
| "kl_loss_3": 2253.91962890625, |
| "kl_loss_7": 950.4928100585937, |
| "learning_rate": 0.000985905784161771, |
| "loss": 1590.0119, |
| "step": 850 |
| }, |
| { |
| "ce_loss_10": 3.63605819940567, |
| "ce_loss_13": 3.4998138546943665, |
| "ce_loss_2": 4.900371265411377, |
| "ce_loss_3": 4.62078812122345, |
| "ce_loss_7": 3.934238409996033, |
| "epoch": 0.086, |
| "grad_norm": 748.0, |
| "kl_loss_10": 294.4667907714844, |
| "kl_loss_2": 2800.617395019531, |
| "kl_loss_3": 2314.4944458007812, |
| "kl_loss_7": 955.0795837402344, |
| "learning_rate": 0.000985529268287055, |
| "loss": 1585.186, |
| "step": 860 |
| }, |
| { |
| "ce_loss_10": 3.5651148438453673, |
| "ce_loss_13": 3.4233306527137755, |
| "ce_loss_2": 4.871410083770752, |
| "ce_loss_3": 4.5925886869430546, |
| "ce_loss_7": 3.877922761440277, |
| "epoch": 0.087, |
| "grad_norm": 796.0, |
| "kl_loss_10": 301.2444900512695, |
| "kl_loss_2": 2878.2498046875, |
| "kl_loss_3": 2387.6543212890624, |
| "kl_loss_7": 975.5103942871094, |
| "learning_rate": 0.0009851478631379982, |
| "loss": 1626.462, |
| "step": 870 |
| }, |
| { |
| "ce_loss_10": 3.6220229983329775, |
| "ce_loss_13": 3.4835654973983763, |
| "ce_loss_2": 4.903548383712769, |
| "ce_loss_3": 4.61605658531189, |
| "ce_loss_7": 3.9362378478050233, |
| "epoch": 0.088, |
| "grad_norm": 844.0, |
| "kl_loss_10": 293.3538963317871, |
| "kl_loss_2": 2833.7354125976562, |
| "kl_loss_3": 2335.5184326171875, |
| "kl_loss_7": 967.1238098144531, |
| "learning_rate": 0.0009847615725553456, |
| "loss": 1597.0803, |
| "step": 880 |
| }, |
| { |
| "ce_loss_10": 3.671082556247711, |
| "ce_loss_13": 3.542756676673889, |
| "ce_loss_2": 4.8840786695480345, |
| "ce_loss_3": 4.608758640289307, |
| "ce_loss_7": 3.9651415824890135, |
| "epoch": 0.089, |
| "grad_norm": 676.0, |
| "kl_loss_10": 274.7398094177246, |
| "kl_loss_2": 2672.2400390625, |
| "kl_loss_3": 2185.940838623047, |
| "kl_loss_7": 914.7755340576172, |
| "learning_rate": 0.0009843704004290394, |
| "loss": 1572.2007, |
| "step": 890 |
| }, |
| { |
| "ce_loss_10": 3.5845912218093874, |
| "ce_loss_13": 3.4463690519332886, |
| "ce_loss_2": 4.845745182037353, |
| "ce_loss_3": 4.566518807411194, |
| "ce_loss_7": 3.8977394104003906, |
| "epoch": 0.09, |
| "grad_norm": 800.0, |
| "kl_loss_10": 293.04640731811526, |
| "kl_loss_2": 2812.2204833984374, |
| "kl_loss_3": 2313.156042480469, |
| "kl_loss_7": 966.190869140625, |
| "learning_rate": 0.0009839743506981783, |
| "loss": 1597.2805, |
| "step": 900 |
| }, |
| { |
| "ce_loss_10": 3.5071211099624633, |
| "ce_loss_13": 3.369294321537018, |
| "ce_loss_2": 4.836311769485474, |
| "ce_loss_3": 4.550878620147705, |
| "ce_loss_7": 3.8309507608413695, |
| "epoch": 0.091, |
| "grad_norm": 716.0, |
| "kl_loss_10": 298.81206665039065, |
| "kl_loss_2": 2958.2573974609377, |
| "kl_loss_3": 2443.9187561035155, |
| "kl_loss_7": 1005.0242462158203, |
| "learning_rate": 0.0009835734273509786, |
| "loss": 1627.2797, |
| "step": 910 |
| }, |
| { |
| "ce_loss_10": 3.6050177574157716, |
| "ce_loss_13": 3.4665517807006836, |
| "ce_loss_2": 4.881958699226379, |
| "ce_loss_3": 4.6013915777206424, |
| "ce_loss_7": 3.9145362257957457, |
| "epoch": 0.092, |
| "grad_norm": 720.0, |
| "kl_loss_10": 288.0885604858398, |
| "kl_loss_2": 2799.756945800781, |
| "kl_loss_3": 2307.6742553710938, |
| "kl_loss_7": 959.5810729980469, |
| "learning_rate": 0.0009831676344247342, |
| "loss": 1585.5819, |
| "step": 920 |
| }, |
| { |
| "ce_loss_10": 3.615782046318054, |
| "ce_loss_13": 3.484424388408661, |
| "ce_loss_2": 4.840068244934082, |
| "ce_loss_3": 4.566077804565429, |
| "ce_loss_7": 3.905368459224701, |
| "epoch": 0.093, |
| "grad_norm": 592.0, |
| "kl_loss_10": 284.13806304931643, |
| "kl_loss_2": 2716.098291015625, |
| "kl_loss_3": 2237.568524169922, |
| "kl_loss_7": 925.932373046875, |
| "learning_rate": 0.0009827569760057755, |
| "loss": 1574.975, |
| "step": 930 |
| }, |
| { |
| "ce_loss_10": 3.5478480219841004, |
| "ce_loss_13": 3.4008304595947267, |
| "ce_loss_2": 4.878832292556763, |
| "ce_loss_3": 4.597835183143616, |
| "ce_loss_7": 3.860486149787903, |
| "epoch": 0.094, |
| "grad_norm": 812.0, |
| "kl_loss_10": 311.2947525024414, |
| "kl_loss_2": 2955.33916015625, |
| "kl_loss_3": 2458.781884765625, |
| "kl_loss_7": 985.0075500488281, |
| "learning_rate": 0.000982341456229428, |
| "loss": 1619.0104, |
| "step": 940 |
| }, |
| { |
| "ce_loss_10": 3.6401113510131835, |
| "ce_loss_13": 3.4997127175331117, |
| "ce_loss_2": 4.909311819076538, |
| "ce_loss_3": 4.633120918273926, |
| "ce_loss_7": 3.936661887168884, |
| "epoch": 0.095, |
| "grad_norm": 768.0, |
| "kl_loss_10": 304.94605484008787, |
| "kl_loss_2": 2847.3047485351562, |
| "kl_loss_3": 2358.746990966797, |
| "kl_loss_7": 958.3424041748046, |
| "learning_rate": 0.000981921079279971, |
| "loss": 1575.8767, |
| "step": 950 |
| }, |
| { |
| "ce_loss_10": 3.6493973970413207, |
| "ce_loss_13": 3.5170445680618285, |
| "ce_loss_2": 4.842743754386902, |
| "ce_loss_3": 4.559553527832032, |
| "ce_loss_7": 3.913013446331024, |
| "epoch": 0.096, |
| "grad_norm": 632.0, |
| "kl_loss_10": 287.80171127319335, |
| "kl_loss_2": 2681.031005859375, |
| "kl_loss_3": 2186.1464904785157, |
| "kl_loss_7": 891.4322113037109, |
| "learning_rate": 0.0009814958493905962, |
| "loss": 1541.8673, |
| "step": 960 |
| }, |
| { |
| "ce_loss_10": 3.6059035897254943, |
| "ce_loss_13": 3.464053213596344, |
| "ce_loss_2": 4.885409092903137, |
| "ce_loss_3": 4.605575942993164, |
| "ce_loss_7": 3.901495134830475, |
| "epoch": 0.097, |
| "grad_norm": 644.0, |
| "kl_loss_10": 302.9938400268555, |
| "kl_loss_2": 2842.060888671875, |
| "kl_loss_3": 2348.8412109375, |
| "kl_loss_7": 943.344677734375, |
| "learning_rate": 0.0009810657708433637, |
| "loss": 1620.3537, |
| "step": 970 |
| }, |
| { |
| "ce_loss_10": 3.6700100898742676, |
| "ce_loss_13": 3.538521420955658, |
| "ce_loss_2": 4.868229222297669, |
| "ce_loss_3": 4.590689539909363, |
| "ce_loss_7": 3.9474687933921815, |
| "epoch": 0.098, |
| "grad_norm": 808.0, |
| "kl_loss_10": 283.2241409301758, |
| "kl_loss_2": 2674.522265625, |
| "kl_loss_3": 2192.326556396484, |
| "kl_loss_7": 894.1458190917969, |
| "learning_rate": 0.0009806308479691594, |
| "loss": 1528.2636, |
| "step": 980 |
| }, |
| { |
| "ce_loss_10": 3.691223752498627, |
| "ce_loss_13": 3.55548814535141, |
| "ce_loss_2": 4.925488543510437, |
| "ce_loss_3": 4.648779034614563, |
| "ce_loss_7": 3.9924039959907534, |
| "epoch": 0.099, |
| "grad_norm": 740.0, |
| "kl_loss_10": 294.3150146484375, |
| "kl_loss_2": 2748.0041381835936, |
| "kl_loss_3": 2268.979638671875, |
| "kl_loss_7": 946.8526397705078, |
| "learning_rate": 0.0009801910851476522, |
| "loss": 1554.0744, |
| "step": 990 |
| }, |
| { |
| "ce_loss_10": 3.6008501768112184, |
| "ce_loss_13": 3.465990114212036, |
| "ce_loss_2": 4.890150642395019, |
| "ce_loss_3": 4.609904193878174, |
| "ce_loss_7": 3.9068346500396727, |
| "epoch": 0.1, |
| "grad_norm": 736.0, |
| "kl_loss_10": 294.7660331726074, |
| "kl_loss_2": 2875.2068603515627, |
| "kl_loss_3": 2379.5891052246093, |
| "kl_loss_7": 970.1351318359375, |
| "learning_rate": 0.0009797464868072487, |
| "loss": 1582.4648, |
| "step": 1000 |
| }, |
| { |
| "ce_loss_10": 3.5892885446548464, |
| "ce_loss_13": 3.454503262042999, |
| "ce_loss_2": 4.837452292442322, |
| "ce_loss_3": 4.55982882976532, |
| "ce_loss_7": 3.887318527698517, |
| "epoch": 0.101, |
| "grad_norm": 724.0, |
| "kl_loss_10": 288.82502670288085, |
| "kl_loss_2": 2762.65830078125, |
| "kl_loss_3": 2282.756170654297, |
| "kl_loss_7": 944.8302276611328, |
| "learning_rate": 0.0009792970574250492, |
| "loss": 1564.9662, |
| "step": 1010 |
| }, |
| { |
| "ce_loss_10": 3.6221608400344847, |
| "ce_loss_13": 3.482994794845581, |
| "ce_loss_2": 4.848793458938599, |
| "ce_loss_3": 4.575083756446839, |
| "ce_loss_7": 3.914657413959503, |
| "epoch": 0.102, |
| "grad_norm": 612.0, |
| "kl_loss_10": 290.8812942504883, |
| "kl_loss_2": 2743.8400146484373, |
| "kl_loss_3": 2261.9089599609374, |
| "kl_loss_7": 937.5250091552734, |
| "learning_rate": 0.0009788428015268028, |
| "loss": 1536.8119, |
| "step": 1020 |
| }, |
| { |
| "ce_loss_10": 3.6110181331634523, |
| "ce_loss_13": 3.47798638343811, |
| "ce_loss_2": 4.840990829467773, |
| "ce_loss_3": 4.55189163684845, |
| "ce_loss_7": 3.9010056853294373, |
| "epoch": 0.103, |
| "grad_norm": 616.0, |
| "kl_loss_10": 281.37939529418946, |
| "kl_loss_2": 2739.4623291015623, |
| "kl_loss_3": 2238.093048095703, |
| "kl_loss_7": 923.4858306884765, |
| "learning_rate": 0.0009783837236868609, |
| "loss": 1534.7721, |
| "step": 1030 |
| }, |
| { |
| "ce_loss_10": 3.5802615523338317, |
| "ce_loss_13": 3.4459127306938173, |
| "ce_loss_2": 4.818247056007385, |
| "ce_loss_3": 4.546270060539245, |
| "ce_loss_7": 3.8740112662315367, |
| "epoch": 0.104, |
| "grad_norm": 696.0, |
| "kl_loss_10": 281.4418014526367, |
| "kl_loss_2": 2719.910290527344, |
| "kl_loss_3": 2248.530157470703, |
| "kl_loss_7": 921.926953125, |
| "learning_rate": 0.0009779198285281327, |
| "loss": 1537.119, |
| "step": 1040 |
| }, |
| { |
| "ce_loss_10": 3.577412283420563, |
| "ce_loss_13": 3.4400023460388183, |
| "ce_loss_2": 4.825755000114441, |
| "ce_loss_3": 4.554906344413757, |
| "ce_loss_7": 3.8695693135261537, |
| "epoch": 0.105, |
| "grad_norm": 784.0, |
| "kl_loss_10": 293.84764709472654, |
| "kl_loss_2": 2770.2111328125, |
| "kl_loss_3": 2280.982073974609, |
| "kl_loss_7": 916.6518432617188, |
| "learning_rate": 0.0009774511207220368, |
| "loss": 1562.095, |
| "step": 1050 |
| }, |
| { |
| "ce_loss_10": 3.621231746673584, |
| "ce_loss_13": 3.4823400259017943, |
| "ce_loss_2": 4.867471241950989, |
| "ce_loss_3": 4.584862947463989, |
| "ce_loss_7": 3.895237350463867, |
| "epoch": 0.106, |
| "grad_norm": 588.0, |
| "kl_loss_10": 306.07321014404295, |
| "kl_loss_2": 2785.361218261719, |
| "kl_loss_3": 2286.776574707031, |
| "kl_loss_7": 918.4756744384765, |
| "learning_rate": 0.0009769776049884564, |
| "loss": 1554.5619, |
| "step": 1060 |
| }, |
| { |
| "ce_loss_10": 3.5330086588859557, |
| "ce_loss_13": 3.387469935417175, |
| "ce_loss_2": 4.804182314872742, |
| "ce_loss_3": 4.539949297904968, |
| "ce_loss_7": 3.8264609456062315, |
| "epoch": 0.107, |
| "grad_norm": 1184.0, |
| "kl_loss_10": 307.66697082519534, |
| "kl_loss_2": 2836.2517578125, |
| "kl_loss_3": 2373.5376220703124, |
| "kl_loss_7": 943.6192169189453, |
| "learning_rate": 0.0009764992860956889, |
| "loss": 1622.7785, |
| "step": 1070 |
| }, |
| { |
| "ce_loss_10": 3.677293050289154, |
| "ce_loss_13": 3.5469510316848756, |
| "ce_loss_2": 4.837077927589417, |
| "ce_loss_3": 4.588571333885193, |
| "ce_loss_7": 3.9465363740921022, |
| "epoch": 0.108, |
| "grad_norm": 816.0, |
| "kl_loss_10": 286.8066802978516, |
| "kl_loss_2": 2605.4248657226562, |
| "kl_loss_3": 2175.9279296875, |
| "kl_loss_7": 899.353060913086, |
| "learning_rate": 0.0009760161688604008, |
| "loss": 1520.9383, |
| "step": 1080 |
| }, |
| { |
| "ce_loss_10": 3.6768419981002807, |
| "ce_loss_13": 3.54748477935791, |
| "ce_loss_2": 4.881722617149353, |
| "ce_loss_3": 4.620517659187317, |
| "ce_loss_7": 3.9953080892562864, |
| "epoch": 0.109, |
| "grad_norm": 840.0, |
| "kl_loss_10": 283.82303619384766, |
| "kl_loss_2": 2660.0453125, |
| "kl_loss_3": 2210.3756591796873, |
| "kl_loss_7": 954.3282287597656, |
| "learning_rate": 0.0009755282581475768, |
| "loss": 1552.3523, |
| "step": 1090 |
| }, |
| { |
| "ce_loss_10": 3.742873156070709, |
| "ce_loss_13": 3.60170716047287, |
| "ce_loss_2": 4.9219562292099, |
| "ce_loss_3": 4.631097722053528, |
| "ce_loss_7": 4.016275346279144, |
| "epoch": 0.11, |
| "grad_norm": 792.0, |
| "kl_loss_10": 295.95645599365236, |
| "kl_loss_2": 2660.5046997070312, |
| "kl_loss_3": 2150.144982910156, |
| "kl_loss_7": 938.1217224121094, |
| "learning_rate": 0.0009750355588704727, |
| "loss": 1496.9391, |
| "step": 1100 |
| }, |
| { |
| "ce_loss_10": 3.5732216477394103, |
| "ce_loss_13": 3.427997899055481, |
| "ce_loss_2": 4.788290286064148, |
| "ce_loss_3": 4.501250839233398, |
| "ce_loss_7": 3.849083948135376, |
| "epoch": 0.111, |
| "grad_norm": 644.0, |
| "kl_loss_10": 301.9219177246094, |
| "kl_loss_2": 2692.5292846679686, |
| "kl_loss_3": 2192.219659423828, |
| "kl_loss_7": 902.1104858398437, |
| "learning_rate": 0.0009745380759905647, |
| "loss": 1547.9881, |
| "step": 1110 |
| }, |
| { |
| "ce_loss_10": 3.525436317920685, |
| "ce_loss_13": 3.388839864730835, |
| "ce_loss_2": 4.766349339485169, |
| "ce_loss_3": 4.478921818733215, |
| "ce_loss_7": 3.8117297768592833, |
| "epoch": 0.112, |
| "grad_norm": 636.0, |
| "kl_loss_10": 288.7658378601074, |
| "kl_loss_2": 2767.7005126953127, |
| "kl_loss_3": 2266.3693115234373, |
| "kl_loss_7": 916.3693817138671, |
| "learning_rate": 0.0009740358145174998, |
| "loss": 1582.2694, |
| "step": 1120 |
| }, |
| { |
| "ce_loss_10": 3.674707901477814, |
| "ce_loss_13": 3.541641688346863, |
| "ce_loss_2": 4.839509201049805, |
| "ce_loss_3": 4.554335117340088, |
| "ce_loss_7": 3.9309728384017943, |
| "epoch": 0.113, |
| "grad_norm": 740.0, |
| "kl_loss_10": 293.9353363037109, |
| "kl_loss_2": 2627.9318603515626, |
| "kl_loss_3": 2118.4319458007812, |
| "kl_loss_7": 883.5943176269532, |
| "learning_rate": 0.0009735287795090455, |
| "loss": 1505.1257, |
| "step": 1130 |
| }, |
| { |
| "ce_loss_10": 3.5646776437759398, |
| "ce_loss_13": 3.4284933686256407, |
| "ce_loss_2": 4.8010115146636965, |
| "ce_loss_3": 4.510753107070923, |
| "ce_loss_7": 3.839044988155365, |
| "epoch": 0.114, |
| "grad_norm": 692.0, |
| "kl_loss_10": 289.08748931884764, |
| "kl_loss_2": 2724.9734130859374, |
| "kl_loss_3": 2216.0496459960937, |
| "kl_loss_7": 891.9239013671875, |
| "learning_rate": 0.0009730169760710386, |
| "loss": 1526.1704, |
| "step": 1140 |
| }, |
| { |
| "ce_loss_10": 3.647395300865173, |
| "ce_loss_13": 3.51713547706604, |
| "ce_loss_2": 4.854923152923584, |
| "ce_loss_3": 4.577234363555908, |
| "ce_loss_7": 3.928617572784424, |
| "epoch": 0.115, |
| "grad_norm": 800.0, |
| "kl_loss_10": 280.8671928405762, |
| "kl_loss_2": 2669.748742675781, |
| "kl_loss_3": 2182.849468994141, |
| "kl_loss_7": 895.4142913818359, |
| "learning_rate": 0.0009725004093573342, |
| "loss": 1526.191, |
| "step": 1150 |
| }, |
| { |
| "ce_loss_10": 3.5862129092216493, |
| "ce_loss_13": 3.4506229400634765, |
| "ce_loss_2": 4.798703122138977, |
| "ce_loss_3": 4.520225930213928, |
| "ce_loss_7": 3.877876043319702, |
| "epoch": 0.116, |
| "grad_norm": 840.0, |
| "kl_loss_10": 283.1919075012207, |
| "kl_loss_2": 2672.7715087890624, |
| "kl_loss_3": 2193.748876953125, |
| "kl_loss_7": 903.5404602050781, |
| "learning_rate": 0.0009719790845697534, |
| "loss": 1504.2701, |
| "step": 1160 |
| }, |
| { |
| "ce_loss_10": 3.5309566259384155, |
| "ce_loss_13": 3.4061906576156615, |
| "ce_loss_2": 4.704360723495483, |
| "ce_loss_3": 4.450148797035217, |
| "ce_loss_7": 3.803053593635559, |
| "epoch": 0.117, |
| "grad_norm": 696.0, |
| "kl_loss_10": 271.77204208374025, |
| "kl_loss_2": 2620.2907592773436, |
| "kl_loss_3": 2176.4858520507814, |
| "kl_loss_7": 863.540869140625, |
| "learning_rate": 0.0009714530069580309, |
| "loss": 1485.2044, |
| "step": 1170 |
| }, |
| { |
| "ce_loss_10": 3.640796720981598, |
| "ce_loss_13": 3.507249903678894, |
| "ce_loss_2": 4.853176116943359, |
| "ce_loss_3": 4.5715264797210695, |
| "ce_loss_7": 3.914932680130005, |
| "epoch": 0.118, |
| "grad_norm": 716.0, |
| "kl_loss_10": 285.63293685913084, |
| "kl_loss_2": 2675.1877319335936, |
| "kl_loss_3": 2189.7057678222654, |
| "kl_loss_7": 884.98447265625, |
| "learning_rate": 0.0009709221818197624, |
| "loss": 1502.0164, |
| "step": 1180 |
| }, |
| { |
| "ce_loss_10": 3.6675962805747986, |
| "ce_loss_13": 3.534939968585968, |
| "ce_loss_2": 4.88215401172638, |
| "ce_loss_3": 4.607950353622437, |
| "ce_loss_7": 3.9379210352897642, |
| "epoch": 0.119, |
| "grad_norm": 596.0, |
| "kl_loss_10": 288.61556854248045, |
| "kl_loss_2": 2711.667822265625, |
| "kl_loss_3": 2227.545977783203, |
| "kl_loss_7": 887.7295013427735, |
| "learning_rate": 0.0009703866145003512, |
| "loss": 1525.4232, |
| "step": 1190 |
| }, |
| { |
| "ce_loss_10": 3.6349379420280457, |
| "ce_loss_13": 3.5029913663864134, |
| "ce_loss_2": 4.829423713684082, |
| "ce_loss_3": 4.558488368988037, |
| "ce_loss_7": 3.908590841293335, |
| "epoch": 0.12, |
| "grad_norm": 660.0, |
| "kl_loss_10": 279.50138397216796, |
| "kl_loss_2": 2676.350244140625, |
| "kl_loss_3": 2191.2725830078125, |
| "kl_loss_7": 882.7566497802734, |
| "learning_rate": 0.0009698463103929542, |
| "loss": 1529.4317, |
| "step": 1200 |
| }, |
| { |
| "ce_loss_10": 3.605515944957733, |
| "ce_loss_13": 3.472998011112213, |
| "ce_loss_2": 4.827000212669373, |
| "ce_loss_3": 4.540698933601379, |
| "ce_loss_7": 3.879436028003693, |
| "epoch": 0.121, |
| "grad_norm": 652.0, |
| "kl_loss_10": 281.2242576599121, |
| "kl_loss_2": 2695.142529296875, |
| "kl_loss_3": 2191.8710388183595, |
| "kl_loss_7": 882.5638031005859, |
| "learning_rate": 0.0009693012749384279, |
| "loss": 1527.1828, |
| "step": 1210 |
| }, |
| { |
| "ce_loss_10": 3.617890453338623, |
| "ce_loss_13": 3.4903839349746706, |
| "ce_loss_2": 4.823957228660584, |
| "ce_loss_3": 4.546852803230285, |
| "ce_loss_7": 3.8918931126594543, |
| "epoch": 0.122, |
| "grad_norm": 596.0, |
| "kl_loss_10": 274.6055084228516, |
| "kl_loss_2": 2677.51435546875, |
| "kl_loss_3": 2182.2475463867186, |
| "kl_loss_7": 884.2765747070313, |
| "learning_rate": 0.0009687515136252732, |
| "loss": 1502.8832, |
| "step": 1220 |
| }, |
| { |
| "ce_loss_10": 3.571158289909363, |
| "ce_loss_13": 3.4428164839744566, |
| "ce_loss_2": 4.832195687294006, |
| "ce_loss_3": 4.558122348785401, |
| "ce_loss_7": 3.866991031169891, |
| "epoch": 0.123, |
| "grad_norm": 656.0, |
| "kl_loss_10": 285.63698654174806, |
| "kl_loss_2": 2814.325549316406, |
| "kl_loss_3": 2321.4359924316404, |
| "kl_loss_7": 924.2180969238282, |
| "learning_rate": 0.0009681970319895803, |
| "loss": 1610.0467, |
| "step": 1230 |
| }, |
| { |
| "ce_loss_10": 3.6617783904075623, |
| "ce_loss_13": 3.5239094376564024, |
| "ce_loss_2": 4.840570569038391, |
| "ce_loss_3": 4.5621686458587645, |
| "ce_loss_7": 3.9261529445648193, |
| "epoch": 0.124, |
| "grad_norm": 660.0, |
| "kl_loss_10": 282.9206481933594, |
| "kl_loss_2": 2658.744873046875, |
| "kl_loss_3": 2162.91650390625, |
| "kl_loss_7": 894.8567260742187, |
| "learning_rate": 0.0009676378356149733, |
| "loss": 1510.0703, |
| "step": 1240 |
| }, |
| { |
| "ce_loss_10": 3.632222390174866, |
| "ce_loss_13": 3.49722797870636, |
| "ce_loss_2": 4.803181719779968, |
| "ce_loss_3": 4.527125644683838, |
| "ce_loss_7": 3.893145501613617, |
| "epoch": 0.125, |
| "grad_norm": 676.0, |
| "kl_loss_10": 306.93408966064453, |
| "kl_loss_2": 2618.3517578125, |
| "kl_loss_3": 2135.0671936035155, |
| "kl_loss_7": 870.7611785888672, |
| "learning_rate": 0.0009670739301325534, |
| "loss": 1495.915, |
| "step": 1250 |
| }, |
| { |
| "ce_loss_10": 3.5965846180915833, |
| "ce_loss_13": 3.461331534385681, |
| "ce_loss_2": 4.77229871749878, |
| "ce_loss_3": 4.488967990875244, |
| "ce_loss_7": 3.870732378959656, |
| "epoch": 0.126, |
| "grad_norm": 824.0, |
| "kl_loss_10": 288.87402572631834, |
| "kl_loss_2": 2631.6656005859377, |
| "kl_loss_3": 2132.2338745117186, |
| "kl_loss_7": 890.5492980957031, |
| "learning_rate": 0.0009665053212208426, |
| "loss": 1507.3391, |
| "step": 1260 |
| }, |
| { |
| "ce_loss_10": 3.6325414419174193, |
| "ce_loss_13": 3.5006507635116577, |
| "ce_loss_2": 4.82985291481018, |
| "ce_loss_3": 4.53967547416687, |
| "ce_loss_7": 3.907087206840515, |
| "epoch": 0.127, |
| "grad_norm": 824.0, |
| "kl_loss_10": 289.66627197265626, |
| "kl_loss_2": 2682.7635498046875, |
| "kl_loss_3": 2171.759143066406, |
| "kl_loss_7": 897.6279174804688, |
| "learning_rate": 0.0009659320146057262, |
| "loss": 1515.1299, |
| "step": 1270 |
| }, |
| { |
| "ce_loss_10": 3.6294240951538086, |
| "ce_loss_13": 3.5012729167938232, |
| "ce_loss_2": 4.802068519592285, |
| "ce_loss_3": 4.516877055168152, |
| "ce_loss_7": 3.912596344947815, |
| "epoch": 0.128, |
| "grad_norm": 1040.0, |
| "kl_loss_10": 274.3899444580078, |
| "kl_loss_2": 2616.2175048828126, |
| "kl_loss_3": 2113.874139404297, |
| "kl_loss_7": 894.8648956298828, |
| "learning_rate": 0.0009653540160603955, |
| "loss": 1485.5743, |
| "step": 1280 |
| }, |
| { |
| "ce_loss_10": 3.631951367855072, |
| "ce_loss_13": 3.5082743883132936, |
| "ce_loss_2": 4.7942791938781735, |
| "ce_loss_3": 4.533441662788391, |
| "ce_loss_7": 3.911020016670227, |
| "epoch": 0.129, |
| "grad_norm": 980.0, |
| "kl_loss_10": 277.70714950561523, |
| "kl_loss_2": 2607.6315795898436, |
| "kl_loss_3": 2154.8384338378905, |
| "kl_loss_7": 902.8254302978515, |
| "learning_rate": 0.0009647713314052896, |
| "loss": 1475.7309, |
| "step": 1290 |
| }, |
| { |
| "ce_loss_10": 3.5910762190818786, |
| "ce_loss_13": 3.4583710193634034, |
| "ce_loss_2": 4.806964182853699, |
| "ce_loss_3": 4.536605000495911, |
| "ce_loss_7": 3.892735993862152, |
| "epoch": 0.13, |
| "grad_norm": 1032.0, |
| "kl_loss_10": 281.282731628418, |
| "kl_loss_2": 2713.5140380859375, |
| "kl_loss_3": 2245.954937744141, |
| "kl_loss_7": 924.882958984375, |
| "learning_rate": 0.0009641839665080363, |
| "loss": 1529.1627, |
| "step": 1300 |
| }, |
| { |
| "ce_loss_10": 3.5369811177253725, |
| "ce_loss_13": 3.4184723615646364, |
| "ce_loss_2": 4.746987700462341, |
| "ce_loss_3": 4.47196786403656, |
| "ce_loss_7": 3.8142111539840697, |
| "epoch": 0.131, |
| "grad_norm": 708.0, |
| "kl_loss_10": 267.0766883850098, |
| "kl_loss_2": 2651.264123535156, |
| "kl_loss_3": 2169.8530151367186, |
| "kl_loss_7": 874.0395812988281, |
| "learning_rate": 0.0009635919272833937, |
| "loss": 1472.4912, |
| "step": 1310 |
| }, |
| { |
| "ce_loss_10": 3.582905340194702, |
| "ce_loss_13": 3.4547547817230226, |
| "ce_loss_2": 4.782030344009399, |
| "ce_loss_3": 4.50511953830719, |
| "ce_loss_7": 3.8575597286224363, |
| "epoch": 0.132, |
| "grad_norm": 640.0, |
| "kl_loss_10": 274.49700088500975, |
| "kl_loss_2": 2645.7089721679686, |
| "kl_loss_3": 2148.3076110839843, |
| "kl_loss_7": 865.2912628173829, |
| "learning_rate": 0.0009629952196931902, |
| "loss": 1461.5725, |
| "step": 1320 |
| }, |
| { |
| "ce_loss_10": 3.560918188095093, |
| "ce_loss_13": 3.4357552766799926, |
| "ce_loss_2": 4.777603983879089, |
| "ce_loss_3": 4.497129726409912, |
| "ce_loss_7": 3.8250754475593567, |
| "epoch": 0.133, |
| "grad_norm": 612.0, |
| "kl_loss_10": 266.5273551940918, |
| "kl_loss_2": 2692.33935546875, |
| "kl_loss_3": 2197.4263916015625, |
| "kl_loss_7": 846.3100128173828, |
| "learning_rate": 0.0009623938497462645, |
| "loss": 1482.4779, |
| "step": 1330 |
| }, |
| { |
| "ce_loss_10": 3.559932196140289, |
| "ce_loss_13": 3.4353162169456484, |
| "ce_loss_2": 4.754807543754578, |
| "ce_loss_3": 4.478498530387879, |
| "ce_loss_7": 3.8313623666763306, |
| "epoch": 0.134, |
| "grad_norm": 564.0, |
| "kl_loss_10": 268.2800895690918, |
| "kl_loss_2": 2653.6271240234373, |
| "kl_loss_3": 2162.7194641113283, |
| "kl_loss_7": 859.2419372558594, |
| "learning_rate": 0.0009617878234984055, |
| "loss": 1499.2066, |
| "step": 1340 |
| }, |
| { |
| "ce_loss_10": 3.651080513000488, |
| "ce_loss_13": 3.533881187438965, |
| "ce_loss_2": 4.8088576078414915, |
| "ce_loss_3": 4.535065650939941, |
| "ce_loss_7": 3.9042758703231812, |
| "epoch": 0.135, |
| "grad_norm": 712.0, |
| "kl_loss_10": 256.59825744628904, |
| "kl_loss_2": 2581.625207519531, |
| "kl_loss_3": 2098.4682495117186, |
| "kl_loss_7": 828.9938018798828, |
| "learning_rate": 0.0009611771470522907, |
| "loss": 1464.5767, |
| "step": 1350 |
| }, |
| { |
| "ce_loss_10": 3.5779558777809144, |
| "ce_loss_13": 3.457493555545807, |
| "ce_loss_2": 4.792022109031677, |
| "ce_loss_3": 4.514930057525635, |
| "ce_loss_7": 3.8448525190353395, |
| "epoch": 0.136, |
| "grad_norm": 616.0, |
| "kl_loss_10": 259.41123428344724, |
| "kl_loss_2": 2657.6331420898437, |
| "kl_loss_3": 2171.1466857910154, |
| "kl_loss_7": 847.0537750244141, |
| "learning_rate": 0.0009605618265574251, |
| "loss": 1459.6229, |
| "step": 1360 |
| }, |
| { |
| "ce_loss_10": 3.5429495334625245, |
| "ce_loss_13": 3.4162652492523193, |
| "ce_loss_2": 4.794952082633972, |
| "ce_loss_3": 4.535301685333252, |
| "ce_loss_7": 3.8165592908859254, |
| "epoch": 0.137, |
| "grad_norm": 620.0, |
| "kl_loss_10": 271.0598449707031, |
| "kl_loss_2": 2776.145849609375, |
| "kl_loss_3": 2325.675885009766, |
| "kl_loss_7": 881.587744140625, |
| "learning_rate": 0.0009599418682100792, |
| "loss": 1522.4414, |
| "step": 1370 |
| }, |
| { |
| "ce_loss_10": 3.58179566860199, |
| "ce_loss_13": 3.459395945072174, |
| "ce_loss_2": 4.792193937301636, |
| "ce_loss_3": 4.521099305152893, |
| "ce_loss_7": 3.84169602394104, |
| "epoch": 0.138, |
| "grad_norm": 724.0, |
| "kl_loss_10": 257.83258666992185, |
| "kl_loss_2": 2672.4068237304687, |
| "kl_loss_3": 2198.559918212891, |
| "kl_loss_7": 850.8091857910156, |
| "learning_rate": 0.0009593172782532268, |
| "loss": 1496.2724, |
| "step": 1380 |
| }, |
| { |
| "ce_loss_10": 3.622367191314697, |
| "ce_loss_13": 3.506042146682739, |
| "ce_loss_2": 4.801430583000183, |
| "ce_loss_3": 4.530508184432984, |
| "ce_loss_7": 3.888216722011566, |
| "epoch": 0.139, |
| "grad_norm": 672.0, |
| "kl_loss_10": 260.9531532287598, |
| "kl_loss_2": 2599.6354858398436, |
| "kl_loss_3": 2121.937152099609, |
| "kl_loss_7": 852.8548278808594, |
| "learning_rate": 0.0009586880629764817, |
| "loss": 1464.8023, |
| "step": 1390 |
| }, |
| { |
| "ce_loss_10": 3.546726655960083, |
| "ce_loss_13": 3.428490459918976, |
| "ce_loss_2": 4.748290467262268, |
| "ce_loss_3": 4.471861267089844, |
| "ce_loss_7": 3.824984240531921, |
| "epoch": 0.14, |
| "grad_norm": 620.0, |
| "kl_loss_10": 260.18620986938475, |
| "kl_loss_2": 2649.2240234375, |
| "kl_loss_3": 2164.870428466797, |
| "kl_loss_7": 870.0703582763672, |
| "learning_rate": 0.0009580542287160348, |
| "loss": 1462.9275, |
| "step": 1400 |
| }, |
| { |
| "ce_loss_10": 3.5134201645851135, |
| "ce_loss_13": 3.396924638748169, |
| "ce_loss_2": 4.727832221984864, |
| "ce_loss_3": 4.457144689559937, |
| "ce_loss_7": 3.781324291229248, |
| "epoch": 0.141, |
| "grad_norm": 724.0, |
| "kl_loss_10": 257.8106407165527, |
| "kl_loss_2": 2672.565283203125, |
| "kl_loss_3": 2194.398052978516, |
| "kl_loss_7": 841.9467041015625, |
| "learning_rate": 0.0009574157818545901, |
| "loss": 1469.0121, |
| "step": 1410 |
| }, |
| { |
| "ce_loss_10": 3.583372378349304, |
| "ce_loss_13": 3.4670314311981203, |
| "ce_loss_2": 4.753075981140137, |
| "ce_loss_3": 4.488786149024963, |
| "ce_loss_7": 3.8414045095443727, |
| "epoch": 0.142, |
| "grad_norm": 768.0, |
| "kl_loss_10": 250.4652572631836, |
| "kl_loss_2": 2575.260546875, |
| "kl_loss_3": 2109.250030517578, |
| "kl_loss_7": 815.4136535644532, |
| "learning_rate": 0.0009567727288213005, |
| "loss": 1470.4241, |
| "step": 1420 |
| }, |
| { |
| "ce_loss_10": 3.5615610837936402, |
| "ce_loss_13": 3.4428680539131165, |
| "ce_loss_2": 4.766120481491089, |
| "ce_loss_3": 4.489290237426758, |
| "ce_loss_7": 3.8387726664543154, |
| "epoch": 0.143, |
| "grad_norm": 680.0, |
| "kl_loss_10": 259.5032684326172, |
| "kl_loss_2": 2652.6231079101562, |
| "kl_loss_3": 2168.8318054199217, |
| "kl_loss_7": 872.5292297363281, |
| "learning_rate": 0.0009561250760917027, |
| "loss": 1465.2545, |
| "step": 1430 |
| }, |
| { |
| "ce_loss_10": 3.5825438022613527, |
| "ce_loss_13": 3.4635141372680662, |
| "ce_loss_2": 4.774414443969727, |
| "ce_loss_3": 4.498082184791565, |
| "ce_loss_7": 3.8522005438804627, |
| "epoch": 0.144, |
| "grad_norm": 656.0, |
| "kl_loss_10": 263.3311599731445, |
| "kl_loss_2": 2662.4484375, |
| "kl_loss_3": 2176.186492919922, |
| "kl_loss_7": 865.9247039794922, |
| "learning_rate": 0.0009554728301876525, |
| "loss": 1454.278, |
| "step": 1440 |
| }, |
| { |
| "ce_loss_10": 3.6376792669296263, |
| "ce_loss_13": 3.515091061592102, |
| "ce_loss_2": 4.810996460914612, |
| "ce_loss_3": 4.536413979530335, |
| "ce_loss_7": 3.9078781604766846, |
| "epoch": 0.145, |
| "grad_norm": 616.0, |
| "kl_loss_10": 259.68054962158203, |
| "kl_loss_2": 2600.8175415039063, |
| "kl_loss_3": 2120.5454040527343, |
| "kl_loss_7": 864.2900634765625, |
| "learning_rate": 0.0009548159976772592, |
| "loss": 1508.1567, |
| "step": 1450 |
| }, |
| { |
| "ce_loss_10": 3.5796504259109496, |
| "ce_loss_13": 3.456580376625061, |
| "ce_loss_2": 4.787333536148071, |
| "ce_loss_3": 4.520044946670533, |
| "ce_loss_7": 3.8587978959083555, |
| "epoch": 0.146, |
| "grad_norm": 624.0, |
| "kl_loss_10": 265.1648490905762, |
| "kl_loss_2": 2666.8885864257813, |
| "kl_loss_3": 2195.818231201172, |
| "kl_loss_7": 871.2362884521484, |
| "learning_rate": 0.0009541545851748186, |
| "loss": 1477.8201, |
| "step": 1460 |
| }, |
| { |
| "ce_loss_10": 3.4508144855499268, |
| "ce_loss_13": 3.3300524830818174, |
| "ce_loss_2": 4.699088287353516, |
| "ce_loss_3": 4.421405148506165, |
| "ce_loss_7": 3.735712671279907, |
| "epoch": 0.147, |
| "grad_norm": 844.0, |
| "kl_loss_10": 262.5924041748047, |
| "kl_loss_2": 2730.21630859375, |
| "kl_loss_3": 2243.504345703125, |
| "kl_loss_7": 878.0860382080078, |
| "learning_rate": 0.0009534885993407473, |
| "loss": 1496.8188, |
| "step": 1470 |
| }, |
| { |
| "ce_loss_10": 3.611809027194977, |
| "ce_loss_13": 3.4930022716522218, |
| "ce_loss_2": 4.806360912322998, |
| "ce_loss_3": 4.5402860879898075, |
| "ce_loss_7": 3.8858142852783204, |
| "epoch": 0.148, |
| "grad_norm": 740.0, |
| "kl_loss_10": 256.4318244934082, |
| "kl_loss_2": 2655.588269042969, |
| "kl_loss_3": 2175.6559936523436, |
| "kl_loss_7": 861.8565673828125, |
| "learning_rate": 0.0009528180468815154, |
| "loss": 1488.9336, |
| "step": 1480 |
| }, |
| { |
| "ce_loss_10": 3.6558565139770507, |
| "ce_loss_13": 3.538043713569641, |
| "ce_loss_2": 4.811466526985169, |
| "ce_loss_3": 4.544855618476868, |
| "ce_loss_7": 3.9390755891799927, |
| "epoch": 0.149, |
| "grad_norm": 844.0, |
| "kl_loss_10": 264.13821868896486, |
| "kl_loss_2": 2565.1232788085936, |
| "kl_loss_3": 2095.556463623047, |
| "kl_loss_7": 897.2646911621093, |
| "learning_rate": 0.0009521429345495787, |
| "loss": 1465.2869, |
| "step": 1490 |
| }, |
| { |
| "ce_loss_10": 3.646085023880005, |
| "ce_loss_13": 3.5196659207344054, |
| "ce_loss_2": 4.780038499832154, |
| "ce_loss_3": 4.50464768409729, |
| "ce_loss_7": 3.927055561542511, |
| "epoch": 0.15, |
| "grad_norm": 980.0, |
| "kl_loss_10": 266.5307144165039, |
| "kl_loss_2": 2540.4637084960937, |
| "kl_loss_3": 2068.8232849121096, |
| "kl_loss_7": 888.35068359375, |
| "learning_rate": 0.0009514632691433108, |
| "loss": 1455.9041, |
| "step": 1500 |
| }, |
| { |
| "ce_loss_10": 3.5988011956214905, |
| "ce_loss_13": 3.482589673995972, |
| "ce_loss_2": 4.76681923866272, |
| "ce_loss_3": 4.485762524604797, |
| "ce_loss_7": 3.8728180885314942, |
| "epoch": 0.151, |
| "grad_norm": 600.0, |
| "kl_loss_10": 260.8206298828125, |
| "kl_loss_2": 2587.971142578125, |
| "kl_loss_3": 2094.4052795410157, |
| "kl_loss_7": 863.3080963134765, |
| "learning_rate": 0.0009507790575069346, |
| "loss": 1457.9502, |
| "step": 1510 |
| }, |
| { |
| "ce_loss_10": 3.5764056205749513, |
| "ce_loss_13": 3.453061044216156, |
| "ce_loss_2": 4.775901889801025, |
| "ce_loss_3": 4.500339031219482, |
| "ce_loss_7": 3.849775242805481, |
| "epoch": 0.152, |
| "grad_norm": 672.0, |
| "kl_loss_10": 258.1785354614258, |
| "kl_loss_2": 2655.3977172851564, |
| "kl_loss_3": 2164.0363708496093, |
| "kl_loss_7": 857.1902496337891, |
| "learning_rate": 0.0009500903065304539, |
| "loss": 1495.6711, |
| "step": 1520 |
| }, |
| { |
| "ce_loss_10": 3.608113396167755, |
| "ce_loss_13": 3.498811888694763, |
| "ce_loss_2": 4.760950970649719, |
| "ce_loss_3": 4.486514663696289, |
| "ce_loss_7": 3.8602269887924194, |
| "epoch": 0.153, |
| "grad_norm": 664.0, |
| "kl_loss_10": 245.0189353942871, |
| "kl_loss_2": 2552.576379394531, |
| "kl_loss_3": 2060.3522766113283, |
| "kl_loss_7": 806.0807342529297, |
| "learning_rate": 0.0009493970231495835, |
| "loss": 1444.8406, |
| "step": 1530 |
| }, |
| { |
| "ce_loss_10": 3.547162938117981, |
| "ce_loss_13": 3.44165985584259, |
| "ce_loss_2": 4.701804065704346, |
| "ce_loss_3": 4.424288666248321, |
| "ce_loss_7": 3.8007919073104857, |
| "epoch": 0.154, |
| "grad_norm": 648.0, |
| "kl_loss_10": 241.08162002563478, |
| "kl_loss_2": 2573.9149780273438, |
| "kl_loss_3": 2088.7893615722655, |
| "kl_loss_7": 812.2064361572266, |
| "learning_rate": 0.0009486992143456792, |
| "loss": 1427.6314, |
| "step": 1540 |
| }, |
| { |
| "ce_loss_10": 3.5828447937965393, |
| "ce_loss_13": 3.4581031084060667, |
| "ce_loss_2": 4.834084248542785, |
| "ce_loss_3": 4.553447818756103, |
| "ce_loss_7": 3.8656217455863953, |
| "epoch": 0.155, |
| "grad_norm": 660.0, |
| "kl_loss_10": 263.4285690307617, |
| "kl_loss_2": 2765.8754150390623, |
| "kl_loss_3": 2266.0636291503906, |
| "kl_loss_7": 882.9553771972656, |
| "learning_rate": 0.0009479968871456679, |
| "loss": 1498.7352, |
| "step": 1550 |
| }, |
| { |
| "ce_loss_10": 3.547222447395325, |
| "ce_loss_13": 3.4320276618003844, |
| "ce_loss_2": 4.768963408470154, |
| "ce_loss_3": 4.480878567695617, |
| "ce_loss_7": 3.828988194465637, |
| "epoch": 0.156, |
| "grad_norm": 760.0, |
| "kl_loss_10": 259.2473831176758, |
| "kl_loss_2": 2697.095703125, |
| "kl_loss_3": 2199.6418579101564, |
| "kl_loss_7": 874.4399932861328, |
| "learning_rate": 0.0009472900486219768, |
| "loss": 1467.8941, |
| "step": 1560 |
| }, |
| { |
| "ce_loss_10": 3.54234699010849, |
| "ce_loss_13": 3.416030561923981, |
| "ce_loss_2": 4.702804708480835, |
| "ce_loss_3": 4.435993790626526, |
| "ce_loss_7": 3.811834490299225, |
| "epoch": 0.157, |
| "grad_norm": 996.0, |
| "kl_loss_10": 266.18832244873045, |
| "kl_loss_2": 2600.0114868164064, |
| "kl_loss_3": 2127.867303466797, |
| "kl_loss_7": 872.285043334961, |
| "learning_rate": 0.000946578705892462, |
| "loss": 1470.9803, |
| "step": 1570 |
| }, |
| { |
| "ce_loss_10": 3.5780028820037844, |
| "ce_loss_13": 3.457060468196869, |
| "ce_loss_2": 4.7225889444351195, |
| "ce_loss_3": 4.4475972890853885, |
| "ce_loss_7": 3.8437977194786073, |
| "epoch": 0.158, |
| "grad_norm": 804.0, |
| "kl_loss_10": 277.74141387939454, |
| "kl_loss_2": 2544.743029785156, |
| "kl_loss_3": 2066.6833618164064, |
| "kl_loss_7": 835.531689453125, |
| "learning_rate": 0.0009458628661203367, |
| "loss": 1460.073, |
| "step": 1580 |
| }, |
| { |
| "ce_loss_10": 3.5895689606666563, |
| "ce_loss_13": 3.4543488025665283, |
| "ce_loss_2": 4.777545094490051, |
| "ce_loss_3": 4.501162362098694, |
| "ce_loss_7": 3.846569240093231, |
| "epoch": 0.159, |
| "grad_norm": 748.0, |
| "kl_loss_10": 280.14837341308595, |
| "kl_loss_2": 2651.9770629882814, |
| "kl_loss_3": 2168.784558105469, |
| "kl_loss_7": 873.0370697021484, |
| "learning_rate": 0.0009451425365140996, |
| "loss": 1445.3969, |
| "step": 1590 |
| }, |
| { |
| "ce_loss_10": 3.6579004883766175, |
| "ce_loss_13": 3.5379099249839783, |
| "ce_loss_2": 4.773951435089112, |
| "ce_loss_3": 4.50466411113739, |
| "ce_loss_7": 3.922029638290405, |
| "epoch": 0.16, |
| "grad_norm": 728.0, |
| "kl_loss_10": 273.34312896728517, |
| "kl_loss_2": 2508.3281860351562, |
| "kl_loss_3": 2021.1453735351563, |
| "kl_loss_7": 841.1831604003906, |
| "learning_rate": 0.0009444177243274617, |
| "loss": 1408.8492, |
| "step": 1600 |
| }, |
| { |
| "ce_loss_10": 3.514503800868988, |
| "ce_loss_13": 3.388037991523743, |
| "ce_loss_2": 4.701039886474609, |
| "ce_loss_3": 4.418904185295105, |
| "ce_loss_7": 3.7763099312782287, |
| "epoch": 0.161, |
| "grad_norm": 704.0, |
| "kl_loss_10": 268.4227348327637, |
| "kl_loss_2": 2642.4529418945312, |
| "kl_loss_3": 2156.727893066406, |
| "kl_loss_7": 856.544287109375, |
| "learning_rate": 0.0009436884368592739, |
| "loss": 1462.7545, |
| "step": 1610 |
| }, |
| { |
| "ce_loss_10": 3.55902304649353, |
| "ce_loss_13": 3.441978645324707, |
| "ce_loss_2": 4.705282998085022, |
| "ce_loss_3": 4.427343964576721, |
| "ce_loss_7": 3.810055124759674, |
| "epoch": 0.162, |
| "grad_norm": 692.0, |
| "kl_loss_10": 253.71325302124023, |
| "kl_loss_2": 2545.9316528320314, |
| "kl_loss_3": 2055.34326171875, |
| "kl_loss_7": 814.7054443359375, |
| "learning_rate": 0.0009429546814534529, |
| "loss": 1452.6556, |
| "step": 1620 |
| }, |
| { |
| "ce_loss_10": 3.567894661426544, |
| "ce_loss_13": 3.4576117157936097, |
| "ce_loss_2": 4.725762176513672, |
| "ce_loss_3": 4.453288149833679, |
| "ce_loss_7": 3.8241241455078123, |
| "epoch": 0.163, |
| "grad_norm": 600.0, |
| "kl_loss_10": 249.5528419494629, |
| "kl_loss_2": 2561.039794921875, |
| "kl_loss_3": 2084.6285034179687, |
| "kl_loss_7": 811.9796569824218, |
| "learning_rate": 0.0009422164654989072, |
| "loss": 1405.3155, |
| "step": 1630 |
| }, |
| { |
| "ce_loss_10": 3.682257628440857, |
| "ce_loss_13": 3.57005797624588, |
| "ce_loss_2": 4.807721471786499, |
| "ce_loss_3": 4.5406172513961796, |
| "ce_loss_7": 3.9288353323936462, |
| "epoch": 0.164, |
| "grad_norm": 632.0, |
| "kl_loss_10": 249.95079803466797, |
| "kl_loss_2": 2525.5760131835937, |
| "kl_loss_3": 2063.9668884277344, |
| "kl_loss_7": 811.2918426513672, |
| "learning_rate": 0.0009414737964294635, |
| "loss": 1427.1312, |
| "step": 1640 |
| }, |
| { |
| "ce_loss_10": 3.6101224184036256, |
| "ce_loss_13": 3.5010381817817686, |
| "ce_loss_2": 4.720621514320373, |
| "ce_loss_3": 4.4590880393981935, |
| "ce_loss_7": 3.8465168356895445, |
| "epoch": 0.165, |
| "grad_norm": 592.0, |
| "kl_loss_10": 244.4941291809082, |
| "kl_loss_2": 2467.5499755859373, |
| "kl_loss_3": 2009.9993835449218, |
| "kl_loss_7": 785.2798095703125, |
| "learning_rate": 0.000940726681723791, |
| "loss": 1420.5047, |
| "step": 1650 |
| }, |
| { |
| "ce_loss_10": 3.4529421091079713, |
| "ce_loss_13": 3.3362591743469237, |
| "ce_loss_2": 4.67095410823822, |
| "ce_loss_3": 4.395775043964386, |
| "ce_loss_7": 3.7144131302833556, |
| "epoch": 0.166, |
| "grad_norm": 760.0, |
| "kl_loss_10": 256.26583633422854, |
| "kl_loss_2": 2688.0345825195313, |
| "kl_loss_3": 2212.7634155273436, |
| "kl_loss_7": 836.4253662109375, |
| "learning_rate": 0.0009399751289053266, |
| "loss": 1423.8466, |
| "step": 1660 |
| }, |
| { |
| "ce_loss_10": 3.667633831501007, |
| "ce_loss_13": 3.557128643989563, |
| "ce_loss_2": 4.805055928230286, |
| "ce_loss_3": 4.532746481895447, |
| "ce_loss_7": 3.911760663986206, |
| "epoch": 0.167, |
| "grad_norm": 700.0, |
| "kl_loss_10": 250.78092575073242, |
| "kl_loss_2": 2539.268176269531, |
| "kl_loss_3": 2059.193713378906, |
| "kl_loss_7": 797.4180786132813, |
| "learning_rate": 0.0009392191455421988, |
| "loss": 1439.8459, |
| "step": 1670 |
| }, |
| { |
| "ce_loss_10": 3.6344913125038145, |
| "ce_loss_13": 3.5230419993400575, |
| "ce_loss_2": 4.79052848815918, |
| "ce_loss_3": 4.512491989135742, |
| "ce_loss_7": 3.8782394886016847, |
| "epoch": 0.168, |
| "grad_norm": 672.0, |
| "kl_loss_10": 262.5000991821289, |
| "kl_loss_2": 2580.610949707031, |
| "kl_loss_3": 2102.571502685547, |
| "kl_loss_7": 817.8095031738281, |
| "learning_rate": 0.0009384587392471515, |
| "loss": 1409.023, |
| "step": 1680 |
| }, |
| { |
| "ce_loss_10": 3.6255006551742555, |
| "ce_loss_13": 3.514340567588806, |
| "ce_loss_2": 4.734428143501281, |
| "ce_loss_3": 4.468456673622131, |
| "ce_loss_7": 3.8644169330596925, |
| "epoch": 0.169, |
| "grad_norm": 628.0, |
| "kl_loss_10": 242.1350540161133, |
| "kl_loss_2": 2468.8160400390625, |
| "kl_loss_3": 2004.5263061523438, |
| "kl_loss_7": 785.5388061523438, |
| "learning_rate": 0.0009376939176774678, |
| "loss": 1384.7148, |
| "step": 1690 |
| }, |
| { |
| "ce_loss_10": 3.601682686805725, |
| "ce_loss_13": 3.4858548164367678, |
| "ce_loss_2": 4.752888894081115, |
| "ce_loss_3": 4.474552822113037, |
| "ce_loss_7": 3.842711091041565, |
| "epoch": 0.17, |
| "grad_norm": 636.0, |
| "kl_loss_10": 245.69830017089845, |
| "kl_loss_2": 2544.683557128906, |
| "kl_loss_3": 2064.160784912109, |
| "kl_loss_7": 792.8873626708985, |
| "learning_rate": 0.0009369246885348925, |
| "loss": 1434.5433, |
| "step": 1700 |
| }, |
| { |
| "ce_loss_10": 3.5952138662338258, |
| "ce_loss_13": 3.4776424884796144, |
| "ce_loss_2": 4.792232918739319, |
| "ce_loss_3": 4.513515877723694, |
| "ce_loss_7": 3.8616483092308043, |
| "epoch": 0.171, |
| "grad_norm": 644.0, |
| "kl_loss_10": 250.0074020385742, |
| "kl_loss_2": 2643.389611816406, |
| "kl_loss_3": 2155.037109375, |
| "kl_loss_7": 835.121694946289, |
| "learning_rate": 0.0009361510595655545, |
| "loss": 1446.8347, |
| "step": 1710 |
| }, |
| { |
| "ce_loss_10": 3.558023285865784, |
| "ce_loss_13": 3.438031017780304, |
| "ce_loss_2": 4.730398392677307, |
| "ce_loss_3": 4.452085471153259, |
| "ce_loss_7": 3.815502095222473, |
| "epoch": 0.172, |
| "grad_norm": 672.0, |
| "kl_loss_10": 260.42660064697264, |
| "kl_loss_2": 2622.6736572265627, |
| "kl_loss_3": 2126.2764099121096, |
| "kl_loss_7": 833.0841033935546, |
| "learning_rate": 0.0009353730385598887, |
| "loss": 1443.5211, |
| "step": 1720 |
| }, |
| { |
| "ce_loss_10": 3.4771748185157776, |
| "ce_loss_13": 3.364219045639038, |
| "ce_loss_2": 4.693475008010864, |
| "ce_loss_3": 4.410137629508972, |
| "ce_loss_7": 3.7466461181640627, |
| "epoch": 0.173, |
| "grad_norm": 576.0, |
| "kl_loss_10": 244.67605361938476, |
| "kl_loss_2": 2652.6466064453125, |
| "kl_loss_3": 2161.871044921875, |
| "kl_loss_7": 827.6846221923828, |
| "learning_rate": 0.0009345906333525581, |
| "loss": 1466.803, |
| "step": 1730 |
| }, |
| { |
| "ce_loss_10": 3.515894877910614, |
| "ce_loss_13": 3.403614568710327, |
| "ce_loss_2": 4.707322573661804, |
| "ce_loss_3": 4.422236812114716, |
| "ce_loss_7": 3.7741501927375793, |
| "epoch": 0.174, |
| "grad_norm": 608.0, |
| "kl_loss_10": 250.64810333251953, |
| "kl_loss_2": 2637.284143066406, |
| "kl_loss_3": 2135.0481140136717, |
| "kl_loss_7": 835.5378570556641, |
| "learning_rate": 0.0009338038518223745, |
| "loss": 1437.4744, |
| "step": 1740 |
| }, |
| { |
| "ce_loss_10": 3.5834938049316407, |
| "ce_loss_13": 3.468910980224609, |
| "ce_loss_2": 4.762004089355469, |
| "ce_loss_3": 4.486204957962036, |
| "ce_loss_7": 3.8505713820457457, |
| "epoch": 0.175, |
| "grad_norm": 652.0, |
| "kl_loss_10": 254.51539306640626, |
| "kl_loss_2": 2618.7681884765625, |
| "kl_loss_3": 2135.773858642578, |
| "kl_loss_7": 849.8012634277344, |
| "learning_rate": 0.0009330127018922195, |
| "loss": 1479.132, |
| "step": 1750 |
| }, |
| { |
| "ce_loss_10": 3.538338470458984, |
| "ce_loss_13": 3.4237607955932616, |
| "ce_loss_2": 4.7127416133880615, |
| "ce_loss_3": 4.443085932731629, |
| "ce_loss_7": 3.794516444206238, |
| "epoch": 0.176, |
| "grad_norm": 628.0, |
| "kl_loss_10": 245.44887084960936, |
| "kl_loss_2": 2605.848291015625, |
| "kl_loss_3": 2119.558026123047, |
| "kl_loss_7": 818.0517547607421, |
| "learning_rate": 0.0009322171915289634, |
| "loss": 1443.3754, |
| "step": 1760 |
| }, |
| { |
| "ce_loss_10": 3.5648101806640624, |
| "ce_loss_13": 3.459370458126068, |
| "ce_loss_2": 4.707282447814942, |
| "ce_loss_3": 4.433714365959167, |
| "ce_loss_7": 3.812247085571289, |
| "epoch": 0.177, |
| "grad_norm": 576.0, |
| "kl_loss_10": 245.77867431640624, |
| "kl_loss_2": 2558.2099365234376, |
| "kl_loss_3": 2069.932727050781, |
| "kl_loss_7": 809.8514526367187, |
| "learning_rate": 0.0009314173287433873, |
| "loss": 1402.6621, |
| "step": 1770 |
| }, |
| { |
| "ce_loss_10": 3.5681435227394105, |
| "ce_loss_13": 3.4554543256759644, |
| "ce_loss_2": 4.716624093055725, |
| "ce_loss_3": 4.441683101654053, |
| "ce_loss_7": 3.8178189396858215, |
| "epoch": 0.178, |
| "grad_norm": 704.0, |
| "kl_loss_10": 250.21724319458008, |
| "kl_loss_2": 2566.923547363281, |
| "kl_loss_3": 2076.703576660156, |
| "kl_loss_7": 808.6476196289062, |
| "learning_rate": 0.0009306131215901003, |
| "loss": 1403.6738, |
| "step": 1780 |
| }, |
| { |
| "ce_loss_10": 3.60051885843277, |
| "ce_loss_13": 3.4851089835166933, |
| "ce_loss_2": 4.74112594127655, |
| "ce_loss_3": 4.468067002296448, |
| "ce_loss_7": 3.8350677728652953, |
| "epoch": 0.179, |
| "grad_norm": 656.0, |
| "kl_loss_10": 247.00397262573242, |
| "kl_loss_2": 2550.1098876953124, |
| "kl_loss_3": 2071.478955078125, |
| "kl_loss_7": 797.3671112060547, |
| "learning_rate": 0.0009298045781674596, |
| "loss": 1386.7528, |
| "step": 1790 |
| }, |
| { |
| "ce_loss_10": 3.576521575450897, |
| "ce_loss_13": 3.465667748451233, |
| "ce_loss_2": 4.70545973777771, |
| "ce_loss_3": 4.437681531906128, |
| "ce_loss_7": 3.823224997520447, |
| "epoch": 0.18, |
| "grad_norm": 640.0, |
| "kl_loss_10": 245.9371192932129, |
| "kl_loss_2": 2516.936376953125, |
| "kl_loss_3": 2031.5426147460937, |
| "kl_loss_7": 793.7673767089843, |
| "learning_rate": 0.0009289917066174886, |
| "loss": 1415.4195, |
| "step": 1800 |
| }, |
| { |
| "ce_loss_10": 3.568215787410736, |
| "ce_loss_13": 3.465099549293518, |
| "ce_loss_2": 4.663200092315674, |
| "ce_loss_3": 4.39816825389862, |
| "ce_loss_7": 3.797432005405426, |
| "epoch": 0.181, |
| "grad_norm": 596.0, |
| "kl_loss_10": 236.99261932373048, |
| "kl_loss_2": 2444.4304931640627, |
| "kl_loss_3": 1977.5733642578125, |
| "kl_loss_7": 762.0940002441406, |
| "learning_rate": 0.0009281745151257945, |
| "loss": 1372.7959, |
| "step": 1810 |
| }, |
| { |
| "ce_loss_10": 3.589988100528717, |
| "ce_loss_13": 3.4779568314552307, |
| "ce_loss_2": 4.741122603416443, |
| "ce_loss_3": 4.463086032867432, |
| "ce_loss_7": 3.8317890048027037, |
| "epoch": 0.182, |
| "grad_norm": 576.0, |
| "kl_loss_10": 245.52628021240236, |
| "kl_loss_2": 2546.031115722656, |
| "kl_loss_3": 2057.9885314941407, |
| "kl_loss_7": 789.636849975586, |
| "learning_rate": 0.0009273530119214868, |
| "loss": 1414.9602, |
| "step": 1820 |
| }, |
| { |
| "ce_loss_10": 3.6874829173088073, |
| "ce_loss_13": 3.5830198526382446, |
| "ce_loss_2": 4.805440378189087, |
| "ce_loss_3": 4.542007470130921, |
| "ce_loss_7": 3.922217535972595, |
| "epoch": 0.183, |
| "grad_norm": 668.0, |
| "kl_loss_10": 240.62074966430663, |
| "kl_loss_2": 2477.285852050781, |
| "kl_loss_3": 2025.298876953125, |
| "kl_loss_7": 778.8258850097657, |
| "learning_rate": 0.0009265272052770935, |
| "loss": 1365.1876, |
| "step": 1830 |
| }, |
| { |
| "ce_loss_10": 3.5063879013061525, |
| "ce_loss_13": 3.3919414281845093, |
| "ce_loss_2": 4.691436982154846, |
| "ce_loss_3": 4.40977828502655, |
| "ce_loss_7": 3.7569626212120055, |
| "epoch": 0.184, |
| "grad_norm": 672.0, |
| "kl_loss_10": 245.37701873779298, |
| "kl_loss_2": 2600.8256103515623, |
| "kl_loss_3": 2103.731726074219, |
| "kl_loss_7": 796.2739471435547, |
| "learning_rate": 0.0009256971035084784, |
| "loss": 1423.7646, |
| "step": 1840 |
| }, |
| { |
| "ce_loss_10": 3.4534160137176513, |
| "ce_loss_13": 3.337074172496796, |
| "ce_loss_2": 4.650833582878112, |
| "ce_loss_3": 4.375414204597473, |
| "ce_loss_7": 3.7153374671936037, |
| "epoch": 0.185, |
| "grad_norm": 872.0, |
| "kl_loss_10": 253.35809020996095, |
| "kl_loss_2": 2636.6057983398437, |
| "kl_loss_3": 2149.6516052246093, |
| "kl_loss_7": 833.7322570800782, |
| "learning_rate": 0.0009248627149747573, |
| "loss": 1433.1182, |
| "step": 1850 |
| }, |
| { |
| "ce_loss_10": 3.6552318572998046, |
| "ce_loss_13": 3.5436174392700197, |
| "ce_loss_2": 4.771462321281433, |
| "ce_loss_3": 4.505353546142578, |
| "ce_loss_7": 3.8980504512786864, |
| "epoch": 0.186, |
| "grad_norm": 628.0, |
| "kl_loss_10": 244.59689865112304, |
| "kl_loss_2": 2502.3949462890623, |
| "kl_loss_3": 2027.7750183105468, |
| "kl_loss_7": 792.985708618164, |
| "learning_rate": 0.0009240240480782129, |
| "loss": 1402.8563, |
| "step": 1860 |
| }, |
| { |
| "ce_loss_10": 3.559772253036499, |
| "ce_loss_13": 3.444066059589386, |
| "ce_loss_2": 4.714746379852295, |
| "ce_loss_3": 4.439750409126281, |
| "ce_loss_7": 3.8083682656288147, |
| "epoch": 0.187, |
| "grad_norm": 696.0, |
| "kl_loss_10": 248.7159553527832, |
| "kl_loss_2": 2569.0499755859373, |
| "kl_loss_3": 2081.2441528320314, |
| "kl_loss_7": 799.65380859375, |
| "learning_rate": 0.0009231811112642122, |
| "loss": 1391.885, |
| "step": 1870 |
| }, |
| { |
| "ce_loss_10": 3.603023958206177, |
| "ce_loss_13": 3.4911489486694336, |
| "ce_loss_2": 4.7107093334198, |
| "ce_loss_3": 4.44477071762085, |
| "ce_loss_7": 3.8424261450767516, |
| "epoch": 0.188, |
| "grad_norm": 756.0, |
| "kl_loss_10": 245.3149284362793, |
| "kl_loss_2": 2484.4417358398437, |
| "kl_loss_3": 2013.6257080078126, |
| "kl_loss_7": 788.6129425048828, |
| "learning_rate": 0.0009223339130211192, |
| "loss": 1382.5715, |
| "step": 1880 |
| }, |
| { |
| "ce_loss_10": 3.451503300666809, |
| "ce_loss_13": 3.3456833481788637, |
| "ce_loss_2": 4.6354892492294315, |
| "ce_loss_3": 4.368392133712769, |
| "ce_loss_7": 3.7025105237960814, |
| "epoch": 0.189, |
| "grad_norm": 796.0, |
| "kl_loss_10": 235.8703857421875, |
| "kl_loss_2": 2606.5722534179686, |
| "kl_loss_3": 2137.981573486328, |
| "kl_loss_7": 795.582894897461, |
| "learning_rate": 0.0009214824618802108, |
| "loss": 1426.9203, |
| "step": 1890 |
| }, |
| { |
| "ce_loss_10": 3.633524978160858, |
| "ce_loss_13": 3.5242482304573057, |
| "ce_loss_2": 4.770471715927124, |
| "ce_loss_3": 4.501252269744873, |
| "ce_loss_7": 3.883507859706879, |
| "epoch": 0.19, |
| "grad_norm": 652.0, |
| "kl_loss_10": 237.73654251098634, |
| "kl_loss_2": 2486.365759277344, |
| "kl_loss_3": 2019.5263671875, |
| "kl_loss_7": 793.6973388671875, |
| "learning_rate": 0.0009206267664155906, |
| "loss": 1428.9256, |
| "step": 1900 |
| }, |
| { |
| "ce_loss_10": 3.5532122611999513, |
| "ce_loss_13": 3.443064200878143, |
| "ce_loss_2": 4.697825288772583, |
| "ce_loss_3": 4.427114844322205, |
| "ce_loss_7": 3.799003005027771, |
| "epoch": 0.191, |
| "grad_norm": 636.0, |
| "kl_loss_10": 241.10890350341796, |
| "kl_loss_2": 2548.555554199219, |
| "kl_loss_3": 2061.9953002929688, |
| "kl_loss_7": 794.9706726074219, |
| "learning_rate": 0.0009197668352441024, |
| "loss": 1417.5695, |
| "step": 1910 |
| }, |
| { |
| "ce_loss_10": 3.608381187915802, |
| "ce_loss_13": 3.4997890830039977, |
| "ce_loss_2": 4.748308372497559, |
| "ce_loss_3": 4.471417784690857, |
| "ce_loss_7": 3.851922130584717, |
| "epoch": 0.192, |
| "grad_norm": 636.0, |
| "kl_loss_10": 242.21438293457032, |
| "kl_loss_2": 2509.6267700195312, |
| "kl_loss_3": 2027.6890441894532, |
| "kl_loss_7": 779.7179229736328, |
| "learning_rate": 0.0009189026770252437, |
| "loss": 1396.1437, |
| "step": 1920 |
| }, |
| { |
| "ce_loss_10": 3.6384175658226012, |
| "ce_loss_13": 3.5275412440299987, |
| "ce_loss_2": 4.762041211128235, |
| "ce_loss_3": 4.48351948261261, |
| "ce_loss_7": 3.8741258502006533, |
| "epoch": 0.193, |
| "grad_norm": 688.0, |
| "kl_loss_10": 250.4880401611328, |
| "kl_loss_2": 2491.730749511719, |
| "kl_loss_3": 2004.1307067871094, |
| "kl_loss_7": 785.3524200439454, |
| "learning_rate": 0.000918034300461078, |
| "loss": 1438.3092, |
| "step": 1930 |
| }, |
| { |
| "ce_loss_10": 3.675648069381714, |
| "ce_loss_13": 3.555274224281311, |
| "ce_loss_2": 4.77588381767273, |
| "ce_loss_3": 4.506980061531067, |
| "ce_loss_7": 3.9165189027786256, |
| "epoch": 0.194, |
| "grad_norm": 1048.0, |
| "kl_loss_10": 251.8736488342285, |
| "kl_loss_2": 2458.3200805664064, |
| "kl_loss_3": 1995.1934143066405, |
| "kl_loss_7": 806.3202392578125, |
| "learning_rate": 0.0009171617142961477, |
| "loss": 1389.0176, |
| "step": 1940 |
| }, |
| { |
| "ce_loss_10": 3.623457467556, |
| "ce_loss_13": 3.512966477870941, |
| "ce_loss_2": 4.729074192047119, |
| "ce_loss_3": 4.464083290100097, |
| "ce_loss_7": 3.8867802739143373, |
| "epoch": 0.195, |
| "grad_norm": 688.0, |
| "kl_loss_10": 255.58710021972655, |
| "kl_loss_2": 2479.066796875, |
| "kl_loss_3": 2001.9678588867187, |
| "kl_loss_7": 833.6603210449218, |
| "learning_rate": 0.0009162849273173857, |
| "loss": 1403.0846, |
| "step": 1950 |
| }, |
| { |
| "ce_loss_10": 3.5657452821731566, |
| "ce_loss_13": 3.457024359703064, |
| "ce_loss_2": 4.679703283309936, |
| "ce_loss_3": 4.409625816345215, |
| "ce_loss_7": 3.8033991694450378, |
| "epoch": 0.196, |
| "grad_norm": 656.0, |
| "kl_loss_10": 242.9659797668457, |
| "kl_loss_2": 2473.7406372070313, |
| "kl_loss_3": 2000.534735107422, |
| "kl_loss_7": 783.4248046875, |
| "learning_rate": 0.0009154039483540273, |
| "loss": 1391.609, |
| "step": 1960 |
| }, |
| { |
| "ce_loss_10": 3.5444339156150817, |
| "ce_loss_13": 3.433286416530609, |
| "ce_loss_2": 4.677814674377442, |
| "ce_loss_3": 4.395683622360229, |
| "ce_loss_7": 3.784545695781708, |
| "epoch": 0.197, |
| "grad_norm": 608.0, |
| "kl_loss_10": 239.23334732055665, |
| "kl_loss_2": 2520.074182128906, |
| "kl_loss_3": 2031.6637634277345, |
| "kl_loss_7": 792.3442230224609, |
| "learning_rate": 0.0009145187862775209, |
| "loss": 1388.6972, |
| "step": 1970 |
| }, |
| { |
| "ce_loss_10": 3.572359085083008, |
| "ce_loss_13": 3.466273844242096, |
| "ce_loss_2": 4.692143273353577, |
| "ce_loss_3": 4.418303954601288, |
| "ce_loss_7": 3.8197664856910705, |
| "epoch": 0.198, |
| "grad_norm": 660.0, |
| "kl_loss_10": 241.7268035888672, |
| "kl_loss_2": 2492.987420654297, |
| "kl_loss_3": 2004.3476135253907, |
| "kl_loss_7": 794.6048614501954, |
| "learning_rate": 0.0009136294500014386, |
| "loss": 1377.9902, |
| "step": 1980 |
| }, |
| { |
| "ce_loss_10": 3.52831609249115, |
| "ce_loss_13": 3.4167757987976075, |
| "ce_loss_2": 4.705040359497071, |
| "ce_loss_3": 4.434882926940918, |
| "ce_loss_7": 3.7779016494750977, |
| "epoch": 0.199, |
| "grad_norm": 684.0, |
| "kl_loss_10": 242.86552047729492, |
| "kl_loss_2": 2578.6255493164062, |
| "kl_loss_3": 2108.4060180664064, |
| "kl_loss_7": 798.0517791748047, |
| "learning_rate": 0.000912735948481387, |
| "loss": 1426.8047, |
| "step": 1990 |
| }, |
| { |
| "ce_loss_10": 3.5601553082466126, |
| "ce_loss_13": 3.449883997440338, |
| "ce_loss_2": 4.691212105751037, |
| "ce_loss_3": 4.414692604541779, |
| "ce_loss_7": 3.8016102075576783, |
| "epoch": 0.2, |
| "grad_norm": 684.0, |
| "kl_loss_10": 242.28478622436523, |
| "kl_loss_2": 2530.514270019531, |
| "kl_loss_3": 2040.9486206054687, |
| "kl_loss_7": 800.2102844238282, |
| "learning_rate": 0.0009118382907149164, |
| "loss": 1370.7061, |
| "step": 2000 |
| }, |
| { |
| "ce_loss_10": 3.5833643674850464, |
| "ce_loss_13": 3.4740814447402952, |
| "ce_loss_2": 4.70447518825531, |
| "ce_loss_3": 4.429442811012268, |
| "ce_loss_7": 3.8237846970558165, |
| "epoch": 0.201, |
| "grad_norm": 612.0, |
| "kl_loss_10": 244.51040420532226, |
| "kl_loss_2": 2494.5580932617186, |
| "kl_loss_3": 2005.631494140625, |
| "kl_loss_7": 779.4999328613281, |
| "learning_rate": 0.0009109364857414306, |
| "loss": 1380.7336, |
| "step": 2010 |
| }, |
| { |
| "ce_loss_10": 3.5532099485397337, |
| "ce_loss_13": 3.4470490336418154, |
| "ce_loss_2": 4.681869411468506, |
| "ce_loss_3": 4.40200264453888, |
| "ce_loss_7": 3.790750026702881, |
| "epoch": 0.202, |
| "grad_norm": 608.0, |
| "kl_loss_10": 240.87973327636718, |
| "kl_loss_2": 2528.7482421875, |
| "kl_loss_3": 2036.1677551269531, |
| "kl_loss_7": 777.9466033935547, |
| "learning_rate": 0.0009100305426420956, |
| "loss": 1419.7547, |
| "step": 2020 |
| }, |
| { |
| "ce_loss_10": 3.5112710118293764, |
| "ce_loss_13": 3.404540646076202, |
| "ce_loss_2": 4.711292386054993, |
| "ce_loss_3": 4.432630777359009, |
| "ce_loss_7": 3.757065165042877, |
| "epoch": 0.203, |
| "grad_norm": 664.0, |
| "kl_loss_10": 238.4617919921875, |
| "kl_loss_2": 2652.4912963867187, |
| "kl_loss_3": 2152.2258422851564, |
| "kl_loss_7": 790.063916015625, |
| "learning_rate": 0.0009091204705397484, |
| "loss": 1413.6135, |
| "step": 2030 |
| }, |
| { |
| "ce_loss_10": 3.508105480670929, |
| "ce_loss_13": 3.399987006187439, |
| "ce_loss_2": 4.703747749328613, |
| "ce_loss_3": 4.428358674049377, |
| "ce_loss_7": 3.7540559649467466, |
| "epoch": 0.204, |
| "grad_norm": 700.0, |
| "kl_loss_10": 242.5270248413086, |
| "kl_loss_2": 2644.1144165039063, |
| "kl_loss_3": 2155.070489501953, |
| "kl_loss_7": 790.7262329101562, |
| "learning_rate": 0.0009082062785988049, |
| "loss": 1424.9719, |
| "step": 2040 |
| }, |
| { |
| "ce_loss_10": 3.638819897174835, |
| "ce_loss_13": 3.5337455749511717, |
| "ce_loss_2": 4.727799487113953, |
| "ce_loss_3": 4.457953143119812, |
| "ce_loss_7": 3.8601122856140138, |
| "epoch": 0.205, |
| "grad_norm": 668.0, |
| "kl_loss_10": 235.8659812927246, |
| "kl_loss_2": 2476.5026977539064, |
| "kl_loss_3": 1996.3927185058594, |
| "kl_loss_7": 769.8516876220704, |
| "learning_rate": 0.0009072879760251679, |
| "loss": 1387.9949, |
| "step": 2050 |
| }, |
| { |
| "ce_loss_10": 3.5858229279518126, |
| "ce_loss_13": 3.475198233127594, |
| "ce_loss_2": 4.739975643157959, |
| "ce_loss_3": 4.475312519073486, |
| "ce_loss_7": 3.834290158748627, |
| "epoch": 0.206, |
| "grad_norm": 700.0, |
| "kl_loss_10": 239.9431396484375, |
| "kl_loss_2": 2570.9485107421874, |
| "kl_loss_3": 2100.634240722656, |
| "kl_loss_7": 789.2198791503906, |
| "learning_rate": 0.0009063655720661341, |
| "loss": 1402.2605, |
| "step": 2060 |
| }, |
| { |
| "ce_loss_10": 3.6313581228256226, |
| "ce_loss_13": 3.5262081384658814, |
| "ce_loss_2": 4.7349327325820925, |
| "ce_loss_3": 4.470538520812989, |
| "ce_loss_7": 3.864632213115692, |
| "epoch": 0.207, |
| "grad_norm": 580.0, |
| "kl_loss_10": 238.97062911987305, |
| "kl_loss_2": 2454.8896240234376, |
| "kl_loss_3": 1987.1748107910157, |
| "kl_loss_7": 776.5097869873047, |
| "learning_rate": 0.000905439076010301, |
| "loss": 1376.7035, |
| "step": 2070 |
| }, |
| { |
| "ce_loss_10": 3.5894328594207763, |
| "ce_loss_13": 3.4751851201057433, |
| "ce_loss_2": 4.723314690589905, |
| "ce_loss_3": 4.451727390289307, |
| "ce_loss_7": 3.830363655090332, |
| "epoch": 0.208, |
| "grad_norm": 620.0, |
| "kl_loss_10": 243.43872604370117, |
| "kl_loss_2": 2525.0844848632814, |
| "kl_loss_3": 2046.1018615722655, |
| "kl_loss_7": 793.8133911132812, |
| "learning_rate": 0.0009045084971874737, |
| "loss": 1367.5893, |
| "step": 2080 |
| }, |
| { |
| "ce_loss_10": 3.5676583290100097, |
| "ce_loss_13": 3.452693998813629, |
| "ce_loss_2": 4.699956917762757, |
| "ce_loss_3": 4.424242115020752, |
| "ce_loss_7": 3.806007480621338, |
| "epoch": 0.209, |
| "grad_norm": 688.0, |
| "kl_loss_10": 249.41274871826172, |
| "kl_loss_2": 2529.7607299804686, |
| "kl_loss_3": 2042.927227783203, |
| "kl_loss_7": 789.6784515380859, |
| "learning_rate": 0.0009035738449685707, |
| "loss": 1418.6186, |
| "step": 2090 |
| }, |
| { |
| "ce_loss_10": 3.510753297805786, |
| "ce_loss_13": 3.3990254640579223, |
| "ce_loss_2": 4.691071200370788, |
| "ce_loss_3": 4.41790235042572, |
| "ce_loss_7": 3.7591400265693666, |
| "epoch": 0.21, |
| "grad_norm": 600.0, |
| "kl_loss_10": 248.95298919677734, |
| "kl_loss_2": 2601.993273925781, |
| "kl_loss_3": 2124.828485107422, |
| "kl_loss_7": 799.376498413086, |
| "learning_rate": 0.0009026351287655293, |
| "loss": 1399.0971, |
| "step": 2100 |
| }, |
| { |
| "ce_loss_10": 3.697406494617462, |
| "ce_loss_13": 3.5970078110694885, |
| "ce_loss_2": 4.7389151096344, |
| "ce_loss_3": 4.481091260910034, |
| "ce_loss_7": 3.9209010362625123, |
| "epoch": 0.211, |
| "grad_norm": 600.0, |
| "kl_loss_10": 229.3176498413086, |
| "kl_loss_2": 2353.4455688476564, |
| "kl_loss_3": 1885.3362854003906, |
| "kl_loss_7": 749.6781646728516, |
| "learning_rate": 0.0009016923580312113, |
| "loss": 1321.2097, |
| "step": 2110 |
| }, |
| { |
| "ce_loss_10": 3.565862810611725, |
| "ce_loss_13": 3.4591265320777893, |
| "ce_loss_2": 4.665999031066894, |
| "ce_loss_3": 4.391572332382202, |
| "ce_loss_7": 3.7967191696166993, |
| "epoch": 0.212, |
| "grad_norm": 732.0, |
| "kl_loss_10": 243.075350189209, |
| "kl_loss_2": 2458.2255859375, |
| "kl_loss_3": 1975.4440124511718, |
| "kl_loss_7": 771.7640777587891, |
| "learning_rate": 0.0009007455422593077, |
| "loss": 1392.0321, |
| "step": 2120 |
| }, |
| { |
| "ce_loss_10": 3.574350452423096, |
| "ce_loss_13": 3.4604103803634643, |
| "ce_loss_2": 4.7152410507202145, |
| "ce_loss_3": 4.439797115325928, |
| "ce_loss_7": 3.8057913303375246, |
| "epoch": 0.213, |
| "grad_norm": 652.0, |
| "kl_loss_10": 251.99988250732423, |
| "kl_loss_2": 2551.55615234375, |
| "kl_loss_3": 2068.113671875, |
| "kl_loss_7": 789.3677795410156, |
| "learning_rate": 0.0008997946909842425, |
| "loss": 1402.5921, |
| "step": 2130 |
| }, |
| { |
| "ce_loss_10": 3.592576038837433, |
| "ce_loss_13": 3.476356315612793, |
| "ce_loss_2": 4.7715356826782225, |
| "ce_loss_3": 4.504428267478943, |
| "ce_loss_7": 3.843649852275848, |
| "epoch": 0.214, |
| "grad_norm": 660.0, |
| "kl_loss_10": 255.3404312133789, |
| "kl_loss_2": 2625.214599609375, |
| "kl_loss_3": 2155.4658203125, |
| "kl_loss_7": 813.4428436279297, |
| "learning_rate": 0.0008988398137810777, |
| "loss": 1403.5207, |
| "step": 2140 |
| }, |
| { |
| "ce_loss_10": 3.620520067214966, |
| "ce_loss_13": 3.513581109046936, |
| "ce_loss_2": 4.717863583564759, |
| "ce_loss_3": 4.442376029491425, |
| "ce_loss_7": 3.8534181356430053, |
| "epoch": 0.215, |
| "grad_norm": 700.0, |
| "kl_loss_10": 239.26677551269532, |
| "kl_loss_2": 2448.3839477539063, |
| "kl_loss_3": 1962.8316284179687, |
| "kl_loss_7": 763.2356109619141, |
| "learning_rate": 0.0008978809202654162, |
| "loss": 1354.8944, |
| "step": 2150 |
| }, |
| { |
| "ce_loss_10": 3.593782067298889, |
| "ce_loss_13": 3.4892191767692564, |
| "ce_loss_2": 4.713660454750061, |
| "ce_loss_3": 4.43155483007431, |
| "ce_loss_7": 3.8341444969177245, |
| "epoch": 0.216, |
| "grad_norm": 640.0, |
| "kl_loss_10": 237.50842971801757, |
| "kl_loss_2": 2454.586071777344, |
| "kl_loss_3": 1970.583270263672, |
| "kl_loss_7": 773.5592163085937, |
| "learning_rate": 0.0008969180200933046, |
| "loss": 1383.4818, |
| "step": 2160 |
| }, |
| { |
| "ce_loss_10": 3.56014689207077, |
| "ce_loss_13": 3.4516719341278077, |
| "ce_loss_2": 4.715594840049744, |
| "ce_loss_3": 4.431590890884399, |
| "ce_loss_7": 3.8131863117218017, |
| "epoch": 0.217, |
| "grad_norm": 712.0, |
| "kl_loss_10": 241.1098258972168, |
| "kl_loss_2": 2533.49033203125, |
| "kl_loss_3": 2041.2841003417968, |
| "kl_loss_7": 799.241552734375, |
| "learning_rate": 0.0008959511229611376, |
| "loss": 1406.9449, |
| "step": 2170 |
| }, |
| { |
| "ce_loss_10": 3.634247362613678, |
| "ce_loss_13": 3.529753494262695, |
| "ce_loss_2": 4.747422552108764, |
| "ce_loss_3": 4.480298018455505, |
| "ce_loss_7": 3.8834722995758058, |
| "epoch": 0.218, |
| "grad_norm": 744.0, |
| "kl_loss_10": 231.06951522827148, |
| "kl_loss_2": 2480.240673828125, |
| "kl_loss_3": 2003.8335388183593, |
| "kl_loss_7": 794.5106719970703, |
| "learning_rate": 0.0008949802386055581, |
| "loss": 1379.2705, |
| "step": 2180 |
| }, |
| { |
| "ce_loss_10": 3.4931302070617676, |
| "ce_loss_13": 3.3903717041015624, |
| "ce_loss_2": 4.634695625305175, |
| "ce_loss_3": 4.343023872375488, |
| "ce_loss_7": 3.735668647289276, |
| "epoch": 0.219, |
| "grad_norm": 704.0, |
| "kl_loss_10": 229.31054229736327, |
| "kl_loss_2": 2487.9470336914064, |
| "kl_loss_3": 1978.2749877929687, |
| "kl_loss_7": 772.9935424804687, |
| "learning_rate": 0.0008940053768033609, |
| "loss": 1398.8061, |
| "step": 2190 |
| }, |
| { |
| "ce_loss_10": 3.579288733005524, |
| "ce_loss_13": 3.476969850063324, |
| "ce_loss_2": 4.679602265357971, |
| "ce_loss_3": 4.408792352676391, |
| "ce_loss_7": 3.818285346031189, |
| "epoch": 0.22, |
| "grad_norm": 648.0, |
| "kl_loss_10": 225.21361923217773, |
| "kl_loss_2": 2457.1766845703123, |
| "kl_loss_3": 1985.3013549804687, |
| "kl_loss_7": 762.693115234375, |
| "learning_rate": 0.0008930265473713938, |
| "loss": 1358.0689, |
| "step": 2200 |
| }, |
| { |
| "ce_loss_10": 3.5425936341285706, |
| "ce_loss_13": 3.437610614299774, |
| "ce_loss_2": 4.679268145561219, |
| "ce_loss_3": 4.395039463043213, |
| "ce_loss_7": 3.7786786198616027, |
| "epoch": 0.221, |
| "grad_norm": 624.0, |
| "kl_loss_10": 227.02418670654296, |
| "kl_loss_2": 2514.80498046875, |
| "kl_loss_3": 2012.999462890625, |
| "kl_loss_7": 766.7205108642578, |
| "learning_rate": 0.0008920437601664579, |
| "loss": 1344.9316, |
| "step": 2210 |
| }, |
| { |
| "ce_loss_10": 3.5330151677131654, |
| "ce_loss_13": 3.4283509850502014, |
| "ce_loss_2": 4.65971040725708, |
| "ce_loss_3": 4.389861440658569, |
| "ce_loss_7": 3.7775445342063905, |
| "epoch": 0.222, |
| "grad_norm": 728.0, |
| "kl_loss_10": 231.53972396850585, |
| "kl_loss_2": 2495.336804199219, |
| "kl_loss_3": 2020.2352600097656, |
| "kl_loss_7": 785.6470977783204, |
| "learning_rate": 0.0008910570250852097, |
| "loss": 1358.0102, |
| "step": 2220 |
| }, |
| { |
| "ce_loss_10": 3.6386430144309996, |
| "ce_loss_13": 3.5394553184509276, |
| "ce_loss_2": 4.721383547782898, |
| "ce_loss_3": 4.441709399223328, |
| "ce_loss_7": 3.8573225855827333, |
| "epoch": 0.223, |
| "grad_norm": 656.0, |
| "kl_loss_10": 222.80670547485352, |
| "kl_loss_2": 2415.298693847656, |
| "kl_loss_3": 1914.3474975585937, |
| "kl_loss_7": 735.9223663330079, |
| "learning_rate": 0.0008900663520640604, |
| "loss": 1330.9881, |
| "step": 2230 |
| }, |
| { |
| "ce_loss_10": 3.5963090658187866, |
| "ce_loss_13": 3.4863692045211794, |
| "ce_loss_2": 4.697564601898193, |
| "ce_loss_3": 4.4291857242584225, |
| "ce_loss_7": 3.8206969499588013, |
| "epoch": 0.224, |
| "grad_norm": 616.0, |
| "kl_loss_10": 232.82473220825196, |
| "kl_loss_2": 2436.1601440429686, |
| "kl_loss_3": 1975.5118774414063, |
| "kl_loss_7": 746.4637390136719, |
| "learning_rate": 0.0008890717510790764, |
| "loss": 1355.2247, |
| "step": 2240 |
| }, |
| { |
| "ce_loss_10": 3.550048661231995, |
| "ce_loss_13": 3.444666588306427, |
| "ce_loss_2": 4.6846558332443236, |
| "ce_loss_3": 4.415020489692688, |
| "ce_loss_7": 3.7784482836723328, |
| "epoch": 0.225, |
| "grad_norm": 748.0, |
| "kl_loss_10": 234.0259765625, |
| "kl_loss_2": 2511.7267456054688, |
| "kl_loss_3": 2033.7661254882812, |
| "kl_loss_7": 757.7471649169922, |
| "learning_rate": 0.0008880732321458784, |
| "loss": 1391.5023, |
| "step": 2250 |
| }, |
| { |
| "ce_loss_10": 3.5846696734428405, |
| "ce_loss_13": 3.475912594795227, |
| "ce_loss_2": 4.6859821557998655, |
| "ce_loss_3": 4.403112530708313, |
| "ce_loss_7": 3.8075541138648985, |
| "epoch": 0.226, |
| "grad_norm": 768.0, |
| "kl_loss_10": 241.0058906555176, |
| "kl_loss_2": 2434.0427978515627, |
| "kl_loss_3": 1946.942852783203, |
| "kl_loss_7": 750.951953125, |
| "learning_rate": 0.0008870708053195413, |
| "loss": 1371.0441, |
| "step": 2260 |
| }, |
| { |
| "ce_loss_10": 3.6066513299942016, |
| "ce_loss_13": 3.5011353135108947, |
| "ce_loss_2": 4.688438081741333, |
| "ce_loss_3": 4.419037127494812, |
| "ce_loss_7": 3.8243068933486937, |
| "epoch": 0.227, |
| "grad_norm": 612.0, |
| "kl_loss_10": 236.37487716674804, |
| "kl_loss_2": 2419.1595703125, |
| "kl_loss_3": 1947.7892822265626, |
| "kl_loss_7": 736.9884735107422, |
| "learning_rate": 0.0008860644806944918, |
| "loss": 1346.316, |
| "step": 2270 |
| }, |
| { |
| "ce_loss_10": 3.5470305681228638, |
| "ce_loss_13": 3.4408384203910827, |
| "ce_loss_2": 4.675415754318237, |
| "ce_loss_3": 4.405515837669372, |
| "ce_loss_7": 3.7811434388160707, |
| "epoch": 0.228, |
| "grad_norm": 712.0, |
| "kl_loss_10": 236.5175895690918, |
| "kl_loss_2": 2511.8415283203126, |
| "kl_loss_3": 2041.5828552246094, |
| "kl_loss_7": 773.2159851074218, |
| "learning_rate": 0.0008850542684044079, |
| "loss": 1347.2301, |
| "step": 2280 |
| }, |
| { |
| "ce_loss_10": 3.525200033187866, |
| "ce_loss_13": 3.4121009707450867, |
| "ce_loss_2": 4.704805684089661, |
| "ce_loss_3": 4.428252863883972, |
| "ce_loss_7": 3.7681017994880674, |
| "epoch": 0.229, |
| "grad_norm": 744.0, |
| "kl_loss_10": 243.2204231262207, |
| "kl_loss_2": 2609.259875488281, |
| "kl_loss_3": 2137.3057250976562, |
| "kl_loss_7": 781.1770416259766, |
| "learning_rate": 0.0008840401786221159, |
| "loss": 1392.1494, |
| "step": 2290 |
| }, |
| { |
| "ce_loss_10": 3.644639456272125, |
| "ce_loss_13": 3.546596646308899, |
| "ce_loss_2": 4.720036673545837, |
| "ce_loss_3": 4.461656093597412, |
| "ce_loss_7": 3.8639742493629456, |
| "epoch": 0.23, |
| "grad_norm": 736.0, |
| "kl_loss_10": 221.5949806213379, |
| "kl_loss_2": 2383.692004394531, |
| "kl_loss_3": 1920.7404052734375, |
| "kl_loss_7": 726.6697357177734, |
| "learning_rate": 0.000883022221559489, |
| "loss": 1309.8631, |
| "step": 2300 |
| }, |
| { |
| "ce_loss_10": 3.6106560468673705, |
| "ce_loss_13": 3.5103928685188293, |
| "ce_loss_2": 4.718800568580628, |
| "ce_loss_3": 4.453631711006165, |
| "ce_loss_7": 3.833037328720093, |
| "epoch": 0.231, |
| "grad_norm": 668.0, |
| "kl_loss_10": 224.89765014648438, |
| "kl_loss_2": 2469.4252197265623, |
| "kl_loss_3": 2018.495166015625, |
| "kl_loss_7": 748.8079467773438, |
| "learning_rate": 0.0008820004074673434, |
| "loss": 1405.4977, |
| "step": 2310 |
| }, |
| { |
| "ce_loss_10": 3.509887623786926, |
| "ce_loss_13": 3.4120625376701357, |
| "ce_loss_2": 4.630102276802063, |
| "ce_loss_3": 4.358427214622497, |
| "ce_loss_7": 3.748315227031708, |
| "epoch": 0.232, |
| "grad_norm": 604.0, |
| "kl_loss_10": 223.46416931152345, |
| "kl_loss_2": 2484.790771484375, |
| "kl_loss_3": 2005.2869995117187, |
| "kl_loss_7": 761.2884399414063, |
| "learning_rate": 0.0008809747466353355, |
| "loss": 1341.5085, |
| "step": 2320 |
| }, |
| { |
| "ce_loss_10": 3.522110950946808, |
| "ce_loss_13": 3.4228403091430666, |
| "ce_loss_2": 4.653229188919068, |
| "ce_loss_3": 4.378945517539978, |
| "ce_loss_7": 3.7502294540405274, |
| "epoch": 0.233, |
| "grad_norm": 744.0, |
| "kl_loss_10": 224.23116912841797, |
| "kl_loss_2": 2499.1381958007814, |
| "kl_loss_3": 2020.5157836914063, |
| "kl_loss_7": 752.2868743896485, |
| "learning_rate": 0.0008799452493918585, |
| "loss": 1366.2092, |
| "step": 2330 |
| }, |
| { |
| "ce_loss_10": 3.600525939464569, |
| "ce_loss_13": 3.501133692264557, |
| "ce_loss_2": 4.698138499259949, |
| "ce_loss_3": 4.4309428334236145, |
| "ce_loss_7": 3.8393119096755983, |
| "epoch": 0.234, |
| "grad_norm": 656.0, |
| "kl_loss_10": 221.8571762084961, |
| "kl_loss_2": 2452.500280761719, |
| "kl_loss_3": 1976.1439636230468, |
| "kl_loss_7": 759.1389068603515, |
| "learning_rate": 0.0008789119261039385, |
| "loss": 1400.5569, |
| "step": 2340 |
| }, |
| { |
| "ce_loss_10": 3.5126537322998046, |
| "ce_loss_13": 3.412049424648285, |
| "ce_loss_2": 4.627605974674225, |
| "ce_loss_3": 4.359820437431336, |
| "ce_loss_7": 3.747655713558197, |
| "epoch": 0.235, |
| "grad_norm": 584.0, |
| "kl_loss_10": 220.69495086669923, |
| "kl_loss_2": 2450.3417724609376, |
| "kl_loss_3": 1979.037158203125, |
| "kl_loss_7": 752.3414123535156, |
| "learning_rate": 0.0008778747871771292, |
| "loss": 1338.277, |
| "step": 2350 |
| }, |
| { |
| "ce_loss_10": 3.5650462746620177, |
| "ce_loss_13": 3.4650426387786863, |
| "ce_loss_2": 4.640904521942138, |
| "ce_loss_3": 4.3729163646698, |
| "ce_loss_7": 3.78610600233078, |
| "epoch": 0.236, |
| "grad_norm": 628.0, |
| "kl_loss_10": 215.22831954956055, |
| "kl_loss_2": 2399.6547119140623, |
| "kl_loss_3": 1925.4503356933594, |
| "kl_loss_7": 727.8779388427735, |
| "learning_rate": 0.0008768338430554083, |
| "loss": 1316.2055, |
| "step": 2360 |
| }, |
| { |
| "ce_loss_10": 3.572676420211792, |
| "ce_loss_13": 3.4714962005615235, |
| "ce_loss_2": 4.678735136985779, |
| "ce_loss_3": 4.39429270029068, |
| "ce_loss_7": 3.8077693939208985, |
| "epoch": 0.237, |
| "grad_norm": 688.0, |
| "kl_loss_10": 226.92397766113282, |
| "kl_loss_2": 2426.2300659179687, |
| "kl_loss_3": 1939.4405090332032, |
| "kl_loss_7": 752.637564086914, |
| "learning_rate": 0.0008757891042210713, |
| "loss": 1346.3338, |
| "step": 2370 |
| }, |
| { |
| "ce_loss_10": 3.592969560623169, |
| "ce_loss_13": 3.493350553512573, |
| "ce_loss_2": 4.688189601898193, |
| "ce_loss_3": 4.413512086868286, |
| "ce_loss_7": 3.821557307243347, |
| "epoch": 0.238, |
| "grad_norm": 656.0, |
| "kl_loss_10": 225.66336822509766, |
| "kl_loss_2": 2421.9510131835937, |
| "kl_loss_3": 1946.20556640625, |
| "kl_loss_7": 745.2722961425782, |
| "learning_rate": 0.0008747405811946271, |
| "loss": 1343.8345, |
| "step": 2380 |
| }, |
| { |
| "ce_loss_10": 3.49123694896698, |
| "ce_loss_13": 3.389770042896271, |
| "ce_loss_2": 4.654137110710144, |
| "ce_loss_3": 4.386571860313415, |
| "ce_loss_7": 3.731127667427063, |
| "epoch": 0.239, |
| "grad_norm": 616.0, |
| "kl_loss_10": 230.47370223999025, |
| "kl_loss_2": 2561.850231933594, |
| "kl_loss_3": 2084.1000549316404, |
| "kl_loss_7": 769.9209930419922, |
| "learning_rate": 0.0008736882845346905, |
| "loss": 1355.4398, |
| "step": 2390 |
| }, |
| { |
| "ce_loss_10": 3.5909661054611206, |
| "ce_loss_13": 3.4839738249778747, |
| "ce_loss_2": 4.705090403556824, |
| "ce_loss_3": 4.426928949356079, |
| "ce_loss_7": 3.8166149973869326, |
| "epoch": 0.24, |
| "grad_norm": 652.0, |
| "kl_loss_10": 232.27595291137695, |
| "kl_loss_2": 2463.9607543945312, |
| "kl_loss_3": 1976.524102783203, |
| "kl_loss_7": 748.5501831054687, |
| "learning_rate": 0.0008726322248378774, |
| "loss": 1350.1158, |
| "step": 2400 |
| }, |
| { |
| "ce_loss_10": 3.5857128262519837, |
| "ce_loss_13": 3.485344707965851, |
| "ce_loss_2": 4.720745325088501, |
| "ce_loss_3": 4.446980690956115, |
| "ce_loss_7": 3.815141475200653, |
| "epoch": 0.241, |
| "grad_norm": 620.0, |
| "kl_loss_10": 225.08902893066406, |
| "kl_loss_2": 2502.8332275390626, |
| "kl_loss_3": 2020.9147888183593, |
| "kl_loss_7": 748.0698608398437, |
| "learning_rate": 0.0008715724127386971, |
| "loss": 1388.577, |
| "step": 2410 |
| }, |
| { |
| "ce_loss_10": 3.656253182888031, |
| "ce_loss_13": 3.5548863530159, |
| "ce_loss_2": 4.740737318992615, |
| "ce_loss_3": 4.4647379398345945, |
| "ce_loss_7": 3.869425129890442, |
| "epoch": 0.242, |
| "grad_norm": 656.0, |
| "kl_loss_10": 233.72190628051757, |
| "kl_loss_2": 2420.5750244140627, |
| "kl_loss_3": 1941.4000915527345, |
| "kl_loss_7": 733.7942932128906, |
| "learning_rate": 0.0008705088589094458, |
| "loss": 1349.3883, |
| "step": 2420 |
| }, |
| { |
| "ce_loss_10": 3.6831162333488465, |
| "ce_loss_13": 3.5650919318199157, |
| "ce_loss_2": 4.759288740158081, |
| "ce_loss_3": 4.490408134460449, |
| "ce_loss_7": 3.8880489230155946, |
| "epoch": 0.243, |
| "grad_norm": 640.0, |
| "kl_loss_10": 258.1027114868164, |
| "kl_loss_2": 2453.8090209960938, |
| "kl_loss_3": 1977.7547729492187, |
| "kl_loss_7": 746.0192138671875, |
| "learning_rate": 0.0008694415740600988, |
| "loss": 1371.979, |
| "step": 2430 |
| }, |
| { |
| "ce_loss_10": 3.539147210121155, |
| "ce_loss_13": 3.418752908706665, |
| "ce_loss_2": 4.6640907526016235, |
| "ce_loss_3": 4.396868014335633, |
| "ce_loss_7": 3.753141713142395, |
| "epoch": 0.244, |
| "grad_norm": 720.0, |
| "kl_loss_10": 272.4710403442383, |
| "kl_loss_2": 2511.5777099609377, |
| "kl_loss_3": 2045.4482543945312, |
| "kl_loss_7": 744.3600494384766, |
| "learning_rate": 0.0008683705689382025, |
| "loss": 1374.2081, |
| "step": 2440 |
| }, |
| { |
| "ce_loss_10": 3.614233338832855, |
| "ce_loss_13": 3.502686250209808, |
| "ce_loss_2": 4.680193209648133, |
| "ce_loss_3": 4.409785914421081, |
| "ce_loss_7": 3.81562682390213, |
| "epoch": 0.245, |
| "grad_norm": 680.0, |
| "kl_loss_10": 242.92661514282227, |
| "kl_loss_2": 2418.696484375, |
| "kl_loss_3": 1945.9917602539062, |
| "kl_loss_7": 727.0407897949219, |
| "learning_rate": 0.0008672958543287666, |
| "loss": 1361.5771, |
| "step": 2450 |
| }, |
| { |
| "ce_loss_10": 3.6207616090774537, |
| "ce_loss_13": 3.5146057486534117, |
| "ce_loss_2": 4.6799437522888185, |
| "ce_loss_3": 4.408400678634644, |
| "ce_loss_7": 3.8393305063247682, |
| "epoch": 0.246, |
| "grad_norm": 640.0, |
| "kl_loss_10": 233.26868438720703, |
| "kl_loss_2": 2373.7197509765624, |
| "kl_loss_3": 1900.9493347167968, |
| "kl_loss_7": 737.9279724121094, |
| "learning_rate": 0.0008662174410541554, |
| "loss": 1323.3875, |
| "step": 2460 |
| }, |
| { |
| "ce_loss_10": 3.5795403718948364, |
| "ce_loss_13": 3.4791687726974487, |
| "ce_loss_2": 4.657073163986206, |
| "ce_loss_3": 4.389124321937561, |
| "ce_loss_7": 3.797624135017395, |
| "epoch": 0.247, |
| "grad_norm": 688.0, |
| "kl_loss_10": 228.68382720947267, |
| "kl_loss_2": 2405.7741943359374, |
| "kl_loss_3": 1929.0893249511719, |
| "kl_loss_7": 730.4046020507812, |
| "learning_rate": 0.0008651353399739787, |
| "loss": 1361.2713, |
| "step": 2470 |
| }, |
| { |
| "ce_loss_10": 3.6015311241149903, |
| "ce_loss_13": 3.5007805585861207, |
| "ce_loss_2": 4.693076491355896, |
| "ce_loss_3": 4.420244932174683, |
| "ce_loss_7": 3.8255343675613402, |
| "epoch": 0.248, |
| "grad_norm": 628.0, |
| "kl_loss_10": 225.77268676757814, |
| "kl_loss_2": 2413.6783447265625, |
| "kl_loss_3": 1937.1076232910157, |
| "kl_loss_7": 735.3206512451172, |
| "learning_rate": 0.0008640495619849821, |
| "loss": 1345.3404, |
| "step": 2480 |
| }, |
| { |
| "ce_loss_10": 3.5668503522872923, |
| "ce_loss_13": 3.4637187004089354, |
| "ce_loss_2": 4.644854807853699, |
| "ce_loss_3": 4.374481606483459, |
| "ce_loss_7": 3.791785490512848, |
| "epoch": 0.249, |
| "grad_norm": 616.0, |
| "kl_loss_10": 223.47670059204103, |
| "kl_loss_2": 2406.82578125, |
| "kl_loss_3": 1930.5429321289062, |
| "kl_loss_7": 738.2828582763672, |
| "learning_rate": 0.0008629601180209381, |
| "loss": 1326.733, |
| "step": 2490 |
| }, |
| { |
| "ce_loss_10": 3.5605925559997558, |
| "ce_loss_13": 3.4623565435409547, |
| "ce_loss_2": 4.648912143707276, |
| "ce_loss_3": 4.37358832359314, |
| "ce_loss_7": 3.7822588205337526, |
| "epoch": 0.25, |
| "grad_norm": 588.0, |
| "kl_loss_10": 221.60515823364258, |
| "kl_loss_2": 2408.634729003906, |
| "kl_loss_3": 1918.1406311035157, |
| "kl_loss_7": 733.2383361816406, |
| "learning_rate": 0.000861867019052535, |
| "loss": 1350.9314, |
| "step": 2500 |
| }, |
| { |
| "ce_loss_10": 3.4750850677490233, |
| "ce_loss_13": 3.3757749915122988, |
| "ce_loss_2": 4.618335509300232, |
| "ce_loss_3": 4.344382691383362, |
| "ce_loss_7": 3.7118528127670287, |
| "epoch": 0.251, |
| "grad_norm": 664.0, |
| "kl_loss_10": 225.6886344909668, |
| "kl_loss_2": 2520.0691040039064, |
| "kl_loss_3": 2028.4780883789062, |
| "kl_loss_7": 750.8930267333984, |
| "learning_rate": 0.0008607702760872678, |
| "loss": 1377.2211, |
| "step": 2510 |
| }, |
| { |
| "ce_loss_10": 3.5948320031166077, |
| "ce_loss_13": 3.493862783908844, |
| "ce_loss_2": 4.663220858573913, |
| "ce_loss_3": 4.39898452758789, |
| "ce_loss_7": 3.8143251180648803, |
| "epoch": 0.252, |
| "grad_norm": 736.0, |
| "kl_loss_10": 220.9385528564453, |
| "kl_loss_2": 2382.33095703125, |
| "kl_loss_3": 1919.1317260742187, |
| "kl_loss_7": 728.4733703613281, |
| "learning_rate": 0.0008596699001693256, |
| "loss": 1356.6151, |
| "step": 2520 |
| }, |
| { |
| "ce_loss_10": 3.6045937299728394, |
| "ce_loss_13": 3.5089424014091493, |
| "ce_loss_2": 4.674148344993592, |
| "ce_loss_3": 4.401587581634521, |
| "ce_loss_7": 3.8156301379203796, |
| "epoch": 0.253, |
| "grad_norm": 664.0, |
| "kl_loss_10": 222.60222702026368, |
| "kl_loss_2": 2399.647021484375, |
| "kl_loss_3": 1923.217791748047, |
| "kl_loss_7": 722.3135375976562, |
| "learning_rate": 0.0008585659023794818, |
| "loss": 1357.2354, |
| "step": 2530 |
| }, |
| { |
| "ce_loss_10": 3.5605056166648863, |
| "ce_loss_13": 3.458607590198517, |
| "ce_loss_2": 4.6924147605896, |
| "ce_loss_3": 4.421391654014587, |
| "ce_loss_7": 3.799249517917633, |
| "epoch": 0.254, |
| "grad_norm": 660.0, |
| "kl_loss_10": 233.0737617492676, |
| "kl_loss_2": 2499.324670410156, |
| "kl_loss_3": 2030.4549194335937, |
| "kl_loss_7": 761.279296875, |
| "learning_rate": 0.0008574582938349817, |
| "loss": 1364.7606, |
| "step": 2540 |
| }, |
| { |
| "ce_loss_10": 3.5620136737823485, |
| "ce_loss_13": 3.450424087047577, |
| "ce_loss_2": 4.679884123802185, |
| "ce_loss_3": 4.403433465957642, |
| "ce_loss_7": 3.8059414982795716, |
| "epoch": 0.255, |
| "grad_norm": 648.0, |
| "kl_loss_10": 238.74318084716796, |
| "kl_loss_2": 2486.331640625, |
| "kl_loss_3": 1999.8115600585938, |
| "kl_loss_7": 776.2368225097656, |
| "learning_rate": 0.0008563470856894315, |
| "loss": 1329.6849, |
| "step": 2550 |
| }, |
| { |
| "ce_loss_10": 3.540405642986298, |
| "ce_loss_13": 3.4457826972007752, |
| "ce_loss_2": 4.656697821617127, |
| "ce_loss_3": 4.386443245410919, |
| "ce_loss_7": 3.772416353225708, |
| "epoch": 0.256, |
| "grad_norm": 760.0, |
| "kl_loss_10": 221.72702865600587, |
| "kl_loss_2": 2443.3952514648436, |
| "kl_loss_3": 1969.1475952148437, |
| "kl_loss_7": 745.7592987060547, |
| "learning_rate": 0.0008552322891326845, |
| "loss": 1346.8541, |
| "step": 2560 |
| }, |
| { |
| "ce_loss_10": 3.5136868953704834, |
| "ce_loss_13": 3.415074276924133, |
| "ce_loss_2": 4.637244987487793, |
| "ce_loss_3": 4.365770423412323, |
| "ce_loss_7": 3.741610062122345, |
| "epoch": 0.257, |
| "grad_norm": 788.0, |
| "kl_loss_10": 218.68516159057617, |
| "kl_loss_2": 2477.789599609375, |
| "kl_loss_3": 2001.3069702148437, |
| "kl_loss_7": 743.3714080810547, |
| "learning_rate": 0.0008541139153907296, |
| "loss": 1329.1979, |
| "step": 2570 |
| }, |
| { |
| "ce_loss_10": 3.472187507152557, |
| "ce_loss_13": 3.3729379415512084, |
| "ce_loss_2": 4.581104445457458, |
| "ce_loss_3": 4.308674609661102, |
| "ce_loss_7": 3.69760080575943, |
| "epoch": 0.258, |
| "grad_norm": 636.0, |
| "kl_loss_10": 213.4689498901367, |
| "kl_loss_2": 2453.299768066406, |
| "kl_loss_3": 1976.8992919921875, |
| "kl_loss_7": 745.6326965332031, |
| "learning_rate": 0.0008529919757255782, |
| "loss": 1354.7893, |
| "step": 2580 |
| }, |
| { |
| "ce_loss_10": 3.500008797645569, |
| "ce_loss_13": 3.408738708496094, |
| "ce_loss_2": 4.560009336471557, |
| "ce_loss_3": 4.2931175351142885, |
| "ce_loss_7": 3.716734218597412, |
| "epoch": 0.259, |
| "grad_norm": 624.0, |
| "kl_loss_10": 208.80025100708008, |
| "kl_loss_2": 2371.1708251953123, |
| "kl_loss_3": 1897.6802124023438, |
| "kl_loss_7": 721.6227478027344, |
| "learning_rate": 0.0008518664814351503, |
| "loss": 1306.301, |
| "step": 2590 |
| }, |
| { |
| "ce_loss_10": 3.472637712955475, |
| "ce_loss_13": 3.37472482919693, |
| "ce_loss_2": 4.598471093177795, |
| "ce_loss_3": 4.321799778938294, |
| "ce_loss_7": 3.7131651520729063, |
| "epoch": 0.26, |
| "grad_norm": 644.0, |
| "kl_loss_10": 222.20911254882813, |
| "kl_loss_2": 2491.116162109375, |
| "kl_loss_3": 2007.4335876464843, |
| "kl_loss_7": 764.1704193115235, |
| "learning_rate": 0.0008507374438531607, |
| "loss": 1407.2535, |
| "step": 2600 |
| }, |
| { |
| "ce_loss_10": 3.447394275665283, |
| "ce_loss_13": 3.3539512395858764, |
| "ce_loss_2": 4.5548292875289915, |
| "ce_loss_3": 4.286789774894714, |
| "ce_loss_7": 3.6768516659736634, |
| "epoch": 0.261, |
| "grad_norm": 676.0, |
| "kl_loss_10": 214.65092697143555, |
| "kl_loss_2": 2437.03447265625, |
| "kl_loss_3": 1973.9089477539062, |
| "kl_loss_7": 738.8113952636719, |
| "learning_rate": 0.0008496048743490053, |
| "loss": 1332.7279, |
| "step": 2610 |
| }, |
| { |
| "ce_loss_10": 3.597834813594818, |
| "ce_loss_13": 3.5061428785324096, |
| "ce_loss_2": 4.655121803283691, |
| "ce_loss_3": 4.391561770439148, |
| "ce_loss_7": 3.814839816093445, |
| "epoch": 0.262, |
| "grad_norm": 564.0, |
| "kl_loss_10": 212.99711074829102, |
| "kl_loss_2": 2362.529577636719, |
| "kl_loss_3": 1891.9757995605469, |
| "kl_loss_7": 720.1662811279297, |
| "learning_rate": 0.0008484687843276469, |
| "loss": 1316.5832, |
| "step": 2620 |
| }, |
| { |
| "ce_loss_10": 3.533200740814209, |
| "ce_loss_13": 3.4373727798461915, |
| "ce_loss_2": 4.636826205253601, |
| "ce_loss_3": 4.3528993129730225, |
| "ce_loss_7": 3.7636064171791075, |
| "epoch": 0.263, |
| "grad_norm": 688.0, |
| "kl_loss_10": 217.95888977050782, |
| "kl_loss_2": 2432.091143798828, |
| "kl_loss_3": 1936.0632568359374, |
| "kl_loss_7": 738.968881225586, |
| "learning_rate": 0.0008473291852294987, |
| "loss": 1361.4943, |
| "step": 2630 |
| }, |
| { |
| "ce_loss_10": 3.5451728224754335, |
| "ce_loss_13": 3.446604347229004, |
| "ce_loss_2": 4.630346298217773, |
| "ce_loss_3": 4.3619812488555905, |
| "ce_loss_7": 3.7699208855628967, |
| "epoch": 0.264, |
| "grad_norm": 672.0, |
| "kl_loss_10": 220.66769561767578, |
| "kl_loss_2": 2436.2069458007813, |
| "kl_loss_3": 1956.8639526367188, |
| "kl_loss_7": 742.7248840332031, |
| "learning_rate": 0.0008461860885303114, |
| "loss": 1327.3721, |
| "step": 2640 |
| }, |
| { |
| "ce_loss_10": 3.5666414141654967, |
| "ce_loss_13": 3.4715107679367065, |
| "ce_loss_2": 4.639662265777588, |
| "ce_loss_3": 4.371685028076172, |
| "ce_loss_7": 3.788040292263031, |
| "epoch": 0.265, |
| "grad_norm": 656.0, |
| "kl_loss_10": 216.69636611938478, |
| "kl_loss_2": 2373.723107910156, |
| "kl_loss_3": 1899.1220764160157, |
| "kl_loss_7": 725.1952423095703, |
| "learning_rate": 0.000845039505741056, |
| "loss": 1327.8555, |
| "step": 2650 |
| }, |
| { |
| "ce_loss_10": 3.5541250467300416, |
| "ce_loss_13": 3.4555353045463564, |
| "ce_loss_2": 4.645513963699341, |
| "ce_loss_3": 4.378093981742859, |
| "ce_loss_7": 3.7833709001541136, |
| "epoch": 0.266, |
| "grad_norm": 668.0, |
| "kl_loss_10": 224.05798721313477, |
| "kl_loss_2": 2449.707385253906, |
| "kl_loss_3": 1967.4787109375, |
| "kl_loss_7": 750.5478302001953, |
| "learning_rate": 0.0008438894484078086, |
| "loss": 1378.657, |
| "step": 2660 |
| }, |
| { |
| "ce_loss_10": 3.557729125022888, |
| "ce_loss_13": 3.4628395080566405, |
| "ce_loss_2": 4.638984179496765, |
| "ce_loss_3": 4.374520492553711, |
| "ce_loss_7": 3.7801038026809692, |
| "epoch": 0.267, |
| "grad_norm": 796.0, |
| "kl_loss_10": 218.22870254516602, |
| "kl_loss_2": 2393.3899047851564, |
| "kl_loss_3": 1931.0333312988282, |
| "kl_loss_7": 732.3969909667969, |
| "learning_rate": 0.0008427359281116334, |
| "loss": 1329.4188, |
| "step": 2670 |
| }, |
| { |
| "ce_loss_10": 3.4619020819664, |
| "ce_loss_13": 3.3649930715560914, |
| "ce_loss_2": 4.586506628990174, |
| "ce_loss_3": 4.3114288449287415, |
| "ce_loss_7": 3.6977506399154665, |
| "epoch": 0.268, |
| "grad_norm": 560.0, |
| "kl_loss_10": 218.7227699279785, |
| "kl_loss_2": 2471.7220703125, |
| "kl_loss_3": 1986.8973815917968, |
| "kl_loss_7": 744.8811431884766, |
| "learning_rate": 0.0008415789564684673, |
| "loss": 1344.4947, |
| "step": 2680 |
| }, |
| { |
| "ce_loss_10": 3.7084735155105593, |
| "ce_loss_13": 3.610187065601349, |
| "ce_loss_2": 4.759761667251587, |
| "ce_loss_3": 4.487373423576355, |
| "ce_loss_7": 3.9243152022361754, |
| "epoch": 0.269, |
| "grad_norm": 756.0, |
| "kl_loss_10": 223.18955688476564, |
| "kl_loss_2": 2329.3449951171874, |
| "kl_loss_3": 1847.8426208496094, |
| "kl_loss_7": 721.1707153320312, |
| "learning_rate": 0.0008404185451290017, |
| "loss": 1296.1146, |
| "step": 2690 |
| }, |
| { |
| "ce_loss_10": 3.578732097148895, |
| "ce_loss_13": 3.4770421504974367, |
| "ce_loss_2": 4.659151983261109, |
| "ce_loss_3": 4.38085663318634, |
| "ce_loss_7": 3.7948765754699707, |
| "epoch": 0.27, |
| "grad_norm": 692.0, |
| "kl_loss_10": 224.61487731933593, |
| "kl_loss_2": 2417.559912109375, |
| "kl_loss_3": 1939.3710815429688, |
| "kl_loss_7": 727.4687561035156, |
| "learning_rate": 0.0008392547057785661, |
| "loss": 1317.3512, |
| "step": 2700 |
| }, |
| { |
| "ce_loss_10": 3.5002851486206055, |
| "ce_loss_13": 3.396597516536713, |
| "ce_loss_2": 4.633592844009399, |
| "ce_loss_3": 4.365511727333069, |
| "ce_loss_7": 3.738453209400177, |
| "epoch": 0.271, |
| "grad_norm": 732.0, |
| "kl_loss_10": 231.73975296020507, |
| "kl_loss_2": 2517.132354736328, |
| "kl_loss_3": 2044.1573425292968, |
| "kl_loss_7": 768.5197204589844, |
| "learning_rate": 0.0008380874501370098, |
| "loss": 1329.0642, |
| "step": 2710 |
| }, |
| { |
| "ce_loss_10": 3.5027819752693174, |
| "ce_loss_13": 3.4010127544403077, |
| "ce_loss_2": 4.628546047210693, |
| "ce_loss_3": 4.359855842590332, |
| "ce_loss_7": 3.7310682773590087, |
| "epoch": 0.272, |
| "grad_norm": 628.0, |
| "kl_loss_10": 236.13679275512695, |
| "kl_loss_2": 2503.883825683594, |
| "kl_loss_3": 2020.1560424804688, |
| "kl_loss_7": 758.8711700439453, |
| "learning_rate": 0.0008369167899585841, |
| "loss": 1363.7068, |
| "step": 2720 |
| }, |
| { |
| "ce_loss_10": 3.6181455850601196, |
| "ce_loss_13": 3.521961879730225, |
| "ce_loss_2": 4.664963984489441, |
| "ce_loss_3": 4.396141123771668, |
| "ce_loss_7": 3.839101779460907, |
| "epoch": 0.273, |
| "grad_norm": 636.0, |
| "kl_loss_10": 223.16615371704103, |
| "kl_loss_2": 2348.37099609375, |
| "kl_loss_3": 1879.9346130371093, |
| "kl_loss_7": 730.2560852050781, |
| "learning_rate": 0.0008357427370318238, |
| "loss": 1337.943, |
| "step": 2730 |
| }, |
| { |
| "ce_loss_10": 3.571904718875885, |
| "ce_loss_13": 3.4762736320495606, |
| "ce_loss_2": 4.677034759521485, |
| "ce_loss_3": 4.40289398431778, |
| "ce_loss_7": 3.7918145298957824, |
| "epoch": 0.274, |
| "grad_norm": 772.0, |
| "kl_loss_10": 222.57760772705078, |
| "kl_loss_2": 2451.346435546875, |
| "kl_loss_3": 1973.4313354492188, |
| "kl_loss_7": 730.7371429443359, |
| "learning_rate": 0.0008345653031794292, |
| "loss": 1347.6243, |
| "step": 2740 |
| }, |
| { |
| "ce_loss_10": 3.5737530469894407, |
| "ce_loss_13": 3.4740692615509032, |
| "ce_loss_2": 4.659031462669373, |
| "ce_loss_3": 4.387771344184875, |
| "ce_loss_7": 3.792672348022461, |
| "epoch": 0.275, |
| "grad_norm": 672.0, |
| "kl_loss_10": 222.67840805053712, |
| "kl_loss_2": 2406.277941894531, |
| "kl_loss_3": 1924.3234985351562, |
| "kl_loss_7": 730.7620574951172, |
| "learning_rate": 0.0008333845002581458, |
| "loss": 1320.2523, |
| "step": 2750 |
| }, |
| { |
| "ce_loss_10": 3.498860251903534, |
| "ce_loss_13": 3.400104033946991, |
| "ce_loss_2": 4.611243772506714, |
| "ce_loss_3": 4.342458128929138, |
| "ce_loss_7": 3.733369469642639, |
| "epoch": 0.276, |
| "grad_norm": 644.0, |
| "kl_loss_10": 224.65963973999024, |
| "kl_loss_2": 2495.7015869140623, |
| "kl_loss_3": 2015.1633422851562, |
| "kl_loss_7": 762.1438781738282, |
| "learning_rate": 0.0008322003401586462, |
| "loss": 1364.4495, |
| "step": 2760 |
| }, |
| { |
| "ce_loss_10": 3.532784569263458, |
| "ce_loss_13": 3.440683197975159, |
| "ce_loss_2": 4.59234881401062, |
| "ce_loss_3": 4.320498394966125, |
| "ce_loss_7": 3.7502055525779725, |
| "epoch": 0.277, |
| "grad_norm": 724.0, |
| "kl_loss_10": 211.5718635559082, |
| "kl_loss_2": 2343.010675048828, |
| "kl_loss_3": 1873.985821533203, |
| "kl_loss_7": 709.5305114746094, |
| "learning_rate": 0.0008310128348054094, |
| "loss": 1276.2701, |
| "step": 2770 |
| }, |
| { |
| "ce_loss_10": 3.5014058470726015, |
| "ce_loss_13": 3.406921911239624, |
| "ce_loss_2": 4.603280448913575, |
| "ce_loss_3": 4.329492771625519, |
| "ce_loss_7": 3.7248639822006226, |
| "epoch": 0.278, |
| "grad_norm": 652.0, |
| "kl_loss_10": 214.84819107055665, |
| "kl_loss_2": 2431.7943481445313, |
| "kl_loss_3": 1951.13515625, |
| "kl_loss_7": 731.5488677978516, |
| "learning_rate": 0.0008298219961566008, |
| "loss": 1329.707, |
| "step": 2780 |
| }, |
| { |
| "ce_loss_10": 3.4713513970375063, |
| "ce_loss_13": 3.3771822571754457, |
| "ce_loss_2": 4.587963104248047, |
| "ce_loss_3": 4.32047404050827, |
| "ce_loss_7": 3.711584746837616, |
| "epoch": 0.279, |
| "grad_norm": 644.0, |
| "kl_loss_10": 217.99566726684571, |
| "kl_loss_2": 2492.9334106445312, |
| "kl_loss_3": 2016.429022216797, |
| "kl_loss_7": 761.9394226074219, |
| "learning_rate": 0.0008286278362039527, |
| "loss": 1336.5162, |
| "step": 2790 |
| }, |
| { |
| "ce_loss_10": 3.496282184123993, |
| "ce_loss_13": 3.3998995065689086, |
| "ce_loss_2": 4.622646689414978, |
| "ce_loss_3": 4.352741932868957, |
| "ce_loss_7": 3.7300979018211367, |
| "epoch": 0.28, |
| "grad_norm": 592.0, |
| "kl_loss_10": 216.96264114379883, |
| "kl_loss_2": 2489.9998046875, |
| "kl_loss_3": 2008.0425537109375, |
| "kl_loss_7": 746.8909149169922, |
| "learning_rate": 0.0008274303669726426, |
| "loss": 1325.7328, |
| "step": 2800 |
| }, |
| { |
| "ce_loss_10": 3.4048958301544188, |
| "ce_loss_13": 3.3045366764068604, |
| "ce_loss_2": 4.5690556287765505, |
| "ce_loss_3": 4.298348617553711, |
| "ce_loss_7": 3.6378442645072937, |
| "epoch": 0.281, |
| "grad_norm": 684.0, |
| "kl_loss_10": 218.18540115356444, |
| "kl_loss_2": 2561.6716186523436, |
| "kl_loss_3": 2080.7119262695314, |
| "kl_loss_7": 743.8994750976562, |
| "learning_rate": 0.0008262296005211721, |
| "loss": 1337.6219, |
| "step": 2810 |
| }, |
| { |
| "ce_loss_10": 3.5260050296783447, |
| "ce_loss_13": 3.428924763202667, |
| "ce_loss_2": 4.642134022712708, |
| "ce_loss_3": 4.368475294113159, |
| "ce_loss_7": 3.7550152063369753, |
| "epoch": 0.282, |
| "grad_norm": 600.0, |
| "kl_loss_10": 216.54320907592773, |
| "kl_loss_2": 2444.2397338867186, |
| "kl_loss_3": 1975.6794677734374, |
| "kl_loss_7": 734.2523712158203, |
| "learning_rate": 0.0008250255489412463, |
| "loss": 1322.247, |
| "step": 2820 |
| }, |
| { |
| "ce_loss_10": 3.629942464828491, |
| "ce_loss_13": 3.532360863685608, |
| "ce_loss_2": 4.7163821935653685, |
| "ce_loss_3": 4.444535660743713, |
| "ce_loss_7": 3.846136474609375, |
| "epoch": 0.283, |
| "grad_norm": 628.0, |
| "kl_loss_10": 214.22548904418946, |
| "kl_loss_2": 2410.5466918945312, |
| "kl_loss_3": 1930.2673034667969, |
| "kl_loss_7": 714.048681640625, |
| "learning_rate": 0.0008238182243576511, |
| "loss": 1325.0883, |
| "step": 2830 |
| }, |
| { |
| "ce_loss_10": 3.5913167357444764, |
| "ce_loss_13": 3.5031124353408813, |
| "ce_loss_2": 4.611292886734009, |
| "ce_loss_3": 4.339277529716492, |
| "ce_loss_7": 3.796242094039917, |
| "epoch": 0.284, |
| "grad_norm": 620.0, |
| "kl_loss_10": 208.4808135986328, |
| "kl_loss_2": 2294.337286376953, |
| "kl_loss_3": 1814.4247924804688, |
| "kl_loss_7": 695.5996673583984, |
| "learning_rate": 0.0008226076389281315, |
| "loss": 1277.3086, |
| "step": 2840 |
| }, |
| { |
| "ce_loss_10": 3.632950210571289, |
| "ce_loss_13": 3.542364180088043, |
| "ce_loss_2": 4.697378945350647, |
| "ce_loss_3": 4.428278470039368, |
| "ce_loss_7": 3.8434852004051208, |
| "epoch": 0.285, |
| "grad_norm": 592.0, |
| "kl_loss_10": 210.92243499755858, |
| "kl_loss_2": 2375.7556274414064, |
| "kl_loss_3": 1902.3470825195313, |
| "kl_loss_7": 701.8125823974609, |
| "learning_rate": 0.0008213938048432696, |
| "loss": 1285.7082, |
| "step": 2850 |
| }, |
| { |
| "ce_loss_10": 3.561896014213562, |
| "ce_loss_13": 3.4673075318336486, |
| "ce_loss_2": 4.635823488235474, |
| "ce_loss_3": 4.3728371381759645, |
| "ce_loss_7": 3.780589020252228, |
| "epoch": 0.286, |
| "grad_norm": 616.0, |
| "kl_loss_10": 216.6977653503418, |
| "kl_loss_2": 2390.834924316406, |
| "kl_loss_3": 1924.6818054199218, |
| "kl_loss_7": 726.8750396728516, |
| "learning_rate": 0.0008201767343263612, |
| "loss": 1324.6124, |
| "step": 2860 |
| }, |
| { |
| "ce_loss_10": 3.4997401237487793, |
| "ce_loss_13": 3.4044744968414307, |
| "ce_loss_2": 4.604890465736389, |
| "ce_loss_3": 4.338030159473419, |
| "ce_loss_7": 3.7291186928749083, |
| "epoch": 0.287, |
| "grad_norm": 616.0, |
| "kl_loss_10": 213.92771530151367, |
| "kl_loss_2": 2444.1182250976562, |
| "kl_loss_3": 1971.163818359375, |
| "kl_loss_7": 731.3478240966797, |
| "learning_rate": 0.0008189564396332927, |
| "loss": 1291.9086, |
| "step": 2870 |
| }, |
| { |
| "ce_loss_10": 3.480617916584015, |
| "ce_loss_13": 3.388473629951477, |
| "ce_loss_2": 4.600887513160705, |
| "ce_loss_3": 4.323178672790528, |
| "ce_loss_7": 3.7104127168655396, |
| "epoch": 0.288, |
| "grad_norm": 668.0, |
| "kl_loss_10": 212.88904190063477, |
| "kl_loss_2": 2441.765899658203, |
| "kl_loss_3": 1961.8893615722657, |
| "kl_loss_7": 728.4373413085938, |
| "learning_rate": 0.0008177329330524181, |
| "loss": 1342.4608, |
| "step": 2880 |
| }, |
| { |
| "ce_loss_10": 3.5435534834861757, |
| "ce_loss_13": 3.4502355217933656, |
| "ce_loss_2": 4.6120285987854, |
| "ce_loss_3": 4.346097040176391, |
| "ce_loss_7": 3.762561321258545, |
| "epoch": 0.289, |
| "grad_norm": 648.0, |
| "kl_loss_10": 212.22290649414063, |
| "kl_loss_2": 2358.1793823242188, |
| "kl_loss_3": 1890.9413208007813, |
| "kl_loss_7": 714.5174743652344, |
| "learning_rate": 0.0008165062269044352, |
| "loss": 1305.3231, |
| "step": 2890 |
| }, |
| { |
| "ce_loss_10": 3.4996484994888304, |
| "ce_loss_13": 3.401354455947876, |
| "ce_loss_2": 4.609268927574158, |
| "ce_loss_3": 4.3294067740440365, |
| "ce_loss_7": 3.723408377170563, |
| "epoch": 0.29, |
| "grad_norm": 660.0, |
| "kl_loss_10": 216.81241302490236, |
| "kl_loss_2": 2451.4824340820314, |
| "kl_loss_3": 1968.3146179199218, |
| "kl_loss_7": 729.5468353271484, |
| "learning_rate": 0.0008152763335422613, |
| "loss": 1337.7896, |
| "step": 2900 |
| }, |
| { |
| "ce_loss_10": 3.4890666246414184, |
| "ce_loss_13": 3.392501711845398, |
| "ce_loss_2": 4.58982219696045, |
| "ce_loss_3": 4.312074947357178, |
| "ce_loss_7": 3.713588225841522, |
| "epoch": 0.291, |
| "grad_norm": 664.0, |
| "kl_loss_10": 218.38675384521486, |
| "kl_loss_2": 2445.5037841796875, |
| "kl_loss_3": 1949.8568176269532, |
| "kl_loss_7": 729.6879028320312, |
| "learning_rate": 0.0008140432653509088, |
| "loss": 1317.595, |
| "step": 2910 |
| }, |
| { |
| "ce_loss_10": 3.538894033432007, |
| "ce_loss_13": 3.4391178250312806, |
| "ce_loss_2": 4.60951418876648, |
| "ce_loss_3": 4.337265026569367, |
| "ce_loss_7": 3.7542282700538636, |
| "epoch": 0.292, |
| "grad_norm": 576.0, |
| "kl_loss_10": 218.85857162475585, |
| "kl_loss_2": 2397.1072692871094, |
| "kl_loss_3": 1916.8259216308593, |
| "kl_loss_7": 718.4374481201172, |
| "learning_rate": 0.0008128070347473608, |
| "loss": 1302.2107, |
| "step": 2920 |
| }, |
| { |
| "ce_loss_10": 3.5429399847984313, |
| "ce_loss_13": 3.447796130180359, |
| "ce_loss_2": 4.665868854522705, |
| "ce_loss_3": 4.389448404312134, |
| "ce_loss_7": 3.7667205929756165, |
| "epoch": 0.293, |
| "grad_norm": 664.0, |
| "kl_loss_10": 216.54725646972656, |
| "kl_loss_2": 2487.7160583496093, |
| "kl_loss_3": 2004.9421325683593, |
| "kl_loss_7": 736.1060913085937, |
| "learning_rate": 0.0008115676541804455, |
| "loss": 1333.5637, |
| "step": 2930 |
| }, |
| { |
| "ce_loss_10": 3.5453550815582275, |
| "ce_loss_13": 3.4535977363586428, |
| "ce_loss_2": 4.623500943183899, |
| "ce_loss_3": 4.348728823661804, |
| "ce_loss_7": 3.760838878154755, |
| "epoch": 0.294, |
| "grad_norm": 580.0, |
| "kl_loss_10": 209.94191284179686, |
| "kl_loss_2": 2400.48662109375, |
| "kl_loss_3": 1909.5526062011718, |
| "kl_loss_7": 710.1752807617188, |
| "learning_rate": 0.0008103251361307119, |
| "loss": 1325.5172, |
| "step": 2940 |
| }, |
| { |
| "ce_loss_10": 3.578377163410187, |
| "ce_loss_13": 3.4808244347572326, |
| "ce_loss_2": 4.6591003894805905, |
| "ce_loss_3": 4.395820617675781, |
| "ce_loss_7": 3.793817377090454, |
| "epoch": 0.295, |
| "grad_norm": 616.0, |
| "kl_loss_10": 214.81473617553712, |
| "kl_loss_2": 2396.3223205566405, |
| "kl_loss_3": 1926.4922485351562, |
| "kl_loss_7": 722.0272766113281, |
| "learning_rate": 0.0008090794931103026, |
| "loss": 1300.3234, |
| "step": 2950 |
| }, |
| { |
| "ce_loss_10": 3.566417765617371, |
| "ce_loss_13": 3.475232172012329, |
| "ce_loss_2": 4.628555154800415, |
| "ce_loss_3": 4.358175444602966, |
| "ce_loss_7": 3.7831589698791506, |
| "epoch": 0.296, |
| "grad_norm": 692.0, |
| "kl_loss_10": 209.84390869140626, |
| "kl_loss_2": 2350.2305419921877, |
| "kl_loss_3": 1877.9652465820313, |
| "kl_loss_7": 713.7039794921875, |
| "learning_rate": 0.0008078307376628291, |
| "loss": 1303.6331, |
| "step": 2960 |
| }, |
| { |
| "ce_loss_10": 3.6232991099357603, |
| "ce_loss_13": 3.534627139568329, |
| "ce_loss_2": 4.6475036382675174, |
| "ce_loss_3": 4.389086437225342, |
| "ce_loss_7": 3.83059047460556, |
| "epoch": 0.297, |
| "grad_norm": 644.0, |
| "kl_loss_10": 205.1537940979004, |
| "kl_loss_2": 2274.82734375, |
| "kl_loss_3": 1823.2497436523438, |
| "kl_loss_7": 686.9072265625, |
| "learning_rate": 0.000806578882363245, |
| "loss": 1259.2264, |
| "step": 2970 |
| }, |
| { |
| "ce_loss_10": 3.536562275886536, |
| "ce_loss_13": 3.447048234939575, |
| "ce_loss_2": 4.597748541831971, |
| "ce_loss_3": 4.3311933994293215, |
| "ce_loss_7": 3.7559500217437742, |
| "epoch": 0.298, |
| "grad_norm": 736.0, |
| "kl_loss_10": 208.43729248046876, |
| "kl_loss_2": 2344.390216064453, |
| "kl_loss_3": 1878.6112243652344, |
| "kl_loss_7": 714.4485260009766, |
| "learning_rate": 0.0008053239398177191, |
| "loss": 1329.3172, |
| "step": 2980 |
| }, |
| { |
| "ce_loss_10": 3.524178981781006, |
| "ce_loss_13": 3.4312392354011534, |
| "ce_loss_2": 4.604809284210205, |
| "ce_loss_3": 4.337883043289184, |
| "ce_loss_7": 3.7429209470748903, |
| "epoch": 0.299, |
| "grad_norm": 684.0, |
| "kl_loss_10": 211.32650604248047, |
| "kl_loss_2": 2394.308056640625, |
| "kl_loss_3": 1917.52822265625, |
| "kl_loss_7": 709.9231262207031, |
| "learning_rate": 0.0008040659226635089, |
| "loss": 1341.8297, |
| "step": 2990 |
| }, |
| { |
| "ce_loss_10": 3.65326806306839, |
| "ce_loss_13": 3.555258011817932, |
| "ce_loss_2": 4.710744786262512, |
| "ce_loss_3": 4.444170761108398, |
| "ce_loss_7": 3.8668533086776735, |
| "epoch": 0.3, |
| "grad_norm": 640.0, |
| "kl_loss_10": 219.24570388793944, |
| "kl_loss_2": 2376.9404907226562, |
| "kl_loss_3": 1902.857159423828, |
| "kl_loss_7": 725.9926879882812, |
| "learning_rate": 0.0008028048435688333, |
| "loss": 1298.4502, |
| "step": 3000 |
| }, |
| { |
| "ce_loss_10": 3.521394634246826, |
| "ce_loss_13": 3.4270112991333006, |
| "ce_loss_2": 4.624356460571289, |
| "ce_loss_3": 4.355751609802246, |
| "ce_loss_7": 3.7494575500488283, |
| "epoch": 0.301, |
| "grad_norm": 716.0, |
| "kl_loss_10": 217.2972724914551, |
| "kl_loss_2": 2452.999304199219, |
| "kl_loss_3": 1985.1250549316405, |
| "kl_loss_7": 732.1629119873047, |
| "learning_rate": 0.0008015407152327448, |
| "loss": 1335.19, |
| "step": 3010 |
| }, |
| { |
| "ce_loss_10": 3.5699279427528383, |
| "ce_loss_13": 3.475005257129669, |
| "ce_loss_2": 4.65969865322113, |
| "ce_loss_3": 4.38304386138916, |
| "ce_loss_7": 3.784406042098999, |
| "epoch": 0.302, |
| "grad_norm": 620.0, |
| "kl_loss_10": 215.99359130859375, |
| "kl_loss_2": 2432.162463378906, |
| "kl_loss_3": 1951.8839721679688, |
| "kl_loss_7": 718.2368713378906, |
| "learning_rate": 0.0008002735503850016, |
| "loss": 1332.6505, |
| "step": 3020 |
| }, |
| { |
| "ce_loss_10": 3.4684691429138184, |
| "ce_loss_13": 3.367643666267395, |
| "ce_loss_2": 4.5924430847167965, |
| "ce_loss_3": 4.30932047367096, |
| "ce_loss_7": 3.6915883660316466, |
| "epoch": 0.303, |
| "grad_norm": 636.0, |
| "kl_loss_10": 224.01161422729493, |
| "kl_loss_2": 2494.453234863281, |
| "kl_loss_3": 2004.73359375, |
| "kl_loss_7": 736.367529296875, |
| "learning_rate": 0.0007990033617859396, |
| "loss": 1348.4062, |
| "step": 3030 |
| }, |
| { |
| "ce_loss_10": 3.5133005499839784, |
| "ce_loss_13": 3.417665791511536, |
| "ce_loss_2": 4.581400918960571, |
| "ce_loss_3": 4.318250679969788, |
| "ce_loss_7": 3.734131360054016, |
| "epoch": 0.304, |
| "grad_norm": 692.0, |
| "kl_loss_10": 218.55305099487305, |
| "kl_loss_2": 2367.1648193359374, |
| "kl_loss_3": 1894.6960754394531, |
| "kl_loss_7": 712.4279693603515, |
| "learning_rate": 0.000797730162226344, |
| "loss": 1274.1975, |
| "step": 3040 |
| }, |
| { |
| "ce_loss_10": 3.540754234790802, |
| "ce_loss_13": 3.4410573482513427, |
| "ce_loss_2": 4.607666325569153, |
| "ce_loss_3": 4.33906877040863, |
| "ce_loss_7": 3.76459002494812, |
| "epoch": 0.305, |
| "grad_norm": 692.0, |
| "kl_loss_10": 221.26933517456055, |
| "kl_loss_2": 2377.095458984375, |
| "kl_loss_3": 1910.9453735351562, |
| "kl_loss_7": 729.3416778564454, |
| "learning_rate": 0.0007964539645273203, |
| "loss": 1293.3233, |
| "step": 3050 |
| }, |
| { |
| "ce_loss_10": 3.549929714202881, |
| "ce_loss_13": 3.4547195076942443, |
| "ce_loss_2": 4.595946025848389, |
| "ce_loss_3": 4.332681286334991, |
| "ce_loss_7": 3.7608805656433106, |
| "epoch": 0.306, |
| "grad_norm": 608.0, |
| "kl_loss_10": 214.02068862915038, |
| "kl_loss_2": 2324.1172485351562, |
| "kl_loss_3": 1866.7198425292968, |
| "kl_loss_7": 705.0489013671875, |
| "learning_rate": 0.000795174781540165, |
| "loss": 1301.7614, |
| "step": 3060 |
| }, |
| { |
| "ce_loss_10": 3.626460921764374, |
| "ce_loss_13": 3.5295538663864137, |
| "ce_loss_2": 4.639704465866089, |
| "ce_loss_3": 4.383927941322327, |
| "ce_loss_7": 3.8362658858299254, |
| "epoch": 0.307, |
| "grad_norm": 644.0, |
| "kl_loss_10": 215.03676071166993, |
| "kl_loss_2": 2264.9541259765624, |
| "kl_loss_3": 1824.0037841796875, |
| "kl_loss_7": 696.3071411132812, |
| "learning_rate": 0.0007938926261462366, |
| "loss": 1288.9521, |
| "step": 3070 |
| }, |
| { |
| "ce_loss_10": 3.5775561928749084, |
| "ce_loss_13": 3.480459380149841, |
| "ce_loss_2": 4.618080592155456, |
| "ce_loss_3": 4.350315952301026, |
| "ce_loss_7": 3.7854344248771667, |
| "epoch": 0.308, |
| "grad_norm": 648.0, |
| "kl_loss_10": 216.656893157959, |
| "kl_loss_2": 2357.475067138672, |
| "kl_loss_3": 1888.0174133300782, |
| "kl_loss_7": 712.7872009277344, |
| "learning_rate": 0.0007926075112568258, |
| "loss": 1316.9054, |
| "step": 3080 |
| }, |
| { |
| "ce_loss_10": 3.5692449688911436, |
| "ce_loss_13": 3.4759126543998717, |
| "ce_loss_2": 4.623606491088867, |
| "ce_loss_3": 4.366301465034485, |
| "ce_loss_7": 3.78162659406662, |
| "epoch": 0.309, |
| "grad_norm": 560.0, |
| "kl_loss_10": 213.1074462890625, |
| "kl_loss_2": 2357.0720764160155, |
| "kl_loss_3": 1902.1767517089843, |
| "kl_loss_7": 709.6952423095703, |
| "learning_rate": 0.0007913194498130252, |
| "loss": 1281.0172, |
| "step": 3090 |
| }, |
| { |
| "ce_loss_10": 3.494074010848999, |
| "ce_loss_13": 3.400245749950409, |
| "ce_loss_2": 4.5784650325775145, |
| "ce_loss_3": 4.316486406326294, |
| "ce_loss_7": 3.7143809318542482, |
| "epoch": 0.31, |
| "grad_norm": 736.0, |
| "kl_loss_10": 216.9530891418457, |
| "kl_loss_2": 2388.186309814453, |
| "kl_loss_3": 1924.5094665527345, |
| "kl_loss_7": 718.4751098632812, |
| "learning_rate": 0.0007900284547855992, |
| "loss": 1312.7211, |
| "step": 3100 |
| }, |
| { |
| "ce_loss_10": 3.5040755391120912, |
| "ce_loss_13": 3.409269428253174, |
| "ce_loss_2": 4.549410009384156, |
| "ce_loss_3": 4.294600343704223, |
| "ce_loss_7": 3.7213049054145815, |
| "epoch": 0.311, |
| "grad_norm": 800.0, |
| "kl_loss_10": 210.81134338378905, |
| "kl_loss_2": 2329.0393676757812, |
| "kl_loss_3": 1876.9636657714843, |
| "kl_loss_7": 708.2128143310547, |
| "learning_rate": 0.0007887345391748532, |
| "loss": 1312.8156, |
| "step": 3110 |
| }, |
| { |
| "ce_loss_10": 3.634432864189148, |
| "ce_loss_13": 3.543325686454773, |
| "ce_loss_2": 4.651146030426025, |
| "ce_loss_3": 4.387193036079407, |
| "ce_loss_7": 3.8459346532821654, |
| "epoch": 0.312, |
| "grad_norm": 1168.0, |
| "kl_loss_10": 212.2933433532715, |
| "kl_loss_2": 2284.2329711914062, |
| "kl_loss_3": 1829.6757873535157, |
| "kl_loss_7": 706.4437377929687, |
| "learning_rate": 0.0007874377160105036, |
| "loss": 1259.3671, |
| "step": 3120 |
| }, |
| { |
| "ce_loss_10": 3.530054819583893, |
| "ce_loss_13": 3.4342761754989626, |
| "ce_loss_2": 4.628887629508972, |
| "ce_loss_3": 4.362399673461914, |
| "ce_loss_7": 3.7490867018699645, |
| "epoch": 0.313, |
| "grad_norm": 608.0, |
| "kl_loss_10": 212.55482711791993, |
| "kl_loss_2": 2429.394366455078, |
| "kl_loss_3": 1971.4875915527343, |
| "kl_loss_7": 728.8083862304687, |
| "learning_rate": 0.0007861379983515449, |
| "loss": 1354.4891, |
| "step": 3130 |
| }, |
| { |
| "ce_loss_10": 3.6109140157699584, |
| "ce_loss_13": 3.5200807809829713, |
| "ce_loss_2": 4.655977535247803, |
| "ce_loss_3": 4.39032473564148, |
| "ce_loss_7": 3.831193280220032, |
| "epoch": 0.314, |
| "grad_norm": 592.0, |
| "kl_loss_10": 209.2868881225586, |
| "kl_loss_2": 2336.8374755859377, |
| "kl_loss_3": 1868.733642578125, |
| "kl_loss_7": 717.5943817138672, |
| "learning_rate": 0.0007848353992861195, |
| "loss": 1273.946, |
| "step": 3140 |
| }, |
| { |
| "ce_loss_10": 3.6957940101623534, |
| "ce_loss_13": 3.595130515098572, |
| "ce_loss_2": 4.7389120101928714, |
| "ce_loss_3": 4.469233250617981, |
| "ce_loss_7": 3.926785933971405, |
| "epoch": 0.315, |
| "grad_norm": 888.0, |
| "kl_loss_10": 223.79472427368165, |
| "kl_loss_2": 2334.7629638671874, |
| "kl_loss_3": 1867.655010986328, |
| "kl_loss_7": 743.88798828125, |
| "learning_rate": 0.0007835299319313853, |
| "loss": 1303.1903, |
| "step": 3150 |
| }, |
| { |
| "ce_loss_10": 3.5704684495925902, |
| "ce_loss_13": 3.476880931854248, |
| "ce_loss_2": 4.606448101997375, |
| "ce_loss_3": 4.3400969982147215, |
| "ce_loss_7": 3.7886768341064454, |
| "epoch": 0.316, |
| "grad_norm": 700.0, |
| "kl_loss_10": 211.18966979980468, |
| "kl_loss_2": 2323.449572753906, |
| "kl_loss_3": 1851.584783935547, |
| "kl_loss_7": 721.3533996582031, |
| "learning_rate": 0.0007822216094333848, |
| "loss": 1322.3376, |
| "step": 3160 |
| }, |
| { |
| "ce_loss_10": 3.5810484290122986, |
| "ce_loss_13": 3.4873368740081787, |
| "ce_loss_2": 4.65300440788269, |
| "ce_loss_3": 4.387210464477539, |
| "ce_loss_7": 3.807372546195984, |
| "epoch": 0.317, |
| "grad_norm": 752.0, |
| "kl_loss_10": 212.44315567016602, |
| "kl_loss_2": 2384.089465332031, |
| "kl_loss_3": 1914.7229309082031, |
| "kl_loss_7": 731.2730682373046, |
| "learning_rate": 0.0007809104449669101, |
| "loss": 1294.9703, |
| "step": 3170 |
| }, |
| { |
| "ce_loss_10": 3.532199835777283, |
| "ce_loss_13": 3.4395654439926147, |
| "ce_loss_2": 4.584282898902893, |
| "ce_loss_3": 4.30876350402832, |
| "ce_loss_7": 3.7615070223808287, |
| "epoch": 0.318, |
| "grad_norm": 916.0, |
| "kl_loss_10": 207.75176467895508, |
| "kl_loss_2": 2339.527239990234, |
| "kl_loss_3": 1858.5612731933593, |
| "kl_loss_7": 730.9345184326172, |
| "learning_rate": 0.0007795964517353734, |
| "loss": 1278.7686, |
| "step": 3180 |
| }, |
| { |
| "ce_loss_10": 3.518466317653656, |
| "ce_loss_13": 3.426977741718292, |
| "ce_loss_2": 4.596842670440674, |
| "ce_loss_3": 4.325531184673309, |
| "ce_loss_7": 3.750142526626587, |
| "epoch": 0.319, |
| "grad_norm": 648.0, |
| "kl_loss_10": 211.74872894287108, |
| "kl_loss_2": 2403.7151733398437, |
| "kl_loss_3": 1931.1865478515624, |
| "kl_loss_7": 753.2544128417969, |
| "learning_rate": 0.000778279642970672, |
| "loss": 1282.6858, |
| "step": 3190 |
| }, |
| { |
| "ce_loss_10": 3.5179845094680786, |
| "ce_loss_13": 3.428935539722443, |
| "ce_loss_2": 4.562283158302307, |
| "ce_loss_3": 4.295236802101135, |
| "ce_loss_7": 3.7340755701065063, |
| "epoch": 0.32, |
| "grad_norm": 904.0, |
| "kl_loss_10": 205.840421295166, |
| "kl_loss_2": 2345.048876953125, |
| "kl_loss_3": 1866.877911376953, |
| "kl_loss_7": 720.2934661865235, |
| "learning_rate": 0.0007769600319330552, |
| "loss": 1264.9217, |
| "step": 3200 |
| }, |
| { |
| "ce_loss_10": 3.554915177822113, |
| "ce_loss_13": 3.466732156276703, |
| "ce_loss_2": 4.653983449935913, |
| "ce_loss_3": 4.384042191505432, |
| "ce_loss_7": 3.7919634103775026, |
| "epoch": 0.321, |
| "grad_norm": 708.0, |
| "kl_loss_10": 205.94034423828126, |
| "kl_loss_2": 2414.151336669922, |
| "kl_loss_3": 1938.7024047851562, |
| "kl_loss_7": 735.4687530517579, |
| "learning_rate": 0.0007756376319109917, |
| "loss": 1299.3125, |
| "step": 3210 |
| }, |
| { |
| "ce_loss_10": 3.601811099052429, |
| "ce_loss_13": 3.513204276561737, |
| "ce_loss_2": 4.643903732299805, |
| "ce_loss_3": 4.372084999084473, |
| "ce_loss_7": 3.82387717962265, |
| "epoch": 0.322, |
| "grad_norm": 856.0, |
| "kl_loss_10": 205.92393646240234, |
| "kl_loss_2": 2310.6433349609374, |
| "kl_loss_3": 1837.0882263183594, |
| "kl_loss_7": 727.0832824707031, |
| "learning_rate": 0.0007743124562210351, |
| "loss": 1252.0768, |
| "step": 3220 |
| }, |
| { |
| "ce_loss_10": 3.613737678527832, |
| "ce_loss_13": 3.5243070006370543, |
| "ce_loss_2": 4.6432843685150145, |
| "ce_loss_3": 4.373880839347839, |
| "ce_loss_7": 3.835604417324066, |
| "epoch": 0.323, |
| "grad_norm": 804.0, |
| "kl_loss_10": 206.8451774597168, |
| "kl_loss_2": 2302.7849548339846, |
| "kl_loss_3": 1831.265850830078, |
| "kl_loss_7": 718.6182281494141, |
| "learning_rate": 0.0007729845182076895, |
| "loss": 1281.717, |
| "step": 3230 |
| }, |
| { |
| "ce_loss_10": 3.54460072517395, |
| "ce_loss_13": 3.458022344112396, |
| "ce_loss_2": 4.567643523216248, |
| "ce_loss_3": 4.304308319091797, |
| "ce_loss_7": 3.7557874441146852, |
| "epoch": 0.324, |
| "grad_norm": 780.0, |
| "kl_loss_10": 202.53059158325195, |
| "kl_loss_2": 2275.4922607421877, |
| "kl_loss_3": 1814.183154296875, |
| "kl_loss_7": 706.1778442382813, |
| "learning_rate": 0.0007716538312432765, |
| "loss": 1299.5142, |
| "step": 3240 |
| }, |
| { |
| "ce_loss_10": 3.5034128069877624, |
| "ce_loss_13": 3.4109013199806215, |
| "ce_loss_2": 4.59195454120636, |
| "ce_loss_3": 4.316867542266846, |
| "ce_loss_7": 3.7340264201164244, |
| "epoch": 0.325, |
| "grad_norm": 620.0, |
| "kl_loss_10": 212.30709838867188, |
| "kl_loss_2": 2399.0862243652346, |
| "kl_loss_3": 1912.8944152832032, |
| "kl_loss_7": 738.4792083740234, |
| "learning_rate": 0.0007703204087277988, |
| "loss": 1308.0572, |
| "step": 3250 |
| }, |
| { |
| "ce_loss_10": 3.60279586315155, |
| "ce_loss_13": 3.5141580939292907, |
| "ce_loss_2": 4.619040894508362, |
| "ce_loss_3": 4.348745739459991, |
| "ce_loss_7": 3.81355699300766, |
| "epoch": 0.326, |
| "grad_norm": 728.0, |
| "kl_loss_10": 202.46756286621093, |
| "kl_loss_2": 2248.315087890625, |
| "kl_loss_3": 1773.8885681152344, |
| "kl_loss_7": 686.2587005615235, |
| "learning_rate": 0.0007689842640888063, |
| "loss": 1245.8748, |
| "step": 3260 |
| }, |
| { |
| "ce_loss_10": 3.6051684260368346, |
| "ce_loss_13": 3.5150891542434692, |
| "ce_loss_2": 4.619964861869812, |
| "ce_loss_3": 4.360685467720032, |
| "ce_loss_7": 3.8154699206352234, |
| "epoch": 0.327, |
| "grad_norm": 684.0, |
| "kl_loss_10": 208.96502685546875, |
| "kl_loss_2": 2265.3287658691406, |
| "kl_loss_3": 1811.821759033203, |
| "kl_loss_7": 703.9406219482422, |
| "learning_rate": 0.0007676454107812607, |
| "loss": 1264.3093, |
| "step": 3270 |
| }, |
| { |
| "ce_loss_10": 3.537815499305725, |
| "ce_loss_13": 3.444960331916809, |
| "ce_loss_2": 4.608094549179077, |
| "ce_loss_3": 4.3397119522094725, |
| "ce_loss_7": 3.7506736159324645, |
| "epoch": 0.328, |
| "grad_norm": 616.0, |
| "kl_loss_10": 211.32426528930665, |
| "kl_loss_2": 2388.8077392578125, |
| "kl_loss_3": 1915.7237182617187, |
| "kl_loss_7": 707.672705078125, |
| "learning_rate": 0.0007663038622873999, |
| "loss": 1279.8335, |
| "step": 3280 |
| }, |
| { |
| "ce_loss_10": 3.574945878982544, |
| "ce_loss_13": 3.4833101868629455, |
| "ce_loss_2": 4.628302264213562, |
| "ce_loss_3": 4.366952037811279, |
| "ce_loss_7": 3.784594464302063, |
| "epoch": 0.329, |
| "grad_norm": 596.0, |
| "kl_loss_10": 211.56422576904296, |
| "kl_loss_2": 2351.5840576171877, |
| "kl_loss_3": 1879.0997131347656, |
| "kl_loss_7": 694.7927307128906, |
| "learning_rate": 0.0007649596321166025, |
| "loss": 1256.8023, |
| "step": 3290 |
| }, |
| { |
| "ce_loss_10": 3.4788912653923036, |
| "ce_loss_13": 3.3914729714393617, |
| "ce_loss_2": 4.513116896152496, |
| "ce_loss_3": 4.253373873233795, |
| "ce_loss_7": 3.6928447008132936, |
| "epoch": 0.33, |
| "grad_norm": 600.0, |
| "kl_loss_10": 203.03155212402345, |
| "kl_loss_2": 2285.9035522460936, |
| "kl_loss_3": 1828.6068603515625, |
| "kl_loss_7": 691.3944427490235, |
| "learning_rate": 0.0007636127338052513, |
| "loss": 1273.8033, |
| "step": 3300 |
| }, |
| { |
| "ce_loss_10": 3.5868964433670043, |
| "ce_loss_13": 3.49528044462204, |
| "ce_loss_2": 4.663711452484131, |
| "ce_loss_3": 4.39831657409668, |
| "ce_loss_7": 3.804142189025879, |
| "epoch": 0.331, |
| "grad_norm": 624.0, |
| "kl_loss_10": 211.30522232055665, |
| "kl_loss_2": 2397.5427856445312, |
| "kl_loss_3": 1927.950506591797, |
| "kl_loss_7": 706.7799133300781, |
| "learning_rate": 0.0007622631809165971, |
| "loss": 1277.9496, |
| "step": 3310 |
| }, |
| { |
| "ce_loss_10": 3.582921600341797, |
| "ce_loss_13": 3.4965414881706236, |
| "ce_loss_2": 4.58232958316803, |
| "ce_loss_3": 4.323147928714752, |
| "ce_loss_7": 3.783066177368164, |
| "epoch": 0.332, |
| "grad_norm": 688.0, |
| "kl_loss_10": 197.47354049682616, |
| "kl_loss_2": 2216.269598388672, |
| "kl_loss_3": 1760.1223999023437, |
| "kl_loss_7": 664.7514923095703, |
| "learning_rate": 0.000760910987040623, |
| "loss": 1245.9068, |
| "step": 3320 |
| }, |
| { |
| "ce_loss_10": 3.5663990497589113, |
| "ce_loss_13": 3.474509632587433, |
| "ce_loss_2": 4.641107606887817, |
| "ce_loss_3": 4.369283008575439, |
| "ce_loss_7": 3.78259996175766, |
| "epoch": 0.333, |
| "grad_norm": 616.0, |
| "kl_loss_10": 210.06242904663085, |
| "kl_loss_2": 2402.0831298828125, |
| "kl_loss_3": 1926.8569946289062, |
| "kl_loss_7": 714.4321014404297, |
| "learning_rate": 0.000759556165793906, |
| "loss": 1272.2351, |
| "step": 3330 |
| }, |
| { |
| "ce_loss_10": 3.5859936118125915, |
| "ce_loss_13": 3.4947105884552, |
| "ce_loss_2": 4.635438013076782, |
| "ce_loss_3": 4.3713214635849, |
| "ce_loss_7": 3.79847708940506, |
| "epoch": 0.334, |
| "grad_norm": 600.0, |
| "kl_loss_10": 207.2735107421875, |
| "kl_loss_2": 2336.860705566406, |
| "kl_loss_3": 1864.5109497070312, |
| "kl_loss_7": 698.3573028564454, |
| "learning_rate": 0.000758198730819481, |
| "loss": 1291.4691, |
| "step": 3340 |
| }, |
| { |
| "ce_loss_10": 3.530641829967499, |
| "ce_loss_13": 3.44451619386673, |
| "ce_loss_2": 4.589142799377441, |
| "ce_loss_3": 4.3217404961586, |
| "ce_loss_7": 3.7362788200378416, |
| "epoch": 0.335, |
| "grad_norm": 624.0, |
| "kl_loss_10": 202.07082290649413, |
| "kl_loss_2": 2360.776690673828, |
| "kl_loss_3": 1886.3787536621094, |
| "kl_loss_7": 695.4340698242188, |
| "learning_rate": 0.0007568386957867032, |
| "loss": 1283.006, |
| "step": 3350 |
| }, |
| { |
| "ce_loss_10": 3.6058520078659058, |
| "ce_loss_13": 3.5129651188850404, |
| "ce_loss_2": 4.643417167663574, |
| "ce_loss_3": 4.37364354133606, |
| "ce_loss_7": 3.813083219528198, |
| "epoch": 0.336, |
| "grad_norm": 784.0, |
| "kl_loss_10": 207.97874145507814, |
| "kl_loss_2": 2295.3684020996093, |
| "kl_loss_3": 1825.3164672851562, |
| "kl_loss_7": 687.7690948486328, |
| "learning_rate": 0.0007554760743911103, |
| "loss": 1276.5395, |
| "step": 3360 |
| }, |
| { |
| "ce_loss_10": 3.5018799662590028, |
| "ce_loss_13": 3.4133763194084166, |
| "ce_loss_2": 4.551569533348084, |
| "ce_loss_3": 4.283941590785981, |
| "ce_loss_7": 3.704894995689392, |
| "epoch": 0.337, |
| "grad_norm": 644.0, |
| "kl_loss_10": 201.99492797851562, |
| "kl_loss_2": 2352.8791320800783, |
| "kl_loss_3": 1880.3864440917969, |
| "kl_loss_7": 682.2205291748047, |
| "learning_rate": 0.0007541108803542846, |
| "loss": 1306.1851, |
| "step": 3370 |
| }, |
| { |
| "ce_loss_10": 3.5562949419021606, |
| "ce_loss_13": 3.467681646347046, |
| "ce_loss_2": 4.61066963672638, |
| "ce_loss_3": 4.3377085566520694, |
| "ce_loss_7": 3.7622047662734985, |
| "epoch": 0.338, |
| "grad_norm": 632.0, |
| "kl_loss_10": 205.39780044555664, |
| "kl_loss_2": 2363.417413330078, |
| "kl_loss_3": 1877.7716186523437, |
| "kl_loss_7": 681.8937408447266, |
| "learning_rate": 0.0007527431274237149, |
| "loss": 1343.544, |
| "step": 3380 |
| }, |
| { |
| "ce_loss_10": 3.5283817052841187, |
| "ce_loss_13": 3.4397483229637147, |
| "ce_loss_2": 4.570840525627136, |
| "ce_loss_3": 4.304589962959289, |
| "ce_loss_7": 3.7283903479576113, |
| "epoch": 0.339, |
| "grad_norm": 572.0, |
| "kl_loss_10": 203.6351058959961, |
| "kl_loss_2": 2336.9517456054687, |
| "kl_loss_3": 1865.9661682128906, |
| "kl_loss_7": 677.4322174072265, |
| "learning_rate": 0.0007513728293726579, |
| "loss": 1277.8105, |
| "step": 3390 |
| }, |
| { |
| "ce_loss_10": 3.644584619998932, |
| "ce_loss_13": 3.556074547767639, |
| "ce_loss_2": 4.664924669265747, |
| "ce_loss_3": 4.399448752403259, |
| "ce_loss_7": 3.8509042620658875, |
| "epoch": 0.34, |
| "grad_norm": 644.0, |
| "kl_loss_10": 203.6712448120117, |
| "kl_loss_2": 2293.6278381347656, |
| "kl_loss_3": 1826.5482238769532, |
| "kl_loss_7": 683.2333282470703, |
| "learning_rate": 0.00075, |
| "loss": 1246.8078, |
| "step": 3400 |
| }, |
| { |
| "ce_loss_10": 3.6313098788261415, |
| "ce_loss_13": 3.5418556571006774, |
| "ce_loss_2": 4.690052318572998, |
| "ce_loss_3": 4.416153597831726, |
| "ce_loss_7": 3.843977117538452, |
| "epoch": 0.341, |
| "grad_norm": 644.0, |
| "kl_loss_10": 205.15843811035157, |
| "kl_loss_2": 2335.1500244140625, |
| "kl_loss_3": 1861.3693969726562, |
| "kl_loss_7": 693.7598510742188, |
| "learning_rate": 0.0007486246531301177, |
| "loss": 1258.7575, |
| "step": 3410 |
| }, |
| { |
| "ce_loss_10": 3.443863534927368, |
| "ce_loss_13": 3.3510751008987425, |
| "ce_loss_2": 4.5022605657577515, |
| "ce_loss_3": 4.230472648143769, |
| "ce_loss_7": 3.6559174418449403, |
| "epoch": 0.342, |
| "grad_norm": 664.0, |
| "kl_loss_10": 202.74062805175782, |
| "kl_loss_2": 2345.9876220703127, |
| "kl_loss_3": 1864.947119140625, |
| "kl_loss_7": 688.1812042236328, |
| "learning_rate": 0.0007472468026127384, |
| "loss": 1260.6121, |
| "step": 3420 |
| }, |
| { |
| "ce_loss_10": 3.5721543431282043, |
| "ce_loss_13": 3.4770930409431458, |
| "ce_loss_2": 4.665278792381287, |
| "ce_loss_3": 4.404071187973022, |
| "ce_loss_7": 3.788026750087738, |
| "epoch": 0.343, |
| "grad_norm": 712.0, |
| "kl_loss_10": 214.17584533691405, |
| "kl_loss_2": 2439.352404785156, |
| "kl_loss_3": 1972.9944885253906, |
| "kl_loss_7": 720.6741455078125, |
| "learning_rate": 0.000745866462322802, |
| "loss": 1320.8363, |
| "step": 3430 |
| }, |
| { |
| "ce_loss_10": 3.560039293766022, |
| "ce_loss_13": 3.4741207122802735, |
| "ce_loss_2": 4.592786359786987, |
| "ce_loss_3": 4.334179782867432, |
| "ce_loss_7": 3.7690793752670286, |
| "epoch": 0.344, |
| "grad_norm": 700.0, |
| "kl_loss_10": 200.53601684570313, |
| "kl_loss_2": 2283.709338378906, |
| "kl_loss_3": 1835.7159240722656, |
| "kl_loss_7": 673.5906463623047, |
| "learning_rate": 0.0007444836461603195, |
| "loss": 1261.9196, |
| "step": 3440 |
| }, |
| { |
| "ce_loss_10": 3.6245110511779783, |
| "ce_loss_13": 3.5312341451644897, |
| "ce_loss_2": 4.6719811201095585, |
| "ce_loss_3": 4.406914234161377, |
| "ce_loss_7": 3.8266146540641786, |
| "epoch": 0.345, |
| "grad_norm": 648.0, |
| "kl_loss_10": 214.16654891967772, |
| "kl_loss_2": 2362.7242797851563, |
| "kl_loss_3": 1898.2087890625, |
| "kl_loss_7": 704.3192626953125, |
| "learning_rate": 0.0007430983680502344, |
| "loss": 1301.0338, |
| "step": 3450 |
| }, |
| { |
| "ce_loss_10": 3.4667457938194275, |
| "ce_loss_13": 3.377876877784729, |
| "ce_loss_2": 4.545617830753327, |
| "ce_loss_3": 4.272857880592346, |
| "ce_loss_7": 3.6741854548454285, |
| "epoch": 0.346, |
| "grad_norm": 608.0, |
| "kl_loss_10": 206.17358779907227, |
| "kl_loss_2": 2388.203955078125, |
| "kl_loss_3": 1909.6425415039062, |
| "kl_loss_7": 697.4611602783203, |
| "learning_rate": 0.0007417106419422819, |
| "loss": 1290.2338, |
| "step": 3460 |
| }, |
| { |
| "ce_loss_10": 3.571521496772766, |
| "ce_loss_13": 3.4770392775535583, |
| "ce_loss_2": 4.614993333816528, |
| "ce_loss_3": 4.345528078079224, |
| "ce_loss_7": 3.7798440217971803, |
| "epoch": 0.347, |
| "grad_norm": 656.0, |
| "kl_loss_10": 204.45724334716797, |
| "kl_loss_2": 2308.5895385742188, |
| "kl_loss_3": 1833.5083312988281, |
| "kl_loss_7": 683.0788604736329, |
| "learning_rate": 0.0007403204818108486, |
| "loss": 1275.3799, |
| "step": 3470 |
| }, |
| { |
| "ce_loss_10": 3.5445627093315126, |
| "ce_loss_13": 3.4533625841140747, |
| "ce_loss_2": 4.6031595230102536, |
| "ce_loss_3": 4.338364768028259, |
| "ce_loss_7": 3.746653878688812, |
| "epoch": 0.348, |
| "grad_norm": 576.0, |
| "kl_loss_10": 208.29350357055665, |
| "kl_loss_2": 2371.4946899414062, |
| "kl_loss_3": 1909.6997863769532, |
| "kl_loss_7": 686.1026062011719, |
| "learning_rate": 0.0007389279016548316, |
| "loss": 1247.1171, |
| "step": 3480 |
| }, |
| { |
| "ce_loss_10": 3.553533661365509, |
| "ce_loss_13": 3.458456254005432, |
| "ce_loss_2": 4.6566637516021725, |
| "ce_loss_3": 4.37568781375885, |
| "ce_loss_7": 3.7642048597335815, |
| "epoch": 0.349, |
| "grad_norm": 684.0, |
| "kl_loss_10": 212.6401054382324, |
| "kl_loss_2": 2451.175671386719, |
| "kl_loss_3": 1951.6068969726562, |
| "kl_loss_7": 702.7821899414063, |
| "learning_rate": 0.0007375329154974975, |
| "loss": 1307.9424, |
| "step": 3490 |
| }, |
| { |
| "ce_loss_10": 3.5084131717681886, |
| "ce_loss_13": 3.4206284284591675, |
| "ce_loss_2": 4.546359324455262, |
| "ce_loss_3": 4.28377673625946, |
| "ce_loss_7": 3.7158578753471376, |
| "epoch": 0.35, |
| "grad_norm": 676.0, |
| "kl_loss_10": 208.94808502197264, |
| "kl_loss_2": 2307.420263671875, |
| "kl_loss_3": 1848.263897705078, |
| "kl_loss_7": 683.7157653808594, |
| "learning_rate": 0.0007361355373863414, |
| "loss": 1294.9244, |
| "step": 3500 |
| }, |
| { |
| "ce_loss_10": 3.563704586029053, |
| "ce_loss_13": 3.471196401119232, |
| "ce_loss_2": 4.5938108444213865, |
| "ce_loss_3": 4.332852721214294, |
| "ce_loss_7": 3.7703267097473145, |
| "epoch": 0.351, |
| "grad_norm": 580.0, |
| "kl_loss_10": 208.09024658203126, |
| "kl_loss_2": 2287.9739318847655, |
| "kl_loss_3": 1828.5689758300782, |
| "kl_loss_7": 673.030307006836, |
| "learning_rate": 0.0007347357813929454, |
| "loss": 1287.6393, |
| "step": 3510 |
| }, |
| { |
| "ce_loss_10": 3.5099044919013975, |
| "ce_loss_13": 3.419321870803833, |
| "ce_loss_2": 4.543364262580871, |
| "ce_loss_3": 4.27359983921051, |
| "ce_loss_7": 3.7119598269462584, |
| "epoch": 0.352, |
| "grad_norm": 620.0, |
| "kl_loss_10": 207.49536819458007, |
| "kl_loss_2": 2274.5087951660157, |
| "kl_loss_3": 1817.6334594726563, |
| "kl_loss_7": 673.7456726074219, |
| "learning_rate": 0.0007333336616128369, |
| "loss": 1275.3445, |
| "step": 3520 |
| }, |
| { |
| "ce_loss_10": 3.4893477082252504, |
| "ce_loss_13": 3.394596815109253, |
| "ce_loss_2": 4.558405804634094, |
| "ce_loss_3": 4.2951094031333925, |
| "ce_loss_7": 3.69784619808197, |
| "epoch": 0.353, |
| "grad_norm": 636.0, |
| "kl_loss_10": 211.34491577148438, |
| "kl_loss_2": 2368.5742614746096, |
| "kl_loss_3": 1904.588153076172, |
| "kl_loss_7": 699.6206512451172, |
| "learning_rate": 0.0007319291921653463, |
| "loss": 1290.8219, |
| "step": 3530 |
| }, |
| { |
| "ce_loss_10": 3.5741103887557983, |
| "ce_loss_13": 3.4787994265556335, |
| "ce_loss_2": 4.633104467391968, |
| "ce_loss_3": 4.363708543777466, |
| "ce_loss_7": 3.7883455634117125, |
| "epoch": 0.354, |
| "grad_norm": 688.0, |
| "kl_loss_10": 211.75357818603516, |
| "kl_loss_2": 2353.7052368164063, |
| "kl_loss_3": 1872.2497680664062, |
| "kl_loss_7": 696.9233825683593, |
| "learning_rate": 0.0007305223871934656, |
| "loss": 1261.161, |
| "step": 3540 |
| }, |
| { |
| "ce_loss_10": 3.53648921251297, |
| "ce_loss_13": 3.4479789614677427, |
| "ce_loss_2": 4.58404312133789, |
| "ce_loss_3": 4.315784668922424, |
| "ce_loss_7": 3.7411927938461305, |
| "epoch": 0.355, |
| "grad_norm": 644.0, |
| "kl_loss_10": 204.81159896850585, |
| "kl_loss_2": 2318.000451660156, |
| "kl_loss_3": 1841.0889282226562, |
| "kl_loss_7": 674.2556213378906, |
| "learning_rate": 0.0007291132608637052, |
| "loss": 1261.7902, |
| "step": 3550 |
| }, |
| { |
| "ce_loss_10": 3.4981685996055605, |
| "ce_loss_13": 3.4104817390441893, |
| "ce_loss_2": 4.630524325370788, |
| "ce_loss_3": 4.356097209453583, |
| "ce_loss_7": 3.7044720530509947, |
| "epoch": 0.356, |
| "grad_norm": 596.0, |
| "kl_loss_10": 201.21338653564453, |
| "kl_loss_2": 2484.7927124023436, |
| "kl_loss_3": 2010.760675048828, |
| "kl_loss_7": 676.6305114746094, |
| "learning_rate": 0.0007277018273659516, |
| "loss": 1327.9727, |
| "step": 3560 |
| }, |
| { |
| "ce_loss_10": 3.625146007537842, |
| "ce_loss_13": 3.531074047088623, |
| "ce_loss_2": 4.6699333667755125, |
| "ce_loss_3": 4.4035911679267885, |
| "ce_loss_7": 3.836405646800995, |
| "epoch": 0.357, |
| "grad_norm": 620.0, |
| "kl_loss_10": 209.5035614013672, |
| "kl_loss_2": 2341.261669921875, |
| "kl_loss_3": 1873.83955078125, |
| "kl_loss_7": 701.3334136962891, |
| "learning_rate": 0.0007262881009133242, |
| "loss": 1275.0637, |
| "step": 3570 |
| }, |
| { |
| "ce_loss_10": 3.5401904344558717, |
| "ce_loss_13": 3.4541720032691954, |
| "ce_loss_2": 4.572478699684143, |
| "ce_loss_3": 4.315898811817169, |
| "ce_loss_7": 3.7435639023780825, |
| "epoch": 0.358, |
| "grad_norm": 616.0, |
| "kl_loss_10": 201.09010009765626, |
| "kl_loss_2": 2313.080432128906, |
| "kl_loss_3": 1855.7528381347656, |
| "kl_loss_7": 673.6273254394531, |
| "learning_rate": 0.0007248720957420329, |
| "loss": 1252.028, |
| "step": 3580 |
| }, |
| { |
| "ce_loss_10": 3.55083909034729, |
| "ce_loss_13": 3.466167140007019, |
| "ce_loss_2": 4.594191384315491, |
| "ce_loss_3": 4.320683646202087, |
| "ce_loss_7": 3.750420665740967, |
| "epoch": 0.359, |
| "grad_norm": 592.0, |
| "kl_loss_10": 201.2973388671875, |
| "kl_loss_2": 2304.7897521972654, |
| "kl_loss_3": 1830.9858337402343, |
| "kl_loss_7": 667.1753356933593, |
| "learning_rate": 0.0007234538261112341, |
| "loss": 1305.9864, |
| "step": 3590 |
| }, |
| { |
| "ce_loss_10": 3.5870068073272705, |
| "ce_loss_13": 3.4963618993759153, |
| "ce_loss_2": 4.64858865737915, |
| "ce_loss_3": 4.383180546760559, |
| "ce_loss_7": 3.7978907346725466, |
| "epoch": 0.36, |
| "grad_norm": 580.0, |
| "kl_loss_10": 206.438224029541, |
| "kl_loss_2": 2350.6687255859374, |
| "kl_loss_3": 1879.8920471191407, |
| "kl_loss_7": 691.6597686767578, |
| "learning_rate": 0.0007220333063028871, |
| "loss": 1262.9199, |
| "step": 3600 |
| }, |
| { |
| "ce_loss_10": 3.617115843296051, |
| "ce_loss_13": 3.5226447105407717, |
| "ce_loss_2": 4.7090448379516605, |
| "ce_loss_3": 4.440777349472046, |
| "ce_loss_7": 3.927542436122894, |
| "epoch": 0.361, |
| "grad_norm": 700.0, |
| "kl_loss_10": 215.82505798339844, |
| "kl_loss_2": 2451.148791503906, |
| "kl_loss_3": 1982.2255798339843, |
| "kl_loss_7": 911.9942932128906, |
| "learning_rate": 0.0007206105506216106, |
| "loss": 1351.6553, |
| "step": 3610 |
| }, |
| { |
| "ce_loss_10": 3.4991318702697756, |
| "ce_loss_13": 3.4086315035820007, |
| "ce_loss_2": 4.517360043525696, |
| "ce_loss_3": 4.2627614617347716, |
| "ce_loss_7": 3.7236221551895143, |
| "epoch": 0.362, |
| "grad_norm": 836.0, |
| "kl_loss_10": 210.4427146911621, |
| "kl_loss_2": 2266.5427673339846, |
| "kl_loss_3": 1820.741632080078, |
| "kl_loss_7": 721.7343872070312, |
| "learning_rate": 0.0007191855733945387, |
| "loss": 1249.7904, |
| "step": 3620 |
| }, |
| { |
| "ce_loss_10": 3.5907591581344604, |
| "ce_loss_13": 3.5010143160820006, |
| "ce_loss_2": 4.622481203079223, |
| "ce_loss_3": 4.356601357460022, |
| "ce_loss_7": 3.79650160074234, |
| "epoch": 0.363, |
| "grad_norm": 572.0, |
| "kl_loss_10": 204.04148864746094, |
| "kl_loss_2": 2289.9740478515623, |
| "kl_loss_3": 1818.431005859375, |
| "kl_loss_7": 692.2773986816406, |
| "learning_rate": 0.0007177583889711762, |
| "loss": 1250.2074, |
| "step": 3630 |
| }, |
| { |
| "ce_loss_10": 3.5057953119277956, |
| "ce_loss_13": 3.411814069747925, |
| "ce_loss_2": 4.563321113586426, |
| "ce_loss_3": 4.290196192264557, |
| "ce_loss_7": 3.7388221859931945, |
| "epoch": 0.364, |
| "grad_norm": 952.0, |
| "kl_loss_10": 206.33124389648438, |
| "kl_loss_2": 2346.8802978515623, |
| "kl_loss_3": 1869.4090270996094, |
| "kl_loss_7": 731.2307403564453, |
| "learning_rate": 0.0007163290117232541, |
| "loss": 1286.6971, |
| "step": 3640 |
| }, |
| { |
| "ce_loss_10": 3.625288701057434, |
| "ce_loss_13": 3.5375028014183045, |
| "ce_loss_2": 4.612985825538635, |
| "ce_loss_3": 4.35078866481781, |
| "ce_loss_7": 3.820189893245697, |
| "epoch": 0.365, |
| "grad_norm": 676.0, |
| "kl_loss_10": 199.10545043945314, |
| "kl_loss_2": 2225.872174072266, |
| "kl_loss_3": 1769.0750122070312, |
| "kl_loss_7": 679.4105773925781, |
| "learning_rate": 0.0007148974560445859, |
| "loss": 1248.8154, |
| "step": 3650 |
| }, |
| { |
| "ce_loss_10": 3.5454740643501284, |
| "ce_loss_13": 3.458422267436981, |
| "ce_loss_2": 4.550081968307495, |
| "ce_loss_3": 4.287899553775787, |
| "ce_loss_7": 3.7501559376716616, |
| "epoch": 0.366, |
| "grad_norm": 588.0, |
| "kl_loss_10": 198.9521499633789, |
| "kl_loss_2": 2238.7438537597654, |
| "kl_loss_3": 1775.3712280273437, |
| "kl_loss_7": 679.886441040039, |
| "learning_rate": 0.0007134637363509209, |
| "loss": 1224.5007, |
| "step": 3660 |
| }, |
| { |
| "ce_loss_10": 3.6524737238883973, |
| "ce_loss_13": 3.566513454914093, |
| "ce_loss_2": 4.64628803730011, |
| "ce_loss_3": 4.383014440536499, |
| "ce_loss_7": 3.85452960729599, |
| "epoch": 0.367, |
| "grad_norm": 804.0, |
| "kl_loss_10": 195.8438636779785, |
| "kl_loss_2": 2202.7541381835936, |
| "kl_loss_3": 1744.0477172851563, |
| "kl_loss_7": 669.0114837646485, |
| "learning_rate": 0.0007120278670798009, |
| "loss": 1241.0041, |
| "step": 3670 |
| }, |
| { |
| "ce_loss_10": 3.451411759853363, |
| "ce_loss_13": 3.362742209434509, |
| "ce_loss_2": 4.563682770729065, |
| "ce_loss_3": 4.3000654697418215, |
| "ce_loss_7": 3.6735877275466917, |
| "epoch": 0.368, |
| "grad_norm": 692.0, |
| "kl_loss_10": 207.53860931396486, |
| "kl_loss_2": 2452.550360107422, |
| "kl_loss_3": 1973.4927856445313, |
| "kl_loss_7": 716.8897491455078, |
| "learning_rate": 0.0007105898626904133, |
| "loss": 1338.2093, |
| "step": 3680 |
| }, |
| { |
| "ce_loss_10": 3.5554641008377077, |
| "ce_loss_13": 3.4662737131118773, |
| "ce_loss_2": 4.611227035522461, |
| "ce_loss_3": 4.340918231010437, |
| "ce_loss_7": 3.768160092830658, |
| "epoch": 0.369, |
| "grad_norm": 564.0, |
| "kl_loss_10": 202.72654418945314, |
| "kl_loss_2": 2339.7893005371093, |
| "kl_loss_3": 1860.5950439453125, |
| "kl_loss_7": 686.1352569580079, |
| "learning_rate": 0.0007091497376634463, |
| "loss": 1252.1551, |
| "step": 3690 |
| }, |
| { |
| "ce_loss_10": 3.5008182168006896, |
| "ce_loss_13": 3.412228453159332, |
| "ce_loss_2": 4.538051557540894, |
| "ce_loss_3": 4.271669220924378, |
| "ce_loss_7": 3.707795190811157, |
| "epoch": 0.37, |
| "grad_norm": 688.0, |
| "kl_loss_10": 203.25593032836915, |
| "kl_loss_2": 2313.7531005859373, |
| "kl_loss_3": 1845.0465942382812, |
| "kl_loss_7": 679.24736328125, |
| "learning_rate": 0.0007077075065009433, |
| "loss": 1276.0328, |
| "step": 3700 |
| }, |
| { |
| "ce_loss_10": 3.6071534514427186, |
| "ce_loss_13": 3.5158491373062133, |
| "ce_loss_2": 4.6654202222824095, |
| "ce_loss_3": 4.392651915550232, |
| "ce_loss_7": 3.819450116157532, |
| "epoch": 0.371, |
| "grad_norm": 752.0, |
| "kl_loss_10": 208.91845016479493, |
| "kl_loss_2": 2358.2886352539062, |
| "kl_loss_3": 1879.4206665039062, |
| "kl_loss_7": 699.9392761230469, |
| "learning_rate": 0.0007062631837261557, |
| "loss": 1268.6693, |
| "step": 3710 |
| }, |
| { |
| "ce_loss_10": 3.476445233821869, |
| "ce_loss_13": 3.390165627002716, |
| "ce_loss_2": 4.5276483535766605, |
| "ce_loss_3": 4.25641827583313, |
| "ce_loss_7": 3.683011364936829, |
| "epoch": 0.372, |
| "grad_norm": 640.0, |
| "kl_loss_10": 202.1516098022461, |
| "kl_loss_2": 2314.015344238281, |
| "kl_loss_3": 1841.6598022460937, |
| "kl_loss_7": 682.8849884033203, |
| "learning_rate": 0.0007048167838833977, |
| "loss": 1289.0859, |
| "step": 3720 |
| }, |
| { |
| "ce_loss_10": 3.5699679255485535, |
| "ce_loss_13": 3.480803608894348, |
| "ce_loss_2": 4.588373041152954, |
| "ce_loss_3": 4.323996567726136, |
| "ce_loss_7": 3.7677942156791686, |
| "epoch": 0.373, |
| "grad_norm": 744.0, |
| "kl_loss_10": 203.9791275024414, |
| "kl_loss_2": 2290.5110717773437, |
| "kl_loss_3": 1815.935235595703, |
| "kl_loss_7": 672.9903137207032, |
| "learning_rate": 0.0007033683215379002, |
| "loss": 1247.4349, |
| "step": 3730 |
| }, |
| { |
| "ce_loss_10": 3.556042289733887, |
| "ce_loss_13": 3.4659453988075257, |
| "ce_loss_2": 4.594552016258239, |
| "ce_loss_3": 4.319927525520325, |
| "ce_loss_7": 3.7590227007865904, |
| "epoch": 0.374, |
| "grad_norm": 608.0, |
| "kl_loss_10": 199.66127700805663, |
| "kl_loss_2": 2281.638720703125, |
| "kl_loss_3": 1802.195782470703, |
| "kl_loss_7": 667.0344909667969, |
| "learning_rate": 0.0007019178112756625, |
| "loss": 1258.8061, |
| "step": 3740 |
| }, |
| { |
| "ce_loss_10": 3.518285346031189, |
| "ce_loss_13": 3.432125985622406, |
| "ce_loss_2": 4.562115430831909, |
| "ce_loss_3": 4.29533269405365, |
| "ce_loss_7": 3.7262615442276, |
| "epoch": 0.375, |
| "grad_norm": 640.0, |
| "kl_loss_10": 200.82229309082032, |
| "kl_loss_2": 2292.846240234375, |
| "kl_loss_3": 1823.6997009277343, |
| "kl_loss_7": 673.9724395751953, |
| "learning_rate": 0.0007004652677033068, |
| "loss": 1263.2482, |
| "step": 3750 |
| }, |
| { |
| "ce_loss_10": 3.5903055548667906, |
| "ce_loss_13": 3.506292223930359, |
| "ce_loss_2": 4.5962906837463375, |
| "ce_loss_3": 4.3357175350189205, |
| "ce_loss_7": 3.786800575256348, |
| "epoch": 0.376, |
| "grad_norm": 656.0, |
| "kl_loss_10": 200.60662612915038, |
| "kl_loss_2": 2252.357458496094, |
| "kl_loss_3": 1790.429217529297, |
| "kl_loss_7": 660.8626007080078, |
| "learning_rate": 0.0006990107054479312, |
| "loss": 1245.262, |
| "step": 3760 |
| }, |
| { |
| "ce_loss_10": 3.5865476965904235, |
| "ce_loss_13": 3.4931369185447694, |
| "ce_loss_2": 4.599129343032837, |
| "ce_loss_3": 4.335857176780701, |
| "ce_loss_7": 3.77964334487915, |
| "epoch": 0.377, |
| "grad_norm": 700.0, |
| "kl_loss_10": 209.85385513305664, |
| "kl_loss_2": 2261.3722229003906, |
| "kl_loss_3": 1806.916485595703, |
| "kl_loss_7": 670.6618469238281, |
| "learning_rate": 0.000697554139156961, |
| "loss": 1247.3398, |
| "step": 3770 |
| }, |
| { |
| "ce_loss_10": 3.572757053375244, |
| "ce_loss_13": 3.477368426322937, |
| "ce_loss_2": 4.60923056602478, |
| "ce_loss_3": 4.342034792900085, |
| "ce_loss_7": 3.766658973693848, |
| "epoch": 0.378, |
| "grad_norm": 628.0, |
| "kl_loss_10": 218.62799072265625, |
| "kl_loss_2": 2331.5685607910154, |
| "kl_loss_3": 1853.7978393554688, |
| "kl_loss_7": 681.1488891601563, |
| "learning_rate": 0.0006960955834980027, |
| "loss": 1246.4775, |
| "step": 3780 |
| }, |
| { |
| "ce_loss_10": 3.5454328536987303, |
| "ce_loss_13": 3.449174666404724, |
| "ce_loss_2": 4.563591694831848, |
| "ce_loss_3": 4.298812806606293, |
| "ce_loss_7": 3.7385509848594665, |
| "epoch": 0.379, |
| "grad_norm": 740.0, |
| "kl_loss_10": 215.9669273376465, |
| "kl_loss_2": 2275.3556274414063, |
| "kl_loss_3": 1807.9537841796875, |
| "kl_loss_7": 673.0077819824219, |
| "learning_rate": 0.0006946350531586958, |
| "loss": 1251.4496, |
| "step": 3790 |
| }, |
| { |
| "ce_loss_10": 3.5613739252090455, |
| "ce_loss_13": 3.4710352540016176, |
| "ce_loss_2": 4.586881446838379, |
| "ce_loss_3": 4.326097631454468, |
| "ce_loss_7": 3.7597612500190736, |
| "epoch": 0.38, |
| "grad_norm": 636.0, |
| "kl_loss_10": 210.37978897094726, |
| "kl_loss_2": 2278.926678466797, |
| "kl_loss_3": 1818.876287841797, |
| "kl_loss_7": 669.4569915771484, |
| "learning_rate": 0.0006931725628465643, |
| "loss": 1275.2133, |
| "step": 3800 |
| }, |
| { |
| "ce_loss_10": 3.590466618537903, |
| "ce_loss_13": 3.4937587857246397, |
| "ce_loss_2": 4.623274827003479, |
| "ce_loss_3": 4.352746081352234, |
| "ce_loss_7": 3.7943554282188416, |
| "epoch": 0.381, |
| "grad_norm": 628.0, |
| "kl_loss_10": 211.29082336425782, |
| "kl_loss_2": 2296.4768188476564, |
| "kl_loss_3": 1818.6126953125, |
| "kl_loss_7": 678.5069641113281, |
| "learning_rate": 0.0006917081272888696, |
| "loss": 1259.3377, |
| "step": 3810 |
| }, |
| { |
| "ce_loss_10": 3.487190854549408, |
| "ce_loss_13": 3.393886852264404, |
| "ce_loss_2": 4.553677868843079, |
| "ce_loss_3": 4.300019836425781, |
| "ce_loss_7": 3.6916916847229, |
| "epoch": 0.382, |
| "grad_norm": 596.0, |
| "kl_loss_10": 216.8355224609375, |
| "kl_loss_2": 2365.8853271484377, |
| "kl_loss_3": 1922.5055114746094, |
| "kl_loss_7": 684.1588439941406, |
| "learning_rate": 0.0006902417612324615, |
| "loss": 1266.6071, |
| "step": 3820 |
| }, |
| { |
| "ce_loss_10": 3.6190301895141603, |
| "ce_loss_13": 3.5245797634124756, |
| "ce_loss_2": 4.67730553150177, |
| "ce_loss_3": 4.405901682376862, |
| "ce_loss_7": 3.8294657945632933, |
| "epoch": 0.383, |
| "grad_norm": 680.0, |
| "kl_loss_10": 219.55593795776366, |
| "kl_loss_2": 2360.2657958984373, |
| "kl_loss_3": 1879.3323364257812, |
| "kl_loss_7": 698.4299591064453, |
| "learning_rate": 0.00068877347944363, |
| "loss": 1281.5383, |
| "step": 3830 |
| }, |
| { |
| "ce_loss_10": 3.612284016609192, |
| "ce_loss_13": 3.522170841693878, |
| "ce_loss_2": 4.627012848854065, |
| "ce_loss_3": 4.361323833465576, |
| "ce_loss_7": 3.8073740243911742, |
| "epoch": 0.384, |
| "grad_norm": 852.0, |
| "kl_loss_10": 211.08247299194335, |
| "kl_loss_2": 2264.5133850097654, |
| "kl_loss_3": 1800.5201843261718, |
| "kl_loss_7": 672.6636840820313, |
| "learning_rate": 0.0006873032967079561, |
| "loss": 1258.6725, |
| "step": 3840 |
| }, |
| { |
| "ce_loss_10": 3.5931047439575194, |
| "ce_loss_13": 3.5063655853271483, |
| "ce_loss_2": 4.588197422027588, |
| "ce_loss_3": 4.324539279937744, |
| "ce_loss_7": 3.7907418251037597, |
| "epoch": 0.385, |
| "grad_norm": 664.0, |
| "kl_loss_10": 203.42158584594728, |
| "kl_loss_2": 2234.902947998047, |
| "kl_loss_3": 1772.472021484375, |
| "kl_loss_7": 662.3596588134766, |
| "learning_rate": 0.0006858312278301637, |
| "loss": 1226.7012, |
| "step": 3850 |
| }, |
| { |
| "ce_loss_10": 3.635795843601227, |
| "ce_loss_13": 3.549472713470459, |
| "ce_loss_2": 4.623842811584472, |
| "ce_loss_3": 4.353901195526123, |
| "ce_loss_7": 3.825099301338196, |
| "epoch": 0.386, |
| "grad_norm": 736.0, |
| "kl_loss_10": 204.89519424438475, |
| "kl_loss_2": 2217.719934082031, |
| "kl_loss_3": 1747.3561584472657, |
| "kl_loss_7": 659.4771606445313, |
| "learning_rate": 0.0006843572876339704, |
| "loss": 1225.6961, |
| "step": 3860 |
| }, |
| { |
| "ce_loss_10": 3.5519859790802, |
| "ce_loss_13": 3.466093647480011, |
| "ce_loss_2": 4.525204968452454, |
| "ce_loss_3": 4.264074110984803, |
| "ce_loss_7": 3.742415523529053, |
| "epoch": 0.387, |
| "grad_norm": 668.0, |
| "kl_loss_10": 199.43942337036134, |
| "kl_loss_2": 2183.9968811035155, |
| "kl_loss_3": 1725.1913513183595, |
| "kl_loss_7": 644.7796203613282, |
| "learning_rate": 0.0006828814909619373, |
| "loss": 1252.2885, |
| "step": 3870 |
| }, |
| { |
| "ce_loss_10": 3.674282944202423, |
| "ce_loss_13": 3.5820161938667296, |
| "ce_loss_2": 4.6895040512084964, |
| "ce_loss_3": 4.4149659156799315, |
| "ce_loss_7": 3.86486736536026, |
| "epoch": 0.388, |
| "grad_norm": 576.0, |
| "kl_loss_10": 211.43887939453126, |
| "kl_loss_2": 2266.184930419922, |
| "kl_loss_3": 1785.1635192871095, |
| "kl_loss_7": 661.8903228759766, |
| "learning_rate": 0.0006814038526753205, |
| "loss": 1223.6402, |
| "step": 3880 |
| }, |
| { |
| "ce_loss_10": 3.5698843955993653, |
| "ce_loss_13": 3.479625034332275, |
| "ce_loss_2": 4.587342977523804, |
| "ce_loss_3": 4.3197312474250795, |
| "ce_loss_7": 3.766351842880249, |
| "epoch": 0.389, |
| "grad_norm": 616.0, |
| "kl_loss_10": 208.68895874023437, |
| "kl_loss_2": 2258.2895751953124, |
| "kl_loss_3": 1782.5825317382812, |
| "kl_loss_7": 664.0186126708984, |
| "learning_rate": 0.0006799243876539213, |
| "loss": 1238.0235, |
| "step": 3890 |
| }, |
| { |
| "ce_loss_10": 3.500353288650513, |
| "ce_loss_13": 3.408971738815308, |
| "ce_loss_2": 4.572002196311951, |
| "ce_loss_3": 4.29817762374878, |
| "ce_loss_7": 3.699842798709869, |
| "epoch": 0.39, |
| "grad_norm": 836.0, |
| "kl_loss_10": 206.20438537597656, |
| "kl_loss_2": 2378.092236328125, |
| "kl_loss_3": 1891.477294921875, |
| "kl_loss_7": 671.2416046142578, |
| "learning_rate": 0.0006784431107959359, |
| "loss": 1281.9199, |
| "step": 3900 |
| }, |
| { |
| "ce_loss_10": 3.5523509979248047, |
| "ce_loss_13": 3.460574519634247, |
| "ce_loss_2": 4.626534819602966, |
| "ce_loss_3": 4.3451399326324465, |
| "ce_loss_7": 3.7586856484413147, |
| "epoch": 0.391, |
| "grad_norm": 668.0, |
| "kl_loss_10": 206.77191925048828, |
| "kl_loss_2": 2369.177069091797, |
| "kl_loss_3": 1882.2445129394532, |
| "kl_loss_7": 681.9191589355469, |
| "learning_rate": 0.0006769600370178059, |
| "loss": 1269.1178, |
| "step": 3910 |
| }, |
| { |
| "ce_loss_10": 3.5188350439071656, |
| "ce_loss_13": 3.4273067116737366, |
| "ce_loss_2": 4.554541206359863, |
| "ce_loss_3": 4.289613115787506, |
| "ce_loss_7": 3.724477529525757, |
| "epoch": 0.392, |
| "grad_norm": 560.0, |
| "kl_loss_10": 199.67687606811523, |
| "kl_loss_2": 2289.5328735351563, |
| "kl_loss_3": 1825.1418823242188, |
| "kl_loss_7": 674.4119354248047, |
| "learning_rate": 0.0006754751812540679, |
| "loss": 1229.9105, |
| "step": 3920 |
| }, |
| { |
| "ce_loss_10": 3.5683494329452516, |
| "ce_loss_13": 3.482330596446991, |
| "ce_loss_2": 4.621720671653748, |
| "ce_loss_3": 4.348645758628845, |
| "ce_loss_7": 3.7689119219779967, |
| "epoch": 0.393, |
| "grad_norm": 644.0, |
| "kl_loss_10": 204.25539169311523, |
| "kl_loss_2": 2339.84892578125, |
| "kl_loss_3": 1860.5805419921876, |
| "kl_loss_7": 678.0737976074219, |
| "learning_rate": 0.0006739885584572025, |
| "loss": 1265.1324, |
| "step": 3930 |
| }, |
| { |
| "ce_loss_10": 3.596517300605774, |
| "ce_loss_13": 3.5048423767089845, |
| "ce_loss_2": 4.657581090927124, |
| "ce_loss_3": 4.393209981918335, |
| "ce_loss_7": 3.8048507928848267, |
| "epoch": 0.394, |
| "grad_norm": 740.0, |
| "kl_loss_10": 206.0053512573242, |
| "kl_loss_2": 2374.587469482422, |
| "kl_loss_3": 1914.2769775390625, |
| "kl_loss_7": 691.7073638916015, |
| "learning_rate": 0.0006725001835974853, |
| "loss": 1262.3447, |
| "step": 3940 |
| }, |
| { |
| "ce_loss_10": 3.5837875604629517, |
| "ce_loss_13": 3.49317661523819, |
| "ce_loss_2": 4.625016355514527, |
| "ce_loss_3": 4.360765337944031, |
| "ce_loss_7": 3.795220899581909, |
| "epoch": 0.395, |
| "grad_norm": 624.0, |
| "kl_loss_10": 202.6530014038086, |
| "kl_loss_2": 2326.028674316406, |
| "kl_loss_3": 1851.947198486328, |
| "kl_loss_7": 690.550015258789, |
| "learning_rate": 0.0006710100716628344, |
| "loss": 1233.6354, |
| "step": 3950 |
| }, |
| { |
| "ce_loss_10": 3.566684401035309, |
| "ce_loss_13": 3.4755138635635374, |
| "ce_loss_2": 4.594186568260193, |
| "ce_loss_3": 4.3349669694900514, |
| "ce_loss_7": 3.7691023349761963, |
| "epoch": 0.396, |
| "grad_norm": 612.0, |
| "kl_loss_10": 198.98139343261718, |
| "kl_loss_2": 2292.616107177734, |
| "kl_loss_3": 1833.538885498047, |
| "kl_loss_7": 679.8143615722656, |
| "learning_rate": 0.0006695182376586602, |
| "loss": 1262.3783, |
| "step": 3960 |
| }, |
| { |
| "ce_loss_10": 3.596343123912811, |
| "ce_loss_13": 3.512529468536377, |
| "ce_loss_2": 4.574345445632934, |
| "ce_loss_3": 4.312320637702942, |
| "ce_loss_7": 3.790241527557373, |
| "epoch": 0.397, |
| "grad_norm": 708.0, |
| "kl_loss_10": 191.98949813842773, |
| "kl_loss_2": 2169.2684020996094, |
| "kl_loss_3": 1705.870343017578, |
| "kl_loss_7": 641.6268707275391, |
| "learning_rate": 0.000668024696607715, |
| "loss": 1235.5194, |
| "step": 3970 |
| }, |
| { |
| "ce_loss_10": 3.555418300628662, |
| "ce_loss_13": 3.470967173576355, |
| "ce_loss_2": 4.566342353820801, |
| "ce_loss_3": 4.313628911972046, |
| "ce_loss_7": 3.7538477182388306, |
| "epoch": 0.398, |
| "grad_norm": 636.0, |
| "kl_loss_10": 198.03487548828124, |
| "kl_loss_2": 2281.672326660156, |
| "kl_loss_3": 1825.4962097167968, |
| "kl_loss_7": 667.3125, |
| "learning_rate": 0.0006665294635499404, |
| "loss": 1243.9658, |
| "step": 3980 |
| }, |
| { |
| "ce_loss_10": 3.561533272266388, |
| "ce_loss_13": 3.4714276075363157, |
| "ce_loss_2": 4.635219573974609, |
| "ce_loss_3": 4.372889280319214, |
| "ce_loss_7": 3.771253454685211, |
| "epoch": 0.399, |
| "grad_norm": 876.0, |
| "kl_loss_10": 208.96954040527345, |
| "kl_loss_2": 2390.9282470703124, |
| "kl_loss_3": 1928.2362426757813, |
| "kl_loss_7": 697.694384765625, |
| "learning_rate": 0.0006650325535423167, |
| "loss": 1276.8535, |
| "step": 3990 |
| }, |
| { |
| "ce_loss_10": 3.5832207202911377, |
| "ce_loss_13": 3.4972815036773683, |
| "ce_loss_2": 4.57238998413086, |
| "ce_loss_3": 4.3085246801376345, |
| "ce_loss_7": 3.7773876547813416, |
| "epoch": 0.4, |
| "grad_norm": 680.0, |
| "kl_loss_10": 194.11105422973634, |
| "kl_loss_2": 2185.9527893066406, |
| "kl_loss_3": 1735.314678955078, |
| "kl_loss_7": 647.3017364501953, |
| "learning_rate": 0.0006635339816587109, |
| "loss": 1234.4078, |
| "step": 4000 |
| }, |
| { |
| "ce_loss_10": 3.524456286430359, |
| "ce_loss_13": 3.4352723956108093, |
| "ce_loss_2": 4.582833385467529, |
| "ce_loss_3": 4.318940043449402, |
| "ce_loss_7": 3.7211544632911684, |
| "epoch": 0.401, |
| "grad_norm": 624.0, |
| "kl_loss_10": 200.03394927978516, |
| "kl_loss_2": 2362.4151245117187, |
| "kl_loss_3": 1889.6026794433594, |
| "kl_loss_7": 668.6993530273437, |
| "learning_rate": 0.0006620337629897252, |
| "loss": 1251.5271, |
| "step": 4010 |
| }, |
| { |
| "ce_loss_10": 3.531862771511078, |
| "ce_loss_13": 3.4413245558738708, |
| "ce_loss_2": 4.564659547805786, |
| "ce_loss_3": 4.295144200325012, |
| "ce_loss_7": 3.728400182723999, |
| "epoch": 0.402, |
| "grad_norm": 556.0, |
| "kl_loss_10": 199.88810348510742, |
| "kl_loss_2": 2302.102014160156, |
| "kl_loss_3": 1823.07431640625, |
| "kl_loss_7": 668.4421752929687, |
| "learning_rate": 0.0006605319126425454, |
| "loss": 1275.6262, |
| "step": 4020 |
| }, |
| { |
| "ce_loss_10": 3.4339096665382387, |
| "ce_loss_13": 3.350968396663666, |
| "ce_loss_2": 4.514854836463928, |
| "ce_loss_3": 4.238226985931396, |
| "ce_loss_7": 3.644399344921112, |
| "epoch": 0.403, |
| "grad_norm": 644.0, |
| "kl_loss_10": 199.70583419799806, |
| "kl_loss_2": 2387.631066894531, |
| "kl_loss_3": 1906.55048828125, |
| "kl_loss_7": 681.1330749511719, |
| "learning_rate": 0.0006590284457407876, |
| "loss": 1275.1312, |
| "step": 4030 |
| }, |
| { |
| "ce_loss_10": 3.5380223751068116, |
| "ce_loss_13": 3.448465049266815, |
| "ce_loss_2": 4.57396821975708, |
| "ce_loss_3": 4.313019490242004, |
| "ce_loss_7": 3.7362441062927245, |
| "epoch": 0.404, |
| "grad_norm": 688.0, |
| "kl_loss_10": 198.1963623046875, |
| "kl_loss_2": 2292.718151855469, |
| "kl_loss_3": 1830.845147705078, |
| "kl_loss_7": 661.0607788085938, |
| "learning_rate": 0.0006575233774243465, |
| "loss": 1249.6318, |
| "step": 4040 |
| }, |
| { |
| "ce_loss_10": 3.528366136550903, |
| "ce_loss_13": 3.4398876786231996, |
| "ce_loss_2": 4.5835960626602175, |
| "ce_loss_3": 4.318015563488006, |
| "ce_loss_7": 3.734170937538147, |
| "epoch": 0.405, |
| "grad_norm": 744.0, |
| "kl_loss_10": 199.81770248413085, |
| "kl_loss_2": 2371.9314208984374, |
| "kl_loss_3": 1896.1641540527344, |
| "kl_loss_7": 680.1676055908204, |
| "learning_rate": 0.0006560167228492435, |
| "loss": 1274.3472, |
| "step": 4050 |
| }, |
| { |
| "ce_loss_10": 3.5713927507400514, |
| "ce_loss_13": 3.4897979974746702, |
| "ce_loss_2": 4.564716410636902, |
| "ce_loss_3": 4.304848039150238, |
| "ce_loss_7": 3.764770579338074, |
| "epoch": 0.406, |
| "grad_norm": 632.0, |
| "kl_loss_10": 190.77299575805665, |
| "kl_loss_2": 2213.3808837890624, |
| "kl_loss_3": 1758.434912109375, |
| "kl_loss_7": 651.0917877197265, |
| "learning_rate": 0.0006545084971874737, |
| "loss": 1244.4535, |
| "step": 4060 |
| }, |
| { |
| "ce_loss_10": 3.5361163854599, |
| "ce_loss_13": 3.446142256259918, |
| "ce_loss_2": 4.613076639175415, |
| "ce_loss_3": 4.341050863265991, |
| "ce_loss_7": 3.7489752054214476, |
| "epoch": 0.407, |
| "grad_norm": 724.0, |
| "kl_loss_10": 204.6956573486328, |
| "kl_loss_2": 2384.869763183594, |
| "kl_loss_3": 1900.13232421875, |
| "kl_loss_7": 693.6003051757813, |
| "learning_rate": 0.0006529987156268526, |
| "loss": 1264.1867, |
| "step": 4070 |
| }, |
| { |
| "ce_loss_10": 3.4604807257652284, |
| "ce_loss_13": 3.3677136301994324, |
| "ce_loss_2": 4.529002094268799, |
| "ce_loss_3": 4.2508728981018065, |
| "ce_loss_7": 3.671179461479187, |
| "epoch": 0.408, |
| "grad_norm": 652.0, |
| "kl_loss_10": 200.91606369018555, |
| "kl_loss_2": 2349.1596496582033, |
| "kl_loss_3": 1858.6745910644531, |
| "kl_loss_7": 678.4067779541016, |
| "learning_rate": 0.0006514873933708637, |
| "loss": 1288.6936, |
| "step": 4080 |
| }, |
| { |
| "ce_loss_10": 3.5669307708740234, |
| "ce_loss_13": 3.4793569445610046, |
| "ce_loss_2": 4.594972729682922, |
| "ce_loss_3": 4.326959013938904, |
| "ce_loss_7": 3.7646772980690004, |
| "epoch": 0.409, |
| "grad_norm": 624.0, |
| "kl_loss_10": 195.33749084472657, |
| "kl_loss_2": 2285.8158325195313, |
| "kl_loss_3": 1812.939276123047, |
| "kl_loss_7": 660.7087280273438, |
| "learning_rate": 0.0006499745456385053, |
| "loss": 1246.0525, |
| "step": 4090 |
| }, |
| { |
| "ce_loss_10": 3.532058572769165, |
| "ce_loss_13": 3.446662437915802, |
| "ce_loss_2": 4.566872882843017, |
| "ce_loss_3": 4.295732653141021, |
| "ce_loss_7": 3.7351000905036926, |
| "epoch": 0.41, |
| "grad_norm": 592.0, |
| "kl_loss_10": 197.3603828430176, |
| "kl_loss_2": 2284.4489013671873, |
| "kl_loss_3": 1809.6247680664062, |
| "kl_loss_7": 669.4995666503906, |
| "learning_rate": 0.0006484601876641375, |
| "loss": 1259.2045, |
| "step": 4100 |
| }, |
| { |
| "ce_loss_10": 3.523475456237793, |
| "ce_loss_13": 3.4387876391410828, |
| "ce_loss_2": 4.524111318588257, |
| "ce_loss_3": 4.253742909431457, |
| "ce_loss_7": 3.7196569561958315, |
| "epoch": 0.411, |
| "grad_norm": 608.0, |
| "kl_loss_10": 194.7115333557129, |
| "kl_loss_2": 2231.6944580078125, |
| "kl_loss_3": 1757.1336669921875, |
| "kl_loss_7": 654.9919372558594, |
| "learning_rate": 0.000646944334697328, |
| "loss": 1224.3209, |
| "step": 4110 |
| }, |
| { |
| "ce_loss_10": 3.6390475988388062, |
| "ce_loss_13": 3.55245840549469, |
| "ce_loss_2": 4.6237300157547, |
| "ce_loss_3": 4.356840944290161, |
| "ce_loss_7": 3.83481205701828, |
| "epoch": 0.412, |
| "grad_norm": 564.0, |
| "kl_loss_10": 194.73427352905273, |
| "kl_loss_2": 2191.641564941406, |
| "kl_loss_3": 1726.6775146484374, |
| "kl_loss_7": 650.4818603515625, |
| "learning_rate": 0.0006454270020026995, |
| "loss": 1203.4656, |
| "step": 4120 |
| }, |
| { |
| "ce_loss_10": 3.6017306566238405, |
| "ce_loss_13": 3.520050418376923, |
| "ce_loss_2": 4.577455329895019, |
| "ce_loss_3": 4.320154881477356, |
| "ce_loss_7": 3.792659568786621, |
| "epoch": 0.413, |
| "grad_norm": 576.0, |
| "kl_loss_10": 189.64769592285157, |
| "kl_loss_2": 2176.6477478027346, |
| "kl_loss_3": 1722.2450378417968, |
| "kl_loss_7": 643.5274627685546, |
| "learning_rate": 0.0006439082048597755, |
| "loss": 1192.4902, |
| "step": 4130 |
| }, |
| { |
| "ce_loss_10": 3.5903451800346375, |
| "ce_loss_13": 3.507824885845184, |
| "ce_loss_2": 4.608788180351257, |
| "ce_loss_3": 4.346335411071777, |
| "ce_loss_7": 3.7915576100349426, |
| "epoch": 0.414, |
| "grad_norm": 580.0, |
| "kl_loss_10": 197.56676864624023, |
| "kl_loss_2": 2267.8357421875, |
| "kl_loss_3": 1807.2357055664063, |
| "kl_loss_7": 666.154751586914, |
| "learning_rate": 0.0006423879585628261, |
| "loss": 1240.4789, |
| "step": 4140 |
| }, |
| { |
| "ce_loss_10": 3.55734179019928, |
| "ce_loss_13": 3.467788887023926, |
| "ce_loss_2": 4.6172443151474, |
| "ce_loss_3": 4.339134466648102, |
| "ce_loss_7": 3.7582768201828003, |
| "epoch": 0.415, |
| "grad_norm": 688.0, |
| "kl_loss_10": 201.20330123901368, |
| "kl_loss_2": 2351.881707763672, |
| "kl_loss_3": 1863.0321411132813, |
| "kl_loss_7": 675.1522674560547, |
| "learning_rate": 0.0006408662784207149, |
| "loss": 1267.4067, |
| "step": 4150 |
| }, |
| { |
| "ce_loss_10": 3.5083068370819093, |
| "ce_loss_13": 3.421711838245392, |
| "ce_loss_2": 4.537308859825134, |
| "ce_loss_3": 4.2711180448532104, |
| "ce_loss_7": 3.708827292919159, |
| "epoch": 0.416, |
| "grad_norm": 696.0, |
| "kl_loss_10": 195.25809020996093, |
| "kl_loss_2": 2288.984796142578, |
| "kl_loss_3": 1822.1060485839844, |
| "kl_loss_7": 665.5764404296875, |
| "learning_rate": 0.0006393431797567439, |
| "loss": 1250.1072, |
| "step": 4160 |
| }, |
| { |
| "ce_loss_10": 3.5913987278938295, |
| "ce_loss_13": 3.509873795509338, |
| "ce_loss_2": 4.573607659339904, |
| "ce_loss_3": 4.318917143344879, |
| "ce_loss_7": 3.7776596546173096, |
| "epoch": 0.417, |
| "grad_norm": 596.0, |
| "kl_loss_10": 194.73566436767578, |
| "kl_loss_2": 2221.0649841308596, |
| "kl_loss_3": 1766.04775390625, |
| "kl_loss_7": 648.3369201660156, |
| "learning_rate": 0.0006378186779084996, |
| "loss": 1190.9323, |
| "step": 4170 |
| }, |
| { |
| "ce_loss_10": 3.4334940314292908, |
| "ce_loss_13": 3.3453728437423704, |
| "ce_loss_2": 4.485861015319824, |
| "ce_loss_3": 4.215418803691864, |
| "ce_loss_7": 3.6394405364990234, |
| "epoch": 0.418, |
| "grad_norm": 676.0, |
| "kl_loss_10": 196.98247756958008, |
| "kl_loss_2": 2312.9372680664064, |
| "kl_loss_3": 1836.5678649902343, |
| "kl_loss_7": 670.5612365722657, |
| "learning_rate": 0.0006362927882276989, |
| "loss": 1261.1342, |
| "step": 4180 |
| }, |
| { |
| "ce_loss_10": 3.622100257873535, |
| "ce_loss_13": 3.5377141356468202, |
| "ce_loss_2": 4.620994114875794, |
| "ce_loss_3": 4.349101042747497, |
| "ce_loss_7": 3.81082307100296, |
| "epoch": 0.419, |
| "grad_norm": 620.0, |
| "kl_loss_10": 192.66633071899415, |
| "kl_loss_2": 2225.1201049804686, |
| "kl_loss_3": 1751.446875, |
| "kl_loss_7": 636.4902648925781, |
| "learning_rate": 0.000634765526080034, |
| "loss": 1194.2031, |
| "step": 4190 |
| }, |
| { |
| "ce_loss_10": 3.626552712917328, |
| "ce_loss_13": 3.5395890951156614, |
| "ce_loss_2": 4.6221943378448485, |
| "ce_loss_3": 4.355586886405945, |
| "ce_loss_7": 3.8178786516189573, |
| "epoch": 0.42, |
| "grad_norm": 612.0, |
| "kl_loss_10": 198.29063568115234, |
| "kl_loss_2": 2233.3197509765623, |
| "kl_loss_3": 1764.3755920410156, |
| "kl_loss_7": 656.7103454589844, |
| "learning_rate": 0.0006332369068450174, |
| "loss": 1207.3598, |
| "step": 4200 |
| }, |
| { |
| "ce_loss_10": 3.5582772374153135, |
| "ce_loss_13": 3.4749309182167054, |
| "ce_loss_2": 4.574929785728455, |
| "ce_loss_3": 4.314627623558044, |
| "ce_loss_7": 3.7553325653076173, |
| "epoch": 0.421, |
| "grad_norm": 588.0, |
| "kl_loss_10": 195.14224548339843, |
| "kl_loss_2": 2252.938916015625, |
| "kl_loss_3": 1800.94208984375, |
| "kl_loss_7": 656.2003112792969, |
| "learning_rate": 0.0006317069459158283, |
| "loss": 1220.2742, |
| "step": 4210 |
| }, |
| { |
| "ce_loss_10": 3.66640442609787, |
| "ce_loss_13": 3.584319996833801, |
| "ce_loss_2": 4.649453711509705, |
| "ce_loss_3": 4.3819632768630985, |
| "ce_loss_7": 3.8596243381500246, |
| "epoch": 0.422, |
| "grad_norm": 592.0, |
| "kl_loss_10": 193.5803665161133, |
| "kl_loss_2": 2195.1602294921877, |
| "kl_loss_3": 1735.2393310546875, |
| "kl_loss_7": 647.3203796386719, |
| "learning_rate": 0.0006301756586991561, |
| "loss": 1218.0184, |
| "step": 4220 |
| }, |
| { |
| "ce_loss_10": 3.452160143852234, |
| "ce_loss_13": 3.3657590985298156, |
| "ce_loss_2": 4.505249190330505, |
| "ce_loss_3": 4.242624092102051, |
| "ce_loss_7": 3.6535696148872376, |
| "epoch": 0.423, |
| "grad_norm": 764.0, |
| "kl_loss_10": 198.62798614501952, |
| "kl_loss_2": 2358.790142822266, |
| "kl_loss_3": 1893.541082763672, |
| "kl_loss_7": 677.0058868408203, |
| "learning_rate": 0.0006286430606150459, |
| "loss": 1264.3267, |
| "step": 4230 |
| }, |
| { |
| "ce_loss_10": 3.6465710401535034, |
| "ce_loss_13": 3.563078057765961, |
| "ce_loss_2": 4.641510224342346, |
| "ce_loss_3": 4.377968907356262, |
| "ce_loss_7": 3.843237745761871, |
| "epoch": 0.424, |
| "grad_norm": 752.0, |
| "kl_loss_10": 199.9485771179199, |
| "kl_loss_2": 2240.6987426757814, |
| "kl_loss_3": 1778.9172729492188, |
| "kl_loss_7": 666.6387664794922, |
| "learning_rate": 0.0006271091670967436, |
| "loss": 1223.7141, |
| "step": 4240 |
| }, |
| { |
| "ce_loss_10": 3.570220148563385, |
| "ce_loss_13": 3.474740993976593, |
| "ce_loss_2": 4.616126585006714, |
| "ce_loss_3": 4.345292592048645, |
| "ce_loss_7": 3.7782423973083494, |
| "epoch": 0.425, |
| "grad_norm": 604.0, |
| "kl_loss_10": 206.9665100097656, |
| "kl_loss_2": 2359.6162658691405, |
| "kl_loss_3": 1878.0119995117188, |
| "kl_loss_7": 699.4417297363282, |
| "learning_rate": 0.0006255739935905395, |
| "loss": 1260.2877, |
| "step": 4250 |
| }, |
| { |
| "ce_loss_10": 3.6002479434013366, |
| "ce_loss_13": 3.5161559224128722, |
| "ce_loss_2": 4.592222595214844, |
| "ce_loss_3": 4.328677630424499, |
| "ce_loss_7": 3.790011668205261, |
| "epoch": 0.426, |
| "grad_norm": 688.0, |
| "kl_loss_10": 196.72698440551758, |
| "kl_loss_2": 2221.3829833984373, |
| "kl_loss_3": 1756.8525756835938, |
| "kl_loss_7": 652.2526733398438, |
| "learning_rate": 0.0006240375555556145, |
| "loss": 1261.0352, |
| "step": 4260 |
| }, |
| { |
| "ce_loss_10": 3.6026462078094483, |
| "ce_loss_13": 3.5168575167655947, |
| "ce_loss_2": 4.6545480489730835, |
| "ce_loss_3": 4.386264157295227, |
| "ce_loss_7": 3.8051093459129333, |
| "epoch": 0.427, |
| "grad_norm": 580.0, |
| "kl_loss_10": 197.9854705810547, |
| "kl_loss_2": 2316.6946411132812, |
| "kl_loss_3": 1838.7398193359375, |
| "kl_loss_7": 667.8762664794922, |
| "learning_rate": 0.000622499868463882, |
| "loss": 1243.157, |
| "step": 4270 |
| }, |
| { |
| "ce_loss_10": 3.574436700344086, |
| "ce_loss_13": 3.4921345114707947, |
| "ce_loss_2": 4.568159365653992, |
| "ce_loss_3": 4.298846364021301, |
| "ce_loss_7": 3.765162992477417, |
| "epoch": 0.428, |
| "grad_norm": 620.0, |
| "kl_loss_10": 194.46619186401367, |
| "kl_loss_2": 2240.061688232422, |
| "kl_loss_3": 1767.6112670898438, |
| "kl_loss_7": 647.0378051757813, |
| "learning_rate": 0.0006209609477998338, |
| "loss": 1226.4191, |
| "step": 4280 |
| }, |
| { |
| "ce_loss_10": 3.6271798372268678, |
| "ce_loss_13": 3.5434911131858824, |
| "ce_loss_2": 4.628253221511841, |
| "ce_loss_3": 4.367080307006836, |
| "ce_loss_7": 3.823750925064087, |
| "epoch": 0.429, |
| "grad_norm": 596.0, |
| "kl_loss_10": 199.08662948608398, |
| "kl_loss_2": 2248.870013427734, |
| "kl_loss_3": 1777.940362548828, |
| "kl_loss_7": 666.140267944336, |
| "learning_rate": 0.0006194208090603844, |
| "loss": 1245.6613, |
| "step": 4290 |
| }, |
| { |
| "ce_loss_10": 3.550025999546051, |
| "ce_loss_13": 3.4652276039123535, |
| "ce_loss_2": 4.55794665813446, |
| "ce_loss_3": 4.292889666557312, |
| "ce_loss_7": 3.7512326836586, |
| "epoch": 0.43, |
| "grad_norm": 696.0, |
| "kl_loss_10": 194.7114356994629, |
| "kl_loss_2": 2238.0842895507812, |
| "kl_loss_3": 1764.372296142578, |
| "kl_loss_7": 659.5233337402344, |
| "learning_rate": 0.0006178794677547138, |
| "loss": 1204.7698, |
| "step": 4300 |
| }, |
| { |
| "ce_loss_10": 3.5732582211494446, |
| "ce_loss_13": 3.4901079893112184, |
| "ce_loss_2": 4.593334412574768, |
| "ce_loss_3": 4.330301976203918, |
| "ce_loss_7": 3.7785701513290406, |
| "epoch": 0.431, |
| "grad_norm": 716.0, |
| "kl_loss_10": 204.0959487915039, |
| "kl_loss_2": 2270.5096435546875, |
| "kl_loss_3": 1810.6998352050782, |
| "kl_loss_7": 680.2683868408203, |
| "learning_rate": 0.0006163369394041111, |
| "loss": 1234.0865, |
| "step": 4310 |
| }, |
| { |
| "ce_loss_10": 3.522435462474823, |
| "ce_loss_13": 3.427918314933777, |
| "ce_loss_2": 4.549873030185699, |
| "ce_loss_3": 4.289526271820068, |
| "ce_loss_7": 3.7253190755844114, |
| "epoch": 0.432, |
| "grad_norm": 800.0, |
| "kl_loss_10": 208.77886199951172, |
| "kl_loss_2": 2301.5377990722654, |
| "kl_loss_3": 1836.5447998046875, |
| "kl_loss_7": 679.7338165283203, |
| "learning_rate": 0.0006147932395418205, |
| "loss": 1277.3873, |
| "step": 4320 |
| }, |
| { |
| "ce_loss_10": 3.5494457960128782, |
| "ce_loss_13": 3.4614178776741027, |
| "ce_loss_2": 4.544875764846802, |
| "ce_loss_3": 4.281032645702362, |
| "ce_loss_7": 3.7431655168533324, |
| "epoch": 0.433, |
| "grad_norm": 576.0, |
| "kl_loss_10": 207.28576126098633, |
| "kl_loss_2": 2223.6774475097654, |
| "kl_loss_3": 1762.4080688476563, |
| "kl_loss_7": 660.7338745117188, |
| "learning_rate": 0.0006132483837128823, |
| "loss": 1209.1447, |
| "step": 4330 |
| }, |
| { |
| "ce_loss_10": 3.5334264755249025, |
| "ce_loss_13": 3.4463666915893554, |
| "ce_loss_2": 4.564454817771912, |
| "ce_loss_3": 4.294960129261017, |
| "ce_loss_7": 3.73456689119339, |
| "epoch": 0.434, |
| "grad_norm": 772.0, |
| "kl_loss_10": 203.00026016235353, |
| "kl_loss_2": 2313.798968505859, |
| "kl_loss_3": 1837.2741760253907, |
| "kl_loss_7": 664.0175872802735, |
| "learning_rate": 0.0006117023874739772, |
| "loss": 1240.8283, |
| "step": 4340 |
| }, |
| { |
| "ce_loss_10": 3.5215348839759826, |
| "ce_loss_13": 3.4345417499542235, |
| "ce_loss_2": 4.554822826385498, |
| "ce_loss_3": 4.285668563842774, |
| "ce_loss_7": 3.7274380683898927, |
| "epoch": 0.435, |
| "grad_norm": 600.0, |
| "kl_loss_10": 199.90404205322267, |
| "kl_loss_2": 2303.0773803710936, |
| "kl_loss_3": 1827.208251953125, |
| "kl_loss_7": 672.8900817871094, |
| "learning_rate": 0.0006101552663932703, |
| "loss": 1260.7756, |
| "step": 4350 |
| }, |
| { |
| "ce_loss_10": 3.554371106624603, |
| "ce_loss_13": 3.4667278289794923, |
| "ce_loss_2": 4.5602539539337155, |
| "ce_loss_3": 4.297711455821991, |
| "ce_loss_7": 3.74671790599823, |
| "epoch": 0.436, |
| "grad_norm": 664.0, |
| "kl_loss_10": 201.51847763061522, |
| "kl_loss_2": 2254.3937927246093, |
| "kl_loss_3": 1790.253662109375, |
| "kl_loss_7": 662.9543731689453, |
| "learning_rate": 0.0006086070360502539, |
| "loss": 1241.8814, |
| "step": 4360 |
| }, |
| { |
| "ce_loss_10": 3.5543460965156557, |
| "ce_loss_13": 3.470974051952362, |
| "ce_loss_2": 4.571776509284973, |
| "ce_loss_3": 4.305854046344757, |
| "ce_loss_7": 3.7547166466712953, |
| "epoch": 0.437, |
| "grad_norm": 608.0, |
| "kl_loss_10": 196.51984329223632, |
| "kl_loss_2": 2276.0406494140625, |
| "kl_loss_3": 1801.6470031738281, |
| "kl_loss_7": 659.9133270263671, |
| "learning_rate": 0.0006070577120355903, |
| "loss": 1236.9521, |
| "step": 4370 |
| }, |
| { |
| "ce_loss_10": 3.5628577947616575, |
| "ce_loss_13": 3.47283878326416, |
| "ce_loss_2": 4.547589898109436, |
| "ce_loss_3": 4.279985129833221, |
| "ce_loss_7": 3.7624007940292357, |
| "epoch": 0.438, |
| "grad_norm": 700.0, |
| "kl_loss_10": 200.08970794677734, |
| "kl_loss_2": 2194.6991455078123, |
| "kl_loss_3": 1728.643865966797, |
| "kl_loss_7": 657.7827362060547, |
| "learning_rate": 0.0006055073099509549, |
| "loss": 1218.3828, |
| "step": 4380 |
| }, |
| { |
| "ce_loss_10": 3.6181469678878786, |
| "ce_loss_13": 3.531364715099335, |
| "ce_loss_2": 4.607781720161438, |
| "ce_loss_3": 4.3447977781295775, |
| "ce_loss_7": 3.8072004199028013, |
| "epoch": 0.439, |
| "grad_norm": 616.0, |
| "kl_loss_10": 200.97432174682618, |
| "kl_loss_2": 2223.8163146972656, |
| "kl_loss_3": 1756.1592224121093, |
| "kl_loss_7": 652.6859497070312, |
| "learning_rate": 0.0006039558454088796, |
| "loss": 1239.9502, |
| "step": 4390 |
| }, |
| { |
| "ce_loss_10": 3.598993420600891, |
| "ce_loss_13": 3.508393979072571, |
| "ce_loss_2": 4.611153769493103, |
| "ce_loss_3": 4.343827414512634, |
| "ce_loss_7": 3.798681151866913, |
| "epoch": 0.44, |
| "grad_norm": 636.0, |
| "kl_loss_10": 207.16089324951173, |
| "kl_loss_2": 2267.215954589844, |
| "kl_loss_3": 1798.3621337890625, |
| "kl_loss_7": 665.1716247558594, |
| "learning_rate": 0.0006024033340325954, |
| "loss": 1210.7668, |
| "step": 4400 |
| }, |
| { |
| "ce_loss_10": 3.6592474579811096, |
| "ce_loss_13": 3.575475811958313, |
| "ce_loss_2": 4.615442514419556, |
| "ce_loss_3": 4.356250524520874, |
| "ce_loss_7": 3.841563415527344, |
| "epoch": 0.441, |
| "grad_norm": 564.0, |
| "kl_loss_10": 192.91486740112305, |
| "kl_loss_2": 2138.478411865234, |
| "kl_loss_3": 1682.6779296875, |
| "kl_loss_7": 628.4862640380859, |
| "learning_rate": 0.0006008497914558743, |
| "loss": 1188.8043, |
| "step": 4410 |
| }, |
| { |
| "ce_loss_10": 3.603752911090851, |
| "ce_loss_13": 3.514535641670227, |
| "ce_loss_2": 4.619881939888001, |
| "ce_loss_3": 4.351648759841919, |
| "ce_loss_7": 3.8029965996742248, |
| "epoch": 0.442, |
| "grad_norm": 680.0, |
| "kl_loss_10": 203.31059799194335, |
| "kl_loss_2": 2279.580682373047, |
| "kl_loss_3": 1800.9931640625, |
| "kl_loss_7": 667.106298828125, |
| "learning_rate": 0.0005992952333228728, |
| "loss": 1234.8536, |
| "step": 4420 |
| }, |
| { |
| "ce_loss_10": 3.5360623002052307, |
| "ce_loss_13": 3.452274763584137, |
| "ce_loss_2": 4.555125761032104, |
| "ce_loss_3": 4.292830312252045, |
| "ce_loss_7": 3.7339815139770507, |
| "epoch": 0.443, |
| "grad_norm": 660.0, |
| "kl_loss_10": 193.53399200439452, |
| "kl_loss_2": 2284.7425048828127, |
| "kl_loss_3": 1820.065606689453, |
| "kl_loss_7": 662.5294036865234, |
| "learning_rate": 0.0005977396752879741, |
| "loss": 1233.2003, |
| "step": 4430 |
| }, |
| { |
| "ce_loss_10": 3.4606794357299804, |
| "ce_loss_13": 3.377534472942352, |
| "ce_loss_2": 4.48543610572815, |
| "ce_loss_3": 4.220411324501038, |
| "ce_loss_7": 3.6664485812187193, |
| "epoch": 0.444, |
| "grad_norm": 580.0, |
| "kl_loss_10": 191.26479797363282, |
| "kl_loss_2": 2280.07265625, |
| "kl_loss_3": 1810.239013671875, |
| "kl_loss_7": 656.8585327148437, |
| "learning_rate": 0.0005961831330156305, |
| "loss": 1222.7674, |
| "step": 4440 |
| }, |
| { |
| "ce_loss_10": 3.603837263584137, |
| "ce_loss_13": 3.5208237767219543, |
| "ce_loss_2": 4.638635230064392, |
| "ce_loss_3": 4.366818988323212, |
| "ce_loss_7": 3.8011784672737123, |
| "epoch": 0.445, |
| "grad_norm": 652.0, |
| "kl_loss_10": 193.8144203186035, |
| "kl_loss_2": 2316.0056640625, |
| "kl_loss_3": 1833.1619812011718, |
| "kl_loss_7": 659.0817749023438, |
| "learning_rate": 0.0005946256221802051, |
| "loss": 1263.1171, |
| "step": 4450 |
| }, |
| { |
| "ce_loss_10": 3.5832170486450194, |
| "ce_loss_13": 3.5048667788505554, |
| "ce_loss_2": 4.5584005355834964, |
| "ce_loss_3": 4.296856260299682, |
| "ce_loss_7": 3.7672229290008543, |
| "epoch": 0.446, |
| "grad_norm": 700.0, |
| "kl_loss_10": 189.50232849121093, |
| "kl_loss_2": 2181.445458984375, |
| "kl_loss_3": 1725.645037841797, |
| "kl_loss_7": 639.0539123535157, |
| "learning_rate": 0.0005930671584658151, |
| "loss": 1259.6497, |
| "step": 4460 |
| }, |
| { |
| "ce_loss_10": 3.5820990085601805, |
| "ce_loss_13": 3.4986414194107054, |
| "ce_loss_2": 4.585966444015503, |
| "ce_loss_3": 4.327683901786804, |
| "ce_loss_7": 3.778271293640137, |
| "epoch": 0.447, |
| "grad_norm": 624.0, |
| "kl_loss_10": 192.16329650878907, |
| "kl_loss_2": 2241.6711364746093, |
| "kl_loss_3": 1786.2928161621094, |
| "kl_loss_7": 656.8107452392578, |
| "learning_rate": 0.0005915077575661722, |
| "loss": 1237.7033, |
| "step": 4470 |
| }, |
| { |
| "ce_loss_10": 3.601723861694336, |
| "ce_loss_13": 3.5175135850906374, |
| "ce_loss_2": 4.623058772087097, |
| "ce_loss_3": 4.352630817890168, |
| "ce_loss_7": 3.801686096191406, |
| "epoch": 0.448, |
| "grad_norm": 576.0, |
| "kl_loss_10": 197.77509002685548, |
| "kl_loss_2": 2287.8472229003905, |
| "kl_loss_3": 1814.064471435547, |
| "kl_loss_7": 669.7168884277344, |
| "learning_rate": 0.000589947435184427, |
| "loss": 1221.476, |
| "step": 4480 |
| }, |
| { |
| "ce_loss_10": 3.667625939846039, |
| "ce_loss_13": 3.5879098773002625, |
| "ce_loss_2": 4.623752212524414, |
| "ce_loss_3": 4.3639614343643185, |
| "ce_loss_7": 3.854480040073395, |
| "epoch": 0.449, |
| "grad_norm": 676.0, |
| "kl_loss_10": 191.73966369628906, |
| "kl_loss_2": 2169.5962097167967, |
| "kl_loss_3": 1711.8840759277343, |
| "kl_loss_7": 644.2419403076171, |
| "learning_rate": 0.0005883862070330078, |
| "loss": 1205.0104, |
| "step": 4490 |
| }, |
| { |
| "ce_loss_10": 3.5975982904434205, |
| "ce_loss_13": 3.5138633131980894, |
| "ce_loss_2": 4.596203637123108, |
| "ce_loss_3": 4.342746245861053, |
| "ce_loss_7": 3.7984990000724794, |
| "epoch": 0.45, |
| "grad_norm": 680.0, |
| "kl_loss_10": 192.790771484375, |
| "kl_loss_2": 2245.34130859375, |
| "kl_loss_3": 1787.1853515625, |
| "kl_loss_7": 655.1883148193359, |
| "learning_rate": 0.0005868240888334653, |
| "loss": 1211.5924, |
| "step": 4500 |
| }, |
| { |
| "ce_loss_10": 3.484956693649292, |
| "ce_loss_13": 3.3994694352149963, |
| "ce_loss_2": 4.541581082344055, |
| "ce_loss_3": 4.265275609493256, |
| "ce_loss_7": 3.685898816585541, |
| "epoch": 0.451, |
| "grad_norm": 664.0, |
| "kl_loss_10": 197.9249183654785, |
| "kl_loss_2": 2329.778839111328, |
| "kl_loss_3": 1847.1219604492187, |
| "kl_loss_7": 669.9408386230468, |
| "learning_rate": 0.0005852610963163119, |
| "loss": 1246.2838, |
| "step": 4510 |
| }, |
| { |
| "ce_loss_10": 3.506740427017212, |
| "ce_loss_13": 3.425083673000336, |
| "ce_loss_2": 4.510421705245972, |
| "ce_loss_3": 4.246485769748688, |
| "ce_loss_7": 3.6969696044921876, |
| "epoch": 0.452, |
| "grad_norm": 600.0, |
| "kl_loss_10": 188.6581832885742, |
| "kl_loss_2": 2238.5858459472656, |
| "kl_loss_3": 1770.7893127441407, |
| "kl_loss_7": 646.1408782958985, |
| "learning_rate": 0.0005836972452208654, |
| "loss": 1201.6553, |
| "step": 4520 |
| }, |
| { |
| "ce_loss_10": 3.505844843387604, |
| "ce_loss_13": 3.4249507427215575, |
| "ce_loss_2": 4.529585886001587, |
| "ce_loss_3": 4.277110803127289, |
| "ce_loss_7": 3.708256196975708, |
| "epoch": 0.453, |
| "grad_norm": 668.0, |
| "kl_loss_10": 193.22399291992187, |
| "kl_loss_2": 2265.8468383789063, |
| "kl_loss_3": 1815.4268432617187, |
| "kl_loss_7": 656.5519592285157, |
| "learning_rate": 0.0005821325512950885, |
| "loss": 1236.8736, |
| "step": 4530 |
| }, |
| { |
| "ce_loss_10": 3.5389772057533264, |
| "ce_loss_13": 3.4585880279541015, |
| "ce_loss_2": 4.540098547935486, |
| "ce_loss_3": 4.2798211693763735, |
| "ce_loss_7": 3.7288518071174623, |
| "epoch": 0.454, |
| "grad_norm": 592.0, |
| "kl_loss_10": 187.7821243286133, |
| "kl_loss_2": 2205.198779296875, |
| "kl_loss_3": 1748.45341796875, |
| "kl_loss_7": 639.7683776855469, |
| "learning_rate": 0.0005805670302954321, |
| "loss": 1221.9566, |
| "step": 4540 |
| }, |
| { |
| "ce_loss_10": 3.544492793083191, |
| "ce_loss_13": 3.4652194142341615, |
| "ce_loss_2": 4.541022229194641, |
| "ce_loss_3": 4.279520082473755, |
| "ce_loss_7": 3.7328044533729554, |
| "epoch": 0.455, |
| "grad_norm": 656.0, |
| "kl_loss_10": 186.06844177246094, |
| "kl_loss_2": 2226.980224609375, |
| "kl_loss_3": 1765.562744140625, |
| "kl_loss_7": 639.6060729980469, |
| "learning_rate": 0.000579000697986675, |
| "loss": 1199.4846, |
| "step": 4550 |
| }, |
| { |
| "ce_loss_10": 3.5037956118583677, |
| "ce_loss_13": 3.4134857773780825, |
| "ce_loss_2": 4.544147634506226, |
| "ce_loss_3": 4.274082601070404, |
| "ce_loss_7": 3.707910752296448, |
| "epoch": 0.456, |
| "grad_norm": 664.0, |
| "kl_loss_10": 200.43186416625977, |
| "kl_loss_2": 2315.5464111328124, |
| "kl_loss_3": 1832.4145568847657, |
| "kl_loss_7": 672.0404296875, |
| "learning_rate": 0.0005774335701417662, |
| "loss": 1229.2445, |
| "step": 4560 |
| }, |
| { |
| "ce_loss_10": 3.4942433714866636, |
| "ce_loss_13": 3.4095874786376954, |
| "ce_loss_2": 4.549353170394897, |
| "ce_loss_3": 4.279058015346527, |
| "ce_loss_7": 3.693026268482208, |
| "epoch": 0.457, |
| "grad_norm": 608.0, |
| "kl_loss_10": 192.1516143798828, |
| "kl_loss_2": 2342.186248779297, |
| "kl_loss_3": 1864.820654296875, |
| "kl_loss_7": 655.2123260498047, |
| "learning_rate": 0.0005758656625416658, |
| "loss": 1241.1571, |
| "step": 4570 |
| }, |
| { |
| "ce_loss_10": 3.5480048656463623, |
| "ce_loss_13": 3.4622543811798097, |
| "ce_loss_2": 4.561884355545044, |
| "ce_loss_3": 4.293217277526855, |
| "ce_loss_7": 3.743514323234558, |
| "epoch": 0.458, |
| "grad_norm": 616.0, |
| "kl_loss_10": 194.93896102905273, |
| "kl_loss_2": 2260.48984375, |
| "kl_loss_3": 1786.8557556152343, |
| "kl_loss_7": 654.7685607910156, |
| "learning_rate": 0.0005742969909751859, |
| "loss": 1199.7715, |
| "step": 4580 |
| }, |
| { |
| "ce_loss_10": 3.558157193660736, |
| "ce_loss_13": 3.4740110039711, |
| "ce_loss_2": 4.582901740074158, |
| "ce_loss_3": 4.310809695720673, |
| "ce_loss_7": 3.7480292677879334, |
| "epoch": 0.459, |
| "grad_norm": 636.0, |
| "kl_loss_10": 193.16277923583985, |
| "kl_loss_2": 2285.9891052246094, |
| "kl_loss_3": 1800.92705078125, |
| "kl_loss_7": 648.6197357177734, |
| "learning_rate": 0.0005727275712388318, |
| "loss": 1238.3732, |
| "step": 4590 |
| }, |
| { |
| "ce_loss_10": 3.5862102270126344, |
| "ce_loss_13": 3.509055662155151, |
| "ce_loss_2": 4.560896277427673, |
| "ce_loss_3": 4.298801875114441, |
| "ce_loss_7": 3.773897314071655, |
| "epoch": 0.46, |
| "grad_norm": 768.0, |
| "kl_loss_10": 186.60687026977538, |
| "kl_loss_2": 2190.591516113281, |
| "kl_loss_3": 1728.75283203125, |
| "kl_loss_7": 633.5937683105469, |
| "learning_rate": 0.0005711574191366427, |
| "loss": 1204.0141, |
| "step": 4600 |
| }, |
| { |
| "ce_loss_10": 3.537917101383209, |
| "ce_loss_13": 3.456929898262024, |
| "ce_loss_2": 4.532997250556946, |
| "ce_loss_3": 4.271801400184631, |
| "ce_loss_7": 3.7239314556121825, |
| "epoch": 0.461, |
| "grad_norm": 544.0, |
| "kl_loss_10": 188.38971405029298, |
| "kl_loss_2": 2244.5726928710938, |
| "kl_loss_3": 1779.3487548828125, |
| "kl_loss_7": 643.0867309570312, |
| "learning_rate": 0.0005695865504800327, |
| "loss": 1208.6229, |
| "step": 4610 |
| }, |
| { |
| "ce_loss_10": 3.475814175605774, |
| "ce_loss_13": 3.3895989418029786, |
| "ce_loss_2": 4.570864033699036, |
| "ce_loss_3": 4.298530387878418, |
| "ce_loss_7": 3.6918977737426757, |
| "epoch": 0.462, |
| "grad_norm": 688.0, |
| "kl_loss_10": 199.44021301269532, |
| "kl_loss_2": 2396.831396484375, |
| "kl_loss_3": 1919.1037109375, |
| "kl_loss_7": 685.7258270263671, |
| "learning_rate": 0.0005680149810876322, |
| "loss": 1259.1618, |
| "step": 4620 |
| }, |
| { |
| "ce_loss_10": 3.5307737231254577, |
| "ce_loss_13": 3.448805606365204, |
| "ce_loss_2": 4.553147649765014, |
| "ce_loss_3": 4.283793473243714, |
| "ce_loss_7": 3.720176661014557, |
| "epoch": 0.463, |
| "grad_norm": 632.0, |
| "kl_loss_10": 191.36487274169923, |
| "kl_loss_2": 2267.567822265625, |
| "kl_loss_3": 1802.5030578613282, |
| "kl_loss_7": 648.5958099365234, |
| "learning_rate": 0.0005664427267851271, |
| "loss": 1217.3594, |
| "step": 4630 |
| }, |
| { |
| "ce_loss_10": 3.4447478532791136, |
| "ce_loss_13": 3.362277901172638, |
| "ce_loss_2": 4.474937617778778, |
| "ce_loss_3": 4.203511357307434, |
| "ce_loss_7": 3.640981078147888, |
| "epoch": 0.464, |
| "grad_norm": 616.0, |
| "kl_loss_10": 189.61345367431642, |
| "kl_loss_2": 2284.305810546875, |
| "kl_loss_3": 1801.5720520019531, |
| "kl_loss_7": 647.2827972412109, |
| "learning_rate": 0.0005648698034051009, |
| "loss": 1216.2738, |
| "step": 4640 |
| }, |
| { |
| "ce_loss_10": 3.5612680554389953, |
| "ce_loss_13": 3.479226899147034, |
| "ce_loss_2": 4.606190347671509, |
| "ce_loss_3": 4.343288254737854, |
| "ce_loss_7": 3.7559192776679993, |
| "epoch": 0.465, |
| "grad_norm": 680.0, |
| "kl_loss_10": 189.31488800048828, |
| "kl_loss_2": 2300.2595642089846, |
| "kl_loss_3": 1835.63125, |
| "kl_loss_7": 647.0011413574218, |
| "learning_rate": 0.0005632962267868747, |
| "loss": 1204.3232, |
| "step": 4650 |
| }, |
| { |
| "ce_loss_10": 3.504312825202942, |
| "ce_loss_13": 3.4246782064437866, |
| "ce_loss_2": 4.501714015007019, |
| "ce_loss_3": 4.243509244918823, |
| "ce_loss_7": 3.6963974952697756, |
| "epoch": 0.466, |
| "grad_norm": 656.0, |
| "kl_loss_10": 184.82376022338866, |
| "kl_loss_2": 2221.081378173828, |
| "kl_loss_3": 1770.11845703125, |
| "kl_loss_7": 636.5809020996094, |
| "learning_rate": 0.0005617220127763474, |
| "loss": 1219.1382, |
| "step": 4660 |
| }, |
| { |
| "ce_loss_10": 3.578074049949646, |
| "ce_loss_13": 3.497161865234375, |
| "ce_loss_2": 4.561417579650879, |
| "ce_loss_3": 4.303230881690979, |
| "ce_loss_7": 3.7666924834251403, |
| "epoch": 0.467, |
| "grad_norm": 592.0, |
| "kl_loss_10": 188.17724151611327, |
| "kl_loss_2": 2198.6551513671875, |
| "kl_loss_3": 1739.3878234863282, |
| "kl_loss_7": 638.6716613769531, |
| "learning_rate": 0.0005601471772258368, |
| "loss": 1209.8152, |
| "step": 4670 |
| }, |
| { |
| "ce_loss_10": 3.5602858781814577, |
| "ce_loss_13": 3.4812931418418884, |
| "ce_loss_2": 4.544128322601319, |
| "ce_loss_3": 4.283940744400025, |
| "ce_loss_7": 3.750748324394226, |
| "epoch": 0.468, |
| "grad_norm": 684.0, |
| "kl_loss_10": 186.29373779296876, |
| "kl_loss_2": 2186.011083984375, |
| "kl_loss_3": 1724.9905029296874, |
| "kl_loss_7": 634.5341583251953, |
| "learning_rate": 0.0005585717359939192, |
| "loss": 1216.8666, |
| "step": 4680 |
| }, |
| { |
| "ce_loss_10": 3.47387490272522, |
| "ce_loss_13": 3.3916377425193787, |
| "ce_loss_2": 4.47896523475647, |
| "ce_loss_3": 4.213813447952271, |
| "ce_loss_7": 3.6638592004776003, |
| "epoch": 0.469, |
| "grad_norm": 736.0, |
| "kl_loss_10": 187.3494743347168, |
| "kl_loss_2": 2222.502734375, |
| "kl_loss_3": 1755.6538635253905, |
| "kl_loss_7": 638.4273468017578, |
| "learning_rate": 0.0005569957049452703, |
| "loss": 1235.6265, |
| "step": 4690 |
| }, |
| { |
| "ce_loss_10": 3.530002760887146, |
| "ce_loss_13": 3.4474871516227723, |
| "ce_loss_2": 4.558400893211365, |
| "ce_loss_3": 4.2877805709838865, |
| "ce_loss_7": 3.7245721340179445, |
| "epoch": 0.47, |
| "grad_norm": 704.0, |
| "kl_loss_10": 192.37612838745116, |
| "kl_loss_2": 2285.8403198242186, |
| "kl_loss_3": 1808.7154968261718, |
| "kl_loss_7": 653.8845581054687, |
| "learning_rate": 0.0005554190999505056, |
| "loss": 1234.8666, |
| "step": 4700 |
| }, |
| { |
| "ce_loss_10": 3.655286133289337, |
| "ce_loss_13": 3.5717312812805178, |
| "ce_loss_2": 4.666804194450378, |
| "ce_loss_3": 4.405120444297791, |
| "ce_loss_7": 3.852936863899231, |
| "epoch": 0.471, |
| "grad_norm": 612.0, |
| "kl_loss_10": 194.36407165527345, |
| "kl_loss_2": 2267.82900390625, |
| "kl_loss_3": 1798.198681640625, |
| "kl_loss_7": 661.5685516357422, |
| "learning_rate": 0.0005538419368860196, |
| "loss": 1183.023, |
| "step": 4710 |
| }, |
| { |
| "ce_loss_10": 3.5788578033447265, |
| "ce_loss_13": 3.498483991622925, |
| "ce_loss_2": 4.574405527114868, |
| "ce_loss_3": 4.313237249851227, |
| "ce_loss_7": 3.768782043457031, |
| "epoch": 0.472, |
| "grad_norm": 600.0, |
| "kl_loss_10": 190.92964248657228, |
| "kl_loss_2": 2231.130651855469, |
| "kl_loss_3": 1765.2990478515626, |
| "kl_loss_7": 643.7991912841796, |
| "learning_rate": 0.0005522642316338268, |
| "loss": 1233.693, |
| "step": 4720 |
| }, |
| { |
| "ce_loss_10": 3.581640887260437, |
| "ce_loss_13": 3.5026119351387024, |
| "ce_loss_2": 4.585021948814392, |
| "ce_loss_3": 4.325532901287079, |
| "ce_loss_7": 3.7722782731056212, |
| "epoch": 0.473, |
| "grad_norm": 608.0, |
| "kl_loss_10": 190.94201431274413, |
| "kl_loss_2": 2235.0365600585938, |
| "kl_loss_3": 1776.091015625, |
| "kl_loss_7": 644.9752258300781, |
| "learning_rate": 0.0005506860000814017, |
| "loss": 1245.2729, |
| "step": 4730 |
| }, |
| { |
| "ce_loss_10": 3.609380042552948, |
| "ce_loss_13": 3.5285964608192444, |
| "ce_loss_2": 4.5732040166854855, |
| "ce_loss_3": 4.316051697731018, |
| "ce_loss_7": 3.7950204849243163, |
| "epoch": 0.474, |
| "grad_norm": 624.0, |
| "kl_loss_10": 185.59933853149414, |
| "kl_loss_2": 2152.8808044433595, |
| "kl_loss_3": 1698.98193359375, |
| "kl_loss_7": 630.6379913330078, |
| "learning_rate": 0.0005491072581215186, |
| "loss": 1197.5367, |
| "step": 4740 |
| }, |
| { |
| "ce_loss_10": 3.6150610566139223, |
| "ce_loss_13": 3.5275300979614257, |
| "ce_loss_2": 4.606984066963196, |
| "ce_loss_3": 4.331383717060089, |
| "ce_loss_7": 3.8067922830581664, |
| "epoch": 0.475, |
| "grad_norm": 636.0, |
| "kl_loss_10": 196.42518692016603, |
| "kl_loss_2": 2246.8279663085937, |
| "kl_loss_3": 1758.9309448242188, |
| "kl_loss_7": 653.222998046875, |
| "learning_rate": 0.0005475280216520913, |
| "loss": 1187.7061, |
| "step": 4750 |
| }, |
| { |
| "ce_loss_10": 3.5246535778045653, |
| "ce_loss_13": 3.4453783988952638, |
| "ce_loss_2": 4.515398740768433, |
| "ce_loss_3": 4.251828491687775, |
| "ce_loss_7": 3.7125940799713133, |
| "epoch": 0.476, |
| "grad_norm": 660.0, |
| "kl_loss_10": 186.9199966430664, |
| "kl_loss_2": 2199.839562988281, |
| "kl_loss_3": 1734.392041015625, |
| "kl_loss_7": 632.1179809570312, |
| "learning_rate": 0.0005459483065760138, |
| "loss": 1229.7142, |
| "step": 4760 |
| }, |
| { |
| "ce_loss_10": 3.4620707392692567, |
| "ce_loss_13": 3.379168164730072, |
| "ce_loss_2": 4.535378384590149, |
| "ce_loss_3": 4.269984316825867, |
| "ce_loss_7": 3.66820707321167, |
| "epoch": 0.477, |
| "grad_norm": 668.0, |
| "kl_loss_10": 189.84198379516602, |
| "kl_loss_2": 2346.7093200683594, |
| "kl_loss_3": 1881.5502380371095, |
| "kl_loss_7": 655.4429504394532, |
| "learning_rate": 0.0005443681288009991, |
| "loss": 1231.516, |
| "step": 4770 |
| }, |
| { |
| "ce_loss_10": 3.5201017260551453, |
| "ce_loss_13": 3.4394583463668824, |
| "ce_loss_2": 4.551519656181336, |
| "ce_loss_3": 4.275486898422241, |
| "ce_loss_7": 3.712023985385895, |
| "epoch": 0.478, |
| "grad_norm": 560.0, |
| "kl_loss_10": 188.47934265136718, |
| "kl_loss_2": 2298.261828613281, |
| "kl_loss_3": 1807.319403076172, |
| "kl_loss_7": 646.2581420898438, |
| "learning_rate": 0.0005427875042394199, |
| "loss": 1231.2074, |
| "step": 4780 |
| }, |
| { |
| "ce_loss_10": 3.5546525955200194, |
| "ce_loss_13": 3.4689871072769165, |
| "ce_loss_2": 4.55171308517456, |
| "ce_loss_3": 4.2830651044845585, |
| "ce_loss_7": 3.7494895100593566, |
| "epoch": 0.479, |
| "grad_norm": 568.0, |
| "kl_loss_10": 193.1684341430664, |
| "kl_loss_2": 2223.558709716797, |
| "kl_loss_3": 1744.958233642578, |
| "kl_loss_7": 652.0952423095703, |
| "learning_rate": 0.0005412064488081482, |
| "loss": 1232.2334, |
| "step": 4790 |
| }, |
| { |
| "ce_loss_10": 3.560468685626984, |
| "ce_loss_13": 3.4794244885444643, |
| "ce_loss_2": 4.549170875549317, |
| "ce_loss_3": 4.280433797836304, |
| "ce_loss_7": 3.744898808002472, |
| "epoch": 0.48, |
| "grad_norm": 548.0, |
| "kl_loss_10": 188.24676589965821, |
| "kl_loss_2": 2217.6575561523437, |
| "kl_loss_3": 1743.2784423828125, |
| "kl_loss_7": 636.4865295410157, |
| "learning_rate": 0.0005396249784283942, |
| "loss": 1197.0651, |
| "step": 4800 |
| }, |
| { |
| "ce_loss_10": 3.575687527656555, |
| "ce_loss_13": 3.4918730735778807, |
| "ce_loss_2": 4.614717435836792, |
| "ce_loss_3": 4.347899675369263, |
| "ce_loss_7": 3.7766286730766296, |
| "epoch": 0.481, |
| "grad_norm": 592.0, |
| "kl_loss_10": 195.0629508972168, |
| "kl_loss_2": 2307.5621826171873, |
| "kl_loss_3": 1836.4153686523437, |
| "kl_loss_7": 665.144580078125, |
| "learning_rate": 0.0005380431090255476, |
| "loss": 1235.3045, |
| "step": 4810 |
| }, |
| { |
| "ce_loss_10": 3.565406787395477, |
| "ce_loss_13": 3.487363612651825, |
| "ce_loss_2": 4.546458888053894, |
| "ce_loss_3": 4.2899955153465275, |
| "ce_loss_7": 3.747445857524872, |
| "epoch": 0.482, |
| "grad_norm": 608.0, |
| "kl_loss_10": 183.49071578979493, |
| "kl_loss_2": 2200.6481811523436, |
| "kl_loss_3": 1737.9393493652344, |
| "kl_loss_7": 622.3307556152344, |
| "learning_rate": 0.0005364608565290155, |
| "loss": 1189.2841, |
| "step": 4820 |
| }, |
| { |
| "ce_loss_10": 3.5748016953468325, |
| "ce_loss_13": 3.493953990936279, |
| "ce_loss_2": 4.58906729221344, |
| "ce_loss_3": 4.324404489994049, |
| "ce_loss_7": 3.7643205761909484, |
| "epoch": 0.483, |
| "grad_norm": 640.0, |
| "kl_loss_10": 190.96404800415038, |
| "kl_loss_2": 2251.1127075195313, |
| "kl_loss_3": 1785.181817626953, |
| "kl_loss_7": 641.9654663085937, |
| "learning_rate": 0.0005348782368720626, |
| "loss": 1217.6031, |
| "step": 4830 |
| }, |
| { |
| "ce_loss_10": 3.5082598328590393, |
| "ce_loss_13": 3.427862787246704, |
| "ce_loss_2": 4.508589172363282, |
| "ce_loss_3": 4.243466067314148, |
| "ce_loss_7": 3.6949036836624147, |
| "epoch": 0.484, |
| "grad_norm": 560.0, |
| "kl_loss_10": 186.74840545654297, |
| "kl_loss_2": 2224.133184814453, |
| "kl_loss_3": 1753.7203369140625, |
| "kl_loss_7": 630.4135833740235, |
| "learning_rate": 0.000533295265991652, |
| "loss": 1216.8205, |
| "step": 4840 |
| }, |
| { |
| "ce_loss_10": 3.5815645456314087, |
| "ce_loss_13": 3.4982495427131655, |
| "ce_loss_2": 4.554982018470764, |
| "ce_loss_3": 4.299691355228424, |
| "ce_loss_7": 3.7735238790512087, |
| "epoch": 0.485, |
| "grad_norm": 584.0, |
| "kl_loss_10": 187.77717666625978, |
| "kl_loss_2": 2175.573095703125, |
| "kl_loss_3": 1715.7588928222656, |
| "kl_loss_7": 631.838656616211, |
| "learning_rate": 0.0005317119598282822, |
| "loss": 1183.9046, |
| "step": 4850 |
| }, |
| { |
| "ce_loss_10": 3.586243951320648, |
| "ce_loss_13": 3.5034523725509645, |
| "ce_loss_2": 4.583103036880493, |
| "ce_loss_3": 4.312316799163819, |
| "ce_loss_7": 3.777419722080231, |
| "epoch": 0.486, |
| "grad_norm": 648.0, |
| "kl_loss_10": 189.01727676391602, |
| "kl_loss_2": 2203.9095703125, |
| "kl_loss_3": 1739.787420654297, |
| "kl_loss_7": 638.8051147460938, |
| "learning_rate": 0.0005301283343258293, |
| "loss": 1199.0793, |
| "step": 4860 |
| }, |
| { |
| "ce_loss_10": 3.644785749912262, |
| "ce_loss_13": 3.563555288314819, |
| "ce_loss_2": 4.610913848876953, |
| "ce_loss_3": 4.34997011423111, |
| "ce_loss_7": 3.832633006572723, |
| "epoch": 0.487, |
| "grad_norm": 648.0, |
| "kl_loss_10": 187.26018371582032, |
| "kl_loss_2": 2164.9867431640623, |
| "kl_loss_3": 1703.8299499511718, |
| "kl_loss_7": 629.719970703125, |
| "learning_rate": 0.000528544405431384, |
| "loss": 1174.2795, |
| "step": 4870 |
| }, |
| { |
| "ce_loss_10": 3.5308486342430117, |
| "ce_loss_13": 3.4465184569358827, |
| "ce_loss_2": 4.54223735332489, |
| "ce_loss_3": 4.275557327270508, |
| "ce_loss_7": 3.728735053539276, |
| "epoch": 0.488, |
| "grad_norm": 692.0, |
| "kl_loss_10": 194.1014518737793, |
| "kl_loss_2": 2267.4865783691407, |
| "kl_loss_3": 1794.8980224609375, |
| "kl_loss_7": 653.2649841308594, |
| "learning_rate": 0.000526960189095093, |
| "loss": 1222.7201, |
| "step": 4880 |
| }, |
| { |
| "ce_loss_10": 3.5016911029815674, |
| "ce_loss_13": 3.422479736804962, |
| "ce_loss_2": 4.5065477132797245, |
| "ce_loss_3": 4.244668066501617, |
| "ce_loss_7": 3.6958303570747377, |
| "epoch": 0.489, |
| "grad_norm": 624.0, |
| "kl_loss_10": 185.53594131469725, |
| "kl_loss_2": 2219.627575683594, |
| "kl_loss_3": 1760.8566650390626, |
| "kl_loss_7": 633.8929748535156, |
| "learning_rate": 0.0005253757012699972, |
| "loss": 1199.7284, |
| "step": 4890 |
| }, |
| { |
| "ce_loss_10": 3.592365336418152, |
| "ce_loss_13": 3.5133956909179687, |
| "ce_loss_2": 4.582755160331726, |
| "ce_loss_3": 4.310234916210175, |
| "ce_loss_7": 3.779422330856323, |
| "epoch": 0.49, |
| "grad_norm": 608.0, |
| "kl_loss_10": 188.63387451171874, |
| "kl_loss_2": 2196.80634765625, |
| "kl_loss_3": 1721.877880859375, |
| "kl_loss_7": 628.9239440917969, |
| "learning_rate": 0.0005237909579118712, |
| "loss": 1209.9893, |
| "step": 4900 |
| }, |
| { |
| "ce_loss_10": 3.5542251110076903, |
| "ce_loss_13": 3.470878171920776, |
| "ce_loss_2": 4.575217127799988, |
| "ce_loss_3": 4.311789894104004, |
| "ce_loss_7": 3.748956894874573, |
| "epoch": 0.491, |
| "grad_norm": 688.0, |
| "kl_loss_10": 192.452791595459, |
| "kl_loss_2": 2289.5204833984376, |
| "kl_loss_3": 1820.7321044921875, |
| "kl_loss_7": 654.6072784423828, |
| "learning_rate": 0.0005222059749790631, |
| "loss": 1232.3309, |
| "step": 4910 |
| }, |
| { |
| "ce_loss_10": 3.6172361254692076, |
| "ce_loss_13": 3.538671875, |
| "ce_loss_2": 4.572561645507813, |
| "ce_loss_3": 4.3095086216926575, |
| "ce_loss_7": 3.7990201711654663, |
| "epoch": 0.492, |
| "grad_norm": 580.0, |
| "kl_loss_10": 186.14958953857422, |
| "kl_loss_2": 2152.0723571777344, |
| "kl_loss_3": 1686.5931701660156, |
| "kl_loss_7": 627.8851165771484, |
| "learning_rate": 0.0005206207684323337, |
| "loss": 1161.1154, |
| "step": 4920 |
| }, |
| { |
| "ce_loss_10": 3.597834038734436, |
| "ce_loss_13": 3.5186328291893005, |
| "ce_loss_2": 4.576415348052978, |
| "ce_loss_3": 4.318524956703186, |
| "ce_loss_7": 3.7833918571472167, |
| "epoch": 0.493, |
| "grad_norm": 680.0, |
| "kl_loss_10": 190.28093643188475, |
| "kl_loss_2": 2205.189178466797, |
| "kl_loss_3": 1744.895166015625, |
| "kl_loss_7": 637.9018249511719, |
| "learning_rate": 0.000519035354234695, |
| "loss": 1221.5055, |
| "step": 4930 |
| }, |
| { |
| "ce_loss_10": 3.5777213335037232, |
| "ce_loss_13": 3.4926177620887757, |
| "ce_loss_2": 4.569476509094239, |
| "ce_loss_3": 4.300305211544037, |
| "ce_loss_7": 3.7719446539878847, |
| "epoch": 0.494, |
| "grad_norm": 652.0, |
| "kl_loss_10": 191.98795700073242, |
| "kl_loss_2": 2217.8314697265623, |
| "kl_loss_3": 1735.119366455078, |
| "kl_loss_7": 648.0345703125, |
| "learning_rate": 0.0005174497483512506, |
| "loss": 1188.0275, |
| "step": 4940 |
| }, |
| { |
| "ce_loss_10": 3.617672252655029, |
| "ce_loss_13": 3.5411610841751098, |
| "ce_loss_2": 4.595355463027954, |
| "ce_loss_3": 4.32707976102829, |
| "ce_loss_7": 3.8022171378135683, |
| "epoch": 0.495, |
| "grad_norm": 704.0, |
| "kl_loss_10": 185.97076797485352, |
| "kl_loss_2": 2193.0468505859376, |
| "kl_loss_3": 1726.8812622070313, |
| "kl_loss_7": 638.2306701660157, |
| "learning_rate": 0.0005158639667490339, |
| "loss": 1220.6553, |
| "step": 4950 |
| }, |
| { |
| "ce_loss_10": 3.5151694416999817, |
| "ce_loss_13": 3.4326966643333434, |
| "ce_loss_2": 4.5227725267410275, |
| "ce_loss_3": 4.255635476112365, |
| "ce_loss_7": 3.710303211212158, |
| "epoch": 0.496, |
| "grad_norm": 632.0, |
| "kl_loss_10": 189.1722068786621, |
| "kl_loss_2": 2227.908306884766, |
| "kl_loss_3": 1751.7201293945313, |
| "kl_loss_7": 643.16630859375, |
| "learning_rate": 0.0005142780253968481, |
| "loss": 1203.2568, |
| "step": 4960 |
| }, |
| { |
| "ce_loss_10": 3.4694177746772765, |
| "ce_loss_13": 3.3919921875, |
| "ce_loss_2": 4.455039215087891, |
| "ce_loss_3": 4.192563462257385, |
| "ce_loss_7": 3.6608409881591797, |
| "epoch": 0.497, |
| "grad_norm": 672.0, |
| "kl_loss_10": 182.45398559570313, |
| "kl_loss_2": 2196.9568115234374, |
| "kl_loss_3": 1734.4176452636718, |
| "kl_loss_7": 624.9595611572265, |
| "learning_rate": 0.0005126919402651053, |
| "loss": 1165.1617, |
| "step": 4970 |
| }, |
| { |
| "ce_loss_10": 3.5411869525909423, |
| "ce_loss_13": 3.4560129284858703, |
| "ce_loss_2": 4.551884508132934, |
| "ce_loss_3": 4.285728931427002, |
| "ce_loss_7": 3.730460357666016, |
| "epoch": 0.498, |
| "grad_norm": 612.0, |
| "kl_loss_10": 190.1128143310547, |
| "kl_loss_2": 2234.540148925781, |
| "kl_loss_3": 1768.0274841308594, |
| "kl_loss_7": 642.9938751220703, |
| "learning_rate": 0.0005111057273256647, |
| "loss": 1218.0719, |
| "step": 4980 |
| }, |
| { |
| "ce_loss_10": 3.640482187271118, |
| "ce_loss_13": 3.563759708404541, |
| "ce_loss_2": 4.559627389907837, |
| "ce_loss_3": 4.304729497432708, |
| "ce_loss_7": 3.809755790233612, |
| "epoch": 0.499, |
| "grad_norm": 600.0, |
| "kl_loss_10": 181.2877067565918, |
| "kl_loss_2": 2076.4399169921876, |
| "kl_loss_3": 1633.4311096191407, |
| "kl_loss_7": 606.3493682861329, |
| "learning_rate": 0.0005095194025516733, |
| "loss": 1149.4782, |
| "step": 4990 |
| }, |
| { |
| "ce_loss_10": 3.561459171772003, |
| "ce_loss_13": 3.485899102687836, |
| "ce_loss_2": 4.532869434356689, |
| "ce_loss_3": 4.273192000389099, |
| "ce_loss_7": 3.7427910447120665, |
| "epoch": 0.5, |
| "grad_norm": 612.0, |
| "kl_loss_10": 182.62270965576172, |
| "kl_loss_2": 2161.449591064453, |
| "kl_loss_3": 1706.75859375, |
| "kl_loss_7": 617.4605316162109, |
| "learning_rate": 0.000507932981917404, |
| "loss": 1217.3309, |
| "step": 5000 |
| }, |
| { |
| "ce_loss_10": 3.518963348865509, |
| "ce_loss_13": 3.4364115476608275, |
| "ce_loss_2": 4.566620469093323, |
| "ce_loss_3": 4.296507096290588, |
| "ce_loss_7": 3.7167017698287963, |
| "epoch": 0.501, |
| "grad_norm": 604.0, |
| "kl_loss_10": 191.43318862915038, |
| "kl_loss_2": 2312.919299316406, |
| "kl_loss_3": 1835.5362060546875, |
| "kl_loss_7": 654.5825164794921, |
| "learning_rate": 0.0005063464813980949, |
| "loss": 1243.5809, |
| "step": 5010 |
| }, |
| { |
| "ce_loss_10": 3.503278911113739, |
| "ce_loss_13": 3.423468828201294, |
| "ce_loss_2": 4.508650994300842, |
| "ce_loss_3": 4.244243478775024, |
| "ce_loss_7": 3.6842400670051574, |
| "epoch": 0.502, |
| "grad_norm": 616.0, |
| "kl_loss_10": 187.45429153442382, |
| "kl_loss_2": 2242.6967956542967, |
| "kl_loss_3": 1780.9238586425781, |
| "kl_loss_7": 636.0354858398438, |
| "learning_rate": 0.0005047599169697884, |
| "loss": 1195.7843, |
| "step": 5020 |
| }, |
| { |
| "ce_loss_10": 3.4397648930549622, |
| "ce_loss_13": 3.357620894908905, |
| "ce_loss_2": 4.463168692588806, |
| "ce_loss_3": 4.195060646533966, |
| "ce_loss_7": 3.633397877216339, |
| "epoch": 0.503, |
| "grad_norm": 604.0, |
| "kl_loss_10": 185.41551361083984, |
| "kl_loss_2": 2258.051135253906, |
| "kl_loss_3": 1778.3387390136718, |
| "kl_loss_7": 635.239013671875, |
| "learning_rate": 0.000503173304609171, |
| "loss": 1183.8663, |
| "step": 5030 |
| }, |
| { |
| "ce_loss_10": 3.5603776931762696, |
| "ce_loss_13": 3.4799546360969544, |
| "ce_loss_2": 4.5456082105636595, |
| "ce_loss_3": 4.285192847251892, |
| "ce_loss_7": 3.7480576753616335, |
| "epoch": 0.504, |
| "grad_norm": 656.0, |
| "kl_loss_10": 184.81720504760742, |
| "kl_loss_2": 2170.377197265625, |
| "kl_loss_3": 1713.3125, |
| "kl_loss_7": 627.2051513671875, |
| "learning_rate": 0.0005015866602934111, |
| "loss": 1173.4953, |
| "step": 5040 |
| }, |
| { |
| "ce_loss_10": 3.5348097562789915, |
| "ce_loss_13": 3.4481786727905273, |
| "ce_loss_2": 4.561786007881165, |
| "ce_loss_3": 4.291987287998199, |
| "ce_loss_7": 3.732908022403717, |
| "epoch": 0.505, |
| "grad_norm": 584.0, |
| "kl_loss_10": 195.19094161987306, |
| "kl_loss_2": 2283.529962158203, |
| "kl_loss_3": 1808.7241088867188, |
| "kl_loss_7": 661.38330078125, |
| "learning_rate": 0.0005, |
| "loss": 1216.3971, |
| "step": 5050 |
| }, |
| { |
| "ce_loss_10": 3.5199029207229615, |
| "ce_loss_13": 3.4400954723358153, |
| "ce_loss_2": 4.522394108772278, |
| "ce_loss_3": 4.258548331260681, |
| "ce_loss_7": 3.7067763924598696, |
| "epoch": 0.506, |
| "grad_norm": 632.0, |
| "kl_loss_10": 190.6364860534668, |
| "kl_loss_2": 2246.9065795898437, |
| "kl_loss_3": 1774.8018188476562, |
| "kl_loss_7": 642.158203125, |
| "learning_rate": 0.0004984133397065889, |
| "loss": 1187.0591, |
| "step": 5060 |
| }, |
| { |
| "ce_loss_10": 3.529603970050812, |
| "ce_loss_13": 3.448684501647949, |
| "ce_loss_2": 4.540892434120178, |
| "ce_loss_3": 4.281192362308502, |
| "ce_loss_7": 3.727533829212189, |
| "epoch": 0.507, |
| "grad_norm": 572.0, |
| "kl_loss_10": 189.2146110534668, |
| "kl_loss_2": 2238.786248779297, |
| "kl_loss_3": 1779.2802124023438, |
| "kl_loss_7": 641.3925506591797, |
| "learning_rate": 0.0004968266953908291, |
| "loss": 1190.1465, |
| "step": 5070 |
| }, |
| { |
| "ce_loss_10": 3.5666260600090025, |
| "ce_loss_13": 3.486749768257141, |
| "ce_loss_2": 4.580948376655579, |
| "ce_loss_3": 4.316030120849609, |
| "ce_loss_7": 3.7590123891830443, |
| "epoch": 0.508, |
| "grad_norm": 608.0, |
| "kl_loss_10": 183.170157623291, |
| "kl_loss_2": 2245.8196716308594, |
| "kl_loss_3": 1773.8548583984375, |
| "kl_loss_7": 630.8032928466797, |
| "learning_rate": 0.0004952400830302117, |
| "loss": 1205.3312, |
| "step": 5080 |
| }, |
| { |
| "ce_loss_10": 3.4943687319755554, |
| "ce_loss_13": 3.4131668329238893, |
| "ce_loss_2": 4.523447823524475, |
| "ce_loss_3": 4.255696547031403, |
| "ce_loss_7": 3.686564898490906, |
| "epoch": 0.509, |
| "grad_norm": 624.0, |
| "kl_loss_10": 190.01820449829103, |
| "kl_loss_2": 2279.890344238281, |
| "kl_loss_3": 1807.6574096679688, |
| "kl_loss_7": 647.3827026367187, |
| "learning_rate": 0.0004936535186019053, |
| "loss": 1207.5289, |
| "step": 5090 |
| }, |
| { |
| "ce_loss_10": 3.5966561436653137, |
| "ce_loss_13": 3.5205499291419984, |
| "ce_loss_2": 4.557806515693665, |
| "ce_loss_3": 4.297008419036866, |
| "ce_loss_7": 3.777693784236908, |
| "epoch": 0.51, |
| "grad_norm": 572.0, |
| "kl_loss_10": 181.29688186645507, |
| "kl_loss_2": 2148.9375854492187, |
| "kl_loss_3": 1687.6425170898438, |
| "kl_loss_7": 609.7850677490235, |
| "learning_rate": 0.000492067018082596, |
| "loss": 1180.1517, |
| "step": 5100 |
| }, |
| { |
| "ce_loss_10": 3.5341065168380736, |
| "ce_loss_13": 3.448958945274353, |
| "ce_loss_2": 4.584142446517944, |
| "ce_loss_3": 4.311909413337707, |
| "ce_loss_7": 3.7378315210342405, |
| "epoch": 0.511, |
| "grad_norm": 580.0, |
| "kl_loss_10": 191.75616531372071, |
| "kl_loss_2": 2313.8392028808594, |
| "kl_loss_3": 1838.1626281738281, |
| "kl_loss_7": 657.9944549560547, |
| "learning_rate": 0.0004904805974483267, |
| "loss": 1252.0359, |
| "step": 5110 |
| }, |
| { |
| "ce_loss_10": 3.6478444814682005, |
| "ce_loss_13": 3.5622426509857177, |
| "ce_loss_2": 4.652135348320007, |
| "ce_loss_3": 4.385519003868103, |
| "ce_loss_7": 3.8461916565895082, |
| "epoch": 0.512, |
| "grad_norm": 620.0, |
| "kl_loss_10": 196.4123405456543, |
| "kl_loss_2": 2261.5567626953125, |
| "kl_loss_3": 1794.3322509765626, |
| "kl_loss_7": 663.2056915283204, |
| "learning_rate": 0.0004888942726743353, |
| "loss": 1254.773, |
| "step": 5120 |
| }, |
| { |
| "ce_loss_10": 3.5161622405052184, |
| "ce_loss_13": 3.435041069984436, |
| "ce_loss_2": 4.527030563354492, |
| "ce_loss_3": 4.273452854156494, |
| "ce_loss_7": 3.7132395029067995, |
| "epoch": 0.513, |
| "grad_norm": 612.0, |
| "kl_loss_10": 189.22552337646485, |
| "kl_loss_2": 2261.9498779296873, |
| "kl_loss_3": 1801.7636291503907, |
| "kl_loss_7": 649.0409454345703, |
| "learning_rate": 0.0004873080597348947, |
| "loss": 1220.4549, |
| "step": 5130 |
| }, |
| { |
| "ce_loss_10": 3.4059476256370544, |
| "ce_loss_13": 3.325529730319977, |
| "ce_loss_2": 4.472175240516663, |
| "ce_loss_3": 4.212775444984436, |
| "ce_loss_7": 3.6100828886032104, |
| "epoch": 0.514, |
| "grad_norm": 576.0, |
| "kl_loss_10": 188.01781005859374, |
| "kl_loss_2": 2348.922229003906, |
| "kl_loss_3": 1884.4989135742187, |
| "kl_loss_7": 653.7215118408203, |
| "learning_rate": 0.0004857219746031519, |
| "loss": 1228.3554, |
| "step": 5140 |
| }, |
| { |
| "ce_loss_10": 3.5706036925315856, |
| "ce_loss_13": 3.4925912499427794, |
| "ce_loss_2": 4.564787793159485, |
| "ce_loss_3": 4.288926684856415, |
| "ce_loss_7": 3.7588186025619508, |
| "epoch": 0.515, |
| "grad_norm": 564.0, |
| "kl_loss_10": 187.27239913940429, |
| "kl_loss_2": 2197.20703125, |
| "kl_loss_3": 1721.3864501953126, |
| "kl_loss_7": 633.3875030517578, |
| "learning_rate": 0.0004841360332509663, |
| "loss": 1198.5317, |
| "step": 5150 |
| }, |
| { |
| "ce_loss_10": 3.5291930079460143, |
| "ce_loss_13": 3.451045203208923, |
| "ce_loss_2": 4.509535562992096, |
| "ce_loss_3": 4.244547712802887, |
| "ce_loss_7": 3.7146947622299193, |
| "epoch": 0.516, |
| "grad_norm": 640.0, |
| "kl_loss_10": 182.93116302490233, |
| "kl_loss_2": 2188.2712890625, |
| "kl_loss_3": 1720.3389709472656, |
| "kl_loss_7": 621.1425567626953, |
| "learning_rate": 0.0004825502516487497, |
| "loss": 1155.4164, |
| "step": 5160 |
| }, |
| { |
| "ce_loss_10": 3.494162142276764, |
| "ce_loss_13": 3.410848069190979, |
| "ce_loss_2": 4.509466361999512, |
| "ce_loss_3": 4.249596023559571, |
| "ce_loss_7": 3.689158225059509, |
| "epoch": 0.517, |
| "grad_norm": 776.0, |
| "kl_loss_10": 188.6508804321289, |
| "kl_loss_2": 2267.020611572266, |
| "kl_loss_3": 1803.9957458496094, |
| "kl_loss_7": 643.8313232421875, |
| "learning_rate": 0.00048096464576530507, |
| "loss": 1222.8347, |
| "step": 5170 |
| }, |
| { |
| "ce_loss_10": 3.5969889640808104, |
| "ce_loss_13": 3.5190072774887087, |
| "ce_loss_2": 4.547854423522949, |
| "ce_loss_3": 4.293277430534363, |
| "ce_loss_7": 3.7762330770492554, |
| "epoch": 0.518, |
| "grad_norm": 620.0, |
| "kl_loss_10": 184.8033348083496, |
| "kl_loss_2": 2134.698907470703, |
| "kl_loss_3": 1683.638848876953, |
| "kl_loss_7": 620.0321014404296, |
| "learning_rate": 0.00047937923156766646, |
| "loss": 1168.0455, |
| "step": 5180 |
| }, |
| { |
| "ce_loss_10": 3.6420543789863586, |
| "ce_loss_13": 3.5626509547233582, |
| "ce_loss_2": 4.591393780708313, |
| "ce_loss_3": 4.326744735240936, |
| "ce_loss_7": 3.8211991429328918, |
| "epoch": 0.519, |
| "grad_norm": 560.0, |
| "kl_loss_10": 186.71140975952147, |
| "kl_loss_2": 2131.37041015625, |
| "kl_loss_3": 1673.548291015625, |
| "kl_loss_7": 620.3797760009766, |
| "learning_rate": 0.00047779402502093696, |
| "loss": 1176.4619, |
| "step": 5190 |
| }, |
| { |
| "ce_loss_10": 3.6047547817230225, |
| "ce_loss_13": 3.5276923775672913, |
| "ce_loss_2": 4.578773355484008, |
| "ce_loss_3": 4.314329183101654, |
| "ce_loss_7": 3.7894126772880554, |
| "epoch": 0.52, |
| "grad_norm": 572.0, |
| "kl_loss_10": 184.64933090209962, |
| "kl_loss_2": 2171.194439697266, |
| "kl_loss_3": 1701.0345581054687, |
| "kl_loss_7": 621.1681274414062, |
| "learning_rate": 0.0004762090420881289, |
| "loss": 1192.2752, |
| "step": 5200 |
| }, |
| { |
| "ce_loss_10": 3.524991714954376, |
| "ce_loss_13": 3.449671447277069, |
| "ce_loss_2": 4.498011994361877, |
| "ce_loss_3": 4.23529599905014, |
| "ce_loss_7": 3.705312669277191, |
| "epoch": 0.521, |
| "grad_norm": 608.0, |
| "kl_loss_10": 186.35540390014648, |
| "kl_loss_2": 2183.97861328125, |
| "kl_loss_3": 1723.4856018066407, |
| "kl_loss_7": 620.2787628173828, |
| "learning_rate": 0.00047462429873000296, |
| "loss": 1166.6783, |
| "step": 5210 |
| }, |
| { |
| "ce_loss_10": 3.610927963256836, |
| "ce_loss_13": 3.5292730212211607, |
| "ce_loss_2": 4.586357808113098, |
| "ce_loss_3": 4.316179418563843, |
| "ce_loss_7": 3.7876657485961913, |
| "epoch": 0.522, |
| "grad_norm": 572.0, |
| "kl_loss_10": 187.58379135131835, |
| "kl_loss_2": 2205.0898559570314, |
| "kl_loss_3": 1728.4106079101562, |
| "kl_loss_7": 624.2286590576172, |
| "learning_rate": 0.0004730398109049071, |
| "loss": 1181.2787, |
| "step": 5220 |
| }, |
| { |
| "ce_loss_10": 3.542900788784027, |
| "ce_loss_13": 3.4592981576919555, |
| "ce_loss_2": 4.5604215383529665, |
| "ce_loss_3": 4.294589376449585, |
| "ce_loss_7": 3.7346125721931456, |
| "epoch": 0.523, |
| "grad_norm": 632.0, |
| "kl_loss_10": 192.22620544433593, |
| "kl_loss_2": 2275.4386779785154, |
| "kl_loss_3": 1804.1369689941407, |
| "kl_loss_7": 648.4010925292969, |
| "learning_rate": 0.000471455594568616, |
| "loss": 1206.5586, |
| "step": 5230 |
| }, |
| { |
| "ce_loss_10": 3.612694036960602, |
| "ce_loss_13": 3.5346154451370237, |
| "ce_loss_2": 4.571756148338318, |
| "ce_loss_3": 4.30354597568512, |
| "ce_loss_7": 3.7924134016036986, |
| "epoch": 0.524, |
| "grad_norm": 584.0, |
| "kl_loss_10": 184.57232360839845, |
| "kl_loss_2": 2148.5399963378904, |
| "kl_loss_3": 1679.7537841796875, |
| "kl_loss_7": 619.590966796875, |
| "learning_rate": 0.00046987166567417086, |
| "loss": 1185.6557, |
| "step": 5240 |
| }, |
| { |
| "ce_loss_10": 3.5288819313049316, |
| "ce_loss_13": 3.452391028404236, |
| "ce_loss_2": 4.524345111846924, |
| "ce_loss_3": 4.255153965950012, |
| "ce_loss_7": 3.7151949644088744, |
| "epoch": 0.525, |
| "grad_norm": 640.0, |
| "kl_loss_10": 184.01749114990236, |
| "kl_loss_2": 2198.7858947753907, |
| "kl_loss_3": 1730.872314453125, |
| "kl_loss_7": 629.3970092773437, |
| "learning_rate": 0.00046828804017171776, |
| "loss": 1156.5996, |
| "step": 5250 |
| }, |
| { |
| "ce_loss_10": 3.5754063725471497, |
| "ce_loss_13": 3.4882086515426636, |
| "ce_loss_2": 4.589629459381103, |
| "ce_loss_3": 4.328830146789551, |
| "ce_loss_7": 3.7704445004463194, |
| "epoch": 0.526, |
| "grad_norm": 640.0, |
| "kl_loss_10": 189.38601303100586, |
| "kl_loss_2": 2242.6748046875, |
| "kl_loss_3": 1771.9065246582031, |
| "kl_loss_7": 637.7771270751953, |
| "learning_rate": 0.00046670473400834805, |
| "loss": 1218.8605, |
| "step": 5260 |
| }, |
| { |
| "ce_loss_10": 3.5049550890922547, |
| "ce_loss_13": 3.428373408317566, |
| "ce_loss_2": 4.489202237129211, |
| "ce_loss_3": 4.228802132606506, |
| "ce_loss_7": 3.686991608142853, |
| "epoch": 0.527, |
| "grad_norm": 580.0, |
| "kl_loss_10": 181.47291641235353, |
| "kl_loss_2": 2184.507427978516, |
| "kl_loss_3": 1721.9762329101563, |
| "kl_loss_7": 614.4228942871093, |
| "learning_rate": 0.00046512176312793734, |
| "loss": 1216.9304, |
| "step": 5270 |
| }, |
| { |
| "ce_loss_10": 3.497020888328552, |
| "ce_loss_13": 3.415910315513611, |
| "ce_loss_2": 4.500096344947815, |
| "ce_loss_3": 4.221375334262848, |
| "ce_loss_7": 3.6874555468559267, |
| "epoch": 0.528, |
| "grad_norm": 608.0, |
| "kl_loss_10": 183.7262046813965, |
| "kl_loss_2": 2223.9841369628907, |
| "kl_loss_3": 1744.9040588378907, |
| "kl_loss_7": 628.2290283203125, |
| "learning_rate": 0.00046353914347098467, |
| "loss": 1206.4577, |
| "step": 5280 |
| }, |
| { |
| "ce_loss_10": 3.5970619559288024, |
| "ce_loss_13": 3.5186134576797485, |
| "ce_loss_2": 4.588784885406494, |
| "ce_loss_3": 4.328100037574768, |
| "ce_loss_7": 3.7806106090545653, |
| "epoch": 0.529, |
| "grad_norm": 608.0, |
| "kl_loss_10": 183.81845779418944, |
| "kl_loss_2": 2204.89072265625, |
| "kl_loss_3": 1738.6330078125, |
| "kl_loss_7": 622.7282592773438, |
| "learning_rate": 0.0004619568909744524, |
| "loss": 1214.3289, |
| "step": 5290 |
| }, |
| { |
| "ce_loss_10": 3.5965808272361754, |
| "ce_loss_13": 3.519477891921997, |
| "ce_loss_2": 4.575191998481751, |
| "ce_loss_3": 4.308115267753601, |
| "ce_loss_7": 3.779456090927124, |
| "epoch": 0.53, |
| "grad_norm": 624.0, |
| "kl_loss_10": 185.90534057617188, |
| "kl_loss_2": 2166.9622314453127, |
| "kl_loss_3": 1701.3540832519532, |
| "kl_loss_7": 623.1412811279297, |
| "learning_rate": 0.00046037502157160573, |
| "loss": 1194.0631, |
| "step": 5300 |
| }, |
| { |
| "ce_loss_10": 3.475346398353577, |
| "ce_loss_13": 3.3953770637512206, |
| "ce_loss_2": 4.472927665710449, |
| "ce_loss_3": 4.211131680011749, |
| "ce_loss_7": 3.672848129272461, |
| "epoch": 0.531, |
| "grad_norm": 608.0, |
| "kl_loss_10": 188.33962783813476, |
| "kl_loss_2": 2232.2967163085937, |
| "kl_loss_3": 1767.3839172363282, |
| "kl_loss_7": 649.8835662841797, |
| "learning_rate": 0.00045879355119185207, |
| "loss": 1212.3993, |
| "step": 5310 |
| }, |
| { |
| "ce_loss_10": 3.555951988697052, |
| "ce_loss_13": 3.474162495136261, |
| "ce_loss_2": 4.560364985466004, |
| "ce_loss_3": 4.293745231628418, |
| "ce_loss_7": 3.751049613952637, |
| "epoch": 0.532, |
| "grad_norm": 672.0, |
| "kl_loss_10": 190.96983184814454, |
| "kl_loss_2": 2257.3020629882812, |
| "kl_loss_3": 1780.9093383789063, |
| "kl_loss_7": 650.6417663574218, |
| "learning_rate": 0.0004572124957605803, |
| "loss": 1223.1152, |
| "step": 5320 |
| }, |
| { |
| "ce_loss_10": 3.5723905324935914, |
| "ce_loss_13": 3.492247462272644, |
| "ce_loss_2": 4.554775309562683, |
| "ce_loss_3": 4.289375352859497, |
| "ce_loss_7": 3.7621920228004457, |
| "epoch": 0.533, |
| "grad_norm": 584.0, |
| "kl_loss_10": 185.00704040527344, |
| "kl_loss_2": 2210.7857055664062, |
| "kl_loss_3": 1738.2788696289062, |
| "kl_loss_7": 631.7678100585938, |
| "learning_rate": 0.00045563187119900103, |
| "loss": 1171.3742, |
| "step": 5330 |
| }, |
| { |
| "ce_loss_10": 3.4156481266021728, |
| "ce_loss_13": 3.338289904594421, |
| "ce_loss_2": 4.459405374526978, |
| "ce_loss_3": 4.184291207790375, |
| "ce_loss_7": 3.612143576145172, |
| "epoch": 0.534, |
| "grad_norm": 668.0, |
| "kl_loss_10": 185.77383117675782, |
| "kl_loss_2": 2280.9642578125, |
| "kl_loss_3": 1801.5166015625, |
| "kl_loss_7": 637.4973205566406, |
| "learning_rate": 0.00045405169342398633, |
| "loss": 1214.5622, |
| "step": 5340 |
| }, |
| { |
| "ce_loss_10": 3.5048020482063293, |
| "ce_loss_13": 3.422155427932739, |
| "ce_loss_2": 4.527113747596741, |
| "ce_loss_3": 4.256860768795013, |
| "ce_loss_7": 3.6958253622055053, |
| "epoch": 0.535, |
| "grad_norm": 580.0, |
| "kl_loss_10": 188.63988189697267, |
| "kl_loss_2": 2252.499432373047, |
| "kl_loss_3": 1773.2778076171876, |
| "kl_loss_7": 633.0957061767579, |
| "learning_rate": 0.0004524719783479088, |
| "loss": 1187.9953, |
| "step": 5350 |
| }, |
| { |
| "ce_loss_10": 3.460780155658722, |
| "ce_loss_13": 3.378307545185089, |
| "ce_loss_2": 4.497902464866638, |
| "ce_loss_3": 4.2276026725769045, |
| "ce_loss_7": 3.6559959650039673, |
| "epoch": 0.536, |
| "grad_norm": 580.0, |
| "kl_loss_10": 189.0280532836914, |
| "kl_loss_2": 2293.6262939453127, |
| "kl_loss_3": 1820.3978698730468, |
| "kl_loss_7": 642.6393402099609, |
| "learning_rate": 0.00045089274187848144, |
| "loss": 1197.8392, |
| "step": 5360 |
| }, |
| { |
| "ce_loss_10": 3.5799126744270326, |
| "ce_loss_13": 3.501321530342102, |
| "ce_loss_2": 4.5603124618530275, |
| "ce_loss_3": 4.297963404655457, |
| "ce_loss_7": 3.7619481921195983, |
| "epoch": 0.537, |
| "grad_norm": 672.0, |
| "kl_loss_10": 183.09423599243163, |
| "kl_loss_2": 2192.1404357910155, |
| "kl_loss_3": 1730.206787109375, |
| "kl_loss_7": 620.7777648925781, |
| "learning_rate": 0.00044931399991859835, |
| "loss": 1181.3807, |
| "step": 5370 |
| }, |
| { |
| "ce_loss_10": 3.4432420253753664, |
| "ce_loss_13": 3.364873206615448, |
| "ce_loss_2": 4.452599573135376, |
| "ce_loss_3": 4.183995950222015, |
| "ce_loss_7": 3.6285991072654724, |
| "epoch": 0.538, |
| "grad_norm": 600.0, |
| "kl_loss_10": 182.95552597045898, |
| "kl_loss_2": 2236.559704589844, |
| "kl_loss_3": 1765.850408935547, |
| "kl_loss_7": 629.1190887451172, |
| "learning_rate": 0.00044773576836617336, |
| "loss": 1181.7396, |
| "step": 5380 |
| }, |
| { |
| "ce_loss_10": 3.537210750579834, |
| "ce_loss_13": 3.4561371922492983, |
| "ce_loss_2": 4.546432638168335, |
| "ce_loss_3": 4.281138265132904, |
| "ce_loss_7": 3.7339015364646913, |
| "epoch": 0.539, |
| "grad_norm": 612.0, |
| "kl_loss_10": 189.98071517944337, |
| "kl_loss_2": 2253.4164794921876, |
| "kl_loss_3": 1781.9518432617188, |
| "kl_loss_7": 650.4307464599609, |
| "learning_rate": 0.00044615806311398056, |
| "loss": 1232.9109, |
| "step": 5390 |
| }, |
| { |
| "ce_loss_10": 3.6113093972206114, |
| "ce_loss_13": 3.5354915499687194, |
| "ce_loss_2": 4.540320181846619, |
| "ce_loss_3": 4.277068996429444, |
| "ce_loss_7": 3.787187647819519, |
| "epoch": 0.54, |
| "grad_norm": 580.0, |
| "kl_loss_10": 181.3637908935547, |
| "kl_loss_2": 2094.3728942871094, |
| "kl_loss_3": 1633.9456298828125, |
| "kl_loss_7": 605.8723022460938, |
| "learning_rate": 0.00044458090004949454, |
| "loss": 1175.0439, |
| "step": 5400 |
| }, |
| { |
| "ce_loss_10": 3.47382390499115, |
| "ce_loss_13": 3.39083354473114, |
| "ce_loss_2": 4.532833766937256, |
| "ce_loss_3": 4.262583804130554, |
| "ce_loss_7": 3.6737227201461793, |
| "epoch": 0.541, |
| "grad_norm": 620.0, |
| "kl_loss_10": 194.1818962097168, |
| "kl_loss_2": 2371.5374450683594, |
| "kl_loss_3": 1877.3123657226563, |
| "kl_loss_7": 665.8367095947266, |
| "learning_rate": 0.0004430042950547297, |
| "loss": 1218.705, |
| "step": 5410 |
| }, |
| { |
| "ce_loss_10": 3.5697335839271545, |
| "ce_loss_13": 3.483165454864502, |
| "ce_loss_2": 4.578557109832763, |
| "ce_loss_3": 4.31482458114624, |
| "ce_loss_7": 3.763143301010132, |
| "epoch": 0.542, |
| "grad_norm": 572.0, |
| "kl_loss_10": 191.8735610961914, |
| "kl_loss_2": 2253.524365234375, |
| "kl_loss_3": 1779.8523681640625, |
| "kl_loss_7": 645.0971527099609, |
| "learning_rate": 0.0004414282640060809, |
| "loss": 1200.7552, |
| "step": 5420 |
| }, |
| { |
| "ce_loss_10": 3.656325376033783, |
| "ce_loss_13": 3.575901198387146, |
| "ce_loss_2": 4.611030888557434, |
| "ce_loss_3": 4.35529580116272, |
| "ce_loss_7": 3.8402703166007996, |
| "epoch": 0.543, |
| "grad_norm": 672.0, |
| "kl_loss_10": 186.09361267089844, |
| "kl_loss_2": 2127.538677978516, |
| "kl_loss_3": 1677.6395080566406, |
| "kl_loss_7": 622.3258697509766, |
| "learning_rate": 0.0004398528227741633, |
| "loss": 1179.4629, |
| "step": 5430 |
| }, |
| { |
| "ce_loss_10": 3.5199654936790465, |
| "ce_loss_13": 3.442525625228882, |
| "ce_loss_2": 4.519460201263428, |
| "ce_loss_3": 4.247548985481262, |
| "ce_loss_7": 3.7133419036865236, |
| "epoch": 0.544, |
| "grad_norm": 656.0, |
| "kl_loss_10": 186.4021957397461, |
| "kl_loss_2": 2206.209338378906, |
| "kl_loss_3": 1726.7237670898437, |
| "kl_loss_7": 636.2167572021484, |
| "learning_rate": 0.00043827798722365264, |
| "loss": 1202.1797, |
| "step": 5440 |
| }, |
| { |
| "ce_loss_10": 3.6471530318260195, |
| "ce_loss_13": 3.566143047809601, |
| "ce_loss_2": 4.5952486276626585, |
| "ce_loss_3": 4.333053851127625, |
| "ce_loss_7": 3.8201894760131836, |
| "epoch": 0.545, |
| "grad_norm": 592.0, |
| "kl_loss_10": 185.36949920654297, |
| "kl_loss_2": 2143.129284667969, |
| "kl_loss_3": 1675.2244567871094, |
| "kl_loss_7": 617.6786651611328, |
| "learning_rate": 0.00043670377321312535, |
| "loss": 1164.6765, |
| "step": 5450 |
| }, |
| { |
| "ce_loss_10": 3.6508351445198057, |
| "ce_loss_13": 3.574675273895264, |
| "ce_loss_2": 4.5991229772567745, |
| "ce_loss_3": 4.339683651924133, |
| "ce_loss_7": 3.8303612232208253, |
| "epoch": 0.546, |
| "grad_norm": 700.0, |
| "kl_loss_10": 183.1472366333008, |
| "kl_loss_2": 2130.7037048339844, |
| "kl_loss_3": 1667.0253173828125, |
| "kl_loss_7": 613.02021484375, |
| "learning_rate": 0.0004351301965948991, |
| "loss": 1168.8242, |
| "step": 5460 |
| }, |
| { |
| "ce_loss_10": 3.559572923183441, |
| "ce_loss_13": 3.478611421585083, |
| "ce_loss_2": 4.511995816230774, |
| "ce_loss_3": 4.249083304405213, |
| "ce_loss_7": 3.7354837536811827, |
| "epoch": 0.547, |
| "grad_norm": 636.0, |
| "kl_loss_10": 181.6176902770996, |
| "kl_loss_2": 2130.2896118164062, |
| "kl_loss_3": 1667.8863098144532, |
| "kl_loss_7": 614.7661987304688, |
| "learning_rate": 0.000433557273214873, |
| "loss": 1176.8127, |
| "step": 5470 |
| }, |
| { |
| "ce_loss_10": 3.545152747631073, |
| "ce_loss_13": 3.4662238121032716, |
| "ce_loss_2": 4.518579649925232, |
| "ce_loss_3": 4.245905971527099, |
| "ce_loss_7": 3.7270439863204956, |
| "epoch": 0.548, |
| "grad_norm": 608.0, |
| "kl_loss_10": 184.05833053588867, |
| "kl_loss_2": 2168.807977294922, |
| "kl_loss_3": 1696.2628234863282, |
| "kl_loss_7": 616.8240295410156, |
| "learning_rate": 0.000431985018912368, |
| "loss": 1150.4518, |
| "step": 5480 |
| }, |
| { |
| "ce_loss_10": 3.514492917060852, |
| "ce_loss_13": 3.4341874718666077, |
| "ce_loss_2": 4.534255909919739, |
| "ce_loss_3": 4.270003151893616, |
| "ce_loss_7": 3.7031027913093566, |
| "epoch": 0.549, |
| "grad_norm": 600.0, |
| "kl_loss_10": 189.14087448120117, |
| "kl_loss_2": 2268.4575805664062, |
| "kl_loss_3": 1800.3566040039063, |
| "kl_loss_7": 639.125277709961, |
| "learning_rate": 0.0004304134495199674, |
| "loss": 1178.9426, |
| "step": 5490 |
| }, |
| { |
| "ce_loss_10": 3.538786220550537, |
| "ce_loss_13": 3.4557671666145326, |
| "ce_loss_2": 4.5282275676727295, |
| "ce_loss_3": 4.265328872203827, |
| "ce_loss_7": 3.731334662437439, |
| "epoch": 0.55, |
| "grad_norm": 604.0, |
| "kl_loss_10": 188.5583984375, |
| "kl_loss_2": 2236.901904296875, |
| "kl_loss_3": 1761.930596923828, |
| "kl_loss_7": 644.1045196533203, |
| "learning_rate": 0.0004288425808633575, |
| "loss": 1185.0572, |
| "step": 5500 |
| }, |
| { |
| "ce_loss_10": 3.514096534252167, |
| "ce_loss_13": 3.435099017620087, |
| "ce_loss_2": 4.509266877174378, |
| "ce_loss_3": 4.252711880207062, |
| "ce_loss_7": 3.6944369435310365, |
| "epoch": 0.551, |
| "grad_norm": 664.0, |
| "kl_loss_10": 184.48614044189452, |
| "kl_loss_2": 2223.2010803222656, |
| "kl_loss_3": 1765.0783264160157, |
| "kl_loss_7": 630.5097595214844, |
| "learning_rate": 0.0004272724287611684, |
| "loss": 1201.5842, |
| "step": 5510 |
| }, |
| { |
| "ce_loss_10": 3.490022134780884, |
| "ce_loss_13": 3.4118714332580566, |
| "ce_loss_2": 4.514768314361572, |
| "ce_loss_3": 4.2408933401107785, |
| "ce_loss_7": 3.680497145652771, |
| "epoch": 0.552, |
| "grad_norm": 652.0, |
| "kl_loss_10": 185.8211784362793, |
| "kl_loss_2": 2267.8012084960938, |
| "kl_loss_3": 1790.0665649414063, |
| "kl_loss_7": 633.0888458251953, |
| "learning_rate": 0.00042570300902481425, |
| "loss": 1202.0281, |
| "step": 5520 |
| }, |
| { |
| "ce_loss_10": 3.523720991611481, |
| "ce_loss_13": 3.448110568523407, |
| "ce_loss_2": 4.499163627624512, |
| "ce_loss_3": 4.242252886295319, |
| "ce_loss_7": 3.704312777519226, |
| "epoch": 0.553, |
| "grad_norm": 608.0, |
| "kl_loss_10": 183.24146575927733, |
| "kl_loss_2": 2193.4769287109375, |
| "kl_loss_3": 1734.7890686035157, |
| "kl_loss_7": 623.5370147705078, |
| "learning_rate": 0.00042413433745833423, |
| "loss": 1179.776, |
| "step": 5530 |
| }, |
| { |
| "ce_loss_10": 3.5270172238349913, |
| "ce_loss_13": 3.448072147369385, |
| "ce_loss_2": 4.536388492584228, |
| "ce_loss_3": 4.260759913921357, |
| "ce_loss_7": 3.715148115158081, |
| "epoch": 0.554, |
| "grad_norm": 556.0, |
| "kl_loss_10": 183.68499755859375, |
| "kl_loss_2": 2226.1301025390626, |
| "kl_loss_3": 1743.2217163085938, |
| "kl_loss_7": 626.4557403564453, |
| "learning_rate": 0.0004225664298582339, |
| "loss": 1157.0496, |
| "step": 5540 |
| }, |
| { |
| "ce_loss_10": 3.6083423376083372, |
| "ce_loss_13": 3.530562436580658, |
| "ce_loss_2": 4.568499255180359, |
| "ce_loss_3": 4.307069134712219, |
| "ce_loss_7": 3.7862043499946596, |
| "epoch": 0.555, |
| "grad_norm": 548.0, |
| "kl_loss_10": 182.37268829345703, |
| "kl_loss_2": 2137.707696533203, |
| "kl_loss_3": 1673.7792663574219, |
| "kl_loss_7": 611.6491577148438, |
| "learning_rate": 0.000420999302013325, |
| "loss": 1149.7553, |
| "step": 5550 |
| }, |
| { |
| "ce_loss_10": 3.5049922823905946, |
| "ce_loss_13": 3.420680546760559, |
| "ce_loss_2": 4.553832268714904, |
| "ce_loss_3": 4.279029071331024, |
| "ce_loss_7": 3.700891983509064, |
| "epoch": 0.556, |
| "grad_norm": 572.0, |
| "kl_loss_10": 190.98652191162108, |
| "kl_loss_2": 2305.641845703125, |
| "kl_loss_3": 1822.5648254394532, |
| "kl_loss_7": 641.2451202392579, |
| "learning_rate": 0.000419432969704568, |
| "loss": 1204.391, |
| "step": 5560 |
| }, |
| { |
| "ce_loss_10": 3.548888063430786, |
| "ce_loss_13": 3.4704429507255554, |
| "ce_loss_2": 4.518404316902161, |
| "ce_loss_3": 4.257292962074279, |
| "ce_loss_7": 3.735389542579651, |
| "epoch": 0.557, |
| "grad_norm": 564.0, |
| "kl_loss_10": 182.6816421508789, |
| "kl_loss_2": 2144.968542480469, |
| "kl_loss_3": 1682.6951538085937, |
| "kl_loss_7": 617.6556762695312, |
| "learning_rate": 0.00041786744870491154, |
| "loss": 1202.9963, |
| "step": 5570 |
| }, |
| { |
| "ce_loss_10": 3.491339087486267, |
| "ce_loss_13": 3.412715029716492, |
| "ce_loss_2": 4.4881198644638065, |
| "ce_loss_3": 4.219742333889007, |
| "ce_loss_7": 3.679445171356201, |
| "epoch": 0.558, |
| "grad_norm": 576.0, |
| "kl_loss_10": 189.42200622558593, |
| "kl_loss_2": 2234.280969238281, |
| "kl_loss_3": 1757.53349609375, |
| "kl_loss_7": 641.0087585449219, |
| "learning_rate": 0.0004163027547791347, |
| "loss": 1192.3963, |
| "step": 5580 |
| }, |
| { |
| "ce_loss_10": 3.4689704895019533, |
| "ce_loss_13": 3.3872820258140566, |
| "ce_loss_2": 4.518157267570496, |
| "ce_loss_3": 4.244075846672058, |
| "ce_loss_7": 3.6619726419448853, |
| "epoch": 0.559, |
| "grad_norm": 688.0, |
| "kl_loss_10": 188.0017578125, |
| "kl_loss_2": 2320.9525756835938, |
| "kl_loss_3": 1834.1813659667969, |
| "kl_loss_7": 642.0279479980469, |
| "learning_rate": 0.0004147389036836881, |
| "loss": 1210.1521, |
| "step": 5590 |
| }, |
| { |
| "ce_loss_10": 3.5183377385139467, |
| "ce_loss_13": 3.4371410965919496, |
| "ce_loss_2": 4.522028660774231, |
| "ce_loss_3": 4.258878147602081, |
| "ce_loss_7": 3.706261694431305, |
| "epoch": 0.56, |
| "grad_norm": 652.0, |
| "kl_loss_10": 185.66660232543944, |
| "kl_loss_2": 2233.013397216797, |
| "kl_loss_3": 1764.0713806152344, |
| "kl_loss_7": 637.6866302490234, |
| "learning_rate": 0.00041317591116653486, |
| "loss": 1219.6441, |
| "step": 5600 |
| }, |
| { |
| "ce_loss_10": 3.558071720600128, |
| "ce_loss_13": 3.474745440483093, |
| "ce_loss_2": 4.558679819107056, |
| "ce_loss_3": 4.291901731491089, |
| "ce_loss_7": 3.746951687335968, |
| "epoch": 0.561, |
| "grad_norm": 592.0, |
| "kl_loss_10": 189.82635574340821, |
| "kl_loss_2": 2230.9510803222656, |
| "kl_loss_3": 1759.6529296875, |
| "kl_loss_7": 636.9456726074219, |
| "learning_rate": 0.0004116137929669921, |
| "loss": 1188.2356, |
| "step": 5610 |
| }, |
| { |
| "ce_loss_10": 3.544596457481384, |
| "ce_loss_13": 3.465434396266937, |
| "ce_loss_2": 4.526343536376953, |
| "ce_loss_3": 4.262159049510956, |
| "ce_loss_7": 3.7297433972358705, |
| "epoch": 0.562, |
| "grad_norm": 700.0, |
| "kl_loss_10": 184.16798706054686, |
| "kl_loss_2": 2204.5443481445313, |
| "kl_loss_3": 1738.5609375, |
| "kl_loss_7": 629.1714752197265, |
| "learning_rate": 0.00041005256481557305, |
| "loss": 1174.8596, |
| "step": 5620 |
| }, |
| { |
| "ce_loss_10": 3.6428149700164796, |
| "ce_loss_13": 3.568005383014679, |
| "ce_loss_2": 4.574557089805603, |
| "ce_loss_3": 4.320431900024414, |
| "ce_loss_7": 3.8154868602752687, |
| "epoch": 0.563, |
| "grad_norm": 580.0, |
| "kl_loss_10": 178.43261108398437, |
| "kl_loss_2": 2081.9929809570312, |
| "kl_loss_3": 1633.8047790527344, |
| "kl_loss_7": 600.1017929077149, |
| "learning_rate": 0.00040849224243382767, |
| "loss": 1150.8125, |
| "step": 5630 |
| }, |
| { |
| "ce_loss_10": 3.4989004015922545, |
| "ce_loss_13": 3.4218288540840147, |
| "ce_loss_2": 4.497757744789124, |
| "ce_loss_3": 4.228800570964813, |
| "ce_loss_7": 3.6881244659423826, |
| "epoch": 0.564, |
| "grad_norm": 576.0, |
| "kl_loss_10": 184.93341827392578, |
| "kl_loss_2": 2224.632287597656, |
| "kl_loss_3": 1749.1263427734375, |
| "kl_loss_7": 632.0666015625, |
| "learning_rate": 0.000406932841534185, |
| "loss": 1173.0332, |
| "step": 5640 |
| }, |
| { |
| "ce_loss_10": 3.453734540939331, |
| "ce_loss_13": 3.372727131843567, |
| "ce_loss_2": 4.460113084316253, |
| "ce_loss_3": 4.19973611831665, |
| "ce_loss_7": 3.6455657839775086, |
| "epoch": 0.565, |
| "grad_norm": 708.0, |
| "kl_loss_10": 186.30313568115236, |
| "kl_loss_2": 2260.893664550781, |
| "kl_loss_3": 1792.1846252441405, |
| "kl_loss_7": 638.3344879150391, |
| "learning_rate": 0.0004053743778197951, |
| "loss": 1219.3186, |
| "step": 5650 |
| }, |
| { |
| "ce_loss_10": 3.565755784511566, |
| "ce_loss_13": 3.481943702697754, |
| "ce_loss_2": 4.545414447784424, |
| "ce_loss_3": 4.281696927547455, |
| "ce_loss_7": 3.7513938307762147, |
| "epoch": 0.566, |
| "grad_norm": 584.0, |
| "kl_loss_10": 188.62994842529298, |
| "kl_loss_2": 2184.7360778808593, |
| "kl_loss_3": 1721.9289123535157, |
| "kl_loss_7": 628.1358184814453, |
| "learning_rate": 0.0004038168669843697, |
| "loss": 1209.3523, |
| "step": 5660 |
| }, |
| { |
| "ce_loss_10": 3.532804882526398, |
| "ce_loss_13": 3.4522215127944946, |
| "ce_loss_2": 4.494965553283691, |
| "ce_loss_3": 4.231216824054718, |
| "ce_loss_7": 3.7118934392929077, |
| "epoch": 0.567, |
| "grad_norm": 620.0, |
| "kl_loss_10": 183.03904342651367, |
| "kl_loss_2": 2154.956463623047, |
| "kl_loss_3": 1695.0998046875, |
| "kl_loss_7": 613.3763107299804, |
| "learning_rate": 0.000402260324712026, |
| "loss": 1195.8986, |
| "step": 5670 |
| }, |
| { |
| "ce_loss_10": 3.5749718070030214, |
| "ce_loss_13": 3.497403085231781, |
| "ce_loss_2": 4.588955020904541, |
| "ce_loss_3": 4.319999086856842, |
| "ce_loss_7": 3.7625349521636964, |
| "epoch": 0.568, |
| "grad_norm": 616.0, |
| "kl_loss_10": 184.26412506103514, |
| "kl_loss_2": 2236.5206665039063, |
| "kl_loss_3": 1760.365301513672, |
| "kl_loss_7": 624.1568267822265, |
| "learning_rate": 0.00040070476667712743, |
| "loss": 1174.4818, |
| "step": 5680 |
| }, |
| { |
| "ce_loss_10": 3.595443320274353, |
| "ce_loss_13": 3.5173869848251345, |
| "ce_loss_2": 4.573628330230713, |
| "ce_loss_3": 4.3121489644050595, |
| "ce_loss_7": 3.7780985593795777, |
| "epoch": 0.569, |
| "grad_norm": 540.0, |
| "kl_loss_10": 184.3900894165039, |
| "kl_loss_2": 2190.797717285156, |
| "kl_loss_3": 1726.8204223632813, |
| "kl_loss_7": 618.142544555664, |
| "learning_rate": 0.0003991502085441259, |
| "loss": 1191.0875, |
| "step": 5690 |
| }, |
| { |
| "ce_loss_10": 3.6352679252624513, |
| "ce_loss_13": 3.556475079059601, |
| "ce_loss_2": 4.568906188011169, |
| "ce_loss_3": 4.311613416671753, |
| "ce_loss_7": 3.8102620005607606, |
| "epoch": 0.57, |
| "grad_norm": 616.0, |
| "kl_loss_10": 180.942374420166, |
| "kl_loss_2": 2084.3558349609375, |
| "kl_loss_3": 1627.6179626464843, |
| "kl_loss_7": 599.5358856201171, |
| "learning_rate": 0.0003975966659674047, |
| "loss": 1160.7822, |
| "step": 5700 |
| }, |
| { |
| "ce_loss_10": 3.5962194561958314, |
| "ce_loss_13": 3.517608177661896, |
| "ce_loss_2": 4.578224086761475, |
| "ce_loss_3": 4.314012908935547, |
| "ce_loss_7": 3.7789862513542176, |
| "epoch": 0.571, |
| "grad_norm": 644.0, |
| "kl_loss_10": 182.5239112854004, |
| "kl_loss_2": 2180.907177734375, |
| "kl_loss_3": 1721.9898742675782, |
| "kl_loss_7": 614.957388305664, |
| "learning_rate": 0.0003960441545911204, |
| "loss": 1160.7484, |
| "step": 5710 |
| }, |
| { |
| "ce_loss_10": 3.5932918190956116, |
| "ce_loss_13": 3.5129475712776186, |
| "ce_loss_2": 4.558131432533264, |
| "ce_loss_3": 4.293534338474274, |
| "ce_loss_7": 3.7742814660072326, |
| "epoch": 0.572, |
| "grad_norm": 604.0, |
| "kl_loss_10": 183.15422897338868, |
| "kl_loss_2": 2156.431115722656, |
| "kl_loss_3": 1695.9377807617188, |
| "kl_loss_7": 619.908480834961, |
| "learning_rate": 0.0003944926900490452, |
| "loss": 1164.068, |
| "step": 5720 |
| }, |
| { |
| "ce_loss_10": 3.5127488017082213, |
| "ce_loss_13": 3.430432641506195, |
| "ce_loss_2": 4.5248651027679445, |
| "ce_loss_3": 4.258909869194031, |
| "ce_loss_7": 3.709870958328247, |
| "epoch": 0.573, |
| "grad_norm": 564.0, |
| "kl_loss_10": 186.0645439147949, |
| "kl_loss_2": 2235.3706176757814, |
| "kl_loss_3": 1765.8205688476562, |
| "kl_loss_7": 637.5899017333984, |
| "learning_rate": 0.0003929422879644099, |
| "loss": 1176.3957, |
| "step": 5730 |
| }, |
| { |
| "ce_loss_10": 3.510514330863953, |
| "ce_loss_13": 3.436869239807129, |
| "ce_loss_2": 4.478006148338318, |
| "ce_loss_3": 4.212211620807648, |
| "ce_loss_7": 3.6878655314445496, |
| "epoch": 0.574, |
| "grad_norm": 608.0, |
| "kl_loss_10": 179.26688079833986, |
| "kl_loss_2": 2168.3131591796873, |
| "kl_loss_3": 1699.4741943359375, |
| "kl_loss_7": 606.771630859375, |
| "learning_rate": 0.0003913929639497462, |
| "loss": 1141.8648, |
| "step": 5740 |
| }, |
| { |
| "ce_loss_10": 3.468266797065735, |
| "ce_loss_13": 3.3873007535934447, |
| "ce_loss_2": 4.490426182746887, |
| "ce_loss_3": 4.221552240848541, |
| "ce_loss_7": 3.6532665491104126, |
| "epoch": 0.575, |
| "grad_norm": 600.0, |
| "kl_loss_10": 182.11020889282227, |
| "kl_loss_2": 2259.020544433594, |
| "kl_loss_3": 1779.8084838867187, |
| "kl_loss_7": 622.9515014648438, |
| "learning_rate": 0.00038984473360672965, |
| "loss": 1169.1125, |
| "step": 5750 |
| }, |
| { |
| "ce_loss_10": 3.4774887681007387, |
| "ce_loss_13": 3.3949706315994264, |
| "ce_loss_2": 4.497473883628845, |
| "ce_loss_3": 4.2249194264411924, |
| "ce_loss_7": 3.664697051048279, |
| "epoch": 0.576, |
| "grad_norm": 596.0, |
| "kl_loss_10": 181.4011428833008, |
| "kl_loss_2": 2244.824786376953, |
| "kl_loss_3": 1764.2872131347656, |
| "kl_loss_7": 621.9651702880859, |
| "learning_rate": 0.0003882976125260229, |
| "loss": 1170.2874, |
| "step": 5760 |
| }, |
| { |
| "ce_loss_10": 3.5439630150794983, |
| "ce_loss_13": 3.4651756167411802, |
| "ce_loss_2": 4.539518880844116, |
| "ce_loss_3": 4.274804329872131, |
| "ce_loss_7": 3.728801262378693, |
| "epoch": 0.577, |
| "grad_norm": 592.0, |
| "kl_loss_10": 183.33100814819335, |
| "kl_loss_2": 2204.0270751953126, |
| "kl_loss_3": 1723.9171936035157, |
| "kl_loss_7": 615.5777862548828, |
| "learning_rate": 0.00038675161628711776, |
| "loss": 1179.8893, |
| "step": 5770 |
| }, |
| { |
| "ce_loss_10": 3.5816867470741274, |
| "ce_loss_13": 3.5046088337898254, |
| "ce_loss_2": 4.544821619987488, |
| "ce_loss_3": 4.285539746284485, |
| "ce_loss_7": 3.761784756183624, |
| "epoch": 0.578, |
| "grad_norm": 620.0, |
| "kl_loss_10": 181.6286849975586, |
| "kl_loss_2": 2136.3880615234375, |
| "kl_loss_3": 1677.4017333984375, |
| "kl_loss_7": 610.2154174804688, |
| "learning_rate": 0.0003852067604581794, |
| "loss": 1194.1891, |
| "step": 5780 |
| }, |
| { |
| "ce_loss_10": 3.523706150054932, |
| "ce_loss_13": 3.448537766933441, |
| "ce_loss_2": 4.533637523651123, |
| "ce_loss_3": 4.265281748771668, |
| "ce_loss_7": 3.709212040901184, |
| "epoch": 0.579, |
| "grad_norm": 676.0, |
| "kl_loss_10": 181.67257690429688, |
| "kl_loss_2": 2230.821612548828, |
| "kl_loss_3": 1755.830584716797, |
| "kl_loss_7": 620.3396881103515, |
| "learning_rate": 0.0003836630605958888, |
| "loss": 1177.6782, |
| "step": 5790 |
| }, |
| { |
| "ce_loss_10": 3.583223593235016, |
| "ce_loss_13": 3.503636026382446, |
| "ce_loss_2": 4.566303539276123, |
| "ce_loss_3": 4.3056800127029415, |
| "ce_loss_7": 3.76351158618927, |
| "epoch": 0.58, |
| "grad_norm": 708.0, |
| "kl_loss_10": 183.71082077026367, |
| "kl_loss_2": 2228.4331115722657, |
| "kl_loss_3": 1769.3681579589843, |
| "kl_loss_7": 628.4158660888672, |
| "learning_rate": 0.0003821205322452863, |
| "loss": 1235.8768, |
| "step": 5800 |
| }, |
| { |
| "ce_loss_10": 3.563581478595734, |
| "ce_loss_13": 3.488909196853638, |
| "ce_loss_2": 4.543065023422241, |
| "ce_loss_3": 4.286456656455994, |
| "ce_loss_7": 3.7441120743751526, |
| "epoch": 0.581, |
| "grad_norm": 584.0, |
| "kl_loss_10": 180.5809585571289, |
| "kl_loss_2": 2191.5135986328123, |
| "kl_loss_3": 1729.1048767089844, |
| "kl_loss_7": 608.2429626464843, |
| "learning_rate": 0.0003805791909396155, |
| "loss": 1179.2295, |
| "step": 5810 |
| }, |
| { |
| "ce_loss_10": 3.5160235166549683, |
| "ce_loss_13": 3.43984659910202, |
| "ce_loss_2": 4.500444793701172, |
| "ce_loss_3": 4.2373772144317625, |
| "ce_loss_7": 3.6964723467826843, |
| "epoch": 0.582, |
| "grad_norm": 652.0, |
| "kl_loss_10": 180.02818908691407, |
| "kl_loss_2": 2186.5078369140624, |
| "kl_loss_3": 1730.8345642089844, |
| "kl_loss_7": 613.5680450439453, |
| "learning_rate": 0.0003790390522001662, |
| "loss": 1191.4708, |
| "step": 5820 |
| }, |
| { |
| "ce_loss_10": 3.447020876407623, |
| "ce_loss_13": 3.3710612773895265, |
| "ce_loss_2": 4.448183393478393, |
| "ce_loss_3": 4.185709154605865, |
| "ce_loss_7": 3.6283095359802244, |
| "epoch": 0.583, |
| "grad_norm": 620.0, |
| "kl_loss_10": 180.24705505371094, |
| "kl_loss_2": 2242.3388488769533, |
| "kl_loss_3": 1776.8985290527344, |
| "kl_loss_7": 620.0051086425781, |
| "learning_rate": 0.0003775001315361183, |
| "loss": 1173.2469, |
| "step": 5830 |
| }, |
| { |
| "ce_loss_10": 3.560646951198578, |
| "ce_loss_13": 3.481656861305237, |
| "ce_loss_2": 4.561934852600098, |
| "ce_loss_3": 4.297064936161041, |
| "ce_loss_7": 3.746256446838379, |
| "epoch": 0.584, |
| "grad_norm": 560.0, |
| "kl_loss_10": 183.9656074523926, |
| "kl_loss_2": 2215.3773864746095, |
| "kl_loss_3": 1750.5085021972657, |
| "kl_loss_7": 621.9390472412109, |
| "learning_rate": 0.0003759624444443858, |
| "loss": 1186.5547, |
| "step": 5840 |
| }, |
| { |
| "ce_loss_10": 3.592632758617401, |
| "ce_loss_13": 3.520240008831024, |
| "ce_loss_2": 4.567729663848877, |
| "ce_loss_3": 4.300854158401489, |
| "ce_loss_7": 3.769944798946381, |
| "epoch": 0.585, |
| "grad_norm": 568.0, |
| "kl_loss_10": 180.2906066894531, |
| "kl_loss_2": 2170.1985412597655, |
| "kl_loss_3": 1706.62548828125, |
| "kl_loss_7": 608.6328552246093, |
| "learning_rate": 0.00037442600640946044, |
| "loss": 1155.9348, |
| "step": 5850 |
| }, |
| { |
| "ce_loss_10": 3.550674855709076, |
| "ce_loss_13": 3.475678253173828, |
| "ce_loss_2": 4.5188051700592045, |
| "ce_loss_3": 4.257573843002319, |
| "ce_loss_7": 3.733881187438965, |
| "epoch": 0.586, |
| "grad_norm": 624.0, |
| "kl_loss_10": 180.34449844360353, |
| "kl_loss_2": 2161.917333984375, |
| "kl_loss_3": 1700.5603820800782, |
| "kl_loss_7": 615.4381408691406, |
| "learning_rate": 0.00037289083290325663, |
| "loss": 1151.5385, |
| "step": 5860 |
| }, |
| { |
| "ce_loss_10": 3.5404091477394104, |
| "ce_loss_13": 3.4616484522819517, |
| "ce_loss_2": 4.5070148229599, |
| "ce_loss_3": 4.242105662822723, |
| "ce_loss_7": 3.7187010407447816, |
| "epoch": 0.587, |
| "grad_norm": 592.0, |
| "kl_loss_10": 183.17743911743165, |
| "kl_loss_2": 2149.7393432617187, |
| "kl_loss_3": 1683.0787292480468, |
| "kl_loss_7": 610.0913803100586, |
| "learning_rate": 0.0003713569393849543, |
| "loss": 1154.5703, |
| "step": 5870 |
| }, |
| { |
| "ce_loss_10": 3.5839020013809204, |
| "ce_loss_13": 3.5078009486198427, |
| "ce_loss_2": 4.56416871547699, |
| "ce_loss_3": 4.296731424331665, |
| "ce_loss_7": 3.767895996570587, |
| "epoch": 0.588, |
| "grad_norm": 592.0, |
| "kl_loss_10": 183.36542816162108, |
| "kl_loss_2": 2186.738494873047, |
| "kl_loss_3": 1717.5487915039062, |
| "kl_loss_7": 612.2841430664063, |
| "learning_rate": 0.00036982434130084397, |
| "loss": 1179.8928, |
| "step": 5880 |
| }, |
| { |
| "ce_loss_10": 3.4997439622879027, |
| "ce_loss_13": 3.4187664270401, |
| "ce_loss_2": 4.478350329399109, |
| "ce_loss_3": 4.210885548591614, |
| "ce_loss_7": 3.6801365852355956, |
| "epoch": 0.589, |
| "grad_norm": 664.0, |
| "kl_loss_10": 186.01408843994142, |
| "kl_loss_2": 2192.050701904297, |
| "kl_loss_3": 1713.8697509765625, |
| "kl_loss_7": 622.2605224609375, |
| "learning_rate": 0.00036829305408417166, |
| "loss": 1185.5467, |
| "step": 5890 |
| }, |
| { |
| "ce_loss_10": 3.4883674502372743, |
| "ce_loss_13": 3.4076414942741393, |
| "ce_loss_2": 4.51081612110138, |
| "ce_loss_3": 4.233860373497009, |
| "ce_loss_7": 3.68140949010849, |
| "epoch": 0.59, |
| "grad_norm": 632.0, |
| "kl_loss_10": 185.69306488037108, |
| "kl_loss_2": 2265.8583251953123, |
| "kl_loss_3": 1770.2322631835937, |
| "kl_loss_7": 633.7182983398437, |
| "learning_rate": 0.0003667630931549826, |
| "loss": 1189.5502, |
| "step": 5900 |
| }, |
| { |
| "ce_loss_10": 3.454320323467255, |
| "ce_loss_13": 3.376146912574768, |
| "ce_loss_2": 4.510071706771851, |
| "ce_loss_3": 4.2408855676651, |
| "ce_loss_7": 3.649706947803497, |
| "epoch": 0.591, |
| "grad_norm": 728.0, |
| "kl_loss_10": 185.1581298828125, |
| "kl_loss_2": 2343.439013671875, |
| "kl_loss_3": 1859.8356567382812, |
| "kl_loss_7": 639.2615692138672, |
| "learning_rate": 0.00036523447391996613, |
| "loss": 1217.3514, |
| "step": 5910 |
| }, |
| { |
| "ce_loss_10": 3.549302911758423, |
| "ce_loss_13": 3.4722840428352355, |
| "ce_loss_2": 4.514612603187561, |
| "ce_loss_3": 4.256480038166046, |
| "ce_loss_7": 3.727895641326904, |
| "epoch": 0.592, |
| "grad_norm": 580.0, |
| "kl_loss_10": 181.60699539184571, |
| "kl_loss_2": 2162.6364685058593, |
| "kl_loss_3": 1701.0076782226563, |
| "kl_loss_7": 610.4459930419922, |
| "learning_rate": 0.00036370721177230114, |
| "loss": 1162.5948, |
| "step": 5920 |
| }, |
| { |
| "ce_loss_10": 3.543530595302582, |
| "ce_loss_13": 3.4660569787025453, |
| "ce_loss_2": 4.543927192687988, |
| "ce_loss_3": 4.277280712127686, |
| "ce_loss_7": 3.728453516960144, |
| "epoch": 0.593, |
| "grad_norm": 628.0, |
| "kl_loss_10": 184.26243515014647, |
| "kl_loss_2": 2218.4042541503904, |
| "kl_loss_3": 1743.315625, |
| "kl_loss_7": 620.730111694336, |
| "learning_rate": 0.00036218132209150044, |
| "loss": 1186.6707, |
| "step": 5930 |
| }, |
| { |
| "ce_loss_10": 3.497697722911835, |
| "ce_loss_13": 3.4142557263374327, |
| "ce_loss_2": 4.5388647556304935, |
| "ce_loss_3": 4.264691114425659, |
| "ce_loss_7": 3.6943756103515626, |
| "epoch": 0.594, |
| "grad_norm": 524.0, |
| "kl_loss_10": 188.87873077392578, |
| "kl_loss_2": 2304.517468261719, |
| "kl_loss_3": 1814.6093872070312, |
| "kl_loss_7": 639.0129974365234, |
| "learning_rate": 0.0003606568202432562, |
| "loss": 1197.9809, |
| "step": 5940 |
| }, |
| { |
| "ce_loss_10": 3.565451109409332, |
| "ce_loss_13": 3.4856663823127745, |
| "ce_loss_2": 4.5841080904006954, |
| "ce_loss_3": 4.317169034481049, |
| "ce_loss_7": 3.754825806617737, |
| "epoch": 0.595, |
| "grad_norm": 696.0, |
| "kl_loss_10": 187.19320907592774, |
| "kl_loss_2": 2274.0406982421873, |
| "kl_loss_3": 1793.8463073730468, |
| "kl_loss_7": 630.620458984375, |
| "learning_rate": 0.0003591337215792851, |
| "loss": 1177.4938, |
| "step": 5950 |
| }, |
| { |
| "ce_loss_10": 3.611758494377136, |
| "ce_loss_13": 3.5361703038215637, |
| "ce_loss_2": 4.54854645729065, |
| "ce_loss_3": 4.2874367237091064, |
| "ce_loss_7": 3.781387460231781, |
| "epoch": 0.596, |
| "grad_norm": 536.0, |
| "kl_loss_10": 179.44385452270507, |
| "kl_loss_2": 2134.903210449219, |
| "kl_loss_3": 1672.3198852539062, |
| "kl_loss_7": 603.0327301025391, |
| "learning_rate": 0.00035761204143717383, |
| "loss": 1174.0895, |
| "step": 5960 |
| }, |
| { |
| "ce_loss_10": 3.564636397361755, |
| "ce_loss_13": 3.4857504963874817, |
| "ce_loss_2": 4.562372779846191, |
| "ce_loss_3": 4.294865238666534, |
| "ce_loss_7": 3.747916042804718, |
| "epoch": 0.597, |
| "grad_norm": 616.0, |
| "kl_loss_10": 181.63295822143556, |
| "kl_loss_2": 2217.5136901855467, |
| "kl_loss_3": 1751.903790283203, |
| "kl_loss_7": 618.9376495361328, |
| "learning_rate": 0.0003560917951402245, |
| "loss": 1215.2734, |
| "step": 5970 |
| }, |
| { |
| "ce_loss_10": 3.5358213543891908, |
| "ce_loss_13": 3.461250603199005, |
| "ce_loss_2": 4.515460109710693, |
| "ce_loss_3": 4.252109396457672, |
| "ce_loss_7": 3.720645487308502, |
| "epoch": 0.598, |
| "grad_norm": 616.0, |
| "kl_loss_10": 180.68030853271483, |
| "kl_loss_2": 2199.883331298828, |
| "kl_loss_3": 1727.857843017578, |
| "kl_loss_7": 614.7005615234375, |
| "learning_rate": 0.00035457299799730046, |
| "loss": 1174.0783, |
| "step": 5980 |
| }, |
| { |
| "ce_loss_10": 3.6016149520874023, |
| "ce_loss_13": 3.523995506763458, |
| "ce_loss_2": 4.564206576347351, |
| "ce_loss_3": 4.302748084068298, |
| "ce_loss_7": 3.7862717866897584, |
| "epoch": 0.599, |
| "grad_norm": 600.0, |
| "kl_loss_10": 181.36301651000977, |
| "kl_loss_2": 2153.0896545410155, |
| "kl_loss_3": 1694.4290161132812, |
| "kl_loss_7": 614.9286560058594, |
| "learning_rate": 0.0003530556653026721, |
| "loss": 1181.7495, |
| "step": 5990 |
| }, |
| { |
| "ce_loss_10": 3.5210883378982545, |
| "ce_loss_13": 3.4458776116371155, |
| "ce_loss_2": 4.520641088485718, |
| "ce_loss_3": 4.254351568222046, |
| "ce_loss_7": 3.699181377887726, |
| "epoch": 0.6, |
| "grad_norm": 764.0, |
| "kl_loss_10": 179.21529235839844, |
| "kl_loss_2": 2227.2805419921874, |
| "kl_loss_3": 1758.5499328613282, |
| "kl_loss_7": 610.0478576660156, |
| "learning_rate": 0.00035153981233586274, |
| "loss": 1193.8637, |
| "step": 6000 |
| }, |
| { |
| "ce_loss_10": 3.499428999423981, |
| "ce_loss_13": 3.422479748725891, |
| "ce_loss_2": 4.4867565631866455, |
| "ce_loss_3": 4.227682662010193, |
| "ce_loss_7": 3.6805691361427306, |
| "epoch": 0.601, |
| "grad_norm": 584.0, |
| "kl_loss_10": 179.26205139160157, |
| "kl_loss_2": 2193.6119079589844, |
| "kl_loss_3": 1731.6024475097656, |
| "kl_loss_7": 612.7285736083984, |
| "learning_rate": 0.00035002545436149473, |
| "loss": 1214.442, |
| "step": 6010 |
| }, |
| { |
| "ce_loss_10": 3.507369041442871, |
| "ce_loss_13": 3.427609443664551, |
| "ce_loss_2": 4.515847969055176, |
| "ce_loss_3": 4.248699688911438, |
| "ce_loss_7": 3.6938512086868287, |
| "epoch": 0.602, |
| "grad_norm": 592.0, |
| "kl_loss_10": 187.4394386291504, |
| "kl_loss_2": 2240.724530029297, |
| "kl_loss_3": 1766.6628112792969, |
| "kl_loss_7": 629.6498748779297, |
| "learning_rate": 0.0003485126066291364, |
| "loss": 1169.8236, |
| "step": 6020 |
| }, |
| { |
| "ce_loss_10": 3.5554185032844545, |
| "ce_loss_13": 3.4788596630096436, |
| "ce_loss_2": 4.540017461776733, |
| "ce_loss_3": 4.2838677883148195, |
| "ce_loss_7": 3.736110508441925, |
| "epoch": 0.603, |
| "grad_norm": 520.0, |
| "kl_loss_10": 179.3347900390625, |
| "kl_loss_2": 2192.1767639160157, |
| "kl_loss_3": 1731.594775390625, |
| "kl_loss_7": 613.0785980224609, |
| "learning_rate": 0.0003470012843731476, |
| "loss": 1185.9094, |
| "step": 6030 |
| }, |
| { |
| "ce_loss_10": 3.494213032722473, |
| "ce_loss_13": 3.41587815284729, |
| "ce_loss_2": 4.493516874313355, |
| "ce_loss_3": 4.230558323860168, |
| "ce_loss_7": 3.6748696088790895, |
| "epoch": 0.604, |
| "grad_norm": 604.0, |
| "kl_loss_10": 180.02317504882814, |
| "kl_loss_2": 2220.4429626464844, |
| "kl_loss_3": 1750.9172302246093, |
| "kl_loss_7": 613.3353332519531, |
| "learning_rate": 0.00034549150281252633, |
| "loss": 1207.7186, |
| "step": 6040 |
| }, |
| { |
| "ce_loss_10": 3.4735769987106324, |
| "ce_loss_13": 3.398567247390747, |
| "ce_loss_2": 4.450454211235046, |
| "ce_loss_3": 4.185557043552398, |
| "ce_loss_7": 3.660480535030365, |
| "epoch": 0.605, |
| "grad_norm": 608.0, |
| "kl_loss_10": 181.83876571655273, |
| "kl_loss_2": 2163.210076904297, |
| "kl_loss_3": 1694.9721801757812, |
| "kl_loss_7": 612.4143432617187, |
| "learning_rate": 0.0003439832771507565, |
| "loss": 1157.9707, |
| "step": 6050 |
| }, |
| { |
| "ce_loss_10": 3.4816818594932557, |
| "ce_loss_13": 3.4034390568733217, |
| "ce_loss_2": 4.478318929672241, |
| "ce_loss_3": 4.211991810798645, |
| "ce_loss_7": 3.6656970381736755, |
| "epoch": 0.606, |
| "grad_norm": 560.0, |
| "kl_loss_10": 181.10105361938477, |
| "kl_loss_2": 2226.4850891113283, |
| "kl_loss_3": 1757.37236328125, |
| "kl_loss_7": 619.9399398803711, |
| "learning_rate": 0.0003424766225756537, |
| "loss": 1172.4078, |
| "step": 6060 |
| }, |
| { |
| "ce_loss_10": 3.5375612139701844, |
| "ce_loss_13": 3.4606423020362853, |
| "ce_loss_2": 4.53115668296814, |
| "ce_loss_3": 4.261643159389496, |
| "ce_loss_7": 3.7194941639900208, |
| "epoch": 0.607, |
| "grad_norm": 600.0, |
| "kl_loss_10": 181.42390975952148, |
| "kl_loss_2": 2202.2797973632814, |
| "kl_loss_3": 1733.2598999023437, |
| "kl_loss_7": 615.9942810058594, |
| "learning_rate": 0.00034097155425921255, |
| "loss": 1158.2284, |
| "step": 6070 |
| }, |
| { |
| "ce_loss_10": 3.433805537223816, |
| "ce_loss_13": 3.354471778869629, |
| "ce_loss_2": 4.449812698364258, |
| "ce_loss_3": 4.179146933555603, |
| "ce_loss_7": 3.6204983830451964, |
| "epoch": 0.608, |
| "grad_norm": 592.0, |
| "kl_loss_10": 183.06991577148438, |
| "kl_loss_2": 2273.455847167969, |
| "kl_loss_3": 1787.318505859375, |
| "kl_loss_7": 624.6353576660156, |
| "learning_rate": 0.0003394680873574546, |
| "loss": 1187.3987, |
| "step": 6080 |
| }, |
| { |
| "ce_loss_10": 3.54138503074646, |
| "ce_loss_13": 3.4626068115234374, |
| "ce_loss_2": 4.556825470924378, |
| "ce_loss_3": 4.281811666488648, |
| "ce_loss_7": 3.7267327547073363, |
| "epoch": 0.609, |
| "grad_norm": 620.0, |
| "kl_loss_10": 183.72728881835937, |
| "kl_loss_2": 2232.6337280273438, |
| "kl_loss_3": 1752.9180786132813, |
| "kl_loss_7": 617.8084594726563, |
| "learning_rate": 0.0003379662370102747, |
| "loss": 1176.7848, |
| "step": 6090 |
| }, |
| { |
| "ce_loss_10": 3.5495489597320558, |
| "ce_loss_13": 3.4742938756942747, |
| "ce_loss_2": 4.515744471549988, |
| "ce_loss_3": 4.251201486587524, |
| "ce_loss_7": 3.726244103908539, |
| "epoch": 0.61, |
| "grad_norm": 640.0, |
| "kl_loss_10": 179.96657028198243, |
| "kl_loss_2": 2182.172985839844, |
| "kl_loss_3": 1717.0841491699218, |
| "kl_loss_7": 617.4093353271485, |
| "learning_rate": 0.0003364660183412892, |
| "loss": 1176.2052, |
| "step": 6100 |
| }, |
| { |
| "ce_loss_10": 3.5306557536125185, |
| "ce_loss_13": 3.4546700954437255, |
| "ce_loss_2": 4.500067496299744, |
| "ce_loss_3": 4.235128319263458, |
| "ce_loss_7": 3.7075342297554017, |
| "epoch": 0.611, |
| "grad_norm": 592.0, |
| "kl_loss_10": 182.79292755126954, |
| "kl_loss_2": 2182.2781616210937, |
| "kl_loss_3": 1714.5931213378906, |
| "kl_loss_7": 613.8878936767578, |
| "learning_rate": 0.0003349674464576834, |
| "loss": 1190.8153, |
| "step": 6110 |
| }, |
| { |
| "ce_loss_10": 3.477449345588684, |
| "ce_loss_13": 3.3995738983154298, |
| "ce_loss_2": 4.485787630081177, |
| "ce_loss_3": 4.219619536399842, |
| "ce_loss_7": 3.6623815417289736, |
| "epoch": 0.612, |
| "grad_norm": 628.0, |
| "kl_loss_10": 181.76175689697266, |
| "kl_loss_2": 2235.75986328125, |
| "kl_loss_3": 1763.9286254882813, |
| "kl_loss_7": 619.9945251464844, |
| "learning_rate": 0.00033347053645005966, |
| "loss": 1163.8981, |
| "step": 6120 |
| }, |
| { |
| "ce_loss_10": 3.5906055331230164, |
| "ce_loss_13": 3.514803075790405, |
| "ce_loss_2": 4.5458073854446415, |
| "ce_loss_3": 4.283458161354065, |
| "ce_loss_7": 3.772923803329468, |
| "epoch": 0.613, |
| "grad_norm": 644.0, |
| "kl_loss_10": 178.51968688964843, |
| "kl_loss_2": 2116.6735778808593, |
| "kl_loss_3": 1659.320733642578, |
| "kl_loss_7": 606.7959167480469, |
| "learning_rate": 0.00033197530339228485, |
| "loss": 1170.5501, |
| "step": 6130 |
| }, |
| { |
| "ce_loss_10": 3.5471089243888856, |
| "ce_loss_13": 3.468013954162598, |
| "ce_loss_2": 4.5254878282546995, |
| "ce_loss_3": 4.254842627048492, |
| "ce_loss_7": 3.73079137802124, |
| "epoch": 0.614, |
| "grad_norm": 532.0, |
| "kl_loss_10": 183.3593994140625, |
| "kl_loss_2": 2176.643206787109, |
| "kl_loss_3": 1701.0747802734375, |
| "kl_loss_7": 619.2012481689453, |
| "learning_rate": 0.00033048176234133967, |
| "loss": 1166.8168, |
| "step": 6140 |
| }, |
| { |
| "ce_loss_10": 3.5306158542633055, |
| "ce_loss_13": 3.453017568588257, |
| "ce_loss_2": 4.494013047218322, |
| "ce_loss_3": 4.233083915710449, |
| "ce_loss_7": 3.7115015268325804, |
| "epoch": 0.615, |
| "grad_norm": 592.0, |
| "kl_loss_10": 183.00715713500978, |
| "kl_loss_2": 2175.1028686523437, |
| "kl_loss_3": 1702.7456420898438, |
| "kl_loss_7": 619.7471405029297, |
| "learning_rate": 0.0003289899283371657, |
| "loss": 1181.7955, |
| "step": 6150 |
| }, |
| { |
| "ce_loss_10": 3.5544473528862, |
| "ce_loss_13": 3.4786699175834657, |
| "ce_loss_2": 4.547568416595459, |
| "ce_loss_3": 4.281970739364624, |
| "ce_loss_7": 3.7363924741744996, |
| "epoch": 0.616, |
| "grad_norm": 600.0, |
| "kl_loss_10": 178.17992782592773, |
| "kl_loss_2": 2185.039025878906, |
| "kl_loss_3": 1723.0535766601563, |
| "kl_loss_7": 600.4897644042969, |
| "learning_rate": 0.0003274998164025148, |
| "loss": 1196.8087, |
| "step": 6160 |
| }, |
| { |
| "ce_loss_10": 3.586019229888916, |
| "ce_loss_13": 3.509108769893646, |
| "ce_loss_2": 4.5615111827850345, |
| "ce_loss_3": 4.2898026466369625, |
| "ce_loss_7": 3.76910115480423, |
| "epoch": 0.617, |
| "grad_norm": 596.0, |
| "kl_loss_10": 183.4706718444824, |
| "kl_loss_2": 2168.4442443847656, |
| "kl_loss_3": 1695.5099731445312, |
| "kl_loss_7": 616.1861694335937, |
| "learning_rate": 0.0003260114415427975, |
| "loss": 1190.7359, |
| "step": 6170 |
| }, |
| { |
| "ce_loss_10": 3.5073242664337156, |
| "ce_loss_13": 3.4292925119400026, |
| "ce_loss_2": 4.523944449424744, |
| "ce_loss_3": 4.251231408119201, |
| "ce_loss_7": 3.6900092363357544, |
| "epoch": 0.618, |
| "grad_norm": 612.0, |
| "kl_loss_10": 180.3868850708008, |
| "kl_loss_2": 2258.1235778808596, |
| "kl_loss_3": 1773.0142578125, |
| "kl_loss_7": 615.9339263916015, |
| "learning_rate": 0.0003245248187459323, |
| "loss": 1218.0189, |
| "step": 6180 |
| }, |
| { |
| "ce_loss_10": 3.4972055196762084, |
| "ce_loss_13": 3.4217321276664734, |
| "ce_loss_2": 4.4563206195831295, |
| "ce_loss_3": 4.195159709453582, |
| "ce_loss_7": 3.6716169476509095, |
| "epoch": 0.619, |
| "grad_norm": 596.0, |
| "kl_loss_10": 176.01737060546876, |
| "kl_loss_2": 2149.769183349609, |
| "kl_loss_3": 1675.0436157226563, |
| "kl_loss_7": 597.3845794677734, |
| "learning_rate": 0.00032303996298219416, |
| "loss": 1151.9841, |
| "step": 6190 |
| }, |
| { |
| "ce_loss_10": 3.5777448058128356, |
| "ce_loss_13": 3.500323712825775, |
| "ce_loss_2": 4.53541202545166, |
| "ce_loss_3": 4.266927003860474, |
| "ce_loss_7": 3.755027210712433, |
| "epoch": 0.62, |
| "grad_norm": 540.0, |
| "kl_loss_10": 178.23485260009767, |
| "kl_loss_2": 2112.595593261719, |
| "kl_loss_3": 1646.6524230957032, |
| "kl_loss_7": 602.416943359375, |
| "learning_rate": 0.00032155688920406414, |
| "loss": 1145.6068, |
| "step": 6200 |
| }, |
| { |
| "ce_loss_10": 3.489628565311432, |
| "ce_loss_13": 3.408998668193817, |
| "ce_loss_2": 4.5190582275390625, |
| "ce_loss_3": 4.245685923099518, |
| "ce_loss_7": 3.671571230888367, |
| "epoch": 0.621, |
| "grad_norm": 652.0, |
| "kl_loss_10": 183.76829681396484, |
| "kl_loss_2": 2272.4242309570313, |
| "kl_loss_3": 1788.1328247070312, |
| "kl_loss_7": 627.5300720214843, |
| "learning_rate": 0.0003200756123460788, |
| "loss": 1224.8912, |
| "step": 6210 |
| }, |
| { |
| "ce_loss_10": 3.5219372153282165, |
| "ce_loss_13": 3.4430843591690063, |
| "ce_loss_2": 4.530118870735168, |
| "ce_loss_3": 4.26385805606842, |
| "ce_loss_7": 3.708560848236084, |
| "epoch": 0.622, |
| "grad_norm": 684.0, |
| "kl_loss_10": 185.2090690612793, |
| "kl_loss_2": 2254.1378173828125, |
| "kl_loss_3": 1774.3692199707032, |
| "kl_loss_7": 633.037890625, |
| "learning_rate": 0.00031859614732467957, |
| "loss": 1207.0312, |
| "step": 6220 |
| }, |
| { |
| "ce_loss_10": 3.5700612902641295, |
| "ce_loss_13": 3.4917181968688964, |
| "ce_loss_2": 4.540509462356567, |
| "ce_loss_3": 4.275044929981232, |
| "ce_loss_7": 3.7488471269607544, |
| "epoch": 0.623, |
| "grad_norm": 564.0, |
| "kl_loss_10": 178.5159034729004, |
| "kl_loss_2": 2155.8026611328123, |
| "kl_loss_3": 1685.940985107422, |
| "kl_loss_7": 600.1484497070312, |
| "learning_rate": 0.00031711850903806275, |
| "loss": 1157.7447, |
| "step": 6230 |
| }, |
| { |
| "ce_loss_10": 3.479930281639099, |
| "ce_loss_13": 3.39938303232193, |
| "ce_loss_2": 4.482881689071656, |
| "ce_loss_3": 4.214280414581299, |
| "ce_loss_7": 3.666577732563019, |
| "epoch": 0.624, |
| "grad_norm": 528.0, |
| "kl_loss_10": 185.9188034057617, |
| "kl_loss_2": 2243.2543823242186, |
| "kl_loss_3": 1758.6941833496094, |
| "kl_loss_7": 628.0701446533203, |
| "learning_rate": 0.0003156427123660297, |
| "loss": 1172.3383, |
| "step": 6240 |
| }, |
| { |
| "ce_loss_10": 3.5643810868263244, |
| "ce_loss_13": 3.4881609320640563, |
| "ce_loss_2": 4.518170762062073, |
| "ce_loss_3": 4.258860862255096, |
| "ce_loss_7": 3.745578372478485, |
| "epoch": 0.625, |
| "grad_norm": 596.0, |
| "kl_loss_10": 180.73046417236327, |
| "kl_loss_2": 2135.2883361816407, |
| "kl_loss_3": 1669.8626892089844, |
| "kl_loss_7": 610.9410751342773, |
| "learning_rate": 0.0003141687721698363, |
| "loss": 1172.6947, |
| "step": 6250 |
| }, |
| { |
| "ce_loss_10": 3.536016345024109, |
| "ce_loss_13": 3.4606189489364625, |
| "ce_loss_2": 4.476572108268738, |
| "ce_loss_3": 4.211447751522064, |
| "ce_loss_7": 3.7014155983924866, |
| "epoch": 0.626, |
| "grad_norm": 616.0, |
| "kl_loss_10": 175.8163749694824, |
| "kl_loss_2": 2105.446813964844, |
| "kl_loss_3": 1637.2767333984375, |
| "kl_loss_7": 587.5009735107421, |
| "learning_rate": 0.00031269670329204396, |
| "loss": 1155.6384, |
| "step": 6260 |
| }, |
| { |
| "ce_loss_10": 3.5712973356246946, |
| "ce_loss_13": 3.4947034239768984, |
| "ce_loss_2": 4.515408515930176, |
| "ce_loss_3": 4.251049220561981, |
| "ce_loss_7": 3.7454182147979735, |
| "epoch": 0.627, |
| "grad_norm": 644.0, |
| "kl_loss_10": 181.6370933532715, |
| "kl_loss_2": 2120.7030395507813, |
| "kl_loss_3": 1650.25341796875, |
| "kl_loss_7": 607.5851348876953, |
| "learning_rate": 0.00031122652055637015, |
| "loss": 1169.2292, |
| "step": 6270 |
| }, |
| { |
| "ce_loss_10": 3.536707639694214, |
| "ce_loss_13": 3.460920011997223, |
| "ce_loss_2": 4.534442710876465, |
| "ce_loss_3": 4.263644289970398, |
| "ce_loss_7": 3.7196394085884092, |
| "epoch": 0.628, |
| "grad_norm": 556.0, |
| "kl_loss_10": 181.97393569946288, |
| "kl_loss_2": 2233.067547607422, |
| "kl_loss_3": 1750.9132995605469, |
| "kl_loss_7": 618.4156631469726, |
| "learning_rate": 0.0003097582387675385, |
| "loss": 1169.3315, |
| "step": 6280 |
| }, |
| { |
| "ce_loss_10": 3.5805759191513062, |
| "ce_loss_13": 3.503600060939789, |
| "ce_loss_2": 4.546688604354858, |
| "ce_loss_3": 4.285043132305145, |
| "ce_loss_7": 3.7596506476402283, |
| "epoch": 0.629, |
| "grad_norm": 536.0, |
| "kl_loss_10": 181.50545425415038, |
| "kl_loss_2": 2176.2076171875, |
| "kl_loss_3": 1706.1532836914062, |
| "kl_loss_7": 611.5946624755859, |
| "learning_rate": 0.00030829187271113034, |
| "loss": 1162.2808, |
| "step": 6290 |
| }, |
| { |
| "ce_loss_10": 3.5692893385887148, |
| "ce_loss_13": 3.49398432970047, |
| "ce_loss_2": 4.5324320793151855, |
| "ce_loss_3": 4.271732580661774, |
| "ce_loss_7": 3.738038659095764, |
| "epoch": 0.63, |
| "grad_norm": 660.0, |
| "kl_loss_10": 176.80067443847656, |
| "kl_loss_2": 2142.2837646484377, |
| "kl_loss_3": 1672.4158203125, |
| "kl_loss_7": 598.4024932861328, |
| "learning_rate": 0.00030682743715343565, |
| "loss": 1178.4112, |
| "step": 6300 |
| }, |
| { |
| "ce_loss_10": 3.5165117979049683, |
| "ce_loss_13": 3.4367071866989134, |
| "ce_loss_2": 4.5106003999710085, |
| "ce_loss_3": 4.248035335540772, |
| "ce_loss_7": 3.709369492530823, |
| "epoch": 0.631, |
| "grad_norm": 624.0, |
| "kl_loss_10": 185.85676803588868, |
| "kl_loss_2": 2187.9409912109377, |
| "kl_loss_3": 1716.0930541992188, |
| "kl_loss_7": 624.2622802734375, |
| "learning_rate": 0.0003053649468413043, |
| "loss": 1194.6155, |
| "step": 6310 |
| }, |
| { |
| "ce_loss_10": 3.6293103814125063, |
| "ce_loss_13": 3.5522167325019836, |
| "ce_loss_2": 4.589023590087891, |
| "ce_loss_3": 4.323796653747559, |
| "ce_loss_7": 3.8087464213371276, |
| "epoch": 0.632, |
| "grad_norm": 664.0, |
| "kl_loss_10": 183.21706161499023, |
| "kl_loss_2": 2147.7636474609376, |
| "kl_loss_3": 1686.63291015625, |
| "kl_loss_7": 615.6221435546875, |
| "learning_rate": 0.00030390441650199725, |
| "loss": 1158.5613, |
| "step": 6320 |
| }, |
| { |
| "ce_loss_10": 3.528099310398102, |
| "ce_loss_13": 3.4539591908454894, |
| "ce_loss_2": 4.50098488330841, |
| "ce_loss_3": 4.2318372368812565, |
| "ce_loss_7": 3.70650874376297, |
| "epoch": 0.633, |
| "grad_norm": 676.0, |
| "kl_loss_10": 181.68777465820312, |
| "kl_loss_2": 2164.3626098632812, |
| "kl_loss_3": 1687.4978088378907, |
| "kl_loss_7": 610.1166748046875, |
| "learning_rate": 0.00030244586084303903, |
| "loss": 1154.3, |
| "step": 6330 |
| }, |
| { |
| "ce_loss_10": 3.4934327363967896, |
| "ce_loss_13": 3.416351318359375, |
| "ce_loss_2": 4.505590105056763, |
| "ce_loss_3": 4.235206222534179, |
| "ce_loss_7": 3.6859657049179075, |
| "epoch": 0.634, |
| "grad_norm": 564.0, |
| "kl_loss_10": 183.83423309326173, |
| "kl_loss_2": 2252.3034423828126, |
| "kl_loss_3": 1765.4126586914062, |
| "kl_loss_7": 627.5603424072266, |
| "learning_rate": 0.00030098929455206903, |
| "loss": 1173.0053, |
| "step": 6340 |
| }, |
| { |
| "ce_loss_10": 3.5009153842926026, |
| "ce_loss_13": 3.4256786108016968, |
| "ce_loss_2": 4.492053604125976, |
| "ce_loss_3": 4.224474251270294, |
| "ce_loss_7": 3.6754886388778685, |
| "epoch": 0.635, |
| "grad_norm": 592.0, |
| "kl_loss_10": 180.03737106323243, |
| "kl_loss_2": 2236.1428771972655, |
| "kl_loss_3": 1754.843768310547, |
| "kl_loss_7": 615.7969268798828, |
| "learning_rate": 0.00029953473229669324, |
| "loss": 1215.3177, |
| "step": 6350 |
| }, |
| { |
| "ce_loss_10": 3.5320404410362243, |
| "ce_loss_13": 3.4564929485321043, |
| "ce_loss_2": 4.505661821365356, |
| "ce_loss_3": 4.2487224817276, |
| "ce_loss_7": 3.717836594581604, |
| "epoch": 0.636, |
| "grad_norm": 560.0, |
| "kl_loss_10": 180.38322067260742, |
| "kl_loss_2": 2164.6011291503905, |
| "kl_loss_3": 1703.929705810547, |
| "kl_loss_7": 616.6229248046875, |
| "learning_rate": 0.00029808218872433767, |
| "loss": 1152.0346, |
| "step": 6360 |
| }, |
| { |
| "ce_loss_10": 3.5955461502075194, |
| "ce_loss_13": 3.521399176120758, |
| "ce_loss_2": 4.553752660751343, |
| "ce_loss_3": 4.287336015701294, |
| "ce_loss_7": 3.7661701798439027, |
| "epoch": 0.637, |
| "grad_norm": 584.0, |
| "kl_loss_10": 178.29160919189454, |
| "kl_loss_2": 2154.0076416015627, |
| "kl_loss_3": 1683.874932861328, |
| "kl_loss_7": 604.1974517822266, |
| "learning_rate": 0.0002966316784621, |
| "loss": 1148.5613, |
| "step": 6370 |
| }, |
| { |
| "ce_loss_10": 3.509734773635864, |
| "ce_loss_13": 3.4283226490020753, |
| "ce_loss_2": 4.500501930713654, |
| "ce_loss_3": 4.237296044826508, |
| "ce_loss_7": 3.697099339962006, |
| "epoch": 0.638, |
| "grad_norm": 572.0, |
| "kl_loss_10": 183.85193252563477, |
| "kl_loss_2": 2219.202685546875, |
| "kl_loss_3": 1744.0482971191407, |
| "kl_loss_7": 628.3488616943359, |
| "learning_rate": 0.0002951832161166024, |
| "loss": 1161.3599, |
| "step": 6380 |
| }, |
| { |
| "ce_loss_10": 3.5833853006362917, |
| "ce_loss_13": 3.5059871673583984, |
| "ce_loss_2": 4.560363245010376, |
| "ce_loss_3": 4.295464622974396, |
| "ce_loss_7": 3.7690476536750794, |
| "epoch": 0.639, |
| "grad_norm": 524.0, |
| "kl_loss_10": 182.52049560546874, |
| "kl_loss_2": 2159.0023681640623, |
| "kl_loss_3": 1692.8950073242188, |
| "kl_loss_7": 613.0403747558594, |
| "learning_rate": 0.0002937368162738445, |
| "loss": 1138.2498, |
| "step": 6390 |
| }, |
| { |
| "ce_loss_10": 3.5200544476509092, |
| "ce_loss_13": 3.4506627917289734, |
| "ce_loss_2": 4.487471246719361, |
| "ce_loss_3": 4.225354993343354, |
| "ce_loss_7": 3.695161283016205, |
| "epoch": 0.64, |
| "grad_norm": 648.0, |
| "kl_loss_10": 174.7782325744629, |
| "kl_loss_2": 2168.6176025390623, |
| "kl_loss_3": 1700.3829406738282, |
| "kl_loss_7": 598.6395080566406, |
| "learning_rate": 0.0002922924934990568, |
| "loss": 1174.9205, |
| "step": 6400 |
| }, |
| { |
| "ce_loss_10": 3.460334539413452, |
| "ce_loss_13": 3.3851306796073914, |
| "ce_loss_2": 4.486385345458984, |
| "ce_loss_3": 4.209315371513367, |
| "ce_loss_7": 3.646379458904266, |
| "epoch": 0.641, |
| "grad_norm": 592.0, |
| "kl_loss_10": 181.4815986633301, |
| "kl_loss_2": 2269.7528442382813, |
| "kl_loss_3": 1780.3827819824219, |
| "kl_loss_7": 623.2667114257813, |
| "learning_rate": 0.0002908502623365536, |
| "loss": 1180.7166, |
| "step": 6410 |
| }, |
| { |
| "ce_loss_10": 3.400831735134125, |
| "ce_loss_13": 3.323111522197723, |
| "ce_loss_2": 4.43465530872345, |
| "ce_loss_3": 4.168019390106201, |
| "ce_loss_7": 3.5887860655784607, |
| "epoch": 0.642, |
| "grad_norm": 584.0, |
| "kl_loss_10": 180.2845359802246, |
| "kl_loss_2": 2285.9919677734374, |
| "kl_loss_3": 1807.420263671875, |
| "kl_loss_7": 623.3097045898437, |
| "learning_rate": 0.0002894101373095867, |
| "loss": 1196.7524, |
| "step": 6420 |
| }, |
| { |
| "ce_loss_10": 3.610305404663086, |
| "ce_loss_13": 3.5335601687431337, |
| "ce_loss_2": 4.569022560119629, |
| "ce_loss_3": 4.3065975427627565, |
| "ce_loss_7": 3.788155424594879, |
| "epoch": 0.643, |
| "grad_norm": 656.0, |
| "kl_loss_10": 185.8272720336914, |
| "kl_loss_2": 2151.861962890625, |
| "kl_loss_3": 1684.6893188476563, |
| "kl_loss_7": 614.9325622558594, |
| "learning_rate": 0.00028797213292019926, |
| "loss": 1162.4543, |
| "step": 6430 |
| }, |
| { |
| "ce_loss_10": 3.5838815212249755, |
| "ce_loss_13": 3.5060059309005736, |
| "ce_loss_2": 4.542932081222534, |
| "ce_loss_3": 4.284253716468811, |
| "ce_loss_7": 3.7631338119506834, |
| "epoch": 0.644, |
| "grad_norm": 536.0, |
| "kl_loss_10": 182.34116134643554, |
| "kl_loss_2": 2161.139373779297, |
| "kl_loss_3": 1697.281298828125, |
| "kl_loss_7": 612.268832397461, |
| "learning_rate": 0.0002865362636490791, |
| "loss": 1187.0314, |
| "step": 6440 |
| }, |
| { |
| "ce_loss_10": 3.598045587539673, |
| "ce_loss_13": 3.524975371360779, |
| "ce_loss_2": 4.552241158485413, |
| "ce_loss_3": 4.294051146507263, |
| "ce_loss_7": 3.7727458000183107, |
| "epoch": 0.645, |
| "grad_norm": 536.0, |
| "kl_loss_10": 178.94673080444335, |
| "kl_loss_2": 2142.5753845214845, |
| "kl_loss_3": 1685.976092529297, |
| "kl_loss_7": 604.8534698486328, |
| "learning_rate": 0.0002851025439554142, |
| "loss": 1148.6578, |
| "step": 6450 |
| }, |
| { |
| "ce_loss_10": 3.5864033341407775, |
| "ce_loss_13": 3.5102365136146547, |
| "ce_loss_2": 4.530459260940551, |
| "ce_loss_3": 4.2697702050209045, |
| "ce_loss_7": 3.77256600856781, |
| "epoch": 0.646, |
| "grad_norm": 552.0, |
| "kl_loss_10": 180.96249084472657, |
| "kl_loss_2": 2094.927349853516, |
| "kl_loss_3": 1631.7639465332031, |
| "kl_loss_7": 608.3878204345704, |
| "learning_rate": 0.00028367098827674573, |
| "loss": 1141.2359, |
| "step": 6460 |
| }, |
| { |
| "ce_loss_10": 3.5153084993362427, |
| "ce_loss_13": 3.4397502303123475, |
| "ce_loss_2": 4.504270768165588, |
| "ce_loss_3": 4.232501339912415, |
| "ce_loss_7": 3.69371120929718, |
| "epoch": 0.647, |
| "grad_norm": 588.0, |
| "kl_loss_10": 178.14280624389647, |
| "kl_loss_2": 2186.196148681641, |
| "kl_loss_3": 1706.314013671875, |
| "kl_loss_7": 600.8890106201172, |
| "learning_rate": 0.00028224161102882397, |
| "loss": 1170.0225, |
| "step": 6470 |
| }, |
| { |
| "ce_loss_10": 3.494782865047455, |
| "ce_loss_13": 3.418469178676605, |
| "ce_loss_2": 4.45595076084137, |
| "ce_loss_3": 4.1893230199813845, |
| "ce_loss_7": 3.6707924604415894, |
| "epoch": 0.648, |
| "grad_norm": 644.0, |
| "kl_loss_10": 177.30072097778321, |
| "kl_loss_2": 2145.1575622558594, |
| "kl_loss_3": 1676.7467163085937, |
| "kl_loss_7": 599.7084075927735, |
| "learning_rate": 0.00028081442660546124, |
| "loss": 1164.476, |
| "step": 6480 |
| }, |
| { |
| "ce_loss_10": 3.5571305990219115, |
| "ce_loss_13": 3.4820198893547056, |
| "ce_loss_2": 4.520304107666016, |
| "ce_loss_3": 4.250013303756714, |
| "ce_loss_7": 3.7307825326919555, |
| "epoch": 0.649, |
| "grad_norm": 708.0, |
| "kl_loss_10": 180.5020294189453, |
| "kl_loss_2": 2162.021893310547, |
| "kl_loss_3": 1681.335223388672, |
| "kl_loss_7": 604.5856201171875, |
| "learning_rate": 0.0002793894493783892, |
| "loss": 1161.7205, |
| "step": 6490 |
| }, |
| { |
| "ce_loss_10": 3.5730626702308657, |
| "ce_loss_13": 3.4996850967407225, |
| "ce_loss_2": 4.535577750205993, |
| "ce_loss_3": 4.2806238532066345, |
| "ce_loss_7": 3.746683120727539, |
| "epoch": 0.65, |
| "grad_norm": 532.0, |
| "kl_loss_10": 175.4969383239746, |
| "kl_loss_2": 2147.980224609375, |
| "kl_loss_3": 1685.3772094726562, |
| "kl_loss_7": 592.4264343261718, |
| "learning_rate": 0.0002779666936971129, |
| "loss": 1147.2826, |
| "step": 6500 |
| }, |
| { |
| "ce_loss_10": 3.579540717601776, |
| "ce_loss_13": 3.503932845592499, |
| "ce_loss_2": 4.570328307151795, |
| "ce_loss_3": 4.304692578315735, |
| "ce_loss_7": 3.760816919803619, |
| "epoch": 0.651, |
| "grad_norm": 560.0, |
| "kl_loss_10": 180.5074890136719, |
| "kl_loss_2": 2190.618371582031, |
| "kl_loss_3": 1722.614599609375, |
| "kl_loss_7": 613.0008575439454, |
| "learning_rate": 0.00027654617388876614, |
| "loss": 1176.0404, |
| "step": 6510 |
| }, |
| { |
| "ce_loss_10": 3.6101376891136168, |
| "ce_loss_13": 3.5372079849243163, |
| "ce_loss_2": 4.574031090736389, |
| "ce_loss_3": 4.305119824409485, |
| "ce_loss_7": 3.7854838371276855, |
| "epoch": 0.652, |
| "grad_norm": 506.0, |
| "kl_loss_10": 179.79571838378905, |
| "kl_loss_2": 2171.3123046875, |
| "kl_loss_3": 1687.553973388672, |
| "kl_loss_7": 603.6635681152344, |
| "learning_rate": 0.0002751279042579672, |
| "loss": 1161.0621, |
| "step": 6520 |
| }, |
| { |
| "ce_loss_10": 3.5500629782676696, |
| "ce_loss_13": 3.475240981578827, |
| "ce_loss_2": 4.515746712684631, |
| "ce_loss_3": 4.248768877983093, |
| "ce_loss_7": 3.726365828514099, |
| "epoch": 0.653, |
| "grad_norm": 520.0, |
| "kl_loss_10": 175.72006454467774, |
| "kl_loss_2": 2132.0258361816404, |
| "kl_loss_3": 1663.1835388183595, |
| "kl_loss_7": 593.7185913085938, |
| "learning_rate": 0.00027371189908667604, |
| "loss": 1173.0754, |
| "step": 6530 |
| }, |
| { |
| "ce_loss_10": 3.6066598892211914, |
| "ce_loss_13": 3.5256664633750914, |
| "ce_loss_2": 4.603748297691345, |
| "ce_loss_3": 4.334264886379242, |
| "ce_loss_7": 3.7890505313873293, |
| "epoch": 0.654, |
| "grad_norm": 556.0, |
| "kl_loss_10": 184.21709976196288, |
| "kl_loss_2": 2224.855090332031, |
| "kl_loss_3": 1750.1156860351562, |
| "kl_loss_7": 618.167140197754, |
| "learning_rate": 0.00027229817263404863, |
| "loss": 1200.1538, |
| "step": 6540 |
| }, |
| { |
| "ce_loss_10": 3.579574966430664, |
| "ce_loss_13": 3.502782142162323, |
| "ce_loss_2": 4.505041122436523, |
| "ce_loss_3": 4.243296790122986, |
| "ce_loss_7": 3.751440441608429, |
| "epoch": 0.655, |
| "grad_norm": 552.0, |
| "kl_loss_10": 178.47067565917968, |
| "kl_loss_2": 2091.8775146484377, |
| "kl_loss_3": 1621.7340759277345, |
| "kl_loss_7": 596.6207824707031, |
| "learning_rate": 0.0002708867391362948, |
| "loss": 1145.7798, |
| "step": 6550 |
| }, |
| { |
| "ce_loss_10": 3.5594303607940674, |
| "ce_loss_13": 3.4848424673080443, |
| "ce_loss_2": 4.510186004638672, |
| "ce_loss_3": 4.239946413040161, |
| "ce_loss_7": 3.729544758796692, |
| "epoch": 0.656, |
| "grad_norm": 600.0, |
| "kl_loss_10": 174.02187423706056, |
| "kl_loss_2": 2098.8442565917967, |
| "kl_loss_3": 1625.1830017089844, |
| "kl_loss_7": 579.9141540527344, |
| "learning_rate": 0.0002694776128065345, |
| "loss": 1152.9096, |
| "step": 6560 |
| }, |
| { |
| "ce_loss_10": 3.500006926059723, |
| "ce_loss_13": 3.4239490151405336, |
| "ce_loss_2": 4.463183629512787, |
| "ce_loss_3": 4.198473536968232, |
| "ce_loss_7": 3.6787616848945617, |
| "epoch": 0.657, |
| "grad_norm": 524.0, |
| "kl_loss_10": 181.49803161621094, |
| "kl_loss_2": 2175.4376220703125, |
| "kl_loss_3": 1692.8995849609375, |
| "kl_loss_7": 616.3900787353516, |
| "learning_rate": 0.00026807080783465374, |
| "loss": 1144.908, |
| "step": 6570 |
| }, |
| { |
| "ce_loss_10": 3.609026849269867, |
| "ce_loss_13": 3.5301132678985594, |
| "ce_loss_2": 4.5842578411102295, |
| "ce_loss_3": 4.322691702842713, |
| "ce_loss_7": 3.7907386422157288, |
| "epoch": 0.658, |
| "grad_norm": 540.0, |
| "kl_loss_10": 181.2076759338379, |
| "kl_loss_2": 2170.1943603515624, |
| "kl_loss_3": 1703.6680847167968, |
| "kl_loss_7": 614.0023223876954, |
| "learning_rate": 0.00026666633838716316, |
| "loss": 1180.9756, |
| "step": 6580 |
| }, |
| { |
| "ce_loss_10": 3.505808639526367, |
| "ce_loss_13": 3.424725067615509, |
| "ce_loss_2": 4.498762392997742, |
| "ce_loss_3": 4.228746104240417, |
| "ce_loss_7": 3.6866647005081177, |
| "epoch": 0.659, |
| "grad_norm": 660.0, |
| "kl_loss_10": 183.16211471557617, |
| "kl_loss_2": 2205.399041748047, |
| "kl_loss_3": 1729.7577697753907, |
| "kl_loss_7": 619.1098449707031, |
| "learning_rate": 0.00026526421860705474, |
| "loss": 1196.5574, |
| "step": 6590 |
| }, |
| { |
| "ce_loss_10": 3.5278443932533263, |
| "ce_loss_13": 3.451081359386444, |
| "ce_loss_2": 4.514556968212128, |
| "ce_loss_3": 4.246424973011017, |
| "ce_loss_7": 3.7130470991134645, |
| "epoch": 0.66, |
| "grad_norm": 604.0, |
| "kl_loss_10": 183.11859054565429, |
| "kl_loss_2": 2195.080352783203, |
| "kl_loss_3": 1720.6291870117188, |
| "kl_loss_7": 617.9165832519532, |
| "learning_rate": 0.0002638644626136587, |
| "loss": 1167.115, |
| "step": 6600 |
| }, |
| { |
| "ce_loss_10": 3.5388341546058655, |
| "ce_loss_13": 3.4648394107818605, |
| "ce_loss_2": 4.518757033348083, |
| "ce_loss_3": 4.251232302188873, |
| "ce_loss_7": 3.7166133403778074, |
| "epoch": 0.661, |
| "grad_norm": 600.0, |
| "kl_loss_10": 177.9744987487793, |
| "kl_loss_2": 2169.1557434082033, |
| "kl_loss_3": 1695.717547607422, |
| "kl_loss_7": 605.9922027587891, |
| "learning_rate": 0.00026246708450250255, |
| "loss": 1163.9504, |
| "step": 6610 |
| }, |
| { |
| "ce_loss_10": 3.536445343494415, |
| "ce_loss_13": 3.4618695259094237, |
| "ce_loss_2": 4.500265717506409, |
| "ce_loss_3": 4.239484262466431, |
| "ce_loss_7": 3.7084587097167967, |
| "epoch": 0.662, |
| "grad_norm": 624.0, |
| "kl_loss_10": 177.32165069580077, |
| "kl_loss_2": 2153.5470336914063, |
| "kl_loss_3": 1682.642596435547, |
| "kl_loss_7": 596.0139556884766, |
| "learning_rate": 0.00026107209834516854, |
| "loss": 1159.9879, |
| "step": 6620 |
| }, |
| { |
| "ce_loss_10": 3.4876843810081484, |
| "ce_loss_13": 3.4082067966461183, |
| "ce_loss_2": 4.498864269256591, |
| "ce_loss_3": 4.235283279418946, |
| "ce_loss_7": 3.666292154788971, |
| "epoch": 0.663, |
| "grad_norm": 620.0, |
| "kl_loss_10": 180.81258544921874, |
| "kl_loss_2": 2256.257257080078, |
| "kl_loss_3": 1782.119805908203, |
| "kl_loss_7": 615.3174774169922, |
| "learning_rate": 0.0002596795181891514, |
| "loss": 1197.8284, |
| "step": 6630 |
| }, |
| { |
| "ce_loss_10": 3.4956326842308045, |
| "ce_loss_13": 3.414097845554352, |
| "ce_loss_2": 4.498321509361267, |
| "ce_loss_3": 4.228005886077881, |
| "ce_loss_7": 3.6832273960113526, |
| "epoch": 0.664, |
| "grad_norm": 676.0, |
| "kl_loss_10": 186.22876663208007, |
| "kl_loss_2": 2223.420690917969, |
| "kl_loss_3": 1743.7244079589843, |
| "kl_loss_7": 627.1363647460937, |
| "learning_rate": 0.000258289358057718, |
| "loss": 1222.5622, |
| "step": 6640 |
| }, |
| { |
| "ce_loss_10": 3.5669368505477905, |
| "ce_loss_13": 3.4856945157051085, |
| "ce_loss_2": 4.551595258712768, |
| "ce_loss_3": 4.286670958995819, |
| "ce_loss_7": 3.751522934436798, |
| "epoch": 0.665, |
| "grad_norm": 556.0, |
| "kl_loss_10": 185.22528228759765, |
| "kl_loss_2": 2211.366003417969, |
| "kl_loss_3": 1740.3289489746094, |
| "kl_loss_7": 619.9797149658203, |
| "learning_rate": 0.0002569016319497657, |
| "loss": 1184.505, |
| "step": 6650 |
| }, |
| { |
| "ce_loss_10": 3.5523419260978697, |
| "ce_loss_13": 3.4712039113044737, |
| "ce_loss_2": 4.537931609153747, |
| "ce_loss_3": 4.279768109321594, |
| "ce_loss_7": 3.7336499214172365, |
| "epoch": 0.666, |
| "grad_norm": 544.0, |
| "kl_loss_10": 186.748779296875, |
| "kl_loss_2": 2205.344372558594, |
| "kl_loss_3": 1734.7506713867188, |
| "kl_loss_7": 622.0667907714844, |
| "learning_rate": 0.00025551635383968066, |
| "loss": 1198.5273, |
| "step": 6660 |
| }, |
| { |
| "ce_loss_10": 3.463807392120361, |
| "ce_loss_13": 3.3866657257080077, |
| "ce_loss_2": 4.469345259666443, |
| "ce_loss_3": 4.193110883235931, |
| "ce_loss_7": 3.6494885683059692, |
| "epoch": 0.667, |
| "grad_norm": 576.0, |
| "kl_loss_10": 184.71422729492187, |
| "kl_loss_2": 2248.0073059082033, |
| "kl_loss_3": 1753.0323059082032, |
| "kl_loss_7": 619.0755401611328, |
| "learning_rate": 0.00025413353767719804, |
| "loss": 1195.2947, |
| "step": 6670 |
| }, |
| { |
| "ce_loss_10": 3.5190200567245484, |
| "ce_loss_13": 3.4452382922172546, |
| "ce_loss_2": 4.497325706481933, |
| "ce_loss_3": 4.232502174377442, |
| "ce_loss_7": 3.694538187980652, |
| "epoch": 0.668, |
| "grad_norm": 568.0, |
| "kl_loss_10": 177.03625259399413, |
| "kl_loss_2": 2189.713330078125, |
| "kl_loss_3": 1718.3252258300781, |
| "kl_loss_7": 606.7687957763671, |
| "learning_rate": 0.0002527531973872617, |
| "loss": 1177.4366, |
| "step": 6680 |
| }, |
| { |
| "ce_loss_10": 3.538633036613464, |
| "ce_loss_13": 3.4624911904335023, |
| "ce_loss_2": 4.504513430595398, |
| "ce_loss_3": 4.237690329551697, |
| "ce_loss_7": 3.7170337319374083, |
| "epoch": 0.669, |
| "grad_norm": 592.0, |
| "kl_loss_10": 178.7047462463379, |
| "kl_loss_2": 2166.141973876953, |
| "kl_loss_3": 1687.0463806152343, |
| "kl_loss_7": 609.9779602050781, |
| "learning_rate": 0.0002513753468698826, |
| "loss": 1160.7738, |
| "step": 6690 |
| }, |
| { |
| "ce_loss_10": 3.510761630535126, |
| "ce_loss_13": 3.4321574330329896, |
| "ce_loss_2": 4.506561207771301, |
| "ce_loss_3": 4.238252663612366, |
| "ce_loss_7": 3.6953013062477114, |
| "epoch": 0.67, |
| "grad_norm": 544.0, |
| "kl_loss_10": 182.07874755859376, |
| "kl_loss_2": 2231.308416748047, |
| "kl_loss_3": 1749.7796997070313, |
| "kl_loss_7": 618.383251953125, |
| "learning_rate": 0.0002500000000000001, |
| "loss": 1185.6723, |
| "step": 6700 |
| }, |
| { |
| "ce_loss_10": 3.62176308631897, |
| "ce_loss_13": 3.547257494926453, |
| "ce_loss_2": 4.5295734882354735, |
| "ce_loss_3": 4.277957272529602, |
| "ce_loss_7": 3.788572609424591, |
| "epoch": 0.671, |
| "grad_norm": 548.0, |
| "kl_loss_10": 173.84563446044922, |
| "kl_loss_2": 2050.3306396484377, |
| "kl_loss_3": 1604.508935546875, |
| "kl_loss_7": 584.9092193603516, |
| "learning_rate": 0.0002486271706273421, |
| "loss": 1168.4034, |
| "step": 6710 |
| }, |
| { |
| "ce_loss_10": 3.557868146896362, |
| "ce_loss_13": 3.485461747646332, |
| "ce_loss_2": 4.488811063766479, |
| "ce_loss_3": 4.22833331823349, |
| "ce_loss_7": 3.7254873156547545, |
| "epoch": 0.672, |
| "grad_norm": 644.0, |
| "kl_loss_10": 175.32781143188475, |
| "kl_loss_2": 2091.0470642089845, |
| "kl_loss_3": 1627.7723266601563, |
| "kl_loss_7": 589.4509521484375, |
| "learning_rate": 0.0002472568725762853, |
| "loss": 1154.7741, |
| "step": 6720 |
| }, |
| { |
| "ce_loss_10": 3.5483877897262572, |
| "ce_loss_13": 3.4755659341812133, |
| "ce_loss_2": 4.477482891082763, |
| "ce_loss_3": 4.2254945039749146, |
| "ce_loss_7": 3.717711102962494, |
| "epoch": 0.673, |
| "grad_norm": 564.0, |
| "kl_loss_10": 173.398193359375, |
| "kl_loss_2": 2110.752795410156, |
| "kl_loss_3": 1653.0817565917969, |
| "kl_loss_7": 586.6092742919922, |
| "learning_rate": 0.00024588911964571554, |
| "loss": 1145.849, |
| "step": 6730 |
| }, |
| { |
| "ce_loss_10": 3.5657129168510435, |
| "ce_loss_13": 3.4857802987098694, |
| "ce_loss_2": 4.5576330661773685, |
| "ce_loss_3": 4.288707995414734, |
| "ce_loss_7": 3.753812789916992, |
| "epoch": 0.674, |
| "grad_norm": 524.0, |
| "kl_loss_10": 187.46376113891603, |
| "kl_loss_2": 2205.4671936035156, |
| "kl_loss_3": 1727.960107421875, |
| "kl_loss_7": 626.6214233398438, |
| "learning_rate": 0.00024452392560888974, |
| "loss": 1167.7188, |
| "step": 6740 |
| }, |
| { |
| "ce_loss_10": 3.455358147621155, |
| "ce_loss_13": 3.378260016441345, |
| "ce_loss_2": 4.419786167144776, |
| "ce_loss_3": 4.155612635612488, |
| "ce_loss_7": 3.6355133295059203, |
| "epoch": 0.675, |
| "grad_norm": 532.0, |
| "kl_loss_10": 177.83211364746094, |
| "kl_loss_2": 2172.1373962402345, |
| "kl_loss_3": 1704.7519836425781, |
| "kl_loss_7": 606.2507995605469, |
| "learning_rate": 0.00024316130421329695, |
| "loss": 1157.1621, |
| "step": 6750 |
| }, |
| { |
| "ce_loss_10": 3.535005438327789, |
| "ce_loss_13": 3.4575978398323057, |
| "ce_loss_2": 4.505799317359925, |
| "ce_loss_3": 4.232890093326569, |
| "ce_loss_7": 3.7101247310638428, |
| "epoch": 0.676, |
| "grad_norm": 564.0, |
| "kl_loss_10": 177.2459358215332, |
| "kl_loss_2": 2136.964288330078, |
| "kl_loss_3": 1660.3365295410156, |
| "kl_loss_7": 591.8486877441406, |
| "learning_rate": 0.00024180126918051909, |
| "loss": 1154.5281, |
| "step": 6760 |
| }, |
| { |
| "ce_loss_10": 3.577043890953064, |
| "ce_loss_13": 3.5019183993339538, |
| "ce_loss_2": 4.527614569664001, |
| "ce_loss_3": 4.265857553482055, |
| "ce_loss_7": 3.7534381628036497, |
| "epoch": 0.677, |
| "grad_norm": 604.0, |
| "kl_loss_10": 178.1947784423828, |
| "kl_loss_2": 2127.7522155761717, |
| "kl_loss_3": 1659.7866516113281, |
| "kl_loss_7": 597.4583068847656, |
| "learning_rate": 0.00024044383420609406, |
| "loss": 1141.1451, |
| "step": 6770 |
| }, |
| { |
| "ce_loss_10": 3.589032161235809, |
| "ce_loss_13": 3.514654505252838, |
| "ce_loss_2": 4.520573258399963, |
| "ce_loss_3": 4.2588379859924315, |
| "ce_loss_7": 3.7536001801490784, |
| "epoch": 0.678, |
| "grad_norm": 552.0, |
| "kl_loss_10": 175.52578201293946, |
| "kl_loss_2": 2107.4931701660157, |
| "kl_loss_3": 1641.6564514160157, |
| "kl_loss_7": 591.3939788818359, |
| "learning_rate": 0.00023908901295937712, |
| "loss": 1175.0256, |
| "step": 6780 |
| }, |
| { |
| "ce_loss_10": 3.5837427616119384, |
| "ce_loss_13": 3.505910849571228, |
| "ce_loss_2": 4.535600376129151, |
| "ce_loss_3": 4.271885943412781, |
| "ce_loss_7": 3.755298101902008, |
| "epoch": 0.679, |
| "grad_norm": 596.0, |
| "kl_loss_10": 177.23758392333986, |
| "kl_loss_2": 2111.0602905273436, |
| "kl_loss_3": 1645.3706420898438, |
| "kl_loss_7": 592.0553497314453, |
| "learning_rate": 0.00023773681908340283, |
| "loss": 1169.8496, |
| "step": 6790 |
| }, |
| { |
| "ce_loss_10": 3.5592074632644652, |
| "ce_loss_13": 3.4772790670394897, |
| "ce_loss_2": 4.548656535148621, |
| "ce_loss_3": 4.2832125425338745, |
| "ce_loss_7": 3.7424607038497926, |
| "epoch": 0.68, |
| "grad_norm": 600.0, |
| "kl_loss_10": 187.7086395263672, |
| "kl_loss_2": 2222.717413330078, |
| "kl_loss_3": 1751.2139282226562, |
| "kl_loss_7": 621.8694488525391, |
| "learning_rate": 0.00023638726619474876, |
| "loss": 1203.8379, |
| "step": 6800 |
| }, |
| { |
| "ce_loss_10": 3.5476158022880555, |
| "ce_loss_13": 3.467449462413788, |
| "ce_loss_2": 4.563005781173706, |
| "ce_loss_3": 4.29602427482605, |
| "ce_loss_7": 3.7365992784500124, |
| "epoch": 0.681, |
| "grad_norm": 580.0, |
| "kl_loss_10": 183.3829345703125, |
| "kl_loss_2": 2228.727575683594, |
| "kl_loss_3": 1760.1514953613282, |
| "kl_loss_7": 626.0395812988281, |
| "learning_rate": 0.0002350403678833976, |
| "loss": 1182.506, |
| "step": 6810 |
| }, |
| { |
| "ce_loss_10": 3.4750794649124144, |
| "ce_loss_13": 3.39876846075058, |
| "ce_loss_2": 4.465261030197143, |
| "ce_loss_3": 4.1875766038894655, |
| "ce_loss_7": 3.6560620784759523, |
| "epoch": 0.682, |
| "grad_norm": 490.0, |
| "kl_loss_10": 178.67537307739258, |
| "kl_loss_2": 2200.3225830078127, |
| "kl_loss_3": 1714.0246276855469, |
| "kl_loss_7": 608.0651733398438, |
| "learning_rate": 0.00023369613771260007, |
| "loss": 1160.444, |
| "step": 6820 |
| }, |
| { |
| "ce_loss_10": 3.5863471627235413, |
| "ce_loss_13": 3.5096321582794188, |
| "ce_loss_2": 4.573717498779297, |
| "ce_loss_3": 4.300703597068787, |
| "ce_loss_7": 3.767488884925842, |
| "epoch": 0.683, |
| "grad_norm": 608.0, |
| "kl_loss_10": 181.20342712402345, |
| "kl_loss_2": 2206.5099487304688, |
| "kl_loss_3": 1721.635076904297, |
| "kl_loss_7": 610.5273590087891, |
| "learning_rate": 0.00023235458921873925, |
| "loss": 1187.8242, |
| "step": 6830 |
| }, |
| { |
| "ce_loss_10": 3.5434704184532166, |
| "ce_loss_13": 3.4598939180374146, |
| "ce_loss_2": 4.561892867088318, |
| "ce_loss_3": 4.300772976875305, |
| "ce_loss_7": 3.73870370388031, |
| "epoch": 0.684, |
| "grad_norm": 640.0, |
| "kl_loss_10": 187.79423599243165, |
| "kl_loss_2": 2268.489392089844, |
| "kl_loss_3": 1798.0396545410156, |
| "kl_loss_7": 637.83154296875, |
| "learning_rate": 0.0002310157359111938, |
| "loss": 1215.3348, |
| "step": 6840 |
| }, |
| { |
| "ce_loss_10": 3.4310184836387636, |
| "ce_loss_13": 3.3527446746826173, |
| "ce_loss_2": 4.52064049243927, |
| "ce_loss_3": 4.243770575523376, |
| "ce_loss_7": 3.6270575404167174, |
| "epoch": 0.685, |
| "grad_norm": 656.0, |
| "kl_loss_10": 183.79262008666993, |
| "kl_loss_2": 2376.693957519531, |
| "kl_loss_3": 1883.7591125488282, |
| "kl_loss_7": 632.1836151123047, |
| "learning_rate": 0.0002296795912722014, |
| "loss": 1227.4164, |
| "step": 6850 |
| }, |
| { |
| "ce_loss_10": 3.570713925361633, |
| "ce_loss_13": 3.494589388370514, |
| "ce_loss_2": 4.519134759902954, |
| "ce_loss_3": 4.253862988948822, |
| "ce_loss_7": 3.7498515605926515, |
| "epoch": 0.686, |
| "grad_norm": 576.0, |
| "kl_loss_10": 179.78029174804686, |
| "kl_loss_2": 2128.572625732422, |
| "kl_loss_3": 1654.8151062011718, |
| "kl_loss_7": 601.8367279052734, |
| "learning_rate": 0.0002283461687567236, |
| "loss": 1133.3289, |
| "step": 6860 |
| }, |
| { |
| "ce_loss_10": 3.6324430108070374, |
| "ce_loss_13": 3.5565361857414244, |
| "ce_loss_2": 4.557056021690369, |
| "ce_loss_3": 4.298560571670532, |
| "ce_loss_7": 3.8067931652069094, |
| "epoch": 0.687, |
| "grad_norm": 506.0, |
| "kl_loss_10": 176.90674362182617, |
| "kl_loss_2": 2057.1298095703123, |
| "kl_loss_3": 1601.75263671875, |
| "kl_loss_7": 589.7931121826172, |
| "learning_rate": 0.00022701548179231045, |
| "loss": 1148.6605, |
| "step": 6870 |
| }, |
| { |
| "ce_loss_10": 3.582988679409027, |
| "ce_loss_13": 3.5054625153541563, |
| "ce_loss_2": 4.558988261222839, |
| "ce_loss_3": 4.300906538963318, |
| "ce_loss_7": 3.7617339849472047, |
| "epoch": 0.688, |
| "grad_norm": 628.0, |
| "kl_loss_10": 181.7885940551758, |
| "kl_loss_2": 2183.6723815917967, |
| "kl_loss_3": 1725.755859375, |
| "kl_loss_7": 613.1118804931641, |
| "learning_rate": 0.00022568754377896516, |
| "loss": 1157.5781, |
| "step": 6880 |
| }, |
| { |
| "ce_loss_10": 3.5760830521583555, |
| "ce_loss_13": 3.499359941482544, |
| "ce_loss_2": 4.526648283004761, |
| "ce_loss_3": 4.2548288941383365, |
| "ce_loss_7": 3.746409332752228, |
| "epoch": 0.689, |
| "grad_norm": 596.0, |
| "kl_loss_10": 180.4591537475586, |
| "kl_loss_2": 2140.339678955078, |
| "kl_loss_3": 1666.426806640625, |
| "kl_loss_7": 608.2543426513672, |
| "learning_rate": 0.00022436236808900844, |
| "loss": 1146.7832, |
| "step": 6890 |
| }, |
| { |
| "ce_loss_10": 3.4679219722747803, |
| "ce_loss_13": 3.3943055748939512, |
| "ce_loss_2": 4.462708353996277, |
| "ce_loss_3": 4.191701900959015, |
| "ce_loss_7": 3.6564658761024473, |
| "epoch": 0.69, |
| "grad_norm": 576.0, |
| "kl_loss_10": 181.4543029785156, |
| "kl_loss_2": 2231.3488586425783, |
| "kl_loss_3": 1747.2512329101562, |
| "kl_loss_7": 621.3712341308594, |
| "learning_rate": 0.00022303996806694487, |
| "loss": 1171.5013, |
| "step": 6900 |
| }, |
| { |
| "ce_loss_10": 3.5484726190567017, |
| "ce_loss_13": 3.4742958664894106, |
| "ce_loss_2": 4.519419646263122, |
| "ce_loss_3": 4.2546670794487, |
| "ce_loss_7": 3.7258636236190794, |
| "epoch": 0.691, |
| "grad_norm": 628.0, |
| "kl_loss_10": 177.98818740844726, |
| "kl_loss_2": 2169.697412109375, |
| "kl_loss_3": 1700.1277648925782, |
| "kl_loss_7": 608.3069030761719, |
| "learning_rate": 0.00022172035702932823, |
| "loss": 1158.7983, |
| "step": 6910 |
| }, |
| { |
| "ce_loss_10": 3.5924888372421266, |
| "ce_loss_13": 3.52042818069458, |
| "ce_loss_2": 4.517103600502014, |
| "ce_loss_3": 4.2600155711174015, |
| "ce_loss_7": 3.7615610361099243, |
| "epoch": 0.692, |
| "grad_norm": 644.0, |
| "kl_loss_10": 178.67746124267578, |
| "kl_loss_2": 2075.089074707031, |
| "kl_loss_3": 1619.080419921875, |
| "kl_loss_7": 597.07578125, |
| "learning_rate": 0.00022040354826462666, |
| "loss": 1140.3766, |
| "step": 6920 |
| }, |
| { |
| "ce_loss_10": 3.5235054731369018, |
| "ce_loss_13": 3.4497315883636475, |
| "ce_loss_2": 4.493763208389282, |
| "ce_loss_3": 4.228443372249603, |
| "ce_loss_7": 3.696590280532837, |
| "epoch": 0.693, |
| "grad_norm": 608.0, |
| "kl_loss_10": 176.88443298339843, |
| "kl_loss_2": 2155.86865234375, |
| "kl_loss_3": 1688.133123779297, |
| "kl_loss_7": 594.834016418457, |
| "learning_rate": 0.0002190895550330899, |
| "loss": 1170.6351, |
| "step": 6930 |
| }, |
| { |
| "ce_loss_10": 3.4576660275459288, |
| "ce_loss_13": 3.3801838874816896, |
| "ce_loss_2": 4.465155124664307, |
| "ce_loss_3": 4.190037369728088, |
| "ce_loss_7": 3.644961953163147, |
| "epoch": 0.694, |
| "grad_norm": 596.0, |
| "kl_loss_10": 183.47678833007814, |
| "kl_loss_2": 2243.730157470703, |
| "kl_loss_3": 1750.4187255859374, |
| "kl_loss_7": 620.829443359375, |
| "learning_rate": 0.00021777839056661552, |
| "loss": 1165.1125, |
| "step": 6940 |
| }, |
| { |
| "ce_loss_10": 3.5390109062194823, |
| "ce_loss_13": 3.464726150035858, |
| "ce_loss_2": 4.509364485740662, |
| "ce_loss_3": 4.234912276268005, |
| "ce_loss_7": 3.7123560190200804, |
| "epoch": 0.695, |
| "grad_norm": 544.0, |
| "kl_loss_10": 176.9818588256836, |
| "kl_loss_2": 2161.626544189453, |
| "kl_loss_3": 1678.7994750976563, |
| "kl_loss_7": 599.7094299316407, |
| "learning_rate": 0.0002164700680686147, |
| "loss": 1138.0607, |
| "step": 6950 |
| }, |
| { |
| "ce_loss_10": 3.584149193763733, |
| "ce_loss_13": 3.509235203266144, |
| "ce_loss_2": 4.522939825057984, |
| "ce_loss_3": 4.255844712257385, |
| "ce_loss_7": 3.757488739490509, |
| "epoch": 0.696, |
| "grad_norm": 520.0, |
| "kl_loss_10": 178.28938369750978, |
| "kl_loss_2": 2107.5391052246096, |
| "kl_loss_3": 1637.0810913085938, |
| "kl_loss_7": 596.8087493896485, |
| "learning_rate": 0.0002151646007138806, |
| "loss": 1144.8846, |
| "step": 6960 |
| }, |
| { |
| "ce_loss_10": 3.463143539428711, |
| "ce_loss_13": 3.386814093589783, |
| "ce_loss_2": 4.468677043914795, |
| "ce_loss_3": 4.195722925662994, |
| "ce_loss_7": 3.644878602027893, |
| "epoch": 0.697, |
| "grad_norm": 592.0, |
| "kl_loss_10": 182.87069091796874, |
| "kl_loss_2": 2238.085968017578, |
| "kl_loss_3": 1753.8017456054688, |
| "kl_loss_7": 618.0010162353516, |
| "learning_rate": 0.00021386200164845526, |
| "loss": 1174.7895, |
| "step": 6970 |
| }, |
| { |
| "ce_loss_10": 3.646360158920288, |
| "ce_loss_13": 3.5726787090301513, |
| "ce_loss_2": 4.5610116720199585, |
| "ce_loss_3": 4.303556060791015, |
| "ce_loss_7": 3.814376199245453, |
| "epoch": 0.698, |
| "grad_norm": 564.0, |
| "kl_loss_10": 176.60812377929688, |
| "kl_loss_2": 2073.2183532714844, |
| "kl_loss_3": 1616.5314147949218, |
| "kl_loss_7": 592.3170806884766, |
| "learning_rate": 0.0002125622839894964, |
| "loss": 1126.8248, |
| "step": 6980 |
| }, |
| { |
| "ce_loss_10": 3.5844451546669007, |
| "ce_loss_13": 3.5105634808540342, |
| "ce_loss_2": 4.530939984321594, |
| "ce_loss_3": 4.263714623451233, |
| "ce_loss_7": 3.7546409368515015, |
| "epoch": 0.699, |
| "grad_norm": 580.0, |
| "kl_loss_10": 177.07121353149415, |
| "kl_loss_2": 2114.079455566406, |
| "kl_loss_3": 1646.3038818359375, |
| "kl_loss_7": 590.21640625, |
| "learning_rate": 0.00021126546082514663, |
| "loss": 1144.4324, |
| "step": 6990 |
| }, |
| { |
| "ce_loss_10": 3.6092105984687803, |
| "ce_loss_13": 3.533507966995239, |
| "ce_loss_2": 4.539715147018432, |
| "ce_loss_3": 4.274128103256226, |
| "ce_loss_7": 3.7783223032951354, |
| "epoch": 0.7, |
| "grad_norm": 576.0, |
| "kl_loss_10": 177.3388931274414, |
| "kl_loss_2": 2107.8220703125, |
| "kl_loss_3": 1636.2730224609375, |
| "kl_loss_7": 594.1880798339844, |
| "learning_rate": 0.00020997154521440098, |
| "loss": 1131.7685, |
| "step": 7000 |
| }, |
| { |
| "ce_loss_10": 3.5483237147331237, |
| "ce_loss_13": 3.476468551158905, |
| "ce_loss_2": 4.5004148244857785, |
| "ce_loss_3": 4.238211619853973, |
| "ce_loss_7": 3.722394573688507, |
| "epoch": 0.701, |
| "grad_norm": 556.0, |
| "kl_loss_10": 174.87986907958984, |
| "kl_loss_2": 2127.186975097656, |
| "kl_loss_3": 1661.8602966308595, |
| "kl_loss_7": 600.6610717773438, |
| "learning_rate": 0.0002086805501869749, |
| "loss": 1133.7422, |
| "step": 7010 |
| }, |
| { |
| "ce_loss_10": 3.5188135743141173, |
| "ce_loss_13": 3.441002869606018, |
| "ce_loss_2": 4.517698335647583, |
| "ce_loss_3": 4.247731244564056, |
| "ce_loss_7": 3.704049062728882, |
| "epoch": 0.702, |
| "grad_norm": 616.0, |
| "kl_loss_10": 182.97085342407226, |
| "kl_loss_2": 2238.2483459472655, |
| "kl_loss_3": 1746.861260986328, |
| "kl_loss_7": 621.9453765869141, |
| "learning_rate": 0.0002073924887431744, |
| "loss": 1180.4881, |
| "step": 7020 |
| }, |
| { |
| "ce_loss_10": 3.5274356603622437, |
| "ce_loss_13": 3.45092910528183, |
| "ce_loss_2": 4.4901411771774296, |
| "ce_loss_3": 4.230588483810425, |
| "ce_loss_7": 3.706618547439575, |
| "epoch": 0.703, |
| "grad_norm": 568.0, |
| "kl_loss_10": 179.11029281616212, |
| "kl_loss_2": 2178.3450439453127, |
| "kl_loss_3": 1711.4957885742188, |
| "kl_loss_7": 605.4426422119141, |
| "learning_rate": 0.00020610737385376348, |
| "loss": 1200.9115, |
| "step": 7030 |
| }, |
| { |
| "ce_loss_10": 3.5887810468673704, |
| "ce_loss_13": 3.5163929224014283, |
| "ce_loss_2": 4.518351888656616, |
| "ce_loss_3": 4.254893863201142, |
| "ce_loss_7": 3.7612039923667906, |
| "epoch": 0.704, |
| "grad_norm": 628.0, |
| "kl_loss_10": 176.6663619995117, |
| "kl_loss_2": 2075.716662597656, |
| "kl_loss_3": 1610.9020690917969, |
| "kl_loss_7": 588.8746612548828, |
| "learning_rate": 0.00020482521845983521, |
| "loss": 1151.7219, |
| "step": 7040 |
| }, |
| { |
| "ce_loss_10": 3.5866637587547303, |
| "ce_loss_13": 3.5072137475013734, |
| "ce_loss_2": 4.558261132240295, |
| "ce_loss_3": 4.291126704216003, |
| "ce_loss_7": 3.7625884056091308, |
| "epoch": 0.705, |
| "grad_norm": 600.0, |
| "kl_loss_10": 182.52303237915038, |
| "kl_loss_2": 2193.1544799804688, |
| "kl_loss_3": 1715.2766052246093, |
| "kl_loss_7": 612.0993133544922, |
| "learning_rate": 0.00020354603547267987, |
| "loss": 1187.2512, |
| "step": 7050 |
| }, |
| { |
| "ce_loss_10": 3.56976774930954, |
| "ce_loss_13": 3.488901746273041, |
| "ce_loss_2": 4.5605854988098145, |
| "ce_loss_3": 4.2862097263336185, |
| "ce_loss_7": 3.7558568716049194, |
| "epoch": 0.706, |
| "grad_norm": 504.0, |
| "kl_loss_10": 182.46872100830078, |
| "kl_loss_2": 2185.692938232422, |
| "kl_loss_3": 1703.4005493164063, |
| "kl_loss_7": 615.3342132568359, |
| "learning_rate": 0.00020226983777365604, |
| "loss": 1201.599, |
| "step": 7060 |
| }, |
| { |
| "ce_loss_10": 3.46960107088089, |
| "ce_loss_13": 3.394390141963959, |
| "ce_loss_2": 4.4708491563797, |
| "ce_loss_3": 4.21563994884491, |
| "ce_loss_7": 3.6478799104690554, |
| "epoch": 0.707, |
| "grad_norm": 548.0, |
| "kl_loss_10": 174.23039703369142, |
| "kl_loss_2": 2219.3698486328126, |
| "kl_loss_3": 1767.6679748535157, |
| "kl_loss_7": 596.5048126220703, |
| "learning_rate": 0.00020099663821406056, |
| "loss": 1167.8441, |
| "step": 7070 |
| }, |
| { |
| "ce_loss_10": 3.573564553260803, |
| "ce_loss_13": 3.4988652229309083, |
| "ce_loss_2": 4.518075895309448, |
| "ce_loss_3": 4.2526293873786924, |
| "ce_loss_7": 3.74619642496109, |
| "epoch": 0.708, |
| "grad_norm": 688.0, |
| "kl_loss_10": 173.7955307006836, |
| "kl_loss_2": 2112.61328125, |
| "kl_loss_3": 1644.760516357422, |
| "kl_loss_7": 588.589468383789, |
| "learning_rate": 0.00019972644961499853, |
| "loss": 1168.0168, |
| "step": 7080 |
| }, |
| { |
| "ce_loss_10": 3.5425114035606384, |
| "ce_loss_13": 3.4652504205703734, |
| "ce_loss_2": 4.536031889915466, |
| "ce_loss_3": 4.265958952903747, |
| "ce_loss_7": 3.7277685403823853, |
| "epoch": 0.709, |
| "grad_norm": 544.0, |
| "kl_loss_10": 181.94257354736328, |
| "kl_loss_2": 2208.387451171875, |
| "kl_loss_3": 1727.9980712890624, |
| "kl_loss_7": 619.6463317871094, |
| "learning_rate": 0.00019845928476725522, |
| "loss": 1173.2897, |
| "step": 7090 |
| }, |
| { |
| "ce_loss_10": 3.6211097598075868, |
| "ce_loss_13": 3.542751681804657, |
| "ce_loss_2": 4.576697874069214, |
| "ce_loss_3": 4.307754421234131, |
| "ce_loss_7": 3.794824481010437, |
| "epoch": 0.71, |
| "grad_norm": 524.0, |
| "kl_loss_10": 179.40447082519532, |
| "kl_loss_2": 2133.6560546875, |
| "kl_loss_3": 1661.1115417480469, |
| "kl_loss_7": 603.4232849121094, |
| "learning_rate": 0.00019719515643116677, |
| "loss": 1187.0576, |
| "step": 7100 |
| }, |
| { |
| "ce_loss_10": 3.563658607006073, |
| "ce_loss_13": 3.486394798755646, |
| "ce_loss_2": 4.523072552680969, |
| "ce_loss_3": 4.254948425292969, |
| "ce_loss_7": 3.7338495373725893, |
| "epoch": 0.711, |
| "grad_norm": 560.0, |
| "kl_loss_10": 177.84368362426758, |
| "kl_loss_2": 2144.635882568359, |
| "kl_loss_3": 1666.16875, |
| "kl_loss_7": 594.3132598876953, |
| "learning_rate": 0.0001959340773364911, |
| "loss": 1165.8826, |
| "step": 7110 |
| }, |
| { |
| "ce_loss_10": 3.5770322680473328, |
| "ce_loss_13": 3.5012174606323243, |
| "ce_loss_2": 4.550109481811523, |
| "ce_loss_3": 4.284217190742493, |
| "ce_loss_7": 3.7552335023880006, |
| "epoch": 0.712, |
| "grad_norm": 482.0, |
| "kl_loss_10": 179.49577865600585, |
| "kl_loss_2": 2181.1701049804688, |
| "kl_loss_3": 1700.0443542480468, |
| "kl_loss_7": 603.1331329345703, |
| "learning_rate": 0.0001946760601822809, |
| "loss": 1144.9554, |
| "step": 7120 |
| }, |
| { |
| "ce_loss_10": 3.6210792899131774, |
| "ce_loss_13": 3.549504554271698, |
| "ce_loss_2": 4.563032126426696, |
| "ce_loss_3": 4.2925217628479, |
| "ce_loss_7": 3.7989898562431335, |
| "epoch": 0.713, |
| "grad_norm": 592.0, |
| "kl_loss_10": 177.09535369873046, |
| "kl_loss_2": 2104.0981018066404, |
| "kl_loss_3": 1631.4184448242188, |
| "kl_loss_7": 592.4103118896485, |
| "learning_rate": 0.00019342111763675512, |
| "loss": 1123.9035, |
| "step": 7130 |
| }, |
| { |
| "ce_loss_10": 3.624540627002716, |
| "ce_loss_13": 3.5509743094444275, |
| "ce_loss_2": 4.5522850275039675, |
| "ce_loss_3": 4.289403009414673, |
| "ce_loss_7": 3.7917919158935547, |
| "epoch": 0.714, |
| "grad_norm": 588.0, |
| "kl_loss_10": 179.54557189941406, |
| "kl_loss_2": 2098.2009887695312, |
| "kl_loss_3": 1627.7805236816407, |
| "kl_loss_7": 597.2573303222656, |
| "learning_rate": 0.00019216926233717085, |
| "loss": 1127.0122, |
| "step": 7140 |
| }, |
| { |
| "ce_loss_10": 3.5141358375549316, |
| "ce_loss_13": 3.439559853076935, |
| "ce_loss_2": 4.534635162353515, |
| "ce_loss_3": 4.271865749359131, |
| "ce_loss_7": 3.6872041702270506, |
| "epoch": 0.715, |
| "grad_norm": 660.0, |
| "kl_loss_10": 176.31234970092774, |
| "kl_loss_2": 2255.184912109375, |
| "kl_loss_3": 1791.5307861328124, |
| "kl_loss_7": 594.7268737792969, |
| "learning_rate": 0.00019092050688969737, |
| "loss": 1192.3771, |
| "step": 7150 |
| }, |
| { |
| "ce_loss_10": 3.586177408695221, |
| "ce_loss_13": 3.5133618116378784, |
| "ce_loss_2": 4.527247905731201, |
| "ce_loss_3": 4.265925621986389, |
| "ce_loss_7": 3.7605576038360597, |
| "epoch": 0.716, |
| "grad_norm": 644.0, |
| "kl_loss_10": 177.39978713989257, |
| "kl_loss_2": 2138.382684326172, |
| "kl_loss_3": 1670.822119140625, |
| "kl_loss_7": 599.2600921630859, |
| "learning_rate": 0.00018967486386928817, |
| "loss": 1143.1982, |
| "step": 7160 |
| }, |
| { |
| "ce_loss_10": 3.4582155346870422, |
| "ce_loss_13": 3.3820405125617983, |
| "ce_loss_2": 4.456401991844177, |
| "ce_loss_3": 4.1904214262962345, |
| "ce_loss_7": 3.640235483646393, |
| "epoch": 0.717, |
| "grad_norm": 644.0, |
| "kl_loss_10": 181.15178756713868, |
| "kl_loss_2": 2234.275775146484, |
| "kl_loss_3": 1755.7729919433593, |
| "kl_loss_7": 621.9208374023438, |
| "learning_rate": 0.00018843234581955443, |
| "loss": 1211.3026, |
| "step": 7170 |
| }, |
| { |
| "ce_loss_10": 3.4746442079544066, |
| "ce_loss_13": 3.3969290494918822, |
| "ce_loss_2": 4.4550795435905455, |
| "ce_loss_3": 4.190334832668304, |
| "ce_loss_7": 3.6564103603363036, |
| "epoch": 0.718, |
| "grad_norm": 552.0, |
| "kl_loss_10": 182.11315155029297, |
| "kl_loss_2": 2189.7255920410157, |
| "kl_loss_3": 1717.2798217773438, |
| "kl_loss_7": 618.1327026367187, |
| "learning_rate": 0.00018719296525263924, |
| "loss": 1174.7828, |
| "step": 7180 |
| }, |
| { |
| "ce_loss_10": 3.571851980686188, |
| "ce_loss_13": 3.4972564935684205, |
| "ce_loss_2": 4.505244612693787, |
| "ce_loss_3": 4.243821203708649, |
| "ce_loss_7": 3.744515597820282, |
| "epoch": 0.719, |
| "grad_norm": 616.0, |
| "kl_loss_10": 176.35762710571288, |
| "kl_loss_2": 2085.3956665039063, |
| "kl_loss_3": 1620.6713073730468, |
| "kl_loss_7": 587.7710571289062, |
| "learning_rate": 0.0001859567346490913, |
| "loss": 1127.6644, |
| "step": 7190 |
| }, |
| { |
| "ce_loss_10": 3.5473140597343447, |
| "ce_loss_13": 3.469071900844574, |
| "ce_loss_2": 4.532921981811524, |
| "ce_loss_3": 4.260496711730957, |
| "ce_loss_7": 3.727588391304016, |
| "epoch": 0.72, |
| "grad_norm": 576.0, |
| "kl_loss_10": 181.04826431274415, |
| "kl_loss_2": 2198.079150390625, |
| "kl_loss_3": 1714.5421325683594, |
| "kl_loss_7": 608.8879028320313, |
| "learning_rate": 0.0001847236664577389, |
| "loss": 1142.0284, |
| "step": 7200 |
| }, |
| { |
| "ce_loss_10": 3.5739798665046694, |
| "ce_loss_13": 3.498915135860443, |
| "ce_loss_2": 4.512744069099426, |
| "ce_loss_3": 4.2453584432601925, |
| "ce_loss_7": 3.7430235743522644, |
| "epoch": 0.721, |
| "grad_norm": 560.0, |
| "kl_loss_10": 177.07028579711914, |
| "kl_loss_2": 2100.2286865234373, |
| "kl_loss_3": 1626.5753784179688, |
| "kl_loss_7": 587.8365112304688, |
| "learning_rate": 0.00018349377309556487, |
| "loss": 1123.1494, |
| "step": 7210 |
| }, |
| { |
| "ce_loss_10": 3.5153507471084593, |
| "ce_loss_13": 3.438252806663513, |
| "ce_loss_2": 4.529551863670349, |
| "ce_loss_3": 4.264591979980469, |
| "ce_loss_7": 3.6999141216278075, |
| "epoch": 0.722, |
| "grad_norm": 576.0, |
| "kl_loss_10": 181.94500274658202, |
| "kl_loss_2": 2259.3618774414062, |
| "kl_loss_3": 1782.3347534179688, |
| "kl_loss_7": 618.6104766845704, |
| "learning_rate": 0.00018226706694758193, |
| "loss": 1192.0385, |
| "step": 7220 |
| }, |
| { |
| "ce_loss_10": 3.589731001853943, |
| "ce_loss_13": 3.5162469148635864, |
| "ce_loss_2": 4.535777926445007, |
| "ce_loss_3": 4.275981712341308, |
| "ce_loss_7": 3.758218777179718, |
| "epoch": 0.723, |
| "grad_norm": 536.0, |
| "kl_loss_10": 176.7706611633301, |
| "kl_loss_2": 2136.6498046875, |
| "kl_loss_3": 1678.7979248046875, |
| "kl_loss_7": 600.344839477539, |
| "learning_rate": 0.0001810435603667075, |
| "loss": 1186.8562, |
| "step": 7230 |
| }, |
| { |
| "ce_loss_10": 3.4363317847251893, |
| "ce_loss_13": 3.3615066409111023, |
| "ce_loss_2": 4.428185939788818, |
| "ce_loss_3": 4.15840493440628, |
| "ce_loss_7": 3.6154449939727784, |
| "epoch": 0.724, |
| "grad_norm": 568.0, |
| "kl_loss_10": 175.6705749511719, |
| "kl_loss_2": 2191.837860107422, |
| "kl_loss_3": 1708.5897644042968, |
| "kl_loss_7": 600.9333648681641, |
| "learning_rate": 0.0001798232656736389, |
| "loss": 1187.3889, |
| "step": 7240 |
| }, |
| { |
| "ce_loss_10": 3.6142520189285277, |
| "ce_loss_13": 3.539129304885864, |
| "ce_loss_2": 4.541441655158996, |
| "ce_loss_3": 4.278818452358246, |
| "ce_loss_7": 3.7878984928131105, |
| "epoch": 0.725, |
| "grad_norm": 548.0, |
| "kl_loss_10": 176.2219985961914, |
| "kl_loss_2": 2082.9966674804687, |
| "kl_loss_3": 1618.1460876464844, |
| "kl_loss_7": 589.8986907958985, |
| "learning_rate": 0.0001786061951567303, |
| "loss": 1139.4487, |
| "step": 7250 |
| }, |
| { |
| "ce_loss_10": 3.528095841407776, |
| "ce_loss_13": 3.449831175804138, |
| "ce_loss_2": 4.499278616905213, |
| "ce_loss_3": 4.2353353023529055, |
| "ce_loss_7": 3.7135850310325624, |
| "epoch": 0.726, |
| "grad_norm": 564.0, |
| "kl_loss_10": 179.76034393310547, |
| "kl_loss_2": 2139.9875549316407, |
| "kl_loss_3": 1671.8400817871093, |
| "kl_loss_7": 601.5716674804687, |
| "learning_rate": 0.00017739236107186857, |
| "loss": 1166.0127, |
| "step": 7260 |
| }, |
| { |
| "ce_loss_10": 3.6185179114341737, |
| "ce_loss_13": 3.5442421674728393, |
| "ce_loss_2": 4.529335474967956, |
| "ce_loss_3": 4.268719971179962, |
| "ce_loss_7": 3.782019078731537, |
| "epoch": 0.727, |
| "grad_norm": 506.0, |
| "kl_loss_10": 174.4645896911621, |
| "kl_loss_2": 2059.7219299316407, |
| "kl_loss_3": 1594.1942993164062, |
| "kl_loss_7": 584.5985778808594, |
| "learning_rate": 0.00017618177564234904, |
| "loss": 1131.8243, |
| "step": 7270 |
| }, |
| { |
| "ce_loss_10": 3.5931476950645447, |
| "ce_loss_13": 3.5195810914039614, |
| "ce_loss_2": 4.50758855342865, |
| "ce_loss_3": 4.243484151363373, |
| "ce_loss_7": 3.7607154488563537, |
| "epoch": 0.728, |
| "grad_norm": 560.0, |
| "kl_loss_10": 172.751806640625, |
| "kl_loss_2": 2033.7148681640624, |
| "kl_loss_3": 1570.946112060547, |
| "kl_loss_7": 576.0696563720703, |
| "learning_rate": 0.00017497445105875377, |
| "loss": 1116.918, |
| "step": 7280 |
| }, |
| { |
| "ce_loss_10": 3.5072262048721314, |
| "ce_loss_13": 3.429379200935364, |
| "ce_loss_2": 4.499281525611877, |
| "ce_loss_3": 4.232627415657044, |
| "ce_loss_7": 3.695177102088928, |
| "epoch": 0.729, |
| "grad_norm": 552.0, |
| "kl_loss_10": 181.318611907959, |
| "kl_loss_2": 2210.19443359375, |
| "kl_loss_3": 1730.8836486816406, |
| "kl_loss_7": 613.139291381836, |
| "learning_rate": 0.000173770399478828, |
| "loss": 1168.2677, |
| "step": 7290 |
| }, |
| { |
| "ce_loss_10": 3.422491526603699, |
| "ce_loss_13": 3.347836971282959, |
| "ce_loss_2": 4.407784128189087, |
| "ce_loss_3": 4.131522953510284, |
| "ce_loss_7": 3.6013160228729246, |
| "epoch": 0.73, |
| "grad_norm": 560.0, |
| "kl_loss_10": 176.02440795898437, |
| "kl_loss_2": 2191.964385986328, |
| "kl_loss_3": 1698.5522827148438, |
| "kl_loss_7": 599.1639129638672, |
| "learning_rate": 0.0001725696330273575, |
| "loss": 1197.3154, |
| "step": 7300 |
| }, |
| { |
| "ce_loss_10": 3.611281132698059, |
| "ce_loss_13": 3.536842370033264, |
| "ce_loss_2": 4.535079216957092, |
| "ce_loss_3": 4.276083791255951, |
| "ce_loss_7": 3.782840621471405, |
| "epoch": 0.731, |
| "grad_norm": 608.0, |
| "kl_loss_10": 174.63313903808594, |
| "kl_loss_2": 2067.6716491699217, |
| "kl_loss_3": 1609.9422119140625, |
| "kl_loss_7": 585.3752410888671, |
| "learning_rate": 0.00017137216379604724, |
| "loss": 1120.0867, |
| "step": 7310 |
| }, |
| { |
| "ce_loss_10": 3.491976761817932, |
| "ce_loss_13": 3.4171910762786863, |
| "ce_loss_2": 4.477530479431152, |
| "ce_loss_3": 4.205724453926086, |
| "ce_loss_7": 3.667802131175995, |
| "epoch": 0.732, |
| "grad_norm": 588.0, |
| "kl_loss_10": 177.26437683105468, |
| "kl_loss_2": 2177.8865478515627, |
| "kl_loss_3": 1690.7372009277344, |
| "kl_loss_7": 596.9918426513672, |
| "learning_rate": 0.00017017800384339925, |
| "loss": 1158.1862, |
| "step": 7320 |
| }, |
| { |
| "ce_loss_10": 3.446759831905365, |
| "ce_loss_13": 3.3701040625572203, |
| "ce_loss_2": 4.4656068086624146, |
| "ce_loss_3": 4.189201056957245, |
| "ce_loss_7": 3.63215457201004, |
| "epoch": 0.733, |
| "grad_norm": 608.0, |
| "kl_loss_10": 179.6235023498535, |
| "kl_loss_2": 2245.6789367675783, |
| "kl_loss_3": 1758.8689514160155, |
| "kl_loss_7": 611.1446807861328, |
| "learning_rate": 0.00016898716519459073, |
| "loss": 1147.9725, |
| "step": 7330 |
| }, |
| { |
| "ce_loss_10": 3.5716673254966738, |
| "ce_loss_13": 3.4945391058921813, |
| "ce_loss_2": 4.573297142982483, |
| "ce_loss_3": 4.307599520683288, |
| "ce_loss_7": 3.7545908093452454, |
| "epoch": 0.734, |
| "grad_norm": 564.0, |
| "kl_loss_10": 182.98650054931642, |
| "kl_loss_2": 2208.1832763671873, |
| "kl_loss_3": 1733.5484619140625, |
| "kl_loss_7": 619.9228546142579, |
| "learning_rate": 0.00016779965984135375, |
| "loss": 1166.6811, |
| "step": 7340 |
| }, |
| { |
| "ce_loss_10": 3.478439450263977, |
| "ce_loss_13": 3.4015959978103636, |
| "ce_loss_2": 4.458614790439606, |
| "ce_loss_3": 4.194760942459107, |
| "ce_loss_7": 3.6524015784263613, |
| "epoch": 0.735, |
| "grad_norm": 612.0, |
| "kl_loss_10": 173.75391540527343, |
| "kl_loss_2": 2180.093780517578, |
| "kl_loss_3": 1698.9966857910156, |
| "kl_loss_7": 593.231803894043, |
| "learning_rate": 0.00016661549974185424, |
| "loss": 1159.2525, |
| "step": 7350 |
| }, |
| { |
| "ce_loss_10": 3.51222710609436, |
| "ce_loss_13": 3.4394211292266847, |
| "ce_loss_2": 4.489507508277893, |
| "ce_loss_3": 4.216231632232666, |
| "ce_loss_7": 3.6876235485076903, |
| "epoch": 0.736, |
| "grad_norm": 604.0, |
| "kl_loss_10": 179.0154716491699, |
| "kl_loss_2": 2169.4521362304686, |
| "kl_loss_3": 1690.815203857422, |
| "kl_loss_7": 602.3167053222656, |
| "learning_rate": 0.00016543469682057105, |
| "loss": 1143.9477, |
| "step": 7360 |
| }, |
| { |
| "ce_loss_10": 3.5415560364723206, |
| "ce_loss_13": 3.465597319602966, |
| "ce_loss_2": 4.508477449417114, |
| "ce_loss_3": 4.240069580078125, |
| "ce_loss_7": 3.7229697704315186, |
| "epoch": 0.737, |
| "grad_norm": 564.0, |
| "kl_loss_10": 181.52649993896483, |
| "kl_loss_2": 2153.332647705078, |
| "kl_loss_3": 1671.2495178222657, |
| "kl_loss_7": 610.9849151611328, |
| "learning_rate": 0.00016425726296817632, |
| "loss": 1153.5225, |
| "step": 7370 |
| }, |
| { |
| "ce_loss_10": 3.5615882515907287, |
| "ce_loss_13": 3.4901331782341005, |
| "ce_loss_2": 4.51513102054596, |
| "ce_loss_3": 4.248232364654541, |
| "ce_loss_7": 3.7354116439819336, |
| "epoch": 0.738, |
| "grad_norm": 544.0, |
| "kl_loss_10": 174.93305130004882, |
| "kl_loss_2": 2115.262805175781, |
| "kl_loss_3": 1640.2427490234375, |
| "kl_loss_7": 589.8659851074219, |
| "learning_rate": 0.00016308321004141607, |
| "loss": 1140.3666, |
| "step": 7380 |
| }, |
| { |
| "ce_loss_10": 3.518048846721649, |
| "ce_loss_13": 3.438374364376068, |
| "ce_loss_2": 4.499938416481018, |
| "ce_loss_3": 4.236223828792572, |
| "ce_loss_7": 3.695832920074463, |
| "epoch": 0.739, |
| "grad_norm": 548.0, |
| "kl_loss_10": 181.39317779541017, |
| "kl_loss_2": 2175.456677246094, |
| "kl_loss_3": 1701.57724609375, |
| "kl_loss_7": 609.4653137207031, |
| "learning_rate": 0.00016191254986299043, |
| "loss": 1150.5328, |
| "step": 7390 |
| }, |
| { |
| "ce_loss_10": 3.5613037228584288, |
| "ce_loss_13": 3.4887098908424377, |
| "ce_loss_2": 4.503171324729919, |
| "ce_loss_3": 4.245256781578064, |
| "ce_loss_7": 3.7236536622047423, |
| "epoch": 0.74, |
| "grad_norm": 680.0, |
| "kl_loss_10": 174.15130844116212, |
| "kl_loss_2": 2131.4445068359373, |
| "kl_loss_3": 1674.7688537597655, |
| "kl_loss_7": 591.5977661132813, |
| "learning_rate": 0.00016074529422143398, |
| "loss": 1164.3935, |
| "step": 7400 |
| }, |
| { |
| "ce_loss_10": 3.5027013421058655, |
| "ce_loss_13": 3.429375433921814, |
| "ce_loss_2": 4.4999552249908445, |
| "ce_loss_3": 4.231460630893707, |
| "ce_loss_7": 3.6830108165740967, |
| "epoch": 0.741, |
| "grad_norm": 736.0, |
| "kl_loss_10": 175.83671493530272, |
| "kl_loss_2": 2196.9726989746096, |
| "kl_loss_3": 1720.8603271484376, |
| "kl_loss_7": 599.7947540283203, |
| "learning_rate": 0.0001595814548709983, |
| "loss": 1180.4217, |
| "step": 7410 |
| }, |
| { |
| "ce_loss_10": 3.576788854598999, |
| "ce_loss_13": 3.498660683631897, |
| "ce_loss_2": 4.549895691871643, |
| "ce_loss_3": 4.287308168411255, |
| "ce_loss_7": 3.7568582773208616, |
| "epoch": 0.742, |
| "grad_norm": 556.0, |
| "kl_loss_10": 181.97546997070313, |
| "kl_loss_2": 2178.1142333984376, |
| "kl_loss_3": 1714.0122802734375, |
| "kl_loss_7": 610.0974487304687, |
| "learning_rate": 0.00015842104353153285, |
| "loss": 1164.6248, |
| "step": 7420 |
| }, |
| { |
| "ce_loss_10": 3.5943754434585573, |
| "ce_loss_13": 3.5180631637573243, |
| "ce_loss_2": 4.549882531166077, |
| "ce_loss_3": 4.288905811309815, |
| "ce_loss_7": 3.7695993304252626, |
| "epoch": 0.743, |
| "grad_norm": 548.0, |
| "kl_loss_10": 179.57282943725585, |
| "kl_loss_2": 2154.6554992675783, |
| "kl_loss_3": 1684.7554626464844, |
| "kl_loss_7": 607.9901489257812, |
| "learning_rate": 0.0001572640718883667, |
| "loss": 1181.4139, |
| "step": 7430 |
| }, |
| { |
| "ce_loss_10": 3.5268728017807005, |
| "ce_loss_13": 3.454422962665558, |
| "ce_loss_2": 4.4702025055885315, |
| "ce_loss_3": 4.211061191558838, |
| "ce_loss_7": 3.699466872215271, |
| "epoch": 0.744, |
| "grad_norm": 544.0, |
| "kl_loss_10": 173.9086715698242, |
| "kl_loss_2": 2107.2433349609373, |
| "kl_loss_3": 1643.049658203125, |
| "kl_loss_7": 587.0272888183594, |
| "learning_rate": 0.0001561105515921915, |
| "loss": 1164.3465, |
| "step": 7440 |
| }, |
| { |
| "ce_loss_10": 3.376306939125061, |
| "ce_loss_13": 3.3052693247795104, |
| "ce_loss_2": 4.399094796180725, |
| "ce_loss_3": 4.130729305744171, |
| "ce_loss_7": 3.5687609910964966, |
| "epoch": 0.745, |
| "grad_norm": 540.0, |
| "kl_loss_10": 174.5767349243164, |
| "kl_loss_2": 2266.337322998047, |
| "kl_loss_3": 1780.6517333984375, |
| "kl_loss_7": 616.0360229492187, |
| "learning_rate": 0.0001549604942589441, |
| "loss": 1163.9994, |
| "step": 7450 |
| }, |
| { |
| "ce_loss_10": 3.5653053879737855, |
| "ce_loss_13": 3.493623507022858, |
| "ce_loss_2": 4.478042149543763, |
| "ce_loss_3": 4.218795919418335, |
| "ce_loss_7": 3.731026256084442, |
| "epoch": 0.746, |
| "grad_norm": 580.0, |
| "kl_loss_10": 170.22484588623047, |
| "kl_loss_2": 2039.5635498046875, |
| "kl_loss_3": 1579.8051452636719, |
| "kl_loss_7": 567.1303924560547, |
| "learning_rate": 0.00015381391146968864, |
| "loss": 1115.5928, |
| "step": 7460 |
| }, |
| { |
| "ce_loss_10": 3.5406330108642576, |
| "ce_loss_13": 3.4665817737579347, |
| "ce_loss_2": 4.507574367523193, |
| "ce_loss_3": 4.2422141313552855, |
| "ce_loss_7": 3.711947810649872, |
| "epoch": 0.747, |
| "grad_norm": 576.0, |
| "kl_loss_10": 173.49912338256837, |
| "kl_loss_2": 2137.4108154296873, |
| "kl_loss_3": 1666.1288146972656, |
| "kl_loss_7": 586.0349029541015, |
| "learning_rate": 0.00015267081477050133, |
| "loss": 1153.2315, |
| "step": 7470 |
| }, |
| { |
| "ce_loss_10": 3.6397757053375246, |
| "ce_loss_13": 3.565910828113556, |
| "ce_loss_2": 4.558345174789428, |
| "ce_loss_3": 4.3014825820922855, |
| "ce_loss_7": 3.813869845867157, |
| "epoch": 0.748, |
| "grad_norm": 524.0, |
| "kl_loss_10": 179.69472961425782, |
| "kl_loss_2": 2081.7364990234373, |
| "kl_loss_3": 1619.6140869140625, |
| "kl_loss_7": 597.4245025634766, |
| "learning_rate": 0.00015153121567235335, |
| "loss": 1120.7676, |
| "step": 7480 |
| }, |
| { |
| "ce_loss_10": 3.529373216629028, |
| "ce_loss_13": 3.454616332054138, |
| "ce_loss_2": 4.507159662246704, |
| "ce_loss_3": 4.2369110703468325, |
| "ce_loss_7": 3.7003498554229735, |
| "epoch": 0.749, |
| "grad_norm": 596.0, |
| "kl_loss_10": 178.19662170410157, |
| "kl_loss_2": 2201.308489990234, |
| "kl_loss_3": 1718.668115234375, |
| "kl_loss_7": 600.4444549560546, |
| "learning_rate": 0.00015039512565099468, |
| "loss": 1130.487, |
| "step": 7490 |
| }, |
| { |
| "ce_loss_10": 3.59435373544693, |
| "ce_loss_13": 3.5217554926872254, |
| "ce_loss_2": 4.542114019393921, |
| "ce_loss_3": 4.2746872186660765, |
| "ce_loss_7": 3.768599247932434, |
| "epoch": 0.75, |
| "grad_norm": 532.0, |
| "kl_loss_10": 177.41806030273438, |
| "kl_loss_2": 2130.0947509765624, |
| "kl_loss_3": 1653.6429382324218, |
| "kl_loss_7": 598.9670806884766, |
| "learning_rate": 0.00014926255614683932, |
| "loss": 1188.0775, |
| "step": 7500 |
| }, |
| { |
| "ce_loss_10": 3.5343728065490723, |
| "ce_loss_13": 3.462270963191986, |
| "ce_loss_2": 4.491153955459595, |
| "ce_loss_3": 4.2244093179702755, |
| "ce_loss_7": 3.70688259601593, |
| "epoch": 0.751, |
| "grad_norm": 584.0, |
| "kl_loss_10": 175.2909957885742, |
| "kl_loss_2": 2134.823455810547, |
| "kl_loss_3": 1661.539990234375, |
| "kl_loss_7": 592.2256774902344, |
| "learning_rate": 0.0001481335185648498, |
| "loss": 1152.3602, |
| "step": 7510 |
| }, |
| { |
| "ce_loss_10": 3.5509208917617796, |
| "ce_loss_13": 3.4760316491127012, |
| "ce_loss_2": 4.4910846710205075, |
| "ce_loss_3": 4.236609256267547, |
| "ce_loss_7": 3.7286911368370057, |
| "epoch": 0.752, |
| "grad_norm": 560.0, |
| "kl_loss_10": 175.903653717041, |
| "kl_loss_2": 2132.4962768554688, |
| "kl_loss_3": 1669.2187805175781, |
| "kl_loss_7": 598.009976196289, |
| "learning_rate": 0.0001470080242744218, |
| "loss": 1135.5451, |
| "step": 7520 |
| }, |
| { |
| "ce_loss_10": 3.5404749631881716, |
| "ce_loss_13": 3.4668622732162477, |
| "ce_loss_2": 4.505393123626709, |
| "ce_loss_3": 4.248504590988159, |
| "ce_loss_7": 3.7097239255905152, |
| "epoch": 0.753, |
| "grad_norm": 600.0, |
| "kl_loss_10": 172.68473205566406, |
| "kl_loss_2": 2143.0695861816407, |
| "kl_loss_3": 1687.700439453125, |
| "kl_loss_7": 591.5756866455079, |
| "learning_rate": 0.0001458860846092705, |
| "loss": 1151.0906, |
| "step": 7530 |
| }, |
| { |
| "ce_loss_10": 3.578909718990326, |
| "ce_loss_13": 3.503495466709137, |
| "ce_loss_2": 4.502352619171143, |
| "ce_loss_3": 4.240141928195953, |
| "ce_loss_7": 3.750500977039337, |
| "epoch": 0.754, |
| "grad_norm": 604.0, |
| "kl_loss_10": 174.89483642578125, |
| "kl_loss_2": 2075.617956542969, |
| "kl_loss_3": 1612.4501708984376, |
| "kl_loss_7": 588.5097457885743, |
| "learning_rate": 0.00014476771086731566, |
| "loss": 1116.6235, |
| "step": 7540 |
| }, |
| { |
| "ce_loss_10": 3.688204324245453, |
| "ce_loss_13": 3.610430431365967, |
| "ce_loss_2": 4.621451306343078, |
| "ce_loss_3": 4.3530316829681395, |
| "ce_loss_7": 3.8562689661979674, |
| "epoch": 0.755, |
| "grad_norm": 572.0, |
| "kl_loss_10": 181.31634902954102, |
| "kl_loss_2": 2096.732080078125, |
| "kl_loss_3": 1625.2518310546875, |
| "kl_loss_7": 592.3898040771485, |
| "learning_rate": 0.00014365291431056872, |
| "loss": 1170.6359, |
| "step": 7550 |
| }, |
| { |
| "ce_loss_10": 3.513639771938324, |
| "ce_loss_13": 3.43876428604126, |
| "ce_loss_2": 4.494768452644348, |
| "ce_loss_3": 4.226865899562836, |
| "ce_loss_7": 3.6938853025436402, |
| "epoch": 0.756, |
| "grad_norm": 648.0, |
| "kl_loss_10": 182.01916885375977, |
| "kl_loss_2": 2211.1534912109373, |
| "kl_loss_3": 1723.5334899902343, |
| "kl_loss_7": 617.1691345214844, |
| "learning_rate": 0.00014254170616501827, |
| "loss": 1163.1255, |
| "step": 7560 |
| }, |
| { |
| "ce_loss_10": 3.4477534770965574, |
| "ce_loss_13": 3.3702123761177063, |
| "ce_loss_2": 4.465814185142517, |
| "ce_loss_3": 4.193384432792664, |
| "ce_loss_7": 3.6376350045204164, |
| "epoch": 0.757, |
| "grad_norm": 652.0, |
| "kl_loss_10": 181.91958312988282, |
| "kl_loss_2": 2272.9578369140627, |
| "kl_loss_3": 1780.6268127441406, |
| "kl_loss_7": 631.3283477783203, |
| "learning_rate": 0.0001414340976205183, |
| "loss": 1210.6553, |
| "step": 7570 |
| }, |
| { |
| "ce_loss_10": 3.4623551964759827, |
| "ce_loss_13": 3.386858320236206, |
| "ce_loss_2": 4.47217173576355, |
| "ce_loss_3": 4.196212124824524, |
| "ce_loss_7": 3.6454687833786013, |
| "epoch": 0.758, |
| "grad_norm": 652.0, |
| "kl_loss_10": 175.49118347167968, |
| "kl_loss_2": 2225.9182312011717, |
| "kl_loss_3": 1743.3149719238281, |
| "kl_loss_7": 604.4145355224609, |
| "learning_rate": 0.00014033009983067452, |
| "loss": 1165.3377, |
| "step": 7580 |
| }, |
| { |
| "ce_loss_10": 3.625165855884552, |
| "ce_loss_13": 3.553388500213623, |
| "ce_loss_2": 4.5477535963058475, |
| "ce_loss_3": 4.282978129386902, |
| "ce_loss_7": 3.790937566757202, |
| "epoch": 0.759, |
| "grad_norm": 540.0, |
| "kl_loss_10": 173.22186889648438, |
| "kl_loss_2": 2076.229632568359, |
| "kl_loss_3": 1605.808331298828, |
| "kl_loss_7": 578.3152954101563, |
| "learning_rate": 0.00013922972391273224, |
| "loss": 1124.4209, |
| "step": 7590 |
| }, |
| { |
| "ce_loss_10": 3.624656689167023, |
| "ce_loss_13": 3.5520288705825807, |
| "ce_loss_2": 4.581440138816833, |
| "ce_loss_3": 4.323860204219818, |
| "ce_loss_7": 3.799424684047699, |
| "epoch": 0.76, |
| "grad_norm": 604.0, |
| "kl_loss_10": 176.8631507873535, |
| "kl_loss_2": 2111.316943359375, |
| "kl_loss_3": 1657.235821533203, |
| "kl_loss_7": 591.5200927734375, |
| "learning_rate": 0.0001381329809474649, |
| "loss": 1146.3586, |
| "step": 7600 |
| }, |
| { |
| "ce_loss_10": 3.532001996040344, |
| "ce_loss_13": 3.4530585527420046, |
| "ce_loss_2": 4.544336724281311, |
| "ce_loss_3": 4.269702458381653, |
| "ce_loss_7": 3.7165846705436705, |
| "epoch": 0.761, |
| "grad_norm": 632.0, |
| "kl_loss_10": 181.38144760131837, |
| "kl_loss_2": 2247.282580566406, |
| "kl_loss_3": 1759.4795043945312, |
| "kl_loss_7": 616.4709213256835, |
| "learning_rate": 0.0001370398819790621, |
| "loss": 1186.2754, |
| "step": 7610 |
| }, |
| { |
| "ce_loss_10": 3.6697842359542845, |
| "ce_loss_13": 3.5929376244544984, |
| "ce_loss_2": 4.604382491111755, |
| "ce_loss_3": 4.336557102203369, |
| "ce_loss_7": 3.8371459245681763, |
| "epoch": 0.762, |
| "grad_norm": 612.0, |
| "kl_loss_10": 176.96341781616212, |
| "kl_loss_2": 2080.2568908691405, |
| "kl_loss_3": 1604.4097290039062, |
| "kl_loss_7": 582.9634078979492, |
| "learning_rate": 0.00013595043801501794, |
| "loss": 1108.4416, |
| "step": 7620 |
| }, |
| { |
| "ce_loss_10": 3.4644748091697695, |
| "ce_loss_13": 3.386727011203766, |
| "ce_loss_2": 4.503179264068604, |
| "ce_loss_3": 4.235963094234466, |
| "ce_loss_7": 3.650843346118927, |
| "epoch": 0.763, |
| "grad_norm": 664.0, |
| "kl_loss_10": 180.12555694580078, |
| "kl_loss_2": 2289.173895263672, |
| "kl_loss_3": 1815.476171875, |
| "kl_loss_7": 622.1785308837891, |
| "learning_rate": 0.00013486466002602133, |
| "loss": 1194.0496, |
| "step": 7630 |
| }, |
| { |
| "ce_loss_10": 3.577344560623169, |
| "ce_loss_13": 3.503310763835907, |
| "ce_loss_2": 4.512240695953369, |
| "ce_loss_3": 4.2521095991134645, |
| "ce_loss_7": 3.7476378440856934, |
| "epoch": 0.764, |
| "grad_norm": 556.0, |
| "kl_loss_10": 175.91430206298827, |
| "kl_loss_2": 2097.193493652344, |
| "kl_loss_3": 1632.0237731933594, |
| "kl_loss_7": 587.4400573730469, |
| "learning_rate": 0.00013378255894584462, |
| "loss": 1166.6646, |
| "step": 7640 |
| }, |
| { |
| "ce_loss_10": 3.5123034000396727, |
| "ce_loss_13": 3.433635425567627, |
| "ce_loss_2": 4.500353503227234, |
| "ce_loss_3": 4.2323464274406435, |
| "ce_loss_7": 3.6943077445030212, |
| "epoch": 0.765, |
| "grad_norm": 560.0, |
| "kl_loss_10": 181.23019485473634, |
| "kl_loss_2": 2206.8176452636717, |
| "kl_loss_3": 1726.0316467285156, |
| "kl_loss_7": 608.9391540527344, |
| "learning_rate": 0.0001327041456712334, |
| "loss": 1171.7679, |
| "step": 7650 |
| }, |
| { |
| "ce_loss_10": 3.55541011095047, |
| "ce_loss_13": 3.477762734889984, |
| "ce_loss_2": 4.513465809822082, |
| "ce_loss_3": 4.241680002212524, |
| "ce_loss_7": 3.7298651814460753, |
| "epoch": 0.766, |
| "grad_norm": 544.0, |
| "kl_loss_10": 180.71754302978516, |
| "kl_loss_2": 2169.4558044433593, |
| "kl_loss_3": 1686.6506469726562, |
| "kl_loss_7": 611.8586975097656, |
| "learning_rate": 0.00013162943106179747, |
| "loss": 1171.1721, |
| "step": 7660 |
| }, |
| { |
| "ce_loss_10": 3.5293742179870606, |
| "ce_loss_13": 3.456415057182312, |
| "ce_loss_2": 4.477925181388855, |
| "ce_loss_3": 4.21549437046051, |
| "ce_loss_7": 3.7069293022155763, |
| "epoch": 0.767, |
| "grad_norm": 588.0, |
| "kl_loss_10": 176.59339599609376, |
| "kl_loss_2": 2121.027722167969, |
| "kl_loss_3": 1652.0191345214844, |
| "kl_loss_7": 595.7734832763672, |
| "learning_rate": 0.00013055842593990132, |
| "loss": 1142.6258, |
| "step": 7670 |
| }, |
| { |
| "ce_loss_10": 3.4710524678230286, |
| "ce_loss_13": 3.399113714694977, |
| "ce_loss_2": 4.434552907943726, |
| "ce_loss_3": 4.163622748851776, |
| "ce_loss_7": 3.6474674701690675, |
| "epoch": 0.768, |
| "grad_norm": 540.0, |
| "kl_loss_10": 174.6668930053711, |
| "kl_loss_2": 2126.9706176757813, |
| "kl_loss_3": 1655.3524780273438, |
| "kl_loss_7": 590.6725830078125, |
| "learning_rate": 0.00012949114109055414, |
| "loss": 1168.1568, |
| "step": 7680 |
| }, |
| { |
| "ce_loss_10": 3.519006776809692, |
| "ce_loss_13": 3.4431997537612915, |
| "ce_loss_2": 4.4897076964378355, |
| "ce_loss_3": 4.226867043972016, |
| "ce_loss_7": 3.6994680523872376, |
| "epoch": 0.769, |
| "grad_norm": 584.0, |
| "kl_loss_10": 177.6523193359375, |
| "kl_loss_2": 2161.8881958007814, |
| "kl_loss_3": 1689.2801330566406, |
| "kl_loss_7": 607.9943145751953, |
| "learning_rate": 0.00012842758726130281, |
| "loss": 1170.0649, |
| "step": 7690 |
| }, |
| { |
| "ce_loss_10": 3.5628538727760315, |
| "ce_loss_13": 3.485966920852661, |
| "ce_loss_2": 4.55888135433197, |
| "ce_loss_3": 4.292485213279724, |
| "ce_loss_7": 3.7444689750671385, |
| "epoch": 0.77, |
| "grad_norm": 580.0, |
| "kl_loss_10": 179.38818130493163, |
| "kl_loss_2": 2210.8425048828126, |
| "kl_loss_3": 1733.9449951171875, |
| "kl_loss_7": 610.3931243896484, |
| "learning_rate": 0.00012736777516212267, |
| "loss": 1160.5377, |
| "step": 7700 |
| }, |
| { |
| "ce_loss_10": 3.557508039474487, |
| "ce_loss_13": 3.4799222111701966, |
| "ce_loss_2": 4.522961139678955, |
| "ce_loss_3": 4.253420984745025, |
| "ce_loss_7": 3.736038076877594, |
| "epoch": 0.771, |
| "grad_norm": 548.0, |
| "kl_loss_10": 181.55507125854493, |
| "kl_loss_2": 2158.760382080078, |
| "kl_loss_3": 1679.4241821289063, |
| "kl_loss_7": 612.0054595947265, |
| "learning_rate": 0.00012631171546530968, |
| "loss": 1138.0437, |
| "step": 7710 |
| }, |
| { |
| "ce_loss_10": 3.573415291309357, |
| "ce_loss_13": 3.4920427322387697, |
| "ce_loss_2": 4.5341356039047245, |
| "ce_loss_3": 4.2658631801605225, |
| "ce_loss_7": 3.752500355243683, |
| "epoch": 0.772, |
| "grad_norm": 568.0, |
| "kl_loss_10": 181.86062927246093, |
| "kl_loss_2": 2149.480059814453, |
| "kl_loss_3": 1673.6519470214844, |
| "kl_loss_7": 603.6808334350586, |
| "learning_rate": 0.00012525941880537307, |
| "loss": 1168.6842, |
| "step": 7720 |
| }, |
| { |
| "ce_loss_10": 3.6038484454154966, |
| "ce_loss_13": 3.528382158279419, |
| "ce_loss_2": 4.546409988403321, |
| "ce_loss_3": 4.28290638923645, |
| "ce_loss_7": 3.774893271923065, |
| "epoch": 0.773, |
| "grad_norm": 648.0, |
| "kl_loss_10": 176.13294677734376, |
| "kl_loss_2": 2093.1892028808593, |
| "kl_loss_3": 1628.6803955078126, |
| "kl_loss_7": 588.4353439331055, |
| "learning_rate": 0.00012421089577892869, |
| "loss": 1139.2071, |
| "step": 7730 |
| }, |
| { |
| "ce_loss_10": 3.555491530895233, |
| "ce_loss_13": 3.4761422514915465, |
| "ce_loss_2": 4.545820116996765, |
| "ce_loss_3": 4.266150867938995, |
| "ce_loss_7": 3.7351402401924134, |
| "epoch": 0.774, |
| "grad_norm": 668.0, |
| "kl_loss_10": 179.60176849365234, |
| "kl_loss_2": 2216.0720031738283, |
| "kl_loss_3": 1715.4457092285156, |
| "kl_loss_7": 609.4783508300782, |
| "learning_rate": 0.0001231661569445919, |
| "loss": 1172.4699, |
| "step": 7740 |
| }, |
| { |
| "ce_loss_10": 3.410160577297211, |
| "ce_loss_13": 3.3377888798713684, |
| "ce_loss_2": 4.401880002021789, |
| "ce_loss_3": 4.1333277225494385, |
| "ce_loss_7": 3.589407229423523, |
| "epoch": 0.775, |
| "grad_norm": 560.0, |
| "kl_loss_10": 176.03026962280273, |
| "kl_loss_2": 2206.2500732421877, |
| "kl_loss_3": 1718.6787414550781, |
| "kl_loss_7": 601.0735870361328, |
| "learning_rate": 0.00012212521282287093, |
| "loss": 1191.8578, |
| "step": 7750 |
| }, |
| { |
| "ce_loss_10": 3.5700145840644835, |
| "ce_loss_13": 3.493156003952026, |
| "ce_loss_2": 4.517843317985535, |
| "ce_loss_3": 4.254901158809662, |
| "ce_loss_7": 3.748375141620636, |
| "epoch": 0.776, |
| "grad_norm": 536.0, |
| "kl_loss_10": 180.4297233581543, |
| "kl_loss_2": 2117.2120727539063, |
| "kl_loss_3": 1651.861083984375, |
| "kl_loss_7": 599.0601287841797, |
| "learning_rate": 0.00012108807389606158, |
| "loss": 1171.4985, |
| "step": 7760 |
| }, |
| { |
| "ce_loss_10": 3.5604520797729493, |
| "ce_loss_13": 3.4879041433334352, |
| "ce_loss_2": 4.51350736618042, |
| "ce_loss_3": 4.255460405349732, |
| "ce_loss_7": 3.737185871601105, |
| "epoch": 0.777, |
| "grad_norm": 624.0, |
| "kl_loss_10": 173.82694396972656, |
| "kl_loss_2": 2134.6752502441404, |
| "kl_loss_3": 1670.9033142089843, |
| "kl_loss_7": 592.1039428710938, |
| "learning_rate": 0.00012005475060814159, |
| "loss": 1139.6322, |
| "step": 7770 |
| }, |
| { |
| "ce_loss_10": 3.5012547731399537, |
| "ce_loss_13": 3.4265154361724854, |
| "ce_loss_2": 4.493270707130432, |
| "ce_loss_3": 4.232757782936096, |
| "ce_loss_7": 3.676969814300537, |
| "epoch": 0.778, |
| "grad_norm": 592.0, |
| "kl_loss_10": 178.45665435791017, |
| "kl_loss_2": 2218.041455078125, |
| "kl_loss_3": 1749.4460815429688, |
| "kl_loss_7": 609.0319793701171, |
| "learning_rate": 0.00011902525336466464, |
| "loss": 1173.4994, |
| "step": 7780 |
| }, |
| { |
| "ce_loss_10": 3.487755036354065, |
| "ce_loss_13": 3.40771107673645, |
| "ce_loss_2": 4.503837430477143, |
| "ce_loss_3": 4.227473521232605, |
| "ce_loss_7": 3.6715272665023804, |
| "epoch": 0.779, |
| "grad_norm": 556.0, |
| "kl_loss_10": 182.97367095947266, |
| "kl_loss_2": 2253.76220703125, |
| "kl_loss_3": 1756.6077819824218, |
| "kl_loss_7": 618.8783203125, |
| "learning_rate": 0.00011799959253265668, |
| "loss": 1168.3436, |
| "step": 7790 |
| }, |
| { |
| "ce_loss_10": 3.548134469985962, |
| "ce_loss_13": 3.4717200636863708, |
| "ce_loss_2": 4.531218719482422, |
| "ce_loss_3": 4.259068071842194, |
| "ce_loss_7": 3.725462853908539, |
| "epoch": 0.78, |
| "grad_norm": 588.0, |
| "kl_loss_10": 179.5894790649414, |
| "kl_loss_2": 2197.6992370605467, |
| "kl_loss_3": 1714.0868286132813, |
| "kl_loss_7": 606.9559936523438, |
| "learning_rate": 0.00011697777844051105, |
| "loss": 1168.1586, |
| "step": 7800 |
| }, |
| { |
| "ce_loss_10": 3.5325579047203064, |
| "ce_loss_13": 3.4524773359298706, |
| "ce_loss_2": 4.540277624130249, |
| "ce_loss_3": 4.275496506690979, |
| "ce_loss_7": 3.709948194026947, |
| "epoch": 0.781, |
| "grad_norm": 600.0, |
| "kl_loss_10": 182.08444366455078, |
| "kl_loss_2": 2253.191998291016, |
| "kl_loss_3": 1783.90888671875, |
| "kl_loss_7": 609.2252471923828, |
| "learning_rate": 0.00011595982137788402, |
| "loss": 1182.0791, |
| "step": 7810 |
| }, |
| { |
| "ce_loss_10": 3.507384693622589, |
| "ce_loss_13": 3.433124232292175, |
| "ce_loss_2": 4.452573490142822, |
| "ce_loss_3": 4.191938650608063, |
| "ce_loss_7": 3.6804782152175903, |
| "epoch": 0.782, |
| "grad_norm": 552.0, |
| "kl_loss_10": 174.53733520507814, |
| "kl_loss_2": 2103.1436462402344, |
| "kl_loss_3": 1636.1021728515625, |
| "kl_loss_7": 594.1032348632813, |
| "learning_rate": 0.00011494573159559212, |
| "loss": 1150.1953, |
| "step": 7820 |
| }, |
| { |
| "ce_loss_10": 3.495812237262726, |
| "ce_loss_13": 3.4193639039993284, |
| "ce_loss_2": 4.4669132947921755, |
| "ce_loss_3": 4.2113652467727665, |
| "ce_loss_7": 3.67316712141037, |
| "epoch": 0.783, |
| "grad_norm": 572.0, |
| "kl_loss_10": 178.65593719482422, |
| "kl_loss_2": 2173.239221191406, |
| "kl_loss_3": 1708.3340942382813, |
| "kl_loss_7": 603.083627319336, |
| "learning_rate": 0.00011393551930550828, |
| "loss": 1187.9246, |
| "step": 7830 |
| }, |
| { |
| "ce_loss_10": 3.6368354201316833, |
| "ce_loss_13": 3.559674692153931, |
| "ce_loss_2": 4.571843910217285, |
| "ce_loss_3": 4.303619515895844, |
| "ce_loss_7": 3.8069366455078124, |
| "epoch": 0.784, |
| "grad_norm": 588.0, |
| "kl_loss_10": 179.06233749389648, |
| "kl_loss_2": 2120.6896240234373, |
| "kl_loss_3": 1638.9197570800782, |
| "kl_loss_7": 595.7463287353515, |
| "learning_rate": 0.00011292919468045875, |
| "loss": 1145.6585, |
| "step": 7840 |
| }, |
| { |
| "ce_loss_10": 3.584019410610199, |
| "ce_loss_13": 3.5086099743843078, |
| "ce_loss_2": 4.53436963558197, |
| "ce_loss_3": 4.271309959888458, |
| "ce_loss_7": 3.7602915167808533, |
| "epoch": 0.785, |
| "grad_norm": 528.0, |
| "kl_loss_10": 177.57644500732422, |
| "kl_loss_2": 2126.9706481933595, |
| "kl_loss_3": 1654.4735168457032, |
| "kl_loss_7": 600.0492980957031, |
| "learning_rate": 0.00011192676785412154, |
| "loss": 1144.0532, |
| "step": 7850 |
| }, |
| { |
| "ce_loss_10": 3.522589087486267, |
| "ce_loss_13": 3.4456050395965576, |
| "ce_loss_2": 4.529689431190491, |
| "ce_loss_3": 4.258461606502533, |
| "ce_loss_7": 3.704596519470215, |
| "epoch": 0.786, |
| "grad_norm": 624.0, |
| "kl_loss_10": 178.9210517883301, |
| "kl_loss_2": 2216.430499267578, |
| "kl_loss_3": 1733.1487976074218, |
| "kl_loss_7": 602.0237121582031, |
| "learning_rate": 0.00011092824892092374, |
| "loss": 1161.7434, |
| "step": 7860 |
| }, |
| { |
| "ce_loss_10": 3.454429876804352, |
| "ce_loss_13": 3.376889729499817, |
| "ce_loss_2": 4.473304414749146, |
| "ce_loss_3": 4.201044774055481, |
| "ce_loss_7": 3.6403449535369874, |
| "epoch": 0.787, |
| "grad_norm": 544.0, |
| "kl_loss_10": 178.50691452026368, |
| "kl_loss_2": 2241.591131591797, |
| "kl_loss_3": 1762.5004089355468, |
| "kl_loss_7": 614.9870758056641, |
| "learning_rate": 0.0001099336479359398, |
| "loss": 1163.7643, |
| "step": 7870 |
| }, |
| { |
| "ce_loss_10": 3.5764689803123475, |
| "ce_loss_13": 3.507636034488678, |
| "ce_loss_2": 4.512799096107483, |
| "ce_loss_3": 4.25046044588089, |
| "ce_loss_7": 3.746009385585785, |
| "epoch": 0.788, |
| "grad_norm": 564.0, |
| "kl_loss_10": 175.3071716308594, |
| "kl_loss_2": 2102.777294921875, |
| "kl_loss_3": 1634.8632263183595, |
| "kl_loss_7": 592.2164337158204, |
| "learning_rate": 0.00010894297491479043, |
| "loss": 1142.6834, |
| "step": 7880 |
| }, |
| { |
| "ce_loss_10": 3.575552821159363, |
| "ce_loss_13": 3.5023175954818724, |
| "ce_loss_2": 4.539198517799377, |
| "ce_loss_3": 4.279193782806397, |
| "ce_loss_7": 3.750091075897217, |
| "epoch": 0.789, |
| "grad_norm": 576.0, |
| "kl_loss_10": 176.76428680419923, |
| "kl_loss_2": 2146.3808166503904, |
| "kl_loss_3": 1681.0488159179688, |
| "kl_loss_7": 595.56142578125, |
| "learning_rate": 0.00010795623983354214, |
| "loss": 1139.8293, |
| "step": 7890 |
| }, |
| { |
| "ce_loss_10": 3.4591768264770506, |
| "ce_loss_13": 3.3825891733169557, |
| "ce_loss_2": 4.4514943838119505, |
| "ce_loss_3": 4.181638932228088, |
| "ce_loss_7": 3.643087315559387, |
| "epoch": 0.79, |
| "grad_norm": 580.0, |
| "kl_loss_10": 182.44262008666993, |
| "kl_loss_2": 2230.9637817382813, |
| "kl_loss_3": 1740.3924072265625, |
| "kl_loss_7": 621.2922943115234, |
| "learning_rate": 0.00010697345262860636, |
| "loss": 1171.6089, |
| "step": 7900 |
| }, |
| { |
| "ce_loss_10": 3.600342130661011, |
| "ce_loss_13": 3.5264546155929564, |
| "ce_loss_2": 4.545495390892029, |
| "ce_loss_3": 4.278535521030426, |
| "ce_loss_7": 3.771434724330902, |
| "epoch": 0.791, |
| "grad_norm": 736.0, |
| "kl_loss_10": 177.22287063598634, |
| "kl_loss_2": 2132.298791503906, |
| "kl_loss_3": 1650.1429077148437, |
| "kl_loss_7": 593.3167419433594, |
| "learning_rate": 0.00010599462319663906, |
| "loss": 1136.3734, |
| "step": 7910 |
| }, |
| { |
| "ce_loss_10": 3.5746383547782896, |
| "ce_loss_13": 3.4998196601867675, |
| "ce_loss_2": 4.493499338626862, |
| "ce_loss_3": 4.230222713947296, |
| "ce_loss_7": 3.7425215244293213, |
| "epoch": 0.792, |
| "grad_norm": 520.0, |
| "kl_loss_10": 174.49715042114258, |
| "kl_loss_2": 2051.4084716796874, |
| "kl_loss_3": 1592.7509643554688, |
| "kl_loss_7": 582.2202606201172, |
| "learning_rate": 0.00010501976139444191, |
| "loss": 1118.4902, |
| "step": 7920 |
| }, |
| { |
| "ce_loss_10": 3.6047690868377686, |
| "ce_loss_13": 3.5289911150932314, |
| "ce_loss_2": 4.545255088806153, |
| "ce_loss_3": 4.2847788572311405, |
| "ce_loss_7": 3.7748185992240906, |
| "epoch": 0.793, |
| "grad_norm": 604.0, |
| "kl_loss_10": 176.07794952392578, |
| "kl_loss_2": 2104.0453186035156, |
| "kl_loss_3": 1645.7491271972656, |
| "kl_loss_7": 587.9952331542969, |
| "learning_rate": 0.0001040488770388625, |
| "loss": 1154.1295, |
| "step": 7930 |
| }, |
| { |
| "ce_loss_10": 3.548888790607452, |
| "ce_loss_13": 3.4759244203567503, |
| "ce_loss_2": 4.515872287750244, |
| "ce_loss_3": 4.250580382347107, |
| "ce_loss_7": 3.7205033540725707, |
| "epoch": 0.794, |
| "grad_norm": 680.0, |
| "kl_loss_10": 177.23135299682616, |
| "kl_loss_2": 2173.47548828125, |
| "kl_loss_3": 1700.3454467773438, |
| "kl_loss_7": 599.390249633789, |
| "learning_rate": 0.00010308197990669538, |
| "loss": 1149.7575, |
| "step": 7940 |
| }, |
| { |
| "ce_loss_10": 3.664888024330139, |
| "ce_loss_13": 3.5850353479385375, |
| "ce_loss_2": 4.610143923759461, |
| "ce_loss_3": 4.346996653079986, |
| "ce_loss_7": 3.83613098859787, |
| "epoch": 0.795, |
| "grad_norm": 540.0, |
| "kl_loss_10": 179.47011337280273, |
| "kl_loss_2": 2129.81064453125, |
| "kl_loss_3": 1662.4896118164063, |
| "kl_loss_7": 599.4926940917969, |
| "learning_rate": 0.0001021190797345839, |
| "loss": 1140.0146, |
| "step": 7950 |
| }, |
| { |
| "ce_loss_10": 3.3896429777145385, |
| "ce_loss_13": 3.3098750829696657, |
| "ce_loss_2": 4.413335740566254, |
| "ce_loss_3": 4.134794509410858, |
| "ce_loss_7": 3.57609201669693, |
| "epoch": 0.796, |
| "grad_norm": 580.0, |
| "kl_loss_10": 185.28996887207032, |
| "kl_loss_2": 2269.882763671875, |
| "kl_loss_3": 1772.13056640625, |
| "kl_loss_7": 628.5536560058594, |
| "learning_rate": 0.00010116018621892236, |
| "loss": 1175.2182, |
| "step": 7960 |
| }, |
| { |
| "ce_loss_10": 3.603187918663025, |
| "ce_loss_13": 3.5232182860374452, |
| "ce_loss_2": 4.567694234848022, |
| "ce_loss_3": 4.309406304359436, |
| "ce_loss_7": 3.779837393760681, |
| "epoch": 0.797, |
| "grad_norm": 608.0, |
| "kl_loss_10": 186.1454734802246, |
| "kl_loss_2": 2165.1848999023437, |
| "kl_loss_3": 1705.360009765625, |
| "kl_loss_7": 616.0535675048828, |
| "learning_rate": 0.00010020530901575753, |
| "loss": 1136.0533, |
| "step": 7970 |
| }, |
| { |
| "ce_loss_10": 3.625728499889374, |
| "ce_loss_13": 3.5490816116333006, |
| "ce_loss_2": 4.573475480079651, |
| "ce_loss_3": 4.304804050922394, |
| "ce_loss_7": 3.799483132362366, |
| "epoch": 0.798, |
| "grad_norm": 520.0, |
| "kl_loss_10": 180.7791946411133, |
| "kl_loss_2": 2134.7111938476564, |
| "kl_loss_3": 1658.3630126953126, |
| "kl_loss_7": 601.8699676513672, |
| "learning_rate": 9.925445774069231e-05, |
| "loss": 1126.8894, |
| "step": 7980 |
| }, |
| { |
| "ce_loss_10": 3.5760633826255797, |
| "ce_loss_13": 3.500509262084961, |
| "ce_loss_2": 4.527716112136841, |
| "ce_loss_3": 4.2627903580665585, |
| "ce_loss_7": 3.754010498523712, |
| "epoch": 0.799, |
| "grad_norm": 728.0, |
| "kl_loss_10": 177.96156311035156, |
| "kl_loss_2": 2117.894659423828, |
| "kl_loss_3": 1646.0127319335938, |
| "kl_loss_7": 595.1834564208984, |
| "learning_rate": 9.830764196878872e-05, |
| "loss": 1125.6953, |
| "step": 7990 |
| }, |
| { |
| "ce_loss_10": 3.5167272210121157, |
| "ce_loss_13": 3.443312036991119, |
| "ce_loss_2": 4.485463619232178, |
| "ce_loss_3": 4.227659916877746, |
| "ce_loss_7": 3.6942790031433104, |
| "epoch": 0.8, |
| "grad_norm": 480.0, |
| "kl_loss_10": 175.60029678344728, |
| "kl_loss_2": 2190.2330810546873, |
| "kl_loss_3": 1721.7053161621093, |
| "kl_loss_7": 603.3801788330078, |
| "learning_rate": 9.736487123447069e-05, |
| "loss": 1159.6166, |
| "step": 8000 |
| }, |
| { |
| "ce_loss_10": 3.4639697551727293, |
| "ce_loss_13": 3.389075720310211, |
| "ce_loss_2": 4.485336112976074, |
| "ce_loss_3": 4.228267467021942, |
| "ce_loss_7": 3.6402989268302917, |
| "epoch": 0.801, |
| "grad_norm": 600.0, |
| "kl_loss_10": 179.87705307006837, |
| "kl_loss_2": 2294.8387084960937, |
| "kl_loss_3": 1823.8187927246095, |
| "kl_loss_7": 608.9864349365234, |
| "learning_rate": 9.642615503142926e-05, |
| "loss": 1194.0703, |
| "step": 8010 |
| }, |
| { |
| "ce_loss_10": 3.5347692489624025, |
| "ce_loss_13": 3.4572302103042603, |
| "ce_loss_2": 4.520514702796936, |
| "ce_loss_3": 4.254623317718506, |
| "ce_loss_7": 3.7080873131752012, |
| "epoch": 0.802, |
| "grad_norm": 572.0, |
| "kl_loss_10": 175.5455764770508, |
| "kl_loss_2": 2196.378839111328, |
| "kl_loss_3": 1715.636651611328, |
| "kl_loss_7": 596.8724548339844, |
| "learning_rate": 9.549150281252633e-05, |
| "loss": 1151.6992, |
| "step": 8020 |
| }, |
| { |
| "ce_loss_10": 3.563262867927551, |
| "ce_loss_13": 3.486225724220276, |
| "ce_loss_2": 4.527439785003662, |
| "ce_loss_3": 4.256748235225677, |
| "ce_loss_7": 3.7390462875366213, |
| "epoch": 0.803, |
| "grad_norm": 520.0, |
| "kl_loss_10": 179.0418388366699, |
| "kl_loss_2": 2160.1055908203125, |
| "kl_loss_3": 1681.3586303710938, |
| "kl_loss_7": 596.7608947753906, |
| "learning_rate": 9.4560923989699e-05, |
| "loss": 1169.5601, |
| "step": 8030 |
| }, |
| { |
| "ce_loss_10": 3.549173581600189, |
| "ce_loss_13": 3.4747613072395325, |
| "ce_loss_2": 4.515510749816895, |
| "ce_loss_3": 4.245619797706604, |
| "ce_loss_7": 3.7281826019287108, |
| "epoch": 0.804, |
| "grad_norm": 552.0, |
| "kl_loss_10": 177.9036865234375, |
| "kl_loss_2": 2149.996447753906, |
| "kl_loss_3": 1673.0265747070312, |
| "kl_loss_7": 598.6687103271485, |
| "learning_rate": 9.363442793386607e-05, |
| "loss": 1174.7094, |
| "step": 8040 |
| }, |
| { |
| "ce_loss_10": 3.5321462750434875, |
| "ce_loss_13": 3.453168177604675, |
| "ce_loss_2": 4.5332019329071045, |
| "ce_loss_3": 4.265519142150879, |
| "ce_loss_7": 3.7162665724754333, |
| "epoch": 0.805, |
| "grad_norm": 592.0, |
| "kl_loss_10": 181.04829177856445, |
| "kl_loss_2": 2218.4782836914064, |
| "kl_loss_3": 1732.6318908691405, |
| "kl_loss_7": 617.0633758544922, |
| "learning_rate": 9.271202397483213e-05, |
| "loss": 1149.8916, |
| "step": 8050 |
| }, |
| { |
| "ce_loss_10": 3.547755253314972, |
| "ce_loss_13": 3.474861478805542, |
| "ce_loss_2": 4.498465514183044, |
| "ce_loss_3": 4.235939025878906, |
| "ce_loss_7": 3.7163867115974427, |
| "epoch": 0.806, |
| "grad_norm": 572.0, |
| "kl_loss_10": 175.92396697998046, |
| "kl_loss_2": 2136.2159729003906, |
| "kl_loss_3": 1668.2355712890626, |
| "kl_loss_7": 590.7317443847656, |
| "learning_rate": 9.179372140119524e-05, |
| "loss": 1168.4604, |
| "step": 8060 |
| }, |
| { |
| "ce_loss_10": 3.494523513317108, |
| "ce_loss_13": 3.4206513285636904, |
| "ce_loss_2": 4.459244108200073, |
| "ce_loss_3": 4.188582479953766, |
| "ce_loss_7": 3.6680249691009523, |
| "epoch": 0.807, |
| "grad_norm": 564.0, |
| "kl_loss_10": 176.53147811889647, |
| "kl_loss_2": 2154.551867675781, |
| "kl_loss_3": 1677.4455200195312, |
| "kl_loss_7": 596.7226654052735, |
| "learning_rate": 9.087952946025175e-05, |
| "loss": 1164.4886, |
| "step": 8070 |
| }, |
| { |
| "ce_loss_10": 3.6058158397674562, |
| "ce_loss_13": 3.5339553594589233, |
| "ce_loss_2": 4.533062171936035, |
| "ce_loss_3": 4.26933354139328, |
| "ce_loss_7": 3.768651068210602, |
| "epoch": 0.808, |
| "grad_norm": 592.0, |
| "kl_loss_10": 173.78207092285157, |
| "kl_loss_2": 2082.071905517578, |
| "kl_loss_3": 1614.2910522460938, |
| "kl_loss_7": 576.9482543945312, |
| "learning_rate": 8.996945735790446e-05, |
| "loss": 1146.8303, |
| "step": 8080 |
| }, |
| { |
| "ce_loss_10": 3.50276095867157, |
| "ce_loss_13": 3.428582501411438, |
| "ce_loss_2": 4.457551169395447, |
| "ce_loss_3": 4.193507122993469, |
| "ce_loss_7": 3.672742247581482, |
| "epoch": 0.809, |
| "grad_norm": 608.0, |
| "kl_loss_10": 175.95007400512696, |
| "kl_loss_2": 2152.8104553222656, |
| "kl_loss_3": 1678.2301330566406, |
| "kl_loss_7": 594.3645935058594, |
| "learning_rate": 8.906351425856951e-05, |
| "loss": 1158.1713, |
| "step": 8090 |
| }, |
| { |
| "ce_loss_10": 3.4856011509895324, |
| "ce_loss_13": 3.412043738365173, |
| "ce_loss_2": 4.477128624916077, |
| "ce_loss_3": 4.2102068901062015, |
| "ce_loss_7": 3.663009238243103, |
| "epoch": 0.81, |
| "grad_norm": 588.0, |
| "kl_loss_10": 178.9893539428711, |
| "kl_loss_2": 2217.222705078125, |
| "kl_loss_3": 1734.5539611816407, |
| "kl_loss_7": 606.3814819335937, |
| "learning_rate": 8.816170928508365e-05, |
| "loss": 1174.1137, |
| "step": 8100 |
| }, |
| { |
| "ce_loss_10": 3.456532561779022, |
| "ce_loss_13": 3.380963850021362, |
| "ce_loss_2": 4.470215916633606, |
| "ce_loss_3": 4.203172373771667, |
| "ce_loss_7": 3.637952506542206, |
| "epoch": 0.811, |
| "grad_norm": 564.0, |
| "kl_loss_10": 181.76464233398437, |
| "kl_loss_2": 2271.6646240234377, |
| "kl_loss_3": 1789.1731323242188, |
| "kl_loss_7": 618.0579010009766, |
| "learning_rate": 8.7264051518613e-05, |
| "loss": 1188.6756, |
| "step": 8110 |
| }, |
| { |
| "ce_loss_10": 3.5451604604721068, |
| "ce_loss_13": 3.4716222047805787, |
| "ce_loss_2": 4.482484936714172, |
| "ce_loss_3": 4.218600440025329, |
| "ce_loss_7": 3.7114962100982667, |
| "epoch": 0.812, |
| "grad_norm": 572.0, |
| "kl_loss_10": 174.23039016723632, |
| "kl_loss_2": 2104.2210205078127, |
| "kl_loss_3": 1635.3122253417969, |
| "kl_loss_7": 586.6013153076171, |
| "learning_rate": 8.637054999856148e-05, |
| "loss": 1140.2461, |
| "step": 8120 |
| }, |
| { |
| "ce_loss_10": 3.5334657073020934, |
| "ce_loss_13": 3.4553168177604676, |
| "ce_loss_2": 4.5001609325408936, |
| "ce_loss_3": 4.233760714530945, |
| "ce_loss_7": 3.71214896440506, |
| "epoch": 0.813, |
| "grad_norm": 572.0, |
| "kl_loss_10": 179.94693908691406, |
| "kl_loss_2": 2168.638543701172, |
| "kl_loss_3": 1690.7197631835938, |
| "kl_loss_7": 602.7170349121094, |
| "learning_rate": 8.548121372247918e-05, |
| "loss": 1176.2271, |
| "step": 8130 |
| }, |
| { |
| "ce_loss_10": 3.6031864166259764, |
| "ce_loss_13": 3.5284059882164, |
| "ce_loss_2": 4.540188145637512, |
| "ce_loss_3": 4.276613438129425, |
| "ce_loss_7": 3.7673808455467226, |
| "epoch": 0.814, |
| "grad_norm": 576.0, |
| "kl_loss_10": 175.132576751709, |
| "kl_loss_2": 2116.421954345703, |
| "kl_loss_3": 1652.4540222167968, |
| "kl_loss_7": 584.193586730957, |
| "learning_rate": 8.459605164597267e-05, |
| "loss": 1140.1102, |
| "step": 8140 |
| }, |
| { |
| "ce_loss_10": 3.4851497173309327, |
| "ce_loss_13": 3.4121100902557373, |
| "ce_loss_2": 4.4567595481872555, |
| "ce_loss_3": 4.188820004463196, |
| "ce_loss_7": 3.6609971284866334, |
| "epoch": 0.815, |
| "grad_norm": 516.0, |
| "kl_loss_10": 176.46202392578124, |
| "kl_loss_2": 2164.97646484375, |
| "kl_loss_3": 1690.654833984375, |
| "kl_loss_7": 595.82392578125, |
| "learning_rate": 8.371507268261436e-05, |
| "loss": 1160.1355, |
| "step": 8150 |
| }, |
| { |
| "ce_loss_10": 3.5612674951553345, |
| "ce_loss_13": 3.486202526092529, |
| "ce_loss_2": 4.5333171606063845, |
| "ce_loss_3": 4.264222574234009, |
| "ce_loss_7": 3.7375367999076845, |
| "epoch": 0.816, |
| "grad_norm": 536.0, |
| "kl_loss_10": 178.54783096313477, |
| "kl_loss_2": 2174.8032287597657, |
| "kl_loss_3": 1693.2032836914063, |
| "kl_loss_7": 601.639274597168, |
| "learning_rate": 8.283828570385238e-05, |
| "loss": 1135.0793, |
| "step": 8160 |
| }, |
| { |
| "ce_loss_10": 3.566178250312805, |
| "ce_loss_13": 3.4907922625541685, |
| "ce_loss_2": 4.535942006111145, |
| "ce_loss_3": 4.269787204265595, |
| "ce_loss_7": 3.745415151119232, |
| "epoch": 0.817, |
| "grad_norm": 596.0, |
| "kl_loss_10": 175.95259857177734, |
| "kl_loss_2": 2127.893908691406, |
| "kl_loss_3": 1655.5139526367188, |
| "kl_loss_7": 597.2055206298828, |
| "learning_rate": 8.196569953892202e-05, |
| "loss": 1147.5553, |
| "step": 8170 |
| }, |
| { |
| "ce_loss_10": 3.485050618648529, |
| "ce_loss_13": 3.410007894039154, |
| "ce_loss_2": 4.454114603996277, |
| "ce_loss_3": 4.187376809120178, |
| "ce_loss_7": 3.6601618766784667, |
| "epoch": 0.818, |
| "grad_norm": 640.0, |
| "kl_loss_10": 177.51841354370117, |
| "kl_loss_2": 2140.9811096191406, |
| "kl_loss_3": 1670.5337829589844, |
| "kl_loss_7": 598.4014099121093, |
| "learning_rate": 8.109732297475635e-05, |
| "loss": 1142.2542, |
| "step": 8180 |
| }, |
| { |
| "ce_loss_10": 3.4574038982391357, |
| "ce_loss_13": 3.376466763019562, |
| "ce_loss_2": 4.488081407546997, |
| "ce_loss_3": 4.2140247344970705, |
| "ce_loss_7": 3.65143164396286, |
| "epoch": 0.819, |
| "grad_norm": 588.0, |
| "kl_loss_10": 184.73964157104493, |
| "kl_loss_2": 2257.5185974121096, |
| "kl_loss_3": 1764.1057739257812, |
| "kl_loss_7": 626.5130950927735, |
| "learning_rate": 8.023316475589754e-05, |
| "loss": 1190.8261, |
| "step": 8190 |
| }, |
| { |
| "ce_loss_10": 3.4220961928367615, |
| "ce_loss_13": 3.338273751735687, |
| "ce_loss_2": 4.495982336997986, |
| "ce_loss_3": 4.211030387878418, |
| "ce_loss_7": 3.615649092197418, |
| "epoch": 0.82, |
| "grad_norm": 680.0, |
| "kl_loss_10": 185.78453063964844, |
| "kl_loss_2": 2349.4278076171877, |
| "kl_loss_3": 1849.0640197753905, |
| "kl_loss_7": 637.3896453857421, |
| "learning_rate": 7.937323358440934e-05, |
| "loss": 1214.0248, |
| "step": 8200 |
| }, |
| { |
| "ce_loss_10": 3.541324031352997, |
| "ce_loss_13": 3.4684749960899355, |
| "ce_loss_2": 4.468911576271057, |
| "ce_loss_3": 4.20685533285141, |
| "ce_loss_7": 3.709389495849609, |
| "epoch": 0.821, |
| "grad_norm": 584.0, |
| "kl_loss_10": 174.97513656616212, |
| "kl_loss_2": 2090.5933227539062, |
| "kl_loss_3": 1628.1781616210938, |
| "kl_loss_7": 589.6404022216797, |
| "learning_rate": 7.851753811978923e-05, |
| "loss": 1140.9928, |
| "step": 8210 |
| }, |
| { |
| "ce_loss_10": 3.5598355412483214, |
| "ce_loss_13": 3.4843420505523683, |
| "ce_loss_2": 4.541475534439087, |
| "ce_loss_3": 4.275748348236084, |
| "ce_loss_7": 3.735049307346344, |
| "epoch": 0.822, |
| "grad_norm": 604.0, |
| "kl_loss_10": 177.00316925048827, |
| "kl_loss_2": 2174.6779052734373, |
| "kl_loss_3": 1702.5414611816407, |
| "kl_loss_7": 595.3967559814453, |
| "learning_rate": 7.766608697888095e-05, |
| "loss": 1150.2977, |
| "step": 8220 |
| }, |
| { |
| "ce_loss_10": 3.57365106344223, |
| "ce_loss_13": 3.498138427734375, |
| "ce_loss_2": 4.5424954175949095, |
| "ce_loss_3": 4.2809364080429075, |
| "ce_loss_7": 3.7457818508148195, |
| "epoch": 0.823, |
| "grad_norm": 576.0, |
| "kl_loss_10": 180.9578956604004, |
| "kl_loss_2": 2174.856481933594, |
| "kl_loss_3": 1712.8867797851562, |
| "kl_loss_7": 606.9090911865235, |
| "learning_rate": 7.681888873578785e-05, |
| "loss": 1172.8941, |
| "step": 8230 |
| }, |
| { |
| "ce_loss_10": 3.5023999333381655, |
| "ce_loss_13": 3.423751747608185, |
| "ce_loss_2": 4.507507848739624, |
| "ce_loss_3": 4.228441286087036, |
| "ce_loss_7": 3.6896154999732973, |
| "epoch": 0.824, |
| "grad_norm": 556.0, |
| "kl_loss_10": 182.18136978149414, |
| "kl_loss_2": 2222.666143798828, |
| "kl_loss_3": 1725.1198669433593, |
| "kl_loss_7": 612.9842071533203, |
| "learning_rate": 7.597595192178702e-05, |
| "loss": 1157.2363, |
| "step": 8240 |
| }, |
| { |
| "ce_loss_10": 3.501276743412018, |
| "ce_loss_13": 3.422858786582947, |
| "ce_loss_2": 4.514269304275513, |
| "ce_loss_3": 4.2400298595428465, |
| "ce_loss_7": 3.6824575424194337, |
| "epoch": 0.825, |
| "grad_norm": 588.0, |
| "kl_loss_10": 181.8477668762207, |
| "kl_loss_2": 2277.1808898925783, |
| "kl_loss_3": 1781.4539184570312, |
| "kl_loss_7": 622.9125793457031, |
| "learning_rate": 7.513728502524286e-05, |
| "loss": 1187.7779, |
| "step": 8250 |
| }, |
| { |
| "ce_loss_10": 3.5026116013526916, |
| "ce_loss_13": 3.428036153316498, |
| "ce_loss_2": 4.455591607093811, |
| "ce_loss_3": 4.192325818538666, |
| "ce_loss_7": 3.6698171854019166, |
| "epoch": 0.826, |
| "grad_norm": 540.0, |
| "kl_loss_10": 170.86422119140624, |
| "kl_loss_2": 2124.475665283203, |
| "kl_loss_3": 1660.3611083984374, |
| "kl_loss_7": 580.019741821289, |
| "learning_rate": 7.430289649152156e-05, |
| "loss": 1161.5576, |
| "step": 8260 |
| }, |
| { |
| "ce_loss_10": 3.404016637802124, |
| "ce_loss_13": 3.3291639566421507, |
| "ce_loss_2": 4.41770989894867, |
| "ce_loss_3": 4.153000998497009, |
| "ce_loss_7": 3.591460871696472, |
| "epoch": 0.827, |
| "grad_norm": 564.0, |
| "kl_loss_10": 179.06679763793946, |
| "kl_loss_2": 2271.2553527832033, |
| "kl_loss_3": 1785.926287841797, |
| "kl_loss_7": 620.811245727539, |
| "learning_rate": 7.347279472290646e-05, |
| "loss": 1175.1479, |
| "step": 8270 |
| }, |
| { |
| "ce_loss_10": 3.5473936796188354, |
| "ce_loss_13": 3.4728646278381348, |
| "ce_loss_2": 4.529109454154968, |
| "ce_loss_3": 4.2634922623634335, |
| "ce_loss_7": 3.7243195176124573, |
| "epoch": 0.828, |
| "grad_norm": 516.0, |
| "kl_loss_10": 176.9839729309082, |
| "kl_loss_2": 2192.3595458984373, |
| "kl_loss_3": 1719.4920288085937, |
| "kl_loss_7": 599.4450622558594, |
| "learning_rate": 7.264698807851328e-05, |
| "loss": 1170.4515, |
| "step": 8280 |
| }, |
| { |
| "ce_loss_10": 3.511405515670776, |
| "ce_loss_13": 3.4420324087142946, |
| "ce_loss_2": 4.462396240234375, |
| "ce_loss_3": 4.196757709980011, |
| "ce_loss_7": 3.678698420524597, |
| "epoch": 0.829, |
| "grad_norm": 520.0, |
| "kl_loss_10": 173.053133392334, |
| "kl_loss_2": 2122.6127746582033, |
| "kl_loss_3": 1647.0698120117188, |
| "kl_loss_7": 586.3415740966797, |
| "learning_rate": 7.182548487420554e-05, |
| "loss": 1152.9492, |
| "step": 8290 |
| }, |
| { |
| "ce_loss_10": 3.56430242061615, |
| "ce_loss_13": 3.4877224922180177, |
| "ce_loss_2": 4.515660381317138, |
| "ce_loss_3": 4.256710803508758, |
| "ce_loss_7": 3.7377355813980104, |
| "epoch": 0.83, |
| "grad_norm": 552.0, |
| "kl_loss_10": 178.47344284057618, |
| "kl_loss_2": 2141.9302307128905, |
| "kl_loss_3": 1673.7640502929687, |
| "kl_loss_7": 594.9386505126953, |
| "learning_rate": 7.100829338251146e-05, |
| "loss": 1142.0348, |
| "step": 8300 |
| }, |
| { |
| "ce_loss_10": 3.500733995437622, |
| "ce_loss_13": 3.420394682884216, |
| "ce_loss_2": 4.495737314224243, |
| "ce_loss_3": 4.226100885868073, |
| "ce_loss_7": 3.6811896324157716, |
| "epoch": 0.831, |
| "grad_norm": 664.0, |
| "kl_loss_10": 181.1860824584961, |
| "kl_loss_2": 2206.595349121094, |
| "kl_loss_3": 1725.6842834472657, |
| "kl_loss_7": 613.2857147216797, |
| "learning_rate": 7.019542183254046e-05, |
| "loss": 1155.5816, |
| "step": 8310 |
| }, |
| { |
| "ce_loss_10": 3.542024350166321, |
| "ce_loss_13": 3.46200088262558, |
| "ce_loss_2": 4.494865345954895, |
| "ce_loss_3": 4.2283999681472775, |
| "ce_loss_7": 3.7202147483825683, |
| "epoch": 0.832, |
| "grad_norm": 700.0, |
| "kl_loss_10": 184.21017608642578, |
| "kl_loss_2": 2155.698864746094, |
| "kl_loss_3": 1680.3289855957032, |
| "kl_loss_7": 609.4942947387696, |
| "learning_rate": 6.938687840989971e-05, |
| "loss": 1152.3119, |
| "step": 8320 |
| }, |
| { |
| "ce_loss_10": 3.475117301940918, |
| "ce_loss_13": 3.396902585029602, |
| "ce_loss_2": 4.447841107845306, |
| "ce_loss_3": 4.1811567902565, |
| "ce_loss_7": 3.65566908121109, |
| "epoch": 0.833, |
| "grad_norm": 644.0, |
| "kl_loss_10": 180.3916358947754, |
| "kl_loss_2": 2151.0271850585937, |
| "kl_loss_3": 1678.1678161621094, |
| "kl_loss_7": 600.837434387207, |
| "learning_rate": 6.858267125661271e-05, |
| "loss": 1171.0359, |
| "step": 8330 |
| }, |
| { |
| "ce_loss_10": 3.5341761112213135, |
| "ce_loss_13": 3.4623092293739317, |
| "ce_loss_2": 4.504952430725098, |
| "ce_loss_3": 4.235734903812409, |
| "ce_loss_7": 3.7161438941955565, |
| "epoch": 0.834, |
| "grad_norm": 652.0, |
| "kl_loss_10": 176.11358489990235, |
| "kl_loss_2": 2152.3007080078123, |
| "kl_loss_3": 1674.9229736328125, |
| "kl_loss_7": 599.1006164550781, |
| "learning_rate": 6.778280847103668e-05, |
| "loss": 1187.2795, |
| "step": 8340 |
| }, |
| { |
| "ce_loss_10": 3.5474065065383913, |
| "ce_loss_13": 3.4685073494911194, |
| "ce_loss_2": 4.511071228981018, |
| "ce_loss_3": 4.243249070644379, |
| "ce_loss_7": 3.7250569343566893, |
| "epoch": 0.835, |
| "grad_norm": 544.0, |
| "kl_loss_10": 179.0617935180664, |
| "kl_loss_2": 2161.984759521484, |
| "kl_loss_3": 1690.6224731445313, |
| "kl_loss_7": 606.1362609863281, |
| "learning_rate": 6.698729810778065e-05, |
| "loss": 1153.2188, |
| "step": 8350 |
| }, |
| { |
| "ce_loss_10": 3.4568483591079713, |
| "ce_loss_13": 3.3825206756591797, |
| "ce_loss_2": 4.450219774246216, |
| "ce_loss_3": 4.178645396232605, |
| "ce_loss_7": 3.6368404626846313, |
| "epoch": 0.836, |
| "grad_norm": 628.0, |
| "kl_loss_10": 176.9057861328125, |
| "kl_loss_2": 2207.3519287109375, |
| "kl_loss_3": 1716.1288146972656, |
| "kl_loss_7": 600.357534790039, |
| "learning_rate": 6.619614817762538e-05, |
| "loss": 1175.9664, |
| "step": 8360 |
| }, |
| { |
| "ce_loss_10": 3.421834397315979, |
| "ce_loss_13": 3.3451184391975404, |
| "ce_loss_2": 4.458368134498596, |
| "ce_loss_3": 4.186334764957428, |
| "ce_loss_7": 3.611315131187439, |
| "epoch": 0.837, |
| "grad_norm": 524.0, |
| "kl_loss_10": 179.41786346435546, |
| "kl_loss_2": 2288.001843261719, |
| "kl_loss_3": 1799.511328125, |
| "kl_loss_7": 622.7222686767578, |
| "learning_rate": 6.540936664744196e-05, |
| "loss": 1185.6504, |
| "step": 8370 |
| }, |
| { |
| "ce_loss_10": 3.5705604910850526, |
| "ce_loss_13": 3.495253837108612, |
| "ce_loss_2": 4.550519323348999, |
| "ce_loss_3": 4.287250196933746, |
| "ce_loss_7": 3.7482882261276247, |
| "epoch": 0.838, |
| "grad_norm": 556.0, |
| "kl_loss_10": 178.4987823486328, |
| "kl_loss_2": 2165.8636779785156, |
| "kl_loss_3": 1697.199853515625, |
| "kl_loss_7": 600.5514739990234, |
| "learning_rate": 6.462696144011149e-05, |
| "loss": 1144.948, |
| "step": 8380 |
| }, |
| { |
| "ce_loss_10": 3.521126616001129, |
| "ce_loss_13": 3.44657279253006, |
| "ce_loss_2": 4.47376012802124, |
| "ce_loss_3": 4.215767812728882, |
| "ce_loss_7": 3.700752067565918, |
| "epoch": 0.839, |
| "grad_norm": 552.0, |
| "kl_loss_10": 181.0975456237793, |
| "kl_loss_2": 2145.9520629882813, |
| "kl_loss_3": 1683.1618103027345, |
| "kl_loss_7": 606.7530914306641, |
| "learning_rate": 6.384894043444567e-05, |
| "loss": 1140.7508, |
| "step": 8390 |
| }, |
| { |
| "ce_loss_10": 3.5482063770294188, |
| "ce_loss_13": 3.4719661116600036, |
| "ce_loss_2": 4.523540115356445, |
| "ce_loss_3": 4.260622024536133, |
| "ce_loss_7": 3.7237455368041994, |
| "epoch": 0.84, |
| "grad_norm": 540.0, |
| "kl_loss_10": 178.300057220459, |
| "kl_loss_2": 2181.8050231933594, |
| "kl_loss_3": 1707.3205505371093, |
| "kl_loss_7": 602.2170059204102, |
| "learning_rate": 6.307531146510753e-05, |
| "loss": 1150.0869, |
| "step": 8400 |
| }, |
| { |
| "ce_loss_10": 3.526041495800018, |
| "ce_loss_13": 3.4509783387184143, |
| "ce_loss_2": 4.471963119506836, |
| "ce_loss_3": 4.206485414505005, |
| "ce_loss_7": 3.701741063594818, |
| "epoch": 0.841, |
| "grad_norm": 560.0, |
| "kl_loss_10": 177.0880439758301, |
| "kl_loss_2": 2118.260693359375, |
| "kl_loss_3": 1641.845037841797, |
| "kl_loss_7": 595.3580291748046, |
| "learning_rate": 6.230608232253226e-05, |
| "loss": 1129.8508, |
| "step": 8410 |
| }, |
| { |
| "ce_loss_10": 3.4824550271034242, |
| "ce_loss_13": 3.405357301235199, |
| "ce_loss_2": 4.482615494728089, |
| "ce_loss_3": 4.2257519364356995, |
| "ce_loss_7": 3.6680721044540405, |
| "epoch": 0.842, |
| "grad_norm": 512.0, |
| "kl_loss_10": 179.48506774902344, |
| "kl_loss_2": 2227.8513061523436, |
| "kl_loss_3": 1761.4332275390625, |
| "kl_loss_7": 616.7242858886718, |
| "learning_rate": 6.154126075284855e-05, |
| "loss": 1155.2555, |
| "step": 8420 |
| }, |
| { |
| "ce_loss_10": 3.577245807647705, |
| "ce_loss_13": 3.5012118101119993, |
| "ce_loss_2": 4.512918734550476, |
| "ce_loss_3": 4.249192714691162, |
| "ce_loss_7": 3.7460012435913086, |
| "epoch": 0.843, |
| "grad_norm": 704.0, |
| "kl_loss_10": 174.01815643310547, |
| "kl_loss_2": 2066.7827331542967, |
| "kl_loss_3": 1608.2534240722657, |
| "kl_loss_7": 586.0584747314454, |
| "learning_rate": 6.078085445780129e-05, |
| "loss": 1117.5314, |
| "step": 8430 |
| }, |
| { |
| "ce_loss_10": 3.584468650817871, |
| "ce_loss_13": 3.5081024169921875, |
| "ce_loss_2": 4.569616174697876, |
| "ce_loss_3": 4.300150573253632, |
| "ce_loss_7": 3.7608886480331423, |
| "epoch": 0.844, |
| "grad_norm": 576.0, |
| "kl_loss_10": 177.62249679565429, |
| "kl_loss_2": 2185.7296508789063, |
| "kl_loss_3": 1710.7079772949219, |
| "kl_loss_7": 599.2171966552735, |
| "learning_rate": 6.002487109467347e-05, |
| "loss": 1141.6974, |
| "step": 8440 |
| }, |
| { |
| "ce_loss_10": 3.587876856327057, |
| "ce_loss_13": 3.5108195781707763, |
| "ce_loss_2": 4.524105596542358, |
| "ce_loss_3": 4.2644576787948605, |
| "ce_loss_7": 3.756032574176788, |
| "epoch": 0.845, |
| "grad_norm": 592.0, |
| "kl_loss_10": 181.40281448364257, |
| "kl_loss_2": 2131.273962402344, |
| "kl_loss_3": 1667.7534301757812, |
| "kl_loss_7": 605.4166229248046, |
| "learning_rate": 5.927331827620902e-05, |
| "loss": 1141.2443, |
| "step": 8450 |
| }, |
| { |
| "ce_loss_10": 3.573608911037445, |
| "ce_loss_13": 3.499223828315735, |
| "ce_loss_2": 4.488192296028137, |
| "ce_loss_3": 4.230596256256104, |
| "ce_loss_7": 3.7483445525169374, |
| "epoch": 0.846, |
| "grad_norm": 552.0, |
| "kl_loss_10": 175.3866973876953, |
| "kl_loss_2": 2041.3302062988282, |
| "kl_loss_3": 1588.8981994628907, |
| "kl_loss_7": 591.4093719482422, |
| "learning_rate": 5.852620357053651e-05, |
| "loss": 1132.2791, |
| "step": 8460 |
| }, |
| { |
| "ce_loss_10": 3.6111098527908325, |
| "ce_loss_13": 3.536815571784973, |
| "ce_loss_2": 4.544794130325317, |
| "ce_loss_3": 4.2818133473396305, |
| "ce_loss_7": 3.780075490474701, |
| "epoch": 0.847, |
| "grad_norm": 596.0, |
| "kl_loss_10": 174.2255989074707, |
| "kl_loss_2": 2095.0241271972654, |
| "kl_loss_3": 1629.0387634277345, |
| "kl_loss_7": 588.6748046875, |
| "learning_rate": 5.778353450109286e-05, |
| "loss": 1140.0846, |
| "step": 8470 |
| }, |
| { |
| "ce_loss_10": 3.648575019836426, |
| "ce_loss_13": 3.5720423340797423, |
| "ce_loss_2": 4.605719590187073, |
| "ce_loss_3": 4.344667458534241, |
| "ce_loss_7": 3.8233685731887816, |
| "epoch": 0.848, |
| "grad_norm": 486.0, |
| "kl_loss_10": 179.81479415893554, |
| "kl_loss_2": 2138.249481201172, |
| "kl_loss_3": 1668.5974670410155, |
| "kl_loss_7": 599.209912109375, |
| "learning_rate": 5.7045318546547206e-05, |
| "loss": 1146.8947, |
| "step": 8480 |
| }, |
| { |
| "ce_loss_10": 3.5448459148406983, |
| "ce_loss_13": 3.468916821479797, |
| "ce_loss_2": 4.523812007904053, |
| "ce_loss_3": 4.25856339931488, |
| "ce_loss_7": 3.7180402636528016, |
| "epoch": 0.849, |
| "grad_norm": 556.0, |
| "kl_loss_10": 176.35201721191407, |
| "kl_loss_2": 2188.633197021484, |
| "kl_loss_3": 1710.0829528808595, |
| "kl_loss_7": 595.4919921875, |
| "learning_rate": 5.631156314072605e-05, |
| "loss": 1145.8699, |
| "step": 8490 |
| }, |
| { |
| "ce_loss_10": 3.559221601486206, |
| "ce_loss_13": 3.4834945678710936, |
| "ce_loss_2": 4.495839285850525, |
| "ce_loss_3": 4.22744711637497, |
| "ce_loss_7": 3.7315674662590026, |
| "epoch": 0.85, |
| "grad_norm": 536.0, |
| "kl_loss_10": 176.7962844848633, |
| "kl_loss_2": 2110.8933044433593, |
| "kl_loss_3": 1632.140673828125, |
| "kl_loss_7": 588.6574279785157, |
| "learning_rate": 5.5582275672538315e-05, |
| "loss": 1128.7181, |
| "step": 8500 |
| }, |
| { |
| "ce_loss_10": 3.4811159491539003, |
| "ce_loss_13": 3.403310573101044, |
| "ce_loss_2": 4.505023097991943, |
| "ce_loss_3": 4.238485896587372, |
| "ce_loss_7": 3.6721346259117125, |
| "epoch": 0.851, |
| "grad_norm": 608.0, |
| "kl_loss_10": 182.8627899169922, |
| "kl_loss_2": 2282.4833068847656, |
| "kl_loss_3": 1798.2781005859374, |
| "kl_loss_7": 625.0588409423829, |
| "learning_rate": 5.4857463485900484e-05, |
| "loss": 1192.5725, |
| "step": 8510 |
| }, |
| { |
| "ce_loss_10": 3.5297972202301025, |
| "ce_loss_13": 3.454351043701172, |
| "ce_loss_2": 4.489051342010498, |
| "ce_loss_3": 4.219796097278595, |
| "ce_loss_7": 3.7097468852996824, |
| "epoch": 0.852, |
| "grad_norm": 592.0, |
| "kl_loss_10": 178.4038864135742, |
| "kl_loss_2": 2146.418908691406, |
| "kl_loss_3": 1667.9574584960938, |
| "kl_loss_7": 602.1778778076172, |
| "learning_rate": 5.413713387966329e-05, |
| "loss": 1150.9164, |
| "step": 8520 |
| }, |
| { |
| "ce_loss_10": 3.558277463912964, |
| "ce_loss_13": 3.480745458602905, |
| "ce_loss_2": 4.530928635597229, |
| "ce_loss_3": 4.266195034980774, |
| "ce_loss_7": 3.7276942253112795, |
| "epoch": 0.853, |
| "grad_norm": 620.0, |
| "kl_loss_10": 178.86384658813478, |
| "kl_loss_2": 2174.0791748046877, |
| "kl_loss_3": 1703.99384765625, |
| "kl_loss_7": 598.962451171875, |
| "learning_rate": 5.34212941075381e-05, |
| "loss": 1160.2438, |
| "step": 8530 |
| }, |
| { |
| "ce_loss_10": 3.559523808956146, |
| "ce_loss_13": 3.4899546623229982, |
| "ce_loss_2": 4.511786758899689, |
| "ce_loss_3": 4.244810569286346, |
| "ce_loss_7": 3.7282875180244446, |
| "epoch": 0.854, |
| "grad_norm": 544.0, |
| "kl_loss_10": 173.61905364990236, |
| "kl_loss_2": 2125.2814514160154, |
| "kl_loss_3": 1651.0782043457032, |
| "kl_loss_7": 580.7644989013672, |
| "learning_rate": 5.270995137802315e-05, |
| "loss": 1139.1208, |
| "step": 8540 |
| }, |
| { |
| "ce_loss_10": 3.4913312673568724, |
| "ce_loss_13": 3.4199952483177185, |
| "ce_loss_2": 4.46144163608551, |
| "ce_loss_3": 4.1969265818595884, |
| "ce_loss_7": 3.6703786849975586, |
| "epoch": 0.855, |
| "grad_norm": 596.0, |
| "kl_loss_10": 176.40887756347655, |
| "kl_loss_2": 2170.294287109375, |
| "kl_loss_3": 1693.5492065429687, |
| "kl_loss_7": 604.686279296875, |
| "learning_rate": 5.2003112854332125e-05, |
| "loss": 1161.1432, |
| "step": 8550 |
| }, |
| { |
| "ce_loss_10": 3.495318293571472, |
| "ce_loss_13": 3.421377086639404, |
| "ce_loss_2": 4.460399007797241, |
| "ce_loss_3": 4.191486561298371, |
| "ce_loss_7": 3.666605508327484, |
| "epoch": 0.856, |
| "grad_norm": 624.0, |
| "kl_loss_10": 174.107186126709, |
| "kl_loss_2": 2159.1057250976564, |
| "kl_loss_3": 1686.5014953613281, |
| "kl_loss_7": 595.0416564941406, |
| "learning_rate": 5.130078565432089e-05, |
| "loss": 1138.6503, |
| "step": 8560 |
| }, |
| { |
| "ce_loss_10": 3.5646494030952454, |
| "ce_loss_13": 3.4924421072006226, |
| "ce_loss_2": 4.498122811317444, |
| "ce_loss_3": 4.236236476898194, |
| "ce_loss_7": 3.731403958797455, |
| "epoch": 0.857, |
| "grad_norm": 548.0, |
| "kl_loss_10": 173.7933578491211, |
| "kl_loss_2": 2107.9238403320314, |
| "kl_loss_3": 1647.1588623046875, |
| "kl_loss_7": 586.2743255615235, |
| "learning_rate": 5.060297685041659e-05, |
| "loss": 1120.3278, |
| "step": 8570 |
| }, |
| { |
| "ce_loss_10": 3.498642110824585, |
| "ce_loss_13": 3.423696291446686, |
| "ce_loss_2": 4.494770348072052, |
| "ce_loss_3": 4.218523621559143, |
| "ce_loss_7": 3.6762722969055175, |
| "epoch": 0.858, |
| "grad_norm": 548.0, |
| "kl_loss_10": 180.7342544555664, |
| "kl_loss_2": 2221.7594360351563, |
| "kl_loss_3": 1733.2135437011718, |
| "kl_loss_7": 609.6224884033203, |
| "learning_rate": 4.99096934695461e-05, |
| "loss": 1183.0167, |
| "step": 8580 |
| }, |
| { |
| "ce_loss_10": 3.55733345746994, |
| "ce_loss_13": 3.4829642295837404, |
| "ce_loss_2": 4.523844695091247, |
| "ce_loss_3": 4.2578066945075985, |
| "ce_loss_7": 3.7367467999458315, |
| "epoch": 0.859, |
| "grad_norm": 544.0, |
| "kl_loss_10": 175.85337829589844, |
| "kl_loss_2": 2157.883190917969, |
| "kl_loss_3": 1681.7412048339843, |
| "kl_loss_7": 598.1185791015625, |
| "learning_rate": 4.922094249306558e-05, |
| "loss": 1131.2188, |
| "step": 8590 |
| }, |
| { |
| "ce_loss_10": 3.5841567873954774, |
| "ce_loss_13": 3.509797990322113, |
| "ce_loss_2": 4.5469663619995115, |
| "ce_loss_3": 4.2871175646781925, |
| "ce_loss_7": 3.7628376722335815, |
| "epoch": 0.86, |
| "grad_norm": 604.0, |
| "kl_loss_10": 179.83917465209962, |
| "kl_loss_2": 2154.5059204101562, |
| "kl_loss_3": 1690.2081298828125, |
| "kl_loss_7": 604.0245941162109, |
| "learning_rate": 4.853673085668947e-05, |
| "loss": 1135.9622, |
| "step": 8600 |
| }, |
| { |
| "ce_loss_10": 3.60320885181427, |
| "ce_loss_13": 3.529503357410431, |
| "ce_loss_2": 4.566924571990967, |
| "ce_loss_3": 4.302417039871216, |
| "ce_loss_7": 3.7819976687431334, |
| "epoch": 0.861, |
| "grad_norm": 596.0, |
| "kl_loss_10": 177.36936798095704, |
| "kl_loss_2": 2148.3923461914064, |
| "kl_loss_3": 1680.8319580078125, |
| "kl_loss_7": 597.4716522216797, |
| "learning_rate": 4.78570654504214e-05, |
| "loss": 1156.3883, |
| "step": 8610 |
| }, |
| { |
| "ce_loss_10": 3.5473016500473022, |
| "ce_loss_13": 3.4740379452705383, |
| "ce_loss_2": 4.5167618751525875, |
| "ce_loss_3": 4.248826539516449, |
| "ce_loss_7": 3.7283095955848693, |
| "epoch": 0.862, |
| "grad_norm": 512.0, |
| "kl_loss_10": 175.62952041625977, |
| "kl_loss_2": 2176.8274475097655, |
| "kl_loss_3": 1698.4800903320313, |
| "kl_loss_7": 603.3765747070313, |
| "learning_rate": 4.7181953118484556e-05, |
| "loss": 1157.7057, |
| "step": 8620 |
| }, |
| { |
| "ce_loss_10": 3.5743127822875977, |
| "ce_loss_13": 3.49841423034668, |
| "ce_loss_2": 4.522394800186158, |
| "ce_loss_3": 4.2583330273628235, |
| "ce_loss_7": 3.751324450969696, |
| "epoch": 0.863, |
| "grad_norm": 604.0, |
| "kl_loss_10": 175.00742568969727, |
| "kl_loss_2": 2093.126568603516, |
| "kl_loss_3": 1630.9019409179687, |
| "kl_loss_7": 592.01396484375, |
| "learning_rate": 4.651140065925269e-05, |
| "loss": 1159.3387, |
| "step": 8630 |
| }, |
| { |
| "ce_loss_10": 3.507640373706818, |
| "ce_loss_13": 3.434223484992981, |
| "ce_loss_2": 4.487589573860168, |
| "ce_loss_3": 4.2153548240661625, |
| "ce_loss_7": 3.6856843709945677, |
| "epoch": 0.864, |
| "grad_norm": 588.0, |
| "kl_loss_10": 177.25660781860353, |
| "kl_loss_2": 2189.234814453125, |
| "kl_loss_3": 1705.8089416503906, |
| "kl_loss_7": 594.7530731201172, |
| "learning_rate": 4.58454148251814e-05, |
| "loss": 1175.3236, |
| "step": 8640 |
| }, |
| { |
| "ce_loss_10": 3.5295264959335326, |
| "ce_loss_13": 3.451808476448059, |
| "ce_loss_2": 4.534165596961975, |
| "ce_loss_3": 4.261823272705078, |
| "ce_loss_7": 3.7138017773628236, |
| "epoch": 0.865, |
| "grad_norm": 568.0, |
| "kl_loss_10": 177.66054000854493, |
| "kl_loss_2": 2227.2971801757812, |
| "kl_loss_3": 1739.9827575683594, |
| "kl_loss_7": 610.7921539306641, |
| "learning_rate": 4.518400232274078e-05, |
| "loss": 1162.0056, |
| "step": 8650 |
| }, |
| { |
| "ce_loss_10": 3.5479356169700624, |
| "ce_loss_13": 3.4702929258346558, |
| "ce_loss_2": 4.501330161094666, |
| "ce_loss_3": 4.237502670288086, |
| "ce_loss_7": 3.723937380313873, |
| "epoch": 0.866, |
| "grad_norm": 524.0, |
| "kl_loss_10": 179.25594482421874, |
| "kl_loss_2": 2137.7675231933595, |
| "kl_loss_3": 1671.597607421875, |
| "kl_loss_7": 602.4135375976563, |
| "learning_rate": 4.452716981234745e-05, |
| "loss": 1122.9633, |
| "step": 8660 |
| }, |
| { |
| "ce_loss_10": 3.5203991651535036, |
| "ce_loss_13": 3.447481095790863, |
| "ce_loss_2": 4.47524061203003, |
| "ce_loss_3": 4.205355083942413, |
| "ce_loss_7": 3.695466148853302, |
| "epoch": 0.867, |
| "grad_norm": 568.0, |
| "kl_loss_10": 174.45485000610353, |
| "kl_loss_2": 2135.713586425781, |
| "kl_loss_3": 1657.6833251953126, |
| "kl_loss_7": 594.2672515869141, |
| "learning_rate": 4.3874923908297335e-05, |
| "loss": 1125.4834, |
| "step": 8670 |
| }, |
| { |
| "ce_loss_10": 3.575284111499786, |
| "ce_loss_13": 3.498721444606781, |
| "ce_loss_2": 4.54740161895752, |
| "ce_loss_3": 4.281282663345337, |
| "ce_loss_7": 3.7500776290893554, |
| "epoch": 0.868, |
| "grad_norm": 596.0, |
| "kl_loss_10": 178.63047256469727, |
| "kl_loss_2": 2175.332312011719, |
| "kl_loss_3": 1702.8009948730469, |
| "kl_loss_7": 597.87509765625, |
| "learning_rate": 4.322727117869951e-05, |
| "loss": 1149.3994, |
| "step": 8680 |
| }, |
| { |
| "ce_loss_10": 3.575519359111786, |
| "ce_loss_13": 3.4998192310333254, |
| "ce_loss_2": 4.55748233795166, |
| "ce_loss_3": 4.284105372428894, |
| "ce_loss_7": 3.752281701564789, |
| "epoch": 0.869, |
| "grad_norm": 584.0, |
| "kl_loss_10": 179.47224349975585, |
| "kl_loss_2": 2189.6538024902343, |
| "kl_loss_3": 1698.4593017578125, |
| "kl_loss_7": 604.034390258789, |
| "learning_rate": 4.2584218145409916e-05, |
| "loss": 1151.0721, |
| "step": 8690 |
| }, |
| { |
| "ce_loss_10": 3.6216215252876283, |
| "ce_loss_13": 3.551214134693146, |
| "ce_loss_2": 4.543716049194336, |
| "ce_loss_3": 4.272314977645874, |
| "ce_loss_7": 3.786858594417572, |
| "epoch": 0.87, |
| "grad_norm": 600.0, |
| "kl_loss_10": 174.5644790649414, |
| "kl_loss_2": 2072.967736816406, |
| "kl_loss_3": 1598.7917419433593, |
| "kl_loss_7": 582.653207397461, |
| "learning_rate": 4.194577128396521e-05, |
| "loss": 1108.3934, |
| "step": 8700 |
| }, |
| { |
| "ce_loss_10": 3.498377776145935, |
| "ce_loss_13": 3.425851809978485, |
| "ce_loss_2": 4.466418659687042, |
| "ce_loss_3": 4.194790709018707, |
| "ce_loss_7": 3.670235824584961, |
| "epoch": 0.871, |
| "grad_norm": 506.0, |
| "kl_loss_10": 174.37066497802735, |
| "kl_loss_2": 2168.9560546875, |
| "kl_loss_3": 1689.01513671875, |
| "kl_loss_7": 590.7626220703125, |
| "learning_rate": 4.1311937023518264e-05, |
| "loss": 1166.4488, |
| "step": 8710 |
| }, |
| { |
| "ce_loss_10": 3.5134344696998596, |
| "ce_loss_13": 3.4397946119308473, |
| "ce_loss_2": 4.529711484909058, |
| "ce_loss_3": 4.263941979408264, |
| "ce_loss_7": 3.682065725326538, |
| "epoch": 0.872, |
| "grad_norm": 460.0, |
| "kl_loss_10": 171.77398529052735, |
| "kl_loss_2": 2246.7038696289064, |
| "kl_loss_3": 1774.6010986328124, |
| "kl_loss_7": 576.3054107666015, |
| "learning_rate": 4.0682721746773344e-05, |
| "loss": 1163.771, |
| "step": 8720 |
| }, |
| { |
| "ce_loss_10": 3.3905357241630556, |
| "ce_loss_13": 3.314958465099335, |
| "ce_loss_2": 4.396602368354797, |
| "ce_loss_3": 4.132218360900879, |
| "ce_loss_7": 3.5750641107559202, |
| "epoch": 0.873, |
| "grad_norm": 552.0, |
| "kl_loss_10": 175.90054779052736, |
| "kl_loss_2": 2222.7579711914063, |
| "kl_loss_3": 1749.4481201171875, |
| "kl_loss_7": 613.7805450439453, |
| "learning_rate": 4.0058131789920904e-05, |
| "loss": 1143.7059, |
| "step": 8730 |
| }, |
| { |
| "ce_loss_10": 3.5397099256515503, |
| "ce_loss_13": 3.4643173098564146, |
| "ce_loss_2": 4.497500014305115, |
| "ce_loss_3": 4.226579332351685, |
| "ce_loss_7": 3.709259867668152, |
| "epoch": 0.874, |
| "grad_norm": 572.0, |
| "kl_loss_10": 176.51957778930665, |
| "kl_loss_2": 2162.519940185547, |
| "kl_loss_3": 1680.782940673828, |
| "kl_loss_7": 600.4374298095703, |
| "learning_rate": 3.9438173442575e-05, |
| "loss": 1188.067, |
| "step": 8740 |
| }, |
| { |
| "ce_loss_10": 3.5728036165237427, |
| "ce_loss_13": 3.4973131656646728, |
| "ce_loss_2": 4.514467573165893, |
| "ce_loss_3": 4.250711810588837, |
| "ce_loss_7": 3.740223217010498, |
| "epoch": 0.875, |
| "grad_norm": 524.0, |
| "kl_loss_10": 175.66529846191406, |
| "kl_loss_2": 2112.5622009277345, |
| "kl_loss_3": 1651.0288818359375, |
| "kl_loss_7": 594.9720977783203, |
| "learning_rate": 3.882285294770937e-05, |
| "loss": 1137.6895, |
| "step": 8750 |
| }, |
| { |
| "ce_loss_10": 3.5377378940582274, |
| "ce_loss_13": 3.4619855165481566, |
| "ce_loss_2": 4.4779297590255736, |
| "ce_loss_3": 4.2127908825874325, |
| "ce_loss_7": 3.7088746547698976, |
| "epoch": 0.876, |
| "grad_norm": 600.0, |
| "kl_loss_10": 178.39902954101564, |
| "kl_loss_2": 2127.570068359375, |
| "kl_loss_3": 1648.2625427246094, |
| "kl_loss_7": 594.3859985351562, |
| "learning_rate": 3.821217650159453e-05, |
| "loss": 1155.4234, |
| "step": 8760 |
| }, |
| { |
| "ce_loss_10": 3.4084259629249574, |
| "ce_loss_13": 3.332693111896515, |
| "ce_loss_2": 4.428424310684204, |
| "ce_loss_3": 4.158231461048127, |
| "ce_loss_7": 3.6029205918312073, |
| "epoch": 0.877, |
| "grad_norm": 548.0, |
| "kl_loss_10": 180.0270034790039, |
| "kl_loss_2": 2236.819873046875, |
| "kl_loss_3": 1758.5952392578124, |
| "kl_loss_7": 625.4339080810547, |
| "learning_rate": 3.760615025373543e-05, |
| "loss": 1171.5936, |
| "step": 8770 |
| }, |
| { |
| "ce_loss_10": 3.595931589603424, |
| "ce_loss_13": 3.5179906845092774, |
| "ce_loss_2": 4.587141966819763, |
| "ce_loss_3": 4.309424257278442, |
| "ce_loss_7": 3.7760006308555605, |
| "epoch": 0.878, |
| "grad_norm": 660.0, |
| "kl_loss_10": 183.31888961791992, |
| "kl_loss_2": 2207.746258544922, |
| "kl_loss_3": 1714.543280029297, |
| "kl_loss_7": 607.3442749023437, |
| "learning_rate": 3.700478030680987e-05, |
| "loss": 1181.1754, |
| "step": 8780 |
| }, |
| { |
| "ce_loss_10": 3.5762731194496156, |
| "ce_loss_13": 3.5029969453811645, |
| "ce_loss_2": 4.536650991439819, |
| "ce_loss_3": 4.271095442771911, |
| "ce_loss_7": 3.748454582691193, |
| "epoch": 0.879, |
| "grad_norm": 502.0, |
| "kl_loss_10": 176.4349349975586, |
| "kl_loss_2": 2141.4830688476563, |
| "kl_loss_3": 1675.0247314453125, |
| "kl_loss_7": 590.1913375854492, |
| "learning_rate": 3.6408072716606344e-05, |
| "loss": 1149.3131, |
| "step": 8790 |
| }, |
| { |
| "ce_loss_10": 3.50073447227478, |
| "ce_loss_13": 3.4274021863937376, |
| "ce_loss_2": 4.501238942146301, |
| "ce_loss_3": 4.239216554164886, |
| "ce_loss_7": 3.683646392822266, |
| "epoch": 0.88, |
| "grad_norm": 600.0, |
| "kl_loss_10": 180.50948486328124, |
| "kl_loss_2": 2229.279052734375, |
| "kl_loss_3": 1758.1964477539063, |
| "kl_loss_7": 612.8555847167969, |
| "learning_rate": 3.5816033491963716e-05, |
| "loss": 1204.1847, |
| "step": 8800 |
| }, |
| { |
| "ce_loss_10": 3.3653410911560058, |
| "ce_loss_13": 3.289612293243408, |
| "ce_loss_2": 4.399015557765961, |
| "ce_loss_3": 4.129138934612274, |
| "ce_loss_7": 3.5484530568122863, |
| "epoch": 0.881, |
| "grad_norm": 696.0, |
| "kl_loss_10": 176.9532043457031, |
| "kl_loss_2": 2282.183734130859, |
| "kl_loss_3": 1791.626202392578, |
| "kl_loss_7": 607.9001495361329, |
| "learning_rate": 3.522866859471047e-05, |
| "loss": 1184.3774, |
| "step": 8810 |
| }, |
| { |
| "ce_loss_10": 3.597711908817291, |
| "ce_loss_13": 3.5282416582107543, |
| "ce_loss_2": 4.506113409996033, |
| "ce_loss_3": 4.251258683204651, |
| "ce_loss_7": 3.7641051173210145, |
| "epoch": 0.882, |
| "grad_norm": 636.0, |
| "kl_loss_10": 169.83995971679687, |
| "kl_loss_2": 2038.54423828125, |
| "kl_loss_3": 1585.0056030273438, |
| "kl_loss_7": 568.1876190185546, |
| "learning_rate": 3.46459839396045e-05, |
| "loss": 1125.9656, |
| "step": 8820 |
| }, |
| { |
| "ce_loss_10": 3.529653000831604, |
| "ce_loss_13": 3.449263334274292, |
| "ce_loss_2": 4.503918576240539, |
| "ce_loss_3": 4.2343867182731625, |
| "ce_loss_7": 3.712590277194977, |
| "epoch": 0.883, |
| "grad_norm": 576.0, |
| "kl_loss_10": 178.9514488220215, |
| "kl_loss_2": 2152.299346923828, |
| "kl_loss_3": 1671.0650329589844, |
| "kl_loss_7": 603.799105834961, |
| "learning_rate": 3.406798539427386e-05, |
| "loss": 1176.0018, |
| "step": 8830 |
| }, |
| { |
| "ce_loss_10": 3.5842846632003784, |
| "ce_loss_13": 3.510246682167053, |
| "ce_loss_2": 4.541045117378235, |
| "ce_loss_3": 4.27923276424408, |
| "ce_loss_7": 3.7581582188606264, |
| "epoch": 0.884, |
| "grad_norm": 576.0, |
| "kl_loss_10": 176.05337142944336, |
| "kl_loss_2": 2155.0933471679687, |
| "kl_loss_3": 1681.9300415039063, |
| "kl_loss_7": 595.8100616455079, |
| "learning_rate": 3.349467877915746e-05, |
| "loss": 1155.9855, |
| "step": 8840 |
| }, |
| { |
| "ce_loss_10": 3.5404534935951233, |
| "ce_loss_13": 3.4635657548904417, |
| "ce_loss_2": 4.524973630905151, |
| "ce_loss_3": 4.259057784080506, |
| "ce_loss_7": 3.7238620042800905, |
| "epoch": 0.885, |
| "grad_norm": 604.0, |
| "kl_loss_10": 178.34489822387695, |
| "kl_loss_2": 2212.789306640625, |
| "kl_loss_3": 1740.2167602539062, |
| "kl_loss_7": 609.8538391113282, |
| "learning_rate": 3.292606986744667e-05, |
| "loss": 1199.5514, |
| "step": 8850 |
| }, |
| { |
| "ce_loss_10": 3.4956598401069643, |
| "ce_loss_13": 3.4253405332565308, |
| "ce_loss_2": 4.470083999633789, |
| "ce_loss_3": 4.206580317020416, |
| "ce_loss_7": 3.6703789830207825, |
| "epoch": 0.886, |
| "grad_norm": 580.0, |
| "kl_loss_10": 174.30244827270508, |
| "kl_loss_2": 2159.222479248047, |
| "kl_loss_3": 1693.7933227539063, |
| "kl_loss_7": 599.4409484863281, |
| "learning_rate": 3.23621643850267e-05, |
| "loss": 1154.7352, |
| "step": 8860 |
| }, |
| { |
| "ce_loss_10": 3.5727248191833496, |
| "ce_loss_13": 3.496046614646912, |
| "ce_loss_2": 4.52955162525177, |
| "ce_loss_3": 4.25726010799408, |
| "ce_loss_7": 3.7465644001960756, |
| "epoch": 0.887, |
| "grad_norm": 552.0, |
| "kl_loss_10": 179.55787811279296, |
| "kl_loss_2": 2159.213214111328, |
| "kl_loss_3": 1673.28759765625, |
| "kl_loss_7": 605.2644119262695, |
| "learning_rate": 3.180296801041971e-05, |
| "loss": 1139.4904, |
| "step": 8870 |
| }, |
| { |
| "ce_loss_10": 3.595055866241455, |
| "ce_loss_13": 3.5219205260276794, |
| "ce_loss_2": 4.565229892730713, |
| "ce_loss_3": 4.302072286605835, |
| "ce_loss_7": 3.7657612919807435, |
| "epoch": 0.888, |
| "grad_norm": 488.0, |
| "kl_loss_10": 174.51052551269532, |
| "kl_loss_2": 2168.450274658203, |
| "kl_loss_3": 1696.4366394042968, |
| "kl_loss_7": 592.0915069580078, |
| "learning_rate": 3.124848637472688e-05, |
| "loss": 1132.4514, |
| "step": 8880 |
| }, |
| { |
| "ce_loss_10": 3.420267331600189, |
| "ce_loss_13": 3.346735382080078, |
| "ce_loss_2": 4.411080622673035, |
| "ce_loss_3": 4.143524849414826, |
| "ce_loss_7": 3.6017141342163086, |
| "epoch": 0.889, |
| "grad_norm": 600.0, |
| "kl_loss_10": 174.23086242675782, |
| "kl_loss_2": 2199.3413024902343, |
| "kl_loss_3": 1717.92958984375, |
| "kl_loss_7": 596.7423614501953, |
| "learning_rate": 3.069872506157212e-05, |
| "loss": 1155.8682, |
| "step": 8890 |
| }, |
| { |
| "ce_loss_10": 3.5183400988578795, |
| "ce_loss_13": 3.4446550846099853, |
| "ce_loss_2": 4.4763764381408695, |
| "ce_loss_3": 4.213694953918457, |
| "ce_loss_7": 3.693737256526947, |
| "epoch": 0.89, |
| "grad_norm": 544.0, |
| "kl_loss_10": 176.23825073242188, |
| "kl_loss_2": 2152.81796875, |
| "kl_loss_3": 1686.6082885742187, |
| "kl_loss_7": 599.2687866210938, |
| "learning_rate": 3.0153689607045842e-05, |
| "loss": 1144.8437, |
| "step": 8900 |
| }, |
| { |
| "ce_loss_10": 3.4148733854293822, |
| "ce_loss_13": 3.3367454648017882, |
| "ce_loss_2": 4.46618926525116, |
| "ce_loss_3": 4.190043389797211, |
| "ce_loss_7": 3.606413686275482, |
| "epoch": 0.891, |
| "grad_norm": 556.0, |
| "kl_loss_10": 181.56116943359376, |
| "kl_loss_2": 2316.6614379882812, |
| "kl_loss_3": 1823.7576782226563, |
| "kl_loss_7": 624.2273193359375, |
| "learning_rate": 2.9613385499648926e-05, |
| "loss": 1174.4811, |
| "step": 8910 |
| }, |
| { |
| "ce_loss_10": 3.472314703464508, |
| "ce_loss_13": 3.3953917384147645, |
| "ce_loss_2": 4.439359056949615, |
| "ce_loss_3": 4.1694392442703245, |
| "ce_loss_7": 3.6529108047485352, |
| "epoch": 0.892, |
| "grad_norm": 632.0, |
| "kl_loss_10": 176.6066520690918, |
| "kl_loss_2": 2142.8492736816406, |
| "kl_loss_3": 1665.7835510253906, |
| "kl_loss_7": 596.6286926269531, |
| "learning_rate": 2.9077818180237692e-05, |
| "loss": 1160.4215, |
| "step": 8920 |
| }, |
| { |
| "ce_loss_10": 3.5216124176979067, |
| "ce_loss_13": 3.444805955886841, |
| "ce_loss_2": 4.504645991325378, |
| "ce_loss_3": 4.23671303987503, |
| "ce_loss_7": 3.703485441207886, |
| "epoch": 0.893, |
| "grad_norm": 584.0, |
| "kl_loss_10": 176.44115447998047, |
| "kl_loss_2": 2174.382470703125, |
| "kl_loss_3": 1696.403564453125, |
| "kl_loss_7": 600.8687164306641, |
| "learning_rate": 2.8546993041969172e-05, |
| "loss": 1152.6479, |
| "step": 8930 |
| }, |
| { |
| "ce_loss_10": 3.5529621839523315, |
| "ce_loss_13": 3.4791373729705812, |
| "ce_loss_2": 4.487787294387817, |
| "ce_loss_3": 4.227098524570465, |
| "ce_loss_7": 3.7245055556297304, |
| "epoch": 0.894, |
| "grad_norm": 506.0, |
| "kl_loss_10": 174.32415542602538, |
| "kl_loss_2": 2110.938995361328, |
| "kl_loss_3": 1637.950372314453, |
| "kl_loss_7": 590.5497222900391, |
| "learning_rate": 2.802091543024671e-05, |
| "loss": 1153.5114, |
| "step": 8940 |
| }, |
| { |
| "ce_loss_10": 3.5515737652778627, |
| "ce_loss_13": 3.4770275354385376, |
| "ce_loss_2": 4.526445126533508, |
| "ce_loss_3": 4.267046928405762, |
| "ce_loss_7": 3.728025937080383, |
| "epoch": 0.895, |
| "grad_norm": 612.0, |
| "kl_loss_10": 177.65689697265626, |
| "kl_loss_2": 2195.3055725097656, |
| "kl_loss_3": 1727.4093139648437, |
| "kl_loss_7": 604.4041717529296, |
| "learning_rate": 2.7499590642665774e-05, |
| "loss": 1190.9908, |
| "step": 8950 |
| }, |
| { |
| "ce_loss_10": 3.5625943899154664, |
| "ce_loss_13": 3.4850521326065063, |
| "ce_loss_2": 4.553565168380738, |
| "ce_loss_3": 4.279750061035156, |
| "ce_loss_7": 3.739047312736511, |
| "epoch": 0.896, |
| "grad_norm": 512.0, |
| "kl_loss_10": 178.24014129638672, |
| "kl_loss_2": 2193.0329833984374, |
| "kl_loss_3": 1709.0227905273437, |
| "kl_loss_7": 602.5221313476562, |
| "learning_rate": 2.6983023928961405e-05, |
| "loss": 1147.626, |
| "step": 8960 |
| }, |
| { |
| "ce_loss_10": 3.532795751094818, |
| "ce_loss_13": 3.4568071961402893, |
| "ce_loss_2": 4.499462056159973, |
| "ce_loss_3": 4.242076885700226, |
| "ce_loss_7": 3.709202516078949, |
| "epoch": 0.897, |
| "grad_norm": 616.0, |
| "kl_loss_10": 177.7622848510742, |
| "kl_loss_2": 2147.4717651367187, |
| "kl_loss_3": 1687.235498046875, |
| "kl_loss_7": 597.6563537597656, |
| "learning_rate": 2.6471220490954628e-05, |
| "loss": 1172.1214, |
| "step": 8970 |
| }, |
| { |
| "ce_loss_10": 3.5174603939056395, |
| "ce_loss_13": 3.4463194727897646, |
| "ce_loss_2": 4.477302503585816, |
| "ce_loss_3": 4.214387357234955, |
| "ce_loss_7": 3.6873306155204775, |
| "epoch": 0.898, |
| "grad_norm": 592.0, |
| "kl_loss_10": 174.26932220458986, |
| "kl_loss_2": 2152.279821777344, |
| "kl_loss_3": 1683.4832763671875, |
| "kl_loss_7": 590.3468292236328, |
| "learning_rate": 2.596418548250029e-05, |
| "loss": 1156.035, |
| "step": 8980 |
| }, |
| { |
| "ce_loss_10": 3.5602595686912535, |
| "ce_loss_13": 3.485771131515503, |
| "ce_loss_2": 4.5211225032806395, |
| "ce_loss_3": 4.257622516155243, |
| "ce_loss_7": 3.7368709683418273, |
| "epoch": 0.899, |
| "grad_norm": 524.0, |
| "kl_loss_10": 179.2437530517578, |
| "kl_loss_2": 2158.524066162109, |
| "kl_loss_3": 1691.2223327636718, |
| "kl_loss_7": 601.7962463378906, |
| "learning_rate": 2.5461924009435368e-05, |
| "loss": 1142.976, |
| "step": 8990 |
| }, |
| { |
| "ce_loss_10": 3.5547463297843933, |
| "ce_loss_13": 3.479547905921936, |
| "ce_loss_2": 4.515283250808716, |
| "ce_loss_3": 4.250737547874451, |
| "ce_loss_7": 3.732912743091583, |
| "epoch": 0.9, |
| "grad_norm": 572.0, |
| "kl_loss_10": 177.4375427246094, |
| "kl_loss_2": 2139.4888916015625, |
| "kl_loss_3": 1664.3239318847657, |
| "kl_loss_7": 599.9839569091797, |
| "learning_rate": 2.4964441129527336e-05, |
| "loss": 1166.3201, |
| "step": 9000 |
| }, |
| { |
| "ce_loss_10": 3.553958511352539, |
| "ce_loss_13": 3.476763606071472, |
| "ce_loss_2": 4.496755647659302, |
| "ce_loss_3": 4.227215158939361, |
| "ce_loss_7": 3.7206253528594972, |
| "epoch": 0.901, |
| "grad_norm": 540.0, |
| "kl_loss_10": 174.2966407775879, |
| "kl_loss_2": 2111.5487548828123, |
| "kl_loss_3": 1639.27841796875, |
| "kl_loss_7": 584.4350708007812, |
| "learning_rate": 2.4471741852423235e-05, |
| "loss": 1125.0274, |
| "step": 9010 |
| }, |
| { |
| "ce_loss_10": 3.600440430641174, |
| "ce_loss_13": 3.522721529006958, |
| "ce_loss_2": 4.542108774185181, |
| "ce_loss_3": 4.282799339294433, |
| "ce_loss_7": 3.7742578268051146, |
| "epoch": 0.902, |
| "grad_norm": 524.0, |
| "kl_loss_10": 175.27555160522462, |
| "kl_loss_2": 2098.4271484375, |
| "kl_loss_3": 1637.176934814453, |
| "kl_loss_7": 587.0385147094727, |
| "learning_rate": 2.3983831139599287e-05, |
| "loss": 1139.7687, |
| "step": 9020 |
| }, |
| { |
| "ce_loss_10": 3.519875633716583, |
| "ce_loss_13": 3.446099603176117, |
| "ce_loss_2": 4.47457070350647, |
| "ce_loss_3": 4.212863862514496, |
| "ce_loss_7": 3.68597651720047, |
| "epoch": 0.903, |
| "grad_norm": 508.0, |
| "kl_loss_10": 174.20441055297852, |
| "kl_loss_2": 2129.7295166015624, |
| "kl_loss_3": 1661.6650817871093, |
| "kl_loss_7": 579.8323806762695, |
| "learning_rate": 2.3500713904311022e-05, |
| "loss": 1116.7298, |
| "step": 9030 |
| }, |
| { |
| "ce_loss_10": 3.5635103940963746, |
| "ce_loss_13": 3.4901776790618895, |
| "ce_loss_2": 4.492858815193176, |
| "ce_loss_3": 4.233377468585968, |
| "ce_loss_7": 3.7266834378242493, |
| "epoch": 0.904, |
| "grad_norm": 568.0, |
| "kl_loss_10": 172.6203300476074, |
| "kl_loss_2": 2067.9869079589844, |
| "kl_loss_3": 1612.4867309570313, |
| "kl_loss_7": 575.5206619262696, |
| "learning_rate": 2.3022395011543685e-05, |
| "loss": 1119.9885, |
| "step": 9040 |
| }, |
| { |
| "ce_loss_10": 3.592438757419586, |
| "ce_loss_13": 3.515104389190674, |
| "ce_loss_2": 4.541802954673767, |
| "ce_loss_3": 4.281132400035858, |
| "ce_loss_7": 3.7722238898277283, |
| "epoch": 0.905, |
| "grad_norm": 572.0, |
| "kl_loss_10": 180.47207794189453, |
| "kl_loss_2": 2144.9797241210936, |
| "kl_loss_3": 1672.9977722167969, |
| "kl_loss_7": 611.6667938232422, |
| "learning_rate": 2.2548879277963063e-05, |
| "loss": 1176.2332, |
| "step": 9050 |
| }, |
| { |
| "ce_loss_10": 3.5052724361419676, |
| "ce_loss_13": 3.43240772485733, |
| "ce_loss_2": 4.459889388084411, |
| "ce_loss_3": 4.187293374538422, |
| "ce_loss_7": 3.677665722370148, |
| "epoch": 0.906, |
| "grad_norm": 536.0, |
| "kl_loss_10": 175.61516647338868, |
| "kl_loss_2": 2136.0928955078125, |
| "kl_loss_3": 1652.1388549804688, |
| "kl_loss_7": 587.7709274291992, |
| "learning_rate": 2.208017147186736e-05, |
| "loss": 1112.7982, |
| "step": 9060 |
| }, |
| { |
| "ce_loss_10": 3.5033626675605776, |
| "ce_loss_13": 3.4270050883293153, |
| "ce_loss_2": 4.460723853111267, |
| "ce_loss_3": 4.200226056575775, |
| "ce_loss_7": 3.675853359699249, |
| "epoch": 0.907, |
| "grad_norm": 532.0, |
| "kl_loss_10": 175.80412521362305, |
| "kl_loss_2": 2135.014111328125, |
| "kl_loss_3": 1673.9324096679688, |
| "kl_loss_7": 594.3165740966797, |
| "learning_rate": 2.1616276313139227e-05, |
| "loss": 1130.9125, |
| "step": 9070 |
| }, |
| { |
| "ce_loss_10": 3.540289318561554, |
| "ce_loss_13": 3.4620949029922485, |
| "ce_loss_2": 4.504480719566345, |
| "ce_loss_3": 4.243532609939575, |
| "ce_loss_7": 3.7148184418678283, |
| "epoch": 0.908, |
| "grad_norm": 564.0, |
| "kl_loss_10": 176.60092849731444, |
| "kl_loss_2": 2138.388262939453, |
| "kl_loss_3": 1670.2328552246095, |
| "kl_loss_7": 593.4554962158203, |
| "learning_rate": 2.1157198473197415e-05, |
| "loss": 1155.7779, |
| "step": 9080 |
| }, |
| { |
| "ce_loss_10": 3.608424699306488, |
| "ce_loss_13": 3.5318522691726684, |
| "ce_loss_2": 4.569310665130615, |
| "ce_loss_3": 4.307307338714599, |
| "ce_loss_7": 3.7887983441352846, |
| "epoch": 0.909, |
| "grad_norm": 532.0, |
| "kl_loss_10": 179.34665451049804, |
| "kl_loss_2": 2147.910009765625, |
| "kl_loss_3": 1676.3675476074218, |
| "kl_loss_7": 609.1422882080078, |
| "learning_rate": 2.0702942574950812e-05, |
| "loss": 1150.5193, |
| "step": 9090 |
| }, |
| { |
| "ce_loss_10": 3.531160354614258, |
| "ce_loss_13": 3.4534537196159363, |
| "ce_loss_2": 4.502471184730529, |
| "ce_loss_3": 4.2366371870040895, |
| "ce_loss_7": 3.7111218690872194, |
| "epoch": 0.91, |
| "grad_norm": 576.0, |
| "kl_loss_10": 178.72406845092775, |
| "kl_loss_2": 2166.001983642578, |
| "kl_loss_3": 1694.5049133300781, |
| "kl_loss_7": 603.0742904663086, |
| "learning_rate": 2.025351319275137e-05, |
| "loss": 1154.2008, |
| "step": 9100 |
| }, |
| { |
| "ce_loss_10": 3.657759261131287, |
| "ce_loss_13": 3.5795228123664855, |
| "ce_loss_2": 4.611237382888794, |
| "ce_loss_3": 4.346826362609863, |
| "ce_loss_7": 3.829502213001251, |
| "epoch": 0.911, |
| "grad_norm": 568.0, |
| "kl_loss_10": 182.78317489624024, |
| "kl_loss_2": 2152.8356689453126, |
| "kl_loss_3": 1681.0592834472657, |
| "kl_loss_7": 612.198373413086, |
| "learning_rate": 1.9808914852347816e-05, |
| "loss": 1183.935, |
| "step": 9110 |
| }, |
| { |
| "ce_loss_10": 3.5076343536376955, |
| "ce_loss_13": 3.4301217675209044, |
| "ce_loss_2": 4.475468993186951, |
| "ce_loss_3": 4.200416827201844, |
| "ce_loss_7": 3.690832221508026, |
| "epoch": 0.912, |
| "grad_norm": 520.0, |
| "kl_loss_10": 177.99471740722657, |
| "kl_loss_2": 2138.6880798339844, |
| "kl_loss_3": 1648.8575317382813, |
| "kl_loss_7": 603.4748657226562, |
| "learning_rate": 1.9369152030840554e-05, |
| "loss": 1133.9587, |
| "step": 9120 |
| }, |
| { |
| "ce_loss_10": 3.5838579297065736, |
| "ce_loss_13": 3.5108367919921877, |
| "ce_loss_2": 4.5474550247192385, |
| "ce_loss_3": 4.282457900047302, |
| "ce_loss_7": 3.752550458908081, |
| "epoch": 0.913, |
| "grad_norm": 592.0, |
| "kl_loss_10": 175.99298171997071, |
| "kl_loss_2": 2175.4973083496093, |
| "kl_loss_3": 1708.1368530273437, |
| "kl_loss_7": 595.3381591796875, |
| "learning_rate": 1.893422915663645e-05, |
| "loss": 1154.3063, |
| "step": 9130 |
| }, |
| { |
| "ce_loss_10": 3.4526755094528196, |
| "ce_loss_13": 3.376223611831665, |
| "ce_loss_2": 4.463929057121277, |
| "ce_loss_3": 4.188671815395355, |
| "ce_loss_7": 3.6436040878295897, |
| "epoch": 0.914, |
| "grad_norm": 528.0, |
| "kl_loss_10": 178.96754608154296, |
| "kl_loss_2": 2226.8522216796873, |
| "kl_loss_3": 1741.089337158203, |
| "kl_loss_7": 614.4321655273437, |
| "learning_rate": 1.850415060940386e-05, |
| "loss": 1177.2793, |
| "step": 9140 |
| }, |
| { |
| "ce_loss_10": 3.577260196208954, |
| "ce_loss_13": 3.4996687054634092, |
| "ce_loss_2": 4.506159293651581, |
| "ce_loss_3": 4.247144281864166, |
| "ce_loss_7": 3.7475706696510316, |
| "epoch": 0.915, |
| "grad_norm": 576.0, |
| "kl_loss_10": 176.05650253295897, |
| "kl_loss_2": 2092.3647644042967, |
| "kl_loss_3": 1626.7418640136718, |
| "kl_loss_7": 590.4974914550781, |
| "learning_rate": 1.8078920720028978e-05, |
| "loss": 1136.8029, |
| "step": 9150 |
| }, |
| { |
| "ce_loss_10": 3.5006513595581055, |
| "ce_loss_13": 3.4293729782104494, |
| "ce_loss_2": 4.446831393241882, |
| "ce_loss_3": 4.180594873428345, |
| "ce_loss_7": 3.670674538612366, |
| "epoch": 0.916, |
| "grad_norm": 584.0, |
| "kl_loss_10": 173.59793243408203, |
| "kl_loss_2": 2105.566021728516, |
| "kl_loss_3": 1637.2901428222656, |
| "kl_loss_7": 585.1143585205078, |
| "learning_rate": 1.765854377057219e-05, |
| "loss": 1156.8438, |
| "step": 9160 |
| }, |
| { |
| "ce_loss_10": 3.4831743359565737, |
| "ce_loss_13": 3.410732936859131, |
| "ce_loss_2": 4.439985752105713, |
| "ce_loss_3": 4.173957622051239, |
| "ce_loss_7": 3.652401328086853, |
| "epoch": 0.917, |
| "grad_norm": 552.0, |
| "kl_loss_10": 172.13598022460937, |
| "kl_loss_2": 2136.6128540039062, |
| "kl_loss_3": 1663.3346557617188, |
| "kl_loss_7": 585.5476837158203, |
| "learning_rate": 1.724302399422456e-05, |
| "loss": 1148.3008, |
| "step": 9170 |
| }, |
| { |
| "ce_loss_10": 3.4418306827545164, |
| "ce_loss_13": 3.365187871456146, |
| "ce_loss_2": 4.424201607704163, |
| "ce_loss_3": 4.15188490152359, |
| "ce_loss_7": 3.617412793636322, |
| "epoch": 0.918, |
| "grad_norm": 540.0, |
| "kl_loss_10": 181.21381607055665, |
| "kl_loss_2": 2192.6884948730467, |
| "kl_loss_3": 1711.2826416015625, |
| "kl_loss_7": 608.8610565185547, |
| "learning_rate": 1.683236557526574e-05, |
| "loss": 1171.7086, |
| "step": 9180 |
| }, |
| { |
| "ce_loss_10": 3.5525336861610413, |
| "ce_loss_13": 3.479281461238861, |
| "ce_loss_2": 4.4706899404525755, |
| "ce_loss_3": 4.209463405609131, |
| "ce_loss_7": 3.7199216723442077, |
| "epoch": 0.919, |
| "grad_norm": 552.0, |
| "kl_loss_10": 172.36394195556642, |
| "kl_loss_2": 2051.388214111328, |
| "kl_loss_3": 1592.7936218261718, |
| "kl_loss_7": 577.4165252685547, |
| "learning_rate": 1.6426572649021475e-05, |
| "loss": 1138.484, |
| "step": 9190 |
| }, |
| { |
| "ce_loss_10": 3.5873886704444886, |
| "ce_loss_13": 3.515676808357239, |
| "ce_loss_2": 4.504871940612793, |
| "ce_loss_3": 4.245928645133972, |
| "ce_loss_7": 3.7530444860458374, |
| "epoch": 0.92, |
| "grad_norm": 560.0, |
| "kl_loss_10": 175.5730728149414, |
| "kl_loss_2": 2067.4602783203127, |
| "kl_loss_3": 1611.9698425292968, |
| "kl_loss_7": 583.8149566650391, |
| "learning_rate": 1.6025649301821876e-05, |
| "loss": 1125.4826, |
| "step": 9200 |
| }, |
| { |
| "ce_loss_10": 3.579323208332062, |
| "ce_loss_13": 3.50703387260437, |
| "ce_loss_2": 4.493245768547058, |
| "ce_loss_3": 4.232775616645813, |
| "ce_loss_7": 3.7471976399421694, |
| "epoch": 0.921, |
| "grad_norm": 620.0, |
| "kl_loss_10": 177.73348236083984, |
| "kl_loss_2": 2084.493505859375, |
| "kl_loss_3": 1628.828369140625, |
| "kl_loss_7": 594.5904907226562, |
| "learning_rate": 1.5629599570960716e-05, |
| "loss": 1123.5703, |
| "step": 9210 |
| }, |
| { |
| "ce_loss_10": 3.482688879966736, |
| "ce_loss_13": 3.4090826153755187, |
| "ce_loss_2": 4.466846561431884, |
| "ce_loss_3": 4.195373678207398, |
| "ce_loss_7": 3.658596193790436, |
| "epoch": 0.922, |
| "grad_norm": 588.0, |
| "kl_loss_10": 175.80986251831055, |
| "kl_loss_2": 2196.6754943847654, |
| "kl_loss_3": 1711.6648010253907, |
| "kl_loss_7": 598.8134338378907, |
| "learning_rate": 1.5238427444654367e-05, |
| "loss": 1155.2326, |
| "step": 9220 |
| }, |
| { |
| "ce_loss_10": 3.543232810497284, |
| "ce_loss_13": 3.467606770992279, |
| "ce_loss_2": 4.491106653213501, |
| "ce_loss_3": 4.219079720973968, |
| "ce_loss_7": 3.710583233833313, |
| "epoch": 0.923, |
| "grad_norm": 548.0, |
| "kl_loss_10": 174.20623474121095, |
| "kl_loss_2": 2120.3849365234373, |
| "kl_loss_3": 1639.1658142089843, |
| "kl_loss_7": 584.3361358642578, |
| "learning_rate": 1.4852136862001764e-05, |
| "loss": 1130.8816, |
| "step": 9230 |
| }, |
| { |
| "ce_loss_10": 3.5088143348693848, |
| "ce_loss_13": 3.435594344139099, |
| "ce_loss_2": 4.446617817878723, |
| "ce_loss_3": 4.185671401023865, |
| "ce_loss_7": 3.681156051158905, |
| "epoch": 0.924, |
| "grad_norm": 584.0, |
| "kl_loss_10": 172.72551879882812, |
| "kl_loss_2": 2097.9775573730467, |
| "kl_loss_3": 1637.147705078125, |
| "kl_loss_7": 588.3473190307617, |
| "learning_rate": 1.4470731712944884e-05, |
| "loss": 1146.0963, |
| "step": 9240 |
| }, |
| { |
| "ce_loss_10": 3.5362769246101378, |
| "ce_loss_13": 3.461189365386963, |
| "ce_loss_2": 4.501055717468262, |
| "ce_loss_3": 4.224046432971955, |
| "ce_loss_7": 3.714902651309967, |
| "epoch": 0.925, |
| "grad_norm": 548.0, |
| "kl_loss_10": 178.0212043762207, |
| "kl_loss_2": 2145.880969238281, |
| "kl_loss_3": 1660.7241577148438, |
| "kl_loss_7": 597.9191680908203, |
| "learning_rate": 1.4094215838229174e-05, |
| "loss": 1173.0712, |
| "step": 9250 |
| }, |
| { |
| "ce_loss_10": 3.498918581008911, |
| "ce_loss_13": 3.4251022219657896, |
| "ce_loss_2": 4.4930708646774296, |
| "ce_loss_3": 4.217134141921997, |
| "ce_loss_7": 3.675217390060425, |
| "epoch": 0.926, |
| "grad_norm": 628.0, |
| "kl_loss_10": 177.476513671875, |
| "kl_loss_2": 2207.0631896972654, |
| "kl_loss_3": 1717.4895629882812, |
| "kl_loss_7": 603.3580856323242, |
| "learning_rate": 1.372259302936546e-05, |
| "loss": 1205.574, |
| "step": 9260 |
| }, |
| { |
| "ce_loss_10": 3.6163718700408936, |
| "ce_loss_13": 3.537315881252289, |
| "ce_loss_2": 4.571975326538086, |
| "ce_loss_3": 4.302231848239899, |
| "ce_loss_7": 3.783820962905884, |
| "epoch": 0.927, |
| "grad_norm": 576.0, |
| "kl_loss_10": 181.85763092041014, |
| "kl_loss_2": 2140.0186767578125, |
| "kl_loss_3": 1662.9603210449218, |
| "kl_loss_7": 600.1376983642579, |
| "learning_rate": 1.3355867028591206e-05, |
| "loss": 1136.2721, |
| "step": 9270 |
| }, |
| { |
| "ce_loss_10": 3.514796030521393, |
| "ce_loss_13": 3.440743112564087, |
| "ce_loss_2": 4.449939727783203, |
| "ce_loss_3": 4.1810842752456665, |
| "ce_loss_7": 3.683949875831604, |
| "epoch": 0.928, |
| "grad_norm": 564.0, |
| "kl_loss_10": 175.13030853271485, |
| "kl_loss_2": 2107.975048828125, |
| "kl_loss_3": 1633.0731262207032, |
| "kl_loss_7": 589.7343017578125, |
| "learning_rate": 1.2994041528833267e-05, |
| "loss": 1127.6617, |
| "step": 9280 |
| }, |
| { |
| "ce_loss_10": 3.5171499490737914, |
| "ce_loss_13": 3.440678071975708, |
| "ce_loss_2": 4.470655179023742, |
| "ce_loss_3": 4.200739192962646, |
| "ce_loss_7": 3.690466821193695, |
| "epoch": 0.929, |
| "grad_norm": 584.0, |
| "kl_loss_10": 174.82103958129883, |
| "kl_loss_2": 2150.8757751464846, |
| "kl_loss_3": 1677.1400146484375, |
| "kl_loss_7": 592.8601806640625, |
| "learning_rate": 1.2637120173670358e-05, |
| "loss": 1145.5388, |
| "step": 9290 |
| }, |
| { |
| "ce_loss_10": 3.5360547065734864, |
| "ce_loss_13": 3.459415102005005, |
| "ce_loss_2": 4.509140729904175, |
| "ce_loss_3": 4.243091595172882, |
| "ce_loss_7": 3.717022383213043, |
| "epoch": 0.93, |
| "grad_norm": 616.0, |
| "kl_loss_10": 177.1988067626953, |
| "kl_loss_2": 2160.6812377929687, |
| "kl_loss_3": 1681.2325256347656, |
| "kl_loss_7": 601.0645294189453, |
| "learning_rate": 1.2285106557296478e-05, |
| "loss": 1155.5311, |
| "step": 9300 |
| }, |
| { |
| "ce_loss_10": 3.4133058071136473, |
| "ce_loss_13": 3.340100085735321, |
| "ce_loss_2": 4.45502986907959, |
| "ce_loss_3": 4.176646625995636, |
| "ce_loss_7": 3.593162167072296, |
| "epoch": 0.931, |
| "grad_norm": 684.0, |
| "kl_loss_10": 177.049959564209, |
| "kl_loss_2": 2280.7558776855467, |
| "kl_loss_3": 1784.5312927246093, |
| "kl_loss_7": 606.032177734375, |
| "learning_rate": 1.1938004224484989e-05, |
| "loss": 1177.6771, |
| "step": 9310 |
| }, |
| { |
| "ce_loss_10": 3.6532346606254578, |
| "ce_loss_13": 3.5762033224105836, |
| "ce_loss_2": 4.59397509098053, |
| "ce_loss_3": 4.32731124162674, |
| "ce_loss_7": 3.8223699569702148, |
| "epoch": 0.932, |
| "grad_norm": 552.0, |
| "kl_loss_10": 179.57099151611328, |
| "kl_loss_2": 2131.067413330078, |
| "kl_loss_3": 1656.3818481445312, |
| "kl_loss_7": 601.6865631103516, |
| "learning_rate": 1.1595816670552429e-05, |
| "loss": 1167.2541, |
| "step": 9320 |
| }, |
| { |
| "ce_loss_10": 3.582180309295654, |
| "ce_loss_13": 3.5061400294303895, |
| "ce_loss_2": 4.524780786037445, |
| "ce_loss_3": 4.259365129470825, |
| "ce_loss_7": 3.747998225688934, |
| "epoch": 0.933, |
| "grad_norm": 568.0, |
| "kl_loss_10": 175.25698852539062, |
| "kl_loss_2": 2109.6074279785157, |
| "kl_loss_3": 1638.4892211914062, |
| "kl_loss_7": 583.7247375488281, |
| "learning_rate": 1.1258547341323699e-05, |
| "loss": 1126.3885, |
| "step": 9330 |
| }, |
| { |
| "ce_loss_10": 3.6068438053131104, |
| "ce_loss_13": 3.5321076273918153, |
| "ce_loss_2": 4.546216082572937, |
| "ce_loss_3": 4.2822174549102785, |
| "ce_loss_7": 3.7781530022621155, |
| "epoch": 0.934, |
| "grad_norm": 584.0, |
| "kl_loss_10": 177.80956649780273, |
| "kl_loss_2": 2142.086737060547, |
| "kl_loss_3": 1668.28828125, |
| "kl_loss_7": 595.8422210693359, |
| "learning_rate": 1.0926199633097156e-05, |
| "loss": 1139.4527, |
| "step": 9340 |
| }, |
| { |
| "ce_loss_10": 3.610630822181702, |
| "ce_loss_13": 3.540286922454834, |
| "ce_loss_2": 4.522486686706543, |
| "ce_loss_3": 4.263027024269104, |
| "ce_loss_7": 3.7751463413238526, |
| "epoch": 0.935, |
| "grad_norm": 568.0, |
| "kl_loss_10": 172.0374610900879, |
| "kl_loss_2": 2078.3720947265624, |
| "kl_loss_3": 1611.571875, |
| "kl_loss_7": 582.5087860107421, |
| "learning_rate": 1.0598776892610684e-05, |
| "loss": 1147.9141, |
| "step": 9350 |
| }, |
| { |
| "ce_loss_10": 3.4225520491600037, |
| "ce_loss_13": 3.350025403499603, |
| "ce_loss_2": 4.415711855888366, |
| "ce_loss_3": 4.137125706672668, |
| "ce_loss_7": 3.5997640252113343, |
| "epoch": 0.936, |
| "grad_norm": 552.0, |
| "kl_loss_10": 173.3252960205078, |
| "kl_loss_2": 2195.4003051757813, |
| "kl_loss_3": 1704.4373046875, |
| "kl_loss_7": 593.9061553955078, |
| "learning_rate": 1.0276282417007399e-05, |
| "loss": 1147.3932, |
| "step": 9360 |
| }, |
| { |
| "ce_loss_10": 3.581203269958496, |
| "ce_loss_13": 3.5097854137420654, |
| "ce_loss_2": 4.501006007194519, |
| "ce_loss_3": 4.243209981918335, |
| "ce_loss_7": 3.747337484359741, |
| "epoch": 0.937, |
| "grad_norm": 596.0, |
| "kl_loss_10": 171.74332122802736, |
| "kl_loss_2": 2068.851727294922, |
| "kl_loss_3": 1615.05986328125, |
| "kl_loss_7": 580.8333526611328, |
| "learning_rate": 9.958719453803277e-06, |
| "loss": 1127.7196, |
| "step": 9370 |
| }, |
| { |
| "ce_loss_10": 3.578359854221344, |
| "ce_loss_13": 3.504493975639343, |
| "ce_loss_2": 4.52911410331726, |
| "ce_loss_3": 4.265734839439392, |
| "ce_loss_7": 3.757120943069458, |
| "epoch": 0.938, |
| "grad_norm": 568.0, |
| "kl_loss_10": 176.9942184448242, |
| "kl_loss_2": 2134.3438110351562, |
| "kl_loss_3": 1664.9087463378905, |
| "kl_loss_7": 601.7982177734375, |
| "learning_rate": 9.646091200853802e-06, |
| "loss": 1132.6439, |
| "step": 9380 |
| }, |
| { |
| "ce_loss_10": 3.5366848587989805, |
| "ce_loss_13": 3.4644816398620604, |
| "ce_loss_2": 4.483027625083923, |
| "ce_loss_3": 4.215219330787659, |
| "ce_loss_7": 3.704088735580444, |
| "epoch": 0.939, |
| "grad_norm": 536.0, |
| "kl_loss_10": 172.74244918823243, |
| "kl_loss_2": 2099.378448486328, |
| "kl_loss_3": 1622.1510986328126, |
| "kl_loss_7": 583.5708526611328, |
| "learning_rate": 9.338400806321978e-06, |
| "loss": 1100.1545, |
| "step": 9390 |
| }, |
| { |
| "ce_loss_10": 3.571504032611847, |
| "ce_loss_13": 3.493305134773254, |
| "ce_loss_2": 4.510696125030518, |
| "ce_loss_3": 4.24888288974762, |
| "ce_loss_7": 3.742924678325653, |
| "epoch": 0.94, |
| "grad_norm": 516.0, |
| "kl_loss_10": 177.7894515991211, |
| "kl_loss_2": 2105.6502197265627, |
| "kl_loss_3": 1641.3450012207031, |
| "kl_loss_7": 590.5869018554688, |
| "learning_rate": 9.035651368646646e-06, |
| "loss": 1131.6762, |
| "step": 9400 |
| }, |
| { |
| "ce_loss_10": 3.572381889820099, |
| "ce_loss_13": 3.5001948475837708, |
| "ce_loss_2": 4.5045966625213625, |
| "ce_loss_3": 4.241596531867981, |
| "ce_loss_7": 3.742561626434326, |
| "epoch": 0.941, |
| "grad_norm": 612.0, |
| "kl_loss_10": 173.46388778686523, |
| "kl_loss_2": 2088.4161376953125, |
| "kl_loss_3": 1622.1473693847656, |
| "kl_loss_7": 583.22626953125, |
| "learning_rate": 8.737845936511335e-06, |
| "loss": 1133.2381, |
| "step": 9410 |
| }, |
| { |
| "ce_loss_10": 3.522591459751129, |
| "ce_loss_13": 3.446700024604797, |
| "ce_loss_2": 4.507854294776917, |
| "ce_loss_3": 4.236737239360809, |
| "ce_loss_7": 3.698591649532318, |
| "epoch": 0.942, |
| "grad_norm": 572.0, |
| "kl_loss_10": 178.92413635253905, |
| "kl_loss_2": 2186.715954589844, |
| "kl_loss_3": 1705.135888671875, |
| "kl_loss_7": 600.7923004150391, |
| "learning_rate": 8.444987508813451e-06, |
| "loss": 1149.7434, |
| "step": 9420 |
| }, |
| { |
| "ce_loss_10": 3.475817048549652, |
| "ce_loss_13": 3.3989389657974245, |
| "ce_loss_2": 4.473474383354187, |
| "ce_loss_3": 4.199087584018708, |
| "ce_loss_7": 3.6562391996383665, |
| "epoch": 0.943, |
| "grad_norm": 628.0, |
| "kl_loss_10": 179.55375061035156, |
| "kl_loss_2": 2263.5999450683594, |
| "kl_loss_3": 1768.8511352539062, |
| "kl_loss_7": 615.3503479003906, |
| "learning_rate": 8.157079034633974e-06, |
| "loss": 1178.9379, |
| "step": 9430 |
| }, |
| { |
| "ce_loss_10": 3.473416876792908, |
| "ce_loss_13": 3.40051189661026, |
| "ce_loss_2": 4.446794199943542, |
| "ce_loss_3": 4.178017342090607, |
| "ce_loss_7": 3.6506085276603697, |
| "epoch": 0.944, |
| "grad_norm": 552.0, |
| "kl_loss_10": 174.8175079345703, |
| "kl_loss_2": 2187.4151916503906, |
| "kl_loss_3": 1711.5777709960937, |
| "kl_loss_7": 600.643310546875, |
| "learning_rate": 7.874123413208145e-06, |
| "loss": 1147.171, |
| "step": 9440 |
| }, |
| { |
| "ce_loss_10": 3.445332610607147, |
| "ce_loss_13": 3.369913935661316, |
| "ce_loss_2": 4.434185910224914, |
| "ce_loss_3": 4.161804282665253, |
| "ce_loss_7": 3.6255853891372682, |
| "epoch": 0.945, |
| "grad_norm": 572.0, |
| "kl_loss_10": 175.52419662475586, |
| "kl_loss_2": 2184.3239013671873, |
| "kl_loss_3": 1698.89501953125, |
| "kl_loss_7": 598.6286346435547, |
| "learning_rate": 7.59612349389599e-06, |
| "loss": 1155.226, |
| "step": 9450 |
| }, |
| { |
| "ce_loss_10": 3.534553039073944, |
| "ce_loss_13": 3.462403440475464, |
| "ce_loss_2": 4.465369653701782, |
| "ce_loss_3": 4.198646211624146, |
| "ce_loss_7": 3.708206284046173, |
| "epoch": 0.946, |
| "grad_norm": 580.0, |
| "kl_loss_10": 172.39571685791014, |
| "kl_loss_2": 2074.147509765625, |
| "kl_loss_3": 1600.0551879882812, |
| "kl_loss_7": 581.5289535522461, |
| "learning_rate": 7.323082076153509e-06, |
| "loss": 1126.5964, |
| "step": 9460 |
| }, |
| { |
| "ce_loss_10": 3.576580452919006, |
| "ce_loss_13": 3.503724229335785, |
| "ce_loss_2": 4.510397839546203, |
| "ce_loss_3": 4.244443106651306, |
| "ce_loss_7": 3.7473479986190794, |
| "epoch": 0.947, |
| "grad_norm": 572.0, |
| "kl_loss_10": 179.0567657470703, |
| "kl_loss_2": 2087.8162841796875, |
| "kl_loss_3": 1617.9851928710937, |
| "kl_loss_7": 593.7520263671875, |
| "learning_rate": 7.055001909504755e-06, |
| "loss": 1153.5377, |
| "step": 9470 |
| }, |
| { |
| "ce_loss_10": 3.6078381657600405, |
| "ce_loss_13": 3.5327057957649233, |
| "ce_loss_2": 4.550836896896362, |
| "ce_loss_3": 4.287693047523499, |
| "ce_loss_7": 3.7842918038368225, |
| "epoch": 0.948, |
| "grad_norm": 616.0, |
| "kl_loss_10": 177.3563217163086, |
| "kl_loss_2": 2119.7028747558593, |
| "kl_loss_3": 1649.5453063964844, |
| "kl_loss_7": 593.8896911621093, |
| "learning_rate": 6.791885693514133e-06, |
| "loss": 1138.076, |
| "step": 9480 |
| }, |
| { |
| "ce_loss_10": 3.5228418946266173, |
| "ce_loss_13": 3.4471074819564818, |
| "ce_loss_2": 4.505965852737427, |
| "ce_loss_3": 4.226520001888275, |
| "ce_loss_7": 3.6976125478744506, |
| "epoch": 0.949, |
| "grad_norm": 544.0, |
| "kl_loss_10": 179.4360237121582, |
| "kl_loss_2": 2204.2863159179688, |
| "kl_loss_3": 1709.0836547851563, |
| "kl_loss_7": 603.0492492675781, |
| "learning_rate": 6.533736077758867e-06, |
| "loss": 1164.073, |
| "step": 9490 |
| }, |
| { |
| "ce_loss_10": 3.480616366863251, |
| "ce_loss_13": 3.4073901891708376, |
| "ce_loss_2": 4.4987491250038145, |
| "ce_loss_3": 4.22996586561203, |
| "ce_loss_7": 3.6651357769966126, |
| "epoch": 0.95, |
| "grad_norm": 596.0, |
| "kl_loss_10": 179.7611167907715, |
| "kl_loss_2": 2254.7767639160156, |
| "kl_loss_3": 1772.1949829101563, |
| "kl_loss_7": 613.6569396972657, |
| "learning_rate": 6.2805556618028556e-06, |
| "loss": 1174.4523, |
| "step": 9500 |
| }, |
| { |
| "ce_loss_10": 3.5753507733345034, |
| "ce_loss_13": 3.5015788078308105, |
| "ce_loss_2": 4.508717775344849, |
| "ce_loss_3": 4.236187517642975, |
| "ce_loss_7": 3.735841393470764, |
| "epoch": 0.951, |
| "grad_norm": 600.0, |
| "kl_loss_10": 171.3624740600586, |
| "kl_loss_2": 2070.820721435547, |
| "kl_loss_3": 1594.0807983398438, |
| "kl_loss_7": 569.2584503173828, |
| "learning_rate": 6.032346995169968e-06, |
| "loss": 1091.2504, |
| "step": 9510 |
| }, |
| { |
| "ce_loss_10": 3.5802130341529845, |
| "ce_loss_13": 3.505545949935913, |
| "ce_loss_2": 4.52126247882843, |
| "ce_loss_3": 4.2531510353088375, |
| "ce_loss_7": 3.7508664727211, |
| "epoch": 0.952, |
| "grad_norm": 572.0, |
| "kl_loss_10": 175.58048248291016, |
| "kl_loss_2": 2115.77294921875, |
| "kl_loss_3": 1640.845928955078, |
| "kl_loss_7": 590.4191436767578, |
| "learning_rate": 5.789112577318789e-06, |
| "loss": 1125.406, |
| "step": 9520 |
| }, |
| { |
| "ce_loss_10": 3.5525818467140198, |
| "ce_loss_13": 3.474942719936371, |
| "ce_loss_2": 4.529764556884766, |
| "ce_loss_3": 4.262361979484558, |
| "ce_loss_7": 3.725843298435211, |
| "epoch": 0.953, |
| "grad_norm": 560.0, |
| "kl_loss_10": 178.63978881835936, |
| "kl_loss_2": 2187.347344970703, |
| "kl_loss_3": 1713.5655639648437, |
| "kl_loss_7": 604.028662109375, |
| "learning_rate": 5.550854857617194e-06, |
| "loss": 1138.3246, |
| "step": 9530 |
| }, |
| { |
| "ce_loss_10": 3.5410927653312685, |
| "ce_loss_13": 3.464462494850159, |
| "ce_loss_2": 4.532546710968018, |
| "ce_loss_3": 4.2611222743988035, |
| "ce_loss_7": 3.717101490497589, |
| "epoch": 0.954, |
| "grad_norm": 596.0, |
| "kl_loss_10": 179.71324462890624, |
| "kl_loss_2": 2218.49970703125, |
| "kl_loss_3": 1729.5723754882813, |
| "kl_loss_7": 606.5572113037109, |
| "learning_rate": 5.317576235317756e-06, |
| "loss": 1164.7152, |
| "step": 9540 |
| }, |
| { |
| "ce_loss_10": 3.567497718334198, |
| "ce_loss_13": 3.4959131717681884, |
| "ce_loss_2": 4.49013991355896, |
| "ce_loss_3": 4.226358330249786, |
| "ce_loss_7": 3.7339030742645263, |
| "epoch": 0.955, |
| "grad_norm": 580.0, |
| "kl_loss_10": 171.57061843872071, |
| "kl_loss_2": 2045.03349609375, |
| "kl_loss_3": 1581.6937194824218, |
| "kl_loss_7": 573.2978149414063, |
| "learning_rate": 5.089279059533658e-06, |
| "loss": 1144.5578, |
| "step": 9550 |
| }, |
| { |
| "ce_loss_10": 3.6264307737350463, |
| "ce_loss_13": 3.549366092681885, |
| "ce_loss_2": 4.555849361419678, |
| "ce_loss_3": 4.291936588287354, |
| "ce_loss_7": 3.796077787876129, |
| "epoch": 0.956, |
| "grad_norm": 532.0, |
| "kl_loss_10": 180.97408142089844, |
| "kl_loss_2": 2101.3386962890627, |
| "kl_loss_3": 1636.1171264648438, |
| "kl_loss_7": 603.2738311767578, |
| "learning_rate": 4.865965629214819e-06, |
| "loss": 1128.9252, |
| "step": 9560 |
| }, |
| { |
| "ce_loss_10": 3.5740526914596558, |
| "ce_loss_13": 3.4976862549781798, |
| "ce_loss_2": 4.531564974784851, |
| "ce_loss_3": 4.273219418525696, |
| "ce_loss_7": 3.7471871614456176, |
| "epoch": 0.957, |
| "grad_norm": 496.0, |
| "kl_loss_10": 178.81691131591796, |
| "kl_loss_2": 2162.70166015625, |
| "kl_loss_3": 1696.7497314453126, |
| "kl_loss_7": 603.7053924560547, |
| "learning_rate": 4.6476381931251366e-06, |
| "loss": 1126.9263, |
| "step": 9570 |
| }, |
| { |
| "ce_loss_10": 3.5494153618812563, |
| "ce_loss_13": 3.475415658950806, |
| "ce_loss_2": 4.496997284889221, |
| "ce_loss_3": 4.230398142337799, |
| "ce_loss_7": 3.728412318229675, |
| "epoch": 0.958, |
| "grad_norm": 496.0, |
| "kl_loss_10": 176.03445205688476, |
| "kl_loss_2": 2117.532794189453, |
| "kl_loss_3": 1643.0480895996093, |
| "kl_loss_7": 594.0362182617188, |
| "learning_rate": 4.434298949819449e-06, |
| "loss": 1135.89, |
| "step": 9580 |
| }, |
| { |
| "ce_loss_10": 3.5075241327285767, |
| "ce_loss_13": 3.4301467418670653, |
| "ce_loss_2": 4.514026093482971, |
| "ce_loss_3": 4.236743009090423, |
| "ce_loss_7": 3.6883852958679197, |
| "epoch": 0.959, |
| "grad_norm": 584.0, |
| "kl_loss_10": 182.3026496887207, |
| "kl_loss_2": 2271.594354248047, |
| "kl_loss_3": 1773.125311279297, |
| "kl_loss_7": 624.3575988769531, |
| "learning_rate": 4.2259500476214406e-06, |
| "loss": 1183.4918, |
| "step": 9590 |
| }, |
| { |
| "ce_loss_10": 3.491668391227722, |
| "ce_loss_13": 3.4157654523849486, |
| "ce_loss_2": 4.465724205970764, |
| "ce_loss_3": 4.201405656337738, |
| "ce_loss_7": 3.665540862083435, |
| "epoch": 0.96, |
| "grad_norm": 556.0, |
| "kl_loss_10": 177.03348541259766, |
| "kl_loss_2": 2184.085583496094, |
| "kl_loss_3": 1717.5306762695313, |
| "kl_loss_7": 602.626220703125, |
| "learning_rate": 4.02259358460233e-06, |
| "loss": 1148.7472, |
| "step": 9600 |
| }, |
| { |
| "ce_loss_10": 3.5573193550109865, |
| "ce_loss_13": 3.4829213500022886, |
| "ce_loss_2": 4.506733560562134, |
| "ce_loss_3": 4.238800776004791, |
| "ce_loss_7": 3.7266565203666686, |
| "epoch": 0.961, |
| "grad_norm": 580.0, |
| "kl_loss_10": 176.3166290283203, |
| "kl_loss_2": 2114.0912475585938, |
| "kl_loss_3": 1637.3499328613282, |
| "kl_loss_7": 588.5086486816406, |
| "learning_rate": 3.8242316085594916e-06, |
| "loss": 1126.8451, |
| "step": 9610 |
| }, |
| { |
| "ce_loss_10": 3.447552573680878, |
| "ce_loss_13": 3.369247031211853, |
| "ce_loss_2": 4.473224306106568, |
| "ce_loss_3": 4.197393763065338, |
| "ce_loss_7": 3.6313952803611755, |
| "epoch": 0.962, |
| "grad_norm": 556.0, |
| "kl_loss_10": 180.42641220092773, |
| "kl_loss_2": 2280.7411865234376, |
| "kl_loss_3": 1787.7846984863281, |
| "kl_loss_7": 615.477572631836, |
| "learning_rate": 3.630866116995757e-06, |
| "loss": 1194.5547, |
| "step": 9620 |
| }, |
| { |
| "ce_loss_10": 3.5979113578796387, |
| "ce_loss_13": 3.5257344841957092, |
| "ce_loss_2": 4.537094449996948, |
| "ce_loss_3": 4.265922880172729, |
| "ce_loss_7": 3.7622151970863342, |
| "epoch": 0.963, |
| "grad_norm": 572.0, |
| "kl_loss_10": 174.69513320922852, |
| "kl_loss_2": 2105.5745544433594, |
| "kl_loss_3": 1622.0366455078124, |
| "kl_loss_7": 578.8991943359375, |
| "learning_rate": 3.4424990570994797e-06, |
| "loss": 1156.3669, |
| "step": 9630 |
| }, |
| { |
| "ce_loss_10": 3.585505282878876, |
| "ce_loss_13": 3.5114797711372376, |
| "ce_loss_2": 4.518644833564759, |
| "ce_loss_3": 4.256495106220245, |
| "ce_loss_7": 3.7576936960220335, |
| "epoch": 0.964, |
| "grad_norm": 482.0, |
| "kl_loss_10": 175.59745712280272, |
| "kl_loss_2": 2103.631524658203, |
| "kl_loss_3": 1631.6069641113281, |
| "kl_loss_7": 588.9240661621094, |
| "learning_rate": 3.2591323257248896e-06, |
| "loss": 1134.1978, |
| "step": 9640 |
| }, |
| { |
| "ce_loss_10": 3.437925660610199, |
| "ce_loss_13": 3.3662607192993166, |
| "ce_loss_2": 4.409651112556458, |
| "ce_loss_3": 4.150672721862793, |
| "ce_loss_7": 3.6122069478034975, |
| "epoch": 0.965, |
| "grad_norm": 556.0, |
| "kl_loss_10": 174.7218978881836, |
| "kl_loss_2": 2173.464489746094, |
| "kl_loss_3": 1704.9824951171875, |
| "kl_loss_7": 600.8370666503906, |
| "learning_rate": 3.0807677693729385e-06, |
| "loss": 1163.455, |
| "step": 9650 |
| }, |
| { |
| "ce_loss_10": 3.623323905467987, |
| "ce_loss_13": 3.55154949426651, |
| "ce_loss_2": 4.551669549942017, |
| "ce_loss_3": 4.290165424346924, |
| "ce_loss_7": 3.794328248500824, |
| "epoch": 0.966, |
| "grad_norm": 544.0, |
| "kl_loss_10": 174.09824600219727, |
| "kl_loss_2": 2080.361853027344, |
| "kl_loss_3": 1626.3007873535157, |
| "kl_loss_7": 584.5892837524414, |
| "learning_rate": 2.9074071841727055e-06, |
| "loss": 1115.8137, |
| "step": 9660 |
| }, |
| { |
| "ce_loss_10": 3.548972153663635, |
| "ce_loss_13": 3.4729049801826477, |
| "ce_loss_2": 4.494955968856812, |
| "ce_loss_3": 4.230167889595032, |
| "ce_loss_7": 3.730636739730835, |
| "epoch": 0.967, |
| "grad_norm": 632.0, |
| "kl_loss_10": 177.06267852783202, |
| "kl_loss_2": 2105.5685302734373, |
| "kl_loss_3": 1641.2366760253906, |
| "kl_loss_7": 599.4572601318359, |
| "learning_rate": 2.739052315863355e-06, |
| "loss": 1112.1609, |
| "step": 9670 |
| }, |
| { |
| "ce_loss_10": 3.5363902688026427, |
| "ce_loss_13": 3.4610472440719606, |
| "ce_loss_2": 4.502471828460694, |
| "ce_loss_3": 4.230240440368652, |
| "ce_loss_7": 3.7059998750686645, |
| "epoch": 0.968, |
| "grad_norm": 560.0, |
| "kl_loss_10": 176.56764450073243, |
| "kl_loss_2": 2152.0122924804687, |
| "kl_loss_3": 1676.3801025390626, |
| "kl_loss_7": 591.2061340332032, |
| "learning_rate": 2.5757048597765396e-06, |
| "loss": 1135.4543, |
| "step": 9680 |
| }, |
| { |
| "ce_loss_10": 3.5459084630012514, |
| "ce_loss_13": 3.4721821188926696, |
| "ce_loss_2": 4.505685806274414, |
| "ce_loss_3": 4.235912537574768, |
| "ce_loss_7": 3.722131609916687, |
| "epoch": 0.969, |
| "grad_norm": 560.0, |
| "kl_loss_10": 176.31484680175782, |
| "kl_loss_2": 2142.29345703125, |
| "kl_loss_3": 1672.873828125, |
| "kl_loss_7": 599.0281616210938, |
| "learning_rate": 2.417366460819359e-06, |
| "loss": 1141.189, |
| "step": 9690 |
| }, |
| { |
| "ce_loss_10": 3.5568428516387938, |
| "ce_loss_13": 3.47944039106369, |
| "ce_loss_2": 4.546383309364319, |
| "ce_loss_3": 4.280533790588379, |
| "ce_loss_7": 3.73818119764328, |
| "epoch": 0.97, |
| "grad_norm": 592.0, |
| "kl_loss_10": 181.22289581298827, |
| "kl_loss_2": 2223.5619262695313, |
| "kl_loss_3": 1743.523046875, |
| "kl_loss_7": 618.0255676269531, |
| "learning_rate": 2.2640387134577057e-06, |
| "loss": 1150.9949, |
| "step": 9700 |
| }, |
| { |
| "ce_loss_10": 3.4835644006729125, |
| "ce_loss_13": 3.409128963947296, |
| "ce_loss_2": 4.400413775444031, |
| "ce_loss_3": 4.1408212065696715, |
| "ce_loss_7": 3.6511133790016173, |
| "epoch": 0.971, |
| "grad_norm": 584.0, |
| "kl_loss_10": 169.66612396240234, |
| "kl_loss_2": 2037.37294921875, |
| "kl_loss_3": 1580.9468200683593, |
| "kl_loss_7": 575.112336730957, |
| "learning_rate": 2.115723161700278e-06, |
| "loss": 1111.2564, |
| "step": 9710 |
| }, |
| { |
| "ce_loss_10": 3.462701106071472, |
| "ce_loss_13": 3.383505952358246, |
| "ce_loss_2": 4.462756657600403, |
| "ce_loss_3": 4.1902328610420225, |
| "ce_loss_7": 3.6453136444091796, |
| "epoch": 0.972, |
| "grad_norm": 676.0, |
| "kl_loss_10": 180.0776268005371, |
| "kl_loss_2": 2223.634521484375, |
| "kl_loss_3": 1740.0434143066407, |
| "kl_loss_7": 612.3085083007812, |
| "learning_rate": 1.9724212990830937e-06, |
| "loss": 1170.462, |
| "step": 9720 |
| }, |
| { |
| "ce_loss_10": 3.6076322913169863, |
| "ce_loss_13": 3.532732355594635, |
| "ce_loss_2": 4.577161026000977, |
| "ce_loss_3": 4.311069667339325, |
| "ce_loss_7": 3.7834354996681214, |
| "epoch": 0.973, |
| "grad_norm": 488.0, |
| "kl_loss_10": 178.08698196411132, |
| "kl_loss_2": 2168.2475769042967, |
| "kl_loss_3": 1699.6189208984374, |
| "kl_loss_7": 598.9332580566406, |
| "learning_rate": 1.8341345686543331e-06, |
| "loss": 1146.8779, |
| "step": 9730 |
| }, |
| { |
| "ce_loss_10": 3.5909879326820375, |
| "ce_loss_13": 3.5183821320533752, |
| "ce_loss_2": 4.50426287651062, |
| "ce_loss_3": 4.235173010826111, |
| "ce_loss_7": 3.757961595058441, |
| "epoch": 0.974, |
| "grad_norm": 548.0, |
| "kl_loss_10": 174.61135635375976, |
| "kl_loss_2": 2063.9743225097654, |
| "kl_loss_3": 1591.8313293457031, |
| "kl_loss_7": 585.6029083251954, |
| "learning_rate": 1.7008643629596864e-06, |
| "loss": 1145.0081, |
| "step": 9740 |
| }, |
| { |
| "ce_loss_10": 3.5759197235107423, |
| "ce_loss_13": 3.4986127734184267, |
| "ce_loss_2": 4.5397637486457825, |
| "ce_loss_3": 4.2685352802276615, |
| "ce_loss_7": 3.7446988224983215, |
| "epoch": 0.975, |
| "grad_norm": 552.0, |
| "kl_loss_10": 176.2814811706543, |
| "kl_loss_2": 2161.20859375, |
| "kl_loss_3": 1678.5180541992188, |
| "kl_loss_7": 590.2671813964844, |
| "learning_rate": 1.5726120240288633e-06, |
| "loss": 1164.5706, |
| "step": 9750 |
| }, |
| { |
| "ce_loss_10": 3.4757342100143434, |
| "ce_loss_13": 3.4012367367744445, |
| "ce_loss_2": 4.433041834831238, |
| "ce_loss_3": 4.165659952163696, |
| "ce_loss_7": 3.6462602019309998, |
| "epoch": 0.976, |
| "grad_norm": 572.0, |
| "kl_loss_10": 174.65177154541016, |
| "kl_loss_2": 2138.293341064453, |
| "kl_loss_3": 1655.4461547851563, |
| "kl_loss_7": 589.6313079833984, |
| "learning_rate": 1.4493788433612708e-06, |
| "loss": 1134.1515, |
| "step": 9760 |
| }, |
| { |
| "ce_loss_10": 3.5877037525177, |
| "ce_loss_13": 3.5132053971290587, |
| "ce_loss_2": 4.55386061668396, |
| "ce_loss_3": 4.287087714672088, |
| "ce_loss_7": 3.7638731479644774, |
| "epoch": 0.977, |
| "grad_norm": 536.0, |
| "kl_loss_10": 177.9455436706543, |
| "kl_loss_2": 2173.966436767578, |
| "kl_loss_3": 1692.082745361328, |
| "kl_loss_7": 599.7038208007813, |
| "learning_rate": 1.3311660619138578e-06, |
| "loss": 1161.4269, |
| "step": 9770 |
| }, |
| { |
| "ce_loss_10": 3.584187960624695, |
| "ce_loss_13": 3.510979926586151, |
| "ce_loss_2": 4.489086222648621, |
| "ce_loss_3": 4.228979337215423, |
| "ce_loss_7": 3.748577618598938, |
| "epoch": 0.978, |
| "grad_norm": 516.0, |
| "kl_loss_10": 176.20037689208985, |
| "kl_loss_2": 2033.9857421875, |
| "kl_loss_3": 1575.569403076172, |
| "kl_loss_7": 583.5555999755859, |
| "learning_rate": 1.2179748700879012e-06, |
| "loss": 1135.4594, |
| "step": 9780 |
| }, |
| { |
| "ce_loss_10": 3.516654706001282, |
| "ce_loss_13": 3.442041552066803, |
| "ce_loss_2": 4.460341954231263, |
| "ce_loss_3": 4.201344418525696, |
| "ce_loss_7": 3.6880866169929503, |
| "epoch": 0.979, |
| "grad_norm": 648.0, |
| "kl_loss_10": 175.97493591308594, |
| "kl_loss_2": 2106.5852966308594, |
| "kl_loss_3": 1640.5349182128907, |
| "kl_loss_7": 589.2118927001953, |
| "learning_rate": 1.1098064077174619e-06, |
| "loss": 1139.4391, |
| "step": 9790 |
| }, |
| { |
| "ce_loss_10": 3.548008131980896, |
| "ce_loss_13": 3.470580744743347, |
| "ce_loss_2": 4.531388640403748, |
| "ce_loss_3": 4.256609618663788, |
| "ce_loss_7": 3.7258023023605347, |
| "epoch": 0.98, |
| "grad_norm": 660.0, |
| "kl_loss_10": 175.85005264282228, |
| "kl_loss_2": 2184.833563232422, |
| "kl_loss_3": 1695.749658203125, |
| "kl_loss_7": 597.7893035888671, |
| "learning_rate": 1.006661764057837e-06, |
| "loss": 1144.1424, |
| "step": 9800 |
| }, |
| { |
| "ce_loss_10": 3.5516860127449035, |
| "ce_loss_13": 3.479186308383942, |
| "ce_loss_2": 4.507886123657227, |
| "ce_loss_3": 4.23874124288559, |
| "ce_loss_7": 3.7239818572998047, |
| "epoch": 0.981, |
| "grad_norm": 548.0, |
| "kl_loss_10": 174.7688331604004, |
| "kl_loss_2": 2140.183038330078, |
| "kl_loss_3": 1663.4087707519532, |
| "kl_loss_7": 592.6127227783203, |
| "learning_rate": 9.085419777743465e-07, |
| "loss": 1136.217, |
| "step": 9810 |
| }, |
| { |
| "ce_loss_10": 3.4896764159202576, |
| "ce_loss_13": 3.4188039541244506, |
| "ce_loss_2": 4.450884318351745, |
| "ce_loss_3": 4.184124147891998, |
| "ce_loss_7": 3.6670993685722353, |
| "epoch": 0.982, |
| "grad_norm": 476.0, |
| "kl_loss_10": 171.6952751159668, |
| "kl_loss_2": 2127.2390258789064, |
| "kl_loss_3": 1658.6653991699218, |
| "kl_loss_7": 588.117578125, |
| "learning_rate": 8.15448036932176e-07, |
| "loss": 1121.8644, |
| "step": 9820 |
| }, |
| { |
| "ce_loss_10": 3.542994940280914, |
| "ce_loss_13": 3.471325635910034, |
| "ce_loss_2": 4.491614294052124, |
| "ce_loss_3": 4.226279616355896, |
| "ce_loss_7": 3.716835379600525, |
| "epoch": 0.983, |
| "grad_norm": 580.0, |
| "kl_loss_10": 175.40776138305665, |
| "kl_loss_2": 2138.5871826171874, |
| "kl_loss_3": 1668.4995056152343, |
| "kl_loss_7": 599.0077606201172, |
| "learning_rate": 7.273808789862724e-07, |
| "loss": 1157.4876, |
| "step": 9830 |
| }, |
| { |
| "ce_loss_10": 3.62471262216568, |
| "ce_loss_13": 3.552128314971924, |
| "ce_loss_2": 4.560764002799988, |
| "ce_loss_3": 4.2987874269485475, |
| "ce_loss_7": 3.7973197221755983, |
| "epoch": 0.984, |
| "grad_norm": 536.0, |
| "kl_loss_10": 177.9404067993164, |
| "kl_loss_2": 2121.9407958984375, |
| "kl_loss_3": 1649.4432312011718, |
| "kl_loss_7": 593.6075317382813, |
| "learning_rate": 6.443413907720186e-07, |
| "loss": 1128.3074, |
| "step": 9840 |
| }, |
| { |
| "ce_loss_10": 3.553659164905548, |
| "ce_loss_13": 3.479843807220459, |
| "ce_loss_2": 4.502509045600891, |
| "ce_loss_3": 4.239866006374359, |
| "ce_loss_7": 3.7261658310890198, |
| "epoch": 0.985, |
| "grad_norm": 612.0, |
| "kl_loss_10": 175.90703582763672, |
| "kl_loss_2": 2105.8654174804688, |
| "kl_loss_3": 1643.8707397460937, |
| "kl_loss_7": 589.3217529296875, |
| "learning_rate": 5.663304084960185e-07, |
| "loss": 1125.6893, |
| "step": 9850 |
| }, |
| { |
| "ce_loss_10": 3.4857439756393434, |
| "ce_loss_13": 3.40972044467926, |
| "ce_loss_2": 4.458719778060913, |
| "ce_loss_3": 4.193828642368317, |
| "ce_loss_7": 3.661728310585022, |
| "epoch": 0.986, |
| "grad_norm": 544.0, |
| "kl_loss_10": 175.7668014526367, |
| "kl_loss_2": 2168.083819580078, |
| "kl_loss_3": 1695.883349609375, |
| "kl_loss_7": 599.47685546875, |
| "learning_rate": 4.933487177280482e-07, |
| "loss": 1132.0084, |
| "step": 9860 |
| }, |
| { |
| "ce_loss_10": 3.577410614490509, |
| "ce_loss_13": 3.50371458530426, |
| "ce_loss_2": 4.517120695114135, |
| "ce_loss_3": 4.256355273723602, |
| "ce_loss_7": 3.745650053024292, |
| "epoch": 0.987, |
| "grad_norm": 580.0, |
| "kl_loss_10": 172.69470291137696, |
| "kl_loss_2": 2116.2484741210938, |
| "kl_loss_3": 1646.7192932128905, |
| "kl_loss_7": 586.3196258544922, |
| "learning_rate": 4.2539705339295075e-07, |
| "loss": 1129.2027, |
| "step": 9870 |
| }, |
| { |
| "ce_loss_10": 3.4351974010467528, |
| "ce_loss_13": 3.359704864025116, |
| "ce_loss_2": 4.414662563800812, |
| "ce_loss_3": 4.1485153317451475, |
| "ce_loss_7": 3.614791524410248, |
| "epoch": 0.988, |
| "grad_norm": 624.0, |
| "kl_loss_10": 176.81834564208984, |
| "kl_loss_2": 2189.1237670898436, |
| "kl_loss_3": 1714.1568420410156, |
| "kl_loss_7": 602.8686370849609, |
| "learning_rate": 3.6247609976319816e-07, |
| "loss": 1142.2324, |
| "step": 9880 |
| }, |
| { |
| "ce_loss_10": 3.5325068116188048, |
| "ce_loss_13": 3.4560230016708373, |
| "ce_loss_2": 4.515677762031555, |
| "ce_loss_3": 4.241289448738098, |
| "ce_loss_7": 3.7140289902687074, |
| "epoch": 0.989, |
| "grad_norm": 644.0, |
| "kl_loss_10": 178.62700347900392, |
| "kl_loss_2": 2181.043316650391, |
| "kl_loss_3": 1701.3241455078125, |
| "kl_loss_7": 601.8407318115235, |
| "learning_rate": 3.0458649045211895e-07, |
| "loss": 1177.6322, |
| "step": 9890 |
| }, |
| { |
| "ce_loss_10": 3.505313539505005, |
| "ce_loss_13": 3.4275246262550354, |
| "ce_loss_2": 4.470349764823913, |
| "ce_loss_3": 4.199915885925293, |
| "ce_loss_7": 3.687111556529999, |
| "epoch": 0.99, |
| "grad_norm": 628.0, |
| "kl_loss_10": 179.9844207763672, |
| "kl_loss_2": 2144.779681396484, |
| "kl_loss_3": 1664.5868041992187, |
| "kl_loss_7": 610.2965026855469, |
| "learning_rate": 2.517288084074587e-07, |
| "loss": 1173.5785, |
| "step": 9900 |
| }, |
| { |
| "ce_loss_10": 3.541435408592224, |
| "ce_loss_13": 3.4641653418540956, |
| "ce_loss_2": 4.540918755531311, |
| "ce_loss_3": 4.268487918376922, |
| "ce_loss_7": 3.728367364406586, |
| "epoch": 0.991, |
| "grad_norm": 544.0, |
| "kl_loss_10": 181.58360061645507, |
| "kl_loss_2": 2223.09423828125, |
| "kl_loss_3": 1733.728173828125, |
| "kl_loss_7": 618.8083801269531, |
| "learning_rate": 2.0390358590538505e-07, |
| "loss": 1164.2306, |
| "step": 9910 |
| }, |
| { |
| "ce_loss_10": 3.5465844750404356, |
| "ce_loss_13": 3.4692795395851137, |
| "ce_loss_2": 4.505300617218017, |
| "ce_loss_3": 4.238151812553406, |
| "ce_loss_7": 3.7215544462203978, |
| "epoch": 0.992, |
| "grad_norm": 516.0, |
| "kl_loss_10": 178.79893417358397, |
| "kl_loss_2": 2149.3738037109374, |
| "kl_loss_3": 1683.4728881835938, |
| "kl_loss_7": 602.8562408447266, |
| "learning_rate": 1.61111304545436e-07, |
| "loss": 1139.9141, |
| "step": 9920 |
| }, |
| { |
| "ce_loss_10": 3.5144612431526183, |
| "ce_loss_13": 3.439807415008545, |
| "ce_loss_2": 4.468925881385803, |
| "ce_loss_3": 4.204157900810242, |
| "ce_loss_7": 3.68552029132843, |
| "epoch": 0.993, |
| "grad_norm": 524.0, |
| "kl_loss_10": 174.9011474609375, |
| "kl_loss_2": 2131.701556396484, |
| "kl_loss_3": 1667.3637084960938, |
| "kl_loss_7": 591.8195831298829, |
| "learning_rate": 1.2335239524541298e-07, |
| "loss": 1123.1069, |
| "step": 9930 |
| }, |
| { |
| "ce_loss_10": 3.485284912586212, |
| "ce_loss_13": 3.4107711553573608, |
| "ce_loss_2": 4.4413145065307615, |
| "ce_loss_3": 4.1761764764785765, |
| "ce_loss_7": 3.658044862747192, |
| "epoch": 0.994, |
| "grad_norm": 552.0, |
| "kl_loss_10": 174.74987030029297, |
| "kl_loss_2": 2137.9515625, |
| "kl_loss_3": 1659.8411071777343, |
| "kl_loss_7": 590.9619750976562, |
| "learning_rate": 9.06272382371065e-08, |
| "loss": 1140.1338, |
| "step": 9940 |
| }, |
| { |
| "ce_loss_10": 3.5549147844314577, |
| "ce_loss_13": 3.482628679275513, |
| "ce_loss_2": 4.527088284492493, |
| "ce_loss_3": 4.2653639078140255, |
| "ce_loss_7": 3.7300615668296815, |
| "epoch": 0.995, |
| "grad_norm": 540.0, |
| "kl_loss_10": 177.89019927978515, |
| "kl_loss_2": 2179.192108154297, |
| "kl_loss_3": 1710.0057861328125, |
| "kl_loss_7": 601.8619506835937, |
| "learning_rate": 6.293616306246586e-08, |
| "loss": 1148.1468, |
| "step": 9950 |
| }, |
| { |
| "ce_loss_10": 3.5492191195487974, |
| "ce_loss_13": 3.4784142851829527, |
| "ce_loss_2": 4.47113618850708, |
| "ce_loss_3": 4.207776916027069, |
| "ce_loss_7": 3.7163458704948424, |
| "epoch": 0.996, |
| "grad_norm": 568.0, |
| "kl_loss_10": 171.20833358764648, |
| "kl_loss_2": 2067.156182861328, |
| "kl_loss_3": 1607.7954223632812, |
| "kl_loss_7": 575.8024002075196, |
| "learning_rate": 4.027944857032395e-08, |
| "loss": 1102.1236, |
| "step": 9960 |
| }, |
| { |
| "ce_loss_10": 3.5417333483695983, |
| "ce_loss_13": 3.4737359166145323, |
| "ce_loss_2": 4.454948210716248, |
| "ce_loss_3": 4.189112281799316, |
| "ce_loss_7": 3.7030033111572265, |
| "epoch": 0.997, |
| "grad_norm": 564.0, |
| "kl_loss_10": 169.30588455200194, |
| "kl_loss_2": 2030.570849609375, |
| "kl_loss_3": 1568.8222778320312, |
| "kl_loss_7": 562.5833770751954, |
| "learning_rate": 2.265732291356626e-08, |
| "loss": 1096.3691, |
| "step": 9970 |
| }, |
| { |
| "ce_loss_10": 3.5887541651725767, |
| "ce_loss_13": 3.5155721068382264, |
| "ce_loss_2": 4.518339204788208, |
| "ce_loss_3": 4.2516262292861935, |
| "ce_loss_7": 3.7596161723136903, |
| "epoch": 0.998, |
| "grad_norm": 516.0, |
| "kl_loss_10": 174.8034523010254, |
| "kl_loss_2": 2081.462506103516, |
| "kl_loss_3": 1607.8740112304688, |
| "kl_loss_7": 584.5911361694336, |
| "learning_rate": 1.0069963546743833e-08, |
| "loss": 1138.0035, |
| "step": 9980 |
| }, |
| { |
| "ce_loss_10": 3.567852771282196, |
| "ce_loss_13": 3.4926111340522765, |
| "ce_loss_2": 4.526482367515564, |
| "ce_loss_3": 4.2642577409744264, |
| "ce_loss_7": 3.741330122947693, |
| "epoch": 0.999, |
| "grad_norm": 504.0, |
| "kl_loss_10": 177.2101951599121, |
| "kl_loss_2": 2140.8559020996095, |
| "kl_loss_3": 1666.8830322265626, |
| "kl_loss_7": 597.1717834472656, |
| "learning_rate": 2.517497224463483e-09, |
| "loss": 1140.1191, |
| "step": 9990 |
| }, |
| { |
| "ce_loss_10": 3.5264371991157533, |
| "ce_loss_13": 3.450861382484436, |
| "ce_loss_2": 4.53892297744751, |
| "ce_loss_3": 4.266285753250122, |
| "ce_loss_7": 3.7066094994544985, |
| "epoch": 1.0, |
| "grad_norm": 580.0, |
| "kl_loss_10": 180.3290283203125, |
| "kl_loss_2": 2255.579718017578, |
| "kl_loss_3": 1769.6880432128905, |
| "kl_loss_7": 615.3499603271484, |
| "learning_rate": 0.0, |
| "loss": 1181.1314, |
| "step": 10000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 10000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 250, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.177819035608023e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|