| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 10000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "ce_loss_13": 8.010122537612915, |
| "ce_loss_17": 3.1751047372817993, |
| "ce_loss_2": 14.833064079284668, |
| "ce_loss_4": 13.72553825378418, |
| "ce_loss_9": 10.760433197021484, |
| "epoch": 0.0001, |
| "grad_norm": 102912.0, |
| "kl_loss_13": 10937.15283203125, |
| "kl_loss_2": 24630.6259765625, |
| "kl_loss_4": 22217.994140625, |
| "kl_loss_9": 16021.4296875, |
| "learning_rate": 1e-05, |
| "loss": 18735.0371, |
| "step": 1 |
| }, |
| { |
| "ce_loss_13": 6.549808184305827, |
| "ce_loss_17": 3.228388296233283, |
| "ce_loss_2": 11.08839209874471, |
| "ce_loss_4": 10.315435753928291, |
| "ce_loss_9": 8.660266160964966, |
| "epoch": 0.001, |
| "grad_norm": 18816.0, |
| "kl_loss_13": 7237.9655354817705, |
| "kl_loss_2": 15891.634494357639, |
| "kl_loss_4": 14196.696885850695, |
| "kl_loss_9": 10979.929850260416, |
| "learning_rate": 0.0001, |
| "loss": 12137.204, |
| "step": 10 |
| }, |
| { |
| "ce_loss_13": 4.558600926399231, |
| "ce_loss_17": 3.240402579307556, |
| "ce_loss_2": 7.437748813629151, |
| "ce_loss_4": 7.138039708137512, |
| "ce_loss_9": 6.0494462013244625, |
| "epoch": 0.002, |
| "grad_norm": 11456.0, |
| "kl_loss_13": 2548.09873046875, |
| "kl_loss_2": 7932.008154296875, |
| "kl_loss_4": 7345.439770507813, |
| "kl_loss_9": 5318.958911132812, |
| "learning_rate": 0.0002, |
| "loss": 5882.6383, |
| "step": 20 |
| }, |
| { |
| "ce_loss_13": 3.8870108842849733, |
| "ce_loss_17": 3.0360390901565553, |
| "ce_loss_2": 6.600701427459716, |
| "ce_loss_4": 6.254192614555359, |
| "ce_loss_9": 5.277278900146484, |
| "epoch": 0.003, |
| "grad_norm": 4768.0, |
| "kl_loss_13": 1637.8152770996094, |
| "kl_loss_2": 6758.89423828125, |
| "kl_loss_4": 6101.557934570313, |
| "kl_loss_9": 4281.71435546875, |
| "learning_rate": 0.0003, |
| "loss": 4596.5875, |
| "step": 30 |
| }, |
| { |
| "ce_loss_13": 3.858346462249756, |
| "ce_loss_17": 3.2072247862815857, |
| "ce_loss_2": 6.163068389892578, |
| "ce_loss_4": 5.84541118144989, |
| "ce_loss_9": 5.031937289237976, |
| "epoch": 0.004, |
| "grad_norm": 2608.0, |
| "kl_loss_13": 1270.1531524658203, |
| "kl_loss_2": 5581.758447265625, |
| "kl_loss_4": 4999.07060546875, |
| "kl_loss_9": 3497.7363403320314, |
| "learning_rate": 0.0004, |
| "loss": 3868.043, |
| "step": 40 |
| }, |
| { |
| "ce_loss_13": 3.757353723049164, |
| "ce_loss_17": 3.1732503175735474, |
| "ce_loss_2": 5.953920221328735, |
| "ce_loss_4": 5.614688682556152, |
| "ce_loss_9": 4.806208968162537, |
| "epoch": 0.005, |
| "grad_norm": 7936.0, |
| "kl_loss_13": 1102.7322174072265, |
| "kl_loss_2": 5320.936206054687, |
| "kl_loss_4": 4689.3359375, |
| "kl_loss_9": 3153.777587890625, |
| "learning_rate": 0.0005, |
| "loss": 3560.1562, |
| "step": 50 |
| }, |
| { |
| "ce_loss_13": 3.802089977264404, |
| "ce_loss_17": 3.1876197695732116, |
| "ce_loss_2": 5.80324399471283, |
| "ce_loss_4": 5.4294521570205685, |
| "ce_loss_9": 4.638357520103455, |
| "epoch": 0.006, |
| "grad_norm": 3312.0, |
| "kl_loss_13": 1244.8134368896485, |
| "kl_loss_2": 5024.971240234375, |
| "kl_loss_4": 4302.3890380859375, |
| "kl_loss_9": 2834.17763671875, |
| "learning_rate": 0.0006, |
| "loss": 3360.3262, |
| "step": 60 |
| }, |
| { |
| "ce_loss_13": 3.6345572113990783, |
| "ce_loss_17": 3.1099562883377074, |
| "ce_loss_2": 5.658854675292969, |
| "ce_loss_4": 5.306998753547669, |
| "ce_loss_9": 4.541006076335907, |
| "epoch": 0.007, |
| "grad_norm": 6976.0, |
| "kl_loss_13": 1051.1200164794923, |
| "kl_loss_2": 4916.650634765625, |
| "kl_loss_4": 4266.261865234375, |
| "kl_loss_9": 2840.3209228515625, |
| "learning_rate": 0.0007, |
| "loss": 3243.2391, |
| "step": 70 |
| }, |
| { |
| "ce_loss_13": 3.62385470867157, |
| "ce_loss_17": 3.105690336227417, |
| "ce_loss_2": 5.629134607315064, |
| "ce_loss_4": 5.29190571308136, |
| "ce_loss_9": 4.585710549354554, |
| "epoch": 0.008, |
| "grad_norm": 6432.0, |
| "kl_loss_13": 1015.2538757324219, |
| "kl_loss_2": 4867.480224609375, |
| "kl_loss_4": 4226.879077148437, |
| "kl_loss_9": 2901.058557128906, |
| "learning_rate": 0.0008, |
| "loss": 3255.2068, |
| "step": 80 |
| }, |
| { |
| "ce_loss_13": 3.5578789591789244, |
| "ce_loss_17": 3.0659744143486023, |
| "ce_loss_2": 5.595231628417968, |
| "ce_loss_4": 5.249085760116577, |
| "ce_loss_9": 4.490253472328186, |
| "epoch": 0.009, |
| "grad_norm": 3328.0, |
| "kl_loss_13": 1002.1604888916015, |
| "kl_loss_2": 4893.657006835938, |
| "kl_loss_4": 4253.1173095703125, |
| "kl_loss_9": 2834.4761474609377, |
| "learning_rate": 0.0009000000000000001, |
| "loss": 3229.1129, |
| "step": 90 |
| }, |
| { |
| "ce_loss_13": 3.683272635936737, |
| "ce_loss_17": 3.182042109966278, |
| "ce_loss_2": 5.648371434211731, |
| "ce_loss_4": 5.4668464183807375, |
| "ce_loss_9": 4.528874349594116, |
| "epoch": 0.01, |
| "grad_norm": 4928.0, |
| "kl_loss_13": 999.3995574951172, |
| "kl_loss_2": 4784.1326904296875, |
| "kl_loss_4": 4484.12451171875, |
| "kl_loss_9": 2677.7843017578125, |
| "learning_rate": 0.001, |
| "loss": 3233.375, |
| "step": 100 |
| }, |
| { |
| "ce_loss_13": 3.6131449580192565, |
| "ce_loss_17": 3.142174708843231, |
| "ce_loss_2": 5.670793271064758, |
| "ce_loss_4": 5.298868942260742, |
| "ce_loss_9": 4.463374948501587, |
| "epoch": 0.011, |
| "grad_norm": 4896.0, |
| "kl_loss_13": 959.4629852294922, |
| "kl_loss_2": 4913.542309570313, |
| "kl_loss_4": 4189.067163085938, |
| "kl_loss_9": 2670.0919799804688, |
| "learning_rate": 0.0009999974825027757, |
| "loss": 3178.3477, |
| "step": 110 |
| }, |
| { |
| "ce_loss_13": 3.6022252082824706, |
| "ce_loss_17": 3.19889919757843, |
| "ce_loss_2": 5.626460456848145, |
| "ce_loss_4": 5.211553907394409, |
| "ce_loss_9": 4.450517749786377, |
| "epoch": 0.012, |
| "grad_norm": 2144.0, |
| "kl_loss_13": 820.5734649658203, |
| "kl_loss_2": 4730.507958984375, |
| "kl_loss_4": 3955.15234375, |
| "kl_loss_9": 2489.3434204101563, |
| "learning_rate": 0.0009999899300364532, |
| "loss": 2975.3076, |
| "step": 120 |
| }, |
| { |
| "ce_loss_13": 3.5816900730133057, |
| "ce_loss_17": 3.1710021018981935, |
| "ce_loss_2": 5.589413738250732, |
| "ce_loss_4": 5.266030120849609, |
| "ce_loss_9": 4.426473808288574, |
| "epoch": 0.013, |
| "grad_norm": 1904.0, |
| "kl_loss_13": 838.0909301757813, |
| "kl_loss_2": 4680.41015625, |
| "kl_loss_4": 4068.0897827148438, |
| "kl_loss_9": 2480.7856567382814, |
| "learning_rate": 0.0009999773426770863, |
| "loss": 3043.0082, |
| "step": 130 |
| }, |
| { |
| "ce_loss_13": 3.640711486339569, |
| "ce_loss_17": 3.2069756150245667, |
| "ce_loss_2": 5.519480276107788, |
| "ce_loss_4": 5.205518078804016, |
| "ce_loss_9": 4.400923824310302, |
| "epoch": 0.014, |
| "grad_norm": 1584.0, |
| "kl_loss_13": 878.0303619384765, |
| "kl_loss_2": 4515.172680664063, |
| "kl_loss_4": 3920.8177124023437, |
| "kl_loss_9": 2383.91787109375, |
| "learning_rate": 0.0009999597205514296, |
| "loss": 2944.726, |
| "step": 140 |
| }, |
| { |
| "ce_loss_13": 3.572978138923645, |
| "ce_loss_17": 3.165075254440308, |
| "ce_loss_2": 5.441094017028808, |
| "ce_loss_4": 5.120605206489563, |
| "ce_loss_9": 4.307848358154297, |
| "epoch": 0.015, |
| "grad_norm": 1432.0, |
| "kl_loss_13": 826.1624969482422, |
| "kl_loss_2": 4424.702392578125, |
| "kl_loss_4": 3823.240515136719, |
| "kl_loss_9": 2261.693463134766, |
| "learning_rate": 0.0009999370638369377, |
| "loss": 2849.4543, |
| "step": 150 |
| }, |
| { |
| "ce_loss_13": 3.5762784242630006, |
| "ce_loss_17": 3.202464735507965, |
| "ce_loss_2": 5.482413220405578, |
| "ce_loss_4": 5.156105327606201, |
| "ce_loss_9": 4.305610775947571, |
| "epoch": 0.016, |
| "grad_norm": 1624.0, |
| "kl_loss_13": 770.926235961914, |
| "kl_loss_2": 4446.180615234375, |
| "kl_loss_4": 3831.9423828125, |
| "kl_loss_9": 2198.9774475097656, |
| "learning_rate": 0.000999909372761763, |
| "loss": 2817.4812, |
| "step": 160 |
| }, |
| { |
| "ce_loss_13": 3.518922579288483, |
| "ce_loss_17": 3.1375705718994142, |
| "ce_loss_2": 5.4155909538269045, |
| "ce_loss_4": 5.101733207702637, |
| "ce_loss_9": 4.236639869213104, |
| "epoch": 0.017, |
| "grad_norm": 1704.0, |
| "kl_loss_13": 780.1618255615234, |
| "kl_loss_2": 4465.235131835938, |
| "kl_loss_4": 3875.888464355469, |
| "kl_loss_9": 2226.467120361328, |
| "learning_rate": 0.0009998766476047546, |
| "loss": 2856.7816, |
| "step": 170 |
| }, |
| { |
| "ce_loss_13": 3.558823084831238, |
| "ce_loss_17": 3.176159310340881, |
| "ce_loss_2": 5.462405729293823, |
| "ce_loss_4": 5.1283296823501585, |
| "ce_loss_9": 4.296038508415222, |
| "epoch": 0.018, |
| "grad_norm": 1200.0, |
| "kl_loss_13": 768.1738891601562, |
| "kl_loss_2": 4447.139147949219, |
| "kl_loss_4": 3829.0420043945314, |
| "kl_loss_9": 2247.1280151367187, |
| "learning_rate": 0.0009998388886954545, |
| "loss": 2842.7012, |
| "step": 180 |
| }, |
| { |
| "ce_loss_13": 3.498755896091461, |
| "ce_loss_17": 3.146486723423004, |
| "ce_loss_2": 5.386862397193909, |
| "ce_loss_4": 5.051463723182678, |
| "ce_loss_9": 4.243084788322449, |
| "epoch": 0.019, |
| "grad_norm": 1960.0, |
| "kl_loss_13": 730.3713623046875, |
| "kl_loss_2": 4382.531701660157, |
| "kl_loss_4": 3757.971337890625, |
| "kl_loss_9": 2200.8175537109373, |
| "learning_rate": 0.0009997960964140947, |
| "loss": 2752.2479, |
| "step": 190 |
| }, |
| { |
| "ce_loss_13": 3.490229105949402, |
| "ce_loss_17": 3.136069047451019, |
| "ce_loss_2": 5.383873295783997, |
| "ce_loss_4": 5.0795320749282835, |
| "ce_loss_9": 4.208669066429138, |
| "epoch": 0.02, |
| "grad_norm": 1840.0, |
| "kl_loss_13": 715.5518585205078, |
| "kl_loss_2": 4407.433044433594, |
| "kl_loss_4": 3842.116345214844, |
| "kl_loss_9": 2163.088830566406, |
| "learning_rate": 0.0009997482711915926, |
| "loss": 2759.7049, |
| "step": 200 |
| }, |
| { |
| "ce_loss_13": 3.443050265312195, |
| "ce_loss_17": 3.1130157709121704, |
| "ce_loss_2": 5.3130934000015255, |
| "ce_loss_4": 4.975576567649841, |
| "ce_loss_9": 4.111668515205383, |
| "epoch": 0.021, |
| "grad_norm": 1392.0, |
| "kl_loss_13": 676.3757446289062, |
| "kl_loss_2": 4311.921350097657, |
| "kl_loss_4": 3679.7574096679687, |
| "kl_loss_9": 2020.0243896484376, |
| "learning_rate": 0.0009996954135095479, |
| "loss": 2658.1875, |
| "step": 210 |
| }, |
| { |
| "ce_loss_13": 3.5141454219818113, |
| "ce_loss_17": 3.187968075275421, |
| "ce_loss_2": 5.32227590084076, |
| "ce_loss_4": 4.977713632583618, |
| "ce_loss_9": 4.135642993450165, |
| "epoch": 0.022, |
| "grad_norm": 1248.0, |
| "kl_loss_13": 661.5016204833985, |
| "kl_loss_2": 4173.251977539063, |
| "kl_loss_4": 3514.913610839844, |
| "kl_loss_9": 1914.9284118652345, |
| "learning_rate": 0.0009996375239002368, |
| "loss": 2569.9176, |
| "step": 220 |
| }, |
| { |
| "ce_loss_13": 3.57603634595871, |
| "ce_loss_17": 3.253368413448334, |
| "ce_loss_2": 5.329878497123718, |
| "ce_loss_4": 5.0020640134811405, |
| "ce_loss_9": 4.166190993785858, |
| "epoch": 0.023, |
| "grad_norm": 1184.0, |
| "kl_loss_13": 671.0865173339844, |
| "kl_loss_2": 4064.0949829101564, |
| "kl_loss_4": 3449.0091064453127, |
| "kl_loss_9": 1859.9651611328125, |
| "learning_rate": 0.0009995746029466072, |
| "loss": 2524.016, |
| "step": 230 |
| }, |
| { |
| "ce_loss_13": 3.3813811898231507, |
| "ce_loss_17": 3.053626906871796, |
| "ce_loss_2": 5.2718713760375975, |
| "ce_loss_4": 4.921644139289856, |
| "ce_loss_9": 4.037386178970337, |
| "epoch": 0.024, |
| "grad_norm": 1176.0, |
| "kl_loss_13": 685.3779907226562, |
| "kl_loss_2": 4352.920947265625, |
| "kl_loss_4": 3706.268444824219, |
| "kl_loss_9": 2006.7455627441407, |
| "learning_rate": 0.0009995066512822719, |
| "loss": 2594.1637, |
| "step": 240 |
| }, |
| { |
| "ce_loss_13": 3.4662138700485228, |
| "ce_loss_17": 3.1522522807121276, |
| "ce_loss_2": 5.383834600448608, |
| "ce_loss_4": 5.032086563110352, |
| "ce_loss_9": 4.128169333934784, |
| "epoch": 0.025, |
| "grad_norm": 1264.0, |
| "kl_loss_13": 654.3518585205078, |
| "kl_loss_2": 4362.5865234375, |
| "kl_loss_4": 3713.2698364257812, |
| "kl_loss_9": 1977.1189331054688, |
| "learning_rate": 0.000999433669591504, |
| "loss": 2565.9314, |
| "step": 250 |
| }, |
| { |
| "ce_loss_13": 3.3903905630111693, |
| "ce_loss_17": 3.0589525938034057, |
| "ce_loss_2": 5.258690309524536, |
| "ce_loss_4": 4.894338870048523, |
| "ce_loss_9": 4.018422996997833, |
| "epoch": 0.026, |
| "grad_norm": 1192.0, |
| "kl_loss_13": 678.4205200195313, |
| "kl_loss_2": 4332.092919921875, |
| "kl_loss_4": 3650.1448974609375, |
| "kl_loss_9": 1948.64580078125, |
| "learning_rate": 0.000999355658609228, |
| "loss": 2590.8977, |
| "step": 260 |
| }, |
| { |
| "ce_loss_13": 3.439604413509369, |
| "ce_loss_17": 3.0890163660049437, |
| "ce_loss_2": 5.332418704032898, |
| "ce_loss_4": 4.9640573263168335, |
| "ce_loss_9": 4.0712716460227965, |
| "epoch": 0.027, |
| "grad_norm": 1080.0, |
| "kl_loss_13": 714.6063018798828, |
| "kl_loss_2": 4377.355871582031, |
| "kl_loss_4": 3676.47099609375, |
| "kl_loss_9": 1971.5729553222657, |
| "learning_rate": 0.0009992726191210138, |
| "loss": 2638.8242, |
| "step": 270 |
| }, |
| { |
| "ce_loss_13": 3.4747392416000364, |
| "ce_loss_17": 3.128302311897278, |
| "ce_loss_2": 5.24924533367157, |
| "ce_loss_4": 4.8899637937545775, |
| "ce_loss_9": 4.055513346195221, |
| "epoch": 0.028, |
| "grad_norm": 1120.0, |
| "kl_loss_13": 704.5193634033203, |
| "kl_loss_2": 4159.834167480469, |
| "kl_loss_4": 3484.457470703125, |
| "kl_loss_9": 1888.2344665527344, |
| "learning_rate": 0.0009991845519630679, |
| "loss": 2530.2309, |
| "step": 280 |
| }, |
| { |
| "ce_loss_13": 3.3332153797149657, |
| "ce_loss_17": 3.0172152161598205, |
| "ce_loss_2": 5.156639909744262, |
| "ce_loss_4": 4.787631249427795, |
| "ce_loss_9": 3.9519541382789614, |
| "epoch": 0.029, |
| "grad_norm": 924.0, |
| "kl_loss_13": 660.3054718017578, |
| "kl_loss_2": 4186.50458984375, |
| "kl_loss_4": 3497.2395141601564, |
| "kl_loss_9": 1890.07255859375, |
| "learning_rate": 0.0009990914580222257, |
| "loss": 2556.2297, |
| "step": 290 |
| }, |
| { |
| "ce_loss_13": 3.4582252860069276, |
| "ce_loss_17": 3.155684804916382, |
| "ce_loss_2": 5.200908660888672, |
| "ce_loss_4": 4.830238652229309, |
| "ce_loss_9": 4.016579568386078, |
| "epoch": 0.03, |
| "grad_norm": 952.0, |
| "kl_loss_13": 627.9629150390625, |
| "kl_loss_2": 4037.119091796875, |
| "kl_loss_4": 3348.5225341796877, |
| "kl_loss_9": 1776.173828125, |
| "learning_rate": 0.0009989933382359422, |
| "loss": 2503.1105, |
| "step": 300 |
| }, |
| { |
| "ce_loss_13": 3.451007342338562, |
| "ce_loss_17": 3.1639443755149843, |
| "ce_loss_2": 5.20041708946228, |
| "ce_loss_4": 4.834484934806824, |
| "ce_loss_9": 4.0087680459022526, |
| "epoch": 0.031, |
| "grad_norm": 1120.0, |
| "kl_loss_13": 595.6909759521484, |
| "kl_loss_2": 4011.2225830078123, |
| "kl_loss_4": 3322.5175048828123, |
| "kl_loss_9": 1745.5184753417968, |
| "learning_rate": 0.0009988901935922825, |
| "loss": 2437.718, |
| "step": 310 |
| }, |
| { |
| "ce_loss_13": 3.304946184158325, |
| "ce_loss_17": 3.0153247833251955, |
| "ce_loss_2": 5.156751942634583, |
| "ce_loss_4": 4.787038731575012, |
| "ce_loss_9": 3.919682335853577, |
| "epoch": 0.032, |
| "grad_norm": 924.0, |
| "kl_loss_13": 604.137646484375, |
| "kl_loss_2": 4230.719775390625, |
| "kl_loss_4": 3536.085498046875, |
| "kl_loss_9": 1846.253662109375, |
| "learning_rate": 0.0009987820251299122, |
| "loss": 2487.0752, |
| "step": 320 |
| }, |
| { |
| "ce_loss_13": 3.413302314281464, |
| "ce_loss_17": 3.139352059364319, |
| "ce_loss_2": 5.166732549667358, |
| "ce_loss_4": 4.820427799224854, |
| "ce_loss_9": 3.973645544052124, |
| "epoch": 0.033, |
| "grad_norm": 972.0, |
| "kl_loss_13": 565.111538696289, |
| "kl_loss_2": 4011.9102294921877, |
| "kl_loss_4": 3371.32333984375, |
| "kl_loss_9": 1726.3803161621095, |
| "learning_rate": 0.0009986688339380862, |
| "loss": 2402.9094, |
| "step": 330 |
| }, |
| { |
| "ce_loss_13": 3.3493018984794616, |
| "ce_loss_17": 3.0934662342071535, |
| "ce_loss_2": 5.086379742622375, |
| "ce_loss_4": 4.723411202430725, |
| "ce_loss_9": 3.8982354640960692, |
| "epoch": 0.034, |
| "grad_norm": 940.0, |
| "kl_loss_13": 542.8931762695313, |
| "kl_loss_2": 3903.7267578125, |
| "kl_loss_4": 3232.934191894531, |
| "kl_loss_9": 1664.3126037597656, |
| "learning_rate": 0.0009985506211566387, |
| "loss": 2357.9805, |
| "step": 340 |
| }, |
| { |
| "ce_loss_13": 3.3917447686195374, |
| "ce_loss_17": 3.1199039578437806, |
| "ce_loss_2": 5.09677414894104, |
| "ce_loss_4": 4.713316965103149, |
| "ce_loss_9": 3.9090429663658144, |
| "epoch": 0.035, |
| "grad_norm": 1128.0, |
| "kl_loss_13": 561.1893493652344, |
| "kl_loss_2": 3892.9814208984376, |
| "kl_loss_4": 3178.0161254882814, |
| "kl_loss_9": 1634.4148376464843, |
| "learning_rate": 0.0009984273879759713, |
| "loss": 2331.2398, |
| "step": 350 |
| }, |
| { |
| "ce_loss_13": 3.425916314125061, |
| "ce_loss_17": 3.1532268047332765, |
| "ce_loss_2": 5.1598622560501095, |
| "ce_loss_4": 4.779404020309448, |
| "ce_loss_9": 3.9879016995429994, |
| "epoch": 0.036, |
| "grad_norm": 880.0, |
| "kl_loss_13": 565.4670867919922, |
| "kl_loss_2": 3953.1918212890623, |
| "kl_loss_4": 3235.9317016601562, |
| "kl_loss_9": 1712.1473815917968, |
| "learning_rate": 0.0009982991356370402, |
| "loss": 2392.2691, |
| "step": 360 |
| }, |
| { |
| "ce_loss_13": 3.3816256880760194, |
| "ce_loss_17": 3.1269331216812133, |
| "ce_loss_2": 5.104949283599853, |
| "ce_loss_4": 4.7368521928787235, |
| "ce_loss_9": 3.9427488803863526, |
| "epoch": 0.037, |
| "grad_norm": 936.0, |
| "kl_loss_13": 541.7421142578125, |
| "kl_loss_2": 3898.096301269531, |
| "kl_loss_4": 3200.548059082031, |
| "kl_loss_9": 1667.4443786621093, |
| "learning_rate": 0.0009981658654313456, |
| "loss": 2349.4039, |
| "step": 370 |
| }, |
| { |
| "ce_loss_13": 3.453015315532684, |
| "ce_loss_17": 3.2010253190994264, |
| "ce_loss_2": 5.116927909851074, |
| "ce_loss_4": 4.758021831512451, |
| "ce_loss_9": 3.9752739906311034, |
| "epoch": 0.038, |
| "grad_norm": 896.0, |
| "kl_loss_13": 523.2178573608398, |
| "kl_loss_2": 3786.7024658203127, |
| "kl_loss_4": 3119.5727661132814, |
| "kl_loss_9": 1609.0381713867187, |
| "learning_rate": 0.000998027578700917, |
| "loss": 2295.4098, |
| "step": 380 |
| }, |
| { |
| "ce_loss_13": 3.3921372056007386, |
| "ce_loss_17": 3.141234886646271, |
| "ce_loss_2": 5.104283928871155, |
| "ce_loss_4": 4.739434242248535, |
| "ce_loss_9": 3.9265757322311403, |
| "epoch": 0.039, |
| "grad_norm": 884.0, |
| "kl_loss_13": 538.9760391235352, |
| "kl_loss_2": 3863.5144775390627, |
| "kl_loss_4": 3176.7807495117186, |
| "kl_loss_9": 1626.165216064453, |
| "learning_rate": 0.0009978842768382998, |
| "loss": 2327.7336, |
| "step": 390 |
| }, |
| { |
| "ce_loss_13": 3.410324442386627, |
| "ce_loss_17": 3.1595824122428895, |
| "ce_loss_2": 5.0791549444198605, |
| "ce_loss_4": 4.700193262100219, |
| "ce_loss_9": 3.906184220314026, |
| "epoch": 0.04, |
| "grad_norm": 772.0, |
| "kl_loss_13": 530.6867767333985, |
| "kl_loss_2": 3779.2588012695314, |
| "kl_loss_4": 3081.442919921875, |
| "kl_loss_9": 1544.514111328125, |
| "learning_rate": 0.0009977359612865424, |
| "loss": 2250.3691, |
| "step": 400 |
| }, |
| { |
| "ce_loss_13": 3.418565273284912, |
| "ce_loss_17": 3.1605410099029543, |
| "ce_loss_2": 5.10105881690979, |
| "ce_loss_4": 4.724870347976685, |
| "ce_loss_9": 3.9385703206062317, |
| "epoch": 0.041, |
| "grad_norm": 884.0, |
| "kl_loss_13": 541.151644897461, |
| "kl_loss_2": 3817.33115234375, |
| "kl_loss_4": 3123.6171020507813, |
| "kl_loss_9": 1620.2747863769532, |
| "learning_rate": 0.0009975826335391806, |
| "loss": 2258.5668, |
| "step": 410 |
| }, |
| { |
| "ce_loss_13": 3.432558834552765, |
| "ce_loss_17": 3.187964618206024, |
| "ce_loss_2": 5.072172045707703, |
| "ce_loss_4": 4.714474868774414, |
| "ce_loss_9": 3.909123384952545, |
| "epoch": 0.042, |
| "grad_norm": 876.0, |
| "kl_loss_13": 517.4167633056641, |
| "kl_loss_2": 3730.3439208984373, |
| "kl_loss_4": 3062.949841308594, |
| "kl_loss_9": 1526.067608642578, |
| "learning_rate": 0.0009974242951402235, |
| "loss": 2228.7236, |
| "step": 420 |
| }, |
| { |
| "ce_loss_13": 3.441532075405121, |
| "ce_loss_17": 3.1943300485610964, |
| "ce_loss_2": 5.1000643491745, |
| "ce_loss_4": 4.739845228195191, |
| "ce_loss_9": 3.94395512342453, |
| "epoch": 0.043, |
| "grad_norm": 872.0, |
| "kl_loss_13": 522.9370330810547, |
| "kl_loss_2": 3779.383935546875, |
| "kl_loss_4": 3116.0631713867188, |
| "kl_loss_9": 1578.3824645996094, |
| "learning_rate": 0.0009972609476841367, |
| "loss": 2220.5199, |
| "step": 430 |
| }, |
| { |
| "ce_loss_13": 3.3502190709114075, |
| "ce_loss_17": 3.10364625453949, |
| "ce_loss_2": 5.051486015319824, |
| "ce_loss_4": 4.704463362693787, |
| "ce_loss_9": 3.868996226787567, |
| "epoch": 0.044, |
| "grad_norm": 1000.0, |
| "kl_loss_13": 501.1860717773437, |
| "kl_loss_2": 3837.9580078125, |
| "kl_loss_4": 3182.2989868164063, |
| "kl_loss_9": 1584.0864196777343, |
| "learning_rate": 0.0009970925928158272, |
| "loss": 2270.2719, |
| "step": 440 |
| }, |
| { |
| "ce_loss_13": 3.3077200293540954, |
| "ce_loss_17": 3.052558660507202, |
| "ce_loss_2": 5.004787874221802, |
| "ce_loss_4": 4.654259443283081, |
| "ce_loss_9": 3.834864377975464, |
| "epoch": 0.045, |
| "grad_norm": 908.0, |
| "kl_loss_13": 526.1941101074219, |
| "kl_loss_2": 3893.235632324219, |
| "kl_loss_4": 3244.5399658203123, |
| "kl_loss_9": 1645.8276733398438, |
| "learning_rate": 0.000996919232230627, |
| "loss": 2297.8324, |
| "step": 450 |
| }, |
| { |
| "ce_loss_13": 3.3651317954063416, |
| "ce_loss_17": 3.135745894908905, |
| "ce_loss_2": 5.008993244171142, |
| "ce_loss_4": 4.660128760337829, |
| "ce_loss_9": 3.876137411594391, |
| "epoch": 0.046, |
| "grad_norm": 872.0, |
| "kl_loss_13": 488.7731307983398, |
| "kl_loss_2": 3702.9694702148436, |
| "kl_loss_4": 3058.5682373046875, |
| "kl_loss_9": 1538.7457336425782, |
| "learning_rate": 0.0009967408676742752, |
| "loss": 2161.1371, |
| "step": 460 |
| }, |
| { |
| "ce_loss_13": 3.508651888370514, |
| "ce_loss_17": 3.2760107040405275, |
| "ce_loss_2": 5.099280881881714, |
| "ce_loss_4": 4.750043106079102, |
| "ce_loss_9": 3.9776250600814818, |
| "epoch": 0.047, |
| "grad_norm": 808.0, |
| "kl_loss_13": 499.91826171875, |
| "kl_loss_2": 3634.514782714844, |
| "kl_loss_4": 2981.7106079101563, |
| "kl_loss_9": 1495.2112365722655, |
| "learning_rate": 0.0009965575009429006, |
| "loss": 2217.815, |
| "step": 470 |
| }, |
| { |
| "ce_loss_13": 3.2985710740089416, |
| "ce_loss_17": 3.0624053478240967, |
| "ce_loss_2": 4.982487034797669, |
| "ce_loss_4": 4.624861192703247, |
| "ce_loss_9": 3.8066399455070496, |
| "epoch": 0.048, |
| "grad_norm": 836.0, |
| "kl_loss_13": 497.1519607543945, |
| "kl_loss_2": 3802.2880737304686, |
| "kl_loss_4": 3130.960974121094, |
| "kl_loss_9": 1545.8674865722655, |
| "learning_rate": 0.0009963691338830043, |
| "loss": 2209.7576, |
| "step": 480 |
| }, |
| { |
| "ce_loss_13": 3.3796708941459657, |
| "ce_loss_17": 3.156713938713074, |
| "ce_loss_2": 5.025083065032959, |
| "ce_loss_4": 4.67227258682251, |
| "ce_loss_9": 3.861804699897766, |
| "epoch": 0.049, |
| "grad_norm": 1040.0, |
| "kl_loss_13": 481.88634796142577, |
| "kl_loss_2": 3728.9262451171876, |
| "kl_loss_4": 3073.272985839844, |
| "kl_loss_9": 1493.6070373535156, |
| "learning_rate": 0.0009961757683914405, |
| "loss": 2169.6145, |
| "step": 490 |
| }, |
| { |
| "ce_loss_13": 3.393477773666382, |
| "ce_loss_17": 3.1482961654663084, |
| "ce_loss_2": 4.970569515228272, |
| "ce_loss_4": 4.620833873748779, |
| "ce_loss_9": 3.8702486991882323, |
| "epoch": 0.05, |
| "grad_norm": 912.0, |
| "kl_loss_13": 497.7796997070312, |
| "kl_loss_2": 3622.9356079101562, |
| "kl_loss_4": 2982.1828002929688, |
| "kl_loss_9": 1501.4689147949218, |
| "learning_rate": 0.0009959774064153978, |
| "loss": 2175.5449, |
| "step": 500 |
| }, |
| { |
| "ce_loss_13": 3.373159205913544, |
| "ce_loss_17": 3.1612361669540405, |
| "ce_loss_2": 4.950432252883911, |
| "ce_loss_4": 4.596165728569031, |
| "ce_loss_9": 3.8339231848716735, |
| "epoch": 0.051, |
| "grad_norm": 1000.0, |
| "kl_loss_13": 466.17348327636716, |
| "kl_loss_2": 3585.5204223632813, |
| "kl_loss_4": 2909.660534667969, |
| "kl_loss_9": 1441.3450439453125, |
| "learning_rate": 0.0009957740499523787, |
| "loss": 2137.3928, |
| "step": 510 |
| }, |
| { |
| "ce_loss_13": 3.3906698346138002, |
| "ce_loss_17": 3.1723674893379212, |
| "ce_loss_2": 4.9782023429870605, |
| "ce_loss_4": 4.625633692741394, |
| "ce_loss_9": 3.8561393857002257, |
| "epoch": 0.052, |
| "grad_norm": 872.0, |
| "kl_loss_13": 468.9834732055664, |
| "kl_loss_2": 3579.528515625, |
| "kl_loss_4": 2921.1966186523437, |
| "kl_loss_9": 1446.9594177246095, |
| "learning_rate": 0.0009955657010501807, |
| "loss": 2120.3191, |
| "step": 520 |
| }, |
| { |
| "ce_loss_13": 3.3632453083992004, |
| "ce_loss_17": 3.1311764240264894, |
| "ce_loss_2": 4.969284391403198, |
| "ce_loss_4": 4.618723821640015, |
| "ce_loss_9": 3.836634063720703, |
| "epoch": 0.053, |
| "grad_norm": 892.0, |
| "kl_loss_13": 484.82396392822267, |
| "kl_loss_2": 3645.302209472656, |
| "kl_loss_4": 2990.6371337890623, |
| "kl_loss_9": 1465.7335388183594, |
| "learning_rate": 0.000995352361806875, |
| "loss": 2134.5391, |
| "step": 530 |
| }, |
| { |
| "ce_loss_13": 3.394209456443787, |
| "ce_loss_17": 3.1703338265419005, |
| "ce_loss_2": 4.986485123634338, |
| "ce_loss_4": 4.635237050056458, |
| "ce_loss_9": 3.864534831047058, |
| "epoch": 0.054, |
| "grad_norm": 796.0, |
| "kl_loss_13": 484.2129165649414, |
| "kl_loss_2": 3635.974816894531, |
| "kl_loss_4": 2980.203173828125, |
| "kl_loss_9": 1478.2803466796875, |
| "learning_rate": 0.0009951340343707852, |
| "loss": 2166.8965, |
| "step": 540 |
| }, |
| { |
| "ce_loss_13": 3.4342824816703796, |
| "ce_loss_17": 3.220643925666809, |
| "ce_loss_2": 5.035447287559509, |
| "ce_loss_4": 4.687987613677978, |
| "ce_loss_9": 3.910528600215912, |
| "epoch": 0.055, |
| "grad_norm": 852.0, |
| "kl_loss_13": 456.5559326171875, |
| "kl_loss_2": 3591.8749755859376, |
| "kl_loss_4": 2949.824658203125, |
| "kl_loss_9": 1444.964910888672, |
| "learning_rate": 0.0009949107209404665, |
| "loss": 2138.0064, |
| "step": 550 |
| }, |
| { |
| "ce_loss_13": 3.35660035610199, |
| "ce_loss_17": 3.141009175777435, |
| "ce_loss_2": 4.941444396972656, |
| "ce_loss_4": 4.582303762435913, |
| "ce_loss_9": 3.826809287071228, |
| "epoch": 0.056, |
| "grad_norm": 1040.0, |
| "kl_loss_13": 456.8359939575195, |
| "kl_loss_2": 3589.2345581054688, |
| "kl_loss_4": 2928.601123046875, |
| "kl_loss_9": 1446.8042541503905, |
| "learning_rate": 0.0009946824237646824, |
| "loss": 2115.8109, |
| "step": 560 |
| }, |
| { |
| "ce_loss_13": 3.3314553022384645, |
| "ce_loss_17": 3.092339289188385, |
| "ce_loss_2": 4.9242928981781, |
| "ce_loss_4": 4.569640302658081, |
| "ce_loss_9": 3.800382375717163, |
| "epoch": 0.057, |
| "grad_norm": 852.0, |
| "kl_loss_13": 503.4763381958008, |
| "kl_loss_2": 3663.1847900390626, |
| "kl_loss_4": 3005.309289550781, |
| "kl_loss_9": 1497.809063720703, |
| "learning_rate": 0.0009944491451423828, |
| "loss": 2198.6785, |
| "step": 570 |
| }, |
| { |
| "ce_loss_13": 3.3124619722366333, |
| "ce_loss_17": 3.0873939990997314, |
| "ce_loss_2": 4.940806913375854, |
| "ce_loss_4": 4.576763057708741, |
| "ce_loss_9": 3.7953989505767822, |
| "epoch": 0.058, |
| "grad_norm": 852.0, |
| "kl_loss_13": 482.5867614746094, |
| "kl_loss_2": 3691.87294921875, |
| "kl_loss_4": 3010.1155395507812, |
| "kl_loss_9": 1493.1613891601562, |
| "learning_rate": 0.0009942108874226813, |
| "loss": 2134.6461, |
| "step": 580 |
| }, |
| { |
| "ce_loss_13": 3.4112470388412475, |
| "ce_loss_17": 3.1972869753837587, |
| "ce_loss_2": 4.959044456481934, |
| "ce_loss_4": 4.598792099952698, |
| "ce_loss_9": 3.8505393266677856, |
| "epoch": 0.059, |
| "grad_norm": 856.0, |
| "kl_loss_13": 454.33775787353517, |
| "kl_loss_2": 3522.612927246094, |
| "kl_loss_4": 2852.199841308594, |
| "kl_loss_9": 1392.1672607421874, |
| "learning_rate": 0.00099396765300483, |
| "loss": 2048.2014, |
| "step": 590 |
| }, |
| { |
| "ce_loss_13": 3.395838713645935, |
| "ce_loss_17": 3.175139045715332, |
| "ce_loss_2": 4.9531395673751835, |
| "ce_loss_4": 4.588991045951843, |
| "ce_loss_9": 3.8398922204971315, |
| "epoch": 0.06, |
| "grad_norm": 844.0, |
| "kl_loss_13": 465.70201416015624, |
| "kl_loss_2": 3545.3302001953125, |
| "kl_loss_4": 2863.952673339844, |
| "kl_loss_9": 1401.6750427246093, |
| "learning_rate": 0.0009937194443381972, |
| "loss": 2069.59, |
| "step": 600 |
| }, |
| { |
| "ce_loss_13": 3.416799175739288, |
| "ce_loss_17": 3.2072720646858217, |
| "ce_loss_2": 4.932146883010864, |
| "ce_loss_4": 4.583581805229187, |
| "ce_loss_9": 3.8486327171325683, |
| "epoch": 0.061, |
| "grad_norm": 908.0, |
| "kl_loss_13": 455.0954193115234, |
| "kl_loss_2": 3452.7316650390626, |
| "kl_loss_4": 2795.195202636719, |
| "kl_loss_9": 1371.477117919922, |
| "learning_rate": 0.0009934662639222412, |
| "loss": 2075.8383, |
| "step": 610 |
| }, |
| { |
| "ce_loss_13": 3.372501254081726, |
| "ce_loss_17": 3.157397246360779, |
| "ce_loss_2": 4.967001247406006, |
| "ce_loss_4": 4.606789755821228, |
| "ce_loss_9": 3.8294251680374147, |
| "epoch": 0.062, |
| "grad_norm": 844.0, |
| "kl_loss_13": 465.4024383544922, |
| "kl_loss_2": 3617.4651611328127, |
| "kl_loss_4": 2942.0623413085937, |
| "kl_loss_9": 1422.85283203125, |
| "learning_rate": 0.000993208114306486, |
| "loss": 2087.9994, |
| "step": 620 |
| }, |
| { |
| "ce_loss_13": 3.299955499172211, |
| "ce_loss_17": 3.085653471946716, |
| "ce_loss_2": 4.910202121734619, |
| "ce_loss_4": 4.551524496078491, |
| "ce_loss_9": 3.762219512462616, |
| "epoch": 0.063, |
| "grad_norm": 936.0, |
| "kl_loss_13": 456.752978515625, |
| "kl_loss_2": 3618.606591796875, |
| "kl_loss_4": 2956.915478515625, |
| "kl_loss_9": 1419.0578186035157, |
| "learning_rate": 0.0009929449980904952, |
| "loss": 2054.7795, |
| "step": 630 |
| }, |
| { |
| "ce_loss_13": 3.3500998497009276, |
| "ce_loss_17": 3.1439953327178953, |
| "ce_loss_2": 4.917953848838806, |
| "ce_loss_4": 4.563798975944519, |
| "ce_loss_9": 3.8012561559677125, |
| "epoch": 0.064, |
| "grad_norm": 860.0, |
| "kl_loss_13": 451.0824035644531, |
| "kl_loss_2": 3543.6160400390627, |
| "kl_loss_4": 2889.9016845703127, |
| "kl_loss_9": 1407.3394592285156, |
| "learning_rate": 0.0009926769179238466, |
| "loss": 2053.4785, |
| "step": 640 |
| }, |
| { |
| "ce_loss_13": 3.3938300132751467, |
| "ce_loss_17": 3.1796295642852783, |
| "ce_loss_2": 4.945849442481995, |
| "ce_loss_4": 4.598684406280517, |
| "ce_loss_9": 3.831195759773254, |
| "epoch": 0.065, |
| "grad_norm": 1000.0, |
| "kl_loss_13": 467.66880493164064, |
| "kl_loss_2": 3539.2062744140626, |
| "kl_loss_4": 2889.17568359375, |
| "kl_loss_9": 1397.6646301269532, |
| "learning_rate": 0.000992403876506104, |
| "loss": 2064.1902, |
| "step": 650 |
| }, |
| { |
| "ce_loss_13": 3.33291871547699, |
| "ce_loss_17": 3.122549068927765, |
| "ce_loss_2": 4.897202277183533, |
| "ce_loss_4": 4.550416302680969, |
| "ce_loss_9": 3.773349571228027, |
| "epoch": 0.066, |
| "grad_norm": 916.0, |
| "kl_loss_13": 449.5287139892578, |
| "kl_loss_2": 3530.60322265625, |
| "kl_loss_4": 2879.1534912109373, |
| "kl_loss_9": 1377.0751831054688, |
| "learning_rate": 0.0009921258765867918, |
| "loss": 2061.1312, |
| "step": 660 |
| }, |
| { |
| "ce_loss_13": 3.2969218492507935, |
| "ce_loss_17": 3.093582308292389, |
| "ce_loss_2": 4.896408152580261, |
| "ce_loss_4": 4.5536270380020145, |
| "ce_loss_9": 3.7574182629585264, |
| "epoch": 0.067, |
| "grad_norm": 1040.0, |
| "kl_loss_13": 460.2426361083984, |
| "kl_loss_2": 3624.5967041015624, |
| "kl_loss_4": 2981.2020263671875, |
| "kl_loss_9": 1420.056658935547, |
| "learning_rate": 0.0009918429209653662, |
| "loss": 2083.3785, |
| "step": 670 |
| }, |
| { |
| "ce_loss_13": 3.3495418548583986, |
| "ce_loss_17": 3.142987310886383, |
| "ce_loss_2": 4.915597724914551, |
| "ce_loss_4": 4.5686545133590695, |
| "ce_loss_9": 3.788528251647949, |
| "epoch": 0.068, |
| "grad_norm": 848.0, |
| "kl_loss_13": 460.44593811035156, |
| "kl_loss_2": 3561.2265625, |
| "kl_loss_4": 2914.064172363281, |
| "kl_loss_9": 1386.4138977050782, |
| "learning_rate": 0.0009915550124911866, |
| "loss": 2034.4879, |
| "step": 680 |
| }, |
| { |
| "ce_loss_13": 3.3552000880241395, |
| "ce_loss_17": 3.1527761816978455, |
| "ce_loss_2": 4.8922336339950565, |
| "ce_loss_4": 4.545932102203369, |
| "ce_loss_9": 3.7837376952171327, |
| "epoch": 0.069, |
| "grad_norm": 1080.0, |
| "kl_loss_13": 433.5186996459961, |
| "kl_loss_2": 3450.9926147460938, |
| "kl_loss_4": 2809.521240234375, |
| "kl_loss_9": 1340.025860595703, |
| "learning_rate": 0.0009912621540634887, |
| "loss": 2019.2238, |
| "step": 690 |
| }, |
| { |
| "ce_loss_13": 3.379618489742279, |
| "ce_loss_17": 3.1929449081420898, |
| "ce_loss_2": 4.886630058288574, |
| "ce_loss_4": 4.542127203941345, |
| "ce_loss_9": 3.795738422870636, |
| "epoch": 0.07, |
| "grad_norm": 976.0, |
| "kl_loss_13": 418.6984573364258, |
| "kl_loss_2": 3421.0200073242186, |
| "kl_loss_4": 2775.9146240234377, |
| "kl_loss_9": 1314.91328125, |
| "learning_rate": 0.0009909643486313534, |
| "loss": 2005.057, |
| "step": 700 |
| }, |
| { |
| "ce_loss_13": 3.2766343474388124, |
| "ce_loss_17": 3.077045226097107, |
| "ce_loss_2": 4.856631731986999, |
| "ce_loss_4": 4.491817474365234, |
| "ce_loss_9": 3.7208046078681947, |
| "epoch": 0.071, |
| "grad_norm": 888.0, |
| "kl_loss_13": 429.47057189941404, |
| "kl_loss_2": 3550.787890625, |
| "kl_loss_4": 2882.5520629882812, |
| "kl_loss_9": 1377.0965881347656, |
| "learning_rate": 0.000990661599193678, |
| "loss": 2090.2475, |
| "step": 710 |
| }, |
| { |
| "ce_loss_13": 3.392967128753662, |
| "ce_loss_17": 3.193705677986145, |
| "ce_loss_2": 4.90472264289856, |
| "ce_loss_4": 4.552316355705261, |
| "ce_loss_9": 3.813526916503906, |
| "epoch": 0.072, |
| "grad_norm": 940.0, |
| "kl_loss_13": 432.0402038574219, |
| "kl_loss_2": 3425.7497680664064, |
| "kl_loss_4": 2772.5600341796876, |
| "kl_loss_9": 1330.7037780761718, |
| "learning_rate": 0.0009903539087991462, |
| "loss": 1996.8686, |
| "step": 720 |
| }, |
| { |
| "ce_loss_13": 3.369348168373108, |
| "ce_loss_17": 3.173522746562958, |
| "ce_loss_2": 4.89017391204834, |
| "ce_loss_4": 4.537626624107361, |
| "ce_loss_9": 3.785863721370697, |
| "epoch": 0.073, |
| "grad_norm": 928.0, |
| "kl_loss_13": 426.95457763671874, |
| "kl_loss_2": 3446.7977661132813, |
| "kl_loss_4": 2792.6529296875, |
| "kl_loss_9": 1319.9725280761718, |
| "learning_rate": 0.0009900412805461966, |
| "loss": 2006.5203, |
| "step": 730 |
| }, |
| { |
| "ce_loss_13": 3.4318490028381348, |
| "ce_loss_17": 3.2430662155151366, |
| "ce_loss_2": 4.922482419013977, |
| "ce_loss_4": 4.573343729972839, |
| "ce_loss_9": 3.8417624115943907, |
| "epoch": 0.074, |
| "grad_norm": 936.0, |
| "kl_loss_13": 413.79088287353517, |
| "kl_loss_2": 3397.953369140625, |
| "kl_loss_4": 2738.3268310546873, |
| "kl_loss_9": 1293.321337890625, |
| "learning_rate": 0.0009897237175829927, |
| "loss": 1988.8957, |
| "step": 740 |
| }, |
| { |
| "ce_loss_13": 3.3247160077095033, |
| "ce_loss_17": 3.1352283358573914, |
| "ce_loss_2": 4.865108942985534, |
| "ce_loss_4": 4.5231712579727175, |
| "ce_loss_9": 3.7554025769233705, |
| "epoch": 0.075, |
| "grad_norm": 844.0, |
| "kl_loss_13": 421.07178649902346, |
| "kl_loss_2": 3469.0927978515624, |
| "kl_loss_4": 2825.906884765625, |
| "kl_loss_9": 1338.857568359375, |
| "learning_rate": 0.0009894012231073895, |
| "loss": 1998.8957, |
| "step": 750 |
| }, |
| { |
| "ce_loss_13": 3.368278980255127, |
| "ce_loss_17": 3.1781376719474794, |
| "ce_loss_2": 4.8757919549942015, |
| "ce_loss_4": 4.5389830589294435, |
| "ce_loss_9": 3.7914411664009093, |
| "epoch": 0.076, |
| "grad_norm": 944.0, |
| "kl_loss_13": 409.7508941650391, |
| "kl_loss_2": 3407.4770629882814, |
| "kl_loss_4": 2791.3968139648437, |
| "kl_loss_9": 1314.810546875, |
| "learning_rate": 0.0009890738003669028, |
| "loss": 2008.9746, |
| "step": 760 |
| }, |
| { |
| "ce_loss_13": 3.343692898750305, |
| "ce_loss_17": 3.14743994474411, |
| "ce_loss_2": 4.8914374828338625, |
| "ce_loss_4": 4.553251171112061, |
| "ce_loss_9": 3.781083512306213, |
| "epoch": 0.077, |
| "grad_norm": 968.0, |
| "kl_loss_13": 423.18428192138674, |
| "kl_loss_2": 3502.153857421875, |
| "kl_loss_4": 2875.511267089844, |
| "kl_loss_9": 1362.31875, |
| "learning_rate": 0.0009887414526586764, |
| "loss": 1988.7578, |
| "step": 770 |
| }, |
| { |
| "ce_loss_13": 3.3911314845085143, |
| "ce_loss_17": 3.2056641221046447, |
| "ce_loss_2": 4.897607350349427, |
| "ce_loss_4": 4.562703633308411, |
| "ce_loss_9": 3.8049958825111387, |
| "epoch": 0.078, |
| "grad_norm": 936.0, |
| "kl_loss_13": 414.01283569335936, |
| "kl_loss_2": 3399.990588378906, |
| "kl_loss_4": 2770.4991088867187, |
| "kl_loss_9": 1288.91259765625, |
| "learning_rate": 0.0009884041833294476, |
| "loss": 1940.1848, |
| "step": 780 |
| }, |
| { |
| "ce_loss_13": 3.391792869567871, |
| "ce_loss_17": 3.2053155303001404, |
| "ce_loss_2": 4.873739266395569, |
| "ce_loss_4": 4.540884971618652, |
| "ce_loss_9": 3.801806998252869, |
| "epoch": 0.079, |
| "grad_norm": 860.0, |
| "kl_loss_13": 415.51280975341797, |
| "kl_loss_2": 3357.626599121094, |
| "kl_loss_4": 2732.0444458007814, |
| "kl_loss_9": 1285.20927734375, |
| "learning_rate": 0.000988061995775515, |
| "loss": 2015.0715, |
| "step": 790 |
| }, |
| { |
| "ce_loss_13": 3.325045144557953, |
| "ce_loss_17": 3.1392316699028013, |
| "ce_loss_2": 4.812699151039124, |
| "ce_loss_4": 4.479365229606628, |
| "ce_loss_9": 3.7390761733055116, |
| "epoch": 0.08, |
| "grad_norm": 1040.0, |
| "kl_loss_13": 415.2960464477539, |
| "kl_loss_2": 3374.4686889648438, |
| "kl_loss_4": 2754.907177734375, |
| "kl_loss_9": 1315.3015991210937, |
| "learning_rate": 0.0009877148934427035, |
| "loss": 1966.6633, |
| "step": 800 |
| }, |
| { |
| "ce_loss_13": 3.361221945285797, |
| "ce_loss_17": 3.174115777015686, |
| "ce_loss_2": 4.865154695510864, |
| "ce_loss_4": 4.531916379928589, |
| "ce_loss_9": 3.779590427875519, |
| "epoch": 0.081, |
| "grad_norm": 888.0, |
| "kl_loss_13": 412.87647705078126, |
| "kl_loss_2": 3389.8561889648436, |
| "kl_loss_4": 2766.3066284179686, |
| "kl_loss_9": 1303.5166809082032, |
| "learning_rate": 0.0009873628798263297, |
| "loss": 1945.9953, |
| "step": 810 |
| }, |
| { |
| "ce_loss_13": 3.3172059178352358, |
| "ce_loss_17": 3.1370773911476135, |
| "ce_loss_2": 4.791570925712586, |
| "ce_loss_4": 4.455942940711975, |
| "ce_loss_9": 3.718342387676239, |
| "epoch": 0.082, |
| "grad_norm": 880.0, |
| "kl_loss_13": 402.0048858642578, |
| "kl_loss_2": 3330.507568359375, |
| "kl_loss_4": 2707.251501464844, |
| "kl_loss_9": 1269.6024780273438, |
| "learning_rate": 0.0009870059584711668, |
| "loss": 1972.2762, |
| "step": 820 |
| }, |
| { |
| "ce_loss_13": 3.3350242495536806, |
| "ce_loss_17": 3.1476898074150084, |
| "ce_loss_2": 4.811323523521423, |
| "ce_loss_4": 4.477843689918518, |
| "ce_loss_9": 3.755842161178589, |
| "epoch": 0.083, |
| "grad_norm": 800.0, |
| "kl_loss_13": 398.83159790039065, |
| "kl_loss_2": 3356.8597045898437, |
| "kl_loss_4": 2725.174353027344, |
| "kl_loss_9": 1297.8435302734374, |
| "learning_rate": 0.000986644132971409, |
| "loss": 1944.8152, |
| "step": 830 |
| }, |
| { |
| "ce_loss_13": 3.3208301186561586, |
| "ce_loss_17": 3.1352628588676454, |
| "ce_loss_2": 4.8309894561767575, |
| "ce_loss_4": 4.504492831230164, |
| "ce_loss_9": 3.758168947696686, |
| "epoch": 0.084, |
| "grad_norm": 960.0, |
| "kl_loss_13": 401.25804901123047, |
| "kl_loss_2": 3404.7124145507814, |
| "kl_loss_4": 2786.1770751953127, |
| "kl_loss_9": 1333.3421508789063, |
| "learning_rate": 0.0009862774069706345, |
| "loss": 1964.8699, |
| "step": 840 |
| }, |
| { |
| "ce_loss_13": 3.437472343444824, |
| "ce_loss_17": 3.2665435791015627, |
| "ce_loss_2": 4.876515960693359, |
| "ce_loss_4": 4.547065806388855, |
| "ce_loss_9": 3.841744804382324, |
| "epoch": 0.085, |
| "grad_norm": 960.0, |
| "kl_loss_13": 389.9429443359375, |
| "kl_loss_2": 3285.643798828125, |
| "kl_loss_4": 2672.3885009765627, |
| "kl_loss_9": 1282.2401489257813, |
| "learning_rate": 0.000985905784161771, |
| "loss": 1930.4863, |
| "step": 850 |
| }, |
| { |
| "ce_loss_13": 3.3625681519508364, |
| "ce_loss_17": 3.191510498523712, |
| "ce_loss_2": 4.824199914932251, |
| "ce_loss_4": 4.498007225990295, |
| "ce_loss_9": 3.770563566684723, |
| "epoch": 0.086, |
| "grad_norm": 844.0, |
| "kl_loss_13": 383.7548110961914, |
| "kl_loss_2": 3301.8364379882814, |
| "kl_loss_4": 2693.669140625, |
| "kl_loss_9": 1280.2837768554687, |
| "learning_rate": 0.000985529268287055, |
| "loss": 1911.0293, |
| "step": 860 |
| }, |
| { |
| "ce_loss_13": 3.3050955653190615, |
| "ce_loss_17": 3.123162257671356, |
| "ce_loss_2": 4.819775795936584, |
| "ce_loss_4": 4.47889621257782, |
| "ce_loss_9": 3.7266830921173097, |
| "epoch": 0.087, |
| "grad_norm": 772.0, |
| "kl_loss_13": 398.754704284668, |
| "kl_loss_2": 3404.6179931640627, |
| "kl_loss_4": 2768.282507324219, |
| "kl_loss_9": 1292.7787353515625, |
| "learning_rate": 0.0009851478631379982, |
| "loss": 1963.9223, |
| "step": 870 |
| }, |
| { |
| "ce_loss_13": 3.3675146818161013, |
| "ce_loss_17": 3.178437685966492, |
| "ce_loss_2": 4.8413186311721805, |
| "ce_loss_4": 4.5075929164886475, |
| "ce_loss_9": 3.764183247089386, |
| "epoch": 0.088, |
| "grad_norm": 976.0, |
| "kl_loss_13": 415.20287170410154, |
| "kl_loss_2": 3360.4562866210936, |
| "kl_loss_4": 2734.4081176757813, |
| "kl_loss_9": 1269.864990234375, |
| "learning_rate": 0.0009847615725553456, |
| "loss": 1935.5604, |
| "step": 880 |
| }, |
| { |
| "ce_loss_13": 3.406362807750702, |
| "ce_loss_17": 3.2369236826896666, |
| "ce_loss_2": 4.821893978118896, |
| "ce_loss_4": 4.493126487731933, |
| "ce_loss_9": 3.7880091071128845, |
| "epoch": 0.089, |
| "grad_norm": 840.0, |
| "kl_loss_13": 383.4114242553711, |
| "kl_loss_2": 3199.1895385742187, |
| "kl_loss_4": 2589.841796875, |
| "kl_loss_9": 1215.9469848632812, |
| "learning_rate": 0.0009843704004290394, |
| "loss": 1913.5568, |
| "step": 890 |
| }, |
| { |
| "ce_loss_13": 3.3325154781341553, |
| "ce_loss_17": 3.146414506435394, |
| "ce_loss_2": 4.795909833908081, |
| "ce_loss_4": 4.45392324924469, |
| "ce_loss_9": 3.7367355942726137, |
| "epoch": 0.09, |
| "grad_norm": 1008.0, |
| "kl_loss_13": 410.2917770385742, |
| "kl_loss_2": 3358.256799316406, |
| "kl_loss_4": 2716.9063232421877, |
| "kl_loss_9": 1290.2903198242188, |
| "learning_rate": 0.0009839743506981783, |
| "loss": 1938.7434, |
| "step": 900 |
| }, |
| { |
| "ce_loss_13": 3.2540414690971375, |
| "ce_loss_17": 3.063079464435577, |
| "ce_loss_2": 4.789106225967407, |
| "ce_loss_4": 4.444174361228943, |
| "ce_loss_9": 3.6872658133506775, |
| "epoch": 0.091, |
| "grad_norm": 1080.0, |
| "kl_loss_13": 417.6730331420898, |
| "kl_loss_2": 3501.5642211914064, |
| "kl_loss_4": 2849.4146118164062, |
| "kl_loss_9": 1342.6936279296874, |
| "learning_rate": 0.0009835734273509786, |
| "loss": 1977.3492, |
| "step": 910 |
| }, |
| { |
| "ce_loss_13": 3.346299242973328, |
| "ce_loss_17": 3.157994043827057, |
| "ce_loss_2": 4.837053966522217, |
| "ce_loss_4": 4.492336368560791, |
| "ce_loss_9": 3.763401138782501, |
| "epoch": 0.092, |
| "grad_norm": 832.0, |
| "kl_loss_13": 401.46257934570315, |
| "kl_loss_2": 3346.2372802734376, |
| "kl_loss_4": 2697.43896484375, |
| "kl_loss_9": 1278.7077697753907, |
| "learning_rate": 0.0009831676344247342, |
| "loss": 1933.4803, |
| "step": 920 |
| }, |
| { |
| "ce_loss_13": 3.348154938220978, |
| "ce_loss_17": 3.180648756027222, |
| "ce_loss_2": 4.792514109611512, |
| "ce_loss_4": 4.452423858642578, |
| "ce_loss_9": 3.748816692829132, |
| "epoch": 0.093, |
| "grad_norm": 928.0, |
| "kl_loss_13": 370.6465698242188, |
| "kl_loss_2": 3261.6263427734375, |
| "kl_loss_4": 2625.723596191406, |
| "kl_loss_9": 1236.6765411376953, |
| "learning_rate": 0.0009827569760057755, |
| "loss": 1911.6672, |
| "step": 930 |
| }, |
| { |
| "ce_loss_13": 3.285767138004303, |
| "ce_loss_17": 3.097846233844757, |
| "ce_loss_2": 4.824236750602722, |
| "ce_loss_4": 4.4804726362228395, |
| "ce_loss_9": 3.7157430171966555, |
| "epoch": 0.094, |
| "grad_norm": 1000.0, |
| "kl_loss_13": 400.1313217163086, |
| "kl_loss_2": 3489.612438964844, |
| "kl_loss_4": 2838.1316650390627, |
| "kl_loss_9": 1338.6936889648437, |
| "learning_rate": 0.000982341456229428, |
| "loss": 1947.877, |
| "step": 940 |
| }, |
| { |
| "ce_loss_13": 3.368918788433075, |
| "ce_loss_17": 3.1905226707458496, |
| "ce_loss_2": 4.841083121299744, |
| "ce_loss_4": 4.515086054801941, |
| "ce_loss_9": 3.7773422956466676, |
| "epoch": 0.095, |
| "grad_norm": 964.0, |
| "kl_loss_13": 405.5454956054688, |
| "kl_loss_2": 3343.8675903320313, |
| "kl_loss_4": 2734.8436279296875, |
| "kl_loss_9": 1290.0235229492187, |
| "learning_rate": 0.000981921079279971, |
| "loss": 1904.6535, |
| "step": 950 |
| }, |
| { |
| "ce_loss_13": 3.3743069529533387, |
| "ce_loss_17": 3.2032186150550843, |
| "ce_loss_2": 4.7667896270751955, |
| "ce_loss_4": 4.4387904644012455, |
| "ce_loss_9": 3.744371938705444, |
| "epoch": 0.096, |
| "grad_norm": 920.0, |
| "kl_loss_13": 384.10196685791016, |
| "kl_loss_2": 3194.66494140625, |
| "kl_loss_4": 2580.2245483398438, |
| "kl_loss_9": 1207.9341430664062, |
| "learning_rate": 0.0009814958493905962, |
| "loss": 1869.4641, |
| "step": 960 |
| }, |
| { |
| "ce_loss_13": 3.337582790851593, |
| "ce_loss_17": 3.1601481318473814, |
| "ce_loss_2": 4.82601318359375, |
| "ce_loss_4": 4.487420868873596, |
| "ce_loss_9": 3.7436182856559754, |
| "epoch": 0.097, |
| "grad_norm": 828.0, |
| "kl_loss_13": 390.50591430664065, |
| "kl_loss_2": 3350.0862426757812, |
| "kl_loss_4": 2723.4616943359374, |
| "kl_loss_9": 1261.0095764160155, |
| "learning_rate": 0.0009810657708433637, |
| "loss": 1951.8195, |
| "step": 970 |
| }, |
| { |
| "ce_loss_13": 3.403837275505066, |
| "ce_loss_17": 3.2347267866134644, |
| "ce_loss_2": 4.81567165851593, |
| "ce_loss_4": 4.468466424942017, |
| "ce_loss_9": 3.772913944721222, |
| "epoch": 0.098, |
| "grad_norm": 852.0, |
| "kl_loss_13": 382.08585357666016, |
| "kl_loss_2": 3192.696533203125, |
| "kl_loss_4": 2552.9170043945314, |
| "kl_loss_9": 1188.332696533203, |
| "learning_rate": 0.0009806308479691594, |
| "loss": 1849.975, |
| "step": 980 |
| }, |
| { |
| "ce_loss_13": 3.424488735198975, |
| "ce_loss_17": 3.235040080547333, |
| "ce_loss_2": 4.866095018386841, |
| "ce_loss_4": 4.522079491615296, |
| "ce_loss_9": 3.8104368567466738, |
| "epoch": 0.099, |
| "grad_norm": 948.0, |
| "kl_loss_13": 411.6816650390625, |
| "kl_loss_2": 3305.6978149414062, |
| "kl_loss_4": 2653.93779296875, |
| "kl_loss_9": 1252.2878723144531, |
| "learning_rate": 0.0009801910851476522, |
| "loss": 1890.7043, |
| "step": 990 |
| }, |
| { |
| "ce_loss_13": 3.3440085887908935, |
| "ce_loss_17": 3.1690502524375916, |
| "ce_loss_2": 4.836632800102234, |
| "ce_loss_4": 4.493210172653198, |
| "ce_loss_9": 3.7448533892631533, |
| "epoch": 0.1, |
| "grad_norm": 960.0, |
| "kl_loss_13": 399.61362915039064, |
| "kl_loss_2": 3406.6630615234376, |
| "kl_loss_4": 2759.11484375, |
| "kl_loss_9": 1266.5451721191407, |
| "learning_rate": 0.0009797464868072487, |
| "loss": 1912.3621, |
| "step": 1000 |
| }, |
| { |
| "ce_loss_13": 3.331806945800781, |
| "ce_loss_17": 3.152591598033905, |
| "ce_loss_2": 4.792890572547913, |
| "ce_loss_4": 4.451310873031616, |
| "ce_loss_9": 3.72512149810791, |
| "epoch": 0.101, |
| "grad_norm": 1200.0, |
| "kl_loss_13": 394.763720703125, |
| "kl_loss_2": 3318.3656005859375, |
| "kl_loss_4": 2692.0280883789064, |
| "kl_loss_9": 1252.956787109375, |
| "learning_rate": 0.0009792970574250492, |
| "loss": 1905.3281, |
| "step": 1010 |
| }, |
| { |
| "ce_loss_13": 3.341937553882599, |
| "ce_loss_17": 3.169361650943756, |
| "ce_loss_2": 4.784651064872742, |
| "ce_loss_4": 4.460782313346863, |
| "ce_loss_9": 3.729896664619446, |
| "epoch": 0.102, |
| "grad_norm": 944.0, |
| "kl_loss_13": 381.2003631591797, |
| "kl_loss_2": 3270.02509765625, |
| "kl_loss_4": 2665.2223999023436, |
| "kl_loss_9": 1235.4885803222655, |
| "learning_rate": 0.0009788428015268028, |
| "loss": 1863.8438, |
| "step": 1020 |
| }, |
| { |
| "ce_loss_13": 3.33928097486496, |
| "ce_loss_17": 3.1749049186706544, |
| "ce_loss_2": 4.772533559799195, |
| "ce_loss_4": 4.439685535430908, |
| "ce_loss_9": 3.7271411061286925, |
| "epoch": 0.103, |
| "grad_norm": 920.0, |
| "kl_loss_13": 374.9629165649414, |
| "kl_loss_2": 3243.825537109375, |
| "kl_loss_4": 2629.011022949219, |
| "kl_loss_9": 1215.5707580566407, |
| "learning_rate": 0.0009783837236868609, |
| "loss": 1859.6172, |
| "step": 1030 |
| }, |
| { |
| "ce_loss_13": 3.3120030641555784, |
| "ce_loss_17": 3.1386825680732726, |
| "ce_loss_2": 4.7418444633483885, |
| "ce_loss_4": 4.41668553352356, |
| "ce_loss_9": 3.698753499984741, |
| "epoch": 0.104, |
| "grad_norm": 892.0, |
| "kl_loss_13": 385.54081573486326, |
| "kl_loss_2": 3211.5653076171875, |
| "kl_loss_4": 2609.5830810546877, |
| "kl_loss_9": 1217.1534149169922, |
| "learning_rate": 0.0009779198285281327, |
| "loss": 1854.5191, |
| "step": 1040 |
| }, |
| { |
| "ce_loss_13": 3.3089224219322206, |
| "ce_loss_17": 3.1374737858772277, |
| "ce_loss_2": 4.771598625183105, |
| "ce_loss_4": 4.440955734252929, |
| "ce_loss_9": 3.7080607414245605, |
| "epoch": 0.105, |
| "grad_norm": 884.0, |
| "kl_loss_13": 373.78731842041014, |
| "kl_loss_2": 3293.0612182617188, |
| "kl_loss_4": 2677.461389160156, |
| "kl_loss_9": 1238.1656616210937, |
| "learning_rate": 0.0009774511207220368, |
| "loss": 1885.1617, |
| "step": 1050 |
| }, |
| { |
| "ce_loss_13": 3.35217467546463, |
| "ce_loss_17": 3.1806638121604918, |
| "ce_loss_2": 4.81612331867218, |
| "ce_loss_4": 4.478370618820191, |
| "ce_loss_9": 3.7391607642173765, |
| "epoch": 0.106, |
| "grad_norm": 820.0, |
| "kl_loss_13": 385.7530212402344, |
| "kl_loss_2": 3294.76513671875, |
| "kl_loss_4": 2664.874072265625, |
| "kl_loss_9": 1220.798532104492, |
| "learning_rate": 0.0009769776049884564, |
| "loss": 1874.1031, |
| "step": 1060 |
| }, |
| { |
| "ce_loss_13": 3.2690223813056947, |
| "ce_loss_17": 3.0942304491996766, |
| "ce_loss_2": 4.759356594085693, |
| "ce_loss_4": 4.422593832015991, |
| "ce_loss_9": 3.6778587818145754, |
| "epoch": 0.107, |
| "grad_norm": 956.0, |
| "kl_loss_13": 391.147314453125, |
| "kl_loss_2": 3376.554724121094, |
| "kl_loss_4": 2748.0857788085937, |
| "kl_loss_9": 1266.021502685547, |
| "learning_rate": 0.0009764992860956889, |
| "loss": 1949.5391, |
| "step": 1070 |
| }, |
| { |
| "ce_loss_13": 3.4036198973655702, |
| "ce_loss_17": 3.239108693599701, |
| "ce_loss_2": 4.763926935195923, |
| "ce_loss_4": 4.44455623626709, |
| "ce_loss_9": 3.7673982262611387, |
| "epoch": 0.108, |
| "grad_norm": 936.0, |
| "kl_loss_13": 367.35121459960936, |
| "kl_loss_2": 3102.4430908203126, |
| "kl_loss_4": 2501.094482421875, |
| "kl_loss_9": 1167.6445037841797, |
| "learning_rate": 0.0009760161688604008, |
| "loss": 1819.5711, |
| "step": 1080 |
| }, |
| { |
| "ce_loss_13": 3.4040125131607057, |
| "ce_loss_17": 3.2376139521598817, |
| "ce_loss_2": 4.832027888298034, |
| "ce_loss_4": 4.494500255584716, |
| "ce_loss_9": 3.788975703716278, |
| "epoch": 0.109, |
| "grad_norm": 888.0, |
| "kl_loss_13": 369.05223541259767, |
| "kl_loss_2": 3194.8518310546874, |
| "kl_loss_4": 2585.8927001953125, |
| "kl_loss_9": 1196.6102905273438, |
| "learning_rate": 0.0009755282581475768, |
| "loss": 1861.7242, |
| "step": 1090 |
| }, |
| { |
| "ce_loss_13": 3.4441361665725707, |
| "ce_loss_17": 3.280840015411377, |
| "ce_loss_2": 4.855321836471558, |
| "ce_loss_4": 4.517546820640564, |
| "ce_loss_9": 3.833357799053192, |
| "epoch": 0.11, |
| "grad_norm": 1016.0, |
| "kl_loss_13": 373.8306381225586, |
| "kl_loss_2": 3183.0297729492186, |
| "kl_loss_4": 2554.546875, |
| "kl_loss_9": 1197.6685791015625, |
| "learning_rate": 0.0009750355588704727, |
| "loss": 1819.5221, |
| "step": 1100 |
| }, |
| { |
| "ce_loss_13": 3.3001001715660094, |
| "ce_loss_17": 3.1322520971298218, |
| "ce_loss_2": 4.751658868789673, |
| "ce_loss_4": 4.398335409164429, |
| "ce_loss_9": 3.6881660342216493, |
| "epoch": 0.111, |
| "grad_norm": 1000.0, |
| "kl_loss_13": 365.6656921386719, |
| "kl_loss_2": 3269.6130615234374, |
| "kl_loss_4": 2618.7026611328124, |
| "kl_loss_9": 1215.534423828125, |
| "learning_rate": 0.0009745380759905647, |
| "loss": 1903.5375, |
| "step": 1110 |
| }, |
| { |
| "ce_loss_13": 3.245807480812073, |
| "ce_loss_17": 3.0830583453178404, |
| "ce_loss_2": 4.716756153106689, |
| "ce_loss_4": 4.377573132514954, |
| "ce_loss_9": 3.6491206765174864, |
| "epoch": 0.112, |
| "grad_norm": 864.0, |
| "kl_loss_13": 366.47874908447267, |
| "kl_loss_2": 3306.8806884765627, |
| "kl_loss_4": 2663.4728515625, |
| "kl_loss_9": 1240.725323486328, |
| "learning_rate": 0.0009740358145174998, |
| "loss": 1941.0566, |
| "step": 1120 |
| }, |
| { |
| "ce_loss_13": 3.3866483330726624, |
| "ce_loss_17": 3.2277196288108825, |
| "ce_loss_2": 4.774269104003906, |
| "ce_loss_4": 4.433835482597351, |
| "ce_loss_9": 3.7634063839912413, |
| "epoch": 0.113, |
| "grad_norm": 848.0, |
| "kl_loss_13": 361.10958709716795, |
| "kl_loss_2": 3146.92509765625, |
| "kl_loss_4": 2518.6606201171876, |
| "kl_loss_9": 1186.183612060547, |
| "learning_rate": 0.0009735287795090455, |
| "loss": 1833.2547, |
| "step": 1130 |
| }, |
| { |
| "ce_loss_13": 3.2892943382263184, |
| "ce_loss_17": 3.1269325494766234, |
| "ce_loss_2": 4.729933071136474, |
| "ce_loss_4": 4.395347666740418, |
| "ce_loss_9": 3.6729499459266663, |
| "epoch": 0.114, |
| "grad_norm": 876.0, |
| "kl_loss_13": 362.5305465698242, |
| "kl_loss_2": 3237.9978271484374, |
| "kl_loss_4": 2619.9707885742187, |
| "kl_loss_9": 1200.743426513672, |
| "learning_rate": 0.0009730169760710386, |
| "loss": 1847.4512, |
| "step": 1140 |
| }, |
| { |
| "ce_loss_13": 3.357778012752533, |
| "ce_loss_17": 3.194906449317932, |
| "ce_loss_2": 4.77970175743103, |
| "ce_loss_4": 4.4458307981491085, |
| "ce_loss_9": 3.7482528924942016, |
| "epoch": 0.115, |
| "grad_norm": 1080.0, |
| "kl_loss_13": 352.80699920654297, |
| "kl_loss_2": 3183.4198974609376, |
| "kl_loss_4": 2553.4814575195314, |
| "kl_loss_9": 1190.7755249023437, |
| "learning_rate": 0.0009725004093573342, |
| "loss": 1844.8674, |
| "step": 1150 |
| }, |
| { |
| "ce_loss_13": 3.30957270860672, |
| "ce_loss_17": 3.1439372062683106, |
| "ce_loss_2": 4.74575138092041, |
| "ce_loss_4": 4.402514219284058, |
| "ce_loss_9": 3.709815800189972, |
| "epoch": 0.116, |
| "grad_norm": 1056.0, |
| "kl_loss_13": 357.7722702026367, |
| "kl_loss_2": 3204.6459838867186, |
| "kl_loss_4": 2562.1646118164062, |
| "kl_loss_9": 1219.2004638671874, |
| "learning_rate": 0.0009719790845697534, |
| "loss": 1822.2119, |
| "step": 1160 |
| }, |
| { |
| "ce_loss_13": 3.2644479274749756, |
| "ce_loss_17": 3.108327102661133, |
| "ce_loss_2": 4.647129654884338, |
| "ce_loss_4": 4.312844860553741, |
| "ce_loss_9": 3.6578468084335327, |
| "epoch": 0.117, |
| "grad_norm": 1040.0, |
| "kl_loss_13": 344.85960388183594, |
| "kl_loss_2": 3122.0739990234374, |
| "kl_loss_4": 2502.5944702148436, |
| "kl_loss_9": 1204.761767578125, |
| "learning_rate": 0.0009714530069580309, |
| "loss": 1808.5439, |
| "step": 1170 |
| }, |
| { |
| "ce_loss_13": 3.3457039833068847, |
| "ce_loss_17": 3.190069305896759, |
| "ce_loss_2": 4.774503207206726, |
| "ce_loss_4": 4.443236660957337, |
| "ce_loss_9": 3.7676589250564576, |
| "epoch": 0.118, |
| "grad_norm": 972.0, |
| "kl_loss_13": 362.91875, |
| "kl_loss_2": 3198.452185058594, |
| "kl_loss_4": 2586.32919921875, |
| "kl_loss_9": 1254.7549560546875, |
| "learning_rate": 0.0009709221818197624, |
| "loss": 1839.6691, |
| "step": 1180 |
| }, |
| { |
| "ce_loss_13": 3.3937922358512878, |
| "ce_loss_17": 3.2305485367774964, |
| "ce_loss_2": 4.81508846282959, |
| "ce_loss_4": 4.488950777053833, |
| "ce_loss_9": 3.788563871383667, |
| "epoch": 0.119, |
| "grad_norm": 964.0, |
| "kl_loss_13": 361.83642578125, |
| "kl_loss_2": 3216.6996826171876, |
| "kl_loss_4": 2605.2082397460936, |
| "kl_loss_9": 1222.1690795898437, |
| "learning_rate": 0.0009703866145003512, |
| "loss": 1844.6881, |
| "step": 1190 |
| }, |
| { |
| "ce_loss_13": 3.3621174573898314, |
| "ce_loss_17": 3.2041364312171936, |
| "ce_loss_2": 4.763102650642395, |
| "ce_loss_4": 4.438978171348571, |
| "ce_loss_9": 3.7436181783676146, |
| "epoch": 0.12, |
| "grad_norm": 972.0, |
| "kl_loss_13": 349.3827590942383, |
| "kl_loss_2": 3177.04287109375, |
| "kl_loss_4": 2565.992138671875, |
| "kl_loss_9": 1189.7509399414062, |
| "learning_rate": 0.0009698463103929542, |
| "loss": 1845.2006, |
| "step": 1200 |
| }, |
| { |
| "ce_loss_13": 3.3279492259025574, |
| "ce_loss_17": 3.1659576535224914, |
| "ce_loss_2": 4.75805766582489, |
| "ce_loss_4": 4.4306889295578005, |
| "ce_loss_9": 3.713662326335907, |
| "epoch": 0.121, |
| "grad_norm": 884.0, |
| "kl_loss_13": 363.17106475830076, |
| "kl_loss_2": 3220.48115234375, |
| "kl_loss_4": 2602.700341796875, |
| "kl_loss_9": 1192.4669372558594, |
| "learning_rate": 0.0009693012749384279, |
| "loss": 1854.6668, |
| "step": 1210 |
| }, |
| { |
| "ce_loss_13": 3.336058557033539, |
| "ce_loss_17": 3.1785760521888733, |
| "ce_loss_2": 4.742625999450683, |
| "ce_loss_4": 4.419625449180603, |
| "ce_loss_9": 3.715330648422241, |
| "epoch": 0.122, |
| "grad_norm": 924.0, |
| "kl_loss_13": 357.46961364746096, |
| "kl_loss_2": 3175.962756347656, |
| "kl_loss_4": 2567.0968505859373, |
| "kl_loss_9": 1183.8647583007812, |
| "learning_rate": 0.0009687515136252732, |
| "loss": 1816.2451, |
| "step": 1220 |
| }, |
| { |
| "ce_loss_13": 3.3042866706848146, |
| "ce_loss_17": 3.1386887311935423, |
| "ce_loss_2": 4.7668650388717655, |
| "ce_loss_4": 4.431103610992432, |
| "ce_loss_9": 3.695324969291687, |
| "epoch": 0.123, |
| "grad_norm": 960.0, |
| "kl_loss_13": 368.94044799804686, |
| "kl_loss_2": 3305.9065673828127, |
| "kl_loss_4": 2684.9451904296875, |
| "kl_loss_9": 1229.5591186523438, |
| "learning_rate": 0.0009681970319895803, |
| "loss": 1952.7406, |
| "step": 1230 |
| }, |
| { |
| "ce_loss_13": 3.3781582713127136, |
| "ce_loss_17": 3.2220380663871766, |
| "ce_loss_2": 4.782853364944458, |
| "ce_loss_4": 4.449784755706787, |
| "ce_loss_9": 3.7553099393844604, |
| "epoch": 0.124, |
| "grad_norm": 956.0, |
| "kl_loss_13": 356.1455780029297, |
| "kl_loss_2": 3166.3423828125, |
| "kl_loss_4": 2539.4695434570312, |
| "kl_loss_9": 1179.7913696289063, |
| "learning_rate": 0.0009676378356149733, |
| "loss": 1817.5471, |
| "step": 1240 |
| }, |
| { |
| "ce_loss_13": 3.3359023094177247, |
| "ce_loss_17": 3.1852412700653074, |
| "ce_loss_2": 4.72843554019928, |
| "ce_loss_4": 4.399932110309601, |
| "ce_loss_9": 3.7013972401618958, |
| "epoch": 0.125, |
| "grad_norm": 956.0, |
| "kl_loss_13": 341.65853118896484, |
| "kl_loss_2": 3142.8338256835937, |
| "kl_loss_4": 2516.4767456054688, |
| "kl_loss_9": 1144.9053802490234, |
| "learning_rate": 0.0009670739301325534, |
| "loss": 1795.8484, |
| "step": 1250 |
| }, |
| { |
| "ce_loss_13": 3.3055288195610046, |
| "ce_loss_17": 3.148827874660492, |
| "ce_loss_2": 4.6990382194519045, |
| "ce_loss_4": 4.362352633476258, |
| "ce_loss_9": 3.677418422698975, |
| "epoch": 0.126, |
| "grad_norm": 840.0, |
| "kl_loss_13": 351.054264831543, |
| "kl_loss_2": 3137.4232421875, |
| "kl_loss_4": 2514.1816162109376, |
| "kl_loss_9": 1166.7933227539063, |
| "learning_rate": 0.0009665053212208426, |
| "loss": 1819.1746, |
| "step": 1260 |
| }, |
| { |
| "ce_loss_13": 3.349940574169159, |
| "ce_loss_17": 3.187956917285919, |
| "ce_loss_2": 4.766580557823181, |
| "ce_loss_4": 4.428262984752655, |
| "ce_loss_9": 3.7286780834198, |
| "epoch": 0.127, |
| "grad_norm": 1528.0, |
| "kl_loss_13": 364.5601104736328, |
| "kl_loss_2": 3205.3591918945312, |
| "kl_loss_4": 2561.2893676757812, |
| "kl_loss_9": 1186.4169006347656, |
| "learning_rate": 0.0009659320146057262, |
| "loss": 1828.0889, |
| "step": 1270 |
| }, |
| { |
| "ce_loss_13": 3.3558884024620057, |
| "ce_loss_17": 3.199122393131256, |
| "ce_loss_2": 4.731974625587464, |
| "ce_loss_4": 4.404265213012695, |
| "ce_loss_9": 3.715572690963745, |
| "epoch": 0.128, |
| "grad_norm": 848.0, |
| "kl_loss_13": 361.27819366455077, |
| "kl_loss_2": 3142.15146484375, |
| "kl_loss_4": 2515.825732421875, |
| "kl_loss_9": 1151.0317626953124, |
| "learning_rate": 0.0009653540160603955, |
| "loss": 1793.2938, |
| "step": 1280 |
| }, |
| { |
| "ce_loss_13": 3.3554338812828064, |
| "ce_loss_17": 3.203555464744568, |
| "ce_loss_2": 4.71930513381958, |
| "ce_loss_4": 4.399723863601684, |
| "ce_loss_9": 3.7074398398399353, |
| "epoch": 0.129, |
| "grad_norm": 1016.0, |
| "kl_loss_13": 355.52637634277346, |
| "kl_loss_2": 3111.012487792969, |
| "kl_loss_4": 2516.2204833984374, |
| "kl_loss_9": 1141.2606292724608, |
| "learning_rate": 0.0009647713314052896, |
| "loss": 1767.6027, |
| "step": 1290 |
| }, |
| { |
| "ce_loss_13": 3.3171236634254457, |
| "ce_loss_17": 3.1485302209854127, |
| "ce_loss_2": 4.7571357727050785, |
| "ce_loss_4": 4.424438738822937, |
| "ce_loss_9": 3.694945514202118, |
| "epoch": 0.13, |
| "grad_norm": 940.0, |
| "kl_loss_13": 366.16795806884767, |
| "kl_loss_2": 3239.4711059570313, |
| "kl_loss_4": 2612.030554199219, |
| "kl_loss_9": 1188.488702392578, |
| "learning_rate": 0.0009641839665080363, |
| "loss": 1834.6273, |
| "step": 1300 |
| }, |
| { |
| "ce_loss_13": 3.277591037750244, |
| "ce_loss_17": 3.1221741437911987, |
| "ce_loss_2": 4.695172500610352, |
| "ce_loss_4": 4.366708278656006, |
| "ce_loss_9": 3.6467333793640138, |
| "epoch": 0.131, |
| "grad_norm": 984.0, |
| "kl_loss_13": 346.6986328125, |
| "kl_loss_2": 3177.282434082031, |
| "kl_loss_4": 2572.5327880859377, |
| "kl_loss_9": 1149.5988739013671, |
| "learning_rate": 0.0009635919272833937, |
| "loss": 1787.7705, |
| "step": 1310 |
| }, |
| { |
| "ce_loss_13": 3.304717409610748, |
| "ce_loss_17": 3.147184658050537, |
| "ce_loss_2": 4.727725172042847, |
| "ce_loss_4": 4.396122694015503, |
| "ce_loss_9": 3.7021288871765137, |
| "epoch": 0.132, |
| "grad_norm": 1056.0, |
| "kl_loss_13": 350.2265197753906, |
| "kl_loss_2": 3178.2564697265625, |
| "kl_loss_4": 2551.0311889648438, |
| "kl_loss_9": 1192.341815185547, |
| "learning_rate": 0.0009629952196931902, |
| "loss": 1784.1535, |
| "step": 1320 |
| }, |
| { |
| "ce_loss_13": 3.283700692653656, |
| "ce_loss_17": 3.137368988990784, |
| "ce_loss_2": 4.696712350845337, |
| "ce_loss_4": 4.364002633094787, |
| "ce_loss_9": 3.656220042705536, |
| "epoch": 0.133, |
| "grad_norm": 972.0, |
| "kl_loss_13": 340.9196014404297, |
| "kl_loss_2": 3167.00986328125, |
| "kl_loss_4": 2557.2827026367186, |
| "kl_loss_9": 1152.1808532714845, |
| "learning_rate": 0.0009623938497462645, |
| "loss": 1796.5461, |
| "step": 1330 |
| }, |
| { |
| "ce_loss_13": 3.280201959609985, |
| "ce_loss_17": 3.130404198169708, |
| "ce_loss_2": 4.687838745117188, |
| "ce_loss_4": 4.361579084396363, |
| "ce_loss_9": 3.658969283103943, |
| "epoch": 0.134, |
| "grad_norm": 916.0, |
| "kl_loss_13": 348.50816345214844, |
| "kl_loss_2": 3160.2753051757813, |
| "kl_loss_4": 2546.505615234375, |
| "kl_loss_9": 1165.6310821533202, |
| "learning_rate": 0.0009617878234984055, |
| "loss": 1816.8023, |
| "step": 1340 |
| }, |
| { |
| "ce_loss_13": 3.3739678263664246, |
| "ce_loss_17": 3.223553514480591, |
| "ce_loss_2": 4.7280254602432255, |
| "ce_loss_4": 4.403179931640625, |
| "ce_loss_9": 3.7230977177619935, |
| "epoch": 0.135, |
| "grad_norm": 908.0, |
| "kl_loss_13": 338.50958557128905, |
| "kl_loss_2": 3053.082067871094, |
| "kl_loss_4": 2449.660925292969, |
| "kl_loss_9": 1112.513897705078, |
| "learning_rate": 0.0009611771470522907, |
| "loss": 1768.7051, |
| "step": 1350 |
| }, |
| { |
| "ce_loss_13": 3.3069321751594543, |
| "ce_loss_17": 3.1484426617622376, |
| "ce_loss_2": 4.69906759262085, |
| "ce_loss_4": 4.3818199872970585, |
| "ce_loss_9": 3.676460826396942, |
| "epoch": 0.136, |
| "grad_norm": 888.0, |
| "kl_loss_13": 345.8798767089844, |
| "kl_loss_2": 3112.4317260742187, |
| "kl_loss_4": 2519.9077270507814, |
| "kl_loss_9": 1142.991879272461, |
| "learning_rate": 0.0009605618265574251, |
| "loss": 1760.6551, |
| "step": 1360 |
| }, |
| { |
| "ce_loss_13": 3.2838287234306334, |
| "ce_loss_17": 3.1186364889144897, |
| "ce_loss_2": 4.702053523063659, |
| "ce_loss_4": 4.377023506164551, |
| "ce_loss_9": 3.6757367730140684, |
| "epoch": 0.137, |
| "grad_norm": 1080.0, |
| "kl_loss_13": 365.806575012207, |
| "kl_loss_2": 3226.5471923828127, |
| "kl_loss_4": 2612.6611572265624, |
| "kl_loss_9": 1227.2938293457032, |
| "learning_rate": 0.0009599418682100792, |
| "loss": 1821.6424, |
| "step": 1370 |
| }, |
| { |
| "ce_loss_13": 3.3075397610664368, |
| "ce_loss_17": 3.1570510387420656, |
| "ce_loss_2": 4.705657339096069, |
| "ce_loss_4": 4.3784123182296755, |
| "ce_loss_9": 3.6774469375610352, |
| "epoch": 0.138, |
| "grad_norm": 888.0, |
| "kl_loss_13": 341.84042205810545, |
| "kl_loss_2": 3146.644287109375, |
| "kl_loss_4": 2534.15849609375, |
| "kl_loss_9": 1165.5805969238281, |
| "learning_rate": 0.0009593172782532268, |
| "loss": 1804.3691, |
| "step": 1380 |
| }, |
| { |
| "ce_loss_13": 3.3456047773361206, |
| "ce_loss_17": 3.1907795071601868, |
| "ce_loss_2": 4.721057319641114, |
| "ce_loss_4": 4.401040947437286, |
| "ce_loss_9": 3.7160727262496946, |
| "epoch": 0.139, |
| "grad_norm": 920.0, |
| "kl_loss_13": 349.742497253418, |
| "kl_loss_2": 3090.6633056640626, |
| "kl_loss_4": 2493.1666625976563, |
| "kl_loss_9": 1151.4166076660156, |
| "learning_rate": 0.0009586880629764817, |
| "loss": 1780.2971, |
| "step": 1390 |
| }, |
| { |
| "ce_loss_13": 3.2838189721107485, |
| "ce_loss_17": 3.1260019898414613, |
| "ce_loss_2": 4.682038426399231, |
| "ce_loss_4": 4.362703704833985, |
| "ce_loss_9": 3.660857653617859, |
| "epoch": 0.14, |
| "grad_norm": 900.0, |
| "kl_loss_13": 351.3061782836914, |
| "kl_loss_2": 3128.9981689453125, |
| "kl_loss_4": 2532.068395996094, |
| "kl_loss_9": 1163.7023803710938, |
| "learning_rate": 0.0009580542287160348, |
| "loss": 1767.2607, |
| "step": 1400 |
| }, |
| { |
| "ce_loss_13": 3.2470011115074158, |
| "ce_loss_17": 3.092609977722168, |
| "ce_loss_2": 4.651752281188965, |
| "ce_loss_4": 4.318781995773316, |
| "ce_loss_9": 3.609373915195465, |
| "epoch": 0.141, |
| "grad_norm": 944.0, |
| "kl_loss_13": 345.38047485351564, |
| "kl_loss_2": 3152.17041015625, |
| "kl_loss_4": 2540.3191772460937, |
| "kl_loss_9": 1138.7125671386718, |
| "learning_rate": 0.0009574157818545901, |
| "loss": 1769.2824, |
| "step": 1410 |
| }, |
| { |
| "ce_loss_13": 3.3160932898521422, |
| "ce_loss_17": 3.1630540251731873, |
| "ce_loss_2": 4.675372576713562, |
| "ce_loss_4": 4.359324955940247, |
| "ce_loss_9": 3.662063109874725, |
| "epoch": 0.142, |
| "grad_norm": 948.0, |
| "kl_loss_13": 335.5095245361328, |
| "kl_loss_2": 3061.304150390625, |
| "kl_loss_4": 2475.720349121094, |
| "kl_loss_9": 1107.0988647460938, |
| "learning_rate": 0.0009567727288213005, |
| "loss": 1780.9785, |
| "step": 1420 |
| }, |
| { |
| "ce_loss_13": 3.295659267902374, |
| "ce_loss_17": 3.1438552379608153, |
| "ce_loss_2": 4.68499231338501, |
| "ce_loss_4": 4.377166032791138, |
| "ce_loss_9": 3.669982576370239, |
| "epoch": 0.143, |
| "grad_norm": 924.0, |
| "kl_loss_13": 347.1465377807617, |
| "kl_loss_2": 3142.35185546875, |
| "kl_loss_4": 2562.173388671875, |
| "kl_loss_9": 1176.5192016601563, |
| "learning_rate": 0.0009561250760917027, |
| "loss": 1778.8293, |
| "step": 1430 |
| }, |
| { |
| "ce_loss_13": 3.309324586391449, |
| "ce_loss_17": 3.1523546457290648, |
| "ce_loss_2": 4.686427474021912, |
| "ce_loss_4": 4.371018195152283, |
| "ce_loss_9": 3.6777040481567385, |
| "epoch": 0.144, |
| "grad_norm": 904.0, |
| "kl_loss_13": 350.9153335571289, |
| "kl_loss_2": 3126.068408203125, |
| "kl_loss_4": 2533.664270019531, |
| "kl_loss_9": 1165.5422302246093, |
| "learning_rate": 0.0009554728301876525, |
| "loss": 1754.5764, |
| "step": 1440 |
| }, |
| { |
| "ce_loss_13": 3.3491756081581117, |
| "ce_loss_17": 3.197803294658661, |
| "ce_loss_2": 4.712909507751465, |
| "ce_loss_4": 4.389699935913086, |
| "ce_loss_9": 3.713365852832794, |
| "epoch": 0.145, |
| "grad_norm": 936.0, |
| "kl_loss_13": 343.36599884033205, |
| "kl_loss_2": 3068.6017333984373, |
| "kl_loss_4": 2472.229833984375, |
| "kl_loss_9": 1135.7852691650392, |
| "learning_rate": 0.0009548159976772592, |
| "loss": 1805.7918, |
| "step": 1450 |
| }, |
| { |
| "ce_loss_13": 3.3039668917655947, |
| "ce_loss_17": 3.1510722994804383, |
| "ce_loss_2": 4.7096524953842165, |
| "ce_loss_4": 4.391816759109497, |
| "ce_loss_9": 3.6755584955215452, |
| "epoch": 0.146, |
| "grad_norm": 848.0, |
| "kl_loss_13": 346.2204956054687, |
| "kl_loss_2": 3153.2633422851563, |
| "kl_loss_4": 2561.881311035156, |
| "kl_loss_9": 1152.6500183105468, |
| "learning_rate": 0.0009541545851748186, |
| "loss": 1781.8623, |
| "step": 1460 |
| }, |
| { |
| "ce_loss_13": 3.184967744350433, |
| "ce_loss_17": 3.028830909729004, |
| "ce_loss_2": 4.62920114994049, |
| "ce_loss_4": 4.306463098526001, |
| "ce_loss_9": 3.5654355883598328, |
| "epoch": 0.147, |
| "grad_norm": 988.0, |
| "kl_loss_13": 344.8602066040039, |
| "kl_loss_2": 3216.1061401367188, |
| "kl_loss_4": 2604.6420288085938, |
| "kl_loss_9": 1161.0882293701172, |
| "learning_rate": 0.0009534885993407473, |
| "loss": 1804.9313, |
| "step": 1470 |
| }, |
| { |
| "ce_loss_13": 3.343163049221039, |
| "ce_loss_17": 3.1838710069656373, |
| "ce_loss_2": 4.726481223106385, |
| "ce_loss_4": 4.410363483428955, |
| "ce_loss_9": 3.6973416209220886, |
| "epoch": 0.148, |
| "grad_norm": 896.0, |
| "kl_loss_13": 354.92088775634767, |
| "kl_loss_2": 3120.2980346679688, |
| "kl_loss_4": 2532.867138671875, |
| "kl_loss_9": 1132.5392517089845, |
| "learning_rate": 0.0009528180468815154, |
| "loss": 1790.3148, |
| "step": 1480 |
| }, |
| { |
| "ce_loss_13": 3.401682674884796, |
| "ce_loss_17": 3.2349347352981566, |
| "ce_loss_2": 4.736024785041809, |
| "ce_loss_4": 4.420929384231568, |
| "ce_loss_9": 3.7358593463897707, |
| "epoch": 0.149, |
| "grad_norm": 832.0, |
| "kl_loss_13": 384.67883148193357, |
| "kl_loss_2": 3053.905480957031, |
| "kl_loss_4": 2463.6721313476564, |
| "kl_loss_9": 1135.9265686035155, |
| "learning_rate": 0.0009521429345495787, |
| "loss": 1768.6777, |
| "step": 1490 |
| }, |
| { |
| "ce_loss_13": 3.376400685310364, |
| "ce_loss_17": 3.215477669239044, |
| "ce_loss_2": 4.695270037651062, |
| "ce_loss_4": 4.3831846714019775, |
| "ce_loss_9": 3.701464664936066, |
| "epoch": 0.15, |
| "grad_norm": 1112.0, |
| "kl_loss_13": 363.56079559326173, |
| "kl_loss_2": 3020.2793823242187, |
| "kl_loss_4": 2448.1266845703126, |
| "kl_loss_9": 1096.5532623291015, |
| "learning_rate": 0.0009514632691433108, |
| "loss": 1750.3266, |
| "step": 1500 |
| }, |
| { |
| "ce_loss_13": 3.3351935148239136, |
| "ce_loss_17": 3.1781211853027345, |
| "ce_loss_2": 4.7009505271911625, |
| "ce_loss_4": 4.377572560310364, |
| "ce_loss_9": 3.685502851009369, |
| "epoch": 0.151, |
| "grad_norm": 952.0, |
| "kl_loss_13": 360.2738861083984, |
| "kl_loss_2": 3101.499658203125, |
| "kl_loss_4": 2496.559680175781, |
| "kl_loss_9": 1124.5883026123047, |
| "learning_rate": 0.0009507790575069346, |
| "loss": 1773.4379, |
| "step": 1510 |
| }, |
| { |
| "ce_loss_13": 3.3083924889564513, |
| "ce_loss_17": 3.140878915786743, |
| "ce_loss_2": 4.70655038356781, |
| "ce_loss_4": 4.38455286026001, |
| "ce_loss_9": 3.6780938386917112, |
| "epoch": 0.152, |
| "grad_norm": 864.0, |
| "kl_loss_13": 360.31238250732423, |
| "kl_loss_2": 3151.769152832031, |
| "kl_loss_4": 2545.374401855469, |
| "kl_loss_9": 1154.7457763671875, |
| "learning_rate": 0.0009500903065304539, |
| "loss": 1815.1738, |
| "step": 1520 |
| }, |
| { |
| "ce_loss_13": 3.3356539011001587, |
| "ce_loss_17": 3.189967668056488, |
| "ce_loss_2": 4.676838684082031, |
| "ce_loss_4": 4.354623579978943, |
| "ce_loss_9": 3.67772753238678, |
| "epoch": 0.153, |
| "grad_norm": 936.0, |
| "kl_loss_13": 330.97486877441406, |
| "kl_loss_2": 3040.882946777344, |
| "kl_loss_4": 2432.934045410156, |
| "kl_loss_9": 1096.8438842773437, |
| "learning_rate": 0.0009493970231495835, |
| "loss": 1751.5031, |
| "step": 1530 |
| }, |
| { |
| "ce_loss_13": 3.276842999458313, |
| "ce_loss_17": 3.137658751010895, |
| "ce_loss_2": 4.610387825965882, |
| "ce_loss_4": 4.289961338043213, |
| "ce_loss_9": 3.6171847939491273, |
| "epoch": 0.154, |
| "grad_norm": 988.0, |
| "kl_loss_13": 324.7131378173828, |
| "kl_loss_2": 3029.9604248046876, |
| "kl_loss_4": 2431.1107421875, |
| "kl_loss_9": 1095.838232421875, |
| "learning_rate": 0.0009486992143456792, |
| "loss": 1717.2365, |
| "step": 1540 |
| }, |
| { |
| "ce_loss_13": 3.3048726439476015, |
| "ce_loss_17": 3.149159014225006, |
| "ce_loss_2": 4.752410364151001, |
| "ce_loss_4": 4.426711654663086, |
| "ce_loss_9": 3.6890255570411683, |
| "epoch": 0.155, |
| "grad_norm": 832.0, |
| "kl_loss_13": 351.6216766357422, |
| "kl_loss_2": 3255.118408203125, |
| "kl_loss_4": 2640.0526489257813, |
| "kl_loss_9": 1187.4871032714843, |
| "learning_rate": 0.0009479968871456679, |
| "loss": 1805.9711, |
| "step": 1550 |
| }, |
| { |
| "ce_loss_13": 3.273043465614319, |
| "ce_loss_17": 3.1271315097808836, |
| "ce_loss_2": 4.683293724060059, |
| "ce_loss_4": 4.357288956642151, |
| "ce_loss_9": 3.6377100229263304, |
| "epoch": 0.156, |
| "grad_norm": 992.0, |
| "kl_loss_13": 331.80619049072266, |
| "kl_loss_2": 3157.44267578125, |
| "kl_loss_4": 2550.610302734375, |
| "kl_loss_9": 1133.6968048095703, |
| "learning_rate": 0.0009472900486219768, |
| "loss": 1756.0287, |
| "step": 1560 |
| }, |
| { |
| "ce_loss_13": 3.263408601284027, |
| "ce_loss_17": 3.1160223960876463, |
| "ce_loss_2": 4.633613777160645, |
| "ce_loss_4": 4.307847380638123, |
| "ce_loss_9": 3.614348661899567, |
| "epoch": 0.157, |
| "grad_norm": 808.0, |
| "kl_loss_13": 331.19839935302736, |
| "kl_loss_2": 3094.4170776367187, |
| "kl_loss_4": 2477.1323364257814, |
| "kl_loss_9": 1116.4197570800782, |
| "learning_rate": 0.000946578705892462, |
| "loss": 1755.9688, |
| "step": 1570 |
| }, |
| { |
| "ce_loss_13": 3.300995659828186, |
| "ce_loss_17": 3.1548858761787413, |
| "ce_loss_2": 4.646145176887512, |
| "ce_loss_4": 4.328919124603272, |
| "ce_loss_9": 3.6465566158294678, |
| "epoch": 0.158, |
| "grad_norm": 884.0, |
| "kl_loss_13": 328.43347930908203, |
| "kl_loss_2": 3037.9541015625, |
| "kl_loss_4": 2446.363146972656, |
| "kl_loss_9": 1094.0669891357422, |
| "learning_rate": 0.0009458628661203367, |
| "loss": 1755.3217, |
| "step": 1580 |
| }, |
| { |
| "ce_loss_13": 3.3079935193061827, |
| "ce_loss_17": 3.1622170209884644, |
| "ce_loss_2": 4.716912245750427, |
| "ce_loss_4": 4.392867374420166, |
| "ce_loss_9": 3.6763587951660157, |
| "epoch": 0.159, |
| "grad_norm": 1144.0, |
| "kl_loss_13": 341.8176528930664, |
| "kl_loss_2": 3163.5356567382814, |
| "kl_loss_4": 2548.3068603515626, |
| "kl_loss_9": 1154.300845336914, |
| "learning_rate": 0.0009451425365140996, |
| "loss": 1744.6188, |
| "step": 1590 |
| }, |
| { |
| "ce_loss_13": 3.369904029369354, |
| "ce_loss_17": 3.223819315433502, |
| "ce_loss_2": 4.70569953918457, |
| "ce_loss_4": 4.383472967147827, |
| "ce_loss_9": 3.7201624870300294, |
| "epoch": 0.16, |
| "grad_norm": 932.0, |
| "kl_loss_13": 330.65557556152345, |
| "kl_loss_2": 3023.493786621094, |
| "kl_loss_4": 2408.998712158203, |
| "kl_loss_9": 1101.6724029541015, |
| "learning_rate": 0.0009444177243274617, |
| "loss": 1710.9596, |
| "step": 1600 |
| }, |
| { |
| "ce_loss_13": 3.2426217436790465, |
| "ce_loss_17": 3.0882898926734925, |
| "ce_loss_2": 4.63769805431366, |
| "ce_loss_4": 4.314783692359924, |
| "ce_loss_9": 3.609778642654419, |
| "epoch": 0.161, |
| "grad_norm": 996.0, |
| "kl_loss_13": 346.3702026367188, |
| "kl_loss_2": 3123.5116943359376, |
| "kl_loss_4": 2525.2113403320313, |
| "kl_loss_9": 1151.8468841552735, |
| "learning_rate": 0.0009436884368592739, |
| "loss": 1770.4922, |
| "step": 1610 |
| }, |
| { |
| "ce_loss_13": 3.28545196056366, |
| "ce_loss_17": 3.138417398929596, |
| "ce_loss_2": 4.634627103805542, |
| "ce_loss_4": 4.320222735404968, |
| "ce_loss_9": 3.636016881465912, |
| "epoch": 0.162, |
| "grad_norm": 900.0, |
| "kl_loss_13": 338.63466796875, |
| "kl_loss_2": 3048.8128173828127, |
| "kl_loss_4": 2448.1596069335938, |
| "kl_loss_9": 1109.2102783203125, |
| "learning_rate": 0.0009429546814534529, |
| "loss": 1768.9836, |
| "step": 1620 |
| }, |
| { |
| "ce_loss_13": 3.2956096053123476, |
| "ce_loss_17": 3.148363399505615, |
| "ce_loss_2": 4.633633232116699, |
| "ce_loss_4": 4.3194786548614506, |
| "ce_loss_9": 3.632928895950317, |
| "epoch": 0.163, |
| "grad_norm": 800.0, |
| "kl_loss_13": 348.34557037353517, |
| "kl_loss_2": 3018.760461425781, |
| "kl_loss_4": 2434.3108520507812, |
| "kl_loss_9": 1091.3109558105468, |
| "learning_rate": 0.0009422164654989072, |
| "loss": 1704.9285, |
| "step": 1630 |
| }, |
| { |
| "ce_loss_13": 3.4090987920761107, |
| "ce_loss_17": 3.2582162737846376, |
| "ce_loss_2": 4.720637488365173, |
| "ce_loss_4": 4.408244061470032, |
| "ce_loss_9": 3.741966998577118, |
| "epoch": 0.164, |
| "grad_norm": 1016.0, |
| "kl_loss_13": 343.50830230712893, |
| "kl_loss_2": 2985.711572265625, |
| "kl_loss_4": 2398.8918701171874, |
| "kl_loss_9": 1094.6207458496094, |
| "learning_rate": 0.0009414737964294635, |
| "loss": 1719.7918, |
| "step": 1640 |
| }, |
| { |
| "ce_loss_13": 3.3357781648635862, |
| "ce_loss_17": 3.190202260017395, |
| "ce_loss_2": 4.635064482688904, |
| "ce_loss_4": 4.324166345596313, |
| "ce_loss_9": 3.662205529212952, |
| "epoch": 0.165, |
| "grad_norm": 920.0, |
| "kl_loss_13": 329.86558837890624, |
| "kl_loss_2": 2929.6790283203127, |
| "kl_loss_4": 2343.7837646484377, |
| "kl_loss_9": 1054.6887329101562, |
| "learning_rate": 0.000940726681723791, |
| "loss": 1715.1156, |
| "step": 1650 |
| }, |
| { |
| "ce_loss_13": 3.1896764516830443, |
| "ce_loss_17": 3.0431617975234984, |
| "ce_loss_2": 4.596868777275086, |
| "ce_loss_4": 4.271089637279511, |
| "ce_loss_9": 3.5470924735069276, |
| "epoch": 0.166, |
| "grad_norm": 988.0, |
| "kl_loss_13": 340.5326095581055, |
| "kl_loss_2": 3162.3132568359374, |
| "kl_loss_4": 2560.507360839844, |
| "kl_loss_9": 1128.7283935546875, |
| "learning_rate": 0.0009399751289053266, |
| "loss": 1723.0844, |
| "step": 1660 |
| }, |
| { |
| "ce_loss_13": 3.379969835281372, |
| "ce_loss_17": 3.2356004118919373, |
| "ce_loss_2": 4.702561593055725, |
| "ce_loss_4": 4.37991418838501, |
| "ce_loss_9": 3.7151947379112245, |
| "epoch": 0.167, |
| "grad_norm": 824.0, |
| "kl_loss_13": 344.6328430175781, |
| "kl_loss_2": 3001.8953247070312, |
| "kl_loss_4": 2398.8171875, |
| "kl_loss_9": 1076.517510986328, |
| "learning_rate": 0.0009392191455421988, |
| "loss": 1737.8105, |
| "step": 1670 |
| }, |
| { |
| "ce_loss_13": 3.3747507095336915, |
| "ce_loss_17": 3.221015417575836, |
| "ce_loss_2": 4.69924635887146, |
| "ce_loss_4": 4.377769660949707, |
| "ce_loss_9": 3.7062151432037354, |
| "epoch": 0.168, |
| "grad_norm": 916.0, |
| "kl_loss_13": 360.6223419189453, |
| "kl_loss_2": 3038.3630126953126, |
| "kl_loss_4": 2435.707653808594, |
| "kl_loss_9": 1111.2371643066406, |
| "learning_rate": 0.0009384587392471515, |
| "loss": 1702.0465, |
| "step": 1680 |
| }, |
| { |
| "ce_loss_13": 3.3501906871795653, |
| "ce_loss_17": 3.2089019536972048, |
| "ce_loss_2": 4.647149395942688, |
| "ce_loss_4": 4.326480281352997, |
| "ce_loss_9": 3.6803701519966125, |
| "epoch": 0.169, |
| "grad_norm": 1096.0, |
| "kl_loss_13": 325.1235046386719, |
| "kl_loss_2": 2948.4739501953127, |
| "kl_loss_4": 2356.395672607422, |
| "kl_loss_9": 1063.064730834961, |
| "learning_rate": 0.0009376939176774678, |
| "loss": 1680.5014, |
| "step": 1690 |
| }, |
| { |
| "ce_loss_13": 3.3312074303627015, |
| "ce_loss_17": 3.1841515064239503, |
| "ce_loss_2": 4.667434883117676, |
| "ce_loss_4": 4.351763486862183, |
| "ce_loss_9": 3.6748557925224303, |
| "epoch": 0.17, |
| "grad_norm": 852.0, |
| "kl_loss_13": 325.2788650512695, |
| "kl_loss_2": 3007.161828613281, |
| "kl_loss_4": 2411.526745605469, |
| "kl_loss_9": 1091.346240234375, |
| "learning_rate": 0.0009369246885348925, |
| "loss": 1734.7703, |
| "step": 1700 |
| }, |
| { |
| "ce_loss_13": 3.3149147748947145, |
| "ce_loss_17": 3.17114896774292, |
| "ce_loss_2": 4.694669222831726, |
| "ce_loss_4": 4.369971537590027, |
| "ce_loss_9": 3.6792506098747255, |
| "epoch": 0.171, |
| "grad_norm": 980.0, |
| "kl_loss_13": 333.1719375610352, |
| "kl_loss_2": 3092.3607788085938, |
| "kl_loss_4": 2495.0754638671874, |
| "kl_loss_9": 1121.8295806884767, |
| "learning_rate": 0.0009361510595655545, |
| "loss": 1742.2516, |
| "step": 1710 |
| }, |
| { |
| "ce_loss_13": 3.284603452682495, |
| "ce_loss_17": 3.1317435264587403, |
| "ce_loss_2": 4.63212902545929, |
| "ce_loss_4": 4.31629102230072, |
| "ce_loss_9": 3.6394808650016786, |
| "epoch": 0.172, |
| "grad_norm": 952.0, |
| "kl_loss_13": 343.11474151611327, |
| "kl_loss_2": 3064.3009521484373, |
| "kl_loss_4": 2478.7039184570312, |
| "kl_loss_9": 1128.8363525390625, |
| "learning_rate": 0.0009353730385598887, |
| "loss": 1739.8842, |
| "step": 1720 |
| }, |
| { |
| "ce_loss_13": 3.209711420536041, |
| "ce_loss_17": 3.0663392186164855, |
| "ce_loss_2": 4.601539134979248, |
| "ce_loss_4": 4.284777450561523, |
| "ce_loss_9": 3.5684893131256104, |
| "epoch": 0.173, |
| "grad_norm": 940.0, |
| "kl_loss_13": 331.15040588378906, |
| "kl_loss_2": 3114.6553955078125, |
| "kl_loss_4": 2526.00810546875, |
| "kl_loss_9": 1117.340380859375, |
| "learning_rate": 0.0009345906333525581, |
| "loss": 1763.7367, |
| "step": 1730 |
| }, |
| { |
| "ce_loss_13": 3.2470051884651183, |
| "ce_loss_17": 3.100960433483124, |
| "ce_loss_2": 4.612321376800537, |
| "ce_loss_4": 4.295753347873688, |
| "ce_loss_9": 3.604890739917755, |
| "epoch": 0.174, |
| "grad_norm": 948.0, |
| "kl_loss_13": 338.6273529052734, |
| "kl_loss_2": 3088.3747314453126, |
| "kl_loss_4": 2493.0422485351564, |
| "kl_loss_9": 1116.8533142089843, |
| "learning_rate": 0.0009338038518223745, |
| "loss": 1728.3164, |
| "step": 1740 |
| }, |
| { |
| "ce_loss_13": 3.315132188796997, |
| "ce_loss_17": 3.1622018218040466, |
| "ce_loss_2": 4.669206261634827, |
| "ce_loss_4": 4.3636315822601315, |
| "ce_loss_9": 3.673261833190918, |
| "epoch": 0.175, |
| "grad_norm": 944.0, |
| "kl_loss_13": 344.2067535400391, |
| "kl_loss_2": 3097.088916015625, |
| "kl_loss_4": 2509.694616699219, |
| "kl_loss_9": 1135.1895111083984, |
| "learning_rate": 0.0009330127018922195, |
| "loss": 1786.5902, |
| "step": 1750 |
| }, |
| { |
| "ce_loss_13": 3.265569067001343, |
| "ce_loss_17": 3.121223747730255, |
| "ce_loss_2": 4.617794132232666, |
| "ce_loss_4": 4.3092085242271425, |
| "ce_loss_9": 3.618662393093109, |
| "epoch": 0.176, |
| "grad_norm": 948.0, |
| "kl_loss_13": 326.6499877929688, |
| "kl_loss_2": 3067.448449707031, |
| "kl_loss_4": 2483.7280395507814, |
| "kl_loss_9": 1113.4848236083985, |
| "learning_rate": 0.0009322171915289634, |
| "loss": 1740.409, |
| "step": 1760 |
| }, |
| { |
| "ce_loss_13": 3.300253987312317, |
| "ce_loss_17": 3.166309428215027, |
| "ce_loss_2": 4.6185898065567015, |
| "ce_loss_4": 4.3082571744918825, |
| "ce_loss_9": 3.6409834384918214, |
| "epoch": 0.177, |
| "grad_norm": 804.0, |
| "kl_loss_13": 320.72338409423827, |
| "kl_loss_2": 2991.6572509765624, |
| "kl_loss_4": 2409.6597290039062, |
| "kl_loss_9": 1087.1696472167969, |
| "learning_rate": 0.0009314173287433873, |
| "loss": 1687.1207, |
| "step": 1770 |
| }, |
| { |
| "ce_loss_13": 3.289630877971649, |
| "ce_loss_17": 3.142843008041382, |
| "ce_loss_2": 4.626211190223694, |
| "ce_loss_4": 4.3087324619293215, |
| "ce_loss_9": 3.631891739368439, |
| "epoch": 0.178, |
| "grad_norm": 992.0, |
| "kl_loss_13": 329.1949890136719, |
| "kl_loss_2": 3022.0464599609377, |
| "kl_loss_4": 2426.825518798828, |
| "kl_loss_9": 1093.789859008789, |
| "learning_rate": 0.0009306131215901003, |
| "loss": 1690.7082, |
| "step": 1780 |
| }, |
| { |
| "ce_loss_13": 3.3200312733650206, |
| "ce_loss_17": 3.176234519481659, |
| "ce_loss_2": 4.648792433738708, |
| "ce_loss_4": 4.332716870307922, |
| "ce_loss_9": 3.660875880718231, |
| "epoch": 0.179, |
| "grad_norm": 924.0, |
| "kl_loss_13": 330.60894775390625, |
| "kl_loss_2": 3001.9236328125, |
| "kl_loss_4": 2407.71240234375, |
| "kl_loss_9": 1086.3715362548828, |
| "learning_rate": 0.0009298045781674596, |
| "loss": 1675.8539, |
| "step": 1790 |
| }, |
| { |
| "ce_loss_13": 3.300528514385223, |
| "ce_loss_17": 3.1608885645866396, |
| "ce_loss_2": 4.610639953613282, |
| "ce_loss_4": 4.2958252906799315, |
| "ce_loss_9": 3.635425591468811, |
| "epoch": 0.18, |
| "grad_norm": 932.0, |
| "kl_loss_13": 333.2683532714844, |
| "kl_loss_2": 2965.7910766601562, |
| "kl_loss_4": 2381.9828247070313, |
| "kl_loss_9": 1066.5880493164063, |
| "learning_rate": 0.0009289917066174886, |
| "loss": 1704.6016, |
| "step": 1800 |
| }, |
| { |
| "ce_loss_13": 3.2990561604499815, |
| "ce_loss_17": 3.1590625047683716, |
| "ce_loss_2": 4.5701922416687015, |
| "ce_loss_4": 4.261891508102417, |
| "ce_loss_9": 3.6111907005310058, |
| "epoch": 0.181, |
| "grad_norm": 1032.0, |
| "kl_loss_13": 327.1022033691406, |
| "kl_loss_2": 2911.3859741210936, |
| "kl_loss_4": 2326.5006469726563, |
| "kl_loss_9": 1023.0532073974609, |
| "learning_rate": 0.0009281745151257945, |
| "loss": 1664.785, |
| "step": 1810 |
| }, |
| { |
| "ce_loss_13": 3.3196240186691286, |
| "ce_loss_17": 3.173276627063751, |
| "ce_loss_2": 4.6561943054199215, |
| "ce_loss_4": 4.331534540653228, |
| "ce_loss_9": 3.6514352560043335, |
| "epoch": 0.182, |
| "grad_norm": 840.0, |
| "kl_loss_13": 332.4508880615234, |
| "kl_loss_2": 3007.704626464844, |
| "kl_loss_4": 2394.2581420898437, |
| "kl_loss_9": 1064.9154327392578, |
| "learning_rate": 0.0009273530119214868, |
| "loss": 1710.0805, |
| "step": 1820 |
| }, |
| { |
| "ce_loss_13": 3.399492025375366, |
| "ce_loss_17": 3.2670570254325866, |
| "ce_loss_2": 4.707061719894409, |
| "ce_loss_4": 4.408486175537109, |
| "ce_loss_9": 3.726747679710388, |
| "epoch": 0.183, |
| "grad_norm": 832.0, |
| "kl_loss_13": 315.56218872070315, |
| "kl_loss_2": 2941.9202392578127, |
| "kl_loss_4": 2377.2765686035154, |
| "kl_loss_9": 1050.3401092529298, |
| "learning_rate": 0.0009265272052770935, |
| "loss": 1658.6164, |
| "step": 1830 |
| }, |
| { |
| "ce_loss_13": 3.241188955307007, |
| "ce_loss_17": 3.097905230522156, |
| "ce_loss_2": 4.615456485748291, |
| "ce_loss_4": 4.291520380973816, |
| "ce_loss_9": 3.5967069983482363, |
| "epoch": 0.184, |
| "grad_norm": 1064.0, |
| "kl_loss_13": 319.540576171875, |
| "kl_loss_2": 3048.9252685546876, |
| "kl_loss_4": 2446.2099609375, |
| "kl_loss_9": 1090.7315155029296, |
| "learning_rate": 0.0009256971035084784, |
| "loss": 1717.7367, |
| "step": 1840 |
| }, |
| { |
| "ce_loss_13": 3.183176326751709, |
| "ce_loss_17": 3.032970154285431, |
| "ce_loss_2": 4.578329372406006, |
| "ce_loss_4": 4.25200344324112, |
| "ce_loss_9": 3.546638536453247, |
| "epoch": 0.185, |
| "grad_norm": 1056.0, |
| "kl_loss_13": 329.8408660888672, |
| "kl_loss_2": 3132.689697265625, |
| "kl_loss_4": 2521.613671875, |
| "kl_loss_9": 1132.6336364746094, |
| "learning_rate": 0.0009248627149747573, |
| "loss": 1736.5449, |
| "step": 1850 |
| }, |
| { |
| "ce_loss_13": 3.3700860023498533, |
| "ce_loss_17": 3.235945498943329, |
| "ce_loss_2": 4.684654283523559, |
| "ce_loss_4": 4.370312261581421, |
| "ce_loss_9": 3.70113662481308, |
| "epoch": 0.186, |
| "grad_norm": 912.0, |
| "kl_loss_13": 311.49042053222655, |
| "kl_loss_2": 2974.7622314453124, |
| "kl_loss_4": 2394.7427490234377, |
| "kl_loss_9": 1062.0761749267579, |
| "learning_rate": 0.0009240240480782129, |
| "loss": 1686.3809, |
| "step": 1860 |
| }, |
| { |
| "ce_loss_13": 3.271839952468872, |
| "ce_loss_17": 3.1345713257789614, |
| "ce_loss_2": 4.622663855552673, |
| "ce_loss_4": 4.309623444080353, |
| "ce_loss_9": 3.6245725989341735, |
| "epoch": 0.187, |
| "grad_norm": 848.0, |
| "kl_loss_13": 314.02408294677736, |
| "kl_loss_2": 3021.1706420898436, |
| "kl_loss_4": 2439.18779296875, |
| "kl_loss_9": 1089.5779876708984, |
| "learning_rate": 0.0009231811112642122, |
| "loss": 1690.423, |
| "step": 1870 |
| }, |
| { |
| "ce_loss_13": 3.3170746207237243, |
| "ce_loss_17": 3.1826101899147035, |
| "ce_loss_2": 4.617326879501343, |
| "ce_loss_4": 4.297490060329437, |
| "ce_loss_9": 3.651800787448883, |
| "epoch": 0.188, |
| "grad_norm": 932.0, |
| "kl_loss_13": 317.5179779052734, |
| "kl_loss_2": 2943.584130859375, |
| "kl_loss_4": 2355.8169372558596, |
| "kl_loss_9": 1067.2879119873046, |
| "learning_rate": 0.0009223339130211192, |
| "loss": 1675.9391, |
| "step": 1880 |
| }, |
| { |
| "ce_loss_13": 3.1827825784683226, |
| "ce_loss_17": 3.0472137331962585, |
| "ce_loss_2": 4.543703174591064, |
| "ce_loss_4": 4.2226891040802, |
| "ce_loss_9": 3.5217792987823486, |
| "epoch": 0.189, |
| "grad_norm": 908.0, |
| "kl_loss_13": 310.95121307373046, |
| "kl_loss_2": 3050.8374145507814, |
| "kl_loss_4": 2454.386279296875, |
| "kl_loss_9": 1069.45693359375, |
| "learning_rate": 0.0009214824618802108, |
| "loss": 1710.3328, |
| "step": 1890 |
| }, |
| { |
| "ce_loss_13": 3.3579103708267213, |
| "ce_loss_17": 3.214918923377991, |
| "ce_loss_2": 4.685916471481323, |
| "ce_loss_4": 4.363340485095978, |
| "ce_loss_9": 3.6941242694854735, |
| "epoch": 0.19, |
| "grad_norm": 920.0, |
| "kl_loss_13": 325.14684143066404, |
| "kl_loss_2": 2989.9252075195313, |
| "kl_loss_4": 2387.9420776367188, |
| "kl_loss_9": 1071.7905395507812, |
| "learning_rate": 0.0009206267664155906, |
| "loss": 1728.9008, |
| "step": 1900 |
| }, |
| { |
| "ce_loss_13": 3.280672717094421, |
| "ce_loss_17": 3.1395169854164124, |
| "ce_loss_2": 4.616325831413269, |
| "ce_loss_4": 4.299834370613098, |
| "ce_loss_9": 3.6235618352890016, |
| "epoch": 0.191, |
| "grad_norm": 1096.0, |
| "kl_loss_13": 320.41553192138673, |
| "kl_loss_2": 3002.745715332031, |
| "kl_loss_4": 2409.4244873046873, |
| "kl_loss_9": 1058.2708099365234, |
| "learning_rate": 0.0009197668352441024, |
| "loss": 1707.7363, |
| "step": 1910 |
| }, |
| { |
| "ce_loss_13": 3.3297136664390563, |
| "ce_loss_17": 3.1943230509757994, |
| "ce_loss_2": 4.650165748596192, |
| "ce_loss_4": 4.332601535320282, |
| "ce_loss_9": 3.6573877334594727, |
| "epoch": 0.192, |
| "grad_norm": 972.0, |
| "kl_loss_13": 310.84013214111326, |
| "kl_loss_2": 2974.4477416992186, |
| "kl_loss_4": 2371.4453369140624, |
| "kl_loss_9": 1049.8490325927735, |
| "learning_rate": 0.0009189026770252437, |
| "loss": 1686.9504, |
| "step": 1920 |
| }, |
| { |
| "ce_loss_13": 3.3480990052223207, |
| "ce_loss_17": 3.2176770210266112, |
| "ce_loss_2": 4.678257179260254, |
| "ce_loss_4": 4.3551212549209595, |
| "ce_loss_9": 3.6879690051078797, |
| "epoch": 0.193, |
| "grad_norm": 976.0, |
| "kl_loss_13": 311.2315979003906, |
| "kl_loss_2": 2980.5617553710936, |
| "kl_loss_4": 2380.5751770019533, |
| "kl_loss_9": 1060.0710479736329, |
| "learning_rate": 0.000918034300461078, |
| "loss": 1742.0033, |
| "step": 1930 |
| }, |
| { |
| "ce_loss_13": 3.382282018661499, |
| "ce_loss_17": 3.2473576068878174, |
| "ce_loss_2": 4.680280494689941, |
| "ce_loss_4": 4.367475247383117, |
| "ce_loss_9": 3.709751844406128, |
| "epoch": 0.194, |
| "grad_norm": 956.0, |
| "kl_loss_13": 312.8464126586914, |
| "kl_loss_2": 2935.0189819335938, |
| "kl_loss_4": 2345.4461364746094, |
| "kl_loss_9": 1046.4751770019532, |
| "learning_rate": 0.0009171617142961477, |
| "loss": 1670.1602, |
| "step": 1940 |
| }, |
| { |
| "ce_loss_13": 3.340591549873352, |
| "ce_loss_17": 3.205805504322052, |
| "ce_loss_2": 4.650496292114258, |
| "ce_loss_4": 4.33450448513031, |
| "ce_loss_9": 3.670855188369751, |
| "epoch": 0.195, |
| "grad_norm": 1072.0, |
| "kl_loss_13": 309.28123779296874, |
| "kl_loss_2": 2949.9548950195312, |
| "kl_loss_4": 2360.7995361328126, |
| "kl_loss_9": 1049.5942596435548, |
| "learning_rate": 0.0009162849273173857, |
| "loss": 1676.3613, |
| "step": 1950 |
| }, |
| { |
| "ce_loss_13": 3.2835671663284303, |
| "ce_loss_17": 3.1486000180244447, |
| "ce_loss_2": 4.607069182395935, |
| "ce_loss_4": 4.287353086471557, |
| "ce_loss_9": 3.613594961166382, |
| "epoch": 0.196, |
| "grad_norm": 820.0, |
| "kl_loss_13": 305.7414825439453, |
| "kl_loss_2": 2973.151953125, |
| "kl_loss_4": 2376.4894104003906, |
| "kl_loss_9": 1036.030709838867, |
| "learning_rate": 0.0009154039483540273, |
| "loss": 1688.3225, |
| "step": 1960 |
| }, |
| { |
| "ce_loss_13": 3.2589982867240908, |
| "ce_loss_17": 3.128650724887848, |
| "ce_loss_2": 4.603858280181885, |
| "ce_loss_4": 4.276942348480224, |
| "ce_loss_9": 3.595140290260315, |
| "epoch": 0.197, |
| "grad_norm": 1048.0, |
| "kl_loss_13": 306.42955474853517, |
| "kl_loss_2": 3011.138049316406, |
| "kl_loss_4": 2407.12412109375, |
| "kl_loss_9": 1056.911538696289, |
| "learning_rate": 0.0009145187862775209, |
| "loss": 1688.5338, |
| "step": 1970 |
| }, |
| { |
| "ce_loss_13": 3.2973819851875303, |
| "ce_loss_17": 3.1623541831970217, |
| "ce_loss_2": 4.6057007074356076, |
| "ce_loss_4": 4.289049386978149, |
| "ce_loss_9": 3.6262983322143554, |
| "epoch": 0.198, |
| "grad_norm": 900.0, |
| "kl_loss_13": 315.20320587158204, |
| "kl_loss_2": 2968.1071044921873, |
| "kl_loss_4": 2371.3665771484375, |
| "kl_loss_9": 1055.8795623779297, |
| "learning_rate": 0.0009136294500014386, |
| "loss": 1672.8637, |
| "step": 1980 |
| }, |
| { |
| "ce_loss_13": 3.246236276626587, |
| "ce_loss_17": 3.1092082142829893, |
| "ce_loss_2": 4.62981595993042, |
| "ce_loss_4": 4.3133632898330685, |
| "ce_loss_9": 3.593501567840576, |
| "epoch": 0.199, |
| "grad_norm": 1208.0, |
| "kl_loss_13": 315.2277770996094, |
| "kl_loss_2": 3042.6927734375, |
| "kl_loss_4": 2459.7537841796875, |
| "kl_loss_9": 1064.27587890625, |
| "learning_rate": 0.000912735948481387, |
| "loss": 1718.3559, |
| "step": 1990 |
| }, |
| { |
| "ce_loss_13": 3.280001187324524, |
| "ce_loss_17": 3.1462537169456484, |
| "ce_loss_2": 4.606158256530762, |
| "ce_loss_4": 4.289944136142731, |
| "ce_loss_9": 3.6153658390045167, |
| "epoch": 0.2, |
| "grad_norm": 1200.0, |
| "kl_loss_13": 315.1036407470703, |
| "kl_loss_2": 2995.057080078125, |
| "kl_loss_4": 2402.257849121094, |
| "kl_loss_9": 1067.4875183105469, |
| "learning_rate": 0.0009118382907149164, |
| "loss": 1667.1238, |
| "step": 2000 |
| }, |
| { |
| "ce_loss_13": 3.3050238132476806, |
| "ce_loss_17": 3.167179322242737, |
| "ce_loss_2": 4.620061135292053, |
| "ce_loss_4": 4.294731092453003, |
| "ce_loss_9": 3.6310877203941345, |
| "epoch": 0.201, |
| "grad_norm": 948.0, |
| "kl_loss_13": 316.0277557373047, |
| "kl_loss_2": 2953.5430908203125, |
| "kl_loss_4": 2348.8698608398436, |
| "kl_loss_9": 1051.2647247314453, |
| "learning_rate": 0.0009109364857414306, |
| "loss": 1662.3219, |
| "step": 2010 |
| }, |
| { |
| "ce_loss_13": 3.267809200286865, |
| "ce_loss_17": 3.134126341342926, |
| "ce_loss_2": 4.579069185256958, |
| "ce_loss_4": 4.265661716461182, |
| "ce_loss_9": 3.5956831574440002, |
| "epoch": 0.202, |
| "grad_norm": 988.0, |
| "kl_loss_13": 317.3951782226562, |
| "kl_loss_2": 2973.852587890625, |
| "kl_loss_4": 2382.701037597656, |
| "kl_loss_9": 1047.444317626953, |
| "learning_rate": 0.0009100305426420956, |
| "loss": 1708.768, |
| "step": 2020 |
| }, |
| { |
| "ce_loss_13": 3.2384045004844664, |
| "ce_loss_17": 3.1056310653686525, |
| "ce_loss_2": 4.61571741104126, |
| "ce_loss_4": 4.29548032283783, |
| "ce_loss_9": 3.575000691413879, |
| "epoch": 0.203, |
| "grad_norm": 912.0, |
| "kl_loss_13": 316.5642684936523, |
| "kl_loss_2": 3089.9134765625, |
| "kl_loss_4": 2494.646588134766, |
| "kl_loss_9": 1071.6087829589844, |
| "learning_rate": 0.0009091204705397484, |
| "loss": 1699.0529, |
| "step": 2030 |
| }, |
| { |
| "ce_loss_13": 3.229975736141205, |
| "ce_loss_17": 3.0909839868545532, |
| "ce_loss_2": 4.593655323982238, |
| "ce_loss_4": 4.2776727199554445, |
| "ce_loss_9": 3.5776746034622193, |
| "epoch": 0.204, |
| "grad_norm": 844.0, |
| "kl_loss_13": 321.4533996582031, |
| "kl_loss_2": 3081.678845214844, |
| "kl_loss_4": 2482.3412658691404, |
| "kl_loss_9": 1077.1077087402343, |
| "learning_rate": 0.0009082062785988049, |
| "loss": 1709.3756, |
| "step": 2040 |
| }, |
| { |
| "ce_loss_13": 3.366510272026062, |
| "ce_loss_17": 3.2245919585227965, |
| "ce_loss_2": 4.631314325332641, |
| "ce_loss_4": 4.322544360160828, |
| "ce_loss_9": 3.6698357462882996, |
| "epoch": 0.205, |
| "grad_norm": 1012.0, |
| "kl_loss_13": 333.6727294921875, |
| "kl_loss_2": 2903.525927734375, |
| "kl_loss_4": 2323.128173828125, |
| "kl_loss_9": 1028.489340209961, |
| "learning_rate": 0.0009072879760251679, |
| "loss": 1671.6572, |
| "step": 2050 |
| }, |
| { |
| "ce_loss_13": 3.342466855049133, |
| "ce_loss_17": 3.1741410851478578, |
| "ce_loss_2": 4.641148495674133, |
| "ce_loss_4": 4.33937566280365, |
| "ce_loss_9": 3.653992712497711, |
| "epoch": 0.206, |
| "grad_norm": 1120.0, |
| "kl_loss_13": 378.0567794799805, |
| "kl_loss_2": 3017.975341796875, |
| "kl_loss_4": 2453.679333496094, |
| "kl_loss_9": 1072.9305938720704, |
| "learning_rate": 0.0009063655720661341, |
| "loss": 1706.7299, |
| "step": 2060 |
| }, |
| { |
| "ce_loss_13": 3.3735498785972595, |
| "ce_loss_17": 3.214876985549927, |
| "ce_loss_2": 4.629880380630493, |
| "ce_loss_4": 4.321504092216491, |
| "ce_loss_9": 3.66883327960968, |
| "epoch": 0.207, |
| "grad_norm": 868.0, |
| "kl_loss_13": 367.0061889648438, |
| "kl_loss_2": 2900.9889404296873, |
| "kl_loss_4": 2323.9548828125, |
| "kl_loss_9": 1041.0056579589843, |
| "learning_rate": 0.000905439076010301, |
| "loss": 1671.4455, |
| "step": 2070 |
| }, |
| { |
| "ce_loss_13": 3.3185313940048218, |
| "ce_loss_17": 3.1657449841499328, |
| "ce_loss_2": 4.630046439170838, |
| "ce_loss_4": 4.310200715065003, |
| "ce_loss_9": 3.6360178709030153, |
| "epoch": 0.208, |
| "grad_norm": 980.0, |
| "kl_loss_13": 350.2437454223633, |
| "kl_loss_2": 2983.597509765625, |
| "kl_loss_4": 2383.0232177734374, |
| "kl_loss_9": 1056.0640838623046, |
| "learning_rate": 0.0009045084971874737, |
| "loss": 1660.3988, |
| "step": 2080 |
| }, |
| { |
| "ce_loss_13": 3.295871877670288, |
| "ce_loss_17": 3.1508471012115478, |
| "ce_loss_2": 4.600740051269531, |
| "ce_loss_4": 4.283790171146393, |
| "ce_loss_9": 3.621567988395691, |
| "epoch": 0.209, |
| "grad_norm": 812.0, |
| "kl_loss_13": 336.4440521240234, |
| "kl_loss_2": 2960.8560546875, |
| "kl_loss_4": 2367.225506591797, |
| "kl_loss_9": 1059.256381225586, |
| "learning_rate": 0.0009035738449685707, |
| "loss": 1701.1732, |
| "step": 2090 |
| }, |
| { |
| "ce_loss_13": 3.2303785562515257, |
| "ce_loss_17": 3.089155077934265, |
| "ce_loss_2": 4.603342223167419, |
| "ce_loss_4": 4.285797762870788, |
| "ce_loss_9": 3.5784525632858277, |
| "epoch": 0.21, |
| "grad_norm": 840.0, |
| "kl_loss_13": 322.00733337402346, |
| "kl_loss_2": 3068.8140380859377, |
| "kl_loss_4": 2478.9049438476563, |
| "kl_loss_9": 1084.0568939208983, |
| "learning_rate": 0.0009026351287655293, |
| "loss": 1689.6162, |
| "step": 2100 |
| }, |
| { |
| "ce_loss_13": 3.4153374671936034, |
| "ce_loss_17": 3.2879414796829223, |
| "ce_loss_2": 4.645143985748291, |
| "ce_loss_4": 4.348204255104065, |
| "ce_loss_9": 3.727934491634369, |
| "epoch": 0.211, |
| "grad_norm": 992.0, |
| "kl_loss_13": 302.51541900634766, |
| "kl_loss_2": 2800.839453125, |
| "kl_loss_4": 2238.2302062988283, |
| "kl_loss_9": 1004.9395477294922, |
| "learning_rate": 0.0009016923580312113, |
| "loss": 1599.1767, |
| "step": 2110 |
| }, |
| { |
| "ce_loss_13": 3.281217110157013, |
| "ce_loss_17": 3.1498187303543093, |
| "ce_loss_2": 4.581685590744018, |
| "ce_loss_4": 4.264639461040497, |
| "ce_loss_9": 3.600258469581604, |
| "epoch": 0.212, |
| "grad_norm": 1088.0, |
| "kl_loss_13": 308.59120025634763, |
| "kl_loss_2": 2939.0564819335937, |
| "kl_loss_4": 2342.163293457031, |
| "kl_loss_9": 1041.1623565673829, |
| "learning_rate": 0.0009007455422593077, |
| "loss": 1683.9701, |
| "step": 2120 |
| }, |
| { |
| "ce_loss_13": 3.2921030044555666, |
| "ce_loss_17": 3.158948540687561, |
| "ce_loss_2": 4.626010632514953, |
| "ce_loss_4": 4.318738889694214, |
| "ce_loss_9": 3.640748620033264, |
| "epoch": 0.213, |
| "grad_norm": 948.0, |
| "kl_loss_13": 311.5044937133789, |
| "kl_loss_2": 3003.3541870117188, |
| "kl_loss_4": 2421.8424682617188, |
| "kl_loss_9": 1084.6547302246095, |
| "learning_rate": 0.0008997946909842425, |
| "loss": 1701.5732, |
| "step": 2130 |
| }, |
| { |
| "ce_loss_13": 3.3131263256073, |
| "ce_loss_17": 3.1681451320648195, |
| "ce_loss_2": 4.682714056968689, |
| "ce_loss_4": 4.374153351783752, |
| "ce_loss_9": 3.672535574436188, |
| "epoch": 0.214, |
| "grad_norm": 1048.0, |
| "kl_loss_13": 324.59002990722655, |
| "kl_loss_2": 3085.134167480469, |
| "kl_loss_4": 2508.9499389648436, |
| "kl_loss_9": 1107.0599670410156, |
| "learning_rate": 0.0008988398137810777, |
| "loss": 1692.1084, |
| "step": 2140 |
| }, |
| { |
| "ce_loss_13": 3.3412655711174013, |
| "ce_loss_17": 3.207951271533966, |
| "ce_loss_2": 4.63103597164154, |
| "ce_loss_4": 4.3216476202011105, |
| "ce_loss_9": 3.6678178668022157, |
| "epoch": 0.215, |
| "grad_norm": 1120.0, |
| "kl_loss_13": 312.88766326904295, |
| "kl_loss_2": 2911.307897949219, |
| "kl_loss_4": 2329.045068359375, |
| "kl_loss_9": 1038.3618103027343, |
| "learning_rate": 0.0008978809202654162, |
| "loss": 1641.8588, |
| "step": 2150 |
| }, |
| { |
| "ce_loss_13": 3.3228820323944093, |
| "ce_loss_17": 3.1840418100357057, |
| "ce_loss_2": 4.621336102485657, |
| "ce_loss_4": 4.308529698848725, |
| "ce_loss_9": 3.6487194180488585, |
| "epoch": 0.216, |
| "grad_norm": 1080.0, |
| "kl_loss_13": 316.93070831298826, |
| "kl_loss_2": 2910.083215332031, |
| "kl_loss_4": 2330.002093505859, |
| "kl_loss_9": 1032.5771789550781, |
| "learning_rate": 0.0008969180200933046, |
| "loss": 1670.8207, |
| "step": 2160 |
| }, |
| { |
| "ce_loss_13": 3.3064399003982543, |
| "ce_loss_17": 3.1452136397361756, |
| "ce_loss_2": 4.631820559501648, |
| "ce_loss_4": 4.319561338424682, |
| "ce_loss_9": 3.633393371105194, |
| "epoch": 0.217, |
| "grad_norm": 1160.0, |
| "kl_loss_13": 362.232112121582, |
| "kl_loss_2": 2993.9896484375, |
| "kl_loss_4": 2414.5381408691405, |
| "kl_loss_9": 1071.7207061767579, |
| "learning_rate": 0.0008959511229611376, |
| "loss": 1711.1533, |
| "step": 2170 |
| }, |
| { |
| "ce_loss_13": 3.3684532761573793, |
| "ce_loss_17": 3.2196085929870604, |
| "ce_loss_2": 4.6537925720214846, |
| "ce_loss_4": 4.3553660869598385, |
| "ce_loss_9": 3.6753413915634154, |
| "epoch": 0.218, |
| "grad_norm": 924.0, |
| "kl_loss_13": 354.83716888427733, |
| "kl_loss_2": 2933.256970214844, |
| "kl_loss_4": 2370.560723876953, |
| "kl_loss_9": 1032.005581665039, |
| "learning_rate": 0.0008949802386055581, |
| "loss": 1671.3055, |
| "step": 2180 |
| }, |
| { |
| "ce_loss_13": 3.246527910232544, |
| "ce_loss_17": 3.0874037861824037, |
| "ce_loss_2": 4.544924974441528, |
| "ce_loss_4": 4.221065580844879, |
| "ce_loss_9": 3.5494844794273375, |
| "epoch": 0.219, |
| "grad_norm": 884.0, |
| "kl_loss_13": 349.8112518310547, |
| "kl_loss_2": 2944.659814453125, |
| "kl_loss_4": 2338.0881958007812, |
| "kl_loss_9": 1024.6070922851563, |
| "learning_rate": 0.0008940053768033609, |
| "loss": 1694.7809, |
| "step": 2190 |
| }, |
| { |
| "ce_loss_13": 3.316020703315735, |
| "ce_loss_17": 3.17333402633667, |
| "ce_loss_2": 4.5921409845352175, |
| "ce_loss_4": 4.285398149490357, |
| "ce_loss_9": 3.6149450659751894, |
| "epoch": 0.22, |
| "grad_norm": 920.0, |
| "kl_loss_13": 332.43937530517576, |
| "kl_loss_2": 2926.0295288085936, |
| "kl_loss_4": 2351.063018798828, |
| "kl_loss_9": 1018.8271484375, |
| "learning_rate": 0.0008930265473713938, |
| "loss": 1655.8172, |
| "step": 2200 |
| }, |
| { |
| "ce_loss_13": 3.269703197479248, |
| "ce_loss_17": 3.129119908809662, |
| "ce_loss_2": 4.580475306510925, |
| "ce_loss_4": 4.26227376461029, |
| "ce_loss_9": 3.584591794013977, |
| "epoch": 0.221, |
| "grad_norm": 1096.0, |
| "kl_loss_13": 326.22893676757815, |
| "kl_loss_2": 2960.6923706054686, |
| "kl_loss_4": 2354.4943115234373, |
| "kl_loss_9": 1020.52822265625, |
| "learning_rate": 0.0008920437601664579, |
| "loss": 1633.1572, |
| "step": 2210 |
| }, |
| { |
| "ce_loss_13": 3.2685254216194153, |
| "ce_loss_17": 3.128947913646698, |
| "ce_loss_2": 4.570039582252503, |
| "ce_loss_4": 4.26165542602539, |
| "ce_loss_9": 3.5980277180671694, |
| "epoch": 0.222, |
| "grad_norm": 1144.0, |
| "kl_loss_13": 320.0248626708984, |
| "kl_loss_2": 2960.6189086914064, |
| "kl_loss_4": 2369.3476440429686, |
| "kl_loss_9": 1048.8888549804688, |
| "learning_rate": 0.0008910570250852097, |
| "loss": 1645.8068, |
| "step": 2220 |
| }, |
| { |
| "ce_loss_13": 3.355926287174225, |
| "ce_loss_17": 3.2257240772247315, |
| "ce_loss_2": 4.620677042007446, |
| "ce_loss_4": 4.292669582366943, |
| "ce_loss_9": 3.6633192181587217, |
| "epoch": 0.223, |
| "grad_norm": 1080.0, |
| "kl_loss_13": 300.8265068054199, |
| "kl_loss_2": 2870.9962158203125, |
| "kl_loss_4": 2257.135302734375, |
| "kl_loss_9": 1002.3094543457031, |
| "learning_rate": 0.0008900663520640604, |
| "loss": 1616.9061, |
| "step": 2230 |
| }, |
| { |
| "ce_loss_13": 3.30984890460968, |
| "ce_loss_17": 3.1787513852119447, |
| "ce_loss_2": 4.608496284484863, |
| "ce_loss_4": 4.294780206680298, |
| "ce_loss_9": 3.633631157875061, |
| "epoch": 0.224, |
| "grad_norm": 924.0, |
| "kl_loss_13": 308.4694885253906, |
| "kl_loss_2": 2937.8706787109377, |
| "kl_loss_4": 2351.1838256835936, |
| "kl_loss_9": 1041.574917602539, |
| "learning_rate": 0.0008890717510790764, |
| "loss": 1656.2035, |
| "step": 2240 |
| }, |
| { |
| "ce_loss_13": 3.2739879369735716, |
| "ce_loss_17": 3.1423872351646422, |
| "ce_loss_2": 4.596669864654541, |
| "ce_loss_4": 4.268906736373902, |
| "ce_loss_9": 3.6001088976860047, |
| "epoch": 0.225, |
| "grad_norm": 868.0, |
| "kl_loss_13": 304.40641326904296, |
| "kl_loss_2": 2973.41064453125, |
| "kl_loss_4": 2364.267706298828, |
| "kl_loss_9": 1030.137890625, |
| "learning_rate": 0.0008880732321458784, |
| "loss": 1674.6625, |
| "step": 2250 |
| }, |
| { |
| "ce_loss_13": 3.2961417198181153, |
| "ce_loss_17": 3.1698644042015074, |
| "ce_loss_2": 4.593883466720581, |
| "ce_loss_4": 4.2756976127624515, |
| "ce_loss_9": 3.6222903966903686, |
| "epoch": 0.226, |
| "grad_norm": 1008.0, |
| "kl_loss_13": 303.41551818847654, |
| "kl_loss_2": 2915.967529296875, |
| "kl_loss_4": 2321.536273193359, |
| "kl_loss_9": 1029.0643920898438, |
| "learning_rate": 0.0008870708053195413, |
| "loss": 1665.1555, |
| "step": 2260 |
| }, |
| { |
| "ce_loss_13": 3.319032096862793, |
| "ce_loss_17": 3.196590614318848, |
| "ce_loss_2": 4.588518190383911, |
| "ce_loss_4": 4.272482478618622, |
| "ce_loss_9": 3.6295907855033875, |
| "epoch": 0.227, |
| "grad_norm": 920.0, |
| "kl_loss_13": 289.6078353881836, |
| "kl_loss_2": 2874.2600708007812, |
| "kl_loss_4": 2284.4325561523438, |
| "kl_loss_9": 1002.5240905761718, |
| "learning_rate": 0.0008860644806944918, |
| "loss": 1628.4755, |
| "step": 2270 |
| }, |
| { |
| "ce_loss_13": 3.2673797011375427, |
| "ce_loss_17": 3.1349551558494566, |
| "ce_loss_2": 4.580315804481506, |
| "ce_loss_4": 4.264006936550141, |
| "ce_loss_9": 3.6007945775985717, |
| "epoch": 0.228, |
| "grad_norm": 944.0, |
| "kl_loss_13": 302.1911224365234, |
| "kl_loss_2": 2956.342419433594, |
| "kl_loss_4": 2359.0373779296874, |
| "kl_loss_9": 1037.9614227294921, |
| "learning_rate": 0.0008850542684044079, |
| "loss": 1622.2234, |
| "step": 2280 |
| }, |
| { |
| "ce_loss_13": 3.237555146217346, |
| "ce_loss_17": 3.0998697161674498, |
| "ce_loss_2": 4.605713748931885, |
| "ce_loss_4": 4.284874546527862, |
| "ce_loss_9": 3.587420332431793, |
| "epoch": 0.229, |
| "grad_norm": 996.0, |
| "kl_loss_13": 310.7320899963379, |
| "kl_loss_2": 3069.1661987304688, |
| "kl_loss_4": 2463.925329589844, |
| "kl_loss_9": 1075.1367279052733, |
| "learning_rate": 0.0008840401786221159, |
| "loss": 1675.7064, |
| "step": 2290 |
| }, |
| { |
| "ce_loss_13": 3.370629632472992, |
| "ce_loss_17": 3.2480675101280214, |
| "ce_loss_2": 4.629262661933899, |
| "ce_loss_4": 4.321358180046081, |
| "ce_loss_9": 3.6830974936485292, |
| "epoch": 0.23, |
| "grad_norm": 840.0, |
| "kl_loss_13": 285.8288948059082, |
| "kl_loss_2": 2827.9096557617186, |
| "kl_loss_4": 2256.582763671875, |
| "kl_loss_9": 990.9099761962891, |
| "learning_rate": 0.000883022221559489, |
| "loss": 1591.9154, |
| "step": 2300 |
| }, |
| { |
| "ce_loss_13": 3.32784389257431, |
| "ce_loss_17": 3.202398419380188, |
| "ce_loss_2": 4.621891617774963, |
| "ce_loss_4": 4.321145343780517, |
| "ce_loss_9": 3.649727237224579, |
| "epoch": 0.231, |
| "grad_norm": 868.0, |
| "kl_loss_13": 297.4648681640625, |
| "kl_loss_2": 2920.39990234375, |
| "kl_loss_4": 2358.5670532226563, |
| "kl_loss_9": 1032.665316772461, |
| "learning_rate": 0.0008820004074673434, |
| "loss": 1692.0145, |
| "step": 2310 |
| }, |
| { |
| "ce_loss_13": 3.240832042694092, |
| "ce_loss_17": 3.112210047245026, |
| "ce_loss_2": 4.536813807487488, |
| "ce_loss_4": 4.223457646369934, |
| "ce_loss_9": 3.5707820653915405, |
| "epoch": 0.232, |
| "grad_norm": 868.0, |
| "kl_loss_13": 298.81760635375974, |
| "kl_loss_2": 2932.25478515625, |
| "kl_loss_4": 2340.141467285156, |
| "kl_loss_9": 1031.2572387695313, |
| "learning_rate": 0.0008809747466353355, |
| "loss": 1625.2012, |
| "step": 2320 |
| }, |
| { |
| "ce_loss_13": 3.2373562335968016, |
| "ce_loss_17": 3.114722716808319, |
| "ce_loss_2": 4.549531376361847, |
| "ce_loss_4": 4.237539303302765, |
| "ce_loss_9": 3.5638630270957945, |
| "epoch": 0.233, |
| "grad_norm": 896.0, |
| "kl_loss_13": 294.2587493896484, |
| "kl_loss_2": 2937.6509033203124, |
| "kl_loss_4": 2352.528948974609, |
| "kl_loss_9": 1009.2812927246093, |
| "learning_rate": 0.0008799452493918585, |
| "loss": 1652.5027, |
| "step": 2330 |
| }, |
| { |
| "ce_loss_13": 3.3194618344306948, |
| "ce_loss_17": 3.1950511932373047, |
| "ce_loss_2": 4.603914141654968, |
| "ce_loss_4": 4.295511245727539, |
| "ce_loss_9": 3.644179904460907, |
| "epoch": 0.234, |
| "grad_norm": 900.0, |
| "kl_loss_13": 290.78978271484374, |
| "kl_loss_2": 2895.8525634765624, |
| "kl_loss_4": 2312.163116455078, |
| "kl_loss_9": 1023.2259887695312, |
| "learning_rate": 0.0008789119261039385, |
| "loss": 1674.5498, |
| "step": 2340 |
| }, |
| { |
| "ce_loss_13": 3.2323792219161986, |
| "ce_loss_17": 3.1045128464698792, |
| "ce_loss_2": 4.529043483734131, |
| "ce_loss_4": 4.220817899703979, |
| "ce_loss_9": 3.5680212140083314, |
| "epoch": 0.235, |
| "grad_norm": 992.0, |
| "kl_loss_13": 292.59674377441405, |
| "kl_loss_2": 2910.7634521484374, |
| "kl_loss_4": 2336.0444946289062, |
| "kl_loss_9": 1030.5188995361327, |
| "learning_rate": 0.0008778747871771292, |
| "loss": 1621.2353, |
| "step": 2350 |
| }, |
| { |
| "ce_loss_13": 3.2797988414764405, |
| "ce_loss_17": 3.1562238335609436, |
| "ce_loss_2": 4.5353799104690555, |
| "ce_loss_4": 4.238891696929931, |
| "ce_loss_9": 3.592901861667633, |
| "epoch": 0.236, |
| "grad_norm": 1004.0, |
| "kl_loss_13": 282.03241577148435, |
| "kl_loss_2": 2824.370007324219, |
| "kl_loss_4": 2269.0152587890625, |
| "kl_loss_9": 987.7082122802734, |
| "learning_rate": 0.0008768338430554083, |
| "loss": 1590.733, |
| "step": 2360 |
| }, |
| { |
| "ce_loss_13": 3.3001204252243044, |
| "ce_loss_17": 3.169539213180542, |
| "ce_loss_2": 4.586280775070191, |
| "ce_loss_4": 4.2673201084136965, |
| "ce_loss_9": 3.622165870666504, |
| "epoch": 0.237, |
| "grad_norm": 1104.0, |
| "kl_loss_13": 298.4084838867187, |
| "kl_loss_2": 2888.375146484375, |
| "kl_loss_4": 2299.5453552246095, |
| "kl_loss_9": 1011.7012084960937, |
| "learning_rate": 0.0008757891042210713, |
| "loss": 1639.2125, |
| "step": 2370 |
| }, |
| { |
| "ce_loss_13": 3.307539737224579, |
| "ce_loss_17": 3.185174059867859, |
| "ce_loss_2": 4.58805661201477, |
| "ce_loss_4": 4.274906814098358, |
| "ce_loss_9": 3.6281189918518066, |
| "epoch": 0.238, |
| "grad_norm": 936.0, |
| "kl_loss_13": 291.6076263427734, |
| "kl_loss_2": 2845.2931518554688, |
| "kl_loss_4": 2273.3110534667967, |
| "kl_loss_9": 995.6541046142578, |
| "learning_rate": 0.0008747405811946271, |
| "loss": 1616.0865, |
| "step": 2380 |
| }, |
| { |
| "ce_loss_13": 3.2203105330467223, |
| "ce_loss_17": 3.089602053165436, |
| "ce_loss_2": 4.562181878089905, |
| "ce_loss_4": 4.251259768009186, |
| "ce_loss_9": 3.554316794872284, |
| "epoch": 0.239, |
| "grad_norm": 876.0, |
| "kl_loss_13": 297.2876396179199, |
| "kl_loss_2": 3014.0890991210936, |
| "kl_loss_4": 2435.5117919921877, |
| "kl_loss_9": 1050.7017669677734, |
| "learning_rate": 0.0008736882845346905, |
| "loss": 1637.707, |
| "step": 2390 |
| }, |
| { |
| "ce_loss_13": 3.3104472279548647, |
| "ce_loss_17": 3.171858215332031, |
| "ce_loss_2": 4.607442688941956, |
| "ce_loss_4": 4.297037196159363, |
| "ce_loss_9": 3.6347326397895814, |
| "epoch": 0.24, |
| "grad_norm": 1128.0, |
| "kl_loss_13": 305.9137329101562, |
| "kl_loss_2": 2909.6814453125, |
| "kl_loss_4": 2329.4797180175783, |
| "kl_loss_9": 1027.391165161133, |
| "learning_rate": 0.0008726322248378774, |
| "loss": 1629.5055, |
| "step": 2400 |
| }, |
| { |
| "ce_loss_13": 3.3081172585487364, |
| "ce_loss_17": 3.17943480014801, |
| "ce_loss_2": 4.619137024879455, |
| "ce_loss_4": 4.307534241676331, |
| "ce_loss_9": 3.6300449848175047, |
| "epoch": 0.241, |
| "grad_norm": 992.0, |
| "kl_loss_13": 300.4436065673828, |
| "kl_loss_2": 2966.785852050781, |
| "kl_loss_4": 2387.1541015625, |
| "kl_loss_9": 1031.5527801513672, |
| "learning_rate": 0.0008715724127386971, |
| "loss": 1678.6367, |
| "step": 2410 |
| }, |
| { |
| "ce_loss_13": 3.372581994533539, |
| "ce_loss_17": 3.2493183612823486, |
| "ce_loss_2": 4.628890919685364, |
| "ce_loss_4": 4.326624870300293, |
| "ce_loss_9": 3.6759827017784117, |
| "epoch": 0.242, |
| "grad_norm": 928.0, |
| "kl_loss_13": 293.4173645019531, |
| "kl_loss_2": 2858.6896240234373, |
| "kl_loss_4": 2292.6798583984373, |
| "kl_loss_9": 990.3782440185547, |
| "learning_rate": 0.0008705088589094458, |
| "loss": 1626.332, |
| "step": 2420 |
| }, |
| { |
| "ce_loss_13": 3.3809884548187257, |
| "ce_loss_17": 3.2555585384368895, |
| "ce_loss_2": 4.662137699127197, |
| "ce_loss_4": 4.351862525939941, |
| "ce_loss_9": 3.6982211112976073, |
| "epoch": 0.243, |
| "grad_norm": 1424.0, |
| "kl_loss_13": 304.6127471923828, |
| "kl_loss_2": 2905.42236328125, |
| "kl_loss_4": 2317.7510498046877, |
| "kl_loss_9": 1014.0051849365234, |
| "learning_rate": 0.0008694415740600988, |
| "loss": 1642.5791, |
| "step": 2430 |
| }, |
| { |
| "ce_loss_13": 3.243203592300415, |
| "ce_loss_17": 3.116365575790405, |
| "ce_loss_2": 4.565831923484803, |
| "ce_loss_4": 4.261678349971771, |
| "ce_loss_9": 3.566124069690704, |
| "epoch": 0.244, |
| "grad_norm": 1152.0, |
| "kl_loss_13": 306.19521179199216, |
| "kl_loss_2": 2957.50634765625, |
| "kl_loss_4": 2391.3567504882812, |
| "kl_loss_9": 1027.9412689208984, |
| "learning_rate": 0.0008683705689382025, |
| "loss": 1641.177, |
| "step": 2440 |
| }, |
| { |
| "ce_loss_13": 3.319749081134796, |
| "ce_loss_17": 3.1990580320358277, |
| "ce_loss_2": 4.5717099666595455, |
| "ce_loss_4": 4.267187988758087, |
| "ce_loss_9": 3.633048212528229, |
| "epoch": 0.245, |
| "grad_norm": 1040.0, |
| "kl_loss_13": 286.17855072021484, |
| "kl_loss_2": 2832.5764892578127, |
| "kl_loss_4": 2262.751641845703, |
| "kl_loss_9": 991.6012756347657, |
| "learning_rate": 0.0008672958543287666, |
| "loss": 1635.3568, |
| "step": 2450 |
| }, |
| { |
| "ce_loss_13": 3.326963114738464, |
| "ce_loss_17": 3.206622231006622, |
| "ce_loss_2": 4.572412705421447, |
| "ce_loss_4": 4.272608768939972, |
| "ce_loss_9": 3.642713487148285, |
| "epoch": 0.246, |
| "grad_norm": 924.0, |
| "kl_loss_13": 289.12726440429685, |
| "kl_loss_2": 2820.82841796875, |
| "kl_loss_4": 2248.3615295410154, |
| "kl_loss_9": 993.68134765625, |
| "learning_rate": 0.0008662174410541554, |
| "loss": 1594.2012, |
| "step": 2460 |
| }, |
| { |
| "ce_loss_13": 3.296044445037842, |
| "ce_loss_17": 3.1760493993759153, |
| "ce_loss_2": 4.547842645645142, |
| "ce_loss_4": 4.239560651779175, |
| "ce_loss_9": 3.6016671776771547, |
| "epoch": 0.247, |
| "grad_norm": 860.0, |
| "kl_loss_13": 287.0252395629883, |
| "kl_loss_2": 2829.6388549804688, |
| "kl_loss_4": 2251.3867004394533, |
| "kl_loss_9": 981.9071838378907, |
| "learning_rate": 0.0008651353399739787, |
| "loss": 1634.153, |
| "step": 2470 |
| }, |
| { |
| "ce_loss_13": 3.3206401109695434, |
| "ce_loss_17": 3.1998250007629396, |
| "ce_loss_2": 4.590792870521545, |
| "ce_loss_4": 4.286621856689453, |
| "ce_loss_9": 3.6425787806510925, |
| "epoch": 0.248, |
| "grad_norm": 928.0, |
| "kl_loss_13": 286.9697860717773, |
| "kl_loss_2": 2842.6251708984373, |
| "kl_loss_4": 2279.952502441406, |
| "kl_loss_9": 995.1058532714844, |
| "learning_rate": 0.0008640495619849821, |
| "loss": 1610.9688, |
| "step": 2480 |
| }, |
| { |
| "ce_loss_13": 3.2803879737854005, |
| "ce_loss_17": 3.1567034482955934, |
| "ce_loss_2": 4.546201705932617, |
| "ce_loss_4": 4.232971477508545, |
| "ce_loss_9": 3.588778924942017, |
| "epoch": 0.249, |
| "grad_norm": 924.0, |
| "kl_loss_13": 291.201180267334, |
| "kl_loss_2": 2858.2070068359376, |
| "kl_loss_4": 2274.083807373047, |
| "kl_loss_9": 989.4730499267578, |
| "learning_rate": 0.0008629601180209381, |
| "loss": 1602.223, |
| "step": 2490 |
| }, |
| { |
| "ce_loss_13": 3.2767815709114076, |
| "ce_loss_17": 3.1536255478858948, |
| "ce_loss_2": 4.547171616554261, |
| "ce_loss_4": 4.23374240398407, |
| "ce_loss_9": 3.589306080341339, |
| "epoch": 0.25, |
| "grad_norm": 924.0, |
| "kl_loss_13": 289.24659729003906, |
| "kl_loss_2": 2830.3158325195313, |
| "kl_loss_4": 2250.081640625, |
| "kl_loss_9": 979.8197479248047, |
| "learning_rate": 0.000861867019052535, |
| "loss": 1614.6669, |
| "step": 2500 |
| }, |
| { |
| "ce_loss_13": 3.2062613725662232, |
| "ce_loss_17": 3.073312187194824, |
| "ce_loss_2": 4.525754141807556, |
| "ce_loss_4": 4.216163754463196, |
| "ce_loss_9": 3.5246147513389587, |
| "epoch": 0.251, |
| "grad_norm": 928.0, |
| "kl_loss_13": 304.6170486450195, |
| "kl_loss_2": 2955.1001098632814, |
| "kl_loss_4": 2374.7687438964845, |
| "kl_loss_9": 1011.628628540039, |
| "learning_rate": 0.0008607702760872678, |
| "loss": 1654.2996, |
| "step": 2510 |
| }, |
| { |
| "ce_loss_13": 3.306063413619995, |
| "ce_loss_17": 3.178913378715515, |
| "ce_loss_2": 4.551721382141113, |
| "ce_loss_4": 4.254784882068634, |
| "ce_loss_9": 3.605251908302307, |
| "epoch": 0.252, |
| "grad_norm": 1024.0, |
| "kl_loss_13": 295.6897918701172, |
| "kl_loss_2": 2803.2751586914064, |
| "kl_loss_4": 2250.443225097656, |
| "kl_loss_9": 970.370947265625, |
| "learning_rate": 0.0008596699001693256, |
| "loss": 1629.6939, |
| "step": 2520 |
| }, |
| { |
| "ce_loss_13": 3.317117977142334, |
| "ce_loss_17": 3.1989371538162232, |
| "ce_loss_2": 4.557626843452454, |
| "ce_loss_4": 4.249381899833679, |
| "ce_loss_9": 3.612459719181061, |
| "epoch": 0.253, |
| "grad_norm": 880.0, |
| "kl_loss_13": 287.5278747558594, |
| "kl_loss_2": 2813.3516357421877, |
| "kl_loss_4": 2239.7448913574217, |
| "kl_loss_9": 966.4338073730469, |
| "learning_rate": 0.0008585659023794818, |
| "loss": 1622.9551, |
| "step": 2530 |
| }, |
| { |
| "ce_loss_13": 3.286828410625458, |
| "ce_loss_17": 3.1603270292282106, |
| "ce_loss_2": 4.601086258888245, |
| "ce_loss_4": 4.2928143501281735, |
| "ce_loss_9": 3.6168991327285767, |
| "epoch": 0.254, |
| "grad_norm": 836.0, |
| "kl_loss_13": 298.24049682617186, |
| "kl_loss_2": 2933.430517578125, |
| "kl_loss_4": 2355.2827331542967, |
| "kl_loss_9": 1025.785546875, |
| "learning_rate": 0.0008574582938349817, |
| "loss": 1640.077, |
| "step": 2540 |
| }, |
| { |
| "ce_loss_13": 3.274578666687012, |
| "ce_loss_17": 3.136221206188202, |
| "ce_loss_2": 4.588469123840332, |
| "ce_loss_4": 4.278005242347717, |
| "ce_loss_9": 3.6075248599052427, |
| "epoch": 0.255, |
| "grad_norm": 892.0, |
| "kl_loss_13": 308.6023178100586, |
| "kl_loss_2": 2939.1209350585937, |
| "kl_loss_4": 2357.57861328125, |
| "kl_loss_9": 1042.74169921875, |
| "learning_rate": 0.0008563470856894315, |
| "loss": 1609.0549, |
| "step": 2550 |
| }, |
| { |
| "ce_loss_13": 3.2660026431083677, |
| "ce_loss_17": 3.1421032309532166, |
| "ce_loss_2": 4.562472009658814, |
| "ce_loss_4": 4.255641174316406, |
| "ce_loss_9": 3.5885525345802307, |
| "epoch": 0.256, |
| "grad_norm": 936.0, |
| "kl_loss_13": 289.44919662475587, |
| "kl_loss_2": 2893.981311035156, |
| "kl_loss_4": 2323.9205932617188, |
| "kl_loss_9": 1012.901611328125, |
| "learning_rate": 0.0008552322891326845, |
| "loss": 1619.8651, |
| "step": 2560 |
| }, |
| { |
| "ce_loss_13": 3.2321555495262144, |
| "ce_loss_17": 3.109209358692169, |
| "ce_loss_2": 4.541443562507629, |
| "ce_loss_4": 4.23039140701294, |
| "ce_loss_9": 3.5549809217453, |
| "epoch": 0.257, |
| "grad_norm": 1152.0, |
| "kl_loss_13": 284.43701934814453, |
| "kl_loss_2": 2916.8890869140623, |
| "kl_loss_4": 2341.3971740722654, |
| "kl_loss_9": 998.0176422119141, |
| "learning_rate": 0.0008541139153907296, |
| "loss": 1603.4274, |
| "step": 2570 |
| }, |
| { |
| "ce_loss_13": 3.192133629322052, |
| "ce_loss_17": 3.070415472984314, |
| "ce_loss_2": 4.486647391319275, |
| "ce_loss_4": 4.174953711032868, |
| "ce_loss_9": 3.5114189982414246, |
| "epoch": 0.258, |
| "grad_norm": 964.0, |
| "kl_loss_13": 278.5242919921875, |
| "kl_loss_2": 2881.787731933594, |
| "kl_loss_4": 2308.077313232422, |
| "kl_loss_9": 985.5480651855469, |
| "learning_rate": 0.0008529919757255782, |
| "loss": 1629.6424, |
| "step": 2580 |
| }, |
| { |
| "ce_loss_13": 3.217985284328461, |
| "ce_loss_17": 3.1062445521354674, |
| "ce_loss_2": 4.462697196006775, |
| "ce_loss_4": 4.157460844516754, |
| "ce_loss_9": 3.512054204940796, |
| "epoch": 0.259, |
| "grad_norm": 912.0, |
| "kl_loss_13": 271.9975273132324, |
| "kl_loss_2": 2796.550537109375, |
| "kl_loss_4": 2222.2314270019533, |
| "kl_loss_9": 949.5425567626953, |
| "learning_rate": 0.0008518664814351503, |
| "loss": 1572.5631, |
| "step": 2590 |
| }, |
| { |
| "ce_loss_13": 3.196563982963562, |
| "ce_loss_17": 3.071341335773468, |
| "ce_loss_2": 4.5131648778915405, |
| "ce_loss_4": 4.201615858078003, |
| "ce_loss_9": 3.5166616797447205, |
| "epoch": 0.26, |
| "grad_norm": 904.0, |
| "kl_loss_13": 292.19246368408204, |
| "kl_loss_2": 2938.97783203125, |
| "kl_loss_4": 2354.365118408203, |
| "kl_loss_9": 1012.131234741211, |
| "learning_rate": 0.0008507374438531607, |
| "loss": 1686.0457, |
| "step": 2600 |
| }, |
| { |
| "ce_loss_13": 3.172201859951019, |
| "ce_loss_17": 3.05413476228714, |
| "ce_loss_2": 4.454415559768677, |
| "ce_loss_4": 4.152374064922332, |
| "ce_loss_9": 3.4919321060180666, |
| "epoch": 0.261, |
| "grad_norm": 1104.0, |
| "kl_loss_13": 281.7638107299805, |
| "kl_loss_2": 2857.7443237304688, |
| "kl_loss_4": 2288.983264160156, |
| "kl_loss_9": 987.0382049560546, |
| "learning_rate": 0.0008496048743490053, |
| "loss": 1603.1475, |
| "step": 2610 |
| }, |
| { |
| "ce_loss_13": 3.32059725522995, |
| "ce_loss_17": 3.1986724138259888, |
| "ce_loss_2": 4.559686994552612, |
| "ce_loss_4": 4.259667956829071, |
| "ce_loss_9": 3.6245452165603638, |
| "epoch": 0.262, |
| "grad_norm": 888.0, |
| "kl_loss_13": 282.71466674804685, |
| "kl_loss_2": 2807.50380859375, |
| "kl_loss_4": 2234.340087890625, |
| "kl_loss_9": 973.4236206054687, |
| "learning_rate": 0.0008484687843276469, |
| "loss": 1595.0721, |
| "step": 2620 |
| }, |
| { |
| "ce_loss_13": 3.2528497219085692, |
| "ce_loss_17": 3.1310725927352907, |
| "ce_loss_2": 4.5340532779693605, |
| "ce_loss_4": 4.219503045082092, |
| "ce_loss_9": 3.5716007471084597, |
| "epoch": 0.263, |
| "grad_norm": 1040.0, |
| "kl_loss_13": 288.65707244873045, |
| "kl_loss_2": 2875.629248046875, |
| "kl_loss_4": 2278.032305908203, |
| "kl_loss_9": 998.065185546875, |
| "learning_rate": 0.0008473291852294987, |
| "loss": 1633.93, |
| "step": 2630 |
| }, |
| { |
| "ce_loss_13": 3.2652063727378846, |
| "ce_loss_17": 3.141418147087097, |
| "ce_loss_2": 4.541818857192993, |
| "ce_loss_4": 4.238283658027649, |
| "ce_loss_9": 3.585766649246216, |
| "epoch": 0.264, |
| "grad_norm": 1112.0, |
| "kl_loss_13": 290.4628158569336, |
| "kl_loss_2": 2885.605334472656, |
| "kl_loss_4": 2314.400567626953, |
| "kl_loss_9": 1007.6306579589843, |
| "learning_rate": 0.0008461860885303114, |
| "loss": 1603.6747, |
| "step": 2640 |
| }, |
| { |
| "ce_loss_13": 3.292245590686798, |
| "ce_loss_17": 3.171410655975342, |
| "ce_loss_2": 4.5390942096710205, |
| "ce_loss_4": 4.239257597923279, |
| "ce_loss_9": 3.6101866006851195, |
| "epoch": 0.265, |
| "grad_norm": 932.0, |
| "kl_loss_13": 284.9529167175293, |
| "kl_loss_2": 2812.7326538085936, |
| "kl_loss_4": 2248.4744079589846, |
| "kl_loss_9": 988.2147277832031, |
| "learning_rate": 0.000845039505741056, |
| "loss": 1606.1671, |
| "step": 2650 |
| }, |
| { |
| "ce_loss_13": 3.2741735100746157, |
| "ce_loss_17": 3.150105154514313, |
| "ce_loss_2": 4.545386862754822, |
| "ce_loss_4": 4.245937943458557, |
| "ce_loss_9": 3.5979922890663145, |
| "epoch": 0.266, |
| "grad_norm": 1040.0, |
| "kl_loss_13": 295.6175003051758, |
| "kl_loss_2": 2904.401818847656, |
| "kl_loss_4": 2331.8230407714846, |
| "kl_loss_9": 1031.5228149414063, |
| "learning_rate": 0.0008438894484078086, |
| "loss": 1670.0883, |
| "step": 2660 |
| }, |
| { |
| "ce_loss_13": 3.280404818058014, |
| "ce_loss_17": 3.1578108787536623, |
| "ce_loss_2": 4.536375164985657, |
| "ce_loss_4": 4.228759133815766, |
| "ce_loss_9": 3.587415170669556, |
| "epoch": 0.267, |
| "grad_norm": 812.0, |
| "kl_loss_13": 285.996403503418, |
| "kl_loss_2": 2828.8488647460936, |
| "kl_loss_4": 2259.752117919922, |
| "kl_loss_9": 983.035922241211, |
| "learning_rate": 0.0008427359281116334, |
| "loss": 1601.9956, |
| "step": 2670 |
| }, |
| { |
| "ce_loss_13": 3.1835838317871095, |
| "ce_loss_17": 3.0539584040641783, |
| "ce_loss_2": 4.491893172264099, |
| "ce_loss_4": 4.182012701034546, |
| "ce_loss_9": 3.510149967670441, |
| "epoch": 0.268, |
| "grad_norm": 896.0, |
| "kl_loss_13": 293.40307846069334, |
| "kl_loss_2": 2926.4771728515625, |
| "kl_loss_4": 2341.3355407714844, |
| "kl_loss_9": 1015.4264831542969, |
| "learning_rate": 0.0008415789564684673, |
| "loss": 1631.741, |
| "step": 2680 |
| }, |
| { |
| "ce_loss_13": 3.4233740329742433, |
| "ce_loss_17": 3.299894630908966, |
| "ce_loss_2": 4.666839551925659, |
| "ce_loss_4": 4.35275719165802, |
| "ce_loss_9": 3.7351592421531676, |
| "epoch": 0.269, |
| "grad_norm": 852.0, |
| "kl_loss_13": 295.1780403137207, |
| "kl_loss_2": 2790.9798828125, |
| "kl_loss_4": 2206.834509277344, |
| "kl_loss_9": 985.7969787597656, |
| "learning_rate": 0.0008404185451290017, |
| "loss": 1579.6709, |
| "step": 2690 |
| }, |
| { |
| "ce_loss_13": 3.294009006023407, |
| "ce_loss_17": 3.175998365879059, |
| "ce_loss_2": 4.555729842185974, |
| "ce_loss_4": 4.246478509902954, |
| "ce_loss_9": 3.6012258172035216, |
| "epoch": 0.27, |
| "grad_norm": 1032.0, |
| "kl_loss_13": 285.7749221801758, |
| "kl_loss_2": 2823.909423828125, |
| "kl_loss_4": 2247.897332763672, |
| "kl_loss_9": 975.7720031738281, |
| "learning_rate": 0.0008392547057785661, |
| "loss": 1583.1482, |
| "step": 2700 |
| }, |
| { |
| "ce_loss_13": 3.227368414402008, |
| "ce_loss_17": 3.105896461009979, |
| "ce_loss_2": 4.5378422975540165, |
| "ce_loss_4": 4.228003525733948, |
| "ce_loss_9": 3.5542636036872866, |
| "epoch": 0.271, |
| "grad_norm": 844.0, |
| "kl_loss_13": 291.69226760864257, |
| "kl_loss_2": 2963.926379394531, |
| "kl_loss_4": 2381.7996459960937, |
| "kl_loss_9": 1024.808383178711, |
| "learning_rate": 0.0008380874501370098, |
| "loss": 1603.6732, |
| "step": 2710 |
| }, |
| { |
| "ce_loss_13": 3.2251667261123655, |
| "ce_loss_17": 3.0979998230934145, |
| "ce_loss_2": 4.531300711631775, |
| "ce_loss_4": 4.222065436840057, |
| "ce_loss_9": 3.553966796398163, |
| "epoch": 0.272, |
| "grad_norm": 1012.0, |
| "kl_loss_13": 296.8633186340332, |
| "kl_loss_2": 2936.8505981445314, |
| "kl_loss_4": 2361.1948852539062, |
| "kl_loss_9": 1023.4551696777344, |
| "learning_rate": 0.0008369167899585841, |
| "loss": 1635.4837, |
| "step": 2720 |
| }, |
| { |
| "ce_loss_13": 3.3345279574394224, |
| "ce_loss_17": 3.2195035099983214, |
| "ce_loss_2": 4.546821546554566, |
| "ce_loss_4": 4.250281524658203, |
| "ce_loss_9": 3.6345677971839905, |
| "epoch": 0.273, |
| "grad_norm": 1004.0, |
| "kl_loss_13": 281.866024017334, |
| "kl_loss_2": 2758.502978515625, |
| "kl_loss_4": 2202.805487060547, |
| "kl_loss_9": 963.4181732177734, |
| "learning_rate": 0.0008357427370318238, |
| "loss": 1605.3008, |
| "step": 2730 |
| }, |
| { |
| "ce_loss_13": 3.2902962923049928, |
| "ce_loss_17": 3.168014645576477, |
| "ce_loss_2": 4.561334037780762, |
| "ce_loss_4": 4.2694053769111635, |
| "ce_loss_9": 3.602027785778046, |
| "epoch": 0.274, |
| "grad_norm": 1008.0, |
| "kl_loss_13": 286.1255012512207, |
| "kl_loss_2": 2874.9748657226564, |
| "kl_loss_4": 2308.7690185546876, |
| "kl_loss_9": 984.6821319580079, |
| "learning_rate": 0.0008345653031794292, |
| "loss": 1622.8312, |
| "step": 2740 |
| }, |
| { |
| "ce_loss_13": 3.2882461547851562, |
| "ce_loss_17": 3.1660869240760805, |
| "ce_loss_2": 4.554894280433655, |
| "ce_loss_4": 4.24486141204834, |
| "ce_loss_9": 3.5995869874954223, |
| "epoch": 0.275, |
| "grad_norm": 888.0, |
| "kl_loss_13": 287.41429138183594, |
| "kl_loss_2": 2837.460412597656, |
| "kl_loss_4": 2264.114715576172, |
| "kl_loss_9": 984.4078521728516, |
| "learning_rate": 0.0008333845002581458, |
| "loss": 1598.7549, |
| "step": 2750 |
| }, |
| { |
| "ce_loss_13": 3.2206096053123474, |
| "ce_loss_17": 3.094936728477478, |
| "ce_loss_2": 4.52073712348938, |
| "ce_loss_4": 4.210266506671905, |
| "ce_loss_9": 3.5471937894821166, |
| "epoch": 0.276, |
| "grad_norm": 796.0, |
| "kl_loss_13": 297.68716049194336, |
| "kl_loss_2": 2932.2358276367186, |
| "kl_loss_4": 2354.4706604003904, |
| "kl_loss_9": 1027.962664794922, |
| "learning_rate": 0.0008322003401586462, |
| "loss": 1640.3008, |
| "step": 2760 |
| }, |
| { |
| "ce_loss_13": 3.2499445915222167, |
| "ce_loss_17": 3.133220112323761, |
| "ce_loss_2": 4.484242916107178, |
| "ce_loss_4": 4.18062641620636, |
| "ce_loss_9": 3.545853316783905, |
| "epoch": 0.277, |
| "grad_norm": 852.0, |
| "kl_loss_13": 278.0310089111328, |
| "kl_loss_2": 2777.01181640625, |
| "kl_loss_4": 2212.459405517578, |
| "kl_loss_9": 956.9126495361328, |
| "learning_rate": 0.0008310128348054094, |
| "loss": 1545.8723, |
| "step": 2770 |
| }, |
| { |
| "ce_loss_13": 3.223058843612671, |
| "ce_loss_17": 3.1042088866233826, |
| "ce_loss_2": 4.484471321105957, |
| "ce_loss_4": 4.185724556446075, |
| "ce_loss_9": 3.5301337242126465, |
| "epoch": 0.278, |
| "grad_norm": 1072.0, |
| "kl_loss_13": 282.98550033569336, |
| "kl_loss_2": 2839.0412841796874, |
| "kl_loss_4": 2267.3141540527345, |
| "kl_loss_9": 986.4726745605469, |
| "learning_rate": 0.0008298219961566008, |
| "loss": 1594.5641, |
| "step": 2780 |
| }, |
| { |
| "ce_loss_13": 3.1846871376037598, |
| "ce_loss_17": 3.0667507529258726, |
| "ce_loss_2": 4.486607003211975, |
| "ce_loss_4": 4.184105217456818, |
| "ce_loss_9": 3.515022933483124, |
| "epoch": 0.279, |
| "grad_norm": 896.0, |
| "kl_loss_13": 287.2220848083496, |
| "kl_loss_2": 2933.843408203125, |
| "kl_loss_4": 2362.742071533203, |
| "kl_loss_9": 1020.189077758789, |
| "learning_rate": 0.0008286278362039527, |
| "loss": 1609.5307, |
| "step": 2790 |
| }, |
| { |
| "ce_loss_13": 3.214345967769623, |
| "ce_loss_17": 3.097870433330536, |
| "ce_loss_2": 4.535495281219482, |
| "ce_loss_4": 4.212986302375794, |
| "ce_loss_9": 3.54015052318573, |
| "epoch": 0.28, |
| "grad_norm": 888.0, |
| "kl_loss_13": 287.04673461914064, |
| "kl_loss_2": 2959.1888916015623, |
| "kl_loss_4": 2359.879498291016, |
| "kl_loss_9": 1017.3332458496094, |
| "learning_rate": 0.0008274303669726426, |
| "loss": 1603.5236, |
| "step": 2800 |
| }, |
| { |
| "ce_loss_13": 3.1277572393417357, |
| "ce_loss_17": 3.001349353790283, |
| "ce_loss_2": 4.464611601829529, |
| "ce_loss_4": 4.163355004787445, |
| "ce_loss_9": 3.4533198356628416, |
| "epoch": 0.281, |
| "grad_norm": 976.0, |
| "kl_loss_13": 289.89603118896486, |
| "kl_loss_2": 2983.6671752929688, |
| "kl_loss_4": 2412.9560485839843, |
| "kl_loss_9": 1010.9874877929688, |
| "learning_rate": 0.0008262296005211721, |
| "loss": 1610.6947, |
| "step": 2810 |
| }, |
| { |
| "ce_loss_13": 3.2461358070373536, |
| "ce_loss_17": 3.126099479198456, |
| "ce_loss_2": 4.544889760017395, |
| "ce_loss_4": 4.2353127002716064, |
| "ce_loss_9": 3.5660160064697264, |
| "epoch": 0.282, |
| "grad_norm": 836.0, |
| "kl_loss_13": 285.3932945251465, |
| "kl_loss_2": 2889.317004394531, |
| "kl_loss_4": 2319.7860595703123, |
| "kl_loss_9": 999.7187866210937, |
| "learning_rate": 0.0008250255489412463, |
| "loss": 1596.4756, |
| "step": 2820 |
| }, |
| { |
| "ce_loss_13": 3.34898898601532, |
| "ce_loss_17": 3.2260169506073, |
| "ce_loss_2": 4.6104878187179565, |
| "ce_loss_4": 4.309991264343262, |
| "ce_loss_9": 3.6566107869148254, |
| "epoch": 0.283, |
| "grad_norm": 1136.0, |
| "kl_loss_13": 286.4916145324707, |
| "kl_loss_2": 2841.5496459960937, |
| "kl_loss_4": 2279.262072753906, |
| "kl_loss_9": 978.733984375, |
| "learning_rate": 0.0008238182243576511, |
| "loss": 1600.1098, |
| "step": 2830 |
| }, |
| { |
| "ce_loss_13": 3.3122432947158815, |
| "ce_loss_17": 3.191829764842987, |
| "ce_loss_2": 4.494195079803466, |
| "ce_loss_4": 4.1985708475112915, |
| "ce_loss_9": 3.5908570647239686, |
| "epoch": 0.284, |
| "grad_norm": 1112.0, |
| "kl_loss_13": 287.8615158081055, |
| "kl_loss_2": 2701.8959228515623, |
| "kl_loss_4": 2155.233837890625, |
| "kl_loss_9": 939.8635955810547, |
| "learning_rate": 0.0008226076389281315, |
| "loss": 1551.2213, |
| "step": 2840 |
| }, |
| { |
| "ce_loss_13": 3.3502746701240538, |
| "ce_loss_17": 3.231513559818268, |
| "ce_loss_2": 4.58073661327362, |
| "ce_loss_4": 4.286579442024231, |
| "ce_loss_9": 3.641250765323639, |
| "epoch": 0.285, |
| "grad_norm": 808.0, |
| "kl_loss_13": 288.8710304260254, |
| "kl_loss_2": 2795.6219482421875, |
| "kl_loss_4": 2237.4796630859373, |
| "kl_loss_9": 956.4706207275391, |
| "learning_rate": 0.0008213938048432696, |
| "loss": 1555.8686, |
| "step": 2850 |
| }, |
| { |
| "ce_loss_13": 3.28165922164917, |
| "ce_loss_17": 3.1545842051506043, |
| "ce_loss_2": 4.5358498096466064, |
| "ce_loss_4": 4.225637066364288, |
| "ce_loss_9": 3.5823240756988524, |
| "epoch": 0.286, |
| "grad_norm": 1032.0, |
| "kl_loss_13": 294.4871231079102, |
| "kl_loss_2": 2820.2166137695312, |
| "kl_loss_4": 2239.786029052734, |
| "kl_loss_9": 973.698095703125, |
| "learning_rate": 0.0008201767343263612, |
| "loss": 1599.423, |
| "step": 2860 |
| }, |
| { |
| "ce_loss_13": 3.2217300057411196, |
| "ce_loss_17": 3.1019028067588805, |
| "ce_loss_2": 4.501135754585266, |
| "ce_loss_4": 4.203259193897248, |
| "ce_loss_9": 3.543823516368866, |
| "epoch": 0.287, |
| "grad_norm": 1056.0, |
| "kl_loss_13": 284.2254928588867, |
| "kl_loss_2": 2880.4339965820313, |
| "kl_loss_4": 2315.0229614257814, |
| "kl_loss_9": 1005.7537414550782, |
| "learning_rate": 0.0008189564396332927, |
| "loss": 1561.3052, |
| "step": 2870 |
| }, |
| { |
| "ce_loss_13": 3.212079346179962, |
| "ce_loss_17": 3.0912064790725706, |
| "ce_loss_2": 4.506139898300171, |
| "ce_loss_4": 4.195946276187897, |
| "ce_loss_9": 3.5318343877792358, |
| "epoch": 0.288, |
| "grad_norm": 1104.0, |
| "kl_loss_13": 279.55200576782227, |
| "kl_loss_2": 2883.9751831054687, |
| "kl_loss_4": 2305.9562866210936, |
| "kl_loss_9": 992.6949340820313, |
| "learning_rate": 0.0008177329330524181, |
| "loss": 1618.0895, |
| "step": 2880 |
| }, |
| { |
| "ce_loss_13": 3.258961391448975, |
| "ce_loss_17": 3.1412371397018433, |
| "ce_loss_2": 4.5010463237762455, |
| "ce_loss_4": 4.201276159286499, |
| "ce_loss_9": 3.564037549495697, |
| "epoch": 0.289, |
| "grad_norm": 812.0, |
| "kl_loss_13": 273.7047462463379, |
| "kl_loss_2": 2779.233752441406, |
| "kl_loss_4": 2221.82998046875, |
| "kl_loss_9": 960.7529418945312, |
| "learning_rate": 0.0008165062269044352, |
| "loss": 1571.7527, |
| "step": 2890 |
| }, |
| { |
| "ce_loss_13": 3.2189348697662354, |
| "ce_loss_17": 3.0967533826828, |
| "ce_loss_2": 4.503298592567444, |
| "ce_loss_4": 4.19886873960495, |
| "ce_loss_9": 3.534325432777405, |
| "epoch": 0.29, |
| "grad_norm": 1104.0, |
| "kl_loss_13": 285.2427642822266, |
| "kl_loss_2": 2884.4711791992186, |
| "kl_loss_4": 2313.1965148925783, |
| "kl_loss_9": 994.6007049560546, |
| "learning_rate": 0.0008152763335422613, |
| "loss": 1618.2254, |
| "step": 2900 |
| }, |
| { |
| "ce_loss_13": 3.2083574652671816, |
| "ce_loss_17": 3.0843614101409913, |
| "ce_loss_2": 4.4828746795654295, |
| "ce_loss_4": 4.1724948525428776, |
| "ce_loss_9": 3.5200745344161986, |
| "epoch": 0.291, |
| "grad_norm": 1232.0, |
| "kl_loss_13": 286.9447898864746, |
| "kl_loss_2": 2863.100378417969, |
| "kl_loss_4": 2282.642138671875, |
| "kl_loss_9": 990.8550262451172, |
| "learning_rate": 0.0008140432653509088, |
| "loss": 1584.8366, |
| "step": 2910 |
| }, |
| { |
| "ce_loss_13": 3.2559476852416993, |
| "ce_loss_17": 3.1372368216514586, |
| "ce_loss_2": 4.503469681739807, |
| "ce_loss_4": 4.197622585296631, |
| "ce_loss_9": 3.563284969329834, |
| "epoch": 0.292, |
| "grad_norm": 996.0, |
| "kl_loss_13": 288.70996246337893, |
| "kl_loss_2": 2826.5008056640627, |
| "kl_loss_4": 2252.2177917480467, |
| "kl_loss_9": 980.0047973632812, |
| "learning_rate": 0.0008128070347473608, |
| "loss": 1578.374, |
| "step": 2920 |
| }, |
| { |
| "ce_loss_13": 3.269769477844238, |
| "ce_loss_17": 3.1454373955726624, |
| "ce_loss_2": 4.56416757106781, |
| "ce_loss_4": 4.2507861971855165, |
| "ce_loss_9": 3.586102819442749, |
| "epoch": 0.293, |
| "grad_norm": 884.0, |
| "kl_loss_13": 289.1005027770996, |
| "kl_loss_2": 2915.4571044921877, |
| "kl_loss_4": 2334.055310058594, |
| "kl_loss_9": 1006.4992126464844, |
| "learning_rate": 0.0008115676541804455, |
| "loss": 1604.9533, |
| "step": 2930 |
| }, |
| { |
| "ce_loss_13": 3.2624789118766784, |
| "ce_loss_17": 3.1466320395469665, |
| "ce_loss_2": 4.506610178947449, |
| "ce_loss_4": 4.206110858917237, |
| "ce_loss_9": 3.5684964776039125, |
| "epoch": 0.294, |
| "grad_norm": 1064.0, |
| "kl_loss_13": 276.8395080566406, |
| "kl_loss_2": 2817.6049194335938, |
| "kl_loss_4": 2243.142858886719, |
| "kl_loss_9": 972.8132232666015, |
| "learning_rate": 0.0008103251361307119, |
| "loss": 1598.4172, |
| "step": 2940 |
| }, |
| { |
| "ce_loss_13": 3.295071244239807, |
| "ce_loss_17": 3.176465618610382, |
| "ce_loss_2": 4.548882579803466, |
| "ce_loss_4": 4.25100302696228, |
| "ce_loss_9": 3.607457995414734, |
| "epoch": 0.295, |
| "grad_norm": 1064.0, |
| "kl_loss_13": 284.5409103393555, |
| "kl_loss_2": 2828.6580322265627, |
| "kl_loss_4": 2267.947644042969, |
| "kl_loss_9": 987.2343078613281, |
| "learning_rate": 0.0008090794931103026, |
| "loss": 1578.4329, |
| "step": 2950 |
| }, |
| { |
| "ce_loss_13": 3.2742568969726564, |
| "ce_loss_17": 3.1607110261917115, |
| "ce_loss_2": 4.524548196792603, |
| "ce_loss_4": 4.213069832324981, |
| "ce_loss_9": 3.581976366043091, |
| "epoch": 0.296, |
| "grad_norm": 948.0, |
| "kl_loss_13": 271.086083984375, |
| "kl_loss_2": 2785.2946044921873, |
| "kl_loss_4": 2207.580285644531, |
| "kl_loss_9": 964.4064788818359, |
| "learning_rate": 0.0008078307376628291, |
| "loss": 1571.8193, |
| "step": 2960 |
| }, |
| { |
| "ce_loss_13": 3.3346418857574465, |
| "ce_loss_17": 3.2218273282051086, |
| "ce_loss_2": 4.530858635902405, |
| "ce_loss_4": 4.236799001693726, |
| "ce_loss_9": 3.6293553471565247, |
| "epoch": 0.297, |
| "grad_norm": 964.0, |
| "kl_loss_13": 264.12412490844724, |
| "kl_loss_2": 2687.8562255859374, |
| "kl_loss_4": 2135.786096191406, |
| "kl_loss_9": 928.7370697021485, |
| "learning_rate": 0.000806578882363245, |
| "loss": 1519.5714, |
| "step": 2970 |
| }, |
| { |
| "ce_loss_13": 3.2452542066574095, |
| "ce_loss_17": 3.136290204524994, |
| "ce_loss_2": 4.491577506065369, |
| "ce_loss_4": 4.184594225883484, |
| "ce_loss_9": 3.5495835065841677, |
| "epoch": 0.298, |
| "grad_norm": 1088.0, |
| "kl_loss_13": 271.81456298828124, |
| "kl_loss_2": 2800.4161865234373, |
| "kl_loss_4": 2217.1242553710936, |
| "kl_loss_9": 958.9943176269531, |
| "learning_rate": 0.0008053239398177191, |
| "loss": 1599.2152, |
| "step": 2980 |
| }, |
| { |
| "ce_loss_13": 3.244611394405365, |
| "ce_loss_17": 3.127071964740753, |
| "ce_loss_2": 4.508699440956116, |
| "ce_loss_4": 4.199268484115601, |
| "ce_loss_9": 3.5492395162582397, |
| "epoch": 0.299, |
| "grad_norm": 1096.0, |
| "kl_loss_13": 279.28087387084963, |
| "kl_loss_2": 2833.916857910156, |
| "kl_loss_4": 2252.944128417969, |
| "kl_loss_9": 959.1758026123047, |
| "learning_rate": 0.0008040659226635089, |
| "loss": 1612.3607, |
| "step": 2990 |
| }, |
| { |
| "ce_loss_13": 3.3649550795555117, |
| "ce_loss_17": 3.2388473987579345, |
| "ce_loss_2": 4.596168828010559, |
| "ce_loss_4": 4.2983060598373415, |
| "ce_loss_9": 3.673971450328827, |
| "epoch": 0.3, |
| "grad_norm": 968.0, |
| "kl_loss_13": 289.9134162902832, |
| "kl_loss_2": 2798.9067504882814, |
| "kl_loss_4": 2235.1320678710936, |
| "kl_loss_9": 978.6536499023438, |
| "learning_rate": 0.0008028048435688333, |
| "loss": 1570.527, |
| "step": 3000 |
| }, |
| { |
| "ce_loss_13": 3.2460662722587585, |
| "ce_loss_17": 3.1257930517196657, |
| "ce_loss_2": 4.518106842041016, |
| "ce_loss_4": 4.211145675182342, |
| "ce_loss_9": 3.5623016119003297, |
| "epoch": 0.301, |
| "grad_norm": 1024.0, |
| "kl_loss_13": 277.65109024047854, |
| "kl_loss_2": 2873.9763305664064, |
| "kl_loss_4": 2299.245983886719, |
| "kl_loss_9": 983.6670318603516, |
| "learning_rate": 0.0008015407152327448, |
| "loss": 1595.9152, |
| "step": 3010 |
| }, |
| { |
| "ce_loss_13": 3.2861130356788637, |
| "ce_loss_17": 3.1670594811439514, |
| "ce_loss_2": 4.54851884841919, |
| "ce_loss_4": 4.239895117282868, |
| "ce_loss_9": 3.5888345718383787, |
| "epoch": 0.302, |
| "grad_norm": 908.0, |
| "kl_loss_13": 281.9727096557617, |
| "kl_loss_2": 2871.5486450195312, |
| "kl_loss_4": 2285.6641235351562, |
| "kl_loss_9": 977.5694122314453, |
| "learning_rate": 0.0008002735503850016, |
| "loss": 1598.5455, |
| "step": 3020 |
| }, |
| { |
| "ce_loss_13": 3.181409013271332, |
| "ce_loss_17": 3.0593944787979126, |
| "ce_loss_2": 4.484816193580627, |
| "ce_loss_4": 4.177158224582672, |
| "ce_loss_9": 3.4981321930885314, |
| "epoch": 0.303, |
| "grad_norm": 988.0, |
| "kl_loss_13": 282.094172668457, |
| "kl_loss_2": 2910.0009521484376, |
| "kl_loss_4": 2335.713757324219, |
| "kl_loss_9": 991.5478698730469, |
| "learning_rate": 0.0007990033617859396, |
| "loss": 1617.6652, |
| "step": 3030 |
| }, |
| { |
| "ce_loss_13": 3.229492974281311, |
| "ce_loss_17": 3.1118812561035156, |
| "ce_loss_2": 4.483743333816529, |
| "ce_loss_4": 4.177499234676361, |
| "ce_loss_9": 3.5371679186820986, |
| "epoch": 0.304, |
| "grad_norm": 836.0, |
| "kl_loss_13": 276.66433944702146, |
| "kl_loss_2": 2812.9922729492187, |
| "kl_loss_4": 2236.2713317871094, |
| "kl_loss_9": 959.1571258544922, |
| "learning_rate": 0.000797730162226344, |
| "loss": 1538.4656, |
| "step": 3040 |
| }, |
| { |
| "ce_loss_13": 3.2546813011169435, |
| "ce_loss_17": 3.134936511516571, |
| "ce_loss_2": 4.504783082008362, |
| "ce_loss_4": 4.206938338279724, |
| "ce_loss_9": 3.562973344326019, |
| "epoch": 0.305, |
| "grad_norm": 968.0, |
| "kl_loss_13": 281.3242317199707, |
| "kl_loss_2": 2818.2148559570314, |
| "kl_loss_4": 2258.343853759766, |
| "kl_loss_9": 970.253305053711, |
| "learning_rate": 0.0007964539645273203, |
| "loss": 1561.8303, |
| "step": 3050 |
| }, |
| { |
| "ce_loss_13": 3.2663259387016295, |
| "ce_loss_17": 3.15209755897522, |
| "ce_loss_2": 4.493727421760559, |
| "ce_loss_4": 4.195807886123657, |
| "ce_loss_9": 3.5595535755157472, |
| "epoch": 0.306, |
| "grad_norm": 912.0, |
| "kl_loss_13": 272.078955078125, |
| "kl_loss_2": 2752.707421875, |
| "kl_loss_4": 2194.953546142578, |
| "kl_loss_9": 939.0423736572266, |
| "learning_rate": 0.000795174781540165, |
| "loss": 1563.6326, |
| "step": 3060 |
| }, |
| { |
| "ce_loss_13": 3.339310586452484, |
| "ce_loss_17": 3.225066292285919, |
| "ce_loss_2": 4.538775062561035, |
| "ce_loss_4": 4.249652981758118, |
| "ce_loss_9": 3.6289660573005675, |
| "epoch": 0.307, |
| "grad_norm": 836.0, |
| "kl_loss_13": 267.2957252502441, |
| "kl_loss_2": 2684.1191650390624, |
| "kl_loss_4": 2149.5090454101564, |
| "kl_loss_9": 922.7618530273437, |
| "learning_rate": 0.0007938926261462366, |
| "loss": 1554.1631, |
| "step": 3070 |
| }, |
| { |
| "ce_loss_13": 3.2909810066223146, |
| "ce_loss_17": 3.1758777379989622, |
| "ce_loss_2": 4.5030752658844, |
| "ce_loss_4": 4.199388837814331, |
| "ce_loss_9": 3.5742830514907835, |
| "epoch": 0.308, |
| "grad_norm": 1200.0, |
| "kl_loss_13": 273.68957138061523, |
| "kl_loss_2": 2764.279248046875, |
| "kl_loss_4": 2193.534539794922, |
| "kl_loss_9": 941.3706359863281, |
| "learning_rate": 0.0007926075112568258, |
| "loss": 1575.6994, |
| "step": 3080 |
| }, |
| { |
| "ce_loss_13": 3.277854061126709, |
| "ce_loss_17": 3.161978805065155, |
| "ce_loss_2": 4.5184684753417965, |
| "ce_loss_4": 4.2138096451759335, |
| "ce_loss_9": 3.583821475505829, |
| "epoch": 0.309, |
| "grad_norm": 876.0, |
| "kl_loss_13": 273.8983093261719, |
| "kl_loss_2": 2793.4811157226563, |
| "kl_loss_4": 2222.3793823242186, |
| "kl_loss_9": 963.9919616699219, |
| "learning_rate": 0.0007913194498130252, |
| "loss": 1541.167, |
| "step": 3090 |
| }, |
| { |
| "ce_loss_13": 3.214554417133331, |
| "ce_loss_17": 3.098016345500946, |
| "ce_loss_2": 4.478818607330322, |
| "ce_loss_4": 4.17561844587326, |
| "ce_loss_9": 3.5237682104110717, |
| "epoch": 0.31, |
| "grad_norm": 864.0, |
| "kl_loss_13": 275.41066970825193, |
| "kl_loss_2": 2820.3305419921876, |
| "kl_loss_4": 2248.0314208984373, |
| "kl_loss_9": 963.2971008300781, |
| "learning_rate": 0.0007900284547855992, |
| "loss": 1581.9468, |
| "step": 3100 |
| }, |
| { |
| "ce_loss_13": 3.228273904323578, |
| "ce_loss_17": 3.1136075258255005, |
| "ce_loss_2": 4.459900450706482, |
| "ce_loss_4": 4.14665732383728, |
| "ce_loss_9": 3.5251725792884825, |
| "epoch": 0.311, |
| "grad_norm": 1136.0, |
| "kl_loss_13": 272.3390228271484, |
| "kl_loss_2": 2787.1204345703127, |
| "kl_loss_4": 2205.8927307128906, |
| "kl_loss_9": 961.7925262451172, |
| "learning_rate": 0.0007887345391748532, |
| "loss": 1581.0115, |
| "step": 3110 |
| }, |
| { |
| "ce_loss_13": 3.3402189135551454, |
| "ce_loss_17": 3.2290727615356447, |
| "ce_loss_2": 4.537902426719666, |
| "ce_loss_4": 4.235116374492645, |
| "ce_loss_9": 3.6369069814682007, |
| "epoch": 0.312, |
| "grad_norm": 1168.0, |
| "kl_loss_13": 266.8268844604492, |
| "kl_loss_2": 2710.648645019531, |
| "kl_loss_4": 2147.968615722656, |
| "kl_loss_9": 936.2000610351563, |
| "learning_rate": 0.0007874377160105036, |
| "loss": 1513.0617, |
| "step": 3120 |
| }, |
| { |
| "ce_loss_13": 3.248319673538208, |
| "ce_loss_17": 3.1336915850639344, |
| "ce_loss_2": 4.516077709197998, |
| "ce_loss_4": 4.201394057273864, |
| "ce_loss_9": 3.5536004066467286, |
| "epoch": 0.313, |
| "grad_norm": 856.0, |
| "kl_loss_13": 266.1555046081543, |
| "kl_loss_2": 2859.893896484375, |
| "kl_loss_4": 2272.0468322753904, |
| "kl_loss_9": 973.1194732666015, |
| "learning_rate": 0.0007861379983515449, |
| "loss": 1620.0061, |
| "step": 3130 |
| }, |
| { |
| "ce_loss_13": 3.322508680820465, |
| "ce_loss_17": 3.2080819725990297, |
| "ce_loss_2": 4.547446084022522, |
| "ce_loss_4": 4.245393395423889, |
| "ce_loss_9": 3.6283490657806396, |
| "epoch": 0.314, |
| "grad_norm": 1152.0, |
| "kl_loss_13": 269.5914726257324, |
| "kl_loss_2": 2774.368518066406, |
| "kl_loss_4": 2209.1611206054686, |
| "kl_loss_9": 968.7327423095703, |
| "learning_rate": 0.0007848353992861195, |
| "loss": 1544.9365, |
| "step": 3140 |
| }, |
| { |
| "ce_loss_13": 3.403998100757599, |
| "ce_loss_17": 3.275753974914551, |
| "ce_loss_2": 4.631322360038757, |
| "ce_loss_4": 4.333673286437988, |
| "ce_loss_9": 3.7198479652404783, |
| "epoch": 0.315, |
| "grad_norm": 1020.0, |
| "kl_loss_13": 295.059912109375, |
| "kl_loss_2": 2784.7739868164062, |
| "kl_loss_4": 2226.2811950683595, |
| "kl_loss_9": 993.2255096435547, |
| "learning_rate": 0.0007835299319313853, |
| "loss": 1582.4869, |
| "step": 3150 |
| }, |
| { |
| "ce_loss_13": 3.2930777072906494, |
| "ce_loss_17": 3.1757339596748353, |
| "ce_loss_2": 4.497713923454285, |
| "ce_loss_4": 4.199179148674011, |
| "ce_loss_9": 3.5781076669692995, |
| "epoch": 0.316, |
| "grad_norm": 1012.0, |
| "kl_loss_13": 283.0929374694824, |
| "kl_loss_2": 2733.6542846679686, |
| "kl_loss_4": 2185.5362548828125, |
| "kl_loss_9": 944.363330078125, |
| "learning_rate": 0.0007822216094333848, |
| "loss": 1590.9141, |
| "step": 3160 |
| }, |
| { |
| "ce_loss_13": 3.2975282311439513, |
| "ce_loss_17": 3.17965430021286, |
| "ce_loss_2": 4.546630835533142, |
| "ce_loss_4": 4.247738254070282, |
| "ce_loss_9": 3.6045960426330566, |
| "epoch": 0.317, |
| "grad_norm": 1024.0, |
| "kl_loss_13": 284.6186943054199, |
| "kl_loss_2": 2813.29599609375, |
| "kl_loss_4": 2250.4382263183593, |
| "kl_loss_9": 966.4887451171875, |
| "learning_rate": 0.0007809104449669101, |
| "loss": 1563.5169, |
| "step": 3170 |
| }, |
| { |
| "ce_loss_13": 3.2469945788383483, |
| "ce_loss_17": 3.1276997923851013, |
| "ce_loss_2": 4.471801328659057, |
| "ce_loss_4": 4.16688768863678, |
| "ce_loss_9": 3.539893078804016, |
| "epoch": 0.318, |
| "grad_norm": 888.0, |
| "kl_loss_13": 280.2876937866211, |
| "kl_loss_2": 2752.5779052734374, |
| "kl_loss_4": 2191.1085693359373, |
| "kl_loss_9": 942.042269897461, |
| "learning_rate": 0.0007795964517353734, |
| "loss": 1540.8963, |
| "step": 3180 |
| }, |
| { |
| "ce_loss_13": 3.244401454925537, |
| "ce_loss_17": 3.1286351084709167, |
| "ce_loss_2": 4.496015191078186, |
| "ce_loss_4": 4.1932557106018065, |
| "ce_loss_9": 3.54423850774765, |
| "epoch": 0.319, |
| "grad_norm": 880.0, |
| "kl_loss_13": 287.23448791503904, |
| "kl_loss_2": 2838.22119140625, |
| "kl_loss_4": 2263.0765625, |
| "kl_loss_9": 964.8008636474609, |
| "learning_rate": 0.000778279642970672, |
| "loss": 1541.8548, |
| "step": 3190 |
| }, |
| { |
| "ce_loss_13": 3.2421715021133424, |
| "ce_loss_17": 3.1298956513404845, |
| "ce_loss_2": 4.4572087049484255, |
| "ce_loss_4": 4.168467354774475, |
| "ce_loss_9": 3.5430691599845887, |
| "epoch": 0.32, |
| "grad_norm": 904.0, |
| "kl_loss_13": 276.8183708190918, |
| "kl_loss_2": 2749.2382690429686, |
| "kl_loss_4": 2203.2156677246094, |
| "kl_loss_9": 946.2814147949218, |
| "learning_rate": 0.0007769600319330552, |
| "loss": 1527.5336, |
| "step": 3200 |
| }, |
| { |
| "ce_loss_13": 3.274927353858948, |
| "ce_loss_17": 3.158747744560242, |
| "ce_loss_2": 4.556273770332337, |
| "ce_loss_4": 4.2537779092788695, |
| "ce_loss_9": 3.5850720047950744, |
| "epoch": 0.321, |
| "grad_norm": 1152.0, |
| "kl_loss_13": 275.17505264282227, |
| "kl_loss_2": 2865.657861328125, |
| "kl_loss_4": 2286.8991882324217, |
| "kl_loss_9": 964.7405670166015, |
| "learning_rate": 0.0007756376319109917, |
| "loss": 1568.4346, |
| "step": 3210 |
| }, |
| { |
| "ce_loss_13": 3.319024181365967, |
| "ce_loss_17": 3.208410918712616, |
| "ce_loss_2": 4.538045167922974, |
| "ce_loss_4": 4.225291419029236, |
| "ce_loss_9": 3.61713593006134, |
| "epoch": 0.322, |
| "grad_norm": 916.0, |
| "kl_loss_13": 270.0167869567871, |
| "kl_loss_2": 2743.7939331054686, |
| "kl_loss_4": 2167.060235595703, |
| "kl_loss_9": 946.6562072753907, |
| "learning_rate": 0.0007743124562210351, |
| "loss": 1510.9901, |
| "step": 3220 |
| }, |
| { |
| "ce_loss_13": 3.330537438392639, |
| "ce_loss_17": 3.2180320024490356, |
| "ce_loss_2": 4.5244622230529785, |
| "ce_loss_4": 4.23722620010376, |
| "ce_loss_9": 3.623570477962494, |
| "epoch": 0.323, |
| "grad_norm": 1344.0, |
| "kl_loss_13": 269.67826690673826, |
| "kl_loss_2": 2720.2651611328124, |
| "kl_loss_4": 2174.1652648925783, |
| "kl_loss_9": 937.5300079345703, |
| "learning_rate": 0.0007729845182076895, |
| "loss": 1543.9738, |
| "step": 3230 |
| }, |
| { |
| "ce_loss_13": 3.257945251464844, |
| "ce_loss_17": 3.149770975112915, |
| "ce_loss_2": 4.469063019752502, |
| "ce_loss_4": 4.165922224521637, |
| "ce_loss_9": 3.550974118709564, |
| "epoch": 0.324, |
| "grad_norm": 1048.0, |
| "kl_loss_13": 266.3274787902832, |
| "kl_loss_2": 2731.5584106445312, |
| "kl_loss_4": 2161.984240722656, |
| "kl_loss_9": 937.9604461669921, |
| "learning_rate": 0.0007716538312432765, |
| "loss": 1568.9255, |
| "step": 3240 |
| }, |
| { |
| "ce_loss_13": 3.2215582966804504, |
| "ce_loss_17": 3.1058525323867796, |
| "ce_loss_2": 4.480478024482727, |
| "ce_loss_4": 4.17129145860672, |
| "ce_loss_9": 3.5299358487129213, |
| "epoch": 0.325, |
| "grad_norm": 920.0, |
| "kl_loss_13": 276.2053161621094, |
| "kl_loss_2": 2815.1505493164063, |
| "kl_loss_4": 2242.41376953125, |
| "kl_loss_9": 974.6908813476563, |
| "learning_rate": 0.0007703204087277988, |
| "loss": 1569.5339, |
| "step": 3250 |
| }, |
| { |
| "ce_loss_13": 3.3126750588417053, |
| "ce_loss_17": 3.2068999886512755, |
| "ce_loss_2": 4.502849841117859, |
| "ce_loss_4": 4.202327466011047, |
| "ce_loss_9": 3.6018685936927795, |
| "epoch": 0.326, |
| "grad_norm": 884.0, |
| "kl_loss_13": 260.0581558227539, |
| "kl_loss_2": 2668.5241455078126, |
| "kl_loss_4": 2117.4724731445312, |
| "kl_loss_9": 910.7038879394531, |
| "learning_rate": 0.0007689842640888063, |
| "loss": 1507.614, |
| "step": 3260 |
| }, |
| { |
| "ce_loss_13": 3.3084128737449645, |
| "ce_loss_17": 3.2014007449150084, |
| "ce_loss_2": 4.517469263076782, |
| "ce_loss_4": 4.218803930282593, |
| "ce_loss_9": 3.6058817982673643, |
| "epoch": 0.327, |
| "grad_norm": 884.0, |
| "kl_loss_13": 266.42607803344725, |
| "kl_loss_2": 2704.573828125, |
| "kl_loss_4": 2148.391497802734, |
| "kl_loss_9": 931.1391815185547, |
| "learning_rate": 0.0007676454107812607, |
| "loss": 1527.4816, |
| "step": 3270 |
| }, |
| { |
| "ce_loss_13": 3.2546404242515563, |
| "ce_loss_17": 3.142548108100891, |
| "ce_loss_2": 4.501965403556824, |
| "ce_loss_4": 4.198862934112549, |
| "ce_loss_9": 3.557183790206909, |
| "epoch": 0.328, |
| "grad_norm": 1016.0, |
| "kl_loss_13": 272.0782051086426, |
| "kl_loss_2": 2814.5278442382814, |
| "kl_loss_4": 2245.3919921875, |
| "kl_loss_9": 958.6899810791016, |
| "learning_rate": 0.0007663038622873999, |
| "loss": 1541.0045, |
| "step": 3280 |
| }, |
| { |
| "ce_loss_13": 3.2893999218940735, |
| "ce_loss_17": 3.18322206735611, |
| "ce_loss_2": 4.519177496433258, |
| "ce_loss_4": 4.221520841121674, |
| "ce_loss_9": 3.5871684432029722, |
| "epoch": 0.329, |
| "grad_norm": 872.0, |
| "kl_loss_13": 268.89587631225584, |
| "kl_loss_2": 2763.5900146484373, |
| "kl_loss_4": 2204.918310546875, |
| "kl_loss_9": 938.5980438232422, |
| "learning_rate": 0.0007649596321166025, |
| "loss": 1519.9422, |
| "step": 3290 |
| }, |
| { |
| "ce_loss_13": 3.202285659313202, |
| "ce_loss_17": 3.092141497135162, |
| "ce_loss_2": 4.403944611549377, |
| "ce_loss_4": 4.105515563488007, |
| "ce_loss_9": 3.4965336084365846, |
| "epoch": 0.33, |
| "grad_norm": 980.0, |
| "kl_loss_13": 259.88773040771486, |
| "kl_loss_2": 2684.8335571289062, |
| "kl_loss_4": 2128.1696655273436, |
| "kl_loss_9": 921.4997375488281, |
| "learning_rate": 0.0007636127338052513, |
| "loss": 1528.184, |
| "step": 3300 |
| }, |
| { |
| "ce_loss_13": 3.301859939098358, |
| "ce_loss_17": 3.186275064945221, |
| "ce_loss_2": 4.545389008522034, |
| "ce_loss_4": 4.254642629623413, |
| "ce_loss_9": 3.6036364316940306, |
| "epoch": 0.331, |
| "grad_norm": 744.0, |
| "kl_loss_13": 270.93817977905275, |
| "kl_loss_2": 2812.50205078125, |
| "kl_loss_4": 2264.8998779296876, |
| "kl_loss_9": 953.3371337890625, |
| "learning_rate": 0.0007622631809165971, |
| "loss": 1539.0198, |
| "step": 3310 |
| }, |
| { |
| "ce_loss_13": 3.283806037902832, |
| "ce_loss_17": 3.184336471557617, |
| "ce_loss_2": 4.464780688285828, |
| "ce_loss_4": 4.176603603363037, |
| "ce_loss_9": 3.56682745218277, |
| "epoch": 0.332, |
| "grad_norm": 880.0, |
| "kl_loss_13": 252.60040588378905, |
| "kl_loss_2": 2617.5267333984375, |
| "kl_loss_4": 2076.6250610351562, |
| "kl_loss_9": 889.7474029541015, |
| "learning_rate": 0.000760910987040623, |
| "loss": 1500.3265, |
| "step": 3320 |
| }, |
| { |
| "ce_loss_13": 3.278459334373474, |
| "ce_loss_17": 3.1641597628593443, |
| "ce_loss_2": 4.531191396713257, |
| "ce_loss_4": 4.23327329158783, |
| "ce_loss_9": 3.5926972150802614, |
| "epoch": 0.333, |
| "grad_norm": 956.0, |
| "kl_loss_13": 275.97788391113284, |
| "kl_loss_2": 2824.361083984375, |
| "kl_loss_4": 2263.9645751953126, |
| "kl_loss_9": 971.9883453369141, |
| "learning_rate": 0.000759556165793906, |
| "loss": 1541.1529, |
| "step": 3330 |
| }, |
| { |
| "ce_loss_13": 3.2896785616874693, |
| "ce_loss_17": 3.1791512727737428, |
| "ce_loss_2": 4.521309220790863, |
| "ce_loss_4": 4.223354470729828, |
| "ce_loss_9": 3.589315724372864, |
| "epoch": 0.334, |
| "grad_norm": 912.0, |
| "kl_loss_13": 266.7881057739258, |
| "kl_loss_2": 2766.123254394531, |
| "kl_loss_4": 2197.516650390625, |
| "kl_loss_9": 950.2745239257813, |
| "learning_rate": 0.000758198730819481, |
| "loss": 1556.2384, |
| "step": 3340 |
| }, |
| { |
| "ce_loss_13": 3.2492231965065, |
| "ce_loss_17": 3.1451782107353212, |
| "ce_loss_2": 4.481045436859131, |
| "ce_loss_4": 4.179851341247558, |
| "ce_loss_9": 3.542218101024628, |
| "epoch": 0.335, |
| "grad_norm": 872.0, |
| "kl_loss_13": 262.3326354980469, |
| "kl_loss_2": 2763.7760620117188, |
| "kl_loss_4": 2202.8555114746096, |
| "kl_loss_9": 928.0163726806641, |
| "learning_rate": 0.0007568386957867032, |
| "loss": 1542.0523, |
| "step": 3350 |
| }, |
| { |
| "ce_loss_13": 3.31020872592926, |
| "ce_loss_17": 3.195552670955658, |
| "ce_loss_2": 4.525788259506226, |
| "ce_loss_4": 4.222182834148407, |
| "ce_loss_9": 3.604987645149231, |
| "epoch": 0.336, |
| "grad_norm": 876.0, |
| "kl_loss_13": 265.8648292541504, |
| "kl_loss_2": 2735.0986572265624, |
| "kl_loss_4": 2168.284149169922, |
| "kl_loss_9": 934.0886199951171, |
| "learning_rate": 0.0007554760743911103, |
| "loss": 1549.6854, |
| "step": 3360 |
| }, |
| { |
| "ce_loss_13": 3.223640334606171, |
| "ce_loss_17": 3.1187192797660828, |
| "ce_loss_2": 4.438878381252289, |
| "ce_loss_4": 4.1361319661140445, |
| "ce_loss_9": 3.513273775577545, |
| "epoch": 0.337, |
| "grad_norm": 1344.0, |
| "kl_loss_13": 258.05127182006834, |
| "kl_loss_2": 2752.9601318359373, |
| "kl_loss_4": 2178.503546142578, |
| "kl_loss_9": 916.3459869384766, |
| "learning_rate": 0.0007541108803542846, |
| "loss": 1567.4688, |
| "step": 3370 |
| }, |
| { |
| "ce_loss_13": 3.2685051441192625, |
| "ce_loss_17": 3.161276388168335, |
| "ce_loss_2": 4.486205792427063, |
| "ce_loss_4": 4.190169501304626, |
| "ce_loss_9": 3.55316504240036, |
| "epoch": 0.338, |
| "grad_norm": 960.0, |
| "kl_loss_13": 263.7984809875488, |
| "kl_loss_2": 2770.8141967773436, |
| "kl_loss_4": 2203.708447265625, |
| "kl_loss_9": 931.730923461914, |
| "learning_rate": 0.0007527431274237149, |
| "loss": 1611.293, |
| "step": 3380 |
| }, |
| { |
| "ce_loss_13": 3.2437796354293824, |
| "ce_loss_17": 3.137548494338989, |
| "ce_loss_2": 4.456655740737915, |
| "ce_loss_4": 4.159723901748658, |
| "ce_loss_9": 3.5290903568267824, |
| "epoch": 0.339, |
| "grad_norm": 1024.0, |
| "kl_loss_13": 260.51111755371096, |
| "kl_loss_2": 2735.7857421875, |
| "kl_loss_4": 2180.468231201172, |
| "kl_loss_9": 917.7699462890625, |
| "learning_rate": 0.0007513728293726579, |
| "loss": 1537.3131, |
| "step": 3390 |
| }, |
| { |
| "ce_loss_13": 3.3449551939964293, |
| "ce_loss_17": 3.2365132570266724, |
| "ce_loss_2": 4.543925142288208, |
| "ce_loss_4": 4.249056077003479, |
| "ce_loss_9": 3.6360458612442015, |
| "epoch": 0.34, |
| "grad_norm": 1088.0, |
| "kl_loss_13": 263.0923706054688, |
| "kl_loss_2": 2708.170166015625, |
| "kl_loss_4": 2152.1197021484377, |
| "kl_loss_9": 920.2247039794922, |
| "learning_rate": 0.00075, |
| "loss": 1508.5383, |
| "step": 3400 |
| }, |
| { |
| "ce_loss_13": 3.341777038574219, |
| "ce_loss_17": 3.2307618618011475, |
| "ce_loss_2": 4.569138479232788, |
| "ce_loss_4": 4.269686961174012, |
| "ce_loss_9": 3.6399669528007506, |
| "epoch": 0.341, |
| "grad_norm": 980.0, |
| "kl_loss_13": 269.5948371887207, |
| "kl_loss_2": 2770.316857910156, |
| "kl_loss_4": 2201.8653564453125, |
| "kl_loss_9": 951.3301879882813, |
| "learning_rate": 0.0007486246531301177, |
| "loss": 1528.5927, |
| "step": 3410 |
| }, |
| { |
| "ce_loss_13": 3.156513547897339, |
| "ce_loss_17": 3.048259365558624, |
| "ce_loss_2": 4.38749053478241, |
| "ce_loss_4": 4.080587613582611, |
| "ce_loss_9": 3.4617828369140624, |
| "epoch": 0.342, |
| "grad_norm": 852.0, |
| "kl_loss_13": 265.1149276733398, |
| "kl_loss_2": 2752.9420776367188, |
| "kl_loss_4": 2178.886193847656, |
| "kl_loss_9": 933.7513427734375, |
| "learning_rate": 0.0007472468026127384, |
| "loss": 1513.4949, |
| "step": 3420 |
| }, |
| { |
| "ce_loss_13": 3.2966465592384337, |
| "ce_loss_17": 3.178287982940674, |
| "ce_loss_2": 4.55316390991211, |
| "ce_loss_4": 4.249738669395446, |
| "ce_loss_9": 3.60037442445755, |
| "epoch": 0.343, |
| "grad_norm": 884.0, |
| "kl_loss_13": 279.35314483642577, |
| "kl_loss_2": 2852.934216308594, |
| "kl_loss_4": 2277.991345214844, |
| "kl_loss_9": 970.7464904785156, |
| "learning_rate": 0.000745866462322802, |
| "loss": 1573.7838, |
| "step": 3430 |
| }, |
| { |
| "ce_loss_13": 3.271784245967865, |
| "ce_loss_17": 3.1670589089393615, |
| "ce_loss_2": 4.473905694484711, |
| "ce_loss_4": 4.182957363128662, |
| "ce_loss_9": 3.5616883397102357, |
| "epoch": 0.344, |
| "grad_norm": 856.0, |
| "kl_loss_13": 257.2951263427734, |
| "kl_loss_2": 2678.4755859375, |
| "kl_loss_4": 2136.723114013672, |
| "kl_loss_9": 906.0756713867188, |
| "learning_rate": 0.0007444836461603195, |
| "loss": 1510.2672, |
| "step": 3440 |
| }, |
| { |
| "ce_loss_13": 3.338606262207031, |
| "ce_loss_17": 3.2255253911018373, |
| "ce_loss_2": 4.555480146408081, |
| "ce_loss_4": 4.2601256489753725, |
| "ce_loss_9": 3.6350815892219543, |
| "epoch": 0.345, |
| "grad_norm": 924.0, |
| "kl_loss_13": 275.8904197692871, |
| "kl_loss_2": 2773.336462402344, |
| "kl_loss_4": 2220.651788330078, |
| "kl_loss_9": 956.8078552246094, |
| "learning_rate": 0.0007430983680502344, |
| "loss": 1561.4796, |
| "step": 3450 |
| }, |
| { |
| "ce_loss_13": 3.184423577785492, |
| "ce_loss_17": 3.0734758734703065, |
| "ce_loss_2": 4.436075353622437, |
| "ce_loss_4": 4.125931084156036, |
| "ce_loss_9": 3.481435251235962, |
| "epoch": 0.346, |
| "grad_norm": 1056.0, |
| "kl_loss_13": 267.9133583068848, |
| "kl_loss_2": 2809.0328125, |
| "kl_loss_4": 2233.378558349609, |
| "kl_loss_9": 944.1458312988282, |
| "learning_rate": 0.0007417106419422819, |
| "loss": 1552.5776, |
| "step": 3460 |
| }, |
| { |
| "ce_loss_13": 3.2850061774253847, |
| "ce_loss_17": 3.1702851176261904, |
| "ce_loss_2": 4.502034020423889, |
| "ce_loss_4": 4.195082807540894, |
| "ce_loss_9": 3.571630120277405, |
| "epoch": 0.347, |
| "grad_norm": 800.0, |
| "kl_loss_13": 264.5592643737793, |
| "kl_loss_2": 2725.1367309570314, |
| "kl_loss_4": 2150.877081298828, |
| "kl_loss_9": 918.9856872558594, |
| "learning_rate": 0.0007403204818108486, |
| "loss": 1539.9608, |
| "step": 3470 |
| }, |
| { |
| "ce_loss_13": 3.266357696056366, |
| "ce_loss_17": 3.1550687670707704, |
| "ce_loss_2": 4.493543338775635, |
| "ce_loss_4": 4.1920408487319945, |
| "ce_loss_9": 3.55805698633194, |
| "epoch": 0.348, |
| "grad_norm": 988.0, |
| "kl_loss_13": 266.9770141601563, |
| "kl_loss_2": 2789.0259765625, |
| "kl_loss_4": 2222.89580078125, |
| "kl_loss_9": 934.7239471435547, |
| "learning_rate": 0.0007389279016548316, |
| "loss": 1509.0587, |
| "step": 3480 |
| }, |
| { |
| "ce_loss_13": 3.2662723660469055, |
| "ce_loss_17": 3.1496416687965394, |
| "ce_loss_2": 4.549608850479126, |
| "ce_loss_4": 4.2360859394073485, |
| "ce_loss_9": 3.574393153190613, |
| "epoch": 0.349, |
| "grad_norm": 940.0, |
| "kl_loss_13": 275.62647476196287, |
| "kl_loss_2": 2882.6294555664062, |
| "kl_loss_4": 2289.571746826172, |
| "kl_loss_9": 964.0973388671875, |
| "learning_rate": 0.0007375329154974975, |
| "loss": 1576.9346, |
| "step": 3490 |
| }, |
| { |
| "ce_loss_13": 3.229642868041992, |
| "ce_loss_17": 3.1246652483940123, |
| "ce_loss_2": 4.436204671859741, |
| "ce_loss_4": 4.130906963348389, |
| "ce_loss_9": 3.5120436668396, |
| "epoch": 0.35, |
| "grad_norm": 920.0, |
| "kl_loss_13": 259.2286506652832, |
| "kl_loss_2": 2714.7550537109373, |
| "kl_loss_4": 2148.7419860839846, |
| "kl_loss_9": 916.3951171875, |
| "learning_rate": 0.0007361355373863414, |
| "loss": 1552.3697, |
| "step": 3500 |
| }, |
| { |
| "ce_loss_13": 3.2729630351066588, |
| "ce_loss_17": 3.1631830811500548, |
| "ce_loss_2": 4.4772727489471436, |
| "ce_loss_4": 4.183874452114106, |
| "ce_loss_9": 3.5575453877449035, |
| "epoch": 0.351, |
| "grad_norm": 1008.0, |
| "kl_loss_13": 260.13666305541994, |
| "kl_loss_2": 2700.1645751953124, |
| "kl_loss_4": 2145.1249877929686, |
| "kl_loss_9": 909.9921813964844, |
| "learning_rate": 0.0007347357813929454, |
| "loss": 1549.0204, |
| "step": 3510 |
| }, |
| { |
| "ce_loss_13": 3.226134145259857, |
| "ce_loss_17": 3.1180890083312987, |
| "ce_loss_2": 4.426052618026733, |
| "ce_loss_4": 4.129113936424256, |
| "ce_loss_9": 3.505542826652527, |
| "epoch": 0.352, |
| "grad_norm": 980.0, |
| "kl_loss_13": 259.01134185791017, |
| "kl_loss_2": 2689.3010986328127, |
| "kl_loss_4": 2140.092169189453, |
| "kl_loss_9": 902.5338195800781, |
| "learning_rate": 0.0007333336616128369, |
| "loss": 1534.1436, |
| "step": 3520 |
| }, |
| { |
| "ce_loss_13": 3.205605316162109, |
| "ce_loss_17": 3.0906078577041627, |
| "ce_loss_2": 4.4533565759658815, |
| "ce_loss_4": 4.1520047068595884, |
| "ce_loss_9": 3.506682014465332, |
| "epoch": 0.353, |
| "grad_norm": 948.0, |
| "kl_loss_13": 267.5398712158203, |
| "kl_loss_2": 2798.996301269531, |
| "kl_loss_4": 2232.368060302734, |
| "kl_loss_9": 945.4359832763672, |
| "learning_rate": 0.0007319291921653463, |
| "loss": 1552.4628, |
| "step": 3530 |
| }, |
| { |
| "ce_loss_13": 3.2874096274375915, |
| "ce_loss_17": 3.1730998396873473, |
| "ce_loss_2": 4.531050324440002, |
| "ce_loss_4": 4.220206606388092, |
| "ce_loss_9": 3.5879290223121645, |
| "epoch": 0.354, |
| "grad_norm": 868.0, |
| "kl_loss_13": 271.1177352905273, |
| "kl_loss_2": 2793.8673706054688, |
| "kl_loss_4": 2204.772351074219, |
| "kl_loss_9": 942.2139038085937, |
| "learning_rate": 0.0007305223871934656, |
| "loss": 1525.7721, |
| "step": 3540 |
| }, |
| { |
| "ce_loss_13": 3.2504573464393616, |
| "ce_loss_17": 3.1372912883758546, |
| "ce_loss_2": 4.477088403701782, |
| "ce_loss_4": 4.163264679908752, |
| "ce_loss_9": 3.540087676048279, |
| "epoch": 0.355, |
| "grad_norm": 780.0, |
| "kl_loss_13": 265.2740966796875, |
| "kl_loss_2": 2747.8240478515627, |
| "kl_loss_4": 2166.4047241210938, |
| "kl_loss_9": 922.7394775390625, |
| "learning_rate": 0.0007291132608637052, |
| "loss": 1529.7303, |
| "step": 3550 |
| }, |
| { |
| "ce_loss_13": 3.2179305791854858, |
| "ce_loss_17": 3.1098664879798887, |
| "ce_loss_2": 4.517221367359161, |
| "ce_loss_4": 4.217991542816162, |
| "ce_loss_9": 3.567255067825317, |
| "epoch": 0.356, |
| "grad_norm": 928.0, |
| "kl_loss_13": 260.6855499267578, |
| "kl_loss_2": 2888.7609008789063, |
| "kl_loss_4": 2332.2343322753904, |
| "kl_loss_9": 1035.0205932617187, |
| "learning_rate": 0.0007277018273659516, |
| "loss": 1603.4388, |
| "step": 3560 |
| }, |
| { |
| "ce_loss_13": 3.3349716782569887, |
| "ce_loss_17": 3.2235106110572813, |
| "ce_loss_2": 4.553770208358765, |
| "ce_loss_4": 4.26164277791977, |
| "ce_loss_9": 3.6422377705574034, |
| "epoch": 0.357, |
| "grad_norm": 944.0, |
| "kl_loss_13": 273.6425354003906, |
| "kl_loss_2": 2757.1307739257813, |
| "kl_loss_4": 2209.2499084472656, |
| "kl_loss_9": 959.8227935791016, |
| "learning_rate": 0.0007262881009133242, |
| "loss": 1541.2783, |
| "step": 3570 |
| }, |
| { |
| "ce_loss_13": 3.2567261099815368, |
| "ce_loss_17": 3.1487882494926454, |
| "ce_loss_2": 4.466839146614075, |
| "ce_loss_4": 4.167692220211029, |
| "ce_loss_9": 3.545789396762848, |
| "epoch": 0.358, |
| "grad_norm": 860.0, |
| "kl_loss_13": 257.5396339416504, |
| "kl_loss_2": 2707.481640625, |
| "kl_loss_4": 2148.8184814453125, |
| "kl_loss_9": 913.860482788086, |
| "learning_rate": 0.0007248720957420329, |
| "loss": 1503.1021, |
| "step": 3580 |
| }, |
| { |
| "ce_loss_13": 3.2594184041023255, |
| "ce_loss_17": 3.1559406042099, |
| "ce_loss_2": 4.463469076156616, |
| "ce_loss_4": 4.162961220741272, |
| "ce_loss_9": 3.5453680992126464, |
| "epoch": 0.359, |
| "grad_norm": 852.0, |
| "kl_loss_13": 256.5190902709961, |
| "kl_loss_2": 2709.578125, |
| "kl_loss_4": 2148.3919982910156, |
| "kl_loss_9": 907.0227081298829, |
| "learning_rate": 0.0007234538261112341, |
| "loss": 1565.8535, |
| "step": 3590 |
| }, |
| { |
| "ce_loss_13": 3.300876867771149, |
| "ce_loss_17": 3.190394973754883, |
| "ce_loss_2": 4.534508442878723, |
| "ce_loss_4": 4.2313772439956665, |
| "ce_loss_9": 3.597358286380768, |
| "epoch": 0.36, |
| "grad_norm": 800.0, |
| "kl_loss_13": 264.86460037231444, |
| "kl_loss_2": 2770.872766113281, |
| "kl_loss_4": 2198.2044189453127, |
| "kl_loss_9": 938.0355834960938, |
| "learning_rate": 0.0007220333063028871, |
| "loss": 1521.7223, |
| "step": 3600 |
| }, |
| { |
| "ce_loss_13": 3.3348900437355042, |
| "ce_loss_17": 3.2230247497558593, |
| "ce_loss_2": 4.632556366920471, |
| "ce_loss_4": 4.338490962982178, |
| "ce_loss_9": 3.7549325942993166, |
| "epoch": 0.361, |
| "grad_norm": 984.0, |
| "kl_loss_13": 273.062801361084, |
| "kl_loss_2": 2932.0540283203127, |
| "kl_loss_4": 2382.24267578125, |
| "kl_loss_9": 1204.1538635253905, |
| "learning_rate": 0.0007206105506216106, |
| "loss": 1633.5795, |
| "step": 3610 |
| }, |
| { |
| "ce_loss_13": 3.2122520565986634, |
| "ce_loss_17": 3.105607509613037, |
| "ce_loss_2": 4.409682142734527, |
| "ce_loss_4": 4.122783887386322, |
| "ce_loss_9": 3.5149780035018923, |
| "epoch": 0.362, |
| "grad_norm": 924.0, |
| "kl_loss_13": 257.5180694580078, |
| "kl_loss_2": 2683.5932739257814, |
| "kl_loss_4": 2137.2582153320313, |
| "kl_loss_9": 935.5963073730469, |
| "learning_rate": 0.0007191855733945387, |
| "loss": 1495.8896, |
| "step": 3620 |
| }, |
| { |
| "ce_loss_13": 3.2981897830963134, |
| "ce_loss_17": 3.188976562023163, |
| "ce_loss_2": 4.5067554950714115, |
| "ce_loss_4": 4.2201844453811646, |
| "ce_loss_9": 3.5986984014511108, |
| "epoch": 0.363, |
| "grad_norm": 840.0, |
| "kl_loss_13": 258.6417350769043, |
| "kl_loss_2": 2713.3114990234376, |
| "kl_loss_4": 2165.3823486328124, |
| "kl_loss_9": 931.6595306396484, |
| "learning_rate": 0.0007177583889711762, |
| "loss": 1511.7896, |
| "step": 3630 |
| }, |
| { |
| "ce_loss_13": 3.216473865509033, |
| "ce_loss_17": 3.1065465688705443, |
| "ce_loss_2": 4.443889188766479, |
| "ce_loss_4": 4.145169186592102, |
| "ce_loss_9": 3.519019401073456, |
| "epoch": 0.364, |
| "grad_norm": 920.0, |
| "kl_loss_13": 260.0630790710449, |
| "kl_loss_2": 2757.587548828125, |
| "kl_loss_4": 2195.412939453125, |
| "kl_loss_9": 945.4518646240234, |
| "learning_rate": 0.0007163290117232541, |
| "loss": 1536.4037, |
| "step": 3640 |
| }, |
| { |
| "ce_loss_13": 3.329908609390259, |
| "ce_loss_17": 3.2245630502700804, |
| "ce_loss_2": 4.487287878990173, |
| "ce_loss_4": 4.196808767318726, |
| "ce_loss_9": 3.6057862877845763, |
| "epoch": 0.365, |
| "grad_norm": 824.0, |
| "kl_loss_13": 253.54234313964844, |
| "kl_loss_2": 2646.7896484375, |
| "kl_loss_4": 2099.4751953125, |
| "kl_loss_9": 894.7712463378906, |
| "learning_rate": 0.0007148974560445859, |
| "loss": 1503.4634, |
| "step": 3650 |
| }, |
| { |
| "ce_loss_13": 3.2578052520751952, |
| "ce_loss_17": 3.1506507396698, |
| "ce_loss_2": 4.438538837432861, |
| "ce_loss_4": 4.142103123664856, |
| "ce_loss_9": 3.5446487069129944, |
| "epoch": 0.366, |
| "grad_norm": 876.0, |
| "kl_loss_13": 256.79127349853513, |
| "kl_loss_2": 2649.738623046875, |
| "kl_loss_4": 2101.44814453125, |
| "kl_loss_9": 908.7516204833985, |
| "learning_rate": 0.0007134637363509209, |
| "loss": 1480.0288, |
| "step": 3660 |
| }, |
| { |
| "ce_loss_13": 3.364702308177948, |
| "ce_loss_17": 3.2644433975219727, |
| "ce_loss_2": 4.530397081375122, |
| "ce_loss_4": 4.244260728359222, |
| "ce_loss_9": 3.6426509380340577, |
| "epoch": 0.367, |
| "grad_norm": 1080.0, |
| "kl_loss_13": 251.78546752929688, |
| "kl_loss_2": 2621.7019287109374, |
| "kl_loss_4": 2082.4747680664063, |
| "kl_loss_9": 893.3975799560546, |
| "learning_rate": 0.0007120278670798009, |
| "loss": 1501.5459, |
| "step": 3670 |
| }, |
| { |
| "ce_loss_13": 3.167364203929901, |
| "ce_loss_17": 3.0563937783241273, |
| "ce_loss_2": 4.4570074558258055, |
| "ce_loss_4": 4.153195750713349, |
| "ce_loss_9": 3.487860894203186, |
| "epoch": 0.368, |
| "grad_norm": 1008.0, |
| "kl_loss_13": 265.667463684082, |
| "kl_loss_2": 2866.7249755859375, |
| "kl_loss_4": 2296.5847595214846, |
| "kl_loss_9": 970.1669647216797, |
| "learning_rate": 0.0007105898626904133, |
| "loss": 1599.851, |
| "step": 3680 |
| }, |
| { |
| "ce_loss_13": 3.2669419646263123, |
| "ce_loss_17": 3.157273232936859, |
| "ce_loss_2": 4.495619750022888, |
| "ce_loss_4": 4.197921943664551, |
| "ce_loss_9": 3.5591470837593078, |
| "epoch": 0.369, |
| "grad_norm": 944.0, |
| "kl_loss_13": 257.6121223449707, |
| "kl_loss_2": 2734.7568969726562, |
| "kl_loss_4": 2178.7565002441406, |
| "kl_loss_9": 915.5562561035156, |
| "learning_rate": 0.0007091497376634463, |
| "loss": 1505.5248, |
| "step": 3690 |
| }, |
| { |
| "ce_loss_13": 3.211390125751495, |
| "ce_loss_17": 3.1078919649124144, |
| "ce_loss_2": 4.41987681388855, |
| "ce_loss_4": 4.1266319990158085, |
| "ce_loss_9": 3.5000448346138002, |
| "epoch": 0.37, |
| "grad_norm": 980.0, |
| "kl_loss_13": 261.45837631225584, |
| "kl_loss_2": 2716.6825561523438, |
| "kl_loss_4": 2158.869757080078, |
| "kl_loss_9": 913.3272674560546, |
| "learning_rate": 0.0007077075065009433, |
| "loss": 1534.3164, |
| "step": 3700 |
| }, |
| { |
| "ce_loss_13": 3.313927936553955, |
| "ce_loss_17": 3.204163873195648, |
| "ce_loss_2": 4.540425372123718, |
| "ce_loss_4": 4.241553962230682, |
| "ce_loss_9": 3.613093602657318, |
| "epoch": 0.371, |
| "grad_norm": 1040.0, |
| "kl_loss_13": 265.8834648132324, |
| "kl_loss_2": 2754.3368408203123, |
| "kl_loss_4": 2197.1968688964844, |
| "kl_loss_9": 940.7850860595703, |
| "learning_rate": 0.0007062631837261557, |
| "loss": 1528.1254, |
| "step": 3710 |
| }, |
| { |
| "ce_loss_13": 3.194749319553375, |
| "ce_loss_17": 3.0911470293998717, |
| "ce_loss_2": 4.408958315849304, |
| "ce_loss_4": 4.1089115858078005, |
| "ce_loss_9": 3.490426230430603, |
| "epoch": 0.372, |
| "grad_norm": 1040.0, |
| "kl_loss_13": 254.69494705200196, |
| "kl_loss_2": 2723.1532958984376, |
| "kl_loss_4": 2154.638623046875, |
| "kl_loss_9": 921.4205688476562, |
| "learning_rate": 0.0007048167838833977, |
| "loss": 1548.4969, |
| "step": 3720 |
| }, |
| { |
| "ce_loss_13": 3.2801799178123474, |
| "ce_loss_17": 3.1717621088027954, |
| "ce_loss_2": 4.464912414550781, |
| "ce_loss_4": 4.165446448326111, |
| "ce_loss_9": 3.5629095315933226, |
| "epoch": 0.373, |
| "grad_norm": 996.0, |
| "kl_loss_13": 261.7152183532715, |
| "kl_loss_2": 2698.193151855469, |
| "kl_loss_4": 2131.0233459472656, |
| "kl_loss_9": 911.2842071533203, |
| "learning_rate": 0.0007033683215379002, |
| "loss": 1503.6412, |
| "step": 3730 |
| }, |
| { |
| "ce_loss_13": 3.2637365460395813, |
| "ce_loss_17": 3.1559090495109556, |
| "ce_loss_2": 4.485249447822571, |
| "ce_loss_4": 4.182522225379944, |
| "ce_loss_9": 3.5602781772613525, |
| "epoch": 0.374, |
| "grad_norm": 984.0, |
| "kl_loss_13": 255.32725677490234, |
| "kl_loss_2": 2715.4867553710938, |
| "kl_loss_4": 2151.5368896484374, |
| "kl_loss_9": 913.8755249023437, |
| "learning_rate": 0.0007019178112756625, |
| "loss": 1522.4976, |
| "step": 3740 |
| }, |
| { |
| "ce_loss_13": 3.230670213699341, |
| "ce_loss_17": 3.1286957025527955, |
| "ce_loss_2": 4.445177912712097, |
| "ce_loss_4": 4.1452152013778685, |
| "ce_loss_9": 3.5189914226531984, |
| "epoch": 0.375, |
| "grad_norm": 900.0, |
| "kl_loss_13": 255.2519073486328, |
| "kl_loss_2": 2715.345349121094, |
| "kl_loss_4": 2149.3450439453127, |
| "kl_loss_9": 905.7212707519532, |
| "learning_rate": 0.0007004652677033068, |
| "loss": 1518.2148, |
| "step": 3750 |
| }, |
| { |
| "ce_loss_13": 3.3054085612297057, |
| "ce_loss_17": 3.2047349572181703, |
| "ce_loss_2": 4.480301284790039, |
| "ce_loss_4": 4.18759024143219, |
| "ce_loss_9": 3.5867436051368715, |
| "epoch": 0.376, |
| "grad_norm": 884.0, |
| "kl_loss_13": 248.02193374633788, |
| "kl_loss_2": 2648.7164916992188, |
| "kl_loss_4": 2091.7419311523436, |
| "kl_loss_9": 885.0427612304687, |
| "learning_rate": 0.0006990107054479312, |
| "loss": 1493.5008, |
| "step": 3760 |
| }, |
| { |
| "ce_loss_13": 3.285297656059265, |
| "ce_loss_17": 3.1768379330635073, |
| "ce_loss_2": 4.480677556991577, |
| "ce_loss_4": 4.184199416637421, |
| "ce_loss_9": 3.575889194011688, |
| "epoch": 0.377, |
| "grad_norm": 824.0, |
| "kl_loss_13": 257.4175407409668, |
| "kl_loss_2": 2680.176306152344, |
| "kl_loss_4": 2124.8356384277345, |
| "kl_loss_9": 910.8084014892578, |
| "learning_rate": 0.000697554139156961, |
| "loss": 1504.4557, |
| "step": 3770 |
| }, |
| { |
| "ce_loss_13": 3.2776976466178893, |
| "ce_loss_17": 3.1708826661109923, |
| "ce_loss_2": 4.499907684326172, |
| "ce_loss_4": 4.196723687648773, |
| "ce_loss_9": 3.5752155065536497, |
| "epoch": 0.378, |
| "grad_norm": 836.0, |
| "kl_loss_13": 262.2697509765625, |
| "kl_loss_2": 2745.4320678710938, |
| "kl_loss_4": 2173.7311767578126, |
| "kl_loss_9": 925.2001190185547, |
| "learning_rate": 0.0006960955834980027, |
| "loss": 1492.4615, |
| "step": 3780 |
| }, |
| { |
| "ce_loss_13": 3.2487109780311583, |
| "ce_loss_17": 3.1408375978469847, |
| "ce_loss_2": 4.445514249801636, |
| "ce_loss_4": 4.148711025714874, |
| "ce_loss_9": 3.5368839502334595, |
| "epoch": 0.379, |
| "grad_norm": 876.0, |
| "kl_loss_13": 256.243830871582, |
| "kl_loss_2": 2686.2978515625, |
| "kl_loss_4": 2126.0045532226563, |
| "kl_loss_9": 902.8145324707032, |
| "learning_rate": 0.0006946350531586958, |
| "loss": 1504.1542, |
| "step": 3790 |
| }, |
| { |
| "ce_loss_13": 3.279289186000824, |
| "ce_loss_17": 3.1716007351875306, |
| "ce_loss_2": 4.489738643169403, |
| "ce_loss_4": 4.189744293689728, |
| "ce_loss_9": 3.561597526073456, |
| "epoch": 0.38, |
| "grad_norm": 852.0, |
| "kl_loss_13": 256.13149642944336, |
| "kl_loss_2": 2729.830700683594, |
| "kl_loss_4": 2160.052276611328, |
| "kl_loss_9": 908.5320495605469, |
| "learning_rate": 0.0006931725628465643, |
| "loss": 1539.1525, |
| "step": 3800 |
| }, |
| { |
| "ce_loss_13": 3.2851457595825195, |
| "ce_loss_17": 3.175520598888397, |
| "ce_loss_2": 4.4944953441619875, |
| "ce_loss_4": 4.203706610202789, |
| "ce_loss_9": 3.5788394927978517, |
| "epoch": 0.381, |
| "grad_norm": 964.0, |
| "kl_loss_13": 259.54501876831057, |
| "kl_loss_2": 2708.1065307617187, |
| "kl_loss_4": 2156.5178100585936, |
| "kl_loss_9": 919.4812438964843, |
| "learning_rate": 0.0006917081272888696, |
| "loss": 1517.0754, |
| "step": 3810 |
| }, |
| { |
| "ce_loss_13": 3.2031586289405825, |
| "ce_loss_17": 3.093578827381134, |
| "ce_loss_2": 4.443379020690918, |
| "ce_loss_4": 4.158561587333679, |
| "ce_loss_9": 3.5018152356147767, |
| "epoch": 0.382, |
| "grad_norm": 1004.0, |
| "kl_loss_13": 261.8097373962402, |
| "kl_loss_2": 2790.98818359375, |
| "kl_loss_4": 2257.50458984375, |
| "kl_loss_9": 947.4319519042969, |
| "learning_rate": 0.0006902417612324615, |
| "loss": 1526.6009, |
| "step": 3820 |
| }, |
| { |
| "ce_loss_13": 3.326650393009186, |
| "ce_loss_17": 3.211716187000275, |
| "ce_loss_2": 4.574871611595154, |
| "ce_loss_4": 4.266353797912598, |
| "ce_loss_9": 3.6265225768089295, |
| "epoch": 0.383, |
| "grad_norm": 1112.0, |
| "kl_loss_13": 273.65276260375975, |
| "kl_loss_2": 2808.7891479492187, |
| "kl_loss_4": 2225.904541015625, |
| "kl_loss_9": 949.0901641845703, |
| "learning_rate": 0.00068877347944363, |
| "loss": 1543.6871, |
| "step": 3830 |
| }, |
| { |
| "ce_loss_13": 3.322444438934326, |
| "ce_loss_17": 3.217497932910919, |
| "ce_loss_2": 4.499426317214966, |
| "ce_loss_4": 4.206605076789856, |
| "ce_loss_9": 3.605990445613861, |
| "epoch": 0.384, |
| "grad_norm": 924.0, |
| "kl_loss_13": 260.74532318115234, |
| "kl_loss_2": 2654.346301269531, |
| "kl_loss_4": 2114.3500122070313, |
| "kl_loss_9": 906.5224609375, |
| "learning_rate": 0.0006873032967079561, |
| "loss": 1509.6395, |
| "step": 3840 |
| }, |
| { |
| "ce_loss_13": 3.30531644821167, |
| "ce_loss_17": 3.2004496335983275, |
| "ce_loss_2": 4.468958473205566, |
| "ce_loss_4": 4.180116498470307, |
| "ce_loss_9": 3.5825775384902956, |
| "epoch": 0.385, |
| "grad_norm": 1024.0, |
| "kl_loss_13": 256.6315238952637, |
| "kl_loss_2": 2648.3871459960938, |
| "kl_loss_4": 2099.5373596191407, |
| "kl_loss_9": 896.1856536865234, |
| "learning_rate": 0.0006858312278301637, |
| "loss": 1480.9059, |
| "step": 3850 |
| }, |
| { |
| "ce_loss_13": 3.3395624995231628, |
| "ce_loss_17": 3.2356110572814942, |
| "ce_loss_2": 4.486798334121704, |
| "ce_loss_4": 4.196293044090271, |
| "ce_loss_9": 3.6115405678749086, |
| "epoch": 0.386, |
| "grad_norm": 956.0, |
| "kl_loss_13": 255.22662048339845, |
| "kl_loss_2": 2623.6235107421876, |
| "kl_loss_4": 2076.038458251953, |
| "kl_loss_9": 890.8045532226563, |
| "learning_rate": 0.0006843572876339704, |
| "loss": 1477.2686, |
| "step": 3860 |
| }, |
| { |
| "ce_loss_13": 3.2566393971443177, |
| "ce_loss_17": 3.157276213169098, |
| "ce_loss_2": 4.411909604072571, |
| "ce_loss_4": 4.117034494876862, |
| "ce_loss_9": 3.5282593727111817, |
| "epoch": 0.387, |
| "grad_norm": 984.0, |
| "kl_loss_13": 245.25594482421874, |
| "kl_loss_2": 2600.825, |
| "kl_loss_4": 2049.154083251953, |
| "kl_loss_9": 869.0475250244141, |
| "learning_rate": 0.0006828814909619373, |
| "loss": 1506.1814, |
| "step": 3870 |
| }, |
| { |
| "ce_loss_13": 3.3778513073921204, |
| "ce_loss_17": 3.275028121471405, |
| "ce_loss_2": 4.565483021736145, |
| "ce_loss_4": 4.265429353713989, |
| "ce_loss_9": 3.6555633425712584, |
| "epoch": 0.388, |
| "grad_norm": 740.0, |
| "kl_loss_13": 258.9280632019043, |
| "kl_loss_2": 2668.3092041015625, |
| "kl_loss_4": 2106.6774841308593, |
| "kl_loss_9": 900.5886901855469, |
| "learning_rate": 0.0006814038526753205, |
| "loss": 1468.7395, |
| "step": 3880 |
| }, |
| { |
| "ce_loss_13": 3.2835440039634705, |
| "ce_loss_17": 3.176141059398651, |
| "ce_loss_2": 4.466986298561096, |
| "ce_loss_4": 4.174821531772613, |
| "ce_loss_9": 3.5715714931488036, |
| "epoch": 0.389, |
| "grad_norm": 1012.0, |
| "kl_loss_13": 256.4760231018066, |
| "kl_loss_2": 2655.9200073242187, |
| "kl_loss_4": 2107.1416198730467, |
| "kl_loss_9": 902.8250457763672, |
| "learning_rate": 0.0006799243876539213, |
| "loss": 1490.4295, |
| "step": 3890 |
| }, |
| { |
| "ce_loss_13": 3.2106783986091614, |
| "ce_loss_17": 3.1081603288650514, |
| "ce_loss_2": 4.438698053359985, |
| "ce_loss_4": 4.143589389324188, |
| "ce_loss_9": 3.498281919956207, |
| "epoch": 0.39, |
| "grad_norm": 1224.0, |
| "kl_loss_13": 252.99812088012695, |
| "kl_loss_2": 2749.638293457031, |
| "kl_loss_4": 2195.5742431640624, |
| "kl_loss_9": 909.4136962890625, |
| "learning_rate": 0.0006784431107959359, |
| "loss": 1532.8127, |
| "step": 3900 |
| }, |
| { |
| "ce_loss_13": 3.2698216438293457, |
| "ce_loss_17": 3.1604208827018736, |
| "ce_loss_2": 4.506122970581055, |
| "ce_loss_4": 4.210958790779114, |
| "ce_loss_9": 3.568881094455719, |
| "epoch": 0.391, |
| "grad_norm": 992.0, |
| "kl_loss_13": 260.73576431274415, |
| "kl_loss_2": 2797.4330200195313, |
| "kl_loss_4": 2231.8090637207033, |
| "kl_loss_9": 933.0070922851562, |
| "learning_rate": 0.0006769600370178059, |
| "loss": 1528.6467, |
| "step": 3910 |
| }, |
| { |
| "ce_loss_13": 3.2326796531677244, |
| "ce_loss_17": 3.1260639071464538, |
| "ce_loss_2": 4.442447280883789, |
| "ce_loss_4": 4.144731736183166, |
| "ce_loss_9": 3.521716368198395, |
| "epoch": 0.392, |
| "grad_norm": 876.0, |
| "kl_loss_13": 252.8242385864258, |
| "kl_loss_2": 2694.6508544921876, |
| "kl_loss_4": 2147.473309326172, |
| "kl_loss_9": 913.2889129638672, |
| "learning_rate": 0.0006754751812540679, |
| "loss": 1477.6962, |
| "step": 3920 |
| }, |
| { |
| "ce_loss_13": 3.277917730808258, |
| "ce_loss_17": 3.172783100605011, |
| "ce_loss_2": 4.496692824363708, |
| "ce_loss_4": 4.201251602172851, |
| "ce_loss_9": 3.570281219482422, |
| "epoch": 0.393, |
| "grad_norm": 968.0, |
| "kl_loss_13": 258.7775993347168, |
| "kl_loss_2": 2741.7844848632812, |
| "kl_loss_4": 2181.000567626953, |
| "kl_loss_9": 915.6311431884766, |
| "learning_rate": 0.0006739885584572025, |
| "loss": 1525.5232, |
| "step": 3930 |
| }, |
| { |
| "ce_loss_13": 3.3017163634300233, |
| "ce_loss_17": 3.1931395888328553, |
| "ce_loss_2": 4.5345635414123535, |
| "ce_loss_4": 4.239324998855591, |
| "ce_loss_9": 3.6164015173912047, |
| "epoch": 0.394, |
| "grad_norm": 1080.0, |
| "kl_loss_13": 259.0673210144043, |
| "kl_loss_2": 2785.4064331054688, |
| "kl_loss_4": 2231.9640380859373, |
| "kl_loss_9": 982.2934204101563, |
| "learning_rate": 0.0006725001835974853, |
| "loss": 1521.083, |
| "step": 3940 |
| }, |
| { |
| "ce_loss_13": 3.2948089957237245, |
| "ce_loss_17": 3.1901307702064514, |
| "ce_loss_2": 4.5174314975738525, |
| "ce_loss_4": 4.219349348545075, |
| "ce_loss_9": 3.5903812766075136, |
| "epoch": 0.395, |
| "grad_norm": 892.0, |
| "kl_loss_13": 263.14002990722656, |
| "kl_loss_2": 2751.4258666992187, |
| "kl_loss_4": 2182.2444274902346, |
| "kl_loss_9": 926.5575988769531, |
| "learning_rate": 0.0006710100716628344, |
| "loss": 1492.3178, |
| "step": 3950 |
| }, |
| { |
| "ce_loss_13": 3.276067054271698, |
| "ce_loss_17": 3.168751561641693, |
| "ce_loss_2": 4.480341553688049, |
| "ce_loss_4": 4.184464979171753, |
| "ce_loss_9": 3.5675591349601747, |
| "epoch": 0.396, |
| "grad_norm": 936.0, |
| "kl_loss_13": 255.0099967956543, |
| "kl_loss_2": 2696.762561035156, |
| "kl_loss_4": 2148.080944824219, |
| "kl_loss_9": 914.0700073242188, |
| "learning_rate": 0.0006695182376586602, |
| "loss": 1512.2644, |
| "step": 3960 |
| }, |
| { |
| "ce_loss_13": 3.306942069530487, |
| "ce_loss_17": 3.2029885768890383, |
| "ce_loss_2": 4.4489758014678955, |
| "ce_loss_4": 4.159535086154937, |
| "ce_loss_9": 3.5739479422569276, |
| "epoch": 0.397, |
| "grad_norm": 1088.0, |
| "kl_loss_13": 246.39905242919923, |
| "kl_loss_2": 2573.448107910156, |
| "kl_loss_4": 2029.3048950195312, |
| "kl_loss_9": 865.8281646728516, |
| "learning_rate": 0.000668024696607715, |
| "loss": 1494.0627, |
| "step": 3970 |
| }, |
| { |
| "ce_loss_13": 3.2687542915344237, |
| "ce_loss_17": 3.1674939393997192, |
| "ce_loss_2": 4.457654964923859, |
| "ce_loss_4": 4.163764524459839, |
| "ce_loss_9": 3.5486562252044678, |
| "epoch": 0.398, |
| "grad_norm": 960.0, |
| "kl_loss_13": 253.8588996887207, |
| "kl_loss_2": 2682.0030151367187, |
| "kl_loss_4": 2131.700372314453, |
| "kl_loss_9": 902.5161743164062, |
| "learning_rate": 0.0006665294635499404, |
| "loss": 1499.1521, |
| "step": 3980 |
| }, |
| { |
| "ce_loss_13": 3.278521728515625, |
| "ce_loss_17": 3.164673399925232, |
| "ce_loss_2": 4.5200001955032345, |
| "ce_loss_4": 4.224125778675079, |
| "ce_loss_9": 3.577804756164551, |
| "epoch": 0.399, |
| "grad_norm": 1288.0, |
| "kl_loss_13": 269.40819931030273, |
| "kl_loss_2": 2802.2524536132814, |
| "kl_loss_4": 2250.640521240234, |
| "kl_loss_9": 946.616748046875, |
| "learning_rate": 0.0006650325535423167, |
| "loss": 1529.9044, |
| "step": 3990 |
| }, |
| { |
| "ce_loss_13": 3.2968241453170775, |
| "ce_loss_17": 3.192675495147705, |
| "ce_loss_2": 4.444801652431488, |
| "ce_loss_4": 4.1636927962303165, |
| "ce_loss_9": 3.5698022842407227, |
| "epoch": 0.4, |
| "grad_norm": 900.0, |
| "kl_loss_13": 253.8608497619629, |
| "kl_loss_2": 2573.104699707031, |
| "kl_loss_4": 2033.4189575195312, |
| "kl_loss_9": 869.8557220458985, |
| "learning_rate": 0.0006635339816587109, |
| "loss": 1477.6811, |
| "step": 4000 |
| }, |
| { |
| "ce_loss_13": 3.239960014820099, |
| "ce_loss_17": 3.1299314975738524, |
| "ce_loss_2": 4.461476826667786, |
| "ce_loss_4": 4.167317008972168, |
| "ce_loss_9": 3.519672131538391, |
| "epoch": 0.401, |
| "grad_norm": 784.0, |
| "kl_loss_13": 263.29820861816404, |
| "kl_loss_2": 2755.9470458984374, |
| "kl_loss_4": 2204.6129943847654, |
| "kl_loss_9": 909.1761901855468, |
| "learning_rate": 0.0006620337629897252, |
| "loss": 1506.9777, |
| "step": 4010 |
| }, |
| { |
| "ce_loss_13": 3.2471362352371216, |
| "ce_loss_17": 3.13832368850708, |
| "ce_loss_2": 4.451458191871643, |
| "ce_loss_4": 4.145995020866394, |
| "ce_loss_9": 3.534397339820862, |
| "epoch": 0.402, |
| "grad_norm": 1224.0, |
| "kl_loss_13": 261.9503913879395, |
| "kl_loss_2": 2719.291650390625, |
| "kl_loss_4": 2145.2047912597654, |
| "kl_loss_9": 901.3993530273438, |
| "learning_rate": 0.0006605319126425454, |
| "loss": 1533.0329, |
| "step": 4020 |
| }, |
| { |
| "ce_loss_13": 3.154713821411133, |
| "ce_loss_17": 3.0501688599586485, |
| "ce_loss_2": 4.396152353286743, |
| "ce_loss_4": 4.094900381565094, |
| "ce_loss_9": 3.4492963910102845, |
| "epoch": 0.403, |
| "grad_norm": 952.0, |
| "kl_loss_13": 257.43613510131837, |
| "kl_loss_2": 2796.779895019531, |
| "kl_loss_4": 2234.564727783203, |
| "kl_loss_9": 925.970669555664, |
| "learning_rate": 0.0006590284457407876, |
| "loss": 1535.3605, |
| "step": 4030 |
| }, |
| { |
| "ce_loss_13": 3.248992455005646, |
| "ce_loss_17": 3.140540564060211, |
| "ce_loss_2": 4.454291653633118, |
| "ce_loss_4": 4.152192795276642, |
| "ce_loss_9": 3.5340006709098817, |
| "epoch": 0.404, |
| "grad_norm": 956.0, |
| "kl_loss_13": 259.04834365844727, |
| "kl_loss_2": 2702.6875, |
| "kl_loss_4": 2143.7586181640627, |
| "kl_loss_9": 904.829296875, |
| "learning_rate": 0.0006575233774243465, |
| "loss": 1504.0392, |
| "step": 4040 |
| }, |
| { |
| "ce_loss_13": 3.2439682126045226, |
| "ce_loss_17": 3.132973873615265, |
| "ce_loss_2": 4.457049179077148, |
| "ce_loss_4": 4.15779435634613, |
| "ce_loss_9": 3.538478982448578, |
| "epoch": 0.405, |
| "grad_norm": 1168.0, |
| "kl_loss_13": 264.3772857666016, |
| "kl_loss_2": 2759.050817871094, |
| "kl_loss_4": 2191.870812988281, |
| "kl_loss_9": 923.5060333251953, |
| "learning_rate": 0.0006560167228492435, |
| "loss": 1529.1475, |
| "step": 4050 |
| }, |
| { |
| "ce_loss_13": 3.28635972738266, |
| "ce_loss_17": 3.1868526935577393, |
| "ce_loss_2": 4.451249098777771, |
| "ce_loss_4": 4.158509790897369, |
| "ce_loss_9": 3.5620246291160584, |
| "epoch": 0.406, |
| "grad_norm": 872.0, |
| "kl_loss_13": 248.8571029663086, |
| "kl_loss_2": 2629.9989013671875, |
| "kl_loss_4": 2083.5358764648436, |
| "kl_loss_9": 889.4856201171875, |
| "learning_rate": 0.0006545084971874737, |
| "loss": 1498.9373, |
| "step": 4060 |
| }, |
| { |
| "ce_loss_13": 3.252144455909729, |
| "ce_loss_17": 3.1407360196113587, |
| "ce_loss_2": 4.496238851547242, |
| "ce_loss_4": 4.188989889621735, |
| "ce_loss_9": 3.5516305923461915, |
| "epoch": 0.407, |
| "grad_norm": 1088.0, |
| "kl_loss_13": 266.5647735595703, |
| "kl_loss_2": 2803.868762207031, |
| "kl_loss_4": 2222.3373291015623, |
| "kl_loss_9": 942.9162872314453, |
| "learning_rate": 0.0006529987156268526, |
| "loss": 1518.0225, |
| "step": 4070 |
| }, |
| { |
| "ce_loss_13": 3.1712148070335386, |
| "ce_loss_17": 3.060807430744171, |
| "ce_loss_2": 4.401592254638672, |
| "ce_loss_4": 4.099349057674408, |
| "ce_loss_9": 3.462227690219879, |
| "epoch": 0.408, |
| "grad_norm": 932.0, |
| "kl_loss_13": 256.5192222595215, |
| "kl_loss_2": 2754.0554809570312, |
| "kl_loss_4": 2178.2113525390623, |
| "kl_loss_9": 917.4382476806641, |
| "learning_rate": 0.0006514873933708637, |
| "loss": 1547.6354, |
| "step": 4080 |
| }, |
| { |
| "ce_loss_13": 3.280282235145569, |
| "ce_loss_17": 3.177602434158325, |
| "ce_loss_2": 4.476862525939941, |
| "ce_loss_4": 4.174697351455689, |
| "ce_loss_9": 3.5550962328910827, |
| "epoch": 0.409, |
| "grad_norm": 976.0, |
| "kl_loss_13": 249.44141693115233, |
| "kl_loss_2": 2696.6348388671877, |
| "kl_loss_4": 2128.7225036621094, |
| "kl_loss_9": 889.0351318359375, |
| "learning_rate": 0.0006499745456385053, |
| "loss": 1492.0152, |
| "step": 4090 |
| }, |
| { |
| "ce_loss_13": 3.246891236305237, |
| "ce_loss_17": 3.1406236410140993, |
| "ce_loss_2": 4.446601033210754, |
| "ce_loss_4": 4.149120056629181, |
| "ce_loss_9": 3.5301833271980287, |
| "epoch": 0.41, |
| "grad_norm": 1144.0, |
| "kl_loss_13": 255.66474685668945, |
| "kl_loss_2": 2696.27841796875, |
| "kl_loss_4": 2136.27607421875, |
| "kl_loss_9": 901.5696441650391, |
| "learning_rate": 0.0006484601876641375, |
| "loss": 1511.4131, |
| "step": 4100 |
| }, |
| { |
| "ce_loss_13": 3.236333763599396, |
| "ce_loss_17": 3.1360055685043333, |
| "ce_loss_2": 4.393021845817566, |
| "ce_loss_4": 4.102540409564972, |
| "ce_loss_9": 3.5071921944618225, |
| "epoch": 0.411, |
| "grad_norm": 976.0, |
| "kl_loss_13": 247.48419265747071, |
| "kl_loss_2": 2608.329541015625, |
| "kl_loss_4": 2059.3785034179687, |
| "kl_loss_9": 868.8331329345704, |
| "learning_rate": 0.000646944334697328, |
| "loss": 1466.0266, |
| "step": 4110 |
| }, |
| { |
| "ce_loss_13": 3.340055322647095, |
| "ce_loss_17": 3.2402821660041807, |
| "ce_loss_2": 4.484807562828064, |
| "ce_loss_4": 4.19776873588562, |
| "ce_loss_9": 3.612150752544403, |
| "epoch": 0.412, |
| "grad_norm": 952.0, |
| "kl_loss_13": 248.76947937011718, |
| "kl_loss_2": 2571.6581176757813, |
| "kl_loss_4": 2038.1038879394532, |
| "kl_loss_9": 868.3009399414062, |
| "learning_rate": 0.0006454270020026995, |
| "loss": 1444.5451, |
| "step": 4120 |
| }, |
| { |
| "ce_loss_13": 3.309734010696411, |
| "ce_loss_17": 3.2125233888626097, |
| "ce_loss_2": 4.449851608276367, |
| "ce_loss_4": 4.1617103099823, |
| "ce_loss_9": 3.5752691626548767, |
| "epoch": 0.413, |
| "grad_norm": 780.0, |
| "kl_loss_13": 241.0909393310547, |
| "kl_loss_2": 2565.3167358398437, |
| "kl_loss_4": 2027.0023376464844, |
| "kl_loss_9": 860.9738616943359, |
| "learning_rate": 0.0006439082048597755, |
| "loss": 1441.1991, |
| "step": 4130 |
| }, |
| { |
| "ce_loss_13": 3.301981580257416, |
| "ce_loss_17": 3.1978550791740417, |
| "ce_loss_2": 4.4916246175765995, |
| "ce_loss_4": 4.199550783634185, |
| "ce_loss_9": 3.5869855880737305, |
| "epoch": 0.414, |
| "grad_norm": 844.0, |
| "kl_loss_13": 251.52623138427734, |
| "kl_loss_2": 2666.551171875, |
| "kl_loss_4": 2124.1689208984376, |
| "kl_loss_9": 901.9555053710938, |
| "learning_rate": 0.0006423879585628261, |
| "loss": 1492.6865, |
| "step": 4140 |
| }, |
| { |
| "ce_loss_13": 3.2579838156700136, |
| "ce_loss_17": 3.150812804698944, |
| "ce_loss_2": 4.476174592971802, |
| "ce_loss_4": 4.186261522769928, |
| "ce_loss_9": 3.552501606941223, |
| "epoch": 0.415, |
| "grad_norm": 992.0, |
| "kl_loss_13": 258.9015319824219, |
| "kl_loss_2": 2742.9803833007813, |
| "kl_loss_4": 2183.9866943359375, |
| "kl_loss_9": 914.411343383789, |
| "learning_rate": 0.0006408662784207149, |
| "loss": 1516.8415, |
| "step": 4150 |
| }, |
| { |
| "ce_loss_13": 3.2277621030807495, |
| "ce_loss_17": 3.1248366117477415, |
| "ce_loss_2": 4.420533800125122, |
| "ce_loss_4": 4.125111293792725, |
| "ce_loss_9": 3.505410647392273, |
| "epoch": 0.416, |
| "grad_norm": 1128.0, |
| "kl_loss_13": 249.44825134277343, |
| "kl_loss_2": 2689.5724365234373, |
| "kl_loss_4": 2136.4685485839846, |
| "kl_loss_9": 898.8004425048828, |
| "learning_rate": 0.0006393431797567439, |
| "loss": 1497.85, |
| "step": 4160 |
| }, |
| { |
| "ce_loss_13": 3.303004336357117, |
| "ce_loss_17": 3.2045061588287354, |
| "ce_loss_2": 4.447912311553955, |
| "ce_loss_4": 4.16178308725357, |
| "ce_loss_9": 3.5648901581764223, |
| "epoch": 0.417, |
| "grad_norm": 912.0, |
| "kl_loss_13": 247.51254959106444, |
| "kl_loss_2": 2599.4328491210936, |
| "kl_loss_4": 2055.670196533203, |
| "kl_loss_9": 870.3442169189453, |
| "learning_rate": 0.0006378186779084996, |
| "loss": 1427.0573, |
| "step": 4170 |
| }, |
| { |
| "ce_loss_13": 3.1452762961387633, |
| "ce_loss_17": 3.0423365592956544, |
| "ce_loss_2": 4.372230696678161, |
| "ce_loss_4": 4.0655600190162655, |
| "ce_loss_9": 3.443104100227356, |
| "epoch": 0.418, |
| "grad_norm": 1112.0, |
| "kl_loss_13": 254.8896339416504, |
| "kl_loss_2": 2715.9855712890626, |
| "kl_loss_4": 2156.0260803222654, |
| "kl_loss_9": 911.9010192871094, |
| "learning_rate": 0.0006362927882276989, |
| "loss": 1517.1884, |
| "step": 4180 |
| }, |
| { |
| "ce_loss_13": 3.326024615764618, |
| "ce_loss_17": 3.2253722310066224, |
| "ce_loss_2": 4.480400609970093, |
| "ce_loss_4": 4.187675476074219, |
| "ce_loss_9": 3.591586148738861, |
| "epoch": 0.419, |
| "grad_norm": 880.0, |
| "kl_loss_13": 244.14863967895508, |
| "kl_loss_2": 2608.224658203125, |
| "kl_loss_4": 2055.314483642578, |
| "kl_loss_9": 865.4117401123046, |
| "learning_rate": 0.000634765526080034, |
| "loss": 1435.7271, |
| "step": 4190 |
| }, |
| { |
| "ce_loss_13": 3.332194709777832, |
| "ce_loss_17": 3.229012358188629, |
| "ce_loss_2": 4.503545594215393, |
| "ce_loss_4": 4.203345084190369, |
| "ce_loss_9": 3.6066953539848328, |
| "epoch": 0.42, |
| "grad_norm": 1272.0, |
| "kl_loss_13": 254.1906379699707, |
| "kl_loss_2": 2642.7889404296875, |
| "kl_loss_4": 2086.995257568359, |
| "kl_loss_9": 889.3188873291016, |
| "learning_rate": 0.0006332369068450174, |
| "loss": 1461.619, |
| "step": 4200 |
| }, |
| { |
| "ce_loss_13": 3.2727856636047363, |
| "ce_loss_17": 3.1705421566963197, |
| "ce_loss_2": 4.452891707420349, |
| "ce_loss_4": 4.15848217010498, |
| "ce_loss_9": 3.5492327570915223, |
| "epoch": 0.421, |
| "grad_norm": 844.0, |
| "kl_loss_13": 250.96693420410156, |
| "kl_loss_2": 2658.6174438476564, |
| "kl_loss_4": 2102.503204345703, |
| "kl_loss_9": 889.322543334961, |
| "learning_rate": 0.0006317069459158283, |
| "loss": 1468.2406, |
| "step": 4210 |
| }, |
| { |
| "ce_loss_13": 3.3720582246780397, |
| "ce_loss_17": 3.272776734828949, |
| "ce_loss_2": 4.5194926977157595, |
| "ce_loss_4": 4.219952380657196, |
| "ce_loss_9": 3.6350441217422484, |
| "epoch": 0.422, |
| "grad_norm": 1008.0, |
| "kl_loss_13": 249.13603820800782, |
| "kl_loss_2": 2606.3539428710938, |
| "kl_loss_4": 2048.601678466797, |
| "kl_loss_9": 865.5066223144531, |
| "learning_rate": 0.0006301756586991561, |
| "loss": 1457.0361, |
| "step": 4220 |
| }, |
| { |
| "ce_loss_13": 3.1610000848770143, |
| "ce_loss_17": 3.058186376094818, |
| "ce_loss_2": 4.380949056148529, |
| "ce_loss_4": 4.081416308879852, |
| "ce_loss_9": 3.4471717834472657, |
| "epoch": 0.423, |
| "grad_norm": 948.0, |
| "kl_loss_13": 253.76913833618164, |
| "kl_loss_2": 2752.35029296875, |
| "kl_loss_4": 2189.721124267578, |
| "kl_loss_9": 914.4220336914062, |
| "learning_rate": 0.0006286430606150459, |
| "loss": 1507.8766, |
| "step": 4230 |
| }, |
| { |
| "ce_loss_13": 3.3587942123413086, |
| "ce_loss_17": 3.2566834926605224, |
| "ce_loss_2": 4.522728085517883, |
| "ce_loss_4": 4.231990051269531, |
| "ce_loss_9": 3.628968966007233, |
| "epoch": 0.424, |
| "grad_norm": 884.0, |
| "kl_loss_13": 257.0655799865723, |
| "kl_loss_2": 2626.136962890625, |
| "kl_loss_4": 2079.118756103516, |
| "kl_loss_9": 882.9879302978516, |
| "learning_rate": 0.0006271091670967436, |
| "loss": 1464.4814, |
| "step": 4240 |
| }, |
| { |
| "ce_loss_13": 3.2773000955581666, |
| "ce_loss_17": 3.1651819467544557, |
| "ce_loss_2": 4.497791874408722, |
| "ce_loss_4": 4.202965342998505, |
| "ce_loss_9": 3.572916769981384, |
| "epoch": 0.425, |
| "grad_norm": 840.0, |
| "kl_loss_13": 261.743123626709, |
| "kl_loss_2": 2767.2141723632812, |
| "kl_loss_4": 2206.4209716796877, |
| "kl_loss_9": 937.2714935302735, |
| "learning_rate": 0.0006255739935905395, |
| "loss": 1510.7184, |
| "step": 4250 |
| }, |
| { |
| "ce_loss_13": 3.311608874797821, |
| "ce_loss_17": 3.206806015968323, |
| "ce_loss_2": 4.474738073348999, |
| "ce_loss_4": 4.1825801730155945, |
| "ce_loss_9": 3.581409478187561, |
| "epoch": 0.426, |
| "grad_norm": 1004.0, |
| "kl_loss_13": 257.0722938537598, |
| "kl_loss_2": 2631.002294921875, |
| "kl_loss_4": 2078.8925842285157, |
| "kl_loss_9": 879.9195007324219, |
| "learning_rate": 0.0006240375555556145, |
| "loss": 1517.8949, |
| "step": 4260 |
| }, |
| { |
| "ce_loss_13": 3.3106016278266908, |
| "ce_loss_17": 3.2040348410606385, |
| "ce_loss_2": 4.536832475662232, |
| "ce_loss_4": 4.233153486251831, |
| "ce_loss_9": 3.6027888655662537, |
| "epoch": 0.427, |
| "grad_norm": 820.0, |
| "kl_loss_13": 256.7631546020508, |
| "kl_loss_2": 2728.1762451171876, |
| "kl_loss_4": 2163.1329345703125, |
| "kl_loss_9": 909.4368682861328, |
| "learning_rate": 0.000622499868463882, |
| "loss": 1496.1117, |
| "step": 4270 |
| }, |
| { |
| "ce_loss_13": 3.2789727210998536, |
| "ce_loss_17": 3.18315132856369, |
| "ce_loss_2": 4.426679062843323, |
| "ce_loss_4": 4.128840184211731, |
| "ce_loss_9": 3.5437944173812865, |
| "epoch": 0.428, |
| "grad_norm": 932.0, |
| "kl_loss_13": 248.85104370117188, |
| "kl_loss_2": 2597.86845703125, |
| "kl_loss_4": 2041.6480529785156, |
| "kl_loss_9": 863.9812622070312, |
| "learning_rate": 0.0006209609477998338, |
| "loss": 1460.5415, |
| "step": 4280 |
| }, |
| { |
| "ce_loss_13": 3.335625696182251, |
| "ce_loss_17": 3.2333606243133546, |
| "ce_loss_2": 4.506818151473999, |
| "ce_loss_4": 4.207405161857605, |
| "ce_loss_9": 3.614287316799164, |
| "epoch": 0.429, |
| "grad_norm": 1008.0, |
| "kl_loss_13": 255.15546417236328, |
| "kl_loss_2": 2638.89560546875, |
| "kl_loss_4": 2082.751251220703, |
| "kl_loss_9": 895.7029449462891, |
| "learning_rate": 0.0006194208090603844, |
| "loss": 1489.2334, |
| "step": 4290 |
| }, |
| { |
| "ce_loss_13": 3.252824330329895, |
| "ce_loss_17": 3.1553032279014586, |
| "ce_loss_2": 4.430841708183289, |
| "ce_loss_4": 4.131936132907867, |
| "ce_loss_9": 3.5294957876205446, |
| "epoch": 0.43, |
| "grad_norm": 824.0, |
| "kl_loss_13": 243.024747467041, |
| "kl_loss_2": 2625.475720214844, |
| "kl_loss_4": 2069.042266845703, |
| "kl_loss_9": 870.0502197265625, |
| "learning_rate": 0.0006178794677547138, |
| "loss": 1444.7055, |
| "step": 4300 |
| }, |
| { |
| "ce_loss_13": 3.281115210056305, |
| "ce_loss_17": 3.178876352310181, |
| "ce_loss_2": 4.46984338760376, |
| "ce_loss_4": 4.176541292667389, |
| "ce_loss_9": 3.5629197478294374, |
| "epoch": 0.431, |
| "grad_norm": 872.0, |
| "kl_loss_13": 253.11211776733398, |
| "kl_loss_2": 2678.749645996094, |
| "kl_loss_4": 2129.639288330078, |
| "kl_loss_9": 902.0837158203125, |
| "learning_rate": 0.0006163369394041111, |
| "loss": 1477.8029, |
| "step": 4310 |
| }, |
| { |
| "ce_loss_13": 3.2192331671714784, |
| "ce_loss_17": 3.117171049118042, |
| "ce_loss_2": 4.425715219974518, |
| "ce_loss_4": 4.129125761985779, |
| "ce_loss_9": 3.5010545372962953, |
| "epoch": 0.432, |
| "grad_norm": 996.0, |
| "kl_loss_13": 249.5622299194336, |
| "kl_loss_2": 2696.57158203125, |
| "kl_loss_4": 2139.875695800781, |
| "kl_loss_9": 893.9369323730468, |
| "learning_rate": 0.0006147932395418205, |
| "loss": 1520.1375, |
| "step": 4320 |
| }, |
| { |
| "ce_loss_13": 3.259524667263031, |
| "ce_loss_17": 3.1584609389305114, |
| "ce_loss_2": 4.4268115043640135, |
| "ce_loss_4": 4.1242932915687565, |
| "ce_loss_9": 3.534462296962738, |
| "epoch": 0.433, |
| "grad_norm": 812.0, |
| "kl_loss_13": 248.51909713745118, |
| "kl_loss_2": 2634.310192871094, |
| "kl_loss_4": 2075.114886474609, |
| "kl_loss_9": 885.6772857666016, |
| "learning_rate": 0.0006132483837128823, |
| "loss": 1455.8972, |
| "step": 4330 |
| }, |
| { |
| "ce_loss_13": 3.2379467606544496, |
| "ce_loss_17": 3.136869728565216, |
| "ce_loss_2": 4.433293747901916, |
| "ce_loss_4": 4.137357759475708, |
| "ce_loss_9": 3.5167957663536074, |
| "epoch": 0.434, |
| "grad_norm": 880.0, |
| "kl_loss_13": 248.912646484375, |
| "kl_loss_2": 2702.531640625, |
| "kl_loss_4": 2141.402960205078, |
| "kl_loss_9": 891.4068756103516, |
| "learning_rate": 0.0006117023874739772, |
| "loss": 1490.4471, |
| "step": 4340 |
| }, |
| { |
| "ce_loss_13": 3.230147731304169, |
| "ce_loss_17": 3.1287991046905517, |
| "ce_loss_2": 4.431419682502747, |
| "ce_loss_4": 4.132992041110993, |
| "ce_loss_9": 3.516324818134308, |
| "epoch": 0.435, |
| "grad_norm": 784.0, |
| "kl_loss_13": 250.83800506591797, |
| "kl_loss_2": 2690.851428222656, |
| "kl_loss_4": 2134.7096557617188, |
| "kl_loss_9": 900.7971160888671, |
| "learning_rate": 0.0006101552663932703, |
| "loss": 1503.827, |
| "step": 4350 |
| }, |
| { |
| "ce_loss_13": 3.258086693286896, |
| "ce_loss_17": 3.155627632141113, |
| "ce_loss_2": 4.42899911403656, |
| "ce_loss_4": 4.147423326969147, |
| "ce_loss_9": 3.53746874332428, |
| "epoch": 0.436, |
| "grad_norm": 828.0, |
| "kl_loss_13": 251.53899154663085, |
| "kl_loss_2": 2640.7837890625, |
| "kl_loss_4": 2104.3141967773436, |
| "kl_loss_9": 890.4929962158203, |
| "learning_rate": 0.0006086070360502539, |
| "loss": 1477.684, |
| "step": 4360 |
| }, |
| { |
| "ce_loss_13": 3.267600750923157, |
| "ce_loss_17": 3.169524610042572, |
| "ce_loss_2": 4.447488939762115, |
| "ce_loss_4": 4.154543244838715, |
| "ce_loss_9": 3.539546489715576, |
| "epoch": 0.437, |
| "grad_norm": 900.0, |
| "kl_loss_13": 245.392578125, |
| "kl_loss_2": 2671.688818359375, |
| "kl_loss_4": 2117.351318359375, |
| "kl_loss_9": 881.3132904052734, |
| "learning_rate": 0.0006070577120355903, |
| "loss": 1478.8512, |
| "step": 4370 |
| }, |
| { |
| "ce_loss_13": 3.269170844554901, |
| "ce_loss_17": 3.1662243247032165, |
| "ce_loss_2": 4.423617565631867, |
| "ce_loss_4": 4.134192037582397, |
| "ce_loss_9": 3.5504857659339906, |
| "epoch": 0.438, |
| "grad_norm": 908.0, |
| "kl_loss_13": 248.28932418823243, |
| "kl_loss_2": 2578.096228027344, |
| "kl_loss_4": 2040.5339721679688, |
| "kl_loss_9": 881.1011779785156, |
| "learning_rate": 0.0006055073099509549, |
| "loss": 1460.0988, |
| "step": 4380 |
| }, |
| { |
| "ce_loss_13": 3.3231343030929565, |
| "ce_loss_17": 3.224526357650757, |
| "ce_loss_2": 4.482841801643372, |
| "ce_loss_4": 4.189580345153809, |
| "ce_loss_9": 3.5920210003852846, |
| "epoch": 0.439, |
| "grad_norm": 980.0, |
| "kl_loss_13": 248.46026916503905, |
| "kl_loss_2": 2603.894494628906, |
| "kl_loss_4": 2063.0343627929688, |
| "kl_loss_9": 875.1090911865234, |
| "learning_rate": 0.0006039558454088796, |
| "loss": 1481.9967, |
| "step": 4390 |
| }, |
| { |
| "ce_loss_13": 3.2967402935028076, |
| "ce_loss_17": 3.1926787853240968, |
| "ce_loss_2": 4.483539938926697, |
| "ce_loss_4": 4.184451556205749, |
| "ce_loss_9": 3.5780591487884523, |
| "epoch": 0.44, |
| "grad_norm": 944.0, |
| "kl_loss_13": 251.04228820800782, |
| "kl_loss_2": 2656.8356323242188, |
| "kl_loss_4": 2097.7961608886717, |
| "kl_loss_9": 890.8476409912109, |
| "learning_rate": 0.0006024033340325954, |
| "loss": 1453.1353, |
| "step": 4400 |
| }, |
| { |
| "ce_loss_13": 3.3612659096717836, |
| "ce_loss_17": 3.262036085128784, |
| "ce_loss_2": 4.486901068687439, |
| "ce_loss_4": 4.196433663368225, |
| "ce_loss_9": 3.6231072187423705, |
| "epoch": 0.441, |
| "grad_norm": 812.0, |
| "kl_loss_13": 243.3283203125, |
| "kl_loss_2": 2530.8232421875, |
| "kl_loss_4": 1989.6197448730468, |
| "kl_loss_9": 845.4649658203125, |
| "learning_rate": 0.0006008497914558743, |
| "loss": 1437.3441, |
| "step": 4410 |
| }, |
| { |
| "ce_loss_13": 3.3072720170021057, |
| "ce_loss_17": 3.20322607755661, |
| "ce_loss_2": 4.491423296928406, |
| "ce_loss_4": 4.197820460796356, |
| "ce_loss_9": 3.5889449954032897, |
| "epoch": 0.442, |
| "grad_norm": 844.0, |
| "kl_loss_13": 262.4004066467285, |
| "kl_loss_2": 2678.535693359375, |
| "kl_loss_4": 2126.85537109375, |
| "kl_loss_9": 899.709341430664, |
| "learning_rate": 0.0005992952333228728, |
| "loss": 1486.9236, |
| "step": 4420 |
| }, |
| { |
| "ce_loss_13": 3.246595299243927, |
| "ce_loss_17": 3.1440267324447633, |
| "ce_loss_2": 4.426004767417908, |
| "ce_loss_4": 4.136229348182678, |
| "ce_loss_9": 3.5241060137748716, |
| "epoch": 0.443, |
| "grad_norm": 1040.0, |
| "kl_loss_13": 250.53558349609375, |
| "kl_loss_2": 2655.2722045898436, |
| "kl_loss_4": 2112.9690856933594, |
| "kl_loss_9": 880.7141845703125, |
| "learning_rate": 0.0005977396752879741, |
| "loss": 1469.6055, |
| "step": 4430 |
| }, |
| { |
| "ce_loss_13": 3.177084732055664, |
| "ce_loss_17": 3.0737411737442017, |
| "ce_loss_2": 4.359619045257569, |
| "ce_loss_4": 4.068020248413086, |
| "ce_loss_9": 3.461109435558319, |
| "epoch": 0.444, |
| "grad_norm": 872.0, |
| "kl_loss_13": 248.73351821899413, |
| "kl_loss_2": 2671.556286621094, |
| "kl_loss_4": 2123.0719604492188, |
| "kl_loss_9": 892.6676544189453, |
| "learning_rate": 0.0005961831330156305, |
| "loss": 1468.1997, |
| "step": 4440 |
| }, |
| { |
| "ce_loss_13": 3.317836058139801, |
| "ce_loss_17": 3.2198015809059144, |
| "ce_loss_2": 4.508257877826691, |
| "ce_loss_4": 4.210682964324951, |
| "ce_loss_9": 3.599003553390503, |
| "epoch": 0.445, |
| "grad_norm": 988.0, |
| "kl_loss_13": 247.8022834777832, |
| "kl_loss_2": 2689.914685058594, |
| "kl_loss_4": 2140.6933410644533, |
| "kl_loss_9": 890.7416595458984, |
| "learning_rate": 0.0005946256221802051, |
| "loss": 1509.3669, |
| "step": 4450 |
| }, |
| { |
| "ce_loss_13": 3.287480020523071, |
| "ce_loss_17": 3.187624156475067, |
| "ce_loss_2": 4.422724366188049, |
| "ce_loss_4": 4.129485881328582, |
| "ce_loss_9": 3.540782070159912, |
| "epoch": 0.446, |
| "grad_norm": 1080.0, |
| "kl_loss_13": 244.29184036254884, |
| "kl_loss_2": 2561.3310668945314, |
| "kl_loss_4": 2014.090264892578, |
| "kl_loss_9": 841.6330383300781, |
| "learning_rate": 0.0005930671584658151, |
| "loss": 1500.8738, |
| "step": 4460 |
| }, |
| { |
| "ce_loss_13": 3.2993441581726075, |
| "ce_loss_17": 3.1980358958244324, |
| "ce_loss_2": 4.465033459663391, |
| "ce_loss_4": 4.16738258600235, |
| "ce_loss_9": 3.569570779800415, |
| "epoch": 0.447, |
| "grad_norm": 1144.0, |
| "kl_loss_13": 249.68865737915038, |
| "kl_loss_2": 2645.7868896484374, |
| "kl_loss_4": 2094.798388671875, |
| "kl_loss_9": 882.6870849609375, |
| "learning_rate": 0.0005915077575661722, |
| "loss": 1486.8843, |
| "step": 4470 |
| }, |
| { |
| "ce_loss_13": 3.3104690074920655, |
| "ce_loss_17": 3.208168315887451, |
| "ce_loss_2": 4.4935990333557125, |
| "ce_loss_4": 4.197047102451324, |
| "ce_loss_9": 3.588785398006439, |
| "epoch": 0.448, |
| "grad_norm": 900.0, |
| "kl_loss_13": 254.03107070922852, |
| "kl_loss_2": 2676.439978027344, |
| "kl_loss_4": 2116.448760986328, |
| "kl_loss_9": 900.4331237792969, |
| "learning_rate": 0.000589947435184427, |
| "loss": 1462.4272, |
| "step": 4480 |
| }, |
| { |
| "ce_loss_13": 3.372387373447418, |
| "ce_loss_17": 3.2739452838897707, |
| "ce_loss_2": 4.48538749217987, |
| "ce_loss_4": 4.195743918418884, |
| "ce_loss_9": 3.6298947811126707, |
| "epoch": 0.449, |
| "grad_norm": 1016.0, |
| "kl_loss_13": 245.27474822998047, |
| "kl_loss_2": 2553.704968261719, |
| "kl_loss_4": 2015.7976379394531, |
| "kl_loss_9": 867.9740966796875, |
| "learning_rate": 0.0005883862070330078, |
| "loss": 1452.4443, |
| "step": 4490 |
| }, |
| { |
| "ce_loss_13": 3.306465280056, |
| "ce_loss_17": 3.2080992341041563, |
| "ce_loss_2": 4.470435309410095, |
| "ce_loss_4": 4.182673251628875, |
| "ce_loss_9": 3.5861368417739867, |
| "epoch": 0.45, |
| "grad_norm": 892.0, |
| "kl_loss_13": 248.69550704956055, |
| "kl_loss_2": 2641.979931640625, |
| "kl_loss_4": 2101.6389892578127, |
| "kl_loss_9": 889.5014190673828, |
| "learning_rate": 0.0005868240888334653, |
| "loss": 1462.8361, |
| "step": 4500 |
| }, |
| { |
| "ce_loss_13": 3.199371540546417, |
| "ce_loss_17": 3.0941427111625672, |
| "ce_loss_2": 4.408132266998291, |
| "ce_loss_4": 4.109568667411804, |
| "ce_loss_9": 3.4845192313194273, |
| "epoch": 0.451, |
| "grad_norm": 1008.0, |
| "kl_loss_13": 252.08171615600585, |
| "kl_loss_2": 2711.8114379882813, |
| "kl_loss_4": 2149.739129638672, |
| "kl_loss_9": 903.5345581054687, |
| "learning_rate": 0.0005852610963163119, |
| "loss": 1488.2083, |
| "step": 4510 |
| }, |
| { |
| "ce_loss_13": 3.220580577850342, |
| "ce_loss_17": 3.1253477811813353, |
| "ce_loss_2": 4.386686527729035, |
| "ce_loss_4": 4.091801774501801, |
| "ce_loss_9": 3.4889899253845216, |
| "epoch": 0.452, |
| "grad_norm": 848.0, |
| "kl_loss_13": 244.482275390625, |
| "kl_loss_2": 2627.63876953125, |
| "kl_loss_4": 2078.801409912109, |
| "kl_loss_9": 874.8622619628907, |
| "learning_rate": 0.0005836972452208654, |
| "loss": 1443.8111, |
| "step": 4520 |
| }, |
| { |
| "ce_loss_13": 3.2246150970458984, |
| "ce_loss_17": 3.127169352769852, |
| "ce_loss_2": 4.412639141082764, |
| "ce_loss_4": 4.1222248554229735, |
| "ce_loss_9": 3.4979526519775392, |
| "epoch": 0.453, |
| "grad_norm": 904.0, |
| "kl_loss_13": 247.92935333251953, |
| "kl_loss_2": 2664.356823730469, |
| "kl_loss_4": 2119.691278076172, |
| "kl_loss_9": 885.7821105957031, |
| "learning_rate": 0.0005821325512950885, |
| "loss": 1481.7197, |
| "step": 4530 |
| }, |
| { |
| "ce_loss_13": 3.246385562419891, |
| "ce_loss_17": 3.1497872352600096, |
| "ce_loss_2": 4.414893829822541, |
| "ce_loss_4": 4.121063005924225, |
| "ce_loss_9": 3.5210866928100586, |
| "epoch": 0.454, |
| "grad_norm": 928.0, |
| "kl_loss_13": 238.96555404663087, |
| "kl_loss_2": 2594.190087890625, |
| "kl_loss_4": 2049.1946655273437, |
| "kl_loss_9": 856.9666076660156, |
| "learning_rate": 0.0005805670302954321, |
| "loss": 1458.1268, |
| "step": 4540 |
| }, |
| { |
| "ce_loss_13": 3.2551254987716676, |
| "ce_loss_17": 3.156969141960144, |
| "ce_loss_2": 4.412191867828369, |
| "ce_loss_4": 4.1229653596878055, |
| "ce_loss_9": 3.5245905756950378, |
| "epoch": 0.455, |
| "grad_norm": 1200.0, |
| "kl_loss_13": 238.9917335510254, |
| "kl_loss_2": 2610.4751831054687, |
| "kl_loss_4": 2068.6161376953123, |
| "kl_loss_9": 873.8004486083985, |
| "learning_rate": 0.000579000697986675, |
| "loss": 1437.5031, |
| "step": 4550 |
| }, |
| { |
| "ce_loss_13": 3.214897334575653, |
| "ce_loss_17": 3.110537755489349, |
| "ce_loss_2": 4.426256227493286, |
| "ce_loss_4": 4.127910017967224, |
| "ce_loss_9": 3.507897675037384, |
| "epoch": 0.456, |
| "grad_norm": 876.0, |
| "kl_loss_13": 252.3795295715332, |
| "kl_loss_2": 2724.5778930664064, |
| "kl_loss_4": 2163.3832946777343, |
| "kl_loss_9": 912.0921417236328, |
| "learning_rate": 0.0005774335701417662, |
| "loss": 1480.0213, |
| "step": 4560 |
| }, |
| { |
| "ce_loss_13": 3.2058955311775206, |
| "ce_loss_17": 3.10800119638443, |
| "ce_loss_2": 4.4174775838851925, |
| "ce_loss_4": 4.123294258117676, |
| "ce_loss_9": 3.4903197050094605, |
| "epoch": 0.457, |
| "grad_norm": 888.0, |
| "kl_loss_13": 242.82202224731446, |
| "kl_loss_2": 2726.372521972656, |
| "kl_loss_4": 2165.899053955078, |
| "kl_loss_9": 888.9030212402344, |
| "learning_rate": 0.0005758656625416658, |
| "loss": 1483.9846, |
| "step": 4570 |
| }, |
| { |
| "ce_loss_13": 3.2643279910087584, |
| "ce_loss_17": 3.158558702468872, |
| "ce_loss_2": 4.43040611743927, |
| "ce_loss_4": 4.139791047573089, |
| "ce_loss_9": 3.534331130981445, |
| "epoch": 0.458, |
| "grad_norm": 860.0, |
| "kl_loss_13": 249.05729598999022, |
| "kl_loss_2": 2628.913671875, |
| "kl_loss_4": 2082.1982238769533, |
| "kl_loss_9": 877.5784484863282, |
| "learning_rate": 0.0005742969909751859, |
| "loss": 1439.8111, |
| "step": 4580 |
| }, |
| { |
| "ce_loss_13": 3.271359217166901, |
| "ce_loss_17": 3.1717960596084596, |
| "ce_loss_2": 4.465836477279663, |
| "ce_loss_4": 4.1649953603744505, |
| "ce_loss_9": 3.5440791487693786, |
| "epoch": 0.459, |
| "grad_norm": 888.0, |
| "kl_loss_13": 246.49940490722656, |
| "kl_loss_2": 2696.7085815429687, |
| "kl_loss_4": 2124.31787109375, |
| "kl_loss_9": 878.0618438720703, |
| "learning_rate": 0.0005727275712388318, |
| "loss": 1487.7734, |
| "step": 4590 |
| }, |
| { |
| "ce_loss_13": 3.292631280422211, |
| "ce_loss_17": 3.199222040176392, |
| "ce_loss_2": 4.438827574253082, |
| "ce_loss_4": 4.14014880657196, |
| "ce_loss_9": 3.5531943082809447, |
| "epoch": 0.46, |
| "grad_norm": 1012.0, |
| "kl_loss_13": 238.26314697265624, |
| "kl_loss_2": 2587.5807250976563, |
| "kl_loss_4": 2030.396795654297, |
| "kl_loss_9": 847.1401580810547, |
| "learning_rate": 0.0005711574191366427, |
| "loss": 1446.0883, |
| "step": 4600 |
| }, |
| { |
| "ce_loss_13": 3.2462383985519407, |
| "ce_loss_17": 3.15254967212677, |
| "ce_loss_2": 4.409518206119538, |
| "ce_loss_4": 4.115434217453003, |
| "ce_loss_9": 3.5157724380493165, |
| "epoch": 0.461, |
| "grad_norm": 960.0, |
| "kl_loss_13": 241.7682014465332, |
| "kl_loss_2": 2628.2000122070312, |
| "kl_loss_4": 2072.7570373535154, |
| "kl_loss_9": 867.3012725830079, |
| "learning_rate": 0.0005695865504800327, |
| "loss": 1449.4152, |
| "step": 4610 |
| }, |
| { |
| "ce_loss_13": 3.1912040233612062, |
| "ce_loss_17": 3.082474875450134, |
| "ce_loss_2": 4.451409220695496, |
| "ce_loss_4": 4.150456476211548, |
| "ce_loss_9": 3.4877715706825256, |
| "epoch": 0.462, |
| "grad_norm": 888.0, |
| "kl_loss_13": 257.0031867980957, |
| "kl_loss_2": 2807.7813110351562, |
| "kl_loss_4": 2252.2212158203124, |
| "kl_loss_9": 932.7056304931641, |
| "learning_rate": 0.0005680149810876322, |
| "loss": 1504.068, |
| "step": 4620 |
| }, |
| { |
| "ce_loss_13": 3.2387269616127012, |
| "ce_loss_17": 3.140761303901672, |
| "ce_loss_2": 4.430392503738403, |
| "ce_loss_4": 4.132601082324982, |
| "ce_loss_9": 3.5092081785202027, |
| "epoch": 0.463, |
| "grad_norm": 932.0, |
| "kl_loss_13": 241.06493453979493, |
| "kl_loss_2": 2656.53515625, |
| "kl_loss_4": 2107.958044433594, |
| "kl_loss_9": 861.8116119384765, |
| "learning_rate": 0.0005664427267851271, |
| "loss": 1461.0905, |
| "step": 4630 |
| }, |
| { |
| "ce_loss_13": 3.1637516140937807, |
| "ce_loss_17": 3.063162994384766, |
| "ce_loss_2": 4.346575164794922, |
| "ce_loss_4": 4.054389178752899, |
| "ce_loss_9": 3.4423089027404785, |
| "epoch": 0.464, |
| "grad_norm": 976.0, |
| "kl_loss_13": 242.30985794067382, |
| "kl_loss_2": 2655.5493774414062, |
| "kl_loss_4": 2102.231896972656, |
| "kl_loss_9": 868.5125274658203, |
| "learning_rate": 0.0005648698034051009, |
| "loss": 1456.1764, |
| "step": 4640 |
| }, |
| { |
| "ce_loss_13": 3.27357702255249, |
| "ce_loss_17": 3.1738630175590514, |
| "ce_loss_2": 4.479008650779724, |
| "ce_loss_4": 4.180851852893829, |
| "ce_loss_9": 3.5502917051315306, |
| "epoch": 0.465, |
| "grad_norm": 872.0, |
| "kl_loss_13": 242.08240203857423, |
| "kl_loss_2": 2697.251513671875, |
| "kl_loss_4": 2139.713934326172, |
| "kl_loss_9": 873.7429962158203, |
| "learning_rate": 0.0005632962267868747, |
| "loss": 1449.8048, |
| "step": 4650 |
| }, |
| { |
| "ce_loss_13": 3.209519600868225, |
| "ce_loss_17": 3.114519214630127, |
| "ce_loss_2": 4.37313836812973, |
| "ce_loss_4": 4.087342858314514, |
| "ce_loss_9": 3.4847055315971374, |
| "epoch": 0.466, |
| "grad_norm": 1020.0, |
| "kl_loss_13": 236.69105072021483, |
| "kl_loss_2": 2604.515380859375, |
| "kl_loss_4": 2071.4075561523437, |
| "kl_loss_9": 860.996826171875, |
| "learning_rate": 0.0005617220127763474, |
| "loss": 1459.733, |
| "step": 4660 |
| }, |
| { |
| "ce_loss_13": 3.2913490414619444, |
| "ce_loss_17": 3.1948550820350645, |
| "ce_loss_2": 4.443688607215881, |
| "ce_loss_4": 4.153319680690766, |
| "ce_loss_9": 3.5587194561958313, |
| "epoch": 0.467, |
| "grad_norm": 984.0, |
| "kl_loss_13": 242.51566543579102, |
| "kl_loss_2": 2593.6571533203123, |
| "kl_loss_4": 2046.8360961914063, |
| "kl_loss_9": 862.1263122558594, |
| "learning_rate": 0.0005601471772258368, |
| "loss": 1457.6073, |
| "step": 4670 |
| }, |
| { |
| "ce_loss_13": 3.279325079917908, |
| "ce_loss_17": 3.1844658613204957, |
| "ce_loss_2": 4.4286264181137085, |
| "ce_loss_4": 4.134890782833099, |
| "ce_loss_9": 3.545341122150421, |
| "epoch": 0.468, |
| "grad_norm": 820.0, |
| "kl_loss_13": 240.5227828979492, |
| "kl_loss_2": 2573.2077758789064, |
| "kl_loss_4": 2025.697283935547, |
| "kl_loss_9": 845.3602294921875, |
| "learning_rate": 0.0005585717359939192, |
| "loss": 1457.2997, |
| "step": 4680 |
| }, |
| { |
| "ce_loss_13": 3.1878591895103456, |
| "ce_loss_17": 3.0894285798072816, |
| "ce_loss_2": 4.350538182258606, |
| "ce_loss_4": 4.055094122886658, |
| "ce_loss_9": 3.4581305503845217, |
| "epoch": 0.469, |
| "grad_norm": 1176.0, |
| "kl_loss_13": 241.09145126342773, |
| "kl_loss_2": 2594.5868896484376, |
| "kl_loss_4": 2046.383935546875, |
| "kl_loss_9": 860.8360290527344, |
| "learning_rate": 0.0005569957049452703, |
| "loss": 1477.0623, |
| "step": 4690 |
| }, |
| { |
| "ce_loss_13": 3.24889053106308, |
| "ce_loss_17": 3.1471008181571962, |
| "ce_loss_2": 4.4327600479125975, |
| "ce_loss_4": 4.140732765197754, |
| "ce_loss_9": 3.5251524448394775, |
| "epoch": 0.47, |
| "grad_norm": 968.0, |
| "kl_loss_13": 248.6085075378418, |
| "kl_loss_2": 2670.2482788085936, |
| "kl_loss_4": 2113.751745605469, |
| "kl_loss_9": 880.6789093017578, |
| "learning_rate": 0.0005554190999505056, |
| "loss": 1479.4576, |
| "step": 4700 |
| }, |
| { |
| "ce_loss_13": 3.3635989308357237, |
| "ce_loss_17": 3.261705422401428, |
| "ce_loss_2": 4.534376430511474, |
| "ce_loss_4": 4.240249371528625, |
| "ce_loss_9": 3.639077877998352, |
| "epoch": 0.471, |
| "grad_norm": 884.0, |
| "kl_loss_13": 255.55682144165038, |
| "kl_loss_2": 2659.8042602539062, |
| "kl_loss_4": 2107.002813720703, |
| "kl_loss_9": 887.0404846191407, |
| "learning_rate": 0.0005538419368860196, |
| "loss": 1426.4711, |
| "step": 4710 |
| }, |
| { |
| "ce_loss_13": 3.298137605190277, |
| "ce_loss_17": 3.184324860572815, |
| "ce_loss_2": 4.444453656673431, |
| "ce_loss_4": 4.150001633167267, |
| "ce_loss_9": 3.5541068077087403, |
| "epoch": 0.472, |
| "grad_norm": 816.0, |
| "kl_loss_13": 263.3406044006348, |
| "kl_loss_2": 2624.729248046875, |
| "kl_loss_4": 2068.8202270507813, |
| "kl_loss_9": 867.560171508789, |
| "learning_rate": 0.0005522642316338268, |
| "loss": 1485.7726, |
| "step": 4720 |
| }, |
| { |
| "ce_loss_13": 3.295651078224182, |
| "ce_loss_17": 3.1997266888618467, |
| "ce_loss_2": 4.445337700843811, |
| "ce_loss_4": 4.166255760192871, |
| "ce_loss_9": 3.561964285373688, |
| "epoch": 0.473, |
| "grad_norm": 996.0, |
| "kl_loss_13": 256.49093475341795, |
| "kl_loss_2": 2603.633349609375, |
| "kl_loss_4": 2076.9752685546873, |
| "kl_loss_9": 860.5860107421875, |
| "learning_rate": 0.0005506860000814017, |
| "loss": 1489.0584, |
| "step": 4730 |
| }, |
| { |
| "ce_loss_13": 3.323046934604645, |
| "ce_loss_17": 3.224143898487091, |
| "ce_loss_2": 4.439942741394043, |
| "ce_loss_4": 4.1534304022789, |
| "ce_loss_9": 3.5785869002342223, |
| "epoch": 0.474, |
| "grad_norm": 924.0, |
| "kl_loss_13": 242.021492767334, |
| "kl_loss_2": 2548.1635986328124, |
| "kl_loss_4": 2009.906396484375, |
| "kl_loss_9": 845.0722259521484, |
| "learning_rate": 0.0005491072581215186, |
| "loss": 1443.7427, |
| "step": 4740 |
| }, |
| { |
| "ce_loss_13": 3.3203973531723023, |
| "ce_loss_17": 3.216915714740753, |
| "ce_loss_2": 4.467496109008789, |
| "ce_loss_4": 4.1805973768234255, |
| "ce_loss_9": 3.587935519218445, |
| "epoch": 0.475, |
| "grad_norm": 1056.0, |
| "kl_loss_13": 250.21990051269532, |
| "kl_loss_2": 2637.1995361328127, |
| "kl_loss_4": 2086.691931152344, |
| "kl_loss_9": 879.4140747070312, |
| "learning_rate": 0.0005475280216520913, |
| "loss": 1432.984, |
| "step": 4750 |
| }, |
| { |
| "ce_loss_13": 3.2384687662124634, |
| "ce_loss_17": 3.1443339347839356, |
| "ce_loss_2": 4.38439908027649, |
| "ce_loss_4": 4.094705259799957, |
| "ce_loss_9": 3.5072630763053896, |
| "epoch": 0.476, |
| "grad_norm": 948.0, |
| "kl_loss_13": 238.86000366210936, |
| "kl_loss_2": 2571.2765380859373, |
| "kl_loss_4": 2025.8844909667969, |
| "kl_loss_9": 850.1655731201172, |
| "learning_rate": 0.0005459483065760138, |
| "loss": 1468.6286, |
| "step": 4760 |
| }, |
| { |
| "ce_loss_13": 3.183466649055481, |
| "ce_loss_17": 3.08471519947052, |
| "ce_loss_2": 4.40746123790741, |
| "ce_loss_4": 4.111117601394653, |
| "ce_loss_9": 3.470612585544586, |
| "epoch": 0.477, |
| "grad_norm": 1012.0, |
| "kl_loss_13": 243.05873184204103, |
| "kl_loss_2": 2743.385595703125, |
| "kl_loss_4": 2186.896203613281, |
| "kl_loss_9": 896.5440216064453, |
| "learning_rate": 0.0005443681288009991, |
| "loss": 1479.9271, |
| "step": 4770 |
| }, |
| { |
| "ce_loss_13": 3.232217800617218, |
| "ce_loss_17": 3.1330276131629944, |
| "ce_loss_2": 4.409432864189148, |
| "ce_loss_4": 4.114718520641327, |
| "ce_loss_9": 3.506826901435852, |
| "epoch": 0.478, |
| "grad_norm": 1088.0, |
| "kl_loss_13": 240.03599319458007, |
| "kl_loss_2": 2659.6222412109373, |
| "kl_loss_4": 2100.3444274902345, |
| "kl_loss_9": 866.5295532226562, |
| "learning_rate": 0.0005427875042394199, |
| "loss": 1469.6061, |
| "step": 4780 |
| }, |
| { |
| "ce_loss_13": 3.270896017551422, |
| "ce_loss_17": 3.168141782283783, |
| "ce_loss_2": 4.419343400001526, |
| "ce_loss_4": 4.1240739822387695, |
| "ce_loss_9": 3.5369176268577576, |
| "epoch": 0.479, |
| "grad_norm": 868.0, |
| "kl_loss_13": 246.61552047729492, |
| "kl_loss_2": 2591.948449707031, |
| "kl_loss_4": 2045.0651611328126, |
| "kl_loss_9": 866.8050689697266, |
| "learning_rate": 0.0005412064488081482, |
| "loss": 1466.9504, |
| "step": 4790 |
| }, |
| { |
| "ce_loss_13": 3.2598042249679566, |
| "ce_loss_17": 3.1666521310806273, |
| "ce_loss_2": 4.414253401756286, |
| "ce_loss_4": 4.116017127037049, |
| "ce_loss_9": 3.5245125889778137, |
| "epoch": 0.48, |
| "grad_norm": 932.0, |
| "kl_loss_13": 238.16119842529298, |
| "kl_loss_2": 2589.067431640625, |
| "kl_loss_4": 2036.2339416503905, |
| "kl_loss_9": 845.9292144775391, |
| "learning_rate": 0.0005396249784283942, |
| "loss": 1434.757, |
| "step": 4800 |
| }, |
| { |
| "ce_loss_13": 3.2845727801322937, |
| "ce_loss_17": 3.1830533266067507, |
| "ce_loss_2": 4.485634207725525, |
| "ce_loss_4": 4.1873292326927185, |
| "ce_loss_9": 3.5634528160095216, |
| "epoch": 0.481, |
| "grad_norm": 968.0, |
| "kl_loss_13": 246.77734298706054, |
| "kl_loss_2": 2699.9413208007813, |
| "kl_loss_4": 2131.593865966797, |
| "kl_loss_9": 887.4481536865235, |
| "learning_rate": 0.0005380431090255476, |
| "loss": 1473.4531, |
| "step": 4810 |
| }, |
| { |
| "ce_loss_13": 3.2814700961112977, |
| "ce_loss_17": 3.193215215206146, |
| "ce_loss_2": 4.4245933294296265, |
| "ce_loss_4": 4.132190155982971, |
| "ce_loss_9": 3.5425297617912292, |
| "epoch": 0.482, |
| "grad_norm": 1184.0, |
| "kl_loss_13": 231.40986251831055, |
| "kl_loss_2": 2578.60458984375, |
| "kl_loss_4": 2040.8356628417969, |
| "kl_loss_9": 836.1674194335938, |
| "learning_rate": 0.0005364608565290155, |
| "loss": 1423.5715, |
| "step": 4820 |
| }, |
| { |
| "ce_loss_13": 3.289795529842377, |
| "ce_loss_17": 3.191315495967865, |
| "ce_loss_2": 4.4568726301193236, |
| "ce_loss_4": 4.1591292977333065, |
| "ce_loss_9": 3.5526525378227234, |
| "epoch": 0.483, |
| "grad_norm": 872.0, |
| "kl_loss_13": 241.10177307128907, |
| "kl_loss_2": 2639.099560546875, |
| "kl_loss_4": 2086.164587402344, |
| "kl_loss_9": 860.5158966064453, |
| "learning_rate": 0.0005348782368720626, |
| "loss": 1454.6872, |
| "step": 4830 |
| }, |
| { |
| "ce_loss_13": 3.2221515774726868, |
| "ce_loss_17": 3.125793623924255, |
| "ce_loss_2": 4.382792353630066, |
| "ce_loss_4": 4.09155660867691, |
| "ce_loss_9": 3.492680823802948, |
| "epoch": 0.484, |
| "grad_norm": 772.0, |
| "kl_loss_13": 236.99618377685547, |
| "kl_loss_2": 2593.3800537109373, |
| "kl_loss_4": 2039.3712097167968, |
| "kl_loss_9": 842.8997680664063, |
| "learning_rate": 0.000533295265991652, |
| "loss": 1451.5818, |
| "step": 4840 |
| }, |
| { |
| "ce_loss_13": 3.2917858004570006, |
| "ce_loss_17": 3.193897032737732, |
| "ce_loss_2": 4.424165105819702, |
| "ce_loss_4": 4.141590797901154, |
| "ce_loss_9": 3.557798135280609, |
| "epoch": 0.485, |
| "grad_norm": 884.0, |
| "kl_loss_13": 240.38711700439453, |
| "kl_loss_2": 2543.8928466796874, |
| "kl_loss_4": 2021.5502502441407, |
| "kl_loss_9": 846.9335510253907, |
| "learning_rate": 0.0005317119598282822, |
| "loss": 1419.6698, |
| "step": 4850 |
| }, |
| { |
| "ce_loss_13": 3.294071066379547, |
| "ce_loss_17": 3.195480978488922, |
| "ce_loss_2": 4.44334431886673, |
| "ce_loss_4": 4.1645449042320255, |
| "ce_loss_9": 3.562126672267914, |
| "epoch": 0.486, |
| "grad_norm": 856.0, |
| "kl_loss_13": 240.23355255126953, |
| "kl_loss_2": 2578.2186889648438, |
| "kl_loss_4": 2052.1957824707033, |
| "kl_loss_9": 857.8381500244141, |
| "learning_rate": 0.0005301283343258293, |
| "loss": 1436.1154, |
| "step": 4860 |
| }, |
| { |
| "ce_loss_13": 3.3502646565437315, |
| "ce_loss_17": 3.253378117084503, |
| "ce_loss_2": 4.473665189743042, |
| "ce_loss_4": 4.19256546497345, |
| "ce_loss_9": 3.613093137741089, |
| "epoch": 0.487, |
| "grad_norm": 876.0, |
| "kl_loss_13": 238.80475616455078, |
| "kl_loss_2": 2553.577917480469, |
| "kl_loss_4": 2017.983416748047, |
| "kl_loss_9": 851.2505432128906, |
| "learning_rate": 0.000528544405431384, |
| "loss": 1417.274, |
| "step": 4870 |
| }, |
| { |
| "ce_loss_13": 3.238815200328827, |
| "ce_loss_17": 3.1343557596206666, |
| "ce_loss_2": 4.403094947338104, |
| "ce_loss_4": 4.1182872414588925, |
| "ce_loss_9": 3.5149595499038697, |
| "epoch": 0.488, |
| "grad_norm": 1000.0, |
| "kl_loss_13": 247.85373153686524, |
| "kl_loss_2": 2661.6438110351564, |
| "kl_loss_4": 2111.142120361328, |
| "kl_loss_9": 887.7988708496093, |
| "learning_rate": 0.000526960189095093, |
| "loss": 1472.446, |
| "step": 4880 |
| }, |
| { |
| "ce_loss_13": 3.219186842441559, |
| "ce_loss_17": 3.122846579551697, |
| "ce_loss_2": 4.383413958549499, |
| "ce_loss_4": 4.097567927837372, |
| "ce_loss_9": 3.4923867702484133, |
| "epoch": 0.489, |
| "grad_norm": 864.0, |
| "kl_loss_13": 237.92819519042968, |
| "kl_loss_2": 2596.8593994140624, |
| "kl_loss_4": 2055.7096252441406, |
| "kl_loss_9": 847.8617401123047, |
| "learning_rate": 0.0005253757012699972, |
| "loss": 1437.4262, |
| "step": 4890 |
| }, |
| { |
| "ce_loss_13": 3.29185836315155, |
| "ce_loss_17": 3.1962204456329344, |
| "ce_loss_2": 4.444211626052857, |
| "ce_loss_4": 4.151696789264679, |
| "ce_loss_9": 3.5513964891433716, |
| "epoch": 0.49, |
| "grad_norm": 852.0, |
| "kl_loss_13": 238.34862365722657, |
| "kl_loss_2": 2579.6810546875, |
| "kl_loss_4": 2029.3398315429688, |
| "kl_loss_9": 845.1798248291016, |
| "learning_rate": 0.0005237909579118712, |
| "loss": 1450.6535, |
| "step": 4900 |
| }, |
| { |
| "ce_loss_13": 3.263746476173401, |
| "ce_loss_17": 3.1618027091026306, |
| "ce_loss_2": 4.450617575645447, |
| "ce_loss_4": 4.161546063423157, |
| "ce_loss_9": 3.541788303852081, |
| "epoch": 0.491, |
| "grad_norm": 1048.0, |
| "kl_loss_13": 247.26928482055663, |
| "kl_loss_2": 2666.4929321289064, |
| "kl_loss_4": 2120.8137145996093, |
| "kl_loss_9": 881.1005310058594, |
| "learning_rate": 0.0005222059749790631, |
| "loss": 1471.1471, |
| "step": 4910 |
| }, |
| { |
| "ce_loss_13": 3.3210319757461546, |
| "ce_loss_17": 3.225182604789734, |
| "ce_loss_2": 4.433528733253479, |
| "ce_loss_4": 4.147412490844727, |
| "ce_loss_9": 3.5761449575424193, |
| "epoch": 0.492, |
| "grad_norm": 824.0, |
| "kl_loss_13": 236.25469055175782, |
| "kl_loss_2": 2520.1626586914062, |
| "kl_loss_4": 1986.6041870117188, |
| "kl_loss_9": 835.0551452636719, |
| "learning_rate": 0.0005206207684323337, |
| "loss": 1395.8805, |
| "step": 4920 |
| }, |
| { |
| "ce_loss_13": 3.301472795009613, |
| "ce_loss_17": 3.203929030895233, |
| "ce_loss_2": 4.4502195596694945, |
| "ce_loss_4": 4.15687175989151, |
| "ce_loss_9": 3.5700029611587523, |
| "epoch": 0.493, |
| "grad_norm": 912.0, |
| "kl_loss_13": 241.81357421875, |
| "kl_loss_2": 2588.5178955078127, |
| "kl_loss_4": 2049.602673339844, |
| "kl_loss_9": 855.5626373291016, |
| "learning_rate": 0.000519035354234695, |
| "loss": 1463.6256, |
| "step": 4930 |
| }, |
| { |
| "ce_loss_13": 3.286547601222992, |
| "ce_loss_17": 3.1806051254272463, |
| "ce_loss_2": 4.435503339767456, |
| "ce_loss_4": 4.140270352363586, |
| "ce_loss_9": 3.5548422574996947, |
| "epoch": 0.494, |
| "grad_norm": 956.0, |
| "kl_loss_13": 245.41824188232422, |
| "kl_loss_2": 2584.5112548828124, |
| "kl_loss_4": 2034.346044921875, |
| "kl_loss_9": 861.2739959716797, |
| "learning_rate": 0.0005174497483512506, |
| "loss": 1426.6146, |
| "step": 4940 |
| }, |
| { |
| "ce_loss_13": 3.321953308582306, |
| "ce_loss_17": 3.23127121925354, |
| "ce_loss_2": 4.459312295913696, |
| "ce_loss_4": 4.167674922943116, |
| "ce_loss_9": 3.5815550327301025, |
| "epoch": 0.495, |
| "grad_norm": 912.0, |
| "kl_loss_13": 235.25918197631836, |
| "kl_loss_2": 2577.7537353515627, |
| "kl_loss_4": 2032.7030029296875, |
| "kl_loss_9": 845.6281585693359, |
| "learning_rate": 0.0005158639667490339, |
| "loss": 1461.4805, |
| "step": 4950 |
| }, |
| { |
| "ce_loss_13": 3.22986695766449, |
| "ce_loss_17": 3.1316942811012267, |
| "ce_loss_2": 4.396744513511658, |
| "ce_loss_4": 4.107758033275604, |
| "ce_loss_9": 3.5002617597579957, |
| "epoch": 0.496, |
| "grad_norm": 980.0, |
| "kl_loss_13": 240.02241744995118, |
| "kl_loss_2": 2621.9751831054687, |
| "kl_loss_4": 2073.7432067871096, |
| "kl_loss_9": 866.0383880615234, |
| "learning_rate": 0.0005142780253968481, |
| "loss": 1449.6266, |
| "step": 4960 |
| }, |
| { |
| "ce_loss_13": 3.1805750370025634, |
| "ce_loss_17": 3.08574892282486, |
| "ce_loss_2": 4.326789712905883, |
| "ce_loss_4": 4.03907573223114, |
| "ce_loss_9": 3.4398080110549927, |
| "epoch": 0.497, |
| "grad_norm": 972.0, |
| "kl_loss_13": 231.72888107299804, |
| "kl_loss_2": 2571.455126953125, |
| "kl_loss_4": 2033.9963317871093, |
| "kl_loss_9": 830.5175415039063, |
| "learning_rate": 0.0005126919402651053, |
| "loss": 1399.7183, |
| "step": 4970 |
| }, |
| { |
| "ce_loss_13": 3.2543250203132628, |
| "ce_loss_17": 3.1528120279312133, |
| "ce_loss_2": 4.42558331489563, |
| "ce_loss_4": 4.135217249393463, |
| "ce_loss_9": 3.5347471714019774, |
| "epoch": 0.498, |
| "grad_norm": 892.0, |
| "kl_loss_13": 242.64834365844726, |
| "kl_loss_2": 2608.9553100585936, |
| "kl_loss_4": 2068.4495056152346, |
| "kl_loss_9": 865.389892578125, |
| "learning_rate": 0.0005111057273256647, |
| "loss": 1453.5617, |
| "step": 4980 |
| }, |
| { |
| "ce_loss_13": 3.345826256275177, |
| "ce_loss_17": 3.2567189216613768, |
| "ce_loss_2": 4.424938821792603, |
| "ce_loss_4": 4.140935170650482, |
| "ce_loss_9": 3.5866678953170776, |
| "epoch": 0.499, |
| "grad_norm": 996.0, |
| "kl_loss_13": 226.10517501831055, |
| "kl_loss_2": 2436.4197998046875, |
| "kl_loss_4": 1916.8664916992188, |
| "kl_loss_9": 802.2586853027344, |
| "learning_rate": 0.0005095194025516733, |
| "loss": 1376.7618, |
| "step": 4990 |
| }, |
| { |
| "ce_loss_13": 3.276675271987915, |
| "ce_loss_17": 3.1833918690681458, |
| "ce_loss_2": 4.406952524185181, |
| "ce_loss_4": 4.123424065113068, |
| "ce_loss_9": 3.5300026416778563, |
| "epoch": 0.5, |
| "grad_norm": 940.0, |
| "kl_loss_13": 230.78496932983398, |
| "kl_loss_2": 2547.992431640625, |
| "kl_loss_4": 2013.721453857422, |
| "kl_loss_9": 824.7653076171875, |
| "learning_rate": 0.000507932981917404, |
| "loss": 1458.0455, |
| "step": 5000 |
| }, |
| { |
| "ce_loss_13": 3.227001595497131, |
| "ce_loss_17": 3.1263611793518065, |
| "ce_loss_2": 4.428818273544311, |
| "ce_loss_4": 4.133854484558105, |
| "ce_loss_9": 3.508305788040161, |
| "epoch": 0.501, |
| "grad_norm": 956.0, |
| "kl_loss_13": 246.21904602050782, |
| "kl_loss_2": 2690.306262207031, |
| "kl_loss_4": 2135.8757934570312, |
| "kl_loss_9": 884.2395935058594, |
| "learning_rate": 0.0005063464813980949, |
| "loss": 1484.9651, |
| "step": 5010 |
| }, |
| { |
| "ce_loss_13": 3.2097026348114013, |
| "ce_loss_17": 3.117544937133789, |
| "ce_loss_2": 4.372553920745849, |
| "ce_loss_4": 4.083785223960876, |
| "ce_loss_9": 3.4720048546791076, |
| "epoch": 0.502, |
| "grad_norm": 904.0, |
| "kl_loss_13": 234.7597915649414, |
| "kl_loss_2": 2619.0628173828127, |
| "kl_loss_4": 2080.6287170410155, |
| "kl_loss_9": 859.3595123291016, |
| "learning_rate": 0.0005047599169697884, |
| "loss": 1435.5339, |
| "step": 5020 |
| }, |
| { |
| "ce_loss_13": 3.1530491590499876, |
| "ce_loss_17": 3.0544331192970278, |
| "ce_loss_2": 4.330958092212677, |
| "ce_loss_4": 4.043696069717408, |
| "ce_loss_9": 3.426003265380859, |
| "epoch": 0.503, |
| "grad_norm": 1104.0, |
| "kl_loss_13": 235.11533889770507, |
| "kl_loss_2": 2620.079296875, |
| "kl_loss_4": 2080.308935546875, |
| "kl_loss_9": 849.3477172851562, |
| "learning_rate": 0.000503173304609171, |
| "loss": 1410.7125, |
| "step": 5030 |
| }, |
| { |
| "ce_loss_13": 3.2696502208709717, |
| "ce_loss_17": 3.1747093081474302, |
| "ce_loss_2": 4.4174240827560425, |
| "ce_loss_4": 4.133626675605774, |
| "ce_loss_9": 3.542594039440155, |
| "epoch": 0.504, |
| "grad_norm": 936.0, |
| "kl_loss_13": 235.67844467163087, |
| "kl_loss_2": 2577.290148925781, |
| "kl_loss_4": 2042.3094116210937, |
| "kl_loss_9": 853.2701599121094, |
| "learning_rate": 0.0005015866602934111, |
| "loss": 1414.3605, |
| "step": 5040 |
| }, |
| { |
| "ce_loss_13": 3.2455977320671083, |
| "ce_loss_17": 3.145504105091095, |
| "ce_loss_2": 4.4242793917655945, |
| "ce_loss_4": 4.13698718547821, |
| "ce_loss_9": 3.5282525777816773, |
| "epoch": 0.505, |
| "grad_norm": 848.0, |
| "kl_loss_13": 246.58940048217772, |
| "kl_loss_2": 2655.48291015625, |
| "kl_loss_4": 2103.589318847656, |
| "kl_loss_9": 893.3353790283203, |
| "learning_rate": 0.0005, |
| "loss": 1452.2792, |
| "step": 5050 |
| }, |
| { |
| "ce_loss_13": 3.2372453212738037, |
| "ce_loss_17": 3.143076753616333, |
| "ce_loss_2": 4.386533856391907, |
| "ce_loss_4": 4.101909339427948, |
| "ce_loss_9": 3.506837558746338, |
| "epoch": 0.506, |
| "grad_norm": 1024.0, |
| "kl_loss_13": 239.5323829650879, |
| "kl_loss_2": 2589.192907714844, |
| "kl_loss_4": 2055.511614990234, |
| "kl_loss_9": 859.9503234863281, |
| "learning_rate": 0.0004984133397065889, |
| "loss": 1415.6783, |
| "step": 5060 |
| }, |
| { |
| "ce_loss_13": 3.245497965812683, |
| "ce_loss_17": 3.1446333050727846, |
| "ce_loss_2": 4.426615655422211, |
| "ce_loss_4": 4.133918762207031, |
| "ce_loss_9": 3.5249499678611755, |
| "epoch": 0.507, |
| "grad_norm": 876.0, |
| "kl_loss_13": 239.8615364074707, |
| "kl_loss_2": 2624.0885498046873, |
| "kl_loss_4": 2074.615850830078, |
| "kl_loss_9": 863.5698425292969, |
| "learning_rate": 0.0004968266953908291, |
| "loss": 1423.1771, |
| "step": 5070 |
| }, |
| { |
| "ce_loss_13": 3.2790528893470765, |
| "ce_loss_17": 3.1869851469993593, |
| "ce_loss_2": 4.449378228187561, |
| "ce_loss_4": 4.151314640045166, |
| "ce_loss_9": 3.543865203857422, |
| "epoch": 0.508, |
| "grad_norm": 944.0, |
| "kl_loss_13": 232.71310272216797, |
| "kl_loss_2": 2621.4929077148436, |
| "kl_loss_4": 2075.585577392578, |
| "kl_loss_9": 853.6735382080078, |
| "learning_rate": 0.0004952400830302117, |
| "loss": 1436.4579, |
| "step": 5080 |
| }, |
| { |
| "ce_loss_13": 3.2073542714118957, |
| "ce_loss_17": 3.109194242954254, |
| "ce_loss_2": 4.3964375257492065, |
| "ce_loss_4": 4.102424001693725, |
| "ce_loss_9": 3.4876574635505677, |
| "epoch": 0.509, |
| "grad_norm": 932.0, |
| "kl_loss_13": 241.81171798706055, |
| "kl_loss_2": 2655.2519409179686, |
| "kl_loss_4": 2103.470178222656, |
| "kl_loss_9": 872.4802947998047, |
| "learning_rate": 0.0004936535186019053, |
| "loss": 1440.1688, |
| "step": 5090 |
| }, |
| { |
| "ce_loss_13": 3.304133379459381, |
| "ce_loss_17": 3.2125471115112303, |
| "ce_loss_2": 4.425479125976563, |
| "ce_loss_4": 4.146884608268738, |
| "ce_loss_9": 3.5583510994911194, |
| "epoch": 0.51, |
| "grad_norm": 868.0, |
| "kl_loss_13": 228.76439056396484, |
| "kl_loss_2": 2507.0686279296874, |
| "kl_loss_4": 1979.644189453125, |
| "kl_loss_9": 812.3001708984375, |
| "learning_rate": 0.000492067018082596, |
| "loss": 1409.3196, |
| "step": 5100 |
| }, |
| { |
| "ce_loss_13": 3.2472472906112673, |
| "ce_loss_17": 3.1495702028274537, |
| "ce_loss_2": 4.4543510437011715, |
| "ce_loss_4": 4.162413489818573, |
| "ce_loss_9": 3.526298940181732, |
| "epoch": 0.511, |
| "grad_norm": 1096.0, |
| "kl_loss_13": 242.95312118530273, |
| "kl_loss_2": 2697.7456298828124, |
| "kl_loss_4": 2149.5037475585937, |
| "kl_loss_9": 883.1267456054687, |
| "learning_rate": 0.0004904805974483267, |
| "loss": 1494.2959, |
| "step": 5110 |
| }, |
| { |
| "ce_loss_13": 3.3556219696998597, |
| "ce_loss_17": 3.255929160118103, |
| "ce_loss_2": 4.532062077522278, |
| "ce_loss_4": 4.2496236085891725, |
| "ce_loss_9": 3.643121039867401, |
| "epoch": 0.512, |
| "grad_norm": 932.0, |
| "kl_loss_13": 252.16768798828124, |
| "kl_loss_2": 2673.134423828125, |
| "kl_loss_4": 2127.1182861328125, |
| "kl_loss_9": 907.9591217041016, |
| "learning_rate": 0.0004888942726743353, |
| "loss": 1510.2235, |
| "step": 5120 |
| }, |
| { |
| "ce_loss_13": 3.230607581138611, |
| "ce_loss_17": 3.1330650806427003, |
| "ce_loss_2": 4.398468375205994, |
| "ce_loss_4": 4.109967064857483, |
| "ce_loss_9": 3.5019327640533446, |
| "epoch": 0.513, |
| "grad_norm": 1012.0, |
| "kl_loss_13": 242.79933395385743, |
| "kl_loss_2": 2641.539416503906, |
| "kl_loss_4": 2095.720983886719, |
| "kl_loss_9": 871.6864196777344, |
| "learning_rate": 0.0004873080597348947, |
| "loss": 1462.0812, |
| "step": 5130 |
| }, |
| { |
| "ce_loss_13": 3.1205553293228148, |
| "ce_loss_17": 3.021705949306488, |
| "ce_loss_2": 4.3372450232505795, |
| "ce_loss_4": 4.0543635249137875, |
| "ce_loss_9": 3.407736039161682, |
| "epoch": 0.514, |
| "grad_norm": 932.0, |
| "kl_loss_13": 238.0709213256836, |
| "kl_loss_2": 2727.278076171875, |
| "kl_loss_4": 2196.502264404297, |
| "kl_loss_9": 890.4388610839844, |
| "learning_rate": 0.0004857219746031519, |
| "loss": 1473.211, |
| "step": 5140 |
| }, |
| { |
| "ce_loss_13": 3.2836833596229553, |
| "ce_loss_17": 3.1918662071228026, |
| "ce_loss_2": 4.433004128932953, |
| "ce_loss_4": 4.1365177750587465, |
| "ce_loss_9": 3.5420670032501222, |
| "epoch": 0.515, |
| "grad_norm": 908.0, |
| "kl_loss_13": 238.53227844238282, |
| "kl_loss_2": 2574.837512207031, |
| "kl_loss_4": 2027.2145141601563, |
| "kl_loss_9": 845.2674926757812, |
| "learning_rate": 0.0004841360332509663, |
| "loss": 1438.6002, |
| "step": 5150 |
| }, |
| { |
| "ce_loss_13": 3.2391231775283815, |
| "ce_loss_17": 3.1462583899497987, |
| "ce_loss_2": 4.387050712108612, |
| "ce_loss_4": 4.098397672176361, |
| "ce_loss_9": 3.497805321216583, |
| "epoch": 0.516, |
| "grad_norm": 872.0, |
| "kl_loss_13": 232.6981399536133, |
| "kl_loss_2": 2570.5610595703124, |
| "kl_loss_4": 2030.766796875, |
| "kl_loss_9": 832.0125366210938, |
| "learning_rate": 0.0004825502516487497, |
| "loss": 1385.7259, |
| "step": 5160 |
| }, |
| { |
| "ce_loss_13": 3.202157497406006, |
| "ce_loss_17": 3.108566415309906, |
| "ce_loss_2": 4.3786345481872555, |
| "ce_loss_4": 4.087872278690338, |
| "ce_loss_9": 3.4742018818855285, |
| "epoch": 0.517, |
| "grad_norm": 912.0, |
| "kl_loss_13": 237.90475997924804, |
| "kl_loss_2": 2644.0026733398436, |
| "kl_loss_4": 2100.5954772949217, |
| "kl_loss_9": 866.8111968994141, |
| "learning_rate": 0.00048096464576530507, |
| "loss": 1460.8957, |
| "step": 5170 |
| }, |
| { |
| "ce_loss_13": 3.3097125053405763, |
| "ce_loss_17": 3.213398313522339, |
| "ce_loss_2": 4.409957647323608, |
| "ce_loss_4": 4.126878249645233, |
| "ce_loss_9": 3.5622095346450804, |
| "epoch": 0.518, |
| "grad_norm": 952.0, |
| "kl_loss_13": 235.99602737426758, |
| "kl_loss_2": 2503.4755615234376, |
| "kl_loss_4": 1975.0342224121093, |
| "kl_loss_9": 829.3927062988281, |
| "learning_rate": 0.00047937923156766646, |
| "loss": 1403.223, |
| "step": 5180 |
| }, |
| { |
| "ce_loss_13": 3.3514813661575316, |
| "ce_loss_17": 3.258249056339264, |
| "ce_loss_2": 4.452425241470337, |
| "ce_loss_4": 4.160135567188263, |
| "ce_loss_9": 3.5974743604660033, |
| "epoch": 0.519, |
| "grad_norm": 820.0, |
| "kl_loss_13": 230.52173767089843, |
| "kl_loss_2": 2502.7245483398438, |
| "kl_loss_4": 1968.4182495117188, |
| "kl_loss_9": 825.0557922363281, |
| "learning_rate": 0.00047779402502093696, |
| "loss": 1405.9625, |
| "step": 5190 |
| }, |
| { |
| "ce_loss_13": 3.3159550189971925, |
| "ce_loss_17": 3.2244293212890627, |
| "ce_loss_2": 4.439442348480225, |
| "ce_loss_4": 4.158687317371369, |
| "ce_loss_9": 3.5760621070861816, |
| "epoch": 0.52, |
| "grad_norm": 916.0, |
| "kl_loss_13": 234.95609817504882, |
| "kl_loss_2": 2543.1817138671877, |
| "kl_loss_4": 2013.0815979003905, |
| "kl_loss_9": 841.3401336669922, |
| "learning_rate": 0.0004762090420881289, |
| "loss": 1426.2323, |
| "step": 5200 |
| }, |
| { |
| "ce_loss_13": 3.2310401082038878, |
| "ce_loss_17": 3.143102526664734, |
| "ce_loss_2": 4.3658442735672, |
| "ce_loss_4": 4.078952825069427, |
| "ce_loss_9": 3.49340238571167, |
| "epoch": 0.521, |
| "grad_norm": 824.0, |
| "kl_loss_13": 233.0906898498535, |
| "kl_loss_2": 2547.193078613281, |
| "kl_loss_4": 2009.5703979492187, |
| "kl_loss_9": 835.896206665039, |
| "learning_rate": 0.00047462429873000296, |
| "loss": 1395.5598, |
| "step": 5210 |
| }, |
| { |
| "ce_loss_13": 3.3198703289031983, |
| "ce_loss_17": 3.2258225798606874, |
| "ce_loss_2": 4.4462830305099486, |
| "ce_loss_4": 4.158935689926148, |
| "ce_loss_9": 3.57533620595932, |
| "epoch": 0.522, |
| "grad_norm": 836.0, |
| "kl_loss_13": 236.85490493774415, |
| "kl_loss_2": 2561.7283447265627, |
| "kl_loss_4": 2020.519708251953, |
| "kl_loss_9": 831.8292388916016, |
| "learning_rate": 0.0004730398109049071, |
| "loss": 1407.2447, |
| "step": 5220 |
| }, |
| { |
| "ce_loss_13": 3.2472725987434385, |
| "ce_loss_17": 3.1479132175445557, |
| "ce_loss_2": 4.430426096916198, |
| "ce_loss_4": 4.146101117134094, |
| "ce_loss_9": 3.526295793056488, |
| "epoch": 0.523, |
| "grad_norm": 932.0, |
| "kl_loss_13": 244.75756149291993, |
| "kl_loss_2": 2654.38798828125, |
| "kl_loss_4": 2114.736853027344, |
| "kl_loss_9": 883.2355834960938, |
| "learning_rate": 0.000471455594568616, |
| "loss": 1443.6139, |
| "step": 5230 |
| }, |
| { |
| "ce_loss_13": 3.315943658351898, |
| "ce_loss_17": 3.2224785685539246, |
| "ce_loss_2": 4.430027461051941, |
| "ce_loss_4": 4.143380248546601, |
| "ce_loss_9": 3.569036054611206, |
| "epoch": 0.524, |
| "grad_norm": 992.0, |
| "kl_loss_13": 235.09022521972656, |
| "kl_loss_2": 2522.860241699219, |
| "kl_loss_4": 1978.1872314453126, |
| "kl_loss_9": 827.0627166748047, |
| "learning_rate": 0.00046987166567417086, |
| "loss": 1420.9344, |
| "step": 5240 |
| }, |
| { |
| "ce_loss_13": 3.2369293093681337, |
| "ce_loss_17": 3.146747362613678, |
| "ce_loss_2": 4.396955442428589, |
| "ce_loss_4": 4.106416141986847, |
| "ce_loss_9": 3.5002312660217285, |
| "epoch": 0.525, |
| "grad_norm": 932.0, |
| "kl_loss_13": 234.92754592895508, |
| "kl_loss_2": 2593.167028808594, |
| "kl_loss_4": 2044.6620056152344, |
| "kl_loss_9": 842.1581970214844, |
| "learning_rate": 0.00046828804017171776, |
| "loss": 1392.3518, |
| "step": 5250 |
| }, |
| { |
| "ce_loss_13": 3.284357285499573, |
| "ce_loss_17": 3.1804961442947386, |
| "ce_loss_2": 4.468084740638733, |
| "ce_loss_4": 4.174921703338623, |
| "ce_loss_9": 3.560269320011139, |
| "epoch": 0.526, |
| "grad_norm": 1168.0, |
| "kl_loss_13": 240.1732093811035, |
| "kl_loss_2": 2630.087890625, |
| "kl_loss_4": 2079.335302734375, |
| "kl_loss_9": 861.2389770507813, |
| "learning_rate": 0.00046670473400834805, |
| "loss": 1458.5312, |
| "step": 5260 |
| }, |
| { |
| "ce_loss_13": 3.2199405431747437, |
| "ce_loss_17": 3.125663161277771, |
| "ce_loss_2": 4.352294254302978, |
| "ce_loss_4": 4.063109636306763, |
| "ce_loss_9": 3.4701417565345762, |
| "epoch": 0.527, |
| "grad_norm": 864.0, |
| "kl_loss_13": 230.97345123291015, |
| "kl_loss_2": 2543.17138671875, |
| "kl_loss_4": 2003.5989868164063, |
| "kl_loss_9": 825.7855102539063, |
| "learning_rate": 0.00046512176312793734, |
| "loss": 1452.7487, |
| "step": 5270 |
| }, |
| { |
| "ce_loss_13": 3.210582745075226, |
| "ce_loss_17": 3.114614963531494, |
| "ce_loss_2": 4.365846920013428, |
| "ce_loss_4": 4.07510724067688, |
| "ce_loss_9": 3.4768617153167725, |
| "epoch": 0.528, |
| "grad_norm": 816.0, |
| "kl_loss_13": 235.7464958190918, |
| "kl_loss_2": 2607.3465942382813, |
| "kl_loss_4": 2052.3574157714843, |
| "kl_loss_9": 843.5831573486328, |
| "learning_rate": 0.00046353914347098467, |
| "loss": 1445.1469, |
| "step": 5280 |
| }, |
| { |
| "ce_loss_13": 3.3082749605178834, |
| "ce_loss_17": 3.2137810468673704, |
| "ce_loss_2": 4.448835802078247, |
| "ce_loss_4": 4.163150906562805, |
| "ce_loss_9": 3.568954873085022, |
| "epoch": 0.529, |
| "grad_norm": 920.0, |
| "kl_loss_13": 233.536181640625, |
| "kl_loss_2": 2564.11416015625, |
| "kl_loss_4": 2030.8201416015625, |
| "kl_loss_9": 831.7883270263671, |
| "learning_rate": 0.0004619568909744524, |
| "loss": 1444.5604, |
| "step": 5290 |
| }, |
| { |
| "ce_loss_13": 3.308987283706665, |
| "ce_loss_17": 3.2154680252075196, |
| "ce_loss_2": 4.432635307312012, |
| "ce_loss_4": 4.152930164337159, |
| "ce_loss_9": 3.562365674972534, |
| "epoch": 0.53, |
| "grad_norm": 936.0, |
| "kl_loss_13": 235.57002716064454, |
| "kl_loss_2": 2524.588244628906, |
| "kl_loss_4": 1994.4384704589843, |
| "kl_loss_9": 829.7635955810547, |
| "learning_rate": 0.00046037502157160573, |
| "loss": 1423.2525, |
| "step": 5300 |
| }, |
| { |
| "ce_loss_13": 3.1924183845520018, |
| "ce_loss_17": 3.0945096135139467, |
| "ce_loss_2": 4.345118379592895, |
| "ce_loss_4": 4.058304846286774, |
| "ce_loss_9": 3.457942259311676, |
| "epoch": 0.531, |
| "grad_norm": 848.0, |
| "kl_loss_13": 239.82484741210936, |
| "kl_loss_2": 2598.5821899414063, |
| "kl_loss_4": 2063.542687988281, |
| "kl_loss_9": 849.6396301269531, |
| "learning_rate": 0.00045879355119185207, |
| "loss": 1446.259, |
| "step": 5310 |
| }, |
| { |
| "ce_loss_13": 3.269328761100769, |
| "ce_loss_17": 3.174268388748169, |
| "ce_loss_2": 4.4317457437515255, |
| "ce_loss_4": 4.140791881084442, |
| "ce_loss_9": 3.5443851590156554, |
| "epoch": 0.532, |
| "grad_norm": 944.0, |
| "kl_loss_13": 241.83887634277343, |
| "kl_loss_2": 2627.425256347656, |
| "kl_loss_4": 2076.6944274902344, |
| "kl_loss_9": 872.8814453125, |
| "learning_rate": 0.0004572124957605803, |
| "loss": 1461.4662, |
| "step": 5320 |
| }, |
| { |
| "ce_loss_13": 3.285194683074951, |
| "ce_loss_17": 3.186634087562561, |
| "ce_loss_2": 4.433029246330261, |
| "ce_loss_4": 4.138253295421601, |
| "ce_loss_9": 3.5500720739364624, |
| "epoch": 0.533, |
| "grad_norm": 880.0, |
| "kl_loss_13": 240.8382713317871, |
| "kl_loss_2": 2600.680529785156, |
| "kl_loss_4": 2042.1682067871093, |
| "kl_loss_9": 850.68427734375, |
| "learning_rate": 0.00045563187119900103, |
| "loss": 1412.1377, |
| "step": 5330 |
| }, |
| { |
| "ce_loss_13": 3.1356484055519105, |
| "ce_loss_17": 3.0415505886077883, |
| "ce_loss_2": 4.3238618016242985, |
| "ce_loss_4": 4.028524541854859, |
| "ce_loss_9": 3.4074003219604494, |
| "epoch": 0.534, |
| "grad_norm": 956.0, |
| "kl_loss_13": 235.38914947509767, |
| "kl_loss_2": 2667.044677734375, |
| "kl_loss_4": 2106.6890869140625, |
| "kl_loss_9": 862.3077911376953, |
| "learning_rate": 0.00045405169342398633, |
| "loss": 1454.8516, |
| "step": 5340 |
| }, |
| { |
| "ce_loss_13": 3.224562954902649, |
| "ce_loss_17": 3.126737880706787, |
| "ce_loss_2": 4.3905526280403135, |
| "ce_loss_4": 4.104603385925293, |
| "ce_loss_9": 3.49308865070343, |
| "epoch": 0.535, |
| "grad_norm": 848.0, |
| "kl_loss_13": 241.32951736450195, |
| "kl_loss_2": 2628.3176513671874, |
| "kl_loss_4": 2090.070068359375, |
| "kl_loss_9": 858.0560638427735, |
| "learning_rate": 0.0004524719783479088, |
| "loss": 1424.0244, |
| "step": 5350 |
| }, |
| { |
| "ce_loss_13": 3.1741369366645813, |
| "ce_loss_17": 3.0767220854759216, |
| "ce_loss_2": 4.362726187705993, |
| "ce_loss_4": 4.075022065639496, |
| "ce_loss_9": 3.452989196777344, |
| "epoch": 0.536, |
| "grad_norm": 852.0, |
| "kl_loss_13": 240.72121887207032, |
| "kl_loss_2": 2669.3479248046874, |
| "kl_loss_4": 2126.304315185547, |
| "kl_loss_9": 872.2170806884766, |
| "learning_rate": 0.00045089274187848144, |
| "loss": 1431.9494, |
| "step": 5360 |
| }, |
| { |
| "ce_loss_13": 3.298326337337494, |
| "ce_loss_17": 3.202434706687927, |
| "ce_loss_2": 4.426736211776733, |
| "ce_loss_4": 4.1472920179367065, |
| "ce_loss_9": 3.5499486446380617, |
| "epoch": 0.537, |
| "grad_norm": 888.0, |
| "kl_loss_13": 235.2048225402832, |
| "kl_loss_2": 2578.008203125, |
| "kl_loss_4": 2040.4716735839843, |
| "kl_loss_9": 838.3128295898438, |
| "learning_rate": 0.00044931399991859835, |
| "loss": 1416.3664, |
| "step": 5370 |
| }, |
| { |
| "ce_loss_13": 3.15212277173996, |
| "ce_loss_17": 3.059235119819641, |
| "ce_loss_2": 4.311778402328491, |
| "ce_loss_4": 4.015219748020172, |
| "ce_loss_9": 3.421120047569275, |
| "epoch": 0.538, |
| "grad_norm": 940.0, |
| "kl_loss_13": 231.76331634521483, |
| "kl_loss_2": 2606.6871948242188, |
| "kl_loss_4": 2051.1831481933596, |
| "kl_loss_9": 844.9657623291016, |
| "learning_rate": 0.00044773576836617336, |
| "loss": 1413.9028, |
| "step": 5380 |
| }, |
| { |
| "ce_loss_13": 3.2439461231231688, |
| "ce_loss_17": 3.1480749607086183, |
| "ce_loss_2": 4.4129960298538204, |
| "ce_loss_4": 4.122675991058349, |
| "ce_loss_9": 3.5230408787727354, |
| "epoch": 0.539, |
| "grad_norm": 988.0, |
| "kl_loss_13": 237.98947219848634, |
| "kl_loss_2": 2625.866259765625, |
| "kl_loss_4": 2077.430450439453, |
| "kl_loss_9": 864.8189117431641, |
| "learning_rate": 0.00044615806311398056, |
| "loss": 1468.0268, |
| "step": 5390 |
| }, |
| { |
| "ce_loss_13": 3.3209930181503298, |
| "ce_loss_17": 3.229605257511139, |
| "ce_loss_2": 4.391498923301697, |
| "ce_loss_4": 4.109268450737, |
| "ce_loss_9": 3.5633479714393617, |
| "epoch": 0.54, |
| "grad_norm": 924.0, |
| "kl_loss_13": 230.18476257324218, |
| "kl_loss_2": 2462.3640991210937, |
| "kl_loss_4": 1935.889404296875, |
| "kl_loss_9": 811.630825805664, |
| "learning_rate": 0.00044458090004949454, |
| "loss": 1413.2641, |
| "step": 5400 |
| }, |
| { |
| "ce_loss_13": 3.190377962589264, |
| "ce_loss_17": 3.087027299404144, |
| "ce_loss_2": 4.405064845085144, |
| "ce_loss_4": 4.107898688316345, |
| "ce_loss_9": 3.4767847418785096, |
| "epoch": 0.541, |
| "grad_norm": 840.0, |
| "kl_loss_13": 247.92353210449218, |
| "kl_loss_2": 2743.4492065429686, |
| "kl_loss_4": 2178.961114501953, |
| "kl_loss_9": 898.308383178711, |
| "learning_rate": 0.0004430042950547297, |
| "loss": 1451.5533, |
| "step": 5410 |
| }, |
| { |
| "ce_loss_13": 3.283571946620941, |
| "ce_loss_17": 3.175406312942505, |
| "ce_loss_2": 4.4423020362854, |
| "ce_loss_4": 4.14836574792862, |
| "ce_loss_9": 3.549823021888733, |
| "epoch": 0.542, |
| "grad_norm": 844.0, |
| "kl_loss_13": 246.515869140625, |
| "kl_loss_2": 2621.01884765625, |
| "kl_loss_4": 2066.5546936035157, |
| "kl_loss_9": 862.0836517333985, |
| "learning_rate": 0.0004414282640060809, |
| "loss": 1434.6312, |
| "step": 5420 |
| }, |
| { |
| "ce_loss_13": 3.3572264909744263, |
| "ce_loss_17": 3.262089800834656, |
| "ce_loss_2": 4.476845741271973, |
| "ce_loss_4": 4.201539206504822, |
| "ce_loss_9": 3.6384461522102356, |
| "epoch": 0.543, |
| "grad_norm": 1000.0, |
| "kl_loss_13": 237.77007369995118, |
| "kl_loss_2": 2498.5660766601563, |
| "kl_loss_4": 1978.6057250976562, |
| "kl_loss_9": 861.1564147949218, |
| "learning_rate": 0.0004398528227741633, |
| "loss": 1426.9819, |
| "step": 5430 |
| }, |
| { |
| "ce_loss_13": 3.233256757259369, |
| "ce_loss_17": 3.1367274641990663, |
| "ce_loss_2": 4.391225600242615, |
| "ce_loss_4": 4.101148080825806, |
| "ce_loss_9": 3.507310390472412, |
| "epoch": 0.544, |
| "grad_norm": 932.0, |
| "kl_loss_13": 242.8495719909668, |
| "kl_loss_2": 2594.2315673828125, |
| "kl_loss_4": 2050.564532470703, |
| "kl_loss_9": 872.3645721435547, |
| "learning_rate": 0.00043827798722365264, |
| "loss": 1451.5957, |
| "step": 5440 |
| }, |
| { |
| "ce_loss_13": 3.3480281233787537, |
| "ce_loss_17": 3.2546151757240294, |
| "ce_loss_2": 4.454211759567261, |
| "ce_loss_4": 4.16955418586731, |
| "ce_loss_9": 3.5987664222717286, |
| "epoch": 0.545, |
| "grad_norm": 972.0, |
| "kl_loss_13": 237.6885887145996, |
| "kl_loss_2": 2520.513293457031, |
| "kl_loss_4": 1988.2488708496094, |
| "kl_loss_9": 833.6821960449219, |
| "learning_rate": 0.00043670377321312535, |
| "loss": 1410.8355, |
| "step": 5450 |
| }, |
| { |
| "ce_loss_13": 3.3540380597114563, |
| "ce_loss_17": 3.261450242996216, |
| "ce_loss_2": 4.453149652481079, |
| "ce_loss_4": 4.1675089478492735, |
| "ce_loss_9": 3.601581835746765, |
| "epoch": 0.546, |
| "grad_norm": 872.0, |
| "kl_loss_13": 231.9382209777832, |
| "kl_loss_2": 2500.5212890625, |
| "kl_loss_4": 1967.8952209472657, |
| "kl_loss_9": 823.4026123046875, |
| "learning_rate": 0.0004351301965948991, |
| "loss": 1408.3826, |
| "step": 5460 |
| }, |
| { |
| "ce_loss_13": 3.260563921928406, |
| "ce_loss_17": 3.168972909450531, |
| "ce_loss_2": 4.369951272010804, |
| "ce_loss_4": 4.088285195827484, |
| "ce_loss_9": 3.5148344159126284, |
| "epoch": 0.547, |
| "grad_norm": 1256.0, |
| "kl_loss_13": 230.15998916625978, |
| "kl_loss_2": 2501.23369140625, |
| "kl_loss_4": 1973.2835815429687, |
| "kl_loss_9": 827.8443908691406, |
| "learning_rate": 0.000433557273214873, |
| "loss": 1410.1893, |
| "step": 5470 |
| }, |
| { |
| "ce_loss_13": 3.25427725315094, |
| "ce_loss_17": 3.1570460915565492, |
| "ce_loss_2": 4.379108524322509, |
| "ce_loss_4": 4.0927928447723385, |
| "ce_loss_9": 3.5142199039459228, |
| "epoch": 0.548, |
| "grad_norm": 928.0, |
| "kl_loss_13": 236.9877830505371, |
| "kl_loss_2": 2545.434216308594, |
| "kl_loss_4": 1998.2710754394532, |
| "kl_loss_9": 831.4971374511719, |
| "learning_rate": 0.000431985018912368, |
| "loss": 1389.0537, |
| "step": 5480 |
| }, |
| { |
| "ce_loss_13": 3.226706564426422, |
| "ce_loss_17": 3.1292683482170105, |
| "ce_loss_2": 4.399243092536926, |
| "ce_loss_4": 4.112428474426269, |
| "ce_loss_9": 3.495073413848877, |
| "epoch": 0.549, |
| "grad_norm": 868.0, |
| "kl_loss_13": 239.58219223022462, |
| "kl_loss_2": 2627.8002319335938, |
| "kl_loss_4": 2095.0270385742188, |
| "kl_loss_9": 860.5090057373047, |
| "learning_rate": 0.0004304134495199674, |
| "loss": 1409.3469, |
| "step": 5490 |
| }, |
| { |
| "ce_loss_13": 3.2517759561538697, |
| "ce_loss_17": 3.1568957448005674, |
| "ce_loss_2": 4.401349353790283, |
| "ce_loss_4": 4.105891764163971, |
| "ce_loss_9": 3.525186467170715, |
| "epoch": 0.55, |
| "grad_norm": 852.0, |
| "kl_loss_13": 238.64617233276368, |
| "kl_loss_2": 2620.271130371094, |
| "kl_loss_4": 2072.4135192871095, |
| "kl_loss_9": 874.193798828125, |
| "learning_rate": 0.0004288425808633575, |
| "loss": 1428.5023, |
| "step": 5500 |
| }, |
| { |
| "ce_loss_13": 3.230370843410492, |
| "ce_loss_17": 3.1366963267326353, |
| "ce_loss_2": 4.378269791603088, |
| "ce_loss_4": 4.090001904964447, |
| "ce_loss_9": 3.4878253698349, |
| "epoch": 0.551, |
| "grad_norm": 1032.0, |
| "kl_loss_13": 233.13706436157227, |
| "kl_loss_2": 2593.618603515625, |
| "kl_loss_4": 2050.0465698242188, |
| "kl_loss_9": 842.4681884765625, |
| "learning_rate": 0.0004272724287611684, |
| "loss": 1431.1554, |
| "step": 5510 |
| }, |
| { |
| "ce_loss_13": 3.2069764852523805, |
| "ce_loss_17": 3.1097816228866577, |
| "ce_loss_2": 4.379705595970154, |
| "ce_loss_4": 4.098069417476654, |
| "ce_loss_9": 3.4648272037506103, |
| "epoch": 0.552, |
| "grad_norm": 840.0, |
| "kl_loss_13": 238.2747917175293, |
| "kl_loss_2": 2633.7432861328125, |
| "kl_loss_4": 2101.164514160156, |
| "kl_loss_9": 846.2826568603516, |
| "learning_rate": 0.00042570300902481425, |
| "loss": 1439.5931, |
| "step": 5520 |
| }, |
| { |
| "ce_loss_13": 3.2375672459602356, |
| "ce_loss_17": 3.146790337562561, |
| "ce_loss_2": 4.3601685762405396, |
| "ce_loss_4": 4.081451714038849, |
| "ce_loss_9": 3.4896689295768737, |
| "epoch": 0.553, |
| "grad_norm": 920.0, |
| "kl_loss_13": 231.33807907104492, |
| "kl_loss_2": 2555.3967895507812, |
| "kl_loss_4": 2022.6777709960938, |
| "kl_loss_9": 830.5851776123047, |
| "learning_rate": 0.00042413433745833423, |
| "loss": 1410.9723, |
| "step": 5530 |
| }, |
| { |
| "ce_loss_13": 3.235991060733795, |
| "ce_loss_17": 3.1391934752464294, |
| "ce_loss_2": 4.394415354728698, |
| "ce_loss_4": 4.1022326111793515, |
| "ce_loss_9": 3.500549876689911, |
| "epoch": 0.554, |
| "grad_norm": 996.0, |
| "kl_loss_13": 233.80357131958007, |
| "kl_loss_2": 2598.0952392578124, |
| "kl_loss_4": 2043.8817993164062, |
| "kl_loss_9": 843.0975341796875, |
| "learning_rate": 0.0004225664298582339, |
| "loss": 1383.5615, |
| "step": 5540 |
| }, |
| { |
| "ce_loss_13": 3.309430015087128, |
| "ce_loss_17": 3.218477714061737, |
| "ce_loss_2": 4.429325103759766, |
| "ce_loss_4": 4.147542655467987, |
| "ce_loss_9": 3.5620126724243164, |
| "epoch": 0.555, |
| "grad_norm": 944.0, |
| "kl_loss_13": 230.62957534790038, |
| "kl_loss_2": 2511.424279785156, |
| "kl_loss_4": 1979.3216857910156, |
| "kl_loss_9": 823.8212493896484, |
| "learning_rate": 0.000420999302013325, |
| "loss": 1382.5365, |
| "step": 5550 |
| }, |
| { |
| "ce_loss_13": 3.216475558280945, |
| "ce_loss_17": 3.119503605365753, |
| "ce_loss_2": 4.420439648628235, |
| "ce_loss_4": 4.1296982884407045, |
| "ce_loss_9": 3.4864991903305054, |
| "epoch": 0.556, |
| "grad_norm": 912.0, |
| "kl_loss_13": 243.6633056640625, |
| "kl_loss_2": 2688.5159912109375, |
| "kl_loss_4": 2135.290557861328, |
| "kl_loss_9": 862.8586547851562, |
| "learning_rate": 0.000419432969704568, |
| "loss": 1434.2247, |
| "step": 5560 |
| }, |
| { |
| "ce_loss_13": 3.2545287132263185, |
| "ce_loss_17": 3.160950255393982, |
| "ce_loss_2": 4.383987593650818, |
| "ce_loss_4": 4.097819077968597, |
| "ce_loss_9": 3.511135494709015, |
| "epoch": 0.557, |
| "grad_norm": 868.0, |
| "kl_loss_13": 233.8695381164551, |
| "kl_loss_2": 2532.8112548828126, |
| "kl_loss_4": 1993.3291015625, |
| "kl_loss_9": 830.1571441650391, |
| "learning_rate": 0.00041786744870491154, |
| "loss": 1445.4945, |
| "step": 5570 |
| }, |
| { |
| "ce_loss_13": 3.1984161138534546, |
| "ce_loss_17": 3.10117369890213, |
| "ce_loss_2": 4.349075174331665, |
| "ce_loss_4": 4.062174940109253, |
| "ce_loss_9": 3.4661641359329223, |
| "epoch": 0.558, |
| "grad_norm": 936.0, |
| "kl_loss_13": 238.3305236816406, |
| "kl_loss_2": 2593.8963134765627, |
| "kl_loss_4": 2053.7305603027344, |
| "kl_loss_9": 860.2523498535156, |
| "learning_rate": 0.0004163027547791347, |
| "loss": 1422.9781, |
| "step": 5580 |
| }, |
| { |
| "ce_loss_13": 3.1820846796035767, |
| "ce_loss_17": 3.0863997459411623, |
| "ce_loss_2": 4.382808256149292, |
| "ce_loss_4": 4.086593902111053, |
| "ce_loss_9": 3.454039692878723, |
| "epoch": 0.559, |
| "grad_norm": 856.0, |
| "kl_loss_13": 238.1822525024414, |
| "kl_loss_2": 2674.379833984375, |
| "kl_loss_4": 2117.0798278808593, |
| "kl_loss_9": 860.9488952636718, |
| "learning_rate": 0.0004147389036836881, |
| "loss": 1438.0104, |
| "step": 5590 |
| }, |
| { |
| "ce_loss_13": 3.2278117656707765, |
| "ce_loss_17": 3.1327100872993467, |
| "ce_loss_2": 4.3901468276977536, |
| "ce_loss_4": 4.1008705496788025, |
| "ce_loss_9": 3.4992452025413514, |
| "epoch": 0.56, |
| "grad_norm": 1008.0, |
| "kl_loss_13": 236.05131149291992, |
| "kl_loss_2": 2596.961328125, |
| "kl_loss_4": 2052.2495361328124, |
| "kl_loss_9": 856.0904724121094, |
| "learning_rate": 0.00041317591116653486, |
| "loss": 1456.3472, |
| "step": 5600 |
| }, |
| { |
| "ce_loss_13": 3.255778098106384, |
| "ce_loss_17": 3.1611954808235168, |
| "ce_loss_2": 4.4192228555679325, |
| "ce_loss_4": 4.134356498718262, |
| "ce_loss_9": 3.5241804718971252, |
| "epoch": 0.561, |
| "grad_norm": 820.0, |
| "kl_loss_13": 241.46857452392578, |
| "kl_loss_2": 2619.669287109375, |
| "kl_loss_4": 2075.8135498046877, |
| "kl_loss_9": 858.2986419677734, |
| "learning_rate": 0.0004116137929669921, |
| "loss": 1427.8906, |
| "step": 5610 |
| }, |
| { |
| "ce_loss_13": 3.2517098903656008, |
| "ce_loss_17": 3.1602564215660096, |
| "ce_loss_2": 4.39375718832016, |
| "ce_loss_4": 4.109205067157745, |
| "ce_loss_9": 3.5135385155677796, |
| "epoch": 0.562, |
| "grad_norm": 996.0, |
| "kl_loss_13": 231.79026412963867, |
| "kl_loss_2": 2573.0449462890624, |
| "kl_loss_4": 2034.3422607421876, |
| "kl_loss_9": 839.1124053955078, |
| "learning_rate": 0.00041005256481557305, |
| "loss": 1399.7518, |
| "step": 5620 |
| }, |
| { |
| "ce_loss_13": 3.351257121562958, |
| "ce_loss_17": 3.2595076084136965, |
| "ce_loss_2": 4.4319993019104, |
| "ce_loss_4": 4.154893457889557, |
| "ce_loss_9": 3.590802001953125, |
| "epoch": 0.563, |
| "grad_norm": 924.0, |
| "kl_loss_13": 225.93316345214845, |
| "kl_loss_2": 2459.0507202148438, |
| "kl_loss_4": 1931.7695434570312, |
| "kl_loss_9": 803.16787109375, |
| "learning_rate": 0.00040849224243382767, |
| "loss": 1377.0832, |
| "step": 5630 |
| }, |
| { |
| "ce_loss_13": 3.2062005162239076, |
| "ce_loss_17": 3.1118099808692934, |
| "ce_loss_2": 4.359602761268616, |
| "ce_loss_4": 4.0741450667381285, |
| "ce_loss_9": 3.466958475112915, |
| "epoch": 0.564, |
| "grad_norm": 880.0, |
| "kl_loss_13": 234.28491592407227, |
| "kl_loss_2": 2585.8306518554687, |
| "kl_loss_4": 2053.440704345703, |
| "kl_loss_9": 846.0140014648438, |
| "learning_rate": 0.000406932841534185, |
| "loss": 1403.7951, |
| "step": 5640 |
| }, |
| { |
| "ce_loss_13": 3.176671230792999, |
| "ce_loss_17": 3.078430962562561, |
| "ce_loss_2": 4.331629621982574, |
| "ce_loss_4": 4.053027820587158, |
| "ce_loss_9": 3.450083887577057, |
| "epoch": 0.565, |
| "grad_norm": 1200.0, |
| "kl_loss_13": 237.31102752685547, |
| "kl_loss_2": 2619.3731201171877, |
| "kl_loss_4": 2092.3601684570312, |
| "kl_loss_9": 863.4239288330078, |
| "learning_rate": 0.0004053743778197951, |
| "loss": 1458.5658, |
| "step": 5650 |
| }, |
| { |
| "ce_loss_13": 3.274586188793182, |
| "ce_loss_17": 3.1797106742858885, |
| "ce_loss_2": 4.407772159576416, |
| "ce_loss_4": 4.133595025539398, |
| "ce_loss_9": 3.5387351512908936, |
| "epoch": 0.566, |
| "grad_norm": 884.0, |
| "kl_loss_13": 236.99430923461915, |
| "kl_loss_2": 2536.6812255859377, |
| "kl_loss_4": 2023.3791259765626, |
| "kl_loss_9": 842.8063934326171, |
| "learning_rate": 0.0004038168669843697, |
| "loss": 1437.6104, |
| "step": 5660 |
| }, |
| { |
| "ce_loss_13": 3.2296634316444397, |
| "ce_loss_17": 3.137158191204071, |
| "ce_loss_2": 4.339892792701721, |
| "ce_loss_4": 4.065719175338745, |
| "ce_loss_9": 3.4848448514938353, |
| "epoch": 0.567, |
| "grad_norm": 928.0, |
| "kl_loss_13": 232.789705657959, |
| "kl_loss_2": 2506.612060546875, |
| "kl_loss_4": 1980.6403076171875, |
| "kl_loss_9": 823.4604064941407, |
| "learning_rate": 0.000402260324712026, |
| "loss": 1429.6207, |
| "step": 5670 |
| }, |
| { |
| "ce_loss_13": 3.27266640663147, |
| "ce_loss_17": 3.180715024471283, |
| "ce_loss_2": 4.4394615650177, |
| "ce_loss_4": 4.1508574962615965, |
| "ce_loss_9": 3.538129734992981, |
| "epoch": 0.568, |
| "grad_norm": 804.0, |
| "kl_loss_13": 230.25597610473633, |
| "kl_loss_2": 2594.2272216796873, |
| "kl_loss_4": 2059.2771606445312, |
| "kl_loss_9": 840.2705047607421, |
| "learning_rate": 0.00040070476667712743, |
| "loss": 1404.5531, |
| "step": 5680 |
| }, |
| { |
| "ce_loss_13": 3.3071704864501954, |
| "ce_loss_17": 3.213378405570984, |
| "ce_loss_2": 4.430555748939514, |
| "ce_loss_4": 4.148973155021667, |
| "ce_loss_9": 3.563006508350372, |
| "epoch": 0.569, |
| "grad_norm": 840.0, |
| "kl_loss_13": 232.40666885375975, |
| "kl_loss_2": 2534.8173583984376, |
| "kl_loss_4": 2006.4983459472655, |
| "kl_loss_9": 830.2776977539063, |
| "learning_rate": 0.0003991502085441259, |
| "loss": 1418.3568, |
| "step": 5690 |
| }, |
| { |
| "ce_loss_13": 3.338138663768768, |
| "ce_loss_17": 3.2503367185592653, |
| "ce_loss_2": 4.427838611602783, |
| "ce_loss_4": 4.148164165019989, |
| "ce_loss_9": 3.583894896507263, |
| "epoch": 0.57, |
| "grad_norm": 868.0, |
| "kl_loss_13": 225.03974685668945, |
| "kl_loss_2": 2460.3081665039062, |
| "kl_loss_4": 1922.2230102539063, |
| "kl_loss_9": 797.9386169433594, |
| "learning_rate": 0.0003975966659674047, |
| "loss": 1391.193, |
| "step": 5700 |
| }, |
| { |
| "ce_loss_13": 3.305832934379578, |
| "ce_loss_17": 3.2160136818885805, |
| "ce_loss_2": 4.443049371242523, |
| "ce_loss_4": 4.157168364524841, |
| "ce_loss_9": 3.562156653404236, |
| "epoch": 0.571, |
| "grad_norm": 808.0, |
| "kl_loss_13": 233.45500640869142, |
| "kl_loss_2": 2555.1978149414062, |
| "kl_loss_4": 2015.8797180175782, |
| "kl_loss_9": 825.7862274169922, |
| "learning_rate": 0.0003960441545911204, |
| "loss": 1392.2576, |
| "step": 5710 |
| }, |
| { |
| "ce_loss_13": 3.2979984998703005, |
| "ce_loss_17": 3.20640572309494, |
| "ce_loss_2": 4.414292907714843, |
| "ce_loss_4": 4.1284249186515805, |
| "ce_loss_9": 3.5509427428245544, |
| "epoch": 0.572, |
| "grad_norm": 840.0, |
| "kl_loss_13": 231.97683868408203, |
| "kl_loss_2": 2534.3826171875, |
| "kl_loss_4": 2007.47265625, |
| "kl_loss_9": 832.1235748291016, |
| "learning_rate": 0.0003944926900490452, |
| "loss": 1402.5652, |
| "step": 5720 |
| }, |
| { |
| "ce_loss_13": 3.219693434238434, |
| "ce_loss_17": 3.1212239384651186, |
| "ce_loss_2": 4.393144679069519, |
| "ce_loss_4": 4.102212285995483, |
| "ce_loss_9": 3.490486514568329, |
| "epoch": 0.573, |
| "grad_norm": 772.0, |
| "kl_loss_13": 235.44636001586915, |
| "kl_loss_2": 2616.2996826171875, |
| "kl_loss_4": 2070.2421264648438, |
| "kl_loss_9": 851.2656707763672, |
| "learning_rate": 0.0003929422879644099, |
| "loss": 1413.3826, |
| "step": 5730 |
| }, |
| { |
| "ce_loss_13": 3.22045019865036, |
| "ce_loss_17": 3.129276084899902, |
| "ce_loss_2": 4.351107883453369, |
| "ce_loss_4": 4.054018497467041, |
| "ce_loss_9": 3.4733495831489565, |
| "epoch": 0.574, |
| "grad_norm": 864.0, |
| "kl_loss_13": 227.50900268554688, |
| "kl_loss_2": 2547.5375244140623, |
| "kl_loss_4": 2001.3611694335937, |
| "kl_loss_9": 826.952767944336, |
| "learning_rate": 0.0003913929639497462, |
| "loss": 1375.0348, |
| "step": 5740 |
| }, |
| { |
| "ce_loss_13": 3.1821637749671936, |
| "ce_loss_17": 3.087957036495209, |
| "ce_loss_2": 4.359432423114777, |
| "ce_loss_4": 4.066091692447662, |
| "ce_loss_9": 3.4462706208229066, |
| "epoch": 0.575, |
| "grad_norm": 828.0, |
| "kl_loss_13": 232.518807220459, |
| "kl_loss_2": 2636.359411621094, |
| "kl_loss_4": 2089.879962158203, |
| "kl_loss_9": 839.9574462890625, |
| "learning_rate": 0.00038984473360672965, |
| "loss": 1406.9381, |
| "step": 5750 |
| }, |
| { |
| "ce_loss_13": 3.1875351667404175, |
| "ce_loss_17": 3.0950862526893617, |
| "ce_loss_2": 4.362112975120544, |
| "ce_loss_4": 4.066373074054718, |
| "ce_loss_9": 3.4482316613197326, |
| "epoch": 0.576, |
| "grad_norm": 868.0, |
| "kl_loss_13": 227.55917205810547, |
| "kl_loss_2": 2609.50537109375, |
| "kl_loss_4": 2059.553112792969, |
| "kl_loss_9": 835.4907867431641, |
| "learning_rate": 0.0003882976125260229, |
| "loss": 1394.3872, |
| "step": 5760 |
| }, |
| { |
| "ce_loss_13": 3.253630042076111, |
| "ce_loss_17": 3.158768332004547, |
| "ce_loss_2": 4.4003081798553465, |
| "ce_loss_4": 4.105486381053924, |
| "ce_loss_9": 3.5109742760658262, |
| "epoch": 0.577, |
| "grad_norm": 1056.0, |
| "kl_loss_13": 232.02925643920898, |
| "kl_loss_2": 2572.6723388671876, |
| "kl_loss_4": 2018.9011962890625, |
| "kl_loss_9": 828.4405487060546, |
| "learning_rate": 0.00038675161628711776, |
| "loss": 1416.0004, |
| "step": 5770 |
| }, |
| { |
| "ce_loss_13": 3.287421774864197, |
| "ce_loss_17": 3.1927932143211364, |
| "ce_loss_2": 4.401708006858826, |
| "ce_loss_4": 4.117022109031677, |
| "ce_loss_9": 3.539080190658569, |
| "epoch": 0.578, |
| "grad_norm": 992.0, |
| "kl_loss_13": 233.13113174438476, |
| "kl_loss_2": 2513.6958251953124, |
| "kl_loss_4": 1986.6510437011718, |
| "kl_loss_9": 825.7736572265625, |
| "learning_rate": 0.0003852067604581794, |
| "loss": 1435.1811, |
| "step": 5780 |
| }, |
| { |
| "ce_loss_13": 3.2348132729530334, |
| "ce_loss_17": 3.1447670340538023, |
| "ce_loss_2": 4.38879165649414, |
| "ce_loss_4": 4.099067687988281, |
| "ce_loss_9": 3.494375240802765, |
| "epoch": 0.579, |
| "grad_norm": 1016.0, |
| "kl_loss_13": 229.48376007080077, |
| "kl_loss_2": 2593.1627685546873, |
| "kl_loss_4": 2053.509631347656, |
| "kl_loss_9": 838.6121520996094, |
| "learning_rate": 0.0003836630605958888, |
| "loss": 1407.9807, |
| "step": 5790 |
| }, |
| { |
| "ce_loss_13": 3.295253574848175, |
| "ce_loss_17": 3.203297519683838, |
| "ce_loss_2": 4.423774898052216, |
| "ce_loss_4": 4.145563721656799, |
| "ce_loss_9": 3.544156217575073, |
| "epoch": 0.58, |
| "grad_norm": 992.0, |
| "kl_loss_13": 233.95409469604493, |
| "kl_loss_2": 2570.943115234375, |
| "kl_loss_4": 2042.2085388183593, |
| "kl_loss_9": 833.4795379638672, |
| "learning_rate": 0.0003821205322452863, |
| "loss": 1464.8057, |
| "step": 5800 |
| }, |
| { |
| "ce_loss_13": 3.267258608341217, |
| "ce_loss_17": 3.1813372492790224, |
| "ce_loss_2": 4.396870112419128, |
| "ce_loss_4": 4.116019523143768, |
| "ce_loss_9": 3.523220121860504, |
| "epoch": 0.581, |
| "grad_norm": 1040.0, |
| "kl_loss_13": 228.84890594482422, |
| "kl_loss_2": 2556.1291381835936, |
| "kl_loss_4": 2024.586492919922, |
| "kl_loss_9": 824.1128021240235, |
| "learning_rate": 0.0003805791909396155, |
| "loss": 1413.3881, |
| "step": 5810 |
| }, |
| { |
| "ce_loss_13": 3.2223562479019163, |
| "ce_loss_17": 3.131128215789795, |
| "ce_loss_2": 4.3608808517456055, |
| "ce_loss_4": 4.078696227073669, |
| "ce_loss_9": 3.4814393758773803, |
| "epoch": 0.582, |
| "grad_norm": 828.0, |
| "kl_loss_13": 229.21967391967775, |
| "kl_loss_2": 2549.3384399414062, |
| "kl_loss_4": 2020.2727478027343, |
| "kl_loss_9": 827.5791931152344, |
| "learning_rate": 0.0003790390522001662, |
| "loss": 1418.4744, |
| "step": 5820 |
| }, |
| { |
| "ce_loss_13": 3.1667389273643494, |
| "ce_loss_17": 3.075244390964508, |
| "ce_loss_2": 4.309235787391662, |
| "ce_loss_4": 4.0269601225852965, |
| "ce_loss_9": 3.42468101978302, |
| "epoch": 0.583, |
| "grad_norm": 824.0, |
| "kl_loss_13": 228.8446029663086, |
| "kl_loss_2": 2606.355505371094, |
| "kl_loss_4": 2067.4308471679688, |
| "kl_loss_9": 837.4672698974609, |
| "learning_rate": 0.0003775001315361183, |
| "loss": 1400.2421, |
| "step": 5830 |
| }, |
| { |
| "ce_loss_13": 3.265468406677246, |
| "ce_loss_17": 3.171788048744202, |
| "ce_loss_2": 4.423987770080567, |
| "ce_loss_4": 4.135515594482422, |
| "ce_loss_9": 3.531561052799225, |
| "epoch": 0.584, |
| "grad_norm": 1088.0, |
| "kl_loss_13": 232.24418106079102, |
| "kl_loss_2": 2586.575439453125, |
| "kl_loss_4": 2047.969744873047, |
| "kl_loss_9": 836.1255950927734, |
| "learning_rate": 0.0003759624444443858, |
| "loss": 1424.1965, |
| "step": 5840 |
| }, |
| { |
| "ce_loss_13": 3.2983623504638673, |
| "ce_loss_17": 3.211426329612732, |
| "ce_loss_2": 4.418631052970886, |
| "ce_loss_4": 4.139723670482636, |
| "ce_loss_9": 3.5482105374336244, |
| "epoch": 0.585, |
| "grad_norm": 820.0, |
| "kl_loss_13": 227.23451919555663, |
| "kl_loss_2": 2537.3679321289064, |
| "kl_loss_4": 2012.68701171875, |
| "kl_loss_9": 819.8915100097656, |
| "learning_rate": 0.00037442600640946044, |
| "loss": 1386.7533, |
| "step": 5850 |
| }, |
| { |
| "ce_loss_13": 3.27207111120224, |
| "ce_loss_17": 3.1785031914711, |
| "ce_loss_2": 4.384279441833496, |
| "ce_loss_4": 4.09559998512268, |
| "ce_loss_9": 3.521187937259674, |
| "epoch": 0.586, |
| "grad_norm": 772.0, |
| "kl_loss_13": 228.3373001098633, |
| "kl_loss_2": 2530.15283203125, |
| "kl_loss_4": 1994.758740234375, |
| "kl_loss_9": 823.6193054199218, |
| "learning_rate": 0.00037289083290325663, |
| "loss": 1372.6998, |
| "step": 5860 |
| }, |
| { |
| "ce_loss_13": 3.244118702411652, |
| "ce_loss_17": 3.1523624658584595, |
| "ce_loss_2": 4.360163080692291, |
| "ce_loss_4": 4.083122837543487, |
| "ce_loss_9": 3.4959442615509033, |
| "epoch": 0.587, |
| "grad_norm": 928.0, |
| "kl_loss_13": 231.24836120605468, |
| "kl_loss_2": 2515.5857788085937, |
| "kl_loss_4": 1981.6635192871095, |
| "kl_loss_9": 816.1141571044922, |
| "learning_rate": 0.0003713569393849543, |
| "loss": 1387.4872, |
| "step": 5870 |
| }, |
| { |
| "ce_loss_13": 3.2992770671844482, |
| "ce_loss_17": 3.2059216022491457, |
| "ce_loss_2": 4.410819458961487, |
| "ce_loss_4": 4.13393474817276, |
| "ce_loss_9": 3.5530097007751467, |
| "epoch": 0.588, |
| "grad_norm": 876.0, |
| "kl_loss_13": 230.8677879333496, |
| "kl_loss_2": 2526.34072265625, |
| "kl_loss_4": 2002.4048767089844, |
| "kl_loss_9": 830.1593688964844, |
| "learning_rate": 0.00036982434130084397, |
| "loss": 1404.6256, |
| "step": 5880 |
| }, |
| { |
| "ce_loss_13": 3.212831735610962, |
| "ce_loss_17": 3.115894150733948, |
| "ce_loss_2": 4.344887983798981, |
| "ce_loss_4": 4.050029182434082, |
| "ce_loss_9": 3.469256889820099, |
| "epoch": 0.589, |
| "grad_norm": 884.0, |
| "kl_loss_13": 237.92863845825195, |
| "kl_loss_2": 2563.0936889648438, |
| "kl_loss_4": 2015.7842590332032, |
| "kl_loss_9": 839.4622436523438, |
| "learning_rate": 0.00036829305408417166, |
| "loss": 1420.9584, |
| "step": 5890 |
| }, |
| { |
| "ce_loss_13": 3.2000078201293944, |
| "ce_loss_17": 3.1018877744674684, |
| "ce_loss_2": 4.370662379264831, |
| "ce_loss_4": 4.0750037908554075, |
| "ce_loss_9": 3.4702266454696655, |
| "epoch": 0.59, |
| "grad_norm": 968.0, |
| "kl_loss_13": 237.14598693847657, |
| "kl_loss_2": 2621.7508544921875, |
| "kl_loss_4": 2067.988635253906, |
| "kl_loss_9": 850.0048950195312, |
| "learning_rate": 0.0003667630931549826, |
| "loss": 1418.2734, |
| "step": 5900 |
| }, |
| { |
| "ce_loss_13": 3.167231333255768, |
| "ce_loss_17": 3.0730149030685423, |
| "ce_loss_2": 4.366086077690125, |
| "ce_loss_4": 4.077059423923492, |
| "ce_loss_9": 3.4455093264579775, |
| "epoch": 0.591, |
| "grad_norm": 1000.0, |
| "kl_loss_13": 232.85792694091796, |
| "kl_loss_2": 2678.4753295898436, |
| "kl_loss_4": 2132.040148925781, |
| "kl_loss_9": 858.3171447753906, |
| "learning_rate": 0.00036523447391996613, |
| "loss": 1443.2424, |
| "step": 5910 |
| }, |
| { |
| "ce_loss_13": 3.254744052886963, |
| "ce_loss_17": 3.1648544311523437, |
| "ce_loss_2": 4.374972128868103, |
| "ce_loss_4": 4.092326152324676, |
| "ce_loss_9": 3.509063982963562, |
| "epoch": 0.592, |
| "grad_norm": 788.0, |
| "kl_loss_13": 229.79783325195314, |
| "kl_loss_2": 2524.7337280273437, |
| "kl_loss_4": 1988.9702270507812, |
| "kl_loss_9": 821.4826202392578, |
| "learning_rate": 0.00036370721177230114, |
| "loss": 1391.0173, |
| "step": 5920 |
| }, |
| { |
| "ce_loss_13": 3.2567181825637816, |
| "ce_loss_17": 3.16144859790802, |
| "ce_loss_2": 4.414609766006469, |
| "ce_loss_4": 4.125541710853577, |
| "ce_loss_9": 3.5193495392799377, |
| "epoch": 0.593, |
| "grad_norm": 804.0, |
| "kl_loss_13": 236.6628875732422, |
| "kl_loss_2": 2582.9745483398438, |
| "kl_loss_4": 2041.99072265625, |
| "kl_loss_9": 842.2959655761719, |
| "learning_rate": 0.00036218132209150044, |
| "loss": 1413.9506, |
| "step": 5930 |
| }, |
| { |
| "ce_loss_13": 3.2117167711257935, |
| "ce_loss_17": 3.1085928797721865, |
| "ce_loss_2": 4.395073628425598, |
| "ce_loss_4": 4.10965234041214, |
| "ce_loss_9": 3.482756757736206, |
| "epoch": 0.594, |
| "grad_norm": 932.0, |
| "kl_loss_13": 243.6380187988281, |
| "kl_loss_2": 2668.6521850585937, |
| "kl_loss_4": 2127.320697021484, |
| "kl_loss_9": 864.8132110595703, |
| "learning_rate": 0.0003606568202432562, |
| "loss": 1437.4641, |
| "step": 5940 |
| }, |
| { |
| "ce_loss_13": 3.2749355912208555, |
| "ce_loss_17": 3.180494260787964, |
| "ce_loss_2": 4.442890572547912, |
| "ce_loss_4": 4.156251358985901, |
| "ce_loss_9": 3.53540940284729, |
| "epoch": 0.595, |
| "grad_norm": 1040.0, |
| "kl_loss_13": 234.7014488220215, |
| "kl_loss_2": 2638.2986328125, |
| "kl_loss_4": 2087.424670410156, |
| "kl_loss_9": 846.3891693115235, |
| "learning_rate": 0.0003591337215792851, |
| "loss": 1406.0023, |
| "step": 5950 |
| }, |
| { |
| "ce_loss_13": 3.309464681148529, |
| "ce_loss_17": 3.2235302329063416, |
| "ce_loss_2": 4.402388203144073, |
| "ce_loss_4": 4.121564400196076, |
| "ce_loss_9": 3.556831753253937, |
| "epoch": 0.596, |
| "grad_norm": 1024.0, |
| "kl_loss_13": 223.71617889404297, |
| "kl_loss_2": 2476.5489868164063, |
| "kl_loss_4": 1953.2594482421875, |
| "kl_loss_9": 807.5070098876953, |
| "learning_rate": 0.00035761204143717383, |
| "loss": 1394.3458, |
| "step": 5960 |
| }, |
| { |
| "ce_loss_13": 3.2692392468452454, |
| "ce_loss_17": 3.1779242873191835, |
| "ce_loss_2": 4.398617744445801, |
| "ce_loss_4": 4.118852138519287, |
| "ce_loss_9": 3.5263784885406495, |
| "epoch": 0.597, |
| "grad_norm": 928.0, |
| "kl_loss_13": 230.25725250244142, |
| "kl_loss_2": 2564.5046875, |
| "kl_loss_4": 2041.696600341797, |
| "kl_loss_9": 834.9072937011719, |
| "learning_rate": 0.0003560917951402245, |
| "loss": 1439.8227, |
| "step": 5970 |
| }, |
| { |
| "ce_loss_13": 3.2496317982673646, |
| "ce_loss_17": 3.1600715041160585, |
| "ce_loss_2": 4.378537487983704, |
| "ce_loss_4": 4.102442514896393, |
| "ce_loss_9": 3.510550153255463, |
| "epoch": 0.598, |
| "grad_norm": 1224.0, |
| "kl_loss_13": 228.3923698425293, |
| "kl_loss_2": 2548.4739379882812, |
| "kl_loss_4": 2024.2143981933593, |
| "kl_loss_9": 833.0222869873047, |
| "learning_rate": 0.00035457299799730046, |
| "loss": 1400.526, |
| "step": 5980 |
| }, |
| { |
| "ce_loss_13": 3.30535683631897, |
| "ce_loss_17": 3.215275287628174, |
| "ce_loss_2": 4.417876863479615, |
| "ce_loss_4": 4.138031899929047, |
| "ce_loss_9": 3.5640909314155578, |
| "epoch": 0.599, |
| "grad_norm": 1004.0, |
| "kl_loss_13": 229.51971969604492, |
| "kl_loss_2": 2516.5375244140623, |
| "kl_loss_4": 1980.7409606933593, |
| "kl_loss_9": 825.5029388427735, |
| "learning_rate": 0.0003530556653026721, |
| "loss": 1407.9975, |
| "step": 5990 |
| }, |
| { |
| "ce_loss_13": 3.2337883830070497, |
| "ce_loss_17": 3.1406787872314452, |
| "ce_loss_2": 4.377436971664428, |
| "ce_loss_4": 4.089027559757232, |
| "ce_loss_9": 3.486391067504883, |
| "epoch": 0.6, |
| "grad_norm": 1144.0, |
| "kl_loss_13": 227.38719482421874, |
| "kl_loss_2": 2581.539196777344, |
| "kl_loss_4": 2049.9173889160156, |
| "kl_loss_9": 819.7808288574219, |
| "learning_rate": 0.00035153981233586274, |
| "loss": 1427.5375, |
| "step": 6000 |
| }, |
| { |
| "ce_loss_13": 3.20476176738739, |
| "ce_loss_17": 3.1141116976737977, |
| "ce_loss_2": 4.352747404575348, |
| "ce_loss_4": 4.067610096931458, |
| "ce_loss_9": 3.4695231437683107, |
| "epoch": 0.601, |
| "grad_norm": 1040.0, |
| "kl_loss_13": 226.83367309570312, |
| "kl_loss_2": 2564.571325683594, |
| "kl_loss_4": 2033.4016906738282, |
| "kl_loss_9": 832.6494201660156, |
| "learning_rate": 0.00035002545436149473, |
| "loss": 1450.2203, |
| "step": 6010 |
| }, |
| { |
| "ce_loss_13": 3.2144734382629396, |
| "ce_loss_17": 3.121029281616211, |
| "ce_loss_2": 4.372288644313812, |
| "ce_loss_4": 4.088346481323242, |
| "ce_loss_9": 3.4823519229888915, |
| "epoch": 0.602, |
| "grad_norm": 1080.0, |
| "kl_loss_13": 237.34789276123047, |
| "kl_loss_2": 2607.0394287109375, |
| "kl_loss_4": 2063.7788940429687, |
| "kl_loss_9": 848.1593475341797, |
| "learning_rate": 0.0003485126066291364, |
| "loss": 1400.3073, |
| "step": 6020 |
| }, |
| { |
| "ce_loss_13": 3.255044734477997, |
| "ce_loss_17": 3.164732348918915, |
| "ce_loss_2": 4.404607486724854, |
| "ce_loss_4": 4.122542726993561, |
| "ce_loss_9": 3.517188382148743, |
| "epoch": 0.603, |
| "grad_norm": 964.0, |
| "kl_loss_13": 226.77251281738282, |
| "kl_loss_2": 2559.346826171875, |
| "kl_loss_4": 2031.9142272949218, |
| "kl_loss_9": 827.2862487792969, |
| "learning_rate": 0.0003470012843731476, |
| "loss": 1413.7271, |
| "step": 6030 |
| }, |
| { |
| "ce_loss_13": 3.206252062320709, |
| "ce_loss_17": 3.1121634960174562, |
| "ce_loss_2": 4.356084418296814, |
| "ce_loss_4": 4.07145768404007, |
| "ce_loss_9": 3.464072847366333, |
| "epoch": 0.604, |
| "grad_norm": 1032.0, |
| "kl_loss_13": 228.70163803100587, |
| "kl_loss_2": 2579.2633544921873, |
| "kl_loss_4": 2046.7201721191407, |
| "kl_loss_9": 834.6922180175782, |
| "learning_rate": 0.00034549150281252633, |
| "loss": 1444.5348, |
| "step": 6040 |
| }, |
| { |
| "ce_loss_13": 3.1869379758834837, |
| "ce_loss_17": 3.0919574022293093, |
| "ce_loss_2": 4.307004332542419, |
| "ce_loss_4": 4.023111295700073, |
| "ce_loss_9": 3.43609904050827, |
| "epoch": 0.605, |
| "grad_norm": 852.0, |
| "kl_loss_13": 231.76103820800782, |
| "kl_loss_2": 2514.5482666015623, |
| "kl_loss_4": 1974.574041748047, |
| "kl_loss_9": 815.0408874511719, |
| "learning_rate": 0.0003439832771507565, |
| "loss": 1381.5484, |
| "step": 6050 |
| }, |
| { |
| "ce_loss_13": 3.1909875512123107, |
| "ce_loss_17": 3.098554015159607, |
| "ce_loss_2": 4.342778587341309, |
| "ce_loss_4": 4.053665661811829, |
| "ce_loss_9": 3.454178750514984, |
| "epoch": 0.606, |
| "grad_norm": 920.0, |
| "kl_loss_13": 228.57085647583008, |
| "kl_loss_2": 2592.7705810546877, |
| "kl_loss_4": 2045.2464477539063, |
| "kl_loss_9": 839.3021881103516, |
| "learning_rate": 0.0003424766225756537, |
| "loss": 1405.1307, |
| "step": 6060 |
| }, |
| { |
| "ce_loss_13": 3.245312237739563, |
| "ce_loss_17": 3.1574817061424256, |
| "ce_loss_2": 4.386974465847016, |
| "ce_loss_4": 4.10160768032074, |
| "ce_loss_9": 3.502121889591217, |
| "epoch": 0.607, |
| "grad_norm": 912.0, |
| "kl_loss_13": 226.91528244018554, |
| "kl_loss_2": 2561.604052734375, |
| "kl_loss_4": 2021.492694091797, |
| "kl_loss_9": 827.3878692626953, |
| "learning_rate": 0.00034097155425921255, |
| "loss": 1386.3665, |
| "step": 6070 |
| }, |
| { |
| "ce_loss_13": 3.1451975226402284, |
| "ce_loss_17": 3.0513622999191283, |
| "ce_loss_2": 4.310491049289704, |
| "ce_loss_4": 4.019709491729737, |
| "ce_loss_9": 3.4111838579177856, |
| "epoch": 0.608, |
| "grad_norm": 928.0, |
| "kl_loss_13": 232.4799789428711, |
| "kl_loss_2": 2627.470166015625, |
| "kl_loss_4": 2079.4474609375, |
| "kl_loss_9": 843.9461151123047, |
| "learning_rate": 0.0003394680873574546, |
| "loss": 1414.8074, |
| "step": 6080 |
| }, |
| { |
| "ce_loss_13": 3.248126494884491, |
| "ce_loss_17": 3.156547796726227, |
| "ce_loss_2": 4.402919721603394, |
| "ce_loss_4": 4.123408806324005, |
| "ce_loss_9": 3.51514755487442, |
| "epoch": 0.609, |
| "grad_norm": 896.0, |
| "kl_loss_13": 231.09435119628907, |
| "kl_loss_2": 2619.8621337890627, |
| "kl_loss_4": 2088.242785644531, |
| "kl_loss_9": 847.3386016845703, |
| "learning_rate": 0.0003379662370102747, |
| "loss": 1410.5209, |
| "step": 6090 |
| }, |
| { |
| "ce_loss_13": 3.260214614868164, |
| "ce_loss_17": 3.17033851146698, |
| "ce_loss_2": 4.378973543643951, |
| "ce_loss_4": 4.095032501220703, |
| "ce_loss_9": 3.5076473712921143, |
| "epoch": 0.61, |
| "grad_norm": 1004.0, |
| "kl_loss_13": 228.6587013244629, |
| "kl_loss_2": 2555.754248046875, |
| "kl_loss_4": 2012.432354736328, |
| "kl_loss_9": 829.2882019042969, |
| "learning_rate": 0.0003364660183412892, |
| "loss": 1409.8023, |
| "step": 6100 |
| }, |
| { |
| "ce_loss_13": 3.243327963352203, |
| "ce_loss_17": 3.150516951084137, |
| "ce_loss_2": 4.3727805614471436, |
| "ce_loss_4": 4.081695449352265, |
| "ce_loss_9": 3.4951064825057983, |
| "epoch": 0.611, |
| "grad_norm": 928.0, |
| "kl_loss_13": 233.1937469482422, |
| "kl_loss_2": 2564.4494750976564, |
| "kl_loss_4": 2017.0374572753906, |
| "kl_loss_9": 829.6589935302734, |
| "learning_rate": 0.0003349674464576834, |
| "loss": 1423.7318, |
| "step": 6110 |
| }, |
| { |
| "ce_loss_13": 3.192290186882019, |
| "ce_loss_17": 3.101076591014862, |
| "ce_loss_2": 4.344872784614563, |
| "ce_loss_4": 4.060330271720886, |
| "ce_loss_9": 3.4539494037628176, |
| "epoch": 0.612, |
| "grad_norm": 812.0, |
| "kl_loss_13": 231.32125244140624, |
| "kl_loss_2": 2600.617919921875, |
| "kl_loss_4": 2060.823516845703, |
| "kl_loss_9": 837.5670745849609, |
| "learning_rate": 0.00033347053645005966, |
| "loss": 1390.0195, |
| "step": 6120 |
| }, |
| { |
| "ce_loss_13": 3.292400562763214, |
| "ce_loss_17": 3.2021037340164185, |
| "ce_loss_2": 4.395995855331421, |
| "ce_loss_4": 4.1126100540161135, |
| "ce_loss_9": 3.5435969948768617, |
| "epoch": 0.613, |
| "grad_norm": 980.0, |
| "kl_loss_13": 225.01504440307616, |
| "kl_loss_2": 2464.6257080078126, |
| "kl_loss_4": 1934.4326599121093, |
| "kl_loss_9": 808.3387603759766, |
| "learning_rate": 0.00033197530339228485, |
| "loss": 1390.4135, |
| "step": 6130 |
| }, |
| { |
| "ce_loss_13": 3.252897322177887, |
| "ce_loss_17": 3.1589378714561462, |
| "ce_loss_2": 4.389720106124878, |
| "ce_loss_4": 4.1005248665809635, |
| "ce_loss_9": 3.5177234768867494, |
| "epoch": 0.614, |
| "grad_norm": 888.0, |
| "kl_loss_13": 232.7153091430664, |
| "kl_loss_2": 2543.49384765625, |
| "kl_loss_4": 2005.2430114746094, |
| "kl_loss_9": 838.3109405517578, |
| "learning_rate": 0.00033048176234133967, |
| "loss": 1400.575, |
| "step": 6140 |
| }, |
| { |
| "ce_loss_13": 3.2428000450134276, |
| "ce_loss_17": 3.1512856483459473, |
| "ce_loss_2": 4.356198191642761, |
| "ce_loss_4": 4.075578737258911, |
| "ce_loss_9": 3.502461588382721, |
| "epoch": 0.615, |
| "grad_norm": 900.0, |
| "kl_loss_13": 231.79894485473633, |
| "kl_loss_2": 2542.9717041015624, |
| "kl_loss_4": 2007.5570251464844, |
| "kl_loss_9": 844.2892639160157, |
| "learning_rate": 0.0003289899283371657, |
| "loss": 1418.6631, |
| "step": 6150 |
| }, |
| { |
| "ce_loss_13": 3.2548093795776367, |
| "ce_loss_17": 3.166204738616943, |
| "ce_loss_2": 4.406282043457031, |
| "ce_loss_4": 4.114752006530762, |
| "ce_loss_9": 3.511092782020569, |
| "epoch": 0.616, |
| "grad_norm": 1056.0, |
| "kl_loss_13": 225.45708694458008, |
| "kl_loss_2": 2561.15966796875, |
| "kl_loss_4": 2021.7472045898437, |
| "kl_loss_9": 816.1165588378906, |
| "learning_rate": 0.0003274998164025148, |
| "loss": 1428.4361, |
| "step": 6160 |
| }, |
| { |
| "ce_loss_13": 3.291805636882782, |
| "ce_loss_17": 3.1999740481376646, |
| "ce_loss_2": 4.4159709215164185, |
| "ce_loss_4": 4.134799444675446, |
| "ce_loss_9": 3.548912453651428, |
| "epoch": 0.617, |
| "grad_norm": 852.0, |
| "kl_loss_13": 232.31563873291014, |
| "kl_loss_2": 2546.1911499023436, |
| "kl_loss_4": 2009.5380004882813, |
| "kl_loss_9": 838.2556762695312, |
| "learning_rate": 0.0003260114415427975, |
| "loss": 1432.9609, |
| "step": 6170 |
| }, |
| { |
| "ce_loss_13": 3.2170358896255493, |
| "ce_loss_17": 3.126904237270355, |
| "ce_loss_2": 4.385936522483826, |
| "ce_loss_4": 4.094873237609863, |
| "ce_loss_9": 3.480419063568115, |
| "epoch": 0.618, |
| "grad_norm": 1016.0, |
| "kl_loss_13": 226.1689582824707, |
| "kl_loss_2": 2612.0671997070312, |
| "kl_loss_4": 2064.2389526367188, |
| "kl_loss_9": 831.8998748779297, |
| "learning_rate": 0.0003245248187459323, |
| "loss": 1442.7007, |
| "step": 6180 |
| }, |
| { |
| "ce_loss_13": 3.2045365929603578, |
| "ce_loss_17": 3.1164793968200684, |
| "ce_loss_2": 4.311137497425079, |
| "ce_loss_4": 4.029911494255066, |
| "ce_loss_9": 3.448254609107971, |
| "epoch": 0.619, |
| "grad_norm": 788.0, |
| "kl_loss_13": 221.98872604370118, |
| "kl_loss_2": 2506.536022949219, |
| "kl_loss_4": 1973.9262390136719, |
| "kl_loss_9": 800.507046508789, |
| "learning_rate": 0.00032303996298219416, |
| "loss": 1373.5637, |
| "step": 6190 |
| }, |
| { |
| "ce_loss_13": 3.2838147401809694, |
| "ce_loss_17": 3.1910577178001405, |
| "ce_loss_2": 4.395757746696472, |
| "ce_loss_4": 4.11085067987442, |
| "ce_loss_9": 3.529459023475647, |
| "epoch": 0.62, |
| "grad_norm": 920.0, |
| "kl_loss_13": 224.92547988891602, |
| "kl_loss_2": 2477.8688110351563, |
| "kl_loss_4": 1948.8328857421875, |
| "kl_loss_9": 798.5069763183594, |
| "learning_rate": 0.00032155688920406414, |
| "loss": 1369.3968, |
| "step": 6200 |
| }, |
| { |
| "ce_loss_13": 3.192294681072235, |
| "ce_loss_17": 3.0995654106140136, |
| "ce_loss_2": 4.3764742612838745, |
| "ce_loss_4": 4.082545292377472, |
| "ce_loss_9": 3.456455075740814, |
| "epoch": 0.621, |
| "grad_norm": 868.0, |
| "kl_loss_13": 231.96823654174804, |
| "kl_loss_2": 2630.0911376953127, |
| "kl_loss_4": 2081.5944213867188, |
| "kl_loss_9": 834.0532379150391, |
| "learning_rate": 0.0003200756123460788, |
| "loss": 1452.0631, |
| "step": 6210 |
| }, |
| { |
| "ce_loss_13": 3.2363399386405947, |
| "ce_loss_17": 3.1417808055877687, |
| "ce_loss_2": 4.39202229976654, |
| "ce_loss_4": 4.107456624507904, |
| "ce_loss_9": 3.507209098339081, |
| "epoch": 0.622, |
| "grad_norm": 928.0, |
| "kl_loss_13": 235.32316360473632, |
| "kl_loss_2": 2611.224084472656, |
| "kl_loss_4": 2073.0699157714844, |
| "kl_loss_9": 855.1863891601563, |
| "learning_rate": 0.00031859614732467957, |
| "loss": 1436.7217, |
| "step": 6220 |
| }, |
| { |
| "ce_loss_13": 3.2802329897880553, |
| "ce_loss_17": 3.1894822239875795, |
| "ce_loss_2": 4.394841420650482, |
| "ce_loss_4": 4.106322288513184, |
| "ce_loss_9": 3.5291473388671877, |
| "epoch": 0.623, |
| "grad_norm": 816.0, |
| "kl_loss_13": 224.93512268066405, |
| "kl_loss_2": 2516.4769775390623, |
| "kl_loss_4": 1968.3585266113282, |
| "kl_loss_9": 808.7358306884765, |
| "learning_rate": 0.00031711850903806275, |
| "loss": 1383.0154, |
| "step": 6230 |
| }, |
| { |
| "ce_loss_13": 3.1911649227142336, |
| "ce_loss_17": 3.094308817386627, |
| "ce_loss_2": 4.351166224479675, |
| "ce_loss_4": 4.066704988479614, |
| "ce_loss_9": 3.454544258117676, |
| "epoch": 0.624, |
| "grad_norm": 988.0, |
| "kl_loss_13": 234.15537185668944, |
| "kl_loss_2": 2614.300048828125, |
| "kl_loss_4": 2074.8535217285157, |
| "kl_loss_9": 846.2453674316406, |
| "learning_rate": 0.0003156427123660297, |
| "loss": 1401.41, |
| "step": 6240 |
| }, |
| { |
| "ce_loss_13": 3.272945058345795, |
| "ce_loss_17": 3.181516873836517, |
| "ce_loss_2": 4.378597617149353, |
| "ce_loss_4": 4.0980629324913025, |
| "ce_loss_9": 3.525704002380371, |
| "epoch": 0.625, |
| "grad_norm": 1048.0, |
| "kl_loss_13": 227.339363861084, |
| "kl_loss_2": 2497.1392456054687, |
| "kl_loss_4": 1965.319580078125, |
| "kl_loss_9": 819.4188751220703, |
| "learning_rate": 0.0003141687721698363, |
| "loss": 1399.1988, |
| "step": 6250 |
| }, |
| { |
| "ce_loss_13": 3.2418040871620177, |
| "ce_loss_17": 3.155091571807861, |
| "ce_loss_2": 4.332506418228149, |
| "ce_loss_4": 4.053801393508911, |
| "ce_loss_9": 3.482528841495514, |
| "epoch": 0.626, |
| "grad_norm": 896.0, |
| "kl_loss_13": 217.76402053833007, |
| "kl_loss_2": 2441.399853515625, |
| "kl_loss_4": 1917.8978759765625, |
| "kl_loss_9": 784.1637420654297, |
| "learning_rate": 0.00031269670329204396, |
| "loss": 1377.8039, |
| "step": 6260 |
| }, |
| { |
| "ce_loss_13": 3.281051242351532, |
| "ce_loss_17": 3.195384848117828, |
| "ce_loss_2": 4.3794536828994755, |
| "ce_loss_4": 4.093663692474365, |
| "ce_loss_9": 3.533396577835083, |
| "epoch": 0.627, |
| "grad_norm": 996.0, |
| "kl_loss_13": 227.29047241210938, |
| "kl_loss_2": 2477.8679809570312, |
| "kl_loss_4": 1938.6812133789062, |
| "kl_loss_9": 808.43310546875, |
| "learning_rate": 0.00031122652055637015, |
| "loss": 1393.3557, |
| "step": 6270 |
| }, |
| { |
| "ce_loss_13": 3.247497284412384, |
| "ce_loss_17": 3.1615176677703856, |
| "ce_loss_2": 4.3885966539382935, |
| "ce_loss_4": 4.104041790962219, |
| "ce_loss_9": 3.50910062789917, |
| "epoch": 0.628, |
| "grad_norm": 776.0, |
| "kl_loss_13": 227.87898178100585, |
| "kl_loss_2": 2578.9588012695312, |
| "kl_loss_4": 2042.0823181152343, |
| "kl_loss_9": 835.7837829589844, |
| "learning_rate": 0.0003097582387675385, |
| "loss": 1391.5011, |
| "step": 6280 |
| }, |
| { |
| "ce_loss_13": 3.2861045002937317, |
| "ce_loss_17": 3.197433149814606, |
| "ce_loss_2": 4.40528359413147, |
| "ce_loss_4": 4.116912710666656, |
| "ce_loss_9": 3.539530074596405, |
| "epoch": 0.629, |
| "grad_norm": 844.0, |
| "kl_loss_13": 229.70106735229493, |
| "kl_loss_2": 2537.421276855469, |
| "kl_loss_4": 2002.1822875976563, |
| "kl_loss_9": 822.256625366211, |
| "learning_rate": 0.00030829187271113034, |
| "loss": 1389.5541, |
| "step": 6290 |
| }, |
| { |
| "ce_loss_13": 3.2767645955085754, |
| "ce_loss_17": 3.1875134229660036, |
| "ce_loss_2": 4.3805430889129635, |
| "ce_loss_4": 4.1031859636306764, |
| "ce_loss_9": 3.516218435764313, |
| "epoch": 0.63, |
| "grad_norm": 844.0, |
| "kl_loss_13": 224.5469512939453, |
| "kl_loss_2": 2482.713037109375, |
| "kl_loss_4": 1959.737158203125, |
| "kl_loss_9": 806.6842315673828, |
| "learning_rate": 0.00030682743715343565, |
| "loss": 1403.367, |
| "step": 6300 |
| }, |
| { |
| "ce_loss_13": 3.2265755772590636, |
| "ce_loss_17": 3.131485438346863, |
| "ce_loss_2": 4.36829628944397, |
| "ce_loss_4": 4.087601912021637, |
| "ce_loss_9": 3.4934881567955016, |
| "epoch": 0.631, |
| "grad_norm": 1024.0, |
| "kl_loss_13": 234.22077407836915, |
| "kl_loss_2": 2553.2029296875, |
| "kl_loss_4": 2024.1413208007812, |
| "kl_loss_9": 841.8471282958984, |
| "learning_rate": 0.0003053649468413043, |
| "loss": 1429.9232, |
| "step": 6310 |
| }, |
| { |
| "ce_loss_13": 3.3367688417434693, |
| "ce_loss_17": 3.2449944853782653, |
| "ce_loss_2": 4.44246883392334, |
| "ce_loss_4": 4.160059881210327, |
| "ce_loss_9": 3.587943661212921, |
| "epoch": 0.632, |
| "grad_norm": 984.0, |
| "kl_loss_13": 231.40140380859376, |
| "kl_loss_2": 2513.854138183594, |
| "kl_loss_4": 1973.4285095214843, |
| "kl_loss_9": 820.7870483398438, |
| "learning_rate": 0.00030390441650199725, |
| "loss": 1383.2857, |
| "step": 6320 |
| }, |
| { |
| "ce_loss_13": 3.2414955258369447, |
| "ce_loss_17": 3.1530691981315613, |
| "ce_loss_2": 4.364517951011658, |
| "ce_loss_4": 4.077583825588226, |
| "ce_loss_9": 3.4930731296539306, |
| "epoch": 0.633, |
| "grad_norm": 1008.0, |
| "kl_loss_13": 229.3896842956543, |
| "kl_loss_2": 2529.341015625, |
| "kl_loss_4": 1986.383935546875, |
| "kl_loss_9": 821.0194274902344, |
| "learning_rate": 0.00030244586084303903, |
| "loss": 1379.6859, |
| "step": 6330 |
| }, |
| { |
| "ce_loss_13": 3.2142238140106203, |
| "ce_loss_17": 3.1198761463165283, |
| "ce_loss_2": 4.361454582214355, |
| "ce_loss_4": 4.071536684036255, |
| "ce_loss_9": 3.471467673778534, |
| "epoch": 0.634, |
| "grad_norm": 896.0, |
| "kl_loss_13": 233.71195907592772, |
| "kl_loss_2": 2602.586767578125, |
| "kl_loss_4": 2056.246545410156, |
| "kl_loss_9": 845.5255004882813, |
| "learning_rate": 0.00030098929455206903, |
| "loss": 1399.7385, |
| "step": 6340 |
| }, |
| { |
| "ce_loss_13": 3.2147318601608275, |
| "ce_loss_17": 3.1261605262756347, |
| "ce_loss_2": 4.350941133499146, |
| "ce_loss_4": 4.061924576759338, |
| "ce_loss_9": 3.467269480228424, |
| "epoch": 0.635, |
| "grad_norm": 896.0, |
| "kl_loss_13": 225.42356338500977, |
| "kl_loss_2": 2568.0879760742187, |
| "kl_loss_4": 2026.3318969726563, |
| "kl_loss_9": 822.1717559814454, |
| "learning_rate": 0.00029953473229669324, |
| "loss": 1438.9115, |
| "step": 6350 |
| }, |
| { |
| "ce_loss_13": 3.241168701648712, |
| "ce_loss_17": 3.150354278087616, |
| "ce_loss_2": 4.374743938446045, |
| "ce_loss_4": 4.088599300384521, |
| "ce_loss_9": 3.5038307547569274, |
| "epoch": 0.636, |
| "grad_norm": 828.0, |
| "kl_loss_13": 226.90768585205078, |
| "kl_loss_2": 2540.56328125, |
| "kl_loss_4": 2008.9533142089845, |
| "kl_loss_9": 833.0119720458985, |
| "learning_rate": 0.00029808218872433767, |
| "loss": 1382.8463, |
| "step": 6360 |
| }, |
| { |
| "ce_loss_13": 3.297260856628418, |
| "ce_loss_17": 3.206966185569763, |
| "ce_loss_2": 4.411247491836548, |
| "ce_loss_4": 4.131738340854644, |
| "ce_loss_9": 3.546586203575134, |
| "epoch": 0.637, |
| "grad_norm": 884.0, |
| "kl_loss_13": 224.06393508911134, |
| "kl_loss_2": 2504.6856689453125, |
| "kl_loss_4": 1975.1798217773437, |
| "kl_loss_9": 808.3064483642578, |
| "learning_rate": 0.0002966316784621, |
| "loss": 1370.6816, |
| "step": 6370 |
| }, |
| { |
| "ce_loss_13": 3.216203451156616, |
| "ce_loss_17": 3.119618463516235, |
| "ce_loss_2": 4.366829180717469, |
| "ce_loss_4": 4.080282235145569, |
| "ce_loss_9": 3.4816792726516725, |
| "epoch": 0.638, |
| "grad_norm": 960.0, |
| "kl_loss_13": 232.77178878784179, |
| "kl_loss_2": 2581.4222778320313, |
| "kl_loss_4": 2043.318603515625, |
| "kl_loss_9": 846.8696075439453, |
| "learning_rate": 0.0002951832161166024, |
| "loss": 1390.8863, |
| "step": 6380 |
| }, |
| { |
| "ce_loss_13": 3.2908342361450194, |
| "ce_loss_17": 3.1968275904655457, |
| "ce_loss_2": 4.406253838539124, |
| "ce_loss_4": 4.12986044883728, |
| "ce_loss_9": 3.5496591210365294, |
| "epoch": 0.639, |
| "grad_norm": 784.0, |
| "kl_loss_13": 229.6701316833496, |
| "kl_loss_2": 2513.99462890625, |
| "kl_loss_4": 1992.3557495117188, |
| "kl_loss_9": 825.7627868652344, |
| "learning_rate": 0.0002937368162738445, |
| "loss": 1368.4383, |
| "step": 6390 |
| }, |
| { |
| "ce_loss_13": 3.229096734523773, |
| "ce_loss_17": 3.1455905199050904, |
| "ce_loss_2": 4.349045753479004, |
| "ce_loss_4": 4.068724250793457, |
| "ce_loss_9": 3.4772324562072754, |
| "epoch": 0.64, |
| "grad_norm": 1064.0, |
| "kl_loss_13": 219.6314926147461, |
| "kl_loss_2": 2516.2894897460938, |
| "kl_loss_4": 1985.0686096191407, |
| "kl_loss_9": 805.2250122070312, |
| "learning_rate": 0.0002922924934990568, |
| "loss": 1399.7807, |
| "step": 6400 |
| }, |
| { |
| "ce_loss_13": 3.172426164150238, |
| "ce_loss_17": 3.0812967300415037, |
| "ce_loss_2": 4.342437672615051, |
| "ce_loss_4": 4.052666902542114, |
| "ce_loss_9": 3.440312457084656, |
| "epoch": 0.641, |
| "grad_norm": 888.0, |
| "kl_loss_13": 227.10142669677734, |
| "kl_loss_2": 2621.7179443359373, |
| "kl_loss_4": 2071.343231201172, |
| "kl_loss_9": 841.8452056884765, |
| "learning_rate": 0.0002908502623365536, |
| "loss": 1413.8551, |
| "step": 6410 |
| }, |
| { |
| "ce_loss_13": 3.1062959313392637, |
| "ce_loss_17": 3.0144997954368593, |
| "ce_loss_2": 4.294253861904144, |
| "ce_loss_4": 3.996906352043152, |
| "ce_loss_9": 3.3769165515899657, |
| "epoch": 0.642, |
| "grad_norm": 1004.0, |
| "kl_loss_13": 227.44247665405274, |
| "kl_loss_2": 2639.017431640625, |
| "kl_loss_4": 2094.474688720703, |
| "kl_loss_9": 843.4009368896484, |
| "learning_rate": 0.0002894101373095867, |
| "loss": 1420.0286, |
| "step": 6420 |
| }, |
| { |
| "ce_loss_13": 3.318646419048309, |
| "ce_loss_17": 3.227793884277344, |
| "ce_loss_2": 4.416646718978882, |
| "ce_loss_4": 4.136425817012787, |
| "ce_loss_9": 3.5618483901023863, |
| "epoch": 0.643, |
| "grad_norm": 1056.0, |
| "kl_loss_13": 230.03461532592775, |
| "kl_loss_2": 2513.0759765625, |
| "kl_loss_4": 1981.4266723632813, |
| "kl_loss_9": 816.9708038330078, |
| "learning_rate": 0.00028797213292019926, |
| "loss": 1392.2287, |
| "step": 6430 |
| }, |
| { |
| "ce_loss_13": 3.294539451599121, |
| "ce_loss_17": 3.202876555919647, |
| "ce_loss_2": 4.412116122245789, |
| "ce_loss_4": 4.120684218406677, |
| "ce_loss_9": 3.5416916489601133, |
| "epoch": 0.644, |
| "grad_norm": 828.0, |
| "kl_loss_13": 230.92177810668946, |
| "kl_loss_2": 2525.5751953125, |
| "kl_loss_4": 1989.6196044921876, |
| "kl_loss_9": 825.4972442626953, |
| "learning_rate": 0.0002865362636490791, |
| "loss": 1420.1119, |
| "step": 6440 |
| }, |
| { |
| "ce_loss_13": 3.309741997718811, |
| "ce_loss_17": 3.222404444217682, |
| "ce_loss_2": 4.419623613357544, |
| "ce_loss_4": 4.132715785503388, |
| "ce_loss_9": 3.56042857170105, |
| "epoch": 0.645, |
| "grad_norm": 828.0, |
| "kl_loss_13": 226.59962539672853, |
| "kl_loss_2": 2494.6920532226563, |
| "kl_loss_4": 1969.4511291503907, |
| "kl_loss_9": 817.1368225097656, |
| "learning_rate": 0.0002851025439554142, |
| "loss": 1377.2508, |
| "step": 6450 |
| }, |
| { |
| "ce_loss_13": 3.296689212322235, |
| "ce_loss_17": 3.2046764612197878, |
| "ce_loss_2": 4.396158170700073, |
| "ce_loss_4": 4.114370489120484, |
| "ce_loss_9": 3.556058073043823, |
| "epoch": 0.646, |
| "grad_norm": 848.0, |
| "kl_loss_13": 228.6302734375, |
| "kl_loss_2": 2468.174597167969, |
| "kl_loss_4": 1936.852587890625, |
| "kl_loss_9": 818.2100250244141, |
| "learning_rate": 0.00028367098827674573, |
| "loss": 1376.2439, |
| "step": 6460 |
| }, |
| { |
| "ce_loss_13": 3.225395154953003, |
| "ce_loss_17": 3.13791309595108, |
| "ce_loss_2": 4.359770154953003, |
| "ce_loss_4": 4.078741455078125, |
| "ce_loss_9": 3.4745787620544433, |
| "epoch": 0.647, |
| "grad_norm": 924.0, |
| "kl_loss_13": 222.7610176086426, |
| "kl_loss_2": 2532.627307128906, |
| "kl_loss_4": 1996.5273803710938, |
| "kl_loss_9": 803.6190856933594, |
| "learning_rate": 0.00028224161102882397, |
| "loss": 1396.7857, |
| "step": 6470 |
| }, |
| { |
| "ce_loss_13": 3.19874347448349, |
| "ce_loss_17": 3.114915502071381, |
| "ce_loss_2": 4.299088919162751, |
| "ce_loss_4": 4.021137297153473, |
| "ce_loss_9": 3.4499616980552674, |
| "epoch": 0.648, |
| "grad_norm": 928.0, |
| "kl_loss_13": 219.87050704956056, |
| "kl_loss_2": 2471.4651123046874, |
| "kl_loss_4": 1950.0511657714844, |
| "kl_loss_9": 804.1710510253906, |
| "learning_rate": 0.00028081442660546124, |
| "loss": 1389.2885, |
| "step": 6480 |
| }, |
| { |
| "ce_loss_13": 3.2644436836242674, |
| "ce_loss_17": 3.173009693622589, |
| "ce_loss_2": 4.371016049385071, |
| "ce_loss_4": 4.086002993583679, |
| "ce_loss_9": 3.518217372894287, |
| "epoch": 0.649, |
| "grad_norm": 796.0, |
| "kl_loss_13": 230.70403289794922, |
| "kl_loss_2": 2510.9924926757812, |
| "kl_loss_4": 1966.690167236328, |
| "kl_loss_9": 815.2742462158203, |
| "learning_rate": 0.0002793894493783892, |
| "loss": 1386.0506, |
| "step": 6490 |
| }, |
| { |
| "ce_loss_13": 3.2800037264823914, |
| "ce_loss_17": 3.191721427440643, |
| "ce_loss_2": 4.38927059173584, |
| "ce_loss_4": 4.115973925590515, |
| "ce_loss_9": 3.5229194521903993, |
| "epoch": 0.65, |
| "grad_norm": 784.0, |
| "kl_loss_13": 221.48081588745117, |
| "kl_loss_2": 2502.23515625, |
| "kl_loss_4": 1981.554541015625, |
| "kl_loss_9": 799.6488494873047, |
| "learning_rate": 0.0002779666936971129, |
| "loss": 1372.6685, |
| "step": 6500 |
| }, |
| { |
| "ce_loss_13": 3.2889639854431154, |
| "ce_loss_17": 3.197144901752472, |
| "ce_loss_2": 4.422222232818603, |
| "ce_loss_4": 4.1408725619316105, |
| "ce_loss_9": 3.546233355998993, |
| "epoch": 0.651, |
| "grad_norm": 1040.0, |
| "kl_loss_13": 227.6748191833496, |
| "kl_loss_2": 2549.470007324219, |
| "kl_loss_4": 2019.1922790527344, |
| "kl_loss_9": 827.4289398193359, |
| "learning_rate": 0.00027654617388876614, |
| "loss": 1406.9853, |
| "step": 6510 |
| }, |
| { |
| "ce_loss_13": 3.3097339034080506, |
| "ce_loss_17": 3.2205450534820557, |
| "ce_loss_2": 4.426493430137635, |
| "ce_loss_4": 4.136209976673126, |
| "ce_loss_9": 3.559991943836212, |
| "epoch": 0.652, |
| "grad_norm": 800.0, |
| "kl_loss_13": 227.00861282348632, |
| "kl_loss_2": 2528.0768920898436, |
| "kl_loss_4": 1987.3669006347657, |
| "kl_loss_9": 812.4847229003906, |
| "learning_rate": 0.0002751279042579672, |
| "loss": 1390.9523, |
| "step": 6520 |
| }, |
| { |
| "ce_loss_13": 3.2558979868888853, |
| "ce_loss_17": 3.169518268108368, |
| "ce_loss_2": 4.370462274551391, |
| "ce_loss_4": 4.08671133518219, |
| "ce_loss_9": 3.503693103790283, |
| "epoch": 0.653, |
| "grad_norm": 784.0, |
| "kl_loss_13": 220.280770111084, |
| "kl_loss_2": 2496.8043701171873, |
| "kl_loss_4": 1972.1827514648437, |
| "kl_loss_9": 798.9055206298829, |
| "learning_rate": 0.00027371189908667604, |
| "loss": 1403.8042, |
| "step": 6530 |
| }, |
| { |
| "ce_loss_13": 3.309041976928711, |
| "ce_loss_17": 3.2149966239929197, |
| "ce_loss_2": 4.4652539730072025, |
| "ce_loss_4": 4.1784031867980955, |
| "ce_loss_9": 3.5736100316047668, |
| "epoch": 0.654, |
| "grad_norm": 904.0, |
| "kl_loss_13": 235.906884765625, |
| "kl_loss_2": 2587.6687866210937, |
| "kl_loss_4": 2046.8087951660157, |
| "kl_loss_9": 831.9873962402344, |
| "learning_rate": 0.00027229817263404863, |
| "loss": 1429.758, |
| "step": 6540 |
| }, |
| { |
| "ce_loss_13": 3.2918017864227296, |
| "ce_loss_17": 3.203631508350372, |
| "ce_loss_2": 4.361274838447571, |
| "ce_loss_4": 4.076114463806152, |
| "ce_loss_9": 3.5296783328056334, |
| "epoch": 0.655, |
| "grad_norm": 796.0, |
| "kl_loss_13": 222.7517349243164, |
| "kl_loss_2": 2433.7910278320314, |
| "kl_loss_4": 1905.0374267578125, |
| "kl_loss_9": 800.1869323730468, |
| "learning_rate": 0.0002708867391362948, |
| "loss": 1366.1469, |
| "step": 6550 |
| }, |
| { |
| "ce_loss_13": 3.2699525952339172, |
| "ce_loss_17": 3.182655727863312, |
| "ce_loss_2": 4.355190777778626, |
| "ce_loss_4": 4.074844861030579, |
| "ce_loss_9": 3.5043346285820007, |
| "epoch": 0.656, |
| "grad_norm": 804.0, |
| "kl_loss_13": 219.05189666748046, |
| "kl_loss_2": 2446.1003540039064, |
| "kl_loss_4": 1910.2807983398438, |
| "kl_loss_9": 777.3076873779297, |
| "learning_rate": 0.0002694776128065345, |
| "loss": 1375.6059, |
| "step": 6560 |
| }, |
| { |
| "ce_loss_13": 3.208493912220001, |
| "ce_loss_17": 3.1173330545425415, |
| "ce_loss_2": 4.334606385231018, |
| "ce_loss_4": 4.045690822601318, |
| "ce_loss_9": 3.4686250567436216, |
| "epoch": 0.657, |
| "grad_norm": 780.0, |
| "kl_loss_13": 229.73377532958983, |
| "kl_loss_2": 2554.2600219726564, |
| "kl_loss_4": 2001.8809936523437, |
| "kl_loss_9": 834.3918487548829, |
| "learning_rate": 0.00026807080783465374, |
| "loss": 1375.6125, |
| "step": 6570 |
| }, |
| { |
| "ce_loss_13": 3.311686706542969, |
| "ce_loss_17": 3.2227351665496826, |
| "ce_loss_2": 4.435728859901428, |
| "ce_loss_4": 4.151098394393921, |
| "ce_loss_9": 3.5671212673187256, |
| "epoch": 0.658, |
| "grad_norm": 680.0, |
| "kl_loss_13": 228.21895523071288, |
| "kl_loss_2": 2521.74912109375, |
| "kl_loss_4": 1998.2172424316407, |
| "kl_loss_9": 820.6941711425782, |
| "learning_rate": 0.00026666633838716316, |
| "loss": 1404.3357, |
| "step": 6580 |
| }, |
| { |
| "ce_loss_13": 3.21847026348114, |
| "ce_loss_17": 3.1265950798988342, |
| "ce_loss_2": 4.357976269721985, |
| "ce_loss_4": 4.06951402425766, |
| "ce_loss_9": 3.47168790102005, |
| "epoch": 0.659, |
| "grad_norm": 1008.0, |
| "kl_loss_13": 231.6580795288086, |
| "kl_loss_2": 2572.1566162109375, |
| "kl_loss_4": 2037.4882385253907, |
| "kl_loss_9": 834.7012451171875, |
| "learning_rate": 0.00026526421860705474, |
| "loss": 1426.4838, |
| "step": 6590 |
| }, |
| { |
| "ce_loss_13": 3.241065502166748, |
| "ce_loss_17": 3.147893464565277, |
| "ce_loss_2": 4.371680760383606, |
| "ce_loss_4": 4.087780070304871, |
| "ce_loss_9": 3.500224161148071, |
| "epoch": 0.66, |
| "grad_norm": 884.0, |
| "kl_loss_13": 231.74642944335938, |
| "kl_loss_2": 2540.704626464844, |
| "kl_loss_4": 2009.9585083007812, |
| "kl_loss_9": 825.7754333496093, |
| "learning_rate": 0.0002638644626136587, |
| "loss": 1387.5845, |
| "step": 6600 |
| }, |
| { |
| "ce_loss_13": 3.2518101930618286, |
| "ce_loss_17": 3.163067263364792, |
| "ce_loss_2": 4.377039980888367, |
| "ce_loss_4": 4.093204665184021, |
| "ce_loss_9": 3.5025036573410033, |
| "epoch": 0.661, |
| "grad_norm": 860.0, |
| "kl_loss_13": 225.01166076660155, |
| "kl_loss_2": 2521.225231933594, |
| "kl_loss_4": 1985.5373962402343, |
| "kl_loss_9": 816.6730895996094, |
| "learning_rate": 0.00026246708450250255, |
| "loss": 1386.2938, |
| "step": 6610 |
| }, |
| { |
| "ce_loss_13": 3.239645743370056, |
| "ce_loss_17": 3.153135633468628, |
| "ce_loss_2": 4.337894868850708, |
| "ce_loss_4": 4.062164068222046, |
| "ce_loss_9": 3.482769787311554, |
| "epoch": 0.662, |
| "grad_norm": 920.0, |
| "kl_loss_13": 222.76163558959962, |
| "kl_loss_2": 2484.5597961425783, |
| "kl_loss_4": 1958.0606323242187, |
| "kl_loss_9": 802.6218048095703, |
| "learning_rate": 0.00026107209834516854, |
| "loss": 1377.9414, |
| "step": 6620 |
| }, |
| { |
| "ce_loss_13": 3.1988412737846375, |
| "ce_loss_17": 3.1080427408218383, |
| "ce_loss_2": 4.363201832771301, |
| "ce_loss_4": 4.072892606258392, |
| "ce_loss_9": 3.460556983947754, |
| "epoch": 0.663, |
| "grad_norm": 880.0, |
| "kl_loss_13": 227.96312408447267, |
| "kl_loss_2": 2617.6880126953124, |
| "kl_loss_4": 2065.4799926757814, |
| "kl_loss_9": 838.41416015625, |
| "learning_rate": 0.0002596795181891514, |
| "loss": 1425.9258, |
| "step": 6630 |
| }, |
| { |
| "ce_loss_13": 3.202695620059967, |
| "ce_loss_17": 3.107593536376953, |
| "ce_loss_2": 4.347871923446656, |
| "ce_loss_4": 4.053984808921814, |
| "ce_loss_9": 3.463169074058533, |
| "epoch": 0.664, |
| "grad_norm": 872.0, |
| "kl_loss_13": 233.27393188476563, |
| "kl_loss_2": 2572.1153198242187, |
| "kl_loss_4": 2025.9291259765625, |
| "kl_loss_9": 839.6776306152344, |
| "learning_rate": 0.000258289358057718, |
| "loss": 1453.4813, |
| "step": 6640 |
| }, |
| { |
| "ce_loss_13": 3.2787650346755983, |
| "ce_loss_17": 3.182022047042847, |
| "ce_loss_2": 4.422251641750336, |
| "ce_loss_4": 4.132551395893097, |
| "ce_loss_9": 3.538319194316864, |
| "epoch": 0.665, |
| "grad_norm": 844.0, |
| "kl_loss_13": 236.55617980957032, |
| "kl_loss_2": 2574.144677734375, |
| "kl_loss_4": 2034.644189453125, |
| "kl_loss_9": 836.24501953125, |
| "learning_rate": 0.0002569016319497657, |
| "loss": 1414.4379, |
| "step": 6650 |
| }, |
| { |
| "ce_loss_13": 3.2590417742729185, |
| "ce_loss_17": 3.1653162121772764, |
| "ce_loss_2": 4.402447319030761, |
| "ce_loss_4": 4.110550367832184, |
| "ce_loss_9": 3.5174036383628846, |
| "epoch": 0.666, |
| "grad_norm": 932.0, |
| "kl_loss_13": 234.56823043823243, |
| "kl_loss_2": 2579.8322265625, |
| "kl_loss_4": 2033.58046875, |
| "kl_loss_9": 838.9488311767578, |
| "learning_rate": 0.00025551635383968066, |
| "loss": 1432.2742, |
| "step": 6660 |
| }, |
| { |
| "ce_loss_13": 3.1783674597740172, |
| "ce_loss_17": 3.0876069903373717, |
| "ce_loss_2": 4.325557041168213, |
| "ce_loss_4": 4.038710987567901, |
| "ce_loss_9": 3.432902228832245, |
| "epoch": 0.667, |
| "grad_norm": 972.0, |
| "kl_loss_13": 232.85045700073243, |
| "kl_loss_2": 2599.0522338867186, |
| "kl_loss_4": 2059.9551208496096, |
| "kl_loss_9": 838.2795562744141, |
| "learning_rate": 0.00025413353767719804, |
| "loss": 1425.7612, |
| "step": 6670 |
| }, |
| { |
| "ce_loss_13": 3.2352439641952513, |
| "ce_loss_17": 3.1490802526474, |
| "ce_loss_2": 4.355892992019653, |
| "ce_loss_4": 4.082921981811523, |
| "ce_loss_9": 3.4831491351127624, |
| "epoch": 0.668, |
| "grad_norm": 840.0, |
| "kl_loss_13": 223.52349014282225, |
| "kl_loss_2": 2537.236853027344, |
| "kl_loss_4": 2018.5569763183594, |
| "kl_loss_9": 822.4813171386719, |
| "learning_rate": 0.0002527531973872617, |
| "loss": 1396.8673, |
| "step": 6680 |
| }, |
| { |
| "ce_loss_13": 3.242522084712982, |
| "ce_loss_17": 3.1554473400115968, |
| "ce_loss_2": 4.358831810951233, |
| "ce_loss_4": 4.069754600524902, |
| "ce_loss_9": 3.4941627383232117, |
| "epoch": 0.669, |
| "grad_norm": 1056.0, |
| "kl_loss_13": 224.91253204345702, |
| "kl_loss_2": 2509.0516845703123, |
| "kl_loss_4": 1968.0543273925782, |
| "kl_loss_9": 811.0765899658203, |
| "learning_rate": 0.0002513753468698826, |
| "loss": 1385.8213, |
| "step": 6690 |
| }, |
| { |
| "ce_loss_13": 3.2168107509613035, |
| "ce_loss_17": 3.1233647227287293, |
| "ce_loss_2": 4.3500475764274595, |
| "ce_loss_4": 4.065746247768402, |
| "ce_loss_9": 3.4685078501701354, |
| "epoch": 0.67, |
| "grad_norm": 1096.0, |
| "kl_loss_13": 229.20053176879884, |
| "kl_loss_2": 2563.12822265625, |
| "kl_loss_4": 2030.7737976074218, |
| "kl_loss_9": 832.4876220703125, |
| "learning_rate": 0.0002500000000000001, |
| "loss": 1407.6477, |
| "step": 6700 |
| }, |
| { |
| "ce_loss_13": 3.3195030450820924, |
| "ce_loss_17": 3.236395871639252, |
| "ce_loss_2": 4.377614164352417, |
| "ce_loss_4": 4.109622275829315, |
| "ce_loss_9": 3.5554600834846495, |
| "epoch": 0.671, |
| "grad_norm": 956.0, |
| "kl_loss_13": 218.80756072998048, |
| "kl_loss_2": 2403.7837463378905, |
| "kl_loss_4": 1899.0211364746094, |
| "kl_loss_9": 784.3532409667969, |
| "learning_rate": 0.0002486271706273421, |
| "loss": 1400.0493, |
| "step": 6710 |
| }, |
| { |
| "ce_loss_13": 3.2630727767944334, |
| "ce_loss_17": 3.180737543106079, |
| "ce_loss_2": 4.331099224090576, |
| "ce_loss_4": 4.058320021629333, |
| "ce_loss_9": 3.499131906032562, |
| "epoch": 0.672, |
| "grad_norm": 780.0, |
| "kl_loss_13": 217.14373703002929, |
| "kl_loss_2": 2424.615686035156, |
| "kl_loss_4": 1908.8784973144532, |
| "kl_loss_9": 782.8959350585938, |
| "learning_rate": 0.0002472568725762853, |
| "loss": 1376.193, |
| "step": 6720 |
| }, |
| { |
| "ce_loss_13": 3.2541046261787416, |
| "ce_loss_17": 3.1691025495529175, |
| "ce_loss_2": 4.334105753898621, |
| "ce_loss_4": 4.053524935245514, |
| "ce_loss_9": 3.4929776906967165, |
| "epoch": 0.673, |
| "grad_norm": 860.0, |
| "kl_loss_13": 217.35878295898436, |
| "kl_loss_2": 2452.4041748046875, |
| "kl_loss_4": 1932.6178649902345, |
| "kl_loss_9": 787.3083099365234, |
| "learning_rate": 0.00024588911964571554, |
| "loss": 1367.4658, |
| "step": 6730 |
| }, |
| { |
| "ce_loss_13": 3.274617838859558, |
| "ce_loss_17": 3.17531156539917, |
| "ce_loss_2": 4.410956609249115, |
| "ce_loss_4": 4.130764174461365, |
| "ce_loss_9": 3.5374782919883727, |
| "epoch": 0.674, |
| "grad_norm": 968.0, |
| "kl_loss_13": 237.39123992919923, |
| "kl_loss_2": 2550.848046875, |
| "kl_loss_4": 2018.4190002441405, |
| "kl_loss_9": 842.0796356201172, |
| "learning_rate": 0.00024452392560888974, |
| "loss": 1389.9145, |
| "step": 6740 |
| }, |
| { |
| "ce_loss_13": 3.1634849309921265, |
| "ce_loss_17": 3.073228192329407, |
| "ce_loss_2": 4.283240520954132, |
| "ce_loss_4": 3.9931190848350524, |
| "ce_loss_9": 3.4144848227500915, |
| "epoch": 0.675, |
| "grad_norm": 864.0, |
| "kl_loss_13": 222.49356689453126, |
| "kl_loss_2": 2535.4306396484376, |
| "kl_loss_4": 1991.826971435547, |
| "kl_loss_9": 812.557894897461, |
| "learning_rate": 0.00024316130421329695, |
| "loss": 1375.1762, |
| "step": 6750 |
| }, |
| { |
| "ce_loss_13": 3.2407036662101745, |
| "ce_loss_17": 3.1566784620285033, |
| "ce_loss_2": 4.348425877094269, |
| "ce_loss_4": 4.068143820762634, |
| "ce_loss_9": 3.4899294853210447, |
| "epoch": 0.676, |
| "grad_norm": 896.0, |
| "kl_loss_13": 220.62753982543944, |
| "kl_loss_2": 2493.5763916015626, |
| "kl_loss_4": 1961.6778442382813, |
| "kl_loss_9": 800.5608795166015, |
| "learning_rate": 0.00024180126918051909, |
| "loss": 1383.2601, |
| "step": 6760 |
| }, |
| { |
| "ce_loss_13": 3.2836318135261537, |
| "ce_loss_17": 3.193866515159607, |
| "ce_loss_2": 4.38095452785492, |
| "ce_loss_4": 4.105333518981934, |
| "ce_loss_9": 3.5330063104629517, |
| "epoch": 0.677, |
| "grad_norm": 932.0, |
| "kl_loss_13": 226.68489151000978, |
| "kl_loss_2": 2488.1648071289064, |
| "kl_loss_4": 1965.694805908203, |
| "kl_loss_9": 805.2911254882813, |
| "learning_rate": 0.00024044383420609406, |
| "loss": 1371.0274, |
| "step": 6770 |
| }, |
| { |
| "ce_loss_13": 3.291364300251007, |
| "ce_loss_17": 3.2045462131500244, |
| "ce_loss_2": 4.369945597648621, |
| "ce_loss_4": 4.08666900396347, |
| "ce_loss_9": 3.5264209628105165, |
| "epoch": 0.678, |
| "grad_norm": 812.0, |
| "kl_loss_13": 220.8335403442383, |
| "kl_loss_2": 2462.1601684570314, |
| "kl_loss_4": 1929.69853515625, |
| "kl_loss_9": 794.7519073486328, |
| "learning_rate": 0.00023908901295937712, |
| "loss": 1398.2065, |
| "step": 6780 |
| }, |
| { |
| "ce_loss_13": 3.283587968349457, |
| "ce_loss_17": 3.1970601081848145, |
| "ce_loss_2": 4.381623601913452, |
| "ce_loss_4": 4.10012971162796, |
| "ce_loss_9": 3.528293585777283, |
| "epoch": 0.679, |
| "grad_norm": 1064.0, |
| "kl_loss_13": 222.77989196777344, |
| "kl_loss_2": 2460.1813720703126, |
| "kl_loss_4": 1932.0266906738282, |
| "kl_loss_9": 792.6221374511719, |
| "learning_rate": 0.00023773681908340283, |
| "loss": 1388.4029, |
| "step": 6790 |
| }, |
| { |
| "ce_loss_13": 3.2637901306152344, |
| "ce_loss_17": 3.1705700516700746, |
| "ce_loss_2": 4.410048580169677, |
| "ce_loss_4": 4.122096133232117, |
| "ce_loss_9": 3.5255157470703127, |
| "epoch": 0.68, |
| "grad_norm": 864.0, |
| "kl_loss_13": 236.52789230346679, |
| "kl_loss_2": 2592.909228515625, |
| "kl_loss_4": 2049.3003540039062, |
| "kl_loss_9": 848.7372528076172, |
| "learning_rate": 0.00023638726619474876, |
| "loss": 1438.3135, |
| "step": 6800 |
| }, |
| { |
| "ce_loss_13": 3.252738356590271, |
| "ce_loss_17": 3.157555031776428, |
| "ce_loss_2": 4.420324802398682, |
| "ce_loss_4": 4.134533941745758, |
| "ce_loss_9": 3.5213294982910157, |
| "epoch": 0.681, |
| "grad_norm": 916.0, |
| "kl_loss_13": 234.84537506103516, |
| "kl_loss_2": 2599.709094238281, |
| "kl_loss_4": 2066.92724609375, |
| "kl_loss_9": 844.2077087402344, |
| "learning_rate": 0.0002350403678833976, |
| "loss": 1414.9887, |
| "step": 6810 |
| }, |
| { |
| "ce_loss_13": 3.1841442942619325, |
| "ce_loss_17": 3.0949651479721068, |
| "ce_loss_2": 4.314553606510162, |
| "ce_loss_4": 4.028892433643341, |
| "ce_loss_9": 3.4367475867271424, |
| "epoch": 0.682, |
| "grad_norm": 880.0, |
| "kl_loss_13": 223.9334243774414, |
| "kl_loss_2": 2558.786181640625, |
| "kl_loss_4": 2012.3491271972657, |
| "kl_loss_9": 824.0137359619141, |
| "learning_rate": 0.00023369613771260007, |
| "loss": 1388.79, |
| "step": 6820 |
| }, |
| { |
| "ce_loss_13": 3.2972843408584596, |
| "ce_loss_17": 3.207979679107666, |
| "ce_loss_2": 4.423956942558289, |
| "ce_loss_4": 4.139782214164734, |
| "ce_loss_9": 3.555013644695282, |
| "epoch": 0.683, |
| "grad_norm": 864.0, |
| "kl_loss_13": 228.44933700561523, |
| "kl_loss_2": 2556.3253784179688, |
| "kl_loss_4": 2020.214044189453, |
| "kl_loss_9": 824.9123413085938, |
| "learning_rate": 0.00023235458921873925, |
| "loss": 1414.4389, |
| "step": 6830 |
| }, |
| { |
| "ce_loss_13": 3.2572293877601624, |
| "ce_loss_17": 3.161380076408386, |
| "ce_loss_2": 4.42143828868866, |
| "ce_loss_4": 4.144210529327393, |
| "ce_loss_9": 3.527844321727753, |
| "epoch": 0.684, |
| "grad_norm": 1024.0, |
| "kl_loss_13": 236.6844741821289, |
| "kl_loss_2": 2618.1310424804688, |
| "kl_loss_4": 2090.4394592285157, |
| "kl_loss_9": 864.5554901123047, |
| "learning_rate": 0.0002310157359111938, |
| "loss": 1447.36, |
| "step": 6840 |
| }, |
| { |
| "ce_loss_13": 3.146578013896942, |
| "ce_loss_17": 3.050377106666565, |
| "ce_loss_2": 4.370137643814087, |
| "ce_loss_4": 4.077717661857605, |
| "ce_loss_9": 3.4260385751724245, |
| "epoch": 0.685, |
| "grad_norm": 972.0, |
| "kl_loss_13": 234.48453369140626, |
| "kl_loss_2": 2729.6728393554686, |
| "kl_loss_4": 2170.5785583496095, |
| "kl_loss_9": 863.6528503417969, |
| "learning_rate": 0.0002296795912722014, |
| "loss": 1458.2611, |
| "step": 6850 |
| }, |
| { |
| "ce_loss_13": 3.2871939659118654, |
| "ce_loss_17": 3.1957009077072143, |
| "ce_loss_2": 4.379037642478943, |
| "ce_loss_4": 4.092627227306366, |
| "ce_loss_9": 3.5352322340011595, |
| "epoch": 0.686, |
| "grad_norm": 908.0, |
| "kl_loss_13": 224.73418579101562, |
| "kl_loss_2": 2473.486181640625, |
| "kl_loss_4": 1941.0966247558595, |
| "kl_loss_9": 807.001220703125, |
| "learning_rate": 0.0002283461687567236, |
| "loss": 1356.0598, |
| "step": 6860 |
| }, |
| { |
| "ce_loss_13": 3.3351909041404726, |
| "ce_loss_17": 3.248302721977234, |
| "ce_loss_2": 4.4024493932724, |
| "ce_loss_4": 4.1268727540969845, |
| "ce_loss_9": 3.5784929037094115, |
| "epoch": 0.687, |
| "grad_norm": 892.0, |
| "kl_loss_13": 223.5128173828125, |
| "kl_loss_2": 2421.3988647460938, |
| "kl_loss_4": 1899.7137451171875, |
| "kl_loss_9": 791.4371765136718, |
| "learning_rate": 0.00022701548179231045, |
| "loss": 1379.4305, |
| "step": 6870 |
| }, |
| { |
| "ce_loss_13": 3.2907162427902223, |
| "ce_loss_17": 3.199568712711334, |
| "ce_loss_2": 4.412567067146301, |
| "ce_loss_4": 4.132801270484924, |
| "ce_loss_9": 3.5403625130653382, |
| "epoch": 0.688, |
| "grad_norm": 924.0, |
| "kl_loss_13": 227.7600341796875, |
| "kl_loss_2": 2534.824084472656, |
| "kl_loss_4": 2015.9478637695313, |
| "kl_loss_9": 821.2925659179688, |
| "learning_rate": 0.00022568754377896516, |
| "loss": 1381.1674, |
| "step": 6880 |
| }, |
| { |
| "ce_loss_13": 3.2835817337036133, |
| "ce_loss_17": 3.1926389217376707, |
| "ce_loss_2": 4.37939612865448, |
| "ce_loss_4": 4.098658502101898, |
| "ce_loss_9": 3.529259502887726, |
| "epoch": 0.689, |
| "grad_norm": 824.0, |
| "kl_loss_13": 228.50012817382813, |
| "kl_loss_2": 2498.63447265625, |
| "kl_loss_4": 1964.8612243652344, |
| "kl_loss_9": 814.290283203125, |
| "learning_rate": 0.00022436236808900844, |
| "loss": 1377.5176, |
| "step": 6890 |
| }, |
| { |
| "ce_loss_13": 3.180496096611023, |
| "ce_loss_17": 3.0882019281387327, |
| "ce_loss_2": 4.3191688537597654, |
| "ce_loss_4": 4.031343269348144, |
| "ce_loss_9": 3.4364490032196047, |
| "epoch": 0.69, |
| "grad_norm": 868.0, |
| "kl_loss_13": 228.10622177124023, |
| "kl_loss_2": 2580.0264892578125, |
| "kl_loss_4": 2038.4791198730468, |
| "kl_loss_9": 825.1335174560547, |
| "learning_rate": 0.00022303996806694487, |
| "loss": 1395.3219, |
| "step": 6900 |
| }, |
| { |
| "ce_loss_13": 3.2514352798461914, |
| "ce_loss_17": 3.1644859075546266, |
| "ce_loss_2": 4.372483134269714, |
| "ce_loss_4": 4.095207059383393, |
| "ce_loss_9": 3.502042090892792, |
| "epoch": 0.691, |
| "grad_norm": 948.0, |
| "kl_loss_13": 223.31015701293944, |
| "kl_loss_2": 2533.658642578125, |
| "kl_loss_4": 2003.885400390625, |
| "kl_loss_9": 817.3878997802734, |
| "learning_rate": 0.00022172035702932823, |
| "loss": 1382.6459, |
| "step": 6910 |
| }, |
| { |
| "ce_loss_13": 3.302597963809967, |
| "ce_loss_17": 3.2123578071594237, |
| "ce_loss_2": 4.3816111326217655, |
| "ce_loss_4": 4.105821824073791, |
| "ce_loss_9": 3.545745587348938, |
| "epoch": 0.692, |
| "grad_norm": 840.0, |
| "kl_loss_13": 227.14473190307618, |
| "kl_loss_2": 2442.1774047851563, |
| "kl_loss_4": 1927.7969177246093, |
| "kl_loss_9": 804.2861114501953, |
| "learning_rate": 0.00022040354826462666, |
| "loss": 1365.3614, |
| "step": 6920 |
| }, |
| { |
| "ce_loss_13": 3.227451467514038, |
| "ce_loss_17": 3.1418288588523864, |
| "ce_loss_2": 4.349059462547302, |
| "ce_loss_4": 4.066020691394806, |
| "ce_loss_9": 3.4737524509429933, |
| "epoch": 0.693, |
| "grad_norm": 856.0, |
| "kl_loss_13": 219.2278564453125, |
| "kl_loss_2": 2521.2966430664064, |
| "kl_loss_4": 1990.3687438964844, |
| "kl_loss_9": 799.6419464111328, |
| "learning_rate": 0.0002190895550330899, |
| "loss": 1396.9391, |
| "step": 6930 |
| }, |
| { |
| "ce_loss_13": 3.165955913066864, |
| "ce_loss_17": 3.073744761943817, |
| "ce_loss_2": 4.322216045856476, |
| "ce_loss_4": 4.028091156482697, |
| "ce_loss_9": 3.4312268614768984, |
| "epoch": 0.694, |
| "grad_norm": 936.0, |
| "kl_loss_13": 229.0870216369629, |
| "kl_loss_2": 2588.823779296875, |
| "kl_loss_4": 2042.9987426757812, |
| "kl_loss_9": 836.6030731201172, |
| "learning_rate": 0.00021777839056661552, |
| "loss": 1391.2529, |
| "step": 6940 |
| }, |
| { |
| "ce_loss_13": 3.2504790306091307, |
| "ce_loss_17": 3.1614428758621216, |
| "ce_loss_2": 4.355109357833863, |
| "ce_loss_4": 4.0713593602180485, |
| "ce_loss_9": 3.497498321533203, |
| "epoch": 0.695, |
| "grad_norm": 908.0, |
| "kl_loss_13": 224.00432357788085, |
| "kl_loss_2": 2501.229345703125, |
| "kl_loss_4": 1967.3579162597657, |
| "kl_loss_9": 806.033090209961, |
| "learning_rate": 0.0002164700680686147, |
| "loss": 1362.4709, |
| "step": 6950 |
| }, |
| { |
| "ce_loss_13": 3.2929418683052063, |
| "ce_loss_17": 3.2058985352516176, |
| "ce_loss_2": 4.378036165237427, |
| "ce_loss_4": 4.101405417919159, |
| "ce_loss_9": 3.5365471839904785, |
| "epoch": 0.696, |
| "grad_norm": 828.0, |
| "kl_loss_13": 226.53884658813476, |
| "kl_loss_2": 2455.274560546875, |
| "kl_loss_4": 1924.337158203125, |
| "kl_loss_9": 795.2972381591796, |
| "learning_rate": 0.0002151646007138806, |
| "loss": 1364.2182, |
| "step": 6960 |
| }, |
| { |
| "ce_loss_13": 3.179440367221832, |
| "ce_loss_17": 3.0889686226844786, |
| "ce_loss_2": 4.314872598648071, |
| "ce_loss_4": 4.032753050327301, |
| "ce_loss_9": 3.4392405271530153, |
| "epoch": 0.697, |
| "grad_norm": 828.0, |
| "kl_loss_13": 227.77658615112304, |
| "kl_loss_2": 2575.025109863281, |
| "kl_loss_4": 2045.5470458984375, |
| "kl_loss_9": 833.3858337402344, |
| "learning_rate": 0.00021386200164845526, |
| "loss": 1397.3668, |
| "step": 6970 |
| }, |
| { |
| "ce_loss_13": 3.3459704875946046, |
| "ce_loss_17": 3.261486768722534, |
| "ce_loss_2": 4.4051127433776855, |
| "ce_loss_4": 4.132306838035584, |
| "ce_loss_9": 3.587813639640808, |
| "epoch": 0.698, |
| "grad_norm": 812.0, |
| "kl_loss_13": 221.6239372253418, |
| "kl_loss_2": 2424.71103515625, |
| "kl_loss_4": 1907.8235290527343, |
| "kl_loss_9": 793.41806640625, |
| "learning_rate": 0.0002125622839894964, |
| "loss": 1349.1637, |
| "step": 6980 |
| }, |
| { |
| "ce_loss_13": 3.289145255088806, |
| "ce_loss_17": 3.203282558917999, |
| "ce_loss_2": 4.382337474822998, |
| "ce_loss_4": 4.094339299201965, |
| "ce_loss_9": 3.5309545397758484, |
| "epoch": 0.699, |
| "grad_norm": 868.0, |
| "kl_loss_13": 221.8167709350586, |
| "kl_loss_2": 2463.612109375, |
| "kl_loss_4": 1930.7593688964844, |
| "kl_loss_9": 794.2679748535156, |
| "learning_rate": 0.00021126546082514663, |
| "loss": 1363.3807, |
| "step": 6990 |
| }, |
| { |
| "ce_loss_13": 3.3122405886650084, |
| "ce_loss_17": 3.2266310453414917, |
| "ce_loss_2": 4.392450308799743, |
| "ce_loss_4": 4.111060690879822, |
| "ce_loss_9": 3.5555851459503174, |
| "epoch": 0.7, |
| "grad_norm": 764.0, |
| "kl_loss_13": 221.9448013305664, |
| "kl_loss_2": 2448.335412597656, |
| "kl_loss_4": 1918.58330078125, |
| "kl_loss_9": 795.4322692871094, |
| "learning_rate": 0.00020997154521440098, |
| "loss": 1351.0527, |
| "step": 7000 |
| }, |
| { |
| "ce_loss_13": 3.2560290694236755, |
| "ce_loss_17": 3.172095501422882, |
| "ce_loss_2": 4.35725462436676, |
| "ce_loss_4": 4.071544814109802, |
| "ce_loss_9": 3.5006725668907164, |
| "epoch": 0.701, |
| "grad_norm": 916.0, |
| "kl_loss_13": 221.02141036987305, |
| "kl_loss_2": 2495.580285644531, |
| "kl_loss_4": 1961.078173828125, |
| "kl_loss_9": 800.934375, |
| "learning_rate": 0.0002086805501869749, |
| "loss": 1358.5234, |
| "step": 7010 |
| }, |
| { |
| "ce_loss_13": 3.2334765076637266, |
| "ce_loss_17": 3.14152569770813, |
| "ce_loss_2": 4.375302791595459, |
| "ce_loss_4": 4.093678617477417, |
| "ce_loss_9": 3.497301089763641, |
| "epoch": 0.702, |
| "grad_norm": 920.0, |
| "kl_loss_13": 231.4882049560547, |
| "kl_loss_2": 2574.637487792969, |
| "kl_loss_4": 2036.480780029297, |
| "kl_loss_9": 840.040658569336, |
| "learning_rate": 0.0002073924887431744, |
| "loss": 1402.0027, |
| "step": 7020 |
| }, |
| { |
| "ce_loss_13": 3.2397172331809996, |
| "ce_loss_17": 3.147324788570404, |
| "ce_loss_2": 4.351306700706482, |
| "ce_loss_4": 4.07316380739212, |
| "ce_loss_9": 3.4922420144081117, |
| "epoch": 0.703, |
| "grad_norm": 868.0, |
| "kl_loss_13": 223.71793060302736, |
| "kl_loss_2": 2531.930224609375, |
| "kl_loss_4": 2009.022149658203, |
| "kl_loss_9": 817.4667053222656, |
| "learning_rate": 0.00020610737385376348, |
| "loss": 1429.6402, |
| "step": 7030 |
| }, |
| { |
| "ce_loss_13": 3.2928420066833497, |
| "ce_loss_17": 3.2050134897232057, |
| "ce_loss_2": 4.363224124908447, |
| "ce_loss_4": 4.089160430431366, |
| "ce_loss_9": 3.5341002583503722, |
| "epoch": 0.704, |
| "grad_norm": 1024.0, |
| "kl_loss_13": 223.09845657348632, |
| "kl_loss_2": 2438.3024780273436, |
| "kl_loss_4": 1920.8396606445312, |
| "kl_loss_9": 795.2175018310547, |
| "learning_rate": 0.00020482521845983521, |
| "loss": 1383.1951, |
| "step": 7040 |
| }, |
| { |
| "ce_loss_13": 3.2900060772895814, |
| "ce_loss_17": 3.1998684406280518, |
| "ce_loss_2": 4.402012515068054, |
| "ce_loss_4": 4.125573408603668, |
| "ce_loss_9": 3.53646205663681, |
| "epoch": 0.705, |
| "grad_norm": 824.0, |
| "kl_loss_13": 227.93955917358397, |
| "kl_loss_2": 2517.812854003906, |
| "kl_loss_4": 1993.4254333496094, |
| "kl_loss_9": 815.7513000488282, |
| "learning_rate": 0.00020354603547267987, |
| "loss": 1404.9314, |
| "step": 7050 |
| }, |
| { |
| "ce_loss_13": 3.2780707120895385, |
| "ce_loss_17": 3.1833311319351196, |
| "ce_loss_2": 4.413818502426148, |
| "ce_loss_4": 4.133419227600098, |
| "ce_loss_9": 3.537304186820984, |
| "epoch": 0.706, |
| "grad_norm": 804.0, |
| "kl_loss_13": 230.00745391845703, |
| "kl_loss_2": 2533.838513183594, |
| "kl_loss_4": 2006.9207885742187, |
| "kl_loss_9": 826.0888732910156, |
| "learning_rate": 0.00020226983777365604, |
| "loss": 1429.8103, |
| "step": 7060 |
| }, |
| { |
| "ce_loss_13": 3.1825997829437256, |
| "ce_loss_17": 3.095646381378174, |
| "ce_loss_2": 4.336901795864105, |
| "ce_loss_4": 4.05969123840332, |
| "ce_loss_9": 3.4370158553123473, |
| "epoch": 0.707, |
| "grad_norm": 932.0, |
| "kl_loss_13": 220.45883178710938, |
| "kl_loss_2": 2574.1071533203126, |
| "kl_loss_4": 2044.1881225585937, |
| "kl_loss_9": 800.0923217773437, |
| "learning_rate": 0.00020099663821406056, |
| "loss": 1392.1373, |
| "step": 7070 |
| }, |
| { |
| "ce_loss_13": 3.276571524143219, |
| "ce_loss_17": 3.193793308734894, |
| "ce_loss_2": 4.3669240236282345, |
| "ce_loss_4": 4.08516161441803, |
| "ce_loss_9": 3.522121012210846, |
| "epoch": 0.708, |
| "grad_norm": 964.0, |
| "kl_loss_13": 220.92641830444336, |
| "kl_loss_2": 2459.0380126953123, |
| "kl_loss_4": 1927.5954650878907, |
| "kl_loss_9": 793.0940368652343, |
| "learning_rate": 0.00019972644961499853, |
| "loss": 1389.9689, |
| "step": 7080 |
| }, |
| { |
| "ce_loss_13": 3.2516239285469055, |
| "ce_loss_17": 3.1578205823898315, |
| "ce_loss_2": 4.392509937286377, |
| "ce_loss_4": 4.1058523774147035, |
| "ce_loss_9": 3.511735665798187, |
| "epoch": 0.709, |
| "grad_norm": 868.0, |
| "kl_loss_13": 230.5964668273926, |
| "kl_loss_2": 2571.553564453125, |
| "kl_loss_4": 2033.4962158203125, |
| "kl_loss_9": 832.2728820800781, |
| "learning_rate": 0.00019845928476725522, |
| "loss": 1402.3674, |
| "step": 7090 |
| }, |
| { |
| "ce_loss_13": 3.3249686479568483, |
| "ce_loss_17": 3.231999695301056, |
| "ce_loss_2": 4.431102132797241, |
| "ce_loss_4": 4.151071059703827, |
| "ce_loss_9": 3.5809815883636475, |
| "epoch": 0.71, |
| "grad_norm": 884.0, |
| "kl_loss_13": 226.21194152832032, |
| "kl_loss_2": 2487.993115234375, |
| "kl_loss_4": 1961.412176513672, |
| "kl_loss_9": 818.6100280761718, |
| "learning_rate": 0.00019719515643116677, |
| "loss": 1416.1883, |
| "step": 7100 |
| }, |
| { |
| "ce_loss_13": 3.2668410539627075, |
| "ce_loss_17": 3.179190993309021, |
| "ce_loss_2": 4.372535943984985, |
| "ce_loss_4": 4.082563805580139, |
| "ce_loss_9": 3.5075453758239745, |
| "epoch": 0.711, |
| "grad_norm": 876.0, |
| "kl_loss_13": 222.45076217651368, |
| "kl_loss_2": 2497.483203125, |
| "kl_loss_4": 1949.665106201172, |
| "kl_loss_9": 796.8344665527344, |
| "learning_rate": 0.0001959340773364911, |
| "loss": 1391.192, |
| "step": 7110 |
| }, |
| { |
| "ce_loss_13": 3.278505599498749, |
| "ce_loss_17": 3.189879536628723, |
| "ce_loss_2": 4.400609827041626, |
| "ce_loss_4": 4.12062668800354, |
| "ce_loss_9": 3.5300690650939943, |
| "epoch": 0.712, |
| "grad_norm": 888.0, |
| "kl_loss_13": 223.94837112426757, |
| "kl_loss_2": 2530.3639282226563, |
| "kl_loss_4": 1995.4391784667969, |
| "kl_loss_9": 813.5061553955078, |
| "learning_rate": 0.0001946760601822809, |
| "loss": 1364.7543, |
| "step": 7120 |
| }, |
| { |
| "ce_loss_13": 3.335614728927612, |
| "ce_loss_17": 3.24614816904068, |
| "ce_loss_2": 4.426643013954163, |
| "ce_loss_4": 4.143035471439362, |
| "ce_loss_9": 3.5788122177124024, |
| "epoch": 0.713, |
| "grad_norm": 908.0, |
| "kl_loss_13": 223.02699584960936, |
| "kl_loss_2": 2471.6893676757813, |
| "kl_loss_4": 1940.165673828125, |
| "kl_loss_9": 797.8519927978516, |
| "learning_rate": 0.00019342111763675512, |
| "loss": 1347.9066, |
| "step": 7130 |
| }, |
| { |
| "ce_loss_13": 3.332121789455414, |
| "ce_loss_17": 3.2446430206298826, |
| "ce_loss_2": 4.4034710168838505, |
| "ce_loss_4": 4.1206164240837095, |
| "ce_loss_9": 3.57075754404068, |
| "epoch": 0.714, |
| "grad_norm": 908.0, |
| "kl_loss_13": 223.96689224243164, |
| "kl_loss_2": 2437.3945434570314, |
| "kl_loss_4": 1913.8612976074219, |
| "kl_loss_9": 797.1569885253906, |
| "learning_rate": 0.00019216926233717085, |
| "loss": 1349.5184, |
| "step": 7140 |
| }, |
| { |
| "ce_loss_13": 3.2237551331520082, |
| "ce_loss_17": 3.138021945953369, |
| "ce_loss_2": 4.396461355686188, |
| "ce_loss_4": 4.111767518520355, |
| "ce_loss_9": 3.4798713326454163, |
| "epoch": 0.715, |
| "grad_norm": 972.0, |
| "kl_loss_13": 219.81161651611328, |
| "kl_loss_2": 2606.2385009765626, |
| "kl_loss_4": 2073.2661315917967, |
| "kl_loss_9": 811.4367095947266, |
| "learning_rate": 0.00019092050688969737, |
| "loss": 1417.142, |
| "step": 7150 |
| }, |
| { |
| "ce_loss_13": 3.2940005540847777, |
| "ce_loss_17": 3.20362628698349, |
| "ce_loss_2": 4.379084181785584, |
| "ce_loss_4": 4.09904112815857, |
| "ce_loss_9": 3.5334732294082642, |
| "epoch": 0.716, |
| "grad_norm": 848.0, |
| "kl_loss_13": 221.24933624267578, |
| "kl_loss_2": 2486.9433471679686, |
| "kl_loss_4": 1957.744805908203, |
| "kl_loss_9": 799.6082641601563, |
| "learning_rate": 0.00018967486386928817, |
| "loss": 1363.4369, |
| "step": 7160 |
| }, |
| { |
| "ce_loss_13": 3.1702972769737245, |
| "ce_loss_17": 3.0778634905815125, |
| "ce_loss_2": 4.305674028396607, |
| "ce_loss_4": 4.022684466838837, |
| "ce_loss_9": 3.4266818165779114, |
| "epoch": 0.717, |
| "grad_norm": 904.0, |
| "kl_loss_13": 226.25028381347656, |
| "kl_loss_2": 2569.0533081054687, |
| "kl_loss_4": 2033.7097717285155, |
| "kl_loss_9": 829.1175048828125, |
| "learning_rate": 0.00018843234581955443, |
| "loss": 1438.2367, |
| "step": 7170 |
| }, |
| { |
| "ce_loss_13": 3.179194700717926, |
| "ce_loss_17": 3.0865483403205873, |
| "ce_loss_2": 4.31151237487793, |
| "ce_loss_4": 4.028907465934753, |
| "ce_loss_9": 3.4390278100967406, |
| "epoch": 0.718, |
| "grad_norm": 948.0, |
| "kl_loss_13": 226.59228897094727, |
| "kl_loss_2": 2542.7175537109374, |
| "kl_loss_4": 2013.9704650878907, |
| "kl_loss_9": 827.2505493164062, |
| "learning_rate": 0.00018719296525263924, |
| "loss": 1402.2582, |
| "step": 7180 |
| }, |
| { |
| "ce_loss_13": 3.278154671192169, |
| "ce_loss_17": 3.1906838417053223, |
| "ce_loss_2": 4.349312424659729, |
| "ce_loss_4": 4.067033684253692, |
| "ce_loss_9": 3.516253387928009, |
| "epoch": 0.719, |
| "grad_norm": 880.0, |
| "kl_loss_13": 222.74993591308595, |
| "kl_loss_2": 2434.472521972656, |
| "kl_loss_4": 1906.9119812011718, |
| "kl_loss_9": 784.903076171875, |
| "learning_rate": 0.0001859567346490913, |
| "loss": 1349.8689, |
| "step": 7190 |
| }, |
| { |
| "ce_loss_13": 3.258313977718353, |
| "ce_loss_17": 3.168236827850342, |
| "ce_loss_2": 4.387526202201843, |
| "ce_loss_4": 4.101441133022308, |
| "ce_loss_9": 3.51610072851181, |
| "epoch": 0.72, |
| "grad_norm": 856.0, |
| "kl_loss_13": 229.3596206665039, |
| "kl_loss_2": 2559.0230346679687, |
| "kl_loss_4": 2018.7219665527343, |
| "kl_loss_9": 823.8413452148437, |
| "learning_rate": 0.0001847236664577389, |
| "loss": 1375.0223, |
| "step": 7200 |
| }, |
| { |
| "ce_loss_13": 3.2753131985664368, |
| "ce_loss_17": 3.188183045387268, |
| "ce_loss_2": 4.346803331375122, |
| "ce_loss_4": 4.067043495178223, |
| "ce_loss_9": 3.5119241952896116, |
| "epoch": 0.721, |
| "grad_norm": 704.0, |
| "kl_loss_13": 220.52175750732422, |
| "kl_loss_2": 2430.823876953125, |
| "kl_loss_4": 1905.3960876464844, |
| "kl_loss_9": 784.2279052734375, |
| "learning_rate": 0.00018349377309556487, |
| "loss": 1339.7385, |
| "step": 7210 |
| }, |
| { |
| "ce_loss_13": 3.2280171394348143, |
| "ce_loss_17": 3.1385859727859495, |
| "ce_loss_2": 4.38875195980072, |
| "ce_loss_4": 4.107080328464508, |
| "ce_loss_9": 3.482544815540314, |
| "epoch": 0.722, |
| "grad_norm": 968.0, |
| "kl_loss_13": 230.0931884765625, |
| "kl_loss_2": 2634.4558715820312, |
| "kl_loss_4": 2098.3107788085936, |
| "kl_loss_9": 840.7581634521484, |
| "learning_rate": 0.00018226706694758193, |
| "loss": 1418.1277, |
| "step": 7220 |
| }, |
| { |
| "ce_loss_13": 3.296447491645813, |
| "ce_loss_17": 3.211305522918701, |
| "ce_loss_2": 4.394935607910156, |
| "ce_loss_4": 4.118708693981171, |
| "ce_loss_9": 3.5478051900863647, |
| "epoch": 0.723, |
| "grad_norm": 852.0, |
| "kl_loss_13": 222.28013763427734, |
| "kl_loss_2": 2493.9412231445312, |
| "kl_loss_4": 1977.3378601074219, |
| "kl_loss_9": 808.938442993164, |
| "learning_rate": 0.0001810435603667075, |
| "loss": 1414.4257, |
| "step": 7230 |
| }, |
| { |
| "ce_loss_13": 3.1549424529075623, |
| "ce_loss_17": 3.0660014152526855, |
| "ce_loss_2": 4.281372559070587, |
| "ce_loss_4": 3.996079409122467, |
| "ce_loss_9": 3.404166269302368, |
| "epoch": 0.724, |
| "grad_norm": 828.0, |
| "kl_loss_13": 219.93065490722657, |
| "kl_loss_2": 2518.232568359375, |
| "kl_loss_4": 1995.6101806640625, |
| "kl_loss_9": 800.1742858886719, |
| "learning_rate": 0.0001798232656736389, |
| "loss": 1407.6965, |
| "step": 7240 |
| }, |
| { |
| "ce_loss_13": 3.317498469352722, |
| "ce_loss_17": 3.2285274028778077, |
| "ce_loss_2": 4.385501897335052, |
| "ce_loss_4": 4.1082599401474, |
| "ce_loss_9": 3.556129503250122, |
| "epoch": 0.725, |
| "grad_norm": 880.0, |
| "kl_loss_13": 221.1670364379883, |
| "kl_loss_2": 2411.0855346679687, |
| "kl_loss_4": 1893.3787780761718, |
| "kl_loss_9": 786.5548400878906, |
| "learning_rate": 0.0001786061951567303, |
| "loss": 1358.4174, |
| "step": 7250 |
| }, |
| { |
| "ce_loss_13": 3.234435260295868, |
| "ce_loss_17": 3.142055535316467, |
| "ce_loss_2": 4.351441478729248, |
| "ce_loss_4": 4.06730625629425, |
| "ce_loss_9": 3.4882951974868774, |
| "epoch": 0.726, |
| "grad_norm": 892.0, |
| "kl_loss_13": 225.69800262451173, |
| "kl_loss_2": 2510.493542480469, |
| "kl_loss_4": 1979.3406616210937, |
| "kl_loss_9": 811.0598449707031, |
| "learning_rate": 0.00017739236107186857, |
| "loss": 1396.1227, |
| "step": 7260 |
| }, |
| { |
| "ce_loss_13": 3.3244032144546507, |
| "ce_loss_17": 3.242356812953949, |
| "ce_loss_2": 4.383695316314697, |
| "ce_loss_4": 4.103539967536927, |
| "ce_loss_9": 3.5522592067718506, |
| "epoch": 0.727, |
| "grad_norm": 760.0, |
| "kl_loss_13": 216.82111511230468, |
| "kl_loss_2": 2413.560302734375, |
| "kl_loss_4": 1890.413836669922, |
| "kl_loss_9": 777.4590789794922, |
| "learning_rate": 0.00017618177564234904, |
| "loss": 1350.7613, |
| "step": 7270 |
| }, |
| { |
| "ce_loss_13": 3.2978416681289673, |
| "ce_loss_17": 3.215206229686737, |
| "ce_loss_2": 4.36386650800705, |
| "ce_loss_4": 4.079239988327027, |
| "ce_loss_9": 3.5300705313682554, |
| "epoch": 0.728, |
| "grad_norm": 784.0, |
| "kl_loss_13": 216.17998352050782, |
| "kl_loss_2": 2402.7852172851562, |
| "kl_loss_4": 1871.9532531738282, |
| "kl_loss_9": 776.7924713134765, |
| "learning_rate": 0.00017497445105875377, |
| "loss": 1346.1324, |
| "step": 7280 |
| }, |
| { |
| "ce_loss_13": 3.2075208187103272, |
| "ce_loss_17": 3.1188966035842896, |
| "ce_loss_2": 4.358772337436676, |
| "ce_loss_4": 4.065756452083588, |
| "ce_loss_9": 3.4693151712417603, |
| "epoch": 0.729, |
| "grad_norm": 932.0, |
| "kl_loss_13": 225.47571411132813, |
| "kl_loss_2": 2579.8472900390625, |
| "kl_loss_4": 2035.7959228515624, |
| "kl_loss_9": 829.8043212890625, |
| "learning_rate": 0.000173770399478828, |
| "loss": 1402.1396, |
| "step": 7290 |
| }, |
| { |
| "ce_loss_13": 3.1399217367172243, |
| "ce_loss_17": 3.0544590830802916, |
| "ce_loss_2": 4.252683699131012, |
| "ce_loss_4": 3.9701715111732483, |
| "ce_loss_9": 3.3831241011619566, |
| "epoch": 0.73, |
| "grad_norm": 880.0, |
| "kl_loss_13": 220.23760681152345, |
| "kl_loss_2": 2528.6279418945314, |
| "kl_loss_4": 1993.2266418457032, |
| "kl_loss_9": 804.466421508789, |
| "learning_rate": 0.0001725696330273575, |
| "loss": 1415.7393, |
| "step": 7300 |
| }, |
| { |
| "ce_loss_13": 3.31300311088562, |
| "ce_loss_17": 3.2254587531089784, |
| "ce_loss_2": 4.393245458602905, |
| "ce_loss_4": 4.113193070888519, |
| "ce_loss_9": 3.5561506271362306, |
| "epoch": 0.731, |
| "grad_norm": 824.0, |
| "kl_loss_13": 219.36753540039064, |
| "kl_loss_2": 2430.49404296875, |
| "kl_loss_4": 1910.1047485351562, |
| "kl_loss_9": 791.7362030029296, |
| "learning_rate": 0.00017137216379604724, |
| "loss": 1345.908, |
| "step": 7310 |
| }, |
| { |
| "ce_loss_13": 3.1978121995925903, |
| "ce_loss_17": 3.108300507068634, |
| "ce_loss_2": 4.323741781711578, |
| "ce_loss_4": 4.038744950294495, |
| "ce_loss_9": 3.4428383469581605, |
| "epoch": 0.732, |
| "grad_norm": 844.0, |
| "kl_loss_13": 222.1520118713379, |
| "kl_loss_2": 2518.7337036132812, |
| "kl_loss_4": 1982.4679138183594, |
| "kl_loss_9": 799.7935150146484, |
| "learning_rate": 0.00017017800384339925, |
| "loss": 1380.7736, |
| "step": 7320 |
| }, |
| { |
| "ce_loss_13": 3.156403136253357, |
| "ce_loss_17": 3.065125834941864, |
| "ce_loss_2": 4.316664326190948, |
| "ce_loss_4": 4.025813055038452, |
| "ce_loss_9": 3.4149357318878173, |
| "epoch": 0.733, |
| "grad_norm": 884.0, |
| "kl_loss_13": 226.70897674560547, |
| "kl_loss_2": 2602.639514160156, |
| "kl_loss_4": 2055.229699707031, |
| "kl_loss_9": 830.0261199951171, |
| "learning_rate": 0.00016898716519459073, |
| "loss": 1374.1618, |
| "step": 7330 |
| }, |
| { |
| "ce_loss_13": 3.277767014503479, |
| "ce_loss_17": 3.1817540645599367, |
| "ce_loss_2": 4.433549690246582, |
| "ce_loss_4": 4.139484691619873, |
| "ce_loss_9": 3.5390387058258055, |
| "epoch": 0.734, |
| "grad_norm": 772.0, |
| "kl_loss_13": 233.6792755126953, |
| "kl_loss_2": 2582.837268066406, |
| "kl_loss_4": 2035.7763244628907, |
| "kl_loss_9": 838.448696899414, |
| "learning_rate": 0.00016779965984135375, |
| "loss": 1398.3605, |
| "step": 7340 |
| }, |
| { |
| "ce_loss_13": 3.191946041584015, |
| "ce_loss_17": 3.103343439102173, |
| "ce_loss_2": 4.313434028625489, |
| "ce_loss_4": 4.029736876487732, |
| "ce_loss_9": 3.440126359462738, |
| "epoch": 0.735, |
| "grad_norm": 948.0, |
| "kl_loss_13": 217.35691680908204, |
| "kl_loss_2": 2509.741491699219, |
| "kl_loss_4": 1971.9896362304687, |
| "kl_loss_9": 795.3908996582031, |
| "learning_rate": 0.00016661549974185424, |
| "loss": 1375.2754, |
| "step": 7350 |
| }, |
| { |
| "ce_loss_13": 3.224069058895111, |
| "ce_loss_17": 3.1362082242965696, |
| "ce_loss_2": 4.3418581008911135, |
| "ce_loss_4": 4.0584205150604244, |
| "ce_loss_9": 3.476341438293457, |
| "epoch": 0.736, |
| "grad_norm": 876.0, |
| "kl_loss_13": 224.64472122192382, |
| "kl_loss_2": 2512.541149902344, |
| "kl_loss_4": 1973.3987854003906, |
| "kl_loss_9": 809.9782562255859, |
| "learning_rate": 0.00016543469682057105, |
| "loss": 1361.8506, |
| "step": 7360 |
| }, |
| { |
| "ce_loss_13": 3.253961670398712, |
| "ce_loss_17": 3.160890281200409, |
| "ce_loss_2": 4.364241003990173, |
| "ce_loss_4": 4.082494986057282, |
| "ce_loss_9": 3.5027183890342712, |
| "epoch": 0.737, |
| "grad_norm": 808.0, |
| "kl_loss_13": 228.54029541015626, |
| "kl_loss_2": 2522.8011840820313, |
| "kl_loss_4": 1982.2555847167969, |
| "kl_loss_9": 816.8658660888672, |
| "learning_rate": 0.00016425726296817632, |
| "loss": 1377.8166, |
| "step": 7370 |
| }, |
| { |
| "ce_loss_13": 3.2597399234771727, |
| "ce_loss_17": 3.1757564544677734, |
| "ce_loss_2": 4.356501781940461, |
| "ce_loss_4": 4.079467332363128, |
| "ce_loss_9": 3.512023890018463, |
| "epoch": 0.738, |
| "grad_norm": 1072.0, |
| "kl_loss_13": 219.32396545410157, |
| "kl_loss_2": 2449.466442871094, |
| "kl_loss_4": 1930.436944580078, |
| "kl_loss_9": 794.3770080566406, |
| "learning_rate": 0.00016308321004141607, |
| "loss": 1359.7994, |
| "step": 7380 |
| }, |
| { |
| "ce_loss_13": 3.2198528170585634, |
| "ce_loss_17": 3.127401065826416, |
| "ce_loss_2": 4.346443819999695, |
| "ce_loss_4": 4.062979590892792, |
| "ce_loss_9": 3.47401909828186, |
| "epoch": 0.739, |
| "grad_norm": 812.0, |
| "kl_loss_13": 230.59588851928712, |
| "kl_loss_2": 2524.584228515625, |
| "kl_loss_4": 1997.1767822265624, |
| "kl_loss_9": 820.2922180175781, |
| "learning_rate": 0.00016191254986299043, |
| "loss": 1374.4784, |
| "step": 7390 |
| }, |
| { |
| "ce_loss_13": 3.255491924285889, |
| "ce_loss_17": 3.1759058117866514, |
| "ce_loss_2": 4.3369375467300415, |
| "ce_loss_4": 4.065928339958191, |
| "ce_loss_9": 3.494213354587555, |
| "epoch": 0.74, |
| "grad_norm": 944.0, |
| "kl_loss_13": 216.01871643066406, |
| "kl_loss_2": 2462.201806640625, |
| "kl_loss_4": 1948.6968017578124, |
| "kl_loss_9": 791.6112121582031, |
| "learning_rate": 0.00016074529422143398, |
| "loss": 1387.2176, |
| "step": 7400 |
| }, |
| { |
| "ce_loss_13": 3.221555936336517, |
| "ce_loss_17": 3.1326343417167664, |
| "ce_loss_2": 4.347753095626831, |
| "ce_loss_4": 4.072192120552063, |
| "ce_loss_9": 3.4735021352767945, |
| "epoch": 0.741, |
| "grad_norm": 1000.0, |
| "kl_loss_13": 225.79469375610353, |
| "kl_loss_2": 2528.604949951172, |
| "kl_loss_4": 2012.8276062011719, |
| "kl_loss_9": 806.7053344726562, |
| "learning_rate": 0.0001595814548709983, |
| "loss": 1403.0246, |
| "step": 7410 |
| }, |
| { |
| "ce_loss_13": 3.28202338218689, |
| "ce_loss_17": 3.1919385194778442, |
| "ce_loss_2": 4.406007695198059, |
| "ce_loss_4": 4.126522958278656, |
| "ce_loss_9": 3.5409831047058105, |
| "epoch": 0.742, |
| "grad_norm": 792.0, |
| "kl_loss_13": 229.86243286132813, |
| "kl_loss_2": 2534.54462890625, |
| "kl_loss_4": 2010.2397216796876, |
| "kl_loss_9": 826.1360229492187, |
| "learning_rate": 0.00015842104353153285, |
| "loss": 1388.7898, |
| "step": 7420 |
| }, |
| { |
| "ce_loss_13": 3.297808051109314, |
| "ce_loss_17": 3.2083394885063172, |
| "ce_loss_2": 4.4050181865692135, |
| "ce_loss_4": 4.127640378475189, |
| "ce_loss_9": 3.5505934834480284, |
| "epoch": 0.743, |
| "grad_norm": 916.0, |
| "kl_loss_13": 227.13286361694335, |
| "kl_loss_2": 2497.8704833984375, |
| "kl_loss_4": 1975.397967529297, |
| "kl_loss_9": 813.0075134277344, |
| "learning_rate": 0.0001572640718883667, |
| "loss": 1407.1898, |
| "step": 7430 |
| }, |
| { |
| "ce_loss_13": 3.234146702289581, |
| "ce_loss_17": 3.15252799987793, |
| "ce_loss_2": 4.325962245464325, |
| "ce_loss_4": 4.048633146286011, |
| "ce_loss_9": 3.475199341773987, |
| "epoch": 0.744, |
| "grad_norm": 820.0, |
| "kl_loss_13": 218.3878601074219, |
| "kl_loss_2": 2449.649865722656, |
| "kl_loss_4": 1933.0609924316407, |
| "kl_loss_9": 787.0348846435547, |
| "learning_rate": 0.0001561105515921915, |
| "loss": 1386.6887, |
| "step": 7440 |
| }, |
| { |
| "ce_loss_13": 3.093195152282715, |
| "ce_loss_17": 3.0062808632850646, |
| "ce_loss_2": 4.255380797386169, |
| "ce_loss_4": 3.971154308319092, |
| "ce_loss_9": 3.3545236349105836, |
| "epoch": 0.745, |
| "grad_norm": 1032.0, |
| "kl_loss_13": 219.13532485961915, |
| "kl_loss_2": 2606.2638549804688, |
| "kl_loss_4": 2064.4847900390623, |
| "kl_loss_9": 822.8829315185546, |
| "learning_rate": 0.0001549604942589441, |
| "loss": 1388.2781, |
| "step": 7450 |
| }, |
| { |
| "ce_loss_13": 3.2698857188224792, |
| "ce_loss_17": 3.1861334443092346, |
| "ce_loss_2": 4.322661006450653, |
| "ce_loss_4": 4.046770060062409, |
| "ce_loss_9": 3.5081974029541017, |
| "epoch": 0.746, |
| "grad_norm": 1104.0, |
| "kl_loss_13": 214.92021484375, |
| "kl_loss_2": 2376.7243041992188, |
| "kl_loss_4": 1856.7019348144531, |
| "kl_loss_9": 762.9657653808594, |
| "learning_rate": 0.00015381391146968864, |
| "loss": 1332.993, |
| "step": 7460 |
| }, |
| { |
| "ce_loss_13": 3.245181119441986, |
| "ce_loss_17": 3.1607391595840455, |
| "ce_loss_2": 4.360982942581177, |
| "ce_loss_4": 4.081872379779815, |
| "ce_loss_9": 3.4913008332252504, |
| "epoch": 0.747, |
| "grad_norm": 964.0, |
| "kl_loss_13": 215.42116088867186, |
| "kl_loss_2": 2485.310693359375, |
| "kl_loss_4": 1958.4087768554687, |
| "kl_loss_9": 787.5557434082032, |
| "learning_rate": 0.00015267081477050133, |
| "loss": 1376.3338, |
| "step": 7470 |
| }, |
| { |
| "ce_loss_13": 3.337534523010254, |
| "ce_loss_17": 3.247822880744934, |
| "ce_loss_2": 4.406154370307922, |
| "ce_loss_4": 4.132781744003296, |
| "ce_loss_9": 3.5795215010643004, |
| "epoch": 0.748, |
| "grad_norm": 824.0, |
| "kl_loss_13": 226.4216209411621, |
| "kl_loss_2": 2433.3333740234375, |
| "kl_loss_4": 1907.99150390625, |
| "kl_loss_9": 797.8165679931641, |
| "learning_rate": 0.00015153121567235335, |
| "loss": 1342.7906, |
| "step": 7480 |
| }, |
| { |
| "ce_loss_13": 3.238274133205414, |
| "ce_loss_17": 3.151008331775665, |
| "ce_loss_2": 4.366896986961365, |
| "ce_loss_4": 4.0749584317207335, |
| "ce_loss_9": 3.4854373693466187, |
| "epoch": 0.749, |
| "grad_norm": 764.0, |
| "kl_loss_13": 222.78318939208984, |
| "kl_loss_2": 2544.0747802734377, |
| "kl_loss_4": 2004.0062744140625, |
| "kl_loss_9": 811.2469543457031, |
| "learning_rate": 0.00015039512565099468, |
| "loss": 1348.1119, |
| "step": 7490 |
| }, |
| { |
| "ce_loss_13": 3.300297403335571, |
| "ce_loss_17": 3.214410126209259, |
| "ce_loss_2": 4.385453414916992, |
| "ce_loss_4": 4.104611361026764, |
| "ce_loss_9": 3.5460037708282472, |
| "epoch": 0.75, |
| "grad_norm": 984.0, |
| "kl_loss_13": 221.63871307373046, |
| "kl_loss_2": 2468.0780029296875, |
| "kl_loss_4": 1939.5006408691406, |
| "kl_loss_9": 798.3361328125, |
| "learning_rate": 0.00014926255614683932, |
| "loss": 1414.0303, |
| "step": 7500 |
| }, |
| { |
| "ce_loss_13": 3.238014888763428, |
| "ce_loss_17": 3.1520272970199583, |
| "ce_loss_2": 4.333132290840149, |
| "ce_loss_4": 4.04829728603363, |
| "ce_loss_9": 3.479207384586334, |
| "epoch": 0.751, |
| "grad_norm": 888.0, |
| "kl_loss_13": 220.70538864135742, |
| "kl_loss_2": 2480.5853515625, |
| "kl_loss_4": 1948.4291564941407, |
| "kl_loss_9": 794.6212036132813, |
| "learning_rate": 0.0001481335185648498, |
| "loss": 1371.1186, |
| "step": 7510 |
| }, |
| { |
| "ce_loss_13": 3.257987916469574, |
| "ce_loss_17": 3.1700197219848634, |
| "ce_loss_2": 4.354377293586731, |
| "ce_loss_4": 4.074113178253174, |
| "ce_loss_9": 3.5023501396179197, |
| "epoch": 0.752, |
| "grad_norm": 808.0, |
| "kl_loss_13": 221.5806770324707, |
| "kl_loss_2": 2484.6394287109374, |
| "kl_loss_4": 1951.444921875, |
| "kl_loss_9": 799.4147277832031, |
| "learning_rate": 0.0001470080242744218, |
| "loss": 1356.298, |
| "step": 7520 |
| }, |
| { |
| "ce_loss_13": 3.253499674797058, |
| "ce_loss_17": 3.1705570101737974, |
| "ce_loss_2": 4.36003748178482, |
| "ce_loss_4": 4.08024080991745, |
| "ce_loss_9": 3.4953670382499693, |
| "epoch": 0.753, |
| "grad_norm": 740.0, |
| "kl_loss_13": 215.0956657409668, |
| "kl_loss_2": 2494.137902832031, |
| "kl_loss_4": 1969.0171203613281, |
| "kl_loss_9": 791.6822387695313, |
| "learning_rate": 0.0001458860846092705, |
| "loss": 1376.8581, |
| "step": 7530 |
| }, |
| { |
| "ce_loss_13": 3.2914599180221558, |
| "ce_loss_17": 3.205676829814911, |
| "ce_loss_2": 4.357267689704895, |
| "ce_loss_4": 4.08239254951477, |
| "ce_loss_9": 3.530220794677734, |
| "epoch": 0.754, |
| "grad_norm": 812.0, |
| "kl_loss_13": 222.4879379272461, |
| "kl_loss_2": 2419.254675292969, |
| "kl_loss_4": 1902.8656677246095, |
| "kl_loss_9": 786.6647888183594, |
| "learning_rate": 0.00014476771086731566, |
| "loss": 1333.1204, |
| "step": 7540 |
| }, |
| { |
| "ce_loss_13": 3.3799967169761658, |
| "ce_loss_17": 3.287744390964508, |
| "ce_loss_2": 4.465925884246826, |
| "ce_loss_4": 4.189502060413361, |
| "ce_loss_9": 3.6242950916290284, |
| "epoch": 0.755, |
| "grad_norm": 808.0, |
| "kl_loss_13": 228.28051300048827, |
| "kl_loss_2": 2444.6833618164064, |
| "kl_loss_4": 1923.5794189453125, |
| "kl_loss_9": 796.6243194580078, |
| "learning_rate": 0.00014365291431056872, |
| "loss": 1394.6869, |
| "step": 7550 |
| }, |
| { |
| "ce_loss_13": 3.225019657611847, |
| "ce_loss_17": 3.132712996006012, |
| "ce_loss_2": 4.347153151035309, |
| "ce_loss_4": 4.051355564594269, |
| "ce_loss_9": 3.4836304903030397, |
| "epoch": 0.756, |
| "grad_norm": 840.0, |
| "kl_loss_13": 230.89688949584962, |
| "kl_loss_2": 2557.0416870117188, |
| "kl_loss_4": 1999.4586608886718, |
| "kl_loss_9": 831.4675872802734, |
| "learning_rate": 0.00014254170616501827, |
| "loss": 1390.0566, |
| "step": 7560 |
| }, |
| { |
| "ce_loss_13": 3.1649601578712465, |
| "ce_loss_17": 3.0718032717704773, |
| "ce_loss_2": 4.333395147323609, |
| "ce_loss_4": 4.044786882400513, |
| "ce_loss_9": 3.4387746930122374, |
| "epoch": 0.757, |
| "grad_norm": 1072.0, |
| "kl_loss_13": 231.36233291625976, |
| "kl_loss_2": 2611.2241821289062, |
| "kl_loss_4": 2072.9087646484377, |
| "kl_loss_9": 849.7628021240234, |
| "learning_rate": 0.0001414340976205183, |
| "loss": 1432.4323, |
| "step": 7570 |
| }, |
| { |
| "ce_loss_13": 3.176026093959808, |
| "ce_loss_17": 3.086432921886444, |
| "ce_loss_2": 4.32662969827652, |
| "ce_loss_4": 4.035133969783783, |
| "ce_loss_9": 3.4280447244644163, |
| "epoch": 0.758, |
| "grad_norm": 992.0, |
| "kl_loss_13": 224.3409797668457, |
| "kl_loss_2": 2564.575231933594, |
| "kl_loss_4": 2021.018817138672, |
| "kl_loss_9": 811.0707885742188, |
| "learning_rate": 0.00014033009983067452, |
| "loss": 1382.5868, |
| "step": 7580 |
| }, |
| { |
| "ce_loss_13": 3.3284544706344605, |
| "ce_loss_17": 3.2429365515708923, |
| "ce_loss_2": 4.38847074508667, |
| "ce_loss_4": 4.107561874389648, |
| "ce_loss_9": 3.5644681453704834, |
| "epoch": 0.759, |
| "grad_norm": 852.0, |
| "kl_loss_13": 215.6023063659668, |
| "kl_loss_2": 2403.9462280273438, |
| "kl_loss_4": 1879.6187683105468, |
| "kl_loss_9": 769.9466003417969, |
| "learning_rate": 0.00013922972391273224, |
| "loss": 1342.4143, |
| "step": 7590 |
| }, |
| { |
| "ce_loss_13": 3.332418477535248, |
| "ce_loss_17": 3.2462657570838926, |
| "ce_loss_2": 4.44138286113739, |
| "ce_loss_4": 4.168942725658416, |
| "ce_loss_9": 3.576342451572418, |
| "epoch": 0.76, |
| "grad_norm": 908.0, |
| "kl_loss_13": 219.82144470214843, |
| "kl_loss_2": 2486.4855834960936, |
| "kl_loss_4": 1969.8103454589843, |
| "kl_loss_9": 792.1762268066407, |
| "learning_rate": 0.0001381329809474649, |
| "loss": 1373.4414, |
| "step": 7600 |
| }, |
| { |
| "ce_loss_13": 3.238096904754639, |
| "ce_loss_17": 3.1457011461257935, |
| "ce_loss_2": 4.386391186714173, |
| "ce_loss_4": 4.109273302555084, |
| "ce_loss_9": 3.499604892730713, |
| "epoch": 0.761, |
| "grad_norm": 932.0, |
| "kl_loss_13": 227.62478408813476, |
| "kl_loss_2": 2582.0073120117186, |
| "kl_loss_4": 2052.8338012695312, |
| "kl_loss_9": 828.7124328613281, |
| "learning_rate": 0.0001370398819790621, |
| "loss": 1408.2457, |
| "step": 7610 |
| }, |
| { |
| "ce_loss_13": 3.368555212020874, |
| "ce_loss_17": 3.2829229831695557, |
| "ce_loss_2": 4.447155952453613, |
| "ce_loss_4": 4.16901433467865, |
| "ce_loss_9": 3.6069896459579467, |
| "epoch": 0.762, |
| "grad_norm": 840.0, |
| "kl_loss_13": 221.8843147277832, |
| "kl_loss_2": 2438.428125, |
| "kl_loss_4": 1909.4388610839844, |
| "kl_loss_9": 787.5963348388672, |
| "learning_rate": 0.00013595043801501794, |
| "loss": 1336.3828, |
| "step": 7620 |
| }, |
| { |
| "ce_loss_13": 3.1797441363334658, |
| "ce_loss_17": 3.086809766292572, |
| "ce_loss_2": 4.358413362503052, |
| "ce_loss_4": 4.072304320335388, |
| "ce_loss_9": 3.4420668482780457, |
| "epoch": 0.763, |
| "grad_norm": 1020.0, |
| "kl_loss_13": 227.9445999145508, |
| "kl_loss_2": 2637.14560546875, |
| "kl_loss_4": 2099.4191650390626, |
| "kl_loss_9": 834.0781097412109, |
| "learning_rate": 0.00013486466002602133, |
| "loss": 1413.9376, |
| "step": 7630 |
| }, |
| { |
| "ce_loss_13": 3.2811490416526796, |
| "ce_loss_17": 3.196494662761688, |
| "ce_loss_2": 4.352040338516235, |
| "ce_loss_4": 4.07087630033493, |
| "ce_loss_9": 3.5168084025382997, |
| "epoch": 0.764, |
| "grad_norm": 804.0, |
| "kl_loss_13": 218.68697357177734, |
| "kl_loss_2": 2446.8120971679687, |
| "kl_loss_4": 1918.7146240234374, |
| "kl_loss_9": 788.9490905761719, |
| "learning_rate": 0.00013378255894584462, |
| "loss": 1391.0102, |
| "step": 7640 |
| }, |
| { |
| "ce_loss_13": 3.225428247451782, |
| "ce_loss_17": 3.1329970717430116, |
| "ce_loss_2": 4.361479806900024, |
| "ce_loss_4": 4.0807177186012265, |
| "ce_loss_9": 3.4798406839370726, |
| "epoch": 0.765, |
| "grad_norm": 744.0, |
| "kl_loss_13": 228.18805541992188, |
| "kl_loss_2": 2544.268359375, |
| "kl_loss_4": 2016.472607421875, |
| "kl_loss_9": 818.3882904052734, |
| "learning_rate": 0.0001327041456712334, |
| "loss": 1394.5432, |
| "step": 7650 |
| }, |
| { |
| "ce_loss_13": 3.2621197938919066, |
| "ce_loss_17": 3.1707888722419737, |
| "ce_loss_2": 4.3665901780128475, |
| "ce_loss_4": 4.0881025195121765, |
| "ce_loss_9": 3.5097957134246824, |
| "epoch": 0.766, |
| "grad_norm": 1000.0, |
| "kl_loss_13": 227.44260940551757, |
| "kl_loss_2": 2510.399560546875, |
| "kl_loss_4": 1978.4738037109375, |
| "kl_loss_9": 821.5803588867187, |
| "learning_rate": 0.00013162943106179747, |
| "loss": 1392.5841, |
| "step": 7660 |
| }, |
| { |
| "ce_loss_13": 3.2373632073402403, |
| "ce_loss_17": 3.1501285076141357, |
| "ce_loss_2": 4.328226685523987, |
| "ce_loss_4": 4.046550655364991, |
| "ce_loss_9": 3.482451915740967, |
| "epoch": 0.767, |
| "grad_norm": 972.0, |
| "kl_loss_13": 220.82966690063478, |
| "kl_loss_2": 2476.5694213867187, |
| "kl_loss_4": 1943.77783203125, |
| "kl_loss_9": 797.8186798095703, |
| "learning_rate": 0.00013055842593990132, |
| "loss": 1366.713, |
| "step": 7670 |
| }, |
| { |
| "ce_loss_13": 3.185035467147827, |
| "ce_loss_17": 3.09632488489151, |
| "ce_loss_2": 4.28573968410492, |
| "ce_loss_4": 4.005963659286499, |
| "ce_loss_9": 3.4342915773391725, |
| "epoch": 0.768, |
| "grad_norm": 996.0, |
| "kl_loss_13": 218.43871612548827, |
| "kl_loss_2": 2466.1605834960938, |
| "kl_loss_4": 1941.8172729492187, |
| "kl_loss_9": 797.7214660644531, |
| "learning_rate": 0.00012949114109055414, |
| "loss": 1390.7108, |
| "step": 7680 |
| }, |
| { |
| "ce_loss_13": 3.2294776797294618, |
| "ce_loss_17": 3.1401471734046935, |
| "ce_loss_2": 4.346489596366882, |
| "ce_loss_4": 4.064988732337952, |
| "ce_loss_9": 3.481194865703583, |
| "epoch": 0.769, |
| "grad_norm": 1008.0, |
| "kl_loss_13": 223.74182357788087, |
| "kl_loss_2": 2512.917614746094, |
| "kl_loss_4": 1976.859930419922, |
| "kl_loss_9": 806.7765319824218, |
| "learning_rate": 0.00012842758726130281, |
| "loss": 1392.6047, |
| "step": 7690 |
| }, |
| { |
| "ce_loss_13": 3.2692777037620546, |
| "ce_loss_17": 3.179296886920929, |
| "ce_loss_2": 4.411483407020569, |
| "ce_loss_4": 4.129411828517914, |
| "ce_loss_9": 3.5285709738731383, |
| "epoch": 0.77, |
| "grad_norm": 920.0, |
| "kl_loss_13": 228.36310424804688, |
| "kl_loss_2": 2550.5869750976562, |
| "kl_loss_4": 2015.7442749023437, |
| "kl_loss_9": 819.3382629394531, |
| "learning_rate": 0.00012736777516212267, |
| "loss": 1378.3677, |
| "step": 7700 |
| }, |
| { |
| "ce_loss_13": 3.267863190174103, |
| "ce_loss_17": 3.176251482963562, |
| "ce_loss_2": 4.380473399162293, |
| "ce_loss_4": 4.095113825798035, |
| "ce_loss_9": 3.522014319896698, |
| "epoch": 0.771, |
| "grad_norm": 856.0, |
| "kl_loss_13": 228.07180633544922, |
| "kl_loss_2": 2508.365673828125, |
| "kl_loss_4": 1971.7255798339843, |
| "kl_loss_9": 820.9787353515625, |
| "learning_rate": 0.00012631171546530968, |
| "loss": 1358.1248, |
| "step": 7710 |
| }, |
| { |
| "ce_loss_13": 3.274967110157013, |
| "ce_loss_17": 3.1833778023719788, |
| "ce_loss_2": 4.3787302613258365, |
| "ce_loss_4": 4.093603134155273, |
| "ce_loss_9": 3.5262497425079347, |
| "epoch": 0.772, |
| "grad_norm": 828.0, |
| "kl_loss_13": 228.67630462646486, |
| "kl_loss_2": 2492.828680419922, |
| "kl_loss_4": 1959.4551879882813, |
| "kl_loss_9": 814.0563110351562, |
| "learning_rate": 0.00012525941880537307, |
| "loss": 1397.8371, |
| "step": 7720 |
| }, |
| { |
| "ce_loss_13": 3.3043895840644835, |
| "ce_loss_17": 3.214349794387817, |
| "ce_loss_2": 4.397526240348816, |
| "ce_loss_4": 4.123982179164886, |
| "ce_loss_9": 3.5513405799865723, |
| "epoch": 0.773, |
| "grad_norm": 736.0, |
| "kl_loss_13": 221.13384170532225, |
| "kl_loss_2": 2456.335302734375, |
| "kl_loss_4": 1933.7284301757813, |
| "kl_loss_9": 792.3969940185547, |
| "learning_rate": 0.00012421089577892869, |
| "loss": 1359.0197, |
| "step": 7730 |
| }, |
| { |
| "ce_loss_13": 3.263060200214386, |
| "ce_loss_17": 3.175589680671692, |
| "ce_loss_2": 4.392911100387574, |
| "ce_loss_4": 4.106127560138702, |
| "ce_loss_9": 3.512665033340454, |
| "epoch": 0.774, |
| "grad_norm": 1020.0, |
| "kl_loss_13": 226.73205490112304, |
| "kl_loss_2": 2553.7462646484373, |
| "kl_loss_4": 2013.3649291992188, |
| "kl_loss_9": 819.6057373046875, |
| "learning_rate": 0.0001231661569445919, |
| "loss": 1394.4959, |
| "step": 7740 |
| }, |
| { |
| "ce_loss_13": 3.1302661895751953, |
| "ce_loss_17": 3.0401893138885496, |
| "ce_loss_2": 4.261979520320892, |
| "ce_loss_4": 3.9734166502952575, |
| "ce_loss_9": 3.3824090719223023, |
| "epoch": 0.775, |
| "grad_norm": 904.0, |
| "kl_loss_13": 222.49043731689454, |
| "kl_loss_2": 2536.603405761719, |
| "kl_loss_4": 1994.908349609375, |
| "kl_loss_9": 805.6484436035156, |
| "learning_rate": 0.00012212521282287093, |
| "loss": 1414.9274, |
| "step": 7750 |
| }, |
| { |
| "ce_loss_13": 3.269586420059204, |
| "ce_loss_17": 3.1798322796821594, |
| "ce_loss_2": 4.361129212379455, |
| "ce_loss_4": 4.086302149295807, |
| "ce_loss_9": 3.520177185535431, |
| "epoch": 0.776, |
| "grad_norm": 964.0, |
| "kl_loss_13": 226.48846359252929, |
| "kl_loss_2": 2482.2866088867186, |
| "kl_loss_4": 1954.758721923828, |
| "kl_loss_9": 812.0755065917969, |
| "learning_rate": 0.00012108807389606158, |
| "loss": 1401.2011, |
| "step": 7760 |
| }, |
| { |
| "ce_loss_13": 3.2647178173065186, |
| "ce_loss_17": 3.17938392162323, |
| "ce_loss_2": 4.366236639022827, |
| "ce_loss_4": 4.087905156612396, |
| "ce_loss_9": 3.5066582202911376, |
| "epoch": 0.777, |
| "grad_norm": 836.0, |
| "kl_loss_13": 217.91585845947264, |
| "kl_loss_2": 2479.5460571289063, |
| "kl_loss_4": 1948.29970703125, |
| "kl_loss_9": 787.1430786132812, |
| "learning_rate": 0.00012005475060814159, |
| "loss": 1361.5719, |
| "step": 7770 |
| }, |
| { |
| "ce_loss_13": 3.2002243638038634, |
| "ce_loss_17": 3.1133617520332337, |
| "ce_loss_2": 4.340393137931824, |
| "ce_loss_4": 4.053977739810944, |
| "ce_loss_9": 3.459071171283722, |
| "epoch": 0.778, |
| "grad_norm": 900.0, |
| "kl_loss_13": 224.2657028198242, |
| "kl_loss_2": 2570.114294433594, |
| "kl_loss_4": 2024.2496215820313, |
| "kl_loss_9": 820.0492858886719, |
| "learning_rate": 0.00011902525336466464, |
| "loss": 1396.9895, |
| "step": 7780 |
| }, |
| { |
| "ce_loss_13": 3.1970431566238404, |
| "ce_loss_17": 3.103760826587677, |
| "ce_loss_2": 4.357132995128632, |
| "ce_loss_4": 4.070213985443115, |
| "ce_loss_9": 3.4593411445617677, |
| "epoch": 0.779, |
| "grad_norm": 992.0, |
| "kl_loss_13": 230.84845428466798, |
| "kl_loss_2": 2594.173693847656, |
| "kl_loss_4": 2056.0373413085936, |
| "kl_loss_9": 835.5239166259765, |
| "learning_rate": 0.00011799959253265668, |
| "loss": 1395.5017, |
| "step": 7790 |
| }, |
| { |
| "ce_loss_13": 3.2516624689102174, |
| "ce_loss_17": 3.1630171179771422, |
| "ce_loss_2": 4.381498885154724, |
| "ce_loss_4": 4.092389369010926, |
| "ce_loss_9": 3.5000041365623473, |
| "epoch": 0.78, |
| "grad_norm": 896.0, |
| "kl_loss_13": 224.24137802124022, |
| "kl_loss_2": 2543.176892089844, |
| "kl_loss_4": 2000.6499816894532, |
| "kl_loss_9": 810.3859191894531, |
| "learning_rate": 0.00011697777844051105, |
| "loss": 1387.1945, |
| "step": 7800 |
| }, |
| { |
| "ce_loss_13": 3.238757538795471, |
| "ce_loss_17": 3.1467276573181153, |
| "ce_loss_2": 4.401383852958679, |
| "ce_loss_4": 4.113471186161041, |
| "ce_loss_9": 3.50083167552948, |
| "epoch": 0.781, |
| "grad_norm": 844.0, |
| "kl_loss_13": 227.61635665893556, |
| "kl_loss_2": 2596.8357666015627, |
| "kl_loss_4": 2058.6575561523437, |
| "kl_loss_9": 822.5207214355469, |
| "learning_rate": 0.00011595982137788402, |
| "loss": 1403.5885, |
| "step": 7810 |
| }, |
| { |
| "ce_loss_13": 3.2153478622436524, |
| "ce_loss_17": 3.1282518982887266, |
| "ce_loss_2": 4.302980852127075, |
| "ce_loss_4": 4.023877811431885, |
| "ce_loss_9": 3.4552973389625548, |
| "epoch": 0.782, |
| "grad_norm": 860.0, |
| "kl_loss_13": 221.30579071044923, |
| "kl_loss_2": 2456.2185791015627, |
| "kl_loss_4": 1924.213067626953, |
| "kl_loss_9": 792.0626220703125, |
| "learning_rate": 0.00011494573159559212, |
| "loss": 1366.739, |
| "step": 7820 |
| }, |
| { |
| "ce_loss_13": 3.2062780380249025, |
| "ce_loss_17": 3.1137363791465758, |
| "ce_loss_2": 4.321243810653686, |
| "ce_loss_4": 4.038500916957855, |
| "ce_loss_9": 3.4549594283103944, |
| "epoch": 0.783, |
| "grad_norm": 1216.0, |
| "kl_loss_13": 225.67907943725587, |
| "kl_loss_2": 2535.0178344726564, |
| "kl_loss_4": 1995.6634094238282, |
| "kl_loss_9": 817.3709289550782, |
| "learning_rate": 0.00011393551930550828, |
| "loss": 1414.698, |
| "step": 7830 |
| }, |
| { |
| "ce_loss_13": 3.337601363658905, |
| "ce_loss_17": 3.2485817551612852, |
| "ce_loss_2": 4.408281898498535, |
| "ce_loss_4": 4.128136658668518, |
| "ce_loss_9": 3.578730881214142, |
| "epoch": 0.784, |
| "grad_norm": 960.0, |
| "kl_loss_13": 223.8050994873047, |
| "kl_loss_2": 2447.8237915039062, |
| "kl_loss_4": 1917.9582885742188, |
| "kl_loss_9": 797.3851867675781, |
| "learning_rate": 0.00011292919468045875, |
| "loss": 1359.6032, |
| "step": 7840 |
| }, |
| { |
| "ce_loss_13": 3.2895867943763735, |
| "ce_loss_17": 3.2018993496894836, |
| "ce_loss_2": 4.3929472208023075, |
| "ce_loss_4": 4.110674726963043, |
| "ce_loss_9": 3.5345945596694945, |
| "epoch": 0.785, |
| "grad_norm": 776.0, |
| "kl_loss_13": 225.29066696166993, |
| "kl_loss_2": 2485.1955078125, |
| "kl_loss_4": 1956.9943969726562, |
| "kl_loss_9": 807.8281188964844, |
| "learning_rate": 0.00011192676785412154, |
| "loss": 1363.5742, |
| "step": 7850 |
| }, |
| { |
| "ce_loss_13": 3.2329471349716186, |
| "ce_loss_17": 3.137916612625122, |
| "ce_loss_2": 4.379679560661316, |
| "ce_loss_4": 4.094557797908783, |
| "ce_loss_9": 3.4846953392028808, |
| "epoch": 0.786, |
| "grad_norm": 920.0, |
| "kl_loss_13": 226.50506896972655, |
| "kl_loss_2": 2566.345947265625, |
| "kl_loss_4": 2029.9564208984375, |
| "kl_loss_9": 819.1044738769531, |
| "learning_rate": 0.00011092824892092374, |
| "loss": 1396.7059, |
| "step": 7860 |
| }, |
| { |
| "ce_loss_13": 3.165910518169403, |
| "ce_loss_17": 3.078306519985199, |
| "ce_loss_2": 4.317526865005493, |
| "ce_loss_4": 4.033228135108947, |
| "ce_loss_9": 3.4250346302986143, |
| "epoch": 0.787, |
| "grad_norm": 1016.0, |
| "kl_loss_13": 224.9335792541504, |
| "kl_loss_2": 2580.2394287109373, |
| "kl_loss_4": 2040.8598937988281, |
| "kl_loss_9": 818.8031555175781, |
| "learning_rate": 0.0001099336479359398, |
| "loss": 1381.4417, |
| "step": 7870 |
| }, |
| { |
| "ce_loss_13": 3.284831154346466, |
| "ce_loss_17": 3.1973366737365723, |
| "ce_loss_2": 4.364708030223847, |
| "ce_loss_4": 4.0878499269485475, |
| "ce_loss_9": 3.5242127060890196, |
| "epoch": 0.788, |
| "grad_norm": 864.0, |
| "kl_loss_13": 222.17998046875, |
| "kl_loss_2": 2458.079833984375, |
| "kl_loss_4": 1931.173651123047, |
| "kl_loss_9": 795.1588012695313, |
| "learning_rate": 0.00010894297491479043, |
| "loss": 1369.8168, |
| "step": 7880 |
| }, |
| { |
| "ce_loss_13": 3.2719725012779235, |
| "ce_loss_17": 3.1863118171691895, |
| "ce_loss_2": 4.376298451423645, |
| "ce_loss_4": 4.100898790359497, |
| "ce_loss_9": 3.5160131096839904, |
| "epoch": 0.789, |
| "grad_norm": 900.0, |
| "kl_loss_13": 220.7770088195801, |
| "kl_loss_2": 2485.175817871094, |
| "kl_loss_4": 1966.8784790039062, |
| "kl_loss_9": 797.678369140625, |
| "learning_rate": 0.00010795623983354214, |
| "loss": 1362.425, |
| "step": 7890 |
| }, |
| { |
| "ce_loss_13": 3.1782204031944277, |
| "ce_loss_17": 3.085512375831604, |
| "ce_loss_2": 4.306517696380615, |
| "ce_loss_4": 4.022887492179871, |
| "ce_loss_9": 3.4331058263778687, |
| "epoch": 0.79, |
| "grad_norm": 956.0, |
| "kl_loss_13": 231.71184539794922, |
| "kl_loss_2": 2557.1060546875, |
| "kl_loss_4": 2026.078564453125, |
| "kl_loss_9": 837.0126892089844, |
| "learning_rate": 0.00010697345262860636, |
| "loss": 1389.1147, |
| "step": 7900 |
| }, |
| { |
| "ce_loss_13": 3.306474280357361, |
| "ce_loss_17": 3.219396984577179, |
| "ce_loss_2": 4.38658037185669, |
| "ce_loss_4": 4.106796336174011, |
| "ce_loss_9": 3.541022026538849, |
| "epoch": 0.791, |
| "grad_norm": 880.0, |
| "kl_loss_13": 223.46587905883788, |
| "kl_loss_2": 2451.1871826171873, |
| "kl_loss_4": 1928.5599304199218, |
| "kl_loss_9": 792.5600524902344, |
| "learning_rate": 0.00010599462319663906, |
| "loss": 1352.1263, |
| "step": 7910 |
| }, |
| { |
| "ce_loss_13": 3.2779975056648256, |
| "ce_loss_17": 3.1927762985229493, |
| "ce_loss_2": 4.347692286968231, |
| "ce_loss_4": 4.067851769924164, |
| "ce_loss_9": 3.516193914413452, |
| "epoch": 0.792, |
| "grad_norm": 760.0, |
| "kl_loss_13": 220.21947784423827, |
| "kl_loss_2": 2420.2115966796873, |
| "kl_loss_4": 1895.0272338867187, |
| "kl_loss_9": 786.0841003417969, |
| "learning_rate": 0.00010501976139444191, |
| "loss": 1340.7148, |
| "step": 7920 |
| }, |
| { |
| "ce_loss_13": 3.298214519023895, |
| "ce_loss_17": 3.2132441997528076, |
| "ce_loss_2": 4.385530972480774, |
| "ce_loss_4": 4.1142360210418705, |
| "ce_loss_9": 3.539467179775238, |
| "epoch": 0.793, |
| "grad_norm": 1004.0, |
| "kl_loss_13": 218.51279830932617, |
| "kl_loss_2": 2446.5368530273436, |
| "kl_loss_4": 1936.372821044922, |
| "kl_loss_9": 784.8643157958984, |
| "learning_rate": 0.0001040488770388625, |
| "loss": 1376.316, |
| "step": 7930 |
| }, |
| { |
| "ce_loss_13": 3.2538429379463194, |
| "ce_loss_17": 3.1726126432418824, |
| "ce_loss_2": 4.36536066532135, |
| "ce_loss_4": 4.0814009547233585, |
| "ce_loss_9": 3.5021591544151307, |
| "epoch": 0.794, |
| "grad_norm": 972.0, |
| "kl_loss_13": 222.11438446044923, |
| "kl_loss_2": 2510.11923828125, |
| "kl_loss_4": 1978.3949035644532, |
| "kl_loss_9": 807.1800170898438, |
| "learning_rate": 0.00010308197990669538, |
| "loss": 1369.7728, |
| "step": 7940 |
| }, |
| { |
| "ce_loss_13": 3.367190444469452, |
| "ce_loss_17": 3.276266264915466, |
| "ce_loss_2": 4.4568297386169435, |
| "ce_loss_4": 4.1817707300186155, |
| "ce_loss_9": 3.6101149559020995, |
| "epoch": 0.795, |
| "grad_norm": 788.0, |
| "kl_loss_13": 225.43220977783204, |
| "kl_loss_2": 2474.580187988281, |
| "kl_loss_4": 1953.5517517089843, |
| "kl_loss_9": 810.1820434570312, |
| "learning_rate": 0.0001021190797345839, |
| "loss": 1361.9895, |
| "step": 7950 |
| }, |
| { |
| "ce_loss_13": 3.099755311012268, |
| "ce_loss_17": 3.0065363526344298, |
| "ce_loss_2": 4.2668628096580505, |
| "ce_loss_4": 3.971409797668457, |
| "ce_loss_9": 3.365699326992035, |
| "epoch": 0.796, |
| "grad_norm": 1440.0, |
| "kl_loss_13": 234.1839828491211, |
| "kl_loss_2": 2625.586181640625, |
| "kl_loss_4": 2070.3461181640623, |
| "kl_loss_9": 856.1321990966796, |
| "learning_rate": 0.00010116018621892236, |
| "loss": 1403.2432, |
| "step": 7960 |
| }, |
| { |
| "ce_loss_13": 3.303152871131897, |
| "ce_loss_17": 3.2116121888160705, |
| "ce_loss_2": 4.418788051605224, |
| "ce_loss_4": 4.14488252401352, |
| "ce_loss_9": 3.559329104423523, |
| "epoch": 0.797, |
| "grad_norm": 948.0, |
| "kl_loss_13": 232.96103744506837, |
| "kl_loss_2": 2525.059765625, |
| "kl_loss_4": 2007.8031982421876, |
| "kl_loss_9": 827.8689270019531, |
| "learning_rate": 0.00010020530901575753, |
| "loss": 1359.8641, |
| "step": 7970 |
| }, |
| { |
| "ce_loss_13": 3.327608060836792, |
| "ce_loss_17": 3.2403406143188476, |
| "ce_loss_2": 4.4137204647064205, |
| "ce_loss_4": 4.131820201873779, |
| "ce_loss_9": 3.5678149819374085, |
| "epoch": 0.798, |
| "grad_norm": 788.0, |
| "kl_loss_13": 227.06566467285157, |
| "kl_loss_2": 2479.300988769531, |
| "kl_loss_4": 1944.6218566894531, |
| "kl_loss_9": 806.4063873291016, |
| "learning_rate": 9.925445774069231e-05, |
| "loss": 1347.6822, |
| "step": 7980 |
| }, |
| { |
| "ce_loss_13": 3.2811261892318724, |
| "ce_loss_17": 3.1919934153556824, |
| "ce_loss_2": 4.382939958572388, |
| "ce_loss_4": 4.101240575313568, |
| "ce_loss_9": 3.5340216994285583, |
| "epoch": 0.799, |
| "grad_norm": 848.0, |
| "kl_loss_13": 223.93359756469727, |
| "kl_loss_2": 2468.969140625, |
| "kl_loss_4": 1939.6381713867188, |
| "kl_loss_9": 802.2064453125, |
| "learning_rate": 9.830764196878872e-05, |
| "loss": 1342.0732, |
| "step": 7990 |
| }, |
| { |
| "ce_loss_13": 3.2254366874694824, |
| "ce_loss_17": 3.1382691740989683, |
| "ce_loss_2": 4.335188496112823, |
| "ce_loss_4": 4.052461290359497, |
| "ce_loss_9": 3.470421814918518, |
| "epoch": 0.8, |
| "grad_norm": 768.0, |
| "kl_loss_13": 220.74289169311524, |
| "kl_loss_2": 2529.8259887695312, |
| "kl_loss_4": 1995.0783813476562, |
| "kl_loss_9": 807.0941192626954, |
| "learning_rate": 9.736487123447069e-05, |
| "loss": 1375.8162, |
| "step": 8000 |
| }, |
| { |
| "ce_loss_13": 3.1770267128944396, |
| "ce_loss_17": 3.089499294757843, |
| "ce_loss_2": 4.343133449554443, |
| "ce_loss_4": 4.0638148307800295, |
| "ce_loss_9": 3.4387208342552187, |
| "epoch": 0.801, |
| "grad_norm": 724.0, |
| "kl_loss_13": 226.93739852905273, |
| "kl_loss_2": 2637.5811279296877, |
| "kl_loss_4": 2109.3770751953125, |
| "kl_loss_9": 833.6124237060546, |
| "learning_rate": 9.642615503142926e-05, |
| "loss": 1420.8419, |
| "step": 8010 |
| }, |
| { |
| "ce_loss_13": 3.2385539412498474, |
| "ce_loss_17": 3.1498786091804503, |
| "ce_loss_2": 4.371684455871582, |
| "ce_loss_4": 4.083432590961456, |
| "ce_loss_9": 3.4884016513824463, |
| "epoch": 0.802, |
| "grad_norm": 768.0, |
| "kl_loss_13": 221.43769302368165, |
| "kl_loss_2": 2560.348107910156, |
| "kl_loss_4": 2017.5557067871093, |
| "kl_loss_9": 804.5605072021484, |
| "learning_rate": 9.549150281252633e-05, |
| "loss": 1375.2334, |
| "step": 8020 |
| }, |
| { |
| "ce_loss_13": 3.2660260081291197, |
| "ce_loss_17": 3.176693916320801, |
| "ce_loss_2": 4.376864409446716, |
| "ce_loss_4": 4.092126798629761, |
| "ce_loss_9": 3.5163286447525026, |
| "epoch": 0.803, |
| "grad_norm": 792.0, |
| "kl_loss_13": 224.05511550903321, |
| "kl_loss_2": 2511.4217895507813, |
| "kl_loss_4": 1983.4522766113282, |
| "kl_loss_9": 805.1759246826172, |
| "learning_rate": 9.4560923989699e-05, |
| "loss": 1391.521, |
| "step": 8030 |
| }, |
| { |
| "ce_loss_13": 3.260799825191498, |
| "ce_loss_17": 3.172711956501007, |
| "ce_loss_2": 4.364776086807251, |
| "ce_loss_4": 4.083143830299377, |
| "ce_loss_9": 3.5085883378982543, |
| "epoch": 0.804, |
| "grad_norm": 860.0, |
| "kl_loss_13": 223.57135772705078, |
| "kl_loss_2": 2480.294958496094, |
| "kl_loss_4": 1952.1392944335937, |
| "kl_loss_9": 802.3091857910156, |
| "learning_rate": 9.363442793386607e-05, |
| "loss": 1394.5486, |
| "step": 8040 |
| }, |
| { |
| "ce_loss_13": 3.238362693786621, |
| "ce_loss_17": 3.145321083068848, |
| "ce_loss_2": 4.379942286014557, |
| "ce_loss_4": 4.098853194713593, |
| "ce_loss_9": 3.500857079029083, |
| "epoch": 0.805, |
| "grad_norm": 824.0, |
| "kl_loss_13": 228.7298454284668, |
| "kl_loss_2": 2552.064990234375, |
| "kl_loss_4": 2025.6884704589843, |
| "kl_loss_9": 826.6193176269531, |
| "learning_rate": 9.271202397483213e-05, |
| "loss": 1365.5828, |
| "step": 8050 |
| }, |
| { |
| "ce_loss_13": 3.262423515319824, |
| "ce_loss_17": 3.178011214733124, |
| "ce_loss_2": 4.3518139839172365, |
| "ce_loss_4": 4.06558084487915, |
| "ce_loss_9": 3.4967321038246153, |
| "epoch": 0.806, |
| "grad_norm": 1020.0, |
| "kl_loss_13": 219.77361907958985, |
| "kl_loss_2": 2469.5812133789063, |
| "kl_loss_4": 1939.9063659667968, |
| "kl_loss_9": 787.4560791015625, |
| "learning_rate": 9.179372140119524e-05, |
| "loss": 1384.0, |
| "step": 8060 |
| }, |
| { |
| "ce_loss_13": 3.2050329089164733, |
| "ce_loss_17": 3.118261456489563, |
| "ce_loss_2": 4.3080508470535275, |
| "ce_loss_4": 4.029356837272644, |
| "ce_loss_9": 3.451282811164856, |
| "epoch": 0.807, |
| "grad_norm": 916.0, |
| "kl_loss_13": 220.9561798095703, |
| "kl_loss_2": 2495.82890625, |
| "kl_loss_4": 1971.114727783203, |
| "kl_loss_9": 798.6717529296875, |
| "learning_rate": 9.087952946025175e-05, |
| "loss": 1391.709, |
| "step": 8070 |
| }, |
| { |
| "ce_loss_13": 3.311403751373291, |
| "ce_loss_17": 3.2265172481536863, |
| "ce_loss_2": 4.374763298034668, |
| "ce_loss_4": 4.093200993537903, |
| "ce_loss_9": 3.5428014159202577, |
| "epoch": 0.808, |
| "grad_norm": 868.0, |
| "kl_loss_13": 218.15849761962892, |
| "kl_loss_2": 2410.1259155273438, |
| "kl_loss_4": 1888.1170776367187, |
| "kl_loss_9": 773.8412719726563, |
| "learning_rate": 8.996945735790446e-05, |
| "loss": 1363.7383, |
| "step": 8080 |
| }, |
| { |
| "ce_loss_13": 3.2155717730522158, |
| "ce_loss_17": 3.1294687151908875, |
| "ce_loss_2": 4.31129013299942, |
| "ce_loss_4": 4.0278658986091616, |
| "ce_loss_9": 3.461386263370514, |
| "epoch": 0.809, |
| "grad_norm": 844.0, |
| "kl_loss_13": 221.96558990478516, |
| "kl_loss_2": 2490.4510986328123, |
| "kl_loss_4": 1961.0308837890625, |
| "kl_loss_9": 805.1013610839843, |
| "learning_rate": 8.906351425856951e-05, |
| "loss": 1385.3815, |
| "step": 8090 |
| }, |
| { |
| "ce_loss_13": 3.1980334639549257, |
| "ce_loss_17": 3.111038017272949, |
| "ce_loss_2": 4.32276861667633, |
| "ce_loss_4": 4.0459129333496096, |
| "ce_loss_9": 3.447723460197449, |
| "epoch": 0.81, |
| "grad_norm": 996.0, |
| "kl_loss_13": 223.42060012817382, |
| "kl_loss_2": 2552.838977050781, |
| "kl_loss_4": 2024.1286315917969, |
| "kl_loss_9": 815.0195770263672, |
| "learning_rate": 8.816170928508365e-05, |
| "loss": 1398.1475, |
| "step": 8100 |
| }, |
| { |
| "ce_loss_13": 3.16351261138916, |
| "ce_loss_17": 3.0718586325645445, |
| "ce_loss_2": 4.326090204715729, |
| "ce_loss_4": 4.043891990184784, |
| "ce_loss_9": 3.42418794631958, |
| "epoch": 0.811, |
| "grad_norm": 1056.0, |
| "kl_loss_13": 225.75188446044922, |
| "kl_loss_2": 2605.1463012695312, |
| "kl_loss_4": 2069.7749572753905, |
| "kl_loss_9": 829.7225738525391, |
| "learning_rate": 8.7264051518613e-05, |
| "loss": 1403.7536, |
| "step": 8110 |
| }, |
| { |
| "ce_loss_13": 3.2520105719566343, |
| "ce_loss_17": 3.1701902508735658, |
| "ce_loss_2": 4.32922523021698, |
| "ce_loss_4": 4.058504164218903, |
| "ce_loss_9": 3.4897550106048585, |
| "epoch": 0.812, |
| "grad_norm": 904.0, |
| "kl_loss_13": 216.81841125488282, |
| "kl_loss_2": 2438.9434204101562, |
| "kl_loss_4": 1924.6337524414062, |
| "kl_loss_9": 785.5289459228516, |
| "learning_rate": 8.637054999856148e-05, |
| "loss": 1360.0235, |
| "step": 8120 |
| }, |
| { |
| "ce_loss_13": 3.243906044960022, |
| "ce_loss_17": 3.1534186005592346, |
| "ce_loss_2": 4.359775090217591, |
| "ce_loss_4": 4.074860966205597, |
| "ce_loss_9": 3.4959781050682066, |
| "epoch": 0.813, |
| "grad_norm": 892.0, |
| "kl_loss_13": 227.84734191894532, |
| "kl_loss_2": 2518.0587524414063, |
| "kl_loss_4": 1981.924560546875, |
| "kl_loss_9": 811.8103546142578, |
| "learning_rate": 8.548121372247918e-05, |
| "loss": 1397.716, |
| "step": 8130 |
| }, |
| { |
| "ce_loss_13": 3.3110806584358214, |
| "ce_loss_17": 3.2285948991775513, |
| "ce_loss_2": 4.383181643486023, |
| "ce_loss_4": 4.110422492027283, |
| "ce_loss_9": 3.54707248210907, |
| "epoch": 0.814, |
| "grad_norm": 872.0, |
| "kl_loss_13": 220.66705474853515, |
| "kl_loss_2": 2454.8108032226564, |
| "kl_loss_4": 1939.189111328125, |
| "kl_loss_9": 791.5809173583984, |
| "learning_rate": 8.459605164597267e-05, |
| "loss": 1358.9557, |
| "step": 8140 |
| }, |
| { |
| "ce_loss_13": 3.1948183178901672, |
| "ce_loss_17": 3.1102925896644593, |
| "ce_loss_2": 4.316994738578797, |
| "ce_loss_4": 4.033453631401062, |
| "ce_loss_9": 3.441008985042572, |
| "epoch": 0.815, |
| "grad_norm": 804.0, |
| "kl_loss_13": 219.61373138427734, |
| "kl_loss_2": 2517.217492675781, |
| "kl_loss_4": 1983.1307006835937, |
| "kl_loss_9": 801.1011535644532, |
| "learning_rate": 8.371507268261436e-05, |
| "loss": 1384.7771, |
| "step": 8150 |
| }, |
| { |
| "ce_loss_13": 3.2735339283943174, |
| "ce_loss_17": 3.184214413166046, |
| "ce_loss_2": 4.379142904281617, |
| "ce_loss_4": 4.1032023429870605, |
| "ce_loss_9": 3.5239558696746824, |
| "epoch": 0.816, |
| "grad_norm": 812.0, |
| "kl_loss_13": 224.8229133605957, |
| "kl_loss_2": 2489.7993896484377, |
| "kl_loss_4": 1971.6356506347656, |
| "kl_loss_9": 804.9994140625, |
| "learning_rate": 8.283828570385238e-05, |
| "loss": 1349.659, |
| "step": 8160 |
| }, |
| { |
| "ce_loss_13": 3.269688606262207, |
| "ce_loss_17": 3.1809231758117678, |
| "ce_loss_2": 4.383044624328614, |
| "ce_loss_4": 4.097042512893677, |
| "ce_loss_9": 3.5166628479957582, |
| "epoch": 0.817, |
| "grad_norm": 896.0, |
| "kl_loss_13": 223.29241104125975, |
| "kl_loss_2": 2474.5247314453127, |
| "kl_loss_4": 1941.5979431152343, |
| "kl_loss_9": 793.0740325927734, |
| "learning_rate": 8.196569953892202e-05, |
| "loss": 1366.3528, |
| "step": 8170 |
| }, |
| { |
| "ce_loss_13": 3.1958377599716186, |
| "ce_loss_17": 3.1082884073257446, |
| "ce_loss_2": 4.308591604232788, |
| "ce_loss_4": 4.025162780284882, |
| "ce_loss_9": 3.449683403968811, |
| "epoch": 0.818, |
| "grad_norm": 904.0, |
| "kl_loss_13": 222.01957092285156, |
| "kl_loss_2": 2486.2179443359373, |
| "kl_loss_4": 1958.6151123046875, |
| "kl_loss_9": 803.7994873046875, |
| "learning_rate": 8.109732297475635e-05, |
| "loss": 1362.1192, |
| "step": 8180 |
| }, |
| { |
| "ce_loss_13": 3.16897908449173, |
| "ce_loss_17": 3.0717229008674622, |
| "ce_loss_2": 4.355467438697815, |
| "ce_loss_4": 4.067920184135437, |
| "ce_loss_9": 3.4424399971961974, |
| "epoch": 0.819, |
| "grad_norm": 876.0, |
| "kl_loss_13": 232.51748809814453, |
| "kl_loss_2": 2628.5271484375, |
| "kl_loss_4": 2082.767059326172, |
| "kl_loss_9": 848.3743499755859, |
| "learning_rate": 8.023316475589754e-05, |
| "loss": 1419.9205, |
| "step": 8190 |
| }, |
| { |
| "ce_loss_13": 3.1390973687171937, |
| "ce_loss_17": 3.0419116258621215, |
| "ce_loss_2": 4.355739164352417, |
| "ce_loss_4": 4.064283812046051, |
| "ce_loss_9": 3.4155005693435667, |
| "epoch": 0.82, |
| "grad_norm": 1088.0, |
| "kl_loss_13": 239.16704864501952, |
| "kl_loss_2": 2712.7430053710937, |
| "kl_loss_4": 2158.0326416015623, |
| "kl_loss_9": 863.2716064453125, |
| "learning_rate": 7.937323358440934e-05, |
| "loss": 1446.035, |
| "step": 8200 |
| }, |
| { |
| "ce_loss_13": 3.2472149968147277, |
| "ce_loss_17": 3.16400808095932, |
| "ce_loss_2": 4.32280478477478, |
| "ce_loss_4": 4.044382894039154, |
| "ce_loss_9": 3.485769736766815, |
| "epoch": 0.821, |
| "grad_norm": 728.0, |
| "kl_loss_13": 219.56322479248047, |
| "kl_loss_2": 2439.0638916015623, |
| "kl_loss_4": 1912.4187866210937, |
| "kl_loss_9": 790.9305084228515, |
| "learning_rate": 7.851753811978923e-05, |
| "loss": 1361.6016, |
| "step": 8210 |
| }, |
| { |
| "ce_loss_13": 3.267391085624695, |
| "ce_loss_17": 3.178033375740051, |
| "ce_loss_2": 4.391184163093567, |
| "ce_loss_4": 4.105301785469055, |
| "ce_loss_9": 3.5145377993583677, |
| "epoch": 0.822, |
| "grad_norm": 872.0, |
| "kl_loss_13": 225.35082397460937, |
| "kl_loss_2": 2524.5203247070312, |
| "kl_loss_4": 1994.8364807128905, |
| "kl_loss_9": 805.8979400634765, |
| "learning_rate": 7.766608697888095e-05, |
| "loss": 1371.0406, |
| "step": 8220 |
| }, |
| { |
| "ce_loss_13": 3.2764451146125793, |
| "ce_loss_17": 3.1906116366386414, |
| "ce_loss_2": 4.38669867515564, |
| "ce_loss_4": 4.111961352825165, |
| "ce_loss_9": 3.523875415325165, |
| "epoch": 0.823, |
| "grad_norm": 852.0, |
| "kl_loss_13": 225.99702987670898, |
| "kl_loss_2": 2509.3898803710936, |
| "kl_loss_4": 1998.3305908203124, |
| "kl_loss_9": 808.0251861572266, |
| "learning_rate": 7.681888873578785e-05, |
| "loss": 1394.8252, |
| "step": 8230 |
| }, |
| { |
| "ce_loss_13": 3.2139707088470457, |
| "ce_loss_17": 3.1197495341300963, |
| "ce_loss_2": 4.354037642478943, |
| "ce_loss_4": 4.063400399684906, |
| "ce_loss_9": 3.47417665719986, |
| "epoch": 0.824, |
| "grad_norm": 820.0, |
| "kl_loss_13": 229.3338249206543, |
| "kl_loss_2": 2567.2235107421875, |
| "kl_loss_4": 2022.7681396484375, |
| "kl_loss_9": 824.1971496582031, |
| "learning_rate": 7.597595192178702e-05, |
| "loss": 1383.3944, |
| "step": 8240 |
| }, |
| { |
| "ce_loss_13": 3.2053810477256777, |
| "ce_loss_17": 3.116931641101837, |
| "ce_loss_2": 4.370664536952972, |
| "ce_loss_4": 4.077317404747009, |
| "ce_loss_9": 3.4695890665054323, |
| "epoch": 0.825, |
| "grad_norm": 868.0, |
| "kl_loss_13": 228.5384078979492, |
| "kl_loss_2": 2631.160754394531, |
| "kl_loss_4": 2076.3259338378907, |
| "kl_loss_9": 840.345751953125, |
| "learning_rate": 7.513728502524286e-05, |
| "loss": 1416.7643, |
| "step": 8250 |
| }, |
| { |
| "ce_loss_13": 3.2048859000205994, |
| "ce_loss_17": 3.123488414287567, |
| "ce_loss_2": 4.304041302204132, |
| "ce_loss_4": 4.0171948909759525, |
| "ce_loss_9": 3.4498552799224855, |
| "epoch": 0.826, |
| "grad_norm": 760.0, |
| "kl_loss_13": 213.62081069946288, |
| "kl_loss_2": 2462.559436035156, |
| "kl_loss_4": 1932.0973999023438, |
| "kl_loss_9": 782.9279968261719, |
| "learning_rate": 7.430289649152156e-05, |
| "loss": 1377.3469, |
| "step": 8260 |
| }, |
| { |
| "ce_loss_13": 3.120034670829773, |
| "ce_loss_17": 3.0287449955940247, |
| "ce_loss_2": 4.283819329738617, |
| "ce_loss_4": 3.9942748308181764, |
| "ce_loss_9": 3.379044306278229, |
| "epoch": 0.827, |
| "grad_norm": 908.0, |
| "kl_loss_13": 228.2209846496582, |
| "kl_loss_2": 2624.3792358398437, |
| "kl_loss_4": 2076.8033447265625, |
| "kl_loss_9": 839.2672546386718, |
| "learning_rate": 7.347279472290646e-05, |
| "loss": 1398.7959, |
| "step": 8270 |
| }, |
| { |
| "ce_loss_13": 3.251451086997986, |
| "ce_loss_17": 3.1642702221870422, |
| "ce_loss_2": 4.385140514373779, |
| "ce_loss_4": 4.100665700435639, |
| "ce_loss_9": 3.501319396495819, |
| "epoch": 0.828, |
| "grad_norm": 816.0, |
| "kl_loss_13": 222.71439895629882, |
| "kl_loss_2": 2534.687713623047, |
| "kl_loss_4": 2002.5374938964844, |
| "kl_loss_9": 808.1467224121094, |
| "learning_rate": 7.264698807851328e-05, |
| "loss": 1394.8232, |
| "step": 8280 |
| }, |
| { |
| "ce_loss_13": 3.227537226676941, |
| "ce_loss_17": 3.1441309571266176, |
| "ce_loss_2": 4.318798077106476, |
| "ce_loss_4": 4.038094973564148, |
| "ce_loss_9": 3.468658471107483, |
| "epoch": 0.829, |
| "grad_norm": 988.0, |
| "kl_loss_13": 216.05848541259766, |
| "kl_loss_2": 2457.3656616210938, |
| "kl_loss_4": 1931.4304809570312, |
| "kl_loss_9": 781.806396484375, |
| "learning_rate": 7.182548487420554e-05, |
| "loss": 1371.9726, |
| "step": 8290 |
| }, |
| { |
| "ce_loss_13": 3.274464964866638, |
| "ce_loss_17": 3.1883435964584352, |
| "ce_loss_2": 4.370679450035095, |
| "ce_loss_4": 4.0855323553085325, |
| "ce_loss_9": 3.5177146077156065, |
| "epoch": 0.83, |
| "grad_norm": 784.0, |
| "kl_loss_13": 224.5548988342285, |
| "kl_loss_2": 2499.8404541015625, |
| "kl_loss_4": 1961.8424438476563, |
| "kl_loss_9": 807.8698089599609, |
| "learning_rate": 7.100829338251146e-05, |
| "loss": 1365.4091, |
| "step": 8300 |
| }, |
| { |
| "ce_loss_13": 3.2095795631408692, |
| "ce_loss_17": 3.114107370376587, |
| "ce_loss_2": 4.352908825874328, |
| "ce_loss_4": 4.060082828998565, |
| "ce_loss_9": 3.4646241545677183, |
| "epoch": 0.831, |
| "grad_norm": 836.0, |
| "kl_loss_13": 230.04553756713867, |
| "kl_loss_2": 2555.5595703125, |
| "kl_loss_4": 2014.6021118164062, |
| "kl_loss_9": 825.6994018554688, |
| "learning_rate": 7.019542183254046e-05, |
| "loss": 1376.1705, |
| "step": 8310 |
| }, |
| { |
| "ce_loss_13": 3.25156090259552, |
| "ce_loss_17": 3.154491126537323, |
| "ce_loss_2": 4.344011974334717, |
| "ce_loss_4": 4.071551716327667, |
| "ce_loss_9": 3.4982887744903564, |
| "epoch": 0.832, |
| "grad_norm": 1096.0, |
| "kl_loss_13": 231.02550048828124, |
| "kl_loss_2": 2498.6554931640626, |
| "kl_loss_4": 1985.939434814453, |
| "kl_loss_9": 813.9911041259766, |
| "learning_rate": 6.938687840989971e-05, |
| "loss": 1377.9254, |
| "step": 8320 |
| }, |
| { |
| "ce_loss_13": 3.1894129037857057, |
| "ce_loss_17": 3.0963603377342226, |
| "ce_loss_2": 4.306365704536438, |
| "ce_loss_4": 4.032098889350891, |
| "ce_loss_9": 3.4420806527137757, |
| "epoch": 0.833, |
| "grad_norm": 852.0, |
| "kl_loss_13": 227.902921295166, |
| "kl_loss_2": 2507.4160766601562, |
| "kl_loss_4": 1986.3455688476563, |
| "kl_loss_9": 809.3701416015625, |
| "learning_rate": 6.858267125661271e-05, |
| "loss": 1404.0568, |
| "step": 8330 |
| }, |
| { |
| "ce_loss_13": 3.2466275453567506, |
| "ce_loss_17": 3.1548444390296937, |
| "ce_loss_2": 4.367550003528595, |
| "ce_loss_4": 4.073925399780274, |
| "ce_loss_9": 3.4978042125701903, |
| "epoch": 0.834, |
| "grad_norm": 1040.0, |
| "kl_loss_13": 222.23722076416016, |
| "kl_loss_2": 2513.7103271484375, |
| "kl_loss_4": 1971.091064453125, |
| "kl_loss_9": 805.3066009521484, |
| "learning_rate": 6.778280847103668e-05, |
| "loss": 1408.7881, |
| "step": 8340 |
| }, |
| { |
| "ce_loss_13": 3.252854609489441, |
| "ce_loss_17": 3.1623154640197755, |
| "ce_loss_2": 4.352147364616394, |
| "ce_loss_4": 4.074189913272858, |
| "ce_loss_9": 3.500877857208252, |
| "epoch": 0.835, |
| "grad_norm": 804.0, |
| "kl_loss_13": 226.42251586914062, |
| "kl_loss_2": 2507.483642578125, |
| "kl_loss_4": 1978.929998779297, |
| "kl_loss_9": 819.6830261230468, |
| "learning_rate": 6.698729810778065e-05, |
| "loss": 1376.1392, |
| "step": 8350 |
| }, |
| { |
| "ce_loss_13": 3.168547821044922, |
| "ce_loss_17": 3.0786109566688538, |
| "ce_loss_2": 4.308636355400085, |
| "ce_loss_4": 4.02109557390213, |
| "ce_loss_9": 3.4208256483078, |
| "epoch": 0.836, |
| "grad_norm": 984.0, |
| "kl_loss_13": 221.387215423584, |
| "kl_loss_2": 2536.461389160156, |
| "kl_loss_4": 1996.493182373047, |
| "kl_loss_9": 809.0911865234375, |
| "learning_rate": 6.619614817762538e-05, |
| "loss": 1396.3678, |
| "step": 8360 |
| }, |
| { |
| "ce_loss_13": 3.1370506405830385, |
| "ce_loss_17": 3.0419957399368287, |
| "ce_loss_2": 4.319327402114868, |
| "ce_loss_4": 4.035522627830505, |
| "ce_loss_9": 3.4083261847496034, |
| "epoch": 0.837, |
| "grad_norm": 772.0, |
| "kl_loss_13": 227.74701080322265, |
| "kl_loss_2": 2651.0900756835936, |
| "kl_loss_4": 2101.658233642578, |
| "kl_loss_9": 841.8978912353516, |
| "learning_rate": 6.540936664744196e-05, |
| "loss": 1416.3646, |
| "step": 8370 |
| }, |
| { |
| "ce_loss_13": 3.2723100662231444, |
| "ce_loss_17": 3.1818986177444457, |
| "ce_loss_2": 4.400793433189392, |
| "ce_loss_4": 4.1213273167610165, |
| "ce_loss_9": 3.5245197296142576, |
| "epoch": 0.838, |
| "grad_norm": 708.0, |
| "kl_loss_13": 224.46642837524413, |
| "kl_loss_2": 2521.49931640625, |
| "kl_loss_4": 1995.08037109375, |
| "kl_loss_9": 813.4250671386719, |
| "learning_rate": 6.462696144011149e-05, |
| "loss": 1373.2486, |
| "step": 8380 |
| }, |
| { |
| "ce_loss_13": 3.232816243171692, |
| "ce_loss_17": 3.1425030708312987, |
| "ce_loss_2": 4.322551119327545, |
| "ce_loss_4": 4.050675320625305, |
| "ce_loss_9": 3.482021701335907, |
| "epoch": 0.839, |
| "grad_norm": 776.0, |
| "kl_loss_13": 227.35297927856445, |
| "kl_loss_2": 2481.0372314453125, |
| "kl_loss_4": 1966.5402221679688, |
| "kl_loss_9": 817.0650329589844, |
| "learning_rate": 6.384894043444567e-05, |
| "loss": 1359.5428, |
| "step": 8390 |
| }, |
| { |
| "ce_loss_13": 3.258307361602783, |
| "ce_loss_17": 3.1695342540740965, |
| "ce_loss_2": 4.380703735351562, |
| "ce_loss_4": 4.0994639039039615, |
| "ce_loss_9": 3.5110024333000185, |
| "epoch": 0.84, |
| "grad_norm": 808.0, |
| "kl_loss_13": 223.17430267333984, |
| "kl_loss_2": 2517.5744384765626, |
| "kl_loss_4": 1986.7656555175781, |
| "kl_loss_9": 811.2584442138672, |
| "learning_rate": 6.307531146510753e-05, |
| "loss": 1371.5238, |
| "step": 8400 |
| }, |
| { |
| "ce_loss_13": 3.235944426059723, |
| "ce_loss_17": 3.146202564239502, |
| "ce_loss_2": 4.312396574020386, |
| "ce_loss_4": 4.040442144870758, |
| "ce_loss_9": 3.4761090755462645, |
| "epoch": 0.841, |
| "grad_norm": 952.0, |
| "kl_loss_13": 223.23860549926758, |
| "kl_loss_2": 2447.5295654296874, |
| "kl_loss_4": 1927.2985717773438, |
| "kl_loss_9": 797.4351684570313, |
| "learning_rate": 6.230608232253226e-05, |
| "loss": 1345.7262, |
| "step": 8410 |
| }, |
| { |
| "ce_loss_13": 3.1952862620353697, |
| "ce_loss_17": 3.105052101612091, |
| "ce_loss_2": 4.353400325775146, |
| "ce_loss_4": 4.072367346286773, |
| "ce_loss_9": 3.4637001037597654, |
| "epoch": 0.842, |
| "grad_norm": 792.0, |
| "kl_loss_13": 225.7907371520996, |
| "kl_loss_2": 2579.1862182617188, |
| "kl_loss_4": 2052.997265625, |
| "kl_loss_9": 836.5051239013671, |
| "learning_rate": 6.154126075284855e-05, |
| "loss": 1378.55, |
| "step": 8420 |
| }, |
| { |
| "ce_loss_13": 3.2807404160499574, |
| "ce_loss_17": 3.1929176211357118, |
| "ce_loss_2": 4.36737015247345, |
| "ce_loss_4": 4.086746263504028, |
| "ce_loss_9": 3.525919222831726, |
| "epoch": 0.843, |
| "grad_norm": 1040.0, |
| "kl_loss_13": 219.02406463623046, |
| "kl_loss_2": 2433.381689453125, |
| "kl_loss_4": 1912.8984985351562, |
| "kl_loss_9": 789.5070159912109, |
| "learning_rate": 6.078085445780129e-05, |
| "loss": 1340.2408, |
| "step": 8430 |
| }, |
| { |
| "ce_loss_13": 3.283352243900299, |
| "ce_loss_17": 3.198323404788971, |
| "ce_loss_2": 4.410041677951813, |
| "ce_loss_4": 4.130485701560974, |
| "ce_loss_9": 3.5314752221107484, |
| "epoch": 0.844, |
| "grad_norm": 872.0, |
| "kl_loss_13": 224.7509735107422, |
| "kl_loss_2": 2545.0581787109377, |
| "kl_loss_4": 2014.4204528808593, |
| "kl_loss_9": 810.4713958740234, |
| "learning_rate": 6.002487109467347e-05, |
| "loss": 1365.629, |
| "step": 8440 |
| }, |
| { |
| "ce_loss_13": 3.291152811050415, |
| "ce_loss_17": 3.2018171668052675, |
| "ce_loss_2": 4.384028792381287, |
| "ce_loss_4": 4.10359365940094, |
| "ce_loss_9": 3.5427115321159364, |
| "epoch": 0.845, |
| "grad_norm": 904.0, |
| "kl_loss_13": 229.13097381591797, |
| "kl_loss_2": 2494.240515136719, |
| "kl_loss_4": 1955.106201171875, |
| "kl_loss_9": 817.8092803955078, |
| "learning_rate": 5.927331827620902e-05, |
| "loss": 1367.1767, |
| "step": 8450 |
| }, |
| { |
| "ce_loss_13": 3.2765185475349425, |
| "ce_loss_17": 3.189631760120392, |
| "ce_loss_2": 4.343027925491333, |
| "ce_loss_4": 4.065610003471375, |
| "ce_loss_9": 3.518001842498779, |
| "epoch": 0.846, |
| "grad_norm": 896.0, |
| "kl_loss_13": 222.40250549316406, |
| "kl_loss_2": 2419.6857421875, |
| "kl_loss_4": 1898.617413330078, |
| "kl_loss_9": 790.1800750732422, |
| "learning_rate": 5.852620357053651e-05, |
| "loss": 1357.4576, |
| "step": 8460 |
| }, |
| { |
| "ce_loss_13": 3.3102288484573363, |
| "ce_loss_17": 3.228847789764404, |
| "ce_loss_2": 4.391223919391632, |
| "ce_loss_4": 4.115030658245087, |
| "ce_loss_9": 3.5545468807220457, |
| "epoch": 0.847, |
| "grad_norm": 820.0, |
| "kl_loss_13": 217.73869934082032, |
| "kl_loss_2": 2441.4995849609377, |
| "kl_loss_4": 1913.0333374023437, |
| "kl_loss_9": 789.9753662109375, |
| "learning_rate": 5.778353450109286e-05, |
| "loss": 1357.9863, |
| "step": 8470 |
| }, |
| { |
| "ce_loss_13": 3.351598834991455, |
| "ce_loss_17": 3.2602376341819763, |
| "ce_loss_2": 4.461829590797424, |
| "ce_loss_4": 4.1848254799842834, |
| "ce_loss_9": 3.5962899565696715, |
| "epoch": 0.848, |
| "grad_norm": 756.0, |
| "kl_loss_13": 225.3842903137207, |
| "kl_loss_2": 2503.073278808594, |
| "kl_loss_4": 1971.266375732422, |
| "kl_loss_9": 806.8426513671875, |
| "learning_rate": 5.7045318546547206e-05, |
| "loss": 1366.9471, |
| "step": 8480 |
| }, |
| { |
| "ce_loss_13": 3.245673930644989, |
| "ce_loss_17": 3.158996880054474, |
| "ce_loss_2": 4.370895767211914, |
| "ce_loss_4": 4.090060245990753, |
| "ce_loss_9": 3.4955820202827455, |
| "epoch": 0.849, |
| "grad_norm": 940.0, |
| "kl_loss_13": 222.43734130859374, |
| "kl_loss_2": 2538.139599609375, |
| "kl_loss_4": 2004.0116943359376, |
| "kl_loss_9": 809.4167785644531, |
| "learning_rate": 5.631156314072605e-05, |
| "loss": 1368.3263, |
| "step": 8490 |
| }, |
| { |
| "ce_loss_13": 3.2690833806991577, |
| "ce_loss_17": 3.179894721508026, |
| "ce_loss_2": 4.351716184616089, |
| "ce_loss_4": 4.0634235620498655, |
| "ce_loss_9": 3.5045432686805724, |
| "epoch": 0.85, |
| "grad_norm": 768.0, |
| "kl_loss_13": 221.4508071899414, |
| "kl_loss_2": 2457.1086181640626, |
| "kl_loss_4": 1923.715069580078, |
| "kl_loss_9": 787.1172882080078, |
| "learning_rate": 5.5582275672538315e-05, |
| "loss": 1350.1803, |
| "step": 8500 |
| }, |
| { |
| "ce_loss_13": 3.183105933666229, |
| "ce_loss_17": 3.093016564846039, |
| "ce_loss_2": 4.3546016931533815, |
| "ce_loss_4": 4.074822103977203, |
| "ce_loss_9": 3.454826259613037, |
| "epoch": 0.851, |
| "grad_norm": 1004.0, |
| "kl_loss_13": 229.16873321533203, |
| "kl_loss_2": 2609.3549072265623, |
| "kl_loss_4": 2078.700653076172, |
| "kl_loss_9": 840.963134765625, |
| "learning_rate": 5.4857463485900484e-05, |
| "loss": 1412.7694, |
| "step": 8510 |
| }, |
| { |
| "ce_loss_13": 3.244599390029907, |
| "ce_loss_17": 3.1563225984573364, |
| "ce_loss_2": 4.339552807807922, |
| "ce_loss_4": 4.055168533325196, |
| "ce_loss_9": 3.484923315048218, |
| "epoch": 0.852, |
| "grad_norm": 1032.0, |
| "kl_loss_13": 222.49315948486327, |
| "kl_loss_2": 2480.657080078125, |
| "kl_loss_4": 1947.7760437011718, |
| "kl_loss_9": 800.4620849609375, |
| "learning_rate": 5.413713387966329e-05, |
| "loss": 1366.182, |
| "step": 8520 |
| }, |
| { |
| "ce_loss_13": 3.2607167601585387, |
| "ce_loss_17": 3.171521472930908, |
| "ce_loss_2": 4.377828359603882, |
| "ce_loss_4": 4.098320472240448, |
| "ce_loss_9": 3.5070831775665283, |
| "epoch": 0.853, |
| "grad_norm": 1000.0, |
| "kl_loss_13": 222.3227279663086, |
| "kl_loss_2": 2512.6893920898438, |
| "kl_loss_4": 1987.333056640625, |
| "kl_loss_9": 800.6467498779297, |
| "learning_rate": 5.34212941075381e-05, |
| "loss": 1378.2313, |
| "step": 8530 |
| }, |
| { |
| "ce_loss_13": 3.2687427043914794, |
| "ce_loss_17": 3.1869537830352783, |
| "ce_loss_2": 4.355408132076263, |
| "ce_loss_4": 4.07498688697815, |
| "ce_loss_9": 3.5036257147789, |
| "epoch": 0.854, |
| "grad_norm": 848.0, |
| "kl_loss_13": 216.05119094848632, |
| "kl_loss_2": 2458.6519165039062, |
| "kl_loss_4": 1937.5098083496093, |
| "kl_loss_9": 773.2369079589844, |
| "learning_rate": 5.270995137802315e-05, |
| "loss": 1356.591, |
| "step": 8540 |
| }, |
| { |
| "ce_loss_13": 3.209310221672058, |
| "ce_loss_17": 3.1247020721435548, |
| "ce_loss_2": 4.317274355888367, |
| "ce_loss_4": 4.031463468074799, |
| "ce_loss_9": 3.4555763483047484, |
| "epoch": 0.855, |
| "grad_norm": 964.0, |
| "kl_loss_13": 220.22894515991212, |
| "kl_loss_2": 2512.912646484375, |
| "kl_loss_4": 1973.674462890625, |
| "kl_loss_9": 806.1905578613281, |
| "learning_rate": 5.2003112854332125e-05, |
| "loss": 1382.9368, |
| "step": 8550 |
| }, |
| { |
| "ce_loss_13": 3.2004865646362304, |
| "ce_loss_17": 3.1167020320892336, |
| "ce_loss_2": 4.29835809469223, |
| "ce_loss_4": 4.024637043476105, |
| "ce_loss_9": 3.4463637709617614, |
| "epoch": 0.856, |
| "grad_norm": 868.0, |
| "kl_loss_13": 217.68044509887696, |
| "kl_loss_2": 2484.987060546875, |
| "kl_loss_4": 1968.1286682128907, |
| "kl_loss_9": 797.999136352539, |
| "learning_rate": 5.130078565432089e-05, |
| "loss": 1350.2027, |
| "step": 8560 |
| }, |
| { |
| "ce_loss_13": 3.263746190071106, |
| "ce_loss_17": 3.1800696730613707, |
| "ce_loss_2": 4.344775664806366, |
| "ce_loss_4": 4.062954688072205, |
| "ce_loss_9": 3.500784087181091, |
| "epoch": 0.857, |
| "grad_norm": 840.0, |
| "kl_loss_13": 216.57264099121093, |
| "kl_loss_2": 2448.2782592773438, |
| "kl_loss_4": 1921.8090393066407, |
| "kl_loss_9": 782.9766845703125, |
| "learning_rate": 5.060297685041659e-05, |
| "loss": 1335.4072, |
| "step": 8570 |
| }, |
| { |
| "ce_loss_13": 3.201523411273956, |
| "ce_loss_17": 3.1115949749946594, |
| "ce_loss_2": 4.334869360923767, |
| "ce_loss_4": 4.045943570137024, |
| "ce_loss_9": 3.455605125427246, |
| "epoch": 0.858, |
| "grad_norm": 804.0, |
| "kl_loss_13": 226.6846145629883, |
| "kl_loss_2": 2553.7030029296875, |
| "kl_loss_4": 2018.593035888672, |
| "kl_loss_9": 815.6271911621094, |
| "learning_rate": 4.99096934695461e-05, |
| "loss": 1402.5301, |
| "step": 8580 |
| }, |
| { |
| "ce_loss_13": 3.261041224002838, |
| "ce_loss_17": 3.171911871433258, |
| "ce_loss_2": 4.365757715702057, |
| "ce_loss_4": 4.087458348274231, |
| "ce_loss_9": 3.507707965373993, |
| "epoch": 0.859, |
| "grad_norm": 900.0, |
| "kl_loss_13": 219.1694763183594, |
| "kl_loss_2": 2475.396032714844, |
| "kl_loss_4": 1955.4731750488281, |
| "kl_loss_9": 799.1766326904296, |
| "learning_rate": 4.922094249306558e-05, |
| "loss": 1349.7432, |
| "step": 8590 |
| }, |
| { |
| "ce_loss_13": 3.2894827365875243, |
| "ce_loss_17": 3.200601649284363, |
| "ce_loss_2": 4.407066583633423, |
| "ce_loss_4": 4.1291030764579775, |
| "ce_loss_9": 3.54045547246933, |
| "epoch": 0.86, |
| "grad_norm": 1032.0, |
| "kl_loss_13": 224.28192214965821, |
| "kl_loss_2": 2504.247998046875, |
| "kl_loss_4": 1977.733380126953, |
| "kl_loss_9": 804.361083984375, |
| "learning_rate": 4.853673085668947e-05, |
| "loss": 1352.1019, |
| "step": 8600 |
| }, |
| { |
| "ce_loss_13": 3.3038515567779543, |
| "ce_loss_17": 3.218389058113098, |
| "ce_loss_2": 4.410473251342774, |
| "ce_loss_4": 4.132902157306671, |
| "ce_loss_9": 3.5554654121398928, |
| "epoch": 0.861, |
| "grad_norm": 920.0, |
| "kl_loss_13": 220.55741958618165, |
| "kl_loss_2": 2503.7746459960936, |
| "kl_loss_4": 1971.676446533203, |
| "kl_loss_9": 799.8308197021485, |
| "learning_rate": 4.78570654504214e-05, |
| "loss": 1374.1394, |
| "step": 8610 |
| }, |
| { |
| "ce_loss_13": 3.2581624150276185, |
| "ce_loss_17": 3.168742263317108, |
| "ce_loss_2": 4.358493328094482, |
| "ce_loss_4": 4.085207974910736, |
| "ce_loss_9": 3.5058843731880187, |
| "epoch": 0.862, |
| "grad_norm": 892.0, |
| "kl_loss_13": 220.33161697387695, |
| "kl_loss_2": 2508.045520019531, |
| "kl_loss_4": 1978.0421142578125, |
| "kl_loss_9": 808.544076538086, |
| "learning_rate": 4.7181953118484556e-05, |
| "loss": 1374.3772, |
| "step": 8620 |
| }, |
| { |
| "ce_loss_13": 3.279995334148407, |
| "ce_loss_17": 3.1945241332054137, |
| "ce_loss_2": 4.369914996623993, |
| "ce_loss_4": 4.089016485214233, |
| "ce_loss_9": 3.5252942085266112, |
| "epoch": 0.863, |
| "grad_norm": 828.0, |
| "kl_loss_13": 219.30308151245117, |
| "kl_loss_2": 2431.4044799804688, |
| "kl_loss_4": 1906.2814880371093, |
| "kl_loss_9": 791.9886047363282, |
| "learning_rate": 4.651140065925269e-05, |
| "loss": 1378.6559, |
| "step": 8630 |
| }, |
| { |
| "ce_loss_13": 3.215870177745819, |
| "ce_loss_17": 3.1291185855865478, |
| "ce_loss_2": 4.319887256622314, |
| "ce_loss_4": 4.040187931060791, |
| "ce_loss_9": 3.4598678231239317, |
| "epoch": 0.864, |
| "grad_norm": 936.0, |
| "kl_loss_13": 221.21563720703125, |
| "kl_loss_2": 2501.269885253906, |
| "kl_loss_4": 1973.7274597167968, |
| "kl_loss_9": 794.120004272461, |
| "learning_rate": 4.58454148251814e-05, |
| "loss": 1387.3803, |
| "step": 8640 |
| }, |
| { |
| "ce_loss_13": 3.2270374059677125, |
| "ce_loss_17": 3.1369571685791016, |
| "ce_loss_2": 4.367625904083252, |
| "ce_loss_4": 4.087838864326477, |
| "ce_loss_9": 3.4836596488952636, |
| "epoch": 0.865, |
| "grad_norm": 880.0, |
| "kl_loss_13": 221.12657623291017, |
| "kl_loss_2": 2561.509228515625, |
| "kl_loss_4": 2022.5290649414062, |
| "kl_loss_9": 814.7313110351563, |
| "learning_rate": 4.518400232274078e-05, |
| "loss": 1382.3643, |
| "step": 8650 |
| }, |
| { |
| "ce_loss_13": 3.2557000517845154, |
| "ce_loss_17": 3.164890241622925, |
| "ce_loss_2": 4.360116624832154, |
| "ce_loss_4": 4.0815647602081295, |
| "ce_loss_9": 3.508148229122162, |
| "epoch": 0.866, |
| "grad_norm": 900.0, |
| "kl_loss_13": 222.03219909667968, |
| "kl_loss_2": 2485.7266479492187, |
| "kl_loss_4": 1953.6076049804688, |
| "kl_loss_9": 803.0507751464844, |
| "learning_rate": 4.452716981234745e-05, |
| "loss": 1337.3979, |
| "step": 8660 |
| }, |
| { |
| "ce_loss_13": 3.22855224609375, |
| "ce_loss_17": 3.142552173137665, |
| "ce_loss_2": 4.328233432769776, |
| "ce_loss_4": 4.0453173279762265, |
| "ce_loss_9": 3.474174642562866, |
| "epoch": 0.867, |
| "grad_norm": 728.0, |
| "kl_loss_13": 217.2768867492676, |
| "kl_loss_2": 2481.0482788085938, |
| "kl_loss_4": 1949.9904907226562, |
| "kl_loss_9": 793.4791473388672, |
| "learning_rate": 4.3874923908297335e-05, |
| "loss": 1340.4432, |
| "step": 8670 |
| }, |
| { |
| "ce_loss_13": 3.2772523283958437, |
| "ce_loss_17": 3.1918304562568665, |
| "ce_loss_2": 4.394083786010742, |
| "ce_loss_4": 4.117776727676391, |
| "ce_loss_9": 3.527587914466858, |
| "epoch": 0.868, |
| "grad_norm": 900.0, |
| "kl_loss_13": 225.22350311279297, |
| "kl_loss_2": 2528.2052978515626, |
| "kl_loss_4": 1999.0821472167968, |
| "kl_loss_9": 809.065219116211, |
| "learning_rate": 4.322727117869951e-05, |
| "loss": 1373.7883, |
| "step": 8680 |
| }, |
| { |
| "ce_loss_13": 3.2888386487960815, |
| "ce_loss_17": 3.2018653035163878, |
| "ce_loss_2": 4.402938723564148, |
| "ce_loss_4": 4.119437623023987, |
| "ce_loss_9": 3.538691055774689, |
| "epoch": 0.869, |
| "grad_norm": 840.0, |
| "kl_loss_13": 223.04277420043945, |
| "kl_loss_2": 2519.4125, |
| "kl_loss_4": 1982.2539611816405, |
| "kl_loss_9": 806.7682952880859, |
| "learning_rate": 4.2584218145409916e-05, |
| "loss": 1362.452, |
| "step": 8690 |
| }, |
| { |
| "ce_loss_13": 3.3250046491622927, |
| "ce_loss_17": 3.240601694583893, |
| "ce_loss_2": 4.394601058959961, |
| "ce_loss_4": 4.111003625392914, |
| "ce_loss_9": 3.55549818277359, |
| "epoch": 0.87, |
| "grad_norm": 872.0, |
| "kl_loss_13": 217.65272903442383, |
| "kl_loss_2": 2426.7046630859377, |
| "kl_loss_4": 1895.7978881835938, |
| "kl_loss_9": 776.6824890136719, |
| "learning_rate": 4.194577128396521e-05, |
| "loss": 1331.2094, |
| "step": 8700 |
| }, |
| { |
| "ce_loss_13": 3.2114205598831176, |
| "ce_loss_17": 3.125608801841736, |
| "ce_loss_2": 4.321311450004577, |
| "ce_loss_4": 4.031610798835755, |
| "ce_loss_9": 3.456533634662628, |
| "epoch": 0.871, |
| "grad_norm": 844.0, |
| "kl_loss_13": 216.6657241821289, |
| "kl_loss_2": 2510.70712890625, |
| "kl_loss_4": 1970.7602111816407, |
| "kl_loss_9": 790.9755737304688, |
| "learning_rate": 4.1311937023518264e-05, |
| "loss": 1387.0572, |
| "step": 8710 |
| }, |
| { |
| "ce_loss_13": 3.232364630699158, |
| "ce_loss_17": 3.148038387298584, |
| "ce_loss_2": 4.373801684379577, |
| "ce_loss_4": 4.097211575508117, |
| "ce_loss_9": 3.468158745765686, |
| "epoch": 0.872, |
| "grad_norm": 720.0, |
| "kl_loss_13": 211.67377624511718, |
| "kl_loss_2": 2562.7600952148437, |
| "kl_loss_4": 2052.1681518554688, |
| "kl_loss_9": 771.7417144775391, |
| "learning_rate": 4.0682721746773344e-05, |
| "loss": 1372.3551, |
| "step": 8720 |
| }, |
| { |
| "ce_loss_13": 3.102079999446869, |
| "ce_loss_17": 3.0143698692321776, |
| "ce_loss_2": 4.2561737418174745, |
| "ce_loss_4": 3.9776964664459227, |
| "ce_loss_9": 3.363087069988251, |
| "epoch": 0.873, |
| "grad_norm": 924.0, |
| "kl_loss_13": 219.80954208374024, |
| "kl_loss_2": 2556.7317504882812, |
| "kl_loss_4": 2028.6144714355469, |
| "kl_loss_9": 812.2474212646484, |
| "learning_rate": 4.0058131789920904e-05, |
| "loss": 1360.6398, |
| "step": 8730 |
| }, |
| { |
| "ce_loss_13": 3.248383116722107, |
| "ce_loss_17": 3.161784315109253, |
| "ce_loss_2": 4.343278455734253, |
| "ce_loss_4": 4.05727082490921, |
| "ce_loss_9": 3.497367000579834, |
| "epoch": 0.874, |
| "grad_norm": 844.0, |
| "kl_loss_13": 217.63532638549805, |
| "kl_loss_2": 2498.145330810547, |
| "kl_loss_4": 1953.8746948242188, |
| "kl_loss_9": 805.5346405029297, |
| "learning_rate": 3.9438173442575e-05, |
| "loss": 1406.1834, |
| "step": 8740 |
| }, |
| { |
| "ce_loss_13": 3.281778025627136, |
| "ce_loss_17": 3.196896195411682, |
| "ce_loss_2": 4.369906187057495, |
| "ce_loss_4": 4.090480637550354, |
| "ce_loss_9": 3.5244454741477966, |
| "epoch": 0.875, |
| "grad_norm": 868.0, |
| "kl_loss_13": 219.32987289428712, |
| "kl_loss_2": 2450.8204956054688, |
| "kl_loss_4": 1926.434814453125, |
| "kl_loss_9": 790.7694763183594, |
| "learning_rate": 3.882285294770937e-05, |
| "loss": 1355.4023, |
| "step": 8750 |
| }, |
| { |
| "ce_loss_13": 3.240086781978607, |
| "ce_loss_17": 3.1548407316207885, |
| "ce_loss_2": 4.320746099948883, |
| "ce_loss_4": 4.042723560333252, |
| "ce_loss_9": 3.4843781352043153, |
| "epoch": 0.876, |
| "grad_norm": 784.0, |
| "kl_loss_13": 218.81257400512695, |
| "kl_loss_2": 2455.314660644531, |
| "kl_loss_4": 1923.2606140136718, |
| "kl_loss_9": 790.0522674560547, |
| "learning_rate": 3.821217650159453e-05, |
| "loss": 1372.3299, |
| "step": 8760 |
| }, |
| { |
| "ce_loss_13": 3.120124650001526, |
| "ce_loss_17": 3.0304124116897584, |
| "ce_loss_2": 4.283944201469422, |
| "ce_loss_4": 3.992121458053589, |
| "ce_loss_9": 3.380226469039917, |
| "epoch": 0.877, |
| "grad_norm": 1016.0, |
| "kl_loss_13": 225.32861557006837, |
| "kl_loss_2": 2588.8431640625, |
| "kl_loss_4": 2045.7957092285155, |
| "kl_loss_9": 827.8635314941406, |
| "learning_rate": 3.760615025373543e-05, |
| "loss": 1390.5456, |
| "step": 8770 |
| }, |
| { |
| "ce_loss_13": 3.2935760855674743, |
| "ce_loss_17": 3.2032875776290894, |
| "ce_loss_2": 4.4244472742080685, |
| "ce_loss_4": 4.134840977191925, |
| "ce_loss_9": 3.5494131326675413, |
| "epoch": 0.878, |
| "grad_norm": 860.0, |
| "kl_loss_13": 230.5771156311035, |
| "kl_loss_2": 2556.13828125, |
| "kl_loss_4": 2006.4092956542968, |
| "kl_loss_9": 819.290087890625, |
| "learning_rate": 3.700478030680987e-05, |
| "loss": 1403.566, |
| "step": 8780 |
| }, |
| { |
| "ce_loss_13": 3.287939488887787, |
| "ce_loss_17": 3.2011399269104004, |
| "ce_loss_2": 4.388074493408203, |
| "ce_loss_4": 4.11102727651596, |
| "ce_loss_9": 3.533255708217621, |
| "epoch": 0.879, |
| "grad_norm": 948.0, |
| "kl_loss_13": 219.78177642822266, |
| "kl_loss_2": 2485.790423583984, |
| "kl_loss_4": 1970.4736206054688, |
| "kl_loss_9": 799.7017120361328, |
| "learning_rate": 3.6408072716606344e-05, |
| "loss": 1363.0672, |
| "step": 8790 |
| }, |
| { |
| "ce_loss_13": 3.2155078053474426, |
| "ce_loss_17": 3.1282681703567503, |
| "ce_loss_2": 4.348038697242737, |
| "ce_loss_4": 4.072812831401825, |
| "ce_loss_9": 3.4732912302017214, |
| "epoch": 0.88, |
| "grad_norm": 860.0, |
| "kl_loss_13": 224.64749984741212, |
| "kl_loss_2": 2551.444091796875, |
| "kl_loss_4": 2021.2731201171875, |
| "kl_loss_9": 820.0504577636718, |
| "learning_rate": 3.5816033491963716e-05, |
| "loss": 1417.9936, |
| "step": 8800 |
| }, |
| { |
| "ce_loss_13": 3.0825191020965574, |
| "ce_loss_17": 2.9929514050483705, |
| "ce_loss_2": 4.254282987117767, |
| "ce_loss_4": 3.964124834537506, |
| "ce_loss_9": 3.3385504484176636, |
| "epoch": 0.881, |
| "grad_norm": 1120.0, |
| "kl_loss_13": 220.25969619750975, |
| "kl_loss_2": 2613.3452026367186, |
| "kl_loss_4": 2069.54580078125, |
| "kl_loss_9": 813.1832702636718, |
| "learning_rate": 3.522866859471047e-05, |
| "loss": 1398.5448, |
| "step": 8810 |
| }, |
| { |
| "ce_loss_13": 3.3029095411300657, |
| "ce_loss_17": 3.219867527484894, |
| "ce_loss_2": 4.354422855377197, |
| "ce_loss_4": 4.080214440822601, |
| "ce_loss_9": 3.5335610270500184, |
| "epoch": 0.882, |
| "grad_norm": 956.0, |
| "kl_loss_13": 212.41907119750977, |
| "kl_loss_2": 2378.804553222656, |
| "kl_loss_4": 1868.8552673339843, |
| "kl_loss_9": 765.5652740478515, |
| "learning_rate": 3.46459839396045e-05, |
| "loss": 1339.9922, |
| "step": 8820 |
| }, |
| { |
| "ce_loss_13": 3.2258768677711487, |
| "ce_loss_17": 3.1318113446235656, |
| "ce_loss_2": 4.353494000434876, |
| "ce_loss_4": 4.058047711849213, |
| "ce_loss_9": 3.482666862010956, |
| "epoch": 0.883, |
| "grad_norm": 872.0, |
| "kl_loss_13": 224.29499282836915, |
| "kl_loss_2": 2510.57294921875, |
| "kl_loss_4": 1963.8523803710937, |
| "kl_loss_9": 807.906283569336, |
| "learning_rate": 3.406798539427386e-05, |
| "loss": 1405.065, |
| "step": 8830 |
| }, |
| { |
| "ce_loss_13": 3.2840797901153564, |
| "ce_loss_17": 3.196672427654266, |
| "ce_loss_2": 4.386753487586975, |
| "ce_loss_4": 4.109513545036316, |
| "ce_loss_9": 3.5331713199615478, |
| "epoch": 0.884, |
| "grad_norm": 924.0, |
| "kl_loss_13": 221.75431213378906, |
| "kl_loss_2": 2507.6088134765623, |
| "kl_loss_4": 1989.5658813476562, |
| "kl_loss_9": 803.8614959716797, |
| "learning_rate": 3.349467877915746e-05, |
| "loss": 1380.0859, |
| "step": 8840 |
| }, |
| { |
| "ce_loss_13": 3.244016110897064, |
| "ce_loss_17": 3.1553622484207153, |
| "ce_loss_2": 4.372105979919434, |
| "ce_loss_4": 4.091294658184052, |
| "ce_loss_9": 3.4970618724822997, |
| "epoch": 0.885, |
| "grad_norm": 1072.0, |
| "kl_loss_13": 223.21151428222657, |
| "kl_loss_2": 2563.3557739257812, |
| "kl_loss_4": 2030.5114196777345, |
| "kl_loss_9": 818.3362365722656, |
| "learning_rate": 3.292606986744667e-05, |
| "loss": 1425.2906, |
| "step": 8850 |
| }, |
| { |
| "ce_loss_13": 3.199081063270569, |
| "ce_loss_17": 3.117172658443451, |
| "ce_loss_2": 4.32207328081131, |
| "ce_loss_4": 4.0382969856262205, |
| "ce_loss_9": 3.4497790217399595, |
| "epoch": 0.886, |
| "grad_norm": 800.0, |
| "kl_loss_13": 216.29458923339843, |
| "kl_loss_2": 2513.0485717773436, |
| "kl_loss_4": 1982.37470703125, |
| "kl_loss_9": 806.0659027099609, |
| "learning_rate": 3.23621643850267e-05, |
| "loss": 1377.8373, |
| "step": 8860 |
| }, |
| { |
| "ce_loss_13": 3.27376788854599, |
| "ce_loss_17": 3.18737016916275, |
| "ce_loss_2": 4.3785954236984255, |
| "ce_loss_4": 4.0984990954399105, |
| "ce_loss_9": 3.5208308696746826, |
| "epoch": 0.887, |
| "grad_norm": 1024.0, |
| "kl_loss_13": 226.7053695678711, |
| "kl_loss_2": 2506.8286010742186, |
| "kl_loss_4": 1970.566473388672, |
| "kl_loss_9": 810.0457061767578, |
| "learning_rate": 3.180296801041971e-05, |
| "loss": 1359.2627, |
| "step": 8870 |
| }, |
| { |
| "ce_loss_13": 3.297605562210083, |
| "ce_loss_17": 3.213636100292206, |
| "ce_loss_2": 4.41242778301239, |
| "ce_loss_4": 4.132280993461609, |
| "ce_loss_9": 3.5384950041770935, |
| "epoch": 0.888, |
| "grad_norm": 776.0, |
| "kl_loss_13": 217.77302780151368, |
| "kl_loss_2": 2514.273815917969, |
| "kl_loss_4": 1988.1409912109375, |
| "kl_loss_9": 791.9225494384766, |
| "learning_rate": 3.124848637472688e-05, |
| "loss": 1348.5748, |
| "step": 8880 |
| }, |
| { |
| "ce_loss_13": 3.1249602675437926, |
| "ce_loss_17": 3.038655388355255, |
| "ce_loss_2": 4.25794825553894, |
| "ce_loss_4": 3.9747861862182616, |
| "ce_loss_9": 3.377835726737976, |
| "epoch": 0.889, |
| "grad_norm": 908.0, |
| "kl_loss_13": 215.64293670654297, |
| "kl_loss_2": 2538.662951660156, |
| "kl_loss_4": 1997.264208984375, |
| "kl_loss_9": 806.4836547851562, |
| "learning_rate": 3.069872506157212e-05, |
| "loss": 1370.8979, |
| "step": 8890 |
| }, |
| { |
| "ce_loss_13": 3.2268425703048704, |
| "ce_loss_17": 3.139546525478363, |
| "ce_loss_2": 4.328174114227295, |
| "ce_loss_4": 4.051902306079865, |
| "ce_loss_9": 3.4705706357955934, |
| "epoch": 0.89, |
| "grad_norm": 864.0, |
| "kl_loss_13": 219.3784065246582, |
| "kl_loss_2": 2484.1745483398436, |
| "kl_loss_4": 1963.5433288574218, |
| "kl_loss_9": 792.9700897216796, |
| "learning_rate": 3.0153689607045842e-05, |
| "loss": 1362.3307, |
| "step": 8900 |
| }, |
| { |
| "ce_loss_13": 3.131894898414612, |
| "ce_loss_17": 3.0421868801116942, |
| "ce_loss_2": 4.3142383098602295, |
| "ce_loss_4": 4.024949514865876, |
| "ce_loss_9": 3.40098876953125, |
| "epoch": 0.891, |
| "grad_norm": 948.0, |
| "kl_loss_13": 227.655126953125, |
| "kl_loss_2": 2664.9869873046873, |
| "kl_loss_4": 2111.8084228515627, |
| "kl_loss_9": 853.6656219482422, |
| "learning_rate": 2.9613385499648926e-05, |
| "loss": 1397.1209, |
| "step": 8910 |
| }, |
| { |
| "ce_loss_13": 3.1831221699714662, |
| "ce_loss_17": 3.094726359844208, |
| "ce_loss_2": 4.289004158973694, |
| "ce_loss_4": 4.005497312545776, |
| "ce_loss_9": 3.4327184557914734, |
| "epoch": 0.892, |
| "grad_norm": 840.0, |
| "kl_loss_13": 220.87451782226563, |
| "kl_loss_2": 2474.5325561523437, |
| "kl_loss_4": 1944.5530700683594, |
| "kl_loss_9": 793.0059173583984, |
| "learning_rate": 2.9077818180237692e-05, |
| "loss": 1373.02, |
| "step": 8920 |
| }, |
| { |
| "ce_loss_13": 3.2247784972190856, |
| "ce_loss_17": 3.1343048095703123, |
| "ce_loss_2": 4.358565402030945, |
| "ce_loss_4": 4.070203065872192, |
| "ce_loss_9": 3.4758506655693053, |
| "epoch": 0.893, |
| "grad_norm": 964.0, |
| "kl_loss_13": 220.11781692504883, |
| "kl_loss_2": 2512.887255859375, |
| "kl_loss_4": 1977.3384033203124, |
| "kl_loss_9": 798.7517913818359, |
| "learning_rate": 2.8546993041969172e-05, |
| "loss": 1367.4203, |
| "step": 8930 |
| }, |
| { |
| "ce_loss_13": 3.258907437324524, |
| "ce_loss_17": 3.1736387848854064, |
| "ce_loss_2": 4.330030989646912, |
| "ce_loss_4": 4.055886971950531, |
| "ce_loss_9": 3.497764265537262, |
| "epoch": 0.894, |
| "grad_norm": 768.0, |
| "kl_loss_13": 217.61174850463868, |
| "kl_loss_2": 2434.3168701171876, |
| "kl_loss_4": 1915.8539978027343, |
| "kl_loss_9": 785.0666015625, |
| "learning_rate": 2.802091543024671e-05, |
| "loss": 1363.4322, |
| "step": 8940 |
| }, |
| { |
| "ce_loss_13": 3.2544283986091616, |
| "ce_loss_17": 3.1665090680122376, |
| "ce_loss_2": 4.377725553512573, |
| "ce_loss_4": 4.09741724729538, |
| "ce_loss_9": 3.506710946559906, |
| "epoch": 0.895, |
| "grad_norm": 856.0, |
| "kl_loss_13": 222.79181671142578, |
| "kl_loss_2": 2539.9704711914064, |
| "kl_loss_4": 2010.047119140625, |
| "kl_loss_9": 809.6312438964844, |
| "learning_rate": 2.7499590642665774e-05, |
| "loss": 1412.3726, |
| "step": 8950 |
| }, |
| { |
| "ce_loss_13": 3.271846294403076, |
| "ce_loss_17": 3.180848681926727, |
| "ce_loss_2": 4.377647531032562, |
| "ce_loss_4": 4.096516931056977, |
| "ce_loss_9": 3.515015959739685, |
| "epoch": 0.896, |
| "grad_norm": 888.0, |
| "kl_loss_13": 226.96068572998047, |
| "kl_loss_2": 2509.722302246094, |
| "kl_loss_4": 1982.2886291503905, |
| "kl_loss_9": 803.804476928711, |
| "learning_rate": 2.6983023928961405e-05, |
| "loss": 1362.3409, |
| "step": 8960 |
| }, |
| { |
| "ce_loss_13": 3.237930691242218, |
| "ce_loss_17": 3.150544786453247, |
| "ce_loss_2": 4.342291474342346, |
| "ce_loss_4": 4.0727537870407104, |
| "ce_loss_9": 3.489503836631775, |
| "epoch": 0.897, |
| "grad_norm": 880.0, |
| "kl_loss_13": 220.3332389831543, |
| "kl_loss_2": 2464.066516113281, |
| "kl_loss_4": 1957.343194580078, |
| "kl_loss_9": 800.6205108642578, |
| "learning_rate": 2.6471220490954628e-05, |
| "loss": 1385.0533, |
| "step": 8970 |
| }, |
| { |
| "ce_loss_13": 3.2273140072822573, |
| "ce_loss_17": 3.1436033964157106, |
| "ce_loss_2": 4.331751799583435, |
| "ce_loss_4": 4.053059315681457, |
| "ce_loss_9": 3.4650877833366396, |
| "epoch": 0.898, |
| "grad_norm": 920.0, |
| "kl_loss_13": 217.11159133911133, |
| "kl_loss_2": 2491.9383178710937, |
| "kl_loss_4": 1966.174005126953, |
| "kl_loss_9": 784.9631195068359, |
| "learning_rate": 2.596418548250029e-05, |
| "loss": 1373.9889, |
| "step": 8980 |
| }, |
| { |
| "ce_loss_13": 3.26533043384552, |
| "ce_loss_17": 3.1777626395225527, |
| "ce_loss_2": 4.362241911888122, |
| "ce_loss_4": 4.079789578914642, |
| "ce_loss_9": 3.50988028049469, |
| "epoch": 0.899, |
| "grad_norm": 832.0, |
| "kl_loss_13": 223.80094146728516, |
| "kl_loss_2": 2511.8578369140623, |
| "kl_loss_4": 1985.0419189453125, |
| "kl_loss_9": 808.9386138916016, |
| "learning_rate": 2.5461924009435368e-05, |
| "loss": 1363.7965, |
| "step": 8990 |
| }, |
| { |
| "ce_loss_13": 3.2584426641464233, |
| "ce_loss_17": 3.170464050769806, |
| "ce_loss_2": 4.365917778015136, |
| "ce_loss_4": 4.080835664272309, |
| "ce_loss_9": 3.5044830322265623, |
| "epoch": 0.9, |
| "grad_norm": 776.0, |
| "kl_loss_13": 222.38669052124024, |
| "kl_loss_2": 2491.651513671875, |
| "kl_loss_4": 1949.7656127929688, |
| "kl_loss_9": 800.291958618164, |
| "learning_rate": 2.4964441129527336e-05, |
| "loss": 1386.1471, |
| "step": 9000 |
| }, |
| { |
| "ce_loss_13": 3.258763921260834, |
| "ce_loss_17": 3.1775803685188295, |
| "ce_loss_2": 4.334868860244751, |
| "ce_loss_4": 4.053112626075745, |
| "ce_loss_9": 3.498238742351532, |
| "epoch": 0.901, |
| "grad_norm": 936.0, |
| "kl_loss_13": 217.06993560791017, |
| "kl_loss_2": 2442.2597045898438, |
| "kl_loss_4": 1914.3587280273437, |
| "kl_loss_9": 785.4851593017578, |
| "learning_rate": 2.4471741852423235e-05, |
| "loss": 1342.6739, |
| "step": 9010 |
| }, |
| { |
| "ce_loss_13": 3.307252860069275, |
| "ce_loss_17": 3.2175087571144103, |
| "ce_loss_2": 4.393547511100769, |
| "ce_loss_4": 4.127389025688172, |
| "ce_loss_9": 3.5532113671302796, |
| "epoch": 0.902, |
| "grad_norm": 892.0, |
| "kl_loss_13": 219.54621124267578, |
| "kl_loss_2": 2442.2252197265625, |
| "kl_loss_4": 1936.2179260253906, |
| "kl_loss_9": 787.6474151611328, |
| "learning_rate": 2.3983831139599287e-05, |
| "loss": 1355.5996, |
| "step": 9020 |
| }, |
| { |
| "ce_loss_13": 3.2304179310798644, |
| "ce_loss_17": 3.141878080368042, |
| "ce_loss_2": 4.324336528778076, |
| "ce_loss_4": 4.049396026134491, |
| "ce_loss_9": 3.464998996257782, |
| "epoch": 0.903, |
| "grad_norm": 772.0, |
| "kl_loss_13": 216.9328285217285, |
| "kl_loss_2": 2453.4583251953127, |
| "kl_loss_4": 1939.1372924804687, |
| "kl_loss_9": 775.8932067871094, |
| "learning_rate": 2.3500713904311022e-05, |
| "loss": 1327.4785, |
| "step": 9030 |
| }, |
| { |
| "ce_loss_13": 3.2645386457443237, |
| "ce_loss_17": 3.1836945056915282, |
| "ce_loss_2": 4.3366811037063595, |
| "ce_loss_4": 4.058149003982544, |
| "ce_loss_9": 3.4976337909698487, |
| "epoch": 0.904, |
| "grad_norm": 860.0, |
| "kl_loss_13": 211.90866928100587, |
| "kl_loss_2": 2398.1073608398438, |
| "kl_loss_4": 1880.5147827148437, |
| "kl_loss_9": 765.6016906738281, |
| "learning_rate": 2.3022395011543685e-05, |
| "loss": 1329.9283, |
| "step": 9040 |
| }, |
| { |
| "ce_loss_13": 3.295250749588013, |
| "ce_loss_17": 3.2060691475868226, |
| "ce_loss_2": 4.393528008460999, |
| "ce_loss_4": 4.109420108795166, |
| "ce_loss_9": 3.5465949892997743, |
| "epoch": 0.905, |
| "grad_norm": 908.0, |
| "kl_loss_13": 227.4729362487793, |
| "kl_loss_2": 2492.628625488281, |
| "kl_loss_4": 1953.9336853027344, |
| "kl_loss_9": 813.4953216552734, |
| "learning_rate": 2.2548879277963063e-05, |
| "loss": 1392.64, |
| "step": 9050 |
| }, |
| { |
| "ce_loss_13": 3.2123307824134826, |
| "ce_loss_17": 3.1253671050071716, |
| "ce_loss_2": 4.304782915115356, |
| "ce_loss_4": 4.023476386070252, |
| "ce_loss_9": 3.4541374683380126, |
| "epoch": 0.906, |
| "grad_norm": 848.0, |
| "kl_loss_13": 218.5086555480957, |
| "kl_loss_2": 2472.942431640625, |
| "kl_loss_4": 1937.4415344238282, |
| "kl_loss_9": 791.0092132568359, |
| "learning_rate": 2.208017147186736e-05, |
| "loss": 1329.1565, |
| "step": 9060 |
| }, |
| { |
| "ce_loss_13": 3.207584524154663, |
| "ce_loss_17": 3.1224289894104005, |
| "ce_loss_2": 4.305971288681031, |
| "ce_loss_4": 4.0261146068573, |
| "ce_loss_9": 3.4547170996665955, |
| "epoch": 0.907, |
| "grad_norm": 844.0, |
| "kl_loss_13": 218.99121551513673, |
| "kl_loss_2": 2480.3300659179686, |
| "kl_loss_4": 1956.8550170898438, |
| "kl_loss_9": 797.4085205078125, |
| "learning_rate": 2.1616276313139227e-05, |
| "loss": 1348.7938, |
| "step": 9070 |
| }, |
| { |
| "ce_loss_13": 3.2467629432678224, |
| "ce_loss_17": 3.156818318367004, |
| "ce_loss_2": 4.34942741394043, |
| "ce_loss_4": 4.068715739250183, |
| "ce_loss_9": 3.4923762559890745, |
| "epoch": 0.908, |
| "grad_norm": 812.0, |
| "kl_loss_13": 220.7992935180664, |
| "kl_loss_2": 2487.8225708007812, |
| "kl_loss_4": 1961.7897155761718, |
| "kl_loss_9": 797.899771118164, |
| "learning_rate": 2.1157198473197415e-05, |
| "loss": 1377.2855, |
| "step": 9080 |
| }, |
| { |
| "ce_loss_13": 3.310531508922577, |
| "ce_loss_17": 3.2205408573150636, |
| "ce_loss_2": 4.418992137908935, |
| "ce_loss_4": 4.135834491252899, |
| "ce_loss_9": 3.557993221282959, |
| "epoch": 0.909, |
| "grad_norm": 984.0, |
| "kl_loss_13": 223.41179656982422, |
| "kl_loss_2": 2488.6339111328125, |
| "kl_loss_4": 1961.6606994628905, |
| "kl_loss_9": 807.566064453125, |
| "learning_rate": 2.0702942574950812e-05, |
| "loss": 1367.1679, |
| "step": 9090 |
| }, |
| { |
| "ce_loss_13": 3.241121828556061, |
| "ce_loss_17": 3.149674820899963, |
| "ce_loss_2": 4.353479814529419, |
| "ce_loss_4": 4.066814351081848, |
| "ce_loss_9": 3.490784764289856, |
| "epoch": 0.91, |
| "grad_norm": 772.0, |
| "kl_loss_13": 227.33242950439453, |
| "kl_loss_2": 2519.5287353515623, |
| "kl_loss_4": 1983.0399780273438, |
| "kl_loss_9": 817.3186950683594, |
| "learning_rate": 2.025351319275137e-05, |
| "loss": 1377.3342, |
| "step": 9100 |
| }, |
| { |
| "ce_loss_13": 3.360267686843872, |
| "ce_loss_17": 3.271786260604858, |
| "ce_loss_2": 4.444746088981629, |
| "ce_loss_4": 4.173572850227356, |
| "ce_loss_9": 3.604773426055908, |
| "epoch": 0.911, |
| "grad_norm": 772.0, |
| "kl_loss_13": 227.0474723815918, |
| "kl_loss_2": 2495.2429931640627, |
| "kl_loss_4": 1971.7109985351562, |
| "kl_loss_9": 816.9932159423828, |
| "learning_rate": 1.9808914852347816e-05, |
| "loss": 1403.5953, |
| "step": 9110 |
| }, |
| { |
| "ce_loss_13": 3.2066776037216185, |
| "ce_loss_17": 3.116776716709137, |
| "ce_loss_2": 4.309818959236145, |
| "ce_loss_4": 4.031574809551239, |
| "ce_loss_9": 3.458123779296875, |
| "epoch": 0.912, |
| "grad_norm": 840.0, |
| "kl_loss_13": 220.9835105895996, |
| "kl_loss_2": 2473.516748046875, |
| "kl_loss_4": 1946.9725708007813, |
| "kl_loss_9": 800.2813110351562, |
| "learning_rate": 1.9369152030840554e-05, |
| "loss": 1354.2768, |
| "step": 9120 |
| }, |
| { |
| "ce_loss_13": 3.283405137062073, |
| "ce_loss_17": 3.1993649005889893, |
| "ce_loss_2": 4.39392511844635, |
| "ce_loss_4": 4.115605187416077, |
| "ce_loss_9": 3.5309956789016725, |
| "epoch": 0.913, |
| "grad_norm": 936.0, |
| "kl_loss_13": 218.62859497070312, |
| "kl_loss_2": 2510.861865234375, |
| "kl_loss_4": 1982.0250427246094, |
| "kl_loss_9": 797.3995391845704, |
| "learning_rate": 1.893422915663645e-05, |
| "loss": 1370.2637, |
| "step": 9130 |
| }, |
| { |
| "ce_loss_13": 3.161654829978943, |
| "ce_loss_17": 3.0701867938041687, |
| "ce_loss_2": 4.318281030654907, |
| "ce_loss_4": 4.032570040225982, |
| "ce_loss_9": 3.420887851715088, |
| "epoch": 0.914, |
| "grad_norm": 896.0, |
| "kl_loss_13": 223.37092361450195, |
| "kl_loss_2": 2574.8150390625, |
| "kl_loss_4": 2028.6739135742187, |
| "kl_loss_9": 820.965786743164, |
| "learning_rate": 1.850415060940386e-05, |
| "loss": 1400.192, |
| "step": 9140 |
| }, |
| { |
| "ce_loss_13": 3.2818647265434264, |
| "ce_loss_17": 3.1991522789001463, |
| "ce_loss_2": 4.354596889019012, |
| "ce_loss_4": 4.082476079463959, |
| "ce_loss_9": 3.5242899537086485, |
| "epoch": 0.915, |
| "grad_norm": 796.0, |
| "kl_loss_13": 219.54589080810547, |
| "kl_loss_2": 2434.888146972656, |
| "kl_loss_4": 1912.6146606445313, |
| "kl_loss_9": 789.3105773925781, |
| "learning_rate": 1.8078920720028978e-05, |
| "loss": 1358.1771, |
| "step": 9150 |
| }, |
| { |
| "ce_loss_13": 3.203986310958862, |
| "ce_loss_17": 3.1224583268165587, |
| "ce_loss_2": 4.28719732761383, |
| "ce_loss_4": 4.0050101518630985, |
| "ce_loss_9": 3.446657347679138, |
| "epoch": 0.916, |
| "grad_norm": 844.0, |
| "kl_loss_13": 213.93195190429688, |
| "kl_loss_2": 2434.3916870117187, |
| "kl_loss_4": 1904.3346374511718, |
| "kl_loss_9": 783.1549621582031, |
| "learning_rate": 1.765854377057219e-05, |
| "loss": 1366.1328, |
| "step": 9160 |
| }, |
| { |
| "ce_loss_13": 3.1830304622650147, |
| "ce_loss_17": 3.102406632900238, |
| "ce_loss_2": 4.286171793937683, |
| "ce_loss_4": 4.006797409057617, |
| "ce_loss_9": 3.425233840942383, |
| "epoch": 0.917, |
| "grad_norm": 776.0, |
| "kl_loss_13": 213.46029663085938, |
| "kl_loss_2": 2463.6358032226562, |
| "kl_loss_4": 1937.9644714355468, |
| "kl_loss_9": 781.2274444580078, |
| "learning_rate": 1.724302399422456e-05, |
| "loss": 1360.2615, |
| "step": 9170 |
| }, |
| { |
| "ce_loss_13": 3.1548372507095337, |
| "ce_loss_17": 3.064271068572998, |
| "ce_loss_2": 4.2698247075080875, |
| "ce_loss_4": 3.984210562705994, |
| "ce_loss_9": 3.403843379020691, |
| "epoch": 0.918, |
| "grad_norm": 744.0, |
| "kl_loss_13": 225.4613235473633, |
| "kl_loss_2": 2519.0862426757812, |
| "kl_loss_4": 1979.069891357422, |
| "kl_loss_9": 812.0192260742188, |
| "learning_rate": 1.683236557526574e-05, |
| "loss": 1380.3875, |
| "step": 9180 |
| }, |
| { |
| "ce_loss_13": 3.2605535626411437, |
| "ce_loss_17": 3.1784393906593325, |
| "ce_loss_2": 4.314115810394287, |
| "ce_loss_4": 4.0400647759437565, |
| "ce_loss_9": 3.4913466453552244, |
| "epoch": 0.919, |
| "grad_norm": 852.0, |
| "kl_loss_13": 211.61532745361328, |
| "kl_loss_2": 2384.9400146484377, |
| "kl_loss_4": 1868.9087707519532, |
| "kl_loss_9": 765.3319274902344, |
| "learning_rate": 1.6426572649021475e-05, |
| "loss": 1348.5619, |
| "step": 9190 |
| }, |
| { |
| "ce_loss_13": 3.292950749397278, |
| "ce_loss_17": 3.209618389606476, |
| "ce_loss_2": 4.344734001159668, |
| "ce_loss_4": 4.065221893787384, |
| "ce_loss_9": 3.5231752753257752, |
| "epoch": 0.92, |
| "grad_norm": 1064.0, |
| "kl_loss_13": 217.45301513671876, |
| "kl_loss_2": 2404.6033203125, |
| "kl_loss_4": 1887.4710510253906, |
| "kl_loss_9": 777.3987243652343, |
| "learning_rate": 1.6025649301821876e-05, |
| "loss": 1341.4621, |
| "step": 9200 |
| }, |
| { |
| "ce_loss_13": 3.282778263092041, |
| "ce_loss_17": 3.199418640136719, |
| "ce_loss_2": 4.34298689365387, |
| "ce_loss_4": 4.066406810283661, |
| "ce_loss_9": 3.524368369579315, |
| "epoch": 0.921, |
| "grad_norm": 868.0, |
| "kl_loss_13": 220.49258193969726, |
| "kl_loss_2": 2426.205041503906, |
| "kl_loss_4": 1902.8577941894532, |
| "kl_loss_9": 792.9719604492187, |
| "learning_rate": 1.5629599570960716e-05, |
| "loss": 1334.749, |
| "step": 9210 |
| }, |
| { |
| "ce_loss_13": 3.192858934402466, |
| "ce_loss_17": 3.110598695278168, |
| "ce_loss_2": 4.315822863578797, |
| "ce_loss_4": 4.025082242488861, |
| "ce_loss_9": 3.4394363284111025, |
| "epoch": 0.922, |
| "grad_norm": 996.0, |
| "kl_loss_13": 218.6992645263672, |
| "kl_loss_2": 2533.305139160156, |
| "kl_loss_4": 1987.8855529785155, |
| "kl_loss_9": 798.0518951416016, |
| "learning_rate": 1.5238427444654367e-05, |
| "loss": 1369.4425, |
| "step": 9220 |
| }, |
| { |
| "ce_loss_13": 3.2471844077110292, |
| "ce_loss_17": 3.1605598092079163, |
| "ce_loss_2": 4.34227591753006, |
| "ce_loss_4": 4.059690332412719, |
| "ce_loss_9": 3.486121213436127, |
| "epoch": 0.923, |
| "grad_norm": 876.0, |
| "kl_loss_13": 217.1166961669922, |
| "kl_loss_2": 2463.8434814453126, |
| "kl_loss_4": 1934.2061889648437, |
| "kl_loss_9": 778.2934112548828, |
| "learning_rate": 1.4852136862001764e-05, |
| "loss": 1348.692, |
| "step": 9230 |
| }, |
| { |
| "ce_loss_13": 3.2150325059890745, |
| "ce_loss_17": 3.1286561131477355, |
| "ce_loss_2": 4.292143750190735, |
| "ce_loss_4": 4.015335857868195, |
| "ce_loss_9": 3.462070846557617, |
| "epoch": 0.924, |
| "grad_norm": 868.0, |
| "kl_loss_13": 215.59997634887696, |
| "kl_loss_2": 2433.2443969726564, |
| "kl_loss_4": 1908.5103088378905, |
| "kl_loss_9": 786.3178131103516, |
| "learning_rate": 1.4470731712944884e-05, |
| "loss": 1361.9221, |
| "step": 9240 |
| }, |
| { |
| "ce_loss_13": 3.2384039402008056, |
| "ce_loss_17": 3.147601532936096, |
| "ce_loss_2": 4.349691724777221, |
| "ce_loss_4": 4.0618557095527645, |
| "ce_loss_9": 3.4858252882957457, |
| "epoch": 0.925, |
| "grad_norm": 792.0, |
| "kl_loss_13": 223.06068496704103, |
| "kl_loss_2": 2493.9858642578124, |
| "kl_loss_4": 1946.7096862792969, |
| "kl_loss_9": 800.909213256836, |
| "learning_rate": 1.4094215838229174e-05, |
| "loss": 1393.5494, |
| "step": 9250 |
| }, |
| { |
| "ce_loss_13": 3.208323061466217, |
| "ce_loss_17": 3.1210061430931093, |
| "ce_loss_2": 4.335579466819763, |
| "ce_loss_4": 4.04714937210083, |
| "ce_loss_9": 3.456245946884155, |
| "epoch": 0.926, |
| "grad_norm": 1032.0, |
| "kl_loss_13": 221.0323944091797, |
| "kl_loss_2": 2529.4101318359376, |
| "kl_loss_4": 1984.0954528808593, |
| "kl_loss_9": 801.6336364746094, |
| "learning_rate": 1.372259302936546e-05, |
| "loss": 1420.2388, |
| "step": 9260 |
| }, |
| { |
| "ce_loss_13": 3.3168997526168824, |
| "ce_loss_17": 3.228820037841797, |
| "ce_loss_2": 4.411512446403504, |
| "ce_loss_4": 4.128126418590545, |
| "ce_loss_9": 3.561565411090851, |
| "epoch": 0.927, |
| "grad_norm": 764.0, |
| "kl_loss_13": 227.0028160095215, |
| "kl_loss_2": 2471.144421386719, |
| "kl_loss_4": 1944.900213623047, |
| "kl_loss_9": 807.1984619140625, |
| "learning_rate": 1.3355867028591206e-05, |
| "loss": 1354.3068, |
| "step": 9270 |
| }, |
| { |
| "ce_loss_13": 3.220912754535675, |
| "ce_loss_17": 3.134995758533478, |
| "ce_loss_2": 4.297907853126526, |
| "ce_loss_4": 4.008646047115326, |
| "ce_loss_9": 3.4564685106277464, |
| "epoch": 0.928, |
| "grad_norm": 836.0, |
| "kl_loss_13": 216.133447265625, |
| "kl_loss_2": 2450.6377075195314, |
| "kl_loss_4": 1906.2369812011718, |
| "kl_loss_9": 784.589974975586, |
| "learning_rate": 1.2994041528833267e-05, |
| "loss": 1343.272, |
| "step": 9280 |
| }, |
| { |
| "ce_loss_13": 3.2189253091812136, |
| "ce_loss_17": 3.1319851636886598, |
| "ce_loss_2": 4.312949919700623, |
| "ce_loss_4": 4.031126940250397, |
| "ce_loss_9": 3.4616688013076784, |
| "epoch": 0.929, |
| "grad_norm": 872.0, |
| "kl_loss_13": 216.1491668701172, |
| "kl_loss_2": 2479.8517333984373, |
| "kl_loss_4": 1955.3857971191405, |
| "kl_loss_9": 789.3241943359375, |
| "learning_rate": 1.2637120173670358e-05, |
| "loss": 1355.4605, |
| "step": 9290 |
| }, |
| { |
| "ce_loss_13": 3.2423638701438904, |
| "ce_loss_17": 3.155144190788269, |
| "ce_loss_2": 4.356745862960816, |
| "ce_loss_4": 4.0728073716163635, |
| "ce_loss_9": 3.4955561876297, |
| "epoch": 0.93, |
| "grad_norm": 864.0, |
| "kl_loss_13": 220.93303298950195, |
| "kl_loss_2": 2495.88125, |
| "kl_loss_4": 1962.6788696289063, |
| "kl_loss_9": 806.3583404541016, |
| "learning_rate": 1.2285106557296478e-05, |
| "loss": 1369.5115, |
| "step": 9300 |
| }, |
| { |
| "ce_loss_13": 3.129614543914795, |
| "ce_loss_17": 3.041240561008453, |
| "ce_loss_2": 4.303548383712768, |
| "ce_loss_4": 4.019749534130097, |
| "ce_loss_9": 3.392469036579132, |
| "epoch": 0.931, |
| "grad_norm": 940.0, |
| "kl_loss_13": 220.8660446166992, |
| "kl_loss_2": 2617.67294921875, |
| "kl_loss_4": 2080.0368469238283, |
| "kl_loss_9": 824.7458038330078, |
| "learning_rate": 1.1938004224484989e-05, |
| "loss": 1396.6688, |
| "step": 9310 |
| }, |
| { |
| "ce_loss_13": 3.3512013792991637, |
| "ce_loss_17": 3.261004054546356, |
| "ce_loss_2": 4.4421525478363035, |
| "ce_loss_4": 4.151990175247192, |
| "ce_loss_9": 3.592440891265869, |
| "epoch": 0.932, |
| "grad_norm": 820.0, |
| "kl_loss_13": 222.4288772583008, |
| "kl_loss_2": 2481.216760253906, |
| "kl_loss_4": 1938.7572082519532, |
| "kl_loss_9": 799.0105590820312, |
| "learning_rate": 1.1595816670552429e-05, |
| "loss": 1383.4453, |
| "step": 9320 |
| }, |
| { |
| "ce_loss_13": 3.2786450386047363, |
| "ce_loss_17": 3.1929323077201843, |
| "ce_loss_2": 4.3666028618812565, |
| "ce_loss_4": 4.081855010986328, |
| "ce_loss_9": 3.5156550288200377, |
| "epoch": 0.933, |
| "grad_norm": 772.0, |
| "kl_loss_13": 218.9696357727051, |
| "kl_loss_2": 2451.6491455078126, |
| "kl_loss_4": 1919.7008422851563, |
| "kl_loss_9": 783.2412811279297, |
| "learning_rate": 1.1258547341323699e-05, |
| "loss": 1341.8271, |
| "step": 9330 |
| }, |
| { |
| "ce_loss_13": 3.3116394519805907, |
| "ce_loss_17": 3.22735196352005, |
| "ce_loss_2": 4.391518211364746, |
| "ce_loss_4": 4.112160420417785, |
| "ce_loss_9": 3.5525638699531554, |
| "epoch": 0.934, |
| "grad_norm": 868.0, |
| "kl_loss_13": 221.42182998657228, |
| "kl_loss_2": 2471.0857421875, |
| "kl_loss_4": 1943.7613952636718, |
| "kl_loss_9": 798.0545623779296, |
| "learning_rate": 1.0926199633097156e-05, |
| "loss": 1353.4176, |
| "step": 9340 |
| }, |
| { |
| "ce_loss_13": 3.31710399389267, |
| "ce_loss_17": 3.234906792640686, |
| "ce_loss_2": 4.368254041671753, |
| "ce_loss_4": 4.090949869155883, |
| "ce_loss_9": 3.5452964305877686, |
| "epoch": 0.935, |
| "grad_norm": 720.0, |
| "kl_loss_13": 214.74539947509766, |
| "kl_loss_2": 2421.686389160156, |
| "kl_loss_4": 1893.8810913085938, |
| "kl_loss_9": 772.0395233154297, |
| "learning_rate": 1.0598776892610684e-05, |
| "loss": 1365.94, |
| "step": 9350 |
| }, |
| { |
| "ce_loss_13": 3.1336665034294127, |
| "ce_loss_17": 3.0514662861824036, |
| "ce_loss_2": 4.25325882434845, |
| "ce_loss_4": 3.961105000972748, |
| "ce_loss_9": 3.383448898792267, |
| "epoch": 0.936, |
| "grad_norm": 1008.0, |
| "kl_loss_13": 214.9184326171875, |
| "kl_loss_2": 2502.357287597656, |
| "kl_loss_4": 1959.0135498046875, |
| "kl_loss_9": 794.4308959960938, |
| "learning_rate": 1.0276282417007399e-05, |
| "loss": 1355.0353, |
| "step": 9360 |
| }, |
| { |
| "ce_loss_13": 3.28331344127655, |
| "ce_loss_17": 3.203270697593689, |
| "ce_loss_2": 4.341977858543396, |
| "ce_loss_4": 4.070633184909821, |
| "ce_loss_9": 3.5182706475257874, |
| "epoch": 0.937, |
| "grad_norm": 840.0, |
| "kl_loss_13": 213.5865608215332, |
| "kl_loss_2": 2408.081994628906, |
| "kl_loss_4": 1888.2426147460938, |
| "kl_loss_9": 772.2623779296875, |
| "learning_rate": 9.958719453803277e-06, |
| "loss": 1340.2713, |
| "step": 9370 |
| }, |
| { |
| "ce_loss_13": 3.2833734512329102, |
| "ce_loss_17": 3.1941158294677736, |
| "ce_loss_2": 4.380245876312256, |
| "ce_loss_4": 4.104355025291443, |
| "ce_loss_9": 3.5337594151496887, |
| "epoch": 0.938, |
| "grad_norm": 856.0, |
| "kl_loss_13": 221.2907485961914, |
| "kl_loss_2": 2475.372705078125, |
| "kl_loss_4": 1955.619805908203, |
| "kl_loss_9": 807.4383697509766, |
| "learning_rate": 9.646091200853802e-06, |
| "loss": 1351.0312, |
| "step": 9380 |
| }, |
| { |
| "ce_loss_13": 3.2392139196395875, |
| "ce_loss_17": 3.1552943110466005, |
| "ce_loss_2": 4.324875295162201, |
| "ce_loss_4": 4.042110526561737, |
| "ce_loss_9": 3.48607794046402, |
| "epoch": 0.939, |
| "grad_norm": 740.0, |
| "kl_loss_13": 215.70135192871095, |
| "kl_loss_2": 2434.798132324219, |
| "kl_loss_4": 1903.2385864257812, |
| "kl_loss_9": 787.5908813476562, |
| "learning_rate": 9.338400806321978e-06, |
| "loss": 1310.8646, |
| "step": 9390 |
| }, |
| { |
| "ce_loss_13": 3.2766632795333863, |
| "ce_loss_17": 3.1855878233909607, |
| "ce_loss_2": 4.352747321128845, |
| "ce_loss_4": 4.075416874885559, |
| "ce_loss_9": 3.5220303654670717, |
| "epoch": 0.94, |
| "grad_norm": 824.0, |
| "kl_loss_13": 223.0373176574707, |
| "kl_loss_2": 2438.11748046875, |
| "kl_loss_4": 1921.560565185547, |
| "kl_loss_9": 796.8032958984375, |
| "learning_rate": 9.035651368646646e-06, |
| "loss": 1343.7436, |
| "step": 9400 |
| }, |
| { |
| "ce_loss_13": 3.2748085379600527, |
| "ce_loss_17": 3.1926278591156008, |
| "ce_loss_2": 4.352897191047669, |
| "ce_loss_4": 4.070467281341553, |
| "ce_loss_9": 3.511702334880829, |
| "epoch": 0.941, |
| "grad_norm": 800.0, |
| "kl_loss_13": 213.71628036499024, |
| "kl_loss_2": 2437.5661499023436, |
| "kl_loss_4": 1910.470458984375, |
| "kl_loss_9": 780.2021423339844, |
| "learning_rate": 8.737845936511335e-06, |
| "loss": 1350.9834, |
| "step": 9410 |
| }, |
| { |
| "ce_loss_13": 3.2305777192115785, |
| "ce_loss_17": 3.140793776512146, |
| "ce_loss_2": 4.344888091087341, |
| "ce_loss_4": 4.065187382698059, |
| "ce_loss_9": 3.4761541366577147, |
| "epoch": 0.942, |
| "grad_norm": 856.0, |
| "kl_loss_13": 222.2299057006836, |
| "kl_loss_2": 2521.6388671875, |
| "kl_loss_4": 1988.6979187011718, |
| "kl_loss_9": 794.2424591064453, |
| "learning_rate": 8.444987508813451e-06, |
| "loss": 1365.4251, |
| "step": 9420 |
| }, |
| { |
| "ce_loss_13": 3.1864752054214476, |
| "ce_loss_17": 3.0988988518714904, |
| "ce_loss_2": 4.331013536453247, |
| "ce_loss_4": 4.044015240669251, |
| "ce_loss_9": 3.446845054626465, |
| "epoch": 0.943, |
| "grad_norm": 804.0, |
| "kl_loss_13": 225.6263671875, |
| "kl_loss_2": 2597.182080078125, |
| "kl_loss_4": 2049.704754638672, |
| "kl_loss_9": 835.3397613525391, |
| "learning_rate": 8.157079034633974e-06, |
| "loss": 1396.5229, |
| "step": 9430 |
| }, |
| { |
| "ce_loss_13": 3.1807761549949647, |
| "ce_loss_17": 3.0956114530563354, |
| "ce_loss_2": 4.287228143215179, |
| "ce_loss_4": 4.004971146583557, |
| "ce_loss_9": 3.427367401123047, |
| "epoch": 0.944, |
| "grad_norm": 808.0, |
| "kl_loss_13": 217.57715759277343, |
| "kl_loss_2": 2506.3245361328127, |
| "kl_loss_4": 1977.0880737304688, |
| "kl_loss_9": 801.5355834960938, |
| "learning_rate": 7.874123413208145e-06, |
| "loss": 1356.4012, |
| "step": 9440 |
| }, |
| { |
| "ce_loss_13": 3.15591778755188, |
| "ce_loss_17": 3.0687134623527528, |
| "ce_loss_2": 4.2906004071235655, |
| "ce_loss_4": 4.001482820510864, |
| "ce_loss_9": 3.4126017570495604, |
| "epoch": 0.945, |
| "grad_norm": 852.0, |
| "kl_loss_13": 218.69328231811522, |
| "kl_loss_2": 2545.3590576171873, |
| "kl_loss_4": 1996.5914123535156, |
| "kl_loss_9": 813.08896484375, |
| "learning_rate": 7.59612349389599e-06, |
| "loss": 1381.4177, |
| "step": 9450 |
| }, |
| { |
| "ce_loss_13": 3.2464738368988035, |
| "ce_loss_17": 3.1593940377235414, |
| "ce_loss_2": 4.31714437007904, |
| "ce_loss_4": 4.03574823141098, |
| "ce_loss_9": 3.483457398414612, |
| "epoch": 0.946, |
| "grad_norm": 1144.0, |
| "kl_loss_13": 216.57987289428712, |
| "kl_loss_2": 2417.8890869140623, |
| "kl_loss_4": 1881.8531005859375, |
| "kl_loss_9": 772.4367126464844, |
| "learning_rate": 7.323082076153509e-06, |
| "loss": 1342.7811, |
| "step": 9460 |
| }, |
| { |
| "ce_loss_13": 3.290072965621948, |
| "ce_loss_17": 3.2043086171150206, |
| "ce_loss_2": 4.35761536359787, |
| "ce_loss_4": 4.079579699039459, |
| "ce_loss_9": 3.525270438194275, |
| "epoch": 0.947, |
| "grad_norm": 828.0, |
| "kl_loss_13": 222.43314056396486, |
| "kl_loss_2": 2422.739221191406, |
| "kl_loss_4": 1899.7501037597656, |
| "kl_loss_9": 787.5891754150391, |
| "learning_rate": 7.055001909504755e-06, |
| "loss": 1368.9857, |
| "step": 9470 |
| }, |
| { |
| "ce_loss_13": 3.316695213317871, |
| "ce_loss_17": 3.2308751583099364, |
| "ce_loss_2": 4.395706987380981, |
| "ce_loss_4": 4.11409410238266, |
| "ce_loss_9": 3.5532686829566957, |
| "epoch": 0.948, |
| "grad_norm": 872.0, |
| "kl_loss_13": 220.38413467407227, |
| "kl_loss_2": 2458.068798828125, |
| "kl_loss_4": 1920.4984375, |
| "kl_loss_9": 788.3192443847656, |
| "learning_rate": 6.791885693514133e-06, |
| "loss": 1356.5563, |
| "step": 9480 |
| }, |
| { |
| "ce_loss_13": 3.225104308128357, |
| "ce_loss_17": 3.1385900259017943, |
| "ce_loss_2": 4.344547700881958, |
| "ce_loss_4": 4.062541484832764, |
| "ce_loss_9": 3.4725778698921204, |
| "epoch": 0.949, |
| "grad_norm": 868.0, |
| "kl_loss_13": 220.89653701782225, |
| "kl_loss_2": 2532.2171630859375, |
| "kl_loss_4": 2000.7470275878907, |
| "kl_loss_9": 803.1367370605469, |
| "learning_rate": 6.533736077758867e-06, |
| "loss": 1381.3528, |
| "step": 9490 |
| }, |
| { |
| "ce_loss_13": 3.195958375930786, |
| "ce_loss_17": 3.1049342274665834, |
| "ce_loss_2": 4.34363625049591, |
| "ce_loss_4": 4.0638914704322815, |
| "ce_loss_9": 3.4523710131645204, |
| "epoch": 0.95, |
| "grad_norm": 924.0, |
| "kl_loss_13": 224.56532516479493, |
| "kl_loss_2": 2583.4022705078123, |
| "kl_loss_4": 2049.2861755371096, |
| "kl_loss_9": 820.3523315429687, |
| "learning_rate": 6.2805556618028556e-06, |
| "loss": 1385.2922, |
| "step": 9500 |
| }, |
| { |
| "ce_loss_13": 3.2737756252288817, |
| "ce_loss_17": 3.1905980467796327, |
| "ce_loss_2": 4.346153998374939, |
| "ce_loss_4": 4.062958240509033, |
| "ce_loss_9": 3.501893973350525, |
| "epoch": 0.951, |
| "grad_norm": 756.0, |
| "kl_loss_13": 211.4181007385254, |
| "kl_loss_2": 2411.5949829101564, |
| "kl_loss_4": 1884.6334045410156, |
| "kl_loss_9": 759.334814453125, |
| "learning_rate": 6.032346995169968e-06, |
| "loss": 1304.0113, |
| "step": 9510 |
| }, |
| { |
| "ce_loss_13": 3.277216684818268, |
| "ce_loss_17": 3.1925328850746153, |
| "ce_loss_2": 4.362728118896484, |
| "ce_loss_4": 4.0835275888443, |
| "ce_loss_9": 3.517595410346985, |
| "epoch": 0.952, |
| "grad_norm": 756.0, |
| "kl_loss_13": 215.97168045043946, |
| "kl_loss_2": 2453.770556640625, |
| "kl_loss_4": 1925.393780517578, |
| "kl_loss_9": 784.8690155029296, |
| "learning_rate": 5.789112577318789e-06, |
| "loss": 1340.7672, |
| "step": 9520 |
| }, |
| { |
| "ce_loss_13": 3.2610395193099975, |
| "ce_loss_17": 3.1749332785606383, |
| "ce_loss_2": 4.374293923377991, |
| "ce_loss_4": 4.093641686439514, |
| "ce_loss_9": 3.5034366726875303, |
| "epoch": 0.953, |
| "grad_norm": 768.0, |
| "kl_loss_13": 221.60036849975586, |
| "kl_loss_2": 2517.109814453125, |
| "kl_loss_4": 1983.563525390625, |
| "kl_loss_9": 802.1741821289063, |
| "learning_rate": 5.550854857617194e-06, |
| "loss": 1350.7597, |
| "step": 9530 |
| }, |
| { |
| "ce_loss_13": 3.245774734020233, |
| "ce_loss_17": 3.1594863533973694, |
| "ce_loss_2": 4.378055286407471, |
| "ce_loss_4": 4.090924561023712, |
| "ce_loss_9": 3.4971926689147947, |
| "epoch": 0.954, |
| "grad_norm": 1032.0, |
| "kl_loss_13": 224.5074562072754, |
| "kl_loss_2": 2542.746142578125, |
| "kl_loss_4": 2002.8309814453125, |
| "kl_loss_9": 810.5063690185547, |
| "learning_rate": 5.317576235317756e-06, |
| "loss": 1382.2507, |
| "step": 9540 |
| }, |
| { |
| "ce_loss_13": 3.2691051363945007, |
| "ce_loss_17": 3.184979832172394, |
| "ce_loss_2": 4.333749771118164, |
| "ce_loss_4": 4.049982392787934, |
| "ce_loss_9": 3.504191827774048, |
| "epoch": 0.955, |
| "grad_norm": 1416.0, |
| "kl_loss_13": 213.8662338256836, |
| "kl_loss_2": 2390.7116088867188, |
| "kl_loss_4": 1865.9178161621094, |
| "kl_loss_9": 766.6866821289062, |
| "learning_rate": 5.089279059533658e-06, |
| "loss": 1358.693, |
| "step": 9550 |
| }, |
| { |
| "ce_loss_13": 3.325921416282654, |
| "ce_loss_17": 3.2375960826873778, |
| "ce_loss_2": 4.396740531921386, |
| "ce_loss_4": 4.120703685283661, |
| "ce_loss_9": 3.5712178111076356, |
| "epoch": 0.956, |
| "grad_norm": 908.0, |
| "kl_loss_13": 226.58722991943358, |
| "kl_loss_2": 2454.7302978515627, |
| "kl_loss_4": 1924.9460327148438, |
| "kl_loss_9": 807.0700164794922, |
| "learning_rate": 4.865965629214819e-06, |
| "loss": 1346.2179, |
| "step": 9560 |
| }, |
| { |
| "ce_loss_13": 3.270737099647522, |
| "ce_loss_17": 3.181360864639282, |
| "ce_loss_2": 4.3722082614898685, |
| "ce_loss_4": 4.084476697444916, |
| "ce_loss_9": 3.512242543697357, |
| "epoch": 0.957, |
| "grad_norm": 940.0, |
| "kl_loss_13": 222.20181045532226, |
| "kl_loss_2": 2516.9514770507812, |
| "kl_loss_4": 1977.9654907226563, |
| "kl_loss_9": 808.6686187744141, |
| "learning_rate": 4.6476381931251366e-06, |
| "loss": 1345.7714, |
| "step": 9570 |
| }, |
| { |
| "ce_loss_13": 3.253694784641266, |
| "ce_loss_17": 3.167990577220917, |
| "ce_loss_2": 4.342315924167633, |
| "ce_loss_4": 4.059697329998016, |
| "ce_loss_9": 3.5023301005363465, |
| "epoch": 0.958, |
| "grad_norm": 820.0, |
| "kl_loss_13": 217.87476501464843, |
| "kl_loss_2": 2446.7124877929687, |
| "kl_loss_4": 1912.6610778808595, |
| "kl_loss_9": 786.8750885009765, |
| "learning_rate": 4.434298949819449e-06, |
| "loss": 1350.8592, |
| "step": 9580 |
| }, |
| { |
| "ce_loss_13": 3.220312249660492, |
| "ce_loss_17": 3.129145085811615, |
| "ce_loss_2": 4.362578868865967, |
| "ce_loss_4": 4.074148535728455, |
| "ce_loss_9": 3.4757197976112364, |
| "epoch": 0.959, |
| "grad_norm": 932.0, |
| "kl_loss_13": 227.9875503540039, |
| "kl_loss_2": 2605.30185546875, |
| "kl_loss_4": 2053.5823303222655, |
| "kl_loss_9": 832.5057647705078, |
| "learning_rate": 4.2259500476214406e-06, |
| "loss": 1394.9221, |
| "step": 9590 |
| }, |
| { |
| "ce_loss_13": 3.195894491672516, |
| "ce_loss_17": 3.108951759338379, |
| "ce_loss_2": 4.311345505714416, |
| "ce_loss_4": 4.030492007732391, |
| "ce_loss_9": 3.440891969203949, |
| "epoch": 0.96, |
| "grad_norm": 792.0, |
| "kl_loss_13": 219.76137084960936, |
| "kl_loss_2": 2522.520458984375, |
| "kl_loss_4": 1995.4986633300782, |
| "kl_loss_9": 807.3765380859375, |
| "learning_rate": 4.02259358460233e-06, |
| "loss": 1364.3726, |
| "step": 9600 |
| }, |
| { |
| "ce_loss_13": 3.2646095156669617, |
| "ce_loss_17": 3.178682732582092, |
| "ce_loss_2": 4.3500265836715695, |
| "ce_loss_4": 4.0639472842216495, |
| "ce_loss_9": 3.508467698097229, |
| "epoch": 0.961, |
| "grad_norm": 796.0, |
| "kl_loss_13": 220.3709243774414, |
| "kl_loss_2": 2447.066760253906, |
| "kl_loss_4": 1905.499090576172, |
| "kl_loss_9": 785.2954406738281, |
| "learning_rate": 3.8242316085594916e-06, |
| "loss": 1336.9375, |
| "step": 9610 |
| }, |
| { |
| "ce_loss_13": 3.1539738535881043, |
| "ce_loss_17": 3.066152548789978, |
| "ce_loss_2": 4.322041165828705, |
| "ce_loss_4": 4.033271777629852, |
| "ce_loss_9": 3.414609456062317, |
| "epoch": 0.962, |
| "grad_norm": 844.0, |
| "kl_loss_13": 226.12157135009767, |
| "kl_loss_2": 2629.878662109375, |
| "kl_loss_4": 2085.311846923828, |
| "kl_loss_9": 830.1753509521484, |
| "learning_rate": 3.630866116995757e-06, |
| "loss": 1418.5467, |
| "step": 9620 |
| }, |
| { |
| "ce_loss_13": 3.303531062602997, |
| "ce_loss_17": 3.2193968892097473, |
| "ce_loss_2": 4.374594783782959, |
| "ce_loss_4": 4.0948406457901, |
| "ce_loss_9": 3.5384915828704835, |
| "epoch": 0.963, |
| "grad_norm": 812.0, |
| "kl_loss_13": 218.21690216064454, |
| "kl_loss_2": 2434.9108276367188, |
| "kl_loss_4": 1912.481134033203, |
| "kl_loss_9": 776.3510528564453, |
| "learning_rate": 3.4424990570994797e-06, |
| "loss": 1374.0029, |
| "step": 9630 |
| }, |
| { |
| "ce_loss_13": 3.294644820690155, |
| "ce_loss_17": 3.2087340116500855, |
| "ce_loss_2": 4.366245722770691, |
| "ce_loss_4": 4.085958552360535, |
| "ce_loss_9": 3.5307599902153015, |
| "epoch": 0.964, |
| "grad_norm": 736.0, |
| "kl_loss_13": 216.86040496826172, |
| "kl_loss_2": 2445.8479858398437, |
| "kl_loss_4": 1920.2136352539062, |
| "kl_loss_9": 783.8744293212891, |
| "learning_rate": 3.2591323257248896e-06, |
| "loss": 1348.4049, |
| "step": 9640 |
| }, |
| { |
| "ce_loss_13": 3.145173895359039, |
| "ce_loss_17": 3.060458815097809, |
| "ce_loss_2": 4.261240267753601, |
| "ce_loss_4": 3.983374571800232, |
| "ce_loss_9": 3.3919747471809387, |
| "epoch": 0.965, |
| "grad_norm": 1032.0, |
| "kl_loss_13": 218.46996841430663, |
| "kl_loss_2": 2510.019494628906, |
| "kl_loss_4": 1980.5718078613281, |
| "kl_loss_9": 803.9881225585938, |
| "learning_rate": 3.0807677693729385e-06, |
| "loss": 1379.6697, |
| "step": 9650 |
| }, |
| { |
| "ce_loss_13": 3.3268900752067565, |
| "ce_loss_17": 3.2429277896881104, |
| "ce_loss_2": 4.398799586296081, |
| "ce_loss_4": 4.118711459636688, |
| "ce_loss_9": 3.5721837520599364, |
| "epoch": 0.966, |
| "grad_norm": 804.0, |
| "kl_loss_13": 217.35578231811525, |
| "kl_loss_2": 2432.91962890625, |
| "kl_loss_4": 1905.4734680175782, |
| "kl_loss_9": 786.7719024658203, |
| "learning_rate": 2.9074071841727055e-06, |
| "loss": 1330.2363, |
| "step": 9660 |
| }, |
| { |
| "ce_loss_13": 3.255733001232147, |
| "ce_loss_17": 3.1683148741722107, |
| "ce_loss_2": 4.342859768867493, |
| "ce_loss_4": 4.0628856182098385, |
| "ce_loss_9": 3.5070341348648073, |
| "epoch": 0.967, |
| "grad_norm": 912.0, |
| "kl_loss_13": 221.269522857666, |
| "kl_loss_2": 2453.208532714844, |
| "kl_loss_4": 1928.3604614257813, |
| "kl_loss_9": 798.5466705322266, |
| "learning_rate": 2.739052315863355e-06, |
| "loss": 1327.9771, |
| "step": 9670 |
| }, |
| { |
| "ce_loss_13": 3.2325741052627563, |
| "ce_loss_17": 3.1498236656188965, |
| "ce_loss_2": 4.335789132118225, |
| "ce_loss_4": 4.053774678707123, |
| "ce_loss_9": 3.473189079761505, |
| "epoch": 0.968, |
| "grad_norm": 768.0, |
| "kl_loss_13": 216.8374397277832, |
| "kl_loss_2": 2495.5137329101562, |
| "kl_loss_4": 1969.8667846679687, |
| "kl_loss_9": 790.1967864990235, |
| "learning_rate": 2.5757048597765396e-06, |
| "loss": 1348.9118, |
| "step": 9680 |
| }, |
| { |
| "ce_loss_13": 3.2543190479278565, |
| "ce_loss_17": 3.167156398296356, |
| "ce_loss_2": 4.359293568134308, |
| "ce_loss_4": 4.072268962860107, |
| "ce_loss_9": 3.499773073196411, |
| "epoch": 0.969, |
| "grad_norm": 952.0, |
| "kl_loss_13": 218.40027313232423, |
| "kl_loss_2": 2505.6250610351562, |
| "kl_loss_4": 1961.7472778320312, |
| "kl_loss_9": 800.0337341308593, |
| "learning_rate": 2.417366460819359e-06, |
| "loss": 1363.5117, |
| "step": 9690 |
| }, |
| { |
| "ce_loss_13": 3.262500524520874, |
| "ce_loss_17": 3.171657931804657, |
| "ce_loss_2": 4.398045229911804, |
| "ce_loss_4": 4.109879744052887, |
| "ce_loss_9": 3.511061406135559, |
| "epoch": 0.97, |
| "grad_norm": 928.0, |
| "kl_loss_13": 224.9924758911133, |
| "kl_loss_2": 2561.6322265625, |
| "kl_loss_4": 2016.7850158691406, |
| "kl_loss_9": 810.6648406982422, |
| "learning_rate": 2.2640387134577057e-06, |
| "loss": 1361.0584, |
| "step": 9700 |
| }, |
| { |
| "ce_loss_13": 3.1868832230567934, |
| "ce_loss_17": 3.103873634338379, |
| "ce_loss_2": 4.244852519035339, |
| "ce_loss_4": 3.9719587683677675, |
| "ce_loss_9": 3.4207942724227904, |
| "epoch": 0.971, |
| "grad_norm": 752.0, |
| "kl_loss_13": 208.93080215454103, |
| "kl_loss_2": 2361.2753051757813, |
| "kl_loss_4": 1845.3123168945312, |
| "kl_loss_9": 752.730581665039, |
| "learning_rate": 2.115723161700278e-06, |
| "loss": 1317.4146, |
| "step": 9710 |
| }, |
| { |
| "ce_loss_13": 3.172582244873047, |
| "ce_loss_17": 3.083338499069214, |
| "ce_loss_2": 4.307528579235077, |
| "ce_loss_4": 4.020644688606263, |
| "ce_loss_9": 3.426468777656555, |
| "epoch": 0.972, |
| "grad_norm": 844.0, |
| "kl_loss_13": 224.78013534545897, |
| "kl_loss_2": 2555.5634643554686, |
| "kl_loss_4": 2022.5922790527343, |
| "kl_loss_9": 816.3752349853515, |
| "learning_rate": 1.9724212990830937e-06, |
| "loss": 1391.6437, |
| "step": 9720 |
| }, |
| { |
| "ce_loss_13": 3.3089357018470764, |
| "ce_loss_17": 3.2206944823265076, |
| "ce_loss_2": 4.422231459617615, |
| "ce_loss_4": 4.138329660892486, |
| "ce_loss_9": 3.5589197635650636, |
| "epoch": 0.973, |
| "grad_norm": 1040.0, |
| "kl_loss_13": 222.28607788085938, |
| "kl_loss_2": 2519.200732421875, |
| "kl_loss_4": 1980.961669921875, |
| "kl_loss_9": 801.6954284667969, |
| "learning_rate": 1.8341345686543331e-06, |
| "loss": 1368.0766, |
| "step": 9730 |
| }, |
| { |
| "ce_loss_13": 3.2950737714767455, |
| "ce_loss_17": 3.2094454646110533, |
| "ce_loss_2": 4.347123765945435, |
| "ce_loss_4": 4.072555518150329, |
| "ce_loss_9": 3.5262292623519897, |
| "epoch": 0.974, |
| "grad_norm": 1040.0, |
| "kl_loss_13": 215.64831008911133, |
| "kl_loss_2": 2401.1509338378905, |
| "kl_loss_4": 1881.5595764160157, |
| "kl_loss_9": 776.4869323730469, |
| "learning_rate": 1.7008643629596864e-06, |
| "loss": 1361.1689, |
| "step": 9740 |
| }, |
| { |
| "ce_loss_13": 3.278672957420349, |
| "ce_loss_17": 3.1897144436836244, |
| "ce_loss_2": 4.377420365810394, |
| "ce_loss_4": 4.092463898658752, |
| "ce_loss_9": 3.5170222163200378, |
| "epoch": 0.975, |
| "grad_norm": 916.0, |
| "kl_loss_13": 218.61910095214844, |
| "kl_loss_2": 2507.2249755859375, |
| "kl_loss_4": 1960.962469482422, |
| "kl_loss_9": 790.5519073486328, |
| "learning_rate": 1.5726120240288633e-06, |
| "loss": 1384.61, |
| "step": 9750 |
| }, |
| { |
| "ce_loss_13": 3.181318700313568, |
| "ce_loss_17": 3.0963064193725587, |
| "ce_loss_2": 4.272090482711792, |
| "ce_loss_4": 3.9894553661346435, |
| "ce_loss_9": 3.419423234462738, |
| "epoch": 0.976, |
| "grad_norm": 1024.0, |
| "kl_loss_13": 216.36061630249023, |
| "kl_loss_2": 2461.0934204101563, |
| "kl_loss_4": 1932.3086853027344, |
| "kl_loss_9": 786.0869445800781, |
| "learning_rate": 1.4493788433612708e-06, |
| "loss": 1342.5629, |
| "step": 9760 |
| }, |
| { |
| "ce_loss_13": 3.2960918068885805, |
| "ce_loss_17": 3.206250810623169, |
| "ce_loss_2": 4.398169755935669, |
| "ce_loss_4": 4.114586043357849, |
| "ce_loss_9": 3.539888310432434, |
| "epoch": 0.977, |
| "grad_norm": 732.0, |
| "kl_loss_13": 221.34927520751953, |
| "kl_loss_2": 2511.3609130859377, |
| "kl_loss_4": 1964.6192321777344, |
| "kl_loss_9": 802.1725128173828, |
| "learning_rate": 1.3311660619138578e-06, |
| "loss": 1377.2024, |
| "step": 9770 |
| }, |
| { |
| "ce_loss_13": 3.2913751721382143, |
| "ce_loss_17": 3.2072720766067504, |
| "ce_loss_2": 4.336518740653991, |
| "ce_loss_4": 4.056340169906616, |
| "ce_loss_9": 3.5276540160179137, |
| "epoch": 0.978, |
| "grad_norm": 720.0, |
| "kl_loss_13": 218.31615829467773, |
| "kl_loss_2": 2379.3531860351563, |
| "kl_loss_4": 1851.736065673828, |
| "kl_loss_9": 778.3702362060546, |
| "learning_rate": 1.2179748700879012e-06, |
| "loss": 1347.6346, |
| "step": 9780 |
| }, |
| { |
| "ce_loss_13": 3.218771994113922, |
| "ce_loss_17": 3.1339413046836855, |
| "ce_loss_2": 4.30980110168457, |
| "ce_loss_4": 4.029927408695221, |
| "ce_loss_9": 3.4643234968185426, |
| "epoch": 0.979, |
| "grad_norm": 920.0, |
| "kl_loss_13": 217.51713027954102, |
| "kl_loss_2": 2441.228869628906, |
| "kl_loss_4": 1918.5179016113282, |
| "kl_loss_9": 785.2601654052735, |
| "learning_rate": 1.1098064077174619e-06, |
| "loss": 1355.3729, |
| "step": 9790 |
| }, |
| { |
| "ce_loss_13": 3.2470396876335146, |
| "ce_loss_17": 3.1596713781356813, |
| "ce_loss_2": 4.378944170475006, |
| "ce_loss_4": 4.08536856174469, |
| "ce_loss_9": 3.5031735062599183, |
| "epoch": 0.98, |
| "grad_norm": 804.0, |
| "kl_loss_13": 219.27398147583008, |
| "kl_loss_2": 2539.2723388671875, |
| "kl_loss_4": 1986.9726135253907, |
| "kl_loss_9": 804.7271179199219, |
| "learning_rate": 1.006661764057837e-06, |
| "loss": 1366.1204, |
| "step": 9800 |
| }, |
| { |
| "ce_loss_13": 3.2563411712646486, |
| "ce_loss_17": 3.1701019406318665, |
| "ce_loss_2": 4.347962689399719, |
| "ce_loss_4": 4.070125019550323, |
| "ce_loss_9": 3.498327910900116, |
| "epoch": 0.981, |
| "grad_norm": 804.0, |
| "kl_loss_13": 216.20537719726562, |
| "kl_loss_2": 2471.2597045898438, |
| "kl_loss_4": 1941.7896789550782, |
| "kl_loss_9": 788.5150848388672, |
| "learning_rate": 9.085419777743465e-07, |
| "loss": 1342.7039, |
| "step": 9810 |
| }, |
| { |
| "ce_loss_13": 3.2031669855117797, |
| "ce_loss_17": 3.122615599632263, |
| "ce_loss_2": 4.296006894111633, |
| "ce_loss_4": 4.023138654232025, |
| "ce_loss_9": 3.4449007868766786, |
| "epoch": 0.982, |
| "grad_norm": 924.0, |
| "kl_loss_13": 212.11586380004883, |
| "kl_loss_2": 2458.181884765625, |
| "kl_loss_4": 1944.0519409179688, |
| "kl_loss_9": 779.5261138916015, |
| "learning_rate": 8.15448036932176e-07, |
| "loss": 1327.3837, |
| "step": 9820 |
| }, |
| { |
| "ce_loss_13": 3.2484714865684508, |
| "ce_loss_17": 3.1628469109535216, |
| "ce_loss_2": 4.341381049156189, |
| "ce_loss_4": 4.064522337913513, |
| "ce_loss_9": 3.4957844734191896, |
| "epoch": 0.983, |
| "grad_norm": 888.0, |
| "kl_loss_13": 218.90567016601562, |
| "kl_loss_2": 2478.914831542969, |
| "kl_loss_4": 1960.2015869140625, |
| "kl_loss_9": 797.0689544677734, |
| "learning_rate": 7.273808789862724e-07, |
| "loss": 1372.8572, |
| "step": 9830 |
| }, |
| { |
| "ce_loss_13": 3.324189102649689, |
| "ce_loss_17": 3.2393589496612547, |
| "ce_loss_2": 4.406980681419372, |
| "ce_loss_4": 4.127825582027436, |
| "ce_loss_9": 3.5665980100631716, |
| "epoch": 0.984, |
| "grad_norm": 724.0, |
| "kl_loss_13": 220.28966751098633, |
| "kl_loss_2": 2457.141467285156, |
| "kl_loss_4": 1932.477227783203, |
| "kl_loss_9": 792.8278533935547, |
| "learning_rate": 6.443413907720186e-07, |
| "loss": 1343.8639, |
| "step": 9840 |
| }, |
| { |
| "ce_loss_13": 3.2618006587028505, |
| "ce_loss_17": 3.17478061914444, |
| "ce_loss_2": 4.346713733673096, |
| "ce_loss_4": 4.073301959037781, |
| "ce_loss_9": 3.4989508986473083, |
| "epoch": 0.985, |
| "grad_norm": 1012.0, |
| "kl_loss_13": 220.24280700683593, |
| "kl_loss_2": 2430.0309936523436, |
| "kl_loss_4": 1914.0229064941407, |
| "kl_loss_9": 780.6063018798828, |
| "learning_rate": 5.663304084960185e-07, |
| "loss": 1333.3168, |
| "step": 9850 |
| }, |
| { |
| "ce_loss_13": 3.1913982629776, |
| "ce_loss_17": 3.1021784424781798, |
| "ce_loss_2": 4.307283854484558, |
| "ce_loss_4": 4.024576556682587, |
| "ce_loss_9": 3.4390994906425476, |
| "epoch": 0.986, |
| "grad_norm": 1256.0, |
| "kl_loss_13": 219.1021354675293, |
| "kl_loss_2": 2505.5106567382813, |
| "kl_loss_4": 1971.7984680175782, |
| "kl_loss_9": 798.1460510253906, |
| "learning_rate": 4.933487177280482e-07, |
| "loss": 1345.9686, |
| "step": 9860 |
| }, |
| { |
| "ce_loss_13": 3.282895731925964, |
| "ce_loss_17": 3.2016955614089966, |
| "ce_loss_2": 4.355383956432343, |
| "ce_loss_4": 4.078336107730865, |
| "ce_loss_9": 3.5192210197448732, |
| "epoch": 0.987, |
| "grad_norm": 1696.0, |
| "kl_loss_13": 214.47653274536134, |
| "kl_loss_2": 2441.724499511719, |
| "kl_loss_4": 1910.0088745117187, |
| "kl_loss_9": 779.322216796875, |
| "learning_rate": 4.2539705339295075e-07, |
| "loss": 1335.7965, |
| "step": 9870 |
| }, |
| { |
| "ce_loss_13": 3.149475121498108, |
| "ce_loss_17": 3.061207127571106, |
| "ce_loss_2": 4.2652768850326535, |
| "ce_loss_4": 3.9764740824699403, |
| "ce_loss_9": 3.3991323471069337, |
| "epoch": 0.988, |
| "grad_norm": 900.0, |
| "kl_loss_13": 218.90238342285156, |
| "kl_loss_2": 2509.769189453125, |
| "kl_loss_4": 1973.7698791503906, |
| "kl_loss_9": 808.2424499511719, |
| "learning_rate": 3.6247609976319816e-07, |
| "loss": 1353.9066, |
| "step": 9880 |
| }, |
| { |
| "ce_loss_13": 3.2319627523422243, |
| "ce_loss_17": 3.1474406838417055, |
| "ce_loss_2": 4.3519148349761965, |
| "ce_loss_4": 4.073113644123078, |
| "ce_loss_9": 3.4862895131111147, |
| "epoch": 0.989, |
| "grad_norm": 828.0, |
| "kl_loss_13": 221.2615982055664, |
| "kl_loss_2": 2510.678173828125, |
| "kl_loss_4": 1978.1934936523437, |
| "kl_loss_9": 805.4162811279297, |
| "learning_rate": 3.0458649045211895e-07, |
| "loss": 1394.2037, |
| "step": 9890 |
| }, |
| { |
| "ce_loss_13": 3.20868661403656, |
| "ce_loss_17": 3.117846596240997, |
| "ce_loss_2": 4.32569420337677, |
| "ce_loss_4": 4.031478524208069, |
| "ce_loss_9": 3.464202880859375, |
| "epoch": 0.99, |
| "grad_norm": 1004.0, |
| "kl_loss_13": 225.72737579345704, |
| "kl_loss_2": 2498.446154785156, |
| "kl_loss_4": 1957.9348571777343, |
| "kl_loss_9": 812.2869873046875, |
| "learning_rate": 2.517288084074587e-07, |
| "loss": 1391.0302, |
| "step": 9900 |
| }, |
| { |
| "ce_loss_13": 3.2503684043884276, |
| "ce_loss_17": 3.1557893991470336, |
| "ce_loss_2": 4.399222469329834, |
| "ce_loss_4": 4.103204774856567, |
| "ce_loss_9": 3.50929594039917, |
| "epoch": 0.991, |
| "grad_norm": 988.0, |
| "kl_loss_13": 227.41172714233397, |
| "kl_loss_2": 2576.45224609375, |
| "kl_loss_4": 2015.4464477539063, |
| "kl_loss_9": 826.1490264892578, |
| "learning_rate": 2.0390358590538505e-07, |
| "loss": 1382.2771, |
| "step": 9910 |
| }, |
| { |
| "ce_loss_13": 3.256967079639435, |
| "ce_loss_17": 3.1677147269248964, |
| "ce_loss_2": 4.355877804756164, |
| "ce_loss_4": 4.074959802627563, |
| "ce_loss_9": 3.5054347038269045, |
| "epoch": 0.992, |
| "grad_norm": 756.0, |
| "kl_loss_13": 223.14381942749023, |
| "kl_loss_2": 2495.9954040527346, |
| "kl_loss_4": 1967.9006286621093, |
| "kl_loss_9": 807.5478668212891, |
| "learning_rate": 1.61111304545436e-07, |
| "loss": 1355.7512, |
| "step": 9920 |
| }, |
| { |
| "ce_loss_13": 3.224148380756378, |
| "ce_loss_17": 3.1364871859550476, |
| "ce_loss_2": 4.310623145103454, |
| "ce_loss_4": 4.031493699550628, |
| "ce_loss_9": 3.464025688171387, |
| "epoch": 0.993, |
| "grad_norm": 832.0, |
| "kl_loss_13": 217.57301635742186, |
| "kl_loss_2": 2463.0309814453126, |
| "kl_loss_4": 1941.7696228027344, |
| "kl_loss_9": 786.3763916015625, |
| "learning_rate": 1.2335239524541298e-07, |
| "loss": 1336.3118, |
| "step": 9930 |
| }, |
| { |
| "ce_loss_13": 3.19323810338974, |
| "ce_loss_17": 3.1078309655189513, |
| "ce_loss_2": 4.281654262542725, |
| "ce_loss_4": 4.002816307544708, |
| "ce_loss_9": 3.4326565861701965, |
| "epoch": 0.994, |
| "grad_norm": 744.0, |
| "kl_loss_13": 219.31902160644532, |
| "kl_loss_2": 2461.8650390625, |
| "kl_loss_4": 1932.9007141113282, |
| "kl_loss_9": 784.773876953125, |
| "learning_rate": 9.06272382371065e-08, |
| "loss": 1350.4164, |
| "step": 9940 |
| }, |
| { |
| "ce_loss_13": 3.2581248760223387, |
| "ce_loss_17": 3.1752687096595764, |
| "ce_loss_2": 4.373711252212525, |
| "ce_loss_4": 4.094207131862641, |
| "ce_loss_9": 3.5092731356620788, |
| "epoch": 0.995, |
| "grad_norm": 796.0, |
| "kl_loss_13": 221.85831451416016, |
| "kl_loss_2": 2509.984094238281, |
| "kl_loss_4": 1988.051287841797, |
| "kl_loss_9": 808.1008361816406, |
| "learning_rate": 6.293616306246586e-08, |
| "loss": 1366.3273, |
| "step": 9950 |
| }, |
| { |
| "ce_loss_13": 3.253830671310425, |
| "ce_loss_17": 3.1708797216415405, |
| "ce_loss_2": 4.317631435394287, |
| "ce_loss_4": 4.036856400966644, |
| "ce_loss_9": 3.4921401143074036, |
| "epoch": 0.996, |
| "grad_norm": 852.0, |
| "kl_loss_13": 214.52972869873048, |
| "kl_loss_2": 2416.996447753906, |
| "kl_loss_4": 1893.014093017578, |
| "kl_loss_9": 777.1513916015625, |
| "learning_rate": 4.027944857032395e-08, |
| "loss": 1316.8882, |
| "step": 9960 |
| }, |
| { |
| "ce_loss_13": 3.2442896366119385, |
| "ce_loss_17": 3.163611447811127, |
| "ce_loss_2": 4.294960856437683, |
| "ce_loss_4": 4.017445850372314, |
| "ce_loss_9": 3.469395875930786, |
| "epoch": 0.997, |
| "grad_norm": 856.0, |
| "kl_loss_13": 207.23084106445313, |
| "kl_loss_2": 2365.6643798828127, |
| "kl_loss_4": 1852.8196655273437, |
| "kl_loss_9": 748.1991973876953, |
| "learning_rate": 2.265732291356626e-08, |
| "loss": 1305.4615, |
| "step": 9970 |
| }, |
| { |
| "ce_loss_13": 3.2869481325149534, |
| "ce_loss_17": 3.203015947341919, |
| "ce_loss_2": 4.364979863166809, |
| "ce_loss_4": 4.078140783309936, |
| "ce_loss_9": 3.5234572649002076, |
| "epoch": 0.998, |
| "grad_norm": 852.0, |
| "kl_loss_13": 217.42401657104492, |
| "kl_loss_2": 2428.1246948242188, |
| "kl_loss_4": 1888.5685241699218, |
| "kl_loss_9": 781.7529541015625, |
| "learning_rate": 1.0069963546743833e-08, |
| "loss": 1356.3082, |
| "step": 9980 |
| }, |
| { |
| "ce_loss_13": 3.2734344720840456, |
| "ce_loss_17": 3.183213269710541, |
| "ce_loss_2": 4.368534970283508, |
| "ce_loss_4": 4.0925636172294615, |
| "ce_loss_9": 3.519333600997925, |
| "epoch": 0.999, |
| "grad_norm": 808.0, |
| "kl_loss_13": 220.437491607666, |
| "kl_loss_2": 2473.471228027344, |
| "kl_loss_4": 1945.9803588867187, |
| "kl_loss_9": 795.393930053711, |
| "learning_rate": 2.517497224463483e-09, |
| "loss": 1348.6465, |
| "step": 9990 |
| }, |
| { |
| "ce_loss_13": 3.228131055831909, |
| "ce_loss_17": 3.137771213054657, |
| "ce_loss_2": 4.377237319946289, |
| "ce_loss_4": 4.084628927707672, |
| "ce_loss_9": 3.4845844864845277, |
| "epoch": 1.0, |
| "grad_norm": 816.0, |
| "kl_loss_13": 224.53093338012695, |
| "kl_loss_2": 2583.963586425781, |
| "kl_loss_4": 2034.6659301757813, |
| "kl_loss_9": 816.4570587158203, |
| "learning_rate": 0.0, |
| "loss": 1395.7951, |
| "step": 10000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 10000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 250, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4.447557417823109e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|