| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 10000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "ce_loss_10": 6.949970483779907, |
| "ce_loss_13": 3.5991063117980957, |
| "ce_loss_2": 20.74317169189453, |
| "ce_loss_3": 26.111305236816406, |
| "ce_loss_7": 10.075343608856201, |
| "epoch": 0.0001, |
| "grad_norm": 212992.0, |
| "kl_loss_10": 7864.61865234375, |
| "kl_loss_2": 35348.310546875, |
| "kl_loss_3": 46478.765625, |
| "kl_loss_7": 14199.76806640625, |
| "learning_rate": 1e-05, |
| "loss": 25853.3086, |
| "step": 1 |
| }, |
| { |
| "ce_loss_10": 6.125355773501926, |
| "ce_loss_13": 3.6540163622962103, |
| "ce_loss_2": 12.076997624503242, |
| "ce_loss_3": 15.207524087693956, |
| "ce_loss_7": 7.174011654324001, |
| "epoch": 0.001, |
| "grad_norm": 17792.0, |
| "kl_loss_10": 5945.64690483941, |
| "kl_loss_2": 17211.485812717016, |
| "kl_loss_3": 23727.339274088543, |
| "kl_loss_7": 7859.595960828993, |
| "learning_rate": 0.0001, |
| "loss": 13522.6684, |
| "step": 10 |
| }, |
| { |
| "ce_loss_10": 4.603781843185425, |
| "ce_loss_13": 3.6583157896995546, |
| "ce_loss_2": 6.582165956497192, |
| "ce_loss_3": 6.530840277671814, |
| "ce_loss_7": 4.767995834350586, |
| "epoch": 0.002, |
| "grad_norm": 2416.0, |
| "kl_loss_10": 2069.933459472656, |
| "kl_loss_2": 5383.958471679687, |
| "kl_loss_3": 5293.375524902343, |
| "kl_loss_7": 2034.765203857422, |
| "learning_rate": 0.0002, |
| "loss": 3721.8547, |
| "step": 20 |
| }, |
| { |
| "ce_loss_10": 3.8755152463912963, |
| "ce_loss_13": 3.440992832183838, |
| "ce_loss_2": 5.676479697227478, |
| "ce_loss_3": 5.412591814994812, |
| "ce_loss_7": 4.082889425754547, |
| "epoch": 0.003, |
| "grad_norm": 1440.0, |
| "kl_loss_10": 764.9044219970704, |
| "kl_loss_2": 4085.6137573242186, |
| "kl_loss_3": 3584.689697265625, |
| "kl_loss_7": 1103.4823669433595, |
| "learning_rate": 0.0003, |
| "loss": 2353.6012, |
| "step": 30 |
| }, |
| { |
| "ce_loss_10": 3.9151899099349974, |
| "ce_loss_13": 3.612821674346924, |
| "ce_loss_2": 5.4220003366470335, |
| "ce_loss_3": 5.159578323364258, |
| "ce_loss_7": 4.096081411838531, |
| "epoch": 0.004, |
| "grad_norm": 2240.0, |
| "kl_loss_10": 524.7813415527344, |
| "kl_loss_2": 3348.9226196289064, |
| "kl_loss_3": 2841.234338378906, |
| "kl_loss_7": 838.199349975586, |
| "learning_rate": 0.0004, |
| "loss": 1895.9125, |
| "step": 40 |
| }, |
| { |
| "ce_loss_10": 3.8530999541282656, |
| "ce_loss_13": 3.5993613958358766, |
| "ce_loss_2": 5.269976806640625, |
| "ce_loss_3": 4.972214603424073, |
| "ce_loss_7": 4.020862734317779, |
| "epoch": 0.005, |
| "grad_norm": 1856.0, |
| "kl_loss_10": 415.9984130859375, |
| "kl_loss_2": 3121.74814453125, |
| "kl_loss_3": 2545.165393066406, |
| "kl_loss_7": 715.3964080810547, |
| "learning_rate": 0.0005, |
| "loss": 1695.3973, |
| "step": 50 |
| }, |
| { |
| "ce_loss_10": 3.8142170667648316, |
| "ce_loss_13": 3.6055872440338135, |
| "ce_loss_2": 5.112091851234436, |
| "ce_loss_3": 4.877537202835083, |
| "ce_loss_7": 3.9615157723426817, |
| "epoch": 0.006, |
| "grad_norm": 1256.0, |
| "kl_loss_10": 352.13475494384767, |
| "kl_loss_2": 2834.7232299804687, |
| "kl_loss_3": 2411.520703125, |
| "kl_loss_7": 626.5961456298828, |
| "learning_rate": 0.0006, |
| "loss": 1559.8957, |
| "step": 60 |
| }, |
| { |
| "ce_loss_10": 3.7009406328201293, |
| "ce_loss_13": 3.519935369491577, |
| "ce_loss_2": 4.975889682769775, |
| "ce_loss_3": 4.767486214637756, |
| "ce_loss_7": 3.847541904449463, |
| "epoch": 0.007, |
| "grad_norm": 1208.0, |
| "kl_loss_10": 301.96008453369143, |
| "kl_loss_2": 2753.304443359375, |
| "kl_loss_3": 2332.9209411621096, |
| "kl_loss_7": 579.1212310791016, |
| "learning_rate": 0.0007, |
| "loss": 1482.1832, |
| "step": 70 |
| }, |
| { |
| "ce_loss_10": 3.691651237010956, |
| "ce_loss_13": 3.5254875898361204, |
| "ce_loss_2": 4.9323248863220215, |
| "ce_loss_3": 4.6815266609191895, |
| "ce_loss_7": 3.918946826457977, |
| "epoch": 0.008, |
| "grad_norm": 1920.0, |
| "kl_loss_10": 277.7585922241211, |
| "kl_loss_2": 2673.962158203125, |
| "kl_loss_3": 2198.915148925781, |
| "kl_loss_7": 679.1366607666016, |
| "learning_rate": 0.0008, |
| "loss": 1458.9709, |
| "step": 80 |
| }, |
| { |
| "ce_loss_10": 3.630067002773285, |
| "ce_loss_13": 3.48206342458725, |
| "ce_loss_2": 4.835212993621826, |
| "ce_loss_3": 4.615828561782837, |
| "ce_loss_7": 3.8199189424514772, |
| "epoch": 0.009, |
| "grad_norm": 2336.0, |
| "kl_loss_10": 252.8492774963379, |
| "kl_loss_2": 2587.6504516601562, |
| "kl_loss_3": 2134.85634765625, |
| "kl_loss_7": 622.173226928711, |
| "learning_rate": 0.0009000000000000001, |
| "loss": 1391.4618, |
| "step": 90 |
| }, |
| { |
| "ce_loss_10": 3.7532737612724305, |
| "ce_loss_13": 3.6053535461425783, |
| "ce_loss_2": 4.9651381254196165, |
| "ce_loss_3": 4.679249548912049, |
| "ce_loss_7": 3.9317663073539735, |
| "epoch": 0.01, |
| "grad_norm": 2128.0, |
| "kl_loss_10": 251.45062484741212, |
| "kl_loss_2": 2572.321484375, |
| "kl_loss_3": 2009.2118774414062, |
| "kl_loss_7": 549.0976821899415, |
| "learning_rate": 0.001, |
| "loss": 1348.2805, |
| "step": 100 |
| }, |
| { |
| "ce_loss_10": 3.7507344126701354, |
| "ce_loss_13": 3.560739505290985, |
| "ce_loss_2": 4.900289678573609, |
| "ce_loss_3": 4.575748300552368, |
| "ce_loss_7": 3.855139982700348, |
| "epoch": 0.011, |
| "grad_norm": 2096.0, |
| "kl_loss_10": 336.0966377258301, |
| "kl_loss_2": 2548.9544921875, |
| "kl_loss_3": 1914.9787902832031, |
| "kl_loss_7": 503.8795822143555, |
| "learning_rate": 0.0009999974825027757, |
| "loss": 1319.618, |
| "step": 110 |
| }, |
| { |
| "ce_loss_10": 3.820546102523804, |
| "ce_loss_13": 3.6168412566184998, |
| "ce_loss_2": 4.895331883430481, |
| "ce_loss_3": 4.579937386512756, |
| "ce_loss_7": 3.905752420425415, |
| "epoch": 0.012, |
| "grad_norm": 1216.0, |
| "kl_loss_10": 360.8111114501953, |
| "kl_loss_2": 2423.57294921875, |
| "kl_loss_3": 1821.830096435547, |
| "kl_loss_7": 497.84374084472654, |
| "learning_rate": 0.0009999899300364532, |
| "loss": 1256.7569, |
| "step": 120 |
| }, |
| { |
| "ce_loss_10": 3.765466582775116, |
| "ce_loss_13": 3.588362789154053, |
| "ce_loss_2": 4.861788105964661, |
| "ce_loss_3": 4.571296620368957, |
| "ce_loss_7": 3.876194405555725, |
| "epoch": 0.013, |
| "grad_norm": 1920.0, |
| "kl_loss_10": 300.5776092529297, |
| "kl_loss_2": 2421.4876953125, |
| "kl_loss_3": 1850.192041015625, |
| "kl_loss_7": 510.1988525390625, |
| "learning_rate": 0.0009999773426770863, |
| "loss": 1278.616, |
| "step": 130 |
| }, |
| { |
| "ce_loss_10": 3.812940168380737, |
| "ce_loss_13": 3.6267056345939634, |
| "ce_loss_2": 4.856600952148438, |
| "ce_loss_3": 4.555375063419342, |
| "ce_loss_7": 3.9197404980659485, |
| "epoch": 0.014, |
| "grad_norm": 1104.0, |
| "kl_loss_10": 334.16673736572267, |
| "kl_loss_2": 2343.682012939453, |
| "kl_loss_3": 1750.5664672851562, |
| "kl_loss_7": 514.5185562133789, |
| "learning_rate": 0.0009999597205514296, |
| "loss": 1248.4314, |
| "step": 140 |
| }, |
| { |
| "ce_loss_10": 3.7693222880363466, |
| "ce_loss_13": 3.5812222719192506, |
| "ce_loss_2": 4.7746042013168335, |
| "ce_loss_3": 4.491594767570495, |
| "ce_loss_7": 3.9013134360313417, |
| "epoch": 0.015, |
| "grad_norm": 2000.0, |
| "kl_loss_10": 301.1280906677246, |
| "kl_loss_2": 2261.218878173828, |
| "kl_loss_3": 1705.700830078125, |
| "kl_loss_7": 572.2008193969726, |
| "learning_rate": 0.0009999370638369377, |
| "loss": 1215.427, |
| "step": 150 |
| }, |
| { |
| "ce_loss_10": 3.771867072582245, |
| "ce_loss_13": 3.623634135723114, |
| "ce_loss_2": 4.793287062644959, |
| "ce_loss_3": 4.509058833122253, |
| "ce_loss_7": 3.964562237262726, |
| "epoch": 0.016, |
| "grad_norm": 1736.0, |
| "kl_loss_10": 262.3149169921875, |
| "kl_loss_2": 2258.9556396484377, |
| "kl_loss_3": 1685.8056640625, |
| "kl_loss_7": 604.1481872558594, |
| "learning_rate": 0.000999909372761763, |
| "loss": 1207.4248, |
| "step": 160 |
| }, |
| { |
| "ce_loss_10": 3.702605497837067, |
| "ce_loss_13": 3.555959129333496, |
| "ce_loss_2": 4.72133858203888, |
| "ce_loss_3": 4.451281535625458, |
| "ce_loss_7": 3.8406598806381225, |
| "epoch": 0.017, |
| "grad_norm": 1536.0, |
| "kl_loss_10": 242.87019119262695, |
| "kl_loss_2": 2254.203680419922, |
| "kl_loss_3": 1717.980206298828, |
| "kl_loss_7": 507.3527557373047, |
| "learning_rate": 0.0009998766476047546, |
| "loss": 1188.5746, |
| "step": 170 |
| }, |
| { |
| "ce_loss_10": 3.7377680063247682, |
| "ce_loss_13": 3.600107181072235, |
| "ce_loss_2": 4.7649291276931764, |
| "ce_loss_3": 4.480887150764465, |
| "ce_loss_7": 3.8928799867630004, |
| "epoch": 0.018, |
| "grad_norm": 1096.0, |
| "kl_loss_10": 231.40118408203125, |
| "kl_loss_2": 2247.180828857422, |
| "kl_loss_3": 1668.1500427246094, |
| "kl_loss_7": 502.5091278076172, |
| "learning_rate": 0.0009998388886954545, |
| "loss": 1181.5367, |
| "step": 180 |
| }, |
| { |
| "ce_loss_10": 3.693929398059845, |
| "ce_loss_13": 3.5655275106430055, |
| "ce_loss_2": 4.720048952102661, |
| "ce_loss_3": 4.425967907905578, |
| "ce_loss_7": 3.8348891854286196, |
| "epoch": 0.019, |
| "grad_norm": 1032.0, |
| "kl_loss_10": 214.6924057006836, |
| "kl_loss_2": 2225.93642578125, |
| "kl_loss_3": 1640.7731079101563, |
| "kl_loss_7": 467.8557983398438, |
| "learning_rate": 0.0009997960964140947, |
| "loss": 1132.1148, |
| "step": 190 |
| }, |
| { |
| "ce_loss_10": 3.682847249507904, |
| "ce_loss_13": 3.5613570332527162, |
| "ce_loss_2": 4.7296292066574095, |
| "ce_loss_3": 4.422930908203125, |
| "ce_loss_7": 3.8146942615509034, |
| "epoch": 0.02, |
| "grad_norm": 1360.0, |
| "kl_loss_10": 204.76239395141602, |
| "kl_loss_2": 2234.7769409179687, |
| "kl_loss_3": 1619.6412719726563, |
| "kl_loss_7": 443.86146240234376, |
| "learning_rate": 0.0009997482711915926, |
| "loss": 1118.6208, |
| "step": 200 |
| }, |
| { |
| "ce_loss_10": 3.6386141061782835, |
| "ce_loss_13": 3.523626208305359, |
| "ce_loss_2": 4.654034543037414, |
| "ce_loss_3": 4.351287698745727, |
| "ce_loss_7": 3.7648990035057066, |
| "epoch": 0.021, |
| "grad_norm": 844.0, |
| "kl_loss_10": 191.48220748901366, |
| "kl_loss_2": 2176.9761169433596, |
| "kl_loss_3": 1592.8676696777343, |
| "kl_loss_7": 425.4680374145508, |
| "learning_rate": 0.0009996954135095479, |
| "loss": 1087.035, |
| "step": 210 |
| }, |
| { |
| "ce_loss_10": 3.726301395893097, |
| "ce_loss_13": 3.613930583000183, |
| "ce_loss_2": 4.689849066734314, |
| "ce_loss_3": 4.408529257774353, |
| "ce_loss_7": 3.8633771181106566, |
| "epoch": 0.022, |
| "grad_norm": 968.0, |
| "kl_loss_10": 185.19210891723634, |
| "kl_loss_2": 2058.4550598144533, |
| "kl_loss_3": 1519.1773864746094, |
| "kl_loss_7": 434.87823486328125, |
| "learning_rate": 0.0009996375239002368, |
| "loss": 1051.0784, |
| "step": 220 |
| }, |
| { |
| "ce_loss_10": 3.7977551460266112, |
| "ce_loss_13": 3.6818463683128355, |
| "ce_loss_2": 4.714341163635254, |
| "ce_loss_3": 4.444690012931824, |
| "ce_loss_7": 3.9216720938682554, |
| "epoch": 0.023, |
| "grad_norm": 792.0, |
| "kl_loss_10": 197.30275802612306, |
| "kl_loss_2": 1985.7976623535155, |
| "kl_loss_3": 1460.261212158203, |
| "kl_loss_7": 417.5539093017578, |
| "learning_rate": 0.0009995746029466072, |
| "loss": 1021.8153, |
| "step": 230 |
| }, |
| { |
| "ce_loss_10": 3.5866660118103026, |
| "ce_loss_13": 3.465270149707794, |
| "ce_loss_2": 4.572040939331055, |
| "ce_loss_3": 4.279095077514649, |
| "ce_loss_7": 3.7078741550445558, |
| "epoch": 0.024, |
| "grad_norm": 908.0, |
| "kl_loss_10": 207.95580520629883, |
| "kl_loss_2": 2143.852404785156, |
| "kl_loss_3": 1571.8129760742188, |
| "kl_loss_7": 426.2864517211914, |
| "learning_rate": 0.0009995066512822719, |
| "loss": 1050.4631, |
| "step": 240 |
| }, |
| { |
| "ce_loss_10": 3.686765193939209, |
| "ce_loss_13": 3.5706915736198424, |
| "ce_loss_2": 4.679925036430359, |
| "ce_loss_3": 4.384693300724029, |
| "ce_loss_7": 3.8067264676094057, |
| "epoch": 0.025, |
| "grad_norm": 1032.0, |
| "kl_loss_10": 199.02285385131836, |
| "kl_loss_2": 2131.8410888671874, |
| "kl_loss_3": 1544.0210571289062, |
| "kl_loss_7": 413.1264175415039, |
| "learning_rate": 0.000999433669591504, |
| "loss": 1033.4686, |
| "step": 250 |
| }, |
| { |
| "ce_loss_10": 3.581657183170319, |
| "ce_loss_13": 3.472314774990082, |
| "ce_loss_2": 4.541779208183288, |
| "ce_loss_3": 4.317579293251038, |
| "ce_loss_7": 3.7064756393432616, |
| "epoch": 0.026, |
| "grad_norm": 932.0, |
| "kl_loss_10": 189.23829040527343, |
| "kl_loss_2": 2088.3945861816405, |
| "kl_loss_3": 1637.8548217773437, |
| "kl_loss_7": 412.1736145019531, |
| "learning_rate": 0.000999355658609228, |
| "loss": 1057.2906, |
| "step": 260 |
| }, |
| { |
| "ce_loss_10": 3.6219969391822815, |
| "ce_loss_13": 3.5048365235328673, |
| "ce_loss_2": 4.596334934234619, |
| "ce_loss_3": 4.441597318649292, |
| "ce_loss_7": 3.7431845664978027, |
| "epoch": 0.027, |
| "grad_norm": 900.0, |
| "kl_loss_10": 188.88880004882813, |
| "kl_loss_2": 2095.5673889160157, |
| "kl_loss_3": 1750.0010925292968, |
| "kl_loss_7": 405.53798522949216, |
| "learning_rate": 0.0009992726191210138, |
| "loss": 1093.5438, |
| "step": 270 |
| }, |
| { |
| "ce_loss_10": 3.652155375480652, |
| "ce_loss_13": 3.5428276896476745, |
| "ce_loss_2": 4.579152154922485, |
| "ce_loss_3": 4.383497536182404, |
| "ce_loss_7": 3.789737546443939, |
| "epoch": 0.028, |
| "grad_norm": 780.0, |
| "kl_loss_10": 187.1881446838379, |
| "kl_loss_2": 2006.3188415527343, |
| "kl_loss_3": 1629.3374816894532, |
| "kl_loss_7": 423.36602783203125, |
| "learning_rate": 0.0009991845519630679, |
| "loss": 1050.2449, |
| "step": 280 |
| }, |
| { |
| "ce_loss_10": 3.535972011089325, |
| "ce_loss_13": 3.427106332778931, |
| "ce_loss_2": 4.4744978785514835, |
| "ce_loss_3": 4.262858963012695, |
| "ce_loss_7": 3.6743877053260805, |
| "epoch": 0.029, |
| "grad_norm": 684.0, |
| "kl_loss_10": 179.29404220581054, |
| "kl_loss_2": 2016.5242065429688, |
| "kl_loss_3": 1582.6825744628907, |
| "kl_loss_7": 441.13164978027345, |
| "learning_rate": 0.0009990914580222257, |
| "loss": 1053.0684, |
| "step": 290 |
| }, |
| { |
| "ce_loss_10": 3.668388879299164, |
| "ce_loss_13": 3.567758929729462, |
| "ce_loss_2": 4.53744785785675, |
| "ce_loss_3": 4.309589576721192, |
| "ce_loss_7": 3.8119096040725706, |
| "epoch": 0.03, |
| "grad_norm": 1224.0, |
| "kl_loss_10": 187.29344177246094, |
| "kl_loss_2": 1896.2646728515624, |
| "kl_loss_3": 1456.2583312988281, |
| "kl_loss_7": 421.26478881835936, |
| "learning_rate": 0.0009989933382359422, |
| "loss": 1015.491, |
| "step": 300 |
| }, |
| { |
| "ce_loss_10": 3.6792996883392335, |
| "ce_loss_13": 3.5740679264068604, |
| "ce_loss_2": 4.557365846633911, |
| "ce_loss_3": 4.323324573040009, |
| "ce_loss_7": 3.7865865588188172, |
| "epoch": 0.031, |
| "grad_norm": 828.0, |
| "kl_loss_10": 187.5126724243164, |
| "kl_loss_2": 1923.0036499023438, |
| "kl_loss_3": 1442.6469970703124, |
| "kl_loss_7": 384.39212493896486, |
| "learning_rate": 0.0009988901935922825, |
| "loss": 997.117, |
| "step": 310 |
| }, |
| { |
| "ce_loss_10": 3.5293397903442383, |
| "ce_loss_13": 3.4199066400527953, |
| "ce_loss_2": 4.486744737625122, |
| "ce_loss_3": 4.227087867259979, |
| "ce_loss_7": 3.6469008684158326, |
| "epoch": 0.032, |
| "grad_norm": 976.0, |
| "kl_loss_10": 183.28864593505858, |
| "kl_loss_2": 2055.861083984375, |
| "kl_loss_3": 1531.5701293945312, |
| "kl_loss_7": 385.79076690673827, |
| "learning_rate": 0.0009987820251299122, |
| "loss": 1008.4045, |
| "step": 320 |
| }, |
| { |
| "ce_loss_10": 3.66086140871048, |
| "ce_loss_13": 3.556379699707031, |
| "ce_loss_2": 4.536041283607483, |
| "ce_loss_3": 4.270140862464904, |
| "ce_loss_7": 3.770153260231018, |
| "epoch": 0.033, |
| "grad_norm": 1144.0, |
| "kl_loss_10": 168.59372940063477, |
| "kl_loss_2": 1906.9965759277343, |
| "kl_loss_3": 1385.0498474121093, |
| "kl_loss_7": 372.18479614257814, |
| "learning_rate": 0.0009986688339380862, |
| "loss": 957.1518, |
| "step": 330 |
| }, |
| { |
| "ce_loss_10": 3.6052905559539794, |
| "ce_loss_13": 3.504980742931366, |
| "ce_loss_2": 4.501095390319824, |
| "ce_loss_3": 4.218203604221344, |
| "ce_loss_7": 3.727604556083679, |
| "epoch": 0.034, |
| "grad_norm": 1104.0, |
| "kl_loss_10": 164.38146286010743, |
| "kl_loss_2": 1931.4434020996093, |
| "kl_loss_3": 1379.435321044922, |
| "kl_loss_7": 389.75638275146486, |
| "learning_rate": 0.0009985506211566387, |
| "loss": 969.0948, |
| "step": 340 |
| }, |
| { |
| "ce_loss_10": 3.6377886295318604, |
| "ce_loss_13": 3.541017484664917, |
| "ce_loss_2": 4.482600402832031, |
| "ce_loss_3": 4.22925614118576, |
| "ce_loss_7": 3.7690312385559084, |
| "epoch": 0.035, |
| "grad_norm": 988.0, |
| "kl_loss_10": 158.43596343994142, |
| "kl_loss_2": 1829.8166870117188, |
| "kl_loss_3": 1337.818865966797, |
| "kl_loss_7": 388.05591278076173, |
| "learning_rate": 0.0009984273879759713, |
| "loss": 933.1328, |
| "step": 350 |
| }, |
| { |
| "ce_loss_10": 3.667439329624176, |
| "ce_loss_13": 3.5666789412498474, |
| "ce_loss_2": 4.5066794633865355, |
| "ce_loss_3": 4.2926198720932005, |
| "ce_loss_7": 3.7826303958892824, |
| "epoch": 0.036, |
| "grad_norm": 600.0, |
| "kl_loss_10": 162.84700927734374, |
| "kl_loss_2": 1826.3444274902345, |
| "kl_loss_3": 1395.6122314453125, |
| "kl_loss_7": 384.24933471679685, |
| "learning_rate": 0.0009982991356370402, |
| "loss": 957.8976, |
| "step": 360 |
| }, |
| { |
| "ce_loss_10": 3.643305718898773, |
| "ce_loss_13": 3.545375657081604, |
| "ce_loss_2": 4.487171721458435, |
| "ce_loss_3": 4.280910170078277, |
| "ce_loss_7": 3.767821896076202, |
| "epoch": 0.037, |
| "grad_norm": 596.0, |
| "kl_loss_10": 164.2067985534668, |
| "kl_loss_2": 1829.6034606933595, |
| "kl_loss_3": 1399.7697387695312, |
| "kl_loss_7": 389.38902282714844, |
| "learning_rate": 0.0009981658654313456, |
| "loss": 945.4266, |
| "step": 370 |
| }, |
| { |
| "ce_loss_10": 3.728627920150757, |
| "ce_loss_13": 3.628399407863617, |
| "ce_loss_2": 4.530555677413941, |
| "ce_loss_3": 4.30595852136612, |
| "ce_loss_7": 3.83515260219574, |
| "epoch": 0.038, |
| "grad_norm": 572.0, |
| "kl_loss_10": 166.8636932373047, |
| "kl_loss_2": 1769.3647521972657, |
| "kl_loss_3": 1309.9726135253907, |
| "kl_loss_7": 360.24556121826174, |
| "learning_rate": 0.000998027578700917, |
| "loss": 918.2047, |
| "step": 380 |
| }, |
| { |
| "ce_loss_10": 3.6558377385139464, |
| "ce_loss_13": 3.5584804892539976, |
| "ce_loss_2": 4.499538516998291, |
| "ce_loss_3": 4.255290400981903, |
| "ce_loss_7": 3.7707452058792112, |
| "epoch": 0.039, |
| "grad_norm": 684.0, |
| "kl_loss_10": 164.3509963989258, |
| "kl_loss_2": 1842.500555419922, |
| "kl_loss_3": 1353.7652526855468, |
| "kl_loss_7": 364.5350601196289, |
| "learning_rate": 0.0009978842768382998, |
| "loss": 935.4773, |
| "step": 390 |
| }, |
| { |
| "ce_loss_10": 3.6760897040367126, |
| "ce_loss_13": 3.5800448179244997, |
| "ce_loss_2": 4.493572664260864, |
| "ce_loss_3": 4.2423638820648195, |
| "ce_loss_7": 3.786301875114441, |
| "epoch": 0.04, |
| "grad_norm": 968.0, |
| "kl_loss_10": 161.01671752929687, |
| "kl_loss_2": 1790.2952514648437, |
| "kl_loss_3": 1298.0454895019532, |
| "kl_loss_7": 363.45732421875, |
| "learning_rate": 0.0009977359612865424, |
| "loss": 914.3111, |
| "step": 400 |
| }, |
| { |
| "ce_loss_10": 3.684686779975891, |
| "ce_loss_13": 3.586086595058441, |
| "ce_loss_2": 4.512642502784729, |
| "ce_loss_3": 4.255100309848785, |
| "ce_loss_7": 3.805712080001831, |
| "epoch": 0.041, |
| "grad_norm": 724.0, |
| "kl_loss_10": 161.0974250793457, |
| "kl_loss_2": 1807.8360168457032, |
| "kl_loss_3": 1310.046209716797, |
| "kl_loss_7": 391.8801742553711, |
| "learning_rate": 0.0009975826335391806, |
| "loss": 914.0043, |
| "step": 410 |
| }, |
| { |
| "ce_loss_10": 3.707440197467804, |
| "ce_loss_13": 3.604601538181305, |
| "ce_loss_2": 4.522381353378296, |
| "ce_loss_3": 4.265636503696442, |
| "ce_loss_7": 3.822117471694946, |
| "epoch": 0.042, |
| "grad_norm": 900.0, |
| "kl_loss_10": 166.57249908447267, |
| "kl_loss_2": 1773.633642578125, |
| "kl_loss_3": 1273.706396484375, |
| "kl_loss_7": 380.59193420410156, |
| "learning_rate": 0.0009974242951402235, |
| "loss": 906.3268, |
| "step": 420 |
| }, |
| { |
| "ce_loss_10": 3.7127435922622682, |
| "ce_loss_13": 3.6068360447883605, |
| "ce_loss_2": 4.534731841087341, |
| "ce_loss_3": 4.272397923469543, |
| "ce_loss_7": 3.8254016041755676, |
| "epoch": 0.043, |
| "grad_norm": 544.0, |
| "kl_loss_10": 171.76721878051757, |
| "kl_loss_2": 1813.8242553710938, |
| "kl_loss_3": 1297.1632202148437, |
| "kl_loss_7": 380.753857421875, |
| "learning_rate": 0.0009972609476841367, |
| "loss": 907.3121, |
| "step": 430 |
| }, |
| { |
| "ce_loss_10": 3.638201355934143, |
| "ce_loss_13": 3.521967649459839, |
| "ce_loss_2": 4.476631236076355, |
| "ce_loss_3": 4.207662534713745, |
| "ce_loss_7": 3.743925619125366, |
| "epoch": 0.044, |
| "grad_norm": 656.0, |
| "kl_loss_10": 205.51385726928712, |
| "kl_loss_2": 1862.7595336914062, |
| "kl_loss_3": 1318.6298767089843, |
| "kl_loss_7": 397.6446823120117, |
| "learning_rate": 0.0009970925928158272, |
| "loss": 947.2434, |
| "step": 440 |
| }, |
| { |
| "ce_loss_10": 3.5770766854286196, |
| "ce_loss_13": 3.463445019721985, |
| "ce_loss_2": 4.41340719461441, |
| "ce_loss_3": 4.154228365421295, |
| "ce_loss_7": 3.683333933353424, |
| "epoch": 0.045, |
| "grad_norm": 544.0, |
| "kl_loss_10": 187.16454544067383, |
| "kl_loss_2": 1860.5320922851563, |
| "kl_loss_3": 1349.3171264648438, |
| "kl_loss_7": 389.9994171142578, |
| "learning_rate": 0.000996919232230627, |
| "loss": 931.1581, |
| "step": 450 |
| }, |
| { |
| "ce_loss_10": 3.6615111470222472, |
| "ce_loss_13": 3.5475740671157836, |
| "ce_loss_2": 4.4471900224685665, |
| "ce_loss_3": 4.206307077407837, |
| "ce_loss_7": 3.769719123840332, |
| "epoch": 0.046, |
| "grad_norm": 792.0, |
| "kl_loss_10": 189.3802345275879, |
| "kl_loss_2": 1767.1443359375, |
| "kl_loss_3": 1299.2234252929688, |
| "kl_loss_7": 404.1760650634766, |
| "learning_rate": 0.0009967408676742752, |
| "loss": 896.3932, |
| "step": 460 |
| }, |
| { |
| "ce_loss_10": 3.815341627597809, |
| "ce_loss_13": 3.6976951956748962, |
| "ce_loss_2": 4.575603008270264, |
| "ce_loss_3": 4.349165272712708, |
| "ce_loss_7": 3.926807904243469, |
| "epoch": 0.047, |
| "grad_norm": 1020.0, |
| "kl_loss_10": 193.17176513671876, |
| "kl_loss_2": 1722.3591735839843, |
| "kl_loss_3": 1269.6951721191406, |
| "kl_loss_7": 399.5799560546875, |
| "learning_rate": 0.0009965575009429006, |
| "loss": 911.5342, |
| "step": 470 |
| }, |
| { |
| "ce_loss_10": 3.5749866485595705, |
| "ce_loss_13": 3.471819591522217, |
| "ce_loss_2": 4.3897274255752565, |
| "ce_loss_3": 4.163278090953827, |
| "ce_loss_7": 3.6932021975517273, |
| "epoch": 0.048, |
| "grad_norm": 832.0, |
| "kl_loss_10": 173.3515739440918, |
| "kl_loss_2": 1803.337139892578, |
| "kl_loss_3": 1356.6680847167968, |
| "kl_loss_7": 384.8886749267578, |
| "learning_rate": 0.0009963691338830043, |
| "loss": 913.6404, |
| "step": 480 |
| }, |
| { |
| "ce_loss_10": 3.6706506490707396, |
| "ce_loss_13": 3.5724706411361695, |
| "ce_loss_2": 4.442422878742218, |
| "ce_loss_3": 4.223198866844177, |
| "ce_loss_7": 3.7754740715026855, |
| "epoch": 0.049, |
| "grad_norm": 664.0, |
| "kl_loss_10": 163.7422233581543, |
| "kl_loss_2": 1726.0343017578125, |
| "kl_loss_3": 1283.6178405761718, |
| "kl_loss_7": 355.8869354248047, |
| "learning_rate": 0.0009961757683914405, |
| "loss": 866.413, |
| "step": 490 |
| }, |
| { |
| "ce_loss_10": 3.657481300830841, |
| "ce_loss_13": 3.561222219467163, |
| "ce_loss_2": 4.412674343585968, |
| "ce_loss_3": 4.190000641345978, |
| "ce_loss_7": 3.7463939428329467, |
| "epoch": 0.05, |
| "grad_norm": 552.0, |
| "kl_loss_10": 171.74871139526368, |
| "kl_loss_2": 1693.8871337890625, |
| "kl_loss_3": 1238.5546569824219, |
| "kl_loss_7": 333.96338348388673, |
| "learning_rate": 0.0009959774064153978, |
| "loss": 867.9215, |
| "step": 500 |
| }, |
| { |
| "ce_loss_10": 3.6671042442321777, |
| "ce_loss_13": 3.5669935941696167, |
| "ce_loss_2": 4.402782237529754, |
| "ce_loss_3": 4.179040241241455, |
| "ce_loss_7": 3.7529300928115843, |
| "epoch": 0.051, |
| "grad_norm": 548.0, |
| "kl_loss_10": 165.0301971435547, |
| "kl_loss_2": 1649.0840270996093, |
| "kl_loss_3": 1201.8943420410155, |
| "kl_loss_7": 327.53272857666013, |
| "learning_rate": 0.0009957740499523787, |
| "loss": 850.5875, |
| "step": 510 |
| }, |
| { |
| "ce_loss_10": 3.692741870880127, |
| "ce_loss_13": 3.5905726313591004, |
| "ce_loss_2": 4.450686037540436, |
| "ce_loss_3": 4.220798969268799, |
| "ce_loss_7": 3.785287392139435, |
| "epoch": 0.052, |
| "grad_norm": 560.0, |
| "kl_loss_10": 160.49609146118163, |
| "kl_loss_2": 1681.6054443359376, |
| "kl_loss_3": 1234.1128479003905, |
| "kl_loss_7": 330.61391296386716, |
| "learning_rate": 0.0009955657010501807, |
| "loss": 859.9023, |
| "step": 520 |
| }, |
| { |
| "ce_loss_10": 3.654950940608978, |
| "ce_loss_13": 3.554408383369446, |
| "ce_loss_2": 4.4271773338317875, |
| "ce_loss_3": 4.2007159948348995, |
| "ce_loss_7": 3.7458335757255554, |
| "epoch": 0.053, |
| "grad_norm": 560.0, |
| "kl_loss_10": 160.82289505004883, |
| "kl_loss_2": 1731.140557861328, |
| "kl_loss_3": 1270.9948364257812, |
| "kl_loss_7": 331.9795379638672, |
| "learning_rate": 0.000995352361806875, |
| "loss": 862.8967, |
| "step": 530 |
| }, |
| { |
| "ce_loss_10": 3.6911896467208862, |
| "ce_loss_13": 3.5907997369766234, |
| "ce_loss_2": 4.458630633354187, |
| "ce_loss_3": 4.2227191686630245, |
| "ce_loss_7": 3.7843895673751833, |
| "epoch": 0.054, |
| "grad_norm": 552.0, |
| "kl_loss_10": 166.47220001220703, |
| "kl_loss_2": 1722.0740966796875, |
| "kl_loss_3": 1249.600811767578, |
| "kl_loss_7": 335.7519927978516, |
| "learning_rate": 0.0009951340343707852, |
| "loss": 876.934, |
| "step": 540 |
| }, |
| { |
| "ce_loss_10": 3.7539408445358275, |
| "ce_loss_13": 3.6503811120986938, |
| "ce_loss_2": 4.52553424835205, |
| "ce_loss_3": 4.282987451553344, |
| "ce_loss_7": 3.839770758152008, |
| "epoch": 0.055, |
| "grad_norm": 512.0, |
| "kl_loss_10": 162.00789489746094, |
| "kl_loss_2": 1707.214862060547, |
| "kl_loss_3": 1233.2592041015625, |
| "kl_loss_7": 323.39465484619143, |
| "learning_rate": 0.0009949107209404665, |
| "loss": 863.0879, |
| "step": 550 |
| }, |
| { |
| "ce_loss_10": 3.6489940643310548, |
| "ce_loss_13": 3.5539053201675417, |
| "ce_loss_2": 4.41527898311615, |
| "ce_loss_3": 4.180786430835724, |
| "ce_loss_7": 3.750082802772522, |
| "epoch": 0.056, |
| "grad_norm": 540.0, |
| "kl_loss_10": 157.5300537109375, |
| "kl_loss_2": 1703.8317993164062, |
| "kl_loss_3": 1234.3482482910156, |
| "kl_loss_7": 346.96667022705077, |
| "learning_rate": 0.0009946824237646824, |
| "loss": 859.4348, |
| "step": 560 |
| }, |
| { |
| "ce_loss_10": 3.5962815046310426, |
| "ce_loss_13": 3.501141941547394, |
| "ce_loss_2": 4.377828812599182, |
| "ce_loss_3": 4.148308992385864, |
| "ce_loss_7": 3.7191163897514343, |
| "epoch": 0.057, |
| "grad_norm": 764.0, |
| "kl_loss_10": 153.23575592041016, |
| "kl_loss_2": 1739.7751159667969, |
| "kl_loss_3": 1272.3390563964845, |
| "kl_loss_7": 396.63781890869143, |
| "learning_rate": 0.0009944491451423828, |
| "loss": 901.9479, |
| "step": 570 |
| }, |
| { |
| "ce_loss_10": 3.594892370700836, |
| "ce_loss_13": 3.500366282463074, |
| "ce_loss_2": 4.390602493286133, |
| "ce_loss_3": 4.148435056209564, |
| "ce_loss_7": 3.710537350177765, |
| "epoch": 0.058, |
| "grad_norm": 804.0, |
| "kl_loss_10": 153.60133438110353, |
| "kl_loss_2": 1753.4079895019531, |
| "kl_loss_3": 1272.2657775878906, |
| "kl_loss_7": 368.931379699707, |
| "learning_rate": 0.0009942108874226813, |
| "loss": 870.9764, |
| "step": 580 |
| }, |
| { |
| "ce_loss_10": 3.7256513595581056, |
| "ce_loss_13": 3.6301231741905213, |
| "ce_loss_2": 4.468952918052674, |
| "ce_loss_3": 4.23498455286026, |
| "ce_loss_7": 3.8281203866004945, |
| "epoch": 0.059, |
| "grad_norm": 494.0, |
| "kl_loss_10": 155.00701828002929, |
| "kl_loss_2": 1650.434881591797, |
| "kl_loss_3": 1188.881414794922, |
| "kl_loss_7": 349.0711242675781, |
| "learning_rate": 0.00099396765300483, |
| "loss": 829.2725, |
| "step": 590 |
| }, |
| { |
| "ce_loss_10": 3.688658046722412, |
| "ce_loss_13": 3.600668156147003, |
| "ce_loss_2": 4.441829895973205, |
| "ce_loss_3": 4.204685604572296, |
| "ce_loss_7": 3.7927687644958494, |
| "epoch": 0.06, |
| "grad_norm": 700.0, |
| "kl_loss_10": 147.73722648620605, |
| "kl_loss_2": 1665.102276611328, |
| "kl_loss_3": 1201.0594848632813, |
| "kl_loss_7": 336.5929977416992, |
| "learning_rate": 0.0009937194443381972, |
| "loss": 836.3632, |
| "step": 600 |
| }, |
| { |
| "ce_loss_10": 3.7074933648109436, |
| "ce_loss_13": 3.6225223660469057, |
| "ce_loss_2": 4.444479322433471, |
| "ce_loss_3": 4.212475669384003, |
| "ce_loss_7": 3.806171452999115, |
| "epoch": 0.061, |
| "grad_norm": 490.0, |
| "kl_loss_10": 145.92314338684082, |
| "kl_loss_2": 1647.2934875488281, |
| "kl_loss_3": 1192.1070617675782, |
| "kl_loss_7": 330.35746612548826, |
| "learning_rate": 0.0009934662639222412, |
| "loss": 841.5062, |
| "step": 610 |
| }, |
| { |
| "ce_loss_10": 3.6668009042739866, |
| "ce_loss_13": 3.5791383743286134, |
| "ce_loss_2": 4.436111927032471, |
| "ce_loss_3": 4.192030191421509, |
| "ce_loss_7": 3.7709102272987365, |
| "epoch": 0.062, |
| "grad_norm": 548.0, |
| "kl_loss_10": 142.56752128601073, |
| "kl_loss_2": 1707.9285888671875, |
| "kl_loss_3": 1224.2419647216798, |
| "kl_loss_7": 333.47724609375, |
| "learning_rate": 0.000993208114306486, |
| "loss": 843.8041, |
| "step": 620 |
| }, |
| { |
| "ce_loss_10": 3.5789570450782775, |
| "ce_loss_13": 3.4927441477775574, |
| "ce_loss_2": 4.355255722999573, |
| "ce_loss_3": 4.113215839862823, |
| "ce_loss_7": 3.6797728538513184, |
| "epoch": 0.063, |
| "grad_norm": 684.0, |
| "kl_loss_10": 142.47375717163087, |
| "kl_loss_2": 1703.1561950683595, |
| "kl_loss_3": 1224.8568115234375, |
| "kl_loss_7": 327.08728790283203, |
| "learning_rate": 0.0009929449980904952, |
| "loss": 827.3757, |
| "step": 630 |
| }, |
| { |
| "ce_loss_10": 3.6368979692459105, |
| "ce_loss_13": 3.552669334411621, |
| "ce_loss_2": 4.39526858329773, |
| "ce_loss_3": 4.161888694763183, |
| "ce_loss_7": 3.7305431842803953, |
| "epoch": 0.064, |
| "grad_norm": 604.0, |
| "kl_loss_10": 145.31115531921387, |
| "kl_loss_2": 1675.4742797851563, |
| "kl_loss_3": 1206.004461669922, |
| "kl_loss_7": 311.66287689208986, |
| "learning_rate": 0.0009926769179238466, |
| "loss": 830.4232, |
| "step": 640 |
| }, |
| { |
| "ce_loss_10": 3.708518397808075, |
| "ce_loss_13": 3.6032424330711366, |
| "ce_loss_2": 4.449502897262573, |
| "ce_loss_3": 4.213514125347137, |
| "ce_loss_7": 3.7848907709121704, |
| "epoch": 0.065, |
| "grad_norm": 572.0, |
| "kl_loss_10": 183.44921951293946, |
| "kl_loss_2": 1690.6953979492187, |
| "kl_loss_3": 1209.2367431640625, |
| "kl_loss_7": 320.96158905029296, |
| "learning_rate": 0.000992403876506104, |
| "loss": 845.6277, |
| "step": 650 |
| }, |
| { |
| "ce_loss_10": 3.6422240853309633, |
| "ce_loss_13": 3.5376295328140257, |
| "ce_loss_2": 4.388966178894043, |
| "ce_loss_3": 4.148260116577148, |
| "ce_loss_7": 3.721568763256073, |
| "epoch": 0.066, |
| "grad_norm": 516.0, |
| "kl_loss_10": 166.38197479248046, |
| "kl_loss_2": 1675.3920837402343, |
| "kl_loss_3": 1201.2981964111327, |
| "kl_loss_7": 311.2148132324219, |
| "learning_rate": 0.0009921258765867918, |
| "loss": 834.6085, |
| "step": 660 |
| }, |
| { |
| "ce_loss_10": 3.593488574028015, |
| "ce_loss_13": 3.5049474120140074, |
| "ce_loss_2": 4.357883477210999, |
| "ce_loss_3": 4.115947949886322, |
| "ce_loss_7": 3.6764505982398985, |
| "epoch": 0.067, |
| "grad_norm": 600.0, |
| "kl_loss_10": 148.19011993408202, |
| "kl_loss_2": 1717.049383544922, |
| "kl_loss_3": 1223.6348205566405, |
| "kl_loss_7": 306.2616958618164, |
| "learning_rate": 0.0009918429209653662, |
| "loss": 833.8985, |
| "step": 670 |
| }, |
| { |
| "ce_loss_10": 3.648161160945892, |
| "ce_loss_13": 3.559934389591217, |
| "ce_loss_2": 4.409848690032959, |
| "ce_loss_3": 4.171260499954224, |
| "ce_loss_7": 3.7374308466911317, |
| "epoch": 0.068, |
| "grad_norm": 596.0, |
| "kl_loss_10": 147.48591995239258, |
| "kl_loss_2": 1679.9023315429688, |
| "kl_loss_3": 1208.9112182617187, |
| "kl_loss_7": 313.83782348632815, |
| "learning_rate": 0.0009915550124911866, |
| "loss": 822.998, |
| "step": 680 |
| }, |
| { |
| "ce_loss_10": 3.6632981300354004, |
| "ce_loss_13": 3.573338711261749, |
| "ce_loss_2": 4.395955181121826, |
| "ce_loss_3": 4.164679610729218, |
| "ce_loss_7": 3.7496419668197634, |
| "epoch": 0.069, |
| "grad_norm": 636.0, |
| "kl_loss_10": 148.64911651611328, |
| "kl_loss_2": 1629.2107971191406, |
| "kl_loss_3": 1186.3095611572267, |
| "kl_loss_7": 309.2399566650391, |
| "learning_rate": 0.0009912621540634887, |
| "loss": 816.0117, |
| "step": 690 |
| }, |
| { |
| "ce_loss_10": 3.6952749490737915, |
| "ce_loss_13": 3.608550024032593, |
| "ce_loss_2": 4.3951560974121096, |
| "ce_loss_3": 4.167996168136597, |
| "ce_loss_7": 3.778964614868164, |
| "epoch": 0.07, |
| "grad_norm": 524.0, |
| "kl_loss_10": 140.3430618286133, |
| "kl_loss_2": 1575.8819396972656, |
| "kl_loss_3": 1123.8694213867188, |
| "kl_loss_7": 294.651708984375, |
| "learning_rate": 0.0009909643486313534, |
| "loss": 794.9152, |
| "step": 700 |
| }, |
| { |
| "ce_loss_10": 3.5606731176376343, |
| "ce_loss_13": 3.4771942019462587, |
| "ce_loss_2": 4.3175184488296505, |
| "ce_loss_3": 4.074072551727295, |
| "ce_loss_7": 3.650731146335602, |
| "epoch": 0.071, |
| "grad_norm": 600.0, |
| "kl_loss_10": 135.5239990234375, |
| "kl_loss_2": 1676.284442138672, |
| "kl_loss_3": 1193.2137634277344, |
| "kl_loss_7": 307.0430740356445, |
| "learning_rate": 0.000990661599193678, |
| "loss": 839.2205, |
| "step": 710 |
| }, |
| { |
| "ce_loss_10": 3.7052354335784914, |
| "ce_loss_13": 3.6190937519073487, |
| "ce_loss_2": 4.42795637845993, |
| "ce_loss_3": 4.203339767456055, |
| "ce_loss_7": 3.7865342020988466, |
| "epoch": 0.072, |
| "grad_norm": 708.0, |
| "kl_loss_10": 139.11955757141112, |
| "kl_loss_2": 1630.3473266601563, |
| "kl_loss_3": 1169.5729766845702, |
| "kl_loss_7": 299.42345809936523, |
| "learning_rate": 0.0009903539087991462, |
| "loss": 803.8498, |
| "step": 720 |
| }, |
| { |
| "ce_loss_10": 3.6689595699310305, |
| "ce_loss_13": 3.586829674243927, |
| "ce_loss_2": 4.399398994445801, |
| "ce_loss_3": 4.174283814430237, |
| "ce_loss_7": 3.7554702758789062, |
| "epoch": 0.073, |
| "grad_norm": 860.0, |
| "kl_loss_10": 133.20760345458984, |
| "kl_loss_2": 1626.384521484375, |
| "kl_loss_3": 1158.8964233398438, |
| "kl_loss_7": 296.6233856201172, |
| "learning_rate": 0.0009900412805461966, |
| "loss": 810.3949, |
| "step": 730 |
| }, |
| { |
| "ce_loss_10": 3.7475465893745423, |
| "ce_loss_13": 3.6637478709220885, |
| "ce_loss_2": 4.477526593208313, |
| "ce_loss_3": 4.232499527931213, |
| "ce_loss_7": 3.834572732448578, |
| "epoch": 0.074, |
| "grad_norm": 756.0, |
| "kl_loss_10": 136.13002281188966, |
| "kl_loss_2": 1615.2621215820313, |
| "kl_loss_3": 1135.3047760009765, |
| "kl_loss_7": 302.71751708984374, |
| "learning_rate": 0.0009897237175829927, |
| "loss": 812.032, |
| "step": 740 |
| }, |
| { |
| "ce_loss_10": 3.633454430103302, |
| "ce_loss_13": 3.546045184135437, |
| "ce_loss_2": 4.386739385128021, |
| "ce_loss_3": 4.157361710071564, |
| "ce_loss_7": 3.7257861375808714, |
| "epoch": 0.075, |
| "grad_norm": 624.0, |
| "kl_loss_10": 138.0735656738281, |
| "kl_loss_2": 1664.8802978515625, |
| "kl_loss_3": 1209.155780029297, |
| "kl_loss_7": 314.29449157714845, |
| "learning_rate": 0.0009894012231073895, |
| "loss": 820.1248, |
| "step": 750 |
| }, |
| { |
| "ce_loss_10": 3.675256085395813, |
| "ce_loss_13": 3.591258680820465, |
| "ce_loss_2": 4.3781631827354435, |
| "ce_loss_3": 4.169950652122497, |
| "ce_loss_7": 3.7596523761749268, |
| "epoch": 0.076, |
| "grad_norm": 596.0, |
| "kl_loss_10": 137.33892288208008, |
| "kl_loss_2": 1570.5589111328125, |
| "kl_loss_3": 1161.7032104492187, |
| "kl_loss_7": 298.7381622314453, |
| "learning_rate": 0.0009890738003669028, |
| "loss": 801.2431, |
| "step": 760 |
| }, |
| { |
| "ce_loss_10": 3.64959534406662, |
| "ce_loss_13": 3.5664158701896667, |
| "ce_loss_2": 4.371342432498932, |
| "ce_loss_3": 4.150311291217804, |
| "ce_loss_7": 3.7354934453964233, |
| "epoch": 0.077, |
| "grad_norm": 540.0, |
| "kl_loss_10": 136.36218070983887, |
| "kl_loss_2": 1622.240057373047, |
| "kl_loss_3": 1172.391793823242, |
| "kl_loss_7": 304.76952667236327, |
| "learning_rate": 0.0009887414526586764, |
| "loss": 787.9819, |
| "step": 770 |
| }, |
| { |
| "ce_loss_10": 3.708216655254364, |
| "ce_loss_13": 3.625141477584839, |
| "ce_loss_2": 4.414664888381958, |
| "ce_loss_3": 4.183781635761261, |
| "ce_loss_7": 3.8081562399864195, |
| "epoch": 0.078, |
| "grad_norm": 596.0, |
| "kl_loss_10": 133.56560096740722, |
| "kl_loss_2": 1562.47041015625, |
| "kl_loss_3": 1106.445620727539, |
| "kl_loss_7": 312.0776168823242, |
| "learning_rate": 0.0009884041833294476, |
| "loss": 768.2491, |
| "step": 780 |
| }, |
| { |
| "ce_loss_10": 3.706817853450775, |
| "ce_loss_13": 3.622973358631134, |
| "ce_loss_2": 4.41116281747818, |
| "ce_loss_3": 4.179266679286957, |
| "ce_loss_7": 3.8186426639556883, |
| "epoch": 0.079, |
| "grad_norm": 632.0, |
| "kl_loss_10": 132.2478443145752, |
| "kl_loss_2": 1599.446923828125, |
| "kl_loss_3": 1117.8709930419923, |
| "kl_loss_7": 368.3747268676758, |
| "learning_rate": 0.000988061995775515, |
| "loss": 815.0693, |
| "step": 790 |
| }, |
| { |
| "ce_loss_10": 3.641828775405884, |
| "ce_loss_13": 3.5547205209732056, |
| "ce_loss_2": 4.335572981834412, |
| "ce_loss_3": 4.108006286621094, |
| "ce_loss_7": 3.7402275919914247, |
| "epoch": 0.08, |
| "grad_norm": 516.0, |
| "kl_loss_10": 141.807564163208, |
| "kl_loss_2": 1570.8703674316407, |
| "kl_loss_3": 1110.252996826172, |
| "kl_loss_7": 321.9771667480469, |
| "learning_rate": 0.0009877148934427035, |
| "loss": 786.1404, |
| "step": 800 |
| }, |
| { |
| "ce_loss_10": 3.681752073764801, |
| "ce_loss_13": 3.596014940738678, |
| "ce_loss_2": 4.380065774917602, |
| "ce_loss_3": 4.151778030395508, |
| "ce_loss_7": 3.7655294299125672, |
| "epoch": 0.081, |
| "grad_norm": 496.0, |
| "kl_loss_10": 145.9334274291992, |
| "kl_loss_2": 1572.8681091308595, |
| "kl_loss_3": 1116.675845336914, |
| "kl_loss_7": 297.05968246459963, |
| "learning_rate": 0.0009873628798263297, |
| "loss": 776.0455, |
| "step": 810 |
| }, |
| { |
| "ce_loss_10": 3.6424105167388916, |
| "ce_loss_13": 3.5447566747665404, |
| "ce_loss_2": 4.312346494197845, |
| "ce_loss_3": 4.088560962677002, |
| "ce_loss_7": 3.7104405045509337, |
| "epoch": 0.082, |
| "grad_norm": 478.0, |
| "kl_loss_10": 152.06344909667968, |
| "kl_loss_2": 1539.9718017578125, |
| "kl_loss_3": 1091.6052520751953, |
| "kl_loss_7": 286.7229400634766, |
| "learning_rate": 0.0009870059584711668, |
| "loss": 790.5065, |
| "step": 820 |
| }, |
| { |
| "ce_loss_10": 3.6575138568878174, |
| "ce_loss_13": 3.5694735765457155, |
| "ce_loss_2": 4.352469277381897, |
| "ce_loss_3": 4.124953854084015, |
| "ce_loss_7": 3.7358759164810182, |
| "epoch": 0.083, |
| "grad_norm": 516.0, |
| "kl_loss_10": 158.90749130249023, |
| "kl_loss_2": 1569.4235595703126, |
| "kl_loss_3": 1125.5340545654296, |
| "kl_loss_7": 290.7964630126953, |
| "learning_rate": 0.000986644132971409, |
| "loss": 786.8994, |
| "step": 830 |
| }, |
| { |
| "ce_loss_10": 3.6558743476867677, |
| "ce_loss_13": 3.5544149518013, |
| "ce_loss_2": 4.354631888866424, |
| "ce_loss_3": 4.1281127572059635, |
| "ce_loss_7": 3.727833020687103, |
| "epoch": 0.084, |
| "grad_norm": 576.0, |
| "kl_loss_10": 158.36446990966797, |
| "kl_loss_2": 1584.950128173828, |
| "kl_loss_3": 1138.0484100341796, |
| "kl_loss_7": 300.7915969848633, |
| "learning_rate": 0.0009862774069706345, |
| "loss": 786.4536, |
| "step": 840 |
| }, |
| { |
| "ce_loss_10": 3.7631431221961975, |
| "ce_loss_13": 3.6783902406692506, |
| "ce_loss_2": 4.423887753486634, |
| "ce_loss_3": 4.210748863220215, |
| "ce_loss_7": 3.8459392905235292, |
| "epoch": 0.085, |
| "grad_norm": 720.0, |
| "kl_loss_10": 144.1476722717285, |
| "kl_loss_2": 1526.45078125, |
| "kl_loss_3": 1098.919091796875, |
| "kl_loss_7": 305.10309143066405, |
| "learning_rate": 0.000985905784161771, |
| "loss": 773.6244, |
| "step": 850 |
| }, |
| { |
| "ce_loss_10": 3.693523097038269, |
| "ce_loss_13": 3.6117894887924193, |
| "ce_loss_2": 4.374859690666199, |
| "ce_loss_3": 4.145905554294586, |
| "ce_loss_7": 3.799845337867737, |
| "epoch": 0.086, |
| "grad_norm": 648.0, |
| "kl_loss_10": 141.55279006958008, |
| "kl_loss_2": 1548.1404724121094, |
| "kl_loss_3": 1092.3538146972655, |
| "kl_loss_7": 338.8992858886719, |
| "learning_rate": 0.000985529268287055, |
| "loss": 780.1624, |
| "step": 860 |
| }, |
| { |
| "ce_loss_10": 3.6179853677749634, |
| "ce_loss_13": 3.532400143146515, |
| "ce_loss_2": 4.3180185675621034, |
| "ce_loss_3": 4.092539095878601, |
| "ce_loss_7": 3.716568684577942, |
| "epoch": 0.087, |
| "grad_norm": 584.0, |
| "kl_loss_10": 138.25293006896973, |
| "kl_loss_2": 1583.606640625, |
| "kl_loss_3": 1113.2994171142577, |
| "kl_loss_7": 327.81214904785156, |
| "learning_rate": 0.0009851478631379982, |
| "loss": 787.4821, |
| "step": 870 |
| }, |
| { |
| "ce_loss_10": 3.6815198183059694, |
| "ce_loss_13": 3.5956546545028685, |
| "ce_loss_2": 4.367411196231842, |
| "ce_loss_3": 4.13222428560257, |
| "ce_loss_7": 3.7695237517356874, |
| "epoch": 0.088, |
| "grad_norm": 628.0, |
| "kl_loss_10": 140.43244590759278, |
| "kl_loss_2": 1545.1064147949219, |
| "kl_loss_3": 1094.267755126953, |
| "kl_loss_7": 312.3658508300781, |
| "learning_rate": 0.0009847615725553456, |
| "loss": 767.0908, |
| "step": 880 |
| }, |
| { |
| "ce_loss_10": 3.739601492881775, |
| "ce_loss_13": 3.657086157798767, |
| "ce_loss_2": 4.379729843139648, |
| "ce_loss_3": 4.177917766571045, |
| "ce_loss_7": 3.820488429069519, |
| "epoch": 0.089, |
| "grad_norm": 552.0, |
| "kl_loss_10": 134.12742614746094, |
| "kl_loss_2": 1464.5765686035156, |
| "kl_loss_3": 1051.556851196289, |
| "kl_loss_7": 283.62481689453125, |
| "learning_rate": 0.0009843704004290394, |
| "loss": 761.853, |
| "step": 890 |
| }, |
| { |
| "ce_loss_10": 3.6452771425247192, |
| "ce_loss_13": 3.5613077044487, |
| "ce_loss_2": 4.318511128425598, |
| "ce_loss_3": 4.107461535930634, |
| "ce_loss_7": 3.726675534248352, |
| "epoch": 0.09, |
| "grad_norm": 474.0, |
| "kl_loss_10": 136.06297454833984, |
| "kl_loss_2": 1542.6724487304687, |
| "kl_loss_3": 1117.772933959961, |
| "kl_loss_7": 292.2666213989258, |
| "learning_rate": 0.0009839743506981783, |
| "loss": 768.8108, |
| "step": 900 |
| }, |
| { |
| "ce_loss_10": 3.5574649572372437, |
| "ce_loss_13": 3.4748517513275146, |
| "ce_loss_2": 4.266572868824005, |
| "ce_loss_3": 4.057099211215973, |
| "ce_loss_7": 3.6422529578208924, |
| "epoch": 0.091, |
| "grad_norm": 516.0, |
| "kl_loss_10": 139.13952560424804, |
| "kl_loss_2": 1603.9869201660156, |
| "kl_loss_3": 1170.3635620117188, |
| "kl_loss_7": 298.2760665893555, |
| "learning_rate": 0.0009835734273509786, |
| "loss": 783.7168, |
| "step": 910 |
| }, |
| { |
| "ce_loss_10": 3.6700770974159242, |
| "ce_loss_13": 3.5813122153282166, |
| "ce_loss_2": 4.351845908164978, |
| "ce_loss_3": 4.139319920539856, |
| "ce_loss_7": 3.7498608589172364, |
| "epoch": 0.092, |
| "grad_norm": 516.0, |
| "kl_loss_10": 139.36617164611818, |
| "kl_loss_2": 1526.7721801757812, |
| "kl_loss_3": 1107.183511352539, |
| "kl_loss_7": 287.28514404296874, |
| "learning_rate": 0.0009831676344247342, |
| "loss": 768.4225, |
| "step": 920 |
| }, |
| { |
| "ce_loss_10": 3.684238874912262, |
| "ce_loss_13": 3.6015963315963746, |
| "ce_loss_2": 4.3427834749221805, |
| "ce_loss_3": 4.138106441497802, |
| "ce_loss_7": 3.75754714012146, |
| "epoch": 0.093, |
| "grad_norm": 490.0, |
| "kl_loss_10": 135.07495460510253, |
| "kl_loss_2": 1516.6379028320312, |
| "kl_loss_3": 1094.0326538085938, |
| "kl_loss_7": 277.64155731201174, |
| "learning_rate": 0.0009827569760057755, |
| "loss": 762.3584, |
| "step": 930 |
| }, |
| { |
| "ce_loss_10": 3.5946595072746277, |
| "ce_loss_13": 3.512081265449524, |
| "ce_loss_2": 4.322237813472748, |
| "ce_loss_3": 4.095906281471253, |
| "ce_loss_7": 3.6798322200775146, |
| "epoch": 0.094, |
| "grad_norm": 728.0, |
| "kl_loss_10": 138.28199310302733, |
| "kl_loss_2": 1619.1793823242188, |
| "kl_loss_3": 1165.3315551757812, |
| "kl_loss_7": 295.293204498291, |
| "learning_rate": 0.000982341456229428, |
| "loss": 780.917, |
| "step": 940 |
| }, |
| { |
| "ce_loss_10": 3.69069162607193, |
| "ce_loss_13": 3.6100045323371885, |
| "ce_loss_2": 4.376732325553894, |
| "ce_loss_3": 4.16404242515564, |
| "ce_loss_7": 3.7701812386512756, |
| "epoch": 0.095, |
| "grad_norm": 688.0, |
| "kl_loss_10": 131.1420455932617, |
| "kl_loss_2": 1575.732354736328, |
| "kl_loss_3": 1138.4372924804688, |
| "kl_loss_7": 285.67282180786134, |
| "learning_rate": 0.000981921079279971, |
| "loss": 765.979, |
| "step": 950 |
| }, |
| { |
| "ce_loss_10": 3.7074394822120667, |
| "ce_loss_13": 3.62913464307785, |
| "ce_loss_2": 4.366938805580139, |
| "ce_loss_3": 4.150120985507965, |
| "ce_loss_7": 3.7818633675575257, |
| "epoch": 0.096, |
| "grad_norm": 720.0, |
| "kl_loss_10": 130.51903839111327, |
| "kl_loss_2": 1507.3517028808594, |
| "kl_loss_3": 1076.092593383789, |
| "kl_loss_7": 272.2766448974609, |
| "learning_rate": 0.0009814958493905962, |
| "loss": 753.6946, |
| "step": 960 |
| }, |
| { |
| "ce_loss_10": 3.658416414260864, |
| "ce_loss_13": 3.576970672607422, |
| "ce_loss_2": 4.346470355987549, |
| "ce_loss_3": 4.128688275814056, |
| "ce_loss_7": 3.7415476202964784, |
| "epoch": 0.097, |
| "grad_norm": 512.0, |
| "kl_loss_10": 128.56299629211426, |
| "kl_loss_2": 1557.0646423339845, |
| "kl_loss_3": 1112.28828125, |
| "kl_loss_7": 279.6500648498535, |
| "learning_rate": 0.0009810657708433637, |
| "loss": 775.217, |
| "step": 970 |
| }, |
| { |
| "ce_loss_10": 3.7308164954185488, |
| "ce_loss_13": 3.6533005952835085, |
| "ce_loss_2": 4.3734122037887575, |
| "ce_loss_3": 4.170846402645111, |
| "ce_loss_7": 3.8050424695014953, |
| "epoch": 0.098, |
| "grad_norm": 716.0, |
| "kl_loss_10": 124.60902214050293, |
| "kl_loss_2": 1475.1663879394532, |
| "kl_loss_3": 1054.8542236328126, |
| "kl_loss_7": 269.9375114440918, |
| "learning_rate": 0.0009806308479691594, |
| "loss": 736.7519, |
| "step": 980 |
| }, |
| { |
| "ce_loss_10": 3.750465714931488, |
| "ce_loss_13": 3.668341946601868, |
| "ce_loss_2": 4.426263308525085, |
| "ce_loss_3": 4.20258377790451, |
| "ce_loss_7": 3.836391198635101, |
| "epoch": 0.099, |
| "grad_norm": 644.0, |
| "kl_loss_10": 131.81643409729003, |
| "kl_loss_2": 1535.673388671875, |
| "kl_loss_3": 1090.963656616211, |
| "kl_loss_7": 289.6497604370117, |
| "learning_rate": 0.0009801910851476522, |
| "loss": 754.2551, |
| "step": 990 |
| }, |
| { |
| "ce_loss_10": 3.653952169418335, |
| "ce_loss_13": 3.577095854282379, |
| "ce_loss_2": 4.349191665649414, |
| "ce_loss_3": 4.125085318088532, |
| "ce_loss_7": 3.7413162350654603, |
| "epoch": 0.1, |
| "grad_norm": 478.0, |
| "kl_loss_10": 128.62464637756347, |
| "kl_loss_2": 1573.8733642578125, |
| "kl_loss_3": 1114.0891967773437, |
| "kl_loss_7": 292.6377975463867, |
| "learning_rate": 0.0009797464868072487, |
| "loss": 758.6713, |
| "step": 1000 |
| }, |
| { |
| "ce_loss_10": 3.6456503033638, |
| "ce_loss_13": 3.5667870163917543, |
| "ce_loss_2": 4.3237790822982785, |
| "ce_loss_3": 4.11080631017685, |
| "ce_loss_7": 3.7275813579559327, |
| "epoch": 0.101, |
| "grad_norm": 432.0, |
| "kl_loss_10": 128.03596534729004, |
| "kl_loss_2": 1525.84384765625, |
| "kl_loss_3": 1094.1039764404297, |
| "kl_loss_7": 291.2514984130859, |
| "learning_rate": 0.0009792970574250492, |
| "loss": 758.2494, |
| "step": 1010 |
| }, |
| { |
| "ce_loss_10": 3.677238702774048, |
| "ce_loss_13": 3.597763466835022, |
| "ce_loss_2": 4.345702481269837, |
| "ce_loss_3": 4.1323373198509215, |
| "ce_loss_7": 3.757745099067688, |
| "epoch": 0.102, |
| "grad_norm": 480.0, |
| "kl_loss_10": 126.84439620971679, |
| "kl_loss_2": 1518.9351745605468, |
| "kl_loss_3": 1090.8279510498046, |
| "kl_loss_7": 289.6885223388672, |
| "learning_rate": 0.0009788428015268028, |
| "loss": 746.4768, |
| "step": 1020 |
| }, |
| { |
| "ce_loss_10": 3.670746088027954, |
| "ce_loss_13": 3.5901795506477354, |
| "ce_loss_2": 4.326242756843567, |
| "ce_loss_3": 4.110077440738678, |
| "ce_loss_7": 3.7697238445281984, |
| "epoch": 0.103, |
| "grad_norm": 520.0, |
| "kl_loss_10": 147.23381576538085, |
| "kl_loss_2": 1500.6592041015624, |
| "kl_loss_3": 1064.419287109375, |
| "kl_loss_7": 309.967301940918, |
| "learning_rate": 0.0009783837236868609, |
| "loss": 752.1227, |
| "step": 1030 |
| }, |
| { |
| "ce_loss_10": 3.665172076225281, |
| "ce_loss_13": 3.559502327442169, |
| "ce_loss_2": 4.309127068519592, |
| "ce_loss_3": 4.0925112009048465, |
| "ce_loss_7": 3.730722725391388, |
| "epoch": 0.104, |
| "grad_norm": 624.0, |
| "kl_loss_10": 168.995276260376, |
| "kl_loss_2": 1506.7355590820312, |
| "kl_loss_3": 1077.4224884033204, |
| "kl_loss_7": 306.44668807983396, |
| "learning_rate": 0.0009779198285281327, |
| "loss": 758.6978, |
| "step": 1040 |
| }, |
| { |
| "ce_loss_10": 3.6450916528701782, |
| "ce_loss_13": 3.5567120909690857, |
| "ce_loss_2": 4.307799768447876, |
| "ce_loss_3": 4.096536159515381, |
| "ce_loss_7": 3.7174035549163817, |
| "epoch": 0.105, |
| "grad_norm": 464.0, |
| "kl_loss_10": 145.4011459350586, |
| "kl_loss_2": 1511.9253051757812, |
| "kl_loss_3": 1079.955551147461, |
| "kl_loss_7": 290.32603912353517, |
| "learning_rate": 0.0009774511207220368, |
| "loss": 751.4335, |
| "step": 1050 |
| }, |
| { |
| "ce_loss_10": 3.6726208686828614, |
| "ce_loss_13": 3.5870786190032957, |
| "ce_loss_2": 4.340760517120361, |
| "ce_loss_3": 4.122452509403229, |
| "ce_loss_7": 3.7611562490463255, |
| "epoch": 0.106, |
| "grad_norm": 516.0, |
| "kl_loss_10": 146.77743186950684, |
| "kl_loss_2": 1523.6993774414063, |
| "kl_loss_3": 1080.6748168945312, |
| "kl_loss_7": 305.8709197998047, |
| "learning_rate": 0.0009769776049884564, |
| "loss": 759.1102, |
| "step": 1060 |
| }, |
| { |
| "ce_loss_10": 3.5789316415786745, |
| "ce_loss_13": 3.4973001360893248, |
| "ce_loss_2": 4.2655829906463625, |
| "ce_loss_3": 4.0485687255859375, |
| "ce_loss_7": 3.664482927322388, |
| "epoch": 0.107, |
| "grad_norm": 512.0, |
| "kl_loss_10": 138.50279579162597, |
| "kl_loss_2": 1555.6201232910157, |
| "kl_loss_3": 1112.0815826416015, |
| "kl_loss_7": 305.1489685058594, |
| "learning_rate": 0.0009764992860956889, |
| "loss": 779.55, |
| "step": 1070 |
| }, |
| { |
| "ce_loss_10": 3.7416428446769716, |
| "ce_loss_13": 3.6618621706962586, |
| "ce_loss_2": 4.364235496520996, |
| "ce_loss_3": 4.161127758026123, |
| "ce_loss_7": 3.8312565684318542, |
| "epoch": 0.108, |
| "grad_norm": 612.0, |
| "kl_loss_10": 132.17280654907228, |
| "kl_loss_2": 1434.9408752441407, |
| "kl_loss_3": 1021.1740997314453, |
| "kl_loss_7": 306.5512954711914, |
| "learning_rate": 0.0009760161688604008, |
| "loss": 729.6794, |
| "step": 1080 |
| }, |
| { |
| "ce_loss_10": 3.74602724313736, |
| "ce_loss_13": 3.6610143184661865, |
| "ce_loss_2": 4.390218591690063, |
| "ce_loss_3": 4.1842693328857425, |
| "ce_loss_7": 3.8411784768104553, |
| "epoch": 0.109, |
| "grad_norm": 576.0, |
| "kl_loss_10": 133.29356536865234, |
| "kl_loss_2": 1472.9822204589843, |
| "kl_loss_3": 1051.5467559814454, |
| "kl_loss_7": 310.2565521240234, |
| "learning_rate": 0.0009755282581475768, |
| "loss": 747.7812, |
| "step": 1090 |
| }, |
| { |
| "ce_loss_10": 3.801929402351379, |
| "ce_loss_13": 3.715673303604126, |
| "ce_loss_2": 4.428924131393432, |
| "ce_loss_3": 4.217723715305328, |
| "ce_loss_7": 3.886538052558899, |
| "epoch": 0.11, |
| "grad_norm": 552.0, |
| "kl_loss_10": 141.27199668884276, |
| "kl_loss_2": 1455.4054565429688, |
| "kl_loss_3": 1032.206768798828, |
| "kl_loss_7": 311.90191345214845, |
| "learning_rate": 0.0009750355588704727, |
| "loss": 730.8727, |
| "step": 1100 |
| }, |
| { |
| "ce_loss_10": 3.6245179295539858, |
| "ce_loss_13": 3.5434940338134764, |
| "ce_loss_2": 4.285040807723999, |
| "ce_loss_3": 4.064236760139465, |
| "ce_loss_7": 3.722900152206421, |
| "epoch": 0.111, |
| "grad_norm": 536.0, |
| "kl_loss_10": 128.93951110839845, |
| "kl_loss_2": 1479.008282470703, |
| "kl_loss_3": 1042.542987060547, |
| "kl_loss_7": 309.4350082397461, |
| "learning_rate": 0.0009745380759905647, |
| "loss": 755.6506, |
| "step": 1110 |
| }, |
| { |
| "ce_loss_10": 3.5733557820320128, |
| "ce_loss_13": 3.497096002101898, |
| "ce_loss_2": 4.2416357636451725, |
| "ce_loss_3": 4.02953668832779, |
| "ce_loss_7": 3.6843501210212706, |
| "epoch": 0.112, |
| "grad_norm": 584.0, |
| "kl_loss_10": 128.8479259490967, |
| "kl_loss_2": 1501.6354125976563, |
| "kl_loss_3": 1078.8607055664063, |
| "kl_loss_7": 309.57103271484374, |
| "learning_rate": 0.0009740358145174998, |
| "loss": 782.7103, |
| "step": 1120 |
| }, |
| { |
| "ce_loss_10": 3.7390747904777526, |
| "ce_loss_13": 3.654650056362152, |
| "ce_loss_2": 4.359592080116272, |
| "ce_loss_3": 4.165994334220886, |
| "ce_loss_7": 3.8400262117385866, |
| "epoch": 0.113, |
| "grad_norm": 434.0, |
| "kl_loss_10": 134.35130157470704, |
| "kl_loss_2": 1442.04345703125, |
| "kl_loss_3": 1051.9576324462892, |
| "kl_loss_7": 334.60899047851564, |
| "learning_rate": 0.0009735287795090455, |
| "loss": 747.7461, |
| "step": 1130 |
| }, |
| { |
| "ce_loss_10": 3.6206952929496765, |
| "ce_loss_13": 3.5408340215682985, |
| "ce_loss_2": 4.278374576568604, |
| "ce_loss_3": 4.073013770580292, |
| "ce_loss_7": 3.709249567985535, |
| "epoch": 0.114, |
| "grad_norm": 560.0, |
| "kl_loss_10": 129.40065841674806, |
| "kl_loss_2": 1489.2802490234376, |
| "kl_loss_3": 1078.2224548339843, |
| "kl_loss_7": 308.7394744873047, |
| "learning_rate": 0.0009730169760710386, |
| "loss": 743.8783, |
| "step": 1140 |
| }, |
| { |
| "ce_loss_10": 3.7078137516975405, |
| "ce_loss_13": 3.6258071303367614, |
| "ce_loss_2": 4.352467465400696, |
| "ce_loss_3": 4.14322521686554, |
| "ce_loss_7": 3.793818712234497, |
| "epoch": 0.115, |
| "grad_norm": 532.0, |
| "kl_loss_10": 132.82089805603027, |
| "kl_loss_2": 1462.5518798828125, |
| "kl_loss_3": 1047.3691467285157, |
| "kl_loss_7": 303.57424392700193, |
| "learning_rate": 0.0009725004093573342, |
| "loss": 741.0269, |
| "step": 1150 |
| }, |
| { |
| "ce_loss_10": 3.641883647441864, |
| "ce_loss_13": 3.5618484139442446, |
| "ce_loss_2": 4.298850560188294, |
| "ce_loss_3": 4.0857291460037235, |
| "ce_loss_7": 3.732511842250824, |
| "epoch": 0.116, |
| "grad_norm": 500.0, |
| "kl_loss_10": 125.74126281738282, |
| "kl_loss_2": 1472.2821716308595, |
| "kl_loss_3": 1051.9892150878907, |
| "kl_loss_7": 293.47923736572267, |
| "learning_rate": 0.0009719790845697534, |
| "loss": 730.1605, |
| "step": 1160 |
| }, |
| { |
| "ce_loss_10": 3.588691568374634, |
| "ce_loss_13": 3.514021909236908, |
| "ce_loss_2": 4.223182845115661, |
| "ce_loss_3": 4.0243830442428585, |
| "ce_loss_7": 3.668225383758545, |
| "epoch": 0.117, |
| "grad_norm": 544.0, |
| "kl_loss_10": 118.70261917114257, |
| "kl_loss_2": 1445.7062133789063, |
| "kl_loss_3": 1032.7508636474608, |
| "kl_loss_7": 274.055322265625, |
| "learning_rate": 0.0009714530069580309, |
| "loss": 718.2419, |
| "step": 1170 |
| }, |
| { |
| "ce_loss_10": 3.6957285404205322, |
| "ce_loss_13": 3.618162250518799, |
| "ce_loss_2": 4.352480411529541, |
| "ce_loss_3": 4.145036590099335, |
| "ce_loss_7": 3.7782084584236144, |
| "epoch": 0.118, |
| "grad_norm": 536.0, |
| "kl_loss_10": 127.76230659484864, |
| "kl_loss_2": 1480.515966796875, |
| "kl_loss_3": 1059.5112030029297, |
| "kl_loss_7": 282.41261138916013, |
| "learning_rate": 0.0009709221818197624, |
| "loss": 734.455, |
| "step": 1180 |
| }, |
| { |
| "ce_loss_10": 3.721509063243866, |
| "ce_loss_13": 3.6461830377578734, |
| "ce_loss_2": 4.384591698646545, |
| "ce_loss_3": 4.175746941566468, |
| "ce_loss_7": 3.804957926273346, |
| "epoch": 0.119, |
| "grad_norm": 454.0, |
| "kl_loss_10": 121.90502281188965, |
| "kl_loss_2": 1485.071533203125, |
| "kl_loss_3": 1060.364013671875, |
| "kl_loss_7": 273.83970947265624, |
| "learning_rate": 0.0009703866145003512, |
| "loss": 735.9141, |
| "step": 1190 |
| }, |
| { |
| "ce_loss_10": 3.6931097984313963, |
| "ce_loss_13": 3.618978762626648, |
| "ce_loss_2": 4.338696074485779, |
| "ce_loss_3": 4.131911754608154, |
| "ce_loss_7": 3.771903729438782, |
| "epoch": 0.12, |
| "grad_norm": 404.0, |
| "kl_loss_10": 117.82418823242188, |
| "kl_loss_2": 1472.2267517089845, |
| "kl_loss_3": 1051.9043701171875, |
| "kl_loss_7": 267.1518127441406, |
| "learning_rate": 0.0009698463103929542, |
| "loss": 740.661, |
| "step": 1200 |
| }, |
| { |
| "ce_loss_10": 3.658799970149994, |
| "ce_loss_13": 3.5842487812042236, |
| "ce_loss_2": 4.312227940559387, |
| "ce_loss_3": 4.10740053653717, |
| "ce_loss_7": 3.7385629415512085, |
| "epoch": 0.121, |
| "grad_norm": 412.0, |
| "kl_loss_10": 122.52205390930176, |
| "kl_loss_2": 1466.4009826660156, |
| "kl_loss_3": 1056.7593078613281, |
| "kl_loss_7": 272.06723709106444, |
| "learning_rate": 0.0009693012749384279, |
| "loss": 737.0117, |
| "step": 1210 |
| }, |
| { |
| "ce_loss_10": 3.679182291030884, |
| "ce_loss_13": 3.6015621542930605, |
| "ce_loss_2": 4.328339552879333, |
| "ce_loss_3": 4.114730060100555, |
| "ce_loss_7": 3.7581299543380737, |
| "epoch": 0.122, |
| "grad_norm": 500.0, |
| "kl_loss_10": 124.13164978027343, |
| "kl_loss_2": 1486.9410522460937, |
| "kl_loss_3": 1053.6353607177734, |
| "kl_loss_7": 279.761865234375, |
| "learning_rate": 0.0009687515136252732, |
| "loss": 728.5778, |
| "step": 1220 |
| }, |
| { |
| "ce_loss_10": 3.6272791743278505, |
| "ce_loss_13": 3.55220046043396, |
| "ce_loss_2": 4.301675605773926, |
| "ce_loss_3": 4.08597983121872, |
| "ce_loss_7": 3.70781672000885, |
| "epoch": 0.123, |
| "grad_norm": 568.0, |
| "kl_loss_10": 121.01997566223145, |
| "kl_loss_2": 1522.73505859375, |
| "kl_loss_3": 1086.6077362060546, |
| "kl_loss_7": 279.3069206237793, |
| "learning_rate": 0.0009681970319895803, |
| "loss": 759.7192, |
| "step": 1230 |
| }, |
| { |
| "ce_loss_10": 3.71327965259552, |
| "ce_loss_13": 3.6385520815849306, |
| "ce_loss_2": 4.354242825508118, |
| "ce_loss_3": 4.150368654727936, |
| "ce_loss_7": 3.7916497707366945, |
| "epoch": 0.124, |
| "grad_norm": 414.0, |
| "kl_loss_10": 124.44540634155274, |
| "kl_loss_2": 1443.4426513671874, |
| "kl_loss_3": 1030.0522674560548, |
| "kl_loss_7": 268.6724395751953, |
| "learning_rate": 0.0009676378356149733, |
| "loss": 722.6414, |
| "step": 1240 |
| }, |
| { |
| "ce_loss_10": 3.6944294214248656, |
| "ce_loss_13": 3.6130942940711974, |
| "ce_loss_2": 4.31483553647995, |
| "ce_loss_3": 4.113047087192536, |
| "ce_loss_7": 3.7628474831581116, |
| "epoch": 0.125, |
| "grad_norm": 572.0, |
| "kl_loss_10": 133.05949897766112, |
| "kl_loss_2": 1434.6165405273437, |
| "kl_loss_3": 1024.405093383789, |
| "kl_loss_7": 265.9820045471191, |
| "learning_rate": 0.0009670739301325534, |
| "loss": 721.2149, |
| "step": 1250 |
| }, |
| { |
| "ce_loss_10": 3.6491236448287965, |
| "ce_loss_13": 3.5683398127555845, |
| "ce_loss_2": 4.294895899295807, |
| "ce_loss_3": 4.082043838500977, |
| "ce_loss_7": 3.721854901313782, |
| "epoch": 0.126, |
| "grad_norm": 506.0, |
| "kl_loss_10": 130.2590259552002, |
| "kl_loss_2": 1460.9762817382812, |
| "kl_loss_3": 1047.9487213134767, |
| "kl_loss_7": 271.88867340087893, |
| "learning_rate": 0.0009665053212208426, |
| "loss": 732.2017, |
| "step": 1260 |
| }, |
| { |
| "ce_loss_10": 3.6933886647224425, |
| "ce_loss_13": 3.6137840390205382, |
| "ce_loss_2": 4.336398506164551, |
| "ce_loss_3": 4.125988566875458, |
| "ce_loss_7": 3.7641473054885863, |
| "epoch": 0.127, |
| "grad_norm": 470.0, |
| "kl_loss_10": 131.11599006652833, |
| "kl_loss_2": 1466.260760498047, |
| "kl_loss_3": 1047.477099609375, |
| "kl_loss_7": 271.31814041137693, |
| "learning_rate": 0.0009659320146057262, |
| "loss": 729.9061, |
| "step": 1270 |
| }, |
| { |
| "ce_loss_10": 3.6932409524917604, |
| "ce_loss_13": 3.6162060022354128, |
| "ce_loss_2": 4.326151037216187, |
| "ce_loss_3": 4.118873739242554, |
| "ce_loss_7": 3.7665857672691345, |
| "epoch": 0.128, |
| "grad_norm": 488.0, |
| "kl_loss_10": 126.3920768737793, |
| "kl_loss_2": 1439.7459106445312, |
| "kl_loss_3": 1023.4634368896484, |
| "kl_loss_7": 263.45100021362305, |
| "learning_rate": 0.0009653540160603955, |
| "loss": 714.3654, |
| "step": 1280 |
| }, |
| { |
| "ce_loss_10": 3.695625138282776, |
| "ce_loss_13": 3.619100844860077, |
| "ce_loss_2": 4.322635555267334, |
| "ce_loss_3": 4.121702527999878, |
| "ce_loss_7": 3.764703559875488, |
| "epoch": 0.129, |
| "grad_norm": 516.0, |
| "kl_loss_10": 125.06153717041016, |
| "kl_loss_2": 1449.9097961425782, |
| "kl_loss_3": 1036.939111328125, |
| "kl_loss_7": 261.42085418701174, |
| "learning_rate": 0.0009647713314052896, |
| "loss": 709.775, |
| "step": 1290 |
| }, |
| { |
| "ce_loss_10": 3.645713412761688, |
| "ce_loss_13": 3.5693684458732604, |
| "ce_loss_2": 4.318705654144287, |
| "ce_loss_3": 4.105780220031738, |
| "ce_loss_7": 3.721473240852356, |
| "epoch": 0.13, |
| "grad_norm": 504.0, |
| "kl_loss_10": 125.77756729125977, |
| "kl_loss_2": 1515.6605834960938, |
| "kl_loss_3": 1082.791098022461, |
| "kl_loss_7": 268.05779418945315, |
| "learning_rate": 0.0009641839665080363, |
| "loss": 739.9956, |
| "step": 1300 |
| }, |
| { |
| "ce_loss_10": 3.6060986638069155, |
| "ce_loss_13": 3.532057249546051, |
| "ce_loss_2": 4.258748412132263, |
| "ce_loss_3": 4.044409060478211, |
| "ce_loss_7": 3.6810790419578554, |
| "epoch": 0.131, |
| "grad_norm": 576.0, |
| "kl_loss_10": 120.33845672607421, |
| "kl_loss_2": 1464.5880249023437, |
| "kl_loss_3": 1035.7476196289062, |
| "kl_loss_7": 258.05460357666016, |
| "learning_rate": 0.0009635919272833937, |
| "loss": 712.5358, |
| "step": 1310 |
| }, |
| { |
| "ce_loss_10": 3.6437799096107484, |
| "ce_loss_13": 3.567954385280609, |
| "ce_loss_2": 4.29961267709732, |
| "ce_loss_3": 4.091295349597931, |
| "ce_loss_7": 3.7204660773277283, |
| "epoch": 0.132, |
| "grad_norm": 520.0, |
| "kl_loss_10": 123.54525375366211, |
| "kl_loss_2": 1460.5380920410157, |
| "kl_loss_3": 1039.971875, |
| "kl_loss_7": 264.77962188720704, |
| "learning_rate": 0.0009629952196931902, |
| "loss": 712.4777, |
| "step": 1320 |
| }, |
| { |
| "ce_loss_10": 3.63455046415329, |
| "ce_loss_13": 3.557650101184845, |
| "ce_loss_2": 4.270671212673188, |
| "ce_loss_3": 4.062439024448395, |
| "ce_loss_7": 3.702920150756836, |
| "epoch": 0.133, |
| "grad_norm": 434.0, |
| "kl_loss_10": 123.04212112426758, |
| "kl_loss_2": 1444.6365600585937, |
| "kl_loss_3": 1028.4689849853517, |
| "kl_loss_7": 258.92202911376955, |
| "learning_rate": 0.0009623938497462645, |
| "loss": 713.1292, |
| "step": 1330 |
| }, |
| { |
| "ce_loss_10": 3.6247922778129578, |
| "ce_loss_13": 3.5494427919387816, |
| "ce_loss_2": 4.2690158009529116, |
| "ce_loss_3": 4.058642566204071, |
| "ce_loss_7": 3.6972993493080137, |
| "epoch": 0.134, |
| "grad_norm": 478.0, |
| "kl_loss_10": 120.75489349365235, |
| "kl_loss_2": 1456.6053527832032, |
| "kl_loss_3": 1037.5501190185546, |
| "kl_loss_7": 266.06713485717773, |
| "learning_rate": 0.0009617878234984055, |
| "loss": 726.2297, |
| "step": 1340 |
| }, |
| { |
| "ce_loss_10": 3.717451739311218, |
| "ce_loss_13": 3.642001247406006, |
| "ce_loss_2": 4.3319720983505245, |
| "ce_loss_3": 4.123037731647491, |
| "ce_loss_7": 3.790008616447449, |
| "epoch": 0.135, |
| "grad_norm": 548.0, |
| "kl_loss_10": 120.84955863952636, |
| "kl_loss_2": 1400.965509033203, |
| "kl_loss_3": 989.1194274902343, |
| "kl_loss_7": 260.0563507080078, |
| "learning_rate": 0.0009611771470522907, |
| "loss": 704.2836, |
| "step": 1350 |
| }, |
| { |
| "ce_loss_10": 3.6397268891334535, |
| "ce_loss_13": 3.565677487850189, |
| "ce_loss_2": 4.285844933986664, |
| "ce_loss_3": 4.075031089782715, |
| "ce_loss_7": 3.7177743196487425, |
| "epoch": 0.136, |
| "grad_norm": 548.0, |
| "kl_loss_10": 119.19244728088378, |
| "kl_loss_2": 1430.9075134277343, |
| "kl_loss_3": 1014.8560333251953, |
| "kl_loss_7": 264.9954383850098, |
| "learning_rate": 0.0009605618265574251, |
| "loss": 706.0607, |
| "step": 1360 |
| }, |
| { |
| "ce_loss_10": 3.6019657135009764, |
| "ce_loss_13": 3.5283800959587097, |
| "ce_loss_2": 4.247595989704132, |
| "ce_loss_3": 4.0493292808532715, |
| "ce_loss_7": 3.682861661911011, |
| "epoch": 0.137, |
| "grad_norm": 544.0, |
| "kl_loss_10": 120.26506729125977, |
| "kl_loss_2": 1482.0683837890624, |
| "kl_loss_3": 1078.6839630126954, |
| "kl_loss_7": 272.45921630859374, |
| "learning_rate": 0.0009599418682100792, |
| "loss": 727.2132, |
| "step": 1370 |
| }, |
| { |
| "ce_loss_10": 3.645335590839386, |
| "ce_loss_13": 3.570277786254883, |
| "ce_loss_2": 4.2866430401802065, |
| "ce_loss_3": 4.071622550487518, |
| "ce_loss_7": 3.717624640464783, |
| "epoch": 0.138, |
| "grad_norm": 612.0, |
| "kl_loss_10": 119.49271163940429, |
| "kl_loss_2": 1442.7004455566407, |
| "kl_loss_3": 1025.406283569336, |
| "kl_loss_7": 261.58585968017576, |
| "learning_rate": 0.0009593172782532268, |
| "loss": 717.2026, |
| "step": 1380 |
| }, |
| { |
| "ce_loss_10": 3.6908539175987243, |
| "ce_loss_13": 3.617784011363983, |
| "ce_loss_2": 4.313388335704803, |
| "ce_loss_3": 4.114008998870849, |
| "ce_loss_7": 3.7639716506004333, |
| "epoch": 0.139, |
| "grad_norm": 476.0, |
| "kl_loss_10": 120.62876358032227, |
| "kl_loss_2": 1423.3504333496094, |
| "kl_loss_3": 1012.8270812988281, |
| "kl_loss_7": 261.68493881225584, |
| "learning_rate": 0.0009586880629764817, |
| "loss": 706.5565, |
| "step": 1390 |
| }, |
| { |
| "ce_loss_10": 3.61561758518219, |
| "ce_loss_13": 3.5403517365455626, |
| "ce_loss_2": 4.258929216861725, |
| "ce_loss_3": 4.060744059085846, |
| "ce_loss_7": 3.687643599510193, |
| "epoch": 0.14, |
| "grad_norm": 792.0, |
| "kl_loss_10": 120.44653511047363, |
| "kl_loss_2": 1437.7734375, |
| "kl_loss_3": 1068.6304412841796, |
| "kl_loss_7": 272.7257308959961, |
| "learning_rate": 0.0009580542287160348, |
| "loss": 716.5157, |
| "step": 1400 |
| }, |
| { |
| "ce_loss_10": 3.579881501197815, |
| "ce_loss_13": 3.505436861515045, |
| "ce_loss_2": 4.2163320779800415, |
| "ce_loss_3": 4.016775751113892, |
| "ce_loss_7": 3.6602004766464233, |
| "epoch": 0.141, |
| "grad_norm": 740.0, |
| "kl_loss_10": 119.12874908447266, |
| "kl_loss_2": 1437.7877868652345, |
| "kl_loss_3": 1029.927035522461, |
| "kl_loss_7": 274.3121276855469, |
| "learning_rate": 0.0009574157818545901, |
| "loss": 704.4754, |
| "step": 1410 |
| }, |
| { |
| "ce_loss_10": 3.654617667198181, |
| "ce_loss_13": 3.581939327716827, |
| "ce_loss_2": 4.266410648822784, |
| "ce_loss_3": 4.076312291622162, |
| "ce_loss_7": 3.7350067377090452, |
| "epoch": 0.142, |
| "grad_norm": 788.0, |
| "kl_loss_10": 117.25881233215333, |
| "kl_loss_2": 1402.042706298828, |
| "kl_loss_3": 1008.1144165039062, |
| "kl_loss_7": 268.0317886352539, |
| "learning_rate": 0.0009567727288213005, |
| "loss": 712.509, |
| "step": 1420 |
| }, |
| { |
| "ce_loss_10": 3.62344468832016, |
| "ce_loss_13": 3.552217972278595, |
| "ce_loss_2": 4.242154741287232, |
| "ce_loss_3": 4.047549939155578, |
| "ce_loss_7": 3.6985828638076783, |
| "epoch": 0.143, |
| "grad_norm": 466.0, |
| "kl_loss_10": 115.47184524536132, |
| "kl_loss_2": 1418.5062316894532, |
| "kl_loss_3": 1018.4080627441406, |
| "kl_loss_7": 270.1881278991699, |
| "learning_rate": 0.0009561250760917027, |
| "loss": 702.7143, |
| "step": 1430 |
| }, |
| { |
| "ce_loss_10": 3.6490369558334352, |
| "ce_loss_13": 3.5760287642478943, |
| "ce_loss_2": 4.275133848190308, |
| "ce_loss_3": 4.07141832113266, |
| "ce_loss_7": 3.7269250392913817, |
| "epoch": 0.144, |
| "grad_norm": 524.0, |
| "kl_loss_10": 119.90954666137695, |
| "kl_loss_2": 1441.103936767578, |
| "kl_loss_3": 1028.5968627929688, |
| "kl_loss_7": 267.66681442260744, |
| "learning_rate": 0.0009554728301876525, |
| "loss": 698.2885, |
| "step": 1440 |
| }, |
| { |
| "ce_loss_10": 3.7067930936813354, |
| "ce_loss_13": 3.629922258853912, |
| "ce_loss_2": 4.314408445358277, |
| "ce_loss_3": 4.132321739196778, |
| "ce_loss_7": 3.7809135794639586, |
| "epoch": 0.145, |
| "grad_norm": 632.0, |
| "kl_loss_10": 122.64454803466796, |
| "kl_loss_2": 1398.767156982422, |
| "kl_loss_3": 1023.6964874267578, |
| "kl_loss_7": 262.7124481201172, |
| "learning_rate": 0.0009548159976772592, |
| "loss": 721.9051, |
| "step": 1450 |
| }, |
| { |
| "ce_loss_10": 3.641107952594757, |
| "ce_loss_13": 3.5679691076278686, |
| "ce_loss_2": 4.273185658454895, |
| "ce_loss_3": 4.074773287773132, |
| "ce_loss_7": 3.715712809562683, |
| "epoch": 0.146, |
| "grad_norm": 472.0, |
| "kl_loss_10": 119.45023498535156, |
| "kl_loss_2": 1426.5163330078126, |
| "kl_loss_3": 1022.3043518066406, |
| "kl_loss_7": 264.85511245727537, |
| "learning_rate": 0.0009541545851748186, |
| "loss": 702.8599, |
| "step": 1460 |
| }, |
| { |
| "ce_loss_10": 3.5100984811782836, |
| "ce_loss_13": 3.436799693107605, |
| "ce_loss_2": 4.161883985996246, |
| "ce_loss_3": 3.955858516693115, |
| "ce_loss_7": 3.594360911846161, |
| "epoch": 0.147, |
| "grad_norm": 556.0, |
| "kl_loss_10": 116.41493873596191, |
| "kl_loss_2": 1467.1879943847657, |
| "kl_loss_3": 1037.354574584961, |
| "kl_loss_7": 266.5866645812988, |
| "learning_rate": 0.0009534885993407473, |
| "loss": 713.4948, |
| "step": 1470 |
| }, |
| { |
| "ce_loss_10": 3.6824231266975405, |
| "ce_loss_13": 3.608121466636658, |
| "ce_loss_2": 4.328725492954254, |
| "ce_loss_3": 4.115788686275482, |
| "ce_loss_7": 3.755172336101532, |
| "epoch": 0.148, |
| "grad_norm": 560.0, |
| "kl_loss_10": 118.46903686523437, |
| "kl_loss_2": 1448.8276794433593, |
| "kl_loss_3": 1029.914727783203, |
| "kl_loss_7": 263.37538986206056, |
| "learning_rate": 0.0009528180468815154, |
| "loss": 714.8544, |
| "step": 1480 |
| }, |
| { |
| "ce_loss_10": 3.7179338216781614, |
| "ce_loss_13": 3.6484482169151304, |
| "ce_loss_2": 4.323283433914185, |
| "ce_loss_3": 4.127403116226196, |
| "ce_loss_7": 3.7899964809417725, |
| "epoch": 0.149, |
| "grad_norm": 480.0, |
| "kl_loss_10": 114.30357208251954, |
| "kl_loss_2": 1395.1859313964844, |
| "kl_loss_3": 989.8287170410156, |
| "kl_loss_7": 257.14686279296876, |
| "learning_rate": 0.0009521429345495787, |
| "loss": 690.7114, |
| "step": 1490 |
| }, |
| { |
| "ce_loss_10": 3.7034213185310363, |
| "ce_loss_13": 3.6311787962913513, |
| "ce_loss_2": 4.309155285358429, |
| "ce_loss_3": 4.092868828773499, |
| "ce_loss_7": 3.7654823780059816, |
| "epoch": 0.15, |
| "grad_norm": 448.0, |
| "kl_loss_10": 116.55960197448731, |
| "kl_loss_2": 1382.6544982910157, |
| "kl_loss_3": 969.2838195800781, |
| "kl_loss_7": 249.21936950683593, |
| "learning_rate": 0.0009514632691433108, |
| "loss": 688.2995, |
| "step": 1500 |
| }, |
| { |
| "ce_loss_10": 3.6700626373291017, |
| "ce_loss_13": 3.5945797085762026, |
| "ce_loss_2": 4.289841759204864, |
| "ce_loss_3": 4.08635276556015, |
| "ce_loss_7": 3.738140869140625, |
| "epoch": 0.151, |
| "grad_norm": 448.0, |
| "kl_loss_10": 129.3878589630127, |
| "kl_loss_2": 1424.29853515625, |
| "kl_loss_3": 1001.7422454833984, |
| "kl_loss_7": 254.78705520629882, |
| "learning_rate": 0.0009507790575069346, |
| "loss": 706.6927, |
| "step": 1510 |
| }, |
| { |
| "ce_loss_10": 3.65303395986557, |
| "ce_loss_13": 3.5714800715446473, |
| "ce_loss_2": 4.28388956785202, |
| "ce_loss_3": 4.07215541601181, |
| "ce_loss_7": 3.7174383282661436, |
| "epoch": 0.152, |
| "grad_norm": 560.0, |
| "kl_loss_10": 131.33350143432617, |
| "kl_loss_2": 1434.660906982422, |
| "kl_loss_3": 1017.4268585205078, |
| "kl_loss_7": 260.6188400268555, |
| "learning_rate": 0.0009500903065304539, |
| "loss": 715.3042, |
| "step": 1520 |
| }, |
| { |
| "ce_loss_10": 3.683453822135925, |
| "ce_loss_13": 3.60856169462204, |
| "ce_loss_2": 4.287684428691864, |
| "ce_loss_3": 4.0820488929748535, |
| "ce_loss_7": 3.7489410638809204, |
| "epoch": 0.153, |
| "grad_norm": 592.0, |
| "kl_loss_10": 120.57846107482911, |
| "kl_loss_2": 1384.860614013672, |
| "kl_loss_3": 975.0672027587891, |
| "kl_loss_7": 247.4440773010254, |
| "learning_rate": 0.0009493970231495835, |
| "loss": 691.6448, |
| "step": 1530 |
| }, |
| { |
| "ce_loss_10": 3.6223431706428526, |
| "ce_loss_13": 3.55173202753067, |
| "ce_loss_2": 4.230155563354492, |
| "ce_loss_3": 4.02554075717926, |
| "ce_loss_7": 3.6863773345947264, |
| "epoch": 0.154, |
| "grad_norm": 490.0, |
| "kl_loss_10": 119.43844871520996, |
| "kl_loss_2": 1397.457257080078, |
| "kl_loss_3": 991.8431060791015, |
| "kl_loss_7": 243.42232818603514, |
| "learning_rate": 0.0009486992143456792, |
| "loss": 686.1227, |
| "step": 1540 |
| }, |
| { |
| "ce_loss_10": 3.6514541625976564, |
| "ce_loss_13": 3.571796643733978, |
| "ce_loss_2": 4.304909610748291, |
| "ce_loss_3": 4.0922522187232975, |
| "ce_loss_7": 3.7216905117034913, |
| "epoch": 0.155, |
| "grad_norm": 396.0, |
| "kl_loss_10": 128.10715980529784, |
| "kl_loss_2": 1491.158837890625, |
| "kl_loss_3": 1056.2210266113282, |
| "kl_loss_7": 262.1390213012695, |
| "learning_rate": 0.0009479968871456679, |
| "loss": 716.6379, |
| "step": 1550 |
| }, |
| { |
| "ce_loss_10": 3.6170923829078676, |
| "ce_loss_13": 3.542399287223816, |
| "ce_loss_2": 4.252330017089844, |
| "ce_loss_3": 4.045702540874482, |
| "ce_loss_7": 3.685628616809845, |
| "epoch": 0.156, |
| "grad_norm": 454.0, |
| "kl_loss_10": 121.63033790588379, |
| "kl_loss_2": 1463.288525390625, |
| "kl_loss_3": 1026.1065032958984, |
| "kl_loss_7": 254.98253784179687, |
| "learning_rate": 0.0009472900486219768, |
| "loss": 702.4742, |
| "step": 1560 |
| }, |
| { |
| "ce_loss_10": 3.6025954604148867, |
| "ce_loss_13": 3.5303670883178713, |
| "ce_loss_2": 4.232356917858124, |
| "ce_loss_3": 4.022554993629456, |
| "ce_loss_7": 3.6709616661071776, |
| "epoch": 0.157, |
| "grad_norm": 520.0, |
| "kl_loss_10": 118.88864822387696, |
| "kl_loss_2": 1434.4180419921875, |
| "kl_loss_3": 1021.8371948242187, |
| "kl_loss_7": 253.59476776123046, |
| "learning_rate": 0.000946578705892462, |
| "loss": 706.9224, |
| "step": 1570 |
| }, |
| { |
| "ce_loss_10": 3.6455034971237184, |
| "ce_loss_13": 3.5725855112075804, |
| "ce_loss_2": 4.251002633571625, |
| "ce_loss_3": 4.075614416599274, |
| "ce_loss_7": 3.712466835975647, |
| "epoch": 0.158, |
| "grad_norm": 520.0, |
| "kl_loss_10": 115.86212844848633, |
| "kl_loss_2": 1388.911444091797, |
| "kl_loss_3": 1008.0096618652344, |
| "kl_loss_7": 249.38579559326172, |
| "learning_rate": 0.0009458628661203367, |
| "loss": 702.1684, |
| "step": 1580 |
| }, |
| { |
| "ce_loss_10": 3.6394161105155947, |
| "ce_loss_13": 3.571541059017181, |
| "ce_loss_2": 4.284844183921814, |
| "ce_loss_3": 4.076776087284088, |
| "ce_loss_7": 3.7110044956207275, |
| "epoch": 0.159, |
| "grad_norm": 494.0, |
| "kl_loss_10": 113.66581001281739, |
| "kl_loss_2": 1444.869854736328, |
| "kl_loss_3": 1032.262744140625, |
| "kl_loss_7": 253.4554000854492, |
| "learning_rate": 0.0009451425365140996, |
| "loss": 688.5467, |
| "step": 1590 |
| }, |
| { |
| "ce_loss_10": 3.7211164236068726, |
| "ce_loss_13": 3.649132215976715, |
| "ce_loss_2": 4.325118780136108, |
| "ce_loss_3": 4.128993570804596, |
| "ce_loss_7": 3.7914722681045534, |
| "epoch": 0.16, |
| "grad_norm": 456.0, |
| "kl_loss_10": 117.80431632995605, |
| "kl_loss_2": 1373.1717468261718, |
| "kl_loss_3": 981.5717620849609, |
| "kl_loss_7": 253.6373489379883, |
| "learning_rate": 0.0009444177243274617, |
| "loss": 681.3762, |
| "step": 1600 |
| }, |
| { |
| "ce_loss_10": 3.574730896949768, |
| "ce_loss_13": 3.498664665222168, |
| "ce_loss_2": 4.200723135471344, |
| "ce_loss_3": 4.009881269931793, |
| "ce_loss_7": 3.6463570594787598, |
| "epoch": 0.161, |
| "grad_norm": 480.0, |
| "kl_loss_10": 122.87367897033691, |
| "kl_loss_2": 1430.2820068359374, |
| "kl_loss_3": 1037.6371978759767, |
| "kl_loss_7": 260.55384521484376, |
| "learning_rate": 0.0009436884368592739, |
| "loss": 706.6845, |
| "step": 1610 |
| }, |
| { |
| "ce_loss_10": 3.6286559462547303, |
| "ce_loss_13": 3.555983376502991, |
| "ce_loss_2": 4.232067906856537, |
| "ce_loss_3": 4.041269278526306, |
| "ce_loss_7": 3.6985298871994017, |
| "epoch": 0.162, |
| "grad_norm": 498.0, |
| "kl_loss_10": 118.67424545288085, |
| "kl_loss_2": 1385.5154724121094, |
| "kl_loss_3": 999.56015625, |
| "kl_loss_7": 250.75928268432617, |
| "learning_rate": 0.0009429546814534529, |
| "loss": 699.0302, |
| "step": 1620 |
| }, |
| { |
| "ce_loss_10": 3.639633226394653, |
| "ce_loss_13": 3.5706356167793274, |
| "ce_loss_2": 4.241236877441406, |
| "ce_loss_3": 4.056409633159637, |
| "ce_loss_7": 3.708655667304993, |
| "epoch": 0.163, |
| "grad_norm": 384.0, |
| "kl_loss_10": 117.12662200927734, |
| "kl_loss_2": 1374.89609375, |
| "kl_loss_3": 989.9520751953125, |
| "kl_loss_7": 248.8686378479004, |
| "learning_rate": 0.0009422164654989072, |
| "loss": 676.7936, |
| "step": 1630 |
| }, |
| { |
| "ce_loss_10": 3.7635043978691103, |
| "ce_loss_13": 3.687360870838165, |
| "ce_loss_2": 4.338383412361145, |
| "ce_loss_3": 4.1611551403999325, |
| "ce_loss_7": 3.828977358341217, |
| "epoch": 0.164, |
| "grad_norm": 424.0, |
| "kl_loss_10": 119.46500015258789, |
| "kl_loss_2": 1362.1967407226562, |
| "kl_loss_3": 990.5942932128906, |
| "kl_loss_7": 248.94699325561524, |
| "learning_rate": 0.0009414737964294635, |
| "loss": 685.8197, |
| "step": 1640 |
| }, |
| { |
| "ce_loss_10": 3.678090500831604, |
| "ce_loss_13": 3.6101470470428465, |
| "ce_loss_2": 4.259114742279053, |
| "ce_loss_3": 4.078064382076263, |
| "ce_loss_7": 3.7428590416908265, |
| "epoch": 0.165, |
| "grad_norm": 444.0, |
| "kl_loss_10": 112.9244888305664, |
| "kl_loss_2": 1333.3570129394532, |
| "kl_loss_3": 969.1624145507812, |
| "kl_loss_7": 238.38165054321288, |
| "learning_rate": 0.000940726681723791, |
| "loss": 682.7061, |
| "step": 1650 |
| }, |
| { |
| "ce_loss_10": 3.512197470664978, |
| "ce_loss_13": 3.4408557653427123, |
| "ce_loss_2": 4.148977339267731, |
| "ce_loss_3": 3.9514773368835447, |
| "ce_loss_7": 3.5815786600112913, |
| "epoch": 0.166, |
| "grad_norm": 488.0, |
| "kl_loss_10": 117.70020294189453, |
| "kl_loss_2": 1442.5229797363281, |
| "kl_loss_3": 1035.8803924560548, |
| "kl_loss_7": 256.4058250427246, |
| "learning_rate": 0.0009399751289053266, |
| "loss": 690.3188, |
| "step": 1660 |
| }, |
| { |
| "ce_loss_10": 3.742681550979614, |
| "ce_loss_13": 3.671311604976654, |
| "ce_loss_2": 4.328805279731751, |
| "ce_loss_3": 4.135993778705597, |
| "ce_loss_7": 3.809998023509979, |
| "epoch": 0.167, |
| "grad_norm": 478.0, |
| "kl_loss_10": 116.78232650756836, |
| "kl_loss_2": 1366.6806213378907, |
| "kl_loss_3": 967.4927185058593, |
| "kl_loss_7": 249.3471366882324, |
| "learning_rate": 0.0009392191455421988, |
| "loss": 682.1736, |
| "step": 1670 |
| }, |
| { |
| "ce_loss_10": 3.7067878365516664, |
| "ce_loss_13": 3.6276530623435974, |
| "ce_loss_2": 4.298012292385101, |
| "ce_loss_3": 4.104024171829224, |
| "ce_loss_7": 3.7697168350219727, |
| "epoch": 0.168, |
| "grad_norm": 490.0, |
| "kl_loss_10": 123.8147029876709, |
| "kl_loss_2": 1386.5616271972656, |
| "kl_loss_3": 990.9451782226563, |
| "kl_loss_7": 260.7920967102051, |
| "learning_rate": 0.0009384587392471515, |
| "loss": 679.4555, |
| "step": 1680 |
| }, |
| { |
| "ce_loss_10": 3.7010039329528808, |
| "ce_loss_13": 3.629232919216156, |
| "ce_loss_2": 4.288461661338806, |
| "ce_loss_3": 4.104441356658936, |
| "ce_loss_7": 3.773091959953308, |
| "epoch": 0.169, |
| "grad_norm": 494.0, |
| "kl_loss_10": 117.7159465789795, |
| "kl_loss_2": 1349.4280029296874, |
| "kl_loss_3": 968.4034729003906, |
| "kl_loss_7": 251.4811584472656, |
| "learning_rate": 0.0009376939176774678, |
| "loss": 675.85, |
| "step": 1690 |
| }, |
| { |
| "ce_loss_10": 3.678883969783783, |
| "ce_loss_13": 3.602108871936798, |
| "ce_loss_2": 4.274775016307831, |
| "ce_loss_3": 4.074398016929626, |
| "ce_loss_7": 3.7432108521461487, |
| "epoch": 0.17, |
| "grad_norm": 540.0, |
| "kl_loss_10": 124.26388397216797, |
| "kl_loss_2": 1371.6622314453125, |
| "kl_loss_3": 974.3467742919922, |
| "kl_loss_7": 252.39389877319337, |
| "learning_rate": 0.0009369246885348925, |
| "loss": 687.5515, |
| "step": 1700 |
| }, |
| { |
| "ce_loss_10": 3.6718587994575502, |
| "ce_loss_13": 3.591440510749817, |
| "ce_loss_2": 4.303124558925629, |
| "ce_loss_3": 4.095115387439728, |
| "ce_loss_7": 3.7370152711868285, |
| "epoch": 0.171, |
| "grad_norm": 548.0, |
| "kl_loss_10": 130.6899742126465, |
| "kl_loss_2": 1433.3515563964843, |
| "kl_loss_3": 1016.4502960205078, |
| "kl_loss_7": 255.15539627075196, |
| "learning_rate": 0.0009361510595655545, |
| "loss": 695.4526, |
| "step": 1710 |
| }, |
| { |
| "ce_loss_10": 3.6283922672271727, |
| "ce_loss_13": 3.5495692014694216, |
| "ce_loss_2": 4.237072479724884, |
| "ce_loss_3": 4.041323733329773, |
| "ce_loss_7": 3.6956202745437623, |
| "epoch": 0.172, |
| "grad_norm": 466.0, |
| "kl_loss_10": 127.2772174835205, |
| "kl_loss_2": 1409.4606872558593, |
| "kl_loss_3": 1009.6889221191407, |
| "kl_loss_7": 256.7372299194336, |
| "learning_rate": 0.0009353730385598887, |
| "loss": 691.3762, |
| "step": 1720 |
| }, |
| { |
| "ce_loss_10": 3.54926735162735, |
| "ce_loss_13": 3.4769670009613036, |
| "ce_loss_2": 4.178456115722656, |
| "ce_loss_3": 3.9720041275024416, |
| "ce_loss_7": 3.6176819682121275, |
| "epoch": 0.173, |
| "grad_norm": 436.0, |
| "kl_loss_10": 118.23514060974121, |
| "kl_loss_2": 1418.4845947265626, |
| "kl_loss_3": 998.5092987060547, |
| "kl_loss_7": 249.0585678100586, |
| "learning_rate": 0.0009345906333525581, |
| "loss": 697.0381, |
| "step": 1730 |
| }, |
| { |
| "ce_loss_10": 3.5872240900993346, |
| "ce_loss_13": 3.5136430621147157, |
| "ce_loss_2": 4.2012934923172, |
| "ce_loss_3": 3.9967063546180723, |
| "ce_loss_7": 3.65671169757843, |
| "epoch": 0.174, |
| "grad_norm": 408.0, |
| "kl_loss_10": 122.21610031127929, |
| "kl_loss_2": 1418.8342651367188, |
| "kl_loss_3": 1007.5152191162109, |
| "kl_loss_7": 254.60486221313477, |
| "learning_rate": 0.0009338038518223745, |
| "loss": 687.4246, |
| "step": 1740 |
| }, |
| { |
| "ce_loss_10": 3.657099163532257, |
| "ce_loss_13": 3.5811222553253175, |
| "ce_loss_2": 4.272738003730774, |
| "ce_loss_3": 4.0657650351524355, |
| "ce_loss_7": 3.7293712973594664, |
| "epoch": 0.175, |
| "grad_norm": 424.0, |
| "kl_loss_10": 122.57909774780273, |
| "kl_loss_2": 1418.8521423339844, |
| "kl_loss_3": 1004.8310028076172, |
| "kl_loss_7": 258.8813926696777, |
| "learning_rate": 0.0009330127018922195, |
| "loss": 709.7155, |
| "step": 1750 |
| }, |
| { |
| "ce_loss_10": 3.60728679895401, |
| "ce_loss_13": 3.5332212805747987, |
| "ce_loss_2": 4.2153865694999695, |
| "ce_loss_3": 4.016399335861206, |
| "ce_loss_7": 3.6759958028793336, |
| "epoch": 0.176, |
| "grad_norm": 446.0, |
| "kl_loss_10": 117.00486183166504, |
| "kl_loss_2": 1406.6310485839845, |
| "kl_loss_3": 989.8223937988281, |
| "kl_loss_7": 252.39801330566405, |
| "learning_rate": 0.0009322171915289634, |
| "loss": 689.0163, |
| "step": 1760 |
| }, |
| { |
| "ce_loss_10": 3.640791046619415, |
| "ce_loss_13": 3.5716994404792786, |
| "ce_loss_2": 4.240784847736359, |
| "ce_loss_3": 4.040842926502227, |
| "ce_loss_7": 3.7066658616065977, |
| "epoch": 0.177, |
| "grad_norm": 504.0, |
| "kl_loss_10": 114.7268009185791, |
| "kl_loss_2": 1384.9641479492188, |
| "kl_loss_3": 983.0798065185547, |
| "kl_loss_7": 249.6033966064453, |
| "learning_rate": 0.0009314173287433873, |
| "loss": 677.7067, |
| "step": 1770 |
| }, |
| { |
| "ce_loss_10": 3.6371870756149294, |
| "ce_loss_13": 3.565370166301727, |
| "ce_loss_2": 4.248478496074677, |
| "ce_loss_3": 4.042388367652893, |
| "ce_loss_7": 3.7081421256065368, |
| "epoch": 0.178, |
| "grad_norm": 544.0, |
| "kl_loss_10": 117.58927421569824, |
| "kl_loss_2": 1410.781103515625, |
| "kl_loss_3": 995.2456298828125, |
| "kl_loss_7": 252.9298988342285, |
| "learning_rate": 0.0009306131215901003, |
| "loss": 681.0704, |
| "step": 1780 |
| }, |
| { |
| "ce_loss_10": 3.6657212376594543, |
| "ce_loss_13": 3.5942596793174744, |
| "ce_loss_2": 4.267256224155426, |
| "ce_loss_3": 4.070630991458893, |
| "ce_loss_7": 3.736851954460144, |
| "epoch": 0.179, |
| "grad_norm": 608.0, |
| "kl_loss_10": 117.64482841491699, |
| "kl_loss_2": 1382.4147399902345, |
| "kl_loss_3": 974.7222351074219, |
| "kl_loss_7": 254.88860549926758, |
| "learning_rate": 0.0009298045781674596, |
| "loss": 674.4276, |
| "step": 1790 |
| }, |
| { |
| "ce_loss_10": 3.6482357382774353, |
| "ce_loss_13": 3.577735936641693, |
| "ce_loss_2": 4.238207507133484, |
| "ce_loss_3": 4.047251141071319, |
| "ce_loss_7": 3.7247403979301454, |
| "epoch": 0.18, |
| "grad_norm": 584.0, |
| "kl_loss_10": 113.15109825134277, |
| "kl_loss_2": 1356.1355346679688, |
| "kl_loss_3": 966.8513641357422, |
| "kl_loss_7": 260.6168983459473, |
| "learning_rate": 0.0009289917066174886, |
| "loss": 687.0212, |
| "step": 1800 |
| }, |
| { |
| "ce_loss_10": 3.6436230182647704, |
| "ce_loss_13": 3.573524606227875, |
| "ce_loss_2": 4.205891370773315, |
| "ce_loss_3": 4.035960531234741, |
| "ce_loss_7": 3.713192844390869, |
| "epoch": 0.181, |
| "grad_norm": 644.0, |
| "kl_loss_10": 111.37209777832031, |
| "kl_loss_2": 1312.0460144042968, |
| "kl_loss_3": 951.6195190429687, |
| "kl_loss_7": 248.27648315429687, |
| "learning_rate": 0.0009281745151257945, |
| "loss": 665.2686, |
| "step": 1810 |
| }, |
| { |
| "ce_loss_10": 3.6573068499565125, |
| "ce_loss_13": 3.5899064898490907, |
| "ce_loss_2": 4.263534939289093, |
| "ce_loss_3": 4.073390209674836, |
| "ce_loss_7": 3.725028562545776, |
| "epoch": 0.182, |
| "grad_norm": 496.0, |
| "kl_loss_10": 112.47701683044434, |
| "kl_loss_2": 1362.4901000976563, |
| "kl_loss_3": 982.173095703125, |
| "kl_loss_7": 248.95221557617188, |
| "learning_rate": 0.0009273530119214868, |
| "loss": 681.1132, |
| "step": 1820 |
| }, |
| { |
| "ce_loss_10": 3.7659960746765138, |
| "ce_loss_13": 3.6931302428245543, |
| "ce_loss_2": 4.335440850257873, |
| "ce_loss_3": 4.146318483352661, |
| "ce_loss_7": 3.831969678401947, |
| "epoch": 0.183, |
| "grad_norm": 460.0, |
| "kl_loss_10": 115.37424812316894, |
| "kl_loss_2": 1332.8465270996094, |
| "kl_loss_3": 945.53359375, |
| "kl_loss_7": 244.2625930786133, |
| "learning_rate": 0.0009265272052770935, |
| "loss": 653.1528, |
| "step": 1830 |
| }, |
| { |
| "ce_loss_10": 3.573833405971527, |
| "ce_loss_13": 3.504916477203369, |
| "ce_loss_2": 4.191505336761475, |
| "ce_loss_3": 4.003697621822357, |
| "ce_loss_7": 3.6443989157676695, |
| "epoch": 0.184, |
| "grad_norm": 524.0, |
| "kl_loss_10": 110.15304069519043, |
| "kl_loss_2": 1378.9180969238282, |
| "kl_loss_3": 997.8934539794922, |
| "kl_loss_7": 241.1827133178711, |
| "learning_rate": 0.0009256971035084784, |
| "loss": 679.6828, |
| "step": 1840 |
| }, |
| { |
| "ce_loss_10": 3.5141557097434997, |
| "ce_loss_13": 3.4410739183425902, |
| "ce_loss_2": 4.1375454545021055, |
| "ce_loss_3": 3.934183955192566, |
| "ce_loss_7": 3.588452696800232, |
| "epoch": 0.185, |
| "grad_norm": 528.0, |
| "kl_loss_10": 114.25855445861816, |
| "kl_loss_2": 1412.8881896972657, |
| "kl_loss_3": 1019.4745208740235, |
| "kl_loss_7": 253.67017517089843, |
| "learning_rate": 0.0009248627149747573, |
| "loss": 690.3363, |
| "step": 1850 |
| }, |
| { |
| "ce_loss_10": 3.7252640962600707, |
| "ce_loss_13": 3.653822290897369, |
| "ce_loss_2": 4.3040543556213375, |
| "ce_loss_3": 4.132464408874512, |
| "ce_loss_7": 3.793966567516327, |
| "epoch": 0.186, |
| "grad_norm": 564.0, |
| "kl_loss_10": 115.14772300720215, |
| "kl_loss_2": 1340.048565673828, |
| "kl_loss_3": 980.2122955322266, |
| "kl_loss_7": 244.52606735229492, |
| "learning_rate": 0.0009240240480782129, |
| "loss": 674.7569, |
| "step": 1860 |
| }, |
| { |
| "ce_loss_10": 3.635197627544403, |
| "ce_loss_13": 3.561525750160217, |
| "ce_loss_2": 4.234712994098663, |
| "ce_loss_3": 4.036596286296844, |
| "ce_loss_7": 3.7000155448913574, |
| "epoch": 0.187, |
| "grad_norm": 442.0, |
| "kl_loss_10": 116.40899467468262, |
| "kl_loss_2": 1366.7482482910157, |
| "kl_loss_3": 985.8397155761719, |
| "kl_loss_7": 245.4348571777344, |
| "learning_rate": 0.0009231811112642122, |
| "loss": 670.6495, |
| "step": 1870 |
| }, |
| { |
| "ce_loss_10": 3.680171477794647, |
| "ce_loss_13": 3.607526624202728, |
| "ce_loss_2": 4.242461228370667, |
| "ce_loss_3": 4.0597851276397705, |
| "ce_loss_7": 3.7417822241783143, |
| "epoch": 0.188, |
| "grad_norm": 462.0, |
| "kl_loss_10": 115.97058601379395, |
| "kl_loss_2": 1329.476806640625, |
| "kl_loss_3": 944.8101593017578, |
| "kl_loss_7": 240.48009033203124, |
| "learning_rate": 0.0009223339130211192, |
| "loss": 656.504, |
| "step": 1880 |
| }, |
| { |
| "ce_loss_10": 3.527233564853668, |
| "ce_loss_13": 3.456979143619537, |
| "ce_loss_2": 4.1365337610244755, |
| "ce_loss_3": 3.9334477186203003, |
| "ce_loss_7": 3.5922507286071776, |
| "epoch": 0.189, |
| "grad_norm": 492.0, |
| "kl_loss_10": 120.61857757568359, |
| "kl_loss_2": 1391.3780578613282, |
| "kl_loss_3": 981.7018951416015, |
| "kl_loss_7": 240.8455017089844, |
| "learning_rate": 0.0009214824618802108, |
| "loss": 678.3247, |
| "step": 1890 |
| }, |
| { |
| "ce_loss_10": 3.715823400020599, |
| "ce_loss_13": 3.639923906326294, |
| "ce_loss_2": 4.3134965896606445, |
| "ce_loss_3": 4.111241257190704, |
| "ce_loss_7": 3.779346799850464, |
| "epoch": 0.19, |
| "grad_norm": 456.0, |
| "kl_loss_10": 127.23999710083008, |
| "kl_loss_2": 1364.8941589355468, |
| "kl_loss_3": 960.9680725097656, |
| "kl_loss_7": 248.77009048461915, |
| "learning_rate": 0.0009206267664155906, |
| "loss": 685.2967, |
| "step": 1900 |
| }, |
| { |
| "ce_loss_10": 3.6354769825935365, |
| "ce_loss_13": 3.556038224697113, |
| "ce_loss_2": 4.224778318405152, |
| "ce_loss_3": 4.022096812725067, |
| "ce_loss_7": 3.690963363647461, |
| "epoch": 0.191, |
| "grad_norm": 524.0, |
| "kl_loss_10": 125.37596015930175, |
| "kl_loss_2": 1371.057391357422, |
| "kl_loss_3": 969.1447021484375, |
| "kl_loss_7": 243.19865951538085, |
| "learning_rate": 0.0009197668352441024, |
| "loss": 678.1597, |
| "step": 1910 |
| }, |
| { |
| "ce_loss_10": 3.6849255323410035, |
| "ce_loss_13": 3.6070022225379943, |
| "ce_loss_2": 4.272895455360413, |
| "ce_loss_3": 4.0722639799118046, |
| "ce_loss_7": 3.741350519657135, |
| "epoch": 0.192, |
| "grad_norm": 512.0, |
| "kl_loss_10": 128.65237312316896, |
| "kl_loss_2": 1349.729931640625, |
| "kl_loss_3": 949.7346984863282, |
| "kl_loss_7": 242.41346817016603, |
| "learning_rate": 0.0009189026770252437, |
| "loss": 671.3585, |
| "step": 1920 |
| }, |
| { |
| "ce_loss_10": 3.7201656699180603, |
| "ce_loss_13": 3.6394999861717223, |
| "ce_loss_2": 4.302338600158691, |
| "ce_loss_3": 4.1032923579216005, |
| "ce_loss_7": 3.7764319658279417, |
| "epoch": 0.193, |
| "grad_norm": 458.0, |
| "kl_loss_10": 133.32494201660157, |
| "kl_loss_2": 1342.450421142578, |
| "kl_loss_3": 949.4765075683594, |
| "kl_loss_7": 246.09827728271483, |
| "learning_rate": 0.000918034300461078, |
| "loss": 688.7368, |
| "step": 1930 |
| }, |
| { |
| "ce_loss_10": 3.747203004360199, |
| "ce_loss_13": 3.6689361929893494, |
| "ce_loss_2": 4.312567710876465, |
| "ce_loss_3": 4.122261881828308, |
| "ce_loss_7": 3.8043017029762267, |
| "epoch": 0.194, |
| "grad_norm": 446.0, |
| "kl_loss_10": 129.00423164367675, |
| "kl_loss_2": 1325.102197265625, |
| "kl_loss_3": 931.5237396240234, |
| "kl_loss_7": 241.53863983154298, |
| "learning_rate": 0.0009171617142961477, |
| "loss": 661.2737, |
| "step": 1940 |
| }, |
| { |
| "ce_loss_10": 3.699472951889038, |
| "ce_loss_13": 3.6279419898986816, |
| "ce_loss_2": 4.281847763061523, |
| "ce_loss_3": 4.083541011810302, |
| "ce_loss_7": 3.7648919463157653, |
| "epoch": 0.195, |
| "grad_norm": 434.0, |
| "kl_loss_10": 121.35350723266602, |
| "kl_loss_2": 1352.1436584472656, |
| "kl_loss_3": 952.5272399902344, |
| "kl_loss_7": 240.53460235595702, |
| "learning_rate": 0.0009162849273173857, |
| "loss": 665.7376, |
| "step": 1950 |
| }, |
| { |
| "ce_loss_10": 3.632410800457001, |
| "ce_loss_13": 3.5614632248878477, |
| "ce_loss_2": 4.223404765129089, |
| "ce_loss_3": 4.023203945159912, |
| "ce_loss_7": 3.700835573673248, |
| "epoch": 0.196, |
| "grad_norm": 470.0, |
| "kl_loss_10": 118.8283805847168, |
| "kl_loss_2": 1344.0367370605468, |
| "kl_loss_3": 944.1380065917969, |
| "kl_loss_7": 251.04139099121093, |
| "learning_rate": 0.0009154039483540273, |
| "loss": 672.422, |
| "step": 1960 |
| }, |
| { |
| "ce_loss_10": 3.6197654128074648, |
| "ce_loss_13": 3.546481454372406, |
| "ce_loss_2": 4.201360607147217, |
| "ce_loss_3": 4.004413700103759, |
| "ce_loss_7": 3.682723355293274, |
| "epoch": 0.197, |
| "grad_norm": 406.0, |
| "kl_loss_10": 120.08837623596192, |
| "kl_loss_2": 1349.5242309570312, |
| "kl_loss_3": 942.7119018554688, |
| "kl_loss_7": 243.98333435058595, |
| "learning_rate": 0.0009145187862775209, |
| "loss": 667.6594, |
| "step": 1970 |
| }, |
| { |
| "ce_loss_10": 3.6506257891654967, |
| "ce_loss_13": 3.5804669737815855, |
| "ce_loss_2": 4.243929970264435, |
| "ce_loss_3": 4.034862732887268, |
| "ce_loss_7": 3.7135458827018737, |
| "epoch": 0.198, |
| "grad_norm": 620.0, |
| "kl_loss_10": 117.48477897644042, |
| "kl_loss_2": 1377.6264770507812, |
| "kl_loss_3": 958.5087188720703, |
| "kl_loss_7": 243.4327537536621, |
| "learning_rate": 0.0009136294500014386, |
| "loss": 665.5496, |
| "step": 1980 |
| }, |
| { |
| "ce_loss_10": 3.599961686134338, |
| "ce_loss_13": 3.528086531162262, |
| "ce_loss_2": 4.217166924476624, |
| "ce_loss_3": 4.008637738227844, |
| "ce_loss_7": 3.6669308066368105, |
| "epoch": 0.199, |
| "grad_norm": 616.0, |
| "kl_loss_10": 115.34629516601562, |
| "kl_loss_2": 1399.4097900390625, |
| "kl_loss_3": 983.5843353271484, |
| "kl_loss_7": 244.90453720092773, |
| "learning_rate": 0.000912735948481387, |
| "loss": 681.3188, |
| "step": 1990 |
| }, |
| { |
| "ce_loss_10": 3.6347181677818297, |
| "ce_loss_13": 3.560755395889282, |
| "ce_loss_2": 4.230910205841065, |
| "ce_loss_3": 4.03362866640091, |
| "ce_loss_7": 3.700130546092987, |
| "epoch": 0.2, |
| "grad_norm": 492.0, |
| "kl_loss_10": 115.55288009643554, |
| "kl_loss_2": 1372.0876098632812, |
| "kl_loss_3": 976.6782318115235, |
| "kl_loss_7": 248.47412033081054, |
| "learning_rate": 0.0009118382907149164, |
| "loss": 666.3086, |
| "step": 2000 |
| }, |
| { |
| "ce_loss_10": 3.6592599511146546, |
| "ce_loss_13": 3.5870088934898376, |
| "ce_loss_2": 4.23843743801117, |
| "ce_loss_3": 4.045619630813599, |
| "ce_loss_7": 3.722980320453644, |
| "epoch": 0.201, |
| "grad_norm": 492.0, |
| "kl_loss_10": 114.64969139099121, |
| "kl_loss_2": 1351.183447265625, |
| "kl_loss_3": 956.2785675048829, |
| "kl_loss_7": 247.1648811340332, |
| "learning_rate": 0.0009109364857414306, |
| "loss": 658.4385, |
| "step": 2010 |
| }, |
| { |
| "ce_loss_10": 3.6247077345848084, |
| "ce_loss_13": 3.5549973130226133, |
| "ce_loss_2": 4.192802679538727, |
| "ce_loss_3": 4.006897258758545, |
| "ce_loss_7": 3.694356381893158, |
| "epoch": 0.202, |
| "grad_norm": 432.0, |
| "kl_loss_10": 111.96462211608886, |
| "kl_loss_2": 1332.7575988769531, |
| "kl_loss_3": 943.8550109863281, |
| "kl_loss_7": 248.51104660034179, |
| "learning_rate": 0.0009100305426420956, |
| "loss": 673.1317, |
| "step": 2020 |
| }, |
| { |
| "ce_loss_10": 3.5841406345367433, |
| "ce_loss_13": 3.5164321780204775, |
| "ce_loss_2": 4.202693927288055, |
| "ce_loss_3": 3.9953248143196105, |
| "ce_loss_7": 3.650038242340088, |
| "epoch": 0.203, |
| "grad_norm": 432.0, |
| "kl_loss_10": 113.3315975189209, |
| "kl_loss_2": 1413.3973693847656, |
| "kl_loss_3": 984.508837890625, |
| "kl_loss_7": 247.24474029541017, |
| "learning_rate": 0.0009091204705397484, |
| "loss": 669.0848, |
| "step": 2030 |
| }, |
| { |
| "ce_loss_10": 3.585637128353119, |
| "ce_loss_13": 3.5091155648231505, |
| "ce_loss_2": 4.185344040393829, |
| "ce_loss_3": 3.992532753944397, |
| "ce_loss_7": 3.6510769963264464, |
| "epoch": 0.204, |
| "grad_norm": 448.0, |
| "kl_loss_10": 124.74103927612305, |
| "kl_loss_2": 1400.9709167480469, |
| "kl_loss_3": 992.3661834716797, |
| "kl_loss_7": 250.7290901184082, |
| "learning_rate": 0.0009082062785988049, |
| "loss": 681.1268, |
| "step": 2040 |
| }, |
| { |
| "ce_loss_10": 3.721049964427948, |
| "ce_loss_13": 3.6463207244873046, |
| "ce_loss_2": 4.2808568477630615, |
| "ce_loss_3": 4.093539321422577, |
| "ce_loss_7": 3.7847790718078613, |
| "epoch": 0.205, |
| "grad_norm": 466.0, |
| "kl_loss_10": 117.74674758911132, |
| "kl_loss_2": 1322.8293701171874, |
| "kl_loss_3": 931.9617309570312, |
| "kl_loss_7": 242.782958984375, |
| "learning_rate": 0.0009072879760251679, |
| "loss": 667.7382, |
| "step": 2050 |
| }, |
| { |
| "ce_loss_10": 3.6576568126678466, |
| "ce_loss_13": 3.5876036405563356, |
| "ce_loss_2": 4.261196970939636, |
| "ce_loss_3": 4.065271866321564, |
| "ce_loss_7": 3.727122116088867, |
| "epoch": 0.206, |
| "grad_norm": 510.0, |
| "kl_loss_10": 116.0688491821289, |
| "kl_loss_2": 1368.6263488769532, |
| "kl_loss_3": 974.0030578613281, |
| "kl_loss_7": 247.75207290649413, |
| "learning_rate": 0.0009063655720661341, |
| "loss": 667.9454, |
| "step": 2060 |
| }, |
| { |
| "ce_loss_10": 3.7091850519180296, |
| "ce_loss_13": 3.6359564065933228, |
| "ce_loss_2": 4.2861632108688354, |
| "ce_loss_3": 4.091731405258178, |
| "ce_loss_7": 3.780376970767975, |
| "epoch": 0.207, |
| "grad_norm": 756.0, |
| "kl_loss_10": 117.6738265991211, |
| "kl_loss_2": 1338.1011291503905, |
| "kl_loss_3": 948.2550628662109, |
| "kl_loss_7": 265.19689025878904, |
| "learning_rate": 0.000905439076010301, |
| "loss": 666.8086, |
| "step": 2070 |
| }, |
| { |
| "ce_loss_10": 3.661241602897644, |
| "ce_loss_13": 3.5897186398506165, |
| "ce_loss_2": 4.2563663721084595, |
| "ce_loss_3": 4.06014586687088, |
| "ce_loss_7": 3.7477613568305967, |
| "epoch": 0.208, |
| "grad_norm": 502.0, |
| "kl_loss_10": 114.75448532104492, |
| "kl_loss_2": 1354.8022583007812, |
| "kl_loss_3": 965.9129913330078, |
| "kl_loss_7": 280.605126953125, |
| "learning_rate": 0.0009045084971874737, |
| "loss": 668.0169, |
| "step": 2080 |
| }, |
| { |
| "ce_loss_10": 3.6376566290855408, |
| "ce_loss_13": 3.568475866317749, |
| "ce_loss_2": 4.219207537174225, |
| "ce_loss_3": 4.027460336685181, |
| "ce_loss_7": 3.7098584175109863, |
| "epoch": 0.209, |
| "grad_norm": 476.0, |
| "kl_loss_10": 112.78402214050293, |
| "kl_loss_2": 1340.8613708496093, |
| "kl_loss_3": 948.0414184570312, |
| "kl_loss_7": 264.3713745117187, |
| "learning_rate": 0.0009035738449685707, |
| "loss": 673.0266, |
| "step": 2090 |
| }, |
| { |
| "ce_loss_10": 3.5796483874320986, |
| "ce_loss_13": 3.507876431941986, |
| "ce_loss_2": 4.179275572299957, |
| "ce_loss_3": 3.9804170727729797, |
| "ce_loss_7": 3.6553168416023256, |
| "epoch": 0.21, |
| "grad_norm": 576.0, |
| "kl_loss_10": 116.00268287658692, |
| "kl_loss_2": 1358.9301879882812, |
| "kl_loss_3": 960.5301177978515, |
| "kl_loss_7": 258.80527420043944, |
| "learning_rate": 0.0009026351287655293, |
| "loss": 660.1454, |
| "step": 2100 |
| }, |
| { |
| "ce_loss_10": 3.7848559260368346, |
| "ce_loss_13": 3.7130728244781492, |
| "ce_loss_2": 4.324071049690247, |
| "ce_loss_3": 4.141572403907776, |
| "ce_loss_7": 3.8475868105888367, |
| "epoch": 0.211, |
| "grad_norm": 410.0, |
| "kl_loss_10": 115.61974067687989, |
| "kl_loss_2": 1276.820086669922, |
| "kl_loss_3": 901.7400726318359, |
| "kl_loss_7": 240.78678359985352, |
| "learning_rate": 0.0009016923580312113, |
| "loss": 636.6335, |
| "step": 2110 |
| }, |
| { |
| "ce_loss_10": 3.6301342844963074, |
| "ce_loss_13": 3.560654580593109, |
| "ce_loss_2": 4.20012868642807, |
| "ce_loss_3": 4.0125791192054745, |
| "ce_loss_7": 3.6946483969688417, |
| "epoch": 0.212, |
| "grad_norm": 462.0, |
| "kl_loss_10": 112.99716110229492, |
| "kl_loss_2": 1313.61005859375, |
| "kl_loss_3": 932.8424255371094, |
| "kl_loss_7": 240.82636260986328, |
| "learning_rate": 0.0009007455422593077, |
| "loss": 661.1402, |
| "step": 2120 |
| }, |
| { |
| "ce_loss_10": 3.643904185295105, |
| "ce_loss_13": 3.5732839465141297, |
| "ce_loss_2": 4.229895269870758, |
| "ce_loss_3": 4.039864921569825, |
| "ce_loss_7": 3.7100953698158263, |
| "epoch": 0.213, |
| "grad_norm": 544.0, |
| "kl_loss_10": 113.59500656127929, |
| "kl_loss_2": 1376.1138916015625, |
| "kl_loss_3": 982.583627319336, |
| "kl_loss_7": 245.73143768310547, |
| "learning_rate": 0.0008997946909842425, |
| "loss": 673.5951, |
| "step": 2130 |
| }, |
| { |
| "ce_loss_10": 3.6592751264572145, |
| "ce_loss_13": 3.5883899569511413, |
| "ce_loss_2": 4.269070100784302, |
| "ce_loss_3": 4.073972117900849, |
| "ce_loss_7": 3.7271844029426573, |
| "epoch": 0.214, |
| "grad_norm": 486.0, |
| "kl_loss_10": 115.54258766174317, |
| "kl_loss_2": 1390.4590087890624, |
| "kl_loss_3": 991.5129302978515, |
| "kl_loss_7": 248.1077392578125, |
| "learning_rate": 0.0008988398137810777, |
| "loss": 666.5385, |
| "step": 2140 |
| }, |
| { |
| "ce_loss_10": 3.696693778038025, |
| "ce_loss_13": 3.6274760484695436, |
| "ce_loss_2": 4.272996628284455, |
| "ce_loss_3": 4.077895438671112, |
| "ce_loss_7": 3.759436583518982, |
| "epoch": 0.215, |
| "grad_norm": 410.0, |
| "kl_loss_10": 109.15078239440918, |
| "kl_loss_2": 1323.0664428710938, |
| "kl_loss_3": 929.4178771972656, |
| "kl_loss_7": 235.6734588623047, |
| "learning_rate": 0.0008978809202654162, |
| "loss": 648.7643, |
| "step": 2150 |
| }, |
| { |
| "ce_loss_10": 3.674948477745056, |
| "ce_loss_13": 3.6074150681495665, |
| "ce_loss_2": 4.254977214336395, |
| "ce_loss_3": 4.054577016830445, |
| "ce_loss_7": 3.7380695223808287, |
| "epoch": 0.216, |
| "grad_norm": 342.0, |
| "kl_loss_10": 111.83034286499023, |
| "kl_loss_2": 1326.1663391113282, |
| "kl_loss_3": 930.476919555664, |
| "kl_loss_7": 237.29275283813476, |
| "learning_rate": 0.0008969180200933046, |
| "loss": 659.8788, |
| "step": 2160 |
| }, |
| { |
| "ce_loss_10": 3.633396315574646, |
| "ce_loss_13": 3.5632909893989564, |
| "ce_loss_2": 4.235332405567169, |
| "ce_loss_3": 4.043020272254944, |
| "ce_loss_7": 3.700575852394104, |
| "epoch": 0.217, |
| "grad_norm": 426.0, |
| "kl_loss_10": 113.71143455505371, |
| "kl_loss_2": 1376.4859924316406, |
| "kl_loss_3": 965.2266693115234, |
| "kl_loss_7": 244.74252319335938, |
| "learning_rate": 0.0008959511229611376, |
| "loss": 671.5447, |
| "step": 2170 |
| }, |
| { |
| "ce_loss_10": 3.7160756826400756, |
| "ce_loss_13": 3.6463282823562624, |
| "ce_loss_2": 4.29187992811203, |
| "ce_loss_3": 4.0930745005607605, |
| "ce_loss_7": 3.77754408121109, |
| "epoch": 0.218, |
| "grad_norm": 494.0, |
| "kl_loss_10": 112.80306968688964, |
| "kl_loss_2": 1327.3540283203124, |
| "kl_loss_3": 931.4955108642578, |
| "kl_loss_7": 236.75977325439453, |
| "learning_rate": 0.0008949802386055581, |
| "loss": 652.6458, |
| "step": 2180 |
| }, |
| { |
| "ce_loss_10": 3.5766260862350463, |
| "ce_loss_13": 3.5034859418869018, |
| "ce_loss_2": 4.159404098987579, |
| "ce_loss_3": 3.9599217534065247, |
| "ce_loss_7": 3.6389345288276673, |
| "epoch": 0.219, |
| "grad_norm": 466.0, |
| "kl_loss_10": 111.61733703613281, |
| "kl_loss_2": 1335.0789672851563, |
| "kl_loss_3": 936.6266204833985, |
| "kl_loss_7": 234.00814971923828, |
| "learning_rate": 0.0008940053768033609, |
| "loss": 665.0317, |
| "step": 2190 |
| }, |
| { |
| "ce_loss_10": 3.65551677942276, |
| "ce_loss_13": 3.58828284740448, |
| "ce_loss_2": 4.223571491241455, |
| "ce_loss_3": 4.04930864572525, |
| "ce_loss_7": 3.7170740485191347, |
| "epoch": 0.22, |
| "grad_norm": 500.0, |
| "kl_loss_10": 111.12692604064941, |
| "kl_loss_2": 1315.6073425292968, |
| "kl_loss_3": 947.5290161132813, |
| "kl_loss_7": 230.48658447265626, |
| "learning_rate": 0.0008930265473713938, |
| "loss": 654.9715, |
| "step": 2200 |
| }, |
| { |
| "ce_loss_10": 3.6195818066596983, |
| "ce_loss_13": 3.54861319065094, |
| "ce_loss_2": 4.198423433303833, |
| "ce_loss_3": 4.012469959259033, |
| "ce_loss_7": 3.681425595283508, |
| "epoch": 0.221, |
| "grad_norm": 528.0, |
| "kl_loss_10": 115.1269718170166, |
| "kl_loss_2": 1324.1462280273438, |
| "kl_loss_3": 954.0400299072265, |
| "kl_loss_7": 233.1149475097656, |
| "learning_rate": 0.0008920437601664579, |
| "loss": 648.2547, |
| "step": 2210 |
| }, |
| { |
| "ce_loss_10": 3.6091471552848815, |
| "ce_loss_13": 3.539783036708832, |
| "ce_loss_2": 4.181177127361297, |
| "ce_loss_3": 3.993445408344269, |
| "ce_loss_7": 3.669792366027832, |
| "epoch": 0.222, |
| "grad_norm": 410.0, |
| "kl_loss_10": 115.02236862182617, |
| "kl_loss_2": 1333.0483093261719, |
| "kl_loss_3": 948.5272521972656, |
| "kl_loss_7": 236.40514373779297, |
| "learning_rate": 0.0008910570250852097, |
| "loss": 647.6241, |
| "step": 2220 |
| }, |
| { |
| "ce_loss_10": 3.7300463914871216, |
| "ce_loss_13": 3.657940351963043, |
| "ce_loss_2": 4.2689752101898195, |
| "ce_loss_3": 4.08460431098938, |
| "ce_loss_7": 3.786776435375214, |
| "epoch": 0.223, |
| "grad_norm": 396.0, |
| "kl_loss_10": 119.05343589782714, |
| "kl_loss_2": 1270.7809265136718, |
| "kl_loss_3": 894.4331726074219, |
| "kl_loss_7": 233.2676574707031, |
| "learning_rate": 0.0008900663520640604, |
| "loss": 634.1773, |
| "step": 2230 |
| }, |
| { |
| "ce_loss_10": 3.668592298030853, |
| "ce_loss_13": 3.5982241868972777, |
| "ce_loss_2": 4.234554326534271, |
| "ce_loss_3": 4.045566046237946, |
| "ce_loss_7": 3.7271997809410093, |
| "epoch": 0.224, |
| "grad_norm": 484.0, |
| "kl_loss_10": 115.87549057006837, |
| "kl_loss_2": 1305.9935668945313, |
| "kl_loss_3": 922.4547302246094, |
| "kl_loss_7": 233.83654251098633, |
| "learning_rate": 0.0008890717510790764, |
| "loss": 651.4086, |
| "step": 2240 |
| }, |
| { |
| "ce_loss_10": 3.624187970161438, |
| "ce_loss_13": 3.553602933883667, |
| "ce_loss_2": 4.209651458263397, |
| "ce_loss_3": 4.014156377315521, |
| "ce_loss_7": 3.6865435361862184, |
| "epoch": 0.225, |
| "grad_norm": 428.0, |
| "kl_loss_10": 111.85544166564941, |
| "kl_loss_2": 1344.305419921875, |
| "kl_loss_3": 948.2704040527344, |
| "kl_loss_7": 233.77221450805663, |
| "learning_rate": 0.0008880732321458784, |
| "loss": 659.9288, |
| "step": 2250 |
| }, |
| { |
| "ce_loss_10": 3.6572599172592164, |
| "ce_loss_13": 3.589207625389099, |
| "ce_loss_2": 4.229009163379669, |
| "ce_loss_3": 4.035797142982483, |
| "ce_loss_7": 3.720983362197876, |
| "epoch": 0.226, |
| "grad_norm": 450.0, |
| "kl_loss_10": 112.32478141784668, |
| "kl_loss_2": 1315.7465759277343, |
| "kl_loss_3": 923.5756164550781, |
| "kl_loss_7": 233.98969955444335, |
| "learning_rate": 0.0008870708053195413, |
| "loss": 656.0779, |
| "step": 2260 |
| }, |
| { |
| "ce_loss_10": 3.6822890281677245, |
| "ce_loss_13": 3.613891136646271, |
| "ce_loss_2": 4.243361723423004, |
| "ce_loss_3": 4.054495620727539, |
| "ce_loss_7": 3.7477814078330995, |
| "epoch": 0.227, |
| "grad_norm": 510.0, |
| "kl_loss_10": 109.18305702209473, |
| "kl_loss_2": 1298.216571044922, |
| "kl_loss_3": 915.6232360839844, |
| "kl_loss_7": 232.2860221862793, |
| "learning_rate": 0.0008860644806944918, |
| "loss": 640.8393, |
| "step": 2270 |
| }, |
| { |
| "ce_loss_10": 3.6228482365608214, |
| "ce_loss_13": 3.55364191532135, |
| "ce_loss_2": 4.206750881671906, |
| "ce_loss_3": 4.00951054096222, |
| "ce_loss_7": 3.692072665691376, |
| "epoch": 0.228, |
| "grad_norm": 516.0, |
| "kl_loss_10": 112.56623115539551, |
| "kl_loss_2": 1346.1175109863282, |
| "kl_loss_3": 947.8353302001954, |
| "kl_loss_7": 250.14039154052733, |
| "learning_rate": 0.0008850542684044079, |
| "loss": 646.6089, |
| "step": 2280 |
| }, |
| { |
| "ce_loss_10": 3.594875121116638, |
| "ce_loss_13": 3.5233037948608397, |
| "ce_loss_2": 4.204600942134857, |
| "ce_loss_3": 3.998581278324127, |
| "ce_loss_7": 3.665067207813263, |
| "epoch": 0.229, |
| "grad_norm": 450.0, |
| "kl_loss_10": 112.58180122375488, |
| "kl_loss_2": 1387.7576477050782, |
| "kl_loss_3": 974.7930572509765, |
| "kl_loss_7": 248.70092544555663, |
| "learning_rate": 0.0008840401786221159, |
| "loss": 661.1442, |
| "step": 2290 |
| }, |
| { |
| "ce_loss_10": 3.731163203716278, |
| "ce_loss_13": 3.6657732129096985, |
| "ce_loss_2": 4.296657812595368, |
| "ce_loss_3": 4.099385142326355, |
| "ce_loss_7": 3.791227328777313, |
| "epoch": 0.23, |
| "grad_norm": 480.0, |
| "kl_loss_10": 108.29386520385742, |
| "kl_loss_2": 1301.2722473144531, |
| "kl_loss_3": 910.2853973388671, |
| "kl_loss_7": 230.31064682006837, |
| "learning_rate": 0.000883022221559489, |
| "loss": 636.6557, |
| "step": 2300 |
| }, |
| { |
| "ce_loss_10": 3.68310546875, |
| "ce_loss_13": 3.615295338630676, |
| "ce_loss_2": 4.257085943222046, |
| "ce_loss_3": 4.067182207107544, |
| "ce_loss_7": 3.7437336564064028, |
| "epoch": 0.231, |
| "grad_norm": 446.0, |
| "kl_loss_10": 109.58497161865235, |
| "kl_loss_2": 1331.8181457519531, |
| "kl_loss_3": 935.3311614990234, |
| "kl_loss_7": 230.688321685791, |
| "learning_rate": 0.0008820004074673434, |
| "loss": 666.0036, |
| "step": 2310 |
| }, |
| { |
| "ce_loss_10": 3.588702178001404, |
| "ce_loss_13": 3.524743127822876, |
| "ce_loss_2": 4.165178096294403, |
| "ce_loss_3": 3.97126362323761, |
| "ce_loss_7": 3.6520536303520204, |
| "epoch": 0.232, |
| "grad_norm": 494.0, |
| "kl_loss_10": 105.68032302856446, |
| "kl_loss_2": 1342.763037109375, |
| "kl_loss_3": 936.0263000488281, |
| "kl_loss_7": 229.52542724609376, |
| "learning_rate": 0.0008809747466353355, |
| "loss": 641.3422, |
| "step": 2320 |
| }, |
| { |
| "ce_loss_10": 3.602530860900879, |
| "ce_loss_13": 3.5331971526145933, |
| "ce_loss_2": 4.167471373081208, |
| "ce_loss_3": 3.977475893497467, |
| "ce_loss_7": 3.6642409324645997, |
| "epoch": 0.233, |
| "grad_norm": 458.0, |
| "kl_loss_10": 110.41828346252441, |
| "kl_loss_2": 1312.3292907714845, |
| "kl_loss_3": 919.6944396972656, |
| "kl_loss_7": 231.60648040771486, |
| "learning_rate": 0.0008799452493918585, |
| "loss": 645.3, |
| "step": 2330 |
| }, |
| { |
| "ce_loss_10": 3.6880576372146607, |
| "ce_loss_13": 3.6198028326034546, |
| "ce_loss_2": 4.254074168205261, |
| "ce_loss_3": 4.062888276576996, |
| "ce_loss_7": 3.7507563471794128, |
| "epoch": 0.234, |
| "grad_norm": 474.0, |
| "kl_loss_10": 110.42879600524903, |
| "kl_loss_2": 1309.2276428222656, |
| "kl_loss_3": 921.8907257080078, |
| "kl_loss_7": 233.63690185546875, |
| "learning_rate": 0.0008789119261039385, |
| "loss": 662.3614, |
| "step": 2340 |
| }, |
| { |
| "ce_loss_10": 3.5944358229637148, |
| "ce_loss_13": 3.5265544295310973, |
| "ce_loss_2": 4.165751957893372, |
| "ce_loss_3": 3.9788960099220274, |
| "ce_loss_7": 3.656530773639679, |
| "epoch": 0.235, |
| "grad_norm": 390.0, |
| "kl_loss_10": 106.77197875976563, |
| "kl_loss_2": 1303.9624755859375, |
| "kl_loss_3": 920.4357025146485, |
| "kl_loss_7": 233.87101974487305, |
| "learning_rate": 0.0008778747871771292, |
| "loss": 636.2241, |
| "step": 2350 |
| }, |
| { |
| "ce_loss_10": 3.642832565307617, |
| "ce_loss_13": 3.5764389514923094, |
| "ce_loss_2": 4.1962644219398495, |
| "ce_loss_3": 4.005896735191345, |
| "ce_loss_7": 3.703742432594299, |
| "epoch": 0.236, |
| "grad_norm": 488.0, |
| "kl_loss_10": 106.56211357116699, |
| "kl_loss_2": 1280.7797607421876, |
| "kl_loss_3": 899.6746459960938, |
| "kl_loss_7": 226.2964195251465, |
| "learning_rate": 0.0008768338430554083, |
| "loss": 628.8105, |
| "step": 2360 |
| }, |
| { |
| "ce_loss_10": 3.6540219306945803, |
| "ce_loss_13": 3.5862942576408385, |
| "ce_loss_2": 4.217021405696869, |
| "ce_loss_3": 4.0323525190353395, |
| "ce_loss_7": 3.715273082256317, |
| "epoch": 0.237, |
| "grad_norm": 446.0, |
| "kl_loss_10": 108.96415519714355, |
| "kl_loss_2": 1303.7487731933593, |
| "kl_loss_3": 917.9468658447265, |
| "kl_loss_7": 231.81985321044922, |
| "learning_rate": 0.0008757891042210713, |
| "loss": 643.5909, |
| "step": 2370 |
| }, |
| { |
| "ce_loss_10": 3.6785866141319277, |
| "ce_loss_13": 3.6081984996795655, |
| "ce_loss_2": 4.2465239524841305, |
| "ce_loss_3": 4.049813544750213, |
| "ce_loss_7": 3.7435325622558593, |
| "epoch": 0.238, |
| "grad_norm": 504.0, |
| "kl_loss_10": 111.76208610534668, |
| "kl_loss_2": 1305.791943359375, |
| "kl_loss_3": 916.3558563232422, |
| "kl_loss_7": 238.5102066040039, |
| "learning_rate": 0.0008747405811946271, |
| "loss": 645.7604, |
| "step": 2380 |
| }, |
| { |
| "ce_loss_10": 3.5631368517875672, |
| "ce_loss_13": 3.4966716051101683, |
| "ce_loss_2": 4.149629712104797, |
| "ce_loss_3": 3.960558259487152, |
| "ce_loss_7": 3.629357707500458, |
| "epoch": 0.239, |
| "grad_norm": 466.0, |
| "kl_loss_10": 110.7029800415039, |
| "kl_loss_2": 1342.1500549316406, |
| "kl_loss_3": 952.2329833984375, |
| "kl_loss_7": 240.75519790649415, |
| "learning_rate": 0.0008736882845346905, |
| "loss": 640.1473, |
| "step": 2390 |
| }, |
| { |
| "ce_loss_10": 3.6702625513076783, |
| "ce_loss_13": 3.598974347114563, |
| "ce_loss_2": 4.235510897636414, |
| "ce_loss_3": 4.041280698776245, |
| "ce_loss_7": 3.734322738647461, |
| "epoch": 0.24, |
| "grad_norm": 504.0, |
| "kl_loss_10": 116.23694610595703, |
| "kl_loss_2": 1302.5589904785156, |
| "kl_loss_3": 916.9543487548829, |
| "kl_loss_7": 247.71970977783204, |
| "learning_rate": 0.0008726322248378774, |
| "loss": 637.9229, |
| "step": 2400 |
| }, |
| { |
| "ce_loss_10": 3.6652265906333925, |
| "ce_loss_13": 3.595516872406006, |
| "ce_loss_2": 4.246520745754242, |
| "ce_loss_3": 4.043909120559692, |
| "ce_loss_7": 3.7296547532081603, |
| "epoch": 0.241, |
| "grad_norm": 450.0, |
| "kl_loss_10": 113.42751388549804, |
| "kl_loss_2": 1334.7802490234376, |
| "kl_loss_3": 932.5218017578125, |
| "kl_loss_7": 240.9356887817383, |
| "learning_rate": 0.0008715724127386971, |
| "loss": 657.7121, |
| "step": 2410 |
| }, |
| { |
| "ce_loss_10": 3.7339873194694517, |
| "ce_loss_13": 3.6643826484680178, |
| "ce_loss_2": 4.278091061115265, |
| "ce_loss_3": 4.095656621456146, |
| "ce_loss_7": 3.791609489917755, |
| "epoch": 0.242, |
| "grad_norm": 462.0, |
| "kl_loss_10": 112.56025924682618, |
| "kl_loss_2": 1285.8053466796875, |
| "kl_loss_3": 903.9169921875, |
| "kl_loss_7": 234.85026092529296, |
| "learning_rate": 0.0008705088589094458, |
| "loss": 638.7832, |
| "step": 2420 |
| }, |
| { |
| "ce_loss_10": 3.745934009552002, |
| "ce_loss_13": 3.677195417881012, |
| "ce_loss_2": 4.306411802768707, |
| "ce_loss_3": 4.115467298030853, |
| "ce_loss_7": 3.8079170107841493, |
| "epoch": 0.243, |
| "grad_norm": 414.0, |
| "kl_loss_10": 111.40414924621582, |
| "kl_loss_2": 1291.0523193359375, |
| "kl_loss_3": 906.0183563232422, |
| "kl_loss_7": 231.1467155456543, |
| "learning_rate": 0.0008694415740600988, |
| "loss": 640.6179, |
| "step": 2430 |
| }, |
| { |
| "ce_loss_10": 3.595633792877197, |
| "ce_loss_13": 3.5286824345588683, |
| "ce_loss_2": 4.186046612262726, |
| "ce_loss_3": 3.9901591181755065, |
| "ce_loss_7": 3.6573471426963806, |
| "epoch": 0.244, |
| "grad_norm": 500.0, |
| "kl_loss_10": 109.70008354187011, |
| "kl_loss_2": 1348.1939697265625, |
| "kl_loss_3": 959.4411895751953, |
| "kl_loss_7": 231.17713012695313, |
| "learning_rate": 0.0008683705689382025, |
| "loss": 654.2107, |
| "step": 2440 |
| }, |
| { |
| "ce_loss_10": 3.684832978248596, |
| "ce_loss_13": 3.616632854938507, |
| "ce_loss_2": 4.231842195987701, |
| "ce_loss_3": 4.048018515110016, |
| "ce_loss_7": 3.740548253059387, |
| "epoch": 0.245, |
| "grad_norm": 450.0, |
| "kl_loss_10": 108.41531753540039, |
| "kl_loss_2": 1280.763720703125, |
| "kl_loss_3": 904.3856048583984, |
| "kl_loss_7": 224.61626434326172, |
| "learning_rate": 0.0008672958543287666, |
| "loss": 648.306, |
| "step": 2450 |
| }, |
| { |
| "ce_loss_10": 3.6943544864654543, |
| "ce_loss_13": 3.625825345516205, |
| "ce_loss_2": 4.246035170555115, |
| "ce_loss_3": 4.063331556320191, |
| "ce_loss_7": 3.75509090423584, |
| "epoch": 0.246, |
| "grad_norm": 428.0, |
| "kl_loss_10": 109.8709056854248, |
| "kl_loss_2": 1283.2248840332031, |
| "kl_loss_3": 904.9634002685547, |
| "kl_loss_7": 228.17876358032225, |
| "learning_rate": 0.0008662174410541554, |
| "loss": 632.1618, |
| "step": 2460 |
| }, |
| { |
| "ce_loss_10": 3.6546427369117738, |
| "ce_loss_13": 3.587355947494507, |
| "ce_loss_2": 4.204605233669281, |
| "ce_loss_3": 4.022764265537262, |
| "ce_loss_7": 3.717895233631134, |
| "epoch": 0.247, |
| "grad_norm": 394.0, |
| "kl_loss_10": 107.19166374206543, |
| "kl_loss_2": 1283.1351989746095, |
| "kl_loss_3": 906.7954956054688, |
| "kl_loss_7": 227.77373123168945, |
| "learning_rate": 0.0008651353399739787, |
| "loss": 642.6704, |
| "step": 2470 |
| }, |
| { |
| "ce_loss_10": 3.685038208961487, |
| "ce_loss_13": 3.6166242718696595, |
| "ce_loss_2": 4.247460579872131, |
| "ce_loss_3": 4.056502640247345, |
| "ce_loss_7": 3.7451204299926757, |
| "epoch": 0.248, |
| "grad_norm": 520.0, |
| "kl_loss_10": 109.50839309692383, |
| "kl_loss_2": 1297.0845886230468, |
| "kl_loss_3": 908.0451263427734, |
| "kl_loss_7": 229.69447708129883, |
| "learning_rate": 0.0008640495619849821, |
| "loss": 636.7805, |
| "step": 2480 |
| }, |
| { |
| "ce_loss_10": 3.6444509506225584, |
| "ce_loss_13": 3.5771190404891966, |
| "ce_loss_2": 4.202155363559723, |
| "ce_loss_3": 4.00964595079422, |
| "ce_loss_7": 3.709311318397522, |
| "epoch": 0.249, |
| "grad_norm": 492.0, |
| "kl_loss_10": 107.21613540649415, |
| "kl_loss_2": 1287.2485595703124, |
| "kl_loss_3": 903.1389892578125, |
| "kl_loss_7": 236.67683792114258, |
| "learning_rate": 0.0008629601180209381, |
| "loss": 632.6728, |
| "step": 2490 |
| }, |
| { |
| "ce_loss_10": 3.640513265132904, |
| "ce_loss_13": 3.571250784397125, |
| "ce_loss_2": 4.189491713047028, |
| "ce_loss_3": 4.000437986850739, |
| "ce_loss_7": 3.6998685002326965, |
| "epoch": 0.25, |
| "grad_norm": 388.0, |
| "kl_loss_10": 109.32069320678711, |
| "kl_loss_2": 1269.254248046875, |
| "kl_loss_3": 889.8983123779296, |
| "kl_loss_7": 235.86445999145508, |
| "learning_rate": 0.000861867019052535, |
| "loss": 634.9495, |
| "step": 2500 |
| }, |
| { |
| "ce_loss_10": 3.5532511711120605, |
| "ce_loss_13": 3.4857767462730407, |
| "ce_loss_2": 4.138734245300293, |
| "ce_loss_3": 3.9396822333335875, |
| "ce_loss_7": 3.6218135476112367, |
| "epoch": 0.251, |
| "grad_norm": 454.0, |
| "kl_loss_10": 108.08290519714356, |
| "kl_loss_2": 1328.9036437988282, |
| "kl_loss_3": 931.4583953857422, |
| "kl_loss_7": 239.18134002685548, |
| "learning_rate": 0.0008607702760872678, |
| "loss": 651.695, |
| "step": 2510 |
| }, |
| { |
| "ce_loss_10": 3.6758545279502868, |
| "ce_loss_13": 3.6099536180496217, |
| "ce_loss_2": 4.224261367321015, |
| "ce_loss_3": 4.035415709018707, |
| "ce_loss_7": 3.738192629814148, |
| "epoch": 0.252, |
| "grad_norm": 676.0, |
| "kl_loss_10": 109.95955963134766, |
| "kl_loss_2": 1264.877392578125, |
| "kl_loss_3": 890.2998840332032, |
| "kl_loss_7": 227.99790649414064, |
| "learning_rate": 0.0008596699001693256, |
| "loss": 638.9367, |
| "step": 2520 |
| }, |
| { |
| "ce_loss_10": 3.695500302314758, |
| "ce_loss_13": 3.61992267370224, |
| "ce_loss_2": 4.222428333759308, |
| "ce_loss_3": 4.035504674911499, |
| "ce_loss_7": 3.743453121185303, |
| "epoch": 0.253, |
| "grad_norm": 548.0, |
| "kl_loss_10": 127.16191024780274, |
| "kl_loss_2": 1273.7016052246095, |
| "kl_loss_3": 884.7771850585938, |
| "kl_loss_7": 228.30911254882812, |
| "learning_rate": 0.0008585659023794818, |
| "loss": 643.5041, |
| "step": 2530 |
| }, |
| { |
| "ce_loss_10": 3.637289047241211, |
| "ce_loss_13": 3.5686028838157653, |
| "ce_loss_2": 4.218254780769348, |
| "ce_loss_3": 4.019161069393158, |
| "ce_loss_7": 3.696817862987518, |
| "epoch": 0.254, |
| "grad_norm": 462.0, |
| "kl_loss_10": 120.60056190490722, |
| "kl_loss_2": 1330.6954162597656, |
| "kl_loss_3": 938.699496459961, |
| "kl_loss_7": 233.61075134277343, |
| "learning_rate": 0.0008574582938349817, |
| "loss": 644.681, |
| "step": 2540 |
| }, |
| { |
| "ce_loss_10": 3.6420682311058044, |
| "ce_loss_13": 3.567863130569458, |
| "ce_loss_2": 4.228188097476959, |
| "ce_loss_3": 4.029180979728698, |
| "ce_loss_7": 3.705214190483093, |
| "epoch": 0.255, |
| "grad_norm": 372.0, |
| "kl_loss_10": 117.31230735778809, |
| "kl_loss_2": 1354.2801513671875, |
| "kl_loss_3": 952.2808837890625, |
| "kl_loss_7": 241.81886978149413, |
| "learning_rate": 0.0008563470856894315, |
| "loss": 640.0965, |
| "step": 2550 |
| }, |
| { |
| "ce_loss_10": 3.6231508612632752, |
| "ce_loss_13": 3.5556546688079833, |
| "ce_loss_2": 4.189491558074951, |
| "ce_loss_3": 3.9987146973609926, |
| "ce_loss_7": 3.681329298019409, |
| "epoch": 0.256, |
| "grad_norm": 472.0, |
| "kl_loss_10": 108.91358489990235, |
| "kl_loss_2": 1298.7685607910157, |
| "kl_loss_3": 917.6602386474609, |
| "kl_loss_7": 230.57952499389648, |
| "learning_rate": 0.0008552322891326845, |
| "loss": 638.6699, |
| "step": 2560 |
| }, |
| { |
| "ce_loss_10": 3.5982382774353026, |
| "ce_loss_13": 3.528933322429657, |
| "ce_loss_2": 4.162684524059296, |
| "ce_loss_3": 3.9698683977127076, |
| "ce_loss_7": 3.656614398956299, |
| "epoch": 0.257, |
| "grad_norm": 434.0, |
| "kl_loss_10": 109.61449127197265, |
| "kl_loss_2": 1301.0992553710937, |
| "kl_loss_3": 921.5931274414063, |
| "kl_loss_7": 231.00868759155273, |
| "learning_rate": 0.0008541139153907296, |
| "loss": 634.7393, |
| "step": 2570 |
| }, |
| { |
| "ce_loss_10": 3.5526642203330994, |
| "ce_loss_13": 3.485519516468048, |
| "ce_loss_2": 4.114146530628204, |
| "ce_loss_3": 3.924081325531006, |
| "ce_loss_7": 3.614666759967804, |
| "epoch": 0.258, |
| "grad_norm": 548.0, |
| "kl_loss_10": 107.18747673034667, |
| "kl_loss_2": 1300.3511169433593, |
| "kl_loss_3": 919.3837615966797, |
| "kl_loss_7": 228.61345367431642, |
| "learning_rate": 0.0008529919757255782, |
| "loss": 640.0102, |
| "step": 2580 |
| }, |
| { |
| "ce_loss_10": 3.591010940074921, |
| "ce_loss_13": 3.519867956638336, |
| "ce_loss_2": 4.122671520709991, |
| "ce_loss_3": 3.9352630972862244, |
| "ce_loss_7": 3.642985260486603, |
| "epoch": 0.259, |
| "grad_norm": 462.0, |
| "kl_loss_10": 115.52005577087402, |
| "kl_loss_2": 1261.9588439941406, |
| "kl_loss_3": 881.7059204101563, |
| "kl_loss_7": 222.79493560791016, |
| "learning_rate": 0.0008518664814351503, |
| "loss": 624.9721, |
| "step": 2590 |
| }, |
| { |
| "ce_loss_10": 3.5559722065925596, |
| "ce_loss_13": 3.4851076006889343, |
| "ce_loss_2": 4.12727187871933, |
| "ce_loss_3": 3.9312629222869875, |
| "ce_loss_7": 3.6141554713249207, |
| "epoch": 0.26, |
| "grad_norm": 468.0, |
| "kl_loss_10": 118.2235237121582, |
| "kl_loss_2": 1329.9431396484374, |
| "kl_loss_3": 938.9490844726563, |
| "kl_loss_7": 229.97386627197267, |
| "learning_rate": 0.0008507374438531607, |
| "loss": 664.5543, |
| "step": 2600 |
| }, |
| { |
| "ce_loss_10": 3.52994726896286, |
| "ce_loss_13": 3.460645878314972, |
| "ce_loss_2": 4.086832702159882, |
| "ce_loss_3": 3.8989439606666565, |
| "ce_loss_7": 3.5843619465827943, |
| "epoch": 0.261, |
| "grad_norm": 454.0, |
| "kl_loss_10": 111.8594409942627, |
| "kl_loss_2": 1283.5894409179687, |
| "kl_loss_3": 906.6501983642578, |
| "kl_loss_7": 225.07547607421876, |
| "learning_rate": 0.0008496048743490053, |
| "loss": 631.2727, |
| "step": 2610 |
| }, |
| { |
| "ce_loss_10": 3.688689887523651, |
| "ce_loss_13": 3.6171088218688965, |
| "ce_loss_2": 4.232082653045654, |
| "ce_loss_3": 4.045144772529602, |
| "ce_loss_7": 3.7411328554153442, |
| "epoch": 0.262, |
| "grad_norm": 498.0, |
| "kl_loss_10": 112.36735153198242, |
| "kl_loss_2": 1262.6953674316405, |
| "kl_loss_3": 890.0498321533203, |
| "kl_loss_7": 224.3122886657715, |
| "learning_rate": 0.0008484687843276469, |
| "loss": 626.552, |
| "step": 2620 |
| }, |
| { |
| "ce_loss_10": 3.6142364263534548, |
| "ce_loss_13": 3.54564208984375, |
| "ce_loss_2": 4.1653601884841915, |
| "ce_loss_3": 3.979761373996735, |
| "ce_loss_7": 3.670048642158508, |
| "epoch": 0.263, |
| "grad_norm": 604.0, |
| "kl_loss_10": 113.79613609313965, |
| "kl_loss_2": 1291.2943481445313, |
| "kl_loss_3": 909.2907562255859, |
| "kl_loss_7": 229.63263626098632, |
| "learning_rate": 0.0008473291852294987, |
| "loss": 643.7754, |
| "step": 2630 |
| }, |
| { |
| "ce_loss_10": 3.624956822395325, |
| "ce_loss_13": 3.5516101837158205, |
| "ce_loss_2": 4.185651910305023, |
| "ce_loss_3": 3.9955446600914, |
| "ce_loss_7": 3.682393753528595, |
| "epoch": 0.264, |
| "grad_norm": 560.0, |
| "kl_loss_10": 118.29873504638672, |
| "kl_loss_2": 1318.540350341797, |
| "kl_loss_3": 922.2017700195313, |
| "kl_loss_7": 233.18995971679686, |
| "learning_rate": 0.0008461860885303114, |
| "loss": 639.2153, |
| "step": 2640 |
| }, |
| { |
| "ce_loss_10": 3.651511311531067, |
| "ce_loss_13": 3.5835230231285093, |
| "ce_loss_2": 4.192479598522186, |
| "ce_loss_3": 4.010076713562012, |
| "ce_loss_7": 3.705660367012024, |
| "epoch": 0.265, |
| "grad_norm": 532.0, |
| "kl_loss_10": 120.45394592285156, |
| "kl_loss_2": 1256.7761352539062, |
| "kl_loss_3": 889.5764984130859, |
| "kl_loss_7": 224.92701721191406, |
| "learning_rate": 0.000845039505741056, |
| "loss": 630.0308, |
| "step": 2650 |
| }, |
| { |
| "ce_loss_10": 3.6381911635398865, |
| "ce_loss_13": 3.567105031013489, |
| "ce_loss_2": 4.197239780426026, |
| "ce_loss_3": 4.007714176177979, |
| "ce_loss_7": 3.694069528579712, |
| "epoch": 0.266, |
| "grad_norm": 476.0, |
| "kl_loss_10": 120.83754425048828, |
| "kl_loss_2": 1304.788995361328, |
| "kl_loss_3": 924.8183044433594, |
| "kl_loss_7": 230.70355300903321, |
| "learning_rate": 0.0008438894484078086, |
| "loss": 659.7302, |
| "step": 2660 |
| }, |
| { |
| "ce_loss_10": 3.6475989818573, |
| "ce_loss_13": 3.575591731071472, |
| "ce_loss_2": 4.190777051448822, |
| "ce_loss_3": 4.004483807086944, |
| "ce_loss_7": 3.6995357990264894, |
| "epoch": 0.267, |
| "grad_norm": 486.0, |
| "kl_loss_10": 115.3904426574707, |
| "kl_loss_2": 1277.0895690917969, |
| "kl_loss_3": 898.4886535644531, |
| "kl_loss_7": 228.7105613708496, |
| "learning_rate": 0.0008427359281116334, |
| "loss": 634.3606, |
| "step": 2670 |
| }, |
| { |
| "ce_loss_10": 3.547777831554413, |
| "ce_loss_13": 3.4795953035354614, |
| "ce_loss_2": 4.113273656368255, |
| "ce_loss_3": 3.9340083956718446, |
| "ce_loss_7": 3.6087413907051085, |
| "epoch": 0.268, |
| "grad_norm": 572.0, |
| "kl_loss_10": 111.49882125854492, |
| "kl_loss_2": 1312.4553100585938, |
| "kl_loss_3": 937.0226959228515, |
| "kl_loss_7": 228.7356918334961, |
| "learning_rate": 0.0008415789564684673, |
| "loss": 643.8098, |
| "step": 2680 |
| }, |
| { |
| "ce_loss_10": 3.7991090655326842, |
| "ce_loss_13": 3.7250254154205322, |
| "ce_loss_2": 4.329492318630218, |
| "ce_loss_3": 4.153719592094421, |
| "ce_loss_7": 3.8572392106056212, |
| "epoch": 0.269, |
| "grad_norm": 536.0, |
| "kl_loss_10": 119.75568199157715, |
| "kl_loss_2": 1240.7658264160157, |
| "kl_loss_3": 900.5773712158203, |
| "kl_loss_7": 236.5658187866211, |
| "learning_rate": 0.0008404185451290017, |
| "loss": 621.7949, |
| "step": 2690 |
| }, |
| { |
| "ce_loss_10": 3.6571221590042113, |
| "ce_loss_13": 3.5904589772224424, |
| "ce_loss_2": 4.20020170211792, |
| "ce_loss_3": 4.020016396045685, |
| "ce_loss_7": 3.7206122040748597, |
| "epoch": 0.27, |
| "grad_norm": 612.0, |
| "kl_loss_10": 109.39498329162598, |
| "kl_loss_2": 1278.1862854003907, |
| "kl_loss_3": 903.8941192626953, |
| "kl_loss_7": 233.3107780456543, |
| "learning_rate": 0.0008392547057785661, |
| "loss": 629.3908, |
| "step": 2700 |
| }, |
| { |
| "ce_loss_10": 3.5812445521354674, |
| "ce_loss_13": 3.511835253238678, |
| "ce_loss_2": 4.144945275783539, |
| "ce_loss_3": 3.964122700691223, |
| "ce_loss_7": 3.65096001625061, |
| "epoch": 0.271, |
| "grad_norm": 536.0, |
| "kl_loss_10": 110.46146507263184, |
| "kl_loss_2": 1320.70322265625, |
| "kl_loss_3": 951.1240264892579, |
| "kl_loss_7": 251.59460906982423, |
| "learning_rate": 0.0008380874501370098, |
| "loss": 636.0247, |
| "step": 2710 |
| }, |
| { |
| "ce_loss_10": 3.576056122779846, |
| "ce_loss_13": 3.5100281834602356, |
| "ce_loss_2": 4.140222942829132, |
| "ce_loss_3": 3.9531075954437256, |
| "ce_loss_7": 3.6410070419311524, |
| "epoch": 0.272, |
| "grad_norm": 544.0, |
| "kl_loss_10": 110.53367462158204, |
| "kl_loss_2": 1306.970849609375, |
| "kl_loss_3": 923.5474395751953, |
| "kl_loss_7": 236.50457839965821, |
| "learning_rate": 0.0008369167899585841, |
| "loss": 640.9572, |
| "step": 2720 |
| }, |
| { |
| "ce_loss_10": 3.6997987627983093, |
| "ce_loss_13": 3.635802137851715, |
| "ce_loss_2": 4.225974369049072, |
| "ce_loss_3": 4.0530330538749695, |
| "ce_loss_7": 3.760606610774994, |
| "epoch": 0.273, |
| "grad_norm": 700.0, |
| "kl_loss_10": 106.76014976501465, |
| "kl_loss_2": 1235.3878662109375, |
| "kl_loss_3": 873.6434448242187, |
| "kl_loss_7": 224.53188552856446, |
| "learning_rate": 0.0008357427370318238, |
| "loss": 630.9094, |
| "step": 2730 |
| }, |
| { |
| "ce_loss_10": 3.6538386702537538, |
| "ce_loss_13": 3.5854528903961183, |
| "ce_loss_2": 4.205159163475036, |
| "ce_loss_3": 4.013937699794769, |
| "ce_loss_7": 3.712061953544617, |
| "epoch": 0.274, |
| "grad_norm": 448.0, |
| "kl_loss_10": 110.17894134521484, |
| "kl_loss_2": 1286.5609130859375, |
| "kl_loss_3": 904.9044860839844, |
| "kl_loss_7": 229.6865478515625, |
| "learning_rate": 0.0008345653031794292, |
| "loss": 635.8903, |
| "step": 2740 |
| }, |
| { |
| "ce_loss_10": 3.6535327553749086, |
| "ce_loss_13": 3.5872610807418823, |
| "ce_loss_2": 4.199507582187652, |
| "ce_loss_3": 4.0229881525039675, |
| "ce_loss_7": 3.712740111351013, |
| "epoch": 0.275, |
| "grad_norm": 494.0, |
| "kl_loss_10": 108.84803733825683, |
| "kl_loss_2": 1267.8911865234375, |
| "kl_loss_3": 897.5597198486328, |
| "kl_loss_7": 226.9661651611328, |
| "learning_rate": 0.0008333845002581458, |
| "loss": 628.4583, |
| "step": 2750 |
| }, |
| { |
| "ce_loss_10": 3.5756468772888184, |
| "ce_loss_13": 3.5082659125328064, |
| "ce_loss_2": 4.145406031608582, |
| "ce_loss_3": 3.954765808582306, |
| "ce_loss_7": 3.6372469902038573, |
| "epoch": 0.276, |
| "grad_norm": 442.0, |
| "kl_loss_10": 107.86047019958497, |
| "kl_loss_2": 1333.4900146484374, |
| "kl_loss_3": 936.4077697753906, |
| "kl_loss_7": 230.6979835510254, |
| "learning_rate": 0.0008322003401586462, |
| "loss": 647.3615, |
| "step": 2760 |
| }, |
| { |
| "ce_loss_10": 3.615983176231384, |
| "ce_loss_13": 3.5494200587272644, |
| "ce_loss_2": 4.153625726699829, |
| "ce_loss_3": 3.9661497712135314, |
| "ce_loss_7": 3.670648729801178, |
| "epoch": 0.277, |
| "grad_norm": 456.0, |
| "kl_loss_10": 107.68133277893067, |
| "kl_loss_2": 1252.6388610839845, |
| "kl_loss_3": 875.4474029541016, |
| "kl_loss_7": 220.59310073852538, |
| "learning_rate": 0.0008310128348054094, |
| "loss": 608.5761, |
| "step": 2770 |
| }, |
| { |
| "ce_loss_10": 3.581556737422943, |
| "ce_loss_13": 3.518260824680328, |
| "ce_loss_2": 4.13277485370636, |
| "ce_loss_3": 3.9427122831344605, |
| "ce_loss_7": 3.6427804708480833, |
| "epoch": 0.278, |
| "grad_norm": 556.0, |
| "kl_loss_10": 107.17420654296875, |
| "kl_loss_2": 1270.6333923339844, |
| "kl_loss_3": 894.9070373535156, |
| "kl_loss_7": 225.04671096801758, |
| "learning_rate": 0.0008298219961566008, |
| "loss": 624.6308, |
| "step": 2780 |
| }, |
| { |
| "ce_loss_10": 3.5525727391242983, |
| "ce_loss_13": 3.485328257083893, |
| "ce_loss_2": 4.1319693446159365, |
| "ce_loss_3": 3.9388387560844422, |
| "ce_loss_7": 3.6104352355003355, |
| "epoch": 0.279, |
| "grad_norm": 394.0, |
| "kl_loss_10": 112.5637420654297, |
| "kl_loss_2": 1333.4249206542968, |
| "kl_loss_3": 937.83544921875, |
| "kl_loss_7": 227.29133377075195, |
| "learning_rate": 0.0008286278362039527, |
| "loss": 635.7598, |
| "step": 2790 |
| }, |
| { |
| "ce_loss_10": 3.587259495258331, |
| "ce_loss_13": 3.5128440499305724, |
| "ce_loss_2": 4.159168899059296, |
| "ce_loss_3": 3.9615533709526063, |
| "ce_loss_7": 3.6405880570411684, |
| "epoch": 0.28, |
| "grad_norm": 402.0, |
| "kl_loss_10": 114.4114917755127, |
| "kl_loss_2": 1318.4106079101562, |
| "kl_loss_3": 924.2900451660156, |
| "kl_loss_7": 224.0070999145508, |
| "learning_rate": 0.0008274303669726426, |
| "loss": 628.2539, |
| "step": 2800 |
| }, |
| { |
| "ce_loss_10": 3.479981768131256, |
| "ce_loss_13": 3.411652183532715, |
| "ce_loss_2": 4.056045913696289, |
| "ce_loss_3": 3.866449761390686, |
| "ce_loss_7": 3.5390130996704103, |
| "epoch": 0.281, |
| "grad_norm": 484.0, |
| "kl_loss_10": 111.04401168823242, |
| "kl_loss_2": 1325.4802673339843, |
| "kl_loss_3": 933.757958984375, |
| "kl_loss_7": 223.9423583984375, |
| "learning_rate": 0.0008262296005211721, |
| "loss": 628.6442, |
| "step": 2810 |
| }, |
| { |
| "ce_loss_10": 3.6082133769989015, |
| "ce_loss_13": 3.543479096889496, |
| "ce_loss_2": 4.177137637138367, |
| "ce_loss_3": 3.9879695653915403, |
| "ce_loss_7": 3.667951965332031, |
| "epoch": 0.282, |
| "grad_norm": 436.0, |
| "kl_loss_10": 106.70964050292969, |
| "kl_loss_2": 1303.7048461914062, |
| "kl_loss_3": 916.7790283203125, |
| "kl_loss_7": 222.79779052734375, |
| "learning_rate": 0.0008250255489412463, |
| "loss": 627.094, |
| "step": 2820 |
| }, |
| { |
| "ce_loss_10": 3.716309094429016, |
| "ce_loss_13": 3.642316293716431, |
| "ce_loss_2": 4.261255824565888, |
| "ce_loss_3": 4.0787659049034115, |
| "ce_loss_7": 3.7748358964920046, |
| "epoch": 0.283, |
| "grad_norm": 604.0, |
| "kl_loss_10": 114.64575080871582, |
| "kl_loss_2": 1277.7698669433594, |
| "kl_loss_3": 901.2324371337891, |
| "kl_loss_7": 231.05035324096679, |
| "learning_rate": 0.0008238182243576511, |
| "loss": 633.2869, |
| "step": 2830 |
| }, |
| { |
| "ce_loss_10": 3.682005834579468, |
| "ce_loss_13": 3.615007734298706, |
| "ce_loss_2": 4.202854037284851, |
| "ce_loss_3": 4.023157751560211, |
| "ce_loss_7": 3.736177396774292, |
| "epoch": 0.284, |
| "grad_norm": 548.0, |
| "kl_loss_10": 110.75144157409667, |
| "kl_loss_2": 1221.345361328125, |
| "kl_loss_3": 870.2192474365235, |
| "kl_loss_7": 222.93451232910155, |
| "learning_rate": 0.0008226076389281315, |
| "loss": 611.4373, |
| "step": 2840 |
| }, |
| { |
| "ce_loss_10": 3.7233519554138184, |
| "ce_loss_13": 3.6542891025543214, |
| "ce_loss_2": 4.248092949390411, |
| "ce_loss_3": 4.069663691520691, |
| "ce_loss_7": 3.775057625770569, |
| "epoch": 0.285, |
| "grad_norm": 704.0, |
| "kl_loss_10": 110.19483451843261, |
| "kl_loss_2": 1255.0005493164062, |
| "kl_loss_3": 882.4262268066407, |
| "kl_loss_7": 222.1134048461914, |
| "learning_rate": 0.0008213938048432696, |
| "loss": 610.5205, |
| "step": 2850 |
| }, |
| { |
| "ce_loss_10": 3.64341379404068, |
| "ce_loss_13": 3.5780718684196473, |
| "ce_loss_2": 4.180006468296051, |
| "ce_loss_3": 3.9996572732925415, |
| "ce_loss_7": 3.701814925670624, |
| "epoch": 0.286, |
| "grad_norm": 442.0, |
| "kl_loss_10": 108.8284294128418, |
| "kl_loss_2": 1259.7149841308594, |
| "kl_loss_3": 887.2555450439453, |
| "kl_loss_7": 224.16595458984375, |
| "learning_rate": 0.0008201767343263612, |
| "loss": 623.8719, |
| "step": 2860 |
| }, |
| { |
| "ce_loss_10": 3.580712640285492, |
| "ce_loss_13": 3.514770495891571, |
| "ce_loss_2": 4.152428865432739, |
| "ce_loss_3": 3.9586745381355284, |
| "ce_loss_7": 3.6420669674873354, |
| "epoch": 0.287, |
| "grad_norm": 478.0, |
| "kl_loss_10": 104.37866973876953, |
| "kl_loss_2": 1290.6899536132812, |
| "kl_loss_3": 906.0174041748047, |
| "kl_loss_7": 219.9403289794922, |
| "learning_rate": 0.0008189564396332927, |
| "loss": 611.9311, |
| "step": 2870 |
| }, |
| { |
| "ce_loss_10": 3.560059654712677, |
| "ce_loss_13": 3.4959851503372192, |
| "ce_loss_2": 4.124033749103546, |
| "ce_loss_3": 3.9340568661689757, |
| "ce_loss_7": 3.6201700448989866, |
| "epoch": 0.288, |
| "grad_norm": 480.0, |
| "kl_loss_10": 103.69261512756347, |
| "kl_loss_2": 1290.621844482422, |
| "kl_loss_3": 906.7965057373046, |
| "kl_loss_7": 223.63958206176758, |
| "learning_rate": 0.0008177329330524181, |
| "loss": 627.1938, |
| "step": 2880 |
| }, |
| { |
| "ce_loss_10": 3.6303093075752257, |
| "ce_loss_13": 3.5619912266731264, |
| "ce_loss_2": 4.173935759067535, |
| "ce_loss_3": 3.9890891432762148, |
| "ce_loss_7": 3.6870488286018372, |
| "epoch": 0.289, |
| "grad_norm": 452.0, |
| "kl_loss_10": 105.55148658752441, |
| "kl_loss_2": 1245.1935241699218, |
| "kl_loss_3": 874.3131744384766, |
| "kl_loss_7": 225.99310531616212, |
| "learning_rate": 0.0008165062269044352, |
| "loss": 620.2292, |
| "step": 2890 |
| }, |
| { |
| "ce_loss_10": 3.574904942512512, |
| "ce_loss_13": 3.5098610281944276, |
| "ce_loss_2": 4.134270429611206, |
| "ce_loss_3": 3.941369962692261, |
| "ce_loss_7": 3.641903018951416, |
| "epoch": 0.29, |
| "grad_norm": 394.0, |
| "kl_loss_10": 109.33544578552247, |
| "kl_loss_2": 1282.6864868164062, |
| "kl_loss_3": 899.0664276123047, |
| "kl_loss_7": 231.66256561279297, |
| "learning_rate": 0.0008152763335422613, |
| "loss": 630.6565, |
| "step": 2900 |
| }, |
| { |
| "ce_loss_10": 3.5721667885780333, |
| "ce_loss_13": 3.503155696392059, |
| "ce_loss_2": 4.123925364017486, |
| "ce_loss_3": 3.935485672950745, |
| "ce_loss_7": 3.625267505645752, |
| "epoch": 0.291, |
| "grad_norm": 600.0, |
| "kl_loss_10": 111.11225318908691, |
| "kl_loss_2": 1285.6910400390625, |
| "kl_loss_3": 903.9730163574219, |
| "kl_loss_7": 227.13845748901366, |
| "learning_rate": 0.0008140432653509088, |
| "loss": 623.4421, |
| "step": 2910 |
| }, |
| { |
| "ce_loss_10": 3.617369520664215, |
| "ce_loss_13": 3.5511601328849793, |
| "ce_loss_2": 4.158329248428345, |
| "ce_loss_3": 3.9670159101486204, |
| "ce_loss_7": 3.6755479097366335, |
| "epoch": 0.292, |
| "grad_norm": 424.0, |
| "kl_loss_10": 108.97641296386719, |
| "kl_loss_2": 1271.4477172851562, |
| "kl_loss_3": 887.2839324951171, |
| "kl_loss_7": 224.6483947753906, |
| "learning_rate": 0.0008128070347473608, |
| "loss": 614.8932, |
| "step": 2920 |
| }, |
| { |
| "ce_loss_10": 3.6203887820243836, |
| "ce_loss_13": 3.5571650743484495, |
| "ce_loss_2": 4.184931480884552, |
| "ce_loss_3": 3.988680112361908, |
| "ce_loss_7": 3.6783168077468873, |
| "epoch": 0.293, |
| "grad_norm": 442.0, |
| "kl_loss_10": 106.56389541625977, |
| "kl_loss_2": 1309.3880310058594, |
| "kl_loss_3": 912.7983032226563, |
| "kl_loss_7": 223.91178283691406, |
| "learning_rate": 0.0008115676541804455, |
| "loss": 627.653, |
| "step": 2930 |
| }, |
| { |
| "ce_loss_10": 3.631400096416473, |
| "ce_loss_13": 3.565066874027252, |
| "ce_loss_2": 4.173557686805725, |
| "ce_loss_3": 3.9938668727874758, |
| "ce_loss_7": 3.6861080646514894, |
| "epoch": 0.294, |
| "grad_norm": 410.0, |
| "kl_loss_10": 107.59184074401855, |
| "kl_loss_2": 1258.5327880859375, |
| "kl_loss_3": 893.1478424072266, |
| "kl_loss_7": 221.63349151611328, |
| "learning_rate": 0.0008103251361307119, |
| "loss": 625.068, |
| "step": 2940 |
| }, |
| { |
| "ce_loss_10": 3.6633550047874452, |
| "ce_loss_13": 3.5975454568862917, |
| "ce_loss_2": 4.201860392093659, |
| "ce_loss_3": 4.0187016248703005, |
| "ce_loss_7": 3.7217815637588503, |
| "epoch": 0.295, |
| "grad_norm": 484.0, |
| "kl_loss_10": 107.80433006286621, |
| "kl_loss_2": 1263.7409545898438, |
| "kl_loss_3": 899.7593811035156, |
| "kl_loss_7": 224.86621551513673, |
| "learning_rate": 0.0008090794931103026, |
| "loss": 620.4886, |
| "step": 2950 |
| }, |
| { |
| "ce_loss_10": 3.6508840203285216, |
| "ce_loss_13": 3.588442325592041, |
| "ce_loss_2": 4.192799139022827, |
| "ce_loss_3": 4.012849128246307, |
| "ce_loss_7": 3.706376481056213, |
| "epoch": 0.296, |
| "grad_norm": 560.0, |
| "kl_loss_10": 104.73687210083008, |
| "kl_loss_2": 1249.5858154296875, |
| "kl_loss_3": 882.2532653808594, |
| "kl_loss_7": 217.49150695800782, |
| "learning_rate": 0.0008078307376628291, |
| "loss": 618.8026, |
| "step": 2960 |
| }, |
| { |
| "ce_loss_10": 3.714610981941223, |
| "ce_loss_13": 3.648031437397003, |
| "ce_loss_2": 4.232832741737366, |
| "ce_loss_3": 4.05701197385788, |
| "ce_loss_7": 3.7682809591293336, |
| "epoch": 0.297, |
| "grad_norm": 438.0, |
| "kl_loss_10": 105.18131446838379, |
| "kl_loss_2": 1206.7602905273438, |
| "kl_loss_3": 851.9172241210938, |
| "kl_loss_7": 215.51920394897462, |
| "learning_rate": 0.000806578882363245, |
| "loss": 597.2082, |
| "step": 2970 |
| }, |
| { |
| "ce_loss_10": 3.6252587914466856, |
| "ce_loss_13": 3.5611796617507934, |
| "ce_loss_2": 4.163775825500489, |
| "ce_loss_3": 3.977950668334961, |
| "ce_loss_7": 3.6833796977996824, |
| "epoch": 0.298, |
| "grad_norm": 648.0, |
| "kl_loss_10": 103.29475135803223, |
| "kl_loss_2": 1245.597314453125, |
| "kl_loss_3": 877.1612213134765, |
| "kl_loss_7": 219.75524444580077, |
| "learning_rate": 0.0008053239398177191, |
| "loss": 627.9783, |
| "step": 2980 |
| }, |
| { |
| "ce_loss_10": 3.602860856056213, |
| "ce_loss_13": 3.538487696647644, |
| "ce_loss_2": 4.142560148239136, |
| "ce_loss_3": 3.9584405183792115, |
| "ce_loss_7": 3.659018313884735, |
| "epoch": 0.299, |
| "grad_norm": 502.0, |
| "kl_loss_10": 104.49293098449706, |
| "kl_loss_2": 1247.2865417480468, |
| "kl_loss_3": 873.8518829345703, |
| "kl_loss_7": 218.60237731933594, |
| "learning_rate": 0.0008040659226635089, |
| "loss": 629.0394, |
| "step": 2990 |
| }, |
| { |
| "ce_loss_10": 3.737885308265686, |
| "ce_loss_13": 3.670609879493713, |
| "ce_loss_2": 4.267246758937835, |
| "ce_loss_3": 4.084029448032379, |
| "ce_loss_7": 3.801585829257965, |
| "epoch": 0.3, |
| "grad_norm": 474.0, |
| "kl_loss_10": 109.13075065612793, |
| "kl_loss_2": 1251.634942626953, |
| "kl_loss_3": 874.3355682373046, |
| "kl_loss_7": 234.4466766357422, |
| "learning_rate": 0.0008028048435688333, |
| "loss": 617.8753, |
| "step": 3000 |
| }, |
| { |
| "ce_loss_10": 3.608117866516113, |
| "ce_loss_13": 3.5417439699172975, |
| "ce_loss_2": 4.164859163761139, |
| "ce_loss_3": 3.9728567838668822, |
| "ce_loss_7": 3.666444170475006, |
| "epoch": 0.301, |
| "grad_norm": 458.0, |
| "kl_loss_10": 104.67718696594238, |
| "kl_loss_2": 1290.2385009765626, |
| "kl_loss_3": 895.6051879882813, |
| "kl_loss_7": 230.77984161376952, |
| "learning_rate": 0.0008015407152327448, |
| "loss": 624.6472, |
| "step": 3010 |
| }, |
| { |
| "ce_loss_10": 3.655743646621704, |
| "ce_loss_13": 3.589059603214264, |
| "ce_loss_2": 4.197956717014312, |
| "ce_loss_3": 4.010993158817291, |
| "ce_loss_7": 3.715561032295227, |
| "epoch": 0.302, |
| "grad_norm": 490.0, |
| "kl_loss_10": 108.86725807189941, |
| "kl_loss_2": 1260.063540649414, |
| "kl_loss_3": 888.4078857421875, |
| "kl_loss_7": 225.77315521240234, |
| "learning_rate": 0.0008002735503850016, |
| "loss": 621.0348, |
| "step": 3020 |
| }, |
| { |
| "ce_loss_10": 3.5483126521110533, |
| "ce_loss_13": 3.477071487903595, |
| "ce_loss_2": 4.1067805051803585, |
| "ce_loss_3": 3.9177415490150453, |
| "ce_loss_7": 3.613772678375244, |
| "epoch": 0.303, |
| "grad_norm": 442.0, |
| "kl_loss_10": 113.85512008666993, |
| "kl_loss_2": 1301.403955078125, |
| "kl_loss_3": 923.0243713378907, |
| "kl_loss_7": 243.21601486206055, |
| "learning_rate": 0.0007990033617859396, |
| "loss": 643.6124, |
| "step": 3030 |
| }, |
| { |
| "ce_loss_10": 3.596233379840851, |
| "ce_loss_13": 3.527192997932434, |
| "ce_loss_2": 4.134040641784668, |
| "ce_loss_3": 3.9505585551261904, |
| "ce_loss_7": 3.6559112668037415, |
| "epoch": 0.304, |
| "grad_norm": 576.0, |
| "kl_loss_10": 111.53803482055665, |
| "kl_loss_2": 1246.1058349609375, |
| "kl_loss_3": 878.1594299316406, |
| "kl_loss_7": 229.73634643554686, |
| "learning_rate": 0.000797730162226344, |
| "loss": 607.6155, |
| "step": 3040 |
| }, |
| { |
| "ce_loss_10": 3.6262240767478944, |
| "ce_loss_13": 3.55741890668869, |
| "ce_loss_2": 4.167547011375428, |
| "ce_loss_3": 3.981596386432648, |
| "ce_loss_7": 3.686588776111603, |
| "epoch": 0.305, |
| "grad_norm": 430.0, |
| "kl_loss_10": 113.34700317382813, |
| "kl_loss_2": 1258.705908203125, |
| "kl_loss_3": 888.1617828369141, |
| "kl_loss_7": 230.86616134643555, |
| "learning_rate": 0.0007964539645273203, |
| "loss": 613.4882, |
| "step": 3050 |
| }, |
| { |
| "ce_loss_10": 3.6409424901008607, |
| "ce_loss_13": 3.574270474910736, |
| "ce_loss_2": 4.167909657955169, |
| "ce_loss_3": 3.9862082481384276, |
| "ce_loss_7": 3.6940474629402162, |
| "epoch": 0.306, |
| "grad_norm": 486.0, |
| "kl_loss_10": 106.54784317016602, |
| "kl_loss_2": 1238.411444091797, |
| "kl_loss_3": 866.3065307617187, |
| "kl_loss_7": 220.125154876709, |
| "learning_rate": 0.000795174781540165, |
| "loss": 615.3713, |
| "step": 3060 |
| }, |
| { |
| "ce_loss_10": 3.721142077445984, |
| "ce_loss_13": 3.643355393409729, |
| "ce_loss_2": 4.226944315433502, |
| "ce_loss_3": 4.049481880664826, |
| "ce_loss_7": 3.771383452415466, |
| "epoch": 0.307, |
| "grad_norm": 418.0, |
| "kl_loss_10": 122.25658149719239, |
| "kl_loss_2": 1203.3186645507812, |
| "kl_loss_3": 851.1368927001953, |
| "kl_loss_7": 225.69219970703125, |
| "learning_rate": 0.0007938926261462366, |
| "loss": 615.3413, |
| "step": 3070 |
| }, |
| { |
| "ce_loss_10": 3.6604058384895324, |
| "ce_loss_13": 3.5915605425834656, |
| "ce_loss_2": 4.175356435775757, |
| "ce_loss_3": 3.9981507778167726, |
| "ce_loss_7": 3.7177716493606567, |
| "epoch": 0.308, |
| "grad_norm": 528.0, |
| "kl_loss_10": 111.06630897521973, |
| "kl_loss_2": 1238.821875, |
| "kl_loss_3": 876.2146820068359, |
| "kl_loss_7": 223.17951583862305, |
| "learning_rate": 0.0007926075112568258, |
| "loss": 625.9794, |
| "step": 3080 |
| }, |
| { |
| "ce_loss_10": 3.652525985240936, |
| "ce_loss_13": 3.586830127239227, |
| "ce_loss_2": 4.18239061832428, |
| "ce_loss_3": 4.001185369491577, |
| "ce_loss_7": 3.7117084741592405, |
| "epoch": 0.309, |
| "grad_norm": 408.0, |
| "kl_loss_10": 105.23004531860352, |
| "kl_loss_2": 1238.4955749511719, |
| "kl_loss_3": 879.8196685791015, |
| "kl_loss_7": 219.05570373535156, |
| "learning_rate": 0.0007913194498130252, |
| "loss": 606.0291, |
| "step": 3090 |
| }, |
| { |
| "ce_loss_10": 3.576726019382477, |
| "ce_loss_13": 3.5116322517395018, |
| "ce_loss_2": 4.1267429232597355, |
| "ce_loss_3": 3.951638638973236, |
| "ce_loss_7": 3.633882737159729, |
| "epoch": 0.31, |
| "grad_norm": 596.0, |
| "kl_loss_10": 104.83585128784179, |
| "kl_loss_2": 1271.4552978515626, |
| "kl_loss_3": 899.7277404785157, |
| "kl_loss_7": 221.13562393188477, |
| "learning_rate": 0.0007900284547855992, |
| "loss": 625.4858, |
| "step": 3100 |
| }, |
| { |
| "ce_loss_10": 3.585216796398163, |
| "ce_loss_13": 3.5201086163520814, |
| "ce_loss_2": 4.1151411652565, |
| "ce_loss_3": 3.9415447235107424, |
| "ce_loss_7": 3.6418359875679016, |
| "epoch": 0.311, |
| "grad_norm": 460.0, |
| "kl_loss_10": 104.5475685119629, |
| "kl_loss_2": 1231.1286071777345, |
| "kl_loss_3": 880.9251007080078, |
| "kl_loss_7": 215.18857421875, |
| "learning_rate": 0.0007887345391748532, |
| "loss": 620.3755, |
| "step": 3110 |
| }, |
| { |
| "ce_loss_10": 3.7296380400657654, |
| "ce_loss_13": 3.6599961280822755, |
| "ce_loss_2": 4.223298215866089, |
| "ce_loss_3": 4.0600717782974245, |
| "ce_loss_7": 3.7785526752471923, |
| "epoch": 0.312, |
| "grad_norm": 434.0, |
| "kl_loss_10": 110.47460975646973, |
| "kl_loss_2": 1200.8911010742188, |
| "kl_loss_3": 857.2419494628906, |
| "kl_loss_7": 215.84228134155273, |
| "learning_rate": 0.0007874377160105036, |
| "loss": 594.074, |
| "step": 3120 |
| }, |
| { |
| "ce_loss_10": 3.647064197063446, |
| "ce_loss_13": 3.5629413604736326, |
| "ce_loss_2": 4.20149587392807, |
| "ce_loss_3": 4.026539087295532, |
| "ce_loss_7": 3.7087369561195374, |
| "epoch": 0.313, |
| "grad_norm": 504.0, |
| "kl_loss_10": 117.26640319824219, |
| "kl_loss_2": 1253.9545349121095, |
| "kl_loss_3": 905.2646728515625, |
| "kl_loss_7": 235.96448516845703, |
| "learning_rate": 0.0007861379983515449, |
| "loss": 636.8147, |
| "step": 3130 |
| }, |
| { |
| "ce_loss_10": 3.7007260084152223, |
| "ce_loss_13": 3.631552994251251, |
| "ce_loss_2": 4.21818333864212, |
| "ce_loss_3": 4.040616655349732, |
| "ce_loss_7": 3.757350814342499, |
| "epoch": 0.314, |
| "grad_norm": 466.0, |
| "kl_loss_10": 112.17713203430176, |
| "kl_loss_2": 1239.388739013672, |
| "kl_loss_3": 879.6389221191406, |
| "kl_loss_7": 227.5748275756836, |
| "learning_rate": 0.0007848353992861195, |
| "loss": 608.3133, |
| "step": 3140 |
| }, |
| { |
| "ce_loss_10": 3.786464810371399, |
| "ce_loss_13": 3.710281562805176, |
| "ce_loss_2": 4.314648783206939, |
| "ce_loss_3": 4.134762763977051, |
| "ce_loss_7": 3.843652272224426, |
| "epoch": 0.315, |
| "grad_norm": 458.0, |
| "kl_loss_10": 124.92040100097657, |
| "kl_loss_2": 1240.0601806640625, |
| "kl_loss_3": 878.5192687988281, |
| "kl_loss_7": 241.41039581298827, |
| "learning_rate": 0.0007835299319313853, |
| "loss": 620.9426, |
| "step": 3150 |
| }, |
| { |
| "ce_loss_10": 3.663742733001709, |
| "ce_loss_13": 3.5886364459991453, |
| "ce_loss_2": 4.173808574676514, |
| "ce_loss_3": 3.994606840610504, |
| "ce_loss_7": 3.7157713413238525, |
| "epoch": 0.316, |
| "grad_norm": 478.0, |
| "kl_loss_10": 119.08418045043945, |
| "kl_loss_2": 1222.8418212890624, |
| "kl_loss_3": 864.8908721923829, |
| "kl_loss_7": 229.07857666015624, |
| "learning_rate": 0.0007822216094333848, |
| "loss": 627.9899, |
| "step": 3160 |
| }, |
| { |
| "ce_loss_10": 3.660254752635956, |
| "ce_loss_13": 3.592539119720459, |
| "ce_loss_2": 4.196765351295471, |
| "ce_loss_3": 4.01435557603836, |
| "ce_loss_7": 3.7205393433570864, |
| "epoch": 0.317, |
| "grad_norm": 402.0, |
| "kl_loss_10": 115.94363555908203, |
| "kl_loss_2": 1238.9798767089844, |
| "kl_loss_3": 878.1770599365234, |
| "kl_loss_7": 235.79936828613282, |
| "learning_rate": 0.0007809104449669101, |
| "loss": 611.1889, |
| "step": 3170 |
| }, |
| { |
| "ce_loss_10": 3.625234532356262, |
| "ce_loss_13": 3.5487714052200316, |
| "ce_loss_2": 4.128973770141601, |
| "ce_loss_3": 3.9529131054878235, |
| "ce_loss_7": 3.6731096148490905, |
| "epoch": 0.318, |
| "grad_norm": 524.0, |
| "kl_loss_10": 118.72456016540528, |
| "kl_loss_2": 1219.5467956542968, |
| "kl_loss_3": 854.3440185546875, |
| "kl_loss_7": 228.71927642822266, |
| "learning_rate": 0.0007795964517353734, |
| "loss": 608.5358, |
| "step": 3180 |
| }, |
| { |
| "ce_loss_10": 3.623099219799042, |
| "ce_loss_13": 3.54120157957077, |
| "ce_loss_2": 4.132599997520447, |
| "ce_loss_3": 3.9547854542732237, |
| "ce_loss_7": 3.670779359340668, |
| "epoch": 0.319, |
| "grad_norm": 438.0, |
| "kl_loss_10": 145.3066722869873, |
| "kl_loss_2": 1253.242724609375, |
| "kl_loss_3": 888.5025939941406, |
| "kl_loss_7": 249.7946647644043, |
| "learning_rate": 0.000778279642970672, |
| "loss": 614.9188, |
| "step": 3190 |
| }, |
| { |
| "ce_loss_10": 3.61579008102417, |
| "ce_loss_13": 3.5464967608451845, |
| "ce_loss_2": 4.135542809963226, |
| "ce_loss_3": 3.949288582801819, |
| "ce_loss_7": 3.6743045926094053, |
| "epoch": 0.32, |
| "grad_norm": 580.0, |
| "kl_loss_10": 120.59939308166504, |
| "kl_loss_2": 1232.2691650390625, |
| "kl_loss_3": 859.8254913330078, |
| "kl_loss_7": 236.18389816284179, |
| "learning_rate": 0.0007769600319330552, |
| "loss": 603.041, |
| "step": 3200 |
| }, |
| { |
| "ce_loss_10": 3.6462074518203735, |
| "ce_loss_13": 3.5768035650253296, |
| "ce_loss_2": 4.193181753158569, |
| "ce_loss_3": 4.002738869190216, |
| "ce_loss_7": 3.7033765077590943, |
| "epoch": 0.321, |
| "grad_norm": 536.0, |
| "kl_loss_10": 113.30461883544922, |
| "kl_loss_2": 1261.3274169921874, |
| "kl_loss_3": 880.6385803222656, |
| "kl_loss_7": 233.63602294921876, |
| "learning_rate": 0.0007756376319109917, |
| "loss": 615.0137, |
| "step": 3210 |
| }, |
| { |
| "ce_loss_10": 3.6983227729797363, |
| "ce_loss_13": 3.628855359554291, |
| "ce_loss_2": 4.215565764904023, |
| "ce_loss_3": 4.036277508735656, |
| "ce_loss_7": 3.7591845273971556, |
| "epoch": 0.322, |
| "grad_norm": 414.0, |
| "kl_loss_10": 113.85302391052247, |
| "kl_loss_2": 1215.7693420410155, |
| "kl_loss_3": 852.6664520263672, |
| "kl_loss_7": 233.59415435791016, |
| "learning_rate": 0.0007743124562210351, |
| "loss": 595.5453, |
| "step": 3220 |
| }, |
| { |
| "ce_loss_10": 3.7038231015205385, |
| "ce_loss_13": 3.636870324611664, |
| "ce_loss_2": 4.220840120315552, |
| "ce_loss_3": 4.040867578983307, |
| "ce_loss_7": 3.759516727924347, |
| "epoch": 0.323, |
| "grad_norm": 500.0, |
| "kl_loss_10": 116.7942398071289, |
| "kl_loss_2": 1231.380029296875, |
| "kl_loss_3": 860.8811431884766, |
| "kl_loss_7": 226.99792938232423, |
| "learning_rate": 0.0007729845182076895, |
| "loss": 609.7637, |
| "step": 3230 |
| }, |
| { |
| "ce_loss_10": 3.635891842842102, |
| "ce_loss_13": 3.570459449291229, |
| "ce_loss_2": 4.146735298633575, |
| "ce_loss_3": 3.971414268016815, |
| "ce_loss_7": 3.6916919469833376, |
| "epoch": 0.324, |
| "grad_norm": 544.0, |
| "kl_loss_10": 107.84456443786621, |
| "kl_loss_2": 1210.0861877441407, |
| "kl_loss_3": 854.6661926269531, |
| "kl_loss_7": 223.18558731079102, |
| "learning_rate": 0.0007716538312432765, |
| "loss": 613.749, |
| "step": 3240 |
| }, |
| { |
| "ce_loss_10": 3.5933796405792235, |
| "ce_loss_13": 3.5238136887550353, |
| "ce_loss_2": 4.137728452682495, |
| "ce_loss_3": 3.9503737330436706, |
| "ce_loss_7": 3.6502291560173035, |
| "epoch": 0.325, |
| "grad_norm": 532.0, |
| "kl_loss_10": 110.89730453491211, |
| "kl_loss_2": 1272.4953063964845, |
| "kl_loss_3": 899.5693664550781, |
| "kl_loss_7": 234.18169174194335, |
| "learning_rate": 0.0007703204087277988, |
| "loss": 621.0721, |
| "step": 3250 |
| }, |
| { |
| "ce_loss_10": 3.691065728664398, |
| "ce_loss_13": 3.625254142284393, |
| "ce_loss_2": 4.195106828212738, |
| "ce_loss_3": 4.023638522624969, |
| "ce_loss_7": 3.744424653053284, |
| "epoch": 0.326, |
| "grad_norm": 480.0, |
| "kl_loss_10": 108.84702529907227, |
| "kl_loss_2": 1187.3806762695312, |
| "kl_loss_3": 834.2469482421875, |
| "kl_loss_7": 219.25809020996093, |
| "learning_rate": 0.0007689842640888063, |
| "loss": 594.9809, |
| "step": 3260 |
| }, |
| { |
| "ce_loss_10": 3.6937523603439333, |
| "ce_loss_13": 3.6257047772407534, |
| "ce_loss_2": 4.207961022853851, |
| "ce_loss_3": 4.029592931270599, |
| "ce_loss_7": 3.7506144404411317, |
| "epoch": 0.327, |
| "grad_norm": 432.0, |
| "kl_loss_10": 109.73489418029786, |
| "kl_loss_2": 1197.2553649902343, |
| "kl_loss_3": 845.9240936279297, |
| "kl_loss_7": 224.3518325805664, |
| "learning_rate": 0.0007676454107812607, |
| "loss": 600.9104, |
| "step": 3270 |
| }, |
| { |
| "ce_loss_10": 3.6202093243598936, |
| "ce_loss_13": 3.556860589981079, |
| "ce_loss_2": 4.152219152450561, |
| "ce_loss_3": 3.972454571723938, |
| "ce_loss_7": 3.6772433161735534, |
| "epoch": 0.328, |
| "grad_norm": 552.0, |
| "kl_loss_10": 107.7342628479004, |
| "kl_loss_2": 1234.4693603515625, |
| "kl_loss_3": 866.5982177734375, |
| "kl_loss_7": 224.09054641723634, |
| "learning_rate": 0.0007663038622873999, |
| "loss": 600.4109, |
| "step": 3280 |
| }, |
| { |
| "ce_loss_10": 3.6624753713607787, |
| "ce_loss_13": 3.5959082007408143, |
| "ce_loss_2": 4.186812722682953, |
| "ce_loss_3": 4.007313239574432, |
| "ce_loss_7": 3.7183284163475037, |
| "epoch": 0.329, |
| "grad_norm": 416.0, |
| "kl_loss_10": 107.99775848388671, |
| "kl_loss_2": 1235.7617919921875, |
| "kl_loss_3": 865.4350341796875, |
| "kl_loss_7": 219.93520736694336, |
| "learning_rate": 0.0007649596321166025, |
| "loss": 596.3813, |
| "step": 3290 |
| }, |
| { |
| "ce_loss_10": 3.5629011154174806, |
| "ce_loss_13": 3.500445473194122, |
| "ce_loss_2": 4.090505909919739, |
| "ce_loss_3": 3.9089764833450316, |
| "ce_loss_7": 3.619310712814331, |
| "epoch": 0.33, |
| "grad_norm": 448.0, |
| "kl_loss_10": 101.5875473022461, |
| "kl_loss_2": 1220.246160888672, |
| "kl_loss_3": 856.5614715576172, |
| "kl_loss_7": 215.10712509155275, |
| "learning_rate": 0.0007636127338052513, |
| "loss": 603.8148, |
| "step": 3300 |
| }, |
| { |
| "ce_loss_10": 3.670552396774292, |
| "ce_loss_13": 3.6016101121902464, |
| "ce_loss_2": 4.213171231746673, |
| "ce_loss_3": 4.018688130378723, |
| "ce_loss_7": 3.727590525150299, |
| "epoch": 0.331, |
| "grad_norm": 374.0, |
| "kl_loss_10": 108.33710594177246, |
| "kl_loss_2": 1257.856024169922, |
| "kl_loss_3": 874.112905883789, |
| "kl_loss_7": 224.637939453125, |
| "learning_rate": 0.0007622631809165971, |
| "loss": 604.7203, |
| "step": 3310 |
| }, |
| { |
| "ce_loss_10": 3.671126115322113, |
| "ce_loss_13": 3.6092859148979186, |
| "ce_loss_2": 4.177222061157226, |
| "ce_loss_3": 3.9993849992752075, |
| "ce_loss_7": 3.722568082809448, |
| "epoch": 0.332, |
| "grad_norm": 414.0, |
| "kl_loss_10": 101.74094352722167, |
| "kl_loss_2": 1180.6327026367187, |
| "kl_loss_3": 821.7367553710938, |
| "kl_loss_7": 208.3566993713379, |
| "learning_rate": 0.000760910987040623, |
| "loss": 588.4586, |
| "step": 3320 |
| }, |
| { |
| "ce_loss_10": 3.64985990524292, |
| "ce_loss_13": 3.585498571395874, |
| "ce_loss_2": 4.191103303432465, |
| "ce_loss_3": 4.004770576953888, |
| "ce_loss_7": 3.7059614300727843, |
| "epoch": 0.333, |
| "grad_norm": 346.0, |
| "kl_loss_10": 102.83302307128906, |
| "kl_loss_2": 1259.7546875, |
| "kl_loss_3": 881.3513031005859, |
| "kl_loss_7": 217.63404388427733, |
| "learning_rate": 0.000759556165793906, |
| "loss": 599.8207, |
| "step": 3330 |
| }, |
| { |
| "ce_loss_10": 3.676869213581085, |
| "ce_loss_13": 3.610471022129059, |
| "ce_loss_2": 4.2084539294242855, |
| "ce_loss_3": 4.019213974475861, |
| "ce_loss_7": 3.7275768160820006, |
| "epoch": 0.334, |
| "grad_norm": 502.0, |
| "kl_loss_10": 104.88800392150878, |
| "kl_loss_2": 1223.232958984375, |
| "kl_loss_3": 852.1926971435547, |
| "kl_loss_7": 215.24551544189453, |
| "learning_rate": 0.000758198730819481, |
| "loss": 604.6092, |
| "step": 3340 |
| }, |
| { |
| "ce_loss_10": 3.616540086269379, |
| "ce_loss_13": 3.553786301612854, |
| "ce_loss_2": 4.152189195156097, |
| "ce_loss_3": 3.9668321132659914, |
| "ce_loss_7": 3.6709399580955506, |
| "epoch": 0.335, |
| "grad_norm": 488.0, |
| "kl_loss_10": 102.31456336975097, |
| "kl_loss_2": 1251.2591918945313, |
| "kl_loss_3": 875.474462890625, |
| "kl_loss_7": 214.77994079589843, |
| "learning_rate": 0.0007568386957867032, |
| "loss": 608.125, |
| "step": 3350 |
| }, |
| { |
| "ce_loss_10": 3.695429575443268, |
| "ce_loss_13": 3.6276296377182007, |
| "ce_loss_2": 4.209121763706207, |
| "ce_loss_3": 4.032123720645904, |
| "ce_loss_7": 3.749704658985138, |
| "epoch": 0.336, |
| "grad_norm": 664.0, |
| "kl_loss_10": 107.0846736907959, |
| "kl_loss_2": 1209.7884765625, |
| "kl_loss_3": 853.7374877929688, |
| "kl_loss_7": 220.54676055908203, |
| "learning_rate": 0.0007554760743911103, |
| "loss": 605.0996, |
| "step": 3360 |
| }, |
| { |
| "ce_loss_10": 3.5890319466590883, |
| "ce_loss_13": 3.5283274173736574, |
| "ce_loss_2": 4.114323127269745, |
| "ce_loss_3": 3.932225775718689, |
| "ce_loss_7": 3.644662916660309, |
| "epoch": 0.337, |
| "grad_norm": 398.0, |
| "kl_loss_10": 101.10566368103028, |
| "kl_loss_2": 1236.1671508789063, |
| "kl_loss_3": 865.7673828125, |
| "kl_loss_7": 212.85166015625, |
| "learning_rate": 0.0007541108803542846, |
| "loss": 613.867, |
| "step": 3370 |
| }, |
| { |
| "ce_loss_10": 3.6427289605140687, |
| "ce_loss_13": 3.576077425479889, |
| "ce_loss_2": 4.166507577896118, |
| "ce_loss_3": 3.9814778923988343, |
| "ce_loss_7": 3.6960788011550902, |
| "epoch": 0.338, |
| "grad_norm": 420.0, |
| "kl_loss_10": 106.68134155273438, |
| "kl_loss_2": 1229.0040222167968, |
| "kl_loss_3": 856.9913909912109, |
| "kl_loss_7": 213.85500411987306, |
| "learning_rate": 0.0007527431274237149, |
| "loss": 624.6923, |
| "step": 3380 |
| }, |
| { |
| "ce_loss_10": 3.611558997631073, |
| "ce_loss_13": 3.549490749835968, |
| "ce_loss_2": 4.114035534858703, |
| "ce_loss_3": 3.942946660518646, |
| "ce_loss_7": 3.662776732444763, |
| "epoch": 0.339, |
| "grad_norm": 406.0, |
| "kl_loss_10": 102.27137718200683, |
| "kl_loss_2": 1206.6684020996095, |
| "kl_loss_3": 846.7297576904297, |
| "kl_loss_7": 210.38721313476563, |
| "learning_rate": 0.0007513728293726579, |
| "loss": 594.8909, |
| "step": 3390 |
| }, |
| { |
| "ce_loss_10": 3.737028419971466, |
| "ce_loss_13": 3.669820773601532, |
| "ce_loss_2": 4.24596471786499, |
| "ce_loss_3": 4.065989923477173, |
| "ce_loss_7": 3.7901018500328063, |
| "epoch": 0.34, |
| "grad_norm": 456.0, |
| "kl_loss_10": 106.7515941619873, |
| "kl_loss_2": 1213.6457214355469, |
| "kl_loss_3": 848.0824188232422, |
| "kl_loss_7": 217.41063537597657, |
| "learning_rate": 0.00075, |
| "loss": 593.8513, |
| "step": 3400 |
| }, |
| { |
| "ce_loss_10": 3.719330894947052, |
| "ce_loss_13": 3.6538206934928894, |
| "ce_loss_2": 4.25202556848526, |
| "ce_loss_3": 4.069514441490173, |
| "ce_loss_7": 3.7754390835762024, |
| "epoch": 0.341, |
| "grad_norm": 442.0, |
| "kl_loss_10": 105.26911506652831, |
| "kl_loss_2": 1229.2578063964843, |
| "kl_loss_3": 857.8241027832031, |
| "kl_loss_7": 215.74853057861327, |
| "learning_rate": 0.0007486246531301177, |
| "loss": 595.3941, |
| "step": 3410 |
| }, |
| { |
| "ce_loss_10": 3.5200854897499085, |
| "ce_loss_13": 3.457200789451599, |
| "ce_loss_2": 4.057665538787842, |
| "ce_loss_3": 3.8753583312034605, |
| "ce_loss_7": 3.575985038280487, |
| "epoch": 0.342, |
| "grad_norm": 388.0, |
| "kl_loss_10": 101.49059600830078, |
| "kl_loss_2": 1229.5487548828125, |
| "kl_loss_3": 867.5537567138672, |
| "kl_loss_7": 212.1739074707031, |
| "learning_rate": 0.0007472468026127384, |
| "loss": 593.475, |
| "step": 3420 |
| }, |
| { |
| "ce_loss_10": 3.6591346502304076, |
| "ce_loss_13": 3.5927812099456786, |
| "ce_loss_2": 4.209147357940674, |
| "ce_loss_3": 4.019513976573944, |
| "ce_loss_7": 3.7172008395195006, |
| "epoch": 0.343, |
| "grad_norm": 442.0, |
| "kl_loss_10": 106.34202499389649, |
| "kl_loss_2": 1270.0667724609375, |
| "kl_loss_3": 890.6144561767578, |
| "kl_loss_7": 221.5020393371582, |
| "learning_rate": 0.000745866462322802, |
| "loss": 614.0497, |
| "step": 3430 |
| }, |
| { |
| "ce_loss_10": 3.647415816783905, |
| "ce_loss_13": 3.5850081205368043, |
| "ce_loss_2": 4.1631152629852295, |
| "ce_loss_3": 3.980070149898529, |
| "ce_loss_7": 3.7022210240364073, |
| "epoch": 0.344, |
| "grad_norm": 428.0, |
| "kl_loss_10": 103.86195526123046, |
| "kl_loss_2": 1198.3542846679688, |
| "kl_loss_3": 835.6711212158203, |
| "kl_loss_7": 208.45360870361327, |
| "learning_rate": 0.0007444836461603195, |
| "loss": 592.3941, |
| "step": 3440 |
| }, |
| { |
| "ce_loss_10": 3.7135616302490235, |
| "ce_loss_13": 3.6434731125831603, |
| "ce_loss_2": 4.233828973770142, |
| "ce_loss_3": 4.05616340637207, |
| "ce_loss_7": 3.762986993789673, |
| "epoch": 0.345, |
| "grad_norm": 548.0, |
| "kl_loss_10": 110.37765045166016, |
| "kl_loss_2": 1249.6877746582031, |
| "kl_loss_3": 880.3564361572265, |
| "kl_loss_7": 216.23881912231445, |
| "learning_rate": 0.0007430983680502344, |
| "loss": 610.9966, |
| "step": 3450 |
| }, |
| { |
| "ce_loss_10": 3.5541942715644836, |
| "ce_loss_13": 3.4891390204429626, |
| "ce_loss_2": 4.090934145450592, |
| "ce_loss_3": 3.908629584312439, |
| "ce_loss_7": 3.606754219532013, |
| "epoch": 0.346, |
| "grad_norm": 432.0, |
| "kl_loss_10": 110.62757797241211, |
| "kl_loss_2": 1245.3806091308593, |
| "kl_loss_3": 869.5422088623047, |
| "kl_loss_7": 211.6188102722168, |
| "learning_rate": 0.0007417106419422819, |
| "loss": 606.0509, |
| "step": 3460 |
| }, |
| { |
| "ce_loss_10": 3.6656701445579527, |
| "ce_loss_13": 3.596804141998291, |
| "ce_loss_2": 4.186310410499573, |
| "ce_loss_3": 4.003709590435028, |
| "ce_loss_7": 3.716957890987396, |
| "epoch": 0.347, |
| "grad_norm": 432.0, |
| "kl_loss_10": 110.30144805908203, |
| "kl_loss_2": 1208.0226745605469, |
| "kl_loss_3": 843.9369232177735, |
| "kl_loss_7": 210.9572967529297, |
| "learning_rate": 0.0007403204818108486, |
| "loss": 597.1857, |
| "step": 3470 |
| }, |
| { |
| "ce_loss_10": 3.6337965607643126, |
| "ce_loss_13": 3.5606253027915953, |
| "ce_loss_2": 4.153940236568451, |
| "ce_loss_3": 3.970261514186859, |
| "ce_loss_7": 3.680176484584808, |
| "epoch": 0.348, |
| "grad_norm": 380.0, |
| "kl_loss_10": 122.88734741210938, |
| "kl_loss_2": 1235.673895263672, |
| "kl_loss_3": 863.5119903564453, |
| "kl_loss_7": 214.55614318847657, |
| "learning_rate": 0.0007389279016548316, |
| "loss": 589.7067, |
| "step": 3480 |
| }, |
| { |
| "ce_loss_10": 3.6385215759277343, |
| "ce_loss_13": 3.5720754146575926, |
| "ce_loss_2": 4.187943410873413, |
| "ce_loss_3": 3.9984039187431337, |
| "ce_loss_7": 3.692442834377289, |
| "epoch": 0.349, |
| "grad_norm": 540.0, |
| "kl_loss_10": 110.95368614196778, |
| "kl_loss_2": 1266.4402160644531, |
| "kl_loss_3": 881.5294525146485, |
| "kl_loss_7": 217.94278945922852, |
| "learning_rate": 0.0007375329154974975, |
| "loss": 613.9418, |
| "step": 3490 |
| }, |
| { |
| "ce_loss_10": 3.5970895290374756, |
| "ce_loss_13": 3.5335337281227113, |
| "ce_loss_2": 4.117660129070282, |
| "ce_loss_3": 3.938844621181488, |
| "ce_loss_7": 3.6496007084846496, |
| "epoch": 0.35, |
| "grad_norm": 364.0, |
| "kl_loss_10": 106.09449501037598, |
| "kl_loss_2": 1217.6699768066405, |
| "kl_loss_3": 855.84267578125, |
| "kl_loss_7": 211.2824508666992, |
| "learning_rate": 0.0007361355373863414, |
| "loss": 604.2842, |
| "step": 3500 |
| }, |
| { |
| "ce_loss_10": 3.6508504867553713, |
| "ce_loss_13": 3.5859110236167906, |
| "ce_loss_2": 4.1644844770431515, |
| "ce_loss_3": 3.989104926586151, |
| "ce_loss_7": 3.7059740304946898, |
| "epoch": 0.351, |
| "grad_norm": 420.0, |
| "kl_loss_10": 105.65600318908692, |
| "kl_loss_2": 1192.6789306640626, |
| "kl_loss_3": 837.2236511230469, |
| "kl_loss_7": 210.62101364135742, |
| "learning_rate": 0.0007347357813929454, |
| "loss": 605.2478, |
| "step": 3510 |
| }, |
| { |
| "ce_loss_10": 3.5983325362205507, |
| "ce_loss_13": 3.5318838000297545, |
| "ce_loss_2": 4.108148908615112, |
| "ce_loss_3": 3.935304307937622, |
| "ce_loss_7": 3.6479654192924498, |
| "epoch": 0.352, |
| "grad_norm": 500.0, |
| "kl_loss_10": 106.45629920959473, |
| "kl_loss_2": 1190.6948181152343, |
| "kl_loss_3": 837.8225341796875, |
| "kl_loss_7": 210.1330581665039, |
| "learning_rate": 0.0007333336616128369, |
| "loss": 599.2653, |
| "step": 3520 |
| }, |
| { |
| "ce_loss_10": 3.570793068408966, |
| "ce_loss_13": 3.507152056694031, |
| "ce_loss_2": 4.106606543064117, |
| "ce_loss_3": 3.9213356494903566, |
| "ce_loss_7": 3.624741232395172, |
| "epoch": 0.353, |
| "grad_norm": 468.0, |
| "kl_loss_10": 102.9274845123291, |
| "kl_loss_2": 1231.522442626953, |
| "kl_loss_3": 864.720751953125, |
| "kl_loss_7": 214.17628860473633, |
| "learning_rate": 0.0007319291921653463, |
| "loss": 605.1452, |
| "step": 3530 |
| }, |
| { |
| "ce_loss_10": 3.6573350191116334, |
| "ce_loss_13": 3.591005003452301, |
| "ce_loss_2": 4.190282225608826, |
| "ce_loss_3": 4.010705304145813, |
| "ce_loss_7": 3.713829779624939, |
| "epoch": 0.354, |
| "grad_norm": 480.0, |
| "kl_loss_10": 105.38732643127442, |
| "kl_loss_2": 1246.1359802246093, |
| "kl_loss_3": 875.5277282714844, |
| "kl_loss_7": 217.63313064575195, |
| "learning_rate": 0.0007305223871934656, |
| "loss": 597.4614, |
| "step": 3540 |
| }, |
| { |
| "ce_loss_10": 3.6225136160850524, |
| "ce_loss_13": 3.556077516078949, |
| "ce_loss_2": 4.138617634773254, |
| "ce_loss_3": 3.9633963227272035, |
| "ce_loss_7": 3.678558957576752, |
| "epoch": 0.355, |
| "grad_norm": 502.0, |
| "kl_loss_10": 104.04609298706055, |
| "kl_loss_2": 1205.1107055664063, |
| "kl_loss_3": 845.5688415527344, |
| "kl_loss_7": 210.7905143737793, |
| "learning_rate": 0.0007291132608637052, |
| "loss": 595.3202, |
| "step": 3550 |
| }, |
| { |
| "ce_loss_10": 3.585705029964447, |
| "ce_loss_13": 3.52364000082016, |
| "ce_loss_2": 4.140194058418274, |
| "ce_loss_3": 3.939319980144501, |
| "ce_loss_7": 3.637845540046692, |
| "epoch": 0.356, |
| "grad_norm": 612.0, |
| "kl_loss_10": 100.68717575073242, |
| "kl_loss_2": 1272.5315246582031, |
| "kl_loss_3": 866.628369140625, |
| "kl_loss_7": 206.60951766967773, |
| "learning_rate": 0.0007277018273659516, |
| "loss": 612.2947, |
| "step": 3560 |
| }, |
| { |
| "ce_loss_10": 3.708829402923584, |
| "ce_loss_13": 3.6439966320991517, |
| "ce_loss_2": 4.2357800006866455, |
| "ce_loss_3": 4.058422148227692, |
| "ce_loss_7": 3.7655990600585936, |
| "epoch": 0.357, |
| "grad_norm": 400.0, |
| "kl_loss_10": 105.25033149719238, |
| "kl_loss_2": 1234.6828186035157, |
| "kl_loss_3": 864.7261169433593, |
| "kl_loss_7": 215.20211639404297, |
| "learning_rate": 0.0007262881009133242, |
| "loss": 605.0631, |
| "step": 3570 |
| }, |
| { |
| "ce_loss_10": 3.6265846729278564, |
| "ce_loss_13": 3.5641749501228333, |
| "ce_loss_2": 4.144611585140228, |
| "ce_loss_3": 3.9691020011901856, |
| "ce_loss_7": 3.6797274351119995, |
| "epoch": 0.358, |
| "grad_norm": 422.0, |
| "kl_loss_10": 101.45686912536621, |
| "kl_loss_2": 1216.0844970703124, |
| "kl_loss_3": 849.7874267578125, |
| "kl_loss_7": 208.09806137084962, |
| "learning_rate": 0.0007248720957420329, |
| "loss": 589.5256, |
| "step": 3580 |
| }, |
| { |
| "ce_loss_10": 3.6416075587272645, |
| "ce_loss_13": 3.5768683552742004, |
| "ce_loss_2": 4.156981098651886, |
| "ce_loss_3": 3.9762784600257874, |
| "ce_loss_7": 3.690297317504883, |
| "epoch": 0.359, |
| "grad_norm": 374.0, |
| "kl_loss_10": 104.18233222961426, |
| "kl_loss_2": 1196.5406433105468, |
| "kl_loss_3": 831.4658630371093, |
| "kl_loss_7": 209.4309959411621, |
| "learning_rate": 0.0007234538261112341, |
| "loss": 608.9998, |
| "step": 3590 |
| }, |
| { |
| "ce_loss_10": 3.6725340247154237, |
| "ce_loss_13": 3.6092687249183655, |
| "ce_loss_2": 4.202276730537415, |
| "ce_loss_3": 4.014237463474274, |
| "ce_loss_7": 3.7282424688339235, |
| "epoch": 0.36, |
| "grad_norm": 400.0, |
| "kl_loss_10": 101.90313911437988, |
| "kl_loss_2": 1228.7942749023437, |
| "kl_loss_3": 851.1504791259765, |
| "kl_loss_7": 214.15290603637695, |
| "learning_rate": 0.0007220333063028871, |
| "loss": 593.6457, |
| "step": 3600 |
| }, |
| { |
| "ce_loss_10": 3.7029056310653687, |
| "ce_loss_13": 3.6388812899589538, |
| "ce_loss_2": 4.263094091415406, |
| "ce_loss_3": 4.055423867702484, |
| "ce_loss_7": 3.7583480000495912, |
| "epoch": 0.361, |
| "grad_norm": 406.0, |
| "kl_loss_10": 103.6033935546875, |
| "kl_loss_2": 1316.5648254394532, |
| "kl_loss_3": 896.4495971679687, |
| "kl_loss_7": 217.90971908569335, |
| "learning_rate": 0.0007206105506216106, |
| "loss": 621.4246, |
| "step": 3610 |
| }, |
| { |
| "ce_loss_10": 3.582909846305847, |
| "ce_loss_13": 3.5207375407218935, |
| "ce_loss_2": 4.105194330215454, |
| "ce_loss_3": 3.92072172164917, |
| "ce_loss_7": 3.6367709159851076, |
| "epoch": 0.362, |
| "grad_norm": 488.0, |
| "kl_loss_10": 100.51245307922363, |
| "kl_loss_2": 1208.4382385253907, |
| "kl_loss_3": 842.719369506836, |
| "kl_loss_7": 209.43429107666014, |
| "learning_rate": 0.0007191855733945387, |
| "loss": 586.8207, |
| "step": 3620 |
| }, |
| { |
| "ce_loss_10": 3.6772588729858398, |
| "ce_loss_13": 3.611865592002869, |
| "ce_loss_2": 4.192759323120117, |
| "ce_loss_3": 4.0132176041603085, |
| "ce_loss_7": 3.7312068581581115, |
| "epoch": 0.363, |
| "grad_norm": 482.0, |
| "kl_loss_10": 103.05736274719239, |
| "kl_loss_2": 1206.339794921875, |
| "kl_loss_3": 840.5841491699218, |
| "kl_loss_7": 209.33160095214845, |
| "learning_rate": 0.0007177583889711762, |
| "loss": 590.5756, |
| "step": 3630 |
| }, |
| { |
| "ce_loss_10": 3.5943727612495424, |
| "ce_loss_13": 3.5278201699256897, |
| "ce_loss_2": 4.115126085281372, |
| "ce_loss_3": 3.9359707951545717, |
| "ce_loss_7": 3.64764518737793, |
| "epoch": 0.364, |
| "grad_norm": 474.0, |
| "kl_loss_10": 104.63778533935547, |
| "kl_loss_2": 1232.7115539550782, |
| "kl_loss_3": 867.7350891113281, |
| "kl_loss_7": 215.38798904418945, |
| "learning_rate": 0.0007163290117232541, |
| "loss": 602.1762, |
| "step": 3640 |
| }, |
| { |
| "ce_loss_10": 3.719394052028656, |
| "ce_loss_13": 3.6543713212013245, |
| "ce_loss_2": 4.207157838344574, |
| "ce_loss_3": 4.033388280868531, |
| "ce_loss_7": 3.766360378265381, |
| "epoch": 0.365, |
| "grad_norm": 516.0, |
| "kl_loss_10": 106.55956001281739, |
| "kl_loss_2": 1177.5490844726562, |
| "kl_loss_3": 820.275503540039, |
| "kl_loss_7": 210.7781494140625, |
| "learning_rate": 0.0007148974560445859, |
| "loss": 585.3312, |
| "step": 3650 |
| }, |
| { |
| "ce_loss_10": 3.63283451795578, |
| "ce_loss_13": 3.569260811805725, |
| "ce_loss_2": 4.140059876441955, |
| "ce_loss_3": 3.9612114429473877, |
| "ce_loss_7": 3.68426718711853, |
| "epoch": 0.366, |
| "grad_norm": 446.0, |
| "kl_loss_10": 101.39652633666992, |
| "kl_loss_2": 1181.2005432128906, |
| "kl_loss_3": 826.3975830078125, |
| "kl_loss_7": 208.74162216186522, |
| "learning_rate": 0.0007134637363509209, |
| "loss": 580.396, |
| "step": 3660 |
| }, |
| { |
| "ce_loss_10": 3.740837073326111, |
| "ce_loss_13": 3.676628518104553, |
| "ce_loss_2": 4.238210546970367, |
| "ce_loss_3": 4.064305305480957, |
| "ce_loss_7": 3.7917707443237303, |
| "epoch": 0.367, |
| "grad_norm": 374.0, |
| "kl_loss_10": 102.68134994506836, |
| "kl_loss_2": 1165.9671203613282, |
| "kl_loss_3": 815.8925506591797, |
| "kl_loss_7": 205.73183975219726, |
| "learning_rate": 0.0007120278670798009, |
| "loss": 586.6874, |
| "step": 3670 |
| }, |
| { |
| "ce_loss_10": 3.530075693130493, |
| "ce_loss_13": 3.467638063430786, |
| "ce_loss_2": 4.08873633146286, |
| "ce_loss_3": 3.8983967661857606, |
| "ce_loss_7": 3.590684974193573, |
| "epoch": 0.368, |
| "grad_norm": 504.0, |
| "kl_loss_10": 102.20494270324707, |
| "kl_loss_2": 1276.5897247314454, |
| "kl_loss_3": 894.699105834961, |
| "kl_loss_7": 217.834383392334, |
| "learning_rate": 0.0007105898626904133, |
| "loss": 620.3519, |
| "step": 3680 |
| }, |
| { |
| "ce_loss_10": 3.6397287964820864, |
| "ce_loss_13": 3.576084387302399, |
| "ce_loss_2": 4.165349864959717, |
| "ce_loss_3": 3.9844519972801207, |
| "ce_loss_7": 3.6932525277137755, |
| "epoch": 0.369, |
| "grad_norm": 548.0, |
| "kl_loss_10": 103.31561088562012, |
| "kl_loss_2": 1214.6401062011719, |
| "kl_loss_3": 850.1350677490234, |
| "kl_loss_7": 211.8514373779297, |
| "learning_rate": 0.0007091497376634463, |
| "loss": 587.3888, |
| "step": 3690 |
| }, |
| { |
| "ce_loss_10": 3.580397891998291, |
| "ce_loss_13": 3.518483591079712, |
| "ce_loss_2": 4.098948669433594, |
| "ce_loss_3": 3.9198103308677674, |
| "ce_loss_7": 3.633430314064026, |
| "epoch": 0.37, |
| "grad_norm": 462.0, |
| "kl_loss_10": 102.7860034942627, |
| "kl_loss_2": 1196.8778686523438, |
| "kl_loss_3": 839.7853210449218, |
| "kl_loss_7": 210.37151184082032, |
| "learning_rate": 0.0007077075065009433, |
| "loss": 599.0922, |
| "step": 3700 |
| }, |
| { |
| "ce_loss_10": 3.6922479033470155, |
| "ce_loss_13": 3.6247249126434324, |
| "ce_loss_2": 4.215528225898742, |
| "ce_loss_3": 4.034583401679993, |
| "ce_loss_7": 3.7439934253692626, |
| "epoch": 0.371, |
| "grad_norm": 436.0, |
| "kl_loss_10": 107.0543056488037, |
| "kl_loss_2": 1234.6434143066406, |
| "kl_loss_3": 869.9170135498047, |
| "kl_loss_7": 215.78035430908204, |
| "learning_rate": 0.0007062631837261557, |
| "loss": 601.1125, |
| "step": 3710 |
| }, |
| { |
| "ce_loss_10": 3.558840346336365, |
| "ce_loss_13": 3.4976505637168884, |
| "ce_loss_2": 4.082807242870331, |
| "ce_loss_3": 3.90502552986145, |
| "ce_loss_7": 3.611116898059845, |
| "epoch": 0.372, |
| "grad_norm": 418.0, |
| "kl_loss_10": 102.55169563293457, |
| "kl_loss_2": 1217.97548828125, |
| "kl_loss_3": 855.1094757080078, |
| "kl_loss_7": 209.0750946044922, |
| "learning_rate": 0.0007048167838833977, |
| "loss": 602.8635, |
| "step": 3720 |
| }, |
| { |
| "ce_loss_10": 3.6581831574440002, |
| "ce_loss_13": 3.593174624443054, |
| "ce_loss_2": 4.162305021286011, |
| "ce_loss_3": 3.9847410321235657, |
| "ce_loss_7": 3.7109787225723267, |
| "epoch": 0.373, |
| "grad_norm": 536.0, |
| "kl_loss_10": 103.06450958251953, |
| "kl_loss_2": 1197.146795654297, |
| "kl_loss_3": 834.3573669433594, |
| "kl_loss_7": 209.46187515258788, |
| "learning_rate": 0.0007033683215379002, |
| "loss": 588.3938, |
| "step": 3730 |
| }, |
| { |
| "ce_loss_10": 3.6515901923179626, |
| "ce_loss_13": 3.586732280254364, |
| "ce_loss_2": 4.166427576541901, |
| "ce_loss_3": 3.9861610412597654, |
| "ce_loss_7": 3.703124833106995, |
| "epoch": 0.374, |
| "grad_norm": 384.0, |
| "kl_loss_10": 101.91668891906738, |
| "kl_loss_2": 1196.090036010742, |
| "kl_loss_3": 834.7775848388671, |
| "kl_loss_7": 206.9270217895508, |
| "learning_rate": 0.0007019178112756625, |
| "loss": 596.7028, |
| "step": 3740 |
| }, |
| { |
| "ce_loss_10": 3.5998276591300966, |
| "ce_loss_13": 3.539226603507996, |
| "ce_loss_2": 4.120503497123718, |
| "ce_loss_3": 3.938064229488373, |
| "ce_loss_7": 3.6514668703079223, |
| "epoch": 0.375, |
| "grad_norm": 484.0, |
| "kl_loss_10": 101.7071418762207, |
| "kl_loss_2": 1206.4351013183593, |
| "kl_loss_3": 842.5018493652344, |
| "kl_loss_7": 207.55127868652343, |
| "learning_rate": 0.0007004652677033068, |
| "loss": 596.7216, |
| "step": 3750 |
| }, |
| { |
| "ce_loss_10": 3.6823023438453673, |
| "ce_loss_13": 3.6218234419822695, |
| "ce_loss_2": 4.1750637769699095, |
| "ce_loss_3": 4.004729413986206, |
| "ce_loss_7": 3.732503056526184, |
| "epoch": 0.376, |
| "grad_norm": 388.0, |
| "kl_loss_10": 99.9868221282959, |
| "kl_loss_2": 1168.4398498535156, |
| "kl_loss_3": 816.7180572509766, |
| "kl_loss_7": 201.70328750610352, |
| "learning_rate": 0.0006990107054479312, |
| "loss": 584.5167, |
| "step": 3760 |
| }, |
| { |
| "ce_loss_10": 3.667929840087891, |
| "ce_loss_13": 3.6051357984542847, |
| "ce_loss_2": 4.166206574440002, |
| "ce_loss_3": 3.9985297203063963, |
| "ce_loss_7": 3.719240057468414, |
| "epoch": 0.377, |
| "grad_norm": 496.0, |
| "kl_loss_10": 102.5582088470459, |
| "kl_loss_2": 1182.1695739746094, |
| "kl_loss_3": 832.6118957519532, |
| "kl_loss_7": 206.43120498657225, |
| "learning_rate": 0.000697554139156961, |
| "loss": 586.6759, |
| "step": 3770 |
| }, |
| { |
| "ce_loss_10": 3.648312306404114, |
| "ce_loss_13": 3.5864667892456055, |
| "ce_loss_2": 4.165168154239654, |
| "ce_loss_3": 3.980992519855499, |
| "ce_loss_7": 3.703998303413391, |
| "epoch": 0.378, |
| "grad_norm": 532.0, |
| "kl_loss_10": 102.77268753051757, |
| "kl_loss_2": 1217.0308044433593, |
| "kl_loss_3": 845.4426635742187, |
| "kl_loss_7": 211.65556106567382, |
| "learning_rate": 0.0006960955834980027, |
| "loss": 586.9333, |
| "step": 3780 |
| }, |
| { |
| "ce_loss_10": 3.624769401550293, |
| "ce_loss_13": 3.559871160984039, |
| "ce_loss_2": 4.141481828689575, |
| "ce_loss_3": 3.9655247926712036, |
| "ce_loss_7": 3.681060993671417, |
| "epoch": 0.379, |
| "grad_norm": 402.0, |
| "kl_loss_10": 104.66882057189942, |
| "kl_loss_2": 1194.9725402832032, |
| "kl_loss_3": 840.5210388183593, |
| "kl_loss_7": 214.32746124267578, |
| "learning_rate": 0.0006946350531586958, |
| "loss": 591.0428, |
| "step": 3790 |
| }, |
| { |
| "ce_loss_10": 3.6484233260154726, |
| "ce_loss_13": 3.5856125354766846, |
| "ce_loss_2": 4.168078374862671, |
| "ce_loss_3": 3.984307587146759, |
| "ce_loss_7": 3.7046299457550047, |
| "epoch": 0.38, |
| "grad_norm": 494.0, |
| "kl_loss_10": 102.10320167541504, |
| "kl_loss_2": 1202.4750549316407, |
| "kl_loss_3": 836.4282287597656, |
| "kl_loss_7": 215.46153411865234, |
| "learning_rate": 0.0006931725628465643, |
| "loss": 600.8652, |
| "step": 3800 |
| }, |
| { |
| "ce_loss_10": 3.669872498512268, |
| "ce_loss_13": 3.606708490848541, |
| "ce_loss_2": 4.190654408931732, |
| "ce_loss_3": 4.012761104106903, |
| "ce_loss_7": 3.725092887878418, |
| "epoch": 0.381, |
| "grad_norm": 462.0, |
| "kl_loss_10": 105.94147644042968, |
| "kl_loss_2": 1198.5632446289062, |
| "kl_loss_3": 842.3563995361328, |
| "kl_loss_7": 216.23879013061523, |
| "learning_rate": 0.0006917081272888696, |
| "loss": 594.3836, |
| "step": 3810 |
| }, |
| { |
| "ce_loss_10": 3.5702871322631835, |
| "ce_loss_13": 3.503624665737152, |
| "ce_loss_2": 4.083241939544678, |
| "ce_loss_3": 3.9013825416564942, |
| "ce_loss_7": 3.6281121611595153, |
| "epoch": 0.382, |
| "grad_norm": 430.0, |
| "kl_loss_10": 104.559330368042, |
| "kl_loss_2": 1205.9051391601563, |
| "kl_loss_3": 846.9787689208985, |
| "kl_loss_7": 214.2649803161621, |
| "learning_rate": 0.0006902417612324615, |
| "loss": 588.9565, |
| "step": 3820 |
| }, |
| { |
| "ce_loss_10": 3.705217492580414, |
| "ce_loss_13": 3.6370500326156616, |
| "ce_loss_2": 4.2347581624984745, |
| "ce_loss_3": 4.056124079227447, |
| "ce_loss_7": 3.7589930057525636, |
| "epoch": 0.383, |
| "grad_norm": 418.0, |
| "kl_loss_10": 107.22665023803711, |
| "kl_loss_2": 1242.482080078125, |
| "kl_loss_3": 871.0590393066407, |
| "kl_loss_7": 218.71700134277344, |
| "learning_rate": 0.00068877347944363, |
| "loss": 600.3775, |
| "step": 3830 |
| }, |
| { |
| "ce_loss_10": 3.6945597529411316, |
| "ce_loss_13": 3.6302199006080627, |
| "ce_loss_2": 4.190360188484192, |
| "ce_loss_3": 4.017442071437836, |
| "ce_loss_7": 3.74516099691391, |
| "epoch": 0.384, |
| "grad_norm": 460.0, |
| "kl_loss_10": 105.2132453918457, |
| "kl_loss_2": 1180.0169799804687, |
| "kl_loss_3": 825.1839294433594, |
| "kl_loss_7": 210.17990188598634, |
| "learning_rate": 0.0006873032967079561, |
| "loss": 592.1172, |
| "step": 3840 |
| }, |
| { |
| "ce_loss_10": 3.6860820412635804, |
| "ce_loss_13": 3.622925412654877, |
| "ce_loss_2": 4.173858499526977, |
| "ce_loss_3": 4.0060118436813354, |
| "ce_loss_7": 3.7361050128936766, |
| "epoch": 0.385, |
| "grad_norm": 444.0, |
| "kl_loss_10": 102.31974792480469, |
| "kl_loss_2": 1169.402410888672, |
| "kl_loss_3": 819.6500732421875, |
| "kl_loss_7": 207.8970947265625, |
| "learning_rate": 0.0006858312278301637, |
| "loss": 578.5368, |
| "step": 3850 |
| }, |
| { |
| "ce_loss_10": 3.724821174144745, |
| "ce_loss_13": 3.6599106669425963, |
| "ce_loss_2": 4.216867661476135, |
| "ce_loss_3": 4.043015420436859, |
| "ce_loss_7": 3.7741833090782166, |
| "epoch": 0.386, |
| "grad_norm": 628.0, |
| "kl_loss_10": 105.45792541503906, |
| "kl_loss_2": 1182.8445251464843, |
| "kl_loss_3": 827.4248168945312, |
| "kl_loss_7": 208.66201171875, |
| "learning_rate": 0.0006843572876339704, |
| "loss": 581.9299, |
| "step": 3860 |
| }, |
| { |
| "ce_loss_10": 3.639630389213562, |
| "ce_loss_13": 3.578851103782654, |
| "ce_loss_2": 4.1167685151100155, |
| "ce_loss_3": 3.953296732902527, |
| "ce_loss_7": 3.6866363167762755, |
| "epoch": 0.387, |
| "grad_norm": 402.0, |
| "kl_loss_10": 101.30325736999512, |
| "kl_loss_2": 1144.7853637695312, |
| "kl_loss_3": 802.1904113769531, |
| "kl_loss_7": 201.72076492309571, |
| "learning_rate": 0.0006828814909619373, |
| "loss": 586.7184, |
| "step": 3870 |
| }, |
| { |
| "ce_loss_10": 3.7647191643714906, |
| "ce_loss_13": 3.697379672527313, |
| "ce_loss_2": 4.260519480705261, |
| "ce_loss_3": 4.083593368530273, |
| "ce_loss_7": 3.813764202594757, |
| "epoch": 0.388, |
| "grad_norm": 350.0, |
| "kl_loss_10": 106.36605720520019, |
| "kl_loss_2": 1172.6269104003907, |
| "kl_loss_3": 820.4572174072266, |
| "kl_loss_7": 210.88503875732422, |
| "learning_rate": 0.0006814038526753205, |
| "loss": 576.9886, |
| "step": 3880 |
| }, |
| { |
| "ce_loss_10": 3.6557364583015444, |
| "ce_loss_13": 3.5924967169761657, |
| "ce_loss_2": 4.160025131702423, |
| "ce_loss_3": 3.984356963634491, |
| "ce_loss_7": 3.7067020535469055, |
| "epoch": 0.389, |
| "grad_norm": 330.0, |
| "kl_loss_10": 102.68659782409668, |
| "kl_loss_2": 1186.152001953125, |
| "kl_loss_3": 826.8501800537109, |
| "kl_loss_7": 206.71521759033203, |
| "learning_rate": 0.0006799243876539213, |
| "loss": 580.4666, |
| "step": 3890 |
| }, |
| { |
| "ce_loss_10": 3.5759631991386414, |
| "ce_loss_13": 3.5127877712249758, |
| "ce_loss_2": 4.105723321437836, |
| "ce_loss_3": 3.9167493343353272, |
| "ce_loss_7": 3.6288220643997193, |
| "epoch": 0.39, |
| "grad_norm": 536.0, |
| "kl_loss_10": 103.75163269042969, |
| "kl_loss_2": 1215.1460266113281, |
| "kl_loss_3": 839.8725982666016, |
| "kl_loss_7": 208.5065475463867, |
| "learning_rate": 0.0006784431107959359, |
| "loss": 592.4442, |
| "step": 3900 |
| }, |
| { |
| "ce_loss_10": 3.639443838596344, |
| "ce_loss_13": 3.5752380013465883, |
| "ce_loss_2": 4.170507109165191, |
| "ce_loss_3": 3.9816882967948914, |
| "ce_loss_7": 3.694754195213318, |
| "epoch": 0.391, |
| "grad_norm": 510.0, |
| "kl_loss_10": 103.07575302124023, |
| "kl_loss_2": 1237.5377136230468, |
| "kl_loss_3": 858.0287719726563, |
| "kl_loss_7": 214.26128845214845, |
| "learning_rate": 0.0006769600370178059, |
| "loss": 594.2272, |
| "step": 3910 |
| }, |
| { |
| "ce_loss_10": 3.607291209697723, |
| "ce_loss_13": 3.5426042318344115, |
| "ce_loss_2": 4.134967279434204, |
| "ce_loss_3": 3.9495469093322755, |
| "ce_loss_7": 3.6644778490066527, |
| "epoch": 0.392, |
| "grad_norm": 348.0, |
| "kl_loss_10": 100.81994514465332, |
| "kl_loss_2": 1201.7113891601562, |
| "kl_loss_3": 841.3645660400391, |
| "kl_loss_7": 207.30770874023438, |
| "learning_rate": 0.0006754751812540679, |
| "loss": 578.4809, |
| "step": 3920 |
| }, |
| { |
| "ce_loss_10": 3.6542662262916563, |
| "ce_loss_13": 3.5899597883224486, |
| "ce_loss_2": 4.172767472267151, |
| "ce_loss_3": 3.9909741401672365, |
| "ce_loss_7": 3.706152844429016, |
| "epoch": 0.393, |
| "grad_norm": 440.0, |
| "kl_loss_10": 104.03220100402832, |
| "kl_loss_2": 1209.6233947753906, |
| "kl_loss_3": 843.8147003173829, |
| "kl_loss_7": 210.7646583557129, |
| "learning_rate": 0.0006739885584572025, |
| "loss": 592.3653, |
| "step": 3930 |
| }, |
| { |
| "ce_loss_10": 3.685343015193939, |
| "ce_loss_13": 3.619848680496216, |
| "ce_loss_2": 4.199707639217377, |
| "ce_loss_3": 4.017499768733979, |
| "ce_loss_7": 3.734171211719513, |
| "epoch": 0.394, |
| "grad_norm": 564.0, |
| "kl_loss_10": 107.80731964111328, |
| "kl_loss_2": 1232.0240844726563, |
| "kl_loss_3": 850.9272064208984, |
| "kl_loss_7": 211.88618087768555, |
| "learning_rate": 0.0006725001835974853, |
| "loss": 590.3288, |
| "step": 3940 |
| }, |
| { |
| "ce_loss_10": 3.671092712879181, |
| "ce_loss_13": 3.6061443567276, |
| "ce_loss_2": 4.189756679534912, |
| "ce_loss_3": 4.005955624580383, |
| "ce_loss_7": 3.7217952370643617, |
| "epoch": 0.395, |
| "grad_norm": 472.0, |
| "kl_loss_10": 105.94960823059083, |
| "kl_loss_2": 1209.6172180175781, |
| "kl_loss_3": 848.8837646484375, |
| "kl_loss_7": 211.4744026184082, |
| "learning_rate": 0.0006710100716628344, |
| "loss": 581.9217, |
| "step": 3950 |
| }, |
| { |
| "ce_loss_10": 3.6513510942459106, |
| "ce_loss_13": 3.586063766479492, |
| "ce_loss_2": 4.175520932674408, |
| "ce_loss_3": 3.992800068855286, |
| "ce_loss_7": 3.7037784814834596, |
| "epoch": 0.396, |
| "grad_norm": 556.0, |
| "kl_loss_10": 102.45261993408204, |
| "kl_loss_2": 1202.025439453125, |
| "kl_loss_3": 843.4705932617187, |
| "kl_loss_7": 207.75647506713867, |
| "learning_rate": 0.0006695182376586602, |
| "loss": 594.7452, |
| "step": 3960 |
| }, |
| { |
| "ce_loss_10": 3.6946488857269286, |
| "ce_loss_13": 3.6310433030128477, |
| "ce_loss_2": 4.180384719371796, |
| "ce_loss_3": 4.00883582830429, |
| "ce_loss_7": 3.739116144180298, |
| "epoch": 0.397, |
| "grad_norm": 484.0, |
| "kl_loss_10": 100.45674743652344, |
| "kl_loss_2": 1141.924838256836, |
| "kl_loss_3": 795.2099151611328, |
| "kl_loss_7": 201.57386474609376, |
| "learning_rate": 0.000668024696607715, |
| "loss": 581.8865, |
| "step": 3970 |
| }, |
| { |
| "ce_loss_10": 3.63701788187027, |
| "ce_loss_13": 3.5759130001068113, |
| "ce_loss_2": 4.141798782348633, |
| "ce_loss_3": 3.965423548221588, |
| "ce_loss_7": 3.691797506809235, |
| "epoch": 0.398, |
| "grad_norm": 402.0, |
| "kl_loss_10": 99.83709602355957, |
| "kl_loss_2": 1189.6253723144532, |
| "kl_loss_3": 836.8567596435547, |
| "kl_loss_7": 210.05224533081054, |
| "learning_rate": 0.0006665294635499404, |
| "loss": 585.3059, |
| "step": 3980 |
| }, |
| { |
| "ce_loss_10": 3.645500433444977, |
| "ce_loss_13": 3.5827003002166746, |
| "ce_loss_2": 4.174324834346772, |
| "ce_loss_3": 3.992855429649353, |
| "ce_loss_7": 3.7015270590782166, |
| "epoch": 0.399, |
| "grad_norm": 438.0, |
| "kl_loss_10": 103.66120948791504, |
| "kl_loss_2": 1245.642510986328, |
| "kl_loss_3": 869.6440063476563, |
| "kl_loss_7": 216.26355361938477, |
| "learning_rate": 0.0006650325535423167, |
| "loss": 596.3225, |
| "step": 3990 |
| }, |
| { |
| "ce_loss_10": 3.6747123122215273, |
| "ce_loss_13": 3.6138512253761292, |
| "ce_loss_2": 4.168187916278839, |
| "ce_loss_3": 3.993897998332977, |
| "ce_loss_7": 3.725596582889557, |
| "epoch": 0.4, |
| "grad_norm": 520.0, |
| "kl_loss_10": 96.3211498260498, |
| "kl_loss_2": 1152.9211303710938, |
| "kl_loss_3": 801.8546081542969, |
| "kl_loss_7": 200.72928695678712, |
| "learning_rate": 0.0006635339816587109, |
| "loss": 575.9933, |
| "step": 4000 |
| }, |
| { |
| "ce_loss_10": 3.6128929018974305, |
| "ce_loss_13": 3.548132801055908, |
| "ce_loss_2": 4.128501725196839, |
| "ce_loss_3": 3.945591115951538, |
| "ce_loss_7": 3.6652005195617674, |
| "epoch": 0.401, |
| "grad_norm": 430.0, |
| "kl_loss_10": 103.19527244567871, |
| "kl_loss_2": 1214.8156677246093, |
| "kl_loss_3": 840.3229400634766, |
| "kl_loss_7": 210.74479904174805, |
| "learning_rate": 0.0006620337629897252, |
| "loss": 583.2822, |
| "step": 4010 |
| }, |
| { |
| "ce_loss_10": 3.619123613834381, |
| "ce_loss_13": 3.5573631048202516, |
| "ce_loss_2": 4.140160727500915, |
| "ce_loss_3": 3.958257591724396, |
| "ce_loss_7": 3.674074041843414, |
| "epoch": 0.402, |
| "grad_norm": 432.0, |
| "kl_loss_10": 100.38173408508301, |
| "kl_loss_2": 1207.5167907714845, |
| "kl_loss_3": 837.2485626220703, |
| "kl_loss_7": 208.48973083496094, |
| "learning_rate": 0.0006605319126425454, |
| "loss": 597.1898, |
| "step": 4020 |
| }, |
| { |
| "ce_loss_10": 3.5208260893821715, |
| "ce_loss_13": 3.4589377880096435, |
| "ce_loss_2": 4.050716698169708, |
| "ce_loss_3": 3.8632638931274412, |
| "ce_loss_7": 3.5759450912475588, |
| "epoch": 0.403, |
| "grad_norm": 420.0, |
| "kl_loss_10": 100.48741989135742, |
| "kl_loss_2": 1233.5194946289062, |
| "kl_loss_3": 854.4578369140625, |
| "kl_loss_7": 208.70274200439454, |
| "learning_rate": 0.0006590284457407876, |
| "loss": 593.5098, |
| "step": 4030 |
| }, |
| { |
| "ce_loss_10": 3.6270558714866636, |
| "ce_loss_13": 3.5626144886016844, |
| "ce_loss_2": 4.136511921882629, |
| "ce_loss_3": 3.957785797119141, |
| "ce_loss_7": 3.6768479347229004, |
| "epoch": 0.404, |
| "grad_norm": 392.0, |
| "kl_loss_10": 101.69999923706055, |
| "kl_loss_2": 1185.4601745605469, |
| "kl_loss_3": 821.0296905517578, |
| "kl_loss_7": 206.82139434814454, |
| "learning_rate": 0.0006575233774243465, |
| "loss": 582.2525, |
| "step": 4040 |
| }, |
| { |
| "ce_loss_10": 3.612906110286713, |
| "ce_loss_13": 3.550376224517822, |
| "ce_loss_2": 4.1283538222312925, |
| "ce_loss_3": 3.951547086238861, |
| "ce_loss_7": 3.667691433429718, |
| "epoch": 0.405, |
| "grad_norm": 464.0, |
| "kl_loss_10": 100.57203559875488, |
| "kl_loss_2": 1203.0161071777343, |
| "kl_loss_3": 838.8151794433594, |
| "kl_loss_7": 210.55067977905273, |
| "learning_rate": 0.0006560167228492435, |
| "loss": 587.686, |
| "step": 4050 |
| }, |
| { |
| "ce_loss_10": 3.6582042455673216, |
| "ce_loss_13": 3.597072696685791, |
| "ce_loss_2": 4.15371550321579, |
| "ce_loss_3": 3.9819828867912292, |
| "ce_loss_7": 3.7127379179000854, |
| "epoch": 0.406, |
| "grad_norm": 396.0, |
| "kl_loss_10": 97.44431228637696, |
| "kl_loss_2": 1157.4290466308594, |
| "kl_loss_3": 807.0505889892578, |
| "kl_loss_7": 202.94429702758788, |
| "learning_rate": 0.0006545084971874737, |
| "loss": 580.7177, |
| "step": 4060 |
| }, |
| { |
| "ce_loss_10": 3.6273567199707033, |
| "ce_loss_13": 3.564158725738525, |
| "ce_loss_2": 4.158101809024811, |
| "ce_loss_3": 3.9733991026878357, |
| "ce_loss_7": 3.685515010356903, |
| "epoch": 0.407, |
| "grad_norm": 372.0, |
| "kl_loss_10": 103.08215293884277, |
| "kl_loss_2": 1230.8001892089844, |
| "kl_loss_3": 853.4359588623047, |
| "kl_loss_7": 216.80452346801758, |
| "learning_rate": 0.0006529987156268526, |
| "loss": 583.8351, |
| "step": 4070 |
| }, |
| { |
| "ce_loss_10": 3.5464280128479, |
| "ce_loss_13": 3.481638014316559, |
| "ce_loss_2": 4.076263022422791, |
| "ce_loss_3": 3.8974447727203367, |
| "ce_loss_7": 3.6043801426887514, |
| "epoch": 0.408, |
| "grad_norm": 350.0, |
| "kl_loss_10": 102.87330780029296, |
| "kl_loss_2": 1214.2586059570312, |
| "kl_loss_3": 851.9112091064453, |
| "kl_loss_7": 211.73340759277343, |
| "learning_rate": 0.0006514873933708637, |
| "loss": 602.7298, |
| "step": 4080 |
| }, |
| { |
| "ce_loss_10": 3.6543262004852295, |
| "ce_loss_13": 3.5908489346504213, |
| "ce_loss_2": 4.153554606437683, |
| "ce_loss_3": 3.9771866679191588, |
| "ce_loss_7": 3.703446090221405, |
| "epoch": 0.409, |
| "grad_norm": 378.0, |
| "kl_loss_10": 100.85495872497559, |
| "kl_loss_2": 1179.416357421875, |
| "kl_loss_3": 822.3047607421875, |
| "kl_loss_7": 207.08517990112304, |
| "learning_rate": 0.0006499745456385053, |
| "loss": 579.5981, |
| "step": 4090 |
| }, |
| { |
| "ce_loss_10": 3.622114622592926, |
| "ce_loss_13": 3.5604026079177857, |
| "ce_loss_2": 4.138943600654602, |
| "ce_loss_3": 3.9601905822753904, |
| "ce_loss_7": 3.6786248087882996, |
| "epoch": 0.41, |
| "grad_norm": 460.0, |
| "kl_loss_10": 101.49279441833497, |
| "kl_loss_2": 1187.613018798828, |
| "kl_loss_3": 832.265737915039, |
| "kl_loss_7": 211.90668182373048, |
| "learning_rate": 0.0006484601876641375, |
| "loss": 591.7443, |
| "step": 4100 |
| }, |
| { |
| "ce_loss_10": 3.6106685280799864, |
| "ce_loss_13": 3.5491909265518187, |
| "ce_loss_2": 4.104636693000794, |
| "ce_loss_3": 3.9329436659812926, |
| "ce_loss_7": 3.6641584396362306, |
| "epoch": 0.411, |
| "grad_norm": 378.0, |
| "kl_loss_10": 101.25703315734863, |
| "kl_loss_2": 1168.0580017089844, |
| "kl_loss_3": 813.8080810546875, |
| "kl_loss_7": 212.12922592163085, |
| "learning_rate": 0.000646944334697328, |
| "loss": 577.3537, |
| "step": 4110 |
| }, |
| { |
| "ce_loss_10": 3.7338776111602785, |
| "ce_loss_13": 3.665091943740845, |
| "ce_loss_2": 4.2223006844520565, |
| "ce_loss_3": 4.049113523960114, |
| "ce_loss_7": 3.799789845943451, |
| "epoch": 0.412, |
| "grad_norm": 450.0, |
| "kl_loss_10": 109.65744743347167, |
| "kl_loss_2": 1151.4740142822266, |
| "kl_loss_3": 801.2218536376953, |
| "kl_loss_7": 236.72526626586915, |
| "learning_rate": 0.0006454270020026995, |
| "loss": 574.9525, |
| "step": 4120 |
| }, |
| { |
| "ce_loss_10": 3.69082772731781, |
| "ce_loss_13": 3.6286051154136656, |
| "ce_loss_2": 4.175914537906647, |
| "ce_loss_3": 4.002845597267151, |
| "ce_loss_7": 3.7393308877944946, |
| "epoch": 0.413, |
| "grad_norm": 580.0, |
| "kl_loss_10": 104.95364952087402, |
| "kl_loss_2": 1127.3133270263672, |
| "kl_loss_3": 788.5207000732422, |
| "kl_loss_7": 214.98480072021485, |
| "learning_rate": 0.0006439082048597755, |
| "loss": 564.7141, |
| "step": 4130 |
| }, |
| { |
| "ce_loss_10": 3.683094894886017, |
| "ce_loss_13": 3.61643271446228, |
| "ce_loss_2": 4.181109619140625, |
| "ce_loss_3": 4.005432403087616, |
| "ce_loss_7": 3.745869052410126, |
| "epoch": 0.414, |
| "grad_norm": 520.0, |
| "kl_loss_10": 111.28029708862304, |
| "kl_loss_2": 1178.55703125, |
| "kl_loss_3": 823.4579254150391, |
| "kl_loss_7": 238.62436599731444, |
| "learning_rate": 0.0006423879585628261, |
| "loss": 585.353, |
| "step": 4140 |
| }, |
| { |
| "ce_loss_10": 3.648063910007477, |
| "ce_loss_13": 3.579416477680206, |
| "ce_loss_2": 4.166888773441315, |
| "ce_loss_3": 3.98115758895874, |
| "ce_loss_7": 3.7089965462684633, |
| "epoch": 0.415, |
| "grad_norm": 402.0, |
| "kl_loss_10": 109.57027854919434, |
| "kl_loss_2": 1214.0814270019532, |
| "kl_loss_3": 843.1505004882813, |
| "kl_loss_7": 233.17276763916016, |
| "learning_rate": 0.0006408662784207149, |
| "loss": 596.7986, |
| "step": 4150 |
| }, |
| { |
| "ce_loss_10": 3.596502733230591, |
| "ce_loss_13": 3.5327386379241945, |
| "ce_loss_2": 4.09819370508194, |
| "ce_loss_3": 3.9237332344055176, |
| "ce_loss_7": 3.654523158073425, |
| "epoch": 0.416, |
| "grad_norm": 544.0, |
| "kl_loss_10": 99.90503120422363, |
| "kl_loss_2": 1189.1891540527345, |
| "kl_loss_3": 823.6777069091797, |
| "kl_loss_7": 211.67333221435547, |
| "learning_rate": 0.0006393431797567439, |
| "loss": 583.1826, |
| "step": 4160 |
| }, |
| { |
| "ce_loss_10": 3.6853842735290527, |
| "ce_loss_13": 3.622405004501343, |
| "ce_loss_2": 4.1561102867126465, |
| "ce_loss_3": 3.9865566968917845, |
| "ce_loss_7": 3.7344152450561525, |
| "epoch": 0.417, |
| "grad_norm": 384.0, |
| "kl_loss_10": 103.1281753540039, |
| "kl_loss_2": 1144.869805908203, |
| "kl_loss_3": 800.3423767089844, |
| "kl_loss_7": 211.40862579345702, |
| "learning_rate": 0.0006378186779084996, |
| "loss": 557.4173, |
| "step": 4170 |
| }, |
| { |
| "ce_loss_10": 3.5140963315963747, |
| "ce_loss_13": 3.452511179447174, |
| "ce_loss_2": 4.041843056678772, |
| "ce_loss_3": 3.857197344303131, |
| "ce_loss_7": 3.571711480617523, |
| "epoch": 0.418, |
| "grad_norm": 464.0, |
| "kl_loss_10": 100.09027862548828, |
| "kl_loss_2": 1203.0338989257812, |
| "kl_loss_3": 838.9081939697265, |
| "kl_loss_7": 213.11346130371095, |
| "learning_rate": 0.0006362927882276989, |
| "loss": 588.2966, |
| "step": 4180 |
| }, |
| { |
| "ce_loss_10": 3.7188942313194273, |
| "ce_loss_13": 3.6518460750579833, |
| "ce_loss_2": 4.204531168937683, |
| "ce_loss_3": 4.025935411453247, |
| "ce_loss_7": 3.7728618144989015, |
| "epoch": 0.419, |
| "grad_norm": 426.0, |
| "kl_loss_10": 103.15027618408203, |
| "kl_loss_2": 1156.1428161621093, |
| "kl_loss_3": 794.2856292724609, |
| "kl_loss_7": 211.89537048339844, |
| "learning_rate": 0.000634765526080034, |
| "loss": 562.2326, |
| "step": 4190 |
| }, |
| { |
| "ce_loss_10": 3.717780148983002, |
| "ce_loss_13": 3.6511818051338194, |
| "ce_loss_2": 4.210239946842194, |
| "ce_loss_3": 4.0393988490104675, |
| "ce_loss_7": 3.7724336862564085, |
| "epoch": 0.42, |
| "grad_norm": 456.0, |
| "kl_loss_10": 104.51988563537597, |
| "kl_loss_2": 1161.7059631347656, |
| "kl_loss_3": 818.392855834961, |
| "kl_loss_7": 219.07965316772462, |
| "learning_rate": 0.0006332369068450174, |
| "loss": 570.1012, |
| "step": 4200 |
| }, |
| { |
| "ce_loss_10": 3.648071753978729, |
| "ce_loss_13": 3.5840353846549986, |
| "ce_loss_2": 4.147714996337891, |
| "ce_loss_3": 3.972030484676361, |
| "ce_loss_7": 3.7039226770401, |
| "epoch": 0.421, |
| "grad_norm": 426.0, |
| "kl_loss_10": 101.72255935668946, |
| "kl_loss_2": 1175.2358459472657, |
| "kl_loss_3": 821.6455657958984, |
| "kl_loss_7": 216.67398834228516, |
| "learning_rate": 0.0006317069459158283, |
| "loss": 576.074, |
| "step": 4210 |
| }, |
| { |
| "ce_loss_10": 3.766611933708191, |
| "ce_loss_13": 3.7019524574279785, |
| "ce_loss_2": 4.238518404960632, |
| "ce_loss_3": 4.070182096958161, |
| "ce_loss_7": 3.818829393386841, |
| "epoch": 0.422, |
| "grad_norm": 404.0, |
| "kl_loss_10": 102.42731742858886, |
| "kl_loss_2": 1134.2777221679687, |
| "kl_loss_3": 793.5420806884765, |
| "kl_loss_7": 214.86822509765625, |
| "learning_rate": 0.0006301756586991561, |
| "loss": 572.4437, |
| "step": 4220 |
| }, |
| { |
| "ce_loss_10": 3.538297724723816, |
| "ce_loss_13": 3.4769801259040833, |
| "ce_loss_2": 4.051598787307739, |
| "ce_loss_3": 3.8692006349563597, |
| "ce_loss_7": 3.592081093788147, |
| "epoch": 0.423, |
| "grad_norm": 524.0, |
| "kl_loss_10": 100.02308959960938, |
| "kl_loss_2": 1219.534228515625, |
| "kl_loss_3": 847.8958953857422, |
| "kl_loss_7": 217.3907485961914, |
| "learning_rate": 0.0006286430606150459, |
| "loss": 590.4341, |
| "step": 4230 |
| }, |
| { |
| "ce_loss_10": 3.732722854614258, |
| "ce_loss_13": 3.670178234577179, |
| "ce_loss_2": 4.228793060779571, |
| "ce_loss_3": 4.055911266803742, |
| "ce_loss_7": 3.7854557275772094, |
| "epoch": 0.424, |
| "grad_norm": 440.0, |
| "kl_loss_10": 101.63710746765136, |
| "kl_loss_2": 1171.4819213867188, |
| "kl_loss_3": 815.24853515625, |
| "kl_loss_7": 212.84099502563475, |
| "learning_rate": 0.0006271091670967436, |
| "loss": 572.0026, |
| "step": 4240 |
| }, |
| { |
| "ce_loss_10": 3.64589341878891, |
| "ce_loss_13": 3.579445707798004, |
| "ce_loss_2": 4.168534743785858, |
| "ce_loss_3": 3.9873276472091677, |
| "ce_loss_7": 3.7041419625282286, |
| "epoch": 0.425, |
| "grad_norm": 436.0, |
| "kl_loss_10": 105.33321189880371, |
| "kl_loss_2": 1223.9686584472656, |
| "kl_loss_3": 856.7900268554688, |
| "kl_loss_7": 219.8565589904785, |
| "learning_rate": 0.0006255739935905395, |
| "loss": 587.2729, |
| "step": 4250 |
| }, |
| { |
| "ce_loss_10": 3.684093916416168, |
| "ce_loss_13": 3.622530627250671, |
| "ce_loss_2": 4.176068413257599, |
| "ce_loss_3": 4.005461478233338, |
| "ce_loss_7": 3.73612722158432, |
| "epoch": 0.426, |
| "grad_norm": 444.0, |
| "kl_loss_10": 101.16957168579101, |
| "kl_loss_2": 1151.114599609375, |
| "kl_loss_3": 804.5711151123047, |
| "kl_loss_7": 206.51019058227538, |
| "learning_rate": 0.0006240375555556145, |
| "loss": 584.5814, |
| "step": 4260 |
| }, |
| { |
| "ce_loss_10": 3.694865620136261, |
| "ce_loss_13": 3.6328345060348513, |
| "ce_loss_2": 4.216705179214477, |
| "ce_loss_3": 4.035941934585571, |
| "ce_loss_7": 3.7489806532859804, |
| "epoch": 0.427, |
| "grad_norm": 544.0, |
| "kl_loss_10": 102.23134536743164, |
| "kl_loss_2": 1200.0044555664062, |
| "kl_loss_3": 832.4086944580079, |
| "kl_loss_7": 208.58624954223632, |
| "learning_rate": 0.000622499868463882, |
| "loss": 581.1191, |
| "step": 4270 |
| }, |
| { |
| "ce_loss_10": 3.6664886713027953, |
| "ce_loss_13": 3.6031296968460085, |
| "ce_loss_2": 4.138775157928467, |
| "ce_loss_3": 3.968552088737488, |
| "ce_loss_7": 3.716127264499664, |
| "epoch": 0.428, |
| "grad_norm": 442.0, |
| "kl_loss_10": 102.83601112365723, |
| "kl_loss_2": 1148.9752075195313, |
| "kl_loss_3": 798.4193389892578, |
| "kl_loss_7": 204.8626609802246, |
| "learning_rate": 0.0006209609477998338, |
| "loss": 570.8694, |
| "step": 4280 |
| }, |
| { |
| "ce_loss_10": 3.7170133352279664, |
| "ce_loss_13": 3.6512863278388976, |
| "ce_loss_2": 4.214985513687134, |
| "ce_loss_3": 4.041373360157013, |
| "ce_loss_7": 3.76862713098526, |
| "epoch": 0.429, |
| "grad_norm": 492.0, |
| "kl_loss_10": 105.98460693359375, |
| "kl_loss_2": 1171.2547790527344, |
| "kl_loss_3": 819.7431121826172, |
| "kl_loss_7": 209.78300704956055, |
| "learning_rate": 0.0006194208090603844, |
| "loss": 582.6892, |
| "step": 4290 |
| }, |
| { |
| "ce_loss_10": 3.636822462081909, |
| "ce_loss_13": 3.572554814815521, |
| "ce_loss_2": 4.128273499011994, |
| "ce_loss_3": 3.9540862798690797, |
| "ce_loss_7": 3.6845338463783266, |
| "epoch": 0.43, |
| "grad_norm": 384.0, |
| "kl_loss_10": 104.19713554382324, |
| "kl_loss_2": 1158.2531616210938, |
| "kl_loss_3": 808.0290679931641, |
| "kl_loss_7": 201.06265716552736, |
| "learning_rate": 0.0006178794677547138, |
| "loss": 566.7275, |
| "step": 4300 |
| }, |
| { |
| "ce_loss_10": 3.669668412208557, |
| "ce_loss_13": 3.6048370003700256, |
| "ce_loss_2": 4.167822825908661, |
| "ce_loss_3": 3.990470898151398, |
| "ce_loss_7": 3.7204079270362853, |
| "epoch": 0.431, |
| "grad_norm": 462.0, |
| "kl_loss_10": 105.12696495056153, |
| "kl_loss_2": 1189.7153015136719, |
| "kl_loss_3": 827.7414642333985, |
| "kl_loss_7": 209.76073608398437, |
| "learning_rate": 0.0006163369394041111, |
| "loss": 578.5617, |
| "step": 4310 |
| }, |
| { |
| "ce_loss_10": 3.603849542140961, |
| "ce_loss_13": 3.540567708015442, |
| "ce_loss_2": 4.114995861053467, |
| "ce_loss_3": 3.93278226852417, |
| "ce_loss_7": 3.6533514499664306, |
| "epoch": 0.432, |
| "grad_norm": 524.0, |
| "kl_loss_10": 103.23071632385253, |
| "kl_loss_2": 1199.0398742675782, |
| "kl_loss_3": 837.4948120117188, |
| "kl_loss_7": 206.72886505126954, |
| "learning_rate": 0.0006147932395418205, |
| "loss": 593.6705, |
| "step": 4320 |
| }, |
| { |
| "ce_loss_10": 3.6318950057029724, |
| "ce_loss_13": 3.5694007515907287, |
| "ce_loss_2": 4.121479880809784, |
| "ce_loss_3": 3.9539971709251405, |
| "ce_loss_7": 3.6812774300575257, |
| "epoch": 0.433, |
| "grad_norm": 372.0, |
| "kl_loss_10": 101.08283462524415, |
| "kl_loss_2": 1163.6617614746094, |
| "kl_loss_3": 814.8068634033203, |
| "kl_loss_7": 204.31798858642577, |
| "learning_rate": 0.0006132483837128823, |
| "loss": 570.1899, |
| "step": 4330 |
| }, |
| { |
| "ce_loss_10": 3.6211368441581726, |
| "ce_loss_13": 3.5578442931175234, |
| "ce_loss_2": 4.120713996887207, |
| "ce_loss_3": 3.9408787965774534, |
| "ce_loss_7": 3.6715193152427674, |
| "epoch": 0.434, |
| "grad_norm": 380.0, |
| "kl_loss_10": 102.18530006408692, |
| "kl_loss_2": 1181.1154479980469, |
| "kl_loss_3": 821.5291748046875, |
| "kl_loss_7": 205.94673614501954, |
| "learning_rate": 0.0006117023874739772, |
| "loss": 579.966, |
| "step": 4340 |
| }, |
| { |
| "ce_loss_10": 3.606392514705658, |
| "ce_loss_13": 3.542631506919861, |
| "ce_loss_2": 4.1229788064956665, |
| "ce_loss_3": 3.943661665916443, |
| "ce_loss_7": 3.660093939304352, |
| "epoch": 0.435, |
| "grad_norm": 366.0, |
| "kl_loss_10": 101.41253623962402, |
| "kl_loss_2": 1198.5234008789062, |
| "kl_loss_3": 836.8120849609375, |
| "kl_loss_7": 206.9767189025879, |
| "learning_rate": 0.0006101552663932703, |
| "loss": 586.1095, |
| "step": 4350 |
| }, |
| { |
| "ce_loss_10": 3.6401270270347594, |
| "ce_loss_13": 3.5747036576271056, |
| "ce_loss_2": 4.133774304389954, |
| "ce_loss_3": 3.9579702854156493, |
| "ce_loss_7": 3.689171576499939, |
| "epoch": 0.436, |
| "grad_norm": 432.0, |
| "kl_loss_10": 103.28445014953613, |
| "kl_loss_2": 1170.830484008789, |
| "kl_loss_3": 821.6876098632813, |
| "kl_loss_7": 207.47048645019532, |
| "learning_rate": 0.0006086070360502539, |
| "loss": 578.1617, |
| "step": 4360 |
| }, |
| { |
| "ce_loss_10": 3.6478831648826597, |
| "ce_loss_13": 3.5829063415527345, |
| "ce_loss_2": 4.140194344520569, |
| "ce_loss_3": 3.9674217224121096, |
| "ce_loss_7": 3.6954386711120604, |
| "epoch": 0.437, |
| "grad_norm": 324.0, |
| "kl_loss_10": 102.49744033813477, |
| "kl_loss_2": 1182.2726196289063, |
| "kl_loss_3": 820.302099609375, |
| "kl_loss_7": 202.6822937011719, |
| "learning_rate": 0.0006070577120355903, |
| "loss": 585.725, |
| "step": 4370 |
| }, |
| { |
| "ce_loss_10": 3.6493834018707276, |
| "ce_loss_13": 3.585710608959198, |
| "ce_loss_2": 4.1475905418396, |
| "ce_loss_3": 3.9780289769172668, |
| "ce_loss_7": 3.6994438648223875, |
| "epoch": 0.438, |
| "grad_norm": 464.0, |
| "kl_loss_10": 99.22572135925293, |
| "kl_loss_2": 1158.4001525878907, |
| "kl_loss_3": 817.9062316894531, |
| "kl_loss_7": 200.7786117553711, |
| "learning_rate": 0.0006055073099509549, |
| "loss": 570.4337, |
| "step": 4380 |
| }, |
| { |
| "ce_loss_10": 3.7072151064872743, |
| "ce_loss_13": 3.6444019198417665, |
| "ce_loss_2": 4.1913762331008915, |
| "ce_loss_3": 4.024674141407013, |
| "ce_loss_7": 3.755181634426117, |
| "epoch": 0.439, |
| "grad_norm": 414.0, |
| "kl_loss_10": 101.21295433044433, |
| "kl_loss_2": 1155.983868408203, |
| "kl_loss_3": 813.5707092285156, |
| "kl_loss_7": 201.68513870239258, |
| "learning_rate": 0.0006039558454088796, |
| "loss": 578.4039, |
| "step": 4390 |
| }, |
| { |
| "ce_loss_10": 3.6866373896598814, |
| "ce_loss_13": 3.6209323048591613, |
| "ce_loss_2": 4.190221071243286, |
| "ce_loss_3": 4.017517447471619, |
| "ce_loss_7": 3.736443567276001, |
| "epoch": 0.44, |
| "grad_norm": 388.0, |
| "kl_loss_10": 103.66101570129395, |
| "kl_loss_2": 1179.7899597167968, |
| "kl_loss_3": 831.9971649169922, |
| "kl_loss_7": 206.1973434448242, |
| "learning_rate": 0.0006024033340325954, |
| "loss": 572.2276, |
| "step": 4400 |
| }, |
| { |
| "ce_loss_10": 3.7494076251983643, |
| "ce_loss_13": 3.6860761404037476, |
| "ce_loss_2": 4.22088440656662, |
| "ce_loss_3": 4.061302840709686, |
| "ce_loss_7": 3.7976527214050293, |
| "epoch": 0.441, |
| "grad_norm": 384.0, |
| "kl_loss_10": 100.95717124938965, |
| "kl_loss_2": 1117.0268005371095, |
| "kl_loss_3": 788.523080444336, |
| "kl_loss_7": 197.15192718505858, |
| "learning_rate": 0.0006008497914558743, |
| "loss": 559.696, |
| "step": 4410 |
| }, |
| { |
| "ce_loss_10": 3.689165186882019, |
| "ce_loss_13": 3.6250773549079893, |
| "ce_loss_2": 4.1833924651145935, |
| "ce_loss_3": 4.016273534297943, |
| "ce_loss_7": 3.738771951198578, |
| "epoch": 0.442, |
| "grad_norm": 476.0, |
| "kl_loss_10": 105.19830055236817, |
| "kl_loss_2": 1174.740167236328, |
| "kl_loss_3": 830.987890625, |
| "kl_loss_7": 209.00811996459962, |
| "learning_rate": 0.0005992952333228728, |
| "loss": 576.4588, |
| "step": 4420 |
| }, |
| { |
| "ce_loss_10": 3.620419418811798, |
| "ce_loss_13": 3.5588944792747497, |
| "ce_loss_2": 4.125707459449768, |
| "ce_loss_3": 3.9479523420333864, |
| "ce_loss_7": 3.6681005358695984, |
| "epoch": 0.443, |
| "grad_norm": 464.0, |
| "kl_loss_10": 100.17966499328614, |
| "kl_loss_2": 1181.0232360839843, |
| "kl_loss_3": 829.0245361328125, |
| "kl_loss_7": 201.25574188232423, |
| "learning_rate": 0.0005977396752879741, |
| "loss": 577.6452, |
| "step": 4430 |
| }, |
| { |
| "ce_loss_10": 3.5535963416099547, |
| "ce_loss_13": 3.4911730885505676, |
| "ce_loss_2": 4.057285642623901, |
| "ce_loss_3": 3.882522702217102, |
| "ce_loss_7": 3.603209447860718, |
| "epoch": 0.444, |
| "grad_norm": 450.0, |
| "kl_loss_10": 96.56860618591308, |
| "kl_loss_2": 1184.1321594238282, |
| "kl_loss_3": 827.8955352783203, |
| "kl_loss_7": 199.06893157958984, |
| "learning_rate": 0.0005961831330156305, |
| "loss": 569.2716, |
| "step": 4440 |
| }, |
| { |
| "ce_loss_10": 3.697277545928955, |
| "ce_loss_13": 3.6338467955589295, |
| "ce_loss_2": 4.1992070317268375, |
| "ce_loss_3": 4.02395384311676, |
| "ce_loss_7": 3.747213661670685, |
| "epoch": 0.445, |
| "grad_norm": 392.0, |
| "kl_loss_10": 101.60056228637696, |
| "kl_loss_2": 1189.420147705078, |
| "kl_loss_3": 827.8122314453125, |
| "kl_loss_7": 205.08227157592773, |
| "learning_rate": 0.0005946256221802051, |
| "loss": 584.411, |
| "step": 4450 |
| }, |
| { |
| "ce_loss_10": 3.679532468318939, |
| "ce_loss_13": 3.6183473825454713, |
| "ce_loss_2": 4.146489477157592, |
| "ce_loss_3": 3.9755648136138917, |
| "ce_loss_7": 3.7207812786102297, |
| "epoch": 0.446, |
| "grad_norm": 494.0, |
| "kl_loss_10": 101.10317420959473, |
| "kl_loss_2": 1119.8320098876952, |
| "kl_loss_3": 779.770297241211, |
| "kl_loss_7": 198.91878814697264, |
| "learning_rate": 0.0005930671584658151, |
| "loss": 578.7685, |
| "step": 4460 |
| }, |
| { |
| "ce_loss_10": 3.674864172935486, |
| "ce_loss_13": 3.6118743062019347, |
| "ce_loss_2": 4.166282546520233, |
| "ce_loss_3": 3.9925308227539062, |
| "ce_loss_7": 3.7198517322540283, |
| "epoch": 0.447, |
| "grad_norm": 364.0, |
| "kl_loss_10": 100.75155410766601, |
| "kl_loss_2": 1165.5830871582032, |
| "kl_loss_3": 814.2670196533203, |
| "kl_loss_7": 201.9087059020996, |
| "learning_rate": 0.0005915077575661722, |
| "loss": 579.8401, |
| "step": 4470 |
| }, |
| { |
| "ce_loss_10": 3.694182288646698, |
| "ce_loss_13": 3.628465461730957, |
| "ce_loss_2": 4.190526556968689, |
| "ce_loss_3": 4.015213489532471, |
| "ce_loss_7": 3.7417189121246337, |
| "epoch": 0.448, |
| "grad_norm": 520.0, |
| "kl_loss_10": 105.40261840820312, |
| "kl_loss_2": 1179.2632690429687, |
| "kl_loss_3": 825.197119140625, |
| "kl_loss_7": 209.67544021606446, |
| "learning_rate": 0.000589947435184427, |
| "loss": 569.8479, |
| "step": 4480 |
| }, |
| { |
| "ce_loss_10": 3.7602591633796694, |
| "ce_loss_13": 3.6975467801094055, |
| "ce_loss_2": 4.231885468959808, |
| "ce_loss_3": 4.062859082221985, |
| "ce_loss_7": 3.8065670251846315, |
| "epoch": 0.449, |
| "grad_norm": 406.0, |
| "kl_loss_10": 104.7243579864502, |
| "kl_loss_2": 1147.1027252197266, |
| "kl_loss_3": 795.4058624267578, |
| "kl_loss_7": 203.6425910949707, |
| "learning_rate": 0.0005883862070330078, |
| "loss": 568.9265, |
| "step": 4490 |
| }, |
| { |
| "ce_loss_10": 3.6874640941619874, |
| "ce_loss_13": 3.6227025985717773, |
| "ce_loss_2": 4.18091858625412, |
| "ce_loss_3": 4.004498326778412, |
| "ce_loss_7": 3.7389190554618836, |
| "epoch": 0.45, |
| "grad_norm": 342.0, |
| "kl_loss_10": 102.03626098632813, |
| "kl_loss_2": 1166.0193176269531, |
| "kl_loss_3": 811.4805572509765, |
| "kl_loss_7": 204.2785285949707, |
| "learning_rate": 0.0005868240888334653, |
| "loss": 567.3452, |
| "step": 4500 |
| }, |
| { |
| "ce_loss_10": 3.570815551280975, |
| "ce_loss_13": 3.508398413658142, |
| "ce_loss_2": 4.096131467819214, |
| "ce_loss_3": 3.9093389391899107, |
| "ce_loss_7": 3.625988078117371, |
| "epoch": 0.451, |
| "grad_norm": 616.0, |
| "kl_loss_10": 100.9030990600586, |
| "kl_loss_2": 1212.356463623047, |
| "kl_loss_3": 839.7065948486328, |
| "kl_loss_7": 207.68597564697265, |
| "learning_rate": 0.0005852610963163119, |
| "loss": 584.0681, |
| "step": 4510 |
| }, |
| { |
| "ce_loss_10": 3.5951132655143736, |
| "ce_loss_13": 3.5340840578079225, |
| "ce_loss_2": 4.088473439216614, |
| "ce_loss_3": 3.9123128294944762, |
| "ce_loss_7": 3.6418415188789366, |
| "epoch": 0.452, |
| "grad_norm": 440.0, |
| "kl_loss_10": 97.94427604675293, |
| "kl_loss_2": 1155.4515991210938, |
| "kl_loss_3": 802.8143249511719, |
| "kl_loss_7": 198.15041809082032, |
| "learning_rate": 0.0005836972452208654, |
| "loss": 560.779, |
| "step": 4520 |
| }, |
| { |
| "ce_loss_10": 3.6001816511154177, |
| "ce_loss_13": 3.540806245803833, |
| "ce_loss_2": 4.105304884910583, |
| "ce_loss_3": 3.9283313751220703, |
| "ce_loss_7": 3.6497029066085815, |
| "epoch": 0.453, |
| "grad_norm": 470.0, |
| "kl_loss_10": 99.28575630187989, |
| "kl_loss_2": 1176.1295288085937, |
| "kl_loss_3": 817.2998046875, |
| "kl_loss_7": 202.73690338134764, |
| "learning_rate": 0.0005821325512950885, |
| "loss": 572.314, |
| "step": 4530 |
| }, |
| { |
| "ce_loss_10": 3.629274320602417, |
| "ce_loss_13": 3.5687419891357424, |
| "ce_loss_2": 4.1162322640419005, |
| "ce_loss_3": 3.9458845138549803, |
| "ce_loss_7": 3.680540406703949, |
| "epoch": 0.454, |
| "grad_norm": 368.0, |
| "kl_loss_10": 96.52360496520996, |
| "kl_loss_2": 1136.2307861328125, |
| "kl_loss_3": 790.6944702148437, |
| "kl_loss_7": 197.31127700805663, |
| "learning_rate": 0.0005805670302954321, |
| "loss": 568.0196, |
| "step": 4540 |
| }, |
| { |
| "ce_loss_10": 3.6337098717689513, |
| "ce_loss_13": 3.5753876209259032, |
| "ce_loss_2": 4.115709042549133, |
| "ce_loss_3": 3.9439353704452516, |
| "ce_loss_7": 3.6809528470039368, |
| "epoch": 0.455, |
| "grad_norm": 434.0, |
| "kl_loss_10": 95.89570465087891, |
| "kl_loss_2": 1140.969873046875, |
| "kl_loss_3": 792.410400390625, |
| "kl_loss_7": 194.6849395751953, |
| "learning_rate": 0.000579000697986675, |
| "loss": 559.3398, |
| "step": 4550 |
| }, |
| { |
| "ce_loss_10": 3.5949880719184875, |
| "ce_loss_13": 3.5312354803085326, |
| "ce_loss_2": 4.110612523555756, |
| "ce_loss_3": 3.9363887429237367, |
| "ce_loss_7": 3.6481791853904726, |
| "epoch": 0.456, |
| "grad_norm": 398.0, |
| "kl_loss_10": 102.14065132141113, |
| "kl_loss_2": 1200.508935546875, |
| "kl_loss_3": 844.4349182128906, |
| "kl_loss_7": 207.93037872314454, |
| "learning_rate": 0.0005774335701417662, |
| "loss": 577.7247, |
| "step": 4560 |
| }, |
| { |
| "ce_loss_10": 3.578439974784851, |
| "ce_loss_13": 3.5177830338478087, |
| "ce_loss_2": 4.086728799343109, |
| "ce_loss_3": 3.9092958092689516, |
| "ce_loss_7": 3.628882908821106, |
| "epoch": 0.457, |
| "grad_norm": 438.0, |
| "kl_loss_10": 98.15573539733887, |
| "kl_loss_2": 1190.6679321289062, |
| "kl_loss_3": 827.183969116211, |
| "kl_loss_7": 201.49042510986328, |
| "learning_rate": 0.0005758656625416658, |
| "loss": 579.3393, |
| "step": 4570 |
| }, |
| { |
| "ce_loss_10": 3.6351425409317017, |
| "ce_loss_13": 3.5740421295166014, |
| "ce_loss_2": 4.13430563211441, |
| "ce_loss_3": 3.9581828236579897, |
| "ce_loss_7": 3.685711920261383, |
| "epoch": 0.458, |
| "grad_norm": 378.0, |
| "kl_loss_10": 98.59328498840333, |
| "kl_loss_2": 1165.538037109375, |
| "kl_loss_3": 813.1740905761719, |
| "kl_loss_7": 200.91252059936522, |
| "learning_rate": 0.0005742969909751859, |
| "loss": 562.4629, |
| "step": 4580 |
| }, |
| { |
| "ce_loss_10": 3.6438634276390074, |
| "ce_loss_13": 3.5822227597236633, |
| "ce_loss_2": 4.139957237243652, |
| "ce_loss_3": 3.96221022605896, |
| "ce_loss_7": 3.692858374118805, |
| "epoch": 0.459, |
| "grad_norm": 396.0, |
| "kl_loss_10": 100.12554626464843, |
| "kl_loss_2": 1167.3160705566406, |
| "kl_loss_3": 805.8544036865235, |
| "kl_loss_7": 201.26202087402345, |
| "learning_rate": 0.0005727275712388318, |
| "loss": 570.0833, |
| "step": 4590 |
| }, |
| { |
| "ce_loss_10": 3.681215536594391, |
| "ce_loss_13": 3.620731198787689, |
| "ce_loss_2": 4.155962944030762, |
| "ce_loss_3": 3.984270441532135, |
| "ce_loss_7": 3.7283701658248902, |
| "epoch": 0.46, |
| "grad_norm": 568.0, |
| "kl_loss_10": 98.76027946472168, |
| "kl_loss_2": 1132.1197998046875, |
| "kl_loss_3": 792.0047241210938, |
| "kl_loss_7": 197.17216033935546, |
| "learning_rate": 0.0005711574191366427, |
| "loss": 562.7997, |
| "step": 4600 |
| }, |
| { |
| "ce_loss_10": 3.6236431002616882, |
| "ce_loss_13": 3.565703308582306, |
| "ce_loss_2": 4.114531934261322, |
| "ce_loss_3": 3.93969669342041, |
| "ce_loss_7": 3.671102833747864, |
| "epoch": 0.461, |
| "grad_norm": 372.0, |
| "kl_loss_10": 98.42190704345703, |
| "kl_loss_2": 1170.4917938232422, |
| "kl_loss_3": 808.7791198730469, |
| "kl_loss_7": 199.0694892883301, |
| "learning_rate": 0.0005695865504800327, |
| "loss": 564.0159, |
| "step": 4610 |
| }, |
| { |
| "ce_loss_10": 3.562722647190094, |
| "ce_loss_13": 3.500598740577698, |
| "ce_loss_2": 4.109580218791962, |
| "ce_loss_3": 3.9190361380577086, |
| "ce_loss_7": 3.6191172361373902, |
| "epoch": 0.462, |
| "grad_norm": 480.0, |
| "kl_loss_10": 100.51305274963379, |
| "kl_loss_2": 1233.0393005371093, |
| "kl_loss_3": 860.259619140625, |
| "kl_loss_7": 208.89999542236328, |
| "learning_rate": 0.0005680149810876322, |
| "loss": 581.488, |
| "step": 4620 |
| }, |
| { |
| "ce_loss_10": 3.6198580145835875, |
| "ce_loss_13": 3.5573437213897705, |
| "ce_loss_2": 4.117598211765289, |
| "ce_loss_3": 3.94056499004364, |
| "ce_loss_7": 3.667776870727539, |
| "epoch": 0.463, |
| "grad_norm": 560.0, |
| "kl_loss_10": 99.44257354736328, |
| "kl_loss_2": 1160.7040802001952, |
| "kl_loss_3": 809.362094116211, |
| "kl_loss_7": 201.12859268188475, |
| "learning_rate": 0.0005664427267851271, |
| "loss": 565.3629, |
| "step": 4630 |
| }, |
| { |
| "ce_loss_10": 3.534971606731415, |
| "ce_loss_13": 3.47266343832016, |
| "ce_loss_2": 4.036073172092438, |
| "ce_loss_3": 3.857685387134552, |
| "ce_loss_7": 3.5870521306991576, |
| "epoch": 0.464, |
| "grad_norm": 498.0, |
| "kl_loss_10": 97.52345237731933, |
| "kl_loss_2": 1167.1843322753907, |
| "kl_loss_3": 810.5214752197265, |
| "kl_loss_7": 199.60354309082032, |
| "learning_rate": 0.0005648698034051009, |
| "loss": 562.6416, |
| "step": 4640 |
| }, |
| { |
| "ce_loss_10": 3.6570662021636964, |
| "ce_loss_13": 3.594506525993347, |
| "ce_loss_2": 4.158554673194885, |
| "ce_loss_3": 3.980504941940308, |
| "ce_loss_7": 3.7062572717666624, |
| "epoch": 0.465, |
| "grad_norm": 412.0, |
| "kl_loss_10": 99.88166885375976, |
| "kl_loss_2": 1173.9357055664063, |
| "kl_loss_3": 818.5712066650391, |
| "kl_loss_7": 200.30800857543946, |
| "learning_rate": 0.0005632962267868747, |
| "loss": 561.8186, |
| "step": 4650 |
| }, |
| { |
| "ce_loss_10": 3.5903021335601806, |
| "ce_loss_13": 3.5294329643249513, |
| "ce_loss_2": 4.08318532705307, |
| "ce_loss_3": 3.9098427176475523, |
| "ce_loss_7": 3.6388569593429567, |
| "epoch": 0.466, |
| "grad_norm": 464.0, |
| "kl_loss_10": 95.17009468078614, |
| "kl_loss_2": 1143.232162475586, |
| "kl_loss_3": 798.761831665039, |
| "kl_loss_7": 195.75977783203126, |
| "learning_rate": 0.0005617220127763474, |
| "loss": 567.0608, |
| "step": 4660 |
| }, |
| { |
| "ce_loss_10": 3.669221520423889, |
| "ce_loss_13": 3.607930314540863, |
| "ce_loss_2": 4.160642421245575, |
| "ce_loss_3": 3.9847203373908995, |
| "ce_loss_7": 3.717066395282745, |
| "epoch": 0.467, |
| "grad_norm": 412.0, |
| "kl_loss_10": 98.76815719604492, |
| "kl_loss_2": 1153.8832275390625, |
| "kl_loss_3": 803.9543914794922, |
| "kl_loss_7": 198.99397354125978, |
| "learning_rate": 0.0005601471772258368, |
| "loss": 567.3518, |
| "step": 4670 |
| }, |
| { |
| "ce_loss_10": 3.6542641162872314, |
| "ce_loss_13": 3.593363094329834, |
| "ce_loss_2": 4.133442676067352, |
| "ce_loss_3": 3.96450389623642, |
| "ce_loss_7": 3.7022117972373962, |
| "epoch": 0.468, |
| "grad_norm": 384.0, |
| "kl_loss_10": 98.04742546081543, |
| "kl_loss_2": 1118.5282470703125, |
| "kl_loss_3": 784.399691772461, |
| "kl_loss_7": 197.338858795166, |
| "learning_rate": 0.0005585717359939192, |
| "loss": 565.1176, |
| "step": 4680 |
| }, |
| { |
| "ce_loss_10": 3.56116144657135, |
| "ce_loss_13": 3.4993683457374574, |
| "ce_loss_2": 4.055442547798156, |
| "ce_loss_3": 3.887247931957245, |
| "ce_loss_7": 3.6099945425987245, |
| "epoch": 0.469, |
| "grad_norm": 490.0, |
| "kl_loss_10": 97.45741577148438, |
| "kl_loss_2": 1149.7481964111328, |
| "kl_loss_3": 806.3391754150391, |
| "kl_loss_7": 197.63161849975586, |
| "learning_rate": 0.0005569957049452703, |
| "loss": 571.714, |
| "step": 4690 |
| }, |
| { |
| "ce_loss_10": 3.6181132555007935, |
| "ce_loss_13": 3.558199667930603, |
| "ce_loss_2": 4.1229860305786135, |
| "ce_loss_3": 3.9408149838447573, |
| "ce_loss_7": 3.668530523777008, |
| "epoch": 0.47, |
| "grad_norm": 458.0, |
| "kl_loss_10": 98.11741218566894, |
| "kl_loss_2": 1179.65732421875, |
| "kl_loss_3": 819.0914672851562, |
| "kl_loss_7": 202.21502075195312, |
| "learning_rate": 0.0005554190999505056, |
| "loss": 572.5331, |
| "step": 4700 |
| }, |
| { |
| "ce_loss_10": 3.7477443337440492, |
| "ce_loss_13": 3.6823888421058655, |
| "ce_loss_2": 4.236353850364685, |
| "ce_loss_3": 4.064246296882629, |
| "ce_loss_7": 3.7983964323997497, |
| "epoch": 0.471, |
| "grad_norm": 376.0, |
| "kl_loss_10": 101.09743614196778, |
| "kl_loss_2": 1167.4985229492188, |
| "kl_loss_3": 813.3948120117187, |
| "kl_loss_7": 205.17110900878907, |
| "learning_rate": 0.0005538419368860196, |
| "loss": 552.1318, |
| "step": 4710 |
| }, |
| { |
| "ce_loss_10": 3.670793604850769, |
| "ce_loss_13": 3.6081652998924256, |
| "ce_loss_2": 4.154720652103424, |
| "ce_loss_3": 3.986761474609375, |
| "ce_loss_7": 3.7201395988464356, |
| "epoch": 0.472, |
| "grad_norm": 416.0, |
| "kl_loss_10": 100.02058029174805, |
| "kl_loss_2": 1152.6582946777344, |
| "kl_loss_3": 806.7274566650391, |
| "kl_loss_7": 202.40063400268554, |
| "learning_rate": 0.0005522642316338268, |
| "loss": 576.1212, |
| "step": 4720 |
| }, |
| { |
| "ce_loss_10": 3.673479509353638, |
| "ce_loss_13": 3.613760471343994, |
| "ce_loss_2": 4.150910186767578, |
| "ce_loss_3": 3.981798696517944, |
| "ce_loss_7": 3.721827840805054, |
| "epoch": 0.473, |
| "grad_norm": 478.0, |
| "kl_loss_10": 99.9439712524414, |
| "kl_loss_2": 1142.4451599121094, |
| "kl_loss_3": 795.6325531005859, |
| "kl_loss_7": 199.72487106323243, |
| "learning_rate": 0.0005506860000814017, |
| "loss": 573.0671, |
| "step": 4730 |
| }, |
| { |
| "ce_loss_10": 3.700618231296539, |
| "ce_loss_13": 3.638905906677246, |
| "ce_loss_2": 4.180734276771545, |
| "ce_loss_3": 4.006302297115326, |
| "ce_loss_7": 3.7447570085525514, |
| "epoch": 0.474, |
| "grad_norm": 372.0, |
| "kl_loss_10": 99.73388938903808, |
| "kl_loss_2": 1127.7213500976563, |
| "kl_loss_3": 793.5628936767578, |
| "kl_loss_7": 197.02488555908204, |
| "learning_rate": 0.0005491072581215186, |
| "loss": 565.0697, |
| "step": 4740 |
| }, |
| { |
| "ce_loss_10": 3.706625771522522, |
| "ce_loss_13": 3.6401172399520876, |
| "ce_loss_2": 4.184090709686279, |
| "ce_loss_3": 4.019766807556152, |
| "ce_loss_7": 3.754279363155365, |
| "epoch": 0.475, |
| "grad_norm": 516.0, |
| "kl_loss_10": 103.58124504089355, |
| "kl_loss_2": 1159.682275390625, |
| "kl_loss_3": 813.5887573242187, |
| "kl_loss_7": 204.05538330078124, |
| "learning_rate": 0.0005475280216520913, |
| "loss": 556.0086, |
| "step": 4750 |
| }, |
| { |
| "ce_loss_10": 3.617805337905884, |
| "ce_loss_13": 3.5573843002319334, |
| "ce_loss_2": 4.093091154098511, |
| "ce_loss_3": 3.926499140262604, |
| "ce_loss_7": 3.664002466201782, |
| "epoch": 0.476, |
| "grad_norm": 438.0, |
| "kl_loss_10": 97.125687789917, |
| "kl_loss_2": 1118.9559478759766, |
| "kl_loss_3": 784.6352722167969, |
| "kl_loss_7": 196.01404037475587, |
| "learning_rate": 0.0005459483065760138, |
| "loss": 565.9596, |
| "step": 4760 |
| }, |
| { |
| "ce_loss_10": 3.552186381816864, |
| "ce_loss_13": 3.4902740478515626, |
| "ce_loss_2": 4.07539484500885, |
| "ce_loss_3": 3.891750192642212, |
| "ce_loss_7": 3.601547920703888, |
| "epoch": 0.477, |
| "grad_norm": 584.0, |
| "kl_loss_10": 97.89878273010254, |
| "kl_loss_2": 1199.7971740722655, |
| "kl_loss_3": 836.253662109375, |
| "kl_loss_7": 197.98745880126953, |
| "learning_rate": 0.0005443681288009991, |
| "loss": 568.1693, |
| "step": 4770 |
| }, |
| { |
| "ce_loss_10": 3.6120885968208314, |
| "ce_loss_13": 3.5525715351104736, |
| "ce_loss_2": 4.106596338748932, |
| "ce_loss_3": 3.932267451286316, |
| "ce_loss_7": 3.6594039678573607, |
| "epoch": 0.478, |
| "grad_norm": 430.0, |
| "kl_loss_10": 98.81552238464356, |
| "kl_loss_2": 1169.4871887207032, |
| "kl_loss_3": 816.0136047363281, |
| "kl_loss_7": 198.91362609863282, |
| "learning_rate": 0.0005427875042394199, |
| "loss": 570.9199, |
| "step": 4780 |
| }, |
| { |
| "ce_loss_10": 3.6413972973823547, |
| "ce_loss_13": 3.5771793842315676, |
| "ce_loss_2": 4.133682417869568, |
| "ce_loss_3": 3.9594278573989867, |
| "ce_loss_7": 3.6885754466056824, |
| "epoch": 0.479, |
| "grad_norm": 396.0, |
| "kl_loss_10": 102.98994331359863, |
| "kl_loss_2": 1166.8763580322266, |
| "kl_loss_3": 812.8268646240234, |
| "kl_loss_7": 201.2046257019043, |
| "learning_rate": 0.0005412064488081482, |
| "loss": 576.3787, |
| "step": 4790 |
| }, |
| { |
| "ce_loss_10": 3.6483134269714355, |
| "ce_loss_13": 3.5873068809509276, |
| "ce_loss_2": 4.13967661857605, |
| "ce_loss_3": 3.9646928787231444, |
| "ce_loss_7": 3.697467315196991, |
| "epoch": 0.48, |
| "grad_norm": 370.0, |
| "kl_loss_10": 99.1940761566162, |
| "kl_loss_2": 1147.4876434326172, |
| "kl_loss_3": 791.1785400390625, |
| "kl_loss_7": 197.28219909667968, |
| "learning_rate": 0.0005396249784283942, |
| "loss": 558.8872, |
| "step": 4800 |
| }, |
| { |
| "ce_loss_10": 3.675038015842438, |
| "ce_loss_13": 3.605392372608185, |
| "ce_loss_2": 4.173114275932312, |
| "ce_loss_3": 3.99793621301651, |
| "ce_loss_7": 3.719290328025818, |
| "epoch": 0.481, |
| "grad_norm": 424.0, |
| "kl_loss_10": 109.17574653625488, |
| "kl_loss_2": 1186.1647857666017, |
| "kl_loss_3": 827.1956359863282, |
| "kl_loss_7": 205.77321548461913, |
| "learning_rate": 0.0005380431090255476, |
| "loss": 574.2385, |
| "step": 4810 |
| }, |
| { |
| "ce_loss_10": 3.6580063104629517, |
| "ce_loss_13": 3.600323748588562, |
| "ce_loss_2": 4.138371276855469, |
| "ce_loss_3": 3.968156564235687, |
| "ce_loss_7": 3.705365836620331, |
| "epoch": 0.482, |
| "grad_norm": 368.0, |
| "kl_loss_10": 96.444384765625, |
| "kl_loss_2": 1126.6424652099608, |
| "kl_loss_3": 782.5597137451172, |
| "kl_loss_7": 192.58789978027343, |
| "learning_rate": 0.0005364608565290155, |
| "loss": 556.892, |
| "step": 4820 |
| }, |
| { |
| "ce_loss_10": 3.66942412853241, |
| "ce_loss_13": 3.6059840083122254, |
| "ce_loss_2": 4.159801661968231, |
| "ce_loss_3": 3.985584008693695, |
| "ce_loss_7": 3.7178696751594544, |
| "epoch": 0.483, |
| "grad_norm": 528.0, |
| "kl_loss_10": 101.1554500579834, |
| "kl_loss_2": 1154.6878021240234, |
| "kl_loss_3": 803.7564819335937, |
| "kl_loss_7": 199.8929084777832, |
| "learning_rate": 0.0005348782368720626, |
| "loss": 563.005, |
| "step": 4830 |
| }, |
| { |
| "ce_loss_10": 3.596053886413574, |
| "ce_loss_13": 3.5365728974342345, |
| "ce_loss_2": 4.080682539939881, |
| "ce_loss_3": 3.9060325980186463, |
| "ce_loss_7": 3.6432487964630127, |
| "epoch": 0.484, |
| "grad_norm": 520.0, |
| "kl_loss_10": 96.21514892578125, |
| "kl_loss_2": 1134.5447143554688, |
| "kl_loss_3": 787.3878204345704, |
| "kl_loss_7": 194.21477661132812, |
| "learning_rate": 0.000533295265991652, |
| "loss": 564.2112, |
| "step": 4840 |
| }, |
| { |
| "ce_loss_10": 3.6783321022987367, |
| "ce_loss_13": 3.6159629583358766, |
| "ce_loss_2": 4.154437899589539, |
| "ce_loss_3": 3.9877618312835694, |
| "ce_loss_7": 3.727357840538025, |
| "epoch": 0.485, |
| "grad_norm": 434.0, |
| "kl_loss_10": 97.2699405670166, |
| "kl_loss_2": 1128.611801147461, |
| "kl_loss_3": 786.6338958740234, |
| "kl_loss_7": 195.64030685424805, |
| "learning_rate": 0.0005317119598282822, |
| "loss": 554.8634, |
| "step": 4850 |
| }, |
| { |
| "ce_loss_10": 3.6783334612846375, |
| "ce_loss_13": 3.6158772826194765, |
| "ce_loss_2": 4.161105620861053, |
| "ce_loss_3": 3.9936763644218445, |
| "ce_loss_7": 3.726669430732727, |
| "epoch": 0.486, |
| "grad_norm": 500.0, |
| "kl_loss_10": 99.51188240051269, |
| "kl_loss_2": 1139.204409790039, |
| "kl_loss_3": 796.6284942626953, |
| "kl_loss_7": 197.98922119140624, |
| "learning_rate": 0.0005301283343258293, |
| "loss": 559.5733, |
| "step": 4860 |
| }, |
| { |
| "ce_loss_10": 3.739852726459503, |
| "ce_loss_13": 3.679302477836609, |
| "ce_loss_2": 4.207214975357056, |
| "ce_loss_3": 4.046137988567352, |
| "ce_loss_7": 3.7877432465553285, |
| "epoch": 0.487, |
| "grad_norm": 434.0, |
| "kl_loss_10": 98.4985725402832, |
| "kl_loss_2": 1115.5814056396484, |
| "kl_loss_3": 781.7784240722656, |
| "kl_loss_7": 195.47981796264648, |
| "learning_rate": 0.000528544405431384, |
| "loss": 548.517, |
| "step": 4870 |
| }, |
| { |
| "ce_loss_10": 3.617240381240845, |
| "ce_loss_13": 3.555454957485199, |
| "ce_loss_2": 4.122074174880981, |
| "ce_loss_3": 3.944024980068207, |
| "ce_loss_7": 3.668628621101379, |
| "epoch": 0.488, |
| "grad_norm": 432.0, |
| "kl_loss_10": 98.9582015991211, |
| "kl_loss_2": 1175.8768676757813, |
| "kl_loss_3": 814.3092010498046, |
| "kl_loss_7": 202.09591979980468, |
| "learning_rate": 0.000526960189095093, |
| "loss": 569.4682, |
| "step": 4880 |
| }, |
| { |
| "ce_loss_10": 3.5905461430549623, |
| "ce_loss_13": 3.5317755937576294, |
| "ce_loss_2": 4.075044083595276, |
| "ce_loss_3": 3.9047257542610168, |
| "ce_loss_7": 3.637452006340027, |
| "epoch": 0.489, |
| "grad_norm": 406.0, |
| "kl_loss_10": 95.30788230895996, |
| "kl_loss_2": 1125.9373596191406, |
| "kl_loss_3": 783.2284423828125, |
| "kl_loss_7": 192.63981170654296, |
| "learning_rate": 0.0005253757012699972, |
| "loss": 553.6164, |
| "step": 4890 |
| }, |
| { |
| "ce_loss_10": 3.680708420276642, |
| "ce_loss_13": 3.621255648136139, |
| "ce_loss_2": 4.161669278144837, |
| "ce_loss_3": 3.9898939728736877, |
| "ce_loss_7": 3.726882266998291, |
| "epoch": 0.49, |
| "grad_norm": 436.0, |
| "kl_loss_10": 98.59705772399903, |
| "kl_loss_2": 1136.0980651855468, |
| "kl_loss_3": 790.2571563720703, |
| "kl_loss_7": 197.4443115234375, |
| "learning_rate": 0.0005237909579118712, |
| "loss": 568.0026, |
| "step": 4900 |
| }, |
| { |
| "ce_loss_10": 3.6435038447380066, |
| "ce_loss_13": 3.581137490272522, |
| "ce_loss_2": 4.134112453460693, |
| "ce_loss_3": 3.9640262126922607, |
| "ce_loss_7": 3.6911675453186037, |
| "epoch": 0.491, |
| "grad_norm": 520.0, |
| "kl_loss_10": 99.66703796386719, |
| "kl_loss_2": 1167.6467651367188, |
| "kl_loss_3": 818.6171966552735, |
| "kl_loss_7": 200.65354614257814, |
| "learning_rate": 0.0005222059749790631, |
| "loss": 568.3183, |
| "step": 4910 |
| }, |
| { |
| "ce_loss_10": 3.7152050852775576, |
| "ce_loss_13": 3.652708613872528, |
| "ce_loss_2": 4.176082861423493, |
| "ce_loss_3": 4.013521981239319, |
| "ce_loss_7": 3.759286069869995, |
| "epoch": 0.492, |
| "grad_norm": 394.0, |
| "kl_loss_10": 100.0508934020996, |
| "kl_loss_2": 1112.6296081542969, |
| "kl_loss_3": 774.64658203125, |
| "kl_loss_7": 196.3288688659668, |
| "learning_rate": 0.0005206207684323337, |
| "loss": 544.9011, |
| "step": 4920 |
| }, |
| { |
| "ce_loss_10": 3.689722108840942, |
| "ce_loss_13": 3.6289564847946165, |
| "ce_loss_2": 4.170908105373383, |
| "ce_loss_3": 3.9987404584884643, |
| "ce_loss_7": 3.7391751527786257, |
| "epoch": 0.493, |
| "grad_norm": 368.0, |
| "kl_loss_10": 100.77400093078613, |
| "kl_loss_2": 1140.2743774414062, |
| "kl_loss_3": 795.368798828125, |
| "kl_loss_7": 200.2589553833008, |
| "learning_rate": 0.000519035354234695, |
| "loss": 567.6383, |
| "step": 4930 |
| }, |
| { |
| "ce_loss_10": 3.666009783744812, |
| "ce_loss_13": 3.603765845298767, |
| "ce_loss_2": 4.156714332103729, |
| "ce_loss_3": 3.9840614438056945, |
| "ce_loss_7": 3.7159415602684023, |
| "epoch": 0.494, |
| "grad_norm": 516.0, |
| "kl_loss_10": 99.73322830200195, |
| "kl_loss_2": 1144.6152709960938, |
| "kl_loss_3": 797.5058837890625, |
| "kl_loss_7": 199.84856643676758, |
| "learning_rate": 0.0005174497483512506, |
| "loss": 551.5833, |
| "step": 4940 |
| }, |
| { |
| "ce_loss_10": 3.715251398086548, |
| "ce_loss_13": 3.6532492995262147, |
| "ce_loss_2": 4.190750586986542, |
| "ce_loss_3": 4.017711067199707, |
| "ce_loss_7": 3.760482394695282, |
| "epoch": 0.495, |
| "grad_norm": 404.0, |
| "kl_loss_10": 99.74794273376465, |
| "kl_loss_2": 1135.6743072509767, |
| "kl_loss_3": 788.5320007324219, |
| "kl_loss_7": 197.0201416015625, |
| "learning_rate": 0.0005158639667490339, |
| "loss": 559.5508, |
| "step": 4950 |
| }, |
| { |
| "ce_loss_10": 3.60677056312561, |
| "ce_loss_13": 3.545226526260376, |
| "ce_loss_2": 4.091673123836517, |
| "ce_loss_3": 3.921009349822998, |
| "ce_loss_7": 3.6560636878013613, |
| "epoch": 0.496, |
| "grad_norm": 380.0, |
| "kl_loss_10": 97.61143035888672, |
| "kl_loss_2": 1146.4500457763672, |
| "kl_loss_3": 801.032958984375, |
| "kl_loss_7": 198.76946029663085, |
| "learning_rate": 0.0005142780253968481, |
| "loss": 559.3498, |
| "step": 4960 |
| }, |
| { |
| "ce_loss_10": 3.558833396434784, |
| "ce_loss_13": 3.498770594596863, |
| "ce_loss_2": 4.029934275150299, |
| "ce_loss_3": 3.8623911499977113, |
| "ce_loss_7": 3.605703389644623, |
| "epoch": 0.497, |
| "grad_norm": 404.0, |
| "kl_loss_10": 95.15658073425293, |
| "kl_loss_2": 1120.565899658203, |
| "kl_loss_3": 776.6140930175782, |
| "kl_loss_7": 192.40693054199218, |
| "learning_rate": 0.0005126919402651053, |
| "loss": 541.1446, |
| "step": 4970 |
| }, |
| { |
| "ce_loss_10": 3.6243564009666445, |
| "ce_loss_13": 3.562463808059692, |
| "ce_loss_2": 4.122486090660095, |
| "ce_loss_3": 3.9518114924430847, |
| "ce_loss_7": 3.6740434527397157, |
| "epoch": 0.498, |
| "grad_norm": 500.0, |
| "kl_loss_10": 98.81732482910157, |
| "kl_loss_2": 1158.3788116455078, |
| "kl_loss_3": 805.2687072753906, |
| "kl_loss_7": 198.79998626708985, |
| "learning_rate": 0.0005111057273256647, |
| "loss": 562.34, |
| "step": 4980 |
| }, |
| { |
| "ce_loss_10": 3.736222839355469, |
| "ce_loss_13": 3.676733374595642, |
| "ce_loss_2": 4.189973556995392, |
| "ce_loss_3": 4.022429513931274, |
| "ce_loss_7": 3.7769731283187866, |
| "epoch": 0.499, |
| "grad_norm": 396.0, |
| "kl_loss_10": 98.13356437683106, |
| "kl_loss_2": 1078.4886474609375, |
| "kl_loss_3": 748.3916412353516, |
| "kl_loss_7": 191.23028793334962, |
| "learning_rate": 0.0005095194025516733, |
| "loss": 536.8887, |
| "step": 4990 |
| }, |
| { |
| "ce_loss_10": 3.6507428646087647, |
| "ce_loss_13": 3.592644715309143, |
| "ce_loss_2": 4.122073376178742, |
| "ce_loss_3": 3.9521225333213805, |
| "ce_loss_7": 3.697298324108124, |
| "epoch": 0.5, |
| "grad_norm": 378.0, |
| "kl_loss_10": 95.96725730895996, |
| "kl_loss_2": 1110.4840362548828, |
| "kl_loss_3": 769.709603881836, |
| "kl_loss_7": 192.08199310302734, |
| "learning_rate": 0.000507932981917404, |
| "loss": 562.5593, |
| "step": 5000 |
| }, |
| { |
| "ce_loss_10": 3.609897780418396, |
| "ce_loss_13": 3.5468419432640075, |
| "ce_loss_2": 4.115197873115539, |
| "ce_loss_3": 3.9347579956054686, |
| "ce_loss_7": 3.6594788432121277, |
| "epoch": 0.501, |
| "grad_norm": 496.0, |
| "kl_loss_10": 102.02307662963867, |
| "kl_loss_2": 1185.6702362060546, |
| "kl_loss_3": 822.8478820800781, |
| "kl_loss_7": 202.77078170776366, |
| "learning_rate": 0.0005063464813980949, |
| "loss": 576.005, |
| "step": 5010 |
| }, |
| { |
| "ce_loss_10": 3.595167326927185, |
| "ce_loss_13": 3.534419858455658, |
| "ce_loss_2": 4.08291003704071, |
| "ce_loss_3": 3.910551607608795, |
| "ce_loss_7": 3.6416044354438784, |
| "epoch": 0.502, |
| "grad_norm": 366.0, |
| "kl_loss_10": 98.82206382751465, |
| "kl_loss_2": 1157.9163513183594, |
| "kl_loss_3": 802.2986022949219, |
| "kl_loss_7": 196.4967498779297, |
| "learning_rate": 0.0005047599169697884, |
| "loss": 557.0335, |
| "step": 5020 |
| }, |
| { |
| "ce_loss_10": 3.5276883602142335, |
| "ce_loss_13": 3.469167137145996, |
| "ce_loss_2": 4.028472435474396, |
| "ce_loss_3": 3.8497302412986754, |
| "ce_loss_7": 3.5778237104415895, |
| "epoch": 0.503, |
| "grad_norm": 544.0, |
| "kl_loss_10": 95.17037048339844, |
| "kl_loss_2": 1142.5230926513673, |
| "kl_loss_3": 789.8021270751954, |
| "kl_loss_7": 195.37155456542968, |
| "learning_rate": 0.000503173304609171, |
| "loss": 545.4258, |
| "step": 5030 |
| }, |
| { |
| "ce_loss_10": 3.6576398611068726, |
| "ce_loss_13": 3.5950983643531798, |
| "ce_loss_2": 4.14467431306839, |
| "ce_loss_3": 3.9757012486457826, |
| "ce_loss_7": 3.7055052399635313, |
| "epoch": 0.504, |
| "grad_norm": 482.0, |
| "kl_loss_10": 98.4008186340332, |
| "kl_loss_2": 1135.7276794433594, |
| "kl_loss_3": 789.9247985839844, |
| "kl_loss_7": 196.23304824829103, |
| "learning_rate": 0.0005015866602934111, |
| "loss": 552.1605, |
| "step": 5040 |
| }, |
| { |
| "ce_loss_10": 3.621449387073517, |
| "ce_loss_13": 3.5583016514778136, |
| "ce_loss_2": 4.125820016860962, |
| "ce_loss_3": 3.9470208525657653, |
| "ce_loss_7": 3.6696552276611327, |
| "epoch": 0.505, |
| "grad_norm": 386.0, |
| "kl_loss_10": 101.05188751220703, |
| "kl_loss_2": 1170.8730712890624, |
| "kl_loss_3": 822.174462890625, |
| "kl_loss_7": 203.6134246826172, |
| "learning_rate": 0.0005, |
| "loss": 564.1666, |
| "step": 5050 |
| }, |
| { |
| "ce_loss_10": 3.608661472797394, |
| "ce_loss_13": 3.549720525741577, |
| "ce_loss_2": 4.094336903095245, |
| "ce_loss_3": 3.921399199962616, |
| "ce_loss_7": 3.6561817049980165, |
| "epoch": 0.506, |
| "grad_norm": 532.0, |
| "kl_loss_10": 97.96763725280762, |
| "kl_loss_2": 1147.6109741210937, |
| "kl_loss_3": 799.4344543457031, |
| "kl_loss_7": 197.70511016845703, |
| "learning_rate": 0.0004984133397065889, |
| "loss": 551.9219, |
| "step": 5060 |
| }, |
| { |
| "ce_loss_10": 3.619631803035736, |
| "ce_loss_13": 3.5591482758522033, |
| "ce_loss_2": 4.1191855549812315, |
| "ce_loss_3": 3.947730815410614, |
| "ce_loss_7": 3.671154284477234, |
| "epoch": 0.507, |
| "grad_norm": 420.0, |
| "kl_loss_10": 98.14169616699219, |
| "kl_loss_2": 1152.0039337158203, |
| "kl_loss_3": 803.1968292236328, |
| "kl_loss_7": 198.87692565917968, |
| "learning_rate": 0.0004968266953908291, |
| "loss": 554.0305, |
| "step": 5070 |
| }, |
| { |
| "ce_loss_10": 3.6628435134887694, |
| "ce_loss_13": 3.6024859309196473, |
| "ce_loss_2": 4.145783054828644, |
| "ce_loss_3": 3.972540259361267, |
| "ce_loss_7": 3.7080691695213317, |
| "epoch": 0.508, |
| "grad_norm": 532.0, |
| "kl_loss_10": 98.82306175231933, |
| "kl_loss_2": 1137.6268676757813, |
| "kl_loss_3": 795.5397338867188, |
| "kl_loss_7": 194.52870864868163, |
| "learning_rate": 0.0004952400830302117, |
| "loss": 554.9051, |
| "step": 5080 |
| }, |
| { |
| "ce_loss_10": 3.585409712791443, |
| "ce_loss_13": 3.525643265247345, |
| "ce_loss_2": 4.091677510738373, |
| "ce_loss_3": 3.9131953358650207, |
| "ce_loss_7": 3.6364392280578612, |
| "epoch": 0.509, |
| "grad_norm": 412.0, |
| "kl_loss_10": 98.62568626403808, |
| "kl_loss_2": 1168.942919921875, |
| "kl_loss_3": 811.3192687988281, |
| "kl_loss_7": 199.42913665771485, |
| "learning_rate": 0.0004936535186019053, |
| "loss": 559.6511, |
| "step": 5090 |
| }, |
| { |
| "ce_loss_10": 3.6907896161079408, |
| "ce_loss_13": 3.62961208820343, |
| "ce_loss_2": 4.153078198432922, |
| "ce_loss_3": 3.9874324560165406, |
| "ce_loss_7": 3.735322892665863, |
| "epoch": 0.51, |
| "grad_norm": 376.0, |
| "kl_loss_10": 97.42878112792968, |
| "kl_loss_2": 1101.246890258789, |
| "kl_loss_3": 771.5801239013672, |
| "kl_loss_7": 192.14101791381836, |
| "learning_rate": 0.000492067018082596, |
| "loss": 549.3435, |
| "step": 5100 |
| }, |
| { |
| "ce_loss_10": 3.6234113693237306, |
| "ce_loss_13": 3.55826051235199, |
| "ce_loss_2": 4.134788942337036, |
| "ce_loss_3": 3.9512638211250306, |
| "ce_loss_7": 3.673302376270294, |
| "epoch": 0.511, |
| "grad_norm": 358.0, |
| "kl_loss_10": 100.71795692443848, |
| "kl_loss_2": 1184.7957580566406, |
| "kl_loss_3": 822.3129302978516, |
| "kl_loss_7": 201.37216567993164, |
| "learning_rate": 0.0004904805974483267, |
| "loss": 578.112, |
| "step": 5110 |
| }, |
| { |
| "ce_loss_10": 3.73909273147583, |
| "ce_loss_13": 3.6729060292243956, |
| "ce_loss_2": 4.232535266876221, |
| "ce_loss_3": 4.064092624187469, |
| "ce_loss_7": 3.78980005979538, |
| "epoch": 0.512, |
| "grad_norm": 418.0, |
| "kl_loss_10": 103.6674789428711, |
| "kl_loss_2": 1170.0532684326172, |
| "kl_loss_3": 824.2820373535156, |
| "kl_loss_7": 206.52156982421874, |
| "learning_rate": 0.0004888942726743353, |
| "loss": 580.3403, |
| "step": 5120 |
| }, |
| { |
| "ce_loss_10": 3.6079283952713013, |
| "ce_loss_13": 3.5456172823905945, |
| "ce_loss_2": 4.103336191177368, |
| "ce_loss_3": 3.9267752170562744, |
| "ce_loss_7": 3.655103015899658, |
| "epoch": 0.513, |
| "grad_norm": 378.0, |
| "kl_loss_10": 97.65564994812011, |
| "kl_loss_2": 1156.2654846191406, |
| "kl_loss_3": 800.4834381103516, |
| "kl_loss_7": 198.76654281616212, |
| "learning_rate": 0.0004873080597348947, |
| "loss": 561.8108, |
| "step": 5130 |
| }, |
| { |
| "ce_loss_10": 3.492985022068024, |
| "ce_loss_13": 3.433611583709717, |
| "ce_loss_2": 4.009678089618683, |
| "ce_loss_3": 3.82467257976532, |
| "ce_loss_7": 3.543225371837616, |
| "epoch": 0.514, |
| "grad_norm": 440.0, |
| "kl_loss_10": 96.83905181884765, |
| "kl_loss_2": 1194.322329711914, |
| "kl_loss_3": 828.9491943359375, |
| "kl_loss_7": 198.22924575805663, |
| "learning_rate": 0.0004857219746031519, |
| "loss": 567.8251, |
| "step": 5140 |
| }, |
| { |
| "ce_loss_10": 3.6722797036170958, |
| "ce_loss_13": 3.6109776854515077, |
| "ce_loss_2": 4.149738478660583, |
| "ce_loss_3": 3.975986909866333, |
| "ce_loss_7": 3.7163102626800537, |
| "epoch": 0.515, |
| "grad_norm": 430.0, |
| "kl_loss_10": 99.9472442626953, |
| "kl_loss_2": 1140.7201843261719, |
| "kl_loss_3": 787.3806091308594, |
| "kl_loss_7": 197.54812469482422, |
| "learning_rate": 0.0004841360332509663, |
| "loss": 556.8349, |
| "step": 5150 |
| }, |
| { |
| "ce_loss_10": 3.6183668613433837, |
| "ce_loss_13": 3.5591975688934325, |
| "ce_loss_2": 4.100240254402161, |
| "ce_loss_3": 3.9269237518310547, |
| "ce_loss_7": 3.6642425417900086, |
| "epoch": 0.516, |
| "grad_norm": 366.0, |
| "kl_loss_10": 93.92010688781738, |
| "kl_loss_2": 1122.7465362548828, |
| "kl_loss_3": 778.0984069824219, |
| "kl_loss_7": 191.03939056396484, |
| "learning_rate": 0.0004825502516487497, |
| "loss": 537.9487, |
| "step": 5160 |
| }, |
| { |
| "ce_loss_10": 3.5835310339927675, |
| "ce_loss_13": 3.523791456222534, |
| "ce_loss_2": 4.082003366947174, |
| "ce_loss_3": 3.908873450756073, |
| "ce_loss_7": 3.634874391555786, |
| "epoch": 0.517, |
| "grad_norm": 608.0, |
| "kl_loss_10": 99.05728721618652, |
| "kl_loss_2": 1155.0127502441405, |
| "kl_loss_3": 805.5277587890625, |
| "kl_loss_7": 198.6641098022461, |
| "learning_rate": 0.00048096464576530507, |
| "loss": 561.8511, |
| "step": 5170 |
| }, |
| { |
| "ce_loss_10": 3.6886157989501953, |
| "ce_loss_13": 3.628003740310669, |
| "ce_loss_2": 4.146280741691589, |
| "ce_loss_3": 3.9846285343170167, |
| "ce_loss_7": 3.731534945964813, |
| "epoch": 0.518, |
| "grad_norm": 390.0, |
| "kl_loss_10": 98.92878913879395, |
| "kl_loss_2": 1103.851336669922, |
| "kl_loss_3": 767.5214813232421, |
| "kl_loss_7": 193.13973236083984, |
| "learning_rate": 0.00047937923156766646, |
| "loss": 544.8563, |
| "step": 5180 |
| }, |
| { |
| "ce_loss_10": 3.737223446369171, |
| "ce_loss_13": 3.6758363366127016, |
| "ce_loss_2": 4.200218558311462, |
| "ce_loss_3": 4.037039196491241, |
| "ce_loss_7": 3.7829922437667847, |
| "epoch": 0.519, |
| "grad_norm": 428.0, |
| "kl_loss_10": 102.72743797302246, |
| "kl_loss_2": 1108.4752288818358, |
| "kl_loss_3": 772.8697265625, |
| "kl_loss_7": 198.5632797241211, |
| "learning_rate": 0.00047779402502093696, |
| "loss": 549.91, |
| "step": 5190 |
| }, |
| { |
| "ce_loss_10": 3.703013610839844, |
| "ce_loss_13": 3.640911114215851, |
| "ce_loss_2": 4.174945414066315, |
| "ce_loss_3": 4.009368169307709, |
| "ce_loss_7": 3.7497113823890684, |
| "epoch": 0.52, |
| "grad_norm": 478.0, |
| "kl_loss_10": 99.68995170593261, |
| "kl_loss_2": 1110.2117858886718, |
| "kl_loss_3": 777.3010894775391, |
| "kl_loss_7": 196.47792434692383, |
| "learning_rate": 0.0004762090420881289, |
| "loss": 553.7422, |
| "step": 5200 |
| }, |
| { |
| "ce_loss_10": 3.6182032585144044, |
| "ce_loss_13": 3.5570725202560425, |
| "ce_loss_2": 4.098654413223267, |
| "ce_loss_3": 3.916290044784546, |
| "ce_loss_7": 3.665347421169281, |
| "epoch": 0.521, |
| "grad_norm": 426.0, |
| "kl_loss_10": 98.28518867492676, |
| "kl_loss_2": 1126.3521606445313, |
| "kl_loss_3": 772.9946044921875, |
| "kl_loss_7": 193.74108428955077, |
| "learning_rate": 0.00047462429873000296, |
| "loss": 544.104, |
| "step": 5210 |
| }, |
| { |
| "ce_loss_10": 3.7033097624778746, |
| "ce_loss_13": 3.6430840730667113, |
| "ce_loss_2": 4.168367850780487, |
| "ce_loss_3": 3.9993362069129943, |
| "ce_loss_7": 3.74978985786438, |
| "epoch": 0.522, |
| "grad_norm": 412.0, |
| "kl_loss_10": 98.88156356811524, |
| "kl_loss_2": 1115.6398986816407, |
| "kl_loss_3": 774.026156616211, |
| "kl_loss_7": 195.32233123779298, |
| "learning_rate": 0.0004730398109049071, |
| "loss": 547.7821, |
| "step": 5220 |
| }, |
| { |
| "ce_loss_10": 3.633508253097534, |
| "ce_loss_13": 3.5716773152351378, |
| "ce_loss_2": 4.128389453887939, |
| "ce_loss_3": 3.9533074378967283, |
| "ce_loss_7": 3.6823344349861147, |
| "epoch": 0.523, |
| "grad_norm": 396.0, |
| "kl_loss_10": 98.93126792907715, |
| "kl_loss_2": 1163.846746826172, |
| "kl_loss_3": 810.2734771728516, |
| "kl_loss_7": 200.85460052490234, |
| "learning_rate": 0.000471455594568616, |
| "loss": 558.1328, |
| "step": 5230 |
| }, |
| { |
| "ce_loss_10": 3.707250881195068, |
| "ce_loss_13": 3.6447718501091004, |
| "ce_loss_2": 4.174321246147156, |
| "ce_loss_3": 4.004381275177002, |
| "ce_loss_7": 3.753636956214905, |
| "epoch": 0.524, |
| "grad_norm": 394.0, |
| "kl_loss_10": 100.72676544189453, |
| "kl_loss_2": 1114.3457427978515, |
| "kl_loss_3": 768.1556701660156, |
| "kl_loss_7": 195.28340759277344, |
| "learning_rate": 0.00046987166567417086, |
| "loss": 552.4388, |
| "step": 5240 |
| }, |
| { |
| "ce_loss_10": 3.6187984108924867, |
| "ce_loss_13": 3.5605034112930296, |
| "ce_loss_2": 4.1001020789146425, |
| "ce_loss_3": 3.9256922364234925, |
| "ce_loss_7": 3.664110267162323, |
| "epoch": 0.525, |
| "grad_norm": 380.0, |
| "kl_loss_10": 95.83710632324218, |
| "kl_loss_2": 1120.3159301757812, |
| "kl_loss_3": 775.0560852050781, |
| "kl_loss_7": 192.1679656982422, |
| "learning_rate": 0.00046828804017171776, |
| "loss": 536.3316, |
| "step": 5250 |
| }, |
| { |
| "ce_loss_10": 3.6720359563827514, |
| "ce_loss_13": 3.6088499784469605, |
| "ce_loss_2": 4.162907612323761, |
| "ce_loss_3": 3.9896105885505677, |
| "ce_loss_7": 3.722712779045105, |
| "epoch": 0.526, |
| "grad_norm": 394.0, |
| "kl_loss_10": 98.17714996337891, |
| "kl_loss_2": 1138.502374267578, |
| "kl_loss_3": 789.8116973876953, |
| "kl_loss_7": 197.40582656860352, |
| "learning_rate": 0.00046670473400834805, |
| "loss": 559.8189, |
| "step": 5260 |
| }, |
| { |
| "ce_loss_10": 3.597737526893616, |
| "ce_loss_13": 3.5393651485443116, |
| "ce_loss_2": 4.074982023239135, |
| "ce_loss_3": 3.9021154403686524, |
| "ce_loss_7": 3.644618010520935, |
| "epoch": 0.527, |
| "grad_norm": 436.0, |
| "kl_loss_10": 95.52880744934082, |
| "kl_loss_2": 1111.367953491211, |
| "kl_loss_3": 768.6636322021484, |
| "kl_loss_7": 191.67658157348632, |
| "learning_rate": 0.00046512176312793734, |
| "loss": 559.1187, |
| "step": 5270 |
| }, |
| { |
| "ce_loss_10": 3.5923956394195558, |
| "ce_loss_13": 3.5312567353248596, |
| "ce_loss_2": 4.0659032464027405, |
| "ce_loss_3": 3.9041757225990295, |
| "ce_loss_7": 3.638344919681549, |
| "epoch": 0.528, |
| "grad_norm": 382.0, |
| "kl_loss_10": 95.8816967010498, |
| "kl_loss_2": 1131.7323181152344, |
| "kl_loss_3": 788.8408813476562, |
| "kl_loss_7": 193.95931167602538, |
| "learning_rate": 0.00046353914347098467, |
| "loss": 557.7083, |
| "step": 5280 |
| }, |
| { |
| "ce_loss_10": 3.688094747066498, |
| "ce_loss_13": 3.626521134376526, |
| "ce_loss_2": 4.17344571352005, |
| "ce_loss_3": 3.9936492323875425, |
| "ce_loss_7": 3.7344411969184876, |
| "epoch": 0.529, |
| "grad_norm": 438.0, |
| "kl_loss_10": 99.97393112182617, |
| "kl_loss_2": 1136.7248291015626, |
| "kl_loss_3": 780.4773040771485, |
| "kl_loss_7": 194.1311233520508, |
| "learning_rate": 0.0004619568909744524, |
| "loss": 554.6544, |
| "step": 5290 |
| }, |
| { |
| "ce_loss_10": 3.6992242336273193, |
| "ce_loss_13": 3.6374841570854186, |
| "ce_loss_2": 4.173903214931488, |
| "ce_loss_3": 4.004681324958801, |
| "ce_loss_7": 3.7441007494926453, |
| "epoch": 0.53, |
| "grad_norm": 496.0, |
| "kl_loss_10": 100.66301612854004, |
| "kl_loss_2": 1118.1583740234375, |
| "kl_loss_3": 778.1599609375, |
| "kl_loss_7": 195.17978057861328, |
| "learning_rate": 0.00046037502157160573, |
| "loss": 555.7068, |
| "step": 5300 |
| }, |
| { |
| "ce_loss_10": 3.5648537158966063, |
| "ce_loss_13": 3.50801477432251, |
| "ce_loss_2": 4.0505608201026915, |
| "ce_loss_3": 3.885770845413208, |
| "ce_loss_7": 3.614854156970978, |
| "epoch": 0.531, |
| "grad_norm": 392.0, |
| "kl_loss_10": 95.29824142456054, |
| "kl_loss_2": 1148.0569580078125, |
| "kl_loss_3": 803.5360778808594, |
| "kl_loss_7": 195.23088302612305, |
| "learning_rate": 0.00045879355119185207, |
| "loss": 559.6594, |
| "step": 5310 |
| }, |
| { |
| "ce_loss_10": 3.6439425349235535, |
| "ce_loss_13": 3.583683359622955, |
| "ce_loss_2": 4.135701584815979, |
| "ce_loss_3": 3.9598298192024233, |
| "ce_loss_7": 3.692049765586853, |
| "epoch": 0.532, |
| "grad_norm": 444.0, |
| "kl_loss_10": 97.83190078735352, |
| "kl_loss_2": 1160.7438171386718, |
| "kl_loss_3": 807.647915649414, |
| "kl_loss_7": 199.49599685668946, |
| "learning_rate": 0.0004572124957605803, |
| "loss": 565.4321, |
| "step": 5320 |
| }, |
| { |
| "ce_loss_10": 3.6681848645210264, |
| "ce_loss_13": 3.607477676868439, |
| "ce_loss_2": 4.14128270149231, |
| "ce_loss_3": 3.9746485590934753, |
| "ce_loss_7": 3.7138744235038756, |
| "epoch": 0.533, |
| "grad_norm": 340.0, |
| "kl_loss_10": 95.41666564941406, |
| "kl_loss_2": 1136.1244140625, |
| "kl_loss_3": 793.3468963623047, |
| "kl_loss_7": 195.33221740722655, |
| "learning_rate": 0.00045563187119900103, |
| "loss": 550.4382, |
| "step": 5330 |
| }, |
| { |
| "ce_loss_10": 3.5087064266204835, |
| "ce_loss_13": 3.4494638442993164, |
| "ce_loss_2": 4.00373204946518, |
| "ce_loss_3": 3.8344790935516357, |
| "ce_loss_7": 3.5566913962364195, |
| "epoch": 0.534, |
| "grad_norm": 456.0, |
| "kl_loss_10": 96.30420112609863, |
| "kl_loss_2": 1145.2862731933594, |
| "kl_loss_3": 803.7556610107422, |
| "kl_loss_7": 194.92612915039064, |
| "learning_rate": 0.00045405169342398633, |
| "loss": 560.8537, |
| "step": 5340 |
| }, |
| { |
| "ce_loss_10": 3.5990882992744444, |
| "ce_loss_13": 3.535432243347168, |
| "ce_loss_2": 4.08842386007309, |
| "ce_loss_3": 3.912948155403137, |
| "ce_loss_7": 3.6465937376022337, |
| "epoch": 0.535, |
| "grad_norm": 422.0, |
| "kl_loss_10": 99.51773872375489, |
| "kl_loss_2": 1142.4013549804688, |
| "kl_loss_3": 795.5528442382813, |
| "kl_loss_7": 196.72316284179686, |
| "learning_rate": 0.0004524719783479088, |
| "loss": 548.8232, |
| "step": 5350 |
| }, |
| { |
| "ce_loss_10": 3.552276241779327, |
| "ce_loss_13": 3.492251825332642, |
| "ce_loss_2": 4.056445682048798, |
| "ce_loss_3": 3.8783608794212343, |
| "ce_loss_7": 3.603780543804169, |
| "epoch": 0.536, |
| "grad_norm": 376.0, |
| "kl_loss_10": 97.24302253723144, |
| "kl_loss_2": 1164.848809814453, |
| "kl_loss_3": 811.2062194824218, |
| "kl_loss_7": 198.37730560302734, |
| "learning_rate": 0.00045089274187848144, |
| "loss": 554.2202, |
| "step": 5360 |
| }, |
| { |
| "ce_loss_10": 3.6724863052368164, |
| "ce_loss_13": 3.6130531072616576, |
| "ce_loss_2": 4.1379453301429745, |
| "ce_loss_3": 3.968498194217682, |
| "ce_loss_7": 3.717296040058136, |
| "epoch": 0.537, |
| "grad_norm": 536.0, |
| "kl_loss_10": 96.28798866271973, |
| "kl_loss_2": 1108.8939270019532, |
| "kl_loss_3": 770.5279510498046, |
| "kl_loss_7": 192.69188079833984, |
| "learning_rate": 0.00044931399991859835, |
| "loss": 545.4216, |
| "step": 5370 |
| }, |
| { |
| "ce_loss_10": 3.5360588788986207, |
| "ce_loss_13": 3.474487328529358, |
| "ce_loss_2": 4.018628227710724, |
| "ce_loss_3": 3.8429470539093016, |
| "ce_loss_7": 3.5856809496879576, |
| "epoch": 0.538, |
| "grad_norm": 446.0, |
| "kl_loss_10": 97.58423805236816, |
| "kl_loss_2": 1139.092123413086, |
| "kl_loss_3": 788.7141876220703, |
| "kl_loss_7": 196.66349868774415, |
| "learning_rate": 0.00044773576836617336, |
| "loss": 546.6951, |
| "step": 5380 |
| }, |
| { |
| "ce_loss_10": 3.6238678693771362, |
| "ce_loss_13": 3.5626631021499633, |
| "ce_loss_2": 4.120850419998169, |
| "ce_loss_3": 3.943516790866852, |
| "ce_loss_7": 3.6712807416915894, |
| "epoch": 0.539, |
| "grad_norm": 388.0, |
| "kl_loss_10": 99.70593795776367, |
| "kl_loss_2": 1163.2907775878907, |
| "kl_loss_3": 810.1283508300781, |
| "kl_loss_7": 199.7040023803711, |
| "learning_rate": 0.00044615806311398056, |
| "loss": 569.078, |
| "step": 5390 |
| }, |
| { |
| "ce_loss_10": 3.706363093852997, |
| "ce_loss_13": 3.6457801342010496, |
| "ce_loss_2": 4.146688032150268, |
| "ce_loss_3": 3.9897242546081544, |
| "ce_loss_7": 3.7506498098373413, |
| "epoch": 0.54, |
| "grad_norm": 318.0, |
| "kl_loss_10": 98.56370239257812, |
| "kl_loss_2": 1084.4558197021483, |
| "kl_loss_3": 756.0719848632813, |
| "kl_loss_7": 191.6246208190918, |
| "learning_rate": 0.00044458090004949454, |
| "loss": 551.6847, |
| "step": 5400 |
| }, |
| { |
| "ce_loss_10": 3.5594072341918945, |
| "ce_loss_13": 3.4980836510658264, |
| "ce_loss_2": 4.072906112670898, |
| "ce_loss_3": 3.8963231086730956, |
| "ce_loss_7": 3.6096426606178285, |
| "epoch": 0.541, |
| "grad_norm": 490.0, |
| "kl_loss_10": 98.93370399475097, |
| "kl_loss_2": 1204.406317138672, |
| "kl_loss_3": 841.533901977539, |
| "kl_loss_7": 202.28990631103517, |
| "learning_rate": 0.0004430042950547297, |
| "loss": 563.3182, |
| "step": 5410 |
| }, |
| { |
| "ce_loss_10": 3.656948244571686, |
| "ce_loss_13": 3.5917163252830506, |
| "ce_loss_2": 4.146977603435516, |
| "ce_loss_3": 3.9775506377220156, |
| "ce_loss_7": 3.7048157334327696, |
| "epoch": 0.542, |
| "grad_norm": 472.0, |
| "kl_loss_10": 100.26595115661621, |
| "kl_loss_2": 1150.8060424804687, |
| "kl_loss_3": 803.8866760253907, |
| "kl_loss_7": 200.08724365234374, |
| "learning_rate": 0.0004414282640060809, |
| "loss": 559.1381, |
| "step": 5420 |
| }, |
| { |
| "ce_loss_10": 3.7556936740875244, |
| "ce_loss_13": 3.690820097923279, |
| "ce_loss_2": 4.2162927985191345, |
| "ce_loss_3": 4.059760391712189, |
| "ce_loss_7": 3.7993207812309264, |
| "epoch": 0.543, |
| "grad_norm": 466.0, |
| "kl_loss_10": 100.5603858947754, |
| "kl_loss_2": 1102.3566284179688, |
| "kl_loss_3": 774.5157104492188, |
| "kl_loss_7": 196.8573425292969, |
| "learning_rate": 0.0004398528227741633, |
| "loss": 566.5525, |
| "step": 5430 |
| }, |
| { |
| "ce_loss_10": 3.6126871943473815, |
| "ce_loss_13": 3.553126609325409, |
| "ce_loss_2": 4.1005645275115965, |
| "ce_loss_3": 3.9280160546302794, |
| "ce_loss_7": 3.660943078994751, |
| "epoch": 0.544, |
| "grad_norm": 458.0, |
| "kl_loss_10": 97.1538932800293, |
| "kl_loss_2": 1131.997964477539, |
| "kl_loss_3": 791.6496276855469, |
| "kl_loss_7": 198.33607559204103, |
| "learning_rate": 0.00043827798722365264, |
| "loss": 560.7217, |
| "step": 5440 |
| }, |
| { |
| "ce_loss_10": 3.744398605823517, |
| "ce_loss_13": 3.681015205383301, |
| "ce_loss_2": 4.201505517959594, |
| "ce_loss_3": 4.03549770116806, |
| "ce_loss_7": 3.788591706752777, |
| "epoch": 0.545, |
| "grad_norm": 352.0, |
| "kl_loss_10": 99.98037643432617, |
| "kl_loss_2": 1095.4162628173829, |
| "kl_loss_3": 762.5562530517578, |
| "kl_loss_7": 196.50249557495118, |
| "learning_rate": 0.00043670377321312535, |
| "loss": 539.1079, |
| "step": 5450 |
| }, |
| { |
| "ce_loss_10": 3.7459957599639893, |
| "ce_loss_13": 3.6846879959106444, |
| "ce_loss_2": 4.2025530457496645, |
| "ce_loss_3": 4.042814528942108, |
| "ce_loss_7": 3.789257228374481, |
| "epoch": 0.546, |
| "grad_norm": 346.0, |
| "kl_loss_10": 99.90774993896484, |
| "kl_loss_2": 1095.3400299072266, |
| "kl_loss_3": 761.9524017333985, |
| "kl_loss_7": 193.25130310058594, |
| "learning_rate": 0.0004351301965948991, |
| "loss": 550.9912, |
| "step": 5460 |
| }, |
| { |
| "ce_loss_10": 3.6544747233390806, |
| "ce_loss_13": 3.5925102829933167, |
| "ce_loss_2": 4.1156612753868105, |
| "ce_loss_3": 3.9492591619491577, |
| "ce_loss_7": 3.700915348529816, |
| "epoch": 0.547, |
| "grad_norm": 446.0, |
| "kl_loss_10": 99.69101219177246, |
| "kl_loss_2": 1097.9489288330078, |
| "kl_loss_3": 763.9795166015625, |
| "kl_loss_7": 193.2705093383789, |
| "learning_rate": 0.000433557273214873, |
| "loss": 548.6603, |
| "step": 5470 |
| }, |
| { |
| "ce_loss_10": 3.6407829880714417, |
| "ce_loss_13": 3.58055636882782, |
| "ce_loss_2": 4.112010169029236, |
| "ce_loss_3": 3.9410730838775634, |
| "ce_loss_7": 3.6900732636451723, |
| "epoch": 0.548, |
| "grad_norm": 364.0, |
| "kl_loss_10": 96.30272674560547, |
| "kl_loss_2": 1104.9110717773438, |
| "kl_loss_3": 764.0930358886719, |
| "kl_loss_7": 193.28277206420898, |
| "learning_rate": 0.000431985018912368, |
| "loss": 539.9292, |
| "step": 5480 |
| }, |
| { |
| "ce_loss_10": 3.6089709639549254, |
| "ce_loss_13": 3.5466750621795655, |
| "ce_loss_2": 4.105600357055664, |
| "ce_loss_3": 3.9258901715278625, |
| "ce_loss_7": 3.658845567703247, |
| "epoch": 0.549, |
| "grad_norm": 428.0, |
| "kl_loss_10": 98.85242919921875, |
| "kl_loss_2": 1163.1305419921875, |
| "kl_loss_3": 809.7076019287109, |
| "kl_loss_7": 198.85261154174805, |
| "learning_rate": 0.0004304134495199674, |
| "loss": 550.7034, |
| "step": 5490 |
| }, |
| { |
| "ce_loss_10": 3.638536274433136, |
| "ce_loss_13": 3.575793814659119, |
| "ce_loss_2": 4.123488712310791, |
| "ce_loss_3": 3.954343330860138, |
| "ce_loss_7": 3.685023546218872, |
| "epoch": 0.55, |
| "grad_norm": 488.0, |
| "kl_loss_10": 99.10371284484863, |
| "kl_loss_2": 1163.9283081054687, |
| "kl_loss_3": 806.7497436523438, |
| "kl_loss_7": 200.15425338745118, |
| "learning_rate": 0.0004288425808633575, |
| "loss": 555.8719, |
| "step": 5500 |
| }, |
| { |
| "ce_loss_10": 3.6068961024284363, |
| "ce_loss_13": 3.5489359140396117, |
| "ce_loss_2": 4.091926336288452, |
| "ce_loss_3": 3.914480412006378, |
| "ce_loss_7": 3.653805840015411, |
| "epoch": 0.551, |
| "grad_norm": 482.0, |
| "kl_loss_10": 95.30807762145996, |
| "kl_loss_2": 1135.6305114746094, |
| "kl_loss_3": 782.8162139892578, |
| "kl_loss_7": 192.36727905273438, |
| "learning_rate": 0.0004272724287611684, |
| "loss": 551.1164, |
| "step": 5510 |
| }, |
| { |
| "ce_loss_10": 3.5843793511390687, |
| "ce_loss_13": 3.5220483541488647, |
| "ce_loss_2": 4.066782796382904, |
| "ce_loss_3": 3.8880024194717406, |
| "ce_loss_7": 3.628884470462799, |
| "epoch": 0.552, |
| "grad_norm": 472.0, |
| "kl_loss_10": 98.19914245605469, |
| "kl_loss_2": 1138.4930938720704, |
| "kl_loss_3": 792.6924499511719, |
| "kl_loss_7": 197.34004135131835, |
| "learning_rate": 0.00042570300902481425, |
| "loss": 550.9366, |
| "step": 5520 |
| }, |
| { |
| "ce_loss_10": 3.6187870144844054, |
| "ce_loss_13": 3.559086096286774, |
| "ce_loss_2": 4.0836735486984255, |
| "ce_loss_3": 3.913509225845337, |
| "ce_loss_7": 3.662268269062042, |
| "epoch": 0.553, |
| "grad_norm": 460.0, |
| "kl_loss_10": 96.8458236694336, |
| "kl_loss_2": 1113.29208984375, |
| "kl_loss_3": 778.9167602539062, |
| "kl_loss_7": 192.73130722045897, |
| "learning_rate": 0.00042413433745833423, |
| "loss": 545.5068, |
| "step": 5530 |
| }, |
| { |
| "ce_loss_10": 3.6217783451080323, |
| "ce_loss_13": 3.5588382482528687, |
| "ce_loss_2": 4.102611029148102, |
| "ce_loss_3": 3.9288668751716616, |
| "ce_loss_7": 3.667692792415619, |
| "epoch": 0.554, |
| "grad_norm": 394.0, |
| "kl_loss_10": 99.64076881408691, |
| "kl_loss_2": 1129.861962890625, |
| "kl_loss_3": 781.159780883789, |
| "kl_loss_7": 194.5426254272461, |
| "learning_rate": 0.0004225664298582339, |
| "loss": 538.3319, |
| "step": 5540 |
| }, |
| { |
| "ce_loss_10": 3.7008472084999084, |
| "ce_loss_13": 3.6404882073402405, |
| "ce_loss_2": 4.157876873016358, |
| "ce_loss_3": 3.9944301009178163, |
| "ce_loss_7": 3.7464569926261904, |
| "epoch": 0.555, |
| "grad_norm": 352.0, |
| "kl_loss_10": 98.0084358215332, |
| "kl_loss_2": 1092.2807312011719, |
| "kl_loss_3": 758.9974426269531, |
| "kl_loss_7": 191.41172409057617, |
| "learning_rate": 0.000420999302013325, |
| "loss": 539.2247, |
| "step": 5550 |
| }, |
| { |
| "ce_loss_10": 3.5973586678504943, |
| "ce_loss_13": 3.534582734107971, |
| "ce_loss_2": 4.09981359243393, |
| "ce_loss_3": 3.9165178179740905, |
| "ce_loss_7": 3.6474678754806518, |
| "epoch": 0.556, |
| "grad_norm": 454.0, |
| "kl_loss_10": 99.95339088439941, |
| "kl_loss_2": 1148.3679443359374, |
| "kl_loss_3": 795.4782531738281, |
| "kl_loss_7": 199.34042739868164, |
| "learning_rate": 0.000419432969704568, |
| "loss": 547.6515, |
| "step": 5560 |
| }, |
| { |
| "ce_loss_10": 3.6402106523513793, |
| "ce_loss_13": 3.580482280254364, |
| "ce_loss_2": 4.112204611301422, |
| "ce_loss_3": 3.9463653802871703, |
| "ce_loss_7": 3.6864510416984557, |
| "epoch": 0.557, |
| "grad_norm": 374.0, |
| "kl_loss_10": 97.21049270629882, |
| "kl_loss_2": 1103.2306396484375, |
| "kl_loss_3": 765.6696472167969, |
| "kl_loss_7": 192.21127700805664, |
| "learning_rate": 0.00041786744870491154, |
| "loss": 552.003, |
| "step": 5570 |
| }, |
| { |
| "ce_loss_10": 3.5763687014579775, |
| "ce_loss_13": 3.513793337345123, |
| "ce_loss_2": 4.059341847896576, |
| "ce_loss_3": 3.8873541951179504, |
| "ce_loss_7": 3.6242376923561097, |
| "epoch": 0.558, |
| "grad_norm": 496.0, |
| "kl_loss_10": 99.6470874786377, |
| "kl_loss_2": 1146.4394836425781, |
| "kl_loss_3": 799.3714019775391, |
| "kl_loss_7": 198.99811019897462, |
| "learning_rate": 0.0004163027547791347, |
| "loss": 555.3918, |
| "step": 5580 |
| }, |
| { |
| "ce_loss_10": 3.550457501411438, |
| "ce_loss_13": 3.490234684944153, |
| "ce_loss_2": 4.058210396766663, |
| "ce_loss_3": 3.8777605056762696, |
| "ce_loss_7": 3.5981253504753115, |
| "epoch": 0.559, |
| "grad_norm": 362.0, |
| "kl_loss_10": 96.0154800415039, |
| "kl_loss_2": 1166.6077453613282, |
| "kl_loss_3": 807.5666870117187, |
| "kl_loss_7": 196.15278396606445, |
| "learning_rate": 0.0004147389036836881, |
| "loss": 556.2604, |
| "step": 5590 |
| }, |
| { |
| "ce_loss_10": 3.606854057312012, |
| "ce_loss_13": 3.545392167568207, |
| "ce_loss_2": 4.097903311252594, |
| "ce_loss_3": 3.924593436717987, |
| "ce_loss_7": 3.652910280227661, |
| "epoch": 0.56, |
| "grad_norm": 580.0, |
| "kl_loss_10": 99.4388584136963, |
| "kl_loss_2": 1150.4553649902343, |
| "kl_loss_3": 802.4499359130859, |
| "kl_loss_7": 196.6334327697754, |
| "learning_rate": 0.00041317591116653486, |
| "loss": 563.6437, |
| "step": 5600 |
| }, |
| { |
| "ce_loss_10": 3.6449447154998778, |
| "ce_loss_13": 3.5830300569534304, |
| "ce_loss_2": 4.1296777606010435, |
| "ce_loss_3": 3.9580175995826723, |
| "ce_loss_7": 3.695024287700653, |
| "epoch": 0.561, |
| "grad_norm": 528.0, |
| "kl_loss_10": 100.15715980529785, |
| "kl_loss_2": 1137.3770324707032, |
| "kl_loss_3": 786.0429443359375, |
| "kl_loss_7": 199.2029815673828, |
| "learning_rate": 0.0004116137929669921, |
| "loss": 545.8336, |
| "step": 5610 |
| }, |
| { |
| "ce_loss_10": 3.6345237135887145, |
| "ce_loss_13": 3.575796604156494, |
| "ce_loss_2": 4.1131403088569645, |
| "ce_loss_3": 3.940467345714569, |
| "ce_loss_7": 3.6807628154754637, |
| "epoch": 0.562, |
| "grad_norm": 388.0, |
| "kl_loss_10": 95.75808372497559, |
| "kl_loss_2": 1128.9722564697265, |
| "kl_loss_3": 784.2019927978515, |
| "kl_loss_7": 193.04863052368165, |
| "learning_rate": 0.00041005256481557305, |
| "loss": 543.754, |
| "step": 5620 |
| }, |
| { |
| "ce_loss_10": 3.7401763558387757, |
| "ce_loss_13": 3.6805962681770326, |
| "ce_loss_2": 4.185898721218109, |
| "ce_loss_3": 4.027551281452179, |
| "ce_loss_7": 3.783228611946106, |
| "epoch": 0.563, |
| "grad_norm": 516.0, |
| "kl_loss_10": 96.21339073181153, |
| "kl_loss_2": 1061.5840301513672, |
| "kl_loss_3": 738.3266693115235, |
| "kl_loss_7": 187.24717712402344, |
| "learning_rate": 0.00040849224243382767, |
| "loss": 533.9922, |
| "step": 5630 |
| }, |
| { |
| "ce_loss_10": 3.5920221328735353, |
| "ce_loss_13": 3.5324007272720337, |
| "ce_loss_2": 4.072316908836365, |
| "ce_loss_3": 3.8983843684196473, |
| "ce_loss_7": 3.6374821186065676, |
| "epoch": 0.564, |
| "grad_norm": 338.0, |
| "kl_loss_10": 95.43405532836914, |
| "kl_loss_2": 1128.149676513672, |
| "kl_loss_3": 783.0245666503906, |
| "kl_loss_7": 193.40655746459962, |
| "learning_rate": 0.000406932841534185, |
| "loss": 541.5961, |
| "step": 5640 |
| }, |
| { |
| "ce_loss_10": 3.5484704256057737, |
| "ce_loss_13": 3.486864137649536, |
| "ce_loss_2": 4.036842632293701, |
| "ce_loss_3": 3.8651990056037904, |
| "ce_loss_7": 3.597266983985901, |
| "epoch": 0.565, |
| "grad_norm": 604.0, |
| "kl_loss_10": 95.5288932800293, |
| "kl_loss_2": 1141.9300598144532, |
| "kl_loss_3": 797.4877136230468, |
| "kl_loss_7": 194.9025909423828, |
| "learning_rate": 0.0004053743778197951, |
| "loss": 559.9006, |
| "step": 5650 |
| }, |
| { |
| "ce_loss_10": 3.6602503299713134, |
| "ce_loss_13": 3.596804344654083, |
| "ce_loss_2": 4.136269843578338, |
| "ce_loss_3": 3.967122423648834, |
| "ce_loss_7": 3.7048738479614256, |
| "epoch": 0.566, |
| "grad_norm": 418.0, |
| "kl_loss_10": 101.36623306274414, |
| "kl_loss_2": 1114.9331634521484, |
| "kl_loss_3": 774.6735748291015, |
| "kl_loss_7": 196.29364929199218, |
| "learning_rate": 0.0004038168669843697, |
| "loss": 553.1191, |
| "step": 5660 |
| }, |
| { |
| "ce_loss_10": 3.6255574107170103, |
| "ce_loss_13": 3.5639535069465635, |
| "ce_loss_2": 4.085369718074799, |
| "ce_loss_3": 3.919695568084717, |
| "ce_loss_7": 3.6704380750656127, |
| "epoch": 0.567, |
| "grad_norm": 736.0, |
| "kl_loss_10": 98.19256973266602, |
| "kl_loss_2": 1100.560809326172, |
| "kl_loss_3": 765.2613342285156, |
| "kl_loss_7": 192.50554656982422, |
| "learning_rate": 0.000402260324712026, |
| "loss": 547.8535, |
| "step": 5670 |
| }, |
| { |
| "ce_loss_10": 3.669494354724884, |
| "ce_loss_13": 3.60741925239563, |
| "ce_loss_2": 4.148016309738159, |
| "ce_loss_3": 3.9792763590812683, |
| "ce_loss_7": 3.7149499893188476, |
| "epoch": 0.568, |
| "grad_norm": 498.0, |
| "kl_loss_10": 99.65063438415527, |
| "kl_loss_2": 1126.1991058349608, |
| "kl_loss_3": 783.0127746582032, |
| "kl_loss_7": 194.12312698364258, |
| "learning_rate": 0.00040070476667712743, |
| "loss": 543.5942, |
| "step": 5680 |
| }, |
| { |
| "ce_loss_10": 3.7005011796951295, |
| "ce_loss_13": 3.6357283353805543, |
| "ce_loss_2": 4.166275656223297, |
| "ce_loss_3": 4.000016844272613, |
| "ce_loss_7": 3.745167064666748, |
| "epoch": 0.569, |
| "grad_norm": 356.0, |
| "kl_loss_10": 100.85004692077636, |
| "kl_loss_2": 1110.8271209716797, |
| "kl_loss_3": 770.1773681640625, |
| "kl_loss_7": 194.5637939453125, |
| "learning_rate": 0.0003991502085441259, |
| "loss": 548.6594, |
| "step": 5690 |
| }, |
| { |
| "ce_loss_10": 3.729709804058075, |
| "ce_loss_13": 3.6688971519470215, |
| "ce_loss_2": 4.18160834312439, |
| "ce_loss_3": 4.015434455871582, |
| "ce_loss_7": 3.7735623002052305, |
| "epoch": 0.57, |
| "grad_norm": 374.0, |
| "kl_loss_10": 98.11349868774414, |
| "kl_loss_2": 1070.740576171875, |
| "kl_loss_3": 744.3529327392578, |
| "kl_loss_7": 190.02555770874022, |
| "learning_rate": 0.0003975966659674047, |
| "loss": 541.8046, |
| "step": 5700 |
| }, |
| { |
| "ce_loss_10": 3.691783332824707, |
| "ce_loss_13": 3.6318029403686523, |
| "ce_loss_2": 4.161513650417328, |
| "ce_loss_3": 3.986249303817749, |
| "ce_loss_7": 3.73646023273468, |
| "epoch": 0.571, |
| "grad_norm": 536.0, |
| "kl_loss_10": 98.58754501342773, |
| "kl_loss_2": 1102.3078491210938, |
| "kl_loss_3": 759.7488586425782, |
| "kl_loss_7": 191.9359992980957, |
| "learning_rate": 0.0003960441545911204, |
| "loss": 538.4236, |
| "step": 5710 |
| }, |
| { |
| "ce_loss_10": 3.6897791981697083, |
| "ce_loss_13": 3.6274471282958984, |
| "ce_loss_2": 4.157203590869903, |
| "ce_loss_3": 3.9884847044944762, |
| "ce_loss_7": 3.736597418785095, |
| "epoch": 0.572, |
| "grad_norm": 600.0, |
| "kl_loss_10": 97.47168769836426, |
| "kl_loss_2": 1115.5811676025392, |
| "kl_loss_3": 773.7587646484375, |
| "kl_loss_7": 193.96655197143554, |
| "learning_rate": 0.0003944926900490452, |
| "loss": 541.7897, |
| "step": 5720 |
| }, |
| { |
| "ce_loss_10": 3.6022287607192993, |
| "ce_loss_13": 3.541483187675476, |
| "ce_loss_2": 4.094926071166992, |
| "ce_loss_3": 3.9194396138191223, |
| "ce_loss_7": 3.65008407831192, |
| "epoch": 0.573, |
| "grad_norm": 352.0, |
| "kl_loss_10": 96.51857452392578, |
| "kl_loss_2": 1147.3706939697265, |
| "kl_loss_3": 794.91083984375, |
| "kl_loss_7": 194.98720092773436, |
| "learning_rate": 0.0003929422879644099, |
| "loss": 544.8611, |
| "step": 5730 |
| }, |
| { |
| "ce_loss_10": 3.6093438267707825, |
| "ce_loss_13": 3.5497053503990172, |
| "ce_loss_2": 4.068271553516388, |
| "ce_loss_3": 3.9011916518211365, |
| "ce_loss_7": 3.6547187089920046, |
| "epoch": 0.574, |
| "grad_norm": 426.0, |
| "kl_loss_10": 95.6807746887207, |
| "kl_loss_2": 1107.6688201904296, |
| "kl_loss_3": 763.7292449951171, |
| "kl_loss_7": 189.65744247436524, |
| "learning_rate": 0.0003913929639497462, |
| "loss": 535.444, |
| "step": 5740 |
| }, |
| { |
| "ce_loss_10": 3.5539973855018614, |
| "ce_loss_13": 3.4933292627334596, |
| "ce_loss_2": 4.044394338130951, |
| "ce_loss_3": 3.8677351474761963, |
| "ce_loss_7": 3.6000022888183594, |
| "epoch": 0.575, |
| "grad_norm": 408.0, |
| "kl_loss_10": 95.82653579711913, |
| "kl_loss_2": 1130.1885803222656, |
| "kl_loss_3": 778.0026184082031, |
| "kl_loss_7": 190.79474563598632, |
| "learning_rate": 0.00038984473360672965, |
| "loss": 541.1631, |
| "step": 5750 |
| }, |
| { |
| "ce_loss_10": 3.5721747159957884, |
| "ce_loss_13": 3.5100734710693358, |
| "ce_loss_2": 4.053931272029876, |
| "ce_loss_3": 3.883261811733246, |
| "ce_loss_7": 3.6166505217552185, |
| "epoch": 0.576, |
| "grad_norm": 436.0, |
| "kl_loss_10": 95.3091812133789, |
| "kl_loss_2": 1128.7456329345703, |
| "kl_loss_3": 780.4191925048829, |
| "kl_loss_7": 190.4754554748535, |
| "learning_rate": 0.0003882976125260229, |
| "loss": 539.7566, |
| "step": 5760 |
| }, |
| { |
| "ce_loss_10": 3.638679492473602, |
| "ce_loss_13": 3.5770092844963073, |
| "ce_loss_2": 4.1140677571296695, |
| "ce_loss_3": 3.9416022896766663, |
| "ce_loss_7": 3.6866235971450805, |
| "epoch": 0.577, |
| "grad_norm": 366.0, |
| "kl_loss_10": 98.93351516723632, |
| "kl_loss_2": 1112.5931701660156, |
| "kl_loss_3": 770.6242248535157, |
| "kl_loss_7": 191.9038848876953, |
| "learning_rate": 0.00038675161628711776, |
| "loss": 545.2976, |
| "step": 5770 |
| }, |
| { |
| "ce_loss_10": 3.678569030761719, |
| "ce_loss_13": 3.616915798187256, |
| "ce_loss_2": 4.1388965249061584, |
| "ce_loss_3": 3.9749330997467043, |
| "ce_loss_7": 3.722931241989136, |
| "epoch": 0.578, |
| "grad_norm": 404.0, |
| "kl_loss_10": 97.5284637451172, |
| "kl_loss_2": 1093.5021606445312, |
| "kl_loss_3": 761.3094451904296, |
| "kl_loss_7": 191.26370391845703, |
| "learning_rate": 0.0003852067604581794, |
| "loss": 553.459, |
| "step": 5780 |
| }, |
| { |
| "ce_loss_10": 3.6174680829048156, |
| "ce_loss_13": 3.5550846695899962, |
| "ce_loss_2": 4.100849425792694, |
| "ce_loss_3": 3.927929162979126, |
| "ce_loss_7": 3.665549111366272, |
| "epoch": 0.579, |
| "grad_norm": 502.0, |
| "kl_loss_10": 97.5420696258545, |
| "kl_loss_2": 1125.1912048339843, |
| "kl_loss_3": 782.9702056884765, |
| "kl_loss_7": 193.16246643066407, |
| "learning_rate": 0.0003836630605958888, |
| "loss": 543.639, |
| "step": 5790 |
| }, |
| { |
| "ce_loss_10": 3.6780447602272033, |
| "ce_loss_13": 3.616100025177002, |
| "ce_loss_2": 4.136243522167206, |
| "ce_loss_3": 3.9725910425186157, |
| "ce_loss_7": 3.7234076499938964, |
| "epoch": 0.58, |
| "grad_norm": 506.0, |
| "kl_loss_10": 99.15894927978516, |
| "kl_loss_2": 1117.2952941894532, |
| "kl_loss_3": 777.6545166015625, |
| "kl_loss_7": 194.16991271972657, |
| "learning_rate": 0.0003821205322452863, |
| "loss": 560.4495, |
| "step": 5800 |
| }, |
| { |
| "ce_loss_10": 3.657036304473877, |
| "ce_loss_13": 3.5961548686027527, |
| "ce_loss_2": 4.118453872203827, |
| "ce_loss_3": 3.948525774478912, |
| "ce_loss_7": 3.7012171149253845, |
| "epoch": 0.581, |
| "grad_norm": 438.0, |
| "kl_loss_10": 98.11412734985352, |
| "kl_loss_2": 1098.6213439941407, |
| "kl_loss_3": 759.3198364257812, |
| "kl_loss_7": 189.98369064331055, |
| "learning_rate": 0.0003805791909396155, |
| "loss": 541.5742, |
| "step": 5810 |
| }, |
| { |
| "ce_loss_10": 3.6096495151519776, |
| "ce_loss_13": 3.550210452079773, |
| "ce_loss_2": 4.077665090560913, |
| "ce_loss_3": 3.9094552993774414, |
| "ce_loss_7": 3.654946839809418, |
| "epoch": 0.582, |
| "grad_norm": 428.0, |
| "kl_loss_10": 95.98116798400879, |
| "kl_loss_2": 1109.6123931884765, |
| "kl_loss_3": 763.3366668701171, |
| "kl_loss_7": 189.48765182495117, |
| "learning_rate": 0.0003790390522001662, |
| "loss": 547.1139, |
| "step": 5820 |
| }, |
| { |
| "ce_loss_10": 3.538465416431427, |
| "ce_loss_13": 3.4795125126838684, |
| "ce_loss_2": 4.019526553153992, |
| "ce_loss_3": 3.8418781757354736, |
| "ce_loss_7": 3.5831465244293215, |
| "epoch": 0.583, |
| "grad_norm": 354.0, |
| "kl_loss_10": 94.34587249755859, |
| "kl_loss_2": 1136.918035888672, |
| "kl_loss_3": 784.7109252929688, |
| "kl_loss_7": 191.27632827758788, |
| "learning_rate": 0.0003775001315361183, |
| "loss": 542.445, |
| "step": 5830 |
| }, |
| { |
| "ce_loss_10": 3.659132921695709, |
| "ce_loss_13": 3.596101534366608, |
| "ce_loss_2": 4.132727253437042, |
| "ce_loss_3": 3.958163845539093, |
| "ce_loss_7": 3.704639720916748, |
| "epoch": 0.584, |
| "grad_norm": 298.0, |
| "kl_loss_10": 98.75731201171875, |
| "kl_loss_2": 1122.0884033203124, |
| "kl_loss_3": 776.4772644042969, |
| "kl_loss_7": 193.22739944458007, |
| "learning_rate": 0.0003759624444443858, |
| "loss": 544.9992, |
| "step": 5840 |
| }, |
| { |
| "ce_loss_10": 3.6889251112937926, |
| "ce_loss_13": 3.6282206773757935, |
| "ce_loss_2": 4.151758980751038, |
| "ce_loss_3": 3.9822983741760254, |
| "ce_loss_7": 3.732993245124817, |
| "epoch": 0.585, |
| "grad_norm": 346.0, |
| "kl_loss_10": 99.06045837402344, |
| "kl_loss_2": 1097.8614471435546, |
| "kl_loss_3": 758.9134582519531, |
| "kl_loss_7": 191.27917098999023, |
| "learning_rate": 0.00037442600640946044, |
| "loss": 536.17, |
| "step": 5850 |
| }, |
| { |
| "ce_loss_10": 3.6461440443992617, |
| "ce_loss_13": 3.5892478227615356, |
| "ce_loss_2": 4.105236732959748, |
| "ce_loss_3": 3.9375507473945617, |
| "ce_loss_7": 3.692450475692749, |
| "epoch": 0.586, |
| "grad_norm": 408.0, |
| "kl_loss_10": 94.86803092956544, |
| "kl_loss_2": 1099.2377655029297, |
| "kl_loss_3": 758.3301605224609, |
| "kl_loss_7": 189.78098831176757, |
| "learning_rate": 0.00037289083290325663, |
| "loss": 531.0057, |
| "step": 5860 |
| }, |
| { |
| "ce_loss_10": 3.63515100479126, |
| "ce_loss_13": 3.574202799797058, |
| "ce_loss_2": 4.095511162281037, |
| "ce_loss_3": 3.930715727806091, |
| "ce_loss_7": 3.6794507265090943, |
| "epoch": 0.587, |
| "grad_norm": 540.0, |
| "kl_loss_10": 97.98805313110351, |
| "kl_loss_2": 1091.7025299072266, |
| "kl_loss_3": 757.6223114013671, |
| "kl_loss_7": 191.85128860473634, |
| "learning_rate": 0.0003713569393849543, |
| "loss": 533.4333, |
| "step": 5870 |
| }, |
| { |
| "ce_loss_10": 3.6827593207359315, |
| "ce_loss_13": 3.6205956816673277, |
| "ce_loss_2": 4.148468089103699, |
| "ce_loss_3": 3.978341579437256, |
| "ce_loss_7": 3.7273068189620973, |
| "epoch": 0.588, |
| "grad_norm": 398.0, |
| "kl_loss_10": 98.60938911437988, |
| "kl_loss_2": 1107.6281311035157, |
| "kl_loss_3": 765.0102233886719, |
| "kl_loss_7": 192.96542663574218, |
| "learning_rate": 0.00036982434130084397, |
| "loss": 541.5767, |
| "step": 5880 |
| }, |
| { |
| "ce_loss_10": 3.589915359020233, |
| "ce_loss_13": 3.5286367654800417, |
| "ce_loss_2": 4.061057722568512, |
| "ce_loss_3": 3.8881011605262756, |
| "ce_loss_7": 3.6373565912246706, |
| "epoch": 0.589, |
| "grad_norm": 506.0, |
| "kl_loss_10": 97.51137619018554, |
| "kl_loss_2": 1115.5977966308594, |
| "kl_loss_3": 775.6395446777344, |
| "kl_loss_7": 195.47111892700195, |
| "learning_rate": 0.00036829305408417166, |
| "loss": 546.8446, |
| "step": 5890 |
| }, |
| { |
| "ce_loss_10": 3.5797411799430847, |
| "ce_loss_13": 3.5188158631324766, |
| "ce_loss_2": 4.067822527885437, |
| "ce_loss_3": 3.893584966659546, |
| "ce_loss_7": 3.6291656494140625, |
| "epoch": 0.59, |
| "grad_norm": 364.0, |
| "kl_loss_10": 96.57020835876465, |
| "kl_loss_2": 1141.290579223633, |
| "kl_loss_3": 789.6200988769531, |
| "kl_loss_7": 196.76195220947267, |
| "learning_rate": 0.0003667630931549826, |
| "loss": 548.8211, |
| "step": 5900 |
| }, |
| { |
| "ce_loss_10": 3.547331213951111, |
| "ce_loss_13": 3.4874081373214723, |
| "ce_loss_2": 4.03765162229538, |
| "ce_loss_3": 3.8655640482902527, |
| "ce_loss_7": 3.5946906566619874, |
| "epoch": 0.591, |
| "grad_norm": 454.0, |
| "kl_loss_10": 95.69526252746581, |
| "kl_loss_2": 1154.8450256347655, |
| "kl_loss_3": 798.5165588378907, |
| "kl_loss_7": 194.5025749206543, |
| "learning_rate": 0.00036523447391996613, |
| "loss": 552.8163, |
| "step": 5910 |
| }, |
| { |
| "ce_loss_10": 3.6425758361816407, |
| "ce_loss_13": 3.5853498816490172, |
| "ce_loss_2": 4.10631023645401, |
| "ce_loss_3": 3.9402198076248167, |
| "ce_loss_7": 3.690027916431427, |
| "epoch": 0.592, |
| "grad_norm": 432.0, |
| "kl_loss_10": 94.87303581237794, |
| "kl_loss_2": 1090.1558319091796, |
| "kl_loss_3": 756.7847717285156, |
| "kl_loss_7": 189.84710311889648, |
| "learning_rate": 0.00036370721177230114, |
| "loss": 533.6673, |
| "step": 5920 |
| }, |
| { |
| "ce_loss_10": 3.635672652721405, |
| "ce_loss_13": 3.577661764621735, |
| "ce_loss_2": 4.114610862731934, |
| "ce_loss_3": 3.9419226169586183, |
| "ce_loss_7": 3.681511878967285, |
| "epoch": 0.593, |
| "grad_norm": 326.0, |
| "kl_loss_10": 95.39519729614258, |
| "kl_loss_2": 1127.0120056152343, |
| "kl_loss_3": 780.4901336669922, |
| "kl_loss_7": 194.04692993164062, |
| "learning_rate": 0.00036218132209150044, |
| "loss": 545.1962, |
| "step": 5930 |
| }, |
| { |
| "ce_loss_10": 3.593142592906952, |
| "ce_loss_13": 3.530347979068756, |
| "ce_loss_2": 4.095171976089477, |
| "ce_loss_3": 3.920231354236603, |
| "ce_loss_7": 3.645453596115112, |
| "epoch": 0.594, |
| "grad_norm": 378.0, |
| "kl_loss_10": 99.63440895080566, |
| "kl_loss_2": 1173.4297882080077, |
| "kl_loss_3": 813.8213714599609, |
| "kl_loss_7": 199.65494766235352, |
| "learning_rate": 0.0003606568202432562, |
| "loss": 557.0208, |
| "step": 5940 |
| }, |
| { |
| "ce_loss_10": 3.665185475349426, |
| "ce_loss_13": 3.6032612800598143, |
| "ce_loss_2": 4.14498724937439, |
| "ce_loss_3": 3.9701961159706114, |
| "ce_loss_7": 3.7108847856521607, |
| "epoch": 0.595, |
| "grad_norm": 528.0, |
| "kl_loss_10": 99.43977394104004, |
| "kl_loss_2": 1140.6280212402344, |
| "kl_loss_3": 787.1899200439453, |
| "kl_loss_7": 195.35167922973633, |
| "learning_rate": 0.0003591337215792851, |
| "loss": 544.2271, |
| "step": 5950 |
| }, |
| { |
| "ce_loss_10": 3.706349265575409, |
| "ce_loss_13": 3.64465993642807, |
| "ce_loss_2": 4.152172029018402, |
| "ce_loss_3": 3.9943688988685606, |
| "ce_loss_7": 3.7489245533943176, |
| "epoch": 0.596, |
| "grad_norm": 356.0, |
| "kl_loss_10": 99.39506378173829, |
| "kl_loss_2": 1087.233724975586, |
| "kl_loss_3": 759.1374755859375, |
| "kl_loss_7": 190.80716857910156, |
| "learning_rate": 0.00035761204143717383, |
| "loss": 544.3471, |
| "step": 5960 |
| }, |
| { |
| "ce_loss_10": 3.6578794836997988, |
| "ce_loss_13": 3.5957969784736634, |
| "ce_loss_2": 4.119996964931488, |
| "ce_loss_3": 3.9552765846252442, |
| "ce_loss_7": 3.7025834202766417, |
| "epoch": 0.597, |
| "grad_norm": 400.0, |
| "kl_loss_10": 99.01246032714843, |
| "kl_loss_2": 1115.1319488525392, |
| "kl_loss_3": 774.3078552246094, |
| "kl_loss_7": 193.01641845703125, |
| "learning_rate": 0.0003560917951402245, |
| "loss": 556.3752, |
| "step": 5970 |
| }, |
| { |
| "ce_loss_10": 3.632036602497101, |
| "ce_loss_13": 3.5740628480911254, |
| "ce_loss_2": 4.0921210765838625, |
| "ce_loss_3": 3.9307610511779787, |
| "ce_loss_7": 3.6746655702590942, |
| "epoch": 0.598, |
| "grad_norm": 412.0, |
| "kl_loss_10": 95.97110137939453, |
| "kl_loss_2": 1101.7569305419922, |
| "kl_loss_3": 768.7692047119141, |
| "kl_loss_7": 189.95830230712892, |
| "learning_rate": 0.00035457299799730046, |
| "loss": 538.1885, |
| "step": 5980 |
| }, |
| { |
| "ce_loss_10": 3.69617702960968, |
| "ce_loss_13": 3.6354240775108337, |
| "ce_loss_2": 4.163921213150024, |
| "ce_loss_3": 3.993851900100708, |
| "ce_loss_7": 3.7415480971336366, |
| "epoch": 0.599, |
| "grad_norm": 388.0, |
| "kl_loss_10": 96.27426452636719, |
| "kl_loss_2": 1105.9306549072267, |
| "kl_loss_3": 762.228305053711, |
| "kl_loss_7": 190.51752395629882, |
| "learning_rate": 0.0003530556653026721, |
| "loss": 545.8183, |
| "step": 5990 |
| }, |
| { |
| "ce_loss_10": 3.611501228809357, |
| "ce_loss_13": 3.5530946016311646, |
| "ce_loss_2": 4.07593857049942, |
| "ce_loss_3": 3.9016834497451782, |
| "ce_loss_7": 3.6570339798927307, |
| "epoch": 0.6, |
| "grad_norm": 1424.0, |
| "kl_loss_10": 94.48569107055664, |
| "kl_loss_2": 1108.4388488769532, |
| "kl_loss_3": 760.983023071289, |
| "kl_loss_7": 188.30435333251953, |
| "learning_rate": 0.00035153981233586274, |
| "loss": 543.2547, |
| "step": 6000 |
| }, |
| { |
| "ce_loss_10": 3.589734137058258, |
| "ce_loss_13": 3.5291273951530457, |
| "ce_loss_2": 4.066950809955597, |
| "ce_loss_3": 3.8936201214790342, |
| "ce_loss_7": 3.6356727838516236, |
| "epoch": 0.601, |
| "grad_norm": 478.0, |
| "kl_loss_10": 95.43113746643067, |
| "kl_loss_2": 1117.119808959961, |
| "kl_loss_3": 769.7344940185546, |
| "kl_loss_7": 188.8736831665039, |
| "learning_rate": 0.00035002545436149473, |
| "loss": 555.4068, |
| "step": 6010 |
| }, |
| { |
| "ce_loss_10": 3.603361654281616, |
| "ce_loss_13": 3.5395719528198244, |
| "ce_loss_2": 4.084376287460327, |
| "ce_loss_3": 3.913386416435242, |
| "ce_loss_7": 3.6495144724845887, |
| "epoch": 0.602, |
| "grad_norm": 414.0, |
| "kl_loss_10": 99.58069725036621, |
| "kl_loss_2": 1138.4922149658203, |
| "kl_loss_3": 791.1285461425781, |
| "kl_loss_7": 196.0400062561035, |
| "learning_rate": 0.0003485126066291364, |
| "loss": 543.3661, |
| "step": 6020 |
| }, |
| { |
| "ce_loss_10": 3.6472663640975953, |
| "ce_loss_13": 3.586405074596405, |
| "ce_loss_2": 4.12690646648407, |
| "ce_loss_3": 3.9540088891983034, |
| "ce_loss_7": 3.6910028219223023, |
| "epoch": 0.603, |
| "grad_norm": 426.0, |
| "kl_loss_10": 97.50395317077637, |
| "kl_loss_2": 1120.6384643554688, |
| "kl_loss_3": 773.8977966308594, |
| "kl_loss_7": 189.96464309692382, |
| "learning_rate": 0.0003470012843731476, |
| "loss": 547.4742, |
| "step": 6030 |
| }, |
| { |
| "ce_loss_10": 3.587485361099243, |
| "ce_loss_13": 3.527864229679108, |
| "ce_loss_2": 4.065750408172607, |
| "ce_loss_3": 3.8930314064025877, |
| "ce_loss_7": 3.6307687997817992, |
| "epoch": 0.604, |
| "grad_norm": 450.0, |
| "kl_loss_10": 95.93178520202636, |
| "kl_loss_2": 1125.8798370361328, |
| "kl_loss_3": 778.0897277832031, |
| "kl_loss_7": 190.32968826293944, |
| "learning_rate": 0.00034549150281252633, |
| "loss": 553.9461, |
| "step": 6040 |
| }, |
| { |
| "ce_loss_10": 3.567354416847229, |
| "ce_loss_13": 3.5087788224220278, |
| "ce_loss_2": 4.041226005554199, |
| "ce_loss_3": 3.868573796749115, |
| "ce_loss_7": 3.613230037689209, |
| "epoch": 0.605, |
| "grad_norm": 376.0, |
| "kl_loss_10": 96.31193771362305, |
| "kl_loss_2": 1101.1357208251952, |
| "kl_loss_3": 760.5451019287109, |
| "kl_loss_7": 190.99923782348634, |
| "learning_rate": 0.0003439832771507565, |
| "loss": 537.7418, |
| "step": 6050 |
| }, |
| { |
| "ce_loss_10": 3.569633936882019, |
| "ce_loss_13": 3.5091484904289247, |
| "ce_loss_2": 4.052746975421906, |
| "ce_loss_3": 3.8793442845344543, |
| "ce_loss_7": 3.6145769238471983, |
| "epoch": 0.606, |
| "grad_norm": 364.0, |
| "kl_loss_10": 96.17846641540527, |
| "kl_loss_2": 1126.9381469726563, |
| "kl_loss_3": 780.4287139892579, |
| "kl_loss_7": 191.24787139892578, |
| "learning_rate": 0.0003424766225756537, |
| "loss": 539.2611, |
| "step": 6060 |
| }, |
| { |
| "ce_loss_10": 3.6349270820617674, |
| "ce_loss_13": 3.5724891662597655, |
| "ce_loss_2": 4.110528755187988, |
| "ce_loss_3": 3.9370043516159057, |
| "ce_loss_7": 3.679009509086609, |
| "epoch": 0.607, |
| "grad_norm": 380.0, |
| "kl_loss_10": 98.61342163085938, |
| "kl_loss_2": 1107.0002716064453, |
| "kl_loss_3": 763.0299987792969, |
| "kl_loss_7": 192.68891830444335, |
| "learning_rate": 0.00034097155425921255, |
| "loss": 535.4806, |
| "step": 6070 |
| }, |
| { |
| "ce_loss_10": 3.5260583400726317, |
| "ce_loss_13": 3.4644631028175352, |
| "ce_loss_2": 4.0014289021492, |
| "ce_loss_3": 3.829664409160614, |
| "ce_loss_7": 3.571485424041748, |
| "epoch": 0.608, |
| "grad_norm": 422.0, |
| "kl_loss_10": 95.72014465332032, |
| "kl_loss_2": 1128.9732635498046, |
| "kl_loss_3": 780.0001983642578, |
| "kl_loss_7": 191.94852294921876, |
| "learning_rate": 0.0003394680873574546, |
| "loss": 542.5872, |
| "step": 6080 |
| }, |
| { |
| "ce_loss_10": 3.638583517074585, |
| "ce_loss_13": 3.5754881620407106, |
| "ce_loss_2": 4.1181090593338014, |
| "ce_loss_3": 3.9476171731948853, |
| "ce_loss_7": 3.6838363647460937, |
| "epoch": 0.609, |
| "grad_norm": 402.0, |
| "kl_loss_10": 99.43503112792969, |
| "kl_loss_2": 1131.3410400390626, |
| "kl_loss_3": 782.6971099853515, |
| "kl_loss_7": 192.93393096923828, |
| "learning_rate": 0.0003379662370102747, |
| "loss": 542.0118, |
| "step": 6090 |
| }, |
| { |
| "ce_loss_10": 3.6437841415405274, |
| "ce_loss_13": 3.5835014939308167, |
| "ce_loss_2": 4.107234466075897, |
| "ce_loss_3": 3.9407611727714538, |
| "ce_loss_7": 3.689082384109497, |
| "epoch": 0.61, |
| "grad_norm": 378.0, |
| "kl_loss_10": 95.95064582824708, |
| "kl_loss_2": 1116.5803283691407, |
| "kl_loss_3": 769.8769500732421, |
| "kl_loss_7": 190.42120208740235, |
| "learning_rate": 0.0003364660183412892, |
| "loss": 543.2468, |
| "step": 6100 |
| }, |
| { |
| "ce_loss_10": 3.6229702949523928, |
| "ce_loss_13": 3.5642863631248476, |
| "ce_loss_2": 4.082474946975708, |
| "ce_loss_3": 3.920805549621582, |
| "ce_loss_7": 3.6692759871482847, |
| "epoch": 0.611, |
| "grad_norm": 438.0, |
| "kl_loss_10": 95.98471641540527, |
| "kl_loss_2": 1107.3975402832032, |
| "kl_loss_3": 770.6610443115235, |
| "kl_loss_7": 191.18293151855468, |
| "learning_rate": 0.0003349674464576834, |
| "loss": 547.1137, |
| "step": 6110 |
| }, |
| { |
| "ce_loss_10": 3.572301459312439, |
| "ce_loss_13": 3.5100274682044983, |
| "ce_loss_2": 4.04880428314209, |
| "ce_loss_3": 3.87799711227417, |
| "ce_loss_7": 3.6172243118286134, |
| "epoch": 0.612, |
| "grad_norm": 400.0, |
| "kl_loss_10": 97.55015258789062, |
| "kl_loss_2": 1121.5612213134766, |
| "kl_loss_3": 776.7356872558594, |
| "kl_loss_7": 191.68118591308593, |
| "learning_rate": 0.00033347053645005966, |
| "loss": 533.933, |
| "step": 6120 |
| }, |
| { |
| "ce_loss_10": 3.6915227651596068, |
| "ce_loss_13": 3.6307403206825257, |
| "ce_loss_2": 4.149306988716125, |
| "ce_loss_3": 3.986075186729431, |
| "ce_loss_7": 3.7352558612823485, |
| "epoch": 0.613, |
| "grad_norm": 456.0, |
| "kl_loss_10": 97.44704780578613, |
| "kl_loss_2": 1082.3290954589843, |
| "kl_loss_3": 751.4226776123047, |
| "kl_loss_7": 188.24736099243165, |
| "learning_rate": 0.00033197530339228485, |
| "loss": 541.4641, |
| "step": 6130 |
| }, |
| { |
| "ce_loss_10": 3.6387569904327393, |
| "ce_loss_13": 3.5774574756622313, |
| "ce_loss_2": 4.1059521555900576, |
| "ce_loss_3": 3.9463557958602906, |
| "ce_loss_7": 3.686212944984436, |
| "epoch": 0.614, |
| "grad_norm": 320.0, |
| "kl_loss_10": 97.79526100158691, |
| "kl_loss_2": 1105.2626007080078, |
| "kl_loss_3": 773.3958312988282, |
| "kl_loss_7": 193.28426208496094, |
| "learning_rate": 0.00033048176234133967, |
| "loss": 539.6668, |
| "step": 6140 |
| }, |
| { |
| "ce_loss_10": 3.6235718965530395, |
| "ce_loss_13": 3.563166308403015, |
| "ce_loss_2": 4.0937678694725035, |
| "ce_loss_3": 3.9205260276794434, |
| "ce_loss_7": 3.6674267172813417, |
| "epoch": 0.615, |
| "grad_norm": 434.0, |
| "kl_loss_10": 96.52788619995117, |
| "kl_loss_2": 1108.4606842041017, |
| "kl_loss_3": 766.9316375732421, |
| "kl_loss_7": 191.76514892578126, |
| "learning_rate": 0.0003289899283371657, |
| "loss": 545.3005, |
| "step": 6150 |
| }, |
| { |
| "ce_loss_10": 3.6545772314071656, |
| "ce_loss_13": 3.5920246958732607, |
| "ce_loss_2": 4.122934722900391, |
| "ce_loss_3": 3.954042661190033, |
| "ce_loss_7": 3.7002484798431396, |
| "epoch": 0.616, |
| "grad_norm": 512.0, |
| "kl_loss_10": 96.86014366149902, |
| "kl_loss_2": 1110.978466796875, |
| "kl_loss_3": 763.7065795898437, |
| "kl_loss_7": 189.29309463500977, |
| "learning_rate": 0.0003274998164025148, |
| "loss": 546.4095, |
| "step": 6160 |
| }, |
| { |
| "ce_loss_10": 3.687037003040314, |
| "ce_loss_13": 3.62383953332901, |
| "ce_loss_2": 4.151339697837829, |
| "ce_loss_3": 3.982528305053711, |
| "ce_loss_7": 3.730109751224518, |
| "epoch": 0.617, |
| "grad_norm": 420.0, |
| "kl_loss_10": 98.5214340209961, |
| "kl_loss_2": 1105.55556640625, |
| "kl_loss_3": 765.5938140869141, |
| "kl_loss_7": 192.1310241699219, |
| "learning_rate": 0.0003260114415427975, |
| "loss": 551.3336, |
| "step": 6170 |
| }, |
| { |
| "ce_loss_10": 3.6019906878471373, |
| "ce_loss_13": 3.543228101730347, |
| "ce_loss_2": 4.074436497688294, |
| "ce_loss_3": 3.910420286655426, |
| "ce_loss_7": 3.650412619113922, |
| "epoch": 0.618, |
| "grad_norm": 326.0, |
| "kl_loss_10": 96.38783836364746, |
| "kl_loss_2": 1118.9971984863282, |
| "kl_loss_3": 780.3896484375, |
| "kl_loss_7": 191.8697937011719, |
| "learning_rate": 0.0003245248187459323, |
| "loss": 553.7879, |
| "step": 6180 |
| }, |
| { |
| "ce_loss_10": 3.5864107251167296, |
| "ce_loss_13": 3.53016597032547, |
| "ce_loss_2": 4.042503225803375, |
| "ce_loss_3": 3.874970889091492, |
| "ce_loss_7": 3.6281535744667055, |
| "epoch": 0.619, |
| "grad_norm": 418.0, |
| "kl_loss_10": 92.61179161071777, |
| "kl_loss_2": 1080.5412902832031, |
| "kl_loss_3": 743.7303924560547, |
| "kl_loss_7": 185.2653793334961, |
| "learning_rate": 0.00032303996298219416, |
| "loss": 531.9591, |
| "step": 6190 |
| }, |
| { |
| "ce_loss_10": 3.6777410745620727, |
| "ce_loss_13": 3.6153058767318726, |
| "ce_loss_2": 4.135652315616608, |
| "ce_loss_3": 3.968917655944824, |
| "ce_loss_7": 3.723153126239777, |
| "epoch": 0.62, |
| "grad_norm": 328.0, |
| "kl_loss_10": 97.44341430664062, |
| "kl_loss_2": 1081.0255004882813, |
| "kl_loss_3": 750.4999420166016, |
| "kl_loss_7": 189.73232498168946, |
| "learning_rate": 0.00032155688920406414, |
| "loss": 532.518, |
| "step": 6200 |
| }, |
| { |
| "ce_loss_10": 3.587769341468811, |
| "ce_loss_13": 3.524891209602356, |
| "ce_loss_2": 4.075685119628906, |
| "ce_loss_3": 3.896172082424164, |
| "ce_loss_7": 3.6351929187774656, |
| "epoch": 0.621, |
| "grad_norm": 376.0, |
| "kl_loss_10": 100.48479537963867, |
| "kl_loss_2": 1141.9940368652344, |
| "kl_loss_3": 786.613900756836, |
| "kl_loss_7": 195.81097640991212, |
| "learning_rate": 0.0003200756123460788, |
| "loss": 557.093, |
| "step": 6210 |
| }, |
| { |
| "ce_loss_10": 3.613728904724121, |
| "ce_loss_13": 3.5514798045158384, |
| "ce_loss_2": 4.097208368778229, |
| "ce_loss_3": 3.922844612598419, |
| "ce_loss_7": 3.6612853050231933, |
| "epoch": 0.622, |
| "grad_norm": 436.0, |
| "kl_loss_10": 98.99568367004395, |
| "kl_loss_2": 1137.439712524414, |
| "kl_loss_3": 786.5532501220703, |
| "kl_loss_7": 195.3180892944336, |
| "learning_rate": 0.00031859614732467957, |
| "loss": 552.2858, |
| "step": 6220 |
| }, |
| { |
| "ce_loss_10": 3.668611526489258, |
| "ce_loss_13": 3.6079549193382263, |
| "ce_loss_2": 4.123561811447144, |
| "ce_loss_3": 3.957008695602417, |
| "ce_loss_7": 3.7130979537963866, |
| "epoch": 0.623, |
| "grad_norm": 436.0, |
| "kl_loss_10": 96.12700805664062, |
| "kl_loss_2": 1085.7240417480468, |
| "kl_loss_3": 750.371206665039, |
| "kl_loss_7": 188.20330352783202, |
| "learning_rate": 0.00031711850903806275, |
| "loss": 532.2347, |
| "step": 6230 |
| }, |
| { |
| "ce_loss_10": 3.5722012281417848, |
| "ce_loss_13": 3.5121172070503235, |
| "ce_loss_2": 4.05529419183731, |
| "ce_loss_3": 3.8803335189819337, |
| "ce_loss_7": 3.6196384906768797, |
| "epoch": 0.624, |
| "grad_norm": 372.0, |
| "kl_loss_10": 98.26438941955567, |
| "kl_loss_2": 1135.1425506591797, |
| "kl_loss_3": 784.2965637207031, |
| "kl_loss_7": 195.50869674682616, |
| "learning_rate": 0.0003156427123660297, |
| "loss": 544.6104, |
| "step": 6240 |
| }, |
| { |
| "ce_loss_10": 3.663820195198059, |
| "ce_loss_13": 3.6021278977394102, |
| "ce_loss_2": 4.12639445066452, |
| "ce_loss_3": 3.9577032327651978, |
| "ce_loss_7": 3.709599566459656, |
| "epoch": 0.625, |
| "grad_norm": 376.0, |
| "kl_loss_10": 96.6868911743164, |
| "kl_loss_2": 1095.9533905029298, |
| "kl_loss_3": 760.994189453125, |
| "kl_loss_7": 189.45380859375, |
| "learning_rate": 0.0003141687721698363, |
| "loss": 542.975, |
| "step": 6250 |
| }, |
| { |
| "ce_loss_10": 3.6301703572273256, |
| "ce_loss_13": 3.5708668351173403, |
| "ce_loss_2": 4.076251423358917, |
| "ce_loss_3": 3.9138960361480715, |
| "ce_loss_7": 3.6724702954292296, |
| "epoch": 0.626, |
| "grad_norm": 424.0, |
| "kl_loss_10": 94.79209213256836, |
| "kl_loss_2": 1062.4459991455078, |
| "kl_loss_3": 731.6935089111328, |
| "kl_loss_7": 183.48223037719725, |
| "learning_rate": 0.00031269670329204396, |
| "loss": 531.0972, |
| "step": 6260 |
| }, |
| { |
| "ce_loss_10": 3.6652311086654663, |
| "ce_loss_13": 3.6031481981277467, |
| "ce_loss_2": 4.122557854652404, |
| "ce_loss_3": 3.9541366934776305, |
| "ce_loss_7": 3.707497763633728, |
| "epoch": 0.627, |
| "grad_norm": 404.0, |
| "kl_loss_10": 97.36745681762696, |
| "kl_loss_2": 1087.3731384277344, |
| "kl_loss_3": 749.8712615966797, |
| "kl_loss_7": 189.97913208007813, |
| "learning_rate": 0.00031122652055637015, |
| "loss": 536.5034, |
| "step": 6270 |
| }, |
| { |
| "ce_loss_10": 3.6263384938240053, |
| "ce_loss_13": 3.5657132387161257, |
| "ce_loss_2": 4.101957285404206, |
| "ce_loss_3": 3.9301799178123473, |
| "ce_loss_7": 3.671717309951782, |
| "epoch": 0.628, |
| "grad_norm": 320.0, |
| "kl_loss_10": 97.96914176940918, |
| "kl_loss_2": 1132.4724700927734, |
| "kl_loss_3": 779.5158935546875, |
| "kl_loss_7": 193.307218170166, |
| "learning_rate": 0.0003097582387675385, |
| "loss": 538.5988, |
| "step": 6280 |
| }, |
| { |
| "ce_loss_10": 3.6690368175506594, |
| "ce_loss_13": 3.608207333087921, |
| "ce_loss_2": 4.131593143939972, |
| "ce_loss_3": 3.967103731632233, |
| "ce_loss_7": 3.714122140407562, |
| "epoch": 0.629, |
| "grad_norm": 380.0, |
| "kl_loss_10": 97.3248161315918, |
| "kl_loss_2": 1100.8168243408204, |
| "kl_loss_3": 758.5913757324219, |
| "kl_loss_7": 190.2446075439453, |
| "learning_rate": 0.00030829187271113034, |
| "loss": 533.383, |
| "step": 6290 |
| }, |
| { |
| "ce_loss_10": 3.6720826983451844, |
| "ce_loss_13": 3.6116329789161683, |
| "ce_loss_2": 4.121181070804596, |
| "ce_loss_3": 3.958890378475189, |
| "ce_loss_7": 3.713034725189209, |
| "epoch": 0.63, |
| "grad_norm": 474.0, |
| "kl_loss_10": 95.86663208007812, |
| "kl_loss_2": 1078.529071044922, |
| "kl_loss_3": 747.6958526611328, |
| "kl_loss_7": 186.88264846801758, |
| "learning_rate": 0.00030682743715343565, |
| "loss": 538.6207, |
| "step": 6300 |
| }, |
| { |
| "ce_loss_10": 3.6168052315711976, |
| "ce_loss_13": 3.5534343481063844, |
| "ce_loss_2": 4.1001279830932615, |
| "ce_loss_3": 3.926764929294586, |
| "ce_loss_7": 3.6654592990875243, |
| "epoch": 0.631, |
| "grad_norm": 352.0, |
| "kl_loss_10": 98.38105430603028, |
| "kl_loss_2": 1116.2974884033204, |
| "kl_loss_3": 769.4165740966797, |
| "kl_loss_7": 194.41071319580078, |
| "learning_rate": 0.0003053649468413043, |
| "loss": 544.2852, |
| "step": 6310 |
| }, |
| { |
| "ce_loss_10": 3.728801727294922, |
| "ce_loss_13": 3.6677038788795473, |
| "ce_loss_2": 4.186562621593476, |
| "ce_loss_3": 4.021135902404785, |
| "ce_loss_7": 3.7726667642593386, |
| "epoch": 0.632, |
| "grad_norm": 548.0, |
| "kl_loss_10": 98.36889610290527, |
| "kl_loss_2": 1106.3314636230468, |
| "kl_loss_3": 764.3384338378906, |
| "kl_loss_7": 193.92676391601563, |
| "learning_rate": 0.00030390441650199725, |
| "loss": 534.6711, |
| "step": 6320 |
| }, |
| { |
| "ce_loss_10": 3.6225173473358154, |
| "ce_loss_13": 3.564038324356079, |
| "ce_loss_2": 4.088936626911163, |
| "ce_loss_3": 3.9200194835662843, |
| "ce_loss_7": 3.6701310753822325, |
| "epoch": 0.633, |
| "grad_norm": 390.0, |
| "kl_loss_10": 93.89363708496094, |
| "kl_loss_2": 1093.413995361328, |
| "kl_loss_3": 755.4691772460938, |
| "kl_loss_7": 188.9584762573242, |
| "learning_rate": 0.00030244586084303903, |
| "loss": 531.6465, |
| "step": 6330 |
| }, |
| { |
| "ce_loss_10": 3.5908933520317077, |
| "ce_loss_13": 3.530228877067566, |
| "ce_loss_2": 4.073009943962097, |
| "ce_loss_3": 3.908068907260895, |
| "ce_loss_7": 3.6380571484565736, |
| "epoch": 0.634, |
| "grad_norm": 362.0, |
| "kl_loss_10": 96.08535652160644, |
| "kl_loss_2": 1137.027798461914, |
| "kl_loss_3": 794.3090057373047, |
| "kl_loss_7": 193.36979446411132, |
| "learning_rate": 0.00030098929455206903, |
| "loss": 541.8852, |
| "step": 6340 |
| }, |
| { |
| "ce_loss_10": 3.5973508238792418, |
| "ce_loss_13": 3.538694751262665, |
| "ce_loss_2": 4.059111332893371, |
| "ce_loss_3": 3.8917571187019346, |
| "ce_loss_7": 3.6398496866226195, |
| "epoch": 0.635, |
| "grad_norm": 396.0, |
| "kl_loss_10": 95.19868698120118, |
| "kl_loss_2": 1117.9919860839843, |
| "kl_loss_3": 769.856167602539, |
| "kl_loss_7": 189.57870178222657, |
| "learning_rate": 0.00029953473229669324, |
| "loss": 545.9079, |
| "step": 6350 |
| }, |
| { |
| "ce_loss_10": 3.6316630482673644, |
| "ce_loss_13": 3.5723133206367494, |
| "ce_loss_2": 4.099796783924103, |
| "ce_loss_3": 3.9292221426963807, |
| "ce_loss_7": 3.6748278617858885, |
| "epoch": 0.636, |
| "grad_norm": 382.0, |
| "kl_loss_10": 94.04772453308105, |
| "kl_loss_2": 1105.0771392822267, |
| "kl_loss_3": 767.0107574462891, |
| "kl_loss_7": 189.39691848754882, |
| "learning_rate": 0.00029808218872433767, |
| "loss": 534.2105, |
| "step": 6360 |
| }, |
| { |
| "ce_loss_10": 3.6887783288955687, |
| "ce_loss_13": 3.6287707686424255, |
| "ce_loss_2": 4.1434108257293705, |
| "ce_loss_3": 3.9780289769172668, |
| "ce_loss_7": 3.7338571667671205, |
| "epoch": 0.637, |
| "grad_norm": 402.0, |
| "kl_loss_10": 97.2003547668457, |
| "kl_loss_2": 1086.371304321289, |
| "kl_loss_3": 753.1467376708985, |
| "kl_loss_7": 190.29918899536133, |
| "learning_rate": 0.0002966316784621, |
| "loss": 530.8481, |
| "step": 6370 |
| }, |
| { |
| "ce_loss_10": 3.5995650410652162, |
| "ce_loss_13": 3.5394855737686157, |
| "ce_loss_2": 4.081933212280274, |
| "ce_loss_3": 3.905743455886841, |
| "ce_loss_7": 3.6461820721626284, |
| "epoch": 0.638, |
| "grad_norm": 392.0, |
| "kl_loss_10": 94.92418899536133, |
| "kl_loss_2": 1131.0511108398437, |
| "kl_loss_3": 782.9240203857422, |
| "kl_loss_7": 192.17471160888672, |
| "learning_rate": 0.0002951832161166024, |
| "loss": 537.9302, |
| "step": 6380 |
| }, |
| { |
| "ce_loss_10": 3.6817028760910033, |
| "ce_loss_13": 3.619114363193512, |
| "ce_loss_2": 4.15013542175293, |
| "ce_loss_3": 3.980035495758057, |
| "ce_loss_7": 3.726088798046112, |
| "epoch": 0.639, |
| "grad_norm": 284.0, |
| "kl_loss_10": 99.42742652893067, |
| "kl_loss_2": 1089.2870971679688, |
| "kl_loss_3": 758.1006713867188, |
| "kl_loss_7": 192.03466110229493, |
| "learning_rate": 0.0002937368162738445, |
| "loss": 530.5328, |
| "step": 6390 |
| }, |
| { |
| "ce_loss_10": 3.6132258057594298, |
| "ce_loss_13": 3.557306098937988, |
| "ce_loss_2": 4.071500968933106, |
| "ce_loss_3": 3.905410099029541, |
| "ce_loss_7": 3.6560685634613037, |
| "epoch": 0.64, |
| "grad_norm": 580.0, |
| "kl_loss_10": 93.17153434753418, |
| "kl_loss_2": 1090.426809692383, |
| "kl_loss_3": 756.628515625, |
| "kl_loss_7": 185.41258697509767, |
| "learning_rate": 0.0002922924934990568, |
| "loss": 537.7791, |
| "step": 6400 |
| }, |
| { |
| "ce_loss_10": 3.553709554672241, |
| "ce_loss_13": 3.495926034450531, |
| "ce_loss_2": 4.037974917888642, |
| "ce_loss_3": 3.862057626247406, |
| "ce_loss_7": 3.5978724122047425, |
| "epoch": 0.641, |
| "grad_norm": 316.0, |
| "kl_loss_10": 94.70829887390137, |
| "kl_loss_2": 1132.230615234375, |
| "kl_loss_3": 780.3255004882812, |
| "kl_loss_7": 189.6028953552246, |
| "learning_rate": 0.0002908502623365536, |
| "loss": 541.2746, |
| "step": 6410 |
| }, |
| { |
| "ce_loss_10": 3.493143379688263, |
| "ce_loss_13": 3.4340757846832277, |
| "ce_loss_2": 3.982888162136078, |
| "ce_loss_3": 3.8087966442108154, |
| "ce_loss_7": 3.541613507270813, |
| "epoch": 0.642, |
| "grad_norm": 448.0, |
| "kl_loss_10": 93.92830047607421, |
| "kl_loss_2": 1141.5694763183594, |
| "kl_loss_3": 791.2887268066406, |
| "kl_loss_7": 189.8411407470703, |
| "learning_rate": 0.0002894101373095867, |
| "loss": 544.0511, |
| "step": 6420 |
| }, |
| { |
| "ce_loss_10": 3.7018409371376038, |
| "ce_loss_13": 3.641219162940979, |
| "ce_loss_2": 4.160841226577759, |
| "ce_loss_3": 3.996344065666199, |
| "ce_loss_7": 3.7449718475341798, |
| "epoch": 0.643, |
| "grad_norm": 444.0, |
| "kl_loss_10": 98.50596771240234, |
| "kl_loss_2": 1096.2253509521483, |
| "kl_loss_3": 759.2389587402344, |
| "kl_loss_7": 191.72063598632812, |
| "learning_rate": 0.00028797213292019926, |
| "loss": 535.7118, |
| "step": 6430 |
| }, |
| { |
| "ce_loss_10": 3.679163944721222, |
| "ce_loss_13": 3.6178041219711305, |
| "ce_loss_2": 4.137241208553315, |
| "ce_loss_3": 3.9736143589019775, |
| "ce_loss_7": 3.7223108887672423, |
| "epoch": 0.644, |
| "grad_norm": 316.0, |
| "kl_loss_10": 96.37056579589844, |
| "kl_loss_2": 1093.3028533935546, |
| "kl_loss_3": 763.8056060791016, |
| "kl_loss_7": 190.55449371337892, |
| "learning_rate": 0.0002865362636490791, |
| "loss": 543.9671, |
| "step": 6440 |
| }, |
| { |
| "ce_loss_10": 3.689470386505127, |
| "ce_loss_13": 3.6325947284698485, |
| "ce_loss_2": 4.151259076595307, |
| "ce_loss_3": 3.9852967262268066, |
| "ce_loss_7": 3.7347108364105224, |
| "epoch": 0.645, |
| "grad_norm": 422.0, |
| "kl_loss_10": 95.76711997985839, |
| "kl_loss_2": 1101.8473754882812, |
| "kl_loss_3": 757.8740173339844, |
| "kl_loss_7": 188.20162200927734, |
| "learning_rate": 0.0002851025439554142, |
| "loss": 532.7338, |
| "step": 6450 |
| }, |
| { |
| "ce_loss_10": 3.6879691004753115, |
| "ce_loss_13": 3.6268020391464235, |
| "ce_loss_2": 4.149470102787018, |
| "ce_loss_3": 3.9827425360679625, |
| "ce_loss_7": 3.732300865650177, |
| "epoch": 0.646, |
| "grad_norm": 432.0, |
| "kl_loss_10": 96.89583930969238, |
| "kl_loss_2": 1086.1058197021484, |
| "kl_loss_3": 754.8961853027344, |
| "kl_loss_7": 190.88655471801758, |
| "learning_rate": 0.00028367098827674573, |
| "loss": 531.1024, |
| "step": 6460 |
| }, |
| { |
| "ce_loss_10": 3.613504183292389, |
| "ce_loss_13": 3.552918183803558, |
| "ce_loss_2": 4.07694593667984, |
| "ce_loss_3": 3.9072110176086428, |
| "ce_loss_7": 3.656181883811951, |
| "epoch": 0.647, |
| "grad_norm": 382.0, |
| "kl_loss_10": 95.70045394897461, |
| "kl_loss_2": 1088.4426727294922, |
| "kl_loss_3": 747.3143646240235, |
| "kl_loss_7": 185.63362350463868, |
| "learning_rate": 0.00028224161102882397, |
| "loss": 534.1186, |
| "step": 6470 |
| }, |
| { |
| "ce_loss_10": 3.591862881183624, |
| "ce_loss_13": 3.5325499296188356, |
| "ce_loss_2": 4.047231125831604, |
| "ce_loss_3": 3.8850304007530214, |
| "ce_loss_7": 3.6327146530151366, |
| "epoch": 0.648, |
| "grad_norm": 398.0, |
| "kl_loss_10": 97.32144050598144, |
| "kl_loss_2": 1084.3862060546876, |
| "kl_loss_3": 756.0506072998047, |
| "kl_loss_7": 188.20642013549804, |
| "learning_rate": 0.00028081442660546124, |
| "loss": 534.4936, |
| "step": 6480 |
| }, |
| { |
| "ce_loss_10": 3.6528772950172423, |
| "ce_loss_13": 3.593310809135437, |
| "ce_loss_2": 4.104138958454132, |
| "ce_loss_3": 3.940169370174408, |
| "ce_loss_7": 3.6972940802574157, |
| "epoch": 0.649, |
| "grad_norm": 442.0, |
| "kl_loss_10": 96.56869812011719, |
| "kl_loss_2": 1082.232455444336, |
| "kl_loss_3": 748.2576446533203, |
| "kl_loss_7": 188.56612319946288, |
| "learning_rate": 0.0002793894493783892, |
| "loss": 535.3609, |
| "step": 6490 |
| }, |
| { |
| "ce_loss_10": 3.671093225479126, |
| "ce_loss_13": 3.6125397443771363, |
| "ce_loss_2": 4.120749580860138, |
| "ce_loss_3": 3.957093584537506, |
| "ce_loss_7": 3.715547430515289, |
| "epoch": 0.65, |
| "grad_norm": 340.0, |
| "kl_loss_10": 95.52767143249511, |
| "kl_loss_2": 1081.513833618164, |
| "kl_loss_3": 750.0977233886719, |
| "kl_loss_7": 185.41107177734375, |
| "learning_rate": 0.0002779666936971129, |
| "loss": 530.5015, |
| "step": 6500 |
| }, |
| { |
| "ce_loss_10": 3.6747244358062745, |
| "ce_loss_13": 3.6157574892044066, |
| "ce_loss_2": 4.147137761116028, |
| "ce_loss_3": 3.9802316427230835, |
| "ce_loss_7": 3.7200183868408203, |
| "epoch": 0.651, |
| "grad_norm": 388.0, |
| "kl_loss_10": 96.378706741333, |
| "kl_loss_2": 1104.2031311035157, |
| "kl_loss_3": 768.3699279785156, |
| "kl_loss_7": 190.13947677612305, |
| "learning_rate": 0.00027654617388876614, |
| "loss": 540.9622, |
| "step": 6510 |
| }, |
| { |
| "ce_loss_10": 3.7085010170936585, |
| "ce_loss_13": 3.650082528591156, |
| "ce_loss_2": 4.159732723236084, |
| "ce_loss_3": 3.9939939975738525, |
| "ce_loss_7": 3.752064514160156, |
| "epoch": 0.652, |
| "grad_norm": 372.0, |
| "kl_loss_10": 98.8690299987793, |
| "kl_loss_2": 1084.27646484375, |
| "kl_loss_3": 749.1016296386719, |
| "kl_loss_7": 189.19281463623048, |
| "learning_rate": 0.0002751279042579672, |
| "loss": 533.7532, |
| "step": 6520 |
| }, |
| { |
| "ce_loss_10": 3.6514885902404783, |
| "ce_loss_13": 3.589630663394928, |
| "ce_loss_2": 4.104155695438385, |
| "ce_loss_3": 3.9368098855018614, |
| "ce_loss_7": 3.696379566192627, |
| "epoch": 0.653, |
| "grad_norm": 388.0, |
| "kl_loss_10": 98.10863304138184, |
| "kl_loss_2": 1078.5175903320312, |
| "kl_loss_3": 739.8918975830078, |
| "kl_loss_7": 187.05665588378906, |
| "learning_rate": 0.00027371189908667604, |
| "loss": 535.8568, |
| "step": 6530 |
| }, |
| { |
| "ce_loss_10": 3.6950425028800966, |
| "ce_loss_13": 3.6345377445220945, |
| "ce_loss_2": 4.172570693492889, |
| "ce_loss_3": 4.002642476558686, |
| "ce_loss_7": 3.742088866233826, |
| "epoch": 0.654, |
| "grad_norm": 456.0, |
| "kl_loss_10": 98.50621490478515, |
| "kl_loss_2": 1120.8493408203126, |
| "kl_loss_3": 772.4739196777343, |
| "kl_loss_7": 194.52065811157226, |
| "learning_rate": 0.00027229817263404863, |
| "loss": 550.1683, |
| "step": 6540 |
| }, |
| { |
| "ce_loss_10": 3.678051483631134, |
| "ce_loss_13": 3.6163152933120726, |
| "ce_loss_2": 4.125236618518829, |
| "ce_loss_3": 3.9632533311843874, |
| "ce_loss_7": 3.717917835712433, |
| "epoch": 0.655, |
| "grad_norm": 354.0, |
| "kl_loss_10": 97.52188301086426, |
| "kl_loss_2": 1072.0729919433593, |
| "kl_loss_3": 745.5059295654297, |
| "kl_loss_7": 187.41375122070312, |
| "learning_rate": 0.0002708867391362948, |
| "loss": 530.4727, |
| "step": 6550 |
| }, |
| { |
| "ce_loss_10": 3.659157025814056, |
| "ce_loss_13": 3.5987429141998293, |
| "ce_loss_2": 4.098348212242127, |
| "ce_loss_3": 3.9343943357467652, |
| "ce_loss_7": 3.69932336807251, |
| "epoch": 0.656, |
| "grad_norm": 380.0, |
| "kl_loss_10": 95.51490859985351, |
| "kl_loss_2": 1048.09501953125, |
| "kl_loss_3": 723.2193145751953, |
| "kl_loss_7": 183.38801651000978, |
| "learning_rate": 0.0002694776128065345, |
| "loss": 526.4233, |
| "step": 6560 |
| }, |
| { |
| "ce_loss_10": 3.5926573395729067, |
| "ce_loss_13": 3.5355629920959473, |
| "ce_loss_2": 4.059596955776215, |
| "ce_loss_3": 3.8947146415710447, |
| "ce_loss_7": 3.63899849653244, |
| "epoch": 0.657, |
| "grad_norm": 302.0, |
| "kl_loss_10": 94.25321388244629, |
| "kl_loss_2": 1108.046826171875, |
| "kl_loss_3": 769.1714508056641, |
| "kl_loss_7": 190.54062194824218, |
| "learning_rate": 0.00026807080783465374, |
| "loss": 532.2117, |
| "step": 6570 |
| }, |
| { |
| "ce_loss_10": 3.7099499464035035, |
| "ce_loss_13": 3.6470829010009767, |
| "ce_loss_2": 4.173487448692322, |
| "ce_loss_3": 4.007464277744293, |
| "ce_loss_7": 3.753613090515137, |
| "epoch": 0.658, |
| "grad_norm": 336.0, |
| "kl_loss_10": 98.83243751525879, |
| "kl_loss_2": 1096.7148071289062, |
| "kl_loss_3": 763.6604827880859, |
| "kl_loss_7": 191.30890121459962, |
| "learning_rate": 0.00026666633838716316, |
| "loss": 542.1623, |
| "step": 6580 |
| }, |
| { |
| "ce_loss_10": 3.597714030742645, |
| "ce_loss_13": 3.5341309905052185, |
| "ce_loss_2": 4.0741772770881655, |
| "ce_loss_3": 3.9031991958618164, |
| "ce_loss_7": 3.64434130191803, |
| "epoch": 0.659, |
| "grad_norm": 418.0, |
| "kl_loss_10": 98.79775390625, |
| "kl_loss_2": 1119.104165649414, |
| "kl_loss_3": 772.7665252685547, |
| "kl_loss_7": 193.75399169921874, |
| "learning_rate": 0.00026526421860705474, |
| "loss": 546.4087, |
| "step": 6590 |
| }, |
| { |
| "ce_loss_10": 3.6211095809936524, |
| "ce_loss_13": 3.56248060464859, |
| "ce_loss_2": 4.090437388420105, |
| "ce_loss_3": 3.9254501700401305, |
| "ce_loss_7": 3.669628012180328, |
| "epoch": 0.66, |
| "grad_norm": 388.0, |
| "kl_loss_10": 97.33003234863281, |
| "kl_loss_2": 1100.579428100586, |
| "kl_loss_3": 767.1163055419922, |
| "kl_loss_7": 192.85016250610352, |
| "learning_rate": 0.0002638644626136587, |
| "loss": 535.0932, |
| "step": 6600 |
| }, |
| { |
| "ce_loss_10": 3.632294547557831, |
| "ce_loss_13": 3.5736007690429688, |
| "ce_loss_2": 4.098874115943909, |
| "ce_loss_3": 3.928848695755005, |
| "ce_loss_7": 3.6751357674598695, |
| "epoch": 0.661, |
| "grad_norm": 370.0, |
| "kl_loss_10": 95.11613578796387, |
| "kl_loss_2": 1096.4229095458984, |
| "kl_loss_3": 759.0542449951172, |
| "kl_loss_7": 188.92064208984374, |
| "learning_rate": 0.00026246708450250255, |
| "loss": 537.9207, |
| "step": 6610 |
| }, |
| { |
| "ce_loss_10": 3.6327243566513063, |
| "ce_loss_13": 3.5709309697151186, |
| "ce_loss_2": 4.086973357200622, |
| "ce_loss_3": 3.9239420771598814, |
| "ce_loss_7": 3.675078272819519, |
| "epoch": 0.662, |
| "grad_norm": 450.0, |
| "kl_loss_10": 97.06436119079589, |
| "kl_loss_2": 1079.41337890625, |
| "kl_loss_3": 752.72802734375, |
| "kl_loss_7": 187.51063842773436, |
| "learning_rate": 0.00026107209834516854, |
| "loss": 531.8906, |
| "step": 6620 |
| }, |
| { |
| "ce_loss_10": 3.5740899324417112, |
| "ce_loss_13": 3.5152911067008974, |
| "ce_loss_2": 4.057041144371032, |
| "ce_loss_3": 3.8850310802459718, |
| "ce_loss_7": 3.6205747365951537, |
| "epoch": 0.663, |
| "grad_norm": 326.0, |
| "kl_loss_10": 95.74808731079102, |
| "kl_loss_2": 1136.7873779296874, |
| "kl_loss_3": 780.0463623046875, |
| "kl_loss_7": 190.15955352783203, |
| "learning_rate": 0.0002596795181891514, |
| "loss": 547.2686, |
| "step": 6630 |
| }, |
| { |
| "ce_loss_10": 3.5901227831840514, |
| "ce_loss_13": 3.527127909660339, |
| "ce_loss_2": 4.062633895874024, |
| "ce_loss_3": 3.8958073616027833, |
| "ce_loss_7": 3.63388534784317, |
| "epoch": 0.664, |
| "grad_norm": 488.0, |
| "kl_loss_10": 97.48413009643555, |
| "kl_loss_2": 1119.4189453125, |
| "kl_loss_3": 774.4207427978515, |
| "kl_loss_7": 193.8588966369629, |
| "learning_rate": 0.000258289358057718, |
| "loss": 556.5954, |
| "step": 6640 |
| }, |
| { |
| "ce_loss_10": 3.6630045056343077, |
| "ce_loss_13": 3.6010705709457396, |
| "ce_loss_2": 4.126548099517822, |
| "ce_loss_3": 3.960009717941284, |
| "ce_loss_7": 3.70961674451828, |
| "epoch": 0.665, |
| "grad_norm": 368.0, |
| "kl_loss_10": 97.2126693725586, |
| "kl_loss_2": 1116.2655120849608, |
| "kl_loss_3": 770.7855743408203, |
| "kl_loss_7": 193.7609016418457, |
| "learning_rate": 0.0002569016319497657, |
| "loss": 544.2068, |
| "step": 6650 |
| }, |
| { |
| "ce_loss_10": 3.645352327823639, |
| "ce_loss_13": 3.582920753955841, |
| "ce_loss_2": 4.116545259952545, |
| "ce_loss_3": 3.9502077460289002, |
| "ce_loss_7": 3.6899593830108643, |
| "epoch": 0.666, |
| "grad_norm": 324.0, |
| "kl_loss_10": 98.58149223327636, |
| "kl_loss_2": 1127.1539520263673, |
| "kl_loss_3": 778.5697784423828, |
| "kl_loss_7": 194.4781005859375, |
| "learning_rate": 0.00025551635383968066, |
| "loss": 551.8321, |
| "step": 6660 |
| }, |
| { |
| "ce_loss_10": 3.5590095281600953, |
| "ce_loss_13": 3.497633898258209, |
| "ce_loss_2": 4.0256366491317745, |
| "ce_loss_3": 3.8563454031944273, |
| "ce_loss_7": 3.6033952236175537, |
| "epoch": 0.667, |
| "grad_norm": 386.0, |
| "kl_loss_10": 96.00436630249024, |
| "kl_loss_2": 1115.5439819335938, |
| "kl_loss_3": 764.8407897949219, |
| "kl_loss_7": 191.15278091430665, |
| "learning_rate": 0.00025413353767719804, |
| "loss": 541.5643, |
| "step": 6670 |
| }, |
| { |
| "ce_loss_10": 3.6135716080665587, |
| "ce_loss_13": 3.556279420852661, |
| "ce_loss_2": 4.074564230442047, |
| "ce_loss_3": 3.9083084225654603, |
| "ce_loss_7": 3.6589901089668273, |
| "epoch": 0.668, |
| "grad_norm": 404.0, |
| "kl_loss_10": 95.40520133972169, |
| "kl_loss_2": 1103.0668395996095, |
| "kl_loss_3": 766.21494140625, |
| "kl_loss_7": 187.07973251342773, |
| "learning_rate": 0.0002527531973872617, |
| "loss": 541.5821, |
| "step": 6680 |
| }, |
| { |
| "ce_loss_10": 3.630588722229004, |
| "ce_loss_13": 3.5716015577316282, |
| "ce_loss_2": 4.09862619638443, |
| "ce_loss_3": 3.9337419509887694, |
| "ce_loss_7": 3.6740004658699035, |
| "epoch": 0.669, |
| "grad_norm": 376.0, |
| "kl_loss_10": 94.05056571960449, |
| "kl_loss_2": 1104.580502319336, |
| "kl_loss_3": 767.1347503662109, |
| "kl_loss_7": 187.80085144042968, |
| "learning_rate": 0.0002513753468698826, |
| "loss": 536.7451, |
| "step": 6690 |
| }, |
| { |
| "ce_loss_10": 3.6005271077156067, |
| "ce_loss_13": 3.538683819770813, |
| "ce_loss_2": 4.075844824314117, |
| "ce_loss_3": 3.901875948905945, |
| "ce_loss_7": 3.6449614763259888, |
| "epoch": 0.67, |
| "grad_norm": 392.0, |
| "kl_loss_10": 97.46344718933105, |
| "kl_loss_2": 1117.6306915283203, |
| "kl_loss_3": 769.393521118164, |
| "kl_loss_7": 191.83680877685546, |
| "learning_rate": 0.0002500000000000001, |
| "loss": 543.8447, |
| "step": 6700 |
| }, |
| { |
| "ce_loss_10": 3.7194844245910645, |
| "ce_loss_13": 3.6591498017311097, |
| "ce_loss_2": 4.157877945899964, |
| "ce_loss_3": 3.9965709686279296, |
| "ce_loss_7": 3.7608611464500425, |
| "epoch": 0.671, |
| "grad_norm": 388.0, |
| "kl_loss_10": 96.12382774353027, |
| "kl_loss_2": 1059.211587524414, |
| "kl_loss_3": 732.8135711669922, |
| "kl_loss_7": 185.53207092285157, |
| "learning_rate": 0.0002486271706273421, |
| "loss": 540.9632, |
| "step": 6710 |
| }, |
| { |
| "ce_loss_10": 3.652998185157776, |
| "ce_loss_13": 3.5960669040679933, |
| "ce_loss_2": 4.096874964237213, |
| "ce_loss_3": 3.930626368522644, |
| "ce_loss_7": 3.694219136238098, |
| "epoch": 0.672, |
| "grad_norm": 370.0, |
| "kl_loss_10": 96.1414752960205, |
| "kl_loss_2": 1060.9839447021484, |
| "kl_loss_3": 732.6356231689454, |
| "kl_loss_7": 184.73310241699218, |
| "learning_rate": 0.0002472568725762853, |
| "loss": 531.8145, |
| "step": 6720 |
| }, |
| { |
| "ce_loss_10": 3.644508719444275, |
| "ce_loss_13": 3.585316574573517, |
| "ce_loss_2": 4.077662718296051, |
| "ce_loss_3": 3.923126482963562, |
| "ce_loss_7": 3.6880379915237427, |
| "epoch": 0.673, |
| "grad_norm": 536.0, |
| "kl_loss_10": 95.44480400085449, |
| "kl_loss_2": 1060.1810028076172, |
| "kl_loss_3": 734.1040588378906, |
| "kl_loss_7": 183.89718780517578, |
| "learning_rate": 0.00024588911964571554, |
| "loss": 524.9737, |
| "step": 6730 |
| }, |
| { |
| "ce_loss_10": 3.6595176219940186, |
| "ce_loss_13": 3.5960793495178223, |
| "ce_loss_2": 4.141416406631469, |
| "ce_loss_3": 3.971626269817352, |
| "ce_loss_7": 3.706479799747467, |
| "epoch": 0.674, |
| "grad_norm": 370.0, |
| "kl_loss_10": 101.08820152282715, |
| "kl_loss_2": 1123.6421142578124, |
| "kl_loss_3": 779.8745697021484, |
| "kl_loss_7": 196.79359664916993, |
| "learning_rate": 0.00024452392560888974, |
| "loss": 538.6094, |
| "step": 6740 |
| }, |
| { |
| "ce_loss_10": 3.5484472513198853, |
| "ce_loss_13": 3.4903222799301146, |
| "ce_loss_2": 4.00926810503006, |
| "ce_loss_3": 3.837252104282379, |
| "ce_loss_7": 3.5929391860961912, |
| "epoch": 0.675, |
| "grad_norm": 376.0, |
| "kl_loss_10": 94.44077377319336, |
| "kl_loss_2": 1104.6140991210937, |
| "kl_loss_3": 759.8463775634766, |
| "kl_loss_7": 187.49753799438477, |
| "learning_rate": 0.00024316130421329695, |
| "loss": 531.6798, |
| "step": 6750 |
| }, |
| { |
| "ce_loss_10": 3.63141074180603, |
| "ce_loss_13": 3.5704286813735964, |
| "ce_loss_2": 4.089890336990356, |
| "ce_loss_3": 3.9222849130630495, |
| "ce_loss_7": 3.6722644567489624, |
| "epoch": 0.676, |
| "grad_norm": 320.0, |
| "kl_loss_10": 96.4859691619873, |
| "kl_loss_2": 1072.7287811279298, |
| "kl_loss_3": 740.4257781982421, |
| "kl_loss_7": 185.37494659423828, |
| "learning_rate": 0.00024180126918051909, |
| "loss": 528.9844, |
| "step": 6760 |
| }, |
| { |
| "ce_loss_10": 3.6748690009117126, |
| "ce_loss_13": 3.6154377579689028, |
| "ce_loss_2": 4.126313555240631, |
| "ce_loss_3": 3.959956741333008, |
| "ce_loss_7": 3.719127857685089, |
| "epoch": 0.677, |
| "grad_norm": 494.0, |
| "kl_loss_10": 95.71767883300781, |
| "kl_loss_2": 1071.3604461669922, |
| "kl_loss_3": 739.3463531494141, |
| "kl_loss_7": 186.98586730957032, |
| "learning_rate": 0.00024044383420609406, |
| "loss": 526.4402, |
| "step": 6770 |
| }, |
| { |
| "ce_loss_10": 3.6849735140800477, |
| "ce_loss_13": 3.6251555919647216, |
| "ce_loss_2": 4.126254045963288, |
| "ce_loss_3": 3.9655120730400086, |
| "ce_loss_7": 3.7277087569236755, |
| "epoch": 0.678, |
| "grad_norm": 406.0, |
| "kl_loss_10": 96.21127319335938, |
| "kl_loss_2": 1065.4650268554688, |
| "kl_loss_3": 737.1611053466797, |
| "kl_loss_7": 186.31879425048828, |
| "learning_rate": 0.00023908901295937712, |
| "loss": 532.375, |
| "step": 6780 |
| }, |
| { |
| "ce_loss_10": 3.6866431832313538, |
| "ce_loss_13": 3.621911180019379, |
| "ce_loss_2": 4.138471448421479, |
| "ce_loss_3": 3.9692311763763426, |
| "ce_loss_7": 3.727970468997955, |
| "epoch": 0.679, |
| "grad_norm": 520.0, |
| "kl_loss_10": 97.46222076416015, |
| "kl_loss_2": 1075.2411163330078, |
| "kl_loss_3": 742.8502899169922, |
| "kl_loss_7": 187.16495361328126, |
| "learning_rate": 0.00023773681908340283, |
| "loss": 541.7315, |
| "step": 6790 |
| }, |
| { |
| "ce_loss_10": 3.6525588750839235, |
| "ce_loss_13": 3.590035092830658, |
| "ce_loss_2": 4.125091111660003, |
| "ce_loss_3": 3.955258107185364, |
| "ce_loss_7": 3.6996394038200378, |
| "epoch": 0.68, |
| "grad_norm": 448.0, |
| "kl_loss_10": 100.11968383789062, |
| "kl_loss_2": 1120.372329711914, |
| "kl_loss_3": 775.7205535888672, |
| "kl_loss_7": 195.07009201049806, |
| "learning_rate": 0.00023638726619474876, |
| "loss": 550.8879, |
| "step": 6800 |
| }, |
| { |
| "ce_loss_10": 3.6433764457702638, |
| "ce_loss_13": 3.581800138950348, |
| "ce_loss_2": 4.1252215027809145, |
| "ce_loss_3": 3.95204918384552, |
| "ce_loss_7": 3.68941251039505, |
| "epoch": 0.681, |
| "grad_norm": 380.0, |
| "kl_loss_10": 94.89226531982422, |
| "kl_loss_2": 1121.6464782714843, |
| "kl_loss_3": 776.2536529541015, |
| "kl_loss_7": 190.19580459594727, |
| "learning_rate": 0.0002350403678833976, |
| "loss": 540.7707, |
| "step": 6810 |
| }, |
| { |
| "ce_loss_10": 3.5702003121376036, |
| "ce_loss_13": 3.509978950023651, |
| "ce_loss_2": 4.041775238513947, |
| "ce_loss_3": 3.871393322944641, |
| "ce_loss_7": 3.6151094794273377, |
| "epoch": 0.682, |
| "grad_norm": 316.0, |
| "kl_loss_10": 94.982954788208, |
| "kl_loss_2": 1118.5872802734375, |
| "kl_loss_3": 772.0714935302734, |
| "kl_loss_7": 188.55085983276368, |
| "learning_rate": 0.00023369613771260007, |
| "loss": 536.8643, |
| "step": 6820 |
| }, |
| { |
| "ce_loss_10": 3.688840866088867, |
| "ce_loss_13": 3.6270360946655273, |
| "ce_loss_2": 4.156035900115967, |
| "ce_loss_3": 3.9860677838325502, |
| "ce_loss_7": 3.7326239466667177, |
| "epoch": 0.683, |
| "grad_norm": 410.0, |
| "kl_loss_10": 97.82878112792969, |
| "kl_loss_2": 1106.3897888183594, |
| "kl_loss_3": 766.803921508789, |
| "kl_loss_7": 191.37064056396486, |
| "learning_rate": 0.00023235458921873925, |
| "loss": 544.207, |
| "step": 6830 |
| }, |
| { |
| "ce_loss_10": 3.63765789270401, |
| "ce_loss_13": 3.5765843272209166, |
| "ce_loss_2": 4.12269172668457, |
| "ce_loss_3": 3.953417754173279, |
| "ce_loss_7": 3.6850703358650208, |
| "epoch": 0.684, |
| "grad_norm": 676.0, |
| "kl_loss_10": 97.75669631958007, |
| "kl_loss_2": 1147.8291046142579, |
| "kl_loss_3": 799.1194305419922, |
| "kl_loss_7": 195.58543319702147, |
| "learning_rate": 0.0002310157359111938, |
| "loss": 555.1555, |
| "step": 6840 |
| }, |
| { |
| "ce_loss_10": 3.526192367076874, |
| "ce_loss_13": 3.4662320494651793, |
| "ce_loss_2": 4.027907514572144, |
| "ce_loss_3": 3.8482834458351136, |
| "ce_loss_7": 3.574409317970276, |
| "epoch": 0.685, |
| "grad_norm": 660.0, |
| "kl_loss_10": 96.51494178771972, |
| "kl_loss_2": 1163.1898101806642, |
| "kl_loss_3": 802.0491363525391, |
| "kl_loss_7": 194.50169296264647, |
| "learning_rate": 0.0002296795912722014, |
| "loss": 551.9703, |
| "step": 6850 |
| }, |
| { |
| "ce_loss_10": 3.6707953572273255, |
| "ce_loss_13": 3.6116589188575743, |
| "ce_loss_2": 4.125709581375122, |
| "ce_loss_3": 3.957431602478027, |
| "ce_loss_7": 3.716504216194153, |
| "epoch": 0.686, |
| "grad_norm": 328.0, |
| "kl_loss_10": 96.6977554321289, |
| "kl_loss_2": 1086.6772430419921, |
| "kl_loss_3": 747.0762786865234, |
| "kl_loss_7": 188.86367645263672, |
| "learning_rate": 0.0002283461687567236, |
| "loss": 527.8294, |
| "step": 6860 |
| }, |
| { |
| "ce_loss_10": 3.727082335948944, |
| "ce_loss_13": 3.664930725097656, |
| "ce_loss_2": 4.172837960720062, |
| "ce_loss_3": 4.010821652412415, |
| "ce_loss_7": 3.7691392421722414, |
| "epoch": 0.687, |
| "grad_norm": 334.0, |
| "kl_loss_10": 97.53575859069824, |
| "kl_loss_2": 1058.4923736572266, |
| "kl_loss_3": 731.6483947753907, |
| "kl_loss_7": 186.02228698730468, |
| "learning_rate": 0.00022701548179231045, |
| "loss": 535.9072, |
| "step": 6870 |
| }, |
| { |
| "ce_loss_10": 3.6793978810310364, |
| "ce_loss_13": 3.6168754935264587, |
| "ce_loss_2": 4.133899199962616, |
| "ce_loss_3": 3.9700045347213746, |
| "ce_loss_7": 3.7239136338233947, |
| "epoch": 0.688, |
| "grad_norm": 382.0, |
| "kl_loss_10": 98.03768157958984, |
| "kl_loss_2": 1087.3397521972656, |
| "kl_loss_3": 753.5451507568359, |
| "kl_loss_7": 189.21656646728516, |
| "learning_rate": 0.00022568754377896516, |
| "loss": 530.6016, |
| "step": 6880 |
| }, |
| { |
| "ce_loss_10": 3.669530212879181, |
| "ce_loss_13": 3.611078381538391, |
| "ce_loss_2": 4.122839629650116, |
| "ce_loss_3": 3.9565317392349244, |
| "ce_loss_7": 3.7144492745399473, |
| "epoch": 0.689, |
| "grad_norm": 482.0, |
| "kl_loss_10": 93.94465446472168, |
| "kl_loss_2": 1092.5764556884765, |
| "kl_loss_3": 757.9043579101562, |
| "kl_loss_7": 189.06216201782226, |
| "learning_rate": 0.00022436236808900844, |
| "loss": 532.0287, |
| "step": 6890 |
| }, |
| { |
| "ce_loss_10": 3.563220775127411, |
| "ce_loss_13": 3.505044734477997, |
| "ce_loss_2": 4.028258430957794, |
| "ce_loss_3": 3.860454273223877, |
| "ce_loss_7": 3.6083375453948974, |
| "epoch": 0.69, |
| "grad_norm": 402.0, |
| "kl_loss_10": 95.30224533081055, |
| "kl_loss_2": 1114.9274475097657, |
| "kl_loss_3": 768.1644836425781, |
| "kl_loss_7": 189.04213485717773, |
| "learning_rate": 0.00022303996806694487, |
| "loss": 534.7889, |
| "step": 6900 |
| }, |
| { |
| "ce_loss_10": 3.646312749385834, |
| "ce_loss_13": 3.5865816950798033, |
| "ce_loss_2": 4.1086891174316404, |
| "ce_loss_3": 3.9399857401847838, |
| "ce_loss_7": 3.69192236661911, |
| "epoch": 0.691, |
| "grad_norm": 392.0, |
| "kl_loss_10": 95.77762832641602, |
| "kl_loss_2": 1094.2582000732423, |
| "kl_loss_3": 756.6770172119141, |
| "kl_loss_7": 187.92616500854493, |
| "learning_rate": 0.00022172035702932823, |
| "loss": 534.246, |
| "step": 6910 |
| }, |
| { |
| "ce_loss_10": 3.685254919528961, |
| "ce_loss_13": 3.6261175990104677, |
| "ce_loss_2": 4.142215931415558, |
| "ce_loss_3": 3.9721115231513977, |
| "ce_loss_7": 3.7271186470985413, |
| "epoch": 0.692, |
| "grad_norm": 430.0, |
| "kl_loss_10": 94.89179420471191, |
| "kl_loss_2": 1075.7997589111328, |
| "kl_loss_3": 742.8703857421875, |
| "kl_loss_7": 186.23582077026367, |
| "learning_rate": 0.00022040354826462666, |
| "loss": 530.2491, |
| "step": 6920 |
| }, |
| { |
| "ce_loss_10": 3.62452495098114, |
| "ce_loss_13": 3.563087892532349, |
| "ce_loss_2": 4.079807507991791, |
| "ce_loss_3": 3.913197338581085, |
| "ce_loss_7": 3.6697877049446106, |
| "epoch": 0.693, |
| "grad_norm": 410.0, |
| "kl_loss_10": 96.51725845336914, |
| "kl_loss_2": 1085.478707885742, |
| "kl_loss_3": 750.6873352050782, |
| "kl_loss_7": 187.0568748474121, |
| "learning_rate": 0.0002190895550330899, |
| "loss": 535.6979, |
| "step": 6930 |
| }, |
| { |
| "ce_loss_10": 3.547420835494995, |
| "ce_loss_13": 3.488833248615265, |
| "ce_loss_2": 4.036989772319794, |
| "ce_loss_3": 3.85961799621582, |
| "ce_loss_7": 3.598125493526459, |
| "epoch": 0.694, |
| "grad_norm": 406.0, |
| "kl_loss_10": 96.3628433227539, |
| "kl_loss_2": 1128.3786254882812, |
| "kl_loss_3": 778.4836669921875, |
| "kl_loss_7": 192.30058898925782, |
| "learning_rate": 0.00021777839056661552, |
| "loss": 534.9962, |
| "step": 6940 |
| }, |
| { |
| "ce_loss_10": 3.636169970035553, |
| "ce_loss_13": 3.576909136772156, |
| "ce_loss_2": 4.093018388748169, |
| "ce_loss_3": 3.9319678425788878, |
| "ce_loss_7": 3.682026994228363, |
| "epoch": 0.695, |
| "grad_norm": 380.0, |
| "kl_loss_10": 95.15358619689941, |
| "kl_loss_2": 1086.1379272460938, |
| "kl_loss_3": 753.724154663086, |
| "kl_loss_7": 185.95790100097656, |
| "learning_rate": 0.0002164700680686147, |
| "loss": 526.2859, |
| "step": 6950 |
| }, |
| { |
| "ce_loss_10": 3.6809890270233154, |
| "ce_loss_13": 3.6225372910499574, |
| "ce_loss_2": 4.135282206535339, |
| "ce_loss_3": 3.9695199608802794, |
| "ce_loss_7": 3.7249368906021116, |
| "epoch": 0.696, |
| "grad_norm": 400.0, |
| "kl_loss_10": 96.4394718170166, |
| "kl_loss_2": 1074.3540649414062, |
| "kl_loss_3": 743.0920288085938, |
| "kl_loss_7": 188.12129898071288, |
| "learning_rate": 0.0002151646007138806, |
| "loss": 527.0223, |
| "step": 6960 |
| }, |
| { |
| "ce_loss_10": 3.55483934879303, |
| "ce_loss_13": 3.493414306640625, |
| "ce_loss_2": 4.029416286945343, |
| "ce_loss_3": 3.8593334913253785, |
| "ce_loss_7": 3.5997050285339354, |
| "epoch": 0.697, |
| "grad_norm": 324.0, |
| "kl_loss_10": 98.1744327545166, |
| "kl_loss_2": 1119.6888793945313, |
| "kl_loss_3": 776.6419464111328, |
| "kl_loss_7": 191.90652236938476, |
| "learning_rate": 0.00021386200164845526, |
| "loss": 540.4315, |
| "step": 6970 |
| }, |
| { |
| "ce_loss_10": 3.7494669914245606, |
| "ce_loss_13": 3.6868221879005434, |
| "ce_loss_2": 4.180894982814789, |
| "ce_loss_3": 4.02219043970108, |
| "ce_loss_7": 3.790766155719757, |
| "epoch": 0.698, |
| "grad_norm": 386.0, |
| "kl_loss_10": 98.89772605895996, |
| "kl_loss_2": 1061.9671813964844, |
| "kl_loss_3": 736.8194549560546, |
| "kl_loss_7": 189.14059829711914, |
| "learning_rate": 0.0002125622839894964, |
| "loss": 526.3207, |
| "step": 6980 |
| }, |
| { |
| "ce_loss_10": 3.6859158158302305, |
| "ce_loss_13": 3.626417076587677, |
| "ce_loss_2": 4.136348474025726, |
| "ce_loss_3": 3.974168133735657, |
| "ce_loss_7": 3.7279628992080687, |
| "epoch": 0.699, |
| "grad_norm": 406.0, |
| "kl_loss_10": 97.57818336486817, |
| "kl_loss_2": 1081.921697998047, |
| "kl_loss_3": 746.1339630126953, |
| "kl_loss_7": 188.19551315307618, |
| "learning_rate": 0.00021126546082514663, |
| "loss": 529.5254, |
| "step": 6990 |
| }, |
| { |
| "ce_loss_10": 3.704355037212372, |
| "ce_loss_13": 3.643582081794739, |
| "ce_loss_2": 4.151243126392364, |
| "ce_loss_3": 3.9851069808006288, |
| "ce_loss_7": 3.747806203365326, |
| "epoch": 0.7, |
| "grad_norm": 394.0, |
| "kl_loss_10": 97.80472221374512, |
| "kl_loss_2": 1074.9452331542968, |
| "kl_loss_3": 745.385775756836, |
| "kl_loss_7": 188.936759185791, |
| "learning_rate": 0.00020997154521440098, |
| "loss": 526.4211, |
| "step": 7000 |
| }, |
| { |
| "ce_loss_10": 3.6455201506614685, |
| "ce_loss_13": 3.586948239803314, |
| "ce_loss_2": 4.104578590393066, |
| "ce_loss_3": 3.9375877380371094, |
| "ce_loss_7": 3.68754506111145, |
| "epoch": 0.701, |
| "grad_norm": 322.0, |
| "kl_loss_10": 93.82002601623535, |
| "kl_loss_2": 1085.8826141357422, |
| "kl_loss_3": 746.0692993164063, |
| "kl_loss_7": 184.4355583190918, |
| "learning_rate": 0.0002086805501869749, |
| "loss": 524.1356, |
| "step": 7010 |
| }, |
| { |
| "ce_loss_10": 3.6133246064186095, |
| "ce_loss_13": 3.554938244819641, |
| "ce_loss_2": 4.0853543996810915, |
| "ce_loss_3": 3.918412721157074, |
| "ce_loss_7": 3.6612335562705995, |
| "epoch": 0.702, |
| "grad_norm": 398.0, |
| "kl_loss_10": 95.29999237060547, |
| "kl_loss_2": 1131.5339111328126, |
| "kl_loss_3": 781.2637298583984, |
| "kl_loss_7": 192.70318984985352, |
| "learning_rate": 0.0002073924887431744, |
| "loss": 542.1648, |
| "step": 7020 |
| }, |
| { |
| "ce_loss_10": 3.619812881946564, |
| "ce_loss_13": 3.561210036277771, |
| "ce_loss_2": 4.088810133934021, |
| "ce_loss_3": 3.9195892930030825, |
| "ce_loss_7": 3.667060124874115, |
| "epoch": 0.703, |
| "grad_norm": 396.0, |
| "kl_loss_10": 95.14918022155761, |
| "kl_loss_2": 1112.4185638427734, |
| "kl_loss_3": 769.590234375, |
| "kl_loss_7": 188.17913894653321, |
| "learning_rate": 0.00020610737385376348, |
| "loss": 545.7339, |
| "step": 7030 |
| }, |
| { |
| "ce_loss_10": 3.689952182769775, |
| "ce_loss_13": 3.629777657985687, |
| "ce_loss_2": 4.127048587799072, |
| "ce_loss_3": 3.9679968118667603, |
| "ce_loss_7": 3.7309682607650756, |
| "epoch": 0.704, |
| "grad_norm": 480.0, |
| "kl_loss_10": 96.72987632751465, |
| "kl_loss_2": 1060.028268432617, |
| "kl_loss_3": 736.1820068359375, |
| "kl_loss_7": 185.3560775756836, |
| "learning_rate": 0.00020482521845983521, |
| "loss": 531.1421, |
| "step": 7040 |
| }, |
| { |
| "ce_loss_10": 3.681384038925171, |
| "ce_loss_13": 3.6203475475311278, |
| "ce_loss_2": 4.1394176363945006, |
| "ce_loss_3": 3.9727881073951723, |
| "ce_loss_7": 3.725051200389862, |
| "epoch": 0.705, |
| "grad_norm": 482.0, |
| "kl_loss_10": 100.69121513366699, |
| "kl_loss_2": 1089.9848724365233, |
| "kl_loss_3": 754.3679351806641, |
| "kl_loss_7": 192.38913803100587, |
| "learning_rate": 0.00020354603547267987, |
| "loss": 542.1912, |
| "step": 7050 |
| }, |
| { |
| "ce_loss_10": 3.667348313331604, |
| "ce_loss_13": 3.605680251121521, |
| "ce_loss_2": 4.1402019739151, |
| "ce_loss_3": 3.971468675136566, |
| "ce_loss_7": 3.712887394428253, |
| "epoch": 0.706, |
| "grad_norm": 364.0, |
| "kl_loss_10": 97.05326614379882, |
| "kl_loss_2": 1105.346597290039, |
| "kl_loss_3": 773.4686828613281, |
| "kl_loss_7": 191.13608169555664, |
| "learning_rate": 0.00020226983777365604, |
| "loss": 548.4642, |
| "step": 7060 |
| }, |
| { |
| "ce_loss_10": 3.563067603111267, |
| "ce_loss_13": 3.504194128513336, |
| "ce_loss_2": 4.040568280220032, |
| "ce_loss_3": 3.8677730679512026, |
| "ce_loss_7": 3.6067102789878844, |
| "epoch": 0.707, |
| "grad_norm": 338.0, |
| "kl_loss_10": 92.14009590148926, |
| "kl_loss_2": 1122.7782775878907, |
| "kl_loss_3": 765.3169036865235, |
| "kl_loss_7": 183.86895446777345, |
| "learning_rate": 0.00020099663821406056, |
| "loss": 534.7408, |
| "step": 7070 |
| }, |
| { |
| "ce_loss_10": 3.669863748550415, |
| "ce_loss_13": 3.6097553610801696, |
| "ce_loss_2": 4.117836952209473, |
| "ce_loss_3": 3.955933165550232, |
| "ce_loss_7": 3.7124067664146425, |
| "epoch": 0.708, |
| "grad_norm": 528.0, |
| "kl_loss_10": 95.14625968933106, |
| "kl_loss_2": 1064.9939758300782, |
| "kl_loss_3": 737.6582244873047, |
| "kl_loss_7": 184.71611633300782, |
| "learning_rate": 0.00019972644961499853, |
| "loss": 531.3339, |
| "step": 7080 |
| }, |
| { |
| "ce_loss_10": 3.635360848903656, |
| "ce_loss_13": 3.575283741950989, |
| "ce_loss_2": 4.107546412944794, |
| "ce_loss_3": 3.9376320004463197, |
| "ce_loss_7": 3.6813616275787355, |
| "epoch": 0.709, |
| "grad_norm": 454.0, |
| "kl_loss_10": 95.76384582519532, |
| "kl_loss_2": 1112.5157043457032, |
| "kl_loss_3": 768.9019195556641, |
| "kl_loss_7": 190.37624435424806, |
| "learning_rate": 0.00019845928476725522, |
| "loss": 537.9877, |
| "step": 7090 |
| }, |
| { |
| "ce_loss_10": 3.7167228937149046, |
| "ce_loss_13": 3.654716455936432, |
| "ce_loss_2": 4.171470665931702, |
| "ce_loss_3": 4.006917369365692, |
| "ce_loss_7": 3.763367462158203, |
| "epoch": 0.71, |
| "grad_norm": 402.0, |
| "kl_loss_10": 97.96182098388672, |
| "kl_loss_2": 1088.9804382324219, |
| "kl_loss_3": 752.4143249511719, |
| "kl_loss_7": 190.0522773742676, |
| "learning_rate": 0.00019719515643116677, |
| "loss": 545.6708, |
| "step": 7100 |
| }, |
| { |
| "ce_loss_10": 3.657674491405487, |
| "ce_loss_13": 3.595584750175476, |
| "ce_loss_2": 4.113815677165985, |
| "ce_loss_3": 3.9436608791351317, |
| "ce_loss_7": 3.700818693637848, |
| "epoch": 0.711, |
| "grad_norm": 354.0, |
| "kl_loss_10": 97.26274185180664, |
| "kl_loss_2": 1084.9519958496094, |
| "kl_loss_3": 745.9836975097656, |
| "kl_loss_7": 187.7238555908203, |
| "learning_rate": 0.0001959340773364911, |
| "loss": 535.516, |
| "step": 7110 |
| }, |
| { |
| "ce_loss_10": 3.6742369413375853, |
| "ce_loss_13": 3.613626217842102, |
| "ce_loss_2": 4.1353883981704715, |
| "ce_loss_3": 3.9663340568542482, |
| "ce_loss_7": 3.715994417667389, |
| "epoch": 0.712, |
| "grad_norm": 414.0, |
| "kl_loss_10": 97.77620887756348, |
| "kl_loss_2": 1094.1240295410157, |
| "kl_loss_3": 755.1257873535156, |
| "kl_loss_7": 188.97418975830078, |
| "learning_rate": 0.0001946760601822809, |
| "loss": 526.0803, |
| "step": 7120 |
| }, |
| { |
| "ce_loss_10": 3.724298870563507, |
| "ce_loss_13": 3.6654844999313356, |
| "ce_loss_2": 4.171249413490296, |
| "ce_loss_3": 4.011460411548614, |
| "ce_loss_7": 3.770449674129486, |
| "epoch": 0.713, |
| "grad_norm": 328.0, |
| "kl_loss_10": 95.51984024047852, |
| "kl_loss_2": 1076.5175323486328, |
| "kl_loss_3": 741.9749359130859, |
| "kl_loss_7": 187.3384910583496, |
| "learning_rate": 0.00019342111763675512, |
| "loss": 520.2061, |
| "step": 7130 |
| }, |
| { |
| "ce_loss_10": 3.730803680419922, |
| "ce_loss_13": 3.6689053654670714, |
| "ce_loss_2": 4.169312536716461, |
| "ce_loss_3": 4.00542528629303, |
| "ce_loss_7": 3.7727373957633974, |
| "epoch": 0.714, |
| "grad_norm": 418.0, |
| "kl_loss_10": 99.5161979675293, |
| "kl_loss_2": 1071.9742065429687, |
| "kl_loss_3": 743.7749084472656, |
| "kl_loss_7": 189.85234451293945, |
| "learning_rate": 0.00019216926233717085, |
| "loss": 525.6779, |
| "step": 7140 |
| }, |
| { |
| "ce_loss_10": 3.6117329597473145, |
| "ce_loss_13": 3.5528572678565977, |
| "ce_loss_2": 4.092064487934112, |
| "ce_loss_3": 3.914566385746002, |
| "ce_loss_7": 3.653049111366272, |
| "epoch": 0.715, |
| "grad_norm": 342.0, |
| "kl_loss_10": 95.1452823638916, |
| "kl_loss_2": 1135.5553619384766, |
| "kl_loss_3": 779.0962982177734, |
| "kl_loss_7": 185.5459442138672, |
| "learning_rate": 0.00019092050688969737, |
| "loss": 540.4428, |
| "step": 7150 |
| }, |
| { |
| "ce_loss_10": 3.6794282674789427, |
| "ce_loss_13": 3.619647240638733, |
| "ce_loss_2": 4.124801588058472, |
| "ce_loss_3": 3.9644263625144958, |
| "ce_loss_7": 3.7204025983810425, |
| "epoch": 0.716, |
| "grad_norm": 458.0, |
| "kl_loss_10": 95.73797454833985, |
| "kl_loss_2": 1075.4539825439454, |
| "kl_loss_3": 743.3267883300781, |
| "kl_loss_7": 186.0149787902832, |
| "learning_rate": 0.00018967486386928817, |
| "loss": 525.8811, |
| "step": 7160 |
| }, |
| { |
| "ce_loss_10": 3.5499155521392822, |
| "ce_loss_13": 3.4892677664756775, |
| "ce_loss_2": 4.026895833015442, |
| "ce_loss_3": 3.8540278673171997, |
| "ce_loss_7": 3.594646680355072, |
| "epoch": 0.717, |
| "grad_norm": 458.0, |
| "kl_loss_10": 93.14333381652833, |
| "kl_loss_2": 1122.9288635253906, |
| "kl_loss_3": 776.9212982177735, |
| "kl_loss_7": 188.66815719604492, |
| "learning_rate": 0.00018843234581955443, |
| "loss": 552.9929, |
| "step": 7170 |
| }, |
| { |
| "ce_loss_10": 3.574516201019287, |
| "ce_loss_13": 3.512941229343414, |
| "ce_loss_2": 4.049459004402161, |
| "ce_loss_3": 3.871944236755371, |
| "ce_loss_7": 3.6209982872009276, |
| "epoch": 0.718, |
| "grad_norm": 364.0, |
| "kl_loss_10": 96.56784629821777, |
| "kl_loss_2": 1129.7979248046875, |
| "kl_loss_3": 775.4145477294921, |
| "kl_loss_7": 190.88178558349608, |
| "learning_rate": 0.00018719296525263924, |
| "loss": 541.6241, |
| "step": 7180 |
| }, |
| { |
| "ce_loss_10": 3.6690776705741883, |
| "ce_loss_13": 3.6084558844566343, |
| "ce_loss_2": 4.109010553359985, |
| "ce_loss_3": 3.944419741630554, |
| "ce_loss_7": 3.7104645013809203, |
| "epoch": 0.719, |
| "grad_norm": 472.0, |
| "kl_loss_10": 96.92717056274414, |
| "kl_loss_2": 1058.7910217285157, |
| "kl_loss_3": 728.63525390625, |
| "kl_loss_7": 186.22266235351563, |
| "learning_rate": 0.0001859567346490913, |
| "loss": 525.3373, |
| "step": 7190 |
| }, |
| { |
| "ce_loss_10": 3.6438188314437867, |
| "ce_loss_13": 3.5840962886810304, |
| "ce_loss_2": 4.113380300998688, |
| "ce_loss_3": 3.9464030385017397, |
| "ce_loss_7": 3.690832197666168, |
| "epoch": 0.72, |
| "grad_norm": 372.0, |
| "kl_loss_10": 96.38097648620605, |
| "kl_loss_2": 1109.4217742919923, |
| "kl_loss_3": 771.3218353271484, |
| "kl_loss_7": 191.62188110351562, |
| "learning_rate": 0.0001847236664577389, |
| "loss": 531.0333, |
| "step": 7200 |
| }, |
| { |
| "ce_loss_10": 3.673705244064331, |
| "ce_loss_13": 3.614805054664612, |
| "ce_loss_2": 4.117141389846802, |
| "ce_loss_3": 3.954344153404236, |
| "ce_loss_7": 3.717172992229462, |
| "epoch": 0.721, |
| "grad_norm": 342.0, |
| "kl_loss_10": 96.93136787414551, |
| "kl_loss_2": 1071.9077087402343, |
| "kl_loss_3": 737.0366821289062, |
| "kl_loss_7": 186.3966079711914, |
| "learning_rate": 0.00018349377309556487, |
| "loss": 518.4113, |
| "step": 7210 |
| }, |
| { |
| "ce_loss_10": 3.609177756309509, |
| "ce_loss_13": 3.5509839773178102, |
| "ce_loss_2": 4.084410285949707, |
| "ce_loss_3": 3.911909210681915, |
| "ce_loss_7": 3.6546399116516115, |
| "epoch": 0.722, |
| "grad_norm": 436.0, |
| "kl_loss_10": 94.82120094299316, |
| "kl_loss_2": 1119.1944885253906, |
| "kl_loss_3": 772.7051483154297, |
| "kl_loss_7": 190.29311599731446, |
| "learning_rate": 0.00018226706694758193, |
| "loss": 539.7223, |
| "step": 7220 |
| }, |
| { |
| "ce_loss_10": 3.6862050175666807, |
| "ce_loss_13": 3.6256973266601564, |
| "ce_loss_2": 4.135941016674042, |
| "ce_loss_3": 3.9752198338508604, |
| "ce_loss_7": 3.7262799024581907, |
| "epoch": 0.723, |
| "grad_norm": 386.0, |
| "kl_loss_10": 96.04033012390137, |
| "kl_loss_2": 1079.298776245117, |
| "kl_loss_3": 752.3606506347656, |
| "kl_loss_7": 187.0266014099121, |
| "learning_rate": 0.0001810435603667075, |
| "loss": 540.3036, |
| "step": 7230 |
| }, |
| { |
| "ce_loss_10": 3.5322535395622254, |
| "ce_loss_13": 3.4715150594711304, |
| "ce_loss_2": 4.000997626781464, |
| "ce_loss_3": 3.8283260583877565, |
| "ce_loss_7": 3.5753297805786133, |
| "epoch": 0.724, |
| "grad_norm": 348.0, |
| "kl_loss_10": 92.0587100982666, |
| "kl_loss_2": 1101.032977294922, |
| "kl_loss_3": 757.3754730224609, |
| "kl_loss_7": 184.87646255493163, |
| "learning_rate": 0.0001798232656736389, |
| "loss": 539.9771, |
| "step": 7240 |
| }, |
| { |
| "ce_loss_10": 3.7180214405059813, |
| "ce_loss_13": 3.6561784505844117, |
| "ce_loss_2": 4.153665316104889, |
| "ce_loss_3": 3.994912326335907, |
| "ce_loss_7": 3.759207808971405, |
| "epoch": 0.725, |
| "grad_norm": 388.0, |
| "kl_loss_10": 97.47655296325684, |
| "kl_loss_2": 1060.039584350586, |
| "kl_loss_3": 729.7286529541016, |
| "kl_loss_7": 185.7909019470215, |
| "learning_rate": 0.0001786061951567303, |
| "loss": 527.9849, |
| "step": 7250 |
| }, |
| { |
| "ce_loss_10": 3.630312275886536, |
| "ce_loss_13": 3.5694428086280823, |
| "ce_loss_2": 4.091070818901062, |
| "ce_loss_3": 3.92718985080719, |
| "ce_loss_7": 3.675185751914978, |
| "epoch": 0.726, |
| "grad_norm": 382.0, |
| "kl_loss_10": 97.81040573120117, |
| "kl_loss_2": 1091.2934509277343, |
| "kl_loss_3": 755.8922180175781, |
| "kl_loss_7": 189.30439071655275, |
| "learning_rate": 0.00017739236107186857, |
| "loss": 537.2411, |
| "step": 7260 |
| }, |
| { |
| "ce_loss_10": 3.711188280582428, |
| "ce_loss_13": 3.6525003552436828, |
| "ce_loss_2": 4.142853522300721, |
| "ce_loss_3": 3.981386995315552, |
| "ce_loss_7": 3.7502527594566346, |
| "epoch": 0.727, |
| "grad_norm": 374.0, |
| "kl_loss_10": 93.90410652160645, |
| "kl_loss_2": 1048.1178436279297, |
| "kl_loss_3": 725.0591918945313, |
| "kl_loss_7": 182.22721328735352, |
| "learning_rate": 0.00017618177564234904, |
| "loss": 519.2631, |
| "step": 7270 |
| }, |
| { |
| "ce_loss_10": 3.693279492855072, |
| "ce_loss_13": 3.6356263041496275, |
| "ce_loss_2": 4.13202931880951, |
| "ce_loss_3": 3.9750990748405455, |
| "ce_loss_7": 3.7332441210746765, |
| "epoch": 0.728, |
| "grad_norm": 318.0, |
| "kl_loss_10": 95.86821098327637, |
| "kl_loss_2": 1048.5098999023437, |
| "kl_loss_3": 724.8844573974609, |
| "kl_loss_7": 182.79603576660156, |
| "learning_rate": 0.00017497445105875377, |
| "loss": 523.0468, |
| "step": 7280 |
| }, |
| { |
| "ce_loss_10": 3.595864677429199, |
| "ce_loss_13": 3.5371819376945495, |
| "ce_loss_2": 4.073407852649689, |
| "ce_loss_3": 3.904122495651245, |
| "ce_loss_7": 3.6426048040390016, |
| "epoch": 0.729, |
| "grad_norm": 442.0, |
| "kl_loss_10": 95.08332710266113, |
| "kl_loss_2": 1130.4158264160155, |
| "kl_loss_3": 780.6070220947265, |
| "kl_loss_7": 189.8589889526367, |
| "learning_rate": 0.000173770399478828, |
| "loss": 538.7581, |
| "step": 7290 |
| }, |
| { |
| "ce_loss_10": 3.5191142082214357, |
| "ce_loss_13": 3.461543416976929, |
| "ce_loss_2": 3.977211833000183, |
| "ce_loss_3": 3.8103960871696474, |
| "ce_loss_7": 3.564071011543274, |
| "epoch": 0.73, |
| "grad_norm": 438.0, |
| "kl_loss_10": 93.54008331298829, |
| "kl_loss_2": 1093.509115600586, |
| "kl_loss_3": 757.6209930419922, |
| "kl_loss_7": 186.89632568359374, |
| "learning_rate": 0.0001725696330273575, |
| "loss": 540.4559, |
| "step": 7300 |
| }, |
| { |
| "ce_loss_10": 3.714753472805023, |
| "ce_loss_13": 3.6550832748413087, |
| "ce_loss_2": 4.150299251079559, |
| "ce_loss_3": 3.9939948439598085, |
| "ce_loss_7": 3.757344377040863, |
| "epoch": 0.731, |
| "grad_norm": 486.0, |
| "kl_loss_10": 93.61467895507812, |
| "kl_loss_2": 1050.2083618164063, |
| "kl_loss_3": 726.4699127197266, |
| "kl_loss_7": 182.62665328979492, |
| "learning_rate": 0.00017137216379604724, |
| "loss": 517.0194, |
| "step": 7310 |
| }, |
| { |
| "ce_loss_10": 3.590583050251007, |
| "ce_loss_13": 3.5311309576034544, |
| "ce_loss_2": 4.051425302028656, |
| "ce_loss_3": 3.8829818606376647, |
| "ce_loss_7": 3.632352864742279, |
| "epoch": 0.732, |
| "grad_norm": 340.0, |
| "kl_loss_10": 95.8599407196045, |
| "kl_loss_2": 1085.3143981933595, |
| "kl_loss_3": 747.0916809082031, |
| "kl_loss_7": 186.49290466308594, |
| "learning_rate": 0.00017017800384339925, |
| "loss": 528.4002, |
| "step": 7320 |
| }, |
| { |
| "ce_loss_10": 3.540472662448883, |
| "ce_loss_13": 3.4801993131637574, |
| "ce_loss_2": 4.017971241474152, |
| "ce_loss_3": 3.8469355702400208, |
| "ce_loss_7": 3.586536169052124, |
| "epoch": 0.733, |
| "grad_norm": 316.0, |
| "kl_loss_10": 95.24363555908204, |
| "kl_loss_2": 1121.9350006103516, |
| "kl_loss_3": 775.7258972167969, |
| "kl_loss_7": 189.4253242492676, |
| "learning_rate": 0.00016898716519459073, |
| "loss": 528.2626, |
| "step": 7330 |
| }, |
| { |
| "ce_loss_10": 3.6674713015556337, |
| "ce_loss_13": 3.608376145362854, |
| "ce_loss_2": 4.144577407836914, |
| "ce_loss_3": 3.9727422475814818, |
| "ce_loss_7": 3.712773549556732, |
| "epoch": 0.734, |
| "grad_norm": 330.0, |
| "kl_loss_10": 96.16988220214844, |
| "kl_loss_2": 1116.4668975830077, |
| "kl_loss_3": 767.9603485107422, |
| "kl_loss_7": 191.9127670288086, |
| "learning_rate": 0.00016779965984135375, |
| "loss": 536.6205, |
| "step": 7340 |
| }, |
| { |
| "ce_loss_10": 3.5673499703407288, |
| "ce_loss_13": 3.5097331523895265, |
| "ce_loss_2": 4.023692965507507, |
| "ce_loss_3": 3.8575591087341308, |
| "ce_loss_7": 3.6114558935165406, |
| "epoch": 0.735, |
| "grad_norm": 478.0, |
| "kl_loss_10": 92.66586227416992, |
| "kl_loss_2": 1079.1628143310547, |
| "kl_loss_3": 740.6051025390625, |
| "kl_loss_7": 182.72610321044922, |
| "learning_rate": 0.00016661549974185424, |
| "loss": 528.04, |
| "step": 7350 |
| }, |
| { |
| "ce_loss_10": 3.612525475025177, |
| "ce_loss_13": 3.5525230765342712, |
| "ce_loss_2": 4.068535602092743, |
| "ce_loss_3": 3.9024940848350527, |
| "ce_loss_7": 3.6558452367782595, |
| "epoch": 0.736, |
| "grad_norm": 390.0, |
| "kl_loss_10": 97.4712890625, |
| "kl_loss_2": 1087.6514739990234, |
| "kl_loss_3": 751.1584289550781, |
| "kl_loss_7": 188.87088012695312, |
| "learning_rate": 0.00016543469682057105, |
| "loss": 524.4483, |
| "step": 7360 |
| }, |
| { |
| "ce_loss_10": 3.6394684672355653, |
| "ce_loss_13": 3.579529356956482, |
| "ce_loss_2": 4.096106541156769, |
| "ce_loss_3": 3.930702245235443, |
| "ce_loss_7": 3.6828288197517396, |
| "epoch": 0.737, |
| "grad_norm": 332.0, |
| "kl_loss_10": 96.63297386169434, |
| "kl_loss_2": 1092.361489868164, |
| "kl_loss_3": 752.7328277587891, |
| "kl_loss_7": 189.77932739257812, |
| "learning_rate": 0.00016425726296817632, |
| "loss": 533.2087, |
| "step": 7370 |
| }, |
| { |
| "ce_loss_10": 3.6602264523506163, |
| "ce_loss_13": 3.6020756483078005, |
| "ce_loss_2": 4.102893972396851, |
| "ce_loss_3": 3.9389352679252623, |
| "ce_loss_7": 3.702047073841095, |
| "epoch": 0.738, |
| "grad_norm": 604.0, |
| "kl_loss_10": 95.1510066986084, |
| "kl_loss_2": 1066.6962097167968, |
| "kl_loss_3": 734.1273132324219, |
| "kl_loss_7": 185.05731124877929, |
| "learning_rate": 0.00016308321004141607, |
| "loss": 524.9394, |
| "step": 7380 |
| }, |
| { |
| "ce_loss_10": 3.6052220940589903, |
| "ce_loss_13": 3.544311022758484, |
| "ce_loss_2": 4.074472200870514, |
| "ce_loss_3": 3.905838668346405, |
| "ce_loss_7": 3.6499088406562805, |
| "epoch": 0.739, |
| "grad_norm": 414.0, |
| "kl_loss_10": 98.00579032897949, |
| "kl_loss_2": 1091.213427734375, |
| "kl_loss_3": 753.4314147949219, |
| "kl_loss_7": 190.15870666503906, |
| "learning_rate": 0.00016191254986299043, |
| "loss": 528.1322, |
| "step": 7390 |
| }, |
| { |
| "ce_loss_10": 3.665621018409729, |
| "ce_loss_13": 3.606168735027313, |
| "ce_loss_2": 4.110114741325378, |
| "ce_loss_3": 3.9419893980026246, |
| "ce_loss_7": 3.7061524271965025, |
| "epoch": 0.74, |
| "grad_norm": 380.0, |
| "kl_loss_10": 95.95707778930664, |
| "kl_loss_2": 1084.0633728027344, |
| "kl_loss_3": 743.1361236572266, |
| "kl_loss_7": 184.58063583374025, |
| "learning_rate": 0.00016074529422143398, |
| "loss": 534.7291, |
| "step": 7400 |
| }, |
| { |
| "ce_loss_10": 3.5971511721611025, |
| "ce_loss_13": 3.540179669857025, |
| "ce_loss_2": 4.063095271587372, |
| "ce_loss_3": 3.8901899337768553, |
| "ce_loss_7": 3.6407782435417175, |
| "epoch": 0.741, |
| "grad_norm": 672.0, |
| "kl_loss_10": 95.23762931823731, |
| "kl_loss_2": 1107.8231140136718, |
| "kl_loss_3": 756.5032379150391, |
| "kl_loss_7": 187.1332000732422, |
| "learning_rate": 0.0001595814548709983, |
| "loss": 535.9396, |
| "step": 7410 |
| }, |
| { |
| "ce_loss_10": 3.6745630502700806, |
| "ce_loss_13": 3.613684153556824, |
| "ce_loss_2": 4.1425374269485475, |
| "ce_loss_3": 3.9706888437271117, |
| "ce_loss_7": 3.7216501116752623, |
| "epoch": 0.742, |
| "grad_norm": 372.0, |
| "kl_loss_10": 97.69215469360351, |
| "kl_loss_2": 1104.6529205322265, |
| "kl_loss_3": 761.8667907714844, |
| "kl_loss_7": 191.12793655395507, |
| "learning_rate": 0.00015842104353153285, |
| "loss": 536.9469, |
| "step": 7420 |
| }, |
| { |
| "ce_loss_10": 3.6906041502952576, |
| "ce_loss_13": 3.6308916926383974, |
| "ce_loss_2": 4.145760095119476, |
| "ce_loss_3": 3.981899178028107, |
| "ce_loss_7": 3.7335981249809267, |
| "epoch": 0.743, |
| "grad_norm": 418.0, |
| "kl_loss_10": 97.18793029785157, |
| "kl_loss_2": 1097.6078674316407, |
| "kl_loss_3": 759.1226196289062, |
| "kl_loss_7": 189.25363845825194, |
| "learning_rate": 0.0001572640718883667, |
| "loss": 543.1555, |
| "step": 7430 |
| }, |
| { |
| "ce_loss_10": 3.6231363296508787, |
| "ce_loss_13": 3.564767360687256, |
| "ce_loss_2": 4.071314561367035, |
| "ce_loss_3": 3.91090784072876, |
| "ce_loss_7": 3.664019286632538, |
| "epoch": 0.744, |
| "grad_norm": 320.0, |
| "kl_loss_10": 94.90192832946778, |
| "kl_loss_2": 1067.2735595703125, |
| "kl_loss_3": 738.7796752929687, |
| "kl_loss_7": 183.48248062133788, |
| "learning_rate": 0.0001561105515921915, |
| "loss": 533.524, |
| "step": 7440 |
| }, |
| { |
| "ce_loss_10": 3.463870346546173, |
| "ce_loss_13": 3.4067335724830627, |
| "ce_loss_2": 3.9477816224098206, |
| "ce_loss_3": 3.7779128670692446, |
| "ce_loss_7": 3.51077561378479, |
| "epoch": 0.745, |
| "grad_norm": 300.0, |
| "kl_loss_10": 92.0508991241455, |
| "kl_loss_2": 1123.8193664550781, |
| "kl_loss_3": 770.2350646972657, |
| "kl_loss_7": 184.63360900878905, |
| "learning_rate": 0.0001549604942589441, |
| "loss": 530.4723, |
| "step": 7450 |
| }, |
| { |
| "ce_loss_10": 3.6651261687278747, |
| "ce_loss_13": 3.6062275648117064, |
| "ce_loss_2": 4.092234718799591, |
| "ce_loss_3": 3.9361136317253114, |
| "ce_loss_7": 3.7055254936218263, |
| "epoch": 0.746, |
| "grad_norm": 366.0, |
| "kl_loss_10": 93.61905822753906, |
| "kl_loss_2": 1028.498812866211, |
| "kl_loss_3": 711.0323303222656, |
| "kl_loss_7": 180.76227340698242, |
| "learning_rate": 0.00015381391146968864, |
| "loss": 518.9042, |
| "step": 7460 |
| }, |
| { |
| "ce_loss_10": 3.6343637704849243, |
| "ce_loss_13": 3.5772631406784057, |
| "ce_loss_2": 4.097551655769348, |
| "ce_loss_3": 3.9294708490371706, |
| "ce_loss_7": 3.6792303323745728, |
| "epoch": 0.747, |
| "grad_norm": 348.0, |
| "kl_loss_10": 93.67252769470215, |
| "kl_loss_2": 1075.3313690185546, |
| "kl_loss_3": 736.6811370849609, |
| "kl_loss_7": 182.92275466918946, |
| "learning_rate": 0.00015267081477050133, |
| "loss": 529.2104, |
| "step": 7470 |
| }, |
| { |
| "ce_loss_10": 3.737002635002136, |
| "ce_loss_13": 3.6760261058807373, |
| "ce_loss_2": 4.184408628940583, |
| "ce_loss_3": 4.020549094676971, |
| "ce_loss_7": 3.779319405555725, |
| "epoch": 0.748, |
| "grad_norm": 314.0, |
| "kl_loss_10": 97.9722526550293, |
| "kl_loss_2": 1074.686865234375, |
| "kl_loss_3": 738.4016967773438, |
| "kl_loss_7": 188.9453094482422, |
| "learning_rate": 0.00015153121567235335, |
| "loss": 521.3269, |
| "step": 7480 |
| }, |
| { |
| "ce_loss_10": 3.627355396747589, |
| "ce_loss_13": 3.566980814933777, |
| "ce_loss_2": 4.087941682338714, |
| "ce_loss_3": 3.9189056277275087, |
| "ce_loss_7": 3.671427834033966, |
| "epoch": 0.749, |
| "grad_norm": 362.0, |
| "kl_loss_10": 95.86229972839355, |
| "kl_loss_2": 1099.5704315185546, |
| "kl_loss_3": 757.0687835693359, |
| "kl_loss_7": 188.21585922241212, |
| "learning_rate": 0.00015039512565099468, |
| "loss": 520.7597, |
| "step": 7490 |
| }, |
| { |
| "ce_loss_10": 3.6923457860946653, |
| "ce_loss_13": 3.6337139129638674, |
| "ce_loss_2": 4.142465770244598, |
| "ce_loss_3": 3.9779353976249694, |
| "ce_loss_7": 3.7360000610351562, |
| "epoch": 0.75, |
| "grad_norm": 400.0, |
| "kl_loss_10": 96.83558921813965, |
| "kl_loss_2": 1084.189535522461, |
| "kl_loss_3": 748.4331817626953, |
| "kl_loss_7": 188.31302337646486, |
| "learning_rate": 0.00014926255614683932, |
| "loss": 542.3, |
| "step": 7500 |
| }, |
| { |
| "ce_loss_10": 3.63236540555954, |
| "ce_loss_13": 3.5743218302726745, |
| "ce_loss_2": 4.084986460208893, |
| "ce_loss_3": 3.9159162759780886, |
| "ce_loss_7": 3.6776034474372863, |
| "epoch": 0.751, |
| "grad_norm": 356.0, |
| "kl_loss_10": 95.49623985290528, |
| "kl_loss_2": 1074.4479522705078, |
| "kl_loss_3": 737.1827270507813, |
| "kl_loss_7": 185.40160522460937, |
| "learning_rate": 0.0001481335185648498, |
| "loss": 533.0406, |
| "step": 7510 |
| }, |
| { |
| "ce_loss_10": 3.6419626474380493, |
| "ce_loss_13": 3.583856701850891, |
| "ce_loss_2": 4.0939129114151, |
| "ce_loss_3": 3.9313414216041567, |
| "ce_loss_7": 3.686499559879303, |
| "epoch": 0.752, |
| "grad_norm": 406.0, |
| "kl_loss_10": 93.70109405517579, |
| "kl_loss_2": 1078.2966064453126, |
| "kl_loss_3": 747.9822265625, |
| "kl_loss_7": 186.15133514404297, |
| "learning_rate": 0.0001470080242744218, |
| "loss": 523.242, |
| "step": 7520 |
| }, |
| { |
| "ce_loss_10": 3.638762640953064, |
| "ce_loss_13": 3.5817859530448914, |
| "ce_loss_2": 4.096928322315216, |
| "ce_loss_3": 3.925868511199951, |
| "ce_loss_7": 3.6821122765541077, |
| "epoch": 0.753, |
| "grad_norm": 304.0, |
| "kl_loss_10": 92.91362495422364, |
| "kl_loss_2": 1078.3225189208983, |
| "kl_loss_3": 744.8880218505859, |
| "kl_loss_7": 183.9945556640625, |
| "learning_rate": 0.0001458860846092705, |
| "loss": 532.4821, |
| "step": 7530 |
| }, |
| { |
| "ce_loss_10": 3.6720047116279604, |
| "ce_loss_13": 3.6128148198127747, |
| "ce_loss_2": 4.114215791225433, |
| "ce_loss_3": 3.9525702714920046, |
| "ce_loss_7": 3.7135818719863893, |
| "epoch": 0.754, |
| "grad_norm": 322.0, |
| "kl_loss_10": 94.26252975463868, |
| "kl_loss_2": 1064.2825866699218, |
| "kl_loss_3": 735.7307800292969, |
| "kl_loss_7": 183.32004623413087, |
| "learning_rate": 0.00014476771086731566, |
| "loss": 517.3908, |
| "step": 7540 |
| }, |
| { |
| "ce_loss_10": 3.7847033739089966, |
| "ce_loss_13": 3.7219197750091553, |
| "ce_loss_2": 4.230582165718078, |
| "ce_loss_3": 4.065666139125824, |
| "ce_loss_7": 3.827774000167847, |
| "epoch": 0.755, |
| "grad_norm": 430.0, |
| "kl_loss_10": 99.63549118041992, |
| "kl_loss_2": 1067.45849609375, |
| "kl_loss_3": 732.3584259033203, |
| "kl_loss_7": 187.05829620361328, |
| "learning_rate": 0.00014365291431056872, |
| "loss": 535.3279, |
| "step": 7550 |
| }, |
| { |
| "ce_loss_10": 3.6090814113616942, |
| "ce_loss_13": 3.5493834733963014, |
| "ce_loss_2": 4.077333819866181, |
| "ce_loss_3": 3.906116855144501, |
| "ce_loss_7": 3.652885007858276, |
| "epoch": 0.756, |
| "grad_norm": 460.0, |
| "kl_loss_10": 97.59222984313965, |
| "kl_loss_2": 1117.932635498047, |
| "kl_loss_3": 769.9259338378906, |
| "kl_loss_7": 192.52491149902343, |
| "learning_rate": 0.00014254170616501827, |
| "loss": 534.983, |
| "step": 7560 |
| }, |
| { |
| "ce_loss_10": 3.535455918312073, |
| "ce_loss_13": 3.47601797580719, |
| "ce_loss_2": 4.02073061466217, |
| "ce_loss_3": 3.852824592590332, |
| "ce_loss_7": 3.582290494441986, |
| "epoch": 0.757, |
| "grad_norm": 544.0, |
| "kl_loss_10": 94.12142906188964, |
| "kl_loss_2": 1137.807843017578, |
| "kl_loss_3": 793.451205444336, |
| "kl_loss_7": 191.15487823486328, |
| "learning_rate": 0.0001414340976205183, |
| "loss": 552.139, |
| "step": 7570 |
| }, |
| { |
| "ce_loss_10": 3.554329538345337, |
| "ce_loss_13": 3.495155191421509, |
| "ce_loss_2": 4.028338003158569, |
| "ce_loss_3": 3.860016918182373, |
| "ce_loss_7": 3.6010040402412415, |
| "epoch": 0.758, |
| "grad_norm": 392.0, |
| "kl_loss_10": 94.82050590515136, |
| "kl_loss_2": 1103.2837646484375, |
| "kl_loss_3": 760.9699432373047, |
| "kl_loss_7": 186.93385314941406, |
| "learning_rate": 0.00014033009983067452, |
| "loss": 536.1902, |
| "step": 7580 |
| }, |
| { |
| "ce_loss_10": 3.7230227828025817, |
| "ce_loss_13": 3.663366961479187, |
| "ce_loss_2": 4.157820415496826, |
| "ce_loss_3": 3.9997562408447265, |
| "ce_loss_7": 3.766176974773407, |
| "epoch": 0.759, |
| "grad_norm": 366.0, |
| "kl_loss_10": 95.41510429382325, |
| "kl_loss_2": 1045.2667877197266, |
| "kl_loss_3": 721.8115600585937, |
| "kl_loss_7": 183.35761260986328, |
| "learning_rate": 0.00013922972391273224, |
| "loss": 521.7405, |
| "step": 7590 |
| }, |
| { |
| "ce_loss_10": 3.726309287548065, |
| "ce_loss_13": 3.666226303577423, |
| "ce_loss_2": 4.173925065994263, |
| "ce_loss_3": 4.007045650482178, |
| "ce_loss_7": 3.7666892886161802, |
| "epoch": 0.76, |
| "grad_norm": 396.0, |
| "kl_loss_10": 96.0021198272705, |
| "kl_loss_2": 1064.7614837646483, |
| "kl_loss_3": 734.0039642333984, |
| "kl_loss_7": 185.6392059326172, |
| "learning_rate": 0.0001381329809474649, |
| "loss": 528.3375, |
| "step": 7600 |
| }, |
| { |
| "ce_loss_10": 3.621905469894409, |
| "ce_loss_13": 3.561663830280304, |
| "ce_loss_2": 4.098969185352326, |
| "ce_loss_3": 3.925651717185974, |
| "ce_loss_7": 3.6682042717933654, |
| "epoch": 0.761, |
| "grad_norm": 370.0, |
| "kl_loss_10": 96.61415328979493, |
| "kl_loss_2": 1119.761654663086, |
| "kl_loss_3": 769.7445831298828, |
| "kl_loss_7": 190.59917831420898, |
| "learning_rate": 0.0001370398819790621, |
| "loss": 540.338, |
| "step": 7610 |
| }, |
| { |
| "ce_loss_10": 3.7644327759742735, |
| "ce_loss_13": 3.704228925704956, |
| "ce_loss_2": 4.202455806732178, |
| "ce_loss_3": 4.041428947448731, |
| "ce_loss_7": 3.8075477123260497, |
| "epoch": 0.762, |
| "grad_norm": 424.0, |
| "kl_loss_10": 97.06539382934571, |
| "kl_loss_2": 1046.6303649902343, |
| "kl_loss_3": 720.9399322509765, |
| "kl_loss_7": 185.40132827758788, |
| "learning_rate": 0.00013595043801501794, |
| "loss": 512.6931, |
| "step": 7620 |
| }, |
| { |
| "ce_loss_10": 3.5539215803146362, |
| "ce_loss_13": 3.4973302245140077, |
| "ce_loss_2": 4.044493949413299, |
| "ce_loss_3": 3.8687676310539247, |
| "ce_loss_7": 3.602993667125702, |
| "epoch": 0.763, |
| "grad_norm": 468.0, |
| "kl_loss_10": 92.99364700317383, |
| "kl_loss_2": 1138.1223754882812, |
| "kl_loss_3": 782.3796447753906, |
| "kl_loss_7": 188.2281280517578, |
| "learning_rate": 0.00013486466002602133, |
| "loss": 539.5471, |
| "step": 7630 |
| }, |
| { |
| "ce_loss_10": 3.680443322658539, |
| "ce_loss_13": 3.6184515833854674, |
| "ce_loss_2": 4.119498157501221, |
| "ce_loss_3": 3.9600594878196715, |
| "ce_loss_7": 3.7241831541061403, |
| "epoch": 0.764, |
| "grad_norm": 376.0, |
| "kl_loss_10": 97.24503707885742, |
| "kl_loss_2": 1061.5150573730468, |
| "kl_loss_3": 737.4153533935547, |
| "kl_loss_7": 187.22406005859375, |
| "learning_rate": 0.00013378255894584462, |
| "loss": 537.8561, |
| "step": 7640 |
| }, |
| { |
| "ce_loss_10": 3.60829781293869, |
| "ce_loss_13": 3.5466054916381835, |
| "ce_loss_2": 4.072665071487426, |
| "ce_loss_3": 3.9038659572601317, |
| "ce_loss_7": 3.6548298835754394, |
| "epoch": 0.765, |
| "grad_norm": 380.0, |
| "kl_loss_10": 95.1153465270996, |
| "kl_loss_2": 1096.5119873046874, |
| "kl_loss_3": 758.0185302734375, |
| "kl_loss_7": 188.7285140991211, |
| "learning_rate": 0.0001327041456712334, |
| "loss": 535.4322, |
| "step": 7650 |
| }, |
| { |
| "ce_loss_10": 3.649807059764862, |
| "ce_loss_13": 3.588579738140106, |
| "ce_loss_2": 4.103657793998718, |
| "ce_loss_3": 3.9434640645980834, |
| "ce_loss_7": 3.6960434794425963, |
| "epoch": 0.766, |
| "grad_norm": 410.0, |
| "kl_loss_10": 95.99581718444824, |
| "kl_loss_2": 1095.5443603515625, |
| "kl_loss_3": 758.2011474609375, |
| "kl_loss_7": 189.6258804321289, |
| "learning_rate": 0.00013162943106179747, |
| "loss": 538.4857, |
| "step": 7660 |
| }, |
| { |
| "ce_loss_10": 3.627143681049347, |
| "ce_loss_13": 3.5671829342842103, |
| "ce_loss_2": 4.08168009519577, |
| "ce_loss_3": 3.9202899813652037, |
| "ce_loss_7": 3.6696593165397644, |
| "epoch": 0.767, |
| "grad_norm": 372.0, |
| "kl_loss_10": 97.96165161132812, |
| "kl_loss_2": 1080.916067504883, |
| "kl_loss_3": 746.2386291503906, |
| "kl_loss_7": 187.8828155517578, |
| "learning_rate": 0.00013055842593990132, |
| "loss": 529.1405, |
| "step": 7670 |
| }, |
| { |
| "ce_loss_10": 3.571021115779877, |
| "ce_loss_13": 3.5149319171905518, |
| "ce_loss_2": 4.027233076095581, |
| "ce_loss_3": 3.864198935031891, |
| "ce_loss_7": 3.6173386335372926, |
| "epoch": 0.768, |
| "grad_norm": 372.0, |
| "kl_loss_10": 92.48302154541015, |
| "kl_loss_2": 1072.3523834228515, |
| "kl_loss_3": 740.25439453125, |
| "kl_loss_7": 183.08441925048828, |
| "learning_rate": 0.00012949114109055414, |
| "loss": 533.8078, |
| "step": 7680 |
| }, |
| { |
| "ce_loss_10": 3.6176257848739626, |
| "ce_loss_13": 3.5594166994094847, |
| "ce_loss_2": 4.078605031967163, |
| "ce_loss_3": 3.918487286567688, |
| "ce_loss_7": 3.6636170506477357, |
| "epoch": 0.769, |
| "grad_norm": 422.0, |
| "kl_loss_10": 94.60773849487305, |
| "kl_loss_2": 1089.138235473633, |
| "kl_loss_3": 757.3290557861328, |
| "kl_loss_7": 187.67217483520508, |
| "learning_rate": 0.00012842758726130281, |
| "loss": 537.3952, |
| "step": 7690 |
| }, |
| { |
| "ce_loss_10": 3.655508840084076, |
| "ce_loss_13": 3.5946714520454406, |
| "ce_loss_2": 4.117365610599518, |
| "ce_loss_3": 3.9561346530914308, |
| "ce_loss_7": 3.7002153038978576, |
| "epoch": 0.77, |
| "grad_norm": 432.0, |
| "kl_loss_10": 94.65040473937988, |
| "kl_loss_2": 1092.9069885253907, |
| "kl_loss_3": 757.1287445068359, |
| "kl_loss_7": 189.29573440551758, |
| "learning_rate": 0.00012736777516212267, |
| "loss": 528.3388, |
| "step": 7700 |
| }, |
| { |
| "ce_loss_10": 3.65016793012619, |
| "ce_loss_13": 3.5914124608039857, |
| "ce_loss_2": 4.1151956677436825, |
| "ce_loss_3": 3.947415459156036, |
| "ce_loss_7": 3.6969300508499146, |
| "epoch": 0.771, |
| "grad_norm": 404.0, |
| "kl_loss_10": 94.72591972351074, |
| "kl_loss_2": 1095.3469024658202, |
| "kl_loss_3": 757.773715209961, |
| "kl_loss_7": 189.3510871887207, |
| "learning_rate": 0.00012631171546530968, |
| "loss": 527.5062, |
| "step": 7710 |
| }, |
| { |
| "ce_loss_10": 3.6695477604866027, |
| "ce_loss_13": 3.6066803336143494, |
| "ce_loss_2": 4.130255508422851, |
| "ce_loss_3": 3.9629722952842714, |
| "ce_loss_7": 3.7124558687210083, |
| "epoch": 0.772, |
| "grad_norm": 400.0, |
| "kl_loss_10": 99.19231147766114, |
| "kl_loss_2": 1089.8547271728517, |
| "kl_loss_3": 754.8526977539062, |
| "kl_loss_7": 189.7204719543457, |
| "learning_rate": 0.00012525941880537307, |
| "loss": 538.339, |
| "step": 7720 |
| }, |
| { |
| "ce_loss_10": 3.7045652866363525, |
| "ce_loss_13": 3.6435051798820495, |
| "ce_loss_2": 4.150338041782379, |
| "ce_loss_3": 3.9872673988342284, |
| "ce_loss_7": 3.7454243421554567, |
| "epoch": 0.773, |
| "grad_norm": 398.0, |
| "kl_loss_10": 95.61402626037598, |
| "kl_loss_2": 1061.4443786621093, |
| "kl_loss_3": 733.3583831787109, |
| "kl_loss_7": 185.768399810791, |
| "learning_rate": 0.00012421089577892869, |
| "loss": 524.5635, |
| "step": 7730 |
| }, |
| { |
| "ce_loss_10": 3.645431864261627, |
| "ce_loss_13": 3.584313917160034, |
| "ce_loss_2": 4.109975218772888, |
| "ce_loss_3": 3.9383553504943847, |
| "ce_loss_7": 3.6912776827812195, |
| "epoch": 0.774, |
| "grad_norm": 440.0, |
| "kl_loss_10": 96.41397132873536, |
| "kl_loss_2": 1098.874331665039, |
| "kl_loss_3": 755.1620544433594, |
| "kl_loss_7": 190.60089797973632, |
| "learning_rate": 0.0001231661569445919, |
| "loss": 536.2486, |
| "step": 7740 |
| }, |
| { |
| "ce_loss_10": 3.501088798046112, |
| "ce_loss_13": 3.443252968788147, |
| "ce_loss_2": 3.9620775461196898, |
| "ce_loss_3": 3.795079970359802, |
| "ce_loss_7": 3.5464309573173525, |
| "epoch": 0.775, |
| "grad_norm": 346.0, |
| "kl_loss_10": 93.47399139404297, |
| "kl_loss_2": 1090.8283447265626, |
| "kl_loss_3": 754.4031158447266, |
| "kl_loss_7": 186.22638092041015, |
| "learning_rate": 0.00012212521282287093, |
| "loss": 538.4937, |
| "step": 7750 |
| }, |
| { |
| "ce_loss_10": 3.6629942655563354, |
| "ce_loss_13": 3.601106119155884, |
| "ce_loss_2": 4.111618340015411, |
| "ce_loss_3": 3.951659619808197, |
| "ce_loss_7": 3.7078338623046876, |
| "epoch": 0.776, |
| "grad_norm": 364.0, |
| "kl_loss_10": 98.37307014465333, |
| "kl_loss_2": 1080.0280029296875, |
| "kl_loss_3": 745.148388671875, |
| "kl_loss_7": 190.13256072998047, |
| "learning_rate": 0.00012108807389606158, |
| "loss": 538.7029, |
| "step": 7760 |
| }, |
| { |
| "ce_loss_10": 3.659121203422546, |
| "ce_loss_13": 3.6007887601852415, |
| "ce_loss_2": 4.112268555164337, |
| "ce_loss_3": 3.9502876162528993, |
| "ce_loss_7": 3.7037811279296875, |
| "epoch": 0.777, |
| "grad_norm": 364.0, |
| "kl_loss_10": 93.70635108947754, |
| "kl_loss_2": 1072.3641204833984, |
| "kl_loss_3": 740.7109130859375, |
| "kl_loss_7": 182.99172821044922, |
| "learning_rate": 0.00012005475060814159, |
| "loss": 525.026, |
| "step": 7770 |
| }, |
| { |
| "ce_loss_10": 3.5951377630233763, |
| "ce_loss_13": 3.5359464406967165, |
| "ce_loss_2": 4.060847020149231, |
| "ce_loss_3": 3.891322433948517, |
| "ce_loss_7": 3.63891544342041, |
| "epoch": 0.778, |
| "grad_norm": 384.0, |
| "kl_loss_10": 97.0392059326172, |
| "kl_loss_2": 1106.707992553711, |
| "kl_loss_3": 763.6160034179687, |
| "kl_loss_7": 188.94908752441407, |
| "learning_rate": 0.00011902525336466464, |
| "loss": 535.4202, |
| "step": 7780 |
| }, |
| { |
| "ce_loss_10": 3.5829373240470885, |
| "ce_loss_13": 3.5231135487556458, |
| "ce_loss_2": 4.054291594028473, |
| "ce_loss_3": 3.888161540031433, |
| "ce_loss_7": 3.630410146713257, |
| "epoch": 0.779, |
| "grad_norm": 384.0, |
| "kl_loss_10": 95.91268005371094, |
| "kl_loss_2": 1108.9134979248047, |
| "kl_loss_3": 768.8667724609375, |
| "kl_loss_7": 190.86130905151367, |
| "learning_rate": 0.00011799959253265668, |
| "loss": 532.9367, |
| "step": 7790 |
| }, |
| { |
| "ce_loss_10": 3.646629250049591, |
| "ce_loss_13": 3.584940028190613, |
| "ce_loss_2": 4.100114536285401, |
| "ce_loss_3": 3.9342658519744873, |
| "ce_loss_7": 3.687722849845886, |
| "epoch": 0.78, |
| "grad_norm": 426.0, |
| "kl_loss_10": 98.96642303466797, |
| "kl_loss_2": 1093.9118621826171, |
| "kl_loss_3": 757.5971832275391, |
| "kl_loss_7": 190.95031204223633, |
| "learning_rate": 0.00011697777844051105, |
| "loss": 534.9413, |
| "step": 7800 |
| }, |
| { |
| "ce_loss_10": 3.6246392488479615, |
| "ce_loss_13": 3.5636275887489317, |
| "ce_loss_2": 4.0959463000297545, |
| "ce_loss_3": 3.9209203004837034, |
| "ce_loss_7": 3.668913960456848, |
| "epoch": 0.781, |
| "grad_norm": 394.0, |
| "kl_loss_10": 96.37951927185058, |
| "kl_loss_2": 1131.5390258789062, |
| "kl_loss_3": 774.0704650878906, |
| "kl_loss_7": 190.10399703979493, |
| "learning_rate": 0.00011595982137788402, |
| "loss": 539.5272, |
| "step": 7810 |
| }, |
| { |
| "ce_loss_10": 3.601748263835907, |
| "ce_loss_13": 3.542947518825531, |
| "ce_loss_2": 4.0462228655815125, |
| "ce_loss_3": 3.884107196331024, |
| "ce_loss_7": 3.6427837133407595, |
| "epoch": 0.782, |
| "grad_norm": 362.0, |
| "kl_loss_10": 95.04786491394043, |
| "kl_loss_2": 1064.3328094482422, |
| "kl_loss_3": 734.7262878417969, |
| "kl_loss_7": 183.74214706420898, |
| "learning_rate": 0.00011494573159559212, |
| "loss": 528.7992, |
| "step": 7820 |
| }, |
| { |
| "ce_loss_10": 3.587358093261719, |
| "ce_loss_13": 3.5285757184028625, |
| "ce_loss_2": 4.055095791816711, |
| "ce_loss_3": 3.8850948452949523, |
| "ce_loss_7": 3.6320362448692323, |
| "epoch": 0.783, |
| "grad_norm": 344.0, |
| "kl_loss_10": 95.2613368988037, |
| "kl_loss_2": 1092.221664428711, |
| "kl_loss_3": 759.4220550537109, |
| "kl_loss_7": 186.76042938232422, |
| "learning_rate": 0.00011393551930550828, |
| "loss": 541.8625, |
| "step": 7830 |
| }, |
| { |
| "ce_loss_10": 3.7354641199111938, |
| "ce_loss_13": 3.6739312171936036, |
| "ce_loss_2": 4.175600934028625, |
| "ce_loss_3": 4.019279301166534, |
| "ce_loss_7": 3.7783281922340395, |
| "epoch": 0.784, |
| "grad_norm": 390.0, |
| "kl_loss_10": 99.59685325622559, |
| "kl_loss_2": 1064.6414337158203, |
| "kl_loss_3": 741.2587860107421, |
| "kl_loss_7": 189.10858612060548, |
| "learning_rate": 0.00011292919468045875, |
| "loss": 527.9955, |
| "step": 7840 |
| }, |
| { |
| "ce_loss_10": 3.680347263813019, |
| "ce_loss_13": 3.6196223735809325, |
| "ce_loss_2": 4.128578865528107, |
| "ce_loss_3": 3.9644781708717347, |
| "ce_loss_7": 3.723640871047974, |
| "epoch": 0.785, |
| "grad_norm": 326.0, |
| "kl_loss_10": 95.6224323272705, |
| "kl_loss_2": 1072.9300354003906, |
| "kl_loss_3": 746.2864379882812, |
| "kl_loss_7": 187.85019607543944, |
| "learning_rate": 0.00011192676785412154, |
| "loss": 523.3404, |
| "step": 7850 |
| }, |
| { |
| "ce_loss_10": 3.622621536254883, |
| "ce_loss_13": 3.560643196105957, |
| "ce_loss_2": 4.089509451389313, |
| "ce_loss_3": 3.9235698699951174, |
| "ce_loss_7": 3.6674723744392397, |
| "epoch": 0.786, |
| "grad_norm": 458.0, |
| "kl_loss_10": 96.80489120483398, |
| "kl_loss_2": 1093.20048828125, |
| "kl_loss_3": 754.2780883789062, |
| "kl_loss_7": 187.94250411987304, |
| "learning_rate": 0.00011092824892092374, |
| "loss": 533.5229, |
| "step": 7860 |
| }, |
| { |
| "ce_loss_10": 3.547496974468231, |
| "ce_loss_13": 3.4892043232917787, |
| "ce_loss_2": 4.020435309410095, |
| "ce_loss_3": 3.8508559226989747, |
| "ce_loss_7": 3.5902876496315, |
| "epoch": 0.787, |
| "grad_norm": 322.0, |
| "kl_loss_10": 94.49787139892578, |
| "kl_loss_2": 1110.2376556396484, |
| "kl_loss_3": 767.8008331298828, |
| "kl_loss_7": 188.15859375, |
| "learning_rate": 0.0001099336479359398, |
| "loss": 532.4489, |
| "step": 7870 |
| }, |
| { |
| "ce_loss_10": 3.676584839820862, |
| "ce_loss_13": 3.6199623942375183, |
| "ce_loss_2": 4.124644804000854, |
| "ce_loss_3": 3.9601522207260134, |
| "ce_loss_7": 3.7184366583824158, |
| "epoch": 0.788, |
| "grad_norm": 414.0, |
| "kl_loss_10": 92.98647613525391, |
| "kl_loss_2": 1076.658267211914, |
| "kl_loss_3": 737.3064331054687, |
| "kl_loss_7": 183.75065536499022, |
| "learning_rate": 0.00010894297491479043, |
| "loss": 529.369, |
| "step": 7880 |
| }, |
| { |
| "ce_loss_10": 3.675907850265503, |
| "ce_loss_13": 3.615241324901581, |
| "ce_loss_2": 4.123448085784912, |
| "ce_loss_3": 3.9602300405502318, |
| "ce_loss_7": 3.715320038795471, |
| "epoch": 0.789, |
| "grad_norm": 370.0, |
| "kl_loss_10": 97.27086067199707, |
| "kl_loss_2": 1078.250909423828, |
| "kl_loss_3": 741.1790222167969, |
| "kl_loss_7": 186.16854553222657, |
| "learning_rate": 0.00010795623983354214, |
| "loss": 523.6978, |
| "step": 7890 |
| }, |
| { |
| "ce_loss_10": 3.549619424343109, |
| "ce_loss_13": 3.492576813697815, |
| "ce_loss_2": 4.021520948410034, |
| "ce_loss_3": 3.8529414176940917, |
| "ce_loss_7": 3.595447373390198, |
| "epoch": 0.79, |
| "grad_norm": 428.0, |
| "kl_loss_10": 93.0215072631836, |
| "kl_loss_2": 1113.914730834961, |
| "kl_loss_3": 772.1699676513672, |
| "kl_loss_7": 189.76142959594728, |
| "learning_rate": 0.00010697345262860636, |
| "loss": 533.2417, |
| "step": 7900 |
| }, |
| { |
| "ce_loss_10": 3.702609384059906, |
| "ce_loss_13": 3.6431208491325378, |
| "ce_loss_2": 4.14087233543396, |
| "ce_loss_3": 3.9802441716194155, |
| "ce_loss_7": 3.7457746505737304, |
| "epoch": 0.791, |
| "grad_norm": 368.0, |
| "kl_loss_10": 97.61579055786133, |
| "kl_loss_2": 1063.5964447021483, |
| "kl_loss_3": 734.3944030761719, |
| "kl_loss_7": 187.06654663085936, |
| "learning_rate": 0.00010599462319663906, |
| "loss": 520.0625, |
| "step": 7910 |
| }, |
| { |
| "ce_loss_10": 3.6748117208480835, |
| "ce_loss_13": 3.614163410663605, |
| "ce_loss_2": 4.111383318901062, |
| "ce_loss_3": 3.951873278617859, |
| "ce_loss_7": 3.715614116191864, |
| "epoch": 0.792, |
| "grad_norm": 382.0, |
| "kl_loss_10": 94.54501228332519, |
| "kl_loss_2": 1049.0091613769532, |
| "kl_loss_3": 722.9781219482422, |
| "kl_loss_7": 183.01754150390624, |
| "learning_rate": 0.00010501976139444191, |
| "loss": 518.3574, |
| "step": 7920 |
| }, |
| { |
| "ce_loss_10": 3.7049331426620484, |
| "ce_loss_13": 3.6438170671463013, |
| "ce_loss_2": 4.144390141963958, |
| "ce_loss_3": 3.9876601815223696, |
| "ce_loss_7": 3.745813262462616, |
| "epoch": 0.793, |
| "grad_norm": 370.0, |
| "kl_loss_10": 97.8447940826416, |
| "kl_loss_2": 1057.744808959961, |
| "kl_loss_3": 730.5345703125, |
| "kl_loss_7": 185.18996047973633, |
| "learning_rate": 0.0001040488770388625, |
| "loss": 527.8366, |
| "step": 7930 |
| }, |
| { |
| "ce_loss_10": 3.6446168065071105, |
| "ce_loss_13": 3.5857683539390566, |
| "ce_loss_2": 4.095709836483001, |
| "ce_loss_3": 3.92866997718811, |
| "ce_loss_7": 3.685992920398712, |
| "epoch": 0.794, |
| "grad_norm": 426.0, |
| "kl_loss_10": 95.57501831054688, |
| "kl_loss_2": 1080.6232208251954, |
| "kl_loss_3": 746.1043212890625, |
| "kl_loss_7": 186.66847763061523, |
| "learning_rate": 0.00010308197990669538, |
| "loss": 527.0882, |
| "step": 7940 |
| }, |
| { |
| "ce_loss_10": 3.7647696137428284, |
| "ce_loss_13": 3.7019853234291076, |
| "ce_loss_2": 4.21561850309372, |
| "ce_loss_3": 4.0513708114624025, |
| "ce_loss_7": 3.8064971685409548, |
| "epoch": 0.795, |
| "grad_norm": 356.0, |
| "kl_loss_10": 100.9611873626709, |
| "kl_loss_2": 1084.6148345947265, |
| "kl_loss_3": 743.2166534423828, |
| "kl_loss_7": 191.26584091186524, |
| "learning_rate": 0.0001021190797345839, |
| "loss": 525.7331, |
| "step": 7950 |
| }, |
| { |
| "ce_loss_10": 3.4792375445365904, |
| "ce_loss_13": 3.4190258502960207, |
| "ce_loss_2": 3.96710387468338, |
| "ce_loss_3": 3.7957834005355835, |
| "ce_loss_7": 3.528597414493561, |
| "epoch": 0.796, |
| "grad_norm": 386.0, |
| "kl_loss_10": 95.0804401397705, |
| "kl_loss_2": 1137.388375854492, |
| "kl_loss_3": 792.2215118408203, |
| "kl_loss_7": 192.50171508789063, |
| "learning_rate": 0.00010116018621892236, |
| "loss": 537.4441, |
| "step": 7960 |
| }, |
| { |
| "ce_loss_10": 3.6988709568977356, |
| "ce_loss_13": 3.6362175583839416, |
| "ce_loss_2": 4.151265692710877, |
| "ce_loss_3": 3.9912821412086488, |
| "ce_loss_7": 3.742702007293701, |
| "epoch": 0.797, |
| "grad_norm": 444.0, |
| "kl_loss_10": 99.6129222869873, |
| "kl_loss_2": 1100.607211303711, |
| "kl_loss_3": 767.8290985107421, |
| "kl_loss_7": 194.2897491455078, |
| "learning_rate": 0.00010020530901575753, |
| "loss": 526.4385, |
| "step": 7970 |
| }, |
| { |
| "ce_loss_10": 3.727276122570038, |
| "ce_loss_13": 3.664809966087341, |
| "ce_loss_2": 4.17646723985672, |
| "ce_loss_3": 4.011640095710755, |
| "ce_loss_7": 3.7683190941810607, |
| "epoch": 0.798, |
| "grad_norm": 334.0, |
| "kl_loss_10": 98.68130950927734, |
| "kl_loss_2": 1084.4167602539062, |
| "kl_loss_3": 747.0828460693359, |
| "kl_loss_7": 190.09516677856445, |
| "learning_rate": 9.925445774069231e-05, |
| "loss": 521.7054, |
| "step": 7980 |
| }, |
| { |
| "ce_loss_10": 3.677051067352295, |
| "ce_loss_13": 3.6162899494171143, |
| "ce_loss_2": 4.132367658615112, |
| "ce_loss_3": 3.9699331760406493, |
| "ce_loss_7": 3.723151159286499, |
| "epoch": 0.799, |
| "grad_norm": 340.0, |
| "kl_loss_10": 97.4996379852295, |
| "kl_loss_2": 1074.8818054199219, |
| "kl_loss_3": 740.7804992675781, |
| "kl_loss_7": 187.78277282714845, |
| "learning_rate": 9.830764196878872e-05, |
| "loss": 517.902, |
| "step": 7990 |
| }, |
| { |
| "ce_loss_10": 3.6140867948532103, |
| "ce_loss_13": 3.556562864780426, |
| "ce_loss_2": 4.0635038137435915, |
| "ce_loss_3": 3.902656090259552, |
| "ce_loss_7": 3.6608413100242614, |
| "epoch": 0.8, |
| "grad_norm": 410.0, |
| "kl_loss_10": 94.1772445678711, |
| "kl_loss_2": 1099.7673645019531, |
| "kl_loss_3": 761.414794921875, |
| "kl_loss_7": 186.34807205200195, |
| "learning_rate": 9.736487123447069e-05, |
| "loss": 531.4563, |
| "step": 8000 |
| }, |
| { |
| "ce_loss_10": 3.559322512149811, |
| "ce_loss_13": 3.49820739030838, |
| "ce_loss_2": 4.036343896389008, |
| "ce_loss_3": 3.8618996500968934, |
| "ce_loss_7": 3.6017415881156922, |
| "epoch": 0.801, |
| "grad_norm": 424.0, |
| "kl_loss_10": 96.55318107604981, |
| "kl_loss_2": 1136.456121826172, |
| "kl_loss_3": 771.9989410400391, |
| "kl_loss_7": 188.50249938964845, |
| "learning_rate": 9.642615503142926e-05, |
| "loss": 541.6381, |
| "step": 8010 |
| }, |
| { |
| "ce_loss_10": 3.630905735492706, |
| "ce_loss_13": 3.5719484210014345, |
| "ce_loss_2": 4.097460567951202, |
| "ce_loss_3": 3.9188284277915955, |
| "ce_loss_7": 3.673666751384735, |
| "epoch": 0.802, |
| "grad_norm": 370.0, |
| "kl_loss_10": 94.45314712524414, |
| "kl_loss_2": 1090.8831848144532, |
| "kl_loss_3": 738.8009979248047, |
| "kl_loss_7": 184.0514343261719, |
| "learning_rate": 9.549150281252633e-05, |
| "loss": 524.0769, |
| "step": 8020 |
| }, |
| { |
| "ce_loss_10": 3.658740258216858, |
| "ce_loss_13": 3.598051357269287, |
| "ce_loss_2": 4.112537753582001, |
| "ce_loss_3": 3.9440460920333864, |
| "ce_loss_7": 3.701529622077942, |
| "epoch": 0.803, |
| "grad_norm": 354.0, |
| "kl_loss_10": 97.62285194396972, |
| "kl_loss_2": 1076.1221923828125, |
| "kl_loss_3": 742.6418304443359, |
| "kl_loss_7": 187.46692276000977, |
| "learning_rate": 9.4560923989699e-05, |
| "loss": 531.6947, |
| "step": 8030 |
| }, |
| { |
| "ce_loss_10": 3.6491722106933593, |
| "ce_loss_13": 3.5902853846549987, |
| "ce_loss_2": 4.109341251850128, |
| "ce_loss_3": 3.942945408821106, |
| "ce_loss_7": 3.696093666553497, |
| "epoch": 0.804, |
| "grad_norm": 382.0, |
| "kl_loss_10": 96.87751007080078, |
| "kl_loss_2": 1089.1260498046875, |
| "kl_loss_3": 751.9404052734375, |
| "kl_loss_7": 188.3861946105957, |
| "learning_rate": 9.363442793386607e-05, |
| "loss": 538.5806, |
| "step": 8040 |
| }, |
| { |
| "ce_loss_10": 3.6259461641311646, |
| "ce_loss_13": 3.5652650475502012, |
| "ce_loss_2": 4.09434745311737, |
| "ce_loss_3": 3.9288868069648744, |
| "ce_loss_7": 3.670744836330414, |
| "epoch": 0.805, |
| "grad_norm": 436.0, |
| "kl_loss_10": 96.23310775756836, |
| "kl_loss_2": 1102.4481658935547, |
| "kl_loss_3": 766.5739196777344, |
| "kl_loss_7": 189.9322036743164, |
| "learning_rate": 9.271202397483213e-05, |
| "loss": 525.3384, |
| "step": 8050 |
| }, |
| { |
| "ce_loss_10": 3.64525443315506, |
| "ce_loss_13": 3.587091565132141, |
| "ce_loss_2": 4.088842356204987, |
| "ce_loss_3": 3.926717495918274, |
| "ce_loss_7": 3.6877028584480285, |
| "epoch": 0.806, |
| "grad_norm": 462.0, |
| "kl_loss_10": 95.10493888854981, |
| "kl_loss_2": 1064.438558959961, |
| "kl_loss_3": 734.5970611572266, |
| "kl_loss_7": 184.7579719543457, |
| "learning_rate": 9.179372140119524e-05, |
| "loss": 530.6901, |
| "step": 8060 |
| }, |
| { |
| "ce_loss_10": 3.59020277261734, |
| "ce_loss_13": 3.531452512741089, |
| "ce_loss_2": 4.036340653896332, |
| "ce_loss_3": 3.8760047912597657, |
| "ce_loss_7": 3.6337902188301086, |
| "epoch": 0.807, |
| "grad_norm": 432.0, |
| "kl_loss_10": 94.00482330322265, |
| "kl_loss_2": 1074.4489135742188, |
| "kl_loss_3": 739.4833740234375, |
| "kl_loss_7": 184.7809310913086, |
| "learning_rate": 9.087952946025175e-05, |
| "loss": 531.5049, |
| "step": 8070 |
| }, |
| { |
| "ce_loss_10": 3.7053560853004455, |
| "ce_loss_13": 3.6452667355537414, |
| "ce_loss_2": 4.136937665939331, |
| "ce_loss_3": 3.9754079580307007, |
| "ce_loss_7": 3.7457935094833372, |
| "epoch": 0.808, |
| "grad_norm": 368.0, |
| "kl_loss_10": 96.12910385131836, |
| "kl_loss_2": 1048.5191436767577, |
| "kl_loss_3": 719.7487762451171, |
| "kl_loss_7": 183.48829498291016, |
| "learning_rate": 8.996945735790446e-05, |
| "loss": 523.2327, |
| "step": 8080 |
| }, |
| { |
| "ce_loss_10": 3.602836012840271, |
| "ce_loss_13": 3.542934799194336, |
| "ce_loss_2": 4.055256414413452, |
| "ce_loss_3": 3.8926199197769167, |
| "ce_loss_7": 3.6462236762046816, |
| "epoch": 0.809, |
| "grad_norm": 414.0, |
| "kl_loss_10": 95.67857933044434, |
| "kl_loss_2": 1093.489208984375, |
| "kl_loss_3": 759.0634765625, |
| "kl_loss_7": 186.64484634399415, |
| "learning_rate": 8.906351425856951e-05, |
| "loss": 536.3948, |
| "step": 8090 |
| }, |
| { |
| "ce_loss_10": 3.586146354675293, |
| "ce_loss_13": 3.5270805954933167, |
| "ce_loss_2": 4.053403818607331, |
| "ce_loss_3": 3.883652901649475, |
| "ce_loss_7": 3.6302590370178223, |
| "epoch": 0.81, |
| "grad_norm": 328.0, |
| "kl_loss_10": 96.12913436889649, |
| "kl_loss_2": 1108.7147094726563, |
| "kl_loss_3": 762.2885803222656, |
| "kl_loss_7": 187.99051055908203, |
| "learning_rate": 8.816170928508365e-05, |
| "loss": 536.7299, |
| "step": 8100 |
| }, |
| { |
| "ce_loss_10": 3.5469899415969848, |
| "ce_loss_13": 3.487591028213501, |
| "ce_loss_2": 4.024684643745422, |
| "ce_loss_3": 3.853050243854523, |
| "ce_loss_7": 3.5918329834938048, |
| "epoch": 0.811, |
| "grad_norm": 424.0, |
| "kl_loss_10": 95.16791305541992, |
| "kl_loss_2": 1131.8392974853516, |
| "kl_loss_3": 782.3692016601562, |
| "kl_loss_7": 188.51590728759766, |
| "learning_rate": 8.7264051518613e-05, |
| "loss": 538.6139, |
| "step": 8110 |
| }, |
| { |
| "ce_loss_10": 3.639654815196991, |
| "ce_loss_13": 3.583385097980499, |
| "ce_loss_2": 4.081218779087067, |
| "ce_loss_3": 3.9191540598869326, |
| "ce_loss_7": 3.680349314212799, |
| "epoch": 0.812, |
| "grad_norm": 358.0, |
| "kl_loss_10": 93.30685958862304, |
| "kl_loss_2": 1057.4586822509766, |
| "kl_loss_3": 735.9759002685547, |
| "kl_loss_7": 182.97039413452148, |
| "learning_rate": 8.637054999856148e-05, |
| "loss": 526.1802, |
| "step": 8120 |
| }, |
| { |
| "ce_loss_10": 3.6243308544158936, |
| "ce_loss_13": 3.5630579233169555, |
| "ce_loss_2": 4.083577620983124, |
| "ce_loss_3": 3.9160293340682983, |
| "ce_loss_7": 3.6718581318855286, |
| "epoch": 0.813, |
| "grad_norm": 328.0, |
| "kl_loss_10": 95.2622299194336, |
| "kl_loss_2": 1086.6508239746095, |
| "kl_loss_3": 748.3265411376954, |
| "kl_loss_7": 187.44526748657228, |
| "learning_rate": 8.548121372247918e-05, |
| "loss": 536.2552, |
| "step": 8130 |
| }, |
| { |
| "ce_loss_10": 3.699293088912964, |
| "ce_loss_13": 3.641393613815308, |
| "ce_loss_2": 4.146343159675598, |
| "ce_loss_3": 3.982176637649536, |
| "ce_loss_7": 3.7424126982688906, |
| "epoch": 0.814, |
| "grad_norm": 420.0, |
| "kl_loss_10": 97.64918098449706, |
| "kl_loss_2": 1075.0233795166016, |
| "kl_loss_3": 745.3918151855469, |
| "kl_loss_7": 187.1306022644043, |
| "learning_rate": 8.459605164597267e-05, |
| "loss": 527.4509, |
| "step": 8140 |
| }, |
| { |
| "ce_loss_10": 3.5794180989265443, |
| "ce_loss_13": 3.521663022041321, |
| "ce_loss_2": 4.035482859611511, |
| "ce_loss_3": 3.869397759437561, |
| "ce_loss_7": 3.6230968952178957, |
| "epoch": 0.815, |
| "grad_norm": 322.0, |
| "kl_loss_10": 93.84382820129395, |
| "kl_loss_2": 1085.6336395263672, |
| "kl_loss_3": 749.5215454101562, |
| "kl_loss_7": 184.3967170715332, |
| "learning_rate": 8.371507268261436e-05, |
| "loss": 530.9717, |
| "step": 8150 |
| }, |
| { |
| "ce_loss_10": 3.6623859286308287, |
| "ce_loss_13": 3.603581893444061, |
| "ce_loss_2": 4.1160969018936155, |
| "ce_loss_3": 3.9481249690055846, |
| "ce_loss_7": 3.7034823894500732, |
| "epoch": 0.816, |
| "grad_norm": 410.0, |
| "kl_loss_10": 96.0962978363037, |
| "kl_loss_2": 1085.8551330566406, |
| "kl_loss_3": 744.0009185791016, |
| "kl_loss_7": 187.44638290405274, |
| "learning_rate": 8.283828570385238e-05, |
| "loss": 515.8468, |
| "step": 8160 |
| }, |
| { |
| "ce_loss_10": 3.6646664142608643, |
| "ce_loss_13": 3.607030153274536, |
| "ce_loss_2": 4.124508082866669, |
| "ce_loss_3": 3.955708396434784, |
| "ce_loss_7": 3.708679938316345, |
| "epoch": 0.817, |
| "grad_norm": 286.0, |
| "kl_loss_10": 95.48198356628419, |
| "kl_loss_2": 1068.3529357910156, |
| "kl_loss_3": 737.6435119628907, |
| "kl_loss_7": 186.3275260925293, |
| "learning_rate": 8.196569953892202e-05, |
| "loss": 525.6566, |
| "step": 8170 |
| }, |
| { |
| "ce_loss_10": 3.5752533435821534, |
| "ce_loss_13": 3.5151426196098328, |
| "ce_loss_2": 4.039277529716491, |
| "ce_loss_3": 3.8700820326805117, |
| "ce_loss_7": 3.6193170666694643, |
| "epoch": 0.818, |
| "grad_norm": 392.0, |
| "kl_loss_10": 95.23657569885253, |
| "kl_loss_2": 1087.7711944580078, |
| "kl_loss_3": 748.5086303710938, |
| "kl_loss_7": 185.79026489257814, |
| "learning_rate": 8.109732297475635e-05, |
| "loss": 529.4896, |
| "step": 8180 |
| }, |
| { |
| "ce_loss_10": 3.5442301869392394, |
| "ce_loss_13": 3.48368262052536, |
| "ce_loss_2": 4.041348910331726, |
| "ce_loss_3": 3.8620414972305297, |
| "ce_loss_7": 3.593292236328125, |
| "epoch": 0.819, |
| "grad_norm": 508.0, |
| "kl_loss_10": 94.79218406677246, |
| "kl_loss_2": 1140.4125610351562, |
| "kl_loss_3": 788.5256622314453, |
| "kl_loss_7": 192.41318969726564, |
| "learning_rate": 8.023316475589754e-05, |
| "loss": 543.2035, |
| "step": 8190 |
| }, |
| { |
| "ce_loss_10": 3.5104150652885435, |
| "ce_loss_13": 3.44714834690094, |
| "ce_loss_2": 4.0140674948692325, |
| "ce_loss_3": 3.8308369159698485, |
| "ce_loss_7": 3.5589245796203612, |
| "epoch": 0.82, |
| "grad_norm": 532.0, |
| "kl_loss_10": 97.92351608276367, |
| "kl_loss_2": 1158.8160186767577, |
| "kl_loss_3": 797.9960662841797, |
| "kl_loss_7": 195.1374740600586, |
| "learning_rate": 7.937323358440934e-05, |
| "loss": 549.9746, |
| "step": 8200 |
| }, |
| { |
| "ce_loss_10": 3.637300205230713, |
| "ce_loss_13": 3.5789112567901613, |
| "ce_loss_2": 4.087347877025604, |
| "ce_loss_3": 3.923117625713348, |
| "ce_loss_7": 3.679899263381958, |
| "epoch": 0.821, |
| "grad_norm": 404.0, |
| "kl_loss_10": 95.01284561157226, |
| "kl_loss_2": 1074.5766845703124, |
| "kl_loss_3": 743.3249450683594, |
| "kl_loss_7": 184.74501190185546, |
| "learning_rate": 7.851753811978923e-05, |
| "loss": 530.0879, |
| "step": 8210 |
| }, |
| { |
| "ce_loss_10": 3.661479341983795, |
| "ce_loss_13": 3.6010610818862916, |
| "ce_loss_2": 4.123578751087189, |
| "ce_loss_3": 3.9517632484436036, |
| "ce_loss_7": 3.7047110080718992, |
| "epoch": 0.822, |
| "grad_norm": 358.0, |
| "kl_loss_10": 96.71367454528809, |
| "kl_loss_2": 1091.6120025634766, |
| "kl_loss_3": 744.3147399902343, |
| "kl_loss_7": 186.59919815063478, |
| "learning_rate": 7.766608697888095e-05, |
| "loss": 527.9285, |
| "step": 8220 |
| }, |
| { |
| "ce_loss_10": 3.672685134410858, |
| "ce_loss_13": 3.6110698223114013, |
| "ce_loss_2": 4.123067581653595, |
| "ce_loss_3": 3.9549397349357607, |
| "ce_loss_7": 3.7160248041152952, |
| "epoch": 0.823, |
| "grad_norm": 428.0, |
| "kl_loss_10": 99.5799617767334, |
| "kl_loss_2": 1090.7132843017578, |
| "kl_loss_3": 754.5721008300782, |
| "kl_loss_7": 190.94887008666993, |
| "learning_rate": 7.681888873578785e-05, |
| "loss": 534.6821, |
| "step": 8230 |
| }, |
| { |
| "ce_loss_10": 3.599495697021484, |
| "ce_loss_13": 3.5377328515052797, |
| "ce_loss_2": 4.075003004074096, |
| "ce_loss_3": 3.9027179360389708, |
| "ce_loss_7": 3.6464317083358764, |
| "epoch": 0.824, |
| "grad_norm": 454.0, |
| "kl_loss_10": 96.61878395080566, |
| "kl_loss_2": 1113.7870971679688, |
| "kl_loss_3": 766.2083129882812, |
| "kl_loss_7": 191.40456848144532, |
| "learning_rate": 7.597595192178702e-05, |
| "loss": 531.8756, |
| "step": 8240 |
| }, |
| { |
| "ce_loss_10": 3.5937318563461305, |
| "ce_loss_13": 3.5349967002868654, |
| "ce_loss_2": 4.069689559936523, |
| "ce_loss_3": 3.896022927761078, |
| "ce_loss_7": 3.640897309780121, |
| "epoch": 0.825, |
| "grad_norm": 390.0, |
| "kl_loss_10": 96.6520393371582, |
| "kl_loss_2": 1123.3416778564454, |
| "kl_loss_3": 772.9763427734375, |
| "kl_loss_7": 191.78026428222657, |
| "learning_rate": 7.513728502524286e-05, |
| "loss": 540.9631, |
| "step": 8250 |
| }, |
| { |
| "ce_loss_10": 3.600663185119629, |
| "ce_loss_13": 3.543354606628418, |
| "ce_loss_2": 4.056336843967438, |
| "ce_loss_3": 3.886526358127594, |
| "ce_loss_7": 3.644607651233673, |
| "epoch": 0.826, |
| "grad_norm": 520.0, |
| "kl_loss_10": 94.51933555603027, |
| "kl_loss_2": 1071.4390838623046, |
| "kl_loss_3": 737.6031066894532, |
| "kl_loss_7": 182.459228515625, |
| "learning_rate": 7.430289649152156e-05, |
| "loss": 532.1943, |
| "step": 8260 |
| }, |
| { |
| "ce_loss_10": 3.4964008927345276, |
| "ce_loss_13": 3.4386809706687926, |
| "ce_loss_2": 3.979319155216217, |
| "ce_loss_3": 3.806533432006836, |
| "ce_loss_7": 3.5424695372581483, |
| "epoch": 0.827, |
| "grad_norm": 438.0, |
| "kl_loss_10": 92.59819717407227, |
| "kl_loss_2": 1138.140805053711, |
| "kl_loss_3": 787.0873046875, |
| "kl_loss_7": 188.89702301025392, |
| "learning_rate": 7.347279472290646e-05, |
| "loss": 536.0913, |
| "step": 8270 |
| }, |
| { |
| "ce_loss_10": 3.641860234737396, |
| "ce_loss_13": 3.5819854736328125, |
| "ce_loss_2": 4.100788974761963, |
| "ce_loss_3": 3.9369661927223207, |
| "ce_loss_7": 3.6862512946128847, |
| "epoch": 0.828, |
| "grad_norm": 404.0, |
| "kl_loss_10": 96.73132438659668, |
| "kl_loss_2": 1085.176287841797, |
| "kl_loss_3": 756.2387023925781, |
| "kl_loss_7": 187.64101333618163, |
| "learning_rate": 7.264698807851328e-05, |
| "loss": 532.8096, |
| "step": 8280 |
| }, |
| { |
| "ce_loss_10": 3.604352295398712, |
| "ce_loss_13": 3.549103558063507, |
| "ce_loss_2": 4.042815041542053, |
| "ce_loss_3": 3.880488729476929, |
| "ce_loss_7": 3.64413400888443, |
| "epoch": 0.829, |
| "grad_norm": 520.0, |
| "kl_loss_10": 92.21053123474121, |
| "kl_loss_2": 1057.8690124511718, |
| "kl_loss_3": 728.9253723144532, |
| "kl_loss_7": 181.22113647460938, |
| "learning_rate": 7.182548487420554e-05, |
| "loss": 524.6575, |
| "step": 8290 |
| }, |
| { |
| "ce_loss_10": 3.6577786207199097, |
| "ce_loss_13": 3.597848916053772, |
| "ce_loss_2": 4.107566392421722, |
| "ce_loss_3": 3.947295570373535, |
| "ce_loss_7": 3.703710603713989, |
| "epoch": 0.83, |
| "grad_norm": 286.0, |
| "kl_loss_10": 96.30242042541504, |
| "kl_loss_2": 1087.0319366455078, |
| "kl_loss_3": 748.0092193603516, |
| "kl_loss_7": 187.4295867919922, |
| "learning_rate": 7.100829338251146e-05, |
| "loss": 527.7667, |
| "step": 8300 |
| }, |
| { |
| "ce_loss_10": 3.5980669021606446, |
| "ce_loss_13": 3.5371885776519774, |
| "ce_loss_2": 4.070665979385376, |
| "ce_loss_3": 3.8979653000831602, |
| "ce_loss_7": 3.6431610107421877, |
| "epoch": 0.831, |
| "grad_norm": 394.0, |
| "kl_loss_10": 95.44490776062011, |
| "kl_loss_2": 1113.3842803955079, |
| "kl_loss_3": 769.6158874511718, |
| "kl_loss_7": 189.99929428100586, |
| "learning_rate": 7.019542183254046e-05, |
| "loss": 531.0445, |
| "step": 8310 |
| }, |
| { |
| "ce_loss_10": 3.6354474306106566, |
| "ce_loss_13": 3.57179137468338, |
| "ce_loss_2": 4.082340836524963, |
| "ce_loss_3": 3.9207422971725463, |
| "ce_loss_7": 3.6777117967605593, |
| "epoch": 0.832, |
| "grad_norm": 474.0, |
| "kl_loss_10": 100.207564163208, |
| "kl_loss_2": 1084.2285125732421, |
| "kl_loss_3": 748.0254974365234, |
| "kl_loss_7": 190.82402954101562, |
| "learning_rate": 6.938687840989971e-05, |
| "loss": 528.8804, |
| "step": 8320 |
| }, |
| { |
| "ce_loss_10": 3.5696911811828613, |
| "ce_loss_13": 3.508439671993256, |
| "ce_loss_2": 4.0291890621185305, |
| "ce_loss_3": 3.8622559309005737, |
| "ce_loss_7": 3.614106321334839, |
| "epoch": 0.833, |
| "grad_norm": 600.0, |
| "kl_loss_10": 96.55842895507813, |
| "kl_loss_2": 1082.4974243164063, |
| "kl_loss_3": 748.5556121826172, |
| "kl_loss_7": 188.75322189331055, |
| "learning_rate": 6.858267125661271e-05, |
| "loss": 531.4916, |
| "step": 8330 |
| }, |
| { |
| "ce_loss_10": 3.6338680744171143, |
| "ce_loss_13": 3.575134778022766, |
| "ce_loss_2": 4.0971689343452455, |
| "ce_loss_3": 3.930681896209717, |
| "ce_loss_7": 3.6769707798957825, |
| "epoch": 0.834, |
| "grad_norm": 418.0, |
| "kl_loss_10": 93.3882438659668, |
| "kl_loss_2": 1085.4937896728516, |
| "kl_loss_3": 746.0253967285156, |
| "kl_loss_7": 184.32117233276367, |
| "learning_rate": 6.778280847103668e-05, |
| "loss": 538.0241, |
| "step": 8340 |
| }, |
| { |
| "ce_loss_10": 3.6449947714805604, |
| "ce_loss_13": 3.581918466091156, |
| "ce_loss_2": 4.1008768558502195, |
| "ce_loss_3": 3.937298035621643, |
| "ce_loss_7": 3.686388850212097, |
| "epoch": 0.835, |
| "grad_norm": 290.0, |
| "kl_loss_10": 98.43625144958496, |
| "kl_loss_2": 1102.1855102539062, |
| "kl_loss_3": 759.7929138183594, |
| "kl_loss_7": 191.51789016723632, |
| "learning_rate": 6.698729810778065e-05, |
| "loss": 532.2951, |
| "step": 8350 |
| }, |
| { |
| "ce_loss_10": 3.5478424787521363, |
| "ce_loss_13": 3.489585447311401, |
| "ce_loss_2": 4.0140421986579895, |
| "ce_loss_3": 3.8517470717430116, |
| "ce_loss_7": 3.592922496795654, |
| "epoch": 0.836, |
| "grad_norm": 490.0, |
| "kl_loss_10": 91.77609100341797, |
| "kl_loss_2": 1092.1636932373046, |
| "kl_loss_3": 756.2904968261719, |
| "kl_loss_7": 183.14143447875978, |
| "learning_rate": 6.619614817762538e-05, |
| "loss": 531.3562, |
| "step": 8360 |
| }, |
| { |
| "ce_loss_10": 3.509856128692627, |
| "ce_loss_13": 3.4520259737968444, |
| "ce_loss_2": 4.005417215824127, |
| "ce_loss_3": 3.8302616715431212, |
| "ce_loss_7": 3.56083265542984, |
| "epoch": 0.837, |
| "grad_norm": 356.0, |
| "kl_loss_10": 91.30384330749511, |
| "kl_loss_2": 1146.0878509521485, |
| "kl_loss_3": 788.8349487304688, |
| "kl_loss_7": 189.73513488769532, |
| "learning_rate": 6.540936664744196e-05, |
| "loss": 543.0581, |
| "step": 8370 |
| }, |
| { |
| "ce_loss_10": 3.6644623279571533, |
| "ce_loss_13": 3.6040658593177795, |
| "ce_loss_2": 4.12789534330368, |
| "ce_loss_3": 3.959988057613373, |
| "ce_loss_7": 3.7062342405319213, |
| "epoch": 0.838, |
| "grad_norm": 366.0, |
| "kl_loss_10": 97.38574295043945, |
| "kl_loss_2": 1085.7984375, |
| "kl_loss_3": 749.598519897461, |
| "kl_loss_7": 188.30213012695313, |
| "learning_rate": 6.462696144011149e-05, |
| "loss": 525.3536, |
| "step": 8380 |
| }, |
| { |
| "ce_loss_10": 3.6138532400131225, |
| "ce_loss_13": 3.5537376523017885, |
| "ce_loss_2": 4.071477258205414, |
| "ce_loss_3": 3.910947525501251, |
| "ce_loss_7": 3.658327579498291, |
| "epoch": 0.839, |
| "grad_norm": 556.0, |
| "kl_loss_10": 98.20170745849609, |
| "kl_loss_2": 1090.382958984375, |
| "kl_loss_3": 762.5471374511719, |
| "kl_loss_7": 191.74814834594727, |
| "learning_rate": 6.384894043444567e-05, |
| "loss": 528.8093, |
| "step": 8390 |
| }, |
| { |
| "ce_loss_10": 3.644765245914459, |
| "ce_loss_13": 3.585478734970093, |
| "ce_loss_2": 4.109920060634613, |
| "ce_loss_3": 3.9416786313056944, |
| "ce_loss_7": 3.689965844154358, |
| "epoch": 0.84, |
| "grad_norm": 412.0, |
| "kl_loss_10": 97.19089965820312, |
| "kl_loss_2": 1101.7069030761718, |
| "kl_loss_3": 757.5290496826171, |
| "kl_loss_7": 188.98860778808594, |
| "learning_rate": 6.307531146510753e-05, |
| "loss": 529.2157, |
| "step": 8400 |
| }, |
| { |
| "ce_loss_10": 3.621027076244354, |
| "ce_loss_13": 3.5618404507637025, |
| "ce_loss_2": 4.0682983756065365, |
| "ce_loss_3": 3.90874502658844, |
| "ce_loss_7": 3.6661928296089172, |
| "epoch": 0.841, |
| "grad_norm": 384.0, |
| "kl_loss_10": 95.90530738830566, |
| "kl_loss_2": 1067.8680267333984, |
| "kl_loss_3": 738.8968048095703, |
| "kl_loss_7": 187.38672485351563, |
| "learning_rate": 6.230608232253226e-05, |
| "loss": 522.0211, |
| "step": 8410 |
| }, |
| { |
| "ce_loss_10": 3.5725093245506288, |
| "ce_loss_13": 3.5133079648017884, |
| "ce_loss_2": 4.052767491340637, |
| "ce_loss_3": 3.8865469098091125, |
| "ce_loss_7": 3.617572808265686, |
| "epoch": 0.842, |
| "grad_norm": 420.0, |
| "kl_loss_10": 93.54998550415038, |
| "kl_loss_2": 1118.0941436767578, |
| "kl_loss_3": 779.2841003417968, |
| "kl_loss_7": 188.06975250244142, |
| "learning_rate": 6.154126075284855e-05, |
| "loss": 530.6581, |
| "step": 8420 |
| }, |
| { |
| "ce_loss_10": 3.6709149718284606, |
| "ce_loss_13": 3.610610318183899, |
| "ce_loss_2": 4.11589070558548, |
| "ce_loss_3": 3.958199071884155, |
| "ce_loss_7": 3.7119770526885985, |
| "epoch": 0.843, |
| "grad_norm": 360.0, |
| "kl_loss_10": 93.72929344177246, |
| "kl_loss_2": 1052.0708984375, |
| "kl_loss_3": 727.1819213867187, |
| "kl_loss_7": 182.0021545410156, |
| "learning_rate": 6.078085445780129e-05, |
| "loss": 515.5865, |
| "step": 8430 |
| }, |
| { |
| "ce_loss_10": 3.678613018989563, |
| "ce_loss_13": 3.6185575127601624, |
| "ce_loss_2": 4.138859879970551, |
| "ce_loss_3": 3.970304036140442, |
| "ce_loss_7": 3.7233882188796996, |
| "epoch": 0.844, |
| "grad_norm": 708.0, |
| "kl_loss_10": 96.56619453430176, |
| "kl_loss_2": 1092.8436309814454, |
| "kl_loss_3": 748.7821746826172, |
| "kl_loss_7": 187.36514282226562, |
| "learning_rate": 6.002487109467347e-05, |
| "loss": 524.9962, |
| "step": 8440 |
| }, |
| { |
| "ce_loss_10": 3.681882548332214, |
| "ce_loss_13": 3.623554539680481, |
| "ce_loss_2": 4.131060492992401, |
| "ce_loss_3": 3.969043660163879, |
| "ce_loss_7": 3.7261468291282656, |
| "epoch": 0.845, |
| "grad_norm": 498.0, |
| "kl_loss_10": 95.19795646667481, |
| "kl_loss_2": 1083.3428985595704, |
| "kl_loss_3": 748.7116729736329, |
| "kl_loss_7": 188.84120330810546, |
| "learning_rate": 5.927331827620902e-05, |
| "loss": 524.2234, |
| "step": 8450 |
| }, |
| { |
| "ce_loss_10": 3.671555197238922, |
| "ce_loss_13": 3.6144081234931944, |
| "ce_loss_2": 4.109152019023895, |
| "ce_loss_3": 3.957107722759247, |
| "ce_loss_7": 3.7151288032531737, |
| "epoch": 0.846, |
| "grad_norm": 384.0, |
| "kl_loss_10": 92.54770011901856, |
| "kl_loss_2": 1047.1174011230469, |
| "kl_loss_3": 728.4162536621094, |
| "kl_loss_7": 183.04834442138673, |
| "learning_rate": 5.852620357053651e-05, |
| "loss": 522.9391, |
| "step": 8460 |
| }, |
| { |
| "ce_loss_10": 3.7129202485084534, |
| "ce_loss_13": 3.65321398973465, |
| "ce_loss_2": 4.155979669094085, |
| "ce_loss_3": 3.9961599469184876, |
| "ce_loss_7": 3.7558295488357545, |
| "epoch": 0.847, |
| "grad_norm": 432.0, |
| "kl_loss_10": 94.81909484863282, |
| "kl_loss_2": 1067.3740447998048, |
| "kl_loss_3": 736.2771881103515, |
| "kl_loss_7": 184.3846176147461, |
| "learning_rate": 5.778353450109286e-05, |
| "loss": 523.3945, |
| "step": 8470 |
| }, |
| { |
| "ce_loss_10": 3.7526662349700928, |
| "ce_loss_13": 3.6899970173835754, |
| "ce_loss_2": 4.2024567246437075, |
| "ce_loss_3": 4.037352812290192, |
| "ce_loss_7": 3.7961275696754457, |
| "epoch": 0.848, |
| "grad_norm": 420.0, |
| "kl_loss_10": 98.8898868560791, |
| "kl_loss_2": 1083.7428894042969, |
| "kl_loss_3": 747.87919921875, |
| "kl_loss_7": 190.12581558227538, |
| "learning_rate": 5.7045318546547206e-05, |
| "loss": 528.6064, |
| "step": 8480 |
| }, |
| { |
| "ce_loss_10": 3.6435152888298035, |
| "ce_loss_13": 3.5820479154586793, |
| "ce_loss_2": 4.10130136013031, |
| "ce_loss_3": 3.9336646437644958, |
| "ce_loss_7": 3.6865146279335024, |
| "epoch": 0.849, |
| "grad_norm": 476.0, |
| "kl_loss_10": 97.09412269592285, |
| "kl_loss_2": 1097.005093383789, |
| "kl_loss_3": 757.3569030761719, |
| "kl_loss_7": 187.13169021606444, |
| "learning_rate": 5.631156314072605e-05, |
| "loss": 526.7981, |
| "step": 8490 |
| }, |
| { |
| "ce_loss_10": 3.6548070907592773, |
| "ce_loss_13": 3.5959606409072875, |
| "ce_loss_2": 4.090519487857819, |
| "ce_loss_3": 3.9302281975746154, |
| "ce_loss_7": 3.6990505933761595, |
| "epoch": 0.85, |
| "grad_norm": 348.0, |
| "kl_loss_10": 94.60167617797852, |
| "kl_loss_2": 1058.567938232422, |
| "kl_loss_3": 726.6986267089844, |
| "kl_loss_7": 182.6941146850586, |
| "learning_rate": 5.5582275672538315e-05, |
| "loss": 518.2773, |
| "step": 8500 |
| }, |
| { |
| "ce_loss_10": 3.5718761324882506, |
| "ce_loss_13": 3.510132133960724, |
| "ce_loss_2": 4.058491265773773, |
| "ce_loss_3": 3.8868750095367433, |
| "ce_loss_7": 3.62018061876297, |
| "epoch": 0.851, |
| "grad_norm": 356.0, |
| "kl_loss_10": 98.47408905029297, |
| "kl_loss_2": 1129.9293365478516, |
| "kl_loss_3": 782.1435455322265, |
| "kl_loss_7": 191.806551361084, |
| "learning_rate": 5.4857463485900484e-05, |
| "loss": 540.5649, |
| "step": 8510 |
| }, |
| { |
| "ce_loss_10": 3.626720643043518, |
| "ce_loss_13": 3.5688146710395814, |
| "ce_loss_2": 4.081609988212586, |
| "ce_loss_3": 3.9117035031318665, |
| "ce_loss_7": 3.673699951171875, |
| "epoch": 0.852, |
| "grad_norm": 392.0, |
| "kl_loss_10": 94.4161319732666, |
| "kl_loss_2": 1082.976022338867, |
| "kl_loss_3": 743.9283477783204, |
| "kl_loss_7": 185.5842658996582, |
| "learning_rate": 5.413713387966329e-05, |
| "loss": 525.7675, |
| "step": 8520 |
| }, |
| { |
| "ce_loss_10": 3.6495197653770446, |
| "ce_loss_13": 3.5870252728462217, |
| "ce_loss_2": 4.1089702367782595, |
| "ce_loss_3": 3.943737292289734, |
| "ce_loss_7": 3.6925705909729003, |
| "epoch": 0.853, |
| "grad_norm": 560.0, |
| "kl_loss_10": 99.9091007232666, |
| "kl_loss_2": 1091.3887969970704, |
| "kl_loss_3": 754.8269989013672, |
| "kl_loss_7": 190.51073608398437, |
| "learning_rate": 5.34212941075381e-05, |
| "loss": 533.712, |
| "step": 8530 |
| }, |
| { |
| "ce_loss_10": 3.6638750314712523, |
| "ce_loss_13": 3.603909599781036, |
| "ce_loss_2": 4.105106854438782, |
| "ce_loss_3": 3.939826285839081, |
| "ce_loss_7": 3.703915464878082, |
| "epoch": 0.854, |
| "grad_norm": 324.0, |
| "kl_loss_10": 94.93586730957031, |
| "kl_loss_2": 1060.2898712158203, |
| "kl_loss_3": 729.1602386474609, |
| "kl_loss_7": 183.2039321899414, |
| "learning_rate": 5.270995137802315e-05, |
| "loss": 520.0254, |
| "step": 8540 |
| }, |
| { |
| "ce_loss_10": 3.586125075817108, |
| "ce_loss_13": 3.530829107761383, |
| "ce_loss_2": 4.0409599304199215, |
| "ce_loss_3": 3.876398241519928, |
| "ce_loss_7": 3.6288790106773376, |
| "epoch": 0.855, |
| "grad_norm": 390.0, |
| "kl_loss_10": 92.31447868347168, |
| "kl_loss_2": 1091.2599792480469, |
| "kl_loss_3": 750.2804168701172, |
| "kl_loss_7": 184.4141700744629, |
| "learning_rate": 5.2003112854332125e-05, |
| "loss": 530.1402, |
| "step": 8550 |
| }, |
| { |
| "ce_loss_10": 3.592084896564484, |
| "ce_loss_13": 3.5318885922431944, |
| "ce_loss_2": 4.045030009746552, |
| "ce_loss_3": 3.8797095656394958, |
| "ce_loss_7": 3.6342476487159727, |
| "epoch": 0.856, |
| "grad_norm": 410.0, |
| "kl_loss_10": 95.16406364440918, |
| "kl_loss_2": 1083.518502807617, |
| "kl_loss_3": 746.9155914306641, |
| "kl_loss_7": 184.60284118652345, |
| "learning_rate": 5.130078565432089e-05, |
| "loss": 519.0631, |
| "step": 8560 |
| }, |
| { |
| "ce_loss_10": 3.6698386430740357, |
| "ce_loss_13": 3.611102557182312, |
| "ce_loss_2": 4.1041951179504395, |
| "ce_loss_3": 3.9457595467567446, |
| "ce_loss_7": 3.714687442779541, |
| "epoch": 0.857, |
| "grad_norm": 330.0, |
| "kl_loss_10": 94.41157264709473, |
| "kl_loss_2": 1066.6546508789063, |
| "kl_loss_3": 732.30849609375, |
| "kl_loss_7": 183.59521484375, |
| "learning_rate": 5.060297685041659e-05, |
| "loss": 515.5307, |
| "step": 8570 |
| }, |
| { |
| "ce_loss_10": 3.594843864440918, |
| "ce_loss_13": 3.535090386867523, |
| "ce_loss_2": 4.058831119537354, |
| "ce_loss_3": 3.8907560467720033, |
| "ce_loss_7": 3.6390093684196474, |
| "epoch": 0.858, |
| "grad_norm": 396.0, |
| "kl_loss_10": 97.14489707946777, |
| "kl_loss_2": 1100.07861328125, |
| "kl_loss_3": 757.8477020263672, |
| "kl_loss_7": 190.17505111694337, |
| "learning_rate": 4.99096934695461e-05, |
| "loss": 537.0569, |
| "step": 8580 |
| }, |
| { |
| "ce_loss_10": 3.655477023124695, |
| "ce_loss_13": 3.592752683162689, |
| "ce_loss_2": 4.114116084575653, |
| "ce_loss_3": 3.950313460826874, |
| "ce_loss_7": 3.6980414509773256, |
| "epoch": 0.859, |
| "grad_norm": 370.0, |
| "kl_loss_10": 96.66123657226562, |
| "kl_loss_2": 1076.5634460449219, |
| "kl_loss_3": 745.2082977294922, |
| "kl_loss_7": 186.95159301757812, |
| "learning_rate": 4.922094249306558e-05, |
| "loss": 520.1718, |
| "step": 8590 |
| }, |
| { |
| "ce_loss_10": 3.677726352214813, |
| "ce_loss_13": 3.6172829270362854, |
| "ce_loss_2": 4.126979196071625, |
| "ce_loss_3": 3.9645047903060915, |
| "ce_loss_7": 3.7215185284614565, |
| "epoch": 0.86, |
| "grad_norm": 392.0, |
| "kl_loss_10": 96.89525718688965, |
| "kl_loss_2": 1065.1883819580078, |
| "kl_loss_3": 740.1956573486328, |
| "kl_loss_7": 187.83882064819335, |
| "learning_rate": 4.853673085668947e-05, |
| "loss": 516.6985, |
| "step": 8600 |
| }, |
| { |
| "ce_loss_10": 3.707137334346771, |
| "ce_loss_13": 3.6448033452033997, |
| "ce_loss_2": 4.162192296981812, |
| "ce_loss_3": 3.993678319454193, |
| "ce_loss_7": 3.7496466279029845, |
| "epoch": 0.861, |
| "grad_norm": 370.0, |
| "kl_loss_10": 98.02176780700684, |
| "kl_loss_2": 1078.1511993408203, |
| "kl_loss_3": 739.8441162109375, |
| "kl_loss_7": 186.5592399597168, |
| "learning_rate": 4.78570654504214e-05, |
| "loss": 529.6101, |
| "step": 8610 |
| }, |
| { |
| "ce_loss_10": 3.6458049774169923, |
| "ce_loss_13": 3.5854872465133667, |
| "ce_loss_2": 4.110537803173065, |
| "ce_loss_3": 3.938798224925995, |
| "ce_loss_7": 3.6893723726272585, |
| "epoch": 0.862, |
| "grad_norm": 414.0, |
| "kl_loss_10": 94.25516128540039, |
| "kl_loss_2": 1104.6271423339845, |
| "kl_loss_3": 758.221337890625, |
| "kl_loss_7": 185.93933029174804, |
| "learning_rate": 4.7181953118484556e-05, |
| "loss": 535.9025, |
| "step": 8620 |
| }, |
| { |
| "ce_loss_10": 3.6774216413497927, |
| "ce_loss_13": 3.6180386185646056, |
| "ce_loss_2": 4.12672735452652, |
| "ce_loss_3": 3.962115204334259, |
| "ce_loss_7": 3.720357131958008, |
| "epoch": 0.863, |
| "grad_norm": 356.0, |
| "kl_loss_10": 95.34017066955566, |
| "kl_loss_2": 1068.0610900878905, |
| "kl_loss_3": 737.2169891357422, |
| "kl_loss_7": 185.36345138549805, |
| "learning_rate": 4.651140065925269e-05, |
| "loss": 530.0095, |
| "step": 8630 |
| }, |
| { |
| "ce_loss_10": 3.609228265285492, |
| "ce_loss_13": 3.5492658615112305, |
| "ce_loss_2": 4.060226953029632, |
| "ce_loss_3": 3.895670175552368, |
| "ce_loss_7": 3.6542355179786683, |
| "epoch": 0.864, |
| "grad_norm": 360.0, |
| "kl_loss_10": 96.95414390563965, |
| "kl_loss_2": 1087.1394622802734, |
| "kl_loss_3": 748.6742889404297, |
| "kl_loss_7": 188.45738372802734, |
| "learning_rate": 4.58454148251814e-05, |
| "loss": 535.7555, |
| "step": 8640 |
| }, |
| { |
| "ce_loss_10": 3.6290027260780335, |
| "ce_loss_13": 3.566804575920105, |
| "ce_loss_2": 4.098408913612365, |
| "ce_loss_3": 3.928418016433716, |
| "ce_loss_7": 3.673435080051422, |
| "epoch": 0.865, |
| "grad_norm": 352.0, |
| "kl_loss_10": 97.77750358581542, |
| "kl_loss_2": 1105.780810546875, |
| "kl_loss_3": 762.838412475586, |
| "kl_loss_7": 187.93626327514647, |
| "learning_rate": 4.518400232274078e-05, |
| "loss": 530.3719, |
| "step": 8650 |
| }, |
| { |
| "ce_loss_10": 3.641969549655914, |
| "ce_loss_13": 3.5785802602767944, |
| "ce_loss_2": 4.092971992492676, |
| "ce_loss_3": 3.932430160045624, |
| "ce_loss_7": 3.6855560064315798, |
| "epoch": 0.866, |
| "grad_norm": 320.0, |
| "kl_loss_10": 100.24152946472168, |
| "kl_loss_2": 1078.2671875, |
| "kl_loss_3": 746.3800415039062, |
| "kl_loss_7": 188.71098556518555, |
| "learning_rate": 4.452716981234745e-05, |
| "loss": 518.2875, |
| "step": 8660 |
| }, |
| { |
| "ce_loss_10": 3.619352424144745, |
| "ce_loss_13": 3.5634596943855286, |
| "ce_loss_2": 4.0641814827919, |
| "ce_loss_3": 3.9009178042411805, |
| "ce_loss_7": 3.6601861000061033, |
| "epoch": 0.867, |
| "grad_norm": 334.0, |
| "kl_loss_10": 92.77517395019531, |
| "kl_loss_2": 1069.4530029296875, |
| "kl_loss_3": 742.2820404052734, |
| "kl_loss_7": 183.70159912109375, |
| "learning_rate": 4.3874923908297335e-05, |
| "loss": 518.2648, |
| "step": 8670 |
| }, |
| { |
| "ce_loss_10": 3.6679449677467346, |
| "ce_loss_13": 3.605993056297302, |
| "ce_loss_2": 4.122425937652588, |
| "ce_loss_3": 3.955815386772156, |
| "ce_loss_7": 3.710171031951904, |
| "epoch": 0.868, |
| "grad_norm": 372.0, |
| "kl_loss_10": 98.51640739440919, |
| "kl_loss_2": 1091.1497436523437, |
| "kl_loss_3": 753.822543334961, |
| "kl_loss_7": 189.5640121459961, |
| "learning_rate": 4.322727117869951e-05, |
| "loss": 527.5021, |
| "step": 8680 |
| }, |
| { |
| "ce_loss_10": 3.678618919849396, |
| "ce_loss_13": 3.61755256652832, |
| "ce_loss_2": 4.1355063915252686, |
| "ce_loss_3": 3.9705930352211, |
| "ce_loss_7": 3.7248330235481264, |
| "epoch": 0.869, |
| "grad_norm": 450.0, |
| "kl_loss_10": 97.55352783203125, |
| "kl_loss_2": 1094.9813720703125, |
| "kl_loss_3": 756.694857788086, |
| "kl_loss_7": 188.98089218139648, |
| "learning_rate": 4.2584218145409916e-05, |
| "loss": 526.9053, |
| "step": 8690 |
| }, |
| { |
| "ce_loss_10": 3.724055600166321, |
| "ce_loss_13": 3.6645130157470702, |
| "ce_loss_2": 4.164188587665558, |
| "ce_loss_3": 4.006092858314514, |
| "ce_loss_7": 3.766603982448578, |
| "epoch": 0.87, |
| "grad_norm": 368.0, |
| "kl_loss_10": 97.79985809326172, |
| "kl_loss_2": 1054.3090911865233, |
| "kl_loss_3": 727.9592834472656, |
| "kl_loss_7": 186.32457809448243, |
| "learning_rate": 4.194577128396521e-05, |
| "loss": 516.3896, |
| "step": 8700 |
| }, |
| { |
| "ce_loss_10": 3.59331738948822, |
| "ce_loss_13": 3.5345770716667175, |
| "ce_loss_2": 4.046900963783264, |
| "ce_loss_3": 3.882276177406311, |
| "ce_loss_7": 3.636314344406128, |
| "epoch": 0.871, |
| "grad_norm": 348.0, |
| "kl_loss_10": 93.78037185668946, |
| "kl_loss_2": 1077.3778259277344, |
| "kl_loss_3": 740.198031616211, |
| "kl_loss_7": 183.74533233642578, |
| "learning_rate": 4.1311937023518264e-05, |
| "loss": 527.0207, |
| "step": 8710 |
| }, |
| { |
| "ce_loss_10": 3.6144633054733277, |
| "ce_loss_13": 3.5550664901733398, |
| "ce_loss_2": 4.064953672885895, |
| "ce_loss_3": 3.891311466693878, |
| "ce_loss_7": 3.653948724269867, |
| "epoch": 0.872, |
| "grad_norm": 338.0, |
| "kl_loss_10": 94.96177291870117, |
| "kl_loss_2": 1085.5813049316407, |
| "kl_loss_3": 729.3066223144531, |
| "kl_loss_7": 181.0632652282715, |
| "learning_rate": 4.0682721746773344e-05, |
| "loss": 521.2992, |
| "step": 8720 |
| }, |
| { |
| "ce_loss_10": 3.4832905650138857, |
| "ce_loss_13": 3.4249367475509644, |
| "ce_loss_2": 3.961899662017822, |
| "ce_loss_3": 3.788464534282684, |
| "ce_loss_7": 3.527579641342163, |
| "epoch": 0.873, |
| "grad_norm": 370.0, |
| "kl_loss_10": 91.51293182373047, |
| "kl_loss_2": 1104.7394775390626, |
| "kl_loss_3": 759.5037414550782, |
| "kl_loss_7": 185.07400512695312, |
| "learning_rate": 4.0058131789920904e-05, |
| "loss": 521.9289, |
| "step": 8730 |
| }, |
| { |
| "ce_loss_10": 3.640140187740326, |
| "ce_loss_13": 3.57983558177948, |
| "ce_loss_2": 4.088211476802826, |
| "ce_loss_3": 3.927894616127014, |
| "ce_loss_7": 3.6845538139343263, |
| "epoch": 0.874, |
| "grad_norm": 438.0, |
| "kl_loss_10": 95.66121215820313, |
| "kl_loss_2": 1082.0109283447266, |
| "kl_loss_3": 751.8433319091797, |
| "kl_loss_7": 184.97217254638673, |
| "learning_rate": 3.9438173442575e-05, |
| "loss": 542.025, |
| "step": 8740 |
| }, |
| { |
| "ce_loss_10": 3.668476128578186, |
| "ce_loss_13": 3.6084399461746215, |
| "ce_loss_2": 4.114363825321197, |
| "ce_loss_3": 3.948890733718872, |
| "ce_loss_7": 3.712895894050598, |
| "epoch": 0.875, |
| "grad_norm": 360.0, |
| "kl_loss_10": 95.13606338500976, |
| "kl_loss_2": 1069.65205078125, |
| "kl_loss_3": 736.1352905273437, |
| "kl_loss_7": 185.31621551513672, |
| "learning_rate": 3.882285294770937e-05, |
| "loss": 524.7358, |
| "step": 8750 |
| }, |
| { |
| "ce_loss_10": 3.636470365524292, |
| "ce_loss_13": 3.576250433921814, |
| "ce_loss_2": 4.081735682487488, |
| "ce_loss_3": 3.9194202423095703, |
| "ce_loss_7": 3.6787103533744814, |
| "epoch": 0.876, |
| "grad_norm": 372.0, |
| "kl_loss_10": 97.42237510681153, |
| "kl_loss_2": 1070.8320678710938, |
| "kl_loss_3": 736.4440826416015, |
| "kl_loss_7": 186.42294464111328, |
| "learning_rate": 3.821217650159453e-05, |
| "loss": 528.159, |
| "step": 8760 |
| }, |
| { |
| "ce_loss_10": 3.501795244216919, |
| "ce_loss_13": 3.445420837402344, |
| "ce_loss_2": 3.993399131298065, |
| "ce_loss_3": 3.819171416759491, |
| "ce_loss_7": 3.5519042015075684, |
| "epoch": 0.877, |
| "grad_norm": 398.0, |
| "kl_loss_10": 91.19635620117188, |
| "kl_loss_2": 1126.038784790039, |
| "kl_loss_3": 777.8552947998047, |
| "kl_loss_7": 188.21297302246094, |
| "learning_rate": 3.760615025373543e-05, |
| "loss": 535.8912, |
| "step": 8770 |
| }, |
| { |
| "ce_loss_10": 3.687652599811554, |
| "ce_loss_13": 3.6275517463684084, |
| "ce_loss_2": 4.149944150447846, |
| "ce_loss_3": 3.984694278240204, |
| "ce_loss_7": 3.7361566066741942, |
| "epoch": 0.878, |
| "grad_norm": 426.0, |
| "kl_loss_10": 98.53735313415527, |
| "kl_loss_2": 1087.7767242431642, |
| "kl_loss_3": 754.1841644287109, |
| "kl_loss_7": 191.66405487060547, |
| "learning_rate": 3.700478030680987e-05, |
| "loss": 534.6525, |
| "step": 8780 |
| }, |
| { |
| "ce_loss_10": 3.672296917438507, |
| "ce_loss_13": 3.6126784920692443, |
| "ce_loss_2": 4.126206862926483, |
| "ce_loss_3": 3.9555336833000183, |
| "ce_loss_7": 3.7154035449028013, |
| "epoch": 0.879, |
| "grad_norm": 400.0, |
| "kl_loss_10": 95.93194694519043, |
| "kl_loss_2": 1067.7572967529297, |
| "kl_loss_3": 734.3840759277343, |
| "kl_loss_7": 185.99778594970704, |
| "learning_rate": 3.6408072716606344e-05, |
| "loss": 520.9604, |
| "step": 8790 |
| }, |
| { |
| "ce_loss_10": 3.5921829104423524, |
| "ce_loss_13": 3.5314606547355654, |
| "ce_loss_2": 4.064702832698822, |
| "ce_loss_3": 3.897125017642975, |
| "ce_loss_7": 3.639820373058319, |
| "epoch": 0.88, |
| "grad_norm": 424.0, |
| "kl_loss_10": 96.45306243896485, |
| "kl_loss_2": 1113.6997863769532, |
| "kl_loss_3": 769.2831970214844, |
| "kl_loss_7": 189.68171615600585, |
| "learning_rate": 3.5816033491963716e-05, |
| "loss": 546.457, |
| "step": 8800 |
| }, |
| { |
| "ce_loss_10": 3.4587510585784913, |
| "ce_loss_13": 3.398640847206116, |
| "ce_loss_2": 3.9295639514923097, |
| "ce_loss_3": 3.755736696720123, |
| "ce_loss_7": 3.502725625038147, |
| "epoch": 0.881, |
| "grad_norm": 374.0, |
| "kl_loss_10": 94.41120719909668, |
| "kl_loss_2": 1107.7318145751954, |
| "kl_loss_3": 762.6848449707031, |
| "kl_loss_7": 185.3354965209961, |
| "learning_rate": 3.522866859471047e-05, |
| "loss": 531.675, |
| "step": 8810 |
| }, |
| { |
| "ce_loss_10": 3.7003540635108947, |
| "ce_loss_13": 3.6417059302330017, |
| "ce_loss_2": 4.134489345550537, |
| "ce_loss_3": 3.972803270816803, |
| "ce_loss_7": 3.7418115973472594, |
| "epoch": 0.882, |
| "grad_norm": 620.0, |
| "kl_loss_10": 93.44988250732422, |
| "kl_loss_2": 1046.9635864257812, |
| "kl_loss_3": 718.645751953125, |
| "kl_loss_7": 180.43475570678712, |
| "learning_rate": 3.46459839396045e-05, |
| "loss": 519.2549, |
| "step": 8820 |
| }, |
| { |
| "ce_loss_10": 3.6235634326934814, |
| "ce_loss_13": 3.5625478267669677, |
| "ce_loss_2": 4.090062844753265, |
| "ce_loss_3": 3.9221726655960083, |
| "ce_loss_7": 3.6677647113800047, |
| "epoch": 0.883, |
| "grad_norm": 392.0, |
| "kl_loss_10": 97.41650848388672, |
| "kl_loss_2": 1090.359048461914, |
| "kl_loss_3": 752.6492370605469, |
| "kl_loss_7": 188.19114456176757, |
| "learning_rate": 3.406798539427386e-05, |
| "loss": 541.4702, |
| "step": 8830 |
| }, |
| { |
| "ce_loss_10": 3.6815385699272154, |
| "ce_loss_13": 3.622318422794342, |
| "ce_loss_2": 4.134820902347565, |
| "ce_loss_3": 3.9722886800765993, |
| "ce_loss_7": 3.7261940598487855, |
| "epoch": 0.884, |
| "grad_norm": 458.0, |
| "kl_loss_10": 95.14997901916504, |
| "kl_loss_2": 1087.6108123779297, |
| "kl_loss_3": 753.6235443115235, |
| "kl_loss_7": 186.09493026733398, |
| "learning_rate": 3.349467877915746e-05, |
| "loss": 532.4207, |
| "step": 8840 |
| }, |
| { |
| "ce_loss_10": 3.6383310556411743, |
| "ce_loss_13": 3.578685259819031, |
| "ce_loss_2": 4.10920352935791, |
| "ce_loss_3": 3.9395066857337953, |
| "ce_loss_7": 3.684439957141876, |
| "epoch": 0.885, |
| "grad_norm": 346.0, |
| "kl_loss_10": 94.56938552856445, |
| "kl_loss_2": 1107.4275299072265, |
| "kl_loss_3": 766.7192443847656, |
| "kl_loss_7": 187.05870895385743, |
| "learning_rate": 3.292606986744667e-05, |
| "loss": 544.0854, |
| "step": 8850 |
| }, |
| { |
| "ce_loss_10": 3.593039667606354, |
| "ce_loss_13": 3.5363111972808836, |
| "ce_loss_2": 4.061631453037262, |
| "ce_loss_3": 3.888974642753601, |
| "ce_loss_7": 3.6354947090148926, |
| "epoch": 0.886, |
| "grad_norm": 312.0, |
| "kl_loss_10": 94.36025886535644, |
| "kl_loss_2": 1094.437567138672, |
| "kl_loss_3": 755.0413787841796, |
| "kl_loss_7": 185.15854110717774, |
| "learning_rate": 3.23621643850267e-05, |
| "loss": 531.352, |
| "step": 8860 |
| }, |
| { |
| "ce_loss_10": 3.6675365686416628, |
| "ce_loss_13": 3.608867907524109, |
| "ce_loss_2": 4.1205101132392885, |
| "ce_loss_3": 3.9526678919792175, |
| "ce_loss_7": 3.71103777885437, |
| "epoch": 0.887, |
| "grad_norm": 398.0, |
| "kl_loss_10": 95.91901359558105, |
| "kl_loss_2": 1094.978707885742, |
| "kl_loss_3": 758.2980133056641, |
| "kl_loss_7": 187.99334793090821, |
| "learning_rate": 3.180296801041971e-05, |
| "loss": 525.304, |
| "step": 8870 |
| }, |
| { |
| "ce_loss_10": 3.6939959645271303, |
| "ce_loss_13": 3.6341704607009886, |
| "ce_loss_2": 4.136724853515625, |
| "ce_loss_3": 3.976076662540436, |
| "ce_loss_7": 3.7369011640548706, |
| "epoch": 0.888, |
| "grad_norm": 322.0, |
| "kl_loss_10": 96.13762168884277, |
| "kl_loss_2": 1061.462728881836, |
| "kl_loss_3": 731.0939331054688, |
| "kl_loss_7": 185.31768493652345, |
| "learning_rate": 3.124848637472688e-05, |
| "loss": 515.8721, |
| "step": 8880 |
| }, |
| { |
| "ce_loss_10": 3.5114728569984437, |
| "ce_loss_13": 3.452458143234253, |
| "ce_loss_2": 3.9819056034088134, |
| "ce_loss_3": 3.8095321655273438, |
| "ce_loss_7": 3.5549168229103087, |
| "epoch": 0.889, |
| "grad_norm": 430.0, |
| "kl_loss_10": 92.77987136840821, |
| "kl_loss_2": 1105.7576904296875, |
| "kl_loss_3": 760.3018249511719, |
| "kl_loss_7": 183.98031311035157, |
| "learning_rate": 3.069872506157212e-05, |
| "loss": 529.9256, |
| "step": 8890 |
| }, |
| { |
| "ce_loss_10": 3.6096359133720397, |
| "ce_loss_13": 3.5530964136123657, |
| "ce_loss_2": 4.066385662555694, |
| "ce_loss_3": 3.9037466764450075, |
| "ce_loss_7": 3.653862941265106, |
| "epoch": 0.89, |
| "grad_norm": 414.0, |
| "kl_loss_10": 94.68969841003418, |
| "kl_loss_2": 1082.7529907226562, |
| "kl_loss_3": 748.9955108642578, |
| "kl_loss_7": 186.7980583190918, |
| "learning_rate": 3.0153689607045842e-05, |
| "loss": 522.4292, |
| "step": 8900 |
| }, |
| { |
| "ce_loss_10": 3.5076727747917174, |
| "ce_loss_13": 3.4481669664382935, |
| "ce_loss_2": 3.998192644119263, |
| "ce_loss_3": 3.8251919507980348, |
| "ce_loss_7": 3.5543401718139647, |
| "epoch": 0.891, |
| "grad_norm": 462.0, |
| "kl_loss_10": 96.1771800994873, |
| "kl_loss_2": 1157.3876403808595, |
| "kl_loss_3": 799.3413696289062, |
| "kl_loss_7": 192.33385009765624, |
| "learning_rate": 2.9613385499648926e-05, |
| "loss": 537.2502, |
| "step": 8910 |
| }, |
| { |
| "ce_loss_10": 3.5617488503456114, |
| "ce_loss_13": 3.5028850078582763, |
| "ce_loss_2": 4.028625464439392, |
| "ce_loss_3": 3.8606330037117003, |
| "ce_loss_7": 3.60619056224823, |
| "epoch": 0.892, |
| "grad_norm": 364.0, |
| "kl_loss_10": 92.3734031677246, |
| "kl_loss_2": 1092.7289123535156, |
| "kl_loss_3": 755.3269073486329, |
| "kl_loss_7": 183.66201095581056, |
| "learning_rate": 2.9077818180237692e-05, |
| "loss": 529.899, |
| "step": 8920 |
| }, |
| { |
| "ce_loss_10": 3.611976993083954, |
| "ce_loss_13": 3.5523295164108277, |
| "ce_loss_2": 4.088427019119263, |
| "ce_loss_3": 3.911720395088196, |
| "ce_loss_7": 3.6568928718566895, |
| "epoch": 0.893, |
| "grad_norm": 604.0, |
| "kl_loss_10": 95.37241554260254, |
| "kl_loss_2": 1091.7466766357422, |
| "kl_loss_3": 749.5647033691406, |
| "kl_loss_7": 185.87219848632813, |
| "learning_rate": 2.8546993041969172e-05, |
| "loss": 528.8222, |
| "step": 8930 |
| }, |
| { |
| "ce_loss_10": 3.649553382396698, |
| "ce_loss_13": 3.5936214447021486, |
| "ce_loss_2": 4.095563900470734, |
| "ce_loss_3": 3.9343939542770388, |
| "ce_loss_7": 3.6919458627700807, |
| "epoch": 0.894, |
| "grad_norm": 356.0, |
| "kl_loss_10": 92.16914100646973, |
| "kl_loss_2": 1065.6531127929688, |
| "kl_loss_3": 739.0178924560547, |
| "kl_loss_7": 182.67144699096679, |
| "learning_rate": 2.802091543024671e-05, |
| "loss": 525.8132, |
| "step": 8940 |
| }, |
| { |
| "ce_loss_10": 3.6456188917160035, |
| "ce_loss_13": 3.5855357170104982, |
| "ce_loss_2": 4.1163407325744625, |
| "ce_loss_3": 3.9452737092971804, |
| "ce_loss_7": 3.690487289428711, |
| "epoch": 0.895, |
| "grad_norm": 376.0, |
| "kl_loss_10": 94.99068603515624, |
| "kl_loss_2": 1107.8523712158203, |
| "kl_loss_3": 763.5164489746094, |
| "kl_loss_7": 187.85556182861328, |
| "learning_rate": 2.7499590642665774e-05, |
| "loss": 543.5269, |
| "step": 8950 |
| }, |
| { |
| "ce_loss_10": 3.6521722793579103, |
| "ce_loss_13": 3.5920722246170045, |
| "ce_loss_2": 4.112611806392669, |
| "ce_loss_3": 3.942758357524872, |
| "ce_loss_7": 3.6924882411956785, |
| "epoch": 0.896, |
| "grad_norm": 434.0, |
| "kl_loss_10": 97.21023635864258, |
| "kl_loss_2": 1089.4108154296875, |
| "kl_loss_3": 742.6543731689453, |
| "kl_loss_7": 186.23975067138673, |
| "learning_rate": 2.6983023928961405e-05, |
| "loss": 523.9287, |
| "step": 8960 |
| }, |
| { |
| "ce_loss_10": 3.6287880539894104, |
| "ce_loss_13": 3.569942307472229, |
| "ce_loss_2": 4.086234021186828, |
| "ce_loss_3": 3.919290018081665, |
| "ce_loss_7": 3.6727704763412476, |
| "epoch": 0.897, |
| "grad_norm": 428.0, |
| "kl_loss_10": 96.33384323120117, |
| "kl_loss_2": 1081.610333251953, |
| "kl_loss_3": 747.162060546875, |
| "kl_loss_7": 187.28789825439452, |
| "learning_rate": 2.6471220490954628e-05, |
| "loss": 531.8677, |
| "step": 8970 |
| }, |
| { |
| "ce_loss_10": 3.6082414865493773, |
| "ce_loss_13": 3.5503612399101256, |
| "ce_loss_2": 4.054306983947754, |
| "ce_loss_3": 3.8875715851783754, |
| "ce_loss_7": 3.647981250286102, |
| "epoch": 0.898, |
| "grad_norm": 402.0, |
| "kl_loss_10": 93.92480773925782, |
| "kl_loss_2": 1068.1579833984374, |
| "kl_loss_3": 736.318814086914, |
| "kl_loss_7": 183.30384826660156, |
| "learning_rate": 2.596418548250029e-05, |
| "loss": 527.9295, |
| "step": 8980 |
| }, |
| { |
| "ce_loss_10": 3.6551415085792542, |
| "ce_loss_13": 3.5952192187309264, |
| "ce_loss_2": 4.1076843500137326, |
| "ce_loss_3": 3.944980025291443, |
| "ce_loss_7": 3.700137984752655, |
| "epoch": 0.899, |
| "grad_norm": 396.0, |
| "kl_loss_10": 97.98623161315918, |
| "kl_loss_2": 1081.954281616211, |
| "kl_loss_3": 746.2776489257812, |
| "kl_loss_7": 188.93777618408203, |
| "learning_rate": 2.5461924009435368e-05, |
| "loss": 524.2467, |
| "step": 8990 |
| }, |
| { |
| "ce_loss_10": 3.650333786010742, |
| "ce_loss_13": 3.590772497653961, |
| "ce_loss_2": 4.109632253646851, |
| "ce_loss_3": 3.9412980914115905, |
| "ce_loss_7": 3.6946743369102477, |
| "epoch": 0.9, |
| "grad_norm": 410.0, |
| "kl_loss_10": 96.09890708923339, |
| "kl_loss_2": 1079.7472290039063, |
| "kl_loss_3": 745.8318054199219, |
| "kl_loss_7": 186.11589736938475, |
| "learning_rate": 2.4964441129527336e-05, |
| "loss": 536.0899, |
| "step": 9000 |
| }, |
| { |
| "ce_loss_10": 3.6510029554367067, |
| "ce_loss_13": 3.590871715545654, |
| "ce_loss_2": 4.100390136241913, |
| "ce_loss_3": 3.932853305339813, |
| "ce_loss_7": 3.6917531371116636, |
| "epoch": 0.901, |
| "grad_norm": 418.0, |
| "kl_loss_10": 95.55135993957519, |
| "kl_loss_2": 1061.7380157470702, |
| "kl_loss_3": 727.2771514892578, |
| "kl_loss_7": 183.68069381713866, |
| "learning_rate": 2.4471741852423235e-05, |
| "loss": 518.1353, |
| "step": 9010 |
| }, |
| { |
| "ce_loss_10": 3.695908546447754, |
| "ce_loss_13": 3.6349289417266846, |
| "ce_loss_2": 4.151931369304657, |
| "ce_loss_3": 3.98497998714447, |
| "ce_loss_7": 3.739882934093475, |
| "epoch": 0.902, |
| "grad_norm": 392.0, |
| "kl_loss_10": 95.51335906982422, |
| "kl_loss_2": 1066.5906768798827, |
| "kl_loss_3": 733.3630157470703, |
| "kl_loss_7": 184.28593063354492, |
| "learning_rate": 2.3983831139599287e-05, |
| "loss": 522.8627, |
| "step": 9020 |
| }, |
| { |
| "ce_loss_10": 3.617437481880188, |
| "ce_loss_13": 3.558865213394165, |
| "ce_loss_2": 4.061969435214996, |
| "ce_loss_3": 3.8991889357566833, |
| "ce_loss_7": 3.660116195678711, |
| "epoch": 0.903, |
| "grad_norm": 456.0, |
| "kl_loss_10": 93.39376106262208, |
| "kl_loss_2": 1059.7717498779298, |
| "kl_loss_3": 733.3598663330079, |
| "kl_loss_7": 181.95840148925782, |
| "learning_rate": 2.3500713904311022e-05, |
| "loss": 512.7801, |
| "step": 9030 |
| }, |
| { |
| "ce_loss_10": 3.659070146083832, |
| "ce_loss_13": 3.5992442965507507, |
| "ce_loss_2": 4.08744889497757, |
| "ce_loss_3": 3.9278596162796022, |
| "ce_loss_7": 3.700530481338501, |
| "epoch": 0.904, |
| "grad_norm": 472.0, |
| "kl_loss_10": 95.7885025024414, |
| "kl_loss_2": 1036.0338073730468, |
| "kl_loss_3": 713.1754333496094, |
| "kl_loss_7": 181.65938034057618, |
| "learning_rate": 2.3022395011543685e-05, |
| "loss": 514.4845, |
| "step": 9040 |
| }, |
| { |
| "ce_loss_10": 3.6909992337226867, |
| "ce_loss_13": 3.630416977405548, |
| "ce_loss_2": 4.144919979572296, |
| "ce_loss_3": 3.98409184217453, |
| "ce_loss_7": 3.735574746131897, |
| "epoch": 0.905, |
| "grad_norm": 400.0, |
| "kl_loss_10": 95.80096397399902, |
| "kl_loss_2": 1091.1403015136718, |
| "kl_loss_3": 758.9450408935547, |
| "kl_loss_7": 188.74431228637695, |
| "learning_rate": 2.2548879277963063e-05, |
| "loss": 536.6219, |
| "step": 9050 |
| }, |
| { |
| "ce_loss_10": 3.6055094718933107, |
| "ce_loss_13": 3.5453344702720644, |
| "ce_loss_2": 4.055747485160827, |
| "ce_loss_3": 3.8876903891563415, |
| "ce_loss_7": 3.645590376853943, |
| "epoch": 0.906, |
| "grad_norm": 312.0, |
| "kl_loss_10": 94.81256561279297, |
| "kl_loss_2": 1081.8126281738282, |
| "kl_loss_3": 743.9638031005859, |
| "kl_loss_7": 185.8631164550781, |
| "learning_rate": 2.208017147186736e-05, |
| "loss": 517.0646, |
| "step": 9060 |
| }, |
| { |
| "ce_loss_10": 3.5984405398368837, |
| "ce_loss_13": 3.5392195105552675, |
| "ce_loss_2": 4.055430555343628, |
| "ce_loss_3": 3.8891077756881716, |
| "ce_loss_7": 3.643998312950134, |
| "epoch": 0.907, |
| "grad_norm": 424.0, |
| "kl_loss_10": 95.52283592224121, |
| "kl_loss_2": 1082.7356536865234, |
| "kl_loss_3": 749.8307952880859, |
| "kl_loss_7": 186.6390350341797, |
| "learning_rate": 2.1616276313139227e-05, |
| "loss": 522.272, |
| "step": 9070 |
| }, |
| { |
| "ce_loss_10": 3.6377461314201356, |
| "ce_loss_13": 3.5757868885993958, |
| "ce_loss_2": 4.087118625640869, |
| "ce_loss_3": 3.9254656434059143, |
| "ce_loss_7": 3.680292618274689, |
| "epoch": 0.908, |
| "grad_norm": 362.0, |
| "kl_loss_10": 96.6335952758789, |
| "kl_loss_2": 1071.57734375, |
| "kl_loss_3": 743.0760345458984, |
| "kl_loss_7": 186.97156448364257, |
| "learning_rate": 2.1157198473197415e-05, |
| "loss": 527.4616, |
| "step": 9080 |
| }, |
| { |
| "ce_loss_10": 3.7054911255836487, |
| "ce_loss_13": 3.646452081203461, |
| "ce_loss_2": 4.16020712852478, |
| "ce_loss_3": 3.99694961309433, |
| "ce_loss_7": 3.7527972936630247, |
| "epoch": 0.909, |
| "grad_norm": 428.0, |
| "kl_loss_10": 95.60770835876465, |
| "kl_loss_2": 1073.3848999023437, |
| "kl_loss_3": 744.7516662597657, |
| "kl_loss_7": 188.15945053100586, |
| "learning_rate": 2.0702942574950812e-05, |
| "loss": 526.0792, |
| "step": 9090 |
| }, |
| { |
| "ce_loss_10": 3.623731589317322, |
| "ce_loss_13": 3.5640787363052366, |
| "ce_loss_2": 4.083542311191559, |
| "ce_loss_3": 3.9220656394958495, |
| "ce_loss_7": 3.669620490074158, |
| "epoch": 0.91, |
| "grad_norm": 302.0, |
| "kl_loss_10": 95.35622863769531, |
| "kl_loss_2": 1087.3217651367188, |
| "kl_loss_3": 752.284033203125, |
| "kl_loss_7": 187.5697151184082, |
| "learning_rate": 2.025351319275137e-05, |
| "loss": 528.1311, |
| "step": 9100 |
| }, |
| { |
| "ce_loss_10": 3.761759030818939, |
| "ce_loss_13": 3.6962865233421325, |
| "ce_loss_2": 4.2175662279129025, |
| "ce_loss_3": 4.051000607013703, |
| "ce_loss_7": 3.8052276611328124, |
| "epoch": 0.911, |
| "grad_norm": 420.0, |
| "kl_loss_10": 101.6547290802002, |
| "kl_loss_2": 1108.3317321777345, |
| "kl_loss_3": 765.9157867431641, |
| "kl_loss_7": 194.34442520141602, |
| "learning_rate": 1.9808914852347816e-05, |
| "loss": 545.7752, |
| "step": 9110 |
| }, |
| { |
| "ce_loss_10": 3.599123954772949, |
| "ce_loss_13": 3.539510524272919, |
| "ce_loss_2": 4.069272911548614, |
| "ce_loss_3": 3.9009834051132204, |
| "ce_loss_7": 3.6455170154571532, |
| "epoch": 0.912, |
| "grad_norm": 416.0, |
| "kl_loss_10": 95.14377288818359, |
| "kl_loss_2": 1095.5253448486328, |
| "kl_loss_3": 750.8630340576171, |
| "kl_loss_7": 187.0247688293457, |
| "learning_rate": 1.9369152030840554e-05, |
| "loss": 527.6025, |
| "step": 9120 |
| }, |
| { |
| "ce_loss_10": 3.6806903958320616, |
| "ce_loss_13": 3.620557761192322, |
| "ce_loss_2": 4.135490739345551, |
| "ce_loss_3": 3.9653069972991943, |
| "ce_loss_7": 3.723483943939209, |
| "epoch": 0.913, |
| "grad_norm": 362.0, |
| "kl_loss_10": 97.92795066833496, |
| "kl_loss_2": 1089.1937438964844, |
| "kl_loss_3": 747.6420379638672, |
| "kl_loss_7": 187.34563446044922, |
| "learning_rate": 1.893422915663645e-05, |
| "loss": 529.2906, |
| "step": 9130 |
| }, |
| { |
| "ce_loss_10": 3.5489492774009705, |
| "ce_loss_13": 3.488741672039032, |
| "ce_loss_2": 4.032487225532532, |
| "ce_loss_3": 3.862708866596222, |
| "ce_loss_7": 3.594150650501251, |
| "epoch": 0.914, |
| "grad_norm": 460.0, |
| "kl_loss_10": 95.81211128234864, |
| "kl_loss_2": 1122.290625, |
| "kl_loss_3": 780.3386810302734, |
| "kl_loss_7": 190.92548141479492, |
| "learning_rate": 1.850415060940386e-05, |
| "loss": 539.4046, |
| "step": 9140 |
| }, |
| { |
| "ce_loss_10": 3.670183026790619, |
| "ce_loss_13": 3.611021101474762, |
| "ce_loss_2": 4.120828151702881, |
| "ce_loss_3": 3.9584792375564577, |
| "ce_loss_7": 3.712183046340942, |
| "epoch": 0.915, |
| "grad_norm": 418.0, |
| "kl_loss_10": 95.88972358703613, |
| "kl_loss_2": 1074.5135314941406, |
| "kl_loss_3": 738.371826171875, |
| "kl_loss_7": 185.7539405822754, |
| "learning_rate": 1.8078920720028978e-05, |
| "loss": 525.966, |
| "step": 9150 |
| }, |
| { |
| "ce_loss_10": 3.600800943374634, |
| "ce_loss_13": 3.5434103488922117, |
| "ce_loss_2": 4.046385419368744, |
| "ce_loss_3": 3.8842490911483765, |
| "ce_loss_7": 3.6435607194900514, |
| "epoch": 0.916, |
| "grad_norm": 468.0, |
| "kl_loss_10": 94.49675407409669, |
| "kl_loss_2": 1068.3072998046875, |
| "kl_loss_3": 736.1623046875, |
| "kl_loss_7": 182.35257797241212, |
| "learning_rate": 1.765854377057219e-05, |
| "loss": 533.5915, |
| "step": 9160 |
| }, |
| { |
| "ce_loss_10": 3.579929566383362, |
| "ce_loss_13": 3.52090607881546, |
| "ce_loss_2": 4.0303690195083615, |
| "ce_loss_3": 3.863832104206085, |
| "ce_loss_7": 3.621261489391327, |
| "epoch": 0.917, |
| "grad_norm": 344.0, |
| "kl_loss_10": 93.69845123291016, |
| "kl_loss_2": 1076.374838256836, |
| "kl_loss_3": 739.5320068359375, |
| "kl_loss_7": 182.73907394409179, |
| "learning_rate": 1.724302399422456e-05, |
| "loss": 525.9574, |
| "step": 9170 |
| }, |
| { |
| "ce_loss_10": 3.5273375153541564, |
| "ce_loss_13": 3.469092321395874, |
| "ce_loss_2": 3.98960462808609, |
| "ce_loss_3": 3.8235998272895815, |
| "ce_loss_7": 3.572177302837372, |
| "epoch": 0.918, |
| "grad_norm": 328.0, |
| "kl_loss_10": 94.86108894348145, |
| "kl_loss_2": 1092.3598358154297, |
| "kl_loss_3": 757.3310150146484, |
| "kl_loss_7": 188.48751983642578, |
| "learning_rate": 1.683236557526574e-05, |
| "loss": 533.8531, |
| "step": 9180 |
| }, |
| { |
| "ce_loss_10": 3.6514230132102967, |
| "ce_loss_13": 3.59556097984314, |
| "ce_loss_2": 4.083134496212006, |
| "ce_loss_3": 3.926029086112976, |
| "ce_loss_7": 3.693097734451294, |
| "epoch": 0.919, |
| "grad_norm": 276.0, |
| "kl_loss_10": 94.37221069335938, |
| "kl_loss_2": 1047.5379638671875, |
| "kl_loss_3": 720.9200286865234, |
| "kl_loss_7": 181.39565734863282, |
| "learning_rate": 1.6426572649021475e-05, |
| "loss": 520.5356, |
| "step": 9190 |
| }, |
| { |
| "ce_loss_10": 3.6877851486206055, |
| "ce_loss_13": 3.6274981617927553, |
| "ce_loss_2": 4.1144737839698795, |
| "ce_loss_3": 3.9595839619636535, |
| "ce_loss_7": 3.7264232993125916, |
| "epoch": 0.92, |
| "grad_norm": 430.0, |
| "kl_loss_10": 99.18587074279785, |
| "kl_loss_2": 1047.7421783447267, |
| "kl_loss_3": 721.9292663574219, |
| "kl_loss_7": 186.34831695556642, |
| "learning_rate": 1.6025649301821876e-05, |
| "loss": 520.097, |
| "step": 9200 |
| }, |
| { |
| "ce_loss_10": 3.6789560437202455, |
| "ce_loss_13": 3.6199841260910035, |
| "ce_loss_2": 4.116438376903534, |
| "ce_loss_3": 3.95575532913208, |
| "ce_loss_7": 3.720892333984375, |
| "epoch": 0.921, |
| "grad_norm": 430.0, |
| "kl_loss_10": 95.03273735046386, |
| "kl_loss_2": 1068.5045623779297, |
| "kl_loss_3": 740.7460571289063, |
| "kl_loss_7": 185.96430587768555, |
| "learning_rate": 1.5629599570960716e-05, |
| "loss": 522.4428, |
| "step": 9210 |
| }, |
| { |
| "ce_loss_10": 3.579318141937256, |
| "ce_loss_13": 3.5199381947517394, |
| "ce_loss_2": 4.029832947254181, |
| "ce_loss_3": 3.865503740310669, |
| "ce_loss_7": 3.6221681237220764, |
| "epoch": 0.922, |
| "grad_norm": 430.0, |
| "kl_loss_10": 94.97879791259766, |
| "kl_loss_2": 1084.768603515625, |
| "kl_loss_3": 748.8800231933594, |
| "kl_loss_7": 185.368741607666, |
| "learning_rate": 1.5238427444654367e-05, |
| "loss": 526.936, |
| "step": 9220 |
| }, |
| { |
| "ce_loss_10": 3.642410922050476, |
| "ce_loss_13": 3.5841264009475706, |
| "ce_loss_2": 4.090620064735413, |
| "ce_loss_3": 3.929516541957855, |
| "ce_loss_7": 3.68586403131485, |
| "epoch": 0.923, |
| "grad_norm": 340.0, |
| "kl_loss_10": 95.43446731567383, |
| "kl_loss_2": 1061.9394897460938, |
| "kl_loss_3": 729.8539154052735, |
| "kl_loss_7": 184.269775390625, |
| "learning_rate": 1.4852136862001764e-05, |
| "loss": 521.6809, |
| "step": 9230 |
| }, |
| { |
| "ce_loss_10": 3.6022266387939452, |
| "ce_loss_13": 3.5460850477218626, |
| "ce_loss_2": 4.056096696853638, |
| "ce_loss_3": 3.894578981399536, |
| "ce_loss_7": 3.6445172667503356, |
| "epoch": 0.924, |
| "grad_norm": 382.0, |
| "kl_loss_10": 90.83601989746094, |
| "kl_loss_2": 1070.5055114746094, |
| "kl_loss_3": 735.5364959716796, |
| "kl_loss_7": 180.06712493896484, |
| "learning_rate": 1.4470731712944884e-05, |
| "loss": 526.6606, |
| "step": 9240 |
| }, |
| { |
| "ce_loss_10": 3.632104980945587, |
| "ce_loss_13": 3.573563551902771, |
| "ce_loss_2": 4.086918556690216, |
| "ce_loss_3": 3.921724486351013, |
| "ce_loss_7": 3.676921808719635, |
| "epoch": 0.925, |
| "grad_norm": 404.0, |
| "kl_loss_10": 93.8505702972412, |
| "kl_loss_2": 1076.019464111328, |
| "kl_loss_3": 742.9348846435547, |
| "kl_loss_7": 185.7860206604004, |
| "learning_rate": 1.4094215838229174e-05, |
| "loss": 532.0963, |
| "step": 9250 |
| }, |
| { |
| "ce_loss_10": 3.5902254581451416, |
| "ce_loss_13": 3.531176710128784, |
| "ce_loss_2": 4.053838360309601, |
| "ce_loss_3": 3.8887827515602114, |
| "ce_loss_7": 3.634320020675659, |
| "epoch": 0.926, |
| "grad_norm": 440.0, |
| "kl_loss_10": 95.00082511901856, |
| "kl_loss_2": 1108.7564575195313, |
| "kl_loss_3": 761.1957458496094, |
| "kl_loss_7": 187.39419326782226, |
| "learning_rate": 1.372259302936546e-05, |
| "loss": 548.2919, |
| "step": 9260 |
| }, |
| { |
| "ce_loss_10": 3.7115341782569886, |
| "ce_loss_13": 3.6472853660583495, |
| "ce_loss_2": 4.159861445426941, |
| "ce_loss_3": 3.998417854309082, |
| "ce_loss_7": 3.7543888211250307, |
| "epoch": 0.927, |
| "grad_norm": 304.0, |
| "kl_loss_10": 100.11175384521485, |
| "kl_loss_2": 1075.1090118408204, |
| "kl_loss_3": 744.2237152099609, |
| "kl_loss_7": 190.9360038757324, |
| "learning_rate": 1.3355867028591206e-05, |
| "loss": 520.805, |
| "step": 9270 |
| }, |
| { |
| "ce_loss_10": 3.6113879919052123, |
| "ce_loss_13": 3.5496174573898314, |
| "ce_loss_2": 4.047625136375427, |
| "ce_loss_3": 3.8916648983955384, |
| "ce_loss_7": 3.653665769100189, |
| "epoch": 0.928, |
| "grad_norm": 334.0, |
| "kl_loss_10": 94.99486846923828, |
| "kl_loss_2": 1063.383090209961, |
| "kl_loss_3": 737.3780670166016, |
| "kl_loss_7": 184.87188415527345, |
| "learning_rate": 1.2994041528833267e-05, |
| "loss": 520.9468, |
| "step": 9280 |
| }, |
| { |
| "ce_loss_10": 3.612771439552307, |
| "ce_loss_13": 3.5519652009010314, |
| "ce_loss_2": 4.069023680686951, |
| "ce_loss_3": 3.9033527731895448, |
| "ce_loss_7": 3.653776025772095, |
| "epoch": 0.929, |
| "grad_norm": 394.0, |
| "kl_loss_10": 94.48731269836426, |
| "kl_loss_2": 1086.341064453125, |
| "kl_loss_3": 747.7527069091797, |
| "kl_loss_7": 184.27003555297853, |
| "learning_rate": 1.2637120173670358e-05, |
| "loss": 525.795, |
| "step": 9290 |
| }, |
| { |
| "ce_loss_10": 3.6342510104179384, |
| "ce_loss_13": 3.574049484729767, |
| "ce_loss_2": 4.097525525093078, |
| "ce_loss_3": 3.9327287077903748, |
| "ce_loss_7": 3.6803439974784853, |
| "epoch": 0.93, |
| "grad_norm": 492.0, |
| "kl_loss_10": 94.73881340026855, |
| "kl_loss_2": 1086.5091583251954, |
| "kl_loss_3": 750.7861236572265, |
| "kl_loss_7": 186.8117706298828, |
| "learning_rate": 1.2285106557296478e-05, |
| "loss": 526.7854, |
| "step": 9300 |
| }, |
| { |
| "ce_loss_10": 3.513438880443573, |
| "ce_loss_13": 3.453951287269592, |
| "ce_loss_2": 3.9955971360206606, |
| "ce_loss_3": 3.8230167746543886, |
| "ce_loss_7": 3.555509877204895, |
| "epoch": 0.931, |
| "grad_norm": 356.0, |
| "kl_loss_10": 93.80283432006836, |
| "kl_loss_2": 1116.4696807861328, |
| "kl_loss_3": 771.7997375488281, |
| "kl_loss_7": 186.52389373779297, |
| "learning_rate": 1.1938004224484989e-05, |
| "loss": 533.0822, |
| "step": 9310 |
| }, |
| { |
| "ce_loss_10": 3.7524689197540284, |
| "ce_loss_13": 3.6876933336257935, |
| "ce_loss_2": 4.20148618221283, |
| "ce_loss_3": 4.035860347747803, |
| "ce_loss_7": 3.7956905245780943, |
| "epoch": 0.932, |
| "grad_norm": 418.0, |
| "kl_loss_10": 99.70074195861817, |
| "kl_loss_2": 1085.114028930664, |
| "kl_loss_3": 747.7518859863281, |
| "kl_loss_7": 189.80009078979492, |
| "learning_rate": 1.1595816670552429e-05, |
| "loss": 536.128, |
| "step": 9320 |
| }, |
| { |
| "ce_loss_10": 3.6811413764953613, |
| "ce_loss_13": 3.619305157661438, |
| "ce_loss_2": 4.1267077088356015, |
| "ce_loss_3": 3.9628111124038696, |
| "ce_loss_7": 3.7232463002204894, |
| "epoch": 0.933, |
| "grad_norm": 430.0, |
| "kl_loss_10": 98.55138320922852, |
| "kl_loss_2": 1066.0611297607422, |
| "kl_loss_3": 732.6245086669921, |
| "kl_loss_7": 187.06882858276367, |
| "learning_rate": 1.1258547341323699e-05, |
| "loss": 518.9695, |
| "step": 9330 |
| }, |
| { |
| "ce_loss_10": 3.706856846809387, |
| "ce_loss_13": 3.6450837016105653, |
| "ce_loss_2": 4.152973532676697, |
| "ce_loss_3": 3.9891764402389525, |
| "ce_loss_7": 3.7481295585632326, |
| "epoch": 0.934, |
| "grad_norm": 394.0, |
| "kl_loss_10": 96.45535087585449, |
| "kl_loss_2": 1089.2688110351562, |
| "kl_loss_3": 747.8073425292969, |
| "kl_loss_7": 187.34025497436522, |
| "learning_rate": 1.0926199633097156e-05, |
| "loss": 527.061, |
| "step": 9340 |
| }, |
| { |
| "ce_loss_10": 3.7075893759727476, |
| "ce_loss_13": 3.6489187121391295, |
| "ce_loss_2": 4.135252356529236, |
| "ce_loss_3": 3.976875376701355, |
| "ce_loss_7": 3.747441065311432, |
| "epoch": 0.935, |
| "grad_norm": 428.0, |
| "kl_loss_10": 94.83727493286133, |
| "kl_loss_2": 1042.2317810058594, |
| "kl_loss_3": 718.6920349121094, |
| "kl_loss_7": 181.23108978271483, |
| "learning_rate": 1.0598776892610684e-05, |
| "loss": 526.2413, |
| "step": 9350 |
| }, |
| { |
| "ce_loss_10": 3.5169559955596923, |
| "ce_loss_13": 3.4603365540504454, |
| "ce_loss_2": 3.9802993655204775, |
| "ce_loss_3": 3.8121800780296327, |
| "ce_loss_7": 3.561786246299744, |
| "epoch": 0.936, |
| "grad_norm": 334.0, |
| "kl_loss_10": 92.96564292907715, |
| "kl_loss_2": 1091.1406646728515, |
| "kl_loss_3": 747.6543731689453, |
| "kl_loss_7": 183.7804039001465, |
| "learning_rate": 1.0276282417007399e-05, |
| "loss": 521.9861, |
| "step": 9360 |
| }, |
| { |
| "ce_loss_10": 3.6849416494369507, |
| "ce_loss_13": 3.626581645011902, |
| "ce_loss_2": 4.118964040279389, |
| "ce_loss_3": 3.9585147976875303, |
| "ce_loss_7": 3.7237794518470766, |
| "epoch": 0.937, |
| "grad_norm": 464.0, |
| "kl_loss_10": 95.02116394042969, |
| "kl_loss_2": 1044.2026397705079, |
| "kl_loss_3": 719.8276824951172, |
| "kl_loss_7": 182.06821365356444, |
| "learning_rate": 9.958719453803277e-06, |
| "loss": 518.1707, |
| "step": 9370 |
| }, |
| { |
| "ce_loss_10": 3.6774186968803404, |
| "ce_loss_13": 3.6149828910827635, |
| "ce_loss_2": 4.126804637908935, |
| "ce_loss_3": 3.964286994934082, |
| "ce_loss_7": 3.7206520080566405, |
| "epoch": 0.938, |
| "grad_norm": 364.0, |
| "kl_loss_10": 96.62460212707519, |
| "kl_loss_2": 1077.0972625732422, |
| "kl_loss_3": 746.5920196533203, |
| "kl_loss_7": 186.96116638183594, |
| "learning_rate": 9.646091200853802e-06, |
| "loss": 526.3039, |
| "step": 9380 |
| }, |
| { |
| "ce_loss_10": 3.633099365234375, |
| "ce_loss_13": 3.5745465636253355, |
| "ce_loss_2": 4.0883647203445435, |
| "ce_loss_3": 3.9242159128189087, |
| "ce_loss_7": 3.672658348083496, |
| "epoch": 0.939, |
| "grad_norm": 398.0, |
| "kl_loss_10": 93.04219818115234, |
| "kl_loss_2": 1075.2075500488281, |
| "kl_loss_3": 738.6250030517579, |
| "kl_loss_7": 181.5531784057617, |
| "learning_rate": 9.338400806321978e-06, |
| "loss": 512.8155, |
| "step": 9390 |
| }, |
| { |
| "ce_loss_10": 3.664756190776825, |
| "ce_loss_13": 3.603893756866455, |
| "ce_loss_2": 4.104370522499084, |
| "ce_loss_3": 3.941502547264099, |
| "ce_loss_7": 3.7107202291488646, |
| "epoch": 0.94, |
| "grad_norm": 330.0, |
| "kl_loss_10": 96.52969932556152, |
| "kl_loss_2": 1056.286117553711, |
| "kl_loss_3": 729.6215881347656, |
| "kl_loss_7": 186.73142929077147, |
| "learning_rate": 9.035651368646646e-06, |
| "loss": 517.5048, |
| "step": 9400 |
| }, |
| { |
| "ce_loss_10": 3.6749662160873413, |
| "ce_loss_13": 3.6150254607200623, |
| "ce_loss_2": 4.108079397678376, |
| "ce_loss_3": 3.9502787351608277, |
| "ce_loss_7": 3.71422598361969, |
| "epoch": 0.941, |
| "grad_norm": 368.0, |
| "kl_loss_10": 95.4813446044922, |
| "kl_loss_2": 1051.3231384277344, |
| "kl_loss_3": 730.8897918701172, |
| "kl_loss_7": 183.71395568847657, |
| "learning_rate": 8.737845936511335e-06, |
| "loss": 521.5386, |
| "step": 9410 |
| }, |
| { |
| "ce_loss_10": 3.621238374710083, |
| "ce_loss_13": 3.560182070732117, |
| "ce_loss_2": 4.075435829162598, |
| "ce_loss_3": 3.906463932991028, |
| "ce_loss_7": 3.6651031732559205, |
| "epoch": 0.942, |
| "grad_norm": 472.0, |
| "kl_loss_10": 95.50933799743652, |
| "kl_loss_2": 1087.418194580078, |
| "kl_loss_3": 749.9641418457031, |
| "kl_loss_7": 187.3939208984375, |
| "learning_rate": 8.444987508813451e-06, |
| "loss": 524.6778, |
| "step": 9420 |
| }, |
| { |
| "ce_loss_10": 3.567629599571228, |
| "ce_loss_13": 3.5098708271980286, |
| "ce_loss_2": 4.03240327835083, |
| "ce_loss_3": 3.868740451335907, |
| "ce_loss_7": 3.614664590358734, |
| "epoch": 0.943, |
| "grad_norm": 452.0, |
| "kl_loss_10": 95.83200073242188, |
| "kl_loss_2": 1111.0681640625, |
| "kl_loss_3": 769.0793914794922, |
| "kl_loss_7": 188.26431045532226, |
| "learning_rate": 8.157079034633974e-06, |
| "loss": 533.1891, |
| "step": 9430 |
| }, |
| { |
| "ce_loss_10": 3.5664173483848574, |
| "ce_loss_13": 3.5061603307724, |
| "ce_loss_2": 4.02851265668869, |
| "ce_loss_3": 3.862307035923004, |
| "ce_loss_7": 3.6107182621955873, |
| "epoch": 0.944, |
| "grad_norm": 426.0, |
| "kl_loss_10": 94.98325424194336, |
| "kl_loss_2": 1109.4172790527343, |
| "kl_loss_3": 762.6424713134766, |
| "kl_loss_7": 186.38191299438478, |
| "learning_rate": 7.874123413208145e-06, |
| "loss": 528.958, |
| "step": 9440 |
| }, |
| { |
| "ce_loss_10": 3.5382938742637635, |
| "ce_loss_13": 3.481018900871277, |
| "ce_loss_2": 4.006192743778229, |
| "ce_loss_3": 3.8386752605438232, |
| "ce_loss_7": 3.5831608533859254, |
| "epoch": 0.945, |
| "grad_norm": 338.0, |
| "kl_loss_10": 92.47231903076172, |
| "kl_loss_2": 1088.9563568115234, |
| "kl_loss_3": 753.4448974609375, |
| "kl_loss_7": 184.27166213989258, |
| "learning_rate": 7.59612349389599e-06, |
| "loss": 527.5225, |
| "step": 9450 |
| }, |
| { |
| "ce_loss_10": 3.633445167541504, |
| "ce_loss_13": 3.5758827209472654, |
| "ce_loss_2": 4.075440514087677, |
| "ce_loss_3": 3.9124983310699464, |
| "ce_loss_7": 3.6780736327171324, |
| "epoch": 0.946, |
| "grad_norm": 356.0, |
| "kl_loss_10": 91.38598556518555, |
| "kl_loss_2": 1046.8805053710937, |
| "kl_loss_3": 718.2211791992188, |
| "kl_loss_7": 180.72154998779297, |
| "learning_rate": 7.323082076153509e-06, |
| "loss": 519.5404, |
| "step": 9460 |
| }, |
| { |
| "ce_loss_10": 3.675933361053467, |
| "ce_loss_13": 3.616945672035217, |
| "ce_loss_2": 4.116010129451752, |
| "ce_loss_3": 3.954231834411621, |
| "ce_loss_7": 3.7195321679115296, |
| "epoch": 0.947, |
| "grad_norm": 376.0, |
| "kl_loss_10": 96.42714042663575, |
| "kl_loss_2": 1051.1879852294921, |
| "kl_loss_3": 727.5513549804688, |
| "kl_loss_7": 186.51647338867187, |
| "learning_rate": 7.055001909504755e-06, |
| "loss": 525.7655, |
| "step": 9470 |
| }, |
| { |
| "ce_loss_10": 3.7083083152770997, |
| "ce_loss_13": 3.647673761844635, |
| "ce_loss_2": 4.157342481613159, |
| "ce_loss_3": 3.991931939125061, |
| "ce_loss_7": 3.752028775215149, |
| "epoch": 0.948, |
| "grad_norm": 344.0, |
| "kl_loss_10": 96.79825706481934, |
| "kl_loss_2": 1084.5101806640625, |
| "kl_loss_3": 742.6272155761719, |
| "kl_loss_7": 187.0098518371582, |
| "learning_rate": 6.791885693514133e-06, |
| "loss": 528.4126, |
| "step": 9480 |
| }, |
| { |
| "ce_loss_10": 3.6131741404533386, |
| "ce_loss_13": 3.554737401008606, |
| "ce_loss_2": 4.069884133338928, |
| "ce_loss_3": 3.910088050365448, |
| "ce_loss_7": 3.657594072818756, |
| "epoch": 0.949, |
| "grad_norm": 444.0, |
| "kl_loss_10": 95.54262161254883, |
| "kl_loss_2": 1090.819403076172, |
| "kl_loss_3": 755.8211273193359, |
| "kl_loss_7": 187.30291366577148, |
| "learning_rate": 6.533736077758867e-06, |
| "loss": 532.407, |
| "step": 9490 |
| }, |
| { |
| "ce_loss_10": 3.5753329753875733, |
| "ce_loss_13": 3.5157718658447266, |
| "ce_loss_2": 4.050174379348755, |
| "ce_loss_3": 3.878748118877411, |
| "ce_loss_7": 3.621631395816803, |
| "epoch": 0.95, |
| "grad_norm": 454.0, |
| "kl_loss_10": 95.78313636779785, |
| "kl_loss_2": 1112.5021850585938, |
| "kl_loss_3": 766.8859832763671, |
| "kl_loss_7": 188.93851776123046, |
| "learning_rate": 6.2805556618028556e-06, |
| "loss": 531.8975, |
| "step": 9500 |
| }, |
| { |
| "ce_loss_10": 3.6739890694618227, |
| "ce_loss_13": 3.614563775062561, |
| "ce_loss_2": 4.105420649051666, |
| "ce_loss_3": 3.946949827671051, |
| "ce_loss_7": 3.713826298713684, |
| "epoch": 0.951, |
| "grad_norm": 428.0, |
| "kl_loss_10": 95.29025764465332, |
| "kl_loss_2": 1035.753839111328, |
| "kl_loss_3": 718.9863189697265, |
| "kl_loss_7": 182.34558639526367, |
| "learning_rate": 6.032346995169968e-06, |
| "loss": 506.1833, |
| "step": 9510 |
| }, |
| { |
| "ce_loss_10": 3.6744378566741944, |
| "ce_loss_13": 3.6160669803619383, |
| "ce_loss_2": 4.116178596019745, |
| "ce_loss_3": 3.952050065994263, |
| "ce_loss_7": 3.714146387577057, |
| "epoch": 0.952, |
| "grad_norm": 350.0, |
| "kl_loss_10": 95.77439384460449, |
| "kl_loss_2": 1065.6743865966796, |
| "kl_loss_3": 734.3932067871094, |
| "kl_loss_7": 184.87170867919923, |
| "learning_rate": 5.789112577318789e-06, |
| "loss": 520.2576, |
| "step": 9520 |
| }, |
| { |
| "ce_loss_10": 3.6489309549331663, |
| "ce_loss_13": 3.5895671963691713, |
| "ce_loss_2": 4.11376656293869, |
| "ce_loss_3": 3.946073520183563, |
| "ce_loss_7": 3.6925018429756165, |
| "epoch": 0.953, |
| "grad_norm": 460.0, |
| "kl_loss_10": 96.73359451293945, |
| "kl_loss_2": 1111.601629638672, |
| "kl_loss_3": 771.5278289794921, |
| "kl_loss_7": 187.8802848815918, |
| "learning_rate": 5.550854857617194e-06, |
| "loss": 527.3308, |
| "step": 9530 |
| }, |
| { |
| "ce_loss_10": 3.6415695905685426, |
| "ce_loss_13": 3.579833471775055, |
| "ce_loss_2": 4.102292227745056, |
| "ce_loss_3": 3.9383127331733703, |
| "ce_loss_7": 3.6863919377326964, |
| "epoch": 0.954, |
| "grad_norm": 398.0, |
| "kl_loss_10": 98.16804580688476, |
| "kl_loss_2": 1097.6046325683594, |
| "kl_loss_3": 757.5784729003906, |
| "kl_loss_7": 190.50857543945312, |
| "learning_rate": 5.317576235317756e-06, |
| "loss": 527.9396, |
| "step": 9540 |
| }, |
| { |
| "ce_loss_10": 3.6651427507400514, |
| "ce_loss_13": 3.604920470714569, |
| "ce_loss_2": 4.100248050689697, |
| "ce_loss_3": 3.94064177274704, |
| "ce_loss_7": 3.7060970425605775, |
| "epoch": 0.955, |
| "grad_norm": 386.0, |
| "kl_loss_10": 96.45015525817871, |
| "kl_loss_2": 1031.3038146972656, |
| "kl_loss_3": 712.4996978759766, |
| "kl_loss_7": 182.76630401611328, |
| "learning_rate": 5.089279059533658e-06, |
| "loss": 524.0002, |
| "step": 9550 |
| }, |
| { |
| "ce_loss_10": 3.7266568183898925, |
| "ce_loss_13": 3.663935911655426, |
| "ce_loss_2": 4.170814108848572, |
| "ce_loss_3": 4.006054651737213, |
| "ce_loss_7": 3.769794237613678, |
| "epoch": 0.956, |
| "grad_norm": 386.0, |
| "kl_loss_10": 100.15878944396972, |
| "kl_loss_2": 1068.9294799804688, |
| "kl_loss_3": 738.0209930419921, |
| "kl_loss_7": 192.08404541015625, |
| "learning_rate": 4.865965629214819e-06, |
| "loss": 520.8748, |
| "step": 9560 |
| }, |
| { |
| "ce_loss_10": 3.670477032661438, |
| "ce_loss_13": 3.611146903038025, |
| "ce_loss_2": 4.115479242801666, |
| "ce_loss_3": 3.9537983894348145, |
| "ce_loss_7": 3.7129539370536806, |
| "epoch": 0.957, |
| "grad_norm": 496.0, |
| "kl_loss_10": 96.79973983764648, |
| "kl_loss_2": 1085.6631072998048, |
| "kl_loss_3": 749.8902404785156, |
| "kl_loss_7": 188.74480895996095, |
| "learning_rate": 4.6476381931251366e-06, |
| "loss": 519.6521, |
| "step": 9570 |
| }, |
| { |
| "ce_loss_10": 3.646716892719269, |
| "ce_loss_13": 3.5878213763237, |
| "ce_loss_2": 4.089986479282379, |
| "ce_loss_3": 3.9314276933670045, |
| "ce_loss_7": 3.6911307334899903, |
| "epoch": 0.958, |
| "grad_norm": 318.0, |
| "kl_loss_10": 94.01541290283203, |
| "kl_loss_2": 1067.8105712890624, |
| "kl_loss_3": 740.1676208496094, |
| "kl_loss_7": 184.118741607666, |
| "learning_rate": 4.434298949819449e-06, |
| "loss": 523.6254, |
| "step": 9580 |
| }, |
| { |
| "ce_loss_10": 3.6008993268013, |
| "ce_loss_13": 3.538570249080658, |
| "ce_loss_2": 4.069638097286225, |
| "ce_loss_3": 3.8975520372390746, |
| "ce_loss_7": 3.6453381776809692, |
| "epoch": 0.959, |
| "grad_norm": 440.0, |
| "kl_loss_10": 97.41343994140625, |
| "kl_loss_2": 1125.892025756836, |
| "kl_loss_3": 772.14267578125, |
| "kl_loss_7": 189.9515396118164, |
| "learning_rate": 4.2259500476214406e-06, |
| "loss": 534.6609, |
| "step": 9590 |
| }, |
| { |
| "ce_loss_10": 3.58458696603775, |
| "ce_loss_13": 3.52560031414032, |
| "ce_loss_2": 4.040603399276733, |
| "ce_loss_3": 3.8742735624313354, |
| "ce_loss_7": 3.627805030345917, |
| "epoch": 0.96, |
| "grad_norm": 388.0, |
| "kl_loss_10": 94.08248367309571, |
| "kl_loss_2": 1083.009814453125, |
| "kl_loss_3": 746.2331970214843, |
| "kl_loss_7": 184.85717010498047, |
| "learning_rate": 4.02259358460233e-06, |
| "loss": 521.7564, |
| "step": 9600 |
| }, |
| { |
| "ce_loss_10": 3.6558929800987245, |
| "ce_loss_13": 3.5954962849617003, |
| "ce_loss_2": 4.101473760604859, |
| "ce_loss_3": 3.9380804181098936, |
| "ce_loss_7": 3.6987645506858824, |
| "epoch": 0.961, |
| "grad_norm": 544.0, |
| "kl_loss_10": 95.69773292541504, |
| "kl_loss_2": 1060.7937774658203, |
| "kl_loss_3": 733.2102172851562, |
| "kl_loss_7": 185.71547775268556, |
| "learning_rate": 3.8242316085594916e-06, |
| "loss": 516.8465, |
| "step": 9610 |
| }, |
| { |
| "ce_loss_10": 3.5343406558036805, |
| "ce_loss_13": 3.4767986059188845, |
| "ce_loss_2": 4.016193747520447, |
| "ce_loss_3": 3.8443652629852294, |
| "ce_loss_7": 3.580942380428314, |
| "epoch": 0.962, |
| "grad_norm": 366.0, |
| "kl_loss_10": 93.89258918762206, |
| "kl_loss_2": 1123.5916809082032, |
| "kl_loss_3": 780.3413696289062, |
| "kl_loss_7": 187.34277801513673, |
| "learning_rate": 3.630866116995757e-06, |
| "loss": 546.1011, |
| "step": 9620 |
| }, |
| { |
| "ce_loss_10": 3.6960983991622927, |
| "ce_loss_13": 3.635801446437836, |
| "ce_loss_2": 4.132487082481385, |
| "ce_loss_3": 3.9690314412117003, |
| "ce_loss_7": 3.737609100341797, |
| "epoch": 0.963, |
| "grad_norm": 312.0, |
| "kl_loss_10": 96.57149925231934, |
| "kl_loss_2": 1044.7675506591797, |
| "kl_loss_3": 718.4659484863281, |
| "kl_loss_7": 183.9634048461914, |
| "learning_rate": 3.4424990570994797e-06, |
| "loss": 523.2208, |
| "step": 9630 |
| }, |
| { |
| "ce_loss_10": 3.685701644420624, |
| "ce_loss_13": 3.624559962749481, |
| "ce_loss_2": 4.128798627853394, |
| "ce_loss_3": 3.968520772457123, |
| "ce_loss_7": 3.7257295846939087, |
| "epoch": 0.964, |
| "grad_norm": 280.0, |
| "kl_loss_10": 95.63589668273926, |
| "kl_loss_2": 1068.9191833496093, |
| "kl_loss_3": 737.6691131591797, |
| "kl_loss_7": 184.7596893310547, |
| "learning_rate": 3.2591323257248896e-06, |
| "loss": 522.5564, |
| "step": 9640 |
| }, |
| { |
| "ce_loss_10": 3.5315052390098574, |
| "ce_loss_13": 3.4732569575309755, |
| "ce_loss_2": 3.99234676361084, |
| "ce_loss_3": 3.822614312171936, |
| "ce_loss_7": 3.5727542638778687, |
| "epoch": 0.965, |
| "grad_norm": 338.0, |
| "kl_loss_10": 93.59828681945801, |
| "kl_loss_2": 1088.8541290283204, |
| "kl_loss_3": 750.0034118652344, |
| "kl_loss_7": 183.51124572753906, |
| "learning_rate": 3.0807677693729385e-06, |
| "loss": 528.9641, |
| "step": 9650 |
| }, |
| { |
| "ce_loss_10": 3.721923458576202, |
| "ce_loss_13": 3.6635044693946837, |
| "ce_loss_2": 4.157553017139435, |
| "ce_loss_3": 3.9980939745903017, |
| "ce_loss_7": 3.7649829506874086, |
| "epoch": 0.966, |
| "grad_norm": 328.0, |
| "kl_loss_10": 95.77610893249512, |
| "kl_loss_2": 1046.733694458008, |
| "kl_loss_3": 723.9284912109375, |
| "kl_loss_7": 183.63089752197266, |
| "learning_rate": 2.9074071841727055e-06, |
| "loss": 513.6759, |
| "step": 9660 |
| }, |
| { |
| "ce_loss_10": 3.6491685032844545, |
| "ce_loss_13": 3.5898547172546387, |
| "ce_loss_2": 4.10191251039505, |
| "ce_loss_3": 3.9377865552902223, |
| "ce_loss_7": 3.694057583808899, |
| "epoch": 0.967, |
| "grad_norm": 410.0, |
| "kl_loss_10": 94.75908012390137, |
| "kl_loss_2": 1074.1435485839843, |
| "kl_loss_3": 739.0172424316406, |
| "kl_loss_7": 185.9965072631836, |
| "learning_rate": 2.739052315863355e-06, |
| "loss": 514.4849, |
| "step": 9670 |
| }, |
| { |
| "ce_loss_10": 3.6381678700447084, |
| "ce_loss_13": 3.5745797991752624, |
| "ce_loss_2": 4.085923862457276, |
| "ce_loss_3": 3.9223034262657164, |
| "ce_loss_7": 3.679898130893707, |
| "epoch": 0.968, |
| "grad_norm": 400.0, |
| "kl_loss_10": 98.94500389099122, |
| "kl_loss_2": 1071.759048461914, |
| "kl_loss_3": 742.1754821777344, |
| "kl_loss_7": 186.4635383605957, |
| "learning_rate": 2.5757048597765396e-06, |
| "loss": 520.3133, |
| "step": 9680 |
| }, |
| { |
| "ce_loss_10": 3.6451838970184327, |
| "ce_loss_13": 3.584810471534729, |
| "ce_loss_2": 4.096628618240357, |
| "ce_loss_3": 3.9352465867996216, |
| "ce_loss_7": 3.6861096024513245, |
| "epoch": 0.969, |
| "grad_norm": 354.0, |
| "kl_loss_10": 95.84736633300781, |
| "kl_loss_2": 1089.9881774902344, |
| "kl_loss_3": 753.175894165039, |
| "kl_loss_7": 186.62908554077148, |
| "learning_rate": 2.417366460819359e-06, |
| "loss": 527.3621, |
| "step": 9690 |
| }, |
| { |
| "ce_loss_10": 3.6515438675880434, |
| "ce_loss_13": 3.5902424931526182, |
| "ce_loss_2": 4.121148645877838, |
| "ce_loss_3": 3.9508870244026184, |
| "ce_loss_7": 3.6973974823951723, |
| "epoch": 0.97, |
| "grad_norm": 378.0, |
| "kl_loss_10": 97.83438453674316, |
| "kl_loss_2": 1114.7044860839844, |
| "kl_loss_3": 766.2058898925782, |
| "kl_loss_7": 189.7753791809082, |
| "learning_rate": 2.2640387134577057e-06, |
| "loss": 528.5559, |
| "step": 9700 |
| }, |
| { |
| "ce_loss_10": 3.579375672340393, |
| "ce_loss_13": 3.5232587337493895, |
| "ce_loss_2": 4.008558976650238, |
| "ce_loss_3": 3.853218126296997, |
| "ce_loss_7": 3.621316111087799, |
| "epoch": 0.971, |
| "grad_norm": 346.0, |
| "kl_loss_10": 89.91974563598633, |
| "kl_loss_2": 1025.9577575683593, |
| "kl_loss_3": 709.8189392089844, |
| "kl_loss_7": 177.37805099487304, |
| "learning_rate": 2.115723161700278e-06, |
| "loss": 511.7921, |
| "step": 9710 |
| }, |
| { |
| "ce_loss_10": 3.5539788961410523, |
| "ce_loss_13": 3.493563008308411, |
| "ce_loss_2": 4.019102883338928, |
| "ce_loss_3": 3.8513848066329954, |
| "ce_loss_7": 3.6021719098091127, |
| "epoch": 0.972, |
| "grad_norm": 450.0, |
| "kl_loss_10": 97.08839912414551, |
| "kl_loss_2": 1102.8951354980468, |
| "kl_loss_3": 763.9947265625, |
| "kl_loss_7": 189.97327194213867, |
| "learning_rate": 1.9724212990830937e-06, |
| "loss": 534.7647, |
| "step": 9720 |
| }, |
| { |
| "ce_loss_10": 3.7055511236190797, |
| "ce_loss_13": 3.645791494846344, |
| "ce_loss_2": 4.164284873008728, |
| "ce_loss_3": 3.998200333118439, |
| "ce_loss_7": 3.748906970024109, |
| "epoch": 0.973, |
| "grad_norm": 306.0, |
| "kl_loss_10": 97.2132583618164, |
| "kl_loss_2": 1086.074758911133, |
| "kl_loss_3": 748.4372924804687, |
| "kl_loss_7": 187.37096481323243, |
| "learning_rate": 1.8341345686543331e-06, |
| "loss": 526.9427, |
| "step": 9730 |
| }, |
| { |
| "ce_loss_10": 3.688717949390411, |
| "ce_loss_13": 3.6282296895980837, |
| "ce_loss_2": 4.123041558265686, |
| "ce_loss_3": 3.963315784931183, |
| "ce_loss_7": 3.731441855430603, |
| "epoch": 0.974, |
| "grad_norm": 446.0, |
| "kl_loss_10": 95.65226020812989, |
| "kl_loss_2": 1053.469790649414, |
| "kl_loss_3": 725.7204162597657, |
| "kl_loss_7": 185.0056625366211, |
| "learning_rate": 1.7008643629596864e-06, |
| "loss": 524.4386, |
| "step": 9740 |
| }, |
| { |
| "ce_loss_10": 3.674058973789215, |
| "ce_loss_13": 3.6143284678459167, |
| "ce_loss_2": 4.119033622741699, |
| "ce_loss_3": 3.954865837097168, |
| "ce_loss_7": 3.7161202311515806, |
| "epoch": 0.975, |
| "grad_norm": 406.0, |
| "kl_loss_10": 96.88497962951661, |
| "kl_loss_2": 1081.9985229492188, |
| "kl_loss_3": 741.1380645751954, |
| "kl_loss_7": 186.00157089233397, |
| "learning_rate": 1.5726120240288633e-06, |
| "loss": 531.1466, |
| "step": 9750 |
| }, |
| { |
| "ce_loss_10": 3.569232928752899, |
| "ce_loss_13": 3.511077570915222, |
| "ce_loss_2": 4.014019024372101, |
| "ce_loss_3": 3.854006791114807, |
| "ce_loss_7": 3.6116040468215944, |
| "epoch": 0.976, |
| "grad_norm": 548.0, |
| "kl_loss_10": 94.0260025024414, |
| "kl_loss_2": 1069.765771484375, |
| "kl_loss_3": 740.6184356689453, |
| "kl_loss_7": 184.33246154785155, |
| "learning_rate": 1.4493788433612708e-06, |
| "loss": 520.0787, |
| "step": 9760 |
| }, |
| { |
| "ce_loss_10": 3.6905293107032775, |
| "ce_loss_13": 3.630939745903015, |
| "ce_loss_2": 4.1426611065864565, |
| "ce_loss_3": 3.9766762137413023, |
| "ce_loss_7": 3.7346643686294554, |
| "epoch": 0.977, |
| "grad_norm": 340.0, |
| "kl_loss_10": 95.88525352478027, |
| "kl_loss_2": 1083.8150268554687, |
| "kl_loss_3": 744.1571472167968, |
| "kl_loss_7": 186.4142593383789, |
| "learning_rate": 1.3311660619138578e-06, |
| "loss": 528.903, |
| "step": 9770 |
| }, |
| { |
| "ce_loss_10": 3.6836748480796815, |
| "ce_loss_13": 3.6255483746528627, |
| "ce_loss_2": 4.109962856769561, |
| "ce_loss_3": 3.9559788703918457, |
| "ce_loss_7": 3.7251157641410826, |
| "epoch": 0.978, |
| "grad_norm": 358.0, |
| "kl_loss_10": 94.83126564025879, |
| "kl_loss_2": 1041.5651123046875, |
| "kl_loss_3": 719.0739654541015, |
| "kl_loss_7": 183.43487930297852, |
| "learning_rate": 1.2179748700879012e-06, |
| "loss": 517.046, |
| "step": 9780 |
| }, |
| { |
| "ce_loss_10": 3.6114102602005005, |
| "ce_loss_13": 3.553410363197327, |
| "ce_loss_2": 4.060984289646148, |
| "ce_loss_3": 3.9008304476737976, |
| "ce_loss_7": 3.6546178460121155, |
| "epoch": 0.979, |
| "grad_norm": 448.0, |
| "kl_loss_10": 94.51130638122558, |
| "kl_loss_2": 1070.2796936035156, |
| "kl_loss_3": 734.1844604492187, |
| "kl_loss_7": 183.8596923828125, |
| "learning_rate": 1.1098064077174619e-06, |
| "loss": 522.2918, |
| "step": 9790 |
| }, |
| { |
| "ce_loss_10": 3.6468693137168886, |
| "ce_loss_13": 3.5864970564842222, |
| "ce_loss_2": 4.112797820568085, |
| "ce_loss_3": 3.9430916547775268, |
| "ce_loss_7": 3.6902678489685057, |
| "epoch": 0.98, |
| "grad_norm": 396.0, |
| "kl_loss_10": 94.27075653076172, |
| "kl_loss_2": 1089.721176147461, |
| "kl_loss_3": 749.4085998535156, |
| "kl_loss_7": 185.25458450317382, |
| "learning_rate": 1.006661764057837e-06, |
| "loss": 525.8869, |
| "step": 9800 |
| }, |
| { |
| "ce_loss_10": 3.6512062191963195, |
| "ce_loss_13": 3.591748607158661, |
| "ce_loss_2": 4.100599420070648, |
| "ce_loss_3": 3.94116724729538, |
| "ce_loss_7": 3.6929367065429686, |
| "epoch": 0.981, |
| "grad_norm": 370.0, |
| "kl_loss_10": 95.22041091918945, |
| "kl_loss_2": 1079.5743225097656, |
| "kl_loss_3": 744.3837371826172, |
| "kl_loss_7": 184.29767150878905, |
| "learning_rate": 9.085419777743465e-07, |
| "loss": 523.8814, |
| "step": 9810 |
| }, |
| { |
| "ce_loss_10": 3.5867176413536073, |
| "ce_loss_13": 3.5297884702682496, |
| "ce_loss_2": 4.040095067024231, |
| "ce_loss_3": 3.876063418388367, |
| "ce_loss_7": 3.6296881198883058, |
| "epoch": 0.982, |
| "grad_norm": 372.0, |
| "kl_loss_10": 92.43338165283203, |
| "kl_loss_2": 1068.0833831787108, |
| "kl_loss_3": 736.9401550292969, |
| "kl_loss_7": 179.86018447875978, |
| "learning_rate": 8.15448036932176e-07, |
| "loss": 515.9226, |
| "step": 9820 |
| }, |
| { |
| "ce_loss_10": 3.641873502731323, |
| "ce_loss_13": 3.5830901145935057, |
| "ce_loss_2": 4.088828957080841, |
| "ce_loss_3": 3.9226441621780395, |
| "ce_loss_7": 3.6845821142196655, |
| "epoch": 0.983, |
| "grad_norm": 450.0, |
| "kl_loss_10": 93.74083061218262, |
| "kl_loss_2": 1074.023745727539, |
| "kl_loss_3": 742.9956268310547, |
| "kl_loss_7": 184.04373016357422, |
| "learning_rate": 7.273808789862724e-07, |
| "loss": 527.5683, |
| "step": 9830 |
| }, |
| { |
| "ce_loss_10": 3.7283427119255066, |
| "ce_loss_13": 3.6678077578544617, |
| "ce_loss_2": 4.168006038665771, |
| "ce_loss_3": 4.004901099205017, |
| "ce_loss_7": 3.7688368439674376, |
| "epoch": 0.984, |
| "grad_norm": 302.0, |
| "kl_loss_10": 97.79412803649902, |
| "kl_loss_2": 1069.5015228271484, |
| "kl_loss_3": 732.57353515625, |
| "kl_loss_7": 187.13561325073243, |
| "learning_rate": 6.443413907720186e-07, |
| "loss": 519.9878, |
| "step": 9840 |
| }, |
| { |
| "ce_loss_10": 3.6514281272888183, |
| "ce_loss_13": 3.5926932096481323, |
| "ce_loss_2": 4.092191052436829, |
| "ce_loss_3": 3.930639326572418, |
| "ce_loss_7": 3.6929845571517945, |
| "epoch": 0.985, |
| "grad_norm": 370.0, |
| "kl_loss_10": 94.32459564208985, |
| "kl_loss_2": 1056.6608520507812, |
| "kl_loss_3": 730.5611633300781, |
| "kl_loss_7": 184.7391357421875, |
| "learning_rate": 5.663304084960185e-07, |
| "loss": 518.7671, |
| "step": 9850 |
| }, |
| { |
| "ce_loss_10": 3.5804072976112367, |
| "ce_loss_13": 3.520645248889923, |
| "ce_loss_2": 4.040905499458313, |
| "ce_loss_3": 3.8740663886070252, |
| "ce_loss_7": 3.6240461707115172, |
| "epoch": 0.986, |
| "grad_norm": 364.0, |
| "kl_loss_10": 96.15405921936035, |
| "kl_loss_2": 1090.2829711914062, |
| "kl_loss_3": 753.8562805175782, |
| "kl_loss_7": 186.33775329589844, |
| "learning_rate": 4.933487177280482e-07, |
| "loss": 518.7076, |
| "step": 9860 |
| }, |
| { |
| "ce_loss_10": 3.6774788737297057, |
| "ce_loss_13": 3.6186564683914186, |
| "ce_loss_2": 4.12188241481781, |
| "ce_loss_3": 3.958666682243347, |
| "ce_loss_7": 3.7196821093559267, |
| "epoch": 0.987, |
| "grad_norm": 408.0, |
| "kl_loss_10": 94.4230339050293, |
| "kl_loss_2": 1058.865460205078, |
| "kl_loss_3": 734.6985687255859, |
| "kl_loss_7": 181.19112243652344, |
| "learning_rate": 4.2539705339295075e-07, |
| "loss": 516.55, |
| "step": 9870 |
| }, |
| { |
| "ce_loss_10": 3.525623691082001, |
| "ce_loss_13": 3.46755028963089, |
| "ce_loss_2": 3.986714744567871, |
| "ce_loss_3": 3.828349435329437, |
| "ce_loss_7": 3.572091352939606, |
| "epoch": 0.988, |
| "grad_norm": 376.0, |
| "kl_loss_10": 93.51777114868165, |
| "kl_loss_2": 1095.3638244628905, |
| "kl_loss_3": 760.6636901855469, |
| "kl_loss_7": 187.32990493774415, |
| "learning_rate": 3.6247609976319816e-07, |
| "loss": 523.6327, |
| "step": 9880 |
| }, |
| { |
| "ce_loss_10": 3.6277820110321044, |
| "ce_loss_13": 3.567537808418274, |
| "ce_loss_2": 4.088609397411346, |
| "ce_loss_3": 3.924705386161804, |
| "ce_loss_7": 3.6743552684783936, |
| "epoch": 0.989, |
| "grad_norm": 476.0, |
| "kl_loss_10": 96.44798164367675, |
| "kl_loss_2": 1082.7717010498047, |
| "kl_loss_3": 749.9800903320313, |
| "kl_loss_7": 188.03020095825195, |
| "learning_rate": 3.0458649045211895e-07, |
| "loss": 536.5464, |
| "step": 9890 |
| }, |
| { |
| "ce_loss_10": 3.596536934375763, |
| "ce_loss_13": 3.5354753971099853, |
| "ce_loss_2": 4.064900302886963, |
| "ce_loss_3": 3.895892357826233, |
| "ce_loss_7": 3.6420591354370115, |
| "epoch": 0.99, |
| "grad_norm": 354.0, |
| "kl_loss_10": 95.05449028015137, |
| "kl_loss_2": 1090.392593383789, |
| "kl_loss_3": 754.5375762939453, |
| "kl_loss_7": 188.19856491088868, |
| "learning_rate": 2.517288084074587e-07, |
| "loss": 534.519, |
| "step": 9900 |
| }, |
| { |
| "ce_loss_10": 3.635116171836853, |
| "ce_loss_13": 3.574588453769684, |
| "ce_loss_2": 4.111752784252166, |
| "ce_loss_3": 3.944272756576538, |
| "ce_loss_7": 3.682980465888977, |
| "epoch": 0.991, |
| "grad_norm": 354.0, |
| "kl_loss_10": 95.2107322692871, |
| "kl_loss_2": 1113.8304351806642, |
| "kl_loss_3": 770.2767669677735, |
| "kl_loss_7": 189.45772018432618, |
| "learning_rate": 2.0390358590538505e-07, |
| "loss": 533.4635, |
| "step": 9910 |
| }, |
| { |
| "ce_loss_10": 3.644596242904663, |
| "ce_loss_13": 3.5852715373039246, |
| "ce_loss_2": 4.097472989559174, |
| "ce_loss_3": 3.93781635761261, |
| "ce_loss_7": 3.6912411212921143, |
| "epoch": 0.992, |
| "grad_norm": 360.0, |
| "kl_loss_10": 95.30287055969238, |
| "kl_loss_2": 1081.2569366455077, |
| "kl_loss_3": 748.5147064208984, |
| "kl_loss_7": 189.18702926635743, |
| "learning_rate": 1.61111304545436e-07, |
| "loss": 523.9828, |
| "step": 9920 |
| }, |
| { |
| "ce_loss_10": 3.6098355293273925, |
| "ce_loss_13": 3.5515360593795777, |
| "ce_loss_2": 4.0586741924285885, |
| "ce_loss_3": 3.895809698104858, |
| "ce_loss_7": 3.651991581916809, |
| "epoch": 0.993, |
| "grad_norm": 408.0, |
| "kl_loss_10": 94.64987220764161, |
| "kl_loss_2": 1077.9339660644532, |
| "kl_loss_3": 744.9157043457031, |
| "kl_loss_7": 185.0061477661133, |
| "learning_rate": 1.2335239524541298e-07, |
| "loss": 518.6489, |
| "step": 9930 |
| }, |
| { |
| "ce_loss_10": 3.5815568804740905, |
| "ce_loss_13": 3.523807632923126, |
| "ce_loss_2": 4.032287573814392, |
| "ce_loss_3": 3.871028816699982, |
| "ce_loss_7": 3.625322496891022, |
| "epoch": 0.994, |
| "grad_norm": 396.0, |
| "kl_loss_10": 94.44433670043945, |
| "kl_loss_2": 1070.128958129883, |
| "kl_loss_3": 738.4679992675781, |
| "kl_loss_7": 184.16616668701172, |
| "learning_rate": 9.06272382371065e-08, |
| "loss": 522.681, |
| "step": 9940 |
| }, |
| { |
| "ce_loss_10": 3.649178886413574, |
| "ce_loss_13": 3.5898856997489927, |
| "ce_loss_2": 4.107921350002289, |
| "ce_loss_3": 3.9442641377449035, |
| "ce_loss_7": 3.6927329182624815, |
| "epoch": 0.995, |
| "grad_norm": 366.0, |
| "kl_loss_10": 97.43391189575195, |
| "kl_loss_2": 1093.5839904785157, |
| "kl_loss_3": 755.0986083984375, |
| "kl_loss_7": 187.86676559448242, |
| "learning_rate": 6.293616306246586e-08, |
| "loss": 528.0195, |
| "step": 9950 |
| }, |
| { |
| "ce_loss_10": 3.6503029584884645, |
| "ce_loss_13": 3.5912461996078493, |
| "ce_loss_2": 4.083380007743836, |
| "ce_loss_3": 3.9251137375831604, |
| "ce_loss_7": 3.6914992809295653, |
| "epoch": 0.996, |
| "grad_norm": 386.0, |
| "kl_loss_10": 92.51536598205567, |
| "kl_loss_2": 1047.152279663086, |
| "kl_loss_3": 724.7971832275391, |
| "kl_loss_7": 180.47787857055664, |
| "learning_rate": 4.027944857032395e-08, |
| "loss": 508.9252, |
| "step": 9960 |
| }, |
| { |
| "ce_loss_10": 3.640582966804504, |
| "ce_loss_13": 3.5819292664527893, |
| "ce_loss_2": 4.071112728118896, |
| "ce_loss_3": 3.9061211466789247, |
| "ce_loss_7": 3.678541886806488, |
| "epoch": 0.997, |
| "grad_norm": 332.0, |
| "kl_loss_10": 94.77192039489746, |
| "kl_loss_2": 1031.1441497802734, |
| "kl_loss_3": 710.4731018066407, |
| "kl_loss_7": 178.55070953369142, |
| "learning_rate": 2.265732291356626e-08, |
| "loss": 508.5261, |
| "step": 9970 |
| }, |
| { |
| "ce_loss_10": 3.6857742786407472, |
| "ce_loss_13": 3.6264986276626585, |
| "ce_loss_2": 4.119817900657654, |
| "ce_loss_3": 3.9565661191940307, |
| "ce_loss_7": 3.7256898403167726, |
| "epoch": 0.998, |
| "grad_norm": 354.0, |
| "kl_loss_10": 95.05257987976074, |
| "kl_loss_2": 1045.786294555664, |
| "kl_loss_3": 725.6510559082031, |
| "kl_loss_7": 184.49017181396485, |
| "learning_rate": 1.0069963546743833e-08, |
| "loss": 527.3226, |
| "step": 9980 |
| }, |
| { |
| "ce_loss_10": 3.66132515668869, |
| "ce_loss_13": 3.6029880166053774, |
| "ce_loss_2": 4.108680582046508, |
| "ce_loss_3": 3.9468571662902834, |
| "ce_loss_7": 3.7058345794677736, |
| "epoch": 0.999, |
| "grad_norm": 358.0, |
| "kl_loss_10": 95.48841247558593, |
| "kl_loss_2": 1072.3943481445312, |
| "kl_loss_3": 746.4591552734375, |
| "kl_loss_7": 185.93264389038086, |
| "learning_rate": 2.517497224463483e-09, |
| "loss": 522.7165, |
| "step": 9990 |
| }, |
| { |
| "ce_loss_10": 3.6195040583610534, |
| "ce_loss_13": 3.559644305706024, |
| "ce_loss_2": 4.096893215179444, |
| "ce_loss_3": 3.9238691568374633, |
| "ce_loss_7": 3.6661298632621766, |
| "epoch": 1.0, |
| "grad_norm": 502.0, |
| "kl_loss_10": 96.35170402526856, |
| "kl_loss_2": 1110.4292907714844, |
| "kl_loss_3": 763.8701965332032, |
| "kl_loss_7": 189.7416961669922, |
| "learning_rate": 0.0, |
| "loss": 533.5189, |
| "step": 10000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 10000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 250, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.177819035608023e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|