{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9998557067962099,
  "eval_steps": 500,
  "global_step": 5197,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 8.974358974358975e-07,
      "loss": 2.9732,
      "loss_": 1.436,
      "moe_loss": 0.1675,
      "moe_loss_longrong": 1.4982,
      "step": 7
    },
    {
      "epoch": 0.0,
      "learning_rate": 1.794871794871795e-06,
      "loss": 2.9414,
      "loss_": 1.3375,
      "moe_loss": 0.1674,
      "moe_loss_longrong": 1.4964,
      "step": 14
    },
    {
      "epoch": 0.0,
      "learning_rate": 2.6923076923076923e-06,
      "loss": 2.8682,
      "loss_": 1.1438,
      "moe_loss": 0.1663,
      "moe_loss_longrong": 1.4917,
      "step": 21
    },
    {
      "epoch": 0.01,
      "learning_rate": 3.58974358974359e-06,
      "loss": 2.806,
      "loss_": 1.2312,
      "moe_loss": 0.1655,
      "moe_loss_longrong": 1.488,
      "step": 28
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.487179487179488e-06,
      "loss": 2.7734,
      "loss_": 1.2285,
      "moe_loss": 0.1645,
      "moe_loss_longrong": 1.4882,
      "step": 35
    },
    {
      "epoch": 0.01,
      "learning_rate": 5.384615384615385e-06,
      "loss": 2.7314,
      "loss_": 1.0526,
      "moe_loss": 0.1635,
      "moe_loss_longrong": 1.4847,
      "step": 42
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.282051282051282e-06,
      "loss": 2.6961,
      "loss_": 0.9249,
      "moe_loss": 0.1627,
      "moe_loss_longrong": 1.4803,
      "step": 49
    },
    {
      "epoch": 0.01,
      "learning_rate": 7.17948717948718e-06,
      "loss": 2.6779,
      "loss_": 0.9535,
      "moe_loss": 0.1665,
      "moe_loss_longrong": 1.5082,
      "step": 56
    },
    {
      "epoch": 0.01,
      "learning_rate": 8.076923076923077e-06,
      "loss": 2.6983,
      "loss_": 1.1541,
      "moe_loss": 0.1617,
      "moe_loss_longrong": 1.4777,
      "step": 63
    },
    {
      "epoch": 0.01,
      "learning_rate": 8.974358974358976e-06,
      "loss": 2.6718,
      "loss_": 1.0194,
      "moe_loss": 0.1613,
      "moe_loss_longrong": 1.4718,
      "step": 70
    },
    {
      "epoch": 0.01,
      "learning_rate": 9.871794871794872e-06,
      "loss": 2.6443,
      "loss_": 0.7302,
      "moe_loss": 0.1615,
      "moe_loss_longrong": 1.4723,
      "step": 77
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.076923076923077e-05,
      "loss": 2.7002,
      "loss_": 1.1497,
      "moe_loss": 0.1615,
      "moe_loss_longrong": 1.4679,
      "step": 84
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.1666666666666668e-05,
      "loss": 2.6528,
      "loss_": 1.114,
      "moe_loss": 0.1613,
      "moe_loss_longrong": 1.4653,
      "step": 91
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.2564102564102565e-05,
      "loss": 2.667,
      "loss_": 1.0337,
      "moe_loss": 0.1614,
      "moe_loss_longrong": 1.466,
      "step": 98
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.3461538461538463e-05,
      "loss": 2.61,
      "loss_": 0.8435,
      "moe_loss": 0.1643,
      "moe_loss_longrong": 1.4985,
      "step": 105
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.435897435897436e-05,
      "loss": 2.6878,
      "loss_": 1.2116,
      "moe_loss": 0.1611,
      "moe_loss_longrong": 1.4619,
      "step": 112
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.5256410256410257e-05,
      "loss": 2.6614,
      "loss_": 1.2295,
      "moe_loss": 0.1612,
      "moe_loss_longrong": 1.4628,
      "step": 119
    },
    {
      "epoch": 0.02,
      "learning_rate": 1.6153846153846154e-05,
      "loss": 2.6541,
      "loss_": 1.087,
      "moe_loss": 0.1612,
      "moe_loss_longrong": 1.4593,
      "step": 126
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.7051282051282053e-05,
      "loss": 2.6268,
      "loss_": 1.0603,
      "moe_loss": 0.1611,
      "moe_loss_longrong": 1.4563,
      "step": 133
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.794871794871795e-05,
      "loss": 2.6591,
      "loss_": 1.1362,
      "moe_loss": 0.1611,
      "moe_loss_longrong": 1.4566,
      "step": 140
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.8846153846153846e-05,
      "loss": 2.6675,
      "loss_": 0.9348,
      "moe_loss": 0.1611,
      "moe_loss_longrong": 1.4551,
      "step": 147
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.9743589743589745e-05,
      "loss": 2.6317,
      "loss_": 1.1054,
      "moe_loss": 0.1611,
      "moe_loss_longrong": 1.4542,
      "step": 154
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.999995145147809e-05,
      "loss": 2.658,
      "loss_": 0.9526,
      "moe_loss": 0.1611,
      "moe_loss_longrong": 1.4514,
      "step": 161
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.9999720361590812e-05,
      "loss": 2.6381,
      "loss_": 0.9977,
      "moe_loss": 0.1615,
      "moe_loss_longrong": 1.4506,
      "step": 168
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.9999298966967264e-05,
      "loss": 2.6193,
      "loss_": 0.8738,
      "moe_loss": 0.1612,
      "moe_loss_longrong": 1.4527,
      "step": 175
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.9998687275627008e-05,
      "loss": 2.617,
      "loss_": 1.0383,
      "moe_loss": 0.1612,
      "moe_loss_longrong": 1.4503,
      "step": 182
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.999788529921114e-05,
      "loss": 2.6334,
      "loss_": 0.9878,
      "moe_loss": 0.1611,
      "moe_loss_longrong": 1.4484,
      "step": 189
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.9996893052982083e-05,
      "loss": 2.6288,
      "loss_": 0.9578,
      "moe_loss": 0.161,
      "moe_loss_longrong": 1.4476,
      "step": 196
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.9995710555823277e-05,
      "loss": 2.6573,
      "loss_": 1.0551,
      "moe_loss": 0.1608,
      "moe_loss_longrong": 1.4478,
      "step": 203
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.9994337830238836e-05,
      "loss": 2.6195,
      "loss_": 1.2421,
      "moe_loss": 0.1609,
      "moe_loss_longrong": 1.4449,
      "step": 210
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.9992774902353104e-05,
      "loss": 2.5979,
      "loss_": 1.0235,
      "moe_loss": 0.1627,
      "moe_loss_longrong": 1.4802,
      "step": 217
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.9991021801910177e-05,
      "loss": 2.6143,
      "loss_": 1.1486,
      "moe_loss": 0.1608,
      "moe_loss_longrong": 1.4443,
      "step": 224
    },
    {
      "epoch": 0.04,
      "learning_rate": 1.9989078562273313e-05,
      "loss": 2.6047,
      "loss_": 0.9541,
      "moe_loss": 0.161,
      "moe_loss_longrong": 1.4455,
      "step": 231
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.9986945220424326e-05,
      "loss": 2.6336,
      "loss_": 1.1406,
      "moe_loss": 0.1607,
      "moe_loss_longrong": 1.4428,
      "step": 238
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.9984621816962843e-05,
      "loss": 2.6217,
      "loss_": 1.0207,
      "moe_loss": 0.1608,
      "moe_loss_longrong": 1.4447,
      "step": 245
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.9982108396105584e-05,
      "loss": 2.6014,
      "loss_": 1.3744,
      "moe_loss": 0.1608,
      "moe_loss_longrong": 1.4426,
      "step": 252
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.9979405005685466e-05,
      "loss": 2.6134,
      "loss_": 0.9548,
      "moe_loss": 0.1609,
      "moe_loss_longrong": 1.4415,
      "step": 259
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.997651169715073e-05,
      "loss": 2.6022,
      "loss_": 1.058,
      "moe_loss": 0.1607,
      "moe_loss_longrong": 1.4413,
      "step": 266
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.9973428525563948e-05,
      "loss": 2.6219,
      "loss_": 1.1897,
      "moe_loss": 0.1607,
      "moe_loss_longrong": 1.4404,
      "step": 273
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.9970155549600978e-05,
      "loss": 2.6232,
      "loss_": 1.25,
      "moe_loss": 0.1607,
      "moe_loss_longrong": 1.4401,
      "step": 280
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.996669283154984e-05,
      "loss": 2.5805,
      "loss_": 1.138,
      "moe_loss": 0.1607,
      "moe_loss_longrong": 1.4387,
      "step": 287
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.996304043730955e-05,
      "loss": 2.6188,
      "loss_": 1.2563,
      "moe_loss": 0.1607,
      "moe_loss_longrong": 1.4386,
      "step": 294
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.995919843638883e-05,
      "loss": 2.5867,
      "loss_": 0.9975,
      "moe_loss": 0.1607,
      "moe_loss_longrong": 1.4387,
      "step": 301
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.9955166901904838e-05,
      "loss": 2.5987,
      "loss_": 1.157,
      "moe_loss": 0.1607,
      "moe_loss_longrong": 1.4366,
      "step": 308
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.9950945910581718e-05,
      "loss": 2.5875,
      "loss_": 1.0582,
      "moe_loss": 0.1606,
      "moe_loss_longrong": 1.4378,
      "step": 315
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.9946535542749187e-05,
      "loss": 2.5848,
      "loss_": 0.9934,
      "moe_loss": 0.1624,
      "moe_loss_longrong": 1.4703,
      "step": 322
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.9941935882340976e-05,
      "loss": 2.6086,
      "loss_": 0.8756,
      "moe_loss": 0.1607,
      "moe_loss_longrong": 1.4366,
      "step": 329
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.9937147016893257e-05,
      "loss": 2.5968,
      "loss_": 1.1941,
      "moe_loss": 0.1608,
      "moe_loss_longrong": 1.4376,
      "step": 336
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.9932169037542947e-05,
      "loss": 2.6158,
      "loss_": 0.9761,
      "moe_loss": 0.1606,
      "moe_loss_longrong": 1.4358,
      "step": 343
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.9927002039026002e-05,
      "loss": 2.5944,
      "loss_": 1.2162,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.4346,
      "step": 350
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.9921646119675606e-05,
      "loss": 2.5806,
      "loss_": 0.8511,
      "moe_loss": 0.1606,
      "moe_loss_longrong": 1.4358,
      "step": 357
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.9916101381420285e-05,
      "loss": 2.6285,
      "loss_": 0.9065,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.4361,
      "step": 364
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.991036792978199e-05,
      "loss": 2.6076,
      "loss_": 0.7095,
      "moe_loss": 0.1606,
      "moe_loss_longrong": 1.4347,
      "step": 371
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.9904445873874068e-05,
      "loss": 2.5824,
      "loss_": 0.571,
      "moe_loss": 0.1606,
      "moe_loss_longrong": 1.4343,
      "step": 378
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.98983353263992e-05,
      "loss": 2.5803,
      "loss_": 0.9037,
      "moe_loss": 0.1606,
      "moe_loss_longrong": 1.4344,
      "step": 385
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.9892036403647256e-05,
      "loss": 2.6071,
      "loss_": 1.0289,
      "moe_loss": 0.1606,
      "moe_loss_longrong": 1.4333,
      "step": 392
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.9885549225493064e-05,
      "loss": 2.6155,
      "loss_": 1.227,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.4324,
      "step": 399
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.9878873915394154e-05,
      "loss": 2.6057,
      "loss_": 1.276,
      "moe_loss": 0.1607,
      "moe_loss_longrong": 1.4326,
      "step": 406
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.987201060038839e-05,
      "loss": 2.5446,
      "loss_": 1.1148,
      "moe_loss": 0.1606,
      "moe_loss_longrong": 1.432,
      "step": 413
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.986495941109156e-05,
      "loss": 2.5787,
      "loss_": 0.9601,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.4317,
      "step": 420
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.9857720481694887e-05,
      "loss": 2.6018,
      "loss_": 0.8145,
      "moe_loss": 0.1606,
      "moe_loss_longrong": 1.4318,
      "step": 427
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.985029394996248e-05,
      "loss": 2.5863,
      "loss_": 0.9872,
      "moe_loss": 0.1618,
      "moe_loss_longrong": 1.4613,
      "step": 434
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.9842679957228706e-05,
      "loss": 2.5837,
      "loss_": 1.165,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.43,
      "step": 441
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.9834878648395507e-05,
      "loss": 2.6015,
      "loss_": 0.9562,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.4307,
      "step": 448
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.9826890171929634e-05,
      "loss": 2.5453,
      "loss_": 0.8231,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.4306,
      "step": 455
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.981871467985983e-05,
      "loss": 2.578,
      "loss_": 0.9864,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.4306,
      "step": 462
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.9810352327773935e-05,
      "loss": 2.5723,
      "loss_": 1.1748,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.4305,
      "step": 469
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.9801803274815915e-05,
      "loss": 2.6173,
      "loss_": 1.0737,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.4297,
      "step": 476
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.979306768368285e-05,
      "loss": 2.5664,
      "loss_": 1.3735,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.4291,
      "step": 483
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.9784145720621827e-05,
      "loss": 2.5832,
      "loss_": 1.0223,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.4296,
      "step": 490
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.9775037555426772e-05,
      "loss": 2.5448,
      "loss_": 1.2395,
      "moe_loss": 0.1607,
      "moe_loss_longrong": 1.4291,
      "step": 497
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.9765743361435234e-05,
      "loss": 2.5729,
      "loss_": 1.1156,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4273,
      "step": 504
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.975626331552507e-05,
      "loss": 2.5526,
      "loss_": 0.8797,
      "moe_loss": 0.1606,
      "moe_loss_longrong": 1.4281,
      "step": 511
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.974659759811109e-05,
      "loss": 2.573,
      "loss_": 1.1636,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.4274,
      "step": 518
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.9736746393141617e-05,
      "loss": 2.59,
      "loss_": 1.1342,
      "moe_loss": 0.1606,
      "moe_loss_longrong": 1.4279,
      "step": 525
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.9726709888094994e-05,
      "loss": 2.5921,
      "loss_": 0.8051,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.4277,
      "step": 532
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.9716488273976006e-05,
      "loss": 2.6023,
      "loss_": 1.2093,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.4276,
      "step": 539
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.970608174531224e-05,
      "loss": 2.5744,
      "loss_": 0.9951,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4256,
      "step": 546
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.9695490500150418e-05,
      "loss": 2.5917,
      "loss_": 1.0794,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.427,
      "step": 553
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.9684714740052584e-05,
      "loss": 2.5849,
      "loss_": 0.8469,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.426,
      "step": 560
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.9673754670092283e-05,
      "loss": 2.5705,
      "loss_": 0.96,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4265,
      "step": 567
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.9662610498850684e-05,
      "loss": 2.5672,
      "loss_": 1.038,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.4258,
      "step": 574
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.965128243841256e-05,
      "loss": 2.553,
      "loss_": 1.1173,
      "moe_loss": 0.1606,
      "moe_loss_longrong": 1.4267,
      "step": 581
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.9639770704362305e-05,
      "loss": 2.5951,
      "loss_": 1.1815,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.4255,
      "step": 588
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.9628075515779796e-05,
      "loss": 2.5528,
      "loss_": 0.916,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4247,
      "step": 595
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.961619709523623e-05,
      "loss": 2.5537,
      "loss_": 1.1069,
      "moe_loss": 0.1606,
      "moe_loss_longrong": 1.4252,
      "step": 602
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9604135668789897e-05,
      "loss": 2.553,
      "loss_": 0.8815,
      "moe_loss": 0.1616,
      "moe_loss_longrong": 1.4545,
      "step": 609
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.959189146598188e-05,
      "loss": 2.557,
      "loss_": 0.3617,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.4253,
      "step": 616
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9579464719831668e-05,
      "loss": 2.5735,
      "loss_": 1.1934,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4248,
      "step": 623
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9566855666832743e-05,
      "loss": 2.5679,
      "loss_": 1.2144,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4241,
      "step": 630
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9554064546948064e-05,
      "loss": 2.5541,
      "loss_": 0.7773,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4247,
      "step": 637
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9541091603605508e-05,
      "loss": 2.5396,
      "loss_": 1.0911,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.4238,
      "step": 644
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.9527937083693233e-05,
      "loss": 2.5328,
      "loss_": 1.2836,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4241,
      "step": 651
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.951460123755499e-05,
      "loss": 2.559,
      "loss_": 0.6191,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4249,
      "step": 658
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.9501084318985335e-05,
      "loss": 2.5656,
      "loss_": 0.5936,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4238,
      "step": 665
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.948738658522483e-05,
      "loss": 2.5408,
      "loss_": 1.0426,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.4256,
      "step": 672
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.9473508296955126e-05,
      "loss": 2.5346,
      "loss_": 1.0259,
      "moe_loss": 0.1613,
      "moe_loss_longrong": 1.4496,
      "step": 679
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.9459449718294008e-05,
      "loss": 2.5744,
      "loss_": 1.2413,
      "moe_loss": 0.1606,
      "moe_loss_longrong": 1.4233,
      "step": 686
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.9445211116790365e-05,
      "loss": 2.5513,
      "loss_": 1.1087,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4224,
      "step": 693
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.9430792763419105e-05,
      "loss": 2.5552,
      "loss_": 1.1375,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4219,
      "step": 700
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.9416194932576e-05,
      "loss": 2.5634,
      "loss_": 0.8712,
      "moe_loss": 0.1606,
      "moe_loss_longrong": 1.4224,
      "step": 707
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.9401417902072447e-05,
      "loss": 2.5538,
      "loss_": 0.9992,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4211,
      "step": 714
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.93864619531302e-05,
      "loss": 2.5786,
      "loss_": 1.0579,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4223,
      "step": 721
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.9371327370376018e-05,
      "loss": 2.5565,
      "loss_": 1.1717,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4223,
      "step": 728
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.935601444183622e-05,
      "loss": 2.5491,
      "loss_": 1.0508,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.4216,
      "step": 735
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.934052345893125e-05,
      "loss": 2.5485,
      "loss_": 1.1535,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4201,
      "step": 742
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.932485471647009e-05,
      "loss": 2.5434,
      "loss_": 0.8935,
      "moe_loss": 0.1613,
      "moe_loss_longrong": 1.4462,
      "step": 749
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.9309008512644668e-05,
      "loss": 2.5549,
      "loss_": 0.9146,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4212,
      "step": 756
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.929298514902418e-05,
      "loss": 2.5655,
      "loss_": 1.2834,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4206,
      "step": 763
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.927678493054935e-05,
      "loss": 2.5664,
      "loss_": 1.1543,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4203,
      "step": 770
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.9260408165526638e-05,
      "loss": 2.5559,
      "loss_": 1.1544,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4205,
      "step": 777
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.9243855165622345e-05,
      "loss": 2.538,
      "loss_": 0.9985,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.42,
      "step": 784
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.9227126245856716e-05,
      "loss": 2.528,
      "loss_": 0.766,
      "moe_loss": 0.1609,
      "moe_loss_longrong": 1.4442,
      "step": 791
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.921022172459791e-05,
      "loss": 2.56,
      "loss_": 1.0356,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4201,
      "step": 798
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.9193141923555984e-05,
      "loss": 2.5418,
      "loss_": 1.0224,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4191,
      "step": 805
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.917588716777672e-05,
      "loss": 2.5489,
      "loss_": 1.2415,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.42,
      "step": 812
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.9158457785635478e-05,
      "loss": 2.5647,
      "loss_": 1.0902,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4194,
      "step": 819
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.914085410883093e-05,
      "loss": 2.5695,
      "loss_": 1.0454,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4196,
      "step": 826
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.9123076472378753e-05,
      "loss": 2.5355,
      "loss_": 1.1475,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4185,
      "step": 833
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.910512521460525e-05,
      "loss": 2.5557,
      "loss_": 0.7606,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4187,
      "step": 840
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.908700067714091e-05,
      "loss": 2.5223,
      "loss_": 0.9963,
      "moe_loss": 0.1611,
      "moe_loss_longrong": 1.4451,
      "step": 847
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.906870320491391e-05,
      "loss": 2.5422,
      "loss_": 0.6658,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4189,
      "step": 854
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.9050233146143554e-05,
      "loss": 2.5373,
      "loss_": 1.022,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4191,
      "step": 861
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.9031590852333637e-05,
      "loss": 2.5536,
      "loss_": 0.9191,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4191,
      "step": 868
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.9012776678265756e-05,
      "loss": 2.5076,
      "loss_": 0.4788,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4179,
      "step": 875
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.899379098199257e-05,
      "loss": 2.5061,
      "loss_": 1.1005,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4178,
      "step": 882
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.897463412483098e-05,
      "loss": 2.5584,
      "loss_": 1.0856,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4184,
      "step": 889
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.895530647135524e-05,
      "loss": 2.5329,
      "loss_": 0.7922,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4175,
      "step": 896
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.8935808389390032e-05,
      "loss": 2.524,
      "loss_": 1.0799,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4163,
      "step": 903
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.8916140250003475e-05,
      "loss": 2.5423,
      "loss_": 0.8152,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4174,
      "step": 910
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.8896302427500042e-05,
      "loss": 2.533,
      "loss_": 1.0454,
      "moe_loss": 0.1605,
      "moe_loss_longrong": 1.4181,
      "step": 917
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.8876295299413445e-05,
      "loss": 2.522,
      "loss_": 1.1185,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4169,
      "step": 924
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.885611924649946e-05,
      "loss": 2.5539,
      "loss_": 1.1256,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4167,
      "step": 931
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.883577465272866e-05,
      "loss": 2.5069,
      "loss_": 0.743,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4177,
      "step": 938
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.8815261905279133e-05,
      "loss": 2.5429,
      "loss_": 1.0463,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4162,
      "step": 945
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.879458139452909e-05,
      "loss": 2.5381,
      "loss_": 1.0908,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4152,
      "step": 952
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.877373351404946e-05,
      "loss": 2.4924,
      "loss_": 1.1275,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4167,
      "step": 959
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.8752718660596367e-05,
      "loss": 2.536,
      "loss_": 0.7467,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4165,
      "step": 966
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.873153723410362e-05,
      "loss": 2.507,
      "loss_": 1.0083,
      "moe_loss": 0.1609,
      "moe_loss_longrong": 1.4404,
      "step": 973
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.8710189637675055e-05,
      "loss": 2.5118,
      "loss_": 0.874,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4161,
      "step": 980
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.8688676277576916e-05,
      "loss": 2.5415,
      "loss_": 1.1152,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4157,
      "step": 987
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.866699756323008e-05,
      "loss": 2.5225,
      "loss_": 0.8857,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.416,
      "step": 994
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.8645153907202285e-05,
      "loss": 2.5093,
      "loss_": 1.0791,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4157,
      "step": 1001
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.862314572520028e-05,
      "loss": 2.534,
      "loss_": 1.1649,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4167,
      "step": 1008
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.86009734360619e-05,
      "loss": 2.559,
      "loss_": 1.2289,
      "moe_loss": 0.1604,
      "moe_loss_longrong": 1.4163,
      "step": 1015
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.8578637461748105e-05,
      "loss": 2.5738,
      "loss_": 0.8422,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.416,
      "step": 1022
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.8556138227334957e-05,
      "loss": 2.5752,
      "loss_": 1.0332,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4157,
      "step": 1029
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.853347616100552e-05,
      "loss": 2.5633,
      "loss_": 1.2742,
      "moe_loss": 0.1602,
      "moe_loss_longrong": 1.4152,
      "step": 1036
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.8510651694041702e-05,
      "loss": 2.5491,
      "loss_": 1.1394,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4153,
      "step": 1043
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.848766526081607e-05,
      "loss": 2.5032,
      "loss_": 1.0904,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4148,
      "step": 1050
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.846451729878357e-05,
      "loss": 2.5687,
      "loss_": 1.0973,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4144,
      "step": 1057
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.84412082484732e-05,
      "loss": 2.5378,
      "loss_": 0.7628,
      "moe_loss": 0.161,
      "moe_loss_longrong": 1.4372,
      "step": 1064
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.841773855347963e-05,
      "loss": 2.5285,
      "loss_": 1.0832,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4148,
      "step": 1071
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.8394108660454766e-05,
      "loss": 2.53,
      "loss_": 0.7952,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.415,
      "step": 1078
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.8370319019099236e-05,
      "loss": 2.5457,
      "loss_": 0.8855,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4146,
      "step": 1085
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.8346370082153843e-05,
      "loss": 2.5227,
      "loss_": 1.1518,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4148,
      "step": 1092
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.8322262305390948e-05,
      "loss": 2.5268,
      "loss_": 1.0055,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4139,
      "step": 1099
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.8297996147605787e-05,
      "loss": 2.5418,
      "loss_": 1.2226,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4139,
      "step": 1106
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.8273572070607756e-05,
      "loss": 2.5465,
      "loss_": 1.0475,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.414,
      "step": 1113
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.8248990539211596e-05,
      "loss": 2.5132,
      "loss_": 1.2063,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4148,
      "step": 1120
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.822425202122858e-05,
      "loss": 2.5236,
      "loss_": 1.3605,
      "moe_loss": 0.1603,
      "moe_loss_longrong": 1.4145,
      "step": 1127
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.819935698745759e-05,
      "loss": 2.517,
      "loss_": 0.9965,
      "moe_loss": 0.1602,
      "moe_loss_longrong": 1.4137,
      "step": 1134
    },
    {
| "epoch": 0.22, | |
| "learning_rate": 1.817430591167615e-05, | |
| "loss": 2.5347, | |
| "loss_": 1.1165, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4133, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.8149099270631434e-05, | |
| "loss": 2.5051, | |
| "loss_": 0.9918, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4133, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.8123737544031178e-05, | |
| "loss": 2.5228, | |
| "loss_": 1.0757, | |
| "moe_loss": 0.1603, | |
| "moe_loss_longrong": 1.4132, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.8098221214534543e-05, | |
| "loss": 2.5117, | |
| "loss_": 0.9441, | |
| "moe_loss": 0.1608, | |
| "moe_loss_longrong": 1.435, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.807255076774294e-05, | |
| "loss": 2.5292, | |
| "loss_": 0.9315, | |
| "moe_loss": 0.1608, | |
| "moe_loss_longrong": 1.4332, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.80467266921908e-05, | |
| "loss": 2.4974, | |
| "loss_": 0.9341, | |
| "moe_loss": 0.1603, | |
| "moe_loss_longrong": 1.414, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.802074947933625e-05, | |
| "loss": 2.5251, | |
| "loss_": 1.0369, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4127, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.799461962355178e-05, | |
| "loss": 2.5424, | |
| "loss_": 1.2525, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4133, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.7968337622114824e-05, | |
| "loss": 2.5123, | |
| "loss_": 1.4116, | |
| "moe_loss": 0.1603, | |
| "moe_loss_longrong": 1.4131, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.7941903975198305e-05, | |
| "loss": 2.5119, | |
| "loss_": 0.8823, | |
| "moe_loss": 0.1603, | |
| "moe_loss_longrong": 1.4132, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.791531918586112e-05, | |
| "loss": 2.5372, | |
| "loss_": 0.9075, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4125, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.7888583760038534e-05, | |
| "loss": 2.5356, | |
| "loss_": 1.065, | |
| "moe_loss": 0.1603, | |
| "moe_loss_longrong": 1.4133, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.78616982065326e-05, | |
| "loss": 2.5154, | |
| "loss_": 0.9056, | |
| "moe_loss": 0.1611, | |
| "moe_loss_longrong": 1.4338, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.7834663037002444e-05, | |
| "loss": 2.5377, | |
| "loss_": 0.8469, | |
| "moe_loss": 0.1603, | |
| "moe_loss_longrong": 1.4123, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.7807478765954532e-05, | |
| "loss": 2.5363, | |
| "loss_": 1.0507, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4116, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.778014591073288e-05, | |
| "loss": 2.5131, | |
| "loss_": 1.0501, | |
| "moe_loss": 0.1603, | |
| "moe_loss_longrong": 1.4123, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.7752664991509224e-05, | |
| "loss": 2.5127, | |
| "loss_": 1.0583, | |
| "moe_loss": 0.1603, | |
| "moe_loss_longrong": 1.412, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.7725036531273087e-05, | |
| "loss": 2.5147, | |
| "loss_": 1.0225, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4119, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.7697261055821864e-05, | |
| "loss": 2.5399, | |
| "loss_": 1.1622, | |
| "moe_loss": 0.1603, | |
| "moe_loss_longrong": 1.4123, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.7669339093750786e-05, | |
| "loss": 2.5042, | |
| "loss_": 1.1, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4118, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.7641271176442876e-05, | |
| "loss": 2.4867, | |
| "loss_": 1.1917, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4116, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.761305783805883e-05, | |
| "loss": 2.5159, | |
| "loss_": 1.1055, | |
| "moe_loss": 0.1603, | |
| "moe_loss_longrong": 1.4114, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.7584699615526857e-05, | |
| "loss": 2.5064, | |
| "loss_": 0.7231, | |
| "moe_loss": 0.1603, | |
| "moe_loss_longrong": 1.4119, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.755619704853246e-05, | |
| "loss": 2.5335, | |
| "loss_": 0.9998, | |
| "moe_loss": 0.1603, | |
| "moe_loss_longrong": 1.4112, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.752755067950814e-05, | |
| "loss": 2.5332, | |
| "loss_": 1.2823, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4114, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.749876105362313e-05, | |
| "loss": 2.5212, | |
| "loss_": 1.2273, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4109, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 1.746982871877296e-05, | |
| "loss": 2.5149, | |
| "loss_": 1.2552, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4113, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.744075422556906e-05, | |
| "loss": 2.4876, | |
| "loss_": 0.9236, | |
| "moe_loss": 0.1603, | |
| "moe_loss_longrong": 1.4115, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.741153812732828e-05, | |
| "loss": 2.5421, | |
| "loss_": 1.155, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4106, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.7382180980062365e-05, | |
| "loss": 2.5095, | |
| "loss_": 0.6978, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4109, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.735268334246734e-05, | |
| "loss": 2.5068, | |
| "loss_": 1.0142, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4105, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.7323045775912927e-05, | |
| "loss": 2.5247, | |
| "loss_": 0.9039, | |
| "moe_loss": 0.1603, | |
| "moe_loss_longrong": 1.4111, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.7293268844431826e-05, | |
| "loss": 2.5308, | |
| "loss_": 1.1161, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4101, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 1.7263353114708993e-05, | |
| "loss": 2.516, | |
| "loss_": 0.5494, | |
| "moe_loss": 0.1603, | |
| "moe_loss_longrong": 1.4105, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.7233299156070852e-05, | |
| "loss": 2.5405, | |
| "loss_": 1.0823, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4091, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.720310754047446e-05, | |
| "loss": 2.5123, | |
| "loss_": 1.016, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.41, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.717277884249664e-05, | |
| "loss": 2.4917, | |
| "loss_": 1.1087, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4103, | |
| "step": 1393 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.7142313639323012e-05, | |
| "loss": 2.5184, | |
| "loss_": 1.2711, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4101, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.7111712510737035e-05, | |
| "loss": 2.5037, | |
| "loss_": 1.1198, | |
| "moe_loss": 0.1603, | |
| "moe_loss_longrong": 1.4102, | |
| "step": 1407 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.7080976039108964e-05, | |
| "loss": 2.5262, | |
| "loss_": 0.9286, | |
| "moe_loss": 0.1612, | |
| "moe_loss_longrong": 1.4315, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.7050104809384774e-05, | |
| "loss": 2.4892, | |
| "loss_": 1.0377, | |
| "moe_loss": 0.1609, | |
| "moe_loss_longrong": 1.4276, | |
| "step": 1421 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.7019099409075014e-05, | |
| "loss": 2.5076, | |
| "loss_": 0.9405, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4092, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.6987960428243637e-05, | |
| "loss": 2.5198, | |
| "loss_": 1.3093, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4094, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.6956688459496767e-05, | |
| "loss": 2.5508, | |
| "loss_": 1.0043, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4083, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.6925284097971427e-05, | |
| "loss": 2.5299, | |
| "loss_": 1.1324, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4091, | |
| "step": 1449 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.6893747941324197e-05, | |
| "loss": 2.5495, | |
| "loss_": 0.7979, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4097, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.6862080589719863e-05, | |
| "loss": 2.4692, | |
| "loss_": 0.563, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4097, | |
| "step": 1463 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.6830282645819974e-05, | |
| "loss": 2.5107, | |
| "loss_": 0.3532, | |
| "moe_loss": 0.1604, | |
| "moe_loss_longrong": 1.4107, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 1.679835471477139e-05, | |
| "loss": 2.498, | |
| "loss_": 0.9877, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4086, | |
| "step": 1477 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.6766297404194745e-05, | |
| "loss": 2.502, | |
| "loss_": 1.1402, | |
| "moe_loss": 0.1603, | |
| "moe_loss_longrong": 1.4091, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.673411132417291e-05, | |
| "loss": 2.5066, | |
| "loss_": 1.167, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.409, | |
| "step": 1491 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.6701797087239354e-05, | |
| "loss": 2.5273, | |
| "loss_": 0.914, | |
| "moe_loss": 0.1607, | |
| "moe_loss_longrong": 1.4264, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.666935530836651e-05, | |
| "loss": 2.5022, | |
| "loss_": 0.9724, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4087, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.663678660495406e-05, | |
| "loss": 2.4806, | |
| "loss_": 1.1766, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4086, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.6604091596817193e-05, | |
| "loss": 2.5228, | |
| "loss_": 0.8955, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4086, | |
| "step": 1519 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.657127090617479e-05, | |
| "loss": 2.5303, | |
| "loss_": 0.9496, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4079, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.6538325157637614e-05, | |
| "loss": 2.5162, | |
| "loss_": 0.8978, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4085, | |
| "step": 1533 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.650525497819639e-05, | |
| "loss": 2.5187, | |
| "loss_": 0.787, | |
| "moe_loss": 0.1608, | |
| "moe_loss_longrong": 1.4256, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.6472060997209898e-05, | |
| "loss": 2.5283, | |
| "loss_": 1.1671, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4085, | |
| "step": 1547 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.6438743846392987e-05, | |
| "loss": 2.5049, | |
| "loss_": 0.7488, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4082, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.6405304159804534e-05, | |
| "loss": 2.4966, | |
| "loss_": 1.0698, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4078, | |
| "step": 1561 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.6371742573835426e-05, | |
| "loss": 2.5307, | |
| "loss_": 1.1426, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4075, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.6338059727196386e-05, | |
| "loss": 2.4884, | |
| "loss_": 1.0242, | |
| "moe_loss": 0.1603, | |
| "moe_loss_longrong": 1.4078, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.6304256260905872e-05, | |
| "loss": 2.5239, | |
| "loss_": 1.1671, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4079, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.627033281827785e-05, | |
| "loss": 2.5292, | |
| "loss_": 0.9625, | |
| "moe_loss": 0.1607, | |
| "moe_loss_longrong": 1.4262, | |
| "step": 1589 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.6236290044909543e-05, | |
| "loss": 2.5336, | |
| "loss_": 0.8255, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4081, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.6202128588669177e-05, | |
| "loss": 2.5205, | |
| "loss_": 0.7348, | |
| "moe_loss": 0.1603, | |
| "moe_loss_longrong": 1.4082, | |
| "step": 1603 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.6167849099683623e-05, | |
| "loss": 2.4854, | |
| "loss_": 0.9767, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4075, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.6133452230326035e-05, | |
| "loss": 2.5265, | |
| "loss_": 0.9913, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4074, | |
| "step": 1617 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.609893863520343e-05, | |
| "loss": 2.4785, | |
| "loss_": 0.9806, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4076, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 1.6064308971144236e-05, | |
| "loss": 2.5053, | |
| "loss_": 1.207, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4073, | |
| "step": 1631 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 1.60295638971858e-05, | |
| "loss": 2.5212, | |
| "loss_": 1.2091, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4072, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 1.599470407456182e-05, | |
| "loss": 2.5177, | |
| "loss_": 1.0634, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4071, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 1.5959730166689783e-05, | |
| "loss": 2.5219, | |
| "loss_": 0.7302, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4069, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 1.5924642839158334e-05, | |
| "loss": 2.5273, | |
| "loss_": 1.1267, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4065, | |
| "step": 1659 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 1.5889442759714603e-05, | |
| "loss": 2.5067, | |
| "loss_": 0.8492, | |
| "moe_loss": 0.1606, | |
| "moe_loss_longrong": 1.4234, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 1.5854130598251514e-05, | |
| "loss": 2.4997, | |
| "loss_": 1.0397, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4065, | |
| "step": 1673 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 1.581870702679501e-05, | |
| "loss": 2.5277, | |
| "loss_": 0.9804, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4073, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 1.5783172719491288e-05, | |
| "loss": 2.5191, | |
| "loss_": 1.1463, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4064, | |
| "step": 1687 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.5747528352593956e-05, | |
| "loss": 2.4859, | |
| "loss_": 1.0594, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4063, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.5711774604451168e-05, | |
| "loss": 2.5146, | |
| "loss_": 1.0352, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4063, | |
| "step": 1701 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.567591215549271e-05, | |
| "loss": 2.5086, | |
| "loss_": 0.8248, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4067, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.5639941688217063e-05, | |
| "loss": 2.4807, | |
| "loss_": 0.8445, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4063, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.5603863887178393e-05, | |
| "loss": 2.5192, | |
| "loss_": 0.9476, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4057, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.5567679438973543e-05, | |
| "loss": 2.5131, | |
| "loss_": 1.1376, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4062, | |
| "step": 1729 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.5531389032228955e-05, | |
| "loss": 2.4964, | |
| "loss_": 1.2426, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4064, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.549499335758757e-05, | |
| "loss": 2.5134, | |
| "loss_": 0.8763, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.406, | |
| "step": 1743 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.5458493107695688e-05, | |
| "loss": 2.4855, | |
| "loss_": 1.1827, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4061, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.542188897718977e-05, | |
| "loss": 2.4889, | |
| "loss_": 1.014, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4064, | |
| "step": 1757 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.5385181662683244e-05, | |
| "loss": 2.5111, | |
| "loss_": 1.0933, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4058, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.534837186275322e-05, | |
| "loss": 2.5385, | |
| "loss_": 0.8571, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4062, | |
| "step": 1771 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.531146027792722e-05, | |
| "loss": 2.5107, | |
| "loss_": 0.9431, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4058, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.527444761066982e-05, | |
| "loss": 2.5031, | |
| "loss_": 1.2333, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4056, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.523733456536931e-05, | |
| "loss": 2.4927, | |
| "loss_": 1.1367, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.406, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.5200121848324276e-05, | |
| "loss": 2.5148, | |
| "loss_": 1.0457, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4052, | |
| "step": 1799 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.5162810167730144e-05, | |
| "loss": 2.4974, | |
| "loss_": 0.8327, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4055, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.5125400233665728e-05, | |
| "loss": 2.4938, | |
| "loss_": 0.965, | |
| "moe_loss": 0.1603, | |
| "moe_loss_longrong": 1.4061, | |
| "step": 1813 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.50878927580797e-05, | |
| "loss": 2.4854, | |
| "loss_": 1.0636, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4061, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.5050288454777047e-05, | |
| "loss": 2.4829, | |
| "loss_": 0.8501, | |
| "moe_loss": 0.1607, | |
| "moe_loss_longrong": 1.4209, | |
| "step": 1827 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.501258803940548e-05, | |
| "loss": 2.5151, | |
| "loss_": 1.2857, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4049, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.4974792229441826e-05, | |
| "loss": 2.5045, | |
| "loss_": 1.0645, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4054, | |
| "step": 1841 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.4936901744178367e-05, | |
| "loss": 2.5062, | |
| "loss_": 0.5678, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4056, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.489891730470914e-05, | |
| "loss": 2.4826, | |
| "loss_": 1.2262, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4049, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.4860839633916236e-05, | |
| "loss": 2.466, | |
| "loss_": 0.7849, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4048, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.4822669456456031e-05, | |
| "loss": 2.4872, | |
| "loss_": 0.8576, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4052, | |
| "step": 1869 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.4784407498745394e-05, | |
| "loss": 2.4951, | |
| "loss_": 0.8778, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4052, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.4746054488947863e-05, | |
| "loss": 2.4876, | |
| "loss_": 0.8237, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.405, | |
| "step": 1883 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.470761115695979e-05, | |
| "loss": 2.4986, | |
| "loss_": 0.9971, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4045, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 1.4669078234396454e-05, | |
| "loss": 2.4678, | |
| "loss_": 1.1283, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4043, | |
| "step": 1897 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.4630456454578122e-05, | |
| "loss": 2.516, | |
| "loss_": 0.9592, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4045, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.4591746552516109e-05, | |
| "loss": 2.5208, | |
| "loss_": 1.0451, | |
| "moe_loss": 0.1607, | |
| "moe_loss_longrong": 1.4182, | |
| "step": 1911 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.4552949264898795e-05, | |
| "loss": 2.498, | |
| "loss_": 0.8697, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4045, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.4514065330077575e-05, | |
| "loss": 2.5174, | |
| "loss_": 0.8274, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4044, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.4475095488052843e-05, | |
| "loss": 2.5038, | |
| "loss_": 0.7792, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4052, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.4436040480459891e-05, | |
| "loss": 2.5116, | |
| "loss_": 0.9444, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4044, | |
| "step": 1939 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 1.4396901050554794e-05, | |
| "loss": 2.4786, | |
| "loss_": 1.0648, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4047, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.435767794320027e-05, | |
| "loss": 2.4987, | |
| "loss_": 1.0158, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4046, | |
| "step": 1953 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.4318371904851502e-05, | |
| "loss": 2.5188, | |
| "loss_": 1.1058, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4045, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.4278983683541934e-05, | |
| "loss": 2.491, | |
| "loss_": 1.1232, | |
| "moe_loss": 0.1606, | |
| "moe_loss_longrong": 1.4187, | |
| "step": 1967 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.4239514028869032e-05, | |
| "loss": 2.487, | |
| "loss_": 0.9791, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4039, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.4199963691980027e-05, | |
| "loss": 2.492, | |
| "loss_": 1.0493, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4046, | |
| "step": 1981 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.4160333425557616e-05, | |
| "loss": 2.5256, | |
| "loss_": 0.6311, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4039, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.4120623983805617e-05, | |
| "loss": 2.502, | |
| "loss_": 0.8536, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4037, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.408083612243465e-05, | |
| "loss": 2.4939, | |
| "loss_": 1.0558, | |
| "moe_loss": 0.1605, | |
| "moe_loss_longrong": 1.4185, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.4040970598647742e-05, | |
| "loss": 2.4975, | |
| "loss_": 0.9278, | |
| "moe_loss": 0.1606, | |
| "moe_loss_longrong": 1.4179, | |
| "step": 2009 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.40010281711259e-05, | |
| "loss": 2.4624, | |
| "loss_": 0.8695, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4036, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.3961009600013702e-05, | |
| "loss": 2.4981, | |
| "loss_": 0.9502, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4041, | |
| "step": 2023 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.39209156469048e-05, | |
| "loss": 2.4973, | |
| "loss_": 1.0486, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4037, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.3880747074827454e-05, | |
| "loss": 2.498, | |
| "loss_": 1.0935, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4031, | |
| "step": 2037 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.384050464822999e-05, | |
| "loss": 2.4956, | |
| "loss_": 0.978, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4045, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 1.3800189132966257e-05, | |
| "loss": 2.4826, | |
| "loss_": 0.9682, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4039, | |
| "step": 2051 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.3759801296281072e-05, | |
| "loss": 2.499, | |
| "loss_": 0.8618, | |
| "moe_loss": 0.1606, | |
| "moe_loss_longrong": 1.4181, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.371934190679558e-05, | |
| "loss": 2.4876, | |
| "loss_": 0.7575, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4032, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.3678811734492659e-05, | |
| "loss": 2.4821, | |
| "loss_": 0.8992, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4031, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.3638211550702256e-05, | |
| "loss": 2.4975, | |
| "loss_": 0.9085, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4031, | |
| "step": 2079 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.3597542128086702e-05, | |
| "loss": 2.4958, | |
| "loss_": 1.1546, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.403, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.3556804240626019e-05, | |
| "loss": 2.5323, | |
| "loss_": 1.0748, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4033, | |
| "step": 2093 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.3515998663603174e-05, | |
| "loss": 2.5085, | |
| "loss_": 1.1199, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4034, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.3475126173589343e-05, | |
| "loss": 2.4864, | |
| "loss_": 0.8556, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4029, | |
| "step": 2107 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.3434187548429126e-05, | |
| "loss": 2.5068, | |
| "loss_": 0.946, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4031, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.3393183567225724e-05, | |
| "loss": 2.4837, | |
| "loss_": 1.1161, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4029, | |
| "step": 2121 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.3352115010326155e-05, | |
| "loss": 2.4825, | |
| "loss_": 0.6543, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4027, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.3310982659306352e-05, | |
| "loss": 2.5257, | |
| "loss_": 1.2189, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4026, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.3269787296956333e-05, | |
| "loss": 2.4993, | |
| "loss_": 0.9341, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4028, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.3228529707265279e-05, | |
| "loss": 2.4981, | |
| "loss_": 1.102, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4029, | |
| "step": 2149 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.3187210675406617e-05, | |
| "loss": 2.5076, | |
| "loss_": 0.6091, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4026, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.3145830987723081e-05, | |
| "loss": 2.4946, | |
| "loss_": 0.972, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4025, | |
| "step": 2163 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.3104391431711748e-05, | |
| "loss": 2.471, | |
| "loss_": 0.8826, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4026, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.306289279600905e-05, | |
| "loss": 2.4847, | |
| "loss_": 1.1855, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4025, | |
| "step": 2177 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.3021335870375763e-05, | |
| "loss": 2.505, | |
| "loss_": 1.0819, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4025, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.297972144568198e-05, | |
| "loss": 2.4909, | |
| "loss_": 0.8074, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4024, | |
| "step": 2191 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.2938050313892062e-05, | |
| "loss": 2.4929, | |
| "loss_": 1.0944, | |
| "moe_loss": 0.1607, | |
| "moe_loss_longrong": 1.4177, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.289632326804956e-05, | |
| "loss": 2.4747, | |
| "loss_": 0.8172, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4027, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.2854541102262119e-05, | |
| "loss": 2.4782, | |
| "loss_": 0.8552, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4028, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.2812704611686386e-05, | |
| "loss": 2.4825, | |
| "loss_": 0.9202, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.402, | |
| "step": 2219 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.2770814592512853e-05, | |
| "loss": 2.4951, | |
| "loss_": 1.1396, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4022, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.2728871841950719e-05, | |
| "loss": 2.4628, | |
| "loss_": 0.9138, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4025, | |
| "step": 2233 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.2686877158212715e-05, | |
| "loss": 2.5028, | |
| "loss_": 0.8915, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.402, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.2644831340499906e-05, | |
| "loss": 2.4802, | |
| "loss_": 1.3262, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4022, | |
| "step": 2247 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.2602735188986498e-05, | |
| "loss": 2.4888, | |
| "loss_": 1.1958, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4025, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.2560589504804592e-05, | |
| "loss": 2.4964, | |
| "loss_": 1.0784, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4019, | |
| "step": 2261 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.2518395090028952e-05, | |
| "loss": 2.4972, | |
| "loss_": 1.164, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.402, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.2476152747661727e-05, | |
| "loss": 2.5173, | |
| "loss_": 1.083, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4018, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.243386328161718e-05, | |
| "loss": 2.5094, | |
| "loss_": 1.1749, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4021, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.2391527496706389e-05, | |
| "loss": 2.5007, | |
| "loss_": 1.2048, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4019, | |
| "step": 2289 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.2349146198621917e-05, | |
| "loss": 2.4613, | |
| "loss_": 0.9356, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4018, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.23067201939225e-05, | |
| "loss": 2.522, | |
| "loss_": 1.3161, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4019, | |
| "step": 2303 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 1.2264250290017675e-05, | |
| "loss": 2.4876, | |
| "loss_": 0.9183, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4018, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.222173729515243e-05, | |
| "loss": 2.4852, | |
| "loss_": 1.0262, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4013, | |
| "step": 2317 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.217918201839182e-05, | |
| "loss": 2.4974, | |
| "loss_": 0.9078, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4019, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.2136585269605558e-05, | |
| "loss": 2.4873, | |
| "loss_": 1.063, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4015, | |
| "step": 2331 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.209394785945263e-05, | |
| "loss": 2.4491, | |
| "loss_": 0.7031, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4026, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.2051270599365825e-05, | |
| "loss": 2.5059, | |
| "loss_": 1.0756, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4012, | |
| "step": 2345 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.2008554301536328e-05, | |
| "loss": 2.4821, | |
| "loss_": 0.508, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4018, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.1965799778898258e-05, | |
| "loss": 2.4776, | |
| "loss_": 1.0053, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4016, | |
| "step": 2359 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.1923007845113178e-05, | |
| "loss": 2.512, | |
| "loss_": 0.6722, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4016, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.1880179314554629e-05, | |
| "loss": 2.4488, | |
| "loss_": 0.4041, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.402, | |
| "step": 2373 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.1837315002292629e-05, | |
| "loss": 2.4889, | |
| "loss_": 1.1084, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.401, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.1794415724078147e-05, | |
| "loss": 2.4732, | |
| "loss_": 0.6909, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4019, | |
| "step": 2387 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.17514822963276e-05, | |
| "loss": 2.4599, | |
| "loss_": 1.0441, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4013, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.1708515536107299e-05, | |
| "loss": 2.472, | |
| "loss_": 0.7234, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4012, | |
| "step": 2401 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.1665516261117914e-05, | |
| "loss": 2.4923, | |
| "loss_": 1.2036, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4014, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.1622485289678886e-05, | |
| "loss": 2.4794, | |
| "loss_": 0.9414, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4011, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.1579423440712887e-05, | |
| "loss": 2.4873, | |
| "loss_": 0.8799, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4007, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.153633153373022e-05, | |
| "loss": 2.4685, | |
| "loss_": 0.8351, | |
| "moe_loss": 0.1605, | |
| "moe_loss_longrong": 1.414, | |
| "step": 2429 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.149321038881321e-05, | |
| "loss": 2.4965, | |
| "loss_": 1.0812, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4007, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.1450060826600618e-05, | |
| "loss": 2.467, | |
| "loss_": 1.0899, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4007, | |
| "step": 2443 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.1406883668272015e-05, | |
| "loss": 2.5148, | |
| "loss_": 0.9878, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4008, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.1363679735532151e-05, | |
| "loss": 2.4869, | |
| "loss_": 1.0094, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4004, | |
| "step": 2457 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 1.132044985059532e-05, | |
| "loss": 2.4687, | |
| "loss_": 0.8133, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4004, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.1277194836169714e-05, | |
| "loss": 2.4692, | |
| "loss_": 1.154, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.401, | |
| "step": 2471 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.1233915515441765e-05, | |
| "loss": 2.4768, | |
| "loss_": 0.9273, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4005, | |
| "step": 2478 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.1190612712060475e-05, | |
| "loss": 2.465, | |
| "loss_": 0.9635, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4005, | |
| "step": 2485 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.1147287250121745e-05, | |
| "loss": 2.5032, | |
| "loss_": 1.3144, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4006, | |
| "step": 2492 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.11039399541527e-05, | |
| "loss": 2.4839, | |
| "loss_": 0.852, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4001, | |
| "step": 2499 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.1060571649095972e-05, | |
| "loss": 2.4618, | |
| "loss_": 0.816, | |
| "moe_loss": 0.1605, | |
| "moe_loss_longrong": 1.4135, | |
| "step": 2506 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.1017183160294033e-05, | |
| "loss": 2.5082, | |
| "loss_": 0.7247, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4005, | |
| "step": 2513 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.0973775313473465e-05, | |
| "loss": 2.5026, | |
| "loss_": 0.9287, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4007, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.0930348934729249e-05, | |
| "loss": 2.4564, | |
| "loss_": 1.0246, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4009, | |
| "step": 2527 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.0886904850509052e-05, | |
| "loss": 2.5123, | |
| "loss_": 1.1915, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4005, | |
| "step": 2534 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.0843443887597495e-05, | |
| "loss": 2.4786, | |
| "loss_": 0.9271, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4004, | |
| "step": 2541 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.0799966873100419e-05, | |
| "loss": 2.4941, | |
| "loss_": 1.2428, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4007, | |
| "step": 2548 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.0756474634429133e-05, | |
| "loss": 2.4861, | |
| "loss_": 1.1406, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4004, | |
| "step": 2555 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.0712967999284682e-05, | |
| "loss": 2.474, | |
| "loss_": 1.0874, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4006, | |
| "step": 2562 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.0669447795642103e-05, | |
| "loss": 2.478, | |
| "loss_": 1.2379, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4004, | |
| "step": 2569 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.0625914851734632e-05, | |
| "loss": 2.4567, | |
| "loss_": 1.0187, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4004, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.0582369996037985e-05, | |
| "loss": 2.4762, | |
| "loss_": 0.9279, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4007, | |
| "step": 2583 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.053881405725456e-05, | |
| "loss": 2.4869, | |
| "loss_": 0.8599, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.0495247864297684e-05, | |
| "loss": 2.5043, | |
| "loss_": 1.0374, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4, | |
| "step": 2597 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.0451672246275826e-05, | |
| "loss": 2.4675, | |
| "loss_": 0.57, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4005, | |
| "step": 2604 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.0408088032476822e-05, | |
| "loss": 2.4752, | |
| "loss_": 0.5417, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4003, | |
| "step": 2611 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.036449605235211e-05, | |
| "loss": 2.4568, | |
| "loss_": 1.056, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3999, | |
| "step": 2618 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 1.0320897135500904e-05, | |
| "loss": 2.4658, | |
| "loss_": 0.8557, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.4, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 1.0277292111654447e-05, | |
| "loss": 2.4553, | |
| "loss_": 0.9975, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3999, | |
| "step": 2632 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 1.0233681810660207e-05, | |
| "loss": 2.4815, | |
| "loss_": 1.0191, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4004, | |
| "step": 2639 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 1.019006706246607e-05, | |
| "loss": 2.4735, | |
| "loss_": 1.0673, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4001, | |
| "step": 2646 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 1.0146448697104561e-05, | |
| "loss": 2.458, | |
| "loss_": 1.2381, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.4, | |
| "step": 2653 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 1.010282754467705e-05, | |
| "loss": 2.463, | |
| "loss_": 1.1428, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3997, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 1.0059204435337938e-05, | |
| "loss": 2.483, | |
| "loss_": 1.2124, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3999, | |
| "step": 2667 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 1.0015580199278873e-05, | |
| "loss": 2.4907, | |
| "loss_": 0.8323, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3995, | |
| "step": 2674 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.971955666712945e-06, | |
| "loss": 2.4936, | |
| "loss_": 1.1091, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3998, | |
| "step": 2681 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.928331667858886e-06, | |
| "loss": 2.5039, | |
| "loss_": 1.0505, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3998, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.884709032925274e-06, | |
| "loss": 2.4704, | |
| "loss_": 0.9685, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.3998, | |
| "step": 2695 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.841088592094726e-06, | |
| "loss": 2.4897, | |
| "loss_": 1.2011, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3993, | |
| "step": 2702 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.797471175508101e-06, | |
| "loss": 2.4642, | |
| "loss_": 1.064, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3997, | |
| "step": 2709 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.753857613248714e-06, | |
| "loss": 2.4746, | |
| "loss_": 1.089, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3995, | |
| "step": 2716 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 9.710248735326519e-06, | |
| "loss": 2.4767, | |
| "loss_": 0.7312, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3996, | |
| "step": 2723 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.666645371662324e-06, | |
| "loss": 2.4693, | |
| "loss_": 1.0271, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3994, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.623048352071998e-06, | |
| "loss": 2.4631, | |
| "loss_": 0.7867, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3992, | |
| "step": 2737 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.579458506250668e-06, | |
| "loss": 2.4744, | |
| "loss_": 1.1123, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.3996, | |
| "step": 2744 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.535876663756955e-06, | |
| "loss": 2.4836, | |
| "loss_": 0.9437, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3994, | |
| "step": 2751 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.492303653997146e-06, | |
| "loss": 2.4822, | |
| "loss_": 0.9857, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.399, | |
| "step": 2758 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.448740306209447e-06, | |
| "loss": 2.4816, | |
| "loss_": 0.9489, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3991, | |
| "step": 2765 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.40518744944818e-06, | |
| "loss": 2.4744, | |
| "loss_": 0.6401, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3996, | |
| "step": 2772 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 9.361645912568015e-06, | |
| "loss": 2.4736, | |
| "loss_": 0.8008, | |
| "moe_loss": 0.1602, | |
| "moe_loss_longrong": 1.3996, | |
| "step": 2779 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 9.318116524208198e-06, | |
| "loss": 2.4719, | |
| "loss_": 0.9666, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3997, | |
| "step": 2786 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 9.27460011277677e-06, | |
| "loss": 2.4865, | |
| "loss_": 1.0383, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3994, | |
| "step": 2793 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 9.231097506434808e-06, | |
| "loss": 2.4683, | |
| "loss_": 0.807, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3995, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 9.187609533080668e-06, | |
| "loss": 2.4738, | |
| "loss_": 1.0131, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3992, | |
| "step": 2807 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 9.144137020334214e-06, | |
| "loss": 2.4559, | |
| "loss_": 0.9178, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3994, | |
| "step": 2814 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 9.100680795521104e-06, | |
| "loss": 2.4832, | |
| "loss_": 0.8958, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3993, | |
| "step": 2821 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 9.057241685656995e-06, | |
| "loss": 2.4729, | |
| "loss_": 0.8244, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.399, | |
| "step": 2828 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 9.013820517431841e-06, | |
| "loss": 2.4458, | |
| "loss_": 0.6857, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3989, | |
| "step": 2835 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 8.970418117194146e-06, | |
| "loss": 2.4789, | |
| "loss_": 0.8677, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3991, | |
| "step": 2842 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 8.927035310935241e-06, | |
| "loss": 2.4633, | |
| "loss_": 0.755, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.399, | |
| "step": 2849 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 8.883672924273566e-06, | |
| "loss": 2.481, | |
| "loss_": 0.9947, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.399, | |
| "step": 2856 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 8.840331782438954e-06, | |
| "loss": 2.4821, | |
| "loss_": 0.6639, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3988, | |
| "step": 2863 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 8.797012710256923e-06, | |
| "loss": 2.4683, | |
| "loss_": 1.2205, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3992, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 8.753716532132992e-06, | |
| "loss": 2.4611, | |
| "loss_": 0.9415, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3989, | |
| "step": 2877 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 8.71044407203697e-06, | |
| "loss": 2.491, | |
| "loss_": 0.9864, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.399, | |
| "step": 2884 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 8.667196153487308e-06, | |
| "loss": 2.4726, | |
| "loss_": 0.865, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3986, | |
| "step": 2891 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 8.623973599535385e-06, | |
| "loss": 2.457, | |
| "loss_": 0.8526, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.399, | |
| "step": 2898 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 8.580777232749883e-06, | |
| "loss": 2.4576, | |
| "loss_": 1.0116, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3988, | |
| "step": 2905 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 8.537607875201106e-06, | |
| "loss": 2.5113, | |
| "loss_": 0.8642, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3987, | |
| "step": 2912 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 8.494466348445345e-06, | |
| "loss": 2.4787, | |
| "loss_": 1.0994, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3988, | |
| "step": 2919 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 8.451353473509254e-06, | |
| "loss": 2.4797, | |
| "loss_": 1.1009, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3983, | |
| "step": 2926 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 8.408270070874201e-06, | |
| "loss": 2.4709, | |
| "loss_": 0.8487, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3984, | |
| "step": 2933 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 8.365216960460675e-06, | |
| "loss": 2.5019, | |
| "loss_": 0.9758, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3987, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 8.322194961612668e-06, | |
| "loss": 2.4919, | |
| "loss_": 0.9281, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3986, | |
| "step": 2947 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 8.279204893082083e-06, | |
| "loss": 2.4788, | |
| "loss_": 0.9675, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3986, | |
| "step": 2954 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 8.23624757301318e-06, | |
| "loss": 2.4796, | |
| "loss_": 0.9676, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3986, | |
| "step": 2961 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 8.193323818926955e-06, | |
| "loss": 2.4471, | |
| "loss_": 0.3893, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3986, | |
| "step": 2968 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 8.150434447705623e-06, | |
| "loss": 2.4644, | |
| "loss_": 1.0186, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3985, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 8.107580275577059e-06, | |
| "loss": 2.4517, | |
| "loss_": 1.2343, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3985, | |
| "step": 2982 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 8.064762118099258e-06, | |
| "loss": 2.4524, | |
| "loss_": 0.7846, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3985, | |
| "step": 2989 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 8.021980790144828e-06, | |
| "loss": 2.4626, | |
| "loss_": 1.0468, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3988, | |
| "step": 2996 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 7.979237105885467e-06, | |
| "loss": 2.4822, | |
| "loss_": 0.5538, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3985, | |
| "step": 3003 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 7.936531878776484e-06, | |
| "loss": 2.4753, | |
| "loss_": 1.1616, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3983, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 7.893865921541294e-06, | |
| "loss": 2.4418, | |
| "loss_": 0.5863, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3987, | |
| "step": 3017 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 7.85124004615598e-06, | |
| "loss": 2.4724, | |
| "loss_": 0.9406, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3982, | |
| "step": 3024 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 7.808655063833832e-06, | |
| "loss": 2.4884, | |
| "loss_": 1.0104, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3989, | |
| "step": 3031 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 7.766111785009888e-06, | |
| "loss": 2.4676, | |
| "loss_": 0.9396, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.398, | |
| "step": 3038 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 7.723611019325538e-06, | |
| "loss": 2.4705, | |
| "loss_": 0.9611, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3982, | |
| "step": 3045 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 7.681153575613098e-06, | |
| "loss": 2.4555, | |
| "loss_": 0.931, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3986, | |
| "step": 3052 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 7.638740261880423e-06, | |
| "loss": 2.4369, | |
| "loss_": 0.7901, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3985, | |
| "step": 3059 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 7.596371885295542e-06, | |
| "loss": 2.4852, | |
| "loss_": 0.9128, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3981, | |
| "step": 3066 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 7.55404925217127e-06, | |
| "loss": 2.5004, | |
| "loss_": 1.0571, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3983, | |
| "step": 3073 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 7.511773167949885e-06, | |
| "loss": 2.4582, | |
| "loss_": 1.0777, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3986, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 7.46954443718779e-06, | |
| "loss": 2.4644, | |
| "loss_": 0.89, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3984, | |
| "step": 3087 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 7.427363863540202e-06, | |
| "loss": 2.4668, | |
| "loss_": 1.0102, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3986, | |
| "step": 3094 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 7.385232249745873e-06, | |
| "loss": 2.4733, | |
| "loss_": 0.6698, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3988, | |
| "step": 3101 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 7.343150397611782e-06, | |
| "loss": 2.5122, | |
| "loss_": 1.2655, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3979, | |
| "step": 3108 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 7.301119107997905e-06, | |
| "loss": 2.461, | |
| "loss_": 1.1851, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3978, | |
| "step": 3115 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 7.2591391808019555e-06, | |
| "loss": 2.4727, | |
| "loss_": 0.8541, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3979, | |
| "step": 3122 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 7.217211414944171e-06, | |
| "loss": 2.4443, | |
| "loss_": 1.0654, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3978, | |
| "step": 3129 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 7.175336608352113e-06, | |
| "loss": 2.4922, | |
| "loss_": 1.184, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3982, | |
| "step": 3136 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 7.133515557945463e-06, | |
| "loss": 2.4643, | |
| "loss_": 1.1851, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3981, | |
| "step": 3143 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 7.091749059620881e-06, | |
| "loss": 2.4581, | |
| "loss_": 1.328, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3982, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 7.0500379082368305e-06, | |
| "loss": 2.4708, | |
| "loss_": 1.0728, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.398, | |
| "step": 3157 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 7.008382897598477e-06, | |
| "loss": 2.4901, | |
| "loss_": 0.9315, | |
| "moe_loss": 0.1606, | |
| "moe_loss_longrong": 1.4088, | |
| "step": 3164 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 6.9667848204425785e-06, | |
| "loss": 2.4706, | |
| "loss_": 1.1113, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3979, | |
| "step": 3171 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 6.9252444684223765e-06, | |
| "loss": 2.4442, | |
| "loss_": 0.7937, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3977, | |
| "step": 3178 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 6.88376263209255e-06, | |
| "loss": 2.4406, | |
| "loss_": 1.1277, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3982, | |
| "step": 3185 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 6.84234010089417e-06, | |
| "loss": 2.4761, | |
| "loss_": 0.9565, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3982, | |
| "step": 3192 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 6.800977663139666e-06, | |
| "loss": 2.4832, | |
| "loss_": 0.7461, | |
| "moe_loss": 0.1605, | |
| "moe_loss_longrong": 1.4073, | |
| "step": 3199 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 6.759676105997834e-06, | |
| "loss": 2.4752, | |
| "loss_": 1.1396, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3978, | |
| "step": 3206 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 6.718436215478849e-06, | |
| "loss": 2.4594, | |
| "loss_": 1.1075, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3978, | |
| "step": 3213 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 6.677258776419304e-06, | |
| "loss": 2.4703, | |
| "loss_": 0.9133, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3975, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 6.63614457246728e-06, | |
| "loss": 2.4534, | |
| "loss_": 0.9049, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3978, | |
| "step": 3227 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 6.595094386067428e-06, | |
| "loss": 2.4945, | |
| "loss_": 1.1701, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3975, | |
| "step": 3234 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 6.554108998446096e-06, | |
| "loss": 2.4832, | |
| "loss_": 0.9606, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3977, | |
| "step": 3241 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 6.513189189596422e-06, | |
| "loss": 2.4639, | |
| "loss_": 1.0635, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3978, | |
| "step": 3248 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 6.472335738263534e-06, | |
| "loss": 2.4609, | |
| "loss_": 0.9759, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3982, | |
| "step": 3255 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 6.431549421929694e-06, | |
| "loss": 2.4641, | |
| "loss_": 0.7825, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.398, | |
| "step": 3262 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 6.390831016799527e-06, | |
| "loss": 2.458, | |
| "loss_": 0.8033, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3977, | |
| "step": 3269 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 6.350181297785242e-06, | |
| "loss": 2.4584, | |
| "loss_": 1.0825, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3974, | |
| "step": 3276 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 6.309601038491874e-06, | |
| "loss": 2.4911, | |
| "loss_": 0.7566, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3979, | |
| "step": 3283 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 6.269091011202576e-06, | |
| "loss": 2.457, | |
| "loss_": 0.6181, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3977, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 6.2286519868639095e-06, | |
| "loss": 2.4458, | |
| "loss_": 1.0953, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3975, | |
| "step": 3297 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.188284735071177e-06, | |
| "loss": 2.4848, | |
| "loss_": 0.9802, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3976, | |
| "step": 3304 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.1479900240537956e-06, | |
| "loss": 2.4815, | |
| "loss_": 1.2048, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3977, | |
| "step": 3311 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.107768620660633e-06, | |
| "loss": 2.4552, | |
| "loss_": 1.039, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3976, | |
| "step": 3318 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.067621290345455e-06, | |
| "loss": 2.4365, | |
| "loss_": 1.2129, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3974, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 6.027548797152336e-06, | |
| "loss": 2.4546, | |
| "loss_": 0.6566, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3973, | |
| "step": 3332 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 5.987551903701128e-06, | |
| "loss": 2.4409, | |
| "loss_": 1.0142, | |
| "moe_loss": 0.1605, | |
| "moe_loss_longrong": 1.408, | |
| "step": 3339 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 5.947631371172943e-06, | |
| "loss": 2.4488, | |
| "loss_": 0.9704, | |
| "moe_loss": 0.1604, | |
| "moe_loss_longrong": 1.408, | |
| "step": 3346 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 5.9077879592956675e-06, | |
| "loss": 2.4569, | |
| "loss_": 0.936, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3972, | |
| "step": 3353 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 5.8680224263295045e-06, | |
| "loss": 2.4519, | |
| "loss_": 0.9728, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3972, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 5.828335529052541e-06, | |
| "loss": 2.4757, | |
| "loss_": 0.9242, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3972, | |
| "step": 3367 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 5.788728022746348e-06, | |
| "loss": 2.4769, | |
| "loss_": 0.8005, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3976, | |
| "step": 3374 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 5.749200661181611e-06, | |
| "loss": 2.4548, | |
| "loss_": 1.116, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3971, | |
| "step": 3381 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 5.709754196603781e-06, | |
| "loss": 2.4687, | |
| "loss_": 0.8613, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3971, | |
| "step": 3388 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 5.67038937971875e-06, | |
| "loss": 2.437, | |
| "loss_": 0.9275, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3974, | |
| "step": 3395 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 5.631106959678575e-06, | |
| "loss": 2.4636, | |
| "loss_": 1.1476, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3974, | |
| "step": 3402 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 5.5919076840672215e-06, | |
| "loss": 2.449, | |
| "loss_": 0.9428, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.397, | |
| "step": 3409 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 5.552792298886335e-06, | |
| "loss": 2.4572, | |
| "loss_": 0.8202, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3973, | |
| "step": 3416 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 5.513761548541032e-06, | |
| "loss": 2.444, | |
| "loss_": 0.905, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3973, | |
| "step": 3423 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 5.474816175825754e-06, | |
| "loss": 2.4189, | |
| "loss_": 1.1022, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3973, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 5.4359569219101115e-06, | |
| "loss": 2.5038, | |
| "loss_": 1.1099, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3969, | |
| "step": 3437 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 5.397184526324792e-06, | |
| "loss": 2.4885, | |
| "loss_": 0.9227, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3973, | |
| "step": 3444 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 5.358499726947488e-06, | |
| "loss": 2.4389, | |
| "loss_": 0.9602, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3971, | |
| "step": 3451 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.31990325998883e-06, | |
| "loss": 2.4275, | |
| "loss_": 0.9191, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3969, | |
| "step": 3458 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.281395859978414e-06, | |
| "loss": 2.4647, | |
| "loss_": 1.0229, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.397, | |
| "step": 3465 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.24297825975079e-06, | |
| "loss": 2.4649, | |
| "loss_": 0.9973, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3973, | |
| "step": 3472 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.2046511904315265e-06, | |
| "loss": 2.4409, | |
| "loss_": 0.6513, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3969, | |
| "step": 3479 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.166415381423306e-06, | |
| "loss": 2.4805, | |
| "loss_": 1.1712, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3971, | |
| "step": 3486 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.128271560392037e-06, | |
| "loss": 2.4496, | |
| "loss_": 1.0721, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3971, | |
| "step": 3493 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.09022045325299e-06, | |
| "loss": 2.473, | |
| "loss_": 1.1122, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3971, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 5.052262784157014e-06, | |
| "loss": 2.4654, | |
| "loss_": 1.0388, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.397, | |
| "step": 3507 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 5.014399275476721e-06, | |
| "loss": 2.463, | |
| "loss_": 1.0244, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3968, | |
| "step": 3514 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.976630647792771e-06, | |
| "loss": 2.4481, | |
| "loss_": 0.7509, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3972, | |
| "step": 3521 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.938957619880138e-06, | |
| "loss": 2.4624, | |
| "loss_": 1.0897, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.397, | |
| "step": 3528 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.901380908694434e-06, | |
| "loss": 2.4236, | |
| "loss_": 1.1599, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.397, | |
| "step": 3535 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.863901229358261e-06, | |
| "loss": 2.4483, | |
| "loss_": 0.8951, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3971, | |
| "step": 3542 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.8265192951476206e-06, | |
| "loss": 2.4552, | |
| "loss_": 0.9006, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3969, | |
| "step": 3549 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 4.789235817478322e-06, | |
| "loss": 2.457, | |
| "loss_": 1.0357, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3969, | |
| "step": 3556 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.752051505892438e-06, | |
| "loss": 2.462, | |
| "loss_": 1.031, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3968, | |
| "step": 3563 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.714967068044826e-06, | |
| "loss": 2.459, | |
| "loss_": 1.2418, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3969, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.677983209689631e-06, | |
| "loss": 2.4449, | |
| "loss_": 0.7941, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3973, | |
| "step": 3577 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.641100634666877e-06, | |
| "loss": 2.4528, | |
| "loss_": 0.7962, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.397, | |
| "step": 3584 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.6043200448890724e-06, | |
| "loss": 2.4674, | |
| "loss_": 1.0349, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3967, | |
| "step": 3591 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.567642140327823e-06, | |
| "loss": 2.4498, | |
| "loss_": 0.9343, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3966, | |
| "step": 3598 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.531067619000553e-06, | |
| "loss": 2.4711, | |
| "loss_": 0.7285, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3967, | |
| "step": 3605 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.494597176957186e-06, | |
| "loss": 2.4578, | |
| "loss_": 0.6286, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3968, | |
| "step": 3612 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.458231508266912e-06, | |
| "loss": 2.4736, | |
| "loss_": 0.8458, | |
| "moe_loss": 0.1605, | |
| "moe_loss_longrong": 1.4062, | |
| "step": 3619 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.421971305004989e-06, | |
| "loss": 2.4841, | |
| "loss_": 0.7491, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3965, | |
| "step": 3626 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.385817257239556e-06, | |
| "loss": 2.4332, | |
| "loss_": 0.9237, | |
| "moe_loss": 0.1604, | |
| "moe_loss_longrong": 1.4068, | |
| "step": 3633 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.349770053018502e-06, | |
| "loss": 2.4673, | |
| "loss_": 0.9196, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3968, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.313830378356384e-06, | |
| "loss": 2.4438, | |
| "loss_": 1.0917, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.397, | |
| "step": 3647 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.277998917221354e-06, | |
| "loss": 2.4672, | |
| "loss_": 0.8497, | |
| "moe_loss": 0.1604, | |
| "moe_loss_longrong": 1.4064, | |
| "step": 3654 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.242276351522161e-06, | |
| "loss": 2.4468, | |
| "loss_": 0.8331, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3969, | |
| "step": 3661 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.206663361095164e-06, | |
| "loss": 2.4639, | |
| "loss_": 0.9817, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3963, | |
| "step": 3668 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.171160623691384e-06, | |
| "loss": 2.4403, | |
| "loss_": 1.0819, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3967, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.135768814963622e-06, | |
| "loss": 2.4281, | |
| "loss_": 0.9681, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3966, | |
| "step": 3682 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.100488608453599e-06, | |
| "loss": 2.4383, | |
| "loss_": 0.8748, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3966, | |
| "step": 3689 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.065320675579132e-06, | |
| "loss": 2.4811, | |
| "loss_": 1.2776, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3966, | |
| "step": 3696 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 4.03026568562135e-06, | |
| "loss": 2.4559, | |
| "loss_": 0.8804, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3967, | |
| "step": 3703 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.995324305711976e-06, | |
| "loss": 2.4263, | |
| "loss_": 0.9593, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3968, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.9604972008206085e-06, | |
| "loss": 2.4698, | |
| "loss_": 1.2848, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3967, | |
| "step": 3717 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.9257850337420856e-06, | |
| "loss": 2.4923, | |
| "loss_": 1.0082, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3965, | |
| "step": 3724 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.891188465083865e-06, | |
| "loss": 2.4502, | |
| "loss_": 1.0253, | |
| "moe_loss": 0.1604, | |
| "moe_loss_longrong": 1.4059, | |
| "step": 3731 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.8567081532534374e-06, | |
| "loss": 2.4543, | |
| "loss_": 0.6744, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3962, | |
| "step": 3738 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.822344754445826e-06, | |
| "loss": 2.4628, | |
| "loss_": 1.0211, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3968, | |
| "step": 3745 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.788098922631067e-06, | |
| "loss": 2.4765, | |
| "loss_": 1.0228, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3968, | |
| "step": 3752 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.753971309541784e-06, | |
| "loss": 2.4717, | |
| "loss_": 0.846, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3967, | |
| "step": 3759 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 3.719962564660783e-06, | |
| "loss": 2.4447, | |
| "loss_": 0.4578, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3973, | |
| "step": 3766 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.6860733352086866e-06, | |
| "loss": 2.4563, | |
| "loss_": 0.8938, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3965, | |
| "step": 3773 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.652304266131612e-06, | |
| "loss": 2.4641, | |
| "loss_": 0.9597, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3966, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.618656000088916e-06, | |
| "loss": 2.4801, | |
| "loss_": 0.7477, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3968, | |
| "step": 3787 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.585129177440938e-06, | |
| "loss": 2.4649, | |
| "loss_": 1.1009, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3965, | |
| "step": 3794 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.5517244362368363e-06, | |
| "loss": 2.4828, | |
| "loss_": 1.1634, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3966, | |
| "step": 3801 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.5184424122024406e-06, | |
| "loss": 2.4532, | |
| "loss_": 1.1849, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3965, | |
| "step": 3808 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 3.485283738728139e-06, | |
| "loss": 2.4494, | |
| "loss_": 0.8625, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3965, | |
| "step": 3815 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.452249046856836e-06, | |
| "loss": 2.4715, | |
| "loss_": 1.012, | |
| "moe_loss": 0.1604, | |
| "moe_loss_longrong": 1.4064, | |
| "step": 3822 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.4193389652719478e-06, | |
| "loss": 2.4256, | |
| "loss_": 1.0154, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3961, | |
| "step": 3829 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.3865541202854314e-06, | |
| "loss": 2.4636, | |
| "loss_": 1.1752, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3964, | |
| "step": 3836 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.353895135825854e-06, | |
| "loss": 2.442, | |
| "loss_": 0.8945, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3964, | |
| "step": 3843 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.321362633426547e-06, | |
| "loss": 2.4677, | |
| "loss_": 0.9853, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3963, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.2889572322137454e-06, | |
| "loss": 2.4633, | |
| "loss_": 1.2634, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3961, | |
| "step": 3857 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.256679548894831e-06, | |
| "loss": 2.4637, | |
| "loss_": 0.8044, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3965, | |
| "step": 3864 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.224530197746587e-06, | |
| "loss": 2.4343, | |
| "loss_": 0.8018, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3968, | |
| "step": 3871 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.1925097906034962e-06, | |
| "loss": 2.4328, | |
| "loss_": 0.8425, | |
| "moe_loss": 0.1604, | |
| "moe_loss_longrong": 1.4055, | |
| "step": 3878 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.1606189368461117e-06, | |
| "loss": 2.4648, | |
| "loss_": 0.9644, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3964, | |
| "step": 3885 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.128858243389461e-06, | |
| "loss": 2.4541, | |
| "loss_": 0.6231, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3966, | |
| "step": 3892 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.097228314671481e-06, | |
| "loss": 2.476, | |
| "loss_": 0.9949, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3961, | |
| "step": 3899 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.065729752641532e-06, | |
| "loss": 2.4229, | |
| "loss_": 0.8875, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3962, | |
| "step": 3906 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.034363156748933e-06, | |
| "loss": 2.4502, | |
| "loss_": 0.9087, | |
| "moe_loss": 0.1604, | |
| "moe_loss_longrong": 1.4054, | |
| "step": 3913 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 3.0031291239315473e-06, | |
| "loss": 2.4367, | |
| "loss_": 0.8938, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3965, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 2.9720282486044407e-06, | |
| "loss": 2.471, | |
| "loss_": 0.742, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3963, | |
| "step": 3927 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 2.941061122648545e-06, | |
| "loss": 2.4598, | |
| "loss_": 0.6142, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3966, | |
| "step": 3934 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 2.910228335399419e-06, | |
| "loss": 2.4532, | |
| "loss_": 0.9248, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3961, | |
| "step": 3941 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 2.8795304736360184e-06, | |
| "loss": 2.4694, | |
| "loss_": 0.876, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3961, | |
| "step": 3948 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 2.8489681215695242e-06, | |
| "loss": 2.4464, | |
| "loss_": 1.0146, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3961, | |
| "step": 3955 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 2.8185418608322344e-06, | |
| "loss": 2.4632, | |
| "loss_": 0.7415, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3964, | |
| "step": 3962 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 2.788252270466497e-06, | |
| "loss": 2.4575, | |
| "loss_": 1.1931, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3963, | |
| "step": 3969 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 2.7580999269136854e-06, | |
| "loss": 2.4825, | |
| "loss_": 0.9967, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3961, | |
| "step": 3976 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.728085404003217e-06, | |
| "loss": 2.4658, | |
| "loss_": 0.9402, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3961, | |
| "step": 3983 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.698209272941659e-06, | |
| "loss": 2.4466, | |
| "loss_": 1.1097, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3963, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.668472102301829e-06, | |
| "loss": 2.4544, | |
| "loss_": 1.018, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3962, | |
| "step": 3997 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.6388744580119975e-06, | |
| "loss": 2.4195, | |
| "loss_": 0.9804, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3961, | |
| "step": 4004 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.6094169033451066e-06, | |
| "loss": 2.4628, | |
| "loss_": 0.7708, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3963, | |
| "step": 4011 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.580099998908049e-06, | |
| "loss": 2.4624, | |
| "loss_": 0.6729, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3962, | |
| "step": 4018 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 2.5509243026309983e-06, | |
| "loss": 2.4753, | |
| "loss_": 1.177, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3961, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.5218903697568075e-06, | |
| "loss": 2.4669, | |
| "loss_": 1.1103, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.396, | |
| "step": 4032 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.4929987528304144e-06, | |
| "loss": 2.4671, | |
| "loss_": 1.3009, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3962, | |
| "step": 4039 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.4642500016883532e-06, | |
| "loss": 2.4649, | |
| "loss_": 0.4641, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3964, | |
| "step": 4046 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.4356446634482756e-06, | |
| "loss": 2.4255, | |
| "loss_": 0.7561, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3963, | |
| "step": 4053 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.407183282498534e-06, | |
| "loss": 2.4512, | |
| "loss_": 1.0891, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.396, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.3788664004878405e-06, | |
| "loss": 2.4548, | |
| "loss_": 0.8427, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3964, | |
| "step": 4067 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 2.350694556314934e-06, | |
| "loss": 2.4775, | |
| "loss_": 1.1603, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.396, | |
| "step": 4074 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.32266828611835e-06, | |
| "loss": 2.4762, | |
| "loss_": 0.982, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.396, | |
| "step": 4081 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.2947881232662007e-06, | |
| "loss": 2.4574, | |
| "loss_": 0.6854, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3965, | |
| "step": 4088 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.2670545983460245e-06, | |
| "loss": 2.4641, | |
| "loss_": 1.1094, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.396, | |
| "step": 4095 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.2394682391546928e-06, | |
| "loss": 2.4546, | |
| "loss_": 0.8832, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3962, | |
| "step": 4102 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.2120295706883698e-06, | |
| "loss": 2.4228, | |
| "loss_": 0.534, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3966, | |
| "step": 4109 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.184739115132517e-06, | |
| "loss": 2.4502, | |
| "loss_": 0.6129, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 4116 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.157597391851949e-06, | |
| "loss": 2.4514, | |
| "loss_": 0.8927, | |
| "moe_loss": 0.1604, | |
| "moe_loss_longrong": 1.4045, | |
| "step": 4123 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 2.130604917380962e-06, | |
| "loss": 2.4434, | |
| "loss_": 0.87, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.103762205413493e-06, | |
| "loss": 2.4475, | |
| "loss_": 1.1291, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3961, | |
| "step": 4137 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.0770697667933436e-06, | |
| "loss": 2.4697, | |
| "loss_": 0.8274, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3961, | |
| "step": 4144 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.0505281095044804e-06, | |
| "loss": 2.4725, | |
| "loss_": 1.0877, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.396, | |
| "step": 4151 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.024137738661329e-06, | |
| "loss": 2.4757, | |
| "loss_": 0.6894, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3962, | |
| "step": 4158 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.997899156499191e-06, | |
| "loss": 2.4566, | |
| "loss_": 0.7625, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3962, | |
| "step": 4165 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.9718128623646792e-06, | |
| "loss": 2.514, | |
| "loss_": 1.3132, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4172 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.9458793527062035e-06, | |
| "loss": 2.4659, | |
| "loss_": 0.6083, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3961, | |
| "step": 4179 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.9200991210645394e-06, | |
| "loss": 2.441, | |
| "loss_": 0.8687, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3964, | |
| "step": 4186 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.8944726580634287e-06, | |
| "loss": 2.4227, | |
| "loss_": 0.863, | |
| "moe_loss": 0.1604, | |
| "moe_loss_longrong": 1.4043, | |
| "step": 4193 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.8690004514002314e-06, | |
| "loss": 2.4488, | |
| "loss_": 1.0513, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3962, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.8436829858366655e-06, | |
| "loss": 2.4269, | |
| "loss_": 0.9285, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 4207 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.8185207431895613e-06, | |
| "loss": 2.4577, | |
| "loss_": 1.0791, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4214 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.7935142023217056e-06, | |
| "loss": 2.4565, | |
| "loss_": 1.0052, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4221 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.768663839132727e-06, | |
| "loss": 2.4314, | |
| "loss_": 0.7553, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 4228 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.7439701265500274e-06, | |
| "loss": 2.432, | |
| "loss_": 1.137, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4235 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.7194335345197933e-06, | |
| "loss": 2.466, | |
| "loss_": 0.804, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3963, | |
| "step": 4242 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.6950545299980526e-06, | |
| "loss": 2.4119, | |
| "loss_": 0.8362, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3961, | |
| "step": 4249 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.6708335769417827e-06, | |
| "loss": 2.4555, | |
| "loss_": 0.8946, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3957, | |
| "step": 4256 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.6467711363000794e-06, | |
| "loss": 2.434, | |
| "loss_": 1.1374, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4263 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.6228676660053932e-06, | |
| "loss": 2.4705, | |
| "loss_": 1.07, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.5991236209648052e-06, | |
| "loss": 2.4467, | |
| "loss_": 0.5343, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3962, | |
| "step": 4277 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.575539453051369e-06, | |
| "loss": 2.4617, | |
| "loss_": 1.2505, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4284 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.5521156110955293e-06, | |
| "loss": 2.4389, | |
| "loss_": 1.0836, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 4291 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.5288525408765564e-06, | |
| "loss": 2.4877, | |
| "loss_": 0.8473, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 4298 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.5057506851140701e-06, | |
| "loss": 2.4786, | |
| "loss_": 1.0259, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4305 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.4828104834596268e-06, | |
| "loss": 2.4086, | |
| "loss_": 0.5643, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3965, | |
| "step": 4312 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.4600323724883337e-06, | |
| "loss": 2.4481, | |
| "loss_": 1.0485, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 4319 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.4374167856905542e-06, | |
| "loss": 2.4386, | |
| "loss_": 0.9296, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.396, | |
| "step": 4326 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.414964153463655e-06, | |
| "loss": 2.4538, | |
| "loss_": 0.7446, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 4333 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 1.3926749031038055e-06, | |
| "loss": 2.4252, | |
| "loss_": 1.0624, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3963, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.370549458797863e-06, | |
| "loss": 2.4477, | |
| "loss_": 1.0075, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3957, | |
| "step": 4347 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.3485882416152819e-06, | |
| "loss": 2.4224, | |
| "loss_": 0.9794, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 4354 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.3267916695001172e-06, | |
| "loss": 2.4571, | |
| "loss_": 0.9473, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3954, | |
| "step": 4361 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.3051601572630611e-06, | |
| "loss": 2.449, | |
| "loss_": 1.1259, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3957, | |
| "step": 4368 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.283694116573546e-06, | |
| "loss": 2.4477, | |
| "loss_": 1.0313, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.2623939559519161e-06, | |
| "loss": 2.46, | |
| "loss_": 0.888, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4382 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.2412600807616526e-06, | |
| "loss": 2.4559, | |
| "loss_": 0.9206, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3957, | |
| "step": 4389 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.2202928932016588e-06, | |
| "loss": 2.4259, | |
| "loss_": 0.8758, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4396 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.1994927922985999e-06, | |
| "loss": 2.4477, | |
| "loss_": 0.8513, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3961, | |
| "step": 4403 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.178860173899321e-06, | |
| "loss": 2.4408, | |
| "loss_": 1.0152, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3955, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.1583954306633004e-06, | |
| "loss": 2.4442, | |
| "loss_": 1.1666, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.396, | |
| "step": 4417 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.138098952055181e-06, | |
| "loss": 2.4404, | |
| "loss_": 0.6781, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 4424 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.1179711243373736e-06, | |
| "loss": 2.4439, | |
| "loss_": 0.8599, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3962, | |
| "step": 4431 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 1.0980123305626812e-06, | |
| "loss": 2.4635, | |
| "loss_": 1.0466, | |
| "moe_loss": 0.1605, | |
| "moe_loss_longrong": 1.4055, | |
| "step": 4438 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.0782229505670195e-06, | |
| "loss": 2.4436, | |
| "loss_": 1.1018, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4445 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.0586033609622004e-06, | |
| "loss": 2.4521, | |
| "loss_": 0.9168, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3954, | |
| "step": 4452 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.039153935128744e-06, | |
| "loss": 2.4435, | |
| "loss_": 1.1978, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3955, | |
| "step": 4459 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.0198750432087855e-06, | |
| "loss": 2.4683, | |
| "loss_": 1.2032, | |
| "moe_loss": 0.16, | |
| "moe_loss_longrong": 1.3955, | |
| "step": 4466 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 1.0007670520990331e-06, | |
| "loss": 2.4688, | |
| "loss_": 1.0949, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4473 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 9.818303254437723e-07, | |
| "loss": 2.459, | |
| "loss_": 1.3033, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 9.630652236279626e-07, | |
| "loss": 2.4758, | |
| "loss_": 0.8537, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4487 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 9.444721037703597e-07, | |
| "loss": 2.4579, | |
| "loss_": 0.6901, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 4494 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 9.260513197167398e-07, | |
| "loss": 2.4456, | |
| "loss_": 1.0315, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3957, | |
| "step": 4501 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 9.078032220331523e-07, | |
| "loss": 2.4451, | |
| "loss_": 1.1487, | |
| "moe_loss": 0.16, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4508 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 8.897281579992467e-07, | |
| "loss": 2.4204, | |
| "loss_": 1.1135, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4515 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 8.718264716016722e-07, | |
| "loss": 2.4218, | |
| "loss_": 0.8637, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3955, | |
| "step": 4522 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 8.540985035275273e-07, | |
| "loss": 2.447, | |
| "loss_": 0.8818, | |
| "moe_loss": 0.16, | |
| "moe_loss_longrong": 1.3955, | |
| "step": 4529 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 8.365445911578785e-07, | |
| "loss": 2.4654, | |
| "loss_": 0.9987, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3955, | |
| "step": 4536 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 8.191650685613273e-07, | |
| "loss": 2.4603, | |
| "loss_": 1.1883, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3956, | |
| "step": 4543 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 8.019602664876758e-07, | |
| "loss": 2.4475, | |
| "loss_": 1.0646, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 7.849305123616091e-07, | |
| "loss": 2.4486, | |
| "loss_": 0.8589, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3957, | |
| "step": 4557 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 7.680761302764727e-07, | |
| "loss": 2.4336, | |
| "loss_": 1.0525, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3957, | |
| "step": 4564 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 7.513974409881186e-07, | |
| "loss": 2.4663, | |
| "loss_": 1.0924, | |
| "moe_loss": 0.16, | |
| "moe_loss_longrong": 1.3957, | |
| "step": 4571 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 7.348947619087754e-07, | |
| "loss": 2.4417, | |
| "loss_": 1.0197, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4578 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 7.185684071010224e-07, | |
| "loss": 2.4364, | |
| "loss_": 0.9028, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3956, | |
| "step": 4585 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 7.024186872718164e-07, | |
| "loss": 2.4733, | |
| "loss_": 0.5258, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3961, | |
| "step": 4592 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 6.864459097665654e-07, | |
| "loss": 2.4453, | |
| "loss_": 0.9338, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3957, | |
| "step": 4599 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 6.706503785632934e-07, | |
| "loss": 2.432, | |
| "loss_": 1.1129, | |
| "moe_loss": 0.16, | |
| "moe_loss_longrong": 1.3956, | |
| "step": 4606 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 6.550323942668469e-07, | |
| "loss": 2.4297, | |
| "loss_": 0.6761, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3963, | |
| "step": 4613 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 6.395922541031741e-07, | |
| "loss": 2.4152, | |
| "loss_": 0.8792, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3956, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 6.24330251913674e-07, | |
| "loss": 2.4526, | |
| "loss_": 1.1836, | |
| "moe_loss": 0.16, | |
| "moe_loss_longrong": 1.396, | |
| "step": 4627 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 6.092466781495976e-07, | |
| "loss": 2.4362, | |
| "loss_": 0.9499, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4634 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5.943418198665251e-07, | |
| "loss": 2.4439, | |
| "loss_": 1.1622, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4641 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5.796159607189001e-07, | |
| "loss": 2.4273, | |
| "loss_": 0.9876, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3956, | |
| "step": 4648 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.650693809546348e-07, | |
| "loss": 2.4735, | |
| "loss_": 1.1284, | |
| "moe_loss": 0.16, | |
| "moe_loss_longrong": 1.3956, | |
| "step": 4655 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.507023574097725e-07, | |
| "loss": 2.4393, | |
| "loss_": 0.8675, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4662 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.365151635032218e-07, | |
| "loss": 2.4482, | |
| "loss_": 0.901, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3955, | |
| "step": 4669 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.225080692315532e-07, | |
| "loss": 2.441, | |
| "loss_": 1.0355, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3956, | |
| "step": 4676 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 5.086813411638581e-07, | |
| "loss": 2.4277, | |
| "loss_": 0.9478, | |
| "moe_loss": 0.16, | |
| "moe_loss_longrong": 1.3955, | |
| "step": 4683 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.9503524243668e-07, | |
| "loss": 2.444, | |
| "loss_": 0.8901, | |
| "moe_loss": 0.1604, | |
| "moe_loss_longrong": 1.4048, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.815700327490014e-07, | |
| "loss": 2.4286, | |
| "loss_": 0.8906, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3957, | |
| "step": 4697 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.6828596835730487e-07, | |
| "loss": 2.4475, | |
| "loss_": 1.028, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3957, | |
| "step": 4704 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.551833020707008e-07, | |
| "loss": 2.4281, | |
| "loss_": 0.6545, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4711 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.4226228324610544e-07, | |
| "loss": 2.4677, | |
| "loss_": 0.9228, | |
| "moe_loss": 0.16, | |
| "moe_loss_longrong": 1.3954, | |
| "step": 4718 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.295231577835024e-07, | |
| "loss": 2.443, | |
| "loss_": 0.8677, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3957, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.1696616812126333e-07, | |
| "loss": 2.4452, | |
| "loss_": 0.8619, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4732 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.0459155323153034e-07, | |
| "loss": 2.4501, | |
| "loss_": 0.5721, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 4739 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.9239954861567177e-07, | |
| "loss": 2.4452, | |
| "loss_": 1.2849, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3957, | |
| "step": 4746 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 3.803903862998004e-07, | |
| "loss": 2.4681, | |
| "loss_": 1.0272, | |
| "moe_loss": 0.1605, | |
| "moe_loss_longrong": 1.4052, | |
| "step": 4753 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.685642948303503e-07, | |
| "loss": 2.4437, | |
| "loss_": 0.929, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3957, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.5692149926974006e-07, | |
| "loss": 2.4502, | |
| "loss_": 1.1455, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4767 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.454622211920766e-07, | |
| "loss": 2.4262, | |
| "loss_": 0.6494, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3955, | |
| "step": 4774 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.341866786789505e-07, | |
| "loss": 2.4259, | |
| "loss_": 1.113, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 4781 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.2309508631527486e-07, | |
| "loss": 2.4309, | |
| "loss_": 0.8977, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3957, | |
| "step": 4788 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.121876551852099e-07, | |
| "loss": 2.4311, | |
| "loss_": 1.0739, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.396, | |
| "step": 4795 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 3.0146459286813924e-07, | |
| "loss": 2.4515, | |
| "loss_": 0.9781, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 4802 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.909261034347255e-07, | |
| "loss": 2.4553, | |
| "loss_": 0.9123, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 4809 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.8057238744301994e-07, | |
| "loss": 2.4516, | |
| "loss_": 1.131, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.396, | |
| "step": 4816 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.704036419346534e-07, | |
| "loss": 2.4628, | |
| "loss_": 1.0138, | |
| "moe_loss": 0.1605, | |
| "moe_loss_longrong": 1.4051, | |
| "step": 4823 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.604200604310825e-07, | |
| "loss": 2.4657, | |
| "loss_": 1.0133, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3955, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.506218329299026e-07, | |
| "loss": 2.4311, | |
| "loss_": 1.1114, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.396, | |
| "step": 4837 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.410091459012376e-07, | |
| "loss": 2.4529, | |
| "loss_": 1.0407, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.396, | |
| "step": 4844 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.3158218228419127e-07, | |
| "loss": 2.4564, | |
| "loss_": 1.061, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3955, | |
| "step": 4851 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.2234112148336373e-07, | |
| "loss": 2.4584, | |
| "loss_": 0.735, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3956, | |
| "step": 4858 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.1328613936543396e-07, | |
| "loss": 2.425, | |
| "loss_": 0.9476, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3962, | |
| "step": 4865 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 2.0441740825582258e-07, | |
| "loss": 2.4643, | |
| "loss_": 1.0806, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3956, | |
| "step": 4872 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.9573509693540104e-07, | |
| "loss": 2.4676, | |
| "loss_": 0.825, | |
| "moe_loss": 0.16, | |
| "moe_loss_longrong": 1.3954, | |
| "step": 4879 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.872393706372866e-07, | |
| "loss": 2.4485, | |
| "loss_": 0.8625, | |
| "moe_loss": 0.1605, | |
| "moe_loss_longrong": 1.4046, | |
| "step": 4886 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.789303910436968e-07, | |
| "loss": 2.4413, | |
| "loss_": 0.7278, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4893 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.7080831628286886e-07, | |
| "loss": 2.4452, | |
| "loss_": 0.812, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3961, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.6287330092605525e-07, | |
| "loss": 2.4596, | |
| "loss_": 0.6445, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 4907 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.551254959845805e-07, | |
| "loss": 2.4541, | |
| "loss_": 1.0014, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3957, | |
| "step": 4914 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.4756504890696466e-07, | |
| "loss": 2.4342, | |
| "loss_": 1.1963, | |
| "moe_loss": 0.16, | |
| "moe_loss_longrong": 1.3957, | |
| "step": 4921 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.401921035761189e-07, | |
| "loss": 2.4346, | |
| "loss_": 0.8071, | |
| "moe_loss": 0.1604, | |
| "moe_loss_longrong": 1.4051, | |
| "step": 4928 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.3300680030661096e-07, | |
| "loss": 2.464, | |
| "loss_": 1.1756, | |
| "moe_loss": 0.16, | |
| "moe_loss_longrong": 1.3957, | |
| "step": 4935 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.2600927584198618e-07, | |
| "loss": 2.4334, | |
| "loss_": 1.0679, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3954, | |
| "step": 4942 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.1919966335217636e-07, | |
| "loss": 2.4779, | |
| "loss_": 0.8872, | |
| "moe_loss": 0.16, | |
| "moe_loss_longrong": 1.3956, | |
| "step": 4949 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.1257809243095385e-07, | |
| "loss": 2.4339, | |
| "loss_": 0.8956, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3956, | |
| "step": 4956 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.0614468909347476e-07, | |
| "loss": 2.4414, | |
| "loss_": 1.1397, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3956, | |
| "step": 4963 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 9.989957577387521e-08, | |
| "loss": 2.4253, | |
| "loss_": 0.8755, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3955, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 9.384287132294223e-08, | |
| "loss": 2.4599, | |
| "loss_": 0.9577, | |
| "moe_loss": 0.16, | |
| "moe_loss_longrong": 1.3954, | |
| "step": 4977 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 8.797469100585432e-08, | |
| "loss": 2.4615, | |
| "loss_": 0.7768, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3957, | |
| "step": 4984 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 8.229514649998438e-08, | |
| "loss": 2.4414, | |
| "loss_": 1.0058, | |
| "moe_loss": 0.16, | |
| "moe_loss_longrong": 1.3956, | |
| "step": 4991 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 7.680434589277696e-08, | |
| "loss": 2.4587, | |
| "loss_": 1.0013, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3957, | |
| "step": 4998 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 7.150239367969102e-08, | |
| "loss": 2.4314, | |
| "loss_": 0.9539, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 5005 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 6.63893907622104e-08, | |
| "loss": 2.4711, | |
| "loss_": 0.9778, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3954, | |
| "step": 5012 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 6.14654344459209e-08, | |
| "loss": 2.4558, | |
| "loss_": 0.9942, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 5019 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 5.673061843866623e-08, | |
| "loss": 2.4748, | |
| "loss_": 0.7836, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 5026 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 5.218503284875609e-08, | |
| "loss": 2.4418, | |
| "loss_": 1.0962, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3955, | |
| "step": 5033 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.7828764183257545e-08, | |
| "loss": 2.4128, | |
| "loss_": 0.9561, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 4.366189534634191e-08, | |
| "loss": 2.4604, | |
| "loss_": 0.9591, | |
| "moe_loss": 0.16, | |
| "moe_loss_longrong": 1.3955, | |
| "step": 5047 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.9684505637718194e-08, | |
| "loss": 2.4619, | |
| "loss_": 1.1709, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 5054 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 3.589667075110992e-08, | |
| "loss": 2.4199, | |
| "loss_": 1.0163, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 5061 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 3.229846277282511e-08, | |
| "loss": 2.4621, | |
| "loss_": 0.9223, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 5068 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.8889950180382985e-08, | |
| "loss": 2.4625, | |
| "loss_": 0.4562, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 5075 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.5671197841203867e-08, | |
| "loss": 2.4386, | |
| "loss_": 1.0035, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3958, | |
| "step": 5082 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 2.264226701138461e-08, | |
| "loss": 2.4681, | |
| "loss_": 1.218, | |
| "moe_loss": 0.16, | |
| "moe_loss_longrong": 1.3955, | |
| "step": 5089 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.9803215334522895e-08, | |
| "loss": 2.427, | |
| "loss_": 0.7696, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3961, | |
| "step": 5096 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.7154096840629186e-08, | |
| "loss": 2.4851, | |
| "loss_": 1.054, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3956, | |
| "step": 5103 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.4694961945093122e-08, | |
| "loss": 2.4448, | |
| "loss_": 0.8593, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.2425857447725397e-08, | |
| "loss": 2.454, | |
| "loss_": 0.8875, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3956, | |
| "step": 5117 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.0346826531865139e-08, | |
| "loss": 2.4289, | |
| "loss_": 0.9578, | |
| "moe_loss": 0.16, | |
| "moe_loss_longrong": 1.3957, | |
| "step": 5124 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 8.457908763562783e-09, | |
| "loss": 2.4436, | |
| "loss_": 1.026, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3956, | |
| "step": 5131 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 6.759140090824012e-09, | |
| "loss": 2.4605, | |
| "loss_": 0.6569, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3954, | |
| "step": 5138 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 5.25055284292475e-09, | |
| "loss": 2.4668, | |
| "loss_": 1.2459, | |
| "moe_loss": 0.16, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 5145 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.932175729797205e-09, | |
| "loss": 2.4129, | |
| "loss_": 0.9565, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 5152 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 2.8040338414847545e-09, | |
| "loss": 2.4469, | |
| "loss_": 0.8875, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3957, | |
| "step": 5159 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.8661486476612144e-09, | |
| "loss": 2.4186, | |
| "loss_": 0.7829, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.396, | |
| "step": 5166 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.1185379972256105e-09, | |
| "loss": 2.4351, | |
| "loss_": 0.7496, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3956, | |
| "step": 5173 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 5.612161179613385e-10, | |
| "loss": 2.4242, | |
| "loss_": 0.9366, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3956, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.9419361626416e-10, | |
| "loss": 2.4684, | |
| "loss_": 1.0884, | |
| "moe_loss": 0.16, | |
| "moe_loss_longrong": 1.3956, | |
| "step": 5187 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.7477476940142013e-11, | |
| "loss": 2.4801, | |
| "loss_": 1.0033, | |
| "moe_loss": 0.1601, | |
| "moe_loss_longrong": 1.3959, | |
| "step": 5194 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 5197, | |
| "total_flos": 8.818772994970092e+18, | |
| "train_loss": 2.4979199135512236, | |
| "train_runtime": 95362.7285, | |
| "train_samples_per_second": 6.976, | |
| "train_steps_per_second": 0.054 | |
| } | |
| ], | |
| "logging_steps": 7, | |
| "max_steps": 5197, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 1000, | |
| "total_flos": 8.818772994970092e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |