| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9999059885306008, |
| "eval_steps": 500, |
| "global_step": 7977, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 5.833333333333334e-07, |
| "loss": 3.0143, |
| "loss_": 1.9639, |
| "moe_loss": 0.1727, |
| "moe_loss_longrong": 1.5072, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 1.1666666666666668e-06, |
| "loss": 3.1915, |
| "loss_": 1.6485, |
| "moe_loss": 0.1717, |
| "moe_loss_longrong": 1.5034, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 1.75e-06, |
| "loss": 3.1397, |
| "loss_": 1.5496, |
| "moe_loss": 0.171, |
| "moe_loss_longrong": 1.4995, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 2.3333333333333336e-06, |
| "loss": 3.0585, |
| "loss_": 2.0086, |
| "moe_loss": 0.1701, |
| "moe_loss_longrong": 1.4995, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 2.916666666666667e-06, |
| "loss": 2.9633, |
| "loss_": 1.0198, |
| "moe_loss": 0.1676, |
| "moe_loss_longrong": 1.5194, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 3.5e-06, |
| "loss": 3.0075, |
| "loss_": 1.4577, |
| "moe_loss": 0.1675, |
| "moe_loss_longrong": 1.4922, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.083333333333334e-06, |
| "loss": 2.9169, |
| "loss_": 1.446, |
| "moe_loss": 0.1664, |
| "moe_loss_longrong": 1.4899, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.666666666666667e-06, |
| "loss": 2.9903, |
| "loss_": 1.3955, |
| "moe_loss": 0.1647, |
| "moe_loss_longrong": 1.483, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 5.2500000000000006e-06, |
| "loss": 2.9445, |
| "loss_": 1.7934, |
| "moe_loss": 0.1646, |
| "moe_loss_longrong": 1.4828, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 5.833333333333334e-06, |
| "loss": 2.9122, |
| "loss_": 1.4323, |
| "moe_loss": 0.1631, |
| "moe_loss_longrong": 1.4793, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 6.416666666666667e-06, |
| "loss": 2.87, |
| "loss_": 0.8099, |
| "moe_loss": 0.1652, |
| "moe_loss_longrong": 1.5108, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7e-06, |
| "loss": 2.931, |
| "loss_": 1.4784, |
| "moe_loss": 0.1621, |
| "moe_loss_longrong": 1.473, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 7.583333333333333e-06, |
| "loss": 2.7996, |
| "loss_": 1.0334, |
| "moe_loss": 0.1645, |
| "moe_loss_longrong": 1.5067, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 8.166666666666668e-06, |
| "loss": 2.8791, |
| "loss_": 1.4959, |
| "moe_loss": 0.1617, |
| "moe_loss_longrong": 1.4725, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 8.750000000000001e-06, |
| "loss": 2.8593, |
| "loss_": 1.3625, |
| "moe_loss": 0.1617, |
| "moe_loss_longrong": 1.4711, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 9.333333333333334e-06, |
| "loss": 2.8592, |
| "loss_": 1.0826, |
| "moe_loss": 0.1639, |
| "moe_loss_longrong": 1.5051, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 9.916666666666668e-06, |
| "loss": 2.8772, |
| "loss_": 1.2496, |
| "moe_loss": 0.1616, |
| "moe_loss_longrong": 1.4683, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.0500000000000001e-05, |
| "loss": 2.7839, |
| "loss_": 0.8619, |
| "moe_loss": 0.1636, |
| "moe_loss_longrong": 1.5017, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.1083333333333335e-05, |
| "loss": 2.845, |
| "loss_": 1.519, |
| "moe_loss": 0.1615, |
| "moe_loss_longrong": 1.4653, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.1666666666666668e-05, |
| "loss": 2.8779, |
| "loss_": 1.4328, |
| "moe_loss": 0.1615, |
| "moe_loss_longrong": 1.4632, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.2250000000000001e-05, |
| "loss": 2.8133, |
| "loss_": 1.7345, |
| "moe_loss": 0.1615, |
| "moe_loss_longrong": 1.4635, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.2833333333333335e-05, |
| "loss": 2.8421, |
| "loss_": 1.443, |
| "moe_loss": 0.1615, |
| "moe_loss_longrong": 1.4609, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.3416666666666666e-05, |
| "loss": 2.8433, |
| "loss_": 1.0833, |
| "moe_loss": 0.1613, |
| "moe_loss_longrong": 1.4601, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.4e-05, |
| "loss": 2.7887, |
| "loss_": 1.1754, |
| "moe_loss": 0.1611, |
| "moe_loss_longrong": 1.458, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.4583333333333333e-05, |
| "loss": 2.8346, |
| "loss_": 1.4786, |
| "moe_loss": 0.1615, |
| "moe_loss_longrong": 1.461, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.5166666666666667e-05, |
| "loss": 2.8158, |
| "loss_": 1.1078, |
| "moe_loss": 0.161, |
| "moe_loss_longrong": 1.4563, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.575e-05, |
| "loss": 2.8165, |
| "loss_": 1.5185, |
| "moe_loss": 0.1612, |
| "moe_loss_longrong": 1.4575, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.6333333333333335e-05, |
| "loss": 2.7353, |
| "loss_": 1.5306, |
| "moe_loss": 0.1612, |
| "moe_loss_longrong": 1.4575, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.6916666666666667e-05, |
| "loss": 2.8177, |
| "loss_": 1.6701, |
| "moe_loss": 0.1613, |
| "moe_loss_longrong": 1.4552, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.7500000000000002e-05, |
| "loss": 2.8141, |
| "loss_": 1.147, |
| "moe_loss": 0.1611, |
| "moe_loss_longrong": 1.454, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.8083333333333334e-05, |
| "loss": 2.7925, |
| "loss_": 1.1863, |
| "moe_loss": 0.1612, |
| "moe_loss_longrong": 1.454, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.866666666666667e-05, |
| "loss": 2.7435, |
| "loss_": 1.2765, |
| "moe_loss": 0.1611, |
| "moe_loss_longrong": 1.4537, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.925e-05, |
| "loss": 2.7391, |
| "loss_": 1.1006, |
| "moe_loss": 0.1612, |
| "moe_loss_longrong": 1.4517, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.9833333333333335e-05, |
| "loss": 2.7548, |
| "loss_": 1.1628, |
| "moe_loss": 0.1612, |
| "moe_loss_longrong": 1.4493, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.999997939064427e-05, |
| "loss": 2.8216, |
| "loss_": 1.3669, |
| "moe_loss": 0.1613, |
| "moe_loss_longrong": 1.4484, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.9999881290305082e-05, |
| "loss": 2.7406, |
| "loss_": 0.9434, |
| "moe_loss": 0.1621, |
| "moe_loss_longrong": 1.4837, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.9999702402277115e-05, |
| "loss": 2.7835, |
| "loss_": 1.6082, |
| "moe_loss": 0.1614, |
| "moe_loss_longrong": 1.4501, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.9999442728005572e-05, |
| "loss": 2.783, |
| "loss_": 1.3989, |
| "moe_loss": 0.161, |
| "moe_loss_longrong": 1.4474, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.999910226958833e-05, |
| "loss": 2.7628, |
| "loss_": 1.2652, |
| "moe_loss": 0.161, |
| "moe_loss_longrong": 1.4481, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.9998681029775905e-05, |
| "loss": 2.7709, |
| "loss_": 1.4145, |
| "moe_loss": 0.1609, |
| "moe_loss_longrong": 1.4454, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.999817901197144e-05, |
| "loss": 2.7808, |
| "loss_": 1.167, |
| "moe_loss": 0.1612, |
| "moe_loss_longrong": 1.4457, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.9997596220230666e-05, |
| "loss": 2.7761, |
| "loss_": 1.1866, |
| "moe_loss": 0.161, |
| "moe_loss_longrong": 1.4467, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.999693265926188e-05, |
| "loss": 2.7605, |
| "loss_": 1.2065, |
| "moe_loss": 0.1608, |
| "moe_loss_longrong": 1.4448, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.99961883344259e-05, |
| "loss": 2.7849, |
| "loss_": 1.2751, |
| "moe_loss": 0.161, |
| "moe_loss_longrong": 1.4447, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.9995363251736027e-05, |
| "loss": 2.7919, |
| "loss_": 1.1653, |
| "moe_loss": 0.1608, |
| "moe_loss_longrong": 1.4426, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.9994457417857998e-05, |
| "loss": 2.7698, |
| "loss_": 1.4965, |
| "moe_loss": 0.161, |
| "moe_loss_longrong": 1.4423, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.999347084010991e-05, |
| "loss": 2.7421, |
| "loss_": 0.8783, |
| "moe_loss": 0.1608, |
| "moe_loss_longrong": 1.4427, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.99924035264622e-05, |
| "loss": 2.7618, |
| "loss_": 1.2226, |
| "moe_loss": 0.161, |
| "moe_loss_longrong": 1.4429, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.9991255485537547e-05, |
| "loss": 2.76, |
| "loss_": 1.2206, |
| "moe_loss": 0.1609, |
| "moe_loss_longrong": 1.4425, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.999002672661082e-05, |
| "loss": 2.7533, |
| "loss_": 1.3051, |
| "moe_loss": 0.1609, |
| "moe_loss_longrong": 1.4398, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.9988717259609e-05, |
| "loss": 2.7161, |
| "loss_": 0.8665, |
| "moe_loss": 0.1615, |
| "moe_loss_longrong": 1.4731, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.9987327095111085e-05, |
| "loss": 2.7577, |
| "loss_": 1.1175, |
| "moe_loss": 0.1606, |
| "moe_loss_longrong": 1.4404, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.9985856244348034e-05, |
| "loss": 2.7281, |
| "loss_": 1.2506, |
| "moe_loss": 0.1607, |
| "moe_loss_longrong": 1.4399, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.9984304719202647e-05, |
| "loss": 2.7585, |
| "loss_": 1.6201, |
| "moe_loss": 0.161, |
| "moe_loss_longrong": 1.4402, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.9982672532209487e-05, |
| "loss": 2.7048, |
| "loss_": 1.3787, |
| "moe_loss": 0.1606, |
| "moe_loss_longrong": 1.44, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.998095969655477e-05, |
| "loss": 2.7554, |
| "loss_": 1.5028, |
| "moe_loss": 0.1607, |
| "moe_loss_longrong": 1.4382, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.997916622607627e-05, |
| "loss": 2.7604, |
| "loss_": 1.5144, |
| "moe_loss": 0.1608, |
| "moe_loss_longrong": 1.4369, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.9977292135263187e-05, |
| "loss": 2.6773, |
| "loss_": 1.2568, |
| "moe_loss": 0.1608, |
| "moe_loss_longrong": 1.4391, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.9975337439256046e-05, |
| "loss": 2.7524, |
| "loss_": 1.3851, |
| "moe_loss": 0.1608, |
| "moe_loss_longrong": 1.4373, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.9973302153846577e-05, |
| "loss": 2.7138, |
| "loss_": 0.945, |
| "moe_loss": 0.1607, |
| "moe_loss_longrong": 1.4365, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.9971186295477575e-05, |
| "loss": 2.723, |
| "loss_": 1.2851, |
| "moe_loss": 0.1606, |
| "moe_loss_longrong": 1.4373, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.9968989881242766e-05, |
| "loss": 2.7099, |
| "loss_": 1.4266, |
| "moe_loss": 0.1607, |
| "moe_loss_longrong": 1.4357, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9966712928886697e-05, |
| "loss": 2.7253, |
| "loss_": 1.2214, |
| "moe_loss": 0.1608, |
| "moe_loss_longrong": 1.436, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.996435545680454e-05, |
| "loss": 2.7258, |
| "loss_": 1.2096, |
| "moe_loss": 0.1608, |
| "moe_loss_longrong": 1.4344, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9961917484042012e-05, |
| "loss": 2.6884, |
| "loss_": 1.2, |
| "moe_loss": 0.1607, |
| "moe_loss_longrong": 1.4344, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9959399030295158e-05, |
| "loss": 2.685, |
| "loss_": 0.9126, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4363, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9956800115910216e-05, |
| "loss": 2.7146, |
| "loss_": 1.0302, |
| "moe_loss": 0.1607, |
| "moe_loss_longrong": 1.4344, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.995412076188348e-05, |
| "loss": 2.665, |
| "loss_": 1.4803, |
| "moe_loss": 0.1607, |
| "moe_loss_longrong": 1.4334, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9951360989861077e-05, |
| "loss": 2.7331, |
| "loss_": 1.1431, |
| "moe_loss": 0.1609, |
| "moe_loss_longrong": 1.4341, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9948520822138837e-05, |
| "loss": 2.7357, |
| "loss_": 1.2042, |
| "moe_loss": 0.1607, |
| "moe_loss_longrong": 1.4322, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9945600281662088e-05, |
| "loss": 2.7075, |
| "loss_": 0.97, |
| "moe_loss": 0.1614, |
| "moe_loss_longrong": 1.4623, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9942599392025488e-05, |
| "loss": 2.7172, |
| "loss_": 1.2653, |
| "moe_loss": 0.1608, |
| "moe_loss_longrong": 1.4338, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9939518177472813e-05, |
| "loss": 2.718, |
| "loss_": 1.1215, |
| "moe_loss": 0.1608, |
| "moe_loss_longrong": 1.4347, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.9936356662896777e-05, |
| "loss": 2.7166, |
| "loss_": 0.9601, |
| "moe_loss": 0.1607, |
| "moe_loss_longrong": 1.432, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9933114873838832e-05, |
| "loss": 2.711, |
| "loss_": 1.2031, |
| "moe_loss": 0.1606, |
| "moe_loss_longrong": 1.4313, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9929792836488954e-05, |
| "loss": 2.7297, |
| "loss_": 1.0668, |
| "moe_loss": 0.1607, |
| "moe_loss_longrong": 1.4315, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9926390577685434e-05, |
| "loss": 2.7135, |
| "loss_": 0.8892, |
| "moe_loss": 0.1616, |
| "moe_loss_longrong": 1.461, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.992290812491466e-05, |
| "loss": 2.676, |
| "loss_": 1.2917, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4313, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9919345506310896e-05, |
| "loss": 2.6813, |
| "loss_": 1.4059, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4321, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9915702750656053e-05, |
| "loss": 2.7125, |
| "loss_": 1.085, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4315, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.991197988737947e-05, |
| "loss": 2.7119, |
| "loss_": 1.1287, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4303, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9908176946557646e-05, |
| "loss": 2.6879, |
| "loss_": 1.1955, |
| "moe_loss": 0.1606, |
| "moe_loss_longrong": 1.4301, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9904293958914032e-05, |
| "loss": 2.7081, |
| "loss_": 1.3866, |
| "moe_loss": 0.1606, |
| "moe_loss_longrong": 1.4304, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.990033095581876e-05, |
| "loss": 2.7152, |
| "loss_": 1.2814, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4308, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.9896287969288396e-05, |
| "loss": 2.6944, |
| "loss_": 1.2075, |
| "moe_loss": 0.1606, |
| "moe_loss_longrong": 1.4292, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.989216503198568e-05, |
| "loss": 2.6422, |
| "loss_": 0.9899, |
| "moe_loss": 0.1617, |
| "moe_loss_longrong": 1.458, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.988796217721926e-05, |
| "loss": 2.7321, |
| "loss_": 1.437, |
| "moe_loss": 0.1606, |
| "moe_loss_longrong": 1.4304, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9883679438943444e-05, |
| "loss": 2.6757, |
| "loss_": 1.3196, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4289, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9879316851757885e-05, |
| "loss": 2.688, |
| "loss_": 1.514, |
| "moe_loss": 0.1606, |
| "moe_loss_longrong": 1.429, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9874874450907338e-05, |
| "loss": 2.7082, |
| "loss_": 1.1938, |
| "moe_loss": 0.1607, |
| "moe_loss_longrong": 1.4291, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.987035227228136e-05, |
| "loss": 2.7276, |
| "loss_": 1.3401, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.428, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9865750352414016e-05, |
| "loss": 2.685, |
| "loss_": 1.2597, |
| "moe_loss": 0.1606, |
| "moe_loss_longrong": 1.4279, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9861068728483603e-05, |
| "loss": 2.7331, |
| "loss_": 1.3278, |
| "moe_loss": 0.1606, |
| "moe_loss_longrong": 1.4274, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.985630743831232e-05, |
| "loss": 2.7276, |
| "loss_": 1.542, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4281, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.985146652036599e-05, |
| "loss": 2.6914, |
| "loss_": 1.4054, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.427, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.984654601375373e-05, |
| "loss": 2.6531, |
| "loss_": 1.5136, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4262, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9841545958227654e-05, |
| "loss": 2.7346, |
| "loss_": 1.1452, |
| "moe_loss": 0.1606, |
| "moe_loss_longrong": 1.427, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.983646639418253e-05, |
| "loss": 2.707, |
| "loss_": 1.3454, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4264, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9831307362655473e-05, |
| "loss": 2.6949, |
| "loss_": 1.316, |
| "moe_loss": 0.1606, |
| "moe_loss_longrong": 1.427, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9826068905325598e-05, |
| "loss": 2.6725, |
| "loss_": 0.8014, |
| "moe_loss": 0.1612, |
| "moe_loss_longrong": 1.4544, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9820751064513693e-05, |
| "loss": 2.7006, |
| "loss_": 1.3368, |
| "moe_loss": 0.1606, |
| "moe_loss_longrong": 1.4252, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.981535388318188e-05, |
| "loss": 2.6809, |
| "loss_": 1.161, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.425, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.980987740493325e-05, |
| "loss": 2.6964, |
| "loss_": 1.1942, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4257, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9804321674011533e-05, |
| "loss": 2.6673, |
| "loss_": 1.1932, |
| "moe_loss": 0.1606, |
| "moe_loss_longrong": 1.4267, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.979868673530073e-05, |
| "loss": 2.6938, |
| "loss_": 1.2555, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4248, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9792972634324744e-05, |
| "loss": 2.7032, |
| "loss_": 1.1953, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4241, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9787179417247032e-05, |
| "loss": 2.6754, |
| "loss_": 1.1357, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4249, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.9781307130870204e-05, |
| "loss": 2.6969, |
| "loss_": 1.3238, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4245, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.9775355822635675e-05, |
| "loss": 2.6831, |
| "loss_": 1.2612, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4237, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.976932554062325e-05, |
| "loss": 2.6701, |
| "loss_": 1.1135, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4243, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.9763216333550768e-05, |
| "loss": 2.7003, |
| "loss_": 1.2469, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4236, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.9757028250773686e-05, |
| "loss": 2.6854, |
| "loss_": 1.3538, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4234, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.975076134228469e-05, |
| "loss": 2.6874, |
| "loss_": 1.226, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4239, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.9744415658713282e-05, |
| "loss": 2.7152, |
| "loss_": 1.2979, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4231, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.9737991251325384e-05, |
| "loss": 2.6908, |
| "loss_": 1.0737, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4238, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.9731488172022915e-05, |
| "loss": 2.7375, |
| "loss_": 1.416, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4219, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.972490647334337e-05, |
| "loss": 2.6467, |
| "loss_": 1.0544, |
| "moe_loss": 0.1606, |
| "moe_loss_longrong": 1.4236, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.971824620845941e-05, |
| "loss": 2.6613, |
| "loss_": 1.1665, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4229, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.9711507431178403e-05, |
| "loss": 2.654, |
| "loss_": 1.2871, |
| "moe_loss": 0.1606, |
| "moe_loss_longrong": 1.4239, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9704690195942035e-05, |
| "loss": 2.6831, |
| "loss_": 1.4114, |
| "moe_loss": 0.1606, |
| "moe_loss_longrong": 1.4235, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9697794557825812e-05, |
| "loss": 2.7215, |
| "loss_": 1.367, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4221, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.969082057253867e-05, |
| "loss": 2.6998, |
| "loss_": 1.0197, |
| "moe_loss": 0.1606, |
| "moe_loss_longrong": 1.422, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9683768296422495e-05, |
| "loss": 2.6869, |
| "loss_": 1.2449, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4224, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9676637786451665e-05, |
| "loss": 2.7047, |
| "loss_": 1.2273, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.422, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.966942910023261e-05, |
| "loss": 2.6873, |
| "loss_": 0.9599, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4206, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9662142296003335e-05, |
| "loss": 2.6721, |
| "loss_": 1.2456, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4221, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.965477743263294e-05, |
| "loss": 2.6481, |
| "loss_": 1.4271, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4222, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.964733456962116e-05, |
| "loss": 2.6621, |
| "loss_": 1.1236, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.422, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9639813767097886e-05, |
| "loss": 2.66, |
| "loss_": 1.2049, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4193, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9632215085822658e-05, |
| "loss": 2.7064, |
| "loss_": 1.2497, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4214, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.9624538587184197e-05, |
| "loss": 2.6533, |
| "loss_": 1.22, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4211, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9616784333199896e-05, |
| "loss": 2.644, |
| "loss_": 1.1443, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4217, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9608952386515327e-05, |
| "loss": 2.6987, |
| "loss_": 1.1736, |
| "moe_loss": 0.1606, |
| "moe_loss_longrong": 1.4197, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9601042810403725e-05, |
| "loss": 2.6732, |
| "loss_": 1.1886, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4196, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.959305566876549e-05, |
| "loss": 2.6806, |
| "loss_": 1.0944, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4205, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9584991026127655e-05, |
| "loss": 2.6919, |
| "loss_": 1.4477, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4204, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.957684894764338e-05, |
| "loss": 2.6751, |
| "loss_": 1.1916, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4204, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9568629499091413e-05, |
| "loss": 2.6459, |
| "loss_": 1.2407, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4207, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9560332746875574e-05, |
| "loss": 2.6572, |
| "loss_": 0.8698, |
| "moe_loss": 0.1613, |
| "moe_loss_longrong": 1.4436, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9551958758024194e-05, |
| "loss": 2.6679, |
| "loss_": 1.3397, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4202, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9543507600189606e-05, |
| "loss": 2.6673, |
| "loss_": 1.164, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4188, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9534979341647562e-05, |
| "loss": 2.6295, |
| "loss_": 1.3512, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4192, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9526374051296714e-05, |
| "loss": 2.645, |
| "loss_": 1.1948, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4185, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9517691798658042e-05, |
| "loss": 2.7004, |
| "loss_": 1.19, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4188, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9508932653874283e-05, |
| "loss": 2.6404, |
| "loss_": 1.4758, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.418, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9500096687709393e-05, |
| "loss": 2.6529, |
| "loss_": 1.1355, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4179, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9491183971547943e-05, |
| "loss": 2.6448, |
| "loss_": 1.3669, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4182, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.948219457739456e-05, |
| "loss": 2.674, |
| "loss_": 1.5143, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4184, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9473128577873346e-05, |
| "loss": 2.6813, |
| "loss_": 1.3613, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4184, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9463986046227284e-05, |
| "loss": 2.6566, |
| "loss_": 1.2685, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4179, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9454767056317654e-05, |
| "loss": 2.6556, |
| "loss_": 1.1164, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4171, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9445471682623425e-05, |
| "loss": 2.6723, |
| "loss_": 1.3762, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4178, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 1.9436100000240668e-05, |
| "loss": 2.6654, |
| "loss_": 1.3065, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4181, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9426652084881934e-05, |
| "loss": 2.6471, |
| "loss_": 0.7216, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4175, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9417128012875657e-05, |
| "loss": 2.6433, |
| "loss_": 1.311, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4172, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9407527861165523e-05, |
| "loss": 2.6788, |
| "loss_": 1.4472, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4174, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9397851707309864e-05, |
| "loss": 2.6715, |
| "loss_": 1.2477, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4167, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9388099629481017e-05, |
| "loss": 2.6497, |
| "loss_": 1.2279, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4171, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.93782717064647e-05, |
| "loss": 2.6772, |
| "loss_": 1.0676, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4161, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9368368017659368e-05, |
| "loss": 2.6543, |
| "loss_": 1.3057, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4164, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9358388643075597e-05, |
| "loss": 2.6439, |
| "loss_": 1.2984, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4155, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9348333663335393e-05, |
| "loss": 2.6489, |
| "loss_": 1.1934, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.415, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9338203159671584e-05, |
| "loss": 2.6834, |
| "loss_": 1.2899, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4166, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.9327997213927136e-05, |
| "loss": 2.6676, |
| "loss_": 1.3016, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4161, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 1.931771590855451e-05, |
| "loss": 2.6612, |
| "loss_": 1.0128, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4157, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9307359326614975e-05, |
| "loss": 2.6457, |
| "loss_": 1.1214, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4161, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.929692755177796e-05, |
| "loss": 2.6583, |
| "loss_": 1.2741, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4164, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9286420668320356e-05, |
| "loss": 2.6487, |
| "loss_": 0.9804, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.416, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9275838761125866e-05, |
| "loss": 2.6338, |
| "loss_": 1.1369, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4158, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.926518191568428e-05, |
| "loss": 2.6547, |
| "loss_": 1.2162, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4163, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9254450218090814e-05, |
| "loss": 2.6478, |
| "loss_": 1.1011, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.415, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.92436437550454e-05, |
| "loss": 2.6527, |
| "loss_": 1.2542, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4151, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9232762613851993e-05, |
| "loss": 2.6584, |
| "loss_": 1.269, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.414, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.922180688241786e-05, |
| "loss": 2.6481, |
| "loss_": 1.1536, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4155, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9210776649252875e-05, |
| "loss": 2.6695, |
| "loss_": 1.318, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4146, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 1.9199672003468795e-05, |
| "loss": 2.6144, |
| "loss_": 1.1917, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4145, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.918849303477856e-05, |
| "loss": 2.6512, |
| "loss_": 1.3762, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4143, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9177239833495545e-05, |
| "loss": 2.6538, |
| "loss_": 1.3326, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4144, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9165912490532838e-05, |
| "loss": 2.6337, |
| "loss_": 1.3393, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4141, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9154511097402512e-05, |
| "loss": 2.6493, |
| "loss_": 1.3026, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4143, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9143035746214883e-05, |
| "loss": 2.6833, |
| "loss_": 1.2821, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4147, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9131486529677755e-05, |
| "loss": 2.6348, |
| "loss_": 1.2194, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4144, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9119863541095697e-05, |
| "loss": 2.622, |
| "loss_": 1.4341, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4139, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9108166874369253e-05, |
| "loss": 2.6579, |
| "loss_": 1.1947, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4141, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9096396623994215e-05, |
| "loss": 2.6413, |
| "loss_": 1.3734, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4131, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9084552885060846e-05, |
| "loss": 2.6371, |
| "loss_": 1.2291, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.413, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9072635753253112e-05, |
| "loss": 2.6483, |
| "loss_": 1.1361, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4127, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 1.9060645324847904e-05, |
| "loss": 2.6325, |
| "loss_": 1.3775, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4128, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.9048581696714276e-05, |
| "loss": 2.6272, |
| "loss_": 1.2366, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4132, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.9036444966312652e-05, |
| "loss": 2.6566, |
| "loss_": 1.2485, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4132, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.9024235231694024e-05, |
| "loss": 2.6189, |
| "loss_": 1.3857, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4133, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.90119525914992e-05, |
| "loss": 2.6107, |
| "loss_": 1.0882, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4129, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.899959714495796e-05, |
| "loss": 2.6564, |
| "loss_": 1.0952, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4127, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.8987168991888293e-05, |
| "loss": 2.648, |
| "loss_": 0.9829, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4129, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.8974668232695562e-05, |
| "loss": 2.6334, |
| "loss_": 1.1611, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4121, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.896209496837171e-05, |
| "loss": 2.6435, |
| "loss_": 1.351, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4122, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.8949449300494444e-05, |
| "loss": 2.6572, |
| "loss_": 1.2158, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4125, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.8936731331226402e-05, |
| "loss": 2.6249, |
| "loss_": 1.2495, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4127, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 1.892394116331434e-05, |
| "loss": 2.6299, |
| "loss_": 1.0987, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4117, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.8911078900088295e-05, |
| "loss": 2.6377, |
| "loss_": 1.179, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4116, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.8898144645460744e-05, |
| "loss": 2.6133, |
| "loss_": 1.1341, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.412, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.8885138503925793e-05, |
| "loss": 2.6514, |
| "loss_": 1.1486, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4115, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.8872060580558295e-05, |
| "loss": 2.6529, |
| "loss_": 1.2706, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4111, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.8858910981013025e-05, |
| "loss": 2.6298, |
| "loss_": 1.2814, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4114, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.884568981152382e-05, |
| "loss": 2.6107, |
| "loss_": 0.8031, |
| "moe_loss": 0.161, |
| "moe_loss_longrong": 1.433, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.883239717890272e-05, |
| "loss": 2.6321, |
| "loss_": 1.3342, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4109, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.881903319053911e-05, |
| "loss": 2.6271, |
| "loss_": 1.224, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.412, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.880559795439884e-05, |
| "loss": 2.6168, |
| "loss_": 1.0488, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4113, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.8792091579023365e-05, |
| "loss": 2.6358, |
| "loss_": 1.0113, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.411, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 1.8778514173528873e-05, |
| "loss": 2.6396, |
| "loss_": 1.1213, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4112, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8764865847605384e-05, |
| "loss": 2.6268, |
| "loss_": 1.0843, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4125, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.875114671151587e-05, |
| "loss": 2.6604, |
| "loss_": 1.1708, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4112, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8737356876095387e-05, |
| "loss": 2.6187, |
| "loss_": 1.0976, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4107, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8723496452750146e-05, |
| "loss": 2.6198, |
| "loss_": 1.1692, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4107, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8709565553456632e-05, |
| "loss": 2.621, |
| "loss_": 1.3206, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4102, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.86955642907607e-05, |
| "loss": 2.6184, |
| "loss_": 1.1808, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4106, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8681492777776656e-05, |
| "loss": 2.6577, |
| "loss_": 1.1146, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4105, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8667351128186347e-05, |
| "loss": 2.6417, |
| "loss_": 1.3074, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4109, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8653139456238257e-05, |
| "loss": 2.6165, |
| "loss_": 1.0176, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4104, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8638857876746556e-05, |
| "loss": 2.6841, |
| "loss_": 1.3367, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4101, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8624506505090192e-05, |
| "loss": 2.6275, |
| "loss_": 1.1427, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4107, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 1.8610085457211958e-05, |
| "loss": 2.6526, |
| "loss_": 1.3939, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4104, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8595594849617552e-05, |
| "loss": 2.6202, |
| "loss_": 1.1005, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4099, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8581034799374632e-05, |
| "loss": 2.6634, |
| "loss_": 1.2608, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4098, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8566405424111873e-05, |
| "loss": 2.6483, |
| "loss_": 1.2839, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4097, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.855170684201802e-05, |
| "loss": 2.6077, |
| "loss_": 1.3359, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.41, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8536939171840934e-05, |
| "loss": 2.6574, |
| "loss_": 1.3449, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4099, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8522102532886627e-05, |
| "loss": 2.6374, |
| "loss_": 1.0245, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4102, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8507197045018286e-05, |
| "loss": 2.6555, |
| "loss_": 1.2334, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4094, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8492222828655347e-05, |
| "loss": 2.6118, |
| "loss_": 1.294, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4088, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8477180004772473e-05, |
| "loss": 2.6092, |
| "loss_": 1.1013, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4087, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8462068694898603e-05, |
| "loss": 2.6415, |
| "loss_": 1.1863, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4088, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 1.8446889021115967e-05, |
| "loss": 2.6141, |
| "loss_": 1.2587, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4091, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.84316411060591e-05, |
| "loss": 2.6031, |
| "loss_": 0.798, |
| "moe_loss": 0.1608, |
| "moe_loss_longrong": 1.4271, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.841632507291384e-05, |
| "loss": 2.6305, |
| "loss_": 0.9585, |
| "moe_loss": 0.161, |
| "moe_loss_longrong": 1.427, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.8400941045416352e-05, |
| "loss": 2.5888, |
| "loss_": 1.2668, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4088, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.8385489147852117e-05, |
| "loss": 2.6253, |
| "loss_": 1.0907, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4084, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.8369969505054915e-05, |
| "loss": 2.6541, |
| "loss_": 1.1209, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4083, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.8354382242405853e-05, |
| "loss": 2.6553, |
| "loss_": 1.1877, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4086, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.8338727485832317e-05, |
| "loss": 2.6105, |
| "loss_": 1.0542, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4087, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.832300536180696e-05, |
| "loss": 2.6209, |
| "loss_": 1.3141, |
| "moe_loss": 0.1608, |
| "moe_loss_longrong": 1.4275, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.8307215997346703e-05, |
| "loss": 2.6477, |
| "loss_": 1.3156, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4087, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.8291359520011687e-05, |
| "loss": 2.6633, |
| "loss_": 1.2031, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4075, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.8275436057904246e-05, |
| "loss": 2.6259, |
| "loss_": 1.1971, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4082, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 1.825944573966788e-05, |
| "loss": 2.6185, |
| "loss_": 1.098, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4073, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.82433886944862e-05, |
| "loss": 2.614, |
| "loss_": 1.0326, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4079, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.8227265052081913e-05, |
| "loss": 2.6257, |
| "loss_": 1.3002, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4078, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.821107494271574e-05, |
| "loss": 2.6101, |
| "loss_": 1.3726, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4089, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.8194818497185385e-05, |
| "loss": 2.6377, |
| "loss_": 1.2734, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4081, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.8178495846824474e-05, |
| "loss": 2.6187, |
| "loss_": 1.062, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4082, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.81621071235015e-05, |
| "loss": 2.6136, |
| "loss_": 1.3893, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4079, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.814565245961873e-05, |
| "loss": 2.6332, |
| "loss_": 1.3444, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4075, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.8129131988111174e-05, |
| "loss": 2.6251, |
| "loss_": 1.0967, |
| "moe_loss": 0.161, |
| "moe_loss_longrong": 1.4258, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.8112545842445488e-05, |
| "loss": 2.6364, |
| "loss_": 1.1797, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4066, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.80958941566189e-05, |
| "loss": 2.618, |
| "loss_": 1.2443, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4075, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 1.807917706515813e-05, |
| "loss": 2.5878, |
| "loss_": 1.4678, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4082, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.8062394703118294e-05, |
| "loss": 2.6224, |
| "loss_": 1.1059, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4077, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.804554720608183e-05, |
| "loss": 2.6185, |
| "loss_": 1.2769, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4077, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.8028634710157392e-05, |
| "loss": 2.5904, |
| "loss_": 1.1422, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4071, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.801165735197874e-05, |
| "loss": 2.5663, |
| "loss_": 0.9287, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4073, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.7994615268703655e-05, |
| "loss": 2.6135, |
| "loss_": 1.2268, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4064, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.7977508598012834e-05, |
| "loss": 2.5989, |
| "loss_": 1.1974, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4069, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.7960337478108743e-05, |
| "loss": 2.5877, |
| "loss_": 1.13, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4064, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.7943102047714548e-05, |
| "loss": 2.5955, |
| "loss_": 0.9327, |
| "moe_loss": 0.1609, |
| "moe_loss_longrong": 1.4235, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.7925802446072957e-05, |
| "loss": 2.596, |
| "loss_": 1.2759, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4068, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.7908438812945106e-05, |
| "loss": 2.6038, |
| "loss_": 1.1389, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4064, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 1.7891011288609454e-05, |
| "loss": 2.585, |
| "loss_": 1.3728, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4061, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.7873520013860595e-05, |
| "loss": 2.6263, |
| "loss_": 1.3243, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4064, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.7855965130008188e-05, |
| "loss": 2.6254, |
| "loss_": 1.2195, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4066, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.783834677887576e-05, |
| "loss": 2.6312, |
| "loss_": 0.8851, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4064, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.782066510279959e-05, |
| "loss": 2.6333, |
| "loss_": 1.3439, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4063, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.7802920244627543e-05, |
| "loss": 2.6112, |
| "loss_": 1.1944, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4063, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.778511234771793e-05, |
| "loss": 2.614, |
| "loss_": 1.3766, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4055, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.776724155593835e-05, |
| "loss": 2.5922, |
| "loss_": 1.1792, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4058, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.7749308013664503e-05, |
| "loss": 2.6604, |
| "loss_": 1.1936, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4058, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.7731311865779058e-05, |
| "loss": 2.6211, |
| "loss_": 1.235, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4057, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.771325325767046e-05, |
| "loss": 2.6152, |
| "loss_": 1.1696, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4055, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.7695132335231758e-05, |
| "loss": 2.6476, |
| "loss_": 1.2283, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4054, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 1.7676949244859435e-05, |
| "loss": 2.6351, |
| "loss_": 1.0637, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4051, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.7658704133452228e-05, |
| "loss": 2.6196, |
| "loss_": 1.3258, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4058, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.764039714840991e-05, |
| "loss": 2.5882, |
| "loss_": 0.9733, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4048, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.7622028437632154e-05, |
| "loss": 2.6128, |
| "loss_": 1.2358, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4052, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.7603598149517277e-05, |
| "loss": 2.6192, |
| "loss_": 0.9268, |
| "moe_loss": 0.1607, |
| "moe_loss_longrong": 1.4196, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.7585106432961093e-05, |
| "loss": 2.593, |
| "loss_": 1.0061, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4049, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.7566553437355674e-05, |
| "loss": 2.6141, |
| "loss_": 1.083, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4046, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.754793931258817e-05, |
| "loss": 2.6423, |
| "loss_": 1.0028, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4047, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.7529264209039573e-05, |
| "loss": 2.5863, |
| "loss_": 1.4222, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4047, |
| "step": 2009 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.751052827758352e-05, |
| "loss": 2.6299, |
| "loss_": 0.9747, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4046, |
| "step": 2016 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.7491731669585066e-05, |
| "loss": 2.6117, |
| "loss_": 1.1316, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4043, |
| "step": 2023 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 1.747287453689947e-05, |
| "loss": 2.6125, |
| "loss_": 0.9258, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.405, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.745395703187095e-05, |
| "loss": 2.6568, |
| "loss_": 1.2147, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4046, |
| "step": 2037 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7434979307331482e-05, |
| "loss": 2.6449, |
| "loss_": 1.033, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4042, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7415941516599525e-05, |
| "loss": 2.6137, |
| "loss_": 1.2328, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4039, |
| "step": 2051 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7396843813478825e-05, |
| "loss": 2.6196, |
| "loss_": 1.1898, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.404, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7377686352257136e-05, |
| "loss": 2.6021, |
| "loss_": 1.2029, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4045, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7358469287705e-05, |
| "loss": 2.6354, |
| "loss_": 1.368, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4045, |
| "step": 2072 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7339192775074486e-05, |
| "loss": 2.619, |
| "loss_": 1.4305, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4048, |
| "step": 2079 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7319856970097927e-05, |
| "loss": 2.6185, |
| "loss_": 1.19, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4043, |
| "step": 2086 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.730046202898668e-05, |
| "loss": 2.589, |
| "loss_": 1.1368, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4042, |
| "step": 2093 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7281008108429854e-05, |
| "loss": 2.6104, |
| "loss_": 1.02, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4044, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.726149536559304e-05, |
| "loss": 2.6138, |
| "loss_": 1.2443, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.404, |
| "step": 2107 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 1.7241923958117047e-05, |
| "loss": 2.6079, |
| "loss_": 1.272, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4039, |
| "step": 2114 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.7222294044116637e-05, |
| "loss": 2.6155, |
| "loss_": 1.2334, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4037, |
| "step": 2121 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.7202605782179223e-05, |
| "loss": 2.6217, |
| "loss_": 1.1778, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4039, |
| "step": 2128 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.718285933136361e-05, |
| "loss": 2.6156, |
| "loss_": 1.0468, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4037, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.7163054851198712e-05, |
| "loss": 2.6145, |
| "loss_": 1.2375, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4037, |
| "step": 2142 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.7143192501682243e-05, |
| "loss": 2.6167, |
| "loss_": 1.325, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4036, |
| "step": 2149 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.712327244327944e-05, |
| "loss": 2.5924, |
| "loss_": 0.955, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4035, |
| "step": 2156 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.7103294836921752e-05, |
| "loss": 2.6235, |
| "loss_": 1.0911, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4034, |
| "step": 2163 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.708325984400557e-05, |
| "loss": 2.6047, |
| "loss_": 0.8419, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4037, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.7063167626390893e-05, |
| "loss": 2.6268, |
| "loss_": 1.1833, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4029, |
| "step": 2177 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.7043018346400024e-05, |
| "loss": 2.622, |
| "loss_": 1.0641, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4028, |
| "step": 2184 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 1.7022812166816277e-05, |
| "loss": 2.6011, |
| "loss_": 0.9805, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4028, |
| "step": 2191 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.7002549250882637e-05, |
| "loss": 2.5584, |
| "loss_": 1.1622, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4025, |
| "step": 2198 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.698222976230047e-05, |
| "loss": 2.607, |
| "loss_": 1.2586, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4026, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.6961853865228176e-05, |
| "loss": 2.6328, |
| "loss_": 1.3466, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4029, |
| "step": 2212 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.6941421724279866e-05, |
| "loss": 2.568, |
| "loss_": 1.2851, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4028, |
| "step": 2219 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.6920933504524048e-05, |
| "loss": 2.5682, |
| "loss_": 1.033, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4028, |
| "step": 2226 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.6900389371482286e-05, |
| "loss": 2.5863, |
| "loss_": 1.1035, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4031, |
| "step": 2233 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.6879789491127837e-05, |
| "loss": 2.5745, |
| "loss_": 0.9979, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4026, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.685913402988436e-05, |
| "loss": 2.5738, |
| "loss_": 1.056, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.403, |
| "step": 2247 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.6838423154624534e-05, |
| "loss": 2.5971, |
| "loss_": 0.9538, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4021, |
| "step": 2254 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.6817657032668715e-05, |
| "loss": 2.5999, |
| "loss_": 1.2746, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4024, |
| "step": 2261 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 1.6796835831783597e-05, |
| "loss": 2.5917, |
| "loss_": 1.1284, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.4021, |
| "step": 2268 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.6775959720180847e-05, |
| "loss": 2.5756, |
| "loss_": 1.0512, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.402, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.675502886651574e-05, |
| "loss": 2.5869, |
| "loss_": 1.1705, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4019, |
| "step": 2282 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.6734043439885826e-05, |
| "loss": 2.6105, |
| "loss_": 1.2021, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4019, |
| "step": 2289 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.6713003609829518e-05, |
| "loss": 2.6133, |
| "loss_": 1.2789, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.402, |
| "step": 2296 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.669190954632477e-05, |
| "loss": 2.6103, |
| "loss_": 1.0784, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4019, |
| "step": 2303 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.667076141978765e-05, |
| "loss": 2.5459, |
| "loss_": 1.27, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4021, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.664955940107103e-05, |
| "loss": 2.5936, |
| "loss_": 1.0663, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4026, |
| "step": 2317 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.662830366146315e-05, |
| "loss": 2.5879, |
| "loss_": 0.9998, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4019, |
| "step": 2324 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.6606994372686246e-05, |
| "loss": 2.6045, |
| "loss_": 1.2394, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.402, |
| "step": 2331 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.6585631706895186e-05, |
| "loss": 2.5902, |
| "loss_": 1.1972, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4015, |
| "step": 2338 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.6564215836676066e-05, |
| "loss": 2.5844, |
| "loss_": 0.948, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4019, |
| "step": 2345 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 1.6542746935044793e-05, |
| "loss": 2.5781, |
| "loss_": 1.4827, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4025, |
| "step": 2352 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.652122517544573e-05, |
| "loss": 2.5821, |
| "loss_": 1.0247, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4022, |
| "step": 2359 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.6499650731750256e-05, |
| "loss": 2.6092, |
| "loss_": 0.8974, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4018, |
| "step": 2366 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.647802377825539e-05, |
| "loss": 2.5766, |
| "loss_": 1.0648, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4016, |
| "step": 2373 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.645634448968236e-05, |
| "loss": 2.603, |
| "loss_": 1.244, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4019, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.643461304117521e-05, |
| "loss": 2.6323, |
| "loss_": 1.3655, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.4015, |
| "step": 2387 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.6412829608299373e-05, |
| "loss": 2.6053, |
| "loss_": 1.3408, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4014, |
| "step": 2394 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.6390994367040257e-05, |
| "loss": 2.6053, |
| "loss_": 1.3031, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4016, |
| "step": 2401 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.636910749380183e-05, |
| "loss": 2.5956, |
| "loss_": 1.434, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4015, |
| "step": 2408 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.634716916540517e-05, |
| "loss": 2.6072, |
| "loss_": 1.3772, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4013, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.632517955908707e-05, |
| "loss": 2.6077, |
| "loss_": 1.2657, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4017, |
| "step": 2422 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 1.6303138852498594e-05, |
| "loss": 2.5694, |
| "loss_": 1.3289, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4014, |
| "step": 2429 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.6281047223703625e-05, |
| "loss": 2.5821, |
| "loss_": 1.1676, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4008, |
| "step": 2436 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.6258904851177434e-05, |
| "loss": 2.5965, |
| "loss_": 1.2449, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4013, |
| "step": 2443 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.6236711913805273e-05, |
| "loss": 2.6104, |
| "loss_": 1.1732, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4008, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.621446859088087e-05, |
| "loss": 2.5975, |
| "loss_": 1.2338, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4008, |
| "step": 2457 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.619217506210503e-05, |
| "loss": 2.6063, |
| "loss_": 1.28, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.401, |
| "step": 2464 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.6169831507584152e-05, |
| "loss": 2.5977, |
| "loss_": 1.3583, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4011, |
| "step": 2471 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.614743810782879e-05, |
| "loss": 2.6263, |
| "loss_": 1.4302, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.401, |
| "step": 2478 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.61249950437522e-05, |
| "loss": 2.6303, |
| "loss_": 1.0776, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.401, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.610250249666886e-05, |
| "loss": 2.5851, |
| "loss_": 1.0742, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4004, |
| "step": 2492 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.6079960648293016e-05, |
| "loss": 2.5652, |
| "loss_": 1.2411, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4006, |
| "step": 2499 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 1.605736968073721e-05, |
| "loss": 2.5674, |
| "loss_": 1.3629, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4002, |
| "step": 2506 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.6034729776510817e-05, |
| "loss": 2.5844, |
| "loss_": 1.2259, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4007, |
| "step": 2513 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.6012041118518558e-05, |
| "loss": 2.592, |
| "loss_": 1.3237, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4005, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.598930389005904e-05, |
| "loss": 2.5949, |
| "loss_": 1.1398, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4003, |
| "step": 2527 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.596651827482325e-05, |
| "loss": 2.5823, |
| "loss_": 0.984, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4, |
| "step": 2534 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.5943684456893103e-05, |
| "loss": 2.5586, |
| "loss_": 0.8138, |
| "moe_loss": 0.1606, |
| "moe_loss_longrong": 1.412, |
| "step": 2541 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.5920802620739914e-05, |
| "loss": 2.6019, |
| "loss_": 0.9803, |
| "moe_loss": 0.1606, |
| "moe_loss_longrong": 1.4116, |
| "step": 2548 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.5897872951222946e-05, |
| "loss": 2.5744, |
| "loss_": 0.8654, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3999, |
| "step": 2555 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.5874895633587904e-05, |
| "loss": 2.5881, |
| "loss_": 1.1376, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4001, |
| "step": 2562 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.585187085346543e-05, |
| "loss": 2.6219, |
| "loss_": 1.1824, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4003, |
| "step": 2569 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.5828798796869607e-05, |
| "loss": 2.5878, |
| "loss_": 1.2474, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4005, |
| "step": 2576 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.5805679650196456e-05, |
| "loss": 2.5889, |
| "loss_": 1.3011, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4002, |
| "step": 2583 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 1.5782513600222443e-05, |
| "loss": 2.5666, |
| "loss_": 0.8722, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.4005, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.5759300834102952e-05, |
| "loss": 2.562, |
| "loss_": 1.0941, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3999, |
| "step": 2597 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.5736041539370783e-05, |
| "loss": 2.5698, |
| "loss_": 1.1632, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3999, |
| "step": 2604 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.5712735903934627e-05, |
| "loss": 2.6022, |
| "loss_": 1.1992, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3999, |
| "step": 2611 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.568938411607757e-05, |
| "loss": 2.5882, |
| "loss_": 1.148, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.4, |
| "step": 2618 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.566598636445554e-05, |
| "loss": 2.5838, |
| "loss_": 0.9669, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3997, |
| "step": 2625 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.5642542838095814e-05, |
| "loss": 2.5775, |
| "loss_": 1.1281, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3995, |
| "step": 2632 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.5619053726395468e-05, |
| "loss": 2.5868, |
| "loss_": 1.0479, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3995, |
| "step": 2639 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.5595519219119863e-05, |
| "loss": 2.6, |
| "loss_": 0.9972, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3993, |
| "step": 2646 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.5571939506401103e-05, |
| "loss": 2.6007, |
| "loss_": 1.2232, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3993, |
| "step": 2653 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.5548314778736487e-05, |
| "loss": 2.6087, |
| "loss_": 1.2657, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3996, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 1.552464522698701e-05, |
| "loss": 2.5675, |
| "loss_": 1.2458, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3992, |
| "step": 2667 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.550093104237577e-05, |
| "loss": 2.5844, |
| "loss_": 1.1855, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3993, |
| "step": 2674 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.5477172416486464e-05, |
| "loss": 2.6192, |
| "loss_": 1.2552, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3993, |
| "step": 2681 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.5453369541261814e-05, |
| "loss": 2.5796, |
| "loss_": 1.3244, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3996, |
| "step": 2688 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.5429522609002034e-05, |
| "loss": 2.5859, |
| "loss_": 1.1373, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3992, |
| "step": 2695 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.540563181236326e-05, |
| "loss": 2.5702, |
| "loss_": 1.3094, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.399, |
| "step": 2702 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.5381697344356014e-05, |
| "loss": 2.5893, |
| "loss_": 1.2666, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3991, |
| "step": 2709 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.535771939834362e-05, |
| "loss": 2.5504, |
| "loss_": 1.2537, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3992, |
| "step": 2716 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.5333698168040664e-05, |
| "loss": 2.6094, |
| "loss_": 1.0996, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3992, |
| "step": 2723 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.530963384751142e-05, |
| "loss": 2.6049, |
| "loss_": 1.2289, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3992, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.5285526631168273e-05, |
| "loss": 2.5766, |
| "loss_": 1.115, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3992, |
| "step": 2737 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.5261376713770176e-05, |
| "loss": 2.5589, |
| "loss_": 1.3787, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3992, |
| "step": 2744 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 1.5237184290421035e-05, |
| "loss": 2.5508, |
| "loss_": 1.1691, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3993, |
| "step": 2751 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.521294955656817e-05, |
| "loss": 2.567, |
| "loss_": 1.0558, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3989, |
| "step": 2758 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.5188672708000725e-05, |
| "loss": 2.636, |
| "loss_": 1.464, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3991, |
| "step": 2765 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.5164353940848068e-05, |
| "loss": 2.5519, |
| "loss_": 0.957, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3986, |
| "step": 2772 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.5139993451578236e-05, |
| "loss": 2.6053, |
| "loss_": 1.2139, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3991, |
| "step": 2779 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.5115591436996327e-05, |
| "loss": 2.5661, |
| "loss_": 1.424, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3987, |
| "step": 2786 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.5091148094242913e-05, |
| "loss": 2.5659, |
| "loss_": 0.9377, |
| "moe_loss": 0.1605, |
| "moe_loss_longrong": 1.4097, |
| "step": 2793 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.5066663620792463e-05, |
| "loss": 2.5646, |
| "loss_": 1.0845, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3987, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.5042138214451719e-05, |
| "loss": 2.5793, |
| "loss_": 1.1437, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3982, |
| "step": 2807 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.5017572073358127e-05, |
| "loss": 2.5658, |
| "loss_": 1.1455, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3984, |
| "step": 2814 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.4992965395978219e-05, |
| "loss": 2.5799, |
| "loss_": 0.9263, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3986, |
| "step": 2821 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 1.4968318381106013e-05, |
| "loss": 2.6166, |
| "loss_": 1.1558, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3986, |
| "step": 2828 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4943631227861412e-05, |
| "loss": 2.5847, |
| "loss_": 1.2277, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3985, |
| "step": 2835 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4918904135688586e-05, |
| "loss": 2.5822, |
| "loss_": 1.3077, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3986, |
| "step": 2842 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4894137304354367e-05, |
| "loss": 2.5709, |
| "loss_": 1.1239, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3986, |
| "step": 2849 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4869330933946641e-05, |
| "loss": 2.6017, |
| "loss_": 1.3636, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3984, |
| "step": 2856 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4844485224872721e-05, |
| "loss": 2.5933, |
| "loss_": 1.1977, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3983, |
| "step": 2863 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.481960037785773e-05, |
| "loss": 2.5739, |
| "loss_": 1.3326, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3982, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4794676593942979e-05, |
| "loss": 2.5793, |
| "loss_": 1.0945, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3983, |
| "step": 2877 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.476971407448435e-05, |
| "loss": 2.5561, |
| "loss_": 0.9802, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3987, |
| "step": 2884 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4744713021150665e-05, |
| "loss": 2.5553, |
| "loss_": 1.025, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3978, |
| "step": 2891 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4719673635922047e-05, |
| "loss": 2.5462, |
| "loss_": 1.1738, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3979, |
| "step": 2898 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 1.4694596121088309e-05, |
| "loss": 2.58, |
| "loss_": 1.1755, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3978, |
| "step": 2905 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4669480679247299e-05, |
| "loss": 2.5715, |
| "loss_": 1.37, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3982, |
| "step": 2912 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4644327513303281e-05, |
| "loss": 2.5696, |
| "loss_": 1.2128, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3979, |
| "step": 2919 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4619136826465277e-05, |
| "loss": 2.6001, |
| "loss_": 1.1853, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3983, |
| "step": 2926 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4593908822245437e-05, |
| "loss": 2.5781, |
| "loss_": 1.0309, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3977, |
| "step": 2933 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4568643704457404e-05, |
| "loss": 2.5805, |
| "loss_": 1.1558, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3981, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.454334167721464e-05, |
| "loss": 2.5546, |
| "loss_": 1.1177, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3976, |
| "step": 2947 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4518002944928807e-05, |
| "loss": 2.5872, |
| "loss_": 1.4162, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3979, |
| "step": 2954 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4492627712308094e-05, |
| "loss": 2.5779, |
| "loss_": 1.3387, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.398, |
| "step": 2961 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4467216184355577e-05, |
| "loss": 2.5994, |
| "loss_": 1.3317, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3977, |
| "step": 2968 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4441768566367554e-05, |
| "loss": 2.5828, |
| "loss_": 1.0536, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3975, |
| "step": 2975 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4416285063931887e-05, |
| "loss": 2.5719, |
| "loss_": 1.1378, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3975, |
| "step": 2982 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 1.4390765882926348e-05, |
| "loss": 2.5612, |
| "loss_": 1.2159, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3978, |
| "step": 2989 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4365211229516951e-05, |
| "loss": 2.5558, |
| "loss_": 1.1645, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3975, |
| "step": 2996 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.433962131015628e-05, |
| "loss": 2.5854, |
| "loss_": 1.1987, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3977, |
| "step": 3003 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4313996331581841e-05, |
| "loss": 2.5635, |
| "loss_": 1.3072, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3975, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4288336500814366e-05, |
| "loss": 2.5645, |
| "loss_": 1.0643, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3976, |
| "step": 3017 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.426264202515616e-05, |
| "loss": 2.563, |
| "loss_": 1.2845, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3975, |
| "step": 3024 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4236913112189417e-05, |
| "loss": 2.5718, |
| "loss_": 1.175, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3975, |
| "step": 3031 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4211149969774544e-05, |
| "loss": 2.533, |
| "loss_": 1.1995, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3975, |
| "step": 3038 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.418535280604849e-05, |
| "loss": 2.5548, |
| "loss_": 1.2251, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.397, |
| "step": 3045 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4159521829423049e-05, |
| "loss": 2.5767, |
| "loss_": 0.8661, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.397, |
| "step": 3052 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.4133657248583186e-05, |
| "loss": 2.584, |
| "loss_": 0.7004, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4067, |
| "step": 3059 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 1.410775927248536e-05, |
| "loss": 2.6066, |
| "loss_": 0.9504, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3974, |
| "step": 3066 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.4081828110355806e-05, |
| "loss": 2.5768, |
| "loss_": 1.0234, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.397, |
| "step": 3073 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.4055863971688886e-05, |
| "loss": 2.5702, |
| "loss_": 1.0861, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3972, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.4029867066245363e-05, |
| "loss": 2.5943, |
| "loss_": 1.0817, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3969, |
| "step": 3087 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.400383760405072e-05, |
| "loss": 2.5626, |
| "loss_": 0.9245, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.397, |
| "step": 3094 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.3977775795393467e-05, |
| "loss": 2.5936, |
| "loss_": 1.3773, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3972, |
| "step": 3101 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.3951681850823427e-05, |
| "loss": 2.5673, |
| "loss_": 0.8812, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3967, |
| "step": 3108 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.392555598115005e-05, |
| "loss": 2.556, |
| "loss_": 1.3045, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3968, |
| "step": 3115 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.3899398397440704e-05, |
| "loss": 2.5809, |
| "loss_": 1.0862, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3969, |
| "step": 3122 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.3873209311018974e-05, |
| "loss": 2.5601, |
| "loss_": 0.764, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4061, |
| "step": 3129 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.3846988933462944e-05, |
| "loss": 2.5884, |
| "loss_": 1.0127, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3964, |
| "step": 3136 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.3820737476603506e-05, |
| "loss": 2.5553, |
| "loss_": 1.1064, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3969, |
| "step": 3143 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 1.3794455152522619e-05, |
| "loss": 2.5814, |
| "loss_": 1.2526, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3969, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3768142173551638e-05, |
| "loss": 2.5803, |
| "loss_": 1.084, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3968, |
| "step": 3157 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3741798752269553e-05, |
| "loss": 2.5698, |
| "loss_": 1.1246, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3964, |
| "step": 3164 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3715425101501306e-05, |
| "loss": 2.5792, |
| "loss_": 1.421, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3965, |
| "step": 3171 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3689021434316057e-05, |
| "loss": 2.5823, |
| "loss_": 0.9307, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3965, |
| "step": 3178 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3662587964025456e-05, |
| "loss": 2.596, |
| "loss_": 1.0908, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.397, |
| "step": 3185 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.363612490418194e-05, |
| "loss": 2.5583, |
| "loss_": 1.2338, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3968, |
| "step": 3192 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3609632468576997e-05, |
| "loss": 2.5646, |
| "loss_": 1.1325, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3964, |
| "step": 3199 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.358311087123942e-05, |
| "loss": 2.557, |
| "loss_": 1.0712, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3961, |
| "step": 3206 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3556560326433617e-05, |
| "loss": 2.5436, |
| "loss_": 0.9923, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3967, |
| "step": 3213 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.3529981048657846e-05, |
| "loss": 2.5845, |
| "loss_": 1.101, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3964, |
| "step": 3220 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1.35033732526425e-05, |
| "loss": 2.586, |
| "loss_": 1.153, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3963, |
| "step": 3227 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3476737153348363e-05, |
| "loss": 2.5813, |
| "loss_": 1.171, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3963, |
| "step": 3234 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3450072965964878e-05, |
| "loss": 2.5771, |
| "loss_": 1.2221, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.396, |
| "step": 3241 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.342338090590841e-05, |
| "loss": 2.5494, |
| "loss_": 1.0355, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3962, |
| "step": 3248 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3396661188820505e-05, |
| "loss": 2.611, |
| "loss_": 1.2927, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3962, |
| "step": 3255 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3369914030566147e-05, |
| "loss": 2.5692, |
| "loss_": 0.5951, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3963, |
| "step": 3262 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3343139647232008e-05, |
| "loss": 2.566, |
| "loss_": 1.2681, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3958, |
| "step": 3269 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3316338255124708e-05, |
| "loss": 2.562, |
| "loss_": 1.0295, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3965, |
| "step": 3276 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3289510070769074e-05, |
| "loss": 2.5404, |
| "loss_": 1.3584, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3958, |
| "step": 3283 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3262655310906375e-05, |
| "loss": 2.5778, |
| "loss_": 1.018, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3956, |
| "step": 3290 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.323577419249259e-05, |
| "loss": 2.5806, |
| "loss_": 0.9819, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3959, |
| "step": 3297 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 1.3208866932696639e-05, |
| "loss": 2.5737, |
| "loss_": 1.1931, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3961, |
| "step": 3304 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.3181933748898629e-05, |
| "loss": 2.5643, |
| "loss_": 1.1444, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3957, |
| "step": 3311 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.3154974858688121e-05, |
| "loss": 2.5495, |
| "loss_": 1.2428, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3959, |
| "step": 3318 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.3127990479862333e-05, |
| "loss": 2.5653, |
| "loss_": 1.3658, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3957, |
| "step": 3325 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.3100980830424419e-05, |
| "loss": 2.5537, |
| "loss_": 1.2466, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3957, |
| "step": 3332 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.3073946128581685e-05, |
| "loss": 2.5784, |
| "loss_": 1.1899, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3956, |
| "step": 3339 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.3046886592743828e-05, |
| "loss": 2.5491, |
| "loss_": 1.1516, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.396, |
| "step": 3346 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.3019802441521181e-05, |
| "loss": 2.5584, |
| "loss_": 1.3479, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3958, |
| "step": 3353 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.2992693893722939e-05, |
| "loss": 2.5629, |
| "loss_": 1.226, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3956, |
| "step": 3360 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.2965561168355394e-05, |
| "loss": 2.5635, |
| "loss_": 1.2831, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3956, |
| "step": 3367 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.2938404484620169e-05, |
| "loss": 2.5392, |
| "loss_": 1.3104, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3956, |
| "step": 3374 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.2911224061912433e-05, |
| "loss": 2.5353, |
| "loss_": 1.0487, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3953, |
| "step": 3381 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 1.2884020119819152e-05, |
| "loss": 2.5758, |
| "loss_": 0.7415, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3954, |
| "step": 3388 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2856792878117293e-05, |
| "loss": 2.56, |
| "loss_": 1.1296, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3954, |
| "step": 3395 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2829542556772059e-05, |
| "loss": 2.5564, |
| "loss_": 0.8006, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3953, |
| "step": 3402 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2802269375935112e-05, |
| "loss": 2.569, |
| "loss_": 1.131, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3951, |
| "step": 3409 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2774973555942796e-05, |
| "loss": 2.5637, |
| "loss_": 0.9494, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3955, |
| "step": 3416 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2747655317314344e-05, |
| "loss": 2.5588, |
| "loss_": 1.3893, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3956, |
| "step": 3423 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2720314880750118e-05, |
| "loss": 2.562, |
| "loss_": 1.1676, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3955, |
| "step": 3430 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.26929524671298e-05, |
| "loss": 2.5562, |
| "loss_": 0.9704, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3951, |
| "step": 3437 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.266556829751064e-05, |
| "loss": 2.583, |
| "loss_": 1.1975, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3954, |
| "step": 3444 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2638162593125634e-05, |
| "loss": 2.6252, |
| "loss_": 1.1995, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3953, |
| "step": 3451 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2610735575381763e-05, |
| "loss": 2.5464, |
| "loss_": 0.9304, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4035, |
| "step": 3458 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 1.2583287465858197e-05, |
| "loss": 2.5619, |
| "loss_": 1.1987, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3952, |
| "step": 3465 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2555818486304497e-05, |
| "loss": 2.5559, |
| "loss_": 1.2018, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.395, |
| "step": 3472 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2528328858638844e-05, |
| "loss": 2.5436, |
| "loss_": 0.6166, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3952, |
| "step": 3479 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2500818804946211e-05, |
| "loss": 2.5634, |
| "loss_": 1.1188, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3952, |
| "step": 3486 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.247328854747661e-05, |
| "loss": 2.5476, |
| "loss_": 1.1271, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3948, |
| "step": 3493 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2445738308643267e-05, |
| "loss": 2.5728, |
| "loss_": 1.0833, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3952, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2418168311020834e-05, |
| "loss": 2.5511, |
| "loss_": 1.2348, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3951, |
| "step": 3507 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2390578777343594e-05, |
| "loss": 2.5674, |
| "loss_": 1.3258, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3952, |
| "step": 3514 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.236296993050366e-05, |
| "loss": 2.5809, |
| "loss_": 1.2076, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3947, |
| "step": 3521 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2335341993549175e-05, |
| "loss": 2.5583, |
| "loss_": 0.945, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3947, |
| "step": 3528 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2307695189682502e-05, |
| "loss": 2.5778, |
| "loss_": 1.414, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3947, |
| "step": 3535 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2280029742258435e-05, |
| "loss": 2.5572, |
| "loss_": 1.2353, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.395, |
| "step": 3542 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 1.2252345874782376e-05, |
| "loss": 2.5725, |
| "loss_": 1.304, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3949, |
| "step": 3549 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2224643810908556e-05, |
| "loss": 2.5498, |
| "loss_": 1.2018, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.395, |
| "step": 3556 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2196923774438195e-05, |
| "loss": 2.534, |
| "loss_": 1.2461, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3947, |
| "step": 3563 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2169185989317724e-05, |
| "loss": 2.5985, |
| "loss_": 1.173, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3947, |
| "step": 3570 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2141430679636959e-05, |
| "loss": 2.5532, |
| "loss_": 1.2553, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3947, |
| "step": 3577 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.211365806962729e-05, |
| "loss": 2.5379, |
| "loss_": 1.1121, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3946, |
| "step": 3584 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2085868383659882e-05, |
| "loss": 2.5589, |
| "loss_": 1.1214, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3948, |
| "step": 3591 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2058061846243847e-05, |
| "loss": 2.5311, |
| "loss_": 0.9122, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3946, |
| "step": 3598 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2030238682024444e-05, |
| "loss": 2.5311, |
| "loss_": 0.7285, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4021, |
| "step": 3605 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.2002399115781253e-05, |
| "loss": 2.5848, |
| "loss_": 1.1433, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3946, |
| "step": 3612 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.1974543372426363e-05, |
| "loss": 2.5491, |
| "loss_": 0.8086, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3944, |
| "step": 3619 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 1.1946671677002563e-05, |
| "loss": 2.5353, |
| "loss_": 1.1375, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3947, |
| "step": 3626 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1918784254681506e-05, |
| "loss": 2.5366, |
| "loss_": 1.0491, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3944, |
| "step": 3633 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.189088133076191e-05, |
| "loss": 2.5363, |
| "loss_": 1.254, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3945, |
| "step": 3640 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1862963130667724e-05, |
| "loss": 2.5588, |
| "loss_": 1.2051, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3943, |
| "step": 3647 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1835029879946308e-05, |
| "loss": 2.5656, |
| "loss_": 0.9809, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3943, |
| "step": 3654 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1807081804266625e-05, |
| "loss": 2.5597, |
| "loss_": 0.9086, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3946, |
| "step": 3661 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1779119129417394e-05, |
| "loss": 2.5387, |
| "loss_": 0.9881, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4026, |
| "step": 3668 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.175114208130528e-05, |
| "loss": 2.5745, |
| "loss_": 1.1147, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3942, |
| "step": 3675 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1723150885953081e-05, |
| "loss": 2.5314, |
| "loss_": 1.0994, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4017, |
| "step": 3682 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1695145769497871e-05, |
| "loss": 2.5591, |
| "loss_": 1.1917, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3943, |
| "step": 3689 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1667126958189203e-05, |
| "loss": 2.5559, |
| "loss_": 1.122, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3944, |
| "step": 3696 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 1.1639094678387268e-05, |
| "loss": 2.5315, |
| "loss_": 1.3327, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3941, |
| "step": 3703 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1611049156561055e-05, |
| "loss": 2.5537, |
| "loss_": 1.1127, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.394, |
| "step": 3710 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1582990619286555e-05, |
| "loss": 2.5667, |
| "loss_": 1.1163, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3942, |
| "step": 3717 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1554919293244885e-05, |
| "loss": 2.5407, |
| "loss_": 1.3315, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3944, |
| "step": 3724 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1526835405220503e-05, |
| "loss": 2.5515, |
| "loss_": 0.9382, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3944, |
| "step": 3731 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1498739182099335e-05, |
| "loss": 2.5467, |
| "loss_": 0.94, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3941, |
| "step": 3738 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1470630850866966e-05, |
| "loss": 2.5174, |
| "loss_": 1.0814, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4014, |
| "step": 3745 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1442510638606813e-05, |
| "loss": 2.5437, |
| "loss_": 1.1433, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3939, |
| "step": 3752 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.141437877249826e-05, |
| "loss": 2.561, |
| "loss_": 1.0612, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3941, |
| "step": 3759 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1386235479814856e-05, |
| "loss": 2.5184, |
| "loss_": 1.009, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.394, |
| "step": 3766 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1358080987922452e-05, |
| "loss": 2.5491, |
| "loss_": 1.0159, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.394, |
| "step": 3773 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1329915524277384e-05, |
| "loss": 2.5448, |
| "loss_": 1.2692, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3943, |
| "step": 3780 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 1.1301739316424623e-05, |
| "loss": 2.5624, |
| "loss_": 1.1014, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3936, |
| "step": 3787 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.1273552591995943e-05, |
| "loss": 2.5418, |
| "loss_": 1.3636, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3938, |
| "step": 3794 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.124535557870808e-05, |
| "loss": 2.5431, |
| "loss_": 1.0197, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3938, |
| "step": 3801 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.1217148504360885e-05, |
| "loss": 2.5773, |
| "loss_": 1.4614, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3941, |
| "step": 3808 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.1188931596835509e-05, |
| "loss": 2.562, |
| "loss_": 0.9753, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3934, |
| "step": 3815 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.1160705084092526e-05, |
| "loss": 2.5293, |
| "loss_": 1.1196, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3936, |
| "step": 3822 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.1132469194170117e-05, |
| "loss": 2.5569, |
| "loss_": 1.3226, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3939, |
| "step": 3829 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.1104224155182215e-05, |
| "loss": 2.5512, |
| "loss_": 1.1311, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3939, |
| "step": 3836 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.1075970195316677e-05, |
| "loss": 2.5509, |
| "loss_": 1.1204, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3938, |
| "step": 3843 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.104770754283342e-05, |
| "loss": 2.5397, |
| "loss_": 1.1743, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3936, |
| "step": 3850 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.101943642606259e-05, |
| "loss": 2.5674, |
| "loss_": 1.3249, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3936, |
| "step": 3857 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 1.0991157073402723e-05, |
| "loss": 2.5645, |
| "loss_": 0.9719, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3936, |
| "step": 3864 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.096286971331888e-05, |
| "loss": 2.5283, |
| "loss_": 1.0872, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3938, |
| "step": 3871 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0934574574340821e-05, |
| "loss": 2.5598, |
| "loss_": 1.186, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3934, |
| "step": 3878 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0906271885061149e-05, |
| "loss": 2.5509, |
| "loss_": 1.0989, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3936, |
| "step": 3885 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0877961874133458e-05, |
| "loss": 2.5613, |
| "loss_": 1.1127, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3937, |
| "step": 3892 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0849644770270502e-05, |
| "loss": 2.5592, |
| "loss_": 1.1666, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3934, |
| "step": 3899 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0821320802242335e-05, |
| "loss": 2.5586, |
| "loss_": 1.2445, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3936, |
| "step": 3906 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0792990198874462e-05, |
| "loss": 2.5441, |
| "loss_": 0.8492, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3937, |
| "step": 3913 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0764653189046002e-05, |
| "loss": 2.5834, |
| "loss_": 1.3096, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3932, |
| "step": 3920 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.073631000168782e-05, |
| "loss": 2.5308, |
| "loss_": 1.0803, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3936, |
| "step": 3927 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0707960865780697e-05, |
| "loss": 2.5575, |
| "loss_": 1.2032, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3936, |
| "step": 3934 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0679606010353467e-05, |
| "loss": 2.5341, |
| "loss_": 1.1694, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3932, |
| "step": 3941 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 1.0651245664481176e-05, |
| "loss": 2.5644, |
| "loss_": 0.8158, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4, |
| "step": 3948 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.062288005728322e-05, |
| "loss": 2.5545, |
| "loss_": 0.8523, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.3993, |
| "step": 3955 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.0594509417921505e-05, |
| "loss": 2.5287, |
| "loss_": 1.0746, |
| "moe_loss": 0.1604, |
| "moe_loss_longrong": 1.4, |
| "step": 3962 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.0566133975598592e-05, |
| "loss": 2.5596, |
| "loss_": 0.8816, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.393, |
| "step": 3969 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.0537753959555844e-05, |
| "loss": 2.5464, |
| "loss_": 1.315, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3936, |
| "step": 3976 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.0509369599071563e-05, |
| "loss": 2.5213, |
| "loss_": 1.2024, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.4002, |
| "step": 3983 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.0480981123459175e-05, |
| "loss": 2.5931, |
| "loss_": 1.2043, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3931, |
| "step": 3990 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.0452588762065323e-05, |
| "loss": 2.5559, |
| "loss_": 1.1557, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3932, |
| "step": 3997 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.0424192744268063e-05, |
| "loss": 2.5297, |
| "loss_": 1.2939, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3929, |
| "step": 4004 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.0395793299474979e-05, |
| "loss": 2.5487, |
| "loss_": 0.9489, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.393, |
| "step": 4011 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.0367390657121346e-05, |
| "loss": 2.5221, |
| "loss_": 1.0307, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3933, |
| "step": 4018 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 1.033898504666827e-05, |
| "loss": 2.5383, |
| "loss_": 1.0582, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.393, |
| "step": 4025 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.031057669760084e-05, |
| "loss": 2.5864, |
| "loss_": 1.1518, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3933, |
| "step": 4032 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0282165839426268e-05, |
| "loss": 2.5311, |
| "loss_": 1.1738, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3931, |
| "step": 4039 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0253752701672033e-05, |
| "loss": 2.5819, |
| "loss_": 1.1982, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.393, |
| "step": 4046 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.022533751388403e-05, |
| "loss": 2.5437, |
| "loss_": 1.288, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3929, |
| "step": 4053 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0196920505624726e-05, |
| "loss": 2.5305, |
| "loss_": 1.1663, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3928, |
| "step": 4060 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0168501906471284e-05, |
| "loss": 2.5629, |
| "loss_": 1.1848, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.393, |
| "step": 4067 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.014008194601372e-05, |
| "loss": 2.5355, |
| "loss_": 1.0939, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.393, |
| "step": 4074 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0111660853853056e-05, |
| "loss": 2.5297, |
| "loss_": 0.9667, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.393, |
| "step": 4081 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0083238859599453e-05, |
| "loss": 2.5437, |
| "loss_": 0.7903, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.3992, |
| "step": 4088 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.005481619287036e-05, |
| "loss": 2.5958, |
| "loss_": 1.1979, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3927, |
| "step": 4095 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 1.0026393083288659e-05, |
| "loss": 2.5676, |
| "loss_": 1.1374, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3926, |
| "step": 4102 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.997969760480802e-06, |
| "loss": 2.5415, |
| "loss_": 1.132, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3929, |
| "step": 4109 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.969546454074977e-06, |
| "loss": 2.5337, |
| "loss_": 1.0972, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3929, |
| "step": 4116 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.941123393699235e-06, |
| "loss": 2.5709, |
| "loss_": 1.0257, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3931, |
| "step": 4123 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.912700808979632e-06, |
| "loss": 2.5593, |
| "loss_": 1.1574, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3928, |
| "step": 4130 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.884278929538387e-06, |
| "loss": 2.5532, |
| "loss_": 1.0852, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3928, |
| "step": 4137 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.855857984992026e-06, |
| "loss": 2.544, |
| "loss_": 1.2876, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3928, |
| "step": 4144 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.82743820494951e-06, |
| "loss": 2.5251, |
| "loss_": 1.0657, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3925, |
| "step": 4151 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.799019819010405e-06, |
| "loss": 2.5677, |
| "loss_": 1.0093, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3927, |
| "step": 4158 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.770603056763009e-06, |
| "loss": 2.5323, |
| "loss_": 1.0455, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3927, |
| "step": 4165 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.742188147782494e-06, |
| "loss": 2.5443, |
| "loss_": 1.2455, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3927, |
| "step": 4172 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.713775321629073e-06, |
| "loss": 2.5462, |
| "loss_": 0.9975, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3928, |
| "step": 4179 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.685364807846127e-06, |
| "loss": 2.5703, |
| "loss_": 1.3459, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3925, |
| "step": 4186 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.656956835958356e-06, |
| "loss": 2.5182, |
| "loss_": 1.0759, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3927, |
| "step": 4193 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.628551635469918e-06, |
| "loss": 2.566, |
| "loss_": 1.248, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3924, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.600149435862593e-06, |
| "loss": 2.5279, |
| "loss_": 1.1282, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3925, |
| "step": 4207 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.571750466593912e-06, |
| "loss": 2.5369, |
| "loss_": 0.8771, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3924, |
| "step": 4214 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.543354957095299e-06, |
| "loss": 2.5527, |
| "loss_": 1.0412, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3926, |
| "step": 4221 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.514963136770242e-06, |
| "loss": 2.5574, |
| "loss_": 0.977, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3923, |
| "step": 4228 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.486575234992423e-06, |
| "loss": 2.5446, |
| "loss_": 1.2368, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3924, |
| "step": 4235 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.45819148110385e-06, |
| "loss": 2.5138, |
| "loss_": 1.0194, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3925, |
| "step": 4242 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.429812104413042e-06, |
| "loss": 2.5362, |
| "loss_": 0.59, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3924, |
| "step": 4249 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.401437334193143e-06, |
| "loss": 2.5026, |
| "loss_": 1.1442, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3923, |
| "step": 4256 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.373067399680084e-06, |
| "loss": 2.4977, |
| "loss_": 1.0847, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3925, |
| "step": 4263 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.344702530070729e-06, |
| "loss": 2.5063, |
| "loss_": 1.2399, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3926, |
| "step": 4270 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.316342954521028e-06, |
| "loss": 2.5195, |
| "loss_": 1.0512, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.392, |
| "step": 4277 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.287988902144157e-06, |
| "loss": 2.5811, |
| "loss_": 1.2023, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3926, |
| "step": 4284 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.259640602008667e-06, |
| "loss": 2.5491, |
| "loss_": 0.9779, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3922, |
| "step": 4291 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.231298283136641e-06, |
| "loss": 2.5653, |
| "loss_": 1.156, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3923, |
| "step": 4298 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.202962174501848e-06, |
| "loss": 2.5369, |
| "loss_": 1.1885, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3924, |
| "step": 4305 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.17463250502787e-06, |
| "loss": 2.5003, |
| "loss_": 1.1876, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.392, |
| "step": 4312 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.146309503586282e-06, |
| "loss": 2.5501, |
| "loss_": 1.0059, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.3976, |
| "step": 4319 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.117993398994784e-06, |
| "loss": 2.5294, |
| "loss_": 1.2092, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.392, |
| "step": 4326 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.089684420015346e-06, |
| "loss": 2.5311, |
| "loss_": 1.092, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3922, |
| "step": 4333 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.06138279535239e-06, |
| "loss": 2.5313, |
| "loss_": 1.3094, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.392, |
| "step": 4340 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.033088753650918e-06, |
| "loss": 2.5423, |
| "loss_": 0.8346, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3921, |
| "step": 4347 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 9.004802523494655e-06, |
| "loss": 2.5419, |
| "loss_": 0.9882, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3922, |
| "step": 4354 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.976524333404238e-06, |
| "loss": 2.5791, |
| "loss_": 1.1859, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3921, |
| "step": 4361 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.94825441183534e-06, |
| "loss": 2.5575, |
| "loss_": 1.104, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3921, |
| "step": 4368 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.919992987176836e-06, |
| "loss": 2.5297, |
| "loss_": 1.0182, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3922, |
| "step": 4375 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.891740287748952e-06, |
| "loss": 2.5778, |
| "loss_": 1.0814, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3922, |
| "step": 4382 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.863496541801424e-06, |
| "loss": 2.5495, |
| "loss_": 1.171, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.392, |
| "step": 4389 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.835261977511666e-06, |
| "loss": 2.5199, |
| "loss_": 1.1374, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.392, |
| "step": 4396 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.807036822982892e-06, |
| "loss": 2.5528, |
| "loss_": 1.2331, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3919, |
| "step": 4403 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.778821306242318e-06, |
| "loss": 2.5504, |
| "loss_": 1.3552, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3921, |
| "step": 4410 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.750615655239287e-06, |
| "loss": 2.5511, |
| "loss_": 1.2613, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3918, |
| "step": 4417 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 8.722420097843437e-06, |
| "loss": 2.5019, |
| "loss_": 0.9546, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3919, |
| "step": 4424 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.694234861842865e-06, |
| "loss": 2.5351, |
| "loss_": 1.1331, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3919, |
| "step": 4431 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.66606017494228e-06, |
| "loss": 2.5412, |
| "loss_": 1.2775, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3921, |
| "step": 4438 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.637896264761176e-06, |
| "loss": 2.4963, |
| "loss_": 0.7466, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.3971, |
| "step": 4445 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.609743358831965e-06, |
| "loss": 2.5192, |
| "loss_": 1.0139, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3918, |
| "step": 4452 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.58160168459817e-06, |
| "loss": 2.5407, |
| "loss_": 1.0999, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3918, |
| "step": 4459 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.553471469412577e-06, |
| "loss": 2.5692, |
| "loss_": 1.1933, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3917, |
| "step": 4466 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.525352940535381e-06, |
| "loss": 2.5063, |
| "loss_": 0.9695, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.392, |
| "step": 4473 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.497246325132382e-06, |
| "loss": 2.5941, |
| "loss_": 1.288, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3919, |
| "step": 4480 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.469151850273124e-06, |
| "loss": 2.5543, |
| "loss_": 1.1849, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.392, |
| "step": 4487 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.441069742929069e-06, |
| "loss": 2.5168, |
| "loss_": 1.1032, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3917, |
| "step": 4494 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 8.413000229971765e-06, |
| "loss": 2.5085, |
| "loss_": 1.1973, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3917, |
| "step": 4501 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.384943538171017e-06, |
| "loss": 2.5283, |
| "loss_": 1.2194, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3918, |
| "step": 4508 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.356899894193038e-06, |
| "loss": 2.5342, |
| "loss_": 1.176, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3917, |
| "step": 4515 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.328869524598635e-06, |
| "loss": 2.5705, |
| "loss_": 1.1662, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3916, |
| "step": 4522 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.300852655841378e-06, |
| "loss": 2.5423, |
| "loss_": 1.0822, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3916, |
| "step": 4529 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.272849514265763e-06, |
| "loss": 2.5792, |
| "loss_": 1.1001, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3915, |
| "step": 4536 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.244860326105378e-06, |
| "loss": 2.539, |
| "loss_": 1.3069, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3916, |
| "step": 4543 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.216885317481091e-06, |
| "loss": 2.5393, |
| "loss_": 1.0325, |
| "moe_loss": 0.1603, |
| "moe_loss_longrong": 1.3971, |
| "step": 4550 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.188924714399222e-06, |
| "loss": 2.5338, |
| "loss_": 1.0938, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3919, |
| "step": 4557 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.160978742749692e-06, |
| "loss": 2.5578, |
| "loss_": 0.9998, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3916, |
| "step": 4564 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.133047628304229e-06, |
| "loss": 2.5287, |
| "loss_": 1.1445, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3915, |
| "step": 4571 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.105131596714538e-06, |
| "loss": 2.5354, |
| "loss_": 1.0362, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3915, |
| "step": 4578 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 8.077230873510452e-06, |
| "loss": 2.548, |
| "loss_": 1.0803, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3916, |
| "step": 4585 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 8.049345684098148e-06, |
| "loss": 2.5192, |
| "loss_": 1.1937, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3916, |
| "step": 4592 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 8.021476253758303e-06, |
| "loss": 2.5454, |
| "loss_": 1.2712, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3914, |
| "step": 4599 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.99362280764427e-06, |
| "loss": 2.5142, |
| "loss_": 1.3818, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3918, |
| "step": 4606 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.965785570780275e-06, |
| "loss": 2.5291, |
| "loss_": 1.2159, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3915, |
| "step": 4613 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.937964768059592e-06, |
| "loss": 2.5456, |
| "loss_": 1.0392, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3915, |
| "step": 4620 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.91016062424273e-06, |
| "loss": 2.541, |
| "loss_": 1.2045, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3915, |
| "step": 4627 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.882373363955597e-06, |
| "loss": 2.5365, |
| "loss_": 1.1843, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3914, |
| "step": 4634 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.854603211687715e-06, |
| "loss": 2.5216, |
| "loss_": 1.0943, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3914, |
| "step": 4641 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.826850391790393e-06, |
| "loss": 2.4891, |
| "loss_": 0.9685, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3913, |
| "step": 4648 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.799115128474907e-06, |
| "loss": 2.5239, |
| "loss_": 1.093, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3914, |
| "step": 4655 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 7.771397645810699e-06, |
| "loss": 2.5494, |
| "loss_": 1.4255, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3915, |
| "step": 4662 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.743698167723568e-06, |
| "loss": 2.5264, |
| "loss_": 1.3261, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3915, |
| "step": 4669 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.716016917993843e-06, |
| "loss": 2.5483, |
| "loss_": 1.047, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3912, |
| "step": 4676 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.688354120254606e-06, |
| "loss": 2.5823, |
| "loss_": 1.3127, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3914, |
| "step": 4683 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.660709997989855e-06, |
| "loss": 2.6013, |
| "loss_": 1.2204, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3914, |
| "step": 4690 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.633084774532717e-06, |
| "loss": 2.5238, |
| "loss_": 0.9103, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3915, |
| "step": 4697 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.605478673063635e-06, |
| "loss": 2.5269, |
| "loss_": 1.2055, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3914, |
| "step": 4704 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.577891916608574e-06, |
| "loss": 2.5042, |
| "loss_": 1.0835, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3913, |
| "step": 4711 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.5503247280372104e-06, |
| "loss": 2.5373, |
| "loss_": 0.7241, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3914, |
| "step": 4718 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.522777330061126e-06, |
| "loss": 2.562, |
| "loss_": 1.3803, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3916, |
| "step": 4725 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.495249945232028e-06, |
| "loss": 2.5934, |
| "loss_": 0.7858, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3914, |
| "step": 4732 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.467742795939941e-06, |
| "loss": 2.5437, |
| "loss_": 1.304, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3913, |
| "step": 4739 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 7.440256104411394e-06, |
| "loss": 2.5461, |
| "loss_": 1.188, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3913, |
| "step": 4746 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.4127900927076575e-06, |
| "loss": 2.538, |
| "loss_": 1.0811, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3912, |
| "step": 4753 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.385344982722928e-06, |
| "loss": 2.5834, |
| "loss_": 1.0429, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3911, |
| "step": 4760 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.3579209961825346e-06, |
| "loss": 2.5379, |
| "loss_": 1.0402, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3913, |
| "step": 4767 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.330518354641156e-06, |
| "loss": 2.5444, |
| "loss_": 0.9376, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3912, |
| "step": 4774 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.303137279481034e-06, |
| "loss": 2.507, |
| "loss_": 1.0915, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3914, |
| "step": 4781 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.275777991910164e-06, |
| "loss": 2.4976, |
| "loss_": 1.0431, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3912, |
| "step": 4788 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.248440712960535e-06, |
| "loss": 2.5333, |
| "loss_": 1.1329, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3911, |
| "step": 4795 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.2211256634863255e-06, |
| "loss": 2.5446, |
| "loss_": 1.0213, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3914, |
| "step": 4802 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.1938330641621316e-06, |
| "loss": 2.5385, |
| "loss_": 1.0947, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.391, |
| "step": 4809 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.166563135481166e-06, |
| "loss": 2.5381, |
| "loss_": 1.0597, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3911, |
| "step": 4816 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 7.139316097753499e-06, |
| "loss": 2.5394, |
| "loss_": 1.2727, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3911, |
| "step": 4823 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 7.112092171104268e-06, |
| "loss": 2.5323, |
| "loss_": 0.9192, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.391, |
| "step": 4830 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 7.084891575471885e-06, |
| "loss": 2.5276, |
| "loss_": 1.0647, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3913, |
| "step": 4837 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 7.05771453060629e-06, |
| "loss": 2.5388, |
| "loss_": 1.02, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3912, |
| "step": 4844 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 7.030561256067159e-06, |
| "loss": 2.5259, |
| "loss_": 1.1344, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.391, |
| "step": 4851 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 7.003431971222115e-06, |
| "loss": 2.5743, |
| "loss_": 1.1341, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3913, |
| "step": 4858 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 6.976326895244987e-06, |
| "loss": 2.5281, |
| "loss_": 1.0979, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3909, |
| "step": 4865 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 6.949246247114019e-06, |
| "loss": 2.5364, |
| "loss_": 1.1133, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.391, |
| "step": 4872 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 6.922190245610106e-06, |
| "loss": 2.4967, |
| "loss_": 1.1036, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3909, |
| "step": 4879 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 6.895159109315022e-06, |
| "loss": 2.525, |
| "loss_": 1.0468, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3951, |
| "step": 4886 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 6.868153056609665e-06, |
| "loss": 2.5504, |
| "loss_": 1.0107, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3912, |
| "step": 4893 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 6.841172305672289e-06, |
| "loss": 2.5496, |
| "loss_": 1.1926, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3909, |
| "step": 4900 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.814217074476721e-06, |
| "loss": 2.5328, |
| "loss_": 1.2163, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3909, |
| "step": 4907 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.787287580790634e-06, |
| "loss": 2.5072, |
| "loss_": 0.9685, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.391, |
| "step": 4914 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.760384042173769e-06, |
| "loss": 2.5442, |
| "loss_": 0.9827, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.391, |
| "step": 4921 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.733506675976171e-06, |
| "loss": 2.5335, |
| "loss_": 0.9915, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3911, |
| "step": 4928 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.7066556993364525e-06, |
| "loss": 2.5435, |
| "loss_": 1.1737, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3908, |
| "step": 4935 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.679831329180025e-06, |
| "loss": 2.5157, |
| "loss_": 0.8595, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.391, |
| "step": 4942 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.653033782217337e-06, |
| "loss": 2.518, |
| "loss_": 1.1064, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3908, |
| "step": 4949 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.626263274942157e-06, |
| "loss": 2.5186, |
| "loss_": 1.2309, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3908, |
| "step": 4956 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.599520023629789e-06, |
| "loss": 2.531, |
| "loss_": 1.2354, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.391, |
| "step": 4963 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.572804244335349e-06, |
| "loss": 2.5529, |
| "loss_": 1.056, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3909, |
| "step": 4970 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.546116152891998e-06, |
| "loss": 2.5579, |
| "loss_": 1.0829, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.391, |
| "step": 4977 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 6.519455964909223e-06, |
| "loss": 2.5493, |
| "loss_": 1.1124, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3908, |
| "step": 4984 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.492823895771077e-06, |
| "loss": 2.5263, |
| "loss_": 0.9684, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.395, |
| "step": 4991 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.466220160634444e-06, |
| "loss": 2.5259, |
| "loss_": 1.2248, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.391, |
| "step": 4998 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.439644974427304e-06, |
| "loss": 2.5509, |
| "loss_": 0.9338, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.391, |
| "step": 5005 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.4130985518469965e-06, |
| "loss": 2.5509, |
| "loss_": 1.1724, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3907, |
| "step": 5012 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.386581107358473e-06, |
| "loss": 2.5498, |
| "loss_": 1.2157, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.391, |
| "step": 5019 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.360092855192586e-06, |
| "loss": 2.5417, |
| "loss_": 1.0377, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3907, |
| "step": 5026 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.3336340093443424e-06, |
| "loss": 2.527, |
| "loss_": 1.2523, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3908, |
| "step": 5033 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.307204783571179e-06, |
| "loss": 2.523, |
| "loss_": 0.7425, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3908, |
| "step": 5040 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.280805391391238e-06, |
| "loss": 2.5512, |
| "loss_": 1.2265, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3906, |
| "step": 5047 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.254436046081641e-06, |
| "loss": 2.5116, |
| "loss_": 1.2887, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3908, |
| "step": 5054 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 6.228096960676764e-06, |
| "loss": 2.5107, |
| "loss_": 0.8449, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3907, |
| "step": 5061 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.201788347966511e-06, |
| "loss": 2.5181, |
| "loss_": 1.1947, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3908, |
| "step": 5068 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.175510420494609e-06, |
| "loss": 2.5637, |
| "loss_": 1.3614, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3909, |
| "step": 5075 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.149263390556887e-06, |
| "loss": 2.5147, |
| "loss_": 1.0484, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3906, |
| "step": 5082 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.123047470199539e-06, |
| "loss": 2.5507, |
| "loss_": 1.3089, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3906, |
| "step": 5089 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.096862871217448e-06, |
| "loss": 2.5643, |
| "loss_": 1.0995, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3907, |
| "step": 5096 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.070709805152451e-06, |
| "loss": 2.5202, |
| "loss_": 1.3114, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3907, |
| "step": 5103 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.044588483291625e-06, |
| "loss": 2.5343, |
| "loss_": 1.2697, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3908, |
| "step": 5110 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 6.018499116665603e-06, |
| "loss": 2.5169, |
| "loss_": 1.2687, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3906, |
| "step": 5117 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 5.9924419160468515e-06, |
| "loss": 2.5049, |
| "loss_": 0.9986, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3907, |
| "step": 5124 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 5.966417091947965e-06, |
| "loss": 2.5498, |
| "loss_": 1.1027, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3907, |
| "step": 5131 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 5.9404248546199795e-06, |
| "loss": 2.5273, |
| "loss_": 1.1325, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3906, |
| "step": 5138 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 5.914465414050669e-06, |
| "loss": 2.5246, |
| "loss_": 1.1098, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3906, |
| "step": 5145 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.888538979962843e-06, |
| "loss": 2.5145, |
| "loss_": 1.1524, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3905, |
| "step": 5152 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.862645761812655e-06, |
| "loss": 2.5404, |
| "loss_": 1.3356, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3907, |
| "step": 5159 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.836785968787915e-06, |
| "loss": 2.5027, |
| "loss_": 1.0651, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3907, |
| "step": 5166 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.810959809806396e-06, |
| "loss": 2.5426, |
| "loss_": 1.0368, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3904, |
| "step": 5173 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.785167493514137e-06, |
| "loss": 2.5547, |
| "loss_": 1.1137, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3908, |
| "step": 5180 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.759409228283779e-06, |
| "loss": 2.5616, |
| "loss_": 1.0141, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3904, |
| "step": 5187 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.733685222212868e-06, |
| "loss": 2.5659, |
| "loss_": 1.0579, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3906, |
| "step": 5194 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.7079956831221616e-06, |
| "loss": 2.5385, |
| "loss_": 1.1832, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3906, |
| "step": 5201 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.682340818553978e-06, |
| "loss": 2.5505, |
| "loss_": 0.9514, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3907, |
| "step": 5208 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.656720835770499e-06, |
| "loss": 2.5296, |
| "loss_": 1.1111, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3907, |
| "step": 5215 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 5.6311359417520975e-06, |
| "loss": 2.556, |
| "loss_": 1.1038, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3906, |
| "step": 5222 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.605586343195676e-06, |
| "loss": 2.5203, |
| "loss_": 1.0794, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3905, |
| "step": 5229 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.580072246512984e-06, |
| "loss": 2.531, |
| "loss_": 1.1714, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3905, |
| "step": 5236 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.5545938578289626e-06, |
| "loss": 2.5175, |
| "loss_": 1.0077, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3904, |
| "step": 5243 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.529151382980065e-06, |
| "loss": 2.5567, |
| "loss_": 0.9865, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3903, |
| "step": 5250 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.503745027512608e-06, |
| "loss": 2.5494, |
| "loss_": 1.1312, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3903, |
| "step": 5257 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.478374996681104e-06, |
| "loss": 2.511, |
| "loss_": 0.957, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3903, |
| "step": 5264 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.453041495446596e-06, |
| "loss": 2.5376, |
| "loss_": 1.1284, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.395, |
| "step": 5271 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.427744728475016e-06, |
| "loss": 2.519, |
| "loss_": 1.1458, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3904, |
| "step": 5278 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.40248490013553e-06, |
| "loss": 2.5264, |
| "loss_": 1.0995, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3906, |
| "step": 5285 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.3772622144988665e-06, |
| "loss": 2.5051, |
| "loss_": 1.1345, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3904, |
| "step": 5292 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 5.352076875335697e-06, |
| "loss": 2.5742, |
| "loss_": 0.9607, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3904, |
| "step": 5299 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.326929086114972e-06, |
| "loss": 2.5419, |
| "loss_": 0.9965, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3903, |
| "step": 5306 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.30181905000228e-06, |
| "loss": 2.5681, |
| "loss_": 0.7612, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3902, |
| "step": 5313 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.276746969858204e-06, |
| "loss": 2.537, |
| "loss_": 1.0697, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3903, |
| "step": 5320 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.251713048236691e-06, |
| "loss": 2.5471, |
| "loss_": 1.0796, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3903, |
| "step": 5327 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.226717487383414e-06, |
| "loss": 2.51, |
| "loss_": 1.2432, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3903, |
| "step": 5334 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.20176048923412e-06, |
| "loss": 2.5478, |
| "loss_": 1.0954, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3904, |
| "step": 5341 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.176842255413028e-06, |
| "loss": 2.5248, |
| "loss_": 1.106, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3903, |
| "step": 5348 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.151962987231179e-06, |
| "loss": 2.5251, |
| "loss_": 1.1429, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3904, |
| "step": 5355 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.127122885684815e-06, |
| "loss": 2.5393, |
| "loss_": 1.047, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3903, |
| "step": 5362 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.102322151453759e-06, |
| "loss": 2.5347, |
| "loss_": 1.1776, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3904, |
| "step": 5369 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.077560984899794e-06, |
| "loss": 2.5264, |
| "loss_": 1.1651, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3903, |
| "step": 5376 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 5.052839586065027e-06, |
| "loss": 2.5453, |
| "loss_": 1.2535, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3903, |
| "step": 5383 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 5.028158154670302e-06, |
| "loss": 2.5428, |
| "loss_": 1.1274, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3905, |
| "step": 5390 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 5.003516890113563e-06, |
| "loss": 2.5141, |
| "loss_": 1.1251, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3903, |
| "step": 5397 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.978915991468262e-06, |
| "loss": 2.5363, |
| "loss_": 1.1698, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3903, |
| "step": 5404 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.954355657481722e-06, |
| "loss": 2.5367, |
| "loss_": 1.1349, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3902, |
| "step": 5411 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.929836086573566e-06, |
| "loss": 2.5367, |
| "loss_": 1.2559, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3903, |
| "step": 5418 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.905357476834095e-06, |
| "loss": 2.5303, |
| "loss_": 1.0117, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3902, |
| "step": 5425 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.88092002602268e-06, |
| "loss": 2.5277, |
| "loss_": 1.118, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3902, |
| "step": 5432 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.856523931566184e-06, |
| "loss": 2.5355, |
| "loss_": 1.2879, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3904, |
| "step": 5439 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.832169390557357e-06, |
| "loss": 2.5615, |
| "loss_": 0.9215, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3902, |
| "step": 5446 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.807856599753243e-06, |
| "loss": 2.5715, |
| "loss_": 1.0914, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3902, |
| "step": 5453 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.783585755573589e-06, |
| "loss": 2.5301, |
| "loss_": 1.1468, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3902, |
| "step": 5460 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.75935705409927e-06, |
| "loss": 2.5139, |
| "loss_": 1.2252, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3902, |
| "step": 5467 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.735170691070679e-06, |
| "loss": 2.5219, |
| "loss_": 0.8784, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3902, |
| "step": 5474 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.711026861886176e-06, |
| "loss": 2.5056, |
| "loss_": 1.276, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3902, |
| "step": 5481 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.686925761600496e-06, |
| "loss": 2.5303, |
| "loss_": 1.0171, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3902, |
| "step": 5488 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.662867584923169e-06, |
| "loss": 2.5533, |
| "loss_": 1.2451, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3901, |
| "step": 5495 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.638852526216947e-06, |
| "loss": 2.5456, |
| "loss_": 1.0837, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3901, |
| "step": 5502 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.614880779496244e-06, |
| "loss": 2.559, |
| "loss_": 1.108, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.39, |
| "step": 5509 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.590952538425563e-06, |
| "loss": 2.5412, |
| "loss_": 1.1554, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3901, |
| "step": 5516 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.567067996317922e-06, |
| "loss": 2.5085, |
| "loss_": 1.0805, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.39, |
| "step": 5523 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.543227346133312e-06, |
| "loss": 2.5361, |
| "loss_": 0.7085, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.39, |
| "step": 5530 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.519430780477124e-06, |
| "loss": 2.535, |
| "loss_": 1.2076, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3903, |
| "step": 5537 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.495678491598587e-06, |
| "loss": 2.5142, |
| "loss_": 0.8921, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3901, |
| "step": 5544 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.471970671389237e-06, |
| "loss": 2.4935, |
| "loss_": 0.8663, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3901, |
| "step": 5551 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.4483075113813445e-06, |
| "loss": 2.5257, |
| "loss_": 1.1068, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3899, |
| "step": 5558 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.4246892027463815e-06, |
| "loss": 2.5516, |
| "loss_": 1.3583, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3901, |
| "step": 5565 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.401115936293468e-06, |
| "loss": 2.5143, |
| "loss_": 1.2772, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.39, |
| "step": 5572 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.377587902467841e-06, |
| "loss": 2.5213, |
| "loss_": 1.0711, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3901, |
| "step": 5579 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.354105291349301e-06, |
| "loss": 2.551, |
| "loss_": 1.0323, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.39, |
| "step": 5586 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.330668292650686e-06, |
| "loss": 2.556, |
| "loss_": 1.2594, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.39, |
| "step": 5593 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.3072770957163415e-06, |
| "loss": 2.5254, |
| "loss_": 1.1564, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3901, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.283931889520587e-06, |
| "loss": 2.5109, |
| "loss_": 1.099, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3901, |
| "step": 5607 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.260632862666181e-06, |
| "loss": 2.5028, |
| "loss_": 1.2475, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3901, |
| "step": 5614 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.237380203382815e-06, |
| "loss": 2.5318, |
| "loss_": 1.149, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3899, |
| "step": 5621 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.214174099525581e-06, |
| "loss": 2.5268, |
| "loss_": 1.2824, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3901, |
| "step": 5628 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.191014738573448e-06, |
| "loss": 2.5064, |
| "loss_": 1.0164, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3901, |
| "step": 5635 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.1679023076277644e-06, |
| "loss": 2.5413, |
| "loss_": 1.1083, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3902, |
| "step": 5642 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.144836993410739e-06, |
| "loss": 2.5067, |
| "loss_": 1.174, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.39, |
| "step": 5649 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.12181898226392e-06, |
| "loss": 2.5303, |
| "loss_": 1.3489, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.39, |
| "step": 5656 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.098848460146709e-06, |
| "loss": 2.5134, |
| "loss_": 1.2256, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.39, |
| "step": 5663 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.07592561263485e-06, |
| "loss": 2.5352, |
| "loss_": 1.0677, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3897, |
| "step": 5670 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.053050624918927e-06, |
| "loss": 2.5389, |
| "loss_": 1.3495, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3902, |
| "step": 5677 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.030223681802873e-06, |
| "loss": 2.5214, |
| "loss_": 0.9889, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3899, |
| "step": 5684 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.007444967702475e-06, |
| "loss": 2.5118, |
| "loss_": 0.9998, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3901, |
| "step": 5691 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 3.984714666643887e-06, |
| "loss": 2.5307, |
| "loss_": 1.176, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3899, |
| "step": 5698 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.962032962262132e-06, |
| "loss": 2.5366, |
| "loss_": 1.0218, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3898, |
| "step": 5705 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.9394000377996355e-06, |
| "loss": 2.5117, |
| "loss_": 0.8796, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3942, |
| "step": 5712 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.916816076104737e-06, |
| "loss": 2.5142, |
| "loss_": 0.9879, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3941, |
| "step": 5719 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.894281259630203e-06, |
| "loss": 2.505, |
| "loss_": 1.0209, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.39, |
| "step": 5726 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.871795770431772e-06, |
| "loss": 2.547, |
| "loss_": 1.0637, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.39, |
| "step": 5733 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.84935979016667e-06, |
| "loss": 2.5078, |
| "loss_": 1.1125, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.39, |
| "step": 5740 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.826973500092153e-06, |
| "loss": 2.5075, |
| "loss_": 1.0352, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.39, |
| "step": 5747 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.8046370810640223e-06, |
| "loss": 2.5161, |
| "loss_": 1.167, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3899, |
| "step": 5754 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.782350713535192e-06, |
| "loss": 2.5364, |
| "loss_": 1.0836, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.39, |
| "step": 5761 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.760114577554216e-06, |
| "loss": 2.5025, |
| "loss_": 1.31, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3898, |
| "step": 5768 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.7379288527638203e-06, |
| "loss": 2.4932, |
| "loss_": 1.139, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3899, |
| "step": 5775 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.715793718399482e-06, |
| "loss": 2.5266, |
| "loss_": 1.0568, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3899, |
| "step": 5782 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.6937093532879576e-06, |
| "loss": 2.4954, |
| "loss_": 1.087, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3899, |
| "step": 5789 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.6716759358458467e-06, |
| "loss": 2.5337, |
| "loss_": 1.1604, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3899, |
| "step": 5796 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.6496936440781496e-06, |
| "loss": 2.5421, |
| "loss_": 1.2507, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.39, |
| "step": 5803 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.6277626555768307e-06, |
| "loss": 2.5217, |
| "loss_": 1.1458, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3898, |
| "step": 5810 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.605883147519377e-06, |
| "loss": 2.5247, |
| "loss_": 1.0988, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3899, |
| "step": 5817 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.584055296667377e-06, |
| "loss": 2.5367, |
| "loss_": 1.2706, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3898, |
| "step": 5824 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.562279279365086e-06, |
| "loss": 2.4891, |
| "loss_": 0.9378, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3898, |
| "step": 5831 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.5405552715380075e-06, |
| "loss": 2.5432, |
| "loss_": 1.1877, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3898, |
| "step": 5838 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.518883448691457e-06, |
| "loss": 2.5668, |
| "loss_": 1.1926, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3898, |
| "step": 5845 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.497263985909163e-06, |
| "loss": 2.5219, |
| "loss_": 1.0161, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3898, |
| "step": 5852 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 3.4756970578518456e-06, |
| "loss": 2.4932, |
| "loss_": 0.8275, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3899, |
| "step": 5859 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.4541828387557953e-06, |
| "loss": 2.5043, |
| "loss_": 1.0652, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3897, |
| "step": 5866 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.43272150243148e-06, |
| "loss": 2.5058, |
| "loss_": 1.0976, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.39, |
| "step": 5873 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.4113132222621382e-06, |
| "loss": 2.531, |
| "loss_": 1.2403, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3899, |
| "step": 5880 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.3899581712023644e-06, |
| "loss": 2.5157, |
| "loss_": 1.1653, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3901, |
| "step": 5887 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.3686565217767307e-06, |
| "loss": 2.5229, |
| "loss_": 1.175, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3898, |
| "step": 5894 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.347408446078384e-06, |
| "loss": 2.5029, |
| "loss_": 0.9687, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.39, |
| "step": 5901 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.326214115767654e-06, |
| "loss": 2.5651, |
| "loss_": 1.1922, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3899, |
| "step": 5908 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.3050737020706693e-06, |
| "loss": 2.5259, |
| "loss_": 1.145, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3898, |
| "step": 5915 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.283987375777974e-06, |
| "loss": 2.5289, |
| "loss_": 1.2907, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3898, |
| "step": 5922 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.26295530724315e-06, |
| "loss": 2.5091, |
| "loss_": 0.8317, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3898, |
| "step": 5929 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.2419776663814284e-06, |
| "loss": 2.5086, |
| "loss_": 1.212, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3897, |
| "step": 5936 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 3.221054622668337e-06, |
| "loss": 2.5146, |
| "loss_": 0.9387, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3899, |
| "step": 5943 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.2001863451383186e-06, |
| "loss": 2.5292, |
| "loss_": 1.1503, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.39, |
| "step": 5950 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.1793730023833613e-06, |
| "loss": 2.5435, |
| "loss_": 1.1373, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3897, |
| "step": 5957 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.1586147625516485e-06, |
| "loss": 2.5105, |
| "loss_": 0.9076, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3897, |
| "step": 5964 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.1379117933461967e-06, |
| "loss": 2.5015, |
| "loss_": 1.1139, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3896, |
| "step": 5971 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.117264262023488e-06, |
| "loss": 2.5259, |
| "loss_": 1.0666, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3898, |
| "step": 5978 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.096672335392139e-06, |
| "loss": 2.511, |
| "loss_": 0.959, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3897, |
| "step": 5985 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.0761361798115454e-06, |
| "loss": 2.5324, |
| "loss_": 1.265, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3898, |
| "step": 5992 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.0556559611905236e-06, |
| "loss": 2.5163, |
| "loss_": 1.1558, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3899, |
| "step": 5999 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.035231844985993e-06, |
| "loss": 2.5014, |
| "loss_": 1.2063, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3897, |
| "step": 6006 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 3.014863996201628e-06, |
| "loss": 2.4744, |
| "loss_": 0.9746, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3936, |
| "step": 6013 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 2.9945525793865237e-06, |
| "loss": 2.5197, |
| "loss_": 0.9524, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3898, |
| "step": 6020 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.9742977586338718e-06, |
| "loss": 2.5209, |
| "loss_": 1.0009, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3896, |
| "step": 6027 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.9540996975796288e-06, |
| "loss": 2.4865, |
| "loss_": 0.8742, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3898, |
| "step": 6034 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.9339585594012034e-06, |
| "loss": 2.5309, |
| "loss_": 0.9582, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3896, |
| "step": 6041 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.913874506816119e-06, |
| "loss": 2.527, |
| "loss_": 1.2344, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3897, |
| "step": 6048 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.8938477020807267e-06, |
| "loss": 2.524, |
| "loss_": 1.0626, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3896, |
| "step": 6055 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.873878306988874e-06, |
| "loss": 2.5321, |
| "loss_": 0.7181, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3939, |
| "step": 6062 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.8539664828706002e-06, |
| "loss": 2.4993, |
| "loss_": 1.0792, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3898, |
| "step": 6069 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.8341123905908406e-06, |
| "loss": 2.5386, |
| "loss_": 1.1162, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3896, |
| "step": 6076 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.8143161905481277e-06, |
| "loss": 2.5657, |
| "loss_": 1.2773, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3897, |
| "step": 6083 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.7945780426732773e-06, |
| "loss": 2.5597, |
| "loss_": 1.1365, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3897, |
| "step": 6090 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 2.77489810642812e-06, |
| "loss": 2.5335, |
| "loss_": 1.1916, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6097 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.7552765408042003e-06, |
| "loss": 2.5169, |
| "loss_": 1.1662, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3897, |
| "step": 6104 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.7357135043214954e-06, |
| "loss": 2.5135, |
| "loss_": 1.0476, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3898, |
| "step": 6111 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.7162091550271273e-06, |
| "loss": 2.4995, |
| "loss_": 1.1279, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3896, |
| "step": 6118 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.6967636504940995e-06, |
| "loss": 2.54, |
| "loss_": 1.1283, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3896, |
| "step": 6125 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.677377147820013e-06, |
| "loss": 2.5405, |
| "loss_": 0.896, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3897, |
| "step": 6132 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.6580498036258016e-06, |
| "loss": 2.5475, |
| "loss_": 1.0748, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6139 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.6387817740544665e-06, |
| "loss": 2.5046, |
| "loss_": 1.0242, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3896, |
| "step": 6146 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.6195732147698148e-06, |
| "loss": 2.5553, |
| "loss_": 0.9529, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3936, |
| "step": 6153 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.600424280955196e-06, |
| "loss": 2.5311, |
| "loss_": 1.1188, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3896, |
| "step": 6160 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.581335127312257e-06, |
| "loss": 2.4974, |
| "loss_": 0.9955, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3897, |
| "step": 6167 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.562305908059691e-06, |
| "loss": 2.5107, |
| "loss_": 1.0771, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3896, |
| "step": 6174 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 2.5433367769319894e-06, |
| "loss": 2.5161, |
| "loss_": 1.1527, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3896, |
| "step": 6181 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.5244278871781924e-06, |
| "loss": 2.5067, |
| "loss_": 0.7494, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3896, |
| "step": 6188 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.505579391560665e-06, |
| "loss": 2.5101, |
| "loss_": 1.05, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3897, |
| "step": 6195 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.4867914423538596e-06, |
| "loss": 2.505, |
| "loss_": 0.8529, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3931, |
| "step": 6202 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.4680641913430703e-06, |
| "loss": 2.5413, |
| "loss_": 1.0996, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3897, |
| "step": 6209 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.449397789823229e-06, |
| "loss": 2.5299, |
| "loss_": 1.198, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3896, |
| "step": 6216 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.4307923885976724e-06, |
| "loss": 2.5472, |
| "loss_": 1.1477, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3897, |
| "step": 6223 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.4122481379769157e-06, |
| "loss": 2.5024, |
| "loss_": 1.329, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3896, |
| "step": 6230 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.3937651877774537e-06, |
| "loss": 2.5363, |
| "loss_": 1.3376, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3897, |
| "step": 6237 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.3753436873205437e-06, |
| "loss": 2.5159, |
| "loss_": 0.9258, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3896, |
| "step": 6244 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.356983785430996e-06, |
| "loss": 2.5133, |
| "loss_": 1.1375, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6251 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 2.338685630435975e-06, |
| "loss": 2.5141, |
| "loss_": 1.0395, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6258 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.320449370163802e-06, |
| "loss": 2.5141, |
| "loss_": 1.2221, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3896, |
| "step": 6265 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.30227515194276e-06, |
| "loss": 2.5207, |
| "loss_": 1.1959, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6272 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.284163122599895e-06, |
| "loss": 2.5455, |
| "loss_": 1.0789, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6279 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.2661134284598442e-06, |
| "loss": 2.507, |
| "loss_": 1.1284, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3898, |
| "step": 6286 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.248126215343651e-06, |
| "loss": 2.5232, |
| "loss_": 1.0936, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6293 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.230201628567572e-06, |
| "loss": 2.5369, |
| "loss_": 1.2088, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6300 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.2123398129419214e-06, |
| "loss": 2.5085, |
| "loss_": 1.3623, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3897, |
| "step": 6307 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.1945409127698967e-06, |
| "loss": 2.5114, |
| "loss_": 1.109, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6314 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.1768050718464006e-06, |
| "loss": 2.5095, |
| "loss_": 1.1855, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3896, |
| "step": 6321 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.1591324334568943e-06, |
| "loss": 2.5014, |
| "loss_": 1.1081, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6328 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.1415231403762383e-06, |
| "loss": 2.4978, |
| "loss_": 0.9622, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6335 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 2.123977334867523e-06, |
| "loss": 2.5578, |
| "loss_": 1.4506, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6342 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.1064951586809434e-06, |
| "loss": 2.5026, |
| "loss_": 1.1986, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6349 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.0890767530526358e-06, |
| "loss": 2.5363, |
| "loss_": 1.1286, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3897, |
| "step": 6356 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.0717222587035435e-06, |
| "loss": 2.5241, |
| "loss_": 0.963, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6363 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.0544318158382815e-06, |
| "loss": 2.5148, |
| "loss_": 0.9621, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6370 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.037205564143999e-06, |
| "loss": 2.5373, |
| "loss_": 1.147, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6377 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.0200436427892554e-06, |
| "loss": 2.5173, |
| "loss_": 1.198, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6384 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 2.0029461904228896e-06, |
| "loss": 2.5232, |
| "loss_": 1.058, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6391 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.9859133451729094e-06, |
| "loss": 2.5238, |
| "loss_": 1.14, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6398 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.9689452446453693e-06, |
| "loss": 2.5138, |
| "loss_": 1.137, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3896, |
| "step": 6405 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.9520420259232566e-06, |
| "loss": 2.5304, |
| "loss_": 1.024, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6412 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 1.9352038255653893e-06, |
| "loss": 2.514, |
| "loss_": 0.6869, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3896, |
| "step": 6419 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.918430779605317e-06, |
| "loss": 2.5432, |
| "loss_": 1.1982, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6426 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.9017230235502027e-06, |
| "loss": 2.5134, |
| "loss_": 0.7847, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6433 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.8850806923797516e-06, |
| "loss": 2.5159, |
| "loss_": 1.457, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6440 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.8685039205451072e-06, |
| "loss": 2.5284, |
| "loss_": 0.9184, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6447 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.8519928419677703e-06, |
| "loss": 2.5196, |
| "loss_": 1.1945, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 6454 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.8355475900385056e-06, |
| "loss": 2.5399, |
| "loss_": 1.0441, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6461 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.819168297616284e-06, |
| "loss": 2.4934, |
| "loss_": 0.9845, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3931, |
| "step": 6468 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.802855097027194e-06, |
| "loss": 2.4904, |
| "loss_": 1.1509, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6475 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.7866081200633756e-06, |
| "loss": 2.4643, |
| "loss_": 1.0501, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6482 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.7704274979819614e-06, |
| "loss": 2.5138, |
| "loss_": 1.171, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6489 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 1.7543133615040098e-06, |
| "loss": 2.5229, |
| "loss_": 1.4327, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3896, |
| "step": 6496 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.7382658408134467e-06, |
| "loss": 2.5343, |
| "loss_": 1.0981, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6503 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.7222850655560241e-06, |
| "loss": 2.5169, |
| "loss_": 1.0696, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6510 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.7063711648382665e-06, |
| "loss": 2.5251, |
| "loss_": 1.1954, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6517 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.690524267226421e-06, |
| "loss": 2.5307, |
| "loss_": 1.1597, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6524 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.6747445007454333e-06, |
| "loss": 2.5079, |
| "loss_": 1.1213, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6531 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.659031992877903e-06, |
| "loss": 2.5285, |
| "loss_": 1.0738, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6538 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.6433868705630584e-06, |
| "loss": 2.517, |
| "loss_": 0.7318, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6545 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.6278092601957241e-06, |
| "loss": 2.5266, |
| "loss_": 1.0867, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3896, |
| "step": 6552 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.6122992876253086e-06, |
| "loss": 2.5332, |
| "loss_": 1.0809, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6559 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.5968570781547864e-06, |
| "loss": 2.5117, |
| "loss_": 1.2174, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6566 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.581482756539674e-06, |
| "loss": 2.517, |
| "loss_": 1.3342, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6573 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 1.5661764469870412e-06, |
| "loss": 2.5286, |
| "loss_": 0.9451, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6580 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.5509382731544908e-06, |
| "loss": 2.5163, |
| "loss_": 1.1528, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6587 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.53576835814917e-06, |
| "loss": 2.5028, |
| "loss_": 0.835, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3933, |
| "step": 6594 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.5206668245267709e-06, |
| "loss": 2.5126, |
| "loss_": 0.8754, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6601 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.5056337942905408e-06, |
| "loss": 2.5059, |
| "loss_": 0.9523, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 6608 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.4906693888903022e-06, |
| "loss": 2.5235, |
| "loss_": 1.1009, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6615 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.475773729221457e-06, |
| "loss": 2.5315, |
| "loss_": 1.1289, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6622 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.460946935624027e-06, |
| "loss": 2.5179, |
| "loss_": 0.9098, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6629 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.4461891278816775e-06, |
| "loss": 2.5291, |
| "loss_": 1.1364, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6636 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.4315004252207354e-06, |
| "loss": 2.5287, |
| "loss_": 1.0815, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6643 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.4168809463092459e-06, |
| "loss": 2.5112, |
| "loss_": 0.9575, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6650 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 1.402330809256005e-06, |
| "loss": 2.5271, |
| "loss_": 1.1014, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6657 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.387850131609597e-06, |
| "loss": 2.4711, |
| "loss_": 1.0401, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 6664 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.3734390303574619e-06, |
| "loss": 2.5261, |
| "loss_": 1.2037, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 6671 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.3590976219249386e-06, |
| "loss": 2.5267, |
| "loss_": 1.1024, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6678 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.3448260221743249e-06, |
| "loss": 2.5327, |
| "loss_": 1.1507, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 6685 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.3306243464039458e-06, |
| "loss": 2.5205, |
| "loss_": 1.2929, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6692 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.3164927093472235e-06, |
| "loss": 2.5205, |
| "loss_": 1.1681, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 6699 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.3024312251717365e-06, |
| "loss": 2.5222, |
| "loss_": 1.2896, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6706 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.2884400074783176e-06, |
| "loss": 2.482, |
| "loss_": 1.4715, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6713 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.2745191693001214e-06, |
| "loss": 2.5152, |
| "loss_": 1.1018, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6720 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.2606688231017205e-06, |
| "loss": 2.4911, |
| "loss_": 0.8081, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6727 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.246889080778184e-06, |
| "loss": 2.5173, |
| "loss_": 1.234, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6734 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 1.2331800536541894e-06, |
| "loss": 2.5114, |
| "loss_": 1.3323, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6741 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.219541852483115e-06, |
| "loss": 2.5135, |
| "loss_": 1.0719, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 6748 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.2059745874461403e-06, |
| "loss": 2.5229, |
| "loss_": 1.1145, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6755 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.1924783681513664e-06, |
| "loss": 2.5145, |
| "loss_": 1.0924, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 6762 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.1790533036329265e-06, |
| "loss": 2.5242, |
| "loss_": 1.1827, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6769 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.1656995023500971e-06, |
| "loss": 2.51, |
| "loss_": 0.9651, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6776 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.1524170721864358e-06, |
| "loss": 2.5144, |
| "loss_": 1.4801, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6783 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.139206120448899e-06, |
| "loss": 2.4961, |
| "loss_": 0.9054, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 6790 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.12606675386698e-06, |
| "loss": 2.5091, |
| "loss_": 1.19, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6797 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.1129990785918444e-06, |
| "loss": 2.5346, |
| "loss_": 0.9222, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 6804 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.100003200195474e-06, |
| "loss": 2.5121, |
| "loss_": 0.9459, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6811 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 1.0870792236698157e-06, |
| "loss": 2.5331, |
| "loss_": 1.1242, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6818 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.0742272534259234e-06, |
| "loss": 2.5094, |
| "loss_": 1.0776, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6825 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.061447393293129e-06, |
| "loss": 2.5038, |
| "loss_": 0.8106, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 6832 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.048739746518197e-06, |
| "loss": 2.5264, |
| "loss_": 1.0267, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6839 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.0361044157644828e-06, |
| "loss": 2.4963, |
| "loss_": 0.8518, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6846 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.0235415031111173e-06, |
| "loss": 2.5199, |
| "loss_": 0.9971, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 6853 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 1.0110511100521747e-06, |
| "loss": 2.5356, |
| "loss_": 0.9752, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6860 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 9.98633337495848e-07, |
| "loss": 2.5046, |
| "loss_": 1.149, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 6867 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 9.862882857636446e-07, |
| "loss": 2.5399, |
| "loss_": 1.2307, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 6874 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 9.740160545895683e-07, |
| "loss": 2.506, |
| "loss_": 1.0997, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 6881 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 9.61816743119317e-07, |
| "loss": 2.4961, |
| "loss_": 0.9872, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3931, |
| "step": 6888 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 9.49690449909475e-07, |
| "loss": 2.5033, |
| "loss_": 0.8414, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3932, |
| "step": 6895 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 9.376372729267269e-07, |
| "loss": 2.5298, |
| "loss_": 1.2211, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6902 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 9.256573095470601e-07, |
| "loss": 2.5483, |
| "loss_": 0.8932, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6909 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 9.137506565549791e-07, |
| "loss": 2.493, |
| "loss_": 1.1945, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 6916 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 9.019174101427219e-07, |
| "loss": 2.5231, |
| "loss_": 1.1885, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 6923 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 8.901576659094901e-07, |
| "loss": 2.5306, |
| "loss_": 1.0664, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6930 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 8.784715188606629e-07, |
| "loss": 2.5236, |
| "loss_": 1.3339, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 6937 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 8.668590634070428e-07, |
| "loss": 2.5428, |
| "loss_": 0.9196, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3933, |
| "step": 6944 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 8.553203933640908e-07, |
| "loss": 2.5645, |
| "loss_": 1.1573, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 6951 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 8.438556019511568e-07, |
| "loss": 2.5178, |
| "loss_": 0.9821, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 6958 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 8.324647817907427e-07, |
| "loss": 2.563, |
| "loss_": 1.2098, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 6965 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 8.211480249077441e-07, |
| "loss": 2.5238, |
| "loss_": 1.3804, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 6972 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 8.099054227287129e-07, |
| "loss": 2.5693, |
| "loss_": 1.077, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 6979 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.987370660811066e-07, |
| "loss": 2.5288, |
| "loss_": 0.8073, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 6986 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.87643045192571e-07, |
| "loss": 2.501, |
| "loss_": 1.0172, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 6993 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.766234496902025e-07, |
| "loss": 2.5408, |
| "loss_": 1.3127, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.656783685998192e-07, |
| "loss": 2.5051, |
| "loss_": 0.7769, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7007 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.548078903452527e-07, |
| "loss": 2.5057, |
| "loss_": 1.2042, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7014 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.440121027476288e-07, |
| "loss": 2.5155, |
| "loss_": 1.0635, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7021 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.332910930246528e-07, |
| "loss": 2.521, |
| "loss_": 1.1436, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7028 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.226449477899156e-07, |
| "loss": 2.5023, |
| "loss_": 1.1022, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7035 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.120737530521826e-07, |
| "loss": 2.5197, |
| "loss_": 0.9678, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7042 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 7.015775942147107e-07, |
| "loss": 2.4997, |
| "loss_": 1.1735, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7049 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 6.911565560745414e-07, |
| "loss": 2.5206, |
| "loss_": 1.1128, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7056 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.808107228218375e-07, |
| "loss": 2.5601, |
| "loss_": 1.2738, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7063 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.705401780391862e-07, |
| "loss": 2.5242, |
| "loss_": 0.8098, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3924, |
| "step": 7070 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.603450047009286e-07, |
| "loss": 2.5201, |
| "loss_": 1.3133, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7077 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.502252851724922e-07, |
| "loss": 2.5253, |
| "loss_": 1.1757, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7084 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.401811012097248e-07, |
| "loss": 2.5515, |
| "loss_": 1.1474, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3891, |
| "step": 7091 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.302125339582266e-07, |
| "loss": 2.5258, |
| "loss_": 0.9865, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 7098 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.203196639527065e-07, |
| "loss": 2.5225, |
| "loss_": 1.2881, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7105 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.105025711163249e-07, |
| "loss": 2.4979, |
| "loss_": 1.1086, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7112 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 6.007613347600438e-07, |
| "loss": 2.5174, |
| "loss_": 0.6185, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7119 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 5.910960335819982e-07, |
| "loss": 2.5157, |
| "loss_": 0.8674, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7126 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 5.815067456668467e-07, |
| "loss": 2.5212, |
| "loss_": 1.2026, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7133 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 5.719935484851513e-07, |
| "loss": 2.5215, |
| "loss_": 1.1283, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7140 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.625565188927462e-07, |
| "loss": 2.4856, |
| "loss_": 1.2003, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7147 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.531957331301152e-07, |
| "loss": 2.5027, |
| "loss_": 1.234, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7154 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.43911266821785e-07, |
| "loss": 2.527, |
| "loss_": 1.2273, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7161 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.347031949756987e-07, |
| "loss": 2.5546, |
| "loss_": 1.1449, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.255715919826254e-07, |
| "loss": 2.5321, |
| "loss_": 1.0256, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 7175 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.165165316155519e-07, |
| "loss": 2.5126, |
| "loss_": 0.9853, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7182 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 5.075380870290847e-07, |
| "loss": 2.5047, |
| "loss_": 1.0876, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3935, |
| "step": 7189 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 4.986363307588648e-07, |
| "loss": 2.5314, |
| "loss_": 1.1338, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7196 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 4.898113347209788e-07, |
| "loss": 2.493, |
| "loss_": 1.0642, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7203 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 4.810631702113722e-07, |
| "loss": 2.4985, |
| "loss_": 0.9782, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7210 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 4.723919079052874e-07, |
| "loss": 2.5288, |
| "loss_": 1.4337, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7217 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.637976178566772e-07, |
| "loss": 2.5036, |
| "loss_": 1.2508, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7224 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.5528036949765155e-07, |
| "loss": 2.5172, |
| "loss_": 1.0779, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3891, |
| "step": 7231 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.46840231637905e-07, |
| "loss": 2.4901, |
| "loss_": 1.1486, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3891, |
| "step": 7238 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.3847727246417283e-07, |
| "loss": 2.5265, |
| "loss_": 1.089, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7245 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.3019155953966995e-07, |
| "loss": 2.534, |
| "loss_": 1.0542, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7252 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.2198315980355066e-07, |
| "loss": 2.4964, |
| "loss_": 1.2698, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7259 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.1385213957036763e-07, |
| "loss": 2.4997, |
| "loss_": 1.0581, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7266 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 4.057985645295337e-07, |
| "loss": 2.5273, |
| "loss_": 1.1317, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7273 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 3.9782249974479105e-07, |
| "loss": 2.506, |
| "loss_": 1.1506, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7280 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 3.899240096536905e-07, |
| "loss": 2.5387, |
| "loss_": 1.1072, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7287 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 3.8210315806706535e-07, |
| "loss": 2.5092, |
| "loss_": 0.8953, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7294 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.7436000816851504e-07, |
| "loss": 2.5046, |
| "loss_": 1.1341, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7301 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.666946225139045e-07, |
| "loss": 2.5416, |
| "loss_": 1.2474, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7308 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.5910706303084574e-07, |
| "loss": 2.521, |
| "loss_": 0.4458, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7315 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.515973910182069e-07, |
| "loss": 2.5129, |
| "loss_": 1.0651, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7322 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.4416566714561174e-07, |
| "loss": 2.5029, |
| "loss_": 1.0087, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7329 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.368119514529533e-07, |
| "loss": 2.5211, |
| "loss_": 1.3133, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7336 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.295363033499066e-07, |
| "loss": 2.5431, |
| "loss_": 1.005, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7343 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.223387816154466e-07, |
| "loss": 2.5257, |
| "loss_": 1.1226, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7350 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.1521944439738104e-07, |
| "loss": 2.52, |
| "loss_": 1.0687, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7357 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.081783492118706e-07, |
| "loss": 2.5225, |
| "loss_": 1.1186, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7364 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.012155529429728e-07, |
| "loss": 2.5037, |
| "loss_": 1.177, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7371 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 2.9433111184217656e-07, |
| "loss": 2.5031, |
| "loss_": 1.2232, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7378 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.875250815279518e-07, |
| "loss": 2.525, |
| "loss_": 1.0972, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7385 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.807975169852939e-07, |
| "loss": 2.5529, |
| "loss_": 0.9882, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7392 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.7414847256528985e-07, |
| "loss": 2.5408, |
| "loss_": 1.3431, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7399 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.675780019846697e-07, |
| "loss": 2.5362, |
| "loss_": 1.0673, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7406 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.6108615832537765e-07, |
| "loss": 2.4947, |
| "loss_": 1.136, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7413 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.546729940341386e-07, |
| "loss": 2.4788, |
| "loss_": 1.2069, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7420 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.4833856092204124e-07, |
| "loss": 2.5045, |
| "loss_": 1.1985, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7427 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.4208291016411536e-07, |
| "loss": 2.5433, |
| "loss_": 1.0931, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7434 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.3590609229891537e-07, |
| "loss": 2.5113, |
| "loss_": 1.2254, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7441 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.2980815722811855e-07, |
| "loss": 2.5482, |
| "loss_": 1.243, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7448 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 2.2378915421611746e-07, |
| "loss": 2.5383, |
| "loss_": 1.2111, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7455 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 2.1784913188962365e-07, |
| "loss": 2.5388, |
| "loss_": 1.0636, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7462 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 2.119881382372746e-07, |
| "loss": 2.502, |
| "loss_": 1.0249, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7469 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 2.0620622060924522e-07, |
| "loss": 2.5066, |
| "loss_": 0.847, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7476 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 2.0050342571686589e-07, |
| "loss": 2.5093, |
| "loss_": 1.1436, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7483 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.9487979963224712e-07, |
| "loss": 2.5268, |
| "loss_": 1.0466, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7490 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.8933538778790118e-07, |
| "loss": 2.5413, |
| "loss_": 1.3326, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7497 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.8387023497638324e-07, |
| "loss": 2.5318, |
| "loss_": 1.1098, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7504 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.7848438534992407e-07, |
| "loss": 2.5091, |
| "loss_": 0.8163, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7511 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.7317788242007361e-07, |
| "loss": 2.5119, |
| "loss_": 0.9991, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7518 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.679507690573523e-07, |
| "loss": 2.5155, |
| "loss_": 1.0626, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7525 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 1.6280308749090036e-07, |
| "loss": 2.5254, |
| "loss_": 1.207, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7532 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.5773487930814345e-07, |
| "loss": 2.516, |
| "loss_": 1.1184, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7539 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.5274618545444985e-07, |
| "loss": 2.5236, |
| "loss_": 1.3532, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7546 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.4783704623280048e-07, |
| "loss": 2.53, |
| "loss_": 1.1781, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7553 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.430075013034693e-07, |
| "loss": 2.5614, |
| "loss_": 1.17, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7560 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.3825758968369684e-07, |
| "loss": 2.5444, |
| "loss_": 1.2882, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7567 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.335873497473761e-07, |
| "loss": 2.5334, |
| "loss_": 1.429, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7574 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.2899681922474482e-07, |
| "loss": 2.5562, |
| "loss_": 1.0573, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7581 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.2448603520207603e-07, |
| "loss": 2.4816, |
| "loss_": 1.1828, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7588 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.2005503412138685e-07, |
| "loss": 2.5387, |
| "loss_": 1.1538, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7595 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.1570385178013454e-07, |
| "loss": 2.5206, |
| "loss_": 0.9157, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7602 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.1143252333093213e-07, |
| "loss": 2.4838, |
| "loss_": 0.7925, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7609 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 1.0724108328126647e-07, |
| "loss": 2.5101, |
| "loss_": 1.2665, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7616 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 1.0312956549321407e-07, |
| "loss": 2.501, |
| "loss_": 1.0971, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7623 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.909800318317008e-08, |
| "loss": 2.5096, |
| "loss_": 1.2361, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7630 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.51464289215831e-08, |
| "loss": 2.5052, |
| "loss_": 0.8553, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7637 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.127487463268636e-08, |
| "loss": 2.5264, |
| "loss_": 0.9662, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3929, |
| "step": 7644 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 8.748337159424247e-08, |
| "loss": 2.5115, |
| "loss_": 0.9187, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7651 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 8.377195043729358e-08, |
| "loss": 2.519, |
| "loss_": 1.3642, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7658 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 8.014064114590936e-08, |
| "loss": 2.5068, |
| "loss_": 1.052, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7665 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 7.658947305694497e-08, |
| "loss": 2.4804, |
| "loss_": 1.1747, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7672 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 7.311847485980794e-08, |
| "loss": 2.5175, |
| "loss_": 1.0972, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7679 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 6.972767459622387e-08, |
| "loss": 2.4884, |
| "loss_": 0.9969, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7686 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 6.641709966000886e-08, |
| "loss": 2.5285, |
| "loss_": 0.9687, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3891, |
| "step": 7693 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 6.318677679685081e-08, |
| "loss": 2.4968, |
| "loss_": 1.151, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7700 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 6.003673210409067e-08, |
| "loss": 2.5292, |
| "loss_": 0.7981, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7707 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 5.696699103051484e-08, |
| "loss": 2.5312, |
| "loss_": 1.1779, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7714 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 5.3977578376144257e-08, |
| "loss": 2.5219, |
| "loss_": 1.0229, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3895, |
| "step": 7721 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 5.1068518292042293e-08, |
| "loss": 2.5087, |
| "loss_": 1.0511, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3929, |
| "step": 7728 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 4.823983428010936e-08, |
| "loss": 2.4938, |
| "loss_": 0.9396, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7735 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 4.549154919290199e-08, |
| "loss": 2.554, |
| "loss_": 1.0694, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7742 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 4.2823685233445155e-08, |
| "loss": 2.5225, |
| "loss_": 1.023, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7749 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 4.0236263955049095e-08, |
| "loss": 2.4966, |
| "loss_": 1.2173, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7756 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 3.7729306261141685e-08, |
| "loss": 2.5165, |
| "loss_": 1.0892, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7763 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 3.530283240509414e-08, |
| "loss": 2.5096, |
| "loss_": 0.9228, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7770 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 3.2956861990062203e-08, |
| "loss": 2.5389, |
| "loss_": 1.1298, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3891, |
| "step": 7777 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 3.0691413968821915e-08, |
| "loss": 2.5103, |
| "loss_": 0.8584, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3891, |
| "step": 7784 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 2.8506506643621866e-08, |
| "loss": 2.5083, |
| "loss_": 1.001, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7791 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 2.6402157666034488e-08, |
| "loss": 2.5312, |
| "loss_": 1.1118, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7798 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 2.4378384036808368e-08, |
| "loss": 2.4881, |
| "loss_": 1.061, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7805 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 2.243520210573946e-08, |
| "loss": 2.529, |
| "loss_": 1.1615, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7812 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 2.0572627571529e-08, |
| "loss": 2.5355, |
| "loss_": 1.0449, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7819 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.8790675481666908e-08, |
| "loss": 2.5263, |
| "loss_": 0.9646, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7826 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.70893602323019e-08, |
| "loss": 2.5028, |
| "loss_": 1.3031, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7833 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.5468695568131576e-08, |
| "loss": 2.5335, |
| "loss_": 1.0215, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7840 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.3928694582284741e-08, |
| "loss": 2.5138, |
| "loss_": 1.0019, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7847 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 1.246936971622148e-08, |
| "loss": 2.5227, |
| "loss_": 1.2481, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3891, |
| "step": 7854 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.1090732759631018e-08, |
| "loss": 2.5075, |
| "loss_": 1.1757, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3891, |
| "step": 7861 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 9.79279485033402e-09, |
| "loss": 2.524, |
| "loss_": 1.3905, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7868 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 8.575566474195996e-09, |
| "loss": 2.5326, |
| "loss_": 1.1002, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7875 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 7.43905746503959e-09, |
| "loss": 2.5051, |
| "loss_": 0.7085, |
| "moe_loss": 0.1602, |
| "moe_loss_longrong": 1.3925, |
| "step": 7882 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 6.383277004569088e-09, |
| "loss": 2.5389, |
| "loss_": 1.1412, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7889 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 5.408233622289371e-09, |
| "loss": 2.5165, |
| "loss_": 0.9492, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7896 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 4.513935195445962e-09, |
| "loss": 2.5191, |
| "loss_": 0.9539, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3894, |
| "step": 7903 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 3.7003889489550806e-09, |
| "loss": 2.4844, |
| "loss_": 0.9553, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7910 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 2.9676014553459145e-09, |
| "loss": 2.5133, |
| "loss_": 0.8643, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3891, |
| "step": 7917 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 2.315578634710658e-09, |
| "loss": 2.4846, |
| "loss_": 1.0907, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3891, |
| "step": 7924 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 1.7443257546512215e-09, |
| "loss": 2.4985, |
| "loss_": 0.8135, |
| "moe_loss": 0.1601, |
| "moe_loss_longrong": 1.3927, |
| "step": 7931 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 1.2538474302459246e-09, |
| "loss": 2.5193, |
| "loss_": 1.0192, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7938 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 8.441476239995361e-10, |
| "loss": 2.5156, |
| "loss_": 0.857, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7945 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 5.152296458232897e-10, |
| "loss": 2.5293, |
| "loss_": 1.1247, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7952 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 2.6709615299935763e-10, |
| "loss": 2.5155, |
| "loss_": 0.9838, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7959 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 9.97491501675274e-11, |
| "loss": 2.5037, |
| "loss_": 1.1665, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3892, |
| "step": 7966 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 1.3189989298556527e-11, |
| "loss": 2.5389, |
| "loss_": 0.9918, |
| "moe_loss": 0.16, |
| "moe_loss_longrong": 1.3893, |
| "step": 7973 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 7977, |
| "total_flos": 1.1960092486052872e+19, |
| "train_loss": 2.5803030949420793, |
| "train_runtime": 142162.8835, |
| "train_samples_per_second": 7.183, |
| "train_steps_per_second": 0.056 |
| } |
| ], |
| "logging_steps": 7, |
| "max_steps": 7977, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "total_flos": 1.1960092486052872e+19, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|