STGC-Phi2-384-1021K / trainer_state.json
7LRY's picture
Upload folder using huggingface_hub
d8b4e3c verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9999059885306008,
"eval_steps": 500,
"global_step": 7977,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 5.833333333333334e-07,
"loss": 3.0143,
"loss_": 1.9639,
"moe_loss": 0.1727,
"moe_loss_longrong": 1.5072,
"step": 7
},
{
"epoch": 0.0,
"learning_rate": 1.1666666666666668e-06,
"loss": 3.1915,
"loss_": 1.6485,
"moe_loss": 0.1717,
"moe_loss_longrong": 1.5034,
"step": 14
},
{
"epoch": 0.0,
"learning_rate": 1.75e-06,
"loss": 3.1397,
"loss_": 1.5496,
"moe_loss": 0.171,
"moe_loss_longrong": 1.4995,
"step": 21
},
{
"epoch": 0.0,
"learning_rate": 2.3333333333333336e-06,
"loss": 3.0585,
"loss_": 2.0086,
"moe_loss": 0.1701,
"moe_loss_longrong": 1.4995,
"step": 28
},
{
"epoch": 0.0,
"learning_rate": 2.916666666666667e-06,
"loss": 2.9633,
"loss_": 1.0198,
"moe_loss": 0.1676,
"moe_loss_longrong": 1.5194,
"step": 35
},
{
"epoch": 0.01,
"learning_rate": 3.5e-06,
"loss": 3.0075,
"loss_": 1.4577,
"moe_loss": 0.1675,
"moe_loss_longrong": 1.4922,
"step": 42
},
{
"epoch": 0.01,
"learning_rate": 4.083333333333334e-06,
"loss": 2.9169,
"loss_": 1.446,
"moe_loss": 0.1664,
"moe_loss_longrong": 1.4899,
"step": 49
},
{
"epoch": 0.01,
"learning_rate": 4.666666666666667e-06,
"loss": 2.9903,
"loss_": 1.3955,
"moe_loss": 0.1647,
"moe_loss_longrong": 1.483,
"step": 56
},
{
"epoch": 0.01,
"learning_rate": 5.2500000000000006e-06,
"loss": 2.9445,
"loss_": 1.7934,
"moe_loss": 0.1646,
"moe_loss_longrong": 1.4828,
"step": 63
},
{
"epoch": 0.01,
"learning_rate": 5.833333333333334e-06,
"loss": 2.9122,
"loss_": 1.4323,
"moe_loss": 0.1631,
"moe_loss_longrong": 1.4793,
"step": 70
},
{
"epoch": 0.01,
"learning_rate": 6.416666666666667e-06,
"loss": 2.87,
"loss_": 0.8099,
"moe_loss": 0.1652,
"moe_loss_longrong": 1.5108,
"step": 77
},
{
"epoch": 0.01,
"learning_rate": 7e-06,
"loss": 2.931,
"loss_": 1.4784,
"moe_loss": 0.1621,
"moe_loss_longrong": 1.473,
"step": 84
},
{
"epoch": 0.01,
"learning_rate": 7.583333333333333e-06,
"loss": 2.7996,
"loss_": 1.0334,
"moe_loss": 0.1645,
"moe_loss_longrong": 1.5067,
"step": 91
},
{
"epoch": 0.01,
"learning_rate": 8.166666666666668e-06,
"loss": 2.8791,
"loss_": 1.4959,
"moe_loss": 0.1617,
"moe_loss_longrong": 1.4725,
"step": 98
},
{
"epoch": 0.01,
"learning_rate": 8.750000000000001e-06,
"loss": 2.8593,
"loss_": 1.3625,
"moe_loss": 0.1617,
"moe_loss_longrong": 1.4711,
"step": 105
},
{
"epoch": 0.01,
"learning_rate": 9.333333333333334e-06,
"loss": 2.8592,
"loss_": 1.0826,
"moe_loss": 0.1639,
"moe_loss_longrong": 1.5051,
"step": 112
},
{
"epoch": 0.01,
"learning_rate": 9.916666666666668e-06,
"loss": 2.8772,
"loss_": 1.2496,
"moe_loss": 0.1616,
"moe_loss_longrong": 1.4683,
"step": 119
},
{
"epoch": 0.02,
"learning_rate": 1.0500000000000001e-05,
"loss": 2.7839,
"loss_": 0.8619,
"moe_loss": 0.1636,
"moe_loss_longrong": 1.5017,
"step": 126
},
{
"epoch": 0.02,
"learning_rate": 1.1083333333333335e-05,
"loss": 2.845,
"loss_": 1.519,
"moe_loss": 0.1615,
"moe_loss_longrong": 1.4653,
"step": 133
},
{
"epoch": 0.02,
"learning_rate": 1.1666666666666668e-05,
"loss": 2.8779,
"loss_": 1.4328,
"moe_loss": 0.1615,
"moe_loss_longrong": 1.4632,
"step": 140
},
{
"epoch": 0.02,
"learning_rate": 1.2250000000000001e-05,
"loss": 2.8133,
"loss_": 1.7345,
"moe_loss": 0.1615,
"moe_loss_longrong": 1.4635,
"step": 147
},
{
"epoch": 0.02,
"learning_rate": 1.2833333333333335e-05,
"loss": 2.8421,
"loss_": 1.443,
"moe_loss": 0.1615,
"moe_loss_longrong": 1.4609,
"step": 154
},
{
"epoch": 0.02,
"learning_rate": 1.3416666666666666e-05,
"loss": 2.8433,
"loss_": 1.0833,
"moe_loss": 0.1613,
"moe_loss_longrong": 1.4601,
"step": 161
},
{
"epoch": 0.02,
"learning_rate": 1.4e-05,
"loss": 2.7887,
"loss_": 1.1754,
"moe_loss": 0.1611,
"moe_loss_longrong": 1.458,
"step": 168
},
{
"epoch": 0.02,
"learning_rate": 1.4583333333333333e-05,
"loss": 2.8346,
"loss_": 1.4786,
"moe_loss": 0.1615,
"moe_loss_longrong": 1.461,
"step": 175
},
{
"epoch": 0.02,
"learning_rate": 1.5166666666666667e-05,
"loss": 2.8158,
"loss_": 1.1078,
"moe_loss": 0.161,
"moe_loss_longrong": 1.4563,
"step": 182
},
{
"epoch": 0.02,
"learning_rate": 1.575e-05,
"loss": 2.8165,
"loss_": 1.5185,
"moe_loss": 0.1612,
"moe_loss_longrong": 1.4575,
"step": 189
},
{
"epoch": 0.02,
"learning_rate": 1.6333333333333335e-05,
"loss": 2.7353,
"loss_": 1.5306,
"moe_loss": 0.1612,
"moe_loss_longrong": 1.4575,
"step": 196
},
{
"epoch": 0.03,
"learning_rate": 1.6916666666666667e-05,
"loss": 2.8177,
"loss_": 1.6701,
"moe_loss": 0.1613,
"moe_loss_longrong": 1.4552,
"step": 203
},
{
"epoch": 0.03,
"learning_rate": 1.7500000000000002e-05,
"loss": 2.8141,
"loss_": 1.147,
"moe_loss": 0.1611,
"moe_loss_longrong": 1.454,
"step": 210
},
{
"epoch": 0.03,
"learning_rate": 1.8083333333333334e-05,
"loss": 2.7925,
"loss_": 1.1863,
"moe_loss": 0.1612,
"moe_loss_longrong": 1.454,
"step": 217
},
{
"epoch": 0.03,
"learning_rate": 1.866666666666667e-05,
"loss": 2.7435,
"loss_": 1.2765,
"moe_loss": 0.1611,
"moe_loss_longrong": 1.4537,
"step": 224
},
{
"epoch": 0.03,
"learning_rate": 1.925e-05,
"loss": 2.7391,
"loss_": 1.1006,
"moe_loss": 0.1612,
"moe_loss_longrong": 1.4517,
"step": 231
},
{
"epoch": 0.03,
"learning_rate": 1.9833333333333335e-05,
"loss": 2.7548,
"loss_": 1.1628,
"moe_loss": 0.1612,
"moe_loss_longrong": 1.4493,
"step": 238
},
{
"epoch": 0.03,
"learning_rate": 1.999997939064427e-05,
"loss": 2.8216,
"loss_": 1.3669,
"moe_loss": 0.1613,
"moe_loss_longrong": 1.4484,
"step": 245
},
{
"epoch": 0.03,
"learning_rate": 1.9999881290305082e-05,
"loss": 2.7406,
"loss_": 0.9434,
"moe_loss": 0.1621,
"moe_loss_longrong": 1.4837,
"step": 252
},
{
"epoch": 0.03,
"learning_rate": 1.9999702402277115e-05,
"loss": 2.7835,
"loss_": 1.6082,
"moe_loss": 0.1614,
"moe_loss_longrong": 1.4501,
"step": 259
},
{
"epoch": 0.03,
"learning_rate": 1.9999442728005572e-05,
"loss": 2.783,
"loss_": 1.3989,
"moe_loss": 0.161,
"moe_loss_longrong": 1.4474,
"step": 266
},
{
"epoch": 0.03,
"learning_rate": 1.999910226958833e-05,
"loss": 2.7628,
"loss_": 1.2652,
"moe_loss": 0.161,
"moe_loss_longrong": 1.4481,
"step": 273
},
{
"epoch": 0.04,
"learning_rate": 1.9998681029775905e-05,
"loss": 2.7709,
"loss_": 1.4145,
"moe_loss": 0.1609,
"moe_loss_longrong": 1.4454,
"step": 280
},
{
"epoch": 0.04,
"learning_rate": 1.999817901197144e-05,
"loss": 2.7808,
"loss_": 1.167,
"moe_loss": 0.1612,
"moe_loss_longrong": 1.4457,
"step": 287
},
{
"epoch": 0.04,
"learning_rate": 1.9997596220230666e-05,
"loss": 2.7761,
"loss_": 1.1866,
"moe_loss": 0.161,
"moe_loss_longrong": 1.4467,
"step": 294
},
{
"epoch": 0.04,
"learning_rate": 1.999693265926188e-05,
"loss": 2.7605,
"loss_": 1.2065,
"moe_loss": 0.1608,
"moe_loss_longrong": 1.4448,
"step": 301
},
{
"epoch": 0.04,
"learning_rate": 1.99961883344259e-05,
"loss": 2.7849,
"loss_": 1.2751,
"moe_loss": 0.161,
"moe_loss_longrong": 1.4447,
"step": 308
},
{
"epoch": 0.04,
"learning_rate": 1.9995363251736027e-05,
"loss": 2.7919,
"loss_": 1.1653,
"moe_loss": 0.1608,
"moe_loss_longrong": 1.4426,
"step": 315
},
{
"epoch": 0.04,
"learning_rate": 1.9994457417857998e-05,
"loss": 2.7698,
"loss_": 1.4965,
"moe_loss": 0.161,
"moe_loss_longrong": 1.4423,
"step": 322
},
{
"epoch": 0.04,
"learning_rate": 1.999347084010991e-05,
"loss": 2.7421,
"loss_": 0.8783,
"moe_loss": 0.1608,
"moe_loss_longrong": 1.4427,
"step": 329
},
{
"epoch": 0.04,
"learning_rate": 1.99924035264622e-05,
"loss": 2.7618,
"loss_": 1.2226,
"moe_loss": 0.161,
"moe_loss_longrong": 1.4429,
"step": 336
},
{
"epoch": 0.04,
"learning_rate": 1.9991255485537547e-05,
"loss": 2.76,
"loss_": 1.2206,
"moe_loss": 0.1609,
"moe_loss_longrong": 1.4425,
"step": 343
},
{
"epoch": 0.04,
"learning_rate": 1.999002672661082e-05,
"loss": 2.7533,
"loss_": 1.3051,
"moe_loss": 0.1609,
"moe_loss_longrong": 1.4398,
"step": 350
},
{
"epoch": 0.04,
"learning_rate": 1.9988717259609e-05,
"loss": 2.7161,
"loss_": 0.8665,
"moe_loss": 0.1615,
"moe_loss_longrong": 1.4731,
"step": 357
},
{
"epoch": 0.05,
"learning_rate": 1.9987327095111085e-05,
"loss": 2.7577,
"loss_": 1.1175,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4404,
"step": 364
},
{
"epoch": 0.05,
"learning_rate": 1.9985856244348034e-05,
"loss": 2.7281,
"loss_": 1.2506,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4399,
"step": 371
},
{
"epoch": 0.05,
"learning_rate": 1.9984304719202647e-05,
"loss": 2.7585,
"loss_": 1.6201,
"moe_loss": 0.161,
"moe_loss_longrong": 1.4402,
"step": 378
},
{
"epoch": 0.05,
"learning_rate": 1.9982672532209487e-05,
"loss": 2.7048,
"loss_": 1.3787,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.44,
"step": 385
},
{
"epoch": 0.05,
"learning_rate": 1.998095969655477e-05,
"loss": 2.7554,
"loss_": 1.5028,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4382,
"step": 392
},
{
"epoch": 0.05,
"learning_rate": 1.997916622607627e-05,
"loss": 2.7604,
"loss_": 1.5144,
"moe_loss": 0.1608,
"moe_loss_longrong": 1.4369,
"step": 399
},
{
"epoch": 0.05,
"learning_rate": 1.9977292135263187e-05,
"loss": 2.6773,
"loss_": 1.2568,
"moe_loss": 0.1608,
"moe_loss_longrong": 1.4391,
"step": 406
},
{
"epoch": 0.05,
"learning_rate": 1.9975337439256046e-05,
"loss": 2.7524,
"loss_": 1.3851,
"moe_loss": 0.1608,
"moe_loss_longrong": 1.4373,
"step": 413
},
{
"epoch": 0.05,
"learning_rate": 1.9973302153846577e-05,
"loss": 2.7138,
"loss_": 0.945,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4365,
"step": 420
},
{
"epoch": 0.05,
"learning_rate": 1.9971186295477575e-05,
"loss": 2.723,
"loss_": 1.2851,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4373,
"step": 427
},
{
"epoch": 0.05,
"learning_rate": 1.9968989881242766e-05,
"loss": 2.7099,
"loss_": 1.4266,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4357,
"step": 434
},
{
"epoch": 0.06,
"learning_rate": 1.9966712928886697e-05,
"loss": 2.7253,
"loss_": 1.2214,
"moe_loss": 0.1608,
"moe_loss_longrong": 1.436,
"step": 441
},
{
"epoch": 0.06,
"learning_rate": 1.996435545680454e-05,
"loss": 2.7258,
"loss_": 1.2096,
"moe_loss": 0.1608,
"moe_loss_longrong": 1.4344,
"step": 448
},
{
"epoch": 0.06,
"learning_rate": 1.9961917484042012e-05,
"loss": 2.6884,
"loss_": 1.2,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4344,
"step": 455
},
{
"epoch": 0.06,
"learning_rate": 1.9959399030295158e-05,
"loss": 2.685,
"loss_": 0.9126,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4363,
"step": 462
},
{
"epoch": 0.06,
"learning_rate": 1.9956800115910216e-05,
"loss": 2.7146,
"loss_": 1.0302,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4344,
"step": 469
},
{
"epoch": 0.06,
"learning_rate": 1.995412076188348e-05,
"loss": 2.665,
"loss_": 1.4803,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4334,
"step": 476
},
{
"epoch": 0.06,
"learning_rate": 1.9951360989861077e-05,
"loss": 2.7331,
"loss_": 1.1431,
"moe_loss": 0.1609,
"moe_loss_longrong": 1.4341,
"step": 483
},
{
"epoch": 0.06,
"learning_rate": 1.9948520822138837e-05,
"loss": 2.7357,
"loss_": 1.2042,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4322,
"step": 490
},
{
"epoch": 0.06,
"learning_rate": 1.9945600281662088e-05,
"loss": 2.7075,
"loss_": 0.97,
"moe_loss": 0.1614,
"moe_loss_longrong": 1.4623,
"step": 497
},
{
"epoch": 0.06,
"learning_rate": 1.9942599392025488e-05,
"loss": 2.7172,
"loss_": 1.2653,
"moe_loss": 0.1608,
"moe_loss_longrong": 1.4338,
"step": 504
},
{
"epoch": 0.06,
"learning_rate": 1.9939518177472813e-05,
"loss": 2.718,
"loss_": 1.1215,
"moe_loss": 0.1608,
"moe_loss_longrong": 1.4347,
"step": 511
},
{
"epoch": 0.06,
"learning_rate": 1.9936356662896777e-05,
"loss": 2.7166,
"loss_": 0.9601,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.432,
"step": 518
},
{
"epoch": 0.07,
"learning_rate": 1.9933114873838832e-05,
"loss": 2.711,
"loss_": 1.2031,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4313,
"step": 525
},
{
"epoch": 0.07,
"learning_rate": 1.9929792836488954e-05,
"loss": 2.7297,
"loss_": 1.0668,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4315,
"step": 532
},
{
"epoch": 0.07,
"learning_rate": 1.9926390577685434e-05,
"loss": 2.7135,
"loss_": 0.8892,
"moe_loss": 0.1616,
"moe_loss_longrong": 1.461,
"step": 539
},
{
"epoch": 0.07,
"learning_rate": 1.992290812491466e-05,
"loss": 2.676,
"loss_": 1.2917,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4313,
"step": 546
},
{
"epoch": 0.07,
"learning_rate": 1.9919345506310896e-05,
"loss": 2.6813,
"loss_": 1.4059,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4321,
"step": 553
},
{
"epoch": 0.07,
"learning_rate": 1.9915702750656053e-05,
"loss": 2.7125,
"loss_": 1.085,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4315,
"step": 560
},
{
"epoch": 0.07,
"learning_rate": 1.991197988737947e-05,
"loss": 2.7119,
"loss_": 1.1287,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4303,
"step": 567
},
{
"epoch": 0.07,
"learning_rate": 1.9908176946557646e-05,
"loss": 2.6879,
"loss_": 1.1955,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4301,
"step": 574
},
{
"epoch": 0.07,
"learning_rate": 1.9904293958914032e-05,
"loss": 2.7081,
"loss_": 1.3866,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4304,
"step": 581
},
{
"epoch": 0.07,
"learning_rate": 1.990033095581876e-05,
"loss": 2.7152,
"loss_": 1.2814,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4308,
"step": 588
},
{
"epoch": 0.07,
"learning_rate": 1.9896287969288396e-05,
"loss": 2.6944,
"loss_": 1.2075,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4292,
"step": 595
},
{
"epoch": 0.08,
"learning_rate": 1.989216503198568e-05,
"loss": 2.6422,
"loss_": 0.9899,
"moe_loss": 0.1617,
"moe_loss_longrong": 1.458,
"step": 602
},
{
"epoch": 0.08,
"learning_rate": 1.988796217721926e-05,
"loss": 2.7321,
"loss_": 1.437,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4304,
"step": 609
},
{
"epoch": 0.08,
"learning_rate": 1.9883679438943444e-05,
"loss": 2.6757,
"loss_": 1.3196,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4289,
"step": 616
},
{
"epoch": 0.08,
"learning_rate": 1.9879316851757885e-05,
"loss": 2.688,
"loss_": 1.514,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.429,
"step": 623
},
{
"epoch": 0.08,
"learning_rate": 1.9874874450907338e-05,
"loss": 2.7082,
"loss_": 1.1938,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4291,
"step": 630
},
{
"epoch": 0.08,
"learning_rate": 1.987035227228136e-05,
"loss": 2.7276,
"loss_": 1.3401,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.428,
"step": 637
},
{
"epoch": 0.08,
"learning_rate": 1.9865750352414016e-05,
"loss": 2.685,
"loss_": 1.2597,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4279,
"step": 644
},
{
"epoch": 0.08,
"learning_rate": 1.9861068728483603e-05,
"loss": 2.7331,
"loss_": 1.3278,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4274,
"step": 651
},
{
"epoch": 0.08,
"learning_rate": 1.985630743831232e-05,
"loss": 2.7276,
"loss_": 1.542,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4281,
"step": 658
},
{
"epoch": 0.08,
"learning_rate": 1.985146652036599e-05,
"loss": 2.6914,
"loss_": 1.4054,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.427,
"step": 665
},
{
"epoch": 0.08,
"learning_rate": 1.984654601375373e-05,
"loss": 2.6531,
"loss_": 1.5136,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4262,
"step": 672
},
{
"epoch": 0.09,
"learning_rate": 1.9841545958227654e-05,
"loss": 2.7346,
"loss_": 1.1452,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.427,
"step": 679
},
{
"epoch": 0.09,
"learning_rate": 1.983646639418253e-05,
"loss": 2.707,
"loss_": 1.3454,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4264,
"step": 686
},
{
"epoch": 0.09,
"learning_rate": 1.9831307362655473e-05,
"loss": 2.6949,
"loss_": 1.316,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.427,
"step": 693
},
{
"epoch": 0.09,
"learning_rate": 1.9826068905325598e-05,
"loss": 2.6725,
"loss_": 0.8014,
"moe_loss": 0.1612,
"moe_loss_longrong": 1.4544,
"step": 700
},
{
"epoch": 0.09,
"learning_rate": 1.9820751064513693e-05,
"loss": 2.7006,
"loss_": 1.3368,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4252,
"step": 707
},
{
"epoch": 0.09,
"learning_rate": 1.981535388318188e-05,
"loss": 2.6809,
"loss_": 1.161,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.425,
"step": 714
},
{
"epoch": 0.09,
"learning_rate": 1.980987740493325e-05,
"loss": 2.6964,
"loss_": 1.1942,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4257,
"step": 721
},
{
"epoch": 0.09,
"learning_rate": 1.9804321674011533e-05,
"loss": 2.6673,
"loss_": 1.1932,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4267,
"step": 728
},
{
"epoch": 0.09,
"learning_rate": 1.979868673530073e-05,
"loss": 2.6938,
"loss_": 1.2555,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4248,
"step": 735
},
{
"epoch": 0.09,
"learning_rate": 1.9792972634324744e-05,
"loss": 2.7032,
"loss_": 1.1953,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4241,
"step": 742
},
{
"epoch": 0.09,
"learning_rate": 1.9787179417247032e-05,
"loss": 2.6754,
"loss_": 1.1357,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4249,
"step": 749
},
{
"epoch": 0.09,
"learning_rate": 1.9781307130870204e-05,
"loss": 2.6969,
"loss_": 1.3238,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4245,
"step": 756
},
{
"epoch": 0.1,
"learning_rate": 1.9775355822635675e-05,
"loss": 2.6831,
"loss_": 1.2612,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4237,
"step": 763
},
{
"epoch": 0.1,
"learning_rate": 1.976932554062325e-05,
"loss": 2.6701,
"loss_": 1.1135,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4243,
"step": 770
},
{
"epoch": 0.1,
"learning_rate": 1.9763216333550768e-05,
"loss": 2.7003,
"loss_": 1.2469,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4236,
"step": 777
},
{
"epoch": 0.1,
"learning_rate": 1.9757028250773686e-05,
"loss": 2.6854,
"loss_": 1.3538,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4234,
"step": 784
},
{
"epoch": 0.1,
"learning_rate": 1.975076134228469e-05,
"loss": 2.6874,
"loss_": 1.226,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4239,
"step": 791
},
{
"epoch": 0.1,
"learning_rate": 1.9744415658713282e-05,
"loss": 2.7152,
"loss_": 1.2979,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4231,
"step": 798
},
{
"epoch": 0.1,
"learning_rate": 1.9737991251325384e-05,
"loss": 2.6908,
"loss_": 1.0737,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4238,
"step": 805
},
{
"epoch": 0.1,
"learning_rate": 1.9731488172022915e-05,
"loss": 2.7375,
"loss_": 1.416,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4219,
"step": 812
},
{
"epoch": 0.1,
"learning_rate": 1.972490647334337e-05,
"loss": 2.6467,
"loss_": 1.0544,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4236,
"step": 819
},
{
"epoch": 0.1,
"learning_rate": 1.971824620845941e-05,
"loss": 2.6613,
"loss_": 1.1665,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4229,
"step": 826
},
{
"epoch": 0.1,
"learning_rate": 1.9711507431178403e-05,
"loss": 2.654,
"loss_": 1.2871,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4239,
"step": 833
},
{
"epoch": 0.11,
"learning_rate": 1.9704690195942035e-05,
"loss": 2.6831,
"loss_": 1.4114,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4235,
"step": 840
},
{
"epoch": 0.11,
"learning_rate": 1.9697794557825812e-05,
"loss": 2.7215,
"loss_": 1.367,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4221,
"step": 847
},
{
"epoch": 0.11,
"learning_rate": 1.969082057253867e-05,
"loss": 2.6998,
"loss_": 1.0197,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.422,
"step": 854
},
{
"epoch": 0.11,
"learning_rate": 1.9683768296422495e-05,
"loss": 2.6869,
"loss_": 1.2449,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4224,
"step": 861
},
{
"epoch": 0.11,
"learning_rate": 1.9676637786451665e-05,
"loss": 2.7047,
"loss_": 1.2273,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.422,
"step": 868
},
{
"epoch": 0.11,
"learning_rate": 1.966942910023261e-05,
"loss": 2.6873,
"loss_": 0.9599,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4206,
"step": 875
},
{
"epoch": 0.11,
"learning_rate": 1.9662142296003335e-05,
"loss": 2.6721,
"loss_": 1.2456,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4221,
"step": 882
},
{
"epoch": 0.11,
"learning_rate": 1.965477743263294e-05,
"loss": 2.6481,
"loss_": 1.4271,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4222,
"step": 889
},
{
"epoch": 0.11,
"learning_rate": 1.964733456962116e-05,
"loss": 2.6621,
"loss_": 1.1236,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.422,
"step": 896
},
{
"epoch": 0.11,
"learning_rate": 1.9639813767097886e-05,
"loss": 2.66,
"loss_": 1.2049,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4193,
"step": 903
},
{
"epoch": 0.11,
"learning_rate": 1.9632215085822658e-05,
"loss": 2.7064,
"loss_": 1.2497,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4214,
"step": 910
},
{
"epoch": 0.11,
"learning_rate": 1.9624538587184197e-05,
"loss": 2.6533,
"loss_": 1.22,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4211,
"step": 917
},
{
"epoch": 0.12,
"learning_rate": 1.9616784333199896e-05,
"loss": 2.644,
"loss_": 1.1443,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4217,
"step": 924
},
{
"epoch": 0.12,
"learning_rate": 1.9608952386515327e-05,
"loss": 2.6987,
"loss_": 1.1736,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4197,
"step": 931
},
{
"epoch": 0.12,
"learning_rate": 1.9601042810403725e-05,
"loss": 2.6732,
"loss_": 1.1886,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4196,
"step": 938
},
{
"epoch": 0.12,
"learning_rate": 1.959305566876549e-05,
"loss": 2.6806,
"loss_": 1.0944,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4205,
"step": 945
},
{
"epoch": 0.12,
"learning_rate": 1.9584991026127655e-05,
"loss": 2.6919,
"loss_": 1.4477,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4204,
"step": 952
},
{
"epoch": 0.12,
"learning_rate": 1.957684894764338e-05,
"loss": 2.6751,
"loss_": 1.1916,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4204,
"step": 959
},
{
"epoch": 0.12,
"learning_rate": 1.9568629499091413e-05,
"loss": 2.6459,
"loss_": 1.2407,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4207,
"step": 966
},
{
"epoch": 0.12,
"learning_rate": 1.9560332746875574e-05,
"loss": 2.6572,
"loss_": 0.8698,
"moe_loss": 0.1613,
"moe_loss_longrong": 1.4436,
"step": 973
},
{
"epoch": 0.12,
"learning_rate": 1.9551958758024194e-05,
"loss": 2.6679,
"loss_": 1.3397,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4202,
"step": 980
},
{
"epoch": 0.12,
"learning_rate": 1.9543507600189606e-05,
"loss": 2.6673,
"loss_": 1.164,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4188,
"step": 987
},
{
"epoch": 0.12,
"learning_rate": 1.9534979341647562e-05,
"loss": 2.6295,
"loss_": 1.3512,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4192,
"step": 994
},
{
"epoch": 0.13,
"learning_rate": 1.9526374051296714e-05,
"loss": 2.645,
"loss_": 1.1948,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4185,
"step": 1001
},
{
"epoch": 0.13,
"learning_rate": 1.9517691798658042e-05,
"loss": 2.7004,
"loss_": 1.19,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4188,
"step": 1008
},
{
"epoch": 0.13,
"learning_rate": 1.9508932653874283e-05,
"loss": 2.6404,
"loss_": 1.4758,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.418,
"step": 1015
},
{
"epoch": 0.13,
"learning_rate": 1.9500096687709393e-05,
"loss": 2.6529,
"loss_": 1.1355,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4179,
"step": 1022
},
{
"epoch": 0.13,
"learning_rate": 1.9491183971547943e-05,
"loss": 2.6448,
"loss_": 1.3669,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4182,
"step": 1029
},
{
"epoch": 0.13,
"learning_rate": 1.948219457739456e-05,
"loss": 2.674,
"loss_": 1.5143,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4184,
"step": 1036
},
{
"epoch": 0.13,
"learning_rate": 1.9473128577873346e-05,
"loss": 2.6813,
"loss_": 1.3613,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4184,
"step": 1043
},
{
"epoch": 0.13,
"learning_rate": 1.9463986046227284e-05,
"loss": 2.6566,
"loss_": 1.2685,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4179,
"step": 1050
},
{
"epoch": 0.13,
"learning_rate": 1.9454767056317654e-05,
"loss": 2.6556,
"loss_": 1.1164,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4171,
"step": 1057
},
{
"epoch": 0.13,
"learning_rate": 1.9445471682623425e-05,
"loss": 2.6723,
"loss_": 1.3762,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4178,
"step": 1064
},
{
"epoch": 0.13,
"learning_rate": 1.9436100000240668e-05,
"loss": 2.6654,
"loss_": 1.3065,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4181,
"step": 1071
},
{
"epoch": 0.14,
"learning_rate": 1.9426652084881934e-05,
"loss": 2.6471,
"loss_": 0.7216,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4175,
"step": 1078
},
{
"epoch": 0.14,
"learning_rate": 1.9417128012875657e-05,
"loss": 2.6433,
"loss_": 1.311,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4172,
"step": 1085
},
{
"epoch": 0.14,
"learning_rate": 1.9407527861165523e-05,
"loss": 2.6788,
"loss_": 1.4472,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4174,
"step": 1092
},
{
"epoch": 0.14,
"learning_rate": 1.9397851707309864e-05,
"loss": 2.6715,
"loss_": 1.2477,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4167,
"step": 1099
},
{
"epoch": 0.14,
"learning_rate": 1.9388099629481017e-05,
"loss": 2.6497,
"loss_": 1.2279,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4171,
"step": 1106
},
{
"epoch": 0.14,
"learning_rate": 1.93782717064647e-05,
"loss": 2.6772,
"loss_": 1.0676,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4161,
"step": 1113
},
{
"epoch": 0.14,
"learning_rate": 1.9368368017659368e-05,
"loss": 2.6543,
"loss_": 1.3057,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4164,
"step": 1120
},
{
"epoch": 0.14,
"learning_rate": 1.9358388643075597e-05,
"loss": 2.6439,
"loss_": 1.2984,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4155,
"step": 1127
},
{
"epoch": 0.14,
"learning_rate": 1.9348333663335393e-05,
"loss": 2.6489,
"loss_": 1.1934,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.415,
"step": 1134
},
{
"epoch": 0.14,
"learning_rate": 1.9338203159671584e-05,
"loss": 2.6834,
"loss_": 1.2899,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4166,
"step": 1141
},
{
"epoch": 0.14,
"learning_rate": 1.9327997213927136e-05,
"loss": 2.6676,
"loss_": 1.3016,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4161,
"step": 1148
},
{
"epoch": 0.14,
"learning_rate": 1.931771590855451e-05,
"loss": 2.6612,
"loss_": 1.0128,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4157,
"step": 1155
},
{
"epoch": 0.15,
"learning_rate": 1.9307359326614975e-05,
"loss": 2.6457,
"loss_": 1.1214,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4161,
"step": 1162
},
{
"epoch": 0.15,
"learning_rate": 1.929692755177796e-05,
"loss": 2.6583,
"loss_": 1.2741,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4164,
"step": 1169
},
{
"epoch": 0.15,
"learning_rate": 1.9286420668320356e-05,
"loss": 2.6487,
"loss_": 0.9804,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.416,
"step": 1176
},
{
"epoch": 0.15,
"learning_rate": 1.9275838761125866e-05,
"loss": 2.6338,
"loss_": 1.1369,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4158,
"step": 1183
},
{
"epoch": 0.15,
"learning_rate": 1.926518191568428e-05,
"loss": 2.6547,
"loss_": 1.2162,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4163,
"step": 1190
},
{
"epoch": 0.15,
"learning_rate": 1.9254450218090814e-05,
"loss": 2.6478,
"loss_": 1.1011,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.415,
"step": 1197
},
{
"epoch": 0.15,
"learning_rate": 1.92436437550454e-05,
"loss": 2.6527,
"loss_": 1.2542,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4151,
"step": 1204
},
{
"epoch": 0.15,
"learning_rate": 1.9232762613851993e-05,
"loss": 2.6584,
"loss_": 1.269,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.414,
"step": 1211
},
{
"epoch": 0.15,
"learning_rate": 1.922180688241786e-05,
"loss": 2.6481,
"loss_": 1.1536,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4155,
"step": 1218
},
{
"epoch": 0.15,
"learning_rate": 1.9210776649252875e-05,
"loss": 2.6695,
"loss_": 1.318,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4146,
"step": 1225
},
{
"epoch": 0.15,
"learning_rate": 1.9199672003468795e-05,
"loss": 2.6144,
"loss_": 1.1917,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4145,
"step": 1232
},
{
"epoch": 0.16,
"learning_rate": 1.918849303477856e-05,
"loss": 2.6512,
"loss_": 1.3762,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4143,
"step": 1239
},
{
"epoch": 0.16,
"learning_rate": 1.9177239833495545e-05,
"loss": 2.6538,
"loss_": 1.3326,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4144,
"step": 1246
},
{
"epoch": 0.16,
"learning_rate": 1.9165912490532838e-05,
"loss": 2.6337,
"loss_": 1.3393,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4141,
"step": 1253
},
{
"epoch": 0.16,
"learning_rate": 1.9154511097402512e-05,
"loss": 2.6493,
"loss_": 1.3026,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4143,
"step": 1260
},
{
"epoch": 0.16,
"learning_rate": 1.9143035746214883e-05,
"loss": 2.6833,
"loss_": 1.2821,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4147,
"step": 1267
},
{
"epoch": 0.16,
"learning_rate": 1.9131486529677755e-05,
"loss": 2.6348,
"loss_": 1.2194,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4144,
"step": 1274
},
{
"epoch": 0.16,
"learning_rate": 1.9119863541095697e-05,
"loss": 2.622,
"loss_": 1.4341,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4139,
"step": 1281
},
{
"epoch": 0.16,
"learning_rate": 1.9108166874369253e-05,
"loss": 2.6579,
"loss_": 1.1947,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4141,
"step": 1288
},
{
"epoch": 0.16,
"learning_rate": 1.9096396623994215e-05,
"loss": 2.6413,
"loss_": 1.3734,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4131,
"step": 1295
},
{
"epoch": 0.16,
"learning_rate": 1.9084552885060846e-05,
"loss": 2.6371,
"loss_": 1.2291,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.413,
"step": 1302
},
{
"epoch": 0.16,
"learning_rate": 1.9072635753253112e-05,
"loss": 2.6483,
"loss_": 1.1361,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4127,
"step": 1309
},
{
"epoch": 0.16,
"learning_rate": 1.9060645324847904e-05,
"loss": 2.6325,
"loss_": 1.3775,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4128,
"step": 1316
},
{
"epoch": 0.17,
"learning_rate": 1.9048581696714276e-05,
"loss": 2.6272,
"loss_": 1.2366,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4132,
"step": 1323
},
{
"epoch": 0.17,
"learning_rate": 1.9036444966312652e-05,
"loss": 2.6566,
"loss_": 1.2485,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4132,
"step": 1330
},
{
"epoch": 0.17,
"learning_rate": 1.9024235231694024e-05,
"loss": 2.6189,
"loss_": 1.3857,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4133,
"step": 1337
},
{
"epoch": 0.17,
"learning_rate": 1.90119525914992e-05,
"loss": 2.6107,
"loss_": 1.0882,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4129,
"step": 1344
},
{
"epoch": 0.17,
"learning_rate": 1.899959714495796e-05,
"loss": 2.6564,
"loss_": 1.0952,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4127,
"step": 1351
},
{
"epoch": 0.17,
"learning_rate": 1.8987168991888293e-05,
"loss": 2.648,
"loss_": 0.9829,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4129,
"step": 1358
},
{
"epoch": 0.17,
"learning_rate": 1.8974668232695562e-05,
"loss": 2.6334,
"loss_": 1.1611,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4121,
"step": 1365
},
{
"epoch": 0.17,
"learning_rate": 1.896209496837171e-05,
"loss": 2.6435,
"loss_": 1.351,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4122,
"step": 1372
},
{
"epoch": 0.17,
"learning_rate": 1.8949449300494444e-05,
"loss": 2.6572,
"loss_": 1.2158,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4125,
"step": 1379
},
{
"epoch": 0.17,
"learning_rate": 1.8936731331226402e-05,
"loss": 2.6249,
"loss_": 1.2495,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4127,
"step": 1386
},
{
"epoch": 0.17,
"learning_rate": 1.892394116331434e-05,
"loss": 2.6299,
"loss_": 1.0987,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4117,
"step": 1393
},
{
"epoch": 0.18,
"learning_rate": 1.8911078900088295e-05,
"loss": 2.6377,
"loss_": 1.179,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4116,
"step": 1400
},
{
"epoch": 0.18,
"learning_rate": 1.8898144645460744e-05,
"loss": 2.6133,
"loss_": 1.1341,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.412,
"step": 1407
},
{
"epoch": 0.18,
"learning_rate": 1.8885138503925793e-05,
"loss": 2.6514,
"loss_": 1.1486,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4115,
"step": 1414
},
{
"epoch": 0.18,
"learning_rate": 1.8872060580558295e-05,
"loss": 2.6529,
"loss_": 1.2706,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4111,
"step": 1421
},
{
"epoch": 0.18,
"learning_rate": 1.8858910981013025e-05,
"loss": 2.6298,
"loss_": 1.2814,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4114,
"step": 1428
},
{
"epoch": 0.18,
"learning_rate": 1.884568981152382e-05,
"loss": 2.6107,
"loss_": 0.8031,
"moe_loss": 0.161,
"moe_loss_longrong": 1.433,
"step": 1435
},
{
"epoch": 0.18,
"learning_rate": 1.883239717890272e-05,
"loss": 2.6321,
"loss_": 1.3342,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4109,
"step": 1442
},
{
"epoch": 0.18,
"learning_rate": 1.881903319053911e-05,
"loss": 2.6271,
"loss_": 1.224,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.412,
"step": 1449
},
{
"epoch": 0.18,
"learning_rate": 1.880559795439884e-05,
"loss": 2.6168,
"loss_": 1.0488,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4113,
"step": 1456
},
{
"epoch": 0.18,
"learning_rate": 1.8792091579023365e-05,
"loss": 2.6358,
"loss_": 1.0113,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.411,
"step": 1463
},
{
"epoch": 0.18,
"learning_rate": 1.8778514173528873e-05,
"loss": 2.6396,
"loss_": 1.1213,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4112,
"step": 1470
},
{
"epoch": 0.19,
"learning_rate": 1.8764865847605384e-05,
"loss": 2.6268,
"loss_": 1.0843,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4125,
"step": 1477
},
{
"epoch": 0.19,
"learning_rate": 1.875114671151587e-05,
"loss": 2.6604,
"loss_": 1.1708,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4112,
"step": 1484
},
{
"epoch": 0.19,
"learning_rate": 1.8737356876095387e-05,
"loss": 2.6187,
"loss_": 1.0976,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4107,
"step": 1491
},
{
"epoch": 0.19,
"learning_rate": 1.8723496452750146e-05,
"loss": 2.6198,
"loss_": 1.1692,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4107,
"step": 1498
},
{
"epoch": 0.19,
"learning_rate": 1.8709565553456632e-05,
"loss": 2.621,
"loss_": 1.3206,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4102,
"step": 1505
},
{
"epoch": 0.19,
"learning_rate": 1.86955642907607e-05,
"loss": 2.6184,
"loss_": 1.1808,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4106,
"step": 1512
},
{
"epoch": 0.19,
"learning_rate": 1.8681492777776656e-05,
"loss": 2.6577,
"loss_": 1.1146,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4105,
"step": 1519
},
{
"epoch": 0.19,
"learning_rate": 1.8667351128186347e-05,
"loss": 2.6417,
"loss_": 1.3074,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4109,
"step": 1526
},
{
"epoch": 0.19,
"learning_rate": 1.8653139456238257e-05,
"loss": 2.6165,
"loss_": 1.0176,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4104,
"step": 1533
},
{
"epoch": 0.19,
"learning_rate": 1.8638857876746556e-05,
"loss": 2.6841,
"loss_": 1.3367,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4101,
"step": 1540
},
{
"epoch": 0.19,
"learning_rate": 1.8624506505090192e-05,
"loss": 2.6275,
"loss_": 1.1427,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4107,
"step": 1547
},
{
"epoch": 0.19,
"learning_rate": 1.8610085457211958e-05,
"loss": 2.6526,
"loss_": 1.3939,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4104,
"step": 1554
},
{
"epoch": 0.2,
"learning_rate": 1.8595594849617552e-05,
"loss": 2.6202,
"loss_": 1.1005,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4099,
"step": 1561
},
{
"epoch": 0.2,
"learning_rate": 1.8581034799374632e-05,
"loss": 2.6634,
"loss_": 1.2608,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4098,
"step": 1568
},
{
"epoch": 0.2,
"learning_rate": 1.8566405424111873e-05,
"loss": 2.6483,
"loss_": 1.2839,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4097,
"step": 1575
},
{
"epoch": 0.2,
"learning_rate": 1.855170684201802e-05,
"loss": 2.6077,
"loss_": 1.3359,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.41,
"step": 1582
},
{
"epoch": 0.2,
"learning_rate": 1.8536939171840934e-05,
"loss": 2.6574,
"loss_": 1.3449,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4099,
"step": 1589
},
{
"epoch": 0.2,
"learning_rate": 1.8522102532886627e-05,
"loss": 2.6374,
"loss_": 1.0245,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4102,
"step": 1596
},
{
"epoch": 0.2,
"learning_rate": 1.8507197045018286e-05,
"loss": 2.6555,
"loss_": 1.2334,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4094,
"step": 1603
},
{
"epoch": 0.2,
"learning_rate": 1.8492222828655347e-05,
"loss": 2.6118,
"loss_": 1.294,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4088,
"step": 1610
},
{
"epoch": 0.2,
"learning_rate": 1.8477180004772473e-05,
"loss": 2.6092,
"loss_": 1.1013,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4087,
"step": 1617
},
{
"epoch": 0.2,
"learning_rate": 1.8462068694898603e-05,
"loss": 2.6415,
"loss_": 1.1863,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4088,
"step": 1624
},
{
"epoch": 0.2,
"learning_rate": 1.8446889021115967e-05,
"loss": 2.6141,
"loss_": 1.2587,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4091,
"step": 1631
},
{
"epoch": 0.21,
"learning_rate": 1.84316411060591e-05,
"loss": 2.6031,
"loss_": 0.798,
"moe_loss": 0.1608,
"moe_loss_longrong": 1.4271,
"step": 1638
},
{
"epoch": 0.21,
"learning_rate": 1.841632507291384e-05,
"loss": 2.6305,
"loss_": 0.9585,
"moe_loss": 0.161,
"moe_loss_longrong": 1.427,
"step": 1645
},
{
"epoch": 0.21,
"learning_rate": 1.8400941045416352e-05,
"loss": 2.5888,
"loss_": 1.2668,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4088,
"step": 1652
},
{
"epoch": 0.21,
"learning_rate": 1.8385489147852117e-05,
"loss": 2.6253,
"loss_": 1.0907,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4084,
"step": 1659
},
{
"epoch": 0.21,
"learning_rate": 1.8369969505054915e-05,
"loss": 2.6541,
"loss_": 1.1209,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4083,
"step": 1666
},
{
"epoch": 0.21,
"learning_rate": 1.8354382242405853e-05,
"loss": 2.6553,
"loss_": 1.1877,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4086,
"step": 1673
},
{
"epoch": 0.21,
"learning_rate": 1.8338727485832317e-05,
"loss": 2.6105,
"loss_": 1.0542,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4087,
"step": 1680
},
{
"epoch": 0.21,
"learning_rate": 1.832300536180696e-05,
"loss": 2.6209,
"loss_": 1.3141,
"moe_loss": 0.1608,
"moe_loss_longrong": 1.4275,
"step": 1687
},
{
"epoch": 0.21,
"learning_rate": 1.8307215997346703e-05,
"loss": 2.6477,
"loss_": 1.3156,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4087,
"step": 1694
},
{
"epoch": 0.21,
"learning_rate": 1.8291359520011687e-05,
"loss": 2.6633,
"loss_": 1.2031,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4075,
"step": 1701
},
{
"epoch": 0.21,
"learning_rate": 1.8275436057904246e-05,
"loss": 2.6259,
"loss_": 1.1971,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4082,
"step": 1708
},
{
"epoch": 0.21,
"learning_rate": 1.825944573966788e-05,
"loss": 2.6185,
"loss_": 1.098,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4073,
"step": 1715
},
{
"epoch": 0.22,
"learning_rate": 1.82433886944862e-05,
"loss": 2.614,
"loss_": 1.0326,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4079,
"step": 1722
},
{
"epoch": 0.22,
"learning_rate": 1.8227265052081913e-05,
"loss": 2.6257,
"loss_": 1.3002,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4078,
"step": 1729
},
{
"epoch": 0.22,
"learning_rate": 1.821107494271574e-05,
"loss": 2.6101,
"loss_": 1.3726,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4089,
"step": 1736
},
{
"epoch": 0.22,
"learning_rate": 1.8194818497185385e-05,
"loss": 2.6377,
"loss_": 1.2734,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4081,
"step": 1743
},
{
"epoch": 0.22,
"learning_rate": 1.8178495846824474e-05,
"loss": 2.6187,
"loss_": 1.062,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4082,
"step": 1750
},
{
"epoch": 0.22,
"learning_rate": 1.81621071235015e-05,
"loss": 2.6136,
"loss_": 1.3893,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4079,
"step": 1757
},
{
"epoch": 0.22,
"learning_rate": 1.814565245961873e-05,
"loss": 2.6332,
"loss_": 1.3444,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4075,
"step": 1764
},
{
"epoch": 0.22,
"learning_rate": 1.8129131988111174e-05,
"loss": 2.6251,
"loss_": 1.0967,
"moe_loss": 0.161,
"moe_loss_longrong": 1.4258,
"step": 1771
},
{
"epoch": 0.22,
"learning_rate": 1.8112545842445488e-05,
"loss": 2.6364,
"loss_": 1.1797,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4066,
"step": 1778
},
{
"epoch": 0.22,
"learning_rate": 1.80958941566189e-05,
"loss": 2.618,
"loss_": 1.2443,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4075,
"step": 1785
},
{
"epoch": 0.22,
"learning_rate": 1.807917706515813e-05,
"loss": 2.5878,
"loss_": 1.4678,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4082,
"step": 1792
},
{
"epoch": 0.23,
"learning_rate": 1.8062394703118294e-05,
"loss": 2.6224,
"loss_": 1.1059,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4077,
"step": 1799
},
{
"epoch": 0.23,
"learning_rate": 1.804554720608183e-05,
"loss": 2.6185,
"loss_": 1.2769,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4077,
"step": 1806
},
{
"epoch": 0.23,
"learning_rate": 1.8028634710157392e-05,
"loss": 2.5904,
"loss_": 1.1422,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4071,
"step": 1813
},
{
"epoch": 0.23,
"learning_rate": 1.801165735197874e-05,
"loss": 2.5663,
"loss_": 0.9287,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4073,
"step": 1820
},
{
"epoch": 0.23,
"learning_rate": 1.7994615268703655e-05,
"loss": 2.6135,
"loss_": 1.2268,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4064,
"step": 1827
},
{
"epoch": 0.23,
"learning_rate": 1.7977508598012834e-05,
"loss": 2.5989,
"loss_": 1.1974,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4069,
"step": 1834
},
{
"epoch": 0.23,
"learning_rate": 1.7960337478108743e-05,
"loss": 2.5877,
"loss_": 1.13,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4064,
"step": 1841
},
{
"epoch": 0.23,
"learning_rate": 1.7943102047714548e-05,
"loss": 2.5955,
"loss_": 0.9327,
"moe_loss": 0.1609,
"moe_loss_longrong": 1.4235,
"step": 1848
},
{
"epoch": 0.23,
"learning_rate": 1.7925802446072957e-05,
"loss": 2.596,
"loss_": 1.2759,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4068,
"step": 1855
},
{
"epoch": 0.23,
"learning_rate": 1.7908438812945106e-05,
"loss": 2.6038,
"loss_": 1.1389,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4064,
"step": 1862
},
{
"epoch": 0.23,
"learning_rate": 1.7891011288609454e-05,
"loss": 2.585,
"loss_": 1.3728,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4061,
"step": 1869
},
{
"epoch": 0.24,
"learning_rate": 1.7873520013860595e-05,
"loss": 2.6263,
"loss_": 1.3243,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4064,
"step": 1876
},
{
"epoch": 0.24,
"learning_rate": 1.7855965130008188e-05,
"loss": 2.6254,
"loss_": 1.2195,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4066,
"step": 1883
},
{
"epoch": 0.24,
"learning_rate": 1.783834677887576e-05,
"loss": 2.6312,
"loss_": 0.8851,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4064,
"step": 1890
},
{
"epoch": 0.24,
"learning_rate": 1.782066510279959e-05,
"loss": 2.6333,
"loss_": 1.3439,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4063,
"step": 1897
},
{
"epoch": 0.24,
"learning_rate": 1.7802920244627543e-05,
"loss": 2.6112,
"loss_": 1.1944,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4063,
"step": 1904
},
{
"epoch": 0.24,
"learning_rate": 1.778511234771793e-05,
"loss": 2.614,
"loss_": 1.3766,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4055,
"step": 1911
},
{
"epoch": 0.24,
"learning_rate": 1.776724155593835e-05,
"loss": 2.5922,
"loss_": 1.1792,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4058,
"step": 1918
},
{
"epoch": 0.24,
"learning_rate": 1.7749308013664503e-05,
"loss": 2.6604,
"loss_": 1.1936,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4058,
"step": 1925
},
{
"epoch": 0.24,
"learning_rate": 1.7731311865779058e-05,
"loss": 2.6211,
"loss_": 1.235,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4057,
"step": 1932
},
{
"epoch": 0.24,
"learning_rate": 1.771325325767046e-05,
"loss": 2.6152,
"loss_": 1.1696,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4055,
"step": 1939
},
{
"epoch": 0.24,
"learning_rate": 1.7695132335231758e-05,
"loss": 2.6476,
"loss_": 1.2283,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4054,
"step": 1946
},
{
"epoch": 0.24,
"learning_rate": 1.7676949244859435e-05,
"loss": 2.6351,
"loss_": 1.0637,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4051,
"step": 1953
},
{
"epoch": 0.25,
"learning_rate": 1.7658704133452228e-05,
"loss": 2.6196,
"loss_": 1.3258,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4058,
"step": 1960
},
{
"epoch": 0.25,
"learning_rate": 1.764039714840991e-05,
"loss": 2.5882,
"loss_": 0.9733,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4048,
"step": 1967
},
{
"epoch": 0.25,
"learning_rate": 1.7622028437632154e-05,
"loss": 2.6128,
"loss_": 1.2358,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4052,
"step": 1974
},
{
"epoch": 0.25,
"learning_rate": 1.7603598149517277e-05,
"loss": 2.6192,
"loss_": 0.9268,
"moe_loss": 0.1607,
"moe_loss_longrong": 1.4196,
"step": 1981
},
{
"epoch": 0.25,
"learning_rate": 1.7585106432961093e-05,
"loss": 2.593,
"loss_": 1.0061,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4049,
"step": 1988
},
{
"epoch": 0.25,
"learning_rate": 1.7566553437355674e-05,
"loss": 2.6141,
"loss_": 1.083,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4046,
"step": 1995
},
{
"epoch": 0.25,
"learning_rate": 1.754793931258817e-05,
"loss": 2.6423,
"loss_": 1.0028,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4047,
"step": 2002
},
{
"epoch": 0.25,
"learning_rate": 1.7529264209039573e-05,
"loss": 2.5863,
"loss_": 1.4222,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4047,
"step": 2009
},
{
"epoch": 0.25,
"learning_rate": 1.751052827758352e-05,
"loss": 2.6299,
"loss_": 0.9747,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4046,
"step": 2016
},
{
"epoch": 0.25,
"learning_rate": 1.7491731669585066e-05,
"loss": 2.6117,
"loss_": 1.1316,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4043,
"step": 2023
},
{
"epoch": 0.25,
"learning_rate": 1.747287453689947e-05,
"loss": 2.6125,
"loss_": 0.9258,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.405,
"step": 2030
},
{
"epoch": 0.26,
"learning_rate": 1.745395703187095e-05,
"loss": 2.6568,
"loss_": 1.2147,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4046,
"step": 2037
},
{
"epoch": 0.26,
"learning_rate": 1.7434979307331482e-05,
"loss": 2.6449,
"loss_": 1.033,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4042,
"step": 2044
},
{
"epoch": 0.26,
"learning_rate": 1.7415941516599525e-05,
"loss": 2.6137,
"loss_": 1.2328,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4039,
"step": 2051
},
{
"epoch": 0.26,
"learning_rate": 1.7396843813478825e-05,
"loss": 2.6196,
"loss_": 1.1898,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.404,
"step": 2058
},
{
"epoch": 0.26,
"learning_rate": 1.7377686352257136e-05,
"loss": 2.6021,
"loss_": 1.2029,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4045,
"step": 2065
},
{
"epoch": 0.26,
"learning_rate": 1.7358469287705e-05,
"loss": 2.6354,
"loss_": 1.368,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4045,
"step": 2072
},
{
"epoch": 0.26,
"learning_rate": 1.7339192775074486e-05,
"loss": 2.619,
"loss_": 1.4305,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4048,
"step": 2079
},
{
"epoch": 0.26,
"learning_rate": 1.7319856970097927e-05,
"loss": 2.6185,
"loss_": 1.19,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4043,
"step": 2086
},
{
"epoch": 0.26,
"learning_rate": 1.730046202898668e-05,
"loss": 2.589,
"loss_": 1.1368,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4042,
"step": 2093
},
{
"epoch": 0.26,
"learning_rate": 1.7281008108429854e-05,
"loss": 2.6104,
"loss_": 1.02,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4044,
"step": 2100
},
{
"epoch": 0.26,
"learning_rate": 1.726149536559304e-05,
"loss": 2.6138,
"loss_": 1.2443,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.404,
"step": 2107
},
{
"epoch": 0.26,
"learning_rate": 1.7241923958117047e-05,
"loss": 2.6079,
"loss_": 1.272,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4039,
"step": 2114
},
{
"epoch": 0.27,
"learning_rate": 1.7222294044116637e-05,
"loss": 2.6155,
"loss_": 1.2334,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4037,
"step": 2121
},
{
"epoch": 0.27,
"learning_rate": 1.7202605782179223e-05,
"loss": 2.6217,
"loss_": 1.1778,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4039,
"step": 2128
},
{
"epoch": 0.27,
"learning_rate": 1.718285933136361e-05,
"loss": 2.6156,
"loss_": 1.0468,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4037,
"step": 2135
},
{
"epoch": 0.27,
"learning_rate": 1.7163054851198712e-05,
"loss": 2.6145,
"loss_": 1.2375,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4037,
"step": 2142
},
{
"epoch": 0.27,
"learning_rate": 1.7143192501682243e-05,
"loss": 2.6167,
"loss_": 1.325,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4036,
"step": 2149
},
{
"epoch": 0.27,
"learning_rate": 1.712327244327944e-05,
"loss": 2.5924,
"loss_": 0.955,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4035,
"step": 2156
},
{
"epoch": 0.27,
"learning_rate": 1.7103294836921752e-05,
"loss": 2.6235,
"loss_": 1.0911,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4034,
"step": 2163
},
{
"epoch": 0.27,
"learning_rate": 1.708325984400557e-05,
"loss": 2.6047,
"loss_": 0.8419,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4037,
"step": 2170
},
{
"epoch": 0.27,
"learning_rate": 1.7063167626390893e-05,
"loss": 2.6268,
"loss_": 1.1833,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4029,
"step": 2177
},
{
"epoch": 0.27,
"learning_rate": 1.7043018346400024e-05,
"loss": 2.622,
"loss_": 1.0641,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4028,
"step": 2184
},
{
"epoch": 0.27,
"learning_rate": 1.7022812166816277e-05,
"loss": 2.6011,
"loss_": 0.9805,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4028,
"step": 2191
},
{
"epoch": 0.28,
"learning_rate": 1.7002549250882637e-05,
"loss": 2.5584,
"loss_": 1.1622,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4025,
"step": 2198
},
{
"epoch": 0.28,
"learning_rate": 1.698222976230047e-05,
"loss": 2.607,
"loss_": 1.2586,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4026,
"step": 2205
},
{
"epoch": 0.28,
"learning_rate": 1.6961853865228176e-05,
"loss": 2.6328,
"loss_": 1.3466,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4029,
"step": 2212
},
{
"epoch": 0.28,
"learning_rate": 1.6941421724279866e-05,
"loss": 2.568,
"loss_": 1.2851,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4028,
"step": 2219
},
{
"epoch": 0.28,
"learning_rate": 1.6920933504524048e-05,
"loss": 2.5682,
"loss_": 1.033,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4028,
"step": 2226
},
{
"epoch": 0.28,
"learning_rate": 1.6900389371482286e-05,
"loss": 2.5863,
"loss_": 1.1035,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4031,
"step": 2233
},
{
"epoch": 0.28,
"learning_rate": 1.6879789491127837e-05,
"loss": 2.5745,
"loss_": 0.9979,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4026,
"step": 2240
},
{
"epoch": 0.28,
"learning_rate": 1.685913402988436e-05,
"loss": 2.5738,
"loss_": 1.056,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.403,
"step": 2247
},
{
"epoch": 0.28,
"learning_rate": 1.6838423154624534e-05,
"loss": 2.5971,
"loss_": 0.9538,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4021,
"step": 2254
},
{
"epoch": 0.28,
"learning_rate": 1.6817657032668715e-05,
"loss": 2.5999,
"loss_": 1.2746,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4024,
"step": 2261
},
{
"epoch": 0.28,
"learning_rate": 1.6796835831783597e-05,
"loss": 2.5917,
"loss_": 1.1284,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4021,
"step": 2268
},
{
"epoch": 0.29,
"learning_rate": 1.6775959720180847e-05,
"loss": 2.5756,
"loss_": 1.0512,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.402,
"step": 2275
},
{
"epoch": 0.29,
"learning_rate": 1.675502886651574e-05,
"loss": 2.5869,
"loss_": 1.1705,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4019,
"step": 2282
},
{
"epoch": 0.29,
"learning_rate": 1.6734043439885826e-05,
"loss": 2.6105,
"loss_": 1.2021,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4019,
"step": 2289
},
{
"epoch": 0.29,
"learning_rate": 1.6713003609829518e-05,
"loss": 2.6133,
"loss_": 1.2789,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.402,
"step": 2296
},
{
"epoch": 0.29,
"learning_rate": 1.669190954632477e-05,
"loss": 2.6103,
"loss_": 1.0784,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4019,
"step": 2303
},
{
"epoch": 0.29,
"learning_rate": 1.667076141978765e-05,
"loss": 2.5459,
"loss_": 1.27,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4021,
"step": 2310
},
{
"epoch": 0.29,
"learning_rate": 1.664955940107103e-05,
"loss": 2.5936,
"loss_": 1.0663,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4026,
"step": 2317
},
{
"epoch": 0.29,
"learning_rate": 1.662830366146315e-05,
"loss": 2.5879,
"loss_": 0.9998,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4019,
"step": 2324
},
{
"epoch": 0.29,
"learning_rate": 1.6606994372686246e-05,
"loss": 2.6045,
"loss_": 1.2394,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.402,
"step": 2331
},
{
"epoch": 0.29,
"learning_rate": 1.6585631706895186e-05,
"loss": 2.5902,
"loss_": 1.1972,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4015,
"step": 2338
},
{
"epoch": 0.29,
"learning_rate": 1.6564215836676066e-05,
"loss": 2.5844,
"loss_": 0.948,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4019,
"step": 2345
},
{
"epoch": 0.29,
"learning_rate": 1.6542746935044793e-05,
"loss": 2.5781,
"loss_": 1.4827,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4025,
"step": 2352
},
{
"epoch": 0.3,
"learning_rate": 1.652122517544573e-05,
"loss": 2.5821,
"loss_": 1.0247,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4022,
"step": 2359
},
{
"epoch": 0.3,
"learning_rate": 1.6499650731750256e-05,
"loss": 2.6092,
"loss_": 0.8974,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4018,
"step": 2366
},
{
"epoch": 0.3,
"learning_rate": 1.647802377825539e-05,
"loss": 2.5766,
"loss_": 1.0648,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4016,
"step": 2373
},
{
"epoch": 0.3,
"learning_rate": 1.645634448968236e-05,
"loss": 2.603,
"loss_": 1.244,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4019,
"step": 2380
},
{
"epoch": 0.3,
"learning_rate": 1.643461304117521e-05,
"loss": 2.6323,
"loss_": 1.3655,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4015,
"step": 2387
},
{
"epoch": 0.3,
"learning_rate": 1.6412829608299373e-05,
"loss": 2.6053,
"loss_": 1.3408,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4014,
"step": 2394
},
{
"epoch": 0.3,
"learning_rate": 1.6390994367040257e-05,
"loss": 2.6053,
"loss_": 1.3031,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4016,
"step": 2401
},
{
"epoch": 0.3,
"learning_rate": 1.636910749380183e-05,
"loss": 2.5956,
"loss_": 1.434,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4015,
"step": 2408
},
{
"epoch": 0.3,
"learning_rate": 1.634716916540517e-05,
"loss": 2.6072,
"loss_": 1.3772,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4013,
"step": 2415
},
{
"epoch": 0.3,
"learning_rate": 1.632517955908707e-05,
"loss": 2.6077,
"loss_": 1.2657,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4017,
"step": 2422
},
{
"epoch": 0.3,
"learning_rate": 1.6303138852498594e-05,
"loss": 2.5694,
"loss_": 1.3289,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4014,
"step": 2429
},
{
"epoch": 0.31,
"learning_rate": 1.6281047223703625e-05,
"loss": 2.5821,
"loss_": 1.1676,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4008,
"step": 2436
},
{
"epoch": 0.31,
"learning_rate": 1.6258904851177434e-05,
"loss": 2.5965,
"loss_": 1.2449,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4013,
"step": 2443
},
{
"epoch": 0.31,
"learning_rate": 1.6236711913805273e-05,
"loss": 2.6104,
"loss_": 1.1732,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4008,
"step": 2450
},
{
"epoch": 0.31,
"learning_rate": 1.621446859088087e-05,
"loss": 2.5975,
"loss_": 1.2338,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4008,
"step": 2457
},
{
"epoch": 0.31,
"learning_rate": 1.619217506210503e-05,
"loss": 2.6063,
"loss_": 1.28,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.401,
"step": 2464
},
{
"epoch": 0.31,
"learning_rate": 1.6169831507584152e-05,
"loss": 2.5977,
"loss_": 1.3583,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4011,
"step": 2471
},
{
"epoch": 0.31,
"learning_rate": 1.614743810782879e-05,
"loss": 2.6263,
"loss_": 1.4302,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.401,
"step": 2478
},
{
"epoch": 0.31,
"learning_rate": 1.61249950437522e-05,
"loss": 2.6303,
"loss_": 1.0776,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.401,
"step": 2485
},
{
"epoch": 0.31,
"learning_rate": 1.610250249666886e-05,
"loss": 2.5851,
"loss_": 1.0742,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4004,
"step": 2492
},
{
"epoch": 0.31,
"learning_rate": 1.6079960648293016e-05,
"loss": 2.5652,
"loss_": 1.2411,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4006,
"step": 2499
},
{
"epoch": 0.31,
"learning_rate": 1.605736968073721e-05,
"loss": 2.5674,
"loss_": 1.3629,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4002,
"step": 2506
},
{
"epoch": 0.32,
"learning_rate": 1.6034729776510817e-05,
"loss": 2.5844,
"loss_": 1.2259,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4007,
"step": 2513
},
{
"epoch": 0.32,
"learning_rate": 1.6012041118518558e-05,
"loss": 2.592,
"loss_": 1.3237,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4005,
"step": 2520
},
{
"epoch": 0.32,
"learning_rate": 1.598930389005904e-05,
"loss": 2.5949,
"loss_": 1.1398,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4003,
"step": 2527
},
{
"epoch": 0.32,
"learning_rate": 1.596651827482325e-05,
"loss": 2.5823,
"loss_": 0.984,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4,
"step": 2534
},
{
"epoch": 0.32,
"learning_rate": 1.5943684456893103e-05,
"loss": 2.5586,
"loss_": 0.8138,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.412,
"step": 2541
},
{
"epoch": 0.32,
"learning_rate": 1.5920802620739914e-05,
"loss": 2.6019,
"loss_": 0.9803,
"moe_loss": 0.1606,
"moe_loss_longrong": 1.4116,
"step": 2548
},
{
"epoch": 0.32,
"learning_rate": 1.5897872951222946e-05,
"loss": 2.5744,
"loss_": 0.8654,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3999,
"step": 2555
},
{
"epoch": 0.32,
"learning_rate": 1.5874895633587904e-05,
"loss": 2.5881,
"loss_": 1.1376,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4001,
"step": 2562
},
{
"epoch": 0.32,
"learning_rate": 1.585187085346543e-05,
"loss": 2.6219,
"loss_": 1.1824,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4003,
"step": 2569
},
{
"epoch": 0.32,
"learning_rate": 1.5828798796869607e-05,
"loss": 2.5878,
"loss_": 1.2474,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4005,
"step": 2576
},
{
"epoch": 0.32,
"learning_rate": 1.5805679650196456e-05,
"loss": 2.5889,
"loss_": 1.3011,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4002,
"step": 2583
},
{
"epoch": 0.32,
"learning_rate": 1.5782513600222443e-05,
"loss": 2.5666,
"loss_": 0.8722,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.4005,
"step": 2590
},
{
"epoch": 0.33,
"learning_rate": 1.5759300834102952e-05,
"loss": 2.562,
"loss_": 1.0941,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3999,
"step": 2597
},
{
"epoch": 0.33,
"learning_rate": 1.5736041539370783e-05,
"loss": 2.5698,
"loss_": 1.1632,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3999,
"step": 2604
},
{
"epoch": 0.33,
"learning_rate": 1.5712735903934627e-05,
"loss": 2.6022,
"loss_": 1.1992,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3999,
"step": 2611
},
{
"epoch": 0.33,
"learning_rate": 1.568938411607757e-05,
"loss": 2.5882,
"loss_": 1.148,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.4,
"step": 2618
},
{
"epoch": 0.33,
"learning_rate": 1.566598636445554e-05,
"loss": 2.5838,
"loss_": 0.9669,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3997,
"step": 2625
},
{
"epoch": 0.33,
"learning_rate": 1.5642542838095814e-05,
"loss": 2.5775,
"loss_": 1.1281,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3995,
"step": 2632
},
{
"epoch": 0.33,
"learning_rate": 1.5619053726395468e-05,
"loss": 2.5868,
"loss_": 1.0479,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3995,
"step": 2639
},
{
"epoch": 0.33,
"learning_rate": 1.5595519219119863e-05,
"loss": 2.6,
"loss_": 0.9972,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3993,
"step": 2646
},
{
"epoch": 0.33,
"learning_rate": 1.5571939506401103e-05,
"loss": 2.6007,
"loss_": 1.2232,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3993,
"step": 2653
},
{
"epoch": 0.33,
"learning_rate": 1.5548314778736487e-05,
"loss": 2.6087,
"loss_": 1.2657,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3996,
"step": 2660
},
{
"epoch": 0.33,
"learning_rate": 1.552464522698701e-05,
"loss": 2.5675,
"loss_": 1.2458,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3992,
"step": 2667
},
{
"epoch": 0.34,
"learning_rate": 1.550093104237577e-05,
"loss": 2.5844,
"loss_": 1.1855,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3993,
"step": 2674
},
{
"epoch": 0.34,
"learning_rate": 1.5477172416486464e-05,
"loss": 2.6192,
"loss_": 1.2552,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3993,
"step": 2681
},
{
"epoch": 0.34,
"learning_rate": 1.5453369541261814e-05,
"loss": 2.5796,
"loss_": 1.3244,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3996,
"step": 2688
},
{
"epoch": 0.34,
"learning_rate": 1.5429522609002034e-05,
"loss": 2.5859,
"loss_": 1.1373,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3992,
"step": 2695
},
{
"epoch": 0.34,
"learning_rate": 1.540563181236326e-05,
"loss": 2.5702,
"loss_": 1.3094,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.399,
"step": 2702
},
{
"epoch": 0.34,
"learning_rate": 1.5381697344356014e-05,
"loss": 2.5893,
"loss_": 1.2666,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3991,
"step": 2709
},
{
"epoch": 0.34,
"learning_rate": 1.535771939834362e-05,
"loss": 2.5504,
"loss_": 1.2537,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3992,
"step": 2716
},
{
"epoch": 0.34,
"learning_rate": 1.5333698168040664e-05,
"loss": 2.6094,
"loss_": 1.0996,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3992,
"step": 2723
},
{
"epoch": 0.34,
"learning_rate": 1.530963384751142e-05,
"loss": 2.6049,
"loss_": 1.2289,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3992,
"step": 2730
},
{
"epoch": 0.34,
"learning_rate": 1.5285526631168273e-05,
"loss": 2.5766,
"loss_": 1.115,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3992,
"step": 2737
},
{
"epoch": 0.34,
"learning_rate": 1.5261376713770176e-05,
"loss": 2.5589,
"loss_": 1.3787,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3992,
"step": 2744
},
{
"epoch": 0.34,
"learning_rate": 1.5237184290421035e-05,
"loss": 2.5508,
"loss_": 1.1691,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3993,
"step": 2751
},
{
"epoch": 0.35,
"learning_rate": 1.521294955656817e-05,
"loss": 2.567,
"loss_": 1.0558,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3989,
"step": 2758
},
{
"epoch": 0.35,
"learning_rate": 1.5188672708000725e-05,
"loss": 2.636,
"loss_": 1.464,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3991,
"step": 2765
},
{
"epoch": 0.35,
"learning_rate": 1.5164353940848068e-05,
"loss": 2.5519,
"loss_": 0.957,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3986,
"step": 2772
},
{
"epoch": 0.35,
"learning_rate": 1.5139993451578236e-05,
"loss": 2.6053,
"loss_": 1.2139,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3991,
"step": 2779
},
{
"epoch": 0.35,
"learning_rate": 1.5115591436996327e-05,
"loss": 2.5661,
"loss_": 1.424,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3987,
"step": 2786
},
{
"epoch": 0.35,
"learning_rate": 1.5091148094242913e-05,
"loss": 2.5659,
"loss_": 0.9377,
"moe_loss": 0.1605,
"moe_loss_longrong": 1.4097,
"step": 2793
},
{
"epoch": 0.35,
"learning_rate": 1.5066663620792463e-05,
"loss": 2.5646,
"loss_": 1.0845,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3987,
"step": 2800
},
{
"epoch": 0.35,
"learning_rate": 1.5042138214451719e-05,
"loss": 2.5793,
"loss_": 1.1437,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3982,
"step": 2807
},
{
"epoch": 0.35,
"learning_rate": 1.5017572073358127e-05,
"loss": 2.5658,
"loss_": 1.1455,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3984,
"step": 2814
},
{
"epoch": 0.35,
"learning_rate": 1.4992965395978219e-05,
"loss": 2.5799,
"loss_": 0.9263,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3986,
"step": 2821
},
{
"epoch": 0.35,
"learning_rate": 1.4968318381106013e-05,
"loss": 2.6166,
"loss_": 1.1558,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3986,
"step": 2828
},
{
"epoch": 0.36,
"learning_rate": 1.4943631227861412e-05,
"loss": 2.5847,
"loss_": 1.2277,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3985,
"step": 2835
},
{
"epoch": 0.36,
"learning_rate": 1.4918904135688586e-05,
"loss": 2.5822,
"loss_": 1.3077,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3986,
"step": 2842
},
{
"epoch": 0.36,
"learning_rate": 1.4894137304354367e-05,
"loss": 2.5709,
"loss_": 1.1239,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3986,
"step": 2849
},
{
"epoch": 0.36,
"learning_rate": 1.4869330933946641e-05,
"loss": 2.6017,
"loss_": 1.3636,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3984,
"step": 2856
},
{
"epoch": 0.36,
"learning_rate": 1.4844485224872721e-05,
"loss": 2.5933,
"loss_": 1.1977,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3983,
"step": 2863
},
{
"epoch": 0.36,
"learning_rate": 1.481960037785773e-05,
"loss": 2.5739,
"loss_": 1.3326,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3982,
"step": 2870
},
{
"epoch": 0.36,
"learning_rate": 1.4794676593942979e-05,
"loss": 2.5793,
"loss_": 1.0945,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3983,
"step": 2877
},
{
"epoch": 0.36,
"learning_rate": 1.476971407448435e-05,
"loss": 2.5561,
"loss_": 0.9802,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3987,
"step": 2884
},
{
"epoch": 0.36,
"learning_rate": 1.4744713021150665e-05,
"loss": 2.5553,
"loss_": 1.025,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3978,
"step": 2891
},
{
"epoch": 0.36,
"learning_rate": 1.4719673635922047e-05,
"loss": 2.5462,
"loss_": 1.1738,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3979,
"step": 2898
},
{
"epoch": 0.36,
"learning_rate": 1.4694596121088309e-05,
"loss": 2.58,
"loss_": 1.1755,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3978,
"step": 2905
},
{
"epoch": 0.37,
"learning_rate": 1.4669480679247299e-05,
"loss": 2.5715,
"loss_": 1.37,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3982,
"step": 2912
},
{
"epoch": 0.37,
"learning_rate": 1.4644327513303281e-05,
"loss": 2.5696,
"loss_": 1.2128,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3979,
"step": 2919
},
{
"epoch": 0.37,
"learning_rate": 1.4619136826465277e-05,
"loss": 2.6001,
"loss_": 1.1853,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3983,
"step": 2926
},
{
"epoch": 0.37,
"learning_rate": 1.4593908822245437e-05,
"loss": 2.5781,
"loss_": 1.0309,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3977,
"step": 2933
},
{
"epoch": 0.37,
"learning_rate": 1.4568643704457404e-05,
"loss": 2.5805,
"loss_": 1.1558,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3981,
"step": 2940
},
{
"epoch": 0.37,
"learning_rate": 1.454334167721464e-05,
"loss": 2.5546,
"loss_": 1.1177,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3976,
"step": 2947
},
{
"epoch": 0.37,
"learning_rate": 1.4518002944928807e-05,
"loss": 2.5872,
"loss_": 1.4162,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3979,
"step": 2954
},
{
"epoch": 0.37,
"learning_rate": 1.4492627712308094e-05,
"loss": 2.5779,
"loss_": 1.3387,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.398,
"step": 2961
},
{
"epoch": 0.37,
"learning_rate": 1.4467216184355577e-05,
"loss": 2.5994,
"loss_": 1.3317,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3977,
"step": 2968
},
{
"epoch": 0.37,
"learning_rate": 1.4441768566367554e-05,
"loss": 2.5828,
"loss_": 1.0536,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3975,
"step": 2975
},
{
"epoch": 0.37,
"learning_rate": 1.4416285063931887e-05,
"loss": 2.5719,
"loss_": 1.1378,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3975,
"step": 2982
},
{
"epoch": 0.37,
"learning_rate": 1.4390765882926348e-05,
"loss": 2.5612,
"loss_": 1.2159,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3978,
"step": 2989
},
{
"epoch": 0.38,
"learning_rate": 1.4365211229516951e-05,
"loss": 2.5558,
"loss_": 1.1645,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3975,
"step": 2996
},
{
"epoch": 0.38,
"learning_rate": 1.433962131015628e-05,
"loss": 2.5854,
"loss_": 1.1987,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3977,
"step": 3003
},
{
"epoch": 0.38,
"learning_rate": 1.4313996331581841e-05,
"loss": 2.5635,
"loss_": 1.3072,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3975,
"step": 3010
},
{
"epoch": 0.38,
"learning_rate": 1.4288336500814366e-05,
"loss": 2.5645,
"loss_": 1.0643,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3976,
"step": 3017
},
{
"epoch": 0.38,
"learning_rate": 1.426264202515616e-05,
"loss": 2.563,
"loss_": 1.2845,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3975,
"step": 3024
},
{
"epoch": 0.38,
"learning_rate": 1.4236913112189417e-05,
"loss": 2.5718,
"loss_": 1.175,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3975,
"step": 3031
},
{
"epoch": 0.38,
"learning_rate": 1.4211149969774544e-05,
"loss": 2.533,
"loss_": 1.1995,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3975,
"step": 3038
},
{
"epoch": 0.38,
"learning_rate": 1.418535280604849e-05,
"loss": 2.5548,
"loss_": 1.2251,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.397,
"step": 3045
},
{
"epoch": 0.38,
"learning_rate": 1.4159521829423049e-05,
"loss": 2.5767,
"loss_": 0.8661,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.397,
"step": 3052
},
{
"epoch": 0.38,
"learning_rate": 1.4133657248583186e-05,
"loss": 2.584,
"loss_": 0.7004,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4067,
"step": 3059
},
{
"epoch": 0.38,
"learning_rate": 1.410775927248536e-05,
"loss": 2.6066,
"loss_": 0.9504,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3974,
"step": 3066
},
{
"epoch": 0.39,
"learning_rate": 1.4081828110355806e-05,
"loss": 2.5768,
"loss_": 1.0234,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.397,
"step": 3073
},
{
"epoch": 0.39,
"learning_rate": 1.4055863971688886e-05,
"loss": 2.5702,
"loss_": 1.0861,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3972,
"step": 3080
},
{
"epoch": 0.39,
"learning_rate": 1.4029867066245363e-05,
"loss": 2.5943,
"loss_": 1.0817,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3969,
"step": 3087
},
{
"epoch": 0.39,
"learning_rate": 1.400383760405072e-05,
"loss": 2.5626,
"loss_": 0.9245,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.397,
"step": 3094
},
{
"epoch": 0.39,
"learning_rate": 1.3977775795393467e-05,
"loss": 2.5936,
"loss_": 1.3773,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3972,
"step": 3101
},
{
"epoch": 0.39,
"learning_rate": 1.3951681850823427e-05,
"loss": 2.5673,
"loss_": 0.8812,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3967,
"step": 3108
},
{
"epoch": 0.39,
"learning_rate": 1.392555598115005e-05,
"loss": 2.556,
"loss_": 1.3045,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3968,
"step": 3115
},
{
"epoch": 0.39,
"learning_rate": 1.3899398397440704e-05,
"loss": 2.5809,
"loss_": 1.0862,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3969,
"step": 3122
},
{
"epoch": 0.39,
"learning_rate": 1.3873209311018974e-05,
"loss": 2.5601,
"loss_": 0.764,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4061,
"step": 3129
},
{
"epoch": 0.39,
"learning_rate": 1.3846988933462944e-05,
"loss": 2.5884,
"loss_": 1.0127,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3964,
"step": 3136
},
{
"epoch": 0.39,
"learning_rate": 1.3820737476603506e-05,
"loss": 2.5553,
"loss_": 1.1064,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3969,
"step": 3143
},
{
"epoch": 0.39,
"learning_rate": 1.3794455152522619e-05,
"loss": 2.5814,
"loss_": 1.2526,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3969,
"step": 3150
},
{
"epoch": 0.4,
"learning_rate": 1.3768142173551638e-05,
"loss": 2.5803,
"loss_": 1.084,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3968,
"step": 3157
},
{
"epoch": 0.4,
"learning_rate": 1.3741798752269553e-05,
"loss": 2.5698,
"loss_": 1.1246,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3964,
"step": 3164
},
{
"epoch": 0.4,
"learning_rate": 1.3715425101501306e-05,
"loss": 2.5792,
"loss_": 1.421,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3965,
"step": 3171
},
{
"epoch": 0.4,
"learning_rate": 1.3689021434316057e-05,
"loss": 2.5823,
"loss_": 0.9307,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3965,
"step": 3178
},
{
"epoch": 0.4,
"learning_rate": 1.3662587964025456e-05,
"loss": 2.596,
"loss_": 1.0908,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.397,
"step": 3185
},
{
"epoch": 0.4,
"learning_rate": 1.363612490418194e-05,
"loss": 2.5583,
"loss_": 1.2338,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3968,
"step": 3192
},
{
"epoch": 0.4,
"learning_rate": 1.3609632468576997e-05,
"loss": 2.5646,
"loss_": 1.1325,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3964,
"step": 3199
},
{
"epoch": 0.4,
"learning_rate": 1.358311087123942e-05,
"loss": 2.557,
"loss_": 1.0712,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3961,
"step": 3206
},
{
"epoch": 0.4,
"learning_rate": 1.3556560326433617e-05,
"loss": 2.5436,
"loss_": 0.9923,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3967,
"step": 3213
},
{
"epoch": 0.4,
"learning_rate": 1.3529981048657846e-05,
"loss": 2.5845,
"loss_": 1.101,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3964,
"step": 3220
},
{
"epoch": 0.4,
"learning_rate": 1.35033732526425e-05,
"loss": 2.586,
"loss_": 1.153,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3963,
"step": 3227
},
{
"epoch": 0.41,
"learning_rate": 1.3476737153348363e-05,
"loss": 2.5813,
"loss_": 1.171,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3963,
"step": 3234
},
{
"epoch": 0.41,
"learning_rate": 1.3450072965964878e-05,
"loss": 2.5771,
"loss_": 1.2221,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.396,
"step": 3241
},
{
"epoch": 0.41,
"learning_rate": 1.342338090590841e-05,
"loss": 2.5494,
"loss_": 1.0355,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3962,
"step": 3248
},
{
"epoch": 0.41,
"learning_rate": 1.3396661188820505e-05,
"loss": 2.611,
"loss_": 1.2927,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3962,
"step": 3255
},
{
"epoch": 0.41,
"learning_rate": 1.3369914030566147e-05,
"loss": 2.5692,
"loss_": 0.5951,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3963,
"step": 3262
},
{
"epoch": 0.41,
"learning_rate": 1.3343139647232008e-05,
"loss": 2.566,
"loss_": 1.2681,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 3269
},
{
"epoch": 0.41,
"learning_rate": 1.3316338255124708e-05,
"loss": 2.562,
"loss_": 1.0295,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3965,
"step": 3276
},
{
"epoch": 0.41,
"learning_rate": 1.3289510070769074e-05,
"loss": 2.5404,
"loss_": 1.3584,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 3283
},
{
"epoch": 0.41,
"learning_rate": 1.3262655310906375e-05,
"loss": 2.5778,
"loss_": 1.018,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3956,
"step": 3290
},
{
"epoch": 0.41,
"learning_rate": 1.323577419249259e-05,
"loss": 2.5806,
"loss_": 0.9819,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 3297
},
{
"epoch": 0.41,
"learning_rate": 1.3208866932696639e-05,
"loss": 2.5737,
"loss_": 1.1931,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3961,
"step": 3304
},
{
"epoch": 0.42,
"learning_rate": 1.3181933748898629e-05,
"loss": 2.5643,
"loss_": 1.1444,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3957,
"step": 3311
},
{
"epoch": 0.42,
"learning_rate": 1.3154974858688121e-05,
"loss": 2.5495,
"loss_": 1.2428,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3959,
"step": 3318
},
{
"epoch": 0.42,
"learning_rate": 1.3127990479862333e-05,
"loss": 2.5653,
"loss_": 1.3658,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3957,
"step": 3325
},
{
"epoch": 0.42,
"learning_rate": 1.3100980830424419e-05,
"loss": 2.5537,
"loss_": 1.2466,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3957,
"step": 3332
},
{
"epoch": 0.42,
"learning_rate": 1.3073946128581685e-05,
"loss": 2.5784,
"loss_": 1.1899,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3956,
"step": 3339
},
{
"epoch": 0.42,
"learning_rate": 1.3046886592743828e-05,
"loss": 2.5491,
"loss_": 1.1516,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.396,
"step": 3346
},
{
"epoch": 0.42,
"learning_rate": 1.3019802441521181e-05,
"loss": 2.5584,
"loss_": 1.3479,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3958,
"step": 3353
},
{
"epoch": 0.42,
"learning_rate": 1.2992693893722939e-05,
"loss": 2.5629,
"loss_": 1.226,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3956,
"step": 3360
},
{
"epoch": 0.42,
"learning_rate": 1.2965561168355394e-05,
"loss": 2.5635,
"loss_": 1.2831,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3956,
"step": 3367
},
{
"epoch": 0.42,
"learning_rate": 1.2938404484620169e-05,
"loss": 2.5392,
"loss_": 1.3104,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3956,
"step": 3374
},
{
"epoch": 0.42,
"learning_rate": 1.2911224061912433e-05,
"loss": 2.5353,
"loss_": 1.0487,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3953,
"step": 3381
},
{
"epoch": 0.42,
"learning_rate": 1.2884020119819152e-05,
"loss": 2.5758,
"loss_": 0.7415,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3954,
"step": 3388
},
{
"epoch": 0.43,
"learning_rate": 1.2856792878117293e-05,
"loss": 2.56,
"loss_": 1.1296,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3954,
"step": 3395
},
{
"epoch": 0.43,
"learning_rate": 1.2829542556772059e-05,
"loss": 2.5564,
"loss_": 0.8006,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3953,
"step": 3402
},
{
"epoch": 0.43,
"learning_rate": 1.2802269375935112e-05,
"loss": 2.569,
"loss_": 1.131,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3951,
"step": 3409
},
{
"epoch": 0.43,
"learning_rate": 1.2774973555942796e-05,
"loss": 2.5637,
"loss_": 0.9494,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3955,
"step": 3416
},
{
"epoch": 0.43,
"learning_rate": 1.2747655317314344e-05,
"loss": 2.5588,
"loss_": 1.3893,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3956,
"step": 3423
},
{
"epoch": 0.43,
"learning_rate": 1.2720314880750118e-05,
"loss": 2.562,
"loss_": 1.1676,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3955,
"step": 3430
},
{
"epoch": 0.43,
"learning_rate": 1.26929524671298e-05,
"loss": 2.5562,
"loss_": 0.9704,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3951,
"step": 3437
},
{
"epoch": 0.43,
"learning_rate": 1.266556829751064e-05,
"loss": 2.583,
"loss_": 1.1975,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3954,
"step": 3444
},
{
"epoch": 0.43,
"learning_rate": 1.2638162593125634e-05,
"loss": 2.6252,
"loss_": 1.1995,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3953,
"step": 3451
},
{
"epoch": 0.43,
"learning_rate": 1.2610735575381763e-05,
"loss": 2.5464,
"loss_": 0.9304,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4035,
"step": 3458
},
{
"epoch": 0.43,
"learning_rate": 1.2583287465858197e-05,
"loss": 2.5619,
"loss_": 1.1987,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3952,
"step": 3465
},
{
"epoch": 0.44,
"learning_rate": 1.2555818486304497e-05,
"loss": 2.5559,
"loss_": 1.2018,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.395,
"step": 3472
},
{
"epoch": 0.44,
"learning_rate": 1.2528328858638844e-05,
"loss": 2.5436,
"loss_": 0.6166,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3952,
"step": 3479
},
{
"epoch": 0.44,
"learning_rate": 1.2500818804946211e-05,
"loss": 2.5634,
"loss_": 1.1188,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3952,
"step": 3486
},
{
"epoch": 0.44,
"learning_rate": 1.247328854747661e-05,
"loss": 2.5476,
"loss_": 1.1271,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3948,
"step": 3493
},
{
"epoch": 0.44,
"learning_rate": 1.2445738308643267e-05,
"loss": 2.5728,
"loss_": 1.0833,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3952,
"step": 3500
},
{
"epoch": 0.44,
"learning_rate": 1.2418168311020834e-05,
"loss": 2.5511,
"loss_": 1.2348,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3951,
"step": 3507
},
{
"epoch": 0.44,
"learning_rate": 1.2390578777343594e-05,
"loss": 2.5674,
"loss_": 1.3258,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3952,
"step": 3514
},
{
"epoch": 0.44,
"learning_rate": 1.236296993050366e-05,
"loss": 2.5809,
"loss_": 1.2076,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3947,
"step": 3521
},
{
"epoch": 0.44,
"learning_rate": 1.2335341993549175e-05,
"loss": 2.5583,
"loss_": 0.945,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3947,
"step": 3528
},
{
"epoch": 0.44,
"learning_rate": 1.2307695189682502e-05,
"loss": 2.5778,
"loss_": 1.414,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3947,
"step": 3535
},
{
"epoch": 0.44,
"learning_rate": 1.2280029742258435e-05,
"loss": 2.5572,
"loss_": 1.2353,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.395,
"step": 3542
},
{
"epoch": 0.44,
"learning_rate": 1.2252345874782376e-05,
"loss": 2.5725,
"loss_": 1.304,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3949,
"step": 3549
},
{
"epoch": 0.45,
"learning_rate": 1.2224643810908556e-05,
"loss": 2.5498,
"loss_": 1.2018,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.395,
"step": 3556
},
{
"epoch": 0.45,
"learning_rate": 1.2196923774438195e-05,
"loss": 2.534,
"loss_": 1.2461,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3947,
"step": 3563
},
{
"epoch": 0.45,
"learning_rate": 1.2169185989317724e-05,
"loss": 2.5985,
"loss_": 1.173,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3947,
"step": 3570
},
{
"epoch": 0.45,
"learning_rate": 1.2141430679636959e-05,
"loss": 2.5532,
"loss_": 1.2553,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3947,
"step": 3577
},
{
"epoch": 0.45,
"learning_rate": 1.211365806962729e-05,
"loss": 2.5379,
"loss_": 1.1121,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3946,
"step": 3584
},
{
"epoch": 0.45,
"learning_rate": 1.2085868383659882e-05,
"loss": 2.5589,
"loss_": 1.1214,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3948,
"step": 3591
},
{
"epoch": 0.45,
"learning_rate": 1.2058061846243847e-05,
"loss": 2.5311,
"loss_": 0.9122,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3946,
"step": 3598
},
{
"epoch": 0.45,
"learning_rate": 1.2030238682024444e-05,
"loss": 2.5311,
"loss_": 0.7285,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4021,
"step": 3605
},
{
"epoch": 0.45,
"learning_rate": 1.2002399115781253e-05,
"loss": 2.5848,
"loss_": 1.1433,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3946,
"step": 3612
},
{
"epoch": 0.45,
"learning_rate": 1.1974543372426363e-05,
"loss": 2.5491,
"loss_": 0.8086,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3944,
"step": 3619
},
{
"epoch": 0.45,
"learning_rate": 1.1946671677002563e-05,
"loss": 2.5353,
"loss_": 1.1375,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3947,
"step": 3626
},
{
"epoch": 0.46,
"learning_rate": 1.1918784254681506e-05,
"loss": 2.5366,
"loss_": 1.0491,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3944,
"step": 3633
},
{
"epoch": 0.46,
"learning_rate": 1.189088133076191e-05,
"loss": 2.5363,
"loss_": 1.254,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3945,
"step": 3640
},
{
"epoch": 0.46,
"learning_rate": 1.1862963130667724e-05,
"loss": 2.5588,
"loss_": 1.2051,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3943,
"step": 3647
},
{
"epoch": 0.46,
"learning_rate": 1.1835029879946308e-05,
"loss": 2.5656,
"loss_": 0.9809,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3943,
"step": 3654
},
{
"epoch": 0.46,
"learning_rate": 1.1807081804266625e-05,
"loss": 2.5597,
"loss_": 0.9086,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3946,
"step": 3661
},
{
"epoch": 0.46,
"learning_rate": 1.1779119129417394e-05,
"loss": 2.5387,
"loss_": 0.9881,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4026,
"step": 3668
},
{
"epoch": 0.46,
"learning_rate": 1.175114208130528e-05,
"loss": 2.5745,
"loss_": 1.1147,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3942,
"step": 3675
},
{
"epoch": 0.46,
"learning_rate": 1.1723150885953081e-05,
"loss": 2.5314,
"loss_": 1.0994,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4017,
"step": 3682
},
{
"epoch": 0.46,
"learning_rate": 1.1695145769497871e-05,
"loss": 2.5591,
"loss_": 1.1917,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3943,
"step": 3689
},
{
"epoch": 0.46,
"learning_rate": 1.1667126958189203e-05,
"loss": 2.5559,
"loss_": 1.122,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3944,
"step": 3696
},
{
"epoch": 0.46,
"learning_rate": 1.1639094678387268e-05,
"loss": 2.5315,
"loss_": 1.3327,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3941,
"step": 3703
},
{
"epoch": 0.47,
"learning_rate": 1.1611049156561055e-05,
"loss": 2.5537,
"loss_": 1.1127,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.394,
"step": 3710
},
{
"epoch": 0.47,
"learning_rate": 1.1582990619286555e-05,
"loss": 2.5667,
"loss_": 1.1163,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3942,
"step": 3717
},
{
"epoch": 0.47,
"learning_rate": 1.1554919293244885e-05,
"loss": 2.5407,
"loss_": 1.3315,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3944,
"step": 3724
},
{
"epoch": 0.47,
"learning_rate": 1.1526835405220503e-05,
"loss": 2.5515,
"loss_": 0.9382,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3944,
"step": 3731
},
{
"epoch": 0.47,
"learning_rate": 1.1498739182099335e-05,
"loss": 2.5467,
"loss_": 0.94,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3941,
"step": 3738
},
{
"epoch": 0.47,
"learning_rate": 1.1470630850866966e-05,
"loss": 2.5174,
"loss_": 1.0814,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4014,
"step": 3745
},
{
"epoch": 0.47,
"learning_rate": 1.1442510638606813e-05,
"loss": 2.5437,
"loss_": 1.1433,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3939,
"step": 3752
},
{
"epoch": 0.47,
"learning_rate": 1.141437877249826e-05,
"loss": 2.561,
"loss_": 1.0612,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3941,
"step": 3759
},
{
"epoch": 0.47,
"learning_rate": 1.1386235479814856e-05,
"loss": 2.5184,
"loss_": 1.009,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.394,
"step": 3766
},
{
"epoch": 0.47,
"learning_rate": 1.1358080987922452e-05,
"loss": 2.5491,
"loss_": 1.0159,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.394,
"step": 3773
},
{
"epoch": 0.47,
"learning_rate": 1.1329915524277384e-05,
"loss": 2.5448,
"loss_": 1.2692,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3943,
"step": 3780
},
{
"epoch": 0.47,
"learning_rate": 1.1301739316424623e-05,
"loss": 2.5624,
"loss_": 1.1014,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3936,
"step": 3787
},
{
"epoch": 0.48,
"learning_rate": 1.1273552591995943e-05,
"loss": 2.5418,
"loss_": 1.3636,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3938,
"step": 3794
},
{
"epoch": 0.48,
"learning_rate": 1.124535557870808e-05,
"loss": 2.5431,
"loss_": 1.0197,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3938,
"step": 3801
},
{
"epoch": 0.48,
"learning_rate": 1.1217148504360885e-05,
"loss": 2.5773,
"loss_": 1.4614,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3941,
"step": 3808
},
{
"epoch": 0.48,
"learning_rate": 1.1188931596835509e-05,
"loss": 2.562,
"loss_": 0.9753,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3934,
"step": 3815
},
{
"epoch": 0.48,
"learning_rate": 1.1160705084092526e-05,
"loss": 2.5293,
"loss_": 1.1196,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3936,
"step": 3822
},
{
"epoch": 0.48,
"learning_rate": 1.1132469194170117e-05,
"loss": 2.5569,
"loss_": 1.3226,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3939,
"step": 3829
},
{
"epoch": 0.48,
"learning_rate": 1.1104224155182215e-05,
"loss": 2.5512,
"loss_": 1.1311,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3939,
"step": 3836
},
{
"epoch": 0.48,
"learning_rate": 1.1075970195316677e-05,
"loss": 2.5509,
"loss_": 1.1204,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3938,
"step": 3843
},
{
"epoch": 0.48,
"learning_rate": 1.104770754283342e-05,
"loss": 2.5397,
"loss_": 1.1743,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3936,
"step": 3850
},
{
"epoch": 0.48,
"learning_rate": 1.101943642606259e-05,
"loss": 2.5674,
"loss_": 1.3249,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3936,
"step": 3857
},
{
"epoch": 0.48,
"learning_rate": 1.0991157073402723e-05,
"loss": 2.5645,
"loss_": 0.9719,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3936,
"step": 3864
},
{
"epoch": 0.49,
"learning_rate": 1.096286971331888e-05,
"loss": 2.5283,
"loss_": 1.0872,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3938,
"step": 3871
},
{
"epoch": 0.49,
"learning_rate": 1.0934574574340821e-05,
"loss": 2.5598,
"loss_": 1.186,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3934,
"step": 3878
},
{
"epoch": 0.49,
"learning_rate": 1.0906271885061149e-05,
"loss": 2.5509,
"loss_": 1.0989,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3936,
"step": 3885
},
{
"epoch": 0.49,
"learning_rate": 1.0877961874133458e-05,
"loss": 2.5613,
"loss_": 1.1127,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3937,
"step": 3892
},
{
"epoch": 0.49,
"learning_rate": 1.0849644770270502e-05,
"loss": 2.5592,
"loss_": 1.1666,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3934,
"step": 3899
},
{
"epoch": 0.49,
"learning_rate": 1.0821320802242335e-05,
"loss": 2.5586,
"loss_": 1.2445,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3936,
"step": 3906
},
{
"epoch": 0.49,
"learning_rate": 1.0792990198874462e-05,
"loss": 2.5441,
"loss_": 0.8492,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3937,
"step": 3913
},
{
"epoch": 0.49,
"learning_rate": 1.0764653189046002e-05,
"loss": 2.5834,
"loss_": 1.3096,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3932,
"step": 3920
},
{
"epoch": 0.49,
"learning_rate": 1.073631000168782e-05,
"loss": 2.5308,
"loss_": 1.0803,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3936,
"step": 3927
},
{
"epoch": 0.49,
"learning_rate": 1.0707960865780697e-05,
"loss": 2.5575,
"loss_": 1.2032,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3936,
"step": 3934
},
{
"epoch": 0.49,
"learning_rate": 1.0679606010353467e-05,
"loss": 2.5341,
"loss_": 1.1694,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3932,
"step": 3941
},
{
"epoch": 0.49,
"learning_rate": 1.0651245664481176e-05,
"loss": 2.5644,
"loss_": 0.8158,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4,
"step": 3948
},
{
"epoch": 0.5,
"learning_rate": 1.062288005728322e-05,
"loss": 2.5545,
"loss_": 0.8523,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.3993,
"step": 3955
},
{
"epoch": 0.5,
"learning_rate": 1.0594509417921505e-05,
"loss": 2.5287,
"loss_": 1.0746,
"moe_loss": 0.1604,
"moe_loss_longrong": 1.4,
"step": 3962
},
{
"epoch": 0.5,
"learning_rate": 1.0566133975598592e-05,
"loss": 2.5596,
"loss_": 0.8816,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.393,
"step": 3969
},
{
"epoch": 0.5,
"learning_rate": 1.0537753959555844e-05,
"loss": 2.5464,
"loss_": 1.315,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3936,
"step": 3976
},
{
"epoch": 0.5,
"learning_rate": 1.0509369599071563e-05,
"loss": 2.5213,
"loss_": 1.2024,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.4002,
"step": 3983
},
{
"epoch": 0.5,
"learning_rate": 1.0480981123459175e-05,
"loss": 2.5931,
"loss_": 1.2043,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3931,
"step": 3990
},
{
"epoch": 0.5,
"learning_rate": 1.0452588762065323e-05,
"loss": 2.5559,
"loss_": 1.1557,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3932,
"step": 3997
},
{
"epoch": 0.5,
"learning_rate": 1.0424192744268063e-05,
"loss": 2.5297,
"loss_": 1.2939,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3929,
"step": 4004
},
{
"epoch": 0.5,
"learning_rate": 1.0395793299474979e-05,
"loss": 2.5487,
"loss_": 0.9489,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.393,
"step": 4011
},
{
"epoch": 0.5,
"learning_rate": 1.0367390657121346e-05,
"loss": 2.5221,
"loss_": 1.0307,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3933,
"step": 4018
},
{
"epoch": 0.5,
"learning_rate": 1.033898504666827e-05,
"loss": 2.5383,
"loss_": 1.0582,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.393,
"step": 4025
},
{
"epoch": 0.51,
"learning_rate": 1.031057669760084e-05,
"loss": 2.5864,
"loss_": 1.1518,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3933,
"step": 4032
},
{
"epoch": 0.51,
"learning_rate": 1.0282165839426268e-05,
"loss": 2.5311,
"loss_": 1.1738,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3931,
"step": 4039
},
{
"epoch": 0.51,
"learning_rate": 1.0253752701672033e-05,
"loss": 2.5819,
"loss_": 1.1982,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.393,
"step": 4046
},
{
"epoch": 0.51,
"learning_rate": 1.022533751388403e-05,
"loss": 2.5437,
"loss_": 1.288,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3929,
"step": 4053
},
{
"epoch": 0.51,
"learning_rate": 1.0196920505624726e-05,
"loss": 2.5305,
"loss_": 1.1663,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3928,
"step": 4060
},
{
"epoch": 0.51,
"learning_rate": 1.0168501906471284e-05,
"loss": 2.5629,
"loss_": 1.1848,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.393,
"step": 4067
},
{
"epoch": 0.51,
"learning_rate": 1.014008194601372e-05,
"loss": 2.5355,
"loss_": 1.0939,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.393,
"step": 4074
},
{
"epoch": 0.51,
"learning_rate": 1.0111660853853056e-05,
"loss": 2.5297,
"loss_": 0.9667,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.393,
"step": 4081
},
{
"epoch": 0.51,
"learning_rate": 1.0083238859599453e-05,
"loss": 2.5437,
"loss_": 0.7903,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.3992,
"step": 4088
},
{
"epoch": 0.51,
"learning_rate": 1.005481619287036e-05,
"loss": 2.5958,
"loss_": 1.1979,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3927,
"step": 4095
},
{
"epoch": 0.51,
"learning_rate": 1.0026393083288659e-05,
"loss": 2.5676,
"loss_": 1.1374,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3926,
"step": 4102
},
{
"epoch": 0.52,
"learning_rate": 9.997969760480802e-06,
"loss": 2.5415,
"loss_": 1.132,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3929,
"step": 4109
},
{
"epoch": 0.52,
"learning_rate": 9.969546454074977e-06,
"loss": 2.5337,
"loss_": 1.0972,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3929,
"step": 4116
},
{
"epoch": 0.52,
"learning_rate": 9.941123393699235e-06,
"loss": 2.5709,
"loss_": 1.0257,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3931,
"step": 4123
},
{
"epoch": 0.52,
"learning_rate": 9.912700808979632e-06,
"loss": 2.5593,
"loss_": 1.1574,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3928,
"step": 4130
},
{
"epoch": 0.52,
"learning_rate": 9.884278929538387e-06,
"loss": 2.5532,
"loss_": 1.0852,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3928,
"step": 4137
},
{
"epoch": 0.52,
"learning_rate": 9.855857984992026e-06,
"loss": 2.544,
"loss_": 1.2876,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3928,
"step": 4144
},
{
"epoch": 0.52,
"learning_rate": 9.82743820494951e-06,
"loss": 2.5251,
"loss_": 1.0657,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3925,
"step": 4151
},
{
"epoch": 0.52,
"learning_rate": 9.799019819010405e-06,
"loss": 2.5677,
"loss_": 1.0093,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3927,
"step": 4158
},
{
"epoch": 0.52,
"learning_rate": 9.770603056763009e-06,
"loss": 2.5323,
"loss_": 1.0455,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3927,
"step": 4165
},
{
"epoch": 0.52,
"learning_rate": 9.742188147782494e-06,
"loss": 2.5443,
"loss_": 1.2455,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3927,
"step": 4172
},
{
"epoch": 0.52,
"learning_rate": 9.713775321629073e-06,
"loss": 2.5462,
"loss_": 0.9975,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3928,
"step": 4179
},
{
"epoch": 0.52,
"learning_rate": 9.685364807846127e-06,
"loss": 2.5703,
"loss_": 1.3459,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3925,
"step": 4186
},
{
"epoch": 0.53,
"learning_rate": 9.656956835958356e-06,
"loss": 2.5182,
"loss_": 1.0759,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3927,
"step": 4193
},
{
"epoch": 0.53,
"learning_rate": 9.628551635469918e-06,
"loss": 2.566,
"loss_": 1.248,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3924,
"step": 4200
},
{
"epoch": 0.53,
"learning_rate": 9.600149435862593e-06,
"loss": 2.5279,
"loss_": 1.1282,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3925,
"step": 4207
},
{
"epoch": 0.53,
"learning_rate": 9.571750466593912e-06,
"loss": 2.5369,
"loss_": 0.8771,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3924,
"step": 4214
},
{
"epoch": 0.53,
"learning_rate": 9.543354957095299e-06,
"loss": 2.5527,
"loss_": 1.0412,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3926,
"step": 4221
},
{
"epoch": 0.53,
"learning_rate": 9.514963136770242e-06,
"loss": 2.5574,
"loss_": 0.977,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3923,
"step": 4228
},
{
"epoch": 0.53,
"learning_rate": 9.486575234992423e-06,
"loss": 2.5446,
"loss_": 1.2368,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3924,
"step": 4235
},
{
"epoch": 0.53,
"learning_rate": 9.45819148110385e-06,
"loss": 2.5138,
"loss_": 1.0194,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3925,
"step": 4242
},
{
"epoch": 0.53,
"learning_rate": 9.429812104413042e-06,
"loss": 2.5362,
"loss_": 0.59,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3924,
"step": 4249
},
{
"epoch": 0.53,
"learning_rate": 9.401437334193143e-06,
"loss": 2.5026,
"loss_": 1.1442,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3923,
"step": 4256
},
{
"epoch": 0.53,
"learning_rate": 9.373067399680084e-06,
"loss": 2.4977,
"loss_": 1.0847,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3925,
"step": 4263
},
{
"epoch": 0.54,
"learning_rate": 9.344702530070729e-06,
"loss": 2.5063,
"loss_": 1.2399,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3926,
"step": 4270
},
{
"epoch": 0.54,
"learning_rate": 9.316342954521028e-06,
"loss": 2.5195,
"loss_": 1.0512,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.392,
"step": 4277
},
{
"epoch": 0.54,
"learning_rate": 9.287988902144157e-06,
"loss": 2.5811,
"loss_": 1.2023,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3926,
"step": 4284
},
{
"epoch": 0.54,
"learning_rate": 9.259640602008667e-06,
"loss": 2.5491,
"loss_": 0.9779,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3922,
"step": 4291
},
{
"epoch": 0.54,
"learning_rate": 9.231298283136641e-06,
"loss": 2.5653,
"loss_": 1.156,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3923,
"step": 4298
},
{
"epoch": 0.54,
"learning_rate": 9.202962174501848e-06,
"loss": 2.5369,
"loss_": 1.1885,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3924,
"step": 4305
},
{
"epoch": 0.54,
"learning_rate": 9.17463250502787e-06,
"loss": 2.5003,
"loss_": 1.1876,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.392,
"step": 4312
},
{
"epoch": 0.54,
"learning_rate": 9.146309503586282e-06,
"loss": 2.5501,
"loss_": 1.0059,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.3976,
"step": 4319
},
{
"epoch": 0.54,
"learning_rate": 9.117993398994784e-06,
"loss": 2.5294,
"loss_": 1.2092,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.392,
"step": 4326
},
{
"epoch": 0.54,
"learning_rate": 9.089684420015346e-06,
"loss": 2.5311,
"loss_": 1.092,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3922,
"step": 4333
},
{
"epoch": 0.54,
"learning_rate": 9.06138279535239e-06,
"loss": 2.5313,
"loss_": 1.3094,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.392,
"step": 4340
},
{
"epoch": 0.54,
"learning_rate": 9.033088753650918e-06,
"loss": 2.5423,
"loss_": 0.8346,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3921,
"step": 4347
},
{
"epoch": 0.55,
"learning_rate": 9.004802523494655e-06,
"loss": 2.5419,
"loss_": 0.9882,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3922,
"step": 4354
},
{
"epoch": 0.55,
"learning_rate": 8.976524333404238e-06,
"loss": 2.5791,
"loss_": 1.1859,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3921,
"step": 4361
},
{
"epoch": 0.55,
"learning_rate": 8.94825441183534e-06,
"loss": 2.5575,
"loss_": 1.104,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3921,
"step": 4368
},
{
"epoch": 0.55,
"learning_rate": 8.919992987176836e-06,
"loss": 2.5297,
"loss_": 1.0182,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3922,
"step": 4375
},
{
"epoch": 0.55,
"learning_rate": 8.891740287748952e-06,
"loss": 2.5778,
"loss_": 1.0814,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3922,
"step": 4382
},
{
"epoch": 0.55,
"learning_rate": 8.863496541801424e-06,
"loss": 2.5495,
"loss_": 1.171,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.392,
"step": 4389
},
{
"epoch": 0.55,
"learning_rate": 8.835261977511666e-06,
"loss": 2.5199,
"loss_": 1.1374,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.392,
"step": 4396
},
{
"epoch": 0.55,
"learning_rate": 8.807036822982892e-06,
"loss": 2.5528,
"loss_": 1.2331,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3919,
"step": 4403
},
{
"epoch": 0.55,
"learning_rate": 8.778821306242318e-06,
"loss": 2.5504,
"loss_": 1.3552,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3921,
"step": 4410
},
{
"epoch": 0.55,
"learning_rate": 8.750615655239287e-06,
"loss": 2.5511,
"loss_": 1.2613,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3918,
"step": 4417
},
{
"epoch": 0.55,
"learning_rate": 8.722420097843437e-06,
"loss": 2.5019,
"loss_": 0.9546,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3919,
"step": 4424
},
{
"epoch": 0.56,
"learning_rate": 8.694234861842865e-06,
"loss": 2.5351,
"loss_": 1.1331,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3919,
"step": 4431
},
{
"epoch": 0.56,
"learning_rate": 8.66606017494228e-06,
"loss": 2.5412,
"loss_": 1.2775,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3921,
"step": 4438
},
{
"epoch": 0.56,
"learning_rate": 8.637896264761176e-06,
"loss": 2.4963,
"loss_": 0.7466,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.3971,
"step": 4445
},
{
"epoch": 0.56,
"learning_rate": 8.609743358831965e-06,
"loss": 2.5192,
"loss_": 1.0139,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3918,
"step": 4452
},
{
"epoch": 0.56,
"learning_rate": 8.58160168459817e-06,
"loss": 2.5407,
"loss_": 1.0999,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3918,
"step": 4459
},
{
"epoch": 0.56,
"learning_rate": 8.553471469412577e-06,
"loss": 2.5692,
"loss_": 1.1933,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3917,
"step": 4466
},
{
"epoch": 0.56,
"learning_rate": 8.525352940535381e-06,
"loss": 2.5063,
"loss_": 0.9695,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.392,
"step": 4473
},
{
"epoch": 0.56,
"learning_rate": 8.497246325132382e-06,
"loss": 2.5941,
"loss_": 1.288,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3919,
"step": 4480
},
{
"epoch": 0.56,
"learning_rate": 8.469151850273124e-06,
"loss": 2.5543,
"loss_": 1.1849,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.392,
"step": 4487
},
{
"epoch": 0.56,
"learning_rate": 8.441069742929069e-06,
"loss": 2.5168,
"loss_": 1.1032,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3917,
"step": 4494
},
{
"epoch": 0.56,
"learning_rate": 8.413000229971765e-06,
"loss": 2.5085,
"loss_": 1.1973,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3917,
"step": 4501
},
{
"epoch": 0.57,
"learning_rate": 8.384943538171017e-06,
"loss": 2.5283,
"loss_": 1.2194,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3918,
"step": 4508
},
{
"epoch": 0.57,
"learning_rate": 8.356899894193038e-06,
"loss": 2.5342,
"loss_": 1.176,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3917,
"step": 4515
},
{
"epoch": 0.57,
"learning_rate": 8.328869524598635e-06,
"loss": 2.5705,
"loss_": 1.1662,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3916,
"step": 4522
},
{
"epoch": 0.57,
"learning_rate": 8.300852655841378e-06,
"loss": 2.5423,
"loss_": 1.0822,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3916,
"step": 4529
},
{
"epoch": 0.57,
"learning_rate": 8.272849514265763e-06,
"loss": 2.5792,
"loss_": 1.1001,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3915,
"step": 4536
},
{
"epoch": 0.57,
"learning_rate": 8.244860326105378e-06,
"loss": 2.539,
"loss_": 1.3069,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3916,
"step": 4543
},
{
"epoch": 0.57,
"learning_rate": 8.216885317481091e-06,
"loss": 2.5393,
"loss_": 1.0325,
"moe_loss": 0.1603,
"moe_loss_longrong": 1.3971,
"step": 4550
},
{
"epoch": 0.57,
"learning_rate": 8.188924714399222e-06,
"loss": 2.5338,
"loss_": 1.0938,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3919,
"step": 4557
},
{
"epoch": 0.57,
"learning_rate": 8.160978742749692e-06,
"loss": 2.5578,
"loss_": 0.9998,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3916,
"step": 4564
},
{
"epoch": 0.57,
"learning_rate": 8.133047628304229e-06,
"loss": 2.5287,
"loss_": 1.1445,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3915,
"step": 4571
},
{
"epoch": 0.57,
"learning_rate": 8.105131596714538e-06,
"loss": 2.5354,
"loss_": 1.0362,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3915,
"step": 4578
},
{
"epoch": 0.57,
"learning_rate": 8.077230873510452e-06,
"loss": 2.548,
"loss_": 1.0803,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3916,
"step": 4585
},
{
"epoch": 0.58,
"learning_rate": 8.049345684098148e-06,
"loss": 2.5192,
"loss_": 1.1937,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3916,
"step": 4592
},
{
"epoch": 0.58,
"learning_rate": 8.021476253758303e-06,
"loss": 2.5454,
"loss_": 1.2712,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3914,
"step": 4599
},
{
"epoch": 0.58,
"learning_rate": 7.99362280764427e-06,
"loss": 2.5142,
"loss_": 1.3818,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3918,
"step": 4606
},
{
"epoch": 0.58,
"learning_rate": 7.965785570780275e-06,
"loss": 2.5291,
"loss_": 1.2159,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3915,
"step": 4613
},
{
"epoch": 0.58,
"learning_rate": 7.937964768059592e-06,
"loss": 2.5456,
"loss_": 1.0392,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3915,
"step": 4620
},
{
"epoch": 0.58,
"learning_rate": 7.91016062424273e-06,
"loss": 2.541,
"loss_": 1.2045,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3915,
"step": 4627
},
{
"epoch": 0.58,
"learning_rate": 7.882373363955597e-06,
"loss": 2.5365,
"loss_": 1.1843,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3914,
"step": 4634
},
{
"epoch": 0.58,
"learning_rate": 7.854603211687715e-06,
"loss": 2.5216,
"loss_": 1.0943,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3914,
"step": 4641
},
{
"epoch": 0.58,
"learning_rate": 7.826850391790393e-06,
"loss": 2.4891,
"loss_": 0.9685,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3913,
"step": 4648
},
{
"epoch": 0.58,
"learning_rate": 7.799115128474907e-06,
"loss": 2.5239,
"loss_": 1.093,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3914,
"step": 4655
},
{
"epoch": 0.58,
"learning_rate": 7.771397645810699e-06,
"loss": 2.5494,
"loss_": 1.4255,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3915,
"step": 4662
},
{
"epoch": 0.59,
"learning_rate": 7.743698167723568e-06,
"loss": 2.5264,
"loss_": 1.3261,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3915,
"step": 4669
},
{
"epoch": 0.59,
"learning_rate": 7.716016917993843e-06,
"loss": 2.5483,
"loss_": 1.047,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3912,
"step": 4676
},
{
"epoch": 0.59,
"learning_rate": 7.688354120254606e-06,
"loss": 2.5823,
"loss_": 1.3127,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3914,
"step": 4683
},
{
"epoch": 0.59,
"learning_rate": 7.660709997989855e-06,
"loss": 2.6013,
"loss_": 1.2204,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3914,
"step": 4690
},
{
"epoch": 0.59,
"learning_rate": 7.633084774532717e-06,
"loss": 2.5238,
"loss_": 0.9103,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3915,
"step": 4697
},
{
"epoch": 0.59,
"learning_rate": 7.605478673063635e-06,
"loss": 2.5269,
"loss_": 1.2055,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3914,
"step": 4704
},
{
"epoch": 0.59,
"learning_rate": 7.577891916608574e-06,
"loss": 2.5042,
"loss_": 1.0835,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3913,
"step": 4711
},
{
"epoch": 0.59,
"learning_rate": 7.5503247280372104e-06,
"loss": 2.5373,
"loss_": 0.7241,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3914,
"step": 4718
},
{
"epoch": 0.59,
"learning_rate": 7.522777330061126e-06,
"loss": 2.562,
"loss_": 1.3803,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3916,
"step": 4725
},
{
"epoch": 0.59,
"learning_rate": 7.495249945232028e-06,
"loss": 2.5934,
"loss_": 0.7858,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3914,
"step": 4732
},
{
"epoch": 0.59,
"learning_rate": 7.467742795939941e-06,
"loss": 2.5437,
"loss_": 1.304,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3913,
"step": 4739
},
{
"epoch": 0.59,
"learning_rate": 7.440256104411394e-06,
"loss": 2.5461,
"loss_": 1.188,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3913,
"step": 4746
},
{
"epoch": 0.6,
"learning_rate": 7.4127900927076575e-06,
"loss": 2.538,
"loss_": 1.0811,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3912,
"step": 4753
},
{
"epoch": 0.6,
"learning_rate": 7.385344982722928e-06,
"loss": 2.5834,
"loss_": 1.0429,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3911,
"step": 4760
},
{
"epoch": 0.6,
"learning_rate": 7.3579209961825346e-06,
"loss": 2.5379,
"loss_": 1.0402,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3913,
"step": 4767
},
{
"epoch": 0.6,
"learning_rate": 7.330518354641156e-06,
"loss": 2.5444,
"loss_": 0.9376,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3912,
"step": 4774
},
{
"epoch": 0.6,
"learning_rate": 7.303137279481034e-06,
"loss": 2.507,
"loss_": 1.0915,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3914,
"step": 4781
},
{
"epoch": 0.6,
"learning_rate": 7.275777991910164e-06,
"loss": 2.4976,
"loss_": 1.0431,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3912,
"step": 4788
},
{
"epoch": 0.6,
"learning_rate": 7.248440712960535e-06,
"loss": 2.5333,
"loss_": 1.1329,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3911,
"step": 4795
},
{
"epoch": 0.6,
"learning_rate": 7.2211256634863255e-06,
"loss": 2.5446,
"loss_": 1.0213,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3914,
"step": 4802
},
{
"epoch": 0.6,
"learning_rate": 7.1938330641621316e-06,
"loss": 2.5385,
"loss_": 1.0947,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.391,
"step": 4809
},
{
"epoch": 0.6,
"learning_rate": 7.166563135481166e-06,
"loss": 2.5381,
"loss_": 1.0597,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3911,
"step": 4816
},
{
"epoch": 0.6,
"learning_rate": 7.139316097753499e-06,
"loss": 2.5394,
"loss_": 1.2727,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3911,
"step": 4823
},
{
"epoch": 0.61,
"learning_rate": 7.112092171104268e-06,
"loss": 2.5323,
"loss_": 0.9192,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.391,
"step": 4830
},
{
"epoch": 0.61,
"learning_rate": 7.084891575471885e-06,
"loss": 2.5276,
"loss_": 1.0647,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3913,
"step": 4837
},
{
"epoch": 0.61,
"learning_rate": 7.05771453060629e-06,
"loss": 2.5388,
"loss_": 1.02,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3912,
"step": 4844
},
{
"epoch": 0.61,
"learning_rate": 7.030561256067159e-06,
"loss": 2.5259,
"loss_": 1.1344,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.391,
"step": 4851
},
{
"epoch": 0.61,
"learning_rate": 7.003431971222115e-06,
"loss": 2.5743,
"loss_": 1.1341,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3913,
"step": 4858
},
{
"epoch": 0.61,
"learning_rate": 6.976326895244987e-06,
"loss": 2.5281,
"loss_": 1.0979,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3909,
"step": 4865
},
{
"epoch": 0.61,
"learning_rate": 6.949246247114019e-06,
"loss": 2.5364,
"loss_": 1.1133,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.391,
"step": 4872
},
{
"epoch": 0.61,
"learning_rate": 6.922190245610106e-06,
"loss": 2.4967,
"loss_": 1.1036,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3909,
"step": 4879
},
{
"epoch": 0.61,
"learning_rate": 6.895159109315022e-06,
"loss": 2.525,
"loss_": 1.0468,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3951,
"step": 4886
},
{
"epoch": 0.61,
"learning_rate": 6.868153056609665e-06,
"loss": 2.5504,
"loss_": 1.0107,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3912,
"step": 4893
},
{
"epoch": 0.61,
"learning_rate": 6.841172305672289e-06,
"loss": 2.5496,
"loss_": 1.1926,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3909,
"step": 4900
},
{
"epoch": 0.62,
"learning_rate": 6.814217074476721e-06,
"loss": 2.5328,
"loss_": 1.2163,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3909,
"step": 4907
},
{
"epoch": 0.62,
"learning_rate": 6.787287580790634e-06,
"loss": 2.5072,
"loss_": 0.9685,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.391,
"step": 4914
},
{
"epoch": 0.62,
"learning_rate": 6.760384042173769e-06,
"loss": 2.5442,
"loss_": 0.9827,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.391,
"step": 4921
},
{
"epoch": 0.62,
"learning_rate": 6.733506675976171e-06,
"loss": 2.5335,
"loss_": 0.9915,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3911,
"step": 4928
},
{
"epoch": 0.62,
"learning_rate": 6.7066556993364525e-06,
"loss": 2.5435,
"loss_": 1.1737,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3908,
"step": 4935
},
{
"epoch": 0.62,
"learning_rate": 6.679831329180025e-06,
"loss": 2.5157,
"loss_": 0.8595,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.391,
"step": 4942
},
{
"epoch": 0.62,
"learning_rate": 6.653033782217337e-06,
"loss": 2.518,
"loss_": 1.1064,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3908,
"step": 4949
},
{
"epoch": 0.62,
"learning_rate": 6.626263274942157e-06,
"loss": 2.5186,
"loss_": 1.2309,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3908,
"step": 4956
},
{
"epoch": 0.62,
"learning_rate": 6.599520023629789e-06,
"loss": 2.531,
"loss_": 1.2354,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.391,
"step": 4963
},
{
"epoch": 0.62,
"learning_rate": 6.572804244335349e-06,
"loss": 2.5529,
"loss_": 1.056,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3909,
"step": 4970
},
{
"epoch": 0.62,
"learning_rate": 6.546116152891998e-06,
"loss": 2.5579,
"loss_": 1.0829,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.391,
"step": 4977
},
{
"epoch": 0.62,
"learning_rate": 6.519455964909223e-06,
"loss": 2.5493,
"loss_": 1.1124,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3908,
"step": 4984
},
{
"epoch": 0.63,
"learning_rate": 6.492823895771077e-06,
"loss": 2.5263,
"loss_": 0.9684,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.395,
"step": 4991
},
{
"epoch": 0.63,
"learning_rate": 6.466220160634444e-06,
"loss": 2.5259,
"loss_": 1.2248,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.391,
"step": 4998
},
{
"epoch": 0.63,
"learning_rate": 6.439644974427304e-06,
"loss": 2.5509,
"loss_": 0.9338,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.391,
"step": 5005
},
{
"epoch": 0.63,
"learning_rate": 6.4130985518469965e-06,
"loss": 2.5509,
"loss_": 1.1724,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3907,
"step": 5012
},
{
"epoch": 0.63,
"learning_rate": 6.386581107358473e-06,
"loss": 2.5498,
"loss_": 1.2157,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.391,
"step": 5019
},
{
"epoch": 0.63,
"learning_rate": 6.360092855192586e-06,
"loss": 2.5417,
"loss_": 1.0377,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3907,
"step": 5026
},
{
"epoch": 0.63,
"learning_rate": 6.3336340093443424e-06,
"loss": 2.527,
"loss_": 1.2523,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3908,
"step": 5033
},
{
"epoch": 0.63,
"learning_rate": 6.307204783571179e-06,
"loss": 2.523,
"loss_": 0.7425,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3908,
"step": 5040
},
{
"epoch": 0.63,
"learning_rate": 6.280805391391238e-06,
"loss": 2.5512,
"loss_": 1.2265,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3906,
"step": 5047
},
{
"epoch": 0.63,
"learning_rate": 6.254436046081641e-06,
"loss": 2.5116,
"loss_": 1.2887,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3908,
"step": 5054
},
{
"epoch": 0.63,
"learning_rate": 6.228096960676764e-06,
"loss": 2.5107,
"loss_": 0.8449,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3907,
"step": 5061
},
{
"epoch": 0.64,
"learning_rate": 6.201788347966511e-06,
"loss": 2.5181,
"loss_": 1.1947,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3908,
"step": 5068
},
{
"epoch": 0.64,
"learning_rate": 6.175510420494609e-06,
"loss": 2.5637,
"loss_": 1.3614,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3909,
"step": 5075
},
{
"epoch": 0.64,
"learning_rate": 6.149263390556887e-06,
"loss": 2.5147,
"loss_": 1.0484,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3906,
"step": 5082
},
{
"epoch": 0.64,
"learning_rate": 6.123047470199539e-06,
"loss": 2.5507,
"loss_": 1.3089,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3906,
"step": 5089
},
{
"epoch": 0.64,
"learning_rate": 6.096862871217448e-06,
"loss": 2.5643,
"loss_": 1.0995,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3907,
"step": 5096
},
{
"epoch": 0.64,
"learning_rate": 6.070709805152451e-06,
"loss": 2.5202,
"loss_": 1.3114,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3907,
"step": 5103
},
{
"epoch": 0.64,
"learning_rate": 6.044588483291625e-06,
"loss": 2.5343,
"loss_": 1.2697,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3908,
"step": 5110
},
{
"epoch": 0.64,
"learning_rate": 6.018499116665603e-06,
"loss": 2.5169,
"loss_": 1.2687,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3906,
"step": 5117
},
{
"epoch": 0.64,
"learning_rate": 5.9924419160468515e-06,
"loss": 2.5049,
"loss_": 0.9986,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3907,
"step": 5124
},
{
"epoch": 0.64,
"learning_rate": 5.966417091947965e-06,
"loss": 2.5498,
"loss_": 1.1027,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3907,
"step": 5131
},
{
"epoch": 0.64,
"learning_rate": 5.9404248546199795e-06,
"loss": 2.5273,
"loss_": 1.1325,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3906,
"step": 5138
},
{
"epoch": 0.64,
"learning_rate": 5.914465414050669e-06,
"loss": 2.5246,
"loss_": 1.1098,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3906,
"step": 5145
},
{
"epoch": 0.65,
"learning_rate": 5.888538979962843e-06,
"loss": 2.5145,
"loss_": 1.1524,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3905,
"step": 5152
},
{
"epoch": 0.65,
"learning_rate": 5.862645761812655e-06,
"loss": 2.5404,
"loss_": 1.3356,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3907,
"step": 5159
},
{
"epoch": 0.65,
"learning_rate": 5.836785968787915e-06,
"loss": 2.5027,
"loss_": 1.0651,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3907,
"step": 5166
},
{
"epoch": 0.65,
"learning_rate": 5.810959809806396e-06,
"loss": 2.5426,
"loss_": 1.0368,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3904,
"step": 5173
},
{
"epoch": 0.65,
"learning_rate": 5.785167493514137e-06,
"loss": 2.5547,
"loss_": 1.1137,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3908,
"step": 5180
},
{
"epoch": 0.65,
"learning_rate": 5.759409228283779e-06,
"loss": 2.5616,
"loss_": 1.0141,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3904,
"step": 5187
},
{
"epoch": 0.65,
"learning_rate": 5.733685222212868e-06,
"loss": 2.5659,
"loss_": 1.0579,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3906,
"step": 5194
},
{
"epoch": 0.65,
"learning_rate": 5.7079956831221616e-06,
"loss": 2.5385,
"loss_": 1.1832,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3906,
"step": 5201
},
{
"epoch": 0.65,
"learning_rate": 5.682340818553978e-06,
"loss": 2.5505,
"loss_": 0.9514,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3907,
"step": 5208
},
{
"epoch": 0.65,
"learning_rate": 5.656720835770499e-06,
"loss": 2.5296,
"loss_": 1.1111,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3907,
"step": 5215
},
{
"epoch": 0.65,
"learning_rate": 5.6311359417520975e-06,
"loss": 2.556,
"loss_": 1.1038,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3906,
"step": 5222
},
{
"epoch": 0.66,
"learning_rate": 5.605586343195676e-06,
"loss": 2.5203,
"loss_": 1.0794,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3905,
"step": 5229
},
{
"epoch": 0.66,
"learning_rate": 5.580072246512984e-06,
"loss": 2.531,
"loss_": 1.1714,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3905,
"step": 5236
},
{
"epoch": 0.66,
"learning_rate": 5.5545938578289626e-06,
"loss": 2.5175,
"loss_": 1.0077,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3904,
"step": 5243
},
{
"epoch": 0.66,
"learning_rate": 5.529151382980065e-06,
"loss": 2.5567,
"loss_": 0.9865,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3903,
"step": 5250
},
{
"epoch": 0.66,
"learning_rate": 5.503745027512608e-06,
"loss": 2.5494,
"loss_": 1.1312,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3903,
"step": 5257
},
{
"epoch": 0.66,
"learning_rate": 5.478374996681104e-06,
"loss": 2.511,
"loss_": 0.957,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3903,
"step": 5264
},
{
"epoch": 0.66,
"learning_rate": 5.453041495446596e-06,
"loss": 2.5376,
"loss_": 1.1284,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.395,
"step": 5271
},
{
"epoch": 0.66,
"learning_rate": 5.427744728475016e-06,
"loss": 2.519,
"loss_": 1.1458,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3904,
"step": 5278
},
{
"epoch": 0.66,
"learning_rate": 5.40248490013553e-06,
"loss": 2.5264,
"loss_": 1.0995,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3906,
"step": 5285
},
{
"epoch": 0.66,
"learning_rate": 5.3772622144988665e-06,
"loss": 2.5051,
"loss_": 1.1345,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3904,
"step": 5292
},
{
"epoch": 0.66,
"learning_rate": 5.352076875335697e-06,
"loss": 2.5742,
"loss_": 0.9607,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3904,
"step": 5299
},
{
"epoch": 0.67,
"learning_rate": 5.326929086114972e-06,
"loss": 2.5419,
"loss_": 0.9965,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3903,
"step": 5306
},
{
"epoch": 0.67,
"learning_rate": 5.30181905000228e-06,
"loss": 2.5681,
"loss_": 0.7612,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3902,
"step": 5313
},
{
"epoch": 0.67,
"learning_rate": 5.276746969858204e-06,
"loss": 2.537,
"loss_": 1.0697,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3903,
"step": 5320
},
{
"epoch": 0.67,
"learning_rate": 5.251713048236691e-06,
"loss": 2.5471,
"loss_": 1.0796,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3903,
"step": 5327
},
{
"epoch": 0.67,
"learning_rate": 5.226717487383414e-06,
"loss": 2.51,
"loss_": 1.2432,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3903,
"step": 5334
},
{
"epoch": 0.67,
"learning_rate": 5.20176048923412e-06,
"loss": 2.5478,
"loss_": 1.0954,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3904,
"step": 5341
},
{
"epoch": 0.67,
"learning_rate": 5.176842255413028e-06,
"loss": 2.5248,
"loss_": 1.106,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3903,
"step": 5348
},
{
"epoch": 0.67,
"learning_rate": 5.151962987231179e-06,
"loss": 2.5251,
"loss_": 1.1429,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3904,
"step": 5355
},
{
"epoch": 0.67,
"learning_rate": 5.127122885684815e-06,
"loss": 2.5393,
"loss_": 1.047,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3903,
"step": 5362
},
{
"epoch": 0.67,
"learning_rate": 5.102322151453759e-06,
"loss": 2.5347,
"loss_": 1.1776,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3904,
"step": 5369
},
{
"epoch": 0.67,
"learning_rate": 5.077560984899794e-06,
"loss": 2.5264,
"loss_": 1.1651,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3903,
"step": 5376
},
{
"epoch": 0.67,
"learning_rate": 5.052839586065027e-06,
"loss": 2.5453,
"loss_": 1.2535,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3903,
"step": 5383
},
{
"epoch": 0.68,
"learning_rate": 5.028158154670302e-06,
"loss": 2.5428,
"loss_": 1.1274,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3905,
"step": 5390
},
{
"epoch": 0.68,
"learning_rate": 5.003516890113563e-06,
"loss": 2.5141,
"loss_": 1.1251,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3903,
"step": 5397
},
{
"epoch": 0.68,
"learning_rate": 4.978915991468262e-06,
"loss": 2.5363,
"loss_": 1.1698,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3903,
"step": 5404
},
{
"epoch": 0.68,
"learning_rate": 4.954355657481722e-06,
"loss": 2.5367,
"loss_": 1.1349,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3902,
"step": 5411
},
{
"epoch": 0.68,
"learning_rate": 4.929836086573566e-06,
"loss": 2.5367,
"loss_": 1.2559,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3903,
"step": 5418
},
{
"epoch": 0.68,
"learning_rate": 4.905357476834095e-06,
"loss": 2.5303,
"loss_": 1.0117,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3902,
"step": 5425
},
{
"epoch": 0.68,
"learning_rate": 4.88092002602268e-06,
"loss": 2.5277,
"loss_": 1.118,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3902,
"step": 5432
},
{
"epoch": 0.68,
"learning_rate": 4.856523931566184e-06,
"loss": 2.5355,
"loss_": 1.2879,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3904,
"step": 5439
},
{
"epoch": 0.68,
"learning_rate": 4.832169390557357e-06,
"loss": 2.5615,
"loss_": 0.9215,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3902,
"step": 5446
},
{
"epoch": 0.68,
"learning_rate": 4.807856599753243e-06,
"loss": 2.5715,
"loss_": 1.0914,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3902,
"step": 5453
},
{
"epoch": 0.68,
"learning_rate": 4.783585755573589e-06,
"loss": 2.5301,
"loss_": 1.1468,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3902,
"step": 5460
},
{
"epoch": 0.69,
"learning_rate": 4.75935705409927e-06,
"loss": 2.5139,
"loss_": 1.2252,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3902,
"step": 5467
},
{
"epoch": 0.69,
"learning_rate": 4.735170691070679e-06,
"loss": 2.5219,
"loss_": 0.8784,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3902,
"step": 5474
},
{
"epoch": 0.69,
"learning_rate": 4.711026861886176e-06,
"loss": 2.5056,
"loss_": 1.276,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3902,
"step": 5481
},
{
"epoch": 0.69,
"learning_rate": 4.686925761600496e-06,
"loss": 2.5303,
"loss_": 1.0171,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3902,
"step": 5488
},
{
"epoch": 0.69,
"learning_rate": 4.662867584923169e-06,
"loss": 2.5533,
"loss_": 1.2451,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3901,
"step": 5495
},
{
"epoch": 0.69,
"learning_rate": 4.638852526216947e-06,
"loss": 2.5456,
"loss_": 1.0837,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3901,
"step": 5502
},
{
"epoch": 0.69,
"learning_rate": 4.614880779496244e-06,
"loss": 2.559,
"loss_": 1.108,
"moe_loss": 0.16,
"moe_loss_longrong": 1.39,
"step": 5509
},
{
"epoch": 0.69,
"learning_rate": 4.590952538425563e-06,
"loss": 2.5412,
"loss_": 1.1554,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3901,
"step": 5516
},
{
"epoch": 0.69,
"learning_rate": 4.567067996317922e-06,
"loss": 2.5085,
"loss_": 1.0805,
"moe_loss": 0.16,
"moe_loss_longrong": 1.39,
"step": 5523
},
{
"epoch": 0.69,
"learning_rate": 4.543227346133312e-06,
"loss": 2.5361,
"loss_": 0.7085,
"moe_loss": 0.16,
"moe_loss_longrong": 1.39,
"step": 5530
},
{
"epoch": 0.69,
"learning_rate": 4.519430780477124e-06,
"loss": 2.535,
"loss_": 1.2076,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3903,
"step": 5537
},
{
"epoch": 0.69,
"learning_rate": 4.495678491598587e-06,
"loss": 2.5142,
"loss_": 0.8921,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3901,
"step": 5544
},
{
"epoch": 0.7,
"learning_rate": 4.471970671389237e-06,
"loss": 2.4935,
"loss_": 0.8663,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3901,
"step": 5551
},
{
"epoch": 0.7,
"learning_rate": 4.4483075113813445e-06,
"loss": 2.5257,
"loss_": 1.1068,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3899,
"step": 5558
},
{
"epoch": 0.7,
"learning_rate": 4.4246892027463815e-06,
"loss": 2.5516,
"loss_": 1.3583,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3901,
"step": 5565
},
{
"epoch": 0.7,
"learning_rate": 4.401115936293468e-06,
"loss": 2.5143,
"loss_": 1.2772,
"moe_loss": 0.16,
"moe_loss_longrong": 1.39,
"step": 5572
},
{
"epoch": 0.7,
"learning_rate": 4.377587902467841e-06,
"loss": 2.5213,
"loss_": 1.0711,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3901,
"step": 5579
},
{
"epoch": 0.7,
"learning_rate": 4.354105291349301e-06,
"loss": 2.551,
"loss_": 1.0323,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.39,
"step": 5586
},
{
"epoch": 0.7,
"learning_rate": 4.330668292650686e-06,
"loss": 2.556,
"loss_": 1.2594,
"moe_loss": 0.16,
"moe_loss_longrong": 1.39,
"step": 5593
},
{
"epoch": 0.7,
"learning_rate": 4.3072770957163415e-06,
"loss": 2.5254,
"loss_": 1.1564,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3901,
"step": 5600
},
{
"epoch": 0.7,
"learning_rate": 4.283931889520587e-06,
"loss": 2.5109,
"loss_": 1.099,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3901,
"step": 5607
},
{
"epoch": 0.7,
"learning_rate": 4.260632862666181e-06,
"loss": 2.5028,
"loss_": 1.2475,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3901,
"step": 5614
},
{
"epoch": 0.7,
"learning_rate": 4.237380203382815e-06,
"loss": 2.5318,
"loss_": 1.149,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3899,
"step": 5621
},
{
"epoch": 0.71,
"learning_rate": 4.214174099525581e-06,
"loss": 2.5268,
"loss_": 1.2824,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3901,
"step": 5628
},
{
"epoch": 0.71,
"learning_rate": 4.191014738573448e-06,
"loss": 2.5064,
"loss_": 1.0164,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3901,
"step": 5635
},
{
"epoch": 0.71,
"learning_rate": 4.1679023076277644e-06,
"loss": 2.5413,
"loss_": 1.1083,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3902,
"step": 5642
},
{
"epoch": 0.71,
"learning_rate": 4.144836993410739e-06,
"loss": 2.5067,
"loss_": 1.174,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.39,
"step": 5649
},
{
"epoch": 0.71,
"learning_rate": 4.12181898226392e-06,
"loss": 2.5303,
"loss_": 1.3489,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.39,
"step": 5656
},
{
"epoch": 0.71,
"learning_rate": 4.098848460146709e-06,
"loss": 2.5134,
"loss_": 1.2256,
"moe_loss": 0.16,
"moe_loss_longrong": 1.39,
"step": 5663
},
{
"epoch": 0.71,
"learning_rate": 4.07592561263485e-06,
"loss": 2.5352,
"loss_": 1.0677,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3897,
"step": 5670
},
{
"epoch": 0.71,
"learning_rate": 4.053050624918927e-06,
"loss": 2.5389,
"loss_": 1.3495,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3902,
"step": 5677
},
{
"epoch": 0.71,
"learning_rate": 4.030223681802873e-06,
"loss": 2.5214,
"loss_": 0.9889,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3899,
"step": 5684
},
{
"epoch": 0.71,
"learning_rate": 4.007444967702475e-06,
"loss": 2.5118,
"loss_": 0.9998,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3901,
"step": 5691
},
{
"epoch": 0.71,
"learning_rate": 3.984714666643887e-06,
"loss": 2.5307,
"loss_": 1.176,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3899,
"step": 5698
},
{
"epoch": 0.72,
"learning_rate": 3.962032962262132e-06,
"loss": 2.5366,
"loss_": 1.0218,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3898,
"step": 5705
},
{
"epoch": 0.72,
"learning_rate": 3.9394000377996355e-06,
"loss": 2.5117,
"loss_": 0.8796,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3942,
"step": 5712
},
{
"epoch": 0.72,
"learning_rate": 3.916816076104737e-06,
"loss": 2.5142,
"loss_": 0.9879,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3941,
"step": 5719
},
{
"epoch": 0.72,
"learning_rate": 3.894281259630203e-06,
"loss": 2.505,
"loss_": 1.0209,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.39,
"step": 5726
},
{
"epoch": 0.72,
"learning_rate": 3.871795770431772e-06,
"loss": 2.547,
"loss_": 1.0637,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.39,
"step": 5733
},
{
"epoch": 0.72,
"learning_rate": 3.84935979016667e-06,
"loss": 2.5078,
"loss_": 1.1125,
"moe_loss": 0.16,
"moe_loss_longrong": 1.39,
"step": 5740
},
{
"epoch": 0.72,
"learning_rate": 3.826973500092153e-06,
"loss": 2.5075,
"loss_": 1.0352,
"moe_loss": 0.16,
"moe_loss_longrong": 1.39,
"step": 5747
},
{
"epoch": 0.72,
"learning_rate": 3.8046370810640223e-06,
"loss": 2.5161,
"loss_": 1.167,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3899,
"step": 5754
},
{
"epoch": 0.72,
"learning_rate": 3.782350713535192e-06,
"loss": 2.5364,
"loss_": 1.0836,
"moe_loss": 0.16,
"moe_loss_longrong": 1.39,
"step": 5761
},
{
"epoch": 0.72,
"learning_rate": 3.760114577554216e-06,
"loss": 2.5025,
"loss_": 1.31,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3898,
"step": 5768
},
{
"epoch": 0.72,
"learning_rate": 3.7379288527638203e-06,
"loss": 2.4932,
"loss_": 1.139,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3899,
"step": 5775
},
{
"epoch": 0.72,
"learning_rate": 3.715793718399482e-06,
"loss": 2.5266,
"loss_": 1.0568,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3899,
"step": 5782
},
{
"epoch": 0.73,
"learning_rate": 3.6937093532879576e-06,
"loss": 2.4954,
"loss_": 1.087,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3899,
"step": 5789
},
{
"epoch": 0.73,
"learning_rate": 3.6716759358458467e-06,
"loss": 2.5337,
"loss_": 1.1604,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3899,
"step": 5796
},
{
"epoch": 0.73,
"learning_rate": 3.6496936440781496e-06,
"loss": 2.5421,
"loss_": 1.2507,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.39,
"step": 5803
},
{
"epoch": 0.73,
"learning_rate": 3.6277626555768307e-06,
"loss": 2.5217,
"loss_": 1.1458,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3898,
"step": 5810
},
{
"epoch": 0.73,
"learning_rate": 3.605883147519377e-06,
"loss": 2.5247,
"loss_": 1.0988,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3899,
"step": 5817
},
{
"epoch": 0.73,
"learning_rate": 3.584055296667377e-06,
"loss": 2.5367,
"loss_": 1.2706,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3898,
"step": 5824
},
{
"epoch": 0.73,
"learning_rate": 3.562279279365086e-06,
"loss": 2.4891,
"loss_": 0.9378,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3898,
"step": 5831
},
{
"epoch": 0.73,
"learning_rate": 3.5405552715380075e-06,
"loss": 2.5432,
"loss_": 1.1877,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3898,
"step": 5838
},
{
"epoch": 0.73,
"learning_rate": 3.518883448691457e-06,
"loss": 2.5668,
"loss_": 1.1926,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3898,
"step": 5845
},
{
"epoch": 0.73,
"learning_rate": 3.497263985909163e-06,
"loss": 2.5219,
"loss_": 1.0161,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3898,
"step": 5852
},
{
"epoch": 0.73,
"learning_rate": 3.4756970578518456e-06,
"loss": 2.4932,
"loss_": 0.8275,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3899,
"step": 5859
},
{
"epoch": 0.74,
"learning_rate": 3.4541828387557953e-06,
"loss": 2.5043,
"loss_": 1.0652,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3897,
"step": 5866
},
{
"epoch": 0.74,
"learning_rate": 3.43272150243148e-06,
"loss": 2.5058,
"loss_": 1.0976,
"moe_loss": 0.16,
"moe_loss_longrong": 1.39,
"step": 5873
},
{
"epoch": 0.74,
"learning_rate": 3.4113132222621382e-06,
"loss": 2.531,
"loss_": 1.2403,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3899,
"step": 5880
},
{
"epoch": 0.74,
"learning_rate": 3.3899581712023644e-06,
"loss": 2.5157,
"loss_": 1.1653,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3901,
"step": 5887
},
{
"epoch": 0.74,
"learning_rate": 3.3686565217767307e-06,
"loss": 2.5229,
"loss_": 1.175,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3898,
"step": 5894
},
{
"epoch": 0.74,
"learning_rate": 3.347408446078384e-06,
"loss": 2.5029,
"loss_": 0.9687,
"moe_loss": 0.16,
"moe_loss_longrong": 1.39,
"step": 5901
},
{
"epoch": 0.74,
"learning_rate": 3.326214115767654e-06,
"loss": 2.5651,
"loss_": 1.1922,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3899,
"step": 5908
},
{
"epoch": 0.74,
"learning_rate": 3.3050737020706693e-06,
"loss": 2.5259,
"loss_": 1.145,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3898,
"step": 5915
},
{
"epoch": 0.74,
"learning_rate": 3.283987375777974e-06,
"loss": 2.5289,
"loss_": 1.2907,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3898,
"step": 5922
},
{
"epoch": 0.74,
"learning_rate": 3.26295530724315e-06,
"loss": 2.5091,
"loss_": 0.8317,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3898,
"step": 5929
},
{
"epoch": 0.74,
"learning_rate": 3.2419776663814284e-06,
"loss": 2.5086,
"loss_": 1.212,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3897,
"step": 5936
},
{
"epoch": 0.74,
"learning_rate": 3.221054622668337e-06,
"loss": 2.5146,
"loss_": 0.9387,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3899,
"step": 5943
},
{
"epoch": 0.75,
"learning_rate": 3.2001863451383186e-06,
"loss": 2.5292,
"loss_": 1.1503,
"moe_loss": 0.16,
"moe_loss_longrong": 1.39,
"step": 5950
},
{
"epoch": 0.75,
"learning_rate": 3.1793730023833613e-06,
"loss": 2.5435,
"loss_": 1.1373,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3897,
"step": 5957
},
{
"epoch": 0.75,
"learning_rate": 3.1586147625516485e-06,
"loss": 2.5105,
"loss_": 0.9076,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3897,
"step": 5964
},
{
"epoch": 0.75,
"learning_rate": 3.1379117933461967e-06,
"loss": 2.5015,
"loss_": 1.1139,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3896,
"step": 5971
},
{
"epoch": 0.75,
"learning_rate": 3.117264262023488e-06,
"loss": 2.5259,
"loss_": 1.0666,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3898,
"step": 5978
},
{
"epoch": 0.75,
"learning_rate": 3.096672335392139e-06,
"loss": 2.511,
"loss_": 0.959,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3897,
"step": 5985
},
{
"epoch": 0.75,
"learning_rate": 3.0761361798115454e-06,
"loss": 2.5324,
"loss_": 1.265,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3898,
"step": 5992
},
{
"epoch": 0.75,
"learning_rate": 3.0556559611905236e-06,
"loss": 2.5163,
"loss_": 1.1558,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3899,
"step": 5999
},
{
"epoch": 0.75,
"learning_rate": 3.035231844985993e-06,
"loss": 2.5014,
"loss_": 1.2063,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3897,
"step": 6006
},
{
"epoch": 0.75,
"learning_rate": 3.014863996201628e-06,
"loss": 2.4744,
"loss_": 0.9746,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3936,
"step": 6013
},
{
"epoch": 0.75,
"learning_rate": 2.9945525793865237e-06,
"loss": 2.5197,
"loss_": 0.9524,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3898,
"step": 6020
},
{
"epoch": 0.76,
"learning_rate": 2.9742977586338718e-06,
"loss": 2.5209,
"loss_": 1.0009,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3896,
"step": 6027
},
{
"epoch": 0.76,
"learning_rate": 2.9540996975796288e-06,
"loss": 2.4865,
"loss_": 0.8742,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3898,
"step": 6034
},
{
"epoch": 0.76,
"learning_rate": 2.9339585594012034e-06,
"loss": 2.5309,
"loss_": 0.9582,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3896,
"step": 6041
},
{
"epoch": 0.76,
"learning_rate": 2.913874506816119e-06,
"loss": 2.527,
"loss_": 1.2344,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3897,
"step": 6048
},
{
"epoch": 0.76,
"learning_rate": 2.8938477020807267e-06,
"loss": 2.524,
"loss_": 1.0626,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3896,
"step": 6055
},
{
"epoch": 0.76,
"learning_rate": 2.873878306988874e-06,
"loss": 2.5321,
"loss_": 0.7181,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3939,
"step": 6062
},
{
"epoch": 0.76,
"learning_rate": 2.8539664828706002e-06,
"loss": 2.4993,
"loss_": 1.0792,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3898,
"step": 6069
},
{
"epoch": 0.76,
"learning_rate": 2.8341123905908406e-06,
"loss": 2.5386,
"loss_": 1.1162,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3896,
"step": 6076
},
{
"epoch": 0.76,
"learning_rate": 2.8143161905481277e-06,
"loss": 2.5657,
"loss_": 1.2773,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3897,
"step": 6083
},
{
"epoch": 0.76,
"learning_rate": 2.7945780426732773e-06,
"loss": 2.5597,
"loss_": 1.1365,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3897,
"step": 6090
},
{
"epoch": 0.76,
"learning_rate": 2.77489810642812e-06,
"loss": 2.5335,
"loss_": 1.1916,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6097
},
{
"epoch": 0.77,
"learning_rate": 2.7552765408042003e-06,
"loss": 2.5169,
"loss_": 1.1662,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3897,
"step": 6104
},
{
"epoch": 0.77,
"learning_rate": 2.7357135043214954e-06,
"loss": 2.5135,
"loss_": 1.0476,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3898,
"step": 6111
},
{
"epoch": 0.77,
"learning_rate": 2.7162091550271273e-06,
"loss": 2.4995,
"loss_": 1.1279,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3896,
"step": 6118
},
{
"epoch": 0.77,
"learning_rate": 2.6967636504940995e-06,
"loss": 2.54,
"loss_": 1.1283,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3896,
"step": 6125
},
{
"epoch": 0.77,
"learning_rate": 2.677377147820013e-06,
"loss": 2.5405,
"loss_": 0.896,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3897,
"step": 6132
},
{
"epoch": 0.77,
"learning_rate": 2.6580498036258016e-06,
"loss": 2.5475,
"loss_": 1.0748,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6139
},
{
"epoch": 0.77,
"learning_rate": 2.6387817740544665e-06,
"loss": 2.5046,
"loss_": 1.0242,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3896,
"step": 6146
},
{
"epoch": 0.77,
"learning_rate": 2.6195732147698148e-06,
"loss": 2.5553,
"loss_": 0.9529,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3936,
"step": 6153
},
{
"epoch": 0.77,
"learning_rate": 2.600424280955196e-06,
"loss": 2.5311,
"loss_": 1.1188,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3896,
"step": 6160
},
{
"epoch": 0.77,
"learning_rate": 2.581335127312257e-06,
"loss": 2.4974,
"loss_": 0.9955,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3897,
"step": 6167
},
{
"epoch": 0.77,
"learning_rate": 2.562305908059691e-06,
"loss": 2.5107,
"loss_": 1.0771,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3896,
"step": 6174
},
{
"epoch": 0.77,
"learning_rate": 2.5433367769319894e-06,
"loss": 2.5161,
"loss_": 1.1527,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3896,
"step": 6181
},
{
"epoch": 0.78,
"learning_rate": 2.5244278871781924e-06,
"loss": 2.5067,
"loss_": 0.7494,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3896,
"step": 6188
},
{
"epoch": 0.78,
"learning_rate": 2.505579391560665e-06,
"loss": 2.5101,
"loss_": 1.05,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3897,
"step": 6195
},
{
"epoch": 0.78,
"learning_rate": 2.4867914423538596e-06,
"loss": 2.505,
"loss_": 0.8529,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3931,
"step": 6202
},
{
"epoch": 0.78,
"learning_rate": 2.4680641913430703e-06,
"loss": 2.5413,
"loss_": 1.0996,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3897,
"step": 6209
},
{
"epoch": 0.78,
"learning_rate": 2.449397789823229e-06,
"loss": 2.5299,
"loss_": 1.198,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3896,
"step": 6216
},
{
"epoch": 0.78,
"learning_rate": 2.4307923885976724e-06,
"loss": 2.5472,
"loss_": 1.1477,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3897,
"step": 6223
},
{
"epoch": 0.78,
"learning_rate": 2.4122481379769157e-06,
"loss": 2.5024,
"loss_": 1.329,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3896,
"step": 6230
},
{
"epoch": 0.78,
"learning_rate": 2.3937651877774537e-06,
"loss": 2.5363,
"loss_": 1.3376,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3897,
"step": 6237
},
{
"epoch": 0.78,
"learning_rate": 2.3753436873205437e-06,
"loss": 2.5159,
"loss_": 0.9258,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3896,
"step": 6244
},
{
"epoch": 0.78,
"learning_rate": 2.356983785430996e-06,
"loss": 2.5133,
"loss_": 1.1375,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6251
},
{
"epoch": 0.78,
"learning_rate": 2.338685630435975e-06,
"loss": 2.5141,
"loss_": 1.0395,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6258
},
{
"epoch": 0.79,
"learning_rate": 2.320449370163802e-06,
"loss": 2.5141,
"loss_": 1.2221,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3896,
"step": 6265
},
{
"epoch": 0.79,
"learning_rate": 2.30227515194276e-06,
"loss": 2.5207,
"loss_": 1.1959,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6272
},
{
"epoch": 0.79,
"learning_rate": 2.284163122599895e-06,
"loss": 2.5455,
"loss_": 1.0789,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6279
},
{
"epoch": 0.79,
"learning_rate": 2.2661134284598442e-06,
"loss": 2.507,
"loss_": 1.1284,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3898,
"step": 6286
},
{
"epoch": 0.79,
"learning_rate": 2.248126215343651e-06,
"loss": 2.5232,
"loss_": 1.0936,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6293
},
{
"epoch": 0.79,
"learning_rate": 2.230201628567572e-06,
"loss": 2.5369,
"loss_": 1.2088,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6300
},
{
"epoch": 0.79,
"learning_rate": 2.2123398129419214e-06,
"loss": 2.5085,
"loss_": 1.3623,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3897,
"step": 6307
},
{
"epoch": 0.79,
"learning_rate": 2.1945409127698967e-06,
"loss": 2.5114,
"loss_": 1.109,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6314
},
{
"epoch": 0.79,
"learning_rate": 2.1768050718464006e-06,
"loss": 2.5095,
"loss_": 1.1855,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3896,
"step": 6321
},
{
"epoch": 0.79,
"learning_rate": 2.1591324334568943e-06,
"loss": 2.5014,
"loss_": 1.1081,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6328
},
{
"epoch": 0.79,
"learning_rate": 2.1415231403762383e-06,
"loss": 2.4978,
"loss_": 0.9622,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6335
},
{
"epoch": 0.79,
"learning_rate": 2.123977334867523e-06,
"loss": 2.5578,
"loss_": 1.4506,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6342
},
{
"epoch": 0.8,
"learning_rate": 2.1064951586809434e-06,
"loss": 2.5026,
"loss_": 1.1986,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6349
},
{
"epoch": 0.8,
"learning_rate": 2.0890767530526358e-06,
"loss": 2.5363,
"loss_": 1.1286,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3897,
"step": 6356
},
{
"epoch": 0.8,
"learning_rate": 2.0717222587035435e-06,
"loss": 2.5241,
"loss_": 0.963,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6363
},
{
"epoch": 0.8,
"learning_rate": 2.0544318158382815e-06,
"loss": 2.5148,
"loss_": 0.9621,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6370
},
{
"epoch": 0.8,
"learning_rate": 2.037205564143999e-06,
"loss": 2.5373,
"loss_": 1.147,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6377
},
{
"epoch": 0.8,
"learning_rate": 2.0200436427892554e-06,
"loss": 2.5173,
"loss_": 1.198,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6384
},
{
"epoch": 0.8,
"learning_rate": 2.0029461904228896e-06,
"loss": 2.5232,
"loss_": 1.058,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6391
},
{
"epoch": 0.8,
"learning_rate": 1.9859133451729094e-06,
"loss": 2.5238,
"loss_": 1.14,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6398
},
{
"epoch": 0.8,
"learning_rate": 1.9689452446453693e-06,
"loss": 2.5138,
"loss_": 1.137,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3896,
"step": 6405
},
{
"epoch": 0.8,
"learning_rate": 1.9520420259232566e-06,
"loss": 2.5304,
"loss_": 1.024,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6412
},
{
"epoch": 0.8,
"learning_rate": 1.9352038255653893e-06,
"loss": 2.514,
"loss_": 0.6869,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3896,
"step": 6419
},
{
"epoch": 0.81,
"learning_rate": 1.918430779605317e-06,
"loss": 2.5432,
"loss_": 1.1982,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6426
},
{
"epoch": 0.81,
"learning_rate": 1.9017230235502027e-06,
"loss": 2.5134,
"loss_": 0.7847,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6433
},
{
"epoch": 0.81,
"learning_rate": 1.8850806923797516e-06,
"loss": 2.5159,
"loss_": 1.457,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6440
},
{
"epoch": 0.81,
"learning_rate": 1.8685039205451072e-06,
"loss": 2.5284,
"loss_": 0.9184,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6447
},
{
"epoch": 0.81,
"learning_rate": 1.8519928419677703e-06,
"loss": 2.5196,
"loss_": 1.1945,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 6454
},
{
"epoch": 0.81,
"learning_rate": 1.8355475900385056e-06,
"loss": 2.5399,
"loss_": 1.0441,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6461
},
{
"epoch": 0.81,
"learning_rate": 1.819168297616284e-06,
"loss": 2.4934,
"loss_": 0.9845,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3931,
"step": 6468
},
{
"epoch": 0.81,
"learning_rate": 1.802855097027194e-06,
"loss": 2.4904,
"loss_": 1.1509,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6475
},
{
"epoch": 0.81,
"learning_rate": 1.7866081200633756e-06,
"loss": 2.4643,
"loss_": 1.0501,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6482
},
{
"epoch": 0.81,
"learning_rate": 1.7704274979819614e-06,
"loss": 2.5138,
"loss_": 1.171,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6489
},
{
"epoch": 0.81,
"learning_rate": 1.7543133615040098e-06,
"loss": 2.5229,
"loss_": 1.4327,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3896,
"step": 6496
},
{
"epoch": 0.82,
"learning_rate": 1.7382658408134467e-06,
"loss": 2.5343,
"loss_": 1.0981,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6503
},
{
"epoch": 0.82,
"learning_rate": 1.7222850655560241e-06,
"loss": 2.5169,
"loss_": 1.0696,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6510
},
{
"epoch": 0.82,
"learning_rate": 1.7063711648382665e-06,
"loss": 2.5251,
"loss_": 1.1954,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6517
},
{
"epoch": 0.82,
"learning_rate": 1.690524267226421e-06,
"loss": 2.5307,
"loss_": 1.1597,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6524
},
{
"epoch": 0.82,
"learning_rate": 1.6747445007454333e-06,
"loss": 2.5079,
"loss_": 1.1213,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6531
},
{
"epoch": 0.82,
"learning_rate": 1.659031992877903e-06,
"loss": 2.5285,
"loss_": 1.0738,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6538
},
{
"epoch": 0.82,
"learning_rate": 1.6433868705630584e-06,
"loss": 2.517,
"loss_": 0.7318,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6545
},
{
"epoch": 0.82,
"learning_rate": 1.6278092601957241e-06,
"loss": 2.5266,
"loss_": 1.0867,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3896,
"step": 6552
},
{
"epoch": 0.82,
"learning_rate": 1.6122992876253086e-06,
"loss": 2.5332,
"loss_": 1.0809,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6559
},
{
"epoch": 0.82,
"learning_rate": 1.5968570781547864e-06,
"loss": 2.5117,
"loss_": 1.2174,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6566
},
{
"epoch": 0.82,
"learning_rate": 1.581482756539674e-06,
"loss": 2.517,
"loss_": 1.3342,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6573
},
{
"epoch": 0.82,
"learning_rate": 1.5661764469870412e-06,
"loss": 2.5286,
"loss_": 0.9451,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6580
},
{
"epoch": 0.83,
"learning_rate": 1.5509382731544908e-06,
"loss": 2.5163,
"loss_": 1.1528,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6587
},
{
"epoch": 0.83,
"learning_rate": 1.53576835814917e-06,
"loss": 2.5028,
"loss_": 0.835,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3933,
"step": 6594
},
{
"epoch": 0.83,
"learning_rate": 1.5206668245267709e-06,
"loss": 2.5126,
"loss_": 0.8754,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6601
},
{
"epoch": 0.83,
"learning_rate": 1.5056337942905408e-06,
"loss": 2.5059,
"loss_": 0.9523,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 6608
},
{
"epoch": 0.83,
"learning_rate": 1.4906693888903022e-06,
"loss": 2.5235,
"loss_": 1.1009,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6615
},
{
"epoch": 0.83,
"learning_rate": 1.475773729221457e-06,
"loss": 2.5315,
"loss_": 1.1289,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6622
},
{
"epoch": 0.83,
"learning_rate": 1.460946935624027e-06,
"loss": 2.5179,
"loss_": 0.9098,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6629
},
{
"epoch": 0.83,
"learning_rate": 1.4461891278816775e-06,
"loss": 2.5291,
"loss_": 1.1364,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6636
},
{
"epoch": 0.83,
"learning_rate": 1.4315004252207354e-06,
"loss": 2.5287,
"loss_": 1.0815,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6643
},
{
"epoch": 0.83,
"learning_rate": 1.4168809463092459e-06,
"loss": 2.5112,
"loss_": 0.9575,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6650
},
{
"epoch": 0.83,
"learning_rate": 1.402330809256005e-06,
"loss": 2.5271,
"loss_": 1.1014,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6657
},
{
"epoch": 0.84,
"learning_rate": 1.387850131609597e-06,
"loss": 2.4711,
"loss_": 1.0401,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 6664
},
{
"epoch": 0.84,
"learning_rate": 1.3734390303574619e-06,
"loss": 2.5261,
"loss_": 1.2037,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 6671
},
{
"epoch": 0.84,
"learning_rate": 1.3590976219249386e-06,
"loss": 2.5267,
"loss_": 1.1024,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6678
},
{
"epoch": 0.84,
"learning_rate": 1.3448260221743249e-06,
"loss": 2.5327,
"loss_": 1.1507,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 6685
},
{
"epoch": 0.84,
"learning_rate": 1.3306243464039458e-06,
"loss": 2.5205,
"loss_": 1.2929,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6692
},
{
"epoch": 0.84,
"learning_rate": 1.3164927093472235e-06,
"loss": 2.5205,
"loss_": 1.1681,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 6699
},
{
"epoch": 0.84,
"learning_rate": 1.3024312251717365e-06,
"loss": 2.5222,
"loss_": 1.2896,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6706
},
{
"epoch": 0.84,
"learning_rate": 1.2884400074783176e-06,
"loss": 2.482,
"loss_": 1.4715,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6713
},
{
"epoch": 0.84,
"learning_rate": 1.2745191693001214e-06,
"loss": 2.5152,
"loss_": 1.1018,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6720
},
{
"epoch": 0.84,
"learning_rate": 1.2606688231017205e-06,
"loss": 2.4911,
"loss_": 0.8081,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6727
},
{
"epoch": 0.84,
"learning_rate": 1.246889080778184e-06,
"loss": 2.5173,
"loss_": 1.234,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6734
},
{
"epoch": 0.84,
"learning_rate": 1.2331800536541894e-06,
"loss": 2.5114,
"loss_": 1.3323,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6741
},
{
"epoch": 0.85,
"learning_rate": 1.219541852483115e-06,
"loss": 2.5135,
"loss_": 1.0719,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 6748
},
{
"epoch": 0.85,
"learning_rate": 1.2059745874461403e-06,
"loss": 2.5229,
"loss_": 1.1145,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6755
},
{
"epoch": 0.85,
"learning_rate": 1.1924783681513664e-06,
"loss": 2.5145,
"loss_": 1.0924,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 6762
},
{
"epoch": 0.85,
"learning_rate": 1.1790533036329265e-06,
"loss": 2.5242,
"loss_": 1.1827,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6769
},
{
"epoch": 0.85,
"learning_rate": 1.1656995023500971e-06,
"loss": 2.51,
"loss_": 0.9651,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6776
},
{
"epoch": 0.85,
"learning_rate": 1.1524170721864358e-06,
"loss": 2.5144,
"loss_": 1.4801,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6783
},
{
"epoch": 0.85,
"learning_rate": 1.139206120448899e-06,
"loss": 2.4961,
"loss_": 0.9054,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 6790
},
{
"epoch": 0.85,
"learning_rate": 1.12606675386698e-06,
"loss": 2.5091,
"loss_": 1.19,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6797
},
{
"epoch": 0.85,
"learning_rate": 1.1129990785918444e-06,
"loss": 2.5346,
"loss_": 0.9222,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 6804
},
{
"epoch": 0.85,
"learning_rate": 1.100003200195474e-06,
"loss": 2.5121,
"loss_": 0.9459,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6811
},
{
"epoch": 0.85,
"learning_rate": 1.0870792236698157e-06,
"loss": 2.5331,
"loss_": 1.1242,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6818
},
{
"epoch": 0.86,
"learning_rate": 1.0742272534259234e-06,
"loss": 2.5094,
"loss_": 1.0776,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6825
},
{
"epoch": 0.86,
"learning_rate": 1.061447393293129e-06,
"loss": 2.5038,
"loss_": 0.8106,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 6832
},
{
"epoch": 0.86,
"learning_rate": 1.048739746518197e-06,
"loss": 2.5264,
"loss_": 1.0267,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6839
},
{
"epoch": 0.86,
"learning_rate": 1.0361044157644828e-06,
"loss": 2.4963,
"loss_": 0.8518,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6846
},
{
"epoch": 0.86,
"learning_rate": 1.0235415031111173e-06,
"loss": 2.5199,
"loss_": 0.9971,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 6853
},
{
"epoch": 0.86,
"learning_rate": 1.0110511100521747e-06,
"loss": 2.5356,
"loss_": 0.9752,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6860
},
{
"epoch": 0.86,
"learning_rate": 9.98633337495848e-07,
"loss": 2.5046,
"loss_": 1.149,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 6867
},
{
"epoch": 0.86,
"learning_rate": 9.862882857636446e-07,
"loss": 2.5399,
"loss_": 1.2307,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 6874
},
{
"epoch": 0.86,
"learning_rate": 9.740160545895683e-07,
"loss": 2.506,
"loss_": 1.0997,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 6881
},
{
"epoch": 0.86,
"learning_rate": 9.61816743119317e-07,
"loss": 2.4961,
"loss_": 0.9872,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3931,
"step": 6888
},
{
"epoch": 0.86,
"learning_rate": 9.49690449909475e-07,
"loss": 2.5033,
"loss_": 0.8414,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3932,
"step": 6895
},
{
"epoch": 0.87,
"learning_rate": 9.376372729267269e-07,
"loss": 2.5298,
"loss_": 1.2211,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6902
},
{
"epoch": 0.87,
"learning_rate": 9.256573095470601e-07,
"loss": 2.5483,
"loss_": 0.8932,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6909
},
{
"epoch": 0.87,
"learning_rate": 9.137506565549791e-07,
"loss": 2.493,
"loss_": 1.1945,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 6916
},
{
"epoch": 0.87,
"learning_rate": 9.019174101427219e-07,
"loss": 2.5231,
"loss_": 1.1885,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 6923
},
{
"epoch": 0.87,
"learning_rate": 8.901576659094901e-07,
"loss": 2.5306,
"loss_": 1.0664,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6930
},
{
"epoch": 0.87,
"learning_rate": 8.784715188606629e-07,
"loss": 2.5236,
"loss_": 1.3339,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 6937
},
{
"epoch": 0.87,
"learning_rate": 8.668590634070428e-07,
"loss": 2.5428,
"loss_": 0.9196,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3933,
"step": 6944
},
{
"epoch": 0.87,
"learning_rate": 8.553203933640908e-07,
"loss": 2.5645,
"loss_": 1.1573,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 6951
},
{
"epoch": 0.87,
"learning_rate": 8.438556019511568e-07,
"loss": 2.5178,
"loss_": 0.9821,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 6958
},
{
"epoch": 0.87,
"learning_rate": 8.324647817907427e-07,
"loss": 2.563,
"loss_": 1.2098,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 6965
},
{
"epoch": 0.87,
"learning_rate": 8.211480249077441e-07,
"loss": 2.5238,
"loss_": 1.3804,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 6972
},
{
"epoch": 0.87,
"learning_rate": 8.099054227287129e-07,
"loss": 2.5693,
"loss_": 1.077,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 6979
},
{
"epoch": 0.88,
"learning_rate": 7.987370660811066e-07,
"loss": 2.5288,
"loss_": 0.8073,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 6986
},
{
"epoch": 0.88,
"learning_rate": 7.87643045192571e-07,
"loss": 2.501,
"loss_": 1.0172,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 6993
},
{
"epoch": 0.88,
"learning_rate": 7.766234496902025e-07,
"loss": 2.5408,
"loss_": 1.3127,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7000
},
{
"epoch": 0.88,
"learning_rate": 7.656783685998192e-07,
"loss": 2.5051,
"loss_": 0.7769,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7007
},
{
"epoch": 0.88,
"learning_rate": 7.548078903452527e-07,
"loss": 2.5057,
"loss_": 1.2042,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7014
},
{
"epoch": 0.88,
"learning_rate": 7.440121027476288e-07,
"loss": 2.5155,
"loss_": 1.0635,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7021
},
{
"epoch": 0.88,
"learning_rate": 7.332910930246528e-07,
"loss": 2.521,
"loss_": 1.1436,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7028
},
{
"epoch": 0.88,
"learning_rate": 7.226449477899156e-07,
"loss": 2.5023,
"loss_": 1.1022,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7035
},
{
"epoch": 0.88,
"learning_rate": 7.120737530521826e-07,
"loss": 2.5197,
"loss_": 0.9678,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7042
},
{
"epoch": 0.88,
"learning_rate": 7.015775942147107e-07,
"loss": 2.4997,
"loss_": 1.1735,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7049
},
{
"epoch": 0.88,
"learning_rate": 6.911565560745414e-07,
"loss": 2.5206,
"loss_": 1.1128,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7056
},
{
"epoch": 0.89,
"learning_rate": 6.808107228218375e-07,
"loss": 2.5601,
"loss_": 1.2738,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7063
},
{
"epoch": 0.89,
"learning_rate": 6.705401780391862e-07,
"loss": 2.5242,
"loss_": 0.8098,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3924,
"step": 7070
},
{
"epoch": 0.89,
"learning_rate": 6.603450047009286e-07,
"loss": 2.5201,
"loss_": 1.3133,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7077
},
{
"epoch": 0.89,
"learning_rate": 6.502252851724922e-07,
"loss": 2.5253,
"loss_": 1.1757,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7084
},
{
"epoch": 0.89,
"learning_rate": 6.401811012097248e-07,
"loss": 2.5515,
"loss_": 1.1474,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3891,
"step": 7091
},
{
"epoch": 0.89,
"learning_rate": 6.302125339582266e-07,
"loss": 2.5258,
"loss_": 0.9865,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 7098
},
{
"epoch": 0.89,
"learning_rate": 6.203196639527065e-07,
"loss": 2.5225,
"loss_": 1.2881,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7105
},
{
"epoch": 0.89,
"learning_rate": 6.105025711163249e-07,
"loss": 2.4979,
"loss_": 1.1086,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7112
},
{
"epoch": 0.89,
"learning_rate": 6.007613347600438e-07,
"loss": 2.5174,
"loss_": 0.6185,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7119
},
{
"epoch": 0.89,
"learning_rate": 5.910960335819982e-07,
"loss": 2.5157,
"loss_": 0.8674,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7126
},
{
"epoch": 0.89,
"learning_rate": 5.815067456668467e-07,
"loss": 2.5212,
"loss_": 1.2026,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7133
},
{
"epoch": 0.89,
"learning_rate": 5.719935484851513e-07,
"loss": 2.5215,
"loss_": 1.1283,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7140
},
{
"epoch": 0.9,
"learning_rate": 5.625565188927462e-07,
"loss": 2.4856,
"loss_": 1.2003,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7147
},
{
"epoch": 0.9,
"learning_rate": 5.531957331301152e-07,
"loss": 2.5027,
"loss_": 1.234,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7154
},
{
"epoch": 0.9,
"learning_rate": 5.43911266821785e-07,
"loss": 2.527,
"loss_": 1.2273,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7161
},
{
"epoch": 0.9,
"learning_rate": 5.347031949756987e-07,
"loss": 2.5546,
"loss_": 1.1449,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7168
},
{
"epoch": 0.9,
"learning_rate": 5.255715919826254e-07,
"loss": 2.5321,
"loss_": 1.0256,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 7175
},
{
"epoch": 0.9,
"learning_rate": 5.165165316155519e-07,
"loss": 2.5126,
"loss_": 0.9853,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7182
},
{
"epoch": 0.9,
"learning_rate": 5.075380870290847e-07,
"loss": 2.5047,
"loss_": 1.0876,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3935,
"step": 7189
},
{
"epoch": 0.9,
"learning_rate": 4.986363307588648e-07,
"loss": 2.5314,
"loss_": 1.1338,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7196
},
{
"epoch": 0.9,
"learning_rate": 4.898113347209788e-07,
"loss": 2.493,
"loss_": 1.0642,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7203
},
{
"epoch": 0.9,
"learning_rate": 4.810631702113722e-07,
"loss": 2.4985,
"loss_": 0.9782,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7210
},
{
"epoch": 0.9,
"learning_rate": 4.723919079052874e-07,
"loss": 2.5288,
"loss_": 1.4337,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7217
},
{
"epoch": 0.91,
"learning_rate": 4.637976178566772e-07,
"loss": 2.5036,
"loss_": 1.2508,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7224
},
{
"epoch": 0.91,
"learning_rate": 4.5528036949765155e-07,
"loss": 2.5172,
"loss_": 1.0779,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3891,
"step": 7231
},
{
"epoch": 0.91,
"learning_rate": 4.46840231637905e-07,
"loss": 2.4901,
"loss_": 1.1486,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3891,
"step": 7238
},
{
"epoch": 0.91,
"learning_rate": 4.3847727246417283e-07,
"loss": 2.5265,
"loss_": 1.089,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7245
},
{
"epoch": 0.91,
"learning_rate": 4.3019155953966995e-07,
"loss": 2.534,
"loss_": 1.0542,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7252
},
{
"epoch": 0.91,
"learning_rate": 4.2198315980355066e-07,
"loss": 2.4964,
"loss_": 1.2698,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7259
},
{
"epoch": 0.91,
"learning_rate": 4.1385213957036763e-07,
"loss": 2.4997,
"loss_": 1.0581,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7266
},
{
"epoch": 0.91,
"learning_rate": 4.057985645295337e-07,
"loss": 2.5273,
"loss_": 1.1317,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7273
},
{
"epoch": 0.91,
"learning_rate": 3.9782249974479105e-07,
"loss": 2.506,
"loss_": 1.1506,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7280
},
{
"epoch": 0.91,
"learning_rate": 3.899240096536905e-07,
"loss": 2.5387,
"loss_": 1.1072,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7287
},
{
"epoch": 0.91,
"learning_rate": 3.8210315806706535e-07,
"loss": 2.5092,
"loss_": 0.8953,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7294
},
{
"epoch": 0.92,
"learning_rate": 3.7436000816851504e-07,
"loss": 2.5046,
"loss_": 1.1341,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7301
},
{
"epoch": 0.92,
"learning_rate": 3.666946225139045e-07,
"loss": 2.5416,
"loss_": 1.2474,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7308
},
{
"epoch": 0.92,
"learning_rate": 3.5910706303084574e-07,
"loss": 2.521,
"loss_": 0.4458,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7315
},
{
"epoch": 0.92,
"learning_rate": 3.515973910182069e-07,
"loss": 2.5129,
"loss_": 1.0651,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7322
},
{
"epoch": 0.92,
"learning_rate": 3.4416566714561174e-07,
"loss": 2.5029,
"loss_": 1.0087,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7329
},
{
"epoch": 0.92,
"learning_rate": 3.368119514529533e-07,
"loss": 2.5211,
"loss_": 1.3133,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7336
},
{
"epoch": 0.92,
"learning_rate": 3.295363033499066e-07,
"loss": 2.5431,
"loss_": 1.005,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7343
},
{
"epoch": 0.92,
"learning_rate": 3.223387816154466e-07,
"loss": 2.5257,
"loss_": 1.1226,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7350
},
{
"epoch": 0.92,
"learning_rate": 3.1521944439738104e-07,
"loss": 2.52,
"loss_": 1.0687,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7357
},
{
"epoch": 0.92,
"learning_rate": 3.081783492118706e-07,
"loss": 2.5225,
"loss_": 1.1186,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7364
},
{
"epoch": 0.92,
"learning_rate": 3.012155529429728e-07,
"loss": 2.5037,
"loss_": 1.177,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7371
},
{
"epoch": 0.92,
"learning_rate": 2.9433111184217656e-07,
"loss": 2.5031,
"loss_": 1.2232,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7378
},
{
"epoch": 0.93,
"learning_rate": 2.875250815279518e-07,
"loss": 2.525,
"loss_": 1.0972,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7385
},
{
"epoch": 0.93,
"learning_rate": 2.807975169852939e-07,
"loss": 2.5529,
"loss_": 0.9882,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7392
},
{
"epoch": 0.93,
"learning_rate": 2.7414847256528985e-07,
"loss": 2.5408,
"loss_": 1.3431,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7399
},
{
"epoch": 0.93,
"learning_rate": 2.675780019846697e-07,
"loss": 2.5362,
"loss_": 1.0673,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7406
},
{
"epoch": 0.93,
"learning_rate": 2.6108615832537765e-07,
"loss": 2.4947,
"loss_": 1.136,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7413
},
{
"epoch": 0.93,
"learning_rate": 2.546729940341386e-07,
"loss": 2.4788,
"loss_": 1.2069,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7420
},
{
"epoch": 0.93,
"learning_rate": 2.4833856092204124e-07,
"loss": 2.5045,
"loss_": 1.1985,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7427
},
{
"epoch": 0.93,
"learning_rate": 2.4208291016411536e-07,
"loss": 2.5433,
"loss_": 1.0931,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7434
},
{
"epoch": 0.93,
"learning_rate": 2.3590609229891537e-07,
"loss": 2.5113,
"loss_": 1.2254,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7441
},
{
"epoch": 0.93,
"learning_rate": 2.2980815722811855e-07,
"loss": 2.5482,
"loss_": 1.243,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7448
},
{
"epoch": 0.93,
"learning_rate": 2.2378915421611746e-07,
"loss": 2.5383,
"loss_": 1.2111,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7455
},
{
"epoch": 0.94,
"learning_rate": 2.1784913188962365e-07,
"loss": 2.5388,
"loss_": 1.0636,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7462
},
{
"epoch": 0.94,
"learning_rate": 2.119881382372746e-07,
"loss": 2.502,
"loss_": 1.0249,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7469
},
{
"epoch": 0.94,
"learning_rate": 2.0620622060924522e-07,
"loss": 2.5066,
"loss_": 0.847,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7476
},
{
"epoch": 0.94,
"learning_rate": 2.0050342571686589e-07,
"loss": 2.5093,
"loss_": 1.1436,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7483
},
{
"epoch": 0.94,
"learning_rate": 1.9487979963224712e-07,
"loss": 2.5268,
"loss_": 1.0466,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7490
},
{
"epoch": 0.94,
"learning_rate": 1.8933538778790118e-07,
"loss": 2.5413,
"loss_": 1.3326,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7497
},
{
"epoch": 0.94,
"learning_rate": 1.8387023497638324e-07,
"loss": 2.5318,
"loss_": 1.1098,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7504
},
{
"epoch": 0.94,
"learning_rate": 1.7848438534992407e-07,
"loss": 2.5091,
"loss_": 0.8163,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7511
},
{
"epoch": 0.94,
"learning_rate": 1.7317788242007361e-07,
"loss": 2.5119,
"loss_": 0.9991,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7518
},
{
"epoch": 0.94,
"learning_rate": 1.679507690573523e-07,
"loss": 2.5155,
"loss_": 1.0626,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7525
},
{
"epoch": 0.94,
"learning_rate": 1.6280308749090036e-07,
"loss": 2.5254,
"loss_": 1.207,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7532
},
{
"epoch": 0.95,
"learning_rate": 1.5773487930814345e-07,
"loss": 2.516,
"loss_": 1.1184,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7539
},
{
"epoch": 0.95,
"learning_rate": 1.5274618545444985e-07,
"loss": 2.5236,
"loss_": 1.3532,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7546
},
{
"epoch": 0.95,
"learning_rate": 1.4783704623280048e-07,
"loss": 2.53,
"loss_": 1.1781,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7553
},
{
"epoch": 0.95,
"learning_rate": 1.430075013034693e-07,
"loss": 2.5614,
"loss_": 1.17,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7560
},
{
"epoch": 0.95,
"learning_rate": 1.3825758968369684e-07,
"loss": 2.5444,
"loss_": 1.2882,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7567
},
{
"epoch": 0.95,
"learning_rate": 1.335873497473761e-07,
"loss": 2.5334,
"loss_": 1.429,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7574
},
{
"epoch": 0.95,
"learning_rate": 1.2899681922474482e-07,
"loss": 2.5562,
"loss_": 1.0573,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7581
},
{
"epoch": 0.95,
"learning_rate": 1.2448603520207603e-07,
"loss": 2.4816,
"loss_": 1.1828,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7588
},
{
"epoch": 0.95,
"learning_rate": 1.2005503412138685e-07,
"loss": 2.5387,
"loss_": 1.1538,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7595
},
{
"epoch": 0.95,
"learning_rate": 1.1570385178013454e-07,
"loss": 2.5206,
"loss_": 0.9157,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7602
},
{
"epoch": 0.95,
"learning_rate": 1.1143252333093213e-07,
"loss": 2.4838,
"loss_": 0.7925,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7609
},
{
"epoch": 0.95,
"learning_rate": 1.0724108328126647e-07,
"loss": 2.5101,
"loss_": 1.2665,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7616
},
{
"epoch": 0.96,
"learning_rate": 1.0312956549321407e-07,
"loss": 2.501,
"loss_": 1.0971,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7623
},
{
"epoch": 0.96,
"learning_rate": 9.909800318317008e-08,
"loss": 2.5096,
"loss_": 1.2361,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7630
},
{
"epoch": 0.96,
"learning_rate": 9.51464289215831e-08,
"loss": 2.5052,
"loss_": 0.8553,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7637
},
{
"epoch": 0.96,
"learning_rate": 9.127487463268636e-08,
"loss": 2.5264,
"loss_": 0.9662,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3929,
"step": 7644
},
{
"epoch": 0.96,
"learning_rate": 8.748337159424247e-08,
"loss": 2.5115,
"loss_": 0.9187,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7651
},
{
"epoch": 0.96,
"learning_rate": 8.377195043729358e-08,
"loss": 2.519,
"loss_": 1.3642,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7658
},
{
"epoch": 0.96,
"learning_rate": 8.014064114590936e-08,
"loss": 2.5068,
"loss_": 1.052,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7665
},
{
"epoch": 0.96,
"learning_rate": 7.658947305694497e-08,
"loss": 2.4804,
"loss_": 1.1747,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7672
},
{
"epoch": 0.96,
"learning_rate": 7.311847485980794e-08,
"loss": 2.5175,
"loss_": 1.0972,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7679
},
{
"epoch": 0.96,
"learning_rate": 6.972767459622387e-08,
"loss": 2.4884,
"loss_": 0.9969,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7686
},
{
"epoch": 0.96,
"learning_rate": 6.641709966000886e-08,
"loss": 2.5285,
"loss_": 0.9687,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3891,
"step": 7693
},
{
"epoch": 0.97,
"learning_rate": 6.318677679685081e-08,
"loss": 2.4968,
"loss_": 1.151,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7700
},
{
"epoch": 0.97,
"learning_rate": 6.003673210409067e-08,
"loss": 2.5292,
"loss_": 0.7981,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7707
},
{
"epoch": 0.97,
"learning_rate": 5.696699103051484e-08,
"loss": 2.5312,
"loss_": 1.1779,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7714
},
{
"epoch": 0.97,
"learning_rate": 5.3977578376144257e-08,
"loss": 2.5219,
"loss_": 1.0229,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3895,
"step": 7721
},
{
"epoch": 0.97,
"learning_rate": 5.1068518292042293e-08,
"loss": 2.5087,
"loss_": 1.0511,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3929,
"step": 7728
},
{
"epoch": 0.97,
"learning_rate": 4.823983428010936e-08,
"loss": 2.4938,
"loss_": 0.9396,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7735
},
{
"epoch": 0.97,
"learning_rate": 4.549154919290199e-08,
"loss": 2.554,
"loss_": 1.0694,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7742
},
{
"epoch": 0.97,
"learning_rate": 4.2823685233445155e-08,
"loss": 2.5225,
"loss_": 1.023,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7749
},
{
"epoch": 0.97,
"learning_rate": 4.0236263955049095e-08,
"loss": 2.4966,
"loss_": 1.2173,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7756
},
{
"epoch": 0.97,
"learning_rate": 3.7729306261141685e-08,
"loss": 2.5165,
"loss_": 1.0892,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7763
},
{
"epoch": 0.97,
"learning_rate": 3.530283240509414e-08,
"loss": 2.5096,
"loss_": 0.9228,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7770
},
{
"epoch": 0.97,
"learning_rate": 3.2956861990062203e-08,
"loss": 2.5389,
"loss_": 1.1298,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3891,
"step": 7777
},
{
"epoch": 0.98,
"learning_rate": 3.0691413968821915e-08,
"loss": 2.5103,
"loss_": 0.8584,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3891,
"step": 7784
},
{
"epoch": 0.98,
"learning_rate": 2.8506506643621866e-08,
"loss": 2.5083,
"loss_": 1.001,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7791
},
{
"epoch": 0.98,
"learning_rate": 2.6402157666034488e-08,
"loss": 2.5312,
"loss_": 1.1118,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7798
},
{
"epoch": 0.98,
"learning_rate": 2.4378384036808368e-08,
"loss": 2.4881,
"loss_": 1.061,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7805
},
{
"epoch": 0.98,
"learning_rate": 2.243520210573946e-08,
"loss": 2.529,
"loss_": 1.1615,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7812
},
{
"epoch": 0.98,
"learning_rate": 2.0572627571529e-08,
"loss": 2.5355,
"loss_": 1.0449,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7819
},
{
"epoch": 0.98,
"learning_rate": 1.8790675481666908e-08,
"loss": 2.5263,
"loss_": 0.9646,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7826
},
{
"epoch": 0.98,
"learning_rate": 1.70893602323019e-08,
"loss": 2.5028,
"loss_": 1.3031,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7833
},
{
"epoch": 0.98,
"learning_rate": 1.5468695568131576e-08,
"loss": 2.5335,
"loss_": 1.0215,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7840
},
{
"epoch": 0.98,
"learning_rate": 1.3928694582284741e-08,
"loss": 2.5138,
"loss_": 1.0019,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7847
},
{
"epoch": 0.98,
"learning_rate": 1.246936971622148e-08,
"loss": 2.5227,
"loss_": 1.2481,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3891,
"step": 7854
},
{
"epoch": 0.99,
"learning_rate": 1.1090732759631018e-08,
"loss": 2.5075,
"loss_": 1.1757,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3891,
"step": 7861
},
{
"epoch": 0.99,
"learning_rate": 9.79279485033402e-09,
"loss": 2.524,
"loss_": 1.3905,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7868
},
{
"epoch": 0.99,
"learning_rate": 8.575566474195996e-09,
"loss": 2.5326,
"loss_": 1.1002,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7875
},
{
"epoch": 0.99,
"learning_rate": 7.43905746503959e-09,
"loss": 2.5051,
"loss_": 0.7085,
"moe_loss": 0.1602,
"moe_loss_longrong": 1.3925,
"step": 7882
},
{
"epoch": 0.99,
"learning_rate": 6.383277004569088e-09,
"loss": 2.5389,
"loss_": 1.1412,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7889
},
{
"epoch": 0.99,
"learning_rate": 5.408233622289371e-09,
"loss": 2.5165,
"loss_": 0.9492,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7896
},
{
"epoch": 0.99,
"learning_rate": 4.513935195445962e-09,
"loss": 2.5191,
"loss_": 0.9539,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3894,
"step": 7903
},
{
"epoch": 0.99,
"learning_rate": 3.7003889489550806e-09,
"loss": 2.4844,
"loss_": 0.9553,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7910
},
{
"epoch": 0.99,
"learning_rate": 2.9676014553459145e-09,
"loss": 2.5133,
"loss_": 0.8643,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3891,
"step": 7917
},
{
"epoch": 0.99,
"learning_rate": 2.315578634710658e-09,
"loss": 2.4846,
"loss_": 1.0907,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3891,
"step": 7924
},
{
"epoch": 0.99,
"learning_rate": 1.7443257546512215e-09,
"loss": 2.4985,
"loss_": 0.8135,
"moe_loss": 0.1601,
"moe_loss_longrong": 1.3927,
"step": 7931
},
{
"epoch": 1.0,
"learning_rate": 1.2538474302459246e-09,
"loss": 2.5193,
"loss_": 1.0192,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7938
},
{
"epoch": 1.0,
"learning_rate": 8.441476239995361e-10,
"loss": 2.5156,
"loss_": 0.857,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7945
},
{
"epoch": 1.0,
"learning_rate": 5.152296458232897e-10,
"loss": 2.5293,
"loss_": 1.1247,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7952
},
{
"epoch": 1.0,
"learning_rate": 2.6709615299935763e-10,
"loss": 2.5155,
"loss_": 0.9838,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7959
},
{
"epoch": 1.0,
"learning_rate": 9.97491501675274e-11,
"loss": 2.5037,
"loss_": 1.1665,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3892,
"step": 7966
},
{
"epoch": 1.0,
"learning_rate": 1.3189989298556527e-11,
"loss": 2.5389,
"loss_": 0.9918,
"moe_loss": 0.16,
"moe_loss_longrong": 1.3893,
"step": 7973
},
{
"epoch": 1.0,
"step": 7977,
"total_flos": 1.1960092486052872e+19,
"train_loss": 2.5803030949420793,
"train_runtime": 142162.8835,
"train_samples_per_second": 7.183,
"train_steps_per_second": 0.056
}
],
"logging_steps": 7,
"max_steps": 7977,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 1000,
"total_flos": 1.1960092486052872e+19,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}