FiLM / trainer_state.json
HYdsl's picture
FiLM_2.4B model upload
f770e74
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"global_step": 383745,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.993485257136901e-05,
"loss": 1.5554,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 4.9869705142738016e-05,
"loss": 1.5383,
"step": 1000
},
{
"epoch": 0.0,
"learning_rate": 4.9804557714107027e-05,
"loss": 1.5335,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 4.973941028547604e-05,
"loss": 1.528,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 4.967426285684505e-05,
"loss": 1.5146,
"step": 2500
},
{
"epoch": 0.01,
"learning_rate": 4.960911542821405e-05,
"loss": 1.5074,
"step": 3000
},
{
"epoch": 0.01,
"learning_rate": 4.9543967999583054e-05,
"loss": 1.5076,
"step": 3500
},
{
"epoch": 0.01,
"learning_rate": 4.9478820570952065e-05,
"loss": 1.5025,
"step": 4000
},
{
"epoch": 0.01,
"learning_rate": 4.9413673142321075e-05,
"loss": 1.5122,
"step": 4500
},
{
"epoch": 0.01,
"learning_rate": 4.934852571369008e-05,
"loss": 1.5013,
"step": 5000
},
{
"epoch": 0.01,
"learning_rate": 4.928337828505909e-05,
"loss": 1.5068,
"step": 5500
},
{
"epoch": 0.02,
"learning_rate": 4.92182308564281e-05,
"loss": 1.4992,
"step": 6000
},
{
"epoch": 0.02,
"learning_rate": 4.915308342779711e-05,
"loss": 1.4902,
"step": 6500
},
{
"epoch": 0.02,
"learning_rate": 4.908793599916611e-05,
"loss": 1.4905,
"step": 7000
},
{
"epoch": 0.02,
"learning_rate": 4.902278857053512e-05,
"loss": 1.4883,
"step": 7500
},
{
"epoch": 0.02,
"learning_rate": 4.8957641141904134e-05,
"loss": 1.4811,
"step": 8000
},
{
"epoch": 0.02,
"learning_rate": 4.8892493713273144e-05,
"loss": 1.4823,
"step": 8500
},
{
"epoch": 0.02,
"learning_rate": 4.882734628464215e-05,
"loss": 1.4842,
"step": 9000
},
{
"epoch": 0.02,
"learning_rate": 4.876219885601115e-05,
"loss": 1.4744,
"step": 9500
},
{
"epoch": 0.03,
"learning_rate": 4.869705142738016e-05,
"loss": 1.471,
"step": 10000
},
{
"epoch": 0.03,
"learning_rate": 4.863190399874917e-05,
"loss": 1.4788,
"step": 10500
},
{
"epoch": 0.03,
"learning_rate": 4.8566756570118175e-05,
"loss": 1.4833,
"step": 11000
},
{
"epoch": 0.03,
"learning_rate": 4.8501609141487185e-05,
"loss": 1.4778,
"step": 11500
},
{
"epoch": 0.03,
"learning_rate": 4.8436461712856196e-05,
"loss": 1.4651,
"step": 12000
},
{
"epoch": 0.03,
"learning_rate": 4.8371314284225206e-05,
"loss": 1.4722,
"step": 12500
},
{
"epoch": 0.03,
"learning_rate": 4.830616685559421e-05,
"loss": 1.4741,
"step": 13000
},
{
"epoch": 0.04,
"learning_rate": 4.824101942696322e-05,
"loss": 1.4711,
"step": 13500
},
{
"epoch": 0.04,
"learning_rate": 4.817587199833223e-05,
"loss": 1.463,
"step": 14000
},
{
"epoch": 0.04,
"learning_rate": 4.811072456970124e-05,
"loss": 1.4641,
"step": 14500
},
{
"epoch": 0.04,
"learning_rate": 4.8045577141070244e-05,
"loss": 1.465,
"step": 15000
},
{
"epoch": 0.04,
"learning_rate": 4.7980429712439254e-05,
"loss": 1.46,
"step": 15500
},
{
"epoch": 0.04,
"learning_rate": 4.791528228380826e-05,
"loss": 1.4563,
"step": 16000
},
{
"epoch": 0.04,
"learning_rate": 4.785013485517727e-05,
"loss": 1.4545,
"step": 16500
},
{
"epoch": 0.04,
"learning_rate": 4.778498742654627e-05,
"loss": 1.4643,
"step": 17000
},
{
"epoch": 0.05,
"learning_rate": 4.771983999791528e-05,
"loss": 1.4612,
"step": 17500
},
{
"epoch": 0.05,
"learning_rate": 4.765469256928429e-05,
"loss": 1.4538,
"step": 18000
},
{
"epoch": 0.05,
"learning_rate": 4.75895451406533e-05,
"loss": 1.4508,
"step": 18500
},
{
"epoch": 0.05,
"learning_rate": 4.7524397712022306e-05,
"loss": 1.4565,
"step": 19000
},
{
"epoch": 0.05,
"learning_rate": 4.7459250283391316e-05,
"loss": 1.4562,
"step": 19500
},
{
"epoch": 0.05,
"learning_rate": 4.739410285476033e-05,
"loss": 1.4538,
"step": 20000
},
{
"epoch": 0.05,
"learning_rate": 4.732895542612934e-05,
"loss": 1.4476,
"step": 20500
},
{
"epoch": 0.05,
"learning_rate": 4.726380799749834e-05,
"loss": 1.4567,
"step": 21000
},
{
"epoch": 0.06,
"learning_rate": 4.719866056886735e-05,
"loss": 1.4464,
"step": 21500
},
{
"epoch": 0.06,
"learning_rate": 4.7133513140236354e-05,
"loss": 1.4514,
"step": 22000
},
{
"epoch": 0.06,
"learning_rate": 4.7068365711605365e-05,
"loss": 1.4504,
"step": 22500
},
{
"epoch": 0.06,
"learning_rate": 4.700321828297437e-05,
"loss": 1.4467,
"step": 23000
},
{
"epoch": 0.06,
"learning_rate": 4.693807085434338e-05,
"loss": 1.4449,
"step": 23500
},
{
"epoch": 0.06,
"learning_rate": 4.687292342571239e-05,
"loss": 1.439,
"step": 24000
},
{
"epoch": 0.06,
"learning_rate": 4.68077759970814e-05,
"loss": 1.4369,
"step": 24500
},
{
"epoch": 0.07,
"learning_rate": 4.67426285684504e-05,
"loss": 1.4486,
"step": 25000
},
{
"epoch": 0.07,
"learning_rate": 4.667748113981941e-05,
"loss": 1.4426,
"step": 25500
},
{
"epoch": 0.07,
"learning_rate": 4.6612333711188423e-05,
"loss": 1.4371,
"step": 26000
},
{
"epoch": 0.07,
"learning_rate": 4.6547186282557434e-05,
"loss": 1.4382,
"step": 26500
},
{
"epoch": 0.07,
"learning_rate": 4.648203885392644e-05,
"loss": 1.4306,
"step": 27000
},
{
"epoch": 0.07,
"learning_rate": 4.641689142529545e-05,
"loss": 1.4491,
"step": 27500
},
{
"epoch": 0.07,
"learning_rate": 4.635174399666445e-05,
"loss": 1.441,
"step": 28000
},
{
"epoch": 0.07,
"learning_rate": 4.628659656803346e-05,
"loss": 1.4499,
"step": 28500
},
{
"epoch": 0.08,
"learning_rate": 4.6221449139402465e-05,
"loss": 1.4347,
"step": 29000
},
{
"epoch": 0.08,
"learning_rate": 4.6156301710771475e-05,
"loss": 1.4458,
"step": 29500
},
{
"epoch": 0.08,
"learning_rate": 4.6091154282140486e-05,
"loss": 1.4394,
"step": 30000
},
{
"epoch": 0.08,
"learning_rate": 4.6026006853509496e-05,
"loss": 1.4264,
"step": 30500
},
{
"epoch": 0.08,
"learning_rate": 4.59608594248785e-05,
"loss": 1.422,
"step": 31000
},
{
"epoch": 0.08,
"learning_rate": 4.589571199624751e-05,
"loss": 1.4297,
"step": 31500
},
{
"epoch": 0.08,
"learning_rate": 4.583056456761652e-05,
"loss": 1.4204,
"step": 32000
},
{
"epoch": 0.08,
"learning_rate": 4.576541713898553e-05,
"loss": 1.4287,
"step": 32500
},
{
"epoch": 0.09,
"learning_rate": 4.5700269710354534e-05,
"loss": 1.4262,
"step": 33000
},
{
"epoch": 0.09,
"learning_rate": 4.5635122281723544e-05,
"loss": 1.4353,
"step": 33500
},
{
"epoch": 0.09,
"learning_rate": 4.556997485309255e-05,
"loss": 1.422,
"step": 34000
},
{
"epoch": 0.09,
"learning_rate": 4.550482742446156e-05,
"loss": 1.4264,
"step": 34500
},
{
"epoch": 0.09,
"learning_rate": 4.543967999583056e-05,
"loss": 1.4279,
"step": 35000
},
{
"epoch": 0.09,
"learning_rate": 4.537453256719957e-05,
"loss": 1.4255,
"step": 35500
},
{
"epoch": 0.09,
"learning_rate": 4.530938513856858e-05,
"loss": 1.4245,
"step": 36000
},
{
"epoch": 0.1,
"learning_rate": 4.524423770993759e-05,
"loss": 1.4112,
"step": 36500
},
{
"epoch": 0.1,
"learning_rate": 4.5179090281306596e-05,
"loss": 1.4267,
"step": 37000
},
{
"epoch": 0.1,
"learning_rate": 4.5113942852675606e-05,
"loss": 1.4233,
"step": 37500
},
{
"epoch": 0.1,
"learning_rate": 4.504879542404462e-05,
"loss": 1.4283,
"step": 38000
},
{
"epoch": 0.1,
"learning_rate": 4.498364799541363e-05,
"loss": 1.4263,
"step": 38500
},
{
"epoch": 0.1,
"learning_rate": 4.491850056678263e-05,
"loss": 1.4239,
"step": 39000
},
{
"epoch": 0.1,
"learning_rate": 4.485335313815164e-05,
"loss": 1.4243,
"step": 39500
},
{
"epoch": 0.1,
"learning_rate": 4.4788205709520644e-05,
"loss": 1.4223,
"step": 40000
},
{
"epoch": 0.11,
"learning_rate": 4.4723058280889655e-05,
"loss": 1.4162,
"step": 40500
},
{
"epoch": 0.11,
"learning_rate": 4.465791085225866e-05,
"loss": 1.4142,
"step": 41000
},
{
"epoch": 0.11,
"learning_rate": 4.459276342362767e-05,
"loss": 1.4186,
"step": 41500
},
{
"epoch": 0.11,
"learning_rate": 4.452761599499668e-05,
"loss": 1.4115,
"step": 42000
},
{
"epoch": 0.11,
"learning_rate": 4.446246856636569e-05,
"loss": 1.4171,
"step": 42500
},
{
"epoch": 0.11,
"learning_rate": 4.439732113773469e-05,
"loss": 1.4107,
"step": 43000
},
{
"epoch": 0.11,
"learning_rate": 4.43321737091037e-05,
"loss": 1.4115,
"step": 43500
},
{
"epoch": 0.11,
"learning_rate": 4.426702628047271e-05,
"loss": 1.4064,
"step": 44000
},
{
"epoch": 0.12,
"learning_rate": 4.4201878851841724e-05,
"loss": 1.4168,
"step": 44500
},
{
"epoch": 0.12,
"learning_rate": 4.413673142321073e-05,
"loss": 1.415,
"step": 45000
},
{
"epoch": 0.12,
"learning_rate": 4.407158399457974e-05,
"loss": 1.4082,
"step": 45500
},
{
"epoch": 0.12,
"learning_rate": 4.400643656594874e-05,
"loss": 1.4104,
"step": 46000
},
{
"epoch": 0.12,
"learning_rate": 4.394128913731775e-05,
"loss": 1.4077,
"step": 46500
},
{
"epoch": 0.12,
"learning_rate": 4.3876141708686755e-05,
"loss": 1.4152,
"step": 47000
},
{
"epoch": 0.12,
"learning_rate": 4.3810994280055765e-05,
"loss": 1.4087,
"step": 47500
},
{
"epoch": 0.13,
"learning_rate": 4.3745846851424775e-05,
"loss": 1.4101,
"step": 48000
},
{
"epoch": 0.13,
"learning_rate": 4.3680699422793786e-05,
"loss": 1.4064,
"step": 48500
},
{
"epoch": 0.13,
"learning_rate": 4.361555199416279e-05,
"loss": 1.4071,
"step": 49000
},
{
"epoch": 0.13,
"learning_rate": 4.35504045655318e-05,
"loss": 1.4124,
"step": 49500
},
{
"epoch": 0.13,
"learning_rate": 4.348525713690081e-05,
"loss": 1.4091,
"step": 50000
},
{
"epoch": 0.13,
"learning_rate": 4.342010970826982e-05,
"loss": 1.4081,
"step": 50500
},
{
"epoch": 0.13,
"learning_rate": 4.3354962279638824e-05,
"loss": 1.4099,
"step": 51000
},
{
"epoch": 0.13,
"learning_rate": 4.3289814851007834e-05,
"loss": 1.4087,
"step": 51500
},
{
"epoch": 0.14,
"learning_rate": 4.3224667422376844e-05,
"loss": 1.3954,
"step": 52000
},
{
"epoch": 0.14,
"learning_rate": 4.315951999374585e-05,
"loss": 1.3962,
"step": 52500
},
{
"epoch": 0.14,
"learning_rate": 4.309437256511485e-05,
"loss": 1.4091,
"step": 53000
},
{
"epoch": 0.14,
"learning_rate": 4.302922513648386e-05,
"loss": 1.403,
"step": 53500
},
{
"epoch": 0.14,
"learning_rate": 4.296407770785287e-05,
"loss": 1.4087,
"step": 54000
},
{
"epoch": 0.14,
"learning_rate": 4.289893027922188e-05,
"loss": 1.4044,
"step": 54500
},
{
"epoch": 0.14,
"learning_rate": 4.2833782850590886e-05,
"loss": 1.3922,
"step": 55000
},
{
"epoch": 0.14,
"learning_rate": 4.2768635421959896e-05,
"loss": 1.4006,
"step": 55500
},
{
"epoch": 0.15,
"learning_rate": 4.2703487993328907e-05,
"loss": 1.3969,
"step": 56000
},
{
"epoch": 0.15,
"learning_rate": 4.263834056469792e-05,
"loss": 1.3985,
"step": 56500
},
{
"epoch": 0.15,
"learning_rate": 4.257319313606692e-05,
"loss": 1.4059,
"step": 57000
},
{
"epoch": 0.15,
"learning_rate": 4.250804570743593e-05,
"loss": 1.3923,
"step": 57500
},
{
"epoch": 0.15,
"learning_rate": 4.244289827880494e-05,
"loss": 1.3966,
"step": 58000
},
{
"epoch": 0.15,
"learning_rate": 4.2377750850173945e-05,
"loss": 1.3921,
"step": 58500
},
{
"epoch": 0.15,
"learning_rate": 4.231260342154295e-05,
"loss": 1.3987,
"step": 59000
},
{
"epoch": 0.16,
"learning_rate": 4.224745599291196e-05,
"loss": 1.3984,
"step": 59500
},
{
"epoch": 0.16,
"learning_rate": 4.218230856428097e-05,
"loss": 1.3914,
"step": 60000
},
{
"epoch": 0.16,
"learning_rate": 4.211716113564998e-05,
"loss": 1.3976,
"step": 60500
},
{
"epoch": 0.16,
"learning_rate": 4.205201370701898e-05,
"loss": 1.3883,
"step": 61000
},
{
"epoch": 0.16,
"learning_rate": 4.198686627838799e-05,
"loss": 1.3898,
"step": 61500
},
{
"epoch": 0.16,
"learning_rate": 4.1921718849757e-05,
"loss": 1.3917,
"step": 62000
},
{
"epoch": 0.16,
"learning_rate": 4.1856571421126014e-05,
"loss": 1.3973,
"step": 62500
},
{
"epoch": 0.16,
"learning_rate": 4.179142399249502e-05,
"loss": 1.392,
"step": 63000
},
{
"epoch": 0.17,
"learning_rate": 4.172627656386403e-05,
"loss": 1.3966,
"step": 63500
},
{
"epoch": 0.17,
"learning_rate": 4.166112913523304e-05,
"loss": 1.3923,
"step": 64000
},
{
"epoch": 0.17,
"learning_rate": 4.159598170660204e-05,
"loss": 1.3838,
"step": 64500
},
{
"epoch": 0.17,
"learning_rate": 4.1530834277971045e-05,
"loss": 1.3881,
"step": 65000
},
{
"epoch": 0.17,
"learning_rate": 4.1465686849340055e-05,
"loss": 1.3865,
"step": 65500
},
{
"epoch": 0.17,
"learning_rate": 4.1400539420709065e-05,
"loss": 1.3855,
"step": 66000
},
{
"epoch": 0.17,
"learning_rate": 4.1335391992078076e-05,
"loss": 1.3888,
"step": 66500
},
{
"epoch": 0.17,
"learning_rate": 4.127024456344708e-05,
"loss": 1.3954,
"step": 67000
},
{
"epoch": 0.18,
"learning_rate": 4.120509713481609e-05,
"loss": 1.387,
"step": 67500
},
{
"epoch": 0.18,
"learning_rate": 4.11399497061851e-05,
"loss": 1.3765,
"step": 68000
},
{
"epoch": 0.18,
"learning_rate": 4.107480227755411e-05,
"loss": 1.387,
"step": 68500
},
{
"epoch": 0.18,
"learning_rate": 4.1009654848923114e-05,
"loss": 1.3865,
"step": 69000
},
{
"epoch": 0.18,
"learning_rate": 4.0944507420292124e-05,
"loss": 1.3913,
"step": 69500
},
{
"epoch": 0.18,
"learning_rate": 4.0879359991661134e-05,
"loss": 1.3781,
"step": 70000
},
{
"epoch": 0.18,
"learning_rate": 4.081421256303014e-05,
"loss": 1.3833,
"step": 70500
},
{
"epoch": 0.19,
"learning_rate": 4.074906513439914e-05,
"loss": 1.3776,
"step": 71000
},
{
"epoch": 0.19,
"learning_rate": 4.068391770576815e-05,
"loss": 1.3837,
"step": 71500
},
{
"epoch": 0.19,
"learning_rate": 4.061877027713716e-05,
"loss": 1.3884,
"step": 72000
},
{
"epoch": 0.19,
"learning_rate": 4.055362284850617e-05,
"loss": 1.3811,
"step": 72500
},
{
"epoch": 0.19,
"learning_rate": 4.0488475419875176e-05,
"loss": 1.3868,
"step": 73000
},
{
"epoch": 0.19,
"learning_rate": 4.0423327991244186e-05,
"loss": 1.384,
"step": 73500
},
{
"epoch": 0.19,
"learning_rate": 4.0358180562613196e-05,
"loss": 1.3832,
"step": 74000
},
{
"epoch": 0.19,
"learning_rate": 4.029303313398221e-05,
"loss": 1.3871,
"step": 74500
},
{
"epoch": 0.2,
"learning_rate": 4.022788570535121e-05,
"loss": 1.386,
"step": 75000
},
{
"epoch": 0.2,
"learning_rate": 4.016273827672022e-05,
"loss": 1.3815,
"step": 75500
},
{
"epoch": 0.2,
"learning_rate": 4.009759084808923e-05,
"loss": 1.3755,
"step": 76000
},
{
"epoch": 0.2,
"learning_rate": 4.0032443419458234e-05,
"loss": 1.3859,
"step": 76500
},
{
"epoch": 0.2,
"learning_rate": 3.9967295990827245e-05,
"loss": 1.38,
"step": 77000
},
{
"epoch": 0.2,
"learning_rate": 3.990214856219625e-05,
"loss": 1.3834,
"step": 77500
},
{
"epoch": 0.2,
"learning_rate": 3.983700113356526e-05,
"loss": 1.3793,
"step": 78000
},
{
"epoch": 0.2,
"learning_rate": 3.977185370493427e-05,
"loss": 1.3765,
"step": 78500
},
{
"epoch": 0.21,
"learning_rate": 3.970670627630327e-05,
"loss": 1.3787,
"step": 79000
},
{
"epoch": 0.21,
"learning_rate": 3.964155884767228e-05,
"loss": 1.3818,
"step": 79500
},
{
"epoch": 0.21,
"learning_rate": 3.957641141904129e-05,
"loss": 1.3736,
"step": 80000
},
{
"epoch": 0.21,
"learning_rate": 3.9511263990410303e-05,
"loss": 1.3854,
"step": 80500
},
{
"epoch": 0.21,
"learning_rate": 3.944611656177931e-05,
"loss": 1.3796,
"step": 81000
},
{
"epoch": 0.21,
"learning_rate": 3.938096913314832e-05,
"loss": 1.3775,
"step": 81500
},
{
"epoch": 0.21,
"learning_rate": 3.931582170451733e-05,
"loss": 1.3768,
"step": 82000
},
{
"epoch": 0.21,
"learning_rate": 3.925067427588633e-05,
"loss": 1.3691,
"step": 82500
},
{
"epoch": 0.22,
"learning_rate": 3.918552684725534e-05,
"loss": 1.3796,
"step": 83000
},
{
"epoch": 0.22,
"learning_rate": 3.9120379418624345e-05,
"loss": 1.3701,
"step": 83500
},
{
"epoch": 0.22,
"learning_rate": 3.9055231989993355e-05,
"loss": 1.3828,
"step": 84000
},
{
"epoch": 0.22,
"learning_rate": 3.8990084561362366e-05,
"loss": 1.3812,
"step": 84500
},
{
"epoch": 0.22,
"learning_rate": 3.892493713273137e-05,
"loss": 1.3767,
"step": 85000
},
{
"epoch": 0.22,
"learning_rate": 3.885978970410038e-05,
"loss": 1.3712,
"step": 85500
},
{
"epoch": 0.22,
"learning_rate": 3.879464227546939e-05,
"loss": 1.3715,
"step": 86000
},
{
"epoch": 0.23,
"learning_rate": 3.87294948468384e-05,
"loss": 1.3662,
"step": 86500
},
{
"epoch": 0.23,
"learning_rate": 3.8664347418207404e-05,
"loss": 1.3782,
"step": 87000
},
{
"epoch": 0.23,
"learning_rate": 3.8599199989576414e-05,
"loss": 1.3758,
"step": 87500
},
{
"epoch": 0.23,
"learning_rate": 3.8534052560945424e-05,
"loss": 1.3655,
"step": 88000
},
{
"epoch": 0.23,
"learning_rate": 3.8468905132314435e-05,
"loss": 1.3802,
"step": 88500
},
{
"epoch": 0.23,
"learning_rate": 3.840375770368344e-05,
"loss": 1.3766,
"step": 89000
},
{
"epoch": 0.23,
"learning_rate": 3.833861027505244e-05,
"loss": 1.3648,
"step": 89500
},
{
"epoch": 0.23,
"learning_rate": 3.827346284642145e-05,
"loss": 1.3703,
"step": 90000
},
{
"epoch": 0.24,
"learning_rate": 3.820831541779046e-05,
"loss": 1.3643,
"step": 90500
},
{
"epoch": 0.24,
"learning_rate": 3.8143167989159466e-05,
"loss": 1.368,
"step": 91000
},
{
"epoch": 0.24,
"learning_rate": 3.8078020560528476e-05,
"loss": 1.367,
"step": 91500
},
{
"epoch": 0.24,
"learning_rate": 3.8012873131897486e-05,
"loss": 1.3641,
"step": 92000
},
{
"epoch": 0.24,
"learning_rate": 3.79477257032665e-05,
"loss": 1.3743,
"step": 92500
},
{
"epoch": 0.24,
"learning_rate": 3.78825782746355e-05,
"loss": 1.3724,
"step": 93000
},
{
"epoch": 0.24,
"learning_rate": 3.781743084600451e-05,
"loss": 1.3729,
"step": 93500
},
{
"epoch": 0.24,
"learning_rate": 3.775228341737352e-05,
"loss": 1.3627,
"step": 94000
},
{
"epoch": 0.25,
"learning_rate": 3.768713598874253e-05,
"loss": 1.3649,
"step": 94500
},
{
"epoch": 0.25,
"learning_rate": 3.7621988560111535e-05,
"loss": 1.3617,
"step": 95000
},
{
"epoch": 0.25,
"learning_rate": 3.755684113148054e-05,
"loss": 1.3645,
"step": 95500
},
{
"epoch": 0.25,
"learning_rate": 3.749169370284955e-05,
"loss": 1.3537,
"step": 96000
},
{
"epoch": 0.25,
"learning_rate": 3.742654627421856e-05,
"loss": 1.3666,
"step": 96500
},
{
"epoch": 0.25,
"learning_rate": 3.736139884558756e-05,
"loss": 1.3629,
"step": 97000
},
{
"epoch": 0.25,
"learning_rate": 3.729625141695657e-05,
"loss": 1.367,
"step": 97500
},
{
"epoch": 0.26,
"learning_rate": 3.723110398832558e-05,
"loss": 1.3658,
"step": 98000
},
{
"epoch": 0.26,
"learning_rate": 3.716595655969459e-05,
"loss": 1.3599,
"step": 98500
},
{
"epoch": 0.26,
"learning_rate": 3.71008091310636e-05,
"loss": 1.3658,
"step": 99000
},
{
"epoch": 0.26,
"learning_rate": 3.703566170243261e-05,
"loss": 1.3595,
"step": 99500
},
{
"epoch": 0.26,
"learning_rate": 3.697051427380162e-05,
"loss": 1.3662,
"step": 100000
},
{
"epoch": 0.26,
"learning_rate": 3.690536684517063e-05,
"loss": 1.3613,
"step": 100500
},
{
"epoch": 0.26,
"learning_rate": 3.684021941653963e-05,
"loss": 1.3626,
"step": 101000
},
{
"epoch": 0.26,
"learning_rate": 3.6775071987908635e-05,
"loss": 1.3692,
"step": 101500
},
{
"epoch": 0.27,
"learning_rate": 3.6709924559277645e-05,
"loss": 1.3572,
"step": 102000
},
{
"epoch": 0.27,
"learning_rate": 3.6644777130646656e-05,
"loss": 1.3553,
"step": 102500
},
{
"epoch": 0.27,
"learning_rate": 3.657962970201566e-05,
"loss": 1.3561,
"step": 103000
},
{
"epoch": 0.27,
"learning_rate": 3.651448227338467e-05,
"loss": 1.3588,
"step": 103500
},
{
"epoch": 0.27,
"learning_rate": 3.644933484475368e-05,
"loss": 1.3554,
"step": 104000
},
{
"epoch": 0.27,
"learning_rate": 3.638418741612269e-05,
"loss": 1.3612,
"step": 104500
},
{
"epoch": 0.27,
"learning_rate": 3.6319039987491694e-05,
"loss": 1.3579,
"step": 105000
},
{
"epoch": 0.27,
"learning_rate": 3.6253892558860704e-05,
"loss": 1.3564,
"step": 105500
},
{
"epoch": 0.28,
"learning_rate": 3.6188745130229714e-05,
"loss": 1.3523,
"step": 106000
},
{
"epoch": 0.28,
"learning_rate": 3.6123597701598724e-05,
"loss": 1.353,
"step": 106500
},
{
"epoch": 0.28,
"learning_rate": 3.605845027296773e-05,
"loss": 1.3539,
"step": 107000
},
{
"epoch": 0.28,
"learning_rate": 3.599330284433673e-05,
"loss": 1.3577,
"step": 107500
},
{
"epoch": 0.28,
"learning_rate": 3.592815541570574e-05,
"loss": 1.353,
"step": 108000
},
{
"epoch": 0.28,
"learning_rate": 3.586300798707475e-05,
"loss": 1.3577,
"step": 108500
},
{
"epoch": 0.28,
"learning_rate": 3.5797860558443756e-05,
"loss": 1.3528,
"step": 109000
},
{
"epoch": 0.29,
"learning_rate": 3.5732713129812766e-05,
"loss": 1.364,
"step": 109500
},
{
"epoch": 0.29,
"learning_rate": 3.5667565701181776e-05,
"loss": 1.3585,
"step": 110000
},
{
"epoch": 0.29,
"learning_rate": 3.560241827255079e-05,
"loss": 1.3559,
"step": 110500
},
{
"epoch": 0.29,
"learning_rate": 3.553727084391979e-05,
"loss": 1.3518,
"step": 111000
},
{
"epoch": 0.29,
"learning_rate": 3.54721234152888e-05,
"loss": 1.3575,
"step": 111500
},
{
"epoch": 0.29,
"learning_rate": 3.540697598665781e-05,
"loss": 1.3527,
"step": 112000
},
{
"epoch": 0.29,
"learning_rate": 3.534182855802682e-05,
"loss": 1.3525,
"step": 112500
},
{
"epoch": 0.29,
"learning_rate": 3.5276681129395825e-05,
"loss": 1.353,
"step": 113000
},
{
"epoch": 0.3,
"learning_rate": 3.5211533700764835e-05,
"loss": 1.3471,
"step": 113500
},
{
"epoch": 0.3,
"learning_rate": 3.514638627213384e-05,
"loss": 1.3439,
"step": 114000
},
{
"epoch": 0.3,
"learning_rate": 3.508123884350285e-05,
"loss": 1.3478,
"step": 114500
},
{
"epoch": 0.3,
"learning_rate": 3.501609141487185e-05,
"loss": 1.3502,
"step": 115000
},
{
"epoch": 0.3,
"learning_rate": 3.495094398624086e-05,
"loss": 1.3482,
"step": 115500
},
{
"epoch": 0.3,
"learning_rate": 3.488579655760987e-05,
"loss": 1.3565,
"step": 116000
},
{
"epoch": 0.3,
"learning_rate": 3.482064912897888e-05,
"loss": 1.3556,
"step": 116500
},
{
"epoch": 0.3,
"learning_rate": 3.475550170034789e-05,
"loss": 1.3455,
"step": 117000
},
{
"epoch": 0.31,
"learning_rate": 3.46903542717169e-05,
"loss": 1.3487,
"step": 117500
},
{
"epoch": 0.31,
"learning_rate": 3.462520684308591e-05,
"loss": 1.3422,
"step": 118000
},
{
"epoch": 0.31,
"learning_rate": 3.456005941445492e-05,
"loss": 1.3493,
"step": 118500
},
{
"epoch": 0.31,
"learning_rate": 3.449491198582392e-05,
"loss": 1.3547,
"step": 119000
},
{
"epoch": 0.31,
"learning_rate": 3.442976455719293e-05,
"loss": 1.3355,
"step": 119500
},
{
"epoch": 0.31,
"learning_rate": 3.4364617128561935e-05,
"loss": 1.3569,
"step": 120000
},
{
"epoch": 0.31,
"learning_rate": 3.4299469699930945e-05,
"loss": 1.3542,
"step": 120500
},
{
"epoch": 0.32,
"learning_rate": 3.423432227129995e-05,
"loss": 1.3408,
"step": 121000
},
{
"epoch": 0.32,
"learning_rate": 3.416917484266896e-05,
"loss": 1.3452,
"step": 121500
},
{
"epoch": 0.32,
"learning_rate": 3.410402741403797e-05,
"loss": 1.3521,
"step": 122000
},
{
"epoch": 0.32,
"learning_rate": 3.403887998540698e-05,
"loss": 1.3483,
"step": 122500
},
{
"epoch": 0.32,
"learning_rate": 3.3973732556775983e-05,
"loss": 1.3409,
"step": 123000
},
{
"epoch": 0.32,
"learning_rate": 3.3908585128144994e-05,
"loss": 1.3493,
"step": 123500
},
{
"epoch": 0.32,
"learning_rate": 3.3843437699514004e-05,
"loss": 1.3459,
"step": 124000
},
{
"epoch": 0.32,
"learning_rate": 3.3778290270883014e-05,
"loss": 1.3358,
"step": 124500
},
{
"epoch": 0.33,
"learning_rate": 3.371314284225202e-05,
"loss": 1.347,
"step": 125000
},
{
"epoch": 0.33,
"learning_rate": 3.364799541362103e-05,
"loss": 1.3453,
"step": 125500
},
{
"epoch": 0.33,
"learning_rate": 3.358284798499003e-05,
"loss": 1.3358,
"step": 126000
},
{
"epoch": 0.33,
"learning_rate": 3.351770055635904e-05,
"loss": 1.3601,
"step": 126500
},
{
"epoch": 0.33,
"learning_rate": 3.3452553127728046e-05,
"loss": 1.3471,
"step": 127000
},
{
"epoch": 0.33,
"learning_rate": 3.3387405699097056e-05,
"loss": 1.3505,
"step": 127500
},
{
"epoch": 0.33,
"learning_rate": 3.3322258270466066e-05,
"loss": 1.3415,
"step": 128000
},
{
"epoch": 0.33,
"learning_rate": 3.3257110841835077e-05,
"loss": 1.3301,
"step": 128500
},
{
"epoch": 0.34,
"learning_rate": 3.319196341320408e-05,
"loss": 1.3444,
"step": 129000
},
{
"epoch": 0.34,
"learning_rate": 3.312681598457309e-05,
"loss": 1.34,
"step": 129500
},
{
"epoch": 0.34,
"learning_rate": 3.30616685559421e-05,
"loss": 1.3383,
"step": 130000
},
{
"epoch": 0.34,
"learning_rate": 3.299652112731111e-05,
"loss": 1.3386,
"step": 130500
},
{
"epoch": 0.34,
"learning_rate": 3.2931373698680115e-05,
"loss": 1.3397,
"step": 131000
},
{
"epoch": 0.34,
"learning_rate": 3.2866226270049125e-05,
"loss": 1.3377,
"step": 131500
},
{
"epoch": 0.34,
"learning_rate": 3.280107884141813e-05,
"loss": 1.3377,
"step": 132000
},
{
"epoch": 0.35,
"learning_rate": 3.273593141278714e-05,
"loss": 1.3327,
"step": 132500
},
{
"epoch": 0.35,
"learning_rate": 3.267078398415614e-05,
"loss": 1.3333,
"step": 133000
},
{
"epoch": 0.35,
"learning_rate": 3.260563655552515e-05,
"loss": 1.3369,
"step": 133500
},
{
"epoch": 0.35,
"learning_rate": 3.254048912689416e-05,
"loss": 1.3389,
"step": 134000
},
{
"epoch": 0.35,
"learning_rate": 3.247534169826317e-05,
"loss": 1.3419,
"step": 134500
},
{
"epoch": 0.35,
"learning_rate": 3.241019426963218e-05,
"loss": 1.3362,
"step": 135000
},
{
"epoch": 0.35,
"learning_rate": 3.234504684100119e-05,
"loss": 1.338,
"step": 135500
},
{
"epoch": 0.35,
"learning_rate": 3.22798994123702e-05,
"loss": 1.3349,
"step": 136000
},
{
"epoch": 0.36,
"learning_rate": 3.221475198373921e-05,
"loss": 1.3299,
"step": 136500
},
{
"epoch": 0.36,
"learning_rate": 3.214960455510821e-05,
"loss": 1.3305,
"step": 137000
},
{
"epoch": 0.36,
"learning_rate": 3.208445712647722e-05,
"loss": 1.3395,
"step": 137500
},
{
"epoch": 0.36,
"learning_rate": 3.2019309697846225e-05,
"loss": 1.3348,
"step": 138000
},
{
"epoch": 0.36,
"learning_rate": 3.1954162269215235e-05,
"loss": 1.3358,
"step": 138500
},
{
"epoch": 0.36,
"learning_rate": 3.188901484058424e-05,
"loss": 1.3295,
"step": 139000
},
{
"epoch": 0.36,
"learning_rate": 3.182386741195325e-05,
"loss": 1.335,
"step": 139500
},
{
"epoch": 0.36,
"learning_rate": 3.175871998332226e-05,
"loss": 1.3326,
"step": 140000
},
{
"epoch": 0.37,
"learning_rate": 3.169357255469127e-05,
"loss": 1.3367,
"step": 140500
},
{
"epoch": 0.37,
"learning_rate": 3.162842512606027e-05,
"loss": 1.3372,
"step": 141000
},
{
"epoch": 0.37,
"learning_rate": 3.1563277697429284e-05,
"loss": 1.3337,
"step": 141500
},
{
"epoch": 0.37,
"learning_rate": 3.1498130268798294e-05,
"loss": 1.3295,
"step": 142000
},
{
"epoch": 0.37,
"learning_rate": 3.1432982840167304e-05,
"loss": 1.3445,
"step": 142500
},
{
"epoch": 0.37,
"learning_rate": 3.136783541153631e-05,
"loss": 1.33,
"step": 143000
},
{
"epoch": 0.37,
"learning_rate": 3.130268798290532e-05,
"loss": 1.3319,
"step": 143500
},
{
"epoch": 0.38,
"learning_rate": 3.123754055427432e-05,
"loss": 1.3295,
"step": 144000
},
{
"epoch": 0.38,
"learning_rate": 3.117239312564333e-05,
"loss": 1.3294,
"step": 144500
},
{
"epoch": 0.38,
"learning_rate": 3.1107245697012335e-05,
"loss": 1.3217,
"step": 145000
},
{
"epoch": 0.38,
"learning_rate": 3.1042098268381346e-05,
"loss": 1.3308,
"step": 145500
},
{
"epoch": 0.38,
"learning_rate": 3.0976950839750356e-05,
"loss": 1.3278,
"step": 146000
},
{
"epoch": 0.38,
"learning_rate": 3.0911803411119366e-05,
"loss": 1.3286,
"step": 146500
},
{
"epoch": 0.38,
"learning_rate": 3.084665598248837e-05,
"loss": 1.3352,
"step": 147000
},
{
"epoch": 0.38,
"learning_rate": 3.078150855385738e-05,
"loss": 1.3125,
"step": 147500
},
{
"epoch": 0.39,
"learning_rate": 3.071636112522639e-05,
"loss": 1.3303,
"step": 148000
},
{
"epoch": 0.39,
"learning_rate": 3.06512136965954e-05,
"loss": 1.3185,
"step": 148500
},
{
"epoch": 0.39,
"learning_rate": 3.0586066267964404e-05,
"loss": 1.3295,
"step": 149000
},
{
"epoch": 0.39,
"learning_rate": 3.0520918839333415e-05,
"loss": 1.3218,
"step": 149500
},
{
"epoch": 0.39,
"learning_rate": 3.045577141070242e-05,
"loss": 1.3252,
"step": 150000
},
{
"epoch": 0.39,
"learning_rate": 3.0390623982071432e-05,
"loss": 1.3241,
"step": 150500
},
{
"epoch": 0.39,
"learning_rate": 3.0325476553440436e-05,
"loss": 1.3232,
"step": 151000
},
{
"epoch": 0.39,
"learning_rate": 3.0260329124809446e-05,
"loss": 1.329,
"step": 151500
},
{
"epoch": 0.4,
"learning_rate": 3.0195181696178453e-05,
"loss": 1.33,
"step": 152000
},
{
"epoch": 0.4,
"learning_rate": 3.0130034267547463e-05,
"loss": 1.3331,
"step": 152500
},
{
"epoch": 0.4,
"learning_rate": 3.0064886838916467e-05,
"loss": 1.32,
"step": 153000
},
{
"epoch": 0.4,
"learning_rate": 2.9999739410285477e-05,
"loss": 1.316,
"step": 153500
},
{
"epoch": 0.4,
"learning_rate": 2.9934591981654487e-05,
"loss": 1.3224,
"step": 154000
},
{
"epoch": 0.4,
"learning_rate": 2.9869444553023494e-05,
"loss": 1.3297,
"step": 154500
},
{
"epoch": 0.4,
"learning_rate": 2.98042971243925e-05,
"loss": 1.3222,
"step": 155000
},
{
"epoch": 0.41,
"learning_rate": 2.9739149695761508e-05,
"loss": 1.3346,
"step": 155500
},
{
"epoch": 0.41,
"learning_rate": 2.9674002267130518e-05,
"loss": 1.3314,
"step": 156000
},
{
"epoch": 0.41,
"learning_rate": 2.960885483849953e-05,
"loss": 1.3266,
"step": 156500
},
{
"epoch": 0.41,
"learning_rate": 2.9543707409868532e-05,
"loss": 1.3189,
"step": 157000
},
{
"epoch": 0.41,
"learning_rate": 2.9478559981237542e-05,
"loss": 1.3265,
"step": 157500
},
{
"epoch": 0.41,
"learning_rate": 2.941341255260655e-05,
"loss": 1.3136,
"step": 158000
},
{
"epoch": 0.41,
"learning_rate": 2.934826512397556e-05,
"loss": 1.3229,
"step": 158500
},
{
"epoch": 0.41,
"learning_rate": 2.9283117695344563e-05,
"loss": 1.3244,
"step": 159000
},
{
"epoch": 0.42,
"learning_rate": 2.9217970266713574e-05,
"loss": 1.3237,
"step": 159500
},
{
"epoch": 0.42,
"learning_rate": 2.9152822838082584e-05,
"loss": 1.3136,
"step": 160000
},
{
"epoch": 0.42,
"learning_rate": 2.908767540945159e-05,
"loss": 1.3158,
"step": 160500
},
{
"epoch": 0.42,
"learning_rate": 2.9022527980820598e-05,
"loss": 1.3164,
"step": 161000
},
{
"epoch": 0.42,
"learning_rate": 2.8957380552189605e-05,
"loss": 1.3238,
"step": 161500
},
{
"epoch": 0.42,
"learning_rate": 2.8892233123558615e-05,
"loss": 1.3206,
"step": 162000
},
{
"epoch": 0.42,
"learning_rate": 2.8827085694927625e-05,
"loss": 1.3232,
"step": 162500
},
{
"epoch": 0.42,
"learning_rate": 2.876193826629663e-05,
"loss": 1.316,
"step": 163000
},
{
"epoch": 0.43,
"learning_rate": 2.869679083766564e-05,
"loss": 1.3094,
"step": 163500
},
{
"epoch": 0.43,
"learning_rate": 2.8631643409034646e-05,
"loss": 1.313,
"step": 164000
},
{
"epoch": 0.43,
"learning_rate": 2.8566495980403656e-05,
"loss": 1.3158,
"step": 164500
},
{
"epoch": 0.43,
"learning_rate": 2.850134855177266e-05,
"loss": 1.3211,
"step": 165000
},
{
"epoch": 0.43,
"learning_rate": 2.843620112314167e-05,
"loss": 1.3155,
"step": 165500
},
{
"epoch": 0.43,
"learning_rate": 2.837105369451068e-05,
"loss": 1.314,
"step": 166000
},
{
"epoch": 0.43,
"learning_rate": 2.830590626587969e-05,
"loss": 1.3217,
"step": 166500
},
{
"epoch": 0.44,
"learning_rate": 2.8240758837248694e-05,
"loss": 1.3236,
"step": 167000
},
{
"epoch": 0.44,
"learning_rate": 2.81756114086177e-05,
"loss": 1.3118,
"step": 167500
},
{
"epoch": 0.44,
"learning_rate": 2.811046397998671e-05,
"loss": 1.3163,
"step": 168000
},
{
"epoch": 0.44,
"learning_rate": 2.8045316551355722e-05,
"loss": 1.3117,
"step": 168500
},
{
"epoch": 0.44,
"learning_rate": 2.7980169122724725e-05,
"loss": 1.3104,
"step": 169000
},
{
"epoch": 0.44,
"learning_rate": 2.7915021694093736e-05,
"loss": 1.3209,
"step": 169500
},
{
"epoch": 0.44,
"learning_rate": 2.7849874265462743e-05,
"loss": 1.3155,
"step": 170000
},
{
"epoch": 0.44,
"learning_rate": 2.7784726836831753e-05,
"loss": 1.3046,
"step": 170500
},
{
"epoch": 0.45,
"learning_rate": 2.7719579408200757e-05,
"loss": 1.3179,
"step": 171000
},
{
"epoch": 0.45,
"learning_rate": 2.7654431979569767e-05,
"loss": 1.3151,
"step": 171500
},
{
"epoch": 0.45,
"learning_rate": 2.7589284550938777e-05,
"loss": 1.3135,
"step": 172000
},
{
"epoch": 0.45,
"learning_rate": 2.7524137122307787e-05,
"loss": 1.3182,
"step": 172500
},
{
"epoch": 0.45,
"learning_rate": 2.745898969367679e-05,
"loss": 1.3114,
"step": 173000
},
{
"epoch": 0.45,
"learning_rate": 2.7393842265045798e-05,
"loss": 1.3103,
"step": 173500
},
{
"epoch": 0.45,
"learning_rate": 2.7328694836414808e-05,
"loss": 1.3097,
"step": 174000
},
{
"epoch": 0.45,
"learning_rate": 2.726354740778382e-05,
"loss": 1.3122,
"step": 174500
},
{
"epoch": 0.46,
"learning_rate": 2.7198399979152822e-05,
"loss": 1.3144,
"step": 175000
},
{
"epoch": 0.46,
"learning_rate": 2.7133252550521832e-05,
"loss": 1.3087,
"step": 175500
},
{
"epoch": 0.46,
"learning_rate": 2.7068105121890843e-05,
"loss": 1.3114,
"step": 176000
},
{
"epoch": 0.46,
"learning_rate": 2.700295769325985e-05,
"loss": 1.3098,
"step": 176500
},
{
"epoch": 0.46,
"learning_rate": 2.6937810264628853e-05,
"loss": 1.3131,
"step": 177000
},
{
"epoch": 0.46,
"learning_rate": 2.6872662835997863e-05,
"loss": 1.3064,
"step": 177500
},
{
"epoch": 0.46,
"learning_rate": 2.6807515407366874e-05,
"loss": 1.3102,
"step": 178000
},
{
"epoch": 0.47,
"learning_rate": 2.6742367978735884e-05,
"loss": 1.31,
"step": 178500
},
{
"epoch": 0.47,
"learning_rate": 2.6677220550104888e-05,
"loss": 1.3068,
"step": 179000
},
{
"epoch": 0.47,
"learning_rate": 2.6612073121473895e-05,
"loss": 1.3148,
"step": 179500
},
{
"epoch": 0.47,
"learning_rate": 2.6546925692842905e-05,
"loss": 1.317,
"step": 180000
},
{
"epoch": 0.47,
"learning_rate": 2.6481778264211915e-05,
"loss": 1.3105,
"step": 180500
},
{
"epoch": 0.47,
"learning_rate": 2.641663083558092e-05,
"loss": 1.3166,
"step": 181000
},
{
"epoch": 0.47,
"learning_rate": 2.635148340694993e-05,
"loss": 1.3029,
"step": 181500
},
{
"epoch": 0.47,
"learning_rate": 2.628633597831894e-05,
"loss": 1.3018,
"step": 182000
},
{
"epoch": 0.48,
"learning_rate": 2.6221188549687946e-05,
"loss": 1.311,
"step": 182500
},
{
"epoch": 0.48,
"learning_rate": 2.615604112105695e-05,
"loss": 1.3071,
"step": 183000
},
{
"epoch": 0.48,
"learning_rate": 2.609089369242596e-05,
"loss": 1.3201,
"step": 183500
},
{
"epoch": 0.48,
"learning_rate": 2.602574626379497e-05,
"loss": 1.2955,
"step": 184000
},
{
"epoch": 0.48,
"learning_rate": 2.596059883516398e-05,
"loss": 1.3063,
"step": 184500
},
{
"epoch": 0.48,
"learning_rate": 2.5895451406532984e-05,
"loss": 1.3042,
"step": 185000
},
{
"epoch": 0.48,
"learning_rate": 2.583030397790199e-05,
"loss": 1.3157,
"step": 185500
},
{
"epoch": 0.48,
"learning_rate": 2.5765156549271e-05,
"loss": 1.3007,
"step": 186000
},
{
"epoch": 0.49,
"learning_rate": 2.5700009120640012e-05,
"loss": 1.3051,
"step": 186500
},
{
"epoch": 0.49,
"learning_rate": 2.5634861692009015e-05,
"loss": 1.3082,
"step": 187000
},
{
"epoch": 0.49,
"learning_rate": 2.5569714263378026e-05,
"loss": 1.3,
"step": 187500
},
{
"epoch": 0.49,
"learning_rate": 2.5504566834747036e-05,
"loss": 1.3076,
"step": 188000
},
{
"epoch": 0.49,
"learning_rate": 2.5439419406116043e-05,
"loss": 1.3042,
"step": 188500
},
{
"epoch": 0.49,
"learning_rate": 2.5374271977485046e-05,
"loss": 1.3045,
"step": 189000
},
{
"epoch": 0.49,
"learning_rate": 2.5309124548854057e-05,
"loss": 1.306,
"step": 189500
},
{
"epoch": 0.5,
"learning_rate": 2.5243977120223067e-05,
"loss": 1.3041,
"step": 190000
},
{
"epoch": 0.5,
"learning_rate": 2.5178829691592077e-05,
"loss": 1.3057,
"step": 190500
},
{
"epoch": 0.5,
"learning_rate": 2.511368226296108e-05,
"loss": 1.3108,
"step": 191000
},
{
"epoch": 0.5,
"learning_rate": 2.504853483433009e-05,
"loss": 1.3084,
"step": 191500
},
{
"epoch": 0.5,
"learning_rate": 2.4983387405699098e-05,
"loss": 1.3049,
"step": 192000
},
{
"epoch": 0.5,
"learning_rate": 2.4918239977068105e-05,
"loss": 1.3015,
"step": 192500
},
{
"epoch": 0.5,
"learning_rate": 2.4853092548437115e-05,
"loss": 1.3037,
"step": 193000
},
{
"epoch": 0.5,
"learning_rate": 2.4787945119806122e-05,
"loss": 1.3066,
"step": 193500
},
{
"epoch": 0.51,
"learning_rate": 2.4722797691175133e-05,
"loss": 1.3041,
"step": 194000
},
{
"epoch": 0.51,
"learning_rate": 2.465765026254414e-05,
"loss": 1.3004,
"step": 194500
},
{
"epoch": 0.51,
"learning_rate": 2.4592502833913146e-05,
"loss": 1.3052,
"step": 195000
},
{
"epoch": 0.51,
"learning_rate": 2.4527355405282153e-05,
"loss": 1.3044,
"step": 195500
},
{
"epoch": 0.51,
"learning_rate": 2.4462207976651164e-05,
"loss": 1.2971,
"step": 196000
},
{
"epoch": 0.51,
"learning_rate": 2.439706054802017e-05,
"loss": 1.3006,
"step": 196500
},
{
"epoch": 0.51,
"learning_rate": 2.433191311938918e-05,
"loss": 1.3022,
"step": 197000
},
{
"epoch": 0.51,
"learning_rate": 2.4266765690758188e-05,
"loss": 1.3031,
"step": 197500
},
{
"epoch": 0.52,
"learning_rate": 2.4201618262127195e-05,
"loss": 1.3019,
"step": 198000
},
{
"epoch": 0.52,
"learning_rate": 2.41364708334962e-05,
"loss": 1.2947,
"step": 198500
},
{
"epoch": 0.52,
"learning_rate": 2.4071323404865212e-05,
"loss": 1.2943,
"step": 199000
},
{
"epoch": 0.52,
"learning_rate": 2.400617597623422e-05,
"loss": 1.3004,
"step": 199500
},
{
"epoch": 0.52,
"learning_rate": 2.394102854760323e-05,
"loss": 1.2991,
"step": 200000
},
{
"epoch": 0.52,
"learning_rate": 2.3875881118972236e-05,
"loss": 1.2941,
"step": 200500
},
{
"epoch": 0.52,
"learning_rate": 2.3810733690341243e-05,
"loss": 1.3016,
"step": 201000
},
{
"epoch": 0.53,
"learning_rate": 2.374558626171025e-05,
"loss": 1.3067,
"step": 201500
},
{
"epoch": 0.53,
"learning_rate": 2.368043883307926e-05,
"loss": 1.2968,
"step": 202000
},
{
"epoch": 0.53,
"learning_rate": 2.3615291404448267e-05,
"loss": 1.2987,
"step": 202500
},
{
"epoch": 0.53,
"learning_rate": 2.3550143975817278e-05,
"loss": 1.292,
"step": 203000
},
{
"epoch": 0.53,
"learning_rate": 2.3484996547186284e-05,
"loss": 1.3013,
"step": 203500
},
{
"epoch": 0.53,
"learning_rate": 2.341984911855529e-05,
"loss": 1.2993,
"step": 204000
},
{
"epoch": 0.53,
"learning_rate": 2.33547016899243e-05,
"loss": 1.289,
"step": 204500
},
{
"epoch": 0.53,
"learning_rate": 2.328955426129331e-05,
"loss": 1.2991,
"step": 205000
},
{
"epoch": 0.54,
"learning_rate": 2.3224406832662316e-05,
"loss": 1.3004,
"step": 205500
},
{
"epoch": 0.54,
"learning_rate": 2.3159259404031326e-05,
"loss": 1.3011,
"step": 206000
},
{
"epoch": 0.54,
"learning_rate": 2.3094111975400333e-05,
"loss": 1.3007,
"step": 206500
},
{
"epoch": 0.54,
"learning_rate": 2.302896454676934e-05,
"loss": 1.292,
"step": 207000
},
{
"epoch": 0.54,
"learning_rate": 2.2963817118138347e-05,
"loss": 1.2954,
"step": 207500
},
{
"epoch": 0.54,
"learning_rate": 2.2898669689507357e-05,
"loss": 1.2993,
"step": 208000
},
{
"epoch": 0.54,
"learning_rate": 2.2833522260876364e-05,
"loss": 1.2923,
"step": 208500
},
{
"epoch": 0.54,
"learning_rate": 2.2768374832245374e-05,
"loss": 1.2959,
"step": 209000
},
{
"epoch": 0.55,
"learning_rate": 2.270322740361438e-05,
"loss": 1.2818,
"step": 209500
},
{
"epoch": 0.55,
"learning_rate": 2.2638079974983388e-05,
"loss": 1.2905,
"step": 210000
},
{
"epoch": 0.55,
"learning_rate": 2.2572932546352395e-05,
"loss": 1.295,
"step": 210500
},
{
"epoch": 0.55,
"learning_rate": 2.2507785117721405e-05,
"loss": 1.2953,
"step": 211000
},
{
"epoch": 0.55,
"learning_rate": 2.2442637689090412e-05,
"loss": 1.2963,
"step": 211500
},
{
"epoch": 0.55,
"learning_rate": 2.2377490260459422e-05,
"loss": 1.2898,
"step": 212000
},
{
"epoch": 0.55,
"learning_rate": 2.231234283182843e-05,
"loss": 1.2868,
"step": 212500
},
{
"epoch": 0.56,
"learning_rate": 2.2247195403197436e-05,
"loss": 1.2888,
"step": 213000
},
{
"epoch": 0.56,
"learning_rate": 2.2182047974566443e-05,
"loss": 1.2844,
"step": 213500
},
{
"epoch": 0.56,
"learning_rate": 2.2116900545935454e-05,
"loss": 1.2876,
"step": 214000
},
{
"epoch": 0.56,
"learning_rate": 2.205175311730446e-05,
"loss": 1.2836,
"step": 214500
},
{
"epoch": 0.56,
"learning_rate": 2.198660568867347e-05,
"loss": 1.291,
"step": 215000
},
{
"epoch": 0.56,
"learning_rate": 2.1921458260042478e-05,
"loss": 1.2887,
"step": 215500
},
{
"epoch": 0.56,
"learning_rate": 2.1856310831411485e-05,
"loss": 1.2891,
"step": 216000
},
{
"epoch": 0.56,
"learning_rate": 2.179116340278049e-05,
"loss": 1.2859,
"step": 216500
},
{
"epoch": 0.57,
"learning_rate": 2.1726015974149502e-05,
"loss": 1.2833,
"step": 217000
},
{
"epoch": 0.57,
"learning_rate": 2.166086854551851e-05,
"loss": 1.2901,
"step": 217500
},
{
"epoch": 0.57,
"learning_rate": 2.159572111688752e-05,
"loss": 1.2928,
"step": 218000
},
{
"epoch": 0.57,
"learning_rate": 2.1530573688256526e-05,
"loss": 1.2991,
"step": 218500
},
{
"epoch": 0.57,
"learning_rate": 2.1465426259625533e-05,
"loss": 1.2895,
"step": 219000
},
{
"epoch": 0.57,
"learning_rate": 2.140027883099454e-05,
"loss": 1.2908,
"step": 219500
},
{
"epoch": 0.57,
"learning_rate": 2.133513140236355e-05,
"loss": 1.2973,
"step": 220000
},
{
"epoch": 0.57,
"learning_rate": 2.1269983973732557e-05,
"loss": 1.2887,
"step": 220500
},
{
"epoch": 0.58,
"learning_rate": 2.1204836545101567e-05,
"loss": 1.2807,
"step": 221000
},
{
"epoch": 0.58,
"learning_rate": 2.1139689116470574e-05,
"loss": 1.2805,
"step": 221500
},
{
"epoch": 0.58,
"learning_rate": 2.107454168783958e-05,
"loss": 1.2887,
"step": 222000
},
{
"epoch": 0.58,
"learning_rate": 2.1009394259208588e-05,
"loss": 1.2902,
"step": 222500
},
{
"epoch": 0.58,
"learning_rate": 2.09442468305776e-05,
"loss": 1.2915,
"step": 223000
},
{
"epoch": 0.58,
"learning_rate": 2.0879099401946605e-05,
"loss": 1.2829,
"step": 223500
},
{
"epoch": 0.58,
"learning_rate": 2.0813951973315616e-05,
"loss": 1.2915,
"step": 224000
},
{
"epoch": 0.59,
"learning_rate": 2.0748804544684623e-05,
"loss": 1.286,
"step": 224500
},
{
"epoch": 0.59,
"learning_rate": 2.0683657116053633e-05,
"loss": 1.2893,
"step": 225000
},
{
"epoch": 0.59,
"learning_rate": 2.0618509687422637e-05,
"loss": 1.2825,
"step": 225500
},
{
"epoch": 0.59,
"learning_rate": 2.0553362258791647e-05,
"loss": 1.2829,
"step": 226000
},
{
"epoch": 0.59,
"learning_rate": 2.0488214830160654e-05,
"loss": 1.2924,
"step": 226500
},
{
"epoch": 0.59,
"learning_rate": 2.0423067401529664e-05,
"loss": 1.2794,
"step": 227000
},
{
"epoch": 0.59,
"learning_rate": 2.035791997289867e-05,
"loss": 1.2912,
"step": 227500
},
{
"epoch": 0.59,
"learning_rate": 2.029277254426768e-05,
"loss": 1.2656,
"step": 228000
},
{
"epoch": 0.6,
"learning_rate": 2.0227625115636685e-05,
"loss": 1.2763,
"step": 228500
},
{
"epoch": 0.6,
"learning_rate": 2.0162477687005695e-05,
"loss": 1.2925,
"step": 229000
},
{
"epoch": 0.6,
"learning_rate": 2.0097330258374702e-05,
"loss": 1.28,
"step": 229500
},
{
"epoch": 0.6,
"learning_rate": 2.0032182829743712e-05,
"loss": 1.2827,
"step": 230000
},
{
"epoch": 0.6,
"learning_rate": 1.996703540111272e-05,
"loss": 1.2835,
"step": 230500
},
{
"epoch": 0.6,
"learning_rate": 1.990188797248173e-05,
"loss": 1.2814,
"step": 231000
},
{
"epoch": 0.6,
"learning_rate": 1.9836740543850733e-05,
"loss": 1.2853,
"step": 231500
},
{
"epoch": 0.6,
"learning_rate": 1.9771593115219743e-05,
"loss": 1.2748,
"step": 232000
},
{
"epoch": 0.61,
"learning_rate": 1.970644568658875e-05,
"loss": 1.2812,
"step": 232500
},
{
"epoch": 0.61,
"learning_rate": 1.964129825795776e-05,
"loss": 1.284,
"step": 233000
},
{
"epoch": 0.61,
"learning_rate": 1.9576150829326768e-05,
"loss": 1.2796,
"step": 233500
},
{
"epoch": 0.61,
"learning_rate": 1.9511003400695778e-05,
"loss": 1.2759,
"step": 234000
},
{
"epoch": 0.61,
"learning_rate": 1.944585597206478e-05,
"loss": 1.285,
"step": 234500
},
{
"epoch": 0.61,
"learning_rate": 1.9380708543433792e-05,
"loss": 1.2847,
"step": 235000
},
{
"epoch": 0.61,
"learning_rate": 1.93155611148028e-05,
"loss": 1.2795,
"step": 235500
},
{
"epoch": 0.61,
"learning_rate": 1.925041368617181e-05,
"loss": 1.2723,
"step": 236000
},
{
"epoch": 0.62,
"learning_rate": 1.9185266257540816e-05,
"loss": 1.2786,
"step": 236500
},
{
"epoch": 0.62,
"learning_rate": 1.9120118828909826e-05,
"loss": 1.272,
"step": 237000
},
{
"epoch": 0.62,
"learning_rate": 1.9054971400278833e-05,
"loss": 1.2739,
"step": 237500
},
{
"epoch": 0.62,
"learning_rate": 1.898982397164784e-05,
"loss": 1.2694,
"step": 238000
},
{
"epoch": 0.62,
"learning_rate": 1.8924676543016847e-05,
"loss": 1.2819,
"step": 238500
},
{
"epoch": 0.62,
"learning_rate": 1.8859529114385857e-05,
"loss": 1.2792,
"step": 239000
},
{
"epoch": 0.62,
"learning_rate": 1.8794381685754864e-05,
"loss": 1.2849,
"step": 239500
},
{
"epoch": 0.63,
"learning_rate": 1.8729234257123875e-05,
"loss": 1.2889,
"step": 240000
},
{
"epoch": 0.63,
"learning_rate": 1.866408682849288e-05,
"loss": 1.274,
"step": 240500
},
{
"epoch": 0.63,
"learning_rate": 1.859893939986189e-05,
"loss": 1.2778,
"step": 241000
},
{
"epoch": 0.63,
"learning_rate": 1.8533791971230895e-05,
"loss": 1.2751,
"step": 241500
},
{
"epoch": 0.63,
"learning_rate": 1.8468644542599906e-05,
"loss": 1.2791,
"step": 242000
},
{
"epoch": 0.63,
"learning_rate": 1.8403497113968913e-05,
"loss": 1.2803,
"step": 242500
},
{
"epoch": 0.63,
"learning_rate": 1.8338349685337923e-05,
"loss": 1.2785,
"step": 243000
},
{
"epoch": 0.63,
"learning_rate": 1.827320225670693e-05,
"loss": 1.272,
"step": 243500
},
{
"epoch": 0.64,
"learning_rate": 1.8208054828075937e-05,
"loss": 1.267,
"step": 244000
},
{
"epoch": 0.64,
"learning_rate": 1.8142907399444944e-05,
"loss": 1.2723,
"step": 244500
},
{
"epoch": 0.64,
"learning_rate": 1.8077759970813954e-05,
"loss": 1.2766,
"step": 245000
},
{
"epoch": 0.64,
"learning_rate": 1.801261254218296e-05,
"loss": 1.2756,
"step": 245500
},
{
"epoch": 0.64,
"learning_rate": 1.794746511355197e-05,
"loss": 1.275,
"step": 246000
},
{
"epoch": 0.64,
"learning_rate": 1.7882317684920978e-05,
"loss": 1.2775,
"step": 246500
},
{
"epoch": 0.64,
"learning_rate": 1.7817170256289985e-05,
"loss": 1.2707,
"step": 247000
},
{
"epoch": 0.64,
"learning_rate": 1.7752022827658992e-05,
"loss": 1.273,
"step": 247500
},
{
"epoch": 0.65,
"learning_rate": 1.7686875399028002e-05,
"loss": 1.2714,
"step": 248000
},
{
"epoch": 0.65,
"learning_rate": 1.762172797039701e-05,
"loss": 1.2758,
"step": 248500
},
{
"epoch": 0.65,
"learning_rate": 1.755658054176602e-05,
"loss": 1.2756,
"step": 249000
},
{
"epoch": 0.65,
"learning_rate": 1.7491433113135026e-05,
"loss": 1.2762,
"step": 249500
},
{
"epoch": 0.65,
"learning_rate": 1.7426285684504033e-05,
"loss": 1.2763,
"step": 250000
},
{
"epoch": 0.65,
"learning_rate": 1.736113825587304e-05,
"loss": 1.2766,
"step": 250500
},
{
"epoch": 0.65,
"learning_rate": 1.729599082724205e-05,
"loss": 1.2707,
"step": 251000
},
{
"epoch": 0.66,
"learning_rate": 1.7230843398611058e-05,
"loss": 1.2719,
"step": 251500
},
{
"epoch": 0.66,
"learning_rate": 1.7165695969980068e-05,
"loss": 1.2686,
"step": 252000
},
{
"epoch": 0.66,
"learning_rate": 1.7100548541349075e-05,
"loss": 1.2693,
"step": 252500
},
{
"epoch": 0.66,
"learning_rate": 1.7035401112718082e-05,
"loss": 1.2699,
"step": 253000
},
{
"epoch": 0.66,
"learning_rate": 1.697025368408709e-05,
"loss": 1.2696,
"step": 253500
},
{
"epoch": 0.66,
"learning_rate": 1.69051062554561e-05,
"loss": 1.2693,
"step": 254000
},
{
"epoch": 0.66,
"learning_rate": 1.6839958826825106e-05,
"loss": 1.2666,
"step": 254500
},
{
"epoch": 0.66,
"learning_rate": 1.6774811398194116e-05,
"loss": 1.2697,
"step": 255000
},
{
"epoch": 0.67,
"learning_rate": 1.6709663969563123e-05,
"loss": 1.2691,
"step": 255500
},
{
"epoch": 0.67,
"learning_rate": 1.664451654093213e-05,
"loss": 1.2669,
"step": 256000
},
{
"epoch": 0.67,
"learning_rate": 1.6579369112301137e-05,
"loss": 1.2663,
"step": 256500
},
{
"epoch": 0.67,
"learning_rate": 1.6514221683670147e-05,
"loss": 1.2694,
"step": 257000
},
{
"epoch": 0.67,
"learning_rate": 1.6449074255039154e-05,
"loss": 1.2706,
"step": 257500
},
{
"epoch": 0.67,
"learning_rate": 1.6383926826408164e-05,
"loss": 1.2704,
"step": 258000
},
{
"epoch": 0.67,
"learning_rate": 1.631877939777717e-05,
"loss": 1.258,
"step": 258500
},
{
"epoch": 0.67,
"learning_rate": 1.625363196914618e-05,
"loss": 1.2664,
"step": 259000
},
{
"epoch": 0.68,
"learning_rate": 1.6188484540515185e-05,
"loss": 1.2659,
"step": 259500
},
{
"epoch": 0.68,
"learning_rate": 1.6123337111884196e-05,
"loss": 1.2677,
"step": 260000
},
{
"epoch": 0.68,
"learning_rate": 1.6058189683253202e-05,
"loss": 1.2664,
"step": 260500
},
{
"epoch": 0.68,
"learning_rate": 1.5993042254622213e-05,
"loss": 1.2661,
"step": 261000
},
{
"epoch": 0.68,
"learning_rate": 1.592789482599122e-05,
"loss": 1.267,
"step": 261500
},
{
"epoch": 0.68,
"learning_rate": 1.5862747397360227e-05,
"loss": 1.2642,
"step": 262000
},
{
"epoch": 0.68,
"learning_rate": 1.5797599968729234e-05,
"loss": 1.2681,
"step": 262500
},
{
"epoch": 0.69,
"learning_rate": 1.5732452540098244e-05,
"loss": 1.2618,
"step": 263000
},
{
"epoch": 0.69,
"learning_rate": 1.566730511146725e-05,
"loss": 1.2658,
"step": 263500
},
{
"epoch": 0.69,
"learning_rate": 1.560215768283626e-05,
"loss": 1.264,
"step": 264000
},
{
"epoch": 0.69,
"learning_rate": 1.5537010254205268e-05,
"loss": 1.2676,
"step": 264500
},
{
"epoch": 0.69,
"learning_rate": 1.5471862825574275e-05,
"loss": 1.2665,
"step": 265000
},
{
"epoch": 0.69,
"learning_rate": 1.5406715396943282e-05,
"loss": 1.2559,
"step": 265500
},
{
"epoch": 0.69,
"learning_rate": 1.5341567968312292e-05,
"loss": 1.266,
"step": 266000
},
{
"epoch": 0.69,
"learning_rate": 1.52764205396813e-05,
"loss": 1.2616,
"step": 266500
},
{
"epoch": 0.7,
"learning_rate": 1.521127311105031e-05,
"loss": 1.2641,
"step": 267000
},
{
"epoch": 0.7,
"learning_rate": 1.5146125682419315e-05,
"loss": 1.2645,
"step": 267500
},
{
"epoch": 0.7,
"learning_rate": 1.5080978253788325e-05,
"loss": 1.2654,
"step": 268000
},
{
"epoch": 0.7,
"learning_rate": 1.5015830825157332e-05,
"loss": 1.2604,
"step": 268500
},
{
"epoch": 0.7,
"learning_rate": 1.495068339652634e-05,
"loss": 1.2601,
"step": 269000
},
{
"epoch": 0.7,
"learning_rate": 1.4885535967895347e-05,
"loss": 1.2642,
"step": 269500
},
{
"epoch": 0.7,
"learning_rate": 1.4820388539264358e-05,
"loss": 1.2579,
"step": 270000
},
{
"epoch": 0.7,
"learning_rate": 1.4755241110633363e-05,
"loss": 1.2642,
"step": 270500
},
{
"epoch": 0.71,
"learning_rate": 1.4690093682002373e-05,
"loss": 1.2728,
"step": 271000
},
{
"epoch": 0.71,
"learning_rate": 1.462494625337138e-05,
"loss": 1.265,
"step": 271500
},
{
"epoch": 0.71,
"learning_rate": 1.4559798824740389e-05,
"loss": 1.2657,
"step": 272000
},
{
"epoch": 0.71,
"learning_rate": 1.4494651396109396e-05,
"loss": 1.2748,
"step": 272500
},
{
"epoch": 0.71,
"learning_rate": 1.4429503967478406e-05,
"loss": 1.2682,
"step": 273000
},
{
"epoch": 0.71,
"learning_rate": 1.4364356538847413e-05,
"loss": 1.2628,
"step": 273500
},
{
"epoch": 0.71,
"learning_rate": 1.4299209110216422e-05,
"loss": 1.2729,
"step": 274000
},
{
"epoch": 0.72,
"learning_rate": 1.4234061681585429e-05,
"loss": 1.2698,
"step": 274500
},
{
"epoch": 0.72,
"learning_rate": 1.4168914252954437e-05,
"loss": 1.263,
"step": 275000
},
{
"epoch": 0.72,
"learning_rate": 1.4103766824323444e-05,
"loss": 1.2563,
"step": 275500
},
{
"epoch": 0.72,
"learning_rate": 1.4038619395692454e-05,
"loss": 1.2606,
"step": 276000
},
{
"epoch": 0.72,
"learning_rate": 1.3973471967061461e-05,
"loss": 1.2605,
"step": 276500
},
{
"epoch": 0.72,
"learning_rate": 1.390832453843047e-05,
"loss": 1.2604,
"step": 277000
},
{
"epoch": 0.72,
"learning_rate": 1.3843177109799477e-05,
"loss": 1.2627,
"step": 277500
},
{
"epoch": 0.72,
"learning_rate": 1.3778029681168485e-05,
"loss": 1.2586,
"step": 278000
},
{
"epoch": 0.73,
"learning_rate": 1.3712882252537492e-05,
"loss": 1.2617,
"step": 278500
},
{
"epoch": 0.73,
"learning_rate": 1.3647734823906503e-05,
"loss": 1.2648,
"step": 279000
},
{
"epoch": 0.73,
"learning_rate": 1.358258739527551e-05,
"loss": 1.2552,
"step": 279500
},
{
"epoch": 0.73,
"learning_rate": 1.3517439966644518e-05,
"loss": 1.2602,
"step": 280000
},
{
"epoch": 0.73,
"learning_rate": 1.3452292538013525e-05,
"loss": 1.2603,
"step": 280500
},
{
"epoch": 0.73,
"learning_rate": 1.3387145109382534e-05,
"loss": 1.2644,
"step": 281000
},
{
"epoch": 0.73,
"learning_rate": 1.332199768075154e-05,
"loss": 1.2553,
"step": 281500
},
{
"epoch": 0.73,
"learning_rate": 1.3256850252120551e-05,
"loss": 1.2513,
"step": 282000
},
{
"epoch": 0.74,
"learning_rate": 1.3191702823489558e-05,
"loss": 1.2441,
"step": 282500
},
{
"epoch": 0.74,
"learning_rate": 1.3126555394858567e-05,
"loss": 1.2602,
"step": 283000
},
{
"epoch": 0.74,
"learning_rate": 1.3061407966227573e-05,
"loss": 1.2647,
"step": 283500
},
{
"epoch": 0.74,
"learning_rate": 1.2996260537596582e-05,
"loss": 1.258,
"step": 284000
},
{
"epoch": 0.74,
"learning_rate": 1.2931113108965589e-05,
"loss": 1.2548,
"step": 284500
},
{
"epoch": 0.74,
"learning_rate": 1.28659656803346e-05,
"loss": 1.2598,
"step": 285000
},
{
"epoch": 0.74,
"learning_rate": 1.2800818251703606e-05,
"loss": 1.2573,
"step": 285500
},
{
"epoch": 0.75,
"learning_rate": 1.2735670823072615e-05,
"loss": 1.2622,
"step": 286000
},
{
"epoch": 0.75,
"learning_rate": 1.2670523394441622e-05,
"loss": 1.2579,
"step": 286500
},
{
"epoch": 0.75,
"learning_rate": 1.2605375965810632e-05,
"loss": 1.2516,
"step": 287000
},
{
"epoch": 0.75,
"learning_rate": 1.2540228537179637e-05,
"loss": 1.2547,
"step": 287500
},
{
"epoch": 0.75,
"learning_rate": 1.2475081108548646e-05,
"loss": 1.2576,
"step": 288000
},
{
"epoch": 0.75,
"learning_rate": 1.2409933679917655e-05,
"loss": 1.2495,
"step": 288500
},
{
"epoch": 0.75,
"learning_rate": 1.2344786251286662e-05,
"loss": 1.2578,
"step": 289000
},
{
"epoch": 0.75,
"learning_rate": 1.227963882265567e-05,
"loss": 1.2646,
"step": 289500
},
{
"epoch": 0.76,
"learning_rate": 1.2214491394024679e-05,
"loss": 1.2638,
"step": 290000
},
{
"epoch": 0.76,
"learning_rate": 1.2149343965393686e-05,
"loss": 1.2554,
"step": 290500
},
{
"epoch": 0.76,
"learning_rate": 1.2084196536762694e-05,
"loss": 1.2501,
"step": 291000
},
{
"epoch": 0.76,
"learning_rate": 1.2019049108131703e-05,
"loss": 1.2508,
"step": 291500
},
{
"epoch": 0.76,
"learning_rate": 1.195390167950071e-05,
"loss": 1.2557,
"step": 292000
},
{
"epoch": 0.76,
"learning_rate": 1.1888754250869718e-05,
"loss": 1.2434,
"step": 292500
},
{
"epoch": 0.76,
"learning_rate": 1.1823606822238727e-05,
"loss": 1.2519,
"step": 293000
},
{
"epoch": 0.76,
"learning_rate": 1.1758459393607734e-05,
"loss": 1.2475,
"step": 293500
},
{
"epoch": 0.77,
"learning_rate": 1.1693311964976743e-05,
"loss": 1.2479,
"step": 294000
},
{
"epoch": 0.77,
"learning_rate": 1.1628164536345751e-05,
"loss": 1.2572,
"step": 294500
},
{
"epoch": 0.77,
"learning_rate": 1.1563017107714758e-05,
"loss": 1.2527,
"step": 295000
},
{
"epoch": 0.77,
"learning_rate": 1.1497869679083767e-05,
"loss": 1.2505,
"step": 295500
},
{
"epoch": 0.77,
"learning_rate": 1.1432722250452775e-05,
"loss": 1.2472,
"step": 296000
},
{
"epoch": 0.77,
"learning_rate": 1.1367574821821782e-05,
"loss": 1.2524,
"step": 296500
},
{
"epoch": 0.77,
"learning_rate": 1.1302427393190791e-05,
"loss": 1.2548,
"step": 297000
},
{
"epoch": 0.78,
"learning_rate": 1.12372799645598e-05,
"loss": 1.2437,
"step": 297500
},
{
"epoch": 0.78,
"learning_rate": 1.1172132535928808e-05,
"loss": 1.2503,
"step": 298000
},
{
"epoch": 0.78,
"learning_rate": 1.1106985107297815e-05,
"loss": 1.259,
"step": 298500
},
{
"epoch": 0.78,
"learning_rate": 1.1041837678666824e-05,
"loss": 1.2518,
"step": 299000
},
{
"epoch": 0.78,
"learning_rate": 1.0976690250035832e-05,
"loss": 1.2511,
"step": 299500
},
{
"epoch": 0.78,
"learning_rate": 1.091154282140484e-05,
"loss": 1.2536,
"step": 300000
},
{
"epoch": 0.78,
"learning_rate": 1.0846395392773848e-05,
"loss": 1.2522,
"step": 300500
},
{
"epoch": 0.78,
"learning_rate": 1.0781247964142856e-05,
"loss": 1.2527,
"step": 301000
},
{
"epoch": 0.79,
"learning_rate": 1.0716100535511863e-05,
"loss": 1.2461,
"step": 301500
},
{
"epoch": 0.79,
"learning_rate": 1.0650953106880872e-05,
"loss": 1.2507,
"step": 302000
},
{
"epoch": 0.79,
"learning_rate": 1.058580567824988e-05,
"loss": 1.2536,
"step": 302500
},
{
"epoch": 0.79,
"learning_rate": 1.0520658249618888e-05,
"loss": 1.2427,
"step": 303000
},
{
"epoch": 0.79,
"learning_rate": 1.0455510820987896e-05,
"loss": 1.2488,
"step": 303500
},
{
"epoch": 0.79,
"learning_rate": 1.0390363392356905e-05,
"loss": 1.2536,
"step": 304000
},
{
"epoch": 0.79,
"learning_rate": 1.0325215963725912e-05,
"loss": 1.2464,
"step": 304500
},
{
"epoch": 0.79,
"learning_rate": 1.026006853509492e-05,
"loss": 1.2432,
"step": 305000
},
{
"epoch": 0.8,
"learning_rate": 1.0194921106463929e-05,
"loss": 1.2573,
"step": 305500
},
{
"epoch": 0.8,
"learning_rate": 1.0129773677832936e-05,
"loss": 1.2486,
"step": 306000
},
{
"epoch": 0.8,
"learning_rate": 1.0064626249201944e-05,
"loss": 1.2575,
"step": 306500
},
{
"epoch": 0.8,
"learning_rate": 9.999478820570953e-06,
"loss": 1.2395,
"step": 307000
},
{
"epoch": 0.8,
"learning_rate": 9.93433139193996e-06,
"loss": 1.2447,
"step": 307500
},
{
"epoch": 0.8,
"learning_rate": 9.869183963308969e-06,
"loss": 1.252,
"step": 308000
},
{
"epoch": 0.8,
"learning_rate": 9.804036534677977e-06,
"loss": 1.2417,
"step": 308500
},
{
"epoch": 0.81,
"learning_rate": 9.738889106046984e-06,
"loss": 1.2573,
"step": 309000
},
{
"epoch": 0.81,
"learning_rate": 9.673741677415993e-06,
"loss": 1.2414,
"step": 309500
},
{
"epoch": 0.81,
"learning_rate": 9.608594248785001e-06,
"loss": 1.248,
"step": 310000
},
{
"epoch": 0.81,
"learning_rate": 9.543446820154008e-06,
"loss": 1.2386,
"step": 310500
},
{
"epoch": 0.81,
"learning_rate": 9.478299391523017e-06,
"loss": 1.258,
"step": 311000
},
{
"epoch": 0.81,
"learning_rate": 9.413151962892026e-06,
"loss": 1.2442,
"step": 311500
},
{
"epoch": 0.81,
"learning_rate": 9.348004534261033e-06,
"loss": 1.2456,
"step": 312000
},
{
"epoch": 0.81,
"learning_rate": 9.282857105630041e-06,
"loss": 1.2505,
"step": 312500
},
{
"epoch": 0.82,
"learning_rate": 9.21770967699905e-06,
"loss": 1.2391,
"step": 313000
},
{
"epoch": 0.82,
"learning_rate": 9.152562248368057e-06,
"loss": 1.2483,
"step": 313500
},
{
"epoch": 0.82,
"learning_rate": 9.087414819737065e-06,
"loss": 1.2432,
"step": 314000
},
{
"epoch": 0.82,
"learning_rate": 9.022267391106074e-06,
"loss": 1.2428,
"step": 314500
},
{
"epoch": 0.82,
"learning_rate": 8.95711996247508e-06,
"loss": 1.2468,
"step": 315000
},
{
"epoch": 0.82,
"learning_rate": 8.89197253384409e-06,
"loss": 1.2469,
"step": 315500
},
{
"epoch": 0.82,
"learning_rate": 8.826825105213098e-06,
"loss": 1.2465,
"step": 316000
},
{
"epoch": 0.82,
"learning_rate": 8.761677676582105e-06,
"loss": 1.2313,
"step": 316500
},
{
"epoch": 0.83,
"learning_rate": 8.696530247951114e-06,
"loss": 1.2507,
"step": 317000
},
{
"epoch": 0.83,
"learning_rate": 8.631382819320122e-06,
"loss": 1.2452,
"step": 317500
},
{
"epoch": 0.83,
"learning_rate": 8.566235390689129e-06,
"loss": 1.2403,
"step": 318000
},
{
"epoch": 0.83,
"learning_rate": 8.501087962058138e-06,
"loss": 1.2561,
"step": 318500
},
{
"epoch": 0.83,
"learning_rate": 8.435940533427146e-06,
"loss": 1.2514,
"step": 319000
},
{
"epoch": 0.83,
"learning_rate": 8.370793104796153e-06,
"loss": 1.2375,
"step": 319500
},
{
"epoch": 0.83,
"learning_rate": 8.305645676165162e-06,
"loss": 1.245,
"step": 320000
},
{
"epoch": 0.84,
"learning_rate": 8.24049824753417e-06,
"loss": 1.2454,
"step": 320500
},
{
"epoch": 0.84,
"learning_rate": 8.175350818903177e-06,
"loss": 1.2389,
"step": 321000
},
{
"epoch": 0.84,
"learning_rate": 8.110203390272186e-06,
"loss": 1.241,
"step": 321500
},
{
"epoch": 0.84,
"learning_rate": 8.045055961641195e-06,
"loss": 1.2465,
"step": 322000
},
{
"epoch": 0.84,
"learning_rate": 7.979908533010202e-06,
"loss": 1.2451,
"step": 322500
},
{
"epoch": 0.84,
"learning_rate": 7.91476110437921e-06,
"loss": 1.2396,
"step": 323000
},
{
"epoch": 0.84,
"learning_rate": 7.849613675748219e-06,
"loss": 1.2392,
"step": 323500
},
{
"epoch": 0.84,
"learning_rate": 7.784466247117227e-06,
"loss": 1.2431,
"step": 324000
},
{
"epoch": 0.85,
"learning_rate": 7.719318818486234e-06,
"loss": 1.2502,
"step": 324500
},
{
"epoch": 0.85,
"learning_rate": 7.654171389855243e-06,
"loss": 1.2453,
"step": 325000
},
{
"epoch": 0.85,
"learning_rate": 7.589023961224251e-06,
"loss": 1.237,
"step": 325500
},
{
"epoch": 0.85,
"learning_rate": 7.5238765325932586e-06,
"loss": 1.2242,
"step": 326000
},
{
"epoch": 0.85,
"learning_rate": 7.458729103962267e-06,
"loss": 1.2353,
"step": 326500
},
{
"epoch": 0.85,
"learning_rate": 7.393581675331275e-06,
"loss": 1.239,
"step": 327000
},
{
"epoch": 0.85,
"learning_rate": 7.328434246700283e-06,
"loss": 1.2448,
"step": 327500
},
{
"epoch": 0.85,
"learning_rate": 7.263286818069291e-06,
"loss": 1.2389,
"step": 328000
},
{
"epoch": 0.86,
"learning_rate": 7.198139389438299e-06,
"loss": 1.2387,
"step": 328500
},
{
"epoch": 0.86,
"learning_rate": 7.132991960807307e-06,
"loss": 1.242,
"step": 329000
},
{
"epoch": 0.86,
"learning_rate": 7.0678445321763155e-06,
"loss": 1.2347,
"step": 329500
},
{
"epoch": 0.86,
"learning_rate": 7.002697103545323e-06,
"loss": 1.2357,
"step": 330000
},
{
"epoch": 0.86,
"learning_rate": 6.937549674914332e-06,
"loss": 1.2378,
"step": 330500
},
{
"epoch": 0.86,
"learning_rate": 6.87240224628334e-06,
"loss": 1.2436,
"step": 331000
},
{
"epoch": 0.86,
"learning_rate": 6.8072548176523474e-06,
"loss": 1.2335,
"step": 331500
},
{
"epoch": 0.87,
"learning_rate": 6.742107389021356e-06,
"loss": 1.2444,
"step": 332000
},
{
"epoch": 0.87,
"learning_rate": 6.676959960390364e-06,
"loss": 1.2385,
"step": 332500
},
{
"epoch": 0.87,
"learning_rate": 6.611812531759372e-06,
"loss": 1.2399,
"step": 333000
},
{
"epoch": 0.87,
"learning_rate": 6.54666510312838e-06,
"loss": 1.2405,
"step": 333500
},
{
"epoch": 0.87,
"learning_rate": 6.481517674497388e-06,
"loss": 1.2354,
"step": 334000
},
{
"epoch": 0.87,
"learning_rate": 6.416370245866396e-06,
"loss": 1.2357,
"step": 334500
},
{
"epoch": 0.87,
"learning_rate": 6.351222817235404e-06,
"loss": 1.2321,
"step": 335000
},
{
"epoch": 0.87,
"learning_rate": 6.286075388604412e-06,
"loss": 1.241,
"step": 335500
},
{
"epoch": 0.88,
"learning_rate": 6.22092795997342e-06,
"loss": 1.2333,
"step": 336000
},
{
"epoch": 0.88,
"learning_rate": 6.1557805313424285e-06,
"loss": 1.2356,
"step": 336500
},
{
"epoch": 0.88,
"learning_rate": 6.090633102711436e-06,
"loss": 1.2372,
"step": 337000
},
{
"epoch": 0.88,
"learning_rate": 6.025485674080444e-06,
"loss": 1.238,
"step": 337500
},
{
"epoch": 0.88,
"learning_rate": 5.960338245449453e-06,
"loss": 1.2322,
"step": 338000
},
{
"epoch": 0.88,
"learning_rate": 5.8951908168184605e-06,
"loss": 1.2388,
"step": 338500
},
{
"epoch": 0.88,
"learning_rate": 5.830043388187468e-06,
"loss": 1.2371,
"step": 339000
},
{
"epoch": 0.88,
"learning_rate": 5.764895959556477e-06,
"loss": 1.23,
"step": 339500
},
{
"epoch": 0.89,
"learning_rate": 5.699748530925485e-06,
"loss": 1.2387,
"step": 340000
},
{
"epoch": 0.89,
"learning_rate": 5.634601102294492e-06,
"loss": 1.2379,
"step": 340500
},
{
"epoch": 0.89,
"learning_rate": 5.569453673663501e-06,
"loss": 1.2343,
"step": 341000
},
{
"epoch": 0.89,
"learning_rate": 5.504306245032509e-06,
"loss": 1.2251,
"step": 341500
},
{
"epoch": 0.89,
"learning_rate": 5.439158816401517e-06,
"loss": 1.2316,
"step": 342000
},
{
"epoch": 0.89,
"learning_rate": 5.374011387770525e-06,
"loss": 1.2261,
"step": 342500
},
{
"epoch": 0.89,
"learning_rate": 5.308863959139533e-06,
"loss": 1.2313,
"step": 343000
},
{
"epoch": 0.9,
"learning_rate": 5.2437165305085415e-06,
"loss": 1.2262,
"step": 343500
},
{
"epoch": 0.9,
"learning_rate": 5.178569101877549e-06,
"loss": 1.2244,
"step": 344000
},
{
"epoch": 0.9,
"learning_rate": 5.113421673246557e-06,
"loss": 1.2219,
"step": 344500
},
{
"epoch": 0.9,
"learning_rate": 5.048274244615566e-06,
"loss": 1.2371,
"step": 345000
},
{
"epoch": 0.9,
"learning_rate": 4.9831268159845735e-06,
"loss": 1.2361,
"step": 345500
},
{
"epoch": 0.9,
"learning_rate": 4.917979387353581e-06,
"loss": 1.2376,
"step": 346000
},
{
"epoch": 0.9,
"learning_rate": 4.85283195872259e-06,
"loss": 1.2303,
"step": 346500
},
{
"epoch": 0.9,
"learning_rate": 4.787684530091598e-06,
"loss": 1.2284,
"step": 347000
},
{
"epoch": 0.91,
"learning_rate": 4.722537101460605e-06,
"loss": 1.2293,
"step": 347500
},
{
"epoch": 0.91,
"learning_rate": 4.657389672829614e-06,
"loss": 1.2322,
"step": 348000
},
{
"epoch": 0.91,
"learning_rate": 4.592242244198622e-06,
"loss": 1.2377,
"step": 348500
},
{
"epoch": 0.91,
"learning_rate": 4.5270948155676296e-06,
"loss": 1.2291,
"step": 349000
},
{
"epoch": 0.91,
"learning_rate": 4.461947386936638e-06,
"loss": 1.2351,
"step": 349500
},
{
"epoch": 0.91,
"learning_rate": 4.396799958305646e-06,
"loss": 1.2346,
"step": 350000
},
{
"epoch": 0.91,
"learning_rate": 4.331652529674654e-06,
"loss": 1.2344,
"step": 350500
},
{
"epoch": 0.91,
"learning_rate": 4.266505101043662e-06,
"loss": 1.2364,
"step": 351000
},
{
"epoch": 0.92,
"learning_rate": 4.20135767241267e-06,
"loss": 1.2236,
"step": 351500
},
{
"epoch": 0.92,
"learning_rate": 4.136210243781678e-06,
"loss": 1.2356,
"step": 352000
},
{
"epoch": 0.92,
"learning_rate": 4.0710628151506865e-06,
"loss": 1.2279,
"step": 352500
},
{
"epoch": 0.92,
"learning_rate": 4.005915386519694e-06,
"loss": 1.2347,
"step": 353000
},
{
"epoch": 0.92,
"learning_rate": 3.940767957888702e-06,
"loss": 1.2281,
"step": 353500
},
{
"epoch": 0.92,
"learning_rate": 3.875620529257711e-06,
"loss": 1.2307,
"step": 354000
},
{
"epoch": 0.92,
"learning_rate": 3.8104731006267184e-06,
"loss": 1.2296,
"step": 354500
},
{
"epoch": 0.93,
"learning_rate": 3.7453256719957266e-06,
"loss": 1.2299,
"step": 355000
},
{
"epoch": 0.93,
"learning_rate": 3.680178243364735e-06,
"loss": 1.2335,
"step": 355500
},
{
"epoch": 0.93,
"learning_rate": 3.6150308147337426e-06,
"loss": 1.2345,
"step": 356000
},
{
"epoch": 0.93,
"learning_rate": 3.5498833861027508e-06,
"loss": 1.2255,
"step": 356500
},
{
"epoch": 0.93,
"learning_rate": 3.484735957471759e-06,
"loss": 1.2279,
"step": 357000
},
{
"epoch": 0.93,
"learning_rate": 3.419588528840767e-06,
"loss": 1.2318,
"step": 357500
},
{
"epoch": 0.93,
"learning_rate": 3.354441100209775e-06,
"loss": 1.2328,
"step": 358000
},
{
"epoch": 0.93,
"learning_rate": 3.289293671578783e-06,
"loss": 1.2237,
"step": 358500
},
{
"epoch": 0.94,
"learning_rate": 3.2241462429477913e-06,
"loss": 1.2246,
"step": 359000
},
{
"epoch": 0.94,
"learning_rate": 3.158998814316799e-06,
"loss": 1.2295,
"step": 359500
},
{
"epoch": 0.94,
"learning_rate": 3.0938513856858073e-06,
"loss": 1.2272,
"step": 360000
},
{
"epoch": 0.94,
"learning_rate": 3.0287039570548155e-06,
"loss": 1.2275,
"step": 360500
},
{
"epoch": 0.94,
"learning_rate": 2.9635565284238233e-06,
"loss": 1.2233,
"step": 361000
},
{
"epoch": 0.94,
"learning_rate": 2.8984090997928315e-06,
"loss": 1.2321,
"step": 361500
},
{
"epoch": 0.94,
"learning_rate": 2.8332616711618396e-06,
"loss": 1.2314,
"step": 362000
},
{
"epoch": 0.94,
"learning_rate": 2.7681142425308474e-06,
"loss": 1.2295,
"step": 362500
},
{
"epoch": 0.95,
"learning_rate": 2.702966813899855e-06,
"loss": 1.2241,
"step": 363000
},
{
"epoch": 0.95,
"learning_rate": 2.6378193852688634e-06,
"loss": 1.2239,
"step": 363500
},
{
"epoch": 0.95,
"learning_rate": 2.5726719566378716e-06,
"loss": 1.224,
"step": 364000
},
{
"epoch": 0.95,
"learning_rate": 2.5075245280068793e-06,
"loss": 1.2289,
"step": 364500
},
{
"epoch": 0.95,
"learning_rate": 2.4423770993758875e-06,
"loss": 1.2216,
"step": 365000
},
{
"epoch": 0.95,
"learning_rate": 2.3772296707448957e-06,
"loss": 1.2295,
"step": 365500
},
{
"epoch": 0.95,
"learning_rate": 2.3120822421139035e-06,
"loss": 1.2328,
"step": 366000
},
{
"epoch": 0.96,
"learning_rate": 2.2469348134829117e-06,
"loss": 1.2245,
"step": 366500
},
{
"epoch": 0.96,
"learning_rate": 2.18178738485192e-06,
"loss": 1.2259,
"step": 367000
},
{
"epoch": 0.96,
"learning_rate": 2.116639956220928e-06,
"loss": 1.2159,
"step": 367500
},
{
"epoch": 0.96,
"learning_rate": 2.051492527589936e-06,
"loss": 1.2287,
"step": 368000
},
{
"epoch": 0.96,
"learning_rate": 1.986345098958944e-06,
"loss": 1.2279,
"step": 368500
},
{
"epoch": 0.96,
"learning_rate": 1.9211976703279522e-06,
"loss": 1.23,
"step": 369000
},
{
"epoch": 0.96,
"learning_rate": 1.8560502416969602e-06,
"loss": 1.2333,
"step": 369500
},
{
"epoch": 0.96,
"learning_rate": 1.7909028130659682e-06,
"loss": 1.2307,
"step": 370000
},
{
"epoch": 0.97,
"learning_rate": 1.7257553844349762e-06,
"loss": 1.2207,
"step": 370500
},
{
"epoch": 0.97,
"learning_rate": 1.6606079558039844e-06,
"loss": 1.2358,
"step": 371000
},
{
"epoch": 0.97,
"learning_rate": 1.5954605271729924e-06,
"loss": 1.2185,
"step": 371500
},
{
"epoch": 0.97,
"learning_rate": 1.5303130985420006e-06,
"loss": 1.2283,
"step": 372000
},
{
"epoch": 0.97,
"learning_rate": 1.4651656699110088e-06,
"loss": 1.2243,
"step": 372500
},
{
"epoch": 0.97,
"learning_rate": 1.4000182412800167e-06,
"loss": 1.2249,
"step": 373000
},
{
"epoch": 0.97,
"learning_rate": 1.334870812649025e-06,
"loss": 1.2199,
"step": 373500
},
{
"epoch": 0.97,
"learning_rate": 1.269723384018033e-06,
"loss": 1.2299,
"step": 374000
},
{
"epoch": 0.98,
"learning_rate": 1.204575955387041e-06,
"loss": 1.2126,
"step": 374500
},
{
"epoch": 0.98,
"learning_rate": 1.139428526756049e-06,
"loss": 1.2272,
"step": 375000
},
{
"epoch": 0.98,
"learning_rate": 1.074281098125057e-06,
"loss": 1.2325,
"step": 375500
},
{
"epoch": 0.98,
"learning_rate": 1.0091336694940653e-06,
"loss": 1.2247,
"step": 376000
},
{
"epoch": 0.98,
"learning_rate": 9.439862408630733e-07,
"loss": 1.2243,
"step": 376500
},
{
"epoch": 0.98,
"learning_rate": 8.788388122320813e-07,
"loss": 1.2236,
"step": 377000
},
{
"epoch": 0.98,
"learning_rate": 8.136913836010893e-07,
"loss": 1.2297,
"step": 377500
},
{
"epoch": 0.99,
"learning_rate": 7.485439549700973e-07,
"loss": 1.2215,
"step": 378000
},
{
"epoch": 0.99,
"learning_rate": 6.833965263391054e-07,
"loss": 1.2209,
"step": 378500
},
{
"epoch": 0.99,
"learning_rate": 6.182490977081135e-07,
"loss": 1.2283,
"step": 379000
},
{
"epoch": 0.99,
"learning_rate": 5.531016690771216e-07,
"loss": 1.2304,
"step": 379500
},
{
"epoch": 0.99,
"learning_rate": 4.879542404461296e-07,
"loss": 1.2144,
"step": 380000
},
{
"epoch": 0.99,
"learning_rate": 4.2280681181513764e-07,
"loss": 1.2267,
"step": 380500
},
{
"epoch": 0.99,
"learning_rate": 3.5765938318414573e-07,
"loss": 1.2266,
"step": 381000
},
{
"epoch": 0.99,
"learning_rate": 2.925119545531538e-07,
"loss": 1.2236,
"step": 381500
},
{
"epoch": 1.0,
"learning_rate": 2.2736452592216185e-07,
"loss": 1.2162,
"step": 382000
},
{
"epoch": 1.0,
"learning_rate": 1.6221709729116992e-07,
"loss": 1.2267,
"step": 382500
},
{
"epoch": 1.0,
"learning_rate": 9.706966866017799e-08,
"loss": 1.234,
"step": 383000
},
{
"epoch": 1.0,
"learning_rate": 3.192224002918605e-08,
"loss": 1.2369,
"step": 383500
}
],
"max_steps": 383745,
"num_train_epochs": 1,
"total_flos": 1.61642598748028e+18,
"trial_name": null,
"trial_params": null
}