PetBERT / trainer_state.json
seanfarrell's picture
Upload Model
4d6d0d0
raw
history blame
312 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 1260555,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.9992066986367118e-05,
"loss": 3.3636,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 1.9984133972734233e-05,
"loss": 2.8247,
"step": 1000
},
{
"epoch": 0.0,
"learning_rate": 1.997620095910135e-05,
"loss": 2.6434,
"step": 1500
},
{
"epoch": 0.0,
"learning_rate": 1.996826794546847e-05,
"loss": 2.5482,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 1.996033493183558e-05,
"loss": 2.4555,
"step": 2500
},
{
"epoch": 0.01,
"learning_rate": 1.9952401918202697e-05,
"loss": 2.4086,
"step": 3000
},
{
"epoch": 0.01,
"learning_rate": 1.9944468904569813e-05,
"loss": 2.348,
"step": 3500
},
{
"epoch": 0.01,
"learning_rate": 1.993653589093693e-05,
"loss": 2.3079,
"step": 4000
},
{
"epoch": 0.01,
"learning_rate": 1.9928602877304048e-05,
"loss": 2.263,
"step": 4500
},
{
"epoch": 0.01,
"learning_rate": 1.9920669863671164e-05,
"loss": 2.2407,
"step": 5000
},
{
"epoch": 0.01,
"learning_rate": 1.991273685003828e-05,
"loss": 2.2031,
"step": 5500
},
{
"epoch": 0.01,
"learning_rate": 1.9904803836405395e-05,
"loss": 2.1797,
"step": 6000
},
{
"epoch": 0.02,
"learning_rate": 1.989687082277251e-05,
"loss": 2.1896,
"step": 6500
},
{
"epoch": 0.02,
"learning_rate": 1.9888937809139627e-05,
"loss": 2.14,
"step": 7000
},
{
"epoch": 0.02,
"learning_rate": 1.9881004795506743e-05,
"loss": 2.1279,
"step": 7500
},
{
"epoch": 0.02,
"learning_rate": 1.987307178187386e-05,
"loss": 2.0861,
"step": 8000
},
{
"epoch": 0.02,
"learning_rate": 1.9865138768240975e-05,
"loss": 2.0768,
"step": 8500
},
{
"epoch": 0.02,
"learning_rate": 1.985720575460809e-05,
"loss": 2.0827,
"step": 9000
},
{
"epoch": 0.02,
"learning_rate": 1.9849272740975206e-05,
"loss": 2.0539,
"step": 9500
},
{
"epoch": 0.02,
"learning_rate": 1.9841339727342322e-05,
"loss": 2.03,
"step": 10000
},
{
"epoch": 0.02,
"learning_rate": 1.983340671370944e-05,
"loss": 2.0105,
"step": 10500
},
{
"epoch": 0.03,
"learning_rate": 1.9825473700076557e-05,
"loss": 1.9989,
"step": 11000
},
{
"epoch": 0.03,
"learning_rate": 1.981754068644367e-05,
"loss": 1.9886,
"step": 11500
},
{
"epoch": 0.03,
"learning_rate": 1.9809607672810786e-05,
"loss": 1.9745,
"step": 12000
},
{
"epoch": 0.03,
"learning_rate": 1.98016746591779e-05,
"loss": 1.9839,
"step": 12500
},
{
"epoch": 0.03,
"learning_rate": 1.979374164554502e-05,
"loss": 1.9611,
"step": 13000
},
{
"epoch": 0.03,
"learning_rate": 1.9785808631912137e-05,
"loss": 1.9685,
"step": 13500
},
{
"epoch": 0.03,
"learning_rate": 1.9777875618279253e-05,
"loss": 1.9465,
"step": 14000
},
{
"epoch": 0.03,
"learning_rate": 1.976994260464637e-05,
"loss": 1.9532,
"step": 14500
},
{
"epoch": 0.04,
"learning_rate": 1.9762009591013484e-05,
"loss": 1.9243,
"step": 15000
},
{
"epoch": 0.04,
"learning_rate": 1.97540765773806e-05,
"loss": 1.9132,
"step": 15500
},
{
"epoch": 0.04,
"learning_rate": 1.9746143563747716e-05,
"loss": 1.907,
"step": 16000
},
{
"epoch": 0.04,
"learning_rate": 1.9738210550114832e-05,
"loss": 1.9201,
"step": 16500
},
{
"epoch": 0.04,
"learning_rate": 1.9730277536481948e-05,
"loss": 1.9033,
"step": 17000
},
{
"epoch": 0.04,
"learning_rate": 1.9722344522849064e-05,
"loss": 1.9114,
"step": 17500
},
{
"epoch": 0.04,
"learning_rate": 1.971441150921618e-05,
"loss": 1.8667,
"step": 18000
},
{
"epoch": 0.04,
"learning_rate": 1.9706478495583295e-05,
"loss": 1.8737,
"step": 18500
},
{
"epoch": 0.05,
"learning_rate": 1.969854548195041e-05,
"loss": 1.8696,
"step": 19000
},
{
"epoch": 0.05,
"learning_rate": 1.969061246831753e-05,
"loss": 1.871,
"step": 19500
},
{
"epoch": 0.05,
"learning_rate": 1.9682679454684646e-05,
"loss": 1.8694,
"step": 20000
},
{
"epoch": 0.05,
"learning_rate": 1.967474644105176e-05,
"loss": 1.8408,
"step": 20500
},
{
"epoch": 0.05,
"learning_rate": 1.9666813427418875e-05,
"loss": 1.8475,
"step": 21000
},
{
"epoch": 0.05,
"learning_rate": 1.9658880413785994e-05,
"loss": 1.8506,
"step": 21500
},
{
"epoch": 0.05,
"learning_rate": 1.965094740015311e-05,
"loss": 1.8373,
"step": 22000
},
{
"epoch": 0.05,
"learning_rate": 1.9643014386520226e-05,
"loss": 1.8109,
"step": 22500
},
{
"epoch": 0.05,
"learning_rate": 1.963508137288734e-05,
"loss": 1.819,
"step": 23000
},
{
"epoch": 0.06,
"learning_rate": 1.9627148359254457e-05,
"loss": 1.8175,
"step": 23500
},
{
"epoch": 0.06,
"learning_rate": 1.9619215345621573e-05,
"loss": 1.806,
"step": 24000
},
{
"epoch": 0.06,
"learning_rate": 1.961128233198869e-05,
"loss": 1.824,
"step": 24500
},
{
"epoch": 0.06,
"learning_rate": 1.9603349318355805e-05,
"loss": 1.8055,
"step": 25000
},
{
"epoch": 0.06,
"learning_rate": 1.959541630472292e-05,
"loss": 1.7914,
"step": 25500
},
{
"epoch": 0.06,
"learning_rate": 1.9587483291090037e-05,
"loss": 1.7943,
"step": 26000
},
{
"epoch": 0.06,
"learning_rate": 1.9579550277457153e-05,
"loss": 1.7821,
"step": 26500
},
{
"epoch": 0.06,
"learning_rate": 1.957161726382427e-05,
"loss": 1.7968,
"step": 27000
},
{
"epoch": 0.07,
"learning_rate": 1.9563684250191384e-05,
"loss": 1.7688,
"step": 27500
},
{
"epoch": 0.07,
"learning_rate": 1.9555751236558504e-05,
"loss": 1.7674,
"step": 28000
},
{
"epoch": 0.07,
"learning_rate": 1.954781822292562e-05,
"loss": 1.7623,
"step": 28500
},
{
"epoch": 0.07,
"learning_rate": 1.9539885209292732e-05,
"loss": 1.7549,
"step": 29000
},
{
"epoch": 0.07,
"learning_rate": 1.9531952195659848e-05,
"loss": 1.7584,
"step": 29500
},
{
"epoch": 0.07,
"learning_rate": 1.9524019182026964e-05,
"loss": 1.7497,
"step": 30000
},
{
"epoch": 0.07,
"learning_rate": 1.9516086168394083e-05,
"loss": 1.7464,
"step": 30500
},
{
"epoch": 0.07,
"learning_rate": 1.95081531547612e-05,
"loss": 1.7458,
"step": 31000
},
{
"epoch": 0.07,
"learning_rate": 1.9500220141128315e-05,
"loss": 1.7666,
"step": 31500
},
{
"epoch": 0.08,
"learning_rate": 1.949228712749543e-05,
"loss": 1.7518,
"step": 32000
},
{
"epoch": 0.08,
"learning_rate": 1.9484354113862546e-05,
"loss": 1.7312,
"step": 32500
},
{
"epoch": 0.08,
"learning_rate": 1.9476421100229662e-05,
"loss": 1.7569,
"step": 33000
},
{
"epoch": 0.08,
"learning_rate": 1.9468488086596778e-05,
"loss": 1.7352,
"step": 33500
},
{
"epoch": 0.08,
"learning_rate": 1.9460555072963894e-05,
"loss": 1.723,
"step": 34000
},
{
"epoch": 0.08,
"learning_rate": 1.945262205933101e-05,
"loss": 1.7439,
"step": 34500
},
{
"epoch": 0.08,
"learning_rate": 1.9444689045698126e-05,
"loss": 1.7154,
"step": 35000
},
{
"epoch": 0.08,
"learning_rate": 1.943675603206524e-05,
"loss": 1.7245,
"step": 35500
},
{
"epoch": 0.09,
"learning_rate": 1.9428823018432357e-05,
"loss": 1.7139,
"step": 36000
},
{
"epoch": 0.09,
"learning_rate": 1.9420890004799477e-05,
"loss": 1.7167,
"step": 36500
},
{
"epoch": 0.09,
"learning_rate": 1.9412956991166593e-05,
"loss": 1.7218,
"step": 37000
},
{
"epoch": 0.09,
"learning_rate": 1.940502397753371e-05,
"loss": 1.7136,
"step": 37500
},
{
"epoch": 0.09,
"learning_rate": 1.939709096390082e-05,
"loss": 1.6848,
"step": 38000
},
{
"epoch": 0.09,
"learning_rate": 1.9389157950267937e-05,
"loss": 1.7024,
"step": 38500
},
{
"epoch": 0.09,
"learning_rate": 1.9381224936635056e-05,
"loss": 1.7083,
"step": 39000
},
{
"epoch": 0.09,
"learning_rate": 1.9373291923002172e-05,
"loss": 1.6827,
"step": 39500
},
{
"epoch": 0.1,
"learning_rate": 1.9365358909369288e-05,
"loss": 1.6955,
"step": 40000
},
{
"epoch": 0.1,
"learning_rate": 1.9357425895736404e-05,
"loss": 1.6975,
"step": 40500
},
{
"epoch": 0.1,
"learning_rate": 1.934949288210352e-05,
"loss": 1.6946,
"step": 41000
},
{
"epoch": 0.1,
"learning_rate": 1.9341559868470635e-05,
"loss": 1.6997,
"step": 41500
},
{
"epoch": 0.1,
"learning_rate": 1.933362685483775e-05,
"loss": 1.6943,
"step": 42000
},
{
"epoch": 0.1,
"learning_rate": 1.9325693841204867e-05,
"loss": 1.6775,
"step": 42500
},
{
"epoch": 0.1,
"learning_rate": 1.9317760827571983e-05,
"loss": 1.6608,
"step": 43000
},
{
"epoch": 0.1,
"learning_rate": 1.93098278139391e-05,
"loss": 1.6745,
"step": 43500
},
{
"epoch": 0.1,
"learning_rate": 1.9301894800306215e-05,
"loss": 1.6841,
"step": 44000
},
{
"epoch": 0.11,
"learning_rate": 1.929396178667333e-05,
"loss": 1.6698,
"step": 44500
},
{
"epoch": 0.11,
"learning_rate": 1.928602877304045e-05,
"loss": 1.6706,
"step": 45000
},
{
"epoch": 0.11,
"learning_rate": 1.9278095759407566e-05,
"loss": 1.6591,
"step": 45500
},
{
"epoch": 0.11,
"learning_rate": 1.927016274577468e-05,
"loss": 1.6406,
"step": 46000
},
{
"epoch": 0.11,
"learning_rate": 1.9262229732141797e-05,
"loss": 1.6599,
"step": 46500
},
{
"epoch": 0.11,
"learning_rate": 1.925429671850891e-05,
"loss": 1.6872,
"step": 47000
},
{
"epoch": 0.11,
"learning_rate": 1.924636370487603e-05,
"loss": 1.6721,
"step": 47500
},
{
"epoch": 0.11,
"learning_rate": 1.9238430691243145e-05,
"loss": 1.6741,
"step": 48000
},
{
"epoch": 0.12,
"learning_rate": 1.923049767761026e-05,
"loss": 1.6763,
"step": 48500
},
{
"epoch": 0.12,
"learning_rate": 1.9222564663977377e-05,
"loss": 1.64,
"step": 49000
},
{
"epoch": 0.12,
"learning_rate": 1.9214631650344493e-05,
"loss": 1.6618,
"step": 49500
},
{
"epoch": 0.12,
"learning_rate": 1.920669863671161e-05,
"loss": 1.6495,
"step": 50000
},
{
"epoch": 0.12,
"learning_rate": 1.9198765623078724e-05,
"loss": 1.6656,
"step": 50500
},
{
"epoch": 0.12,
"learning_rate": 1.9190832609445844e-05,
"loss": 1.6539,
"step": 51000
},
{
"epoch": 0.12,
"learning_rate": 1.9182899595812956e-05,
"loss": 1.6522,
"step": 51500
},
{
"epoch": 0.12,
"learning_rate": 1.9174966582180072e-05,
"loss": 1.6519,
"step": 52000
},
{
"epoch": 0.12,
"learning_rate": 1.9167033568547188e-05,
"loss": 1.6273,
"step": 52500
},
{
"epoch": 0.13,
"learning_rate": 1.9159100554914304e-05,
"loss": 1.6323,
"step": 53000
},
{
"epoch": 0.13,
"learning_rate": 1.9151167541281423e-05,
"loss": 1.6291,
"step": 53500
},
{
"epoch": 0.13,
"learning_rate": 1.914323452764854e-05,
"loss": 1.6351,
"step": 54000
},
{
"epoch": 0.13,
"learning_rate": 1.9135301514015655e-05,
"loss": 1.6392,
"step": 54500
},
{
"epoch": 0.13,
"learning_rate": 1.912736850038277e-05,
"loss": 1.631,
"step": 55000
},
{
"epoch": 0.13,
"learning_rate": 1.9119435486749886e-05,
"loss": 1.6256,
"step": 55500
},
{
"epoch": 0.13,
"learning_rate": 1.9111502473117002e-05,
"loss": 1.6261,
"step": 56000
},
{
"epoch": 0.13,
"learning_rate": 1.9103569459484118e-05,
"loss": 1.64,
"step": 56500
},
{
"epoch": 0.14,
"learning_rate": 1.9095636445851234e-05,
"loss": 1.6334,
"step": 57000
},
{
"epoch": 0.14,
"learning_rate": 1.908770343221835e-05,
"loss": 1.6164,
"step": 57500
},
{
"epoch": 0.14,
"learning_rate": 1.9079770418585466e-05,
"loss": 1.6201,
"step": 58000
},
{
"epoch": 0.14,
"learning_rate": 1.907183740495258e-05,
"loss": 1.6119,
"step": 58500
},
{
"epoch": 0.14,
"learning_rate": 1.9063904391319697e-05,
"loss": 1.6112,
"step": 59000
},
{
"epoch": 0.14,
"learning_rate": 1.9055971377686813e-05,
"loss": 1.608,
"step": 59500
},
{
"epoch": 0.14,
"learning_rate": 1.9048038364053933e-05,
"loss": 1.61,
"step": 60000
},
{
"epoch": 0.14,
"learning_rate": 1.9040105350421045e-05,
"loss": 1.6156,
"step": 60500
},
{
"epoch": 0.15,
"learning_rate": 1.903217233678816e-05,
"loss": 1.5982,
"step": 61000
},
{
"epoch": 0.15,
"learning_rate": 1.9024239323155277e-05,
"loss": 1.6265,
"step": 61500
},
{
"epoch": 0.15,
"learning_rate": 1.9016306309522396e-05,
"loss": 1.5894,
"step": 62000
},
{
"epoch": 0.15,
"learning_rate": 1.9008373295889512e-05,
"loss": 1.619,
"step": 62500
},
{
"epoch": 0.15,
"learning_rate": 1.9000440282256628e-05,
"loss": 1.5871,
"step": 63000
},
{
"epoch": 0.15,
"learning_rate": 1.8992507268623744e-05,
"loss": 1.5939,
"step": 63500
},
{
"epoch": 0.15,
"learning_rate": 1.898457425499086e-05,
"loss": 1.5893,
"step": 64000
},
{
"epoch": 0.15,
"learning_rate": 1.8976641241357975e-05,
"loss": 1.5878,
"step": 64500
},
{
"epoch": 0.15,
"learning_rate": 1.896870822772509e-05,
"loss": 1.5954,
"step": 65000
},
{
"epoch": 0.16,
"learning_rate": 1.8960775214092207e-05,
"loss": 1.5962,
"step": 65500
},
{
"epoch": 0.16,
"learning_rate": 1.8952842200459323e-05,
"loss": 1.6119,
"step": 66000
},
{
"epoch": 0.16,
"learning_rate": 1.894490918682644e-05,
"loss": 1.5776,
"step": 66500
},
{
"epoch": 0.16,
"learning_rate": 1.8936976173193555e-05,
"loss": 1.5796,
"step": 67000
},
{
"epoch": 0.16,
"learning_rate": 1.892904315956067e-05,
"loss": 1.6101,
"step": 67500
},
{
"epoch": 0.16,
"learning_rate": 1.8921110145927786e-05,
"loss": 1.5797,
"step": 68000
},
{
"epoch": 0.16,
"learning_rate": 1.8913177132294906e-05,
"loss": 1.5989,
"step": 68500
},
{
"epoch": 0.16,
"learning_rate": 1.8905244118662018e-05,
"loss": 1.5874,
"step": 69000
},
{
"epoch": 0.17,
"learning_rate": 1.8897311105029134e-05,
"loss": 1.5918,
"step": 69500
},
{
"epoch": 0.17,
"learning_rate": 1.888937809139625e-05,
"loss": 1.5872,
"step": 70000
},
{
"epoch": 0.17,
"learning_rate": 1.8881445077763366e-05,
"loss": 1.6041,
"step": 70500
},
{
"epoch": 0.17,
"learning_rate": 1.8873512064130485e-05,
"loss": 1.5861,
"step": 71000
},
{
"epoch": 0.17,
"learning_rate": 1.88655790504976e-05,
"loss": 1.5676,
"step": 71500
},
{
"epoch": 0.17,
"learning_rate": 1.8857646036864717e-05,
"loss": 1.5859,
"step": 72000
},
{
"epoch": 0.17,
"learning_rate": 1.8849713023231833e-05,
"loss": 1.5694,
"step": 72500
},
{
"epoch": 0.17,
"learning_rate": 1.884178000959895e-05,
"loss": 1.5606,
"step": 73000
},
{
"epoch": 0.17,
"learning_rate": 1.8833846995966064e-05,
"loss": 1.5768,
"step": 73500
},
{
"epoch": 0.18,
"learning_rate": 1.882591398233318e-05,
"loss": 1.5834,
"step": 74000
},
{
"epoch": 0.18,
"learning_rate": 1.8817980968700296e-05,
"loss": 1.5815,
"step": 74500
},
{
"epoch": 0.18,
"learning_rate": 1.8810047955067412e-05,
"loss": 1.569,
"step": 75000
},
{
"epoch": 0.18,
"learning_rate": 1.8802114941434528e-05,
"loss": 1.5839,
"step": 75500
},
{
"epoch": 0.18,
"learning_rate": 1.8794181927801644e-05,
"loss": 1.577,
"step": 76000
},
{
"epoch": 0.18,
"learning_rate": 1.878624891416876e-05,
"loss": 1.5707,
"step": 76500
},
{
"epoch": 0.18,
"learning_rate": 1.877831590053588e-05,
"loss": 1.5722,
"step": 77000
},
{
"epoch": 0.18,
"learning_rate": 1.8770382886902995e-05,
"loss": 1.5634,
"step": 77500
},
{
"epoch": 0.19,
"learning_rate": 1.8762449873270107e-05,
"loss": 1.5612,
"step": 78000
},
{
"epoch": 0.19,
"learning_rate": 1.8754516859637223e-05,
"loss": 1.5566,
"step": 78500
},
{
"epoch": 0.19,
"learning_rate": 1.874658384600434e-05,
"loss": 1.5693,
"step": 79000
},
{
"epoch": 0.19,
"learning_rate": 1.8738650832371458e-05,
"loss": 1.5542,
"step": 79500
},
{
"epoch": 0.19,
"learning_rate": 1.8730717818738574e-05,
"loss": 1.5495,
"step": 80000
},
{
"epoch": 0.19,
"learning_rate": 1.872278480510569e-05,
"loss": 1.5419,
"step": 80500
},
{
"epoch": 0.19,
"learning_rate": 1.8714851791472806e-05,
"loss": 1.5529,
"step": 81000
},
{
"epoch": 0.19,
"learning_rate": 1.870691877783992e-05,
"loss": 1.5448,
"step": 81500
},
{
"epoch": 0.2,
"learning_rate": 1.8698985764207037e-05,
"loss": 1.5696,
"step": 82000
},
{
"epoch": 0.2,
"learning_rate": 1.8691052750574153e-05,
"loss": 1.5589,
"step": 82500
},
{
"epoch": 0.2,
"learning_rate": 1.868311973694127e-05,
"loss": 1.5483,
"step": 83000
},
{
"epoch": 0.2,
"learning_rate": 1.8675186723308385e-05,
"loss": 1.5546,
"step": 83500
},
{
"epoch": 0.2,
"learning_rate": 1.86672537096755e-05,
"loss": 1.5535,
"step": 84000
},
{
"epoch": 0.2,
"learning_rate": 1.8659320696042617e-05,
"loss": 1.5528,
"step": 84500
},
{
"epoch": 0.2,
"learning_rate": 1.8651387682409733e-05,
"loss": 1.5478,
"step": 85000
},
{
"epoch": 0.2,
"learning_rate": 1.8643454668776852e-05,
"loss": 1.5418,
"step": 85500
},
{
"epoch": 0.2,
"learning_rate": 1.8635521655143968e-05,
"loss": 1.5414,
"step": 86000
},
{
"epoch": 0.21,
"learning_rate": 1.8627588641511084e-05,
"loss": 1.542,
"step": 86500
},
{
"epoch": 0.21,
"learning_rate": 1.8619655627878196e-05,
"loss": 1.5685,
"step": 87000
},
{
"epoch": 0.21,
"learning_rate": 1.8611722614245312e-05,
"loss": 1.5616,
"step": 87500
},
{
"epoch": 0.21,
"learning_rate": 1.860378960061243e-05,
"loss": 1.5236,
"step": 88000
},
{
"epoch": 0.21,
"learning_rate": 1.8595856586979547e-05,
"loss": 1.5412,
"step": 88500
},
{
"epoch": 0.21,
"learning_rate": 1.8587923573346663e-05,
"loss": 1.5395,
"step": 89000
},
{
"epoch": 0.21,
"learning_rate": 1.857999055971378e-05,
"loss": 1.537,
"step": 89500
},
{
"epoch": 0.21,
"learning_rate": 1.8572057546080895e-05,
"loss": 1.5405,
"step": 90000
},
{
"epoch": 0.22,
"learning_rate": 1.856412453244801e-05,
"loss": 1.5417,
"step": 90500
},
{
"epoch": 0.22,
"learning_rate": 1.8556191518815126e-05,
"loss": 1.5448,
"step": 91000
},
{
"epoch": 0.22,
"learning_rate": 1.8548258505182242e-05,
"loss": 1.5352,
"step": 91500
},
{
"epoch": 0.22,
"learning_rate": 1.8540325491549358e-05,
"loss": 1.531,
"step": 92000
},
{
"epoch": 0.22,
"learning_rate": 1.8532392477916474e-05,
"loss": 1.528,
"step": 92500
},
{
"epoch": 0.22,
"learning_rate": 1.852445946428359e-05,
"loss": 1.5332,
"step": 93000
},
{
"epoch": 0.22,
"learning_rate": 1.8516526450650706e-05,
"loss": 1.523,
"step": 93500
},
{
"epoch": 0.22,
"learning_rate": 1.8508593437017825e-05,
"loss": 1.5142,
"step": 94000
},
{
"epoch": 0.22,
"learning_rate": 1.850066042338494e-05,
"loss": 1.5457,
"step": 94500
},
{
"epoch": 0.23,
"learning_rate": 1.8492727409752057e-05,
"loss": 1.5238,
"step": 95000
},
{
"epoch": 0.23,
"learning_rate": 1.8484794396119172e-05,
"loss": 1.5247,
"step": 95500
},
{
"epoch": 0.23,
"learning_rate": 1.8476861382486285e-05,
"loss": 1.5395,
"step": 96000
},
{
"epoch": 0.23,
"learning_rate": 1.8468928368853404e-05,
"loss": 1.5273,
"step": 96500
},
{
"epoch": 0.23,
"learning_rate": 1.846099535522052e-05,
"loss": 1.5271,
"step": 97000
},
{
"epoch": 0.23,
"learning_rate": 1.8453062341587636e-05,
"loss": 1.5232,
"step": 97500
},
{
"epoch": 0.23,
"learning_rate": 1.8445129327954752e-05,
"loss": 1.5027,
"step": 98000
},
{
"epoch": 0.23,
"learning_rate": 1.8437196314321868e-05,
"loss": 1.5073,
"step": 98500
},
{
"epoch": 0.24,
"learning_rate": 1.8429263300688984e-05,
"loss": 1.5218,
"step": 99000
},
{
"epoch": 0.24,
"learning_rate": 1.84213302870561e-05,
"loss": 1.5144,
"step": 99500
},
{
"epoch": 0.24,
"learning_rate": 1.8413397273423215e-05,
"loss": 1.5173,
"step": 100000
},
{
"epoch": 0.24,
"learning_rate": 1.840546425979033e-05,
"loss": 1.5381,
"step": 100500
},
{
"epoch": 0.24,
"learning_rate": 1.8397531246157447e-05,
"loss": 1.5068,
"step": 101000
},
{
"epoch": 0.24,
"learning_rate": 1.8389598232524563e-05,
"loss": 1.5072,
"step": 101500
},
{
"epoch": 0.24,
"learning_rate": 1.838166521889168e-05,
"loss": 1.5171,
"step": 102000
},
{
"epoch": 0.24,
"learning_rate": 1.8373732205258798e-05,
"loss": 1.5188,
"step": 102500
},
{
"epoch": 0.25,
"learning_rate": 1.8365799191625914e-05,
"loss": 1.5285,
"step": 103000
},
{
"epoch": 0.25,
"learning_rate": 1.835786617799303e-05,
"loss": 1.5022,
"step": 103500
},
{
"epoch": 0.25,
"learning_rate": 1.8349933164360146e-05,
"loss": 1.5106,
"step": 104000
},
{
"epoch": 0.25,
"learning_rate": 1.834200015072726e-05,
"loss": 1.5098,
"step": 104500
},
{
"epoch": 0.25,
"learning_rate": 1.8334067137094377e-05,
"loss": 1.4992,
"step": 105000
},
{
"epoch": 0.25,
"learning_rate": 1.8326134123461493e-05,
"loss": 1.4907,
"step": 105500
},
{
"epoch": 0.25,
"learning_rate": 1.831820110982861e-05,
"loss": 1.5323,
"step": 106000
},
{
"epoch": 0.25,
"learning_rate": 1.8310268096195725e-05,
"loss": 1.5156,
"step": 106500
},
{
"epoch": 0.25,
"learning_rate": 1.830233508256284e-05,
"loss": 1.5154,
"step": 107000
},
{
"epoch": 0.26,
"learning_rate": 1.8294402068929957e-05,
"loss": 1.5203,
"step": 107500
},
{
"epoch": 0.26,
"learning_rate": 1.8286469055297072e-05,
"loss": 1.4912,
"step": 108000
},
{
"epoch": 0.26,
"learning_rate": 1.827853604166419e-05,
"loss": 1.5087,
"step": 108500
},
{
"epoch": 0.26,
"learning_rate": 1.8270603028031308e-05,
"loss": 1.4957,
"step": 109000
},
{
"epoch": 0.26,
"learning_rate": 1.826267001439842e-05,
"loss": 1.5215,
"step": 109500
},
{
"epoch": 0.26,
"learning_rate": 1.8254737000765536e-05,
"loss": 1.5169,
"step": 110000
},
{
"epoch": 0.26,
"learning_rate": 1.8246803987132652e-05,
"loss": 1.495,
"step": 110500
},
{
"epoch": 0.26,
"learning_rate": 1.8238870973499768e-05,
"loss": 1.4967,
"step": 111000
},
{
"epoch": 0.27,
"learning_rate": 1.8230937959866887e-05,
"loss": 1.5037,
"step": 111500
},
{
"epoch": 0.27,
"learning_rate": 1.8223004946234003e-05,
"loss": 1.4955,
"step": 112000
},
{
"epoch": 0.27,
"learning_rate": 1.821507193260112e-05,
"loss": 1.5085,
"step": 112500
},
{
"epoch": 0.27,
"learning_rate": 1.8207138918968235e-05,
"loss": 1.503,
"step": 113000
},
{
"epoch": 0.27,
"learning_rate": 1.8199205905335347e-05,
"loss": 1.4851,
"step": 113500
},
{
"epoch": 0.27,
"learning_rate": 1.8191272891702466e-05,
"loss": 1.4809,
"step": 114000
},
{
"epoch": 0.27,
"learning_rate": 1.8183339878069582e-05,
"loss": 1.492,
"step": 114500
},
{
"epoch": 0.27,
"learning_rate": 1.8175406864436698e-05,
"loss": 1.4864,
"step": 115000
},
{
"epoch": 0.27,
"learning_rate": 1.8167473850803814e-05,
"loss": 1.51,
"step": 115500
},
{
"epoch": 0.28,
"learning_rate": 1.815954083717093e-05,
"loss": 1.4747,
"step": 116000
},
{
"epoch": 0.28,
"learning_rate": 1.8151607823538046e-05,
"loss": 1.4872,
"step": 116500
},
{
"epoch": 0.28,
"learning_rate": 1.814367480990516e-05,
"loss": 1.5075,
"step": 117000
},
{
"epoch": 0.28,
"learning_rate": 1.813574179627228e-05,
"loss": 1.4848,
"step": 117500
},
{
"epoch": 0.28,
"learning_rate": 1.8127808782639393e-05,
"loss": 1.5053,
"step": 118000
},
{
"epoch": 0.28,
"learning_rate": 1.811987576900651e-05,
"loss": 1.4734,
"step": 118500
},
{
"epoch": 0.28,
"learning_rate": 1.8111942755373625e-05,
"loss": 1.4905,
"step": 119000
},
{
"epoch": 0.28,
"learning_rate": 1.810400974174074e-05,
"loss": 1.5025,
"step": 119500
},
{
"epoch": 0.29,
"learning_rate": 1.809607672810786e-05,
"loss": 1.4873,
"step": 120000
},
{
"epoch": 0.29,
"learning_rate": 1.8088143714474976e-05,
"loss": 1.4957,
"step": 120500
},
{
"epoch": 0.29,
"learning_rate": 1.8080210700842092e-05,
"loss": 1.5084,
"step": 121000
},
{
"epoch": 0.29,
"learning_rate": 1.8072277687209208e-05,
"loss": 1.4878,
"step": 121500
},
{
"epoch": 0.29,
"learning_rate": 1.8064344673576323e-05,
"loss": 1.4849,
"step": 122000
},
{
"epoch": 0.29,
"learning_rate": 1.805641165994344e-05,
"loss": 1.4744,
"step": 122500
},
{
"epoch": 0.29,
"learning_rate": 1.8048478646310555e-05,
"loss": 1.4895,
"step": 123000
},
{
"epoch": 0.29,
"learning_rate": 1.804054563267767e-05,
"loss": 1.4884,
"step": 123500
},
{
"epoch": 0.3,
"learning_rate": 1.8032612619044787e-05,
"loss": 1.4721,
"step": 124000
},
{
"epoch": 0.3,
"learning_rate": 1.8024679605411903e-05,
"loss": 1.481,
"step": 124500
},
{
"epoch": 0.3,
"learning_rate": 1.801674659177902e-05,
"loss": 1.4799,
"step": 125000
},
{
"epoch": 0.3,
"learning_rate": 1.8008813578146135e-05,
"loss": 1.4843,
"step": 125500
},
{
"epoch": 0.3,
"learning_rate": 1.8000880564513254e-05,
"loss": 1.4828,
"step": 126000
},
{
"epoch": 0.3,
"learning_rate": 1.799294755088037e-05,
"loss": 1.4799,
"step": 126500
},
{
"epoch": 0.3,
"learning_rate": 1.7985014537247482e-05,
"loss": 1.4707,
"step": 127000
},
{
"epoch": 0.3,
"learning_rate": 1.7977081523614598e-05,
"loss": 1.4809,
"step": 127500
},
{
"epoch": 0.3,
"learning_rate": 1.7969148509981714e-05,
"loss": 1.4778,
"step": 128000
},
{
"epoch": 0.31,
"learning_rate": 1.7961215496348833e-05,
"loss": 1.4675,
"step": 128500
},
{
"epoch": 0.31,
"learning_rate": 1.795328248271595e-05,
"loss": 1.4854,
"step": 129000
},
{
"epoch": 0.31,
"learning_rate": 1.7945349469083065e-05,
"loss": 1.4501,
"step": 129500
},
{
"epoch": 0.31,
"learning_rate": 1.793741645545018e-05,
"loss": 1.4603,
"step": 130000
},
{
"epoch": 0.31,
"learning_rate": 1.7929483441817297e-05,
"loss": 1.4748,
"step": 130500
},
{
"epoch": 0.31,
"learning_rate": 1.7921550428184412e-05,
"loss": 1.4578,
"step": 131000
},
{
"epoch": 0.31,
"learning_rate": 1.791361741455153e-05,
"loss": 1.4679,
"step": 131500
},
{
"epoch": 0.31,
"learning_rate": 1.7905684400918644e-05,
"loss": 1.4745,
"step": 132000
},
{
"epoch": 0.32,
"learning_rate": 1.789775138728576e-05,
"loss": 1.4779,
"step": 132500
},
{
"epoch": 0.32,
"learning_rate": 1.7889818373652876e-05,
"loss": 1.4708,
"step": 133000
},
{
"epoch": 0.32,
"learning_rate": 1.7881885360019992e-05,
"loss": 1.4761,
"step": 133500
},
{
"epoch": 0.32,
"learning_rate": 1.7873952346387108e-05,
"loss": 1.4682,
"step": 134000
},
{
"epoch": 0.32,
"learning_rate": 1.7866019332754227e-05,
"loss": 1.47,
"step": 134500
},
{
"epoch": 0.32,
"learning_rate": 1.7858086319121343e-05,
"loss": 1.4355,
"step": 135000
},
{
"epoch": 0.32,
"learning_rate": 1.785015330548846e-05,
"loss": 1.448,
"step": 135500
},
{
"epoch": 0.32,
"learning_rate": 1.784222029185557e-05,
"loss": 1.4724,
"step": 136000
},
{
"epoch": 0.32,
"learning_rate": 1.7834287278222687e-05,
"loss": 1.4684,
"step": 136500
},
{
"epoch": 0.33,
"learning_rate": 1.7826354264589806e-05,
"loss": 1.4582,
"step": 137000
},
{
"epoch": 0.33,
"learning_rate": 1.7818421250956922e-05,
"loss": 1.4742,
"step": 137500
},
{
"epoch": 0.33,
"learning_rate": 1.7810488237324038e-05,
"loss": 1.4583,
"step": 138000
},
{
"epoch": 0.33,
"learning_rate": 1.7802555223691154e-05,
"loss": 1.465,
"step": 138500
},
{
"epoch": 0.33,
"learning_rate": 1.779462221005827e-05,
"loss": 1.4817,
"step": 139000
},
{
"epoch": 0.33,
"learning_rate": 1.7786689196425386e-05,
"loss": 1.4492,
"step": 139500
},
{
"epoch": 0.33,
"learning_rate": 1.77787561827925e-05,
"loss": 1.4506,
"step": 140000
},
{
"epoch": 0.33,
"learning_rate": 1.7770823169159617e-05,
"loss": 1.4632,
"step": 140500
},
{
"epoch": 0.34,
"learning_rate": 1.7762890155526733e-05,
"loss": 1.4393,
"step": 141000
},
{
"epoch": 0.34,
"learning_rate": 1.775495714189385e-05,
"loss": 1.4658,
"step": 141500
},
{
"epoch": 0.34,
"learning_rate": 1.7747024128260965e-05,
"loss": 1.4482,
"step": 142000
},
{
"epoch": 0.34,
"learning_rate": 1.773909111462808e-05,
"loss": 1.4754,
"step": 142500
},
{
"epoch": 0.34,
"learning_rate": 1.77311581009952e-05,
"loss": 1.464,
"step": 143000
},
{
"epoch": 0.34,
"learning_rate": 1.7723225087362316e-05,
"loss": 1.4376,
"step": 143500
},
{
"epoch": 0.34,
"learning_rate": 1.7715292073729432e-05,
"loss": 1.4426,
"step": 144000
},
{
"epoch": 0.34,
"learning_rate": 1.7707359060096548e-05,
"loss": 1.453,
"step": 144500
},
{
"epoch": 0.35,
"learning_rate": 1.769942604646366e-05,
"loss": 1.4593,
"step": 145000
},
{
"epoch": 0.35,
"learning_rate": 1.769149303283078e-05,
"loss": 1.4472,
"step": 145500
},
{
"epoch": 0.35,
"learning_rate": 1.7683560019197895e-05,
"loss": 1.4566,
"step": 146000
},
{
"epoch": 0.35,
"learning_rate": 1.767562700556501e-05,
"loss": 1.4435,
"step": 146500
},
{
"epoch": 0.35,
"learning_rate": 1.7667693991932127e-05,
"loss": 1.4489,
"step": 147000
},
{
"epoch": 0.35,
"learning_rate": 1.7659760978299243e-05,
"loss": 1.4362,
"step": 147500
},
{
"epoch": 0.35,
"learning_rate": 1.765182796466636e-05,
"loss": 1.4713,
"step": 148000
},
{
"epoch": 0.35,
"learning_rate": 1.7643894951033474e-05,
"loss": 1.4414,
"step": 148500
},
{
"epoch": 0.35,
"learning_rate": 1.763596193740059e-05,
"loss": 1.4431,
"step": 149000
},
{
"epoch": 0.36,
"learning_rate": 1.7628028923767706e-05,
"loss": 1.4597,
"step": 149500
},
{
"epoch": 0.36,
"learning_rate": 1.7620095910134822e-05,
"loss": 1.454,
"step": 150000
},
{
"epoch": 0.36,
"learning_rate": 1.7612162896501938e-05,
"loss": 1.4594,
"step": 150500
},
{
"epoch": 0.36,
"learning_rate": 1.7604229882869054e-05,
"loss": 1.4496,
"step": 151000
},
{
"epoch": 0.36,
"learning_rate": 1.759629686923617e-05,
"loss": 1.4593,
"step": 151500
},
{
"epoch": 0.36,
"learning_rate": 1.758836385560329e-05,
"loss": 1.462,
"step": 152000
},
{
"epoch": 0.36,
"learning_rate": 1.7580430841970405e-05,
"loss": 1.4471,
"step": 152500
},
{
"epoch": 0.36,
"learning_rate": 1.757249782833752e-05,
"loss": 1.4455,
"step": 153000
},
{
"epoch": 0.37,
"learning_rate": 1.7564564814704637e-05,
"loss": 1.4595,
"step": 153500
},
{
"epoch": 0.37,
"learning_rate": 1.755663180107175e-05,
"loss": 1.4522,
"step": 154000
},
{
"epoch": 0.37,
"learning_rate": 1.7548698787438868e-05,
"loss": 1.4458,
"step": 154500
},
{
"epoch": 0.37,
"learning_rate": 1.7540765773805984e-05,
"loss": 1.443,
"step": 155000
},
{
"epoch": 0.37,
"learning_rate": 1.75328327601731e-05,
"loss": 1.4546,
"step": 155500
},
{
"epoch": 0.37,
"learning_rate": 1.7524899746540216e-05,
"loss": 1.466,
"step": 156000
},
{
"epoch": 0.37,
"learning_rate": 1.7516966732907332e-05,
"loss": 1.4545,
"step": 156500
},
{
"epoch": 0.37,
"learning_rate": 1.7509033719274448e-05,
"loss": 1.4423,
"step": 157000
},
{
"epoch": 0.37,
"learning_rate": 1.7501100705641563e-05,
"loss": 1.4351,
"step": 157500
},
{
"epoch": 0.38,
"learning_rate": 1.7493167692008683e-05,
"loss": 1.4404,
"step": 158000
},
{
"epoch": 0.38,
"learning_rate": 1.7485234678375795e-05,
"loss": 1.4345,
"step": 158500
},
{
"epoch": 0.38,
"learning_rate": 1.747730166474291e-05,
"loss": 1.4435,
"step": 159000
},
{
"epoch": 0.38,
"learning_rate": 1.7469368651110027e-05,
"loss": 1.4248,
"step": 159500
},
{
"epoch": 0.38,
"learning_rate": 1.7461435637477143e-05,
"loss": 1.4276,
"step": 160000
},
{
"epoch": 0.38,
"learning_rate": 1.7453502623844262e-05,
"loss": 1.4206,
"step": 160500
},
{
"epoch": 0.38,
"learning_rate": 1.7445569610211378e-05,
"loss": 1.4408,
"step": 161000
},
{
"epoch": 0.38,
"learning_rate": 1.7437636596578494e-05,
"loss": 1.4219,
"step": 161500
},
{
"epoch": 0.39,
"learning_rate": 1.742970358294561e-05,
"loss": 1.4568,
"step": 162000
},
{
"epoch": 0.39,
"learning_rate": 1.7421770569312722e-05,
"loss": 1.4342,
"step": 162500
},
{
"epoch": 0.39,
"learning_rate": 1.741383755567984e-05,
"loss": 1.443,
"step": 163000
},
{
"epoch": 0.39,
"learning_rate": 1.7405904542046957e-05,
"loss": 1.4288,
"step": 163500
},
{
"epoch": 0.39,
"learning_rate": 1.7397971528414073e-05,
"loss": 1.4399,
"step": 164000
},
{
"epoch": 0.39,
"learning_rate": 1.739003851478119e-05,
"loss": 1.4421,
"step": 164500
},
{
"epoch": 0.39,
"learning_rate": 1.7382105501148305e-05,
"loss": 1.4323,
"step": 165000
},
{
"epoch": 0.39,
"learning_rate": 1.737417248751542e-05,
"loss": 1.434,
"step": 165500
},
{
"epoch": 0.4,
"learning_rate": 1.7366239473882537e-05,
"loss": 1.4261,
"step": 166000
},
{
"epoch": 0.4,
"learning_rate": 1.7358306460249656e-05,
"loss": 1.4221,
"step": 166500
},
{
"epoch": 0.4,
"learning_rate": 1.7350373446616768e-05,
"loss": 1.4418,
"step": 167000
},
{
"epoch": 0.4,
"learning_rate": 1.7342440432983884e-05,
"loss": 1.4297,
"step": 167500
},
{
"epoch": 0.4,
"learning_rate": 1.7334507419351e-05,
"loss": 1.419,
"step": 168000
},
{
"epoch": 0.4,
"learning_rate": 1.7326574405718116e-05,
"loss": 1.4328,
"step": 168500
},
{
"epoch": 0.4,
"learning_rate": 1.7318641392085235e-05,
"loss": 1.4227,
"step": 169000
},
{
"epoch": 0.4,
"learning_rate": 1.731070837845235e-05,
"loss": 1.432,
"step": 169500
},
{
"epoch": 0.4,
"learning_rate": 1.7302775364819467e-05,
"loss": 1.4383,
"step": 170000
},
{
"epoch": 0.41,
"learning_rate": 1.7294842351186583e-05,
"loss": 1.4363,
"step": 170500
},
{
"epoch": 0.41,
"learning_rate": 1.72869093375537e-05,
"loss": 1.438,
"step": 171000
},
{
"epoch": 0.41,
"learning_rate": 1.7278976323920814e-05,
"loss": 1.4065,
"step": 171500
},
{
"epoch": 0.41,
"learning_rate": 1.727104331028793e-05,
"loss": 1.4143,
"step": 172000
},
{
"epoch": 0.41,
"learning_rate": 1.7263110296655046e-05,
"loss": 1.4316,
"step": 172500
},
{
"epoch": 0.41,
"learning_rate": 1.7255177283022162e-05,
"loss": 1.4189,
"step": 173000
},
{
"epoch": 0.41,
"learning_rate": 1.7247244269389278e-05,
"loss": 1.4218,
"step": 173500
},
{
"epoch": 0.41,
"learning_rate": 1.7239311255756394e-05,
"loss": 1.4213,
"step": 174000
},
{
"epoch": 0.42,
"learning_rate": 1.723137824212351e-05,
"loss": 1.4286,
"step": 174500
},
{
"epoch": 0.42,
"learning_rate": 1.722344522849063e-05,
"loss": 1.4231,
"step": 175000
},
{
"epoch": 0.42,
"learning_rate": 1.7215512214857745e-05,
"loss": 1.4295,
"step": 175500
},
{
"epoch": 0.42,
"learning_rate": 1.7207579201224857e-05,
"loss": 1.4329,
"step": 176000
},
{
"epoch": 0.42,
"learning_rate": 1.7199646187591973e-05,
"loss": 1.413,
"step": 176500
},
{
"epoch": 0.42,
"learning_rate": 1.719171317395909e-05,
"loss": 1.4222,
"step": 177000
},
{
"epoch": 0.42,
"learning_rate": 1.7183780160326208e-05,
"loss": 1.4188,
"step": 177500
},
{
"epoch": 0.42,
"learning_rate": 1.7175847146693324e-05,
"loss": 1.4304,
"step": 178000
},
{
"epoch": 0.42,
"learning_rate": 1.716791413306044e-05,
"loss": 1.4011,
"step": 178500
},
{
"epoch": 0.43,
"learning_rate": 1.7159981119427556e-05,
"loss": 1.4098,
"step": 179000
},
{
"epoch": 0.43,
"learning_rate": 1.715204810579467e-05,
"loss": 1.4229,
"step": 179500
},
{
"epoch": 0.43,
"learning_rate": 1.7144115092161788e-05,
"loss": 1.4314,
"step": 180000
},
{
"epoch": 0.43,
"learning_rate": 1.7136182078528903e-05,
"loss": 1.437,
"step": 180500
},
{
"epoch": 0.43,
"learning_rate": 1.712824906489602e-05,
"loss": 1.4239,
"step": 181000
},
{
"epoch": 0.43,
"learning_rate": 1.7120316051263135e-05,
"loss": 1.4092,
"step": 181500
},
{
"epoch": 0.43,
"learning_rate": 1.711238303763025e-05,
"loss": 1.4114,
"step": 182000
},
{
"epoch": 0.43,
"learning_rate": 1.7104450023997367e-05,
"loss": 1.4131,
"step": 182500
},
{
"epoch": 0.44,
"learning_rate": 1.7096517010364483e-05,
"loss": 1.4138,
"step": 183000
},
{
"epoch": 0.44,
"learning_rate": 1.7088583996731602e-05,
"loss": 1.4207,
"step": 183500
},
{
"epoch": 0.44,
"learning_rate": 1.7080650983098718e-05,
"loss": 1.4036,
"step": 184000
},
{
"epoch": 0.44,
"learning_rate": 1.7072717969465834e-05,
"loss": 1.4177,
"step": 184500
},
{
"epoch": 0.44,
"learning_rate": 1.7064784955832946e-05,
"loss": 1.4006,
"step": 185000
},
{
"epoch": 0.44,
"learning_rate": 1.7056851942200062e-05,
"loss": 1.4258,
"step": 185500
},
{
"epoch": 0.44,
"learning_rate": 1.704891892856718e-05,
"loss": 1.4067,
"step": 186000
},
{
"epoch": 0.44,
"learning_rate": 1.7040985914934297e-05,
"loss": 1.4066,
"step": 186500
},
{
"epoch": 0.45,
"learning_rate": 1.7033052901301413e-05,
"loss": 1.4097,
"step": 187000
},
{
"epoch": 0.45,
"learning_rate": 1.702511988766853e-05,
"loss": 1.3993,
"step": 187500
},
{
"epoch": 0.45,
"learning_rate": 1.7017186874035645e-05,
"loss": 1.3967,
"step": 188000
},
{
"epoch": 0.45,
"learning_rate": 1.700925386040276e-05,
"loss": 1.3943,
"step": 188500
},
{
"epoch": 0.45,
"learning_rate": 1.7001320846769876e-05,
"loss": 1.4084,
"step": 189000
},
{
"epoch": 0.45,
"learning_rate": 1.6993387833136992e-05,
"loss": 1.4102,
"step": 189500
},
{
"epoch": 0.45,
"learning_rate": 1.6985454819504108e-05,
"loss": 1.3981,
"step": 190000
},
{
"epoch": 0.45,
"learning_rate": 1.6977521805871224e-05,
"loss": 1.4251,
"step": 190500
},
{
"epoch": 0.45,
"learning_rate": 1.696958879223834e-05,
"loss": 1.4113,
"step": 191000
},
{
"epoch": 0.46,
"learning_rate": 1.6961655778605456e-05,
"loss": 1.3979,
"step": 191500
},
{
"epoch": 0.46,
"learning_rate": 1.695372276497257e-05,
"loss": 1.4171,
"step": 192000
},
{
"epoch": 0.46,
"learning_rate": 1.694578975133969e-05,
"loss": 1.4015,
"step": 192500
},
{
"epoch": 0.46,
"learning_rate": 1.6937856737706807e-05,
"loss": 1.4109,
"step": 193000
},
{
"epoch": 0.46,
"learning_rate": 1.6929923724073923e-05,
"loss": 1.4151,
"step": 193500
},
{
"epoch": 0.46,
"learning_rate": 1.6921990710441035e-05,
"loss": 1.4171,
"step": 194000
},
{
"epoch": 0.46,
"learning_rate": 1.691405769680815e-05,
"loss": 1.4033,
"step": 194500
},
{
"epoch": 0.46,
"learning_rate": 1.690612468317527e-05,
"loss": 1.4259,
"step": 195000
},
{
"epoch": 0.47,
"learning_rate": 1.6898191669542386e-05,
"loss": 1.3964,
"step": 195500
},
{
"epoch": 0.47,
"learning_rate": 1.6890258655909502e-05,
"loss": 1.4096,
"step": 196000
},
{
"epoch": 0.47,
"learning_rate": 1.6882325642276618e-05,
"loss": 1.3983,
"step": 196500
},
{
"epoch": 0.47,
"learning_rate": 1.6874392628643734e-05,
"loss": 1.396,
"step": 197000
},
{
"epoch": 0.47,
"learning_rate": 1.686645961501085e-05,
"loss": 1.3992,
"step": 197500
},
{
"epoch": 0.47,
"learning_rate": 1.6858526601377965e-05,
"loss": 1.3996,
"step": 198000
},
{
"epoch": 0.47,
"learning_rate": 1.685059358774508e-05,
"loss": 1.3947,
"step": 198500
},
{
"epoch": 0.47,
"learning_rate": 1.6842660574112197e-05,
"loss": 1.4166,
"step": 199000
},
{
"epoch": 0.47,
"learning_rate": 1.6834727560479313e-05,
"loss": 1.403,
"step": 199500
},
{
"epoch": 0.48,
"learning_rate": 1.682679454684643e-05,
"loss": 1.4067,
"step": 200000
},
{
"epoch": 0.48,
"learning_rate": 1.6818861533213545e-05,
"loss": 1.3818,
"step": 200500
},
{
"epoch": 0.48,
"learning_rate": 1.6810928519580664e-05,
"loss": 1.425,
"step": 201000
},
{
"epoch": 0.48,
"learning_rate": 1.680299550594778e-05,
"loss": 1.4004,
"step": 201500
},
{
"epoch": 0.48,
"learning_rate": 1.6795062492314896e-05,
"loss": 1.3936,
"step": 202000
},
{
"epoch": 0.48,
"learning_rate": 1.678712947868201e-05,
"loss": 1.3815,
"step": 202500
},
{
"epoch": 0.48,
"learning_rate": 1.6779196465049124e-05,
"loss": 1.3987,
"step": 203000
},
{
"epoch": 0.48,
"learning_rate": 1.6771263451416243e-05,
"loss": 1.4018,
"step": 203500
},
{
"epoch": 0.49,
"learning_rate": 1.676333043778336e-05,
"loss": 1.3852,
"step": 204000
},
{
"epoch": 0.49,
"learning_rate": 1.6755397424150475e-05,
"loss": 1.4024,
"step": 204500
},
{
"epoch": 0.49,
"learning_rate": 1.674746441051759e-05,
"loss": 1.3827,
"step": 205000
},
{
"epoch": 0.49,
"learning_rate": 1.6739531396884707e-05,
"loss": 1.4046,
"step": 205500
},
{
"epoch": 0.49,
"learning_rate": 1.6731598383251823e-05,
"loss": 1.3966,
"step": 206000
},
{
"epoch": 0.49,
"learning_rate": 1.672366536961894e-05,
"loss": 1.3957,
"step": 206500
},
{
"epoch": 0.49,
"learning_rate": 1.6715732355986054e-05,
"loss": 1.4029,
"step": 207000
},
{
"epoch": 0.49,
"learning_rate": 1.670779934235317e-05,
"loss": 1.4036,
"step": 207500
},
{
"epoch": 0.5,
"learning_rate": 1.6699866328720286e-05,
"loss": 1.3773,
"step": 208000
},
{
"epoch": 0.5,
"learning_rate": 1.6691933315087402e-05,
"loss": 1.4042,
"step": 208500
},
{
"epoch": 0.5,
"learning_rate": 1.6684000301454518e-05,
"loss": 1.395,
"step": 209000
},
{
"epoch": 0.5,
"learning_rate": 1.6676067287821637e-05,
"loss": 1.3992,
"step": 209500
},
{
"epoch": 0.5,
"learning_rate": 1.6668134274188753e-05,
"loss": 1.404,
"step": 210000
},
{
"epoch": 0.5,
"learning_rate": 1.666020126055587e-05,
"loss": 1.4033,
"step": 210500
},
{
"epoch": 0.5,
"learning_rate": 1.6652268246922985e-05,
"loss": 1.3797,
"step": 211000
},
{
"epoch": 0.5,
"learning_rate": 1.6644335233290097e-05,
"loss": 1.3864,
"step": 211500
},
{
"epoch": 0.5,
"learning_rate": 1.6636402219657216e-05,
"loss": 1.4064,
"step": 212000
},
{
"epoch": 0.51,
"learning_rate": 1.6628469206024332e-05,
"loss": 1.3973,
"step": 212500
},
{
"epoch": 0.51,
"learning_rate": 1.6620536192391448e-05,
"loss": 1.3933,
"step": 213000
},
{
"epoch": 0.51,
"learning_rate": 1.6612603178758564e-05,
"loss": 1.3928,
"step": 213500
},
{
"epoch": 0.51,
"learning_rate": 1.660467016512568e-05,
"loss": 1.4047,
"step": 214000
},
{
"epoch": 0.51,
"learning_rate": 1.6596737151492796e-05,
"loss": 1.4135,
"step": 214500
},
{
"epoch": 0.51,
"learning_rate": 1.658880413785991e-05,
"loss": 1.3821,
"step": 215000
},
{
"epoch": 0.51,
"learning_rate": 1.658087112422703e-05,
"loss": 1.3826,
"step": 215500
},
{
"epoch": 0.51,
"learning_rate": 1.6572938110594143e-05,
"loss": 1.3868,
"step": 216000
},
{
"epoch": 0.52,
"learning_rate": 1.656500509696126e-05,
"loss": 1.407,
"step": 216500
},
{
"epoch": 0.52,
"learning_rate": 1.6557072083328375e-05,
"loss": 1.3846,
"step": 217000
},
{
"epoch": 0.52,
"learning_rate": 1.654913906969549e-05,
"loss": 1.3727,
"step": 217500
},
{
"epoch": 0.52,
"learning_rate": 1.654120605606261e-05,
"loss": 1.3815,
"step": 218000
},
{
"epoch": 0.52,
"learning_rate": 1.6533273042429726e-05,
"loss": 1.3885,
"step": 218500
},
{
"epoch": 0.52,
"learning_rate": 1.6525340028796842e-05,
"loss": 1.3858,
"step": 219000
},
{
"epoch": 0.52,
"learning_rate": 1.6517407015163958e-05,
"loss": 1.3936,
"step": 219500
},
{
"epoch": 0.52,
"learning_rate": 1.6509474001531074e-05,
"loss": 1.3923,
"step": 220000
},
{
"epoch": 0.52,
"learning_rate": 1.650154098789819e-05,
"loss": 1.3787,
"step": 220500
},
{
"epoch": 0.53,
"learning_rate": 1.6493607974265305e-05,
"loss": 1.3872,
"step": 221000
},
{
"epoch": 0.53,
"learning_rate": 1.648567496063242e-05,
"loss": 1.3694,
"step": 221500
},
{
"epoch": 0.53,
"learning_rate": 1.6477741946999537e-05,
"loss": 1.389,
"step": 222000
},
{
"epoch": 0.53,
"learning_rate": 1.6469808933366653e-05,
"loss": 1.3849,
"step": 222500
},
{
"epoch": 0.53,
"learning_rate": 1.646187591973377e-05,
"loss": 1.385,
"step": 223000
},
{
"epoch": 0.53,
"learning_rate": 1.6453942906100885e-05,
"loss": 1.3686,
"step": 223500
},
{
"epoch": 0.53,
"learning_rate": 1.6446009892468004e-05,
"loss": 1.3916,
"step": 224000
},
{
"epoch": 0.53,
"learning_rate": 1.643807687883512e-05,
"loss": 1.375,
"step": 224500
},
{
"epoch": 0.54,
"learning_rate": 1.6430143865202232e-05,
"loss": 1.3811,
"step": 225000
},
{
"epoch": 0.54,
"learning_rate": 1.6422210851569348e-05,
"loss": 1.3755,
"step": 225500
},
{
"epoch": 0.54,
"learning_rate": 1.6414277837936464e-05,
"loss": 1.381,
"step": 226000
},
{
"epoch": 0.54,
"learning_rate": 1.6406344824303583e-05,
"loss": 1.3781,
"step": 226500
},
{
"epoch": 0.54,
"learning_rate": 1.63984118106707e-05,
"loss": 1.3714,
"step": 227000
},
{
"epoch": 0.54,
"learning_rate": 1.6390478797037815e-05,
"loss": 1.3936,
"step": 227500
},
{
"epoch": 0.54,
"learning_rate": 1.638254578340493e-05,
"loss": 1.3711,
"step": 228000
},
{
"epoch": 0.54,
"learning_rate": 1.6374612769772047e-05,
"loss": 1.3872,
"step": 228500
},
{
"epoch": 0.54,
"learning_rate": 1.6366679756139163e-05,
"loss": 1.3851,
"step": 229000
},
{
"epoch": 0.55,
"learning_rate": 1.635874674250628e-05,
"loss": 1.3924,
"step": 229500
},
{
"epoch": 0.55,
"learning_rate": 1.6350813728873394e-05,
"loss": 1.381,
"step": 230000
},
{
"epoch": 0.55,
"learning_rate": 1.634288071524051e-05,
"loss": 1.3746,
"step": 230500
},
{
"epoch": 0.55,
"learning_rate": 1.6334947701607626e-05,
"loss": 1.3714,
"step": 231000
},
{
"epoch": 0.55,
"learning_rate": 1.6327014687974742e-05,
"loss": 1.386,
"step": 231500
},
{
"epoch": 0.55,
"learning_rate": 1.6319081674341858e-05,
"loss": 1.3823,
"step": 232000
},
{
"epoch": 0.55,
"learning_rate": 1.6311148660708974e-05,
"loss": 1.3791,
"step": 232500
},
{
"epoch": 0.55,
"learning_rate": 1.6303215647076093e-05,
"loss": 1.3683,
"step": 233000
},
{
"epoch": 0.56,
"learning_rate": 1.629528263344321e-05,
"loss": 1.3786,
"step": 233500
},
{
"epoch": 0.56,
"learning_rate": 1.628734961981032e-05,
"loss": 1.3739,
"step": 234000
},
{
"epoch": 0.56,
"learning_rate": 1.6279416606177437e-05,
"loss": 1.3765,
"step": 234500
},
{
"epoch": 0.56,
"learning_rate": 1.6271483592544553e-05,
"loss": 1.3808,
"step": 235000
},
{
"epoch": 0.56,
"learning_rate": 1.6263550578911672e-05,
"loss": 1.3843,
"step": 235500
},
{
"epoch": 0.56,
"learning_rate": 1.6255617565278788e-05,
"loss": 1.3607,
"step": 236000
},
{
"epoch": 0.56,
"learning_rate": 1.6247684551645904e-05,
"loss": 1.3586,
"step": 236500
},
{
"epoch": 0.56,
"learning_rate": 1.623975153801302e-05,
"loss": 1.4029,
"step": 237000
},
{
"epoch": 0.57,
"learning_rate": 1.6231818524380136e-05,
"loss": 1.3793,
"step": 237500
},
{
"epoch": 0.57,
"learning_rate": 1.622388551074725e-05,
"loss": 1.3644,
"step": 238000
},
{
"epoch": 0.57,
"learning_rate": 1.6215952497114367e-05,
"loss": 1.3749,
"step": 238500
},
{
"epoch": 0.57,
"learning_rate": 1.6208019483481483e-05,
"loss": 1.3771,
"step": 239000
},
{
"epoch": 0.57,
"learning_rate": 1.62000864698486e-05,
"loss": 1.3759,
"step": 239500
},
{
"epoch": 0.57,
"learning_rate": 1.6192153456215715e-05,
"loss": 1.3703,
"step": 240000
},
{
"epoch": 0.57,
"learning_rate": 1.618422044258283e-05,
"loss": 1.3821,
"step": 240500
},
{
"epoch": 0.57,
"learning_rate": 1.6176287428949947e-05,
"loss": 1.363,
"step": 241000
},
{
"epoch": 0.57,
"learning_rate": 1.6168354415317066e-05,
"loss": 1.3758,
"step": 241500
},
{
"epoch": 0.58,
"learning_rate": 1.6160421401684182e-05,
"loss": 1.3875,
"step": 242000
},
{
"epoch": 0.58,
"learning_rate": 1.6152488388051298e-05,
"loss": 1.3751,
"step": 242500
},
{
"epoch": 0.58,
"learning_rate": 1.614455537441841e-05,
"loss": 1.3703,
"step": 243000
},
{
"epoch": 0.58,
"learning_rate": 1.6136622360785526e-05,
"loss": 1.3672,
"step": 243500
},
{
"epoch": 0.58,
"learning_rate": 1.6128689347152645e-05,
"loss": 1.3743,
"step": 244000
},
{
"epoch": 0.58,
"learning_rate": 1.612075633351976e-05,
"loss": 1.3427,
"step": 244500
},
{
"epoch": 0.58,
"learning_rate": 1.6112823319886877e-05,
"loss": 1.3816,
"step": 245000
},
{
"epoch": 0.58,
"learning_rate": 1.6104890306253993e-05,
"loss": 1.381,
"step": 245500
},
{
"epoch": 0.59,
"learning_rate": 1.609695729262111e-05,
"loss": 1.3709,
"step": 246000
},
{
"epoch": 0.59,
"learning_rate": 1.6089024278988225e-05,
"loss": 1.3632,
"step": 246500
},
{
"epoch": 0.59,
"learning_rate": 1.608109126535534e-05,
"loss": 1.3776,
"step": 247000
},
{
"epoch": 0.59,
"learning_rate": 1.6073158251722456e-05,
"loss": 1.3771,
"step": 247500
},
{
"epoch": 0.59,
"learning_rate": 1.6065225238089572e-05,
"loss": 1.3684,
"step": 248000
},
{
"epoch": 0.59,
"learning_rate": 1.6057292224456688e-05,
"loss": 1.3799,
"step": 248500
},
{
"epoch": 0.59,
"learning_rate": 1.6049359210823804e-05,
"loss": 1.3803,
"step": 249000
},
{
"epoch": 0.59,
"learning_rate": 1.604142619719092e-05,
"loss": 1.3725,
"step": 249500
},
{
"epoch": 0.59,
"learning_rate": 1.603349318355804e-05,
"loss": 1.3825,
"step": 250000
},
{
"epoch": 0.6,
"learning_rate": 1.6025560169925155e-05,
"loss": 1.3642,
"step": 250500
},
{
"epoch": 0.6,
"learning_rate": 1.601762715629227e-05,
"loss": 1.3714,
"step": 251000
},
{
"epoch": 0.6,
"learning_rate": 1.6009694142659383e-05,
"loss": 1.3651,
"step": 251500
},
{
"epoch": 0.6,
"learning_rate": 1.60017611290265e-05,
"loss": 1.3768,
"step": 252000
},
{
"epoch": 0.6,
"learning_rate": 1.599382811539362e-05,
"loss": 1.3662,
"step": 252500
},
{
"epoch": 0.6,
"learning_rate": 1.5985895101760734e-05,
"loss": 1.3668,
"step": 253000
},
{
"epoch": 0.6,
"learning_rate": 1.597796208812785e-05,
"loss": 1.3642,
"step": 253500
},
{
"epoch": 0.6,
"learning_rate": 1.5970029074494966e-05,
"loss": 1.3711,
"step": 254000
},
{
"epoch": 0.61,
"learning_rate": 1.5962096060862082e-05,
"loss": 1.3713,
"step": 254500
},
{
"epoch": 0.61,
"learning_rate": 1.5954163047229198e-05,
"loss": 1.3604,
"step": 255000
},
{
"epoch": 0.61,
"learning_rate": 1.5946230033596314e-05,
"loss": 1.3728,
"step": 255500
},
{
"epoch": 0.61,
"learning_rate": 1.593829701996343e-05,
"loss": 1.3526,
"step": 256000
},
{
"epoch": 0.61,
"learning_rate": 1.5930364006330545e-05,
"loss": 1.3693,
"step": 256500
},
{
"epoch": 0.61,
"learning_rate": 1.592243099269766e-05,
"loss": 1.3555,
"step": 257000
},
{
"epoch": 0.61,
"learning_rate": 1.5914497979064777e-05,
"loss": 1.3622,
"step": 257500
},
{
"epoch": 0.61,
"learning_rate": 1.5906564965431893e-05,
"loss": 1.355,
"step": 258000
},
{
"epoch": 0.62,
"learning_rate": 1.5898631951799012e-05,
"loss": 1.3632,
"step": 258500
},
{
"epoch": 0.62,
"learning_rate": 1.5890698938166128e-05,
"loss": 1.3706,
"step": 259000
},
{
"epoch": 0.62,
"learning_rate": 1.5882765924533244e-05,
"loss": 1.3667,
"step": 259500
},
{
"epoch": 0.62,
"learning_rate": 1.587483291090036e-05,
"loss": 1.359,
"step": 260000
},
{
"epoch": 0.62,
"learning_rate": 1.5866899897267472e-05,
"loss": 1.3596,
"step": 260500
},
{
"epoch": 0.62,
"learning_rate": 1.585896688363459e-05,
"loss": 1.3723,
"step": 261000
},
{
"epoch": 0.62,
"learning_rate": 1.5851033870001707e-05,
"loss": 1.3712,
"step": 261500
},
{
"epoch": 0.62,
"learning_rate": 1.5843100856368823e-05,
"loss": 1.3611,
"step": 262000
},
{
"epoch": 0.62,
"learning_rate": 1.583516784273594e-05,
"loss": 1.3625,
"step": 262500
},
{
"epoch": 0.63,
"learning_rate": 1.5827234829103055e-05,
"loss": 1.3692,
"step": 263000
},
{
"epoch": 0.63,
"learning_rate": 1.581930181547017e-05,
"loss": 1.3352,
"step": 263500
},
{
"epoch": 0.63,
"learning_rate": 1.5811368801837287e-05,
"loss": 1.3738,
"step": 264000
},
{
"epoch": 0.63,
"learning_rate": 1.5803435788204403e-05,
"loss": 1.3665,
"step": 264500
},
{
"epoch": 0.63,
"learning_rate": 1.579550277457152e-05,
"loss": 1.3579,
"step": 265000
},
{
"epoch": 0.63,
"learning_rate": 1.5787569760938634e-05,
"loss": 1.38,
"step": 265500
},
{
"epoch": 0.63,
"learning_rate": 1.577963674730575e-05,
"loss": 1.3522,
"step": 266000
},
{
"epoch": 0.63,
"learning_rate": 1.5771703733672866e-05,
"loss": 1.3583,
"step": 266500
},
{
"epoch": 0.64,
"learning_rate": 1.5763770720039985e-05,
"loss": 1.3458,
"step": 267000
},
{
"epoch": 0.64,
"learning_rate": 1.57558377064071e-05,
"loss": 1.3676,
"step": 267500
},
{
"epoch": 0.64,
"learning_rate": 1.5747904692774217e-05,
"loss": 1.3463,
"step": 268000
},
{
"epoch": 0.64,
"learning_rate": 1.5739971679141333e-05,
"loss": 1.3685,
"step": 268500
},
{
"epoch": 0.64,
"learning_rate": 1.573203866550845e-05,
"loss": 1.3594,
"step": 269000
},
{
"epoch": 0.64,
"learning_rate": 1.5724105651875565e-05,
"loss": 1.355,
"step": 269500
},
{
"epoch": 0.64,
"learning_rate": 1.571617263824268e-05,
"loss": 1.3491,
"step": 270000
},
{
"epoch": 0.64,
"learning_rate": 1.5708239624609796e-05,
"loss": 1.3462,
"step": 270500
},
{
"epoch": 0.64,
"learning_rate": 1.5700306610976912e-05,
"loss": 1.3533,
"step": 271000
},
{
"epoch": 0.65,
"learning_rate": 1.5692373597344028e-05,
"loss": 1.3695,
"step": 271500
},
{
"epoch": 0.65,
"learning_rate": 1.5684440583711144e-05,
"loss": 1.3666,
"step": 272000
},
{
"epoch": 0.65,
"learning_rate": 1.567650757007826e-05,
"loss": 1.3446,
"step": 272500
},
{
"epoch": 0.65,
"learning_rate": 1.5668574556445376e-05,
"loss": 1.3747,
"step": 273000
},
{
"epoch": 0.65,
"learning_rate": 1.5660641542812495e-05,
"loss": 1.3456,
"step": 273500
},
{
"epoch": 0.65,
"learning_rate": 1.5652708529179607e-05,
"loss": 1.3613,
"step": 274000
},
{
"epoch": 0.65,
"learning_rate": 1.5644775515546723e-05,
"loss": 1.3537,
"step": 274500
},
{
"epoch": 0.65,
"learning_rate": 1.563684250191384e-05,
"loss": 1.3617,
"step": 275000
},
{
"epoch": 0.66,
"learning_rate": 1.5628909488280955e-05,
"loss": 1.3369,
"step": 275500
},
{
"epoch": 0.66,
"learning_rate": 1.5620976474648074e-05,
"loss": 1.3451,
"step": 276000
},
{
"epoch": 0.66,
"learning_rate": 1.561304346101519e-05,
"loss": 1.3559,
"step": 276500
},
{
"epoch": 0.66,
"learning_rate": 1.5605110447382306e-05,
"loss": 1.3487,
"step": 277000
},
{
"epoch": 0.66,
"learning_rate": 1.5597177433749422e-05,
"loss": 1.3382,
"step": 277500
},
{
"epoch": 0.66,
"learning_rate": 1.5589244420116538e-05,
"loss": 1.3394,
"step": 278000
},
{
"epoch": 0.66,
"learning_rate": 1.5581311406483654e-05,
"loss": 1.342,
"step": 278500
},
{
"epoch": 0.66,
"learning_rate": 1.557337839285077e-05,
"loss": 1.3467,
"step": 279000
},
{
"epoch": 0.67,
"learning_rate": 1.5565445379217885e-05,
"loss": 1.3409,
"step": 279500
},
{
"epoch": 0.67,
"learning_rate": 1.5557512365585e-05,
"loss": 1.355,
"step": 280000
},
{
"epoch": 0.67,
"learning_rate": 1.5549579351952117e-05,
"loss": 1.3444,
"step": 280500
},
{
"epoch": 0.67,
"learning_rate": 1.5541646338319233e-05,
"loss": 1.3609,
"step": 281000
},
{
"epoch": 0.67,
"learning_rate": 1.553371332468635e-05,
"loss": 1.3515,
"step": 281500
},
{
"epoch": 0.67,
"learning_rate": 1.5525780311053468e-05,
"loss": 1.3388,
"step": 282000
},
{
"epoch": 0.67,
"learning_rate": 1.5517847297420584e-05,
"loss": 1.3392,
"step": 282500
},
{
"epoch": 0.67,
"learning_rate": 1.5509914283787696e-05,
"loss": 1.3389,
"step": 283000
},
{
"epoch": 0.67,
"learning_rate": 1.5501981270154812e-05,
"loss": 1.3493,
"step": 283500
},
{
"epoch": 0.68,
"learning_rate": 1.5494048256521928e-05,
"loss": 1.3489,
"step": 284000
},
{
"epoch": 0.68,
"learning_rate": 1.5486115242889047e-05,
"loss": 1.3458,
"step": 284500
},
{
"epoch": 0.68,
"learning_rate": 1.5478182229256163e-05,
"loss": 1.346,
"step": 285000
},
{
"epoch": 0.68,
"learning_rate": 1.547024921562328e-05,
"loss": 1.3303,
"step": 285500
},
{
"epoch": 0.68,
"learning_rate": 1.5462316201990395e-05,
"loss": 1.3384,
"step": 286000
},
{
"epoch": 0.68,
"learning_rate": 1.545438318835751e-05,
"loss": 1.3537,
"step": 286500
},
{
"epoch": 0.68,
"learning_rate": 1.5446450174724627e-05,
"loss": 1.3511,
"step": 287000
},
{
"epoch": 0.68,
"learning_rate": 1.5438517161091743e-05,
"loss": 1.3256,
"step": 287500
},
{
"epoch": 0.69,
"learning_rate": 1.543058414745886e-05,
"loss": 1.3465,
"step": 288000
},
{
"epoch": 0.69,
"learning_rate": 1.5422651133825974e-05,
"loss": 1.3329,
"step": 288500
},
{
"epoch": 0.69,
"learning_rate": 1.541471812019309e-05,
"loss": 1.3549,
"step": 289000
},
{
"epoch": 0.69,
"learning_rate": 1.5406785106560206e-05,
"loss": 1.3458,
"step": 289500
},
{
"epoch": 0.69,
"learning_rate": 1.5398852092927322e-05,
"loss": 1.3359,
"step": 290000
},
{
"epoch": 0.69,
"learning_rate": 1.539091907929444e-05,
"loss": 1.3343,
"step": 290500
},
{
"epoch": 0.69,
"learning_rate": 1.5382986065661557e-05,
"loss": 1.3617,
"step": 291000
},
{
"epoch": 0.69,
"learning_rate": 1.5375053052028673e-05,
"loss": 1.3335,
"step": 291500
},
{
"epoch": 0.69,
"learning_rate": 1.5367120038395785e-05,
"loss": 1.3508,
"step": 292000
},
{
"epoch": 0.7,
"learning_rate": 1.53591870247629e-05,
"loss": 1.3492,
"step": 292500
},
{
"epoch": 0.7,
"learning_rate": 1.535125401113002e-05,
"loss": 1.3382,
"step": 293000
},
{
"epoch": 0.7,
"learning_rate": 1.5343320997497136e-05,
"loss": 1.3423,
"step": 293500
},
{
"epoch": 0.7,
"learning_rate": 1.5335387983864252e-05,
"loss": 1.3388,
"step": 294000
},
{
"epoch": 0.7,
"learning_rate": 1.5327454970231368e-05,
"loss": 1.3606,
"step": 294500
},
{
"epoch": 0.7,
"learning_rate": 1.5319521956598484e-05,
"loss": 1.3439,
"step": 295000
},
{
"epoch": 0.7,
"learning_rate": 1.53115889429656e-05,
"loss": 1.3292,
"step": 295500
},
{
"epoch": 0.7,
"learning_rate": 1.5303655929332716e-05,
"loss": 1.341,
"step": 296000
},
{
"epoch": 0.71,
"learning_rate": 1.529572291569983e-05,
"loss": 1.3289,
"step": 296500
},
{
"epoch": 0.71,
"learning_rate": 1.5287789902066947e-05,
"loss": 1.3151,
"step": 297000
},
{
"epoch": 0.71,
"learning_rate": 1.5279856888434063e-05,
"loss": 1.3513,
"step": 297500
},
{
"epoch": 0.71,
"learning_rate": 1.527192387480118e-05,
"loss": 1.3459,
"step": 298000
},
{
"epoch": 0.71,
"learning_rate": 1.5263990861168295e-05,
"loss": 1.3493,
"step": 298500
},
{
"epoch": 0.71,
"learning_rate": 1.5256057847535413e-05,
"loss": 1.331,
"step": 299000
},
{
"epoch": 0.71,
"learning_rate": 1.5248124833902528e-05,
"loss": 1.342,
"step": 299500
},
{
"epoch": 0.71,
"learning_rate": 1.5240191820269646e-05,
"loss": 1.3355,
"step": 300000
},
{
"epoch": 0.72,
"learning_rate": 1.523225880663676e-05,
"loss": 1.3206,
"step": 300500
},
{
"epoch": 0.72,
"learning_rate": 1.5224325793003876e-05,
"loss": 1.3463,
"step": 301000
},
{
"epoch": 0.72,
"learning_rate": 1.5216392779370992e-05,
"loss": 1.3243,
"step": 301500
},
{
"epoch": 0.72,
"learning_rate": 1.5208459765738108e-05,
"loss": 1.3299,
"step": 302000
},
{
"epoch": 0.72,
"learning_rate": 1.5200526752105225e-05,
"loss": 1.3552,
"step": 302500
},
{
"epoch": 0.72,
"learning_rate": 1.5192593738472341e-05,
"loss": 1.3497,
"step": 303000
},
{
"epoch": 0.72,
"learning_rate": 1.5184660724839457e-05,
"loss": 1.335,
"step": 303500
},
{
"epoch": 0.72,
"learning_rate": 1.5176727711206573e-05,
"loss": 1.3517,
"step": 304000
},
{
"epoch": 0.72,
"learning_rate": 1.516879469757369e-05,
"loss": 1.3291,
"step": 304500
},
{
"epoch": 0.73,
"learning_rate": 1.5160861683940805e-05,
"loss": 1.351,
"step": 305000
},
{
"epoch": 0.73,
"learning_rate": 1.515292867030792e-05,
"loss": 1.3385,
"step": 305500
},
{
"epoch": 0.73,
"learning_rate": 1.5144995656675036e-05,
"loss": 1.3322,
"step": 306000
},
{
"epoch": 0.73,
"learning_rate": 1.5137062643042152e-05,
"loss": 1.3398,
"step": 306500
},
{
"epoch": 0.73,
"learning_rate": 1.512912962940927e-05,
"loss": 1.3344,
"step": 307000
},
{
"epoch": 0.73,
"learning_rate": 1.5121196615776386e-05,
"loss": 1.3396,
"step": 307500
},
{
"epoch": 0.73,
"learning_rate": 1.5113263602143501e-05,
"loss": 1.3425,
"step": 308000
},
{
"epoch": 0.73,
"learning_rate": 1.5105330588510619e-05,
"loss": 1.3207,
"step": 308500
},
{
"epoch": 0.74,
"learning_rate": 1.5097397574877735e-05,
"loss": 1.3348,
"step": 309000
},
{
"epoch": 0.74,
"learning_rate": 1.5089464561244849e-05,
"loss": 1.3415,
"step": 309500
},
{
"epoch": 0.74,
"learning_rate": 1.5081531547611965e-05,
"loss": 1.3427,
"step": 310000
},
{
"epoch": 0.74,
"learning_rate": 1.507359853397908e-05,
"loss": 1.3465,
"step": 310500
},
{
"epoch": 0.74,
"learning_rate": 1.5065665520346198e-05,
"loss": 1.329,
"step": 311000
},
{
"epoch": 0.74,
"learning_rate": 1.5057732506713314e-05,
"loss": 1.3409,
"step": 311500
},
{
"epoch": 0.74,
"learning_rate": 1.504979949308043e-05,
"loss": 1.3252,
"step": 312000
},
{
"epoch": 0.74,
"learning_rate": 1.5041866479447546e-05,
"loss": 1.3378,
"step": 312500
},
{
"epoch": 0.74,
"learning_rate": 1.5033933465814663e-05,
"loss": 1.3381,
"step": 313000
},
{
"epoch": 0.75,
"learning_rate": 1.502600045218178e-05,
"loss": 1.3467,
"step": 313500
},
{
"epoch": 0.75,
"learning_rate": 1.5018067438548894e-05,
"loss": 1.3331,
"step": 314000
},
{
"epoch": 0.75,
"learning_rate": 1.501013442491601e-05,
"loss": 1.3487,
"step": 314500
},
{
"epoch": 0.75,
"learning_rate": 1.5002201411283125e-05,
"loss": 1.3421,
"step": 315000
},
{
"epoch": 0.75,
"learning_rate": 1.4994268397650243e-05,
"loss": 1.3232,
"step": 315500
},
{
"epoch": 0.75,
"learning_rate": 1.4986335384017359e-05,
"loss": 1.3304,
"step": 316000
},
{
"epoch": 0.75,
"learning_rate": 1.4978402370384475e-05,
"loss": 1.3374,
"step": 316500
},
{
"epoch": 0.75,
"learning_rate": 1.4970469356751592e-05,
"loss": 1.333,
"step": 317000
},
{
"epoch": 0.76,
"learning_rate": 1.4962536343118708e-05,
"loss": 1.3221,
"step": 317500
},
{
"epoch": 0.76,
"learning_rate": 1.4954603329485824e-05,
"loss": 1.3317,
"step": 318000
},
{
"epoch": 0.76,
"learning_rate": 1.4946670315852938e-05,
"loss": 1.3269,
"step": 318500
},
{
"epoch": 0.76,
"learning_rate": 1.4938737302220054e-05,
"loss": 1.318,
"step": 319000
},
{
"epoch": 0.76,
"learning_rate": 1.4930804288587171e-05,
"loss": 1.3188,
"step": 319500
},
{
"epoch": 0.76,
"learning_rate": 1.4922871274954287e-05,
"loss": 1.3333,
"step": 320000
},
{
"epoch": 0.76,
"learning_rate": 1.4914938261321403e-05,
"loss": 1.3207,
"step": 320500
},
{
"epoch": 0.76,
"learning_rate": 1.4907005247688519e-05,
"loss": 1.3166,
"step": 321000
},
{
"epoch": 0.77,
"learning_rate": 1.4899072234055637e-05,
"loss": 1.3143,
"step": 321500
},
{
"epoch": 0.77,
"learning_rate": 1.4891139220422752e-05,
"loss": 1.3376,
"step": 322000
},
{
"epoch": 0.77,
"learning_rate": 1.4883206206789868e-05,
"loss": 1.3167,
"step": 322500
},
{
"epoch": 0.77,
"learning_rate": 1.4875273193156982e-05,
"loss": 1.3148,
"step": 323000
},
{
"epoch": 0.77,
"learning_rate": 1.4867340179524098e-05,
"loss": 1.3274,
"step": 323500
},
{
"epoch": 0.77,
"learning_rate": 1.4859407165891216e-05,
"loss": 1.3247,
"step": 324000
},
{
"epoch": 0.77,
"learning_rate": 1.4851474152258332e-05,
"loss": 1.3306,
"step": 324500
},
{
"epoch": 0.77,
"learning_rate": 1.4843541138625448e-05,
"loss": 1.3114,
"step": 325000
},
{
"epoch": 0.77,
"learning_rate": 1.4835608124992564e-05,
"loss": 1.342,
"step": 325500
},
{
"epoch": 0.78,
"learning_rate": 1.4827675111359681e-05,
"loss": 1.3312,
"step": 326000
},
{
"epoch": 0.78,
"learning_rate": 1.4819742097726797e-05,
"loss": 1.3318,
"step": 326500
},
{
"epoch": 0.78,
"learning_rate": 1.4811809084093913e-05,
"loss": 1.328,
"step": 327000
},
{
"epoch": 0.78,
"learning_rate": 1.4803876070461027e-05,
"loss": 1.3375,
"step": 327500
},
{
"epoch": 0.78,
"learning_rate": 1.4795943056828145e-05,
"loss": 1.3197,
"step": 328000
},
{
"epoch": 0.78,
"learning_rate": 1.478801004319526e-05,
"loss": 1.3103,
"step": 328500
},
{
"epoch": 0.78,
"learning_rate": 1.4780077029562376e-05,
"loss": 1.3349,
"step": 329000
},
{
"epoch": 0.78,
"learning_rate": 1.4772144015929492e-05,
"loss": 1.3162,
"step": 329500
},
{
"epoch": 0.79,
"learning_rate": 1.476421100229661e-05,
"loss": 1.3056,
"step": 330000
},
{
"epoch": 0.79,
"learning_rate": 1.4756277988663726e-05,
"loss": 1.3276,
"step": 330500
},
{
"epoch": 0.79,
"learning_rate": 1.4748344975030841e-05,
"loss": 1.3215,
"step": 331000
},
{
"epoch": 0.79,
"learning_rate": 1.4740411961397957e-05,
"loss": 1.3295,
"step": 331500
},
{
"epoch": 0.79,
"learning_rate": 1.4732478947765071e-05,
"loss": 1.3234,
"step": 332000
},
{
"epoch": 0.79,
"learning_rate": 1.4724545934132189e-05,
"loss": 1.3274,
"step": 332500
},
{
"epoch": 0.79,
"learning_rate": 1.4716612920499305e-05,
"loss": 1.3364,
"step": 333000
},
{
"epoch": 0.79,
"learning_rate": 1.470867990686642e-05,
"loss": 1.3036,
"step": 333500
},
{
"epoch": 0.79,
"learning_rate": 1.4700746893233537e-05,
"loss": 1.3256,
"step": 334000
},
{
"epoch": 0.8,
"learning_rate": 1.4692813879600654e-05,
"loss": 1.3096,
"step": 334500
},
{
"epoch": 0.8,
"learning_rate": 1.468488086596777e-05,
"loss": 1.339,
"step": 335000
},
{
"epoch": 0.8,
"learning_rate": 1.4676947852334886e-05,
"loss": 1.328,
"step": 335500
},
{
"epoch": 0.8,
"learning_rate": 1.4669014838702003e-05,
"loss": 1.3228,
"step": 336000
},
{
"epoch": 0.8,
"learning_rate": 1.4661081825069116e-05,
"loss": 1.3213,
"step": 336500
},
{
"epoch": 0.8,
"learning_rate": 1.4653148811436233e-05,
"loss": 1.332,
"step": 337000
},
{
"epoch": 0.8,
"learning_rate": 1.464521579780335e-05,
"loss": 1.3262,
"step": 337500
},
{
"epoch": 0.8,
"learning_rate": 1.4637282784170465e-05,
"loss": 1.3213,
"step": 338000
},
{
"epoch": 0.81,
"learning_rate": 1.4629349770537583e-05,
"loss": 1.3289,
"step": 338500
},
{
"epoch": 0.81,
"learning_rate": 1.4621416756904699e-05,
"loss": 1.3137,
"step": 339000
},
{
"epoch": 0.81,
"learning_rate": 1.4613483743271814e-05,
"loss": 1.3191,
"step": 339500
},
{
"epoch": 0.81,
"learning_rate": 1.460555072963893e-05,
"loss": 1.3242,
"step": 340000
},
{
"epoch": 0.81,
"learning_rate": 1.4597617716006045e-05,
"loss": 1.3244,
"step": 340500
},
{
"epoch": 0.81,
"learning_rate": 1.4589684702373162e-05,
"loss": 1.3339,
"step": 341000
},
{
"epoch": 0.81,
"learning_rate": 1.4581751688740278e-05,
"loss": 1.3387,
"step": 341500
},
{
"epoch": 0.81,
"learning_rate": 1.4573818675107394e-05,
"loss": 1.317,
"step": 342000
},
{
"epoch": 0.82,
"learning_rate": 1.456588566147451e-05,
"loss": 1.3094,
"step": 342500
},
{
"epoch": 0.82,
"learning_rate": 1.4557952647841627e-05,
"loss": 1.322,
"step": 343000
},
{
"epoch": 0.82,
"learning_rate": 1.4550019634208743e-05,
"loss": 1.3226,
"step": 343500
},
{
"epoch": 0.82,
"learning_rate": 1.4542086620575859e-05,
"loss": 1.3209,
"step": 344000
},
{
"epoch": 0.82,
"learning_rate": 1.4534153606942975e-05,
"loss": 1.3332,
"step": 344500
},
{
"epoch": 0.82,
"learning_rate": 1.4526220593310089e-05,
"loss": 1.3088,
"step": 345000
},
{
"epoch": 0.82,
"learning_rate": 1.4518287579677207e-05,
"loss": 1.3244,
"step": 345500
},
{
"epoch": 0.82,
"learning_rate": 1.4510354566044322e-05,
"loss": 1.323,
"step": 346000
},
{
"epoch": 0.82,
"learning_rate": 1.4502421552411438e-05,
"loss": 1.3268,
"step": 346500
},
{
"epoch": 0.83,
"learning_rate": 1.4494488538778554e-05,
"loss": 1.3168,
"step": 347000
},
{
"epoch": 0.83,
"learning_rate": 1.4486555525145672e-05,
"loss": 1.3126,
"step": 347500
},
{
"epoch": 0.83,
"learning_rate": 1.4478622511512788e-05,
"loss": 1.3337,
"step": 348000
},
{
"epoch": 0.83,
"learning_rate": 1.4470689497879903e-05,
"loss": 1.3058,
"step": 348500
},
{
"epoch": 0.83,
"learning_rate": 1.4462756484247021e-05,
"loss": 1.3144,
"step": 349000
},
{
"epoch": 0.83,
"learning_rate": 1.4454823470614135e-05,
"loss": 1.2985,
"step": 349500
},
{
"epoch": 0.83,
"learning_rate": 1.4446890456981251e-05,
"loss": 1.3195,
"step": 350000
},
{
"epoch": 0.83,
"learning_rate": 1.4438957443348367e-05,
"loss": 1.3055,
"step": 350500
},
{
"epoch": 0.84,
"learning_rate": 1.4431024429715483e-05,
"loss": 1.3234,
"step": 351000
},
{
"epoch": 0.84,
"learning_rate": 1.44230914160826e-05,
"loss": 1.314,
"step": 351500
},
{
"epoch": 0.84,
"learning_rate": 1.4415158402449716e-05,
"loss": 1.3196,
"step": 352000
},
{
"epoch": 0.84,
"learning_rate": 1.4407225388816832e-05,
"loss": 1.3052,
"step": 352500
},
{
"epoch": 0.84,
"learning_rate": 1.4399292375183948e-05,
"loss": 1.3182,
"step": 353000
},
{
"epoch": 0.84,
"learning_rate": 1.4391359361551065e-05,
"loss": 1.3213,
"step": 353500
},
{
"epoch": 0.84,
"learning_rate": 1.438342634791818e-05,
"loss": 1.2952,
"step": 354000
},
{
"epoch": 0.84,
"learning_rate": 1.4375493334285296e-05,
"loss": 1.3257,
"step": 354500
},
{
"epoch": 0.84,
"learning_rate": 1.4367560320652411e-05,
"loss": 1.3322,
"step": 355000
},
{
"epoch": 0.85,
"learning_rate": 1.4359627307019527e-05,
"loss": 1.3058,
"step": 355500
},
{
"epoch": 0.85,
"learning_rate": 1.4351694293386645e-05,
"loss": 1.2913,
"step": 356000
},
{
"epoch": 0.85,
"learning_rate": 1.434376127975376e-05,
"loss": 1.3154,
"step": 356500
},
{
"epoch": 0.85,
"learning_rate": 1.4335828266120877e-05,
"loss": 1.3069,
"step": 357000
},
{
"epoch": 0.85,
"learning_rate": 1.4327895252487994e-05,
"loss": 1.3171,
"step": 357500
},
{
"epoch": 0.85,
"learning_rate": 1.431996223885511e-05,
"loss": 1.3136,
"step": 358000
},
{
"epoch": 0.85,
"learning_rate": 1.4312029225222224e-05,
"loss": 1.3125,
"step": 358500
},
{
"epoch": 0.85,
"learning_rate": 1.430409621158934e-05,
"loss": 1.3019,
"step": 359000
},
{
"epoch": 0.86,
"learning_rate": 1.4296163197956456e-05,
"loss": 1.3075,
"step": 359500
},
{
"epoch": 0.86,
"learning_rate": 1.4288230184323573e-05,
"loss": 1.2988,
"step": 360000
},
{
"epoch": 0.86,
"learning_rate": 1.428029717069069e-05,
"loss": 1.307,
"step": 360500
},
{
"epoch": 0.86,
"learning_rate": 1.4272364157057805e-05,
"loss": 1.3014,
"step": 361000
},
{
"epoch": 0.86,
"learning_rate": 1.4264431143424921e-05,
"loss": 1.2961,
"step": 361500
},
{
"epoch": 0.86,
"learning_rate": 1.4256498129792039e-05,
"loss": 1.293,
"step": 362000
},
{
"epoch": 0.86,
"learning_rate": 1.4248565116159154e-05,
"loss": 1.3178,
"step": 362500
},
{
"epoch": 0.86,
"learning_rate": 1.4240632102526269e-05,
"loss": 1.3029,
"step": 363000
},
{
"epoch": 0.87,
"learning_rate": 1.4232699088893384e-05,
"loss": 1.2999,
"step": 363500
},
{
"epoch": 0.87,
"learning_rate": 1.42247660752605e-05,
"loss": 1.312,
"step": 364000
},
{
"epoch": 0.87,
"learning_rate": 1.4216833061627618e-05,
"loss": 1.3113,
"step": 364500
},
{
"epoch": 0.87,
"learning_rate": 1.4208900047994734e-05,
"loss": 1.3054,
"step": 365000
},
{
"epoch": 0.87,
"learning_rate": 1.420096703436185e-05,
"loss": 1.3112,
"step": 365500
},
{
"epoch": 0.87,
"learning_rate": 1.4193034020728966e-05,
"loss": 1.3033,
"step": 366000
},
{
"epoch": 0.87,
"learning_rate": 1.4185101007096083e-05,
"loss": 1.3126,
"step": 366500
},
{
"epoch": 0.87,
"learning_rate": 1.4177167993463199e-05,
"loss": 1.2953,
"step": 367000
},
{
"epoch": 0.87,
"learning_rate": 1.4169234979830313e-05,
"loss": 1.3032,
"step": 367500
},
{
"epoch": 0.88,
"learning_rate": 1.4161301966197429e-05,
"loss": 1.3037,
"step": 368000
},
{
"epoch": 0.88,
"learning_rate": 1.4153368952564547e-05,
"loss": 1.3037,
"step": 368500
},
{
"epoch": 0.88,
"learning_rate": 1.4145435938931662e-05,
"loss": 1.3157,
"step": 369000
},
{
"epoch": 0.88,
"learning_rate": 1.4137502925298778e-05,
"loss": 1.2962,
"step": 369500
},
{
"epoch": 0.88,
"learning_rate": 1.4129569911665894e-05,
"loss": 1.3041,
"step": 370000
},
{
"epoch": 0.88,
"learning_rate": 1.4121636898033012e-05,
"loss": 1.3162,
"step": 370500
},
{
"epoch": 0.88,
"learning_rate": 1.4113703884400128e-05,
"loss": 1.3037,
"step": 371000
},
{
"epoch": 0.88,
"learning_rate": 1.4105770870767243e-05,
"loss": 1.3072,
"step": 371500
},
{
"epoch": 0.89,
"learning_rate": 1.4097837857134358e-05,
"loss": 1.2938,
"step": 372000
},
{
"epoch": 0.89,
"learning_rate": 1.4089904843501473e-05,
"loss": 1.3014,
"step": 372500
},
{
"epoch": 0.89,
"learning_rate": 1.4081971829868591e-05,
"loss": 1.3023,
"step": 373000
},
{
"epoch": 0.89,
"learning_rate": 1.4074038816235707e-05,
"loss": 1.3017,
"step": 373500
},
{
"epoch": 0.89,
"learning_rate": 1.4066105802602823e-05,
"loss": 1.3143,
"step": 374000
},
{
"epoch": 0.89,
"learning_rate": 1.4058172788969939e-05,
"loss": 1.31,
"step": 374500
},
{
"epoch": 0.89,
"learning_rate": 1.4050239775337056e-05,
"loss": 1.3034,
"step": 375000
},
{
"epoch": 0.89,
"learning_rate": 1.4042306761704172e-05,
"loss": 1.3064,
"step": 375500
},
{
"epoch": 0.89,
"learning_rate": 1.4034373748071288e-05,
"loss": 1.3006,
"step": 376000
},
{
"epoch": 0.9,
"learning_rate": 1.4026440734438402e-05,
"loss": 1.3022,
"step": 376500
},
{
"epoch": 0.9,
"learning_rate": 1.4018507720805518e-05,
"loss": 1.2904,
"step": 377000
},
{
"epoch": 0.9,
"learning_rate": 1.4010574707172635e-05,
"loss": 1.3114,
"step": 377500
},
{
"epoch": 0.9,
"learning_rate": 1.4002641693539751e-05,
"loss": 1.3129,
"step": 378000
},
{
"epoch": 0.9,
"learning_rate": 1.3994708679906867e-05,
"loss": 1.3128,
"step": 378500
},
{
"epoch": 0.9,
"learning_rate": 1.3986775666273985e-05,
"loss": 1.3276,
"step": 379000
},
{
"epoch": 0.9,
"learning_rate": 1.39788426526411e-05,
"loss": 1.3104,
"step": 379500
},
{
"epoch": 0.9,
"learning_rate": 1.3970909639008216e-05,
"loss": 1.3133,
"step": 380000
},
{
"epoch": 0.91,
"learning_rate": 1.3962976625375332e-05,
"loss": 1.3067,
"step": 380500
},
{
"epoch": 0.91,
"learning_rate": 1.3955043611742447e-05,
"loss": 1.288,
"step": 381000
},
{
"epoch": 0.91,
"learning_rate": 1.3947110598109564e-05,
"loss": 1.2915,
"step": 381500
},
{
"epoch": 0.91,
"learning_rate": 1.393917758447668e-05,
"loss": 1.3016,
"step": 382000
},
{
"epoch": 0.91,
"learning_rate": 1.3931244570843796e-05,
"loss": 1.3032,
"step": 382500
},
{
"epoch": 0.91,
"learning_rate": 1.3923311557210912e-05,
"loss": 1.3005,
"step": 383000
},
{
"epoch": 0.91,
"learning_rate": 1.391537854357803e-05,
"loss": 1.2996,
"step": 383500
},
{
"epoch": 0.91,
"learning_rate": 1.3907445529945145e-05,
"loss": 1.2896,
"step": 384000
},
{
"epoch": 0.92,
"learning_rate": 1.3899512516312261e-05,
"loss": 1.3101,
"step": 384500
},
{
"epoch": 0.92,
"learning_rate": 1.3891579502679375e-05,
"loss": 1.2886,
"step": 385000
},
{
"epoch": 0.92,
"learning_rate": 1.3883646489046491e-05,
"loss": 1.295,
"step": 385500
},
{
"epoch": 0.92,
"learning_rate": 1.3875713475413609e-05,
"loss": 1.3001,
"step": 386000
},
{
"epoch": 0.92,
"learning_rate": 1.3867780461780724e-05,
"loss": 1.3173,
"step": 386500
},
{
"epoch": 0.92,
"learning_rate": 1.385984744814784e-05,
"loss": 1.3049,
"step": 387000
},
{
"epoch": 0.92,
"learning_rate": 1.3851914434514956e-05,
"loss": 1.2951,
"step": 387500
},
{
"epoch": 0.92,
"learning_rate": 1.3843981420882074e-05,
"loss": 1.3051,
"step": 388000
},
{
"epoch": 0.92,
"learning_rate": 1.383604840724919e-05,
"loss": 1.2801,
"step": 388500
},
{
"epoch": 0.93,
"learning_rate": 1.3828115393616305e-05,
"loss": 1.2945,
"step": 389000
},
{
"epoch": 0.93,
"learning_rate": 1.382018237998342e-05,
"loss": 1.3094,
"step": 389500
},
{
"epoch": 0.93,
"learning_rate": 1.3812249366350537e-05,
"loss": 1.2932,
"step": 390000
},
{
"epoch": 0.93,
"learning_rate": 1.3804316352717653e-05,
"loss": 1.3115,
"step": 390500
},
{
"epoch": 0.93,
"learning_rate": 1.3796383339084769e-05,
"loss": 1.2897,
"step": 391000
},
{
"epoch": 0.93,
"learning_rate": 1.3788450325451885e-05,
"loss": 1.3055,
"step": 391500
},
{
"epoch": 0.93,
"learning_rate": 1.3780517311819002e-05,
"loss": 1.2972,
"step": 392000
},
{
"epoch": 0.93,
"learning_rate": 1.3772584298186118e-05,
"loss": 1.306,
"step": 392500
},
{
"epoch": 0.94,
"learning_rate": 1.3764651284553234e-05,
"loss": 1.2972,
"step": 393000
},
{
"epoch": 0.94,
"learning_rate": 1.375671827092035e-05,
"loss": 1.3171,
"step": 393500
},
{
"epoch": 0.94,
"learning_rate": 1.3748785257287464e-05,
"loss": 1.2894,
"step": 394000
},
{
"epoch": 0.94,
"learning_rate": 1.3740852243654582e-05,
"loss": 1.2987,
"step": 394500
},
{
"epoch": 0.94,
"learning_rate": 1.3732919230021698e-05,
"loss": 1.3164,
"step": 395000
},
{
"epoch": 0.94,
"learning_rate": 1.3724986216388813e-05,
"loss": 1.2989,
"step": 395500
},
{
"epoch": 0.94,
"learning_rate": 1.371705320275593e-05,
"loss": 1.2899,
"step": 396000
},
{
"epoch": 0.94,
"learning_rate": 1.3709120189123047e-05,
"loss": 1.2895,
"step": 396500
},
{
"epoch": 0.94,
"learning_rate": 1.3701187175490163e-05,
"loss": 1.3058,
"step": 397000
},
{
"epoch": 0.95,
"learning_rate": 1.3693254161857279e-05,
"loss": 1.3147,
"step": 397500
},
{
"epoch": 0.95,
"learning_rate": 1.3685321148224396e-05,
"loss": 1.3051,
"step": 398000
},
{
"epoch": 0.95,
"learning_rate": 1.3677388134591509e-05,
"loss": 1.3063,
"step": 398500
},
{
"epoch": 0.95,
"learning_rate": 1.3669455120958626e-05,
"loss": 1.2834,
"step": 399000
},
{
"epoch": 0.95,
"learning_rate": 1.3661522107325742e-05,
"loss": 1.3036,
"step": 399500
},
{
"epoch": 0.95,
"learning_rate": 1.3653589093692858e-05,
"loss": 1.2919,
"step": 400000
},
{
"epoch": 0.95,
"learning_rate": 1.3645656080059975e-05,
"loss": 1.2906,
"step": 400500
},
{
"epoch": 0.95,
"learning_rate": 1.3637723066427091e-05,
"loss": 1.3035,
"step": 401000
},
{
"epoch": 0.96,
"learning_rate": 1.3629790052794207e-05,
"loss": 1.3112,
"step": 401500
},
{
"epoch": 0.96,
"learning_rate": 1.3621857039161323e-05,
"loss": 1.3068,
"step": 402000
},
{
"epoch": 0.96,
"learning_rate": 1.361392402552844e-05,
"loss": 1.2963,
"step": 402500
},
{
"epoch": 0.96,
"learning_rate": 1.3605991011895555e-05,
"loss": 1.283,
"step": 403000
},
{
"epoch": 0.96,
"learning_rate": 1.359805799826267e-05,
"loss": 1.2828,
"step": 403500
},
{
"epoch": 0.96,
"learning_rate": 1.3590124984629786e-05,
"loss": 1.306,
"step": 404000
},
{
"epoch": 0.96,
"learning_rate": 1.3582191970996902e-05,
"loss": 1.2986,
"step": 404500
},
{
"epoch": 0.96,
"learning_rate": 1.357425895736402e-05,
"loss": 1.2872,
"step": 405000
},
{
"epoch": 0.97,
"learning_rate": 1.3566325943731136e-05,
"loss": 1.2923,
"step": 405500
},
{
"epoch": 0.97,
"learning_rate": 1.3558392930098252e-05,
"loss": 1.2967,
"step": 406000
},
{
"epoch": 0.97,
"learning_rate": 1.3550459916465367e-05,
"loss": 1.2779,
"step": 406500
},
{
"epoch": 0.97,
"learning_rate": 1.3542526902832485e-05,
"loss": 1.2828,
"step": 407000
},
{
"epoch": 0.97,
"learning_rate": 1.35345938891996e-05,
"loss": 1.2953,
"step": 407500
},
{
"epoch": 0.97,
"learning_rate": 1.3526660875566715e-05,
"loss": 1.2748,
"step": 408000
},
{
"epoch": 0.97,
"learning_rate": 1.3518727861933831e-05,
"loss": 1.2979,
"step": 408500
},
{
"epoch": 0.97,
"learning_rate": 1.3510794848300949e-05,
"loss": 1.2942,
"step": 409000
},
{
"epoch": 0.97,
"learning_rate": 1.3502861834668064e-05,
"loss": 1.3034,
"step": 409500
},
{
"epoch": 0.98,
"learning_rate": 1.349492882103518e-05,
"loss": 1.3038,
"step": 410000
},
{
"epoch": 0.98,
"learning_rate": 1.3486995807402296e-05,
"loss": 1.3012,
"step": 410500
},
{
"epoch": 0.98,
"learning_rate": 1.3479062793769414e-05,
"loss": 1.307,
"step": 411000
},
{
"epoch": 0.98,
"learning_rate": 1.347112978013653e-05,
"loss": 1.296,
"step": 411500
},
{
"epoch": 0.98,
"learning_rate": 1.3463196766503644e-05,
"loss": 1.3014,
"step": 412000
},
{
"epoch": 0.98,
"learning_rate": 1.345526375287076e-05,
"loss": 1.2976,
"step": 412500
},
{
"epoch": 0.98,
"learning_rate": 1.3447330739237875e-05,
"loss": 1.3056,
"step": 413000
},
{
"epoch": 0.98,
"learning_rate": 1.3439397725604993e-05,
"loss": 1.2974,
"step": 413500
},
{
"epoch": 0.99,
"learning_rate": 1.3431464711972109e-05,
"loss": 1.287,
"step": 414000
},
{
"epoch": 0.99,
"learning_rate": 1.3423531698339225e-05,
"loss": 1.2929,
"step": 414500
},
{
"epoch": 0.99,
"learning_rate": 1.341559868470634e-05,
"loss": 1.2903,
"step": 415000
},
{
"epoch": 0.99,
"learning_rate": 1.3407665671073458e-05,
"loss": 1.2967,
"step": 415500
},
{
"epoch": 0.99,
"learning_rate": 1.3399732657440574e-05,
"loss": 1.2881,
"step": 416000
},
{
"epoch": 0.99,
"learning_rate": 1.3391799643807688e-05,
"loss": 1.2844,
"step": 416500
},
{
"epoch": 0.99,
"learning_rate": 1.3383866630174804e-05,
"loss": 1.2803,
"step": 417000
},
{
"epoch": 0.99,
"learning_rate": 1.337593361654192e-05,
"loss": 1.2975,
"step": 417500
},
{
"epoch": 0.99,
"learning_rate": 1.3368000602909037e-05,
"loss": 1.2977,
"step": 418000
},
{
"epoch": 1.0,
"learning_rate": 1.3360067589276153e-05,
"loss": 1.2968,
"step": 418500
},
{
"epoch": 1.0,
"learning_rate": 1.335213457564327e-05,
"loss": 1.2808,
"step": 419000
},
{
"epoch": 1.0,
"learning_rate": 1.3344201562010387e-05,
"loss": 1.291,
"step": 419500
},
{
"epoch": 1.0,
"learning_rate": 1.3336268548377503e-05,
"loss": 1.2857,
"step": 420000
},
{
"epoch": 1.0,
"eval_loss": 1.2632273435592651,
"eval_runtime": 3622.7986,
"eval_samples_per_second": 366.442,
"eval_steps_per_second": 22.903,
"step": 420185
},
{
"epoch": 1.0,
"learning_rate": 1.3328335534744618e-05,
"loss": 1.2928,
"step": 420500
},
{
"epoch": 1.0,
"learning_rate": 1.3320402521111733e-05,
"loss": 1.274,
"step": 421000
},
{
"epoch": 1.0,
"learning_rate": 1.3312469507478849e-05,
"loss": 1.2829,
"step": 421500
},
{
"epoch": 1.0,
"learning_rate": 1.3304536493845966e-05,
"loss": 1.2867,
"step": 422000
},
{
"epoch": 1.01,
"learning_rate": 1.3296603480213082e-05,
"loss": 1.2886,
"step": 422500
},
{
"epoch": 1.01,
"learning_rate": 1.3288670466580198e-05,
"loss": 1.2908,
"step": 423000
},
{
"epoch": 1.01,
"learning_rate": 1.3280737452947314e-05,
"loss": 1.2776,
"step": 423500
},
{
"epoch": 1.01,
"learning_rate": 1.3272804439314431e-05,
"loss": 1.2855,
"step": 424000
},
{
"epoch": 1.01,
"learning_rate": 1.3264871425681547e-05,
"loss": 1.2843,
"step": 424500
},
{
"epoch": 1.01,
"learning_rate": 1.3256938412048663e-05,
"loss": 1.276,
"step": 425000
},
{
"epoch": 1.01,
"learning_rate": 1.3249005398415777e-05,
"loss": 1.2771,
"step": 425500
},
{
"epoch": 1.01,
"learning_rate": 1.3241072384782893e-05,
"loss": 1.291,
"step": 426000
},
{
"epoch": 1.02,
"learning_rate": 1.323313937115001e-05,
"loss": 1.2731,
"step": 426500
},
{
"epoch": 1.02,
"learning_rate": 1.3225206357517126e-05,
"loss": 1.2979,
"step": 427000
},
{
"epoch": 1.02,
"learning_rate": 1.3217273343884242e-05,
"loss": 1.3006,
"step": 427500
},
{
"epoch": 1.02,
"learning_rate": 1.3209340330251358e-05,
"loss": 1.2872,
"step": 428000
},
{
"epoch": 1.02,
"learning_rate": 1.3201407316618476e-05,
"loss": 1.2708,
"step": 428500
},
{
"epoch": 1.02,
"learning_rate": 1.3193474302985592e-05,
"loss": 1.2794,
"step": 429000
},
{
"epoch": 1.02,
"learning_rate": 1.3185541289352707e-05,
"loss": 1.2916,
"step": 429500
},
{
"epoch": 1.02,
"learning_rate": 1.3177608275719822e-05,
"loss": 1.2796,
"step": 430000
},
{
"epoch": 1.02,
"learning_rate": 1.316967526208694e-05,
"loss": 1.2867,
"step": 430500
},
{
"epoch": 1.03,
"learning_rate": 1.3161742248454055e-05,
"loss": 1.2937,
"step": 431000
},
{
"epoch": 1.03,
"learning_rate": 1.3153809234821171e-05,
"loss": 1.2805,
"step": 431500
},
{
"epoch": 1.03,
"learning_rate": 1.3145876221188287e-05,
"loss": 1.2908,
"step": 432000
},
{
"epoch": 1.03,
"learning_rate": 1.3137943207555404e-05,
"loss": 1.2829,
"step": 432500
},
{
"epoch": 1.03,
"learning_rate": 1.313001019392252e-05,
"loss": 1.2626,
"step": 433000
},
{
"epoch": 1.03,
"learning_rate": 1.3122077180289636e-05,
"loss": 1.2799,
"step": 433500
},
{
"epoch": 1.03,
"learning_rate": 1.311414416665675e-05,
"loss": 1.282,
"step": 434000
},
{
"epoch": 1.03,
"learning_rate": 1.3106211153023866e-05,
"loss": 1.2944,
"step": 434500
},
{
"epoch": 1.04,
"learning_rate": 1.3098278139390984e-05,
"loss": 1.2696,
"step": 435000
},
{
"epoch": 1.04,
"learning_rate": 1.30903451257581e-05,
"loss": 1.2904,
"step": 435500
},
{
"epoch": 1.04,
"learning_rate": 1.3082412112125215e-05,
"loss": 1.2845,
"step": 436000
},
{
"epoch": 1.04,
"learning_rate": 1.3074479098492331e-05,
"loss": 1.2837,
"step": 436500
},
{
"epoch": 1.04,
"learning_rate": 1.3066546084859449e-05,
"loss": 1.2837,
"step": 437000
},
{
"epoch": 1.04,
"learning_rate": 1.3058613071226565e-05,
"loss": 1.2696,
"step": 437500
},
{
"epoch": 1.04,
"learning_rate": 1.305068005759368e-05,
"loss": 1.2887,
"step": 438000
},
{
"epoch": 1.04,
"learning_rate": 1.3042747043960795e-05,
"loss": 1.2719,
"step": 438500
},
{
"epoch": 1.04,
"learning_rate": 1.303481403032791e-05,
"loss": 1.2717,
"step": 439000
},
{
"epoch": 1.05,
"learning_rate": 1.3026881016695028e-05,
"loss": 1.2922,
"step": 439500
},
{
"epoch": 1.05,
"learning_rate": 1.3018948003062144e-05,
"loss": 1.2643,
"step": 440000
},
{
"epoch": 1.05,
"learning_rate": 1.301101498942926e-05,
"loss": 1.2637,
"step": 440500
},
{
"epoch": 1.05,
"learning_rate": 1.3003081975796377e-05,
"loss": 1.2732,
"step": 441000
},
{
"epoch": 1.05,
"learning_rate": 1.2995148962163493e-05,
"loss": 1.2595,
"step": 441500
},
{
"epoch": 1.05,
"learning_rate": 1.2987215948530609e-05,
"loss": 1.2757,
"step": 442000
},
{
"epoch": 1.05,
"learning_rate": 1.2979282934897725e-05,
"loss": 1.3007,
"step": 442500
},
{
"epoch": 1.05,
"learning_rate": 1.297134992126484e-05,
"loss": 1.2722,
"step": 443000
},
{
"epoch": 1.06,
"learning_rate": 1.2963416907631957e-05,
"loss": 1.2739,
"step": 443500
},
{
"epoch": 1.06,
"learning_rate": 1.2955483893999073e-05,
"loss": 1.267,
"step": 444000
},
{
"epoch": 1.06,
"learning_rate": 1.2947550880366188e-05,
"loss": 1.2832,
"step": 444500
},
{
"epoch": 1.06,
"learning_rate": 1.2939617866733304e-05,
"loss": 1.2718,
"step": 445000
},
{
"epoch": 1.06,
"learning_rate": 1.2931684853100422e-05,
"loss": 1.2563,
"step": 445500
},
{
"epoch": 1.06,
"learning_rate": 1.2923751839467538e-05,
"loss": 1.28,
"step": 446000
},
{
"epoch": 1.06,
"learning_rate": 1.2915818825834654e-05,
"loss": 1.2692,
"step": 446500
},
{
"epoch": 1.06,
"learning_rate": 1.290788581220177e-05,
"loss": 1.278,
"step": 447000
},
{
"epoch": 1.07,
"learning_rate": 1.2899952798568884e-05,
"loss": 1.288,
"step": 447500
},
{
"epoch": 1.07,
"learning_rate": 1.2892019784936001e-05,
"loss": 1.2656,
"step": 448000
},
{
"epoch": 1.07,
"learning_rate": 1.2884086771303117e-05,
"loss": 1.28,
"step": 448500
},
{
"epoch": 1.07,
"learning_rate": 1.2876153757670233e-05,
"loss": 1.2734,
"step": 449000
},
{
"epoch": 1.07,
"learning_rate": 1.2868220744037349e-05,
"loss": 1.2654,
"step": 449500
},
{
"epoch": 1.07,
"learning_rate": 1.2860287730404466e-05,
"loss": 1.2963,
"step": 450000
},
{
"epoch": 1.07,
"learning_rate": 1.2852354716771582e-05,
"loss": 1.2817,
"step": 450500
},
{
"epoch": 1.07,
"learning_rate": 1.2844421703138698e-05,
"loss": 1.2841,
"step": 451000
},
{
"epoch": 1.07,
"learning_rate": 1.2836488689505816e-05,
"loss": 1.2771,
"step": 451500
},
{
"epoch": 1.08,
"learning_rate": 1.282855567587293e-05,
"loss": 1.2715,
"step": 452000
},
{
"epoch": 1.08,
"learning_rate": 1.2820622662240046e-05,
"loss": 1.2824,
"step": 452500
},
{
"epoch": 1.08,
"learning_rate": 1.2812689648607162e-05,
"loss": 1.2809,
"step": 453000
},
{
"epoch": 1.08,
"learning_rate": 1.2804756634974277e-05,
"loss": 1.2755,
"step": 453500
},
{
"epoch": 1.08,
"learning_rate": 1.2796823621341395e-05,
"loss": 1.2732,
"step": 454000
},
{
"epoch": 1.08,
"learning_rate": 1.2788890607708511e-05,
"loss": 1.3004,
"step": 454500
},
{
"epoch": 1.08,
"learning_rate": 1.2780957594075627e-05,
"loss": 1.268,
"step": 455000
},
{
"epoch": 1.08,
"learning_rate": 1.2773024580442743e-05,
"loss": 1.2652,
"step": 455500
},
{
"epoch": 1.09,
"learning_rate": 1.276509156680986e-05,
"loss": 1.2742,
"step": 456000
},
{
"epoch": 1.09,
"learning_rate": 1.2757158553176974e-05,
"loss": 1.2517,
"step": 456500
},
{
"epoch": 1.09,
"learning_rate": 1.274922553954409e-05,
"loss": 1.2721,
"step": 457000
},
{
"epoch": 1.09,
"learning_rate": 1.2741292525911206e-05,
"loss": 1.281,
"step": 457500
},
{
"epoch": 1.09,
"learning_rate": 1.2733359512278322e-05,
"loss": 1.272,
"step": 458000
},
{
"epoch": 1.09,
"learning_rate": 1.272542649864544e-05,
"loss": 1.2756,
"step": 458500
},
{
"epoch": 1.09,
"learning_rate": 1.2717493485012555e-05,
"loss": 1.2691,
"step": 459000
},
{
"epoch": 1.09,
"learning_rate": 1.2709560471379671e-05,
"loss": 1.2635,
"step": 459500
},
{
"epoch": 1.09,
"learning_rate": 1.2701627457746789e-05,
"loss": 1.2589,
"step": 460000
},
{
"epoch": 1.1,
"learning_rate": 1.2693694444113905e-05,
"loss": 1.2861,
"step": 460500
},
{
"epoch": 1.1,
"learning_rate": 1.2685761430481019e-05,
"loss": 1.2718,
"step": 461000
},
{
"epoch": 1.1,
"learning_rate": 1.2677828416848135e-05,
"loss": 1.2716,
"step": 461500
},
{
"epoch": 1.1,
"learning_rate": 1.266989540321525e-05,
"loss": 1.2627,
"step": 462000
},
{
"epoch": 1.1,
"learning_rate": 1.2661962389582368e-05,
"loss": 1.2708,
"step": 462500
},
{
"epoch": 1.1,
"learning_rate": 1.2654029375949484e-05,
"loss": 1.2742,
"step": 463000
},
{
"epoch": 1.1,
"learning_rate": 1.26460963623166e-05,
"loss": 1.2576,
"step": 463500
},
{
"epoch": 1.1,
"learning_rate": 1.2638163348683716e-05,
"loss": 1.2793,
"step": 464000
},
{
"epoch": 1.11,
"learning_rate": 1.2630230335050833e-05,
"loss": 1.2698,
"step": 464500
},
{
"epoch": 1.11,
"learning_rate": 1.2622297321417949e-05,
"loss": 1.2602,
"step": 465000
},
{
"epoch": 1.11,
"learning_rate": 1.2614364307785063e-05,
"loss": 1.2813,
"step": 465500
},
{
"epoch": 1.11,
"learning_rate": 1.2606431294152179e-05,
"loss": 1.2683,
"step": 466000
},
{
"epoch": 1.11,
"learning_rate": 1.2598498280519295e-05,
"loss": 1.2693,
"step": 466500
},
{
"epoch": 1.11,
"learning_rate": 1.2590565266886413e-05,
"loss": 1.2724,
"step": 467000
},
{
"epoch": 1.11,
"learning_rate": 1.2582632253253528e-05,
"loss": 1.272,
"step": 467500
},
{
"epoch": 1.11,
"learning_rate": 1.2574699239620644e-05,
"loss": 1.2723,
"step": 468000
},
{
"epoch": 1.11,
"learning_rate": 1.256676622598776e-05,
"loss": 1.2667,
"step": 468500
},
{
"epoch": 1.12,
"learning_rate": 1.2558833212354878e-05,
"loss": 1.2661,
"step": 469000
},
{
"epoch": 1.12,
"learning_rate": 1.2550900198721994e-05,
"loss": 1.2673,
"step": 469500
},
{
"epoch": 1.12,
"learning_rate": 1.2542967185089108e-05,
"loss": 1.2657,
"step": 470000
},
{
"epoch": 1.12,
"learning_rate": 1.2535034171456224e-05,
"loss": 1.2734,
"step": 470500
},
{
"epoch": 1.12,
"learning_rate": 1.2527101157823341e-05,
"loss": 1.2571,
"step": 471000
},
{
"epoch": 1.12,
"learning_rate": 1.2519168144190457e-05,
"loss": 1.2637,
"step": 471500
},
{
"epoch": 1.12,
"learning_rate": 1.2511235130557573e-05,
"loss": 1.2697,
"step": 472000
},
{
"epoch": 1.12,
"learning_rate": 1.2503302116924689e-05,
"loss": 1.2722,
"step": 472500
},
{
"epoch": 1.13,
"learning_rate": 1.2495369103291806e-05,
"loss": 1.2659,
"step": 473000
},
{
"epoch": 1.13,
"learning_rate": 1.2487436089658922e-05,
"loss": 1.2732,
"step": 473500
},
{
"epoch": 1.13,
"learning_rate": 1.2479503076026038e-05,
"loss": 1.2419,
"step": 474000
},
{
"epoch": 1.13,
"learning_rate": 1.2471570062393152e-05,
"loss": 1.2505,
"step": 474500
},
{
"epoch": 1.13,
"learning_rate": 1.2463637048760268e-05,
"loss": 1.2709,
"step": 475000
},
{
"epoch": 1.13,
"learning_rate": 1.2455704035127386e-05,
"loss": 1.2733,
"step": 475500
},
{
"epoch": 1.13,
"learning_rate": 1.2447771021494502e-05,
"loss": 1.2586,
"step": 476000
},
{
"epoch": 1.13,
"learning_rate": 1.2439838007861617e-05,
"loss": 1.2517,
"step": 476500
},
{
"epoch": 1.14,
"learning_rate": 1.2431904994228733e-05,
"loss": 1.2657,
"step": 477000
},
{
"epoch": 1.14,
"learning_rate": 1.242397198059585e-05,
"loss": 1.2724,
"step": 477500
},
{
"epoch": 1.14,
"learning_rate": 1.2416038966962967e-05,
"loss": 1.2481,
"step": 478000
},
{
"epoch": 1.14,
"learning_rate": 1.240810595333008e-05,
"loss": 1.2894,
"step": 478500
},
{
"epoch": 1.14,
"learning_rate": 1.2400172939697197e-05,
"loss": 1.2753,
"step": 479000
},
{
"epoch": 1.14,
"learning_rate": 1.2392239926064313e-05,
"loss": 1.254,
"step": 479500
},
{
"epoch": 1.14,
"learning_rate": 1.238430691243143e-05,
"loss": 1.2603,
"step": 480000
},
{
"epoch": 1.14,
"learning_rate": 1.2376373898798546e-05,
"loss": 1.2692,
"step": 480500
},
{
"epoch": 1.14,
"learning_rate": 1.2368440885165662e-05,
"loss": 1.2742,
"step": 481000
},
{
"epoch": 1.15,
"learning_rate": 1.236050787153278e-05,
"loss": 1.2838,
"step": 481500
},
{
"epoch": 1.15,
"learning_rate": 1.2352574857899895e-05,
"loss": 1.2536,
"step": 482000
},
{
"epoch": 1.15,
"learning_rate": 1.2344641844267011e-05,
"loss": 1.2669,
"step": 482500
},
{
"epoch": 1.15,
"learning_rate": 1.2336708830634125e-05,
"loss": 1.2685,
"step": 483000
},
{
"epoch": 1.15,
"learning_rate": 1.2328775817001241e-05,
"loss": 1.2623,
"step": 483500
},
{
"epoch": 1.15,
"learning_rate": 1.2320842803368359e-05,
"loss": 1.2721,
"step": 484000
},
{
"epoch": 1.15,
"learning_rate": 1.2312909789735475e-05,
"loss": 1.2683,
"step": 484500
},
{
"epoch": 1.15,
"learning_rate": 1.230497677610259e-05,
"loss": 1.2667,
"step": 485000
},
{
"epoch": 1.16,
"learning_rate": 1.2297043762469706e-05,
"loss": 1.2757,
"step": 485500
},
{
"epoch": 1.16,
"learning_rate": 1.2289110748836824e-05,
"loss": 1.2731,
"step": 486000
},
{
"epoch": 1.16,
"learning_rate": 1.228117773520394e-05,
"loss": 1.2847,
"step": 486500
},
{
"epoch": 1.16,
"learning_rate": 1.2273244721571056e-05,
"loss": 1.2712,
"step": 487000
},
{
"epoch": 1.16,
"learning_rate": 1.226531170793817e-05,
"loss": 1.2747,
"step": 487500
},
{
"epoch": 1.16,
"learning_rate": 1.2257378694305286e-05,
"loss": 1.2632,
"step": 488000
},
{
"epoch": 1.16,
"learning_rate": 1.2249445680672403e-05,
"loss": 1.2437,
"step": 488500
},
{
"epoch": 1.16,
"learning_rate": 1.2241512667039519e-05,
"loss": 1.2564,
"step": 489000
},
{
"epoch": 1.16,
"learning_rate": 1.2233579653406635e-05,
"loss": 1.2496,
"step": 489500
},
{
"epoch": 1.17,
"learning_rate": 1.222564663977375e-05,
"loss": 1.2671,
"step": 490000
},
{
"epoch": 1.17,
"learning_rate": 1.2217713626140868e-05,
"loss": 1.2625,
"step": 490500
},
{
"epoch": 1.17,
"learning_rate": 1.2209780612507984e-05,
"loss": 1.2625,
"step": 491000
},
{
"epoch": 1.17,
"learning_rate": 1.22018475988751e-05,
"loss": 1.2608,
"step": 491500
},
{
"epoch": 1.17,
"learning_rate": 1.2193914585242214e-05,
"loss": 1.2733,
"step": 492000
},
{
"epoch": 1.17,
"learning_rate": 1.2185981571609332e-05,
"loss": 1.269,
"step": 492500
},
{
"epoch": 1.17,
"learning_rate": 1.2178048557976448e-05,
"loss": 1.2539,
"step": 493000
},
{
"epoch": 1.17,
"learning_rate": 1.2170115544343564e-05,
"loss": 1.271,
"step": 493500
},
{
"epoch": 1.18,
"learning_rate": 1.216218253071068e-05,
"loss": 1.2579,
"step": 494000
},
{
"epoch": 1.18,
"learning_rate": 1.2154249517077797e-05,
"loss": 1.2536,
"step": 494500
},
{
"epoch": 1.18,
"learning_rate": 1.2146316503444913e-05,
"loss": 1.2563,
"step": 495000
},
{
"epoch": 1.18,
"learning_rate": 1.2138383489812029e-05,
"loss": 1.2678,
"step": 495500
},
{
"epoch": 1.18,
"learning_rate": 1.2130450476179145e-05,
"loss": 1.2557,
"step": 496000
},
{
"epoch": 1.18,
"learning_rate": 1.2122517462546259e-05,
"loss": 1.2654,
"step": 496500
},
{
"epoch": 1.18,
"learning_rate": 1.2114584448913376e-05,
"loss": 1.2754,
"step": 497000
},
{
"epoch": 1.18,
"learning_rate": 1.2106651435280492e-05,
"loss": 1.2644,
"step": 497500
},
{
"epoch": 1.19,
"learning_rate": 1.2098718421647608e-05,
"loss": 1.2749,
"step": 498000
},
{
"epoch": 1.19,
"learning_rate": 1.2090785408014724e-05,
"loss": 1.2735,
"step": 498500
},
{
"epoch": 1.19,
"learning_rate": 1.2082852394381841e-05,
"loss": 1.2462,
"step": 499000
},
{
"epoch": 1.19,
"learning_rate": 1.2074919380748957e-05,
"loss": 1.2446,
"step": 499500
},
{
"epoch": 1.19,
"learning_rate": 1.2066986367116073e-05,
"loss": 1.2712,
"step": 500000
},
{
"epoch": 1.19,
"learning_rate": 1.205905335348319e-05,
"loss": 1.2877,
"step": 500500
},
{
"epoch": 1.19,
"learning_rate": 1.2051120339850303e-05,
"loss": 1.2603,
"step": 501000
},
{
"epoch": 1.19,
"learning_rate": 1.204318732621742e-05,
"loss": 1.2627,
"step": 501500
},
{
"epoch": 1.19,
"learning_rate": 1.2035254312584537e-05,
"loss": 1.2672,
"step": 502000
},
{
"epoch": 1.2,
"learning_rate": 1.2027321298951653e-05,
"loss": 1.2648,
"step": 502500
},
{
"epoch": 1.2,
"learning_rate": 1.201938828531877e-05,
"loss": 1.2715,
"step": 503000
},
{
"epoch": 1.2,
"learning_rate": 1.2011455271685886e-05,
"loss": 1.2591,
"step": 503500
},
{
"epoch": 1.2,
"learning_rate": 1.2003522258053002e-05,
"loss": 1.2527,
"step": 504000
},
{
"epoch": 1.2,
"learning_rate": 1.1995589244420118e-05,
"loss": 1.2525,
"step": 504500
},
{
"epoch": 1.2,
"learning_rate": 1.1987656230787235e-05,
"loss": 1.2487,
"step": 505000
},
{
"epoch": 1.2,
"learning_rate": 1.197972321715435e-05,
"loss": 1.2802,
"step": 505500
},
{
"epoch": 1.2,
"learning_rate": 1.1971790203521465e-05,
"loss": 1.2523,
"step": 506000
},
{
"epoch": 1.21,
"learning_rate": 1.1963857189888581e-05,
"loss": 1.2546,
"step": 506500
},
{
"epoch": 1.21,
"learning_rate": 1.1955924176255697e-05,
"loss": 1.2496,
"step": 507000
},
{
"epoch": 1.21,
"learning_rate": 1.1947991162622815e-05,
"loss": 1.2648,
"step": 507500
},
{
"epoch": 1.21,
"learning_rate": 1.194005814898993e-05,
"loss": 1.2594,
"step": 508000
},
{
"epoch": 1.21,
"learning_rate": 1.1932125135357046e-05,
"loss": 1.2646,
"step": 508500
},
{
"epoch": 1.21,
"learning_rate": 1.1924192121724162e-05,
"loss": 1.2682,
"step": 509000
},
{
"epoch": 1.21,
"learning_rate": 1.191625910809128e-05,
"loss": 1.2853,
"step": 509500
},
{
"epoch": 1.21,
"learning_rate": 1.1908326094458394e-05,
"loss": 1.2447,
"step": 510000
},
{
"epoch": 1.21,
"learning_rate": 1.190039308082551e-05,
"loss": 1.2698,
"step": 510500
},
{
"epoch": 1.22,
"learning_rate": 1.1892460067192626e-05,
"loss": 1.261,
"step": 511000
},
{
"epoch": 1.22,
"learning_rate": 1.1884527053559743e-05,
"loss": 1.2583,
"step": 511500
},
{
"epoch": 1.22,
"learning_rate": 1.1876594039926859e-05,
"loss": 1.2381,
"step": 512000
},
{
"epoch": 1.22,
"learning_rate": 1.1868661026293975e-05,
"loss": 1.2632,
"step": 512500
},
{
"epoch": 1.22,
"learning_rate": 1.186072801266109e-05,
"loss": 1.2616,
"step": 513000
},
{
"epoch": 1.22,
"learning_rate": 1.1852794999028208e-05,
"loss": 1.2488,
"step": 513500
},
{
"epoch": 1.22,
"learning_rate": 1.1844861985395324e-05,
"loss": 1.2648,
"step": 514000
},
{
"epoch": 1.22,
"learning_rate": 1.1836928971762438e-05,
"loss": 1.2885,
"step": 514500
},
{
"epoch": 1.23,
"learning_rate": 1.1828995958129554e-05,
"loss": 1.2656,
"step": 515000
},
{
"epoch": 1.23,
"learning_rate": 1.182106294449667e-05,
"loss": 1.2588,
"step": 515500
},
{
"epoch": 1.23,
"learning_rate": 1.1813129930863788e-05,
"loss": 1.2548,
"step": 516000
},
{
"epoch": 1.23,
"learning_rate": 1.1805196917230904e-05,
"loss": 1.2548,
"step": 516500
},
{
"epoch": 1.23,
"learning_rate": 1.179726390359802e-05,
"loss": 1.2725,
"step": 517000
},
{
"epoch": 1.23,
"learning_rate": 1.1789330889965135e-05,
"loss": 1.2597,
"step": 517500
},
{
"epoch": 1.23,
"learning_rate": 1.1781397876332253e-05,
"loss": 1.2402,
"step": 518000
},
{
"epoch": 1.23,
"learning_rate": 1.1773464862699369e-05,
"loss": 1.2608,
"step": 518500
},
{
"epoch": 1.24,
"learning_rate": 1.1765531849066483e-05,
"loss": 1.2566,
"step": 519000
},
{
"epoch": 1.24,
"learning_rate": 1.1757598835433599e-05,
"loss": 1.2647,
"step": 519500
},
{
"epoch": 1.24,
"learning_rate": 1.1749665821800715e-05,
"loss": 1.2612,
"step": 520000
},
{
"epoch": 1.24,
"learning_rate": 1.1741732808167832e-05,
"loss": 1.2745,
"step": 520500
},
{
"epoch": 1.24,
"learning_rate": 1.1733799794534948e-05,
"loss": 1.2398,
"step": 521000
},
{
"epoch": 1.24,
"learning_rate": 1.1725866780902064e-05,
"loss": 1.2673,
"step": 521500
},
{
"epoch": 1.24,
"learning_rate": 1.1717933767269181e-05,
"loss": 1.2562,
"step": 522000
},
{
"epoch": 1.24,
"learning_rate": 1.1710000753636297e-05,
"loss": 1.2438,
"step": 522500
},
{
"epoch": 1.24,
"learning_rate": 1.1702067740003411e-05,
"loss": 1.2576,
"step": 523000
},
{
"epoch": 1.25,
"learning_rate": 1.1694134726370527e-05,
"loss": 1.2526,
"step": 523500
},
{
"epoch": 1.25,
"learning_rate": 1.1686201712737643e-05,
"loss": 1.2539,
"step": 524000
},
{
"epoch": 1.25,
"learning_rate": 1.167826869910476e-05,
"loss": 1.266,
"step": 524500
},
{
"epoch": 1.25,
"learning_rate": 1.1670335685471877e-05,
"loss": 1.2492,
"step": 525000
},
{
"epoch": 1.25,
"learning_rate": 1.1662402671838992e-05,
"loss": 1.2492,
"step": 525500
},
{
"epoch": 1.25,
"learning_rate": 1.1654469658206108e-05,
"loss": 1.2582,
"step": 526000
},
{
"epoch": 1.25,
"learning_rate": 1.1646536644573226e-05,
"loss": 1.255,
"step": 526500
},
{
"epoch": 1.25,
"learning_rate": 1.1638603630940342e-05,
"loss": 1.2504,
"step": 527000
},
{
"epoch": 1.26,
"learning_rate": 1.1630670617307456e-05,
"loss": 1.274,
"step": 527500
},
{
"epoch": 1.26,
"learning_rate": 1.1622737603674572e-05,
"loss": 1.2571,
"step": 528000
},
{
"epoch": 1.26,
"learning_rate": 1.1614804590041688e-05,
"loss": 1.2645,
"step": 528500
},
{
"epoch": 1.26,
"learning_rate": 1.1606871576408805e-05,
"loss": 1.2595,
"step": 529000
},
{
"epoch": 1.26,
"learning_rate": 1.1598938562775921e-05,
"loss": 1.2425,
"step": 529500
},
{
"epoch": 1.26,
"learning_rate": 1.1591005549143037e-05,
"loss": 1.2575,
"step": 530000
},
{
"epoch": 1.26,
"learning_rate": 1.1583072535510153e-05,
"loss": 1.2421,
"step": 530500
},
{
"epoch": 1.26,
"learning_rate": 1.157513952187727e-05,
"loss": 1.2479,
"step": 531000
},
{
"epoch": 1.26,
"learning_rate": 1.1567206508244386e-05,
"loss": 1.2727,
"step": 531500
},
{
"epoch": 1.27,
"learning_rate": 1.15592734946115e-05,
"loss": 1.2378,
"step": 532000
},
{
"epoch": 1.27,
"learning_rate": 1.1551340480978616e-05,
"loss": 1.2603,
"step": 532500
},
{
"epoch": 1.27,
"learning_rate": 1.1543407467345734e-05,
"loss": 1.2565,
"step": 533000
},
{
"epoch": 1.27,
"learning_rate": 1.153547445371285e-05,
"loss": 1.2526,
"step": 533500
},
{
"epoch": 1.27,
"learning_rate": 1.1527541440079966e-05,
"loss": 1.2603,
"step": 534000
},
{
"epoch": 1.27,
"learning_rate": 1.1519608426447081e-05,
"loss": 1.2542,
"step": 534500
},
{
"epoch": 1.27,
"learning_rate": 1.1511675412814199e-05,
"loss": 1.2574,
"step": 535000
},
{
"epoch": 1.27,
"learning_rate": 1.1503742399181315e-05,
"loss": 1.258,
"step": 535500
},
{
"epoch": 1.28,
"learning_rate": 1.149580938554843e-05,
"loss": 1.2556,
"step": 536000
},
{
"epoch": 1.28,
"learning_rate": 1.1487876371915545e-05,
"loss": 1.2515,
"step": 536500
},
{
"epoch": 1.28,
"learning_rate": 1.147994335828266e-05,
"loss": 1.2427,
"step": 537000
},
{
"epoch": 1.28,
"learning_rate": 1.1472010344649778e-05,
"loss": 1.2583,
"step": 537500
},
{
"epoch": 1.28,
"learning_rate": 1.1464077331016894e-05,
"loss": 1.2782,
"step": 538000
},
{
"epoch": 1.28,
"learning_rate": 1.145614431738401e-05,
"loss": 1.2524,
"step": 538500
},
{
"epoch": 1.28,
"learning_rate": 1.1448211303751126e-05,
"loss": 1.2447,
"step": 539000
},
{
"epoch": 1.28,
"learning_rate": 1.1440278290118243e-05,
"loss": 1.2417,
"step": 539500
},
{
"epoch": 1.29,
"learning_rate": 1.143234527648536e-05,
"loss": 1.2527,
"step": 540000
},
{
"epoch": 1.29,
"learning_rate": 1.1424412262852475e-05,
"loss": 1.2551,
"step": 540500
},
{
"epoch": 1.29,
"learning_rate": 1.141647924921959e-05,
"loss": 1.2426,
"step": 541000
},
{
"epoch": 1.29,
"learning_rate": 1.1408546235586705e-05,
"loss": 1.2493,
"step": 541500
},
{
"epoch": 1.29,
"learning_rate": 1.1400613221953823e-05,
"loss": 1.2722,
"step": 542000
},
{
"epoch": 1.29,
"learning_rate": 1.1392680208320939e-05,
"loss": 1.2572,
"step": 542500
},
{
"epoch": 1.29,
"learning_rate": 1.1384747194688055e-05,
"loss": 1.2443,
"step": 543000
},
{
"epoch": 1.29,
"learning_rate": 1.1376814181055172e-05,
"loss": 1.2579,
"step": 543500
},
{
"epoch": 1.29,
"learning_rate": 1.1368881167422288e-05,
"loss": 1.2437,
"step": 544000
},
{
"epoch": 1.3,
"learning_rate": 1.1360948153789404e-05,
"loss": 1.2388,
"step": 544500
},
{
"epoch": 1.3,
"learning_rate": 1.135301514015652e-05,
"loss": 1.2611,
"step": 545000
},
{
"epoch": 1.3,
"learning_rate": 1.1345082126523634e-05,
"loss": 1.2557,
"step": 545500
},
{
"epoch": 1.3,
"learning_rate": 1.1337149112890751e-05,
"loss": 1.2445,
"step": 546000
},
{
"epoch": 1.3,
"learning_rate": 1.1329216099257867e-05,
"loss": 1.2309,
"step": 546500
},
{
"epoch": 1.3,
"learning_rate": 1.1321283085624983e-05,
"loss": 1.2554,
"step": 547000
},
{
"epoch": 1.3,
"learning_rate": 1.1313350071992099e-05,
"loss": 1.2679,
"step": 547500
},
{
"epoch": 1.3,
"learning_rate": 1.1305417058359217e-05,
"loss": 1.2626,
"step": 548000
},
{
"epoch": 1.31,
"learning_rate": 1.1297484044726332e-05,
"loss": 1.2426,
"step": 548500
},
{
"epoch": 1.31,
"learning_rate": 1.1289551031093448e-05,
"loss": 1.2694,
"step": 549000
},
{
"epoch": 1.31,
"learning_rate": 1.1281618017460564e-05,
"loss": 1.2539,
"step": 549500
},
{
"epoch": 1.31,
"learning_rate": 1.1273685003827678e-05,
"loss": 1.2498,
"step": 550000
},
{
"epoch": 1.31,
"learning_rate": 1.1265751990194796e-05,
"loss": 1.2407,
"step": 550500
},
{
"epoch": 1.31,
"learning_rate": 1.1257818976561912e-05,
"loss": 1.2472,
"step": 551000
},
{
"epoch": 1.31,
"learning_rate": 1.1249885962929028e-05,
"loss": 1.2337,
"step": 551500
},
{
"epoch": 1.31,
"learning_rate": 1.1241952949296145e-05,
"loss": 1.2631,
"step": 552000
},
{
"epoch": 1.31,
"learning_rate": 1.1234019935663261e-05,
"loss": 1.2417,
"step": 552500
},
{
"epoch": 1.32,
"learning_rate": 1.1226086922030377e-05,
"loss": 1.2538,
"step": 553000
},
{
"epoch": 1.32,
"learning_rate": 1.1218153908397493e-05,
"loss": 1.2546,
"step": 553500
},
{
"epoch": 1.32,
"learning_rate": 1.121022089476461e-05,
"loss": 1.253,
"step": 554000
},
{
"epoch": 1.32,
"learning_rate": 1.1202287881131724e-05,
"loss": 1.2454,
"step": 554500
},
{
"epoch": 1.32,
"learning_rate": 1.119435486749884e-05,
"loss": 1.2478,
"step": 555000
},
{
"epoch": 1.32,
"learning_rate": 1.1186421853865956e-05,
"loss": 1.2628,
"step": 555500
},
{
"epoch": 1.32,
"learning_rate": 1.1178488840233072e-05,
"loss": 1.2592,
"step": 556000
},
{
"epoch": 1.32,
"learning_rate": 1.117055582660019e-05,
"loss": 1.258,
"step": 556500
},
{
"epoch": 1.33,
"learning_rate": 1.1162622812967306e-05,
"loss": 1.2573,
"step": 557000
},
{
"epoch": 1.33,
"learning_rate": 1.1154689799334421e-05,
"loss": 1.2372,
"step": 557500
},
{
"epoch": 1.33,
"learning_rate": 1.1146756785701537e-05,
"loss": 1.2416,
"step": 558000
},
{
"epoch": 1.33,
"learning_rate": 1.1138823772068655e-05,
"loss": 1.2381,
"step": 558500
},
{
"epoch": 1.33,
"learning_rate": 1.1130890758435769e-05,
"loss": 1.2492,
"step": 559000
},
{
"epoch": 1.33,
"learning_rate": 1.1122957744802885e-05,
"loss": 1.259,
"step": 559500
},
{
"epoch": 1.33,
"learning_rate": 1.111502473117e-05,
"loss": 1.2353,
"step": 560000
},
{
"epoch": 1.33,
"learning_rate": 1.1107091717537117e-05,
"loss": 1.2314,
"step": 560500
},
{
"epoch": 1.34,
"learning_rate": 1.1099158703904234e-05,
"loss": 1.2596,
"step": 561000
},
{
"epoch": 1.34,
"learning_rate": 1.109122569027135e-05,
"loss": 1.2457,
"step": 561500
},
{
"epoch": 1.34,
"learning_rate": 1.1083292676638466e-05,
"loss": 1.2315,
"step": 562000
},
{
"epoch": 1.34,
"learning_rate": 1.1075359663005583e-05,
"loss": 1.2322,
"step": 562500
},
{
"epoch": 1.34,
"learning_rate": 1.10674266493727e-05,
"loss": 1.2474,
"step": 563000
},
{
"epoch": 1.34,
"learning_rate": 1.1059493635739813e-05,
"loss": 1.251,
"step": 563500
},
{
"epoch": 1.34,
"learning_rate": 1.105156062210693e-05,
"loss": 1.2431,
"step": 564000
},
{
"epoch": 1.34,
"learning_rate": 1.1043627608474045e-05,
"loss": 1.2544,
"step": 564500
},
{
"epoch": 1.34,
"learning_rate": 1.1035694594841163e-05,
"loss": 1.2439,
"step": 565000
},
{
"epoch": 1.35,
"learning_rate": 1.1027761581208279e-05,
"loss": 1.2488,
"step": 565500
},
{
"epoch": 1.35,
"learning_rate": 1.1019828567575394e-05,
"loss": 1.2515,
"step": 566000
},
{
"epoch": 1.35,
"learning_rate": 1.101189555394251e-05,
"loss": 1.2324,
"step": 566500
},
{
"epoch": 1.35,
"learning_rate": 1.1003962540309628e-05,
"loss": 1.2294,
"step": 567000
},
{
"epoch": 1.35,
"learning_rate": 1.0996029526676742e-05,
"loss": 1.2316,
"step": 567500
},
{
"epoch": 1.35,
"learning_rate": 1.0988096513043858e-05,
"loss": 1.219,
"step": 568000
},
{
"epoch": 1.35,
"learning_rate": 1.0980163499410974e-05,
"loss": 1.2251,
"step": 568500
},
{
"epoch": 1.35,
"learning_rate": 1.097223048577809e-05,
"loss": 1.2195,
"step": 569000
},
{
"epoch": 1.36,
"learning_rate": 1.0964297472145207e-05,
"loss": 1.2508,
"step": 569500
},
{
"epoch": 1.36,
"learning_rate": 1.0956364458512323e-05,
"loss": 1.2491,
"step": 570000
},
{
"epoch": 1.36,
"learning_rate": 1.0948431444879439e-05,
"loss": 1.2378,
"step": 570500
},
{
"epoch": 1.36,
"learning_rate": 1.0940498431246555e-05,
"loss": 1.2378,
"step": 571000
},
{
"epoch": 1.36,
"learning_rate": 1.0932565417613672e-05,
"loss": 1.2436,
"step": 571500
},
{
"epoch": 1.36,
"learning_rate": 1.0924632403980787e-05,
"loss": 1.2443,
"step": 572000
},
{
"epoch": 1.36,
"learning_rate": 1.0916699390347902e-05,
"loss": 1.2383,
"step": 572500
},
{
"epoch": 1.36,
"learning_rate": 1.0908766376715018e-05,
"loss": 1.2309,
"step": 573000
},
{
"epoch": 1.36,
"learning_rate": 1.0900833363082136e-05,
"loss": 1.2478,
"step": 573500
},
{
"epoch": 1.37,
"learning_rate": 1.0892900349449252e-05,
"loss": 1.2505,
"step": 574000
},
{
"epoch": 1.37,
"learning_rate": 1.0884967335816368e-05,
"loss": 1.2439,
"step": 574500
},
{
"epoch": 1.37,
"learning_rate": 1.0877034322183483e-05,
"loss": 1.2237,
"step": 575000
},
{
"epoch": 1.37,
"learning_rate": 1.0869101308550601e-05,
"loss": 1.2199,
"step": 575500
},
{
"epoch": 1.37,
"learning_rate": 1.0861168294917717e-05,
"loss": 1.2324,
"step": 576000
},
{
"epoch": 1.37,
"learning_rate": 1.0853235281284831e-05,
"loss": 1.2398,
"step": 576500
},
{
"epoch": 1.37,
"learning_rate": 1.0845302267651947e-05,
"loss": 1.2273,
"step": 577000
},
{
"epoch": 1.37,
"learning_rate": 1.0837369254019063e-05,
"loss": 1.2428,
"step": 577500
},
{
"epoch": 1.38,
"learning_rate": 1.082943624038618e-05,
"loss": 1.255,
"step": 578000
},
{
"epoch": 1.38,
"learning_rate": 1.0821503226753296e-05,
"loss": 1.2402,
"step": 578500
},
{
"epoch": 1.38,
"learning_rate": 1.0813570213120412e-05,
"loss": 1.2374,
"step": 579000
},
{
"epoch": 1.38,
"learning_rate": 1.0805637199487528e-05,
"loss": 1.2142,
"step": 579500
},
{
"epoch": 1.38,
"learning_rate": 1.0797704185854645e-05,
"loss": 1.2368,
"step": 580000
},
{
"epoch": 1.38,
"learning_rate": 1.0789771172221761e-05,
"loss": 1.2328,
"step": 580500
},
{
"epoch": 1.38,
"learning_rate": 1.0781838158588875e-05,
"loss": 1.2355,
"step": 581000
},
{
"epoch": 1.38,
"learning_rate": 1.0773905144955991e-05,
"loss": 1.2521,
"step": 581500
},
{
"epoch": 1.39,
"learning_rate": 1.0765972131323107e-05,
"loss": 1.234,
"step": 582000
},
{
"epoch": 1.39,
"learning_rate": 1.0758039117690225e-05,
"loss": 1.2303,
"step": 582500
},
{
"epoch": 1.39,
"learning_rate": 1.075010610405734e-05,
"loss": 1.2484,
"step": 583000
},
{
"epoch": 1.39,
"learning_rate": 1.0742173090424457e-05,
"loss": 1.2394,
"step": 583500
},
{
"epoch": 1.39,
"learning_rate": 1.0734240076791574e-05,
"loss": 1.2242,
"step": 584000
},
{
"epoch": 1.39,
"learning_rate": 1.072630706315869e-05,
"loss": 1.2562,
"step": 584500
},
{
"epoch": 1.39,
"learning_rate": 1.0718374049525806e-05,
"loss": 1.2375,
"step": 585000
},
{
"epoch": 1.39,
"learning_rate": 1.071044103589292e-05,
"loss": 1.2331,
"step": 585500
},
{
"epoch": 1.39,
"learning_rate": 1.0702508022260036e-05,
"loss": 1.2309,
"step": 586000
},
{
"epoch": 1.4,
"learning_rate": 1.0694575008627153e-05,
"loss": 1.2495,
"step": 586500
},
{
"epoch": 1.4,
"learning_rate": 1.068664199499427e-05,
"loss": 1.2242,
"step": 587000
},
{
"epoch": 1.4,
"learning_rate": 1.0678708981361385e-05,
"loss": 1.2333,
"step": 587500
},
{
"epoch": 1.4,
"learning_rate": 1.0670775967728501e-05,
"loss": 1.2456,
"step": 588000
},
{
"epoch": 1.4,
"learning_rate": 1.0662842954095619e-05,
"loss": 1.2451,
"step": 588500
},
{
"epoch": 1.4,
"learning_rate": 1.0654909940462734e-05,
"loss": 1.2471,
"step": 589000
},
{
"epoch": 1.4,
"learning_rate": 1.064697692682985e-05,
"loss": 1.2343,
"step": 589500
},
{
"epoch": 1.4,
"learning_rate": 1.0639043913196964e-05,
"loss": 1.225,
"step": 590000
},
{
"epoch": 1.41,
"learning_rate": 1.063111089956408e-05,
"loss": 1.2223,
"step": 590500
},
{
"epoch": 1.41,
"learning_rate": 1.0623177885931198e-05,
"loss": 1.2502,
"step": 591000
},
{
"epoch": 1.41,
"learning_rate": 1.0615244872298314e-05,
"loss": 1.2455,
"step": 591500
},
{
"epoch": 1.41,
"learning_rate": 1.060731185866543e-05,
"loss": 1.2542,
"step": 592000
},
{
"epoch": 1.41,
"learning_rate": 1.0599378845032547e-05,
"loss": 1.2274,
"step": 592500
},
{
"epoch": 1.41,
"learning_rate": 1.0591445831399663e-05,
"loss": 1.2338,
"step": 593000
},
{
"epoch": 1.41,
"learning_rate": 1.0583512817766779e-05,
"loss": 1.2195,
"step": 593500
},
{
"epoch": 1.41,
"learning_rate": 1.0575579804133895e-05,
"loss": 1.2531,
"step": 594000
},
{
"epoch": 1.41,
"learning_rate": 1.0567646790501009e-05,
"loss": 1.2461,
"step": 594500
},
{
"epoch": 1.42,
"learning_rate": 1.0559713776868126e-05,
"loss": 1.2358,
"step": 595000
},
{
"epoch": 1.42,
"learning_rate": 1.0551780763235242e-05,
"loss": 1.2296,
"step": 595500
},
{
"epoch": 1.42,
"learning_rate": 1.0543847749602358e-05,
"loss": 1.228,
"step": 596000
},
{
"epoch": 1.42,
"learning_rate": 1.0535914735969474e-05,
"loss": 1.2335,
"step": 596500
},
{
"epoch": 1.42,
"learning_rate": 1.0527981722336592e-05,
"loss": 1.2414,
"step": 597000
},
{
"epoch": 1.42,
"learning_rate": 1.0520048708703708e-05,
"loss": 1.2255,
"step": 597500
},
{
"epoch": 1.42,
"learning_rate": 1.0512115695070823e-05,
"loss": 1.2497,
"step": 598000
},
{
"epoch": 1.42,
"learning_rate": 1.050418268143794e-05,
"loss": 1.2509,
"step": 598500
},
{
"epoch": 1.43,
"learning_rate": 1.0496249667805053e-05,
"loss": 1.2435,
"step": 599000
},
{
"epoch": 1.43,
"learning_rate": 1.0488316654172171e-05,
"loss": 1.2341,
"step": 599500
},
{
"epoch": 1.43,
"learning_rate": 1.0480383640539287e-05,
"loss": 1.2228,
"step": 600000
},
{
"epoch": 1.43,
"learning_rate": 1.0472450626906403e-05,
"loss": 1.24,
"step": 600500
},
{
"epoch": 1.43,
"learning_rate": 1.0464517613273519e-05,
"loss": 1.2094,
"step": 601000
},
{
"epoch": 1.43,
"learning_rate": 1.0456584599640636e-05,
"loss": 1.2489,
"step": 601500
},
{
"epoch": 1.43,
"learning_rate": 1.0448651586007752e-05,
"loss": 1.2388,
"step": 602000
},
{
"epoch": 1.43,
"learning_rate": 1.0440718572374868e-05,
"loss": 1.2268,
"step": 602500
},
{
"epoch": 1.44,
"learning_rate": 1.0432785558741985e-05,
"loss": 1.2429,
"step": 603000
},
{
"epoch": 1.44,
"learning_rate": 1.0424852545109098e-05,
"loss": 1.2336,
"step": 603500
},
{
"epoch": 1.44,
"learning_rate": 1.0416919531476215e-05,
"loss": 1.2319,
"step": 604000
},
{
"epoch": 1.44,
"learning_rate": 1.0408986517843331e-05,
"loss": 1.231,
"step": 604500
},
{
"epoch": 1.44,
"learning_rate": 1.0401053504210447e-05,
"loss": 1.2306,
"step": 605000
},
{
"epoch": 1.44,
"learning_rate": 1.0393120490577565e-05,
"loss": 1.2165,
"step": 605500
},
{
"epoch": 1.44,
"learning_rate": 1.038518747694468e-05,
"loss": 1.2401,
"step": 606000
},
{
"epoch": 1.44,
"learning_rate": 1.0377254463311796e-05,
"loss": 1.2291,
"step": 606500
},
{
"epoch": 1.44,
"learning_rate": 1.0369321449678912e-05,
"loss": 1.2372,
"step": 607000
},
{
"epoch": 1.45,
"learning_rate": 1.036138843604603e-05,
"loss": 1.2341,
"step": 607500
},
{
"epoch": 1.45,
"learning_rate": 1.0353455422413144e-05,
"loss": 1.2404,
"step": 608000
},
{
"epoch": 1.45,
"learning_rate": 1.034552240878026e-05,
"loss": 1.2312,
"step": 608500
},
{
"epoch": 1.45,
"learning_rate": 1.0337589395147376e-05,
"loss": 1.2478,
"step": 609000
},
{
"epoch": 1.45,
"learning_rate": 1.0329656381514492e-05,
"loss": 1.2271,
"step": 609500
},
{
"epoch": 1.45,
"learning_rate": 1.032172336788161e-05,
"loss": 1.2301,
"step": 610000
},
{
"epoch": 1.45,
"learning_rate": 1.0313790354248725e-05,
"loss": 1.219,
"step": 610500
},
{
"epoch": 1.45,
"learning_rate": 1.0305857340615841e-05,
"loss": 1.2283,
"step": 611000
},
{
"epoch": 1.46,
"learning_rate": 1.0297924326982957e-05,
"loss": 1.2512,
"step": 611500
},
{
"epoch": 1.46,
"learning_rate": 1.0289991313350071e-05,
"loss": 1.2383,
"step": 612000
},
{
"epoch": 1.46,
"learning_rate": 1.0282058299717189e-05,
"loss": 1.2406,
"step": 612500
},
{
"epoch": 1.46,
"learning_rate": 1.0274125286084304e-05,
"loss": 1.2417,
"step": 613000
},
{
"epoch": 1.46,
"learning_rate": 1.026619227245142e-05,
"loss": 1.2424,
"step": 613500
},
{
"epoch": 1.46,
"learning_rate": 1.0258259258818538e-05,
"loss": 1.2309,
"step": 614000
},
{
"epoch": 1.46,
"learning_rate": 1.0250326245185654e-05,
"loss": 1.2327,
"step": 614500
},
{
"epoch": 1.46,
"learning_rate": 1.024239323155277e-05,
"loss": 1.2523,
"step": 615000
},
{
"epoch": 1.46,
"learning_rate": 1.0234460217919885e-05,
"loss": 1.2256,
"step": 615500
},
{
"epoch": 1.47,
"learning_rate": 1.0226527204287003e-05,
"loss": 1.2333,
"step": 616000
},
{
"epoch": 1.47,
"learning_rate": 1.0218594190654117e-05,
"loss": 1.237,
"step": 616500
},
{
"epoch": 1.47,
"learning_rate": 1.0210661177021233e-05,
"loss": 1.2365,
"step": 617000
},
{
"epoch": 1.47,
"learning_rate": 1.0202728163388349e-05,
"loss": 1.2267,
"step": 617500
},
{
"epoch": 1.47,
"learning_rate": 1.0194795149755465e-05,
"loss": 1.2259,
"step": 618000
},
{
"epoch": 1.47,
"learning_rate": 1.0186862136122582e-05,
"loss": 1.2367,
"step": 618500
},
{
"epoch": 1.47,
"learning_rate": 1.0178929122489698e-05,
"loss": 1.2462,
"step": 619000
},
{
"epoch": 1.47,
"learning_rate": 1.0170996108856814e-05,
"loss": 1.2328,
"step": 619500
},
{
"epoch": 1.48,
"learning_rate": 1.016306309522393e-05,
"loss": 1.2295,
"step": 620000
},
{
"epoch": 1.48,
"learning_rate": 1.0155130081591047e-05,
"loss": 1.2077,
"step": 620500
},
{
"epoch": 1.48,
"learning_rate": 1.0147197067958162e-05,
"loss": 1.2405,
"step": 621000
},
{
"epoch": 1.48,
"learning_rate": 1.0139264054325277e-05,
"loss": 1.236,
"step": 621500
},
{
"epoch": 1.48,
"learning_rate": 1.0131331040692393e-05,
"loss": 1.2219,
"step": 622000
},
{
"epoch": 1.48,
"learning_rate": 1.012339802705951e-05,
"loss": 1.2249,
"step": 622500
},
{
"epoch": 1.48,
"learning_rate": 1.0115465013426627e-05,
"loss": 1.2262,
"step": 623000
},
{
"epoch": 1.48,
"learning_rate": 1.0107531999793743e-05,
"loss": 1.2271,
"step": 623500
},
{
"epoch": 1.49,
"learning_rate": 1.0099598986160859e-05,
"loss": 1.2369,
"step": 624000
},
{
"epoch": 1.49,
"learning_rate": 1.0091665972527976e-05,
"loss": 1.2349,
"step": 624500
},
{
"epoch": 1.49,
"learning_rate": 1.0083732958895092e-05,
"loss": 1.2275,
"step": 625000
},
{
"epoch": 1.49,
"learning_rate": 1.0075799945262206e-05,
"loss": 1.2324,
"step": 625500
},
{
"epoch": 1.49,
"learning_rate": 1.0067866931629322e-05,
"loss": 1.2136,
"step": 626000
},
{
"epoch": 1.49,
"learning_rate": 1.0059933917996438e-05,
"loss": 1.2246,
"step": 626500
},
{
"epoch": 1.49,
"learning_rate": 1.0052000904363555e-05,
"loss": 1.2136,
"step": 627000
},
{
"epoch": 1.49,
"learning_rate": 1.0044067890730671e-05,
"loss": 1.2296,
"step": 627500
},
{
"epoch": 1.49,
"learning_rate": 1.0036134877097787e-05,
"loss": 1.2207,
"step": 628000
},
{
"epoch": 1.5,
"learning_rate": 1.0028201863464903e-05,
"loss": 1.225,
"step": 628500
},
{
"epoch": 1.5,
"learning_rate": 1.002026884983202e-05,
"loss": 1.2222,
"step": 629000
},
{
"epoch": 1.5,
"learning_rate": 1.0012335836199136e-05,
"loss": 1.227,
"step": 629500
},
{
"epoch": 1.5,
"learning_rate": 1.000440282256625e-05,
"loss": 1.2424,
"step": 630000
},
{
"epoch": 1.5,
"learning_rate": 9.996469808933368e-06,
"loss": 1.2185,
"step": 630500
},
{
"epoch": 1.5,
"learning_rate": 9.988536795300482e-06,
"loss": 1.2252,
"step": 631000
},
{
"epoch": 1.5,
"learning_rate": 9.9806037816676e-06,
"loss": 1.2155,
"step": 631500
},
{
"epoch": 1.5,
"learning_rate": 9.972670768034716e-06,
"loss": 1.2481,
"step": 632000
},
{
"epoch": 1.51,
"learning_rate": 9.964737754401832e-06,
"loss": 1.2385,
"step": 632500
},
{
"epoch": 1.51,
"learning_rate": 9.95680474076895e-06,
"loss": 1.2393,
"step": 633000
},
{
"epoch": 1.51,
"learning_rate": 9.948871727136063e-06,
"loss": 1.2289,
"step": 633500
},
{
"epoch": 1.51,
"learning_rate": 9.94093871350318e-06,
"loss": 1.2412,
"step": 634000
},
{
"epoch": 1.51,
"learning_rate": 9.933005699870297e-06,
"loss": 1.2337,
"step": 634500
},
{
"epoch": 1.51,
"learning_rate": 9.925072686237413e-06,
"loss": 1.2283,
"step": 635000
},
{
"epoch": 1.51,
"learning_rate": 9.917139672604528e-06,
"loss": 1.2191,
"step": 635500
},
{
"epoch": 1.51,
"learning_rate": 9.909206658971644e-06,
"loss": 1.2281,
"step": 636000
},
{
"epoch": 1.51,
"learning_rate": 9.90127364533876e-06,
"loss": 1.2253,
"step": 636500
},
{
"epoch": 1.52,
"learning_rate": 9.893340631705876e-06,
"loss": 1.2368,
"step": 637000
},
{
"epoch": 1.52,
"learning_rate": 9.885407618072994e-06,
"loss": 1.221,
"step": 637500
},
{
"epoch": 1.52,
"learning_rate": 9.877474604440108e-06,
"loss": 1.2251,
"step": 638000
},
{
"epoch": 1.52,
"learning_rate": 9.869541590807224e-06,
"loss": 1.2191,
"step": 638500
},
{
"epoch": 1.52,
"learning_rate": 9.861608577174341e-06,
"loss": 1.2252,
"step": 639000
},
{
"epoch": 1.52,
"learning_rate": 9.853675563541457e-06,
"loss": 1.2376,
"step": 639500
},
{
"epoch": 1.52,
"learning_rate": 9.845742549908573e-06,
"loss": 1.2339,
"step": 640000
},
{
"epoch": 1.52,
"learning_rate": 9.837809536275689e-06,
"loss": 1.2308,
"step": 640500
},
{
"epoch": 1.53,
"learning_rate": 9.829876522642805e-06,
"loss": 1.2171,
"step": 641000
},
{
"epoch": 1.53,
"learning_rate": 9.82194350900992e-06,
"loss": 1.224,
"step": 641500
},
{
"epoch": 1.53,
"learning_rate": 9.814010495377038e-06,
"loss": 1.2415,
"step": 642000
},
{
"epoch": 1.53,
"learning_rate": 9.806077481744152e-06,
"loss": 1.2177,
"step": 642500
},
{
"epoch": 1.53,
"learning_rate": 9.79814446811127e-06,
"loss": 1.2285,
"step": 643000
},
{
"epoch": 1.53,
"learning_rate": 9.790211454478386e-06,
"loss": 1.2279,
"step": 643500
},
{
"epoch": 1.53,
"learning_rate": 9.782278440845502e-06,
"loss": 1.223,
"step": 644000
},
{
"epoch": 1.53,
"learning_rate": 9.774345427212617e-06,
"loss": 1.2278,
"step": 644500
},
{
"epoch": 1.54,
"learning_rate": 9.766412413579733e-06,
"loss": 1.2226,
"step": 645000
},
{
"epoch": 1.54,
"learning_rate": 9.75847939994685e-06,
"loss": 1.2169,
"step": 645500
},
{
"epoch": 1.54,
"learning_rate": 9.750546386313967e-06,
"loss": 1.216,
"step": 646000
},
{
"epoch": 1.54,
"learning_rate": 9.742613372681081e-06,
"loss": 1.2381,
"step": 646500
},
{
"epoch": 1.54,
"learning_rate": 9.734680359048197e-06,
"loss": 1.2404,
"step": 647000
},
{
"epoch": 1.54,
"learning_rate": 9.726747345415314e-06,
"loss": 1.2249,
"step": 647500
},
{
"epoch": 1.54,
"learning_rate": 9.71881433178243e-06,
"loss": 1.2213,
"step": 648000
},
{
"epoch": 1.54,
"learning_rate": 9.710881318149546e-06,
"loss": 1.2233,
"step": 648500
},
{
"epoch": 1.54,
"learning_rate": 9.702948304516662e-06,
"loss": 1.2286,
"step": 649000
},
{
"epoch": 1.55,
"learning_rate": 9.695015290883778e-06,
"loss": 1.2079,
"step": 649500
},
{
"epoch": 1.55,
"learning_rate": 9.687082277250894e-06,
"loss": 1.2364,
"step": 650000
},
{
"epoch": 1.55,
"learning_rate": 9.679149263618011e-06,
"loss": 1.2281,
"step": 650500
},
{
"epoch": 1.55,
"learning_rate": 9.671216249985125e-06,
"loss": 1.2287,
"step": 651000
},
{
"epoch": 1.55,
"learning_rate": 9.663283236352243e-06,
"loss": 1.2199,
"step": 651500
},
{
"epoch": 1.55,
"learning_rate": 9.655350222719359e-06,
"loss": 1.2197,
"step": 652000
},
{
"epoch": 1.55,
"learning_rate": 9.647417209086475e-06,
"loss": 1.2189,
"step": 652500
},
{
"epoch": 1.55,
"learning_rate": 9.63948419545359e-06,
"loss": 1.2235,
"step": 653000
},
{
"epoch": 1.56,
"learning_rate": 9.631551181820706e-06,
"loss": 1.2138,
"step": 653500
},
{
"epoch": 1.56,
"learning_rate": 9.623618168187822e-06,
"loss": 1.2133,
"step": 654000
},
{
"epoch": 1.56,
"learning_rate": 9.61568515455494e-06,
"loss": 1.2297,
"step": 654500
},
{
"epoch": 1.56,
"learning_rate": 9.607752140922056e-06,
"loss": 1.222,
"step": 655000
},
{
"epoch": 1.56,
"learning_rate": 9.59981912728917e-06,
"loss": 1.2225,
"step": 655500
},
{
"epoch": 1.56,
"learning_rate": 9.591886113656287e-06,
"loss": 1.214,
"step": 656000
},
{
"epoch": 1.56,
"learning_rate": 9.583953100023403e-06,
"loss": 1.2258,
"step": 656500
},
{
"epoch": 1.56,
"learning_rate": 9.576020086390519e-06,
"loss": 1.2062,
"step": 657000
},
{
"epoch": 1.56,
"learning_rate": 9.568087072757635e-06,
"loss": 1.2271,
"step": 657500
},
{
"epoch": 1.57,
"learning_rate": 9.560154059124751e-06,
"loss": 1.2179,
"step": 658000
},
{
"epoch": 1.57,
"learning_rate": 9.552221045491867e-06,
"loss": 1.2252,
"step": 658500
},
{
"epoch": 1.57,
"learning_rate": 9.544288031858984e-06,
"loss": 1.2442,
"step": 659000
},
{
"epoch": 1.57,
"learning_rate": 9.5363550182261e-06,
"loss": 1.2145,
"step": 659500
},
{
"epoch": 1.57,
"learning_rate": 9.528422004593216e-06,
"loss": 1.2347,
"step": 660000
},
{
"epoch": 1.57,
"learning_rate": 9.520488990960332e-06,
"loss": 1.2252,
"step": 660500
},
{
"epoch": 1.57,
"learning_rate": 9.512555977327448e-06,
"loss": 1.2191,
"step": 661000
},
{
"epoch": 1.57,
"learning_rate": 9.504622963694564e-06,
"loss": 1.2274,
"step": 661500
},
{
"epoch": 1.58,
"learning_rate": 9.496689950061681e-06,
"loss": 1.2219,
"step": 662000
},
{
"epoch": 1.58,
"learning_rate": 9.488756936428795e-06,
"loss": 1.2181,
"step": 662500
},
{
"epoch": 1.58,
"learning_rate": 9.480823922795911e-06,
"loss": 1.2131,
"step": 663000
},
{
"epoch": 1.58,
"learning_rate": 9.472890909163029e-06,
"loss": 1.2294,
"step": 663500
},
{
"epoch": 1.58,
"learning_rate": 9.464957895530145e-06,
"loss": 1.2429,
"step": 664000
},
{
"epoch": 1.58,
"learning_rate": 9.45702488189726e-06,
"loss": 1.2208,
"step": 664500
},
{
"epoch": 1.58,
"learning_rate": 9.449091868264376e-06,
"loss": 1.2231,
"step": 665000
},
{
"epoch": 1.58,
"learning_rate": 9.441158854631492e-06,
"loss": 1.2108,
"step": 665500
},
{
"epoch": 1.59,
"learning_rate": 9.433225840998608e-06,
"loss": 1.2158,
"step": 666000
},
{
"epoch": 1.59,
"learning_rate": 9.425292827365726e-06,
"loss": 1.2191,
"step": 666500
},
{
"epoch": 1.59,
"learning_rate": 9.41735981373284e-06,
"loss": 1.2236,
"step": 667000
},
{
"epoch": 1.59,
"learning_rate": 9.409426800099957e-06,
"loss": 1.2326,
"step": 667500
},
{
"epoch": 1.59,
"learning_rate": 9.401493786467073e-06,
"loss": 1.2071,
"step": 668000
},
{
"epoch": 1.59,
"learning_rate": 9.393560772834189e-06,
"loss": 1.2211,
"step": 668500
},
{
"epoch": 1.59,
"learning_rate": 9.385627759201305e-06,
"loss": 1.2259,
"step": 669000
},
{
"epoch": 1.59,
"learning_rate": 9.377694745568421e-06,
"loss": 1.2159,
"step": 669500
},
{
"epoch": 1.59,
"learning_rate": 9.369761731935537e-06,
"loss": 1.2172,
"step": 670000
},
{
"epoch": 1.6,
"learning_rate": 9.361828718302654e-06,
"loss": 1.2237,
"step": 670500
},
{
"epoch": 1.6,
"learning_rate": 9.353895704669768e-06,
"loss": 1.2084,
"step": 671000
},
{
"epoch": 1.6,
"learning_rate": 9.345962691036884e-06,
"loss": 1.2226,
"step": 671500
},
{
"epoch": 1.6,
"learning_rate": 9.338029677404002e-06,
"loss": 1.2274,
"step": 672000
},
{
"epoch": 1.6,
"learning_rate": 9.330096663771118e-06,
"loss": 1.2333,
"step": 672500
},
{
"epoch": 1.6,
"learning_rate": 9.322163650138234e-06,
"loss": 1.2306,
"step": 673000
},
{
"epoch": 1.6,
"learning_rate": 9.31423063650535e-06,
"loss": 1.2133,
"step": 673500
},
{
"epoch": 1.6,
"learning_rate": 9.306297622872465e-06,
"loss": 1.2164,
"step": 674000
},
{
"epoch": 1.61,
"learning_rate": 9.298364609239581e-06,
"loss": 1.2292,
"step": 674500
},
{
"epoch": 1.61,
"learning_rate": 9.290431595606699e-06,
"loss": 1.2242,
"step": 675000
},
{
"epoch": 1.61,
"learning_rate": 9.282498581973813e-06,
"loss": 1.2099,
"step": 675500
},
{
"epoch": 1.61,
"learning_rate": 9.27456556834093e-06,
"loss": 1.234,
"step": 676000
},
{
"epoch": 1.61,
"learning_rate": 9.266632554708046e-06,
"loss": 1.216,
"step": 676500
},
{
"epoch": 1.61,
"learning_rate": 9.258699541075162e-06,
"loss": 1.2322,
"step": 677000
},
{
"epoch": 1.61,
"learning_rate": 9.250766527442278e-06,
"loss": 1.2198,
"step": 677500
},
{
"epoch": 1.61,
"learning_rate": 9.242833513809394e-06,
"loss": 1.217,
"step": 678000
},
{
"epoch": 1.61,
"learning_rate": 9.23490050017651e-06,
"loss": 1.2182,
"step": 678500
},
{
"epoch": 1.62,
"learning_rate": 9.226967486543626e-06,
"loss": 1.2326,
"step": 679000
},
{
"epoch": 1.62,
"learning_rate": 9.219034472910743e-06,
"loss": 1.2211,
"step": 679500
},
{
"epoch": 1.62,
"learning_rate": 9.211101459277857e-06,
"loss": 1.2206,
"step": 680000
},
{
"epoch": 1.62,
"learning_rate": 9.203168445644975e-06,
"loss": 1.2312,
"step": 680500
},
{
"epoch": 1.62,
"learning_rate": 9.19523543201209e-06,
"loss": 1.2136,
"step": 681000
},
{
"epoch": 1.62,
"learning_rate": 9.187302418379207e-06,
"loss": 1.2123,
"step": 681500
},
{
"epoch": 1.62,
"learning_rate": 9.179369404746323e-06,
"loss": 1.2244,
"step": 682000
},
{
"epoch": 1.62,
"learning_rate": 9.171436391113438e-06,
"loss": 1.2188,
"step": 682500
},
{
"epoch": 1.63,
"learning_rate": 9.163503377480554e-06,
"loss": 1.2209,
"step": 683000
},
{
"epoch": 1.63,
"learning_rate": 9.155570363847672e-06,
"loss": 1.2239,
"step": 683500
},
{
"epoch": 1.63,
"learning_rate": 9.147637350214788e-06,
"loss": 1.2193,
"step": 684000
},
{
"epoch": 1.63,
"learning_rate": 9.139704336581902e-06,
"loss": 1.2076,
"step": 684500
},
{
"epoch": 1.63,
"learning_rate": 9.13177132294902e-06,
"loss": 1.2177,
"step": 685000
},
{
"epoch": 1.63,
"learning_rate": 9.123838309316135e-06,
"loss": 1.2086,
"step": 685500
},
{
"epoch": 1.63,
"learning_rate": 9.115905295683251e-06,
"loss": 1.2029,
"step": 686000
},
{
"epoch": 1.63,
"learning_rate": 9.107972282050369e-06,
"loss": 1.2213,
"step": 686500
},
{
"epoch": 1.63,
"learning_rate": 9.100039268417483e-06,
"loss": 1.2117,
"step": 687000
},
{
"epoch": 1.64,
"learning_rate": 9.092106254784599e-06,
"loss": 1.2133,
"step": 687500
},
{
"epoch": 1.64,
"learning_rate": 9.084173241151716e-06,
"loss": 1.2118,
"step": 688000
},
{
"epoch": 1.64,
"learning_rate": 9.076240227518832e-06,
"loss": 1.2065,
"step": 688500
},
{
"epoch": 1.64,
"learning_rate": 9.068307213885948e-06,
"loss": 1.2298,
"step": 689000
},
{
"epoch": 1.64,
"learning_rate": 9.060374200253064e-06,
"loss": 1.2306,
"step": 689500
},
{
"epoch": 1.64,
"learning_rate": 9.05244118662018e-06,
"loss": 1.2198,
"step": 690000
},
{
"epoch": 1.64,
"learning_rate": 9.044508172987296e-06,
"loss": 1.1961,
"step": 690500
},
{
"epoch": 1.64,
"learning_rate": 9.036575159354412e-06,
"loss": 1.2213,
"step": 691000
},
{
"epoch": 1.65,
"learning_rate": 9.028642145721527e-06,
"loss": 1.2063,
"step": 691500
},
{
"epoch": 1.65,
"learning_rate": 9.020709132088645e-06,
"loss": 1.2194,
"step": 692000
},
{
"epoch": 1.65,
"learning_rate": 9.01277611845576e-06,
"loss": 1.2069,
"step": 692500
},
{
"epoch": 1.65,
"learning_rate": 9.004843104822877e-06,
"loss": 1.2097,
"step": 693000
},
{
"epoch": 1.65,
"learning_rate": 8.996910091189993e-06,
"loss": 1.239,
"step": 693500
},
{
"epoch": 1.65,
"learning_rate": 8.988977077557108e-06,
"loss": 1.2312,
"step": 694000
},
{
"epoch": 1.65,
"learning_rate": 8.981044063924224e-06,
"loss": 1.2149,
"step": 694500
},
{
"epoch": 1.65,
"learning_rate": 8.973111050291342e-06,
"loss": 1.2192,
"step": 695000
},
{
"epoch": 1.66,
"learning_rate": 8.965178036658456e-06,
"loss": 1.212,
"step": 695500
},
{
"epoch": 1.66,
"learning_rate": 8.957245023025572e-06,
"loss": 1.2026,
"step": 696000
},
{
"epoch": 1.66,
"learning_rate": 8.94931200939269e-06,
"loss": 1.2288,
"step": 696500
},
{
"epoch": 1.66,
"learning_rate": 8.941378995759805e-06,
"loss": 1.2238,
"step": 697000
},
{
"epoch": 1.66,
"learning_rate": 8.933445982126921e-06,
"loss": 1.2216,
"step": 697500
},
{
"epoch": 1.66,
"learning_rate": 8.925512968494037e-06,
"loss": 1.2042,
"step": 698000
},
{
"epoch": 1.66,
"learning_rate": 8.917579954861153e-06,
"loss": 1.2156,
"step": 698500
},
{
"epoch": 1.66,
"learning_rate": 8.909646941228269e-06,
"loss": 1.2029,
"step": 699000
},
{
"epoch": 1.66,
"learning_rate": 8.901713927595386e-06,
"loss": 1.2186,
"step": 699500
},
{
"epoch": 1.67,
"learning_rate": 8.8937809139625e-06,
"loss": 1.2116,
"step": 700000
},
{
"epoch": 1.67,
"learning_rate": 8.885847900329618e-06,
"loss": 1.2051,
"step": 700500
},
{
"epoch": 1.67,
"learning_rate": 8.877914886696734e-06,
"loss": 1.2052,
"step": 701000
},
{
"epoch": 1.67,
"learning_rate": 8.86998187306385e-06,
"loss": 1.2049,
"step": 701500
},
{
"epoch": 1.67,
"learning_rate": 8.862048859430966e-06,
"loss": 1.2096,
"step": 702000
},
{
"epoch": 1.67,
"learning_rate": 8.854115845798081e-06,
"loss": 1.1989,
"step": 702500
},
{
"epoch": 1.67,
"learning_rate": 8.846182832165197e-06,
"loss": 1.2356,
"step": 703000
},
{
"epoch": 1.67,
"learning_rate": 8.838249818532313e-06,
"loss": 1.2042,
"step": 703500
},
{
"epoch": 1.68,
"learning_rate": 8.83031680489943e-06,
"loss": 1.2152,
"step": 704000
},
{
"epoch": 1.68,
"learning_rate": 8.822383791266545e-06,
"loss": 1.2113,
"step": 704500
},
{
"epoch": 1.68,
"learning_rate": 8.814450777633663e-06,
"loss": 1.218,
"step": 705000
},
{
"epoch": 1.68,
"learning_rate": 8.806517764000778e-06,
"loss": 1.2144,
"step": 705500
},
{
"epoch": 1.68,
"learning_rate": 8.798584750367894e-06,
"loss": 1.1974,
"step": 706000
},
{
"epoch": 1.68,
"learning_rate": 8.79065173673501e-06,
"loss": 1.2119,
"step": 706500
},
{
"epoch": 1.68,
"learning_rate": 8.782718723102126e-06,
"loss": 1.2106,
"step": 707000
},
{
"epoch": 1.68,
"learning_rate": 8.774785709469242e-06,
"loss": 1.2054,
"step": 707500
},
{
"epoch": 1.68,
"learning_rate": 8.76685269583636e-06,
"loss": 1.2112,
"step": 708000
},
{
"epoch": 1.69,
"learning_rate": 8.758919682203475e-06,
"loss": 1.1947,
"step": 708500
},
{
"epoch": 1.69,
"learning_rate": 8.75098666857059e-06,
"loss": 1.1973,
"step": 709000
},
{
"epoch": 1.69,
"learning_rate": 8.743053654937707e-06,
"loss": 1.2104,
"step": 709500
},
{
"epoch": 1.69,
"learning_rate": 8.735120641304823e-06,
"loss": 1.2197,
"step": 710000
},
{
"epoch": 1.69,
"learning_rate": 8.727187627671939e-06,
"loss": 1.2159,
"step": 710500
},
{
"epoch": 1.69,
"learning_rate": 8.719254614039056e-06,
"loss": 1.2197,
"step": 711000
},
{
"epoch": 1.69,
"learning_rate": 8.71132160040617e-06,
"loss": 1.2245,
"step": 711500
},
{
"epoch": 1.69,
"learning_rate": 8.703388586773286e-06,
"loss": 1.209,
"step": 712000
},
{
"epoch": 1.7,
"learning_rate": 8.695455573140404e-06,
"loss": 1.2146,
"step": 712500
},
{
"epoch": 1.7,
"learning_rate": 8.68752255950752e-06,
"loss": 1.2209,
"step": 713000
},
{
"epoch": 1.7,
"learning_rate": 8.679589545874636e-06,
"loss": 1.2176,
"step": 713500
},
{
"epoch": 1.7,
"learning_rate": 8.671656532241751e-06,
"loss": 1.1941,
"step": 714000
},
{
"epoch": 1.7,
"learning_rate": 8.663723518608867e-06,
"loss": 1.2102,
"step": 714500
},
{
"epoch": 1.7,
"learning_rate": 8.655790504975983e-06,
"loss": 1.211,
"step": 715000
},
{
"epoch": 1.7,
"learning_rate": 8.647857491343099e-06,
"loss": 1.2402,
"step": 715500
},
{
"epoch": 1.7,
"learning_rate": 8.639924477710215e-06,
"loss": 1.2039,
"step": 716000
},
{
"epoch": 1.71,
"learning_rate": 8.631991464077332e-06,
"loss": 1.2133,
"step": 716500
},
{
"epoch": 1.71,
"learning_rate": 8.624058450444448e-06,
"loss": 1.2174,
"step": 717000
},
{
"epoch": 1.71,
"learning_rate": 8.616125436811564e-06,
"loss": 1.2189,
"step": 717500
},
{
"epoch": 1.71,
"learning_rate": 8.60819242317868e-06,
"loss": 1.2109,
"step": 718000
},
{
"epoch": 1.71,
"learning_rate": 8.600259409545796e-06,
"loss": 1.2058,
"step": 718500
},
{
"epoch": 1.71,
"learning_rate": 8.592326395912912e-06,
"loss": 1.2209,
"step": 719000
},
{
"epoch": 1.71,
"learning_rate": 8.584393382280028e-06,
"loss": 1.2154,
"step": 719500
},
{
"epoch": 1.71,
"learning_rate": 8.576460368647144e-06,
"loss": 1.2254,
"step": 720000
},
{
"epoch": 1.71,
"learning_rate": 8.56852735501426e-06,
"loss": 1.2102,
"step": 720500
},
{
"epoch": 1.72,
"learning_rate": 8.560594341381377e-06,
"loss": 1.2163,
"step": 721000
},
{
"epoch": 1.72,
"learning_rate": 8.552661327748493e-06,
"loss": 1.2094,
"step": 721500
},
{
"epoch": 1.72,
"learning_rate": 8.544728314115609e-06,
"loss": 1.1982,
"step": 722000
},
{
"epoch": 1.72,
"learning_rate": 8.536795300482725e-06,
"loss": 1.2143,
"step": 722500
},
{
"epoch": 1.72,
"learning_rate": 8.52886228684984e-06,
"loss": 1.2127,
"step": 723000
},
{
"epoch": 1.72,
"learning_rate": 8.520929273216956e-06,
"loss": 1.2106,
"step": 723500
},
{
"epoch": 1.72,
"learning_rate": 8.512996259584074e-06,
"loss": 1.2127,
"step": 724000
},
{
"epoch": 1.72,
"learning_rate": 8.505063245951188e-06,
"loss": 1.2172,
"step": 724500
},
{
"epoch": 1.73,
"learning_rate": 8.497130232318304e-06,
"loss": 1.2115,
"step": 725000
},
{
"epoch": 1.73,
"learning_rate": 8.489197218685421e-06,
"loss": 1.1987,
"step": 725500
},
{
"epoch": 1.73,
"learning_rate": 8.481264205052537e-06,
"loss": 1.217,
"step": 726000
},
{
"epoch": 1.73,
"learning_rate": 8.473331191419653e-06,
"loss": 1.2072,
"step": 726500
},
{
"epoch": 1.73,
"learning_rate": 8.465398177786769e-06,
"loss": 1.2258,
"step": 727000
},
{
"epoch": 1.73,
"learning_rate": 8.457465164153885e-06,
"loss": 1.197,
"step": 727500
},
{
"epoch": 1.73,
"learning_rate": 8.449532150521e-06,
"loss": 1.2187,
"step": 728000
},
{
"epoch": 1.73,
"learning_rate": 8.441599136888118e-06,
"loss": 1.1952,
"step": 728500
},
{
"epoch": 1.73,
"learning_rate": 8.433666123255232e-06,
"loss": 1.1982,
"step": 729000
},
{
"epoch": 1.74,
"learning_rate": 8.42573310962235e-06,
"loss": 1.2218,
"step": 729500
},
{
"epoch": 1.74,
"learning_rate": 8.417800095989466e-06,
"loss": 1.214,
"step": 730000
},
{
"epoch": 1.74,
"learning_rate": 8.409867082356582e-06,
"loss": 1.2218,
"step": 730500
},
{
"epoch": 1.74,
"learning_rate": 8.401934068723698e-06,
"loss": 1.1943,
"step": 731000
},
{
"epoch": 1.74,
"learning_rate": 8.394001055090814e-06,
"loss": 1.2029,
"step": 731500
},
{
"epoch": 1.74,
"learning_rate": 8.38606804145793e-06,
"loss": 1.1989,
"step": 732000
},
{
"epoch": 1.74,
"learning_rate": 8.378135027825047e-06,
"loss": 1.2144,
"step": 732500
},
{
"epoch": 1.74,
"learning_rate": 8.370202014192163e-06,
"loss": 1.2149,
"step": 733000
},
{
"epoch": 1.75,
"learning_rate": 8.362269000559277e-06,
"loss": 1.1873,
"step": 733500
},
{
"epoch": 1.75,
"learning_rate": 8.354335986926395e-06,
"loss": 1.1982,
"step": 734000
},
{
"epoch": 1.75,
"learning_rate": 8.34640297329351e-06,
"loss": 1.1941,
"step": 734500
},
{
"epoch": 1.75,
"learning_rate": 8.338469959660626e-06,
"loss": 1.201,
"step": 735000
},
{
"epoch": 1.75,
"learning_rate": 8.330536946027742e-06,
"loss": 1.2132,
"step": 735500
},
{
"epoch": 1.75,
"learning_rate": 8.322603932394858e-06,
"loss": 1.2072,
"step": 736000
},
{
"epoch": 1.75,
"learning_rate": 8.314670918761974e-06,
"loss": 1.2003,
"step": 736500
},
{
"epoch": 1.75,
"learning_rate": 8.306737905129091e-06,
"loss": 1.2099,
"step": 737000
},
{
"epoch": 1.76,
"learning_rate": 8.298804891496207e-06,
"loss": 1.2126,
"step": 737500
},
{
"epoch": 1.76,
"learning_rate": 8.290871877863323e-06,
"loss": 1.2085,
"step": 738000
},
{
"epoch": 1.76,
"learning_rate": 8.282938864230439e-06,
"loss": 1.208,
"step": 738500
},
{
"epoch": 1.76,
"learning_rate": 8.275005850597555e-06,
"loss": 1.2003,
"step": 739000
},
{
"epoch": 1.76,
"learning_rate": 8.26707283696467e-06,
"loss": 1.2053,
"step": 739500
},
{
"epoch": 1.76,
"learning_rate": 8.259139823331787e-06,
"loss": 1.2025,
"step": 740000
},
{
"epoch": 1.76,
"learning_rate": 8.251206809698902e-06,
"loss": 1.2121,
"step": 740500
},
{
"epoch": 1.76,
"learning_rate": 8.24327379606602e-06,
"loss": 1.2019,
"step": 741000
},
{
"epoch": 1.76,
"learning_rate": 8.235340782433136e-06,
"loss": 1.2121,
"step": 741500
},
{
"epoch": 1.77,
"learning_rate": 8.227407768800252e-06,
"loss": 1.2208,
"step": 742000
},
{
"epoch": 1.77,
"learning_rate": 8.219474755167368e-06,
"loss": 1.2161,
"step": 742500
},
{
"epoch": 1.77,
"learning_rate": 8.211541741534483e-06,
"loss": 1.2203,
"step": 743000
},
{
"epoch": 1.77,
"learning_rate": 8.2036087279016e-06,
"loss": 1.2102,
"step": 743500
},
{
"epoch": 1.77,
"learning_rate": 8.195675714268715e-06,
"loss": 1.2069,
"step": 744000
},
{
"epoch": 1.77,
"learning_rate": 8.187742700635831e-06,
"loss": 1.2107,
"step": 744500
},
{
"epoch": 1.77,
"learning_rate": 8.179809687002947e-06,
"loss": 1.196,
"step": 745000
},
{
"epoch": 1.77,
"learning_rate": 8.171876673370064e-06,
"loss": 1.1884,
"step": 745500
},
{
"epoch": 1.78,
"learning_rate": 8.16394365973718e-06,
"loss": 1.2029,
"step": 746000
},
{
"epoch": 1.78,
"learning_rate": 8.156010646104296e-06,
"loss": 1.2077,
"step": 746500
},
{
"epoch": 1.78,
"learning_rate": 8.148077632471412e-06,
"loss": 1.2021,
"step": 747000
},
{
"epoch": 1.78,
"learning_rate": 8.140144618838528e-06,
"loss": 1.2097,
"step": 747500
},
{
"epoch": 1.78,
"learning_rate": 8.132211605205644e-06,
"loss": 1.2107,
"step": 748000
},
{
"epoch": 1.78,
"learning_rate": 8.124278591572761e-06,
"loss": 1.2107,
"step": 748500
},
{
"epoch": 1.78,
"learning_rate": 8.116345577939876e-06,
"loss": 1.2031,
"step": 749000
},
{
"epoch": 1.78,
"learning_rate": 8.108412564306991e-06,
"loss": 1.215,
"step": 749500
},
{
"epoch": 1.78,
"learning_rate": 8.100479550674109e-06,
"loss": 1.2002,
"step": 750000
},
{
"epoch": 1.79,
"learning_rate": 8.092546537041225e-06,
"loss": 1.2135,
"step": 750500
},
{
"epoch": 1.79,
"learning_rate": 8.08461352340834e-06,
"loss": 1.196,
"step": 751000
},
{
"epoch": 1.79,
"learning_rate": 8.076680509775457e-06,
"loss": 1.2067,
"step": 751500
},
{
"epoch": 1.79,
"learning_rate": 8.068747496142572e-06,
"loss": 1.2048,
"step": 752000
},
{
"epoch": 1.79,
"learning_rate": 8.060814482509688e-06,
"loss": 1.2176,
"step": 752500
},
{
"epoch": 1.79,
"learning_rate": 8.052881468876806e-06,
"loss": 1.2126,
"step": 753000
},
{
"epoch": 1.79,
"learning_rate": 8.04494845524392e-06,
"loss": 1.2093,
"step": 753500
},
{
"epoch": 1.79,
"learning_rate": 8.037015441611038e-06,
"loss": 1.206,
"step": 754000
},
{
"epoch": 1.8,
"learning_rate": 8.029082427978153e-06,
"loss": 1.1969,
"step": 754500
},
{
"epoch": 1.8,
"learning_rate": 8.02114941434527e-06,
"loss": 1.2162,
"step": 755000
},
{
"epoch": 1.8,
"learning_rate": 8.013216400712385e-06,
"loss": 1.1944,
"step": 755500
},
{
"epoch": 1.8,
"learning_rate": 8.005283387079501e-06,
"loss": 1.2028,
"step": 756000
},
{
"epoch": 1.8,
"learning_rate": 7.997350373446617e-06,
"loss": 1.1856,
"step": 756500
},
{
"epoch": 1.8,
"learning_rate": 7.989417359813734e-06,
"loss": 1.221,
"step": 757000
},
{
"epoch": 1.8,
"learning_rate": 7.98148434618085e-06,
"loss": 1.1955,
"step": 757500
},
{
"epoch": 1.8,
"learning_rate": 7.973551332547965e-06,
"loss": 1.1967,
"step": 758000
},
{
"epoch": 1.81,
"learning_rate": 7.965618318915082e-06,
"loss": 1.2161,
"step": 758500
},
{
"epoch": 1.81,
"learning_rate": 7.957685305282198e-06,
"loss": 1.203,
"step": 759000
},
{
"epoch": 1.81,
"learning_rate": 7.949752291649314e-06,
"loss": 1.2127,
"step": 759500
},
{
"epoch": 1.81,
"learning_rate": 7.94181927801643e-06,
"loss": 1.2106,
"step": 760000
},
{
"epoch": 1.81,
"learning_rate": 7.933886264383546e-06,
"loss": 1.2082,
"step": 760500
},
{
"epoch": 1.81,
"learning_rate": 7.925953250750661e-06,
"loss": 1.2047,
"step": 761000
},
{
"epoch": 1.81,
"learning_rate": 7.918020237117779e-06,
"loss": 1.1979,
"step": 761500
},
{
"epoch": 1.81,
"learning_rate": 7.910087223484895e-06,
"loss": 1.1888,
"step": 762000
},
{
"epoch": 1.81,
"learning_rate": 7.90215420985201e-06,
"loss": 1.2056,
"step": 762500
},
{
"epoch": 1.82,
"learning_rate": 7.894221196219127e-06,
"loss": 1.2081,
"step": 763000
},
{
"epoch": 1.82,
"learning_rate": 7.886288182586242e-06,
"loss": 1.183,
"step": 763500
},
{
"epoch": 1.82,
"learning_rate": 7.878355168953358e-06,
"loss": 1.1933,
"step": 764000
},
{
"epoch": 1.82,
"learning_rate": 7.870422155320474e-06,
"loss": 1.1922,
"step": 764500
},
{
"epoch": 1.82,
"learning_rate": 7.86248914168759e-06,
"loss": 1.2201,
"step": 765000
},
{
"epoch": 1.82,
"learning_rate": 7.854556128054706e-06,
"loss": 1.1971,
"step": 765500
},
{
"epoch": 1.82,
"learning_rate": 7.846623114421823e-06,
"loss": 1.205,
"step": 766000
},
{
"epoch": 1.82,
"learning_rate": 7.83869010078894e-06,
"loss": 1.2127,
"step": 766500
},
{
"epoch": 1.83,
"learning_rate": 7.830757087156055e-06,
"loss": 1.1925,
"step": 767000
},
{
"epoch": 1.83,
"learning_rate": 7.822824073523171e-06,
"loss": 1.2001,
"step": 767500
},
{
"epoch": 1.83,
"learning_rate": 7.814891059890287e-06,
"loss": 1.2059,
"step": 768000
},
{
"epoch": 1.83,
"learning_rate": 7.806958046257403e-06,
"loss": 1.1868,
"step": 768500
},
{
"epoch": 1.83,
"learning_rate": 7.799025032624519e-06,
"loss": 1.1982,
"step": 769000
},
{
"epoch": 1.83,
"learning_rate": 7.791092018991634e-06,
"loss": 1.1957,
"step": 769500
},
{
"epoch": 1.83,
"learning_rate": 7.783159005358752e-06,
"loss": 1.2069,
"step": 770000
},
{
"epoch": 1.83,
"learning_rate": 7.775225991725868e-06,
"loss": 1.191,
"step": 770500
},
{
"epoch": 1.83,
"learning_rate": 7.767292978092984e-06,
"loss": 1.2078,
"step": 771000
},
{
"epoch": 1.84,
"learning_rate": 7.7593599644601e-06,
"loss": 1.1973,
"step": 771500
},
{
"epoch": 1.84,
"learning_rate": 7.751426950827216e-06,
"loss": 1.1992,
"step": 772000
},
{
"epoch": 1.84,
"learning_rate": 7.743493937194331e-06,
"loss": 1.1951,
"step": 772500
},
{
"epoch": 1.84,
"learning_rate": 7.735560923561449e-06,
"loss": 1.1879,
"step": 773000
},
{
"epoch": 1.84,
"learning_rate": 7.727627909928563e-06,
"loss": 1.1847,
"step": 773500
},
{
"epoch": 1.84,
"learning_rate": 7.719694896295679e-06,
"loss": 1.2002,
"step": 774000
},
{
"epoch": 1.84,
"learning_rate": 7.711761882662797e-06,
"loss": 1.193,
"step": 774500
},
{
"epoch": 1.84,
"learning_rate": 7.703828869029912e-06,
"loss": 1.1985,
"step": 775000
},
{
"epoch": 1.85,
"learning_rate": 7.695895855397028e-06,
"loss": 1.1993,
"step": 775500
},
{
"epoch": 1.85,
"learning_rate": 7.687962841764144e-06,
"loss": 1.2016,
"step": 776000
},
{
"epoch": 1.85,
"learning_rate": 7.68002982813126e-06,
"loss": 1.2017,
"step": 776500
},
{
"epoch": 1.85,
"learning_rate": 7.672096814498376e-06,
"loss": 1.2132,
"step": 777000
},
{
"epoch": 1.85,
"learning_rate": 7.664163800865493e-06,
"loss": 1.2184,
"step": 777500
},
{
"epoch": 1.85,
"learning_rate": 7.656230787232608e-06,
"loss": 1.1996,
"step": 778000
},
{
"epoch": 1.85,
"learning_rate": 7.648297773599725e-06,
"loss": 1.2067,
"step": 778500
},
{
"epoch": 1.85,
"learning_rate": 7.640364759966841e-06,
"loss": 1.1858,
"step": 779000
},
{
"epoch": 1.86,
"learning_rate": 7.632431746333957e-06,
"loss": 1.1925,
"step": 779500
},
{
"epoch": 1.86,
"learning_rate": 7.6244987327010736e-06,
"loss": 1.1994,
"step": 780000
},
{
"epoch": 1.86,
"learning_rate": 7.616565719068189e-06,
"loss": 1.2115,
"step": 780500
},
{
"epoch": 1.86,
"learning_rate": 7.6086327054353045e-06,
"loss": 1.2029,
"step": 781000
},
{
"epoch": 1.86,
"learning_rate": 7.600699691802421e-06,
"loss": 1.202,
"step": 781500
},
{
"epoch": 1.86,
"learning_rate": 7.592766678169538e-06,
"loss": 1.1985,
"step": 782000
},
{
"epoch": 1.86,
"learning_rate": 7.584833664536653e-06,
"loss": 1.1924,
"step": 782500
},
{
"epoch": 1.86,
"learning_rate": 7.576900650903769e-06,
"loss": 1.199,
"step": 783000
},
{
"epoch": 1.86,
"learning_rate": 7.5689676372708855e-06,
"loss": 1.21,
"step": 783500
},
{
"epoch": 1.87,
"learning_rate": 7.561034623638001e-06,
"loss": 1.2042,
"step": 784000
},
{
"epoch": 1.87,
"learning_rate": 7.553101610005117e-06,
"loss": 1.202,
"step": 784500
},
{
"epoch": 1.87,
"learning_rate": 7.545168596372233e-06,
"loss": 1.1943,
"step": 785000
},
{
"epoch": 1.87,
"learning_rate": 7.53723558273935e-06,
"loss": 1.1947,
"step": 785500
},
{
"epoch": 1.87,
"learning_rate": 7.529302569106466e-06,
"loss": 1.2078,
"step": 786000
},
{
"epoch": 1.87,
"learning_rate": 7.521369555473582e-06,
"loss": 1.1961,
"step": 786500
},
{
"epoch": 1.87,
"learning_rate": 7.513436541840697e-06,
"loss": 1.1909,
"step": 787000
},
{
"epoch": 1.87,
"learning_rate": 7.505503528207814e-06,
"loss": 1.1897,
"step": 787500
},
{
"epoch": 1.88,
"learning_rate": 7.49757051457493e-06,
"loss": 1.1851,
"step": 788000
},
{
"epoch": 1.88,
"learning_rate": 7.489637500942047e-06,
"loss": 1.1985,
"step": 788500
},
{
"epoch": 1.88,
"learning_rate": 7.481704487309162e-06,
"loss": 1.2052,
"step": 789000
},
{
"epoch": 1.88,
"learning_rate": 7.4737714736762775e-06,
"loss": 1.1853,
"step": 789500
},
{
"epoch": 1.88,
"learning_rate": 7.465838460043394e-06,
"loss": 1.2105,
"step": 790000
},
{
"epoch": 1.88,
"learning_rate": 7.45790544641051e-06,
"loss": 1.2175,
"step": 790500
},
{
"epoch": 1.88,
"learning_rate": 7.449972432777627e-06,
"loss": 1.1808,
"step": 791000
},
{
"epoch": 1.88,
"learning_rate": 7.442039419144742e-06,
"loss": 1.1983,
"step": 791500
},
{
"epoch": 1.88,
"learning_rate": 7.4341064055118586e-06,
"loss": 1.2002,
"step": 792000
},
{
"epoch": 1.89,
"learning_rate": 7.4261733918789744e-06,
"loss": 1.2018,
"step": 792500
},
{
"epoch": 1.89,
"learning_rate": 7.418240378246091e-06,
"loss": 1.2046,
"step": 793000
},
{
"epoch": 1.89,
"learning_rate": 7.410307364613206e-06,
"loss": 1.2096,
"step": 793500
},
{
"epoch": 1.89,
"learning_rate": 7.402374350980323e-06,
"loss": 1.1899,
"step": 794000
},
{
"epoch": 1.89,
"learning_rate": 7.394441337347439e-06,
"loss": 1.2129,
"step": 794500
},
{
"epoch": 1.89,
"learning_rate": 7.3865083237145554e-06,
"loss": 1.2009,
"step": 795000
},
{
"epoch": 1.89,
"learning_rate": 7.378575310081671e-06,
"loss": 1.1873,
"step": 795500
},
{
"epoch": 1.89,
"learning_rate": 7.370642296448786e-06,
"loss": 1.1951,
"step": 796000
},
{
"epoch": 1.9,
"learning_rate": 7.362709282815903e-06,
"loss": 1.189,
"step": 796500
},
{
"epoch": 1.9,
"learning_rate": 7.35477626918302e-06,
"loss": 1.196,
"step": 797000
},
{
"epoch": 1.9,
"learning_rate": 7.346843255550136e-06,
"loss": 1.1971,
"step": 797500
},
{
"epoch": 1.9,
"learning_rate": 7.338910241917251e-06,
"loss": 1.1967,
"step": 798000
},
{
"epoch": 1.9,
"learning_rate": 7.330977228284367e-06,
"loss": 1.1899,
"step": 798500
},
{
"epoch": 1.9,
"learning_rate": 7.323044214651483e-06,
"loss": 1.1989,
"step": 799000
},
{
"epoch": 1.9,
"learning_rate": 7.3151112010186e-06,
"loss": 1.2093,
"step": 799500
},
{
"epoch": 1.9,
"learning_rate": 7.307178187385716e-06,
"loss": 1.1961,
"step": 800000
},
{
"epoch": 1.91,
"learning_rate": 7.299245173752832e-06,
"loss": 1.1913,
"step": 800500
},
{
"epoch": 1.91,
"learning_rate": 7.2913121601199475e-06,
"loss": 1.187,
"step": 801000
},
{
"epoch": 1.91,
"learning_rate": 7.283379146487064e-06,
"loss": 1.182,
"step": 801500
},
{
"epoch": 1.91,
"learning_rate": 7.27544613285418e-06,
"loss": 1.1837,
"step": 802000
},
{
"epoch": 1.91,
"learning_rate": 7.267513119221296e-06,
"loss": 1.1905,
"step": 802500
},
{
"epoch": 1.91,
"learning_rate": 7.259580105588412e-06,
"loss": 1.1965,
"step": 803000
},
{
"epoch": 1.91,
"learning_rate": 7.2516470919555285e-06,
"loss": 1.1928,
"step": 803500
},
{
"epoch": 1.91,
"learning_rate": 7.243714078322644e-06,
"loss": 1.1874,
"step": 804000
},
{
"epoch": 1.91,
"learning_rate": 7.235781064689759e-06,
"loss": 1.2034,
"step": 804500
},
{
"epoch": 1.92,
"learning_rate": 7.227848051056876e-06,
"loss": 1.1998,
"step": 805000
},
{
"epoch": 1.92,
"learning_rate": 7.219915037423992e-06,
"loss": 1.1794,
"step": 805500
},
{
"epoch": 1.92,
"learning_rate": 7.211982023791109e-06,
"loss": 1.2004,
"step": 806000
},
{
"epoch": 1.92,
"learning_rate": 7.204049010158225e-06,
"loss": 1.1982,
"step": 806500
},
{
"epoch": 1.92,
"learning_rate": 7.1961159965253404e-06,
"loss": 1.1876,
"step": 807000
},
{
"epoch": 1.92,
"learning_rate": 7.188182982892456e-06,
"loss": 1.2054,
"step": 807500
},
{
"epoch": 1.92,
"learning_rate": 7.180249969259573e-06,
"loss": 1.2039,
"step": 808000
},
{
"epoch": 1.92,
"learning_rate": 7.172316955626689e-06,
"loss": 1.1919,
"step": 808500
},
{
"epoch": 1.93,
"learning_rate": 7.164383941993805e-06,
"loss": 1.1786,
"step": 809000
},
{
"epoch": 1.93,
"learning_rate": 7.156450928360921e-06,
"loss": 1.1874,
"step": 809500
},
{
"epoch": 1.93,
"learning_rate": 7.148517914728037e-06,
"loss": 1.1925,
"step": 810000
},
{
"epoch": 1.93,
"learning_rate": 7.140584901095153e-06,
"loss": 1.2053,
"step": 810500
},
{
"epoch": 1.93,
"learning_rate": 7.13265188746227e-06,
"loss": 1.2005,
"step": 811000
},
{
"epoch": 1.93,
"learning_rate": 7.124718873829385e-06,
"loss": 1.204,
"step": 811500
},
{
"epoch": 1.93,
"learning_rate": 7.116785860196501e-06,
"loss": 1.214,
"step": 812000
},
{
"epoch": 1.93,
"learning_rate": 7.1088528465636175e-06,
"loss": 1.1993,
"step": 812500
},
{
"epoch": 1.93,
"learning_rate": 7.100919832930734e-06,
"loss": 1.1895,
"step": 813000
},
{
"epoch": 1.94,
"learning_rate": 7.092986819297849e-06,
"loss": 1.1815,
"step": 813500
},
{
"epoch": 1.94,
"learning_rate": 7.085053805664965e-06,
"loss": 1.1974,
"step": 814000
},
{
"epoch": 1.94,
"learning_rate": 7.077120792032082e-06,
"loss": 1.1966,
"step": 814500
},
{
"epoch": 1.94,
"learning_rate": 7.069187778399198e-06,
"loss": 1.2029,
"step": 815000
},
{
"epoch": 1.94,
"learning_rate": 7.061254764766314e-06,
"loss": 1.2016,
"step": 815500
},
{
"epoch": 1.94,
"learning_rate": 7.053321751133429e-06,
"loss": 1.191,
"step": 816000
},
{
"epoch": 1.94,
"learning_rate": 7.045388737500546e-06,
"loss": 1.1918,
"step": 816500
},
{
"epoch": 1.94,
"learning_rate": 7.037455723867662e-06,
"loss": 1.1887,
"step": 817000
},
{
"epoch": 1.95,
"learning_rate": 7.029522710234779e-06,
"loss": 1.2043,
"step": 817500
},
{
"epoch": 1.95,
"learning_rate": 7.021589696601894e-06,
"loss": 1.1876,
"step": 818000
},
{
"epoch": 1.95,
"learning_rate": 7.01365668296901e-06,
"loss": 1.1809,
"step": 818500
},
{
"epoch": 1.95,
"learning_rate": 7.005723669336126e-06,
"loss": 1.1993,
"step": 819000
},
{
"epoch": 1.95,
"learning_rate": 6.997790655703243e-06,
"loss": 1.1977,
"step": 819500
},
{
"epoch": 1.95,
"learning_rate": 6.989857642070359e-06,
"loss": 1.1895,
"step": 820000
},
{
"epoch": 1.95,
"learning_rate": 6.981924628437474e-06,
"loss": 1.1873,
"step": 820500
},
{
"epoch": 1.95,
"learning_rate": 6.973991614804591e-06,
"loss": 1.1772,
"step": 821000
},
{
"epoch": 1.96,
"learning_rate": 6.9660586011717065e-06,
"loss": 1.1851,
"step": 821500
},
{
"epoch": 1.96,
"learning_rate": 6.958125587538823e-06,
"loss": 1.1978,
"step": 822000
},
{
"epoch": 1.96,
"learning_rate": 6.950192573905938e-06,
"loss": 1.1879,
"step": 822500
},
{
"epoch": 1.96,
"learning_rate": 6.942259560273055e-06,
"loss": 1.1934,
"step": 823000
},
{
"epoch": 1.96,
"learning_rate": 6.934326546640171e-06,
"loss": 1.1992,
"step": 823500
},
{
"epoch": 1.96,
"learning_rate": 6.9263935330072875e-06,
"loss": 1.1991,
"step": 824000
},
{
"epoch": 1.96,
"learning_rate": 6.918460519374403e-06,
"loss": 1.2042,
"step": 824500
},
{
"epoch": 1.96,
"learning_rate": 6.910527505741519e-06,
"loss": 1.1911,
"step": 825000
},
{
"epoch": 1.96,
"learning_rate": 6.902594492108635e-06,
"loss": 1.1863,
"step": 825500
},
{
"epoch": 1.97,
"learning_rate": 6.894661478475752e-06,
"loss": 1.1915,
"step": 826000
},
{
"epoch": 1.97,
"learning_rate": 6.886728464842868e-06,
"loss": 1.1803,
"step": 826500
},
{
"epoch": 1.97,
"learning_rate": 6.878795451209983e-06,
"loss": 1.1952,
"step": 827000
},
{
"epoch": 1.97,
"learning_rate": 6.870862437577099e-06,
"loss": 1.178,
"step": 827500
},
{
"epoch": 1.97,
"learning_rate": 6.862929423944216e-06,
"loss": 1.1832,
"step": 828000
},
{
"epoch": 1.97,
"learning_rate": 6.854996410311332e-06,
"loss": 1.2055,
"step": 828500
},
{
"epoch": 1.97,
"learning_rate": 6.847063396678447e-06,
"loss": 1.1833,
"step": 829000
},
{
"epoch": 1.97,
"learning_rate": 6.839130383045564e-06,
"loss": 1.1813,
"step": 829500
},
{
"epoch": 1.98,
"learning_rate": 6.8311973694126795e-06,
"loss": 1.1941,
"step": 830000
},
{
"epoch": 1.98,
"learning_rate": 6.823264355779796e-06,
"loss": 1.1762,
"step": 830500
},
{
"epoch": 1.98,
"learning_rate": 6.815331342146912e-06,
"loss": 1.1858,
"step": 831000
},
{
"epoch": 1.98,
"learning_rate": 6.807398328514028e-06,
"loss": 1.1913,
"step": 831500
},
{
"epoch": 1.98,
"learning_rate": 6.799465314881144e-06,
"loss": 1.1903,
"step": 832000
},
{
"epoch": 1.98,
"learning_rate": 6.7915323012482606e-06,
"loss": 1.2029,
"step": 832500
},
{
"epoch": 1.98,
"learning_rate": 6.7835992876153764e-06,
"loss": 1.175,
"step": 833000
},
{
"epoch": 1.98,
"learning_rate": 6.775666273982492e-06,
"loss": 1.2037,
"step": 833500
},
{
"epoch": 1.98,
"learning_rate": 6.767733260349608e-06,
"loss": 1.204,
"step": 834000
},
{
"epoch": 1.99,
"learning_rate": 6.759800246716725e-06,
"loss": 1.1814,
"step": 834500
},
{
"epoch": 1.99,
"learning_rate": 6.751867233083841e-06,
"loss": 1.1863,
"step": 835000
},
{
"epoch": 1.99,
"learning_rate": 6.7439342194509574e-06,
"loss": 1.1878,
"step": 835500
},
{
"epoch": 1.99,
"learning_rate": 6.7360012058180725e-06,
"loss": 1.1881,
"step": 836000
},
{
"epoch": 1.99,
"learning_rate": 6.728068192185188e-06,
"loss": 1.1876,
"step": 836500
},
{
"epoch": 1.99,
"learning_rate": 6.720135178552305e-06,
"loss": 1.1892,
"step": 837000
},
{
"epoch": 1.99,
"learning_rate": 6.712202164919422e-06,
"loss": 1.1922,
"step": 837500
},
{
"epoch": 1.99,
"learning_rate": 6.704269151286537e-06,
"loss": 1.1981,
"step": 838000
},
{
"epoch": 2.0,
"learning_rate": 6.696336137653653e-06,
"loss": 1.1833,
"step": 838500
},
{
"epoch": 2.0,
"learning_rate": 6.688403124020769e-06,
"loss": 1.1869,
"step": 839000
},
{
"epoch": 2.0,
"learning_rate": 6.680470110387885e-06,
"loss": 1.1834,
"step": 839500
},
{
"epoch": 2.0,
"learning_rate": 6.672537096755002e-06,
"loss": 1.1937,
"step": 840000
},
{
"epoch": 2.0,
"eval_loss": 1.169049859046936,
"eval_runtime": 3623.1597,
"eval_samples_per_second": 366.405,
"eval_steps_per_second": 22.9,
"step": 840370
},
{
"epoch": 2.0,
"learning_rate": 6.664604083122117e-06,
"loss": 1.1996,
"step": 840500
},
{
"epoch": 2.0,
"learning_rate": 6.656671069489234e-06,
"loss": 1.1678,
"step": 841000
},
{
"epoch": 2.0,
"learning_rate": 6.6487380558563495e-06,
"loss": 1.1902,
"step": 841500
},
{
"epoch": 2.0,
"learning_rate": 6.640805042223466e-06,
"loss": 1.1885,
"step": 842000
},
{
"epoch": 2.01,
"learning_rate": 6.632872028590581e-06,
"loss": 1.1788,
"step": 842500
},
{
"epoch": 2.01,
"learning_rate": 6.624939014957697e-06,
"loss": 1.1884,
"step": 843000
},
{
"epoch": 2.01,
"learning_rate": 6.617006001324814e-06,
"loss": 1.1748,
"step": 843500
},
{
"epoch": 2.01,
"learning_rate": 6.6090729876919305e-06,
"loss": 1.1931,
"step": 844000
},
{
"epoch": 2.01,
"learning_rate": 6.601139974059046e-06,
"loss": 1.1791,
"step": 844500
},
{
"epoch": 2.01,
"learning_rate": 6.593206960426161e-06,
"loss": 1.1721,
"step": 845000
},
{
"epoch": 2.01,
"learning_rate": 6.585273946793278e-06,
"loss": 1.1864,
"step": 845500
},
{
"epoch": 2.01,
"learning_rate": 6.577340933160394e-06,
"loss": 1.1891,
"step": 846000
},
{
"epoch": 2.01,
"learning_rate": 6.569407919527511e-06,
"loss": 1.1789,
"step": 846500
},
{
"epoch": 2.02,
"learning_rate": 6.561474905894626e-06,
"loss": 1.1773,
"step": 847000
},
{
"epoch": 2.02,
"learning_rate": 6.5535418922617424e-06,
"loss": 1.1967,
"step": 847500
},
{
"epoch": 2.02,
"learning_rate": 6.545608878628858e-06,
"loss": 1.1974,
"step": 848000
},
{
"epoch": 2.02,
"learning_rate": 6.537675864995975e-06,
"loss": 1.1686,
"step": 848500
},
{
"epoch": 2.02,
"learning_rate": 6.52974285136309e-06,
"loss": 1.1928,
"step": 849000
},
{
"epoch": 2.02,
"learning_rate": 6.521809837730207e-06,
"loss": 1.1865,
"step": 849500
},
{
"epoch": 2.02,
"learning_rate": 6.513876824097323e-06,
"loss": 1.1695,
"step": 850000
},
{
"epoch": 2.02,
"learning_rate": 6.505943810464439e-06,
"loss": 1.1982,
"step": 850500
},
{
"epoch": 2.03,
"learning_rate": 6.498010796831555e-06,
"loss": 1.1748,
"step": 851000
},
{
"epoch": 2.03,
"learning_rate": 6.49007778319867e-06,
"loss": 1.1837,
"step": 851500
},
{
"epoch": 2.03,
"learning_rate": 6.482144769565787e-06,
"loss": 1.1869,
"step": 852000
},
{
"epoch": 2.03,
"learning_rate": 6.474211755932903e-06,
"loss": 1.178,
"step": 852500
},
{
"epoch": 2.03,
"learning_rate": 6.4662787423000195e-06,
"loss": 1.1664,
"step": 853000
},
{
"epoch": 2.03,
"learning_rate": 6.4583457286671345e-06,
"loss": 1.1672,
"step": 853500
},
{
"epoch": 2.03,
"learning_rate": 6.450412715034251e-06,
"loss": 1.1839,
"step": 854000
},
{
"epoch": 2.03,
"learning_rate": 6.442479701401367e-06,
"loss": 1.1904,
"step": 854500
},
{
"epoch": 2.03,
"learning_rate": 6.434546687768484e-06,
"loss": 1.1913,
"step": 855000
},
{
"epoch": 2.04,
"learning_rate": 6.4266136741356e-06,
"loss": 1.1941,
"step": 855500
},
{
"epoch": 2.04,
"learning_rate": 6.4186806605027155e-06,
"loss": 1.1875,
"step": 856000
},
{
"epoch": 2.04,
"learning_rate": 6.410747646869831e-06,
"loss": 1.1772,
"step": 856500
},
{
"epoch": 2.04,
"learning_rate": 6.402814633236948e-06,
"loss": 1.2005,
"step": 857000
},
{
"epoch": 2.04,
"learning_rate": 6.394881619604064e-06,
"loss": 1.1729,
"step": 857500
},
{
"epoch": 2.04,
"learning_rate": 6.386948605971179e-06,
"loss": 1.163,
"step": 858000
},
{
"epoch": 2.04,
"learning_rate": 6.379015592338296e-06,
"loss": 1.1781,
"step": 858500
},
{
"epoch": 2.04,
"learning_rate": 6.371082578705412e-06,
"loss": 1.1802,
"step": 859000
},
{
"epoch": 2.05,
"learning_rate": 6.363149565072528e-06,
"loss": 1.1739,
"step": 859500
},
{
"epoch": 2.05,
"learning_rate": 6.355216551439645e-06,
"loss": 1.1935,
"step": 860000
},
{
"epoch": 2.05,
"learning_rate": 6.34728353780676e-06,
"loss": 1.1873,
"step": 860500
},
{
"epoch": 2.05,
"learning_rate": 6.339350524173876e-06,
"loss": 1.1752,
"step": 861000
},
{
"epoch": 2.05,
"learning_rate": 6.331417510540993e-06,
"loss": 1.1839,
"step": 861500
},
{
"epoch": 2.05,
"learning_rate": 6.3234844969081084e-06,
"loss": 1.185,
"step": 862000
},
{
"epoch": 2.05,
"learning_rate": 6.315551483275224e-06,
"loss": 1.1716,
"step": 862500
},
{
"epoch": 2.05,
"learning_rate": 6.30761846964234e-06,
"loss": 1.1794,
"step": 863000
},
{
"epoch": 2.06,
"learning_rate": 6.299685456009457e-06,
"loss": 1.1826,
"step": 863500
},
{
"epoch": 2.06,
"learning_rate": 6.291752442376573e-06,
"loss": 1.1728,
"step": 864000
},
{
"epoch": 2.06,
"learning_rate": 6.2838194287436895e-06,
"loss": 1.1873,
"step": 864500
},
{
"epoch": 2.06,
"learning_rate": 6.2758864151108045e-06,
"loss": 1.1739,
"step": 865000
},
{
"epoch": 2.06,
"learning_rate": 6.267953401477921e-06,
"loss": 1.1873,
"step": 865500
},
{
"epoch": 2.06,
"learning_rate": 6.260020387845037e-06,
"loss": 1.1794,
"step": 866000
},
{
"epoch": 2.06,
"learning_rate": 6.252087374212154e-06,
"loss": 1.175,
"step": 866500
},
{
"epoch": 2.06,
"learning_rate": 6.244154360579269e-06,
"loss": 1.1816,
"step": 867000
},
{
"epoch": 2.06,
"learning_rate": 6.236221346946385e-06,
"loss": 1.1868,
"step": 867500
},
{
"epoch": 2.07,
"learning_rate": 6.228288333313501e-06,
"loss": 1.1741,
"step": 868000
},
{
"epoch": 2.07,
"learning_rate": 6.220355319680618e-06,
"loss": 1.1915,
"step": 868500
},
{
"epoch": 2.07,
"learning_rate": 6.212422306047734e-06,
"loss": 1.1641,
"step": 869000
},
{
"epoch": 2.07,
"learning_rate": 6.204489292414849e-06,
"loss": 1.1909,
"step": 869500
},
{
"epoch": 2.07,
"learning_rate": 6.196556278781966e-06,
"loss": 1.181,
"step": 870000
},
{
"epoch": 2.07,
"learning_rate": 6.1886232651490815e-06,
"loss": 1.1886,
"step": 870500
},
{
"epoch": 2.07,
"learning_rate": 6.180690251516198e-06,
"loss": 1.194,
"step": 871000
},
{
"epoch": 2.07,
"learning_rate": 6.172757237883313e-06,
"loss": 1.1887,
"step": 871500
},
{
"epoch": 2.08,
"learning_rate": 6.16482422425043e-06,
"loss": 1.1794,
"step": 872000
},
{
"epoch": 2.08,
"learning_rate": 6.156891210617546e-06,
"loss": 1.1913,
"step": 872500
},
{
"epoch": 2.08,
"learning_rate": 6.1489581969846626e-06,
"loss": 1.1855,
"step": 873000
},
{
"epoch": 2.08,
"learning_rate": 6.1410251833517776e-06,
"loss": 1.1831,
"step": 873500
},
{
"epoch": 2.08,
"learning_rate": 6.133092169718894e-06,
"loss": 1.1839,
"step": 874000
},
{
"epoch": 2.08,
"learning_rate": 6.12515915608601e-06,
"loss": 1.1855,
"step": 874500
},
{
"epoch": 2.08,
"learning_rate": 6.117226142453127e-06,
"loss": 1.1661,
"step": 875000
},
{
"epoch": 2.08,
"learning_rate": 6.109293128820243e-06,
"loss": 1.1778,
"step": 875500
},
{
"epoch": 2.08,
"learning_rate": 6.101360115187358e-06,
"loss": 1.1805,
"step": 876000
},
{
"epoch": 2.09,
"learning_rate": 6.0934271015544745e-06,
"loss": 1.1852,
"step": 876500
},
{
"epoch": 2.09,
"learning_rate": 6.08549408792159e-06,
"loss": 1.1708,
"step": 877000
},
{
"epoch": 2.09,
"learning_rate": 6.077561074288707e-06,
"loss": 1.1756,
"step": 877500
},
{
"epoch": 2.09,
"learning_rate": 6.069628060655822e-06,
"loss": 1.1901,
"step": 878000
},
{
"epoch": 2.09,
"learning_rate": 6.061695047022939e-06,
"loss": 1.1923,
"step": 878500
},
{
"epoch": 2.09,
"learning_rate": 6.053762033390055e-06,
"loss": 1.1793,
"step": 879000
},
{
"epoch": 2.09,
"learning_rate": 6.045829019757171e-06,
"loss": 1.1748,
"step": 879500
},
{
"epoch": 2.09,
"learning_rate": 6.037896006124287e-06,
"loss": 1.1789,
"step": 880000
},
{
"epoch": 2.1,
"learning_rate": 6.029962992491403e-06,
"loss": 1.1759,
"step": 880500
},
{
"epoch": 2.1,
"learning_rate": 6.022029978858519e-06,
"loss": 1.1656,
"step": 881000
},
{
"epoch": 2.1,
"learning_rate": 6.014096965225636e-06,
"loss": 1.1951,
"step": 881500
},
{
"epoch": 2.1,
"learning_rate": 6.0061639515927515e-06,
"loss": 1.1889,
"step": 882000
},
{
"epoch": 2.1,
"learning_rate": 5.9982309379598665e-06,
"loss": 1.1804,
"step": 882500
},
{
"epoch": 2.1,
"learning_rate": 5.990297924326983e-06,
"loss": 1.1807,
"step": 883000
},
{
"epoch": 2.1,
"learning_rate": 5.982364910694099e-06,
"loss": 1.1788,
"step": 883500
},
{
"epoch": 2.1,
"learning_rate": 5.974431897061216e-06,
"loss": 1.1859,
"step": 884000
},
{
"epoch": 2.11,
"learning_rate": 5.9664988834283325e-06,
"loss": 1.1857,
"step": 884500
},
{
"epoch": 2.11,
"learning_rate": 5.9585658697954476e-06,
"loss": 1.1705,
"step": 885000
},
{
"epoch": 2.11,
"learning_rate": 5.950632856162563e-06,
"loss": 1.1769,
"step": 885500
},
{
"epoch": 2.11,
"learning_rate": 5.94269984252968e-06,
"loss": 1.1752,
"step": 886000
},
{
"epoch": 2.11,
"learning_rate": 5.934766828896796e-06,
"loss": 1.172,
"step": 886500
},
{
"epoch": 2.11,
"learning_rate": 5.926833815263912e-06,
"loss": 1.1983,
"step": 887000
},
{
"epoch": 2.11,
"learning_rate": 5.918900801631028e-06,
"loss": 1.1735,
"step": 887500
},
{
"epoch": 2.11,
"learning_rate": 5.9109677879981444e-06,
"loss": 1.1638,
"step": 888000
},
{
"epoch": 2.11,
"learning_rate": 5.90303477436526e-06,
"loss": 1.1754,
"step": 888500
},
{
"epoch": 2.12,
"learning_rate": 5.895101760732377e-06,
"loss": 1.1755,
"step": 889000
},
{
"epoch": 2.12,
"learning_rate": 5.887168747099492e-06,
"loss": 1.1851,
"step": 889500
},
{
"epoch": 2.12,
"learning_rate": 5.879235733466609e-06,
"loss": 1.184,
"step": 890000
},
{
"epoch": 2.12,
"learning_rate": 5.871302719833725e-06,
"loss": 1.1843,
"step": 890500
},
{
"epoch": 2.12,
"learning_rate": 5.863369706200841e-06,
"loss": 1.1807,
"step": 891000
},
{
"epoch": 2.12,
"learning_rate": 5.855436692567956e-06,
"loss": 1.1601,
"step": 891500
},
{
"epoch": 2.12,
"learning_rate": 5.847503678935072e-06,
"loss": 1.185,
"step": 892000
},
{
"epoch": 2.12,
"learning_rate": 5.839570665302189e-06,
"loss": 1.1869,
"step": 892500
},
{
"epoch": 2.13,
"learning_rate": 5.831637651669305e-06,
"loss": 1.1678,
"step": 893000
},
{
"epoch": 2.13,
"learning_rate": 5.823704638036421e-06,
"loss": 1.1667,
"step": 893500
},
{
"epoch": 2.13,
"learning_rate": 5.8157716244035365e-06,
"loss": 1.1733,
"step": 894000
},
{
"epoch": 2.13,
"learning_rate": 5.807838610770653e-06,
"loss": 1.1803,
"step": 894500
},
{
"epoch": 2.13,
"learning_rate": 5.799905597137769e-06,
"loss": 1.177,
"step": 895000
},
{
"epoch": 2.13,
"learning_rate": 5.791972583504886e-06,
"loss": 1.1802,
"step": 895500
},
{
"epoch": 2.13,
"learning_rate": 5.784039569872001e-06,
"loss": 1.1784,
"step": 896000
},
{
"epoch": 2.13,
"learning_rate": 5.7761065562391175e-06,
"loss": 1.181,
"step": 896500
},
{
"epoch": 2.13,
"learning_rate": 5.768173542606233e-06,
"loss": 1.1664,
"step": 897000
},
{
"epoch": 2.14,
"learning_rate": 5.76024052897335e-06,
"loss": 1.1781,
"step": 897500
},
{
"epoch": 2.14,
"learning_rate": 5.752307515340465e-06,
"loss": 1.1857,
"step": 898000
},
{
"epoch": 2.14,
"learning_rate": 5.744374501707581e-06,
"loss": 1.1846,
"step": 898500
},
{
"epoch": 2.14,
"learning_rate": 5.736441488074698e-06,
"loss": 1.1582,
"step": 899000
},
{
"epoch": 2.14,
"learning_rate": 5.728508474441814e-06,
"loss": 1.1878,
"step": 899500
},
{
"epoch": 2.14,
"learning_rate": 5.72057546080893e-06,
"loss": 1.1752,
"step": 900000
},
{
"epoch": 2.14,
"learning_rate": 5.712642447176045e-06,
"loss": 1.1841,
"step": 900500
},
{
"epoch": 2.14,
"learning_rate": 5.704709433543162e-06,
"loss": 1.1902,
"step": 901000
},
{
"epoch": 2.15,
"learning_rate": 5.696776419910278e-06,
"loss": 1.1786,
"step": 901500
},
{
"epoch": 2.15,
"learning_rate": 5.688843406277395e-06,
"loss": 1.1815,
"step": 902000
},
{
"epoch": 2.15,
"learning_rate": 5.68091039264451e-06,
"loss": 1.193,
"step": 902500
},
{
"epoch": 2.15,
"learning_rate": 5.672977379011626e-06,
"loss": 1.1724,
"step": 903000
},
{
"epoch": 2.15,
"learning_rate": 5.665044365378742e-06,
"loss": 1.1818,
"step": 903500
},
{
"epoch": 2.15,
"learning_rate": 5.657111351745859e-06,
"loss": 1.1881,
"step": 904000
},
{
"epoch": 2.15,
"learning_rate": 5.649178338112975e-06,
"loss": 1.1733,
"step": 904500
},
{
"epoch": 2.15,
"learning_rate": 5.641245324480091e-06,
"loss": 1.1959,
"step": 905000
},
{
"epoch": 2.16,
"learning_rate": 5.6333123108472065e-06,
"loss": 1.1908,
"step": 905500
},
{
"epoch": 2.16,
"learning_rate": 5.625379297214323e-06,
"loss": 1.1697,
"step": 906000
},
{
"epoch": 2.16,
"learning_rate": 5.617446283581439e-06,
"loss": 1.1973,
"step": 906500
},
{
"epoch": 2.16,
"learning_rate": 5.609513269948554e-06,
"loss": 1.1845,
"step": 907000
},
{
"epoch": 2.16,
"learning_rate": 5.601580256315671e-06,
"loss": 1.1933,
"step": 907500
},
{
"epoch": 2.16,
"learning_rate": 5.593647242682787e-06,
"loss": 1.1832,
"step": 908000
},
{
"epoch": 2.16,
"learning_rate": 5.585714229049903e-06,
"loss": 1.1727,
"step": 908500
},
{
"epoch": 2.16,
"learning_rate": 5.57778121541702e-06,
"loss": 1.1759,
"step": 909000
},
{
"epoch": 2.16,
"learning_rate": 5.569848201784135e-06,
"loss": 1.162,
"step": 909500
},
{
"epoch": 2.17,
"learning_rate": 5.561915188151251e-06,
"loss": 1.1837,
"step": 910000
},
{
"epoch": 2.17,
"learning_rate": 5.553982174518368e-06,
"loss": 1.176,
"step": 910500
},
{
"epoch": 2.17,
"learning_rate": 5.5460491608854835e-06,
"loss": 1.1852,
"step": 911000
},
{
"epoch": 2.17,
"learning_rate": 5.538116147252599e-06,
"loss": 1.1746,
"step": 911500
},
{
"epoch": 2.17,
"learning_rate": 5.530183133619715e-06,
"loss": 1.1885,
"step": 912000
},
{
"epoch": 2.17,
"learning_rate": 5.522250119986832e-06,
"loss": 1.1591,
"step": 912500
},
{
"epoch": 2.17,
"learning_rate": 5.514317106353948e-06,
"loss": 1.1838,
"step": 913000
},
{
"epoch": 2.17,
"learning_rate": 5.5063840927210646e-06,
"loss": 1.1825,
"step": 913500
},
{
"epoch": 2.18,
"learning_rate": 5.4984510790881796e-06,
"loss": 1.1765,
"step": 914000
},
{
"epoch": 2.18,
"learning_rate": 5.490518065455296e-06,
"loss": 1.182,
"step": 914500
},
{
"epoch": 2.18,
"learning_rate": 5.482585051822412e-06,
"loss": 1.1725,
"step": 915000
},
{
"epoch": 2.18,
"learning_rate": 5.474652038189529e-06,
"loss": 1.1765,
"step": 915500
},
{
"epoch": 2.18,
"learning_rate": 5.466719024556644e-06,
"loss": 1.1798,
"step": 916000
},
{
"epoch": 2.18,
"learning_rate": 5.45878601092376e-06,
"loss": 1.1747,
"step": 916500
},
{
"epoch": 2.18,
"learning_rate": 5.4508529972908765e-06,
"loss": 1.1802,
"step": 917000
},
{
"epoch": 2.18,
"learning_rate": 5.442919983657992e-06,
"loss": 1.1751,
"step": 917500
},
{
"epoch": 2.18,
"learning_rate": 5.434986970025108e-06,
"loss": 1.1739,
"step": 918000
},
{
"epoch": 2.19,
"learning_rate": 5.427053956392224e-06,
"loss": 1.1917,
"step": 918500
},
{
"epoch": 2.19,
"learning_rate": 5.419120942759341e-06,
"loss": 1.177,
"step": 919000
},
{
"epoch": 2.19,
"learning_rate": 5.411187929126457e-06,
"loss": 1.1866,
"step": 919500
},
{
"epoch": 2.19,
"learning_rate": 5.403254915493573e-06,
"loss": 1.1553,
"step": 920000
},
{
"epoch": 2.19,
"learning_rate": 5.395321901860688e-06,
"loss": 1.1725,
"step": 920500
},
{
"epoch": 2.19,
"learning_rate": 5.387388888227805e-06,
"loss": 1.1702,
"step": 921000
},
{
"epoch": 2.19,
"learning_rate": 5.379455874594921e-06,
"loss": 1.1863,
"step": 921500
},
{
"epoch": 2.19,
"learning_rate": 5.371522860962038e-06,
"loss": 1.178,
"step": 922000
},
{
"epoch": 2.2,
"learning_rate": 5.363589847329153e-06,
"loss": 1.1789,
"step": 922500
},
{
"epoch": 2.2,
"learning_rate": 5.3556568336962685e-06,
"loss": 1.167,
"step": 923000
},
{
"epoch": 2.2,
"learning_rate": 5.347723820063385e-06,
"loss": 1.1732,
"step": 923500
},
{
"epoch": 2.2,
"learning_rate": 5.339790806430501e-06,
"loss": 1.1585,
"step": 924000
},
{
"epoch": 2.2,
"learning_rate": 5.331857792797618e-06,
"loss": 1.1692,
"step": 924500
},
{
"epoch": 2.2,
"learning_rate": 5.323924779164733e-06,
"loss": 1.1929,
"step": 925000
},
{
"epoch": 2.2,
"learning_rate": 5.3159917655318495e-06,
"loss": 1.1731,
"step": 925500
},
{
"epoch": 2.2,
"learning_rate": 5.308058751898965e-06,
"loss": 1.1743,
"step": 926000
},
{
"epoch": 2.2,
"learning_rate": 5.300125738266082e-06,
"loss": 1.1681,
"step": 926500
},
{
"epoch": 2.21,
"learning_rate": 5.292192724633197e-06,
"loss": 1.1739,
"step": 927000
},
{
"epoch": 2.21,
"learning_rate": 5.284259711000314e-06,
"loss": 1.1711,
"step": 927500
},
{
"epoch": 2.21,
"learning_rate": 5.27632669736743e-06,
"loss": 1.1751,
"step": 928000
},
{
"epoch": 2.21,
"learning_rate": 5.2683936837345464e-06,
"loss": 1.1826,
"step": 928500
},
{
"epoch": 2.21,
"learning_rate": 5.260460670101662e-06,
"loss": 1.1761,
"step": 929000
},
{
"epoch": 2.21,
"learning_rate": 5.252527656468777e-06,
"loss": 1.1812,
"step": 929500
},
{
"epoch": 2.21,
"learning_rate": 5.244594642835894e-06,
"loss": 1.1699,
"step": 930000
},
{
"epoch": 2.21,
"learning_rate": 5.236661629203011e-06,
"loss": 1.1799,
"step": 930500
},
{
"epoch": 2.22,
"learning_rate": 5.228728615570127e-06,
"loss": 1.1826,
"step": 931000
},
{
"epoch": 2.22,
"learning_rate": 5.220795601937242e-06,
"loss": 1.1762,
"step": 931500
},
{
"epoch": 2.22,
"learning_rate": 5.212862588304358e-06,
"loss": 1.1777,
"step": 932000
},
{
"epoch": 2.22,
"learning_rate": 5.204929574671474e-06,
"loss": 1.1801,
"step": 932500
},
{
"epoch": 2.22,
"learning_rate": 5.196996561038591e-06,
"loss": 1.1674,
"step": 933000
},
{
"epoch": 2.22,
"learning_rate": 5.189063547405707e-06,
"loss": 1.1584,
"step": 933500
},
{
"epoch": 2.22,
"learning_rate": 5.181130533772823e-06,
"loss": 1.1535,
"step": 934000
},
{
"epoch": 2.22,
"learning_rate": 5.1731975201399385e-06,
"loss": 1.1742,
"step": 934500
},
{
"epoch": 2.23,
"learning_rate": 5.165264506507055e-06,
"loss": 1.1859,
"step": 935000
},
{
"epoch": 2.23,
"learning_rate": 5.157331492874171e-06,
"loss": 1.1815,
"step": 935500
},
{
"epoch": 2.23,
"learning_rate": 5.149398479241287e-06,
"loss": 1.1693,
"step": 936000
},
{
"epoch": 2.23,
"learning_rate": 5.141465465608403e-06,
"loss": 1.169,
"step": 936500
},
{
"epoch": 2.23,
"learning_rate": 5.1335324519755195e-06,
"loss": 1.1595,
"step": 937000
},
{
"epoch": 2.23,
"learning_rate": 5.125599438342635e-06,
"loss": 1.1743,
"step": 937500
},
{
"epoch": 2.23,
"learning_rate": 5.117666424709752e-06,
"loss": 1.1576,
"step": 938000
},
{
"epoch": 2.23,
"learning_rate": 5.109733411076867e-06,
"loss": 1.1905,
"step": 938500
},
{
"epoch": 2.23,
"learning_rate": 5.101800397443983e-06,
"loss": 1.1705,
"step": 939000
},
{
"epoch": 2.24,
"learning_rate": 5.0938673838111e-06,
"loss": 1.1682,
"step": 939500
},
{
"epoch": 2.24,
"learning_rate": 5.085934370178216e-06,
"loss": 1.1637,
"step": 940000
},
{
"epoch": 2.24,
"learning_rate": 5.078001356545331e-06,
"loss": 1.1584,
"step": 940500
},
{
"epoch": 2.24,
"learning_rate": 5.070068342912447e-06,
"loss": 1.1787,
"step": 941000
},
{
"epoch": 2.24,
"learning_rate": 5.062135329279564e-06,
"loss": 1.1805,
"step": 941500
},
{
"epoch": 2.24,
"learning_rate": 5.05420231564668e-06,
"loss": 1.1684,
"step": 942000
},
{
"epoch": 2.24,
"learning_rate": 5.046269302013796e-06,
"loss": 1.1735,
"step": 942500
},
{
"epoch": 2.24,
"learning_rate": 5.038336288380912e-06,
"loss": 1.1674,
"step": 943000
},
{
"epoch": 2.25,
"learning_rate": 5.030403274748028e-06,
"loss": 1.1652,
"step": 943500
},
{
"epoch": 2.25,
"learning_rate": 5.022470261115144e-06,
"loss": 1.171,
"step": 944000
},
{
"epoch": 2.25,
"learning_rate": 5.014537247482261e-06,
"loss": 1.1813,
"step": 944500
},
{
"epoch": 2.25,
"learning_rate": 5.006604233849376e-06,
"loss": 1.1689,
"step": 945000
},
{
"epoch": 2.25,
"learning_rate": 4.998671220216493e-06,
"loss": 1.1807,
"step": 945500
},
{
"epoch": 2.25,
"learning_rate": 4.9907382065836085e-06,
"loss": 1.1766,
"step": 946000
},
{
"epoch": 2.25,
"learning_rate": 4.982805192950724e-06,
"loss": 1.1727,
"step": 946500
},
{
"epoch": 2.25,
"learning_rate": 4.97487217931784e-06,
"loss": 1.1717,
"step": 947000
},
{
"epoch": 2.25,
"learning_rate": 4.966939165684956e-06,
"loss": 1.1744,
"step": 947500
},
{
"epoch": 2.26,
"learning_rate": 4.959006152052073e-06,
"loss": 1.1776,
"step": 948000
},
{
"epoch": 2.26,
"learning_rate": 4.951073138419189e-06,
"loss": 1.1788,
"step": 948500
},
{
"epoch": 2.26,
"learning_rate": 4.9431401247863045e-06,
"loss": 1.1792,
"step": 949000
},
{
"epoch": 2.26,
"learning_rate": 4.935207111153421e-06,
"loss": 1.1628,
"step": 949500
},
{
"epoch": 2.26,
"learning_rate": 4.927274097520537e-06,
"loss": 1.176,
"step": 950000
},
{
"epoch": 2.26,
"learning_rate": 4.919341083887653e-06,
"loss": 1.1735,
"step": 950500
},
{
"epoch": 2.26,
"learning_rate": 4.911408070254769e-06,
"loss": 1.175,
"step": 951000
},
{
"epoch": 2.26,
"learning_rate": 4.9034750566218855e-06,
"loss": 1.1689,
"step": 951500
},
{
"epoch": 2.27,
"learning_rate": 4.895542042989001e-06,
"loss": 1.1726,
"step": 952000
},
{
"epoch": 2.27,
"learning_rate": 4.887609029356117e-06,
"loss": 1.1653,
"step": 952500
},
{
"epoch": 2.27,
"learning_rate": 4.879676015723234e-06,
"loss": 1.1516,
"step": 953000
},
{
"epoch": 2.27,
"learning_rate": 4.87174300209035e-06,
"loss": 1.1582,
"step": 953500
},
{
"epoch": 2.27,
"learning_rate": 4.863809988457466e-06,
"loss": 1.1732,
"step": 954000
},
{
"epoch": 2.27,
"learning_rate": 4.8558769748245816e-06,
"loss": 1.164,
"step": 954500
},
{
"epoch": 2.27,
"learning_rate": 4.8479439611916974e-06,
"loss": 1.1866,
"step": 955000
},
{
"epoch": 2.27,
"learning_rate": 4.840010947558813e-06,
"loss": 1.1639,
"step": 955500
},
{
"epoch": 2.28,
"learning_rate": 4.83207793392593e-06,
"loss": 1.1672,
"step": 956000
},
{
"epoch": 2.28,
"learning_rate": 4.824144920293046e-06,
"loss": 1.1631,
"step": 956500
},
{
"epoch": 2.28,
"learning_rate": 4.816211906660162e-06,
"loss": 1.1629,
"step": 957000
},
{
"epoch": 2.28,
"learning_rate": 4.808278893027278e-06,
"loss": 1.1709,
"step": 957500
},
{
"epoch": 2.28,
"learning_rate": 4.800345879394394e-06,
"loss": 1.1668,
"step": 958000
},
{
"epoch": 2.28,
"learning_rate": 4.79241286576151e-06,
"loss": 1.1825,
"step": 958500
},
{
"epoch": 2.28,
"learning_rate": 4.784479852128626e-06,
"loss": 1.1643,
"step": 959000
},
{
"epoch": 2.28,
"learning_rate": 4.776546838495743e-06,
"loss": 1.176,
"step": 959500
},
{
"epoch": 2.28,
"learning_rate": 4.768613824862859e-06,
"loss": 1.1738,
"step": 960000
},
{
"epoch": 2.29,
"learning_rate": 4.7606808112299745e-06,
"loss": 1.1737,
"step": 960500
},
{
"epoch": 2.29,
"learning_rate": 4.75274779759709e-06,
"loss": 1.1634,
"step": 961000
},
{
"epoch": 2.29,
"learning_rate": 4.744814783964207e-06,
"loss": 1.1711,
"step": 961500
},
{
"epoch": 2.29,
"learning_rate": 4.736881770331322e-06,
"loss": 1.1768,
"step": 962000
},
{
"epoch": 2.29,
"learning_rate": 4.728948756698439e-06,
"loss": 1.1614,
"step": 962500
},
{
"epoch": 2.29,
"learning_rate": 4.7210157430655555e-06,
"loss": 1.168,
"step": 963000
},
{
"epoch": 2.29,
"learning_rate": 4.7130827294326705e-06,
"loss": 1.1608,
"step": 963500
},
{
"epoch": 2.29,
"learning_rate": 4.705149715799787e-06,
"loss": 1.177,
"step": 964000
},
{
"epoch": 2.3,
"learning_rate": 4.697216702166903e-06,
"loss": 1.1692,
"step": 964500
},
{
"epoch": 2.3,
"learning_rate": 4.689283688534019e-06,
"loss": 1.1644,
"step": 965000
},
{
"epoch": 2.3,
"learning_rate": 4.681350674901135e-06,
"loss": 1.1563,
"step": 965500
},
{
"epoch": 2.3,
"learning_rate": 4.6734176612682515e-06,
"loss": 1.1629,
"step": 966000
},
{
"epoch": 2.3,
"learning_rate": 4.665484647635367e-06,
"loss": 1.1587,
"step": 966500
},
{
"epoch": 2.3,
"learning_rate": 4.657551634002483e-06,
"loss": 1.1762,
"step": 967000
},
{
"epoch": 2.3,
"learning_rate": 4.649618620369599e-06,
"loss": 1.1616,
"step": 967500
},
{
"epoch": 2.3,
"learning_rate": 4.641685606736716e-06,
"loss": 1.1682,
"step": 968000
},
{
"epoch": 2.3,
"learning_rate": 4.633752593103832e-06,
"loss": 1.1561,
"step": 968500
},
{
"epoch": 2.31,
"learning_rate": 4.625819579470948e-06,
"loss": 1.1736,
"step": 969000
},
{
"epoch": 2.31,
"learning_rate": 4.617886565838064e-06,
"loss": 1.161,
"step": 969500
},
{
"epoch": 2.31,
"learning_rate": 4.609953552205179e-06,
"loss": 1.1766,
"step": 970000
},
{
"epoch": 2.31,
"learning_rate": 4.602020538572296e-06,
"loss": 1.1677,
"step": 970500
},
{
"epoch": 2.31,
"learning_rate": 4.594087524939412e-06,
"loss": 1.1856,
"step": 971000
},
{
"epoch": 2.31,
"learning_rate": 4.586154511306528e-06,
"loss": 1.1703,
"step": 971500
},
{
"epoch": 2.31,
"learning_rate": 4.578221497673644e-06,
"loss": 1.1735,
"step": 972000
},
{
"epoch": 2.31,
"learning_rate": 4.57028848404076e-06,
"loss": 1.1751,
"step": 972500
},
{
"epoch": 2.32,
"learning_rate": 4.562355470407876e-06,
"loss": 1.1728,
"step": 973000
},
{
"epoch": 2.32,
"learning_rate": 4.554422456774992e-06,
"loss": 1.1845,
"step": 973500
},
{
"epoch": 2.32,
"learning_rate": 4.546489443142109e-06,
"loss": 1.1732,
"step": 974000
},
{
"epoch": 2.32,
"learning_rate": 4.538556429509225e-06,
"loss": 1.1657,
"step": 974500
},
{
"epoch": 2.32,
"learning_rate": 4.5306234158763405e-06,
"loss": 1.1675,
"step": 975000
},
{
"epoch": 2.32,
"learning_rate": 4.522690402243456e-06,
"loss": 1.1611,
"step": 975500
},
{
"epoch": 2.32,
"learning_rate": 4.514757388610573e-06,
"loss": 1.1631,
"step": 976000
},
{
"epoch": 2.32,
"learning_rate": 4.506824374977689e-06,
"loss": 1.1663,
"step": 976500
},
{
"epoch": 2.33,
"learning_rate": 4.498891361344805e-06,
"loss": 1.164,
"step": 977000
},
{
"epoch": 2.33,
"learning_rate": 4.4909583477119215e-06,
"loss": 1.1705,
"step": 977500
},
{
"epoch": 2.33,
"learning_rate": 4.4830253340790365e-06,
"loss": 1.1648,
"step": 978000
},
{
"epoch": 2.33,
"learning_rate": 4.475092320446153e-06,
"loss": 1.1627,
"step": 978500
},
{
"epoch": 2.33,
"learning_rate": 4.467159306813269e-06,
"loss": 1.1514,
"step": 979000
},
{
"epoch": 2.33,
"learning_rate": 4.459226293180385e-06,
"loss": 1.156,
"step": 979500
},
{
"epoch": 2.33,
"learning_rate": 4.451293279547501e-06,
"loss": 1.1699,
"step": 980000
},
{
"epoch": 2.33,
"learning_rate": 4.4433602659146176e-06,
"loss": 1.1801,
"step": 980500
},
{
"epoch": 2.33,
"learning_rate": 4.435427252281733e-06,
"loss": 1.1514,
"step": 981000
},
{
"epoch": 2.34,
"learning_rate": 4.427494238648849e-06,
"loss": 1.1701,
"step": 981500
},
{
"epoch": 2.34,
"learning_rate": 4.419561225015965e-06,
"loss": 1.1529,
"step": 982000
},
{
"epoch": 2.34,
"learning_rate": 4.411628211383082e-06,
"loss": 1.16,
"step": 982500
},
{
"epoch": 2.34,
"learning_rate": 4.403695197750198e-06,
"loss": 1.1811,
"step": 983000
},
{
"epoch": 2.34,
"learning_rate": 4.395762184117314e-06,
"loss": 1.1824,
"step": 983500
},
{
"epoch": 2.34,
"learning_rate": 4.38782917048443e-06,
"loss": 1.1603,
"step": 984000
},
{
"epoch": 2.34,
"learning_rate": 4.379896156851546e-06,
"loss": 1.1484,
"step": 984500
},
{
"epoch": 2.34,
"learning_rate": 4.371963143218662e-06,
"loss": 1.1672,
"step": 985000
},
{
"epoch": 2.35,
"learning_rate": 4.364030129585778e-06,
"loss": 1.172,
"step": 985500
},
{
"epoch": 2.35,
"learning_rate": 4.356097115952895e-06,
"loss": 1.1569,
"step": 986000
},
{
"epoch": 2.35,
"learning_rate": 4.34816410232001e-06,
"loss": 1.1726,
"step": 986500
},
{
"epoch": 2.35,
"learning_rate": 4.340231088687126e-06,
"loss": 1.1626,
"step": 987000
},
{
"epoch": 2.35,
"learning_rate": 4.332298075054242e-06,
"loss": 1.1794,
"step": 987500
},
{
"epoch": 2.35,
"learning_rate": 4.324365061421358e-06,
"loss": 1.1578,
"step": 988000
},
{
"epoch": 2.35,
"learning_rate": 4.316432047788475e-06,
"loss": 1.1586,
"step": 988500
},
{
"epoch": 2.35,
"learning_rate": 4.308499034155591e-06,
"loss": 1.1594,
"step": 989000
},
{
"epoch": 2.35,
"learning_rate": 4.3005660205227065e-06,
"loss": 1.1714,
"step": 989500
},
{
"epoch": 2.36,
"learning_rate": 4.292633006889822e-06,
"loss": 1.1657,
"step": 990000
},
{
"epoch": 2.36,
"learning_rate": 4.284699993256939e-06,
"loss": 1.1736,
"step": 990500
},
{
"epoch": 2.36,
"learning_rate": 4.276766979624055e-06,
"loss": 1.1616,
"step": 991000
},
{
"epoch": 2.36,
"learning_rate": 4.268833965991171e-06,
"loss": 1.1556,
"step": 991500
},
{
"epoch": 2.36,
"learning_rate": 4.260900952358287e-06,
"loss": 1.1796,
"step": 992000
},
{
"epoch": 2.36,
"learning_rate": 4.252967938725403e-06,
"loss": 1.1476,
"step": 992500
},
{
"epoch": 2.36,
"learning_rate": 4.245034925092519e-06,
"loss": 1.1616,
"step": 993000
},
{
"epoch": 2.36,
"learning_rate": 4.237101911459635e-06,
"loss": 1.1571,
"step": 993500
},
{
"epoch": 2.37,
"learning_rate": 4.229168897826752e-06,
"loss": 1.1862,
"step": 994000
},
{
"epoch": 2.37,
"learning_rate": 4.221235884193867e-06,
"loss": 1.1714,
"step": 994500
},
{
"epoch": 2.37,
"learning_rate": 4.2133028705609836e-06,
"loss": 1.1653,
"step": 995000
},
{
"epoch": 2.37,
"learning_rate": 4.2053698569280994e-06,
"loss": 1.159,
"step": 995500
},
{
"epoch": 2.37,
"learning_rate": 4.197436843295215e-06,
"loss": 1.1627,
"step": 996000
},
{
"epoch": 2.37,
"learning_rate": 4.189503829662331e-06,
"loss": 1.1754,
"step": 996500
},
{
"epoch": 2.37,
"learning_rate": 4.181570816029448e-06,
"loss": 1.1775,
"step": 997000
},
{
"epoch": 2.37,
"learning_rate": 4.173637802396564e-06,
"loss": 1.1588,
"step": 997500
},
{
"epoch": 2.38,
"learning_rate": 4.16570478876368e-06,
"loss": 1.1677,
"step": 998000
},
{
"epoch": 2.38,
"learning_rate": 4.157771775130796e-06,
"loss": 1.1699,
"step": 998500
},
{
"epoch": 2.38,
"learning_rate": 4.149838761497912e-06,
"loss": 1.1826,
"step": 999000
},
{
"epoch": 2.38,
"learning_rate": 4.141905747865028e-06,
"loss": 1.1611,
"step": 999500
},
{
"epoch": 2.38,
"learning_rate": 4.133972734232144e-06,
"loss": 1.1621,
"step": 1000000
},
{
"epoch": 2.38,
"learning_rate": 4.126039720599261e-06,
"loss": 1.1675,
"step": 1000500
},
{
"epoch": 2.38,
"learning_rate": 4.118106706966376e-06,
"loss": 1.1795,
"step": 1001000
},
{
"epoch": 2.38,
"learning_rate": 4.110173693333492e-06,
"loss": 1.1591,
"step": 1001500
},
{
"epoch": 2.38,
"learning_rate": 4.102240679700608e-06,
"loss": 1.1839,
"step": 1002000
},
{
"epoch": 2.39,
"learning_rate": 4.094307666067724e-06,
"loss": 1.1603,
"step": 1002500
},
{
"epoch": 2.39,
"learning_rate": 4.086374652434841e-06,
"loss": 1.1582,
"step": 1003000
},
{
"epoch": 2.39,
"learning_rate": 4.078441638801957e-06,
"loss": 1.1804,
"step": 1003500
},
{
"epoch": 2.39,
"learning_rate": 4.0705086251690725e-06,
"loss": 1.1517,
"step": 1004000
},
{
"epoch": 2.39,
"learning_rate": 4.062575611536188e-06,
"loss": 1.1675,
"step": 1004500
},
{
"epoch": 2.39,
"learning_rate": 4.054642597903305e-06,
"loss": 1.1616,
"step": 1005000
},
{
"epoch": 2.39,
"learning_rate": 4.046709584270421e-06,
"loss": 1.1809,
"step": 1005500
},
{
"epoch": 2.39,
"learning_rate": 4.038776570637537e-06,
"loss": 1.1575,
"step": 1006000
},
{
"epoch": 2.4,
"learning_rate": 4.030843557004653e-06,
"loss": 1.1477,
"step": 1006500
},
{
"epoch": 2.4,
"learning_rate": 4.022910543371769e-06,
"loss": 1.1603,
"step": 1007000
},
{
"epoch": 2.4,
"learning_rate": 4.014977529738885e-06,
"loss": 1.1656,
"step": 1007500
},
{
"epoch": 2.4,
"learning_rate": 4.007044516106001e-06,
"loss": 1.1509,
"step": 1008000
},
{
"epoch": 2.4,
"learning_rate": 3.999111502473118e-06,
"loss": 1.1531,
"step": 1008500
},
{
"epoch": 2.4,
"learning_rate": 3.991178488840234e-06,
"loss": 1.18,
"step": 1009000
},
{
"epoch": 2.4,
"learning_rate": 3.9832454752073496e-06,
"loss": 1.1605,
"step": 1009500
},
{
"epoch": 2.4,
"learning_rate": 3.9753124615744654e-06,
"loss": 1.1566,
"step": 1010000
},
{
"epoch": 2.4,
"learning_rate": 3.967379447941581e-06,
"loss": 1.1533,
"step": 1010500
},
{
"epoch": 2.41,
"learning_rate": 3.959446434308697e-06,
"loss": 1.172,
"step": 1011000
},
{
"epoch": 2.41,
"learning_rate": 3.951513420675814e-06,
"loss": 1.1685,
"step": 1011500
},
{
"epoch": 2.41,
"learning_rate": 3.94358040704293e-06,
"loss": 1.1672,
"step": 1012000
},
{
"epoch": 2.41,
"learning_rate": 3.935647393410046e-06,
"loss": 1.1782,
"step": 1012500
},
{
"epoch": 2.41,
"learning_rate": 3.927714379777162e-06,
"loss": 1.1696,
"step": 1013000
},
{
"epoch": 2.41,
"learning_rate": 3.919781366144278e-06,
"loss": 1.1686,
"step": 1013500
},
{
"epoch": 2.41,
"learning_rate": 3.911848352511394e-06,
"loss": 1.1603,
"step": 1014000
},
{
"epoch": 2.41,
"learning_rate": 3.90391533887851e-06,
"loss": 1.1643,
"step": 1014500
},
{
"epoch": 2.42,
"learning_rate": 3.895982325245627e-06,
"loss": 1.1517,
"step": 1015000
},
{
"epoch": 2.42,
"learning_rate": 3.8880493116127425e-06,
"loss": 1.1589,
"step": 1015500
},
{
"epoch": 2.42,
"learning_rate": 3.880116297979858e-06,
"loss": 1.1743,
"step": 1016000
},
{
"epoch": 2.42,
"learning_rate": 3.872183284346974e-06,
"loss": 1.162,
"step": 1016500
},
{
"epoch": 2.42,
"learning_rate": 3.864250270714091e-06,
"loss": 1.1791,
"step": 1017000
},
{
"epoch": 2.42,
"learning_rate": 3.856317257081207e-06,
"loss": 1.1741,
"step": 1017500
},
{
"epoch": 2.42,
"learning_rate": 3.848384243448323e-06,
"loss": 1.1521,
"step": 1018000
},
{
"epoch": 2.42,
"learning_rate": 3.8404512298154385e-06,
"loss": 1.1583,
"step": 1018500
},
{
"epoch": 2.43,
"learning_rate": 3.832518216182554e-06,
"loss": 1.1573,
"step": 1019000
},
{
"epoch": 2.43,
"learning_rate": 3.824585202549671e-06,
"loss": 1.1499,
"step": 1019500
},
{
"epoch": 2.43,
"learning_rate": 3.816652188916787e-06,
"loss": 1.1533,
"step": 1020000
},
{
"epoch": 2.43,
"learning_rate": 3.8087191752839033e-06,
"loss": 1.1518,
"step": 1020500
},
{
"epoch": 2.43,
"learning_rate": 3.8007861616510187e-06,
"loss": 1.1769,
"step": 1021000
},
{
"epoch": 2.43,
"learning_rate": 3.792853148018135e-06,
"loss": 1.1597,
"step": 1021500
},
{
"epoch": 2.43,
"learning_rate": 3.7849201343852517e-06,
"loss": 1.1713,
"step": 1022000
},
{
"epoch": 2.43,
"learning_rate": 3.776987120752367e-06,
"loss": 1.1516,
"step": 1022500
},
{
"epoch": 2.43,
"learning_rate": 3.7690541071194834e-06,
"loss": 1.1585,
"step": 1023000
},
{
"epoch": 2.44,
"learning_rate": 3.7611210934865993e-06,
"loss": 1.1438,
"step": 1023500
},
{
"epoch": 2.44,
"learning_rate": 3.7531880798537156e-06,
"loss": 1.1602,
"step": 1024000
},
{
"epoch": 2.44,
"learning_rate": 3.7452550662208315e-06,
"loss": 1.1579,
"step": 1024500
},
{
"epoch": 2.44,
"learning_rate": 3.7373220525879477e-06,
"loss": 1.1639,
"step": 1025000
},
{
"epoch": 2.44,
"learning_rate": 3.7293890389550636e-06,
"loss": 1.1612,
"step": 1025500
},
{
"epoch": 2.44,
"learning_rate": 3.72145602532218e-06,
"loss": 1.172,
"step": 1026000
},
{
"epoch": 2.44,
"learning_rate": 3.7135230116892958e-06,
"loss": 1.1449,
"step": 1026500
},
{
"epoch": 2.44,
"learning_rate": 3.705589998056412e-06,
"loss": 1.1701,
"step": 1027000
},
{
"epoch": 2.45,
"learning_rate": 3.6976569844235283e-06,
"loss": 1.1665,
"step": 1027500
},
{
"epoch": 2.45,
"learning_rate": 3.689723970790644e-06,
"loss": 1.1765,
"step": 1028000
},
{
"epoch": 2.45,
"learning_rate": 3.6817909571577605e-06,
"loss": 1.1455,
"step": 1028500
},
{
"epoch": 2.45,
"learning_rate": 3.673857943524876e-06,
"loss": 1.1789,
"step": 1029000
},
{
"epoch": 2.45,
"learning_rate": 3.6659249298919926e-06,
"loss": 1.1603,
"step": 1029500
},
{
"epoch": 2.45,
"learning_rate": 3.657991916259108e-06,
"loss": 1.1704,
"step": 1030000
},
{
"epoch": 2.45,
"learning_rate": 3.6500589026262244e-06,
"loss": 1.1654,
"step": 1030500
},
{
"epoch": 2.45,
"learning_rate": 3.6421258889933402e-06,
"loss": 1.1568,
"step": 1031000
},
{
"epoch": 2.45,
"learning_rate": 3.6341928753604565e-06,
"loss": 1.1505,
"step": 1031500
},
{
"epoch": 2.46,
"learning_rate": 3.626259861727573e-06,
"loss": 1.1562,
"step": 1032000
},
{
"epoch": 2.46,
"learning_rate": 3.6183268480946887e-06,
"loss": 1.163,
"step": 1032500
},
{
"epoch": 2.46,
"learning_rate": 3.610393834461805e-06,
"loss": 1.156,
"step": 1033000
},
{
"epoch": 2.46,
"learning_rate": 3.602460820828921e-06,
"loss": 1.1745,
"step": 1033500
},
{
"epoch": 2.46,
"learning_rate": 3.594527807196037e-06,
"loss": 1.1505,
"step": 1034000
},
{
"epoch": 2.46,
"learning_rate": 3.586594793563153e-06,
"loss": 1.1707,
"step": 1034500
},
{
"epoch": 2.46,
"learning_rate": 3.5786617799302693e-06,
"loss": 1.1594,
"step": 1035000
},
{
"epoch": 2.46,
"learning_rate": 3.570728766297385e-06,
"loss": 1.1612,
"step": 1035500
},
{
"epoch": 2.47,
"learning_rate": 3.5627957526645014e-06,
"loss": 1.1601,
"step": 1036000
},
{
"epoch": 2.47,
"learning_rate": 3.554862739031617e-06,
"loss": 1.1685,
"step": 1036500
},
{
"epoch": 2.47,
"learning_rate": 3.5469297253987336e-06,
"loss": 1.1675,
"step": 1037000
},
{
"epoch": 2.47,
"learning_rate": 3.53899671176585e-06,
"loss": 1.1472,
"step": 1037500
},
{
"epoch": 2.47,
"learning_rate": 3.5310636981329653e-06,
"loss": 1.1618,
"step": 1038000
},
{
"epoch": 2.47,
"learning_rate": 3.5231306845000816e-06,
"loss": 1.1558,
"step": 1038500
},
{
"epoch": 2.47,
"learning_rate": 3.5151976708671975e-06,
"loss": 1.1641,
"step": 1039000
},
{
"epoch": 2.47,
"learning_rate": 3.5072646572343138e-06,
"loss": 1.1571,
"step": 1039500
},
{
"epoch": 2.48,
"learning_rate": 3.4993316436014296e-06,
"loss": 1.1632,
"step": 1040000
},
{
"epoch": 2.48,
"learning_rate": 3.491398629968546e-06,
"loss": 1.1586,
"step": 1040500
},
{
"epoch": 2.48,
"learning_rate": 3.4834656163356618e-06,
"loss": 1.166,
"step": 1041000
},
{
"epoch": 2.48,
"learning_rate": 3.475532602702778e-06,
"loss": 1.1631,
"step": 1041500
},
{
"epoch": 2.48,
"learning_rate": 3.4675995890698943e-06,
"loss": 1.1777,
"step": 1042000
},
{
"epoch": 2.48,
"learning_rate": 3.45966657543701e-06,
"loss": 1.168,
"step": 1042500
},
{
"epoch": 2.48,
"learning_rate": 3.4517335618041265e-06,
"loss": 1.164,
"step": 1043000
},
{
"epoch": 2.48,
"learning_rate": 3.4438005481712424e-06,
"loss": 1.1374,
"step": 1043500
},
{
"epoch": 2.48,
"learning_rate": 3.4358675345383587e-06,
"loss": 1.1615,
"step": 1044000
},
{
"epoch": 2.49,
"learning_rate": 3.427934520905474e-06,
"loss": 1.1612,
"step": 1044500
},
{
"epoch": 2.49,
"learning_rate": 3.420001507272591e-06,
"loss": 1.1565,
"step": 1045000
},
{
"epoch": 2.49,
"learning_rate": 3.4120684936397062e-06,
"loss": 1.1597,
"step": 1045500
},
{
"epoch": 2.49,
"learning_rate": 3.4041354800068225e-06,
"loss": 1.1464,
"step": 1046000
},
{
"epoch": 2.49,
"learning_rate": 3.3962024663739384e-06,
"loss": 1.138,
"step": 1046500
},
{
"epoch": 2.49,
"learning_rate": 3.3882694527410547e-06,
"loss": 1.1457,
"step": 1047000
},
{
"epoch": 2.49,
"learning_rate": 3.380336439108171e-06,
"loss": 1.1774,
"step": 1047500
},
{
"epoch": 2.49,
"learning_rate": 3.372403425475287e-06,
"loss": 1.1501,
"step": 1048000
},
{
"epoch": 2.5,
"learning_rate": 3.364470411842403e-06,
"loss": 1.1656,
"step": 1048500
},
{
"epoch": 2.5,
"learning_rate": 3.356537398209519e-06,
"loss": 1.1614,
"step": 1049000
},
{
"epoch": 2.5,
"learning_rate": 3.3486043845766353e-06,
"loss": 1.1633,
"step": 1049500
},
{
"epoch": 2.5,
"learning_rate": 3.340671370943751e-06,
"loss": 1.1512,
"step": 1050000
},
{
"epoch": 2.5,
"learning_rate": 3.3327383573108674e-06,
"loss": 1.1414,
"step": 1050500
},
{
"epoch": 2.5,
"learning_rate": 3.3248053436779833e-06,
"loss": 1.1622,
"step": 1051000
},
{
"epoch": 2.5,
"learning_rate": 3.3168723300450996e-06,
"loss": 1.1565,
"step": 1051500
},
{
"epoch": 2.5,
"learning_rate": 3.308939316412216e-06,
"loss": 1.1566,
"step": 1052000
},
{
"epoch": 2.5,
"learning_rate": 3.3010063027793317e-06,
"loss": 1.1606,
"step": 1052500
},
{
"epoch": 2.51,
"learning_rate": 3.293073289146448e-06,
"loss": 1.1589,
"step": 1053000
},
{
"epoch": 2.51,
"learning_rate": 3.2851402755135635e-06,
"loss": 1.1599,
"step": 1053500
},
{
"epoch": 2.51,
"learning_rate": 3.2772072618806798e-06,
"loss": 1.1522,
"step": 1054000
},
{
"epoch": 2.51,
"learning_rate": 3.2692742482477956e-06,
"loss": 1.1559,
"step": 1054500
},
{
"epoch": 2.51,
"learning_rate": 3.261341234614912e-06,
"loss": 1.1468,
"step": 1055000
},
{
"epoch": 2.51,
"learning_rate": 3.2534082209820278e-06,
"loss": 1.1578,
"step": 1055500
},
{
"epoch": 2.51,
"learning_rate": 3.245475207349144e-06,
"loss": 1.1604,
"step": 1056000
},
{
"epoch": 2.51,
"learning_rate": 3.2375421937162604e-06,
"loss": 1.1723,
"step": 1056500
},
{
"epoch": 2.52,
"learning_rate": 3.2296091800833762e-06,
"loss": 1.155,
"step": 1057000
},
{
"epoch": 2.52,
"learning_rate": 3.2216761664504925e-06,
"loss": 1.1568,
"step": 1057500
},
{
"epoch": 2.52,
"learning_rate": 3.2137431528176084e-06,
"loss": 1.1568,
"step": 1058000
},
{
"epoch": 2.52,
"learning_rate": 3.2058101391847247e-06,
"loss": 1.1539,
"step": 1058500
},
{
"epoch": 2.52,
"learning_rate": 3.1978771255518405e-06,
"loss": 1.1582,
"step": 1059000
},
{
"epoch": 2.52,
"learning_rate": 3.189944111918957e-06,
"loss": 1.1531,
"step": 1059500
},
{
"epoch": 2.52,
"learning_rate": 3.1820110982860723e-06,
"loss": 1.1547,
"step": 1060000
},
{
"epoch": 2.52,
"learning_rate": 3.174078084653189e-06,
"loss": 1.1332,
"step": 1060500
},
{
"epoch": 2.53,
"learning_rate": 3.1661450710203044e-06,
"loss": 1.17,
"step": 1061000
},
{
"epoch": 2.53,
"learning_rate": 3.1582120573874207e-06,
"loss": 1.1534,
"step": 1061500
},
{
"epoch": 2.53,
"learning_rate": 3.150279043754537e-06,
"loss": 1.1561,
"step": 1062000
},
{
"epoch": 2.53,
"learning_rate": 3.142346030121653e-06,
"loss": 1.1375,
"step": 1062500
},
{
"epoch": 2.53,
"learning_rate": 3.134413016488769e-06,
"loss": 1.1432,
"step": 1063000
},
{
"epoch": 2.53,
"learning_rate": 3.126480002855885e-06,
"loss": 1.1522,
"step": 1063500
},
{
"epoch": 2.53,
"learning_rate": 3.1185469892230013e-06,
"loss": 1.1614,
"step": 1064000
},
{
"epoch": 2.53,
"learning_rate": 3.110613975590117e-06,
"loss": 1.1685,
"step": 1064500
},
{
"epoch": 2.53,
"learning_rate": 3.1026809619572334e-06,
"loss": 1.1501,
"step": 1065000
},
{
"epoch": 2.54,
"learning_rate": 3.0947479483243493e-06,
"loss": 1.1492,
"step": 1065500
},
{
"epoch": 2.54,
"learning_rate": 3.0868149346914656e-06,
"loss": 1.1668,
"step": 1066000
},
{
"epoch": 2.54,
"learning_rate": 3.078881921058582e-06,
"loss": 1.1526,
"step": 1066500
},
{
"epoch": 2.54,
"learning_rate": 3.0709489074256978e-06,
"loss": 1.1527,
"step": 1067000
},
{
"epoch": 2.54,
"learning_rate": 3.063015893792814e-06,
"loss": 1.1548,
"step": 1067500
},
{
"epoch": 2.54,
"learning_rate": 3.05508288015993e-06,
"loss": 1.1463,
"step": 1068000
},
{
"epoch": 2.54,
"learning_rate": 3.047149866527046e-06,
"loss": 1.1696,
"step": 1068500
},
{
"epoch": 2.54,
"learning_rate": 3.0392168528941616e-06,
"loss": 1.1562,
"step": 1069000
},
{
"epoch": 2.55,
"learning_rate": 3.031283839261278e-06,
"loss": 1.1556,
"step": 1069500
},
{
"epoch": 2.55,
"learning_rate": 3.023350825628394e-06,
"loss": 1.1552,
"step": 1070000
},
{
"epoch": 2.55,
"learning_rate": 3.01541781199551e-06,
"loss": 1.1538,
"step": 1070500
},
{
"epoch": 2.55,
"learning_rate": 3.007484798362626e-06,
"loss": 1.1532,
"step": 1071000
},
{
"epoch": 2.55,
"learning_rate": 2.9995517847297422e-06,
"loss": 1.1498,
"step": 1071500
},
{
"epoch": 2.55,
"learning_rate": 2.9916187710968585e-06,
"loss": 1.146,
"step": 1072000
},
{
"epoch": 2.55,
"learning_rate": 2.9836857574639744e-06,
"loss": 1.154,
"step": 1072500
},
{
"epoch": 2.55,
"learning_rate": 2.9757527438310907e-06,
"loss": 1.1584,
"step": 1073000
},
{
"epoch": 2.55,
"learning_rate": 2.9678197301982065e-06,
"loss": 1.1418,
"step": 1073500
},
{
"epoch": 2.56,
"learning_rate": 2.959886716565323e-06,
"loss": 1.1458,
"step": 1074000
},
{
"epoch": 2.56,
"learning_rate": 2.9519537029324387e-06,
"loss": 1.1503,
"step": 1074500
},
{
"epoch": 2.56,
"learning_rate": 2.944020689299555e-06,
"loss": 1.1475,
"step": 1075000
},
{
"epoch": 2.56,
"learning_rate": 2.9360876756666704e-06,
"loss": 1.1629,
"step": 1075500
},
{
"epoch": 2.56,
"learning_rate": 2.928154662033787e-06,
"loss": 1.1565,
"step": 1076000
},
{
"epoch": 2.56,
"learning_rate": 2.9202216484009034e-06,
"loss": 1.1727,
"step": 1076500
},
{
"epoch": 2.56,
"learning_rate": 2.912288634768019e-06,
"loss": 1.1451,
"step": 1077000
},
{
"epoch": 2.56,
"learning_rate": 2.904355621135135e-06,
"loss": 1.1485,
"step": 1077500
},
{
"epoch": 2.57,
"learning_rate": 2.896422607502251e-06,
"loss": 1.1484,
"step": 1078000
},
{
"epoch": 2.57,
"learning_rate": 2.8884895938693673e-06,
"loss": 1.1559,
"step": 1078500
},
{
"epoch": 2.57,
"learning_rate": 2.880556580236483e-06,
"loss": 1.1353,
"step": 1079000
},
{
"epoch": 2.57,
"learning_rate": 2.8726235666035995e-06,
"loss": 1.1427,
"step": 1079500
},
{
"epoch": 2.57,
"learning_rate": 2.8646905529707153e-06,
"loss": 1.1653,
"step": 1080000
},
{
"epoch": 2.57,
"learning_rate": 2.8567575393378316e-06,
"loss": 1.1541,
"step": 1080500
},
{
"epoch": 2.57,
"learning_rate": 2.8488245257049475e-06,
"loss": 1.1528,
"step": 1081000
},
{
"epoch": 2.57,
"learning_rate": 2.8408915120720638e-06,
"loss": 1.1531,
"step": 1081500
},
{
"epoch": 2.58,
"learning_rate": 2.83295849843918e-06,
"loss": 1.1671,
"step": 1082000
},
{
"epoch": 2.58,
"learning_rate": 2.825025484806296e-06,
"loss": 1.1541,
"step": 1082500
},
{
"epoch": 2.58,
"learning_rate": 2.817092471173412e-06,
"loss": 1.1451,
"step": 1083000
},
{
"epoch": 2.58,
"learning_rate": 2.809159457540528e-06,
"loss": 1.1609,
"step": 1083500
},
{
"epoch": 2.58,
"learning_rate": 2.8012264439076444e-06,
"loss": 1.1614,
"step": 1084000
},
{
"epoch": 2.58,
"learning_rate": 2.79329343027476e-06,
"loss": 1.1722,
"step": 1084500
},
{
"epoch": 2.58,
"learning_rate": 2.785360416641876e-06,
"loss": 1.1521,
"step": 1085000
},
{
"epoch": 2.58,
"learning_rate": 2.777427403008992e-06,
"loss": 1.1569,
"step": 1085500
},
{
"epoch": 2.58,
"learning_rate": 2.7694943893761082e-06,
"loss": 1.1467,
"step": 1086000
},
{
"epoch": 2.59,
"learning_rate": 2.7615613757432245e-06,
"loss": 1.1583,
"step": 1086500
},
{
"epoch": 2.59,
"learning_rate": 2.7536283621103404e-06,
"loss": 1.1665,
"step": 1087000
},
{
"epoch": 2.59,
"learning_rate": 2.7456953484774567e-06,
"loss": 1.1646,
"step": 1087500
},
{
"epoch": 2.59,
"learning_rate": 2.7377623348445725e-06,
"loss": 1.1541,
"step": 1088000
},
{
"epoch": 2.59,
"learning_rate": 2.729829321211689e-06,
"loss": 1.1588,
"step": 1088500
},
{
"epoch": 2.59,
"learning_rate": 2.7218963075788047e-06,
"loss": 1.1405,
"step": 1089000
},
{
"epoch": 2.59,
"learning_rate": 2.713963293945921e-06,
"loss": 1.1502,
"step": 1089500
},
{
"epoch": 2.59,
"learning_rate": 2.706030280313037e-06,
"loss": 1.1474,
"step": 1090000
},
{
"epoch": 2.6,
"learning_rate": 2.698097266680153e-06,
"loss": 1.1505,
"step": 1090500
},
{
"epoch": 2.6,
"learning_rate": 2.6901642530472694e-06,
"loss": 1.1579,
"step": 1091000
},
{
"epoch": 2.6,
"learning_rate": 2.6822312394143853e-06,
"loss": 1.1499,
"step": 1091500
},
{
"epoch": 2.6,
"learning_rate": 2.6742982257815016e-06,
"loss": 1.1668,
"step": 1092000
},
{
"epoch": 2.6,
"learning_rate": 2.666365212148617e-06,
"loss": 1.15,
"step": 1092500
},
{
"epoch": 2.6,
"learning_rate": 2.6584321985157337e-06,
"loss": 1.152,
"step": 1093000
},
{
"epoch": 2.6,
"learning_rate": 2.650499184882849e-06,
"loss": 1.1495,
"step": 1093500
},
{
"epoch": 2.6,
"learning_rate": 2.6425661712499655e-06,
"loss": 1.1559,
"step": 1094000
},
{
"epoch": 2.6,
"learning_rate": 2.6346331576170813e-06,
"loss": 1.1531,
"step": 1094500
},
{
"epoch": 2.61,
"learning_rate": 2.6267001439841976e-06,
"loss": 1.1509,
"step": 1095000
},
{
"epoch": 2.61,
"learning_rate": 2.6187671303513135e-06,
"loss": 1.1694,
"step": 1095500
},
{
"epoch": 2.61,
"learning_rate": 2.6108341167184298e-06,
"loss": 1.1527,
"step": 1096000
},
{
"epoch": 2.61,
"learning_rate": 2.602901103085546e-06,
"loss": 1.1524,
"step": 1096500
},
{
"epoch": 2.61,
"learning_rate": 2.594968089452662e-06,
"loss": 1.1582,
"step": 1097000
},
{
"epoch": 2.61,
"learning_rate": 2.5870350758197782e-06,
"loss": 1.1525,
"step": 1097500
},
{
"epoch": 2.61,
"learning_rate": 2.579102062186894e-06,
"loss": 1.1551,
"step": 1098000
},
{
"epoch": 2.61,
"learning_rate": 2.5711690485540104e-06,
"loss": 1.1551,
"step": 1098500
},
{
"epoch": 2.62,
"learning_rate": 2.5632360349211262e-06,
"loss": 1.1389,
"step": 1099000
},
{
"epoch": 2.62,
"learning_rate": 2.5553030212882425e-06,
"loss": 1.1448,
"step": 1099500
},
{
"epoch": 2.62,
"learning_rate": 2.547370007655358e-06,
"loss": 1.1498,
"step": 1100000
},
{
"epoch": 2.62,
"learning_rate": 2.5394369940224743e-06,
"loss": 1.1473,
"step": 1100500
},
{
"epoch": 2.62,
"learning_rate": 2.531503980389591e-06,
"loss": 1.1536,
"step": 1101000
},
{
"epoch": 2.62,
"learning_rate": 2.5235709667567064e-06,
"loss": 1.1498,
"step": 1101500
},
{
"epoch": 2.62,
"learning_rate": 2.5156379531238227e-06,
"loss": 1.1545,
"step": 1102000
},
{
"epoch": 2.62,
"learning_rate": 2.5077049394909386e-06,
"loss": 1.1536,
"step": 1102500
},
{
"epoch": 2.63,
"learning_rate": 2.499771925858055e-06,
"loss": 1.1677,
"step": 1103000
},
{
"epoch": 2.63,
"learning_rate": 2.491838912225171e-06,
"loss": 1.1516,
"step": 1103500
},
{
"epoch": 2.63,
"learning_rate": 2.483905898592287e-06,
"loss": 1.1645,
"step": 1104000
},
{
"epoch": 2.63,
"learning_rate": 2.475972884959403e-06,
"loss": 1.134,
"step": 1104500
},
{
"epoch": 2.63,
"learning_rate": 2.468039871326519e-06,
"loss": 1.1678,
"step": 1105000
},
{
"epoch": 2.63,
"learning_rate": 2.460106857693635e-06,
"loss": 1.1544,
"step": 1105500
},
{
"epoch": 2.63,
"learning_rate": 2.4521738440607513e-06,
"loss": 1.1343,
"step": 1106000
},
{
"epoch": 2.63,
"learning_rate": 2.444240830427867e-06,
"loss": 1.1478,
"step": 1106500
},
{
"epoch": 2.63,
"learning_rate": 2.4363078167949835e-06,
"loss": 1.152,
"step": 1107000
},
{
"epoch": 2.64,
"learning_rate": 2.4283748031620993e-06,
"loss": 1.1585,
"step": 1107500
},
{
"epoch": 2.64,
"learning_rate": 2.420441789529215e-06,
"loss": 1.1586,
"step": 1108000
},
{
"epoch": 2.64,
"learning_rate": 2.412508775896332e-06,
"loss": 1.1608,
"step": 1108500
},
{
"epoch": 2.64,
"learning_rate": 2.4045757622634478e-06,
"loss": 1.1614,
"step": 1109000
},
{
"epoch": 2.64,
"learning_rate": 2.3966427486305636e-06,
"loss": 1.1655,
"step": 1109500
},
{
"epoch": 2.64,
"learning_rate": 2.38870973499768e-06,
"loss": 1.162,
"step": 1110000
},
{
"epoch": 2.64,
"learning_rate": 2.3807767213647958e-06,
"loss": 1.157,
"step": 1110500
},
{
"epoch": 2.64,
"learning_rate": 2.372843707731912e-06,
"loss": 1.1472,
"step": 1111000
},
{
"epoch": 2.65,
"learning_rate": 2.364910694099028e-06,
"loss": 1.1376,
"step": 1111500
},
{
"epoch": 2.65,
"learning_rate": 2.356977680466144e-06,
"loss": 1.1512,
"step": 1112000
},
{
"epoch": 2.65,
"learning_rate": 2.34904466683326e-06,
"loss": 1.1498,
"step": 1112500
},
{
"epoch": 2.65,
"learning_rate": 2.341111653200376e-06,
"loss": 1.1588,
"step": 1113000
},
{
"epoch": 2.65,
"learning_rate": 2.3331786395674922e-06,
"loss": 1.1418,
"step": 1113500
},
{
"epoch": 2.65,
"learning_rate": 2.3252456259346085e-06,
"loss": 1.1655,
"step": 1114000
},
{
"epoch": 2.65,
"learning_rate": 2.3173126123017244e-06,
"loss": 1.151,
"step": 1114500
},
{
"epoch": 2.65,
"learning_rate": 2.3093795986688407e-06,
"loss": 1.141,
"step": 1115000
},
{
"epoch": 2.65,
"learning_rate": 2.3014465850359566e-06,
"loss": 1.1537,
"step": 1115500
},
{
"epoch": 2.66,
"learning_rate": 2.2935135714030724e-06,
"loss": 1.142,
"step": 1116000
},
{
"epoch": 2.66,
"learning_rate": 2.2855805577701887e-06,
"loss": 1.1444,
"step": 1116500
},
{
"epoch": 2.66,
"learning_rate": 2.2776475441373046e-06,
"loss": 1.1461,
"step": 1117000
},
{
"epoch": 2.66,
"learning_rate": 2.269714530504421e-06,
"loss": 1.1479,
"step": 1117500
},
{
"epoch": 2.66,
"learning_rate": 2.2617815168715367e-06,
"loss": 1.1634,
"step": 1118000
},
{
"epoch": 2.66,
"learning_rate": 2.253848503238653e-06,
"loss": 1.1528,
"step": 1118500
},
{
"epoch": 2.66,
"learning_rate": 2.2459154896057693e-06,
"loss": 1.1513,
"step": 1119000
},
{
"epoch": 2.66,
"learning_rate": 2.237982475972885e-06,
"loss": 1.1547,
"step": 1119500
},
{
"epoch": 2.67,
"learning_rate": 2.2300494623400015e-06,
"loss": 1.159,
"step": 1120000
},
{
"epoch": 2.67,
"learning_rate": 2.2221164487071173e-06,
"loss": 1.1388,
"step": 1120500
},
{
"epoch": 2.67,
"learning_rate": 2.214183435074233e-06,
"loss": 1.1616,
"step": 1121000
},
{
"epoch": 2.67,
"learning_rate": 2.2062504214413495e-06,
"loss": 1.1555,
"step": 1121500
},
{
"epoch": 2.67,
"learning_rate": 2.1983174078084653e-06,
"loss": 1.1521,
"step": 1122000
},
{
"epoch": 2.67,
"learning_rate": 2.1903843941755816e-06,
"loss": 1.1381,
"step": 1122500
},
{
"epoch": 2.67,
"learning_rate": 2.1824513805426975e-06,
"loss": 1.1495,
"step": 1123000
},
{
"epoch": 2.67,
"learning_rate": 2.1745183669098138e-06,
"loss": 1.1394,
"step": 1123500
},
{
"epoch": 2.68,
"learning_rate": 2.16658535327693e-06,
"loss": 1.1362,
"step": 1124000
},
{
"epoch": 2.68,
"learning_rate": 2.158652339644046e-06,
"loss": 1.1356,
"step": 1124500
},
{
"epoch": 2.68,
"learning_rate": 2.150719326011162e-06,
"loss": 1.1565,
"step": 1125000
},
{
"epoch": 2.68,
"learning_rate": 2.142786312378278e-06,
"loss": 1.1576,
"step": 1125500
},
{
"epoch": 2.68,
"learning_rate": 2.134853298745394e-06,
"loss": 1.1514,
"step": 1126000
},
{
"epoch": 2.68,
"learning_rate": 2.1269202851125102e-06,
"loss": 1.1508,
"step": 1126500
},
{
"epoch": 2.68,
"learning_rate": 2.118987271479626e-06,
"loss": 1.142,
"step": 1127000
},
{
"epoch": 2.68,
"learning_rate": 2.111054257846742e-06,
"loss": 1.1526,
"step": 1127500
},
{
"epoch": 2.68,
"learning_rate": 2.1031212442138583e-06,
"loss": 1.1457,
"step": 1128000
},
{
"epoch": 2.69,
"learning_rate": 2.0951882305809745e-06,
"loss": 1.1467,
"step": 1128500
},
{
"epoch": 2.69,
"learning_rate": 2.0872552169480904e-06,
"loss": 1.1466,
"step": 1129000
},
{
"epoch": 2.69,
"learning_rate": 2.0793222033152067e-06,
"loss": 1.1427,
"step": 1129500
},
{
"epoch": 2.69,
"learning_rate": 2.0713891896823226e-06,
"loss": 1.1412,
"step": 1130000
},
{
"epoch": 2.69,
"learning_rate": 2.063456176049439e-06,
"loss": 1.1154,
"step": 1130500
},
{
"epoch": 2.69,
"learning_rate": 2.0555231624165547e-06,
"loss": 1.1585,
"step": 1131000
},
{
"epoch": 2.69,
"learning_rate": 2.047590148783671e-06,
"loss": 1.1434,
"step": 1131500
},
{
"epoch": 2.69,
"learning_rate": 2.039657135150787e-06,
"loss": 1.1517,
"step": 1132000
},
{
"epoch": 2.7,
"learning_rate": 2.0317241215179027e-06,
"loss": 1.1624,
"step": 1132500
},
{
"epoch": 2.7,
"learning_rate": 2.023791107885019e-06,
"loss": 1.1429,
"step": 1133000
},
{
"epoch": 2.7,
"learning_rate": 2.0158580942521353e-06,
"loss": 1.1583,
"step": 1133500
},
{
"epoch": 2.7,
"learning_rate": 2.007925080619251e-06,
"loss": 1.1542,
"step": 1134000
},
{
"epoch": 2.7,
"learning_rate": 1.9999920669863675e-06,
"loss": 1.165,
"step": 1134500
},
{
"epoch": 2.7,
"learning_rate": 1.9920590533534833e-06,
"loss": 1.1383,
"step": 1135000
},
{
"epoch": 2.7,
"learning_rate": 1.9841260397205996e-06,
"loss": 1.1618,
"step": 1135500
},
{
"epoch": 2.7,
"learning_rate": 1.9761930260877155e-06,
"loss": 1.1569,
"step": 1136000
},
{
"epoch": 2.7,
"learning_rate": 1.9682600124548313e-06,
"loss": 1.1444,
"step": 1136500
},
{
"epoch": 2.71,
"learning_rate": 1.9603269988219476e-06,
"loss": 1.1771,
"step": 1137000
},
{
"epoch": 2.71,
"learning_rate": 1.9523939851890635e-06,
"loss": 1.1612,
"step": 1137500
},
{
"epoch": 2.71,
"learning_rate": 1.94446097155618e-06,
"loss": 1.1523,
"step": 1138000
},
{
"epoch": 2.71,
"learning_rate": 1.936527957923296e-06,
"loss": 1.1509,
"step": 1138500
},
{
"epoch": 2.71,
"learning_rate": 1.928594944290412e-06,
"loss": 1.1395,
"step": 1139000
},
{
"epoch": 2.71,
"learning_rate": 1.9206619306575282e-06,
"loss": 1.1454,
"step": 1139500
},
{
"epoch": 2.71,
"learning_rate": 1.912728917024644e-06,
"loss": 1.1569,
"step": 1140000
},
{
"epoch": 2.71,
"learning_rate": 1.9047959033917602e-06,
"loss": 1.1469,
"step": 1140500
},
{
"epoch": 2.72,
"learning_rate": 1.8968628897588763e-06,
"loss": 1.137,
"step": 1141000
},
{
"epoch": 2.72,
"learning_rate": 1.8889298761259921e-06,
"loss": 1.1388,
"step": 1141500
},
{
"epoch": 2.72,
"learning_rate": 1.8809968624931082e-06,
"loss": 1.1416,
"step": 1142000
},
{
"epoch": 2.72,
"learning_rate": 1.8730638488602243e-06,
"loss": 1.1556,
"step": 1142500
},
{
"epoch": 2.72,
"learning_rate": 1.8651308352273406e-06,
"loss": 1.1506,
"step": 1143000
},
{
"epoch": 2.72,
"learning_rate": 1.8571978215944566e-06,
"loss": 1.154,
"step": 1143500
},
{
"epoch": 2.72,
"learning_rate": 1.8492648079615727e-06,
"loss": 1.1644,
"step": 1144000
},
{
"epoch": 2.72,
"learning_rate": 1.8413317943286888e-06,
"loss": 1.1385,
"step": 1144500
},
{
"epoch": 2.72,
"learning_rate": 1.8333987806958049e-06,
"loss": 1.1396,
"step": 1145000
},
{
"epoch": 2.73,
"learning_rate": 1.825465767062921e-06,
"loss": 1.1317,
"step": 1145500
},
{
"epoch": 2.73,
"learning_rate": 1.8175327534300368e-06,
"loss": 1.1523,
"step": 1146000
},
{
"epoch": 2.73,
"learning_rate": 1.8095997397971529e-06,
"loss": 1.1546,
"step": 1146500
},
{
"epoch": 2.73,
"learning_rate": 1.801666726164269e-06,
"loss": 1.1568,
"step": 1147000
},
{
"epoch": 2.73,
"learning_rate": 1.793733712531385e-06,
"loss": 1.144,
"step": 1147500
},
{
"epoch": 2.73,
"learning_rate": 1.7858006988985013e-06,
"loss": 1.1573,
"step": 1148000
},
{
"epoch": 2.73,
"learning_rate": 1.7778676852656174e-06,
"loss": 1.1468,
"step": 1148500
},
{
"epoch": 2.73,
"learning_rate": 1.7699346716327335e-06,
"loss": 1.1495,
"step": 1149000
},
{
"epoch": 2.74,
"learning_rate": 1.7620016579998496e-06,
"loss": 1.14,
"step": 1149500
},
{
"epoch": 2.74,
"learning_rate": 1.7540686443669654e-06,
"loss": 1.1577,
"step": 1150000
},
{
"epoch": 2.74,
"learning_rate": 1.7461356307340815e-06,
"loss": 1.1323,
"step": 1150500
},
{
"epoch": 2.74,
"learning_rate": 1.7382026171011976e-06,
"loss": 1.1407,
"step": 1151000
},
{
"epoch": 2.74,
"learning_rate": 1.7302696034683136e-06,
"loss": 1.1487,
"step": 1151500
},
{
"epoch": 2.74,
"learning_rate": 1.7223365898354297e-06,
"loss": 1.1521,
"step": 1152000
},
{
"epoch": 2.74,
"learning_rate": 1.7144035762025458e-06,
"loss": 1.1614,
"step": 1152500
},
{
"epoch": 2.74,
"learning_rate": 1.706470562569662e-06,
"loss": 1.1469,
"step": 1153000
},
{
"epoch": 2.75,
"learning_rate": 1.6985375489367782e-06,
"loss": 1.155,
"step": 1153500
},
{
"epoch": 2.75,
"learning_rate": 1.690604535303894e-06,
"loss": 1.1583,
"step": 1154000
},
{
"epoch": 2.75,
"learning_rate": 1.6826715216710101e-06,
"loss": 1.1408,
"step": 1154500
},
{
"epoch": 2.75,
"learning_rate": 1.6747385080381262e-06,
"loss": 1.152,
"step": 1155000
},
{
"epoch": 2.75,
"learning_rate": 1.6668054944052423e-06,
"loss": 1.1549,
"step": 1155500
},
{
"epoch": 2.75,
"learning_rate": 1.6588724807723583e-06,
"loss": 1.1553,
"step": 1156000
},
{
"epoch": 2.75,
"learning_rate": 1.6509394671394744e-06,
"loss": 1.1451,
"step": 1156500
},
{
"epoch": 2.75,
"learning_rate": 1.6430064535065903e-06,
"loss": 1.1408,
"step": 1157000
},
{
"epoch": 2.75,
"learning_rate": 1.6350734398737064e-06,
"loss": 1.151,
"step": 1157500
},
{
"epoch": 2.76,
"learning_rate": 1.6271404262408226e-06,
"loss": 1.1568,
"step": 1158000
},
{
"epoch": 2.76,
"learning_rate": 1.6192074126079387e-06,
"loss": 1.1528,
"step": 1158500
},
{
"epoch": 2.76,
"learning_rate": 1.6112743989750548e-06,
"loss": 1.152,
"step": 1159000
},
{
"epoch": 2.76,
"learning_rate": 1.6033413853421709e-06,
"loss": 1.149,
"step": 1159500
},
{
"epoch": 2.76,
"learning_rate": 1.595408371709287e-06,
"loss": 1.1235,
"step": 1160000
},
{
"epoch": 2.76,
"learning_rate": 1.587475358076403e-06,
"loss": 1.1339,
"step": 1160500
},
{
"epoch": 2.76,
"learning_rate": 1.579542344443519e-06,
"loss": 1.1548,
"step": 1161000
},
{
"epoch": 2.76,
"learning_rate": 1.571609330810635e-06,
"loss": 1.1462,
"step": 1161500
},
{
"epoch": 2.77,
"learning_rate": 1.563676317177751e-06,
"loss": 1.1392,
"step": 1162000
},
{
"epoch": 2.77,
"learning_rate": 1.5557433035448671e-06,
"loss": 1.1461,
"step": 1162500
},
{
"epoch": 2.77,
"learning_rate": 1.5478102899119834e-06,
"loss": 1.1394,
"step": 1163000
},
{
"epoch": 2.77,
"learning_rate": 1.5398772762790995e-06,
"loss": 1.1375,
"step": 1163500
},
{
"epoch": 2.77,
"learning_rate": 1.5319442626462156e-06,
"loss": 1.1464,
"step": 1164000
},
{
"epoch": 2.77,
"learning_rate": 1.5240112490133316e-06,
"loss": 1.1496,
"step": 1164500
},
{
"epoch": 2.77,
"learning_rate": 1.5160782353804477e-06,
"loss": 1.156,
"step": 1165000
},
{
"epoch": 2.77,
"learning_rate": 1.5081452217475636e-06,
"loss": 1.1427,
"step": 1165500
},
{
"epoch": 2.77,
"learning_rate": 1.5002122081146797e-06,
"loss": 1.1399,
"step": 1166000
},
{
"epoch": 2.78,
"learning_rate": 1.4922791944817957e-06,
"loss": 1.1335,
"step": 1166500
},
{
"epoch": 2.78,
"learning_rate": 1.4843461808489118e-06,
"loss": 1.133,
"step": 1167000
},
{
"epoch": 2.78,
"learning_rate": 1.4764131672160279e-06,
"loss": 1.1617,
"step": 1167500
},
{
"epoch": 2.78,
"learning_rate": 1.4684801535831442e-06,
"loss": 1.1388,
"step": 1168000
},
{
"epoch": 2.78,
"learning_rate": 1.4605471399502603e-06,
"loss": 1.1378,
"step": 1168500
},
{
"epoch": 2.78,
"learning_rate": 1.4526141263173763e-06,
"loss": 1.1484,
"step": 1169000
},
{
"epoch": 2.78,
"learning_rate": 1.4446811126844922e-06,
"loss": 1.1449,
"step": 1169500
},
{
"epoch": 2.78,
"learning_rate": 1.4367480990516083e-06,
"loss": 1.1451,
"step": 1170000
},
{
"epoch": 2.79,
"learning_rate": 1.4288150854187243e-06,
"loss": 1.1632,
"step": 1170500
},
{
"epoch": 2.79,
"learning_rate": 1.4208820717858404e-06,
"loss": 1.1472,
"step": 1171000
},
{
"epoch": 2.79,
"learning_rate": 1.4129490581529565e-06,
"loss": 1.1508,
"step": 1171500
},
{
"epoch": 2.79,
"learning_rate": 1.4050160445200726e-06,
"loss": 1.1391,
"step": 1172000
},
{
"epoch": 2.79,
"learning_rate": 1.3970830308871889e-06,
"loss": 1.1599,
"step": 1172500
},
{
"epoch": 2.79,
"learning_rate": 1.389150017254305e-06,
"loss": 1.1427,
"step": 1173000
},
{
"epoch": 2.79,
"learning_rate": 1.381217003621421e-06,
"loss": 1.15,
"step": 1173500
},
{
"epoch": 2.79,
"learning_rate": 1.3732839899885369e-06,
"loss": 1.1542,
"step": 1174000
},
{
"epoch": 2.8,
"learning_rate": 1.365350976355653e-06,
"loss": 1.1528,
"step": 1174500
},
{
"epoch": 2.8,
"learning_rate": 1.357417962722769e-06,
"loss": 1.1424,
"step": 1175000
},
{
"epoch": 2.8,
"learning_rate": 1.3494849490898851e-06,
"loss": 1.1195,
"step": 1175500
},
{
"epoch": 2.8,
"learning_rate": 1.3415519354570012e-06,
"loss": 1.1542,
"step": 1176000
},
{
"epoch": 2.8,
"learning_rate": 1.3336189218241173e-06,
"loss": 1.1437,
"step": 1176500
},
{
"epoch": 2.8,
"learning_rate": 1.3256859081912331e-06,
"loss": 1.1434,
"step": 1177000
},
{
"epoch": 2.8,
"learning_rate": 1.3177528945583496e-06,
"loss": 1.1357,
"step": 1177500
},
{
"epoch": 2.8,
"learning_rate": 1.3098198809254655e-06,
"loss": 1.15,
"step": 1178000
},
{
"epoch": 2.8,
"learning_rate": 1.3018868672925816e-06,
"loss": 1.1593,
"step": 1178500
},
{
"epoch": 2.81,
"learning_rate": 1.2939538536596977e-06,
"loss": 1.1398,
"step": 1179000
},
{
"epoch": 2.81,
"learning_rate": 1.2860208400268137e-06,
"loss": 1.142,
"step": 1179500
},
{
"epoch": 2.81,
"learning_rate": 1.2780878263939298e-06,
"loss": 1.1422,
"step": 1180000
},
{
"epoch": 2.81,
"learning_rate": 1.2701548127610459e-06,
"loss": 1.1652,
"step": 1180500
},
{
"epoch": 2.81,
"learning_rate": 1.2622217991281617e-06,
"loss": 1.1397,
"step": 1181000
},
{
"epoch": 2.81,
"learning_rate": 1.2542887854952778e-06,
"loss": 1.1315,
"step": 1181500
},
{
"epoch": 2.81,
"learning_rate": 1.2463557718623941e-06,
"loss": 1.1462,
"step": 1182000
},
{
"epoch": 2.81,
"learning_rate": 1.2384227582295102e-06,
"loss": 1.1303,
"step": 1182500
},
{
"epoch": 2.82,
"learning_rate": 1.230489744596626e-06,
"loss": 1.1381,
"step": 1183000
},
{
"epoch": 2.82,
"learning_rate": 1.2225567309637421e-06,
"loss": 1.1645,
"step": 1183500
},
{
"epoch": 2.82,
"learning_rate": 1.2146237173308584e-06,
"loss": 1.1457,
"step": 1184000
},
{
"epoch": 2.82,
"learning_rate": 1.2066907036979745e-06,
"loss": 1.1397,
"step": 1184500
},
{
"epoch": 2.82,
"learning_rate": 1.1987576900650904e-06,
"loss": 1.1473,
"step": 1185000
},
{
"epoch": 2.82,
"learning_rate": 1.1908246764322064e-06,
"loss": 1.1547,
"step": 1185500
},
{
"epoch": 2.82,
"learning_rate": 1.1828916627993225e-06,
"loss": 1.1382,
"step": 1186000
},
{
"epoch": 2.82,
"learning_rate": 1.1749586491664388e-06,
"loss": 1.144,
"step": 1186500
},
{
"epoch": 2.82,
"learning_rate": 1.1670256355335549e-06,
"loss": 1.1461,
"step": 1187000
},
{
"epoch": 2.83,
"learning_rate": 1.1590926219006707e-06,
"loss": 1.1598,
"step": 1187500
},
{
"epoch": 2.83,
"learning_rate": 1.1511596082677868e-06,
"loss": 1.1352,
"step": 1188000
},
{
"epoch": 2.83,
"learning_rate": 1.1432265946349031e-06,
"loss": 1.1369,
"step": 1188500
},
{
"epoch": 2.83,
"learning_rate": 1.1352935810020192e-06,
"loss": 1.1548,
"step": 1189000
},
{
"epoch": 2.83,
"learning_rate": 1.127360567369135e-06,
"loss": 1.1429,
"step": 1189500
},
{
"epoch": 2.83,
"learning_rate": 1.1194275537362511e-06,
"loss": 1.1349,
"step": 1190000
},
{
"epoch": 2.83,
"learning_rate": 1.1114945401033672e-06,
"loss": 1.15,
"step": 1190500
},
{
"epoch": 2.83,
"learning_rate": 1.1035615264704835e-06,
"loss": 1.1365,
"step": 1191000
},
{
"epoch": 2.84,
"learning_rate": 1.0956285128375994e-06,
"loss": 1.1535,
"step": 1191500
},
{
"epoch": 2.84,
"learning_rate": 1.0876954992047154e-06,
"loss": 1.144,
"step": 1192000
},
{
"epoch": 2.84,
"learning_rate": 1.0797624855718315e-06,
"loss": 1.153,
"step": 1192500
},
{
"epoch": 2.84,
"learning_rate": 1.0718294719389476e-06,
"loss": 1.155,
"step": 1193000
},
{
"epoch": 2.84,
"learning_rate": 1.0638964583060637e-06,
"loss": 1.149,
"step": 1193500
},
{
"epoch": 2.84,
"learning_rate": 1.0559634446731797e-06,
"loss": 1.1501,
"step": 1194000
},
{
"epoch": 2.84,
"learning_rate": 1.0480304310402958e-06,
"loss": 1.1324,
"step": 1194500
},
{
"epoch": 2.84,
"learning_rate": 1.0400974174074119e-06,
"loss": 1.1415,
"step": 1195000
},
{
"epoch": 2.85,
"learning_rate": 1.032164403774528e-06,
"loss": 1.1606,
"step": 1195500
},
{
"epoch": 2.85,
"learning_rate": 1.024231390141644e-06,
"loss": 1.1448,
"step": 1196000
},
{
"epoch": 2.85,
"learning_rate": 1.0162983765087601e-06,
"loss": 1.1527,
"step": 1196500
},
{
"epoch": 2.85,
"learning_rate": 1.0083653628758762e-06,
"loss": 1.1392,
"step": 1197000
},
{
"epoch": 2.85,
"learning_rate": 1.0004323492429923e-06,
"loss": 1.1395,
"step": 1197500
},
{
"epoch": 2.85,
"learning_rate": 9.924993356101084e-07,
"loss": 1.1415,
"step": 1198000
},
{
"epoch": 2.85,
"learning_rate": 9.845663219772244e-07,
"loss": 1.1452,
"step": 1198500
},
{
"epoch": 2.85,
"learning_rate": 9.766333083443405e-07,
"loss": 1.1166,
"step": 1199000
},
{
"epoch": 2.85,
"learning_rate": 9.687002947114566e-07,
"loss": 1.1494,
"step": 1199500
},
{
"epoch": 2.86,
"learning_rate": 9.607672810785727e-07,
"loss": 1.1373,
"step": 1200000
},
{
"epoch": 2.86,
"learning_rate": 9.528342674456886e-07,
"loss": 1.1255,
"step": 1200500
},
{
"epoch": 2.86,
"learning_rate": 9.449012538128048e-07,
"loss": 1.1561,
"step": 1201000
},
{
"epoch": 2.86,
"learning_rate": 9.369682401799209e-07,
"loss": 1.1426,
"step": 1201500
},
{
"epoch": 2.86,
"learning_rate": 9.290352265470369e-07,
"loss": 1.1391,
"step": 1202000
},
{
"epoch": 2.86,
"learning_rate": 9.211022129141529e-07,
"loss": 1.1552,
"step": 1202500
},
{
"epoch": 2.86,
"learning_rate": 9.13169199281269e-07,
"loss": 1.1487,
"step": 1203000
},
{
"epoch": 2.86,
"learning_rate": 9.052361856483852e-07,
"loss": 1.1381,
"step": 1203500
},
{
"epoch": 2.87,
"learning_rate": 8.973031720155012e-07,
"loss": 1.1335,
"step": 1204000
},
{
"epoch": 2.87,
"learning_rate": 8.893701583826172e-07,
"loss": 1.1576,
"step": 1204500
},
{
"epoch": 2.87,
"learning_rate": 8.814371447497333e-07,
"loss": 1.1522,
"step": 1205000
},
{
"epoch": 2.87,
"learning_rate": 8.735041311168493e-07,
"loss": 1.1431,
"step": 1205500
},
{
"epoch": 2.87,
"learning_rate": 8.655711174839655e-07,
"loss": 1.1496,
"step": 1206000
},
{
"epoch": 2.87,
"learning_rate": 8.576381038510815e-07,
"loss": 1.1481,
"step": 1206500
},
{
"epoch": 2.87,
"learning_rate": 8.497050902181976e-07,
"loss": 1.1362,
"step": 1207000
},
{
"epoch": 2.87,
"learning_rate": 8.417720765853137e-07,
"loss": 1.1626,
"step": 1207500
},
{
"epoch": 2.87,
"learning_rate": 8.338390629524297e-07,
"loss": 1.1314,
"step": 1208000
},
{
"epoch": 2.88,
"learning_rate": 8.259060493195459e-07,
"loss": 1.1469,
"step": 1208500
},
{
"epoch": 2.88,
"learning_rate": 8.179730356866619e-07,
"loss": 1.1418,
"step": 1209000
},
{
"epoch": 2.88,
"learning_rate": 8.10040022053778e-07,
"loss": 1.1524,
"step": 1209500
},
{
"epoch": 2.88,
"learning_rate": 8.02107008420894e-07,
"loss": 1.1509,
"step": 1210000
},
{
"epoch": 2.88,
"learning_rate": 7.941739947880101e-07,
"loss": 1.1392,
"step": 1210500
},
{
"epoch": 2.88,
"learning_rate": 7.862409811551262e-07,
"loss": 1.1414,
"step": 1211000
},
{
"epoch": 2.88,
"learning_rate": 7.783079675222423e-07,
"loss": 1.1357,
"step": 1211500
},
{
"epoch": 2.88,
"learning_rate": 7.703749538893583e-07,
"loss": 1.1481,
"step": 1212000
},
{
"epoch": 2.89,
"learning_rate": 7.624419402564744e-07,
"loss": 1.1617,
"step": 1212500
},
{
"epoch": 2.89,
"learning_rate": 7.545089266235904e-07,
"loss": 1.1444,
"step": 1213000
},
{
"epoch": 2.89,
"learning_rate": 7.465759129907066e-07,
"loss": 1.155,
"step": 1213500
},
{
"epoch": 2.89,
"learning_rate": 7.386428993578226e-07,
"loss": 1.129,
"step": 1214000
},
{
"epoch": 2.89,
"learning_rate": 7.307098857249387e-07,
"loss": 1.154,
"step": 1214500
},
{
"epoch": 2.89,
"learning_rate": 7.227768720920547e-07,
"loss": 1.1496,
"step": 1215000
},
{
"epoch": 2.89,
"learning_rate": 7.148438584591707e-07,
"loss": 1.1322,
"step": 1215500
},
{
"epoch": 2.89,
"learning_rate": 7.069108448262869e-07,
"loss": 1.1548,
"step": 1216000
},
{
"epoch": 2.9,
"learning_rate": 6.98977831193403e-07,
"loss": 1.1403,
"step": 1216500
},
{
"epoch": 2.9,
"learning_rate": 6.91044817560519e-07,
"loss": 1.1508,
"step": 1217000
},
{
"epoch": 2.9,
"learning_rate": 6.83111803927635e-07,
"loss": 1.1455,
"step": 1217500
},
{
"epoch": 2.9,
"learning_rate": 6.751787902947511e-07,
"loss": 1.1381,
"step": 1218000
},
{
"epoch": 2.9,
"learning_rate": 6.672457766618673e-07,
"loss": 1.1509,
"step": 1218500
},
{
"epoch": 2.9,
"learning_rate": 6.593127630289834e-07,
"loss": 1.1373,
"step": 1219000
},
{
"epoch": 2.9,
"learning_rate": 6.513797493960993e-07,
"loss": 1.1482,
"step": 1219500
},
{
"epoch": 2.9,
"learning_rate": 6.434467357632154e-07,
"loss": 1.1388,
"step": 1220000
},
{
"epoch": 2.9,
"learning_rate": 6.355137221303315e-07,
"loss": 1.1422,
"step": 1220500
},
{
"epoch": 2.91,
"learning_rate": 6.275807084974477e-07,
"loss": 1.1342,
"step": 1221000
},
{
"epoch": 2.91,
"learning_rate": 6.196476948645637e-07,
"loss": 1.1466,
"step": 1221500
},
{
"epoch": 2.91,
"learning_rate": 6.117146812316797e-07,
"loss": 1.1359,
"step": 1222000
},
{
"epoch": 2.91,
"learning_rate": 6.037816675987959e-07,
"loss": 1.1504,
"step": 1222500
},
{
"epoch": 2.91,
"learning_rate": 5.958486539659119e-07,
"loss": 1.1369,
"step": 1223000
},
{
"epoch": 2.91,
"learning_rate": 5.879156403330279e-07,
"loss": 1.1404,
"step": 1223500
},
{
"epoch": 2.91,
"learning_rate": 5.79982626700144e-07,
"loss": 1.1438,
"step": 1224000
},
{
"epoch": 2.91,
"learning_rate": 5.720496130672601e-07,
"loss": 1.1355,
"step": 1224500
},
{
"epoch": 2.92,
"learning_rate": 5.641165994343762e-07,
"loss": 1.1538,
"step": 1225000
},
{
"epoch": 2.92,
"learning_rate": 5.561835858014922e-07,
"loss": 1.1396,
"step": 1225500
},
{
"epoch": 2.92,
"learning_rate": 5.482505721686083e-07,
"loss": 1.1504,
"step": 1226000
},
{
"epoch": 2.92,
"learning_rate": 5.403175585357244e-07,
"loss": 1.1428,
"step": 1226500
},
{
"epoch": 2.92,
"learning_rate": 5.323845449028405e-07,
"loss": 1.1513,
"step": 1227000
},
{
"epoch": 2.92,
"learning_rate": 5.244515312699566e-07,
"loss": 1.1489,
"step": 1227500
},
{
"epoch": 2.92,
"learning_rate": 5.165185176370726e-07,
"loss": 1.1469,
"step": 1228000
},
{
"epoch": 2.92,
"learning_rate": 5.085855040041886e-07,
"loss": 1.1604,
"step": 1228500
},
{
"epoch": 2.92,
"learning_rate": 5.006524903713048e-07,
"loss": 1.1446,
"step": 1229000
},
{
"epoch": 2.93,
"learning_rate": 4.927194767384208e-07,
"loss": 1.1386,
"step": 1229500
},
{
"epoch": 2.93,
"learning_rate": 4.847864631055369e-07,
"loss": 1.1435,
"step": 1230000
},
{
"epoch": 2.93,
"learning_rate": 4.768534494726529e-07,
"loss": 1.1456,
"step": 1230500
},
{
"epoch": 2.93,
"learning_rate": 4.68920435839769e-07,
"loss": 1.1422,
"step": 1231000
},
{
"epoch": 2.93,
"learning_rate": 4.609874222068851e-07,
"loss": 1.1365,
"step": 1231500
},
{
"epoch": 2.93,
"learning_rate": 4.5305440857400114e-07,
"loss": 1.1592,
"step": 1232000
},
{
"epoch": 2.93,
"learning_rate": 4.4512139494111727e-07,
"loss": 1.171,
"step": 1232500
},
{
"epoch": 2.93,
"learning_rate": 4.371883813082333e-07,
"loss": 1.1478,
"step": 1233000
},
{
"epoch": 2.94,
"learning_rate": 4.2925536767534937e-07,
"loss": 1.1486,
"step": 1233500
},
{
"epoch": 2.94,
"learning_rate": 4.2132235404246545e-07,
"loss": 1.1485,
"step": 1234000
},
{
"epoch": 2.94,
"learning_rate": 4.133893404095815e-07,
"loss": 1.131,
"step": 1234500
},
{
"epoch": 2.94,
"learning_rate": 4.054563267766976e-07,
"loss": 1.1415,
"step": 1235000
},
{
"epoch": 2.94,
"learning_rate": 3.975233131438137e-07,
"loss": 1.1492,
"step": 1235500
},
{
"epoch": 2.94,
"learning_rate": 3.8959029951092975e-07,
"loss": 1.1399,
"step": 1236000
},
{
"epoch": 2.94,
"learning_rate": 3.8165728587804583e-07,
"loss": 1.1325,
"step": 1236500
},
{
"epoch": 2.94,
"learning_rate": 3.7372427224516185e-07,
"loss": 1.1632,
"step": 1237000
},
{
"epoch": 2.95,
"learning_rate": 3.65791258612278e-07,
"loss": 1.1534,
"step": 1237500
},
{
"epoch": 2.95,
"learning_rate": 3.57858244979394e-07,
"loss": 1.1527,
"step": 1238000
},
{
"epoch": 2.95,
"learning_rate": 3.4992523134651014e-07,
"loss": 1.1318,
"step": 1238500
},
{
"epoch": 2.95,
"learning_rate": 3.4199221771362616e-07,
"loss": 1.1535,
"step": 1239000
},
{
"epoch": 2.95,
"learning_rate": 3.3405920408074224e-07,
"loss": 1.1368,
"step": 1239500
},
{
"epoch": 2.95,
"learning_rate": 3.261261904478583e-07,
"loss": 1.1448,
"step": 1240000
},
{
"epoch": 2.95,
"learning_rate": 3.181931768149744e-07,
"loss": 1.1597,
"step": 1240500
},
{
"epoch": 2.95,
"learning_rate": 3.1026016318209047e-07,
"loss": 1.149,
"step": 1241000
},
{
"epoch": 2.95,
"learning_rate": 3.0232714954920654e-07,
"loss": 1.1383,
"step": 1241500
},
{
"epoch": 2.96,
"learning_rate": 2.943941359163226e-07,
"loss": 1.1288,
"step": 1242000
},
{
"epoch": 2.96,
"learning_rate": 2.864611222834387e-07,
"loss": 1.1397,
"step": 1242500
},
{
"epoch": 2.96,
"learning_rate": 2.785281086505547e-07,
"loss": 1.1452,
"step": 1243000
},
{
"epoch": 2.96,
"learning_rate": 2.705950950176708e-07,
"loss": 1.1439,
"step": 1243500
},
{
"epoch": 2.96,
"learning_rate": 2.626620813847869e-07,
"loss": 1.144,
"step": 1244000
},
{
"epoch": 2.96,
"learning_rate": 2.5472906775190295e-07,
"loss": 1.139,
"step": 1244500
},
{
"epoch": 2.96,
"learning_rate": 2.4679605411901903e-07,
"loss": 1.138,
"step": 1245000
},
{
"epoch": 2.96,
"learning_rate": 2.388630404861351e-07,
"loss": 1.1524,
"step": 1245500
},
{
"epoch": 2.97,
"learning_rate": 2.3093002685325115e-07,
"loss": 1.145,
"step": 1246000
},
{
"epoch": 2.97,
"learning_rate": 2.2299701322036723e-07,
"loss": 1.1471,
"step": 1246500
},
{
"epoch": 2.97,
"learning_rate": 2.150639995874833e-07,
"loss": 1.1336,
"step": 1247000
},
{
"epoch": 2.97,
"learning_rate": 2.0713098595459938e-07,
"loss": 1.1451,
"step": 1247500
},
{
"epoch": 2.97,
"learning_rate": 1.9919797232171543e-07,
"loss": 1.1405,
"step": 1248000
},
{
"epoch": 2.97,
"learning_rate": 1.912649586888315e-07,
"loss": 1.1174,
"step": 1248500
},
{
"epoch": 2.97,
"learning_rate": 1.833319450559476e-07,
"loss": 1.1498,
"step": 1249000
},
{
"epoch": 2.97,
"learning_rate": 1.7539893142306366e-07,
"loss": 1.1536,
"step": 1249500
},
{
"epoch": 2.97,
"learning_rate": 1.6746591779017974e-07,
"loss": 1.1362,
"step": 1250000
},
{
"epoch": 2.98,
"learning_rate": 1.5953290415729582e-07,
"loss": 1.1453,
"step": 1250500
},
{
"epoch": 2.98,
"learning_rate": 1.515998905244119e-07,
"loss": 1.133,
"step": 1251000
},
{
"epoch": 2.98,
"learning_rate": 1.4366687689152794e-07,
"loss": 1.1544,
"step": 1251500
},
{
"epoch": 2.98,
"learning_rate": 1.3573386325864402e-07,
"loss": 1.1403,
"step": 1252000
},
{
"epoch": 2.98,
"learning_rate": 1.2780084962576007e-07,
"loss": 1.1533,
"step": 1252500
},
{
"epoch": 2.98,
"learning_rate": 1.1986783599287617e-07,
"loss": 1.1512,
"step": 1253000
},
{
"epoch": 2.98,
"learning_rate": 1.1193482235999224e-07,
"loss": 1.1531,
"step": 1253500
},
{
"epoch": 2.98,
"learning_rate": 1.040018087271083e-07,
"loss": 1.1366,
"step": 1254000
},
{
"epoch": 2.99,
"learning_rate": 9.606879509422438e-08,
"loss": 1.1419,
"step": 1254500
},
{
"epoch": 2.99,
"learning_rate": 8.813578146134045e-08,
"loss": 1.1226,
"step": 1255000
},
{
"epoch": 2.99,
"learning_rate": 8.020276782845652e-08,
"loss": 1.1279,
"step": 1255500
},
{
"epoch": 2.99,
"learning_rate": 7.22697541955726e-08,
"loss": 1.1539,
"step": 1256000
},
{
"epoch": 2.99,
"learning_rate": 6.433674056268866e-08,
"loss": 1.1498,
"step": 1256500
},
{
"epoch": 2.99,
"learning_rate": 5.6403726929804734e-08,
"loss": 1.133,
"step": 1257000
},
{
"epoch": 2.99,
"learning_rate": 4.8470713296920804e-08,
"loss": 1.1432,
"step": 1257500
},
{
"epoch": 2.99,
"learning_rate": 4.053769966403688e-08,
"loss": 1.1493,
"step": 1258000
},
{
"epoch": 3.0,
"learning_rate": 3.260468603115295e-08,
"loss": 1.1356,
"step": 1258500
},
{
"epoch": 3.0,
"learning_rate": 2.4671672398269018e-08,
"loss": 1.1405,
"step": 1259000
},
{
"epoch": 3.0,
"learning_rate": 1.6738658765385088e-08,
"loss": 1.148,
"step": 1259500
},
{
"epoch": 3.0,
"learning_rate": 8.805645132501161e-09,
"loss": 1.1409,
"step": 1260000
},
{
"epoch": 3.0,
"learning_rate": 8.726314996172322e-10,
"loss": 1.1348,
"step": 1260500
},
{
"epoch": 3.0,
"eval_loss": 1.1232123374938965,
"eval_runtime": 3625.9093,
"eval_samples_per_second": 366.127,
"eval_steps_per_second": 22.883,
"step": 1260555
}
],
"max_steps": 1260555,
"num_train_epochs": 3,
"total_flos": 2.738376510217363e+18,
"trial_name": null,
"trial_params": null
}