llama-160m / trainer_state.json
JackFram's picture
Upload folder using huggingface_hub
8142e69
raw
history blame
371 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9997086941553455,
"global_step": 302000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.998344853155372e-05,
"loss": 2.7721,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 4.996689706310744e-05,
"loss": 2.7878,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 4.9950345594661166e-05,
"loss": 2.7908,
"step": 300
},
{
"epoch": 0.0,
"learning_rate": 4.993379412621488e-05,
"loss": 2.8032,
"step": 400
},
{
"epoch": 0.0,
"learning_rate": 4.99172426577686e-05,
"loss": 2.8096,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 4.9900691189322316e-05,
"loss": 2.7972,
"step": 600
},
{
"epoch": 0.0,
"learning_rate": 4.9884139720876035e-05,
"loss": 2.797,
"step": 700
},
{
"epoch": 0.0,
"learning_rate": 4.986758825242976e-05,
"loss": 2.8063,
"step": 800
},
{
"epoch": 0.0,
"learning_rate": 4.985103678398348e-05,
"loss": 2.8167,
"step": 900
},
{
"epoch": 0.0,
"learning_rate": 4.98344853155372e-05,
"loss": 2.8002,
"step": 1000
},
{
"epoch": 0.0,
"learning_rate": 4.981793384709091e-05,
"loss": 2.8002,
"step": 1100
},
{
"epoch": 0.0,
"learning_rate": 4.980138237864464e-05,
"loss": 2.8049,
"step": 1200
},
{
"epoch": 0.0,
"learning_rate": 4.9784830910198356e-05,
"loss": 2.8064,
"step": 1300
},
{
"epoch": 0.0,
"learning_rate": 4.9768279441752075e-05,
"loss": 2.8052,
"step": 1400
},
{
"epoch": 0.0,
"learning_rate": 4.9751727973305794e-05,
"loss": 2.7996,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 4.973517650485951e-05,
"loss": 2.807,
"step": 1600
},
{
"epoch": 0.01,
"learning_rate": 4.971862503641324e-05,
"loss": 2.8028,
"step": 1700
},
{
"epoch": 0.01,
"learning_rate": 4.970207356796695e-05,
"loss": 2.7891,
"step": 1800
},
{
"epoch": 0.01,
"learning_rate": 4.968552209952067e-05,
"loss": 2.7999,
"step": 1900
},
{
"epoch": 0.01,
"learning_rate": 4.966897063107439e-05,
"loss": 2.7993,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 4.965241916262811e-05,
"loss": 2.8064,
"step": 2100
},
{
"epoch": 0.01,
"learning_rate": 4.9635867694181834e-05,
"loss": 2.8121,
"step": 2200
},
{
"epoch": 0.01,
"learning_rate": 4.961931622573555e-05,
"loss": 2.8099,
"step": 2300
},
{
"epoch": 0.01,
"learning_rate": 4.960276475728927e-05,
"loss": 2.8138,
"step": 2400
},
{
"epoch": 0.01,
"learning_rate": 4.9586213288842984e-05,
"loss": 2.8153,
"step": 2500
},
{
"epoch": 0.01,
"learning_rate": 4.956966182039671e-05,
"loss": 2.8168,
"step": 2600
},
{
"epoch": 0.01,
"learning_rate": 4.955311035195043e-05,
"loss": 2.808,
"step": 2700
},
{
"epoch": 0.01,
"learning_rate": 4.953655888350415e-05,
"loss": 2.8072,
"step": 2800
},
{
"epoch": 0.01,
"learning_rate": 4.9520007415057866e-05,
"loss": 2.8262,
"step": 2900
},
{
"epoch": 0.01,
"learning_rate": 4.9503455946611585e-05,
"loss": 2.8076,
"step": 3000
},
{
"epoch": 0.01,
"learning_rate": 4.948690447816531e-05,
"loss": 2.8218,
"step": 3100
},
{
"epoch": 0.01,
"learning_rate": 4.947035300971902e-05,
"loss": 2.8215,
"step": 3200
},
{
"epoch": 0.01,
"learning_rate": 4.945380154127274e-05,
"loss": 2.8039,
"step": 3300
},
{
"epoch": 0.01,
"learning_rate": 4.943725007282646e-05,
"loss": 2.8078,
"step": 3400
},
{
"epoch": 0.01,
"learning_rate": 4.942069860438018e-05,
"loss": 2.8186,
"step": 3500
},
{
"epoch": 0.01,
"learning_rate": 4.9404147135933906e-05,
"loss": 2.8091,
"step": 3600
},
{
"epoch": 0.01,
"learning_rate": 4.9387595667487625e-05,
"loss": 2.7946,
"step": 3700
},
{
"epoch": 0.01,
"learning_rate": 4.937104419904134e-05,
"loss": 2.8129,
"step": 3800
},
{
"epoch": 0.01,
"learning_rate": 4.9354492730595056e-05,
"loss": 2.8099,
"step": 3900
},
{
"epoch": 0.01,
"learning_rate": 4.933794126214878e-05,
"loss": 2.8073,
"step": 4000
},
{
"epoch": 0.01,
"learning_rate": 4.93213897937025e-05,
"loss": 2.8009,
"step": 4100
},
{
"epoch": 0.01,
"learning_rate": 4.930483832525622e-05,
"loss": 2.8141,
"step": 4200
},
{
"epoch": 0.01,
"learning_rate": 4.928828685680994e-05,
"loss": 2.8163,
"step": 4300
},
{
"epoch": 0.01,
"learning_rate": 4.927173538836366e-05,
"loss": 2.8123,
"step": 4400
},
{
"epoch": 0.01,
"learning_rate": 4.925518391991738e-05,
"loss": 2.8308,
"step": 4500
},
{
"epoch": 0.02,
"learning_rate": 4.9238632451471096e-05,
"loss": 2.8078,
"step": 4600
},
{
"epoch": 0.02,
"learning_rate": 4.9222080983024815e-05,
"loss": 2.8176,
"step": 4700
},
{
"epoch": 0.02,
"learning_rate": 4.9205529514578534e-05,
"loss": 2.8254,
"step": 4800
},
{
"epoch": 0.02,
"learning_rate": 4.918897804613225e-05,
"loss": 2.8092,
"step": 4900
},
{
"epoch": 0.02,
"learning_rate": 4.917242657768598e-05,
"loss": 2.7964,
"step": 5000
},
{
"epoch": 0.02,
"learning_rate": 4.91558751092397e-05,
"loss": 2.8133,
"step": 5100
},
{
"epoch": 0.02,
"learning_rate": 4.913932364079341e-05,
"loss": 2.814,
"step": 5200
},
{
"epoch": 0.02,
"learning_rate": 4.912277217234713e-05,
"loss": 2.8066,
"step": 5300
},
{
"epoch": 0.02,
"learning_rate": 4.9106220703900855e-05,
"loss": 2.8021,
"step": 5400
},
{
"epoch": 0.02,
"learning_rate": 4.9089669235454574e-05,
"loss": 2.8167,
"step": 5500
},
{
"epoch": 0.02,
"learning_rate": 4.907311776700829e-05,
"loss": 2.8132,
"step": 5600
},
{
"epoch": 0.02,
"learning_rate": 4.905656629856201e-05,
"loss": 2.8201,
"step": 5700
},
{
"epoch": 0.02,
"learning_rate": 4.904001483011573e-05,
"loss": 2.8111,
"step": 5800
},
{
"epoch": 0.02,
"learning_rate": 4.902346336166945e-05,
"loss": 2.8159,
"step": 5900
},
{
"epoch": 0.02,
"learning_rate": 4.900691189322317e-05,
"loss": 2.809,
"step": 6000
},
{
"epoch": 0.02,
"learning_rate": 4.899036042477689e-05,
"loss": 2.809,
"step": 6100
},
{
"epoch": 0.02,
"learning_rate": 4.8973808956330606e-05,
"loss": 2.8167,
"step": 6200
},
{
"epoch": 0.02,
"learning_rate": 4.8957257487884325e-05,
"loss": 2.8271,
"step": 6300
},
{
"epoch": 0.02,
"learning_rate": 4.894070601943805e-05,
"loss": 2.8136,
"step": 6400
},
{
"epoch": 0.02,
"learning_rate": 4.892415455099177e-05,
"loss": 2.809,
"step": 6500
},
{
"epoch": 0.02,
"learning_rate": 4.890760308254548e-05,
"loss": 2.8097,
"step": 6600
},
{
"epoch": 0.02,
"learning_rate": 4.88910516140992e-05,
"loss": 2.7963,
"step": 6700
},
{
"epoch": 0.02,
"learning_rate": 4.887450014565293e-05,
"loss": 2.8047,
"step": 6800
},
{
"epoch": 0.02,
"learning_rate": 4.8857948677206646e-05,
"loss": 2.8084,
"step": 6900
},
{
"epoch": 0.02,
"learning_rate": 4.8841397208760365e-05,
"loss": 2.8134,
"step": 7000
},
{
"epoch": 0.02,
"learning_rate": 4.8824845740314084e-05,
"loss": 2.8075,
"step": 7100
},
{
"epoch": 0.02,
"learning_rate": 4.88082942718678e-05,
"loss": 2.7972,
"step": 7200
},
{
"epoch": 0.02,
"learning_rate": 4.879174280342152e-05,
"loss": 2.8139,
"step": 7300
},
{
"epoch": 0.02,
"learning_rate": 4.877519133497524e-05,
"loss": 2.8022,
"step": 7400
},
{
"epoch": 0.02,
"learning_rate": 4.875863986652896e-05,
"loss": 2.8181,
"step": 7500
},
{
"epoch": 0.03,
"learning_rate": 4.874208839808268e-05,
"loss": 2.8265,
"step": 7600
},
{
"epoch": 0.03,
"learning_rate": 4.87255369296364e-05,
"loss": 2.8182,
"step": 7700
},
{
"epoch": 0.03,
"learning_rate": 4.8708985461190124e-05,
"loss": 2.8139,
"step": 7800
},
{
"epoch": 0.03,
"learning_rate": 4.869243399274384e-05,
"loss": 2.8045,
"step": 7900
},
{
"epoch": 0.03,
"learning_rate": 4.8675882524297555e-05,
"loss": 2.7956,
"step": 8000
},
{
"epoch": 0.03,
"learning_rate": 4.8659331055851274e-05,
"loss": 2.8211,
"step": 8100
},
{
"epoch": 0.03,
"learning_rate": 4.8642779587405e-05,
"loss": 2.8181,
"step": 8200
},
{
"epoch": 0.03,
"learning_rate": 4.862622811895872e-05,
"loss": 2.7978,
"step": 8300
},
{
"epoch": 0.03,
"learning_rate": 4.860967665051244e-05,
"loss": 2.8014,
"step": 8400
},
{
"epoch": 0.03,
"learning_rate": 4.8593125182066157e-05,
"loss": 2.8099,
"step": 8500
},
{
"epoch": 0.03,
"learning_rate": 4.8576573713619876e-05,
"loss": 2.819,
"step": 8600
},
{
"epoch": 0.03,
"learning_rate": 4.8560022245173595e-05,
"loss": 2.8252,
"step": 8700
},
{
"epoch": 0.03,
"learning_rate": 4.8543470776727314e-05,
"loss": 2.8039,
"step": 8800
},
{
"epoch": 0.03,
"learning_rate": 4.852691930828103e-05,
"loss": 2.8084,
"step": 8900
},
{
"epoch": 0.03,
"learning_rate": 4.851036783983475e-05,
"loss": 2.8251,
"step": 9000
},
{
"epoch": 0.03,
"learning_rate": 4.849381637138847e-05,
"loss": 2.8183,
"step": 9100
},
{
"epoch": 0.03,
"learning_rate": 4.8477264902942196e-05,
"loss": 2.8023,
"step": 9200
},
{
"epoch": 0.03,
"learning_rate": 4.8460713434495915e-05,
"loss": 2.8056,
"step": 9300
},
{
"epoch": 0.03,
"learning_rate": 4.844416196604963e-05,
"loss": 2.8341,
"step": 9400
},
{
"epoch": 0.03,
"learning_rate": 4.8427610497603346e-05,
"loss": 2.8251,
"step": 9500
},
{
"epoch": 0.03,
"learning_rate": 4.8411059029157065e-05,
"loss": 2.8124,
"step": 9600
},
{
"epoch": 0.03,
"learning_rate": 4.839450756071079e-05,
"loss": 2.8115,
"step": 9700
},
{
"epoch": 0.03,
"learning_rate": 4.837795609226451e-05,
"loss": 2.8074,
"step": 9800
},
{
"epoch": 0.03,
"learning_rate": 4.836140462381823e-05,
"loss": 2.8205,
"step": 9900
},
{
"epoch": 0.03,
"learning_rate": 4.834485315537194e-05,
"loss": 2.8196,
"step": 10000
},
{
"epoch": 0.03,
"learning_rate": 4.832830168692567e-05,
"loss": 2.8264,
"step": 10100
},
{
"epoch": 0.03,
"learning_rate": 4.8311750218479386e-05,
"loss": 2.8211,
"step": 10200
},
{
"epoch": 0.03,
"learning_rate": 4.8295198750033105e-05,
"loss": 2.8174,
"step": 10300
},
{
"epoch": 0.03,
"learning_rate": 4.8278647281586824e-05,
"loss": 2.8217,
"step": 10400
},
{
"epoch": 0.03,
"learning_rate": 4.826209581314054e-05,
"loss": 2.805,
"step": 10500
},
{
"epoch": 0.04,
"learning_rate": 4.824554434469427e-05,
"loss": 2.8154,
"step": 10600
},
{
"epoch": 0.04,
"learning_rate": 4.822899287624798e-05,
"loss": 2.8108,
"step": 10700
},
{
"epoch": 0.04,
"learning_rate": 4.82124414078017e-05,
"loss": 2.8132,
"step": 10800
},
{
"epoch": 0.04,
"learning_rate": 4.819588993935542e-05,
"loss": 2.812,
"step": 10900
},
{
"epoch": 0.04,
"learning_rate": 4.817933847090914e-05,
"loss": 2.8159,
"step": 11000
},
{
"epoch": 0.04,
"learning_rate": 4.8162787002462864e-05,
"loss": 2.808,
"step": 11100
},
{
"epoch": 0.04,
"learning_rate": 4.814623553401658e-05,
"loss": 2.8049,
"step": 11200
},
{
"epoch": 0.04,
"learning_rate": 4.81296840655703e-05,
"loss": 2.8226,
"step": 11300
},
{
"epoch": 0.04,
"learning_rate": 4.8113132597124014e-05,
"loss": 2.8259,
"step": 11400
},
{
"epoch": 0.04,
"learning_rate": 4.809658112867774e-05,
"loss": 2.8072,
"step": 11500
},
{
"epoch": 0.04,
"learning_rate": 4.808002966023146e-05,
"loss": 2.8118,
"step": 11600
},
{
"epoch": 0.04,
"learning_rate": 4.806347819178518e-05,
"loss": 2.8244,
"step": 11700
},
{
"epoch": 0.04,
"learning_rate": 4.8046926723338897e-05,
"loss": 2.8146,
"step": 11800
},
{
"epoch": 0.04,
"learning_rate": 4.8030375254892616e-05,
"loss": 2.8068,
"step": 11900
},
{
"epoch": 0.04,
"learning_rate": 4.801382378644634e-05,
"loss": 2.8115,
"step": 12000
},
{
"epoch": 0.04,
"learning_rate": 4.7997272318000053e-05,
"loss": 2.7999,
"step": 12100
},
{
"epoch": 0.04,
"learning_rate": 4.798072084955377e-05,
"loss": 2.8037,
"step": 12200
},
{
"epoch": 0.04,
"learning_rate": 4.796416938110749e-05,
"loss": 2.8246,
"step": 12300
},
{
"epoch": 0.04,
"learning_rate": 4.794761791266121e-05,
"loss": 2.8262,
"step": 12400
},
{
"epoch": 0.04,
"learning_rate": 4.7931066444214936e-05,
"loss": 2.8186,
"step": 12500
},
{
"epoch": 0.04,
"learning_rate": 4.7914514975768655e-05,
"loss": 2.8099,
"step": 12600
},
{
"epoch": 0.04,
"learning_rate": 4.7897963507322374e-05,
"loss": 2.8106,
"step": 12700
},
{
"epoch": 0.04,
"learning_rate": 4.7881412038876086e-05,
"loss": 2.8141,
"step": 12800
},
{
"epoch": 0.04,
"learning_rate": 4.786486057042981e-05,
"loss": 2.8181,
"step": 12900
},
{
"epoch": 0.04,
"learning_rate": 4.784830910198353e-05,
"loss": 2.8083,
"step": 13000
},
{
"epoch": 0.04,
"learning_rate": 4.783175763353725e-05,
"loss": 2.8187,
"step": 13100
},
{
"epoch": 0.04,
"learning_rate": 4.781520616509097e-05,
"loss": 2.8075,
"step": 13200
},
{
"epoch": 0.04,
"learning_rate": 4.779865469664469e-05,
"loss": 2.8215,
"step": 13300
},
{
"epoch": 0.04,
"learning_rate": 4.7782103228198414e-05,
"loss": 2.8186,
"step": 13400
},
{
"epoch": 0.04,
"learning_rate": 4.7765551759752126e-05,
"loss": 2.8136,
"step": 13500
},
{
"epoch": 0.05,
"learning_rate": 4.7749000291305845e-05,
"loss": 2.8021,
"step": 13600
},
{
"epoch": 0.05,
"learning_rate": 4.7732448822859564e-05,
"loss": 2.7984,
"step": 13700
},
{
"epoch": 0.05,
"learning_rate": 4.771589735441328e-05,
"loss": 2.829,
"step": 13800
},
{
"epoch": 0.05,
"learning_rate": 4.769934588596701e-05,
"loss": 2.82,
"step": 13900
},
{
"epoch": 0.05,
"learning_rate": 4.768279441752073e-05,
"loss": 2.8201,
"step": 14000
},
{
"epoch": 0.05,
"learning_rate": 4.766624294907445e-05,
"loss": 2.842,
"step": 14100
},
{
"epoch": 0.05,
"learning_rate": 4.764969148062816e-05,
"loss": 2.8121,
"step": 14200
},
{
"epoch": 0.05,
"learning_rate": 4.7633140012181885e-05,
"loss": 2.8175,
"step": 14300
},
{
"epoch": 0.05,
"learning_rate": 4.7616588543735604e-05,
"loss": 2.8192,
"step": 14400
},
{
"epoch": 0.05,
"learning_rate": 4.760003707528932e-05,
"loss": 2.8123,
"step": 14500
},
{
"epoch": 0.05,
"learning_rate": 4.758348560684304e-05,
"loss": 2.8076,
"step": 14600
},
{
"epoch": 0.05,
"learning_rate": 4.756693413839676e-05,
"loss": 2.8153,
"step": 14700
},
{
"epoch": 0.05,
"learning_rate": 4.755038266995048e-05,
"loss": 2.8077,
"step": 14800
},
{
"epoch": 0.05,
"learning_rate": 4.75338312015042e-05,
"loss": 2.8155,
"step": 14900
},
{
"epoch": 0.05,
"learning_rate": 4.751727973305792e-05,
"loss": 2.8092,
"step": 15000
},
{
"epoch": 0.05,
"learning_rate": 4.7500728264611637e-05,
"loss": 2.8259,
"step": 15100
},
{
"epoch": 0.05,
"learning_rate": 4.7484176796165355e-05,
"loss": 2.8017,
"step": 15200
},
{
"epoch": 0.05,
"learning_rate": 4.746762532771908e-05,
"loss": 2.818,
"step": 15300
},
{
"epoch": 0.05,
"learning_rate": 4.74510738592728e-05,
"loss": 2.8197,
"step": 15400
},
{
"epoch": 0.05,
"learning_rate": 4.743452239082652e-05,
"loss": 2.8164,
"step": 15500
},
{
"epoch": 0.05,
"learning_rate": 4.741797092238023e-05,
"loss": 2.8274,
"step": 15600
},
{
"epoch": 0.05,
"learning_rate": 4.740141945393396e-05,
"loss": 2.8307,
"step": 15700
},
{
"epoch": 0.05,
"learning_rate": 4.7384867985487676e-05,
"loss": 2.8148,
"step": 15800
},
{
"epoch": 0.05,
"learning_rate": 4.7368316517041395e-05,
"loss": 2.8272,
"step": 15900
},
{
"epoch": 0.05,
"learning_rate": 4.7351765048595114e-05,
"loss": 2.8161,
"step": 16000
},
{
"epoch": 0.05,
"learning_rate": 4.733521358014883e-05,
"loss": 2.8145,
"step": 16100
},
{
"epoch": 0.05,
"learning_rate": 4.731866211170255e-05,
"loss": 2.8019,
"step": 16200
},
{
"epoch": 0.05,
"learning_rate": 4.730211064325627e-05,
"loss": 2.8244,
"step": 16300
},
{
"epoch": 0.05,
"learning_rate": 4.728555917480999e-05,
"loss": 2.8194,
"step": 16400
},
{
"epoch": 0.05,
"learning_rate": 4.726900770636371e-05,
"loss": 2.8087,
"step": 16500
},
{
"epoch": 0.05,
"learning_rate": 4.725245623791743e-05,
"loss": 2.8109,
"step": 16600
},
{
"epoch": 0.06,
"learning_rate": 4.7235904769471154e-05,
"loss": 2.813,
"step": 16700
},
{
"epoch": 0.06,
"learning_rate": 4.721935330102487e-05,
"loss": 2.8144,
"step": 16800
},
{
"epoch": 0.06,
"learning_rate": 4.7202801832578585e-05,
"loss": 2.8075,
"step": 16900
},
{
"epoch": 0.06,
"learning_rate": 4.7186250364132304e-05,
"loss": 2.8134,
"step": 17000
},
{
"epoch": 0.06,
"learning_rate": 4.716969889568603e-05,
"loss": 2.7994,
"step": 17100
},
{
"epoch": 0.06,
"learning_rate": 4.715314742723975e-05,
"loss": 2.807,
"step": 17200
},
{
"epoch": 0.06,
"learning_rate": 4.713659595879347e-05,
"loss": 2.8123,
"step": 17300
},
{
"epoch": 0.06,
"learning_rate": 4.712004449034719e-05,
"loss": 2.8217,
"step": 17400
},
{
"epoch": 0.06,
"learning_rate": 4.7103493021900906e-05,
"loss": 2.8129,
"step": 17500
},
{
"epoch": 0.06,
"learning_rate": 4.7086941553454625e-05,
"loss": 2.7981,
"step": 17600
},
{
"epoch": 0.06,
"learning_rate": 4.7070390085008344e-05,
"loss": 2.8116,
"step": 17700
},
{
"epoch": 0.06,
"learning_rate": 4.705383861656206e-05,
"loss": 2.8084,
"step": 17800
},
{
"epoch": 0.06,
"learning_rate": 4.703728714811578e-05,
"loss": 2.8146,
"step": 17900
},
{
"epoch": 0.06,
"learning_rate": 4.70207356796695e-05,
"loss": 2.802,
"step": 18000
},
{
"epoch": 0.06,
"learning_rate": 4.7004184211223226e-05,
"loss": 2.8217,
"step": 18100
},
{
"epoch": 0.06,
"learning_rate": 4.6987632742776945e-05,
"loss": 2.8007,
"step": 18200
},
{
"epoch": 0.06,
"learning_rate": 4.697108127433066e-05,
"loss": 2.8053,
"step": 18300
},
{
"epoch": 0.06,
"learning_rate": 4.6954529805884376e-05,
"loss": 2.7995,
"step": 18400
},
{
"epoch": 0.06,
"learning_rate": 4.6937978337438095e-05,
"loss": 2.8113,
"step": 18500
},
{
"epoch": 0.06,
"learning_rate": 4.692142686899182e-05,
"loss": 2.822,
"step": 18600
},
{
"epoch": 0.06,
"learning_rate": 4.690487540054554e-05,
"loss": 2.8202,
"step": 18700
},
{
"epoch": 0.06,
"learning_rate": 4.688832393209926e-05,
"loss": 2.8018,
"step": 18800
},
{
"epoch": 0.06,
"learning_rate": 4.687177246365298e-05,
"loss": 2.8097,
"step": 18900
},
{
"epoch": 0.06,
"learning_rate": 4.68552209952067e-05,
"loss": 2.815,
"step": 19000
},
{
"epoch": 0.06,
"learning_rate": 4.6838669526760416e-05,
"loss": 2.8089,
"step": 19100
},
{
"epoch": 0.06,
"learning_rate": 4.6822118058314135e-05,
"loss": 2.8112,
"step": 19200
},
{
"epoch": 0.06,
"learning_rate": 4.6805566589867854e-05,
"loss": 2.8313,
"step": 19300
},
{
"epoch": 0.06,
"learning_rate": 4.678901512142157e-05,
"loss": 2.8209,
"step": 19400
},
{
"epoch": 0.06,
"learning_rate": 4.67724636529753e-05,
"loss": 2.8056,
"step": 19500
},
{
"epoch": 0.06,
"learning_rate": 4.675591218452902e-05,
"loss": 2.8137,
"step": 19600
},
{
"epoch": 0.07,
"learning_rate": 4.673936071608273e-05,
"loss": 2.8136,
"step": 19700
},
{
"epoch": 0.07,
"learning_rate": 4.672280924763645e-05,
"loss": 2.8201,
"step": 19800
},
{
"epoch": 0.07,
"learning_rate": 4.670625777919017e-05,
"loss": 2.8131,
"step": 19900
},
{
"epoch": 0.07,
"learning_rate": 4.6689706310743894e-05,
"loss": 2.8219,
"step": 20000
},
{
"epoch": 0.07,
"learning_rate": 4.667315484229761e-05,
"loss": 2.8149,
"step": 20100
},
{
"epoch": 0.07,
"learning_rate": 4.665660337385133e-05,
"loss": 2.8027,
"step": 20200
},
{
"epoch": 0.07,
"learning_rate": 4.664005190540505e-05,
"loss": 2.8364,
"step": 20300
},
{
"epoch": 0.07,
"learning_rate": 4.662350043695877e-05,
"loss": 2.8238,
"step": 20400
},
{
"epoch": 0.07,
"learning_rate": 4.660694896851249e-05,
"loss": 2.825,
"step": 20500
},
{
"epoch": 0.07,
"learning_rate": 4.659039750006621e-05,
"loss": 2.8069,
"step": 20600
},
{
"epoch": 0.07,
"learning_rate": 4.657384603161993e-05,
"loss": 2.8114,
"step": 20700
},
{
"epoch": 0.07,
"learning_rate": 4.6557294563173646e-05,
"loss": 2.8192,
"step": 20800
},
{
"epoch": 0.07,
"learning_rate": 4.654074309472737e-05,
"loss": 2.8049,
"step": 20900
},
{
"epoch": 0.07,
"learning_rate": 4.652419162628109e-05,
"loss": 2.803,
"step": 21000
},
{
"epoch": 0.07,
"learning_rate": 4.65076401578348e-05,
"loss": 2.8005,
"step": 21100
},
{
"epoch": 0.07,
"learning_rate": 4.649108868938852e-05,
"loss": 2.8187,
"step": 21200
},
{
"epoch": 0.07,
"learning_rate": 4.647453722094224e-05,
"loss": 2.8054,
"step": 21300
},
{
"epoch": 0.07,
"learning_rate": 4.6457985752495966e-05,
"loss": 2.8148,
"step": 21400
},
{
"epoch": 0.07,
"learning_rate": 4.6441434284049685e-05,
"loss": 2.8146,
"step": 21500
},
{
"epoch": 0.07,
"learning_rate": 4.6424882815603404e-05,
"loss": 2.8058,
"step": 21600
},
{
"epoch": 0.07,
"learning_rate": 4.640833134715712e-05,
"loss": 2.8261,
"step": 21700
},
{
"epoch": 0.07,
"learning_rate": 4.639177987871084e-05,
"loss": 2.8264,
"step": 21800
},
{
"epoch": 0.07,
"learning_rate": 4.637522841026456e-05,
"loss": 2.8053,
"step": 21900
},
{
"epoch": 0.07,
"learning_rate": 4.635867694181828e-05,
"loss": 2.8113,
"step": 22000
},
{
"epoch": 0.07,
"learning_rate": 4.6342125473372e-05,
"loss": 2.8248,
"step": 22100
},
{
"epoch": 0.07,
"learning_rate": 4.632557400492572e-05,
"loss": 2.8315,
"step": 22200
},
{
"epoch": 0.07,
"learning_rate": 4.6309022536479444e-05,
"loss": 2.8085,
"step": 22300
},
{
"epoch": 0.07,
"learning_rate": 4.629247106803316e-05,
"loss": 2.8159,
"step": 22400
},
{
"epoch": 0.07,
"learning_rate": 4.6275919599586875e-05,
"loss": 2.8176,
"step": 22500
},
{
"epoch": 0.07,
"learning_rate": 4.6259368131140594e-05,
"loss": 2.7987,
"step": 22600
},
{
"epoch": 0.08,
"learning_rate": 4.624281666269431e-05,
"loss": 2.8256,
"step": 22700
},
{
"epoch": 0.08,
"learning_rate": 4.622626519424804e-05,
"loss": 2.8213,
"step": 22800
},
{
"epoch": 0.08,
"learning_rate": 4.620971372580176e-05,
"loss": 2.8096,
"step": 22900
},
{
"epoch": 0.08,
"learning_rate": 4.619316225735548e-05,
"loss": 2.8044,
"step": 23000
},
{
"epoch": 0.08,
"learning_rate": 4.6176610788909196e-05,
"loss": 2.8182,
"step": 23100
},
{
"epoch": 0.08,
"learning_rate": 4.6160059320462915e-05,
"loss": 2.8114,
"step": 23200
},
{
"epoch": 0.08,
"learning_rate": 4.6143507852016634e-05,
"loss": 2.8297,
"step": 23300
},
{
"epoch": 0.08,
"learning_rate": 4.612695638357035e-05,
"loss": 2.8087,
"step": 23400
},
{
"epoch": 0.08,
"learning_rate": 4.611040491512407e-05,
"loss": 2.8071,
"step": 23500
},
{
"epoch": 0.08,
"learning_rate": 4.609385344667779e-05,
"loss": 2.8116,
"step": 23600
},
{
"epoch": 0.08,
"learning_rate": 4.607730197823151e-05,
"loss": 2.8134,
"step": 23700
},
{
"epoch": 0.08,
"learning_rate": 4.606075050978523e-05,
"loss": 2.8133,
"step": 23800
},
{
"epoch": 0.08,
"learning_rate": 4.604419904133895e-05,
"loss": 2.8104,
"step": 23900
},
{
"epoch": 0.08,
"learning_rate": 4.6027647572892667e-05,
"loss": 2.8233,
"step": 24000
},
{
"epoch": 0.08,
"learning_rate": 4.6011096104446386e-05,
"loss": 2.8129,
"step": 24100
},
{
"epoch": 0.08,
"learning_rate": 4.599454463600011e-05,
"loss": 2.8178,
"step": 24200
},
{
"epoch": 0.08,
"learning_rate": 4.597799316755383e-05,
"loss": 2.7981,
"step": 24300
},
{
"epoch": 0.08,
"learning_rate": 4.596144169910755e-05,
"loss": 2.8125,
"step": 24400
},
{
"epoch": 0.08,
"learning_rate": 4.594489023066126e-05,
"loss": 2.8201,
"step": 24500
},
{
"epoch": 0.08,
"learning_rate": 4.592833876221499e-05,
"loss": 2.804,
"step": 24600
},
{
"epoch": 0.08,
"learning_rate": 4.5911787293768706e-05,
"loss": 2.8178,
"step": 24700
},
{
"epoch": 0.08,
"learning_rate": 4.5895235825322425e-05,
"loss": 2.8102,
"step": 24800
},
{
"epoch": 0.08,
"learning_rate": 4.5878684356876144e-05,
"loss": 2.811,
"step": 24900
},
{
"epoch": 0.08,
"learning_rate": 4.586213288842986e-05,
"loss": 2.8142,
"step": 25000
},
{
"epoch": 0.08,
"learning_rate": 4.584558141998358e-05,
"loss": 2.8072,
"step": 25100
},
{
"epoch": 0.08,
"learning_rate": 4.58290299515373e-05,
"loss": 2.8098,
"step": 25200
},
{
"epoch": 0.08,
"learning_rate": 4.581247848309102e-05,
"loss": 2.8195,
"step": 25300
},
{
"epoch": 0.08,
"learning_rate": 4.579592701464474e-05,
"loss": 2.8112,
"step": 25400
},
{
"epoch": 0.08,
"learning_rate": 4.577937554619846e-05,
"loss": 2.822,
"step": 25500
},
{
"epoch": 0.08,
"learning_rate": 4.5762824077752184e-05,
"loss": 2.8091,
"step": 25600
},
{
"epoch": 0.09,
"learning_rate": 4.57462726093059e-05,
"loss": 2.8281,
"step": 25700
},
{
"epoch": 0.09,
"learning_rate": 4.572972114085962e-05,
"loss": 2.8093,
"step": 25800
},
{
"epoch": 0.09,
"learning_rate": 4.5713169672413334e-05,
"loss": 2.8143,
"step": 25900
},
{
"epoch": 0.09,
"learning_rate": 4.569661820396706e-05,
"loss": 2.8002,
"step": 26000
},
{
"epoch": 0.09,
"learning_rate": 4.568006673552078e-05,
"loss": 2.8091,
"step": 26100
},
{
"epoch": 0.09,
"learning_rate": 4.56635152670745e-05,
"loss": 2.8114,
"step": 26200
},
{
"epoch": 0.09,
"learning_rate": 4.564696379862822e-05,
"loss": 2.7986,
"step": 26300
},
{
"epoch": 0.09,
"learning_rate": 4.5630412330181936e-05,
"loss": 2.8263,
"step": 26400
},
{
"epoch": 0.09,
"learning_rate": 4.5613860861735655e-05,
"loss": 2.7956,
"step": 26500
},
{
"epoch": 0.09,
"learning_rate": 4.5597309393289374e-05,
"loss": 2.8136,
"step": 26600
},
{
"epoch": 0.09,
"learning_rate": 4.558075792484309e-05,
"loss": 2.8219,
"step": 26700
},
{
"epoch": 0.09,
"learning_rate": 4.556420645639681e-05,
"loss": 2.809,
"step": 26800
},
{
"epoch": 0.09,
"learning_rate": 4.554765498795053e-05,
"loss": 2.806,
"step": 26900
},
{
"epoch": 0.09,
"learning_rate": 4.5531103519504256e-05,
"loss": 2.8139,
"step": 27000
},
{
"epoch": 0.09,
"learning_rate": 4.5514552051057975e-05,
"loss": 2.8167,
"step": 27100
},
{
"epoch": 0.09,
"learning_rate": 4.5498000582611694e-05,
"loss": 2.8023,
"step": 27200
},
{
"epoch": 0.09,
"learning_rate": 4.5481449114165407e-05,
"loss": 2.7996,
"step": 27300
},
{
"epoch": 0.09,
"learning_rate": 4.5464897645719126e-05,
"loss": 2.8053,
"step": 27400
},
{
"epoch": 0.09,
"learning_rate": 4.544834617727285e-05,
"loss": 2.8116,
"step": 27500
},
{
"epoch": 0.09,
"learning_rate": 4.543179470882657e-05,
"loss": 2.8057,
"step": 27600
},
{
"epoch": 0.09,
"learning_rate": 4.541524324038029e-05,
"loss": 2.8028,
"step": 27700
},
{
"epoch": 0.09,
"learning_rate": 4.539869177193401e-05,
"loss": 2.8114,
"step": 27800
},
{
"epoch": 0.09,
"learning_rate": 4.538214030348773e-05,
"loss": 2.8049,
"step": 27900
},
{
"epoch": 0.09,
"learning_rate": 4.5365588835041446e-05,
"loss": 2.8253,
"step": 28000
},
{
"epoch": 0.09,
"learning_rate": 4.5349037366595165e-05,
"loss": 2.8078,
"step": 28100
},
{
"epoch": 0.09,
"learning_rate": 4.5332485898148884e-05,
"loss": 2.8083,
"step": 28200
},
{
"epoch": 0.09,
"learning_rate": 4.53159344297026e-05,
"loss": 2.8228,
"step": 28300
},
{
"epoch": 0.09,
"learning_rate": 4.529938296125633e-05,
"loss": 2.8095,
"step": 28400
},
{
"epoch": 0.09,
"learning_rate": 4.528283149281005e-05,
"loss": 2.822,
"step": 28500
},
{
"epoch": 0.09,
"learning_rate": 4.526628002436377e-05,
"loss": 2.8086,
"step": 28600
},
{
"epoch": 0.1,
"learning_rate": 4.524972855591748e-05,
"loss": 2.7954,
"step": 28700
},
{
"epoch": 0.1,
"learning_rate": 4.52331770874712e-05,
"loss": 2.8052,
"step": 28800
},
{
"epoch": 0.1,
"learning_rate": 4.5216625619024924e-05,
"loss": 2.8118,
"step": 28900
},
{
"epoch": 0.1,
"learning_rate": 4.520007415057864e-05,
"loss": 2.8199,
"step": 29000
},
{
"epoch": 0.1,
"learning_rate": 4.518352268213236e-05,
"loss": 2.8096,
"step": 29100
},
{
"epoch": 0.1,
"learning_rate": 4.516697121368608e-05,
"loss": 2.7978,
"step": 29200
},
{
"epoch": 0.1,
"learning_rate": 4.51504197452398e-05,
"loss": 2.8108,
"step": 29300
},
{
"epoch": 0.1,
"learning_rate": 4.513386827679352e-05,
"loss": 2.8116,
"step": 29400
},
{
"epoch": 0.1,
"learning_rate": 4.511731680834724e-05,
"loss": 2.8155,
"step": 29500
},
{
"epoch": 0.1,
"learning_rate": 4.510076533990096e-05,
"loss": 2.8031,
"step": 29600
},
{
"epoch": 0.1,
"learning_rate": 4.5084213871454676e-05,
"loss": 2.8047,
"step": 29700
},
{
"epoch": 0.1,
"learning_rate": 4.50676624030084e-05,
"loss": 2.8153,
"step": 29800
},
{
"epoch": 0.1,
"learning_rate": 4.505111093456212e-05,
"loss": 2.8112,
"step": 29900
},
{
"epoch": 0.1,
"learning_rate": 4.503455946611583e-05,
"loss": 2.8158,
"step": 30000
},
{
"epoch": 0.1,
"learning_rate": 4.501800799766955e-05,
"loss": 2.825,
"step": 30100
},
{
"epoch": 0.1,
"learning_rate": 4.500145652922327e-05,
"loss": 2.8182,
"step": 30200
},
{
"epoch": 0.1,
"learning_rate": 4.4984905060776996e-05,
"loss": 2.8059,
"step": 30300
},
{
"epoch": 0.1,
"learning_rate": 4.4968353592330715e-05,
"loss": 2.801,
"step": 30400
},
{
"epoch": 0.1,
"learning_rate": 4.4951802123884434e-05,
"loss": 2.8125,
"step": 30500
},
{
"epoch": 0.1,
"learning_rate": 4.493525065543815e-05,
"loss": 2.8173,
"step": 30600
},
{
"epoch": 0.1,
"learning_rate": 4.491869918699187e-05,
"loss": 2.8212,
"step": 30700
},
{
"epoch": 0.1,
"learning_rate": 4.490214771854559e-05,
"loss": 2.8168,
"step": 30800
},
{
"epoch": 0.1,
"learning_rate": 4.488559625009931e-05,
"loss": 2.8093,
"step": 30900
},
{
"epoch": 0.1,
"learning_rate": 4.486904478165303e-05,
"loss": 2.7983,
"step": 31000
},
{
"epoch": 0.1,
"learning_rate": 4.485249331320675e-05,
"loss": 2.8148,
"step": 31100
},
{
"epoch": 0.1,
"learning_rate": 4.4835941844760474e-05,
"loss": 2.8102,
"step": 31200
},
{
"epoch": 0.1,
"learning_rate": 4.481939037631419e-05,
"loss": 2.8075,
"step": 31300
},
{
"epoch": 0.1,
"learning_rate": 4.4802838907867905e-05,
"loss": 2.8192,
"step": 31400
},
{
"epoch": 0.1,
"learning_rate": 4.4786287439421624e-05,
"loss": 2.8144,
"step": 31500
},
{
"epoch": 0.1,
"learning_rate": 4.476973597097534e-05,
"loss": 2.828,
"step": 31600
},
{
"epoch": 0.1,
"learning_rate": 4.475318450252907e-05,
"loss": 2.8107,
"step": 31700
},
{
"epoch": 0.11,
"learning_rate": 4.473663303408279e-05,
"loss": 2.8072,
"step": 31800
},
{
"epoch": 0.11,
"learning_rate": 4.472008156563651e-05,
"loss": 2.8178,
"step": 31900
},
{
"epoch": 0.11,
"learning_rate": 4.4703530097190226e-05,
"loss": 2.82,
"step": 32000
},
{
"epoch": 0.11,
"learning_rate": 4.4686978628743945e-05,
"loss": 2.8096,
"step": 32100
},
{
"epoch": 0.11,
"learning_rate": 4.4670427160297664e-05,
"loss": 2.8131,
"step": 32200
},
{
"epoch": 0.11,
"learning_rate": 4.465387569185138e-05,
"loss": 2.8138,
"step": 32300
},
{
"epoch": 0.11,
"learning_rate": 4.46373242234051e-05,
"loss": 2.8009,
"step": 32400
},
{
"epoch": 0.11,
"learning_rate": 4.462077275495882e-05,
"loss": 2.828,
"step": 32500
},
{
"epoch": 0.11,
"learning_rate": 4.460422128651254e-05,
"loss": 2.8212,
"step": 32600
},
{
"epoch": 0.11,
"learning_rate": 4.4587669818066266e-05,
"loss": 2.81,
"step": 32700
},
{
"epoch": 0.11,
"learning_rate": 4.457111834961998e-05,
"loss": 2.819,
"step": 32800
},
{
"epoch": 0.11,
"learning_rate": 4.45545668811737e-05,
"loss": 2.8013,
"step": 32900
},
{
"epoch": 0.11,
"learning_rate": 4.4538015412727416e-05,
"loss": 2.8188,
"step": 33000
},
{
"epoch": 0.11,
"learning_rate": 4.452146394428114e-05,
"loss": 2.8181,
"step": 33100
},
{
"epoch": 0.11,
"learning_rate": 4.450491247583486e-05,
"loss": 2.8019,
"step": 33200
},
{
"epoch": 0.11,
"learning_rate": 4.448836100738858e-05,
"loss": 2.8019,
"step": 33300
},
{
"epoch": 0.11,
"learning_rate": 4.44718095389423e-05,
"loss": 2.8125,
"step": 33400
},
{
"epoch": 0.11,
"learning_rate": 4.445525807049602e-05,
"loss": 2.7891,
"step": 33500
},
{
"epoch": 0.11,
"learning_rate": 4.4438706602049736e-05,
"loss": 2.8158,
"step": 33600
},
{
"epoch": 0.11,
"learning_rate": 4.4422155133603455e-05,
"loss": 2.8035,
"step": 33700
},
{
"epoch": 0.11,
"learning_rate": 4.4405603665157174e-05,
"loss": 2.8154,
"step": 33800
},
{
"epoch": 0.11,
"learning_rate": 4.438905219671089e-05,
"loss": 2.8074,
"step": 33900
},
{
"epoch": 0.11,
"learning_rate": 4.437250072826461e-05,
"loss": 2.8071,
"step": 34000
},
{
"epoch": 0.11,
"learning_rate": 4.435594925981834e-05,
"loss": 2.8131,
"step": 34100
},
{
"epoch": 0.11,
"learning_rate": 4.433939779137205e-05,
"loss": 2.8091,
"step": 34200
},
{
"epoch": 0.11,
"learning_rate": 4.432284632292577e-05,
"loss": 2.8209,
"step": 34300
},
{
"epoch": 0.11,
"learning_rate": 4.430629485447949e-05,
"loss": 2.8109,
"step": 34400
},
{
"epoch": 0.11,
"learning_rate": 4.4289743386033214e-05,
"loss": 2.8085,
"step": 34500
},
{
"epoch": 0.11,
"learning_rate": 4.427319191758693e-05,
"loss": 2.8065,
"step": 34600
},
{
"epoch": 0.11,
"learning_rate": 4.425664044914065e-05,
"loss": 2.8186,
"step": 34700
},
{
"epoch": 0.12,
"learning_rate": 4.424008898069437e-05,
"loss": 2.8115,
"step": 34800
},
{
"epoch": 0.12,
"learning_rate": 4.422353751224809e-05,
"loss": 2.8165,
"step": 34900
},
{
"epoch": 0.12,
"learning_rate": 4.420698604380181e-05,
"loss": 2.8051,
"step": 35000
},
{
"epoch": 0.12,
"learning_rate": 4.419043457535553e-05,
"loss": 2.8182,
"step": 35100
},
{
"epoch": 0.12,
"learning_rate": 4.417388310690925e-05,
"loss": 2.829,
"step": 35200
},
{
"epoch": 0.12,
"learning_rate": 4.4157331638462966e-05,
"loss": 2.8209,
"step": 35300
},
{
"epoch": 0.12,
"learning_rate": 4.4140780170016685e-05,
"loss": 2.8115,
"step": 35400
},
{
"epoch": 0.12,
"learning_rate": 4.412422870157041e-05,
"loss": 2.8039,
"step": 35500
},
{
"epoch": 0.12,
"learning_rate": 4.410767723312412e-05,
"loss": 2.8209,
"step": 35600
},
{
"epoch": 0.12,
"learning_rate": 4.409112576467784e-05,
"loss": 2.8151,
"step": 35700
},
{
"epoch": 0.12,
"learning_rate": 4.407457429623156e-05,
"loss": 2.8072,
"step": 35800
},
{
"epoch": 0.12,
"learning_rate": 4.4058022827785287e-05,
"loss": 2.8079,
"step": 35900
},
{
"epoch": 0.12,
"learning_rate": 4.4041471359339005e-05,
"loss": 2.8155,
"step": 36000
},
{
"epoch": 0.12,
"learning_rate": 4.4024919890892724e-05,
"loss": 2.827,
"step": 36100
},
{
"epoch": 0.12,
"learning_rate": 4.4008368422446443e-05,
"loss": 2.8087,
"step": 36200
},
{
"epoch": 0.12,
"learning_rate": 4.3991816954000156e-05,
"loss": 2.8112,
"step": 36300
},
{
"epoch": 0.12,
"learning_rate": 4.397526548555388e-05,
"loss": 2.8066,
"step": 36400
},
{
"epoch": 0.12,
"learning_rate": 4.39587140171076e-05,
"loss": 2.7995,
"step": 36500
},
{
"epoch": 0.12,
"learning_rate": 4.394216254866132e-05,
"loss": 2.8124,
"step": 36600
},
{
"epoch": 0.12,
"learning_rate": 4.392561108021504e-05,
"loss": 2.8112,
"step": 36700
},
{
"epoch": 0.12,
"learning_rate": 4.390905961176876e-05,
"loss": 2.8205,
"step": 36800
},
{
"epoch": 0.12,
"learning_rate": 4.3892508143322476e-05,
"loss": 2.8132,
"step": 36900
},
{
"epoch": 0.12,
"learning_rate": 4.3875956674876195e-05,
"loss": 2.81,
"step": 37000
},
{
"epoch": 0.12,
"learning_rate": 4.3859405206429914e-05,
"loss": 2.8004,
"step": 37100
},
{
"epoch": 0.12,
"learning_rate": 4.384285373798363e-05,
"loss": 2.8055,
"step": 37200
},
{
"epoch": 0.12,
"learning_rate": 4.382630226953736e-05,
"loss": 2.8111,
"step": 37300
},
{
"epoch": 0.12,
"learning_rate": 4.380975080109108e-05,
"loss": 2.8189,
"step": 37400
},
{
"epoch": 0.12,
"learning_rate": 4.37931993326448e-05,
"loss": 2.8021,
"step": 37500
},
{
"epoch": 0.12,
"learning_rate": 4.377664786419851e-05,
"loss": 2.7961,
"step": 37600
},
{
"epoch": 0.12,
"learning_rate": 4.376009639575223e-05,
"loss": 2.8059,
"step": 37700
},
{
"epoch": 0.13,
"learning_rate": 4.3743544927305954e-05,
"loss": 2.8151,
"step": 37800
},
{
"epoch": 0.13,
"learning_rate": 4.372699345885967e-05,
"loss": 2.815,
"step": 37900
},
{
"epoch": 0.13,
"learning_rate": 4.371044199041339e-05,
"loss": 2.8215,
"step": 38000
},
{
"epoch": 0.13,
"learning_rate": 4.369389052196711e-05,
"loss": 2.8042,
"step": 38100
},
{
"epoch": 0.13,
"learning_rate": 4.367733905352083e-05,
"loss": 2.808,
"step": 38200
},
{
"epoch": 0.13,
"learning_rate": 4.366078758507455e-05,
"loss": 2.7963,
"step": 38300
},
{
"epoch": 0.13,
"learning_rate": 4.364423611662827e-05,
"loss": 2.8151,
"step": 38400
},
{
"epoch": 0.13,
"learning_rate": 4.362768464818199e-05,
"loss": 2.8098,
"step": 38500
},
{
"epoch": 0.13,
"learning_rate": 4.3611133179735706e-05,
"loss": 2.8169,
"step": 38600
},
{
"epoch": 0.13,
"learning_rate": 4.359458171128943e-05,
"loss": 2.8037,
"step": 38700
},
{
"epoch": 0.13,
"learning_rate": 4.357803024284315e-05,
"loss": 2.8016,
"step": 38800
},
{
"epoch": 0.13,
"learning_rate": 4.356147877439687e-05,
"loss": 2.802,
"step": 38900
},
{
"epoch": 0.13,
"learning_rate": 4.354492730595058e-05,
"loss": 2.8094,
"step": 39000
},
{
"epoch": 0.13,
"learning_rate": 4.35283758375043e-05,
"loss": 2.8136,
"step": 39100
},
{
"epoch": 0.13,
"learning_rate": 4.3511824369058026e-05,
"loss": 2.8157,
"step": 39200
},
{
"epoch": 0.13,
"learning_rate": 4.3495272900611745e-05,
"loss": 2.8079,
"step": 39300
},
{
"epoch": 0.13,
"learning_rate": 4.3478721432165464e-05,
"loss": 2.8003,
"step": 39400
},
{
"epoch": 0.13,
"learning_rate": 4.3462169963719183e-05,
"loss": 2.8123,
"step": 39500
},
{
"epoch": 0.13,
"learning_rate": 4.34456184952729e-05,
"loss": 2.8088,
"step": 39600
},
{
"epoch": 0.13,
"learning_rate": 4.342906702682662e-05,
"loss": 2.8015,
"step": 39700
},
{
"epoch": 0.13,
"learning_rate": 4.341251555838034e-05,
"loss": 2.8109,
"step": 39800
},
{
"epoch": 0.13,
"learning_rate": 4.339596408993406e-05,
"loss": 2.7914,
"step": 39900
},
{
"epoch": 0.13,
"learning_rate": 4.337941262148778e-05,
"loss": 2.8155,
"step": 40000
},
{
"epoch": 0.13,
"learning_rate": 4.3362861153041504e-05,
"loss": 2.8105,
"step": 40100
},
{
"epoch": 0.13,
"learning_rate": 4.334630968459522e-05,
"loss": 2.7998,
"step": 40200
},
{
"epoch": 0.13,
"learning_rate": 4.332975821614894e-05,
"loss": 2.8032,
"step": 40300
},
{
"epoch": 0.13,
"learning_rate": 4.3313206747702654e-05,
"loss": 2.8343,
"step": 40400
},
{
"epoch": 0.13,
"learning_rate": 4.329665527925637e-05,
"loss": 2.8116,
"step": 40500
},
{
"epoch": 0.13,
"learning_rate": 4.32801038108101e-05,
"loss": 2.8113,
"step": 40600
},
{
"epoch": 0.13,
"learning_rate": 4.326355234236382e-05,
"loss": 2.8066,
"step": 40700
},
{
"epoch": 0.14,
"learning_rate": 4.324700087391754e-05,
"loss": 2.8092,
"step": 40800
},
{
"epoch": 0.14,
"learning_rate": 4.3230449405471256e-05,
"loss": 2.8082,
"step": 40900
},
{
"epoch": 0.14,
"learning_rate": 4.3213897937024975e-05,
"loss": 2.8049,
"step": 41000
},
{
"epoch": 0.14,
"learning_rate": 4.3197346468578694e-05,
"loss": 2.8015,
"step": 41100
},
{
"epoch": 0.14,
"learning_rate": 4.318079500013241e-05,
"loss": 2.8148,
"step": 41200
},
{
"epoch": 0.14,
"learning_rate": 4.316424353168613e-05,
"loss": 2.8122,
"step": 41300
},
{
"epoch": 0.14,
"learning_rate": 4.314769206323985e-05,
"loss": 2.8091,
"step": 41400
},
{
"epoch": 0.14,
"learning_rate": 4.313114059479357e-05,
"loss": 2.8106,
"step": 41500
},
{
"epoch": 0.14,
"learning_rate": 4.3114589126347296e-05,
"loss": 2.8122,
"step": 41600
},
{
"epoch": 0.14,
"learning_rate": 4.3098037657901015e-05,
"loss": 2.8174,
"step": 41700
},
{
"epoch": 0.14,
"learning_rate": 4.308148618945473e-05,
"loss": 2.816,
"step": 41800
},
{
"epoch": 0.14,
"learning_rate": 4.3064934721008446e-05,
"loss": 2.8264,
"step": 41900
},
{
"epoch": 0.14,
"learning_rate": 4.304838325256217e-05,
"loss": 2.8113,
"step": 42000
},
{
"epoch": 0.14,
"learning_rate": 4.303183178411589e-05,
"loss": 2.7861,
"step": 42100
},
{
"epoch": 0.14,
"learning_rate": 4.301528031566961e-05,
"loss": 2.8134,
"step": 42200
},
{
"epoch": 0.14,
"learning_rate": 4.299872884722333e-05,
"loss": 2.8011,
"step": 42300
},
{
"epoch": 0.14,
"learning_rate": 4.298217737877705e-05,
"loss": 2.7928,
"step": 42400
},
{
"epoch": 0.14,
"learning_rate": 4.2965625910330766e-05,
"loss": 2.7977,
"step": 42500
},
{
"epoch": 0.14,
"learning_rate": 4.2949074441884485e-05,
"loss": 2.8288,
"step": 42600
},
{
"epoch": 0.14,
"learning_rate": 4.2932522973438204e-05,
"loss": 2.7978,
"step": 42700
},
{
"epoch": 0.14,
"learning_rate": 4.2915971504991923e-05,
"loss": 2.8003,
"step": 42800
},
{
"epoch": 0.14,
"learning_rate": 4.289942003654564e-05,
"loss": 2.7981,
"step": 42900
},
{
"epoch": 0.14,
"learning_rate": 4.288286856809937e-05,
"loss": 2.7947,
"step": 43000
},
{
"epoch": 0.14,
"learning_rate": 4.286631709965308e-05,
"loss": 2.8156,
"step": 43100
},
{
"epoch": 0.14,
"learning_rate": 4.28497656312068e-05,
"loss": 2.8202,
"step": 43200
},
{
"epoch": 0.14,
"learning_rate": 4.283321416276052e-05,
"loss": 2.8246,
"step": 43300
},
{
"epoch": 0.14,
"learning_rate": 4.2816662694314244e-05,
"loss": 2.8088,
"step": 43400
},
{
"epoch": 0.14,
"learning_rate": 4.280011122586796e-05,
"loss": 2.8038,
"step": 43500
},
{
"epoch": 0.14,
"learning_rate": 4.278355975742168e-05,
"loss": 2.8082,
"step": 43600
},
{
"epoch": 0.14,
"learning_rate": 4.27670082889754e-05,
"loss": 2.802,
"step": 43700
},
{
"epoch": 0.14,
"learning_rate": 4.275045682052912e-05,
"loss": 2.8142,
"step": 43800
},
{
"epoch": 0.15,
"learning_rate": 4.273390535208284e-05,
"loss": 2.8056,
"step": 43900
},
{
"epoch": 0.15,
"learning_rate": 4.271735388363656e-05,
"loss": 2.8144,
"step": 44000
},
{
"epoch": 0.15,
"learning_rate": 4.270080241519028e-05,
"loss": 2.7998,
"step": 44100
},
{
"epoch": 0.15,
"learning_rate": 4.2684250946743996e-05,
"loss": 2.8136,
"step": 44200
},
{
"epoch": 0.15,
"learning_rate": 4.2667699478297715e-05,
"loss": 2.8057,
"step": 44300
},
{
"epoch": 0.15,
"learning_rate": 4.265114800985144e-05,
"loss": 2.8254,
"step": 44400
},
{
"epoch": 0.15,
"learning_rate": 4.263459654140515e-05,
"loss": 2.7837,
"step": 44500
},
{
"epoch": 0.15,
"learning_rate": 4.261804507295887e-05,
"loss": 2.8068,
"step": 44600
},
{
"epoch": 0.15,
"learning_rate": 4.260149360451259e-05,
"loss": 2.8084,
"step": 44700
},
{
"epoch": 0.15,
"learning_rate": 4.2584942136066317e-05,
"loss": 2.828,
"step": 44800
},
{
"epoch": 0.15,
"learning_rate": 4.2568390667620036e-05,
"loss": 2.8163,
"step": 44900
},
{
"epoch": 0.15,
"learning_rate": 4.2551839199173755e-05,
"loss": 2.8021,
"step": 45000
},
{
"epoch": 0.15,
"learning_rate": 4.2535287730727474e-05,
"loss": 2.81,
"step": 45100
},
{
"epoch": 0.15,
"learning_rate": 4.2518736262281186e-05,
"loss": 2.8128,
"step": 45200
},
{
"epoch": 0.15,
"learning_rate": 4.250218479383491e-05,
"loss": 2.808,
"step": 45300
},
{
"epoch": 0.15,
"learning_rate": 4.248563332538863e-05,
"loss": 2.8042,
"step": 45400
},
{
"epoch": 0.15,
"learning_rate": 4.246908185694235e-05,
"loss": 2.8202,
"step": 45500
},
{
"epoch": 0.15,
"learning_rate": 4.245253038849607e-05,
"loss": 2.813,
"step": 45600
},
{
"epoch": 0.15,
"learning_rate": 4.243597892004979e-05,
"loss": 2.8098,
"step": 45700
},
{
"epoch": 0.15,
"learning_rate": 4.241942745160351e-05,
"loss": 2.8086,
"step": 45800
},
{
"epoch": 0.15,
"learning_rate": 4.2402875983157225e-05,
"loss": 2.8128,
"step": 45900
},
{
"epoch": 0.15,
"learning_rate": 4.2386324514710944e-05,
"loss": 2.7975,
"step": 46000
},
{
"epoch": 0.15,
"learning_rate": 4.236977304626466e-05,
"loss": 2.8128,
"step": 46100
},
{
"epoch": 0.15,
"learning_rate": 4.235322157781839e-05,
"loss": 2.8199,
"step": 46200
},
{
"epoch": 0.15,
"learning_rate": 4.233667010937211e-05,
"loss": 2.8186,
"step": 46300
},
{
"epoch": 0.15,
"learning_rate": 4.232011864092583e-05,
"loss": 2.8012,
"step": 46400
},
{
"epoch": 0.15,
"learning_rate": 4.2303567172479546e-05,
"loss": 2.809,
"step": 46500
},
{
"epoch": 0.15,
"learning_rate": 4.228701570403326e-05,
"loss": 2.8052,
"step": 46600
},
{
"epoch": 0.15,
"learning_rate": 4.2270464235586984e-05,
"loss": 2.8122,
"step": 46700
},
{
"epoch": 0.15,
"learning_rate": 4.22539127671407e-05,
"loss": 2.8034,
"step": 46800
},
{
"epoch": 0.16,
"learning_rate": 4.223736129869442e-05,
"loss": 2.8148,
"step": 46900
},
{
"epoch": 0.16,
"learning_rate": 4.222080983024814e-05,
"loss": 2.8127,
"step": 47000
},
{
"epoch": 0.16,
"learning_rate": 4.220425836180186e-05,
"loss": 2.8045,
"step": 47100
},
{
"epoch": 0.16,
"learning_rate": 4.2187706893355586e-05,
"loss": 2.7989,
"step": 47200
},
{
"epoch": 0.16,
"learning_rate": 4.21711554249093e-05,
"loss": 2.8028,
"step": 47300
},
{
"epoch": 0.16,
"learning_rate": 4.215460395646302e-05,
"loss": 2.7974,
"step": 47400
},
{
"epoch": 0.16,
"learning_rate": 4.2138052488016736e-05,
"loss": 2.8092,
"step": 47500
},
{
"epoch": 0.16,
"learning_rate": 4.212150101957046e-05,
"loss": 2.7943,
"step": 47600
},
{
"epoch": 0.16,
"learning_rate": 4.210494955112418e-05,
"loss": 2.804,
"step": 47700
},
{
"epoch": 0.16,
"learning_rate": 4.20883980826779e-05,
"loss": 2.8135,
"step": 47800
},
{
"epoch": 0.16,
"learning_rate": 4.207184661423162e-05,
"loss": 2.7891,
"step": 47900
},
{
"epoch": 0.16,
"learning_rate": 4.205529514578533e-05,
"loss": 2.8112,
"step": 48000
},
{
"epoch": 0.16,
"learning_rate": 4.2038743677339057e-05,
"loss": 2.8007,
"step": 48100
},
{
"epoch": 0.16,
"learning_rate": 4.2022192208892776e-05,
"loss": 2.8103,
"step": 48200
},
{
"epoch": 0.16,
"learning_rate": 4.2005640740446495e-05,
"loss": 2.8088,
"step": 48300
},
{
"epoch": 0.16,
"learning_rate": 4.1989089272000214e-05,
"loss": 2.8118,
"step": 48400
},
{
"epoch": 0.16,
"learning_rate": 4.197253780355393e-05,
"loss": 2.8217,
"step": 48500
},
{
"epoch": 0.16,
"learning_rate": 4.195598633510766e-05,
"loss": 2.8115,
"step": 48600
},
{
"epoch": 0.16,
"learning_rate": 4.193943486666137e-05,
"loss": 2.8141,
"step": 48700
},
{
"epoch": 0.16,
"learning_rate": 4.192288339821509e-05,
"loss": 2.8035,
"step": 48800
},
{
"epoch": 0.16,
"learning_rate": 4.190633192976881e-05,
"loss": 2.8094,
"step": 48900
},
{
"epoch": 0.16,
"learning_rate": 4.1889780461322534e-05,
"loss": 2.7941,
"step": 49000
},
{
"epoch": 0.16,
"learning_rate": 4.187322899287625e-05,
"loss": 2.7959,
"step": 49100
},
{
"epoch": 0.16,
"learning_rate": 4.185667752442997e-05,
"loss": 2.8021,
"step": 49200
},
{
"epoch": 0.16,
"learning_rate": 4.184012605598369e-05,
"loss": 2.8077,
"step": 49300
},
{
"epoch": 0.16,
"learning_rate": 4.18235745875374e-05,
"loss": 2.8029,
"step": 49400
},
{
"epoch": 0.16,
"learning_rate": 4.180702311909113e-05,
"loss": 2.8087,
"step": 49500
},
{
"epoch": 0.16,
"learning_rate": 4.179047165064485e-05,
"loss": 2.8156,
"step": 49600
},
{
"epoch": 0.16,
"learning_rate": 4.177392018219857e-05,
"loss": 2.8078,
"step": 49700
},
{
"epoch": 0.16,
"learning_rate": 4.1757368713752286e-05,
"loss": 2.7915,
"step": 49800
},
{
"epoch": 0.17,
"learning_rate": 4.1740817245306005e-05,
"loss": 2.805,
"step": 49900
},
{
"epoch": 0.17,
"learning_rate": 4.1724265776859724e-05,
"loss": 2.8067,
"step": 50000
},
{
"epoch": 0.17,
"learning_rate": 4.170771430841344e-05,
"loss": 2.816,
"step": 50100
},
{
"epoch": 0.17,
"learning_rate": 4.169116283996716e-05,
"loss": 2.8095,
"step": 50200
},
{
"epoch": 0.17,
"learning_rate": 4.167461137152088e-05,
"loss": 2.8193,
"step": 50300
},
{
"epoch": 0.17,
"learning_rate": 4.16580599030746e-05,
"loss": 2.8174,
"step": 50400
},
{
"epoch": 0.17,
"learning_rate": 4.1641508434628326e-05,
"loss": 2.8053,
"step": 50500
},
{
"epoch": 0.17,
"learning_rate": 4.1624956966182045e-05,
"loss": 2.8069,
"step": 50600
},
{
"epoch": 0.17,
"learning_rate": 4.160840549773576e-05,
"loss": 2.8127,
"step": 50700
},
{
"epoch": 0.17,
"learning_rate": 4.1591854029289476e-05,
"loss": 2.819,
"step": 50800
},
{
"epoch": 0.17,
"learning_rate": 4.15753025608432e-05,
"loss": 2.8113,
"step": 50900
},
{
"epoch": 0.17,
"learning_rate": 4.155875109239692e-05,
"loss": 2.7989,
"step": 51000
},
{
"epoch": 0.17,
"learning_rate": 4.154219962395064e-05,
"loss": 2.816,
"step": 51100
},
{
"epoch": 0.17,
"learning_rate": 4.152564815550436e-05,
"loss": 2.7987,
"step": 51200
},
{
"epoch": 0.17,
"learning_rate": 4.150909668705808e-05,
"loss": 2.8008,
"step": 51300
},
{
"epoch": 0.17,
"learning_rate": 4.1492545218611797e-05,
"loss": 2.7955,
"step": 51400
},
{
"epoch": 0.17,
"learning_rate": 4.1475993750165516e-05,
"loss": 2.8359,
"step": 51500
},
{
"epoch": 0.17,
"learning_rate": 4.1459442281719235e-05,
"loss": 2.7892,
"step": 51600
},
{
"epoch": 0.17,
"learning_rate": 4.1442890813272954e-05,
"loss": 2.8003,
"step": 51700
},
{
"epoch": 0.17,
"learning_rate": 4.142633934482667e-05,
"loss": 2.8014,
"step": 51800
},
{
"epoch": 0.17,
"learning_rate": 4.14097878763804e-05,
"loss": 2.8021,
"step": 51900
},
{
"epoch": 0.17,
"learning_rate": 4.139323640793412e-05,
"loss": 2.8198,
"step": 52000
},
{
"epoch": 0.17,
"learning_rate": 4.137668493948783e-05,
"loss": 2.8041,
"step": 52100
},
{
"epoch": 0.17,
"learning_rate": 4.136013347104155e-05,
"loss": 2.8143,
"step": 52200
},
{
"epoch": 0.17,
"learning_rate": 4.1343582002595274e-05,
"loss": 2.8188,
"step": 52300
},
{
"epoch": 0.17,
"learning_rate": 4.132703053414899e-05,
"loss": 2.7977,
"step": 52400
},
{
"epoch": 0.17,
"learning_rate": 4.131047906570271e-05,
"loss": 2.8216,
"step": 52500
},
{
"epoch": 0.17,
"learning_rate": 4.129392759725643e-05,
"loss": 2.7988,
"step": 52600
},
{
"epoch": 0.17,
"learning_rate": 4.127737612881015e-05,
"loss": 2.8149,
"step": 52700
},
{
"epoch": 0.17,
"learning_rate": 4.126082466036387e-05,
"loss": 2.7965,
"step": 52800
},
{
"epoch": 0.18,
"learning_rate": 4.124427319191759e-05,
"loss": 2.8059,
"step": 52900
},
{
"epoch": 0.18,
"learning_rate": 4.122772172347131e-05,
"loss": 2.8075,
"step": 53000
},
{
"epoch": 0.18,
"learning_rate": 4.1211170255025026e-05,
"loss": 2.8086,
"step": 53100
},
{
"epoch": 0.18,
"learning_rate": 4.1194618786578745e-05,
"loss": 2.8161,
"step": 53200
},
{
"epoch": 0.18,
"learning_rate": 4.117806731813247e-05,
"loss": 2.8058,
"step": 53300
},
{
"epoch": 0.18,
"learning_rate": 4.116151584968619e-05,
"loss": 2.8104,
"step": 53400
},
{
"epoch": 0.18,
"learning_rate": 4.11449643812399e-05,
"loss": 2.8105,
"step": 53500
},
{
"epoch": 0.18,
"learning_rate": 4.112841291279362e-05,
"loss": 2.8044,
"step": 53600
},
{
"epoch": 0.18,
"learning_rate": 4.111186144434735e-05,
"loss": 2.791,
"step": 53700
},
{
"epoch": 0.18,
"learning_rate": 4.1095309975901066e-05,
"loss": 2.7971,
"step": 53800
},
{
"epoch": 0.18,
"learning_rate": 4.1078758507454785e-05,
"loss": 2.8085,
"step": 53900
},
{
"epoch": 0.18,
"learning_rate": 4.1062207039008504e-05,
"loss": 2.8043,
"step": 54000
},
{
"epoch": 0.18,
"learning_rate": 4.104565557056222e-05,
"loss": 2.8138,
"step": 54100
},
{
"epoch": 0.18,
"learning_rate": 4.102910410211594e-05,
"loss": 2.8023,
"step": 54200
},
{
"epoch": 0.18,
"learning_rate": 4.101255263366966e-05,
"loss": 2.8177,
"step": 54300
},
{
"epoch": 0.18,
"learning_rate": 4.099600116522338e-05,
"loss": 2.8152,
"step": 54400
},
{
"epoch": 0.18,
"learning_rate": 4.09794496967771e-05,
"loss": 2.813,
"step": 54500
},
{
"epoch": 0.18,
"learning_rate": 4.096289822833082e-05,
"loss": 2.7945,
"step": 54600
},
{
"epoch": 0.18,
"learning_rate": 4.094634675988454e-05,
"loss": 2.8041,
"step": 54700
},
{
"epoch": 0.18,
"learning_rate": 4.092979529143826e-05,
"loss": 2.8123,
"step": 54800
},
{
"epoch": 0.18,
"learning_rate": 4.0913243822991974e-05,
"loss": 2.8047,
"step": 54900
},
{
"epoch": 0.18,
"learning_rate": 4.0896692354545693e-05,
"loss": 2.8108,
"step": 55000
},
{
"epoch": 0.18,
"learning_rate": 4.088014088609942e-05,
"loss": 2.8052,
"step": 55100
},
{
"epoch": 0.18,
"learning_rate": 4.086358941765314e-05,
"loss": 2.814,
"step": 55200
},
{
"epoch": 0.18,
"learning_rate": 4.084703794920686e-05,
"loss": 2.8038,
"step": 55300
},
{
"epoch": 0.18,
"learning_rate": 4.0830486480760576e-05,
"loss": 2.8004,
"step": 55400
},
{
"epoch": 0.18,
"learning_rate": 4.0813935012314295e-05,
"loss": 2.8046,
"step": 55500
},
{
"epoch": 0.18,
"learning_rate": 4.0797383543868014e-05,
"loss": 2.8175,
"step": 55600
},
{
"epoch": 0.18,
"learning_rate": 4.078083207542173e-05,
"loss": 2.8085,
"step": 55700
},
{
"epoch": 0.18,
"learning_rate": 4.076428060697545e-05,
"loss": 2.8076,
"step": 55800
},
{
"epoch": 0.19,
"learning_rate": 4.074772913852917e-05,
"loss": 2.8047,
"step": 55900
},
{
"epoch": 0.19,
"learning_rate": 4.073117767008289e-05,
"loss": 2.8232,
"step": 56000
},
{
"epoch": 0.19,
"learning_rate": 4.0714626201636616e-05,
"loss": 2.8034,
"step": 56100
},
{
"epoch": 0.19,
"learning_rate": 4.0698074733190335e-05,
"loss": 2.81,
"step": 56200
},
{
"epoch": 0.19,
"learning_rate": 4.068152326474405e-05,
"loss": 2.8006,
"step": 56300
},
{
"epoch": 0.19,
"learning_rate": 4.0664971796297766e-05,
"loss": 2.8121,
"step": 56400
},
{
"epoch": 0.19,
"learning_rate": 4.064842032785149e-05,
"loss": 2.8019,
"step": 56500
},
{
"epoch": 0.19,
"learning_rate": 4.063186885940521e-05,
"loss": 2.7927,
"step": 56600
},
{
"epoch": 0.19,
"learning_rate": 4.061531739095893e-05,
"loss": 2.8002,
"step": 56700
},
{
"epoch": 0.19,
"learning_rate": 4.059876592251265e-05,
"loss": 2.8026,
"step": 56800
},
{
"epoch": 0.19,
"learning_rate": 4.058221445406636e-05,
"loss": 2.8267,
"step": 56900
},
{
"epoch": 0.19,
"learning_rate": 4.056566298562009e-05,
"loss": 2.7956,
"step": 57000
},
{
"epoch": 0.19,
"learning_rate": 4.0549111517173806e-05,
"loss": 2.7924,
"step": 57100
},
{
"epoch": 0.19,
"learning_rate": 4.0532560048727525e-05,
"loss": 2.8085,
"step": 57200
},
{
"epoch": 0.19,
"learning_rate": 4.0516008580281244e-05,
"loss": 2.8003,
"step": 57300
},
{
"epoch": 0.19,
"learning_rate": 4.049945711183496e-05,
"loss": 2.8188,
"step": 57400
},
{
"epoch": 0.19,
"learning_rate": 4.048290564338869e-05,
"loss": 2.7995,
"step": 57500
},
{
"epoch": 0.19,
"learning_rate": 4.04663541749424e-05,
"loss": 2.8052,
"step": 57600
},
{
"epoch": 0.19,
"learning_rate": 4.044980270649612e-05,
"loss": 2.8037,
"step": 57700
},
{
"epoch": 0.19,
"learning_rate": 4.043325123804984e-05,
"loss": 2.7951,
"step": 57800
},
{
"epoch": 0.19,
"learning_rate": 4.0416699769603564e-05,
"loss": 2.8199,
"step": 57900
},
{
"epoch": 0.19,
"learning_rate": 4.040014830115728e-05,
"loss": 2.8044,
"step": 58000
},
{
"epoch": 0.19,
"learning_rate": 4.0383596832711e-05,
"loss": 2.7989,
"step": 58100
},
{
"epoch": 0.19,
"learning_rate": 4.036704536426472e-05,
"loss": 2.8152,
"step": 58200
},
{
"epoch": 0.19,
"learning_rate": 4.0350493895818433e-05,
"loss": 2.8057,
"step": 58300
},
{
"epoch": 0.19,
"learning_rate": 4.033394242737216e-05,
"loss": 2.8312,
"step": 58400
},
{
"epoch": 0.19,
"learning_rate": 4.031739095892588e-05,
"loss": 2.8065,
"step": 58500
},
{
"epoch": 0.19,
"learning_rate": 4.03008394904796e-05,
"loss": 2.8048,
"step": 58600
},
{
"epoch": 0.19,
"learning_rate": 4.0284288022033316e-05,
"loss": 2.8145,
"step": 58700
},
{
"epoch": 0.19,
"learning_rate": 4.0267736553587035e-05,
"loss": 2.8112,
"step": 58800
},
{
"epoch": 0.19,
"learning_rate": 4.025118508514076e-05,
"loss": 2.8147,
"step": 58900
},
{
"epoch": 0.2,
"learning_rate": 4.023463361669447e-05,
"loss": 2.8039,
"step": 59000
},
{
"epoch": 0.2,
"learning_rate": 4.021808214824819e-05,
"loss": 2.7997,
"step": 59100
},
{
"epoch": 0.2,
"learning_rate": 4.020153067980191e-05,
"loss": 2.7909,
"step": 59200
},
{
"epoch": 0.2,
"learning_rate": 4.018497921135563e-05,
"loss": 2.8021,
"step": 59300
},
{
"epoch": 0.2,
"learning_rate": 4.0168427742909356e-05,
"loss": 2.8064,
"step": 59400
},
{
"epoch": 0.2,
"learning_rate": 4.0151876274463075e-05,
"loss": 2.8002,
"step": 59500
},
{
"epoch": 0.2,
"learning_rate": 4.0135324806016794e-05,
"loss": 2.8056,
"step": 59600
},
{
"epoch": 0.2,
"learning_rate": 4.0118773337570506e-05,
"loss": 2.8139,
"step": 59700
},
{
"epoch": 0.2,
"learning_rate": 4.010222186912423e-05,
"loss": 2.7957,
"step": 59800
},
{
"epoch": 0.2,
"learning_rate": 4.008567040067795e-05,
"loss": 2.8015,
"step": 59900
},
{
"epoch": 0.2,
"learning_rate": 4.006911893223167e-05,
"loss": 2.809,
"step": 60000
},
{
"epoch": 0.2,
"learning_rate": 4.005256746378539e-05,
"loss": 2.8058,
"step": 60100
},
{
"epoch": 0.2,
"learning_rate": 4.003601599533911e-05,
"loss": 2.817,
"step": 60200
},
{
"epoch": 0.2,
"learning_rate": 4.0019464526892833e-05,
"loss": 2.8093,
"step": 60300
},
{
"epoch": 0.2,
"learning_rate": 4.0002913058446546e-05,
"loss": 2.7949,
"step": 60400
},
{
"epoch": 0.2,
"learning_rate": 3.9986361590000265e-05,
"loss": 2.7912,
"step": 60500
},
{
"epoch": 0.2,
"learning_rate": 3.9969810121553984e-05,
"loss": 2.8034,
"step": 60600
},
{
"epoch": 0.2,
"learning_rate": 3.99532586531077e-05,
"loss": 2.8178,
"step": 60700
},
{
"epoch": 0.2,
"learning_rate": 3.993670718466143e-05,
"loss": 2.7983,
"step": 60800
},
{
"epoch": 0.2,
"learning_rate": 3.992015571621515e-05,
"loss": 2.8039,
"step": 60900
},
{
"epoch": 0.2,
"learning_rate": 3.9903604247768866e-05,
"loss": 2.8196,
"step": 61000
},
{
"epoch": 0.2,
"learning_rate": 3.988705277932258e-05,
"loss": 2.8025,
"step": 61100
},
{
"epoch": 0.2,
"learning_rate": 3.9870501310876304e-05,
"loss": 2.7947,
"step": 61200
},
{
"epoch": 0.2,
"learning_rate": 3.985394984243002e-05,
"loss": 2.8098,
"step": 61300
},
{
"epoch": 0.2,
"learning_rate": 3.983739837398374e-05,
"loss": 2.8016,
"step": 61400
},
{
"epoch": 0.2,
"learning_rate": 3.982084690553746e-05,
"loss": 2.8038,
"step": 61500
},
{
"epoch": 0.2,
"learning_rate": 3.980429543709118e-05,
"loss": 2.818,
"step": 61600
},
{
"epoch": 0.2,
"learning_rate": 3.9787743968644906e-05,
"loss": 2.8017,
"step": 61700
},
{
"epoch": 0.2,
"learning_rate": 3.977119250019862e-05,
"loss": 2.801,
"step": 61800
},
{
"epoch": 0.2,
"learning_rate": 3.975464103175234e-05,
"loss": 2.8037,
"step": 61900
},
{
"epoch": 0.21,
"learning_rate": 3.9738089563306056e-05,
"loss": 2.7792,
"step": 62000
},
{
"epoch": 0.21,
"learning_rate": 3.9721538094859775e-05,
"loss": 2.799,
"step": 62100
},
{
"epoch": 0.21,
"learning_rate": 3.97049866264135e-05,
"loss": 2.7958,
"step": 62200
},
{
"epoch": 0.21,
"learning_rate": 3.968843515796722e-05,
"loss": 2.8078,
"step": 62300
},
{
"epoch": 0.21,
"learning_rate": 3.967188368952094e-05,
"loss": 2.7961,
"step": 62400
},
{
"epoch": 0.21,
"learning_rate": 3.965533222107465e-05,
"loss": 2.7925,
"step": 62500
},
{
"epoch": 0.21,
"learning_rate": 3.963878075262838e-05,
"loss": 2.8041,
"step": 62600
},
{
"epoch": 0.21,
"learning_rate": 3.9622229284182096e-05,
"loss": 2.8071,
"step": 62700
},
{
"epoch": 0.21,
"learning_rate": 3.9605677815735815e-05,
"loss": 2.7948,
"step": 62800
},
{
"epoch": 0.21,
"learning_rate": 3.9589126347289534e-05,
"loss": 2.7875,
"step": 62900
},
{
"epoch": 0.21,
"learning_rate": 3.957257487884325e-05,
"loss": 2.8135,
"step": 63000
},
{
"epoch": 0.21,
"learning_rate": 3.955602341039697e-05,
"loss": 2.8226,
"step": 63100
},
{
"epoch": 0.21,
"learning_rate": 3.953947194195069e-05,
"loss": 2.8012,
"step": 63200
},
{
"epoch": 0.21,
"learning_rate": 3.952292047350441e-05,
"loss": 2.795,
"step": 63300
},
{
"epoch": 0.21,
"learning_rate": 3.950636900505813e-05,
"loss": 2.8081,
"step": 63400
},
{
"epoch": 0.21,
"learning_rate": 3.948981753661185e-05,
"loss": 2.8029,
"step": 63500
},
{
"epoch": 0.21,
"learning_rate": 3.9473266068165573e-05,
"loss": 2.8052,
"step": 63600
},
{
"epoch": 0.21,
"learning_rate": 3.945671459971929e-05,
"loss": 2.8015,
"step": 63700
},
{
"epoch": 0.21,
"learning_rate": 3.9440163131273005e-05,
"loss": 2.7956,
"step": 63800
},
{
"epoch": 0.21,
"learning_rate": 3.9423611662826724e-05,
"loss": 2.787,
"step": 63900
},
{
"epoch": 0.21,
"learning_rate": 3.940706019438045e-05,
"loss": 2.7956,
"step": 64000
},
{
"epoch": 0.21,
"learning_rate": 3.939050872593417e-05,
"loss": 2.7939,
"step": 64100
},
{
"epoch": 0.21,
"learning_rate": 3.937395725748789e-05,
"loss": 2.7985,
"step": 64200
},
{
"epoch": 0.21,
"learning_rate": 3.9357405789041606e-05,
"loss": 2.7921,
"step": 64300
},
{
"epoch": 0.21,
"learning_rate": 3.9340854320595325e-05,
"loss": 2.8069,
"step": 64400
},
{
"epoch": 0.21,
"learning_rate": 3.9324302852149044e-05,
"loss": 2.8148,
"step": 64500
},
{
"epoch": 0.21,
"learning_rate": 3.930775138370276e-05,
"loss": 2.8064,
"step": 64600
},
{
"epoch": 0.21,
"learning_rate": 3.929119991525648e-05,
"loss": 2.8038,
"step": 64700
},
{
"epoch": 0.21,
"learning_rate": 3.92746484468102e-05,
"loss": 2.7962,
"step": 64800
},
{
"epoch": 0.21,
"learning_rate": 3.925809697836392e-05,
"loss": 2.8021,
"step": 64900
},
{
"epoch": 0.22,
"learning_rate": 3.9241545509917646e-05,
"loss": 2.8189,
"step": 65000
},
{
"epoch": 0.22,
"learning_rate": 3.9224994041471365e-05,
"loss": 2.7926,
"step": 65100
},
{
"epoch": 0.22,
"learning_rate": 3.920844257302508e-05,
"loss": 2.7796,
"step": 65200
},
{
"epoch": 0.22,
"learning_rate": 3.9191891104578796e-05,
"loss": 2.7977,
"step": 65300
},
{
"epoch": 0.22,
"learning_rate": 3.917533963613252e-05,
"loss": 2.7965,
"step": 65400
},
{
"epoch": 0.22,
"learning_rate": 3.915878816768624e-05,
"loss": 2.7975,
"step": 65500
},
{
"epoch": 0.22,
"learning_rate": 3.914223669923996e-05,
"loss": 2.7907,
"step": 65600
},
{
"epoch": 0.22,
"learning_rate": 3.912568523079368e-05,
"loss": 2.7874,
"step": 65700
},
{
"epoch": 0.22,
"learning_rate": 3.91091337623474e-05,
"loss": 2.8194,
"step": 65800
},
{
"epoch": 0.22,
"learning_rate": 3.909258229390112e-05,
"loss": 2.8091,
"step": 65900
},
{
"epoch": 0.22,
"learning_rate": 3.9076030825454836e-05,
"loss": 2.7992,
"step": 66000
},
{
"epoch": 0.22,
"learning_rate": 3.9059479357008555e-05,
"loss": 2.8089,
"step": 66100
},
{
"epoch": 0.22,
"learning_rate": 3.9042927888562274e-05,
"loss": 2.8013,
"step": 66200
},
{
"epoch": 0.22,
"learning_rate": 3.902637642011599e-05,
"loss": 2.8114,
"step": 66300
},
{
"epoch": 0.22,
"learning_rate": 3.900982495166972e-05,
"loss": 2.7981,
"step": 66400
},
{
"epoch": 0.22,
"learning_rate": 3.899327348322344e-05,
"loss": 2.7984,
"step": 66500
},
{
"epoch": 0.22,
"learning_rate": 3.897672201477715e-05,
"loss": 2.8112,
"step": 66600
},
{
"epoch": 0.22,
"learning_rate": 3.896017054633087e-05,
"loss": 2.8126,
"step": 66700
},
{
"epoch": 0.22,
"learning_rate": 3.8943619077884594e-05,
"loss": 2.7928,
"step": 66800
},
{
"epoch": 0.22,
"learning_rate": 3.892706760943831e-05,
"loss": 2.8067,
"step": 66900
},
{
"epoch": 0.22,
"learning_rate": 3.891051614099203e-05,
"loss": 2.7965,
"step": 67000
},
{
"epoch": 0.22,
"learning_rate": 3.889396467254575e-05,
"loss": 2.7896,
"step": 67100
},
{
"epoch": 0.22,
"learning_rate": 3.887741320409947e-05,
"loss": 2.8063,
"step": 67200
},
{
"epoch": 0.22,
"learning_rate": 3.886086173565319e-05,
"loss": 2.806,
"step": 67300
},
{
"epoch": 0.22,
"learning_rate": 3.884431026720691e-05,
"loss": 2.8023,
"step": 67400
},
{
"epoch": 0.22,
"learning_rate": 3.882775879876063e-05,
"loss": 2.7984,
"step": 67500
},
{
"epoch": 0.22,
"learning_rate": 3.8811207330314346e-05,
"loss": 2.8007,
"step": 67600
},
{
"epoch": 0.22,
"learning_rate": 3.8794655861868065e-05,
"loss": 2.8216,
"step": 67700
},
{
"epoch": 0.22,
"learning_rate": 3.877810439342179e-05,
"loss": 2.7899,
"step": 67800
},
{
"epoch": 0.22,
"learning_rate": 3.876155292497551e-05,
"loss": 2.8003,
"step": 67900
},
{
"epoch": 0.23,
"learning_rate": 3.874500145652922e-05,
"loss": 2.7922,
"step": 68000
},
{
"epoch": 0.23,
"learning_rate": 3.872844998808294e-05,
"loss": 2.8048,
"step": 68100
},
{
"epoch": 0.23,
"learning_rate": 3.871189851963666e-05,
"loss": 2.8043,
"step": 68200
},
{
"epoch": 0.23,
"learning_rate": 3.8695347051190386e-05,
"loss": 2.7971,
"step": 68300
},
{
"epoch": 0.23,
"learning_rate": 3.8678795582744105e-05,
"loss": 2.8044,
"step": 68400
},
{
"epoch": 0.23,
"learning_rate": 3.8662244114297824e-05,
"loss": 2.7983,
"step": 68500
},
{
"epoch": 0.23,
"learning_rate": 3.864569264585154e-05,
"loss": 2.7938,
"step": 68600
},
{
"epoch": 0.23,
"learning_rate": 3.862914117740526e-05,
"loss": 2.7924,
"step": 68700
},
{
"epoch": 0.23,
"learning_rate": 3.861258970895898e-05,
"loss": 2.7966,
"step": 68800
},
{
"epoch": 0.23,
"learning_rate": 3.85960382405127e-05,
"loss": 2.8047,
"step": 68900
},
{
"epoch": 0.23,
"learning_rate": 3.857948677206642e-05,
"loss": 2.8105,
"step": 69000
},
{
"epoch": 0.23,
"learning_rate": 3.856293530362014e-05,
"loss": 2.8037,
"step": 69100
},
{
"epoch": 0.23,
"learning_rate": 3.8546383835173864e-05,
"loss": 2.7932,
"step": 69200
},
{
"epoch": 0.23,
"learning_rate": 3.852983236672758e-05,
"loss": 2.7984,
"step": 69300
},
{
"epoch": 0.23,
"learning_rate": 3.8513280898281295e-05,
"loss": 2.7906,
"step": 69400
},
{
"epoch": 0.23,
"learning_rate": 3.8496729429835014e-05,
"loss": 2.8004,
"step": 69500
},
{
"epoch": 0.23,
"learning_rate": 3.848017796138873e-05,
"loss": 2.8039,
"step": 69600
},
{
"epoch": 0.23,
"learning_rate": 3.846362649294246e-05,
"loss": 2.7961,
"step": 69700
},
{
"epoch": 0.23,
"learning_rate": 3.844707502449618e-05,
"loss": 2.812,
"step": 69800
},
{
"epoch": 0.23,
"learning_rate": 3.8430523556049896e-05,
"loss": 2.793,
"step": 69900
},
{
"epoch": 0.23,
"learning_rate": 3.841397208760361e-05,
"loss": 2.8116,
"step": 70000
},
{
"epoch": 0.23,
"learning_rate": 3.8397420619157334e-05,
"loss": 2.8052,
"step": 70100
},
{
"epoch": 0.23,
"learning_rate": 3.838086915071105e-05,
"loss": 2.8059,
"step": 70200
},
{
"epoch": 0.23,
"learning_rate": 3.836431768226477e-05,
"loss": 2.8099,
"step": 70300
},
{
"epoch": 0.23,
"learning_rate": 3.834776621381849e-05,
"loss": 2.8017,
"step": 70400
},
{
"epoch": 0.23,
"learning_rate": 3.833121474537221e-05,
"loss": 2.8093,
"step": 70500
},
{
"epoch": 0.23,
"learning_rate": 3.8314663276925936e-05,
"loss": 2.7935,
"step": 70600
},
{
"epoch": 0.23,
"learning_rate": 3.829811180847965e-05,
"loss": 2.7994,
"step": 70700
},
{
"epoch": 0.23,
"learning_rate": 3.828156034003337e-05,
"loss": 2.7913,
"step": 70800
},
{
"epoch": 0.23,
"learning_rate": 3.8265008871587086e-05,
"loss": 2.8116,
"step": 70900
},
{
"epoch": 0.24,
"learning_rate": 3.8248457403140805e-05,
"loss": 2.793,
"step": 71000
},
{
"epoch": 0.24,
"learning_rate": 3.823190593469453e-05,
"loss": 2.7858,
"step": 71100
},
{
"epoch": 0.24,
"learning_rate": 3.821535446624825e-05,
"loss": 2.8034,
"step": 71200
},
{
"epoch": 0.24,
"learning_rate": 3.819880299780197e-05,
"loss": 2.8032,
"step": 71300
},
{
"epoch": 0.24,
"learning_rate": 3.818225152935568e-05,
"loss": 2.8033,
"step": 71400
},
{
"epoch": 0.24,
"learning_rate": 3.816570006090941e-05,
"loss": 2.7903,
"step": 71500
},
{
"epoch": 0.24,
"learning_rate": 3.8149148592463126e-05,
"loss": 2.7951,
"step": 71600
},
{
"epoch": 0.24,
"learning_rate": 3.8132597124016845e-05,
"loss": 2.7933,
"step": 71700
},
{
"epoch": 0.24,
"learning_rate": 3.8116045655570564e-05,
"loss": 2.7944,
"step": 71800
},
{
"epoch": 0.24,
"learning_rate": 3.809949418712428e-05,
"loss": 2.7994,
"step": 71900
},
{
"epoch": 0.24,
"learning_rate": 3.808294271867801e-05,
"loss": 2.7814,
"step": 72000
},
{
"epoch": 0.24,
"learning_rate": 3.806639125023172e-05,
"loss": 2.8116,
"step": 72100
},
{
"epoch": 0.24,
"learning_rate": 3.804983978178544e-05,
"loss": 2.8044,
"step": 72200
},
{
"epoch": 0.24,
"learning_rate": 3.803328831333916e-05,
"loss": 2.7912,
"step": 72300
},
{
"epoch": 0.24,
"learning_rate": 3.801673684489288e-05,
"loss": 2.7934,
"step": 72400
},
{
"epoch": 0.24,
"learning_rate": 3.8000185376446604e-05,
"loss": 2.7945,
"step": 72500
},
{
"epoch": 0.24,
"learning_rate": 3.798363390800032e-05,
"loss": 2.7875,
"step": 72600
},
{
"epoch": 0.24,
"learning_rate": 3.796708243955404e-05,
"loss": 2.7922,
"step": 72700
},
{
"epoch": 0.24,
"learning_rate": 3.7950530971107754e-05,
"loss": 2.8091,
"step": 72800
},
{
"epoch": 0.24,
"learning_rate": 3.793397950266148e-05,
"loss": 2.7993,
"step": 72900
},
{
"epoch": 0.24,
"learning_rate": 3.79174280342152e-05,
"loss": 2.7959,
"step": 73000
},
{
"epoch": 0.24,
"learning_rate": 3.790087656576892e-05,
"loss": 2.7881,
"step": 73100
},
{
"epoch": 0.24,
"learning_rate": 3.7884325097322636e-05,
"loss": 2.7973,
"step": 73200
},
{
"epoch": 0.24,
"learning_rate": 3.7867773628876355e-05,
"loss": 2.7978,
"step": 73300
},
{
"epoch": 0.24,
"learning_rate": 3.7851222160430074e-05,
"loss": 2.8087,
"step": 73400
},
{
"epoch": 0.24,
"learning_rate": 3.783467069198379e-05,
"loss": 2.8027,
"step": 73500
},
{
"epoch": 0.24,
"learning_rate": 3.781811922353751e-05,
"loss": 2.7975,
"step": 73600
},
{
"epoch": 0.24,
"learning_rate": 3.780156775509123e-05,
"loss": 2.7962,
"step": 73700
},
{
"epoch": 0.24,
"learning_rate": 3.778501628664495e-05,
"loss": 2.8015,
"step": 73800
},
{
"epoch": 0.24,
"learning_rate": 3.7768464818198676e-05,
"loss": 2.8002,
"step": 73900
},
{
"epoch": 0.24,
"learning_rate": 3.7751913349752395e-05,
"loss": 2.8134,
"step": 74000
},
{
"epoch": 0.25,
"learning_rate": 3.7735361881306114e-05,
"loss": 2.8049,
"step": 74100
},
{
"epoch": 0.25,
"learning_rate": 3.7718810412859826e-05,
"loss": 2.7857,
"step": 74200
},
{
"epoch": 0.25,
"learning_rate": 3.770225894441355e-05,
"loss": 2.8082,
"step": 74300
},
{
"epoch": 0.25,
"learning_rate": 3.768570747596727e-05,
"loss": 2.7961,
"step": 74400
},
{
"epoch": 0.25,
"learning_rate": 3.766915600752099e-05,
"loss": 2.7975,
"step": 74500
},
{
"epoch": 0.25,
"learning_rate": 3.765260453907471e-05,
"loss": 2.8033,
"step": 74600
},
{
"epoch": 0.25,
"learning_rate": 3.763605307062843e-05,
"loss": 2.789,
"step": 74700
},
{
"epoch": 0.25,
"learning_rate": 3.761950160218215e-05,
"loss": 2.8044,
"step": 74800
},
{
"epoch": 0.25,
"learning_rate": 3.7602950133735866e-05,
"loss": 2.8039,
"step": 74900
},
{
"epoch": 0.25,
"learning_rate": 3.7586398665289585e-05,
"loss": 2.7816,
"step": 75000
},
{
"epoch": 0.25,
"learning_rate": 3.7569847196843304e-05,
"loss": 2.7964,
"step": 75100
},
{
"epoch": 0.25,
"learning_rate": 3.755329572839702e-05,
"loss": 2.8026,
"step": 75200
},
{
"epoch": 0.25,
"learning_rate": 3.753674425995075e-05,
"loss": 2.7933,
"step": 75300
},
{
"epoch": 0.25,
"learning_rate": 3.752019279150447e-05,
"loss": 2.791,
"step": 75400
},
{
"epoch": 0.25,
"learning_rate": 3.7503641323058187e-05,
"loss": 2.8032,
"step": 75500
},
{
"epoch": 0.25,
"learning_rate": 3.74870898546119e-05,
"loss": 2.7881,
"step": 75600
},
{
"epoch": 0.25,
"learning_rate": 3.7470538386165624e-05,
"loss": 2.8178,
"step": 75700
},
{
"epoch": 0.25,
"learning_rate": 3.7453986917719343e-05,
"loss": 2.7971,
"step": 75800
},
{
"epoch": 0.25,
"learning_rate": 3.743743544927306e-05,
"loss": 2.7892,
"step": 75900
},
{
"epoch": 0.25,
"learning_rate": 3.742088398082678e-05,
"loss": 2.7943,
"step": 76000
},
{
"epoch": 0.25,
"learning_rate": 3.74043325123805e-05,
"loss": 2.7954,
"step": 76100
},
{
"epoch": 0.25,
"learning_rate": 3.738778104393422e-05,
"loss": 2.8102,
"step": 76200
},
{
"epoch": 0.25,
"learning_rate": 3.737122957548794e-05,
"loss": 2.7908,
"step": 76300
},
{
"epoch": 0.25,
"learning_rate": 3.735467810704166e-05,
"loss": 2.8037,
"step": 76400
},
{
"epoch": 0.25,
"learning_rate": 3.7338126638595376e-05,
"loss": 2.8037,
"step": 76500
},
{
"epoch": 0.25,
"learning_rate": 3.7321575170149095e-05,
"loss": 2.7986,
"step": 76600
},
{
"epoch": 0.25,
"learning_rate": 3.730502370170282e-05,
"loss": 2.8097,
"step": 76700
},
{
"epoch": 0.25,
"learning_rate": 3.728847223325654e-05,
"loss": 2.7869,
"step": 76800
},
{
"epoch": 0.25,
"learning_rate": 3.727192076481025e-05,
"loss": 2.803,
"step": 76900
},
{
"epoch": 0.25,
"learning_rate": 3.725536929636397e-05,
"loss": 2.7874,
"step": 77000
},
{
"epoch": 0.26,
"learning_rate": 3.723881782791769e-05,
"loss": 2.793,
"step": 77100
},
{
"epoch": 0.26,
"learning_rate": 3.7222266359471416e-05,
"loss": 2.7934,
"step": 77200
},
{
"epoch": 0.26,
"learning_rate": 3.7205714891025135e-05,
"loss": 2.7955,
"step": 77300
},
{
"epoch": 0.26,
"learning_rate": 3.7189163422578854e-05,
"loss": 2.8036,
"step": 77400
},
{
"epoch": 0.26,
"learning_rate": 3.717261195413257e-05,
"loss": 2.7991,
"step": 77500
},
{
"epoch": 0.26,
"learning_rate": 3.715606048568629e-05,
"loss": 2.7935,
"step": 77600
},
{
"epoch": 0.26,
"learning_rate": 3.713950901724001e-05,
"loss": 2.8034,
"step": 77700
},
{
"epoch": 0.26,
"learning_rate": 3.712295754879373e-05,
"loss": 2.7982,
"step": 77800
},
{
"epoch": 0.26,
"learning_rate": 3.710640608034745e-05,
"loss": 2.7929,
"step": 77900
},
{
"epoch": 0.26,
"learning_rate": 3.708985461190117e-05,
"loss": 2.8051,
"step": 78000
},
{
"epoch": 0.26,
"learning_rate": 3.7073303143454894e-05,
"loss": 2.7848,
"step": 78100
},
{
"epoch": 0.26,
"learning_rate": 3.705675167500861e-05,
"loss": 2.8122,
"step": 78200
},
{
"epoch": 0.26,
"learning_rate": 3.7040200206562325e-05,
"loss": 2.7852,
"step": 78300
},
{
"epoch": 0.26,
"learning_rate": 3.7023648738116044e-05,
"loss": 2.8042,
"step": 78400
},
{
"epoch": 0.26,
"learning_rate": 3.700709726966976e-05,
"loss": 2.8095,
"step": 78500
},
{
"epoch": 0.26,
"learning_rate": 3.699054580122349e-05,
"loss": 2.7797,
"step": 78600
},
{
"epoch": 0.26,
"learning_rate": 3.697399433277721e-05,
"loss": 2.8006,
"step": 78700
},
{
"epoch": 0.26,
"learning_rate": 3.6957442864330927e-05,
"loss": 2.7917,
"step": 78800
},
{
"epoch": 0.26,
"learning_rate": 3.6940891395884645e-05,
"loss": 2.8105,
"step": 78900
},
{
"epoch": 0.26,
"learning_rate": 3.6924339927438364e-05,
"loss": 2.7962,
"step": 79000
},
{
"epoch": 0.26,
"learning_rate": 3.6907788458992083e-05,
"loss": 2.8003,
"step": 79100
},
{
"epoch": 0.26,
"learning_rate": 3.68912369905458e-05,
"loss": 2.7943,
"step": 79200
},
{
"epoch": 0.26,
"learning_rate": 3.687468552209952e-05,
"loss": 2.7884,
"step": 79300
},
{
"epoch": 0.26,
"learning_rate": 3.685813405365324e-05,
"loss": 2.7898,
"step": 79400
},
{
"epoch": 0.26,
"learning_rate": 3.6841582585206966e-05,
"loss": 2.7911,
"step": 79500
},
{
"epoch": 0.26,
"learning_rate": 3.6825031116760685e-05,
"loss": 2.8001,
"step": 79600
},
{
"epoch": 0.26,
"learning_rate": 3.68084796483144e-05,
"loss": 2.8035,
"step": 79700
},
{
"epoch": 0.26,
"learning_rate": 3.6791928179868116e-05,
"loss": 2.7965,
"step": 79800
},
{
"epoch": 0.26,
"learning_rate": 3.6775376711421835e-05,
"loss": 2.7912,
"step": 79900
},
{
"epoch": 0.26,
"learning_rate": 3.675882524297556e-05,
"loss": 2.806,
"step": 80000
},
{
"epoch": 0.27,
"learning_rate": 3.674227377452928e-05,
"loss": 2.7825,
"step": 80100
},
{
"epoch": 0.27,
"learning_rate": 3.6725722306083e-05,
"loss": 2.8045,
"step": 80200
},
{
"epoch": 0.27,
"learning_rate": 3.670917083763672e-05,
"loss": 2.8077,
"step": 80300
},
{
"epoch": 0.27,
"learning_rate": 3.669261936919044e-05,
"loss": 2.799,
"step": 80400
},
{
"epoch": 0.27,
"learning_rate": 3.6676067900744156e-05,
"loss": 2.795,
"step": 80500
},
{
"epoch": 0.27,
"learning_rate": 3.6659516432297875e-05,
"loss": 2.7984,
"step": 80600
},
{
"epoch": 0.27,
"learning_rate": 3.6642964963851594e-05,
"loss": 2.799,
"step": 80700
},
{
"epoch": 0.27,
"learning_rate": 3.662641349540531e-05,
"loss": 2.7995,
"step": 80800
},
{
"epoch": 0.27,
"learning_rate": 3.660986202695904e-05,
"loss": 2.7957,
"step": 80900
},
{
"epoch": 0.27,
"learning_rate": 3.659331055851276e-05,
"loss": 2.7811,
"step": 81000
},
{
"epoch": 0.27,
"learning_rate": 3.657675909006647e-05,
"loss": 2.7886,
"step": 81100
},
{
"epoch": 0.27,
"learning_rate": 3.656020762162019e-05,
"loss": 2.8242,
"step": 81200
},
{
"epoch": 0.27,
"learning_rate": 3.654365615317391e-05,
"loss": 2.7909,
"step": 81300
},
{
"epoch": 0.27,
"learning_rate": 3.6527104684727634e-05,
"loss": 2.798,
"step": 81400
},
{
"epoch": 0.27,
"learning_rate": 3.651055321628135e-05,
"loss": 2.7922,
"step": 81500
},
{
"epoch": 0.27,
"learning_rate": 3.649400174783507e-05,
"loss": 2.7939,
"step": 81600
},
{
"epoch": 0.27,
"learning_rate": 3.647745027938879e-05,
"loss": 2.8081,
"step": 81700
},
{
"epoch": 0.27,
"learning_rate": 3.646089881094251e-05,
"loss": 2.7998,
"step": 81800
},
{
"epoch": 0.27,
"learning_rate": 3.644434734249623e-05,
"loss": 2.8032,
"step": 81900
},
{
"epoch": 0.27,
"learning_rate": 3.642779587404995e-05,
"loss": 2.7968,
"step": 82000
},
{
"epoch": 0.27,
"learning_rate": 3.6411244405603666e-05,
"loss": 2.7985,
"step": 82100
},
{
"epoch": 0.27,
"learning_rate": 3.6394692937157385e-05,
"loss": 2.7965,
"step": 82200
},
{
"epoch": 0.27,
"learning_rate": 3.6378141468711104e-05,
"loss": 2.7992,
"step": 82300
},
{
"epoch": 0.27,
"learning_rate": 3.636159000026483e-05,
"loss": 2.8059,
"step": 82400
},
{
"epoch": 0.27,
"learning_rate": 3.634503853181854e-05,
"loss": 2.7912,
"step": 82500
},
{
"epoch": 0.27,
"learning_rate": 3.632848706337226e-05,
"loss": 2.7922,
"step": 82600
},
{
"epoch": 0.27,
"learning_rate": 3.631193559492598e-05,
"loss": 2.7988,
"step": 82700
},
{
"epoch": 0.27,
"learning_rate": 3.6295384126479706e-05,
"loss": 2.8027,
"step": 82800
},
{
"epoch": 0.27,
"learning_rate": 3.6278832658033425e-05,
"loss": 2.8087,
"step": 82900
},
{
"epoch": 0.27,
"learning_rate": 3.6262281189587144e-05,
"loss": 2.7949,
"step": 83000
},
{
"epoch": 0.28,
"learning_rate": 3.6245729721140856e-05,
"loss": 2.7839,
"step": 83100
},
{
"epoch": 0.28,
"learning_rate": 3.622917825269458e-05,
"loss": 2.798,
"step": 83200
},
{
"epoch": 0.28,
"learning_rate": 3.62126267842483e-05,
"loss": 2.8024,
"step": 83300
},
{
"epoch": 0.28,
"learning_rate": 3.619607531580202e-05,
"loss": 2.7996,
"step": 83400
},
{
"epoch": 0.28,
"learning_rate": 3.617952384735574e-05,
"loss": 2.7864,
"step": 83500
},
{
"epoch": 0.28,
"learning_rate": 3.616297237890946e-05,
"loss": 2.7975,
"step": 83600
},
{
"epoch": 0.28,
"learning_rate": 3.614642091046318e-05,
"loss": 2.8074,
"step": 83700
},
{
"epoch": 0.28,
"learning_rate": 3.6129869442016896e-05,
"loss": 2.7986,
"step": 83800
},
{
"epoch": 0.28,
"learning_rate": 3.6113317973570615e-05,
"loss": 2.7989,
"step": 83900
},
{
"epoch": 0.28,
"learning_rate": 3.6096766505124334e-05,
"loss": 2.7948,
"step": 84000
},
{
"epoch": 0.28,
"learning_rate": 3.608021503667805e-05,
"loss": 2.7916,
"step": 84100
},
{
"epoch": 0.28,
"learning_rate": 3.606366356823178e-05,
"loss": 2.7937,
"step": 84200
},
{
"epoch": 0.28,
"learning_rate": 3.60471120997855e-05,
"loss": 2.7929,
"step": 84300
},
{
"epoch": 0.28,
"learning_rate": 3.603056063133922e-05,
"loss": 2.7914,
"step": 84400
},
{
"epoch": 0.28,
"learning_rate": 3.601400916289293e-05,
"loss": 2.7831,
"step": 84500
},
{
"epoch": 0.28,
"learning_rate": 3.5997457694446655e-05,
"loss": 2.78,
"step": 84600
},
{
"epoch": 0.28,
"learning_rate": 3.5980906226000374e-05,
"loss": 2.809,
"step": 84700
},
{
"epoch": 0.28,
"learning_rate": 3.596435475755409e-05,
"loss": 2.7987,
"step": 84800
},
{
"epoch": 0.28,
"learning_rate": 3.594780328910781e-05,
"loss": 2.7889,
"step": 84900
},
{
"epoch": 0.28,
"learning_rate": 3.593125182066153e-05,
"loss": 2.806,
"step": 85000
},
{
"epoch": 0.28,
"learning_rate": 3.591470035221525e-05,
"loss": 2.8064,
"step": 85100
},
{
"epoch": 0.28,
"learning_rate": 3.589814888376897e-05,
"loss": 2.7939,
"step": 85200
},
{
"epoch": 0.28,
"learning_rate": 3.588159741532269e-05,
"loss": 2.7968,
"step": 85300
},
{
"epoch": 0.28,
"learning_rate": 3.5865045946876406e-05,
"loss": 2.8035,
"step": 85400
},
{
"epoch": 0.28,
"learning_rate": 3.5848494478430125e-05,
"loss": 2.7947,
"step": 85500
},
{
"epoch": 0.28,
"learning_rate": 3.583194300998385e-05,
"loss": 2.79,
"step": 85600
},
{
"epoch": 0.28,
"learning_rate": 3.581539154153757e-05,
"loss": 2.7937,
"step": 85700
},
{
"epoch": 0.28,
"learning_rate": 3.579884007309129e-05,
"loss": 2.7986,
"step": 85800
},
{
"epoch": 0.28,
"learning_rate": 3.5782288604645e-05,
"loss": 2.7959,
"step": 85900
},
{
"epoch": 0.28,
"learning_rate": 3.576573713619872e-05,
"loss": 2.7817,
"step": 86000
},
{
"epoch": 0.29,
"learning_rate": 3.5749185667752446e-05,
"loss": 2.7887,
"step": 86100
},
{
"epoch": 0.29,
"learning_rate": 3.5732634199306165e-05,
"loss": 2.8046,
"step": 86200
},
{
"epoch": 0.29,
"learning_rate": 3.5716082730859884e-05,
"loss": 2.8154,
"step": 86300
},
{
"epoch": 0.29,
"learning_rate": 3.56995312624136e-05,
"loss": 2.7992,
"step": 86400
},
{
"epoch": 0.29,
"learning_rate": 3.568297979396732e-05,
"loss": 2.7906,
"step": 86500
},
{
"epoch": 0.29,
"learning_rate": 3.566642832552104e-05,
"loss": 2.7971,
"step": 86600
},
{
"epoch": 0.29,
"learning_rate": 3.564987685707476e-05,
"loss": 2.8006,
"step": 86700
},
{
"epoch": 0.29,
"learning_rate": 3.563332538862848e-05,
"loss": 2.7867,
"step": 86800
},
{
"epoch": 0.29,
"learning_rate": 3.56167739201822e-05,
"loss": 2.8029,
"step": 86900
},
{
"epoch": 0.29,
"learning_rate": 3.5600222451735924e-05,
"loss": 2.8045,
"step": 87000
},
{
"epoch": 0.29,
"learning_rate": 3.558367098328964e-05,
"loss": 2.7958,
"step": 87100
},
{
"epoch": 0.29,
"learning_rate": 3.556711951484336e-05,
"loss": 2.7877,
"step": 87200
},
{
"epoch": 0.29,
"learning_rate": 3.5550568046397074e-05,
"loss": 2.7957,
"step": 87300
},
{
"epoch": 0.29,
"learning_rate": 3.553401657795079e-05,
"loss": 2.7879,
"step": 87400
},
{
"epoch": 0.29,
"learning_rate": 3.551746510950452e-05,
"loss": 2.7893,
"step": 87500
},
{
"epoch": 0.29,
"learning_rate": 3.550091364105824e-05,
"loss": 2.7988,
"step": 87600
},
{
"epoch": 0.29,
"learning_rate": 3.5484362172611957e-05,
"loss": 2.7894,
"step": 87700
},
{
"epoch": 0.29,
"learning_rate": 3.5467810704165676e-05,
"loss": 2.7894,
"step": 87800
},
{
"epoch": 0.29,
"learning_rate": 3.5451259235719395e-05,
"loss": 2.7947,
"step": 87900
},
{
"epoch": 0.29,
"learning_rate": 3.5434707767273114e-05,
"loss": 2.7858,
"step": 88000
},
{
"epoch": 0.29,
"learning_rate": 3.541815629882683e-05,
"loss": 2.7793,
"step": 88100
},
{
"epoch": 0.29,
"learning_rate": 3.540160483038055e-05,
"loss": 2.7998,
"step": 88200
},
{
"epoch": 0.29,
"learning_rate": 3.538505336193427e-05,
"loss": 2.7966,
"step": 88300
},
{
"epoch": 0.29,
"learning_rate": 3.5368501893487996e-05,
"loss": 2.7911,
"step": 88400
},
{
"epoch": 0.29,
"learning_rate": 3.5351950425041715e-05,
"loss": 2.7925,
"step": 88500
},
{
"epoch": 0.29,
"learning_rate": 3.5335398956595434e-05,
"loss": 2.7874,
"step": 88600
},
{
"epoch": 0.29,
"learning_rate": 3.5318847488149146e-05,
"loss": 2.792,
"step": 88700
},
{
"epoch": 0.29,
"learning_rate": 3.5302296019702865e-05,
"loss": 2.798,
"step": 88800
},
{
"epoch": 0.29,
"learning_rate": 3.528574455125659e-05,
"loss": 2.8084,
"step": 88900
},
{
"epoch": 0.29,
"learning_rate": 3.526919308281031e-05,
"loss": 2.8082,
"step": 89000
},
{
"epoch": 0.29,
"learning_rate": 3.525264161436403e-05,
"loss": 2.7882,
"step": 89100
},
{
"epoch": 0.3,
"learning_rate": 3.523609014591775e-05,
"loss": 2.7884,
"step": 89200
},
{
"epoch": 0.3,
"learning_rate": 3.521953867747147e-05,
"loss": 2.7991,
"step": 89300
},
{
"epoch": 0.3,
"learning_rate": 3.5202987209025186e-05,
"loss": 2.7927,
"step": 89400
},
{
"epoch": 0.3,
"learning_rate": 3.5186435740578905e-05,
"loss": 2.8031,
"step": 89500
},
{
"epoch": 0.3,
"learning_rate": 3.5169884272132624e-05,
"loss": 2.7861,
"step": 89600
},
{
"epoch": 0.3,
"learning_rate": 3.515333280368634e-05,
"loss": 2.7839,
"step": 89700
},
{
"epoch": 0.3,
"learning_rate": 3.513678133524007e-05,
"loss": 2.7954,
"step": 89800
},
{
"epoch": 0.3,
"learning_rate": 3.512022986679379e-05,
"loss": 2.8076,
"step": 89900
},
{
"epoch": 0.3,
"learning_rate": 3.51036783983475e-05,
"loss": 2.7951,
"step": 90000
},
{
"epoch": 0.3,
"learning_rate": 3.508712692990122e-05,
"loss": 2.8056,
"step": 90100
},
{
"epoch": 0.3,
"learning_rate": 3.507057546145494e-05,
"loss": 2.784,
"step": 90200
},
{
"epoch": 0.3,
"learning_rate": 3.5054023993008664e-05,
"loss": 2.7861,
"step": 90300
},
{
"epoch": 0.3,
"learning_rate": 3.503747252456238e-05,
"loss": 2.8018,
"step": 90400
},
{
"epoch": 0.3,
"learning_rate": 3.50209210561161e-05,
"loss": 2.7928,
"step": 90500
},
{
"epoch": 0.3,
"learning_rate": 3.500436958766982e-05,
"loss": 2.8092,
"step": 90600
},
{
"epoch": 0.3,
"learning_rate": 3.498781811922354e-05,
"loss": 2.8036,
"step": 90700
},
{
"epoch": 0.3,
"learning_rate": 3.497126665077726e-05,
"loss": 2.8082,
"step": 90800
},
{
"epoch": 0.3,
"learning_rate": 3.495471518233098e-05,
"loss": 2.7942,
"step": 90900
},
{
"epoch": 0.3,
"learning_rate": 3.4938163713884697e-05,
"loss": 2.7854,
"step": 91000
},
{
"epoch": 0.3,
"learning_rate": 3.4921612245438416e-05,
"loss": 2.7866,
"step": 91100
},
{
"epoch": 0.3,
"learning_rate": 3.4905060776992135e-05,
"loss": 2.7999,
"step": 91200
},
{
"epoch": 0.3,
"learning_rate": 3.488850930854586e-05,
"loss": 2.7915,
"step": 91300
},
{
"epoch": 0.3,
"learning_rate": 3.487195784009957e-05,
"loss": 2.7863,
"step": 91400
},
{
"epoch": 0.3,
"learning_rate": 3.485540637165329e-05,
"loss": 2.7946,
"step": 91500
},
{
"epoch": 0.3,
"learning_rate": 3.483885490320701e-05,
"loss": 2.8056,
"step": 91600
},
{
"epoch": 0.3,
"learning_rate": 3.4822303434760736e-05,
"loss": 2.8009,
"step": 91700
},
{
"epoch": 0.3,
"learning_rate": 3.4805751966314455e-05,
"loss": 2.7764,
"step": 91800
},
{
"epoch": 0.3,
"learning_rate": 3.4789200497868174e-05,
"loss": 2.7902,
"step": 91900
},
{
"epoch": 0.3,
"learning_rate": 3.477264902942189e-05,
"loss": 2.798,
"step": 92000
},
{
"epoch": 0.3,
"learning_rate": 3.475609756097561e-05,
"loss": 2.8087,
"step": 92100
},
{
"epoch": 0.31,
"learning_rate": 3.473954609252933e-05,
"loss": 2.8151,
"step": 92200
},
{
"epoch": 0.31,
"learning_rate": 3.472299462408305e-05,
"loss": 2.7908,
"step": 92300
},
{
"epoch": 0.31,
"learning_rate": 3.470644315563677e-05,
"loss": 2.7867,
"step": 92400
},
{
"epoch": 0.31,
"learning_rate": 3.468989168719049e-05,
"loss": 2.7974,
"step": 92500
},
{
"epoch": 0.31,
"learning_rate": 3.467334021874421e-05,
"loss": 2.7866,
"step": 92600
},
{
"epoch": 0.31,
"learning_rate": 3.465678875029793e-05,
"loss": 2.7805,
"step": 92700
},
{
"epoch": 0.31,
"learning_rate": 3.4640237281851645e-05,
"loss": 2.7888,
"step": 92800
},
{
"epoch": 0.31,
"learning_rate": 3.4623685813405364e-05,
"loss": 2.8069,
"step": 92900
},
{
"epoch": 0.31,
"learning_rate": 3.460713434495908e-05,
"loss": 2.7875,
"step": 93000
},
{
"epoch": 0.31,
"learning_rate": 3.459058287651281e-05,
"loss": 2.7935,
"step": 93100
},
{
"epoch": 0.31,
"learning_rate": 3.457403140806653e-05,
"loss": 2.7907,
"step": 93200
},
{
"epoch": 0.31,
"learning_rate": 3.455747993962025e-05,
"loss": 2.8058,
"step": 93300
},
{
"epoch": 0.31,
"learning_rate": 3.4540928471173966e-05,
"loss": 2.8035,
"step": 93400
},
{
"epoch": 0.31,
"learning_rate": 3.4524377002727685e-05,
"loss": 2.7926,
"step": 93500
},
{
"epoch": 0.31,
"learning_rate": 3.4507825534281404e-05,
"loss": 2.7808,
"step": 93600
},
{
"epoch": 0.31,
"learning_rate": 3.449127406583512e-05,
"loss": 2.7932,
"step": 93700
},
{
"epoch": 0.31,
"learning_rate": 3.447472259738884e-05,
"loss": 2.7944,
"step": 93800
},
{
"epoch": 0.31,
"learning_rate": 3.445817112894256e-05,
"loss": 2.781,
"step": 93900
},
{
"epoch": 0.31,
"learning_rate": 3.444161966049628e-05,
"loss": 2.8042,
"step": 94000
},
{
"epoch": 0.31,
"learning_rate": 3.4425068192050005e-05,
"loss": 2.7935,
"step": 94100
},
{
"epoch": 0.31,
"learning_rate": 3.440851672360372e-05,
"loss": 2.7941,
"step": 94200
},
{
"epoch": 0.31,
"learning_rate": 3.4391965255157437e-05,
"loss": 2.7841,
"step": 94300
},
{
"epoch": 0.31,
"learning_rate": 3.4375413786711156e-05,
"loss": 2.7892,
"step": 94400
},
{
"epoch": 0.31,
"learning_rate": 3.435886231826488e-05,
"loss": 2.7938,
"step": 94500
},
{
"epoch": 0.31,
"learning_rate": 3.43423108498186e-05,
"loss": 2.7926,
"step": 94600
},
{
"epoch": 0.31,
"learning_rate": 3.432575938137232e-05,
"loss": 2.793,
"step": 94700
},
{
"epoch": 0.31,
"learning_rate": 3.430920791292604e-05,
"loss": 2.7951,
"step": 94800
},
{
"epoch": 0.31,
"learning_rate": 3.429265644447975e-05,
"loss": 2.8048,
"step": 94900
},
{
"epoch": 0.31,
"learning_rate": 3.4276104976033476e-05,
"loss": 2.7899,
"step": 95000
},
{
"epoch": 0.31,
"learning_rate": 3.4259553507587195e-05,
"loss": 2.7918,
"step": 95100
},
{
"epoch": 0.32,
"learning_rate": 3.4243002039140914e-05,
"loss": 2.7899,
"step": 95200
},
{
"epoch": 0.32,
"learning_rate": 3.422645057069463e-05,
"loss": 2.7859,
"step": 95300
},
{
"epoch": 0.32,
"learning_rate": 3.420989910224835e-05,
"loss": 2.7945,
"step": 95400
},
{
"epoch": 0.32,
"learning_rate": 3.419334763380208e-05,
"loss": 2.7848,
"step": 95500
},
{
"epoch": 0.32,
"learning_rate": 3.417679616535579e-05,
"loss": 2.7842,
"step": 95600
},
{
"epoch": 0.32,
"learning_rate": 3.416024469690951e-05,
"loss": 2.7983,
"step": 95700
},
{
"epoch": 0.32,
"learning_rate": 3.414369322846323e-05,
"loss": 2.8021,
"step": 95800
},
{
"epoch": 0.32,
"learning_rate": 3.4127141760016954e-05,
"loss": 2.7948,
"step": 95900
},
{
"epoch": 0.32,
"learning_rate": 3.411059029157067e-05,
"loss": 2.7876,
"step": 96000
},
{
"epoch": 0.32,
"learning_rate": 3.409403882312439e-05,
"loss": 2.7933,
"step": 96100
},
{
"epoch": 0.32,
"learning_rate": 3.4077487354678104e-05,
"loss": 2.7829,
"step": 96200
},
{
"epoch": 0.32,
"learning_rate": 3.406093588623182e-05,
"loss": 2.8034,
"step": 96300
},
{
"epoch": 0.32,
"learning_rate": 3.404438441778555e-05,
"loss": 2.7833,
"step": 96400
},
{
"epoch": 0.32,
"learning_rate": 3.402783294933927e-05,
"loss": 2.8065,
"step": 96500
},
{
"epoch": 0.32,
"learning_rate": 3.401128148089299e-05,
"loss": 2.7941,
"step": 96600
},
{
"epoch": 0.32,
"learning_rate": 3.3994730012446706e-05,
"loss": 2.7911,
"step": 96700
},
{
"epoch": 0.32,
"learning_rate": 3.3978178544000425e-05,
"loss": 2.8107,
"step": 96800
},
{
"epoch": 0.32,
"learning_rate": 3.3961627075554144e-05,
"loss": 2.7953,
"step": 96900
},
{
"epoch": 0.32,
"learning_rate": 3.394507560710786e-05,
"loss": 2.7856,
"step": 97000
},
{
"epoch": 0.32,
"learning_rate": 3.392852413866158e-05,
"loss": 2.7887,
"step": 97100
},
{
"epoch": 0.32,
"learning_rate": 3.39119726702153e-05,
"loss": 2.7941,
"step": 97200
},
{
"epoch": 0.32,
"learning_rate": 3.3895421201769026e-05,
"loss": 2.7983,
"step": 97300
},
{
"epoch": 0.32,
"learning_rate": 3.3878869733322745e-05,
"loss": 2.791,
"step": 97400
},
{
"epoch": 0.32,
"learning_rate": 3.3862318264876464e-05,
"loss": 2.7934,
"step": 97500
},
{
"epoch": 0.32,
"learning_rate": 3.3845766796430177e-05,
"loss": 2.7918,
"step": 97600
},
{
"epoch": 0.32,
"learning_rate": 3.3829215327983896e-05,
"loss": 2.7978,
"step": 97700
},
{
"epoch": 0.32,
"learning_rate": 3.381266385953762e-05,
"loss": 2.7978,
"step": 97800
},
{
"epoch": 0.32,
"learning_rate": 3.379611239109134e-05,
"loss": 2.8136,
"step": 97900
},
{
"epoch": 0.32,
"learning_rate": 3.377956092264506e-05,
"loss": 2.7875,
"step": 98000
},
{
"epoch": 0.32,
"learning_rate": 3.376300945419878e-05,
"loss": 2.7835,
"step": 98100
},
{
"epoch": 0.33,
"learning_rate": 3.37464579857525e-05,
"loss": 2.7872,
"step": 98200
},
{
"epoch": 0.33,
"learning_rate": 3.3729906517306216e-05,
"loss": 2.7989,
"step": 98300
},
{
"epoch": 0.33,
"learning_rate": 3.3713355048859935e-05,
"loss": 2.7881,
"step": 98400
},
{
"epoch": 0.33,
"learning_rate": 3.3696803580413654e-05,
"loss": 2.7989,
"step": 98500
},
{
"epoch": 0.33,
"learning_rate": 3.368025211196737e-05,
"loss": 2.7989,
"step": 98600
},
{
"epoch": 0.33,
"learning_rate": 3.36637006435211e-05,
"loss": 2.8182,
"step": 98700
},
{
"epoch": 0.33,
"learning_rate": 3.364714917507482e-05,
"loss": 2.7951,
"step": 98800
},
{
"epoch": 0.33,
"learning_rate": 3.363059770662854e-05,
"loss": 2.7792,
"step": 98900
},
{
"epoch": 0.33,
"learning_rate": 3.361404623818225e-05,
"loss": 2.7931,
"step": 99000
},
{
"epoch": 0.33,
"learning_rate": 3.359749476973597e-05,
"loss": 2.8,
"step": 99100
},
{
"epoch": 0.33,
"learning_rate": 3.3580943301289694e-05,
"loss": 2.7939,
"step": 99200
},
{
"epoch": 0.33,
"learning_rate": 3.356439183284341e-05,
"loss": 2.7996,
"step": 99300
},
{
"epoch": 0.33,
"learning_rate": 3.354784036439713e-05,
"loss": 2.8009,
"step": 99400
},
{
"epoch": 0.33,
"learning_rate": 3.353128889595085e-05,
"loss": 2.7943,
"step": 99500
},
{
"epoch": 0.33,
"learning_rate": 3.351473742750457e-05,
"loss": 2.7853,
"step": 99600
},
{
"epoch": 0.33,
"learning_rate": 3.349818595905829e-05,
"loss": 2.7853,
"step": 99700
},
{
"epoch": 0.33,
"learning_rate": 3.348163449061201e-05,
"loss": 2.8004,
"step": 99800
},
{
"epoch": 0.33,
"learning_rate": 3.346508302216573e-05,
"loss": 2.7958,
"step": 99900
},
{
"epoch": 0.33,
"learning_rate": 3.3448531553719446e-05,
"loss": 2.7812,
"step": 100000
},
{
"epoch": 0.33,
"learning_rate": 3.3431980085273165e-05,
"loss": 2.7962,
"step": 100100
},
{
"epoch": 0.33,
"learning_rate": 3.341542861682689e-05,
"loss": 2.788,
"step": 100200
},
{
"epoch": 0.33,
"learning_rate": 3.339887714838061e-05,
"loss": 2.7938,
"step": 100300
},
{
"epoch": 0.33,
"learning_rate": 3.338232567993432e-05,
"loss": 2.7806,
"step": 100400
},
{
"epoch": 0.33,
"learning_rate": 3.336577421148804e-05,
"loss": 2.7939,
"step": 100500
},
{
"epoch": 0.33,
"learning_rate": 3.3349222743041766e-05,
"loss": 2.7813,
"step": 100600
},
{
"epoch": 0.33,
"learning_rate": 3.3332671274595485e-05,
"loss": 2.8064,
"step": 100700
},
{
"epoch": 0.33,
"learning_rate": 3.3316119806149204e-05,
"loss": 2.7841,
"step": 100800
},
{
"epoch": 0.33,
"learning_rate": 3.329956833770292e-05,
"loss": 2.8049,
"step": 100900
},
{
"epoch": 0.33,
"learning_rate": 3.328301686925664e-05,
"loss": 2.8005,
"step": 101000
},
{
"epoch": 0.33,
"learning_rate": 3.326646540081036e-05,
"loss": 2.7827,
"step": 101100
},
{
"epoch": 0.34,
"learning_rate": 3.324991393236408e-05,
"loss": 2.7936,
"step": 101200
},
{
"epoch": 0.34,
"learning_rate": 3.32333624639178e-05,
"loss": 2.7981,
"step": 101300
},
{
"epoch": 0.34,
"learning_rate": 3.321681099547152e-05,
"loss": 2.7851,
"step": 101400
},
{
"epoch": 0.34,
"learning_rate": 3.320025952702524e-05,
"loss": 2.8006,
"step": 101500
},
{
"epoch": 0.34,
"learning_rate": 3.318370805857896e-05,
"loss": 2.7972,
"step": 101600
},
{
"epoch": 0.34,
"learning_rate": 3.316715659013268e-05,
"loss": 2.7888,
"step": 101700
},
{
"epoch": 0.34,
"learning_rate": 3.3150605121686394e-05,
"loss": 2.7833,
"step": 101800
},
{
"epoch": 0.34,
"learning_rate": 3.313405365324011e-05,
"loss": 2.7792,
"step": 101900
},
{
"epoch": 0.34,
"learning_rate": 3.311750218479384e-05,
"loss": 2.788,
"step": 102000
},
{
"epoch": 0.34,
"learning_rate": 3.310095071634756e-05,
"loss": 2.7767,
"step": 102100
},
{
"epoch": 0.34,
"learning_rate": 3.308439924790128e-05,
"loss": 2.7829,
"step": 102200
},
{
"epoch": 0.34,
"learning_rate": 3.3067847779454996e-05,
"loss": 2.797,
"step": 102300
},
{
"epoch": 0.34,
"learning_rate": 3.3051296311008715e-05,
"loss": 2.7872,
"step": 102400
},
{
"epoch": 0.34,
"learning_rate": 3.3034744842562434e-05,
"loss": 2.7898,
"step": 102500
},
{
"epoch": 0.34,
"learning_rate": 3.301819337411615e-05,
"loss": 2.7898,
"step": 102600
},
{
"epoch": 0.34,
"learning_rate": 3.300164190566987e-05,
"loss": 2.7948,
"step": 102700
},
{
"epoch": 0.34,
"learning_rate": 3.298509043722359e-05,
"loss": 2.7868,
"step": 102800
},
{
"epoch": 0.34,
"learning_rate": 3.296853896877731e-05,
"loss": 2.803,
"step": 102900
},
{
"epoch": 0.34,
"learning_rate": 3.2951987500331035e-05,
"loss": 2.7888,
"step": 103000
},
{
"epoch": 0.34,
"learning_rate": 3.293543603188475e-05,
"loss": 2.7972,
"step": 103100
},
{
"epoch": 0.34,
"learning_rate": 3.291888456343847e-05,
"loss": 2.7844,
"step": 103200
},
{
"epoch": 0.34,
"learning_rate": 3.2902333094992186e-05,
"loss": 2.7916,
"step": 103300
},
{
"epoch": 0.34,
"learning_rate": 3.288578162654591e-05,
"loss": 2.7793,
"step": 103400
},
{
"epoch": 0.34,
"learning_rate": 3.286923015809963e-05,
"loss": 2.8114,
"step": 103500
},
{
"epoch": 0.34,
"learning_rate": 3.285267868965335e-05,
"loss": 2.8014,
"step": 103600
},
{
"epoch": 0.34,
"learning_rate": 3.283612722120707e-05,
"loss": 2.7956,
"step": 103700
},
{
"epoch": 0.34,
"learning_rate": 3.281957575276078e-05,
"loss": 2.7915,
"step": 103800
},
{
"epoch": 0.34,
"learning_rate": 3.2803024284314506e-05,
"loss": 2.783,
"step": 103900
},
{
"epoch": 0.34,
"learning_rate": 3.2786472815868225e-05,
"loss": 2.792,
"step": 104000
},
{
"epoch": 0.34,
"learning_rate": 3.2769921347421944e-05,
"loss": 2.7838,
"step": 104100
},
{
"epoch": 0.34,
"learning_rate": 3.275336987897566e-05,
"loss": 2.7865,
"step": 104200
},
{
"epoch": 0.35,
"learning_rate": 3.273681841052938e-05,
"loss": 2.7995,
"step": 104300
},
{
"epoch": 0.35,
"learning_rate": 3.272026694208311e-05,
"loss": 2.7825,
"step": 104400
},
{
"epoch": 0.35,
"learning_rate": 3.270371547363682e-05,
"loss": 2.7923,
"step": 104500
},
{
"epoch": 0.35,
"learning_rate": 3.268716400519054e-05,
"loss": 2.7852,
"step": 104600
},
{
"epoch": 0.35,
"learning_rate": 3.267061253674426e-05,
"loss": 2.7987,
"step": 104700
},
{
"epoch": 0.35,
"learning_rate": 3.2654061068297984e-05,
"loss": 2.7958,
"step": 104800
},
{
"epoch": 0.35,
"learning_rate": 3.26375095998517e-05,
"loss": 2.7904,
"step": 104900
},
{
"epoch": 0.35,
"learning_rate": 3.262095813140542e-05,
"loss": 2.8019,
"step": 105000
},
{
"epoch": 0.35,
"learning_rate": 3.260440666295914e-05,
"loss": 2.7867,
"step": 105100
},
{
"epoch": 0.35,
"learning_rate": 3.258785519451285e-05,
"loss": 2.786,
"step": 105200
},
{
"epoch": 0.35,
"learning_rate": 3.257130372606658e-05,
"loss": 2.7963,
"step": 105300
},
{
"epoch": 0.35,
"learning_rate": 3.25547522576203e-05,
"loss": 2.7889,
"step": 105400
},
{
"epoch": 0.35,
"learning_rate": 3.253820078917402e-05,
"loss": 2.7809,
"step": 105500
},
{
"epoch": 0.35,
"learning_rate": 3.2521649320727736e-05,
"loss": 2.7993,
"step": 105600
},
{
"epoch": 0.35,
"learning_rate": 3.2505097852281455e-05,
"loss": 2.778,
"step": 105700
},
{
"epoch": 0.35,
"learning_rate": 3.248854638383518e-05,
"loss": 2.7981,
"step": 105800
},
{
"epoch": 0.35,
"learning_rate": 3.247199491538889e-05,
"loss": 2.796,
"step": 105900
},
{
"epoch": 0.35,
"learning_rate": 3.245544344694261e-05,
"loss": 2.7921,
"step": 106000
},
{
"epoch": 0.35,
"learning_rate": 3.243889197849633e-05,
"loss": 2.8061,
"step": 106100
},
{
"epoch": 0.35,
"learning_rate": 3.2422340510050056e-05,
"loss": 2.7927,
"step": 106200
},
{
"epoch": 0.35,
"learning_rate": 3.2405789041603775e-05,
"loss": 2.7969,
"step": 106300
},
{
"epoch": 0.35,
"learning_rate": 3.2389237573157494e-05,
"loss": 2.7894,
"step": 106400
},
{
"epoch": 0.35,
"learning_rate": 3.2372686104711213e-05,
"loss": 2.798,
"step": 106500
},
{
"epoch": 0.35,
"learning_rate": 3.2356134636264926e-05,
"loss": 2.7968,
"step": 106600
},
{
"epoch": 0.35,
"learning_rate": 3.233958316781865e-05,
"loss": 2.796,
"step": 106700
},
{
"epoch": 0.35,
"learning_rate": 3.232303169937237e-05,
"loss": 2.7892,
"step": 106800
},
{
"epoch": 0.35,
"learning_rate": 3.230648023092609e-05,
"loss": 2.801,
"step": 106900
},
{
"epoch": 0.35,
"learning_rate": 3.228992876247981e-05,
"loss": 2.7956,
"step": 107000
},
{
"epoch": 0.35,
"learning_rate": 3.227337729403353e-05,
"loss": 2.7937,
"step": 107100
},
{
"epoch": 0.35,
"learning_rate": 3.225682582558725e-05,
"loss": 2.799,
"step": 107200
},
{
"epoch": 0.36,
"learning_rate": 3.2240274357140965e-05,
"loss": 2.7903,
"step": 107300
},
{
"epoch": 0.36,
"learning_rate": 3.2223722888694684e-05,
"loss": 2.7842,
"step": 107400
},
{
"epoch": 0.36,
"learning_rate": 3.22071714202484e-05,
"loss": 2.8027,
"step": 107500
},
{
"epoch": 0.36,
"learning_rate": 3.219061995180213e-05,
"loss": 2.8145,
"step": 107600
},
{
"epoch": 0.36,
"learning_rate": 3.217406848335585e-05,
"loss": 2.794,
"step": 107700
},
{
"epoch": 0.36,
"learning_rate": 3.215751701490957e-05,
"loss": 2.7794,
"step": 107800
},
{
"epoch": 0.36,
"learning_rate": 3.2140965546463286e-05,
"loss": 2.7773,
"step": 107900
},
{
"epoch": 0.36,
"learning_rate": 3.2124414078017e-05,
"loss": 2.7884,
"step": 108000
},
{
"epoch": 0.36,
"learning_rate": 3.2107862609570724e-05,
"loss": 2.7808,
"step": 108100
},
{
"epoch": 0.36,
"learning_rate": 3.209131114112444e-05,
"loss": 2.8017,
"step": 108200
},
{
"epoch": 0.36,
"learning_rate": 3.207475967267816e-05,
"loss": 2.805,
"step": 108300
},
{
"epoch": 0.36,
"learning_rate": 3.205820820423188e-05,
"loss": 2.7912,
"step": 108400
},
{
"epoch": 0.36,
"learning_rate": 3.20416567357856e-05,
"loss": 2.792,
"step": 108500
},
{
"epoch": 0.36,
"learning_rate": 3.2025105267339326e-05,
"loss": 2.7913,
"step": 108600
},
{
"epoch": 0.36,
"learning_rate": 3.200855379889304e-05,
"loss": 2.7977,
"step": 108700
},
{
"epoch": 0.36,
"learning_rate": 3.199200233044676e-05,
"loss": 2.8004,
"step": 108800
},
{
"epoch": 0.36,
"learning_rate": 3.1975450862000476e-05,
"loss": 2.7981,
"step": 108900
},
{
"epoch": 0.36,
"learning_rate": 3.1958899393554195e-05,
"loss": 2.7803,
"step": 109000
},
{
"epoch": 0.36,
"learning_rate": 3.194234792510792e-05,
"loss": 2.7979,
"step": 109100
},
{
"epoch": 0.36,
"learning_rate": 3.192579645666164e-05,
"loss": 2.7885,
"step": 109200
},
{
"epoch": 0.36,
"learning_rate": 3.190924498821535e-05,
"loss": 2.8018,
"step": 109300
},
{
"epoch": 0.36,
"learning_rate": 3.189269351976907e-05,
"loss": 2.8063,
"step": 109400
},
{
"epoch": 0.36,
"learning_rate": 3.1876142051322796e-05,
"loss": 2.804,
"step": 109500
},
{
"epoch": 0.36,
"learning_rate": 3.1859590582876515e-05,
"loss": 2.799,
"step": 109600
},
{
"epoch": 0.36,
"learning_rate": 3.1843039114430234e-05,
"loss": 2.7842,
"step": 109700
},
{
"epoch": 0.36,
"learning_rate": 3.182648764598395e-05,
"loss": 2.7896,
"step": 109800
},
{
"epoch": 0.36,
"learning_rate": 3.180993617753767e-05,
"loss": 2.7947,
"step": 109900
},
{
"epoch": 0.36,
"learning_rate": 3.179338470909139e-05,
"loss": 2.7942,
"step": 110000
},
{
"epoch": 0.36,
"learning_rate": 3.177683324064511e-05,
"loss": 2.7778,
"step": 110100
},
{
"epoch": 0.36,
"learning_rate": 3.176028177219883e-05,
"loss": 2.7983,
"step": 110200
},
{
"epoch": 0.37,
"learning_rate": 3.174373030375255e-05,
"loss": 2.7934,
"step": 110300
},
{
"epoch": 0.37,
"learning_rate": 3.172717883530627e-05,
"loss": 2.7849,
"step": 110400
},
{
"epoch": 0.37,
"learning_rate": 3.171062736685999e-05,
"loss": 2.7911,
"step": 110500
},
{
"epoch": 0.37,
"learning_rate": 3.169407589841371e-05,
"loss": 2.7987,
"step": 110600
},
{
"epoch": 0.37,
"learning_rate": 3.1677524429967424e-05,
"loss": 2.782,
"step": 110700
},
{
"epoch": 0.37,
"learning_rate": 3.166097296152114e-05,
"loss": 2.7827,
"step": 110800
},
{
"epoch": 0.37,
"learning_rate": 3.164442149307487e-05,
"loss": 2.7702,
"step": 110900
},
{
"epoch": 0.37,
"learning_rate": 3.162787002462859e-05,
"loss": 2.7883,
"step": 111000
},
{
"epoch": 0.37,
"learning_rate": 3.161131855618231e-05,
"loss": 2.7813,
"step": 111100
},
{
"epoch": 0.37,
"learning_rate": 3.1594767087736026e-05,
"loss": 2.788,
"step": 111200
},
{
"epoch": 0.37,
"learning_rate": 3.1578215619289745e-05,
"loss": 2.7826,
"step": 111300
},
{
"epoch": 0.37,
"learning_rate": 3.1561664150843464e-05,
"loss": 2.7912,
"step": 111400
},
{
"epoch": 0.37,
"learning_rate": 3.154511268239718e-05,
"loss": 2.776,
"step": 111500
},
{
"epoch": 0.37,
"learning_rate": 3.15285612139509e-05,
"loss": 2.7785,
"step": 111600
},
{
"epoch": 0.37,
"learning_rate": 3.151200974550462e-05,
"loss": 2.7956,
"step": 111700
},
{
"epoch": 0.37,
"learning_rate": 3.149545827705834e-05,
"loss": 2.7874,
"step": 111800
},
{
"epoch": 0.37,
"learning_rate": 3.1478906808612066e-05,
"loss": 2.792,
"step": 111900
},
{
"epoch": 0.37,
"learning_rate": 3.1462355340165785e-05,
"loss": 2.798,
"step": 112000
},
{
"epoch": 0.37,
"learning_rate": 3.14458038717195e-05,
"loss": 2.7935,
"step": 112100
},
{
"epoch": 0.37,
"learning_rate": 3.1429252403273216e-05,
"loss": 2.7692,
"step": 112200
},
{
"epoch": 0.37,
"learning_rate": 3.141270093482694e-05,
"loss": 2.7896,
"step": 112300
},
{
"epoch": 0.37,
"learning_rate": 3.139614946638066e-05,
"loss": 2.794,
"step": 112400
},
{
"epoch": 0.37,
"learning_rate": 3.137959799793438e-05,
"loss": 2.7887,
"step": 112500
},
{
"epoch": 0.37,
"learning_rate": 3.13630465294881e-05,
"loss": 2.785,
"step": 112600
},
{
"epoch": 0.37,
"learning_rate": 3.134649506104182e-05,
"loss": 2.7814,
"step": 112700
},
{
"epoch": 0.37,
"learning_rate": 3.1329943592595536e-05,
"loss": 2.7795,
"step": 112800
},
{
"epoch": 0.37,
"learning_rate": 3.1313392124149255e-05,
"loss": 2.7815,
"step": 112900
},
{
"epoch": 0.37,
"learning_rate": 3.1296840655702974e-05,
"loss": 2.7926,
"step": 113000
},
{
"epoch": 0.37,
"learning_rate": 3.128028918725669e-05,
"loss": 2.7787,
"step": 113100
},
{
"epoch": 0.37,
"learning_rate": 3.126373771881041e-05,
"loss": 2.7863,
"step": 113200
},
{
"epoch": 0.38,
"learning_rate": 3.124718625036414e-05,
"loss": 2.7841,
"step": 113300
},
{
"epoch": 0.38,
"learning_rate": 3.123063478191786e-05,
"loss": 2.7931,
"step": 113400
},
{
"epoch": 0.38,
"learning_rate": 3.121408331347157e-05,
"loss": 2.7927,
"step": 113500
},
{
"epoch": 0.38,
"learning_rate": 3.119753184502529e-05,
"loss": 2.7846,
"step": 113600
},
{
"epoch": 0.38,
"learning_rate": 3.1180980376579014e-05,
"loss": 2.7902,
"step": 113700
},
{
"epoch": 0.38,
"learning_rate": 3.116442890813273e-05,
"loss": 2.789,
"step": 113800
},
{
"epoch": 0.38,
"learning_rate": 3.114787743968645e-05,
"loss": 2.7942,
"step": 113900
},
{
"epoch": 0.38,
"learning_rate": 3.113132597124017e-05,
"loss": 2.7986,
"step": 114000
},
{
"epoch": 0.38,
"learning_rate": 3.111477450279389e-05,
"loss": 2.7819,
"step": 114100
},
{
"epoch": 0.38,
"learning_rate": 3.109822303434761e-05,
"loss": 2.7942,
"step": 114200
},
{
"epoch": 0.38,
"learning_rate": 3.108167156590133e-05,
"loss": 2.7762,
"step": 114300
},
{
"epoch": 0.38,
"learning_rate": 3.106512009745505e-05,
"loss": 2.774,
"step": 114400
},
{
"epoch": 0.38,
"learning_rate": 3.1048568629008766e-05,
"loss": 2.7817,
"step": 114500
},
{
"epoch": 0.38,
"learning_rate": 3.1032017160562485e-05,
"loss": 2.7936,
"step": 114600
},
{
"epoch": 0.38,
"learning_rate": 3.101546569211621e-05,
"loss": 2.7957,
"step": 114700
},
{
"epoch": 0.38,
"learning_rate": 3.099891422366993e-05,
"loss": 2.7702,
"step": 114800
},
{
"epoch": 0.38,
"learning_rate": 3.098236275522364e-05,
"loss": 2.7816,
"step": 114900
},
{
"epoch": 0.38,
"learning_rate": 3.096581128677736e-05,
"loss": 2.7776,
"step": 115000
},
{
"epoch": 0.38,
"learning_rate": 3.0949259818331087e-05,
"loss": 2.7865,
"step": 115100
},
{
"epoch": 0.38,
"learning_rate": 3.0932708349884806e-05,
"loss": 2.794,
"step": 115200
},
{
"epoch": 0.38,
"learning_rate": 3.0916156881438525e-05,
"loss": 2.7873,
"step": 115300
},
{
"epoch": 0.38,
"learning_rate": 3.0899605412992244e-05,
"loss": 2.7894,
"step": 115400
},
{
"epoch": 0.38,
"learning_rate": 3.088305394454596e-05,
"loss": 2.7833,
"step": 115500
},
{
"epoch": 0.38,
"learning_rate": 3.086650247609968e-05,
"loss": 2.7832,
"step": 115600
},
{
"epoch": 0.38,
"learning_rate": 3.08499510076534e-05,
"loss": 2.7865,
"step": 115700
},
{
"epoch": 0.38,
"learning_rate": 3.083339953920712e-05,
"loss": 2.7856,
"step": 115800
},
{
"epoch": 0.38,
"learning_rate": 3.081684807076084e-05,
"loss": 2.7897,
"step": 115900
},
{
"epoch": 0.38,
"learning_rate": 3.080029660231456e-05,
"loss": 2.7917,
"step": 116000
},
{
"epoch": 0.38,
"learning_rate": 3.078374513386828e-05,
"loss": 2.788,
"step": 116100
},
{
"epoch": 0.38,
"learning_rate": 3.0767193665421995e-05,
"loss": 2.7895,
"step": 116200
},
{
"epoch": 0.38,
"learning_rate": 3.0750642196975714e-05,
"loss": 2.7844,
"step": 116300
},
{
"epoch": 0.39,
"learning_rate": 3.073409072852943e-05,
"loss": 2.7912,
"step": 116400
},
{
"epoch": 0.39,
"learning_rate": 3.071753926008316e-05,
"loss": 2.7877,
"step": 116500
},
{
"epoch": 0.39,
"learning_rate": 3.070098779163688e-05,
"loss": 2.7909,
"step": 116600
},
{
"epoch": 0.39,
"learning_rate": 3.06844363231906e-05,
"loss": 2.7926,
"step": 116700
},
{
"epoch": 0.39,
"learning_rate": 3.0667884854744316e-05,
"loss": 2.7806,
"step": 116800
},
{
"epoch": 0.39,
"learning_rate": 3.065133338629803e-05,
"loss": 2.7841,
"step": 116900
},
{
"epoch": 0.39,
"learning_rate": 3.0634781917851754e-05,
"loss": 2.787,
"step": 117000
},
{
"epoch": 0.39,
"learning_rate": 3.061823044940547e-05,
"loss": 2.7789,
"step": 117100
},
{
"epoch": 0.39,
"learning_rate": 3.060167898095919e-05,
"loss": 2.7702,
"step": 117200
},
{
"epoch": 0.39,
"learning_rate": 3.058512751251291e-05,
"loss": 2.7936,
"step": 117300
},
{
"epoch": 0.39,
"learning_rate": 3.056857604406663e-05,
"loss": 2.7896,
"step": 117400
},
{
"epoch": 0.39,
"learning_rate": 3.0552024575620356e-05,
"loss": 2.7848,
"step": 117500
},
{
"epoch": 0.39,
"learning_rate": 3.053547310717407e-05,
"loss": 2.7803,
"step": 117600
},
{
"epoch": 0.39,
"learning_rate": 3.051892163872779e-05,
"loss": 2.7919,
"step": 117700
},
{
"epoch": 0.39,
"learning_rate": 3.050237017028151e-05,
"loss": 2.7847,
"step": 117800
},
{
"epoch": 0.39,
"learning_rate": 3.0485818701835228e-05,
"loss": 2.7884,
"step": 117900
},
{
"epoch": 0.39,
"learning_rate": 3.0469267233388947e-05,
"loss": 2.8017,
"step": 118000
},
{
"epoch": 0.39,
"learning_rate": 3.045271576494267e-05,
"loss": 2.7902,
"step": 118100
},
{
"epoch": 0.39,
"learning_rate": 3.043616429649639e-05,
"loss": 2.7662,
"step": 118200
},
{
"epoch": 0.39,
"learning_rate": 3.0419612828050104e-05,
"loss": 2.7879,
"step": 118300
},
{
"epoch": 0.39,
"learning_rate": 3.0403061359603823e-05,
"loss": 2.7693,
"step": 118400
},
{
"epoch": 0.39,
"learning_rate": 3.0386509891157546e-05,
"loss": 2.781,
"step": 118500
},
{
"epoch": 0.39,
"learning_rate": 3.0369958422711264e-05,
"loss": 2.7978,
"step": 118600
},
{
"epoch": 0.39,
"learning_rate": 3.0353406954264983e-05,
"loss": 2.7619,
"step": 118700
},
{
"epoch": 0.39,
"learning_rate": 3.0336855485818706e-05,
"loss": 2.7885,
"step": 118800
},
{
"epoch": 0.39,
"learning_rate": 3.0320304017372425e-05,
"loss": 2.7805,
"step": 118900
},
{
"epoch": 0.39,
"learning_rate": 3.030375254892614e-05,
"loss": 2.7908,
"step": 119000
},
{
"epoch": 0.39,
"learning_rate": 3.028720108047986e-05,
"loss": 2.7909,
"step": 119100
},
{
"epoch": 0.39,
"learning_rate": 3.0270649612033582e-05,
"loss": 2.7909,
"step": 119200
},
{
"epoch": 0.39,
"learning_rate": 3.02540981435873e-05,
"loss": 2.7831,
"step": 119300
},
{
"epoch": 0.4,
"learning_rate": 3.023754667514102e-05,
"loss": 2.7889,
"step": 119400
},
{
"epoch": 0.4,
"learning_rate": 3.0220995206694742e-05,
"loss": 2.7773,
"step": 119500
},
{
"epoch": 0.4,
"learning_rate": 3.020444373824846e-05,
"loss": 2.8016,
"step": 119600
},
{
"epoch": 0.4,
"learning_rate": 3.0187892269802177e-05,
"loss": 2.7952,
"step": 119700
},
{
"epoch": 0.4,
"learning_rate": 3.0171340801355896e-05,
"loss": 2.7855,
"step": 119800
},
{
"epoch": 0.4,
"learning_rate": 3.0154789332909618e-05,
"loss": 2.7886,
"step": 119900
},
{
"epoch": 0.4,
"learning_rate": 3.0138237864463337e-05,
"loss": 2.7865,
"step": 120000
},
{
"epoch": 0.4,
"learning_rate": 3.0121686396017056e-05,
"loss": 2.7916,
"step": 120100
},
{
"epoch": 0.4,
"learning_rate": 3.010513492757078e-05,
"loss": 2.7964,
"step": 120200
},
{
"epoch": 0.4,
"learning_rate": 3.0088583459124497e-05,
"loss": 2.795,
"step": 120300
},
{
"epoch": 0.4,
"learning_rate": 3.0072031990678213e-05,
"loss": 2.7841,
"step": 120400
},
{
"epoch": 0.4,
"learning_rate": 3.0055480522231932e-05,
"loss": 2.7781,
"step": 120500
},
{
"epoch": 0.4,
"learning_rate": 3.0038929053785654e-05,
"loss": 2.8013,
"step": 120600
},
{
"epoch": 0.4,
"learning_rate": 3.0022377585339373e-05,
"loss": 2.7758,
"step": 120700
},
{
"epoch": 0.4,
"learning_rate": 3.0005826116893092e-05,
"loss": 2.7948,
"step": 120800
},
{
"epoch": 0.4,
"learning_rate": 2.9989274648446815e-05,
"loss": 2.7904,
"step": 120900
},
{
"epoch": 0.4,
"learning_rate": 2.9972723180000534e-05,
"loss": 2.7814,
"step": 121000
},
{
"epoch": 0.4,
"learning_rate": 2.995617171155425e-05,
"loss": 2.7874,
"step": 121100
},
{
"epoch": 0.4,
"learning_rate": 2.9939620243107968e-05,
"loss": 2.7753,
"step": 121200
},
{
"epoch": 0.4,
"learning_rate": 2.992306877466169e-05,
"loss": 2.7826,
"step": 121300
},
{
"epoch": 0.4,
"learning_rate": 2.990651730621541e-05,
"loss": 2.7654,
"step": 121400
},
{
"epoch": 0.4,
"learning_rate": 2.988996583776913e-05,
"loss": 2.789,
"step": 121500
},
{
"epoch": 0.4,
"learning_rate": 2.987341436932285e-05,
"loss": 2.7849,
"step": 121600
},
{
"epoch": 0.4,
"learning_rate": 2.985686290087657e-05,
"loss": 2.7842,
"step": 121700
},
{
"epoch": 0.4,
"learning_rate": 2.9840311432430285e-05,
"loss": 2.795,
"step": 121800
},
{
"epoch": 0.4,
"learning_rate": 2.9823759963984004e-05,
"loss": 2.7939,
"step": 121900
},
{
"epoch": 0.4,
"learning_rate": 2.9807208495537723e-05,
"loss": 2.7912,
"step": 122000
},
{
"epoch": 0.4,
"learning_rate": 2.9790657027091446e-05,
"loss": 2.7731,
"step": 122100
},
{
"epoch": 0.4,
"learning_rate": 2.9774105558645165e-05,
"loss": 2.7808,
"step": 122200
},
{
"epoch": 0.4,
"learning_rate": 2.9757554090198887e-05,
"loss": 2.7876,
"step": 122300
},
{
"epoch": 0.41,
"learning_rate": 2.97410026217526e-05,
"loss": 2.7896,
"step": 122400
},
{
"epoch": 0.41,
"learning_rate": 2.9724451153306322e-05,
"loss": 2.7935,
"step": 122500
},
{
"epoch": 0.41,
"learning_rate": 2.970789968486004e-05,
"loss": 2.7737,
"step": 122600
},
{
"epoch": 0.41,
"learning_rate": 2.969134821641376e-05,
"loss": 2.7898,
"step": 122700
},
{
"epoch": 0.41,
"learning_rate": 2.9674796747967482e-05,
"loss": 2.7938,
"step": 122800
},
{
"epoch": 0.41,
"learning_rate": 2.96582452795212e-05,
"loss": 2.7695,
"step": 122900
},
{
"epoch": 0.41,
"learning_rate": 2.9641693811074923e-05,
"loss": 2.7848,
"step": 123000
},
{
"epoch": 0.41,
"learning_rate": 2.9625142342628636e-05,
"loss": 2.7643,
"step": 123100
},
{
"epoch": 0.41,
"learning_rate": 2.9608590874182358e-05,
"loss": 2.7844,
"step": 123200
},
{
"epoch": 0.41,
"learning_rate": 2.9592039405736077e-05,
"loss": 2.7749,
"step": 123300
},
{
"epoch": 0.41,
"learning_rate": 2.9575487937289796e-05,
"loss": 2.8042,
"step": 123400
},
{
"epoch": 0.41,
"learning_rate": 2.955893646884352e-05,
"loss": 2.7863,
"step": 123500
},
{
"epoch": 0.41,
"learning_rate": 2.9542385000397237e-05,
"loss": 2.7736,
"step": 123600
},
{
"epoch": 0.41,
"learning_rate": 2.952583353195096e-05,
"loss": 2.7848,
"step": 123700
},
{
"epoch": 0.41,
"learning_rate": 2.9509282063504672e-05,
"loss": 2.7822,
"step": 123800
},
{
"epoch": 0.41,
"learning_rate": 2.9492730595058394e-05,
"loss": 2.7827,
"step": 123900
},
{
"epoch": 0.41,
"learning_rate": 2.9476179126612113e-05,
"loss": 2.7772,
"step": 124000
},
{
"epoch": 0.41,
"learning_rate": 2.9459627658165832e-05,
"loss": 2.7733,
"step": 124100
},
{
"epoch": 0.41,
"learning_rate": 2.9443076189719555e-05,
"loss": 2.7775,
"step": 124200
},
{
"epoch": 0.41,
"learning_rate": 2.9426524721273274e-05,
"loss": 2.7931,
"step": 124300
},
{
"epoch": 0.41,
"learning_rate": 2.9409973252826996e-05,
"loss": 2.7846,
"step": 124400
},
{
"epoch": 0.41,
"learning_rate": 2.9393421784380708e-05,
"loss": 2.7969,
"step": 124500
},
{
"epoch": 0.41,
"learning_rate": 2.937687031593443e-05,
"loss": 2.7884,
"step": 124600
},
{
"epoch": 0.41,
"learning_rate": 2.936031884748815e-05,
"loss": 2.783,
"step": 124700
},
{
"epoch": 0.41,
"learning_rate": 2.934376737904187e-05,
"loss": 2.7943,
"step": 124800
},
{
"epoch": 0.41,
"learning_rate": 2.932721591059559e-05,
"loss": 2.7843,
"step": 124900
},
{
"epoch": 0.41,
"learning_rate": 2.931066444214931e-05,
"loss": 2.7877,
"step": 125000
},
{
"epoch": 0.41,
"learning_rate": 2.9294112973703032e-05,
"loss": 2.7661,
"step": 125100
},
{
"epoch": 0.41,
"learning_rate": 2.9277561505256744e-05,
"loss": 2.791,
"step": 125200
},
{
"epoch": 0.41,
"learning_rate": 2.9261010036810467e-05,
"loss": 2.7752,
"step": 125300
},
{
"epoch": 0.42,
"learning_rate": 2.9244458568364186e-05,
"loss": 2.7788,
"step": 125400
},
{
"epoch": 0.42,
"learning_rate": 2.9227907099917905e-05,
"loss": 2.7977,
"step": 125500
},
{
"epoch": 0.42,
"learning_rate": 2.9211355631471627e-05,
"loss": 2.7815,
"step": 125600
},
{
"epoch": 0.42,
"learning_rate": 2.9194804163025346e-05,
"loss": 2.7868,
"step": 125700
},
{
"epoch": 0.42,
"learning_rate": 2.917825269457907e-05,
"loss": 2.7939,
"step": 125800
},
{
"epoch": 0.42,
"learning_rate": 2.916170122613278e-05,
"loss": 2.7874,
"step": 125900
},
{
"epoch": 0.42,
"learning_rate": 2.9145149757686503e-05,
"loss": 2.7769,
"step": 126000
},
{
"epoch": 0.42,
"learning_rate": 2.9128598289240222e-05,
"loss": 2.7846,
"step": 126100
},
{
"epoch": 0.42,
"learning_rate": 2.911204682079394e-05,
"loss": 2.7854,
"step": 126200
},
{
"epoch": 0.42,
"learning_rate": 2.9095495352347663e-05,
"loss": 2.7891,
"step": 126300
},
{
"epoch": 0.42,
"learning_rate": 2.9078943883901382e-05,
"loss": 2.7594,
"step": 126400
},
{
"epoch": 0.42,
"learning_rate": 2.90623924154551e-05,
"loss": 2.7707,
"step": 126500
},
{
"epoch": 0.42,
"learning_rate": 2.9045840947008817e-05,
"loss": 2.8029,
"step": 126600
},
{
"epoch": 0.42,
"learning_rate": 2.902928947856254e-05,
"loss": 2.7806,
"step": 126700
},
{
"epoch": 0.42,
"learning_rate": 2.901273801011626e-05,
"loss": 2.8062,
"step": 126800
},
{
"epoch": 0.42,
"learning_rate": 2.8996186541669977e-05,
"loss": 2.7994,
"step": 126900
},
{
"epoch": 0.42,
"learning_rate": 2.89796350732237e-05,
"loss": 2.8045,
"step": 127000
},
{
"epoch": 0.42,
"learning_rate": 2.896308360477742e-05,
"loss": 2.7918,
"step": 127100
},
{
"epoch": 0.42,
"learning_rate": 2.8946532136331138e-05,
"loss": 2.7762,
"step": 127200
},
{
"epoch": 0.42,
"learning_rate": 2.8929980667884853e-05,
"loss": 2.7756,
"step": 127300
},
{
"epoch": 0.42,
"learning_rate": 2.8913429199438576e-05,
"loss": 2.7933,
"step": 127400
},
{
"epoch": 0.42,
"learning_rate": 2.8896877730992295e-05,
"loss": 2.7812,
"step": 127500
},
{
"epoch": 0.42,
"learning_rate": 2.8880326262546014e-05,
"loss": 2.762,
"step": 127600
},
{
"epoch": 0.42,
"learning_rate": 2.8863774794099736e-05,
"loss": 2.7768,
"step": 127700
},
{
"epoch": 0.42,
"learning_rate": 2.8847223325653455e-05,
"loss": 2.778,
"step": 127800
},
{
"epoch": 0.42,
"learning_rate": 2.8830671857207174e-05,
"loss": 2.8011,
"step": 127900
},
{
"epoch": 0.42,
"learning_rate": 2.881412038876089e-05,
"loss": 2.7789,
"step": 128000
},
{
"epoch": 0.42,
"learning_rate": 2.8797568920314612e-05,
"loss": 2.7836,
"step": 128100
},
{
"epoch": 0.42,
"learning_rate": 2.878101745186833e-05,
"loss": 2.7829,
"step": 128200
},
{
"epoch": 0.42,
"learning_rate": 2.876446598342205e-05,
"loss": 2.7892,
"step": 128300
},
{
"epoch": 0.43,
"learning_rate": 2.8747914514975772e-05,
"loss": 2.7763,
"step": 128400
},
{
"epoch": 0.43,
"learning_rate": 2.873136304652949e-05,
"loss": 2.7696,
"step": 128500
},
{
"epoch": 0.43,
"learning_rate": 2.871481157808321e-05,
"loss": 2.7819,
"step": 128600
},
{
"epoch": 0.43,
"learning_rate": 2.8698260109636926e-05,
"loss": 2.7802,
"step": 128700
},
{
"epoch": 0.43,
"learning_rate": 2.8681708641190648e-05,
"loss": 2.7636,
"step": 128800
},
{
"epoch": 0.43,
"learning_rate": 2.8665157172744367e-05,
"loss": 2.7766,
"step": 128900
},
{
"epoch": 0.43,
"learning_rate": 2.8648605704298086e-05,
"loss": 2.7798,
"step": 129000
},
{
"epoch": 0.43,
"learning_rate": 2.863205423585181e-05,
"loss": 2.7852,
"step": 129100
},
{
"epoch": 0.43,
"learning_rate": 2.8615502767405527e-05,
"loss": 2.7858,
"step": 129200
},
{
"epoch": 0.43,
"learning_rate": 2.8598951298959243e-05,
"loss": 2.7768,
"step": 129300
},
{
"epoch": 0.43,
"learning_rate": 2.8582399830512962e-05,
"loss": 2.7866,
"step": 129400
},
{
"epoch": 0.43,
"learning_rate": 2.8565848362066684e-05,
"loss": 2.7912,
"step": 129500
},
{
"epoch": 0.43,
"learning_rate": 2.8549296893620403e-05,
"loss": 2.7902,
"step": 129600
},
{
"epoch": 0.43,
"learning_rate": 2.8532745425174122e-05,
"loss": 2.8026,
"step": 129700
},
{
"epoch": 0.43,
"learning_rate": 2.8516193956727845e-05,
"loss": 2.7745,
"step": 129800
},
{
"epoch": 0.43,
"learning_rate": 2.8499642488281564e-05,
"loss": 2.7687,
"step": 129900
},
{
"epoch": 0.43,
"learning_rate": 2.848309101983528e-05,
"loss": 2.7818,
"step": 130000
},
{
"epoch": 0.43,
"learning_rate": 2.8466539551388998e-05,
"loss": 2.7829,
"step": 130100
},
{
"epoch": 0.43,
"learning_rate": 2.844998808294272e-05,
"loss": 2.7766,
"step": 130200
},
{
"epoch": 0.43,
"learning_rate": 2.843343661449644e-05,
"loss": 2.7889,
"step": 130300
},
{
"epoch": 0.43,
"learning_rate": 2.841688514605016e-05,
"loss": 2.7971,
"step": 130400
},
{
"epoch": 0.43,
"learning_rate": 2.840033367760388e-05,
"loss": 2.7856,
"step": 130500
},
{
"epoch": 0.43,
"learning_rate": 2.83837822091576e-05,
"loss": 2.7652,
"step": 130600
},
{
"epoch": 0.43,
"learning_rate": 2.8367230740711316e-05,
"loss": 2.7974,
"step": 130700
},
{
"epoch": 0.43,
"learning_rate": 2.8350679272265035e-05,
"loss": 2.7818,
"step": 130800
},
{
"epoch": 0.43,
"learning_rate": 2.8334127803818754e-05,
"loss": 2.7826,
"step": 130900
},
{
"epoch": 0.43,
"learning_rate": 2.8317576335372476e-05,
"loss": 2.7694,
"step": 131000
},
{
"epoch": 0.43,
"learning_rate": 2.8301024866926195e-05,
"loss": 2.7688,
"step": 131100
},
{
"epoch": 0.43,
"learning_rate": 2.8284473398479917e-05,
"loss": 2.7764,
"step": 131200
},
{
"epoch": 0.43,
"learning_rate": 2.8267921930033636e-05,
"loss": 2.7901,
"step": 131300
},
{
"epoch": 0.43,
"learning_rate": 2.8251370461587352e-05,
"loss": 2.7953,
"step": 131400
},
{
"epoch": 0.44,
"learning_rate": 2.823481899314107e-05,
"loss": 2.7874,
"step": 131500
},
{
"epoch": 0.44,
"learning_rate": 2.821826752469479e-05,
"loss": 2.7741,
"step": 131600
},
{
"epoch": 0.44,
"learning_rate": 2.8201716056248512e-05,
"loss": 2.7774,
"step": 131700
},
{
"epoch": 0.44,
"learning_rate": 2.818516458780223e-05,
"loss": 2.7807,
"step": 131800
},
{
"epoch": 0.44,
"learning_rate": 2.8168613119355954e-05,
"loss": 2.789,
"step": 131900
},
{
"epoch": 0.44,
"learning_rate": 2.8152061650909673e-05,
"loss": 2.7807,
"step": 132000
},
{
"epoch": 0.44,
"learning_rate": 2.8135510182463388e-05,
"loss": 2.7825,
"step": 132100
},
{
"epoch": 0.44,
"learning_rate": 2.8118958714017107e-05,
"loss": 2.7828,
"step": 132200
},
{
"epoch": 0.44,
"learning_rate": 2.8102407245570826e-05,
"loss": 2.782,
"step": 132300
},
{
"epoch": 0.44,
"learning_rate": 2.808585577712455e-05,
"loss": 2.7815,
"step": 132400
},
{
"epoch": 0.44,
"learning_rate": 2.8069304308678267e-05,
"loss": 2.7775,
"step": 132500
},
{
"epoch": 0.44,
"learning_rate": 2.805275284023199e-05,
"loss": 2.7914,
"step": 132600
},
{
"epoch": 0.44,
"learning_rate": 2.803620137178571e-05,
"loss": 2.78,
"step": 132700
},
{
"epoch": 0.44,
"learning_rate": 2.8019649903339424e-05,
"loss": 2.7863,
"step": 132800
},
{
"epoch": 0.44,
"learning_rate": 2.8003098434893143e-05,
"loss": 2.7995,
"step": 132900
},
{
"epoch": 0.44,
"learning_rate": 2.7986546966446862e-05,
"loss": 2.7783,
"step": 133000
},
{
"epoch": 0.44,
"learning_rate": 2.7969995498000585e-05,
"loss": 2.7738,
"step": 133100
},
{
"epoch": 0.44,
"learning_rate": 2.7953444029554304e-05,
"loss": 2.7817,
"step": 133200
},
{
"epoch": 0.44,
"learning_rate": 2.7936892561108026e-05,
"loss": 2.7834,
"step": 133300
},
{
"epoch": 0.44,
"learning_rate": 2.7920341092661745e-05,
"loss": 2.784,
"step": 133400
},
{
"epoch": 0.44,
"learning_rate": 2.790378962421546e-05,
"loss": 2.7928,
"step": 133500
},
{
"epoch": 0.44,
"learning_rate": 2.788723815576918e-05,
"loss": 2.7845,
"step": 133600
},
{
"epoch": 0.44,
"learning_rate": 2.78706866873229e-05,
"loss": 2.7788,
"step": 133700
},
{
"epoch": 0.44,
"learning_rate": 2.785413521887662e-05,
"loss": 2.7807,
"step": 133800
},
{
"epoch": 0.44,
"learning_rate": 2.783758375043034e-05,
"loss": 2.7874,
"step": 133900
},
{
"epoch": 0.44,
"learning_rate": 2.7821032281984062e-05,
"loss": 2.7794,
"step": 134000
},
{
"epoch": 0.44,
"learning_rate": 2.780448081353778e-05,
"loss": 2.7964,
"step": 134100
},
{
"epoch": 0.44,
"learning_rate": 2.7787929345091497e-05,
"loss": 2.7854,
"step": 134200
},
{
"epoch": 0.44,
"learning_rate": 2.7771377876645216e-05,
"loss": 2.7872,
"step": 134300
},
{
"epoch": 0.44,
"learning_rate": 2.7754826408198935e-05,
"loss": 2.7848,
"step": 134400
},
{
"epoch": 0.45,
"learning_rate": 2.7738274939752657e-05,
"loss": 2.7696,
"step": 134500
},
{
"epoch": 0.45,
"learning_rate": 2.7721723471306376e-05,
"loss": 2.7839,
"step": 134600
},
{
"epoch": 0.45,
"learning_rate": 2.77051720028601e-05,
"loss": 2.7744,
"step": 134700
},
{
"epoch": 0.45,
"learning_rate": 2.7688620534413818e-05,
"loss": 2.7838,
"step": 134800
},
{
"epoch": 0.45,
"learning_rate": 2.7672069065967533e-05,
"loss": 2.7841,
"step": 134900
},
{
"epoch": 0.45,
"learning_rate": 2.7655517597521252e-05,
"loss": 2.7769,
"step": 135000
},
{
"epoch": 0.45,
"learning_rate": 2.763896612907497e-05,
"loss": 2.7861,
"step": 135100
},
{
"epoch": 0.45,
"learning_rate": 2.7622414660628694e-05,
"loss": 2.7781,
"step": 135200
},
{
"epoch": 0.45,
"learning_rate": 2.7605863192182412e-05,
"loss": 2.7735,
"step": 135300
},
{
"epoch": 0.45,
"learning_rate": 2.758931172373613e-05,
"loss": 2.7855,
"step": 135400
},
{
"epoch": 0.45,
"learning_rate": 2.7572760255289854e-05,
"loss": 2.7908,
"step": 135500
},
{
"epoch": 0.45,
"learning_rate": 2.755620878684357e-05,
"loss": 2.7784,
"step": 135600
},
{
"epoch": 0.45,
"learning_rate": 2.753965731839729e-05,
"loss": 2.7769,
"step": 135700
},
{
"epoch": 0.45,
"learning_rate": 2.7523105849951007e-05,
"loss": 2.7862,
"step": 135800
},
{
"epoch": 0.45,
"learning_rate": 2.750655438150473e-05,
"loss": 2.7947,
"step": 135900
},
{
"epoch": 0.45,
"learning_rate": 2.749000291305845e-05,
"loss": 2.782,
"step": 136000
},
{
"epoch": 0.45,
"learning_rate": 2.7473451444612168e-05,
"loss": 2.7997,
"step": 136100
},
{
"epoch": 0.45,
"learning_rate": 2.7456899976165883e-05,
"loss": 2.7789,
"step": 136200
},
{
"epoch": 0.45,
"learning_rate": 2.7440348507719606e-05,
"loss": 2.7945,
"step": 136300
},
{
"epoch": 0.45,
"learning_rate": 2.7423797039273325e-05,
"loss": 2.7824,
"step": 136400
},
{
"epoch": 0.45,
"learning_rate": 2.7407245570827044e-05,
"loss": 2.7898,
"step": 136500
},
{
"epoch": 0.45,
"learning_rate": 2.7390694102380766e-05,
"loss": 2.7922,
"step": 136600
},
{
"epoch": 0.45,
"learning_rate": 2.7374142633934485e-05,
"loss": 2.7762,
"step": 136700
},
{
"epoch": 0.45,
"learning_rate": 2.7357591165488204e-05,
"loss": 2.7785,
"step": 136800
},
{
"epoch": 0.45,
"learning_rate": 2.734103969704192e-05,
"loss": 2.786,
"step": 136900
},
{
"epoch": 0.45,
"learning_rate": 2.7324488228595642e-05,
"loss": 2.7985,
"step": 137000
},
{
"epoch": 0.45,
"learning_rate": 2.730793676014936e-05,
"loss": 2.7882,
"step": 137100
},
{
"epoch": 0.45,
"learning_rate": 2.729138529170308e-05,
"loss": 2.7706,
"step": 137200
},
{
"epoch": 0.45,
"learning_rate": 2.7274833823256802e-05,
"loss": 2.7838,
"step": 137300
},
{
"epoch": 0.45,
"learning_rate": 2.725828235481052e-05,
"loss": 2.7664,
"step": 137400
},
{
"epoch": 0.46,
"learning_rate": 2.724173088636424e-05,
"loss": 2.7881,
"step": 137500
},
{
"epoch": 0.46,
"learning_rate": 2.7225179417917956e-05,
"loss": 2.787,
"step": 137600
},
{
"epoch": 0.46,
"learning_rate": 2.7208627949471678e-05,
"loss": 2.7782,
"step": 137700
},
{
"epoch": 0.46,
"learning_rate": 2.7192076481025397e-05,
"loss": 2.7863,
"step": 137800
},
{
"epoch": 0.46,
"learning_rate": 2.7175525012579116e-05,
"loss": 2.7757,
"step": 137900
},
{
"epoch": 0.46,
"learning_rate": 2.715897354413284e-05,
"loss": 2.7758,
"step": 138000
},
{
"epoch": 0.46,
"learning_rate": 2.7142422075686558e-05,
"loss": 2.7666,
"step": 138100
},
{
"epoch": 0.46,
"learning_rate": 2.7125870607240277e-05,
"loss": 2.7665,
"step": 138200
},
{
"epoch": 0.46,
"learning_rate": 2.7109319138793992e-05,
"loss": 2.7796,
"step": 138300
},
{
"epoch": 0.46,
"learning_rate": 2.7092767670347715e-05,
"loss": 2.7694,
"step": 138400
},
{
"epoch": 0.46,
"learning_rate": 2.7076216201901433e-05,
"loss": 2.7732,
"step": 138500
},
{
"epoch": 0.46,
"learning_rate": 2.7059664733455152e-05,
"loss": 2.7804,
"step": 138600
},
{
"epoch": 0.46,
"learning_rate": 2.7043113265008875e-05,
"loss": 2.7899,
"step": 138700
},
{
"epoch": 0.46,
"learning_rate": 2.7026561796562594e-05,
"loss": 2.7705,
"step": 138800
},
{
"epoch": 0.46,
"learning_rate": 2.7010010328116313e-05,
"loss": 2.7996,
"step": 138900
},
{
"epoch": 0.46,
"learning_rate": 2.699345885967003e-05,
"loss": 2.7725,
"step": 139000
},
{
"epoch": 0.46,
"learning_rate": 2.697690739122375e-05,
"loss": 2.783,
"step": 139100
},
{
"epoch": 0.46,
"learning_rate": 2.696035592277747e-05,
"loss": 2.7715,
"step": 139200
},
{
"epoch": 0.46,
"learning_rate": 2.694380445433119e-05,
"loss": 2.7758,
"step": 139300
},
{
"epoch": 0.46,
"learning_rate": 2.692725298588491e-05,
"loss": 2.7751,
"step": 139400
},
{
"epoch": 0.46,
"learning_rate": 2.691070151743863e-05,
"loss": 2.7897,
"step": 139500
},
{
"epoch": 0.46,
"learning_rate": 2.689415004899235e-05,
"loss": 2.7988,
"step": 139600
},
{
"epoch": 0.46,
"learning_rate": 2.6877598580546065e-05,
"loss": 2.7896,
"step": 139700
},
{
"epoch": 0.46,
"learning_rate": 2.6861047112099784e-05,
"loss": 2.7764,
"step": 139800
},
{
"epoch": 0.46,
"learning_rate": 2.6844495643653506e-05,
"loss": 2.7989,
"step": 139900
},
{
"epoch": 0.46,
"learning_rate": 2.6827944175207225e-05,
"loss": 2.7605,
"step": 140000
},
{
"epoch": 0.46,
"learning_rate": 2.6811392706760947e-05,
"loss": 2.7864,
"step": 140100
},
{
"epoch": 0.46,
"learning_rate": 2.6794841238314666e-05,
"loss": 2.7637,
"step": 140200
},
{
"epoch": 0.46,
"learning_rate": 2.6778289769868385e-05,
"loss": 2.777,
"step": 140300
},
{
"epoch": 0.46,
"learning_rate": 2.67617383014221e-05,
"loss": 2.7684,
"step": 140400
},
{
"epoch": 0.47,
"learning_rate": 2.674518683297582e-05,
"loss": 2.8049,
"step": 140500
},
{
"epoch": 0.47,
"learning_rate": 2.6728635364529542e-05,
"loss": 2.7747,
"step": 140600
},
{
"epoch": 0.47,
"learning_rate": 2.671208389608326e-05,
"loss": 2.7763,
"step": 140700
},
{
"epoch": 0.47,
"learning_rate": 2.6695532427636984e-05,
"loss": 2.7659,
"step": 140800
},
{
"epoch": 0.47,
"learning_rate": 2.6678980959190703e-05,
"loss": 2.7725,
"step": 140900
},
{
"epoch": 0.47,
"learning_rate": 2.666242949074442e-05,
"loss": 2.761,
"step": 141000
},
{
"epoch": 0.47,
"learning_rate": 2.6645878022298137e-05,
"loss": 2.7763,
"step": 141100
},
{
"epoch": 0.47,
"learning_rate": 2.6629326553851856e-05,
"loss": 2.7859,
"step": 141200
},
{
"epoch": 0.47,
"learning_rate": 2.661277508540558e-05,
"loss": 2.7826,
"step": 141300
},
{
"epoch": 0.47,
"learning_rate": 2.6596223616959298e-05,
"loss": 2.7764,
"step": 141400
},
{
"epoch": 0.47,
"learning_rate": 2.657967214851302e-05,
"loss": 2.789,
"step": 141500
},
{
"epoch": 0.47,
"learning_rate": 2.656312068006674e-05,
"loss": 2.7831,
"step": 141600
},
{
"epoch": 0.47,
"learning_rate": 2.6546569211620458e-05,
"loss": 2.7786,
"step": 141700
},
{
"epoch": 0.47,
"learning_rate": 2.6530017743174173e-05,
"loss": 2.7798,
"step": 141800
},
{
"epoch": 0.47,
"learning_rate": 2.6513466274727892e-05,
"loss": 2.7768,
"step": 141900
},
{
"epoch": 0.47,
"learning_rate": 2.6496914806281615e-05,
"loss": 2.7826,
"step": 142000
},
{
"epoch": 0.47,
"learning_rate": 2.6480363337835334e-05,
"loss": 2.7907,
"step": 142100
},
{
"epoch": 0.47,
"learning_rate": 2.6463811869389056e-05,
"loss": 2.7842,
"step": 142200
},
{
"epoch": 0.47,
"learning_rate": 2.6447260400942775e-05,
"loss": 2.7853,
"step": 142300
},
{
"epoch": 0.47,
"learning_rate": 2.643070893249649e-05,
"loss": 2.79,
"step": 142400
},
{
"epoch": 0.47,
"learning_rate": 2.641415746405021e-05,
"loss": 2.7784,
"step": 142500
},
{
"epoch": 0.47,
"learning_rate": 2.639760599560393e-05,
"loss": 2.7841,
"step": 142600
},
{
"epoch": 0.47,
"learning_rate": 2.638105452715765e-05,
"loss": 2.7839,
"step": 142700
},
{
"epoch": 0.47,
"learning_rate": 2.636450305871137e-05,
"loss": 2.7875,
"step": 142800
},
{
"epoch": 0.47,
"learning_rate": 2.6347951590265092e-05,
"loss": 2.7778,
"step": 142900
},
{
"epoch": 0.47,
"learning_rate": 2.633140012181881e-05,
"loss": 2.7759,
"step": 143000
},
{
"epoch": 0.47,
"learning_rate": 2.6314848653372527e-05,
"loss": 2.7884,
"step": 143100
},
{
"epoch": 0.47,
"learning_rate": 2.6298297184926246e-05,
"loss": 2.796,
"step": 143200
},
{
"epoch": 0.47,
"learning_rate": 2.6281745716479965e-05,
"loss": 2.7757,
"step": 143300
},
{
"epoch": 0.47,
"learning_rate": 2.6265194248033687e-05,
"loss": 2.7815,
"step": 143400
},
{
"epoch": 0.48,
"learning_rate": 2.6248642779587406e-05,
"loss": 2.781,
"step": 143500
},
{
"epoch": 0.48,
"learning_rate": 2.623209131114113e-05,
"loss": 2.7859,
"step": 143600
},
{
"epoch": 0.48,
"learning_rate": 2.6215539842694848e-05,
"loss": 2.7894,
"step": 143700
},
{
"epoch": 0.48,
"learning_rate": 2.6198988374248563e-05,
"loss": 2.7809,
"step": 143800
},
{
"epoch": 0.48,
"learning_rate": 2.6182436905802282e-05,
"loss": 2.7695,
"step": 143900
},
{
"epoch": 0.48,
"learning_rate": 2.6165885437356e-05,
"loss": 2.7796,
"step": 144000
},
{
"epoch": 0.48,
"learning_rate": 2.6149333968909724e-05,
"loss": 2.7594,
"step": 144100
},
{
"epoch": 0.48,
"learning_rate": 2.6132782500463443e-05,
"loss": 2.7758,
"step": 144200
},
{
"epoch": 0.48,
"learning_rate": 2.611623103201716e-05,
"loss": 2.7983,
"step": 144300
},
{
"epoch": 0.48,
"learning_rate": 2.6099679563570884e-05,
"loss": 2.7545,
"step": 144400
},
{
"epoch": 0.48,
"learning_rate": 2.60831280951246e-05,
"loss": 2.7757,
"step": 144500
},
{
"epoch": 0.48,
"learning_rate": 2.606657662667832e-05,
"loss": 2.7695,
"step": 144600
},
{
"epoch": 0.48,
"learning_rate": 2.6050025158232038e-05,
"loss": 2.7797,
"step": 144700
},
{
"epoch": 0.48,
"learning_rate": 2.603347368978576e-05,
"loss": 2.7871,
"step": 144800
},
{
"epoch": 0.48,
"learning_rate": 2.601692222133948e-05,
"loss": 2.776,
"step": 144900
},
{
"epoch": 0.48,
"learning_rate": 2.6000370752893198e-05,
"loss": 2.783,
"step": 145000
},
{
"epoch": 0.48,
"learning_rate": 2.598381928444692e-05,
"loss": 2.7894,
"step": 145100
},
{
"epoch": 0.48,
"learning_rate": 2.5967267816000636e-05,
"loss": 2.7742,
"step": 145200
},
{
"epoch": 0.48,
"learning_rate": 2.5950716347554355e-05,
"loss": 2.7708,
"step": 145300
},
{
"epoch": 0.48,
"learning_rate": 2.5934164879108074e-05,
"loss": 2.7786,
"step": 145400
},
{
"epoch": 0.48,
"learning_rate": 2.5917613410661796e-05,
"loss": 2.7765,
"step": 145500
},
{
"epoch": 0.48,
"learning_rate": 2.5901061942215515e-05,
"loss": 2.785,
"step": 145600
},
{
"epoch": 0.48,
"learning_rate": 2.5884510473769234e-05,
"loss": 2.7876,
"step": 145700
},
{
"epoch": 0.48,
"learning_rate": 2.5867959005322956e-05,
"loss": 2.7776,
"step": 145800
},
{
"epoch": 0.48,
"learning_rate": 2.5851407536876672e-05,
"loss": 2.7827,
"step": 145900
},
{
"epoch": 0.48,
"learning_rate": 2.583485606843039e-05,
"loss": 2.7813,
"step": 146000
},
{
"epoch": 0.48,
"learning_rate": 2.581830459998411e-05,
"loss": 2.7732,
"step": 146100
},
{
"epoch": 0.48,
"learning_rate": 2.5801753131537832e-05,
"loss": 2.7818,
"step": 146200
},
{
"epoch": 0.48,
"learning_rate": 2.578520166309155e-05,
"loss": 2.7636,
"step": 146300
},
{
"epoch": 0.48,
"learning_rate": 2.576865019464527e-05,
"loss": 2.7867,
"step": 146400
},
{
"epoch": 0.48,
"learning_rate": 2.5752098726198993e-05,
"loss": 2.7743,
"step": 146500
},
{
"epoch": 0.49,
"learning_rate": 2.573554725775271e-05,
"loss": 2.78,
"step": 146600
},
{
"epoch": 0.49,
"learning_rate": 2.5718995789306427e-05,
"loss": 2.7681,
"step": 146700
},
{
"epoch": 0.49,
"learning_rate": 2.5702444320860146e-05,
"loss": 2.7787,
"step": 146800
},
{
"epoch": 0.49,
"learning_rate": 2.568589285241387e-05,
"loss": 2.776,
"step": 146900
},
{
"epoch": 0.49,
"learning_rate": 2.5669341383967588e-05,
"loss": 2.7711,
"step": 147000
},
{
"epoch": 0.49,
"learning_rate": 2.5652789915521307e-05,
"loss": 2.7769,
"step": 147100
},
{
"epoch": 0.49,
"learning_rate": 2.563623844707503e-05,
"loss": 2.7751,
"step": 147200
},
{
"epoch": 0.49,
"learning_rate": 2.5619686978628745e-05,
"loss": 2.7804,
"step": 147300
},
{
"epoch": 0.49,
"learning_rate": 2.5603135510182464e-05,
"loss": 2.7816,
"step": 147400
},
{
"epoch": 0.49,
"learning_rate": 2.5586584041736183e-05,
"loss": 2.7874,
"step": 147500
},
{
"epoch": 0.49,
"learning_rate": 2.5570032573289905e-05,
"loss": 2.7858,
"step": 147600
},
{
"epoch": 0.49,
"learning_rate": 2.5553481104843624e-05,
"loss": 2.7698,
"step": 147700
},
{
"epoch": 0.49,
"learning_rate": 2.5536929636397343e-05,
"loss": 2.7745,
"step": 147800
},
{
"epoch": 0.49,
"learning_rate": 2.5520378167951065e-05,
"loss": 2.7773,
"step": 147900
},
{
"epoch": 0.49,
"learning_rate": 2.550382669950478e-05,
"loss": 2.7765,
"step": 148000
},
{
"epoch": 0.49,
"learning_rate": 2.54872752310585e-05,
"loss": 2.7765,
"step": 148100
},
{
"epoch": 0.49,
"learning_rate": 2.547072376261222e-05,
"loss": 2.7636,
"step": 148200
},
{
"epoch": 0.49,
"learning_rate": 2.545417229416594e-05,
"loss": 2.7628,
"step": 148300
},
{
"epoch": 0.49,
"learning_rate": 2.543762082571966e-05,
"loss": 2.7709,
"step": 148400
},
{
"epoch": 0.49,
"learning_rate": 2.542106935727338e-05,
"loss": 2.7823,
"step": 148500
},
{
"epoch": 0.49,
"learning_rate": 2.54045178888271e-05,
"loss": 2.7758,
"step": 148600
},
{
"epoch": 0.49,
"learning_rate": 2.5387966420380814e-05,
"loss": 2.7836,
"step": 148700
},
{
"epoch": 0.49,
"learning_rate": 2.5371414951934536e-05,
"loss": 2.7923,
"step": 148800
},
{
"epoch": 0.49,
"learning_rate": 2.5354863483488255e-05,
"loss": 2.7784,
"step": 148900
},
{
"epoch": 0.49,
"learning_rate": 2.5338312015041977e-05,
"loss": 2.7932,
"step": 149000
},
{
"epoch": 0.49,
"learning_rate": 2.5321760546595696e-05,
"loss": 2.7562,
"step": 149100
},
{
"epoch": 0.49,
"learning_rate": 2.5305209078149415e-05,
"loss": 2.7844,
"step": 149200
},
{
"epoch": 0.49,
"learning_rate": 2.528865760970313e-05,
"loss": 2.7901,
"step": 149300
},
{
"epoch": 0.49,
"learning_rate": 2.527210614125685e-05,
"loss": 2.7835,
"step": 149400
},
{
"epoch": 0.49,
"learning_rate": 2.5255554672810572e-05,
"loss": 2.7709,
"step": 149500
},
{
"epoch": 0.5,
"learning_rate": 2.523900320436429e-05,
"loss": 2.7873,
"step": 149600
},
{
"epoch": 0.5,
"learning_rate": 2.5222451735918014e-05,
"loss": 2.7812,
"step": 149700
},
{
"epoch": 0.5,
"learning_rate": 2.5205900267471733e-05,
"loss": 2.7799,
"step": 149800
},
{
"epoch": 0.5,
"learning_rate": 2.5189348799025452e-05,
"loss": 2.7729,
"step": 149900
},
{
"epoch": 0.5,
"learning_rate": 2.5172797330579167e-05,
"loss": 2.7721,
"step": 150000
},
{
"epoch": 0.5,
"learning_rate": 2.5156245862132886e-05,
"loss": 2.7846,
"step": 150100
},
{
"epoch": 0.5,
"learning_rate": 2.513969439368661e-05,
"loss": 2.7772,
"step": 150200
},
{
"epoch": 0.5,
"learning_rate": 2.5123142925240328e-05,
"loss": 2.7811,
"step": 150300
},
{
"epoch": 0.5,
"learning_rate": 2.510659145679405e-05,
"loss": 2.7659,
"step": 150400
},
{
"epoch": 0.5,
"learning_rate": 2.509003998834777e-05,
"loss": 2.7581,
"step": 150500
},
{
"epoch": 0.5,
"learning_rate": 2.5073488519901488e-05,
"loss": 2.7842,
"step": 150600
},
{
"epoch": 0.5,
"learning_rate": 2.5056937051455204e-05,
"loss": 2.7747,
"step": 150700
},
{
"epoch": 0.5,
"learning_rate": 2.5040385583008923e-05,
"loss": 2.7642,
"step": 150800
},
{
"epoch": 0.5,
"learning_rate": 2.5023834114562645e-05,
"loss": 2.7829,
"step": 150900
},
{
"epoch": 0.5,
"learning_rate": 2.5007282646116364e-05,
"loss": 2.7744,
"step": 151000
},
{
"epoch": 0.5,
"learning_rate": 2.4990731177670086e-05,
"loss": 2.7909,
"step": 151100
},
{
"epoch": 0.5,
"learning_rate": 2.4974179709223802e-05,
"loss": 2.788,
"step": 151200
},
{
"epoch": 0.5,
"learning_rate": 2.495762824077752e-05,
"loss": 2.7896,
"step": 151300
},
{
"epoch": 0.5,
"learning_rate": 2.4941076772331243e-05,
"loss": 2.7651,
"step": 151400
},
{
"epoch": 0.5,
"learning_rate": 2.4924525303884962e-05,
"loss": 2.7675,
"step": 151500
},
{
"epoch": 0.5,
"learning_rate": 2.490797383543868e-05,
"loss": 2.7791,
"step": 151600
},
{
"epoch": 0.5,
"learning_rate": 2.48914223669924e-05,
"loss": 2.764,
"step": 151700
},
{
"epoch": 0.5,
"learning_rate": 2.4874870898546123e-05,
"loss": 2.77,
"step": 151800
},
{
"epoch": 0.5,
"learning_rate": 2.4858319430099838e-05,
"loss": 2.7764,
"step": 151900
},
{
"epoch": 0.5,
"learning_rate": 2.4841767961653557e-05,
"loss": 2.7905,
"step": 152000
},
{
"epoch": 0.5,
"learning_rate": 2.482521649320728e-05,
"loss": 2.7796,
"step": 152100
},
{
"epoch": 0.5,
"learning_rate": 2.4808665024761e-05,
"loss": 2.7783,
"step": 152200
},
{
"epoch": 0.5,
"learning_rate": 2.4792113556314717e-05,
"loss": 2.7838,
"step": 152300
},
{
"epoch": 0.5,
"learning_rate": 2.4775562087868436e-05,
"loss": 2.7647,
"step": 152400
},
{
"epoch": 0.5,
"learning_rate": 2.475901061942216e-05,
"loss": 2.7721,
"step": 152500
},
{
"epoch": 0.51,
"learning_rate": 2.4742459150975874e-05,
"loss": 2.7786,
"step": 152600
},
{
"epoch": 0.51,
"learning_rate": 2.4725907682529593e-05,
"loss": 2.7852,
"step": 152700
},
{
"epoch": 0.51,
"learning_rate": 2.4709356214083316e-05,
"loss": 2.7804,
"step": 152800
},
{
"epoch": 0.51,
"learning_rate": 2.4692804745637035e-05,
"loss": 2.7809,
"step": 152900
},
{
"epoch": 0.51,
"learning_rate": 2.4676253277190754e-05,
"loss": 2.7754,
"step": 153000
},
{
"epoch": 0.51,
"learning_rate": 2.4659701808744473e-05,
"loss": 2.7772,
"step": 153100
},
{
"epoch": 0.51,
"learning_rate": 2.464315034029819e-05,
"loss": 2.7783,
"step": 153200
},
{
"epoch": 0.51,
"learning_rate": 2.462659887185191e-05,
"loss": 2.7704,
"step": 153300
},
{
"epoch": 0.51,
"learning_rate": 2.461004740340563e-05,
"loss": 2.7845,
"step": 153400
},
{
"epoch": 0.51,
"learning_rate": 2.4593495934959352e-05,
"loss": 2.7748,
"step": 153500
},
{
"epoch": 0.51,
"learning_rate": 2.457694446651307e-05,
"loss": 2.779,
"step": 153600
},
{
"epoch": 0.51,
"learning_rate": 2.456039299806679e-05,
"loss": 2.7755,
"step": 153700
},
{
"epoch": 0.51,
"learning_rate": 2.454384152962051e-05,
"loss": 2.7755,
"step": 153800
},
{
"epoch": 0.51,
"learning_rate": 2.4527290061174228e-05,
"loss": 2.7758,
"step": 153900
},
{
"epoch": 0.51,
"learning_rate": 2.4510738592727947e-05,
"loss": 2.7733,
"step": 154000
},
{
"epoch": 0.51,
"learning_rate": 2.4494187124281666e-05,
"loss": 2.7738,
"step": 154100
},
{
"epoch": 0.51,
"learning_rate": 2.4477635655835388e-05,
"loss": 2.7675,
"step": 154200
},
{
"epoch": 0.51,
"learning_rate": 2.4461084187389104e-05,
"loss": 2.7745,
"step": 154300
},
{
"epoch": 0.51,
"learning_rate": 2.4444532718942826e-05,
"loss": 2.7798,
"step": 154400
},
{
"epoch": 0.51,
"learning_rate": 2.4427981250496545e-05,
"loss": 2.7743,
"step": 154500
},
{
"epoch": 0.51,
"learning_rate": 2.4411429782050264e-05,
"loss": 2.7694,
"step": 154600
},
{
"epoch": 0.51,
"learning_rate": 2.4394878313603983e-05,
"loss": 2.7859,
"step": 154700
},
{
"epoch": 0.51,
"learning_rate": 2.4378326845157702e-05,
"loss": 2.7566,
"step": 154800
},
{
"epoch": 0.51,
"learning_rate": 2.4361775376711425e-05,
"loss": 2.7756,
"step": 154900
},
{
"epoch": 0.51,
"learning_rate": 2.434522390826514e-05,
"loss": 2.7873,
"step": 155000
},
{
"epoch": 0.51,
"learning_rate": 2.4328672439818863e-05,
"loss": 2.7751,
"step": 155100
},
{
"epoch": 0.51,
"learning_rate": 2.431212097137258e-05,
"loss": 2.7782,
"step": 155200
},
{
"epoch": 0.51,
"learning_rate": 2.42955695029263e-05,
"loss": 2.7759,
"step": 155300
},
{
"epoch": 0.51,
"learning_rate": 2.427901803448002e-05,
"loss": 2.7655,
"step": 155400
},
{
"epoch": 0.51,
"learning_rate": 2.426246656603374e-05,
"loss": 2.7713,
"step": 155500
},
{
"epoch": 0.52,
"learning_rate": 2.424591509758746e-05,
"loss": 2.7826,
"step": 155600
},
{
"epoch": 0.52,
"learning_rate": 2.4229363629141176e-05,
"loss": 2.7691,
"step": 155700
},
{
"epoch": 0.52,
"learning_rate": 2.42128121606949e-05,
"loss": 2.7695,
"step": 155800
},
{
"epoch": 0.52,
"learning_rate": 2.4196260692248618e-05,
"loss": 2.7703,
"step": 155900
},
{
"epoch": 0.52,
"learning_rate": 2.4179709223802337e-05,
"loss": 2.7866,
"step": 156000
},
{
"epoch": 0.52,
"learning_rate": 2.4163157755356056e-05,
"loss": 2.7742,
"step": 156100
},
{
"epoch": 0.52,
"learning_rate": 2.4146606286909775e-05,
"loss": 2.7717,
"step": 156200
},
{
"epoch": 0.52,
"learning_rate": 2.4130054818463497e-05,
"loss": 2.7704,
"step": 156300
},
{
"epoch": 0.52,
"learning_rate": 2.4113503350017213e-05,
"loss": 2.7683,
"step": 156400
},
{
"epoch": 0.52,
"learning_rate": 2.4096951881570935e-05,
"loss": 2.7762,
"step": 156500
},
{
"epoch": 0.52,
"learning_rate": 2.4080400413124654e-05,
"loss": 2.7689,
"step": 156600
},
{
"epoch": 0.52,
"learning_rate": 2.4063848944678373e-05,
"loss": 2.7627,
"step": 156700
},
{
"epoch": 0.52,
"learning_rate": 2.4047297476232092e-05,
"loss": 2.765,
"step": 156800
},
{
"epoch": 0.52,
"learning_rate": 2.403074600778581e-05,
"loss": 2.7791,
"step": 156900
},
{
"epoch": 0.52,
"learning_rate": 2.4014194539339533e-05,
"loss": 2.7762,
"step": 157000
},
{
"epoch": 0.52,
"learning_rate": 2.399764307089325e-05,
"loss": 2.768,
"step": 157100
},
{
"epoch": 0.52,
"learning_rate": 2.398109160244697e-05,
"loss": 2.7787,
"step": 157200
},
{
"epoch": 0.52,
"learning_rate": 2.396454013400069e-05,
"loss": 2.7708,
"step": 157300
},
{
"epoch": 0.52,
"learning_rate": 2.394798866555441e-05,
"loss": 2.7643,
"step": 157400
},
{
"epoch": 0.52,
"learning_rate": 2.3931437197108128e-05,
"loss": 2.7718,
"step": 157500
},
{
"epoch": 0.52,
"learning_rate": 2.3914885728661847e-05,
"loss": 2.7743,
"step": 157600
},
{
"epoch": 0.52,
"learning_rate": 2.389833426021557e-05,
"loss": 2.7786,
"step": 157700
},
{
"epoch": 0.52,
"learning_rate": 2.3881782791769285e-05,
"loss": 2.7858,
"step": 157800
},
{
"epoch": 0.52,
"learning_rate": 2.3865231323323008e-05,
"loss": 2.7724,
"step": 157900
},
{
"epoch": 0.52,
"learning_rate": 2.3848679854876727e-05,
"loss": 2.7746,
"step": 158000
},
{
"epoch": 0.52,
"learning_rate": 2.3832128386430446e-05,
"loss": 2.7739,
"step": 158100
},
{
"epoch": 0.52,
"learning_rate": 2.3815576917984165e-05,
"loss": 2.7851,
"step": 158200
},
{
"epoch": 0.52,
"learning_rate": 2.3799025449537883e-05,
"loss": 2.7693,
"step": 158300
},
{
"epoch": 0.52,
"learning_rate": 2.3782473981091606e-05,
"loss": 2.7834,
"step": 158400
},
{
"epoch": 0.52,
"learning_rate": 2.376592251264532e-05,
"loss": 2.7767,
"step": 158500
},
{
"epoch": 0.53,
"learning_rate": 2.3749371044199044e-05,
"loss": 2.7717,
"step": 158600
},
{
"epoch": 0.53,
"learning_rate": 2.3732819575752763e-05,
"loss": 2.7751,
"step": 158700
},
{
"epoch": 0.53,
"learning_rate": 2.3716268107306482e-05,
"loss": 2.7723,
"step": 158800
},
{
"epoch": 0.53,
"learning_rate": 2.36997166388602e-05,
"loss": 2.7702,
"step": 158900
},
{
"epoch": 0.53,
"learning_rate": 2.368316517041392e-05,
"loss": 2.7705,
"step": 159000
},
{
"epoch": 0.53,
"learning_rate": 2.3666613701967642e-05,
"loss": 2.7683,
"step": 159100
},
{
"epoch": 0.53,
"learning_rate": 2.3650062233521358e-05,
"loss": 2.785,
"step": 159200
},
{
"epoch": 0.53,
"learning_rate": 2.363351076507508e-05,
"loss": 2.7568,
"step": 159300
},
{
"epoch": 0.53,
"learning_rate": 2.36169592966288e-05,
"loss": 2.7794,
"step": 159400
},
{
"epoch": 0.53,
"learning_rate": 2.3600407828182518e-05,
"loss": 2.7751,
"step": 159500
},
{
"epoch": 0.53,
"learning_rate": 2.3583856359736237e-05,
"loss": 2.7912,
"step": 159600
},
{
"epoch": 0.53,
"learning_rate": 2.3567304891289956e-05,
"loss": 2.7772,
"step": 159700
},
{
"epoch": 0.53,
"learning_rate": 2.355075342284368e-05,
"loss": 2.7737,
"step": 159800
},
{
"epoch": 0.53,
"learning_rate": 2.3534201954397394e-05,
"loss": 2.7799,
"step": 159900
},
{
"epoch": 0.53,
"learning_rate": 2.3517650485951116e-05,
"loss": 2.7545,
"step": 160000
},
{
"epoch": 0.53,
"learning_rate": 2.3501099017504835e-05,
"loss": 2.7612,
"step": 160100
},
{
"epoch": 0.53,
"learning_rate": 2.348454754905855e-05,
"loss": 2.7749,
"step": 160200
},
{
"epoch": 0.53,
"learning_rate": 2.3467996080612273e-05,
"loss": 2.7754,
"step": 160300
},
{
"epoch": 0.53,
"learning_rate": 2.3451444612165992e-05,
"loss": 2.789,
"step": 160400
},
{
"epoch": 0.53,
"learning_rate": 2.343489314371971e-05,
"loss": 2.7792,
"step": 160500
},
{
"epoch": 0.53,
"learning_rate": 2.341834167527343e-05,
"loss": 2.7658,
"step": 160600
},
{
"epoch": 0.53,
"learning_rate": 2.3401790206827153e-05,
"loss": 2.799,
"step": 160700
},
{
"epoch": 0.53,
"learning_rate": 2.338523873838087e-05,
"loss": 2.7865,
"step": 160800
},
{
"epoch": 0.53,
"learning_rate": 2.3368687269934587e-05,
"loss": 2.7736,
"step": 160900
},
{
"epoch": 0.53,
"learning_rate": 2.335213580148831e-05,
"loss": 2.7832,
"step": 161000
},
{
"epoch": 0.53,
"learning_rate": 2.333558433304203e-05,
"loss": 2.7767,
"step": 161100
},
{
"epoch": 0.53,
"learning_rate": 2.3319032864595748e-05,
"loss": 2.7661,
"step": 161200
},
{
"epoch": 0.53,
"learning_rate": 2.3302481396149467e-05,
"loss": 2.7658,
"step": 161300
},
{
"epoch": 0.53,
"learning_rate": 2.328592992770319e-05,
"loss": 2.7777,
"step": 161400
},
{
"epoch": 0.53,
"learning_rate": 2.3269378459256908e-05,
"loss": 2.7651,
"step": 161500
},
{
"epoch": 0.53,
"learning_rate": 2.3252826990810623e-05,
"loss": 2.768,
"step": 161600
},
{
"epoch": 0.54,
"learning_rate": 2.3236275522364346e-05,
"loss": 2.7773,
"step": 161700
},
{
"epoch": 0.54,
"learning_rate": 2.3219724053918065e-05,
"loss": 2.7782,
"step": 161800
},
{
"epoch": 0.54,
"learning_rate": 2.3203172585471784e-05,
"loss": 2.7709,
"step": 161900
},
{
"epoch": 0.54,
"learning_rate": 2.3186621117025503e-05,
"loss": 2.7593,
"step": 162000
},
{
"epoch": 0.54,
"learning_rate": 2.3170069648579222e-05,
"loss": 2.7735,
"step": 162100
},
{
"epoch": 0.54,
"learning_rate": 2.3153518180132944e-05,
"loss": 2.7657,
"step": 162200
},
{
"epoch": 0.54,
"learning_rate": 2.313696671168666e-05,
"loss": 2.7746,
"step": 162300
},
{
"epoch": 0.54,
"learning_rate": 2.3120415243240382e-05,
"loss": 2.7657,
"step": 162400
},
{
"epoch": 0.54,
"learning_rate": 2.31038637747941e-05,
"loss": 2.7741,
"step": 162500
},
{
"epoch": 0.54,
"learning_rate": 2.308731230634782e-05,
"loss": 2.7657,
"step": 162600
},
{
"epoch": 0.54,
"learning_rate": 2.307076083790154e-05,
"loss": 2.7794,
"step": 162700
},
{
"epoch": 0.54,
"learning_rate": 2.3054209369455258e-05,
"loss": 2.7656,
"step": 162800
},
{
"epoch": 0.54,
"learning_rate": 2.303765790100898e-05,
"loss": 2.7864,
"step": 162900
},
{
"epoch": 0.54,
"learning_rate": 2.3021106432562696e-05,
"loss": 2.7802,
"step": 163000
},
{
"epoch": 0.54,
"learning_rate": 2.300455496411642e-05,
"loss": 2.771,
"step": 163100
},
{
"epoch": 0.54,
"learning_rate": 2.2988003495670137e-05,
"loss": 2.7843,
"step": 163200
},
{
"epoch": 0.54,
"learning_rate": 2.2971452027223856e-05,
"loss": 2.7701,
"step": 163300
},
{
"epoch": 0.54,
"learning_rate": 2.2954900558777575e-05,
"loss": 2.7797,
"step": 163400
},
{
"epoch": 0.54,
"learning_rate": 2.2938349090331294e-05,
"loss": 2.7632,
"step": 163500
},
{
"epoch": 0.54,
"learning_rate": 2.2921797621885017e-05,
"loss": 2.7667,
"step": 163600
},
{
"epoch": 0.54,
"learning_rate": 2.2905246153438732e-05,
"loss": 2.7814,
"step": 163700
},
{
"epoch": 0.54,
"learning_rate": 2.2888694684992455e-05,
"loss": 2.7647,
"step": 163800
},
{
"epoch": 0.54,
"learning_rate": 2.2872143216546174e-05,
"loss": 2.777,
"step": 163900
},
{
"epoch": 0.54,
"learning_rate": 2.2855591748099893e-05,
"loss": 2.7843,
"step": 164000
},
{
"epoch": 0.54,
"learning_rate": 2.283904027965361e-05,
"loss": 2.7703,
"step": 164100
},
{
"epoch": 0.54,
"learning_rate": 2.282248881120733e-05,
"loss": 2.7741,
"step": 164200
},
{
"epoch": 0.54,
"learning_rate": 2.280593734276105e-05,
"loss": 2.7625,
"step": 164300
},
{
"epoch": 0.54,
"learning_rate": 2.278938587431477e-05,
"loss": 2.7684,
"step": 164400
},
{
"epoch": 0.54,
"learning_rate": 2.277283440586849e-05,
"loss": 2.7745,
"step": 164500
},
{
"epoch": 0.54,
"learning_rate": 2.275628293742221e-05,
"loss": 2.7705,
"step": 164600
},
{
"epoch": 0.55,
"learning_rate": 2.273973146897593e-05,
"loss": 2.7706,
"step": 164700
},
{
"epoch": 0.55,
"learning_rate": 2.2723180000529648e-05,
"loss": 2.7703,
"step": 164800
},
{
"epoch": 0.55,
"learning_rate": 2.2706628532083367e-05,
"loss": 2.7832,
"step": 164900
},
{
"epoch": 0.55,
"learning_rate": 2.2690077063637086e-05,
"loss": 2.77,
"step": 165000
},
{
"epoch": 0.55,
"learning_rate": 2.2673525595190805e-05,
"loss": 2.7693,
"step": 165100
},
{
"epoch": 0.55,
"learning_rate": 2.2656974126744527e-05,
"loss": 2.7677,
"step": 165200
},
{
"epoch": 0.55,
"learning_rate": 2.2640422658298246e-05,
"loss": 2.7743,
"step": 165300
},
{
"epoch": 0.55,
"learning_rate": 2.2623871189851965e-05,
"loss": 2.7795,
"step": 165400
},
{
"epoch": 0.55,
"learning_rate": 2.2607319721405684e-05,
"loss": 2.7723,
"step": 165500
},
{
"epoch": 0.55,
"learning_rate": 2.2590768252959403e-05,
"loss": 2.7777,
"step": 165600
},
{
"epoch": 0.55,
"learning_rate": 2.2574216784513122e-05,
"loss": 2.7832,
"step": 165700
},
{
"epoch": 0.55,
"learning_rate": 2.255766531606684e-05,
"loss": 2.776,
"step": 165800
},
{
"epoch": 0.55,
"learning_rate": 2.2541113847620563e-05,
"loss": 2.7851,
"step": 165900
},
{
"epoch": 0.55,
"learning_rate": 2.2524562379174282e-05,
"loss": 2.7789,
"step": 166000
},
{
"epoch": 0.55,
"learning_rate": 2.2508010910728e-05,
"loss": 2.7758,
"step": 166100
},
{
"epoch": 0.55,
"learning_rate": 2.249145944228172e-05,
"loss": 2.7738,
"step": 166200
},
{
"epoch": 0.55,
"learning_rate": 2.247490797383544e-05,
"loss": 2.7708,
"step": 166300
},
{
"epoch": 0.55,
"learning_rate": 2.245835650538916e-05,
"loss": 2.7732,
"step": 166400
},
{
"epoch": 0.55,
"learning_rate": 2.2441805036942877e-05,
"loss": 2.7713,
"step": 166500
},
{
"epoch": 0.55,
"learning_rate": 2.24252535684966e-05,
"loss": 2.7775,
"step": 166600
},
{
"epoch": 0.55,
"learning_rate": 2.240870210005032e-05,
"loss": 2.7756,
"step": 166700
},
{
"epoch": 0.55,
"learning_rate": 2.2392150631604038e-05,
"loss": 2.79,
"step": 166800
},
{
"epoch": 0.55,
"learning_rate": 2.2375599163157757e-05,
"loss": 2.7529,
"step": 166900
},
{
"epoch": 0.55,
"learning_rate": 2.2359047694711476e-05,
"loss": 2.7694,
"step": 167000
},
{
"epoch": 0.55,
"learning_rate": 2.2342496226265195e-05,
"loss": 2.7619,
"step": 167100
},
{
"epoch": 0.55,
"learning_rate": 2.2325944757818914e-05,
"loss": 2.78,
"step": 167200
},
{
"epoch": 0.55,
"learning_rate": 2.2309393289372636e-05,
"loss": 2.7749,
"step": 167300
},
{
"epoch": 0.55,
"learning_rate": 2.229284182092635e-05,
"loss": 2.7647,
"step": 167400
},
{
"epoch": 0.55,
"learning_rate": 2.2276290352480074e-05,
"loss": 2.7698,
"step": 167500
},
{
"epoch": 0.55,
"learning_rate": 2.2259738884033793e-05,
"loss": 2.7779,
"step": 167600
},
{
"epoch": 0.56,
"learning_rate": 2.2243187415587512e-05,
"loss": 2.7892,
"step": 167700
},
{
"epoch": 0.56,
"learning_rate": 2.222663594714123e-05,
"loss": 2.7611,
"step": 167800
},
{
"epoch": 0.56,
"learning_rate": 2.221008447869495e-05,
"loss": 2.7672,
"step": 167900
},
{
"epoch": 0.56,
"learning_rate": 2.2193533010248672e-05,
"loss": 2.7739,
"step": 168000
},
{
"epoch": 0.56,
"learning_rate": 2.2176981541802388e-05,
"loss": 2.7772,
"step": 168100
},
{
"epoch": 0.56,
"learning_rate": 2.216043007335611e-05,
"loss": 2.779,
"step": 168200
},
{
"epoch": 0.56,
"learning_rate": 2.214387860490983e-05,
"loss": 2.7805,
"step": 168300
},
{
"epoch": 0.56,
"learning_rate": 2.2127327136463548e-05,
"loss": 2.7793,
"step": 168400
},
{
"epoch": 0.56,
"learning_rate": 2.2110775668017267e-05,
"loss": 2.7771,
"step": 168500
},
{
"epoch": 0.56,
"learning_rate": 2.2094224199570986e-05,
"loss": 2.7875,
"step": 168600
},
{
"epoch": 0.56,
"learning_rate": 2.207767273112471e-05,
"loss": 2.7675,
"step": 168700
},
{
"epoch": 0.56,
"learning_rate": 2.2061121262678424e-05,
"loss": 2.7653,
"step": 168800
},
{
"epoch": 0.56,
"learning_rate": 2.2044569794232146e-05,
"loss": 2.7586,
"step": 168900
},
{
"epoch": 0.56,
"learning_rate": 2.2028018325785865e-05,
"loss": 2.7756,
"step": 169000
},
{
"epoch": 0.56,
"learning_rate": 2.2011466857339584e-05,
"loss": 2.7629,
"step": 169100
},
{
"epoch": 0.56,
"learning_rate": 2.1994915388893303e-05,
"loss": 2.7659,
"step": 169200
},
{
"epoch": 0.56,
"learning_rate": 2.1978363920447022e-05,
"loss": 2.7691,
"step": 169300
},
{
"epoch": 0.56,
"learning_rate": 2.1961812452000745e-05,
"loss": 2.7651,
"step": 169400
},
{
"epoch": 0.56,
"learning_rate": 2.194526098355446e-05,
"loss": 2.7774,
"step": 169500
},
{
"epoch": 0.56,
"learning_rate": 2.1928709515108183e-05,
"loss": 2.7766,
"step": 169600
},
{
"epoch": 0.56,
"learning_rate": 2.1912158046661902e-05,
"loss": 2.7809,
"step": 169700
},
{
"epoch": 0.56,
"learning_rate": 2.189560657821562e-05,
"loss": 2.7745,
"step": 169800
},
{
"epoch": 0.56,
"learning_rate": 2.187905510976934e-05,
"loss": 2.7677,
"step": 169900
},
{
"epoch": 0.56,
"learning_rate": 2.186250364132306e-05,
"loss": 2.7652,
"step": 170000
},
{
"epoch": 0.56,
"learning_rate": 2.184595217287678e-05,
"loss": 2.762,
"step": 170100
},
{
"epoch": 0.56,
"learning_rate": 2.1829400704430497e-05,
"loss": 2.7749,
"step": 170200
},
{
"epoch": 0.56,
"learning_rate": 2.181284923598422e-05,
"loss": 2.7707,
"step": 170300
},
{
"epoch": 0.56,
"learning_rate": 2.1796297767537938e-05,
"loss": 2.7644,
"step": 170400
},
{
"epoch": 0.56,
"learning_rate": 2.1779746299091654e-05,
"loss": 2.7718,
"step": 170500
},
{
"epoch": 0.56,
"learning_rate": 2.1763194830645376e-05,
"loss": 2.77,
"step": 170600
},
{
"epoch": 0.57,
"learning_rate": 2.1746643362199095e-05,
"loss": 2.7747,
"step": 170700
},
{
"epoch": 0.57,
"learning_rate": 2.1730091893752817e-05,
"loss": 2.7642,
"step": 170800
},
{
"epoch": 0.57,
"learning_rate": 2.1713540425306533e-05,
"loss": 2.7832,
"step": 170900
},
{
"epoch": 0.57,
"learning_rate": 2.1696988956860255e-05,
"loss": 2.78,
"step": 171000
},
{
"epoch": 0.57,
"learning_rate": 2.1680437488413974e-05,
"loss": 2.7665,
"step": 171100
},
{
"epoch": 0.57,
"learning_rate": 2.166388601996769e-05,
"loss": 2.761,
"step": 171200
},
{
"epoch": 0.57,
"learning_rate": 2.1647334551521412e-05,
"loss": 2.7768,
"step": 171300
},
{
"epoch": 0.57,
"learning_rate": 2.163078308307513e-05,
"loss": 2.7644,
"step": 171400
},
{
"epoch": 0.57,
"learning_rate": 2.1614231614628854e-05,
"loss": 2.7685,
"step": 171500
},
{
"epoch": 0.57,
"learning_rate": 2.159768014618257e-05,
"loss": 2.7757,
"step": 171600
},
{
"epoch": 0.57,
"learning_rate": 2.1581128677736288e-05,
"loss": 2.7757,
"step": 171700
},
{
"epoch": 0.57,
"learning_rate": 2.156457720929001e-05,
"loss": 2.7659,
"step": 171800
},
{
"epoch": 0.57,
"learning_rate": 2.1548025740843726e-05,
"loss": 2.7876,
"step": 171900
},
{
"epoch": 0.57,
"learning_rate": 2.153147427239745e-05,
"loss": 2.7802,
"step": 172000
},
{
"epoch": 0.57,
"learning_rate": 2.1514922803951167e-05,
"loss": 2.7587,
"step": 172100
},
{
"epoch": 0.57,
"learning_rate": 2.149837133550489e-05,
"loss": 2.7759,
"step": 172200
},
{
"epoch": 0.57,
"learning_rate": 2.1481819867058605e-05,
"loss": 2.7621,
"step": 172300
},
{
"epoch": 0.57,
"learning_rate": 2.1465268398612324e-05,
"loss": 2.7692,
"step": 172400
},
{
"epoch": 0.57,
"learning_rate": 2.1448716930166047e-05,
"loss": 2.7707,
"step": 172500
},
{
"epoch": 0.57,
"learning_rate": 2.1432165461719762e-05,
"loss": 2.7617,
"step": 172600
},
{
"epoch": 0.57,
"learning_rate": 2.1415613993273485e-05,
"loss": 2.7636,
"step": 172700
},
{
"epoch": 0.57,
"learning_rate": 2.1399062524827204e-05,
"loss": 2.7678,
"step": 172800
},
{
"epoch": 0.57,
"learning_rate": 2.1382511056380926e-05,
"loss": 2.7697,
"step": 172900
},
{
"epoch": 0.57,
"learning_rate": 2.136595958793464e-05,
"loss": 2.746,
"step": 173000
},
{
"epoch": 0.57,
"learning_rate": 2.134940811948836e-05,
"loss": 2.7609,
"step": 173100
},
{
"epoch": 0.57,
"learning_rate": 2.1332856651042083e-05,
"loss": 2.7696,
"step": 173200
},
{
"epoch": 0.57,
"learning_rate": 2.13163051825958e-05,
"loss": 2.7779,
"step": 173300
},
{
"epoch": 0.57,
"learning_rate": 2.129975371414952e-05,
"loss": 2.7735,
"step": 173400
},
{
"epoch": 0.57,
"learning_rate": 2.128320224570324e-05,
"loss": 2.7653,
"step": 173500
},
{
"epoch": 0.57,
"learning_rate": 2.126665077725696e-05,
"loss": 2.7689,
"step": 173600
},
{
"epoch": 0.57,
"learning_rate": 2.1250099308810678e-05,
"loss": 2.7639,
"step": 173700
},
{
"epoch": 0.58,
"learning_rate": 2.1233547840364397e-05,
"loss": 2.7797,
"step": 173800
},
{
"epoch": 0.58,
"learning_rate": 2.121699637191812e-05,
"loss": 2.7747,
"step": 173900
},
{
"epoch": 0.58,
"learning_rate": 2.1200444903471835e-05,
"loss": 2.7779,
"step": 174000
},
{
"epoch": 0.58,
"learning_rate": 2.1183893435025557e-05,
"loss": 2.7676,
"step": 174100
},
{
"epoch": 0.58,
"learning_rate": 2.1167341966579276e-05,
"loss": 2.7788,
"step": 174200
},
{
"epoch": 0.58,
"learning_rate": 2.1150790498132995e-05,
"loss": 2.7644,
"step": 174300
},
{
"epoch": 0.58,
"learning_rate": 2.1134239029686714e-05,
"loss": 2.7671,
"step": 174400
},
{
"epoch": 0.58,
"learning_rate": 2.1117687561240433e-05,
"loss": 2.7686,
"step": 174500
},
{
"epoch": 0.58,
"learning_rate": 2.1101136092794156e-05,
"loss": 2.7638,
"step": 174600
},
{
"epoch": 0.58,
"learning_rate": 2.108458462434787e-05,
"loss": 2.774,
"step": 174700
},
{
"epoch": 0.58,
"learning_rate": 2.1068033155901594e-05,
"loss": 2.7704,
"step": 174800
},
{
"epoch": 0.58,
"learning_rate": 2.1051481687455313e-05,
"loss": 2.7678,
"step": 174900
},
{
"epoch": 0.58,
"learning_rate": 2.103493021900903e-05,
"loss": 2.765,
"step": 175000
},
{
"epoch": 0.58,
"learning_rate": 2.101837875056275e-05,
"loss": 2.7626,
"step": 175100
},
{
"epoch": 0.58,
"learning_rate": 2.100182728211647e-05,
"loss": 2.7687,
"step": 175200
},
{
"epoch": 0.58,
"learning_rate": 2.0985275813670192e-05,
"loss": 2.7849,
"step": 175300
},
{
"epoch": 0.58,
"learning_rate": 2.0968724345223907e-05,
"loss": 2.7829,
"step": 175400
},
{
"epoch": 0.58,
"learning_rate": 2.095217287677763e-05,
"loss": 2.7749,
"step": 175500
},
{
"epoch": 0.58,
"learning_rate": 2.093562140833135e-05,
"loss": 2.7706,
"step": 175600
},
{
"epoch": 0.58,
"learning_rate": 2.0919069939885068e-05,
"loss": 2.7783,
"step": 175700
},
{
"epoch": 0.58,
"learning_rate": 2.0902518471438787e-05,
"loss": 2.7617,
"step": 175800
},
{
"epoch": 0.58,
"learning_rate": 2.0885967002992506e-05,
"loss": 2.7624,
"step": 175900
},
{
"epoch": 0.58,
"learning_rate": 2.0869415534546228e-05,
"loss": 2.7728,
"step": 176000
},
{
"epoch": 0.58,
"learning_rate": 2.0852864066099944e-05,
"loss": 2.7636,
"step": 176100
},
{
"epoch": 0.58,
"learning_rate": 2.0836312597653666e-05,
"loss": 2.7811,
"step": 176200
},
{
"epoch": 0.58,
"learning_rate": 2.0819761129207385e-05,
"loss": 2.7646,
"step": 176300
},
{
"epoch": 0.58,
"learning_rate": 2.0803209660761104e-05,
"loss": 2.7692,
"step": 176400
},
{
"epoch": 0.58,
"learning_rate": 2.0786658192314823e-05,
"loss": 2.7841,
"step": 176500
},
{
"epoch": 0.58,
"learning_rate": 2.0770106723868542e-05,
"loss": 2.7692,
"step": 176600
},
{
"epoch": 0.58,
"learning_rate": 2.0753555255422264e-05,
"loss": 2.7731,
"step": 176700
},
{
"epoch": 0.59,
"learning_rate": 2.073700378697598e-05,
"loss": 2.7535,
"step": 176800
},
{
"epoch": 0.59,
"learning_rate": 2.0720452318529702e-05,
"loss": 2.7508,
"step": 176900
},
{
"epoch": 0.59,
"learning_rate": 2.070390085008342e-05,
"loss": 2.7703,
"step": 177000
},
{
"epoch": 0.59,
"learning_rate": 2.068734938163714e-05,
"loss": 2.7787,
"step": 177100
},
{
"epoch": 0.59,
"learning_rate": 2.067079791319086e-05,
"loss": 2.7648,
"step": 177200
},
{
"epoch": 0.59,
"learning_rate": 2.0654246444744578e-05,
"loss": 2.7818,
"step": 177300
},
{
"epoch": 0.59,
"learning_rate": 2.0637694976298297e-05,
"loss": 2.761,
"step": 177400
},
{
"epoch": 0.59,
"learning_rate": 2.0621143507852016e-05,
"loss": 2.7701,
"step": 177500
},
{
"epoch": 0.59,
"learning_rate": 2.060459203940574e-05,
"loss": 2.7804,
"step": 177600
},
{
"epoch": 0.59,
"learning_rate": 2.0588040570959458e-05,
"loss": 2.7745,
"step": 177700
},
{
"epoch": 0.59,
"learning_rate": 2.0571489102513177e-05,
"loss": 2.7703,
"step": 177800
},
{
"epoch": 0.59,
"learning_rate": 2.0554937634066896e-05,
"loss": 2.7754,
"step": 177900
},
{
"epoch": 0.59,
"learning_rate": 2.0538386165620615e-05,
"loss": 2.779,
"step": 178000
},
{
"epoch": 0.59,
"learning_rate": 2.0521834697174334e-05,
"loss": 2.7749,
"step": 178100
},
{
"epoch": 0.59,
"learning_rate": 2.0505283228728052e-05,
"loss": 2.7779,
"step": 178200
},
{
"epoch": 0.59,
"learning_rate": 2.0488731760281775e-05,
"loss": 2.7831,
"step": 178300
},
{
"epoch": 0.59,
"learning_rate": 2.0472180291835494e-05,
"loss": 2.7718,
"step": 178400
},
{
"epoch": 0.59,
"learning_rate": 2.0455628823389213e-05,
"loss": 2.7558,
"step": 178500
},
{
"epoch": 0.59,
"learning_rate": 2.0439077354942932e-05,
"loss": 2.7524,
"step": 178600
},
{
"epoch": 0.59,
"learning_rate": 2.042252588649665e-05,
"loss": 2.7689,
"step": 178700
},
{
"epoch": 0.59,
"learning_rate": 2.040597441805037e-05,
"loss": 2.7726,
"step": 178800
},
{
"epoch": 0.59,
"learning_rate": 2.038942294960409e-05,
"loss": 2.7735,
"step": 178900
},
{
"epoch": 0.59,
"learning_rate": 2.037287148115781e-05,
"loss": 2.7695,
"step": 179000
},
{
"epoch": 0.59,
"learning_rate": 2.035632001271153e-05,
"loss": 2.7745,
"step": 179100
},
{
"epoch": 0.59,
"learning_rate": 2.033976854426525e-05,
"loss": 2.7693,
"step": 179200
},
{
"epoch": 0.59,
"learning_rate": 2.0323217075818968e-05,
"loss": 2.7759,
"step": 179300
},
{
"epoch": 0.59,
"learning_rate": 2.0306665607372687e-05,
"loss": 2.7561,
"step": 179400
},
{
"epoch": 0.59,
"learning_rate": 2.0290114138926406e-05,
"loss": 2.7643,
"step": 179500
},
{
"epoch": 0.59,
"learning_rate": 2.0273562670480125e-05,
"loss": 2.7722,
"step": 179600
},
{
"epoch": 0.59,
"learning_rate": 2.0257011202033847e-05,
"loss": 2.7734,
"step": 179700
},
{
"epoch": 0.6,
"learning_rate": 2.0240459733587566e-05,
"loss": 2.764,
"step": 179800
},
{
"epoch": 0.6,
"learning_rate": 2.0223908265141285e-05,
"loss": 2.7772,
"step": 179900
},
{
"epoch": 0.6,
"learning_rate": 2.0207356796695004e-05,
"loss": 2.7599,
"step": 180000
},
{
"epoch": 0.6,
"learning_rate": 2.0190805328248723e-05,
"loss": 2.7759,
"step": 180100
},
{
"epoch": 0.6,
"learning_rate": 2.0174253859802442e-05,
"loss": 2.7576,
"step": 180200
},
{
"epoch": 0.6,
"learning_rate": 2.015770239135616e-05,
"loss": 2.7691,
"step": 180300
},
{
"epoch": 0.6,
"learning_rate": 2.0141150922909884e-05,
"loss": 2.7625,
"step": 180400
},
{
"epoch": 0.6,
"learning_rate": 2.01245994544636e-05,
"loss": 2.7595,
"step": 180500
},
{
"epoch": 0.6,
"learning_rate": 2.0108047986017318e-05,
"loss": 2.7678,
"step": 180600
},
{
"epoch": 0.6,
"learning_rate": 2.009149651757104e-05,
"loss": 2.7664,
"step": 180700
},
{
"epoch": 0.6,
"learning_rate": 2.007494504912476e-05,
"loss": 2.7717,
"step": 180800
},
{
"epoch": 0.6,
"learning_rate": 2.005839358067848e-05,
"loss": 2.7672,
"step": 180900
},
{
"epoch": 0.6,
"learning_rate": 2.0041842112232198e-05,
"loss": 2.7558,
"step": 181000
},
{
"epoch": 0.6,
"learning_rate": 2.002529064378592e-05,
"loss": 2.764,
"step": 181100
},
{
"epoch": 0.6,
"learning_rate": 2.0008739175339636e-05,
"loss": 2.787,
"step": 181200
},
{
"epoch": 0.6,
"learning_rate": 1.9992187706893355e-05,
"loss": 2.7576,
"step": 181300
},
{
"epoch": 0.6,
"learning_rate": 1.9975636238447077e-05,
"loss": 2.7626,
"step": 181400
},
{
"epoch": 0.6,
"learning_rate": 1.9959084770000796e-05,
"loss": 2.7736,
"step": 181500
},
{
"epoch": 0.6,
"learning_rate": 1.9942533301554515e-05,
"loss": 2.779,
"step": 181600
},
{
"epoch": 0.6,
"learning_rate": 1.9925981833108234e-05,
"loss": 2.7547,
"step": 181700
},
{
"epoch": 0.6,
"learning_rate": 1.9909430364661956e-05,
"loss": 2.7641,
"step": 181800
},
{
"epoch": 0.6,
"learning_rate": 1.9892878896215672e-05,
"loss": 2.7756,
"step": 181900
},
{
"epoch": 0.6,
"learning_rate": 1.987632742776939e-05,
"loss": 2.77,
"step": 182000
},
{
"epoch": 0.6,
"learning_rate": 1.9859775959323113e-05,
"loss": 2.7693,
"step": 182100
},
{
"epoch": 0.6,
"learning_rate": 1.9843224490876832e-05,
"loss": 2.7573,
"step": 182200
},
{
"epoch": 0.6,
"learning_rate": 1.982667302243055e-05,
"loss": 2.7708,
"step": 182300
},
{
"epoch": 0.6,
"learning_rate": 1.981012155398427e-05,
"loss": 2.7711,
"step": 182400
},
{
"epoch": 0.6,
"learning_rate": 1.979357008553799e-05,
"loss": 2.7591,
"step": 182500
},
{
"epoch": 0.6,
"learning_rate": 1.9777018617091708e-05,
"loss": 2.7848,
"step": 182600
},
{
"epoch": 0.6,
"learning_rate": 1.9760467148645427e-05,
"loss": 2.7717,
"step": 182700
},
{
"epoch": 0.61,
"learning_rate": 1.974391568019915e-05,
"loss": 2.76,
"step": 182800
},
{
"epoch": 0.61,
"learning_rate": 1.972736421175287e-05,
"loss": 2.7876,
"step": 182900
},
{
"epoch": 0.61,
"learning_rate": 1.9710812743306587e-05,
"loss": 2.7658,
"step": 183000
},
{
"epoch": 0.61,
"learning_rate": 1.9694261274860306e-05,
"loss": 2.7631,
"step": 183100
},
{
"epoch": 0.61,
"learning_rate": 1.9677709806414025e-05,
"loss": 2.7673,
"step": 183200
},
{
"epoch": 0.61,
"learning_rate": 1.9661158337967744e-05,
"loss": 2.7736,
"step": 183300
},
{
"epoch": 0.61,
"learning_rate": 1.9644606869521463e-05,
"loss": 2.7583,
"step": 183400
},
{
"epoch": 0.61,
"learning_rate": 1.9628055401075186e-05,
"loss": 2.7608,
"step": 183500
},
{
"epoch": 0.61,
"learning_rate": 1.96115039326289e-05,
"loss": 2.7583,
"step": 183600
},
{
"epoch": 0.61,
"learning_rate": 1.9594952464182624e-05,
"loss": 2.7671,
"step": 183700
},
{
"epoch": 0.61,
"learning_rate": 1.9578400995736343e-05,
"loss": 2.7809,
"step": 183800
},
{
"epoch": 0.61,
"learning_rate": 1.956184952729006e-05,
"loss": 2.7612,
"step": 183900
},
{
"epoch": 0.61,
"learning_rate": 1.954529805884378e-05,
"loss": 2.7756,
"step": 184000
},
{
"epoch": 0.61,
"learning_rate": 1.95287465903975e-05,
"loss": 2.778,
"step": 184100
},
{
"epoch": 0.61,
"learning_rate": 1.9512195121951222e-05,
"loss": 2.7684,
"step": 184200
},
{
"epoch": 0.61,
"learning_rate": 1.9495643653504938e-05,
"loss": 2.7734,
"step": 184300
},
{
"epoch": 0.61,
"learning_rate": 1.947909218505866e-05,
"loss": 2.7585,
"step": 184400
},
{
"epoch": 0.61,
"learning_rate": 1.946254071661238e-05,
"loss": 2.7597,
"step": 184500
},
{
"epoch": 0.61,
"learning_rate": 1.9445989248166098e-05,
"loss": 2.7885,
"step": 184600
},
{
"epoch": 0.61,
"learning_rate": 1.9429437779719817e-05,
"loss": 2.7789,
"step": 184700
},
{
"epoch": 0.61,
"learning_rate": 1.9412886311273536e-05,
"loss": 2.7688,
"step": 184800
},
{
"epoch": 0.61,
"learning_rate": 1.9396334842827258e-05,
"loss": 2.7744,
"step": 184900
},
{
"epoch": 0.61,
"learning_rate": 1.9379783374380974e-05,
"loss": 2.7818,
"step": 185000
},
{
"epoch": 0.61,
"learning_rate": 1.9363231905934696e-05,
"loss": 2.7694,
"step": 185100
},
{
"epoch": 0.61,
"learning_rate": 1.9346680437488415e-05,
"loss": 2.7829,
"step": 185200
},
{
"epoch": 0.61,
"learning_rate": 1.9330128969042134e-05,
"loss": 2.763,
"step": 185300
},
{
"epoch": 0.61,
"learning_rate": 1.9313577500595853e-05,
"loss": 2.7785,
"step": 185400
},
{
"epoch": 0.61,
"learning_rate": 1.9297026032149572e-05,
"loss": 2.7672,
"step": 185500
},
{
"epoch": 0.61,
"learning_rate": 1.9280474563703294e-05,
"loss": 2.7602,
"step": 185600
},
{
"epoch": 0.61,
"learning_rate": 1.926392309525701e-05,
"loss": 2.7606,
"step": 185700
},
{
"epoch": 0.62,
"learning_rate": 1.9247371626810732e-05,
"loss": 2.7565,
"step": 185800
},
{
"epoch": 0.62,
"learning_rate": 1.923082015836445e-05,
"loss": 2.7636,
"step": 185900
},
{
"epoch": 0.62,
"learning_rate": 1.921426868991817e-05,
"loss": 2.7625,
"step": 186000
},
{
"epoch": 0.62,
"learning_rate": 1.919771722147189e-05,
"loss": 2.7672,
"step": 186100
},
{
"epoch": 0.62,
"learning_rate": 1.918116575302561e-05,
"loss": 2.7605,
"step": 186200
},
{
"epoch": 0.62,
"learning_rate": 1.916461428457933e-05,
"loss": 2.7693,
"step": 186300
},
{
"epoch": 0.62,
"learning_rate": 1.9148062816133046e-05,
"loss": 2.7771,
"step": 186400
},
{
"epoch": 0.62,
"learning_rate": 1.913151134768677e-05,
"loss": 2.752,
"step": 186500
},
{
"epoch": 0.62,
"learning_rate": 1.9114959879240488e-05,
"loss": 2.7688,
"step": 186600
},
{
"epoch": 0.62,
"learning_rate": 1.9098408410794207e-05,
"loss": 2.7591,
"step": 186700
},
{
"epoch": 0.62,
"learning_rate": 1.9081856942347926e-05,
"loss": 2.7863,
"step": 186800
},
{
"epoch": 0.62,
"learning_rate": 1.9065305473901645e-05,
"loss": 2.7794,
"step": 186900
},
{
"epoch": 0.62,
"learning_rate": 1.9048754005455367e-05,
"loss": 2.7691,
"step": 187000
},
{
"epoch": 0.62,
"learning_rate": 1.9032202537009083e-05,
"loss": 2.7752,
"step": 187100
},
{
"epoch": 0.62,
"learning_rate": 1.9015651068562805e-05,
"loss": 2.7635,
"step": 187200
},
{
"epoch": 0.62,
"learning_rate": 1.8999099600116524e-05,
"loss": 2.7686,
"step": 187300
},
{
"epoch": 0.62,
"learning_rate": 1.8982548131670243e-05,
"loss": 2.7562,
"step": 187400
},
{
"epoch": 0.62,
"learning_rate": 1.8965996663223962e-05,
"loss": 2.7718,
"step": 187500
},
{
"epoch": 0.62,
"learning_rate": 1.894944519477768e-05,
"loss": 2.7574,
"step": 187600
},
{
"epoch": 0.62,
"learning_rate": 1.8932893726331403e-05,
"loss": 2.775,
"step": 187700
},
{
"epoch": 0.62,
"learning_rate": 1.891634225788512e-05,
"loss": 2.7597,
"step": 187800
},
{
"epoch": 0.62,
"learning_rate": 1.889979078943884e-05,
"loss": 2.785,
"step": 187900
},
{
"epoch": 0.62,
"learning_rate": 1.888323932099256e-05,
"loss": 2.7544,
"step": 188000
},
{
"epoch": 0.62,
"learning_rate": 1.886668785254628e-05,
"loss": 2.7748,
"step": 188100
},
{
"epoch": 0.62,
"learning_rate": 1.8850136384099998e-05,
"loss": 2.7543,
"step": 188200
},
{
"epoch": 0.62,
"learning_rate": 1.8833584915653717e-05,
"loss": 2.769,
"step": 188300
},
{
"epoch": 0.62,
"learning_rate": 1.881703344720744e-05,
"loss": 2.7602,
"step": 188400
},
{
"epoch": 0.62,
"learning_rate": 1.8800481978761155e-05,
"loss": 2.7781,
"step": 188500
},
{
"epoch": 0.62,
"learning_rate": 1.8783930510314877e-05,
"loss": 2.7662,
"step": 188600
},
{
"epoch": 0.62,
"learning_rate": 1.8767379041868596e-05,
"loss": 2.7859,
"step": 188700
},
{
"epoch": 0.62,
"learning_rate": 1.8750827573422315e-05,
"loss": 2.7706,
"step": 188800
},
{
"epoch": 0.63,
"learning_rate": 1.8734276104976034e-05,
"loss": 2.7667,
"step": 188900
},
{
"epoch": 0.63,
"learning_rate": 1.8717724636529753e-05,
"loss": 2.7641,
"step": 189000
},
{
"epoch": 0.63,
"learning_rate": 1.8701173168083476e-05,
"loss": 2.7637,
"step": 189100
},
{
"epoch": 0.63,
"learning_rate": 1.868462169963719e-05,
"loss": 2.7765,
"step": 189200
},
{
"epoch": 0.63,
"learning_rate": 1.8668070231190914e-05,
"loss": 2.7639,
"step": 189300
},
{
"epoch": 0.63,
"learning_rate": 1.8651518762744633e-05,
"loss": 2.7632,
"step": 189400
},
{
"epoch": 0.63,
"learning_rate": 1.863496729429835e-05,
"loss": 2.7781,
"step": 189500
},
{
"epoch": 0.63,
"learning_rate": 1.861841582585207e-05,
"loss": 2.759,
"step": 189600
},
{
"epoch": 0.63,
"learning_rate": 1.860186435740579e-05,
"loss": 2.7747,
"step": 189700
},
{
"epoch": 0.63,
"learning_rate": 1.8585312888959512e-05,
"loss": 2.7788,
"step": 189800
},
{
"epoch": 0.63,
"learning_rate": 1.8568761420513228e-05,
"loss": 2.7649,
"step": 189900
},
{
"epoch": 0.63,
"learning_rate": 1.855220995206695e-05,
"loss": 2.7657,
"step": 190000
},
{
"epoch": 0.63,
"learning_rate": 1.853565848362067e-05,
"loss": 2.7779,
"step": 190100
},
{
"epoch": 0.63,
"learning_rate": 1.8519107015174385e-05,
"loss": 2.7649,
"step": 190200
},
{
"epoch": 0.63,
"learning_rate": 1.8502555546728107e-05,
"loss": 2.7537,
"step": 190300
},
{
"epoch": 0.63,
"learning_rate": 1.8486004078281826e-05,
"loss": 2.7542,
"step": 190400
},
{
"epoch": 0.63,
"learning_rate": 1.8469452609835545e-05,
"loss": 2.7635,
"step": 190500
},
{
"epoch": 0.63,
"learning_rate": 1.8452901141389264e-05,
"loss": 2.7576,
"step": 190600
},
{
"epoch": 0.63,
"learning_rate": 1.8436349672942986e-05,
"loss": 2.7732,
"step": 190700
},
{
"epoch": 0.63,
"learning_rate": 1.8419798204496705e-05,
"loss": 2.763,
"step": 190800
},
{
"epoch": 0.63,
"learning_rate": 1.840324673605042e-05,
"loss": 2.7685,
"step": 190900
},
{
"epoch": 0.63,
"learning_rate": 1.8386695267604143e-05,
"loss": 2.7675,
"step": 191000
},
{
"epoch": 0.63,
"learning_rate": 1.8370143799157862e-05,
"loss": 2.757,
"step": 191100
},
{
"epoch": 0.63,
"learning_rate": 1.835359233071158e-05,
"loss": 2.7541,
"step": 191200
},
{
"epoch": 0.63,
"learning_rate": 1.83370408622653e-05,
"loss": 2.773,
"step": 191300
},
{
"epoch": 0.63,
"learning_rate": 1.832048939381902e-05,
"loss": 2.7706,
"step": 191400
},
{
"epoch": 0.63,
"learning_rate": 1.830393792537274e-05,
"loss": 2.7747,
"step": 191500
},
{
"epoch": 0.63,
"learning_rate": 1.8287386456926457e-05,
"loss": 2.7699,
"step": 191600
},
{
"epoch": 0.63,
"learning_rate": 1.827083498848018e-05,
"loss": 2.7587,
"step": 191700
},
{
"epoch": 0.63,
"learning_rate": 1.82542835200339e-05,
"loss": 2.7776,
"step": 191800
},
{
"epoch": 0.64,
"learning_rate": 1.8237732051587617e-05,
"loss": 2.7496,
"step": 191900
},
{
"epoch": 0.64,
"learning_rate": 1.8221180583141336e-05,
"loss": 2.7576,
"step": 192000
},
{
"epoch": 0.64,
"learning_rate": 1.8204629114695055e-05,
"loss": 2.7596,
"step": 192100
},
{
"epoch": 0.64,
"learning_rate": 1.8188077646248778e-05,
"loss": 2.7648,
"step": 192200
},
{
"epoch": 0.64,
"learning_rate": 1.8171526177802493e-05,
"loss": 2.7717,
"step": 192300
},
{
"epoch": 0.64,
"learning_rate": 1.8154974709356216e-05,
"loss": 2.7882,
"step": 192400
},
{
"epoch": 0.64,
"learning_rate": 1.8138423240909935e-05,
"loss": 2.7604,
"step": 192500
},
{
"epoch": 0.64,
"learning_rate": 1.8121871772463654e-05,
"loss": 2.7539,
"step": 192600
},
{
"epoch": 0.64,
"learning_rate": 1.8105320304017373e-05,
"loss": 2.7602,
"step": 192700
},
{
"epoch": 0.64,
"learning_rate": 1.8088768835571092e-05,
"loss": 2.7713,
"step": 192800
},
{
"epoch": 0.64,
"learning_rate": 1.8072217367124814e-05,
"loss": 2.7632,
"step": 192900
},
{
"epoch": 0.64,
"learning_rate": 1.805566589867853e-05,
"loss": 2.7697,
"step": 193000
},
{
"epoch": 0.64,
"learning_rate": 1.8039114430232252e-05,
"loss": 2.7612,
"step": 193100
},
{
"epoch": 0.64,
"learning_rate": 1.802256296178597e-05,
"loss": 2.7671,
"step": 193200
},
{
"epoch": 0.64,
"learning_rate": 1.800601149333969e-05,
"loss": 2.774,
"step": 193300
},
{
"epoch": 0.64,
"learning_rate": 1.798946002489341e-05,
"loss": 2.7677,
"step": 193400
},
{
"epoch": 0.64,
"learning_rate": 1.7972908556447128e-05,
"loss": 2.7751,
"step": 193500
},
{
"epoch": 0.64,
"learning_rate": 1.7956357088000847e-05,
"loss": 2.7664,
"step": 193600
},
{
"epoch": 0.64,
"learning_rate": 1.7939805619554566e-05,
"loss": 2.7738,
"step": 193700
},
{
"epoch": 0.64,
"learning_rate": 1.7923254151108288e-05,
"loss": 2.7726,
"step": 193800
},
{
"epoch": 0.64,
"learning_rate": 1.7906702682662007e-05,
"loss": 2.7597,
"step": 193900
},
{
"epoch": 0.64,
"learning_rate": 1.7890151214215726e-05,
"loss": 2.7672,
"step": 194000
},
{
"epoch": 0.64,
"learning_rate": 1.7873599745769445e-05,
"loss": 2.7716,
"step": 194100
},
{
"epoch": 0.64,
"learning_rate": 1.7857048277323164e-05,
"loss": 2.7638,
"step": 194200
},
{
"epoch": 0.64,
"learning_rate": 1.7840496808876883e-05,
"loss": 2.7747,
"step": 194300
},
{
"epoch": 0.64,
"learning_rate": 1.7823945340430602e-05,
"loss": 2.7643,
"step": 194400
},
{
"epoch": 0.64,
"learning_rate": 1.7807393871984325e-05,
"loss": 2.7669,
"step": 194500
},
{
"epoch": 0.64,
"learning_rate": 1.7790842403538044e-05,
"loss": 2.7663,
"step": 194600
},
{
"epoch": 0.64,
"learning_rate": 1.7774290935091763e-05,
"loss": 2.7682,
"step": 194700
},
{
"epoch": 0.64,
"learning_rate": 1.775773946664548e-05,
"loss": 2.7708,
"step": 194800
},
{
"epoch": 0.65,
"learning_rate": 1.77411879981992e-05,
"loss": 2.7674,
"step": 194900
},
{
"epoch": 0.65,
"learning_rate": 1.772463652975292e-05,
"loss": 2.7655,
"step": 195000
},
{
"epoch": 0.65,
"learning_rate": 1.770808506130664e-05,
"loss": 2.7644,
"step": 195100
},
{
"epoch": 0.65,
"learning_rate": 1.769153359286036e-05,
"loss": 2.767,
"step": 195200
},
{
"epoch": 0.65,
"learning_rate": 1.767498212441408e-05,
"loss": 2.755,
"step": 195300
},
{
"epoch": 0.65,
"learning_rate": 1.76584306559678e-05,
"loss": 2.7718,
"step": 195400
},
{
"epoch": 0.65,
"learning_rate": 1.7641879187521518e-05,
"loss": 2.7488,
"step": 195500
},
{
"epoch": 0.65,
"learning_rate": 1.7625327719075237e-05,
"loss": 2.7556,
"step": 195600
},
{
"epoch": 0.65,
"learning_rate": 1.7608776250628956e-05,
"loss": 2.7695,
"step": 195700
},
{
"epoch": 0.65,
"learning_rate": 1.7592224782182675e-05,
"loss": 2.7587,
"step": 195800
},
{
"epoch": 0.65,
"learning_rate": 1.7575673313736397e-05,
"loss": 2.7693,
"step": 195900
},
{
"epoch": 0.65,
"learning_rate": 1.7559121845290116e-05,
"loss": 2.7675,
"step": 196000
},
{
"epoch": 0.65,
"learning_rate": 1.7542570376843835e-05,
"loss": 2.7771,
"step": 196100
},
{
"epoch": 0.65,
"learning_rate": 1.7526018908397554e-05,
"loss": 2.7595,
"step": 196200
},
{
"epoch": 0.65,
"learning_rate": 1.7509467439951273e-05,
"loss": 2.7705,
"step": 196300
},
{
"epoch": 0.65,
"learning_rate": 1.7492915971504992e-05,
"loss": 2.7699,
"step": 196400
},
{
"epoch": 0.65,
"learning_rate": 1.747636450305871e-05,
"loss": 2.7563,
"step": 196500
},
{
"epoch": 0.65,
"learning_rate": 1.7459813034612433e-05,
"loss": 2.7528,
"step": 196600
},
{
"epoch": 0.65,
"learning_rate": 1.7443261566166152e-05,
"loss": 2.7555,
"step": 196700
},
{
"epoch": 0.65,
"learning_rate": 1.742671009771987e-05,
"loss": 2.7614,
"step": 196800
},
{
"epoch": 0.65,
"learning_rate": 1.741015862927359e-05,
"loss": 2.7654,
"step": 196900
},
{
"epoch": 0.65,
"learning_rate": 1.739360716082731e-05,
"loss": 2.7746,
"step": 197000
},
{
"epoch": 0.65,
"learning_rate": 1.7377055692381028e-05,
"loss": 2.7823,
"step": 197100
},
{
"epoch": 0.65,
"learning_rate": 1.7360504223934747e-05,
"loss": 2.7636,
"step": 197200
},
{
"epoch": 0.65,
"learning_rate": 1.734395275548847e-05,
"loss": 2.7673,
"step": 197300
},
{
"epoch": 0.65,
"learning_rate": 1.7327401287042185e-05,
"loss": 2.7687,
"step": 197400
},
{
"epoch": 0.65,
"learning_rate": 1.7310849818595908e-05,
"loss": 2.767,
"step": 197500
},
{
"epoch": 0.65,
"learning_rate": 1.7294298350149627e-05,
"loss": 2.7665,
"step": 197600
},
{
"epoch": 0.65,
"learning_rate": 1.7277746881703346e-05,
"loss": 2.766,
"step": 197700
},
{
"epoch": 0.65,
"learning_rate": 1.7261195413257065e-05,
"loss": 2.7693,
"step": 197800
},
{
"epoch": 0.66,
"learning_rate": 1.7244643944810784e-05,
"loss": 2.755,
"step": 197900
},
{
"epoch": 0.66,
"learning_rate": 1.7228092476364506e-05,
"loss": 2.7694,
"step": 198000
},
{
"epoch": 0.66,
"learning_rate": 1.721154100791822e-05,
"loss": 2.7638,
"step": 198100
},
{
"epoch": 0.66,
"learning_rate": 1.7194989539471944e-05,
"loss": 2.7543,
"step": 198200
},
{
"epoch": 0.66,
"learning_rate": 1.7178438071025663e-05,
"loss": 2.745,
"step": 198300
},
{
"epoch": 0.66,
"learning_rate": 1.7161886602579382e-05,
"loss": 2.7536,
"step": 198400
},
{
"epoch": 0.66,
"learning_rate": 1.71453351341331e-05,
"loss": 2.7689,
"step": 198500
},
{
"epoch": 0.66,
"learning_rate": 1.712878366568682e-05,
"loss": 2.7601,
"step": 198600
},
{
"epoch": 0.66,
"learning_rate": 1.7112232197240542e-05,
"loss": 2.746,
"step": 198700
},
{
"epoch": 0.66,
"learning_rate": 1.7095680728794258e-05,
"loss": 2.7534,
"step": 198800
},
{
"epoch": 0.66,
"learning_rate": 1.707912926034798e-05,
"loss": 2.7565,
"step": 198900
},
{
"epoch": 0.66,
"learning_rate": 1.70625777919017e-05,
"loss": 2.7682,
"step": 199000
},
{
"epoch": 0.66,
"learning_rate": 1.7046026323455418e-05,
"loss": 2.754,
"step": 199100
},
{
"epoch": 0.66,
"learning_rate": 1.7029474855009137e-05,
"loss": 2.7548,
"step": 199200
},
{
"epoch": 0.66,
"learning_rate": 1.7012923386562856e-05,
"loss": 2.7587,
"step": 199300
},
{
"epoch": 0.66,
"learning_rate": 1.699637191811658e-05,
"loss": 2.7688,
"step": 199400
},
{
"epoch": 0.66,
"learning_rate": 1.6979820449670294e-05,
"loss": 2.7688,
"step": 199500
},
{
"epoch": 0.66,
"learning_rate": 1.6963268981224016e-05,
"loss": 2.7574,
"step": 199600
},
{
"epoch": 0.66,
"learning_rate": 1.6946717512777735e-05,
"loss": 2.7669,
"step": 199700
},
{
"epoch": 0.66,
"learning_rate": 1.6930166044331454e-05,
"loss": 2.755,
"step": 199800
},
{
"epoch": 0.66,
"learning_rate": 1.6913614575885173e-05,
"loss": 2.7706,
"step": 199900
},
{
"epoch": 0.66,
"learning_rate": 1.6897063107438892e-05,
"loss": 2.7628,
"step": 200000
},
{
"epoch": 0.66,
"learning_rate": 1.6880511638992615e-05,
"loss": 2.7714,
"step": 200100
},
{
"epoch": 0.66,
"learning_rate": 1.686396017054633e-05,
"loss": 2.7604,
"step": 200200
},
{
"epoch": 0.66,
"learning_rate": 1.684740870210005e-05,
"loss": 2.7644,
"step": 200300
},
{
"epoch": 0.66,
"learning_rate": 1.683085723365377e-05,
"loss": 2.7634,
"step": 200400
},
{
"epoch": 0.66,
"learning_rate": 1.6814305765207487e-05,
"loss": 2.7519,
"step": 200500
},
{
"epoch": 0.66,
"learning_rate": 1.679775429676121e-05,
"loss": 2.7659,
"step": 200600
},
{
"epoch": 0.66,
"learning_rate": 1.678120282831493e-05,
"loss": 2.7687,
"step": 200700
},
{
"epoch": 0.66,
"learning_rate": 1.676465135986865e-05,
"loss": 2.769,
"step": 200800
},
{
"epoch": 0.67,
"learning_rate": 1.6748099891422367e-05,
"loss": 2.7653,
"step": 200900
},
{
"epoch": 0.67,
"learning_rate": 1.6731548422976086e-05,
"loss": 2.7697,
"step": 201000
},
{
"epoch": 0.67,
"learning_rate": 1.6714996954529808e-05,
"loss": 2.7651,
"step": 201100
},
{
"epoch": 0.67,
"learning_rate": 1.6698445486083523e-05,
"loss": 2.7784,
"step": 201200
},
{
"epoch": 0.67,
"learning_rate": 1.6681894017637246e-05,
"loss": 2.7694,
"step": 201300
},
{
"epoch": 0.67,
"learning_rate": 1.6665342549190965e-05,
"loss": 2.7672,
"step": 201400
},
{
"epoch": 0.67,
"learning_rate": 1.6648791080744687e-05,
"loss": 2.7708,
"step": 201500
},
{
"epoch": 0.67,
"learning_rate": 1.6632239612298403e-05,
"loss": 2.7602,
"step": 201600
},
{
"epoch": 0.67,
"learning_rate": 1.6615688143852122e-05,
"loss": 2.7647,
"step": 201700
},
{
"epoch": 0.67,
"learning_rate": 1.6599136675405844e-05,
"loss": 2.7553,
"step": 201800
},
{
"epoch": 0.67,
"learning_rate": 1.658258520695956e-05,
"loss": 2.7653,
"step": 201900
},
{
"epoch": 0.67,
"learning_rate": 1.6566033738513282e-05,
"loss": 2.7675,
"step": 202000
},
{
"epoch": 0.67,
"learning_rate": 1.6549482270067e-05,
"loss": 2.7668,
"step": 202100
},
{
"epoch": 0.67,
"learning_rate": 1.6532930801620723e-05,
"loss": 2.7561,
"step": 202200
},
{
"epoch": 0.67,
"learning_rate": 1.651637933317444e-05,
"loss": 2.7594,
"step": 202300
},
{
"epoch": 0.67,
"learning_rate": 1.6499827864728158e-05,
"loss": 2.7636,
"step": 202400
},
{
"epoch": 0.67,
"learning_rate": 1.648327639628188e-05,
"loss": 2.7597,
"step": 202500
},
{
"epoch": 0.67,
"learning_rate": 1.6466724927835596e-05,
"loss": 2.7781,
"step": 202600
},
{
"epoch": 0.67,
"learning_rate": 1.645017345938932e-05,
"loss": 2.762,
"step": 202700
},
{
"epoch": 0.67,
"learning_rate": 1.6433621990943037e-05,
"loss": 2.7585,
"step": 202800
},
{
"epoch": 0.67,
"learning_rate": 1.6417070522496756e-05,
"loss": 2.7733,
"step": 202900
},
{
"epoch": 0.67,
"learning_rate": 1.6400519054050475e-05,
"loss": 2.7604,
"step": 203000
},
{
"epoch": 0.67,
"learning_rate": 1.6383967585604194e-05,
"loss": 2.7742,
"step": 203100
},
{
"epoch": 0.67,
"learning_rate": 1.6367416117157917e-05,
"loss": 2.756,
"step": 203200
},
{
"epoch": 0.67,
"learning_rate": 1.6350864648711632e-05,
"loss": 2.7693,
"step": 203300
},
{
"epoch": 0.67,
"learning_rate": 1.6334313180265355e-05,
"loss": 2.7572,
"step": 203400
},
{
"epoch": 0.67,
"learning_rate": 1.6317761711819074e-05,
"loss": 2.773,
"step": 203500
},
{
"epoch": 0.67,
"learning_rate": 1.6301210243372793e-05,
"loss": 2.754,
"step": 203600
},
{
"epoch": 0.67,
"learning_rate": 1.628465877492651e-05,
"loss": 2.7596,
"step": 203700
},
{
"epoch": 0.67,
"learning_rate": 1.626810730648023e-05,
"loss": 2.7827,
"step": 203800
},
{
"epoch": 0.67,
"learning_rate": 1.6251555838033953e-05,
"loss": 2.765,
"step": 203900
},
{
"epoch": 0.68,
"learning_rate": 1.623500436958767e-05,
"loss": 2.7653,
"step": 204000
},
{
"epoch": 0.68,
"learning_rate": 1.621845290114139e-05,
"loss": 2.7537,
"step": 204100
},
{
"epoch": 0.68,
"learning_rate": 1.620190143269511e-05,
"loss": 2.7681,
"step": 204200
},
{
"epoch": 0.68,
"learning_rate": 1.618534996424883e-05,
"loss": 2.764,
"step": 204300
},
{
"epoch": 0.68,
"learning_rate": 1.6168798495802548e-05,
"loss": 2.7661,
"step": 204400
},
{
"epoch": 0.68,
"learning_rate": 1.6152247027356267e-05,
"loss": 2.7472,
"step": 204500
},
{
"epoch": 0.68,
"learning_rate": 1.613569555890999e-05,
"loss": 2.7576,
"step": 204600
},
{
"epoch": 0.68,
"learning_rate": 1.6119144090463705e-05,
"loss": 2.7733,
"step": 204700
},
{
"epoch": 0.68,
"learning_rate": 1.6102592622017427e-05,
"loss": 2.7607,
"step": 204800
},
{
"epoch": 0.68,
"learning_rate": 1.6086041153571146e-05,
"loss": 2.7699,
"step": 204900
},
{
"epoch": 0.68,
"learning_rate": 1.6069489685124865e-05,
"loss": 2.7631,
"step": 205000
},
{
"epoch": 0.68,
"learning_rate": 1.6052938216678584e-05,
"loss": 2.7644,
"step": 205100
},
{
"epoch": 0.68,
"learning_rate": 1.6036386748232303e-05,
"loss": 2.7772,
"step": 205200
},
{
"epoch": 0.68,
"learning_rate": 1.6019835279786025e-05,
"loss": 2.7662,
"step": 205300
},
{
"epoch": 0.68,
"learning_rate": 1.600328381133974e-05,
"loss": 2.7611,
"step": 205400
},
{
"epoch": 0.68,
"learning_rate": 1.5986732342893463e-05,
"loss": 2.7568,
"step": 205500
},
{
"epoch": 0.68,
"learning_rate": 1.5970180874447182e-05,
"loss": 2.7605,
"step": 205600
},
{
"epoch": 0.68,
"learning_rate": 1.59536294060009e-05,
"loss": 2.7605,
"step": 205700
},
{
"epoch": 0.68,
"learning_rate": 1.593707793755462e-05,
"loss": 2.7673,
"step": 205800
},
{
"epoch": 0.68,
"learning_rate": 1.592052646910834e-05,
"loss": 2.7588,
"step": 205900
},
{
"epoch": 0.68,
"learning_rate": 1.5903975000662062e-05,
"loss": 2.7709,
"step": 206000
},
{
"epoch": 0.68,
"learning_rate": 1.5887423532215777e-05,
"loss": 2.7593,
"step": 206100
},
{
"epoch": 0.68,
"learning_rate": 1.58708720637695e-05,
"loss": 2.7684,
"step": 206200
},
{
"epoch": 0.68,
"learning_rate": 1.585432059532322e-05,
"loss": 2.7634,
"step": 206300
},
{
"epoch": 0.68,
"learning_rate": 1.5837769126876938e-05,
"loss": 2.7529,
"step": 206400
},
{
"epoch": 0.68,
"learning_rate": 1.5821217658430657e-05,
"loss": 2.7538,
"step": 206500
},
{
"epoch": 0.68,
"learning_rate": 1.5804666189984376e-05,
"loss": 2.7731,
"step": 206600
},
{
"epoch": 0.68,
"learning_rate": 1.5788114721538095e-05,
"loss": 2.776,
"step": 206700
},
{
"epoch": 0.68,
"learning_rate": 1.5771563253091814e-05,
"loss": 2.7787,
"step": 206800
},
{
"epoch": 0.68,
"learning_rate": 1.5755011784645536e-05,
"loss": 2.7656,
"step": 206900
},
{
"epoch": 0.69,
"learning_rate": 1.5738460316199255e-05,
"loss": 2.7516,
"step": 207000
},
{
"epoch": 0.69,
"learning_rate": 1.5721908847752974e-05,
"loss": 2.7672,
"step": 207100
},
{
"epoch": 0.69,
"learning_rate": 1.5705357379306693e-05,
"loss": 2.7688,
"step": 207200
},
{
"epoch": 0.69,
"learning_rate": 1.5688805910860412e-05,
"loss": 2.7646,
"step": 207300
},
{
"epoch": 0.69,
"learning_rate": 1.567225444241413e-05,
"loss": 2.7679,
"step": 207400
},
{
"epoch": 0.69,
"learning_rate": 1.565570297396785e-05,
"loss": 2.7549,
"step": 207500
},
{
"epoch": 0.69,
"learning_rate": 1.5639151505521572e-05,
"loss": 2.7493,
"step": 207600
},
{
"epoch": 0.69,
"learning_rate": 1.562260003707529e-05,
"loss": 2.7651,
"step": 207700
},
{
"epoch": 0.69,
"learning_rate": 1.560604856862901e-05,
"loss": 2.7626,
"step": 207800
},
{
"epoch": 0.69,
"learning_rate": 1.558949710018273e-05,
"loss": 2.7696,
"step": 207900
},
{
"epoch": 0.69,
"learning_rate": 1.5572945631736448e-05,
"loss": 2.7629,
"step": 208000
},
{
"epoch": 0.69,
"learning_rate": 1.5556394163290167e-05,
"loss": 2.7609,
"step": 208100
},
{
"epoch": 0.69,
"learning_rate": 1.5539842694843886e-05,
"loss": 2.7546,
"step": 208200
},
{
"epoch": 0.69,
"learning_rate": 1.552329122639761e-05,
"loss": 2.7777,
"step": 208300
},
{
"epoch": 0.69,
"learning_rate": 1.5506739757951328e-05,
"loss": 2.7735,
"step": 208400
},
{
"epoch": 0.69,
"learning_rate": 1.5490188289505046e-05,
"loss": 2.7711,
"step": 208500
},
{
"epoch": 0.69,
"learning_rate": 1.5473636821058765e-05,
"loss": 2.7568,
"step": 208600
},
{
"epoch": 0.69,
"learning_rate": 1.5457085352612484e-05,
"loss": 2.7651,
"step": 208700
},
{
"epoch": 0.69,
"learning_rate": 1.5440533884166203e-05,
"loss": 2.7577,
"step": 208800
},
{
"epoch": 0.69,
"learning_rate": 1.5423982415719922e-05,
"loss": 2.7658,
"step": 208900
},
{
"epoch": 0.69,
"learning_rate": 1.5407430947273645e-05,
"loss": 2.763,
"step": 209000
},
{
"epoch": 0.69,
"learning_rate": 1.5390879478827364e-05,
"loss": 2.7485,
"step": 209100
},
{
"epoch": 0.69,
"learning_rate": 1.5374328010381083e-05,
"loss": 2.7614,
"step": 209200
},
{
"epoch": 0.69,
"learning_rate": 1.5357776541934802e-05,
"loss": 2.7624,
"step": 209300
},
{
"epoch": 0.69,
"learning_rate": 1.534122507348852e-05,
"loss": 2.7601,
"step": 209400
},
{
"epoch": 0.69,
"learning_rate": 1.532467360504224e-05,
"loss": 2.7649,
"step": 209500
},
{
"epoch": 0.69,
"learning_rate": 1.530812213659596e-05,
"loss": 2.7732,
"step": 209600
},
{
"epoch": 0.69,
"learning_rate": 1.529157066814968e-05,
"loss": 2.7569,
"step": 209700
},
{
"epoch": 0.69,
"learning_rate": 1.52750191997034e-05,
"loss": 2.7487,
"step": 209800
},
{
"epoch": 0.69,
"learning_rate": 1.5258467731257117e-05,
"loss": 2.7646,
"step": 209900
},
{
"epoch": 0.7,
"learning_rate": 1.5241916262810838e-05,
"loss": 2.7701,
"step": 210000
},
{
"epoch": 0.7,
"learning_rate": 1.5225364794364557e-05,
"loss": 2.7624,
"step": 210100
},
{
"epoch": 0.7,
"learning_rate": 1.5208813325918276e-05,
"loss": 2.7584,
"step": 210200
},
{
"epoch": 0.7,
"learning_rate": 1.5192261857471995e-05,
"loss": 2.7564,
"step": 210300
},
{
"epoch": 0.7,
"learning_rate": 1.5175710389025716e-05,
"loss": 2.75,
"step": 210400
},
{
"epoch": 0.7,
"learning_rate": 1.5159158920579433e-05,
"loss": 2.7519,
"step": 210500
},
{
"epoch": 0.7,
"learning_rate": 1.5142607452133154e-05,
"loss": 2.7688,
"step": 210600
},
{
"epoch": 0.7,
"learning_rate": 1.5126055983686874e-05,
"loss": 2.7544,
"step": 210700
},
{
"epoch": 0.7,
"learning_rate": 1.5109504515240593e-05,
"loss": 2.7614,
"step": 210800
},
{
"epoch": 0.7,
"learning_rate": 1.5092953046794312e-05,
"loss": 2.7605,
"step": 210900
},
{
"epoch": 0.7,
"learning_rate": 1.5076401578348031e-05,
"loss": 2.7745,
"step": 211000
},
{
"epoch": 0.7,
"learning_rate": 1.5059850109901752e-05,
"loss": 2.7695,
"step": 211100
},
{
"epoch": 0.7,
"learning_rate": 1.504329864145547e-05,
"loss": 2.7476,
"step": 211200
},
{
"epoch": 0.7,
"learning_rate": 1.502674717300919e-05,
"loss": 2.7688,
"step": 211300
},
{
"epoch": 0.7,
"learning_rate": 1.501019570456291e-05,
"loss": 2.7568,
"step": 211400
},
{
"epoch": 0.7,
"learning_rate": 1.499364423611663e-05,
"loss": 2.7646,
"step": 211500
},
{
"epoch": 0.7,
"learning_rate": 1.4977092767670347e-05,
"loss": 2.7504,
"step": 211600
},
{
"epoch": 0.7,
"learning_rate": 1.4960541299224067e-05,
"loss": 2.7674,
"step": 211700
},
{
"epoch": 0.7,
"learning_rate": 1.4943989830777788e-05,
"loss": 2.7603,
"step": 211800
},
{
"epoch": 0.7,
"learning_rate": 1.4927438362331505e-05,
"loss": 2.7682,
"step": 211900
},
{
"epoch": 0.7,
"learning_rate": 1.4910886893885226e-05,
"loss": 2.7744,
"step": 212000
},
{
"epoch": 0.7,
"learning_rate": 1.4894335425438947e-05,
"loss": 2.7598,
"step": 212100
},
{
"epoch": 0.7,
"learning_rate": 1.4877783956992666e-05,
"loss": 2.7579,
"step": 212200
},
{
"epoch": 0.7,
"learning_rate": 1.4861232488546383e-05,
"loss": 2.751,
"step": 212300
},
{
"epoch": 0.7,
"learning_rate": 1.4844681020100104e-05,
"loss": 2.7527,
"step": 212400
},
{
"epoch": 0.7,
"learning_rate": 1.4828129551653824e-05,
"loss": 2.7657,
"step": 212500
},
{
"epoch": 0.7,
"learning_rate": 1.4811578083207542e-05,
"loss": 2.7597,
"step": 212600
},
{
"epoch": 0.7,
"learning_rate": 1.4795026614761262e-05,
"loss": 2.7654,
"step": 212700
},
{
"epoch": 0.7,
"learning_rate": 1.4778475146314983e-05,
"loss": 2.7741,
"step": 212800
},
{
"epoch": 0.7,
"learning_rate": 1.4761923677868702e-05,
"loss": 2.7525,
"step": 212900
},
{
"epoch": 0.71,
"learning_rate": 1.474537220942242e-05,
"loss": 2.7613,
"step": 213000
},
{
"epoch": 0.71,
"learning_rate": 1.472882074097614e-05,
"loss": 2.7507,
"step": 213100
},
{
"epoch": 0.71,
"learning_rate": 1.471226927252986e-05,
"loss": 2.7606,
"step": 213200
},
{
"epoch": 0.71,
"learning_rate": 1.4695717804083578e-05,
"loss": 2.7705,
"step": 213300
},
{
"epoch": 0.71,
"learning_rate": 1.4679166335637299e-05,
"loss": 2.7665,
"step": 213400
},
{
"epoch": 0.71,
"learning_rate": 1.466261486719102e-05,
"loss": 2.7562,
"step": 213500
},
{
"epoch": 0.71,
"learning_rate": 1.4646063398744737e-05,
"loss": 2.7638,
"step": 213600
},
{
"epoch": 0.71,
"learning_rate": 1.4629511930298456e-05,
"loss": 2.7577,
"step": 213700
},
{
"epoch": 0.71,
"learning_rate": 1.4612960461852176e-05,
"loss": 2.7539,
"step": 213800
},
{
"epoch": 0.71,
"learning_rate": 1.4596408993405897e-05,
"loss": 2.7723,
"step": 213900
},
{
"epoch": 0.71,
"learning_rate": 1.4579857524959614e-05,
"loss": 2.7708,
"step": 214000
},
{
"epoch": 0.71,
"learning_rate": 1.4563306056513335e-05,
"loss": 2.758,
"step": 214100
},
{
"epoch": 0.71,
"learning_rate": 1.4546754588067054e-05,
"loss": 2.7772,
"step": 214200
},
{
"epoch": 0.71,
"learning_rate": 1.4530203119620773e-05,
"loss": 2.7653,
"step": 214300
},
{
"epoch": 0.71,
"learning_rate": 1.4513651651174492e-05,
"loss": 2.7629,
"step": 214400
},
{
"epoch": 0.71,
"learning_rate": 1.4497100182728213e-05,
"loss": 2.7615,
"step": 214500
},
{
"epoch": 0.71,
"learning_rate": 1.4480548714281933e-05,
"loss": 2.7692,
"step": 214600
},
{
"epoch": 0.71,
"learning_rate": 1.446399724583565e-05,
"loss": 2.754,
"step": 214700
},
{
"epoch": 0.71,
"learning_rate": 1.4447445777389371e-05,
"loss": 2.7641,
"step": 214800
},
{
"epoch": 0.71,
"learning_rate": 1.443089430894309e-05,
"loss": 2.7685,
"step": 214900
},
{
"epoch": 0.71,
"learning_rate": 1.4414342840496809e-05,
"loss": 2.7615,
"step": 215000
},
{
"epoch": 0.71,
"learning_rate": 1.4397791372050528e-05,
"loss": 2.7597,
"step": 215100
},
{
"epoch": 0.71,
"learning_rate": 1.4381239903604249e-05,
"loss": 2.7684,
"step": 215200
},
{
"epoch": 0.71,
"learning_rate": 1.436468843515797e-05,
"loss": 2.7564,
"step": 215300
},
{
"epoch": 0.71,
"learning_rate": 1.4348136966711687e-05,
"loss": 2.7552,
"step": 215400
},
{
"epoch": 0.71,
"learning_rate": 1.4331585498265407e-05,
"loss": 2.7693,
"step": 215500
},
{
"epoch": 0.71,
"learning_rate": 1.4315034029819126e-05,
"loss": 2.7732,
"step": 215600
},
{
"epoch": 0.71,
"learning_rate": 1.4298482561372845e-05,
"loss": 2.7628,
"step": 215700
},
{
"epoch": 0.71,
"learning_rate": 1.4281931092926564e-05,
"loss": 2.7624,
"step": 215800
},
{
"epoch": 0.71,
"learning_rate": 1.4265379624480285e-05,
"loss": 2.7623,
"step": 215900
},
{
"epoch": 0.72,
"learning_rate": 1.4248828156034006e-05,
"loss": 2.7694,
"step": 216000
},
{
"epoch": 0.72,
"learning_rate": 1.4232276687587723e-05,
"loss": 2.7802,
"step": 216100
},
{
"epoch": 0.72,
"learning_rate": 1.4215725219141444e-05,
"loss": 2.7446,
"step": 216200
},
{
"epoch": 0.72,
"learning_rate": 1.4199173750695163e-05,
"loss": 2.7755,
"step": 216300
},
{
"epoch": 0.72,
"learning_rate": 1.418262228224888e-05,
"loss": 2.7581,
"step": 216400
},
{
"epoch": 0.72,
"learning_rate": 1.41660708138026e-05,
"loss": 2.7618,
"step": 216500
},
{
"epoch": 0.72,
"learning_rate": 1.4149519345356321e-05,
"loss": 2.7691,
"step": 216600
},
{
"epoch": 0.72,
"learning_rate": 1.4132967876910039e-05,
"loss": 2.7626,
"step": 216700
},
{
"epoch": 0.72,
"learning_rate": 1.411641640846376e-05,
"loss": 2.7571,
"step": 216800
},
{
"epoch": 0.72,
"learning_rate": 1.409986494001748e-05,
"loss": 2.7696,
"step": 216900
},
{
"epoch": 0.72,
"learning_rate": 1.4083313471571199e-05,
"loss": 2.7652,
"step": 217000
},
{
"epoch": 0.72,
"learning_rate": 1.4066762003124916e-05,
"loss": 2.7553,
"step": 217100
},
{
"epoch": 0.72,
"learning_rate": 1.4050210534678637e-05,
"loss": 2.7521,
"step": 217200
},
{
"epoch": 0.72,
"learning_rate": 1.4033659066232358e-05,
"loss": 2.7479,
"step": 217300
},
{
"epoch": 0.72,
"learning_rate": 1.4017107597786075e-05,
"loss": 2.7609,
"step": 217400
},
{
"epoch": 0.72,
"learning_rate": 1.4000556129339796e-05,
"loss": 2.7603,
"step": 217500
},
{
"epoch": 0.72,
"learning_rate": 1.3984004660893516e-05,
"loss": 2.7474,
"step": 217600
},
{
"epoch": 0.72,
"learning_rate": 1.3967453192447235e-05,
"loss": 2.7572,
"step": 217700
},
{
"epoch": 0.72,
"learning_rate": 1.3950901724000953e-05,
"loss": 2.754,
"step": 217800
},
{
"epoch": 0.72,
"learning_rate": 1.3934350255554673e-05,
"loss": 2.7588,
"step": 217900
},
{
"epoch": 0.72,
"learning_rate": 1.3917798787108394e-05,
"loss": 2.7542,
"step": 218000
},
{
"epoch": 0.72,
"learning_rate": 1.3901247318662111e-05,
"loss": 2.7586,
"step": 218100
},
{
"epoch": 0.72,
"learning_rate": 1.3884695850215832e-05,
"loss": 2.7664,
"step": 218200
},
{
"epoch": 0.72,
"learning_rate": 1.3868144381769553e-05,
"loss": 2.7626,
"step": 218300
},
{
"epoch": 0.72,
"learning_rate": 1.3851592913323271e-05,
"loss": 2.7712,
"step": 218400
},
{
"epoch": 0.72,
"learning_rate": 1.3835041444876989e-05,
"loss": 2.7606,
"step": 218500
},
{
"epoch": 0.72,
"learning_rate": 1.381848997643071e-05,
"loss": 2.7465,
"step": 218600
},
{
"epoch": 0.72,
"learning_rate": 1.380193850798443e-05,
"loss": 2.7608,
"step": 218700
},
{
"epoch": 0.72,
"learning_rate": 1.3785387039538147e-05,
"loss": 2.7693,
"step": 218800
},
{
"epoch": 0.72,
"learning_rate": 1.3768835571091868e-05,
"loss": 2.7496,
"step": 218900
},
{
"epoch": 0.72,
"learning_rate": 1.3752284102645587e-05,
"loss": 2.7704,
"step": 219000
},
{
"epoch": 0.73,
"learning_rate": 1.3735732634199308e-05,
"loss": 2.7589,
"step": 219100
},
{
"epoch": 0.73,
"learning_rate": 1.3719181165753025e-05,
"loss": 2.7588,
"step": 219200
},
{
"epoch": 0.73,
"learning_rate": 1.3702629697306746e-05,
"loss": 2.7525,
"step": 219300
},
{
"epoch": 0.73,
"learning_rate": 1.3686078228860466e-05,
"loss": 2.7568,
"step": 219400
},
{
"epoch": 0.73,
"learning_rate": 1.3669526760414184e-05,
"loss": 2.7687,
"step": 219500
},
{
"epoch": 0.73,
"learning_rate": 1.3652975291967904e-05,
"loss": 2.7713,
"step": 219600
},
{
"epoch": 0.73,
"learning_rate": 1.3636423823521623e-05,
"loss": 2.7573,
"step": 219700
},
{
"epoch": 0.73,
"learning_rate": 1.3619872355075344e-05,
"loss": 2.7625,
"step": 219800
},
{
"epoch": 0.73,
"learning_rate": 1.3603320886629061e-05,
"loss": 2.7552,
"step": 219900
},
{
"epoch": 0.73,
"learning_rate": 1.3586769418182782e-05,
"loss": 2.7736,
"step": 220000
},
{
"epoch": 0.73,
"learning_rate": 1.3570217949736503e-05,
"loss": 2.7452,
"step": 220100
},
{
"epoch": 0.73,
"learning_rate": 1.355366648129022e-05,
"loss": 2.7462,
"step": 220200
},
{
"epoch": 0.73,
"learning_rate": 1.353711501284394e-05,
"loss": 2.7562,
"step": 220300
},
{
"epoch": 0.73,
"learning_rate": 1.352056354439766e-05,
"loss": 2.7479,
"step": 220400
},
{
"epoch": 0.73,
"learning_rate": 1.3504012075951377e-05,
"loss": 2.7564,
"step": 220500
},
{
"epoch": 0.73,
"learning_rate": 1.3487460607505098e-05,
"loss": 2.7735,
"step": 220600
},
{
"epoch": 0.73,
"learning_rate": 1.3470909139058818e-05,
"loss": 2.7683,
"step": 220700
},
{
"epoch": 0.73,
"learning_rate": 1.3454357670612539e-05,
"loss": 2.7601,
"step": 220800
},
{
"epoch": 0.73,
"learning_rate": 1.3437806202166256e-05,
"loss": 2.7441,
"step": 220900
},
{
"epoch": 0.73,
"learning_rate": 1.3421254733719977e-05,
"loss": 2.7692,
"step": 221000
},
{
"epoch": 0.73,
"learning_rate": 1.3404703265273696e-05,
"loss": 2.7497,
"step": 221100
},
{
"epoch": 0.73,
"learning_rate": 1.3388151796827413e-05,
"loss": 2.7671,
"step": 221200
},
{
"epoch": 0.73,
"learning_rate": 1.3371600328381134e-05,
"loss": 2.7618,
"step": 221300
},
{
"epoch": 0.73,
"learning_rate": 1.3355048859934855e-05,
"loss": 2.7591,
"step": 221400
},
{
"epoch": 0.73,
"learning_rate": 1.3338497391488575e-05,
"loss": 2.7558,
"step": 221500
},
{
"epoch": 0.73,
"learning_rate": 1.3321945923042292e-05,
"loss": 2.7706,
"step": 221600
},
{
"epoch": 0.73,
"learning_rate": 1.3305394454596013e-05,
"loss": 2.7579,
"step": 221700
},
{
"epoch": 0.73,
"learning_rate": 1.3288842986149732e-05,
"loss": 2.762,
"step": 221800
},
{
"epoch": 0.73,
"learning_rate": 1.327229151770345e-05,
"loss": 2.7587,
"step": 221900
},
{
"epoch": 0.73,
"learning_rate": 1.325574004925717e-05,
"loss": 2.7797,
"step": 222000
},
{
"epoch": 0.74,
"learning_rate": 1.323918858081089e-05,
"loss": 2.7493,
"step": 222100
},
{
"epoch": 0.74,
"learning_rate": 1.3222637112364611e-05,
"loss": 2.7677,
"step": 222200
},
{
"epoch": 0.74,
"learning_rate": 1.3206085643918329e-05,
"loss": 2.758,
"step": 222300
},
{
"epoch": 0.74,
"learning_rate": 1.318953417547205e-05,
"loss": 2.7514,
"step": 222400
},
{
"epoch": 0.74,
"learning_rate": 1.3172982707025768e-05,
"loss": 2.7609,
"step": 222500
},
{
"epoch": 0.74,
"learning_rate": 1.3156431238579486e-05,
"loss": 2.7669,
"step": 222600
},
{
"epoch": 0.74,
"learning_rate": 1.3139879770133206e-05,
"loss": 2.7623,
"step": 222700
},
{
"epoch": 0.74,
"learning_rate": 1.3123328301686927e-05,
"loss": 2.7669,
"step": 222800
},
{
"epoch": 0.74,
"learning_rate": 1.3106776833240648e-05,
"loss": 2.7461,
"step": 222900
},
{
"epoch": 0.74,
"learning_rate": 1.3090225364794365e-05,
"loss": 2.747,
"step": 223000
},
{
"epoch": 0.74,
"learning_rate": 1.3073673896348084e-05,
"loss": 2.7535,
"step": 223100
},
{
"epoch": 0.74,
"learning_rate": 1.3057122427901805e-05,
"loss": 2.7713,
"step": 223200
},
{
"epoch": 0.74,
"learning_rate": 1.3040570959455522e-05,
"loss": 2.7419,
"step": 223300
},
{
"epoch": 0.74,
"learning_rate": 1.3024019491009243e-05,
"loss": 2.7733,
"step": 223400
},
{
"epoch": 0.74,
"learning_rate": 1.3007468022562963e-05,
"loss": 2.7672,
"step": 223500
},
{
"epoch": 0.74,
"learning_rate": 1.299091655411668e-05,
"loss": 2.7738,
"step": 223600
},
{
"epoch": 0.74,
"learning_rate": 1.2974365085670401e-05,
"loss": 2.7636,
"step": 223700
},
{
"epoch": 0.74,
"learning_rate": 1.295781361722412e-05,
"loss": 2.765,
"step": 223800
},
{
"epoch": 0.74,
"learning_rate": 1.2941262148777841e-05,
"loss": 2.7526,
"step": 223900
},
{
"epoch": 0.74,
"learning_rate": 1.2924710680331558e-05,
"loss": 2.7703,
"step": 224000
},
{
"epoch": 0.74,
"learning_rate": 1.2908159211885279e-05,
"loss": 2.7737,
"step": 224100
},
{
"epoch": 0.74,
"learning_rate": 1.2891607743439e-05,
"loss": 2.7686,
"step": 224200
},
{
"epoch": 0.74,
"learning_rate": 1.2875056274992717e-05,
"loss": 2.7574,
"step": 224300
},
{
"epoch": 0.74,
"learning_rate": 1.2858504806546438e-05,
"loss": 2.7592,
"step": 224400
},
{
"epoch": 0.74,
"learning_rate": 1.2841953338100157e-05,
"loss": 2.7584,
"step": 224500
},
{
"epoch": 0.74,
"learning_rate": 1.2825401869653877e-05,
"loss": 2.761,
"step": 224600
},
{
"epoch": 0.74,
"learning_rate": 1.2808850401207594e-05,
"loss": 2.7542,
"step": 224700
},
{
"epoch": 0.74,
"learning_rate": 1.2792298932761315e-05,
"loss": 2.7585,
"step": 224800
},
{
"epoch": 0.74,
"learning_rate": 1.2775747464315036e-05,
"loss": 2.7566,
"step": 224900
},
{
"epoch": 0.74,
"learning_rate": 1.2759195995868753e-05,
"loss": 2.7727,
"step": 225000
},
{
"epoch": 0.75,
"learning_rate": 1.2742644527422474e-05,
"loss": 2.7518,
"step": 225100
},
{
"epoch": 0.75,
"learning_rate": 1.2726093058976193e-05,
"loss": 2.754,
"step": 225200
},
{
"epoch": 0.75,
"learning_rate": 1.2709541590529913e-05,
"loss": 2.7637,
"step": 225300
},
{
"epoch": 0.75,
"learning_rate": 1.269299012208363e-05,
"loss": 2.7561,
"step": 225400
},
{
"epoch": 0.75,
"learning_rate": 1.2676438653637351e-05,
"loss": 2.7536,
"step": 225500
},
{
"epoch": 0.75,
"learning_rate": 1.2659887185191072e-05,
"loss": 2.7477,
"step": 225600
},
{
"epoch": 0.75,
"learning_rate": 1.264333571674479e-05,
"loss": 2.7465,
"step": 225700
},
{
"epoch": 0.75,
"learning_rate": 1.262678424829851e-05,
"loss": 2.77,
"step": 225800
},
{
"epoch": 0.75,
"learning_rate": 1.2610232779852229e-05,
"loss": 2.754,
"step": 225900
},
{
"epoch": 0.75,
"learning_rate": 1.259368131140595e-05,
"loss": 2.7335,
"step": 226000
},
{
"epoch": 0.75,
"learning_rate": 1.2577129842959667e-05,
"loss": 2.754,
"step": 226100
},
{
"epoch": 0.75,
"learning_rate": 1.2560578374513388e-05,
"loss": 2.7408,
"step": 226200
},
{
"epoch": 0.75,
"learning_rate": 1.2544026906067108e-05,
"loss": 2.7549,
"step": 226300
},
{
"epoch": 0.75,
"learning_rate": 1.2527475437620826e-05,
"loss": 2.7623,
"step": 226400
},
{
"epoch": 0.75,
"learning_rate": 1.2510923969174546e-05,
"loss": 2.7483,
"step": 226500
},
{
"epoch": 0.75,
"learning_rate": 1.2494372500728265e-05,
"loss": 2.7609,
"step": 226600
},
{
"epoch": 0.75,
"learning_rate": 1.2477821032281984e-05,
"loss": 2.7531,
"step": 226700
},
{
"epoch": 0.75,
"learning_rate": 1.2461269563835703e-05,
"loss": 2.7631,
"step": 226800
},
{
"epoch": 0.75,
"learning_rate": 1.2444718095389424e-05,
"loss": 2.7389,
"step": 226900
},
{
"epoch": 0.75,
"learning_rate": 1.2428166626943143e-05,
"loss": 2.7543,
"step": 227000
},
{
"epoch": 0.75,
"learning_rate": 1.2411615158496864e-05,
"loss": 2.758,
"step": 227100
},
{
"epoch": 0.75,
"learning_rate": 1.2395063690050583e-05,
"loss": 2.7611,
"step": 227200
},
{
"epoch": 0.75,
"learning_rate": 1.2378512221604302e-05,
"loss": 2.7658,
"step": 227300
},
{
"epoch": 0.75,
"learning_rate": 1.236196075315802e-05,
"loss": 2.7604,
"step": 227400
},
{
"epoch": 0.75,
"learning_rate": 1.234540928471174e-05,
"loss": 2.7575,
"step": 227500
},
{
"epoch": 0.75,
"learning_rate": 1.232885781626546e-05,
"loss": 2.7522,
"step": 227600
},
{
"epoch": 0.75,
"learning_rate": 1.231230634781918e-05,
"loss": 2.7691,
"step": 227700
},
{
"epoch": 0.75,
"learning_rate": 1.22957548793729e-05,
"loss": 2.7591,
"step": 227800
},
{
"epoch": 0.75,
"learning_rate": 1.2279203410926617e-05,
"loss": 2.765,
"step": 227900
},
{
"epoch": 0.75,
"learning_rate": 1.2262651942480336e-05,
"loss": 2.7542,
"step": 228000
},
{
"epoch": 0.76,
"learning_rate": 1.2246100474034057e-05,
"loss": 2.7567,
"step": 228100
},
{
"epoch": 0.76,
"learning_rate": 1.2229549005587776e-05,
"loss": 2.7569,
"step": 228200
},
{
"epoch": 0.76,
"learning_rate": 1.2212997537141496e-05,
"loss": 2.7528,
"step": 228300
},
{
"epoch": 0.76,
"learning_rate": 1.2196446068695215e-05,
"loss": 2.7642,
"step": 228400
},
{
"epoch": 0.76,
"learning_rate": 1.2179894600248934e-05,
"loss": 2.7623,
"step": 228500
},
{
"epoch": 0.76,
"learning_rate": 1.2163343131802653e-05,
"loss": 2.7576,
"step": 228600
},
{
"epoch": 0.76,
"learning_rate": 1.2146791663356372e-05,
"loss": 2.7557,
"step": 228700
},
{
"epoch": 0.76,
"learning_rate": 1.2130240194910093e-05,
"loss": 2.7578,
"step": 228800
},
{
"epoch": 0.76,
"learning_rate": 1.2113688726463812e-05,
"loss": 2.7517,
"step": 228900
},
{
"epoch": 0.76,
"learning_rate": 1.2097137258017533e-05,
"loss": 2.7623,
"step": 229000
},
{
"epoch": 0.76,
"learning_rate": 1.2080585789571252e-05,
"loss": 2.7628,
"step": 229100
},
{
"epoch": 0.76,
"learning_rate": 1.206403432112497e-05,
"loss": 2.7588,
"step": 229200
},
{
"epoch": 0.76,
"learning_rate": 1.204748285267869e-05,
"loss": 2.763,
"step": 229300
},
{
"epoch": 0.76,
"learning_rate": 1.2030931384232409e-05,
"loss": 2.7596,
"step": 229400
},
{
"epoch": 0.76,
"learning_rate": 1.201437991578613e-05,
"loss": 2.7613,
"step": 229500
},
{
"epoch": 0.76,
"learning_rate": 1.1997828447339848e-05,
"loss": 2.754,
"step": 229600
},
{
"epoch": 0.76,
"learning_rate": 1.1981276978893569e-05,
"loss": 2.7586,
"step": 229700
},
{
"epoch": 0.76,
"learning_rate": 1.1964725510447288e-05,
"loss": 2.7566,
"step": 229800
},
{
"epoch": 0.76,
"learning_rate": 1.1948174042001007e-05,
"loss": 2.7488,
"step": 229900
},
{
"epoch": 0.76,
"learning_rate": 1.1931622573554726e-05,
"loss": 2.7631,
"step": 230000
},
{
"epoch": 0.76,
"learning_rate": 1.1915071105108445e-05,
"loss": 2.7658,
"step": 230100
},
{
"epoch": 0.76,
"learning_rate": 1.1898519636662166e-05,
"loss": 2.755,
"step": 230200
},
{
"epoch": 0.76,
"learning_rate": 1.1881968168215885e-05,
"loss": 2.7556,
"step": 230300
},
{
"epoch": 0.76,
"learning_rate": 1.1865416699769605e-05,
"loss": 2.7565,
"step": 230400
},
{
"epoch": 0.76,
"learning_rate": 1.1848865231323324e-05,
"loss": 2.7496,
"step": 230500
},
{
"epoch": 0.76,
"learning_rate": 1.1832313762877043e-05,
"loss": 2.7626,
"step": 230600
},
{
"epoch": 0.76,
"learning_rate": 1.1815762294430762e-05,
"loss": 2.7598,
"step": 230700
},
{
"epoch": 0.76,
"learning_rate": 1.1799210825984481e-05,
"loss": 2.7589,
"step": 230800
},
{
"epoch": 0.76,
"learning_rate": 1.1782659357538202e-05,
"loss": 2.751,
"step": 230900
},
{
"epoch": 0.76,
"learning_rate": 1.1766107889091921e-05,
"loss": 2.7669,
"step": 231000
},
{
"epoch": 0.77,
"learning_rate": 1.174955642064564e-05,
"loss": 2.7537,
"step": 231100
},
{
"epoch": 0.77,
"learning_rate": 1.173300495219936e-05,
"loss": 2.7612,
"step": 231200
},
{
"epoch": 0.77,
"learning_rate": 1.171645348375308e-05,
"loss": 2.7601,
"step": 231300
},
{
"epoch": 0.77,
"learning_rate": 1.1699902015306799e-05,
"loss": 2.7486,
"step": 231400
},
{
"epoch": 0.77,
"learning_rate": 1.1683350546860517e-05,
"loss": 2.7521,
"step": 231500
},
{
"epoch": 0.77,
"learning_rate": 1.1666799078414238e-05,
"loss": 2.7573,
"step": 231600
},
{
"epoch": 0.77,
"learning_rate": 1.1650247609967957e-05,
"loss": 2.7527,
"step": 231700
},
{
"epoch": 0.77,
"learning_rate": 1.1633696141521676e-05,
"loss": 2.7562,
"step": 231800
},
{
"epoch": 0.77,
"learning_rate": 1.1617144673075397e-05,
"loss": 2.7593,
"step": 231900
},
{
"epoch": 0.77,
"learning_rate": 1.1600593204629114e-05,
"loss": 2.7537,
"step": 232000
},
{
"epoch": 0.77,
"learning_rate": 1.1584041736182835e-05,
"loss": 2.7641,
"step": 232100
},
{
"epoch": 0.77,
"learning_rate": 1.1567490267736554e-05,
"loss": 2.7569,
"step": 232200
},
{
"epoch": 0.77,
"learning_rate": 1.1550938799290273e-05,
"loss": 2.7614,
"step": 232300
},
{
"epoch": 0.77,
"learning_rate": 1.1534387330843993e-05,
"loss": 2.7532,
"step": 232400
},
{
"epoch": 0.77,
"learning_rate": 1.1517835862397712e-05,
"loss": 2.7478,
"step": 232500
},
{
"epoch": 0.77,
"learning_rate": 1.1501284393951433e-05,
"loss": 2.7649,
"step": 232600
},
{
"epoch": 0.77,
"learning_rate": 1.148473292550515e-05,
"loss": 2.7545,
"step": 232700
},
{
"epoch": 0.77,
"learning_rate": 1.1468181457058871e-05,
"loss": 2.7577,
"step": 232800
},
{
"epoch": 0.77,
"learning_rate": 1.145162998861259e-05,
"loss": 2.7467,
"step": 232900
},
{
"epoch": 0.77,
"learning_rate": 1.1435078520166309e-05,
"loss": 2.7622,
"step": 233000
},
{
"epoch": 0.77,
"learning_rate": 1.141852705172003e-05,
"loss": 2.7779,
"step": 233100
},
{
"epoch": 0.77,
"learning_rate": 1.1401975583273749e-05,
"loss": 2.7469,
"step": 233200
},
{
"epoch": 0.77,
"learning_rate": 1.1385424114827468e-05,
"loss": 2.7561,
"step": 233300
},
{
"epoch": 0.77,
"learning_rate": 1.1368872646381187e-05,
"loss": 2.7556,
"step": 233400
},
{
"epoch": 0.77,
"learning_rate": 1.1352321177934907e-05,
"loss": 2.7612,
"step": 233500
},
{
"epoch": 0.77,
"learning_rate": 1.1335769709488626e-05,
"loss": 2.7486,
"step": 233600
},
{
"epoch": 0.77,
"learning_rate": 1.1319218241042345e-05,
"loss": 2.7379,
"step": 233700
},
{
"epoch": 0.77,
"learning_rate": 1.1302666772596066e-05,
"loss": 2.7548,
"step": 233800
},
{
"epoch": 0.77,
"learning_rate": 1.1286115304149785e-05,
"loss": 2.7648,
"step": 233900
},
{
"epoch": 0.77,
"learning_rate": 1.1269563835703504e-05,
"loss": 2.7504,
"step": 234000
},
{
"epoch": 0.77,
"learning_rate": 1.1253012367257223e-05,
"loss": 2.7582,
"step": 234100
},
{
"epoch": 0.78,
"learning_rate": 1.1236460898810942e-05,
"loss": 2.7601,
"step": 234200
},
{
"epoch": 0.78,
"learning_rate": 1.1219909430364663e-05,
"loss": 2.7572,
"step": 234300
},
{
"epoch": 0.78,
"learning_rate": 1.1203357961918382e-05,
"loss": 2.7595,
"step": 234400
},
{
"epoch": 0.78,
"learning_rate": 1.1186806493472102e-05,
"loss": 2.7622,
"step": 234500
},
{
"epoch": 0.78,
"learning_rate": 1.1170255025025821e-05,
"loss": 2.7495,
"step": 234600
},
{
"epoch": 0.78,
"learning_rate": 1.115370355657954e-05,
"loss": 2.7545,
"step": 234700
},
{
"epoch": 0.78,
"learning_rate": 1.113715208813326e-05,
"loss": 2.7541,
"step": 234800
},
{
"epoch": 0.78,
"learning_rate": 1.1120600619686978e-05,
"loss": 2.7475,
"step": 234900
},
{
"epoch": 0.78,
"learning_rate": 1.1104049151240699e-05,
"loss": 2.7687,
"step": 235000
},
{
"epoch": 0.78,
"learning_rate": 1.1087497682794418e-05,
"loss": 2.7556,
"step": 235100
},
{
"epoch": 0.78,
"learning_rate": 1.1070946214348138e-05,
"loss": 2.7571,
"step": 235200
},
{
"epoch": 0.78,
"learning_rate": 1.1054394745901857e-05,
"loss": 2.7557,
"step": 235300
},
{
"epoch": 0.78,
"learning_rate": 1.1037843277455576e-05,
"loss": 2.7581,
"step": 235400
},
{
"epoch": 0.78,
"learning_rate": 1.1021291809009295e-05,
"loss": 2.7619,
"step": 235500
},
{
"epoch": 0.78,
"learning_rate": 1.1004740340563014e-05,
"loss": 2.7568,
"step": 235600
},
{
"epoch": 0.78,
"learning_rate": 1.0988188872116735e-05,
"loss": 2.7632,
"step": 235700
},
{
"epoch": 0.78,
"learning_rate": 1.0971637403670454e-05,
"loss": 2.7567,
"step": 235800
},
{
"epoch": 0.78,
"learning_rate": 1.0955085935224175e-05,
"loss": 2.7542,
"step": 235900
},
{
"epoch": 0.78,
"learning_rate": 1.0938534466777894e-05,
"loss": 2.7441,
"step": 236000
},
{
"epoch": 0.78,
"learning_rate": 1.0921982998331613e-05,
"loss": 2.7627,
"step": 236100
},
{
"epoch": 0.78,
"learning_rate": 1.0905431529885332e-05,
"loss": 2.7559,
"step": 236200
},
{
"epoch": 0.78,
"learning_rate": 1.088888006143905e-05,
"loss": 2.7558,
"step": 236300
},
{
"epoch": 0.78,
"learning_rate": 1.0872328592992771e-05,
"loss": 2.7548,
"step": 236400
},
{
"epoch": 0.78,
"learning_rate": 1.085577712454649e-05,
"loss": 2.7463,
"step": 236500
},
{
"epoch": 0.78,
"learning_rate": 1.0839225656100211e-05,
"loss": 2.7541,
"step": 236600
},
{
"epoch": 0.78,
"learning_rate": 1.082267418765393e-05,
"loss": 2.7527,
"step": 236700
},
{
"epoch": 0.78,
"learning_rate": 1.0806122719207647e-05,
"loss": 2.7564,
"step": 236800
},
{
"epoch": 0.78,
"learning_rate": 1.0789571250761368e-05,
"loss": 2.7602,
"step": 236900
},
{
"epoch": 0.78,
"learning_rate": 1.0773019782315087e-05,
"loss": 2.7584,
"step": 237000
},
{
"epoch": 0.78,
"learning_rate": 1.0756468313868808e-05,
"loss": 2.7595,
"step": 237100
},
{
"epoch": 0.79,
"learning_rate": 1.0739916845422527e-05,
"loss": 2.7478,
"step": 237200
},
{
"epoch": 0.79,
"learning_rate": 1.0723365376976246e-05,
"loss": 2.7501,
"step": 237300
},
{
"epoch": 0.79,
"learning_rate": 1.0706813908529966e-05,
"loss": 2.7441,
"step": 237400
},
{
"epoch": 0.79,
"learning_rate": 1.0690262440083684e-05,
"loss": 2.7687,
"step": 237500
},
{
"epoch": 0.79,
"learning_rate": 1.0673710971637404e-05,
"loss": 2.7644,
"step": 237600
},
{
"epoch": 0.79,
"learning_rate": 1.0657159503191123e-05,
"loss": 2.7472,
"step": 237700
},
{
"epoch": 0.79,
"learning_rate": 1.0640608034744844e-05,
"loss": 2.7465,
"step": 237800
},
{
"epoch": 0.79,
"learning_rate": 1.0624056566298563e-05,
"loss": 2.7558,
"step": 237900
},
{
"epoch": 0.79,
"learning_rate": 1.0607505097852282e-05,
"loss": 2.7526,
"step": 238000
},
{
"epoch": 0.79,
"learning_rate": 1.0590953629406e-05,
"loss": 2.7455,
"step": 238100
},
{
"epoch": 0.79,
"learning_rate": 1.057440216095972e-05,
"loss": 2.7548,
"step": 238200
},
{
"epoch": 0.79,
"learning_rate": 1.055785069251344e-05,
"loss": 2.7483,
"step": 238300
},
{
"epoch": 0.79,
"learning_rate": 1.054129922406716e-05,
"loss": 2.7454,
"step": 238400
},
{
"epoch": 0.79,
"learning_rate": 1.052474775562088e-05,
"loss": 2.7615,
"step": 238500
},
{
"epoch": 0.79,
"learning_rate": 1.0508196287174599e-05,
"loss": 2.765,
"step": 238600
},
{
"epoch": 0.79,
"learning_rate": 1.0491644818728318e-05,
"loss": 2.7463,
"step": 238700
},
{
"epoch": 0.79,
"learning_rate": 1.0475093350282037e-05,
"loss": 2.7658,
"step": 238800
},
{
"epoch": 0.79,
"learning_rate": 1.0458541881835756e-05,
"loss": 2.7769,
"step": 238900
},
{
"epoch": 0.79,
"learning_rate": 1.0441990413389477e-05,
"loss": 2.7475,
"step": 239000
},
{
"epoch": 0.79,
"learning_rate": 1.0425438944943196e-05,
"loss": 2.7513,
"step": 239100
},
{
"epoch": 0.79,
"learning_rate": 1.0408887476496915e-05,
"loss": 2.752,
"step": 239200
},
{
"epoch": 0.79,
"learning_rate": 1.0392336008050635e-05,
"loss": 2.7589,
"step": 239300
},
{
"epoch": 0.79,
"learning_rate": 1.0375784539604354e-05,
"loss": 2.7562,
"step": 239400
},
{
"epoch": 0.79,
"learning_rate": 1.0359233071158073e-05,
"loss": 2.7609,
"step": 239500
},
{
"epoch": 0.79,
"learning_rate": 1.0342681602711792e-05,
"loss": 2.7548,
"step": 239600
},
{
"epoch": 0.79,
"learning_rate": 1.0326130134265513e-05,
"loss": 2.7456,
"step": 239700
},
{
"epoch": 0.79,
"learning_rate": 1.0309578665819232e-05,
"loss": 2.7518,
"step": 239800
},
{
"epoch": 0.79,
"learning_rate": 1.0293027197372951e-05,
"loss": 2.759,
"step": 239900
},
{
"epoch": 0.79,
"learning_rate": 1.0276475728926672e-05,
"loss": 2.7448,
"step": 240000
},
{
"epoch": 0.79,
"learning_rate": 1.025992426048039e-05,
"loss": 2.7573,
"step": 240100
},
{
"epoch": 0.8,
"learning_rate": 1.024337279203411e-05,
"loss": 2.7477,
"step": 240200
},
{
"epoch": 0.8,
"learning_rate": 1.0226821323587829e-05,
"loss": 2.7491,
"step": 240300
},
{
"epoch": 0.8,
"learning_rate": 1.021026985514155e-05,
"loss": 2.7671,
"step": 240400
},
{
"epoch": 0.8,
"learning_rate": 1.0193718386695268e-05,
"loss": 2.7504,
"step": 240500
},
{
"epoch": 0.8,
"learning_rate": 1.0177166918248987e-05,
"loss": 2.7602,
"step": 240600
},
{
"epoch": 0.8,
"learning_rate": 1.0160615449802708e-05,
"loss": 2.7566,
"step": 240700
},
{
"epoch": 0.8,
"learning_rate": 1.0144063981356427e-05,
"loss": 2.7492,
"step": 240800
},
{
"epoch": 0.8,
"learning_rate": 1.0127512512910146e-05,
"loss": 2.7732,
"step": 240900
},
{
"epoch": 0.8,
"learning_rate": 1.0110961044463865e-05,
"loss": 2.7491,
"step": 241000
},
{
"epoch": 0.8,
"learning_rate": 1.0094409576017584e-05,
"loss": 2.7615,
"step": 241100
},
{
"epoch": 0.8,
"learning_rate": 1.0077858107571305e-05,
"loss": 2.7608,
"step": 241200
},
{
"epoch": 0.8,
"learning_rate": 1.0061306639125024e-05,
"loss": 2.752,
"step": 241300
},
{
"epoch": 0.8,
"learning_rate": 1.0044755170678744e-05,
"loss": 2.7352,
"step": 241400
},
{
"epoch": 0.8,
"learning_rate": 1.0028203702232463e-05,
"loss": 2.7488,
"step": 241500
},
{
"epoch": 0.8,
"learning_rate": 1.0011652233786182e-05,
"loss": 2.7588,
"step": 241600
},
{
"epoch": 0.8,
"learning_rate": 9.995100765339901e-06,
"loss": 2.7578,
"step": 241700
},
{
"epoch": 0.8,
"learning_rate": 9.97854929689362e-06,
"loss": 2.7627,
"step": 241800
},
{
"epoch": 0.8,
"learning_rate": 9.96199782844734e-06,
"loss": 2.7565,
"step": 241900
},
{
"epoch": 0.8,
"learning_rate": 9.94544636000106e-06,
"loss": 2.7434,
"step": 242000
},
{
"epoch": 0.8,
"learning_rate": 9.92889489155478e-06,
"loss": 2.7456,
"step": 242100
},
{
"epoch": 0.8,
"learning_rate": 9.912343423108498e-06,
"loss": 2.7598,
"step": 242200
},
{
"epoch": 0.8,
"learning_rate": 9.895791954662217e-06,
"loss": 2.7496,
"step": 242300
},
{
"epoch": 0.8,
"learning_rate": 9.879240486215937e-06,
"loss": 2.7579,
"step": 242400
},
{
"epoch": 0.8,
"learning_rate": 9.862689017769656e-06,
"loss": 2.7486,
"step": 242500
},
{
"epoch": 0.8,
"learning_rate": 9.846137549323377e-06,
"loss": 2.7576,
"step": 242600
},
{
"epoch": 0.8,
"learning_rate": 9.829586080877096e-06,
"loss": 2.7585,
"step": 242700
},
{
"epoch": 0.8,
"learning_rate": 9.813034612430817e-06,
"loss": 2.7527,
"step": 242800
},
{
"epoch": 0.8,
"learning_rate": 9.796483143984534e-06,
"loss": 2.7563,
"step": 242900
},
{
"epoch": 0.8,
"learning_rate": 9.779931675538253e-06,
"loss": 2.7464,
"step": 243000
},
{
"epoch": 0.8,
"learning_rate": 9.763380207091974e-06,
"loss": 2.7391,
"step": 243100
},
{
"epoch": 0.81,
"learning_rate": 9.746828738645693e-06,
"loss": 2.7617,
"step": 243200
},
{
"epoch": 0.81,
"learning_rate": 9.730277270199413e-06,
"loss": 2.7574,
"step": 243300
},
{
"epoch": 0.81,
"learning_rate": 9.713725801753132e-06,
"loss": 2.758,
"step": 243400
},
{
"epoch": 0.81,
"learning_rate": 9.697174333306851e-06,
"loss": 2.758,
"step": 243500
},
{
"epoch": 0.81,
"learning_rate": 9.68062286486057e-06,
"loss": 2.7697,
"step": 243600
},
{
"epoch": 0.81,
"learning_rate": 9.66407139641429e-06,
"loss": 2.7418,
"step": 243700
},
{
"epoch": 0.81,
"learning_rate": 9.64751992796801e-06,
"loss": 2.7591,
"step": 243800
},
{
"epoch": 0.81,
"learning_rate": 9.630968459521729e-06,
"loss": 2.7602,
"step": 243900
},
{
"epoch": 0.81,
"learning_rate": 9.61441699107545e-06,
"loss": 2.7543,
"step": 244000
},
{
"epoch": 0.81,
"learning_rate": 9.597865522629169e-06,
"loss": 2.7484,
"step": 244100
},
{
"epoch": 0.81,
"learning_rate": 9.581314054182888e-06,
"loss": 2.7543,
"step": 244200
},
{
"epoch": 0.81,
"learning_rate": 9.564762585736607e-06,
"loss": 2.7647,
"step": 244300
},
{
"epoch": 0.81,
"learning_rate": 9.548211117290326e-06,
"loss": 2.7472,
"step": 244400
},
{
"epoch": 0.81,
"learning_rate": 9.531659648844046e-06,
"loss": 2.7552,
"step": 244500
},
{
"epoch": 0.81,
"learning_rate": 9.515108180397765e-06,
"loss": 2.7469,
"step": 244600
},
{
"epoch": 0.81,
"learning_rate": 9.498556711951486e-06,
"loss": 2.7635,
"step": 244700
},
{
"epoch": 0.81,
"learning_rate": 9.482005243505205e-06,
"loss": 2.764,
"step": 244800
},
{
"epoch": 0.81,
"learning_rate": 9.465453775058924e-06,
"loss": 2.7551,
"step": 244900
},
{
"epoch": 0.81,
"learning_rate": 9.448902306612643e-06,
"loss": 2.7641,
"step": 245000
},
{
"epoch": 0.81,
"learning_rate": 9.432350838166362e-06,
"loss": 2.7573,
"step": 245100
},
{
"epoch": 0.81,
"learning_rate": 9.415799369720082e-06,
"loss": 2.7588,
"step": 245200
},
{
"epoch": 0.81,
"learning_rate": 9.399247901273801e-06,
"loss": 2.7564,
"step": 245300
},
{
"epoch": 0.81,
"learning_rate": 9.382696432827522e-06,
"loss": 2.7435,
"step": 245400
},
{
"epoch": 0.81,
"learning_rate": 9.366144964381241e-06,
"loss": 2.7468,
"step": 245500
},
{
"epoch": 0.81,
"learning_rate": 9.34959349593496e-06,
"loss": 2.7513,
"step": 245600
},
{
"epoch": 0.81,
"learning_rate": 9.333042027488679e-06,
"loss": 2.7653,
"step": 245700
},
{
"epoch": 0.81,
"learning_rate": 9.316490559042398e-06,
"loss": 2.774,
"step": 245800
},
{
"epoch": 0.81,
"learning_rate": 9.299939090596119e-06,
"loss": 2.7381,
"step": 245900
},
{
"epoch": 0.81,
"learning_rate": 9.283387622149838e-06,
"loss": 2.7602,
"step": 246000
},
{
"epoch": 0.81,
"learning_rate": 9.266836153703557e-06,
"loss": 2.7598,
"step": 246100
},
{
"epoch": 0.81,
"learning_rate": 9.250284685257277e-06,
"loss": 2.7557,
"step": 246200
},
{
"epoch": 0.82,
"learning_rate": 9.233733216810996e-06,
"loss": 2.7556,
"step": 246300
},
{
"epoch": 0.82,
"learning_rate": 9.217181748364715e-06,
"loss": 2.7567,
"step": 246400
},
{
"epoch": 0.82,
"learning_rate": 9.200630279918434e-06,
"loss": 2.7707,
"step": 246500
},
{
"epoch": 0.82,
"learning_rate": 9.184078811472155e-06,
"loss": 2.753,
"step": 246600
},
{
"epoch": 0.82,
"learning_rate": 9.167527343025874e-06,
"loss": 2.7478,
"step": 246700
},
{
"epoch": 0.82,
"learning_rate": 9.150975874579593e-06,
"loss": 2.7509,
"step": 246800
},
{
"epoch": 0.82,
"learning_rate": 9.134424406133314e-06,
"loss": 2.7545,
"step": 246900
},
{
"epoch": 0.82,
"learning_rate": 9.117872937687031e-06,
"loss": 2.7644,
"step": 247000
},
{
"epoch": 0.82,
"learning_rate": 9.101321469240752e-06,
"loss": 2.757,
"step": 247100
},
{
"epoch": 0.82,
"learning_rate": 9.08477000079447e-06,
"loss": 2.76,
"step": 247200
},
{
"epoch": 0.82,
"learning_rate": 9.06821853234819e-06,
"loss": 2.7464,
"step": 247300
},
{
"epoch": 0.82,
"learning_rate": 9.05166706390191e-06,
"loss": 2.7534,
"step": 247400
},
{
"epoch": 0.82,
"learning_rate": 9.03511559545563e-06,
"loss": 2.7672,
"step": 247500
},
{
"epoch": 0.82,
"learning_rate": 9.018564127009348e-06,
"loss": 2.7423,
"step": 247600
},
{
"epoch": 0.82,
"learning_rate": 9.002012658563067e-06,
"loss": 2.753,
"step": 247700
},
{
"epoch": 0.82,
"learning_rate": 8.985461190116788e-06,
"loss": 2.7415,
"step": 247800
},
{
"epoch": 0.82,
"learning_rate": 8.968909721670507e-06,
"loss": 2.749,
"step": 247900
},
{
"epoch": 0.82,
"learning_rate": 8.952358253224226e-06,
"loss": 2.7373,
"step": 248000
},
{
"epoch": 0.82,
"learning_rate": 8.935806784777947e-06,
"loss": 2.75,
"step": 248100
},
{
"epoch": 0.82,
"learning_rate": 8.919255316331665e-06,
"loss": 2.7545,
"step": 248200
},
{
"epoch": 0.82,
"learning_rate": 8.902703847885384e-06,
"loss": 2.7428,
"step": 248300
},
{
"epoch": 0.82,
"learning_rate": 8.886152379439103e-06,
"loss": 2.7619,
"step": 248400
},
{
"epoch": 0.82,
"learning_rate": 8.869600910992824e-06,
"loss": 2.7557,
"step": 248500
},
{
"epoch": 0.82,
"learning_rate": 8.853049442546543e-06,
"loss": 2.7511,
"step": 248600
},
{
"epoch": 0.82,
"learning_rate": 8.836497974100262e-06,
"loss": 2.7657,
"step": 248700
},
{
"epoch": 0.82,
"learning_rate": 8.819946505653983e-06,
"loss": 2.7629,
"step": 248800
},
{
"epoch": 0.82,
"learning_rate": 8.803395037207702e-06,
"loss": 2.7567,
"step": 248900
},
{
"epoch": 0.82,
"learning_rate": 8.78684356876142e-06,
"loss": 2.7626,
"step": 249000
},
{
"epoch": 0.82,
"learning_rate": 8.77029210031514e-06,
"loss": 2.7587,
"step": 249100
},
{
"epoch": 0.82,
"learning_rate": 8.753740631868859e-06,
"loss": 2.7437,
"step": 249200
},
{
"epoch": 0.83,
"learning_rate": 8.73718916342258e-06,
"loss": 2.7416,
"step": 249300
},
{
"epoch": 0.83,
"learning_rate": 8.720637694976298e-06,
"loss": 2.7517,
"step": 249400
},
{
"epoch": 0.83,
"learning_rate": 8.704086226530019e-06,
"loss": 2.7495,
"step": 249500
},
{
"epoch": 0.83,
"learning_rate": 8.687534758083738e-06,
"loss": 2.7541,
"step": 249600
},
{
"epoch": 0.83,
"learning_rate": 8.670983289637457e-06,
"loss": 2.7381,
"step": 249700
},
{
"epoch": 0.83,
"learning_rate": 8.654431821191176e-06,
"loss": 2.7364,
"step": 249800
},
{
"epoch": 0.83,
"learning_rate": 8.637880352744895e-06,
"loss": 2.7609,
"step": 249900
},
{
"epoch": 0.83,
"learning_rate": 8.621328884298616e-06,
"loss": 2.7481,
"step": 250000
},
{
"epoch": 0.83,
"learning_rate": 8.604777415852335e-06,
"loss": 2.7681,
"step": 250100
},
{
"epoch": 0.83,
"learning_rate": 8.588225947406055e-06,
"loss": 2.7547,
"step": 250200
},
{
"epoch": 0.83,
"learning_rate": 8.571674478959774e-06,
"loss": 2.7478,
"step": 250300
},
{
"epoch": 0.83,
"learning_rate": 8.555123010513493e-06,
"loss": 2.7537,
"step": 250400
},
{
"epoch": 0.83,
"learning_rate": 8.538571542067212e-06,
"loss": 2.7402,
"step": 250500
},
{
"epoch": 0.83,
"learning_rate": 8.522020073620931e-06,
"loss": 2.749,
"step": 250600
},
{
"epoch": 0.83,
"learning_rate": 8.505468605174652e-06,
"loss": 2.7618,
"step": 250700
},
{
"epoch": 0.83,
"learning_rate": 8.488917136728371e-06,
"loss": 2.7421,
"step": 250800
},
{
"epoch": 0.83,
"learning_rate": 8.472365668282092e-06,
"loss": 2.7597,
"step": 250900
},
{
"epoch": 0.83,
"learning_rate": 8.45581419983581e-06,
"loss": 2.7598,
"step": 251000
},
{
"epoch": 0.83,
"learning_rate": 8.439262731389528e-06,
"loss": 2.7549,
"step": 251100
},
{
"epoch": 0.83,
"learning_rate": 8.422711262943249e-06,
"loss": 2.7564,
"step": 251200
},
{
"epoch": 0.83,
"learning_rate": 8.406159794496967e-06,
"loss": 2.7566,
"step": 251300
},
{
"epoch": 0.83,
"learning_rate": 8.389608326050688e-06,
"loss": 2.7518,
"step": 251400
},
{
"epoch": 0.83,
"learning_rate": 8.373056857604407e-06,
"loss": 2.7604,
"step": 251500
},
{
"epoch": 0.83,
"learning_rate": 8.356505389158128e-06,
"loss": 2.7532,
"step": 251600
},
{
"epoch": 0.83,
"learning_rate": 8.339953920711847e-06,
"loss": 2.7611,
"step": 251700
},
{
"epoch": 0.83,
"learning_rate": 8.323402452265564e-06,
"loss": 2.7478,
"step": 251800
},
{
"epoch": 0.83,
"learning_rate": 8.306850983819285e-06,
"loss": 2.7529,
"step": 251900
},
{
"epoch": 0.83,
"learning_rate": 8.290299515373004e-06,
"loss": 2.7382,
"step": 252000
},
{
"epoch": 0.83,
"learning_rate": 8.273748046926724e-06,
"loss": 2.7683,
"step": 252100
},
{
"epoch": 0.83,
"learning_rate": 8.257196578480443e-06,
"loss": 2.7738,
"step": 252200
},
{
"epoch": 0.84,
"learning_rate": 8.240645110034162e-06,
"loss": 2.7573,
"step": 252300
},
{
"epoch": 0.84,
"learning_rate": 8.224093641587881e-06,
"loss": 2.7543,
"step": 252400
},
{
"epoch": 0.84,
"learning_rate": 8.2075421731416e-06,
"loss": 2.7482,
"step": 252500
},
{
"epoch": 0.84,
"learning_rate": 8.190990704695321e-06,
"loss": 2.7458,
"step": 252600
},
{
"epoch": 0.84,
"learning_rate": 8.17443923624904e-06,
"loss": 2.764,
"step": 252700
},
{
"epoch": 0.84,
"learning_rate": 8.15788776780276e-06,
"loss": 2.7482,
"step": 252800
},
{
"epoch": 0.84,
"learning_rate": 8.14133629935648e-06,
"loss": 2.7414,
"step": 252900
},
{
"epoch": 0.84,
"learning_rate": 8.124784830910199e-06,
"loss": 2.7455,
"step": 253000
},
{
"epoch": 0.84,
"learning_rate": 8.108233362463918e-06,
"loss": 2.7433,
"step": 253100
},
{
"epoch": 0.84,
"learning_rate": 8.091681894017637e-06,
"loss": 2.7544,
"step": 253200
},
{
"epoch": 0.84,
"learning_rate": 8.075130425571357e-06,
"loss": 2.752,
"step": 253300
},
{
"epoch": 0.84,
"learning_rate": 8.058578957125076e-06,
"loss": 2.7592,
"step": 253400
},
{
"epoch": 0.84,
"learning_rate": 8.042027488678797e-06,
"loss": 2.7584,
"step": 253500
},
{
"epoch": 0.84,
"learning_rate": 8.025476020232516e-06,
"loss": 2.7561,
"step": 253600
},
{
"epoch": 0.84,
"learning_rate": 8.008924551786235e-06,
"loss": 2.7424,
"step": 253700
},
{
"epoch": 0.84,
"learning_rate": 7.992373083339954e-06,
"loss": 2.7556,
"step": 253800
},
{
"epoch": 0.84,
"learning_rate": 7.975821614893673e-06,
"loss": 2.7459,
"step": 253900
},
{
"epoch": 0.84,
"learning_rate": 7.959270146447394e-06,
"loss": 2.7412,
"step": 254000
},
{
"epoch": 0.84,
"learning_rate": 7.942718678001113e-06,
"loss": 2.7453,
"step": 254100
},
{
"epoch": 0.84,
"learning_rate": 7.926167209554832e-06,
"loss": 2.767,
"step": 254200
},
{
"epoch": 0.84,
"learning_rate": 7.909615741108552e-06,
"loss": 2.7336,
"step": 254300
},
{
"epoch": 0.84,
"learning_rate": 7.893064272662271e-06,
"loss": 2.7463,
"step": 254400
},
{
"epoch": 0.84,
"learning_rate": 7.87651280421599e-06,
"loss": 2.7542,
"step": 254500
},
{
"epoch": 0.84,
"learning_rate": 7.85996133576971e-06,
"loss": 2.7599,
"step": 254600
},
{
"epoch": 0.84,
"learning_rate": 7.84340986732343e-06,
"loss": 2.7506,
"step": 254700
},
{
"epoch": 0.84,
"learning_rate": 7.826858398877149e-06,
"loss": 2.7518,
"step": 254800
},
{
"epoch": 0.84,
"learning_rate": 7.810306930430868e-06,
"loss": 2.7397,
"step": 254900
},
{
"epoch": 0.84,
"learning_rate": 7.793755461984588e-06,
"loss": 2.7508,
"step": 255000
},
{
"epoch": 0.84,
"learning_rate": 7.777203993538307e-06,
"loss": 2.7566,
"step": 255100
},
{
"epoch": 0.84,
"learning_rate": 7.760652525092026e-06,
"loss": 2.7464,
"step": 255200
},
{
"epoch": 0.85,
"learning_rate": 7.744101056645745e-06,
"loss": 2.7414,
"step": 255300
},
{
"epoch": 0.85,
"learning_rate": 7.727549588199466e-06,
"loss": 2.7552,
"step": 255400
},
{
"epoch": 0.85,
"learning_rate": 7.710998119753185e-06,
"loss": 2.743,
"step": 255500
},
{
"epoch": 0.85,
"learning_rate": 7.694446651306904e-06,
"loss": 2.7563,
"step": 255600
},
{
"epoch": 0.85,
"learning_rate": 7.677895182860625e-06,
"loss": 2.7409,
"step": 255700
},
{
"epoch": 0.85,
"learning_rate": 7.661343714414344e-06,
"loss": 2.7585,
"step": 255800
},
{
"epoch": 0.85,
"learning_rate": 7.644792245968063e-06,
"loss": 2.7638,
"step": 255900
},
{
"epoch": 0.85,
"learning_rate": 7.628240777521782e-06,
"loss": 2.7583,
"step": 256000
},
{
"epoch": 0.85,
"learning_rate": 7.611689309075501e-06,
"loss": 2.739,
"step": 256100
},
{
"epoch": 0.85,
"learning_rate": 7.595137840629221e-06,
"loss": 2.7411,
"step": 256200
},
{
"epoch": 0.85,
"learning_rate": 7.57858637218294e-06,
"loss": 2.7533,
"step": 256300
},
{
"epoch": 0.85,
"learning_rate": 7.56203490373666e-06,
"loss": 2.7521,
"step": 256400
},
{
"epoch": 0.85,
"learning_rate": 7.545483435290379e-06,
"loss": 2.7548,
"step": 256500
},
{
"epoch": 0.85,
"learning_rate": 7.528931966844099e-06,
"loss": 2.7567,
"step": 256600
},
{
"epoch": 0.85,
"learning_rate": 7.512380498397818e-06,
"loss": 2.7472,
"step": 256700
},
{
"epoch": 0.85,
"learning_rate": 7.495829029951537e-06,
"loss": 2.734,
"step": 256800
},
{
"epoch": 0.85,
"learning_rate": 7.479277561505258e-06,
"loss": 2.7375,
"step": 256900
},
{
"epoch": 0.85,
"learning_rate": 7.462726093058977e-06,
"loss": 2.7464,
"step": 257000
},
{
"epoch": 0.85,
"learning_rate": 7.4461746246126964e-06,
"loss": 2.7558,
"step": 257100
},
{
"epoch": 0.85,
"learning_rate": 7.429623156166415e-06,
"loss": 2.7455,
"step": 257200
},
{
"epoch": 0.85,
"learning_rate": 7.413071687720134e-06,
"loss": 2.7457,
"step": 257300
},
{
"epoch": 0.85,
"learning_rate": 7.396520219273854e-06,
"loss": 2.752,
"step": 257400
},
{
"epoch": 0.85,
"learning_rate": 7.379968750827573e-06,
"loss": 2.7553,
"step": 257500
},
{
"epoch": 0.85,
"learning_rate": 7.363417282381294e-06,
"loss": 2.77,
"step": 257600
},
{
"epoch": 0.85,
"learning_rate": 7.346865813935012e-06,
"loss": 2.7628,
"step": 257700
},
{
"epoch": 0.85,
"learning_rate": 7.330314345488733e-06,
"loss": 2.7503,
"step": 257800
},
{
"epoch": 0.85,
"learning_rate": 7.313762877042452e-06,
"loss": 2.7369,
"step": 257900
},
{
"epoch": 0.85,
"learning_rate": 7.297211408596171e-06,
"loss": 2.7516,
"step": 258000
},
{
"epoch": 0.85,
"learning_rate": 7.2806599401498905e-06,
"loss": 2.7439,
"step": 258100
},
{
"epoch": 0.85,
"learning_rate": 7.2641084717036095e-06,
"loss": 2.7648,
"step": 258200
},
{
"epoch": 0.86,
"learning_rate": 7.24755700325733e-06,
"loss": 2.7483,
"step": 258300
},
{
"epoch": 0.86,
"learning_rate": 7.231005534811048e-06,
"loss": 2.7534,
"step": 258400
},
{
"epoch": 0.86,
"learning_rate": 7.214454066364769e-06,
"loss": 2.7499,
"step": 258500
},
{
"epoch": 0.86,
"learning_rate": 7.197902597918488e-06,
"loss": 2.7586,
"step": 258600
},
{
"epoch": 0.86,
"learning_rate": 7.181351129472207e-06,
"loss": 2.7542,
"step": 258700
},
{
"epoch": 0.86,
"learning_rate": 7.164799661025927e-06,
"loss": 2.7603,
"step": 258800
},
{
"epoch": 0.86,
"learning_rate": 7.148248192579646e-06,
"loss": 2.7507,
"step": 258900
},
{
"epoch": 0.86,
"learning_rate": 7.1316967241333656e-06,
"loss": 2.7567,
"step": 259000
},
{
"epoch": 0.86,
"learning_rate": 7.1151452556870846e-06,
"loss": 2.7538,
"step": 259100
},
{
"epoch": 0.86,
"learning_rate": 7.0985937872408035e-06,
"loss": 2.7438,
"step": 259200
},
{
"epoch": 0.86,
"learning_rate": 7.082042318794524e-06,
"loss": 2.7572,
"step": 259300
},
{
"epoch": 0.86,
"learning_rate": 7.065490850348243e-06,
"loss": 2.7405,
"step": 259400
},
{
"epoch": 0.86,
"learning_rate": 7.048939381901963e-06,
"loss": 2.7389,
"step": 259500
},
{
"epoch": 0.86,
"learning_rate": 7.032387913455682e-06,
"loss": 2.7333,
"step": 259600
},
{
"epoch": 0.86,
"learning_rate": 7.015836445009402e-06,
"loss": 2.7451,
"step": 259700
},
{
"epoch": 0.86,
"learning_rate": 6.999284976563121e-06,
"loss": 2.7516,
"step": 259800
},
{
"epoch": 0.86,
"learning_rate": 6.98273350811684e-06,
"loss": 2.7503,
"step": 259900
},
{
"epoch": 0.86,
"learning_rate": 6.9661820396705605e-06,
"loss": 2.746,
"step": 260000
},
{
"epoch": 0.86,
"learning_rate": 6.949630571224279e-06,
"loss": 2.7416,
"step": 260100
},
{
"epoch": 0.86,
"learning_rate": 6.933079102777999e-06,
"loss": 2.7475,
"step": 260200
},
{
"epoch": 0.86,
"learning_rate": 6.916527634331718e-06,
"loss": 2.7491,
"step": 260300
},
{
"epoch": 0.86,
"learning_rate": 6.899976165885437e-06,
"loss": 2.7493,
"step": 260400
},
{
"epoch": 0.86,
"learning_rate": 6.883424697439157e-06,
"loss": 2.7503,
"step": 260500
},
{
"epoch": 0.86,
"learning_rate": 6.866873228992876e-06,
"loss": 2.7372,
"step": 260600
},
{
"epoch": 0.86,
"learning_rate": 6.850321760546597e-06,
"loss": 2.7666,
"step": 260700
},
{
"epoch": 0.86,
"learning_rate": 6.833770292100315e-06,
"loss": 2.7661,
"step": 260800
},
{
"epoch": 0.86,
"learning_rate": 6.8172188236540356e-06,
"loss": 2.7542,
"step": 260900
},
{
"epoch": 0.86,
"learning_rate": 6.8006673552077545e-06,
"loss": 2.7459,
"step": 261000
},
{
"epoch": 0.86,
"learning_rate": 6.7841158867614735e-06,
"loss": 2.7432,
"step": 261100
},
{
"epoch": 0.86,
"learning_rate": 6.767564418315193e-06,
"loss": 2.7501,
"step": 261200
},
{
"epoch": 0.86,
"learning_rate": 6.751012949868912e-06,
"loss": 2.7415,
"step": 261300
},
{
"epoch": 0.87,
"learning_rate": 6.734461481422632e-06,
"loss": 2.754,
"step": 261400
},
{
"epoch": 0.87,
"learning_rate": 6.717910012976351e-06,
"loss": 2.7459,
"step": 261500
},
{
"epoch": 0.87,
"learning_rate": 6.701358544530072e-06,
"loss": 2.7676,
"step": 261600
},
{
"epoch": 0.87,
"learning_rate": 6.684807076083791e-06,
"loss": 2.7591,
"step": 261700
},
{
"epoch": 0.87,
"learning_rate": 6.66825560763751e-06,
"loss": 2.7401,
"step": 261800
},
{
"epoch": 0.87,
"learning_rate": 6.65170413919123e-06,
"loss": 2.7533,
"step": 261900
},
{
"epoch": 0.87,
"learning_rate": 6.635152670744949e-06,
"loss": 2.7438,
"step": 262000
},
{
"epoch": 0.87,
"learning_rate": 6.6186012022986684e-06,
"loss": 2.746,
"step": 262100
},
{
"epoch": 0.87,
"learning_rate": 6.602049733852387e-06,
"loss": 2.7529,
"step": 262200
},
{
"epoch": 0.87,
"learning_rate": 6.585498265406106e-06,
"loss": 2.7414,
"step": 262300
},
{
"epoch": 0.87,
"learning_rate": 6.568946796959827e-06,
"loss": 2.7319,
"step": 262400
},
{
"epoch": 0.87,
"learning_rate": 6.552395328513545e-06,
"loss": 2.7405,
"step": 262500
},
{
"epoch": 0.87,
"learning_rate": 6.535843860067266e-06,
"loss": 2.7606,
"step": 262600
},
{
"epoch": 0.87,
"learning_rate": 6.519292391620985e-06,
"loss": 2.7472,
"step": 262700
},
{
"epoch": 0.87,
"learning_rate": 6.502740923174705e-06,
"loss": 2.7333,
"step": 262800
},
{
"epoch": 0.87,
"learning_rate": 6.486189454728424e-06,
"loss": 2.7405,
"step": 262900
},
{
"epoch": 0.87,
"learning_rate": 6.469637986282143e-06,
"loss": 2.7524,
"step": 263000
},
{
"epoch": 0.87,
"learning_rate": 6.453086517835863e-06,
"loss": 2.741,
"step": 263100
},
{
"epoch": 0.87,
"learning_rate": 6.4365350493895815e-06,
"loss": 2.7545,
"step": 263200
},
{
"epoch": 0.87,
"learning_rate": 6.419983580943302e-06,
"loss": 2.7353,
"step": 263300
},
{
"epoch": 0.87,
"learning_rate": 6.403432112497021e-06,
"loss": 2.7545,
"step": 263400
},
{
"epoch": 0.87,
"learning_rate": 6.386880644050741e-06,
"loss": 2.7504,
"step": 263500
},
{
"epoch": 0.87,
"learning_rate": 6.37032917560446e-06,
"loss": 2.7419,
"step": 263600
},
{
"epoch": 0.87,
"learning_rate": 6.353777707158179e-06,
"loss": 2.7564,
"step": 263700
},
{
"epoch": 0.87,
"learning_rate": 6.337226238711899e-06,
"loss": 2.7474,
"step": 263800
},
{
"epoch": 0.87,
"learning_rate": 6.320674770265618e-06,
"loss": 2.758,
"step": 263900
},
{
"epoch": 0.87,
"learning_rate": 6.304123301819338e-06,
"loss": 2.7502,
"step": 264000
},
{
"epoch": 0.87,
"learning_rate": 6.287571833373057e-06,
"loss": 2.733,
"step": 264100
},
{
"epoch": 0.87,
"learning_rate": 6.271020364926776e-06,
"loss": 2.7444,
"step": 264200
},
{
"epoch": 0.87,
"learning_rate": 6.254468896480496e-06,
"loss": 2.7605,
"step": 264300
},
{
"epoch": 0.88,
"learning_rate": 6.237917428034216e-06,
"loss": 2.7537,
"step": 264400
},
{
"epoch": 0.88,
"learning_rate": 6.221365959587935e-06,
"loss": 2.7518,
"step": 264500
},
{
"epoch": 0.88,
"learning_rate": 6.204814491141654e-06,
"loss": 2.7624,
"step": 264600
},
{
"epoch": 0.88,
"learning_rate": 6.188263022695374e-06,
"loss": 2.7612,
"step": 264700
},
{
"epoch": 0.88,
"learning_rate": 6.171711554249094e-06,
"loss": 2.7534,
"step": 264800
},
{
"epoch": 0.88,
"learning_rate": 6.155160085802813e-06,
"loss": 2.7624,
"step": 264900
},
{
"epoch": 0.88,
"learning_rate": 6.1386086173565325e-06,
"loss": 2.7515,
"step": 265000
},
{
"epoch": 0.88,
"learning_rate": 6.1220571489102515e-06,
"loss": 2.7534,
"step": 265100
},
{
"epoch": 0.88,
"learning_rate": 6.1055056804639704e-06,
"loss": 2.7669,
"step": 265200
},
{
"epoch": 0.88,
"learning_rate": 6.08895421201769e-06,
"loss": 2.7546,
"step": 265300
},
{
"epoch": 0.88,
"learning_rate": 6.07240274357141e-06,
"loss": 2.7538,
"step": 265400
},
{
"epoch": 0.88,
"learning_rate": 6.05585127512513e-06,
"loss": 2.7525,
"step": 265500
},
{
"epoch": 0.88,
"learning_rate": 6.039299806678849e-06,
"loss": 2.7458,
"step": 265600
},
{
"epoch": 0.88,
"learning_rate": 6.022748338232569e-06,
"loss": 2.7465,
"step": 265700
},
{
"epoch": 0.88,
"learning_rate": 6.006196869786288e-06,
"loss": 2.7563,
"step": 265800
},
{
"epoch": 0.88,
"learning_rate": 5.989645401340007e-06,
"loss": 2.744,
"step": 265900
},
{
"epoch": 0.88,
"learning_rate": 5.9730939328937265e-06,
"loss": 2.7377,
"step": 266000
},
{
"epoch": 0.88,
"learning_rate": 5.956542464447446e-06,
"loss": 2.7466,
"step": 266100
},
{
"epoch": 0.88,
"learning_rate": 5.939990996001165e-06,
"loss": 2.7559,
"step": 266200
},
{
"epoch": 0.88,
"learning_rate": 5.923439527554885e-06,
"loss": 2.7584,
"step": 266300
},
{
"epoch": 0.88,
"learning_rate": 5.906888059108604e-06,
"loss": 2.7524,
"step": 266400
},
{
"epoch": 0.88,
"learning_rate": 5.890336590662324e-06,
"loss": 2.7572,
"step": 266500
},
{
"epoch": 0.88,
"learning_rate": 5.873785122216043e-06,
"loss": 2.7473,
"step": 266600
},
{
"epoch": 0.88,
"learning_rate": 5.857233653769763e-06,
"loss": 2.7638,
"step": 266700
},
{
"epoch": 0.88,
"learning_rate": 5.840682185323483e-06,
"loss": 2.7577,
"step": 266800
},
{
"epoch": 0.88,
"learning_rate": 5.824130716877202e-06,
"loss": 2.7519,
"step": 266900
},
{
"epoch": 0.88,
"learning_rate": 5.807579248430921e-06,
"loss": 2.7518,
"step": 267000
},
{
"epoch": 0.88,
"learning_rate": 5.79102777998464e-06,
"loss": 2.7483,
"step": 267100
},
{
"epoch": 0.88,
"learning_rate": 5.77447631153836e-06,
"loss": 2.7452,
"step": 267200
},
{
"epoch": 0.88,
"learning_rate": 5.757924843092079e-06,
"loss": 2.7326,
"step": 267300
},
{
"epoch": 0.89,
"learning_rate": 5.741373374645799e-06,
"loss": 2.7618,
"step": 267400
},
{
"epoch": 0.89,
"learning_rate": 5.724821906199519e-06,
"loss": 2.7353,
"step": 267500
},
{
"epoch": 0.89,
"learning_rate": 5.708270437753237e-06,
"loss": 2.7488,
"step": 267600
},
{
"epoch": 0.89,
"learning_rate": 5.691718969306957e-06,
"loss": 2.7483,
"step": 267700
},
{
"epoch": 0.89,
"learning_rate": 5.675167500860677e-06,
"loss": 2.7467,
"step": 267800
},
{
"epoch": 0.89,
"learning_rate": 5.658616032414396e-06,
"loss": 2.7538,
"step": 267900
},
{
"epoch": 0.89,
"learning_rate": 5.6420645639681155e-06,
"loss": 2.7592,
"step": 268000
},
{
"epoch": 0.89,
"learning_rate": 5.625513095521835e-06,
"loss": 2.7456,
"step": 268100
},
{
"epoch": 0.89,
"learning_rate": 5.608961627075555e-06,
"loss": 2.7734,
"step": 268200
},
{
"epoch": 0.89,
"learning_rate": 5.592410158629273e-06,
"loss": 2.7488,
"step": 268300
},
{
"epoch": 0.89,
"learning_rate": 5.575858690182993e-06,
"loss": 2.7549,
"step": 268400
},
{
"epoch": 0.89,
"learning_rate": 5.559307221736713e-06,
"loss": 2.748,
"step": 268500
},
{
"epoch": 0.89,
"learning_rate": 5.542755753290432e-06,
"loss": 2.7621,
"step": 268600
},
{
"epoch": 0.89,
"learning_rate": 5.526204284844152e-06,
"loss": 2.7469,
"step": 268700
},
{
"epoch": 0.89,
"learning_rate": 5.509652816397872e-06,
"loss": 2.7381,
"step": 268800
},
{
"epoch": 0.89,
"learning_rate": 5.493101347951591e-06,
"loss": 2.7526,
"step": 268900
},
{
"epoch": 0.89,
"learning_rate": 5.4765498795053096e-06,
"loss": 2.759,
"step": 269000
},
{
"epoch": 0.89,
"learning_rate": 5.459998411059029e-06,
"loss": 2.7419,
"step": 269100
},
{
"epoch": 0.89,
"learning_rate": 5.443446942612749e-06,
"loss": 2.7591,
"step": 269200
},
{
"epoch": 0.89,
"learning_rate": 5.426895474166468e-06,
"loss": 2.7486,
"step": 269300
},
{
"epoch": 0.89,
"learning_rate": 5.410344005720188e-06,
"loss": 2.7511,
"step": 269400
},
{
"epoch": 0.89,
"learning_rate": 5.393792537273907e-06,
"loss": 2.7438,
"step": 269500
},
{
"epoch": 0.89,
"learning_rate": 5.377241068827627e-06,
"loss": 2.7584,
"step": 269600
},
{
"epoch": 0.89,
"learning_rate": 5.360689600381346e-06,
"loss": 2.7333,
"step": 269700
},
{
"epoch": 0.89,
"learning_rate": 5.344138131935066e-06,
"loss": 2.7365,
"step": 269800
},
{
"epoch": 0.89,
"learning_rate": 5.3275866634887855e-06,
"loss": 2.7407,
"step": 269900
},
{
"epoch": 0.89,
"learning_rate": 5.3110351950425045e-06,
"loss": 2.7517,
"step": 270000
},
{
"epoch": 0.89,
"learning_rate": 5.2944837265962235e-06,
"loss": 2.7426,
"step": 270100
},
{
"epoch": 0.89,
"learning_rate": 5.277932258149943e-06,
"loss": 2.7449,
"step": 270200
},
{
"epoch": 0.89,
"learning_rate": 5.261380789703662e-06,
"loss": 2.7402,
"step": 270300
},
{
"epoch": 0.9,
"learning_rate": 5.244829321257382e-06,
"loss": 2.7582,
"step": 270400
},
{
"epoch": 0.9,
"learning_rate": 5.228277852811102e-06,
"loss": 2.7453,
"step": 270500
},
{
"epoch": 0.9,
"learning_rate": 5.211726384364822e-06,
"loss": 2.7585,
"step": 270600
},
{
"epoch": 0.9,
"learning_rate": 5.195174915918541e-06,
"loss": 2.7502,
"step": 270700
},
{
"epoch": 0.9,
"learning_rate": 5.17862344747226e-06,
"loss": 2.7567,
"step": 270800
},
{
"epoch": 0.9,
"learning_rate": 5.1620719790259795e-06,
"loss": 2.7506,
"step": 270900
},
{
"epoch": 0.9,
"learning_rate": 5.1455205105796985e-06,
"loss": 2.7401,
"step": 271000
},
{
"epoch": 0.9,
"learning_rate": 5.128969042133418e-06,
"loss": 2.7438,
"step": 271100
},
{
"epoch": 0.9,
"learning_rate": 5.112417573687138e-06,
"loss": 2.7304,
"step": 271200
},
{
"epoch": 0.9,
"learning_rate": 5.095866105240857e-06,
"loss": 2.7508,
"step": 271300
},
{
"epoch": 0.9,
"learning_rate": 5.079314636794576e-06,
"loss": 2.7388,
"step": 271400
},
{
"epoch": 0.9,
"learning_rate": 5.062763168348296e-06,
"loss": 2.7519,
"step": 271500
},
{
"epoch": 0.9,
"learning_rate": 5.046211699902016e-06,
"loss": 2.731,
"step": 271600
},
{
"epoch": 0.9,
"learning_rate": 5.029660231455735e-06,
"loss": 2.76,
"step": 271700
},
{
"epoch": 0.9,
"learning_rate": 5.013108763009455e-06,
"loss": 2.7508,
"step": 271800
},
{
"epoch": 0.9,
"learning_rate": 4.9965572945631745e-06,
"loss": 2.7589,
"step": 271900
},
{
"epoch": 0.9,
"learning_rate": 4.9800058261168934e-06,
"loss": 2.7485,
"step": 272000
},
{
"epoch": 0.9,
"learning_rate": 4.963454357670612e-06,
"loss": 2.743,
"step": 272100
},
{
"epoch": 0.9,
"learning_rate": 4.946902889224332e-06,
"loss": 2.7607,
"step": 272200
},
{
"epoch": 0.9,
"learning_rate": 4.930351420778052e-06,
"loss": 2.742,
"step": 272300
},
{
"epoch": 0.9,
"learning_rate": 4.913799952331771e-06,
"loss": 2.7419,
"step": 272400
},
{
"epoch": 0.9,
"learning_rate": 4.897248483885491e-06,
"loss": 2.7496,
"step": 272500
},
{
"epoch": 0.9,
"learning_rate": 4.88069701543921e-06,
"loss": 2.7502,
"step": 272600
},
{
"epoch": 0.9,
"learning_rate": 4.864145546992929e-06,
"loss": 2.7372,
"step": 272700
},
{
"epoch": 0.9,
"learning_rate": 4.847594078546649e-06,
"loss": 2.7502,
"step": 272800
},
{
"epoch": 0.9,
"learning_rate": 4.8310426101003685e-06,
"loss": 2.7352,
"step": 272900
},
{
"epoch": 0.9,
"learning_rate": 4.8144911416540875e-06,
"loss": 2.7495,
"step": 273000
},
{
"epoch": 0.9,
"learning_rate": 4.797939673207807e-06,
"loss": 2.7424,
"step": 273100
},
{
"epoch": 0.9,
"learning_rate": 4.781388204761527e-06,
"loss": 2.7468,
"step": 273200
},
{
"epoch": 0.9,
"learning_rate": 4.764836736315246e-06,
"loss": 2.7409,
"step": 273300
},
{
"epoch": 0.91,
"learning_rate": 4.748285267868965e-06,
"loss": 2.7514,
"step": 273400
},
{
"epoch": 0.91,
"learning_rate": 4.731733799422685e-06,
"loss": 2.7444,
"step": 273500
},
{
"epoch": 0.91,
"learning_rate": 4.715182330976405e-06,
"loss": 2.7648,
"step": 273600
},
{
"epoch": 0.91,
"learning_rate": 4.698630862530124e-06,
"loss": 2.7497,
"step": 273700
},
{
"epoch": 0.91,
"learning_rate": 4.682079394083844e-06,
"loss": 2.7593,
"step": 273800
},
{
"epoch": 0.91,
"learning_rate": 4.6655279256375626e-06,
"loss": 2.7445,
"step": 273900
},
{
"epoch": 0.91,
"learning_rate": 4.648976457191282e-06,
"loss": 2.7566,
"step": 274000
},
{
"epoch": 0.91,
"learning_rate": 4.632424988745001e-06,
"loss": 2.7489,
"step": 274100
},
{
"epoch": 0.91,
"learning_rate": 4.615873520298721e-06,
"loss": 2.7413,
"step": 274200
},
{
"epoch": 0.91,
"learning_rate": 4.599322051852441e-06,
"loss": 2.7411,
"step": 274300
},
{
"epoch": 0.91,
"learning_rate": 4.58277058340616e-06,
"loss": 2.7686,
"step": 274400
},
{
"epoch": 0.91,
"learning_rate": 4.566219114959879e-06,
"loss": 2.7492,
"step": 274500
},
{
"epoch": 0.91,
"learning_rate": 4.549667646513599e-06,
"loss": 2.7524,
"step": 274600
},
{
"epoch": 0.91,
"learning_rate": 4.533116178067319e-06,
"loss": 2.759,
"step": 274700
},
{
"epoch": 0.91,
"learning_rate": 4.516564709621038e-06,
"loss": 2.7587,
"step": 274800
},
{
"epoch": 0.91,
"learning_rate": 4.5000132411747575e-06,
"loss": 2.7479,
"step": 274900
},
{
"epoch": 0.91,
"learning_rate": 4.483461772728477e-06,
"loss": 2.7479,
"step": 275000
},
{
"epoch": 0.91,
"learning_rate": 4.4669103042821954e-06,
"loss": 2.7593,
"step": 275100
},
{
"epoch": 0.91,
"learning_rate": 4.450358835835915e-06,
"loss": 2.7551,
"step": 275200
},
{
"epoch": 0.91,
"learning_rate": 4.433807367389635e-06,
"loss": 2.7317,
"step": 275300
},
{
"epoch": 0.91,
"learning_rate": 4.417255898943354e-06,
"loss": 2.7403,
"step": 275400
},
{
"epoch": 0.91,
"learning_rate": 4.400704430497074e-06,
"loss": 2.7439,
"step": 275500
},
{
"epoch": 0.91,
"learning_rate": 4.384152962050794e-06,
"loss": 2.7422,
"step": 275600
},
{
"epoch": 0.91,
"learning_rate": 4.367601493604513e-06,
"loss": 2.74,
"step": 275700
},
{
"epoch": 0.91,
"learning_rate": 4.351050025158232e-06,
"loss": 2.7425,
"step": 275800
},
{
"epoch": 0.91,
"learning_rate": 4.3344985567119515e-06,
"loss": 2.7427,
"step": 275900
},
{
"epoch": 0.91,
"learning_rate": 4.317947088265671e-06,
"loss": 2.7584,
"step": 276000
},
{
"epoch": 0.91,
"learning_rate": 4.30139561981939e-06,
"loss": 2.7433,
"step": 276100
},
{
"epoch": 0.91,
"learning_rate": 4.28484415137311e-06,
"loss": 2.7418,
"step": 276200
},
{
"epoch": 0.91,
"learning_rate": 4.26829268292683e-06,
"loss": 2.7536,
"step": 276300
},
{
"epoch": 0.91,
"learning_rate": 4.251741214480549e-06,
"loss": 2.7477,
"step": 276400
},
{
"epoch": 0.92,
"learning_rate": 4.235189746034268e-06,
"loss": 2.7344,
"step": 276500
},
{
"epoch": 0.92,
"learning_rate": 4.218638277587988e-06,
"loss": 2.7535,
"step": 276600
},
{
"epoch": 0.92,
"learning_rate": 4.202086809141708e-06,
"loss": 2.7492,
"step": 276700
},
{
"epoch": 0.92,
"learning_rate": 4.185535340695427e-06,
"loss": 2.7449,
"step": 276800
},
{
"epoch": 0.92,
"learning_rate": 4.1689838722491464e-06,
"loss": 2.7552,
"step": 276900
},
{
"epoch": 0.92,
"learning_rate": 4.1524324038028654e-06,
"loss": 2.7392,
"step": 277000
},
{
"epoch": 0.92,
"learning_rate": 4.135880935356585e-06,
"loss": 2.7482,
"step": 277100
},
{
"epoch": 0.92,
"learning_rate": 4.119329466910304e-06,
"loss": 2.7502,
"step": 277200
},
{
"epoch": 0.92,
"learning_rate": 4.102777998464024e-06,
"loss": 2.7499,
"step": 277300
},
{
"epoch": 0.92,
"learning_rate": 4.086226530017744e-06,
"loss": 2.7431,
"step": 277400
},
{
"epoch": 0.92,
"learning_rate": 4.069675061571463e-06,
"loss": 2.734,
"step": 277500
},
{
"epoch": 0.92,
"learning_rate": 4.053123593125182e-06,
"loss": 2.7456,
"step": 277600
},
{
"epoch": 0.92,
"learning_rate": 4.036572124678902e-06,
"loss": 2.7669,
"step": 277700
},
{
"epoch": 0.92,
"learning_rate": 4.020020656232621e-06,
"loss": 2.7498,
"step": 277800
},
{
"epoch": 0.92,
"learning_rate": 4.0034691877863405e-06,
"loss": 2.7376,
"step": 277900
},
{
"epoch": 0.92,
"learning_rate": 3.98691771934006e-06,
"loss": 2.7455,
"step": 278000
},
{
"epoch": 0.92,
"learning_rate": 3.970366250893779e-06,
"loss": 2.751,
"step": 278100
},
{
"epoch": 0.92,
"learning_rate": 3.953814782447498e-06,
"loss": 2.7451,
"step": 278200
},
{
"epoch": 0.92,
"learning_rate": 3.937263314001218e-06,
"loss": 2.7316,
"step": 278300
},
{
"epoch": 0.92,
"learning_rate": 3.920711845554938e-06,
"loss": 2.7511,
"step": 278400
},
{
"epoch": 0.92,
"learning_rate": 3.904160377108657e-06,
"loss": 2.7521,
"step": 278500
},
{
"epoch": 0.92,
"learning_rate": 3.887608908662377e-06,
"loss": 2.7418,
"step": 278600
},
{
"epoch": 0.92,
"learning_rate": 3.871057440216097e-06,
"loss": 2.7696,
"step": 278700
},
{
"epoch": 0.92,
"learning_rate": 3.854505971769816e-06,
"loss": 2.7411,
"step": 278800
},
{
"epoch": 0.92,
"learning_rate": 3.8379545033235346e-06,
"loss": 2.7362,
"step": 278900
},
{
"epoch": 0.92,
"learning_rate": 3.821403034877254e-06,
"loss": 2.7447,
"step": 279000
},
{
"epoch": 0.92,
"learning_rate": 3.804851566430974e-06,
"loss": 2.7331,
"step": 279100
},
{
"epoch": 0.92,
"learning_rate": 3.7883000979846936e-06,
"loss": 2.7555,
"step": 279200
},
{
"epoch": 0.92,
"learning_rate": 3.771748629538413e-06,
"loss": 2.7549,
"step": 279300
},
{
"epoch": 0.92,
"learning_rate": 3.7551971610921324e-06,
"loss": 2.7387,
"step": 279400
},
{
"epoch": 0.93,
"learning_rate": 3.7386456926458514e-06,
"loss": 2.7558,
"step": 279500
},
{
"epoch": 0.93,
"learning_rate": 3.722094224199571e-06,
"loss": 2.7497,
"step": 279600
},
{
"epoch": 0.93,
"learning_rate": 3.7055427557532907e-06,
"loss": 2.7438,
"step": 279700
},
{
"epoch": 0.93,
"learning_rate": 3.68899128730701e-06,
"loss": 2.7423,
"step": 279800
},
{
"epoch": 0.93,
"learning_rate": 3.6724398188607295e-06,
"loss": 2.7559,
"step": 279900
},
{
"epoch": 0.93,
"learning_rate": 3.6558883504144493e-06,
"loss": 2.7352,
"step": 280000
},
{
"epoch": 0.93,
"learning_rate": 3.6393368819681683e-06,
"loss": 2.7382,
"step": 280100
},
{
"epoch": 0.93,
"learning_rate": 3.6227854135218877e-06,
"loss": 2.7566,
"step": 280200
},
{
"epoch": 0.93,
"learning_rate": 3.606233945075607e-06,
"loss": 2.7594,
"step": 280300
},
{
"epoch": 0.93,
"learning_rate": 3.589682476629327e-06,
"loss": 2.7479,
"step": 280400
},
{
"epoch": 0.93,
"learning_rate": 3.5731310081830463e-06,
"loss": 2.7465,
"step": 280500
},
{
"epoch": 0.93,
"learning_rate": 3.5565795397367657e-06,
"loss": 2.7462,
"step": 280600
},
{
"epoch": 0.93,
"learning_rate": 3.5400280712904847e-06,
"loss": 2.7472,
"step": 280700
},
{
"epoch": 0.93,
"learning_rate": 3.523476602844204e-06,
"loss": 2.7328,
"step": 280800
},
{
"epoch": 0.93,
"learning_rate": 3.506925134397924e-06,
"loss": 2.7364,
"step": 280900
},
{
"epoch": 0.93,
"learning_rate": 3.4903736659516434e-06,
"loss": 2.7636,
"step": 281000
},
{
"epoch": 0.93,
"learning_rate": 3.4738221975053628e-06,
"loss": 2.7424,
"step": 281100
},
{
"epoch": 0.93,
"learning_rate": 3.4572707290590826e-06,
"loss": 2.7461,
"step": 281200
},
{
"epoch": 0.93,
"learning_rate": 3.440719260612802e-06,
"loss": 2.7489,
"step": 281300
},
{
"epoch": 0.93,
"learning_rate": 3.424167792166521e-06,
"loss": 2.7425,
"step": 281400
},
{
"epoch": 0.93,
"learning_rate": 3.4076163237202404e-06,
"loss": 2.7326,
"step": 281500
},
{
"epoch": 0.93,
"learning_rate": 3.3910648552739602e-06,
"loss": 2.753,
"step": 281600
},
{
"epoch": 0.93,
"learning_rate": 3.3745133868276796e-06,
"loss": 2.7561,
"step": 281700
},
{
"epoch": 0.93,
"learning_rate": 3.357961918381399e-06,
"loss": 2.7518,
"step": 281800
},
{
"epoch": 0.93,
"learning_rate": 3.341410449935119e-06,
"loss": 2.7343,
"step": 281900
},
{
"epoch": 0.93,
"learning_rate": 3.3248589814888374e-06,
"loss": 2.7507,
"step": 282000
},
{
"epoch": 0.93,
"learning_rate": 3.3083075130425573e-06,
"loss": 2.7425,
"step": 282100
},
{
"epoch": 0.93,
"learning_rate": 3.2917560445962767e-06,
"loss": 2.7406,
"step": 282200
},
{
"epoch": 0.93,
"learning_rate": 3.275204576149996e-06,
"loss": 2.7563,
"step": 282300
},
{
"epoch": 0.93,
"learning_rate": 3.258653107703716e-06,
"loss": 2.7447,
"step": 282400
},
{
"epoch": 0.94,
"learning_rate": 3.2421016392574353e-06,
"loss": 2.7492,
"step": 282500
},
{
"epoch": 0.94,
"learning_rate": 3.2255501708111543e-06,
"loss": 2.7582,
"step": 282600
},
{
"epoch": 0.94,
"learning_rate": 3.2089987023648737e-06,
"loss": 2.74,
"step": 282700
},
{
"epoch": 0.94,
"learning_rate": 3.1924472339185935e-06,
"loss": 2.7476,
"step": 282800
},
{
"epoch": 0.94,
"learning_rate": 3.175895765472313e-06,
"loss": 2.7407,
"step": 282900
},
{
"epoch": 0.94,
"learning_rate": 3.1593442970260323e-06,
"loss": 2.7461,
"step": 283000
},
{
"epoch": 0.94,
"learning_rate": 3.142792828579752e-06,
"loss": 2.7401,
"step": 283100
},
{
"epoch": 0.94,
"learning_rate": 3.1262413601334707e-06,
"loss": 2.7581,
"step": 283200
},
{
"epoch": 0.94,
"learning_rate": 3.1096898916871905e-06,
"loss": 2.7486,
"step": 283300
},
{
"epoch": 0.94,
"learning_rate": 3.09313842324091e-06,
"loss": 2.7606,
"step": 283400
},
{
"epoch": 0.94,
"learning_rate": 3.0765869547946294e-06,
"loss": 2.7364,
"step": 283500
},
{
"epoch": 0.94,
"learning_rate": 3.060035486348349e-06,
"loss": 2.7462,
"step": 283600
},
{
"epoch": 0.94,
"learning_rate": 3.043484017902068e-06,
"loss": 2.7555,
"step": 283700
},
{
"epoch": 0.94,
"learning_rate": 3.026932549455788e-06,
"loss": 2.7372,
"step": 283800
},
{
"epoch": 0.94,
"learning_rate": 3.0103810810095074e-06,
"loss": 2.7559,
"step": 283900
},
{
"epoch": 0.94,
"learning_rate": 2.993829612563227e-06,
"loss": 2.7416,
"step": 284000
},
{
"epoch": 0.94,
"learning_rate": 2.9772781441169462e-06,
"loss": 2.7477,
"step": 284100
},
{
"epoch": 0.94,
"learning_rate": 2.9607266756706656e-06,
"loss": 2.7414,
"step": 284200
},
{
"epoch": 0.94,
"learning_rate": 2.944175207224385e-06,
"loss": 2.7488,
"step": 284300
},
{
"epoch": 0.94,
"learning_rate": 2.9276237387781044e-06,
"loss": 2.7358,
"step": 284400
},
{
"epoch": 0.94,
"learning_rate": 2.911072270331824e-06,
"loss": 2.7428,
"step": 284500
},
{
"epoch": 0.94,
"learning_rate": 2.8945208018855432e-06,
"loss": 2.7465,
"step": 284600
},
{
"epoch": 0.94,
"learning_rate": 2.8779693334392627e-06,
"loss": 2.7357,
"step": 284700
},
{
"epoch": 0.94,
"learning_rate": 2.8614178649929825e-06,
"loss": 2.7398,
"step": 284800
},
{
"epoch": 0.94,
"learning_rate": 2.844866396546702e-06,
"loss": 2.7407,
"step": 284900
},
{
"epoch": 0.94,
"learning_rate": 2.8283149281004213e-06,
"loss": 2.7411,
"step": 285000
},
{
"epoch": 0.94,
"learning_rate": 2.8117634596541407e-06,
"loss": 2.7504,
"step": 285100
},
{
"epoch": 0.94,
"learning_rate": 2.79521199120786e-06,
"loss": 2.7491,
"step": 285200
},
{
"epoch": 0.94,
"learning_rate": 2.7786605227615795e-06,
"loss": 2.7464,
"step": 285300
},
{
"epoch": 0.94,
"learning_rate": 2.762109054315299e-06,
"loss": 2.744,
"step": 285400
},
{
"epoch": 0.95,
"learning_rate": 2.7455575858690187e-06,
"loss": 2.7457,
"step": 285500
},
{
"epoch": 0.95,
"learning_rate": 2.7290061174227377e-06,
"loss": 2.7404,
"step": 285600
},
{
"epoch": 0.95,
"learning_rate": 2.712454648976457e-06,
"loss": 2.7522,
"step": 285700
},
{
"epoch": 0.95,
"learning_rate": 2.695903180530177e-06,
"loss": 2.756,
"step": 285800
},
{
"epoch": 0.95,
"learning_rate": 2.679351712083896e-06,
"loss": 2.7467,
"step": 285900
},
{
"epoch": 0.95,
"learning_rate": 2.6628002436376158e-06,
"loss": 2.7523,
"step": 286000
},
{
"epoch": 0.95,
"learning_rate": 2.646248775191335e-06,
"loss": 2.7469,
"step": 286100
},
{
"epoch": 0.95,
"learning_rate": 2.6296973067450546e-06,
"loss": 2.74,
"step": 286200
},
{
"epoch": 0.95,
"learning_rate": 2.613145838298774e-06,
"loss": 2.7507,
"step": 286300
},
{
"epoch": 0.95,
"learning_rate": 2.5965943698524934e-06,
"loss": 2.75,
"step": 286400
},
{
"epoch": 0.95,
"learning_rate": 2.580042901406213e-06,
"loss": 2.7437,
"step": 286500
},
{
"epoch": 0.95,
"learning_rate": 2.5634914329599322e-06,
"loss": 2.7476,
"step": 286600
},
{
"epoch": 0.95,
"learning_rate": 2.546939964513652e-06,
"loss": 2.7286,
"step": 286700
},
{
"epoch": 0.95,
"learning_rate": 2.530388496067371e-06,
"loss": 2.7356,
"step": 286800
},
{
"epoch": 0.95,
"learning_rate": 2.5138370276210904e-06,
"loss": 2.7454,
"step": 286900
},
{
"epoch": 0.95,
"learning_rate": 2.4972855591748103e-06,
"loss": 2.7489,
"step": 287000
},
{
"epoch": 0.95,
"learning_rate": 2.4807340907285292e-06,
"loss": 2.7597,
"step": 287100
},
{
"epoch": 0.95,
"learning_rate": 2.464182622282249e-06,
"loss": 2.7469,
"step": 287200
},
{
"epoch": 0.95,
"learning_rate": 2.4476311538359685e-06,
"loss": 2.7373,
"step": 287300
},
{
"epoch": 0.95,
"learning_rate": 2.431079685389688e-06,
"loss": 2.7459,
"step": 287400
},
{
"epoch": 0.95,
"learning_rate": 2.4145282169434073e-06,
"loss": 2.7457,
"step": 287500
},
{
"epoch": 0.95,
"learning_rate": 2.3979767484971267e-06,
"loss": 2.7496,
"step": 287600
},
{
"epoch": 0.95,
"learning_rate": 2.3814252800508465e-06,
"loss": 2.7484,
"step": 287700
},
{
"epoch": 0.95,
"learning_rate": 2.3648738116045655e-06,
"loss": 2.7411,
"step": 287800
},
{
"epoch": 0.95,
"learning_rate": 2.3483223431582853e-06,
"loss": 2.7451,
"step": 287900
},
{
"epoch": 0.95,
"learning_rate": 2.3317708747120047e-06,
"loss": 2.7447,
"step": 288000
},
{
"epoch": 0.95,
"learning_rate": 2.3152194062657237e-06,
"loss": 2.7529,
"step": 288100
},
{
"epoch": 0.95,
"learning_rate": 2.2986679378194436e-06,
"loss": 2.7578,
"step": 288200
},
{
"epoch": 0.95,
"learning_rate": 2.282116469373163e-06,
"loss": 2.7456,
"step": 288300
},
{
"epoch": 0.95,
"learning_rate": 2.2655650009268824e-06,
"loss": 2.7471,
"step": 288400
},
{
"epoch": 0.96,
"learning_rate": 2.2490135324806018e-06,
"loss": 2.7395,
"step": 288500
},
{
"epoch": 0.96,
"learning_rate": 2.232462064034321e-06,
"loss": 2.7413,
"step": 288600
},
{
"epoch": 0.96,
"learning_rate": 2.2159105955880406e-06,
"loss": 2.7405,
"step": 288700
},
{
"epoch": 0.96,
"learning_rate": 2.19935912714176e-06,
"loss": 2.7453,
"step": 288800
},
{
"epoch": 0.96,
"learning_rate": 2.18280765869548e-06,
"loss": 2.7467,
"step": 288900
},
{
"epoch": 0.96,
"learning_rate": 2.166256190249199e-06,
"loss": 2.7482,
"step": 289000
},
{
"epoch": 0.96,
"learning_rate": 2.1497047218029186e-06,
"loss": 2.7424,
"step": 289100
},
{
"epoch": 0.96,
"learning_rate": 2.133153253356638e-06,
"loss": 2.7557,
"step": 289200
},
{
"epoch": 0.96,
"learning_rate": 2.116601784910357e-06,
"loss": 2.7541,
"step": 289300
},
{
"epoch": 0.96,
"learning_rate": 2.100050316464077e-06,
"loss": 2.7388,
"step": 289400
},
{
"epoch": 0.96,
"learning_rate": 2.0834988480177963e-06,
"loss": 2.7535,
"step": 289500
},
{
"epoch": 0.96,
"learning_rate": 2.0669473795715157e-06,
"loss": 2.7298,
"step": 289600
},
{
"epoch": 0.96,
"learning_rate": 2.050395911125235e-06,
"loss": 2.745,
"step": 289700
},
{
"epoch": 0.96,
"learning_rate": 2.0338444426789545e-06,
"loss": 2.7455,
"step": 289800
},
{
"epoch": 0.96,
"learning_rate": 2.0172929742326743e-06,
"loss": 2.738,
"step": 289900
},
{
"epoch": 0.96,
"learning_rate": 2.0007415057863933e-06,
"loss": 2.7626,
"step": 290000
},
{
"epoch": 0.96,
"learning_rate": 1.984190037340113e-06,
"loss": 2.7373,
"step": 290100
},
{
"epoch": 0.96,
"learning_rate": 1.9676385688938325e-06,
"loss": 2.7464,
"step": 290200
},
{
"epoch": 0.96,
"learning_rate": 1.9510871004475515e-06,
"loss": 2.7452,
"step": 290300
},
{
"epoch": 0.96,
"learning_rate": 1.9345356320012713e-06,
"loss": 2.7437,
"step": 290400
},
{
"epoch": 0.96,
"learning_rate": 1.9179841635549907e-06,
"loss": 2.7349,
"step": 290500
},
{
"epoch": 0.96,
"learning_rate": 1.90143269510871e-06,
"loss": 2.7581,
"step": 290600
},
{
"epoch": 0.96,
"learning_rate": 1.8848812266624296e-06,
"loss": 2.7641,
"step": 290700
},
{
"epoch": 0.96,
"learning_rate": 1.8683297582161492e-06,
"loss": 2.7371,
"step": 290800
},
{
"epoch": 0.96,
"learning_rate": 1.8517782897698684e-06,
"loss": 2.7527,
"step": 290900
},
{
"epoch": 0.96,
"learning_rate": 1.835226821323588e-06,
"loss": 2.7542,
"step": 291000
},
{
"epoch": 0.96,
"learning_rate": 1.8186753528773074e-06,
"loss": 2.7375,
"step": 291100
},
{
"epoch": 0.96,
"learning_rate": 1.8021238844310266e-06,
"loss": 2.745,
"step": 291200
},
{
"epoch": 0.96,
"learning_rate": 1.7855724159847462e-06,
"loss": 2.744,
"step": 291300
},
{
"epoch": 0.96,
"learning_rate": 1.7690209475384658e-06,
"loss": 2.7416,
"step": 291400
},
{
"epoch": 0.96,
"learning_rate": 1.752469479092185e-06,
"loss": 2.7422,
"step": 291500
},
{
"epoch": 0.97,
"learning_rate": 1.7359180106459046e-06,
"loss": 2.7445,
"step": 291600
},
{
"epoch": 0.97,
"learning_rate": 1.719366542199624e-06,
"loss": 2.7469,
"step": 291700
},
{
"epoch": 0.97,
"learning_rate": 1.7028150737533432e-06,
"loss": 2.7418,
"step": 291800
},
{
"epoch": 0.97,
"learning_rate": 1.6862636053070629e-06,
"loss": 2.7358,
"step": 291900
},
{
"epoch": 0.97,
"learning_rate": 1.6697121368607825e-06,
"loss": 2.758,
"step": 292000
},
{
"epoch": 0.97,
"learning_rate": 1.6531606684145017e-06,
"loss": 2.74,
"step": 292100
},
{
"epoch": 0.97,
"learning_rate": 1.6366091999682213e-06,
"loss": 2.7506,
"step": 292200
},
{
"epoch": 0.97,
"learning_rate": 1.6200577315219407e-06,
"loss": 2.7442,
"step": 292300
},
{
"epoch": 0.97,
"learning_rate": 1.6035062630756603e-06,
"loss": 2.7545,
"step": 292400
},
{
"epoch": 0.97,
"learning_rate": 1.5869547946293795e-06,
"loss": 2.7404,
"step": 292500
},
{
"epoch": 0.97,
"learning_rate": 1.5704033261830991e-06,
"loss": 2.7335,
"step": 292600
},
{
"epoch": 0.97,
"learning_rate": 1.5538518577368185e-06,
"loss": 2.7331,
"step": 292700
},
{
"epoch": 0.97,
"learning_rate": 1.537300389290538e-06,
"loss": 2.7512,
"step": 292800
},
{
"epoch": 0.97,
"learning_rate": 1.5207489208442573e-06,
"loss": 2.7392,
"step": 292900
},
{
"epoch": 0.97,
"learning_rate": 1.5041974523979767e-06,
"loss": 2.7439,
"step": 293000
},
{
"epoch": 0.97,
"learning_rate": 1.4876459839516964e-06,
"loss": 2.7444,
"step": 293100
},
{
"epoch": 0.97,
"learning_rate": 1.4710945155054158e-06,
"loss": 2.7449,
"step": 293200
},
{
"epoch": 0.97,
"learning_rate": 1.4545430470591352e-06,
"loss": 2.7497,
"step": 293300
},
{
"epoch": 0.97,
"learning_rate": 1.4379915786128546e-06,
"loss": 2.7465,
"step": 293400
},
{
"epoch": 0.97,
"learning_rate": 1.421440110166574e-06,
"loss": 2.7511,
"step": 293500
},
{
"epoch": 0.97,
"learning_rate": 1.4048886417202934e-06,
"loss": 2.7475,
"step": 293600
},
{
"epoch": 0.97,
"learning_rate": 1.388337173274013e-06,
"loss": 2.7375,
"step": 293700
},
{
"epoch": 0.97,
"learning_rate": 1.3717857048277324e-06,
"loss": 2.7464,
"step": 293800
},
{
"epoch": 0.97,
"learning_rate": 1.3552342363814518e-06,
"loss": 2.7384,
"step": 293900
},
{
"epoch": 0.97,
"learning_rate": 1.3386827679351712e-06,
"loss": 2.7525,
"step": 294000
},
{
"epoch": 0.97,
"learning_rate": 1.3221312994888906e-06,
"loss": 2.743,
"step": 294100
},
{
"epoch": 0.97,
"learning_rate": 1.30557983104261e-06,
"loss": 2.7583,
"step": 294200
},
{
"epoch": 0.97,
"learning_rate": 1.2890283625963297e-06,
"loss": 2.7431,
"step": 294300
},
{
"epoch": 0.97,
"learning_rate": 1.272476894150049e-06,
"loss": 2.7308,
"step": 294400
},
{
"epoch": 0.97,
"learning_rate": 1.2559254257037687e-06,
"loss": 2.733,
"step": 294500
},
{
"epoch": 0.98,
"learning_rate": 1.2393739572574879e-06,
"loss": 2.7525,
"step": 294600
},
{
"epoch": 0.98,
"learning_rate": 1.2228224888112073e-06,
"loss": 2.7322,
"step": 294700
},
{
"epoch": 0.98,
"learning_rate": 1.206271020364927e-06,
"loss": 2.7517,
"step": 294800
},
{
"epoch": 0.98,
"learning_rate": 1.1897195519186463e-06,
"loss": 2.7369,
"step": 294900
},
{
"epoch": 0.98,
"learning_rate": 1.1731680834723657e-06,
"loss": 2.7407,
"step": 295000
},
{
"epoch": 0.98,
"learning_rate": 1.1566166150260851e-06,
"loss": 2.7378,
"step": 295100
},
{
"epoch": 0.98,
"learning_rate": 1.1400651465798045e-06,
"loss": 2.7395,
"step": 295200
},
{
"epoch": 0.98,
"learning_rate": 1.123513678133524e-06,
"loss": 2.7352,
"step": 295300
},
{
"epoch": 0.98,
"learning_rate": 1.1069622096872435e-06,
"loss": 2.7434,
"step": 295400
},
{
"epoch": 0.98,
"learning_rate": 1.090410741240963e-06,
"loss": 2.7483,
"step": 295500
},
{
"epoch": 0.98,
"learning_rate": 1.0738592727946826e-06,
"loss": 2.7582,
"step": 295600
},
{
"epoch": 0.98,
"learning_rate": 1.0573078043484018e-06,
"loss": 2.7475,
"step": 295700
},
{
"epoch": 0.98,
"learning_rate": 1.0407563359021212e-06,
"loss": 2.7477,
"step": 295800
},
{
"epoch": 0.98,
"learning_rate": 1.0242048674558408e-06,
"loss": 2.7503,
"step": 295900
},
{
"epoch": 0.98,
"learning_rate": 1.0076533990095602e-06,
"loss": 2.7432,
"step": 296000
},
{
"epoch": 0.98,
"learning_rate": 9.911019305632796e-07,
"loss": 2.7523,
"step": 296100
},
{
"epoch": 0.98,
"learning_rate": 9.745504621169992e-07,
"loss": 2.7503,
"step": 296200
},
{
"epoch": 0.98,
"learning_rate": 9.579989936707184e-07,
"loss": 2.7481,
"step": 296300
},
{
"epoch": 0.98,
"learning_rate": 9.414475252244379e-07,
"loss": 2.7505,
"step": 296400
},
{
"epoch": 0.98,
"learning_rate": 9.248960567781574e-07,
"loss": 2.7301,
"step": 296500
},
{
"epoch": 0.98,
"learning_rate": 9.083445883318768e-07,
"loss": 2.7475,
"step": 296600
},
{
"epoch": 0.98,
"learning_rate": 8.917931198855962e-07,
"loss": 2.743,
"step": 296700
},
{
"epoch": 0.98,
"learning_rate": 8.752416514393158e-07,
"loss": 2.7464,
"step": 296800
},
{
"epoch": 0.98,
"learning_rate": 8.586901829930352e-07,
"loss": 2.7416,
"step": 296900
},
{
"epoch": 0.98,
"learning_rate": 8.421387145467547e-07,
"loss": 2.7406,
"step": 297000
},
{
"epoch": 0.98,
"learning_rate": 8.255872461004741e-07,
"loss": 2.7322,
"step": 297100
},
{
"epoch": 0.98,
"learning_rate": 8.090357776541935e-07,
"loss": 2.7264,
"step": 297200
},
{
"epoch": 0.98,
"learning_rate": 7.92484309207913e-07,
"loss": 2.7506,
"step": 297300
},
{
"epoch": 0.98,
"learning_rate": 7.759328407616324e-07,
"loss": 2.7487,
"step": 297400
},
{
"epoch": 0.98,
"learning_rate": 7.593813723153519e-07,
"loss": 2.7492,
"step": 297500
},
{
"epoch": 0.99,
"learning_rate": 7.428299038690712e-07,
"loss": 2.7277,
"step": 297600
},
{
"epoch": 0.99,
"learning_rate": 7.262784354227907e-07,
"loss": 2.7638,
"step": 297700
},
{
"epoch": 0.99,
"learning_rate": 7.097269669765102e-07,
"loss": 2.742,
"step": 297800
},
{
"epoch": 0.99,
"learning_rate": 6.931754985302296e-07,
"loss": 2.7582,
"step": 297900
},
{
"epoch": 0.99,
"learning_rate": 6.76624030083949e-07,
"loss": 2.7386,
"step": 298000
},
{
"epoch": 0.99,
"learning_rate": 6.600725616376686e-07,
"loss": 2.7488,
"step": 298100
},
{
"epoch": 0.99,
"learning_rate": 6.43521093191388e-07,
"loss": 2.75,
"step": 298200
},
{
"epoch": 0.99,
"learning_rate": 6.269696247451074e-07,
"loss": 2.7419,
"step": 298300
},
{
"epoch": 0.99,
"learning_rate": 6.104181562988269e-07,
"loss": 2.738,
"step": 298400
},
{
"epoch": 0.99,
"learning_rate": 5.938666878525463e-07,
"loss": 2.7521,
"step": 298500
},
{
"epoch": 0.99,
"learning_rate": 5.773152194062658e-07,
"loss": 2.7501,
"step": 298600
},
{
"epoch": 0.99,
"learning_rate": 5.607637509599852e-07,
"loss": 2.7357,
"step": 298700
},
{
"epoch": 0.99,
"learning_rate": 5.442122825137046e-07,
"loss": 2.748,
"step": 298800
},
{
"epoch": 0.99,
"learning_rate": 5.276608140674241e-07,
"loss": 2.7403,
"step": 298900
},
{
"epoch": 0.99,
"learning_rate": 5.111093456211435e-07,
"loss": 2.7486,
"step": 299000
},
{
"epoch": 0.99,
"learning_rate": 4.945578771748629e-07,
"loss": 2.7265,
"step": 299100
},
{
"epoch": 0.99,
"learning_rate": 4.780064087285825e-07,
"loss": 2.7547,
"step": 299200
},
{
"epoch": 0.99,
"learning_rate": 4.614549402823019e-07,
"loss": 2.7536,
"step": 299300
},
{
"epoch": 0.99,
"learning_rate": 4.4490347183602127e-07,
"loss": 2.728,
"step": 299400
},
{
"epoch": 0.99,
"learning_rate": 4.283520033897408e-07,
"loss": 2.733,
"step": 299500
},
{
"epoch": 0.99,
"learning_rate": 4.1180053494346024e-07,
"loss": 2.7592,
"step": 299600
},
{
"epoch": 0.99,
"learning_rate": 3.952490664971797e-07,
"loss": 2.7456,
"step": 299700
},
{
"epoch": 0.99,
"learning_rate": 3.7869759805089905e-07,
"loss": 2.7258,
"step": 299800
},
{
"epoch": 0.99,
"learning_rate": 3.6214612960461856e-07,
"loss": 2.7382,
"step": 299900
},
{
"epoch": 0.99,
"learning_rate": 3.4559466115833797e-07,
"loss": 2.7404,
"step": 300000
},
{
"epoch": 0.99,
"learning_rate": 3.290431927120574e-07,
"loss": 2.7367,
"step": 300100
},
{
"epoch": 0.99,
"learning_rate": 3.124917242657769e-07,
"loss": 2.7528,
"step": 300200
},
{
"epoch": 0.99,
"learning_rate": 2.9594025581949634e-07,
"loss": 2.745,
"step": 300300
},
{
"epoch": 0.99,
"learning_rate": 2.7938878737321575e-07,
"loss": 2.7525,
"step": 300400
},
{
"epoch": 0.99,
"learning_rate": 2.628373189269352e-07,
"loss": 2.7259,
"step": 300500
},
{
"epoch": 1.0,
"learning_rate": 2.4628585048065467e-07,
"loss": 2.7397,
"step": 300600
},
{
"epoch": 1.0,
"learning_rate": 2.2973438203437407e-07,
"loss": 2.7356,
"step": 300700
},
{
"epoch": 1.0,
"learning_rate": 2.1318291358809356e-07,
"loss": 2.7418,
"step": 300800
},
{
"epoch": 1.0,
"learning_rate": 1.96631445141813e-07,
"loss": 2.744,
"step": 300900
},
{
"epoch": 1.0,
"learning_rate": 1.8007997669553242e-07,
"loss": 2.7339,
"step": 301000
},
{
"epoch": 1.0,
"learning_rate": 1.6352850824925188e-07,
"loss": 2.7423,
"step": 301100
},
{
"epoch": 1.0,
"learning_rate": 1.4697703980297131e-07,
"loss": 2.7457,
"step": 301200
},
{
"epoch": 1.0,
"learning_rate": 1.3042557135669077e-07,
"loss": 2.743,
"step": 301300
},
{
"epoch": 1.0,
"learning_rate": 1.1387410291041022e-07,
"loss": 2.7527,
"step": 301400
},
{
"epoch": 1.0,
"learning_rate": 9.732263446412967e-08,
"loss": 2.7325,
"step": 301500
},
{
"epoch": 1.0,
"learning_rate": 8.07711660178491e-08,
"loss": 2.7482,
"step": 301600
},
{
"epoch": 1.0,
"learning_rate": 6.421969757156856e-08,
"loss": 2.7457,
"step": 301700
},
{
"epoch": 1.0,
"learning_rate": 4.7668229125288e-08,
"loss": 2.7431,
"step": 301800
},
{
"epoch": 1.0,
"learning_rate": 3.111676067900744e-08,
"loss": 2.7425,
"step": 301900
},
{
"epoch": 1.0,
"learning_rate": 1.456529223272689e-08,
"loss": 2.7382,
"step": 302000
}
],
"max_steps": 302088,
"num_train_epochs": 1,
"total_flos": 8.184418510307328e+18,
"trial_name": null,
"trial_params": null
}