lib_gpt_med / trainer_state.json
akozlo's picture
Update from mm.dd.yy
b1c594a
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9999977087395546,
"global_step": 654660,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.996196498946018e-05,
"loss": 3.0555,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 4.9923777227874016e-05,
"loss": 3.0074,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 4.988558946628785e-05,
"loss": 2.9648,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 4.984740170470168e-05,
"loss": 2.9454,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 4.980921394311551e-05,
"loss": 2.9381,
"step": 2500
},
{
"epoch": 0.01,
"learning_rate": 4.9771026181529345e-05,
"loss": 2.9264,
"step": 3000
},
{
"epoch": 0.02,
"learning_rate": 4.973283841994318e-05,
"loss": 2.9214,
"step": 3500
},
{
"epoch": 0.02,
"learning_rate": 4.969465065835701e-05,
"loss": 2.9099,
"step": 4000
},
{
"epoch": 0.02,
"learning_rate": 4.965646289677084e-05,
"loss": 2.8927,
"step": 4500
},
{
"epoch": 0.02,
"learning_rate": 4.961827513518468e-05,
"loss": 2.8839,
"step": 5000
},
{
"epoch": 0.03,
"learning_rate": 4.958008737359851e-05,
"loss": 2.8857,
"step": 5500
},
{
"epoch": 0.03,
"learning_rate": 4.9541899612012344e-05,
"loss": 2.898,
"step": 6000
},
{
"epoch": 0.03,
"learning_rate": 4.950378822594935e-05,
"loss": 2.8848,
"step": 6500
},
{
"epoch": 0.03,
"learning_rate": 4.9465600464363184e-05,
"loss": 2.879,
"step": 7000
},
{
"epoch": 0.03,
"learning_rate": 4.9427412702777016e-05,
"loss": 2.8717,
"step": 7500
},
{
"epoch": 0.04,
"learning_rate": 4.938922494119085e-05,
"loss": 2.8649,
"step": 8000
},
{
"epoch": 0.04,
"learning_rate": 4.935103717960468e-05,
"loss": 2.8712,
"step": 8500
},
{
"epoch": 0.04,
"learning_rate": 4.931284941801851e-05,
"loss": 2.8493,
"step": 9000
},
{
"epoch": 0.04,
"learning_rate": 4.9274661656432344e-05,
"loss": 2.8726,
"step": 9500
},
{
"epoch": 0.05,
"learning_rate": 4.923647389484618e-05,
"loss": 2.8612,
"step": 10000
},
{
"epoch": 0.05,
"learning_rate": 4.9198286133260015e-05,
"loss": 2.8523,
"step": 10500
},
{
"epoch": 0.05,
"learning_rate": 4.916025112272019e-05,
"loss": 2.8501,
"step": 11000
},
{
"epoch": 0.05,
"learning_rate": 4.912206336113402e-05,
"loss": 2.8377,
"step": 11500
},
{
"epoch": 0.05,
"learning_rate": 4.9083875599547855e-05,
"loss": 2.8491,
"step": 12000
},
{
"epoch": 0.06,
"learning_rate": 4.9045687837961693e-05,
"loss": 2.844,
"step": 12500
},
{
"epoch": 0.06,
"learning_rate": 4.9007500076375526e-05,
"loss": 2.8197,
"step": 13000
},
{
"epoch": 0.06,
"learning_rate": 4.896931231478936e-05,
"loss": 2.8503,
"step": 13500
},
{
"epoch": 0.06,
"learning_rate": 4.893112455320319e-05,
"loss": 2.825,
"step": 14000
},
{
"epoch": 0.07,
"learning_rate": 4.889293679161702e-05,
"loss": 2.8277,
"step": 14500
},
{
"epoch": 0.07,
"learning_rate": 4.885474903003086e-05,
"loss": 2.8359,
"step": 15000
},
{
"epoch": 0.07,
"learning_rate": 4.881663764396786e-05,
"loss": 2.8288,
"step": 15500
},
{
"epoch": 0.07,
"learning_rate": 4.87784498823817e-05,
"loss": 2.812,
"step": 16000
},
{
"epoch": 0.08,
"learning_rate": 4.874033849631871e-05,
"loss": 2.8276,
"step": 16500
},
{
"epoch": 0.08,
"learning_rate": 4.870215073473254e-05,
"loss": 2.823,
"step": 17000
},
{
"epoch": 0.08,
"learning_rate": 4.866396297314637e-05,
"loss": 2.8204,
"step": 17500
},
{
"epoch": 0.08,
"learning_rate": 4.86257752115602e-05,
"loss": 2.8187,
"step": 18000
},
{
"epoch": 0.08,
"learning_rate": 4.8587587449974036e-05,
"loss": 2.8106,
"step": 18500
},
{
"epoch": 0.09,
"learning_rate": 4.854939968838787e-05,
"loss": 2.811,
"step": 19000
},
{
"epoch": 0.09,
"learning_rate": 4.85112119268017e-05,
"loss": 2.8186,
"step": 19500
},
{
"epoch": 0.09,
"learning_rate": 4.847302416521554e-05,
"loss": 2.7965,
"step": 20000
},
{
"epoch": 0.09,
"learning_rate": 4.843483640362937e-05,
"loss": 2.8085,
"step": 20500
},
{
"epoch": 0.1,
"learning_rate": 4.83966486420432e-05,
"loss": 2.8108,
"step": 21000
},
{
"epoch": 0.1,
"learning_rate": 4.835853725598021e-05,
"loss": 2.8031,
"step": 21500
},
{
"epoch": 0.1,
"learning_rate": 4.832034949439404e-05,
"loss": 2.8085,
"step": 22000
},
{
"epoch": 0.1,
"learning_rate": 4.8282161732807874e-05,
"loss": 2.8044,
"step": 22500
},
{
"epoch": 0.11,
"learning_rate": 4.8243973971221706e-05,
"loss": 2.802,
"step": 23000
},
{
"epoch": 0.11,
"learning_rate": 4.820578620963554e-05,
"loss": 2.7915,
"step": 23500
},
{
"epoch": 0.11,
"learning_rate": 4.8167674823572545e-05,
"loss": 2.7981,
"step": 24000
},
{
"epoch": 0.11,
"learning_rate": 4.812948706198638e-05,
"loss": 2.8002,
"step": 24500
},
{
"epoch": 0.11,
"learning_rate": 4.809129930040021e-05,
"loss": 2.7929,
"step": 25000
},
{
"epoch": 0.12,
"learning_rate": 4.805311153881404e-05,
"loss": 2.7923,
"step": 25500
},
{
"epoch": 0.12,
"learning_rate": 4.8014923777227874e-05,
"loss": 2.7845,
"step": 26000
},
{
"epoch": 0.12,
"learning_rate": 4.797681239116488e-05,
"loss": 2.792,
"step": 26500
},
{
"epoch": 0.12,
"learning_rate": 4.793862462957871e-05,
"loss": 2.7944,
"step": 27000
},
{
"epoch": 0.13,
"learning_rate": 4.7900436867992545e-05,
"loss": 2.7897,
"step": 27500
},
{
"epoch": 0.13,
"learning_rate": 4.7862249106406384e-05,
"loss": 2.7927,
"step": 28000
},
{
"epoch": 0.13,
"learning_rate": 4.7824061344820216e-05,
"loss": 2.7925,
"step": 28500
},
{
"epoch": 0.13,
"learning_rate": 4.778594995875722e-05,
"loss": 2.7703,
"step": 29000
},
{
"epoch": 0.14,
"learning_rate": 4.7747762197171055e-05,
"loss": 2.7849,
"step": 29500
},
{
"epoch": 0.14,
"learning_rate": 4.770957443558489e-05,
"loss": 2.786,
"step": 30000
},
{
"epoch": 0.14,
"learning_rate": 4.767138667399872e-05,
"loss": 2.7615,
"step": 30500
},
{
"epoch": 0.14,
"learning_rate": 4.763319891241255e-05,
"loss": 2.7704,
"step": 31000
},
{
"epoch": 0.14,
"learning_rate": 4.7595011150826384e-05,
"loss": 2.7749,
"step": 31500
},
{
"epoch": 0.15,
"learning_rate": 4.755689976476339e-05,
"loss": 2.7844,
"step": 32000
},
{
"epoch": 0.15,
"learning_rate": 4.751871200317722e-05,
"loss": 2.7758,
"step": 32500
},
{
"epoch": 0.15,
"learning_rate": 4.7480524241591055e-05,
"loss": 2.7723,
"step": 33000
},
{
"epoch": 0.15,
"learning_rate": 4.744233648000489e-05,
"loss": 2.7674,
"step": 33500
},
{
"epoch": 0.16,
"learning_rate": 4.740414871841872e-05,
"loss": 2.7649,
"step": 34000
},
{
"epoch": 0.16,
"learning_rate": 4.7366037332355726e-05,
"loss": 2.7876,
"step": 34500
},
{
"epoch": 0.16,
"learning_rate": 4.732784957076956e-05,
"loss": 2.7723,
"step": 35000
},
{
"epoch": 0.16,
"learning_rate": 4.72896618091834e-05,
"loss": 2.7732,
"step": 35500
},
{
"epoch": 0.16,
"learning_rate": 4.725147404759723e-05,
"loss": 2.7617,
"step": 36000
},
{
"epoch": 0.17,
"learning_rate": 4.721328628601106e-05,
"loss": 2.7687,
"step": 36500
},
{
"epoch": 0.17,
"learning_rate": 4.7175098524424894e-05,
"loss": 2.7682,
"step": 37000
},
{
"epoch": 0.17,
"learning_rate": 4.71369871383619e-05,
"loss": 2.7533,
"step": 37500
},
{
"epoch": 0.17,
"learning_rate": 4.709879937677573e-05,
"loss": 2.7571,
"step": 38000
},
{
"epoch": 0.18,
"learning_rate": 4.7060611615189565e-05,
"loss": 2.763,
"step": 38500
},
{
"epoch": 0.18,
"learning_rate": 4.70224238536034e-05,
"loss": 2.7686,
"step": 39000
},
{
"epoch": 0.18,
"learning_rate": 4.698423609201723e-05,
"loss": 2.7591,
"step": 39500
},
{
"epoch": 0.18,
"learning_rate": 4.6946124705954236e-05,
"loss": 2.761,
"step": 40000
},
{
"epoch": 0.19,
"learning_rate": 4.690793694436807e-05,
"loss": 2.7617,
"step": 40500
},
{
"epoch": 0.19,
"learning_rate": 4.6869825558305075e-05,
"loss": 2.7551,
"step": 41000
},
{
"epoch": 0.19,
"learning_rate": 4.683163779671891e-05,
"loss": 2.7651,
"step": 41500
},
{
"epoch": 0.19,
"learning_rate": 4.679345003513274e-05,
"loss": 2.7581,
"step": 42000
},
{
"epoch": 0.19,
"learning_rate": 4.675526227354657e-05,
"loss": 2.7431,
"step": 42500
},
{
"epoch": 0.2,
"learning_rate": 4.6717074511960404e-05,
"loss": 2.7553,
"step": 43000
},
{
"epoch": 0.2,
"learning_rate": 4.667888675037424e-05,
"loss": 2.7451,
"step": 43500
},
{
"epoch": 0.2,
"learning_rate": 4.6640698988788075e-05,
"loss": 2.763,
"step": 44000
},
{
"epoch": 0.2,
"learning_rate": 4.660251122720191e-05,
"loss": 2.7454,
"step": 44500
},
{
"epoch": 0.21,
"learning_rate": 4.656432346561574e-05,
"loss": 2.7472,
"step": 45000
},
{
"epoch": 0.21,
"learning_rate": 4.6526212079552746e-05,
"loss": 2.7321,
"step": 45500
},
{
"epoch": 0.21,
"learning_rate": 4.648802431796658e-05,
"loss": 2.7418,
"step": 46000
},
{
"epoch": 0.21,
"learning_rate": 4.644983655638042e-05,
"loss": 2.7441,
"step": 46500
},
{
"epoch": 0.22,
"learning_rate": 4.641164879479425e-05,
"loss": 2.7342,
"step": 47000
},
{
"epoch": 0.22,
"learning_rate": 4.6373537408731256e-05,
"loss": 2.7211,
"step": 47500
},
{
"epoch": 0.22,
"learning_rate": 4.633534964714509e-05,
"loss": 2.7377,
"step": 48000
},
{
"epoch": 0.22,
"learning_rate": 4.629716188555892e-05,
"loss": 2.7481,
"step": 48500
},
{
"epoch": 0.22,
"learning_rate": 4.625897412397275e-05,
"loss": 2.7373,
"step": 49000
},
{
"epoch": 0.23,
"learning_rate": 4.6220786362386585e-05,
"loss": 2.7236,
"step": 49500
},
{
"epoch": 0.23,
"learning_rate": 4.618259860080042e-05,
"loss": 2.7393,
"step": 50000
},
{
"epoch": 0.23,
"learning_rate": 4.6144410839214256e-05,
"loss": 2.7406,
"step": 50500
},
{
"epoch": 0.23,
"learning_rate": 4.610622307762809e-05,
"loss": 2.7201,
"step": 51000
},
{
"epoch": 0.24,
"learning_rate": 4.606803531604192e-05,
"loss": 2.7447,
"step": 51500
},
{
"epoch": 0.24,
"learning_rate": 4.602984755445575e-05,
"loss": 2.7393,
"step": 52000
},
{
"epoch": 0.24,
"learning_rate": 4.5991659792869585e-05,
"loss": 2.7194,
"step": 52500
},
{
"epoch": 0.24,
"learning_rate": 4.595347203128342e-05,
"loss": 2.7429,
"step": 53000
},
{
"epoch": 0.25,
"learning_rate": 4.5915360645220424e-05,
"loss": 2.7358,
"step": 53500
},
{
"epoch": 0.25,
"learning_rate": 4.587724925915743e-05,
"loss": 2.7373,
"step": 54000
},
{
"epoch": 0.25,
"learning_rate": 4.583906149757126e-05,
"loss": 2.7368,
"step": 54500
},
{
"epoch": 0.25,
"learning_rate": 4.5800873735985095e-05,
"loss": 2.7161,
"step": 55000
},
{
"epoch": 0.25,
"learning_rate": 4.576268597439893e-05,
"loss": 2.7314,
"step": 55500
},
{
"epoch": 0.26,
"learning_rate": 4.572449821281276e-05,
"loss": 2.7221,
"step": 56000
},
{
"epoch": 0.26,
"learning_rate": 4.568631045122659e-05,
"loss": 2.7051,
"step": 56500
},
{
"epoch": 0.26,
"learning_rate": 4.5648122689640424e-05,
"loss": 2.7213,
"step": 57000
},
{
"epoch": 0.26,
"learning_rate": 4.560993492805426e-05,
"loss": 2.7178,
"step": 57500
},
{
"epoch": 0.27,
"learning_rate": 4.557182354199126e-05,
"loss": 2.7164,
"step": 58000
},
{
"epoch": 0.27,
"learning_rate": 4.553371215592827e-05,
"loss": 2.7223,
"step": 58500
},
{
"epoch": 0.27,
"learning_rate": 4.54955243943421e-05,
"loss": 2.7162,
"step": 59000
},
{
"epoch": 0.27,
"learning_rate": 4.5457336632755934e-05,
"loss": 2.7074,
"step": 59500
},
{
"epoch": 0.27,
"learning_rate": 4.541914887116977e-05,
"loss": 2.7116,
"step": 60000
},
{
"epoch": 0.28,
"learning_rate": 4.5380961109583605e-05,
"loss": 2.709,
"step": 60500
},
{
"epoch": 0.28,
"learning_rate": 4.534277334799744e-05,
"loss": 2.7131,
"step": 61000
},
{
"epoch": 0.28,
"learning_rate": 4.5304661961934444e-05,
"loss": 2.725,
"step": 61500
},
{
"epoch": 0.28,
"learning_rate": 4.5266474200348276e-05,
"loss": 2.7129,
"step": 62000
},
{
"epoch": 0.29,
"learning_rate": 4.522828643876211e-05,
"loss": 2.7157,
"step": 62500
},
{
"epoch": 0.29,
"learning_rate": 4.519009867717594e-05,
"loss": 2.7114,
"step": 63000
},
{
"epoch": 0.29,
"learning_rate": 4.515191091558977e-05,
"loss": 2.7101,
"step": 63500
},
{
"epoch": 0.29,
"learning_rate": 4.5113723154003605e-05,
"loss": 2.7129,
"step": 64000
},
{
"epoch": 0.3,
"learning_rate": 4.507553539241744e-05,
"loss": 2.715,
"step": 64500
},
{
"epoch": 0.3,
"learning_rate": 4.5037347630831276e-05,
"loss": 2.7164,
"step": 65000
},
{
"epoch": 0.3,
"learning_rate": 4.499915986924511e-05,
"loss": 2.7004,
"step": 65500
},
{
"epoch": 0.3,
"learning_rate": 4.496097210765894e-05,
"loss": 2.7114,
"step": 66000
},
{
"epoch": 0.3,
"learning_rate": 4.492286072159595e-05,
"loss": 2.7219,
"step": 66500
},
{
"epoch": 0.31,
"learning_rate": 4.488467296000978e-05,
"loss": 2.6966,
"step": 67000
},
{
"epoch": 0.31,
"learning_rate": 4.484648519842361e-05,
"loss": 2.7097,
"step": 67500
},
{
"epoch": 0.31,
"learning_rate": 4.480829743683744e-05,
"loss": 2.72,
"step": 68000
},
{
"epoch": 0.31,
"learning_rate": 4.477018605077445e-05,
"loss": 2.7164,
"step": 68500
},
{
"epoch": 0.32,
"learning_rate": 4.473199828918828e-05,
"loss": 2.7056,
"step": 69000
},
{
"epoch": 0.32,
"learning_rate": 4.4693810527602115e-05,
"loss": 2.7102,
"step": 69500
},
{
"epoch": 0.32,
"learning_rate": 4.465562276601595e-05,
"loss": 2.7023,
"step": 70000
},
{
"epoch": 0.32,
"learning_rate": 4.461743500442978e-05,
"loss": 2.7024,
"step": 70500
},
{
"epoch": 0.33,
"learning_rate": 4.4579323618366786e-05,
"loss": 2.7,
"step": 71000
},
{
"epoch": 0.33,
"learning_rate": 4.454121223230379e-05,
"loss": 2.7006,
"step": 71500
},
{
"epoch": 0.33,
"learning_rate": 4.4503024470717625e-05,
"loss": 2.6956,
"step": 72000
},
{
"epoch": 0.33,
"learning_rate": 4.446491308465463e-05,
"loss": 2.7053,
"step": 72500
},
{
"epoch": 0.33,
"learning_rate": 4.4426725323068464e-05,
"loss": 2.7132,
"step": 73000
},
{
"epoch": 0.34,
"learning_rate": 4.4388537561482296e-05,
"loss": 2.692,
"step": 73500
},
{
"epoch": 0.34,
"learning_rate": 4.435034979989613e-05,
"loss": 2.6934,
"step": 74000
},
{
"epoch": 0.34,
"learning_rate": 4.431216203830996e-05,
"loss": 2.6871,
"step": 74500
},
{
"epoch": 0.34,
"learning_rate": 4.427397427672379e-05,
"loss": 2.6979,
"step": 75000
},
{
"epoch": 0.35,
"learning_rate": 4.423578651513763e-05,
"loss": 2.696,
"step": 75500
},
{
"epoch": 0.35,
"learning_rate": 4.4197598753551464e-05,
"loss": 2.6928,
"step": 76000
},
{
"epoch": 0.35,
"learning_rate": 4.41594109919653e-05,
"loss": 2.6864,
"step": 76500
},
{
"epoch": 0.35,
"learning_rate": 4.4121223230379135e-05,
"loss": 2.6974,
"step": 77000
},
{
"epoch": 0.36,
"learning_rate": 4.408303546879297e-05,
"loss": 2.6817,
"step": 77500
},
{
"epoch": 0.36,
"learning_rate": 4.4044924082729974e-05,
"loss": 2.6817,
"step": 78000
},
{
"epoch": 0.36,
"learning_rate": 4.4006736321143806e-05,
"loss": 2.685,
"step": 78500
},
{
"epoch": 0.36,
"learning_rate": 4.396854855955764e-05,
"loss": 2.6829,
"step": 79000
},
{
"epoch": 0.36,
"learning_rate": 4.393036079797147e-05,
"loss": 2.6871,
"step": 79500
},
{
"epoch": 0.37,
"learning_rate": 4.38921730363853e-05,
"loss": 2.6841,
"step": 80000
},
{
"epoch": 0.37,
"learning_rate": 4.385406165032231e-05,
"loss": 2.671,
"step": 80500
},
{
"epoch": 0.37,
"learning_rate": 4.381587388873614e-05,
"loss": 2.6906,
"step": 81000
},
{
"epoch": 0.37,
"learning_rate": 4.3777686127149973e-05,
"loss": 2.6874,
"step": 81500
},
{
"epoch": 0.38,
"learning_rate": 4.3739498365563806e-05,
"loss": 2.6838,
"step": 82000
},
{
"epoch": 0.38,
"learning_rate": 4.370131060397764e-05,
"loss": 2.6918,
"step": 82500
},
{
"epoch": 0.38,
"learning_rate": 4.366312284239148e-05,
"loss": 2.7004,
"step": 83000
},
{
"epoch": 0.38,
"learning_rate": 4.362501145632848e-05,
"loss": 2.6895,
"step": 83500
},
{
"epoch": 0.38,
"learning_rate": 4.3586823694742316e-05,
"loss": 2.6846,
"step": 84000
},
{
"epoch": 0.39,
"learning_rate": 4.354863593315615e-05,
"loss": 2.6808,
"step": 84500
},
{
"epoch": 0.39,
"learning_rate": 4.351044817156998e-05,
"loss": 2.6849,
"step": 85000
},
{
"epoch": 0.39,
"learning_rate": 4.347226040998381e-05,
"loss": 2.6967,
"step": 85500
},
{
"epoch": 0.39,
"learning_rate": 4.343414902392082e-05,
"loss": 2.6933,
"step": 86000
},
{
"epoch": 0.4,
"learning_rate": 4.339596126233465e-05,
"loss": 2.6779,
"step": 86500
},
{
"epoch": 0.4,
"learning_rate": 4.335777350074848e-05,
"loss": 2.6937,
"step": 87000
},
{
"epoch": 0.4,
"learning_rate": 4.3319585739162316e-05,
"loss": 2.691,
"step": 87500
},
{
"epoch": 0.4,
"learning_rate": 4.328147435309932e-05,
"loss": 2.6864,
"step": 88000
},
{
"epoch": 0.41,
"learning_rate": 4.3243286591513155e-05,
"loss": 2.6732,
"step": 88500
},
{
"epoch": 0.41,
"learning_rate": 4.320509882992699e-05,
"loss": 2.6637,
"step": 89000
},
{
"epoch": 0.41,
"learning_rate": 4.3166987443863994e-05,
"loss": 2.6791,
"step": 89500
},
{
"epoch": 0.41,
"learning_rate": 4.3128799682277826e-05,
"loss": 2.673,
"step": 90000
},
{
"epoch": 0.41,
"learning_rate": 4.309061192069166e-05,
"loss": 2.6774,
"step": 90500
},
{
"epoch": 0.42,
"learning_rate": 4.305242415910549e-05,
"loss": 2.671,
"step": 91000
},
{
"epoch": 0.42,
"learning_rate": 4.301423639751932e-05,
"loss": 2.6695,
"step": 91500
},
{
"epoch": 0.42,
"learning_rate": 4.297604863593316e-05,
"loss": 2.6704,
"step": 92000
},
{
"epoch": 0.42,
"learning_rate": 4.293786087434699e-05,
"loss": 2.6632,
"step": 92500
},
{
"epoch": 0.43,
"learning_rate": 4.2899673112760825e-05,
"loss": 2.6861,
"step": 93000
},
{
"epoch": 0.43,
"learning_rate": 4.286148535117466e-05,
"loss": 2.6649,
"step": 93500
},
{
"epoch": 0.43,
"learning_rate": 4.282329758958849e-05,
"loss": 2.6774,
"step": 94000
},
{
"epoch": 0.43,
"learning_rate": 4.278510982800232e-05,
"loss": 2.6544,
"step": 94500
},
{
"epoch": 0.44,
"learning_rate": 4.2746922066416154e-05,
"loss": 2.6761,
"step": 95000
},
{
"epoch": 0.44,
"learning_rate": 4.270881068035316e-05,
"loss": 2.6762,
"step": 95500
},
{
"epoch": 0.44,
"learning_rate": 4.267062291876699e-05,
"loss": 2.6762,
"step": 96000
},
{
"epoch": 0.44,
"learning_rate": 4.2632511532704e-05,
"loss": 2.6695,
"step": 96500
},
{
"epoch": 0.44,
"learning_rate": 4.259432377111783e-05,
"loss": 2.6717,
"step": 97000
},
{
"epoch": 0.45,
"learning_rate": 4.2556136009531664e-05,
"loss": 2.6925,
"step": 97500
},
{
"epoch": 0.45,
"learning_rate": 4.2517948247945496e-05,
"loss": 2.6566,
"step": 98000
},
{
"epoch": 0.45,
"learning_rate": 4.2479760486359335e-05,
"loss": 2.669,
"step": 98500
},
{
"epoch": 0.45,
"learning_rate": 4.244157272477317e-05,
"loss": 2.6793,
"step": 99000
},
{
"epoch": 0.46,
"learning_rate": 4.240346133871017e-05,
"loss": 2.6654,
"step": 99500
},
{
"epoch": 0.46,
"learning_rate": 4.2365273577124007e-05,
"loss": 2.665,
"step": 100000
},
{
"epoch": 0.46,
"learning_rate": 4.232708581553784e-05,
"loss": 2.6532,
"step": 100500
},
{
"epoch": 0.46,
"learning_rate": 4.228889805395167e-05,
"loss": 2.6686,
"step": 101000
},
{
"epoch": 0.47,
"learning_rate": 4.22507102923655e-05,
"loss": 2.6629,
"step": 101500
},
{
"epoch": 0.47,
"learning_rate": 4.2212522530779335e-05,
"loss": 2.6792,
"step": 102000
},
{
"epoch": 0.47,
"learning_rate": 4.217433476919317e-05,
"loss": 2.6797,
"step": 102500
},
{
"epoch": 0.47,
"learning_rate": 4.2136147007607e-05,
"loss": 2.6618,
"step": 103000
},
{
"epoch": 0.47,
"learning_rate": 4.2098035621544006e-05,
"loss": 2.6528,
"step": 103500
},
{
"epoch": 0.48,
"learning_rate": 4.205984785995784e-05,
"loss": 2.6497,
"step": 104000
},
{
"epoch": 0.48,
"learning_rate": 4.202166009837167e-05,
"loss": 2.6629,
"step": 104500
},
{
"epoch": 0.48,
"learning_rate": 4.198347233678551e-05,
"loss": 2.656,
"step": 105000
},
{
"epoch": 0.48,
"learning_rate": 4.194528457519934e-05,
"loss": 2.6646,
"step": 105500
},
{
"epoch": 0.49,
"learning_rate": 4.1907096813613174e-05,
"loss": 2.6467,
"step": 106000
},
{
"epoch": 0.49,
"learning_rate": 4.186890905202701e-05,
"loss": 2.6529,
"step": 106500
},
{
"epoch": 0.49,
"learning_rate": 4.183087404148718e-05,
"loss": 2.6593,
"step": 107000
},
{
"epoch": 0.49,
"learning_rate": 4.179268627990102e-05,
"loss": 2.6556,
"step": 107500
},
{
"epoch": 0.49,
"learning_rate": 4.175449851831485e-05,
"loss": 2.661,
"step": 108000
},
{
"epoch": 0.5,
"learning_rate": 4.171631075672869e-05,
"loss": 2.6502,
"step": 108500
},
{
"epoch": 0.5,
"learning_rate": 4.167812299514252e-05,
"loss": 2.66,
"step": 109000
},
{
"epoch": 0.5,
"learning_rate": 4.164001160907953e-05,
"loss": 2.6497,
"step": 109500
},
{
"epoch": 0.5,
"learning_rate": 4.160182384749336e-05,
"loss": 2.6573,
"step": 110000
},
{
"epoch": 0.51,
"learning_rate": 4.1563636085907194e-05,
"loss": 2.6646,
"step": 110500
},
{
"epoch": 0.51,
"learning_rate": 4.1525448324321026e-05,
"loss": 2.6692,
"step": 111000
},
{
"epoch": 0.51,
"learning_rate": 4.148726056273486e-05,
"loss": 2.6464,
"step": 111500
},
{
"epoch": 0.51,
"learning_rate": 4.144907280114869e-05,
"loss": 2.6656,
"step": 112000
},
{
"epoch": 0.52,
"learning_rate": 4.141088503956252e-05,
"loss": 2.6579,
"step": 112500
},
{
"epoch": 0.52,
"learning_rate": 4.137277365349953e-05,
"loss": 2.6474,
"step": 113000
},
{
"epoch": 0.52,
"learning_rate": 4.133458589191336e-05,
"loss": 2.6526,
"step": 113500
},
{
"epoch": 0.52,
"learning_rate": 4.1296398130327194e-05,
"loss": 2.6537,
"step": 114000
},
{
"epoch": 0.52,
"learning_rate": 4.1258210368741026e-05,
"loss": 2.6416,
"step": 114500
},
{
"epoch": 0.53,
"learning_rate": 4.1220022607154865e-05,
"loss": 2.6602,
"step": 115000
},
{
"epoch": 0.53,
"learning_rate": 4.11818348455687e-05,
"loss": 2.6473,
"step": 115500
},
{
"epoch": 0.53,
"learning_rate": 4.114364708398253e-05,
"loss": 2.666,
"step": 116000
},
{
"epoch": 0.53,
"learning_rate": 4.110545932239636e-05,
"loss": 2.6553,
"step": 116500
},
{
"epoch": 0.54,
"learning_rate": 4.106734793633337e-05,
"loss": 2.6489,
"step": 117000
},
{
"epoch": 0.54,
"learning_rate": 4.1029236550270375e-05,
"loss": 2.646,
"step": 117500
},
{
"epoch": 0.54,
"learning_rate": 4.0991125164207376e-05,
"loss": 2.6448,
"step": 118000
},
{
"epoch": 0.54,
"learning_rate": 4.095293740262121e-05,
"loss": 2.6547,
"step": 118500
},
{
"epoch": 0.55,
"learning_rate": 4.0914749641035047e-05,
"loss": 2.6467,
"step": 119000
},
{
"epoch": 0.55,
"learning_rate": 4.087656187944888e-05,
"loss": 2.6508,
"step": 119500
},
{
"epoch": 0.55,
"learning_rate": 4.083837411786271e-05,
"loss": 2.6422,
"step": 120000
},
{
"epoch": 0.55,
"learning_rate": 4.080018635627654e-05,
"loss": 2.6576,
"step": 120500
},
{
"epoch": 0.55,
"learning_rate": 4.0761998594690375e-05,
"loss": 2.662,
"step": 121000
},
{
"epoch": 0.56,
"learning_rate": 4.072381083310421e-05,
"loss": 2.6479,
"step": 121500
},
{
"epoch": 0.56,
"learning_rate": 4.068562307151804e-05,
"loss": 2.6385,
"step": 122000
},
{
"epoch": 0.56,
"learning_rate": 4.064743530993187e-05,
"loss": 2.6539,
"step": 122500
},
{
"epoch": 0.56,
"learning_rate": 4.060924754834571e-05,
"loss": 2.6507,
"step": 123000
},
{
"epoch": 0.57,
"learning_rate": 4.057113616228271e-05,
"loss": 2.6231,
"step": 123500
},
{
"epoch": 0.57,
"learning_rate": 4.053294840069655e-05,
"loss": 2.6401,
"step": 124000
},
{
"epoch": 0.57,
"learning_rate": 4.049476063911038e-05,
"loss": 2.6362,
"step": 124500
},
{
"epoch": 0.57,
"learning_rate": 4.0456572877524214e-05,
"loss": 2.6484,
"step": 125000
},
{
"epoch": 0.58,
"learning_rate": 4.0418385115938046e-05,
"loss": 2.6523,
"step": 125500
},
{
"epoch": 0.58,
"learning_rate": 4.038019735435188e-05,
"loss": 2.6453,
"step": 126000
},
{
"epoch": 0.58,
"learning_rate": 4.0342085968288885e-05,
"loss": 2.6572,
"step": 126500
},
{
"epoch": 0.58,
"learning_rate": 4.030389820670272e-05,
"loss": 2.6525,
"step": 127000
},
{
"epoch": 0.58,
"learning_rate": 4.026571044511655e-05,
"loss": 2.6412,
"step": 127500
},
{
"epoch": 0.59,
"learning_rate": 4.022752268353038e-05,
"loss": 2.6408,
"step": 128000
},
{
"epoch": 0.59,
"learning_rate": 4.018941129746739e-05,
"loss": 2.6422,
"step": 128500
},
{
"epoch": 0.59,
"learning_rate": 4.015122353588122e-05,
"loss": 2.6263,
"step": 129000
},
{
"epoch": 0.59,
"learning_rate": 4.011303577429505e-05,
"loss": 2.6372,
"step": 129500
},
{
"epoch": 0.6,
"learning_rate": 4.0074848012708885e-05,
"loss": 2.6506,
"step": 130000
},
{
"epoch": 0.6,
"learning_rate": 4.0036660251122724e-05,
"loss": 2.6432,
"step": 130500
},
{
"epoch": 0.6,
"learning_rate": 3.9998472489536556e-05,
"loss": 2.6233,
"step": 131000
},
{
"epoch": 0.6,
"learning_rate": 3.9960361103473556e-05,
"loss": 2.6392,
"step": 131500
},
{
"epoch": 0.6,
"learning_rate": 3.9922173341887395e-05,
"loss": 2.6375,
"step": 132000
},
{
"epoch": 0.61,
"learning_rate": 3.988398558030123e-05,
"loss": 2.6172,
"step": 132500
},
{
"epoch": 0.61,
"learning_rate": 3.984579781871506e-05,
"loss": 2.6359,
"step": 133000
},
{
"epoch": 0.61,
"learning_rate": 3.980761005712889e-05,
"loss": 2.6329,
"step": 133500
},
{
"epoch": 0.61,
"learning_rate": 3.9769422295542724e-05,
"loss": 2.6304,
"step": 134000
},
{
"epoch": 0.62,
"learning_rate": 3.9731234533956556e-05,
"loss": 2.6381,
"step": 134500
},
{
"epoch": 0.62,
"learning_rate": 3.969304677237039e-05,
"loss": 2.6382,
"step": 135000
},
{
"epoch": 0.62,
"learning_rate": 3.965485901078422e-05,
"loss": 2.6273,
"step": 135500
},
{
"epoch": 0.62,
"learning_rate": 3.961674762472123e-05,
"loss": 2.6309,
"step": 136000
},
{
"epoch": 0.63,
"learning_rate": 3.9578636238658234e-05,
"loss": 2.6375,
"step": 136500
},
{
"epoch": 0.63,
"learning_rate": 3.954052485259524e-05,
"loss": 2.6384,
"step": 137000
},
{
"epoch": 0.63,
"learning_rate": 3.950233709100907e-05,
"loss": 2.6379,
"step": 137500
},
{
"epoch": 0.63,
"learning_rate": 3.9464149329422905e-05,
"loss": 2.6143,
"step": 138000
},
{
"epoch": 0.63,
"learning_rate": 3.942596156783674e-05,
"loss": 2.6299,
"step": 138500
},
{
"epoch": 0.64,
"learning_rate": 3.9387773806250576e-05,
"loss": 2.6331,
"step": 139000
},
{
"epoch": 0.64,
"learning_rate": 3.934958604466441e-05,
"loss": 2.6242,
"step": 139500
},
{
"epoch": 0.64,
"learning_rate": 3.931139828307824e-05,
"loss": 2.6214,
"step": 140000
},
{
"epoch": 0.64,
"learning_rate": 3.927321052149208e-05,
"loss": 2.6365,
"step": 140500
},
{
"epoch": 0.65,
"learning_rate": 3.923502275990591e-05,
"loss": 2.6411,
"step": 141000
},
{
"epoch": 0.65,
"learning_rate": 3.9196834998319744e-05,
"loss": 2.6103,
"step": 141500
},
{
"epoch": 0.65,
"learning_rate": 3.9158647236733576e-05,
"loss": 2.6253,
"step": 142000
},
{
"epoch": 0.65,
"learning_rate": 3.912053585067058e-05,
"loss": 2.634,
"step": 142500
},
{
"epoch": 0.66,
"learning_rate": 3.9082348089084415e-05,
"loss": 2.6405,
"step": 143000
},
{
"epoch": 0.66,
"learning_rate": 3.904416032749825e-05,
"loss": 2.6187,
"step": 143500
},
{
"epoch": 0.66,
"learning_rate": 3.900597256591208e-05,
"loss": 2.6322,
"step": 144000
},
{
"epoch": 0.66,
"learning_rate": 3.896778480432591e-05,
"loss": 2.6313,
"step": 144500
},
{
"epoch": 0.66,
"learning_rate": 3.8929597042739744e-05,
"loss": 2.6342,
"step": 145000
},
{
"epoch": 0.67,
"learning_rate": 3.889148565667675e-05,
"loss": 2.6221,
"step": 145500
},
{
"epoch": 0.67,
"learning_rate": 3.885329789509058e-05,
"loss": 2.6278,
"step": 146000
},
{
"epoch": 0.67,
"learning_rate": 3.8815110133504415e-05,
"loss": 2.6224,
"step": 146500
},
{
"epoch": 0.67,
"learning_rate": 3.8776922371918254e-05,
"loss": 2.6191,
"step": 147000
},
{
"epoch": 0.68,
"learning_rate": 3.8738734610332086e-05,
"loss": 2.6043,
"step": 147500
},
{
"epoch": 0.68,
"learning_rate": 3.8700623224269086e-05,
"loss": 2.6286,
"step": 148000
},
{
"epoch": 0.68,
"learning_rate": 3.8662435462682925e-05,
"loss": 2.6093,
"step": 148500
},
{
"epoch": 0.68,
"learning_rate": 3.862424770109676e-05,
"loss": 2.6345,
"step": 149000
},
{
"epoch": 0.69,
"learning_rate": 3.858605993951059e-05,
"loss": 2.6133,
"step": 149500
},
{
"epoch": 0.69,
"learning_rate": 3.854787217792442e-05,
"loss": 2.6221,
"step": 150000
},
{
"epoch": 0.69,
"learning_rate": 3.8509684416338253e-05,
"loss": 2.6208,
"step": 150500
},
{
"epoch": 0.69,
"learning_rate": 3.847157303027526e-05,
"loss": 2.6144,
"step": 151000
},
{
"epoch": 0.69,
"learning_rate": 3.843338526868909e-05,
"loss": 2.6165,
"step": 151500
},
{
"epoch": 0.7,
"learning_rate": 3.8395197507102925e-05,
"loss": 2.6218,
"step": 152000
},
{
"epoch": 0.7,
"learning_rate": 3.835700974551676e-05,
"loss": 2.6376,
"step": 152500
},
{
"epoch": 0.7,
"learning_rate": 3.831882198393059e-05,
"loss": 2.637,
"step": 153000
},
{
"epoch": 0.7,
"learning_rate": 3.828063422234443e-05,
"loss": 2.6056,
"step": 153500
},
{
"epoch": 0.71,
"learning_rate": 3.824244646075826e-05,
"loss": 2.6012,
"step": 154000
},
{
"epoch": 0.71,
"learning_rate": 3.820425869917209e-05,
"loss": 2.6156,
"step": 154500
},
{
"epoch": 0.71,
"learning_rate": 3.8166070937585924e-05,
"loss": 2.6286,
"step": 155000
},
{
"epoch": 0.71,
"learning_rate": 3.8127883175999756e-05,
"loss": 2.6332,
"step": 155500
},
{
"epoch": 0.71,
"learning_rate": 3.808977178993676e-05,
"loss": 2.6177,
"step": 156000
},
{
"epoch": 0.72,
"learning_rate": 3.8051584028350596e-05,
"loss": 2.613,
"step": 156500
},
{
"epoch": 0.72,
"learning_rate": 3.801339626676443e-05,
"loss": 2.6077,
"step": 157000
},
{
"epoch": 0.72,
"learning_rate": 3.797520850517826e-05,
"loss": 2.6197,
"step": 157500
},
{
"epoch": 0.72,
"learning_rate": 3.793709711911527e-05,
"loss": 2.6205,
"step": 158000
},
{
"epoch": 0.73,
"learning_rate": 3.78989093575291e-05,
"loss": 2.6046,
"step": 158500
},
{
"epoch": 0.73,
"learning_rate": 3.786072159594293e-05,
"loss": 2.6028,
"step": 159000
},
{
"epoch": 0.73,
"learning_rate": 3.782253383435676e-05,
"loss": 2.6369,
"step": 159500
},
{
"epoch": 0.73,
"learning_rate": 3.778442244829377e-05,
"loss": 2.6246,
"step": 160000
},
{
"epoch": 0.74,
"learning_rate": 3.774631106223078e-05,
"loss": 2.6113,
"step": 160500
},
{
"epoch": 0.74,
"learning_rate": 3.770812330064461e-05,
"loss": 2.613,
"step": 161000
},
{
"epoch": 0.74,
"learning_rate": 3.766993553905844e-05,
"loss": 2.6249,
"step": 161500
},
{
"epoch": 0.74,
"learning_rate": 3.7631747777472273e-05,
"loss": 2.6026,
"step": 162000
},
{
"epoch": 0.74,
"learning_rate": 3.759356001588611e-05,
"loss": 2.6113,
"step": 162500
},
{
"epoch": 0.75,
"learning_rate": 3.7555372254299944e-05,
"loss": 2.6041,
"step": 163000
},
{
"epoch": 0.75,
"learning_rate": 3.751718449271378e-05,
"loss": 2.6257,
"step": 163500
},
{
"epoch": 0.75,
"learning_rate": 3.747899673112761e-05,
"loss": 2.6065,
"step": 164000
},
{
"epoch": 0.75,
"learning_rate": 3.744080896954144e-05,
"loss": 2.6112,
"step": 164500
},
{
"epoch": 0.76,
"learning_rate": 3.740269758347845e-05,
"loss": 2.6151,
"step": 165000
},
{
"epoch": 0.76,
"learning_rate": 3.736450982189228e-05,
"loss": 2.6017,
"step": 165500
},
{
"epoch": 0.76,
"learning_rate": 3.732632206030611e-05,
"loss": 2.6126,
"step": 166000
},
{
"epoch": 0.76,
"learning_rate": 3.7288134298719944e-05,
"loss": 2.6254,
"step": 166500
},
{
"epoch": 0.77,
"learning_rate": 3.725002291265695e-05,
"loss": 2.6014,
"step": 167000
},
{
"epoch": 0.77,
"learning_rate": 3.721183515107078e-05,
"loss": 2.6046,
"step": 167500
},
{
"epoch": 0.77,
"learning_rate": 3.7173647389484615e-05,
"loss": 2.6185,
"step": 168000
},
{
"epoch": 0.77,
"learning_rate": 3.713545962789845e-05,
"loss": 2.601,
"step": 168500
},
{
"epoch": 0.77,
"learning_rate": 3.7097271866312287e-05,
"loss": 2.6168,
"step": 169000
},
{
"epoch": 0.78,
"learning_rate": 3.7059160480249293e-05,
"loss": 2.6054,
"step": 169500
},
{
"epoch": 0.78,
"learning_rate": 3.7020972718663126e-05,
"loss": 2.6124,
"step": 170000
},
{
"epoch": 0.78,
"learning_rate": 3.698278495707696e-05,
"loss": 2.6031,
"step": 170500
},
{
"epoch": 0.78,
"learning_rate": 3.694459719549079e-05,
"loss": 2.6041,
"step": 171000
},
{
"epoch": 0.79,
"learning_rate": 3.690640943390463e-05,
"loss": 2.6057,
"step": 171500
},
{
"epoch": 0.79,
"learning_rate": 3.686822167231846e-05,
"loss": 2.5905,
"step": 172000
},
{
"epoch": 0.79,
"learning_rate": 3.683003391073229e-05,
"loss": 2.601,
"step": 172500
},
{
"epoch": 0.79,
"learning_rate": 3.6791846149146125e-05,
"loss": 2.6032,
"step": 173000
},
{
"epoch": 0.8,
"learning_rate": 3.675365838755996e-05,
"loss": 2.6097,
"step": 173500
},
{
"epoch": 0.8,
"learning_rate": 3.671547062597379e-05,
"loss": 2.6002,
"step": 174000
},
{
"epoch": 0.8,
"learning_rate": 3.6677359239910797e-05,
"loss": 2.6165,
"step": 174500
},
{
"epoch": 0.8,
"learning_rate": 3.663917147832463e-05,
"loss": 2.6062,
"step": 175000
},
{
"epoch": 0.8,
"learning_rate": 3.660098371673846e-05,
"loss": 2.5957,
"step": 175500
},
{
"epoch": 0.81,
"learning_rate": 3.656279595515229e-05,
"loss": 2.6121,
"step": 176000
},
{
"epoch": 0.81,
"learning_rate": 3.65246845690893e-05,
"loss": 2.5918,
"step": 176500
},
{
"epoch": 0.81,
"learning_rate": 3.648649680750313e-05,
"loss": 2.6204,
"step": 177000
},
{
"epoch": 0.81,
"learning_rate": 3.6448309045916964e-05,
"loss": 2.6062,
"step": 177500
},
{
"epoch": 0.82,
"learning_rate": 3.64101212843308e-05,
"loss": 2.5986,
"step": 178000
},
{
"epoch": 0.82,
"learning_rate": 3.63720098982678e-05,
"loss": 2.605,
"step": 178500
},
{
"epoch": 0.82,
"learning_rate": 3.633382213668164e-05,
"loss": 2.5993,
"step": 179000
},
{
"epoch": 0.82,
"learning_rate": 3.6295634375095474e-05,
"loss": 2.606,
"step": 179500
},
{
"epoch": 0.82,
"learning_rate": 3.6257446613509306e-05,
"loss": 2.5927,
"step": 180000
},
{
"epoch": 0.83,
"learning_rate": 3.621933522744631e-05,
"loss": 2.6058,
"step": 180500
},
{
"epoch": 0.83,
"learning_rate": 3.6181147465860145e-05,
"loss": 2.6094,
"step": 181000
},
{
"epoch": 0.83,
"learning_rate": 3.614295970427398e-05,
"loss": 2.5979,
"step": 181500
},
{
"epoch": 0.83,
"learning_rate": 3.610477194268781e-05,
"loss": 2.5967,
"step": 182000
},
{
"epoch": 0.84,
"learning_rate": 3.606666055662482e-05,
"loss": 2.6083,
"step": 182500
},
{
"epoch": 0.84,
"learning_rate": 3.602847279503865e-05,
"loss": 2.6111,
"step": 183000
},
{
"epoch": 0.84,
"learning_rate": 3.599028503345248e-05,
"loss": 2.6049,
"step": 183500
},
{
"epoch": 0.84,
"learning_rate": 3.595209727186631e-05,
"loss": 2.6157,
"step": 184000
},
{
"epoch": 0.85,
"learning_rate": 3.5913909510280145e-05,
"loss": 2.6048,
"step": 184500
},
{
"epoch": 0.85,
"learning_rate": 3.587579812421715e-05,
"loss": 2.6068,
"step": 185000
},
{
"epoch": 0.85,
"learning_rate": 3.5837610362630984e-05,
"loss": 2.5958,
"step": 185500
},
{
"epoch": 0.85,
"learning_rate": 3.5799422601044816e-05,
"loss": 2.5922,
"step": 186000
},
{
"epoch": 0.85,
"learning_rate": 3.576123483945865e-05,
"loss": 2.5969,
"step": 186500
},
{
"epoch": 0.86,
"learning_rate": 3.572319982891883e-05,
"loss": 2.6064,
"step": 187000
},
{
"epoch": 0.86,
"learning_rate": 3.568501206733266e-05,
"loss": 2.5968,
"step": 187500
},
{
"epoch": 0.86,
"learning_rate": 3.5646824305746495e-05,
"loss": 2.5958,
"step": 188000
},
{
"epoch": 0.86,
"learning_rate": 3.560863654416033e-05,
"loss": 2.5861,
"step": 188500
},
{
"epoch": 0.87,
"learning_rate": 3.557044878257416e-05,
"loss": 2.5702,
"step": 189000
},
{
"epoch": 0.87,
"learning_rate": 3.5532261020988e-05,
"loss": 2.5778,
"step": 189500
},
{
"epoch": 0.87,
"learning_rate": 3.549407325940183e-05,
"loss": 2.5904,
"step": 190000
},
{
"epoch": 0.87,
"learning_rate": 3.545596187333883e-05,
"loss": 2.5935,
"step": 190500
},
{
"epoch": 0.88,
"learning_rate": 3.541777411175267e-05,
"loss": 2.6034,
"step": 191000
},
{
"epoch": 0.88,
"learning_rate": 3.53795863501665e-05,
"loss": 2.5932,
"step": 191500
},
{
"epoch": 0.88,
"learning_rate": 3.534139858858033e-05,
"loss": 2.5814,
"step": 192000
},
{
"epoch": 0.88,
"learning_rate": 3.5303210826994165e-05,
"loss": 2.5764,
"step": 192500
},
{
"epoch": 0.88,
"learning_rate": 3.5265023065408e-05,
"loss": 2.5804,
"step": 193000
},
{
"epoch": 0.89,
"learning_rate": 3.522683530382183e-05,
"loss": 2.6004,
"step": 193500
},
{
"epoch": 0.89,
"learning_rate": 3.518864754223566e-05,
"loss": 2.5761,
"step": 194000
},
{
"epoch": 0.89,
"learning_rate": 3.5150459780649494e-05,
"loss": 2.6058,
"step": 194500
},
{
"epoch": 0.89,
"learning_rate": 3.51123483945865e-05,
"loss": 2.6052,
"step": 195000
},
{
"epoch": 0.9,
"learning_rate": 3.507423700852351e-05,
"loss": 2.5899,
"step": 195500
},
{
"epoch": 0.9,
"learning_rate": 3.503604924693734e-05,
"loss": 2.5873,
"step": 196000
},
{
"epoch": 0.9,
"learning_rate": 3.499786148535117e-05,
"loss": 2.5917,
"step": 196500
},
{
"epoch": 0.9,
"learning_rate": 3.4959673723765004e-05,
"loss": 2.5979,
"step": 197000
},
{
"epoch": 0.91,
"learning_rate": 3.492148596217884e-05,
"loss": 2.5871,
"step": 197500
},
{
"epoch": 0.91,
"learning_rate": 3.4883298200592675e-05,
"loss": 2.5734,
"step": 198000
},
{
"epoch": 0.91,
"learning_rate": 3.484511043900651e-05,
"loss": 2.5835,
"step": 198500
},
{
"epoch": 0.91,
"learning_rate": 3.480692267742034e-05,
"loss": 2.5906,
"step": 199000
},
{
"epoch": 0.91,
"learning_rate": 3.4768811291357347e-05,
"loss": 2.5929,
"step": 199500
},
{
"epoch": 0.92,
"learning_rate": 3.473062352977118e-05,
"loss": 2.5853,
"step": 200000
},
{
"epoch": 0.92,
"learning_rate": 3.469243576818501e-05,
"loss": 2.5786,
"step": 200500
},
{
"epoch": 0.92,
"learning_rate": 3.465424800659885e-05,
"loss": 2.5977,
"step": 201000
},
{
"epoch": 0.92,
"learning_rate": 3.461606024501268e-05,
"loss": 2.5808,
"step": 201500
},
{
"epoch": 0.93,
"learning_rate": 3.4577872483426514e-05,
"loss": 2.5888,
"step": 202000
},
{
"epoch": 0.93,
"learning_rate": 3.453976109736352e-05,
"loss": 2.5883,
"step": 202500
},
{
"epoch": 0.93,
"learning_rate": 3.450157333577735e-05,
"loss": 2.5927,
"step": 203000
},
{
"epoch": 0.93,
"learning_rate": 3.4463385574191185e-05,
"loss": 2.5882,
"step": 203500
},
{
"epoch": 0.93,
"learning_rate": 3.442519781260502e-05,
"loss": 2.5923,
"step": 204000
},
{
"epoch": 0.94,
"learning_rate": 3.4387010051018856e-05,
"loss": 2.5892,
"step": 204500
},
{
"epoch": 0.94,
"learning_rate": 3.434882228943269e-05,
"loss": 2.5886,
"step": 205000
},
{
"epoch": 0.94,
"learning_rate": 3.431063452784652e-05,
"loss": 2.5904,
"step": 205500
},
{
"epoch": 0.94,
"learning_rate": 3.427244676626035e-05,
"loss": 2.5851,
"step": 206000
},
{
"epoch": 0.95,
"learning_rate": 3.423433538019736e-05,
"loss": 2.5894,
"step": 206500
},
{
"epoch": 0.95,
"learning_rate": 3.419614761861119e-05,
"loss": 2.5733,
"step": 207000
},
{
"epoch": 0.95,
"learning_rate": 3.4157959857025024e-05,
"loss": 2.594,
"step": 207500
},
{
"epoch": 0.95,
"learning_rate": 3.4119772095438856e-05,
"loss": 2.5767,
"step": 208000
},
{
"epoch": 0.96,
"learning_rate": 3.408166070937586e-05,
"loss": 2.5857,
"step": 208500
},
{
"epoch": 0.96,
"learning_rate": 3.4043472947789695e-05,
"loss": 2.5687,
"step": 209000
},
{
"epoch": 0.96,
"learning_rate": 3.400528518620353e-05,
"loss": 2.5883,
"step": 209500
},
{
"epoch": 0.96,
"learning_rate": 3.396709742461736e-05,
"loss": 2.584,
"step": 210000
},
{
"epoch": 0.96,
"learning_rate": 3.392890966303119e-05,
"loss": 2.5787,
"step": 210500
},
{
"epoch": 0.97,
"learning_rate": 3.38907982769682e-05,
"loss": 2.5826,
"step": 211000
},
{
"epoch": 0.97,
"learning_rate": 3.385261051538203e-05,
"loss": 2.5828,
"step": 211500
},
{
"epoch": 0.97,
"learning_rate": 3.381442275379586e-05,
"loss": 2.5865,
"step": 212000
},
{
"epoch": 0.97,
"learning_rate": 3.37762349922097e-05,
"loss": 2.5867,
"step": 212500
},
{
"epoch": 0.98,
"learning_rate": 3.3738047230623534e-05,
"loss": 2.5799,
"step": 213000
},
{
"epoch": 0.98,
"learning_rate": 3.3699935844560534e-05,
"loss": 2.5761,
"step": 213500
},
{
"epoch": 0.98,
"learning_rate": 3.366174808297437e-05,
"loss": 2.5771,
"step": 214000
},
{
"epoch": 0.98,
"learning_rate": 3.362363669691137e-05,
"loss": 2.5876,
"step": 214500
},
{
"epoch": 0.99,
"learning_rate": 3.358544893532521e-05,
"loss": 2.574,
"step": 215000
},
{
"epoch": 0.99,
"learning_rate": 3.3547261173739044e-05,
"loss": 2.5878,
"step": 215500
},
{
"epoch": 0.99,
"learning_rate": 3.3509073412152876e-05,
"loss": 2.5874,
"step": 216000
},
{
"epoch": 0.99,
"learning_rate": 3.347088565056671e-05,
"loss": 2.5752,
"step": 216500
},
{
"epoch": 0.99,
"learning_rate": 3.343269788898054e-05,
"loss": 2.5783,
"step": 217000
},
{
"epoch": 1.0,
"learning_rate": 3.339451012739437e-05,
"loss": 2.5884,
"step": 217500
},
{
"epoch": 1.0,
"learning_rate": 3.3356322365808205e-05,
"loss": 2.5687,
"step": 218000
},
{
"epoch": 1.0,
"learning_rate": 3.331821097974521e-05,
"loss": 2.5582,
"step": 218500
},
{
"epoch": 1.0,
"learning_rate": 3.3280023218159044e-05,
"loss": 2.5573,
"step": 219000
},
{
"epoch": 1.01,
"learning_rate": 3.3241835456572876e-05,
"loss": 2.551,
"step": 219500
},
{
"epoch": 1.01,
"learning_rate": 3.320364769498671e-05,
"loss": 2.5366,
"step": 220000
},
{
"epoch": 1.01,
"learning_rate": 3.316545993340055e-05,
"loss": 2.5569,
"step": 220500
},
{
"epoch": 1.01,
"learning_rate": 3.312742492286072e-05,
"loss": 2.5285,
"step": 221000
},
{
"epoch": 1.02,
"learning_rate": 3.3089237161274554e-05,
"loss": 2.5448,
"step": 221500
},
{
"epoch": 1.02,
"learning_rate": 3.3051049399688386e-05,
"loss": 2.5567,
"step": 222000
},
{
"epoch": 1.02,
"learning_rate": 3.301286163810222e-05,
"loss": 2.5396,
"step": 222500
},
{
"epoch": 1.02,
"learning_rate": 3.297467387651606e-05,
"loss": 2.5534,
"step": 223000
},
{
"epoch": 1.02,
"learning_rate": 3.293648611492989e-05,
"loss": 2.5584,
"step": 223500
},
{
"epoch": 1.03,
"learning_rate": 3.289829835334372e-05,
"loss": 2.5447,
"step": 224000
},
{
"epoch": 1.03,
"learning_rate": 3.286018696728073e-05,
"loss": 2.5504,
"step": 224500
},
{
"epoch": 1.03,
"learning_rate": 3.282199920569456e-05,
"loss": 2.562,
"step": 225000
},
{
"epoch": 1.03,
"learning_rate": 3.278381144410839e-05,
"loss": 2.5367,
"step": 225500
},
{
"epoch": 1.04,
"learning_rate": 3.2745623682522225e-05,
"loss": 2.548,
"step": 226000
},
{
"epoch": 1.04,
"learning_rate": 3.270743592093606e-05,
"loss": 2.5425,
"step": 226500
},
{
"epoch": 1.04,
"learning_rate": 3.266924815934989e-05,
"loss": 2.5467,
"step": 227000
},
{
"epoch": 1.04,
"learning_rate": 3.263106039776372e-05,
"loss": 2.5352,
"step": 227500
},
{
"epoch": 1.04,
"learning_rate": 3.259287263617756e-05,
"loss": 2.5381,
"step": 228000
},
{
"epoch": 1.05,
"learning_rate": 3.255476125011456e-05,
"loss": 2.5468,
"step": 228500
},
{
"epoch": 1.05,
"learning_rate": 3.251657348852839e-05,
"loss": 2.523,
"step": 229000
},
{
"epoch": 1.05,
"learning_rate": 3.247838572694223e-05,
"loss": 2.5617,
"step": 229500
},
{
"epoch": 1.05,
"learning_rate": 3.244027434087923e-05,
"loss": 2.5385,
"step": 230000
},
{
"epoch": 1.06,
"learning_rate": 3.2402086579293064e-05,
"loss": 2.5488,
"step": 230500
},
{
"epoch": 1.06,
"learning_rate": 3.23638988177069e-05,
"loss": 2.5556,
"step": 231000
},
{
"epoch": 1.06,
"learning_rate": 3.2325711056120735e-05,
"loss": 2.545,
"step": 231500
},
{
"epoch": 1.06,
"learning_rate": 3.2287523294534574e-05,
"loss": 2.5458,
"step": 232000
},
{
"epoch": 1.07,
"learning_rate": 3.2249335532948406e-05,
"loss": 2.5588,
"step": 232500
},
{
"epoch": 1.07,
"learning_rate": 3.221114777136224e-05,
"loss": 2.5626,
"step": 233000
},
{
"epoch": 1.07,
"learning_rate": 3.217296000977607e-05,
"loss": 2.5346,
"step": 233500
},
{
"epoch": 1.07,
"learning_rate": 3.21347722481899e-05,
"loss": 2.5441,
"step": 234000
},
{
"epoch": 1.07,
"learning_rate": 3.209666086212691e-05,
"loss": 2.5406,
"step": 234500
},
{
"epoch": 1.08,
"learning_rate": 3.205847310054074e-05,
"loss": 2.5426,
"step": 235000
},
{
"epoch": 1.08,
"learning_rate": 3.202036171447775e-05,
"loss": 2.5486,
"step": 235500
},
{
"epoch": 1.08,
"learning_rate": 3.198217395289158e-05,
"loss": 2.5389,
"step": 236000
},
{
"epoch": 1.08,
"learning_rate": 3.194398619130541e-05,
"loss": 2.5307,
"step": 236500
},
{
"epoch": 1.09,
"learning_rate": 3.1905798429719245e-05,
"loss": 2.5546,
"step": 237000
},
{
"epoch": 1.09,
"learning_rate": 3.186761066813308e-05,
"loss": 2.5433,
"step": 237500
},
{
"epoch": 1.09,
"learning_rate": 3.1829422906546916e-05,
"loss": 2.5518,
"step": 238000
},
{
"epoch": 1.09,
"learning_rate": 3.179123514496075e-05,
"loss": 2.5476,
"step": 238500
},
{
"epoch": 1.1,
"learning_rate": 3.175304738337458e-05,
"loss": 2.5427,
"step": 239000
},
{
"epoch": 1.1,
"learning_rate": 3.171485962178841e-05,
"loss": 2.5359,
"step": 239500
},
{
"epoch": 1.1,
"learning_rate": 3.167674823572542e-05,
"loss": 2.5404,
"step": 240000
},
{
"epoch": 1.1,
"learning_rate": 3.163856047413925e-05,
"loss": 2.537,
"step": 240500
},
{
"epoch": 1.1,
"learning_rate": 3.1600372712553084e-05,
"loss": 2.5354,
"step": 241000
},
{
"epoch": 1.11,
"learning_rate": 3.1562184950966916e-05,
"loss": 2.5492,
"step": 241500
},
{
"epoch": 1.11,
"learning_rate": 3.152399718938075e-05,
"loss": 2.547,
"step": 242000
},
{
"epoch": 1.11,
"learning_rate": 3.148580942779458e-05,
"loss": 2.5364,
"step": 242500
},
{
"epoch": 1.11,
"learning_rate": 3.144769804173159e-05,
"loss": 2.566,
"step": 243000
},
{
"epoch": 1.12,
"learning_rate": 3.140951028014542e-05,
"loss": 2.5474,
"step": 243500
},
{
"epoch": 1.12,
"learning_rate": 3.137132251855925e-05,
"loss": 2.5425,
"step": 244000
},
{
"epoch": 1.12,
"learning_rate": 3.133313475697309e-05,
"loss": 2.5494,
"step": 244500
},
{
"epoch": 1.12,
"learning_rate": 3.129494699538692e-05,
"loss": 2.5373,
"step": 245000
},
{
"epoch": 1.13,
"learning_rate": 3.125683560932392e-05,
"loss": 2.5279,
"step": 245500
},
{
"epoch": 1.13,
"learning_rate": 3.121864784773776e-05,
"loss": 2.5559,
"step": 246000
},
{
"epoch": 1.13,
"learning_rate": 3.118053646167476e-05,
"loss": 2.5413,
"step": 246500
},
{
"epoch": 1.13,
"learning_rate": 3.11423487000886e-05,
"loss": 2.5333,
"step": 247000
},
{
"epoch": 1.13,
"learning_rate": 3.110416093850243e-05,
"loss": 2.541,
"step": 247500
},
{
"epoch": 1.14,
"learning_rate": 3.1065973176916265e-05,
"loss": 2.5568,
"step": 248000
},
{
"epoch": 1.14,
"learning_rate": 3.10277854153301e-05,
"loss": 2.5394,
"step": 248500
},
{
"epoch": 1.14,
"learning_rate": 3.098959765374393e-05,
"loss": 2.5354,
"step": 249000
},
{
"epoch": 1.14,
"learning_rate": 3.095140989215776e-05,
"loss": 2.5219,
"step": 249500
},
{
"epoch": 1.15,
"learning_rate": 3.091329850609477e-05,
"loss": 2.5332,
"step": 250000
},
{
"epoch": 1.15,
"learning_rate": 3.08751107445086e-05,
"loss": 2.5265,
"step": 250500
},
{
"epoch": 1.15,
"learning_rate": 3.083692298292243e-05,
"loss": 2.5466,
"step": 251000
},
{
"epoch": 1.15,
"learning_rate": 3.0798735221336265e-05,
"loss": 2.5518,
"step": 251500
},
{
"epoch": 1.15,
"learning_rate": 3.07605474597501e-05,
"loss": 2.5508,
"step": 252000
},
{
"epoch": 1.16,
"learning_rate": 3.0722359698163936e-05,
"loss": 2.5337,
"step": 252500
},
{
"epoch": 1.16,
"learning_rate": 3.068417193657777e-05,
"loss": 2.539,
"step": 253000
},
{
"epoch": 1.16,
"learning_rate": 3.06459841749916e-05,
"loss": 2.543,
"step": 253500
},
{
"epoch": 1.16,
"learning_rate": 3.060779641340543e-05,
"loss": 2.5212,
"step": 254000
},
{
"epoch": 1.17,
"learning_rate": 3.056968502734244e-05,
"loss": 2.5283,
"step": 254500
},
{
"epoch": 1.17,
"learning_rate": 3.053149726575627e-05,
"loss": 2.5406,
"step": 255000
},
{
"epoch": 1.17,
"learning_rate": 3.0493385879693275e-05,
"loss": 2.5387,
"step": 255500
},
{
"epoch": 1.17,
"learning_rate": 3.045519811810711e-05,
"loss": 2.5409,
"step": 256000
},
{
"epoch": 1.18,
"learning_rate": 3.0417010356520942e-05,
"loss": 2.5463,
"step": 256500
},
{
"epoch": 1.18,
"learning_rate": 3.0378822594934775e-05,
"loss": 2.5373,
"step": 257000
},
{
"epoch": 1.18,
"learning_rate": 3.0340634833348607e-05,
"loss": 2.5277,
"step": 257500
},
{
"epoch": 1.18,
"learning_rate": 3.0302447071762442e-05,
"loss": 2.5286,
"step": 258000
},
{
"epoch": 1.18,
"learning_rate": 3.0264259310176274e-05,
"loss": 2.5371,
"step": 258500
},
{
"epoch": 1.19,
"learning_rate": 3.0226071548590107e-05,
"loss": 2.5496,
"step": 259000
},
{
"epoch": 1.19,
"learning_rate": 3.018788378700394e-05,
"loss": 2.5281,
"step": 259500
},
{
"epoch": 1.19,
"learning_rate": 3.0149696025417774e-05,
"loss": 2.5319,
"step": 260000
},
{
"epoch": 1.19,
"learning_rate": 3.0111584639354778e-05,
"loss": 2.5421,
"step": 260500
},
{
"epoch": 1.2,
"learning_rate": 3.007339687776861e-05,
"loss": 2.5277,
"step": 261000
},
{
"epoch": 1.2,
"learning_rate": 3.003520911618245e-05,
"loss": 2.5327,
"step": 261500
},
{
"epoch": 1.2,
"learning_rate": 2.9997021354596284e-05,
"loss": 2.5222,
"step": 262000
},
{
"epoch": 1.2,
"learning_rate": 2.9958909968533288e-05,
"loss": 2.5413,
"step": 262500
},
{
"epoch": 1.21,
"learning_rate": 2.992072220694712e-05,
"loss": 2.5395,
"step": 263000
},
{
"epoch": 1.21,
"learning_rate": 2.9882610820884127e-05,
"loss": 2.5356,
"step": 263500
},
{
"epoch": 1.21,
"learning_rate": 2.984442305929796e-05,
"loss": 2.5391,
"step": 264000
},
{
"epoch": 1.21,
"learning_rate": 2.9806235297711795e-05,
"loss": 2.5429,
"step": 264500
},
{
"epoch": 1.21,
"learning_rate": 2.9768047536125627e-05,
"loss": 2.5278,
"step": 265000
},
{
"epoch": 1.22,
"learning_rate": 2.972985977453946e-05,
"loss": 2.5308,
"step": 265500
},
{
"epoch": 1.22,
"learning_rate": 2.969167201295329e-05,
"loss": 2.544,
"step": 266000
},
{
"epoch": 1.22,
"learning_rate": 2.9653484251367127e-05,
"loss": 2.5335,
"step": 266500
},
{
"epoch": 1.22,
"learning_rate": 2.961529648978096e-05,
"loss": 2.5356,
"step": 267000
},
{
"epoch": 1.23,
"learning_rate": 2.9577185103717962e-05,
"loss": 2.5371,
"step": 267500
},
{
"epoch": 1.23,
"learning_rate": 2.9538997342131798e-05,
"loss": 2.5302,
"step": 268000
},
{
"epoch": 1.23,
"learning_rate": 2.950080958054563e-05,
"loss": 2.5275,
"step": 268500
},
{
"epoch": 1.23,
"learning_rate": 2.9462621818959462e-05,
"loss": 2.5434,
"step": 269000
},
{
"epoch": 1.23,
"learning_rate": 2.942451043289647e-05,
"loss": 2.5332,
"step": 269500
},
{
"epoch": 1.24,
"learning_rate": 2.93863226713103e-05,
"loss": 2.5238,
"step": 270000
},
{
"epoch": 1.24,
"learning_rate": 2.9348134909724133e-05,
"loss": 2.5383,
"step": 270500
},
{
"epoch": 1.24,
"learning_rate": 2.930994714813797e-05,
"loss": 2.5484,
"step": 271000
},
{
"epoch": 1.24,
"learning_rate": 2.92717593865518e-05,
"loss": 2.532,
"step": 271500
},
{
"epoch": 1.25,
"learning_rate": 2.9233571624965633e-05,
"loss": 2.5318,
"step": 272000
},
{
"epoch": 1.25,
"learning_rate": 2.9195383863379465e-05,
"loss": 2.5163,
"step": 272500
},
{
"epoch": 1.25,
"learning_rate": 2.91571961017933e-05,
"loss": 2.5415,
"step": 273000
},
{
"epoch": 1.25,
"learning_rate": 2.9119161091253476e-05,
"loss": 2.5271,
"step": 273500
},
{
"epoch": 1.26,
"learning_rate": 2.908097332966731e-05,
"loss": 2.5285,
"step": 274000
},
{
"epoch": 1.26,
"learning_rate": 2.9042785568081143e-05,
"loss": 2.5256,
"step": 274500
},
{
"epoch": 1.26,
"learning_rate": 2.9004597806494976e-05,
"loss": 2.5373,
"step": 275000
},
{
"epoch": 1.26,
"learning_rate": 2.896641004490881e-05,
"loss": 2.5252,
"step": 275500
},
{
"epoch": 1.26,
"learning_rate": 2.8928222283322643e-05,
"loss": 2.5303,
"step": 276000
},
{
"epoch": 1.27,
"learning_rate": 2.8890110897259647e-05,
"loss": 2.5321,
"step": 276500
},
{
"epoch": 1.27,
"learning_rate": 2.8851923135673482e-05,
"loss": 2.5266,
"step": 277000
},
{
"epoch": 1.27,
"learning_rate": 2.8813735374087314e-05,
"loss": 2.5352,
"step": 277500
},
{
"epoch": 1.27,
"learning_rate": 2.8775547612501147e-05,
"loss": 2.5234,
"step": 278000
},
{
"epoch": 1.28,
"learning_rate": 2.8737436226438153e-05,
"loss": 2.5307,
"step": 278500
},
{
"epoch": 1.28,
"learning_rate": 2.8699248464851986e-05,
"loss": 2.5142,
"step": 279000
},
{
"epoch": 1.28,
"learning_rate": 2.8661060703265818e-05,
"loss": 2.5325,
"step": 279500
},
{
"epoch": 1.28,
"learning_rate": 2.862287294167965e-05,
"loss": 2.5186,
"step": 280000
},
{
"epoch": 1.29,
"learning_rate": 2.8584761555616657e-05,
"loss": 2.5356,
"step": 280500
},
{
"epoch": 1.29,
"learning_rate": 2.854657379403049e-05,
"loss": 2.5329,
"step": 281000
},
{
"epoch": 1.29,
"learning_rate": 2.8508386032444324e-05,
"loss": 2.5316,
"step": 281500
},
{
"epoch": 1.29,
"learning_rate": 2.8470274646381328e-05,
"loss": 2.5192,
"step": 282000
},
{
"epoch": 1.29,
"learning_rate": 2.843208688479516e-05,
"loss": 2.5469,
"step": 282500
},
{
"epoch": 1.3,
"learning_rate": 2.8393899123208996e-05,
"loss": 2.532,
"step": 283000
},
{
"epoch": 1.3,
"learning_rate": 2.8355711361622828e-05,
"loss": 2.525,
"step": 283500
},
{
"epoch": 1.3,
"learning_rate": 2.831752360003666e-05,
"loss": 2.5439,
"step": 284000
},
{
"epoch": 1.3,
"learning_rate": 2.8279412213973667e-05,
"loss": 2.5347,
"step": 284500
},
{
"epoch": 1.31,
"learning_rate": 2.82412244523875e-05,
"loss": 2.5168,
"step": 285000
},
{
"epoch": 1.31,
"learning_rate": 2.820303669080133e-05,
"loss": 2.5167,
"step": 285500
},
{
"epoch": 1.31,
"learning_rate": 2.8164848929215167e-05,
"loss": 2.5167,
"step": 286000
},
{
"epoch": 1.31,
"learning_rate": 2.8126661167629e-05,
"loss": 2.5207,
"step": 286500
},
{
"epoch": 1.32,
"learning_rate": 2.808847340604283e-05,
"loss": 2.5361,
"step": 287000
},
{
"epoch": 1.32,
"learning_rate": 2.8050285644456663e-05,
"loss": 2.529,
"step": 287500
},
{
"epoch": 1.32,
"learning_rate": 2.80120978828705e-05,
"loss": 2.5263,
"step": 288000
},
{
"epoch": 1.32,
"learning_rate": 2.7974062872330674e-05,
"loss": 2.528,
"step": 288500
},
{
"epoch": 1.32,
"learning_rate": 2.793587511074451e-05,
"loss": 2.5292,
"step": 289000
},
{
"epoch": 1.33,
"learning_rate": 2.789768734915834e-05,
"loss": 2.5142,
"step": 289500
},
{
"epoch": 1.33,
"learning_rate": 2.7859499587572173e-05,
"loss": 2.5312,
"step": 290000
},
{
"epoch": 1.33,
"learning_rate": 2.782131182598601e-05,
"loss": 2.5294,
"step": 290500
},
{
"epoch": 1.33,
"learning_rate": 2.778312406439984e-05,
"loss": 2.5228,
"step": 291000
},
{
"epoch": 1.34,
"learning_rate": 2.7744936302813673e-05,
"loss": 2.5368,
"step": 291500
},
{
"epoch": 1.34,
"learning_rate": 2.7706748541227505e-05,
"loss": 2.5152,
"step": 292000
},
{
"epoch": 1.34,
"learning_rate": 2.7668637155164512e-05,
"loss": 2.5217,
"step": 292500
},
{
"epoch": 1.34,
"learning_rate": 2.7630449393578344e-05,
"loss": 2.532,
"step": 293000
},
{
"epoch": 1.34,
"learning_rate": 2.7592261631992177e-05,
"loss": 2.5123,
"step": 293500
},
{
"epoch": 1.35,
"learning_rate": 2.7554073870406012e-05,
"loss": 2.5352,
"step": 294000
},
{
"epoch": 1.35,
"learning_rate": 2.7515962484343016e-05,
"loss": 2.5273,
"step": 294500
},
{
"epoch": 1.35,
"learning_rate": 2.7477774722756855e-05,
"loss": 2.531,
"step": 295000
},
{
"epoch": 1.35,
"learning_rate": 2.7439586961170687e-05,
"loss": 2.5236,
"step": 295500
},
{
"epoch": 1.36,
"learning_rate": 2.7401399199584522e-05,
"loss": 2.5073,
"step": 296000
},
{
"epoch": 1.36,
"learning_rate": 2.7363287813521526e-05,
"loss": 2.5285,
"step": 296500
},
{
"epoch": 1.36,
"learning_rate": 2.7325100051935358e-05,
"loss": 2.5331,
"step": 297000
},
{
"epoch": 1.36,
"learning_rate": 2.7286912290349193e-05,
"loss": 2.5148,
"step": 297500
},
{
"epoch": 1.37,
"learning_rate": 2.7248724528763026e-05,
"loss": 2.511,
"step": 298000
},
{
"epoch": 1.37,
"learning_rate": 2.7210536767176858e-05,
"loss": 2.5316,
"step": 298500
},
{
"epoch": 1.37,
"learning_rate": 2.7172425381113865e-05,
"loss": 2.5126,
"step": 299000
},
{
"epoch": 1.37,
"learning_rate": 2.7134237619527697e-05,
"loss": 2.5141,
"step": 299500
},
{
"epoch": 1.37,
"learning_rate": 2.709604985794153e-05,
"loss": 2.5339,
"step": 300000
},
{
"epoch": 1.38,
"learning_rate": 2.7057862096355365e-05,
"loss": 2.5261,
"step": 300500
},
{
"epoch": 1.38,
"learning_rate": 2.7019674334769197e-05,
"loss": 2.5023,
"step": 301000
},
{
"epoch": 1.38,
"learning_rate": 2.69815629487062e-05,
"loss": 2.5203,
"step": 301500
},
{
"epoch": 1.38,
"learning_rate": 2.6943375187120036e-05,
"loss": 2.5198,
"step": 302000
},
{
"epoch": 1.39,
"learning_rate": 2.6905187425533868e-05,
"loss": 2.5013,
"step": 302500
},
{
"epoch": 1.39,
"learning_rate": 2.68669996639477e-05,
"loss": 2.5304,
"step": 303000
},
{
"epoch": 1.39,
"learning_rate": 2.6828811902361532e-05,
"loss": 2.5089,
"step": 303500
},
{
"epoch": 1.39,
"learning_rate": 2.6790624140775368e-05,
"loss": 2.5109,
"step": 304000
},
{
"epoch": 1.4,
"learning_rate": 2.67524363791892e-05,
"loss": 2.512,
"step": 304500
},
{
"epoch": 1.4,
"learning_rate": 2.6714248617603032e-05,
"loss": 2.4975,
"step": 305000
},
{
"epoch": 1.4,
"learning_rate": 2.667613723154004e-05,
"loss": 2.497,
"step": 305500
},
{
"epoch": 1.4,
"learning_rate": 2.663794946995387e-05,
"loss": 2.504,
"step": 306000
},
{
"epoch": 1.4,
"learning_rate": 2.6599761708367703e-05,
"loss": 2.5093,
"step": 306500
},
{
"epoch": 1.41,
"learning_rate": 2.656157394678154e-05,
"loss": 2.5111,
"step": 307000
},
{
"epoch": 1.41,
"learning_rate": 2.652338618519537e-05,
"loss": 2.5143,
"step": 307500
},
{
"epoch": 1.41,
"learning_rate": 2.6485198423609203e-05,
"loss": 2.5183,
"step": 308000
},
{
"epoch": 1.41,
"learning_rate": 2.644708703754621e-05,
"loss": 2.5143,
"step": 308500
},
{
"epoch": 1.42,
"learning_rate": 2.6408899275960042e-05,
"loss": 2.4955,
"step": 309000
},
{
"epoch": 1.42,
"learning_rate": 2.6370711514373874e-05,
"loss": 2.5183,
"step": 309500
},
{
"epoch": 1.42,
"learning_rate": 2.633260012831088e-05,
"loss": 2.5143,
"step": 310000
},
{
"epoch": 1.42,
"learning_rate": 2.6294412366724713e-05,
"loss": 2.5165,
"step": 310500
},
{
"epoch": 1.43,
"learning_rate": 2.6256224605138545e-05,
"loss": 2.5055,
"step": 311000
},
{
"epoch": 1.43,
"learning_rate": 2.621803684355238e-05,
"loss": 2.5095,
"step": 311500
},
{
"epoch": 1.43,
"learning_rate": 2.6179849081966213e-05,
"loss": 2.5243,
"step": 312000
},
{
"epoch": 1.43,
"learning_rate": 2.6141661320380045e-05,
"loss": 2.5074,
"step": 312500
},
{
"epoch": 1.43,
"learning_rate": 2.6103549934317052e-05,
"loss": 2.5096,
"step": 313000
},
{
"epoch": 1.44,
"learning_rate": 2.6065362172730884e-05,
"loss": 2.508,
"step": 313500
},
{
"epoch": 1.44,
"learning_rate": 2.6027174411144716e-05,
"loss": 2.5096,
"step": 314000
},
{
"epoch": 1.44,
"learning_rate": 2.598898664955855e-05,
"loss": 2.4983,
"step": 314500
},
{
"epoch": 1.44,
"learning_rate": 2.5950798887972384e-05,
"loss": 2.5052,
"step": 315000
},
{
"epoch": 1.45,
"learning_rate": 2.5912611126386216e-05,
"loss": 2.5174,
"step": 315500
},
{
"epoch": 1.45,
"learning_rate": 2.587442336480005e-05,
"loss": 2.5116,
"step": 316000
},
{
"epoch": 1.45,
"learning_rate": 2.583623560321388e-05,
"loss": 2.5065,
"step": 316500
},
{
"epoch": 1.45,
"learning_rate": 2.5798047841627716e-05,
"loss": 2.52,
"step": 317000
},
{
"epoch": 1.45,
"learning_rate": 2.575993645556472e-05,
"loss": 2.5261,
"step": 317500
},
{
"epoch": 1.46,
"learning_rate": 2.5721748693978552e-05,
"loss": 2.5244,
"step": 318000
},
{
"epoch": 1.46,
"learning_rate": 2.5683560932392387e-05,
"loss": 2.5157,
"step": 318500
},
{
"epoch": 1.46,
"learning_rate": 2.564537317080622e-05,
"loss": 2.5092,
"step": 319000
},
{
"epoch": 1.46,
"learning_rate": 2.5607261784743226e-05,
"loss": 2.5104,
"step": 319500
},
{
"epoch": 1.47,
"learning_rate": 2.556907402315706e-05,
"loss": 2.5138,
"step": 320000
},
{
"epoch": 1.47,
"learning_rate": 2.553088626157089e-05,
"loss": 2.5047,
"step": 320500
},
{
"epoch": 1.47,
"learning_rate": 2.5492698499984723e-05,
"loss": 2.5195,
"step": 321000
},
{
"epoch": 1.47,
"learning_rate": 2.545458711392173e-05,
"loss": 2.5151,
"step": 321500
},
{
"epoch": 1.48,
"learning_rate": 2.5416399352335562e-05,
"loss": 2.5174,
"step": 322000
},
{
"epoch": 1.48,
"learning_rate": 2.5378211590749394e-05,
"loss": 2.5153,
"step": 322500
},
{
"epoch": 1.48,
"learning_rate": 2.534002382916323e-05,
"loss": 2.5133,
"step": 323000
},
{
"epoch": 1.48,
"learning_rate": 2.5301836067577062e-05,
"loss": 2.5207,
"step": 323500
},
{
"epoch": 1.48,
"learning_rate": 2.5263648305990894e-05,
"loss": 2.521,
"step": 324000
},
{
"epoch": 1.49,
"learning_rate": 2.5225460544404726e-05,
"loss": 2.5116,
"step": 324500
},
{
"epoch": 1.49,
"learning_rate": 2.5187272782818565e-05,
"loss": 2.5153,
"step": 325000
},
{
"epoch": 1.49,
"learning_rate": 2.51490850212324e-05,
"loss": 2.5185,
"step": 325500
},
{
"epoch": 1.49,
"learning_rate": 2.5110973635169404e-05,
"loss": 2.5144,
"step": 326000
},
{
"epoch": 1.5,
"learning_rate": 2.5072785873583236e-05,
"loss": 2.4997,
"step": 326500
},
{
"epoch": 1.5,
"learning_rate": 2.5034598111997072e-05,
"loss": 2.5065,
"step": 327000
},
{
"epoch": 1.5,
"learning_rate": 2.4996486725934072e-05,
"loss": 2.4958,
"step": 327500
},
{
"epoch": 1.5,
"learning_rate": 2.4958298964347907e-05,
"loss": 2.5255,
"step": 328000
},
{
"epoch": 1.51,
"learning_rate": 2.492011120276174e-05,
"loss": 2.5061,
"step": 328500
},
{
"epoch": 1.51,
"learning_rate": 2.488192344117557e-05,
"loss": 2.5157,
"step": 329000
},
{
"epoch": 1.51,
"learning_rate": 2.4843735679589404e-05,
"loss": 2.5118,
"step": 329500
},
{
"epoch": 1.51,
"learning_rate": 2.480554791800324e-05,
"loss": 2.5054,
"step": 330000
},
{
"epoch": 1.51,
"learning_rate": 2.476736015641707e-05,
"loss": 2.5111,
"step": 330500
},
{
"epoch": 1.52,
"learning_rate": 2.4729172394830904e-05,
"loss": 2.5024,
"step": 331000
},
{
"epoch": 1.52,
"learning_rate": 2.469098463324474e-05,
"loss": 2.5045,
"step": 331500
},
{
"epoch": 1.52,
"learning_rate": 2.4652796871658575e-05,
"loss": 2.4979,
"step": 332000
},
{
"epoch": 1.52,
"learning_rate": 2.4614685485595578e-05,
"loss": 2.5169,
"step": 332500
},
{
"epoch": 1.53,
"learning_rate": 2.457649772400941e-05,
"loss": 2.5107,
"step": 333000
},
{
"epoch": 1.53,
"learning_rate": 2.4538309962423246e-05,
"loss": 2.507,
"step": 333500
},
{
"epoch": 1.53,
"learning_rate": 2.450019857636025e-05,
"loss": 2.5057,
"step": 334000
},
{
"epoch": 1.53,
"learning_rate": 2.4462010814774085e-05,
"loss": 2.5156,
"step": 334500
},
{
"epoch": 1.54,
"learning_rate": 2.4423823053187917e-05,
"loss": 2.4845,
"step": 335000
},
{
"epoch": 1.54,
"learning_rate": 2.438563529160175e-05,
"loss": 2.5032,
"step": 335500
},
{
"epoch": 1.54,
"learning_rate": 2.434744753001558e-05,
"loss": 2.4944,
"step": 336000
},
{
"epoch": 1.54,
"learning_rate": 2.4309259768429417e-05,
"loss": 2.5036,
"step": 336500
},
{
"epoch": 1.54,
"learning_rate": 2.427107200684325e-05,
"loss": 2.5061,
"step": 337000
},
{
"epoch": 1.55,
"learning_rate": 2.423288424525708e-05,
"loss": 2.509,
"step": 337500
},
{
"epoch": 1.55,
"learning_rate": 2.4194772859194088e-05,
"loss": 2.4925,
"step": 338000
},
{
"epoch": 1.55,
"learning_rate": 2.415658509760792e-05,
"loss": 2.5023,
"step": 338500
},
{
"epoch": 1.55,
"learning_rate": 2.4118473711544924e-05,
"loss": 2.5172,
"step": 339000
},
{
"epoch": 1.56,
"learning_rate": 2.408028594995876e-05,
"loss": 2.5129,
"step": 339500
},
{
"epoch": 1.56,
"learning_rate": 2.404209818837259e-05,
"loss": 2.5011,
"step": 340000
},
{
"epoch": 1.56,
"learning_rate": 2.4003910426786424e-05,
"loss": 2.4977,
"step": 340500
},
{
"epoch": 1.56,
"learning_rate": 2.396572266520026e-05,
"loss": 2.4979,
"step": 341000
},
{
"epoch": 1.56,
"learning_rate": 2.392753490361409e-05,
"loss": 2.5248,
"step": 341500
},
{
"epoch": 1.57,
"learning_rate": 2.3889347142027924e-05,
"loss": 2.5024,
"step": 342000
},
{
"epoch": 1.57,
"learning_rate": 2.385123575596493e-05,
"loss": 2.5032,
"step": 342500
},
{
"epoch": 1.57,
"learning_rate": 2.3813047994378763e-05,
"loss": 2.5078,
"step": 343000
},
{
"epoch": 1.57,
"learning_rate": 2.3774860232792595e-05,
"loss": 2.514,
"step": 343500
},
{
"epoch": 1.58,
"learning_rate": 2.3736672471206427e-05,
"loss": 2.5069,
"step": 344000
},
{
"epoch": 1.58,
"learning_rate": 2.3698484709620262e-05,
"loss": 2.5132,
"step": 344500
},
{
"epoch": 1.58,
"learning_rate": 2.3660296948034095e-05,
"loss": 2.5165,
"step": 345000
},
{
"epoch": 1.58,
"learning_rate": 2.3622109186447927e-05,
"loss": 2.5133,
"step": 345500
},
{
"epoch": 1.59,
"learning_rate": 2.358392142486176e-05,
"loss": 2.5142,
"step": 346000
},
{
"epoch": 1.59,
"learning_rate": 2.3545810038798766e-05,
"loss": 2.5116,
"step": 346500
},
{
"epoch": 1.59,
"learning_rate": 2.3507698652735773e-05,
"loss": 2.506,
"step": 347000
},
{
"epoch": 1.59,
"learning_rate": 2.3469587266672776e-05,
"loss": 2.481,
"step": 347500
},
{
"epoch": 1.59,
"learning_rate": 2.3431399505086608e-05,
"loss": 2.496,
"step": 348000
},
{
"epoch": 1.6,
"learning_rate": 2.3393211743500444e-05,
"loss": 2.5162,
"step": 348500
},
{
"epoch": 1.6,
"learning_rate": 2.335502398191428e-05,
"loss": 2.4868,
"step": 349000
},
{
"epoch": 1.6,
"learning_rate": 2.331683622032811e-05,
"loss": 2.4927,
"step": 349500
},
{
"epoch": 1.6,
"learning_rate": 2.3278648458741944e-05,
"loss": 2.4977,
"step": 350000
},
{
"epoch": 1.61,
"learning_rate": 2.324046069715578e-05,
"loss": 2.5081,
"step": 350500
},
{
"epoch": 1.61,
"learning_rate": 2.320227293556961e-05,
"loss": 2.5005,
"step": 351000
},
{
"epoch": 1.61,
"learning_rate": 2.3164085173983444e-05,
"loss": 2.5109,
"step": 351500
},
{
"epoch": 1.61,
"learning_rate": 2.3125897412397276e-05,
"loss": 2.5022,
"step": 352000
},
{
"epoch": 1.62,
"learning_rate": 2.308770965081111e-05,
"loss": 2.4936,
"step": 352500
},
{
"epoch": 1.62,
"learning_rate": 2.3049521889224943e-05,
"loss": 2.4932,
"step": 353000
},
{
"epoch": 1.62,
"learning_rate": 2.3011410503161947e-05,
"loss": 2.4868,
"step": 353500
},
{
"epoch": 1.62,
"learning_rate": 2.2973222741575782e-05,
"loss": 2.5014,
"step": 354000
},
{
"epoch": 1.62,
"learning_rate": 2.2935034979989615e-05,
"loss": 2.5233,
"step": 354500
},
{
"epoch": 1.63,
"learning_rate": 2.2896847218403447e-05,
"loss": 2.4916,
"step": 355000
},
{
"epoch": 1.63,
"learning_rate": 2.285865945681728e-05,
"loss": 2.4821,
"step": 355500
},
{
"epoch": 1.63,
"learning_rate": 2.2820548070754286e-05,
"loss": 2.51,
"step": 356000
},
{
"epoch": 1.63,
"learning_rate": 2.2782360309168118e-05,
"loss": 2.5103,
"step": 356500
},
{
"epoch": 1.64,
"learning_rate": 2.274417254758195e-05,
"loss": 2.4884,
"step": 357000
},
{
"epoch": 1.64,
"learning_rate": 2.2705984785995786e-05,
"loss": 2.4913,
"step": 357500
},
{
"epoch": 1.64,
"learning_rate": 2.2667797024409618e-05,
"loss": 2.4929,
"step": 358000
},
{
"epoch": 1.64,
"learning_rate": 2.262960926282345e-05,
"loss": 2.5079,
"step": 358500
},
{
"epoch": 1.65,
"learning_rate": 2.2591421501237282e-05,
"loss": 2.5,
"step": 359000
},
{
"epoch": 1.65,
"learning_rate": 2.255331011517429e-05,
"loss": 2.4958,
"step": 359500
},
{
"epoch": 1.65,
"learning_rate": 2.251512235358812e-05,
"loss": 2.5076,
"step": 360000
},
{
"epoch": 1.65,
"learning_rate": 2.2476934592001957e-05,
"loss": 2.4991,
"step": 360500
},
{
"epoch": 1.65,
"learning_rate": 2.243874683041579e-05,
"loss": 2.4979,
"step": 361000
},
{
"epoch": 1.66,
"learning_rate": 2.240055906882962e-05,
"loss": 2.4877,
"step": 361500
},
{
"epoch": 1.66,
"learning_rate": 2.2362447682766628e-05,
"loss": 2.5059,
"step": 362000
},
{
"epoch": 1.66,
"learning_rate": 2.232425992118046e-05,
"loss": 2.4843,
"step": 362500
},
{
"epoch": 1.66,
"learning_rate": 2.2286072159594296e-05,
"loss": 2.4881,
"step": 363000
},
{
"epoch": 1.67,
"learning_rate": 2.2247884398008128e-05,
"loss": 2.5017,
"step": 363500
},
{
"epoch": 1.67,
"learning_rate": 2.2209773011945135e-05,
"loss": 2.4981,
"step": 364000
},
{
"epoch": 1.67,
"learning_rate": 2.2171585250358967e-05,
"loss": 2.489,
"step": 364500
},
{
"epoch": 1.67,
"learning_rate": 2.21333974887728e-05,
"loss": 2.4955,
"step": 365000
},
{
"epoch": 1.67,
"learning_rate": 2.2095209727186634e-05,
"loss": 2.4974,
"step": 365500
},
{
"epoch": 1.68,
"learning_rate": 2.2057021965600467e-05,
"loss": 2.4976,
"step": 366000
},
{
"epoch": 1.68,
"learning_rate": 2.20188342040143e-05,
"loss": 2.4934,
"step": 366500
},
{
"epoch": 1.68,
"learning_rate": 2.198064644242813e-05,
"loss": 2.5053,
"step": 367000
},
{
"epoch": 1.68,
"learning_rate": 2.1942535056365138e-05,
"loss": 2.4881,
"step": 367500
},
{
"epoch": 1.69,
"learning_rate": 2.190434729477897e-05,
"loss": 2.501,
"step": 368000
},
{
"epoch": 1.69,
"learning_rate": 2.1866159533192802e-05,
"loss": 2.4958,
"step": 368500
},
{
"epoch": 1.69,
"learning_rate": 2.182804814712981e-05,
"loss": 2.4916,
"step": 369000
},
{
"epoch": 1.69,
"learning_rate": 2.178986038554364e-05,
"loss": 2.4897,
"step": 369500
},
{
"epoch": 1.7,
"learning_rate": 2.1751672623957477e-05,
"loss": 2.4845,
"step": 370000
},
{
"epoch": 1.7,
"learning_rate": 2.171348486237131e-05,
"loss": 2.5041,
"step": 370500
},
{
"epoch": 1.7,
"learning_rate": 2.167529710078514e-05,
"loss": 2.4962,
"step": 371000
},
{
"epoch": 1.7,
"learning_rate": 2.1637109339198973e-05,
"loss": 2.4884,
"step": 371500
},
{
"epoch": 1.7,
"learning_rate": 2.159892157761281e-05,
"loss": 2.4941,
"step": 372000
},
{
"epoch": 1.71,
"learning_rate": 2.156073381602664e-05,
"loss": 2.5096,
"step": 372500
},
{
"epoch": 1.71,
"learning_rate": 2.1522546054440473e-05,
"loss": 2.4854,
"step": 373000
},
{
"epoch": 1.71,
"learning_rate": 2.148443466837748e-05,
"loss": 2.4825,
"step": 373500
},
{
"epoch": 1.71,
"learning_rate": 2.1446246906791312e-05,
"loss": 2.5111,
"step": 374000
},
{
"epoch": 1.72,
"learning_rate": 2.1408059145205144e-05,
"loss": 2.5055,
"step": 374500
},
{
"epoch": 1.72,
"learning_rate": 2.1369871383618976e-05,
"loss": 2.5146,
"step": 375000
},
{
"epoch": 1.72,
"learning_rate": 2.1331759997555983e-05,
"loss": 2.4959,
"step": 375500
},
{
"epoch": 1.72,
"learning_rate": 2.1293572235969815e-05,
"loss": 2.4997,
"step": 376000
},
{
"epoch": 1.73,
"learning_rate": 2.125538447438365e-05,
"loss": 2.4885,
"step": 376500
},
{
"epoch": 1.73,
"learning_rate": 2.1217196712797483e-05,
"loss": 2.526,
"step": 377000
},
{
"epoch": 1.73,
"learning_rate": 2.1179008951211315e-05,
"loss": 2.5,
"step": 377500
},
{
"epoch": 1.73,
"learning_rate": 2.1140897565148322e-05,
"loss": 2.4943,
"step": 378000
},
{
"epoch": 1.73,
"learning_rate": 2.1102709803562158e-05,
"loss": 2.4872,
"step": 378500
},
{
"epoch": 1.74,
"learning_rate": 2.106452204197599e-05,
"loss": 2.4927,
"step": 379000
},
{
"epoch": 1.74,
"learning_rate": 2.1026334280389822e-05,
"loss": 2.5075,
"step": 379500
},
{
"epoch": 1.74,
"learning_rate": 2.0988146518803657e-05,
"loss": 2.5004,
"step": 380000
},
{
"epoch": 1.74,
"learning_rate": 2.095003513274066e-05,
"loss": 2.5027,
"step": 380500
},
{
"epoch": 1.75,
"learning_rate": 2.0911923746677668e-05,
"loss": 2.4956,
"step": 381000
},
{
"epoch": 1.75,
"learning_rate": 2.08737359850915e-05,
"loss": 2.487,
"step": 381500
},
{
"epoch": 1.75,
"learning_rate": 2.0835548223505332e-05,
"loss": 2.4883,
"step": 382000
},
{
"epoch": 1.75,
"learning_rate": 2.0797360461919164e-05,
"loss": 2.4898,
"step": 382500
},
{
"epoch": 1.76,
"learning_rate": 2.0759172700333e-05,
"loss": 2.4909,
"step": 383000
},
{
"epoch": 1.76,
"learning_rate": 2.0720984938746832e-05,
"loss": 2.4797,
"step": 383500
},
{
"epoch": 1.76,
"learning_rate": 2.0682797177160664e-05,
"loss": 2.4808,
"step": 384000
},
{
"epoch": 1.76,
"learning_rate": 2.0644609415574496e-05,
"loss": 2.4991,
"step": 384500
},
{
"epoch": 1.76,
"learning_rate": 2.0606498029511503e-05,
"loss": 2.4897,
"step": 385000
},
{
"epoch": 1.77,
"learning_rate": 2.0568310267925335e-05,
"loss": 2.496,
"step": 385500
},
{
"epoch": 1.77,
"learning_rate": 2.0530198881862342e-05,
"loss": 2.4874,
"step": 386000
},
{
"epoch": 1.77,
"learning_rate": 2.0492011120276174e-05,
"loss": 2.4978,
"step": 386500
},
{
"epoch": 1.77,
"learning_rate": 2.0453823358690007e-05,
"loss": 2.4873,
"step": 387000
},
{
"epoch": 1.78,
"learning_rate": 2.0415635597103842e-05,
"loss": 2.501,
"step": 387500
},
{
"epoch": 1.78,
"learning_rate": 2.0377447835517674e-05,
"loss": 2.4895,
"step": 388000
},
{
"epoch": 1.78,
"learning_rate": 2.0339260073931506e-05,
"loss": 2.4798,
"step": 388500
},
{
"epoch": 1.78,
"learning_rate": 2.030107231234534e-05,
"loss": 2.4804,
"step": 389000
},
{
"epoch": 1.78,
"learning_rate": 2.0262884550759174e-05,
"loss": 2.4919,
"step": 389500
},
{
"epoch": 1.79,
"learning_rate": 2.0224696789173006e-05,
"loss": 2.4777,
"step": 390000
},
{
"epoch": 1.79,
"learning_rate": 2.0186585403110013e-05,
"loss": 2.4773,
"step": 390500
},
{
"epoch": 1.79,
"learning_rate": 2.0148474017047017e-05,
"loss": 2.4739,
"step": 391000
},
{
"epoch": 1.79,
"learning_rate": 2.011028625546085e-05,
"loss": 2.5024,
"step": 391500
},
{
"epoch": 1.8,
"learning_rate": 2.0072098493874684e-05,
"loss": 2.4834,
"step": 392000
},
{
"epoch": 1.8,
"learning_rate": 2.0033910732288516e-05,
"loss": 2.4924,
"step": 392500
},
{
"epoch": 1.8,
"learning_rate": 1.999572297070235e-05,
"loss": 2.4797,
"step": 393000
},
{
"epoch": 1.8,
"learning_rate": 1.995753520911618e-05,
"loss": 2.484,
"step": 393500
},
{
"epoch": 1.81,
"learning_rate": 1.9919347447530016e-05,
"loss": 2.4977,
"step": 394000
},
{
"epoch": 1.81,
"learning_rate": 1.9881159685943852e-05,
"loss": 2.4783,
"step": 394500
},
{
"epoch": 1.81,
"learning_rate": 1.9843048299880855e-05,
"loss": 2.4962,
"step": 395000
},
{
"epoch": 1.81,
"learning_rate": 1.9804936913817862e-05,
"loss": 2.4938,
"step": 395500
},
{
"epoch": 1.81,
"learning_rate": 1.9766749152231694e-05,
"loss": 2.4844,
"step": 396000
},
{
"epoch": 1.82,
"learning_rate": 1.9728561390645527e-05,
"loss": 2.4872,
"step": 396500
},
{
"epoch": 1.82,
"learning_rate": 1.9690373629059362e-05,
"loss": 2.4955,
"step": 397000
},
{
"epoch": 1.82,
"learning_rate": 1.9652185867473194e-05,
"loss": 2.4852,
"step": 397500
},
{
"epoch": 1.82,
"learning_rate": 1.9613998105887026e-05,
"loss": 2.4799,
"step": 398000
},
{
"epoch": 1.83,
"learning_rate": 1.957581034430086e-05,
"loss": 2.4944,
"step": 398500
},
{
"epoch": 1.83,
"learning_rate": 1.9537698958237865e-05,
"loss": 2.4988,
"step": 399000
},
{
"epoch": 1.83,
"learning_rate": 1.9499511196651698e-05,
"loss": 2.4875,
"step": 399500
},
{
"epoch": 1.83,
"learning_rate": 1.9461323435065533e-05,
"loss": 2.4724,
"step": 400000
},
{
"epoch": 1.84,
"learning_rate": 1.9423212049002537e-05,
"loss": 2.4987,
"step": 400500
},
{
"epoch": 1.84,
"learning_rate": 1.938502428741637e-05,
"loss": 2.4957,
"step": 401000
},
{
"epoch": 1.84,
"learning_rate": 1.9346836525830204e-05,
"loss": 2.4821,
"step": 401500
},
{
"epoch": 1.84,
"learning_rate": 1.9308648764244036e-05,
"loss": 2.4774,
"step": 402000
},
{
"epoch": 1.84,
"learning_rate": 1.927046100265787e-05,
"loss": 2.4921,
"step": 402500
},
{
"epoch": 1.85,
"learning_rate": 1.92322732410717e-05,
"loss": 2.4885,
"step": 403000
},
{
"epoch": 1.85,
"learning_rate": 1.9194085479485536e-05,
"loss": 2.4955,
"step": 403500
},
{
"epoch": 1.85,
"learning_rate": 1.915589771789937e-05,
"loss": 2.4887,
"step": 404000
},
{
"epoch": 1.85,
"learning_rate": 1.91177099563132e-05,
"loss": 2.4868,
"step": 404500
},
{
"epoch": 1.86,
"learning_rate": 1.9079598570250208e-05,
"loss": 2.4786,
"step": 405000
},
{
"epoch": 1.86,
"learning_rate": 1.904141080866404e-05,
"loss": 2.4996,
"step": 405500
},
{
"epoch": 1.86,
"learning_rate": 1.9003223047077872e-05,
"loss": 2.4757,
"step": 406000
},
{
"epoch": 1.86,
"learning_rate": 1.8965035285491707e-05,
"loss": 2.4956,
"step": 406500
},
{
"epoch": 1.87,
"learning_rate": 1.892684752390554e-05,
"loss": 2.4914,
"step": 407000
},
{
"epoch": 1.87,
"learning_rate": 1.8888736137842543e-05,
"loss": 2.4867,
"step": 407500
},
{
"epoch": 1.87,
"learning_rate": 1.885054837625638e-05,
"loss": 2.4797,
"step": 408000
},
{
"epoch": 1.87,
"learning_rate": 1.881236061467021e-05,
"loss": 2.4955,
"step": 408500
},
{
"epoch": 1.87,
"learning_rate": 1.8774172853084043e-05,
"loss": 2.4922,
"step": 409000
},
{
"epoch": 1.88,
"learning_rate": 1.8735985091497875e-05,
"loss": 2.4875,
"step": 409500
},
{
"epoch": 1.88,
"learning_rate": 1.869779732991171e-05,
"loss": 2.4836,
"step": 410000
},
{
"epoch": 1.88,
"learning_rate": 1.8659609568325546e-05,
"loss": 2.4803,
"step": 410500
},
{
"epoch": 1.88,
"learning_rate": 1.8621421806739378e-05,
"loss": 2.4964,
"step": 411000
},
{
"epoch": 1.89,
"learning_rate": 1.8583310420676385e-05,
"loss": 2.4875,
"step": 411500
},
{
"epoch": 1.89,
"learning_rate": 1.8545122659090217e-05,
"loss": 2.4915,
"step": 412000
},
{
"epoch": 1.89,
"learning_rate": 1.850693489750405e-05,
"loss": 2.4995,
"step": 412500
},
{
"epoch": 1.89,
"learning_rate": 1.846874713591788e-05,
"loss": 2.4809,
"step": 413000
},
{
"epoch": 1.89,
"learning_rate": 1.8430559374331717e-05,
"loss": 2.4906,
"step": 413500
},
{
"epoch": 1.9,
"learning_rate": 1.839244798826872e-05,
"loss": 2.4659,
"step": 414000
},
{
"epoch": 1.9,
"learning_rate": 1.8354336602205728e-05,
"loss": 2.4745,
"step": 414500
},
{
"epoch": 1.9,
"learning_rate": 1.831614884061956e-05,
"loss": 2.4956,
"step": 415000
},
{
"epoch": 1.9,
"learning_rate": 1.8277961079033392e-05,
"loss": 2.4996,
"step": 415500
},
{
"epoch": 1.91,
"learning_rate": 1.8239773317447227e-05,
"loss": 2.4669,
"step": 416000
},
{
"epoch": 1.91,
"learning_rate": 1.820158555586106e-05,
"loss": 2.4857,
"step": 416500
},
{
"epoch": 1.91,
"learning_rate": 1.8163397794274892e-05,
"loss": 2.4912,
"step": 417000
},
{
"epoch": 1.91,
"learning_rate": 1.8125210032688724e-05,
"loss": 2.4874,
"step": 417500
},
{
"epoch": 1.92,
"learning_rate": 1.808702227110256e-05,
"loss": 2.4717,
"step": 418000
},
{
"epoch": 1.92,
"learning_rate": 1.8048910885039563e-05,
"loss": 2.5005,
"step": 418500
},
{
"epoch": 1.92,
"learning_rate": 1.801079949897657e-05,
"loss": 2.485,
"step": 419000
},
{
"epoch": 1.92,
"learning_rate": 1.7972611737390402e-05,
"loss": 2.4802,
"step": 419500
},
{
"epoch": 1.92,
"learning_rate": 1.7934423975804234e-05,
"loss": 2.5059,
"step": 420000
},
{
"epoch": 1.93,
"learning_rate": 1.7896236214218066e-05,
"loss": 2.481,
"step": 420500
},
{
"epoch": 1.93,
"learning_rate": 1.7858048452631902e-05,
"loss": 2.4955,
"step": 421000
},
{
"epoch": 1.93,
"learning_rate": 1.7819860691045734e-05,
"loss": 2.4833,
"step": 421500
},
{
"epoch": 1.93,
"learning_rate": 1.7781672929459566e-05,
"loss": 2.4695,
"step": 422000
},
{
"epoch": 1.94,
"learning_rate": 1.7743485167873398e-05,
"loss": 2.4649,
"step": 422500
},
{
"epoch": 1.94,
"learning_rate": 1.7705373781810405e-05,
"loss": 2.4864,
"step": 423000
},
{
"epoch": 1.94,
"learning_rate": 1.7667186020224237e-05,
"loss": 2.4922,
"step": 423500
},
{
"epoch": 1.94,
"learning_rate": 1.7629074634161244e-05,
"loss": 2.4674,
"step": 424000
},
{
"epoch": 1.95,
"learning_rate": 1.7590886872575076e-05,
"loss": 2.4778,
"step": 424500
},
{
"epoch": 1.95,
"learning_rate": 1.755269911098891e-05,
"loss": 2.4762,
"step": 425000
},
{
"epoch": 1.95,
"learning_rate": 1.7514511349402744e-05,
"loss": 2.4852,
"step": 425500
},
{
"epoch": 1.95,
"learning_rate": 1.747632358781658e-05,
"loss": 2.4944,
"step": 426000
},
{
"epoch": 1.95,
"learning_rate": 1.7438135826230412e-05,
"loss": 2.4754,
"step": 426500
},
{
"epoch": 1.96,
"learning_rate": 1.740002444016742e-05,
"loss": 2.4842,
"step": 427000
},
{
"epoch": 1.96,
"learning_rate": 1.736183667858125e-05,
"loss": 2.4783,
"step": 427500
},
{
"epoch": 1.96,
"learning_rate": 1.7323648916995083e-05,
"loss": 2.4826,
"step": 428000
},
{
"epoch": 1.96,
"learning_rate": 1.728553753093209e-05,
"loss": 2.4984,
"step": 428500
},
{
"epoch": 1.97,
"learning_rate": 1.7247349769345922e-05,
"loss": 2.4978,
"step": 429000
},
{
"epoch": 1.97,
"learning_rate": 1.7209162007759754e-05,
"loss": 2.4852,
"step": 429500
},
{
"epoch": 1.97,
"learning_rate": 1.7170974246173586e-05,
"loss": 2.4654,
"step": 430000
},
{
"epoch": 1.97,
"learning_rate": 1.7132786484587422e-05,
"loss": 2.4961,
"step": 430500
},
{
"epoch": 1.98,
"learning_rate": 1.7094675098524425e-05,
"loss": 2.4828,
"step": 431000
},
{
"epoch": 1.98,
"learning_rate": 1.705648733693826e-05,
"loss": 2.4824,
"step": 431500
},
{
"epoch": 1.98,
"learning_rate": 1.7018299575352093e-05,
"loss": 2.4922,
"step": 432000
},
{
"epoch": 1.98,
"learning_rate": 1.6980111813765925e-05,
"loss": 2.4749,
"step": 432500
},
{
"epoch": 1.98,
"learning_rate": 1.6941924052179757e-05,
"loss": 2.4765,
"step": 433000
},
{
"epoch": 1.99,
"learning_rate": 1.6903736290593593e-05,
"loss": 2.4909,
"step": 433500
},
{
"epoch": 1.99,
"learning_rate": 1.6865548529007425e-05,
"loss": 2.4865,
"step": 434000
},
{
"epoch": 1.99,
"learning_rate": 1.6827360767421257e-05,
"loss": 2.4848,
"step": 434500
},
{
"epoch": 1.99,
"learning_rate": 1.678917300583509e-05,
"loss": 2.4798,
"step": 435000
},
{
"epoch": 2.0,
"learning_rate": 1.6750985244248925e-05,
"loss": 2.4661,
"step": 435500
},
{
"epoch": 2.0,
"learning_rate": 1.6712797482662757e-05,
"loss": 2.456,
"step": 436000
},
{
"epoch": 2.0,
"learning_rate": 1.667468609659976e-05,
"loss": 2.4836,
"step": 436500
},
{
"epoch": 2.0,
"learning_rate": 1.6636498335013596e-05,
"loss": 2.4701,
"step": 437000
},
{
"epoch": 2.0,
"learning_rate": 1.6598310573427428e-05,
"loss": 2.4511,
"step": 437500
},
{
"epoch": 2.01,
"learning_rate": 1.656012281184126e-05,
"loss": 2.4736,
"step": 438000
},
{
"epoch": 2.01,
"learning_rate": 1.6521935050255092e-05,
"loss": 2.442,
"step": 438500
},
{
"epoch": 2.01,
"learning_rate": 1.64838236641921e-05,
"loss": 2.4498,
"step": 439000
},
{
"epoch": 2.01,
"learning_rate": 1.644563590260593e-05,
"loss": 2.4634,
"step": 439500
},
{
"epoch": 2.02,
"learning_rate": 1.6407448141019767e-05,
"loss": 2.4437,
"step": 440000
},
{
"epoch": 2.02,
"learning_rate": 1.63692603794336e-05,
"loss": 2.4635,
"step": 440500
},
{
"epoch": 2.02,
"learning_rate": 1.6331072617847435e-05,
"loss": 2.4361,
"step": 441000
},
{
"epoch": 2.02,
"learning_rate": 1.6292884856261267e-05,
"loss": 2.4482,
"step": 441500
},
{
"epoch": 2.03,
"learning_rate": 1.62546970946751e-05,
"loss": 2.4501,
"step": 442000
},
{
"epoch": 2.03,
"learning_rate": 1.6216509333088935e-05,
"loss": 2.448,
"step": 442500
},
{
"epoch": 2.03,
"learning_rate": 1.6178321571502767e-05,
"loss": 2.4459,
"step": 443000
},
{
"epoch": 2.03,
"learning_rate": 1.61401338099166e-05,
"loss": 2.4403,
"step": 443500
},
{
"epoch": 2.03,
"learning_rate": 1.610194604833043e-05,
"loss": 2.4537,
"step": 444000
},
{
"epoch": 2.04,
"learning_rate": 1.6063758286744267e-05,
"loss": 2.4677,
"step": 444500
},
{
"epoch": 2.04,
"learning_rate": 1.602564690068127e-05,
"loss": 2.4495,
"step": 445000
},
{
"epoch": 2.04,
"learning_rate": 1.5987459139095106e-05,
"loss": 2.4662,
"step": 445500
},
{
"epoch": 2.04,
"learning_rate": 1.594934775303211e-05,
"loss": 2.4608,
"step": 446000
},
{
"epoch": 2.05,
"learning_rate": 1.591115999144594e-05,
"loss": 2.462,
"step": 446500
},
{
"epoch": 2.05,
"learning_rate": 1.5873048605382948e-05,
"loss": 2.4542,
"step": 447000
},
{
"epoch": 2.05,
"learning_rate": 1.583486084379678e-05,
"loss": 2.4337,
"step": 447500
},
{
"epoch": 2.05,
"learning_rate": 1.5796673082210612e-05,
"loss": 2.4451,
"step": 448000
},
{
"epoch": 2.06,
"learning_rate": 1.5758485320624448e-05,
"loss": 2.4314,
"step": 448500
},
{
"epoch": 2.06,
"learning_rate": 1.572029755903828e-05,
"loss": 2.4542,
"step": 449000
},
{
"epoch": 2.06,
"learning_rate": 1.5682109797452112e-05,
"loss": 2.4507,
"step": 449500
},
{
"epoch": 2.06,
"learning_rate": 1.5643922035865944e-05,
"loss": 2.439,
"step": 450000
},
{
"epoch": 2.06,
"learning_rate": 1.560573427427978e-05,
"loss": 2.449,
"step": 450500
},
{
"epoch": 2.07,
"learning_rate": 1.5567546512693612e-05,
"loss": 2.4579,
"step": 451000
},
{
"epoch": 2.07,
"learning_rate": 1.552943512663062e-05,
"loss": 2.4466,
"step": 451500
},
{
"epoch": 2.07,
"learning_rate": 1.549124736504445e-05,
"loss": 2.4515,
"step": 452000
},
{
"epoch": 2.07,
"learning_rate": 1.5453135978981455e-05,
"loss": 2.4527,
"step": 452500
},
{
"epoch": 2.08,
"learning_rate": 1.541494821739529e-05,
"loss": 2.4501,
"step": 453000
},
{
"epoch": 2.08,
"learning_rate": 1.5376760455809122e-05,
"loss": 2.4574,
"step": 453500
},
{
"epoch": 2.08,
"learning_rate": 1.5338572694222955e-05,
"loss": 2.4663,
"step": 454000
},
{
"epoch": 2.08,
"learning_rate": 1.530046130815996e-05,
"loss": 2.4687,
"step": 454500
},
{
"epoch": 2.09,
"learning_rate": 1.5262273546573794e-05,
"loss": 2.4509,
"step": 455000
},
{
"epoch": 2.09,
"learning_rate": 1.5224085784987627e-05,
"loss": 2.4578,
"step": 455500
},
{
"epoch": 2.09,
"learning_rate": 1.518589802340146e-05,
"loss": 2.4494,
"step": 456000
},
{
"epoch": 2.09,
"learning_rate": 1.5147710261815295e-05,
"loss": 2.4576,
"step": 456500
},
{
"epoch": 2.09,
"learning_rate": 1.5109598875752299e-05,
"loss": 2.4476,
"step": 457000
},
{
"epoch": 2.1,
"learning_rate": 1.5071411114166134e-05,
"loss": 2.4516,
"step": 457500
},
{
"epoch": 2.1,
"learning_rate": 1.5033223352579966e-05,
"loss": 2.4582,
"step": 458000
},
{
"epoch": 2.1,
"learning_rate": 1.49950355909938e-05,
"loss": 2.4447,
"step": 458500
},
{
"epoch": 2.1,
"learning_rate": 1.4956847829407632e-05,
"loss": 2.4487,
"step": 459000
},
{
"epoch": 2.11,
"learning_rate": 1.491873644334464e-05,
"loss": 2.4449,
"step": 459500
},
{
"epoch": 2.11,
"learning_rate": 1.4880548681758471e-05,
"loss": 2.4465,
"step": 460000
},
{
"epoch": 2.11,
"learning_rate": 1.4842360920172305e-05,
"loss": 2.4439,
"step": 460500
},
{
"epoch": 2.11,
"learning_rate": 1.4804173158586137e-05,
"loss": 2.4527,
"step": 461000
},
{
"epoch": 2.11,
"learning_rate": 1.4766061772523143e-05,
"loss": 2.4518,
"step": 461500
},
{
"epoch": 2.12,
"learning_rate": 1.4727874010936976e-05,
"loss": 2.4407,
"step": 462000
},
{
"epoch": 2.12,
"learning_rate": 1.4689686249350809e-05,
"loss": 2.4485,
"step": 462500
},
{
"epoch": 2.12,
"learning_rate": 1.4651498487764642e-05,
"loss": 2.4401,
"step": 463000
},
{
"epoch": 2.12,
"learning_rate": 1.4613310726178475e-05,
"loss": 2.4549,
"step": 463500
},
{
"epoch": 2.13,
"learning_rate": 1.4575122964592308e-05,
"loss": 2.4468,
"step": 464000
},
{
"epoch": 2.13,
"learning_rate": 1.453693520300614e-05,
"loss": 2.4626,
"step": 464500
},
{
"epoch": 2.13,
"learning_rate": 1.4498747441419974e-05,
"loss": 2.4491,
"step": 465000
},
{
"epoch": 2.13,
"learning_rate": 1.446063605535698e-05,
"loss": 2.4539,
"step": 465500
},
{
"epoch": 2.14,
"learning_rate": 1.4422524669293985e-05,
"loss": 2.4641,
"step": 466000
},
{
"epoch": 2.14,
"learning_rate": 1.4384336907707819e-05,
"loss": 2.4342,
"step": 466500
},
{
"epoch": 2.14,
"learning_rate": 1.434614914612165e-05,
"loss": 2.4475,
"step": 467000
},
{
"epoch": 2.14,
"learning_rate": 1.4307961384535485e-05,
"loss": 2.4415,
"step": 467500
},
{
"epoch": 2.14,
"learning_rate": 1.4269773622949317e-05,
"loss": 2.4543,
"step": 468000
},
{
"epoch": 2.15,
"learning_rate": 1.423158586136315e-05,
"loss": 2.4339,
"step": 468500
},
{
"epoch": 2.15,
"learning_rate": 1.4193398099776983e-05,
"loss": 2.4473,
"step": 469000
},
{
"epoch": 2.15,
"learning_rate": 1.4155210338190817e-05,
"loss": 2.4478,
"step": 469500
},
{
"epoch": 2.15,
"learning_rate": 1.4117022576604649e-05,
"loss": 2.4604,
"step": 470000
},
{
"epoch": 2.16,
"learning_rate": 1.4078834815018483e-05,
"loss": 2.4417,
"step": 470500
},
{
"epoch": 2.16,
"learning_rate": 1.4040647053432315e-05,
"loss": 2.4471,
"step": 471000
},
{
"epoch": 2.16,
"learning_rate": 1.400245929184615e-05,
"loss": 2.4461,
"step": 471500
},
{
"epoch": 2.16,
"learning_rate": 1.3964271530259984e-05,
"loss": 2.4557,
"step": 472000
},
{
"epoch": 2.17,
"learning_rate": 1.392616014419699e-05,
"loss": 2.4534,
"step": 472500
},
{
"epoch": 2.17,
"learning_rate": 1.3888048758133995e-05,
"loss": 2.4506,
"step": 473000
},
{
"epoch": 2.17,
"learning_rate": 1.3849860996547828e-05,
"loss": 2.4578,
"step": 473500
},
{
"epoch": 2.17,
"learning_rate": 1.381167323496166e-05,
"loss": 2.4611,
"step": 474000
},
{
"epoch": 2.17,
"learning_rate": 1.3773485473375494e-05,
"loss": 2.426,
"step": 474500
},
{
"epoch": 2.18,
"learning_rate": 1.3735297711789327e-05,
"loss": 2.4545,
"step": 475000
},
{
"epoch": 2.18,
"learning_rate": 1.369710995020316e-05,
"loss": 2.4478,
"step": 475500
},
{
"epoch": 2.18,
"learning_rate": 1.3658922188616993e-05,
"loss": 2.4582,
"step": 476000
},
{
"epoch": 2.18,
"learning_rate": 1.3620810802554e-05,
"loss": 2.4719,
"step": 476500
},
{
"epoch": 2.19,
"learning_rate": 1.3582623040967832e-05,
"loss": 2.4513,
"step": 477000
},
{
"epoch": 2.19,
"learning_rate": 1.3544435279381665e-05,
"loss": 2.4509,
"step": 477500
},
{
"epoch": 2.19,
"learning_rate": 1.3506247517795498e-05,
"loss": 2.4464,
"step": 478000
},
{
"epoch": 2.19,
"learning_rate": 1.3468059756209331e-05,
"loss": 2.4385,
"step": 478500
},
{
"epoch": 2.2,
"learning_rate": 1.3429871994623164e-05,
"loss": 2.4486,
"step": 479000
},
{
"epoch": 2.2,
"learning_rate": 1.3391684233036997e-05,
"loss": 2.4516,
"step": 479500
},
{
"epoch": 2.2,
"learning_rate": 1.335349647145083e-05,
"loss": 2.4542,
"step": 480000
},
{
"epoch": 2.2,
"learning_rate": 1.3315385085387835e-05,
"loss": 2.4439,
"step": 480500
},
{
"epoch": 2.2,
"learning_rate": 1.3277197323801669e-05,
"loss": 2.4697,
"step": 481000
},
{
"epoch": 2.21,
"learning_rate": 1.32390095622155e-05,
"loss": 2.4428,
"step": 481500
},
{
"epoch": 2.21,
"learning_rate": 1.3200821800629335e-05,
"loss": 2.4372,
"step": 482000
},
{
"epoch": 2.21,
"learning_rate": 1.316271041456634e-05,
"loss": 2.4531,
"step": 482500
},
{
"epoch": 2.21,
"learning_rate": 1.3124522652980174e-05,
"loss": 2.4374,
"step": 483000
},
{
"epoch": 2.22,
"learning_rate": 1.3086334891394006e-05,
"loss": 2.4511,
"step": 483500
},
{
"epoch": 2.22,
"learning_rate": 1.304814712980784e-05,
"loss": 2.4434,
"step": 484000
},
{
"epoch": 2.22,
"learning_rate": 1.3009959368221672e-05,
"loss": 2.4376,
"step": 484500
},
{
"epoch": 2.22,
"learning_rate": 1.2971771606635506e-05,
"loss": 2.4383,
"step": 485000
},
{
"epoch": 2.22,
"learning_rate": 1.2933660220572511e-05,
"loss": 2.4472,
"step": 485500
},
{
"epoch": 2.23,
"learning_rate": 1.2895472458986343e-05,
"loss": 2.4618,
"step": 486000
},
{
"epoch": 2.23,
"learning_rate": 1.2857284697400177e-05,
"loss": 2.4335,
"step": 486500
},
{
"epoch": 2.23,
"learning_rate": 1.2819096935814012e-05,
"loss": 2.4536,
"step": 487000
},
{
"epoch": 2.23,
"learning_rate": 1.2781061925274187e-05,
"loss": 2.4489,
"step": 487500
},
{
"epoch": 2.24,
"learning_rate": 1.2742874163688021e-05,
"loss": 2.4469,
"step": 488000
},
{
"epoch": 2.24,
"learning_rate": 1.2704686402101857e-05,
"loss": 2.4408,
"step": 488500
},
{
"epoch": 2.24,
"learning_rate": 1.2666498640515689e-05,
"loss": 2.4476,
"step": 489000
},
{
"epoch": 2.24,
"learning_rate": 1.2628310878929523e-05,
"loss": 2.4503,
"step": 489500
},
{
"epoch": 2.25,
"learning_rate": 1.2590123117343355e-05,
"loss": 2.4336,
"step": 490000
},
{
"epoch": 2.25,
"learning_rate": 1.2551935355757189e-05,
"loss": 2.4564,
"step": 490500
},
{
"epoch": 2.25,
"learning_rate": 1.2513823969694194e-05,
"loss": 2.431,
"step": 491000
},
{
"epoch": 2.25,
"learning_rate": 1.2475636208108026e-05,
"loss": 2.4405,
"step": 491500
},
{
"epoch": 2.25,
"learning_rate": 1.243744844652186e-05,
"loss": 2.443,
"step": 492000
},
{
"epoch": 2.26,
"learning_rate": 1.2399260684935694e-05,
"loss": 2.4474,
"step": 492500
},
{
"epoch": 2.26,
"learning_rate": 1.2361072923349526e-05,
"loss": 2.443,
"step": 493000
},
{
"epoch": 2.26,
"learning_rate": 1.232288516176336e-05,
"loss": 2.4471,
"step": 493500
},
{
"epoch": 2.26,
"learning_rate": 1.2284697400177192e-05,
"loss": 2.4584,
"step": 494000
},
{
"epoch": 2.27,
"learning_rate": 1.2246509638591026e-05,
"loss": 2.4465,
"step": 494500
},
{
"epoch": 2.27,
"learning_rate": 1.2208321877004858e-05,
"loss": 2.4397,
"step": 495000
},
{
"epoch": 2.27,
"learning_rate": 1.2170134115418692e-05,
"loss": 2.4618,
"step": 495500
},
{
"epoch": 2.27,
"learning_rate": 1.2131946353832524e-05,
"loss": 2.4752,
"step": 496000
},
{
"epoch": 2.28,
"learning_rate": 1.2093758592246358e-05,
"loss": 2.4567,
"step": 496500
},
{
"epoch": 2.28,
"learning_rate": 1.205557083066019e-05,
"loss": 2.4568,
"step": 497000
},
{
"epoch": 2.28,
"learning_rate": 1.2017612195643541e-05,
"loss": 2.4374,
"step": 497500
},
{
"epoch": 2.28,
"learning_rate": 1.1979424434057373e-05,
"loss": 2.443,
"step": 498000
},
{
"epoch": 2.28,
"learning_rate": 1.1941236672471207e-05,
"loss": 2.4478,
"step": 498500
},
{
"epoch": 2.29,
"learning_rate": 1.190304891088504e-05,
"loss": 2.4496,
"step": 499000
},
{
"epoch": 2.29,
"learning_rate": 1.1864861149298873e-05,
"loss": 2.4571,
"step": 499500
},
{
"epoch": 2.29,
"learning_rate": 1.1826673387712707e-05,
"loss": 2.4355,
"step": 500000
},
{
"epoch": 2.29,
"learning_rate": 1.178848562612654e-05,
"loss": 2.4499,
"step": 500500
},
{
"epoch": 2.3,
"learning_rate": 1.1750297864540373e-05,
"loss": 2.4439,
"step": 501000
},
{
"epoch": 2.3,
"learning_rate": 1.1712110102954207e-05,
"loss": 2.4372,
"step": 501500
},
{
"epoch": 2.3,
"learning_rate": 1.1673998716891212e-05,
"loss": 2.4406,
"step": 502000
},
{
"epoch": 2.3,
"learning_rate": 1.1635810955305044e-05,
"loss": 2.4465,
"step": 502500
},
{
"epoch": 2.31,
"learning_rate": 1.1597623193718878e-05,
"loss": 2.4579,
"step": 503000
},
{
"epoch": 2.31,
"learning_rate": 1.155943543213271e-05,
"loss": 2.456,
"step": 503500
},
{
"epoch": 2.31,
"learning_rate": 1.1521324046069715e-05,
"loss": 2.4521,
"step": 504000
},
{
"epoch": 2.31,
"learning_rate": 1.148313628448355e-05,
"loss": 2.4564,
"step": 504500
},
{
"epoch": 2.31,
"learning_rate": 1.1445024898420554e-05,
"loss": 2.4622,
"step": 505000
},
{
"epoch": 2.32,
"learning_rate": 1.1406837136834388e-05,
"loss": 2.4393,
"step": 505500
},
{
"epoch": 2.32,
"learning_rate": 1.136864937524822e-05,
"loss": 2.452,
"step": 506000
},
{
"epoch": 2.32,
"learning_rate": 1.1330461613662054e-05,
"loss": 2.4385,
"step": 506500
},
{
"epoch": 2.32,
"learning_rate": 1.1292273852075886e-05,
"loss": 2.4319,
"step": 507000
},
{
"epoch": 2.33,
"learning_rate": 1.125408609048972e-05,
"loss": 2.4473,
"step": 507500
},
{
"epoch": 2.33,
"learning_rate": 1.1215898328903554e-05,
"loss": 2.4442,
"step": 508000
},
{
"epoch": 2.33,
"learning_rate": 1.1177710567317386e-05,
"loss": 2.4359,
"step": 508500
},
{
"epoch": 2.33,
"learning_rate": 1.113952280573122e-05,
"loss": 2.4457,
"step": 509000
},
{
"epoch": 2.33,
"learning_rate": 1.1101335044145052e-05,
"loss": 2.4606,
"step": 509500
},
{
"epoch": 2.34,
"learning_rate": 1.1063223658082059e-05,
"loss": 2.4343,
"step": 510000
},
{
"epoch": 2.34,
"learning_rate": 1.1025035896495891e-05,
"loss": 2.4368,
"step": 510500
},
{
"epoch": 2.34,
"learning_rate": 1.0986848134909725e-05,
"loss": 2.4222,
"step": 511000
},
{
"epoch": 2.34,
"learning_rate": 1.0948660373323557e-05,
"loss": 2.442,
"step": 511500
},
{
"epoch": 2.35,
"learning_rate": 1.0910472611737391e-05,
"loss": 2.451,
"step": 512000
},
{
"epoch": 2.35,
"learning_rate": 1.0872284850151223e-05,
"loss": 2.4269,
"step": 512500
},
{
"epoch": 2.35,
"learning_rate": 1.0834173464088228e-05,
"loss": 2.455,
"step": 513000
},
{
"epoch": 2.35,
"learning_rate": 1.0795985702502062e-05,
"loss": 2.4436,
"step": 513500
},
{
"epoch": 2.36,
"learning_rate": 1.0757874316439068e-05,
"loss": 2.4479,
"step": 514000
},
{
"epoch": 2.36,
"learning_rate": 1.0719686554852901e-05,
"loss": 2.448,
"step": 514500
},
{
"epoch": 2.36,
"learning_rate": 1.0681498793266734e-05,
"loss": 2.4465,
"step": 515000
},
{
"epoch": 2.36,
"learning_rate": 1.0643311031680567e-05,
"loss": 2.4565,
"step": 515500
},
{
"epoch": 2.36,
"learning_rate": 1.0605123270094401e-05,
"loss": 2.4629,
"step": 516000
},
{
"epoch": 2.37,
"learning_rate": 1.0566935508508233e-05,
"loss": 2.4651,
"step": 516500
},
{
"epoch": 2.37,
"learning_rate": 1.0528747746922067e-05,
"loss": 2.4393,
"step": 517000
},
{
"epoch": 2.37,
"learning_rate": 1.04905599853359e-05,
"loss": 2.4447,
"step": 517500
},
{
"epoch": 2.37,
"learning_rate": 1.0452372223749733e-05,
"loss": 2.4384,
"step": 518000
},
{
"epoch": 2.38,
"learning_rate": 1.0414260837686738e-05,
"loss": 2.4547,
"step": 518500
},
{
"epoch": 2.38,
"learning_rate": 1.0376073076100572e-05,
"loss": 2.4342,
"step": 519000
},
{
"epoch": 2.38,
"learning_rate": 1.0337885314514404e-05,
"loss": 2.4307,
"step": 519500
},
{
"epoch": 2.38,
"learning_rate": 1.0299697552928238e-05,
"loss": 2.4439,
"step": 520000
},
{
"epoch": 2.39,
"learning_rate": 1.026150979134207e-05,
"loss": 2.4509,
"step": 520500
},
{
"epoch": 2.39,
"learning_rate": 1.0223322029755904e-05,
"loss": 2.4485,
"step": 521000
},
{
"epoch": 2.39,
"learning_rate": 1.0185134268169736e-05,
"loss": 2.4511,
"step": 521500
},
{
"epoch": 2.39,
"learning_rate": 1.0146946506583572e-05,
"loss": 2.4464,
"step": 522000
},
{
"epoch": 2.39,
"learning_rate": 1.0108835120520577e-05,
"loss": 2.4437,
"step": 522500
},
{
"epoch": 2.4,
"learning_rate": 1.007064735893441e-05,
"loss": 2.4495,
"step": 523000
},
{
"epoch": 2.4,
"learning_rate": 1.0032535972871414e-05,
"loss": 2.4382,
"step": 523500
},
{
"epoch": 2.4,
"learning_rate": 9.994348211285248e-06,
"loss": 2.443,
"step": 524000
},
{
"epoch": 2.4,
"learning_rate": 9.956236825222254e-06,
"loss": 2.4442,
"step": 524500
},
{
"epoch": 2.41,
"learning_rate": 9.918049063636087e-06,
"loss": 2.4554,
"step": 525000
},
{
"epoch": 2.41,
"learning_rate": 9.87986130204992e-06,
"loss": 2.4317,
"step": 525500
},
{
"epoch": 2.41,
"learning_rate": 9.841673540463753e-06,
"loss": 2.4502,
"step": 526000
},
{
"epoch": 2.41,
"learning_rate": 9.803485778877586e-06,
"loss": 2.4515,
"step": 526500
},
{
"epoch": 2.41,
"learning_rate": 9.76529801729142e-06,
"loss": 2.4367,
"step": 527000
},
{
"epoch": 2.42,
"learning_rate": 9.727110255705252e-06,
"loss": 2.4285,
"step": 527500
},
{
"epoch": 2.42,
"learning_rate": 9.688922494119085e-06,
"loss": 2.4296,
"step": 528000
},
{
"epoch": 2.42,
"learning_rate": 9.650734732532918e-06,
"loss": 2.4533,
"step": 528500
},
{
"epoch": 2.42,
"learning_rate": 9.612623346469923e-06,
"loss": 2.427,
"step": 529000
},
{
"epoch": 2.43,
"learning_rate": 9.574435584883757e-06,
"loss": 2.4342,
"step": 529500
},
{
"epoch": 2.43,
"learning_rate": 9.536247823297589e-06,
"loss": 2.4367,
"step": 530000
},
{
"epoch": 2.43,
"learning_rate": 9.498060061711424e-06,
"loss": 2.4302,
"step": 530500
},
{
"epoch": 2.43,
"learning_rate": 9.45994867564843e-06,
"loss": 2.4319,
"step": 531000
},
{
"epoch": 2.44,
"learning_rate": 9.421760914062262e-06,
"loss": 2.4366,
"step": 531500
},
{
"epoch": 2.44,
"learning_rate": 9.383573152476095e-06,
"loss": 2.457,
"step": 532000
},
{
"epoch": 2.44,
"learning_rate": 9.345385390889928e-06,
"loss": 2.4379,
"step": 532500
},
{
"epoch": 2.44,
"learning_rate": 9.307197629303761e-06,
"loss": 2.4566,
"step": 533000
},
{
"epoch": 2.44,
"learning_rate": 9.269086243240767e-06,
"loss": 2.4534,
"step": 533500
},
{
"epoch": 2.45,
"learning_rate": 9.2308984816546e-06,
"loss": 2.4507,
"step": 534000
},
{
"epoch": 2.45,
"learning_rate": 9.192710720068433e-06,
"loss": 2.4391,
"step": 534500
},
{
"epoch": 2.45,
"learning_rate": 9.154599334005438e-06,
"loss": 2.4498,
"step": 535000
},
{
"epoch": 2.45,
"learning_rate": 9.116411572419272e-06,
"loss": 2.4481,
"step": 535500
},
{
"epoch": 2.46,
"learning_rate": 9.078223810833104e-06,
"loss": 2.4372,
"step": 536000
},
{
"epoch": 2.46,
"learning_rate": 9.040036049246938e-06,
"loss": 2.4461,
"step": 536500
},
{
"epoch": 2.46,
"learning_rate": 9.00184828766077e-06,
"loss": 2.4341,
"step": 537000
},
{
"epoch": 2.46,
"learning_rate": 8.963660526074604e-06,
"loss": 2.4365,
"step": 537500
},
{
"epoch": 2.47,
"learning_rate": 8.925472764488438e-06,
"loss": 2.4351,
"step": 538000
},
{
"epoch": 2.47,
"learning_rate": 8.887285002902271e-06,
"loss": 2.4408,
"step": 538500
},
{
"epoch": 2.47,
"learning_rate": 8.849173616839277e-06,
"loss": 2.4623,
"step": 539000
},
{
"epoch": 2.47,
"learning_rate": 8.810985855253109e-06,
"loss": 2.4498,
"step": 539500
},
{
"epoch": 2.47,
"learning_rate": 8.772798093666943e-06,
"loss": 2.4255,
"step": 540000
},
{
"epoch": 2.48,
"learning_rate": 8.734610332080775e-06,
"loss": 2.4218,
"step": 540500
},
{
"epoch": 2.48,
"learning_rate": 8.696422570494609e-06,
"loss": 2.4306,
"step": 541000
},
{
"epoch": 2.48,
"learning_rate": 8.658311184431614e-06,
"loss": 2.441,
"step": 541500
},
{
"epoch": 2.48,
"learning_rate": 8.620123422845448e-06,
"loss": 2.4526,
"step": 542000
},
{
"epoch": 2.49,
"learning_rate": 8.58193566125928e-06,
"loss": 2.4509,
"step": 542500
},
{
"epoch": 2.49,
"learning_rate": 8.543824275196285e-06,
"loss": 2.4365,
"step": 543000
},
{
"epoch": 2.49,
"learning_rate": 8.505636513610119e-06,
"loss": 2.4411,
"step": 543500
},
{
"epoch": 2.49,
"learning_rate": 8.467448752023951e-06,
"loss": 2.4501,
"step": 544000
},
{
"epoch": 2.5,
"learning_rate": 8.429260990437785e-06,
"loss": 2.4357,
"step": 544500
},
{
"epoch": 2.5,
"learning_rate": 8.391073228851617e-06,
"loss": 2.4511,
"step": 545000
},
{
"epoch": 2.5,
"learning_rate": 8.35288546726545e-06,
"loss": 2.4388,
"step": 545500
},
{
"epoch": 2.5,
"learning_rate": 8.314697705679285e-06,
"loss": 2.4441,
"step": 546000
},
{
"epoch": 2.5,
"learning_rate": 8.276509944093119e-06,
"loss": 2.4441,
"step": 546500
},
{
"epoch": 2.51,
"learning_rate": 8.23832218250695e-06,
"loss": 2.4389,
"step": 547000
},
{
"epoch": 2.51,
"learning_rate": 8.200134420920785e-06,
"loss": 2.4493,
"step": 547500
},
{
"epoch": 2.51,
"learning_rate": 8.161946659334617e-06,
"loss": 2.46,
"step": 548000
},
{
"epoch": 2.51,
"learning_rate": 8.12375889774845e-06,
"loss": 2.4401,
"step": 548500
},
{
"epoch": 2.52,
"learning_rate": 8.085571136162283e-06,
"loss": 2.4234,
"step": 549000
},
{
"epoch": 2.52,
"learning_rate": 8.047383374576117e-06,
"loss": 2.432,
"step": 549500
},
{
"epoch": 2.52,
"learning_rate": 8.009195612989949e-06,
"loss": 2.4419,
"step": 550000
},
{
"epoch": 2.52,
"learning_rate": 7.971007851403783e-06,
"loss": 2.4433,
"step": 550500
},
{
"epoch": 2.52,
"learning_rate": 7.932896465340788e-06,
"loss": 2.4406,
"step": 551000
},
{
"epoch": 2.53,
"learning_rate": 7.89470870375462e-06,
"loss": 2.4506,
"step": 551500
},
{
"epoch": 2.53,
"learning_rate": 7.856520942168455e-06,
"loss": 2.4455,
"step": 552000
},
{
"epoch": 2.53,
"learning_rate": 7.818409556105459e-06,
"loss": 2.426,
"step": 552500
},
{
"epoch": 2.53,
"learning_rate": 7.780221794519293e-06,
"loss": 2.4455,
"step": 553000
},
{
"epoch": 2.54,
"learning_rate": 7.742034032933127e-06,
"loss": 2.4256,
"step": 553500
},
{
"epoch": 2.54,
"learning_rate": 7.703846271346959e-06,
"loss": 2.4406,
"step": 554000
},
{
"epoch": 2.54,
"learning_rate": 7.665734885283966e-06,
"loss": 2.4354,
"step": 554500
},
{
"epoch": 2.54,
"learning_rate": 7.627547123697798e-06,
"loss": 2.4411,
"step": 555000
},
{
"epoch": 2.55,
"learning_rate": 7.589359362111631e-06,
"loss": 2.4288,
"step": 555500
},
{
"epoch": 2.55,
"learning_rate": 7.551171600525464e-06,
"loss": 2.4325,
"step": 556000
},
{
"epoch": 2.55,
"learning_rate": 7.512983838939297e-06,
"loss": 2.4231,
"step": 556500
},
{
"epoch": 2.55,
"learning_rate": 7.47479607735313e-06,
"loss": 2.4492,
"step": 557000
},
{
"epoch": 2.55,
"learning_rate": 7.436608315766963e-06,
"loss": 2.4552,
"step": 557500
},
{
"epoch": 2.56,
"learning_rate": 7.398420554180796e-06,
"loss": 2.4487,
"step": 558000
},
{
"epoch": 2.56,
"learning_rate": 7.360385543640974e-06,
"loss": 2.4361,
"step": 558500
},
{
"epoch": 2.56,
"learning_rate": 7.322197782054807e-06,
"loss": 2.4493,
"step": 559000
},
{
"epoch": 2.56,
"learning_rate": 7.28401002046864e-06,
"loss": 2.4353,
"step": 559500
},
{
"epoch": 2.57,
"learning_rate": 7.245822258882473e-06,
"loss": 2.4285,
"step": 560000
},
{
"epoch": 2.57,
"learning_rate": 7.207634497296306e-06,
"loss": 2.4285,
"step": 560500
},
{
"epoch": 2.57,
"learning_rate": 7.169446735710141e-06,
"loss": 2.4331,
"step": 561000
},
{
"epoch": 2.57,
"learning_rate": 7.131258974123974e-06,
"loss": 2.4412,
"step": 561500
},
{
"epoch": 2.58,
"learning_rate": 7.093071212537807e-06,
"loss": 2.4206,
"step": 562000
},
{
"epoch": 2.58,
"learning_rate": 7.05488345095164e-06,
"loss": 2.4278,
"step": 562500
},
{
"epoch": 2.58,
"learning_rate": 7.016695689365473e-06,
"loss": 2.4425,
"step": 563000
},
{
"epoch": 2.58,
"learning_rate": 6.978507927779306e-06,
"loss": 2.4253,
"step": 563500
},
{
"epoch": 2.58,
"learning_rate": 6.940320166193139e-06,
"loss": 2.4526,
"step": 564000
},
{
"epoch": 2.59,
"learning_rate": 6.902208780130144e-06,
"loss": 2.4563,
"step": 564500
},
{
"epoch": 2.59,
"learning_rate": 6.864021018543977e-06,
"loss": 2.4494,
"step": 565000
},
{
"epoch": 2.59,
"learning_rate": 6.82583325695781e-06,
"loss": 2.4169,
"step": 565500
},
{
"epoch": 2.59,
"learning_rate": 6.787645495371643e-06,
"loss": 2.4331,
"step": 566000
},
{
"epoch": 2.6,
"learning_rate": 6.749457733785476e-06,
"loss": 2.4448,
"step": 566500
},
{
"epoch": 2.6,
"learning_rate": 6.711346347722482e-06,
"loss": 2.4283,
"step": 567000
},
{
"epoch": 2.6,
"learning_rate": 6.673158586136315e-06,
"loss": 2.4315,
"step": 567500
},
{
"epoch": 2.6,
"learning_rate": 6.634970824550149e-06,
"loss": 2.4446,
"step": 568000
},
{
"epoch": 2.61,
"learning_rate": 6.596783062963982e-06,
"loss": 2.4255,
"step": 568500
},
{
"epoch": 2.61,
"learning_rate": 6.558671676900988e-06,
"loss": 2.4402,
"step": 569000
},
{
"epoch": 2.61,
"learning_rate": 6.520560290837993e-06,
"loss": 2.4345,
"step": 569500
},
{
"epoch": 2.61,
"learning_rate": 6.482372529251826e-06,
"loss": 2.4266,
"step": 570000
},
{
"epoch": 2.61,
"learning_rate": 6.444184767665659e-06,
"loss": 2.4297,
"step": 570500
},
{
"epoch": 2.62,
"learning_rate": 6.405997006079492e-06,
"loss": 2.4315,
"step": 571000
},
{
"epoch": 2.62,
"learning_rate": 6.367885620016498e-06,
"loss": 2.4302,
"step": 571500
},
{
"epoch": 2.62,
"learning_rate": 6.329697858430331e-06,
"loss": 2.428,
"step": 572000
},
{
"epoch": 2.62,
"learning_rate": 6.291510096844164e-06,
"loss": 2.4299,
"step": 572500
},
{
"epoch": 2.63,
"learning_rate": 6.253322335257997e-06,
"loss": 2.4304,
"step": 573000
},
{
"epoch": 2.63,
"learning_rate": 6.21513457367183e-06,
"loss": 2.452,
"step": 573500
},
{
"epoch": 2.63,
"learning_rate": 6.176946812085663e-06,
"loss": 2.4342,
"step": 574000
},
{
"epoch": 2.63,
"learning_rate": 6.138835426022668e-06,
"loss": 2.4262,
"step": 574500
},
{
"epoch": 2.63,
"learning_rate": 6.100647664436502e-06,
"loss": 2.446,
"step": 575000
},
{
"epoch": 2.64,
"learning_rate": 6.062536278373507e-06,
"loss": 2.4221,
"step": 575500
},
{
"epoch": 2.64,
"learning_rate": 6.02434851678734e-06,
"loss": 2.4428,
"step": 576000
},
{
"epoch": 2.64,
"learning_rate": 5.986160755201173e-06,
"loss": 2.4236,
"step": 576500
},
{
"epoch": 2.64,
"learning_rate": 5.947972993615006e-06,
"loss": 2.4389,
"step": 577000
},
{
"epoch": 2.65,
"learning_rate": 5.909785232028839e-06,
"loss": 2.4393,
"step": 577500
},
{
"epoch": 2.65,
"learning_rate": 5.871597470442672e-06,
"loss": 2.4377,
"step": 578000
},
{
"epoch": 2.65,
"learning_rate": 5.833409708856506e-06,
"loss": 2.4413,
"step": 578500
},
{
"epoch": 2.65,
"learning_rate": 5.795221947270339e-06,
"loss": 2.4346,
"step": 579000
},
{
"epoch": 2.66,
"learning_rate": 5.757034185684172e-06,
"loss": 2.4069,
"step": 579500
},
{
"epoch": 2.66,
"learning_rate": 5.718846424098005e-06,
"loss": 2.4508,
"step": 580000
},
{
"epoch": 2.66,
"learning_rate": 5.680658662511838e-06,
"loss": 2.435,
"step": 580500
},
{
"epoch": 2.66,
"learning_rate": 5.642470900925671e-06,
"loss": 2.4577,
"step": 581000
},
{
"epoch": 2.66,
"learning_rate": 5.604283139339505e-06,
"loss": 2.4099,
"step": 581500
},
{
"epoch": 2.67,
"learning_rate": 5.566095377753338e-06,
"loss": 2.436,
"step": 582000
},
{
"epoch": 2.67,
"learning_rate": 5.527983991690343e-06,
"loss": 2.4231,
"step": 582500
},
{
"epoch": 2.67,
"learning_rate": 5.489796230104176e-06,
"loss": 2.4254,
"step": 583000
},
{
"epoch": 2.67,
"learning_rate": 5.451608468518009e-06,
"loss": 2.448,
"step": 583500
},
{
"epoch": 2.68,
"learning_rate": 5.413497082455015e-06,
"loss": 2.4402,
"step": 584000
},
{
"epoch": 2.68,
"learning_rate": 5.375309320868848e-06,
"loss": 2.4291,
"step": 584500
},
{
"epoch": 2.68,
"learning_rate": 5.337121559282681e-06,
"loss": 2.4349,
"step": 585000
},
{
"epoch": 2.68,
"learning_rate": 5.298933797696515e-06,
"loss": 2.4311,
"step": 585500
},
{
"epoch": 2.69,
"learning_rate": 5.260746036110348e-06,
"loss": 2.4328,
"step": 586000
},
{
"epoch": 2.69,
"learning_rate": 5.222558274524181e-06,
"loss": 2.4363,
"step": 586500
},
{
"epoch": 2.69,
"learning_rate": 5.184370512938014e-06,
"loss": 2.4224,
"step": 587000
},
{
"epoch": 2.69,
"learning_rate": 5.146182751351847e-06,
"loss": 2.437,
"step": 587500
},
{
"epoch": 2.69,
"learning_rate": 5.10799498976568e-06,
"loss": 2.4392,
"step": 588000
},
{
"epoch": 2.7,
"learning_rate": 5.069883603702685e-06,
"loss": 2.4361,
"step": 588500
},
{
"epoch": 2.7,
"learning_rate": 5.031695842116519e-06,
"loss": 2.4345,
"step": 589000
},
{
"epoch": 2.7,
"learning_rate": 4.993508080530352e-06,
"loss": 2.4301,
"step": 589500
},
{
"epoch": 2.7,
"learning_rate": 4.955320318944185e-06,
"loss": 2.4188,
"step": 590000
},
{
"epoch": 2.71,
"learning_rate": 4.917132557358018e-06,
"loss": 2.4286,
"step": 590500
},
{
"epoch": 2.71,
"learning_rate": 4.879021171295023e-06,
"loss": 2.4451,
"step": 591000
},
{
"epoch": 2.71,
"learning_rate": 4.840833409708856e-06,
"loss": 2.4475,
"step": 591500
},
{
"epoch": 2.71,
"learning_rate": 4.802645648122689e-06,
"loss": 2.4494,
"step": 592000
},
{
"epoch": 2.72,
"learning_rate": 4.764457886536523e-06,
"loss": 2.4347,
"step": 592500
},
{
"epoch": 2.72,
"learning_rate": 4.726270124950356e-06,
"loss": 2.4342,
"step": 593000
},
{
"epoch": 2.72,
"learning_rate": 4.688158738887362e-06,
"loss": 2.4301,
"step": 593500
},
{
"epoch": 2.72,
"learning_rate": 4.649970977301195e-06,
"loss": 2.435,
"step": 594000
},
{
"epoch": 2.72,
"learning_rate": 4.611783215715028e-06,
"loss": 2.4211,
"step": 594500
},
{
"epoch": 2.73,
"learning_rate": 4.573595454128861e-06,
"loss": 2.4424,
"step": 595000
},
{
"epoch": 2.73,
"learning_rate": 4.535407692542694e-06,
"loss": 2.4131,
"step": 595500
},
{
"epoch": 2.73,
"learning_rate": 4.497219930956527e-06,
"loss": 2.4284,
"step": 596000
},
{
"epoch": 2.73,
"learning_rate": 4.459108544893532e-06,
"loss": 2.4395,
"step": 596500
},
{
"epoch": 2.74,
"learning_rate": 4.420997158830538e-06,
"loss": 2.4276,
"step": 597000
},
{
"epoch": 2.74,
"learning_rate": 4.3828093972443714e-06,
"loss": 2.4365,
"step": 597500
},
{
"epoch": 2.74,
"learning_rate": 4.3446216356582044e-06,
"loss": 2.4223,
"step": 598000
},
{
"epoch": 2.74,
"learning_rate": 4.3064338740720374e-06,
"loss": 2.4116,
"step": 598500
},
{
"epoch": 2.74,
"learning_rate": 4.2682461124858704e-06,
"loss": 2.4536,
"step": 599000
},
{
"epoch": 2.75,
"learning_rate": 4.2300583508997034e-06,
"loss": 2.4433,
"step": 599500
},
{
"epoch": 2.75,
"learning_rate": 4.1918705893135364e-06,
"loss": 2.4403,
"step": 600000
},
{
"epoch": 2.75,
"learning_rate": 4.15368282772737e-06,
"loss": 2.4414,
"step": 600500
},
{
"epoch": 2.75,
"learning_rate": 4.115495066141203e-06,
"loss": 2.4472,
"step": 601000
},
{
"epoch": 2.76,
"learning_rate": 4.077383680078209e-06,
"loss": 2.4324,
"step": 601500
},
{
"epoch": 2.76,
"learning_rate": 4.039195918492042e-06,
"loss": 2.4367,
"step": 602000
},
{
"epoch": 2.76,
"learning_rate": 4.001008156905875e-06,
"loss": 2.4215,
"step": 602500
},
{
"epoch": 2.76,
"learning_rate": 3.962820395319708e-06,
"loss": 2.4327,
"step": 603000
},
{
"epoch": 2.77,
"learning_rate": 3.924632633733541e-06,
"loss": 2.4284,
"step": 603500
},
{
"epoch": 2.77,
"learning_rate": 3.886444872147374e-06,
"loss": 2.4257,
"step": 604000
},
{
"epoch": 2.77,
"learning_rate": 3.84833348608438e-06,
"loss": 2.4366,
"step": 604500
},
{
"epoch": 2.77,
"learning_rate": 3.8101457244982134e-06,
"loss": 2.4546,
"step": 605000
},
{
"epoch": 2.77,
"learning_rate": 3.7719579629120464e-06,
"loss": 2.4376,
"step": 605500
},
{
"epoch": 2.78,
"learning_rate": 3.7337702013258794e-06,
"loss": 2.4267,
"step": 606000
},
{
"epoch": 2.78,
"learning_rate": 3.6956588152628845e-06,
"loss": 2.4348,
"step": 606500
},
{
"epoch": 2.78,
"learning_rate": 3.6574710536767175e-06,
"loss": 2.4243,
"step": 607000
},
{
"epoch": 2.78,
"learning_rate": 3.6192832920905505e-06,
"loss": 2.4352,
"step": 607500
},
{
"epoch": 2.79,
"learning_rate": 3.5810955305043844e-06,
"loss": 2.424,
"step": 608000
},
{
"epoch": 2.79,
"learning_rate": 3.542984144441389e-06,
"loss": 2.4384,
"step": 608500
},
{
"epoch": 2.79,
"learning_rate": 3.504796382855223e-06,
"loss": 2.4168,
"step": 609000
},
{
"epoch": 2.79,
"learning_rate": 3.466608621269056e-06,
"loss": 2.4274,
"step": 609500
},
{
"epoch": 2.8,
"learning_rate": 3.4284972352060616e-06,
"loss": 2.4322,
"step": 610000
},
{
"epoch": 2.8,
"learning_rate": 3.3903094736198946e-06,
"loss": 2.4362,
"step": 610500
},
{
"epoch": 2.8,
"learning_rate": 3.3521217120337276e-06,
"loss": 2.4174,
"step": 611000
},
{
"epoch": 2.8,
"learning_rate": 3.3139339504475606e-06,
"loss": 2.4281,
"step": 611500
},
{
"epoch": 2.8,
"learning_rate": 3.2757461888613936e-06,
"loss": 2.4349,
"step": 612000
},
{
"epoch": 2.81,
"learning_rate": 3.2375584272752275e-06,
"loss": 2.4416,
"step": 612500
},
{
"epoch": 2.81,
"learning_rate": 3.1993706656890605e-06,
"loss": 2.4361,
"step": 613000
},
{
"epoch": 2.81,
"learning_rate": 3.1611829041028935e-06,
"loss": 2.4214,
"step": 613500
},
{
"epoch": 2.81,
"learning_rate": 3.1229951425167265e-06,
"loss": 2.4186,
"step": 614000
},
{
"epoch": 2.82,
"learning_rate": 3.0848073809305595e-06,
"loss": 2.4362,
"step": 614500
},
{
"epoch": 2.82,
"learning_rate": 3.0466196193443925e-06,
"loss": 2.436,
"step": 615000
},
{
"epoch": 2.82,
"learning_rate": 3.0084318577582255e-06,
"loss": 2.4341,
"step": 615500
},
{
"epoch": 2.82,
"learning_rate": 2.970244096172059e-06,
"loss": 2.4275,
"step": 616000
},
{
"epoch": 2.83,
"learning_rate": 2.9321327101090645e-06,
"loss": 2.4447,
"step": 616500
},
{
"epoch": 2.83,
"learning_rate": 2.8939449485228975e-06,
"loss": 2.4238,
"step": 617000
},
{
"epoch": 2.83,
"learning_rate": 2.855833562459903e-06,
"loss": 2.4418,
"step": 617500
},
{
"epoch": 2.83,
"learning_rate": 2.8177221763969087e-06,
"loss": 2.4255,
"step": 618000
},
{
"epoch": 2.83,
"learning_rate": 2.7795344148107417e-06,
"loss": 2.4169,
"step": 618500
},
{
"epoch": 2.84,
"learning_rate": 2.7413466532245747e-06,
"loss": 2.4201,
"step": 619000
},
{
"epoch": 2.84,
"learning_rate": 2.703158891638408e-06,
"loss": 2.4145,
"step": 619500
},
{
"epoch": 2.84,
"learning_rate": 2.664971130052241e-06,
"loss": 2.4464,
"step": 620000
},
{
"epoch": 2.84,
"learning_rate": 2.626783368466074e-06,
"loss": 2.4336,
"step": 620500
},
{
"epoch": 2.85,
"learning_rate": 2.588595606879907e-06,
"loss": 2.4021,
"step": 621000
},
{
"epoch": 2.85,
"learning_rate": 2.55040784529374e-06,
"loss": 2.4143,
"step": 621500
},
{
"epoch": 2.85,
"learning_rate": 2.512220083707573e-06,
"loss": 2.4113,
"step": 622000
},
{
"epoch": 2.85,
"learning_rate": 2.4740323221214066e-06,
"loss": 2.4401,
"step": 622500
},
{
"epoch": 2.85,
"learning_rate": 2.435920936058412e-06,
"loss": 2.4253,
"step": 623000
},
{
"epoch": 2.86,
"learning_rate": 2.397733174472245e-06,
"loss": 2.4336,
"step": 623500
},
{
"epoch": 2.86,
"learning_rate": 2.359545412886078e-06,
"loss": 2.4242,
"step": 624000
},
{
"epoch": 2.86,
"learning_rate": 2.3213576512999116e-06,
"loss": 2.4268,
"step": 624500
},
{
"epoch": 2.86,
"learning_rate": 2.2831698897137446e-06,
"loss": 2.4487,
"step": 625000
},
{
"epoch": 2.87,
"learning_rate": 2.2449821281275776e-06,
"loss": 2.4442,
"step": 625500
},
{
"epoch": 2.87,
"learning_rate": 2.2068707420645833e-06,
"loss": 2.4219,
"step": 626000
},
{
"epoch": 2.87,
"learning_rate": 2.1686829804784167e-06,
"loss": 2.438,
"step": 626500
},
{
"epoch": 2.87,
"learning_rate": 2.1304952188922497e-06,
"loss": 2.4211,
"step": 627000
},
{
"epoch": 2.88,
"learning_rate": 2.0923074573060827e-06,
"loss": 2.4224,
"step": 627500
},
{
"epoch": 2.88,
"learning_rate": 2.054196071243088e-06,
"loss": 2.4236,
"step": 628000
},
{
"epoch": 2.88,
"learning_rate": 2.0160083096569213e-06,
"loss": 2.4088,
"step": 628500
},
{
"epoch": 2.88,
"learning_rate": 1.9778205480707543e-06,
"loss": 2.4216,
"step": 629000
},
{
"epoch": 2.88,
"learning_rate": 1.93970916200776e-06,
"loss": 2.434,
"step": 629500
},
{
"epoch": 2.89,
"learning_rate": 1.901521400421593e-06,
"loss": 2.4511,
"step": 630000
},
{
"epoch": 2.89,
"learning_rate": 1.863333638835426e-06,
"loss": 2.4358,
"step": 630500
},
{
"epoch": 2.89,
"learning_rate": 1.8251458772492593e-06,
"loss": 2.4467,
"step": 631000
},
{
"epoch": 2.89,
"learning_rate": 1.7869581156630923e-06,
"loss": 2.425,
"step": 631500
},
{
"epoch": 2.9,
"learning_rate": 1.7487703540769253e-06,
"loss": 2.4136,
"step": 632000
},
{
"epoch": 2.9,
"learning_rate": 1.7105825924907588e-06,
"loss": 2.4209,
"step": 632500
},
{
"epoch": 2.9,
"learning_rate": 1.6723948309045918e-06,
"loss": 2.4297,
"step": 633000
},
{
"epoch": 2.9,
"learning_rate": 1.634207069318425e-06,
"loss": 2.4426,
"step": 633500
},
{
"epoch": 2.91,
"learning_rate": 1.5960956832554302e-06,
"loss": 2.4172,
"step": 634000
},
{
"epoch": 2.91,
"learning_rate": 1.5579079216692636e-06,
"loss": 2.4378,
"step": 634500
},
{
"epoch": 2.91,
"learning_rate": 1.519796535606269e-06,
"loss": 2.4301,
"step": 635000
},
{
"epoch": 2.91,
"learning_rate": 1.4816087740201022e-06,
"loss": 2.4116,
"step": 635500
},
{
"epoch": 2.91,
"learning_rate": 1.4434210124339354e-06,
"loss": 2.4258,
"step": 636000
},
{
"epoch": 2.92,
"learning_rate": 1.4052332508477684e-06,
"loss": 2.4371,
"step": 636500
},
{
"epoch": 2.92,
"learning_rate": 1.3670454892616014e-06,
"loss": 2.4201,
"step": 637000
},
{
"epoch": 2.92,
"learning_rate": 1.3288577276754346e-06,
"loss": 2.4293,
"step": 637500
},
{
"epoch": 2.92,
"learning_rate": 1.2906699660892676e-06,
"loss": 2.4312,
"step": 638000
},
{
"epoch": 2.93,
"learning_rate": 1.2524822045031008e-06,
"loss": 2.4226,
"step": 638500
},
{
"epoch": 2.93,
"learning_rate": 1.2143708184401065e-06,
"loss": 2.433,
"step": 639000
},
{
"epoch": 2.93,
"learning_rate": 1.1762594323771119e-06,
"loss": 2.4163,
"step": 639500
},
{
"epoch": 2.93,
"learning_rate": 1.1380716707909449e-06,
"loss": 2.4187,
"step": 640000
},
{
"epoch": 2.94,
"learning_rate": 1.099883909204778e-06,
"loss": 2.4133,
"step": 640500
},
{
"epoch": 2.94,
"learning_rate": 1.0616961476186113e-06,
"loss": 2.4094,
"step": 641000
},
{
"epoch": 2.94,
"learning_rate": 1.0235083860324443e-06,
"loss": 2.4239,
"step": 641500
},
{
"epoch": 2.94,
"learning_rate": 9.853206244462775e-07,
"loss": 2.4384,
"step": 642000
},
{
"epoch": 2.94,
"learning_rate": 9.471328628601107e-07,
"loss": 2.4324,
"step": 642500
},
{
"epoch": 2.95,
"learning_rate": 9.09021476797116e-07,
"loss": 2.4286,
"step": 643000
},
{
"epoch": 2.95,
"learning_rate": 8.708337152109492e-07,
"loss": 2.4441,
"step": 643500
},
{
"epoch": 2.95,
"learning_rate": 8.326459536247824e-07,
"loss": 2.4167,
"step": 644000
},
{
"epoch": 2.95,
"learning_rate": 7.944581920386155e-07,
"loss": 2.4394,
"step": 644500
},
{
"epoch": 2.96,
"learning_rate": 7.562704304524486e-07,
"loss": 2.4224,
"step": 645000
},
{
"epoch": 2.96,
"learning_rate": 7.180826688662817e-07,
"loss": 2.4448,
"step": 645500
},
{
"epoch": 2.96,
"learning_rate": 6.79894907280115e-07,
"loss": 2.4181,
"step": 646000
},
{
"epoch": 2.96,
"learning_rate": 6.417071456939481e-07,
"loss": 2.4207,
"step": 646500
},
{
"epoch": 2.96,
"learning_rate": 6.035193841077812e-07,
"loss": 2.4257,
"step": 647000
},
{
"epoch": 2.97,
"learning_rate": 5.653316225216143e-07,
"loss": 2.4369,
"step": 647500
},
{
"epoch": 2.97,
"learning_rate": 5.272202364586198e-07,
"loss": 2.4371,
"step": 648000
},
{
"epoch": 2.97,
"learning_rate": 4.890324748724529e-07,
"loss": 2.4236,
"step": 648500
},
{
"epoch": 2.97,
"learning_rate": 4.5084471328628605e-07,
"loss": 2.43,
"step": 649000
},
{
"epoch": 2.98,
"learning_rate": 4.126569517001191e-07,
"loss": 2.4313,
"step": 649500
},
{
"epoch": 2.98,
"learning_rate": 3.744691901139523e-07,
"loss": 2.4409,
"step": 650000
},
{
"epoch": 2.98,
"learning_rate": 3.3635780405095777e-07,
"loss": 2.4284,
"step": 650500
},
{
"epoch": 2.98,
"learning_rate": 2.981700424647909e-07,
"loss": 2.4234,
"step": 651000
},
{
"epoch": 2.99,
"learning_rate": 2.5998228087862404e-07,
"loss": 2.4511,
"step": 651500
},
{
"epoch": 2.99,
"learning_rate": 2.2179451929245717e-07,
"loss": 2.4348,
"step": 652000
},
{
"epoch": 2.99,
"learning_rate": 1.8368313322946263e-07,
"loss": 2.4319,
"step": 652500
},
{
"epoch": 2.99,
"learning_rate": 1.4549537164329576e-07,
"loss": 2.4315,
"step": 653000
},
{
"epoch": 2.99,
"learning_rate": 1.073076100571289e-07,
"loss": 2.4327,
"step": 653500
},
{
"epoch": 3.0,
"learning_rate": 6.911984847096204e-08,
"loss": 2.4033,
"step": 654000
},
{
"epoch": 3.0,
"learning_rate": 3.093208688479516e-08,
"loss": 2.4401,
"step": 654500
},
{
"epoch": 3.0,
"step": 654660,
"total_flos": 9.727742291283542e+18,
"train_loss": 2.543785205228415,
"train_runtime": 2023217.6227,
"train_samples_per_second": 2.589,
"train_steps_per_second": 0.324
}
],
"max_steps": 654660,
"num_train_epochs": 3,
"total_flos": 9.727742291283542e+18,
"trial_name": null,
"trial_params": null
}