{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 6.079355858472596,
"global_step": 900000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 2.2222222222222223e-05,
"loss": 10.6946,
"step": 2000
},
{
"epoch": 0.03,
"learning_rate": 4.4444444444444447e-05,
"loss": 10.5675,
"step": 4000
},
{
"epoch": 0.04,
"learning_rate": 6.666666666666667e-05,
"loss": 10.4211,
"step": 6000
},
{
"epoch": 0.05,
"learning_rate": 8.888888888888889e-05,
"loss": 10.2041,
"step": 8000
},
{
"epoch": 0.07,
"learning_rate": 9.988776655443322e-05,
"loss": 9.8916,
"step": 10000
},
{
"epoch": 0.08,
"learning_rate": 9.966329966329967e-05,
"loss": 9.4948,
"step": 12000
},
{
"epoch": 0.09,
"learning_rate": 9.943883277216612e-05,
"loss": 9.0658,
"step": 14000
},
{
"epoch": 0.11,
"learning_rate": 9.921436588103256e-05,
"loss": 8.6284,
"step": 16000
},
{
"epoch": 0.12,
"learning_rate": 9.8989898989899e-05,
"loss": 8.1946,
"step": 18000
},
{
"epoch": 0.14,
"learning_rate": 9.876543209876543e-05,
"loss": 7.801,
"step": 20000
},
{
"epoch": 0.15,
"learning_rate": 9.854096520763188e-05,
"loss": 7.4541,
"step": 22000
},
{
"epoch": 0.16,
"learning_rate": 9.831649831649832e-05,
"loss": 7.283,
"step": 24000
},
{
"epoch": 0.18,
"learning_rate": 9.809203142536476e-05,
"loss": 7.2067,
"step": 26000
},
{
"epoch": 0.19,
"learning_rate": 9.78675645342312e-05,
"loss": 7.1146,
"step": 28000
},
{
"epoch": 0.2,
"learning_rate": 9.764309764309764e-05,
"loss": 7.0541,
"step": 30000
},
{
"epoch": 0.22,
"learning_rate": 9.74186307519641e-05,
"loss": 6.9996,
"step": 32000
},
{
"epoch": 0.23,
"learning_rate": 9.719416386083054e-05,
"loss": 6.967,
"step": 34000
},
{
"epoch": 0.24,
"learning_rate": 9.696969696969698e-05,
"loss": 6.9272,
"step": 36000
},
{
"epoch": 0.26,
"learning_rate": 9.674523007856342e-05,
"loss": 6.9136,
"step": 38000
},
{
"epoch": 0.27,
"learning_rate": 9.652076318742987e-05,
"loss": 6.8708,
"step": 40000
},
{
"epoch": 0.28,
"learning_rate": 9.62962962962963e-05,
"loss": 6.8544,
"step": 42000
},
{
"epoch": 0.3,
"learning_rate": 9.607182940516274e-05,
"loss": 6.8332,
"step": 44000
},
{
"epoch": 0.31,
"learning_rate": 9.584736251402918e-05,
"loss": 6.8299,
"step": 46000
},
{
"epoch": 0.32,
"learning_rate": 9.562289562289563e-05,
"loss": 6.806,
"step": 48000
},
{
"epoch": 0.34,
"learning_rate": 9.539842873176207e-05,
"loss": 6.7812,
"step": 50000
},
{
"epoch": 0.35,
"learning_rate": 9.517396184062851e-05,
"loss": 6.7821,
"step": 52000
},
{
"epoch": 0.36,
"learning_rate": 9.494949494949495e-05,
"loss": 6.756,
"step": 54000
},
{
"epoch": 0.38,
"learning_rate": 9.472502805836139e-05,
"loss": 6.7547,
"step": 56000
},
{
"epoch": 0.39,
"learning_rate": 9.450056116722785e-05,
"loss": 6.7199,
"step": 58000
},
{
"epoch": 0.41,
"learning_rate": 9.427609427609429e-05,
"loss": 6.6967,
"step": 60000
},
{
"epoch": 0.42,
"learning_rate": 9.405162738496073e-05,
"loss": 6.6726,
"step": 62000
},
{
"epoch": 0.43,
"learning_rate": 9.382716049382717e-05,
"loss": 6.6519,
"step": 64000
},
{
"epoch": 0.45,
"learning_rate": 9.36026936026936e-05,
"loss": 6.6244,
"step": 66000
},
{
"epoch": 0.46,
"learning_rate": 9.337822671156005e-05,
"loss": 6.5836,
"step": 68000
},
{
"epoch": 0.47,
"learning_rate": 9.315375982042649e-05,
"loss": 6.5503,
"step": 70000
},
{
"epoch": 0.49,
"learning_rate": 9.292929292929293e-05,
"loss": 6.5117,
"step": 72000
},
{
"epoch": 0.5,
"learning_rate": 9.270482603815938e-05,
"loss": 6.4852,
"step": 74000
},
{
"epoch": 0.51,
"learning_rate": 9.248035914702582e-05,
"loss": 6.4517,
"step": 76000
},
{
"epoch": 0.53,
"learning_rate": 9.225589225589226e-05,
"loss": 6.405,
"step": 78000
},
{
"epoch": 0.54,
"learning_rate": 9.20314253647587e-05,
"loss": 6.3787,
"step": 80000
},
{
"epoch": 0.55,
"learning_rate": 9.180695847362514e-05,
"loss": 6.3171,
"step": 82000
},
{
"epoch": 0.57,
"learning_rate": 9.15824915824916e-05,
"loss": 6.2806,
"step": 84000
},
{
"epoch": 0.58,
"learning_rate": 9.135802469135802e-05,
"loss": 6.3758,
"step": 86000
},
{
"epoch": 0.59,
"learning_rate": 9.113355780022446e-05,
"loss": 6.3753,
"step": 88000
},
{
"epoch": 0.61,
"learning_rate": 9.090909090909092e-05,
"loss": 6.2428,
"step": 90000
},
{
"epoch": 0.62,
"learning_rate": 9.068462401795736e-05,
"loss": 5.9605,
"step": 92000
},
{
"epoch": 0.63,
"learning_rate": 9.04601571268238e-05,
"loss": 5.774,
"step": 94000
},
{
"epoch": 0.65,
"learning_rate": 9.023569023569024e-05,
"loss": 5.8005,
"step": 96000
},
{
"epoch": 0.66,
"learning_rate": 9.001122334455668e-05,
"loss": 5.8735,
"step": 98000
},
{
"epoch": 0.68,
"learning_rate": 8.978675645342313e-05,
"loss": 5.8452,
"step": 100000
},
{
"epoch": 0.69,
"learning_rate": 8.956228956228957e-05,
"loss": 5.7882,
"step": 102000
},
{
"epoch": 0.7,
"learning_rate": 8.933782267115601e-05,
"loss": 6.0639,
"step": 104000
},
{
"epoch": 0.72,
"learning_rate": 8.911335578002245e-05,
"loss": 6.0042,
"step": 106000
},
{
"epoch": 0.73,
"learning_rate": 8.888888888888889e-05,
"loss": 5.9511,
"step": 108000
},
{
"epoch": 0.74,
"learning_rate": 8.866442199775533e-05,
"loss": 5.8909,
"step": 110000
},
{
"epoch": 0.76,
"learning_rate": 8.843995510662177e-05,
"loss": 5.8925,
"step": 112000
},
{
"epoch": 0.77,
"learning_rate": 8.821548821548821e-05,
"loss": 5.9042,
"step": 114000
},
{
"epoch": 0.78,
"learning_rate": 8.799102132435467e-05,
"loss": 5.9797,
"step": 116000
},
{
"epoch": 0.8,
"learning_rate": 8.776655443322111e-05,
"loss": 6.1174,
"step": 118000
},
{
"epoch": 0.81,
"learning_rate": 8.754208754208755e-05,
"loss": 6.2284,
"step": 120000
},
{
"epoch": 0.82,
"learning_rate": 8.731762065095399e-05,
"loss": 6.1204,
"step": 122000
},
{
"epoch": 0.84,
"learning_rate": 8.709315375982043e-05,
"loss": 5.6589,
"step": 124000
},
{
"epoch": 0.85,
"learning_rate": 8.686868686868688e-05,
"loss": 5.7295,
"step": 126000
},
{
"epoch": 0.86,
"learning_rate": 8.664421997755332e-05,
"loss": 5.7535,
"step": 128000
},
{
"epoch": 0.88,
"learning_rate": 8.641975308641975e-05,
"loss": 5.6045,
"step": 130000
},
{
"epoch": 0.89,
"learning_rate": 8.619528619528619e-05,
"loss": 5.6091,
"step": 132000
},
{
"epoch": 0.91,
"learning_rate": 8.597081930415264e-05,
"loss": 5.5933,
"step": 134000
},
{
"epoch": 0.92,
"learning_rate": 8.574635241301908e-05,
"loss": 5.6344,
"step": 136000
},
{
"epoch": 0.93,
"learning_rate": 8.552188552188552e-05,
"loss": 5.6063,
"step": 138000
},
{
"epoch": 0.95,
"learning_rate": 8.529741863075196e-05,
"loss": 5.4933,
"step": 140000
},
{
"epoch": 0.96,
"learning_rate": 8.507295173961842e-05,
"loss": 5.5386,
"step": 142000
},
{
"epoch": 0.97,
"learning_rate": 8.484848484848486e-05,
"loss": 5.5085,
"step": 144000
},
{
"epoch": 0.99,
"learning_rate": 8.46240179573513e-05,
"loss": 5.4098,
"step": 146000
},
{
"epoch": 1.0,
"learning_rate": 8.439955106621774e-05,
"loss": 5.0913,
"step": 148000
},
{
"epoch": 1.01,
"learning_rate": 8.417508417508418e-05,
"loss": 5.6115,
"step": 150000
},
{
"epoch": 1.03,
"learning_rate": 8.395061728395062e-05,
"loss": 5.5311,
"step": 152000
},
{
"epoch": 1.04,
"learning_rate": 8.372615039281706e-05,
"loss": 5.4583,
"step": 154000
},
{
"epoch": 1.05,
"learning_rate": 8.35016835016835e-05,
"loss": 5.4188,
"step": 156000
},
{
"epoch": 1.07,
"learning_rate": 8.327721661054994e-05,
"loss": 5.3814,
"step": 158000
},
{
"epoch": 1.08,
"learning_rate": 8.30527497194164e-05,
"loss": 5.3292,
"step": 160000
},
{
"epoch": 1.09,
"learning_rate": 8.282828282828283e-05,
"loss": 5.291,
"step": 162000
},
{
"epoch": 1.11,
"learning_rate": 8.260381593714927e-05,
"loss": 5.2601,
"step": 164000
},
{
"epoch": 1.12,
"learning_rate": 8.237934904601572e-05,
"loss": 5.2253,
"step": 166000
},
{
"epoch": 1.13,
"learning_rate": 8.215488215488217e-05,
"loss": 5.2076,
"step": 168000
},
{
"epoch": 1.15,
"learning_rate": 8.193041526374861e-05,
"loss": 5.1529,
"step": 170000
},
{
"epoch": 1.16,
"learning_rate": 8.170594837261504e-05,
"loss": 5.1493,
"step": 172000
},
{
"epoch": 1.18,
"learning_rate": 8.148148148148148e-05,
"loss": 5.1449,
"step": 174000
},
{
"epoch": 1.19,
"learning_rate": 8.125701459034793e-05,
"loss": 5.0768,
"step": 176000
},
{
"epoch": 1.2,
"learning_rate": 8.103254769921437e-05,
"loss": 5.0558,
"step": 178000
},
{
"epoch": 1.22,
"learning_rate": 8.080808080808081e-05,
"loss": 5.0232,
"step": 180000
},
{
"epoch": 1.23,
"learning_rate": 8.058361391694725e-05,
"loss": 5.0037,
"step": 182000
},
{
"epoch": 1.24,
"learning_rate": 8.035914702581369e-05,
"loss": 4.9835,
"step": 184000
},
{
"epoch": 1.26,
"learning_rate": 8.013468013468015e-05,
"loss": 4.9832,
"step": 186000
},
{
"epoch": 1.27,
"learning_rate": 7.991021324354659e-05,
"loss": 4.9235,
"step": 188000
},
{
"epoch": 1.28,
"learning_rate": 7.968574635241303e-05,
"loss": 4.9121,
"step": 190000
},
{
"epoch": 1.3,
"learning_rate": 7.946127946127947e-05,
"loss": 4.8973,
"step": 192000
},
{
"epoch": 1.31,
"learning_rate": 7.92368125701459e-05,
"loss": 4.8992,
"step": 194000
},
{
"epoch": 1.32,
"learning_rate": 7.901234567901235e-05,
"loss": 4.8595,
"step": 196000
},
{
"epoch": 1.34,
"learning_rate": 7.878787878787879e-05,
"loss": 4.84,
"step": 198000
},
{
"epoch": 1.35,
"learning_rate": 7.856341189674523e-05,
"loss": 4.8424,
"step": 200000
},
{
"epoch": 1.36,
"learning_rate": 7.833894500561168e-05,
"loss": 4.8017,
"step": 202000
},
{
"epoch": 1.38,
"learning_rate": 7.811447811447812e-05,
"loss": 4.8083,
"step": 204000
},
{
"epoch": 1.39,
"learning_rate": 7.789001122334456e-05,
"loss": 4.7732,
"step": 206000
},
{
"epoch": 1.41,
"learning_rate": 7.7665544332211e-05,
"loss": 4.7528,
"step": 208000
},
{
"epoch": 1.42,
"learning_rate": 7.744107744107744e-05,
"loss": 4.7499,
"step": 210000
},
{
"epoch": 1.43,
"learning_rate": 7.72166105499439e-05,
"loss": 4.7275,
"step": 212000
},
{
"epoch": 1.45,
"learning_rate": 7.699214365881034e-05,
"loss": 4.7118,
"step": 214000
},
{
"epoch": 1.46,
"learning_rate": 7.676767676767676e-05,
"loss": 4.6839,
"step": 216000
},
{
"epoch": 1.47,
"learning_rate": 7.65432098765432e-05,
"loss": 4.6654,
"step": 218000
},
{
"epoch": 1.49,
"learning_rate": 7.631874298540966e-05,
"loss": 4.6509,
"step": 220000
},
{
"epoch": 1.5,
"learning_rate": 7.60942760942761e-05,
"loss": 4.6436,
"step": 222000
},
{
"epoch": 1.51,
"learning_rate": 7.586980920314254e-05,
"loss": 4.6321,
"step": 224000
},
{
"epoch": 1.53,
"learning_rate": 7.564534231200898e-05,
"loss": 4.6257,
"step": 226000
},
{
"epoch": 1.54,
"learning_rate": 7.542087542087543e-05,
"loss": 4.6259,
"step": 228000
},
{
"epoch": 1.55,
"learning_rate": 7.519640852974187e-05,
"loss": 4.5876,
"step": 230000
},
{
"epoch": 1.57,
"learning_rate": 7.497194163860831e-05,
"loss": 4.5918,
"step": 232000
},
{
"epoch": 1.58,
"learning_rate": 7.474747474747475e-05,
"loss": 4.7349,
"step": 234000
},
{
"epoch": 1.59,
"learning_rate": 7.452300785634119e-05,
"loss": 4.7906,
"step": 236000
},
{
"epoch": 1.61,
"learning_rate": 7.429854096520763e-05,
"loss": 4.5217,
"step": 238000
},
{
"epoch": 1.62,
"learning_rate": 7.407407407407407e-05,
"loss": 4.2357,
"step": 240000
},
{
"epoch": 1.63,
"learning_rate": 7.384960718294051e-05,
"loss": 4.1207,
"step": 242000
},
{
"epoch": 1.65,
"learning_rate": 7.362514029180695e-05,
"loss": 4.1877,
"step": 244000
},
{
"epoch": 1.66,
"learning_rate": 7.340067340067341e-05,
"loss": 4.4264,
"step": 246000
},
{
"epoch": 1.68,
"learning_rate": 7.317620650953985e-05,
"loss": 4.3763,
"step": 248000
},
{
"epoch": 1.69,
"learning_rate": 7.295173961840629e-05,
"loss": 4.3764,
"step": 250000
},
{
"epoch": 1.7,
"learning_rate": 7.272727272727273e-05,
"loss": 4.6481,
"step": 252000
},
{
"epoch": 1.72,
"learning_rate": 7.250280583613918e-05,
"loss": 4.6296,
"step": 254000
},
{
"epoch": 1.73,
"learning_rate": 7.227833894500562e-05,
"loss": 4.6029,
"step": 256000
},
{
"epoch": 1.74,
"learning_rate": 7.205387205387206e-05,
"loss": 4.5606,
"step": 258000
},
{
"epoch": 1.76,
"learning_rate": 7.182940516273849e-05,
"loss": 4.6015,
"step": 260000
},
{
"epoch": 1.77,
"learning_rate": 7.160493827160494e-05,
"loss": 4.6519,
"step": 262000
},
{
"epoch": 1.78,
"learning_rate": 7.138047138047138e-05,
"loss": 4.7609,
"step": 264000
},
{
"epoch": 1.8,
"learning_rate": 7.115600448933782e-05,
"loss": 4.9615,
"step": 266000
},
{
"epoch": 1.81,
"learning_rate": 7.093153759820426e-05,
"loss": 5.1392,
"step": 268000
},
{
"epoch": 1.82,
"learning_rate": 7.07070707070707e-05,
"loss": 5.0403,
"step": 270000
},
{
"epoch": 1.84,
"learning_rate": 7.048260381593716e-05,
"loss": 4.3987,
"step": 272000
},
{
"epoch": 1.85,
"learning_rate": 7.02581369248036e-05,
"loss": 4.6271,
"step": 274000
},
{
"epoch": 1.86,
"learning_rate": 7.003367003367004e-05,
"loss": 4.6795,
"step": 276000
},
{
"epoch": 1.88,
"learning_rate": 6.980920314253648e-05,
"loss": 4.5634,
"step": 278000
},
{
"epoch": 1.89,
"learning_rate": 6.958473625140293e-05,
"loss": 4.5767,
"step": 280000
},
{
"epoch": 1.9,
"learning_rate": 6.936026936026936e-05,
"loss": 4.5943,
"step": 282000
},
{
"epoch": 1.92,
"learning_rate": 6.91358024691358e-05,
"loss": 4.6575,
"step": 284000
},
{
"epoch": 1.93,
"learning_rate": 6.891133557800224e-05,
"loss": 4.6512,
"step": 286000
},
{
"epoch": 1.95,
"learning_rate": 6.86868686868687e-05,
"loss": 4.5433,
"step": 288000
},
{
"epoch": 1.96,
"learning_rate": 6.846240179573513e-05,
"loss": 4.6108,
"step": 290000
},
{
"epoch": 1.97,
"learning_rate": 6.823793490460157e-05,
"loss": 4.6175,
"step": 292000
},
{
"epoch": 1.99,
"learning_rate": 6.801346801346801e-05,
"loss": 4.5076,
"step": 294000
},
{
"epoch": 2.0,
"learning_rate": 6.778900112233445e-05,
"loss": 4.131,
"step": 296000
},
{
"epoch": 2.01,
"learning_rate": 6.756453423120091e-05,
"loss": 4.4705,
"step": 298000
},
{
"epoch": 2.03,
"learning_rate": 6.734006734006735e-05,
"loss": 4.4578,
"step": 300000
},
{
"epoch": 2.04,
"learning_rate": 6.711560044893379e-05,
"loss": 4.4142,
"step": 302000
},
{
"epoch": 2.05,
"learning_rate": 6.689113355780023e-05,
"loss": 4.3968,
"step": 304000
},
{
"epoch": 2.07,
"learning_rate": 6.666666666666667e-05,
"loss": 4.3893,
"step": 306000
},
{
"epoch": 2.08,
"learning_rate": 6.644219977553311e-05,
"loss": 4.3628,
"step": 308000
},
{
"epoch": 2.09,
"learning_rate": 6.621773288439955e-05,
"loss": 4.3452,
"step": 310000
},
{
"epoch": 2.11,
"learning_rate": 6.599326599326599e-05,
"loss": 4.3391,
"step": 312000
},
{
"epoch": 2.12,
"learning_rate": 6.576879910213244e-05,
"loss": 4.3232,
"step": 314000
},
{
"epoch": 2.13,
"learning_rate": 6.554433221099888e-05,
"loss": 4.3286,
"step": 316000
},
{
"epoch": 2.15,
"learning_rate": 6.531986531986533e-05,
"loss": 4.2963,
"step": 318000
},
{
"epoch": 2.16,
"learning_rate": 6.509539842873177e-05,
"loss": 4.3098,
"step": 320000
},
{
"epoch": 2.18,
"learning_rate": 6.48709315375982e-05,
"loss": 4.3239,
"step": 322000
},
{
"epoch": 2.19,
"learning_rate": 6.464646464646466e-05,
"loss": 4.2606,
"step": 324000
},
{
"epoch": 2.2,
"learning_rate": 6.442199775533109e-05,
"loss": 4.2617,
"step": 326000
},
{
"epoch": 2.22,
"learning_rate": 6.419753086419753e-05,
"loss": 4.2429,
"step": 328000
},
{
"epoch": 2.23,
"learning_rate": 6.397306397306398e-05,
"loss": 4.2412,
"step": 330000
},
{
"epoch": 2.24,
"learning_rate": 6.374859708193042e-05,
"loss": 4.2415,
"step": 332000
},
{
"epoch": 2.26,
"learning_rate": 6.352413019079686e-05,
"loss": 4.2517,
"step": 334000
},
{
"epoch": 2.27,
"learning_rate": 6.32996632996633e-05,
"loss": 4.2051,
"step": 336000
},
{
"epoch": 2.28,
"learning_rate": 6.307519640852974e-05,
"loss": 4.2003,
"step": 338000
},
{
"epoch": 2.3,
"learning_rate": 6.28507295173962e-05,
"loss": 4.2039,
"step": 340000
},
{
"epoch": 2.31,
"learning_rate": 6.262626262626264e-05,
"loss": 4.2197,
"step": 342000
},
{
"epoch": 2.32,
"learning_rate": 6.240179573512908e-05,
"loss": 4.1898,
"step": 344000
},
{
"epoch": 2.34,
"learning_rate": 6.21773288439955e-05,
"loss": 4.1859,
"step": 346000
},
{
"epoch": 2.35,
"learning_rate": 6.195286195286196e-05,
"loss": 4.2003,
"step": 348000
},
{
"epoch": 2.36,
"learning_rate": 6.17283950617284e-05,
"loss": 4.1667,
"step": 350000
},
{
"epoch": 2.38,
"learning_rate": 6.150392817059484e-05,
"loss": 4.1896,
"step": 352000
},
{
"epoch": 2.39,
"learning_rate": 6.127946127946128e-05,
"loss": 4.1576,
"step": 354000
},
{
"epoch": 2.4,
"learning_rate": 6.105499438832773e-05,
"loss": 4.1515,
"step": 356000
},
{
"epoch": 2.42,
"learning_rate": 6.0830527497194164e-05,
"loss": 4.1666,
"step": 358000
},
{
"epoch": 2.43,
"learning_rate": 6.060606060606061e-05,
"loss": 4.1459,
"step": 360000
},
{
"epoch": 2.45,
"learning_rate": 6.038159371492705e-05,
"loss": 4.1438,
"step": 362000
},
{
"epoch": 2.46,
"learning_rate": 6.01571268237935e-05,
"loss": 4.1272,
"step": 364000
},
{
"epoch": 2.47,
"learning_rate": 5.993265993265994e-05,
"loss": 4.1105,
"step": 366000
},
{
"epoch": 2.49,
"learning_rate": 5.970819304152637e-05,
"loss": 4.1111,
"step": 368000
},
{
"epoch": 2.5,
"learning_rate": 5.948372615039281e-05,
"loss": 4.1062,
"step": 370000
},
{
"epoch": 2.51,
"learning_rate": 5.925925925925926e-05,
"loss": 4.1072,
"step": 372000
},
{
"epoch": 2.53,
"learning_rate": 5.90347923681257e-05,
"loss": 4.1112,
"step": 374000
},
{
"epoch": 2.54,
"learning_rate": 5.881032547699215e-05,
"loss": 4.1251,
"step": 376000
},
{
"epoch": 2.55,
"learning_rate": 5.858585858585859e-05,
"loss": 4.0813,
"step": 378000
},
{
"epoch": 2.57,
"learning_rate": 5.8361391694725034e-05,
"loss": 4.0908,
"step": 380000
},
{
"epoch": 2.58,
"learning_rate": 5.8136924803591475e-05,
"loss": 4.2286,
"step": 382000
},
{
"epoch": 2.59,
"learning_rate": 5.7912457912457915e-05,
"loss": 4.3118,
"step": 384000
},
{
"epoch": 2.61,
"learning_rate": 5.768799102132436e-05,
"loss": 4.0238,
"step": 386000
},
{
"epoch": 2.62,
"learning_rate": 5.74635241301908e-05,
"loss": 3.7601,
"step": 388000
},
{
"epoch": 2.63,
"learning_rate": 5.7239057239057236e-05,
"loss": 3.6653,
"step": 390000
},
{
"epoch": 2.65,
"learning_rate": 5.701459034792368e-05,
"loss": 3.7115,
"step": 392000
},
{
"epoch": 2.66,
"learning_rate": 5.679012345679012e-05,
"loss": 4.0332,
"step": 394000
},
{
"epoch": 2.67,
"learning_rate": 5.6565656565656563e-05,
"loss": 3.9273,
"step": 396000
},
{
"epoch": 2.69,
"learning_rate": 5.634118967452301e-05,
"loss": 3.9359,
"step": 398000
},
{
"epoch": 2.7,
"learning_rate": 5.611672278338945e-05,
"loss": 4.2338,
"step": 400000
},
{
"epoch": 2.72,
"learning_rate": 5.58922558922559e-05,
"loss": 4.2178,
"step": 402000
},
{
"epoch": 2.73,
"learning_rate": 5.566778900112234e-05,
"loss": 4.2029,
"step": 404000
},
{
"epoch": 2.74,
"learning_rate": 5.5443322109988785e-05,
"loss": 4.1545,
"step": 406000
},
{
"epoch": 2.76,
"learning_rate": 5.5218855218855225e-05,
"loss": 4.2009,
"step": 408000
},
{
"epoch": 2.77,
"learning_rate": 5.4994388327721666e-05,
"loss": 4.2519,
"step": 410000
},
{
"epoch": 2.78,
"learning_rate": 5.47699214365881e-05,
"loss": 4.3561,
"step": 412000
},
{
"epoch": 2.8,
"learning_rate": 5.4545454545454546e-05,
"loss": 4.5544,
"step": 414000
},
{
"epoch": 2.81,
"learning_rate": 5.4320987654320986e-05,
"loss": 4.7515,
"step": 416000
},
{
"epoch": 2.82,
"learning_rate": 5.4096520763187434e-05,
"loss": 4.6801,
"step": 418000
},
{
"epoch": 2.84,
"learning_rate": 5.3872053872053874e-05,
"loss": 3.9784,
"step": 420000
},
{
"epoch": 2.85,
"learning_rate": 5.3647586980920314e-05,
"loss": 4.2571,
"step": 422000
},
{
"epoch": 2.86,
"learning_rate": 5.342312008978676e-05,
"loss": 4.3184,
"step": 424000
},
{
"epoch": 2.88,
"learning_rate": 5.31986531986532e-05,
"loss": 4.2075,
"step": 426000
},
{
"epoch": 2.89,
"learning_rate": 5.297418630751965e-05,
"loss": 4.2215,
"step": 428000
},
{
"epoch": 2.9,
"learning_rate": 5.274971941638609e-05,
"loss": 4.2475,
"step": 430000
},
{
"epoch": 2.92,
"learning_rate": 5.2525252525252536e-05,
"loss": 4.3064,
"step": 432000
},
{
"epoch": 2.93,
"learning_rate": 5.230078563411896e-05,
"loss": 4.3034,
"step": 434000
},
{
"epoch": 2.95,
"learning_rate": 5.207631874298541e-05,
"loss": 4.2119,
"step": 436000
},
{
"epoch": 2.96,
"learning_rate": 5.185185185185185e-05,
"loss": 4.2714,
"step": 438000
},
{
"epoch": 2.97,
"learning_rate": 5.16273849607183e-05,
"loss": 4.2878,
"step": 440000
},
{
"epoch": 2.99,
"learning_rate": 5.140291806958474e-05,
"loss": 4.1744,
"step": 442000
},
{
"epoch": 3.0,
"learning_rate": 5.1178451178451184e-05,
"loss": 3.8192,
"step": 444000
},
{
"epoch": 3.01,
"learning_rate": 5.0953984287317624e-05,
"loss": 4.0513,
"step": 446000
},
{
"epoch": 3.03,
"learning_rate": 5.0729517396184065e-05,
"loss": 4.0666,
"step": 448000
},
{
"epoch": 3.04,
"learning_rate": 5.050505050505051e-05,
"loss": 4.0297,
"step": 450000
},
{
"epoch": 3.05,
"learning_rate": 5.028058361391695e-05,
"loss": 4.019,
"step": 452000
},
{
"epoch": 3.07,
"learning_rate": 5.00561167227834e-05,
"loss": 4.0183,
"step": 454000
},
{
"epoch": 3.08,
"learning_rate": 4.983164983164983e-05,
"loss": 3.9988,
"step": 456000
},
{
"epoch": 3.09,
"learning_rate": 4.960718294051628e-05,
"loss": 3.9882,
"step": 458000
},
{
"epoch": 3.11,
"learning_rate": 4.938271604938271e-05,
"loss": 3.9879,
"step": 460000
},
{
"epoch": 3.12,
"learning_rate": 4.915824915824916e-05,
"loss": 3.9752,
"step": 462000
},
{
"epoch": 3.13,
"learning_rate": 4.89337822671156e-05,
"loss": 3.9901,
"step": 464000
},
{
"epoch": 3.15,
"learning_rate": 4.870931537598205e-05,
"loss": 3.9586,
"step": 466000
},
{
"epoch": 3.16,
"learning_rate": 4.848484848484849e-05,
"loss": 3.9711,
"step": 468000
},
{
"epoch": 3.17,
"learning_rate": 4.8260381593714935e-05,
"loss": 4.0005,
"step": 470000
},
{
"epoch": 3.19,
"learning_rate": 4.803591470258137e-05,
"loss": 3.9334,
"step": 472000
},
{
"epoch": 3.2,
"learning_rate": 4.7811447811447815e-05,
"loss": 3.9461,
"step": 474000
},
{
"epoch": 3.22,
"learning_rate": 4.7586980920314256e-05,
"loss": 3.9312,
"step": 476000
},
{
"epoch": 3.23,
"learning_rate": 4.7362514029180696e-05,
"loss": 3.928,
"step": 478000
},
{
"epoch": 3.24,
"learning_rate": 4.713804713804714e-05,
"loss": 3.9341,
"step": 480000
},
{
"epoch": 3.26,
"learning_rate": 4.691358024691358e-05,
"loss": 3.949,
"step": 482000
},
{
"epoch": 3.27,
"learning_rate": 4.6689113355780024e-05,
"loss": 3.9045,
"step": 484000
},
{
"epoch": 3.28,
"learning_rate": 4.6464646464646464e-05,
"loss": 3.9063,
"step": 486000
},
{
"epoch": 3.3,
"learning_rate": 4.624017957351291e-05,
"loss": 3.9079,
"step": 488000
},
{
"epoch": 3.31,
"learning_rate": 4.601571268237935e-05,
"loss": 3.9365,
"step": 490000
},
{
"epoch": 3.32,
"learning_rate": 4.57912457912458e-05,
"loss": 3.908,
"step": 492000
},
{
"epoch": 3.34,
"learning_rate": 4.556677890011223e-05,
"loss": 3.9043,
"step": 494000
},
{
"epoch": 3.35,
"learning_rate": 4.534231200897868e-05,
"loss": 3.9217,
"step": 496000
},
{
"epoch": 3.36,
"learning_rate": 4.511784511784512e-05,
"loss": 3.8926,
"step": 498000
},
{
"epoch": 3.38,
"learning_rate": 4.4893378226711566e-05,
"loss": 3.9196,
"step": 500000
},
{
"epoch": 3.39,
"learning_rate": 4.4668911335578006e-05,
"loss": 3.8883,
"step": 502000
},
{
"epoch": 3.4,
"learning_rate": 4.4444444444444447e-05,
"loss": 3.8888,
"step": 504000
},
{
"epoch": 3.42,
"learning_rate": 4.421997755331089e-05,
"loss": 3.9072,
"step": 506000
},
{
"epoch": 3.43,
"learning_rate": 4.3995510662177334e-05,
"loss": 3.8872,
"step": 508000
},
{
"epoch": 3.44,
"learning_rate": 4.3771043771043774e-05,
"loss": 3.8941,
"step": 510000
},
{
"epoch": 3.46,
"learning_rate": 4.3546576879910214e-05,
"loss": 3.8768,
"step": 512000
},
{
"epoch": 3.47,
"learning_rate": 4.332210998877666e-05,
"loss": 3.8667,
"step": 514000
},
{
"epoch": 3.49,
"learning_rate": 4.3097643097643095e-05,
"loss": 3.867,
"step": 516000
},
{
"epoch": 3.5,
"learning_rate": 4.287317620650954e-05,
"loss": 3.8665,
"step": 518000
},
{
"epoch": 3.51,
"learning_rate": 4.264870931537598e-05,
"loss": 3.8677,
"step": 520000
},
{
"epoch": 3.53,
"learning_rate": 4.242424242424243e-05,
"loss": 3.8752,
"step": 522000
},
{
"epoch": 3.54,
"learning_rate": 4.219977553310887e-05,
"loss": 3.888,
"step": 524000
},
{
"epoch": 3.55,
"learning_rate": 4.197530864197531e-05,
"loss": 3.849,
"step": 526000
},
{
"epoch": 3.57,
"learning_rate": 4.175084175084175e-05,
"loss": 3.8591,
"step": 528000
},
{
"epoch": 3.58,
"learning_rate": 4.15263748597082e-05,
"loss": 3.9935,
"step": 530000
},
{
"epoch": 3.59,
"learning_rate": 4.130190796857464e-05,
"loss": 4.1179,
"step": 532000
},
{
"epoch": 3.61,
"learning_rate": 4.1077441077441085e-05,
"loss": 3.8085,
"step": 534000
},
{
"epoch": 3.62,
"learning_rate": 4.085297418630752e-05,
"loss": 3.5572,
"step": 536000
},
{
"epoch": 3.63,
"learning_rate": 4.0628507295173965e-05,
"loss": 3.4772,
"step": 538000
},
{
"epoch": 3.65,
"learning_rate": 4.0404040404040405e-05,
"loss": 3.4955,
"step": 540000
},
{
"epoch": 3.66,
"learning_rate": 4.0179573512906846e-05,
"loss": 3.8805,
"step": 542000
},
{
"epoch": 3.67,
"learning_rate": 3.995510662177329e-05,
"loss": 3.7327,
"step": 544000
},
{
"epoch": 3.69,
"learning_rate": 3.973063973063973e-05,
"loss": 3.7417,
"step": 546000
},
{
"epoch": 3.7,
"learning_rate": 3.950617283950617e-05,
"loss": 4.0379,
"step": 548000
},
{
"epoch": 3.72,
"learning_rate": 3.9281705948372613e-05,
"loss": 4.0257,
"step": 550000
},
{
"epoch": 3.73,
"learning_rate": 3.905723905723906e-05,
"loss": 4.0116,
"step": 552000
},
{
"epoch": 3.74,
"learning_rate": 3.88327721661055e-05,
"loss": 3.9664,
"step": 554000
},
{
"epoch": 3.76,
"learning_rate": 3.860830527497195e-05,
"loss": 4.0149,
"step": 556000
},
{
"epoch": 3.77,
"learning_rate": 3.838383838383838e-05,
"loss": 4.0601,
"step": 558000
},
{
"epoch": 3.78,
"learning_rate": 3.815937149270483e-05,
"loss": 4.1639,
"step": 560000
},
{
"epoch": 3.8,
"learning_rate": 3.793490460157127e-05,
"loss": 4.3604,
"step": 562000
},
{
"epoch": 3.81,
"learning_rate": 3.7710437710437716e-05,
"loss": 4.5598,
"step": 564000
},
{
"epoch": 3.82,
"learning_rate": 3.7485970819304156e-05,
"loss": 4.5141,
"step": 566000
},
{
"epoch": 3.84,
"learning_rate": 3.7261503928170596e-05,
"loss": 3.8061,
"step": 568000
},
{
"epoch": 3.85,
"learning_rate": 3.7037037037037037e-05,
"loss": 4.0652,
"step": 570000
},
{
"epoch": 3.86,
"learning_rate": 3.681257014590348e-05,
"loss": 4.1378,
"step": 572000
},
{
"epoch": 3.88,
"learning_rate": 3.6588103254769924e-05,
"loss": 4.0305,
"step": 574000
},
{
"epoch": 3.89,
"learning_rate": 3.6363636363636364e-05,
"loss": 4.0567,
"step": 576000
},
{
"epoch": 3.9,
"learning_rate": 3.613916947250281e-05,
"loss": 4.0735,
"step": 578000
},
{
"epoch": 3.92,
"learning_rate": 3.5914702581369245e-05,
"loss": 4.1457,
"step": 580000
},
{
"epoch": 3.93,
"learning_rate": 3.569023569023569e-05,
"loss": 4.1228,
"step": 582000
},
{
"epoch": 3.94,
"learning_rate": 3.546576879910213e-05,
"loss": 4.061,
"step": 584000
},
{
"epoch": 3.96,
"learning_rate": 3.524130190796858e-05,
"loss": 4.1077,
"step": 586000
},
{
"epoch": 3.97,
"learning_rate": 3.501683501683502e-05,
"loss": 4.1329,
"step": 588000
},
{
"epoch": 3.99,
"learning_rate": 3.4792368125701466e-05,
"loss": 4.0207,
"step": 590000
},
{
"epoch": 4.0,
"learning_rate": 3.45679012345679e-05,
"loss": 3.6761,
"step": 592000
},
{
"epoch": 4.01,
"learning_rate": 3.434343434343435e-05,
"loss": 3.8304,
"step": 594000
},
{
"epoch": 4.03,
"learning_rate": 3.411896745230079e-05,
"loss": 3.8686,
"step": 596000
},
{
"epoch": 4.04,
"learning_rate": 3.389450056116723e-05,
"loss": 3.8363,
"step": 598000
},
{
"epoch": 4.05,
"learning_rate": 3.3670033670033675e-05,
"loss": 3.8302,
"step": 600000
},
{
"epoch": 4.07,
"learning_rate": 3.3445566778900115e-05,
"loss": 3.8316,
"step": 602000
},
{
"epoch": 4.08,
"learning_rate": 3.3221099887766555e-05,
"loss": 3.8158,
"step": 604000
},
{
"epoch": 4.09,
"learning_rate": 3.2996632996632995e-05,
"loss": 3.8058,
"step": 606000
},
{
"epoch": 4.11,
"learning_rate": 3.277216610549944e-05,
"loss": 3.8102,
"step": 608000
},
{
"epoch": 4.12,
"learning_rate": 3.254769921436588e-05,
"loss": 3.8001,
"step": 610000
},
{
"epoch": 4.13,
"learning_rate": 3.232323232323233e-05,
"loss": 3.8153,
"step": 612000
},
{
"epoch": 4.15,
"learning_rate": 3.209876543209876e-05,
"loss": 3.7888,
"step": 614000
},
{
"epoch": 4.16,
"learning_rate": 3.187429854096521e-05,
"loss": 3.8019,
"step": 616000
},
{
"epoch": 4.17,
"learning_rate": 3.164983164983165e-05,
"loss": 3.8379,
"step": 618000
},
{
"epoch": 4.19,
"learning_rate": 3.14253647586981e-05,
"loss": 3.7686,
"step": 620000
},
{
"epoch": 4.2,
"learning_rate": 3.120089786756454e-05,
"loss": 3.7825,
"step": 622000
},
{
"epoch": 4.22,
"learning_rate": 3.097643097643098e-05,
"loss": 3.7695,
"step": 624000
},
{
"epoch": 4.23,
"learning_rate": 3.075196408529742e-05,
"loss": 3.7713,
"step": 626000
},
{
"epoch": 4.24,
"learning_rate": 3.0527497194163865e-05,
"loss": 3.7789,
"step": 628000
},
{
"epoch": 4.26,
"learning_rate": 3.0303030303030306e-05,
"loss": 3.7927,
"step": 630000
},
{
"epoch": 4.27,
"learning_rate": 3.007856341189675e-05,
"loss": 3.7568,
"step": 632000
},
{
"epoch": 4.28,
"learning_rate": 2.9854096520763186e-05,
"loss": 3.7555,
"step": 634000
},
{
"epoch": 4.3,
"learning_rate": 2.962962962962963e-05,
"loss": 3.761,
"step": 636000
},
{
"epoch": 4.31,
"learning_rate": 2.9405162738496074e-05,
"loss": 3.7894,
"step": 638000
},
{
"epoch": 4.32,
"learning_rate": 2.9180695847362517e-05,
"loss": 3.7595,
"step": 640000
},
{
"epoch": 4.34,
"learning_rate": 2.8956228956228958e-05,
"loss": 3.763,
"step": 642000
},
{
"epoch": 4.35,
"learning_rate": 2.87317620650954e-05,
"loss": 3.7774,
"step": 644000
},
{
"epoch": 4.36,
"learning_rate": 2.850729517396184e-05,
"loss": 3.7551,
"step": 646000
},
{
"epoch": 4.38,
"learning_rate": 2.8282828282828282e-05,
"loss": 3.7829,
"step": 648000
},
{
"epoch": 4.39,
"learning_rate": 2.8058361391694725e-05,
"loss": 3.7527,
"step": 650000
},
{
"epoch": 4.4,
"learning_rate": 2.783389450056117e-05,
"loss": 3.753,
"step": 652000
},
{
"epoch": 4.42,
"learning_rate": 2.7609427609427613e-05,
"loss": 3.7746,
"step": 654000
},
{
"epoch": 4.43,
"learning_rate": 2.738496071829405e-05,
"loss": 3.7543,
"step": 656000
},
{
"epoch": 4.44,
"learning_rate": 2.7160493827160493e-05,
"loss": 3.7604,
"step": 658000
},
{
"epoch": 4.46,
"learning_rate": 2.6936026936026937e-05,
"loss": 3.7473,
"step": 660000
},
{
"epoch": 4.47,
"learning_rate": 2.671156004489338e-05,
"loss": 3.7377,
"step": 662000
},
{
"epoch": 4.49,
"learning_rate": 2.6487093153759824e-05,
"loss": 3.7398,
"step": 664000
},
{
"epoch": 4.5,
"learning_rate": 2.6262626262626268e-05,
"loss": 3.7432,
"step": 666000
},
{
"epoch": 4.51,
"learning_rate": 2.6038159371492705e-05,
"loss": 3.7484,
"step": 668000
},
{
"epoch": 4.53,
"learning_rate": 2.581369248035915e-05,
"loss": 3.7542,
"step": 670000
},
{
"epoch": 4.54,
"learning_rate": 2.5589225589225592e-05,
"loss": 3.7721,
"step": 672000
},
{
"epoch": 4.55,
"learning_rate": 2.5364758698092032e-05,
"loss": 3.7291,
"step": 674000
},
{
"epoch": 4.57,
"learning_rate": 2.5140291806958476e-05,
"loss": 3.7455,
"step": 676000
},
{
"epoch": 4.58,
"learning_rate": 2.4915824915824916e-05,
"loss": 3.8808,
"step": 678000
},
{
"epoch": 4.59,
"learning_rate": 2.4691358024691357e-05,
"loss": 4.0555,
"step": 680000
},
{
"epoch": 4.61,
"learning_rate": 2.44668911335578e-05,
"loss": 3.7132,
"step": 682000
},
{
"epoch": 4.62,
"learning_rate": 2.4242424242424244e-05,
"loss": 3.4602,
"step": 684000
},
{
"epoch": 4.63,
"learning_rate": 2.4017957351290684e-05,
"loss": 3.4057,
"step": 686000
},
{
"epoch": 4.65,
"learning_rate": 2.3793490460157128e-05,
"loss": 3.397,
"step": 688000
},
{
"epoch": 4.66,
"learning_rate": 2.356902356902357e-05,
"loss": 3.8375,
"step": 690000
},
{
"epoch": 4.67,
"learning_rate": 2.3344556677890012e-05,
"loss": 3.6615,
"step": 692000
},
{
"epoch": 4.69,
"learning_rate": 2.3120089786756455e-05,
"loss": 3.6624,
"step": 694000
},
{
"epoch": 4.7,
"learning_rate": 2.28956228956229e-05,
"loss": 3.9217,
"step": 696000
},
{
"epoch": 4.71,
"learning_rate": 2.267115600448934e-05,
"loss": 3.9308,
"step": 698000
},
{
"epoch": 4.73,
"learning_rate": 2.2446689113355783e-05,
"loss": 3.9061,
"step": 700000
},
{
"epoch": 4.74,
"learning_rate": 2.2222222222222223e-05,
"loss": 3.87,
"step": 702000
},
{
"epoch": 4.76,
"learning_rate": 2.1997755331088667e-05,
"loss": 3.919,
"step": 704000
},
{
"epoch": 4.77,
"learning_rate": 2.1773288439955107e-05,
"loss": 3.9621,
"step": 706000
},
{
"epoch": 4.78,
"learning_rate": 2.1548821548821547e-05,
"loss": 4.0667,
"step": 708000
},
{
"epoch": 4.8,
"learning_rate": 2.132435465768799e-05,
"loss": 4.2579,
"step": 710000
},
{
"epoch": 4.81,
"learning_rate": 2.1099887766554435e-05,
"loss": 4.4613,
"step": 712000
},
{
"epoch": 4.82,
"learning_rate": 2.0875420875420875e-05,
"loss": 4.4471,
"step": 714000
},
{
"epoch": 4.84,
"learning_rate": 2.065095398428732e-05,
"loss": 3.7612,
"step": 716000
},
{
"epoch": 4.85,
"learning_rate": 2.042648709315376e-05,
"loss": 3.9657,
"step": 718000
},
{
"epoch": 4.86,
"learning_rate": 2.0202020202020203e-05,
"loss": 4.0575,
"step": 720000
},
{
"epoch": 4.88,
"learning_rate": 1.9977553310886646e-05,
"loss": 3.9554,
"step": 722000
},
{
"epoch": 4.89,
"learning_rate": 1.9753086419753087e-05,
"loss": 3.9786,
"step": 724000
},
{
"epoch": 4.9,
"learning_rate": 1.952861952861953e-05,
"loss": 3.9809,
"step": 726000
},
{
"epoch": 4.92,
"learning_rate": 1.9304152637485974e-05,
"loss": 4.0793,
"step": 728000
},
{
"epoch": 4.93,
"learning_rate": 1.9079685746352414e-05,
"loss": 4.0373,
"step": 730000
},
{
"epoch": 4.94,
"learning_rate": 1.8855218855218858e-05,
"loss": 4.0029,
"step": 732000
},
{
"epoch": 4.96,
"learning_rate": 1.8630751964085298e-05,
"loss": 4.0193,
"step": 734000
},
{
"epoch": 4.97,
"learning_rate": 1.840628507295174e-05,
"loss": 4.0719,
"step": 736000
},
{
"epoch": 4.99,
"learning_rate": 1.8181818181818182e-05,
"loss": 3.9453,
"step": 738000
},
{
"epoch": 5.0,
"learning_rate": 1.7957351290684622e-05,
"loss": 3.6204,
"step": 740000
},
{
"epoch": 5.01,
"learning_rate": 1.7732884399551066e-05,
"loss": 3.6973,
"step": 742000
},
{
"epoch": 5.03,
"learning_rate": 1.750841750841751e-05,
"loss": 3.7598,
"step": 744000
},
{
"epoch": 5.04,
"learning_rate": 1.728395061728395e-05,
"loss": 3.7362,
"step": 746000
},
{
"epoch": 5.05,
"learning_rate": 1.7059483726150394e-05,
"loss": 3.7313,
"step": 748000
},
{
"epoch": 5.07,
"learning_rate": 1.6835016835016837e-05,
"loss": 3.7332,
"step": 750000
},
{
"epoch": 5.08,
"learning_rate": 1.6610549943883278e-05,
"loss": 3.722,
"step": 752000
},
{
"epoch": 5.09,
"learning_rate": 1.638608305274972e-05,
"loss": 3.713,
"step": 754000
},
{
"epoch": 5.11,
"learning_rate": 1.6161616161616165e-05,
"loss": 3.72,
"step": 756000
},
{
"epoch": 5.12,
"learning_rate": 1.5937149270482605e-05,
"loss": 3.7103,
"step": 758000
},
{
"epoch": 5.13,
"learning_rate": 1.571268237934905e-05,
"loss": 3.7315,
"step": 760000
},
{
"epoch": 5.15,
"learning_rate": 1.548821548821549e-05,
"loss": 3.7024,
"step": 762000
},
{
"epoch": 5.16,
"learning_rate": 1.5263748597081933e-05,
"loss": 3.7156,
"step": 764000
},
{
"epoch": 5.17,
"learning_rate": 1.5039281705948375e-05,
"loss": 3.7549,
"step": 766000
},
{
"epoch": 5.19,
"learning_rate": 1.4814814814814815e-05,
"loss": 3.6887,
"step": 768000
},
{
"epoch": 5.2,
"learning_rate": 1.4590347923681259e-05,
"loss": 3.7033,
"step": 770000
},
{
"epoch": 5.21,
"learning_rate": 1.43658810325477e-05,
"loss": 3.6917,
"step": 772000
},
{
"epoch": 5.23,
"learning_rate": 1.4141414141414141e-05,
"loss": 3.6936,
"step": 774000
},
{
"epoch": 5.24,
"learning_rate": 1.3916947250280585e-05,
"loss": 3.7019,
"step": 776000
},
{
"epoch": 5.26,
"learning_rate": 1.3692480359147025e-05,
"loss": 3.7172,
"step": 778000
},
{
"epoch": 5.27,
"learning_rate": 1.3468013468013468e-05,
"loss": 3.6812,
"step": 780000
},
{
"epoch": 5.28,
"learning_rate": 1.3243546576879912e-05,
"loss": 3.6841,
"step": 782000
},
{
"epoch": 5.3,
"learning_rate": 1.3019079685746352e-05,
"loss": 3.6837,
"step": 784000
},
{
"epoch": 5.31,
"learning_rate": 1.2794612794612796e-05,
"loss": 3.7225,
"step": 786000
},
{
"epoch": 5.32,
"learning_rate": 1.2570145903479238e-05,
"loss": 3.6897,
"step": 788000
},
{
"epoch": 5.34,
"learning_rate": 1.2345679012345678e-05,
"loss": 3.6941,
"step": 790000
},
{
"epoch": 5.35,
"learning_rate": 1.2121212121212122e-05,
"loss": 3.7125,
"step": 792000
},
{
"epoch": 5.36,
"learning_rate": 1.1896745230078564e-05,
"loss": 3.688,
"step": 794000
},
{
"epoch": 5.38,
"learning_rate": 1.1672278338945006e-05,
"loss": 3.7184,
"step": 796000
},
{
"epoch": 5.39,
"learning_rate": 1.144781144781145e-05,
"loss": 3.6901,
"step": 798000
},
{
"epoch": 5.4,
"learning_rate": 1.1223344556677892e-05,
"loss": 3.6886,
"step": 800000
},
{
"epoch": 5.42,
"learning_rate": 1.0998877665544333e-05,
"loss": 3.7129,
"step": 802000
},
{
"epoch": 5.43,
"learning_rate": 1.0774410774410774e-05,
"loss": 3.6953,
"step": 804000
},
{
"epoch": 5.44,
"learning_rate": 1.0549943883277217e-05,
"loss": 3.7016,
"step": 806000
},
{
"epoch": 5.46,
"learning_rate": 1.032547699214366e-05,
"loss": 3.69,
"step": 808000
},
{
"epoch": 5.47,
"learning_rate": 1.0101010101010101e-05,
"loss": 3.6834,
"step": 810000
},
{
"epoch": 5.48,
"learning_rate": 9.876543209876543e-06,
"loss": 3.6809,
"step": 812000
},
{
"epoch": 5.5,
"learning_rate": 9.652076318742987e-06,
"loss": 3.6861,
"step": 814000
},
{
"epoch": 5.51,
"learning_rate": 9.427609427609429e-06,
"loss": 3.6977,
"step": 816000
},
{
"epoch": 5.53,
"learning_rate": 9.20314253647587e-06,
"loss": 3.7002,
"step": 818000
},
{
"epoch": 5.54,
"learning_rate": 8.978675645342311e-06,
"loss": 3.7243,
"step": 820000
},
{
"epoch": 5.55,
"learning_rate": 8.754208754208755e-06,
"loss": 3.6768,
"step": 822000
},
{
"epoch": 5.57,
"learning_rate": 8.529741863075197e-06,
"loss": 3.6947,
"step": 824000
},
{
"epoch": 5.58,
"learning_rate": 8.305274971941639e-06,
"loss": 3.8538,
"step": 826000
},
{
"epoch": 5.59,
"learning_rate": 8.080808080808082e-06,
"loss": 4.0943,
"step": 828000
},
{
"epoch": 5.61,
"learning_rate": 7.856341189674524e-06,
"loss": 3.7048,
"step": 830000
},
{
"epoch": 5.62,
"learning_rate": 7.631874298540966e-06,
"loss": 3.4471,
"step": 832000
},
{
"epoch": 5.63,
"learning_rate": 7.4074074074074075e-06,
"loss": 3.4131,
"step": 834000
},
{
"epoch": 5.65,
"learning_rate": 7.18294051627385e-06,
"loss": 3.3965,
"step": 836000
},
{
"epoch": 5.66,
"learning_rate": 6.958473625140292e-06,
"loss": 3.9014,
"step": 838000
},
{
"epoch": 5.67,
"learning_rate": 6.734006734006734e-06,
"loss": 3.6888,
"step": 840000
},
{
"epoch": 5.69,
"learning_rate": 6.509539842873176e-06,
"loss": 3.6862,
"step": 842000
},
{
"epoch": 5.7,
"learning_rate": 6.285072951739619e-06,
"loss": 3.8646,
"step": 844000
},
{
"epoch": 5.71,
"learning_rate": 6.060606060606061e-06,
"loss": 3.8818,
"step": 846000
},
{
"epoch": 5.73,
"learning_rate": 5.836139169472503e-06,
"loss": 3.8666,
"step": 848000
},
{
"epoch": 5.74,
"learning_rate": 5.611672278338946e-06,
"loss": 3.8303,
"step": 850000
},
{
"epoch": 5.76,
"learning_rate": 5.387205387205387e-06,
"loss": 3.8764,
"step": 852000
},
{
"epoch": 5.77,
"learning_rate": 5.16273849607183e-06,
"loss": 3.9326,
"step": 854000
},
{
"epoch": 5.78,
"learning_rate": 4.938271604938272e-06,
"loss": 4.0327,
"step": 856000
},
{
"epoch": 5.8,
"learning_rate": 4.7138047138047145e-06,
"loss": 4.2255,
"step": 858000
},
{
"epoch": 5.81,
"learning_rate": 4.489337822671156e-06,
"loss": 4.4367,
"step": 860000
},
{
"epoch": 5.82,
"learning_rate": 4.264870931537598e-06,
"loss": 4.4619,
"step": 862000
},
{
"epoch": 5.84,
"learning_rate": 4.040404040404041e-06,
"loss": 3.8376,
"step": 864000
},
{
"epoch": 5.85,
"learning_rate": 3.815937149270483e-06,
"loss": 3.9389,
"step": 866000
},
{
"epoch": 5.86,
"learning_rate": 3.591470258136925e-06,
"loss": 4.051,
"step": 868000
},
{
"epoch": 5.88,
"learning_rate": 3.367003367003367e-06,
"loss": 3.9391,
"step": 870000
},
{
"epoch": 5.89,
"learning_rate": 3.1425364758698095e-06,
"loss": 3.969,
"step": 872000
},
{
"epoch": 5.9,
"learning_rate": 2.9180695847362515e-06,
"loss": 3.9726,
"step": 874000
},
{
"epoch": 5.92,
"learning_rate": 2.6936026936026934e-06,
"loss": 4.081,
"step": 876000
},
{
"epoch": 5.93,
"learning_rate": 2.469135802469136e-06,
"loss": 4.0299,
"step": 878000
},
{
"epoch": 5.94,
"learning_rate": 2.244668911335578e-06,
"loss": 4.0083,
"step": 880000
},
{
"epoch": 5.96,
"learning_rate": 2.0202020202020206e-06,
"loss": 4.0233,
"step": 882000
},
{
"epoch": 5.97,
"learning_rate": 1.7957351290684626e-06,
"loss": 4.0916,
"step": 884000
},
{
"epoch": 5.98,
"learning_rate": 1.5712682379349048e-06,
"loss": 3.9541,
"step": 886000
},
{
"epoch": 6.0,
"learning_rate": 1.3468013468013467e-06,
"loss": 3.6427,
"step": 888000
},
{
"epoch": 6.01,
"learning_rate": 1.122334455667789e-06,
"loss": 3.6296,
"step": 890000
},
{
"epoch": 6.03,
"learning_rate": 8.978675645342313e-07,
"loss": 3.7085,
"step": 892000
},
{
"epoch": 6.04,
"learning_rate": 6.734006734006734e-07,
"loss": 3.6918,
"step": 894000
},
{
"epoch": 6.05,
"learning_rate": 4.4893378226711564e-07,
"loss": 3.6907,
"step": 896000
},
{
"epoch": 6.07,
"learning_rate": 2.2446689113355782e-07,
"loss": 3.6985,
"step": 898000
},
{
"epoch": 6.08,
"learning_rate": 0.0,
"loss": 3.6945,
"step": 900000
}
],
"max_steps": 900000,
"num_train_epochs": 7,
"total_flos": 1.894127697690476e+18,
"trial_name": null,
"trial_params": null
}