gpt2-large-lora-stf4 / trainer_state.json
Mikivis's picture
commit from Mikivis
82eff3a
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 16517,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 7.991281709753588e-05,
"loss": 1.7472,
"step": 20
},
{
"epoch": 0.0,
"learning_rate": 7.981594720590907e-05,
"loss": 1.7753,
"step": 40
},
{
"epoch": 0.0,
"learning_rate": 7.971907731428226e-05,
"loss": 1.7184,
"step": 60
},
{
"epoch": 0.0,
"learning_rate": 7.962220742265544e-05,
"loss": 1.7141,
"step": 80
},
{
"epoch": 0.01,
"learning_rate": 7.952533753102863e-05,
"loss": 1.7126,
"step": 100
},
{
"epoch": 0.01,
"learning_rate": 7.942846763940184e-05,
"loss": 1.7041,
"step": 120
},
{
"epoch": 0.01,
"learning_rate": 7.933159774777503e-05,
"loss": 1.7034,
"step": 140
},
{
"epoch": 0.01,
"learning_rate": 7.923472785614822e-05,
"loss": 1.7561,
"step": 160
},
{
"epoch": 0.01,
"learning_rate": 7.913785796452141e-05,
"loss": 1.7129,
"step": 180
},
{
"epoch": 0.01,
"learning_rate": 7.90409880728946e-05,
"loss": 1.7346,
"step": 200
},
{
"epoch": 0.01,
"learning_rate": 7.894411818126779e-05,
"loss": 1.7077,
"step": 220
},
{
"epoch": 0.01,
"learning_rate": 7.884724828964099e-05,
"loss": 1.6912,
"step": 240
},
{
"epoch": 0.02,
"learning_rate": 7.875037839801418e-05,
"loss": 1.7212,
"step": 260
},
{
"epoch": 0.02,
"learning_rate": 7.865350850638737e-05,
"loss": 1.6983,
"step": 280
},
{
"epoch": 0.02,
"learning_rate": 7.855663861476056e-05,
"loss": 1.7179,
"step": 300
},
{
"epoch": 0.02,
"learning_rate": 7.845976872313375e-05,
"loss": 1.7045,
"step": 320
},
{
"epoch": 0.02,
"learning_rate": 7.836289883150694e-05,
"loss": 1.728,
"step": 340
},
{
"epoch": 0.02,
"learning_rate": 7.826602893988014e-05,
"loss": 1.7014,
"step": 360
},
{
"epoch": 0.02,
"learning_rate": 7.816915904825333e-05,
"loss": 1.653,
"step": 380
},
{
"epoch": 0.02,
"learning_rate": 7.807228915662652e-05,
"loss": 1.6554,
"step": 400
},
{
"epoch": 0.03,
"learning_rate": 7.797541926499971e-05,
"loss": 1.6843,
"step": 420
},
{
"epoch": 0.03,
"learning_rate": 7.78785493733729e-05,
"loss": 1.6688,
"step": 440
},
{
"epoch": 0.03,
"learning_rate": 7.778167948174609e-05,
"loss": 1.7169,
"step": 460
},
{
"epoch": 0.03,
"learning_rate": 7.768480959011928e-05,
"loss": 1.7075,
"step": 480
},
{
"epoch": 0.03,
"learning_rate": 7.758793969849247e-05,
"loss": 1.683,
"step": 500
},
{
"epoch": 0.03,
"learning_rate": 7.749106980686566e-05,
"loss": 1.7154,
"step": 520
},
{
"epoch": 0.03,
"learning_rate": 7.739419991523885e-05,
"loss": 1.6954,
"step": 540
},
{
"epoch": 0.03,
"learning_rate": 7.729733002361204e-05,
"loss": 1.7439,
"step": 560
},
{
"epoch": 0.04,
"learning_rate": 7.720046013198524e-05,
"loss": 1.6487,
"step": 580
},
{
"epoch": 0.04,
"learning_rate": 7.710359024035843e-05,
"loss": 1.703,
"step": 600
},
{
"epoch": 0.04,
"learning_rate": 7.700672034873162e-05,
"loss": 1.6851,
"step": 620
},
{
"epoch": 0.04,
"learning_rate": 7.690985045710481e-05,
"loss": 1.6851,
"step": 640
},
{
"epoch": 0.04,
"learning_rate": 7.6812980565478e-05,
"loss": 1.7042,
"step": 660
},
{
"epoch": 0.04,
"learning_rate": 7.671611067385119e-05,
"loss": 1.7165,
"step": 680
},
{
"epoch": 0.04,
"learning_rate": 7.661924078222438e-05,
"loss": 1.6873,
"step": 700
},
{
"epoch": 0.04,
"learning_rate": 7.652237089059757e-05,
"loss": 1.6847,
"step": 720
},
{
"epoch": 0.04,
"learning_rate": 7.642550099897076e-05,
"loss": 1.6251,
"step": 740
},
{
"epoch": 0.05,
"learning_rate": 7.632863110734395e-05,
"loss": 1.6964,
"step": 760
},
{
"epoch": 0.05,
"learning_rate": 7.623176121571715e-05,
"loss": 1.6821,
"step": 780
},
{
"epoch": 0.05,
"learning_rate": 7.613489132409034e-05,
"loss": 1.6484,
"step": 800
},
{
"epoch": 0.05,
"learning_rate": 7.603802143246353e-05,
"loss": 1.7012,
"step": 820
},
{
"epoch": 0.05,
"learning_rate": 7.594115154083672e-05,
"loss": 1.7086,
"step": 840
},
{
"epoch": 0.05,
"learning_rate": 7.584428164920991e-05,
"loss": 1.6811,
"step": 860
},
{
"epoch": 0.05,
"learning_rate": 7.57474117575831e-05,
"loss": 1.6587,
"step": 880
},
{
"epoch": 0.05,
"learning_rate": 7.565054186595629e-05,
"loss": 1.6945,
"step": 900
},
{
"epoch": 0.06,
"learning_rate": 7.555367197432948e-05,
"loss": 1.6781,
"step": 920
},
{
"epoch": 0.06,
"learning_rate": 7.545680208270267e-05,
"loss": 1.6977,
"step": 940
},
{
"epoch": 0.06,
"learning_rate": 7.535993219107586e-05,
"loss": 1.6935,
"step": 960
},
{
"epoch": 0.06,
"learning_rate": 7.526306229944905e-05,
"loss": 1.6759,
"step": 980
},
{
"epoch": 0.06,
"learning_rate": 7.516619240782225e-05,
"loss": 1.6708,
"step": 1000
},
{
"epoch": 0.06,
"learning_rate": 7.507900950535812e-05,
"loss": 1.7009,
"step": 1020
},
{
"epoch": 0.06,
"learning_rate": 7.498213961373132e-05,
"loss": 1.6572,
"step": 1040
},
{
"epoch": 0.06,
"learning_rate": 7.488526972210451e-05,
"loss": 1.705,
"step": 1060
},
{
"epoch": 0.07,
"learning_rate": 7.47883998304777e-05,
"loss": 1.6606,
"step": 1080
},
{
"epoch": 0.07,
"learning_rate": 7.469152993885089e-05,
"loss": 1.6746,
"step": 1100
},
{
"epoch": 0.07,
"learning_rate": 7.459466004722408e-05,
"loss": 1.6656,
"step": 1120
},
{
"epoch": 0.07,
"learning_rate": 7.449779015559727e-05,
"loss": 1.6738,
"step": 1140
},
{
"epoch": 0.07,
"learning_rate": 7.440092026397046e-05,
"loss": 1.6524,
"step": 1160
},
{
"epoch": 0.07,
"learning_rate": 7.430405037234365e-05,
"loss": 1.6875,
"step": 1180
},
{
"epoch": 0.07,
"learning_rate": 7.420718048071684e-05,
"loss": 1.6954,
"step": 1200
},
{
"epoch": 0.07,
"learning_rate": 7.411031058909003e-05,
"loss": 1.6745,
"step": 1220
},
{
"epoch": 0.08,
"learning_rate": 7.401344069746322e-05,
"loss": 1.6925,
"step": 1240
},
{
"epoch": 0.08,
"learning_rate": 7.391657080583642e-05,
"loss": 1.6939,
"step": 1260
},
{
"epoch": 0.08,
"learning_rate": 7.381970091420961e-05,
"loss": 1.6746,
"step": 1280
},
{
"epoch": 0.08,
"learning_rate": 7.37228310225828e-05,
"loss": 1.707,
"step": 1300
},
{
"epoch": 0.08,
"learning_rate": 7.362596113095599e-05,
"loss": 1.6802,
"step": 1320
},
{
"epoch": 0.08,
"learning_rate": 7.352909123932918e-05,
"loss": 1.6477,
"step": 1340
},
{
"epoch": 0.08,
"learning_rate": 7.343222134770237e-05,
"loss": 1.6885,
"step": 1360
},
{
"epoch": 0.08,
"learning_rate": 7.333535145607556e-05,
"loss": 1.6761,
"step": 1380
},
{
"epoch": 0.08,
"learning_rate": 7.323848156444875e-05,
"loss": 1.6532,
"step": 1400
},
{
"epoch": 0.09,
"learning_rate": 7.314161167282194e-05,
"loss": 1.6493,
"step": 1420
},
{
"epoch": 0.09,
"learning_rate": 7.304474178119513e-05,
"loss": 1.654,
"step": 1440
},
{
"epoch": 0.09,
"learning_rate": 7.295271538414967e-05,
"loss": 1.7094,
"step": 1460
},
{
"epoch": 0.09,
"learning_rate": 7.285584549252286e-05,
"loss": 1.6744,
"step": 1480
},
{
"epoch": 0.09,
"learning_rate": 7.275897560089605e-05,
"loss": 1.6327,
"step": 1500
},
{
"epoch": 0.09,
"learning_rate": 7.266210570926924e-05,
"loss": 1.7002,
"step": 1520
},
{
"epoch": 0.09,
"learning_rate": 7.256523581764243e-05,
"loss": 1.6273,
"step": 1540
},
{
"epoch": 0.09,
"learning_rate": 7.246836592601563e-05,
"loss": 1.6569,
"step": 1560
},
{
"epoch": 0.1,
"learning_rate": 7.237149603438882e-05,
"loss": 1.6679,
"step": 1580
},
{
"epoch": 0.1,
"learning_rate": 7.227462614276201e-05,
"loss": 1.6805,
"step": 1600
},
{
"epoch": 0.1,
"learning_rate": 7.21777562511352e-05,
"loss": 1.6357,
"step": 1620
},
{
"epoch": 0.1,
"learning_rate": 7.208088635950839e-05,
"loss": 1.6735,
"step": 1640
},
{
"epoch": 0.1,
"learning_rate": 7.198401646788158e-05,
"loss": 1.7039,
"step": 1660
},
{
"epoch": 0.1,
"learning_rate": 7.188714657625478e-05,
"loss": 1.6771,
"step": 1680
},
{
"epoch": 0.1,
"learning_rate": 7.179027668462797e-05,
"loss": 1.6594,
"step": 1700
},
{
"epoch": 0.1,
"learning_rate": 7.169340679300116e-05,
"loss": 1.6508,
"step": 1720
},
{
"epoch": 0.11,
"learning_rate": 7.159653690137435e-05,
"loss": 1.695,
"step": 1740
},
{
"epoch": 0.11,
"learning_rate": 7.149966700974754e-05,
"loss": 1.7088,
"step": 1760
},
{
"epoch": 0.11,
"learning_rate": 7.140279711812073e-05,
"loss": 1.687,
"step": 1780
},
{
"epoch": 0.11,
"learning_rate": 7.130592722649393e-05,
"loss": 1.6499,
"step": 1800
},
{
"epoch": 0.11,
"learning_rate": 7.120905733486712e-05,
"loss": 1.6608,
"step": 1820
},
{
"epoch": 0.11,
"learning_rate": 7.111218744324031e-05,
"loss": 1.7031,
"step": 1840
},
{
"epoch": 0.11,
"learning_rate": 7.10153175516135e-05,
"loss": 1.6258,
"step": 1860
},
{
"epoch": 0.11,
"learning_rate": 7.091844765998669e-05,
"loss": 1.6773,
"step": 1880
},
{
"epoch": 0.12,
"learning_rate": 7.082157776835988e-05,
"loss": 1.6606,
"step": 1900
},
{
"epoch": 0.12,
"learning_rate": 7.072470787673307e-05,
"loss": 1.6672,
"step": 1920
},
{
"epoch": 0.12,
"learning_rate": 7.062783798510626e-05,
"loss": 1.6583,
"step": 1940
},
{
"epoch": 0.12,
"learning_rate": 7.053096809347945e-05,
"loss": 1.6742,
"step": 1960
},
{
"epoch": 0.12,
"learning_rate": 7.043409820185264e-05,
"loss": 1.6509,
"step": 1980
},
{
"epoch": 0.12,
"learning_rate": 7.033722831022584e-05,
"loss": 1.7019,
"step": 2000
},
{
"epoch": 0.12,
"learning_rate": 7.024035841859903e-05,
"loss": 1.6296,
"step": 2020
},
{
"epoch": 0.12,
"learning_rate": 7.014348852697222e-05,
"loss": 1.6784,
"step": 2040
},
{
"epoch": 0.12,
"learning_rate": 7.004661863534541e-05,
"loss": 1.6412,
"step": 2060
},
{
"epoch": 0.13,
"learning_rate": 6.99497487437186e-05,
"loss": 1.6583,
"step": 2080
},
{
"epoch": 0.13,
"learning_rate": 6.985287885209179e-05,
"loss": 1.6388,
"step": 2100
},
{
"epoch": 0.13,
"learning_rate": 6.975600896046498e-05,
"loss": 1.6553,
"step": 2120
},
{
"epoch": 0.13,
"learning_rate": 6.965913906883817e-05,
"loss": 1.6597,
"step": 2140
},
{
"epoch": 0.13,
"learning_rate": 6.956226917721136e-05,
"loss": 1.6743,
"step": 2160
},
{
"epoch": 0.13,
"learning_rate": 6.946539928558455e-05,
"loss": 1.651,
"step": 2180
},
{
"epoch": 0.13,
"learning_rate": 6.936852939395774e-05,
"loss": 1.6682,
"step": 2200
},
{
"epoch": 0.13,
"learning_rate": 6.927165950233094e-05,
"loss": 1.6666,
"step": 2220
},
{
"epoch": 0.14,
"learning_rate": 6.917478961070413e-05,
"loss": 1.681,
"step": 2240
},
{
"epoch": 0.14,
"learning_rate": 6.907791971907732e-05,
"loss": 1.6866,
"step": 2260
},
{
"epoch": 0.14,
"learning_rate": 6.898104982745051e-05,
"loss": 1.664,
"step": 2280
},
{
"epoch": 0.14,
"learning_rate": 6.88841799358237e-05,
"loss": 1.6581,
"step": 2300
},
{
"epoch": 0.14,
"learning_rate": 6.878731004419689e-05,
"loss": 1.6411,
"step": 2320
},
{
"epoch": 0.14,
"learning_rate": 6.869044015257008e-05,
"loss": 1.6443,
"step": 2340
},
{
"epoch": 0.14,
"learning_rate": 6.859357026094327e-05,
"loss": 1.6331,
"step": 2360
},
{
"epoch": 0.14,
"learning_rate": 6.849670036931646e-05,
"loss": 1.6616,
"step": 2380
},
{
"epoch": 0.15,
"learning_rate": 6.839983047768965e-05,
"loss": 1.6334,
"step": 2400
},
{
"epoch": 0.15,
"learning_rate": 6.830296058606285e-05,
"loss": 1.6791,
"step": 2420
},
{
"epoch": 0.15,
"learning_rate": 6.820609069443604e-05,
"loss": 1.6658,
"step": 2440
},
{
"epoch": 0.15,
"learning_rate": 6.810922080280923e-05,
"loss": 1.648,
"step": 2460
},
{
"epoch": 0.15,
"learning_rate": 6.801235091118242e-05,
"loss": 1.6717,
"step": 2480
},
{
"epoch": 0.15,
"learning_rate": 6.791548101955561e-05,
"loss": 1.6567,
"step": 2500
},
{
"epoch": 0.15,
"learning_rate": 6.78186111279288e-05,
"loss": 1.6754,
"step": 2520
},
{
"epoch": 0.15,
"learning_rate": 6.772174123630199e-05,
"loss": 1.6728,
"step": 2540
},
{
"epoch": 0.15,
"learning_rate": 6.76248713446752e-05,
"loss": 1.6699,
"step": 2560
},
{
"epoch": 0.16,
"learning_rate": 6.752800145304838e-05,
"loss": 1.7137,
"step": 2580
},
{
"epoch": 0.16,
"learning_rate": 6.743113156142157e-05,
"loss": 1.6644,
"step": 2600
},
{
"epoch": 0.16,
"learning_rate": 6.733426166979476e-05,
"loss": 1.6794,
"step": 2620
},
{
"epoch": 0.16,
"learning_rate": 6.723739177816795e-05,
"loss": 1.6934,
"step": 2640
},
{
"epoch": 0.16,
"learning_rate": 6.714052188654114e-05,
"loss": 1.7115,
"step": 2660
},
{
"epoch": 0.16,
"learning_rate": 6.704365199491434e-05,
"loss": 1.7005,
"step": 2680
},
{
"epoch": 0.16,
"learning_rate": 6.694678210328753e-05,
"loss": 1.6568,
"step": 2700
},
{
"epoch": 0.16,
"learning_rate": 6.684991221166072e-05,
"loss": 1.6423,
"step": 2720
},
{
"epoch": 0.17,
"learning_rate": 6.675304232003391e-05,
"loss": 1.6507,
"step": 2740
},
{
"epoch": 0.17,
"learning_rate": 6.66561724284071e-05,
"loss": 1.6431,
"step": 2760
},
{
"epoch": 0.17,
"learning_rate": 6.65593025367803e-05,
"loss": 1.6746,
"step": 2780
},
{
"epoch": 0.17,
"learning_rate": 6.646243264515348e-05,
"loss": 1.6981,
"step": 2800
},
{
"epoch": 0.17,
"learning_rate": 6.636556275352667e-05,
"loss": 1.6163,
"step": 2820
},
{
"epoch": 0.17,
"learning_rate": 6.626869286189988e-05,
"loss": 1.6597,
"step": 2840
},
{
"epoch": 0.17,
"learning_rate": 6.617182297027307e-05,
"loss": 1.6613,
"step": 2860
},
{
"epoch": 0.17,
"learning_rate": 6.607495307864626e-05,
"loss": 1.6561,
"step": 2880
},
{
"epoch": 0.18,
"learning_rate": 6.597808318701944e-05,
"loss": 1.7012,
"step": 2900
},
{
"epoch": 0.18,
"learning_rate": 6.588121329539263e-05,
"loss": 1.6617,
"step": 2920
},
{
"epoch": 0.18,
"learning_rate": 6.578434340376582e-05,
"loss": 1.6302,
"step": 2940
},
{
"epoch": 0.18,
"learning_rate": 6.568747351213901e-05,
"loss": 1.6831,
"step": 2960
},
{
"epoch": 0.18,
"learning_rate": 6.55906036205122e-05,
"loss": 1.6385,
"step": 2980
},
{
"epoch": 0.18,
"learning_rate": 6.549373372888539e-05,
"loss": 1.6413,
"step": 3000
},
{
"epoch": 0.18,
"learning_rate": 6.539686383725858e-05,
"loss": 1.6389,
"step": 3020
},
{
"epoch": 0.18,
"learning_rate": 6.529999394563177e-05,
"loss": 1.6309,
"step": 3040
},
{
"epoch": 0.19,
"learning_rate": 6.520312405400498e-05,
"loss": 1.6157,
"step": 3060
},
{
"epoch": 0.19,
"learning_rate": 6.510625416237817e-05,
"loss": 1.6425,
"step": 3080
},
{
"epoch": 0.19,
"learning_rate": 6.500938427075136e-05,
"loss": 1.648,
"step": 3100
},
{
"epoch": 0.19,
"learning_rate": 6.491251437912454e-05,
"loss": 1.6576,
"step": 3120
},
{
"epoch": 0.19,
"learning_rate": 6.481564448749773e-05,
"loss": 1.6705,
"step": 3140
},
{
"epoch": 0.19,
"learning_rate": 6.471877459587092e-05,
"loss": 1.6638,
"step": 3160
},
{
"epoch": 0.19,
"learning_rate": 6.462190470424411e-05,
"loss": 1.688,
"step": 3180
},
{
"epoch": 0.19,
"learning_rate": 6.45250348126173e-05,
"loss": 1.6642,
"step": 3200
},
{
"epoch": 0.19,
"learning_rate": 6.442816492099049e-05,
"loss": 1.6906,
"step": 3220
},
{
"epoch": 0.2,
"learning_rate": 6.433129502936368e-05,
"loss": 1.6478,
"step": 3240
},
{
"epoch": 0.2,
"learning_rate": 6.423442513773689e-05,
"loss": 1.6413,
"step": 3260
},
{
"epoch": 0.2,
"learning_rate": 6.413755524611008e-05,
"loss": 1.6741,
"step": 3280
},
{
"epoch": 0.2,
"learning_rate": 6.404068535448327e-05,
"loss": 1.651,
"step": 3300
},
{
"epoch": 0.2,
"learning_rate": 6.394381546285645e-05,
"loss": 1.6515,
"step": 3320
},
{
"epoch": 0.2,
"learning_rate": 6.384694557122964e-05,
"loss": 1.6146,
"step": 3340
},
{
"epoch": 0.2,
"learning_rate": 6.375007567960283e-05,
"loss": 1.6759,
"step": 3360
},
{
"epoch": 0.2,
"learning_rate": 6.365320578797602e-05,
"loss": 1.6573,
"step": 3380
},
{
"epoch": 0.21,
"learning_rate": 6.355633589634921e-05,
"loss": 1.6676,
"step": 3400
},
{
"epoch": 0.21,
"learning_rate": 6.34594660047224e-05,
"loss": 1.6448,
"step": 3420
},
{
"epoch": 0.21,
"learning_rate": 6.33625961130956e-05,
"loss": 1.6594,
"step": 3440
},
{
"epoch": 0.21,
"learning_rate": 6.327541321063148e-05,
"loss": 1.6245,
"step": 3460
},
{
"epoch": 0.21,
"learning_rate": 6.318338681358601e-05,
"loss": 1.67,
"step": 3480
},
{
"epoch": 0.21,
"learning_rate": 6.30865169219592e-05,
"loss": 1.6262,
"step": 3500
},
{
"epoch": 0.21,
"learning_rate": 6.298964703033239e-05,
"loss": 1.6462,
"step": 3520
},
{
"epoch": 0.21,
"learning_rate": 6.289277713870558e-05,
"loss": 1.6803,
"step": 3540
},
{
"epoch": 0.22,
"learning_rate": 6.279590724707877e-05,
"loss": 1.6403,
"step": 3560
},
{
"epoch": 0.22,
"learning_rate": 6.269903735545196e-05,
"loss": 1.653,
"step": 3580
},
{
"epoch": 0.22,
"learning_rate": 6.260216746382515e-05,
"loss": 1.6777,
"step": 3600
},
{
"epoch": 0.22,
"learning_rate": 6.250529757219834e-05,
"loss": 1.6302,
"step": 3620
},
{
"epoch": 0.22,
"learning_rate": 6.240842768057154e-05,
"loss": 1.7027,
"step": 3640
},
{
"epoch": 0.22,
"learning_rate": 6.231155778894473e-05,
"loss": 1.636,
"step": 3660
},
{
"epoch": 0.22,
"learning_rate": 6.221468789731792e-05,
"loss": 1.665,
"step": 3680
},
{
"epoch": 0.22,
"learning_rate": 6.211781800569111e-05,
"loss": 1.6566,
"step": 3700
},
{
"epoch": 0.23,
"learning_rate": 6.20209481140643e-05,
"loss": 1.6898,
"step": 3720
},
{
"epoch": 0.23,
"learning_rate": 6.192407822243749e-05,
"loss": 1.6244,
"step": 3740
},
{
"epoch": 0.23,
"learning_rate": 6.182720833081068e-05,
"loss": 1.6511,
"step": 3760
},
{
"epoch": 0.23,
"learning_rate": 6.173033843918387e-05,
"loss": 1.6601,
"step": 3780
},
{
"epoch": 0.23,
"learning_rate": 6.163346854755706e-05,
"loss": 1.6207,
"step": 3800
},
{
"epoch": 0.23,
"learning_rate": 6.153659865593025e-05,
"loss": 1.6243,
"step": 3820
},
{
"epoch": 0.23,
"learning_rate": 6.143972876430344e-05,
"loss": 1.6506,
"step": 3840
},
{
"epoch": 0.23,
"learning_rate": 6.134285887267664e-05,
"loss": 1.6521,
"step": 3860
},
{
"epoch": 0.23,
"learning_rate": 6.124598898104983e-05,
"loss": 1.6568,
"step": 3880
},
{
"epoch": 0.24,
"learning_rate": 6.114911908942302e-05,
"loss": 1.6896,
"step": 3900
},
{
"epoch": 0.24,
"learning_rate": 6.105224919779621e-05,
"loss": 1.647,
"step": 3920
},
{
"epoch": 0.24,
"learning_rate": 6.095537930616941e-05,
"loss": 1.6389,
"step": 3940
},
{
"epoch": 0.24,
"learning_rate": 6.08585094145426e-05,
"loss": 1.6302,
"step": 3960
},
{
"epoch": 0.24,
"learning_rate": 6.076163952291579e-05,
"loss": 1.6465,
"step": 3980
},
{
"epoch": 0.24,
"learning_rate": 6.066476963128898e-05,
"loss": 1.6929,
"step": 4000
},
{
"epoch": 0.24,
"learning_rate": 6.056789973966217e-05,
"loss": 1.7165,
"step": 4020
},
{
"epoch": 0.24,
"learning_rate": 6.047102984803536e-05,
"loss": 1.6415,
"step": 4040
},
{
"epoch": 0.25,
"learning_rate": 6.037415995640856e-05,
"loss": 1.6101,
"step": 4060
},
{
"epoch": 0.25,
"learning_rate": 6.027729006478175e-05,
"loss": 1.6642,
"step": 4080
},
{
"epoch": 0.25,
"learning_rate": 6.018042017315494e-05,
"loss": 1.6804,
"step": 4100
},
{
"epoch": 0.25,
"learning_rate": 6.008355028152813e-05,
"loss": 1.6298,
"step": 4120
},
{
"epoch": 0.25,
"learning_rate": 5.998668038990132e-05,
"loss": 1.6838,
"step": 4140
},
{
"epoch": 0.25,
"learning_rate": 5.988981049827451e-05,
"loss": 1.6595,
"step": 4160
},
{
"epoch": 0.25,
"learning_rate": 5.97929406066477e-05,
"loss": 1.6489,
"step": 4180
},
{
"epoch": 0.25,
"learning_rate": 5.969607071502089e-05,
"loss": 1.6692,
"step": 4200
},
{
"epoch": 0.26,
"learning_rate": 5.9599200823394084e-05,
"loss": 1.6966,
"step": 4220
},
{
"epoch": 0.26,
"learning_rate": 5.9502330931767274e-05,
"loss": 1.646,
"step": 4240
},
{
"epoch": 0.26,
"learning_rate": 5.9405461040140463e-05,
"loss": 1.6557,
"step": 4260
},
{
"epoch": 0.26,
"learning_rate": 5.930859114851366e-05,
"loss": 1.6043,
"step": 4280
},
{
"epoch": 0.26,
"learning_rate": 5.921172125688685e-05,
"loss": 1.6096,
"step": 4300
},
{
"epoch": 0.26,
"learning_rate": 5.911485136526004e-05,
"loss": 1.6354,
"step": 4320
},
{
"epoch": 0.26,
"learning_rate": 5.9017981473633236e-05,
"loss": 1.6252,
"step": 4340
},
{
"epoch": 0.26,
"learning_rate": 5.8921111582006425e-05,
"loss": 1.6732,
"step": 4360
},
{
"epoch": 0.27,
"learning_rate": 5.8824241690379615e-05,
"loss": 1.6357,
"step": 4380
},
{
"epoch": 0.27,
"learning_rate": 5.8727371798752805e-05,
"loss": 1.6793,
"step": 4400
},
{
"epoch": 0.27,
"learning_rate": 5.8630501907125994e-05,
"loss": 1.6414,
"step": 4420
},
{
"epoch": 0.27,
"learning_rate": 5.8533632015499184e-05,
"loss": 1.6417,
"step": 4440
},
{
"epoch": 0.27,
"learning_rate": 5.8436762123872374e-05,
"loss": 1.6241,
"step": 4460
},
{
"epoch": 0.27,
"learning_rate": 5.833989223224558e-05,
"loss": 1.6493,
"step": 4480
},
{
"epoch": 0.27,
"learning_rate": 5.8243022340618767e-05,
"loss": 1.6638,
"step": 4500
},
{
"epoch": 0.27,
"learning_rate": 5.8146152448991956e-05,
"loss": 1.6724,
"step": 4520
},
{
"epoch": 0.27,
"learning_rate": 5.8049282557365146e-05,
"loss": 1.6689,
"step": 4540
},
{
"epoch": 0.28,
"learning_rate": 5.7952412665738336e-05,
"loss": 1.6383,
"step": 4560
},
{
"epoch": 0.28,
"learning_rate": 5.7855542774111525e-05,
"loss": 1.6477,
"step": 4580
},
{
"epoch": 0.28,
"learning_rate": 5.7758672882484715e-05,
"loss": 1.6391,
"step": 4600
},
{
"epoch": 0.28,
"learning_rate": 5.7661802990857905e-05,
"loss": 1.6811,
"step": 4620
},
{
"epoch": 0.28,
"learning_rate": 5.7564933099231094e-05,
"loss": 1.6605,
"step": 4640
},
{
"epoch": 0.28,
"learning_rate": 5.746806320760429e-05,
"loss": 1.6345,
"step": 4660
},
{
"epoch": 0.28,
"learning_rate": 5.737119331597748e-05,
"loss": 1.6144,
"step": 4680
},
{
"epoch": 0.28,
"learning_rate": 5.727432342435068e-05,
"loss": 1.6525,
"step": 4700
},
{
"epoch": 0.29,
"learning_rate": 5.7177453532723866e-05,
"loss": 1.6995,
"step": 4720
},
{
"epoch": 0.29,
"learning_rate": 5.7080583641097056e-05,
"loss": 1.6183,
"step": 4740
},
{
"epoch": 0.29,
"learning_rate": 5.6983713749470246e-05,
"loss": 1.6517,
"step": 4760
},
{
"epoch": 0.29,
"learning_rate": 5.688684385784344e-05,
"loss": 1.6354,
"step": 4780
},
{
"epoch": 0.29,
"learning_rate": 5.678997396621663e-05,
"loss": 1.6626,
"step": 4800
},
{
"epoch": 0.29,
"learning_rate": 5.669310407458982e-05,
"loss": 1.6677,
"step": 4820
},
{
"epoch": 0.29,
"learning_rate": 5.660107767754436e-05,
"loss": 1.6899,
"step": 4840
},
{
"epoch": 0.29,
"learning_rate": 5.650420778591755e-05,
"loss": 1.6313,
"step": 4860
},
{
"epoch": 0.3,
"learning_rate": 5.640733789429074e-05,
"loss": 1.6399,
"step": 4880
},
{
"epoch": 0.3,
"learning_rate": 5.631046800266393e-05,
"loss": 1.6035,
"step": 4900
},
{
"epoch": 0.3,
"learning_rate": 5.6213598111037117e-05,
"loss": 1.6412,
"step": 4920
},
{
"epoch": 0.3,
"learning_rate": 5.6116728219410306e-05,
"loss": 1.6152,
"step": 4940
},
{
"epoch": 0.3,
"learning_rate": 5.6019858327783496e-05,
"loss": 1.6289,
"step": 4960
},
{
"epoch": 0.3,
"learning_rate": 5.5922988436156686e-05,
"loss": 1.6329,
"step": 4980
},
{
"epoch": 0.3,
"learning_rate": 5.582611854452988e-05,
"loss": 1.6427,
"step": 5000
},
{
"epoch": 0.3,
"learning_rate": 5.572924865290308e-05,
"loss": 1.6263,
"step": 5020
},
{
"epoch": 0.31,
"learning_rate": 5.563237876127627e-05,
"loss": 1.628,
"step": 5040
},
{
"epoch": 0.31,
"learning_rate": 5.553550886964946e-05,
"loss": 1.59,
"step": 5060
},
{
"epoch": 0.31,
"learning_rate": 5.543863897802265e-05,
"loss": 1.6063,
"step": 5080
},
{
"epoch": 0.31,
"learning_rate": 5.534176908639584e-05,
"loss": 1.6684,
"step": 5100
},
{
"epoch": 0.31,
"learning_rate": 5.5244899194769034e-05,
"loss": 1.6565,
"step": 5120
},
{
"epoch": 0.31,
"learning_rate": 5.514802930314222e-05,
"loss": 1.6514,
"step": 5140
},
{
"epoch": 0.31,
"learning_rate": 5.505115941151541e-05,
"loss": 1.6782,
"step": 5160
},
{
"epoch": 0.31,
"learning_rate": 5.49542895198886e-05,
"loss": 1.6394,
"step": 5180
},
{
"epoch": 0.31,
"learning_rate": 5.485741962826179e-05,
"loss": 1.6535,
"step": 5200
},
{
"epoch": 0.32,
"learning_rate": 5.476054973663498e-05,
"loss": 1.6642,
"step": 5220
},
{
"epoch": 0.32,
"learning_rate": 5.4663679845008185e-05,
"loss": 1.65,
"step": 5240
},
{
"epoch": 0.32,
"learning_rate": 5.4566809953381375e-05,
"loss": 1.653,
"step": 5260
},
{
"epoch": 0.32,
"learning_rate": 5.4469940061754564e-05,
"loss": 1.6605,
"step": 5280
},
{
"epoch": 0.32,
"learning_rate": 5.4373070170127754e-05,
"loss": 1.6758,
"step": 5300
},
{
"epoch": 0.32,
"learning_rate": 5.4276200278500944e-05,
"loss": 1.6901,
"step": 5320
},
{
"epoch": 0.32,
"learning_rate": 5.4179330386874133e-05,
"loss": 1.678,
"step": 5340
},
{
"epoch": 0.32,
"learning_rate": 5.408246049524732e-05,
"loss": 1.6638,
"step": 5360
},
{
"epoch": 0.33,
"learning_rate": 5.398559060362051e-05,
"loss": 1.6284,
"step": 5380
},
{
"epoch": 0.33,
"learning_rate": 5.38887207119937e-05,
"loss": 1.6429,
"step": 5400
},
{
"epoch": 0.33,
"learning_rate": 5.379185082036689e-05,
"loss": 1.6303,
"step": 5420
},
{
"epoch": 0.33,
"learning_rate": 5.3694980928740095e-05,
"loss": 1.6324,
"step": 5440
},
{
"epoch": 0.33,
"learning_rate": 5.3598111037113285e-05,
"loss": 1.6158,
"step": 5460
},
{
"epoch": 0.33,
"learning_rate": 5.3501241145486475e-05,
"loss": 1.655,
"step": 5480
},
{
"epoch": 0.33,
"learning_rate": 5.3404371253859664e-05,
"loss": 1.6224,
"step": 5500
},
{
"epoch": 0.33,
"learning_rate": 5.3307501362232854e-05,
"loss": 1.6428,
"step": 5520
},
{
"epoch": 0.34,
"learning_rate": 5.3210631470606044e-05,
"loss": 1.681,
"step": 5540
},
{
"epoch": 0.34,
"learning_rate": 5.311376157897924e-05,
"loss": 1.6493,
"step": 5560
},
{
"epoch": 0.34,
"learning_rate": 5.301689168735243e-05,
"loss": 1.6387,
"step": 5580
},
{
"epoch": 0.34,
"learning_rate": 5.292002179572562e-05,
"loss": 1.6817,
"step": 5600
},
{
"epoch": 0.34,
"learning_rate": 5.282315190409881e-05,
"loss": 1.6243,
"step": 5620
},
{
"epoch": 0.34,
"learning_rate": 5.2726282012472e-05,
"loss": 1.5869,
"step": 5640
},
{
"epoch": 0.34,
"learning_rate": 5.2629412120845195e-05,
"loss": 1.6693,
"step": 5660
},
{
"epoch": 0.34,
"learning_rate": 5.253254222921839e-05,
"loss": 1.6266,
"step": 5680
},
{
"epoch": 0.35,
"learning_rate": 5.243567233759158e-05,
"loss": 1.6395,
"step": 5700
},
{
"epoch": 0.35,
"learning_rate": 5.233880244596477e-05,
"loss": 1.6288,
"step": 5720
},
{
"epoch": 0.35,
"learning_rate": 5.2246776048919294e-05,
"loss": 1.7032,
"step": 5740
},
{
"epoch": 0.35,
"learning_rate": 5.214990615729249e-05,
"loss": 1.6419,
"step": 5760
},
{
"epoch": 0.35,
"learning_rate": 5.205303626566569e-05,
"loss": 1.6131,
"step": 5780
},
{
"epoch": 0.35,
"learning_rate": 5.1956166374038876e-05,
"loss": 1.6691,
"step": 5800
},
{
"epoch": 0.35,
"learning_rate": 5.1859296482412066e-05,
"loss": 1.656,
"step": 5820
},
{
"epoch": 0.35,
"learning_rate": 5.1762426590785256e-05,
"loss": 1.64,
"step": 5840
},
{
"epoch": 0.35,
"learning_rate": 5.1665556699158445e-05,
"loss": 1.6653,
"step": 5860
},
{
"epoch": 0.36,
"learning_rate": 5.1568686807531635e-05,
"loss": 1.6299,
"step": 5880
},
{
"epoch": 0.36,
"learning_rate": 5.147181691590483e-05,
"loss": 1.6345,
"step": 5900
},
{
"epoch": 0.36,
"learning_rate": 5.137494702427802e-05,
"loss": 1.6253,
"step": 5920
},
{
"epoch": 0.36,
"learning_rate": 5.127807713265121e-05,
"loss": 1.6713,
"step": 5940
},
{
"epoch": 0.36,
"learning_rate": 5.11812072410244e-05,
"loss": 1.6141,
"step": 5960
},
{
"epoch": 0.36,
"learning_rate": 5.108433734939759e-05,
"loss": 1.6324,
"step": 5980
},
{
"epoch": 0.36,
"learning_rate": 5.0987467457770787e-05,
"loss": 1.6248,
"step": 6000
},
{
"epoch": 0.36,
"learning_rate": 5.089059756614398e-05,
"loss": 1.615,
"step": 6020
},
{
"epoch": 0.37,
"learning_rate": 5.079372767451717e-05,
"loss": 1.6247,
"step": 6040
},
{
"epoch": 0.37,
"learning_rate": 5.069685778289036e-05,
"loss": 1.6629,
"step": 6060
},
{
"epoch": 0.37,
"learning_rate": 5.059998789126355e-05,
"loss": 1.6665,
"step": 6080
},
{
"epoch": 0.37,
"learning_rate": 5.050311799963674e-05,
"loss": 1.6781,
"step": 6100
},
{
"epoch": 0.37,
"learning_rate": 5.040624810800993e-05,
"loss": 1.7025,
"step": 6120
},
{
"epoch": 0.37,
"learning_rate": 5.030937821638312e-05,
"loss": 1.6107,
"step": 6140
},
{
"epoch": 0.37,
"learning_rate": 5.021250832475631e-05,
"loss": 1.6079,
"step": 6160
},
{
"epoch": 0.37,
"learning_rate": 5.01156384331295e-05,
"loss": 1.5942,
"step": 6180
},
{
"epoch": 0.38,
"learning_rate": 5.0018768541502704e-05,
"loss": 1.662,
"step": 6200
},
{
"epoch": 0.38,
"learning_rate": 4.992189864987589e-05,
"loss": 1.6141,
"step": 6220
},
{
"epoch": 0.38,
"learning_rate": 4.982502875824908e-05,
"loss": 1.6506,
"step": 6240
},
{
"epoch": 0.38,
"learning_rate": 4.972815886662227e-05,
"loss": 1.6635,
"step": 6260
},
{
"epoch": 0.38,
"learning_rate": 4.963128897499546e-05,
"loss": 1.6342,
"step": 6280
},
{
"epoch": 0.38,
"learning_rate": 4.953441908336865e-05,
"loss": 1.6126,
"step": 6300
},
{
"epoch": 0.38,
"learning_rate": 4.943754919174184e-05,
"loss": 1.6632,
"step": 6320
},
{
"epoch": 0.38,
"learning_rate": 4.934067930011504e-05,
"loss": 1.6529,
"step": 6340
},
{
"epoch": 0.39,
"learning_rate": 4.924380940848823e-05,
"loss": 1.6363,
"step": 6360
},
{
"epoch": 0.39,
"learning_rate": 4.914693951686142e-05,
"loss": 1.6081,
"step": 6380
},
{
"epoch": 0.39,
"learning_rate": 4.905006962523461e-05,
"loss": 1.6618,
"step": 6400
},
{
"epoch": 0.39,
"learning_rate": 4.8953199733607803e-05,
"loss": 1.6469,
"step": 6420
},
{
"epoch": 0.39,
"learning_rate": 4.885632984198099e-05,
"loss": 1.6323,
"step": 6440
},
{
"epoch": 0.39,
"learning_rate": 4.875945995035419e-05,
"loss": 1.6043,
"step": 6460
},
{
"epoch": 0.39,
"learning_rate": 4.866259005872738e-05,
"loss": 1.6382,
"step": 6480
},
{
"epoch": 0.39,
"learning_rate": 4.856572016710057e-05,
"loss": 1.6343,
"step": 6500
},
{
"epoch": 0.39,
"learning_rate": 4.846885027547376e-05,
"loss": 1.64,
"step": 6520
},
{
"epoch": 0.4,
"learning_rate": 4.837198038384695e-05,
"loss": 1.6238,
"step": 6540
},
{
"epoch": 0.4,
"learning_rate": 4.827511049222014e-05,
"loss": 1.6478,
"step": 6560
},
{
"epoch": 0.4,
"learning_rate": 4.817824060059333e-05,
"loss": 1.6598,
"step": 6580
},
{
"epoch": 0.4,
"learning_rate": 4.808137070896652e-05,
"loss": 1.598,
"step": 6600
},
{
"epoch": 0.4,
"learning_rate": 4.798450081733972e-05,
"loss": 1.6265,
"step": 6620
},
{
"epoch": 0.4,
"learning_rate": 4.788763092571291e-05,
"loss": 1.662,
"step": 6640
},
{
"epoch": 0.4,
"learning_rate": 4.77907610340861e-05,
"loss": 1.6587,
"step": 6660
},
{
"epoch": 0.4,
"learning_rate": 4.769389114245929e-05,
"loss": 1.6199,
"step": 6680
},
{
"epoch": 0.41,
"learning_rate": 4.759702125083248e-05,
"loss": 1.5937,
"step": 6700
},
{
"epoch": 0.41,
"learning_rate": 4.750015135920567e-05,
"loss": 1.6235,
"step": 6720
},
{
"epoch": 0.41,
"learning_rate": 4.740328146757886e-05,
"loss": 1.645,
"step": 6740
},
{
"epoch": 0.41,
"learning_rate": 4.730641157595205e-05,
"loss": 1.6179,
"step": 6760
},
{
"epoch": 0.41,
"learning_rate": 4.7209541684325245e-05,
"loss": 1.6384,
"step": 6780
},
{
"epoch": 0.41,
"learning_rate": 4.7112671792698434e-05,
"loss": 1.6708,
"step": 6800
},
{
"epoch": 0.41,
"learning_rate": 4.7015801901071624e-05,
"loss": 1.5984,
"step": 6820
},
{
"epoch": 0.41,
"learning_rate": 4.691893200944482e-05,
"loss": 1.583,
"step": 6840
},
{
"epoch": 0.42,
"learning_rate": 4.682206211781801e-05,
"loss": 1.6398,
"step": 6860
},
{
"epoch": 0.42,
"learning_rate": 4.67251922261912e-05,
"loss": 1.6267,
"step": 6880
},
{
"epoch": 0.42,
"learning_rate": 4.6628322334564396e-05,
"loss": 1.6761,
"step": 6900
},
{
"epoch": 0.42,
"learning_rate": 4.6531452442937586e-05,
"loss": 1.6057,
"step": 6920
},
{
"epoch": 0.42,
"learning_rate": 4.6434582551310776e-05,
"loss": 1.6732,
"step": 6940
},
{
"epoch": 0.42,
"learning_rate": 4.6337712659683965e-05,
"loss": 1.6742,
"step": 6960
},
{
"epoch": 0.42,
"learning_rate": 4.6240842768057155e-05,
"loss": 1.634,
"step": 6980
},
{
"epoch": 0.42,
"learning_rate": 4.6143972876430345e-05,
"loss": 1.6217,
"step": 7000
},
{
"epoch": 0.43,
"learning_rate": 4.6047102984803534e-05,
"loss": 1.6745,
"step": 7020
},
{
"epoch": 0.43,
"learning_rate": 4.595023309317674e-05,
"loss": 1.6547,
"step": 7040
},
{
"epoch": 0.43,
"learning_rate": 4.585336320154993e-05,
"loss": 1.6313,
"step": 7060
},
{
"epoch": 0.43,
"learning_rate": 4.575649330992312e-05,
"loss": 1.6233,
"step": 7080
},
{
"epoch": 0.43,
"learning_rate": 4.5659623418296306e-05,
"loss": 1.6077,
"step": 7100
},
{
"epoch": 0.43,
"learning_rate": 4.5562753526669496e-05,
"loss": 1.6164,
"step": 7120
},
{
"epoch": 0.43,
"learning_rate": 4.5465883635042686e-05,
"loss": 1.5924,
"step": 7140
},
{
"epoch": 0.43,
"learning_rate": 4.5369013743415875e-05,
"loss": 1.6472,
"step": 7160
},
{
"epoch": 0.43,
"learning_rate": 4.5272143851789065e-05,
"loss": 1.602,
"step": 7180
},
{
"epoch": 0.44,
"learning_rate": 4.5175273960162255e-05,
"loss": 1.6277,
"step": 7200
},
{
"epoch": 0.44,
"learning_rate": 4.507840406853545e-05,
"loss": 1.6479,
"step": 7220
},
{
"epoch": 0.44,
"learning_rate": 4.498153417690864e-05,
"loss": 1.6605,
"step": 7240
},
{
"epoch": 0.44,
"learning_rate": 4.488950777986318e-05,
"loss": 1.654,
"step": 7260
},
{
"epoch": 0.44,
"learning_rate": 4.479263788823637e-05,
"loss": 1.6573,
"step": 7280
},
{
"epoch": 0.44,
"learning_rate": 4.4695767996609557e-05,
"loss": 1.6305,
"step": 7300
},
{
"epoch": 0.44,
"learning_rate": 4.4598898104982746e-05,
"loss": 1.6183,
"step": 7320
},
{
"epoch": 0.44,
"learning_rate": 4.4502028213355936e-05,
"loss": 1.6172,
"step": 7340
},
{
"epoch": 0.45,
"learning_rate": 4.4405158321729126e-05,
"loss": 1.6151,
"step": 7360
},
{
"epoch": 0.45,
"learning_rate": 4.430828843010233e-05,
"loss": 1.6275,
"step": 7380
},
{
"epoch": 0.45,
"learning_rate": 4.421141853847552e-05,
"loss": 1.6516,
"step": 7400
},
{
"epoch": 0.45,
"learning_rate": 4.411454864684871e-05,
"loss": 1.6697,
"step": 7420
},
{
"epoch": 0.45,
"learning_rate": 4.40176787552219e-05,
"loss": 1.6268,
"step": 7440
},
{
"epoch": 0.45,
"learning_rate": 4.392080886359509e-05,
"loss": 1.6127,
"step": 7460
},
{
"epoch": 0.45,
"learning_rate": 4.382393897196828e-05,
"loss": 1.6206,
"step": 7480
},
{
"epoch": 0.45,
"learning_rate": 4.372706908034147e-05,
"loss": 1.6071,
"step": 7500
},
{
"epoch": 0.46,
"learning_rate": 4.3635042683296e-05,
"loss": 1.6238,
"step": 7520
},
{
"epoch": 0.46,
"learning_rate": 4.353817279166919e-05,
"loss": 1.6293,
"step": 7540
},
{
"epoch": 0.46,
"learning_rate": 4.344130290004239e-05,
"loss": 1.6227,
"step": 7560
},
{
"epoch": 0.46,
"learning_rate": 4.334443300841558e-05,
"loss": 1.6475,
"step": 7580
},
{
"epoch": 0.46,
"learning_rate": 4.324756311678877e-05,
"loss": 1.6461,
"step": 7600
},
{
"epoch": 0.46,
"learning_rate": 4.315069322516196e-05,
"loss": 1.6342,
"step": 7620
},
{
"epoch": 0.46,
"learning_rate": 4.305382333353515e-05,
"loss": 1.6426,
"step": 7640
},
{
"epoch": 0.46,
"learning_rate": 4.295695344190834e-05,
"loss": 1.6257,
"step": 7660
},
{
"epoch": 0.46,
"learning_rate": 4.286008355028153e-05,
"loss": 1.6107,
"step": 7680
},
{
"epoch": 0.47,
"learning_rate": 4.276321365865472e-05,
"loss": 1.6541,
"step": 7700
},
{
"epoch": 0.47,
"learning_rate": 4.266634376702792e-05,
"loss": 1.6058,
"step": 7720
},
{
"epoch": 0.47,
"learning_rate": 4.256947387540111e-05,
"loss": 1.6299,
"step": 7740
},
{
"epoch": 0.47,
"learning_rate": 4.24726039837743e-05,
"loss": 1.6259,
"step": 7760
},
{
"epoch": 0.47,
"learning_rate": 4.237573409214749e-05,
"loss": 1.5975,
"step": 7780
},
{
"epoch": 0.47,
"learning_rate": 4.227886420052068e-05,
"loss": 1.6052,
"step": 7800
},
{
"epoch": 0.47,
"learning_rate": 4.218199430889387e-05,
"loss": 1.6554,
"step": 7820
},
{
"epoch": 0.47,
"learning_rate": 4.208512441726706e-05,
"loss": 1.6382,
"step": 7840
},
{
"epoch": 0.48,
"learning_rate": 4.198825452564025e-05,
"loss": 1.6169,
"step": 7860
},
{
"epoch": 0.48,
"learning_rate": 4.1891384634013444e-05,
"loss": 1.5987,
"step": 7880
},
{
"epoch": 0.48,
"learning_rate": 4.1794514742386634e-05,
"loss": 1.6301,
"step": 7900
},
{
"epoch": 0.48,
"learning_rate": 4.169764485075983e-05,
"loss": 1.651,
"step": 7920
},
{
"epoch": 0.48,
"learning_rate": 4.160077495913302e-05,
"loss": 1.6717,
"step": 7940
},
{
"epoch": 0.48,
"learning_rate": 4.150390506750621e-05,
"loss": 1.6473,
"step": 7960
},
{
"epoch": 0.48,
"learning_rate": 4.14070351758794e-05,
"loss": 1.6392,
"step": 7980
},
{
"epoch": 0.48,
"learning_rate": 4.1310165284252596e-05,
"loss": 1.6427,
"step": 8000
},
{
"epoch": 0.49,
"learning_rate": 4.1213295392625785e-05,
"loss": 1.6286,
"step": 8020
},
{
"epoch": 0.49,
"learning_rate": 4.1116425500998975e-05,
"loss": 1.6028,
"step": 8040
},
{
"epoch": 0.49,
"learning_rate": 4.1019555609372165e-05,
"loss": 1.6379,
"step": 8060
},
{
"epoch": 0.49,
"learning_rate": 4.0922685717745354e-05,
"loss": 1.6602,
"step": 8080
},
{
"epoch": 0.49,
"learning_rate": 4.0825815826118544e-05,
"loss": 1.6526,
"step": 8100
},
{
"epoch": 0.49,
"learning_rate": 4.0728945934491734e-05,
"loss": 1.6584,
"step": 8120
},
{
"epoch": 0.49,
"learning_rate": 4.063207604286494e-05,
"loss": 1.6524,
"step": 8140
},
{
"epoch": 0.49,
"learning_rate": 4.053520615123813e-05,
"loss": 1.613,
"step": 8160
},
{
"epoch": 0.5,
"learning_rate": 4.0438336259611316e-05,
"loss": 1.6253,
"step": 8180
},
{
"epoch": 0.5,
"learning_rate": 4.0341466367984506e-05,
"loss": 1.6772,
"step": 8200
},
{
"epoch": 0.5,
"learning_rate": 4.0244596476357696e-05,
"loss": 1.6434,
"step": 8220
},
{
"epoch": 0.5,
"learning_rate": 4.0147726584730885e-05,
"loss": 1.6362,
"step": 8240
},
{
"epoch": 0.5,
"learning_rate": 4.0050856693104075e-05,
"loss": 1.6365,
"step": 8260
},
{
"epoch": 0.5,
"learning_rate": 3.9953986801477265e-05,
"loss": 1.6536,
"step": 8280
},
{
"epoch": 0.5,
"learning_rate": 3.985711690985046e-05,
"loss": 1.6025,
"step": 8300
},
{
"epoch": 0.5,
"learning_rate": 3.976024701822365e-05,
"loss": 1.6554,
"step": 8320
},
{
"epoch": 0.5,
"learning_rate": 3.966337712659684e-05,
"loss": 1.6463,
"step": 8340
},
{
"epoch": 0.51,
"learning_rate": 3.956650723497003e-05,
"loss": 1.6767,
"step": 8360
},
{
"epoch": 0.51,
"learning_rate": 3.9469637343343227e-05,
"loss": 1.6266,
"step": 8380
},
{
"epoch": 0.51,
"learning_rate": 3.9372767451716416e-05,
"loss": 1.6107,
"step": 8400
},
{
"epoch": 0.51,
"learning_rate": 3.9275897560089606e-05,
"loss": 1.6846,
"step": 8420
},
{
"epoch": 0.51,
"learning_rate": 3.91790276684628e-05,
"loss": 1.6271,
"step": 8440
},
{
"epoch": 0.51,
"learning_rate": 3.908215777683599e-05,
"loss": 1.6517,
"step": 8460
},
{
"epoch": 0.51,
"learning_rate": 3.898528788520918e-05,
"loss": 1.6377,
"step": 8480
},
{
"epoch": 0.51,
"learning_rate": 3.888841799358237e-05,
"loss": 1.6022,
"step": 8500
},
{
"epoch": 0.52,
"learning_rate": 3.879639159653691e-05,
"loss": 1.6196,
"step": 8520
},
{
"epoch": 0.52,
"learning_rate": 3.86995217049101e-05,
"loss": 1.609,
"step": 8540
},
{
"epoch": 0.52,
"learning_rate": 3.860265181328329e-05,
"loss": 1.5948,
"step": 8560
},
{
"epoch": 0.52,
"learning_rate": 3.8505781921656477e-05,
"loss": 1.6345,
"step": 8580
},
{
"epoch": 0.52,
"learning_rate": 3.8408912030029666e-05,
"loss": 1.6538,
"step": 8600
},
{
"epoch": 0.52,
"learning_rate": 3.831204213840286e-05,
"loss": 1.5958,
"step": 8620
},
{
"epoch": 0.52,
"learning_rate": 3.821517224677605e-05,
"loss": 1.5788,
"step": 8640
},
{
"epoch": 0.52,
"learning_rate": 3.811830235514924e-05,
"loss": 1.6435,
"step": 8660
},
{
"epoch": 0.53,
"learning_rate": 3.802143246352243e-05,
"loss": 1.6344,
"step": 8680
},
{
"epoch": 0.53,
"learning_rate": 3.792456257189562e-05,
"loss": 1.6415,
"step": 8700
},
{
"epoch": 0.53,
"learning_rate": 3.782769268026882e-05,
"loss": 1.6518,
"step": 8720
},
{
"epoch": 0.53,
"learning_rate": 3.773082278864201e-05,
"loss": 1.6454,
"step": 8740
},
{
"epoch": 0.53,
"learning_rate": 3.76339528970152e-05,
"loss": 1.6205,
"step": 8760
},
{
"epoch": 0.53,
"learning_rate": 3.7537083005388394e-05,
"loss": 1.6164,
"step": 8780
},
{
"epoch": 0.53,
"learning_rate": 3.744021311376158e-05,
"loss": 1.6247,
"step": 8800
},
{
"epoch": 0.53,
"learning_rate": 3.734334322213477e-05,
"loss": 1.6439,
"step": 8820
},
{
"epoch": 0.54,
"learning_rate": 3.724647333050797e-05,
"loss": 1.6466,
"step": 8840
},
{
"epoch": 0.54,
"learning_rate": 3.714960343888116e-05,
"loss": 1.6438,
"step": 8860
},
{
"epoch": 0.54,
"learning_rate": 3.705273354725435e-05,
"loss": 1.6367,
"step": 8880
},
{
"epoch": 0.54,
"learning_rate": 3.695586365562754e-05,
"loss": 1.6494,
"step": 8900
},
{
"epoch": 0.54,
"learning_rate": 3.685899376400073e-05,
"loss": 1.638,
"step": 8920
},
{
"epoch": 0.54,
"learning_rate": 3.6762123872373925e-05,
"loss": 1.6599,
"step": 8940
},
{
"epoch": 0.54,
"learning_rate": 3.6665253980747114e-05,
"loss": 1.6203,
"step": 8960
},
{
"epoch": 0.54,
"learning_rate": 3.6568384089120304e-05,
"loss": 1.6327,
"step": 8980
},
{
"epoch": 0.54,
"learning_rate": 3.6471514197493494e-05,
"loss": 1.6339,
"step": 9000
},
{
"epoch": 0.55,
"learning_rate": 3.637464430586668e-05,
"loss": 1.6199,
"step": 9020
},
{
"epoch": 0.55,
"learning_rate": 3.627777441423988e-05,
"loss": 1.6392,
"step": 9040
},
{
"epoch": 0.55,
"learning_rate": 3.618090452261307e-05,
"loss": 1.6041,
"step": 9060
},
{
"epoch": 0.55,
"learning_rate": 3.608403463098626e-05,
"loss": 1.6333,
"step": 9080
},
{
"epoch": 0.55,
"learning_rate": 3.598716473935945e-05,
"loss": 1.6227,
"step": 9100
},
{
"epoch": 0.55,
"learning_rate": 3.589029484773264e-05,
"loss": 1.6119,
"step": 9120
},
{
"epoch": 0.55,
"learning_rate": 3.579342495610583e-05,
"loss": 1.6078,
"step": 9140
},
{
"epoch": 0.55,
"learning_rate": 3.5696555064479024e-05,
"loss": 1.6142,
"step": 9160
},
{
"epoch": 0.56,
"learning_rate": 3.5599685172852214e-05,
"loss": 1.6544,
"step": 9180
},
{
"epoch": 0.56,
"learning_rate": 3.5502815281225404e-05,
"loss": 1.6159,
"step": 9200
},
{
"epoch": 0.56,
"learning_rate": 3.54059453895986e-05,
"loss": 1.6207,
"step": 9220
},
{
"epoch": 0.56,
"learning_rate": 3.530907549797179e-05,
"loss": 1.6188,
"step": 9240
},
{
"epoch": 0.56,
"learning_rate": 3.521220560634498e-05,
"loss": 1.6436,
"step": 9260
},
{
"epoch": 0.56,
"learning_rate": 3.5115335714718176e-05,
"loss": 1.6105,
"step": 9280
},
{
"epoch": 0.56,
"learning_rate": 3.5018465823091366e-05,
"loss": 1.587,
"step": 9300
},
{
"epoch": 0.56,
"learning_rate": 3.4921595931464555e-05,
"loss": 1.6655,
"step": 9320
},
{
"epoch": 0.57,
"learning_rate": 3.4824726039837745e-05,
"loss": 1.599,
"step": 9340
},
{
"epoch": 0.57,
"learning_rate": 3.472785614821094e-05,
"loss": 1.656,
"step": 9360
},
{
"epoch": 0.57,
"learning_rate": 3.463098625658413e-05,
"loss": 1.6239,
"step": 9380
},
{
"epoch": 0.57,
"learning_rate": 3.453411636495732e-05,
"loss": 1.6149,
"step": 9400
},
{
"epoch": 0.57,
"learning_rate": 3.443724647333051e-05,
"loss": 1.5823,
"step": 9420
},
{
"epoch": 0.57,
"learning_rate": 3.43403765817037e-05,
"loss": 1.6556,
"step": 9440
},
{
"epoch": 0.57,
"learning_rate": 3.4243506690076897e-05,
"loss": 1.642,
"step": 9460
},
{
"epoch": 0.57,
"learning_rate": 3.4146636798450086e-05,
"loss": 1.6644,
"step": 9480
},
{
"epoch": 0.58,
"learning_rate": 3.4049766906823276e-05,
"loss": 1.6018,
"step": 9500
},
{
"epoch": 0.58,
"learning_rate": 3.3952897015196466e-05,
"loss": 1.6551,
"step": 9520
},
{
"epoch": 0.58,
"learning_rate": 3.3856027123569655e-05,
"loss": 1.6417,
"step": 9540
},
{
"epoch": 0.58,
"learning_rate": 3.3759157231942845e-05,
"loss": 1.635,
"step": 9560
},
{
"epoch": 0.58,
"learning_rate": 3.366228734031604e-05,
"loss": 1.5891,
"step": 9580
},
{
"epoch": 0.58,
"learning_rate": 3.356541744868923e-05,
"loss": 1.6094,
"step": 9600
},
{
"epoch": 0.58,
"learning_rate": 3.346854755706242e-05,
"loss": 1.6072,
"step": 9620
},
{
"epoch": 0.58,
"learning_rate": 3.337167766543561e-05,
"loss": 1.5916,
"step": 9640
},
{
"epoch": 0.58,
"learning_rate": 3.327480777380881e-05,
"loss": 1.6311,
"step": 9660
},
{
"epoch": 0.59,
"learning_rate": 3.3177937882181996e-05,
"loss": 1.5902,
"step": 9680
},
{
"epoch": 0.59,
"learning_rate": 3.3081067990555186e-05,
"loss": 1.6291,
"step": 9700
},
{
"epoch": 0.59,
"learning_rate": 3.298419809892838e-05,
"loss": 1.6226,
"step": 9720
},
{
"epoch": 0.59,
"learning_rate": 3.288732820730157e-05,
"loss": 1.6495,
"step": 9740
},
{
"epoch": 0.59,
"learning_rate": 3.279045831567476e-05,
"loss": 1.6452,
"step": 9760
},
{
"epoch": 0.59,
"learning_rate": 3.269358842404796e-05,
"loss": 1.6255,
"step": 9780
},
{
"epoch": 0.59,
"learning_rate": 3.259671853242115e-05,
"loss": 1.6299,
"step": 9800
},
{
"epoch": 0.59,
"learning_rate": 3.249984864079434e-05,
"loss": 1.6181,
"step": 9820
},
{
"epoch": 0.6,
"learning_rate": 3.240297874916753e-05,
"loss": 1.5816,
"step": 9840
},
{
"epoch": 0.6,
"learning_rate": 3.230610885754072e-05,
"loss": 1.6412,
"step": 9860
},
{
"epoch": 0.6,
"learning_rate": 3.2209238965913913e-05,
"loss": 1.6286,
"step": 9880
},
{
"epoch": 0.6,
"learning_rate": 3.21123690742871e-05,
"loss": 1.6004,
"step": 9900
},
{
"epoch": 0.6,
"learning_rate": 3.201549918266029e-05,
"loss": 1.6279,
"step": 9920
},
{
"epoch": 0.6,
"learning_rate": 3.191862929103348e-05,
"loss": 1.6091,
"step": 9940
},
{
"epoch": 0.6,
"learning_rate": 3.182175939940667e-05,
"loss": 1.6441,
"step": 9960
},
{
"epoch": 0.6,
"learning_rate": 3.172488950777987e-05,
"loss": 1.607,
"step": 9980
},
{
"epoch": 0.61,
"learning_rate": 3.162801961615306e-05,
"loss": 1.6157,
"step": 10000
},
{
"epoch": 0.61,
"learning_rate": 3.153114972452625e-05,
"loss": 1.5979,
"step": 10020
},
{
"epoch": 0.61,
"learning_rate": 3.143427983289944e-05,
"loss": 1.6115,
"step": 10040
},
{
"epoch": 0.61,
"learning_rate": 3.133740994127263e-05,
"loss": 1.6077,
"step": 10060
},
{
"epoch": 0.61,
"learning_rate": 3.124054004964582e-05,
"loss": 1.6243,
"step": 10080
},
{
"epoch": 0.61,
"learning_rate": 3.114367015801901e-05,
"loss": 1.6196,
"step": 10100
},
{
"epoch": 0.61,
"learning_rate": 3.10468002663922e-05,
"loss": 1.617,
"step": 10120
},
{
"epoch": 0.61,
"learning_rate": 3.094993037476539e-05,
"loss": 1.5994,
"step": 10140
},
{
"epoch": 0.62,
"learning_rate": 3.085306048313859e-05,
"loss": 1.6345,
"step": 10160
},
{
"epoch": 0.62,
"learning_rate": 3.075619059151178e-05,
"loss": 1.6041,
"step": 10180
},
{
"epoch": 0.62,
"learning_rate": 3.065932069988497e-05,
"loss": 1.6216,
"step": 10200
},
{
"epoch": 0.62,
"learning_rate": 3.0562450808258165e-05,
"loss": 1.662,
"step": 10220
},
{
"epoch": 0.62,
"learning_rate": 3.046558091663135e-05,
"loss": 1.6519,
"step": 10240
},
{
"epoch": 0.62,
"learning_rate": 3.036871102500454e-05,
"loss": 1.5814,
"step": 10260
},
{
"epoch": 0.62,
"learning_rate": 3.0271841133377734e-05,
"loss": 1.5762,
"step": 10280
},
{
"epoch": 0.62,
"learning_rate": 3.0174971241750927e-05,
"loss": 1.6593,
"step": 10300
},
{
"epoch": 0.62,
"learning_rate": 3.0078101350124117e-05,
"loss": 1.6166,
"step": 10320
},
{
"epoch": 0.63,
"learning_rate": 2.998123145849731e-05,
"loss": 1.5812,
"step": 10340
},
{
"epoch": 0.63,
"learning_rate": 2.98843615668705e-05,
"loss": 1.6252,
"step": 10360
},
{
"epoch": 0.63,
"learning_rate": 2.978749167524369e-05,
"loss": 1.6418,
"step": 10380
},
{
"epoch": 0.63,
"learning_rate": 2.9690621783616886e-05,
"loss": 1.5902,
"step": 10400
},
{
"epoch": 0.63,
"learning_rate": 2.9593751891990075e-05,
"loss": 1.6068,
"step": 10420
},
{
"epoch": 0.63,
"learning_rate": 2.9496882000363265e-05,
"loss": 1.6119,
"step": 10440
},
{
"epoch": 0.63,
"learning_rate": 2.9400012108736455e-05,
"loss": 1.6209,
"step": 10460
},
{
"epoch": 0.63,
"learning_rate": 2.9303142217109644e-05,
"loss": 1.6088,
"step": 10480
},
{
"epoch": 0.64,
"learning_rate": 2.9206272325482837e-05,
"loss": 1.6232,
"step": 10500
},
{
"epoch": 0.64,
"learning_rate": 2.910940243385603e-05,
"loss": 1.6127,
"step": 10520
},
{
"epoch": 0.64,
"learning_rate": 2.901253254222922e-05,
"loss": 1.6084,
"step": 10540
},
{
"epoch": 0.64,
"learning_rate": 2.8915662650602413e-05,
"loss": 1.6199,
"step": 10560
},
{
"epoch": 0.64,
"learning_rate": 2.8818792758975603e-05,
"loss": 1.621,
"step": 10580
},
{
"epoch": 0.64,
"learning_rate": 2.8721922867348792e-05,
"loss": 1.5999,
"step": 10600
},
{
"epoch": 0.64,
"learning_rate": 2.862505297572199e-05,
"loss": 1.6145,
"step": 10620
},
{
"epoch": 0.64,
"learning_rate": 2.852818308409518e-05,
"loss": 1.6529,
"step": 10640
},
{
"epoch": 0.65,
"learning_rate": 2.8431313192468368e-05,
"loss": 1.6412,
"step": 10660
},
{
"epoch": 0.65,
"learning_rate": 2.8334443300841558e-05,
"loss": 1.6285,
"step": 10680
},
{
"epoch": 0.65,
"learning_rate": 2.8237573409214747e-05,
"loss": 1.6022,
"step": 10700
},
{
"epoch": 0.65,
"learning_rate": 2.8140703517587944e-05,
"loss": 1.6413,
"step": 10720
},
{
"epoch": 0.65,
"learning_rate": 2.8043833625961134e-05,
"loss": 1.6499,
"step": 10740
},
{
"epoch": 0.65,
"learning_rate": 2.7946963734334323e-05,
"loss": 1.6409,
"step": 10760
},
{
"epoch": 0.65,
"learning_rate": 2.7850093842707516e-05,
"loss": 1.6504,
"step": 10780
},
{
"epoch": 0.65,
"learning_rate": 2.7753223951080706e-05,
"loss": 1.6054,
"step": 10800
},
{
"epoch": 0.66,
"learning_rate": 2.76563540594539e-05,
"loss": 1.6421,
"step": 10820
},
{
"epoch": 0.66,
"learning_rate": 2.7559484167827092e-05,
"loss": 1.5858,
"step": 10840
},
{
"epoch": 0.66,
"learning_rate": 2.7462614276200282e-05,
"loss": 1.6222,
"step": 10860
},
{
"epoch": 0.66,
"learning_rate": 2.736574438457347e-05,
"loss": 1.6614,
"step": 10880
},
{
"epoch": 0.66,
"learning_rate": 2.726887449294666e-05,
"loss": 1.6056,
"step": 10900
},
{
"epoch": 0.66,
"learning_rate": 2.717200460131985e-05,
"loss": 1.6194,
"step": 10920
},
{
"epoch": 0.66,
"learning_rate": 2.7075134709693047e-05,
"loss": 1.6388,
"step": 10940
},
{
"epoch": 0.66,
"learning_rate": 2.6978264818066237e-05,
"loss": 1.6386,
"step": 10960
},
{
"epoch": 0.66,
"learning_rate": 2.6881394926439427e-05,
"loss": 1.6303,
"step": 10980
},
{
"epoch": 0.67,
"learning_rate": 2.678452503481262e-05,
"loss": 1.6549,
"step": 11000
},
{
"epoch": 0.67,
"learning_rate": 2.668765514318581e-05,
"loss": 1.6619,
"step": 11020
},
{
"epoch": 0.67,
"learning_rate": 2.6590785251559002e-05,
"loss": 1.6272,
"step": 11040
},
{
"epoch": 0.67,
"learning_rate": 2.6493915359932195e-05,
"loss": 1.6504,
"step": 11060
},
{
"epoch": 0.67,
"learning_rate": 2.6397045468305385e-05,
"loss": 1.6099,
"step": 11080
},
{
"epoch": 0.67,
"learning_rate": 2.6300175576678575e-05,
"loss": 1.6438,
"step": 11100
},
{
"epoch": 0.67,
"learning_rate": 2.6203305685051764e-05,
"loss": 1.6325,
"step": 11120
},
{
"epoch": 0.67,
"learning_rate": 2.610643579342496e-05,
"loss": 1.5917,
"step": 11140
},
{
"epoch": 0.68,
"learning_rate": 2.600956590179815e-05,
"loss": 1.6323,
"step": 11160
},
{
"epoch": 0.68,
"learning_rate": 2.591269601017134e-05,
"loss": 1.615,
"step": 11180
},
{
"epoch": 0.68,
"learning_rate": 2.581582611854453e-05,
"loss": 1.6116,
"step": 11200
},
{
"epoch": 0.68,
"learning_rate": 2.5718956226917723e-05,
"loss": 1.6488,
"step": 11220
},
{
"epoch": 0.68,
"learning_rate": 2.5622086335290916e-05,
"loss": 1.626,
"step": 11240
},
{
"epoch": 0.68,
"learning_rate": 2.5525216443664106e-05,
"loss": 1.6602,
"step": 11260
},
{
"epoch": 0.68,
"learning_rate": 2.54283465520373e-05,
"loss": 1.6268,
"step": 11280
},
{
"epoch": 0.68,
"learning_rate": 2.533147666041049e-05,
"loss": 1.5818,
"step": 11300
},
{
"epoch": 0.69,
"learning_rate": 2.5234606768783678e-05,
"loss": 1.6234,
"step": 11320
},
{
"epoch": 0.69,
"learning_rate": 2.5137736877156868e-05,
"loss": 1.6101,
"step": 11340
},
{
"epoch": 0.69,
"learning_rate": 2.5040866985530064e-05,
"loss": 1.5931,
"step": 11360
},
{
"epoch": 0.69,
"learning_rate": 2.4943997093903254e-05,
"loss": 1.5965,
"step": 11380
},
{
"epoch": 0.69,
"learning_rate": 2.4847127202276443e-05,
"loss": 1.6204,
"step": 11400
},
{
"epoch": 0.69,
"learning_rate": 2.4750257310649633e-05,
"loss": 1.6493,
"step": 11420
},
{
"epoch": 0.69,
"learning_rate": 2.4653387419022826e-05,
"loss": 1.6024,
"step": 11440
},
{
"epoch": 0.69,
"learning_rate": 2.455651752739602e-05,
"loss": 1.6381,
"step": 11460
},
{
"epoch": 0.7,
"learning_rate": 2.445964763576921e-05,
"loss": 1.6501,
"step": 11480
},
{
"epoch": 0.7,
"learning_rate": 2.4362777744142402e-05,
"loss": 1.6419,
"step": 11500
},
{
"epoch": 0.7,
"learning_rate": 2.4275594841678275e-05,
"loss": 1.6053,
"step": 11520
},
{
"epoch": 0.7,
"learning_rate": 2.4178724950051464e-05,
"loss": 1.633,
"step": 11540
},
{
"epoch": 0.7,
"learning_rate": 2.4081855058424654e-05,
"loss": 1.6474,
"step": 11560
},
{
"epoch": 0.7,
"learning_rate": 2.3984985166797844e-05,
"loss": 1.6556,
"step": 11580
},
{
"epoch": 0.7,
"learning_rate": 2.388811527517104e-05,
"loss": 1.574,
"step": 11600
},
{
"epoch": 0.7,
"learning_rate": 2.379124538354423e-05,
"loss": 1.6143,
"step": 11620
},
{
"epoch": 0.7,
"learning_rate": 2.369437549191742e-05,
"loss": 1.5776,
"step": 11640
},
{
"epoch": 0.71,
"learning_rate": 2.3597505600290613e-05,
"loss": 1.6133,
"step": 11660
},
{
"epoch": 0.71,
"learning_rate": 2.3500635708663802e-05,
"loss": 1.5879,
"step": 11680
},
{
"epoch": 0.71,
"learning_rate": 2.3403765817036992e-05,
"loss": 1.6376,
"step": 11700
},
{
"epoch": 0.71,
"learning_rate": 2.330689592541019e-05,
"loss": 1.643,
"step": 11720
},
{
"epoch": 0.71,
"learning_rate": 2.3210026033783378e-05,
"loss": 1.6301,
"step": 11740
},
{
"epoch": 0.71,
"learning_rate": 2.3113156142156568e-05,
"loss": 1.6139,
"step": 11760
},
{
"epoch": 0.71,
"learning_rate": 2.3016286250529757e-05,
"loss": 1.6043,
"step": 11780
},
{
"epoch": 0.71,
"learning_rate": 2.2919416358902947e-05,
"loss": 1.6527,
"step": 11800
},
{
"epoch": 0.72,
"learning_rate": 2.2822546467276143e-05,
"loss": 1.6155,
"step": 11820
},
{
"epoch": 0.72,
"learning_rate": 2.2725676575649333e-05,
"loss": 1.6381,
"step": 11840
},
{
"epoch": 0.72,
"learning_rate": 2.2628806684022523e-05,
"loss": 1.6296,
"step": 11860
},
{
"epoch": 0.72,
"learning_rate": 2.2531936792395716e-05,
"loss": 1.6015,
"step": 11880
},
{
"epoch": 0.72,
"learning_rate": 2.2435066900768906e-05,
"loss": 1.6142,
"step": 11900
},
{
"epoch": 0.72,
"learning_rate": 2.23381970091421e-05,
"loss": 1.6117,
"step": 11920
},
{
"epoch": 0.72,
"learning_rate": 2.224132711751529e-05,
"loss": 1.6364,
"step": 11940
},
{
"epoch": 0.72,
"learning_rate": 2.214445722588848e-05,
"loss": 1.6433,
"step": 11960
},
{
"epoch": 0.73,
"learning_rate": 2.204758733426167e-05,
"loss": 1.6061,
"step": 11980
},
{
"epoch": 0.73,
"learning_rate": 2.195071744263486e-05,
"loss": 1.635,
"step": 12000
},
{
"epoch": 0.73,
"learning_rate": 2.1853847551008057e-05,
"loss": 1.6139,
"step": 12020
},
{
"epoch": 0.73,
"learning_rate": 2.1756977659381247e-05,
"loss": 1.5924,
"step": 12040
},
{
"epoch": 0.73,
"learning_rate": 2.1660107767754436e-05,
"loss": 1.6072,
"step": 12060
},
{
"epoch": 0.73,
"learning_rate": 2.1563237876127626e-05,
"loss": 1.6365,
"step": 12080
},
{
"epoch": 0.73,
"learning_rate": 2.146636798450082e-05,
"loss": 1.623,
"step": 12100
},
{
"epoch": 0.73,
"learning_rate": 2.1369498092874012e-05,
"loss": 1.5519,
"step": 12120
},
{
"epoch": 0.74,
"learning_rate": 2.1272628201247202e-05,
"loss": 1.6497,
"step": 12140
},
{
"epoch": 0.74,
"learning_rate": 2.1175758309620395e-05,
"loss": 1.584,
"step": 12160
},
{
"epoch": 0.74,
"learning_rate": 2.1078888417993585e-05,
"loss": 1.6577,
"step": 12180
},
{
"epoch": 0.74,
"learning_rate": 2.0982018526366774e-05,
"loss": 1.6174,
"step": 12200
},
{
"epoch": 0.74,
"learning_rate": 2.0885148634739964e-05,
"loss": 1.6349,
"step": 12220
},
{
"epoch": 0.74,
"learning_rate": 2.078827874311316e-05,
"loss": 1.6145,
"step": 12240
},
{
"epoch": 0.74,
"learning_rate": 2.069140885148635e-05,
"loss": 1.6049,
"step": 12260
},
{
"epoch": 0.74,
"learning_rate": 2.059453895985954e-05,
"loss": 1.6141,
"step": 12280
},
{
"epoch": 0.74,
"learning_rate": 2.049766906823273e-05,
"loss": 1.5754,
"step": 12300
},
{
"epoch": 0.75,
"learning_rate": 2.0400799176605922e-05,
"loss": 1.6248,
"step": 12320
},
{
"epoch": 0.75,
"learning_rate": 2.0303929284979115e-05,
"loss": 1.6399,
"step": 12340
},
{
"epoch": 0.75,
"learning_rate": 2.0207059393352305e-05,
"loss": 1.6479,
"step": 12360
},
{
"epoch": 0.75,
"learning_rate": 2.0110189501725498e-05,
"loss": 1.6281,
"step": 12380
},
{
"epoch": 0.75,
"learning_rate": 2.0013319610098688e-05,
"loss": 1.6421,
"step": 12400
},
{
"epoch": 0.75,
"learning_rate": 1.991644971847188e-05,
"loss": 1.6336,
"step": 12420
},
{
"epoch": 0.75,
"learning_rate": 1.981957982684507e-05,
"loss": 1.6385,
"step": 12440
},
{
"epoch": 0.75,
"learning_rate": 1.972270993521826e-05,
"loss": 1.6422,
"step": 12460
},
{
"epoch": 0.76,
"learning_rate": 1.9625840043591453e-05,
"loss": 1.6494,
"step": 12480
},
{
"epoch": 0.76,
"learning_rate": 1.9528970151964643e-05,
"loss": 1.6346,
"step": 12500
},
{
"epoch": 0.76,
"learning_rate": 1.9441787249500516e-05,
"loss": 1.6043,
"step": 12520
},
{
"epoch": 0.76,
"learning_rate": 1.9344917357873705e-05,
"loss": 1.6199,
"step": 12540
},
{
"epoch": 0.76,
"learning_rate": 1.92480474662469e-05,
"loss": 1.6253,
"step": 12560
},
{
"epoch": 0.76,
"learning_rate": 1.915117757462009e-05,
"loss": 1.5972,
"step": 12580
},
{
"epoch": 0.76,
"learning_rate": 1.905430768299328e-05,
"loss": 1.6235,
"step": 12600
},
{
"epoch": 0.76,
"learning_rate": 1.8957437791366474e-05,
"loss": 1.6214,
"step": 12620
},
{
"epoch": 0.77,
"learning_rate": 1.8860567899739664e-05,
"loss": 1.6023,
"step": 12640
},
{
"epoch": 0.77,
"learning_rate": 1.8763698008112854e-05,
"loss": 1.6415,
"step": 12660
},
{
"epoch": 0.77,
"learning_rate": 1.8666828116486047e-05,
"loss": 1.626,
"step": 12680
},
{
"epoch": 0.77,
"learning_rate": 1.8569958224859236e-05,
"loss": 1.6269,
"step": 12700
},
{
"epoch": 0.77,
"learning_rate": 1.847308833323243e-05,
"loss": 1.6438,
"step": 12720
},
{
"epoch": 0.77,
"learning_rate": 1.837621844160562e-05,
"loss": 1.6215,
"step": 12740
},
{
"epoch": 0.77,
"learning_rate": 1.827934854997881e-05,
"loss": 1.6225,
"step": 12760
},
{
"epoch": 0.77,
"learning_rate": 1.8182478658352002e-05,
"loss": 1.6084,
"step": 12780
},
{
"epoch": 0.77,
"learning_rate": 1.8085608766725195e-05,
"loss": 1.6473,
"step": 12800
},
{
"epoch": 0.78,
"learning_rate": 1.7988738875098384e-05,
"loss": 1.6381,
"step": 12820
},
{
"epoch": 0.78,
"learning_rate": 1.7891868983471578e-05,
"loss": 1.6372,
"step": 12840
},
{
"epoch": 0.78,
"learning_rate": 1.7794999091844767e-05,
"loss": 1.5853,
"step": 12860
},
{
"epoch": 0.78,
"learning_rate": 1.769812920021796e-05,
"loss": 1.6291,
"step": 12880
},
{
"epoch": 0.78,
"learning_rate": 1.760125930859115e-05,
"loss": 1.5902,
"step": 12900
},
{
"epoch": 0.78,
"learning_rate": 1.750438941696434e-05,
"loss": 1.6082,
"step": 12920
},
{
"epoch": 0.78,
"learning_rate": 1.7407519525337533e-05,
"loss": 1.6108,
"step": 12940
},
{
"epoch": 0.78,
"learning_rate": 1.7310649633710722e-05,
"loss": 1.5951,
"step": 12960
},
{
"epoch": 0.79,
"learning_rate": 1.7213779742083915e-05,
"loss": 1.618,
"step": 12980
},
{
"epoch": 0.79,
"learning_rate": 1.7116909850457105e-05,
"loss": 1.6123,
"step": 13000
},
{
"epoch": 0.79,
"learning_rate": 1.7020039958830298e-05,
"loss": 1.6595,
"step": 13020
},
{
"epoch": 0.79,
"learning_rate": 1.6923170067203488e-05,
"loss": 1.6186,
"step": 13040
},
{
"epoch": 0.79,
"learning_rate": 1.682630017557668e-05,
"loss": 1.5884,
"step": 13060
},
{
"epoch": 0.79,
"learning_rate": 1.672943028394987e-05,
"loss": 1.6596,
"step": 13080
},
{
"epoch": 0.79,
"learning_rate": 1.6632560392323064e-05,
"loss": 1.6358,
"step": 13100
},
{
"epoch": 0.79,
"learning_rate": 1.6535690500696253e-05,
"loss": 1.5966,
"step": 13120
},
{
"epoch": 0.8,
"learning_rate": 1.6438820609069446e-05,
"loss": 1.6002,
"step": 13140
},
{
"epoch": 0.8,
"learning_rate": 1.6341950717442636e-05,
"loss": 1.6349,
"step": 13160
},
{
"epoch": 0.8,
"learning_rate": 1.6245080825815826e-05,
"loss": 1.633,
"step": 13180
},
{
"epoch": 0.8,
"learning_rate": 1.614821093418902e-05,
"loss": 1.62,
"step": 13200
},
{
"epoch": 0.8,
"learning_rate": 1.605134104256221e-05,
"loss": 1.6009,
"step": 13220
},
{
"epoch": 0.8,
"learning_rate": 1.59544711509354e-05,
"loss": 1.6092,
"step": 13240
},
{
"epoch": 0.8,
"learning_rate": 1.585760125930859e-05,
"loss": 1.6431,
"step": 13260
},
{
"epoch": 0.8,
"learning_rate": 1.5760731367681784e-05,
"loss": 1.6354,
"step": 13280
},
{
"epoch": 0.81,
"learning_rate": 1.5663861476054977e-05,
"loss": 1.6481,
"step": 13300
},
{
"epoch": 0.81,
"learning_rate": 1.5566991584428167e-05,
"loss": 1.615,
"step": 13320
},
{
"epoch": 0.81,
"learning_rate": 1.5470121692801357e-05,
"loss": 1.6221,
"step": 13340
},
{
"epoch": 0.81,
"learning_rate": 1.537325180117455e-05,
"loss": 1.6417,
"step": 13360
},
{
"epoch": 0.81,
"learning_rate": 1.527638190954774e-05,
"loss": 1.6291,
"step": 13380
},
{
"epoch": 0.81,
"learning_rate": 1.5179512017920932e-05,
"loss": 1.6075,
"step": 13400
},
{
"epoch": 0.81,
"learning_rate": 1.5082642126294122e-05,
"loss": 1.6505,
"step": 13420
},
{
"epoch": 0.81,
"learning_rate": 1.4985772234667313e-05,
"loss": 1.6026,
"step": 13440
},
{
"epoch": 0.81,
"learning_rate": 1.4888902343040505e-05,
"loss": 1.6148,
"step": 13460
},
{
"epoch": 0.82,
"learning_rate": 1.4792032451413696e-05,
"loss": 1.6282,
"step": 13480
},
{
"epoch": 0.82,
"learning_rate": 1.4695162559786889e-05,
"loss": 1.6298,
"step": 13500
},
{
"epoch": 0.82,
"learning_rate": 1.460797965732276e-05,
"loss": 1.5895,
"step": 13520
},
{
"epoch": 0.82,
"learning_rate": 1.451110976569595e-05,
"loss": 1.6124,
"step": 13540
},
{
"epoch": 0.82,
"learning_rate": 1.4414239874069143e-05,
"loss": 1.6071,
"step": 13560
},
{
"epoch": 0.82,
"learning_rate": 1.4317369982442333e-05,
"loss": 1.6016,
"step": 13580
},
{
"epoch": 0.82,
"learning_rate": 1.4220500090815526e-05,
"loss": 1.6199,
"step": 13600
},
{
"epoch": 0.82,
"learning_rate": 1.4123630199188715e-05,
"loss": 1.6594,
"step": 13620
},
{
"epoch": 0.83,
"learning_rate": 1.4026760307561907e-05,
"loss": 1.5613,
"step": 13640
},
{
"epoch": 0.83,
"learning_rate": 1.39298904159351e-05,
"loss": 1.6392,
"step": 13660
},
{
"epoch": 0.83,
"learning_rate": 1.383302052430829e-05,
"loss": 1.6183,
"step": 13680
},
{
"epoch": 0.83,
"learning_rate": 1.3736150632681479e-05,
"loss": 1.622,
"step": 13700
},
{
"epoch": 0.83,
"learning_rate": 1.3639280741054672e-05,
"loss": 1.5837,
"step": 13720
},
{
"epoch": 0.83,
"learning_rate": 1.3542410849427863e-05,
"loss": 1.6705,
"step": 13740
},
{
"epoch": 0.83,
"learning_rate": 1.3445540957801055e-05,
"loss": 1.6064,
"step": 13760
},
{
"epoch": 0.83,
"learning_rate": 1.3348671066174246e-05,
"loss": 1.6101,
"step": 13780
},
{
"epoch": 0.84,
"learning_rate": 1.3251801174547436e-05,
"loss": 1.6061,
"step": 13800
},
{
"epoch": 0.84,
"learning_rate": 1.3154931282920629e-05,
"loss": 1.6196,
"step": 13820
},
{
"epoch": 0.84,
"learning_rate": 1.3058061391293819e-05,
"loss": 1.61,
"step": 13840
},
{
"epoch": 0.84,
"learning_rate": 1.2961191499667012e-05,
"loss": 1.592,
"step": 13860
},
{
"epoch": 0.84,
"learning_rate": 1.2864321608040203e-05,
"loss": 1.6764,
"step": 13880
},
{
"epoch": 0.84,
"learning_rate": 1.2767451716413393e-05,
"loss": 1.6044,
"step": 13900
},
{
"epoch": 0.84,
"learning_rate": 1.2670581824786586e-05,
"loss": 1.6059,
"step": 13920
},
{
"epoch": 0.84,
"learning_rate": 1.2573711933159775e-05,
"loss": 1.6053,
"step": 13940
},
{
"epoch": 0.85,
"learning_rate": 1.2476842041532967e-05,
"loss": 1.6275,
"step": 13960
},
{
"epoch": 0.85,
"learning_rate": 1.2384815644487498e-05,
"loss": 1.6191,
"step": 13980
},
{
"epoch": 0.85,
"learning_rate": 1.2287945752860691e-05,
"loss": 1.5719,
"step": 14000
},
{
"epoch": 0.85,
"learning_rate": 1.219107586123388e-05,
"loss": 1.6111,
"step": 14020
},
{
"epoch": 0.85,
"learning_rate": 1.2094205969607074e-05,
"loss": 1.6068,
"step": 14040
},
{
"epoch": 0.85,
"learning_rate": 1.1997336077980263e-05,
"loss": 1.6219,
"step": 14060
},
{
"epoch": 0.85,
"learning_rate": 1.1900466186353455e-05,
"loss": 1.5711,
"step": 14080
},
{
"epoch": 0.85,
"learning_rate": 1.1803596294726646e-05,
"loss": 1.618,
"step": 14100
},
{
"epoch": 0.85,
"learning_rate": 1.1706726403099837e-05,
"loss": 1.591,
"step": 14120
},
{
"epoch": 0.86,
"learning_rate": 1.1609856511473027e-05,
"loss": 1.6204,
"step": 14140
},
{
"epoch": 0.86,
"learning_rate": 1.151298661984622e-05,
"loss": 1.5756,
"step": 14160
},
{
"epoch": 0.86,
"learning_rate": 1.1416116728219412e-05,
"loss": 1.6675,
"step": 14180
},
{
"epoch": 0.86,
"learning_rate": 1.1319246836592603e-05,
"loss": 1.6211,
"step": 14200
},
{
"epoch": 0.86,
"learning_rate": 1.1222376944965794e-05,
"loss": 1.59,
"step": 14220
},
{
"epoch": 0.86,
"learning_rate": 1.1125507053338984e-05,
"loss": 1.603,
"step": 14240
},
{
"epoch": 0.86,
"learning_rate": 1.1028637161712177e-05,
"loss": 1.6458,
"step": 14260
},
{
"epoch": 0.86,
"learning_rate": 1.0931767270085367e-05,
"loss": 1.6501,
"step": 14280
},
{
"epoch": 0.87,
"learning_rate": 1.083489737845856e-05,
"loss": 1.6259,
"step": 14300
},
{
"epoch": 0.87,
"learning_rate": 1.073802748683175e-05,
"loss": 1.6043,
"step": 14320
},
{
"epoch": 0.87,
"learning_rate": 1.064115759520494e-05,
"loss": 1.6215,
"step": 14340
},
{
"epoch": 0.87,
"learning_rate": 1.0544287703578134e-05,
"loss": 1.629,
"step": 14360
},
{
"epoch": 0.87,
"learning_rate": 1.0447417811951323e-05,
"loss": 1.5798,
"step": 14380
},
{
"epoch": 0.87,
"learning_rate": 1.0350547920324515e-05,
"loss": 1.6105,
"step": 14400
},
{
"epoch": 0.87,
"learning_rate": 1.0253678028697706e-05,
"loss": 1.6394,
"step": 14420
},
{
"epoch": 0.87,
"learning_rate": 1.0156808137070898e-05,
"loss": 1.6194,
"step": 14440
},
{
"epoch": 0.88,
"learning_rate": 1.0059938245444089e-05,
"loss": 1.613,
"step": 14460
},
{
"epoch": 0.88,
"learning_rate": 9.96306835381728e-06,
"loss": 1.5793,
"step": 14480
},
{
"epoch": 0.88,
"learning_rate": 9.866198462190472e-06,
"loss": 1.6265,
"step": 14500
},
{
"epoch": 0.88,
"learning_rate": 9.769328570563661e-06,
"loss": 1.608,
"step": 14520
},
{
"epoch": 0.88,
"learning_rate": 9.672458678936853e-06,
"loss": 1.6366,
"step": 14540
},
{
"epoch": 0.88,
"learning_rate": 9.575588787310046e-06,
"loss": 1.6604,
"step": 14560
},
{
"epoch": 0.88,
"learning_rate": 9.478718895683237e-06,
"loss": 1.6664,
"step": 14580
},
{
"epoch": 0.88,
"learning_rate": 9.381849004056427e-06,
"loss": 1.6276,
"step": 14600
},
{
"epoch": 0.89,
"learning_rate": 9.284979112429618e-06,
"loss": 1.5965,
"step": 14620
},
{
"epoch": 0.89,
"learning_rate": 9.18810922080281e-06,
"loss": 1.6168,
"step": 14640
},
{
"epoch": 0.89,
"learning_rate": 9.091239329176001e-06,
"loss": 1.6102,
"step": 14660
},
{
"epoch": 0.89,
"learning_rate": 8.994369437549192e-06,
"loss": 1.6648,
"step": 14680
},
{
"epoch": 0.89,
"learning_rate": 8.897499545922384e-06,
"loss": 1.6623,
"step": 14700
},
{
"epoch": 0.89,
"learning_rate": 8.800629654295575e-06,
"loss": 1.6088,
"step": 14720
},
{
"epoch": 0.89,
"learning_rate": 8.703759762668766e-06,
"loss": 1.6608,
"step": 14740
},
{
"epoch": 0.89,
"learning_rate": 8.606889871041958e-06,
"loss": 1.6145,
"step": 14760
},
{
"epoch": 0.89,
"learning_rate": 8.510019979415149e-06,
"loss": 1.6249,
"step": 14780
},
{
"epoch": 0.9,
"learning_rate": 8.41315008778834e-06,
"loss": 1.6214,
"step": 14800
},
{
"epoch": 0.9,
"learning_rate": 8.316280196161532e-06,
"loss": 1.6216,
"step": 14820
},
{
"epoch": 0.9,
"learning_rate": 8.219410304534723e-06,
"loss": 1.6071,
"step": 14840
},
{
"epoch": 0.9,
"learning_rate": 8.122540412907913e-06,
"loss": 1.6488,
"step": 14860
},
{
"epoch": 0.9,
"learning_rate": 8.025670521281104e-06,
"loss": 1.6402,
"step": 14880
},
{
"epoch": 0.9,
"learning_rate": 7.928800629654296e-06,
"loss": 1.6431,
"step": 14900
},
{
"epoch": 0.9,
"learning_rate": 7.831930738027489e-06,
"loss": 1.6442,
"step": 14920
},
{
"epoch": 0.9,
"learning_rate": 7.735060846400678e-06,
"loss": 1.5912,
"step": 14940
},
{
"epoch": 0.91,
"learning_rate": 7.63819095477387e-06,
"loss": 1.6202,
"step": 14960
},
{
"epoch": 0.91,
"learning_rate": 7.541321063147061e-06,
"loss": 1.5843,
"step": 14980
},
{
"epoch": 0.91,
"learning_rate": 7.444451171520252e-06,
"loss": 1.6148,
"step": 15000
},
{
"epoch": 0.91,
"learning_rate": 7.3475812798934446e-06,
"loss": 1.577,
"step": 15020
},
{
"epoch": 0.91,
"learning_rate": 7.250711388266635e-06,
"loss": 1.6337,
"step": 15040
},
{
"epoch": 0.91,
"learning_rate": 7.1538414966398264e-06,
"loss": 1.5959,
"step": 15060
},
{
"epoch": 0.91,
"learning_rate": 7.056971605013018e-06,
"loss": 1.5869,
"step": 15080
},
{
"epoch": 0.91,
"learning_rate": 6.960101713386209e-06,
"loss": 1.6214,
"step": 15100
},
{
"epoch": 0.92,
"learning_rate": 6.8632318217594e-06,
"loss": 1.6185,
"step": 15120
},
{
"epoch": 0.92,
"learning_rate": 6.766361930132591e-06,
"loss": 1.6571,
"step": 15140
},
{
"epoch": 0.92,
"learning_rate": 6.669492038505782e-06,
"loss": 1.603,
"step": 15160
},
{
"epoch": 0.92,
"learning_rate": 6.572622146878974e-06,
"loss": 1.6044,
"step": 15180
},
{
"epoch": 0.92,
"learning_rate": 6.475752255252164e-06,
"loss": 1.6188,
"step": 15200
},
{
"epoch": 0.92,
"learning_rate": 6.3788823636253565e-06,
"loss": 1.6317,
"step": 15220
},
{
"epoch": 0.92,
"learning_rate": 6.282012471998548e-06,
"loss": 1.6314,
"step": 15240
},
{
"epoch": 0.92,
"learning_rate": 6.185142580371739e-06,
"loss": 1.6427,
"step": 15260
},
{
"epoch": 0.93,
"learning_rate": 6.08827268874493e-06,
"loss": 1.583,
"step": 15280
},
{
"epoch": 0.93,
"learning_rate": 5.991402797118121e-06,
"loss": 1.6431,
"step": 15300
},
{
"epoch": 0.93,
"learning_rate": 5.8945329054913125e-06,
"loss": 1.6482,
"step": 15320
},
{
"epoch": 0.93,
"learning_rate": 5.797663013864504e-06,
"loss": 1.6195,
"step": 15340
},
{
"epoch": 0.93,
"learning_rate": 5.700793122237694e-06,
"loss": 1.6567,
"step": 15360
},
{
"epoch": 0.93,
"learning_rate": 5.603923230610886e-06,
"loss": 1.6008,
"step": 15380
},
{
"epoch": 0.93,
"learning_rate": 5.507053338984077e-06,
"loss": 1.5918,
"step": 15400
},
{
"epoch": 0.93,
"learning_rate": 5.410183447357269e-06,
"loss": 1.5933,
"step": 15420
},
{
"epoch": 0.93,
"learning_rate": 5.313313555730461e-06,
"loss": 1.6406,
"step": 15440
},
{
"epoch": 0.94,
"learning_rate": 5.216443664103651e-06,
"loss": 1.6311,
"step": 15460
},
{
"epoch": 0.94,
"learning_rate": 5.1195737724768425e-06,
"loss": 1.5971,
"step": 15480
},
{
"epoch": 0.94,
"learning_rate": 5.022703880850034e-06,
"loss": 1.6802,
"step": 15500
},
{
"epoch": 0.94,
"learning_rate": 4.925833989223225e-06,
"loss": 1.6229,
"step": 15520
},
{
"epoch": 0.94,
"learning_rate": 4.828964097596417e-06,
"loss": 1.6126,
"step": 15540
},
{
"epoch": 0.94,
"learning_rate": 4.732094205969607e-06,
"loss": 1.6453,
"step": 15560
},
{
"epoch": 0.94,
"learning_rate": 4.6352243143427985e-06,
"loss": 1.6218,
"step": 15580
},
{
"epoch": 0.94,
"learning_rate": 4.53835442271599e-06,
"loss": 1.6083,
"step": 15600
},
{
"epoch": 0.95,
"learning_rate": 4.441484531089181e-06,
"loss": 1.6494,
"step": 15620
},
{
"epoch": 0.95,
"learning_rate": 4.3446146394623726e-06,
"loss": 1.5942,
"step": 15640
},
{
"epoch": 0.95,
"learning_rate": 4.247744747835564e-06,
"loss": 1.6456,
"step": 15660
},
{
"epoch": 0.95,
"learning_rate": 4.1508748562087544e-06,
"loss": 1.6403,
"step": 15680
},
{
"epoch": 0.95,
"learning_rate": 4.054004964581947e-06,
"loss": 1.6275,
"step": 15700
},
{
"epoch": 0.95,
"learning_rate": 3.957135072955137e-06,
"loss": 1.6257,
"step": 15720
},
{
"epoch": 0.95,
"learning_rate": 3.8602651813283285e-06,
"loss": 1.6009,
"step": 15740
},
{
"epoch": 0.95,
"learning_rate": 3.76339528970152e-06,
"loss": 1.6072,
"step": 15760
},
{
"epoch": 0.96,
"learning_rate": 3.6665253980747112e-06,
"loss": 1.629,
"step": 15780
},
{
"epoch": 0.96,
"learning_rate": 3.5696555064479026e-06,
"loss": 1.6552,
"step": 15800
},
{
"epoch": 0.96,
"learning_rate": 3.4727856148210936e-06,
"loss": 1.6031,
"step": 15820
},
{
"epoch": 0.96,
"learning_rate": 3.3759157231942853e-06,
"loss": 1.6107,
"step": 15840
},
{
"epoch": 0.96,
"learning_rate": 3.2790458315674763e-06,
"loss": 1.6204,
"step": 15860
},
{
"epoch": 0.96,
"learning_rate": 3.1821759399406676e-06,
"loss": 1.6136,
"step": 15880
},
{
"epoch": 0.96,
"learning_rate": 3.0853060483138586e-06,
"loss": 1.6245,
"step": 15900
},
{
"epoch": 0.96,
"learning_rate": 2.98843615668705e-06,
"loss": 1.5918,
"step": 15920
},
{
"epoch": 0.97,
"learning_rate": 2.891566265060241e-06,
"loss": 1.6492,
"step": 15940
},
{
"epoch": 0.97,
"learning_rate": 2.7946963734334327e-06,
"loss": 1.6135,
"step": 15960
},
{
"epoch": 0.97,
"learning_rate": 2.7075134709693046e-06,
"loss": 1.6236,
"step": 15980
},
{
"epoch": 0.97,
"learning_rate": 2.6106435793424955e-06,
"loss": 1.6056,
"step": 16000
},
{
"epoch": 0.97,
"learning_rate": 2.5137736877156873e-06,
"loss": 1.6172,
"step": 16020
},
{
"epoch": 0.97,
"learning_rate": 2.4169037960888782e-06,
"loss": 1.6262,
"step": 16040
},
{
"epoch": 0.97,
"learning_rate": 2.3200339044620696e-06,
"loss": 1.6332,
"step": 16060
},
{
"epoch": 0.97,
"learning_rate": 2.223164012835261e-06,
"loss": 1.5828,
"step": 16080
},
{
"epoch": 0.97,
"learning_rate": 2.126294121208452e-06,
"loss": 1.5993,
"step": 16100
},
{
"epoch": 0.98,
"learning_rate": 2.0294242295816432e-06,
"loss": 1.6222,
"step": 16120
},
{
"epoch": 0.98,
"learning_rate": 1.9325543379548346e-06,
"loss": 1.5744,
"step": 16140
},
{
"epoch": 0.98,
"learning_rate": 1.8356844463280258e-06,
"loss": 1.5746,
"step": 16160
},
{
"epoch": 0.98,
"learning_rate": 1.738814554701217e-06,
"loss": 1.6224,
"step": 16180
},
{
"epoch": 0.98,
"learning_rate": 1.6419446630744083e-06,
"loss": 1.6085,
"step": 16200
},
{
"epoch": 0.98,
"learning_rate": 1.5450747714475994e-06,
"loss": 1.6125,
"step": 16220
},
{
"epoch": 0.98,
"learning_rate": 1.448204879820791e-06,
"loss": 1.6118,
"step": 16240
},
{
"epoch": 0.98,
"learning_rate": 1.3513349881939821e-06,
"loss": 1.6274,
"step": 16260
},
{
"epoch": 0.99,
"learning_rate": 1.2544650965671735e-06,
"loss": 1.6516,
"step": 16280
},
{
"epoch": 0.99,
"learning_rate": 1.1575952049403644e-06,
"loss": 1.5913,
"step": 16300
},
{
"epoch": 0.99,
"learning_rate": 1.0607253133135558e-06,
"loss": 1.6084,
"step": 16320
},
{
"epoch": 0.99,
"learning_rate": 9.638554216867472e-07,
"loss": 1.6105,
"step": 16340
},
{
"epoch": 0.99,
"learning_rate": 8.669855300599383e-07,
"loss": 1.5631,
"step": 16360
},
{
"epoch": 0.99,
"learning_rate": 7.701156384331296e-07,
"loss": 1.592,
"step": 16380
},
{
"epoch": 0.99,
"learning_rate": 6.732457468063208e-07,
"loss": 1.5951,
"step": 16400
},
{
"epoch": 0.99,
"learning_rate": 5.763758551795121e-07,
"loss": 1.6118,
"step": 16420
},
{
"epoch": 1.0,
"learning_rate": 4.795059635527032e-07,
"loss": 1.5745,
"step": 16440
},
{
"epoch": 1.0,
"learning_rate": 3.826360719258946e-07,
"loss": 1.6601,
"step": 16460
},
{
"epoch": 1.0,
"learning_rate": 2.857661802990858e-07,
"loss": 1.6232,
"step": 16480
},
{
"epoch": 1.0,
"learning_rate": 1.8889628867227707e-07,
"loss": 1.5696,
"step": 16500
},
{
"epoch": 1.0,
"step": 16517,
"total_flos": 2.161122321010524e+17,
"train_loss": 1.638563478888687,
"train_runtime": 6501.0445,
"train_samples_per_second": 15.244,
"train_steps_per_second": 2.541
}
],
"logging_steps": 20,
"max_steps": 16517,
"num_train_epochs": 1,
"save_steps": 5000,
"total_flos": 2.161122321010524e+17,
"trial_name": null,
"trial_params": null
}