llama-160m / trainer_state.json
JackFram's picture
Upload folder using huggingface_hub
91cf7f2
raw
history blame
371 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9991450102399937,
"global_step": 301500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.9983430431007635e-05,
"loss": 2.8598,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 4.996686086201526e-05,
"loss": 2.8633,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 4.995029129302289e-05,
"loss": 2.8648,
"step": 300
},
{
"epoch": 0.0,
"learning_rate": 4.993372172403052e-05,
"loss": 2.869,
"step": 400
},
{
"epoch": 0.0,
"learning_rate": 4.9917152155038145e-05,
"loss": 2.8665,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 4.990058258604578e-05,
"loss": 2.879,
"step": 600
},
{
"epoch": 0.0,
"learning_rate": 4.9884013017053403e-05,
"loss": 2.8641,
"step": 700
},
{
"epoch": 0.0,
"learning_rate": 4.986744344806103e-05,
"loss": 2.861,
"step": 800
},
{
"epoch": 0.0,
"learning_rate": 4.985087387906866e-05,
"loss": 2.8651,
"step": 900
},
{
"epoch": 0.0,
"learning_rate": 4.983430431007629e-05,
"loss": 2.8765,
"step": 1000
},
{
"epoch": 0.0,
"learning_rate": 4.9817734741083913e-05,
"loss": 2.8865,
"step": 1100
},
{
"epoch": 0.0,
"learning_rate": 4.9801165172091546e-05,
"loss": 2.867,
"step": 1200
},
{
"epoch": 0.0,
"learning_rate": 4.978459560309918e-05,
"loss": 2.8801,
"step": 1300
},
{
"epoch": 0.0,
"learning_rate": 4.9768026034106804e-05,
"loss": 2.8792,
"step": 1400
},
{
"epoch": 0.0,
"learning_rate": 4.975145646511443e-05,
"loss": 2.8938,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 4.973488689612206e-05,
"loss": 2.8889,
"step": 1600
},
{
"epoch": 0.01,
"learning_rate": 4.971831732712969e-05,
"loss": 2.8893,
"step": 1700
},
{
"epoch": 0.01,
"learning_rate": 4.970174775813732e-05,
"loss": 2.8893,
"step": 1800
},
{
"epoch": 0.01,
"learning_rate": 4.968517818914495e-05,
"loss": 2.8776,
"step": 1900
},
{
"epoch": 0.01,
"learning_rate": 4.966860862015257e-05,
"loss": 2.8732,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 4.9652039051160206e-05,
"loss": 2.8671,
"step": 2100
},
{
"epoch": 0.01,
"learning_rate": 4.963546948216783e-05,
"loss": 2.8947,
"step": 2200
},
{
"epoch": 0.01,
"learning_rate": 4.961889991317546e-05,
"loss": 2.8639,
"step": 2300
},
{
"epoch": 0.01,
"learning_rate": 4.960233034418309e-05,
"loss": 2.8813,
"step": 2400
},
{
"epoch": 0.01,
"learning_rate": 4.958576077519072e-05,
"loss": 2.8697,
"step": 2500
},
{
"epoch": 0.01,
"learning_rate": 4.956919120619835e-05,
"loss": 2.8819,
"step": 2600
},
{
"epoch": 0.01,
"learning_rate": 4.9552621637205974e-05,
"loss": 2.8677,
"step": 2700
},
{
"epoch": 0.01,
"learning_rate": 4.9536052068213607e-05,
"loss": 2.8836,
"step": 2800
},
{
"epoch": 0.01,
"learning_rate": 4.951948249922123e-05,
"loss": 2.8779,
"step": 2900
},
{
"epoch": 0.01,
"learning_rate": 4.9502912930228865e-05,
"loss": 2.8932,
"step": 3000
},
{
"epoch": 0.01,
"learning_rate": 4.948634336123649e-05,
"loss": 2.8818,
"step": 3100
},
{
"epoch": 0.01,
"learning_rate": 4.946977379224412e-05,
"loss": 2.8833,
"step": 3200
},
{
"epoch": 0.01,
"learning_rate": 4.945320422325175e-05,
"loss": 2.8793,
"step": 3300
},
{
"epoch": 0.01,
"learning_rate": 4.9436634654259375e-05,
"loss": 2.8781,
"step": 3400
},
{
"epoch": 0.01,
"learning_rate": 4.9420065085267e-05,
"loss": 2.8882,
"step": 3500
},
{
"epoch": 0.01,
"learning_rate": 4.9403495516274633e-05,
"loss": 2.8784,
"step": 3600
},
{
"epoch": 0.01,
"learning_rate": 4.9386925947282266e-05,
"loss": 2.89,
"step": 3700
},
{
"epoch": 0.01,
"learning_rate": 4.937035637828989e-05,
"loss": 2.9005,
"step": 3800
},
{
"epoch": 0.01,
"learning_rate": 4.935378680929752e-05,
"loss": 2.882,
"step": 3900
},
{
"epoch": 0.01,
"learning_rate": 4.933721724030515e-05,
"loss": 2.8975,
"step": 4000
},
{
"epoch": 0.01,
"learning_rate": 4.9320647671312776e-05,
"loss": 2.8952,
"step": 4100
},
{
"epoch": 0.01,
"learning_rate": 4.930407810232041e-05,
"loss": 2.8861,
"step": 4200
},
{
"epoch": 0.01,
"learning_rate": 4.9287508533328035e-05,
"loss": 2.8897,
"step": 4300
},
{
"epoch": 0.01,
"learning_rate": 4.927093896433566e-05,
"loss": 2.8957,
"step": 4400
},
{
"epoch": 0.01,
"learning_rate": 4.925436939534329e-05,
"loss": 2.8876,
"step": 4500
},
{
"epoch": 0.02,
"learning_rate": 4.923779982635092e-05,
"loss": 2.8913,
"step": 4600
},
{
"epoch": 0.02,
"learning_rate": 4.9221230257358545e-05,
"loss": 2.8834,
"step": 4700
},
{
"epoch": 0.02,
"learning_rate": 4.920466068836618e-05,
"loss": 2.8871,
"step": 4800
},
{
"epoch": 0.02,
"learning_rate": 4.91880911193738e-05,
"loss": 2.8789,
"step": 4900
},
{
"epoch": 0.02,
"learning_rate": 4.9171521550381436e-05,
"loss": 2.8819,
"step": 5000
},
{
"epoch": 0.02,
"learning_rate": 4.915495198138906e-05,
"loss": 2.8866,
"step": 5100
},
{
"epoch": 0.02,
"learning_rate": 4.913838241239669e-05,
"loss": 2.8986,
"step": 5200
},
{
"epoch": 0.02,
"learning_rate": 4.912181284340432e-05,
"loss": 2.8782,
"step": 5300
},
{
"epoch": 0.02,
"learning_rate": 4.910524327441195e-05,
"loss": 2.8754,
"step": 5400
},
{
"epoch": 0.02,
"learning_rate": 4.908867370541957e-05,
"loss": 2.8898,
"step": 5500
},
{
"epoch": 0.02,
"learning_rate": 4.9072104136427204e-05,
"loss": 2.9146,
"step": 5600
},
{
"epoch": 0.02,
"learning_rate": 4.905553456743484e-05,
"loss": 2.8713,
"step": 5700
},
{
"epoch": 0.02,
"learning_rate": 4.903896499844246e-05,
"loss": 2.8926,
"step": 5800
},
{
"epoch": 0.02,
"learning_rate": 4.902239542945009e-05,
"loss": 2.8898,
"step": 5900
},
{
"epoch": 0.02,
"learning_rate": 4.900582586045772e-05,
"loss": 2.9037,
"step": 6000
},
{
"epoch": 0.02,
"learning_rate": 4.898925629146535e-05,
"loss": 2.8794,
"step": 6100
},
{
"epoch": 0.02,
"learning_rate": 4.897268672247298e-05,
"loss": 2.8816,
"step": 6200
},
{
"epoch": 0.02,
"learning_rate": 4.8956117153480605e-05,
"loss": 2.8867,
"step": 6300
},
{
"epoch": 0.02,
"learning_rate": 4.893954758448823e-05,
"loss": 2.8907,
"step": 6400
},
{
"epoch": 0.02,
"learning_rate": 4.8922978015495863e-05,
"loss": 2.8834,
"step": 6500
},
{
"epoch": 0.02,
"learning_rate": 4.8906408446503496e-05,
"loss": 2.8797,
"step": 6600
},
{
"epoch": 0.02,
"learning_rate": 4.8889838877511115e-05,
"loss": 2.883,
"step": 6700
},
{
"epoch": 0.02,
"learning_rate": 4.887326930851875e-05,
"loss": 2.8634,
"step": 6800
},
{
"epoch": 0.02,
"learning_rate": 4.885669973952638e-05,
"loss": 2.8735,
"step": 6900
},
{
"epoch": 0.02,
"learning_rate": 4.8840130170534006e-05,
"loss": 2.8793,
"step": 7000
},
{
"epoch": 0.02,
"learning_rate": 4.882356060154163e-05,
"loss": 2.8909,
"step": 7100
},
{
"epoch": 0.02,
"learning_rate": 4.8806991032549265e-05,
"loss": 2.8971,
"step": 7200
},
{
"epoch": 0.02,
"learning_rate": 4.879042146355689e-05,
"loss": 2.8824,
"step": 7300
},
{
"epoch": 0.02,
"learning_rate": 4.877385189456452e-05,
"loss": 2.8769,
"step": 7400
},
{
"epoch": 0.02,
"learning_rate": 4.875728232557215e-05,
"loss": 2.8758,
"step": 7500
},
{
"epoch": 0.03,
"learning_rate": 4.8740712756579775e-05,
"loss": 2.8865,
"step": 7600
},
{
"epoch": 0.03,
"learning_rate": 4.872414318758741e-05,
"loss": 2.8928,
"step": 7700
},
{
"epoch": 0.03,
"learning_rate": 4.870757361859504e-05,
"loss": 2.8861,
"step": 7800
},
{
"epoch": 0.03,
"learning_rate": 4.869100404960266e-05,
"loss": 2.8862,
"step": 7900
},
{
"epoch": 0.03,
"learning_rate": 4.867443448061029e-05,
"loss": 2.9014,
"step": 8000
},
{
"epoch": 0.03,
"learning_rate": 4.8657864911617924e-05,
"loss": 2.8986,
"step": 8100
},
{
"epoch": 0.03,
"learning_rate": 4.864129534262555e-05,
"loss": 2.8918,
"step": 8200
},
{
"epoch": 0.03,
"learning_rate": 4.8624725773633176e-05,
"loss": 2.9018,
"step": 8300
},
{
"epoch": 0.03,
"learning_rate": 4.860815620464081e-05,
"loss": 2.8817,
"step": 8400
},
{
"epoch": 0.03,
"learning_rate": 4.8591586635648434e-05,
"loss": 2.8922,
"step": 8500
},
{
"epoch": 0.03,
"learning_rate": 4.857501706665607e-05,
"loss": 2.8831,
"step": 8600
},
{
"epoch": 0.03,
"learning_rate": 4.855844749766369e-05,
"loss": 2.8727,
"step": 8700
},
{
"epoch": 0.03,
"learning_rate": 4.854187792867132e-05,
"loss": 2.8897,
"step": 8800
},
{
"epoch": 0.03,
"learning_rate": 4.852530835967895e-05,
"loss": 2.8857,
"step": 8900
},
{
"epoch": 0.03,
"learning_rate": 4.8508738790686583e-05,
"loss": 2.8748,
"step": 9000
},
{
"epoch": 0.03,
"learning_rate": 4.84921692216942e-05,
"loss": 2.8772,
"step": 9100
},
{
"epoch": 0.03,
"learning_rate": 4.8475599652701835e-05,
"loss": 2.8752,
"step": 9200
},
{
"epoch": 0.03,
"learning_rate": 4.845903008370947e-05,
"loss": 2.8874,
"step": 9300
},
{
"epoch": 0.03,
"learning_rate": 4.8442460514717094e-05,
"loss": 2.893,
"step": 9400
},
{
"epoch": 0.03,
"learning_rate": 4.842589094572472e-05,
"loss": 2.8855,
"step": 9500
},
{
"epoch": 0.03,
"learning_rate": 4.840932137673235e-05,
"loss": 2.8894,
"step": 9600
},
{
"epoch": 0.03,
"learning_rate": 4.839275180773998e-05,
"loss": 2.8988,
"step": 9700
},
{
"epoch": 0.03,
"learning_rate": 4.837618223874761e-05,
"loss": 2.8821,
"step": 9800
},
{
"epoch": 0.03,
"learning_rate": 4.8359612669755236e-05,
"loss": 2.8968,
"step": 9900
},
{
"epoch": 0.03,
"learning_rate": 4.834304310076286e-05,
"loss": 2.879,
"step": 10000
},
{
"epoch": 0.03,
"learning_rate": 4.8326473531770495e-05,
"loss": 2.8916,
"step": 10100
},
{
"epoch": 0.03,
"learning_rate": 4.830990396277813e-05,
"loss": 2.8793,
"step": 10200
},
{
"epoch": 0.03,
"learning_rate": 4.829333439378575e-05,
"loss": 2.8822,
"step": 10300
},
{
"epoch": 0.03,
"learning_rate": 4.827676482479338e-05,
"loss": 2.8822,
"step": 10400
},
{
"epoch": 0.03,
"learning_rate": 4.826019525580101e-05,
"loss": 2.8899,
"step": 10500
},
{
"epoch": 0.04,
"learning_rate": 4.824362568680864e-05,
"loss": 2.8749,
"step": 10600
},
{
"epoch": 0.04,
"learning_rate": 4.822705611781626e-05,
"loss": 2.8839,
"step": 10700
},
{
"epoch": 0.04,
"learning_rate": 4.8210486548823896e-05,
"loss": 2.8886,
"step": 10800
},
{
"epoch": 0.04,
"learning_rate": 4.819391697983152e-05,
"loss": 2.8839,
"step": 10900
},
{
"epoch": 0.04,
"learning_rate": 4.8177347410839154e-05,
"loss": 2.897,
"step": 11000
},
{
"epoch": 0.04,
"learning_rate": 4.816077784184678e-05,
"loss": 2.875,
"step": 11100
},
{
"epoch": 0.04,
"learning_rate": 4.8144208272854406e-05,
"loss": 2.886,
"step": 11200
},
{
"epoch": 0.04,
"learning_rate": 4.812763870386204e-05,
"loss": 2.889,
"step": 11300
},
{
"epoch": 0.04,
"learning_rate": 4.811106913486967e-05,
"loss": 2.8768,
"step": 11400
},
{
"epoch": 0.04,
"learning_rate": 4.80944995658773e-05,
"loss": 2.8899,
"step": 11500
},
{
"epoch": 0.04,
"learning_rate": 4.807792999688492e-05,
"loss": 2.8758,
"step": 11600
},
{
"epoch": 0.04,
"learning_rate": 4.8061360427892555e-05,
"loss": 2.895,
"step": 11700
},
{
"epoch": 0.04,
"learning_rate": 4.804479085890018e-05,
"loss": 2.8856,
"step": 11800
},
{
"epoch": 0.04,
"learning_rate": 4.802822128990781e-05,
"loss": 2.8841,
"step": 11900
},
{
"epoch": 0.04,
"learning_rate": 4.801165172091544e-05,
"loss": 2.8729,
"step": 12000
},
{
"epoch": 0.04,
"learning_rate": 4.7995082151923065e-05,
"loss": 2.9014,
"step": 12100
},
{
"epoch": 0.04,
"learning_rate": 4.79785125829307e-05,
"loss": 2.8859,
"step": 12200
},
{
"epoch": 0.04,
"learning_rate": 4.7961943013938324e-05,
"loss": 2.8837,
"step": 12300
},
{
"epoch": 0.04,
"learning_rate": 4.794537344494595e-05,
"loss": 2.8781,
"step": 12400
},
{
"epoch": 0.04,
"learning_rate": 4.792880387595358e-05,
"loss": 2.8895,
"step": 12500
},
{
"epoch": 0.04,
"learning_rate": 4.7912234306961215e-05,
"loss": 2.8848,
"step": 12600
},
{
"epoch": 0.04,
"learning_rate": 4.789566473796884e-05,
"loss": 2.8941,
"step": 12700
},
{
"epoch": 0.04,
"learning_rate": 4.7879095168976466e-05,
"loss": 2.8837,
"step": 12800
},
{
"epoch": 0.04,
"learning_rate": 4.78625255999841e-05,
"loss": 2.881,
"step": 12900
},
{
"epoch": 0.04,
"learning_rate": 4.7845956030991725e-05,
"loss": 2.8789,
"step": 13000
},
{
"epoch": 0.04,
"learning_rate": 4.782938646199935e-05,
"loss": 2.8818,
"step": 13100
},
{
"epoch": 0.04,
"learning_rate": 4.7812816893006976e-05,
"loss": 2.871,
"step": 13200
},
{
"epoch": 0.04,
"learning_rate": 4.779624732401461e-05,
"loss": 2.8757,
"step": 13300
},
{
"epoch": 0.04,
"learning_rate": 4.777967775502224e-05,
"loss": 2.8987,
"step": 13400
},
{
"epoch": 0.04,
"learning_rate": 4.776310818602987e-05,
"loss": 2.8781,
"step": 13500
},
{
"epoch": 0.05,
"learning_rate": 4.774653861703749e-05,
"loss": 2.8854,
"step": 13600
},
{
"epoch": 0.05,
"learning_rate": 4.7729969048045126e-05,
"loss": 2.8756,
"step": 13700
},
{
"epoch": 0.05,
"learning_rate": 4.771339947905275e-05,
"loss": 2.8938,
"step": 13800
},
{
"epoch": 0.05,
"learning_rate": 4.7696829910060384e-05,
"loss": 2.8908,
"step": 13900
},
{
"epoch": 0.05,
"learning_rate": 4.768026034106801e-05,
"loss": 2.8843,
"step": 14000
},
{
"epoch": 0.05,
"learning_rate": 4.7663690772075636e-05,
"loss": 2.907,
"step": 14100
},
{
"epoch": 0.05,
"learning_rate": 4.764712120308327e-05,
"loss": 2.8807,
"step": 14200
},
{
"epoch": 0.05,
"learning_rate": 4.76305516340909e-05,
"loss": 2.8979,
"step": 14300
},
{
"epoch": 0.05,
"learning_rate": 4.761398206509852e-05,
"loss": 2.8875,
"step": 14400
},
{
"epoch": 0.05,
"learning_rate": 4.759741249610615e-05,
"loss": 2.8968,
"step": 14500
},
{
"epoch": 0.05,
"learning_rate": 4.7580842927113785e-05,
"loss": 2.8774,
"step": 14600
},
{
"epoch": 0.05,
"learning_rate": 4.756427335812141e-05,
"loss": 2.878,
"step": 14700
},
{
"epoch": 0.05,
"learning_rate": 4.754770378912904e-05,
"loss": 2.8689,
"step": 14800
},
{
"epoch": 0.05,
"learning_rate": 4.753113422013667e-05,
"loss": 2.8798,
"step": 14900
},
{
"epoch": 0.05,
"learning_rate": 4.7514564651144295e-05,
"loss": 2.891,
"step": 15000
},
{
"epoch": 0.05,
"learning_rate": 4.749799508215193e-05,
"loss": 2.8843,
"step": 15100
},
{
"epoch": 0.05,
"learning_rate": 4.7481425513159554e-05,
"loss": 2.886,
"step": 15200
},
{
"epoch": 0.05,
"learning_rate": 4.746485594416718e-05,
"loss": 2.8874,
"step": 15300
},
{
"epoch": 0.05,
"learning_rate": 4.744828637517481e-05,
"loss": 2.8832,
"step": 15400
},
{
"epoch": 0.05,
"learning_rate": 4.7431716806182445e-05,
"loss": 2.8716,
"step": 15500
},
{
"epoch": 0.05,
"learning_rate": 4.7415147237190064e-05,
"loss": 2.8966,
"step": 15600
},
{
"epoch": 0.05,
"learning_rate": 4.7398577668197696e-05,
"loss": 2.8853,
"step": 15700
},
{
"epoch": 0.05,
"learning_rate": 4.738200809920533e-05,
"loss": 2.8801,
"step": 15800
},
{
"epoch": 0.05,
"learning_rate": 4.7365438530212955e-05,
"loss": 2.8825,
"step": 15900
},
{
"epoch": 0.05,
"learning_rate": 4.734886896122058e-05,
"loss": 2.8878,
"step": 16000
},
{
"epoch": 0.05,
"learning_rate": 4.733229939222821e-05,
"loss": 2.8877,
"step": 16100
},
{
"epoch": 0.05,
"learning_rate": 4.731572982323584e-05,
"loss": 2.8964,
"step": 16200
},
{
"epoch": 0.05,
"learning_rate": 4.729916025424347e-05,
"loss": 2.8981,
"step": 16300
},
{
"epoch": 0.05,
"learning_rate": 4.72825906852511e-05,
"loss": 2.8828,
"step": 16400
},
{
"epoch": 0.05,
"learning_rate": 4.726602111625872e-05,
"loss": 2.8739,
"step": 16500
},
{
"epoch": 0.06,
"learning_rate": 4.7249451547266356e-05,
"loss": 2.8852,
"step": 16600
},
{
"epoch": 0.06,
"learning_rate": 4.723288197827399e-05,
"loss": 2.8911,
"step": 16700
},
{
"epoch": 0.06,
"learning_rate": 4.721631240928161e-05,
"loss": 2.8796,
"step": 16800
},
{
"epoch": 0.06,
"learning_rate": 4.719974284028924e-05,
"loss": 2.8707,
"step": 16900
},
{
"epoch": 0.06,
"learning_rate": 4.718317327129687e-05,
"loss": 2.8808,
"step": 17000
},
{
"epoch": 0.06,
"learning_rate": 4.71666037023045e-05,
"loss": 2.8642,
"step": 17100
},
{
"epoch": 0.06,
"learning_rate": 4.7150034133312124e-05,
"loss": 2.8836,
"step": 17200
},
{
"epoch": 0.06,
"learning_rate": 4.713346456431976e-05,
"loss": 2.8791,
"step": 17300
},
{
"epoch": 0.06,
"learning_rate": 4.711689499532738e-05,
"loss": 2.8852,
"step": 17400
},
{
"epoch": 0.06,
"learning_rate": 4.7100325426335015e-05,
"loss": 2.8958,
"step": 17500
},
{
"epoch": 0.06,
"learning_rate": 4.708375585734264e-05,
"loss": 2.8892,
"step": 17600
},
{
"epoch": 0.06,
"learning_rate": 4.706718628835027e-05,
"loss": 2.8727,
"step": 17700
},
{
"epoch": 0.06,
"learning_rate": 4.70506167193579e-05,
"loss": 2.8783,
"step": 17800
},
{
"epoch": 0.06,
"learning_rate": 4.703404715036553e-05,
"loss": 2.881,
"step": 17900
},
{
"epoch": 0.06,
"learning_rate": 4.701747758137315e-05,
"loss": 2.8885,
"step": 18000
},
{
"epoch": 0.06,
"learning_rate": 4.7000908012380784e-05,
"loss": 2.8683,
"step": 18100
},
{
"epoch": 0.06,
"learning_rate": 4.6984338443388416e-05,
"loss": 2.8804,
"step": 18200
},
{
"epoch": 0.06,
"learning_rate": 4.696776887439604e-05,
"loss": 2.8916,
"step": 18300
},
{
"epoch": 0.06,
"learning_rate": 4.695119930540367e-05,
"loss": 2.8911,
"step": 18400
},
{
"epoch": 0.06,
"learning_rate": 4.69346297364113e-05,
"loss": 2.8865,
"step": 18500
},
{
"epoch": 0.06,
"learning_rate": 4.6918060167418926e-05,
"loss": 2.8834,
"step": 18600
},
{
"epoch": 0.06,
"learning_rate": 4.690149059842656e-05,
"loss": 2.891,
"step": 18700
},
{
"epoch": 0.06,
"learning_rate": 4.6884921029434185e-05,
"loss": 2.8755,
"step": 18800
},
{
"epoch": 0.06,
"learning_rate": 4.686835146044181e-05,
"loss": 2.8833,
"step": 18900
},
{
"epoch": 0.06,
"learning_rate": 4.685178189144944e-05,
"loss": 2.8774,
"step": 19000
},
{
"epoch": 0.06,
"learning_rate": 4.6835212322457076e-05,
"loss": 2.8798,
"step": 19100
},
{
"epoch": 0.06,
"learning_rate": 4.6818642753464695e-05,
"loss": 2.8907,
"step": 19200
},
{
"epoch": 0.06,
"learning_rate": 4.680207318447233e-05,
"loss": 2.8808,
"step": 19300
},
{
"epoch": 0.06,
"learning_rate": 4.678550361547996e-05,
"loss": 2.8843,
"step": 19400
},
{
"epoch": 0.06,
"learning_rate": 4.6768934046487586e-05,
"loss": 2.8705,
"step": 19500
},
{
"epoch": 0.06,
"learning_rate": 4.675236447749521e-05,
"loss": 2.8892,
"step": 19600
},
{
"epoch": 0.07,
"learning_rate": 4.6735794908502844e-05,
"loss": 2.8853,
"step": 19700
},
{
"epoch": 0.07,
"learning_rate": 4.671922533951047e-05,
"loss": 2.8858,
"step": 19800
},
{
"epoch": 0.07,
"learning_rate": 4.67026557705181e-05,
"loss": 2.8793,
"step": 19900
},
{
"epoch": 0.07,
"learning_rate": 4.668608620152573e-05,
"loss": 2.8856,
"step": 20000
},
{
"epoch": 0.07,
"learning_rate": 4.6669516632533354e-05,
"loss": 2.8905,
"step": 20100
},
{
"epoch": 0.07,
"learning_rate": 4.665294706354099e-05,
"loss": 2.8834,
"step": 20200
},
{
"epoch": 0.07,
"learning_rate": 4.663637749454862e-05,
"loss": 2.8915,
"step": 20300
},
{
"epoch": 0.07,
"learning_rate": 4.661980792555624e-05,
"loss": 2.876,
"step": 20400
},
{
"epoch": 0.07,
"learning_rate": 4.660323835656387e-05,
"loss": 2.894,
"step": 20500
},
{
"epoch": 0.07,
"learning_rate": 4.6586668787571504e-05,
"loss": 2.8906,
"step": 20600
},
{
"epoch": 0.07,
"learning_rate": 4.657009921857913e-05,
"loss": 2.9034,
"step": 20700
},
{
"epoch": 0.07,
"learning_rate": 4.6553529649586755e-05,
"loss": 2.8833,
"step": 20800
},
{
"epoch": 0.07,
"learning_rate": 4.653696008059439e-05,
"loss": 2.8901,
"step": 20900
},
{
"epoch": 0.07,
"learning_rate": 4.6520390511602014e-05,
"loss": 2.8794,
"step": 21000
},
{
"epoch": 0.07,
"learning_rate": 4.6503820942609646e-05,
"loss": 2.8964,
"step": 21100
},
{
"epoch": 0.07,
"learning_rate": 4.648725137361727e-05,
"loss": 2.8859,
"step": 21200
},
{
"epoch": 0.07,
"learning_rate": 4.64706818046249e-05,
"loss": 2.8827,
"step": 21300
},
{
"epoch": 0.07,
"learning_rate": 4.645411223563253e-05,
"loss": 2.8869,
"step": 21400
},
{
"epoch": 0.07,
"learning_rate": 4.643754266664016e-05,
"loss": 2.8749,
"step": 21500
},
{
"epoch": 0.07,
"learning_rate": 4.642097309764778e-05,
"loss": 2.8716,
"step": 21600
},
{
"epoch": 0.07,
"learning_rate": 4.6404403528655415e-05,
"loss": 2.8852,
"step": 21700
},
{
"epoch": 0.07,
"learning_rate": 4.638783395966305e-05,
"loss": 2.8804,
"step": 21800
},
{
"epoch": 0.07,
"learning_rate": 4.637126439067067e-05,
"loss": 2.8932,
"step": 21900
},
{
"epoch": 0.07,
"learning_rate": 4.63546948216783e-05,
"loss": 2.8824,
"step": 22000
},
{
"epoch": 0.07,
"learning_rate": 4.6338125252685925e-05,
"loss": 2.8857,
"step": 22100
},
{
"epoch": 0.07,
"learning_rate": 4.632155568369356e-05,
"loss": 2.8903,
"step": 22200
},
{
"epoch": 0.07,
"learning_rate": 4.630498611470119e-05,
"loss": 2.8821,
"step": 22300
},
{
"epoch": 0.07,
"learning_rate": 4.6288416545708816e-05,
"loss": 2.8966,
"step": 22400
},
{
"epoch": 0.07,
"learning_rate": 4.627184697671644e-05,
"loss": 2.8814,
"step": 22500
},
{
"epoch": 0.07,
"learning_rate": 4.6255277407724074e-05,
"loss": 2.8921,
"step": 22600
},
{
"epoch": 0.08,
"learning_rate": 4.62387078387317e-05,
"loss": 2.8945,
"step": 22700
},
{
"epoch": 0.08,
"learning_rate": 4.6222138269739326e-05,
"loss": 2.8871,
"step": 22800
},
{
"epoch": 0.08,
"learning_rate": 4.620556870074696e-05,
"loss": 2.8815,
"step": 22900
},
{
"epoch": 0.08,
"learning_rate": 4.6188999131754584e-05,
"loss": 2.8852,
"step": 23000
},
{
"epoch": 0.08,
"learning_rate": 4.617242956276222e-05,
"loss": 2.8682,
"step": 23100
},
{
"epoch": 0.08,
"learning_rate": 4.615585999376984e-05,
"loss": 2.8855,
"step": 23200
},
{
"epoch": 0.08,
"learning_rate": 4.613929042477747e-05,
"loss": 2.8876,
"step": 23300
},
{
"epoch": 0.08,
"learning_rate": 4.61227208557851e-05,
"loss": 2.8727,
"step": 23400
},
{
"epoch": 0.08,
"learning_rate": 4.6106151286792734e-05,
"loss": 2.8845,
"step": 23500
},
{
"epoch": 0.08,
"learning_rate": 4.608958171780036e-05,
"loss": 2.8828,
"step": 23600
},
{
"epoch": 0.08,
"learning_rate": 4.6073012148807985e-05,
"loss": 2.8865,
"step": 23700
},
{
"epoch": 0.08,
"learning_rate": 4.605644257981562e-05,
"loss": 2.887,
"step": 23800
},
{
"epoch": 0.08,
"learning_rate": 4.6039873010823244e-05,
"loss": 2.8761,
"step": 23900
},
{
"epoch": 0.08,
"learning_rate": 4.6023303441830876e-05,
"loss": 2.8802,
"step": 24000
},
{
"epoch": 0.08,
"learning_rate": 4.60067338728385e-05,
"loss": 2.8885,
"step": 24100
},
{
"epoch": 0.08,
"learning_rate": 4.599016430384613e-05,
"loss": 2.8833,
"step": 24200
},
{
"epoch": 0.08,
"learning_rate": 4.597359473485376e-05,
"loss": 2.8707,
"step": 24300
},
{
"epoch": 0.08,
"learning_rate": 4.5957025165861386e-05,
"loss": 2.8915,
"step": 24400
},
{
"epoch": 0.08,
"learning_rate": 4.594045559686901e-05,
"loss": 2.8707,
"step": 24500
},
{
"epoch": 0.08,
"learning_rate": 4.5923886027876645e-05,
"loss": 2.8875,
"step": 24600
},
{
"epoch": 0.08,
"learning_rate": 4.590731645888428e-05,
"loss": 2.8727,
"step": 24700
},
{
"epoch": 0.08,
"learning_rate": 4.58907468898919e-05,
"loss": 2.8832,
"step": 24800
},
{
"epoch": 0.08,
"learning_rate": 4.587417732089953e-05,
"loss": 2.8737,
"step": 24900
},
{
"epoch": 0.08,
"learning_rate": 4.585760775190716e-05,
"loss": 2.8779,
"step": 25000
},
{
"epoch": 0.08,
"learning_rate": 4.584103818291479e-05,
"loss": 2.855,
"step": 25100
},
{
"epoch": 0.08,
"learning_rate": 4.582446861392242e-05,
"loss": 2.8702,
"step": 25200
},
{
"epoch": 0.08,
"learning_rate": 4.5807899044930046e-05,
"loss": 2.8976,
"step": 25300
},
{
"epoch": 0.08,
"learning_rate": 4.579132947593767e-05,
"loss": 2.8895,
"step": 25400
},
{
"epoch": 0.08,
"learning_rate": 4.5774759906945304e-05,
"loss": 2.8685,
"step": 25500
},
{
"epoch": 0.08,
"learning_rate": 4.575819033795293e-05,
"loss": 2.8737,
"step": 25600
},
{
"epoch": 0.09,
"learning_rate": 4.5741620768960556e-05,
"loss": 2.8713,
"step": 25700
},
{
"epoch": 0.09,
"learning_rate": 4.572505119996819e-05,
"loss": 2.8779,
"step": 25800
},
{
"epoch": 0.09,
"learning_rate": 4.570848163097582e-05,
"loss": 2.8782,
"step": 25900
},
{
"epoch": 0.09,
"learning_rate": 4.569191206198345e-05,
"loss": 2.8845,
"step": 26000
},
{
"epoch": 0.09,
"learning_rate": 4.567534249299107e-05,
"loss": 2.8829,
"step": 26100
},
{
"epoch": 0.09,
"learning_rate": 4.5658772923998705e-05,
"loss": 2.8809,
"step": 26200
},
{
"epoch": 0.09,
"learning_rate": 4.564220335500633e-05,
"loss": 2.8855,
"step": 26300
},
{
"epoch": 0.09,
"learning_rate": 4.5625633786013964e-05,
"loss": 2.8812,
"step": 26400
},
{
"epoch": 0.09,
"learning_rate": 4.560906421702159e-05,
"loss": 2.882,
"step": 26500
},
{
"epoch": 0.09,
"learning_rate": 4.5592494648029215e-05,
"loss": 2.8751,
"step": 26600
},
{
"epoch": 0.09,
"learning_rate": 4.557592507903685e-05,
"loss": 2.8827,
"step": 26700
},
{
"epoch": 0.09,
"learning_rate": 4.5559355510044474e-05,
"loss": 2.8655,
"step": 26800
},
{
"epoch": 0.09,
"learning_rate": 4.55427859410521e-05,
"loss": 2.8743,
"step": 26900
},
{
"epoch": 0.09,
"learning_rate": 4.552621637205973e-05,
"loss": 2.8871,
"step": 27000
},
{
"epoch": 0.09,
"learning_rate": 4.5509646803067365e-05,
"loss": 2.874,
"step": 27100
},
{
"epoch": 0.09,
"learning_rate": 4.549307723407499e-05,
"loss": 2.881,
"step": 27200
},
{
"epoch": 0.09,
"learning_rate": 4.5476507665082616e-05,
"loss": 2.8821,
"step": 27300
},
{
"epoch": 0.09,
"learning_rate": 4.545993809609025e-05,
"loss": 2.8724,
"step": 27400
},
{
"epoch": 0.09,
"learning_rate": 4.5443368527097875e-05,
"loss": 2.8967,
"step": 27500
},
{
"epoch": 0.09,
"learning_rate": 4.542679895810551e-05,
"loss": 2.8707,
"step": 27600
},
{
"epoch": 0.09,
"learning_rate": 4.541022938911313e-05,
"loss": 2.8694,
"step": 27700
},
{
"epoch": 0.09,
"learning_rate": 4.539365982012076e-05,
"loss": 2.871,
"step": 27800
},
{
"epoch": 0.09,
"learning_rate": 4.537709025112839e-05,
"loss": 2.8732,
"step": 27900
},
{
"epoch": 0.09,
"learning_rate": 4.536052068213602e-05,
"loss": 2.8772,
"step": 28000
},
{
"epoch": 0.09,
"learning_rate": 4.534395111314364e-05,
"loss": 2.8731,
"step": 28100
},
{
"epoch": 0.09,
"learning_rate": 4.5327381544151276e-05,
"loss": 2.8789,
"step": 28200
},
{
"epoch": 0.09,
"learning_rate": 4.531081197515891e-05,
"loss": 2.8816,
"step": 28300
},
{
"epoch": 0.09,
"learning_rate": 4.5294242406166534e-05,
"loss": 2.874,
"step": 28400
},
{
"epoch": 0.09,
"learning_rate": 4.527767283717416e-05,
"loss": 2.8838,
"step": 28500
},
{
"epoch": 0.09,
"learning_rate": 4.526110326818179e-05,
"loss": 2.89,
"step": 28600
},
{
"epoch": 0.1,
"learning_rate": 4.524453369918942e-05,
"loss": 2.8833,
"step": 28700
},
{
"epoch": 0.1,
"learning_rate": 4.522796413019705e-05,
"loss": 2.8781,
"step": 28800
},
{
"epoch": 0.1,
"learning_rate": 4.521139456120468e-05,
"loss": 2.8706,
"step": 28900
},
{
"epoch": 0.1,
"learning_rate": 4.51948249922123e-05,
"loss": 2.8858,
"step": 29000
},
{
"epoch": 0.1,
"learning_rate": 4.5178255423219935e-05,
"loss": 2.8841,
"step": 29100
},
{
"epoch": 0.1,
"learning_rate": 4.516168585422757e-05,
"loss": 2.8686,
"step": 29200
},
{
"epoch": 0.1,
"learning_rate": 4.514511628523519e-05,
"loss": 2.8856,
"step": 29300
},
{
"epoch": 0.1,
"learning_rate": 4.512854671624282e-05,
"loss": 2.8806,
"step": 29400
},
{
"epoch": 0.1,
"learning_rate": 4.511197714725045e-05,
"loss": 2.8672,
"step": 29500
},
{
"epoch": 0.1,
"learning_rate": 4.509540757825808e-05,
"loss": 2.8823,
"step": 29600
},
{
"epoch": 0.1,
"learning_rate": 4.5078838009265704e-05,
"loss": 2.8852,
"step": 29700
},
{
"epoch": 0.1,
"learning_rate": 4.5062268440273336e-05,
"loss": 2.8756,
"step": 29800
},
{
"epoch": 0.1,
"learning_rate": 4.504569887128096e-05,
"loss": 2.8682,
"step": 29900
},
{
"epoch": 0.1,
"learning_rate": 4.5029129302288595e-05,
"loss": 2.892,
"step": 30000
},
{
"epoch": 0.1,
"learning_rate": 4.501255973329622e-05,
"loss": 2.8685,
"step": 30100
},
{
"epoch": 0.1,
"learning_rate": 4.4995990164303846e-05,
"loss": 2.8643,
"step": 30200
},
{
"epoch": 0.1,
"learning_rate": 4.497942059531148e-05,
"loss": 2.8812,
"step": 30300
},
{
"epoch": 0.1,
"learning_rate": 4.496285102631911e-05,
"loss": 2.8733,
"step": 30400
},
{
"epoch": 0.1,
"learning_rate": 4.494628145732673e-05,
"loss": 2.8975,
"step": 30500
},
{
"epoch": 0.1,
"learning_rate": 4.492971188833436e-05,
"loss": 2.8826,
"step": 30600
},
{
"epoch": 0.1,
"learning_rate": 4.491314231934199e-05,
"loss": 2.8897,
"step": 30700
},
{
"epoch": 0.1,
"learning_rate": 4.489657275034962e-05,
"loss": 2.8822,
"step": 30800
},
{
"epoch": 0.1,
"learning_rate": 4.488000318135725e-05,
"loss": 2.8831,
"step": 30900
},
{
"epoch": 0.1,
"learning_rate": 4.486343361236487e-05,
"loss": 2.8792,
"step": 31000
},
{
"epoch": 0.1,
"learning_rate": 4.4846864043372506e-05,
"loss": 2.8854,
"step": 31100
},
{
"epoch": 0.1,
"learning_rate": 4.483029447438014e-05,
"loss": 2.8808,
"step": 31200
},
{
"epoch": 0.1,
"learning_rate": 4.481372490538776e-05,
"loss": 2.8687,
"step": 31300
},
{
"epoch": 0.1,
"learning_rate": 4.479715533639539e-05,
"loss": 2.875,
"step": 31400
},
{
"epoch": 0.1,
"learning_rate": 4.478058576740302e-05,
"loss": 2.8832,
"step": 31500
},
{
"epoch": 0.1,
"learning_rate": 4.476401619841065e-05,
"loss": 2.8729,
"step": 31600
},
{
"epoch": 0.11,
"learning_rate": 4.4747446629418274e-05,
"loss": 2.8814,
"step": 31700
},
{
"epoch": 0.11,
"learning_rate": 4.473087706042591e-05,
"loss": 2.8784,
"step": 31800
},
{
"epoch": 0.11,
"learning_rate": 4.471430749143353e-05,
"loss": 2.8718,
"step": 31900
},
{
"epoch": 0.11,
"learning_rate": 4.4697737922441165e-05,
"loss": 2.8581,
"step": 32000
},
{
"epoch": 0.11,
"learning_rate": 4.468116835344879e-05,
"loss": 2.8911,
"step": 32100
},
{
"epoch": 0.11,
"learning_rate": 4.466459878445642e-05,
"loss": 2.8691,
"step": 32200
},
{
"epoch": 0.11,
"learning_rate": 4.464802921546405e-05,
"loss": 2.869,
"step": 32300
},
{
"epoch": 0.11,
"learning_rate": 4.463145964647168e-05,
"loss": 2.8646,
"step": 32400
},
{
"epoch": 0.11,
"learning_rate": 4.46148900774793e-05,
"loss": 2.8904,
"step": 32500
},
{
"epoch": 0.11,
"learning_rate": 4.4598320508486934e-05,
"loss": 2.876,
"step": 32600
},
{
"epoch": 0.11,
"learning_rate": 4.4581750939494566e-05,
"loss": 2.8749,
"step": 32700
},
{
"epoch": 0.11,
"learning_rate": 4.456518137050219e-05,
"loss": 2.8644,
"step": 32800
},
{
"epoch": 0.11,
"learning_rate": 4.454861180150982e-05,
"loss": 2.8892,
"step": 32900
},
{
"epoch": 0.11,
"learning_rate": 4.453204223251745e-05,
"loss": 2.8765,
"step": 33000
},
{
"epoch": 0.11,
"learning_rate": 4.4515472663525076e-05,
"loss": 2.8891,
"step": 33100
},
{
"epoch": 0.11,
"learning_rate": 4.449890309453271e-05,
"loss": 2.8664,
"step": 33200
},
{
"epoch": 0.11,
"learning_rate": 4.4482333525540335e-05,
"loss": 2.8841,
"step": 33300
},
{
"epoch": 0.11,
"learning_rate": 4.446576395654796e-05,
"loss": 2.8683,
"step": 33400
},
{
"epoch": 0.11,
"learning_rate": 4.444919438755559e-05,
"loss": 2.8845,
"step": 33500
},
{
"epoch": 0.11,
"learning_rate": 4.4432624818563226e-05,
"loss": 2.8995,
"step": 33600
},
{
"epoch": 0.11,
"learning_rate": 4.441605524957085e-05,
"loss": 2.8879,
"step": 33700
},
{
"epoch": 0.11,
"learning_rate": 4.439948568057848e-05,
"loss": 2.878,
"step": 33800
},
{
"epoch": 0.11,
"learning_rate": 4.438291611158611e-05,
"loss": 2.8934,
"step": 33900
},
{
"epoch": 0.11,
"learning_rate": 4.4366346542593736e-05,
"loss": 2.8827,
"step": 34000
},
{
"epoch": 0.11,
"learning_rate": 4.434977697360136e-05,
"loss": 2.8611,
"step": 34100
},
{
"epoch": 0.11,
"learning_rate": 4.4333207404608994e-05,
"loss": 2.8768,
"step": 34200
},
{
"epoch": 0.11,
"learning_rate": 4.431663783561662e-05,
"loss": 2.8772,
"step": 34300
},
{
"epoch": 0.11,
"learning_rate": 4.430006826662425e-05,
"loss": 2.8768,
"step": 34400
},
{
"epoch": 0.11,
"learning_rate": 4.428349869763188e-05,
"loss": 2.865,
"step": 34500
},
{
"epoch": 0.11,
"learning_rate": 4.4266929128639504e-05,
"loss": 2.8789,
"step": 34600
},
{
"epoch": 0.11,
"learning_rate": 4.425035955964714e-05,
"loss": 2.868,
"step": 34700
},
{
"epoch": 0.12,
"learning_rate": 4.423378999065477e-05,
"loss": 2.8788,
"step": 34800
},
{
"epoch": 0.12,
"learning_rate": 4.4217220421662395e-05,
"loss": 2.8736,
"step": 34900
},
{
"epoch": 0.12,
"learning_rate": 4.420065085267002e-05,
"loss": 2.8714,
"step": 35000
},
{
"epoch": 0.12,
"learning_rate": 4.4184081283677654e-05,
"loss": 2.8768,
"step": 35100
},
{
"epoch": 0.12,
"learning_rate": 4.416751171468528e-05,
"loss": 2.8719,
"step": 35200
},
{
"epoch": 0.12,
"learning_rate": 4.4150942145692905e-05,
"loss": 2.8812,
"step": 35300
},
{
"epoch": 0.12,
"learning_rate": 4.413437257670054e-05,
"loss": 2.8675,
"step": 35400
},
{
"epoch": 0.12,
"learning_rate": 4.4117803007708164e-05,
"loss": 2.8768,
"step": 35500
},
{
"epoch": 0.12,
"learning_rate": 4.4101233438715796e-05,
"loss": 2.8702,
"step": 35600
},
{
"epoch": 0.12,
"learning_rate": 4.408466386972342e-05,
"loss": 2.8699,
"step": 35700
},
{
"epoch": 0.12,
"learning_rate": 4.406809430073105e-05,
"loss": 2.8855,
"step": 35800
},
{
"epoch": 0.12,
"learning_rate": 4.405152473173868e-05,
"loss": 2.8843,
"step": 35900
},
{
"epoch": 0.12,
"learning_rate": 4.403495516274631e-05,
"loss": 2.8754,
"step": 36000
},
{
"epoch": 0.12,
"learning_rate": 4.401838559375394e-05,
"loss": 2.8795,
"step": 36100
},
{
"epoch": 0.12,
"learning_rate": 4.4001816024761565e-05,
"loss": 2.8783,
"step": 36200
},
{
"epoch": 0.12,
"learning_rate": 4.39852464557692e-05,
"loss": 2.8755,
"step": 36300
},
{
"epoch": 0.12,
"learning_rate": 4.396867688677682e-05,
"loss": 2.8706,
"step": 36400
},
{
"epoch": 0.12,
"learning_rate": 4.395210731778445e-05,
"loss": 2.8727,
"step": 36500
},
{
"epoch": 0.12,
"learning_rate": 4.393553774879208e-05,
"loss": 2.8639,
"step": 36600
},
{
"epoch": 0.12,
"learning_rate": 4.391896817979971e-05,
"loss": 2.8749,
"step": 36700
},
{
"epoch": 0.12,
"learning_rate": 4.390239861080734e-05,
"loss": 2.8777,
"step": 36800
},
{
"epoch": 0.12,
"learning_rate": 4.3885829041814966e-05,
"loss": 2.883,
"step": 36900
},
{
"epoch": 0.12,
"learning_rate": 4.386925947282259e-05,
"loss": 2.8687,
"step": 37000
},
{
"epoch": 0.12,
"learning_rate": 4.3852689903830224e-05,
"loss": 2.8761,
"step": 37100
},
{
"epoch": 0.12,
"learning_rate": 4.383612033483786e-05,
"loss": 2.867,
"step": 37200
},
{
"epoch": 0.12,
"learning_rate": 4.381955076584548e-05,
"loss": 2.8649,
"step": 37300
},
{
"epoch": 0.12,
"learning_rate": 4.380298119685311e-05,
"loss": 2.8777,
"step": 37400
},
{
"epoch": 0.12,
"learning_rate": 4.378641162786074e-05,
"loss": 2.8778,
"step": 37500
},
{
"epoch": 0.12,
"learning_rate": 4.376984205886837e-05,
"loss": 2.8736,
"step": 37600
},
{
"epoch": 0.12,
"learning_rate": 4.375327248987599e-05,
"loss": 2.8616,
"step": 37700
},
{
"epoch": 0.13,
"learning_rate": 4.3736702920883625e-05,
"loss": 2.8654,
"step": 37800
},
{
"epoch": 0.13,
"learning_rate": 4.372013335189125e-05,
"loss": 2.87,
"step": 37900
},
{
"epoch": 0.13,
"learning_rate": 4.3703563782898884e-05,
"loss": 2.8939,
"step": 38000
},
{
"epoch": 0.13,
"learning_rate": 4.368699421390651e-05,
"loss": 2.8633,
"step": 38100
},
{
"epoch": 0.13,
"learning_rate": 4.3670424644914135e-05,
"loss": 2.8715,
"step": 38200
},
{
"epoch": 0.13,
"learning_rate": 4.365385507592177e-05,
"loss": 2.8771,
"step": 38300
},
{
"epoch": 0.13,
"learning_rate": 4.36372855069294e-05,
"loss": 2.8872,
"step": 38400
},
{
"epoch": 0.13,
"learning_rate": 4.3620715937937026e-05,
"loss": 2.8665,
"step": 38500
},
{
"epoch": 0.13,
"learning_rate": 4.360414636894465e-05,
"loss": 2.8721,
"step": 38600
},
{
"epoch": 0.13,
"learning_rate": 4.3587576799952285e-05,
"loss": 2.888,
"step": 38700
},
{
"epoch": 0.13,
"learning_rate": 4.357100723095991e-05,
"loss": 2.8804,
"step": 38800
},
{
"epoch": 0.13,
"learning_rate": 4.355443766196754e-05,
"loss": 2.8701,
"step": 38900
},
{
"epoch": 0.13,
"learning_rate": 4.353786809297517e-05,
"loss": 2.8942,
"step": 39000
},
{
"epoch": 0.13,
"learning_rate": 4.3521298523982795e-05,
"loss": 2.878,
"step": 39100
},
{
"epoch": 0.13,
"learning_rate": 4.350472895499043e-05,
"loss": 2.8652,
"step": 39200
},
{
"epoch": 0.13,
"learning_rate": 4.348815938599805e-05,
"loss": 2.881,
"step": 39300
},
{
"epoch": 0.13,
"learning_rate": 4.347158981700568e-05,
"loss": 2.8821,
"step": 39400
},
{
"epoch": 0.13,
"learning_rate": 4.345502024801331e-05,
"loss": 2.8811,
"step": 39500
},
{
"epoch": 0.13,
"learning_rate": 4.343845067902094e-05,
"loss": 2.8787,
"step": 39600
},
{
"epoch": 0.13,
"learning_rate": 4.342188111002857e-05,
"loss": 2.8823,
"step": 39700
},
{
"epoch": 0.13,
"learning_rate": 4.3405311541036196e-05,
"loss": 2.8767,
"step": 39800
},
{
"epoch": 0.13,
"learning_rate": 4.338874197204382e-05,
"loss": 2.887,
"step": 39900
},
{
"epoch": 0.13,
"learning_rate": 4.3372172403051454e-05,
"loss": 2.8841,
"step": 40000
},
{
"epoch": 0.13,
"learning_rate": 4.335560283405909e-05,
"loss": 2.8701,
"step": 40100
},
{
"epoch": 0.13,
"learning_rate": 4.3339033265066706e-05,
"loss": 2.8895,
"step": 40200
},
{
"epoch": 0.13,
"learning_rate": 4.332246369607434e-05,
"loss": 2.8632,
"step": 40300
},
{
"epoch": 0.13,
"learning_rate": 4.330589412708197e-05,
"loss": 2.8622,
"step": 40400
},
{
"epoch": 0.13,
"learning_rate": 4.32893245580896e-05,
"loss": 2.8757,
"step": 40500
},
{
"epoch": 0.13,
"learning_rate": 4.327275498909722e-05,
"loss": 2.8779,
"step": 40600
},
{
"epoch": 0.13,
"learning_rate": 4.3256185420104855e-05,
"loss": 2.8589,
"step": 40700
},
{
"epoch": 0.14,
"learning_rate": 4.323961585111248e-05,
"loss": 2.8718,
"step": 40800
},
{
"epoch": 0.14,
"learning_rate": 4.3223046282120114e-05,
"loss": 2.8863,
"step": 40900
},
{
"epoch": 0.14,
"learning_rate": 4.320647671312774e-05,
"loss": 2.8732,
"step": 41000
},
{
"epoch": 0.14,
"learning_rate": 4.3189907144135365e-05,
"loss": 2.8788,
"step": 41100
},
{
"epoch": 0.14,
"learning_rate": 4.3173337575143e-05,
"loss": 2.8636,
"step": 41200
},
{
"epoch": 0.14,
"learning_rate": 4.315676800615063e-05,
"loss": 2.8778,
"step": 41300
},
{
"epoch": 0.14,
"learning_rate": 4.314019843715825e-05,
"loss": 2.876,
"step": 41400
},
{
"epoch": 0.14,
"learning_rate": 4.312362886816588e-05,
"loss": 2.8743,
"step": 41500
},
{
"epoch": 0.14,
"learning_rate": 4.3107059299173515e-05,
"loss": 2.8686,
"step": 41600
},
{
"epoch": 0.14,
"learning_rate": 4.309048973018114e-05,
"loss": 2.8808,
"step": 41700
},
{
"epoch": 0.14,
"learning_rate": 4.3073920161188766e-05,
"loss": 2.8529,
"step": 41800
},
{
"epoch": 0.14,
"learning_rate": 4.30573505921964e-05,
"loss": 2.8753,
"step": 41900
},
{
"epoch": 0.14,
"learning_rate": 4.3040781023204025e-05,
"loss": 2.8596,
"step": 42000
},
{
"epoch": 0.14,
"learning_rate": 4.302421145421166e-05,
"loss": 2.8777,
"step": 42100
},
{
"epoch": 0.14,
"learning_rate": 4.300764188521928e-05,
"loss": 2.8836,
"step": 42200
},
{
"epoch": 0.14,
"learning_rate": 4.299107231622691e-05,
"loss": 2.8621,
"step": 42300
},
{
"epoch": 0.14,
"learning_rate": 4.297450274723454e-05,
"loss": 2.8779,
"step": 42400
},
{
"epoch": 0.14,
"learning_rate": 4.2957933178242174e-05,
"loss": 2.8718,
"step": 42500
},
{
"epoch": 0.14,
"learning_rate": 4.294136360924979e-05,
"loss": 2.8646,
"step": 42600
},
{
"epoch": 0.14,
"learning_rate": 4.2924794040257426e-05,
"loss": 2.8749,
"step": 42700
},
{
"epoch": 0.14,
"learning_rate": 4.290822447126506e-05,
"loss": 2.8705,
"step": 42800
},
{
"epoch": 0.14,
"learning_rate": 4.2891654902272684e-05,
"loss": 2.8778,
"step": 42900
},
{
"epoch": 0.14,
"learning_rate": 4.287508533328031e-05,
"loss": 2.8759,
"step": 43000
},
{
"epoch": 0.14,
"learning_rate": 4.285851576428794e-05,
"loss": 2.8595,
"step": 43100
},
{
"epoch": 0.14,
"learning_rate": 4.284194619529557e-05,
"loss": 2.8714,
"step": 43200
},
{
"epoch": 0.14,
"learning_rate": 4.28253766263032e-05,
"loss": 2.8567,
"step": 43300
},
{
"epoch": 0.14,
"learning_rate": 4.280880705731083e-05,
"loss": 2.8734,
"step": 43400
},
{
"epoch": 0.14,
"learning_rate": 4.279223748831845e-05,
"loss": 2.8663,
"step": 43500
},
{
"epoch": 0.14,
"learning_rate": 4.2775667919326085e-05,
"loss": 2.8847,
"step": 43600
},
{
"epoch": 0.14,
"learning_rate": 4.275909835033372e-05,
"loss": 2.8647,
"step": 43700
},
{
"epoch": 0.15,
"learning_rate": 4.274252878134134e-05,
"loss": 2.8713,
"step": 43800
},
{
"epoch": 0.15,
"learning_rate": 4.272595921234897e-05,
"loss": 2.8771,
"step": 43900
},
{
"epoch": 0.15,
"learning_rate": 4.27093896433566e-05,
"loss": 2.8749,
"step": 44000
},
{
"epoch": 0.15,
"learning_rate": 4.269282007436423e-05,
"loss": 2.8777,
"step": 44100
},
{
"epoch": 0.15,
"learning_rate": 4.2676250505371854e-05,
"loss": 2.8683,
"step": 44200
},
{
"epoch": 0.15,
"learning_rate": 4.2659680936379486e-05,
"loss": 2.8662,
"step": 44300
},
{
"epoch": 0.15,
"learning_rate": 4.264311136738711e-05,
"loss": 2.8714,
"step": 44400
},
{
"epoch": 0.15,
"learning_rate": 4.2626541798394745e-05,
"loss": 2.8633,
"step": 44500
},
{
"epoch": 0.15,
"learning_rate": 4.260997222940237e-05,
"loss": 2.8828,
"step": 44600
},
{
"epoch": 0.15,
"learning_rate": 4.2593402660409996e-05,
"loss": 2.8646,
"step": 44700
},
{
"epoch": 0.15,
"learning_rate": 4.257683309141763e-05,
"loss": 2.8803,
"step": 44800
},
{
"epoch": 0.15,
"learning_rate": 4.256026352242526e-05,
"loss": 2.8565,
"step": 44900
},
{
"epoch": 0.15,
"learning_rate": 4.254369395343288e-05,
"loss": 2.8685,
"step": 45000
},
{
"epoch": 0.15,
"learning_rate": 4.252712438444051e-05,
"loss": 2.8717,
"step": 45100
},
{
"epoch": 0.15,
"learning_rate": 4.2510554815448146e-05,
"loss": 2.8616,
"step": 45200
},
{
"epoch": 0.15,
"learning_rate": 4.249398524645577e-05,
"loss": 2.8755,
"step": 45300
},
{
"epoch": 0.15,
"learning_rate": 4.24774156774634e-05,
"loss": 2.8586,
"step": 45400
},
{
"epoch": 0.15,
"learning_rate": 4.246084610847103e-05,
"loss": 2.8828,
"step": 45500
},
{
"epoch": 0.15,
"learning_rate": 4.2444276539478656e-05,
"loss": 2.8659,
"step": 45600
},
{
"epoch": 0.15,
"learning_rate": 4.242770697048629e-05,
"loss": 2.8562,
"step": 45700
},
{
"epoch": 0.15,
"learning_rate": 4.2411137401493914e-05,
"loss": 2.8847,
"step": 45800
},
{
"epoch": 0.15,
"learning_rate": 4.239456783250154e-05,
"loss": 2.8579,
"step": 45900
},
{
"epoch": 0.15,
"learning_rate": 4.237799826350917e-05,
"loss": 2.8848,
"step": 46000
},
{
"epoch": 0.15,
"learning_rate": 4.2361428694516805e-05,
"loss": 2.8824,
"step": 46100
},
{
"epoch": 0.15,
"learning_rate": 4.2344859125524424e-05,
"loss": 2.862,
"step": 46200
},
{
"epoch": 0.15,
"learning_rate": 4.232828955653206e-05,
"loss": 2.8759,
"step": 46300
},
{
"epoch": 0.15,
"learning_rate": 4.231171998753969e-05,
"loss": 2.8627,
"step": 46400
},
{
"epoch": 0.15,
"learning_rate": 4.2295150418547315e-05,
"loss": 2.8741,
"step": 46500
},
{
"epoch": 0.15,
"learning_rate": 4.227858084955494e-05,
"loss": 2.8658,
"step": 46600
},
{
"epoch": 0.15,
"learning_rate": 4.2262011280562574e-05,
"loss": 2.8693,
"step": 46700
},
{
"epoch": 0.16,
"learning_rate": 4.22454417115702e-05,
"loss": 2.8672,
"step": 46800
},
{
"epoch": 0.16,
"learning_rate": 4.222887214257783e-05,
"loss": 2.8549,
"step": 46900
},
{
"epoch": 0.16,
"learning_rate": 4.221230257358546e-05,
"loss": 2.8759,
"step": 47000
},
{
"epoch": 0.16,
"learning_rate": 4.2195733004593084e-05,
"loss": 2.8681,
"step": 47100
},
{
"epoch": 0.16,
"learning_rate": 4.2179163435600716e-05,
"loss": 2.8741,
"step": 47200
},
{
"epoch": 0.16,
"learning_rate": 4.216259386660835e-05,
"loss": 2.8543,
"step": 47300
},
{
"epoch": 0.16,
"learning_rate": 4.2146024297615975e-05,
"loss": 2.8716,
"step": 47400
},
{
"epoch": 0.16,
"learning_rate": 4.21294547286236e-05,
"loss": 2.8811,
"step": 47500
},
{
"epoch": 0.16,
"learning_rate": 4.211288515963123e-05,
"loss": 2.8691,
"step": 47600
},
{
"epoch": 0.16,
"learning_rate": 4.209631559063886e-05,
"loss": 2.855,
"step": 47700
},
{
"epoch": 0.16,
"learning_rate": 4.2079746021646485e-05,
"loss": 2.8747,
"step": 47800
},
{
"epoch": 0.16,
"learning_rate": 4.206317645265412e-05,
"loss": 2.8592,
"step": 47900
},
{
"epoch": 0.16,
"learning_rate": 4.204660688366174e-05,
"loss": 2.8695,
"step": 48000
},
{
"epoch": 0.16,
"learning_rate": 4.2030037314669376e-05,
"loss": 2.8542,
"step": 48100
},
{
"epoch": 0.16,
"learning_rate": 4.2013467745677e-05,
"loss": 2.8717,
"step": 48200
},
{
"epoch": 0.16,
"learning_rate": 4.199689817668463e-05,
"loss": 2.8833,
"step": 48300
},
{
"epoch": 0.16,
"learning_rate": 4.198032860769226e-05,
"loss": 2.8618,
"step": 48400
},
{
"epoch": 0.16,
"learning_rate": 4.1963759038699886e-05,
"loss": 2.8547,
"step": 48500
},
{
"epoch": 0.16,
"learning_rate": 4.194718946970752e-05,
"loss": 2.8721,
"step": 48600
},
{
"epoch": 0.16,
"learning_rate": 4.1930619900715144e-05,
"loss": 2.8698,
"step": 48700
},
{
"epoch": 0.16,
"learning_rate": 4.191405033172277e-05,
"loss": 2.8639,
"step": 48800
},
{
"epoch": 0.16,
"learning_rate": 4.18974807627304e-05,
"loss": 2.8786,
"step": 48900
},
{
"epoch": 0.16,
"learning_rate": 4.188091119373803e-05,
"loss": 2.8681,
"step": 49000
},
{
"epoch": 0.16,
"learning_rate": 4.1864341624745654e-05,
"loss": 2.8843,
"step": 49100
},
{
"epoch": 0.16,
"learning_rate": 4.184777205575329e-05,
"loss": 2.8637,
"step": 49200
},
{
"epoch": 0.16,
"learning_rate": 4.183120248676092e-05,
"loss": 2.861,
"step": 49300
},
{
"epoch": 0.16,
"learning_rate": 4.1814632917768545e-05,
"loss": 2.8792,
"step": 49400
},
{
"epoch": 0.16,
"learning_rate": 4.179806334877617e-05,
"loss": 2.8725,
"step": 49500
},
{
"epoch": 0.16,
"learning_rate": 4.1781493779783804e-05,
"loss": 2.8809,
"step": 49600
},
{
"epoch": 0.16,
"learning_rate": 4.176492421079143e-05,
"loss": 2.8572,
"step": 49700
},
{
"epoch": 0.17,
"learning_rate": 4.174835464179906e-05,
"loss": 2.8731,
"step": 49800
},
{
"epoch": 0.17,
"learning_rate": 4.173178507280669e-05,
"loss": 2.8722,
"step": 49900
},
{
"epoch": 0.17,
"learning_rate": 4.1715215503814314e-05,
"loss": 2.8634,
"step": 50000
},
{
"epoch": 0.17,
"learning_rate": 4.1698645934821946e-05,
"loss": 2.8716,
"step": 50100
},
{
"epoch": 0.17,
"learning_rate": 4.168207636582957e-05,
"loss": 2.8733,
"step": 50200
},
{
"epoch": 0.17,
"learning_rate": 4.16655067968372e-05,
"loss": 2.8766,
"step": 50300
},
{
"epoch": 0.17,
"learning_rate": 4.164893722784483e-05,
"loss": 2.8619,
"step": 50400
},
{
"epoch": 0.17,
"learning_rate": 4.163236765885246e-05,
"loss": 2.8663,
"step": 50500
},
{
"epoch": 0.17,
"learning_rate": 4.161579808986009e-05,
"loss": 2.8646,
"step": 50600
},
{
"epoch": 0.17,
"learning_rate": 4.1599228520867715e-05,
"loss": 2.8729,
"step": 50700
},
{
"epoch": 0.17,
"learning_rate": 4.158265895187535e-05,
"loss": 2.8768,
"step": 50800
},
{
"epoch": 0.17,
"learning_rate": 4.156608938288297e-05,
"loss": 2.853,
"step": 50900
},
{
"epoch": 0.17,
"learning_rate": 4.1549519813890606e-05,
"loss": 2.8677,
"step": 51000
},
{
"epoch": 0.17,
"learning_rate": 4.153295024489823e-05,
"loss": 2.8616,
"step": 51100
},
{
"epoch": 0.17,
"learning_rate": 4.151638067590586e-05,
"loss": 2.8701,
"step": 51200
},
{
"epoch": 0.17,
"learning_rate": 4.149981110691349e-05,
"loss": 2.8817,
"step": 51300
},
{
"epoch": 0.17,
"learning_rate": 4.1483241537921116e-05,
"loss": 2.8626,
"step": 51400
},
{
"epoch": 0.17,
"learning_rate": 4.146667196892874e-05,
"loss": 2.8665,
"step": 51500
},
{
"epoch": 0.17,
"learning_rate": 4.1450102399936374e-05,
"loss": 2.8741,
"step": 51600
},
{
"epoch": 0.17,
"learning_rate": 4.143353283094401e-05,
"loss": 2.874,
"step": 51700
},
{
"epoch": 0.17,
"learning_rate": 4.141696326195163e-05,
"loss": 2.8703,
"step": 51800
},
{
"epoch": 0.17,
"learning_rate": 4.140039369295926e-05,
"loss": 2.8614,
"step": 51900
},
{
"epoch": 0.17,
"learning_rate": 4.138382412396689e-05,
"loss": 2.8767,
"step": 52000
},
{
"epoch": 0.17,
"learning_rate": 4.136725455497452e-05,
"loss": 2.872,
"step": 52100
},
{
"epoch": 0.17,
"learning_rate": 4.135068498598215e-05,
"loss": 2.8675,
"step": 52200
},
{
"epoch": 0.17,
"learning_rate": 4.1334115416989775e-05,
"loss": 2.8717,
"step": 52300
},
{
"epoch": 0.17,
"learning_rate": 4.13175458479974e-05,
"loss": 2.8538,
"step": 52400
},
{
"epoch": 0.17,
"learning_rate": 4.1300976279005034e-05,
"loss": 2.8645,
"step": 52500
},
{
"epoch": 0.17,
"learning_rate": 4.1284406710012666e-05,
"loss": 2.8681,
"step": 52600
},
{
"epoch": 0.17,
"learning_rate": 4.1267837141020285e-05,
"loss": 2.8464,
"step": 52700
},
{
"epoch": 0.17,
"learning_rate": 4.125126757202792e-05,
"loss": 2.8656,
"step": 52800
},
{
"epoch": 0.18,
"learning_rate": 4.123469800303555e-05,
"loss": 2.8628,
"step": 52900
},
{
"epoch": 0.18,
"learning_rate": 4.1218128434043176e-05,
"loss": 2.8707,
"step": 53000
},
{
"epoch": 0.18,
"learning_rate": 4.12015588650508e-05,
"loss": 2.8671,
"step": 53100
},
{
"epoch": 0.18,
"learning_rate": 4.1184989296058435e-05,
"loss": 2.8719,
"step": 53200
},
{
"epoch": 0.18,
"learning_rate": 4.116841972706606e-05,
"loss": 2.8642,
"step": 53300
},
{
"epoch": 0.18,
"learning_rate": 4.115185015807369e-05,
"loss": 2.8664,
"step": 53400
},
{
"epoch": 0.18,
"learning_rate": 4.113528058908132e-05,
"loss": 2.8768,
"step": 53500
},
{
"epoch": 0.18,
"learning_rate": 4.1118711020088945e-05,
"loss": 2.8581,
"step": 53600
},
{
"epoch": 0.18,
"learning_rate": 4.110214145109658e-05,
"loss": 2.8602,
"step": 53700
},
{
"epoch": 0.18,
"learning_rate": 4.108557188210421e-05,
"loss": 2.8745,
"step": 53800
},
{
"epoch": 0.18,
"learning_rate": 4.106900231311183e-05,
"loss": 2.8516,
"step": 53900
},
{
"epoch": 0.18,
"learning_rate": 4.105243274411946e-05,
"loss": 2.8812,
"step": 54000
},
{
"epoch": 0.18,
"learning_rate": 4.1035863175127094e-05,
"loss": 2.8539,
"step": 54100
},
{
"epoch": 0.18,
"learning_rate": 4.101929360613472e-05,
"loss": 2.8586,
"step": 54200
},
{
"epoch": 0.18,
"learning_rate": 4.1002724037142346e-05,
"loss": 2.858,
"step": 54300
},
{
"epoch": 0.18,
"learning_rate": 4.098615446814998e-05,
"loss": 2.8706,
"step": 54400
},
{
"epoch": 0.18,
"learning_rate": 4.0969584899157604e-05,
"loss": 2.858,
"step": 54500
},
{
"epoch": 0.18,
"learning_rate": 4.095301533016524e-05,
"loss": 2.8687,
"step": 54600
},
{
"epoch": 0.18,
"learning_rate": 4.093644576117286e-05,
"loss": 2.8598,
"step": 54700
},
{
"epoch": 0.18,
"learning_rate": 4.091987619218049e-05,
"loss": 2.8636,
"step": 54800
},
{
"epoch": 0.18,
"learning_rate": 4.090330662318812e-05,
"loss": 2.8621,
"step": 54900
},
{
"epoch": 0.18,
"learning_rate": 4.0886737054195754e-05,
"loss": 2.8693,
"step": 55000
},
{
"epoch": 0.18,
"learning_rate": 4.087016748520337e-05,
"loss": 2.8605,
"step": 55100
},
{
"epoch": 0.18,
"learning_rate": 4.0853597916211005e-05,
"loss": 2.8634,
"step": 55200
},
{
"epoch": 0.18,
"learning_rate": 4.083702834721864e-05,
"loss": 2.8501,
"step": 55300
},
{
"epoch": 0.18,
"learning_rate": 4.0820458778226264e-05,
"loss": 2.8579,
"step": 55400
},
{
"epoch": 0.18,
"learning_rate": 4.080388920923389e-05,
"loss": 2.8707,
"step": 55500
},
{
"epoch": 0.18,
"learning_rate": 4.078731964024152e-05,
"loss": 2.8765,
"step": 55600
},
{
"epoch": 0.18,
"learning_rate": 4.077075007124915e-05,
"loss": 2.8835,
"step": 55700
},
{
"epoch": 0.18,
"learning_rate": 4.075418050225678e-05,
"loss": 2.867,
"step": 55800
},
{
"epoch": 0.19,
"learning_rate": 4.0737610933264406e-05,
"loss": 2.8742,
"step": 55900
},
{
"epoch": 0.19,
"learning_rate": 4.072104136427203e-05,
"loss": 2.8572,
"step": 56000
},
{
"epoch": 0.19,
"learning_rate": 4.0704471795279665e-05,
"loss": 2.8787,
"step": 56100
},
{
"epoch": 0.19,
"learning_rate": 4.06879022262873e-05,
"loss": 2.8592,
"step": 56200
},
{
"epoch": 0.19,
"learning_rate": 4.0671332657294916e-05,
"loss": 2.8688,
"step": 56300
},
{
"epoch": 0.19,
"learning_rate": 4.065476308830255e-05,
"loss": 2.8557,
"step": 56400
},
{
"epoch": 0.19,
"learning_rate": 4.063819351931018e-05,
"loss": 2.864,
"step": 56500
},
{
"epoch": 0.19,
"learning_rate": 4.062162395031781e-05,
"loss": 2.859,
"step": 56600
},
{
"epoch": 0.19,
"learning_rate": 4.060505438132543e-05,
"loss": 2.861,
"step": 56700
},
{
"epoch": 0.19,
"learning_rate": 4.058848481233306e-05,
"loss": 2.8545,
"step": 56800
},
{
"epoch": 0.19,
"learning_rate": 4.057191524334069e-05,
"loss": 2.8656,
"step": 56900
},
{
"epoch": 0.19,
"learning_rate": 4.0555345674348324e-05,
"loss": 2.843,
"step": 57000
},
{
"epoch": 0.19,
"learning_rate": 4.053877610535595e-05,
"loss": 2.8606,
"step": 57100
},
{
"epoch": 0.19,
"learning_rate": 4.0522206536363576e-05,
"loss": 2.874,
"step": 57200
},
{
"epoch": 0.19,
"learning_rate": 4.050563696737121e-05,
"loss": 2.8662,
"step": 57300
},
{
"epoch": 0.19,
"learning_rate": 4.0489067398378834e-05,
"loss": 2.8733,
"step": 57400
},
{
"epoch": 0.19,
"learning_rate": 4.047249782938646e-05,
"loss": 2.8734,
"step": 57500
},
{
"epoch": 0.19,
"learning_rate": 4.045592826039409e-05,
"loss": 2.8638,
"step": 57600
},
{
"epoch": 0.19,
"learning_rate": 4.043935869140172e-05,
"loss": 2.8527,
"step": 57700
},
{
"epoch": 0.19,
"learning_rate": 4.042278912240935e-05,
"loss": 2.8622,
"step": 57800
},
{
"epoch": 0.19,
"learning_rate": 4.040621955341698e-05,
"loss": 2.8611,
"step": 57900
},
{
"epoch": 0.19,
"learning_rate": 4.03896499844246e-05,
"loss": 2.8769,
"step": 58000
},
{
"epoch": 0.19,
"learning_rate": 4.0373080415432235e-05,
"loss": 2.8573,
"step": 58100
},
{
"epoch": 0.19,
"learning_rate": 4.035651084643987e-05,
"loss": 2.8551,
"step": 58200
},
{
"epoch": 0.19,
"learning_rate": 4.0339941277447494e-05,
"loss": 2.8651,
"step": 58300
},
{
"epoch": 0.19,
"learning_rate": 4.032337170845512e-05,
"loss": 2.8783,
"step": 58400
},
{
"epoch": 0.19,
"learning_rate": 4.030680213946275e-05,
"loss": 2.8475,
"step": 58500
},
{
"epoch": 0.19,
"learning_rate": 4.029023257047038e-05,
"loss": 2.8586,
"step": 58600
},
{
"epoch": 0.19,
"learning_rate": 4.0273663001478004e-05,
"loss": 2.8815,
"step": 58700
},
{
"epoch": 0.19,
"learning_rate": 4.0257093432485636e-05,
"loss": 2.8703,
"step": 58800
},
{
"epoch": 0.2,
"learning_rate": 4.024052386349326e-05,
"loss": 2.8635,
"step": 58900
},
{
"epoch": 0.2,
"learning_rate": 4.0223954294500895e-05,
"loss": 2.8565,
"step": 59000
},
{
"epoch": 0.2,
"learning_rate": 4.020738472550852e-05,
"loss": 2.8472,
"step": 59100
},
{
"epoch": 0.2,
"learning_rate": 4.0190815156516146e-05,
"loss": 2.8734,
"step": 59200
},
{
"epoch": 0.2,
"learning_rate": 4.017424558752378e-05,
"loss": 2.8725,
"step": 59300
},
{
"epoch": 0.2,
"learning_rate": 4.015767601853141e-05,
"loss": 2.866,
"step": 59400
},
{
"epoch": 0.2,
"learning_rate": 4.014110644953904e-05,
"loss": 2.8582,
"step": 59500
},
{
"epoch": 0.2,
"learning_rate": 4.012453688054666e-05,
"loss": 2.8544,
"step": 59600
},
{
"epoch": 0.2,
"learning_rate": 4.0107967311554296e-05,
"loss": 2.8656,
"step": 59700
},
{
"epoch": 0.2,
"learning_rate": 4.009139774256192e-05,
"loss": 2.8689,
"step": 59800
},
{
"epoch": 0.2,
"learning_rate": 4.007482817356955e-05,
"loss": 2.8556,
"step": 59900
},
{
"epoch": 0.2,
"learning_rate": 4.005825860457718e-05,
"loss": 2.871,
"step": 60000
},
{
"epoch": 0.2,
"learning_rate": 4.0041689035584806e-05,
"loss": 2.876,
"step": 60100
},
{
"epoch": 0.2,
"learning_rate": 4.002511946659244e-05,
"loss": 2.8535,
"step": 60200
},
{
"epoch": 0.2,
"learning_rate": 4.0008549897600064e-05,
"loss": 2.8473,
"step": 60300
},
{
"epoch": 0.2,
"learning_rate": 3.999198032860769e-05,
"loss": 2.857,
"step": 60400
},
{
"epoch": 0.2,
"learning_rate": 3.997541075961532e-05,
"loss": 2.8531,
"step": 60500
},
{
"epoch": 0.2,
"learning_rate": 3.9958841190622955e-05,
"loss": 2.8817,
"step": 60600
},
{
"epoch": 0.2,
"learning_rate": 3.994227162163058e-05,
"loss": 2.8718,
"step": 60700
},
{
"epoch": 0.2,
"learning_rate": 3.992570205263821e-05,
"loss": 2.8639,
"step": 60800
},
{
"epoch": 0.2,
"learning_rate": 3.990913248364584e-05,
"loss": 2.8487,
"step": 60900
},
{
"epoch": 0.2,
"learning_rate": 3.9892562914653465e-05,
"loss": 2.8645,
"step": 61000
},
{
"epoch": 0.2,
"learning_rate": 3.987599334566109e-05,
"loss": 2.8665,
"step": 61100
},
{
"epoch": 0.2,
"learning_rate": 3.9859423776668724e-05,
"loss": 2.8667,
"step": 61200
},
{
"epoch": 0.2,
"learning_rate": 3.984285420767635e-05,
"loss": 2.8411,
"step": 61300
},
{
"epoch": 0.2,
"learning_rate": 3.982628463868398e-05,
"loss": 2.8771,
"step": 61400
},
{
"epoch": 0.2,
"learning_rate": 3.980971506969161e-05,
"loss": 2.8558,
"step": 61500
},
{
"epoch": 0.2,
"learning_rate": 3.9793145500699234e-05,
"loss": 2.8704,
"step": 61600
},
{
"epoch": 0.2,
"learning_rate": 3.9776575931706866e-05,
"loss": 2.8805,
"step": 61700
},
{
"epoch": 0.2,
"learning_rate": 3.97600063627145e-05,
"loss": 2.8736,
"step": 61800
},
{
"epoch": 0.21,
"learning_rate": 3.9743436793722125e-05,
"loss": 2.8442,
"step": 61900
},
{
"epoch": 0.21,
"learning_rate": 3.972686722472975e-05,
"loss": 2.8572,
"step": 62000
},
{
"epoch": 0.21,
"learning_rate": 3.971029765573738e-05,
"loss": 2.8668,
"step": 62100
},
{
"epoch": 0.21,
"learning_rate": 3.969372808674501e-05,
"loss": 2.8605,
"step": 62200
},
{
"epoch": 0.21,
"learning_rate": 3.967715851775264e-05,
"loss": 2.8597,
"step": 62300
},
{
"epoch": 0.21,
"learning_rate": 3.966058894876027e-05,
"loss": 2.8658,
"step": 62400
},
{
"epoch": 0.21,
"learning_rate": 3.964401937976789e-05,
"loss": 2.8544,
"step": 62500
},
{
"epoch": 0.21,
"learning_rate": 3.9627449810775526e-05,
"loss": 2.8607,
"step": 62600
},
{
"epoch": 0.21,
"learning_rate": 3.961088024178315e-05,
"loss": 2.8508,
"step": 62700
},
{
"epoch": 0.21,
"learning_rate": 3.959431067279078e-05,
"loss": 2.8648,
"step": 62800
},
{
"epoch": 0.21,
"learning_rate": 3.957774110379841e-05,
"loss": 2.8674,
"step": 62900
},
{
"epoch": 0.21,
"learning_rate": 3.956117153480604e-05,
"loss": 2.8705,
"step": 63000
},
{
"epoch": 0.21,
"learning_rate": 3.954460196581367e-05,
"loss": 2.8638,
"step": 63100
},
{
"epoch": 0.21,
"learning_rate": 3.9528032396821294e-05,
"loss": 2.8589,
"step": 63200
},
{
"epoch": 0.21,
"learning_rate": 3.951146282782893e-05,
"loss": 2.871,
"step": 63300
},
{
"epoch": 0.21,
"learning_rate": 3.949489325883655e-05,
"loss": 2.8714,
"step": 63400
},
{
"epoch": 0.21,
"learning_rate": 3.9478323689844185e-05,
"loss": 2.8601,
"step": 63500
},
{
"epoch": 0.21,
"learning_rate": 3.946175412085181e-05,
"loss": 2.8575,
"step": 63600
},
{
"epoch": 0.21,
"learning_rate": 3.944518455185944e-05,
"loss": 2.8573,
"step": 63700
},
{
"epoch": 0.21,
"learning_rate": 3.942861498286707e-05,
"loss": 2.8656,
"step": 63800
},
{
"epoch": 0.21,
"learning_rate": 3.9412045413874695e-05,
"loss": 2.8505,
"step": 63900
},
{
"epoch": 0.21,
"learning_rate": 3.939547584488232e-05,
"loss": 2.8725,
"step": 64000
},
{
"epoch": 0.21,
"learning_rate": 3.9378906275889954e-05,
"loss": 2.8738,
"step": 64100
},
{
"epoch": 0.21,
"learning_rate": 3.9362336706897586e-05,
"loss": 2.8665,
"step": 64200
},
{
"epoch": 0.21,
"learning_rate": 3.934576713790521e-05,
"loss": 2.8639,
"step": 64300
},
{
"epoch": 0.21,
"learning_rate": 3.932919756891284e-05,
"loss": 2.8665,
"step": 64400
},
{
"epoch": 0.21,
"learning_rate": 3.931262799992047e-05,
"loss": 2.8692,
"step": 64500
},
{
"epoch": 0.21,
"learning_rate": 3.9296058430928096e-05,
"loss": 2.8645,
"step": 64600
},
{
"epoch": 0.21,
"learning_rate": 3.927948886193573e-05,
"loss": 2.8413,
"step": 64700
},
{
"epoch": 0.21,
"learning_rate": 3.9262919292943355e-05,
"loss": 2.8649,
"step": 64800
},
{
"epoch": 0.22,
"learning_rate": 3.924634972395098e-05,
"loss": 2.8578,
"step": 64900
},
{
"epoch": 0.22,
"learning_rate": 3.922978015495861e-05,
"loss": 2.863,
"step": 65000
},
{
"epoch": 0.22,
"learning_rate": 3.921321058596624e-05,
"loss": 2.861,
"step": 65100
},
{
"epoch": 0.22,
"learning_rate": 3.9196641016973865e-05,
"loss": 2.8649,
"step": 65200
},
{
"epoch": 0.22,
"learning_rate": 3.91800714479815e-05,
"loss": 2.864,
"step": 65300
},
{
"epoch": 0.22,
"learning_rate": 3.916350187898913e-05,
"loss": 2.8639,
"step": 65400
},
{
"epoch": 0.22,
"learning_rate": 3.9146932309996756e-05,
"loss": 2.8496,
"step": 65500
},
{
"epoch": 0.22,
"learning_rate": 3.913036274100438e-05,
"loss": 2.8847,
"step": 65600
},
{
"epoch": 0.22,
"learning_rate": 3.911379317201201e-05,
"loss": 2.8463,
"step": 65700
},
{
"epoch": 0.22,
"learning_rate": 3.909722360301964e-05,
"loss": 2.8421,
"step": 65800
},
{
"epoch": 0.22,
"learning_rate": 3.908065403402727e-05,
"loss": 2.8634,
"step": 65900
},
{
"epoch": 0.22,
"learning_rate": 3.906408446503489e-05,
"loss": 2.8571,
"step": 66000
},
{
"epoch": 0.22,
"learning_rate": 3.9047514896042524e-05,
"loss": 2.8558,
"step": 66100
},
{
"epoch": 0.22,
"learning_rate": 3.903094532705016e-05,
"loss": 2.8505,
"step": 66200
},
{
"epoch": 0.22,
"learning_rate": 3.901437575805778e-05,
"loss": 2.8738,
"step": 66300
},
{
"epoch": 0.22,
"learning_rate": 3.899780618906541e-05,
"loss": 2.8574,
"step": 66400
},
{
"epoch": 0.22,
"learning_rate": 3.898123662007304e-05,
"loss": 2.8589,
"step": 66500
},
{
"epoch": 0.22,
"learning_rate": 3.896466705108067e-05,
"loss": 2.8528,
"step": 66600
},
{
"epoch": 0.22,
"learning_rate": 3.89480974820883e-05,
"loss": 2.8618,
"step": 66700
},
{
"epoch": 0.22,
"learning_rate": 3.8931527913095925e-05,
"loss": 2.8641,
"step": 66800
},
{
"epoch": 0.22,
"learning_rate": 3.891495834410355e-05,
"loss": 2.8632,
"step": 66900
},
{
"epoch": 0.22,
"learning_rate": 3.8898388775111184e-05,
"loss": 2.8544,
"step": 67000
},
{
"epoch": 0.22,
"learning_rate": 3.8881819206118816e-05,
"loss": 2.8561,
"step": 67100
},
{
"epoch": 0.22,
"learning_rate": 3.8865249637126435e-05,
"loss": 2.8678,
"step": 67200
},
{
"epoch": 0.22,
"learning_rate": 3.884868006813407e-05,
"loss": 2.8703,
"step": 67300
},
{
"epoch": 0.22,
"learning_rate": 3.88321104991417e-05,
"loss": 2.8862,
"step": 67400
},
{
"epoch": 0.22,
"learning_rate": 3.8815540930149326e-05,
"loss": 2.8442,
"step": 67500
},
{
"epoch": 0.22,
"learning_rate": 3.879897136115695e-05,
"loss": 2.8564,
"step": 67600
},
{
"epoch": 0.22,
"learning_rate": 3.8782401792164585e-05,
"loss": 2.8611,
"step": 67700
},
{
"epoch": 0.22,
"learning_rate": 3.876583222317221e-05,
"loss": 2.8605,
"step": 67800
},
{
"epoch": 0.23,
"learning_rate": 3.874926265417984e-05,
"loss": 2.8528,
"step": 67900
},
{
"epoch": 0.23,
"learning_rate": 3.873269308518747e-05,
"loss": 2.868,
"step": 68000
},
{
"epoch": 0.23,
"learning_rate": 3.8716123516195095e-05,
"loss": 2.8628,
"step": 68100
},
{
"epoch": 0.23,
"learning_rate": 3.869955394720273e-05,
"loss": 2.8617,
"step": 68200
},
{
"epoch": 0.23,
"learning_rate": 3.868298437821036e-05,
"loss": 2.8508,
"step": 68300
},
{
"epoch": 0.23,
"learning_rate": 3.866641480921798e-05,
"loss": 2.8696,
"step": 68400
},
{
"epoch": 0.23,
"learning_rate": 3.864984524022561e-05,
"loss": 2.8618,
"step": 68500
},
{
"epoch": 0.23,
"learning_rate": 3.8633275671233244e-05,
"loss": 2.8593,
"step": 68600
},
{
"epoch": 0.23,
"learning_rate": 3.861670610224087e-05,
"loss": 2.86,
"step": 68700
},
{
"epoch": 0.23,
"learning_rate": 3.8600136533248496e-05,
"loss": 2.8575,
"step": 68800
},
{
"epoch": 0.23,
"learning_rate": 3.858356696425613e-05,
"loss": 2.8671,
"step": 68900
},
{
"epoch": 0.23,
"learning_rate": 3.8566997395263754e-05,
"loss": 2.8605,
"step": 69000
},
{
"epoch": 0.23,
"learning_rate": 3.855042782627139e-05,
"loss": 2.8469,
"step": 69100
},
{
"epoch": 0.23,
"learning_rate": 3.853385825727901e-05,
"loss": 2.8504,
"step": 69200
},
{
"epoch": 0.23,
"learning_rate": 3.851728868828664e-05,
"loss": 2.848,
"step": 69300
},
{
"epoch": 0.23,
"learning_rate": 3.850071911929427e-05,
"loss": 2.8589,
"step": 69400
},
{
"epoch": 0.23,
"learning_rate": 3.8484149550301904e-05,
"loss": 2.8569,
"step": 69500
},
{
"epoch": 0.23,
"learning_rate": 3.846757998130952e-05,
"loss": 2.849,
"step": 69600
},
{
"epoch": 0.23,
"learning_rate": 3.8451010412317155e-05,
"loss": 2.8664,
"step": 69700
},
{
"epoch": 0.23,
"learning_rate": 3.843444084332479e-05,
"loss": 2.8444,
"step": 69800
},
{
"epoch": 0.23,
"learning_rate": 3.8417871274332414e-05,
"loss": 2.8574,
"step": 69900
},
{
"epoch": 0.23,
"learning_rate": 3.840130170534004e-05,
"loss": 2.8662,
"step": 70000
},
{
"epoch": 0.23,
"learning_rate": 3.838473213634767e-05,
"loss": 2.8635,
"step": 70100
},
{
"epoch": 0.23,
"learning_rate": 3.83681625673553e-05,
"loss": 2.866,
"step": 70200
},
{
"epoch": 0.23,
"learning_rate": 3.835159299836293e-05,
"loss": 2.8495,
"step": 70300
},
{
"epoch": 0.23,
"learning_rate": 3.8335023429370556e-05,
"loss": 2.8607,
"step": 70400
},
{
"epoch": 0.23,
"learning_rate": 3.831845386037818e-05,
"loss": 2.8626,
"step": 70500
},
{
"epoch": 0.23,
"learning_rate": 3.8301884291385815e-05,
"loss": 2.8607,
"step": 70600
},
{
"epoch": 0.23,
"learning_rate": 3.828531472239345e-05,
"loss": 2.8538,
"step": 70700
},
{
"epoch": 0.23,
"learning_rate": 3.8268745153401067e-05,
"loss": 2.8535,
"step": 70800
},
{
"epoch": 0.23,
"learning_rate": 3.82521755844087e-05,
"loss": 2.8563,
"step": 70900
},
{
"epoch": 0.24,
"learning_rate": 3.823560601541633e-05,
"loss": 2.8591,
"step": 71000
},
{
"epoch": 0.24,
"learning_rate": 3.821903644642396e-05,
"loss": 2.8579,
"step": 71100
},
{
"epoch": 0.24,
"learning_rate": 3.820246687743158e-05,
"loss": 2.8588,
"step": 71200
},
{
"epoch": 0.24,
"learning_rate": 3.8185897308439216e-05,
"loss": 2.8511,
"step": 71300
},
{
"epoch": 0.24,
"learning_rate": 3.816932773944684e-05,
"loss": 2.8719,
"step": 71400
},
{
"epoch": 0.24,
"learning_rate": 3.8152758170454474e-05,
"loss": 2.8605,
"step": 71500
},
{
"epoch": 0.24,
"learning_rate": 3.81361886014621e-05,
"loss": 2.8734,
"step": 71600
},
{
"epoch": 0.24,
"learning_rate": 3.8119619032469726e-05,
"loss": 2.8432,
"step": 71700
},
{
"epoch": 0.24,
"learning_rate": 3.810304946347736e-05,
"loss": 2.8572,
"step": 71800
},
{
"epoch": 0.24,
"learning_rate": 3.808647989448499e-05,
"loss": 2.8568,
"step": 71900
},
{
"epoch": 0.24,
"learning_rate": 3.806991032549262e-05,
"loss": 2.8528,
"step": 72000
},
{
"epoch": 0.24,
"learning_rate": 3.805334075650024e-05,
"loss": 2.8591,
"step": 72100
},
{
"epoch": 0.24,
"learning_rate": 3.8036771187507875e-05,
"loss": 2.8672,
"step": 72200
},
{
"epoch": 0.24,
"learning_rate": 3.80202016185155e-05,
"loss": 2.8686,
"step": 72300
},
{
"epoch": 0.24,
"learning_rate": 3.800363204952313e-05,
"loss": 2.8698,
"step": 72400
},
{
"epoch": 0.24,
"learning_rate": 3.798706248053076e-05,
"loss": 2.8593,
"step": 72500
},
{
"epoch": 0.24,
"learning_rate": 3.7970492911538385e-05,
"loss": 2.8633,
"step": 72600
},
{
"epoch": 0.24,
"learning_rate": 3.795392334254602e-05,
"loss": 2.8676,
"step": 72700
},
{
"epoch": 0.24,
"learning_rate": 3.7937353773553644e-05,
"loss": 2.8546,
"step": 72800
},
{
"epoch": 0.24,
"learning_rate": 3.792078420456127e-05,
"loss": 2.8522,
"step": 72900
},
{
"epoch": 0.24,
"learning_rate": 3.79042146355689e-05,
"loss": 2.8566,
"step": 73000
},
{
"epoch": 0.24,
"learning_rate": 3.7887645066576535e-05,
"loss": 2.8582,
"step": 73100
},
{
"epoch": 0.24,
"learning_rate": 3.787107549758416e-05,
"loss": 2.8591,
"step": 73200
},
{
"epoch": 0.24,
"learning_rate": 3.7854505928591787e-05,
"loss": 2.8616,
"step": 73300
},
{
"epoch": 0.24,
"learning_rate": 3.783793635959942e-05,
"loss": 2.8576,
"step": 73400
},
{
"epoch": 0.24,
"learning_rate": 3.7821366790607045e-05,
"loss": 2.8603,
"step": 73500
},
{
"epoch": 0.24,
"learning_rate": 3.780479722161467e-05,
"loss": 2.8691,
"step": 73600
},
{
"epoch": 0.24,
"learning_rate": 3.77882276526223e-05,
"loss": 2.8522,
"step": 73700
},
{
"epoch": 0.24,
"learning_rate": 3.777165808362993e-05,
"loss": 2.858,
"step": 73800
},
{
"epoch": 0.24,
"learning_rate": 3.775508851463756e-05,
"loss": 2.8609,
"step": 73900
},
{
"epoch": 0.25,
"learning_rate": 3.773851894564519e-05,
"loss": 2.8583,
"step": 74000
},
{
"epoch": 0.25,
"learning_rate": 3.772194937665281e-05,
"loss": 2.8683,
"step": 74100
},
{
"epoch": 0.25,
"learning_rate": 3.7705379807660446e-05,
"loss": 2.859,
"step": 74200
},
{
"epoch": 0.25,
"learning_rate": 3.768881023866807e-05,
"loss": 2.8667,
"step": 74300
},
{
"epoch": 0.25,
"learning_rate": 3.7672240669675704e-05,
"loss": 2.8549,
"step": 74400
},
{
"epoch": 0.25,
"learning_rate": 3.765567110068333e-05,
"loss": 2.8632,
"step": 74500
},
{
"epoch": 0.25,
"learning_rate": 3.7639101531690956e-05,
"loss": 2.8646,
"step": 74600
},
{
"epoch": 0.25,
"learning_rate": 3.762253196269859e-05,
"loss": 2.8621,
"step": 74700
},
{
"epoch": 0.25,
"learning_rate": 3.7605962393706214e-05,
"loss": 2.8522,
"step": 74800
},
{
"epoch": 0.25,
"learning_rate": 3.758939282471384e-05,
"loss": 2.8724,
"step": 74900
},
{
"epoch": 0.25,
"learning_rate": 3.757282325572147e-05,
"loss": 2.8591,
"step": 75000
},
{
"epoch": 0.25,
"learning_rate": 3.7556253686729105e-05,
"loss": 2.8504,
"step": 75100
},
{
"epoch": 0.25,
"learning_rate": 3.753968411773673e-05,
"loss": 2.8628,
"step": 75200
},
{
"epoch": 0.25,
"learning_rate": 3.752311454874436e-05,
"loss": 2.8609,
"step": 75300
},
{
"epoch": 0.25,
"learning_rate": 3.750654497975199e-05,
"loss": 2.857,
"step": 75400
},
{
"epoch": 0.25,
"learning_rate": 3.7489975410759615e-05,
"loss": 2.8457,
"step": 75500
},
{
"epoch": 0.25,
"learning_rate": 3.747340584176725e-05,
"loss": 2.8683,
"step": 75600
},
{
"epoch": 0.25,
"learning_rate": 3.7456836272774874e-05,
"loss": 2.8696,
"step": 75700
},
{
"epoch": 0.25,
"learning_rate": 3.74402667037825e-05,
"loss": 2.8506,
"step": 75800
},
{
"epoch": 0.25,
"learning_rate": 3.742369713479013e-05,
"loss": 2.8525,
"step": 75900
},
{
"epoch": 0.25,
"learning_rate": 3.7407127565797765e-05,
"loss": 2.8597,
"step": 76000
},
{
"epoch": 0.25,
"learning_rate": 3.7390557996805384e-05,
"loss": 2.8474,
"step": 76100
},
{
"epoch": 0.25,
"learning_rate": 3.7373988427813017e-05,
"loss": 2.8634,
"step": 76200
},
{
"epoch": 0.25,
"learning_rate": 3.735741885882065e-05,
"loss": 2.8652,
"step": 76300
},
{
"epoch": 0.25,
"learning_rate": 3.7340849289828275e-05,
"loss": 2.8672,
"step": 76400
},
{
"epoch": 0.25,
"learning_rate": 3.73242797208359e-05,
"loss": 2.8574,
"step": 76500
},
{
"epoch": 0.25,
"learning_rate": 3.730771015184353e-05,
"loss": 2.8473,
"step": 76600
},
{
"epoch": 0.25,
"learning_rate": 3.729114058285116e-05,
"loss": 2.8561,
"step": 76700
},
{
"epoch": 0.25,
"learning_rate": 3.727457101385879e-05,
"loss": 2.8552,
"step": 76800
},
{
"epoch": 0.25,
"learning_rate": 3.725800144486642e-05,
"loss": 2.8521,
"step": 76900
},
{
"epoch": 0.26,
"learning_rate": 3.7241431875874043e-05,
"loss": 2.8684,
"step": 77000
},
{
"epoch": 0.26,
"learning_rate": 3.7224862306881676e-05,
"loss": 2.8444,
"step": 77100
},
{
"epoch": 0.26,
"learning_rate": 3.720829273788931e-05,
"loss": 2.8551,
"step": 77200
},
{
"epoch": 0.26,
"learning_rate": 3.719172316889693e-05,
"loss": 2.8448,
"step": 77300
},
{
"epoch": 0.26,
"learning_rate": 3.717515359990456e-05,
"loss": 2.8521,
"step": 77400
},
{
"epoch": 0.26,
"learning_rate": 3.715858403091219e-05,
"loss": 2.8582,
"step": 77500
},
{
"epoch": 0.26,
"learning_rate": 3.714201446191982e-05,
"loss": 2.866,
"step": 77600
},
{
"epoch": 0.26,
"learning_rate": 3.7125444892927444e-05,
"loss": 2.8595,
"step": 77700
},
{
"epoch": 0.26,
"learning_rate": 3.710887532393508e-05,
"loss": 2.8515,
"step": 77800
},
{
"epoch": 0.26,
"learning_rate": 3.70923057549427e-05,
"loss": 2.8735,
"step": 77900
},
{
"epoch": 0.26,
"learning_rate": 3.7075736185950335e-05,
"loss": 2.8699,
"step": 78000
},
{
"epoch": 0.26,
"learning_rate": 3.705916661695796e-05,
"loss": 2.8684,
"step": 78100
},
{
"epoch": 0.26,
"learning_rate": 3.704259704796559e-05,
"loss": 2.8583,
"step": 78200
},
{
"epoch": 0.26,
"learning_rate": 3.702602747897322e-05,
"loss": 2.8547,
"step": 78300
},
{
"epoch": 0.26,
"learning_rate": 3.700945790998085e-05,
"loss": 2.8474,
"step": 78400
},
{
"epoch": 0.26,
"learning_rate": 3.699288834098847e-05,
"loss": 2.8514,
"step": 78500
},
{
"epoch": 0.26,
"learning_rate": 3.6976318771996104e-05,
"loss": 2.8635,
"step": 78600
},
{
"epoch": 0.26,
"learning_rate": 3.6959749203003737e-05,
"loss": 2.8521,
"step": 78700
},
{
"epoch": 0.26,
"learning_rate": 3.694317963401136e-05,
"loss": 2.8649,
"step": 78800
},
{
"epoch": 0.26,
"learning_rate": 3.692661006501899e-05,
"loss": 2.8525,
"step": 78900
},
{
"epoch": 0.26,
"learning_rate": 3.691004049602662e-05,
"loss": 2.8487,
"step": 79000
},
{
"epoch": 0.26,
"learning_rate": 3.6893470927034247e-05,
"loss": 2.8485,
"step": 79100
},
{
"epoch": 0.26,
"learning_rate": 3.687690135804188e-05,
"loss": 2.8489,
"step": 79200
},
{
"epoch": 0.26,
"learning_rate": 3.6860331789049505e-05,
"loss": 2.861,
"step": 79300
},
{
"epoch": 0.26,
"learning_rate": 3.684376222005713e-05,
"loss": 2.8572,
"step": 79400
},
{
"epoch": 0.26,
"learning_rate": 3.682719265106476e-05,
"loss": 2.8542,
"step": 79500
},
{
"epoch": 0.26,
"learning_rate": 3.6810623082072396e-05,
"loss": 2.8741,
"step": 79600
},
{
"epoch": 0.26,
"learning_rate": 3.6794053513080015e-05,
"loss": 2.871,
"step": 79700
},
{
"epoch": 0.26,
"learning_rate": 3.677748394408765e-05,
"loss": 2.8548,
"step": 79800
},
{
"epoch": 0.26,
"learning_rate": 3.676091437509528e-05,
"loss": 2.8401,
"step": 79900
},
{
"epoch": 0.27,
"learning_rate": 3.6744344806102906e-05,
"loss": 2.8518,
"step": 80000
},
{
"epoch": 0.27,
"learning_rate": 3.672777523711053e-05,
"loss": 2.8597,
"step": 80100
},
{
"epoch": 0.27,
"learning_rate": 3.6711205668118164e-05,
"loss": 2.8414,
"step": 80200
},
{
"epoch": 0.27,
"learning_rate": 3.669463609912579e-05,
"loss": 2.8477,
"step": 80300
},
{
"epoch": 0.27,
"learning_rate": 3.667806653013342e-05,
"loss": 2.8465,
"step": 80400
},
{
"epoch": 0.27,
"learning_rate": 3.666149696114105e-05,
"loss": 2.8487,
"step": 80500
},
{
"epoch": 0.27,
"learning_rate": 3.6644927392148674e-05,
"loss": 2.8482,
"step": 80600
},
{
"epoch": 0.27,
"learning_rate": 3.662835782315631e-05,
"loss": 2.866,
"step": 80700
},
{
"epoch": 0.27,
"learning_rate": 3.661178825416394e-05,
"loss": 2.8512,
"step": 80800
},
{
"epoch": 0.27,
"learning_rate": 3.659521868517156e-05,
"loss": 2.8478,
"step": 80900
},
{
"epoch": 0.27,
"learning_rate": 3.657864911617919e-05,
"loss": 2.8579,
"step": 81000
},
{
"epoch": 0.27,
"learning_rate": 3.6562079547186824e-05,
"loss": 2.855,
"step": 81100
},
{
"epoch": 0.27,
"learning_rate": 3.654550997819445e-05,
"loss": 2.8546,
"step": 81200
},
{
"epoch": 0.27,
"learning_rate": 3.6528940409202076e-05,
"loss": 2.8589,
"step": 81300
},
{
"epoch": 0.27,
"learning_rate": 3.651237084020971e-05,
"loss": 2.8564,
"step": 81400
},
{
"epoch": 0.27,
"learning_rate": 3.6495801271217334e-05,
"loss": 2.8564,
"step": 81500
},
{
"epoch": 0.27,
"learning_rate": 3.6479231702224967e-05,
"loss": 2.8518,
"step": 81600
},
{
"epoch": 0.27,
"learning_rate": 3.646266213323259e-05,
"loss": 2.8616,
"step": 81700
},
{
"epoch": 0.27,
"learning_rate": 3.644609256424022e-05,
"loss": 2.8381,
"step": 81800
},
{
"epoch": 0.27,
"learning_rate": 3.642952299524785e-05,
"loss": 2.845,
"step": 81900
},
{
"epoch": 0.27,
"learning_rate": 3.641295342625548e-05,
"loss": 2.8503,
"step": 82000
},
{
"epoch": 0.27,
"learning_rate": 3.63963838572631e-05,
"loss": 2.8557,
"step": 82100
},
{
"epoch": 0.27,
"learning_rate": 3.6379814288270735e-05,
"loss": 2.8529,
"step": 82200
},
{
"epoch": 0.27,
"learning_rate": 3.636324471927837e-05,
"loss": 2.8566,
"step": 82300
},
{
"epoch": 0.27,
"learning_rate": 3.6346675150285993e-05,
"loss": 2.8674,
"step": 82400
},
{
"epoch": 0.27,
"learning_rate": 3.633010558129362e-05,
"loss": 2.8519,
"step": 82500
},
{
"epoch": 0.27,
"learning_rate": 3.631353601230125e-05,
"loss": 2.8564,
"step": 82600
},
{
"epoch": 0.27,
"learning_rate": 3.629696644330888e-05,
"loss": 2.8482,
"step": 82700
},
{
"epoch": 0.27,
"learning_rate": 3.628039687431651e-05,
"loss": 2.852,
"step": 82800
},
{
"epoch": 0.27,
"learning_rate": 3.6263827305324136e-05,
"loss": 2.8468,
"step": 82900
},
{
"epoch": 0.28,
"learning_rate": 3.624725773633176e-05,
"loss": 2.8459,
"step": 83000
},
{
"epoch": 0.28,
"learning_rate": 3.6230688167339394e-05,
"loss": 2.8593,
"step": 83100
},
{
"epoch": 0.28,
"learning_rate": 3.621411859834702e-05,
"loss": 2.8531,
"step": 83200
},
{
"epoch": 0.28,
"learning_rate": 3.6197549029354646e-05,
"loss": 2.8493,
"step": 83300
},
{
"epoch": 0.28,
"learning_rate": 3.618097946036228e-05,
"loss": 2.8463,
"step": 83400
},
{
"epoch": 0.28,
"learning_rate": 3.6164409891369904e-05,
"loss": 2.8441,
"step": 83500
},
{
"epoch": 0.28,
"learning_rate": 3.614784032237754e-05,
"loss": 2.8487,
"step": 83600
},
{
"epoch": 0.28,
"learning_rate": 3.613127075338516e-05,
"loss": 2.8589,
"step": 83700
},
{
"epoch": 0.28,
"learning_rate": 3.611470118439279e-05,
"loss": 2.8656,
"step": 83800
},
{
"epoch": 0.28,
"learning_rate": 3.609813161540042e-05,
"loss": 2.85,
"step": 83900
},
{
"epoch": 0.28,
"learning_rate": 3.6081562046408054e-05,
"loss": 2.8562,
"step": 84000
},
{
"epoch": 0.28,
"learning_rate": 3.606499247741568e-05,
"loss": 2.8442,
"step": 84100
},
{
"epoch": 0.28,
"learning_rate": 3.6048422908423306e-05,
"loss": 2.8553,
"step": 84200
},
{
"epoch": 0.28,
"learning_rate": 3.603185333943094e-05,
"loss": 2.8438,
"step": 84300
},
{
"epoch": 0.28,
"learning_rate": 3.6015283770438564e-05,
"loss": 2.8613,
"step": 84400
},
{
"epoch": 0.28,
"learning_rate": 3.599871420144619e-05,
"loss": 2.8599,
"step": 84500
},
{
"epoch": 0.28,
"learning_rate": 3.598214463245382e-05,
"loss": 2.8446,
"step": 84600
},
{
"epoch": 0.28,
"learning_rate": 3.596557506346145e-05,
"loss": 2.8644,
"step": 84700
},
{
"epoch": 0.28,
"learning_rate": 3.594900549446908e-05,
"loss": 2.8449,
"step": 84800
},
{
"epoch": 0.28,
"learning_rate": 3.5932435925476707e-05,
"loss": 2.8636,
"step": 84900
},
{
"epoch": 0.28,
"learning_rate": 3.591586635648433e-05,
"loss": 2.8388,
"step": 85000
},
{
"epoch": 0.28,
"learning_rate": 3.5899296787491965e-05,
"loss": 2.8585,
"step": 85100
},
{
"epoch": 0.28,
"learning_rate": 3.58827272184996e-05,
"loss": 2.8509,
"step": 85200
},
{
"epoch": 0.28,
"learning_rate": 3.5866157649507223e-05,
"loss": 2.8502,
"step": 85300
},
{
"epoch": 0.28,
"learning_rate": 3.584958808051485e-05,
"loss": 2.8448,
"step": 85400
},
{
"epoch": 0.28,
"learning_rate": 3.583301851152248e-05,
"loss": 2.849,
"step": 85500
},
{
"epoch": 0.28,
"learning_rate": 3.581644894253011e-05,
"loss": 2.8597,
"step": 85600
},
{
"epoch": 0.28,
"learning_rate": 3.579987937353774e-05,
"loss": 2.8674,
"step": 85700
},
{
"epoch": 0.28,
"learning_rate": 3.5783309804545366e-05,
"loss": 2.8621,
"step": 85800
},
{
"epoch": 0.28,
"learning_rate": 3.576674023555299e-05,
"loss": 2.856,
"step": 85900
},
{
"epoch": 0.28,
"learning_rate": 3.5750170666560624e-05,
"loss": 2.8369,
"step": 86000
},
{
"epoch": 0.29,
"learning_rate": 3.573360109756825e-05,
"loss": 2.862,
"step": 86100
},
{
"epoch": 0.29,
"learning_rate": 3.5717031528575876e-05,
"loss": 2.8336,
"step": 86200
},
{
"epoch": 0.29,
"learning_rate": 3.570046195958351e-05,
"loss": 2.8479,
"step": 86300
},
{
"epoch": 0.29,
"learning_rate": 3.568389239059114e-05,
"loss": 2.852,
"step": 86400
},
{
"epoch": 0.29,
"learning_rate": 3.566732282159877e-05,
"loss": 2.8375,
"step": 86500
},
{
"epoch": 0.29,
"learning_rate": 3.565075325260639e-05,
"loss": 2.8514,
"step": 86600
},
{
"epoch": 0.29,
"learning_rate": 3.5634183683614026e-05,
"loss": 2.8465,
"step": 86700
},
{
"epoch": 0.29,
"learning_rate": 3.561761411462165e-05,
"loss": 2.8493,
"step": 86800
},
{
"epoch": 0.29,
"learning_rate": 3.5601044545629284e-05,
"loss": 2.8586,
"step": 86900
},
{
"epoch": 0.29,
"learning_rate": 3.558447497663691e-05,
"loss": 2.8535,
"step": 87000
},
{
"epoch": 0.29,
"learning_rate": 3.5567905407644536e-05,
"loss": 2.8534,
"step": 87100
},
{
"epoch": 0.29,
"learning_rate": 3.555133583865217e-05,
"loss": 2.8364,
"step": 87200
},
{
"epoch": 0.29,
"learning_rate": 3.5534766269659794e-05,
"loss": 2.8493,
"step": 87300
},
{
"epoch": 0.29,
"learning_rate": 3.551819670066742e-05,
"loss": 2.853,
"step": 87400
},
{
"epoch": 0.29,
"learning_rate": 3.550162713167505e-05,
"loss": 2.8595,
"step": 87500
},
{
"epoch": 0.29,
"learning_rate": 3.5485057562682685e-05,
"loss": 2.8405,
"step": 87600
},
{
"epoch": 0.29,
"learning_rate": 3.546848799369031e-05,
"loss": 2.8512,
"step": 87700
},
{
"epoch": 0.29,
"learning_rate": 3.5451918424697937e-05,
"loss": 2.8608,
"step": 87800
},
{
"epoch": 0.29,
"learning_rate": 3.543534885570557e-05,
"loss": 2.8495,
"step": 87900
},
{
"epoch": 0.29,
"learning_rate": 3.5418779286713195e-05,
"loss": 2.8797,
"step": 88000
},
{
"epoch": 0.29,
"learning_rate": 3.540220971772083e-05,
"loss": 2.8596,
"step": 88100
},
{
"epoch": 0.29,
"learning_rate": 3.5385640148728453e-05,
"loss": 2.8551,
"step": 88200
},
{
"epoch": 0.29,
"learning_rate": 3.536907057973608e-05,
"loss": 2.8503,
"step": 88300
},
{
"epoch": 0.29,
"learning_rate": 3.535250101074371e-05,
"loss": 2.8514,
"step": 88400
},
{
"epoch": 0.29,
"learning_rate": 3.533593144175134e-05,
"loss": 2.8432,
"step": 88500
},
{
"epoch": 0.29,
"learning_rate": 3.5319361872758963e-05,
"loss": 2.8344,
"step": 88600
},
{
"epoch": 0.29,
"learning_rate": 3.5302792303766596e-05,
"loss": 2.8571,
"step": 88700
},
{
"epoch": 0.29,
"learning_rate": 3.528622273477423e-05,
"loss": 2.8586,
"step": 88800
},
{
"epoch": 0.29,
"learning_rate": 3.5269653165781854e-05,
"loss": 2.8393,
"step": 88900
},
{
"epoch": 0.29,
"learning_rate": 3.525308359678948e-05,
"loss": 2.8311,
"step": 89000
},
{
"epoch": 0.3,
"learning_rate": 3.523651402779711e-05,
"loss": 2.838,
"step": 89100
},
{
"epoch": 0.3,
"learning_rate": 3.521994445880474e-05,
"loss": 2.8465,
"step": 89200
},
{
"epoch": 0.3,
"learning_rate": 3.520337488981237e-05,
"loss": 2.8497,
"step": 89300
},
{
"epoch": 0.3,
"learning_rate": 3.518680532082e-05,
"loss": 2.8434,
"step": 89400
},
{
"epoch": 0.3,
"learning_rate": 3.517023575182762e-05,
"loss": 2.8456,
"step": 89500
},
{
"epoch": 0.3,
"learning_rate": 3.5153666182835256e-05,
"loss": 2.8514,
"step": 89600
},
{
"epoch": 0.3,
"learning_rate": 3.513709661384289e-05,
"loss": 2.8567,
"step": 89700
},
{
"epoch": 0.3,
"learning_rate": 3.512052704485051e-05,
"loss": 2.8567,
"step": 89800
},
{
"epoch": 0.3,
"learning_rate": 3.510395747585814e-05,
"loss": 2.8465,
"step": 89900
},
{
"epoch": 0.3,
"learning_rate": 3.508738790686577e-05,
"loss": 2.8481,
"step": 90000
},
{
"epoch": 0.3,
"learning_rate": 3.50708183378734e-05,
"loss": 2.8514,
"step": 90100
},
{
"epoch": 0.3,
"learning_rate": 3.5054248768881024e-05,
"loss": 2.8635,
"step": 90200
},
{
"epoch": 0.3,
"learning_rate": 3.5037679199888657e-05,
"loss": 2.867,
"step": 90300
},
{
"epoch": 0.3,
"learning_rate": 3.502110963089628e-05,
"loss": 2.8461,
"step": 90400
},
{
"epoch": 0.3,
"learning_rate": 3.5004540061903915e-05,
"loss": 2.8382,
"step": 90500
},
{
"epoch": 0.3,
"learning_rate": 3.498797049291154e-05,
"loss": 2.8325,
"step": 90600
},
{
"epoch": 0.3,
"learning_rate": 3.497140092391917e-05,
"loss": 2.8574,
"step": 90700
},
{
"epoch": 0.3,
"learning_rate": 3.49548313549268e-05,
"loss": 2.8534,
"step": 90800
},
{
"epoch": 0.3,
"learning_rate": 3.493826178593443e-05,
"loss": 2.8417,
"step": 90900
},
{
"epoch": 0.3,
"learning_rate": 3.492169221694205e-05,
"loss": 2.8488,
"step": 91000
},
{
"epoch": 0.3,
"learning_rate": 3.4905122647949683e-05,
"loss": 2.8566,
"step": 91100
},
{
"epoch": 0.3,
"learning_rate": 3.4888553078957316e-05,
"loss": 2.8355,
"step": 91200
},
{
"epoch": 0.3,
"learning_rate": 3.487198350996494e-05,
"loss": 2.8505,
"step": 91300
},
{
"epoch": 0.3,
"learning_rate": 3.485541394097257e-05,
"loss": 2.8438,
"step": 91400
},
{
"epoch": 0.3,
"learning_rate": 3.48388443719802e-05,
"loss": 2.8613,
"step": 91500
},
{
"epoch": 0.3,
"learning_rate": 3.4822274802987826e-05,
"loss": 2.8455,
"step": 91600
},
{
"epoch": 0.3,
"learning_rate": 3.480570523399546e-05,
"loss": 2.8487,
"step": 91700
},
{
"epoch": 0.3,
"learning_rate": 3.478913566500308e-05,
"loss": 2.8574,
"step": 91800
},
{
"epoch": 0.3,
"learning_rate": 3.477256609601071e-05,
"loss": 2.8424,
"step": 91900
},
{
"epoch": 0.3,
"learning_rate": 3.475599652701834e-05,
"loss": 2.85,
"step": 92000
},
{
"epoch": 0.31,
"learning_rate": 3.473942695802597e-05,
"loss": 2.8399,
"step": 92100
},
{
"epoch": 0.31,
"learning_rate": 3.4722857389033595e-05,
"loss": 2.8603,
"step": 92200
},
{
"epoch": 0.31,
"learning_rate": 3.470628782004123e-05,
"loss": 2.8633,
"step": 92300
},
{
"epoch": 0.31,
"learning_rate": 3.468971825104885e-05,
"loss": 2.8462,
"step": 92400
},
{
"epoch": 0.31,
"learning_rate": 3.4673148682056486e-05,
"loss": 2.8552,
"step": 92500
},
{
"epoch": 0.31,
"learning_rate": 3.465657911306411e-05,
"loss": 2.8454,
"step": 92600
},
{
"epoch": 0.31,
"learning_rate": 3.464000954407174e-05,
"loss": 2.8388,
"step": 92700
},
{
"epoch": 0.31,
"learning_rate": 3.462343997507937e-05,
"loss": 2.8486,
"step": 92800
},
{
"epoch": 0.31,
"learning_rate": 3.4606870406087e-05,
"loss": 2.8446,
"step": 92900
},
{
"epoch": 0.31,
"learning_rate": 3.459030083709462e-05,
"loss": 2.8634,
"step": 93000
},
{
"epoch": 0.31,
"learning_rate": 3.4573731268102254e-05,
"loss": 2.8495,
"step": 93100
},
{
"epoch": 0.31,
"learning_rate": 3.4557161699109887e-05,
"loss": 2.8479,
"step": 93200
},
{
"epoch": 0.31,
"learning_rate": 3.454059213011751e-05,
"loss": 2.8651,
"step": 93300
},
{
"epoch": 0.31,
"learning_rate": 3.452402256112514e-05,
"loss": 2.8437,
"step": 93400
},
{
"epoch": 0.31,
"learning_rate": 3.450745299213277e-05,
"loss": 2.8517,
"step": 93500
},
{
"epoch": 0.31,
"learning_rate": 3.44908834231404e-05,
"loss": 2.8521,
"step": 93600
},
{
"epoch": 0.31,
"learning_rate": 3.447431385414803e-05,
"loss": 2.8487,
"step": 93700
},
{
"epoch": 0.31,
"learning_rate": 3.4457744285155655e-05,
"loss": 2.8523,
"step": 93800
},
{
"epoch": 0.31,
"learning_rate": 3.444117471616328e-05,
"loss": 2.8545,
"step": 93900
},
{
"epoch": 0.31,
"learning_rate": 3.4424605147170913e-05,
"loss": 2.8606,
"step": 94000
},
{
"epoch": 0.31,
"learning_rate": 3.4408035578178546e-05,
"loss": 2.8392,
"step": 94100
},
{
"epoch": 0.31,
"learning_rate": 3.4391466009186165e-05,
"loss": 2.8509,
"step": 94200
},
{
"epoch": 0.31,
"learning_rate": 3.43748964401938e-05,
"loss": 2.845,
"step": 94300
},
{
"epoch": 0.31,
"learning_rate": 3.435832687120143e-05,
"loss": 2.8364,
"step": 94400
},
{
"epoch": 0.31,
"learning_rate": 3.4341757302209056e-05,
"loss": 2.8595,
"step": 94500
},
{
"epoch": 0.31,
"learning_rate": 3.432518773321668e-05,
"loss": 2.8259,
"step": 94600
},
{
"epoch": 0.31,
"learning_rate": 3.4308618164224315e-05,
"loss": 2.8481,
"step": 94700
},
{
"epoch": 0.31,
"learning_rate": 3.429204859523194e-05,
"loss": 2.8455,
"step": 94800
},
{
"epoch": 0.31,
"learning_rate": 3.427547902623957e-05,
"loss": 2.8473,
"step": 94900
},
{
"epoch": 0.31,
"learning_rate": 3.42589094572472e-05,
"loss": 2.8363,
"step": 95000
},
{
"epoch": 0.32,
"learning_rate": 3.4242339888254825e-05,
"loss": 2.8491,
"step": 95100
},
{
"epoch": 0.32,
"learning_rate": 3.422577031926246e-05,
"loss": 2.8516,
"step": 95200
},
{
"epoch": 0.32,
"learning_rate": 3.420920075027009e-05,
"loss": 2.8719,
"step": 95300
},
{
"epoch": 0.32,
"learning_rate": 3.4192631181277716e-05,
"loss": 2.8452,
"step": 95400
},
{
"epoch": 0.32,
"learning_rate": 3.417606161228534e-05,
"loss": 2.8425,
"step": 95500
},
{
"epoch": 0.32,
"learning_rate": 3.4159492043292974e-05,
"loss": 2.8527,
"step": 95600
},
{
"epoch": 0.32,
"learning_rate": 3.41429224743006e-05,
"loss": 2.8514,
"step": 95700
},
{
"epoch": 0.32,
"learning_rate": 3.4126352905308226e-05,
"loss": 2.8493,
"step": 95800
},
{
"epoch": 0.32,
"learning_rate": 3.410978333631586e-05,
"loss": 2.845,
"step": 95900
},
{
"epoch": 0.32,
"learning_rate": 3.4093213767323484e-05,
"loss": 2.8484,
"step": 96000
},
{
"epoch": 0.32,
"learning_rate": 3.407664419833112e-05,
"loss": 2.83,
"step": 96100
},
{
"epoch": 0.32,
"learning_rate": 3.406007462933874e-05,
"loss": 2.8633,
"step": 96200
},
{
"epoch": 0.32,
"learning_rate": 3.404350506034637e-05,
"loss": 2.8493,
"step": 96300
},
{
"epoch": 0.32,
"learning_rate": 3.4026935491354e-05,
"loss": 2.8451,
"step": 96400
},
{
"epoch": 0.32,
"learning_rate": 3.4010365922361633e-05,
"loss": 2.8621,
"step": 96500
},
{
"epoch": 0.32,
"learning_rate": 3.399379635336926e-05,
"loss": 2.8458,
"step": 96600
},
{
"epoch": 0.32,
"learning_rate": 3.3977226784376885e-05,
"loss": 2.8384,
"step": 96700
},
{
"epoch": 0.32,
"learning_rate": 3.396065721538452e-05,
"loss": 2.8302,
"step": 96800
},
{
"epoch": 0.32,
"learning_rate": 3.3944087646392143e-05,
"loss": 2.8659,
"step": 96900
},
{
"epoch": 0.32,
"learning_rate": 3.392751807739977e-05,
"loss": 2.8388,
"step": 97000
},
{
"epoch": 0.32,
"learning_rate": 3.39109485084074e-05,
"loss": 2.8436,
"step": 97100
},
{
"epoch": 0.32,
"learning_rate": 3.389437893941503e-05,
"loss": 2.8471,
"step": 97200
},
{
"epoch": 0.32,
"learning_rate": 3.387780937042266e-05,
"loss": 2.8463,
"step": 97300
},
{
"epoch": 0.32,
"learning_rate": 3.3861239801430286e-05,
"loss": 2.8327,
"step": 97400
},
{
"epoch": 0.32,
"learning_rate": 3.384467023243791e-05,
"loss": 2.8403,
"step": 97500
},
{
"epoch": 0.32,
"learning_rate": 3.3828100663445545e-05,
"loss": 2.8407,
"step": 97600
},
{
"epoch": 0.32,
"learning_rate": 3.381153109445318e-05,
"loss": 2.848,
"step": 97700
},
{
"epoch": 0.32,
"learning_rate": 3.37949615254608e-05,
"loss": 2.8527,
"step": 97800
},
{
"epoch": 0.32,
"learning_rate": 3.377839195646843e-05,
"loss": 2.8417,
"step": 97900
},
{
"epoch": 0.32,
"learning_rate": 3.376182238747606e-05,
"loss": 2.8398,
"step": 98000
},
{
"epoch": 0.33,
"learning_rate": 3.374525281848369e-05,
"loss": 2.8535,
"step": 98100
},
{
"epoch": 0.33,
"learning_rate": 3.372868324949131e-05,
"loss": 2.8415,
"step": 98200
},
{
"epoch": 0.33,
"learning_rate": 3.3712113680498946e-05,
"loss": 2.8433,
"step": 98300
},
{
"epoch": 0.33,
"learning_rate": 3.369554411150657e-05,
"loss": 2.8308,
"step": 98400
},
{
"epoch": 0.33,
"learning_rate": 3.3678974542514204e-05,
"loss": 2.846,
"step": 98500
},
{
"epoch": 0.33,
"learning_rate": 3.366240497352183e-05,
"loss": 2.8474,
"step": 98600
},
{
"epoch": 0.33,
"learning_rate": 3.3645835404529456e-05,
"loss": 2.8429,
"step": 98700
},
{
"epoch": 0.33,
"learning_rate": 3.362926583553709e-05,
"loss": 2.8545,
"step": 98800
},
{
"epoch": 0.33,
"learning_rate": 3.361269626654472e-05,
"loss": 2.8409,
"step": 98900
},
{
"epoch": 0.33,
"learning_rate": 3.359612669755235e-05,
"loss": 2.8458,
"step": 99000
},
{
"epoch": 0.33,
"learning_rate": 3.357955712855997e-05,
"loss": 2.8503,
"step": 99100
},
{
"epoch": 0.33,
"learning_rate": 3.3562987559567605e-05,
"loss": 2.8399,
"step": 99200
},
{
"epoch": 0.33,
"learning_rate": 3.354641799057523e-05,
"loss": 2.8494,
"step": 99300
},
{
"epoch": 0.33,
"learning_rate": 3.3529848421582863e-05,
"loss": 2.8499,
"step": 99400
},
{
"epoch": 0.33,
"learning_rate": 3.351327885259049e-05,
"loss": 2.831,
"step": 99500
},
{
"epoch": 0.33,
"learning_rate": 3.3496709283598115e-05,
"loss": 2.8477,
"step": 99600
},
{
"epoch": 0.33,
"learning_rate": 3.348013971460575e-05,
"loss": 2.8251,
"step": 99700
},
{
"epoch": 0.33,
"learning_rate": 3.3463570145613374e-05,
"loss": 2.8574,
"step": 99800
},
{
"epoch": 0.33,
"learning_rate": 3.3447000576621e-05,
"loss": 2.8455,
"step": 99900
},
{
"epoch": 0.33,
"learning_rate": 3.343043100762863e-05,
"loss": 2.8298,
"step": 100000
},
{
"epoch": 0.33,
"learning_rate": 3.3413861438636265e-05,
"loss": 2.8527,
"step": 100100
},
{
"epoch": 0.33,
"learning_rate": 3.339729186964389e-05,
"loss": 2.844,
"step": 100200
},
{
"epoch": 0.33,
"learning_rate": 3.3380722300651516e-05,
"loss": 2.8597,
"step": 100300
},
{
"epoch": 0.33,
"learning_rate": 3.336415273165915e-05,
"loss": 2.8442,
"step": 100400
},
{
"epoch": 0.33,
"learning_rate": 3.3347583162666775e-05,
"loss": 2.8554,
"step": 100500
},
{
"epoch": 0.33,
"learning_rate": 3.333101359367441e-05,
"loss": 2.8486,
"step": 100600
},
{
"epoch": 0.33,
"learning_rate": 3.3314444024682026e-05,
"loss": 2.8631,
"step": 100700
},
{
"epoch": 0.33,
"learning_rate": 3.329787445568966e-05,
"loss": 2.8464,
"step": 100800
},
{
"epoch": 0.33,
"learning_rate": 3.328130488669729e-05,
"loss": 2.847,
"step": 100900
},
{
"epoch": 0.33,
"learning_rate": 3.326473531770492e-05,
"loss": 2.8494,
"step": 101000
},
{
"epoch": 0.34,
"learning_rate": 3.324816574871254e-05,
"loss": 2.84,
"step": 101100
},
{
"epoch": 0.34,
"learning_rate": 3.3231596179720176e-05,
"loss": 2.8384,
"step": 101200
},
{
"epoch": 0.34,
"learning_rate": 3.32150266107278e-05,
"loss": 2.8365,
"step": 101300
},
{
"epoch": 0.34,
"learning_rate": 3.3198457041735434e-05,
"loss": 2.8574,
"step": 101400
},
{
"epoch": 0.34,
"learning_rate": 3.318188747274306e-05,
"loss": 2.8581,
"step": 101500
},
{
"epoch": 0.34,
"learning_rate": 3.3165317903750686e-05,
"loss": 2.8495,
"step": 101600
},
{
"epoch": 0.34,
"learning_rate": 3.314874833475832e-05,
"loss": 2.8376,
"step": 101700
},
{
"epoch": 0.34,
"learning_rate": 3.313217876576595e-05,
"loss": 2.853,
"step": 101800
},
{
"epoch": 0.34,
"learning_rate": 3.311560919677357e-05,
"loss": 2.8409,
"step": 101900
},
{
"epoch": 0.34,
"learning_rate": 3.30990396277812e-05,
"loss": 2.8445,
"step": 102000
},
{
"epoch": 0.34,
"learning_rate": 3.3082470058788835e-05,
"loss": 2.8433,
"step": 102100
},
{
"epoch": 0.34,
"learning_rate": 3.306590048979646e-05,
"loss": 2.8389,
"step": 102200
},
{
"epoch": 0.34,
"learning_rate": 3.304933092080409e-05,
"loss": 2.8548,
"step": 102300
},
{
"epoch": 0.34,
"learning_rate": 3.303276135181172e-05,
"loss": 2.8355,
"step": 102400
},
{
"epoch": 0.34,
"learning_rate": 3.3016191782819345e-05,
"loss": 2.8458,
"step": 102500
},
{
"epoch": 0.34,
"learning_rate": 3.299962221382698e-05,
"loss": 2.841,
"step": 102600
},
{
"epoch": 0.34,
"learning_rate": 3.2983052644834604e-05,
"loss": 2.8428,
"step": 102700
},
{
"epoch": 0.34,
"learning_rate": 3.296648307584223e-05,
"loss": 2.8336,
"step": 102800
},
{
"epoch": 0.34,
"learning_rate": 3.294991350684986e-05,
"loss": 2.8532,
"step": 102900
},
{
"epoch": 0.34,
"learning_rate": 3.2933343937857495e-05,
"loss": 2.8434,
"step": 103000
},
{
"epoch": 0.34,
"learning_rate": 3.2916774368865114e-05,
"loss": 2.8419,
"step": 103100
},
{
"epoch": 0.34,
"learning_rate": 3.2900204799872746e-05,
"loss": 2.8456,
"step": 103200
},
{
"epoch": 0.34,
"learning_rate": 3.288363523088038e-05,
"loss": 2.8622,
"step": 103300
},
{
"epoch": 0.34,
"learning_rate": 3.2867065661888005e-05,
"loss": 2.8602,
"step": 103400
},
{
"epoch": 0.34,
"learning_rate": 3.285049609289563e-05,
"loss": 2.849,
"step": 103500
},
{
"epoch": 0.34,
"learning_rate": 3.283392652390326e-05,
"loss": 2.8438,
"step": 103600
},
{
"epoch": 0.34,
"learning_rate": 3.281735695491089e-05,
"loss": 2.843,
"step": 103700
},
{
"epoch": 0.34,
"learning_rate": 3.280078738591852e-05,
"loss": 2.8345,
"step": 103800
},
{
"epoch": 0.34,
"learning_rate": 3.278421781692615e-05,
"loss": 2.843,
"step": 103900
},
{
"epoch": 0.34,
"learning_rate": 3.276764824793377e-05,
"loss": 2.844,
"step": 104000
},
{
"epoch": 0.34,
"learning_rate": 3.2751078678941406e-05,
"loss": 2.8415,
"step": 104100
},
{
"epoch": 0.35,
"learning_rate": 3.273450910994904e-05,
"loss": 2.8367,
"step": 104200
},
{
"epoch": 0.35,
"learning_rate": 3.271793954095666e-05,
"loss": 2.8421,
"step": 104300
},
{
"epoch": 0.35,
"learning_rate": 3.270136997196429e-05,
"loss": 2.8384,
"step": 104400
},
{
"epoch": 0.35,
"learning_rate": 3.268480040297192e-05,
"loss": 2.8247,
"step": 104500
},
{
"epoch": 0.35,
"learning_rate": 3.266823083397955e-05,
"loss": 2.8483,
"step": 104600
},
{
"epoch": 0.35,
"learning_rate": 3.2651661264987174e-05,
"loss": 2.8405,
"step": 104700
},
{
"epoch": 0.35,
"learning_rate": 3.263509169599481e-05,
"loss": 2.8387,
"step": 104800
},
{
"epoch": 0.35,
"learning_rate": 3.261852212700243e-05,
"loss": 2.8201,
"step": 104900
},
{
"epoch": 0.35,
"learning_rate": 3.2601952558010065e-05,
"loss": 2.8507,
"step": 105000
},
{
"epoch": 0.35,
"learning_rate": 3.258538298901769e-05,
"loss": 2.8452,
"step": 105100
},
{
"epoch": 0.35,
"learning_rate": 3.256881342002532e-05,
"loss": 2.8428,
"step": 105200
},
{
"epoch": 0.35,
"learning_rate": 3.255224385103295e-05,
"loss": 2.8485,
"step": 105300
},
{
"epoch": 0.35,
"learning_rate": 3.253567428204058e-05,
"loss": 2.8477,
"step": 105400
},
{
"epoch": 0.35,
"learning_rate": 3.25191047130482e-05,
"loss": 2.8372,
"step": 105500
},
{
"epoch": 0.35,
"learning_rate": 3.2502535144055834e-05,
"loss": 2.8458,
"step": 105600
},
{
"epoch": 0.35,
"learning_rate": 3.2485965575063466e-05,
"loss": 2.8499,
"step": 105700
},
{
"epoch": 0.35,
"learning_rate": 3.246939600607109e-05,
"loss": 2.8545,
"step": 105800
},
{
"epoch": 0.35,
"learning_rate": 3.245282643707872e-05,
"loss": 2.8399,
"step": 105900
},
{
"epoch": 0.35,
"learning_rate": 3.243625686808635e-05,
"loss": 2.8495,
"step": 106000
},
{
"epoch": 0.35,
"learning_rate": 3.2419687299093976e-05,
"loss": 2.8477,
"step": 106100
},
{
"epoch": 0.35,
"learning_rate": 3.240311773010161e-05,
"loss": 2.8534,
"step": 106200
},
{
"epoch": 0.35,
"learning_rate": 3.2386548161109235e-05,
"loss": 2.8408,
"step": 106300
},
{
"epoch": 0.35,
"learning_rate": 3.236997859211686e-05,
"loss": 2.8523,
"step": 106400
},
{
"epoch": 0.35,
"learning_rate": 3.235340902312449e-05,
"loss": 2.8364,
"step": 106500
},
{
"epoch": 0.35,
"learning_rate": 3.2336839454132126e-05,
"loss": 2.8539,
"step": 106600
},
{
"epoch": 0.35,
"learning_rate": 3.2320269885139745e-05,
"loss": 2.8271,
"step": 106700
},
{
"epoch": 0.35,
"learning_rate": 3.230370031614738e-05,
"loss": 2.8632,
"step": 106800
},
{
"epoch": 0.35,
"learning_rate": 3.228713074715501e-05,
"loss": 2.8406,
"step": 106900
},
{
"epoch": 0.35,
"learning_rate": 3.2270561178162636e-05,
"loss": 2.8529,
"step": 107000
},
{
"epoch": 0.35,
"learning_rate": 3.225399160917026e-05,
"loss": 2.8446,
"step": 107100
},
{
"epoch": 0.36,
"learning_rate": 3.2237422040177894e-05,
"loss": 2.8441,
"step": 107200
},
{
"epoch": 0.36,
"learning_rate": 3.222085247118552e-05,
"loss": 2.8318,
"step": 107300
},
{
"epoch": 0.36,
"learning_rate": 3.220428290219315e-05,
"loss": 2.8375,
"step": 107400
},
{
"epoch": 0.36,
"learning_rate": 3.218771333320078e-05,
"loss": 2.8459,
"step": 107500
},
{
"epoch": 0.36,
"learning_rate": 3.2171143764208404e-05,
"loss": 2.839,
"step": 107600
},
{
"epoch": 0.36,
"learning_rate": 3.215457419521604e-05,
"loss": 2.8474,
"step": 107700
},
{
"epoch": 0.36,
"learning_rate": 3.213800462622367e-05,
"loss": 2.8363,
"step": 107800
},
{
"epoch": 0.36,
"learning_rate": 3.212143505723129e-05,
"loss": 2.8383,
"step": 107900
},
{
"epoch": 0.36,
"learning_rate": 3.210486548823892e-05,
"loss": 2.8554,
"step": 108000
},
{
"epoch": 0.36,
"learning_rate": 3.2088295919246554e-05,
"loss": 2.832,
"step": 108100
},
{
"epoch": 0.36,
"learning_rate": 3.207172635025418e-05,
"loss": 2.8201,
"step": 108200
},
{
"epoch": 0.36,
"learning_rate": 3.2055156781261805e-05,
"loss": 2.8435,
"step": 108300
},
{
"epoch": 0.36,
"learning_rate": 3.203858721226944e-05,
"loss": 2.8481,
"step": 108400
},
{
"epoch": 0.36,
"learning_rate": 3.2022017643277064e-05,
"loss": 2.8325,
"step": 108500
},
{
"epoch": 0.36,
"learning_rate": 3.2005448074284696e-05,
"loss": 2.837,
"step": 108600
},
{
"epoch": 0.36,
"learning_rate": 3.198887850529232e-05,
"loss": 2.8338,
"step": 108700
},
{
"epoch": 0.36,
"learning_rate": 3.197230893629995e-05,
"loss": 2.8486,
"step": 108800
},
{
"epoch": 0.36,
"learning_rate": 3.195573936730758e-05,
"loss": 2.8465,
"step": 108900
},
{
"epoch": 0.36,
"learning_rate": 3.193916979831521e-05,
"loss": 2.8357,
"step": 109000
},
{
"epoch": 0.36,
"learning_rate": 3.192260022932284e-05,
"loss": 2.8242,
"step": 109100
},
{
"epoch": 0.36,
"learning_rate": 3.1906030660330465e-05,
"loss": 2.8466,
"step": 109200
},
{
"epoch": 0.36,
"learning_rate": 3.188946109133809e-05,
"loss": 2.8479,
"step": 109300
},
{
"epoch": 0.36,
"learning_rate": 3.187289152234572e-05,
"loss": 2.8449,
"step": 109400
},
{
"epoch": 0.36,
"learning_rate": 3.185632195335335e-05,
"loss": 2.8362,
"step": 109500
},
{
"epoch": 0.36,
"learning_rate": 3.1839752384360975e-05,
"loss": 2.8333,
"step": 109600
},
{
"epoch": 0.36,
"learning_rate": 3.182318281536861e-05,
"loss": 2.8464,
"step": 109700
},
{
"epoch": 0.36,
"learning_rate": 3.180661324637624e-05,
"loss": 2.8468,
"step": 109800
},
{
"epoch": 0.36,
"learning_rate": 3.1790043677383866e-05,
"loss": 2.8407,
"step": 109900
},
{
"epoch": 0.36,
"learning_rate": 3.177347410839149e-05,
"loss": 2.8292,
"step": 110000
},
{
"epoch": 0.36,
"learning_rate": 3.1756904539399124e-05,
"loss": 2.8328,
"step": 110100
},
{
"epoch": 0.37,
"learning_rate": 3.174033497040675e-05,
"loss": 2.8421,
"step": 110200
},
{
"epoch": 0.37,
"learning_rate": 3.172376540141438e-05,
"loss": 2.8359,
"step": 110300
},
{
"epoch": 0.37,
"learning_rate": 3.170719583242201e-05,
"loss": 2.8288,
"step": 110400
},
{
"epoch": 0.37,
"learning_rate": 3.1690626263429634e-05,
"loss": 2.8271,
"step": 110500
},
{
"epoch": 0.37,
"learning_rate": 3.167405669443727e-05,
"loss": 2.8247,
"step": 110600
},
{
"epoch": 0.37,
"learning_rate": 3.165748712544489e-05,
"loss": 2.8495,
"step": 110700
},
{
"epoch": 0.37,
"learning_rate": 3.164091755645252e-05,
"loss": 2.8396,
"step": 110800
},
{
"epoch": 0.37,
"learning_rate": 3.162434798746015e-05,
"loss": 2.8421,
"step": 110900
},
{
"epoch": 0.37,
"learning_rate": 3.1607778418467784e-05,
"loss": 2.8576,
"step": 111000
},
{
"epoch": 0.37,
"learning_rate": 3.159120884947541e-05,
"loss": 2.8284,
"step": 111100
},
{
"epoch": 0.37,
"learning_rate": 3.1574639280483035e-05,
"loss": 2.8273,
"step": 111200
},
{
"epoch": 0.37,
"learning_rate": 3.155806971149067e-05,
"loss": 2.8576,
"step": 111300
},
{
"epoch": 0.37,
"learning_rate": 3.1541500142498294e-05,
"loss": 2.8418,
"step": 111400
},
{
"epoch": 0.37,
"learning_rate": 3.1524930573505926e-05,
"loss": 2.8357,
"step": 111500
},
{
"epoch": 0.37,
"learning_rate": 3.150836100451355e-05,
"loss": 2.8583,
"step": 111600
},
{
"epoch": 0.37,
"learning_rate": 3.149179143552118e-05,
"loss": 2.8411,
"step": 111700
},
{
"epoch": 0.37,
"learning_rate": 3.147522186652881e-05,
"loss": 2.8496,
"step": 111800
},
{
"epoch": 0.37,
"learning_rate": 3.1458652297536436e-05,
"loss": 2.841,
"step": 111900
},
{
"epoch": 0.37,
"learning_rate": 3.144208272854406e-05,
"loss": 2.856,
"step": 112000
},
{
"epoch": 0.37,
"learning_rate": 3.1425513159551695e-05,
"loss": 2.8418,
"step": 112100
},
{
"epoch": 0.37,
"learning_rate": 3.140894359055933e-05,
"loss": 2.842,
"step": 112200
},
{
"epoch": 0.37,
"learning_rate": 3.139237402156695e-05,
"loss": 2.8444,
"step": 112300
},
{
"epoch": 0.37,
"learning_rate": 3.137580445257458e-05,
"loss": 2.8361,
"step": 112400
},
{
"epoch": 0.37,
"learning_rate": 3.135923488358221e-05,
"loss": 2.844,
"step": 112500
},
{
"epoch": 0.37,
"learning_rate": 3.134266531458984e-05,
"loss": 2.8419,
"step": 112600
},
{
"epoch": 0.37,
"learning_rate": 3.132609574559747e-05,
"loss": 2.8437,
"step": 112700
},
{
"epoch": 0.37,
"learning_rate": 3.1309526176605096e-05,
"loss": 2.8431,
"step": 112800
},
{
"epoch": 0.37,
"learning_rate": 3.129295660761272e-05,
"loss": 2.8317,
"step": 112900
},
{
"epoch": 0.37,
"learning_rate": 3.1276387038620354e-05,
"loss": 2.8387,
"step": 113000
},
{
"epoch": 0.37,
"learning_rate": 3.125981746962798e-05,
"loss": 2.8381,
"step": 113100
},
{
"epoch": 0.38,
"learning_rate": 3.1243247900635606e-05,
"loss": 2.827,
"step": 113200
},
{
"epoch": 0.38,
"learning_rate": 3.122667833164324e-05,
"loss": 2.8488,
"step": 113300
},
{
"epoch": 0.38,
"learning_rate": 3.121010876265087e-05,
"loss": 2.8355,
"step": 113400
},
{
"epoch": 0.38,
"learning_rate": 3.11935391936585e-05,
"loss": 2.8337,
"step": 113500
},
{
"epoch": 0.38,
"learning_rate": 3.117696962466612e-05,
"loss": 2.8341,
"step": 113600
},
{
"epoch": 0.38,
"learning_rate": 3.1160400055673755e-05,
"loss": 2.8391,
"step": 113700
},
{
"epoch": 0.38,
"learning_rate": 3.114383048668138e-05,
"loss": 2.8331,
"step": 113800
},
{
"epoch": 0.38,
"learning_rate": 3.1127260917689014e-05,
"loss": 2.8345,
"step": 113900
},
{
"epoch": 0.38,
"learning_rate": 3.111069134869664e-05,
"loss": 2.8419,
"step": 114000
},
{
"epoch": 0.38,
"learning_rate": 3.1094121779704265e-05,
"loss": 2.8514,
"step": 114100
},
{
"epoch": 0.38,
"learning_rate": 3.10775522107119e-05,
"loss": 2.8414,
"step": 114200
},
{
"epoch": 0.38,
"learning_rate": 3.106098264171953e-05,
"loss": 2.8489,
"step": 114300
},
{
"epoch": 0.38,
"learning_rate": 3.104441307272715e-05,
"loss": 2.8364,
"step": 114400
},
{
"epoch": 0.38,
"learning_rate": 3.102784350373478e-05,
"loss": 2.8542,
"step": 114500
},
{
"epoch": 0.38,
"learning_rate": 3.1011273934742415e-05,
"loss": 2.842,
"step": 114600
},
{
"epoch": 0.38,
"learning_rate": 3.099470436575004e-05,
"loss": 2.8364,
"step": 114700
},
{
"epoch": 0.38,
"learning_rate": 3.0978134796757666e-05,
"loss": 2.8413,
"step": 114800
},
{
"epoch": 0.38,
"learning_rate": 3.09615652277653e-05,
"loss": 2.8456,
"step": 114900
},
{
"epoch": 0.38,
"learning_rate": 3.0944995658772925e-05,
"loss": 2.8296,
"step": 115000
},
{
"epoch": 0.38,
"learning_rate": 3.092842608978056e-05,
"loss": 2.8262,
"step": 115100
},
{
"epoch": 0.38,
"learning_rate": 3.091185652078818e-05,
"loss": 2.8265,
"step": 115200
},
{
"epoch": 0.38,
"learning_rate": 3.089528695179581e-05,
"loss": 2.8459,
"step": 115300
},
{
"epoch": 0.38,
"learning_rate": 3.087871738280344e-05,
"loss": 2.8488,
"step": 115400
},
{
"epoch": 0.38,
"learning_rate": 3.0862147813811074e-05,
"loss": 2.8378,
"step": 115500
},
{
"epoch": 0.38,
"learning_rate": 3.084557824481869e-05,
"loss": 2.852,
"step": 115600
},
{
"epoch": 0.38,
"learning_rate": 3.0829008675826326e-05,
"loss": 2.8356,
"step": 115700
},
{
"epoch": 0.38,
"learning_rate": 3.081243910683396e-05,
"loss": 2.8524,
"step": 115800
},
{
"epoch": 0.38,
"learning_rate": 3.0795869537841584e-05,
"loss": 2.8431,
"step": 115900
},
{
"epoch": 0.38,
"learning_rate": 3.077929996884921e-05,
"loss": 2.8301,
"step": 116000
},
{
"epoch": 0.38,
"learning_rate": 3.076273039985684e-05,
"loss": 2.8415,
"step": 116100
},
{
"epoch": 0.39,
"learning_rate": 3.074616083086447e-05,
"loss": 2.8234,
"step": 116200
},
{
"epoch": 0.39,
"learning_rate": 3.07295912618721e-05,
"loss": 2.8285,
"step": 116300
},
{
"epoch": 0.39,
"learning_rate": 3.071302169287973e-05,
"loss": 2.8434,
"step": 116400
},
{
"epoch": 0.39,
"learning_rate": 3.069645212388735e-05,
"loss": 2.8278,
"step": 116500
},
{
"epoch": 0.39,
"learning_rate": 3.0679882554894985e-05,
"loss": 2.8476,
"step": 116600
},
{
"epoch": 0.39,
"learning_rate": 3.066331298590262e-05,
"loss": 2.8529,
"step": 116700
},
{
"epoch": 0.39,
"learning_rate": 3.064674341691024e-05,
"loss": 2.8309,
"step": 116800
},
{
"epoch": 0.39,
"learning_rate": 3.063017384791787e-05,
"loss": 2.8577,
"step": 116900
},
{
"epoch": 0.39,
"learning_rate": 3.06136042789255e-05,
"loss": 2.8411,
"step": 117000
},
{
"epoch": 0.39,
"learning_rate": 3.059703470993313e-05,
"loss": 2.8521,
"step": 117100
},
{
"epoch": 0.39,
"learning_rate": 3.0580465140940754e-05,
"loss": 2.8317,
"step": 117200
},
{
"epoch": 0.39,
"learning_rate": 3.0563895571948386e-05,
"loss": 2.8491,
"step": 117300
},
{
"epoch": 0.39,
"learning_rate": 3.054732600295601e-05,
"loss": 2.8299,
"step": 117400
},
{
"epoch": 0.39,
"learning_rate": 3.0530756433963645e-05,
"loss": 2.8404,
"step": 117500
},
{
"epoch": 0.39,
"learning_rate": 3.0514186864971274e-05,
"loss": 2.841,
"step": 117600
},
{
"epoch": 0.39,
"learning_rate": 3.0497617295978896e-05,
"loss": 2.8363,
"step": 117700
},
{
"epoch": 0.39,
"learning_rate": 3.048104772698653e-05,
"loss": 2.8372,
"step": 117800
},
{
"epoch": 0.39,
"learning_rate": 3.046447815799415e-05,
"loss": 2.8482,
"step": 117900
},
{
"epoch": 0.39,
"learning_rate": 3.0447908589001784e-05,
"loss": 2.8319,
"step": 118000
},
{
"epoch": 0.39,
"learning_rate": 3.0431339020009413e-05,
"loss": 2.8296,
"step": 118100
},
{
"epoch": 0.39,
"learning_rate": 3.041476945101704e-05,
"loss": 2.8455,
"step": 118200
},
{
"epoch": 0.39,
"learning_rate": 3.0398199882024668e-05,
"loss": 2.8366,
"step": 118300
},
{
"epoch": 0.39,
"learning_rate": 3.03816303130323e-05,
"loss": 2.8465,
"step": 118400
},
{
"epoch": 0.39,
"learning_rate": 3.0365060744039927e-05,
"loss": 2.8358,
"step": 118500
},
{
"epoch": 0.39,
"learning_rate": 3.0348491175047556e-05,
"loss": 2.8384,
"step": 118600
},
{
"epoch": 0.39,
"learning_rate": 3.0331921606055185e-05,
"loss": 2.8338,
"step": 118700
},
{
"epoch": 0.39,
"learning_rate": 3.031535203706281e-05,
"loss": 2.8341,
"step": 118800
},
{
"epoch": 0.39,
"learning_rate": 3.029878246807044e-05,
"loss": 2.8467,
"step": 118900
},
{
"epoch": 0.39,
"learning_rate": 3.0282212899078073e-05,
"loss": 2.8278,
"step": 119000
},
{
"epoch": 0.39,
"learning_rate": 3.02656433300857e-05,
"loss": 2.8334,
"step": 119100
},
{
"epoch": 0.4,
"learning_rate": 3.0249073761093328e-05,
"loss": 2.8375,
"step": 119200
},
{
"epoch": 0.4,
"learning_rate": 3.0232504192100957e-05,
"loss": 2.8375,
"step": 119300
},
{
"epoch": 0.4,
"learning_rate": 3.0215934623108583e-05,
"loss": 2.843,
"step": 119400
},
{
"epoch": 0.4,
"learning_rate": 3.0199365054116212e-05,
"loss": 2.8349,
"step": 119500
},
{
"epoch": 0.4,
"learning_rate": 3.0182795485123844e-05,
"loss": 2.8297,
"step": 119600
},
{
"epoch": 0.4,
"learning_rate": 3.016622591613147e-05,
"loss": 2.8371,
"step": 119700
},
{
"epoch": 0.4,
"learning_rate": 3.01496563471391e-05,
"loss": 2.8468,
"step": 119800
},
{
"epoch": 0.4,
"learning_rate": 3.013308677814673e-05,
"loss": 2.823,
"step": 119900
},
{
"epoch": 0.4,
"learning_rate": 3.0116517209154354e-05,
"loss": 2.8514,
"step": 120000
},
{
"epoch": 0.4,
"learning_rate": 3.0099947640161984e-05,
"loss": 2.8213,
"step": 120100
},
{
"epoch": 0.4,
"learning_rate": 3.0083378071169616e-05,
"loss": 2.8326,
"step": 120200
},
{
"epoch": 0.4,
"learning_rate": 3.0066808502177242e-05,
"loss": 2.8573,
"step": 120300
},
{
"epoch": 0.4,
"learning_rate": 3.005023893318487e-05,
"loss": 2.8307,
"step": 120400
},
{
"epoch": 0.4,
"learning_rate": 3.00336693641925e-05,
"loss": 2.8461,
"step": 120500
},
{
"epoch": 0.4,
"learning_rate": 3.0017099795200126e-05,
"loss": 2.8449,
"step": 120600
},
{
"epoch": 0.4,
"learning_rate": 3.0000530226207755e-05,
"loss": 2.8323,
"step": 120700
},
{
"epoch": 0.4,
"learning_rate": 2.9983960657215388e-05,
"loss": 2.8442,
"step": 120800
},
{
"epoch": 0.4,
"learning_rate": 2.9967391088223014e-05,
"loss": 2.8413,
"step": 120900
},
{
"epoch": 0.4,
"learning_rate": 2.9950821519230643e-05,
"loss": 2.8336,
"step": 121000
},
{
"epoch": 0.4,
"learning_rate": 2.9934251950238272e-05,
"loss": 2.8342,
"step": 121100
},
{
"epoch": 0.4,
"learning_rate": 2.9917682381245898e-05,
"loss": 2.8279,
"step": 121200
},
{
"epoch": 0.4,
"learning_rate": 2.9901112812253527e-05,
"loss": 2.8509,
"step": 121300
},
{
"epoch": 0.4,
"learning_rate": 2.988454324326116e-05,
"loss": 2.834,
"step": 121400
},
{
"epoch": 0.4,
"learning_rate": 2.9867973674268786e-05,
"loss": 2.8231,
"step": 121500
},
{
"epoch": 0.4,
"learning_rate": 2.9851404105276415e-05,
"loss": 2.827,
"step": 121600
},
{
"epoch": 0.4,
"learning_rate": 2.9834834536284044e-05,
"loss": 2.8354,
"step": 121700
},
{
"epoch": 0.4,
"learning_rate": 2.981826496729167e-05,
"loss": 2.8452,
"step": 121800
},
{
"epoch": 0.4,
"learning_rate": 2.98016953982993e-05,
"loss": 2.8261,
"step": 121900
},
{
"epoch": 0.4,
"learning_rate": 2.9785125829306932e-05,
"loss": 2.842,
"step": 122000
},
{
"epoch": 0.4,
"learning_rate": 2.9768556260314558e-05,
"loss": 2.821,
"step": 122100
},
{
"epoch": 0.4,
"learning_rate": 2.9751986691322187e-05,
"loss": 2.8456,
"step": 122200
},
{
"epoch": 0.41,
"learning_rate": 2.9735417122329816e-05,
"loss": 2.8458,
"step": 122300
},
{
"epoch": 0.41,
"learning_rate": 2.9718847553337442e-05,
"loss": 2.8413,
"step": 122400
},
{
"epoch": 0.41,
"learning_rate": 2.970227798434507e-05,
"loss": 2.8622,
"step": 122500
},
{
"epoch": 0.41,
"learning_rate": 2.9685708415352704e-05,
"loss": 2.8429,
"step": 122600
},
{
"epoch": 0.41,
"learning_rate": 2.966913884636033e-05,
"loss": 2.8433,
"step": 122700
},
{
"epoch": 0.41,
"learning_rate": 2.965256927736796e-05,
"loss": 2.8389,
"step": 122800
},
{
"epoch": 0.41,
"learning_rate": 2.9635999708375588e-05,
"loss": 2.8354,
"step": 122900
},
{
"epoch": 0.41,
"learning_rate": 2.9619430139383214e-05,
"loss": 2.8333,
"step": 123000
},
{
"epoch": 0.41,
"learning_rate": 2.9602860570390846e-05,
"loss": 2.8328,
"step": 123100
},
{
"epoch": 0.41,
"learning_rate": 2.9586291001398475e-05,
"loss": 2.8379,
"step": 123200
},
{
"epoch": 0.41,
"learning_rate": 2.95697214324061e-05,
"loss": 2.8322,
"step": 123300
},
{
"epoch": 0.41,
"learning_rate": 2.955315186341373e-05,
"loss": 2.8295,
"step": 123400
},
{
"epoch": 0.41,
"learning_rate": 2.953658229442136e-05,
"loss": 2.8453,
"step": 123500
},
{
"epoch": 0.41,
"learning_rate": 2.9520012725428986e-05,
"loss": 2.8332,
"step": 123600
},
{
"epoch": 0.41,
"learning_rate": 2.9503443156436618e-05,
"loss": 2.8338,
"step": 123700
},
{
"epoch": 0.41,
"learning_rate": 2.9486873587444247e-05,
"loss": 2.8348,
"step": 123800
},
{
"epoch": 0.41,
"learning_rate": 2.9470304018451873e-05,
"loss": 2.8259,
"step": 123900
},
{
"epoch": 0.41,
"learning_rate": 2.9453734449459502e-05,
"loss": 2.827,
"step": 124000
},
{
"epoch": 0.41,
"learning_rate": 2.943716488046713e-05,
"loss": 2.838,
"step": 124100
},
{
"epoch": 0.41,
"learning_rate": 2.9420595311474757e-05,
"loss": 2.8323,
"step": 124200
},
{
"epoch": 0.41,
"learning_rate": 2.940402574248239e-05,
"loss": 2.831,
"step": 124300
},
{
"epoch": 0.41,
"learning_rate": 2.938745617349002e-05,
"loss": 2.8175,
"step": 124400
},
{
"epoch": 0.41,
"learning_rate": 2.9370886604497645e-05,
"loss": 2.8344,
"step": 124500
},
{
"epoch": 0.41,
"learning_rate": 2.9354317035505274e-05,
"loss": 2.8475,
"step": 124600
},
{
"epoch": 0.41,
"learning_rate": 2.9337747466512903e-05,
"loss": 2.8395,
"step": 124700
},
{
"epoch": 0.41,
"learning_rate": 2.932117789752053e-05,
"loss": 2.8426,
"step": 124800
},
{
"epoch": 0.41,
"learning_rate": 2.9304608328528162e-05,
"loss": 2.8458,
"step": 124900
},
{
"epoch": 0.41,
"learning_rate": 2.928803875953579e-05,
"loss": 2.8286,
"step": 125000
},
{
"epoch": 0.41,
"learning_rate": 2.9271469190543417e-05,
"loss": 2.828,
"step": 125100
},
{
"epoch": 0.41,
"learning_rate": 2.9254899621551046e-05,
"loss": 2.8445,
"step": 125200
},
{
"epoch": 0.42,
"learning_rate": 2.9238330052558675e-05,
"loss": 2.82,
"step": 125300
},
{
"epoch": 0.42,
"learning_rate": 2.92217604835663e-05,
"loss": 2.8364,
"step": 125400
},
{
"epoch": 0.42,
"learning_rate": 2.9205190914573934e-05,
"loss": 2.8505,
"step": 125500
},
{
"epoch": 0.42,
"learning_rate": 2.9188621345581563e-05,
"loss": 2.8237,
"step": 125600
},
{
"epoch": 0.42,
"learning_rate": 2.917205177658919e-05,
"loss": 2.8371,
"step": 125700
},
{
"epoch": 0.42,
"learning_rate": 2.9155482207596818e-05,
"loss": 2.8295,
"step": 125800
},
{
"epoch": 0.42,
"learning_rate": 2.9138912638604447e-05,
"loss": 2.8319,
"step": 125900
},
{
"epoch": 0.42,
"learning_rate": 2.9122343069612073e-05,
"loss": 2.846,
"step": 126000
},
{
"epoch": 0.42,
"learning_rate": 2.9105773500619705e-05,
"loss": 2.8466,
"step": 126100
},
{
"epoch": 0.42,
"learning_rate": 2.9089203931627335e-05,
"loss": 2.8345,
"step": 126200
},
{
"epoch": 0.42,
"learning_rate": 2.907263436263496e-05,
"loss": 2.8401,
"step": 126300
},
{
"epoch": 0.42,
"learning_rate": 2.905606479364259e-05,
"loss": 2.8247,
"step": 126400
},
{
"epoch": 0.42,
"learning_rate": 2.903949522465022e-05,
"loss": 2.847,
"step": 126500
},
{
"epoch": 0.42,
"learning_rate": 2.9022925655657845e-05,
"loss": 2.8422,
"step": 126600
},
{
"epoch": 0.42,
"learning_rate": 2.9006356086665477e-05,
"loss": 2.8295,
"step": 126700
},
{
"epoch": 0.42,
"learning_rate": 2.89897865176731e-05,
"loss": 2.8363,
"step": 126800
},
{
"epoch": 0.42,
"learning_rate": 2.8973216948680732e-05,
"loss": 2.8352,
"step": 126900
},
{
"epoch": 0.42,
"learning_rate": 2.895664737968836e-05,
"loss": 2.8338,
"step": 127000
},
{
"epoch": 0.42,
"learning_rate": 2.8940077810695987e-05,
"loss": 2.8169,
"step": 127100
},
{
"epoch": 0.42,
"learning_rate": 2.8923508241703617e-05,
"loss": 2.8349,
"step": 127200
},
{
"epoch": 0.42,
"learning_rate": 2.890693867271125e-05,
"loss": 2.829,
"step": 127300
},
{
"epoch": 0.42,
"learning_rate": 2.889036910371887e-05,
"loss": 2.8273,
"step": 127400
},
{
"epoch": 0.42,
"learning_rate": 2.8873799534726504e-05,
"loss": 2.8445,
"step": 127500
},
{
"epoch": 0.42,
"learning_rate": 2.8857229965734133e-05,
"loss": 2.838,
"step": 127600
},
{
"epoch": 0.42,
"learning_rate": 2.884066039674176e-05,
"loss": 2.8432,
"step": 127700
},
{
"epoch": 0.42,
"learning_rate": 2.882409082774939e-05,
"loss": 2.8257,
"step": 127800
},
{
"epoch": 0.42,
"learning_rate": 2.880752125875702e-05,
"loss": 2.8406,
"step": 127900
},
{
"epoch": 0.42,
"learning_rate": 2.8790951689764643e-05,
"loss": 2.8379,
"step": 128000
},
{
"epoch": 0.42,
"learning_rate": 2.8774382120772276e-05,
"loss": 2.8102,
"step": 128100
},
{
"epoch": 0.42,
"learning_rate": 2.8757812551779905e-05,
"loss": 2.8394,
"step": 128200
},
{
"epoch": 0.43,
"learning_rate": 2.874124298278753e-05,
"loss": 2.8307,
"step": 128300
},
{
"epoch": 0.43,
"learning_rate": 2.872467341379516e-05,
"loss": 2.8361,
"step": 128400
},
{
"epoch": 0.43,
"learning_rate": 2.8708103844802793e-05,
"loss": 2.8257,
"step": 128500
},
{
"epoch": 0.43,
"learning_rate": 2.8691534275810415e-05,
"loss": 2.8246,
"step": 128600
},
{
"epoch": 0.43,
"learning_rate": 2.8674964706818048e-05,
"loss": 2.8294,
"step": 128700
},
{
"epoch": 0.43,
"learning_rate": 2.8658395137825677e-05,
"loss": 2.8345,
"step": 128800
},
{
"epoch": 0.43,
"learning_rate": 2.8641825568833303e-05,
"loss": 2.8251,
"step": 128900
},
{
"epoch": 0.43,
"learning_rate": 2.8625255999840932e-05,
"loss": 2.8363,
"step": 129000
},
{
"epoch": 0.43,
"learning_rate": 2.8608686430848565e-05,
"loss": 2.8352,
"step": 129100
},
{
"epoch": 0.43,
"learning_rate": 2.8592116861856187e-05,
"loss": 2.8412,
"step": 129200
},
{
"epoch": 0.43,
"learning_rate": 2.857554729286382e-05,
"loss": 2.8294,
"step": 129300
},
{
"epoch": 0.43,
"learning_rate": 2.855897772387145e-05,
"loss": 2.8406,
"step": 129400
},
{
"epoch": 0.43,
"learning_rate": 2.8542408154879075e-05,
"loss": 2.8272,
"step": 129500
},
{
"epoch": 0.43,
"learning_rate": 2.8525838585886704e-05,
"loss": 2.8418,
"step": 129600
},
{
"epoch": 0.43,
"learning_rate": 2.8509269016894337e-05,
"loss": 2.823,
"step": 129700
},
{
"epoch": 0.43,
"learning_rate": 2.849269944790196e-05,
"loss": 2.8424,
"step": 129800
},
{
"epoch": 0.43,
"learning_rate": 2.847612987890959e-05,
"loss": 2.8401,
"step": 129900
},
{
"epoch": 0.43,
"learning_rate": 2.845956030991722e-05,
"loss": 2.818,
"step": 130000
},
{
"epoch": 0.43,
"learning_rate": 2.8442990740924847e-05,
"loss": 2.828,
"step": 130100
},
{
"epoch": 0.43,
"learning_rate": 2.8426421171932476e-05,
"loss": 2.8356,
"step": 130200
},
{
"epoch": 0.43,
"learning_rate": 2.840985160294011e-05,
"loss": 2.8295,
"step": 130300
},
{
"epoch": 0.43,
"learning_rate": 2.839328203394773e-05,
"loss": 2.8351,
"step": 130400
},
{
"epoch": 0.43,
"learning_rate": 2.8376712464955363e-05,
"loss": 2.8332,
"step": 130500
},
{
"epoch": 0.43,
"learning_rate": 2.8360142895962993e-05,
"loss": 2.8381,
"step": 130600
},
{
"epoch": 0.43,
"learning_rate": 2.834357332697062e-05,
"loss": 2.8367,
"step": 130700
},
{
"epoch": 0.43,
"learning_rate": 2.8327003757978248e-05,
"loss": 2.8397,
"step": 130800
},
{
"epoch": 0.43,
"learning_rate": 2.831043418898588e-05,
"loss": 2.8354,
"step": 130900
},
{
"epoch": 0.43,
"learning_rate": 2.8293864619993503e-05,
"loss": 2.8172,
"step": 131000
},
{
"epoch": 0.43,
"learning_rate": 2.8277295051001135e-05,
"loss": 2.84,
"step": 131100
},
{
"epoch": 0.43,
"learning_rate": 2.8260725482008764e-05,
"loss": 2.837,
"step": 131200
},
{
"epoch": 0.44,
"learning_rate": 2.824415591301639e-05,
"loss": 2.8355,
"step": 131300
},
{
"epoch": 0.44,
"learning_rate": 2.822758634402402e-05,
"loss": 2.8155,
"step": 131400
},
{
"epoch": 0.44,
"learning_rate": 2.8211016775031652e-05,
"loss": 2.8266,
"step": 131500
},
{
"epoch": 0.44,
"learning_rate": 2.8194447206039275e-05,
"loss": 2.8401,
"step": 131600
},
{
"epoch": 0.44,
"learning_rate": 2.8177877637046907e-05,
"loss": 2.8414,
"step": 131700
},
{
"epoch": 0.44,
"learning_rate": 2.8161308068054536e-05,
"loss": 2.8338,
"step": 131800
},
{
"epoch": 0.44,
"learning_rate": 2.8144738499062162e-05,
"loss": 2.8218,
"step": 131900
},
{
"epoch": 0.44,
"learning_rate": 2.812816893006979e-05,
"loss": 2.832,
"step": 132000
},
{
"epoch": 0.44,
"learning_rate": 2.8111599361077424e-05,
"loss": 2.8353,
"step": 132100
},
{
"epoch": 0.44,
"learning_rate": 2.8095029792085046e-05,
"loss": 2.8454,
"step": 132200
},
{
"epoch": 0.44,
"learning_rate": 2.807846022309268e-05,
"loss": 2.8537,
"step": 132300
},
{
"epoch": 0.44,
"learning_rate": 2.8061890654100308e-05,
"loss": 2.8305,
"step": 132400
},
{
"epoch": 0.44,
"learning_rate": 2.8045321085107934e-05,
"loss": 2.8388,
"step": 132500
},
{
"epoch": 0.44,
"learning_rate": 2.8028751516115563e-05,
"loss": 2.8421,
"step": 132600
},
{
"epoch": 0.44,
"learning_rate": 2.8012181947123196e-05,
"loss": 2.8072,
"step": 132700
},
{
"epoch": 0.44,
"learning_rate": 2.799561237813082e-05,
"loss": 2.8349,
"step": 132800
},
{
"epoch": 0.44,
"learning_rate": 2.797904280913845e-05,
"loss": 2.812,
"step": 132900
},
{
"epoch": 0.44,
"learning_rate": 2.796247324014608e-05,
"loss": 2.8223,
"step": 133000
},
{
"epoch": 0.44,
"learning_rate": 2.7945903671153706e-05,
"loss": 2.8384,
"step": 133100
},
{
"epoch": 0.44,
"learning_rate": 2.7929334102161335e-05,
"loss": 2.8356,
"step": 133200
},
{
"epoch": 0.44,
"learning_rate": 2.7912764533168968e-05,
"loss": 2.8278,
"step": 133300
},
{
"epoch": 0.44,
"learning_rate": 2.7896194964176593e-05,
"loss": 2.8323,
"step": 133400
},
{
"epoch": 0.44,
"learning_rate": 2.7879625395184223e-05,
"loss": 2.8317,
"step": 133500
},
{
"epoch": 0.44,
"learning_rate": 2.7863055826191852e-05,
"loss": 2.8358,
"step": 133600
},
{
"epoch": 0.44,
"learning_rate": 2.7846486257199478e-05,
"loss": 2.838,
"step": 133700
},
{
"epoch": 0.44,
"learning_rate": 2.7829916688207107e-05,
"loss": 2.8335,
"step": 133800
},
{
"epoch": 0.44,
"learning_rate": 2.781334711921474e-05,
"loss": 2.8238,
"step": 133900
},
{
"epoch": 0.44,
"learning_rate": 2.7796777550222365e-05,
"loss": 2.8437,
"step": 134000
},
{
"epoch": 0.44,
"learning_rate": 2.7780207981229994e-05,
"loss": 2.8282,
"step": 134100
},
{
"epoch": 0.44,
"learning_rate": 2.7763638412237624e-05,
"loss": 2.8189,
"step": 134200
},
{
"epoch": 0.45,
"learning_rate": 2.774706884324525e-05,
"loss": 2.8292,
"step": 134300
},
{
"epoch": 0.45,
"learning_rate": 2.773049927425288e-05,
"loss": 2.8395,
"step": 134400
},
{
"epoch": 0.45,
"learning_rate": 2.771392970526051e-05,
"loss": 2.8371,
"step": 134500
},
{
"epoch": 0.45,
"learning_rate": 2.7697360136268137e-05,
"loss": 2.8385,
"step": 134600
},
{
"epoch": 0.45,
"learning_rate": 2.7680790567275766e-05,
"loss": 2.8459,
"step": 134700
},
{
"epoch": 0.45,
"learning_rate": 2.7664220998283396e-05,
"loss": 2.8029,
"step": 134800
},
{
"epoch": 0.45,
"learning_rate": 2.764765142929102e-05,
"loss": 2.8293,
"step": 134900
},
{
"epoch": 0.45,
"learning_rate": 2.763108186029865e-05,
"loss": 2.8366,
"step": 135000
},
{
"epoch": 0.45,
"learning_rate": 2.7614512291306283e-05,
"loss": 2.8267,
"step": 135100
},
{
"epoch": 0.45,
"learning_rate": 2.759794272231391e-05,
"loss": 2.8349,
"step": 135200
},
{
"epoch": 0.45,
"learning_rate": 2.7581373153321538e-05,
"loss": 2.8264,
"step": 135300
},
{
"epoch": 0.45,
"learning_rate": 2.7564803584329164e-05,
"loss": 2.8394,
"step": 135400
},
{
"epoch": 0.45,
"learning_rate": 2.7548234015336793e-05,
"loss": 2.8333,
"step": 135500
},
{
"epoch": 0.45,
"learning_rate": 2.7531664446344422e-05,
"loss": 2.8374,
"step": 135600
},
{
"epoch": 0.45,
"learning_rate": 2.7515094877352048e-05,
"loss": 2.8262,
"step": 135700
},
{
"epoch": 0.45,
"learning_rate": 2.749852530835968e-05,
"loss": 2.8415,
"step": 135800
},
{
"epoch": 0.45,
"learning_rate": 2.748195573936731e-05,
"loss": 2.8328,
"step": 135900
},
{
"epoch": 0.45,
"learning_rate": 2.7465386170374936e-05,
"loss": 2.8417,
"step": 136000
},
{
"epoch": 0.45,
"learning_rate": 2.7448816601382565e-05,
"loss": 2.8219,
"step": 136100
},
{
"epoch": 0.45,
"learning_rate": 2.7432247032390194e-05,
"loss": 2.8287,
"step": 136200
},
{
"epoch": 0.45,
"learning_rate": 2.741567746339782e-05,
"loss": 2.8418,
"step": 136300
},
{
"epoch": 0.45,
"learning_rate": 2.7399107894405453e-05,
"loss": 2.8332,
"step": 136400
},
{
"epoch": 0.45,
"learning_rate": 2.7382538325413082e-05,
"loss": 2.8319,
"step": 136500
},
{
"epoch": 0.45,
"learning_rate": 2.7365968756420708e-05,
"loss": 2.8308,
"step": 136600
},
{
"epoch": 0.45,
"learning_rate": 2.7349399187428337e-05,
"loss": 2.8384,
"step": 136700
},
{
"epoch": 0.45,
"learning_rate": 2.733282961843597e-05,
"loss": 2.8502,
"step": 136800
},
{
"epoch": 0.45,
"learning_rate": 2.7316260049443592e-05,
"loss": 2.8291,
"step": 136900
},
{
"epoch": 0.45,
"learning_rate": 2.7299690480451225e-05,
"loss": 2.8285,
"step": 137000
},
{
"epoch": 0.45,
"learning_rate": 2.7283120911458854e-05,
"loss": 2.8277,
"step": 137100
},
{
"epoch": 0.45,
"learning_rate": 2.726655134246648e-05,
"loss": 2.8278,
"step": 137200
},
{
"epoch": 0.46,
"learning_rate": 2.724998177347411e-05,
"loss": 2.8317,
"step": 137300
},
{
"epoch": 0.46,
"learning_rate": 2.723341220448174e-05,
"loss": 2.8277,
"step": 137400
},
{
"epoch": 0.46,
"learning_rate": 2.7216842635489364e-05,
"loss": 2.8154,
"step": 137500
},
{
"epoch": 0.46,
"learning_rate": 2.7200273066496996e-05,
"loss": 2.8352,
"step": 137600
},
{
"epoch": 0.46,
"learning_rate": 2.7183703497504626e-05,
"loss": 2.844,
"step": 137700
},
{
"epoch": 0.46,
"learning_rate": 2.716713392851225e-05,
"loss": 2.8172,
"step": 137800
},
{
"epoch": 0.46,
"learning_rate": 2.715056435951988e-05,
"loss": 2.8369,
"step": 137900
},
{
"epoch": 0.46,
"learning_rate": 2.7133994790527513e-05,
"loss": 2.8439,
"step": 138000
},
{
"epoch": 0.46,
"learning_rate": 2.7117425221535136e-05,
"loss": 2.8323,
"step": 138100
},
{
"epoch": 0.46,
"learning_rate": 2.7100855652542768e-05,
"loss": 2.8276,
"step": 138200
},
{
"epoch": 0.46,
"learning_rate": 2.7084286083550397e-05,
"loss": 2.8242,
"step": 138300
},
{
"epoch": 0.46,
"learning_rate": 2.7067716514558023e-05,
"loss": 2.8282,
"step": 138400
},
{
"epoch": 0.46,
"learning_rate": 2.7051146945565652e-05,
"loss": 2.8159,
"step": 138500
},
{
"epoch": 0.46,
"learning_rate": 2.7034577376573285e-05,
"loss": 2.8317,
"step": 138600
},
{
"epoch": 0.46,
"learning_rate": 2.7018007807580907e-05,
"loss": 2.8357,
"step": 138700
},
{
"epoch": 0.46,
"learning_rate": 2.700143823858854e-05,
"loss": 2.828,
"step": 138800
},
{
"epoch": 0.46,
"learning_rate": 2.698486866959617e-05,
"loss": 2.8173,
"step": 138900
},
{
"epoch": 0.46,
"learning_rate": 2.6968299100603795e-05,
"loss": 2.8357,
"step": 139000
},
{
"epoch": 0.46,
"learning_rate": 2.6951729531611424e-05,
"loss": 2.8234,
"step": 139100
},
{
"epoch": 0.46,
"learning_rate": 2.6935159962619057e-05,
"loss": 2.818,
"step": 139200
},
{
"epoch": 0.46,
"learning_rate": 2.691859039362668e-05,
"loss": 2.8299,
"step": 139300
},
{
"epoch": 0.46,
"learning_rate": 2.6902020824634312e-05,
"loss": 2.8245,
"step": 139400
},
{
"epoch": 0.46,
"learning_rate": 2.688545125564194e-05,
"loss": 2.8307,
"step": 139500
},
{
"epoch": 0.46,
"learning_rate": 2.6868881686649567e-05,
"loss": 2.8529,
"step": 139600
},
{
"epoch": 0.46,
"learning_rate": 2.6852312117657196e-05,
"loss": 2.8292,
"step": 139700
},
{
"epoch": 0.46,
"learning_rate": 2.683574254866483e-05,
"loss": 2.8435,
"step": 139800
},
{
"epoch": 0.46,
"learning_rate": 2.681917297967245e-05,
"loss": 2.8218,
"step": 139900
},
{
"epoch": 0.46,
"learning_rate": 2.6802603410680084e-05,
"loss": 2.8341,
"step": 140000
},
{
"epoch": 0.46,
"learning_rate": 2.6786033841687713e-05,
"loss": 2.8299,
"step": 140100
},
{
"epoch": 0.46,
"learning_rate": 2.676946427269534e-05,
"loss": 2.831,
"step": 140200
},
{
"epoch": 0.46,
"learning_rate": 2.6752894703702968e-05,
"loss": 2.8282,
"step": 140300
},
{
"epoch": 0.47,
"learning_rate": 2.67363251347106e-05,
"loss": 2.8371,
"step": 140400
},
{
"epoch": 0.47,
"learning_rate": 2.6719755565718223e-05,
"loss": 2.8249,
"step": 140500
},
{
"epoch": 0.47,
"learning_rate": 2.6703185996725856e-05,
"loss": 2.8423,
"step": 140600
},
{
"epoch": 0.47,
"learning_rate": 2.6686616427733485e-05,
"loss": 2.8284,
"step": 140700
},
{
"epoch": 0.47,
"learning_rate": 2.667004685874111e-05,
"loss": 2.8351,
"step": 140800
},
{
"epoch": 0.47,
"learning_rate": 2.665347728974874e-05,
"loss": 2.8237,
"step": 140900
},
{
"epoch": 0.47,
"learning_rate": 2.6636907720756372e-05,
"loss": 2.828,
"step": 141000
},
{
"epoch": 0.47,
"learning_rate": 2.6620338151763995e-05,
"loss": 2.8354,
"step": 141100
},
{
"epoch": 0.47,
"learning_rate": 2.6603768582771627e-05,
"loss": 2.8409,
"step": 141200
},
{
"epoch": 0.47,
"learning_rate": 2.6587199013779257e-05,
"loss": 2.8188,
"step": 141300
},
{
"epoch": 0.47,
"learning_rate": 2.6570629444786882e-05,
"loss": 2.8247,
"step": 141400
},
{
"epoch": 0.47,
"learning_rate": 2.655405987579451e-05,
"loss": 2.8293,
"step": 141500
},
{
"epoch": 0.47,
"learning_rate": 2.6537490306802144e-05,
"loss": 2.8314,
"step": 141600
},
{
"epoch": 0.47,
"learning_rate": 2.6520920737809767e-05,
"loss": 2.8474,
"step": 141700
},
{
"epoch": 0.47,
"learning_rate": 2.65043511688174e-05,
"loss": 2.8304,
"step": 141800
},
{
"epoch": 0.47,
"learning_rate": 2.648778159982503e-05,
"loss": 2.8285,
"step": 141900
},
{
"epoch": 0.47,
"learning_rate": 2.6471212030832654e-05,
"loss": 2.8342,
"step": 142000
},
{
"epoch": 0.47,
"learning_rate": 2.6454642461840283e-05,
"loss": 2.8243,
"step": 142100
},
{
"epoch": 0.47,
"learning_rate": 2.6438072892847916e-05,
"loss": 2.8399,
"step": 142200
},
{
"epoch": 0.47,
"learning_rate": 2.642150332385554e-05,
"loss": 2.8301,
"step": 142300
},
{
"epoch": 0.47,
"learning_rate": 2.640493375486317e-05,
"loss": 2.8377,
"step": 142400
},
{
"epoch": 0.47,
"learning_rate": 2.63883641858708e-05,
"loss": 2.8188,
"step": 142500
},
{
"epoch": 0.47,
"learning_rate": 2.6371794616878426e-05,
"loss": 2.8314,
"step": 142600
},
{
"epoch": 0.47,
"learning_rate": 2.6355225047886055e-05,
"loss": 2.8209,
"step": 142700
},
{
"epoch": 0.47,
"learning_rate": 2.6338655478893688e-05,
"loss": 2.8313,
"step": 142800
},
{
"epoch": 0.47,
"learning_rate": 2.632208590990131e-05,
"loss": 2.8341,
"step": 142900
},
{
"epoch": 0.47,
"learning_rate": 2.6305516340908943e-05,
"loss": 2.8411,
"step": 143000
},
{
"epoch": 0.47,
"learning_rate": 2.6288946771916572e-05,
"loss": 2.8361,
"step": 143100
},
{
"epoch": 0.47,
"learning_rate": 2.6272377202924198e-05,
"loss": 2.842,
"step": 143200
},
{
"epoch": 0.47,
"learning_rate": 2.6255807633931827e-05,
"loss": 2.8143,
"step": 143300
},
{
"epoch": 0.48,
"learning_rate": 2.623923806493946e-05,
"loss": 2.8187,
"step": 143400
},
{
"epoch": 0.48,
"learning_rate": 2.6222668495947082e-05,
"loss": 2.8286,
"step": 143500
},
{
"epoch": 0.48,
"learning_rate": 2.6206098926954715e-05,
"loss": 2.8388,
"step": 143600
},
{
"epoch": 0.48,
"learning_rate": 2.6189529357962344e-05,
"loss": 2.8258,
"step": 143700
},
{
"epoch": 0.48,
"learning_rate": 2.617295978896997e-05,
"loss": 2.8146,
"step": 143800
},
{
"epoch": 0.48,
"learning_rate": 2.61563902199776e-05,
"loss": 2.8328,
"step": 143900
},
{
"epoch": 0.48,
"learning_rate": 2.613982065098523e-05,
"loss": 2.8326,
"step": 144000
},
{
"epoch": 0.48,
"learning_rate": 2.6123251081992854e-05,
"loss": 2.8132,
"step": 144100
},
{
"epoch": 0.48,
"learning_rate": 2.6106681513000487e-05,
"loss": 2.8541,
"step": 144200
},
{
"epoch": 0.48,
"learning_rate": 2.6090111944008112e-05,
"loss": 2.8216,
"step": 144300
},
{
"epoch": 0.48,
"learning_rate": 2.607354237501574e-05,
"loss": 2.8207,
"step": 144400
},
{
"epoch": 0.48,
"learning_rate": 2.605697280602337e-05,
"loss": 2.8269,
"step": 144500
},
{
"epoch": 0.48,
"learning_rate": 2.6040403237030997e-05,
"loss": 2.8368,
"step": 144600
},
{
"epoch": 0.48,
"learning_rate": 2.6023833668038626e-05,
"loss": 2.8091,
"step": 144700
},
{
"epoch": 0.48,
"learning_rate": 2.600726409904626e-05,
"loss": 2.823,
"step": 144800
},
{
"epoch": 0.48,
"learning_rate": 2.5990694530053884e-05,
"loss": 2.8332,
"step": 144900
},
{
"epoch": 0.48,
"learning_rate": 2.5974124961061514e-05,
"loss": 2.8252,
"step": 145000
},
{
"epoch": 0.48,
"learning_rate": 2.5957555392069143e-05,
"loss": 2.8343,
"step": 145100
},
{
"epoch": 0.48,
"learning_rate": 2.594098582307677e-05,
"loss": 2.8185,
"step": 145200
},
{
"epoch": 0.48,
"learning_rate": 2.5924416254084398e-05,
"loss": 2.8307,
"step": 145300
},
{
"epoch": 0.48,
"learning_rate": 2.590784668509203e-05,
"loss": 2.8317,
"step": 145400
},
{
"epoch": 0.48,
"learning_rate": 2.5891277116099656e-05,
"loss": 2.825,
"step": 145500
},
{
"epoch": 0.48,
"learning_rate": 2.5874707547107285e-05,
"loss": 2.8129,
"step": 145600
},
{
"epoch": 0.48,
"learning_rate": 2.5858137978114915e-05,
"loss": 2.8166,
"step": 145700
},
{
"epoch": 0.48,
"learning_rate": 2.584156840912254e-05,
"loss": 2.8245,
"step": 145800
},
{
"epoch": 0.48,
"learning_rate": 2.582499884013017e-05,
"loss": 2.8321,
"step": 145900
},
{
"epoch": 0.48,
"learning_rate": 2.5808429271137802e-05,
"loss": 2.8137,
"step": 146000
},
{
"epoch": 0.48,
"learning_rate": 2.5791859702145428e-05,
"loss": 2.8387,
"step": 146100
},
{
"epoch": 0.48,
"learning_rate": 2.5775290133153057e-05,
"loss": 2.8264,
"step": 146200
},
{
"epoch": 0.48,
"learning_rate": 2.5758720564160686e-05,
"loss": 2.8317,
"step": 146300
},
{
"epoch": 0.49,
"learning_rate": 2.5742150995168312e-05,
"loss": 2.8367,
"step": 146400
},
{
"epoch": 0.49,
"learning_rate": 2.5725581426175945e-05,
"loss": 2.812,
"step": 146500
},
{
"epoch": 0.49,
"learning_rate": 2.5709011857183574e-05,
"loss": 2.8313,
"step": 146600
},
{
"epoch": 0.49,
"learning_rate": 2.56924422881912e-05,
"loss": 2.8354,
"step": 146700
},
{
"epoch": 0.49,
"learning_rate": 2.567587271919883e-05,
"loss": 2.8201,
"step": 146800
},
{
"epoch": 0.49,
"learning_rate": 2.5659303150206458e-05,
"loss": 2.8411,
"step": 146900
},
{
"epoch": 0.49,
"learning_rate": 2.5642733581214084e-05,
"loss": 2.8231,
"step": 147000
},
{
"epoch": 0.49,
"learning_rate": 2.5626164012221717e-05,
"loss": 2.8374,
"step": 147100
},
{
"epoch": 0.49,
"learning_rate": 2.5609594443229346e-05,
"loss": 2.8329,
"step": 147200
},
{
"epoch": 0.49,
"learning_rate": 2.559302487423697e-05,
"loss": 2.8307,
"step": 147300
},
{
"epoch": 0.49,
"learning_rate": 2.55764553052446e-05,
"loss": 2.8232,
"step": 147400
},
{
"epoch": 0.49,
"learning_rate": 2.555988573625223e-05,
"loss": 2.8416,
"step": 147500
},
{
"epoch": 0.49,
"learning_rate": 2.5543316167259856e-05,
"loss": 2.8206,
"step": 147600
},
{
"epoch": 0.49,
"learning_rate": 2.552674659826749e-05,
"loss": 2.8276,
"step": 147700
},
{
"epoch": 0.49,
"learning_rate": 2.5510177029275118e-05,
"loss": 2.8218,
"step": 147800
},
{
"epoch": 0.49,
"learning_rate": 2.5493607460282744e-05,
"loss": 2.8224,
"step": 147900
},
{
"epoch": 0.49,
"learning_rate": 2.5477037891290373e-05,
"loss": 2.838,
"step": 148000
},
{
"epoch": 0.49,
"learning_rate": 2.5460468322298002e-05,
"loss": 2.8374,
"step": 148100
},
{
"epoch": 0.49,
"learning_rate": 2.5443898753305628e-05,
"loss": 2.8287,
"step": 148200
},
{
"epoch": 0.49,
"learning_rate": 2.542732918431326e-05,
"loss": 2.8239,
"step": 148300
},
{
"epoch": 0.49,
"learning_rate": 2.541075961532089e-05,
"loss": 2.8412,
"step": 148400
},
{
"epoch": 0.49,
"learning_rate": 2.5394190046328515e-05,
"loss": 2.8219,
"step": 148500
},
{
"epoch": 0.49,
"learning_rate": 2.5377620477336145e-05,
"loss": 2.8183,
"step": 148600
},
{
"epoch": 0.49,
"learning_rate": 2.5361050908343774e-05,
"loss": 2.816,
"step": 148700
},
{
"epoch": 0.49,
"learning_rate": 2.53444813393514e-05,
"loss": 2.8196,
"step": 148800
},
{
"epoch": 0.49,
"learning_rate": 2.5327911770359032e-05,
"loss": 2.8166,
"step": 148900
},
{
"epoch": 0.49,
"learning_rate": 2.531134220136666e-05,
"loss": 2.8284,
"step": 149000
},
{
"epoch": 0.49,
"learning_rate": 2.5294772632374287e-05,
"loss": 2.8454,
"step": 149100
},
{
"epoch": 0.49,
"learning_rate": 2.5278203063381916e-05,
"loss": 2.8266,
"step": 149200
},
{
"epoch": 0.49,
"learning_rate": 2.5261633494389546e-05,
"loss": 2.8338,
"step": 149300
},
{
"epoch": 0.5,
"learning_rate": 2.524506392539717e-05,
"loss": 2.8351,
"step": 149400
},
{
"epoch": 0.5,
"learning_rate": 2.5228494356404804e-05,
"loss": 2.836,
"step": 149500
},
{
"epoch": 0.5,
"learning_rate": 2.5211924787412433e-05,
"loss": 2.8219,
"step": 149600
},
{
"epoch": 0.5,
"learning_rate": 2.519535521842006e-05,
"loss": 2.8244,
"step": 149700
},
{
"epoch": 0.5,
"learning_rate": 2.5178785649427688e-05,
"loss": 2.8277,
"step": 149800
},
{
"epoch": 0.5,
"learning_rate": 2.5162216080435317e-05,
"loss": 2.8073,
"step": 149900
},
{
"epoch": 0.5,
"learning_rate": 2.5145646511442943e-05,
"loss": 2.8429,
"step": 150000
},
{
"epoch": 0.5,
"learning_rate": 2.5129076942450576e-05,
"loss": 2.8086,
"step": 150100
},
{
"epoch": 0.5,
"learning_rate": 2.5112507373458205e-05,
"loss": 2.818,
"step": 150200
},
{
"epoch": 0.5,
"learning_rate": 2.509593780446583e-05,
"loss": 2.8389,
"step": 150300
},
{
"epoch": 0.5,
"learning_rate": 2.507936823547346e-05,
"loss": 2.8389,
"step": 150400
},
{
"epoch": 0.5,
"learning_rate": 2.506279866648109e-05,
"loss": 2.8111,
"step": 150500
},
{
"epoch": 0.5,
"learning_rate": 2.5046229097488715e-05,
"loss": 2.8286,
"step": 150600
},
{
"epoch": 0.5,
"learning_rate": 2.5029659528496348e-05,
"loss": 2.8099,
"step": 150700
},
{
"epoch": 0.5,
"learning_rate": 2.5013089959503977e-05,
"loss": 2.8213,
"step": 150800
},
{
"epoch": 0.5,
"learning_rate": 2.4996520390511603e-05,
"loss": 2.8466,
"step": 150900
},
{
"epoch": 0.5,
"learning_rate": 2.4979950821519232e-05,
"loss": 2.8339,
"step": 151000
},
{
"epoch": 0.5,
"learning_rate": 2.496338125252686e-05,
"loss": 2.8246,
"step": 151100
},
{
"epoch": 0.5,
"learning_rate": 2.4946811683534487e-05,
"loss": 2.8332,
"step": 151200
},
{
"epoch": 0.5,
"learning_rate": 2.493024211454212e-05,
"loss": 2.8275,
"step": 151300
},
{
"epoch": 0.5,
"learning_rate": 2.4913672545549745e-05,
"loss": 2.8241,
"step": 151400
},
{
"epoch": 0.5,
"learning_rate": 2.4897102976557375e-05,
"loss": 2.8396,
"step": 151500
},
{
"epoch": 0.5,
"learning_rate": 2.4880533407565004e-05,
"loss": 2.8204,
"step": 151600
},
{
"epoch": 0.5,
"learning_rate": 2.4863963838572633e-05,
"loss": 2.8233,
"step": 151700
},
{
"epoch": 0.5,
"learning_rate": 2.484739426958026e-05,
"loss": 2.8204,
"step": 151800
},
{
"epoch": 0.5,
"learning_rate": 2.483082470058789e-05,
"loss": 2.8224,
"step": 151900
},
{
"epoch": 0.5,
"learning_rate": 2.4814255131595517e-05,
"loss": 2.8265,
"step": 152000
},
{
"epoch": 0.5,
"learning_rate": 2.4797685562603146e-05,
"loss": 2.8332,
"step": 152100
},
{
"epoch": 0.5,
"learning_rate": 2.4781115993610776e-05,
"loss": 2.8229,
"step": 152200
},
{
"epoch": 0.5,
"learning_rate": 2.4764546424618405e-05,
"loss": 2.8296,
"step": 152300
},
{
"epoch": 0.51,
"learning_rate": 2.474797685562603e-05,
"loss": 2.8369,
"step": 152400
},
{
"epoch": 0.51,
"learning_rate": 2.4731407286633663e-05,
"loss": 2.8164,
"step": 152500
},
{
"epoch": 0.51,
"learning_rate": 2.471483771764129e-05,
"loss": 2.8278,
"step": 152600
},
{
"epoch": 0.51,
"learning_rate": 2.4698268148648918e-05,
"loss": 2.8278,
"step": 152700
},
{
"epoch": 0.51,
"learning_rate": 2.4681698579656547e-05,
"loss": 2.8265,
"step": 152800
},
{
"epoch": 0.51,
"learning_rate": 2.4665129010664177e-05,
"loss": 2.8301,
"step": 152900
},
{
"epoch": 0.51,
"learning_rate": 2.4648559441671803e-05,
"loss": 2.8217,
"step": 153000
},
{
"epoch": 0.51,
"learning_rate": 2.4631989872679435e-05,
"loss": 2.8385,
"step": 153100
},
{
"epoch": 0.51,
"learning_rate": 2.461542030368706e-05,
"loss": 2.842,
"step": 153200
},
{
"epoch": 0.51,
"learning_rate": 2.459885073469469e-05,
"loss": 2.8266,
"step": 153300
},
{
"epoch": 0.51,
"learning_rate": 2.458228116570232e-05,
"loss": 2.8354,
"step": 153400
},
{
"epoch": 0.51,
"learning_rate": 2.456571159670995e-05,
"loss": 2.8217,
"step": 153500
},
{
"epoch": 0.51,
"learning_rate": 2.4549142027717574e-05,
"loss": 2.8368,
"step": 153600
},
{
"epoch": 0.51,
"learning_rate": 2.4532572458725207e-05,
"loss": 2.8128,
"step": 153700
},
{
"epoch": 0.51,
"learning_rate": 2.4516002889732833e-05,
"loss": 2.8192,
"step": 153800
},
{
"epoch": 0.51,
"learning_rate": 2.4499433320740462e-05,
"loss": 2.8218,
"step": 153900
},
{
"epoch": 0.51,
"learning_rate": 2.448286375174809e-05,
"loss": 2.8232,
"step": 154000
},
{
"epoch": 0.51,
"learning_rate": 2.446629418275572e-05,
"loss": 2.8263,
"step": 154100
},
{
"epoch": 0.51,
"learning_rate": 2.4449724613763346e-05,
"loss": 2.8345,
"step": 154200
},
{
"epoch": 0.51,
"learning_rate": 2.443315504477098e-05,
"loss": 2.83,
"step": 154300
},
{
"epoch": 0.51,
"learning_rate": 2.4416585475778605e-05,
"loss": 2.8274,
"step": 154400
},
{
"epoch": 0.51,
"learning_rate": 2.4400015906786234e-05,
"loss": 2.8129,
"step": 154500
},
{
"epoch": 0.51,
"learning_rate": 2.4383446337793863e-05,
"loss": 2.8325,
"step": 154600
},
{
"epoch": 0.51,
"learning_rate": 2.4366876768801492e-05,
"loss": 2.8558,
"step": 154700
},
{
"epoch": 0.51,
"learning_rate": 2.4350307199809118e-05,
"loss": 2.8291,
"step": 154800
},
{
"epoch": 0.51,
"learning_rate": 2.433373763081675e-05,
"loss": 2.8284,
"step": 154900
},
{
"epoch": 0.51,
"learning_rate": 2.4317168061824376e-05,
"loss": 2.8239,
"step": 155000
},
{
"epoch": 0.51,
"learning_rate": 2.4300598492832006e-05,
"loss": 2.8279,
"step": 155100
},
{
"epoch": 0.51,
"learning_rate": 2.428402892383963e-05,
"loss": 2.8043,
"step": 155200
},
{
"epoch": 0.51,
"learning_rate": 2.4267459354847264e-05,
"loss": 2.8393,
"step": 155300
},
{
"epoch": 0.51,
"learning_rate": 2.425088978585489e-05,
"loss": 2.8491,
"step": 155400
},
{
"epoch": 0.52,
"learning_rate": 2.423432021686252e-05,
"loss": 2.826,
"step": 155500
},
{
"epoch": 0.52,
"learning_rate": 2.421775064787015e-05,
"loss": 2.8207,
"step": 155600
},
{
"epoch": 0.52,
"learning_rate": 2.4201181078877778e-05,
"loss": 2.8271,
"step": 155700
},
{
"epoch": 0.52,
"learning_rate": 2.4184611509885403e-05,
"loss": 2.8257,
"step": 155800
},
{
"epoch": 0.52,
"learning_rate": 2.4168041940893036e-05,
"loss": 2.8362,
"step": 155900
},
{
"epoch": 0.52,
"learning_rate": 2.4151472371900662e-05,
"loss": 2.8251,
"step": 156000
},
{
"epoch": 0.52,
"learning_rate": 2.413490280290829e-05,
"loss": 2.8298,
"step": 156100
},
{
"epoch": 0.52,
"learning_rate": 2.411833323391592e-05,
"loss": 2.8247,
"step": 156200
},
{
"epoch": 0.52,
"learning_rate": 2.410176366492355e-05,
"loss": 2.8282,
"step": 156300
},
{
"epoch": 0.52,
"learning_rate": 2.4085194095931175e-05,
"loss": 2.8227,
"step": 156400
},
{
"epoch": 0.52,
"learning_rate": 2.4068624526938808e-05,
"loss": 2.8085,
"step": 156500
},
{
"epoch": 0.52,
"learning_rate": 2.4052054957946434e-05,
"loss": 2.821,
"step": 156600
},
{
"epoch": 0.52,
"learning_rate": 2.4035485388954063e-05,
"loss": 2.8203,
"step": 156700
},
{
"epoch": 0.52,
"learning_rate": 2.4018915819961692e-05,
"loss": 2.8185,
"step": 156800
},
{
"epoch": 0.52,
"learning_rate": 2.400234625096932e-05,
"loss": 2.8302,
"step": 156900
},
{
"epoch": 0.52,
"learning_rate": 2.3985776681976947e-05,
"loss": 2.8193,
"step": 157000
},
{
"epoch": 0.52,
"learning_rate": 2.396920711298458e-05,
"loss": 2.8131,
"step": 157100
},
{
"epoch": 0.52,
"learning_rate": 2.3952637543992205e-05,
"loss": 2.828,
"step": 157200
},
{
"epoch": 0.52,
"learning_rate": 2.3936067974999835e-05,
"loss": 2.8322,
"step": 157300
},
{
"epoch": 0.52,
"learning_rate": 2.3919498406007464e-05,
"loss": 2.8157,
"step": 157400
},
{
"epoch": 0.52,
"learning_rate": 2.3902928837015093e-05,
"loss": 2.8349,
"step": 157500
},
{
"epoch": 0.52,
"learning_rate": 2.388635926802272e-05,
"loss": 2.8319,
"step": 157600
},
{
"epoch": 0.52,
"learning_rate": 2.386978969903035e-05,
"loss": 2.831,
"step": 157700
},
{
"epoch": 0.52,
"learning_rate": 2.3853220130037977e-05,
"loss": 2.816,
"step": 157800
},
{
"epoch": 0.52,
"learning_rate": 2.3836650561045606e-05,
"loss": 2.8195,
"step": 157900
},
{
"epoch": 0.52,
"learning_rate": 2.3820080992053236e-05,
"loss": 2.813,
"step": 158000
},
{
"epoch": 0.52,
"learning_rate": 2.3803511423060865e-05,
"loss": 2.81,
"step": 158100
},
{
"epoch": 0.52,
"learning_rate": 2.378694185406849e-05,
"loss": 2.8182,
"step": 158200
},
{
"epoch": 0.52,
"learning_rate": 2.3770372285076123e-05,
"loss": 2.8224,
"step": 158300
},
{
"epoch": 0.52,
"learning_rate": 2.375380271608375e-05,
"loss": 2.8279,
"step": 158400
},
{
"epoch": 0.53,
"learning_rate": 2.373723314709138e-05,
"loss": 2.8365,
"step": 158500
},
{
"epoch": 0.53,
"learning_rate": 2.3720663578099008e-05,
"loss": 2.8459,
"step": 158600
},
{
"epoch": 0.53,
"learning_rate": 2.3704094009106637e-05,
"loss": 2.8178,
"step": 158700
},
{
"epoch": 0.53,
"learning_rate": 2.3687524440114266e-05,
"loss": 2.8194,
"step": 158800
},
{
"epoch": 0.53,
"learning_rate": 2.3670954871121895e-05,
"loss": 2.8056,
"step": 158900
},
{
"epoch": 0.53,
"learning_rate": 2.365438530212952e-05,
"loss": 2.8349,
"step": 159000
},
{
"epoch": 0.53,
"learning_rate": 2.363781573313715e-05,
"loss": 2.8142,
"step": 159100
},
{
"epoch": 0.53,
"learning_rate": 2.362124616414478e-05,
"loss": 2.8158,
"step": 159200
},
{
"epoch": 0.53,
"learning_rate": 2.360467659515241e-05,
"loss": 2.8147,
"step": 159300
},
{
"epoch": 0.53,
"learning_rate": 2.3588107026160038e-05,
"loss": 2.8192,
"step": 159400
},
{
"epoch": 0.53,
"learning_rate": 2.3571537457167664e-05,
"loss": 2.8152,
"step": 159500
},
{
"epoch": 0.53,
"learning_rate": 2.3554967888175293e-05,
"loss": 2.8219,
"step": 159600
},
{
"epoch": 0.53,
"learning_rate": 2.3538398319182922e-05,
"loss": 2.8171,
"step": 159700
},
{
"epoch": 0.53,
"learning_rate": 2.352182875019055e-05,
"loss": 2.8147,
"step": 159800
},
{
"epoch": 0.53,
"learning_rate": 2.350525918119818e-05,
"loss": 2.8268,
"step": 159900
},
{
"epoch": 0.53,
"learning_rate": 2.348868961220581e-05,
"loss": 2.8282,
"step": 160000
},
{
"epoch": 0.53,
"learning_rate": 2.3472120043213435e-05,
"loss": 2.8175,
"step": 160100
},
{
"epoch": 0.53,
"learning_rate": 2.3455550474221065e-05,
"loss": 2.8055,
"step": 160200
},
{
"epoch": 0.53,
"learning_rate": 2.3438980905228694e-05,
"loss": 2.8193,
"step": 160300
},
{
"epoch": 0.53,
"learning_rate": 2.3422411336236323e-05,
"loss": 2.8185,
"step": 160400
},
{
"epoch": 0.53,
"learning_rate": 2.3405841767243952e-05,
"loss": 2.8204,
"step": 160500
},
{
"epoch": 0.53,
"learning_rate": 2.338927219825158e-05,
"loss": 2.8215,
"step": 160600
},
{
"epoch": 0.53,
"learning_rate": 2.3372702629259207e-05,
"loss": 2.8326,
"step": 160700
},
{
"epoch": 0.53,
"learning_rate": 2.335613306026684e-05,
"loss": 2.8275,
"step": 160800
},
{
"epoch": 0.53,
"learning_rate": 2.3339563491274466e-05,
"loss": 2.8318,
"step": 160900
},
{
"epoch": 0.53,
"learning_rate": 2.3322993922282095e-05,
"loss": 2.8188,
"step": 161000
},
{
"epoch": 0.53,
"learning_rate": 2.3306424353289724e-05,
"loss": 2.8194,
"step": 161100
},
{
"epoch": 0.53,
"learning_rate": 2.3289854784297353e-05,
"loss": 2.8367,
"step": 161200
},
{
"epoch": 0.53,
"learning_rate": 2.327328521530498e-05,
"loss": 2.8154,
"step": 161300
},
{
"epoch": 0.53,
"learning_rate": 2.3256715646312612e-05,
"loss": 2.8138,
"step": 161400
},
{
"epoch": 0.54,
"learning_rate": 2.3240146077320238e-05,
"loss": 2.8212,
"step": 161500
},
{
"epoch": 0.54,
"learning_rate": 2.3223576508327867e-05,
"loss": 2.8199,
"step": 161600
},
{
"epoch": 0.54,
"learning_rate": 2.3207006939335496e-05,
"loss": 2.8281,
"step": 161700
},
{
"epoch": 0.54,
"learning_rate": 2.3190437370343125e-05,
"loss": 2.8053,
"step": 161800
},
{
"epoch": 0.54,
"learning_rate": 2.317386780135075e-05,
"loss": 2.8199,
"step": 161900
},
{
"epoch": 0.54,
"learning_rate": 2.3157298232358384e-05,
"loss": 2.8227,
"step": 162000
},
{
"epoch": 0.54,
"learning_rate": 2.314072866336601e-05,
"loss": 2.8193,
"step": 162100
},
{
"epoch": 0.54,
"learning_rate": 2.312415909437364e-05,
"loss": 2.8252,
"step": 162200
},
{
"epoch": 0.54,
"learning_rate": 2.3107589525381268e-05,
"loss": 2.8145,
"step": 162300
},
{
"epoch": 0.54,
"learning_rate": 2.3091019956388897e-05,
"loss": 2.8084,
"step": 162400
},
{
"epoch": 0.54,
"learning_rate": 2.3074450387396523e-05,
"loss": 2.8193,
"step": 162500
},
{
"epoch": 0.54,
"learning_rate": 2.3057880818404155e-05,
"loss": 2.8198,
"step": 162600
},
{
"epoch": 0.54,
"learning_rate": 2.304131124941178e-05,
"loss": 2.8213,
"step": 162700
},
{
"epoch": 0.54,
"learning_rate": 2.302474168041941e-05,
"loss": 2.8238,
"step": 162800
},
{
"epoch": 0.54,
"learning_rate": 2.300817211142704e-05,
"loss": 2.8226,
"step": 162900
},
{
"epoch": 0.54,
"learning_rate": 2.299160254243467e-05,
"loss": 2.825,
"step": 163000
},
{
"epoch": 0.54,
"learning_rate": 2.2975032973442295e-05,
"loss": 2.8184,
"step": 163100
},
{
"epoch": 0.54,
"learning_rate": 2.2958463404449927e-05,
"loss": 2.8357,
"step": 163200
},
{
"epoch": 0.54,
"learning_rate": 2.2941893835457553e-05,
"loss": 2.8241,
"step": 163300
},
{
"epoch": 0.54,
"learning_rate": 2.2925324266465182e-05,
"loss": 2.8124,
"step": 163400
},
{
"epoch": 0.54,
"learning_rate": 2.290875469747281e-05,
"loss": 2.8143,
"step": 163500
},
{
"epoch": 0.54,
"learning_rate": 2.289218512848044e-05,
"loss": 2.8284,
"step": 163600
},
{
"epoch": 0.54,
"learning_rate": 2.2875615559488067e-05,
"loss": 2.8118,
"step": 163700
},
{
"epoch": 0.54,
"learning_rate": 2.2859045990495696e-05,
"loss": 2.8197,
"step": 163800
},
{
"epoch": 0.54,
"learning_rate": 2.2842476421503325e-05,
"loss": 2.8246,
"step": 163900
},
{
"epoch": 0.54,
"learning_rate": 2.2825906852510954e-05,
"loss": 2.8253,
"step": 164000
},
{
"epoch": 0.54,
"learning_rate": 2.280933728351858e-05,
"loss": 2.8271,
"step": 164100
},
{
"epoch": 0.54,
"learning_rate": 2.2792767714526213e-05,
"loss": 2.8202,
"step": 164200
},
{
"epoch": 0.54,
"learning_rate": 2.277619814553384e-05,
"loss": 2.8168,
"step": 164300
},
{
"epoch": 0.54,
"learning_rate": 2.2759628576541468e-05,
"loss": 2.827,
"step": 164400
},
{
"epoch": 0.55,
"learning_rate": 2.2743059007549097e-05,
"loss": 2.8298,
"step": 164500
},
{
"epoch": 0.55,
"learning_rate": 2.2726489438556726e-05,
"loss": 2.8233,
"step": 164600
},
{
"epoch": 0.55,
"learning_rate": 2.2709919869564352e-05,
"loss": 2.8135,
"step": 164700
},
{
"epoch": 0.55,
"learning_rate": 2.2693350300571984e-05,
"loss": 2.8251,
"step": 164800
},
{
"epoch": 0.55,
"learning_rate": 2.267678073157961e-05,
"loss": 2.8275,
"step": 164900
},
{
"epoch": 0.55,
"learning_rate": 2.266021116258724e-05,
"loss": 2.8113,
"step": 165000
},
{
"epoch": 0.55,
"learning_rate": 2.264364159359487e-05,
"loss": 2.8203,
"step": 165100
},
{
"epoch": 0.55,
"learning_rate": 2.2627072024602498e-05,
"loss": 2.8278,
"step": 165200
},
{
"epoch": 0.55,
"learning_rate": 2.2610502455610124e-05,
"loss": 2.8234,
"step": 165300
},
{
"epoch": 0.55,
"learning_rate": 2.2593932886617756e-05,
"loss": 2.8305,
"step": 165400
},
{
"epoch": 0.55,
"learning_rate": 2.2577363317625382e-05,
"loss": 2.8446,
"step": 165500
},
{
"epoch": 0.55,
"learning_rate": 2.256079374863301e-05,
"loss": 2.8269,
"step": 165600
},
{
"epoch": 0.55,
"learning_rate": 2.254422417964064e-05,
"loss": 2.8241,
"step": 165700
},
{
"epoch": 0.55,
"learning_rate": 2.252765461064827e-05,
"loss": 2.815,
"step": 165800
},
{
"epoch": 0.55,
"learning_rate": 2.2511085041655895e-05,
"loss": 2.819,
"step": 165900
},
{
"epoch": 0.55,
"learning_rate": 2.2494515472663528e-05,
"loss": 2.8162,
"step": 166000
},
{
"epoch": 0.55,
"learning_rate": 2.2477945903671154e-05,
"loss": 2.8443,
"step": 166100
},
{
"epoch": 0.55,
"learning_rate": 2.2461376334678783e-05,
"loss": 2.8078,
"step": 166200
},
{
"epoch": 0.55,
"learning_rate": 2.2444806765686412e-05,
"loss": 2.8349,
"step": 166300
},
{
"epoch": 0.55,
"learning_rate": 2.242823719669404e-05,
"loss": 2.8213,
"step": 166400
},
{
"epoch": 0.55,
"learning_rate": 2.2411667627701667e-05,
"loss": 2.8109,
"step": 166500
},
{
"epoch": 0.55,
"learning_rate": 2.23950980587093e-05,
"loss": 2.8305,
"step": 166600
},
{
"epoch": 0.55,
"learning_rate": 2.2378528489716926e-05,
"loss": 2.8146,
"step": 166700
},
{
"epoch": 0.55,
"learning_rate": 2.2361958920724555e-05,
"loss": 2.8146,
"step": 166800
},
{
"epoch": 0.55,
"learning_rate": 2.2345389351732184e-05,
"loss": 2.8196,
"step": 166900
},
{
"epoch": 0.55,
"learning_rate": 2.2328819782739813e-05,
"loss": 2.8234,
"step": 167000
},
{
"epoch": 0.55,
"learning_rate": 2.231225021374744e-05,
"loss": 2.8274,
"step": 167100
},
{
"epoch": 0.55,
"learning_rate": 2.2295680644755072e-05,
"loss": 2.8245,
"step": 167200
},
{
"epoch": 0.55,
"learning_rate": 2.2279111075762698e-05,
"loss": 2.815,
"step": 167300
},
{
"epoch": 0.55,
"learning_rate": 2.2262541506770327e-05,
"loss": 2.8072,
"step": 167400
},
{
"epoch": 0.56,
"learning_rate": 2.2245971937777956e-05,
"loss": 2.8286,
"step": 167500
},
{
"epoch": 0.56,
"learning_rate": 2.2229402368785585e-05,
"loss": 2.8116,
"step": 167600
},
{
"epoch": 0.56,
"learning_rate": 2.221283279979321e-05,
"loss": 2.8153,
"step": 167700
},
{
"epoch": 0.56,
"learning_rate": 2.2196263230800844e-05,
"loss": 2.8297,
"step": 167800
},
{
"epoch": 0.56,
"learning_rate": 2.217969366180847e-05,
"loss": 2.8041,
"step": 167900
},
{
"epoch": 0.56,
"learning_rate": 2.21631240928161e-05,
"loss": 2.8295,
"step": 168000
},
{
"epoch": 0.56,
"learning_rate": 2.2146554523823724e-05,
"loss": 2.8222,
"step": 168100
},
{
"epoch": 0.56,
"learning_rate": 2.2129984954831357e-05,
"loss": 2.822,
"step": 168200
},
{
"epoch": 0.56,
"learning_rate": 2.2113415385838983e-05,
"loss": 2.8112,
"step": 168300
},
{
"epoch": 0.56,
"learning_rate": 2.2096845816846612e-05,
"loss": 2.827,
"step": 168400
},
{
"epoch": 0.56,
"learning_rate": 2.208027624785424e-05,
"loss": 2.8098,
"step": 168500
},
{
"epoch": 0.56,
"learning_rate": 2.206370667886187e-05,
"loss": 2.8242,
"step": 168600
},
{
"epoch": 0.56,
"learning_rate": 2.2047137109869496e-05,
"loss": 2.8103,
"step": 168700
},
{
"epoch": 0.56,
"learning_rate": 2.203056754087713e-05,
"loss": 2.8063,
"step": 168800
},
{
"epoch": 0.56,
"learning_rate": 2.2013997971884755e-05,
"loss": 2.8347,
"step": 168900
},
{
"epoch": 0.56,
"learning_rate": 2.1997428402892384e-05,
"loss": 2.822,
"step": 169000
},
{
"epoch": 0.56,
"learning_rate": 2.1980858833900013e-05,
"loss": 2.8292,
"step": 169100
},
{
"epoch": 0.56,
"learning_rate": 2.1964289264907642e-05,
"loss": 2.817,
"step": 169200
},
{
"epoch": 0.56,
"learning_rate": 2.1947719695915268e-05,
"loss": 2.8124,
"step": 169300
},
{
"epoch": 0.56,
"learning_rate": 2.19311501269229e-05,
"loss": 2.8216,
"step": 169400
},
{
"epoch": 0.56,
"learning_rate": 2.1914580557930527e-05,
"loss": 2.8165,
"step": 169500
},
{
"epoch": 0.56,
"learning_rate": 2.1898010988938156e-05,
"loss": 2.8259,
"step": 169600
},
{
"epoch": 0.56,
"learning_rate": 2.1881441419945785e-05,
"loss": 2.8308,
"step": 169700
},
{
"epoch": 0.56,
"learning_rate": 2.1864871850953414e-05,
"loss": 2.8226,
"step": 169800
},
{
"epoch": 0.56,
"learning_rate": 2.184830228196104e-05,
"loss": 2.8283,
"step": 169900
},
{
"epoch": 0.56,
"learning_rate": 2.1831732712968673e-05,
"loss": 2.8209,
"step": 170000
},
{
"epoch": 0.56,
"learning_rate": 2.18151631439763e-05,
"loss": 2.8131,
"step": 170100
},
{
"epoch": 0.56,
"learning_rate": 2.1798593574983928e-05,
"loss": 2.8165,
"step": 170200
},
{
"epoch": 0.56,
"learning_rate": 2.1782024005991557e-05,
"loss": 2.8187,
"step": 170300
},
{
"epoch": 0.56,
"learning_rate": 2.1765454436999186e-05,
"loss": 2.8217,
"step": 170400
},
{
"epoch": 0.57,
"learning_rate": 2.1748884868006815e-05,
"loss": 2.8093,
"step": 170500
},
{
"epoch": 0.57,
"learning_rate": 2.1732315299014444e-05,
"loss": 2.8267,
"step": 170600
},
{
"epoch": 0.57,
"learning_rate": 2.171574573002207e-05,
"loss": 2.8147,
"step": 170700
},
{
"epoch": 0.57,
"learning_rate": 2.16991761610297e-05,
"loss": 2.8274,
"step": 170800
},
{
"epoch": 0.57,
"learning_rate": 2.168260659203733e-05,
"loss": 2.8226,
"step": 170900
},
{
"epoch": 0.57,
"learning_rate": 2.1666037023044958e-05,
"loss": 2.8347,
"step": 171000
},
{
"epoch": 0.57,
"learning_rate": 2.1649467454052587e-05,
"loss": 2.8223,
"step": 171100
},
{
"epoch": 0.57,
"learning_rate": 2.1632897885060216e-05,
"loss": 2.8204,
"step": 171200
},
{
"epoch": 0.57,
"learning_rate": 2.1616328316067842e-05,
"loss": 2.8237,
"step": 171300
},
{
"epoch": 0.57,
"learning_rate": 2.159975874707547e-05,
"loss": 2.808,
"step": 171400
},
{
"epoch": 0.57,
"learning_rate": 2.15831891780831e-05,
"loss": 2.825,
"step": 171500
},
{
"epoch": 0.57,
"learning_rate": 2.156661960909073e-05,
"loss": 2.8199,
"step": 171600
},
{
"epoch": 0.57,
"learning_rate": 2.155005004009836e-05,
"loss": 2.817,
"step": 171700
},
{
"epoch": 0.57,
"learning_rate": 2.1533480471105988e-05,
"loss": 2.8086,
"step": 171800
},
{
"epoch": 0.57,
"learning_rate": 2.1516910902113614e-05,
"loss": 2.8237,
"step": 171900
},
{
"epoch": 0.57,
"learning_rate": 2.1500341333121243e-05,
"loss": 2.8189,
"step": 172000
},
{
"epoch": 0.57,
"learning_rate": 2.1483771764128872e-05,
"loss": 2.8382,
"step": 172100
},
{
"epoch": 0.57,
"learning_rate": 2.14672021951365e-05,
"loss": 2.8165,
"step": 172200
},
{
"epoch": 0.57,
"learning_rate": 2.145063262614413e-05,
"loss": 2.8241,
"step": 172300
},
{
"epoch": 0.57,
"learning_rate": 2.143406305715176e-05,
"loss": 2.8177,
"step": 172400
},
{
"epoch": 0.57,
"learning_rate": 2.141749348815939e-05,
"loss": 2.825,
"step": 172500
},
{
"epoch": 0.57,
"learning_rate": 2.1400923919167015e-05,
"loss": 2.8237,
"step": 172600
},
{
"epoch": 0.57,
"learning_rate": 2.1384354350174644e-05,
"loss": 2.8087,
"step": 172700
},
{
"epoch": 0.57,
"learning_rate": 2.1367784781182273e-05,
"loss": 2.8139,
"step": 172800
},
{
"epoch": 0.57,
"learning_rate": 2.1351215212189903e-05,
"loss": 2.8049,
"step": 172900
},
{
"epoch": 0.57,
"learning_rate": 2.133464564319753e-05,
"loss": 2.8238,
"step": 173000
},
{
"epoch": 0.57,
"learning_rate": 2.131807607420516e-05,
"loss": 2.8181,
"step": 173100
},
{
"epoch": 0.57,
"learning_rate": 2.1301506505212787e-05,
"loss": 2.8281,
"step": 173200
},
{
"epoch": 0.57,
"learning_rate": 2.1284936936220416e-05,
"loss": 2.818,
"step": 173300
},
{
"epoch": 0.57,
"learning_rate": 2.1268367367228045e-05,
"loss": 2.8262,
"step": 173400
},
{
"epoch": 0.57,
"learning_rate": 2.1251797798235674e-05,
"loss": 2.8169,
"step": 173500
},
{
"epoch": 0.58,
"learning_rate": 2.12352282292433e-05,
"loss": 2.8132,
"step": 173600
},
{
"epoch": 0.58,
"learning_rate": 2.1218658660250933e-05,
"loss": 2.808,
"step": 173700
},
{
"epoch": 0.58,
"learning_rate": 2.120208909125856e-05,
"loss": 2.8272,
"step": 173800
},
{
"epoch": 0.58,
"learning_rate": 2.1185519522266188e-05,
"loss": 2.8124,
"step": 173900
},
{
"epoch": 0.58,
"learning_rate": 2.1168949953273817e-05,
"loss": 2.8121,
"step": 174000
},
{
"epoch": 0.58,
"learning_rate": 2.1152380384281446e-05,
"loss": 2.8097,
"step": 174100
},
{
"epoch": 0.58,
"learning_rate": 2.1135810815289072e-05,
"loss": 2.8253,
"step": 174200
},
{
"epoch": 0.58,
"learning_rate": 2.1119241246296705e-05,
"loss": 2.8223,
"step": 174300
},
{
"epoch": 0.58,
"learning_rate": 2.110267167730433e-05,
"loss": 2.8109,
"step": 174400
},
{
"epoch": 0.58,
"learning_rate": 2.108610210831196e-05,
"loss": 2.8238,
"step": 174500
},
{
"epoch": 0.58,
"learning_rate": 2.106953253931959e-05,
"loss": 2.8197,
"step": 174600
},
{
"epoch": 0.58,
"learning_rate": 2.1052962970327218e-05,
"loss": 2.8185,
"step": 174700
},
{
"epoch": 0.58,
"learning_rate": 2.1036393401334844e-05,
"loss": 2.8002,
"step": 174800
},
{
"epoch": 0.58,
"learning_rate": 2.1019823832342477e-05,
"loss": 2.8194,
"step": 174900
},
{
"epoch": 0.58,
"learning_rate": 2.1003254263350102e-05,
"loss": 2.8078,
"step": 175000
},
{
"epoch": 0.58,
"learning_rate": 2.098668469435773e-05,
"loss": 2.8157,
"step": 175100
},
{
"epoch": 0.58,
"learning_rate": 2.097011512536536e-05,
"loss": 2.7957,
"step": 175200
},
{
"epoch": 0.58,
"learning_rate": 2.095354555637299e-05,
"loss": 2.8125,
"step": 175300
},
{
"epoch": 0.58,
"learning_rate": 2.0936975987380616e-05,
"loss": 2.8175,
"step": 175400
},
{
"epoch": 0.58,
"learning_rate": 2.092040641838825e-05,
"loss": 2.8274,
"step": 175500
},
{
"epoch": 0.58,
"learning_rate": 2.0903836849395874e-05,
"loss": 2.8192,
"step": 175600
},
{
"epoch": 0.58,
"learning_rate": 2.0887267280403503e-05,
"loss": 2.8092,
"step": 175700
},
{
"epoch": 0.58,
"learning_rate": 2.0870697711411133e-05,
"loss": 2.8228,
"step": 175800
},
{
"epoch": 0.58,
"learning_rate": 2.0854128142418762e-05,
"loss": 2.8202,
"step": 175900
},
{
"epoch": 0.58,
"learning_rate": 2.0837558573426388e-05,
"loss": 2.7959,
"step": 176000
},
{
"epoch": 0.58,
"learning_rate": 2.082098900443402e-05,
"loss": 2.8134,
"step": 176100
},
{
"epoch": 0.58,
"learning_rate": 2.0804419435441646e-05,
"loss": 2.8116,
"step": 176200
},
{
"epoch": 0.58,
"learning_rate": 2.0787849866449275e-05,
"loss": 2.806,
"step": 176300
},
{
"epoch": 0.58,
"learning_rate": 2.0771280297456904e-05,
"loss": 2.8031,
"step": 176400
},
{
"epoch": 0.58,
"learning_rate": 2.0754710728464534e-05,
"loss": 2.8172,
"step": 176500
},
{
"epoch": 0.59,
"learning_rate": 2.073814115947216e-05,
"loss": 2.811,
"step": 176600
},
{
"epoch": 0.59,
"learning_rate": 2.0721571590479792e-05,
"loss": 2.8066,
"step": 176700
},
{
"epoch": 0.59,
"learning_rate": 2.0705002021487418e-05,
"loss": 2.8152,
"step": 176800
},
{
"epoch": 0.59,
"learning_rate": 2.0688432452495047e-05,
"loss": 2.8248,
"step": 176900
},
{
"epoch": 0.59,
"learning_rate": 2.0671862883502673e-05,
"loss": 2.8219,
"step": 177000
},
{
"epoch": 0.59,
"learning_rate": 2.0655293314510306e-05,
"loss": 2.804,
"step": 177100
},
{
"epoch": 0.59,
"learning_rate": 2.063872374551793e-05,
"loss": 2.8149,
"step": 177200
},
{
"epoch": 0.59,
"learning_rate": 2.062215417652556e-05,
"loss": 2.8252,
"step": 177300
},
{
"epoch": 0.59,
"learning_rate": 2.060558460753319e-05,
"loss": 2.8178,
"step": 177400
},
{
"epoch": 0.59,
"learning_rate": 2.058901503854082e-05,
"loss": 2.8185,
"step": 177500
},
{
"epoch": 0.59,
"learning_rate": 2.0572445469548445e-05,
"loss": 2.8066,
"step": 177600
},
{
"epoch": 0.59,
"learning_rate": 2.0555875900556077e-05,
"loss": 2.8071,
"step": 177700
},
{
"epoch": 0.59,
"learning_rate": 2.0539306331563703e-05,
"loss": 2.8212,
"step": 177800
},
{
"epoch": 0.59,
"learning_rate": 2.0522736762571332e-05,
"loss": 2.8227,
"step": 177900
},
{
"epoch": 0.59,
"learning_rate": 2.050616719357896e-05,
"loss": 2.8075,
"step": 178000
},
{
"epoch": 0.59,
"learning_rate": 2.048959762458659e-05,
"loss": 2.8206,
"step": 178100
},
{
"epoch": 0.59,
"learning_rate": 2.0473028055594217e-05,
"loss": 2.8139,
"step": 178200
},
{
"epoch": 0.59,
"learning_rate": 2.045645848660185e-05,
"loss": 2.8105,
"step": 178300
},
{
"epoch": 0.59,
"learning_rate": 2.0439888917609475e-05,
"loss": 2.8191,
"step": 178400
},
{
"epoch": 0.59,
"learning_rate": 2.0423319348617104e-05,
"loss": 2.8104,
"step": 178500
},
{
"epoch": 0.59,
"learning_rate": 2.0406749779624733e-05,
"loss": 2.8058,
"step": 178600
},
{
"epoch": 0.59,
"learning_rate": 2.0390180210632363e-05,
"loss": 2.8192,
"step": 178700
},
{
"epoch": 0.59,
"learning_rate": 2.037361064163999e-05,
"loss": 2.8384,
"step": 178800
},
{
"epoch": 0.59,
"learning_rate": 2.035704107264762e-05,
"loss": 2.7925,
"step": 178900
},
{
"epoch": 0.59,
"learning_rate": 2.0340471503655247e-05,
"loss": 2.8159,
"step": 179000
},
{
"epoch": 0.59,
"learning_rate": 2.0323901934662876e-05,
"loss": 2.8117,
"step": 179100
},
{
"epoch": 0.59,
"learning_rate": 2.0307332365670505e-05,
"loss": 2.817,
"step": 179200
},
{
"epoch": 0.59,
"learning_rate": 2.0290762796678134e-05,
"loss": 2.8223,
"step": 179300
},
{
"epoch": 0.59,
"learning_rate": 2.027419322768576e-05,
"loss": 2.8083,
"step": 179400
},
{
"epoch": 0.59,
"learning_rate": 2.0257623658693393e-05,
"loss": 2.8172,
"step": 179500
},
{
"epoch": 0.6,
"learning_rate": 2.024105408970102e-05,
"loss": 2.8341,
"step": 179600
},
{
"epoch": 0.6,
"learning_rate": 2.0224484520708648e-05,
"loss": 2.825,
"step": 179700
},
{
"epoch": 0.6,
"learning_rate": 2.0207914951716277e-05,
"loss": 2.8128,
"step": 179800
},
{
"epoch": 0.6,
"learning_rate": 2.0191345382723906e-05,
"loss": 2.8255,
"step": 179900
},
{
"epoch": 0.6,
"learning_rate": 2.0174775813731532e-05,
"loss": 2.8036,
"step": 180000
},
{
"epoch": 0.6,
"learning_rate": 2.0158206244739165e-05,
"loss": 2.8282,
"step": 180100
},
{
"epoch": 0.6,
"learning_rate": 2.014163667574679e-05,
"loss": 2.8137,
"step": 180200
},
{
"epoch": 0.6,
"learning_rate": 2.012506710675442e-05,
"loss": 2.8104,
"step": 180300
},
{
"epoch": 0.6,
"learning_rate": 2.010849753776205e-05,
"loss": 2.8194,
"step": 180400
},
{
"epoch": 0.6,
"learning_rate": 2.0091927968769678e-05,
"loss": 2.819,
"step": 180500
},
{
"epoch": 0.6,
"learning_rate": 2.0075358399777304e-05,
"loss": 2.8179,
"step": 180600
},
{
"epoch": 0.6,
"learning_rate": 2.0058788830784937e-05,
"loss": 2.8249,
"step": 180700
},
{
"epoch": 0.6,
"learning_rate": 2.0042219261792562e-05,
"loss": 2.8103,
"step": 180800
},
{
"epoch": 0.6,
"learning_rate": 2.002564969280019e-05,
"loss": 2.807,
"step": 180900
},
{
"epoch": 0.6,
"learning_rate": 2.000908012380782e-05,
"loss": 2.8121,
"step": 181000
},
{
"epoch": 0.6,
"learning_rate": 1.999251055481545e-05,
"loss": 2.811,
"step": 181100
},
{
"epoch": 0.6,
"learning_rate": 1.9975940985823076e-05,
"loss": 2.8257,
"step": 181200
},
{
"epoch": 0.6,
"learning_rate": 1.9959371416830705e-05,
"loss": 2.8282,
"step": 181300
},
{
"epoch": 0.6,
"learning_rate": 1.9942801847838334e-05,
"loss": 2.8128,
"step": 181400
},
{
"epoch": 0.6,
"learning_rate": 1.9926232278845963e-05,
"loss": 2.8243,
"step": 181500
},
{
"epoch": 0.6,
"learning_rate": 1.990966270985359e-05,
"loss": 2.8162,
"step": 181600
},
{
"epoch": 0.6,
"learning_rate": 1.9893093140861222e-05,
"loss": 2.8257,
"step": 181700
},
{
"epoch": 0.6,
"learning_rate": 1.9876523571868848e-05,
"loss": 2.8181,
"step": 181800
},
{
"epoch": 0.6,
"learning_rate": 1.9859954002876477e-05,
"loss": 2.8182,
"step": 181900
},
{
"epoch": 0.6,
"learning_rate": 1.9843384433884106e-05,
"loss": 2.8078,
"step": 182000
},
{
"epoch": 0.6,
"learning_rate": 1.9826814864891735e-05,
"loss": 2.8149,
"step": 182100
},
{
"epoch": 0.6,
"learning_rate": 1.9810245295899365e-05,
"loss": 2.8076,
"step": 182200
},
{
"epoch": 0.6,
"learning_rate": 1.9793675726906994e-05,
"loss": 2.8153,
"step": 182300
},
{
"epoch": 0.6,
"learning_rate": 1.977710615791462e-05,
"loss": 2.8118,
"step": 182400
},
{
"epoch": 0.6,
"learning_rate": 1.976053658892225e-05,
"loss": 2.8113,
"step": 182500
},
{
"epoch": 0.61,
"learning_rate": 1.9743967019929878e-05,
"loss": 2.8169,
"step": 182600
},
{
"epoch": 0.61,
"learning_rate": 1.9727397450937507e-05,
"loss": 2.8155,
"step": 182700
},
{
"epoch": 0.61,
"learning_rate": 1.9710827881945136e-05,
"loss": 2.8186,
"step": 182800
},
{
"epoch": 0.61,
"learning_rate": 1.9694258312952766e-05,
"loss": 2.8115,
"step": 182900
},
{
"epoch": 0.61,
"learning_rate": 1.967768874396039e-05,
"loss": 2.8192,
"step": 183000
},
{
"epoch": 0.61,
"learning_rate": 1.966111917496802e-05,
"loss": 2.809,
"step": 183100
},
{
"epoch": 0.61,
"learning_rate": 1.964454960597565e-05,
"loss": 2.7983,
"step": 183200
},
{
"epoch": 0.61,
"learning_rate": 1.962798003698328e-05,
"loss": 2.8182,
"step": 183300
},
{
"epoch": 0.61,
"learning_rate": 1.9611410467990908e-05,
"loss": 2.8214,
"step": 183400
},
{
"epoch": 0.61,
"learning_rate": 1.9594840898998537e-05,
"loss": 2.814,
"step": 183500
},
{
"epoch": 0.61,
"learning_rate": 1.9578271330006163e-05,
"loss": 2.8303,
"step": 183600
},
{
"epoch": 0.61,
"learning_rate": 1.9561701761013792e-05,
"loss": 2.8126,
"step": 183700
},
{
"epoch": 0.61,
"learning_rate": 1.954513219202142e-05,
"loss": 2.8102,
"step": 183800
},
{
"epoch": 0.61,
"learning_rate": 1.952856262302905e-05,
"loss": 2.8166,
"step": 183900
},
{
"epoch": 0.61,
"learning_rate": 1.951199305403668e-05,
"loss": 2.8131,
"step": 184000
},
{
"epoch": 0.61,
"learning_rate": 1.949542348504431e-05,
"loss": 2.7997,
"step": 184100
},
{
"epoch": 0.61,
"learning_rate": 1.947885391605194e-05,
"loss": 2.823,
"step": 184200
},
{
"epoch": 0.61,
"learning_rate": 1.9462284347059564e-05,
"loss": 2.8042,
"step": 184300
},
{
"epoch": 0.61,
"learning_rate": 1.9445714778067193e-05,
"loss": 2.8128,
"step": 184400
},
{
"epoch": 0.61,
"learning_rate": 1.9429145209074823e-05,
"loss": 2.8085,
"step": 184500
},
{
"epoch": 0.61,
"learning_rate": 1.9412575640082452e-05,
"loss": 2.8068,
"step": 184600
},
{
"epoch": 0.61,
"learning_rate": 1.939600607109008e-05,
"loss": 2.8233,
"step": 184700
},
{
"epoch": 0.61,
"learning_rate": 1.937943650209771e-05,
"loss": 2.8129,
"step": 184800
},
{
"epoch": 0.61,
"learning_rate": 1.9362866933105336e-05,
"loss": 2.8207,
"step": 184900
},
{
"epoch": 0.61,
"learning_rate": 1.9346297364112965e-05,
"loss": 2.8158,
"step": 185000
},
{
"epoch": 0.61,
"learning_rate": 1.9329727795120595e-05,
"loss": 2.8159,
"step": 185100
},
{
"epoch": 0.61,
"learning_rate": 1.9313158226128224e-05,
"loss": 2.8099,
"step": 185200
},
{
"epoch": 0.61,
"learning_rate": 1.9296588657135853e-05,
"loss": 2.8152,
"step": 185300
},
{
"epoch": 0.61,
"learning_rate": 1.9280019088143482e-05,
"loss": 2.8154,
"step": 185400
},
{
"epoch": 0.61,
"learning_rate": 1.9263449519151108e-05,
"loss": 2.8035,
"step": 185500
},
{
"epoch": 0.62,
"learning_rate": 1.9246879950158737e-05,
"loss": 2.8117,
"step": 185600
},
{
"epoch": 0.62,
"learning_rate": 1.9230310381166366e-05,
"loss": 2.8184,
"step": 185700
},
{
"epoch": 0.62,
"learning_rate": 1.9213740812173996e-05,
"loss": 2.832,
"step": 185800
},
{
"epoch": 0.62,
"learning_rate": 1.919717124318162e-05,
"loss": 2.8297,
"step": 185900
},
{
"epoch": 0.62,
"learning_rate": 1.9180601674189254e-05,
"loss": 2.8125,
"step": 186000
},
{
"epoch": 0.62,
"learning_rate": 1.916403210519688e-05,
"loss": 2.818,
"step": 186100
},
{
"epoch": 0.62,
"learning_rate": 1.914746253620451e-05,
"loss": 2.8209,
"step": 186200
},
{
"epoch": 0.62,
"learning_rate": 1.9130892967212138e-05,
"loss": 2.8196,
"step": 186300
},
{
"epoch": 0.62,
"learning_rate": 1.9114323398219767e-05,
"loss": 2.822,
"step": 186400
},
{
"epoch": 0.62,
"learning_rate": 1.9097753829227393e-05,
"loss": 2.811,
"step": 186500
},
{
"epoch": 0.62,
"learning_rate": 1.9081184260235026e-05,
"loss": 2.8063,
"step": 186600
},
{
"epoch": 0.62,
"learning_rate": 1.906461469124265e-05,
"loss": 2.7992,
"step": 186700
},
{
"epoch": 0.62,
"learning_rate": 1.904804512225028e-05,
"loss": 2.8194,
"step": 186800
},
{
"epoch": 0.62,
"learning_rate": 1.903147555325791e-05,
"loss": 2.8285,
"step": 186900
},
{
"epoch": 0.62,
"learning_rate": 1.901490598426554e-05,
"loss": 2.8108,
"step": 187000
},
{
"epoch": 0.62,
"learning_rate": 1.8998336415273165e-05,
"loss": 2.8166,
"step": 187100
},
{
"epoch": 0.62,
"learning_rate": 1.8981766846280798e-05,
"loss": 2.8111,
"step": 187200
},
{
"epoch": 0.62,
"learning_rate": 1.8965197277288423e-05,
"loss": 2.8098,
"step": 187300
},
{
"epoch": 0.62,
"learning_rate": 1.8948627708296053e-05,
"loss": 2.8221,
"step": 187400
},
{
"epoch": 0.62,
"learning_rate": 1.8932058139303682e-05,
"loss": 2.8123,
"step": 187500
},
{
"epoch": 0.62,
"learning_rate": 1.891548857031131e-05,
"loss": 2.8167,
"step": 187600
},
{
"epoch": 0.62,
"learning_rate": 1.8898919001318937e-05,
"loss": 2.8177,
"step": 187700
},
{
"epoch": 0.62,
"learning_rate": 1.888234943232657e-05,
"loss": 2.8188,
"step": 187800
},
{
"epoch": 0.62,
"learning_rate": 1.8865779863334195e-05,
"loss": 2.8168,
"step": 187900
},
{
"epoch": 0.62,
"learning_rate": 1.8849210294341825e-05,
"loss": 2.8054,
"step": 188000
},
{
"epoch": 0.62,
"learning_rate": 1.8832640725349454e-05,
"loss": 2.8192,
"step": 188100
},
{
"epoch": 0.62,
"learning_rate": 1.8816071156357083e-05,
"loss": 2.8223,
"step": 188200
},
{
"epoch": 0.62,
"learning_rate": 1.879950158736471e-05,
"loss": 2.8119,
"step": 188300
},
{
"epoch": 0.62,
"learning_rate": 1.878293201837234e-05,
"loss": 2.8296,
"step": 188400
},
{
"epoch": 0.62,
"learning_rate": 1.8766362449379967e-05,
"loss": 2.7941,
"step": 188500
},
{
"epoch": 0.63,
"learning_rate": 1.8749792880387596e-05,
"loss": 2.7976,
"step": 188600
},
{
"epoch": 0.63,
"learning_rate": 1.8733223311395226e-05,
"loss": 2.8133,
"step": 188700
},
{
"epoch": 0.63,
"learning_rate": 1.8716653742402855e-05,
"loss": 2.804,
"step": 188800
},
{
"epoch": 0.63,
"learning_rate": 1.870008417341048e-05,
"loss": 2.8133,
"step": 188900
},
{
"epoch": 0.63,
"learning_rate": 1.8683514604418113e-05,
"loss": 2.8134,
"step": 189000
},
{
"epoch": 0.63,
"learning_rate": 1.866694503542574e-05,
"loss": 2.7998,
"step": 189100
},
{
"epoch": 0.63,
"learning_rate": 1.8650375466433368e-05,
"loss": 2.8182,
"step": 189200
},
{
"epoch": 0.63,
"learning_rate": 1.8633805897440997e-05,
"loss": 2.8098,
"step": 189300
},
{
"epoch": 0.63,
"learning_rate": 1.8617236328448627e-05,
"loss": 2.8126,
"step": 189400
},
{
"epoch": 0.63,
"learning_rate": 1.8600666759456252e-05,
"loss": 2.8134,
"step": 189500
},
{
"epoch": 0.63,
"learning_rate": 1.8584097190463885e-05,
"loss": 2.813,
"step": 189600
},
{
"epoch": 0.63,
"learning_rate": 1.856752762147151e-05,
"loss": 2.8101,
"step": 189700
},
{
"epoch": 0.63,
"learning_rate": 1.855095805247914e-05,
"loss": 2.817,
"step": 189800
},
{
"epoch": 0.63,
"learning_rate": 1.853438848348677e-05,
"loss": 2.8049,
"step": 189900
},
{
"epoch": 0.63,
"learning_rate": 1.85178189144944e-05,
"loss": 2.8164,
"step": 190000
},
{
"epoch": 0.63,
"learning_rate": 1.8501249345502024e-05,
"loss": 2.8254,
"step": 190100
},
{
"epoch": 0.63,
"learning_rate": 1.8484679776509654e-05,
"loss": 2.8155,
"step": 190200
},
{
"epoch": 0.63,
"learning_rate": 1.8468110207517283e-05,
"loss": 2.8126,
"step": 190300
},
{
"epoch": 0.63,
"learning_rate": 1.8451540638524912e-05,
"loss": 2.8091,
"step": 190400
},
{
"epoch": 0.63,
"learning_rate": 1.8434971069532538e-05,
"loss": 2.8069,
"step": 190500
},
{
"epoch": 0.63,
"learning_rate": 1.841840150054017e-05,
"loss": 2.8224,
"step": 190600
},
{
"epoch": 0.63,
"learning_rate": 1.8401831931547796e-05,
"loss": 2.8063,
"step": 190700
},
{
"epoch": 0.63,
"learning_rate": 1.8385262362555425e-05,
"loss": 2.8209,
"step": 190800
},
{
"epoch": 0.63,
"learning_rate": 1.8368692793563055e-05,
"loss": 2.8054,
"step": 190900
},
{
"epoch": 0.63,
"learning_rate": 1.8352123224570684e-05,
"loss": 2.8243,
"step": 191000
},
{
"epoch": 0.63,
"learning_rate": 1.833555365557831e-05,
"loss": 2.7988,
"step": 191100
},
{
"epoch": 0.63,
"learning_rate": 1.8318984086585942e-05,
"loss": 2.8011,
"step": 191200
},
{
"epoch": 0.63,
"learning_rate": 1.8302414517593568e-05,
"loss": 2.8067,
"step": 191300
},
{
"epoch": 0.63,
"learning_rate": 1.8285844948601197e-05,
"loss": 2.8035,
"step": 191400
},
{
"epoch": 0.63,
"learning_rate": 1.8269275379608826e-05,
"loss": 2.8144,
"step": 191500
},
{
"epoch": 0.63,
"learning_rate": 1.8252705810616456e-05,
"loss": 2.8148,
"step": 191600
},
{
"epoch": 0.64,
"learning_rate": 1.823613624162408e-05,
"loss": 2.8009,
"step": 191700
},
{
"epoch": 0.64,
"learning_rate": 1.8219566672631714e-05,
"loss": 2.8221,
"step": 191800
},
{
"epoch": 0.64,
"learning_rate": 1.820299710363934e-05,
"loss": 2.815,
"step": 191900
},
{
"epoch": 0.64,
"learning_rate": 1.818642753464697e-05,
"loss": 2.8123,
"step": 192000
},
{
"epoch": 0.64,
"learning_rate": 1.8169857965654598e-05,
"loss": 2.8044,
"step": 192100
},
{
"epoch": 0.64,
"learning_rate": 1.8153288396662227e-05,
"loss": 2.8102,
"step": 192200
},
{
"epoch": 0.64,
"learning_rate": 1.8136718827669853e-05,
"loss": 2.8136,
"step": 192300
},
{
"epoch": 0.64,
"learning_rate": 1.8120149258677486e-05,
"loss": 2.8101,
"step": 192400
},
{
"epoch": 0.64,
"learning_rate": 1.810357968968511e-05,
"loss": 2.8128,
"step": 192500
},
{
"epoch": 0.64,
"learning_rate": 1.808701012069274e-05,
"loss": 2.8063,
"step": 192600
},
{
"epoch": 0.64,
"learning_rate": 1.807044055170037e-05,
"loss": 2.8075,
"step": 192700
},
{
"epoch": 0.64,
"learning_rate": 1.8053870982708e-05,
"loss": 2.8321,
"step": 192800
},
{
"epoch": 0.64,
"learning_rate": 1.8037301413715625e-05,
"loss": 2.8127,
"step": 192900
},
{
"epoch": 0.64,
"learning_rate": 1.8020731844723258e-05,
"loss": 2.8159,
"step": 193000
},
{
"epoch": 0.64,
"learning_rate": 1.8004162275730884e-05,
"loss": 2.8063,
"step": 193100
},
{
"epoch": 0.64,
"learning_rate": 1.7987592706738513e-05,
"loss": 2.8077,
"step": 193200
},
{
"epoch": 0.64,
"learning_rate": 1.7971023137746142e-05,
"loss": 2.8203,
"step": 193300
},
{
"epoch": 0.64,
"learning_rate": 1.795445356875377e-05,
"loss": 2.8121,
"step": 193400
},
{
"epoch": 0.64,
"learning_rate": 1.7937883999761397e-05,
"loss": 2.7967,
"step": 193500
},
{
"epoch": 0.64,
"learning_rate": 1.792131443076903e-05,
"loss": 2.816,
"step": 193600
},
{
"epoch": 0.64,
"learning_rate": 1.7904744861776655e-05,
"loss": 2.8176,
"step": 193700
},
{
"epoch": 0.64,
"learning_rate": 1.7888175292784285e-05,
"loss": 2.8002,
"step": 193800
},
{
"epoch": 0.64,
"learning_rate": 1.7871605723791914e-05,
"loss": 2.8022,
"step": 193900
},
{
"epoch": 0.64,
"learning_rate": 1.7855036154799543e-05,
"loss": 2.8059,
"step": 194000
},
{
"epoch": 0.64,
"learning_rate": 1.783846658580717e-05,
"loss": 2.7992,
"step": 194100
},
{
"epoch": 0.64,
"learning_rate": 1.78218970168148e-05,
"loss": 2.8207,
"step": 194200
},
{
"epoch": 0.64,
"learning_rate": 1.7805327447822427e-05,
"loss": 2.8259,
"step": 194300
},
{
"epoch": 0.64,
"learning_rate": 1.7788757878830056e-05,
"loss": 2.8033,
"step": 194400
},
{
"epoch": 0.64,
"learning_rate": 1.7772188309837686e-05,
"loss": 2.8134,
"step": 194500
},
{
"epoch": 0.64,
"learning_rate": 1.7755618740845315e-05,
"loss": 2.8189,
"step": 194600
},
{
"epoch": 0.65,
"learning_rate": 1.773904917185294e-05,
"loss": 2.8071,
"step": 194700
},
{
"epoch": 0.65,
"learning_rate": 1.772247960286057e-05,
"loss": 2.8178,
"step": 194800
},
{
"epoch": 0.65,
"learning_rate": 1.77059100338682e-05,
"loss": 2.813,
"step": 194900
},
{
"epoch": 0.65,
"learning_rate": 1.7689340464875828e-05,
"loss": 2.7985,
"step": 195000
},
{
"epoch": 0.65,
"learning_rate": 1.7672770895883457e-05,
"loss": 2.8107,
"step": 195100
},
{
"epoch": 0.65,
"learning_rate": 1.7656201326891087e-05,
"loss": 2.8082,
"step": 195200
},
{
"epoch": 0.65,
"learning_rate": 1.7639631757898713e-05,
"loss": 2.8188,
"step": 195300
},
{
"epoch": 0.65,
"learning_rate": 1.7623062188906342e-05,
"loss": 2.8034,
"step": 195400
},
{
"epoch": 0.65,
"learning_rate": 1.760649261991397e-05,
"loss": 2.8043,
"step": 195500
},
{
"epoch": 0.65,
"learning_rate": 1.75899230509216e-05,
"loss": 2.8007,
"step": 195600
},
{
"epoch": 0.65,
"learning_rate": 1.757335348192923e-05,
"loss": 2.8128,
"step": 195700
},
{
"epoch": 0.65,
"learning_rate": 1.755678391293686e-05,
"loss": 2.8164,
"step": 195800
},
{
"epoch": 0.65,
"learning_rate": 1.7540214343944484e-05,
"loss": 2.7995,
"step": 195900
},
{
"epoch": 0.65,
"learning_rate": 1.7523644774952114e-05,
"loss": 2.8059,
"step": 196000
},
{
"epoch": 0.65,
"learning_rate": 1.7507075205959743e-05,
"loss": 2.8169,
"step": 196100
},
{
"epoch": 0.65,
"learning_rate": 1.7490505636967372e-05,
"loss": 2.8087,
"step": 196200
},
{
"epoch": 0.65,
"learning_rate": 1.7473936067975e-05,
"loss": 2.808,
"step": 196300
},
{
"epoch": 0.65,
"learning_rate": 1.745736649898263e-05,
"loss": 2.8044,
"step": 196400
},
{
"epoch": 0.65,
"learning_rate": 1.744079692999026e-05,
"loss": 2.8189,
"step": 196500
},
{
"epoch": 0.65,
"learning_rate": 1.7424227360997885e-05,
"loss": 2.8138,
"step": 196600
},
{
"epoch": 0.65,
"learning_rate": 1.7407657792005515e-05,
"loss": 2.8039,
"step": 196700
},
{
"epoch": 0.65,
"learning_rate": 1.7391088223013144e-05,
"loss": 2.8103,
"step": 196800
},
{
"epoch": 0.65,
"learning_rate": 1.7374518654020773e-05,
"loss": 2.7884,
"step": 196900
},
{
"epoch": 0.65,
"learning_rate": 1.7357949085028402e-05,
"loss": 2.8082,
"step": 197000
},
{
"epoch": 0.65,
"learning_rate": 1.734137951603603e-05,
"loss": 2.8049,
"step": 197100
},
{
"epoch": 0.65,
"learning_rate": 1.7324809947043657e-05,
"loss": 2.8149,
"step": 197200
},
{
"epoch": 0.65,
"learning_rate": 1.7308240378051286e-05,
"loss": 2.8118,
"step": 197300
},
{
"epoch": 0.65,
"learning_rate": 1.7291670809058916e-05,
"loss": 2.8108,
"step": 197400
},
{
"epoch": 0.65,
"learning_rate": 1.7275101240066545e-05,
"loss": 2.7971,
"step": 197500
},
{
"epoch": 0.65,
"learning_rate": 1.7258531671074174e-05,
"loss": 2.8201,
"step": 197600
},
{
"epoch": 0.66,
"learning_rate": 1.7241962102081803e-05,
"loss": 2.8153,
"step": 197700
},
{
"epoch": 0.66,
"learning_rate": 1.722539253308943e-05,
"loss": 2.8051,
"step": 197800
},
{
"epoch": 0.66,
"learning_rate": 1.7208822964097058e-05,
"loss": 2.8185,
"step": 197900
},
{
"epoch": 0.66,
"learning_rate": 1.7192253395104687e-05,
"loss": 2.8297,
"step": 198000
},
{
"epoch": 0.66,
"learning_rate": 1.7175683826112317e-05,
"loss": 2.8148,
"step": 198100
},
{
"epoch": 0.66,
"learning_rate": 1.7159114257119946e-05,
"loss": 2.7993,
"step": 198200
},
{
"epoch": 0.66,
"learning_rate": 1.7142544688127575e-05,
"loss": 2.828,
"step": 198300
},
{
"epoch": 0.66,
"learning_rate": 1.71259751191352e-05,
"loss": 2.8188,
"step": 198400
},
{
"epoch": 0.66,
"learning_rate": 1.7109405550142834e-05,
"loss": 2.808,
"step": 198500
},
{
"epoch": 0.66,
"learning_rate": 1.709283598115046e-05,
"loss": 2.8014,
"step": 198600
},
{
"epoch": 0.66,
"learning_rate": 1.707626641215809e-05,
"loss": 2.8036,
"step": 198700
},
{
"epoch": 0.66,
"learning_rate": 1.7059696843165714e-05,
"loss": 2.8044,
"step": 198800
},
{
"epoch": 0.66,
"learning_rate": 1.7043127274173347e-05,
"loss": 2.8044,
"step": 198900
},
{
"epoch": 0.66,
"learning_rate": 1.7026557705180973e-05,
"loss": 2.8278,
"step": 199000
},
{
"epoch": 0.66,
"learning_rate": 1.7009988136188602e-05,
"loss": 2.8032,
"step": 199100
},
{
"epoch": 0.66,
"learning_rate": 1.699341856719623e-05,
"loss": 2.8123,
"step": 199200
},
{
"epoch": 0.66,
"learning_rate": 1.697684899820386e-05,
"loss": 2.8093,
"step": 199300
},
{
"epoch": 0.66,
"learning_rate": 1.6960279429211486e-05,
"loss": 2.8197,
"step": 199400
},
{
"epoch": 0.66,
"learning_rate": 1.694370986021912e-05,
"loss": 2.8007,
"step": 199500
},
{
"epoch": 0.66,
"learning_rate": 1.6927140291226745e-05,
"loss": 2.8275,
"step": 199600
},
{
"epoch": 0.66,
"learning_rate": 1.6910570722234374e-05,
"loss": 2.8066,
"step": 199700
},
{
"epoch": 0.66,
"learning_rate": 1.6894001153242003e-05,
"loss": 2.8015,
"step": 199800
},
{
"epoch": 0.66,
"learning_rate": 1.6877431584249632e-05,
"loss": 2.8074,
"step": 199900
},
{
"epoch": 0.66,
"learning_rate": 1.6860862015257258e-05,
"loss": 2.7969,
"step": 200000
},
{
"epoch": 0.66,
"learning_rate": 1.684429244626489e-05,
"loss": 2.7987,
"step": 200100
},
{
"epoch": 0.66,
"learning_rate": 1.6827722877272516e-05,
"loss": 2.8081,
"step": 200200
},
{
"epoch": 0.66,
"learning_rate": 1.6811153308280146e-05,
"loss": 2.8022,
"step": 200300
},
{
"epoch": 0.66,
"learning_rate": 1.6794583739287775e-05,
"loss": 2.7974,
"step": 200400
},
{
"epoch": 0.66,
"learning_rate": 1.6778014170295404e-05,
"loss": 2.8164,
"step": 200500
},
{
"epoch": 0.66,
"learning_rate": 1.676144460130303e-05,
"loss": 2.8114,
"step": 200600
},
{
"epoch": 0.67,
"learning_rate": 1.6744875032310662e-05,
"loss": 2.8081,
"step": 200700
},
{
"epoch": 0.67,
"learning_rate": 1.672830546331829e-05,
"loss": 2.8175,
"step": 200800
},
{
"epoch": 0.67,
"learning_rate": 1.6711735894325918e-05,
"loss": 2.7864,
"step": 200900
},
{
"epoch": 0.67,
"learning_rate": 1.6695166325333547e-05,
"loss": 2.7981,
"step": 201000
},
{
"epoch": 0.67,
"learning_rate": 1.6678596756341176e-05,
"loss": 2.8254,
"step": 201100
},
{
"epoch": 0.67,
"learning_rate": 1.6662027187348802e-05,
"loss": 2.8069,
"step": 201200
},
{
"epoch": 0.67,
"learning_rate": 1.6645457618356434e-05,
"loss": 2.8021,
"step": 201300
},
{
"epoch": 0.67,
"learning_rate": 1.662888804936406e-05,
"loss": 2.7928,
"step": 201400
},
{
"epoch": 0.67,
"learning_rate": 1.661231848037169e-05,
"loss": 2.8014,
"step": 201500
},
{
"epoch": 0.67,
"learning_rate": 1.659574891137932e-05,
"loss": 2.8068,
"step": 201600
},
{
"epoch": 0.67,
"learning_rate": 1.6579179342386948e-05,
"loss": 2.8082,
"step": 201700
},
{
"epoch": 0.67,
"learning_rate": 1.6562609773394574e-05,
"loss": 2.7834,
"step": 201800
},
{
"epoch": 0.67,
"learning_rate": 1.6546040204402206e-05,
"loss": 2.797,
"step": 201900
},
{
"epoch": 0.67,
"learning_rate": 1.6529470635409832e-05,
"loss": 2.8121,
"step": 202000
},
{
"epoch": 0.67,
"learning_rate": 1.651290106641746e-05,
"loss": 2.8028,
"step": 202100
},
{
"epoch": 0.67,
"learning_rate": 1.649633149742509e-05,
"loss": 2.8093,
"step": 202200
},
{
"epoch": 0.67,
"learning_rate": 1.647976192843272e-05,
"loss": 2.8147,
"step": 202300
},
{
"epoch": 0.67,
"learning_rate": 1.6463192359440345e-05,
"loss": 2.8005,
"step": 202400
},
{
"epoch": 0.67,
"learning_rate": 1.6446622790447978e-05,
"loss": 2.819,
"step": 202500
},
{
"epoch": 0.67,
"learning_rate": 1.6430053221455604e-05,
"loss": 2.8106,
"step": 202600
},
{
"epoch": 0.67,
"learning_rate": 1.6413483652463233e-05,
"loss": 2.8106,
"step": 202700
},
{
"epoch": 0.67,
"learning_rate": 1.6396914083470862e-05,
"loss": 2.8107,
"step": 202800
},
{
"epoch": 0.67,
"learning_rate": 1.638034451447849e-05,
"loss": 2.817,
"step": 202900
},
{
"epoch": 0.67,
"learning_rate": 1.6363774945486117e-05,
"loss": 2.8081,
"step": 203000
},
{
"epoch": 0.67,
"learning_rate": 1.6347205376493746e-05,
"loss": 2.8038,
"step": 203100
},
{
"epoch": 0.67,
"learning_rate": 1.6330635807501376e-05,
"loss": 2.8241,
"step": 203200
},
{
"epoch": 0.67,
"learning_rate": 1.6314066238509005e-05,
"loss": 2.8152,
"step": 203300
},
{
"epoch": 0.67,
"learning_rate": 1.629749666951663e-05,
"loss": 2.8115,
"step": 203400
},
{
"epoch": 0.67,
"learning_rate": 1.6280927100524263e-05,
"loss": 2.8025,
"step": 203500
},
{
"epoch": 0.67,
"learning_rate": 1.626435753153189e-05,
"loss": 2.8047,
"step": 203600
},
{
"epoch": 0.68,
"learning_rate": 1.624778796253952e-05,
"loss": 2.8179,
"step": 203700
},
{
"epoch": 0.68,
"learning_rate": 1.6231218393547148e-05,
"loss": 2.8098,
"step": 203800
},
{
"epoch": 0.68,
"learning_rate": 1.6214648824554777e-05,
"loss": 2.7918,
"step": 203900
},
{
"epoch": 0.68,
"learning_rate": 1.6198079255562403e-05,
"loss": 2.7988,
"step": 204000
},
{
"epoch": 0.68,
"learning_rate": 1.6181509686570035e-05,
"loss": 2.8065,
"step": 204100
},
{
"epoch": 0.68,
"learning_rate": 1.616494011757766e-05,
"loss": 2.8161,
"step": 204200
},
{
"epoch": 0.68,
"learning_rate": 1.614837054858529e-05,
"loss": 2.8056,
"step": 204300
},
{
"epoch": 0.68,
"learning_rate": 1.613180097959292e-05,
"loss": 2.8053,
"step": 204400
},
{
"epoch": 0.68,
"learning_rate": 1.611523141060055e-05,
"loss": 2.8103,
"step": 204500
},
{
"epoch": 0.68,
"learning_rate": 1.6098661841608174e-05,
"loss": 2.8176,
"step": 204600
},
{
"epoch": 0.68,
"learning_rate": 1.6082092272615807e-05,
"loss": 2.8022,
"step": 204700
},
{
"epoch": 0.68,
"learning_rate": 1.6065522703623433e-05,
"loss": 2.8154,
"step": 204800
},
{
"epoch": 0.68,
"learning_rate": 1.6048953134631062e-05,
"loss": 2.7969,
"step": 204900
},
{
"epoch": 0.68,
"learning_rate": 1.603238356563869e-05,
"loss": 2.821,
"step": 205000
},
{
"epoch": 0.68,
"learning_rate": 1.601581399664632e-05,
"loss": 2.8173,
"step": 205100
},
{
"epoch": 0.68,
"learning_rate": 1.5999244427653946e-05,
"loss": 2.7945,
"step": 205200
},
{
"epoch": 0.68,
"learning_rate": 1.598267485866158e-05,
"loss": 2.8073,
"step": 205300
},
{
"epoch": 0.68,
"learning_rate": 1.5966105289669205e-05,
"loss": 2.7915,
"step": 205400
},
{
"epoch": 0.68,
"learning_rate": 1.5949535720676834e-05,
"loss": 2.8005,
"step": 205500
},
{
"epoch": 0.68,
"learning_rate": 1.5932966151684463e-05,
"loss": 2.8143,
"step": 205600
},
{
"epoch": 0.68,
"learning_rate": 1.5916396582692092e-05,
"loss": 2.7952,
"step": 205700
},
{
"epoch": 0.68,
"learning_rate": 1.5899827013699718e-05,
"loss": 2.8004,
"step": 205800
},
{
"epoch": 0.68,
"learning_rate": 1.588325744470735e-05,
"loss": 2.8086,
"step": 205900
},
{
"epoch": 0.68,
"learning_rate": 1.5866687875714977e-05,
"loss": 2.8023,
"step": 206000
},
{
"epoch": 0.68,
"learning_rate": 1.5850118306722606e-05,
"loss": 2.8007,
"step": 206100
},
{
"epoch": 0.68,
"learning_rate": 1.5833548737730235e-05,
"loss": 2.7934,
"step": 206200
},
{
"epoch": 0.68,
"learning_rate": 1.5816979168737864e-05,
"loss": 2.8002,
"step": 206300
},
{
"epoch": 0.68,
"learning_rate": 1.580040959974549e-05,
"loss": 2.8093,
"step": 206400
},
{
"epoch": 0.68,
"learning_rate": 1.5783840030753123e-05,
"loss": 2.8126,
"step": 206500
},
{
"epoch": 0.68,
"learning_rate": 1.576727046176075e-05,
"loss": 2.8039,
"step": 206600
},
{
"epoch": 0.68,
"learning_rate": 1.5750700892768378e-05,
"loss": 2.7816,
"step": 206700
},
{
"epoch": 0.69,
"learning_rate": 1.5734131323776007e-05,
"loss": 2.8108,
"step": 206800
},
{
"epoch": 0.69,
"learning_rate": 1.5717561754783636e-05,
"loss": 2.8126,
"step": 206900
},
{
"epoch": 0.69,
"learning_rate": 1.5700992185791262e-05,
"loss": 2.8098,
"step": 207000
},
{
"epoch": 0.69,
"learning_rate": 1.5684422616798894e-05,
"loss": 2.8087,
"step": 207100
},
{
"epoch": 0.69,
"learning_rate": 1.566785304780652e-05,
"loss": 2.7985,
"step": 207200
},
{
"epoch": 0.69,
"learning_rate": 1.565128347881415e-05,
"loss": 2.8125,
"step": 207300
},
{
"epoch": 0.69,
"learning_rate": 1.563471390982178e-05,
"loss": 2.8056,
"step": 207400
},
{
"epoch": 0.69,
"learning_rate": 1.5618144340829408e-05,
"loss": 2.7942,
"step": 207500
},
{
"epoch": 0.69,
"learning_rate": 1.5601574771837034e-05,
"loss": 2.8101,
"step": 207600
},
{
"epoch": 0.69,
"learning_rate": 1.5585005202844663e-05,
"loss": 2.8101,
"step": 207700
},
{
"epoch": 0.69,
"learning_rate": 1.5568435633852292e-05,
"loss": 2.8011,
"step": 207800
},
{
"epoch": 0.69,
"learning_rate": 1.555186606485992e-05,
"loss": 2.8057,
"step": 207900
},
{
"epoch": 0.69,
"learning_rate": 1.553529649586755e-05,
"loss": 2.8125,
"step": 208000
},
{
"epoch": 0.69,
"learning_rate": 1.551872692687518e-05,
"loss": 2.8133,
"step": 208100
},
{
"epoch": 0.69,
"learning_rate": 1.550215735788281e-05,
"loss": 2.8177,
"step": 208200
},
{
"epoch": 0.69,
"learning_rate": 1.5485587788890435e-05,
"loss": 2.8114,
"step": 208300
},
{
"epoch": 0.69,
"learning_rate": 1.5469018219898064e-05,
"loss": 2.7998,
"step": 208400
},
{
"epoch": 0.69,
"learning_rate": 1.5452448650905693e-05,
"loss": 2.8172,
"step": 208500
},
{
"epoch": 0.69,
"learning_rate": 1.5435879081913322e-05,
"loss": 2.8063,
"step": 208600
},
{
"epoch": 0.69,
"learning_rate": 1.541930951292095e-05,
"loss": 2.8236,
"step": 208700
},
{
"epoch": 0.69,
"learning_rate": 1.540273994392858e-05,
"loss": 2.8067,
"step": 208800
},
{
"epoch": 0.69,
"learning_rate": 1.5386170374936207e-05,
"loss": 2.7934,
"step": 208900
},
{
"epoch": 0.69,
"learning_rate": 1.5369600805943836e-05,
"loss": 2.7962,
"step": 209000
},
{
"epoch": 0.69,
"learning_rate": 1.5353031236951465e-05,
"loss": 2.8105,
"step": 209100
},
{
"epoch": 0.69,
"learning_rate": 1.5336461667959094e-05,
"loss": 2.7983,
"step": 209200
},
{
"epoch": 0.69,
"learning_rate": 1.5319892098966723e-05,
"loss": 2.8006,
"step": 209300
},
{
"epoch": 0.69,
"learning_rate": 1.5303322529974353e-05,
"loss": 2.8128,
"step": 209400
},
{
"epoch": 0.69,
"learning_rate": 1.528675296098198e-05,
"loss": 2.8025,
"step": 209500
},
{
"epoch": 0.69,
"learning_rate": 1.5270183391989608e-05,
"loss": 2.8118,
"step": 209600
},
{
"epoch": 0.69,
"learning_rate": 1.5253613822997237e-05,
"loss": 2.7962,
"step": 209700
},
{
"epoch": 0.7,
"learning_rate": 1.5237044254004864e-05,
"loss": 2.8027,
"step": 209800
},
{
"epoch": 0.7,
"learning_rate": 1.5220474685012495e-05,
"loss": 2.7959,
"step": 209900
},
{
"epoch": 0.7,
"learning_rate": 1.5203905116020123e-05,
"loss": 2.829,
"step": 210000
},
{
"epoch": 0.7,
"learning_rate": 1.518733554702775e-05,
"loss": 2.8201,
"step": 210100
},
{
"epoch": 0.7,
"learning_rate": 1.5170765978035381e-05,
"loss": 2.8031,
"step": 210200
},
{
"epoch": 0.7,
"learning_rate": 1.5154196409043009e-05,
"loss": 2.7975,
"step": 210300
},
{
"epoch": 0.7,
"learning_rate": 1.5137626840050636e-05,
"loss": 2.8088,
"step": 210400
},
{
"epoch": 0.7,
"learning_rate": 1.5121057271058267e-05,
"loss": 2.8125,
"step": 210500
},
{
"epoch": 0.7,
"learning_rate": 1.5104487702065895e-05,
"loss": 2.7969,
"step": 210600
},
{
"epoch": 0.7,
"learning_rate": 1.5087918133073522e-05,
"loss": 2.8081,
"step": 210700
},
{
"epoch": 0.7,
"learning_rate": 1.5071348564081153e-05,
"loss": 2.8078,
"step": 210800
},
{
"epoch": 0.7,
"learning_rate": 1.505477899508878e-05,
"loss": 2.8205,
"step": 210900
},
{
"epoch": 0.7,
"learning_rate": 1.5038209426096408e-05,
"loss": 2.8077,
"step": 211000
},
{
"epoch": 0.7,
"learning_rate": 1.5021639857104039e-05,
"loss": 2.8074,
"step": 211100
},
{
"epoch": 0.7,
"learning_rate": 1.5005070288111666e-05,
"loss": 2.808,
"step": 211200
},
{
"epoch": 0.7,
"learning_rate": 1.4988500719119294e-05,
"loss": 2.7985,
"step": 211300
},
{
"epoch": 0.7,
"learning_rate": 1.4971931150126925e-05,
"loss": 2.7872,
"step": 211400
},
{
"epoch": 0.7,
"learning_rate": 1.4955361581134552e-05,
"loss": 2.8006,
"step": 211500
},
{
"epoch": 0.7,
"learning_rate": 1.493879201214218e-05,
"loss": 2.7957,
"step": 211600
},
{
"epoch": 0.7,
"learning_rate": 1.492222244314981e-05,
"loss": 2.8026,
"step": 211700
},
{
"epoch": 0.7,
"learning_rate": 1.4905652874157438e-05,
"loss": 2.8082,
"step": 211800
},
{
"epoch": 0.7,
"learning_rate": 1.4889083305165066e-05,
"loss": 2.8116,
"step": 211900
},
{
"epoch": 0.7,
"learning_rate": 1.4872513736172693e-05,
"loss": 2.8229,
"step": 212000
},
{
"epoch": 0.7,
"learning_rate": 1.4855944167180324e-05,
"loss": 2.815,
"step": 212100
},
{
"epoch": 0.7,
"learning_rate": 1.4839374598187952e-05,
"loss": 2.7938,
"step": 212200
},
{
"epoch": 0.7,
"learning_rate": 1.482280502919558e-05,
"loss": 2.7942,
"step": 212300
},
{
"epoch": 0.7,
"learning_rate": 1.480623546020321e-05,
"loss": 2.8179,
"step": 212400
},
{
"epoch": 0.7,
"learning_rate": 1.4789665891210838e-05,
"loss": 2.8054,
"step": 212500
},
{
"epoch": 0.7,
"learning_rate": 1.4773096322218467e-05,
"loss": 2.821,
"step": 212600
},
{
"epoch": 0.7,
"learning_rate": 1.4756526753226096e-05,
"loss": 2.8068,
"step": 212700
},
{
"epoch": 0.71,
"learning_rate": 1.4739957184233724e-05,
"loss": 2.801,
"step": 212800
},
{
"epoch": 0.71,
"learning_rate": 1.4723387615241353e-05,
"loss": 2.8085,
"step": 212900
},
{
"epoch": 0.71,
"learning_rate": 1.4706818046248982e-05,
"loss": 2.7926,
"step": 213000
},
{
"epoch": 0.71,
"learning_rate": 1.469024847725661e-05,
"loss": 2.8012,
"step": 213100
},
{
"epoch": 0.71,
"learning_rate": 1.4673678908264239e-05,
"loss": 2.8187,
"step": 213200
},
{
"epoch": 0.71,
"learning_rate": 1.4657109339271868e-05,
"loss": 2.8052,
"step": 213300
},
{
"epoch": 0.71,
"learning_rate": 1.4640539770279495e-05,
"loss": 2.8094,
"step": 213400
},
{
"epoch": 0.71,
"learning_rate": 1.4623970201287125e-05,
"loss": 2.8182,
"step": 213500
},
{
"epoch": 0.71,
"learning_rate": 1.4607400632294754e-05,
"loss": 2.7888,
"step": 213600
},
{
"epoch": 0.71,
"learning_rate": 1.4590831063302381e-05,
"loss": 2.7989,
"step": 213700
},
{
"epoch": 0.71,
"learning_rate": 1.457426149431001e-05,
"loss": 2.8045,
"step": 213800
},
{
"epoch": 0.71,
"learning_rate": 1.455769192531764e-05,
"loss": 2.8113,
"step": 213900
},
{
"epoch": 0.71,
"learning_rate": 1.4541122356325267e-05,
"loss": 2.8009,
"step": 214000
},
{
"epoch": 0.71,
"learning_rate": 1.4524552787332896e-05,
"loss": 2.8006,
"step": 214100
},
{
"epoch": 0.71,
"learning_rate": 1.4507983218340526e-05,
"loss": 2.8047,
"step": 214200
},
{
"epoch": 0.71,
"learning_rate": 1.4491413649348153e-05,
"loss": 2.8001,
"step": 214300
},
{
"epoch": 0.71,
"learning_rate": 1.4474844080355782e-05,
"loss": 2.8117,
"step": 214400
},
{
"epoch": 0.71,
"learning_rate": 1.4458274511363412e-05,
"loss": 2.8099,
"step": 214500
},
{
"epoch": 0.71,
"learning_rate": 1.4441704942371039e-05,
"loss": 2.7991,
"step": 214600
},
{
"epoch": 0.71,
"learning_rate": 1.4425135373378668e-05,
"loss": 2.8038,
"step": 214700
},
{
"epoch": 0.71,
"learning_rate": 1.4408565804386297e-05,
"loss": 2.8054,
"step": 214800
},
{
"epoch": 0.71,
"learning_rate": 1.4391996235393927e-05,
"loss": 2.8033,
"step": 214900
},
{
"epoch": 0.71,
"learning_rate": 1.4375426666401554e-05,
"loss": 2.8129,
"step": 215000
},
{
"epoch": 0.71,
"learning_rate": 1.4358857097409183e-05,
"loss": 2.8142,
"step": 215100
},
{
"epoch": 0.71,
"learning_rate": 1.4342287528416813e-05,
"loss": 2.7965,
"step": 215200
},
{
"epoch": 0.71,
"learning_rate": 1.432571795942444e-05,
"loss": 2.794,
"step": 215300
},
{
"epoch": 0.71,
"learning_rate": 1.430914839043207e-05,
"loss": 2.8018,
"step": 215400
},
{
"epoch": 0.71,
"learning_rate": 1.4292578821439699e-05,
"loss": 2.8016,
"step": 215500
},
{
"epoch": 0.71,
"learning_rate": 1.4276009252447326e-05,
"loss": 2.7977,
"step": 215600
},
{
"epoch": 0.71,
"learning_rate": 1.4259439683454955e-05,
"loss": 2.807,
"step": 215700
},
{
"epoch": 0.72,
"learning_rate": 1.4242870114462584e-05,
"loss": 2.798,
"step": 215800
},
{
"epoch": 0.72,
"learning_rate": 1.4226300545470212e-05,
"loss": 2.8158,
"step": 215900
},
{
"epoch": 0.72,
"learning_rate": 1.4209730976477841e-05,
"loss": 2.8197,
"step": 216000
},
{
"epoch": 0.72,
"learning_rate": 1.419316140748547e-05,
"loss": 2.8001,
"step": 216100
},
{
"epoch": 0.72,
"learning_rate": 1.4176591838493098e-05,
"loss": 2.7944,
"step": 216200
},
{
"epoch": 0.72,
"learning_rate": 1.4160022269500725e-05,
"loss": 2.7972,
"step": 216300
},
{
"epoch": 0.72,
"learning_rate": 1.4143452700508356e-05,
"loss": 2.8078,
"step": 216400
},
{
"epoch": 0.72,
"learning_rate": 1.4126883131515984e-05,
"loss": 2.8172,
"step": 216500
},
{
"epoch": 0.72,
"learning_rate": 1.4110313562523611e-05,
"loss": 2.794,
"step": 216600
},
{
"epoch": 0.72,
"learning_rate": 1.4093743993531242e-05,
"loss": 2.8154,
"step": 216700
},
{
"epoch": 0.72,
"learning_rate": 1.407717442453887e-05,
"loss": 2.8125,
"step": 216800
},
{
"epoch": 0.72,
"learning_rate": 1.4060604855546497e-05,
"loss": 2.8029,
"step": 216900
},
{
"epoch": 0.72,
"learning_rate": 1.4044035286554128e-05,
"loss": 2.7891,
"step": 217000
},
{
"epoch": 0.72,
"learning_rate": 1.4027465717561756e-05,
"loss": 2.8173,
"step": 217100
},
{
"epoch": 0.72,
"learning_rate": 1.4010896148569383e-05,
"loss": 2.8205,
"step": 217200
},
{
"epoch": 0.72,
"learning_rate": 1.3994326579577014e-05,
"loss": 2.8021,
"step": 217300
},
{
"epoch": 0.72,
"learning_rate": 1.3977757010584642e-05,
"loss": 2.8031,
"step": 217400
},
{
"epoch": 0.72,
"learning_rate": 1.3961187441592269e-05,
"loss": 2.7877,
"step": 217500
},
{
"epoch": 0.72,
"learning_rate": 1.39446178725999e-05,
"loss": 2.8076,
"step": 217600
},
{
"epoch": 0.72,
"learning_rate": 1.3928048303607527e-05,
"loss": 2.8166,
"step": 217700
},
{
"epoch": 0.72,
"learning_rate": 1.3911478734615155e-05,
"loss": 2.8112,
"step": 217800
},
{
"epoch": 0.72,
"learning_rate": 1.3894909165622786e-05,
"loss": 2.8036,
"step": 217900
},
{
"epoch": 0.72,
"learning_rate": 1.3878339596630413e-05,
"loss": 2.8021,
"step": 218000
},
{
"epoch": 0.72,
"learning_rate": 1.3861770027638041e-05,
"loss": 2.7983,
"step": 218100
},
{
"epoch": 0.72,
"learning_rate": 1.3845200458645672e-05,
"loss": 2.7951,
"step": 218200
},
{
"epoch": 0.72,
"learning_rate": 1.38286308896533e-05,
"loss": 2.7867,
"step": 218300
},
{
"epoch": 0.72,
"learning_rate": 1.3812061320660927e-05,
"loss": 2.7987,
"step": 218400
},
{
"epoch": 0.72,
"learning_rate": 1.3795491751668558e-05,
"loss": 2.8081,
"step": 218500
},
{
"epoch": 0.72,
"learning_rate": 1.3778922182676185e-05,
"loss": 2.8216,
"step": 218600
},
{
"epoch": 0.72,
"learning_rate": 1.3762352613683813e-05,
"loss": 2.803,
"step": 218700
},
{
"epoch": 0.73,
"learning_rate": 1.3745783044691444e-05,
"loss": 2.8087,
"step": 218800
},
{
"epoch": 0.73,
"learning_rate": 1.3729213475699071e-05,
"loss": 2.7958,
"step": 218900
},
{
"epoch": 0.73,
"learning_rate": 1.3712643906706699e-05,
"loss": 2.8047,
"step": 219000
},
{
"epoch": 0.73,
"learning_rate": 1.369607433771433e-05,
"loss": 2.813,
"step": 219100
},
{
"epoch": 0.73,
"learning_rate": 1.3679504768721957e-05,
"loss": 2.7964,
"step": 219200
},
{
"epoch": 0.73,
"learning_rate": 1.3662935199729585e-05,
"loss": 2.7875,
"step": 219300
},
{
"epoch": 0.73,
"learning_rate": 1.3646365630737216e-05,
"loss": 2.7965,
"step": 219400
},
{
"epoch": 0.73,
"learning_rate": 1.3629796061744843e-05,
"loss": 2.8001,
"step": 219500
},
{
"epoch": 0.73,
"learning_rate": 1.361322649275247e-05,
"loss": 2.804,
"step": 219600
},
{
"epoch": 0.73,
"learning_rate": 1.3596656923760101e-05,
"loss": 2.7974,
"step": 219700
},
{
"epoch": 0.73,
"learning_rate": 1.3580087354767729e-05,
"loss": 2.8127,
"step": 219800
},
{
"epoch": 0.73,
"learning_rate": 1.3563517785775356e-05,
"loss": 2.798,
"step": 219900
},
{
"epoch": 0.73,
"learning_rate": 1.3546948216782987e-05,
"loss": 2.7953,
"step": 220000
},
{
"epoch": 0.73,
"learning_rate": 1.3530378647790615e-05,
"loss": 2.8075,
"step": 220100
},
{
"epoch": 0.73,
"learning_rate": 1.3513809078798242e-05,
"loss": 2.7986,
"step": 220200
},
{
"epoch": 0.73,
"learning_rate": 1.3497239509805873e-05,
"loss": 2.8043,
"step": 220300
},
{
"epoch": 0.73,
"learning_rate": 1.34806699408135e-05,
"loss": 2.8051,
"step": 220400
},
{
"epoch": 0.73,
"learning_rate": 1.3464100371821128e-05,
"loss": 2.7975,
"step": 220500
},
{
"epoch": 0.73,
"learning_rate": 1.3447530802828756e-05,
"loss": 2.8062,
"step": 220600
},
{
"epoch": 0.73,
"learning_rate": 1.3430961233836387e-05,
"loss": 2.8,
"step": 220700
},
{
"epoch": 0.73,
"learning_rate": 1.3414391664844014e-05,
"loss": 2.8003,
"step": 220800
},
{
"epoch": 0.73,
"learning_rate": 1.3397822095851642e-05,
"loss": 2.7956,
"step": 220900
},
{
"epoch": 0.73,
"learning_rate": 1.3381252526859273e-05,
"loss": 2.7963,
"step": 221000
},
{
"epoch": 0.73,
"learning_rate": 1.33646829578669e-05,
"loss": 2.7872,
"step": 221100
},
{
"epoch": 0.73,
"learning_rate": 1.3348113388874528e-05,
"loss": 2.7966,
"step": 221200
},
{
"epoch": 0.73,
"learning_rate": 1.3331543819882159e-05,
"loss": 2.7877,
"step": 221300
},
{
"epoch": 0.73,
"learning_rate": 1.3314974250889786e-05,
"loss": 2.8018,
"step": 221400
},
{
"epoch": 0.73,
"learning_rate": 1.3298404681897414e-05,
"loss": 2.7986,
"step": 221500
},
{
"epoch": 0.73,
"learning_rate": 1.3281835112905044e-05,
"loss": 2.8149,
"step": 221600
},
{
"epoch": 0.73,
"learning_rate": 1.3265265543912672e-05,
"loss": 2.8071,
"step": 221700
},
{
"epoch": 0.74,
"learning_rate": 1.32486959749203e-05,
"loss": 2.7898,
"step": 221800
},
{
"epoch": 0.74,
"learning_rate": 1.323212640592793e-05,
"loss": 2.8228,
"step": 221900
},
{
"epoch": 0.74,
"learning_rate": 1.3215556836935558e-05,
"loss": 2.8034,
"step": 222000
},
{
"epoch": 0.74,
"learning_rate": 1.3198987267943185e-05,
"loss": 2.8031,
"step": 222100
},
{
"epoch": 0.74,
"learning_rate": 1.3182417698950816e-05,
"loss": 2.8061,
"step": 222200
},
{
"epoch": 0.74,
"learning_rate": 1.3165848129958444e-05,
"loss": 2.8017,
"step": 222300
},
{
"epoch": 0.74,
"learning_rate": 1.3149278560966071e-05,
"loss": 2.7965,
"step": 222400
},
{
"epoch": 0.74,
"learning_rate": 1.3132708991973702e-05,
"loss": 2.7895,
"step": 222500
},
{
"epoch": 0.74,
"learning_rate": 1.311613942298133e-05,
"loss": 2.8115,
"step": 222600
},
{
"epoch": 0.74,
"learning_rate": 1.3099569853988957e-05,
"loss": 2.7969,
"step": 222700
},
{
"epoch": 0.74,
"learning_rate": 1.3083000284996588e-05,
"loss": 2.8003,
"step": 222800
},
{
"epoch": 0.74,
"learning_rate": 1.3066430716004216e-05,
"loss": 2.7997,
"step": 222900
},
{
"epoch": 0.74,
"learning_rate": 1.3049861147011843e-05,
"loss": 2.8208,
"step": 223000
},
{
"epoch": 0.74,
"learning_rate": 1.3033291578019474e-05,
"loss": 2.7945,
"step": 223100
},
{
"epoch": 0.74,
"learning_rate": 1.3016722009027102e-05,
"loss": 2.7896,
"step": 223200
},
{
"epoch": 0.74,
"learning_rate": 1.3000152440034729e-05,
"loss": 2.804,
"step": 223300
},
{
"epoch": 0.74,
"learning_rate": 1.298358287104236e-05,
"loss": 2.7876,
"step": 223400
},
{
"epoch": 0.74,
"learning_rate": 1.2967013302049988e-05,
"loss": 2.8006,
"step": 223500
},
{
"epoch": 0.74,
"learning_rate": 1.2950443733057615e-05,
"loss": 2.8074,
"step": 223600
},
{
"epoch": 0.74,
"learning_rate": 1.2933874164065246e-05,
"loss": 2.8075,
"step": 223700
},
{
"epoch": 0.74,
"learning_rate": 1.2917304595072873e-05,
"loss": 2.7999,
"step": 223800
},
{
"epoch": 0.74,
"learning_rate": 1.2900735026080501e-05,
"loss": 2.8016,
"step": 223900
},
{
"epoch": 0.74,
"learning_rate": 1.2884165457088132e-05,
"loss": 2.8105,
"step": 224000
},
{
"epoch": 0.74,
"learning_rate": 1.286759588809576e-05,
"loss": 2.8093,
"step": 224100
},
{
"epoch": 0.74,
"learning_rate": 1.2851026319103387e-05,
"loss": 2.8134,
"step": 224200
},
{
"epoch": 0.74,
"learning_rate": 1.2834456750111018e-05,
"loss": 2.7924,
"step": 224300
},
{
"epoch": 0.74,
"learning_rate": 1.2817887181118645e-05,
"loss": 2.7957,
"step": 224400
},
{
"epoch": 0.74,
"learning_rate": 1.2801317612126273e-05,
"loss": 2.8064,
"step": 224500
},
{
"epoch": 0.74,
"learning_rate": 1.2784748043133904e-05,
"loss": 2.8068,
"step": 224600
},
{
"epoch": 0.74,
"learning_rate": 1.2768178474141531e-05,
"loss": 2.8111,
"step": 224700
},
{
"epoch": 0.74,
"learning_rate": 1.2751608905149159e-05,
"loss": 2.7981,
"step": 224800
},
{
"epoch": 0.75,
"learning_rate": 1.2735039336156788e-05,
"loss": 2.8208,
"step": 224900
},
{
"epoch": 0.75,
"learning_rate": 1.2718469767164417e-05,
"loss": 2.7987,
"step": 225000
},
{
"epoch": 0.75,
"learning_rate": 1.2701900198172045e-05,
"loss": 2.7943,
"step": 225100
},
{
"epoch": 0.75,
"learning_rate": 1.2685330629179674e-05,
"loss": 2.7964,
"step": 225200
},
{
"epoch": 0.75,
"learning_rate": 1.2668761060187303e-05,
"loss": 2.8105,
"step": 225300
},
{
"epoch": 0.75,
"learning_rate": 1.265219149119493e-05,
"loss": 2.8107,
"step": 225400
},
{
"epoch": 0.75,
"learning_rate": 1.263562192220256e-05,
"loss": 2.7927,
"step": 225500
},
{
"epoch": 0.75,
"learning_rate": 1.2619052353210189e-05,
"loss": 2.805,
"step": 225600
},
{
"epoch": 0.75,
"learning_rate": 1.2602482784217816e-05,
"loss": 2.7914,
"step": 225700
},
{
"epoch": 0.75,
"learning_rate": 1.2585913215225446e-05,
"loss": 2.8051,
"step": 225800
},
{
"epoch": 0.75,
"learning_rate": 1.2569343646233075e-05,
"loss": 2.8145,
"step": 225900
},
{
"epoch": 0.75,
"learning_rate": 1.2552774077240702e-05,
"loss": 2.8195,
"step": 226000
},
{
"epoch": 0.75,
"learning_rate": 1.2536204508248332e-05,
"loss": 2.7926,
"step": 226100
},
{
"epoch": 0.75,
"learning_rate": 1.251963493925596e-05,
"loss": 2.816,
"step": 226200
},
{
"epoch": 0.75,
"learning_rate": 1.2503065370263588e-05,
"loss": 2.7998,
"step": 226300
},
{
"epoch": 0.75,
"learning_rate": 1.2486495801271218e-05,
"loss": 2.8003,
"step": 226400
},
{
"epoch": 0.75,
"learning_rate": 1.2469926232278847e-05,
"loss": 2.8041,
"step": 226500
},
{
"epoch": 0.75,
"learning_rate": 1.2453356663286474e-05,
"loss": 2.8037,
"step": 226600
},
{
"epoch": 0.75,
"learning_rate": 1.2436787094294103e-05,
"loss": 2.8039,
"step": 226700
},
{
"epoch": 0.75,
"learning_rate": 1.2420217525301733e-05,
"loss": 2.8026,
"step": 226800
},
{
"epoch": 0.75,
"learning_rate": 1.2403647956309362e-05,
"loss": 2.7925,
"step": 226900
},
{
"epoch": 0.75,
"learning_rate": 1.238707838731699e-05,
"loss": 2.8019,
"step": 227000
},
{
"epoch": 0.75,
"learning_rate": 1.2370508818324619e-05,
"loss": 2.7935,
"step": 227100
},
{
"epoch": 0.75,
"learning_rate": 1.2353939249332248e-05,
"loss": 2.7985,
"step": 227200
},
{
"epoch": 0.75,
"learning_rate": 1.2337369680339875e-05,
"loss": 2.7963,
"step": 227300
},
{
"epoch": 0.75,
"learning_rate": 1.2320800111347505e-05,
"loss": 2.8077,
"step": 227400
},
{
"epoch": 0.75,
"learning_rate": 1.2304230542355134e-05,
"loss": 2.7995,
"step": 227500
},
{
"epoch": 0.75,
"learning_rate": 1.2287660973362761e-05,
"loss": 2.7965,
"step": 227600
},
{
"epoch": 0.75,
"learning_rate": 1.227109140437039e-05,
"loss": 2.8032,
"step": 227700
},
{
"epoch": 0.75,
"learning_rate": 1.225452183537802e-05,
"loss": 2.8021,
"step": 227800
},
{
"epoch": 0.76,
"learning_rate": 1.2237952266385649e-05,
"loss": 2.789,
"step": 227900
},
{
"epoch": 0.76,
"learning_rate": 1.2221382697393276e-05,
"loss": 2.7973,
"step": 228000
},
{
"epoch": 0.76,
"learning_rate": 1.2204813128400906e-05,
"loss": 2.8172,
"step": 228100
},
{
"epoch": 0.76,
"learning_rate": 1.2188243559408533e-05,
"loss": 2.8075,
"step": 228200
},
{
"epoch": 0.76,
"learning_rate": 1.2171673990416162e-05,
"loss": 2.8054,
"step": 228300
},
{
"epoch": 0.76,
"learning_rate": 1.2155104421423791e-05,
"loss": 2.7892,
"step": 228400
},
{
"epoch": 0.76,
"learning_rate": 1.2138534852431419e-05,
"loss": 2.8069,
"step": 228500
},
{
"epoch": 0.76,
"learning_rate": 1.2121965283439048e-05,
"loss": 2.7996,
"step": 228600
},
{
"epoch": 0.76,
"learning_rate": 1.2105395714446677e-05,
"loss": 2.7989,
"step": 228700
},
{
"epoch": 0.76,
"learning_rate": 1.2088826145454305e-05,
"loss": 2.7901,
"step": 228800
},
{
"epoch": 0.76,
"learning_rate": 1.2072256576461934e-05,
"loss": 2.7823,
"step": 228900
},
{
"epoch": 0.76,
"learning_rate": 1.2055687007469563e-05,
"loss": 2.8011,
"step": 229000
},
{
"epoch": 0.76,
"learning_rate": 1.203911743847719e-05,
"loss": 2.8124,
"step": 229100
},
{
"epoch": 0.76,
"learning_rate": 1.202254786948482e-05,
"loss": 2.7928,
"step": 229200
},
{
"epoch": 0.76,
"learning_rate": 1.200597830049245e-05,
"loss": 2.7947,
"step": 229300
},
{
"epoch": 0.76,
"learning_rate": 1.1989408731500077e-05,
"loss": 2.797,
"step": 229400
},
{
"epoch": 0.76,
"learning_rate": 1.1972839162507706e-05,
"loss": 2.7977,
"step": 229500
},
{
"epoch": 0.76,
"learning_rate": 1.1956269593515335e-05,
"loss": 2.7909,
"step": 229600
},
{
"epoch": 0.76,
"learning_rate": 1.1939700024522963e-05,
"loss": 2.808,
"step": 229700
},
{
"epoch": 0.76,
"learning_rate": 1.1923130455530592e-05,
"loss": 2.8082,
"step": 229800
},
{
"epoch": 0.76,
"learning_rate": 1.1906560886538221e-05,
"loss": 2.8051,
"step": 229900
},
{
"epoch": 0.76,
"learning_rate": 1.1889991317545849e-05,
"loss": 2.8147,
"step": 230000
},
{
"epoch": 0.76,
"learning_rate": 1.1873421748553478e-05,
"loss": 2.7934,
"step": 230100
},
{
"epoch": 0.76,
"learning_rate": 1.1856852179561107e-05,
"loss": 2.8092,
"step": 230200
},
{
"epoch": 0.76,
"learning_rate": 1.1840282610568735e-05,
"loss": 2.7891,
"step": 230300
},
{
"epoch": 0.76,
"learning_rate": 1.1823713041576364e-05,
"loss": 2.79,
"step": 230400
},
{
"epoch": 0.76,
"learning_rate": 1.1807143472583991e-05,
"loss": 2.8015,
"step": 230500
},
{
"epoch": 0.76,
"learning_rate": 1.179057390359162e-05,
"loss": 2.7894,
"step": 230600
},
{
"epoch": 0.76,
"learning_rate": 1.177400433459925e-05,
"loss": 2.8033,
"step": 230700
},
{
"epoch": 0.76,
"learning_rate": 1.1757434765606877e-05,
"loss": 2.787,
"step": 230800
},
{
"epoch": 0.77,
"learning_rate": 1.1740865196614506e-05,
"loss": 2.7938,
"step": 230900
},
{
"epoch": 0.77,
"learning_rate": 1.1724295627622136e-05,
"loss": 2.7933,
"step": 231000
},
{
"epoch": 0.77,
"learning_rate": 1.1707726058629763e-05,
"loss": 2.7957,
"step": 231100
},
{
"epoch": 0.77,
"learning_rate": 1.1691156489637392e-05,
"loss": 2.8062,
"step": 231200
},
{
"epoch": 0.77,
"learning_rate": 1.1674586920645022e-05,
"loss": 2.7989,
"step": 231300
},
{
"epoch": 0.77,
"learning_rate": 1.1658017351652649e-05,
"loss": 2.8011,
"step": 231400
},
{
"epoch": 0.77,
"learning_rate": 1.1641447782660278e-05,
"loss": 2.7988,
"step": 231500
},
{
"epoch": 0.77,
"learning_rate": 1.1624878213667907e-05,
"loss": 2.7983,
"step": 231600
},
{
"epoch": 0.77,
"learning_rate": 1.1608308644675535e-05,
"loss": 2.7977,
"step": 231700
},
{
"epoch": 0.77,
"learning_rate": 1.1591739075683164e-05,
"loss": 2.7991,
"step": 231800
},
{
"epoch": 0.77,
"learning_rate": 1.1575169506690793e-05,
"loss": 2.8152,
"step": 231900
},
{
"epoch": 0.77,
"learning_rate": 1.1558599937698421e-05,
"loss": 2.7951,
"step": 232000
},
{
"epoch": 0.77,
"learning_rate": 1.154203036870605e-05,
"loss": 2.7953,
"step": 232100
},
{
"epoch": 0.77,
"learning_rate": 1.152546079971368e-05,
"loss": 2.7927,
"step": 232200
},
{
"epoch": 0.77,
"learning_rate": 1.1508891230721307e-05,
"loss": 2.7958,
"step": 232300
},
{
"epoch": 0.77,
"learning_rate": 1.1492321661728936e-05,
"loss": 2.7947,
"step": 232400
},
{
"epoch": 0.77,
"learning_rate": 1.1475752092736563e-05,
"loss": 2.7933,
"step": 232500
},
{
"epoch": 0.77,
"learning_rate": 1.1459182523744193e-05,
"loss": 2.7931,
"step": 232600
},
{
"epoch": 0.77,
"learning_rate": 1.1442612954751822e-05,
"loss": 2.7974,
"step": 232700
},
{
"epoch": 0.77,
"learning_rate": 1.142604338575945e-05,
"loss": 2.804,
"step": 232800
},
{
"epoch": 0.77,
"learning_rate": 1.1409473816767079e-05,
"loss": 2.8083,
"step": 232900
},
{
"epoch": 0.77,
"learning_rate": 1.1392904247774708e-05,
"loss": 2.7934,
"step": 233000
},
{
"epoch": 0.77,
"learning_rate": 1.1376334678782335e-05,
"loss": 2.8075,
"step": 233100
},
{
"epoch": 0.77,
"learning_rate": 1.1359765109789965e-05,
"loss": 2.8058,
"step": 233200
},
{
"epoch": 0.77,
"learning_rate": 1.1343195540797594e-05,
"loss": 2.807,
"step": 233300
},
{
"epoch": 0.77,
"learning_rate": 1.1326625971805221e-05,
"loss": 2.8115,
"step": 233400
},
{
"epoch": 0.77,
"learning_rate": 1.131005640281285e-05,
"loss": 2.7982,
"step": 233500
},
{
"epoch": 0.77,
"learning_rate": 1.129348683382048e-05,
"loss": 2.7937,
"step": 233600
},
{
"epoch": 0.77,
"learning_rate": 1.1276917264828107e-05,
"loss": 2.7942,
"step": 233700
},
{
"epoch": 0.77,
"learning_rate": 1.1260347695835736e-05,
"loss": 2.7913,
"step": 233800
},
{
"epoch": 0.78,
"learning_rate": 1.1243778126843366e-05,
"loss": 2.7943,
"step": 233900
},
{
"epoch": 0.78,
"learning_rate": 1.1227208557850993e-05,
"loss": 2.8015,
"step": 234000
},
{
"epoch": 0.78,
"learning_rate": 1.1210638988858622e-05,
"loss": 2.7855,
"step": 234100
},
{
"epoch": 0.78,
"learning_rate": 1.1194069419866252e-05,
"loss": 2.7987,
"step": 234200
},
{
"epoch": 0.78,
"learning_rate": 1.1177499850873879e-05,
"loss": 2.8015,
"step": 234300
},
{
"epoch": 0.78,
"learning_rate": 1.1160930281881508e-05,
"loss": 2.7842,
"step": 234400
},
{
"epoch": 0.78,
"learning_rate": 1.1144360712889137e-05,
"loss": 2.8001,
"step": 234500
},
{
"epoch": 0.78,
"learning_rate": 1.1127791143896765e-05,
"loss": 2.8048,
"step": 234600
},
{
"epoch": 0.78,
"learning_rate": 1.1111221574904394e-05,
"loss": 2.8104,
"step": 234700
},
{
"epoch": 0.78,
"learning_rate": 1.1094652005912022e-05,
"loss": 2.804,
"step": 234800
},
{
"epoch": 0.78,
"learning_rate": 1.1078082436919651e-05,
"loss": 2.7977,
"step": 234900
},
{
"epoch": 0.78,
"learning_rate": 1.106151286792728e-05,
"loss": 2.8038,
"step": 235000
},
{
"epoch": 0.78,
"learning_rate": 1.1044943298934908e-05,
"loss": 2.7877,
"step": 235100
},
{
"epoch": 0.78,
"learning_rate": 1.1028373729942537e-05,
"loss": 2.8045,
"step": 235200
},
{
"epoch": 0.78,
"learning_rate": 1.1011804160950166e-05,
"loss": 2.779,
"step": 235300
},
{
"epoch": 0.78,
"learning_rate": 1.0995234591957794e-05,
"loss": 2.8029,
"step": 235400
},
{
"epoch": 0.78,
"learning_rate": 1.0978665022965423e-05,
"loss": 2.8032,
"step": 235500
},
{
"epoch": 0.78,
"learning_rate": 1.0962095453973052e-05,
"loss": 2.8026,
"step": 235600
},
{
"epoch": 0.78,
"learning_rate": 1.094552588498068e-05,
"loss": 2.8083,
"step": 235700
},
{
"epoch": 0.78,
"learning_rate": 1.0928956315988309e-05,
"loss": 2.8082,
"step": 235800
},
{
"epoch": 0.78,
"learning_rate": 1.0912386746995938e-05,
"loss": 2.7937,
"step": 235900
},
{
"epoch": 0.78,
"learning_rate": 1.0895817178003565e-05,
"loss": 2.8044,
"step": 236000
},
{
"epoch": 0.78,
"learning_rate": 1.0879247609011195e-05,
"loss": 2.8067,
"step": 236100
},
{
"epoch": 0.78,
"learning_rate": 1.0862678040018824e-05,
"loss": 2.7925,
"step": 236200
},
{
"epoch": 0.78,
"learning_rate": 1.0846108471026451e-05,
"loss": 2.7925,
"step": 236300
},
{
"epoch": 0.78,
"learning_rate": 1.082953890203408e-05,
"loss": 2.8049,
"step": 236400
},
{
"epoch": 0.78,
"learning_rate": 1.081296933304171e-05,
"loss": 2.8132,
"step": 236500
},
{
"epoch": 0.78,
"learning_rate": 1.0796399764049337e-05,
"loss": 2.8048,
"step": 236600
},
{
"epoch": 0.78,
"learning_rate": 1.0779830195056966e-05,
"loss": 2.8026,
"step": 236700
},
{
"epoch": 0.78,
"learning_rate": 1.0763260626064596e-05,
"loss": 2.8067,
"step": 236800
},
{
"epoch": 0.79,
"learning_rate": 1.0746691057072223e-05,
"loss": 2.787,
"step": 236900
},
{
"epoch": 0.79,
"learning_rate": 1.0730121488079852e-05,
"loss": 2.7979,
"step": 237000
},
{
"epoch": 0.79,
"learning_rate": 1.071355191908748e-05,
"loss": 2.7932,
"step": 237100
},
{
"epoch": 0.79,
"learning_rate": 1.0696982350095109e-05,
"loss": 2.802,
"step": 237200
},
{
"epoch": 0.79,
"learning_rate": 1.0680412781102738e-05,
"loss": 2.8177,
"step": 237300
},
{
"epoch": 0.79,
"learning_rate": 1.0663843212110366e-05,
"loss": 2.7809,
"step": 237400
},
{
"epoch": 0.79,
"learning_rate": 1.0647273643117995e-05,
"loss": 2.7873,
"step": 237500
},
{
"epoch": 0.79,
"learning_rate": 1.0630704074125624e-05,
"loss": 2.786,
"step": 237600
},
{
"epoch": 0.79,
"learning_rate": 1.0614134505133252e-05,
"loss": 2.7932,
"step": 237700
},
{
"epoch": 0.79,
"learning_rate": 1.0597564936140881e-05,
"loss": 2.7884,
"step": 237800
},
{
"epoch": 0.79,
"learning_rate": 1.058099536714851e-05,
"loss": 2.7919,
"step": 237900
},
{
"epoch": 0.79,
"learning_rate": 1.0564425798156138e-05,
"loss": 2.803,
"step": 238000
},
{
"epoch": 0.79,
"learning_rate": 1.0547856229163767e-05,
"loss": 2.7975,
"step": 238100
},
{
"epoch": 0.79,
"learning_rate": 1.0531286660171396e-05,
"loss": 2.7978,
"step": 238200
},
{
"epoch": 0.79,
"learning_rate": 1.0514717091179024e-05,
"loss": 2.7793,
"step": 238300
},
{
"epoch": 0.79,
"learning_rate": 1.0498147522186653e-05,
"loss": 2.7983,
"step": 238400
},
{
"epoch": 0.79,
"learning_rate": 1.0481577953194282e-05,
"loss": 2.7956,
"step": 238500
},
{
"epoch": 0.79,
"learning_rate": 1.0465008384201911e-05,
"loss": 2.7919,
"step": 238600
},
{
"epoch": 0.79,
"learning_rate": 1.0448438815209539e-05,
"loss": 2.7938,
"step": 238700
},
{
"epoch": 0.79,
"learning_rate": 1.0431869246217168e-05,
"loss": 2.8103,
"step": 238800
},
{
"epoch": 0.79,
"learning_rate": 1.0415299677224797e-05,
"loss": 2.7991,
"step": 238900
},
{
"epoch": 0.79,
"learning_rate": 1.0398730108232425e-05,
"loss": 2.7835,
"step": 239000
},
{
"epoch": 0.79,
"learning_rate": 1.0382160539240054e-05,
"loss": 2.7907,
"step": 239100
},
{
"epoch": 0.79,
"learning_rate": 1.0365590970247683e-05,
"loss": 2.7978,
"step": 239200
},
{
"epoch": 0.79,
"learning_rate": 1.034902140125531e-05,
"loss": 2.8048,
"step": 239300
},
{
"epoch": 0.79,
"learning_rate": 1.033245183226294e-05,
"loss": 2.7936,
"step": 239400
},
{
"epoch": 0.79,
"learning_rate": 1.0315882263270569e-05,
"loss": 2.8048,
"step": 239500
},
{
"epoch": 0.79,
"learning_rate": 1.0299312694278198e-05,
"loss": 2.7763,
"step": 239600
},
{
"epoch": 0.79,
"learning_rate": 1.0282743125285826e-05,
"loss": 2.7866,
"step": 239700
},
{
"epoch": 0.79,
"learning_rate": 1.0266173556293455e-05,
"loss": 2.8019,
"step": 239800
},
{
"epoch": 0.8,
"learning_rate": 1.0249603987301084e-05,
"loss": 2.7858,
"step": 239900
},
{
"epoch": 0.8,
"learning_rate": 1.0233034418308712e-05,
"loss": 2.7955,
"step": 240000
},
{
"epoch": 0.8,
"learning_rate": 1.021646484931634e-05,
"loss": 2.7887,
"step": 240100
},
{
"epoch": 0.8,
"learning_rate": 1.019989528032397e-05,
"loss": 2.7992,
"step": 240200
},
{
"epoch": 0.8,
"learning_rate": 1.0183325711331597e-05,
"loss": 2.7958,
"step": 240300
},
{
"epoch": 0.8,
"learning_rate": 1.0166756142339227e-05,
"loss": 2.8098,
"step": 240400
},
{
"epoch": 0.8,
"learning_rate": 1.0150186573346856e-05,
"loss": 2.7861,
"step": 240500
},
{
"epoch": 0.8,
"learning_rate": 1.0133617004354483e-05,
"loss": 2.8054,
"step": 240600
},
{
"epoch": 0.8,
"learning_rate": 1.0117047435362113e-05,
"loss": 2.7879,
"step": 240700
},
{
"epoch": 0.8,
"learning_rate": 1.0100477866369742e-05,
"loss": 2.7921,
"step": 240800
},
{
"epoch": 0.8,
"learning_rate": 1.008390829737737e-05,
"loss": 2.8056,
"step": 240900
},
{
"epoch": 0.8,
"learning_rate": 1.0067338728384999e-05,
"loss": 2.7939,
"step": 241000
},
{
"epoch": 0.8,
"learning_rate": 1.0050769159392628e-05,
"loss": 2.7893,
"step": 241100
},
{
"epoch": 0.8,
"learning_rate": 1.0034199590400255e-05,
"loss": 2.8002,
"step": 241200
},
{
"epoch": 0.8,
"learning_rate": 1.0017630021407884e-05,
"loss": 2.8005,
"step": 241300
},
{
"epoch": 0.8,
"learning_rate": 1.0001060452415512e-05,
"loss": 2.8,
"step": 241400
},
{
"epoch": 0.8,
"learning_rate": 9.984490883423141e-06,
"loss": 2.8067,
"step": 241500
},
{
"epoch": 0.8,
"learning_rate": 9.96792131443077e-06,
"loss": 2.7818,
"step": 241600
},
{
"epoch": 0.8,
"learning_rate": 9.951351745438398e-06,
"loss": 2.8016,
"step": 241700
},
{
"epoch": 0.8,
"learning_rate": 9.934782176446027e-06,
"loss": 2.7912,
"step": 241800
},
{
"epoch": 0.8,
"learning_rate": 9.918212607453656e-06,
"loss": 2.7861,
"step": 241900
},
{
"epoch": 0.8,
"learning_rate": 9.901643038461284e-06,
"loss": 2.796,
"step": 242000
},
{
"epoch": 0.8,
"learning_rate": 9.885073469468913e-06,
"loss": 2.7967,
"step": 242100
},
{
"epoch": 0.8,
"learning_rate": 9.868503900476542e-06,
"loss": 2.7855,
"step": 242200
},
{
"epoch": 0.8,
"learning_rate": 9.85193433148417e-06,
"loss": 2.7982,
"step": 242300
},
{
"epoch": 0.8,
"learning_rate": 9.835364762491799e-06,
"loss": 2.7869,
"step": 242400
},
{
"epoch": 0.8,
"learning_rate": 9.818795193499428e-06,
"loss": 2.8147,
"step": 242500
},
{
"epoch": 0.8,
"learning_rate": 9.802225624507056e-06,
"loss": 2.8093,
"step": 242600
},
{
"epoch": 0.8,
"learning_rate": 9.785656055514685e-06,
"loss": 2.7887,
"step": 242700
},
{
"epoch": 0.8,
"learning_rate": 9.769086486522314e-06,
"loss": 2.7951,
"step": 242800
},
{
"epoch": 0.8,
"learning_rate": 9.752516917529942e-06,
"loss": 2.7954,
"step": 242900
},
{
"epoch": 0.81,
"learning_rate": 9.73594734853757e-06,
"loss": 2.7778,
"step": 243000
},
{
"epoch": 0.81,
"learning_rate": 9.7193777795452e-06,
"loss": 2.7967,
"step": 243100
},
{
"epoch": 0.81,
"learning_rate": 9.702808210552828e-06,
"loss": 2.7833,
"step": 243200
},
{
"epoch": 0.81,
"learning_rate": 9.686238641560457e-06,
"loss": 2.8055,
"step": 243300
},
{
"epoch": 0.81,
"learning_rate": 9.669669072568084e-06,
"loss": 2.7981,
"step": 243400
},
{
"epoch": 0.81,
"learning_rate": 9.653099503575713e-06,
"loss": 2.7958,
"step": 243500
},
{
"epoch": 0.81,
"learning_rate": 9.636529934583343e-06,
"loss": 2.8037,
"step": 243600
},
{
"epoch": 0.81,
"learning_rate": 9.61996036559097e-06,
"loss": 2.7925,
"step": 243700
},
{
"epoch": 0.81,
"learning_rate": 9.6033907965986e-06,
"loss": 2.7985,
"step": 243800
},
{
"epoch": 0.81,
"learning_rate": 9.586821227606229e-06,
"loss": 2.7936,
"step": 243900
},
{
"epoch": 0.81,
"learning_rate": 9.570251658613856e-06,
"loss": 2.8121,
"step": 244000
},
{
"epoch": 0.81,
"learning_rate": 9.553682089621485e-06,
"loss": 2.8018,
"step": 244100
},
{
"epoch": 0.81,
"learning_rate": 9.537112520629114e-06,
"loss": 2.7848,
"step": 244200
},
{
"epoch": 0.81,
"learning_rate": 9.520542951636742e-06,
"loss": 2.794,
"step": 244300
},
{
"epoch": 0.81,
"learning_rate": 9.503973382644371e-06,
"loss": 2.7946,
"step": 244400
},
{
"epoch": 0.81,
"learning_rate": 9.487403813652e-06,
"loss": 2.809,
"step": 244500
},
{
"epoch": 0.81,
"learning_rate": 9.470834244659628e-06,
"loss": 2.7967,
"step": 244600
},
{
"epoch": 0.81,
"learning_rate": 9.454264675667257e-06,
"loss": 2.796,
"step": 244700
},
{
"epoch": 0.81,
"learning_rate": 9.437695106674886e-06,
"loss": 2.8046,
"step": 244800
},
{
"epoch": 0.81,
"learning_rate": 9.421125537682514e-06,
"loss": 2.7974,
"step": 244900
},
{
"epoch": 0.81,
"learning_rate": 9.404555968690143e-06,
"loss": 2.7823,
"step": 245000
},
{
"epoch": 0.81,
"learning_rate": 9.387986399697772e-06,
"loss": 2.7973,
"step": 245100
},
{
"epoch": 0.81,
"learning_rate": 9.3714168307054e-06,
"loss": 2.7902,
"step": 245200
},
{
"epoch": 0.81,
"learning_rate": 9.354847261713029e-06,
"loss": 2.7852,
"step": 245300
},
{
"epoch": 0.81,
"learning_rate": 9.338277692720658e-06,
"loss": 2.7927,
"step": 245400
},
{
"epoch": 0.81,
"learning_rate": 9.321708123728286e-06,
"loss": 2.7946,
"step": 245500
},
{
"epoch": 0.81,
"learning_rate": 9.305138554735915e-06,
"loss": 2.8045,
"step": 245600
},
{
"epoch": 0.81,
"learning_rate": 9.288568985743542e-06,
"loss": 2.7943,
"step": 245700
},
{
"epoch": 0.81,
"learning_rate": 9.271999416751172e-06,
"loss": 2.7966,
"step": 245800
},
{
"epoch": 0.81,
"learning_rate": 9.2554298477588e-06,
"loss": 2.7951,
"step": 245900
},
{
"epoch": 0.82,
"learning_rate": 9.238860278766428e-06,
"loss": 2.782,
"step": 246000
},
{
"epoch": 0.82,
"learning_rate": 9.222290709774058e-06,
"loss": 2.7998,
"step": 246100
},
{
"epoch": 0.82,
"learning_rate": 9.205721140781687e-06,
"loss": 2.7864,
"step": 246200
},
{
"epoch": 0.82,
"learning_rate": 9.189151571789314e-06,
"loss": 2.7991,
"step": 246300
},
{
"epoch": 0.82,
"learning_rate": 9.172582002796943e-06,
"loss": 2.7967,
"step": 246400
},
{
"epoch": 0.82,
"learning_rate": 9.156012433804573e-06,
"loss": 2.7894,
"step": 246500
},
{
"epoch": 0.82,
"learning_rate": 9.1394428648122e-06,
"loss": 2.7994,
"step": 246600
},
{
"epoch": 0.82,
"learning_rate": 9.12287329581983e-06,
"loss": 2.8036,
"step": 246700
},
{
"epoch": 0.82,
"learning_rate": 9.106303726827459e-06,
"loss": 2.7973,
"step": 246800
},
{
"epoch": 0.82,
"learning_rate": 9.089734157835086e-06,
"loss": 2.805,
"step": 246900
},
{
"epoch": 0.82,
"learning_rate": 9.073164588842715e-06,
"loss": 2.7782,
"step": 247000
},
{
"epoch": 0.82,
"learning_rate": 9.056595019850344e-06,
"loss": 2.7765,
"step": 247100
},
{
"epoch": 0.82,
"learning_rate": 9.040025450857972e-06,
"loss": 2.7964,
"step": 247200
},
{
"epoch": 0.82,
"learning_rate": 9.023455881865601e-06,
"loss": 2.7897,
"step": 247300
},
{
"epoch": 0.82,
"learning_rate": 9.00688631287323e-06,
"loss": 2.7858,
"step": 247400
},
{
"epoch": 0.82,
"learning_rate": 8.990316743880858e-06,
"loss": 2.7934,
"step": 247500
},
{
"epoch": 0.82,
"learning_rate": 8.973747174888487e-06,
"loss": 2.7956,
"step": 247600
},
{
"epoch": 0.82,
"learning_rate": 8.957177605896116e-06,
"loss": 2.7839,
"step": 247700
},
{
"epoch": 0.82,
"learning_rate": 8.940608036903744e-06,
"loss": 2.8125,
"step": 247800
},
{
"epoch": 0.82,
"learning_rate": 8.924038467911373e-06,
"loss": 2.8129,
"step": 247900
},
{
"epoch": 0.82,
"learning_rate": 8.907468898919e-06,
"loss": 2.8038,
"step": 248000
},
{
"epoch": 0.82,
"learning_rate": 8.89089932992663e-06,
"loss": 2.8047,
"step": 248100
},
{
"epoch": 0.82,
"learning_rate": 8.874329760934259e-06,
"loss": 2.7867,
"step": 248200
},
{
"epoch": 0.82,
"learning_rate": 8.857760191941886e-06,
"loss": 2.8021,
"step": 248300
},
{
"epoch": 0.82,
"learning_rate": 8.841190622949516e-06,
"loss": 2.7963,
"step": 248400
},
{
"epoch": 0.82,
"learning_rate": 8.824621053957145e-06,
"loss": 2.7938,
"step": 248500
},
{
"epoch": 0.82,
"learning_rate": 8.808051484964772e-06,
"loss": 2.7806,
"step": 248600
},
{
"epoch": 0.82,
"learning_rate": 8.791481915972402e-06,
"loss": 2.7969,
"step": 248700
},
{
"epoch": 0.82,
"learning_rate": 8.77491234698003e-06,
"loss": 2.8023,
"step": 248800
},
{
"epoch": 0.82,
"learning_rate": 8.758342777987658e-06,
"loss": 2.796,
"step": 248900
},
{
"epoch": 0.83,
"learning_rate": 8.741773208995288e-06,
"loss": 2.7882,
"step": 249000
},
{
"epoch": 0.83,
"learning_rate": 8.725203640002917e-06,
"loss": 2.7916,
"step": 249100
},
{
"epoch": 0.83,
"learning_rate": 8.708634071010544e-06,
"loss": 2.7876,
"step": 249200
},
{
"epoch": 0.83,
"learning_rate": 8.692064502018173e-06,
"loss": 2.7946,
"step": 249300
},
{
"epoch": 0.83,
"learning_rate": 8.675494933025803e-06,
"loss": 2.8042,
"step": 249400
},
{
"epoch": 0.83,
"learning_rate": 8.65892536403343e-06,
"loss": 2.7642,
"step": 249500
},
{
"epoch": 0.83,
"learning_rate": 8.64235579504106e-06,
"loss": 2.8043,
"step": 249600
},
{
"epoch": 0.83,
"learning_rate": 8.625786226048689e-06,
"loss": 2.8079,
"step": 249700
},
{
"epoch": 0.83,
"learning_rate": 8.609216657056316e-06,
"loss": 2.8011,
"step": 249800
},
{
"epoch": 0.83,
"learning_rate": 8.592647088063945e-06,
"loss": 2.7961,
"step": 249900
},
{
"epoch": 0.83,
"learning_rate": 8.576077519071573e-06,
"loss": 2.7989,
"step": 250000
},
{
"epoch": 0.83,
"learning_rate": 8.559507950079202e-06,
"loss": 2.7862,
"step": 250100
},
{
"epoch": 0.83,
"learning_rate": 8.542938381086831e-06,
"loss": 2.7805,
"step": 250200
},
{
"epoch": 0.83,
"learning_rate": 8.52636881209446e-06,
"loss": 2.8054,
"step": 250300
},
{
"epoch": 0.83,
"learning_rate": 8.509799243102088e-06,
"loss": 2.8029,
"step": 250400
},
{
"epoch": 0.83,
"learning_rate": 8.493229674109717e-06,
"loss": 2.7964,
"step": 250500
},
{
"epoch": 0.83,
"learning_rate": 8.476660105117346e-06,
"loss": 2.784,
"step": 250600
},
{
"epoch": 0.83,
"learning_rate": 8.460090536124974e-06,
"loss": 2.7888,
"step": 250700
},
{
"epoch": 0.83,
"learning_rate": 8.443520967132603e-06,
"loss": 2.7974,
"step": 250800
},
{
"epoch": 0.83,
"learning_rate": 8.426951398140232e-06,
"loss": 2.8005,
"step": 250900
},
{
"epoch": 0.83,
"learning_rate": 8.41038182914786e-06,
"loss": 2.8013,
"step": 251000
},
{
"epoch": 0.83,
"learning_rate": 8.393812260155489e-06,
"loss": 2.8026,
"step": 251100
},
{
"epoch": 0.83,
"learning_rate": 8.377242691163118e-06,
"loss": 2.8061,
"step": 251200
},
{
"epoch": 0.83,
"learning_rate": 8.360673122170746e-06,
"loss": 2.8008,
"step": 251300
},
{
"epoch": 0.83,
"learning_rate": 8.344103553178375e-06,
"loss": 2.8072,
"step": 251400
},
{
"epoch": 0.83,
"learning_rate": 8.327533984186004e-06,
"loss": 2.7958,
"step": 251500
},
{
"epoch": 0.83,
"learning_rate": 8.310964415193633e-06,
"loss": 2.7957,
"step": 251600
},
{
"epoch": 0.83,
"learning_rate": 8.29439484620126e-06,
"loss": 2.7955,
"step": 251700
},
{
"epoch": 0.83,
"learning_rate": 8.27782527720889e-06,
"loss": 2.7905,
"step": 251800
},
{
"epoch": 0.83,
"learning_rate": 8.26125570821652e-06,
"loss": 2.7914,
"step": 251900
},
{
"epoch": 0.84,
"learning_rate": 8.244686139224147e-06,
"loss": 2.7988,
"step": 252000
},
{
"epoch": 0.84,
"learning_rate": 8.228116570231776e-06,
"loss": 2.7998,
"step": 252100
},
{
"epoch": 0.84,
"learning_rate": 8.211547001239405e-06,
"loss": 2.7933,
"step": 252200
},
{
"epoch": 0.84,
"learning_rate": 8.194977432247033e-06,
"loss": 2.8,
"step": 252300
},
{
"epoch": 0.84,
"learning_rate": 8.178407863254662e-06,
"loss": 2.7943,
"step": 252400
},
{
"epoch": 0.84,
"learning_rate": 8.161838294262291e-06,
"loss": 2.7924,
"step": 252500
},
{
"epoch": 0.84,
"learning_rate": 8.145268725269919e-06,
"loss": 2.7823,
"step": 252600
},
{
"epoch": 0.84,
"learning_rate": 8.128699156277548e-06,
"loss": 2.7859,
"step": 252700
},
{
"epoch": 0.84,
"learning_rate": 8.112129587285177e-06,
"loss": 2.7725,
"step": 252800
},
{
"epoch": 0.84,
"learning_rate": 8.095560018292805e-06,
"loss": 2.797,
"step": 252900
},
{
"epoch": 0.84,
"learning_rate": 8.078990449300434e-06,
"loss": 2.801,
"step": 253000
},
{
"epoch": 0.84,
"learning_rate": 8.062420880308063e-06,
"loss": 2.7907,
"step": 253100
},
{
"epoch": 0.84,
"learning_rate": 8.04585131131569e-06,
"loss": 2.7974,
"step": 253200
},
{
"epoch": 0.84,
"learning_rate": 8.02928174232332e-06,
"loss": 2.7871,
"step": 253300
},
{
"epoch": 0.84,
"learning_rate": 8.012712173330949e-06,
"loss": 2.8006,
"step": 253400
},
{
"epoch": 0.84,
"learning_rate": 7.996142604338576e-06,
"loss": 2.8086,
"step": 253500
},
{
"epoch": 0.84,
"learning_rate": 7.979573035346206e-06,
"loss": 2.7921,
"step": 253600
},
{
"epoch": 0.84,
"learning_rate": 7.963003466353835e-06,
"loss": 2.78,
"step": 253700
},
{
"epoch": 0.84,
"learning_rate": 7.946433897361462e-06,
"loss": 2.8049,
"step": 253800
},
{
"epoch": 0.84,
"learning_rate": 7.929864328369092e-06,
"loss": 2.799,
"step": 253900
},
{
"epoch": 0.84,
"learning_rate": 7.91329475937672e-06,
"loss": 2.7912,
"step": 254000
},
{
"epoch": 0.84,
"learning_rate": 7.896725190384348e-06,
"loss": 2.7894,
"step": 254100
},
{
"epoch": 0.84,
"learning_rate": 7.880155621391977e-06,
"loss": 2.7886,
"step": 254200
},
{
"epoch": 0.84,
"learning_rate": 7.863586052399607e-06,
"loss": 2.8055,
"step": 254300
},
{
"epoch": 0.84,
"learning_rate": 7.847016483407234e-06,
"loss": 2.7853,
"step": 254400
},
{
"epoch": 0.84,
"learning_rate": 7.830446914414863e-06,
"loss": 2.8003,
"step": 254500
},
{
"epoch": 0.84,
"learning_rate": 7.813877345422491e-06,
"loss": 2.789,
"step": 254600
},
{
"epoch": 0.84,
"learning_rate": 7.79730777643012e-06,
"loss": 2.7753,
"step": 254700
},
{
"epoch": 0.84,
"learning_rate": 7.78073820743775e-06,
"loss": 2.7865,
"step": 254800
},
{
"epoch": 0.84,
"learning_rate": 7.764168638445377e-06,
"loss": 2.794,
"step": 254900
},
{
"epoch": 0.85,
"learning_rate": 7.747599069453006e-06,
"loss": 2.7952,
"step": 255000
},
{
"epoch": 0.85,
"learning_rate": 7.731029500460635e-06,
"loss": 2.7921,
"step": 255100
},
{
"epoch": 0.85,
"learning_rate": 7.714459931468263e-06,
"loss": 2.7876,
"step": 255200
},
{
"epoch": 0.85,
"learning_rate": 7.697890362475892e-06,
"loss": 2.7814,
"step": 255300
},
{
"epoch": 0.85,
"learning_rate": 7.681320793483521e-06,
"loss": 2.8002,
"step": 255400
},
{
"epoch": 0.85,
"learning_rate": 7.664751224491149e-06,
"loss": 2.8032,
"step": 255500
},
{
"epoch": 0.85,
"learning_rate": 7.648181655498778e-06,
"loss": 2.7835,
"step": 255600
},
{
"epoch": 0.85,
"learning_rate": 7.631612086506407e-06,
"loss": 2.7929,
"step": 255700
},
{
"epoch": 0.85,
"learning_rate": 7.6150425175140345e-06,
"loss": 2.7988,
"step": 255800
},
{
"epoch": 0.85,
"learning_rate": 7.598472948521664e-06,
"loss": 2.8067,
"step": 255900
},
{
"epoch": 0.85,
"learning_rate": 7.581903379529293e-06,
"loss": 2.8045,
"step": 256000
},
{
"epoch": 0.85,
"learning_rate": 7.5653338105369205e-06,
"loss": 2.7995,
"step": 256100
},
{
"epoch": 0.85,
"learning_rate": 7.54876424154455e-06,
"loss": 2.8029,
"step": 256200
},
{
"epoch": 0.85,
"learning_rate": 7.532194672552179e-06,
"loss": 2.7954,
"step": 256300
},
{
"epoch": 0.85,
"learning_rate": 7.515625103559806e-06,
"loss": 2.7837,
"step": 256400
},
{
"epoch": 0.85,
"learning_rate": 7.499055534567436e-06,
"loss": 2.7949,
"step": 256500
},
{
"epoch": 0.85,
"learning_rate": 7.482485965575063e-06,
"loss": 2.7919,
"step": 256600
},
{
"epoch": 0.85,
"learning_rate": 7.465916396582692e-06,
"loss": 2.8026,
"step": 256700
},
{
"epoch": 0.85,
"learning_rate": 7.4493468275903215e-06,
"loss": 2.7935,
"step": 256800
},
{
"epoch": 0.85,
"learning_rate": 7.432777258597949e-06,
"loss": 2.7939,
"step": 256900
},
{
"epoch": 0.85,
"learning_rate": 7.416207689605578e-06,
"loss": 2.7933,
"step": 257000
},
{
"epoch": 0.85,
"learning_rate": 7.3996381206132074e-06,
"loss": 2.7919,
"step": 257100
},
{
"epoch": 0.85,
"learning_rate": 7.383068551620835e-06,
"loss": 2.7945,
"step": 257200
},
{
"epoch": 0.85,
"learning_rate": 7.366498982628464e-06,
"loss": 2.7986,
"step": 257300
},
{
"epoch": 0.85,
"learning_rate": 7.349929413636093e-06,
"loss": 2.8011,
"step": 257400
},
{
"epoch": 0.85,
"learning_rate": 7.333359844643721e-06,
"loss": 2.7867,
"step": 257500
},
{
"epoch": 0.85,
"learning_rate": 7.31679027565135e-06,
"loss": 2.7911,
"step": 257600
},
{
"epoch": 0.85,
"learning_rate": 7.300220706658979e-06,
"loss": 2.7914,
"step": 257700
},
{
"epoch": 0.85,
"learning_rate": 7.283651137666607e-06,
"loss": 2.806,
"step": 257800
},
{
"epoch": 0.85,
"learning_rate": 7.267081568674236e-06,
"loss": 2.8052,
"step": 257900
},
{
"epoch": 0.85,
"learning_rate": 7.250511999681865e-06,
"loss": 2.7932,
"step": 258000
},
{
"epoch": 0.86,
"learning_rate": 7.233942430689493e-06,
"loss": 2.7814,
"step": 258100
},
{
"epoch": 0.86,
"learning_rate": 7.217372861697122e-06,
"loss": 2.7874,
"step": 258200
},
{
"epoch": 0.86,
"learning_rate": 7.200803292704751e-06,
"loss": 2.7936,
"step": 258300
},
{
"epoch": 0.86,
"learning_rate": 7.184233723712379e-06,
"loss": 2.7914,
"step": 258400
},
{
"epoch": 0.86,
"learning_rate": 7.167664154720008e-06,
"loss": 2.7894,
"step": 258500
},
{
"epoch": 0.86,
"learning_rate": 7.151094585727637e-06,
"loss": 2.7948,
"step": 258600
},
{
"epoch": 0.86,
"learning_rate": 7.1345250167352646e-06,
"loss": 2.7816,
"step": 258700
},
{
"epoch": 0.86,
"learning_rate": 7.117955447742894e-06,
"loss": 2.8043,
"step": 258800
},
{
"epoch": 0.86,
"learning_rate": 7.101385878750521e-06,
"loss": 2.7861,
"step": 258900
},
{
"epoch": 0.86,
"learning_rate": 7.0848163097581505e-06,
"loss": 2.8,
"step": 259000
},
{
"epoch": 0.86,
"learning_rate": 7.06824674076578e-06,
"loss": 2.7769,
"step": 259100
},
{
"epoch": 0.86,
"learning_rate": 7.051677171773407e-06,
"loss": 2.7788,
"step": 259200
},
{
"epoch": 0.86,
"learning_rate": 7.035107602781036e-06,
"loss": 2.7893,
"step": 259300
},
{
"epoch": 0.86,
"learning_rate": 7.018538033788666e-06,
"loss": 2.7944,
"step": 259400
},
{
"epoch": 0.86,
"learning_rate": 7.001968464796293e-06,
"loss": 2.7912,
"step": 259500
},
{
"epoch": 0.86,
"learning_rate": 6.985398895803922e-06,
"loss": 2.788,
"step": 259600
},
{
"epoch": 0.86,
"learning_rate": 6.9688293268115515e-06,
"loss": 2.7855,
"step": 259700
},
{
"epoch": 0.86,
"learning_rate": 6.95225975781918e-06,
"loss": 2.7968,
"step": 259800
},
{
"epoch": 0.86,
"learning_rate": 6.935690188826808e-06,
"loss": 2.8003,
"step": 259900
},
{
"epoch": 0.86,
"learning_rate": 6.9191206198344375e-06,
"loss": 2.797,
"step": 260000
},
{
"epoch": 0.86,
"learning_rate": 6.902551050842066e-06,
"loss": 2.7815,
"step": 260100
},
{
"epoch": 0.86,
"learning_rate": 6.885981481849694e-06,
"loss": 2.8013,
"step": 260200
},
{
"epoch": 0.86,
"learning_rate": 6.869411912857323e-06,
"loss": 2.8079,
"step": 260300
},
{
"epoch": 0.86,
"learning_rate": 6.852842343864952e-06,
"loss": 2.7889,
"step": 260400
},
{
"epoch": 0.86,
"learning_rate": 6.83627277487258e-06,
"loss": 2.7833,
"step": 260500
},
{
"epoch": 0.86,
"learning_rate": 6.819703205880209e-06,
"loss": 2.8068,
"step": 260600
},
{
"epoch": 0.86,
"learning_rate": 6.803133636887838e-06,
"loss": 2.7961,
"step": 260700
},
{
"epoch": 0.86,
"learning_rate": 6.786564067895466e-06,
"loss": 2.7919,
"step": 260800
},
{
"epoch": 0.86,
"learning_rate": 6.769994498903094e-06,
"loss": 2.8013,
"step": 260900
},
{
"epoch": 0.86,
"learning_rate": 6.753424929910724e-06,
"loss": 2.7801,
"step": 261000
},
{
"epoch": 0.87,
"learning_rate": 6.736855360918353e-06,
"loss": 2.7916,
"step": 261100
},
{
"epoch": 0.87,
"learning_rate": 6.72028579192598e-06,
"loss": 2.7992,
"step": 261200
},
{
"epoch": 0.87,
"learning_rate": 6.7037162229336095e-06,
"loss": 2.7991,
"step": 261300
},
{
"epoch": 0.87,
"learning_rate": 6.687146653941239e-06,
"loss": 2.7962,
"step": 261400
},
{
"epoch": 0.87,
"learning_rate": 6.670577084948866e-06,
"loss": 2.7893,
"step": 261500
},
{
"epoch": 0.87,
"learning_rate": 6.6540075159564954e-06,
"loss": 2.7992,
"step": 261600
},
{
"epoch": 0.87,
"learning_rate": 6.637437946964125e-06,
"loss": 2.7971,
"step": 261700
},
{
"epoch": 0.87,
"learning_rate": 6.620868377971752e-06,
"loss": 2.7827,
"step": 261800
},
{
"epoch": 0.87,
"learning_rate": 6.604298808979381e-06,
"loss": 2.7888,
"step": 261900
},
{
"epoch": 0.87,
"learning_rate": 6.5877292399870106e-06,
"loss": 2.7953,
"step": 262000
},
{
"epoch": 0.87,
"learning_rate": 6.571159670994638e-06,
"loss": 2.7796,
"step": 262100
},
{
"epoch": 0.87,
"learning_rate": 6.554590102002267e-06,
"loss": 2.8095,
"step": 262200
},
{
"epoch": 0.87,
"learning_rate": 6.5380205330098965e-06,
"loss": 2.8021,
"step": 262300
},
{
"epoch": 0.87,
"learning_rate": 6.521450964017524e-06,
"loss": 2.7859,
"step": 262400
},
{
"epoch": 0.87,
"learning_rate": 6.504881395025153e-06,
"loss": 2.7844,
"step": 262500
},
{
"epoch": 0.87,
"learning_rate": 6.488311826032782e-06,
"loss": 2.7912,
"step": 262600
},
{
"epoch": 0.87,
"learning_rate": 6.47174225704041e-06,
"loss": 2.7971,
"step": 262700
},
{
"epoch": 0.87,
"learning_rate": 6.455172688048039e-06,
"loss": 2.7795,
"step": 262800
},
{
"epoch": 0.87,
"learning_rate": 6.438603119055668e-06,
"loss": 2.7986,
"step": 262900
},
{
"epoch": 0.87,
"learning_rate": 6.422033550063296e-06,
"loss": 2.7888,
"step": 263000
},
{
"epoch": 0.87,
"learning_rate": 6.405463981070925e-06,
"loss": 2.7905,
"step": 263100
},
{
"epoch": 0.87,
"learning_rate": 6.3888944120785526e-06,
"loss": 2.7899,
"step": 263200
},
{
"epoch": 0.87,
"learning_rate": 6.372324843086182e-06,
"loss": 2.7947,
"step": 263300
},
{
"epoch": 0.87,
"learning_rate": 6.355755274093811e-06,
"loss": 2.7919,
"step": 263400
},
{
"epoch": 0.87,
"learning_rate": 6.3391857051014385e-06,
"loss": 2.7922,
"step": 263500
},
{
"epoch": 0.87,
"learning_rate": 6.322616136109068e-06,
"loss": 2.8088,
"step": 263600
},
{
"epoch": 0.87,
"learning_rate": 6.306046567116697e-06,
"loss": 2.7944,
"step": 263700
},
{
"epoch": 0.87,
"learning_rate": 6.289476998124324e-06,
"loss": 2.7931,
"step": 263800
},
{
"epoch": 0.87,
"learning_rate": 6.272907429131954e-06,
"loss": 2.7952,
"step": 263900
},
{
"epoch": 0.87,
"learning_rate": 6.256337860139583e-06,
"loss": 2.7892,
"step": 264000
},
{
"epoch": 0.88,
"learning_rate": 6.239768291147211e-06,
"loss": 2.7722,
"step": 264100
},
{
"epoch": 0.88,
"learning_rate": 6.2231987221548395e-06,
"loss": 2.7827,
"step": 264200
},
{
"epoch": 0.88,
"learning_rate": 6.206629153162468e-06,
"loss": 2.8034,
"step": 264300
},
{
"epoch": 0.88,
"learning_rate": 6.190059584170097e-06,
"loss": 2.7793,
"step": 264400
},
{
"epoch": 0.88,
"learning_rate": 6.1734900151777255e-06,
"loss": 2.796,
"step": 264500
},
{
"epoch": 0.88,
"learning_rate": 6.156920446185354e-06,
"loss": 2.8012,
"step": 264600
},
{
"epoch": 0.88,
"learning_rate": 6.140350877192982e-06,
"loss": 2.7965,
"step": 264700
},
{
"epoch": 0.88,
"learning_rate": 6.123781308200611e-06,
"loss": 2.785,
"step": 264800
},
{
"epoch": 0.88,
"learning_rate": 6.10721173920824e-06,
"loss": 2.7981,
"step": 264900
},
{
"epoch": 0.88,
"learning_rate": 6.090642170215868e-06,
"loss": 2.8093,
"step": 265000
},
{
"epoch": 0.88,
"learning_rate": 6.074072601223497e-06,
"loss": 2.7965,
"step": 265100
},
{
"epoch": 0.88,
"learning_rate": 6.057503032231126e-06,
"loss": 2.8013,
"step": 265200
},
{
"epoch": 0.88,
"learning_rate": 6.040933463238754e-06,
"loss": 2.8035,
"step": 265300
},
{
"epoch": 0.88,
"learning_rate": 6.024363894246383e-06,
"loss": 2.79,
"step": 265400
},
{
"epoch": 0.88,
"learning_rate": 6.007794325254012e-06,
"loss": 2.7817,
"step": 265500
},
{
"epoch": 0.88,
"learning_rate": 5.99122475626164e-06,
"loss": 2.7995,
"step": 265600
},
{
"epoch": 0.88,
"learning_rate": 5.974655187269269e-06,
"loss": 2.7914,
"step": 265700
},
{
"epoch": 0.88,
"learning_rate": 5.9580856182768975e-06,
"loss": 2.7941,
"step": 265800
},
{
"epoch": 0.88,
"learning_rate": 5.941516049284526e-06,
"loss": 2.7869,
"step": 265900
},
{
"epoch": 0.88,
"learning_rate": 5.924946480292154e-06,
"loss": 2.7886,
"step": 266000
},
{
"epoch": 0.88,
"learning_rate": 5.9083769112997834e-06,
"loss": 2.7841,
"step": 266100
},
{
"epoch": 0.88,
"learning_rate": 5.891807342307412e-06,
"loss": 2.7969,
"step": 266200
},
{
"epoch": 0.88,
"learning_rate": 5.875237773315041e-06,
"loss": 2.7855,
"step": 266300
},
{
"epoch": 0.88,
"learning_rate": 5.858668204322669e-06,
"loss": 2.7846,
"step": 266400
},
{
"epoch": 0.88,
"learning_rate": 5.842098635330298e-06,
"loss": 2.7826,
"step": 266500
},
{
"epoch": 0.88,
"learning_rate": 5.825529066337927e-06,
"loss": 2.7806,
"step": 266600
},
{
"epoch": 0.88,
"learning_rate": 5.808959497345555e-06,
"loss": 2.8034,
"step": 266700
},
{
"epoch": 0.88,
"learning_rate": 5.7923899283531845e-06,
"loss": 2.7939,
"step": 266800
},
{
"epoch": 0.88,
"learning_rate": 5.775820359360813e-06,
"loss": 2.7987,
"step": 266900
},
{
"epoch": 0.88,
"learning_rate": 5.759250790368441e-06,
"loss": 2.79,
"step": 267000
},
{
"epoch": 0.89,
"learning_rate": 5.74268122137607e-06,
"loss": 2.8049,
"step": 267100
},
{
"epoch": 0.89,
"learning_rate": 5.726111652383699e-06,
"loss": 2.7746,
"step": 267200
},
{
"epoch": 0.89,
"learning_rate": 5.709542083391327e-06,
"loss": 2.7901,
"step": 267300
},
{
"epoch": 0.89,
"learning_rate": 5.692972514398956e-06,
"loss": 2.7982,
"step": 267400
},
{
"epoch": 0.89,
"learning_rate": 5.676402945406585e-06,
"loss": 2.7899,
"step": 267500
},
{
"epoch": 0.89,
"learning_rate": 5.659833376414213e-06,
"loss": 2.792,
"step": 267600
},
{
"epoch": 0.89,
"learning_rate": 5.643263807421842e-06,
"loss": 2.7937,
"step": 267700
},
{
"epoch": 0.89,
"learning_rate": 5.626694238429471e-06,
"loss": 2.8037,
"step": 267800
},
{
"epoch": 0.89,
"learning_rate": 5.610124669437099e-06,
"loss": 2.7833,
"step": 267900
},
{
"epoch": 0.89,
"learning_rate": 5.593555100444727e-06,
"loss": 2.7985,
"step": 268000
},
{
"epoch": 0.89,
"learning_rate": 5.5769855314523565e-06,
"loss": 2.7915,
"step": 268100
},
{
"epoch": 0.89,
"learning_rate": 5.560415962459985e-06,
"loss": 2.7841,
"step": 268200
},
{
"epoch": 0.89,
"learning_rate": 5.543846393467613e-06,
"loss": 2.7811,
"step": 268300
},
{
"epoch": 0.89,
"learning_rate": 5.5272768244752424e-06,
"loss": 2.7818,
"step": 268400
},
{
"epoch": 0.89,
"learning_rate": 5.510707255482871e-06,
"loss": 2.7841,
"step": 268500
},
{
"epoch": 0.89,
"learning_rate": 5.494137686490499e-06,
"loss": 2.7984,
"step": 268600
},
{
"epoch": 0.89,
"learning_rate": 5.477568117498128e-06,
"loss": 2.7801,
"step": 268700
},
{
"epoch": 0.89,
"learning_rate": 5.460998548505757e-06,
"loss": 2.7964,
"step": 268800
},
{
"epoch": 0.89,
"learning_rate": 5.444428979513385e-06,
"loss": 2.7936,
"step": 268900
},
{
"epoch": 0.89,
"learning_rate": 5.427859410521014e-06,
"loss": 2.7942,
"step": 269000
},
{
"epoch": 0.89,
"learning_rate": 5.411289841528643e-06,
"loss": 2.7861,
"step": 269100
},
{
"epoch": 0.89,
"learning_rate": 5.394720272536271e-06,
"loss": 2.8002,
"step": 269200
},
{
"epoch": 0.89,
"learning_rate": 5.378150703543899e-06,
"loss": 2.7862,
"step": 269300
},
{
"epoch": 0.89,
"learning_rate": 5.3615811345515286e-06,
"loss": 2.7836,
"step": 269400
},
{
"epoch": 0.89,
"learning_rate": 5.345011565559157e-06,
"loss": 2.7821,
"step": 269500
},
{
"epoch": 0.89,
"learning_rate": 5.328441996566785e-06,
"loss": 2.7942,
"step": 269600
},
{
"epoch": 0.89,
"learning_rate": 5.3118724275744145e-06,
"loss": 2.7968,
"step": 269700
},
{
"epoch": 0.89,
"learning_rate": 5.295302858582043e-06,
"loss": 2.7795,
"step": 269800
},
{
"epoch": 0.89,
"learning_rate": 5.278733289589671e-06,
"loss": 2.7833,
"step": 269900
},
{
"epoch": 0.89,
"learning_rate": 5.2621637205973e-06,
"loss": 2.7735,
"step": 270000
},
{
"epoch": 0.9,
"learning_rate": 5.245594151604929e-06,
"loss": 2.7802,
"step": 270100
},
{
"epoch": 0.9,
"learning_rate": 5.229024582612557e-06,
"loss": 2.8002,
"step": 270200
},
{
"epoch": 0.9,
"learning_rate": 5.2124550136201855e-06,
"loss": 2.7805,
"step": 270300
},
{
"epoch": 0.9,
"learning_rate": 5.195885444627815e-06,
"loss": 2.7832,
"step": 270400
},
{
"epoch": 0.9,
"learning_rate": 5.179315875635443e-06,
"loss": 2.7818,
"step": 270500
},
{
"epoch": 0.9,
"learning_rate": 5.162746306643071e-06,
"loss": 2.7999,
"step": 270600
},
{
"epoch": 0.9,
"learning_rate": 5.146176737650701e-06,
"loss": 2.7897,
"step": 270700
},
{
"epoch": 0.9,
"learning_rate": 5.129607168658329e-06,
"loss": 2.8014,
"step": 270800
},
{
"epoch": 0.9,
"learning_rate": 5.113037599665957e-06,
"loss": 2.7753,
"step": 270900
},
{
"epoch": 0.9,
"learning_rate": 5.0964680306735865e-06,
"loss": 2.7898,
"step": 271000
},
{
"epoch": 0.9,
"learning_rate": 5.079898461681215e-06,
"loss": 2.7935,
"step": 271100
},
{
"epoch": 0.9,
"learning_rate": 5.063328892688843e-06,
"loss": 2.7942,
"step": 271200
},
{
"epoch": 0.9,
"learning_rate": 5.046759323696472e-06,
"loss": 2.7875,
"step": 271300
},
{
"epoch": 0.9,
"learning_rate": 5.030189754704101e-06,
"loss": 2.7916,
"step": 271400
},
{
"epoch": 0.9,
"learning_rate": 5.013620185711729e-06,
"loss": 2.7919,
"step": 271500
},
{
"epoch": 0.9,
"learning_rate": 4.9970506167193575e-06,
"loss": 2.7832,
"step": 271600
},
{
"epoch": 0.9,
"learning_rate": 4.980481047726987e-06,
"loss": 2.8091,
"step": 271700
},
{
"epoch": 0.9,
"learning_rate": 4.963911478734615e-06,
"loss": 2.7857,
"step": 271800
},
{
"epoch": 0.9,
"learning_rate": 4.9473419097422435e-06,
"loss": 2.7856,
"step": 271900
},
{
"epoch": 0.9,
"learning_rate": 4.930772340749873e-06,
"loss": 2.7802,
"step": 272000
},
{
"epoch": 0.9,
"learning_rate": 4.914202771757501e-06,
"loss": 2.7859,
"step": 272100
},
{
"epoch": 0.9,
"learning_rate": 4.897633202765129e-06,
"loss": 2.7872,
"step": 272200
},
{
"epoch": 0.9,
"learning_rate": 4.881063633772759e-06,
"loss": 2.8011,
"step": 272300
},
{
"epoch": 0.9,
"learning_rate": 4.864494064780387e-06,
"loss": 2.7879,
"step": 272400
},
{
"epoch": 0.9,
"learning_rate": 4.847924495788015e-06,
"loss": 2.7948,
"step": 272500
},
{
"epoch": 0.9,
"learning_rate": 4.8313549267956445e-06,
"loss": 2.7887,
"step": 272600
},
{
"epoch": 0.9,
"learning_rate": 4.814785357803273e-06,
"loss": 2.7793,
"step": 272700
},
{
"epoch": 0.9,
"learning_rate": 4.798215788810902e-06,
"loss": 2.7902,
"step": 272800
},
{
"epoch": 0.9,
"learning_rate": 4.7816462198185304e-06,
"loss": 2.7958,
"step": 272900
},
{
"epoch": 0.9,
"learning_rate": 4.765076650826159e-06,
"loss": 2.8016,
"step": 273000
},
{
"epoch": 0.91,
"learning_rate": 4.748507081833788e-06,
"loss": 2.8061,
"step": 273100
},
{
"epoch": 0.91,
"learning_rate": 4.731937512841416e-06,
"loss": 2.7983,
"step": 273200
},
{
"epoch": 0.91,
"learning_rate": 4.7153679438490456e-06,
"loss": 2.7893,
"step": 273300
},
{
"epoch": 0.91,
"learning_rate": 4.698798374856674e-06,
"loss": 2.8066,
"step": 273400
},
{
"epoch": 0.91,
"learning_rate": 4.682228805864302e-06,
"loss": 2.7843,
"step": 273500
},
{
"epoch": 0.91,
"learning_rate": 4.665659236871931e-06,
"loss": 2.7889,
"step": 273600
},
{
"epoch": 0.91,
"learning_rate": 4.64908966787956e-06,
"loss": 2.7809,
"step": 273700
},
{
"epoch": 0.91,
"learning_rate": 4.632520098887188e-06,
"loss": 2.7972,
"step": 273800
},
{
"epoch": 0.91,
"learning_rate": 4.6159505298948166e-06,
"loss": 2.7973,
"step": 273900
},
{
"epoch": 0.91,
"learning_rate": 4.599380960902446e-06,
"loss": 2.7943,
"step": 274000
},
{
"epoch": 0.91,
"learning_rate": 4.582811391910074e-06,
"loss": 2.7929,
"step": 274100
},
{
"epoch": 0.91,
"learning_rate": 4.5662418229177025e-06,
"loss": 2.7776,
"step": 274200
},
{
"epoch": 0.91,
"learning_rate": 4.549672253925332e-06,
"loss": 2.7853,
"step": 274300
},
{
"epoch": 0.91,
"learning_rate": 4.53310268493296e-06,
"loss": 2.7942,
"step": 274400
},
{
"epoch": 0.91,
"learning_rate": 4.516533115940588e-06,
"loss": 2.7865,
"step": 274500
},
{
"epoch": 0.91,
"learning_rate": 4.499963546948217e-06,
"loss": 2.7932,
"step": 274600
},
{
"epoch": 0.91,
"learning_rate": 4.483393977955846e-06,
"loss": 2.7731,
"step": 274700
},
{
"epoch": 0.91,
"learning_rate": 4.466824408963474e-06,
"loss": 2.7749,
"step": 274800
},
{
"epoch": 0.91,
"learning_rate": 4.450254839971103e-06,
"loss": 2.7969,
"step": 274900
},
{
"epoch": 0.91,
"learning_rate": 4.433685270978732e-06,
"loss": 2.7934,
"step": 275000
},
{
"epoch": 0.91,
"learning_rate": 4.41711570198636e-06,
"loss": 2.7833,
"step": 275100
},
{
"epoch": 0.91,
"learning_rate": 4.400546132993989e-06,
"loss": 2.7862,
"step": 275200
},
{
"epoch": 0.91,
"learning_rate": 4.383976564001618e-06,
"loss": 2.787,
"step": 275300
},
{
"epoch": 0.91,
"learning_rate": 4.367406995009246e-06,
"loss": 2.7897,
"step": 275400
},
{
"epoch": 0.91,
"learning_rate": 4.3508374260168745e-06,
"loss": 2.7766,
"step": 275500
},
{
"epoch": 0.91,
"learning_rate": 4.334267857024503e-06,
"loss": 2.7836,
"step": 275600
},
{
"epoch": 0.91,
"learning_rate": 4.317698288032132e-06,
"loss": 2.8063,
"step": 275700
},
{
"epoch": 0.91,
"learning_rate": 4.3011287190397605e-06,
"loss": 2.7856,
"step": 275800
},
{
"epoch": 0.91,
"learning_rate": 4.284559150047389e-06,
"loss": 2.8011,
"step": 275900
},
{
"epoch": 0.91,
"learning_rate": 4.267989581055018e-06,
"loss": 2.7862,
"step": 276000
},
{
"epoch": 0.91,
"learning_rate": 4.251420012062646e-06,
"loss": 2.7713,
"step": 276100
},
{
"epoch": 0.92,
"learning_rate": 4.234850443070275e-06,
"loss": 2.7955,
"step": 276200
},
{
"epoch": 0.92,
"learning_rate": 4.218280874077904e-06,
"loss": 2.8026,
"step": 276300
},
{
"epoch": 0.92,
"learning_rate": 4.201711305085532e-06,
"loss": 2.779,
"step": 276400
},
{
"epoch": 0.92,
"learning_rate": 4.185141736093161e-06,
"loss": 2.7817,
"step": 276500
},
{
"epoch": 0.92,
"learning_rate": 4.16857216710079e-06,
"loss": 2.7929,
"step": 276600
},
{
"epoch": 0.92,
"learning_rate": 4.152002598108418e-06,
"loss": 2.7972,
"step": 276700
},
{
"epoch": 0.92,
"learning_rate": 4.135433029116047e-06,
"loss": 2.7697,
"step": 276800
},
{
"epoch": 0.92,
"learning_rate": 4.118863460123675e-06,
"loss": 2.7907,
"step": 276900
},
{
"epoch": 0.92,
"learning_rate": 4.102293891131304e-06,
"loss": 2.7976,
"step": 277000
},
{
"epoch": 0.92,
"learning_rate": 4.0857243221389325e-06,
"loss": 2.7739,
"step": 277100
},
{
"epoch": 0.92,
"learning_rate": 4.069154753146561e-06,
"loss": 2.7852,
"step": 277200
},
{
"epoch": 0.92,
"learning_rate": 4.05258518415419e-06,
"loss": 2.7908,
"step": 277300
},
{
"epoch": 0.92,
"learning_rate": 4.0360156151618184e-06,
"loss": 2.8085,
"step": 277400
},
{
"epoch": 0.92,
"learning_rate": 4.019446046169447e-06,
"loss": 2.781,
"step": 277500
},
{
"epoch": 0.92,
"learning_rate": 4.002876477177076e-06,
"loss": 2.7925,
"step": 277600
},
{
"epoch": 0.92,
"learning_rate": 3.986306908184704e-06,
"loss": 2.7818,
"step": 277700
},
{
"epoch": 0.92,
"learning_rate": 3.969737339192333e-06,
"loss": 2.7804,
"step": 277800
},
{
"epoch": 0.92,
"learning_rate": 3.953167770199961e-06,
"loss": 2.7905,
"step": 277900
},
{
"epoch": 0.92,
"learning_rate": 3.93659820120759e-06,
"loss": 2.8011,
"step": 278000
},
{
"epoch": 0.92,
"learning_rate": 3.920028632215219e-06,
"loss": 2.7848,
"step": 278100
},
{
"epoch": 0.92,
"learning_rate": 3.903459063222847e-06,
"loss": 2.7785,
"step": 278200
},
{
"epoch": 0.92,
"learning_rate": 3.886889494230476e-06,
"loss": 2.7789,
"step": 278300
},
{
"epoch": 0.92,
"learning_rate": 3.8703199252381046e-06,
"loss": 2.8022,
"step": 278400
},
{
"epoch": 0.92,
"learning_rate": 3.853750356245733e-06,
"loss": 2.7924,
"step": 278500
},
{
"epoch": 0.92,
"learning_rate": 3.837180787253362e-06,
"loss": 2.7993,
"step": 278600
},
{
"epoch": 0.92,
"learning_rate": 3.8206112182609905e-06,
"loss": 2.7986,
"step": 278700
},
{
"epoch": 0.92,
"learning_rate": 3.8040416492686193e-06,
"loss": 2.7771,
"step": 278800
},
{
"epoch": 0.92,
"learning_rate": 3.7874720802762476e-06,
"loss": 2.7933,
"step": 278900
},
{
"epoch": 0.92,
"learning_rate": 3.770902511283877e-06,
"loss": 2.7826,
"step": 279000
},
{
"epoch": 0.92,
"learning_rate": 3.754332942291505e-06,
"loss": 2.7813,
"step": 279100
},
{
"epoch": 0.93,
"learning_rate": 3.7377633732991335e-06,
"loss": 2.7829,
"step": 279200
},
{
"epoch": 0.93,
"learning_rate": 3.7211938043067627e-06,
"loss": 2.7841,
"step": 279300
},
{
"epoch": 0.93,
"learning_rate": 3.704624235314391e-06,
"loss": 2.7766,
"step": 279400
},
{
"epoch": 0.93,
"learning_rate": 3.6880546663220195e-06,
"loss": 2.7846,
"step": 279500
},
{
"epoch": 0.93,
"learning_rate": 3.6714850973296487e-06,
"loss": 2.7978,
"step": 279600
},
{
"epoch": 0.93,
"learning_rate": 3.654915528337277e-06,
"loss": 2.7829,
"step": 279700
},
{
"epoch": 0.93,
"learning_rate": 3.6383459593449054e-06,
"loss": 2.7869,
"step": 279800
},
{
"epoch": 0.93,
"learning_rate": 3.6217763903525346e-06,
"loss": 2.7918,
"step": 279900
},
{
"epoch": 0.93,
"learning_rate": 3.605206821360163e-06,
"loss": 2.7964,
"step": 280000
},
{
"epoch": 0.93,
"learning_rate": 3.5886372523677913e-06,
"loss": 2.7785,
"step": 280100
},
{
"epoch": 0.93,
"learning_rate": 3.57206768337542e-06,
"loss": 2.7946,
"step": 280200
},
{
"epoch": 0.93,
"learning_rate": 3.555498114383049e-06,
"loss": 2.7775,
"step": 280300
},
{
"epoch": 0.93,
"learning_rate": 3.5389285453906776e-06,
"loss": 2.7756,
"step": 280400
},
{
"epoch": 0.93,
"learning_rate": 3.522358976398306e-06,
"loss": 2.7914,
"step": 280500
},
{
"epoch": 0.93,
"learning_rate": 3.505789407405935e-06,
"loss": 2.7921,
"step": 280600
},
{
"epoch": 0.93,
"learning_rate": 3.4892198384135636e-06,
"loss": 2.7899,
"step": 280700
},
{
"epoch": 0.93,
"learning_rate": 3.472650269421192e-06,
"loss": 2.7997,
"step": 280800
},
{
"epoch": 0.93,
"learning_rate": 3.456080700428821e-06,
"loss": 2.7789,
"step": 280900
},
{
"epoch": 0.93,
"learning_rate": 3.4395111314364495e-06,
"loss": 2.7972,
"step": 281000
},
{
"epoch": 0.93,
"learning_rate": 3.422941562444078e-06,
"loss": 2.7916,
"step": 281100
},
{
"epoch": 0.93,
"learning_rate": 3.406371993451706e-06,
"loss": 2.8016,
"step": 281200
},
{
"epoch": 0.93,
"learning_rate": 3.3898024244593354e-06,
"loss": 2.7794,
"step": 281300
},
{
"epoch": 0.93,
"learning_rate": 3.3732328554669638e-06,
"loss": 2.787,
"step": 281400
},
{
"epoch": 0.93,
"learning_rate": 3.356663286474592e-06,
"loss": 2.7995,
"step": 281500
},
{
"epoch": 0.93,
"learning_rate": 3.3400937174822213e-06,
"loss": 2.7894,
"step": 281600
},
{
"epoch": 0.93,
"learning_rate": 3.3235241484898497e-06,
"loss": 2.8005,
"step": 281700
},
{
"epoch": 0.93,
"learning_rate": 3.306954579497478e-06,
"loss": 2.7908,
"step": 281800
},
{
"epoch": 0.93,
"learning_rate": 3.2903850105051073e-06,
"loss": 2.7834,
"step": 281900
},
{
"epoch": 0.93,
"learning_rate": 3.2738154415127356e-06,
"loss": 2.7855,
"step": 282000
},
{
"epoch": 0.93,
"learning_rate": 3.257245872520364e-06,
"loss": 2.7727,
"step": 282100
},
{
"epoch": 0.94,
"learning_rate": 3.2406763035279923e-06,
"loss": 2.7922,
"step": 282200
},
{
"epoch": 0.94,
"learning_rate": 3.2241067345356215e-06,
"loss": 2.7854,
"step": 282300
},
{
"epoch": 0.94,
"learning_rate": 3.20753716554325e-06,
"loss": 2.7843,
"step": 282400
},
{
"epoch": 0.94,
"learning_rate": 3.1909675965508783e-06,
"loss": 2.7854,
"step": 282500
},
{
"epoch": 0.94,
"learning_rate": 3.1743980275585075e-06,
"loss": 2.7951,
"step": 282600
},
{
"epoch": 0.94,
"learning_rate": 3.157828458566136e-06,
"loss": 2.7877,
"step": 282700
},
{
"epoch": 0.94,
"learning_rate": 3.141258889573764e-06,
"loss": 2.7832,
"step": 282800
},
{
"epoch": 0.94,
"learning_rate": 3.124689320581393e-06,
"loss": 2.7908,
"step": 282900
},
{
"epoch": 0.94,
"learning_rate": 3.1081197515890217e-06,
"loss": 2.792,
"step": 283000
},
{
"epoch": 0.94,
"learning_rate": 3.09155018259665e-06,
"loss": 2.7853,
"step": 283100
},
{
"epoch": 0.94,
"learning_rate": 3.074980613604279e-06,
"loss": 2.7924,
"step": 283200
},
{
"epoch": 0.94,
"learning_rate": 3.0584110446119077e-06,
"loss": 2.7675,
"step": 283300
},
{
"epoch": 0.94,
"learning_rate": 3.0418414756195365e-06,
"loss": 2.7906,
"step": 283400
},
{
"epoch": 0.94,
"learning_rate": 3.025271906627165e-06,
"loss": 2.7653,
"step": 283500
},
{
"epoch": 0.94,
"learning_rate": 3.0087023376347936e-06,
"loss": 2.7686,
"step": 283600
},
{
"epoch": 0.94,
"learning_rate": 2.9921327686424224e-06,
"loss": 2.7918,
"step": 283700
},
{
"epoch": 0.94,
"learning_rate": 2.975563199650051e-06,
"loss": 2.7809,
"step": 283800
},
{
"epoch": 0.94,
"learning_rate": 2.9589936306576795e-06,
"loss": 2.7919,
"step": 283900
},
{
"epoch": 0.94,
"learning_rate": 2.9424240616653083e-06,
"loss": 2.7787,
"step": 284000
},
{
"epoch": 0.94,
"learning_rate": 2.9258544926729367e-06,
"loss": 2.7883,
"step": 284100
},
{
"epoch": 0.94,
"learning_rate": 2.9092849236805654e-06,
"loss": 2.7915,
"step": 284200
},
{
"epoch": 0.94,
"learning_rate": 2.8927153546881942e-06,
"loss": 2.7923,
"step": 284300
},
{
"epoch": 0.94,
"learning_rate": 2.8761457856958226e-06,
"loss": 2.7934,
"step": 284400
},
{
"epoch": 0.94,
"learning_rate": 2.8595762167034514e-06,
"loss": 2.7817,
"step": 284500
},
{
"epoch": 0.94,
"learning_rate": 2.8430066477110797e-06,
"loss": 2.782,
"step": 284600
},
{
"epoch": 0.94,
"learning_rate": 2.8264370787187085e-06,
"loss": 2.7875,
"step": 284700
},
{
"epoch": 0.94,
"learning_rate": 2.8098675097263373e-06,
"loss": 2.7891,
"step": 284800
},
{
"epoch": 0.94,
"learning_rate": 2.7932979407339656e-06,
"loss": 2.7918,
"step": 284900
},
{
"epoch": 0.94,
"learning_rate": 2.7767283717415944e-06,
"loss": 2.7869,
"step": 285000
},
{
"epoch": 0.94,
"learning_rate": 2.760158802749223e-06,
"loss": 2.7952,
"step": 285100
},
{
"epoch": 0.95,
"learning_rate": 2.7435892337568516e-06,
"loss": 2.7806,
"step": 285200
},
{
"epoch": 0.95,
"learning_rate": 2.7270196647644803e-06,
"loss": 2.7905,
"step": 285300
},
{
"epoch": 0.95,
"learning_rate": 2.7104500957721087e-06,
"loss": 2.7874,
"step": 285400
},
{
"epoch": 0.95,
"learning_rate": 2.6938805267797375e-06,
"loss": 2.7778,
"step": 285500
},
{
"epoch": 0.95,
"learning_rate": 2.6773109577873663e-06,
"loss": 2.7829,
"step": 285600
},
{
"epoch": 0.95,
"learning_rate": 2.6607413887949946e-06,
"loss": 2.8044,
"step": 285700
},
{
"epoch": 0.95,
"learning_rate": 2.6441718198026234e-06,
"loss": 2.8038,
"step": 285800
},
{
"epoch": 0.95,
"learning_rate": 2.6276022508102518e-06,
"loss": 2.7961,
"step": 285900
},
{
"epoch": 0.95,
"learning_rate": 2.6110326818178805e-06,
"loss": 2.7926,
"step": 286000
},
{
"epoch": 0.95,
"learning_rate": 2.5944631128255093e-06,
"loss": 2.7869,
"step": 286100
},
{
"epoch": 0.95,
"learning_rate": 2.5778935438331377e-06,
"loss": 2.7997,
"step": 286200
},
{
"epoch": 0.95,
"learning_rate": 2.5613239748407665e-06,
"loss": 2.8022,
"step": 286300
},
{
"epoch": 0.95,
"learning_rate": 2.5447544058483953e-06,
"loss": 2.779,
"step": 286400
},
{
"epoch": 0.95,
"learning_rate": 2.528184836856024e-06,
"loss": 2.7873,
"step": 286500
},
{
"epoch": 0.95,
"learning_rate": 2.5116152678636524e-06,
"loss": 2.7968,
"step": 286600
},
{
"epoch": 0.95,
"learning_rate": 2.495045698871281e-06,
"loss": 2.7864,
"step": 286700
},
{
"epoch": 0.95,
"learning_rate": 2.47847612987891e-06,
"loss": 2.7956,
"step": 286800
},
{
"epoch": 0.95,
"learning_rate": 2.4619065608865383e-06,
"loss": 2.79,
"step": 286900
},
{
"epoch": 0.95,
"learning_rate": 2.445336991894167e-06,
"loss": 2.7891,
"step": 287000
},
{
"epoch": 0.95,
"learning_rate": 2.428767422901796e-06,
"loss": 2.7814,
"step": 287100
},
{
"epoch": 0.95,
"learning_rate": 2.4121978539094242e-06,
"loss": 2.7782,
"step": 287200
},
{
"epoch": 0.95,
"learning_rate": 2.395628284917053e-06,
"loss": 2.7831,
"step": 287300
},
{
"epoch": 0.95,
"learning_rate": 2.3790587159246814e-06,
"loss": 2.8011,
"step": 287400
},
{
"epoch": 0.95,
"learning_rate": 2.36248914693231e-06,
"loss": 2.7916,
"step": 287500
},
{
"epoch": 0.95,
"learning_rate": 2.345919577939939e-06,
"loss": 2.799,
"step": 287600
},
{
"epoch": 0.95,
"learning_rate": 2.3293500089475673e-06,
"loss": 2.7857,
"step": 287700
},
{
"epoch": 0.95,
"learning_rate": 2.312780439955196e-06,
"loss": 2.785,
"step": 287800
},
{
"epoch": 0.95,
"learning_rate": 2.2962108709628244e-06,
"loss": 2.7769,
"step": 287900
},
{
"epoch": 0.95,
"learning_rate": 2.2796413019704532e-06,
"loss": 2.7927,
"step": 288000
},
{
"epoch": 0.95,
"learning_rate": 2.263071732978082e-06,
"loss": 2.7762,
"step": 288100
},
{
"epoch": 0.96,
"learning_rate": 2.2465021639857104e-06,
"loss": 2.788,
"step": 288200
},
{
"epoch": 0.96,
"learning_rate": 2.229932594993339e-06,
"loss": 2.7805,
"step": 288300
},
{
"epoch": 0.96,
"learning_rate": 2.213363026000968e-06,
"loss": 2.7904,
"step": 288400
},
{
"epoch": 0.96,
"learning_rate": 2.1967934570085963e-06,
"loss": 2.7615,
"step": 288500
},
{
"epoch": 0.96,
"learning_rate": 2.180223888016225e-06,
"loss": 2.7873,
"step": 288600
},
{
"epoch": 0.96,
"learning_rate": 2.1636543190238534e-06,
"loss": 2.7805,
"step": 288700
},
{
"epoch": 0.96,
"learning_rate": 2.147084750031482e-06,
"loss": 2.7967,
"step": 288800
},
{
"epoch": 0.96,
"learning_rate": 2.130515181039111e-06,
"loss": 2.7888,
"step": 288900
},
{
"epoch": 0.96,
"learning_rate": 2.1139456120467393e-06,
"loss": 2.7842,
"step": 289000
},
{
"epoch": 0.96,
"learning_rate": 2.097376043054368e-06,
"loss": 2.7936,
"step": 289100
},
{
"epoch": 0.96,
"learning_rate": 2.0808064740619965e-06,
"loss": 2.8028,
"step": 289200
},
{
"epoch": 0.96,
"learning_rate": 2.0642369050696253e-06,
"loss": 2.7782,
"step": 289300
},
{
"epoch": 0.96,
"learning_rate": 2.047667336077254e-06,
"loss": 2.7837,
"step": 289400
},
{
"epoch": 0.96,
"learning_rate": 2.031097767084883e-06,
"loss": 2.7825,
"step": 289500
},
{
"epoch": 0.96,
"learning_rate": 2.014528198092511e-06,
"loss": 2.7791,
"step": 289600
},
{
"epoch": 0.96,
"learning_rate": 1.99795862910014e-06,
"loss": 2.7804,
"step": 289700
},
{
"epoch": 0.96,
"learning_rate": 1.9813890601077688e-06,
"loss": 2.7957,
"step": 289800
},
{
"epoch": 0.96,
"learning_rate": 1.9648194911153975e-06,
"loss": 2.7987,
"step": 289900
},
{
"epoch": 0.96,
"learning_rate": 1.948249922123026e-06,
"loss": 2.7786,
"step": 290000
},
{
"epoch": 0.96,
"learning_rate": 1.9316803531306547e-06,
"loss": 2.7825,
"step": 290100
},
{
"epoch": 0.96,
"learning_rate": 1.915110784138283e-06,
"loss": 2.7735,
"step": 290200
},
{
"epoch": 0.96,
"learning_rate": 1.8985412151459116e-06,
"loss": 2.7714,
"step": 290300
},
{
"epoch": 0.96,
"learning_rate": 1.8819716461535404e-06,
"loss": 2.7839,
"step": 290400
},
{
"epoch": 0.96,
"learning_rate": 1.865402077161169e-06,
"loss": 2.7715,
"step": 290500
},
{
"epoch": 0.96,
"learning_rate": 1.8488325081687977e-06,
"loss": 2.7979,
"step": 290600
},
{
"epoch": 0.96,
"learning_rate": 1.832262939176426e-06,
"loss": 2.7835,
"step": 290700
},
{
"epoch": 0.96,
"learning_rate": 1.8156933701840549e-06,
"loss": 2.787,
"step": 290800
},
{
"epoch": 0.96,
"learning_rate": 1.7991238011916837e-06,
"loss": 2.7905,
"step": 290900
},
{
"epoch": 0.96,
"learning_rate": 1.782554232199312e-06,
"loss": 2.7955,
"step": 291000
},
{
"epoch": 0.96,
"learning_rate": 1.7659846632069408e-06,
"loss": 2.7798,
"step": 291100
},
{
"epoch": 0.97,
"learning_rate": 1.7494150942145692e-06,
"loss": 2.7979,
"step": 291200
},
{
"epoch": 0.97,
"learning_rate": 1.732845525222198e-06,
"loss": 2.7982,
"step": 291300
},
{
"epoch": 0.97,
"learning_rate": 1.7162759562298267e-06,
"loss": 2.7895,
"step": 291400
},
{
"epoch": 0.97,
"learning_rate": 1.699706387237455e-06,
"loss": 2.7896,
"step": 291500
},
{
"epoch": 0.97,
"learning_rate": 1.6831368182450839e-06,
"loss": 2.7818,
"step": 291600
},
{
"epoch": 0.97,
"learning_rate": 1.6665672492527124e-06,
"loss": 2.7871,
"step": 291700
},
{
"epoch": 0.97,
"learning_rate": 1.649997680260341e-06,
"loss": 2.7832,
"step": 291800
},
{
"epoch": 0.97,
"learning_rate": 1.6334281112679698e-06,
"loss": 2.7864,
"step": 291900
},
{
"epoch": 0.97,
"learning_rate": 1.6168585422755984e-06,
"loss": 2.7946,
"step": 292000
},
{
"epoch": 0.97,
"learning_rate": 1.6002889732832271e-06,
"loss": 2.7879,
"step": 292100
},
{
"epoch": 0.97,
"learning_rate": 1.583719404290856e-06,
"loss": 2.7825,
"step": 292200
},
{
"epoch": 0.97,
"learning_rate": 1.5671498352984843e-06,
"loss": 2.802,
"step": 292300
},
{
"epoch": 0.97,
"learning_rate": 1.5505802663061129e-06,
"loss": 2.7654,
"step": 292400
},
{
"epoch": 0.97,
"learning_rate": 1.5340106973137416e-06,
"loss": 2.7872,
"step": 292500
},
{
"epoch": 0.97,
"learning_rate": 1.5174411283213702e-06,
"loss": 2.7761,
"step": 292600
},
{
"epoch": 0.97,
"learning_rate": 1.5008715593289988e-06,
"loss": 2.7917,
"step": 292700
},
{
"epoch": 0.97,
"learning_rate": 1.4843019903366273e-06,
"loss": 2.7812,
"step": 292800
},
{
"epoch": 0.97,
"learning_rate": 1.4677324213442561e-06,
"loss": 2.7945,
"step": 292900
},
{
"epoch": 0.97,
"learning_rate": 1.4511628523518847e-06,
"loss": 2.7916,
"step": 293000
},
{
"epoch": 0.97,
"learning_rate": 1.4345932833595133e-06,
"loss": 2.7952,
"step": 293100
},
{
"epoch": 0.97,
"learning_rate": 1.4180237143671418e-06,
"loss": 2.7775,
"step": 293200
},
{
"epoch": 0.97,
"learning_rate": 1.4014541453747706e-06,
"loss": 2.7883,
"step": 293300
},
{
"epoch": 0.97,
"learning_rate": 1.3848845763823992e-06,
"loss": 2.7875,
"step": 293400
},
{
"epoch": 0.97,
"learning_rate": 1.368315007390028e-06,
"loss": 2.7815,
"step": 293500
},
{
"epoch": 0.97,
"learning_rate": 1.3517454383976565e-06,
"loss": 2.7692,
"step": 293600
},
{
"epoch": 0.97,
"learning_rate": 1.3351758694052851e-06,
"loss": 2.7841,
"step": 293700
},
{
"epoch": 0.97,
"learning_rate": 1.3186063004129137e-06,
"loss": 2.7686,
"step": 293800
},
{
"epoch": 0.97,
"learning_rate": 1.3020367314205425e-06,
"loss": 2.7932,
"step": 293900
},
{
"epoch": 0.97,
"learning_rate": 1.285467162428171e-06,
"loss": 2.7812,
"step": 294000
},
{
"epoch": 0.97,
"learning_rate": 1.2688975934357996e-06,
"loss": 2.7835,
"step": 294100
},
{
"epoch": 0.97,
"learning_rate": 1.2523280244434282e-06,
"loss": 2.7939,
"step": 294200
},
{
"epoch": 0.98,
"learning_rate": 1.2357584554510567e-06,
"loss": 2.7881,
"step": 294300
},
{
"epoch": 0.98,
"learning_rate": 1.2191888864586855e-06,
"loss": 2.7918,
"step": 294400
},
{
"epoch": 0.98,
"learning_rate": 1.202619317466314e-06,
"loss": 2.7844,
"step": 294500
},
{
"epoch": 0.98,
"learning_rate": 1.1860497484739427e-06,
"loss": 2.7685,
"step": 294600
},
{
"epoch": 0.98,
"learning_rate": 1.1694801794815712e-06,
"loss": 2.7759,
"step": 294700
},
{
"epoch": 0.98,
"learning_rate": 1.1529106104892e-06,
"loss": 2.7957,
"step": 294800
},
{
"epoch": 0.98,
"learning_rate": 1.1363410414968286e-06,
"loss": 2.7924,
"step": 294900
},
{
"epoch": 0.98,
"learning_rate": 1.1197714725044574e-06,
"loss": 2.7839,
"step": 295000
},
{
"epoch": 0.98,
"learning_rate": 1.103201903512086e-06,
"loss": 2.7779,
"step": 295100
},
{
"epoch": 0.98,
"learning_rate": 1.0866323345197145e-06,
"loss": 2.7712,
"step": 295200
},
{
"epoch": 0.98,
"learning_rate": 1.0700627655273433e-06,
"loss": 2.7884,
"step": 295300
},
{
"epoch": 0.98,
"learning_rate": 1.0534931965349719e-06,
"loss": 2.8035,
"step": 295400
},
{
"epoch": 0.98,
"learning_rate": 1.0369236275426004e-06,
"loss": 2.7855,
"step": 295500
},
{
"epoch": 0.98,
"learning_rate": 1.020354058550229e-06,
"loss": 2.7933,
"step": 295600
},
{
"epoch": 0.98,
"learning_rate": 1.0037844895578576e-06,
"loss": 2.7761,
"step": 295700
},
{
"epoch": 0.98,
"learning_rate": 9.872149205654864e-07,
"loss": 2.7905,
"step": 295800
},
{
"epoch": 0.98,
"learning_rate": 9.70645351573115e-07,
"loss": 2.7887,
"step": 295900
},
{
"epoch": 0.98,
"learning_rate": 9.540757825807435e-07,
"loss": 2.7872,
"step": 296000
},
{
"epoch": 0.98,
"learning_rate": 9.375062135883722e-07,
"loss": 2.807,
"step": 296100
},
{
"epoch": 0.98,
"learning_rate": 9.209366445960007e-07,
"loss": 2.7965,
"step": 296200
},
{
"epoch": 0.98,
"learning_rate": 9.043670756036295e-07,
"loss": 2.7723,
"step": 296300
},
{
"epoch": 0.98,
"learning_rate": 8.877975066112581e-07,
"loss": 2.7688,
"step": 296400
},
{
"epoch": 0.98,
"learning_rate": 8.712279376188867e-07,
"loss": 2.7897,
"step": 296500
},
{
"epoch": 0.98,
"learning_rate": 8.546583686265152e-07,
"loss": 2.7958,
"step": 296600
},
{
"epoch": 0.98,
"learning_rate": 8.38088799634144e-07,
"loss": 2.7961,
"step": 296700
},
{
"epoch": 0.98,
"learning_rate": 8.215192306417726e-07,
"loss": 2.7812,
"step": 296800
},
{
"epoch": 0.98,
"learning_rate": 8.049496616494013e-07,
"loss": 2.7825,
"step": 296900
},
{
"epoch": 0.98,
"learning_rate": 7.883800926570298e-07,
"loss": 2.7955,
"step": 297000
},
{
"epoch": 0.98,
"learning_rate": 7.718105236646585e-07,
"loss": 2.7744,
"step": 297100
},
{
"epoch": 0.98,
"learning_rate": 7.552409546722871e-07,
"loss": 2.7861,
"step": 297200
},
{
"epoch": 0.99,
"learning_rate": 7.386713856799158e-07,
"loss": 2.7807,
"step": 297300
},
{
"epoch": 0.99,
"learning_rate": 7.221018166875443e-07,
"loss": 2.8026,
"step": 297400
},
{
"epoch": 0.99,
"learning_rate": 7.055322476951729e-07,
"loss": 2.7841,
"step": 297500
},
{
"epoch": 0.99,
"learning_rate": 6.889626787028016e-07,
"loss": 2.776,
"step": 297600
},
{
"epoch": 0.99,
"learning_rate": 6.723931097104303e-07,
"loss": 2.7953,
"step": 297700
},
{
"epoch": 0.99,
"learning_rate": 6.558235407180589e-07,
"loss": 2.7979,
"step": 297800
},
{
"epoch": 0.99,
"learning_rate": 6.392539717256875e-07,
"loss": 2.7706,
"step": 297900
},
{
"epoch": 0.99,
"learning_rate": 6.226844027333162e-07,
"loss": 2.7909,
"step": 298000
},
{
"epoch": 0.99,
"learning_rate": 6.061148337409447e-07,
"loss": 2.782,
"step": 298100
},
{
"epoch": 0.99,
"learning_rate": 5.895452647485733e-07,
"loss": 2.7851,
"step": 298200
},
{
"epoch": 0.99,
"learning_rate": 5.72975695756202e-07,
"loss": 2.788,
"step": 298300
},
{
"epoch": 0.99,
"learning_rate": 5.564061267638307e-07,
"loss": 2.7713,
"step": 298400
},
{
"epoch": 0.99,
"learning_rate": 5.398365577714593e-07,
"loss": 2.7806,
"step": 298500
},
{
"epoch": 0.99,
"learning_rate": 5.232669887790879e-07,
"loss": 2.7827,
"step": 298600
},
{
"epoch": 0.99,
"learning_rate": 5.066974197867166e-07,
"loss": 2.7759,
"step": 298700
},
{
"epoch": 0.99,
"learning_rate": 4.901278507943452e-07,
"loss": 2.7812,
"step": 298800
},
{
"epoch": 0.99,
"learning_rate": 4.735582818019738e-07,
"loss": 2.7823,
"step": 298900
},
{
"epoch": 0.99,
"learning_rate": 4.5698871280960246e-07,
"loss": 2.7962,
"step": 299000
},
{
"epoch": 0.99,
"learning_rate": 4.4041914381723103e-07,
"loss": 2.7872,
"step": 299100
},
{
"epoch": 0.99,
"learning_rate": 4.238495748248597e-07,
"loss": 2.7821,
"step": 299200
},
{
"epoch": 0.99,
"learning_rate": 4.072800058324883e-07,
"loss": 2.7802,
"step": 299300
},
{
"epoch": 0.99,
"learning_rate": 3.907104368401169e-07,
"loss": 2.7848,
"step": 299400
},
{
"epoch": 0.99,
"learning_rate": 3.741408678477456e-07,
"loss": 2.7793,
"step": 299500
},
{
"epoch": 0.99,
"learning_rate": 3.575712988553742e-07,
"loss": 2.7806,
"step": 299600
},
{
"epoch": 0.99,
"learning_rate": 3.410017298630028e-07,
"loss": 2.7991,
"step": 299700
},
{
"epoch": 0.99,
"learning_rate": 3.2443216087063145e-07,
"loss": 2.7788,
"step": 299800
},
{
"epoch": 0.99,
"learning_rate": 3.0786259187826007e-07,
"loss": 2.766,
"step": 299900
},
{
"epoch": 0.99,
"learning_rate": 2.912930228858887e-07,
"loss": 2.7949,
"step": 300000
},
{
"epoch": 0.99,
"learning_rate": 2.747234538935173e-07,
"loss": 2.7723,
"step": 300100
},
{
"epoch": 0.99,
"learning_rate": 2.58153884901146e-07,
"loss": 2.7996,
"step": 300200
},
{
"epoch": 1.0,
"learning_rate": 2.415843159087746e-07,
"loss": 2.7776,
"step": 300300
},
{
"epoch": 1.0,
"learning_rate": 2.250147469164032e-07,
"loss": 2.7869,
"step": 300400
},
{
"epoch": 1.0,
"learning_rate": 2.0844517792403183e-07,
"loss": 2.7845,
"step": 300500
},
{
"epoch": 1.0,
"learning_rate": 1.9187560893166048e-07,
"loss": 2.7854,
"step": 300600
},
{
"epoch": 1.0,
"learning_rate": 1.753060399392891e-07,
"loss": 2.7895,
"step": 300700
},
{
"epoch": 1.0,
"learning_rate": 1.5873647094691776e-07,
"loss": 2.7742,
"step": 300800
},
{
"epoch": 1.0,
"learning_rate": 1.4216690195454635e-07,
"loss": 2.783,
"step": 300900
},
{
"epoch": 1.0,
"learning_rate": 1.25597332962175e-07,
"loss": 2.7889,
"step": 301000
},
{
"epoch": 1.0,
"learning_rate": 1.0902776396980363e-07,
"loss": 2.783,
"step": 301100
},
{
"epoch": 1.0,
"learning_rate": 9.245819497743225e-08,
"loss": 2.7938,
"step": 301200
},
{
"epoch": 1.0,
"learning_rate": 7.588862598506089e-08,
"loss": 2.7968,
"step": 301300
},
{
"epoch": 1.0,
"learning_rate": 5.93190569926895e-08,
"loss": 2.7839,
"step": 301400
},
{
"epoch": 1.0,
"learning_rate": 4.274948800031814e-08,
"loss": 2.7737,
"step": 301500
}
],
"max_steps": 301758,
"num_train_epochs": 1,
"total_flos": 8.170868148535296e+18,
"trial_name": null,
"trial_params": null
}