llama-160m-base / trainer_state.json
JackFram's picture
Upload folder using huggingface_hub
9e16f2c
raw
history blame
371 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9985129939161912,
"global_step": 301500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4.998344091220703e-05,
"loss": 2.749,
"step": 100
},
{
"epoch": 0.0,
"learning_rate": 4.9966881824414056e-05,
"loss": 2.7594,
"step": 200
},
{
"epoch": 0.0,
"learning_rate": 4.9950322736621087e-05,
"loss": 2.7598,
"step": 300
},
{
"epoch": 0.0,
"learning_rate": 4.993376364882812e-05,
"loss": 2.7508,
"step": 400
},
{
"epoch": 0.0,
"learning_rate": 4.991720456103514e-05,
"loss": 2.7558,
"step": 500
},
{
"epoch": 0.0,
"learning_rate": 4.990064547324218e-05,
"loss": 2.7641,
"step": 600
},
{
"epoch": 0.0,
"learning_rate": 4.98840863854492e-05,
"loss": 2.7775,
"step": 700
},
{
"epoch": 0.0,
"learning_rate": 4.986752729765623e-05,
"loss": 2.7908,
"step": 800
},
{
"epoch": 0.0,
"learning_rate": 4.9850968209863255e-05,
"loss": 2.767,
"step": 900
},
{
"epoch": 0.0,
"learning_rate": 4.9834409122070285e-05,
"loss": 2.7804,
"step": 1000
},
{
"epoch": 0.0,
"learning_rate": 4.9817850034277315e-05,
"loss": 2.7667,
"step": 1100
},
{
"epoch": 0.0,
"learning_rate": 4.980129094648434e-05,
"loss": 2.7679,
"step": 1200
},
{
"epoch": 0.0,
"learning_rate": 4.978473185869137e-05,
"loss": 2.792,
"step": 1300
},
{
"epoch": 0.0,
"learning_rate": 4.97681727708984e-05,
"loss": 2.7904,
"step": 1400
},
{
"epoch": 0.0,
"learning_rate": 4.975161368310542e-05,
"loss": 2.7738,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 4.973505459531246e-05,
"loss": 2.7821,
"step": 1600
},
{
"epoch": 0.01,
"learning_rate": 4.9718495507519484e-05,
"loss": 2.7821,
"step": 1700
},
{
"epoch": 0.01,
"learning_rate": 4.9701936419726514e-05,
"loss": 2.7774,
"step": 1800
},
{
"epoch": 0.01,
"learning_rate": 4.9685377331933544e-05,
"loss": 2.7651,
"step": 1900
},
{
"epoch": 0.01,
"learning_rate": 4.966881824414057e-05,
"loss": 2.7752,
"step": 2000
},
{
"epoch": 0.01,
"learning_rate": 4.96522591563476e-05,
"loss": 2.7897,
"step": 2100
},
{
"epoch": 0.01,
"learning_rate": 4.963570006855462e-05,
"loss": 2.7725,
"step": 2200
},
{
"epoch": 0.01,
"learning_rate": 4.961914098076165e-05,
"loss": 2.7753,
"step": 2300
},
{
"epoch": 0.01,
"learning_rate": 4.960258189296868e-05,
"loss": 2.7824,
"step": 2400
},
{
"epoch": 0.01,
"learning_rate": 4.9586022805175706e-05,
"loss": 2.7757,
"step": 2500
},
{
"epoch": 0.01,
"learning_rate": 4.956946371738274e-05,
"loss": 2.7658,
"step": 2600
},
{
"epoch": 0.01,
"learning_rate": 4.9552904629589766e-05,
"loss": 2.7781,
"step": 2700
},
{
"epoch": 0.01,
"learning_rate": 4.9536345541796797e-05,
"loss": 2.7819,
"step": 2800
},
{
"epoch": 0.01,
"learning_rate": 4.951978645400383e-05,
"loss": 2.7963,
"step": 2900
},
{
"epoch": 0.01,
"learning_rate": 4.950322736621085e-05,
"loss": 2.7773,
"step": 3000
},
{
"epoch": 0.01,
"learning_rate": 4.948666827841788e-05,
"loss": 2.7823,
"step": 3100
},
{
"epoch": 0.01,
"learning_rate": 4.947010919062491e-05,
"loss": 2.7753,
"step": 3200
},
{
"epoch": 0.01,
"learning_rate": 4.9453550102831935e-05,
"loss": 2.769,
"step": 3300
},
{
"epoch": 0.01,
"learning_rate": 4.9436991015038965e-05,
"loss": 2.7742,
"step": 3400
},
{
"epoch": 0.01,
"learning_rate": 4.942043192724599e-05,
"loss": 2.7804,
"step": 3500
},
{
"epoch": 0.01,
"learning_rate": 4.9403872839453025e-05,
"loss": 2.7811,
"step": 3600
},
{
"epoch": 0.01,
"learning_rate": 4.9387313751660056e-05,
"loss": 2.786,
"step": 3700
},
{
"epoch": 0.01,
"learning_rate": 4.937075466386708e-05,
"loss": 2.7747,
"step": 3800
},
{
"epoch": 0.01,
"learning_rate": 4.935419557607411e-05,
"loss": 2.7802,
"step": 3900
},
{
"epoch": 0.01,
"learning_rate": 4.933763648828113e-05,
"loss": 2.7754,
"step": 4000
},
{
"epoch": 0.01,
"learning_rate": 4.932107740048816e-05,
"loss": 2.7792,
"step": 4100
},
{
"epoch": 0.01,
"learning_rate": 4.9304518312695194e-05,
"loss": 2.784,
"step": 4200
},
{
"epoch": 0.01,
"learning_rate": 4.928795922490222e-05,
"loss": 2.7872,
"step": 4300
},
{
"epoch": 0.01,
"learning_rate": 4.927140013710925e-05,
"loss": 2.7802,
"step": 4400
},
{
"epoch": 0.01,
"learning_rate": 4.925484104931628e-05,
"loss": 2.7902,
"step": 4500
},
{
"epoch": 0.02,
"learning_rate": 4.923828196152331e-05,
"loss": 2.7898,
"step": 4600
},
{
"epoch": 0.02,
"learning_rate": 4.922172287373034e-05,
"loss": 2.7698,
"step": 4700
},
{
"epoch": 0.02,
"learning_rate": 4.920516378593736e-05,
"loss": 2.7779,
"step": 4800
},
{
"epoch": 0.02,
"learning_rate": 4.918860469814439e-05,
"loss": 2.7792,
"step": 4900
},
{
"epoch": 0.02,
"learning_rate": 4.917204561035142e-05,
"loss": 2.7807,
"step": 5000
},
{
"epoch": 0.02,
"learning_rate": 4.9155486522558446e-05,
"loss": 2.7927,
"step": 5100
},
{
"epoch": 0.02,
"learning_rate": 4.9138927434765476e-05,
"loss": 2.773,
"step": 5200
},
{
"epoch": 0.02,
"learning_rate": 4.91223683469725e-05,
"loss": 2.7723,
"step": 5300
},
{
"epoch": 0.02,
"learning_rate": 4.910580925917953e-05,
"loss": 2.7633,
"step": 5400
},
{
"epoch": 0.02,
"learning_rate": 4.908925017138656e-05,
"loss": 2.7821,
"step": 5500
},
{
"epoch": 0.02,
"learning_rate": 4.907269108359359e-05,
"loss": 2.7641,
"step": 5600
},
{
"epoch": 0.02,
"learning_rate": 4.905613199580062e-05,
"loss": 2.7798,
"step": 5700
},
{
"epoch": 0.02,
"learning_rate": 4.9039572908007645e-05,
"loss": 2.7849,
"step": 5800
},
{
"epoch": 0.02,
"learning_rate": 4.9023013820214675e-05,
"loss": 2.781,
"step": 5900
},
{
"epoch": 0.02,
"learning_rate": 4.9006454732421705e-05,
"loss": 2.7916,
"step": 6000
},
{
"epoch": 0.02,
"learning_rate": 4.898989564462873e-05,
"loss": 2.7924,
"step": 6100
},
{
"epoch": 0.02,
"learning_rate": 4.897333655683576e-05,
"loss": 2.7795,
"step": 6200
},
{
"epoch": 0.02,
"learning_rate": 4.895677746904279e-05,
"loss": 2.789,
"step": 6300
},
{
"epoch": 0.02,
"learning_rate": 4.894021838124981e-05,
"loss": 2.779,
"step": 6400
},
{
"epoch": 0.02,
"learning_rate": 4.892365929345684e-05,
"loss": 2.7844,
"step": 6500
},
{
"epoch": 0.02,
"learning_rate": 4.8907100205663873e-05,
"loss": 2.7797,
"step": 6600
},
{
"epoch": 0.02,
"learning_rate": 4.8890541117870904e-05,
"loss": 2.7895,
"step": 6700
},
{
"epoch": 0.02,
"learning_rate": 4.887398203007793e-05,
"loss": 2.7759,
"step": 6800
},
{
"epoch": 0.02,
"learning_rate": 4.885742294228496e-05,
"loss": 2.7844,
"step": 6900
},
{
"epoch": 0.02,
"learning_rate": 4.884086385449199e-05,
"loss": 2.7638,
"step": 7000
},
{
"epoch": 0.02,
"learning_rate": 4.882430476669901e-05,
"loss": 2.7765,
"step": 7100
},
{
"epoch": 0.02,
"learning_rate": 4.880774567890604e-05,
"loss": 2.7778,
"step": 7200
},
{
"epoch": 0.02,
"learning_rate": 4.879118659111307e-05,
"loss": 2.7846,
"step": 7300
},
{
"epoch": 0.02,
"learning_rate": 4.8774627503320095e-05,
"loss": 2.7719,
"step": 7400
},
{
"epoch": 0.02,
"learning_rate": 4.8758068415527126e-05,
"loss": 2.7748,
"step": 7500
},
{
"epoch": 0.03,
"learning_rate": 4.8741509327734156e-05,
"loss": 2.7751,
"step": 7600
},
{
"epoch": 0.03,
"learning_rate": 4.8724950239941186e-05,
"loss": 2.781,
"step": 7700
},
{
"epoch": 0.03,
"learning_rate": 4.870839115214822e-05,
"loss": 2.7754,
"step": 7800
},
{
"epoch": 0.03,
"learning_rate": 4.869183206435524e-05,
"loss": 2.7904,
"step": 7900
},
{
"epoch": 0.03,
"learning_rate": 4.867527297656227e-05,
"loss": 2.777,
"step": 8000
},
{
"epoch": 0.03,
"learning_rate": 4.8658713888769294e-05,
"loss": 2.7694,
"step": 8100
},
{
"epoch": 0.03,
"learning_rate": 4.8642154800976324e-05,
"loss": 2.7766,
"step": 8200
},
{
"epoch": 0.03,
"learning_rate": 4.8625595713183355e-05,
"loss": 2.7873,
"step": 8300
},
{
"epoch": 0.03,
"learning_rate": 4.860903662539038e-05,
"loss": 2.7604,
"step": 8400
},
{
"epoch": 0.03,
"learning_rate": 4.859247753759741e-05,
"loss": 2.78,
"step": 8500
},
{
"epoch": 0.03,
"learning_rate": 4.857591844980444e-05,
"loss": 2.7831,
"step": 8600
},
{
"epoch": 0.03,
"learning_rate": 4.855935936201147e-05,
"loss": 2.7907,
"step": 8700
},
{
"epoch": 0.03,
"learning_rate": 4.85428002742185e-05,
"loss": 2.7846,
"step": 8800
},
{
"epoch": 0.03,
"learning_rate": 4.852624118642552e-05,
"loss": 2.7813,
"step": 8900
},
{
"epoch": 0.03,
"learning_rate": 4.850968209863255e-05,
"loss": 2.7797,
"step": 9000
},
{
"epoch": 0.03,
"learning_rate": 4.8493123010839583e-05,
"loss": 2.7758,
"step": 9100
},
{
"epoch": 0.03,
"learning_rate": 4.847656392304661e-05,
"loss": 2.78,
"step": 9200
},
{
"epoch": 0.03,
"learning_rate": 4.846000483525364e-05,
"loss": 2.7885,
"step": 9300
},
{
"epoch": 0.03,
"learning_rate": 4.844344574746066e-05,
"loss": 2.7855,
"step": 9400
},
{
"epoch": 0.03,
"learning_rate": 4.842688665966769e-05,
"loss": 2.7835,
"step": 9500
},
{
"epoch": 0.03,
"learning_rate": 4.841032757187473e-05,
"loss": 2.7868,
"step": 9600
},
{
"epoch": 0.03,
"learning_rate": 4.839376848408175e-05,
"loss": 2.7991,
"step": 9700
},
{
"epoch": 0.03,
"learning_rate": 4.837720939628878e-05,
"loss": 2.7867,
"step": 9800
},
{
"epoch": 0.03,
"learning_rate": 4.8360650308495806e-05,
"loss": 2.7845,
"step": 9900
},
{
"epoch": 0.03,
"learning_rate": 4.8344091220702836e-05,
"loss": 2.7796,
"step": 10000
},
{
"epoch": 0.03,
"learning_rate": 4.8327532132909866e-05,
"loss": 2.7918,
"step": 10100
},
{
"epoch": 0.03,
"learning_rate": 4.831097304511689e-05,
"loss": 2.806,
"step": 10200
},
{
"epoch": 0.03,
"learning_rate": 4.829441395732392e-05,
"loss": 2.7917,
"step": 10300
},
{
"epoch": 0.03,
"learning_rate": 4.827785486953095e-05,
"loss": 2.7812,
"step": 10400
},
{
"epoch": 0.03,
"learning_rate": 4.8261295781737974e-05,
"loss": 2.7863,
"step": 10500
},
{
"epoch": 0.04,
"learning_rate": 4.824473669394501e-05,
"loss": 2.7812,
"step": 10600
},
{
"epoch": 0.04,
"learning_rate": 4.8228177606152034e-05,
"loss": 2.7725,
"step": 10700
},
{
"epoch": 0.04,
"learning_rate": 4.8211618518359065e-05,
"loss": 2.7792,
"step": 10800
},
{
"epoch": 0.04,
"learning_rate": 4.8195059430566095e-05,
"loss": 2.7858,
"step": 10900
},
{
"epoch": 0.04,
"learning_rate": 4.817850034277312e-05,
"loss": 2.7859,
"step": 11000
},
{
"epoch": 0.04,
"learning_rate": 4.816194125498015e-05,
"loss": 2.7818,
"step": 11100
},
{
"epoch": 0.04,
"learning_rate": 4.814538216718717e-05,
"loss": 2.7695,
"step": 11200
},
{
"epoch": 0.04,
"learning_rate": 4.81288230793942e-05,
"loss": 2.7782,
"step": 11300
},
{
"epoch": 0.04,
"learning_rate": 4.811226399160123e-05,
"loss": 2.7696,
"step": 11400
},
{
"epoch": 0.04,
"learning_rate": 4.8095704903808256e-05,
"loss": 2.775,
"step": 11500
},
{
"epoch": 0.04,
"learning_rate": 4.8079145816015294e-05,
"loss": 2.7835,
"step": 11600
},
{
"epoch": 0.04,
"learning_rate": 4.806258672822232e-05,
"loss": 2.8033,
"step": 11700
},
{
"epoch": 0.04,
"learning_rate": 4.804602764042935e-05,
"loss": 2.7839,
"step": 11800
},
{
"epoch": 0.04,
"learning_rate": 4.802946855263638e-05,
"loss": 2.7871,
"step": 11900
},
{
"epoch": 0.04,
"learning_rate": 4.80129094648434e-05,
"loss": 2.7912,
"step": 12000
},
{
"epoch": 0.04,
"learning_rate": 4.799635037705043e-05,
"loss": 2.7906,
"step": 12100
},
{
"epoch": 0.04,
"learning_rate": 4.797979128925746e-05,
"loss": 2.7903,
"step": 12200
},
{
"epoch": 0.04,
"learning_rate": 4.7963232201464485e-05,
"loss": 2.7771,
"step": 12300
},
{
"epoch": 0.04,
"learning_rate": 4.7946673113671516e-05,
"loss": 2.7825,
"step": 12400
},
{
"epoch": 0.04,
"learning_rate": 4.793011402587854e-05,
"loss": 2.7946,
"step": 12500
},
{
"epoch": 0.04,
"learning_rate": 4.7913554938085576e-05,
"loss": 2.7858,
"step": 12600
},
{
"epoch": 0.04,
"learning_rate": 4.78969958502926e-05,
"loss": 2.7768,
"step": 12700
},
{
"epoch": 0.04,
"learning_rate": 4.788043676249963e-05,
"loss": 2.7875,
"step": 12800
},
{
"epoch": 0.04,
"learning_rate": 4.786387767470666e-05,
"loss": 2.7762,
"step": 12900
},
{
"epoch": 0.04,
"learning_rate": 4.7847318586913684e-05,
"loss": 2.7937,
"step": 13000
},
{
"epoch": 0.04,
"learning_rate": 4.7830759499120714e-05,
"loss": 2.7903,
"step": 13100
},
{
"epoch": 0.04,
"learning_rate": 4.7814200411327744e-05,
"loss": 2.7923,
"step": 13200
},
{
"epoch": 0.04,
"learning_rate": 4.779764132353477e-05,
"loss": 2.7874,
"step": 13300
},
{
"epoch": 0.04,
"learning_rate": 4.77810822357418e-05,
"loss": 2.7847,
"step": 13400
},
{
"epoch": 0.04,
"learning_rate": 4.776452314794883e-05,
"loss": 2.772,
"step": 13500
},
{
"epoch": 0.05,
"learning_rate": 4.774796406015586e-05,
"loss": 2.7846,
"step": 13600
},
{
"epoch": 0.05,
"learning_rate": 4.773140497236289e-05,
"loss": 2.7824,
"step": 13700
},
{
"epoch": 0.05,
"learning_rate": 4.771484588456991e-05,
"loss": 2.7781,
"step": 13800
},
{
"epoch": 0.05,
"learning_rate": 4.769828679677694e-05,
"loss": 2.7761,
"step": 13900
},
{
"epoch": 0.05,
"learning_rate": 4.7681727708983967e-05,
"loss": 2.7677,
"step": 14000
},
{
"epoch": 0.05,
"learning_rate": 4.7665168621191e-05,
"loss": 2.7686,
"step": 14100
},
{
"epoch": 0.05,
"learning_rate": 4.764860953339803e-05,
"loss": 2.7688,
"step": 14200
},
{
"epoch": 0.05,
"learning_rate": 4.763205044560505e-05,
"loss": 2.7817,
"step": 14300
},
{
"epoch": 0.05,
"learning_rate": 4.761549135781208e-05,
"loss": 2.7674,
"step": 14400
},
{
"epoch": 0.05,
"learning_rate": 4.759893227001911e-05,
"loss": 2.7738,
"step": 14500
},
{
"epoch": 0.05,
"learning_rate": 4.758237318222614e-05,
"loss": 2.7799,
"step": 14600
},
{
"epoch": 0.05,
"learning_rate": 4.756581409443317e-05,
"loss": 2.7818,
"step": 14700
},
{
"epoch": 0.05,
"learning_rate": 4.7549255006640195e-05,
"loss": 2.7859,
"step": 14800
},
{
"epoch": 0.05,
"learning_rate": 4.7532695918847226e-05,
"loss": 2.7762,
"step": 14900
},
{
"epoch": 0.05,
"learning_rate": 4.7516136831054256e-05,
"loss": 2.7851,
"step": 15000
},
{
"epoch": 0.05,
"learning_rate": 4.749957774326128e-05,
"loss": 2.7814,
"step": 15100
},
{
"epoch": 0.05,
"learning_rate": 4.748301865546831e-05,
"loss": 2.7973,
"step": 15200
},
{
"epoch": 0.05,
"learning_rate": 4.746645956767533e-05,
"loss": 2.7885,
"step": 15300
},
{
"epoch": 0.05,
"learning_rate": 4.7449900479882364e-05,
"loss": 2.776,
"step": 15400
},
{
"epoch": 0.05,
"learning_rate": 4.7433341392089394e-05,
"loss": 2.7677,
"step": 15500
},
{
"epoch": 0.05,
"learning_rate": 4.7416782304296424e-05,
"loss": 2.7916,
"step": 15600
},
{
"epoch": 0.05,
"learning_rate": 4.7400223216503454e-05,
"loss": 2.7928,
"step": 15700
},
{
"epoch": 0.05,
"learning_rate": 4.738366412871048e-05,
"loss": 2.7846,
"step": 15800
},
{
"epoch": 0.05,
"learning_rate": 4.736710504091751e-05,
"loss": 2.7773,
"step": 15900
},
{
"epoch": 0.05,
"learning_rate": 4.735054595312454e-05,
"loss": 2.783,
"step": 16000
},
{
"epoch": 0.05,
"learning_rate": 4.733398686533156e-05,
"loss": 2.7862,
"step": 16100
},
{
"epoch": 0.05,
"learning_rate": 4.731742777753859e-05,
"loss": 2.7786,
"step": 16200
},
{
"epoch": 0.05,
"learning_rate": 4.730086868974562e-05,
"loss": 2.7859,
"step": 16300
},
{
"epoch": 0.05,
"learning_rate": 4.7284309601952646e-05,
"loss": 2.7774,
"step": 16400
},
{
"epoch": 0.05,
"learning_rate": 4.7267750514159677e-05,
"loss": 2.7832,
"step": 16500
},
{
"epoch": 0.05,
"learning_rate": 4.725119142636671e-05,
"loss": 2.7747,
"step": 16600
},
{
"epoch": 0.06,
"learning_rate": 4.723463233857374e-05,
"loss": 2.7643,
"step": 16700
},
{
"epoch": 0.06,
"learning_rate": 4.721807325078077e-05,
"loss": 2.7757,
"step": 16800
},
{
"epoch": 0.06,
"learning_rate": 4.720151416298779e-05,
"loss": 2.7791,
"step": 16900
},
{
"epoch": 0.06,
"learning_rate": 4.718495507519482e-05,
"loss": 2.7813,
"step": 17000
},
{
"epoch": 0.06,
"learning_rate": 4.7168395987401845e-05,
"loss": 2.7935,
"step": 17100
},
{
"epoch": 0.06,
"learning_rate": 4.7151836899608875e-05,
"loss": 2.7802,
"step": 17200
},
{
"epoch": 0.06,
"learning_rate": 4.7135277811815905e-05,
"loss": 2.7884,
"step": 17300
},
{
"epoch": 0.06,
"learning_rate": 4.711871872402293e-05,
"loss": 2.7835,
"step": 17400
},
{
"epoch": 0.06,
"learning_rate": 4.710215963622996e-05,
"loss": 2.7925,
"step": 17500
},
{
"epoch": 0.06,
"learning_rate": 4.708560054843699e-05,
"loss": 2.7974,
"step": 17600
},
{
"epoch": 0.06,
"learning_rate": 4.706904146064402e-05,
"loss": 2.7875,
"step": 17700
},
{
"epoch": 0.06,
"learning_rate": 4.705248237285105e-05,
"loss": 2.7878,
"step": 17800
},
{
"epoch": 0.06,
"learning_rate": 4.7035923285058074e-05,
"loss": 2.7869,
"step": 17900
},
{
"epoch": 0.06,
"learning_rate": 4.7019364197265104e-05,
"loss": 2.7716,
"step": 18000
},
{
"epoch": 0.06,
"learning_rate": 4.7002805109472134e-05,
"loss": 2.7774,
"step": 18100
},
{
"epoch": 0.06,
"learning_rate": 4.698624602167916e-05,
"loss": 2.7824,
"step": 18200
},
{
"epoch": 0.06,
"learning_rate": 4.696968693388619e-05,
"loss": 2.7863,
"step": 18300
},
{
"epoch": 0.06,
"learning_rate": 4.695312784609321e-05,
"loss": 2.7737,
"step": 18400
},
{
"epoch": 0.06,
"learning_rate": 4.693656875830024e-05,
"loss": 2.7721,
"step": 18500
},
{
"epoch": 0.06,
"learning_rate": 4.692000967050727e-05,
"loss": 2.7834,
"step": 18600
},
{
"epoch": 0.06,
"learning_rate": 4.69034505827143e-05,
"loss": 2.7756,
"step": 18700
},
{
"epoch": 0.06,
"learning_rate": 4.688689149492133e-05,
"loss": 2.7757,
"step": 18800
},
{
"epoch": 0.06,
"learning_rate": 4.6870332407128356e-05,
"loss": 2.7818,
"step": 18900
},
{
"epoch": 0.06,
"learning_rate": 4.685377331933539e-05,
"loss": 2.774,
"step": 19000
},
{
"epoch": 0.06,
"learning_rate": 4.683721423154242e-05,
"loss": 2.7885,
"step": 19100
},
{
"epoch": 0.06,
"learning_rate": 4.682065514374944e-05,
"loss": 2.7653,
"step": 19200
},
{
"epoch": 0.06,
"learning_rate": 4.680409605595647e-05,
"loss": 2.7829,
"step": 19300
},
{
"epoch": 0.06,
"learning_rate": 4.67875369681635e-05,
"loss": 2.7784,
"step": 19400
},
{
"epoch": 0.06,
"learning_rate": 4.6770977880370525e-05,
"loss": 2.7826,
"step": 19500
},
{
"epoch": 0.06,
"learning_rate": 4.675441879257756e-05,
"loss": 2.7876,
"step": 19600
},
{
"epoch": 0.07,
"learning_rate": 4.6737859704784585e-05,
"loss": 2.7801,
"step": 19700
},
{
"epoch": 0.07,
"learning_rate": 4.6721300616991615e-05,
"loss": 2.7858,
"step": 19800
},
{
"epoch": 0.07,
"learning_rate": 4.670474152919864e-05,
"loss": 2.7851,
"step": 19900
},
{
"epoch": 0.07,
"learning_rate": 4.668818244140567e-05,
"loss": 2.7871,
"step": 20000
},
{
"epoch": 0.07,
"learning_rate": 4.66716233536127e-05,
"loss": 2.7874,
"step": 20100
},
{
"epoch": 0.07,
"learning_rate": 4.665506426581972e-05,
"loss": 2.7855,
"step": 20200
},
{
"epoch": 0.07,
"learning_rate": 4.6638505178026753e-05,
"loss": 2.7686,
"step": 20300
},
{
"epoch": 0.07,
"learning_rate": 4.6621946090233784e-05,
"loss": 2.7826,
"step": 20400
},
{
"epoch": 0.07,
"learning_rate": 4.660538700244081e-05,
"loss": 2.7919,
"step": 20500
},
{
"epoch": 0.07,
"learning_rate": 4.6588827914647844e-05,
"loss": 2.7845,
"step": 20600
},
{
"epoch": 0.07,
"learning_rate": 4.657226882685487e-05,
"loss": 2.8025,
"step": 20700
},
{
"epoch": 0.07,
"learning_rate": 4.65557097390619e-05,
"loss": 2.7732,
"step": 20800
},
{
"epoch": 0.07,
"learning_rate": 4.653915065126893e-05,
"loss": 2.7927,
"step": 20900
},
{
"epoch": 0.07,
"learning_rate": 4.652259156347595e-05,
"loss": 2.7786,
"step": 21000
},
{
"epoch": 0.07,
"learning_rate": 4.650603247568298e-05,
"loss": 2.7611,
"step": 21100
},
{
"epoch": 0.07,
"learning_rate": 4.6489473387890006e-05,
"loss": 2.7858,
"step": 21200
},
{
"epoch": 0.07,
"learning_rate": 4.6472914300097036e-05,
"loss": 2.7905,
"step": 21300
},
{
"epoch": 0.07,
"learning_rate": 4.6456355212304066e-05,
"loss": 2.7664,
"step": 21400
},
{
"epoch": 0.07,
"learning_rate": 4.643979612451109e-05,
"loss": 2.7882,
"step": 21500
},
{
"epoch": 0.07,
"learning_rate": 4.642323703671813e-05,
"loss": 2.7849,
"step": 21600
},
{
"epoch": 0.07,
"learning_rate": 4.640667794892515e-05,
"loss": 2.7855,
"step": 21700
},
{
"epoch": 0.07,
"learning_rate": 4.639011886113218e-05,
"loss": 2.7765,
"step": 21800
},
{
"epoch": 0.07,
"learning_rate": 4.637355977333921e-05,
"loss": 2.7879,
"step": 21900
},
{
"epoch": 0.07,
"learning_rate": 4.6357000685546235e-05,
"loss": 2.779,
"step": 22000
},
{
"epoch": 0.07,
"learning_rate": 4.6340441597753265e-05,
"loss": 2.7673,
"step": 22100
},
{
"epoch": 0.07,
"learning_rate": 4.6323882509960295e-05,
"loss": 2.7804,
"step": 22200
},
{
"epoch": 0.07,
"learning_rate": 4.630732342216732e-05,
"loss": 2.7752,
"step": 22300
},
{
"epoch": 0.07,
"learning_rate": 4.629076433437435e-05,
"loss": 2.776,
"step": 22400
},
{
"epoch": 0.07,
"learning_rate": 4.627420524658137e-05,
"loss": 2.769,
"step": 22500
},
{
"epoch": 0.07,
"learning_rate": 4.625764615878841e-05,
"loss": 2.7803,
"step": 22600
},
{
"epoch": 0.08,
"learning_rate": 4.624108707099544e-05,
"loss": 2.7783,
"step": 22700
},
{
"epoch": 0.08,
"learning_rate": 4.6224527983202463e-05,
"loss": 2.7791,
"step": 22800
},
{
"epoch": 0.08,
"learning_rate": 4.6207968895409494e-05,
"loss": 2.7947,
"step": 22900
},
{
"epoch": 0.08,
"learning_rate": 4.619140980761652e-05,
"loss": 2.7764,
"step": 23000
},
{
"epoch": 0.08,
"learning_rate": 4.617485071982355e-05,
"loss": 2.7893,
"step": 23100
},
{
"epoch": 0.08,
"learning_rate": 4.615829163203058e-05,
"loss": 2.7636,
"step": 23200
},
{
"epoch": 0.08,
"learning_rate": 4.61417325442376e-05,
"loss": 2.7794,
"step": 23300
},
{
"epoch": 0.08,
"learning_rate": 4.612517345644463e-05,
"loss": 2.7965,
"step": 23400
},
{
"epoch": 0.08,
"learning_rate": 4.610861436865166e-05,
"loss": 2.7818,
"step": 23500
},
{
"epoch": 0.08,
"learning_rate": 4.609205528085869e-05,
"loss": 2.7773,
"step": 23600
},
{
"epoch": 0.08,
"learning_rate": 4.607549619306572e-05,
"loss": 2.7809,
"step": 23700
},
{
"epoch": 0.08,
"learning_rate": 4.6058937105272746e-05,
"loss": 2.7609,
"step": 23800
},
{
"epoch": 0.08,
"learning_rate": 4.6042378017479776e-05,
"loss": 2.7931,
"step": 23900
},
{
"epoch": 0.08,
"learning_rate": 4.602581892968681e-05,
"loss": 2.7823,
"step": 24000
},
{
"epoch": 0.08,
"learning_rate": 4.600925984189383e-05,
"loss": 2.8011,
"step": 24100
},
{
"epoch": 0.08,
"learning_rate": 4.599270075410086e-05,
"loss": 2.7806,
"step": 24200
},
{
"epoch": 0.08,
"learning_rate": 4.5976141666307884e-05,
"loss": 2.7745,
"step": 24300
},
{
"epoch": 0.08,
"learning_rate": 4.5959582578514914e-05,
"loss": 2.7718,
"step": 24400
},
{
"epoch": 0.08,
"learning_rate": 4.5943023490721945e-05,
"loss": 2.7815,
"step": 24500
},
{
"epoch": 0.08,
"learning_rate": 4.5926464402928975e-05,
"loss": 2.7826,
"step": 24600
},
{
"epoch": 0.08,
"learning_rate": 4.5909905315136005e-05,
"loss": 2.7894,
"step": 24700
},
{
"epoch": 0.08,
"learning_rate": 4.589334622734303e-05,
"loss": 2.7665,
"step": 24800
},
{
"epoch": 0.08,
"learning_rate": 4.587678713955006e-05,
"loss": 2.7787,
"step": 24900
},
{
"epoch": 0.08,
"learning_rate": 4.586022805175709e-05,
"loss": 2.7896,
"step": 25000
},
{
"epoch": 0.08,
"learning_rate": 4.584366896396411e-05,
"loss": 2.78,
"step": 25100
},
{
"epoch": 0.08,
"learning_rate": 4.582710987617114e-05,
"loss": 2.7894,
"step": 25200
},
{
"epoch": 0.08,
"learning_rate": 4.5810550788378174e-05,
"loss": 2.7943,
"step": 25300
},
{
"epoch": 0.08,
"learning_rate": 4.57939917005852e-05,
"loss": 2.7871,
"step": 25400
},
{
"epoch": 0.08,
"learning_rate": 4.577743261279223e-05,
"loss": 2.7814,
"step": 25500
},
{
"epoch": 0.08,
"learning_rate": 4.576087352499926e-05,
"loss": 2.7911,
"step": 25600
},
{
"epoch": 0.09,
"learning_rate": 4.574431443720629e-05,
"loss": 2.7669,
"step": 25700
},
{
"epoch": 0.09,
"learning_rate": 4.572775534941331e-05,
"loss": 2.7794,
"step": 25800
},
{
"epoch": 0.09,
"learning_rate": 4.571119626162034e-05,
"loss": 2.803,
"step": 25900
},
{
"epoch": 0.09,
"learning_rate": 4.569463717382737e-05,
"loss": 2.7836,
"step": 26000
},
{
"epoch": 0.09,
"learning_rate": 4.5678078086034396e-05,
"loss": 2.781,
"step": 26100
},
{
"epoch": 0.09,
"learning_rate": 4.5661518998241426e-05,
"loss": 2.7913,
"step": 26200
},
{
"epoch": 0.09,
"learning_rate": 4.5644959910448456e-05,
"loss": 2.7712,
"step": 26300
},
{
"epoch": 0.09,
"learning_rate": 4.562840082265548e-05,
"loss": 2.7788,
"step": 26400
},
{
"epoch": 0.09,
"learning_rate": 4.561184173486251e-05,
"loss": 2.7796,
"step": 26500
},
{
"epoch": 0.09,
"learning_rate": 4.559528264706954e-05,
"loss": 2.7754,
"step": 26600
},
{
"epoch": 0.09,
"learning_rate": 4.557872355927657e-05,
"loss": 2.7809,
"step": 26700
},
{
"epoch": 0.09,
"learning_rate": 4.55621644714836e-05,
"loss": 2.793,
"step": 26800
},
{
"epoch": 0.09,
"learning_rate": 4.5545605383690624e-05,
"loss": 2.7834,
"step": 26900
},
{
"epoch": 0.09,
"learning_rate": 4.5529046295897655e-05,
"loss": 2.798,
"step": 27000
},
{
"epoch": 0.09,
"learning_rate": 4.551248720810468e-05,
"loss": 2.7781,
"step": 27100
},
{
"epoch": 0.09,
"learning_rate": 4.549592812031171e-05,
"loss": 2.7852,
"step": 27200
},
{
"epoch": 0.09,
"learning_rate": 4.547936903251874e-05,
"loss": 2.782,
"step": 27300
},
{
"epoch": 0.09,
"learning_rate": 4.546280994472576e-05,
"loss": 2.7856,
"step": 27400
},
{
"epoch": 0.09,
"learning_rate": 4.544625085693279e-05,
"loss": 2.7721,
"step": 27500
},
{
"epoch": 0.09,
"learning_rate": 4.542969176913982e-05,
"loss": 2.7839,
"step": 27600
},
{
"epoch": 0.09,
"learning_rate": 4.541313268134685e-05,
"loss": 2.7848,
"step": 27700
},
{
"epoch": 0.09,
"learning_rate": 4.5396573593553884e-05,
"loss": 2.7943,
"step": 27800
},
{
"epoch": 0.09,
"learning_rate": 4.538001450576091e-05,
"loss": 2.7878,
"step": 27900
},
{
"epoch": 0.09,
"learning_rate": 4.536345541796794e-05,
"loss": 2.7936,
"step": 28000
},
{
"epoch": 0.09,
"learning_rate": 4.534689633017497e-05,
"loss": 2.7804,
"step": 28100
},
{
"epoch": 0.09,
"learning_rate": 4.533033724238199e-05,
"loss": 2.7908,
"step": 28200
},
{
"epoch": 0.09,
"learning_rate": 4.531377815458902e-05,
"loss": 2.7857,
"step": 28300
},
{
"epoch": 0.09,
"learning_rate": 4.5297219066796045e-05,
"loss": 2.7779,
"step": 28400
},
{
"epoch": 0.09,
"learning_rate": 4.5280659979003075e-05,
"loss": 2.7891,
"step": 28500
},
{
"epoch": 0.09,
"learning_rate": 4.526410089121011e-05,
"loss": 2.7715,
"step": 28600
},
{
"epoch": 0.1,
"learning_rate": 4.5247541803417136e-05,
"loss": 2.7961,
"step": 28700
},
{
"epoch": 0.1,
"learning_rate": 4.5230982715624166e-05,
"loss": 2.781,
"step": 28800
},
{
"epoch": 0.1,
"learning_rate": 4.521442362783119e-05,
"loss": 2.7705,
"step": 28900
},
{
"epoch": 0.1,
"learning_rate": 4.519786454003822e-05,
"loss": 2.7888,
"step": 29000
},
{
"epoch": 0.1,
"learning_rate": 4.518130545224525e-05,
"loss": 2.7971,
"step": 29100
},
{
"epoch": 0.1,
"learning_rate": 4.5164746364452274e-05,
"loss": 2.7816,
"step": 29200
},
{
"epoch": 0.1,
"learning_rate": 4.5148187276659304e-05,
"loss": 2.7808,
"step": 29300
},
{
"epoch": 0.1,
"learning_rate": 4.5131628188866334e-05,
"loss": 2.7827,
"step": 29400
},
{
"epoch": 0.1,
"learning_rate": 4.511506910107336e-05,
"loss": 2.7661,
"step": 29500
},
{
"epoch": 0.1,
"learning_rate": 4.5098510013280395e-05,
"loss": 2.7774,
"step": 29600
},
{
"epoch": 0.1,
"learning_rate": 4.508195092548742e-05,
"loss": 2.7698,
"step": 29700
},
{
"epoch": 0.1,
"learning_rate": 4.506539183769445e-05,
"loss": 2.7824,
"step": 29800
},
{
"epoch": 0.1,
"learning_rate": 4.504883274990148e-05,
"loss": 2.7745,
"step": 29900
},
{
"epoch": 0.1,
"learning_rate": 4.50322736621085e-05,
"loss": 2.7787,
"step": 30000
},
{
"epoch": 0.1,
"learning_rate": 4.501571457431553e-05,
"loss": 2.7825,
"step": 30100
},
{
"epoch": 0.1,
"learning_rate": 4.4999155486522557e-05,
"loss": 2.7763,
"step": 30200
},
{
"epoch": 0.1,
"learning_rate": 4.498259639872959e-05,
"loss": 2.7907,
"step": 30300
},
{
"epoch": 0.1,
"learning_rate": 4.496603731093662e-05,
"loss": 2.7634,
"step": 30400
},
{
"epoch": 0.1,
"learning_rate": 4.494947822314364e-05,
"loss": 2.7782,
"step": 30500
},
{
"epoch": 0.1,
"learning_rate": 4.493291913535068e-05,
"loss": 2.7861,
"step": 30600
},
{
"epoch": 0.1,
"learning_rate": 4.49163600475577e-05,
"loss": 2.7676,
"step": 30700
},
{
"epoch": 0.1,
"learning_rate": 4.489980095976473e-05,
"loss": 2.7768,
"step": 30800
},
{
"epoch": 0.1,
"learning_rate": 4.488324187197176e-05,
"loss": 2.7853,
"step": 30900
},
{
"epoch": 0.1,
"learning_rate": 4.4866682784178785e-05,
"loss": 2.7915,
"step": 31000
},
{
"epoch": 0.1,
"learning_rate": 4.4850123696385816e-05,
"loss": 2.7747,
"step": 31100
},
{
"epoch": 0.1,
"learning_rate": 4.4833564608592846e-05,
"loss": 2.7749,
"step": 31200
},
{
"epoch": 0.1,
"learning_rate": 4.481700552079987e-05,
"loss": 2.7841,
"step": 31300
},
{
"epoch": 0.1,
"learning_rate": 4.48004464330069e-05,
"loss": 2.7805,
"step": 31400
},
{
"epoch": 0.1,
"learning_rate": 4.478388734521392e-05,
"loss": 2.7825,
"step": 31500
},
{
"epoch": 0.1,
"learning_rate": 4.476732825742096e-05,
"loss": 2.7719,
"step": 31600
},
{
"epoch": 0.1,
"learning_rate": 4.475076916962799e-05,
"loss": 2.7913,
"step": 31700
},
{
"epoch": 0.11,
"learning_rate": 4.4734210081835014e-05,
"loss": 2.7799,
"step": 31800
},
{
"epoch": 0.11,
"learning_rate": 4.4717650994042045e-05,
"loss": 2.7791,
"step": 31900
},
{
"epoch": 0.11,
"learning_rate": 4.470109190624907e-05,
"loss": 2.8031,
"step": 32000
},
{
"epoch": 0.11,
"learning_rate": 4.46845328184561e-05,
"loss": 2.7789,
"step": 32100
},
{
"epoch": 0.11,
"learning_rate": 4.466797373066313e-05,
"loss": 2.7778,
"step": 32200
},
{
"epoch": 0.11,
"learning_rate": 4.465141464287015e-05,
"loss": 2.7862,
"step": 32300
},
{
"epoch": 0.11,
"learning_rate": 4.463485555507718e-05,
"loss": 2.7664,
"step": 32400
},
{
"epoch": 0.11,
"learning_rate": 4.461829646728421e-05,
"loss": 2.7767,
"step": 32500
},
{
"epoch": 0.11,
"learning_rate": 4.460173737949124e-05,
"loss": 2.7814,
"step": 32600
},
{
"epoch": 0.11,
"learning_rate": 4.458517829169827e-05,
"loss": 2.7786,
"step": 32700
},
{
"epoch": 0.11,
"learning_rate": 4.45686192039053e-05,
"loss": 2.765,
"step": 32800
},
{
"epoch": 0.11,
"learning_rate": 4.455206011611233e-05,
"loss": 2.783,
"step": 32900
},
{
"epoch": 0.11,
"learning_rate": 4.453550102831935e-05,
"loss": 2.7747,
"step": 33000
},
{
"epoch": 0.11,
"learning_rate": 4.451894194052638e-05,
"loss": 2.7765,
"step": 33100
},
{
"epoch": 0.11,
"learning_rate": 4.450238285273341e-05,
"loss": 2.7934,
"step": 33200
},
{
"epoch": 0.11,
"learning_rate": 4.4485823764940435e-05,
"loss": 2.7947,
"step": 33300
},
{
"epoch": 0.11,
"learning_rate": 4.4469264677147465e-05,
"loss": 2.7732,
"step": 33400
},
{
"epoch": 0.11,
"learning_rate": 4.4452705589354495e-05,
"loss": 2.7886,
"step": 33500
},
{
"epoch": 0.11,
"learning_rate": 4.4436146501561526e-05,
"loss": 2.7673,
"step": 33600
},
{
"epoch": 0.11,
"learning_rate": 4.4419587413768556e-05,
"loss": 2.7789,
"step": 33700
},
{
"epoch": 0.11,
"learning_rate": 4.440302832597558e-05,
"loss": 2.7755,
"step": 33800
},
{
"epoch": 0.11,
"learning_rate": 4.438646923818261e-05,
"loss": 2.7756,
"step": 33900
},
{
"epoch": 0.11,
"learning_rate": 4.436991015038964e-05,
"loss": 2.7785,
"step": 34000
},
{
"epoch": 0.11,
"learning_rate": 4.4353351062596664e-05,
"loss": 2.7778,
"step": 34100
},
{
"epoch": 0.11,
"learning_rate": 4.4336791974803694e-05,
"loss": 2.7646,
"step": 34200
},
{
"epoch": 0.11,
"learning_rate": 4.432023288701072e-05,
"loss": 2.7771,
"step": 34300
},
{
"epoch": 0.11,
"learning_rate": 4.430367379921775e-05,
"loss": 2.7974,
"step": 34400
},
{
"epoch": 0.11,
"learning_rate": 4.428711471142478e-05,
"loss": 2.7814,
"step": 34500
},
{
"epoch": 0.11,
"learning_rate": 4.427055562363181e-05,
"loss": 2.7861,
"step": 34600
},
{
"epoch": 0.11,
"learning_rate": 4.425399653583884e-05,
"loss": 2.7909,
"step": 34700
},
{
"epoch": 0.12,
"learning_rate": 4.423743744804586e-05,
"loss": 2.7781,
"step": 34800
},
{
"epoch": 0.12,
"learning_rate": 4.422087836025289e-05,
"loss": 2.7919,
"step": 34900
},
{
"epoch": 0.12,
"learning_rate": 4.420431927245992e-05,
"loss": 2.7792,
"step": 35000
},
{
"epoch": 0.12,
"learning_rate": 4.4187760184666946e-05,
"loss": 2.7902,
"step": 35100
},
{
"epoch": 0.12,
"learning_rate": 4.417120109687398e-05,
"loss": 2.7809,
"step": 35200
},
{
"epoch": 0.12,
"learning_rate": 4.415464200908101e-05,
"loss": 2.7827,
"step": 35300
},
{
"epoch": 0.12,
"learning_rate": 4.413808292128803e-05,
"loss": 2.7782,
"step": 35400
},
{
"epoch": 0.12,
"learning_rate": 4.412152383349506e-05,
"loss": 2.7922,
"step": 35500
},
{
"epoch": 0.12,
"learning_rate": 4.410496474570209e-05,
"loss": 2.788,
"step": 35600
},
{
"epoch": 0.12,
"learning_rate": 4.408840565790912e-05,
"loss": 2.7745,
"step": 35700
},
{
"epoch": 0.12,
"learning_rate": 4.407184657011615e-05,
"loss": 2.7619,
"step": 35800
},
{
"epoch": 0.12,
"learning_rate": 4.4055287482323175e-05,
"loss": 2.7751,
"step": 35900
},
{
"epoch": 0.12,
"learning_rate": 4.4038728394530206e-05,
"loss": 2.781,
"step": 36000
},
{
"epoch": 0.12,
"learning_rate": 4.402216930673723e-05,
"loss": 2.7855,
"step": 36100
},
{
"epoch": 0.12,
"learning_rate": 4.400561021894426e-05,
"loss": 2.7878,
"step": 36200
},
{
"epoch": 0.12,
"learning_rate": 4.398905113115129e-05,
"loss": 2.7891,
"step": 36300
},
{
"epoch": 0.12,
"learning_rate": 4.397249204335831e-05,
"loss": 2.7766,
"step": 36400
},
{
"epoch": 0.12,
"learning_rate": 4.3955932955565343e-05,
"loss": 2.7772,
"step": 36500
},
{
"epoch": 0.12,
"learning_rate": 4.3939373867772374e-05,
"loss": 2.7807,
"step": 36600
},
{
"epoch": 0.12,
"learning_rate": 4.3922814779979404e-05,
"loss": 2.7763,
"step": 36700
},
{
"epoch": 0.12,
"learning_rate": 4.3906255692186434e-05,
"loss": 2.7632,
"step": 36800
},
{
"epoch": 0.12,
"learning_rate": 4.388969660439346e-05,
"loss": 2.7803,
"step": 36900
},
{
"epoch": 0.12,
"learning_rate": 4.387313751660049e-05,
"loss": 2.7679,
"step": 37000
},
{
"epoch": 0.12,
"learning_rate": 4.385657842880752e-05,
"loss": 2.7711,
"step": 37100
},
{
"epoch": 0.12,
"learning_rate": 4.384001934101454e-05,
"loss": 2.7788,
"step": 37200
},
{
"epoch": 0.12,
"learning_rate": 4.382346025322157e-05,
"loss": 2.7683,
"step": 37300
},
{
"epoch": 0.12,
"learning_rate": 4.3806901165428596e-05,
"loss": 2.7817,
"step": 37400
},
{
"epoch": 0.12,
"learning_rate": 4.3790342077635626e-05,
"loss": 2.7799,
"step": 37500
},
{
"epoch": 0.12,
"learning_rate": 4.377378298984266e-05,
"loss": 2.7864,
"step": 37600
},
{
"epoch": 0.12,
"learning_rate": 4.375722390204969e-05,
"loss": 2.7847,
"step": 37700
},
{
"epoch": 0.13,
"learning_rate": 4.374066481425672e-05,
"loss": 2.7812,
"step": 37800
},
{
"epoch": 0.13,
"learning_rate": 4.372410572646374e-05,
"loss": 2.7792,
"step": 37900
},
{
"epoch": 0.13,
"learning_rate": 4.370754663867077e-05,
"loss": 2.7858,
"step": 38000
},
{
"epoch": 0.13,
"learning_rate": 4.36909875508778e-05,
"loss": 2.7867,
"step": 38100
},
{
"epoch": 0.13,
"learning_rate": 4.3674428463084825e-05,
"loss": 2.7868,
"step": 38200
},
{
"epoch": 0.13,
"learning_rate": 4.3657869375291855e-05,
"loss": 2.7763,
"step": 38300
},
{
"epoch": 0.13,
"learning_rate": 4.3641310287498885e-05,
"loss": 2.7739,
"step": 38400
},
{
"epoch": 0.13,
"learning_rate": 4.362475119970591e-05,
"loss": 2.7827,
"step": 38500
},
{
"epoch": 0.13,
"learning_rate": 4.3608192111912946e-05,
"loss": 2.7661,
"step": 38600
},
{
"epoch": 0.13,
"learning_rate": 4.359163302411997e-05,
"loss": 2.7839,
"step": 38700
},
{
"epoch": 0.13,
"learning_rate": 4.3575073936327e-05,
"loss": 2.7827,
"step": 38800
},
{
"epoch": 0.13,
"learning_rate": 4.355851484853403e-05,
"loss": 2.7967,
"step": 38900
},
{
"epoch": 0.13,
"learning_rate": 4.3541955760741054e-05,
"loss": 2.78,
"step": 39000
},
{
"epoch": 0.13,
"learning_rate": 4.3525396672948084e-05,
"loss": 2.7883,
"step": 39100
},
{
"epoch": 0.13,
"learning_rate": 4.350883758515511e-05,
"loss": 2.7717,
"step": 39200
},
{
"epoch": 0.13,
"learning_rate": 4.349227849736214e-05,
"loss": 2.7651,
"step": 39300
},
{
"epoch": 0.13,
"learning_rate": 4.347571940956917e-05,
"loss": 2.7869,
"step": 39400
},
{
"epoch": 0.13,
"learning_rate": 4.345916032177619e-05,
"loss": 2.7739,
"step": 39500
},
{
"epoch": 0.13,
"learning_rate": 4.344260123398323e-05,
"loss": 2.7743,
"step": 39600
},
{
"epoch": 0.13,
"learning_rate": 4.342604214619025e-05,
"loss": 2.7709,
"step": 39700
},
{
"epoch": 0.13,
"learning_rate": 4.340948305839728e-05,
"loss": 2.7829,
"step": 39800
},
{
"epoch": 0.13,
"learning_rate": 4.339292397060431e-05,
"loss": 2.783,
"step": 39900
},
{
"epoch": 0.13,
"learning_rate": 4.3376364882811336e-05,
"loss": 2.7823,
"step": 40000
},
{
"epoch": 0.13,
"learning_rate": 4.3359805795018366e-05,
"loss": 2.7846,
"step": 40100
},
{
"epoch": 0.13,
"learning_rate": 4.334324670722539e-05,
"loss": 2.7777,
"step": 40200
},
{
"epoch": 0.13,
"learning_rate": 4.332668761943242e-05,
"loss": 2.7726,
"step": 40300
},
{
"epoch": 0.13,
"learning_rate": 4.331012853163945e-05,
"loss": 2.7785,
"step": 40400
},
{
"epoch": 0.13,
"learning_rate": 4.3293569443846474e-05,
"loss": 2.776,
"step": 40500
},
{
"epoch": 0.13,
"learning_rate": 4.327701035605351e-05,
"loss": 2.7743,
"step": 40600
},
{
"epoch": 0.13,
"learning_rate": 4.3260451268260535e-05,
"loss": 2.7747,
"step": 40700
},
{
"epoch": 0.14,
"learning_rate": 4.3243892180467565e-05,
"loss": 2.7821,
"step": 40800
},
{
"epoch": 0.14,
"learning_rate": 4.3227333092674595e-05,
"loss": 2.7702,
"step": 40900
},
{
"epoch": 0.14,
"learning_rate": 4.321077400488162e-05,
"loss": 2.7712,
"step": 41000
},
{
"epoch": 0.14,
"learning_rate": 4.319421491708865e-05,
"loss": 2.7613,
"step": 41100
},
{
"epoch": 0.14,
"learning_rate": 4.317765582929568e-05,
"loss": 2.769,
"step": 41200
},
{
"epoch": 0.14,
"learning_rate": 4.31610967415027e-05,
"loss": 2.7878,
"step": 41300
},
{
"epoch": 0.14,
"learning_rate": 4.314453765370973e-05,
"loss": 2.7684,
"step": 41400
},
{
"epoch": 0.14,
"learning_rate": 4.312797856591676e-05,
"loss": 2.7804,
"step": 41500
},
{
"epoch": 0.14,
"learning_rate": 4.3111419478123794e-05,
"loss": 2.7601,
"step": 41600
},
{
"epoch": 0.14,
"learning_rate": 4.3094860390330824e-05,
"loss": 2.7866,
"step": 41700
},
{
"epoch": 0.14,
"learning_rate": 4.307830130253785e-05,
"loss": 2.7717,
"step": 41800
},
{
"epoch": 0.14,
"learning_rate": 4.306174221474488e-05,
"loss": 2.7905,
"step": 41900
},
{
"epoch": 0.14,
"learning_rate": 4.30451831269519e-05,
"loss": 2.7613,
"step": 42000
},
{
"epoch": 0.14,
"learning_rate": 4.302862403915893e-05,
"loss": 2.7759,
"step": 42100
},
{
"epoch": 0.14,
"learning_rate": 4.301206495136596e-05,
"loss": 2.7852,
"step": 42200
},
{
"epoch": 0.14,
"learning_rate": 4.2995505863572986e-05,
"loss": 2.7731,
"step": 42300
},
{
"epoch": 0.14,
"learning_rate": 4.2978946775780016e-05,
"loss": 2.7889,
"step": 42400
},
{
"epoch": 0.14,
"learning_rate": 4.2962387687987046e-05,
"loss": 2.7963,
"step": 42500
},
{
"epoch": 0.14,
"learning_rate": 4.2945828600194077e-05,
"loss": 2.7962,
"step": 42600
},
{
"epoch": 0.14,
"learning_rate": 4.292926951240111e-05,
"loss": 2.7735,
"step": 42700
},
{
"epoch": 0.14,
"learning_rate": 4.291271042460813e-05,
"loss": 2.7668,
"step": 42800
},
{
"epoch": 0.14,
"learning_rate": 4.289615133681516e-05,
"loss": 2.7756,
"step": 42900
},
{
"epoch": 0.14,
"learning_rate": 4.287959224902219e-05,
"loss": 2.7854,
"step": 43000
},
{
"epoch": 0.14,
"learning_rate": 4.2863033161229214e-05,
"loss": 2.786,
"step": 43100
},
{
"epoch": 0.14,
"learning_rate": 4.2846474073436245e-05,
"loss": 2.7745,
"step": 43200
},
{
"epoch": 0.14,
"learning_rate": 4.282991498564327e-05,
"loss": 2.7814,
"step": 43300
},
{
"epoch": 0.14,
"learning_rate": 4.28133558978503e-05,
"loss": 2.7922,
"step": 43400
},
{
"epoch": 0.14,
"learning_rate": 4.279679681005733e-05,
"loss": 2.7818,
"step": 43500
},
{
"epoch": 0.14,
"learning_rate": 4.278023772226436e-05,
"loss": 2.7714,
"step": 43600
},
{
"epoch": 0.14,
"learning_rate": 4.276367863447139e-05,
"loss": 2.7854,
"step": 43700
},
{
"epoch": 0.15,
"learning_rate": 4.274711954667841e-05,
"loss": 2.7787,
"step": 43800
},
{
"epoch": 0.15,
"learning_rate": 4.273056045888544e-05,
"loss": 2.788,
"step": 43900
},
{
"epoch": 0.15,
"learning_rate": 4.2714001371092474e-05,
"loss": 2.7661,
"step": 44000
},
{
"epoch": 0.15,
"learning_rate": 4.26974422832995e-05,
"loss": 2.7924,
"step": 44100
},
{
"epoch": 0.15,
"learning_rate": 4.268088319550653e-05,
"loss": 2.788,
"step": 44200
},
{
"epoch": 0.15,
"learning_rate": 4.266432410771356e-05,
"loss": 2.7842,
"step": 44300
},
{
"epoch": 0.15,
"learning_rate": 4.264776501992058e-05,
"loss": 2.7806,
"step": 44400
},
{
"epoch": 0.15,
"learning_rate": 4.263120593212761e-05,
"loss": 2.7746,
"step": 44500
},
{
"epoch": 0.15,
"learning_rate": 4.261464684433464e-05,
"loss": 2.788,
"step": 44600
},
{
"epoch": 0.15,
"learning_rate": 4.259808775654167e-05,
"loss": 2.7874,
"step": 44700
},
{
"epoch": 0.15,
"learning_rate": 4.25815286687487e-05,
"loss": 2.7711,
"step": 44800
},
{
"epoch": 0.15,
"learning_rate": 4.2564969580955726e-05,
"loss": 2.7841,
"step": 44900
},
{
"epoch": 0.15,
"learning_rate": 4.2548410493162756e-05,
"loss": 2.7632,
"step": 45000
},
{
"epoch": 0.15,
"learning_rate": 4.253185140536978e-05,
"loss": 2.7884,
"step": 45100
},
{
"epoch": 0.15,
"learning_rate": 4.251529231757681e-05,
"loss": 2.7688,
"step": 45200
},
{
"epoch": 0.15,
"learning_rate": 4.249873322978384e-05,
"loss": 2.7905,
"step": 45300
},
{
"epoch": 0.15,
"learning_rate": 4.2482174141990864e-05,
"loss": 2.7803,
"step": 45400
},
{
"epoch": 0.15,
"learning_rate": 4.2465615054197894e-05,
"loss": 2.7682,
"step": 45500
},
{
"epoch": 0.15,
"learning_rate": 4.2449055966404925e-05,
"loss": 2.7746,
"step": 45600
},
{
"epoch": 0.15,
"learning_rate": 4.2432496878611955e-05,
"loss": 2.7714,
"step": 45700
},
{
"epoch": 0.15,
"learning_rate": 4.2415937790818985e-05,
"loss": 2.7784,
"step": 45800
},
{
"epoch": 0.15,
"learning_rate": 4.239937870302601e-05,
"loss": 2.7796,
"step": 45900
},
{
"epoch": 0.15,
"learning_rate": 4.238281961523304e-05,
"loss": 2.768,
"step": 46000
},
{
"epoch": 0.15,
"learning_rate": 4.236626052744007e-05,
"loss": 2.787,
"step": 46100
},
{
"epoch": 0.15,
"learning_rate": 4.234970143964709e-05,
"loss": 2.7978,
"step": 46200
},
{
"epoch": 0.15,
"learning_rate": 4.233314235185412e-05,
"loss": 2.7752,
"step": 46300
},
{
"epoch": 0.15,
"learning_rate": 4.2316583264061147e-05,
"loss": 2.766,
"step": 46400
},
{
"epoch": 0.15,
"learning_rate": 4.230002417626818e-05,
"loss": 2.7893,
"step": 46500
},
{
"epoch": 0.15,
"learning_rate": 4.228346508847521e-05,
"loss": 2.778,
"step": 46600
},
{
"epoch": 0.15,
"learning_rate": 4.226690600068224e-05,
"loss": 2.7681,
"step": 46700
},
{
"epoch": 0.15,
"learning_rate": 4.225034691288927e-05,
"loss": 2.7622,
"step": 46800
},
{
"epoch": 0.16,
"learning_rate": 4.223378782509629e-05,
"loss": 2.7893,
"step": 46900
},
{
"epoch": 0.16,
"learning_rate": 4.221722873730332e-05,
"loss": 2.8002,
"step": 47000
},
{
"epoch": 0.16,
"learning_rate": 4.220066964951035e-05,
"loss": 2.7741,
"step": 47100
},
{
"epoch": 0.16,
"learning_rate": 4.2184110561717375e-05,
"loss": 2.7814,
"step": 47200
},
{
"epoch": 0.16,
"learning_rate": 4.2167551473924406e-05,
"loss": 2.7686,
"step": 47300
},
{
"epoch": 0.16,
"learning_rate": 4.215099238613143e-05,
"loss": 2.7809,
"step": 47400
},
{
"epoch": 0.16,
"learning_rate": 4.213443329833846e-05,
"loss": 2.7718,
"step": 47500
},
{
"epoch": 0.16,
"learning_rate": 4.21178742105455e-05,
"loss": 2.764,
"step": 47600
},
{
"epoch": 0.16,
"learning_rate": 4.210131512275252e-05,
"loss": 2.7808,
"step": 47700
},
{
"epoch": 0.16,
"learning_rate": 4.208475603495955e-05,
"loss": 2.7696,
"step": 47800
},
{
"epoch": 0.16,
"learning_rate": 4.2068196947166574e-05,
"loss": 2.7685,
"step": 47900
},
{
"epoch": 0.16,
"learning_rate": 4.2051637859373604e-05,
"loss": 2.773,
"step": 48000
},
{
"epoch": 0.16,
"learning_rate": 4.2035078771580635e-05,
"loss": 2.7705,
"step": 48100
},
{
"epoch": 0.16,
"learning_rate": 4.201851968378766e-05,
"loss": 2.7686,
"step": 48200
},
{
"epoch": 0.16,
"learning_rate": 4.200196059599469e-05,
"loss": 2.7807,
"step": 48300
},
{
"epoch": 0.16,
"learning_rate": 4.198540150820172e-05,
"loss": 2.7745,
"step": 48400
},
{
"epoch": 0.16,
"learning_rate": 4.196884242040874e-05,
"loss": 2.7956,
"step": 48500
},
{
"epoch": 0.16,
"learning_rate": 4.195228333261578e-05,
"loss": 2.7742,
"step": 48600
},
{
"epoch": 0.16,
"learning_rate": 4.19357242448228e-05,
"loss": 2.7747,
"step": 48700
},
{
"epoch": 0.16,
"learning_rate": 4.191916515702983e-05,
"loss": 2.7816,
"step": 48800
},
{
"epoch": 0.16,
"learning_rate": 4.1902606069236863e-05,
"loss": 2.7863,
"step": 48900
},
{
"epoch": 0.16,
"learning_rate": 4.188604698144389e-05,
"loss": 2.7786,
"step": 49000
},
{
"epoch": 0.16,
"learning_rate": 4.186948789365092e-05,
"loss": 2.775,
"step": 49100
},
{
"epoch": 0.16,
"learning_rate": 4.185292880585794e-05,
"loss": 2.7834,
"step": 49200
},
{
"epoch": 0.16,
"learning_rate": 4.183636971806497e-05,
"loss": 2.7661,
"step": 49300
},
{
"epoch": 0.16,
"learning_rate": 4.1819810630272e-05,
"loss": 2.7726,
"step": 49400
},
{
"epoch": 0.16,
"learning_rate": 4.1803251542479025e-05,
"loss": 2.7777,
"step": 49500
},
{
"epoch": 0.16,
"learning_rate": 4.178669245468606e-05,
"loss": 2.7784,
"step": 49600
},
{
"epoch": 0.16,
"learning_rate": 4.1770133366893085e-05,
"loss": 2.7934,
"step": 49700
},
{
"epoch": 0.16,
"learning_rate": 4.1753574279100116e-05,
"loss": 2.7814,
"step": 49800
},
{
"epoch": 0.17,
"learning_rate": 4.1737015191307146e-05,
"loss": 2.7681,
"step": 49900
},
{
"epoch": 0.17,
"learning_rate": 4.172045610351417e-05,
"loss": 2.7667,
"step": 50000
},
{
"epoch": 0.17,
"learning_rate": 4.17038970157212e-05,
"loss": 2.783,
"step": 50100
},
{
"epoch": 0.17,
"learning_rate": 4.168733792792823e-05,
"loss": 2.767,
"step": 50200
},
{
"epoch": 0.17,
"learning_rate": 4.1670778840135254e-05,
"loss": 2.7718,
"step": 50300
},
{
"epoch": 0.17,
"learning_rate": 4.1654219752342284e-05,
"loss": 2.7742,
"step": 50400
},
{
"epoch": 0.17,
"learning_rate": 4.163766066454931e-05,
"loss": 2.7618,
"step": 50500
},
{
"epoch": 0.17,
"learning_rate": 4.1621101576756345e-05,
"loss": 2.7596,
"step": 50600
},
{
"epoch": 0.17,
"learning_rate": 4.1604542488963375e-05,
"loss": 2.7777,
"step": 50700
},
{
"epoch": 0.17,
"learning_rate": 4.15879834011704e-05,
"loss": 2.7843,
"step": 50800
},
{
"epoch": 0.17,
"learning_rate": 4.157142431337743e-05,
"loss": 2.7709,
"step": 50900
},
{
"epoch": 0.17,
"learning_rate": 4.155486522558445e-05,
"loss": 2.7638,
"step": 51000
},
{
"epoch": 0.17,
"learning_rate": 4.153830613779148e-05,
"loss": 2.7725,
"step": 51100
},
{
"epoch": 0.17,
"learning_rate": 4.152174704999851e-05,
"loss": 2.7636,
"step": 51200
},
{
"epoch": 0.17,
"learning_rate": 4.1505187962205536e-05,
"loss": 2.7787,
"step": 51300
},
{
"epoch": 0.17,
"learning_rate": 4.148862887441257e-05,
"loss": 2.7779,
"step": 51400
},
{
"epoch": 0.17,
"learning_rate": 4.14720697866196e-05,
"loss": 2.7758,
"step": 51500
},
{
"epoch": 0.17,
"learning_rate": 4.145551069882663e-05,
"loss": 2.7652,
"step": 51600
},
{
"epoch": 0.17,
"learning_rate": 4.143895161103366e-05,
"loss": 2.7841,
"step": 51700
},
{
"epoch": 0.17,
"learning_rate": 4.142239252324068e-05,
"loss": 2.7681,
"step": 51800
},
{
"epoch": 0.17,
"learning_rate": 4.140583343544771e-05,
"loss": 2.7802,
"step": 51900
},
{
"epoch": 0.17,
"learning_rate": 4.138927434765474e-05,
"loss": 2.7882,
"step": 52000
},
{
"epoch": 0.17,
"learning_rate": 4.1372715259861765e-05,
"loss": 2.781,
"step": 52100
},
{
"epoch": 0.17,
"learning_rate": 4.1356156172068796e-05,
"loss": 2.7906,
"step": 52200
},
{
"epoch": 0.17,
"learning_rate": 4.133959708427582e-05,
"loss": 2.7847,
"step": 52300
},
{
"epoch": 0.17,
"learning_rate": 4.132303799648285e-05,
"loss": 2.7832,
"step": 52400
},
{
"epoch": 0.17,
"learning_rate": 4.130647890868988e-05,
"loss": 2.7637,
"step": 52500
},
{
"epoch": 0.17,
"learning_rate": 4.128991982089691e-05,
"loss": 2.7748,
"step": 52600
},
{
"epoch": 0.17,
"learning_rate": 4.127336073310394e-05,
"loss": 2.7669,
"step": 52700
},
{
"epoch": 0.17,
"learning_rate": 4.1256801645310964e-05,
"loss": 2.7726,
"step": 52800
},
{
"epoch": 0.18,
"learning_rate": 4.1240242557517994e-05,
"loss": 2.7811,
"step": 52900
},
{
"epoch": 0.18,
"learning_rate": 4.1223683469725024e-05,
"loss": 2.7769,
"step": 53000
},
{
"epoch": 0.18,
"learning_rate": 4.120712438193205e-05,
"loss": 2.7668,
"step": 53100
},
{
"epoch": 0.18,
"learning_rate": 4.119056529413908e-05,
"loss": 2.786,
"step": 53200
},
{
"epoch": 0.18,
"learning_rate": 4.117400620634611e-05,
"loss": 2.7671,
"step": 53300
},
{
"epoch": 0.18,
"learning_rate": 4.115744711855313e-05,
"loss": 2.7594,
"step": 53400
},
{
"epoch": 0.18,
"learning_rate": 4.114088803076016e-05,
"loss": 2.7735,
"step": 53500
},
{
"epoch": 0.18,
"learning_rate": 4.112432894296719e-05,
"loss": 2.7822,
"step": 53600
},
{
"epoch": 0.18,
"learning_rate": 4.110776985517422e-05,
"loss": 2.7798,
"step": 53700
},
{
"epoch": 0.18,
"learning_rate": 4.1091210767381246e-05,
"loss": 2.7783,
"step": 53800
},
{
"epoch": 0.18,
"learning_rate": 4.107465167958828e-05,
"loss": 2.7799,
"step": 53900
},
{
"epoch": 0.18,
"learning_rate": 4.105809259179531e-05,
"loss": 2.7772,
"step": 54000
},
{
"epoch": 0.18,
"learning_rate": 4.104153350400233e-05,
"loss": 2.7765,
"step": 54100
},
{
"epoch": 0.18,
"learning_rate": 4.102497441620936e-05,
"loss": 2.7779,
"step": 54200
},
{
"epoch": 0.18,
"learning_rate": 4.100841532841639e-05,
"loss": 2.7596,
"step": 54300
},
{
"epoch": 0.18,
"learning_rate": 4.0991856240623415e-05,
"loss": 2.7687,
"step": 54400
},
{
"epoch": 0.18,
"learning_rate": 4.0975297152830445e-05,
"loss": 2.7869,
"step": 54500
},
{
"epoch": 0.18,
"learning_rate": 4.0958738065037475e-05,
"loss": 2.7692,
"step": 54600
},
{
"epoch": 0.18,
"learning_rate": 4.0942178977244506e-05,
"loss": 2.7662,
"step": 54700
},
{
"epoch": 0.18,
"learning_rate": 4.0925619889451536e-05,
"loss": 2.7748,
"step": 54800
},
{
"epoch": 0.18,
"learning_rate": 4.090906080165856e-05,
"loss": 2.7703,
"step": 54900
},
{
"epoch": 0.18,
"learning_rate": 4.089250171386559e-05,
"loss": 2.7659,
"step": 55000
},
{
"epoch": 0.18,
"learning_rate": 4.087594262607261e-05,
"loss": 2.7831,
"step": 55100
},
{
"epoch": 0.18,
"learning_rate": 4.0859383538279644e-05,
"loss": 2.7628,
"step": 55200
},
{
"epoch": 0.18,
"learning_rate": 4.0842824450486674e-05,
"loss": 2.7835,
"step": 55300
},
{
"epoch": 0.18,
"learning_rate": 4.08262653626937e-05,
"loss": 2.7772,
"step": 55400
},
{
"epoch": 0.18,
"learning_rate": 4.080970627490073e-05,
"loss": 2.7799,
"step": 55500
},
{
"epoch": 0.18,
"learning_rate": 4.079314718710776e-05,
"loss": 2.8003,
"step": 55600
},
{
"epoch": 0.18,
"learning_rate": 4.077658809931479e-05,
"loss": 2.7896,
"step": 55700
},
{
"epoch": 0.18,
"learning_rate": 4.076002901152182e-05,
"loss": 2.7685,
"step": 55800
},
{
"epoch": 0.19,
"learning_rate": 4.074346992372884e-05,
"loss": 2.7851,
"step": 55900
},
{
"epoch": 0.19,
"learning_rate": 4.072691083593587e-05,
"loss": 2.78,
"step": 56000
},
{
"epoch": 0.19,
"learning_rate": 4.07103517481429e-05,
"loss": 2.7783,
"step": 56100
},
{
"epoch": 0.19,
"learning_rate": 4.0693792660349926e-05,
"loss": 2.7738,
"step": 56200
},
{
"epoch": 0.19,
"learning_rate": 4.0677233572556957e-05,
"loss": 2.7777,
"step": 56300
},
{
"epoch": 0.19,
"learning_rate": 4.066067448476398e-05,
"loss": 2.7716,
"step": 56400
},
{
"epoch": 0.19,
"learning_rate": 4.064411539697101e-05,
"loss": 2.7767,
"step": 56500
},
{
"epoch": 0.19,
"learning_rate": 4.062755630917805e-05,
"loss": 2.7845,
"step": 56600
},
{
"epoch": 0.19,
"learning_rate": 4.061099722138507e-05,
"loss": 2.7696,
"step": 56700
},
{
"epoch": 0.19,
"learning_rate": 4.05944381335921e-05,
"loss": 2.7712,
"step": 56800
},
{
"epoch": 0.19,
"learning_rate": 4.0577879045799125e-05,
"loss": 2.7781,
"step": 56900
},
{
"epoch": 0.19,
"learning_rate": 4.0561319958006155e-05,
"loss": 2.7841,
"step": 57000
},
{
"epoch": 0.19,
"learning_rate": 4.0544760870213185e-05,
"loss": 2.7655,
"step": 57100
},
{
"epoch": 0.19,
"learning_rate": 4.052820178242021e-05,
"loss": 2.7694,
"step": 57200
},
{
"epoch": 0.19,
"learning_rate": 4.051164269462724e-05,
"loss": 2.7764,
"step": 57300
},
{
"epoch": 0.19,
"learning_rate": 4.049508360683427e-05,
"loss": 2.7616,
"step": 57400
},
{
"epoch": 0.19,
"learning_rate": 4.047852451904129e-05,
"loss": 2.7696,
"step": 57500
},
{
"epoch": 0.19,
"learning_rate": 4.046196543124833e-05,
"loss": 2.7623,
"step": 57600
},
{
"epoch": 0.19,
"learning_rate": 4.0445406343455354e-05,
"loss": 2.7686,
"step": 57700
},
{
"epoch": 0.19,
"learning_rate": 4.0428847255662384e-05,
"loss": 2.7781,
"step": 57800
},
{
"epoch": 0.19,
"learning_rate": 4.0412288167869414e-05,
"loss": 2.7686,
"step": 57900
},
{
"epoch": 0.19,
"learning_rate": 4.039572908007644e-05,
"loss": 2.7831,
"step": 58000
},
{
"epoch": 0.19,
"learning_rate": 4.037916999228347e-05,
"loss": 2.7731,
"step": 58100
},
{
"epoch": 0.19,
"learning_rate": 4.036261090449049e-05,
"loss": 2.7776,
"step": 58200
},
{
"epoch": 0.19,
"learning_rate": 4.034605181669752e-05,
"loss": 2.7802,
"step": 58300
},
{
"epoch": 0.19,
"learning_rate": 4.032949272890455e-05,
"loss": 2.7728,
"step": 58400
},
{
"epoch": 0.19,
"learning_rate": 4.0312933641111576e-05,
"loss": 2.761,
"step": 58500
},
{
"epoch": 0.19,
"learning_rate": 4.029637455331861e-05,
"loss": 2.7785,
"step": 58600
},
{
"epoch": 0.19,
"learning_rate": 4.0279815465525636e-05,
"loss": 2.7761,
"step": 58700
},
{
"epoch": 0.19,
"learning_rate": 4.0263256377732667e-05,
"loss": 2.76,
"step": 58800
},
{
"epoch": 0.2,
"learning_rate": 4.02466972899397e-05,
"loss": 2.7783,
"step": 58900
},
{
"epoch": 0.2,
"learning_rate": 4.023013820214672e-05,
"loss": 2.7653,
"step": 59000
},
{
"epoch": 0.2,
"learning_rate": 4.021357911435375e-05,
"loss": 2.7826,
"step": 59100
},
{
"epoch": 0.2,
"learning_rate": 4.019702002656078e-05,
"loss": 2.7748,
"step": 59200
},
{
"epoch": 0.2,
"learning_rate": 4.0180460938767805e-05,
"loss": 2.7869,
"step": 59300
},
{
"epoch": 0.2,
"learning_rate": 4.0163901850974835e-05,
"loss": 2.7672,
"step": 59400
},
{
"epoch": 0.2,
"learning_rate": 4.014734276318186e-05,
"loss": 2.7845,
"step": 59500
},
{
"epoch": 0.2,
"learning_rate": 4.0130783675388895e-05,
"loss": 2.7661,
"step": 59600
},
{
"epoch": 0.2,
"learning_rate": 4.011422458759592e-05,
"loss": 2.7639,
"step": 59700
},
{
"epoch": 0.2,
"learning_rate": 4.009766549980295e-05,
"loss": 2.7762,
"step": 59800
},
{
"epoch": 0.2,
"learning_rate": 4.008110641200998e-05,
"loss": 2.7801,
"step": 59900
},
{
"epoch": 0.2,
"learning_rate": 4.0064547324217e-05,
"loss": 2.7627,
"step": 60000
},
{
"epoch": 0.2,
"learning_rate": 4.004798823642403e-05,
"loss": 2.7793,
"step": 60100
},
{
"epoch": 0.2,
"learning_rate": 4.0031429148631064e-05,
"loss": 2.7856,
"step": 60200
},
{
"epoch": 0.2,
"learning_rate": 4.001487006083809e-05,
"loss": 2.784,
"step": 60300
},
{
"epoch": 0.2,
"learning_rate": 3.999831097304512e-05,
"loss": 2.7667,
"step": 60400
},
{
"epoch": 0.2,
"learning_rate": 3.998175188525215e-05,
"loss": 2.777,
"step": 60500
},
{
"epoch": 0.2,
"learning_rate": 3.996519279745918e-05,
"loss": 2.7757,
"step": 60600
},
{
"epoch": 0.2,
"learning_rate": 3.994863370966621e-05,
"loss": 2.7821,
"step": 60700
},
{
"epoch": 0.2,
"learning_rate": 3.993207462187323e-05,
"loss": 2.7787,
"step": 60800
},
{
"epoch": 0.2,
"learning_rate": 3.991551553408026e-05,
"loss": 2.7794,
"step": 60900
},
{
"epoch": 0.2,
"learning_rate": 3.9898956446287286e-05,
"loss": 2.7627,
"step": 61000
},
{
"epoch": 0.2,
"learning_rate": 3.9882397358494316e-05,
"loss": 2.7562,
"step": 61100
},
{
"epoch": 0.2,
"learning_rate": 3.9865838270701346e-05,
"loss": 2.7877,
"step": 61200
},
{
"epoch": 0.2,
"learning_rate": 3.984927918290837e-05,
"loss": 2.7775,
"step": 61300
},
{
"epoch": 0.2,
"learning_rate": 3.98327200951154e-05,
"loss": 2.7629,
"step": 61400
},
{
"epoch": 0.2,
"learning_rate": 3.981616100732243e-05,
"loss": 2.7684,
"step": 61500
},
{
"epoch": 0.2,
"learning_rate": 3.979960191952946e-05,
"loss": 2.7833,
"step": 61600
},
{
"epoch": 0.2,
"learning_rate": 3.978304283173649e-05,
"loss": 2.7686,
"step": 61700
},
{
"epoch": 0.2,
"learning_rate": 3.9766483743943515e-05,
"loss": 2.7748,
"step": 61800
},
{
"epoch": 0.21,
"learning_rate": 3.9749924656150545e-05,
"loss": 2.7687,
"step": 61900
},
{
"epoch": 0.21,
"learning_rate": 3.9733365568357575e-05,
"loss": 2.7628,
"step": 62000
},
{
"epoch": 0.21,
"learning_rate": 3.97168064805646e-05,
"loss": 2.7854,
"step": 62100
},
{
"epoch": 0.21,
"learning_rate": 3.970024739277163e-05,
"loss": 2.7701,
"step": 62200
},
{
"epoch": 0.21,
"learning_rate": 3.968368830497865e-05,
"loss": 2.7823,
"step": 62300
},
{
"epoch": 0.21,
"learning_rate": 3.966712921718568e-05,
"loss": 2.7639,
"step": 62400
},
{
"epoch": 0.21,
"learning_rate": 3.965057012939271e-05,
"loss": 2.7754,
"step": 62500
},
{
"epoch": 0.21,
"learning_rate": 3.9634011041599743e-05,
"loss": 2.7737,
"step": 62600
},
{
"epoch": 0.21,
"learning_rate": 3.9617451953806774e-05,
"loss": 2.7724,
"step": 62700
},
{
"epoch": 0.21,
"learning_rate": 3.96008928660138e-05,
"loss": 2.7747,
"step": 62800
},
{
"epoch": 0.21,
"learning_rate": 3.958433377822083e-05,
"loss": 2.7731,
"step": 62900
},
{
"epoch": 0.21,
"learning_rate": 3.956777469042786e-05,
"loss": 2.7666,
"step": 63000
},
{
"epoch": 0.21,
"learning_rate": 3.955121560263488e-05,
"loss": 2.7722,
"step": 63100
},
{
"epoch": 0.21,
"learning_rate": 3.953465651484191e-05,
"loss": 2.7725,
"step": 63200
},
{
"epoch": 0.21,
"learning_rate": 3.951809742704894e-05,
"loss": 2.7758,
"step": 63300
},
{
"epoch": 0.21,
"learning_rate": 3.9501538339255965e-05,
"loss": 2.7809,
"step": 63400
},
{
"epoch": 0.21,
"learning_rate": 3.9484979251462996e-05,
"loss": 2.785,
"step": 63500
},
{
"epoch": 0.21,
"learning_rate": 3.9468420163670026e-05,
"loss": 2.7749,
"step": 63600
},
{
"epoch": 0.21,
"learning_rate": 3.9451861075877056e-05,
"loss": 2.7759,
"step": 63700
},
{
"epoch": 0.21,
"learning_rate": 3.943530198808409e-05,
"loss": 2.7773,
"step": 63800
},
{
"epoch": 0.21,
"learning_rate": 3.941874290029111e-05,
"loss": 2.7561,
"step": 63900
},
{
"epoch": 0.21,
"learning_rate": 3.940218381249814e-05,
"loss": 2.7739,
"step": 64000
},
{
"epoch": 0.21,
"learning_rate": 3.9385624724705164e-05,
"loss": 2.7602,
"step": 64100
},
{
"epoch": 0.21,
"learning_rate": 3.9369065636912194e-05,
"loss": 2.7807,
"step": 64200
},
{
"epoch": 0.21,
"learning_rate": 3.9352506549119225e-05,
"loss": 2.7672,
"step": 64300
},
{
"epoch": 0.21,
"learning_rate": 3.933594746132625e-05,
"loss": 2.7706,
"step": 64400
},
{
"epoch": 0.21,
"learning_rate": 3.931938837353328e-05,
"loss": 2.7772,
"step": 64500
},
{
"epoch": 0.21,
"learning_rate": 3.930282928574031e-05,
"loss": 2.7707,
"step": 64600
},
{
"epoch": 0.21,
"learning_rate": 3.928627019794734e-05,
"loss": 2.7742,
"step": 64700
},
{
"epoch": 0.21,
"learning_rate": 3.926971111015437e-05,
"loss": 2.7632,
"step": 64800
},
{
"epoch": 0.21,
"learning_rate": 3.925315202236139e-05,
"loss": 2.7765,
"step": 64900
},
{
"epoch": 0.22,
"learning_rate": 3.923659293456842e-05,
"loss": 2.7612,
"step": 65000
},
{
"epoch": 0.22,
"learning_rate": 3.9220033846775453e-05,
"loss": 2.7697,
"step": 65100
},
{
"epoch": 0.22,
"learning_rate": 3.920347475898248e-05,
"loss": 2.7762,
"step": 65200
},
{
"epoch": 0.22,
"learning_rate": 3.918691567118951e-05,
"loss": 2.7635,
"step": 65300
},
{
"epoch": 0.22,
"learning_rate": 3.917035658339653e-05,
"loss": 2.7821,
"step": 65400
},
{
"epoch": 0.22,
"learning_rate": 3.915379749560356e-05,
"loss": 2.7695,
"step": 65500
},
{
"epoch": 0.22,
"learning_rate": 3.913723840781059e-05,
"loss": 2.7734,
"step": 65600
},
{
"epoch": 0.22,
"learning_rate": 3.912067932001762e-05,
"loss": 2.7684,
"step": 65700
},
{
"epoch": 0.22,
"learning_rate": 3.910412023222465e-05,
"loss": 2.7958,
"step": 65800
},
{
"epoch": 0.22,
"learning_rate": 3.9087561144431676e-05,
"loss": 2.7738,
"step": 65900
},
{
"epoch": 0.22,
"learning_rate": 3.9071002056638706e-05,
"loss": 2.7866,
"step": 66000
},
{
"epoch": 0.22,
"learning_rate": 3.9054442968845736e-05,
"loss": 2.7766,
"step": 66100
},
{
"epoch": 0.22,
"learning_rate": 3.903788388105276e-05,
"loss": 2.7637,
"step": 66200
},
{
"epoch": 0.22,
"learning_rate": 3.902132479325979e-05,
"loss": 2.7692,
"step": 66300
},
{
"epoch": 0.22,
"learning_rate": 3.900476570546682e-05,
"loss": 2.7877,
"step": 66400
},
{
"epoch": 0.22,
"learning_rate": 3.8988206617673844e-05,
"loss": 2.778,
"step": 66500
},
{
"epoch": 0.22,
"learning_rate": 3.897164752988088e-05,
"loss": 2.767,
"step": 66600
},
{
"epoch": 0.22,
"learning_rate": 3.8955088442087904e-05,
"loss": 2.7792,
"step": 66700
},
{
"epoch": 0.22,
"learning_rate": 3.8938529354294935e-05,
"loss": 2.7619,
"step": 66800
},
{
"epoch": 0.22,
"learning_rate": 3.892197026650196e-05,
"loss": 2.7858,
"step": 66900
},
{
"epoch": 0.22,
"learning_rate": 3.890541117870899e-05,
"loss": 2.7774,
"step": 67000
},
{
"epoch": 0.22,
"learning_rate": 3.888885209091602e-05,
"loss": 2.78,
"step": 67100
},
{
"epoch": 0.22,
"learning_rate": 3.887229300312304e-05,
"loss": 2.77,
"step": 67200
},
{
"epoch": 0.22,
"learning_rate": 3.885573391533007e-05,
"loss": 2.7834,
"step": 67300
},
{
"epoch": 0.22,
"learning_rate": 3.88391748275371e-05,
"loss": 2.7817,
"step": 67400
},
{
"epoch": 0.22,
"learning_rate": 3.8822615739744126e-05,
"loss": 2.7656,
"step": 67500
},
{
"epoch": 0.22,
"learning_rate": 3.8806056651951164e-05,
"loss": 2.7873,
"step": 67600
},
{
"epoch": 0.22,
"learning_rate": 3.878949756415819e-05,
"loss": 2.7568,
"step": 67700
},
{
"epoch": 0.22,
"learning_rate": 3.877293847636522e-05,
"loss": 2.7832,
"step": 67800
},
{
"epoch": 0.22,
"learning_rate": 3.875637938857225e-05,
"loss": 2.7802,
"step": 67900
},
{
"epoch": 0.23,
"learning_rate": 3.873982030077927e-05,
"loss": 2.7669,
"step": 68000
},
{
"epoch": 0.23,
"learning_rate": 3.87232612129863e-05,
"loss": 2.7748,
"step": 68100
},
{
"epoch": 0.23,
"learning_rate": 3.8706702125193325e-05,
"loss": 2.767,
"step": 68200
},
{
"epoch": 0.23,
"learning_rate": 3.8690143037400355e-05,
"loss": 2.7727,
"step": 68300
},
{
"epoch": 0.23,
"learning_rate": 3.8673583949607386e-05,
"loss": 2.7725,
"step": 68400
},
{
"epoch": 0.23,
"learning_rate": 3.865702486181441e-05,
"loss": 2.7773,
"step": 68500
},
{
"epoch": 0.23,
"learning_rate": 3.8640465774021446e-05,
"loss": 2.7834,
"step": 68600
},
{
"epoch": 0.23,
"learning_rate": 3.862390668622847e-05,
"loss": 2.7777,
"step": 68700
},
{
"epoch": 0.23,
"learning_rate": 3.86073475984355e-05,
"loss": 2.7757,
"step": 68800
},
{
"epoch": 0.23,
"learning_rate": 3.859078851064253e-05,
"loss": 2.7624,
"step": 68900
},
{
"epoch": 0.23,
"learning_rate": 3.8574229422849554e-05,
"loss": 2.7765,
"step": 69000
},
{
"epoch": 0.23,
"learning_rate": 3.8557670335056584e-05,
"loss": 2.7782,
"step": 69100
},
{
"epoch": 0.23,
"learning_rate": 3.8541111247263614e-05,
"loss": 2.7757,
"step": 69200
},
{
"epoch": 0.23,
"learning_rate": 3.852455215947064e-05,
"loss": 2.775,
"step": 69300
},
{
"epoch": 0.23,
"learning_rate": 3.850799307167767e-05,
"loss": 2.7646,
"step": 69400
},
{
"epoch": 0.23,
"learning_rate": 3.849143398388469e-05,
"loss": 2.7669,
"step": 69500
},
{
"epoch": 0.23,
"learning_rate": 3.847487489609173e-05,
"loss": 2.7643,
"step": 69600
},
{
"epoch": 0.23,
"learning_rate": 3.845831580829876e-05,
"loss": 2.7815,
"step": 69700
},
{
"epoch": 0.23,
"learning_rate": 3.844175672050578e-05,
"loss": 2.7665,
"step": 69800
},
{
"epoch": 0.23,
"learning_rate": 3.842519763271281e-05,
"loss": 2.7642,
"step": 69900
},
{
"epoch": 0.23,
"learning_rate": 3.8408638544919837e-05,
"loss": 2.7747,
"step": 70000
},
{
"epoch": 0.23,
"learning_rate": 3.839207945712687e-05,
"loss": 2.7862,
"step": 70100
},
{
"epoch": 0.23,
"learning_rate": 3.83755203693339e-05,
"loss": 2.755,
"step": 70200
},
{
"epoch": 0.23,
"learning_rate": 3.835896128154092e-05,
"loss": 2.7713,
"step": 70300
},
{
"epoch": 0.23,
"learning_rate": 3.834240219374795e-05,
"loss": 2.7665,
"step": 70400
},
{
"epoch": 0.23,
"learning_rate": 3.832584310595498e-05,
"loss": 2.7831,
"step": 70500
},
{
"epoch": 0.23,
"learning_rate": 3.830928401816201e-05,
"loss": 2.774,
"step": 70600
},
{
"epoch": 0.23,
"learning_rate": 3.829272493036904e-05,
"loss": 2.7802,
"step": 70700
},
{
"epoch": 0.23,
"learning_rate": 3.8276165842576065e-05,
"loss": 2.7653,
"step": 70800
},
{
"epoch": 0.23,
"learning_rate": 3.8259606754783096e-05,
"loss": 2.7576,
"step": 70900
},
{
"epoch": 0.24,
"learning_rate": 3.8243047666990126e-05,
"loss": 2.7891,
"step": 71000
},
{
"epoch": 0.24,
"learning_rate": 3.822648857919715e-05,
"loss": 2.7715,
"step": 71100
},
{
"epoch": 0.24,
"learning_rate": 3.820992949140418e-05,
"loss": 2.764,
"step": 71200
},
{
"epoch": 0.24,
"learning_rate": 3.81933704036112e-05,
"loss": 2.7699,
"step": 71300
},
{
"epoch": 0.24,
"learning_rate": 3.8176811315818234e-05,
"loss": 2.7682,
"step": 71400
},
{
"epoch": 0.24,
"learning_rate": 3.8160252228025264e-05,
"loss": 2.7829,
"step": 71500
},
{
"epoch": 0.24,
"learning_rate": 3.8143693140232294e-05,
"loss": 2.7736,
"step": 71600
},
{
"epoch": 0.24,
"learning_rate": 3.8127134052439325e-05,
"loss": 2.789,
"step": 71700
},
{
"epoch": 0.24,
"learning_rate": 3.811057496464635e-05,
"loss": 2.7784,
"step": 71800
},
{
"epoch": 0.24,
"learning_rate": 3.809401587685338e-05,
"loss": 2.7782,
"step": 71900
},
{
"epoch": 0.24,
"learning_rate": 3.807745678906041e-05,
"loss": 2.776,
"step": 72000
},
{
"epoch": 0.24,
"learning_rate": 3.806089770126743e-05,
"loss": 2.7795,
"step": 72100
},
{
"epoch": 0.24,
"learning_rate": 3.804433861347446e-05,
"loss": 2.7474,
"step": 72200
},
{
"epoch": 0.24,
"learning_rate": 3.802777952568149e-05,
"loss": 2.7784,
"step": 72300
},
{
"epoch": 0.24,
"learning_rate": 3.8011220437888516e-05,
"loss": 2.7837,
"step": 72400
},
{
"epoch": 0.24,
"learning_rate": 3.7994661350095547e-05,
"loss": 2.7615,
"step": 72500
},
{
"epoch": 0.24,
"learning_rate": 3.797810226230258e-05,
"loss": 2.7742,
"step": 72600
},
{
"epoch": 0.24,
"learning_rate": 3.796154317450961e-05,
"loss": 2.7735,
"step": 72700
},
{
"epoch": 0.24,
"learning_rate": 3.794498408671663e-05,
"loss": 2.7739,
"step": 72800
},
{
"epoch": 0.24,
"learning_rate": 3.792842499892366e-05,
"loss": 2.7716,
"step": 72900
},
{
"epoch": 0.24,
"learning_rate": 3.791186591113069e-05,
"loss": 2.7806,
"step": 73000
},
{
"epoch": 0.24,
"learning_rate": 3.7895306823337715e-05,
"loss": 2.7703,
"step": 73100
},
{
"epoch": 0.24,
"learning_rate": 3.7878747735544745e-05,
"loss": 2.7753,
"step": 73200
},
{
"epoch": 0.24,
"learning_rate": 3.7862188647751775e-05,
"loss": 2.7746,
"step": 73300
},
{
"epoch": 0.24,
"learning_rate": 3.78456295599588e-05,
"loss": 2.7836,
"step": 73400
},
{
"epoch": 0.24,
"learning_rate": 3.782907047216583e-05,
"loss": 2.7821,
"step": 73500
},
{
"epoch": 0.24,
"learning_rate": 3.781251138437286e-05,
"loss": 2.7711,
"step": 73600
},
{
"epoch": 0.24,
"learning_rate": 3.779595229657989e-05,
"loss": 2.762,
"step": 73700
},
{
"epoch": 0.24,
"learning_rate": 3.777939320878692e-05,
"loss": 2.7759,
"step": 73800
},
{
"epoch": 0.24,
"learning_rate": 3.7762834120993944e-05,
"loss": 2.7617,
"step": 73900
},
{
"epoch": 0.25,
"learning_rate": 3.7746275033200974e-05,
"loss": 2.7818,
"step": 74000
},
{
"epoch": 0.25,
"learning_rate": 3.7729715945408e-05,
"loss": 2.7662,
"step": 74100
},
{
"epoch": 0.25,
"learning_rate": 3.771315685761503e-05,
"loss": 2.783,
"step": 74200
},
{
"epoch": 0.25,
"learning_rate": 3.769659776982206e-05,
"loss": 2.773,
"step": 74300
},
{
"epoch": 0.25,
"learning_rate": 3.768003868202908e-05,
"loss": 2.7712,
"step": 74400
},
{
"epoch": 0.25,
"learning_rate": 3.766347959423611e-05,
"loss": 2.7709,
"step": 74500
},
{
"epoch": 0.25,
"learning_rate": 3.764692050644314e-05,
"loss": 2.7641,
"step": 74600
},
{
"epoch": 0.25,
"learning_rate": 3.763036141865017e-05,
"loss": 2.7659,
"step": 74700
},
{
"epoch": 0.25,
"learning_rate": 3.76138023308572e-05,
"loss": 2.7594,
"step": 74800
},
{
"epoch": 0.25,
"learning_rate": 3.7597243243064226e-05,
"loss": 2.7561,
"step": 74900
},
{
"epoch": 0.25,
"learning_rate": 3.758068415527126e-05,
"loss": 2.7747,
"step": 75000
},
{
"epoch": 0.25,
"learning_rate": 3.756412506747829e-05,
"loss": 2.7664,
"step": 75100
},
{
"epoch": 0.25,
"learning_rate": 3.754756597968531e-05,
"loss": 2.7907,
"step": 75200
},
{
"epoch": 0.25,
"learning_rate": 3.753100689189234e-05,
"loss": 2.7689,
"step": 75300
},
{
"epoch": 0.25,
"learning_rate": 3.7514447804099364e-05,
"loss": 2.7684,
"step": 75400
},
{
"epoch": 0.25,
"learning_rate": 3.7497888716306395e-05,
"loss": 2.7677,
"step": 75500
},
{
"epoch": 0.25,
"learning_rate": 3.748132962851343e-05,
"loss": 2.7735,
"step": 75600
},
{
"epoch": 0.25,
"learning_rate": 3.7464770540720455e-05,
"loss": 2.757,
"step": 75700
},
{
"epoch": 0.25,
"learning_rate": 3.7448211452927485e-05,
"loss": 2.7717,
"step": 75800
},
{
"epoch": 0.25,
"learning_rate": 3.743165236513451e-05,
"loss": 2.7637,
"step": 75900
},
{
"epoch": 0.25,
"learning_rate": 3.741509327734154e-05,
"loss": 2.7833,
"step": 76000
},
{
"epoch": 0.25,
"learning_rate": 3.739853418954857e-05,
"loss": 2.7627,
"step": 76100
},
{
"epoch": 0.25,
"learning_rate": 3.738197510175559e-05,
"loss": 2.7698,
"step": 76200
},
{
"epoch": 0.25,
"learning_rate": 3.7365416013962623e-05,
"loss": 2.7685,
"step": 76300
},
{
"epoch": 0.25,
"learning_rate": 3.7348856926169654e-05,
"loss": 2.7677,
"step": 76400
},
{
"epoch": 0.25,
"learning_rate": 3.733229783837668e-05,
"loss": 2.7688,
"step": 76500
},
{
"epoch": 0.25,
"learning_rate": 3.7315738750583714e-05,
"loss": 2.7634,
"step": 76600
},
{
"epoch": 0.25,
"learning_rate": 3.729917966279074e-05,
"loss": 2.7738,
"step": 76700
},
{
"epoch": 0.25,
"learning_rate": 3.728262057499777e-05,
"loss": 2.7739,
"step": 76800
},
{
"epoch": 0.25,
"learning_rate": 3.72660614872048e-05,
"loss": 2.7631,
"step": 76900
},
{
"epoch": 0.26,
"learning_rate": 3.724950239941182e-05,
"loss": 2.7487,
"step": 77000
},
{
"epoch": 0.26,
"learning_rate": 3.723294331161885e-05,
"loss": 2.7657,
"step": 77100
},
{
"epoch": 0.26,
"learning_rate": 3.7216384223825876e-05,
"loss": 2.7645,
"step": 77200
},
{
"epoch": 0.26,
"learning_rate": 3.7199825136032906e-05,
"loss": 2.7838,
"step": 77300
},
{
"epoch": 0.26,
"learning_rate": 3.7183266048239936e-05,
"loss": 2.7717,
"step": 77400
},
{
"epoch": 0.26,
"learning_rate": 3.716670696044696e-05,
"loss": 2.7816,
"step": 77500
},
{
"epoch": 0.26,
"learning_rate": 3.7150147872654e-05,
"loss": 2.7741,
"step": 77600
},
{
"epoch": 0.26,
"learning_rate": 3.713358878486102e-05,
"loss": 2.7727,
"step": 77700
},
{
"epoch": 0.26,
"learning_rate": 3.711702969706805e-05,
"loss": 2.7721,
"step": 77800
},
{
"epoch": 0.26,
"learning_rate": 3.710047060927508e-05,
"loss": 2.7768,
"step": 77900
},
{
"epoch": 0.26,
"learning_rate": 3.7083911521482105e-05,
"loss": 2.7654,
"step": 78000
},
{
"epoch": 0.26,
"learning_rate": 3.7067352433689135e-05,
"loss": 2.7549,
"step": 78100
},
{
"epoch": 0.26,
"learning_rate": 3.7050793345896165e-05,
"loss": 2.7627,
"step": 78200
},
{
"epoch": 0.26,
"learning_rate": 3.703423425810319e-05,
"loss": 2.7784,
"step": 78300
},
{
"epoch": 0.26,
"learning_rate": 3.701767517031022e-05,
"loss": 2.7635,
"step": 78400
},
{
"epoch": 0.26,
"learning_rate": 3.700111608251724e-05,
"loss": 2.7707,
"step": 78500
},
{
"epoch": 0.26,
"learning_rate": 3.698455699472428e-05,
"loss": 2.7545,
"step": 78600
},
{
"epoch": 0.26,
"learning_rate": 3.69679979069313e-05,
"loss": 2.7658,
"step": 78700
},
{
"epoch": 0.26,
"learning_rate": 3.6951438819138333e-05,
"loss": 2.7701,
"step": 78800
},
{
"epoch": 0.26,
"learning_rate": 3.6934879731345364e-05,
"loss": 2.7816,
"step": 78900
},
{
"epoch": 0.26,
"learning_rate": 3.691832064355239e-05,
"loss": 2.771,
"step": 79000
},
{
"epoch": 0.26,
"learning_rate": 3.690176155575942e-05,
"loss": 2.7745,
"step": 79100
},
{
"epoch": 0.26,
"learning_rate": 3.688520246796645e-05,
"loss": 2.7756,
"step": 79200
},
{
"epoch": 0.26,
"learning_rate": 3.686864338017347e-05,
"loss": 2.7655,
"step": 79300
},
{
"epoch": 0.26,
"learning_rate": 3.68520842923805e-05,
"loss": 2.7838,
"step": 79400
},
{
"epoch": 0.26,
"learning_rate": 3.683552520458753e-05,
"loss": 2.7654,
"step": 79500
},
{
"epoch": 0.26,
"learning_rate": 3.681896611679456e-05,
"loss": 2.7728,
"step": 79600
},
{
"epoch": 0.26,
"learning_rate": 3.680240702900159e-05,
"loss": 2.7612,
"step": 79700
},
{
"epoch": 0.26,
"learning_rate": 3.6785847941208616e-05,
"loss": 2.7618,
"step": 79800
},
{
"epoch": 0.26,
"learning_rate": 3.6769288853415646e-05,
"loss": 2.7593,
"step": 79900
},
{
"epoch": 0.26,
"learning_rate": 3.675272976562267e-05,
"loss": 2.7689,
"step": 80000
},
{
"epoch": 0.27,
"learning_rate": 3.67361706778297e-05,
"loss": 2.7535,
"step": 80100
},
{
"epoch": 0.27,
"learning_rate": 3.671961159003673e-05,
"loss": 2.7723,
"step": 80200
},
{
"epoch": 0.27,
"learning_rate": 3.6703052502243754e-05,
"loss": 2.759,
"step": 80300
},
{
"epoch": 0.27,
"learning_rate": 3.6686493414450784e-05,
"loss": 2.7656,
"step": 80400
},
{
"epoch": 0.27,
"learning_rate": 3.6669934326657815e-05,
"loss": 2.7597,
"step": 80500
},
{
"epoch": 0.27,
"learning_rate": 3.6653375238864845e-05,
"loss": 2.7767,
"step": 80600
},
{
"epoch": 0.27,
"learning_rate": 3.6636816151071875e-05,
"loss": 2.7654,
"step": 80700
},
{
"epoch": 0.27,
"learning_rate": 3.66202570632789e-05,
"loss": 2.7803,
"step": 80800
},
{
"epoch": 0.27,
"learning_rate": 3.660369797548593e-05,
"loss": 2.7708,
"step": 80900
},
{
"epoch": 0.27,
"learning_rate": 3.658713888769296e-05,
"loss": 2.7643,
"step": 81000
},
{
"epoch": 0.27,
"learning_rate": 3.657057979989998e-05,
"loss": 2.7694,
"step": 81100
},
{
"epoch": 0.27,
"learning_rate": 3.655402071210701e-05,
"loss": 2.7566,
"step": 81200
},
{
"epoch": 0.27,
"learning_rate": 3.653746162431404e-05,
"loss": 2.7798,
"step": 81300
},
{
"epoch": 0.27,
"learning_rate": 3.652090253652107e-05,
"loss": 2.7593,
"step": 81400
},
{
"epoch": 0.27,
"learning_rate": 3.65043434487281e-05,
"loss": 2.7631,
"step": 81500
},
{
"epoch": 0.27,
"learning_rate": 3.648778436093513e-05,
"loss": 2.7619,
"step": 81600
},
{
"epoch": 0.27,
"learning_rate": 3.647122527314216e-05,
"loss": 2.7562,
"step": 81700
},
{
"epoch": 0.27,
"learning_rate": 3.645466618534918e-05,
"loss": 2.7616,
"step": 81800
},
{
"epoch": 0.27,
"learning_rate": 3.643810709755621e-05,
"loss": 2.7752,
"step": 81900
},
{
"epoch": 0.27,
"learning_rate": 3.642154800976324e-05,
"loss": 2.7622,
"step": 82000
},
{
"epoch": 0.27,
"learning_rate": 3.6404988921970266e-05,
"loss": 2.7738,
"step": 82100
},
{
"epoch": 0.27,
"learning_rate": 3.6388429834177296e-05,
"loss": 2.7691,
"step": 82200
},
{
"epoch": 0.27,
"learning_rate": 3.6371870746384326e-05,
"loss": 2.7709,
"step": 82300
},
{
"epoch": 0.27,
"learning_rate": 3.635531165859135e-05,
"loss": 2.789,
"step": 82400
},
{
"epoch": 0.27,
"learning_rate": 3.633875257079838e-05,
"loss": 2.7553,
"step": 82500
},
{
"epoch": 0.27,
"learning_rate": 3.632219348300541e-05,
"loss": 2.7778,
"step": 82600
},
{
"epoch": 0.27,
"learning_rate": 3.630563439521244e-05,
"loss": 2.7791,
"step": 82700
},
{
"epoch": 0.27,
"learning_rate": 3.628907530741947e-05,
"loss": 2.777,
"step": 82800
},
{
"epoch": 0.27,
"learning_rate": 3.6272516219626494e-05,
"loss": 2.7583,
"step": 82900
},
{
"epoch": 0.27,
"learning_rate": 3.6255957131833525e-05,
"loss": 2.7702,
"step": 83000
},
{
"epoch": 0.28,
"learning_rate": 3.623939804404055e-05,
"loss": 2.778,
"step": 83100
},
{
"epoch": 0.28,
"learning_rate": 3.622283895624758e-05,
"loss": 2.7604,
"step": 83200
},
{
"epoch": 0.28,
"learning_rate": 3.620627986845461e-05,
"loss": 2.7674,
"step": 83300
},
{
"epoch": 0.28,
"learning_rate": 3.618972078066163e-05,
"loss": 2.781,
"step": 83400
},
{
"epoch": 0.28,
"learning_rate": 3.617316169286866e-05,
"loss": 2.7616,
"step": 83500
},
{
"epoch": 0.28,
"learning_rate": 3.615660260507569e-05,
"loss": 2.7769,
"step": 83600
},
{
"epoch": 0.28,
"learning_rate": 3.614004351728272e-05,
"loss": 2.7688,
"step": 83700
},
{
"epoch": 0.28,
"learning_rate": 3.6123484429489754e-05,
"loss": 2.7652,
"step": 83800
},
{
"epoch": 0.28,
"learning_rate": 3.610692534169678e-05,
"loss": 2.7685,
"step": 83900
},
{
"epoch": 0.28,
"learning_rate": 3.609036625390381e-05,
"loss": 2.7694,
"step": 84000
},
{
"epoch": 0.28,
"learning_rate": 3.607380716611084e-05,
"loss": 2.7786,
"step": 84100
},
{
"epoch": 0.28,
"learning_rate": 3.605724807831786e-05,
"loss": 2.7719,
"step": 84200
},
{
"epoch": 0.28,
"learning_rate": 3.604068899052489e-05,
"loss": 2.7769,
"step": 84300
},
{
"epoch": 0.28,
"learning_rate": 3.6024129902731915e-05,
"loss": 2.7651,
"step": 84400
},
{
"epoch": 0.28,
"learning_rate": 3.6007570814938945e-05,
"loss": 2.7535,
"step": 84500
},
{
"epoch": 0.28,
"learning_rate": 3.5991011727145976e-05,
"loss": 2.757,
"step": 84600
},
{
"epoch": 0.28,
"learning_rate": 3.5974452639353006e-05,
"loss": 2.7571,
"step": 84700
},
{
"epoch": 0.28,
"learning_rate": 3.5957893551560036e-05,
"loss": 2.7545,
"step": 84800
},
{
"epoch": 0.28,
"learning_rate": 3.594133446376706e-05,
"loss": 2.7571,
"step": 84900
},
{
"epoch": 0.28,
"learning_rate": 3.592477537597409e-05,
"loss": 2.7558,
"step": 85000
},
{
"epoch": 0.28,
"learning_rate": 3.590821628818112e-05,
"loss": 2.7639,
"step": 85100
},
{
"epoch": 0.28,
"learning_rate": 3.5891657200388144e-05,
"loss": 2.7656,
"step": 85200
},
{
"epoch": 0.28,
"learning_rate": 3.5875098112595174e-05,
"loss": 2.7654,
"step": 85300
},
{
"epoch": 0.28,
"learning_rate": 3.5858539024802204e-05,
"loss": 2.7549,
"step": 85400
},
{
"epoch": 0.28,
"learning_rate": 3.584197993700923e-05,
"loss": 2.762,
"step": 85500
},
{
"epoch": 0.28,
"learning_rate": 3.5825420849216265e-05,
"loss": 2.7716,
"step": 85600
},
{
"epoch": 0.28,
"learning_rate": 3.580886176142329e-05,
"loss": 2.7848,
"step": 85700
},
{
"epoch": 0.28,
"learning_rate": 3.579230267363032e-05,
"loss": 2.7675,
"step": 85800
},
{
"epoch": 0.28,
"learning_rate": 3.577574358583734e-05,
"loss": 2.7798,
"step": 85900
},
{
"epoch": 0.28,
"learning_rate": 3.575918449804437e-05,
"loss": 2.7544,
"step": 86000
},
{
"epoch": 0.29,
"learning_rate": 3.57426254102514e-05,
"loss": 2.7634,
"step": 86100
},
{
"epoch": 0.29,
"learning_rate": 3.5726066322458427e-05,
"loss": 2.7796,
"step": 86200
},
{
"epoch": 0.29,
"learning_rate": 3.570950723466546e-05,
"loss": 2.7618,
"step": 86300
},
{
"epoch": 0.29,
"learning_rate": 3.569294814687249e-05,
"loss": 2.7575,
"step": 86400
},
{
"epoch": 0.29,
"learning_rate": 3.567638905907951e-05,
"loss": 2.7789,
"step": 86500
},
{
"epoch": 0.29,
"learning_rate": 3.565982997128655e-05,
"loss": 2.7719,
"step": 86600
},
{
"epoch": 0.29,
"learning_rate": 3.564327088349357e-05,
"loss": 2.7506,
"step": 86700
},
{
"epoch": 0.29,
"learning_rate": 3.56267117957006e-05,
"loss": 2.757,
"step": 86800
},
{
"epoch": 0.29,
"learning_rate": 3.561015270790763e-05,
"loss": 2.7716,
"step": 86900
},
{
"epoch": 0.29,
"learning_rate": 3.5593593620114655e-05,
"loss": 2.7676,
"step": 87000
},
{
"epoch": 0.29,
"learning_rate": 3.5577034532321686e-05,
"loss": 2.7605,
"step": 87100
},
{
"epoch": 0.29,
"learning_rate": 3.556047544452871e-05,
"loss": 2.7766,
"step": 87200
},
{
"epoch": 0.29,
"learning_rate": 3.554391635673574e-05,
"loss": 2.7602,
"step": 87300
},
{
"epoch": 0.29,
"learning_rate": 3.552735726894277e-05,
"loss": 2.7676,
"step": 87400
},
{
"epoch": 0.29,
"learning_rate": 3.551079818114979e-05,
"loss": 2.7678,
"step": 87500
},
{
"epoch": 0.29,
"learning_rate": 3.549423909335683e-05,
"loss": 2.7571,
"step": 87600
},
{
"epoch": 0.29,
"learning_rate": 3.5477680005563854e-05,
"loss": 2.7761,
"step": 87700
},
{
"epoch": 0.29,
"learning_rate": 3.5461120917770884e-05,
"loss": 2.7722,
"step": 87800
},
{
"epoch": 0.29,
"learning_rate": 3.5444561829977915e-05,
"loss": 2.7728,
"step": 87900
},
{
"epoch": 0.29,
"learning_rate": 3.542800274218494e-05,
"loss": 2.7666,
"step": 88000
},
{
"epoch": 0.29,
"learning_rate": 3.541144365439197e-05,
"loss": 2.7674,
"step": 88100
},
{
"epoch": 0.29,
"learning_rate": 3.5394884566599e-05,
"loss": 2.7665,
"step": 88200
},
{
"epoch": 0.29,
"learning_rate": 3.537832547880602e-05,
"loss": 2.7498,
"step": 88300
},
{
"epoch": 0.29,
"learning_rate": 3.536176639101305e-05,
"loss": 2.7735,
"step": 88400
},
{
"epoch": 0.29,
"learning_rate": 3.5345207303220076e-05,
"loss": 2.7572,
"step": 88500
},
{
"epoch": 0.29,
"learning_rate": 3.532864821542711e-05,
"loss": 2.7603,
"step": 88600
},
{
"epoch": 0.29,
"learning_rate": 3.5312089127634143e-05,
"loss": 2.7502,
"step": 88700
},
{
"epoch": 0.29,
"learning_rate": 3.529553003984117e-05,
"loss": 2.774,
"step": 88800
},
{
"epoch": 0.29,
"learning_rate": 3.52789709520482e-05,
"loss": 2.786,
"step": 88900
},
{
"epoch": 0.29,
"learning_rate": 3.526241186425522e-05,
"loss": 2.7827,
"step": 89000
},
{
"epoch": 0.3,
"learning_rate": 3.524585277646225e-05,
"loss": 2.7652,
"step": 89100
},
{
"epoch": 0.3,
"learning_rate": 3.522929368866928e-05,
"loss": 2.7652,
"step": 89200
},
{
"epoch": 0.3,
"learning_rate": 3.5212734600876305e-05,
"loss": 2.7619,
"step": 89300
},
{
"epoch": 0.3,
"learning_rate": 3.5196175513083335e-05,
"loss": 2.7673,
"step": 89400
},
{
"epoch": 0.3,
"learning_rate": 3.5179616425290365e-05,
"loss": 2.7625,
"step": 89500
},
{
"epoch": 0.3,
"learning_rate": 3.5163057337497396e-05,
"loss": 2.7567,
"step": 89600
},
{
"epoch": 0.3,
"learning_rate": 3.5146498249704426e-05,
"loss": 2.7665,
"step": 89700
},
{
"epoch": 0.3,
"learning_rate": 3.512993916191145e-05,
"loss": 2.7669,
"step": 89800
},
{
"epoch": 0.3,
"learning_rate": 3.511338007411848e-05,
"loss": 2.7634,
"step": 89900
},
{
"epoch": 0.3,
"learning_rate": 3.509682098632551e-05,
"loss": 2.7788,
"step": 90000
},
{
"epoch": 0.3,
"learning_rate": 3.5080261898532534e-05,
"loss": 2.7633,
"step": 90100
},
{
"epoch": 0.3,
"learning_rate": 3.5063702810739564e-05,
"loss": 2.7672,
"step": 90200
},
{
"epoch": 0.3,
"learning_rate": 3.504714372294659e-05,
"loss": 2.7642,
"step": 90300
},
{
"epoch": 0.3,
"learning_rate": 3.503058463515362e-05,
"loss": 2.7647,
"step": 90400
},
{
"epoch": 0.3,
"learning_rate": 3.501402554736065e-05,
"loss": 2.7723,
"step": 90500
},
{
"epoch": 0.3,
"learning_rate": 3.499746645956768e-05,
"loss": 2.7747,
"step": 90600
},
{
"epoch": 0.3,
"learning_rate": 3.498090737177471e-05,
"loss": 2.7588,
"step": 90700
},
{
"epoch": 0.3,
"learning_rate": 3.496434828398173e-05,
"loss": 2.7673,
"step": 90800
},
{
"epoch": 0.3,
"learning_rate": 3.494778919618876e-05,
"loss": 2.7654,
"step": 90900
},
{
"epoch": 0.3,
"learning_rate": 3.493123010839579e-05,
"loss": 2.7616,
"step": 91000
},
{
"epoch": 0.3,
"learning_rate": 3.4914671020602816e-05,
"loss": 2.7796,
"step": 91100
},
{
"epoch": 0.3,
"learning_rate": 3.489811193280985e-05,
"loss": 2.7652,
"step": 91200
},
{
"epoch": 0.3,
"learning_rate": 3.488155284501688e-05,
"loss": 2.7671,
"step": 91300
},
{
"epoch": 0.3,
"learning_rate": 3.48649937572239e-05,
"loss": 2.7556,
"step": 91400
},
{
"epoch": 0.3,
"learning_rate": 3.484843466943093e-05,
"loss": 2.7542,
"step": 91500
},
{
"epoch": 0.3,
"learning_rate": 3.483187558163796e-05,
"loss": 2.7582,
"step": 91600
},
{
"epoch": 0.3,
"learning_rate": 3.481531649384499e-05,
"loss": 2.7596,
"step": 91700
},
{
"epoch": 0.3,
"learning_rate": 3.4798757406052015e-05,
"loss": 2.7774,
"step": 91800
},
{
"epoch": 0.3,
"learning_rate": 3.4782198318259045e-05,
"loss": 2.7661,
"step": 91900
},
{
"epoch": 0.3,
"learning_rate": 3.4765639230466076e-05,
"loss": 2.7551,
"step": 92000
},
{
"epoch": 0.31,
"learning_rate": 3.47490801426731e-05,
"loss": 2.7545,
"step": 92100
},
{
"epoch": 0.31,
"learning_rate": 3.473252105488013e-05,
"loss": 2.7801,
"step": 92200
},
{
"epoch": 0.31,
"learning_rate": 3.471596196708716e-05,
"loss": 2.782,
"step": 92300
},
{
"epoch": 0.31,
"learning_rate": 3.469940287929418e-05,
"loss": 2.763,
"step": 92400
},
{
"epoch": 0.31,
"learning_rate": 3.4682843791501213e-05,
"loss": 2.7406,
"step": 92500
},
{
"epoch": 0.31,
"learning_rate": 3.4666284703708244e-05,
"loss": 2.765,
"step": 92600
},
{
"epoch": 0.31,
"learning_rate": 3.4649725615915274e-05,
"loss": 2.7653,
"step": 92700
},
{
"epoch": 0.31,
"learning_rate": 3.4633166528122304e-05,
"loss": 2.7632,
"step": 92800
},
{
"epoch": 0.31,
"learning_rate": 3.461660744032933e-05,
"loss": 2.761,
"step": 92900
},
{
"epoch": 0.31,
"learning_rate": 3.460004835253636e-05,
"loss": 2.7688,
"step": 93000
},
{
"epoch": 0.31,
"learning_rate": 3.458348926474338e-05,
"loss": 2.7714,
"step": 93100
},
{
"epoch": 0.31,
"learning_rate": 3.456693017695041e-05,
"loss": 2.7672,
"step": 93200
},
{
"epoch": 0.31,
"learning_rate": 3.455037108915744e-05,
"loss": 2.7682,
"step": 93300
},
{
"epoch": 0.31,
"learning_rate": 3.4533812001364466e-05,
"loss": 2.7727,
"step": 93400
},
{
"epoch": 0.31,
"learning_rate": 3.4517252913571496e-05,
"loss": 2.7762,
"step": 93500
},
{
"epoch": 0.31,
"learning_rate": 3.4500693825778526e-05,
"loss": 2.7726,
"step": 93600
},
{
"epoch": 0.31,
"learning_rate": 3.448413473798556e-05,
"loss": 2.7657,
"step": 93700
},
{
"epoch": 0.31,
"learning_rate": 3.446757565019259e-05,
"loss": 2.775,
"step": 93800
},
{
"epoch": 0.31,
"learning_rate": 3.445101656239961e-05,
"loss": 2.775,
"step": 93900
},
{
"epoch": 0.31,
"learning_rate": 3.443445747460664e-05,
"loss": 2.7544,
"step": 94000
},
{
"epoch": 0.31,
"learning_rate": 3.441789838681367e-05,
"loss": 2.7638,
"step": 94100
},
{
"epoch": 0.31,
"learning_rate": 3.4401339299020695e-05,
"loss": 2.7636,
"step": 94200
},
{
"epoch": 0.31,
"learning_rate": 3.4384780211227725e-05,
"loss": 2.7631,
"step": 94300
},
{
"epoch": 0.31,
"learning_rate": 3.436822112343475e-05,
"loss": 2.7649,
"step": 94400
},
{
"epoch": 0.31,
"learning_rate": 3.435166203564178e-05,
"loss": 2.7604,
"step": 94500
},
{
"epoch": 0.31,
"learning_rate": 3.4335102947848816e-05,
"loss": 2.7816,
"step": 94600
},
{
"epoch": 0.31,
"learning_rate": 3.431854386005584e-05,
"loss": 2.7671,
"step": 94700
},
{
"epoch": 0.31,
"learning_rate": 3.430198477226287e-05,
"loss": 2.7798,
"step": 94800
},
{
"epoch": 0.31,
"learning_rate": 3.428542568446989e-05,
"loss": 2.769,
"step": 94900
},
{
"epoch": 0.31,
"learning_rate": 3.4268866596676924e-05,
"loss": 2.7652,
"step": 95000
},
{
"epoch": 0.31,
"learning_rate": 3.4252307508883954e-05,
"loss": 2.7537,
"step": 95100
},
{
"epoch": 0.32,
"learning_rate": 3.423574842109098e-05,
"loss": 2.767,
"step": 95200
},
{
"epoch": 0.32,
"learning_rate": 3.421918933329801e-05,
"loss": 2.7716,
"step": 95300
},
{
"epoch": 0.32,
"learning_rate": 3.420263024550504e-05,
"loss": 2.7678,
"step": 95400
},
{
"epoch": 0.32,
"learning_rate": 3.418607115771206e-05,
"loss": 2.7497,
"step": 95500
},
{
"epoch": 0.32,
"learning_rate": 3.41695120699191e-05,
"loss": 2.7556,
"step": 95600
},
{
"epoch": 0.32,
"learning_rate": 3.415295298212612e-05,
"loss": 2.751,
"step": 95700
},
{
"epoch": 0.32,
"learning_rate": 3.413639389433315e-05,
"loss": 2.7716,
"step": 95800
},
{
"epoch": 0.32,
"learning_rate": 3.411983480654018e-05,
"loss": 2.7641,
"step": 95900
},
{
"epoch": 0.32,
"learning_rate": 3.4103275718747206e-05,
"loss": 2.7445,
"step": 96000
},
{
"epoch": 0.32,
"learning_rate": 3.4086716630954236e-05,
"loss": 2.7678,
"step": 96100
},
{
"epoch": 0.32,
"learning_rate": 3.407015754316126e-05,
"loss": 2.7545,
"step": 96200
},
{
"epoch": 0.32,
"learning_rate": 3.405359845536829e-05,
"loss": 2.7554,
"step": 96300
},
{
"epoch": 0.32,
"learning_rate": 3.403703936757532e-05,
"loss": 2.755,
"step": 96400
},
{
"epoch": 0.32,
"learning_rate": 3.4020480279782344e-05,
"loss": 2.7867,
"step": 96500
},
{
"epoch": 0.32,
"learning_rate": 3.400392119198938e-05,
"loss": 2.7597,
"step": 96600
},
{
"epoch": 0.32,
"learning_rate": 3.3987362104196405e-05,
"loss": 2.7579,
"step": 96700
},
{
"epoch": 0.32,
"learning_rate": 3.3970803016403435e-05,
"loss": 2.7721,
"step": 96800
},
{
"epoch": 0.32,
"learning_rate": 3.3954243928610465e-05,
"loss": 2.7588,
"step": 96900
},
{
"epoch": 0.32,
"learning_rate": 3.393768484081749e-05,
"loss": 2.7565,
"step": 97000
},
{
"epoch": 0.32,
"learning_rate": 3.392112575302452e-05,
"loss": 2.756,
"step": 97100
},
{
"epoch": 0.32,
"learning_rate": 3.390456666523155e-05,
"loss": 2.767,
"step": 97200
},
{
"epoch": 0.32,
"learning_rate": 3.388800757743857e-05,
"loss": 2.7759,
"step": 97300
},
{
"epoch": 0.32,
"learning_rate": 3.38714484896456e-05,
"loss": 2.7578,
"step": 97400
},
{
"epoch": 0.32,
"learning_rate": 3.385488940185263e-05,
"loss": 2.7654,
"step": 97500
},
{
"epoch": 0.32,
"learning_rate": 3.3838330314059664e-05,
"loss": 2.757,
"step": 97600
},
{
"epoch": 0.32,
"learning_rate": 3.3821771226266694e-05,
"loss": 2.7645,
"step": 97700
},
{
"epoch": 0.32,
"learning_rate": 3.380521213847372e-05,
"loss": 2.7632,
"step": 97800
},
{
"epoch": 0.32,
"learning_rate": 3.378865305068075e-05,
"loss": 2.7691,
"step": 97900
},
{
"epoch": 0.32,
"learning_rate": 3.377209396288777e-05,
"loss": 2.7674,
"step": 98000
},
{
"epoch": 0.32,
"learning_rate": 3.37555348750948e-05,
"loss": 2.7589,
"step": 98100
},
{
"epoch": 0.33,
"learning_rate": 3.373897578730183e-05,
"loss": 2.7618,
"step": 98200
},
{
"epoch": 0.33,
"learning_rate": 3.3722416699508856e-05,
"loss": 2.7834,
"step": 98300
},
{
"epoch": 0.33,
"learning_rate": 3.3705857611715886e-05,
"loss": 2.7622,
"step": 98400
},
{
"epoch": 0.33,
"learning_rate": 3.3689298523922916e-05,
"loss": 2.779,
"step": 98500
},
{
"epoch": 0.33,
"learning_rate": 3.3672739436129947e-05,
"loss": 2.7589,
"step": 98600
},
{
"epoch": 0.33,
"learning_rate": 3.365618034833698e-05,
"loss": 2.7683,
"step": 98700
},
{
"epoch": 0.33,
"learning_rate": 3.3639621260544e-05,
"loss": 2.7735,
"step": 98800
},
{
"epoch": 0.33,
"learning_rate": 3.362306217275103e-05,
"loss": 2.7465,
"step": 98900
},
{
"epoch": 0.33,
"learning_rate": 3.3606503084958054e-05,
"loss": 2.7598,
"step": 99000
},
{
"epoch": 0.33,
"learning_rate": 3.3589943997165084e-05,
"loss": 2.7521,
"step": 99100
},
{
"epoch": 0.33,
"learning_rate": 3.3573384909372115e-05,
"loss": 2.7639,
"step": 99200
},
{
"epoch": 0.33,
"learning_rate": 3.355682582157914e-05,
"loss": 2.7595,
"step": 99300
},
{
"epoch": 0.33,
"learning_rate": 3.354026673378617e-05,
"loss": 2.7575,
"step": 99400
},
{
"epoch": 0.33,
"learning_rate": 3.35237076459932e-05,
"loss": 2.7489,
"step": 99500
},
{
"epoch": 0.33,
"learning_rate": 3.350714855820023e-05,
"loss": 2.7656,
"step": 99600
},
{
"epoch": 0.33,
"learning_rate": 3.349058947040726e-05,
"loss": 2.7598,
"step": 99700
},
{
"epoch": 0.33,
"learning_rate": 3.347403038261428e-05,
"loss": 2.7486,
"step": 99800
},
{
"epoch": 0.33,
"learning_rate": 3.345747129482131e-05,
"loss": 2.7553,
"step": 99900
},
{
"epoch": 0.33,
"learning_rate": 3.3440912207028344e-05,
"loss": 2.7728,
"step": 100000
},
{
"epoch": 0.33,
"learning_rate": 3.342435311923537e-05,
"loss": 2.7468,
"step": 100100
},
{
"epoch": 0.33,
"learning_rate": 3.34077940314424e-05,
"loss": 2.7582,
"step": 100200
},
{
"epoch": 0.33,
"learning_rate": 3.339123494364942e-05,
"loss": 2.7531,
"step": 100300
},
{
"epoch": 0.33,
"learning_rate": 3.337467585585645e-05,
"loss": 2.7754,
"step": 100400
},
{
"epoch": 0.33,
"learning_rate": 3.335811676806348e-05,
"loss": 2.7671,
"step": 100500
},
{
"epoch": 0.33,
"learning_rate": 3.334155768027051e-05,
"loss": 2.7632,
"step": 100600
},
{
"epoch": 0.33,
"learning_rate": 3.332499859247754e-05,
"loss": 2.7483,
"step": 100700
},
{
"epoch": 0.33,
"learning_rate": 3.3308439504684566e-05,
"loss": 2.7764,
"step": 100800
},
{
"epoch": 0.33,
"learning_rate": 3.3291880416891596e-05,
"loss": 2.7616,
"step": 100900
},
{
"epoch": 0.33,
"learning_rate": 3.3275321329098626e-05,
"loss": 2.7647,
"step": 101000
},
{
"epoch": 0.33,
"learning_rate": 3.325876224130565e-05,
"loss": 2.7571,
"step": 101100
},
{
"epoch": 0.34,
"learning_rate": 3.324220315351268e-05,
"loss": 2.751,
"step": 101200
},
{
"epoch": 0.34,
"learning_rate": 3.322564406571971e-05,
"loss": 2.7625,
"step": 101300
},
{
"epoch": 0.34,
"learning_rate": 3.3209084977926734e-05,
"loss": 2.7686,
"step": 101400
},
{
"epoch": 0.34,
"learning_rate": 3.3192525890133764e-05,
"loss": 2.7722,
"step": 101500
},
{
"epoch": 0.34,
"learning_rate": 3.3175966802340795e-05,
"loss": 2.7697,
"step": 101600
},
{
"epoch": 0.34,
"learning_rate": 3.3159407714547825e-05,
"loss": 2.7732,
"step": 101700
},
{
"epoch": 0.34,
"learning_rate": 3.3142848626754855e-05,
"loss": 2.7656,
"step": 101800
},
{
"epoch": 0.34,
"learning_rate": 3.312628953896188e-05,
"loss": 2.7592,
"step": 101900
},
{
"epoch": 0.34,
"learning_rate": 3.310973045116891e-05,
"loss": 2.7466,
"step": 102000
},
{
"epoch": 0.34,
"learning_rate": 3.309317136337593e-05,
"loss": 2.763,
"step": 102100
},
{
"epoch": 0.34,
"learning_rate": 3.307661227558296e-05,
"loss": 2.773,
"step": 102200
},
{
"epoch": 0.34,
"learning_rate": 3.306005318778999e-05,
"loss": 2.7662,
"step": 102300
},
{
"epoch": 0.34,
"learning_rate": 3.3043494099997017e-05,
"loss": 2.7679,
"step": 102400
},
{
"epoch": 0.34,
"learning_rate": 3.302693501220405e-05,
"loss": 2.7694,
"step": 102500
},
{
"epoch": 0.34,
"learning_rate": 3.301037592441108e-05,
"loss": 2.7698,
"step": 102600
},
{
"epoch": 0.34,
"learning_rate": 3.299381683661811e-05,
"loss": 2.7527,
"step": 102700
},
{
"epoch": 0.34,
"learning_rate": 3.297725774882514e-05,
"loss": 2.7665,
"step": 102800
},
{
"epoch": 0.34,
"learning_rate": 3.296069866103216e-05,
"loss": 2.7618,
"step": 102900
},
{
"epoch": 0.34,
"learning_rate": 3.294413957323919e-05,
"loss": 2.7532,
"step": 103000
},
{
"epoch": 0.34,
"learning_rate": 3.292758048544622e-05,
"loss": 2.7778,
"step": 103100
},
{
"epoch": 0.34,
"learning_rate": 3.2911021397653245e-05,
"loss": 2.7687,
"step": 103200
},
{
"epoch": 0.34,
"learning_rate": 3.2894462309860276e-05,
"loss": 2.7574,
"step": 103300
},
{
"epoch": 0.34,
"learning_rate": 3.28779032220673e-05,
"loss": 2.7666,
"step": 103400
},
{
"epoch": 0.34,
"learning_rate": 3.286134413427433e-05,
"loss": 2.7559,
"step": 103500
},
{
"epoch": 0.34,
"learning_rate": 3.284478504648137e-05,
"loss": 2.7526,
"step": 103600
},
{
"epoch": 0.34,
"learning_rate": 3.282822595868839e-05,
"loss": 2.758,
"step": 103700
},
{
"epoch": 0.34,
"learning_rate": 3.281166687089542e-05,
"loss": 2.7578,
"step": 103800
},
{
"epoch": 0.34,
"learning_rate": 3.2795107783102444e-05,
"loss": 2.7533,
"step": 103900
},
{
"epoch": 0.34,
"learning_rate": 3.2778548695309474e-05,
"loss": 2.7938,
"step": 104000
},
{
"epoch": 0.34,
"learning_rate": 3.2761989607516505e-05,
"loss": 2.7731,
"step": 104100
},
{
"epoch": 0.35,
"learning_rate": 3.274543051972353e-05,
"loss": 2.7624,
"step": 104200
},
{
"epoch": 0.35,
"learning_rate": 3.272887143193056e-05,
"loss": 2.7623,
"step": 104300
},
{
"epoch": 0.35,
"learning_rate": 3.271231234413759e-05,
"loss": 2.7708,
"step": 104400
},
{
"epoch": 0.35,
"learning_rate": 3.269575325634461e-05,
"loss": 2.7552,
"step": 104500
},
{
"epoch": 0.35,
"learning_rate": 3.267919416855165e-05,
"loss": 2.7578,
"step": 104600
},
{
"epoch": 0.35,
"learning_rate": 3.266263508075867e-05,
"loss": 2.7699,
"step": 104700
},
{
"epoch": 0.35,
"learning_rate": 3.26460759929657e-05,
"loss": 2.7627,
"step": 104800
},
{
"epoch": 0.35,
"learning_rate": 3.2629516905172733e-05,
"loss": 2.7735,
"step": 104900
},
{
"epoch": 0.35,
"learning_rate": 3.261295781737976e-05,
"loss": 2.7692,
"step": 105000
},
{
"epoch": 0.35,
"learning_rate": 3.259639872958679e-05,
"loss": 2.7679,
"step": 105100
},
{
"epoch": 0.35,
"learning_rate": 3.257983964179381e-05,
"loss": 2.7618,
"step": 105200
},
{
"epoch": 0.35,
"learning_rate": 3.256328055400084e-05,
"loss": 2.7637,
"step": 105300
},
{
"epoch": 0.35,
"learning_rate": 3.254672146620787e-05,
"loss": 2.7641,
"step": 105400
},
{
"epoch": 0.35,
"learning_rate": 3.2530162378414895e-05,
"loss": 2.7697,
"step": 105500
},
{
"epoch": 0.35,
"learning_rate": 3.251360329062193e-05,
"loss": 2.7751,
"step": 105600
},
{
"epoch": 0.35,
"learning_rate": 3.2497044202828956e-05,
"loss": 2.7631,
"step": 105700
},
{
"epoch": 0.35,
"learning_rate": 3.2480485115035986e-05,
"loss": 2.7671,
"step": 105800
},
{
"epoch": 0.35,
"learning_rate": 3.2463926027243016e-05,
"loss": 2.7605,
"step": 105900
},
{
"epoch": 0.35,
"learning_rate": 3.244736693945004e-05,
"loss": 2.7699,
"step": 106000
},
{
"epoch": 0.35,
"learning_rate": 3.243080785165707e-05,
"loss": 2.7588,
"step": 106100
},
{
"epoch": 0.35,
"learning_rate": 3.2414248763864093e-05,
"loss": 2.7724,
"step": 106200
},
{
"epoch": 0.35,
"learning_rate": 3.2397689676071124e-05,
"loss": 2.7479,
"step": 106300
},
{
"epoch": 0.35,
"learning_rate": 3.2381130588278154e-05,
"loss": 2.762,
"step": 106400
},
{
"epoch": 0.35,
"learning_rate": 3.236457150048518e-05,
"loss": 2.7441,
"step": 106500
},
{
"epoch": 0.35,
"learning_rate": 3.2348012412692215e-05,
"loss": 2.7582,
"step": 106600
},
{
"epoch": 0.35,
"learning_rate": 3.233145332489924e-05,
"loss": 2.7559,
"step": 106700
},
{
"epoch": 0.35,
"learning_rate": 3.231489423710627e-05,
"loss": 2.7591,
"step": 106800
},
{
"epoch": 0.35,
"learning_rate": 3.22983351493133e-05,
"loss": 2.7725,
"step": 106900
},
{
"epoch": 0.35,
"learning_rate": 3.228177606152032e-05,
"loss": 2.7694,
"step": 107000
},
{
"epoch": 0.35,
"learning_rate": 3.226521697372735e-05,
"loss": 2.7617,
"step": 107100
},
{
"epoch": 0.36,
"learning_rate": 3.224865788593438e-05,
"loss": 2.7571,
"step": 107200
},
{
"epoch": 0.36,
"learning_rate": 3.2232098798141406e-05,
"loss": 2.7704,
"step": 107300
},
{
"epoch": 0.36,
"learning_rate": 3.221553971034844e-05,
"loss": 2.7718,
"step": 107400
},
{
"epoch": 0.36,
"learning_rate": 3.219898062255546e-05,
"loss": 2.7644,
"step": 107500
},
{
"epoch": 0.36,
"learning_rate": 3.21824215347625e-05,
"loss": 2.7794,
"step": 107600
},
{
"epoch": 0.36,
"learning_rate": 3.216586244696953e-05,
"loss": 2.7497,
"step": 107700
},
{
"epoch": 0.36,
"learning_rate": 3.214930335917655e-05,
"loss": 2.7569,
"step": 107800
},
{
"epoch": 0.36,
"learning_rate": 3.213274427138358e-05,
"loss": 2.7458,
"step": 107900
},
{
"epoch": 0.36,
"learning_rate": 3.2116185183590605e-05,
"loss": 2.7596,
"step": 108000
},
{
"epoch": 0.36,
"learning_rate": 3.2099626095797635e-05,
"loss": 2.7562,
"step": 108100
},
{
"epoch": 0.36,
"learning_rate": 3.2083067008004666e-05,
"loss": 2.76,
"step": 108200
},
{
"epoch": 0.36,
"learning_rate": 3.206650792021169e-05,
"loss": 2.7697,
"step": 108300
},
{
"epoch": 0.36,
"learning_rate": 3.204994883241872e-05,
"loss": 2.7668,
"step": 108400
},
{
"epoch": 0.36,
"learning_rate": 3.203338974462575e-05,
"loss": 2.7502,
"step": 108500
},
{
"epoch": 0.36,
"learning_rate": 3.201683065683278e-05,
"loss": 2.7758,
"step": 108600
},
{
"epoch": 0.36,
"learning_rate": 3.200027156903981e-05,
"loss": 2.7576,
"step": 108700
},
{
"epoch": 0.36,
"learning_rate": 3.1983712481246834e-05,
"loss": 2.7622,
"step": 108800
},
{
"epoch": 0.36,
"learning_rate": 3.1967153393453864e-05,
"loss": 2.7674,
"step": 108900
},
{
"epoch": 0.36,
"learning_rate": 3.1950594305660894e-05,
"loss": 2.775,
"step": 109000
},
{
"epoch": 0.36,
"learning_rate": 3.193403521786792e-05,
"loss": 2.7526,
"step": 109100
},
{
"epoch": 0.36,
"learning_rate": 3.191747613007495e-05,
"loss": 2.7595,
"step": 109200
},
{
"epoch": 0.36,
"learning_rate": 3.190091704228197e-05,
"loss": 2.7526,
"step": 109300
},
{
"epoch": 0.36,
"learning_rate": 3.1884357954489e-05,
"loss": 2.758,
"step": 109400
},
{
"epoch": 0.36,
"learning_rate": 3.186779886669603e-05,
"loss": 2.7494,
"step": 109500
},
{
"epoch": 0.36,
"learning_rate": 3.185123977890306e-05,
"loss": 2.7511,
"step": 109600
},
{
"epoch": 0.36,
"learning_rate": 3.183468069111009e-05,
"loss": 2.7453,
"step": 109700
},
{
"epoch": 0.36,
"learning_rate": 3.1818121603317116e-05,
"loss": 2.7605,
"step": 109800
},
{
"epoch": 0.36,
"learning_rate": 3.180156251552415e-05,
"loss": 2.7455,
"step": 109900
},
{
"epoch": 0.36,
"learning_rate": 3.178500342773118e-05,
"loss": 2.7523,
"step": 110000
},
{
"epoch": 0.36,
"learning_rate": 3.17684443399382e-05,
"loss": 2.7579,
"step": 110100
},
{
"epoch": 0.36,
"learning_rate": 3.175188525214523e-05,
"loss": 2.767,
"step": 110200
},
{
"epoch": 0.37,
"learning_rate": 3.173532616435226e-05,
"loss": 2.7535,
"step": 110300
},
{
"epoch": 0.37,
"learning_rate": 3.1718767076559285e-05,
"loss": 2.7637,
"step": 110400
},
{
"epoch": 0.37,
"learning_rate": 3.1702207988766315e-05,
"loss": 2.7577,
"step": 110500
},
{
"epoch": 0.37,
"learning_rate": 3.1685648900973345e-05,
"loss": 2.7741,
"step": 110600
},
{
"epoch": 0.37,
"learning_rate": 3.1669089813180376e-05,
"loss": 2.7747,
"step": 110700
},
{
"epoch": 0.37,
"learning_rate": 3.1652530725387406e-05,
"loss": 2.7647,
"step": 110800
},
{
"epoch": 0.37,
"learning_rate": 3.163597163759443e-05,
"loss": 2.7625,
"step": 110900
},
{
"epoch": 0.37,
"learning_rate": 3.161941254980146e-05,
"loss": 2.7657,
"step": 111000
},
{
"epoch": 0.37,
"learning_rate": 3.160285346200848e-05,
"loss": 2.7449,
"step": 111100
},
{
"epoch": 0.37,
"learning_rate": 3.1586294374215514e-05,
"loss": 2.7622,
"step": 111200
},
{
"epoch": 0.37,
"learning_rate": 3.1569735286422544e-05,
"loss": 2.7657,
"step": 111300
},
{
"epoch": 0.37,
"learning_rate": 3.155317619862957e-05,
"loss": 2.759,
"step": 111400
},
{
"epoch": 0.37,
"learning_rate": 3.15366171108366e-05,
"loss": 2.7602,
"step": 111500
},
{
"epoch": 0.37,
"learning_rate": 3.152005802304363e-05,
"loss": 2.7635,
"step": 111600
},
{
"epoch": 0.37,
"learning_rate": 3.150349893525066e-05,
"loss": 2.7549,
"step": 111700
},
{
"epoch": 0.37,
"learning_rate": 3.148693984745769e-05,
"loss": 2.7618,
"step": 111800
},
{
"epoch": 0.37,
"learning_rate": 3.147038075966471e-05,
"loss": 2.7703,
"step": 111900
},
{
"epoch": 0.37,
"learning_rate": 3.145382167187174e-05,
"loss": 2.7709,
"step": 112000
},
{
"epoch": 0.37,
"learning_rate": 3.143726258407877e-05,
"loss": 2.7669,
"step": 112100
},
{
"epoch": 0.37,
"learning_rate": 3.1420703496285796e-05,
"loss": 2.7517,
"step": 112200
},
{
"epoch": 0.37,
"learning_rate": 3.1404144408492827e-05,
"loss": 2.7328,
"step": 112300
},
{
"epoch": 0.37,
"learning_rate": 3.138758532069985e-05,
"loss": 2.7516,
"step": 112400
},
{
"epoch": 0.37,
"learning_rate": 3.137102623290688e-05,
"loss": 2.759,
"step": 112500
},
{
"epoch": 0.37,
"learning_rate": 3.135446714511391e-05,
"loss": 2.7515,
"step": 112600
},
{
"epoch": 0.37,
"learning_rate": 3.133790805732094e-05,
"loss": 2.7585,
"step": 112700
},
{
"epoch": 0.37,
"learning_rate": 3.132134896952797e-05,
"loss": 2.7767,
"step": 112800
},
{
"epoch": 0.37,
"learning_rate": 3.1304789881734995e-05,
"loss": 2.7467,
"step": 112900
},
{
"epoch": 0.37,
"learning_rate": 3.1288230793942025e-05,
"loss": 2.7571,
"step": 113000
},
{
"epoch": 0.37,
"learning_rate": 3.1271671706149055e-05,
"loss": 2.7672,
"step": 113100
},
{
"epoch": 0.37,
"learning_rate": 3.125511261835608e-05,
"loss": 2.7683,
"step": 113200
},
{
"epoch": 0.38,
"learning_rate": 3.123855353056311e-05,
"loss": 2.7675,
"step": 113300
},
{
"epoch": 0.38,
"learning_rate": 3.122199444277013e-05,
"loss": 2.7559,
"step": 113400
},
{
"epoch": 0.38,
"learning_rate": 3.120543535497716e-05,
"loss": 2.7681,
"step": 113500
},
{
"epoch": 0.38,
"learning_rate": 3.11888762671842e-05,
"loss": 2.7606,
"step": 113600
},
{
"epoch": 0.38,
"learning_rate": 3.1172317179391224e-05,
"loss": 2.7505,
"step": 113700
},
{
"epoch": 0.38,
"learning_rate": 3.1155758091598254e-05,
"loss": 2.7445,
"step": 113800
},
{
"epoch": 0.38,
"learning_rate": 3.113919900380528e-05,
"loss": 2.755,
"step": 113900
},
{
"epoch": 0.38,
"learning_rate": 3.112263991601231e-05,
"loss": 2.7527,
"step": 114000
},
{
"epoch": 0.38,
"learning_rate": 3.110608082821934e-05,
"loss": 2.7576,
"step": 114100
},
{
"epoch": 0.38,
"learning_rate": 3.108952174042636e-05,
"loss": 2.7761,
"step": 114200
},
{
"epoch": 0.38,
"learning_rate": 3.107296265263339e-05,
"loss": 2.7507,
"step": 114300
},
{
"epoch": 0.38,
"learning_rate": 3.105640356484042e-05,
"loss": 2.7524,
"step": 114400
},
{
"epoch": 0.38,
"learning_rate": 3.1039844477047446e-05,
"loss": 2.7524,
"step": 114500
},
{
"epoch": 0.38,
"learning_rate": 3.102328538925448e-05,
"loss": 2.7679,
"step": 114600
},
{
"epoch": 0.38,
"learning_rate": 3.1006726301461506e-05,
"loss": 2.7491,
"step": 114700
},
{
"epoch": 0.38,
"learning_rate": 3.0990167213668537e-05,
"loss": 2.766,
"step": 114800
},
{
"epoch": 0.38,
"learning_rate": 3.097360812587557e-05,
"loss": 2.7598,
"step": 114900
},
{
"epoch": 0.38,
"learning_rate": 3.095704903808259e-05,
"loss": 2.7573,
"step": 115000
},
{
"epoch": 0.38,
"learning_rate": 3.094048995028962e-05,
"loss": 2.772,
"step": 115100
},
{
"epoch": 0.38,
"learning_rate": 3.0923930862496644e-05,
"loss": 2.7522,
"step": 115200
},
{
"epoch": 0.38,
"learning_rate": 3.0907371774703675e-05,
"loss": 2.7459,
"step": 115300
},
{
"epoch": 0.38,
"learning_rate": 3.0890812686910705e-05,
"loss": 2.7562,
"step": 115400
},
{
"epoch": 0.38,
"learning_rate": 3.087425359911773e-05,
"loss": 2.7545,
"step": 115500
},
{
"epoch": 0.38,
"learning_rate": 3.0857694511324765e-05,
"loss": 2.7687,
"step": 115600
},
{
"epoch": 0.38,
"learning_rate": 3.084113542353179e-05,
"loss": 2.7686,
"step": 115700
},
{
"epoch": 0.38,
"learning_rate": 3.082457633573882e-05,
"loss": 2.7615,
"step": 115800
},
{
"epoch": 0.38,
"learning_rate": 3.080801724794585e-05,
"loss": 2.7742,
"step": 115900
},
{
"epoch": 0.38,
"learning_rate": 3.079145816015287e-05,
"loss": 2.7593,
"step": 116000
},
{
"epoch": 0.38,
"learning_rate": 3.07748990723599e-05,
"loss": 2.7572,
"step": 116100
},
{
"epoch": 0.38,
"learning_rate": 3.0758339984566934e-05,
"loss": 2.7566,
"step": 116200
},
{
"epoch": 0.39,
"learning_rate": 3.074178089677396e-05,
"loss": 2.7496,
"step": 116300
},
{
"epoch": 0.39,
"learning_rate": 3.072522180898099e-05,
"loss": 2.767,
"step": 116400
},
{
"epoch": 0.39,
"learning_rate": 3.070866272118801e-05,
"loss": 2.747,
"step": 116500
},
{
"epoch": 0.39,
"learning_rate": 3.069210363339505e-05,
"loss": 2.7728,
"step": 116600
},
{
"epoch": 0.39,
"learning_rate": 3.067554454560208e-05,
"loss": 2.7727,
"step": 116700
},
{
"epoch": 0.39,
"learning_rate": 3.06589854578091e-05,
"loss": 2.749,
"step": 116800
},
{
"epoch": 0.39,
"learning_rate": 3.064242637001613e-05,
"loss": 2.7447,
"step": 116900
},
{
"epoch": 0.39,
"learning_rate": 3.0625867282223156e-05,
"loss": 2.7553,
"step": 117000
},
{
"epoch": 0.39,
"learning_rate": 3.0609308194430186e-05,
"loss": 2.7591,
"step": 117100
},
{
"epoch": 0.39,
"learning_rate": 3.0592749106637216e-05,
"loss": 2.7478,
"step": 117200
},
{
"epoch": 0.39,
"learning_rate": 3.057619001884424e-05,
"loss": 2.7632,
"step": 117300
},
{
"epoch": 0.39,
"learning_rate": 3.055963093105127e-05,
"loss": 2.754,
"step": 117400
},
{
"epoch": 0.39,
"learning_rate": 3.05430718432583e-05,
"loss": 2.7479,
"step": 117500
},
{
"epoch": 0.39,
"learning_rate": 3.052651275546533e-05,
"loss": 2.7646,
"step": 117600
},
{
"epoch": 0.39,
"learning_rate": 3.0509953667672358e-05,
"loss": 2.7489,
"step": 117700
},
{
"epoch": 0.39,
"learning_rate": 3.0493394579879388e-05,
"loss": 2.7734,
"step": 117800
},
{
"epoch": 0.39,
"learning_rate": 3.0476835492086415e-05,
"loss": 2.7647,
"step": 117900
},
{
"epoch": 0.39,
"learning_rate": 3.0460276404293442e-05,
"loss": 2.7644,
"step": 118000
},
{
"epoch": 0.39,
"learning_rate": 3.044371731650047e-05,
"loss": 2.7662,
"step": 118100
},
{
"epoch": 0.39,
"learning_rate": 3.04271582287075e-05,
"loss": 2.7603,
"step": 118200
},
{
"epoch": 0.39,
"learning_rate": 3.0410599140914526e-05,
"loss": 2.7531,
"step": 118300
},
{
"epoch": 0.39,
"learning_rate": 3.0394040053121553e-05,
"loss": 2.7671,
"step": 118400
},
{
"epoch": 0.39,
"learning_rate": 3.037748096532858e-05,
"loss": 2.7601,
"step": 118500
},
{
"epoch": 0.39,
"learning_rate": 3.0360921877535613e-05,
"loss": 2.758,
"step": 118600
},
{
"epoch": 0.39,
"learning_rate": 3.034436278974264e-05,
"loss": 2.7721,
"step": 118700
},
{
"epoch": 0.39,
"learning_rate": 3.032780370194967e-05,
"loss": 2.7658,
"step": 118800
},
{
"epoch": 0.39,
"learning_rate": 3.0311244614156698e-05,
"loss": 2.7738,
"step": 118900
},
{
"epoch": 0.39,
"learning_rate": 3.0294685526363724e-05,
"loss": 2.7705,
"step": 119000
},
{
"epoch": 0.39,
"learning_rate": 3.027812643857075e-05,
"loss": 2.755,
"step": 119100
},
{
"epoch": 0.39,
"learning_rate": 3.026156735077778e-05,
"loss": 2.7609,
"step": 119200
},
{
"epoch": 0.4,
"learning_rate": 3.024500826298481e-05,
"loss": 2.7629,
"step": 119300
},
{
"epoch": 0.4,
"learning_rate": 3.0228449175191835e-05,
"loss": 2.7596,
"step": 119400
},
{
"epoch": 0.4,
"learning_rate": 3.0211890087398866e-05,
"loss": 2.7434,
"step": 119500
},
{
"epoch": 0.4,
"learning_rate": 3.0195330999605896e-05,
"loss": 2.7797,
"step": 119600
},
{
"epoch": 0.4,
"learning_rate": 3.0178771911812926e-05,
"loss": 2.7545,
"step": 119700
},
{
"epoch": 0.4,
"learning_rate": 3.0162212824019953e-05,
"loss": 2.7634,
"step": 119800
},
{
"epoch": 0.4,
"learning_rate": 3.014565373622698e-05,
"loss": 2.7587,
"step": 119900
},
{
"epoch": 0.4,
"learning_rate": 3.0129094648434007e-05,
"loss": 2.7685,
"step": 120000
},
{
"epoch": 0.4,
"learning_rate": 3.0112535560641037e-05,
"loss": 2.7517,
"step": 120100
},
{
"epoch": 0.4,
"learning_rate": 3.0095976472848064e-05,
"loss": 2.7565,
"step": 120200
},
{
"epoch": 0.4,
"learning_rate": 3.007941738505509e-05,
"loss": 2.7566,
"step": 120300
},
{
"epoch": 0.4,
"learning_rate": 3.0062858297262118e-05,
"loss": 2.7513,
"step": 120400
},
{
"epoch": 0.4,
"learning_rate": 3.004629920946915e-05,
"loss": 2.7686,
"step": 120500
},
{
"epoch": 0.4,
"learning_rate": 3.0029740121676182e-05,
"loss": 2.7608,
"step": 120600
},
{
"epoch": 0.4,
"learning_rate": 3.001318103388321e-05,
"loss": 2.7566,
"step": 120700
},
{
"epoch": 0.4,
"learning_rate": 2.9996621946090236e-05,
"loss": 2.7598,
"step": 120800
},
{
"epoch": 0.4,
"learning_rate": 2.9980062858297263e-05,
"loss": 2.7567,
"step": 120900
},
{
"epoch": 0.4,
"learning_rate": 2.9963503770504293e-05,
"loss": 2.7671,
"step": 121000
},
{
"epoch": 0.4,
"learning_rate": 2.994694468271132e-05,
"loss": 2.7633,
"step": 121100
},
{
"epoch": 0.4,
"learning_rate": 2.9930385594918347e-05,
"loss": 2.7682,
"step": 121200
},
{
"epoch": 0.4,
"learning_rate": 2.9913826507125374e-05,
"loss": 2.7629,
"step": 121300
},
{
"epoch": 0.4,
"learning_rate": 2.9897267419332404e-05,
"loss": 2.7464,
"step": 121400
},
{
"epoch": 0.4,
"learning_rate": 2.988070833153943e-05,
"loss": 2.7524,
"step": 121500
},
{
"epoch": 0.4,
"learning_rate": 2.9864149243746465e-05,
"loss": 2.7651,
"step": 121600
},
{
"epoch": 0.4,
"learning_rate": 2.9847590155953492e-05,
"loss": 2.7569,
"step": 121700
},
{
"epoch": 0.4,
"learning_rate": 2.983103106816052e-05,
"loss": 2.7521,
"step": 121800
},
{
"epoch": 0.4,
"learning_rate": 2.981447198036755e-05,
"loss": 2.7674,
"step": 121900
},
{
"epoch": 0.4,
"learning_rate": 2.9797912892574576e-05,
"loss": 2.7561,
"step": 122000
},
{
"epoch": 0.4,
"learning_rate": 2.9781353804781603e-05,
"loss": 2.7656,
"step": 122100
},
{
"epoch": 0.4,
"learning_rate": 2.976479471698863e-05,
"loss": 2.7588,
"step": 122200
},
{
"epoch": 0.41,
"learning_rate": 2.974823562919566e-05,
"loss": 2.7578,
"step": 122300
},
{
"epoch": 0.41,
"learning_rate": 2.9731676541402687e-05,
"loss": 2.7505,
"step": 122400
},
{
"epoch": 0.41,
"learning_rate": 2.9715117453609714e-05,
"loss": 2.769,
"step": 122500
},
{
"epoch": 0.41,
"learning_rate": 2.9698558365816747e-05,
"loss": 2.7546,
"step": 122600
},
{
"epoch": 0.41,
"learning_rate": 2.9681999278023774e-05,
"loss": 2.7474,
"step": 122700
},
{
"epoch": 0.41,
"learning_rate": 2.9665440190230805e-05,
"loss": 2.7522,
"step": 122800
},
{
"epoch": 0.41,
"learning_rate": 2.964888110243783e-05,
"loss": 2.7692,
"step": 122900
},
{
"epoch": 0.41,
"learning_rate": 2.963232201464486e-05,
"loss": 2.7625,
"step": 123000
},
{
"epoch": 0.41,
"learning_rate": 2.9615762926851885e-05,
"loss": 2.7502,
"step": 123100
},
{
"epoch": 0.41,
"learning_rate": 2.9599203839058916e-05,
"loss": 2.7606,
"step": 123200
},
{
"epoch": 0.41,
"learning_rate": 2.9582644751265943e-05,
"loss": 2.7565,
"step": 123300
},
{
"epoch": 0.41,
"learning_rate": 2.956608566347297e-05,
"loss": 2.7703,
"step": 123400
},
{
"epoch": 0.41,
"learning_rate": 2.9549526575679996e-05,
"loss": 2.7646,
"step": 123500
},
{
"epoch": 0.41,
"learning_rate": 2.953296748788703e-05,
"loss": 2.7451,
"step": 123600
},
{
"epoch": 0.41,
"learning_rate": 2.951640840009406e-05,
"loss": 2.7623,
"step": 123700
},
{
"epoch": 0.41,
"learning_rate": 2.9499849312301087e-05,
"loss": 2.7494,
"step": 123800
},
{
"epoch": 0.41,
"learning_rate": 2.9483290224508114e-05,
"loss": 2.7483,
"step": 123900
},
{
"epoch": 0.41,
"learning_rate": 2.946673113671514e-05,
"loss": 2.7581,
"step": 124000
},
{
"epoch": 0.41,
"learning_rate": 2.945017204892217e-05,
"loss": 2.7801,
"step": 124100
},
{
"epoch": 0.41,
"learning_rate": 2.94336129611292e-05,
"loss": 2.76,
"step": 124200
},
{
"epoch": 0.41,
"learning_rate": 2.9417053873336225e-05,
"loss": 2.7638,
"step": 124300
},
{
"epoch": 0.41,
"learning_rate": 2.9400494785543252e-05,
"loss": 2.7367,
"step": 124400
},
{
"epoch": 0.41,
"learning_rate": 2.9383935697750283e-05,
"loss": 2.7502,
"step": 124500
},
{
"epoch": 0.41,
"learning_rate": 2.9367376609957313e-05,
"loss": 2.7542,
"step": 124600
},
{
"epoch": 0.41,
"learning_rate": 2.9350817522164343e-05,
"loss": 2.7528,
"step": 124700
},
{
"epoch": 0.41,
"learning_rate": 2.933425843437137e-05,
"loss": 2.7583,
"step": 124800
},
{
"epoch": 0.41,
"learning_rate": 2.9317699346578397e-05,
"loss": 2.7601,
"step": 124900
},
{
"epoch": 0.41,
"learning_rate": 2.9301140258785427e-05,
"loss": 2.7696,
"step": 125000
},
{
"epoch": 0.41,
"learning_rate": 2.9284581170992454e-05,
"loss": 2.7524,
"step": 125100
},
{
"epoch": 0.41,
"learning_rate": 2.926802208319948e-05,
"loss": 2.7628,
"step": 125200
},
{
"epoch": 0.41,
"learning_rate": 2.9251462995406508e-05,
"loss": 2.7501,
"step": 125300
},
{
"epoch": 0.42,
"learning_rate": 2.9234903907613538e-05,
"loss": 2.7626,
"step": 125400
},
{
"epoch": 0.42,
"learning_rate": 2.9218344819820565e-05,
"loss": 2.7555,
"step": 125500
},
{
"epoch": 0.42,
"learning_rate": 2.92017857320276e-05,
"loss": 2.7382,
"step": 125600
},
{
"epoch": 0.42,
"learning_rate": 2.9185226644234626e-05,
"loss": 2.7408,
"step": 125700
},
{
"epoch": 0.42,
"learning_rate": 2.9168667556441653e-05,
"loss": 2.7702,
"step": 125800
},
{
"epoch": 0.42,
"learning_rate": 2.915210846864868e-05,
"loss": 2.744,
"step": 125900
},
{
"epoch": 0.42,
"learning_rate": 2.913554938085571e-05,
"loss": 2.7721,
"step": 126000
},
{
"epoch": 0.42,
"learning_rate": 2.9118990293062737e-05,
"loss": 2.758,
"step": 126100
},
{
"epoch": 0.42,
"learning_rate": 2.9102431205269764e-05,
"loss": 2.758,
"step": 126200
},
{
"epoch": 0.42,
"learning_rate": 2.908587211747679e-05,
"loss": 2.7522,
"step": 126300
},
{
"epoch": 0.42,
"learning_rate": 2.906931302968382e-05,
"loss": 2.7548,
"step": 126400
},
{
"epoch": 0.42,
"learning_rate": 2.9052753941890848e-05,
"loss": 2.7558,
"step": 126500
},
{
"epoch": 0.42,
"learning_rate": 2.903619485409788e-05,
"loss": 2.7564,
"step": 126600
},
{
"epoch": 0.42,
"learning_rate": 2.901963576630491e-05,
"loss": 2.756,
"step": 126700
},
{
"epoch": 0.42,
"learning_rate": 2.9003076678511935e-05,
"loss": 2.7608,
"step": 126800
},
{
"epoch": 0.42,
"learning_rate": 2.8986517590718966e-05,
"loss": 2.7604,
"step": 126900
},
{
"epoch": 0.42,
"learning_rate": 2.8969958502925993e-05,
"loss": 2.7504,
"step": 127000
},
{
"epoch": 0.42,
"learning_rate": 2.895339941513302e-05,
"loss": 2.7661,
"step": 127100
},
{
"epoch": 0.42,
"learning_rate": 2.8936840327340046e-05,
"loss": 2.747,
"step": 127200
},
{
"epoch": 0.42,
"learning_rate": 2.8920281239547077e-05,
"loss": 2.7703,
"step": 127300
},
{
"epoch": 0.42,
"learning_rate": 2.8903722151754104e-05,
"loss": 2.7673,
"step": 127400
},
{
"epoch": 0.42,
"learning_rate": 2.888716306396113e-05,
"loss": 2.7584,
"step": 127500
},
{
"epoch": 0.42,
"learning_rate": 2.8870603976168164e-05,
"loss": 2.7669,
"step": 127600
},
{
"epoch": 0.42,
"learning_rate": 2.885404488837519e-05,
"loss": 2.7593,
"step": 127700
},
{
"epoch": 0.42,
"learning_rate": 2.883748580058222e-05,
"loss": 2.7525,
"step": 127800
},
{
"epoch": 0.42,
"learning_rate": 2.882092671278925e-05,
"loss": 2.7673,
"step": 127900
},
{
"epoch": 0.42,
"learning_rate": 2.8804367624996275e-05,
"loss": 2.7647,
"step": 128000
},
{
"epoch": 0.42,
"learning_rate": 2.8787808537203302e-05,
"loss": 2.7812,
"step": 128100
},
{
"epoch": 0.42,
"learning_rate": 2.8771249449410332e-05,
"loss": 2.7534,
"step": 128200
},
{
"epoch": 0.42,
"learning_rate": 2.875469036161736e-05,
"loss": 2.7551,
"step": 128300
},
{
"epoch": 0.43,
"learning_rate": 2.8738131273824386e-05,
"loss": 2.7509,
"step": 128400
},
{
"epoch": 0.43,
"learning_rate": 2.8721572186031413e-05,
"loss": 2.7596,
"step": 128500
},
{
"epoch": 0.43,
"learning_rate": 2.8705013098238447e-05,
"loss": 2.7566,
"step": 128600
},
{
"epoch": 0.43,
"learning_rate": 2.8688454010445477e-05,
"loss": 2.7621,
"step": 128700
},
{
"epoch": 0.43,
"learning_rate": 2.8671894922652504e-05,
"loss": 2.7649,
"step": 128800
},
{
"epoch": 0.43,
"learning_rate": 2.865533583485953e-05,
"loss": 2.754,
"step": 128900
},
{
"epoch": 0.43,
"learning_rate": 2.8638776747066558e-05,
"loss": 2.7649,
"step": 129000
},
{
"epoch": 0.43,
"learning_rate": 2.8622217659273588e-05,
"loss": 2.7483,
"step": 129100
},
{
"epoch": 0.43,
"learning_rate": 2.8605658571480615e-05,
"loss": 2.7457,
"step": 129200
},
{
"epoch": 0.43,
"learning_rate": 2.8589099483687642e-05,
"loss": 2.7721,
"step": 129300
},
{
"epoch": 0.43,
"learning_rate": 2.857254039589467e-05,
"loss": 2.7535,
"step": 129400
},
{
"epoch": 0.43,
"learning_rate": 2.85559813081017e-05,
"loss": 2.7428,
"step": 129500
},
{
"epoch": 0.43,
"learning_rate": 2.8539422220308733e-05,
"loss": 2.7509,
"step": 129600
},
{
"epoch": 0.43,
"learning_rate": 2.852286313251576e-05,
"loss": 2.7598,
"step": 129700
},
{
"epoch": 0.43,
"learning_rate": 2.8506304044722787e-05,
"loss": 2.76,
"step": 129800
},
{
"epoch": 0.43,
"learning_rate": 2.8489744956929814e-05,
"loss": 2.7483,
"step": 129900
},
{
"epoch": 0.43,
"learning_rate": 2.8473185869136844e-05,
"loss": 2.7416,
"step": 130000
},
{
"epoch": 0.43,
"learning_rate": 2.845662678134387e-05,
"loss": 2.7611,
"step": 130100
},
{
"epoch": 0.43,
"learning_rate": 2.8440067693550898e-05,
"loss": 2.7499,
"step": 130200
},
{
"epoch": 0.43,
"learning_rate": 2.8423508605757925e-05,
"loss": 2.7544,
"step": 130300
},
{
"epoch": 0.43,
"learning_rate": 2.8406949517964955e-05,
"loss": 2.751,
"step": 130400
},
{
"epoch": 0.43,
"learning_rate": 2.8390390430171982e-05,
"loss": 2.7685,
"step": 130500
},
{
"epoch": 0.43,
"learning_rate": 2.8373831342379016e-05,
"loss": 2.7527,
"step": 130600
},
{
"epoch": 0.43,
"learning_rate": 2.8357272254586043e-05,
"loss": 2.738,
"step": 130700
},
{
"epoch": 0.43,
"learning_rate": 2.834071316679307e-05,
"loss": 2.7563,
"step": 130800
},
{
"epoch": 0.43,
"learning_rate": 2.83241540790001e-05,
"loss": 2.7542,
"step": 130900
},
{
"epoch": 0.43,
"learning_rate": 2.8307594991207127e-05,
"loss": 2.7592,
"step": 131000
},
{
"epoch": 0.43,
"learning_rate": 2.8291035903414154e-05,
"loss": 2.7531,
"step": 131100
},
{
"epoch": 0.43,
"learning_rate": 2.827447681562118e-05,
"loss": 2.7603,
"step": 131200
},
{
"epoch": 0.43,
"learning_rate": 2.825791772782821e-05,
"loss": 2.758,
"step": 131300
},
{
"epoch": 0.44,
"learning_rate": 2.8241358640035238e-05,
"loss": 2.7593,
"step": 131400
},
{
"epoch": 0.44,
"learning_rate": 2.8224799552242265e-05,
"loss": 2.7529,
"step": 131500
},
{
"epoch": 0.44,
"learning_rate": 2.8208240464449298e-05,
"loss": 2.7574,
"step": 131600
},
{
"epoch": 0.44,
"learning_rate": 2.8191681376656325e-05,
"loss": 2.764,
"step": 131700
},
{
"epoch": 0.44,
"learning_rate": 2.8175122288863352e-05,
"loss": 2.7549,
"step": 131800
},
{
"epoch": 0.44,
"learning_rate": 2.8158563201070382e-05,
"loss": 2.7575,
"step": 131900
},
{
"epoch": 0.44,
"learning_rate": 2.814200411327741e-05,
"loss": 2.7618,
"step": 132000
},
{
"epoch": 0.44,
"learning_rate": 2.8125445025484436e-05,
"loss": 2.7562,
"step": 132100
},
{
"epoch": 0.44,
"learning_rate": 2.8108885937691467e-05,
"loss": 2.7533,
"step": 132200
},
{
"epoch": 0.44,
"learning_rate": 2.8092326849898493e-05,
"loss": 2.7619,
"step": 132300
},
{
"epoch": 0.44,
"learning_rate": 2.807576776210552e-05,
"loss": 2.7684,
"step": 132400
},
{
"epoch": 0.44,
"learning_rate": 2.8059208674312547e-05,
"loss": 2.7523,
"step": 132500
},
{
"epoch": 0.44,
"learning_rate": 2.804264958651958e-05,
"loss": 2.7546,
"step": 132600
},
{
"epoch": 0.44,
"learning_rate": 2.8026090498726608e-05,
"loss": 2.7422,
"step": 132700
},
{
"epoch": 0.44,
"learning_rate": 2.8009531410933638e-05,
"loss": 2.739,
"step": 132800
},
{
"epoch": 0.44,
"learning_rate": 2.7992972323140665e-05,
"loss": 2.7486,
"step": 132900
},
{
"epoch": 0.44,
"learning_rate": 2.7976413235347692e-05,
"loss": 2.7702,
"step": 133000
},
{
"epoch": 0.44,
"learning_rate": 2.795985414755472e-05,
"loss": 2.757,
"step": 133100
},
{
"epoch": 0.44,
"learning_rate": 2.794329505976175e-05,
"loss": 2.753,
"step": 133200
},
{
"epoch": 0.44,
"learning_rate": 2.7926735971968776e-05,
"loss": 2.7609,
"step": 133300
},
{
"epoch": 0.44,
"learning_rate": 2.7910176884175803e-05,
"loss": 2.7548,
"step": 133400
},
{
"epoch": 0.44,
"learning_rate": 2.789361779638283e-05,
"loss": 2.7507,
"step": 133500
},
{
"epoch": 0.44,
"learning_rate": 2.7877058708589864e-05,
"loss": 2.7667,
"step": 133600
},
{
"epoch": 0.44,
"learning_rate": 2.7860499620796894e-05,
"loss": 2.7481,
"step": 133700
},
{
"epoch": 0.44,
"learning_rate": 2.784394053300392e-05,
"loss": 2.7568,
"step": 133800
},
{
"epoch": 0.44,
"learning_rate": 2.7827381445210948e-05,
"loss": 2.7523,
"step": 133900
},
{
"epoch": 0.44,
"learning_rate": 2.7810822357417975e-05,
"loss": 2.7501,
"step": 134000
},
{
"epoch": 0.44,
"learning_rate": 2.7794263269625005e-05,
"loss": 2.7641,
"step": 134100
},
{
"epoch": 0.44,
"learning_rate": 2.7777704181832032e-05,
"loss": 2.7577,
"step": 134200
},
{
"epoch": 0.44,
"learning_rate": 2.776114509403906e-05,
"loss": 2.7708,
"step": 134300
},
{
"epoch": 0.45,
"learning_rate": 2.7744586006246086e-05,
"loss": 2.7554,
"step": 134400
},
{
"epoch": 0.45,
"learning_rate": 2.7728026918453116e-05,
"loss": 2.7637,
"step": 134500
},
{
"epoch": 0.45,
"learning_rate": 2.771146783066015e-05,
"loss": 2.7588,
"step": 134600
},
{
"epoch": 0.45,
"learning_rate": 2.7694908742867177e-05,
"loss": 2.7354,
"step": 134700
},
{
"epoch": 0.45,
"learning_rate": 2.7678349655074203e-05,
"loss": 2.7412,
"step": 134800
},
{
"epoch": 0.45,
"learning_rate": 2.766179056728123e-05,
"loss": 2.7553,
"step": 134900
},
{
"epoch": 0.45,
"learning_rate": 2.764523147948826e-05,
"loss": 2.7737,
"step": 135000
},
{
"epoch": 0.45,
"learning_rate": 2.7628672391695288e-05,
"loss": 2.7591,
"step": 135100
},
{
"epoch": 0.45,
"learning_rate": 2.7612113303902315e-05,
"loss": 2.7673,
"step": 135200
},
{
"epoch": 0.45,
"learning_rate": 2.759555421610934e-05,
"loss": 2.7629,
"step": 135300
},
{
"epoch": 0.45,
"learning_rate": 2.7578995128316372e-05,
"loss": 2.7573,
"step": 135400
},
{
"epoch": 0.45,
"learning_rate": 2.75624360405234e-05,
"loss": 2.7672,
"step": 135500
},
{
"epoch": 0.45,
"learning_rate": 2.7545876952730432e-05,
"loss": 2.7616,
"step": 135600
},
{
"epoch": 0.45,
"learning_rate": 2.752931786493746e-05,
"loss": 2.7643,
"step": 135700
},
{
"epoch": 0.45,
"learning_rate": 2.7512758777144486e-05,
"loss": 2.7573,
"step": 135800
},
{
"epoch": 0.45,
"learning_rate": 2.7496199689351516e-05,
"loss": 2.7671,
"step": 135900
},
{
"epoch": 0.45,
"learning_rate": 2.7479640601558543e-05,
"loss": 2.7442,
"step": 136000
},
{
"epoch": 0.45,
"learning_rate": 2.746308151376557e-05,
"loss": 2.7525,
"step": 136100
},
{
"epoch": 0.45,
"learning_rate": 2.7446522425972597e-05,
"loss": 2.7564,
"step": 136200
},
{
"epoch": 0.45,
"learning_rate": 2.7429963338179627e-05,
"loss": 2.7669,
"step": 136300
},
{
"epoch": 0.45,
"learning_rate": 2.7413404250386654e-05,
"loss": 2.7549,
"step": 136400
},
{
"epoch": 0.45,
"learning_rate": 2.739684516259368e-05,
"loss": 2.7513,
"step": 136500
},
{
"epoch": 0.45,
"learning_rate": 2.7380286074800715e-05,
"loss": 2.7709,
"step": 136600
},
{
"epoch": 0.45,
"learning_rate": 2.7363726987007742e-05,
"loss": 2.76,
"step": 136700
},
{
"epoch": 0.45,
"learning_rate": 2.7347167899214772e-05,
"loss": 2.7617,
"step": 136800
},
{
"epoch": 0.45,
"learning_rate": 2.73306088114218e-05,
"loss": 2.7655,
"step": 136900
},
{
"epoch": 0.45,
"learning_rate": 2.7314049723628826e-05,
"loss": 2.758,
"step": 137000
},
{
"epoch": 0.45,
"learning_rate": 2.7297490635835853e-05,
"loss": 2.7473,
"step": 137100
},
{
"epoch": 0.45,
"learning_rate": 2.7280931548042883e-05,
"loss": 2.7639,
"step": 137200
},
{
"epoch": 0.45,
"learning_rate": 2.726437246024991e-05,
"loss": 2.7329,
"step": 137300
},
{
"epoch": 0.46,
"learning_rate": 2.7247813372456937e-05,
"loss": 2.738,
"step": 137400
},
{
"epoch": 0.46,
"learning_rate": 2.7231254284663964e-05,
"loss": 2.7539,
"step": 137500
},
{
"epoch": 0.46,
"learning_rate": 2.7214695196870998e-05,
"loss": 2.7639,
"step": 137600
},
{
"epoch": 0.46,
"learning_rate": 2.7198136109078025e-05,
"loss": 2.7594,
"step": 137700
},
{
"epoch": 0.46,
"learning_rate": 2.7181577021285055e-05,
"loss": 2.7552,
"step": 137800
},
{
"epoch": 0.46,
"learning_rate": 2.7165017933492082e-05,
"loss": 2.7571,
"step": 137900
},
{
"epoch": 0.46,
"learning_rate": 2.714845884569911e-05,
"loss": 2.7518,
"step": 138000
},
{
"epoch": 0.46,
"learning_rate": 2.713189975790614e-05,
"loss": 2.7523,
"step": 138100
},
{
"epoch": 0.46,
"learning_rate": 2.7115340670113166e-05,
"loss": 2.7529,
"step": 138200
},
{
"epoch": 0.46,
"learning_rate": 2.7098781582320193e-05,
"loss": 2.7553,
"step": 138300
},
{
"epoch": 0.46,
"learning_rate": 2.708222249452722e-05,
"loss": 2.7652,
"step": 138400
},
{
"epoch": 0.46,
"learning_rate": 2.706566340673425e-05,
"loss": 2.7662,
"step": 138500
},
{
"epoch": 0.46,
"learning_rate": 2.704910431894128e-05,
"loss": 2.7495,
"step": 138600
},
{
"epoch": 0.46,
"learning_rate": 2.703254523114831e-05,
"loss": 2.7556,
"step": 138700
},
{
"epoch": 0.46,
"learning_rate": 2.7015986143355338e-05,
"loss": 2.7361,
"step": 138800
},
{
"epoch": 0.46,
"learning_rate": 2.6999427055562364e-05,
"loss": 2.7461,
"step": 138900
},
{
"epoch": 0.46,
"learning_rate": 2.698286796776939e-05,
"loss": 2.7653,
"step": 139000
},
{
"epoch": 0.46,
"learning_rate": 2.696630887997642e-05,
"loss": 2.7568,
"step": 139100
},
{
"epoch": 0.46,
"learning_rate": 2.694974979218345e-05,
"loss": 2.7588,
"step": 139200
},
{
"epoch": 0.46,
"learning_rate": 2.6933190704390475e-05,
"loss": 2.758,
"step": 139300
},
{
"epoch": 0.46,
"learning_rate": 2.6916631616597506e-05,
"loss": 2.7678,
"step": 139400
},
{
"epoch": 0.46,
"learning_rate": 2.6900072528804533e-05,
"loss": 2.7525,
"step": 139500
},
{
"epoch": 0.46,
"learning_rate": 2.6883513441011566e-05,
"loss": 2.7535,
"step": 139600
},
{
"epoch": 0.46,
"learning_rate": 2.6866954353218593e-05,
"loss": 2.7437,
"step": 139700
},
{
"epoch": 0.46,
"learning_rate": 2.685039526542562e-05,
"loss": 2.7405,
"step": 139800
},
{
"epoch": 0.46,
"learning_rate": 2.6833836177632647e-05,
"loss": 2.7649,
"step": 139900
},
{
"epoch": 0.46,
"learning_rate": 2.6817277089839677e-05,
"loss": 2.7471,
"step": 140000
},
{
"epoch": 0.46,
"learning_rate": 2.6800718002046704e-05,
"loss": 2.7658,
"step": 140100
},
{
"epoch": 0.46,
"learning_rate": 2.678415891425373e-05,
"loss": 2.7554,
"step": 140200
},
{
"epoch": 0.46,
"learning_rate": 2.6767599826460758e-05,
"loss": 2.7472,
"step": 140300
},
{
"epoch": 0.46,
"learning_rate": 2.675104073866779e-05,
"loss": 2.7463,
"step": 140400
},
{
"epoch": 0.47,
"learning_rate": 2.6734481650874815e-05,
"loss": 2.7655,
"step": 140500
},
{
"epoch": 0.47,
"learning_rate": 2.671792256308185e-05,
"loss": 2.754,
"step": 140600
},
{
"epoch": 0.47,
"learning_rate": 2.6701363475288876e-05,
"loss": 2.7632,
"step": 140700
},
{
"epoch": 0.47,
"learning_rate": 2.6684804387495903e-05,
"loss": 2.7481,
"step": 140800
},
{
"epoch": 0.47,
"learning_rate": 2.6668245299702933e-05,
"loss": 2.7548,
"step": 140900
},
{
"epoch": 0.47,
"learning_rate": 2.665168621190996e-05,
"loss": 2.7708,
"step": 141000
},
{
"epoch": 0.47,
"learning_rate": 2.6635127124116987e-05,
"loss": 2.7772,
"step": 141100
},
{
"epoch": 0.47,
"learning_rate": 2.6618568036324014e-05,
"loss": 2.7578,
"step": 141200
},
{
"epoch": 0.47,
"learning_rate": 2.6602008948531044e-05,
"loss": 2.7649,
"step": 141300
},
{
"epoch": 0.47,
"learning_rate": 2.658544986073807e-05,
"loss": 2.768,
"step": 141400
},
{
"epoch": 0.47,
"learning_rate": 2.6568890772945098e-05,
"loss": 2.7488,
"step": 141500
},
{
"epoch": 0.47,
"learning_rate": 2.6552331685152132e-05,
"loss": 2.7581,
"step": 141600
},
{
"epoch": 0.47,
"learning_rate": 2.653577259735916e-05,
"loss": 2.7477,
"step": 141700
},
{
"epoch": 0.47,
"learning_rate": 2.651921350956619e-05,
"loss": 2.7524,
"step": 141800
},
{
"epoch": 0.47,
"learning_rate": 2.6502654421773216e-05,
"loss": 2.7611,
"step": 141900
},
{
"epoch": 0.47,
"learning_rate": 2.6486095333980243e-05,
"loss": 2.7412,
"step": 142000
},
{
"epoch": 0.47,
"learning_rate": 2.646953624618727e-05,
"loss": 2.7678,
"step": 142100
},
{
"epoch": 0.47,
"learning_rate": 2.64529771583943e-05,
"loss": 2.775,
"step": 142200
},
{
"epoch": 0.47,
"learning_rate": 2.6436418070601327e-05,
"loss": 2.7517,
"step": 142300
},
{
"epoch": 0.47,
"learning_rate": 2.6419858982808354e-05,
"loss": 2.7551,
"step": 142400
},
{
"epoch": 0.47,
"learning_rate": 2.640329989501538e-05,
"loss": 2.7571,
"step": 142500
},
{
"epoch": 0.47,
"learning_rate": 2.6386740807222414e-05,
"loss": 2.7454,
"step": 142600
},
{
"epoch": 0.47,
"learning_rate": 2.6370181719429445e-05,
"loss": 2.7501,
"step": 142700
},
{
"epoch": 0.47,
"learning_rate": 2.635362263163647e-05,
"loss": 2.7465,
"step": 142800
},
{
"epoch": 0.47,
"learning_rate": 2.63370635438435e-05,
"loss": 2.7412,
"step": 142900
},
{
"epoch": 0.47,
"learning_rate": 2.6320504456050525e-05,
"loss": 2.745,
"step": 143000
},
{
"epoch": 0.47,
"learning_rate": 2.6303945368257556e-05,
"loss": 2.7597,
"step": 143100
},
{
"epoch": 0.47,
"learning_rate": 2.6287386280464583e-05,
"loss": 2.746,
"step": 143200
},
{
"epoch": 0.47,
"learning_rate": 2.627082719267161e-05,
"loss": 2.7431,
"step": 143300
},
{
"epoch": 0.47,
"learning_rate": 2.6254268104878636e-05,
"loss": 2.7406,
"step": 143400
},
{
"epoch": 0.48,
"learning_rate": 2.6237709017085667e-05,
"loss": 2.7644,
"step": 143500
},
{
"epoch": 0.48,
"learning_rate": 2.62211499292927e-05,
"loss": 2.7502,
"step": 143600
},
{
"epoch": 0.48,
"learning_rate": 2.6204590841499727e-05,
"loss": 2.7523,
"step": 143700
},
{
"epoch": 0.48,
"learning_rate": 2.6188031753706754e-05,
"loss": 2.7476,
"step": 143800
},
{
"epoch": 0.48,
"learning_rate": 2.617147266591378e-05,
"loss": 2.7528,
"step": 143900
},
{
"epoch": 0.48,
"learning_rate": 2.615491357812081e-05,
"loss": 2.7566,
"step": 144000
},
{
"epoch": 0.48,
"learning_rate": 2.613835449032784e-05,
"loss": 2.756,
"step": 144100
},
{
"epoch": 0.48,
"learning_rate": 2.6121795402534865e-05,
"loss": 2.7496,
"step": 144200
},
{
"epoch": 0.48,
"learning_rate": 2.6105236314741892e-05,
"loss": 2.7507,
"step": 144300
},
{
"epoch": 0.48,
"learning_rate": 2.6088677226948922e-05,
"loss": 2.7564,
"step": 144400
},
{
"epoch": 0.48,
"learning_rate": 2.607211813915595e-05,
"loss": 2.76,
"step": 144500
},
{
"epoch": 0.48,
"learning_rate": 2.6055559051362983e-05,
"loss": 2.7514,
"step": 144600
},
{
"epoch": 0.48,
"learning_rate": 2.603899996357001e-05,
"loss": 2.7562,
"step": 144700
},
{
"epoch": 0.48,
"learning_rate": 2.6022440875777037e-05,
"loss": 2.7583,
"step": 144800
},
{
"epoch": 0.48,
"learning_rate": 2.6005881787984064e-05,
"loss": 2.7492,
"step": 144900
},
{
"epoch": 0.48,
"learning_rate": 2.5989322700191094e-05,
"loss": 2.7662,
"step": 145000
},
{
"epoch": 0.48,
"learning_rate": 2.597276361239812e-05,
"loss": 2.7653,
"step": 145100
},
{
"epoch": 0.48,
"learning_rate": 2.5956204524605148e-05,
"loss": 2.7465,
"step": 145200
},
{
"epoch": 0.48,
"learning_rate": 2.5939645436812178e-05,
"loss": 2.7628,
"step": 145300
},
{
"epoch": 0.48,
"learning_rate": 2.5923086349019205e-05,
"loss": 2.7586,
"step": 145400
},
{
"epoch": 0.48,
"learning_rate": 2.5906527261226232e-05,
"loss": 2.7376,
"step": 145500
},
{
"epoch": 0.48,
"learning_rate": 2.5889968173433266e-05,
"loss": 2.7586,
"step": 145600
},
{
"epoch": 0.48,
"learning_rate": 2.5873409085640293e-05,
"loss": 2.7498,
"step": 145700
},
{
"epoch": 0.48,
"learning_rate": 2.585684999784732e-05,
"loss": 2.7767,
"step": 145800
},
{
"epoch": 0.48,
"learning_rate": 2.584029091005435e-05,
"loss": 2.7533,
"step": 145900
},
{
"epoch": 0.48,
"learning_rate": 2.5823731822261377e-05,
"loss": 2.7485,
"step": 146000
},
{
"epoch": 0.48,
"learning_rate": 2.5807172734468404e-05,
"loss": 2.769,
"step": 146100
},
{
"epoch": 0.48,
"learning_rate": 2.579061364667543e-05,
"loss": 2.7463,
"step": 146200
},
{
"epoch": 0.48,
"learning_rate": 2.577405455888246e-05,
"loss": 2.7542,
"step": 146300
},
{
"epoch": 0.48,
"learning_rate": 2.5757495471089488e-05,
"loss": 2.7532,
"step": 146400
},
{
"epoch": 0.49,
"learning_rate": 2.5740936383296515e-05,
"loss": 2.7563,
"step": 146500
},
{
"epoch": 0.49,
"learning_rate": 2.572437729550355e-05,
"loss": 2.7542,
"step": 146600
},
{
"epoch": 0.49,
"learning_rate": 2.5707818207710575e-05,
"loss": 2.7628,
"step": 146700
},
{
"epoch": 0.49,
"learning_rate": 2.5691259119917606e-05,
"loss": 2.7446,
"step": 146800
},
{
"epoch": 0.49,
"learning_rate": 2.5674700032124633e-05,
"loss": 2.749,
"step": 146900
},
{
"epoch": 0.49,
"learning_rate": 2.565814094433166e-05,
"loss": 2.7578,
"step": 147000
},
{
"epoch": 0.49,
"learning_rate": 2.5641581856538686e-05,
"loss": 2.7656,
"step": 147100
},
{
"epoch": 0.49,
"learning_rate": 2.5625022768745717e-05,
"loss": 2.7646,
"step": 147200
},
{
"epoch": 0.49,
"learning_rate": 2.5608463680952744e-05,
"loss": 2.731,
"step": 147300
},
{
"epoch": 0.49,
"learning_rate": 2.559190459315977e-05,
"loss": 2.7534,
"step": 147400
},
{
"epoch": 0.49,
"learning_rate": 2.5575345505366797e-05,
"loss": 2.7461,
"step": 147500
},
{
"epoch": 0.49,
"learning_rate": 2.555878641757383e-05,
"loss": 2.7473,
"step": 147600
},
{
"epoch": 0.49,
"learning_rate": 2.554222732978086e-05,
"loss": 2.7605,
"step": 147700
},
{
"epoch": 0.49,
"learning_rate": 2.552566824198789e-05,
"loss": 2.7654,
"step": 147800
},
{
"epoch": 0.49,
"learning_rate": 2.5509109154194915e-05,
"loss": 2.7463,
"step": 147900
},
{
"epoch": 0.49,
"learning_rate": 2.5492550066401942e-05,
"loss": 2.7581,
"step": 148000
},
{
"epoch": 0.49,
"learning_rate": 2.5475990978608972e-05,
"loss": 2.7486,
"step": 148100
},
{
"epoch": 0.49,
"learning_rate": 2.5459431890816e-05,
"loss": 2.7464,
"step": 148200
},
{
"epoch": 0.49,
"learning_rate": 2.5442872803023026e-05,
"loss": 2.7435,
"step": 148300
},
{
"epoch": 0.49,
"learning_rate": 2.5426313715230053e-05,
"loss": 2.7494,
"step": 148400
},
{
"epoch": 0.49,
"learning_rate": 2.5409754627437083e-05,
"loss": 2.7672,
"step": 148500
},
{
"epoch": 0.49,
"learning_rate": 2.5393195539644117e-05,
"loss": 2.7532,
"step": 148600
},
{
"epoch": 0.49,
"learning_rate": 2.5376636451851144e-05,
"loss": 2.7632,
"step": 148700
},
{
"epoch": 0.49,
"learning_rate": 2.536007736405817e-05,
"loss": 2.7478,
"step": 148800
},
{
"epoch": 0.49,
"learning_rate": 2.5343518276265198e-05,
"loss": 2.732,
"step": 148900
},
{
"epoch": 0.49,
"learning_rate": 2.5326959188472228e-05,
"loss": 2.7473,
"step": 149000
},
{
"epoch": 0.49,
"learning_rate": 2.5310400100679255e-05,
"loss": 2.7461,
"step": 149100
},
{
"epoch": 0.49,
"learning_rate": 2.5293841012886282e-05,
"loss": 2.7607,
"step": 149200
},
{
"epoch": 0.49,
"learning_rate": 2.527728192509331e-05,
"loss": 2.7626,
"step": 149300
},
{
"epoch": 0.49,
"learning_rate": 2.526072283730034e-05,
"loss": 2.7515,
"step": 149400
},
{
"epoch": 0.5,
"learning_rate": 2.5244163749507366e-05,
"loss": 2.7182,
"step": 149500
},
{
"epoch": 0.5,
"learning_rate": 2.52276046617144e-05,
"loss": 2.7523,
"step": 149600
},
{
"epoch": 0.5,
"learning_rate": 2.5211045573921427e-05,
"loss": 2.7541,
"step": 149700
},
{
"epoch": 0.5,
"learning_rate": 2.5194486486128454e-05,
"loss": 2.7619,
"step": 149800
},
{
"epoch": 0.5,
"learning_rate": 2.5177927398335484e-05,
"loss": 2.7523,
"step": 149900
},
{
"epoch": 0.5,
"learning_rate": 2.516136831054251e-05,
"loss": 2.7475,
"step": 150000
},
{
"epoch": 0.5,
"learning_rate": 2.5144809222749538e-05,
"loss": 2.7466,
"step": 150100
},
{
"epoch": 0.5,
"learning_rate": 2.5128250134956565e-05,
"loss": 2.7518,
"step": 150200
},
{
"epoch": 0.5,
"learning_rate": 2.5111691047163595e-05,
"loss": 2.7484,
"step": 150300
},
{
"epoch": 0.5,
"learning_rate": 2.5095131959370622e-05,
"loss": 2.745,
"step": 150400
},
{
"epoch": 0.5,
"learning_rate": 2.507857287157765e-05,
"loss": 2.7493,
"step": 150500
},
{
"epoch": 0.5,
"learning_rate": 2.5062013783784682e-05,
"loss": 2.75,
"step": 150600
},
{
"epoch": 0.5,
"learning_rate": 2.504545469599171e-05,
"loss": 2.7496,
"step": 150700
},
{
"epoch": 0.5,
"learning_rate": 2.502889560819874e-05,
"loss": 2.7602,
"step": 150800
},
{
"epoch": 0.5,
"learning_rate": 2.5012336520405767e-05,
"loss": 2.755,
"step": 150900
},
{
"epoch": 0.5,
"learning_rate": 2.4995777432612794e-05,
"loss": 2.7668,
"step": 151000
},
{
"epoch": 0.5,
"learning_rate": 2.497921834481982e-05,
"loss": 2.7389,
"step": 151100
},
{
"epoch": 0.5,
"learning_rate": 2.496265925702685e-05,
"loss": 2.739,
"step": 151200
},
{
"epoch": 0.5,
"learning_rate": 2.494610016923388e-05,
"loss": 2.7475,
"step": 151300
},
{
"epoch": 0.5,
"learning_rate": 2.4929541081440908e-05,
"loss": 2.7705,
"step": 151400
},
{
"epoch": 0.5,
"learning_rate": 2.4912981993647935e-05,
"loss": 2.7449,
"step": 151500
},
{
"epoch": 0.5,
"learning_rate": 2.4896422905854962e-05,
"loss": 2.7536,
"step": 151600
},
{
"epoch": 0.5,
"learning_rate": 2.4879863818061992e-05,
"loss": 2.7573,
"step": 151700
},
{
"epoch": 0.5,
"learning_rate": 2.4863304730269022e-05,
"loss": 2.7434,
"step": 151800
},
{
"epoch": 0.5,
"learning_rate": 2.484674564247605e-05,
"loss": 2.7633,
"step": 151900
},
{
"epoch": 0.5,
"learning_rate": 2.4830186554683076e-05,
"loss": 2.7484,
"step": 152000
},
{
"epoch": 0.5,
"learning_rate": 2.4813627466890103e-05,
"loss": 2.7682,
"step": 152100
},
{
"epoch": 0.5,
"learning_rate": 2.4797068379097133e-05,
"loss": 2.7473,
"step": 152200
},
{
"epoch": 0.5,
"learning_rate": 2.4780509291304164e-05,
"loss": 2.7521,
"step": 152300
},
{
"epoch": 0.5,
"learning_rate": 2.476395020351119e-05,
"loss": 2.756,
"step": 152400
},
{
"epoch": 0.51,
"learning_rate": 2.4747391115718218e-05,
"loss": 2.7471,
"step": 152500
},
{
"epoch": 0.51,
"learning_rate": 2.4730832027925248e-05,
"loss": 2.7623,
"step": 152600
},
{
"epoch": 0.51,
"learning_rate": 2.4714272940132275e-05,
"loss": 2.752,
"step": 152700
},
{
"epoch": 0.51,
"learning_rate": 2.4697713852339305e-05,
"loss": 2.7477,
"step": 152800
},
{
"epoch": 0.51,
"learning_rate": 2.4681154764546332e-05,
"loss": 2.7503,
"step": 152900
},
{
"epoch": 0.51,
"learning_rate": 2.466459567675336e-05,
"loss": 2.7422,
"step": 153000
},
{
"epoch": 0.51,
"learning_rate": 2.464803658896039e-05,
"loss": 2.7497,
"step": 153100
},
{
"epoch": 0.51,
"learning_rate": 2.4631477501167416e-05,
"loss": 2.7603,
"step": 153200
},
{
"epoch": 0.51,
"learning_rate": 2.4614918413374446e-05,
"loss": 2.7386,
"step": 153300
},
{
"epoch": 0.51,
"learning_rate": 2.4598359325581473e-05,
"loss": 2.7537,
"step": 153400
},
{
"epoch": 0.51,
"learning_rate": 2.4581800237788504e-05,
"loss": 2.7566,
"step": 153500
},
{
"epoch": 0.51,
"learning_rate": 2.456524114999553e-05,
"loss": 2.7501,
"step": 153600
},
{
"epoch": 0.51,
"learning_rate": 2.4548682062202557e-05,
"loss": 2.7485,
"step": 153700
},
{
"epoch": 0.51,
"learning_rate": 2.4532122974409588e-05,
"loss": 2.7623,
"step": 153800
},
{
"epoch": 0.51,
"learning_rate": 2.4515563886616615e-05,
"loss": 2.7502,
"step": 153900
},
{
"epoch": 0.51,
"learning_rate": 2.4499004798823645e-05,
"loss": 2.7534,
"step": 154000
},
{
"epoch": 0.51,
"learning_rate": 2.4482445711030672e-05,
"loss": 2.7535,
"step": 154100
},
{
"epoch": 0.51,
"learning_rate": 2.44658866232377e-05,
"loss": 2.7581,
"step": 154200
},
{
"epoch": 0.51,
"learning_rate": 2.444932753544473e-05,
"loss": 2.7491,
"step": 154300
},
{
"epoch": 0.51,
"learning_rate": 2.443276844765176e-05,
"loss": 2.759,
"step": 154400
},
{
"epoch": 0.51,
"learning_rate": 2.4416209359858786e-05,
"loss": 2.7406,
"step": 154500
},
{
"epoch": 0.51,
"learning_rate": 2.4399650272065813e-05,
"loss": 2.7256,
"step": 154600
},
{
"epoch": 0.51,
"learning_rate": 2.438309118427284e-05,
"loss": 2.7426,
"step": 154700
},
{
"epoch": 0.51,
"learning_rate": 2.436653209647987e-05,
"loss": 2.7443,
"step": 154800
},
{
"epoch": 0.51,
"learning_rate": 2.43499730086869e-05,
"loss": 2.7427,
"step": 154900
},
{
"epoch": 0.51,
"learning_rate": 2.4333413920893928e-05,
"loss": 2.7406,
"step": 155000
},
{
"epoch": 0.51,
"learning_rate": 2.4316854833100954e-05,
"loss": 2.7413,
"step": 155100
},
{
"epoch": 0.51,
"learning_rate": 2.430029574530798e-05,
"loss": 2.7546,
"step": 155200
},
{
"epoch": 0.51,
"learning_rate": 2.4283736657515012e-05,
"loss": 2.7549,
"step": 155300
},
{
"epoch": 0.51,
"learning_rate": 2.4267177569722042e-05,
"loss": 2.7449,
"step": 155400
},
{
"epoch": 0.51,
"learning_rate": 2.425061848192907e-05,
"loss": 2.7493,
"step": 155500
},
{
"epoch": 0.52,
"learning_rate": 2.4234059394136096e-05,
"loss": 2.7444,
"step": 155600
},
{
"epoch": 0.52,
"learning_rate": 2.4217500306343123e-05,
"loss": 2.7492,
"step": 155700
},
{
"epoch": 0.52,
"learning_rate": 2.4200941218550156e-05,
"loss": 2.7444,
"step": 155800
},
{
"epoch": 0.52,
"learning_rate": 2.4184382130757183e-05,
"loss": 2.7415,
"step": 155900
},
{
"epoch": 0.52,
"learning_rate": 2.416782304296421e-05,
"loss": 2.7471,
"step": 156000
},
{
"epoch": 0.52,
"learning_rate": 2.4151263955171237e-05,
"loss": 2.7594,
"step": 156100
},
{
"epoch": 0.52,
"learning_rate": 2.4134704867378267e-05,
"loss": 2.7685,
"step": 156200
},
{
"epoch": 0.52,
"learning_rate": 2.4118145779585298e-05,
"loss": 2.7384,
"step": 156300
},
{
"epoch": 0.52,
"learning_rate": 2.4101586691792325e-05,
"loss": 2.7496,
"step": 156400
},
{
"epoch": 0.52,
"learning_rate": 2.408502760399935e-05,
"loss": 2.7602,
"step": 156500
},
{
"epoch": 0.52,
"learning_rate": 2.406846851620638e-05,
"loss": 2.7505,
"step": 156600
},
{
"epoch": 0.52,
"learning_rate": 2.405190942841341e-05,
"loss": 2.7545,
"step": 156700
},
{
"epoch": 0.52,
"learning_rate": 2.403535034062044e-05,
"loss": 2.732,
"step": 156800
},
{
"epoch": 0.52,
"learning_rate": 2.4018791252827466e-05,
"loss": 2.7503,
"step": 156900
},
{
"epoch": 0.52,
"learning_rate": 2.4002232165034493e-05,
"loss": 2.7363,
"step": 157000
},
{
"epoch": 0.52,
"learning_rate": 2.3985673077241523e-05,
"loss": 2.75,
"step": 157100
},
{
"epoch": 0.52,
"learning_rate": 2.396911398944855e-05,
"loss": 2.738,
"step": 157200
},
{
"epoch": 0.52,
"learning_rate": 2.395255490165558e-05,
"loss": 2.7571,
"step": 157300
},
{
"epoch": 0.52,
"learning_rate": 2.3935995813862607e-05,
"loss": 2.7663,
"step": 157400
},
{
"epoch": 0.52,
"learning_rate": 2.3919436726069634e-05,
"loss": 2.7488,
"step": 157500
},
{
"epoch": 0.52,
"learning_rate": 2.3902877638276665e-05,
"loss": 2.7504,
"step": 157600
},
{
"epoch": 0.52,
"learning_rate": 2.388631855048369e-05,
"loss": 2.7441,
"step": 157700
},
{
"epoch": 0.52,
"learning_rate": 2.3869759462690722e-05,
"loss": 2.7415,
"step": 157800
},
{
"epoch": 0.52,
"learning_rate": 2.385320037489775e-05,
"loss": 2.7663,
"step": 157900
},
{
"epoch": 0.52,
"learning_rate": 2.383664128710478e-05,
"loss": 2.7357,
"step": 158000
},
{
"epoch": 0.52,
"learning_rate": 2.3820082199311806e-05,
"loss": 2.736,
"step": 158100
},
{
"epoch": 0.52,
"learning_rate": 2.3803523111518833e-05,
"loss": 2.7418,
"step": 158200
},
{
"epoch": 0.52,
"learning_rate": 2.3786964023725863e-05,
"loss": 2.7541,
"step": 158300
},
{
"epoch": 0.52,
"learning_rate": 2.377040493593289e-05,
"loss": 2.7487,
"step": 158400
},
{
"epoch": 0.52,
"learning_rate": 2.375384584813992e-05,
"loss": 2.7572,
"step": 158500
},
{
"epoch": 0.53,
"learning_rate": 2.3737286760346947e-05,
"loss": 2.7491,
"step": 158600
},
{
"epoch": 0.53,
"learning_rate": 2.3720727672553974e-05,
"loss": 2.7457,
"step": 158700
},
{
"epoch": 0.53,
"learning_rate": 2.3704168584761004e-05,
"loss": 2.7538,
"step": 158800
},
{
"epoch": 0.53,
"learning_rate": 2.368760949696803e-05,
"loss": 2.7595,
"step": 158900
},
{
"epoch": 0.53,
"learning_rate": 2.367105040917506e-05,
"loss": 2.7432,
"step": 159000
},
{
"epoch": 0.53,
"learning_rate": 2.365449132138209e-05,
"loss": 2.7392,
"step": 159100
},
{
"epoch": 0.53,
"learning_rate": 2.3637932233589115e-05,
"loss": 2.7461,
"step": 159200
},
{
"epoch": 0.53,
"learning_rate": 2.3621373145796146e-05,
"loss": 2.759,
"step": 159300
},
{
"epoch": 0.53,
"learning_rate": 2.3604814058003176e-05,
"loss": 2.7552,
"step": 159400
},
{
"epoch": 0.53,
"learning_rate": 2.3588254970210203e-05,
"loss": 2.756,
"step": 159500
},
{
"epoch": 0.53,
"learning_rate": 2.357169588241723e-05,
"loss": 2.7384,
"step": 159600
},
{
"epoch": 0.53,
"learning_rate": 2.3555136794624257e-05,
"loss": 2.7421,
"step": 159700
},
{
"epoch": 0.53,
"learning_rate": 2.3538577706831287e-05,
"loss": 2.7392,
"step": 159800
},
{
"epoch": 0.53,
"learning_rate": 2.3522018619038317e-05,
"loss": 2.7529,
"step": 159900
},
{
"epoch": 0.53,
"learning_rate": 2.3505459531245344e-05,
"loss": 2.7453,
"step": 160000
},
{
"epoch": 0.53,
"learning_rate": 2.348890044345237e-05,
"loss": 2.7512,
"step": 160100
},
{
"epoch": 0.53,
"learning_rate": 2.3472341355659398e-05,
"loss": 2.7456,
"step": 160200
},
{
"epoch": 0.53,
"learning_rate": 2.3455782267866432e-05,
"loss": 2.7468,
"step": 160300
},
{
"epoch": 0.53,
"learning_rate": 2.343922318007346e-05,
"loss": 2.7498,
"step": 160400
},
{
"epoch": 0.53,
"learning_rate": 2.3422664092280486e-05,
"loss": 2.766,
"step": 160500
},
{
"epoch": 0.53,
"learning_rate": 2.3406105004487513e-05,
"loss": 2.741,
"step": 160600
},
{
"epoch": 0.53,
"learning_rate": 2.3389545916694543e-05,
"loss": 2.7401,
"step": 160700
},
{
"epoch": 0.53,
"learning_rate": 2.3372986828901573e-05,
"loss": 2.7291,
"step": 160800
},
{
"epoch": 0.53,
"learning_rate": 2.33564277411086e-05,
"loss": 2.7573,
"step": 160900
},
{
"epoch": 0.53,
"learning_rate": 2.3339868653315627e-05,
"loss": 2.7449,
"step": 161000
},
{
"epoch": 0.53,
"learning_rate": 2.3323309565522654e-05,
"loss": 2.7527,
"step": 161100
},
{
"epoch": 0.53,
"learning_rate": 2.3306750477729684e-05,
"loss": 2.7485,
"step": 161200
},
{
"epoch": 0.53,
"learning_rate": 2.3290191389936714e-05,
"loss": 2.7476,
"step": 161300
},
{
"epoch": 0.53,
"learning_rate": 2.327363230214374e-05,
"loss": 2.7679,
"step": 161400
},
{
"epoch": 0.53,
"learning_rate": 2.3257073214350768e-05,
"loss": 2.7428,
"step": 161500
},
{
"epoch": 0.54,
"learning_rate": 2.32405141265578e-05,
"loss": 2.7318,
"step": 161600
},
{
"epoch": 0.54,
"learning_rate": 2.3223955038764826e-05,
"loss": 2.7465,
"step": 161700
},
{
"epoch": 0.54,
"learning_rate": 2.3207395950971856e-05,
"loss": 2.74,
"step": 161800
},
{
"epoch": 0.54,
"learning_rate": 2.3190836863178883e-05,
"loss": 2.7525,
"step": 161900
},
{
"epoch": 0.54,
"learning_rate": 2.317427777538591e-05,
"loss": 2.7559,
"step": 162000
},
{
"epoch": 0.54,
"learning_rate": 2.315771868759294e-05,
"loss": 2.7388,
"step": 162100
},
{
"epoch": 0.54,
"learning_rate": 2.3141159599799967e-05,
"loss": 2.752,
"step": 162200
},
{
"epoch": 0.54,
"learning_rate": 2.3124600512006997e-05,
"loss": 2.7547,
"step": 162300
},
{
"epoch": 0.54,
"learning_rate": 2.3108041424214024e-05,
"loss": 2.7303,
"step": 162400
},
{
"epoch": 0.54,
"learning_rate": 2.309148233642105e-05,
"loss": 2.7317,
"step": 162500
},
{
"epoch": 0.54,
"learning_rate": 2.307492324862808e-05,
"loss": 2.7413,
"step": 162600
},
{
"epoch": 0.54,
"learning_rate": 2.3058364160835108e-05,
"loss": 2.7639,
"step": 162700
},
{
"epoch": 0.54,
"learning_rate": 2.304180507304214e-05,
"loss": 2.7692,
"step": 162800
},
{
"epoch": 0.54,
"learning_rate": 2.3025245985249165e-05,
"loss": 2.7522,
"step": 162900
},
{
"epoch": 0.54,
"learning_rate": 2.3008686897456196e-05,
"loss": 2.7499,
"step": 163000
},
{
"epoch": 0.54,
"learning_rate": 2.2992127809663223e-05,
"loss": 2.7452,
"step": 163100
},
{
"epoch": 0.54,
"learning_rate": 2.297556872187025e-05,
"loss": 2.7692,
"step": 163200
},
{
"epoch": 0.54,
"learning_rate": 2.295900963407728e-05,
"loss": 2.7514,
"step": 163300
},
{
"epoch": 0.54,
"learning_rate": 2.2942450546284307e-05,
"loss": 2.7399,
"step": 163400
},
{
"epoch": 0.54,
"learning_rate": 2.2925891458491337e-05,
"loss": 2.751,
"step": 163500
},
{
"epoch": 0.54,
"learning_rate": 2.2909332370698364e-05,
"loss": 2.7342,
"step": 163600
},
{
"epoch": 0.54,
"learning_rate": 2.289277328290539e-05,
"loss": 2.7472,
"step": 163700
},
{
"epoch": 0.54,
"learning_rate": 2.287621419511242e-05,
"loss": 2.7626,
"step": 163800
},
{
"epoch": 0.54,
"learning_rate": 2.285965510731945e-05,
"loss": 2.7511,
"step": 163900
},
{
"epoch": 0.54,
"learning_rate": 2.284309601952648e-05,
"loss": 2.7535,
"step": 164000
},
{
"epoch": 0.54,
"learning_rate": 2.2826536931733505e-05,
"loss": 2.7506,
"step": 164100
},
{
"epoch": 0.54,
"learning_rate": 2.2809977843940532e-05,
"loss": 2.7679,
"step": 164200
},
{
"epoch": 0.54,
"learning_rate": 2.2793418756147562e-05,
"loss": 2.7446,
"step": 164300
},
{
"epoch": 0.54,
"learning_rate": 2.2776859668354593e-05,
"loss": 2.7439,
"step": 164400
},
{
"epoch": 0.54,
"learning_rate": 2.276030058056162e-05,
"loss": 2.7631,
"step": 164500
},
{
"epoch": 0.55,
"learning_rate": 2.2743741492768647e-05,
"loss": 2.7448,
"step": 164600
},
{
"epoch": 0.55,
"learning_rate": 2.2727182404975674e-05,
"loss": 2.7571,
"step": 164700
},
{
"epoch": 0.55,
"learning_rate": 2.2710623317182704e-05,
"loss": 2.7517,
"step": 164800
},
{
"epoch": 0.55,
"learning_rate": 2.2694064229389734e-05,
"loss": 2.7324,
"step": 164900
},
{
"epoch": 0.55,
"learning_rate": 2.267750514159676e-05,
"loss": 2.7428,
"step": 165000
},
{
"epoch": 0.55,
"learning_rate": 2.2660946053803788e-05,
"loss": 2.7627,
"step": 165100
},
{
"epoch": 0.55,
"learning_rate": 2.2644386966010818e-05,
"loss": 2.7404,
"step": 165200
},
{
"epoch": 0.55,
"learning_rate": 2.262782787821785e-05,
"loss": 2.7404,
"step": 165300
},
{
"epoch": 0.55,
"learning_rate": 2.2611268790424875e-05,
"loss": 2.7552,
"step": 165400
},
{
"epoch": 0.55,
"learning_rate": 2.2594709702631902e-05,
"loss": 2.7461,
"step": 165500
},
{
"epoch": 0.55,
"learning_rate": 2.257815061483893e-05,
"loss": 2.7497,
"step": 165600
},
{
"epoch": 0.55,
"learning_rate": 2.256159152704596e-05,
"loss": 2.75,
"step": 165700
},
{
"epoch": 0.55,
"learning_rate": 2.254503243925299e-05,
"loss": 2.7509,
"step": 165800
},
{
"epoch": 0.55,
"learning_rate": 2.2528473351460017e-05,
"loss": 2.7442,
"step": 165900
},
{
"epoch": 0.55,
"learning_rate": 2.2511914263667044e-05,
"loss": 2.7411,
"step": 166000
},
{
"epoch": 0.55,
"learning_rate": 2.249535517587407e-05,
"loss": 2.7603,
"step": 166100
},
{
"epoch": 0.55,
"learning_rate": 2.24787960880811e-05,
"loss": 2.7477,
"step": 166200
},
{
"epoch": 0.55,
"learning_rate": 2.246223700028813e-05,
"loss": 2.742,
"step": 166300
},
{
"epoch": 0.55,
"learning_rate": 2.2445677912495158e-05,
"loss": 2.7428,
"step": 166400
},
{
"epoch": 0.55,
"learning_rate": 2.2429118824702185e-05,
"loss": 2.7425,
"step": 166500
},
{
"epoch": 0.55,
"learning_rate": 2.2412559736909215e-05,
"loss": 2.7574,
"step": 166600
},
{
"epoch": 0.55,
"learning_rate": 2.2396000649116242e-05,
"loss": 2.7359,
"step": 166700
},
{
"epoch": 0.55,
"learning_rate": 2.2379441561323273e-05,
"loss": 2.7462,
"step": 166800
},
{
"epoch": 0.55,
"learning_rate": 2.23628824735303e-05,
"loss": 2.7471,
"step": 166900
},
{
"epoch": 0.55,
"learning_rate": 2.2346323385737326e-05,
"loss": 2.7516,
"step": 167000
},
{
"epoch": 0.55,
"learning_rate": 2.2329764297944357e-05,
"loss": 2.7484,
"step": 167100
},
{
"epoch": 0.55,
"learning_rate": 2.2313205210151384e-05,
"loss": 2.7419,
"step": 167200
},
{
"epoch": 0.55,
"learning_rate": 2.2296646122358414e-05,
"loss": 2.7483,
"step": 167300
},
{
"epoch": 0.55,
"learning_rate": 2.228008703456544e-05,
"loss": 2.7379,
"step": 167400
},
{
"epoch": 0.55,
"learning_rate": 2.226352794677247e-05,
"loss": 2.7434,
"step": 167500
},
{
"epoch": 0.56,
"learning_rate": 2.2246968858979498e-05,
"loss": 2.7443,
"step": 167600
},
{
"epoch": 0.56,
"learning_rate": 2.2230409771186525e-05,
"loss": 2.7423,
"step": 167700
},
{
"epoch": 0.56,
"learning_rate": 2.2213850683393555e-05,
"loss": 2.7478,
"step": 167800
},
{
"epoch": 0.56,
"learning_rate": 2.2197291595600582e-05,
"loss": 2.7371,
"step": 167900
},
{
"epoch": 0.56,
"learning_rate": 2.2180732507807612e-05,
"loss": 2.7521,
"step": 168000
},
{
"epoch": 0.56,
"learning_rate": 2.216417342001464e-05,
"loss": 2.7481,
"step": 168100
},
{
"epoch": 0.56,
"learning_rate": 2.2147614332221666e-05,
"loss": 2.7661,
"step": 168200
},
{
"epoch": 0.56,
"learning_rate": 2.2131055244428697e-05,
"loss": 2.7528,
"step": 168300
},
{
"epoch": 0.56,
"learning_rate": 2.2114496156635723e-05,
"loss": 2.7454,
"step": 168400
},
{
"epoch": 0.56,
"learning_rate": 2.2097937068842754e-05,
"loss": 2.7572,
"step": 168500
},
{
"epoch": 0.56,
"learning_rate": 2.208137798104978e-05,
"loss": 2.7555,
"step": 168600
},
{
"epoch": 0.56,
"learning_rate": 2.2064818893256808e-05,
"loss": 2.7397,
"step": 168700
},
{
"epoch": 0.56,
"learning_rate": 2.2048259805463838e-05,
"loss": 2.7472,
"step": 168800
},
{
"epoch": 0.56,
"learning_rate": 2.2031700717670868e-05,
"loss": 2.7525,
"step": 168900
},
{
"epoch": 0.56,
"learning_rate": 2.2015141629877895e-05,
"loss": 2.7479,
"step": 169000
},
{
"epoch": 0.56,
"learning_rate": 2.1998582542084922e-05,
"loss": 2.7548,
"step": 169100
},
{
"epoch": 0.56,
"learning_rate": 2.198202345429195e-05,
"loss": 2.7515,
"step": 169200
},
{
"epoch": 0.56,
"learning_rate": 2.196546436649898e-05,
"loss": 2.7598,
"step": 169300
},
{
"epoch": 0.56,
"learning_rate": 2.194890527870601e-05,
"loss": 2.7638,
"step": 169400
},
{
"epoch": 0.56,
"learning_rate": 2.1932346190913036e-05,
"loss": 2.7554,
"step": 169500
},
{
"epoch": 0.56,
"learning_rate": 2.1915787103120063e-05,
"loss": 2.7554,
"step": 169600
},
{
"epoch": 0.56,
"learning_rate": 2.189922801532709e-05,
"loss": 2.7186,
"step": 169700
},
{
"epoch": 0.56,
"learning_rate": 2.1882668927534124e-05,
"loss": 2.7371,
"step": 169800
},
{
"epoch": 0.56,
"learning_rate": 2.186610983974115e-05,
"loss": 2.7606,
"step": 169900
},
{
"epoch": 0.56,
"learning_rate": 2.1849550751948178e-05,
"loss": 2.7465,
"step": 170000
},
{
"epoch": 0.56,
"learning_rate": 2.1832991664155205e-05,
"loss": 2.7502,
"step": 170100
},
{
"epoch": 0.56,
"learning_rate": 2.1816432576362235e-05,
"loss": 2.7428,
"step": 170200
},
{
"epoch": 0.56,
"learning_rate": 2.1799873488569265e-05,
"loss": 2.7499,
"step": 170300
},
{
"epoch": 0.56,
"learning_rate": 2.1783314400776292e-05,
"loss": 2.7497,
"step": 170400
},
{
"epoch": 0.56,
"learning_rate": 2.176675531298332e-05,
"loss": 2.7497,
"step": 170500
},
{
"epoch": 0.56,
"learning_rate": 2.1750196225190346e-05,
"loss": 2.7504,
"step": 170600
},
{
"epoch": 0.57,
"learning_rate": 2.1733637137397376e-05,
"loss": 2.7567,
"step": 170700
},
{
"epoch": 0.57,
"learning_rate": 2.1717078049604407e-05,
"loss": 2.7605,
"step": 170800
},
{
"epoch": 0.57,
"learning_rate": 2.1700518961811433e-05,
"loss": 2.7443,
"step": 170900
},
{
"epoch": 0.57,
"learning_rate": 2.168395987401846e-05,
"loss": 2.7547,
"step": 171000
},
{
"epoch": 0.57,
"learning_rate": 2.166740078622549e-05,
"loss": 2.7432,
"step": 171100
},
{
"epoch": 0.57,
"learning_rate": 2.1650841698432518e-05,
"loss": 2.7484,
"step": 171200
},
{
"epoch": 0.57,
"learning_rate": 2.1634282610639548e-05,
"loss": 2.7579,
"step": 171300
},
{
"epoch": 0.57,
"learning_rate": 2.1617723522846575e-05,
"loss": 2.7375,
"step": 171400
},
{
"epoch": 0.57,
"learning_rate": 2.1601164435053602e-05,
"loss": 2.7608,
"step": 171500
},
{
"epoch": 0.57,
"learning_rate": 2.1584605347260632e-05,
"loss": 2.7453,
"step": 171600
},
{
"epoch": 0.57,
"learning_rate": 2.156804625946766e-05,
"loss": 2.747,
"step": 171700
},
{
"epoch": 0.57,
"learning_rate": 2.155148717167469e-05,
"loss": 2.7596,
"step": 171800
},
{
"epoch": 0.57,
"learning_rate": 2.1534928083881716e-05,
"loss": 2.7526,
"step": 171900
},
{
"epoch": 0.57,
"learning_rate": 2.1518368996088743e-05,
"loss": 2.7525,
"step": 172000
},
{
"epoch": 0.57,
"learning_rate": 2.1501809908295773e-05,
"loss": 2.7486,
"step": 172100
},
{
"epoch": 0.57,
"learning_rate": 2.14852508205028e-05,
"loss": 2.7473,
"step": 172200
},
{
"epoch": 0.57,
"learning_rate": 2.146869173270983e-05,
"loss": 2.7536,
"step": 172300
},
{
"epoch": 0.57,
"learning_rate": 2.1452132644916857e-05,
"loss": 2.7546,
"step": 172400
},
{
"epoch": 0.57,
"learning_rate": 2.1435573557123888e-05,
"loss": 2.734,
"step": 172500
},
{
"epoch": 0.57,
"learning_rate": 2.1419014469330915e-05,
"loss": 2.7437,
"step": 172600
},
{
"epoch": 0.57,
"learning_rate": 2.140245538153794e-05,
"loss": 2.7471,
"step": 172700
},
{
"epoch": 0.57,
"learning_rate": 2.1385896293744972e-05,
"loss": 2.7469,
"step": 172800
},
{
"epoch": 0.57,
"learning_rate": 2.1369337205952e-05,
"loss": 2.759,
"step": 172900
},
{
"epoch": 0.57,
"learning_rate": 2.135277811815903e-05,
"loss": 2.756,
"step": 173000
},
{
"epoch": 0.57,
"learning_rate": 2.1336219030366056e-05,
"loss": 2.745,
"step": 173100
},
{
"epoch": 0.57,
"learning_rate": 2.1319659942573083e-05,
"loss": 2.7487,
"step": 173200
},
{
"epoch": 0.57,
"learning_rate": 2.1303100854780113e-05,
"loss": 2.7487,
"step": 173300
},
{
"epoch": 0.57,
"learning_rate": 2.1286541766987144e-05,
"loss": 2.7374,
"step": 173400
},
{
"epoch": 0.57,
"learning_rate": 2.126998267919417e-05,
"loss": 2.7561,
"step": 173500
},
{
"epoch": 0.57,
"learning_rate": 2.1253423591401197e-05,
"loss": 2.752,
"step": 173600
},
{
"epoch": 0.58,
"learning_rate": 2.1236864503608224e-05,
"loss": 2.7373,
"step": 173700
},
{
"epoch": 0.58,
"learning_rate": 2.1220305415815255e-05,
"loss": 2.7426,
"step": 173800
},
{
"epoch": 0.58,
"learning_rate": 2.1203746328022285e-05,
"loss": 2.7453,
"step": 173900
},
{
"epoch": 0.58,
"learning_rate": 2.1187187240229312e-05,
"loss": 2.7396,
"step": 174000
},
{
"epoch": 0.58,
"learning_rate": 2.117062815243634e-05,
"loss": 2.7657,
"step": 174100
},
{
"epoch": 0.58,
"learning_rate": 2.1154069064643366e-05,
"loss": 2.7561,
"step": 174200
},
{
"epoch": 0.58,
"learning_rate": 2.1137509976850396e-05,
"loss": 2.7527,
"step": 174300
},
{
"epoch": 0.58,
"learning_rate": 2.1120950889057426e-05,
"loss": 2.7513,
"step": 174400
},
{
"epoch": 0.58,
"learning_rate": 2.1104391801264453e-05,
"loss": 2.7367,
"step": 174500
},
{
"epoch": 0.58,
"learning_rate": 2.108783271347148e-05,
"loss": 2.7505,
"step": 174600
},
{
"epoch": 0.58,
"learning_rate": 2.107127362567851e-05,
"loss": 2.7495,
"step": 174700
},
{
"epoch": 0.58,
"learning_rate": 2.105471453788554e-05,
"loss": 2.7543,
"step": 174800
},
{
"epoch": 0.58,
"learning_rate": 2.1038155450092568e-05,
"loss": 2.7465,
"step": 174900
},
{
"epoch": 0.58,
"learning_rate": 2.1021596362299594e-05,
"loss": 2.73,
"step": 175000
},
{
"epoch": 0.58,
"learning_rate": 2.100503727450662e-05,
"loss": 2.7408,
"step": 175100
},
{
"epoch": 0.58,
"learning_rate": 2.098847818671365e-05,
"loss": 2.7525,
"step": 175200
},
{
"epoch": 0.58,
"learning_rate": 2.0971919098920682e-05,
"loss": 2.7428,
"step": 175300
},
{
"epoch": 0.58,
"learning_rate": 2.095536001112771e-05,
"loss": 2.7372,
"step": 175400
},
{
"epoch": 0.58,
"learning_rate": 2.0938800923334736e-05,
"loss": 2.7413,
"step": 175500
},
{
"epoch": 0.58,
"learning_rate": 2.0922241835541763e-05,
"loss": 2.7394,
"step": 175600
},
{
"epoch": 0.58,
"learning_rate": 2.0905682747748793e-05,
"loss": 2.7547,
"step": 175700
},
{
"epoch": 0.58,
"learning_rate": 2.0889123659955823e-05,
"loss": 2.7519,
"step": 175800
},
{
"epoch": 0.58,
"learning_rate": 2.087256457216285e-05,
"loss": 2.7428,
"step": 175900
},
{
"epoch": 0.58,
"learning_rate": 2.0856005484369877e-05,
"loss": 2.7628,
"step": 176000
},
{
"epoch": 0.58,
"learning_rate": 2.0839446396576907e-05,
"loss": 2.7362,
"step": 176100
},
{
"epoch": 0.58,
"learning_rate": 2.0822887308783934e-05,
"loss": 2.742,
"step": 176200
},
{
"epoch": 0.58,
"learning_rate": 2.0806328220990965e-05,
"loss": 2.7439,
"step": 176300
},
{
"epoch": 0.58,
"learning_rate": 2.078976913319799e-05,
"loss": 2.7554,
"step": 176400
},
{
"epoch": 0.58,
"learning_rate": 2.077321004540502e-05,
"loss": 2.7583,
"step": 176500
},
{
"epoch": 0.58,
"learning_rate": 2.075665095761205e-05,
"loss": 2.7484,
"step": 176600
},
{
"epoch": 0.59,
"learning_rate": 2.0740091869819076e-05,
"loss": 2.7455,
"step": 176700
},
{
"epoch": 0.59,
"learning_rate": 2.0723532782026106e-05,
"loss": 2.7394,
"step": 176800
},
{
"epoch": 0.59,
"learning_rate": 2.0706973694233133e-05,
"loss": 2.7497,
"step": 176900
},
{
"epoch": 0.59,
"learning_rate": 2.0690414606440163e-05,
"loss": 2.7391,
"step": 177000
},
{
"epoch": 0.59,
"learning_rate": 2.067385551864719e-05,
"loss": 2.7412,
"step": 177100
},
{
"epoch": 0.59,
"learning_rate": 2.0657296430854217e-05,
"loss": 2.7654,
"step": 177200
},
{
"epoch": 0.59,
"learning_rate": 2.0640737343061247e-05,
"loss": 2.7475,
"step": 177300
},
{
"epoch": 0.59,
"learning_rate": 2.0624178255268274e-05,
"loss": 2.7368,
"step": 177400
},
{
"epoch": 0.59,
"learning_rate": 2.0607619167475305e-05,
"loss": 2.7449,
"step": 177500
},
{
"epoch": 0.59,
"learning_rate": 2.059106007968233e-05,
"loss": 2.7554,
"step": 177600
},
{
"epoch": 0.59,
"learning_rate": 2.057450099188936e-05,
"loss": 2.7463,
"step": 177700
},
{
"epoch": 0.59,
"learning_rate": 2.055794190409639e-05,
"loss": 2.7435,
"step": 177800
},
{
"epoch": 0.59,
"learning_rate": 2.0541382816303416e-05,
"loss": 2.749,
"step": 177900
},
{
"epoch": 0.59,
"learning_rate": 2.0524823728510446e-05,
"loss": 2.7578,
"step": 178000
},
{
"epoch": 0.59,
"learning_rate": 2.0508264640717473e-05,
"loss": 2.7403,
"step": 178100
},
{
"epoch": 0.59,
"learning_rate": 2.04917055529245e-05,
"loss": 2.7413,
"step": 178200
},
{
"epoch": 0.59,
"learning_rate": 2.047514646513153e-05,
"loss": 2.7531,
"step": 178300
},
{
"epoch": 0.59,
"learning_rate": 2.045858737733856e-05,
"loss": 2.7382,
"step": 178400
},
{
"epoch": 0.59,
"learning_rate": 2.0442028289545587e-05,
"loss": 2.7328,
"step": 178500
},
{
"epoch": 0.59,
"learning_rate": 2.0425469201752614e-05,
"loss": 2.7523,
"step": 178600
},
{
"epoch": 0.59,
"learning_rate": 2.040891011395964e-05,
"loss": 2.751,
"step": 178700
},
{
"epoch": 0.59,
"learning_rate": 2.039235102616667e-05,
"loss": 2.7621,
"step": 178800
},
{
"epoch": 0.59,
"learning_rate": 2.03757919383737e-05,
"loss": 2.7409,
"step": 178900
},
{
"epoch": 0.59,
"learning_rate": 2.035923285058073e-05,
"loss": 2.7606,
"step": 179000
},
{
"epoch": 0.59,
"learning_rate": 2.0342673762787755e-05,
"loss": 2.7597,
"step": 179100
},
{
"epoch": 0.59,
"learning_rate": 2.0326114674994782e-05,
"loss": 2.734,
"step": 179200
},
{
"epoch": 0.59,
"learning_rate": 2.0309555587201816e-05,
"loss": 2.7502,
"step": 179300
},
{
"epoch": 0.59,
"learning_rate": 2.0292996499408843e-05,
"loss": 2.7438,
"step": 179400
},
{
"epoch": 0.59,
"learning_rate": 2.027643741161587e-05,
"loss": 2.737,
"step": 179500
},
{
"epoch": 0.59,
"learning_rate": 2.0259878323822897e-05,
"loss": 2.7505,
"step": 179600
},
{
"epoch": 0.6,
"learning_rate": 2.0243319236029927e-05,
"loss": 2.7519,
"step": 179700
},
{
"epoch": 0.6,
"learning_rate": 2.0226760148236957e-05,
"loss": 2.7409,
"step": 179800
},
{
"epoch": 0.6,
"learning_rate": 2.0210201060443984e-05,
"loss": 2.7325,
"step": 179900
},
{
"epoch": 0.6,
"learning_rate": 2.019364197265101e-05,
"loss": 2.7537,
"step": 180000
},
{
"epoch": 0.6,
"learning_rate": 2.0177082884858038e-05,
"loss": 2.7362,
"step": 180100
},
{
"epoch": 0.6,
"learning_rate": 2.016052379706507e-05,
"loss": 2.7377,
"step": 180200
},
{
"epoch": 0.6,
"learning_rate": 2.01439647092721e-05,
"loss": 2.7443,
"step": 180300
},
{
"epoch": 0.6,
"learning_rate": 2.0127405621479126e-05,
"loss": 2.7377,
"step": 180400
},
{
"epoch": 0.6,
"learning_rate": 2.0110846533686153e-05,
"loss": 2.7517,
"step": 180500
},
{
"epoch": 0.6,
"learning_rate": 2.0094287445893183e-05,
"loss": 2.7443,
"step": 180600
},
{
"epoch": 0.6,
"learning_rate": 2.007772835810021e-05,
"loss": 2.7444,
"step": 180700
},
{
"epoch": 0.6,
"learning_rate": 2.006116927030724e-05,
"loss": 2.7496,
"step": 180800
},
{
"epoch": 0.6,
"learning_rate": 2.0044610182514267e-05,
"loss": 2.7418,
"step": 180900
},
{
"epoch": 0.6,
"learning_rate": 2.0028051094721294e-05,
"loss": 2.7402,
"step": 181000
},
{
"epoch": 0.6,
"learning_rate": 2.0011492006928324e-05,
"loss": 2.7368,
"step": 181100
},
{
"epoch": 0.6,
"learning_rate": 1.999493291913535e-05,
"loss": 2.7474,
"step": 181200
},
{
"epoch": 0.6,
"learning_rate": 1.997837383134238e-05,
"loss": 2.7471,
"step": 181300
},
{
"epoch": 0.6,
"learning_rate": 1.9961814743549408e-05,
"loss": 2.7449,
"step": 181400
},
{
"epoch": 0.6,
"learning_rate": 1.9945255655756435e-05,
"loss": 2.7289,
"step": 181500
},
{
"epoch": 0.6,
"learning_rate": 1.9928696567963465e-05,
"loss": 2.7406,
"step": 181600
},
{
"epoch": 0.6,
"learning_rate": 1.9912137480170492e-05,
"loss": 2.7669,
"step": 181700
},
{
"epoch": 0.6,
"learning_rate": 1.9895578392377523e-05,
"loss": 2.7532,
"step": 181800
},
{
"epoch": 0.6,
"learning_rate": 1.987901930458455e-05,
"loss": 2.7445,
"step": 181900
},
{
"epoch": 0.6,
"learning_rate": 1.986246021679158e-05,
"loss": 2.7673,
"step": 182000
},
{
"epoch": 0.6,
"learning_rate": 1.9845901128998607e-05,
"loss": 2.7517,
"step": 182100
},
{
"epoch": 0.6,
"learning_rate": 1.9829342041205634e-05,
"loss": 2.747,
"step": 182200
},
{
"epoch": 0.6,
"learning_rate": 1.9812782953412664e-05,
"loss": 2.746,
"step": 182300
},
{
"epoch": 0.6,
"learning_rate": 1.979622386561969e-05,
"loss": 2.7475,
"step": 182400
},
{
"epoch": 0.6,
"learning_rate": 1.977966477782672e-05,
"loss": 2.7567,
"step": 182500
},
{
"epoch": 0.6,
"learning_rate": 1.9763105690033748e-05,
"loss": 2.7468,
"step": 182600
},
{
"epoch": 0.61,
"learning_rate": 1.9746546602240775e-05,
"loss": 2.7424,
"step": 182700
},
{
"epoch": 0.61,
"learning_rate": 1.9729987514447805e-05,
"loss": 2.7375,
"step": 182800
},
{
"epoch": 0.61,
"learning_rate": 1.9713428426654836e-05,
"loss": 2.74,
"step": 182900
},
{
"epoch": 0.61,
"learning_rate": 1.9696869338861863e-05,
"loss": 2.7503,
"step": 183000
},
{
"epoch": 0.61,
"learning_rate": 1.968031025106889e-05,
"loss": 2.7485,
"step": 183100
},
{
"epoch": 0.61,
"learning_rate": 1.9663751163275916e-05,
"loss": 2.7386,
"step": 183200
},
{
"epoch": 0.61,
"learning_rate": 1.9647192075482947e-05,
"loss": 2.73,
"step": 183300
},
{
"epoch": 0.61,
"learning_rate": 1.9630632987689977e-05,
"loss": 2.7399,
"step": 183400
},
{
"epoch": 0.61,
"learning_rate": 1.9614073899897004e-05,
"loss": 2.7399,
"step": 183500
},
{
"epoch": 0.61,
"learning_rate": 1.959751481210403e-05,
"loss": 2.7438,
"step": 183600
},
{
"epoch": 0.61,
"learning_rate": 1.9580955724311058e-05,
"loss": 2.7527,
"step": 183700
},
{
"epoch": 0.61,
"learning_rate": 1.956439663651809e-05,
"loss": 2.7412,
"step": 183800
},
{
"epoch": 0.61,
"learning_rate": 1.954783754872512e-05,
"loss": 2.7294,
"step": 183900
},
{
"epoch": 0.61,
"learning_rate": 1.9531278460932145e-05,
"loss": 2.7384,
"step": 184000
},
{
"epoch": 0.61,
"learning_rate": 1.9514719373139172e-05,
"loss": 2.7353,
"step": 184100
},
{
"epoch": 0.61,
"learning_rate": 1.9498160285346202e-05,
"loss": 2.7428,
"step": 184200
},
{
"epoch": 0.61,
"learning_rate": 1.9481601197553233e-05,
"loss": 2.7493,
"step": 184300
},
{
"epoch": 0.61,
"learning_rate": 1.946504210976026e-05,
"loss": 2.728,
"step": 184400
},
{
"epoch": 0.61,
"learning_rate": 1.9448483021967287e-05,
"loss": 2.7371,
"step": 184500
},
{
"epoch": 0.61,
"learning_rate": 1.9431923934174313e-05,
"loss": 2.7402,
"step": 184600
},
{
"epoch": 0.61,
"learning_rate": 1.9415364846381344e-05,
"loss": 2.7451,
"step": 184700
},
{
"epoch": 0.61,
"learning_rate": 1.9398805758588374e-05,
"loss": 2.7383,
"step": 184800
},
{
"epoch": 0.61,
"learning_rate": 1.93822466707954e-05,
"loss": 2.7366,
"step": 184900
},
{
"epoch": 0.61,
"learning_rate": 1.9365687583002428e-05,
"loss": 2.7473,
"step": 185000
},
{
"epoch": 0.61,
"learning_rate": 1.9349128495209455e-05,
"loss": 2.7424,
"step": 185100
},
{
"epoch": 0.61,
"learning_rate": 1.9332569407416485e-05,
"loss": 2.7531,
"step": 185200
},
{
"epoch": 0.61,
"learning_rate": 1.9316010319623515e-05,
"loss": 2.7323,
"step": 185300
},
{
"epoch": 0.61,
"learning_rate": 1.9299451231830542e-05,
"loss": 2.7391,
"step": 185400
},
{
"epoch": 0.61,
"learning_rate": 1.928289214403757e-05,
"loss": 2.7523,
"step": 185500
},
{
"epoch": 0.61,
"learning_rate": 1.92663330562446e-05,
"loss": 2.7478,
"step": 185600
},
{
"epoch": 0.62,
"learning_rate": 1.9249773968451626e-05,
"loss": 2.7492,
"step": 185700
},
{
"epoch": 0.62,
"learning_rate": 1.9233214880658657e-05,
"loss": 2.7306,
"step": 185800
},
{
"epoch": 0.62,
"learning_rate": 1.9216655792865684e-05,
"loss": 2.7402,
"step": 185900
},
{
"epoch": 0.62,
"learning_rate": 1.920009670507271e-05,
"loss": 2.7429,
"step": 186000
},
{
"epoch": 0.62,
"learning_rate": 1.918353761727974e-05,
"loss": 2.7388,
"step": 186100
},
{
"epoch": 0.62,
"learning_rate": 1.9166978529486768e-05,
"loss": 2.7384,
"step": 186200
},
{
"epoch": 0.62,
"learning_rate": 1.9150419441693798e-05,
"loss": 2.7398,
"step": 186300
},
{
"epoch": 0.62,
"learning_rate": 1.9133860353900825e-05,
"loss": 2.7571,
"step": 186400
},
{
"epoch": 0.62,
"learning_rate": 1.9117301266107855e-05,
"loss": 2.7548,
"step": 186500
},
{
"epoch": 0.62,
"learning_rate": 1.9100742178314882e-05,
"loss": 2.7339,
"step": 186600
},
{
"epoch": 0.62,
"learning_rate": 1.908418309052191e-05,
"loss": 2.7359,
"step": 186700
},
{
"epoch": 0.62,
"learning_rate": 1.906762400272894e-05,
"loss": 2.7395,
"step": 186800
},
{
"epoch": 0.62,
"learning_rate": 1.9051064914935966e-05,
"loss": 2.7336,
"step": 186900
},
{
"epoch": 0.62,
"learning_rate": 1.9034505827142997e-05,
"loss": 2.7336,
"step": 187000
},
{
"epoch": 0.62,
"learning_rate": 1.9017946739350024e-05,
"loss": 2.7447,
"step": 187100
},
{
"epoch": 0.62,
"learning_rate": 1.900138765155705e-05,
"loss": 2.7439,
"step": 187200
},
{
"epoch": 0.62,
"learning_rate": 1.898482856376408e-05,
"loss": 2.743,
"step": 187300
},
{
"epoch": 0.62,
"learning_rate": 1.896826947597111e-05,
"loss": 2.7444,
"step": 187400
},
{
"epoch": 0.62,
"learning_rate": 1.8951710388178138e-05,
"loss": 2.7332,
"step": 187500
},
{
"epoch": 0.62,
"learning_rate": 1.8935151300385165e-05,
"loss": 2.7369,
"step": 187600
},
{
"epoch": 0.62,
"learning_rate": 1.8918592212592192e-05,
"loss": 2.7413,
"step": 187700
},
{
"epoch": 0.62,
"learning_rate": 1.8902033124799222e-05,
"loss": 2.7716,
"step": 187800
},
{
"epoch": 0.62,
"learning_rate": 1.8885474037006252e-05,
"loss": 2.7431,
"step": 187900
},
{
"epoch": 0.62,
"learning_rate": 1.886891494921328e-05,
"loss": 2.7496,
"step": 188000
},
{
"epoch": 0.62,
"learning_rate": 1.8852355861420306e-05,
"loss": 2.7485,
"step": 188100
},
{
"epoch": 0.62,
"learning_rate": 1.8835796773627333e-05,
"loss": 2.7476,
"step": 188200
},
{
"epoch": 0.62,
"learning_rate": 1.8819237685834363e-05,
"loss": 2.7482,
"step": 188300
},
{
"epoch": 0.62,
"learning_rate": 1.8802678598041394e-05,
"loss": 2.7405,
"step": 188400
},
{
"epoch": 0.62,
"learning_rate": 1.878611951024842e-05,
"loss": 2.7468,
"step": 188500
},
{
"epoch": 0.62,
"learning_rate": 1.8769560422455448e-05,
"loss": 2.7531,
"step": 188600
},
{
"epoch": 0.62,
"learning_rate": 1.8753001334662474e-05,
"loss": 2.7557,
"step": 188700
},
{
"epoch": 0.63,
"learning_rate": 1.8736442246869508e-05,
"loss": 2.7441,
"step": 188800
},
{
"epoch": 0.63,
"learning_rate": 1.8719883159076535e-05,
"loss": 2.7522,
"step": 188900
},
{
"epoch": 0.63,
"learning_rate": 1.8703324071283562e-05,
"loss": 2.744,
"step": 189000
},
{
"epoch": 0.63,
"learning_rate": 1.868676498349059e-05,
"loss": 2.7527,
"step": 189100
},
{
"epoch": 0.63,
"learning_rate": 1.867020589569762e-05,
"loss": 2.7664,
"step": 189200
},
{
"epoch": 0.63,
"learning_rate": 1.865364680790465e-05,
"loss": 2.7386,
"step": 189300
},
{
"epoch": 0.63,
"learning_rate": 1.8637087720111676e-05,
"loss": 2.7297,
"step": 189400
},
{
"epoch": 0.63,
"learning_rate": 1.8620528632318703e-05,
"loss": 2.7398,
"step": 189500
},
{
"epoch": 0.63,
"learning_rate": 1.860396954452573e-05,
"loss": 2.7605,
"step": 189600
},
{
"epoch": 0.63,
"learning_rate": 1.858741045673276e-05,
"loss": 2.749,
"step": 189700
},
{
"epoch": 0.63,
"learning_rate": 1.857085136893979e-05,
"loss": 2.7359,
"step": 189800
},
{
"epoch": 0.63,
"learning_rate": 1.8554292281146818e-05,
"loss": 2.737,
"step": 189900
},
{
"epoch": 0.63,
"learning_rate": 1.8537733193353845e-05,
"loss": 2.7476,
"step": 190000
},
{
"epoch": 0.63,
"learning_rate": 1.8521174105560875e-05,
"loss": 2.7243,
"step": 190100
},
{
"epoch": 0.63,
"learning_rate": 1.8504615017767902e-05,
"loss": 2.74,
"step": 190200
},
{
"epoch": 0.63,
"learning_rate": 1.8488055929974932e-05,
"loss": 2.7522,
"step": 190300
},
{
"epoch": 0.63,
"learning_rate": 1.847149684218196e-05,
"loss": 2.7543,
"step": 190400
},
{
"epoch": 0.63,
"learning_rate": 1.8454937754388986e-05,
"loss": 2.7374,
"step": 190500
},
{
"epoch": 0.63,
"learning_rate": 1.8438378666596016e-05,
"loss": 2.7397,
"step": 190600
},
{
"epoch": 0.63,
"learning_rate": 1.8421819578803043e-05,
"loss": 2.7264,
"step": 190700
},
{
"epoch": 0.63,
"learning_rate": 1.8405260491010073e-05,
"loss": 2.7341,
"step": 190800
},
{
"epoch": 0.63,
"learning_rate": 1.83887014032171e-05,
"loss": 2.7382,
"step": 190900
},
{
"epoch": 0.63,
"learning_rate": 1.837214231542413e-05,
"loss": 2.7346,
"step": 191000
},
{
"epoch": 0.63,
"learning_rate": 1.8355583227631158e-05,
"loss": 2.7357,
"step": 191100
},
{
"epoch": 0.63,
"learning_rate": 1.8339024139838185e-05,
"loss": 2.7492,
"step": 191200
},
{
"epoch": 0.63,
"learning_rate": 1.8322465052045215e-05,
"loss": 2.7492,
"step": 191300
},
{
"epoch": 0.63,
"learning_rate": 1.8305905964252242e-05,
"loss": 2.7432,
"step": 191400
},
{
"epoch": 0.63,
"learning_rate": 1.8289346876459272e-05,
"loss": 2.7422,
"step": 191500
},
{
"epoch": 0.63,
"learning_rate": 1.82727877886663e-05,
"loss": 2.7427,
"step": 191600
},
{
"epoch": 0.63,
"learning_rate": 1.8256228700873326e-05,
"loss": 2.7504,
"step": 191700
},
{
"epoch": 0.64,
"learning_rate": 1.8239669613080356e-05,
"loss": 2.7348,
"step": 191800
},
{
"epoch": 0.64,
"learning_rate": 1.8223110525287383e-05,
"loss": 2.7355,
"step": 191900
},
{
"epoch": 0.64,
"learning_rate": 1.8206551437494413e-05,
"loss": 2.7447,
"step": 192000
},
{
"epoch": 0.64,
"learning_rate": 1.818999234970144e-05,
"loss": 2.7517,
"step": 192100
},
{
"epoch": 0.64,
"learning_rate": 1.8173433261908467e-05,
"loss": 2.7496,
"step": 192200
},
{
"epoch": 0.64,
"learning_rate": 1.8156874174115497e-05,
"loss": 2.7394,
"step": 192300
},
{
"epoch": 0.64,
"learning_rate": 1.8140315086322528e-05,
"loss": 2.7283,
"step": 192400
},
{
"epoch": 0.64,
"learning_rate": 1.8123755998529555e-05,
"loss": 2.7389,
"step": 192500
},
{
"epoch": 0.64,
"learning_rate": 1.810719691073658e-05,
"loss": 2.7368,
"step": 192600
},
{
"epoch": 0.64,
"learning_rate": 1.809063782294361e-05,
"loss": 2.7407,
"step": 192700
},
{
"epoch": 0.64,
"learning_rate": 1.807407873515064e-05,
"loss": 2.7288,
"step": 192800
},
{
"epoch": 0.64,
"learning_rate": 1.805751964735767e-05,
"loss": 2.7248,
"step": 192900
},
{
"epoch": 0.64,
"learning_rate": 1.8040960559564696e-05,
"loss": 2.7259,
"step": 193000
},
{
"epoch": 0.64,
"learning_rate": 1.8024401471771723e-05,
"loss": 2.7475,
"step": 193100
},
{
"epoch": 0.64,
"learning_rate": 1.800784238397875e-05,
"loss": 2.7297,
"step": 193200
},
{
"epoch": 0.64,
"learning_rate": 1.7991283296185784e-05,
"loss": 2.7498,
"step": 193300
},
{
"epoch": 0.64,
"learning_rate": 1.797472420839281e-05,
"loss": 2.7517,
"step": 193400
},
{
"epoch": 0.64,
"learning_rate": 1.7958165120599837e-05,
"loss": 2.7365,
"step": 193500
},
{
"epoch": 0.64,
"learning_rate": 1.7941606032806864e-05,
"loss": 2.7297,
"step": 193600
},
{
"epoch": 0.64,
"learning_rate": 1.7925046945013895e-05,
"loss": 2.7415,
"step": 193700
},
{
"epoch": 0.64,
"learning_rate": 1.7908487857220925e-05,
"loss": 2.7242,
"step": 193800
},
{
"epoch": 0.64,
"learning_rate": 1.7891928769427952e-05,
"loss": 2.7425,
"step": 193900
},
{
"epoch": 0.64,
"learning_rate": 1.787536968163498e-05,
"loss": 2.7562,
"step": 194000
},
{
"epoch": 0.64,
"learning_rate": 1.7858810593842006e-05,
"loss": 2.727,
"step": 194100
},
{
"epoch": 0.64,
"learning_rate": 1.7842251506049036e-05,
"loss": 2.7497,
"step": 194200
},
{
"epoch": 0.64,
"learning_rate": 1.7825692418256066e-05,
"loss": 2.7484,
"step": 194300
},
{
"epoch": 0.64,
"learning_rate": 1.7809133330463093e-05,
"loss": 2.7343,
"step": 194400
},
{
"epoch": 0.64,
"learning_rate": 1.779257424267012e-05,
"loss": 2.7723,
"step": 194500
},
{
"epoch": 0.64,
"learning_rate": 1.777601515487715e-05,
"loss": 2.7399,
"step": 194600
},
{
"epoch": 0.64,
"learning_rate": 1.7759456067084177e-05,
"loss": 2.7376,
"step": 194700
},
{
"epoch": 0.65,
"learning_rate": 1.7742896979291208e-05,
"loss": 2.7485,
"step": 194800
},
{
"epoch": 0.65,
"learning_rate": 1.7726337891498234e-05,
"loss": 2.7326,
"step": 194900
},
{
"epoch": 0.65,
"learning_rate": 1.770977880370526e-05,
"loss": 2.7403,
"step": 195000
},
{
"epoch": 0.65,
"learning_rate": 1.769321971591229e-05,
"loss": 2.7306,
"step": 195100
},
{
"epoch": 0.65,
"learning_rate": 1.767666062811932e-05,
"loss": 2.7347,
"step": 195200
},
{
"epoch": 0.65,
"learning_rate": 1.766010154032635e-05,
"loss": 2.7404,
"step": 195300
},
{
"epoch": 0.65,
"learning_rate": 1.7643542452533376e-05,
"loss": 2.7213,
"step": 195400
},
{
"epoch": 0.65,
"learning_rate": 1.7626983364740403e-05,
"loss": 2.7374,
"step": 195500
},
{
"epoch": 0.65,
"learning_rate": 1.7610424276947433e-05,
"loss": 2.7227,
"step": 195600
},
{
"epoch": 0.65,
"learning_rate": 1.759386518915446e-05,
"loss": 2.7226,
"step": 195700
},
{
"epoch": 0.65,
"learning_rate": 1.757730610136149e-05,
"loss": 2.7351,
"step": 195800
},
{
"epoch": 0.65,
"learning_rate": 1.7560747013568517e-05,
"loss": 2.7491,
"step": 195900
},
{
"epoch": 0.65,
"learning_rate": 1.7544187925775547e-05,
"loss": 2.7374,
"step": 196000
},
{
"epoch": 0.65,
"learning_rate": 1.7527628837982574e-05,
"loss": 2.7398,
"step": 196100
},
{
"epoch": 0.65,
"learning_rate": 1.75110697501896e-05,
"loss": 2.7369,
"step": 196200
},
{
"epoch": 0.65,
"learning_rate": 1.749451066239663e-05,
"loss": 2.7385,
"step": 196300
},
{
"epoch": 0.65,
"learning_rate": 1.747795157460366e-05,
"loss": 2.7371,
"step": 196400
},
{
"epoch": 0.65,
"learning_rate": 1.746139248681069e-05,
"loss": 2.7375,
"step": 196500
},
{
"epoch": 0.65,
"learning_rate": 1.7444833399017716e-05,
"loss": 2.7333,
"step": 196600
},
{
"epoch": 0.65,
"learning_rate": 1.7428274311224743e-05,
"loss": 2.7297,
"step": 196700
},
{
"epoch": 0.65,
"learning_rate": 1.7411715223431773e-05,
"loss": 2.7652,
"step": 196800
},
{
"epoch": 0.65,
"learning_rate": 1.7395156135638803e-05,
"loss": 2.7275,
"step": 196900
},
{
"epoch": 0.65,
"learning_rate": 1.737859704784583e-05,
"loss": 2.7314,
"step": 197000
},
{
"epoch": 0.65,
"learning_rate": 1.7362037960052857e-05,
"loss": 2.7405,
"step": 197100
},
{
"epoch": 0.65,
"learning_rate": 1.7345478872259884e-05,
"loss": 2.7291,
"step": 197200
},
{
"epoch": 0.65,
"learning_rate": 1.7328919784466914e-05,
"loss": 2.7445,
"step": 197300
},
{
"epoch": 0.65,
"learning_rate": 1.7312360696673944e-05,
"loss": 2.7509,
"step": 197400
},
{
"epoch": 0.65,
"learning_rate": 1.729580160888097e-05,
"loss": 2.7429,
"step": 197500
},
{
"epoch": 0.65,
"learning_rate": 1.7279242521088e-05,
"loss": 2.7431,
"step": 197600
},
{
"epoch": 0.65,
"learning_rate": 1.7262683433295025e-05,
"loss": 2.7473,
"step": 197700
},
{
"epoch": 0.66,
"learning_rate": 1.7246124345502056e-05,
"loss": 2.7478,
"step": 197800
},
{
"epoch": 0.66,
"learning_rate": 1.7229565257709086e-05,
"loss": 2.7408,
"step": 197900
},
{
"epoch": 0.66,
"learning_rate": 1.7213006169916113e-05,
"loss": 2.7434,
"step": 198000
},
{
"epoch": 0.66,
"learning_rate": 1.719644708212314e-05,
"loss": 2.7364,
"step": 198100
},
{
"epoch": 0.66,
"learning_rate": 1.717988799433017e-05,
"loss": 2.7433,
"step": 198200
},
{
"epoch": 0.66,
"learning_rate": 1.71633289065372e-05,
"loss": 2.7405,
"step": 198300
},
{
"epoch": 0.66,
"learning_rate": 1.7146769818744227e-05,
"loss": 2.7361,
"step": 198400
},
{
"epoch": 0.66,
"learning_rate": 1.7130210730951254e-05,
"loss": 2.7447,
"step": 198500
},
{
"epoch": 0.66,
"learning_rate": 1.711365164315828e-05,
"loss": 2.7294,
"step": 198600
},
{
"epoch": 0.66,
"learning_rate": 1.709709255536531e-05,
"loss": 2.731,
"step": 198700
},
{
"epoch": 0.66,
"learning_rate": 1.708053346757234e-05,
"loss": 2.7371,
"step": 198800
},
{
"epoch": 0.66,
"learning_rate": 1.706397437977937e-05,
"loss": 2.7419,
"step": 198900
},
{
"epoch": 0.66,
"learning_rate": 1.7047415291986395e-05,
"loss": 2.749,
"step": 199000
},
{
"epoch": 0.66,
"learning_rate": 1.7030856204193422e-05,
"loss": 2.7477,
"step": 199100
},
{
"epoch": 0.66,
"learning_rate": 1.7014297116400453e-05,
"loss": 2.7424,
"step": 199200
},
{
"epoch": 0.66,
"learning_rate": 1.6997738028607483e-05,
"loss": 2.7354,
"step": 199300
},
{
"epoch": 0.66,
"learning_rate": 1.698117894081451e-05,
"loss": 2.7491,
"step": 199400
},
{
"epoch": 0.66,
"learning_rate": 1.6964619853021537e-05,
"loss": 2.751,
"step": 199500
},
{
"epoch": 0.66,
"learning_rate": 1.6948060765228567e-05,
"loss": 2.7344,
"step": 199600
},
{
"epoch": 0.66,
"learning_rate": 1.6931501677435594e-05,
"loss": 2.7399,
"step": 199700
},
{
"epoch": 0.66,
"learning_rate": 1.6914942589642624e-05,
"loss": 2.7261,
"step": 199800
},
{
"epoch": 0.66,
"learning_rate": 1.689838350184965e-05,
"loss": 2.7282,
"step": 199900
},
{
"epoch": 0.66,
"learning_rate": 1.6881824414056678e-05,
"loss": 2.7567,
"step": 200000
},
{
"epoch": 0.66,
"learning_rate": 1.686526532626371e-05,
"loss": 2.7304,
"step": 200100
},
{
"epoch": 0.66,
"learning_rate": 1.6848706238470735e-05,
"loss": 2.7329,
"step": 200200
},
{
"epoch": 0.66,
"learning_rate": 1.6832147150677766e-05,
"loss": 2.7499,
"step": 200300
},
{
"epoch": 0.66,
"learning_rate": 1.6815588062884792e-05,
"loss": 2.7355,
"step": 200400
},
{
"epoch": 0.66,
"learning_rate": 1.6799028975091823e-05,
"loss": 2.7495,
"step": 200500
},
{
"epoch": 0.66,
"learning_rate": 1.678246988729885e-05,
"loss": 2.7438,
"step": 200600
},
{
"epoch": 0.66,
"learning_rate": 1.6765910799505877e-05,
"loss": 2.7387,
"step": 200700
},
{
"epoch": 0.67,
"learning_rate": 1.6749351711712907e-05,
"loss": 2.7485,
"step": 200800
},
{
"epoch": 0.67,
"learning_rate": 1.6732792623919934e-05,
"loss": 2.7428,
"step": 200900
},
{
"epoch": 0.67,
"learning_rate": 1.6716233536126964e-05,
"loss": 2.7364,
"step": 201000
},
{
"epoch": 0.67,
"learning_rate": 1.669967444833399e-05,
"loss": 2.7413,
"step": 201100
},
{
"epoch": 0.67,
"learning_rate": 1.6683115360541018e-05,
"loss": 2.7507,
"step": 201200
},
{
"epoch": 0.67,
"learning_rate": 1.6666556272748048e-05,
"loss": 2.713,
"step": 201300
},
{
"epoch": 0.67,
"learning_rate": 1.6649997184955075e-05,
"loss": 2.7451,
"step": 201400
},
{
"epoch": 0.67,
"learning_rate": 1.6633438097162105e-05,
"loss": 2.7487,
"step": 201500
},
{
"epoch": 0.67,
"learning_rate": 1.6616879009369132e-05,
"loss": 2.7377,
"step": 201600
},
{
"epoch": 0.67,
"learning_rate": 1.660031992157616e-05,
"loss": 2.7419,
"step": 201700
},
{
"epoch": 0.67,
"learning_rate": 1.658376083378319e-05,
"loss": 2.7301,
"step": 201800
},
{
"epoch": 0.67,
"learning_rate": 1.656720174599022e-05,
"loss": 2.7458,
"step": 201900
},
{
"epoch": 0.67,
"learning_rate": 1.6550642658197247e-05,
"loss": 2.7329,
"step": 202000
},
{
"epoch": 0.67,
"learning_rate": 1.6534083570404274e-05,
"loss": 2.7547,
"step": 202100
},
{
"epoch": 0.67,
"learning_rate": 1.65175244826113e-05,
"loss": 2.7139,
"step": 202200
},
{
"epoch": 0.67,
"learning_rate": 1.650096539481833e-05,
"loss": 2.7398,
"step": 202300
},
{
"epoch": 0.67,
"learning_rate": 1.648440630702536e-05,
"loss": 2.7443,
"step": 202400
},
{
"epoch": 0.67,
"learning_rate": 1.6467847219232388e-05,
"loss": 2.7437,
"step": 202500
},
{
"epoch": 0.67,
"learning_rate": 1.6451288131439415e-05,
"loss": 2.7371,
"step": 202600
},
{
"epoch": 0.67,
"learning_rate": 1.6434729043646442e-05,
"loss": 2.7587,
"step": 202700
},
{
"epoch": 0.67,
"learning_rate": 1.6418169955853476e-05,
"loss": 2.7479,
"step": 202800
},
{
"epoch": 0.67,
"learning_rate": 1.6401610868060503e-05,
"loss": 2.738,
"step": 202900
},
{
"epoch": 0.67,
"learning_rate": 1.638505178026753e-05,
"loss": 2.7384,
"step": 203000
},
{
"epoch": 0.67,
"learning_rate": 1.6368492692474556e-05,
"loss": 2.7542,
"step": 203100
},
{
"epoch": 0.67,
"learning_rate": 1.6351933604681587e-05,
"loss": 2.7438,
"step": 203200
},
{
"epoch": 0.67,
"learning_rate": 1.6335374516888617e-05,
"loss": 2.74,
"step": 203300
},
{
"epoch": 0.67,
"learning_rate": 1.6318815429095644e-05,
"loss": 2.7262,
"step": 203400
},
{
"epoch": 0.67,
"learning_rate": 1.630225634130267e-05,
"loss": 2.7397,
"step": 203500
},
{
"epoch": 0.67,
"learning_rate": 1.6285697253509698e-05,
"loss": 2.7411,
"step": 203600
},
{
"epoch": 0.67,
"learning_rate": 1.6269138165716728e-05,
"loss": 2.7475,
"step": 203700
},
{
"epoch": 0.67,
"learning_rate": 1.625257907792376e-05,
"loss": 2.7394,
"step": 203800
},
{
"epoch": 0.68,
"learning_rate": 1.6236019990130785e-05,
"loss": 2.727,
"step": 203900
},
{
"epoch": 0.68,
"learning_rate": 1.6219460902337812e-05,
"loss": 2.7518,
"step": 204000
},
{
"epoch": 0.68,
"learning_rate": 1.6202901814544842e-05,
"loss": 2.7461,
"step": 204100
},
{
"epoch": 0.68,
"learning_rate": 1.618634272675187e-05,
"loss": 2.734,
"step": 204200
},
{
"epoch": 0.68,
"learning_rate": 1.61697836389589e-05,
"loss": 2.7206,
"step": 204300
},
{
"epoch": 0.68,
"learning_rate": 1.6153224551165927e-05,
"loss": 2.762,
"step": 204400
},
{
"epoch": 0.68,
"learning_rate": 1.6136665463372953e-05,
"loss": 2.7351,
"step": 204500
},
{
"epoch": 0.68,
"learning_rate": 1.6120106375579984e-05,
"loss": 2.7437,
"step": 204600
},
{
"epoch": 0.68,
"learning_rate": 1.610354728778701e-05,
"loss": 2.7519,
"step": 204700
},
{
"epoch": 0.68,
"learning_rate": 1.608698819999404e-05,
"loss": 2.7358,
"step": 204800
},
{
"epoch": 0.68,
"learning_rate": 1.6070429112201068e-05,
"loss": 2.7435,
"step": 204900
},
{
"epoch": 0.68,
"learning_rate": 1.6053870024408095e-05,
"loss": 2.7644,
"step": 205000
},
{
"epoch": 0.68,
"learning_rate": 1.6037310936615125e-05,
"loss": 2.7336,
"step": 205100
},
{
"epoch": 0.68,
"learning_rate": 1.6020751848822152e-05,
"loss": 2.7355,
"step": 205200
},
{
"epoch": 0.68,
"learning_rate": 1.6004192761029182e-05,
"loss": 2.7354,
"step": 205300
},
{
"epoch": 0.68,
"learning_rate": 1.598763367323621e-05,
"loss": 2.7431,
"step": 205400
},
{
"epoch": 0.68,
"learning_rate": 1.597107458544324e-05,
"loss": 2.7423,
"step": 205500
},
{
"epoch": 0.68,
"learning_rate": 1.5954515497650266e-05,
"loss": 2.7402,
"step": 205600
},
{
"epoch": 0.68,
"learning_rate": 1.5937956409857293e-05,
"loss": 2.7426,
"step": 205700
},
{
"epoch": 0.68,
"learning_rate": 1.5921397322064324e-05,
"loss": 2.7303,
"step": 205800
},
{
"epoch": 0.68,
"learning_rate": 1.590483823427135e-05,
"loss": 2.7449,
"step": 205900
},
{
"epoch": 0.68,
"learning_rate": 1.588827914647838e-05,
"loss": 2.7423,
"step": 206000
},
{
"epoch": 0.68,
"learning_rate": 1.5871720058685408e-05,
"loss": 2.7294,
"step": 206100
},
{
"epoch": 0.68,
"learning_rate": 1.5855160970892435e-05,
"loss": 2.7421,
"step": 206200
},
{
"epoch": 0.68,
"learning_rate": 1.5838601883099465e-05,
"loss": 2.7409,
"step": 206300
},
{
"epoch": 0.68,
"learning_rate": 1.5822042795306495e-05,
"loss": 2.7481,
"step": 206400
},
{
"epoch": 0.68,
"learning_rate": 1.5805483707513522e-05,
"loss": 2.7521,
"step": 206500
},
{
"epoch": 0.68,
"learning_rate": 1.578892461972055e-05,
"loss": 2.7391,
"step": 206600
},
{
"epoch": 0.68,
"learning_rate": 1.5772365531927576e-05,
"loss": 2.7332,
"step": 206700
},
{
"epoch": 0.68,
"learning_rate": 1.5755806444134606e-05,
"loss": 2.7301,
"step": 206800
},
{
"epoch": 0.69,
"learning_rate": 1.5739247356341637e-05,
"loss": 2.7335,
"step": 206900
},
{
"epoch": 0.69,
"learning_rate": 1.5722688268548664e-05,
"loss": 2.7397,
"step": 207000
},
{
"epoch": 0.69,
"learning_rate": 1.570612918075569e-05,
"loss": 2.7328,
"step": 207100
},
{
"epoch": 0.69,
"learning_rate": 1.5689570092962717e-05,
"loss": 2.745,
"step": 207200
},
{
"epoch": 0.69,
"learning_rate": 1.5673011005169748e-05,
"loss": 2.7366,
"step": 207300
},
{
"epoch": 0.69,
"learning_rate": 1.5656451917376778e-05,
"loss": 2.7424,
"step": 207400
},
{
"epoch": 0.69,
"learning_rate": 1.5639892829583805e-05,
"loss": 2.7485,
"step": 207500
},
{
"epoch": 0.69,
"learning_rate": 1.5623333741790832e-05,
"loss": 2.7466,
"step": 207600
},
{
"epoch": 0.69,
"learning_rate": 1.5606774653997862e-05,
"loss": 2.7467,
"step": 207700
},
{
"epoch": 0.69,
"learning_rate": 1.5590215566204892e-05,
"loss": 2.7453,
"step": 207800
},
{
"epoch": 0.69,
"learning_rate": 1.557365647841192e-05,
"loss": 2.7395,
"step": 207900
},
{
"epoch": 0.69,
"learning_rate": 1.5557097390618946e-05,
"loss": 2.748,
"step": 208000
},
{
"epoch": 0.69,
"learning_rate": 1.5540538302825973e-05,
"loss": 2.7507,
"step": 208100
},
{
"epoch": 0.69,
"learning_rate": 1.5523979215033003e-05,
"loss": 2.7468,
"step": 208200
},
{
"epoch": 0.69,
"learning_rate": 1.5507420127240034e-05,
"loss": 2.7397,
"step": 208300
},
{
"epoch": 0.69,
"learning_rate": 1.549086103944706e-05,
"loss": 2.7297,
"step": 208400
},
{
"epoch": 0.69,
"learning_rate": 1.5474301951654088e-05,
"loss": 2.7447,
"step": 208500
},
{
"epoch": 0.69,
"learning_rate": 1.5457742863861114e-05,
"loss": 2.7362,
"step": 208600
},
{
"epoch": 0.69,
"learning_rate": 1.5441183776068145e-05,
"loss": 2.7343,
"step": 208700
},
{
"epoch": 0.69,
"learning_rate": 1.5424624688275175e-05,
"loss": 2.7268,
"step": 208800
},
{
"epoch": 0.69,
"learning_rate": 1.5408065600482202e-05,
"loss": 2.7216,
"step": 208900
},
{
"epoch": 0.69,
"learning_rate": 1.539150651268923e-05,
"loss": 2.7378,
"step": 209000
},
{
"epoch": 0.69,
"learning_rate": 1.537494742489626e-05,
"loss": 2.7276,
"step": 209100
},
{
"epoch": 0.69,
"learning_rate": 1.5358388337103286e-05,
"loss": 2.7316,
"step": 209200
},
{
"epoch": 0.69,
"learning_rate": 1.5341829249310316e-05,
"loss": 2.7451,
"step": 209300
},
{
"epoch": 0.69,
"learning_rate": 1.5325270161517343e-05,
"loss": 2.7364,
"step": 209400
},
{
"epoch": 0.69,
"learning_rate": 1.530871107372437e-05,
"loss": 2.7355,
"step": 209500
},
{
"epoch": 0.69,
"learning_rate": 1.52921519859314e-05,
"loss": 2.7339,
"step": 209600
},
{
"epoch": 0.69,
"learning_rate": 1.5275592898138427e-05,
"loss": 2.7487,
"step": 209700
},
{
"epoch": 0.69,
"learning_rate": 1.5259033810345458e-05,
"loss": 2.7462,
"step": 209800
},
{
"epoch": 0.7,
"learning_rate": 1.5242474722552486e-05,
"loss": 2.7466,
"step": 209900
},
{
"epoch": 0.7,
"learning_rate": 1.5225915634759513e-05,
"loss": 2.7356,
"step": 210000
},
{
"epoch": 0.7,
"learning_rate": 1.5209356546966542e-05,
"loss": 2.7226,
"step": 210100
},
{
"epoch": 0.7,
"learning_rate": 1.5192797459173569e-05,
"loss": 2.7362,
"step": 210200
},
{
"epoch": 0.7,
"learning_rate": 1.5176238371380599e-05,
"loss": 2.7423,
"step": 210300
},
{
"epoch": 0.7,
"learning_rate": 1.5159679283587628e-05,
"loss": 2.7491,
"step": 210400
},
{
"epoch": 0.7,
"learning_rate": 1.5143120195794655e-05,
"loss": 2.7306,
"step": 210500
},
{
"epoch": 0.7,
"learning_rate": 1.5126561108001683e-05,
"loss": 2.7319,
"step": 210600
},
{
"epoch": 0.7,
"learning_rate": 1.511000202020871e-05,
"loss": 2.7384,
"step": 210700
},
{
"epoch": 0.7,
"learning_rate": 1.509344293241574e-05,
"loss": 2.7373,
"step": 210800
},
{
"epoch": 0.7,
"learning_rate": 1.5076883844622769e-05,
"loss": 2.7249,
"step": 210900
},
{
"epoch": 0.7,
"learning_rate": 1.5060324756829796e-05,
"loss": 2.743,
"step": 211000
},
{
"epoch": 0.7,
"learning_rate": 1.5043765669036824e-05,
"loss": 2.7335,
"step": 211100
},
{
"epoch": 0.7,
"learning_rate": 1.5027206581243853e-05,
"loss": 2.7283,
"step": 211200
},
{
"epoch": 0.7,
"learning_rate": 1.5010647493450883e-05,
"loss": 2.734,
"step": 211300
},
{
"epoch": 0.7,
"learning_rate": 1.499408840565791e-05,
"loss": 2.731,
"step": 211400
},
{
"epoch": 0.7,
"learning_rate": 1.4977529317864939e-05,
"loss": 2.7261,
"step": 211500
},
{
"epoch": 0.7,
"learning_rate": 1.4960970230071966e-05,
"loss": 2.733,
"step": 211600
},
{
"epoch": 0.7,
"learning_rate": 1.4944411142278994e-05,
"loss": 2.7482,
"step": 211700
},
{
"epoch": 0.7,
"learning_rate": 1.4927852054486025e-05,
"loss": 2.7321,
"step": 211800
},
{
"epoch": 0.7,
"learning_rate": 1.4911292966693052e-05,
"loss": 2.7307,
"step": 211900
},
{
"epoch": 0.7,
"learning_rate": 1.489473387890008e-05,
"loss": 2.7265,
"step": 212000
},
{
"epoch": 0.7,
"learning_rate": 1.4878174791107107e-05,
"loss": 2.7469,
"step": 212100
},
{
"epoch": 0.7,
"learning_rate": 1.4861615703314136e-05,
"loss": 2.7325,
"step": 212200
},
{
"epoch": 0.7,
"learning_rate": 1.4845056615521166e-05,
"loss": 2.7321,
"step": 212300
},
{
"epoch": 0.7,
"learning_rate": 1.4828497527728195e-05,
"loss": 2.7393,
"step": 212400
},
{
"epoch": 0.7,
"learning_rate": 1.4811938439935222e-05,
"loss": 2.7369,
"step": 212500
},
{
"epoch": 0.7,
"learning_rate": 1.479537935214225e-05,
"loss": 2.7519,
"step": 212600
},
{
"epoch": 0.7,
"learning_rate": 1.4778820264349277e-05,
"loss": 2.7335,
"step": 212700
},
{
"epoch": 0.7,
"learning_rate": 1.4762261176556307e-05,
"loss": 2.7435,
"step": 212800
},
{
"epoch": 0.71,
"learning_rate": 1.4745702088763336e-05,
"loss": 2.7406,
"step": 212900
},
{
"epoch": 0.71,
"learning_rate": 1.4729143000970363e-05,
"loss": 2.7373,
"step": 213000
},
{
"epoch": 0.71,
"learning_rate": 1.4712583913177392e-05,
"loss": 2.7298,
"step": 213100
},
{
"epoch": 0.71,
"learning_rate": 1.4696024825384418e-05,
"loss": 2.7432,
"step": 213200
},
{
"epoch": 0.71,
"learning_rate": 1.467946573759145e-05,
"loss": 2.7399,
"step": 213300
},
{
"epoch": 0.71,
"learning_rate": 1.4662906649798477e-05,
"loss": 2.7334,
"step": 213400
},
{
"epoch": 0.71,
"learning_rate": 1.4646347562005506e-05,
"loss": 2.721,
"step": 213500
},
{
"epoch": 0.71,
"learning_rate": 1.4629788474212533e-05,
"loss": 2.7371,
"step": 213600
},
{
"epoch": 0.71,
"learning_rate": 1.4613229386419561e-05,
"loss": 2.7389,
"step": 213700
},
{
"epoch": 0.71,
"learning_rate": 1.4596670298626592e-05,
"loss": 2.7398,
"step": 213800
},
{
"epoch": 0.71,
"learning_rate": 1.4580111210833619e-05,
"loss": 2.7177,
"step": 213900
},
{
"epoch": 0.71,
"learning_rate": 1.4563552123040647e-05,
"loss": 2.7336,
"step": 214000
},
{
"epoch": 0.71,
"learning_rate": 1.4546993035247674e-05,
"loss": 2.7318,
"step": 214100
},
{
"epoch": 0.71,
"learning_rate": 1.4530433947454703e-05,
"loss": 2.7353,
"step": 214200
},
{
"epoch": 0.71,
"learning_rate": 1.4513874859661733e-05,
"loss": 2.736,
"step": 214300
},
{
"epoch": 0.71,
"learning_rate": 1.449731577186876e-05,
"loss": 2.7365,
"step": 214400
},
{
"epoch": 0.71,
"learning_rate": 1.4480756684075789e-05,
"loss": 2.7302,
"step": 214500
},
{
"epoch": 0.71,
"learning_rate": 1.4464197596282816e-05,
"loss": 2.7378,
"step": 214600
},
{
"epoch": 0.71,
"learning_rate": 1.4447638508489844e-05,
"loss": 2.7422,
"step": 214700
},
{
"epoch": 0.71,
"learning_rate": 1.4431079420696874e-05,
"loss": 2.7343,
"step": 214800
},
{
"epoch": 0.71,
"learning_rate": 1.4414520332903903e-05,
"loss": 2.743,
"step": 214900
},
{
"epoch": 0.71,
"learning_rate": 1.439796124511093e-05,
"loss": 2.7572,
"step": 215000
},
{
"epoch": 0.71,
"learning_rate": 1.4381402157317959e-05,
"loss": 2.7367,
"step": 215100
},
{
"epoch": 0.71,
"learning_rate": 1.4364843069524985e-05,
"loss": 2.7367,
"step": 215200
},
{
"epoch": 0.71,
"learning_rate": 1.4348283981732016e-05,
"loss": 2.7333,
"step": 215300
},
{
"epoch": 0.71,
"learning_rate": 1.4331724893939044e-05,
"loss": 2.7396,
"step": 215400
},
{
"epoch": 0.71,
"learning_rate": 1.4315165806146071e-05,
"loss": 2.737,
"step": 215500
},
{
"epoch": 0.71,
"learning_rate": 1.42986067183531e-05,
"loss": 2.7206,
"step": 215600
},
{
"epoch": 0.71,
"learning_rate": 1.4282047630560127e-05,
"loss": 2.7255,
"step": 215700
},
{
"epoch": 0.71,
"learning_rate": 1.4265488542767159e-05,
"loss": 2.7462,
"step": 215800
},
{
"epoch": 0.72,
"learning_rate": 1.4248929454974186e-05,
"loss": 2.7506,
"step": 215900
},
{
"epoch": 0.72,
"learning_rate": 1.4232370367181214e-05,
"loss": 2.7401,
"step": 216000
},
{
"epoch": 0.72,
"learning_rate": 1.4215811279388241e-05,
"loss": 2.7385,
"step": 216100
},
{
"epoch": 0.72,
"learning_rate": 1.419925219159527e-05,
"loss": 2.7381,
"step": 216200
},
{
"epoch": 0.72,
"learning_rate": 1.41826931038023e-05,
"loss": 2.7312,
"step": 216300
},
{
"epoch": 0.72,
"learning_rate": 1.4166134016009327e-05,
"loss": 2.7415,
"step": 216400
},
{
"epoch": 0.72,
"learning_rate": 1.4149574928216356e-05,
"loss": 2.7345,
"step": 216500
},
{
"epoch": 0.72,
"learning_rate": 1.4133015840423383e-05,
"loss": 2.748,
"step": 216600
},
{
"epoch": 0.72,
"learning_rate": 1.4116456752630411e-05,
"loss": 2.748,
"step": 216700
},
{
"epoch": 0.72,
"learning_rate": 1.4099897664837441e-05,
"loss": 2.7453,
"step": 216800
},
{
"epoch": 0.72,
"learning_rate": 1.408333857704447e-05,
"loss": 2.7378,
"step": 216900
},
{
"epoch": 0.72,
"learning_rate": 1.4066779489251497e-05,
"loss": 2.7328,
"step": 217000
},
{
"epoch": 0.72,
"learning_rate": 1.4050220401458526e-05,
"loss": 2.7334,
"step": 217100
},
{
"epoch": 0.72,
"learning_rate": 1.4033661313665552e-05,
"loss": 2.7476,
"step": 217200
},
{
"epoch": 0.72,
"learning_rate": 1.4017102225872583e-05,
"loss": 2.7333,
"step": 217300
},
{
"epoch": 0.72,
"learning_rate": 1.4000543138079611e-05,
"loss": 2.7417,
"step": 217400
},
{
"epoch": 0.72,
"learning_rate": 1.3983984050286638e-05,
"loss": 2.741,
"step": 217500
},
{
"epoch": 0.72,
"learning_rate": 1.3967424962493667e-05,
"loss": 2.7236,
"step": 217600
},
{
"epoch": 0.72,
"learning_rate": 1.3950865874700694e-05,
"loss": 2.7387,
"step": 217700
},
{
"epoch": 0.72,
"learning_rate": 1.3934306786907724e-05,
"loss": 2.7393,
"step": 217800
},
{
"epoch": 0.72,
"learning_rate": 1.3917747699114753e-05,
"loss": 2.7202,
"step": 217900
},
{
"epoch": 0.72,
"learning_rate": 1.390118861132178e-05,
"loss": 2.744,
"step": 218000
},
{
"epoch": 0.72,
"learning_rate": 1.3884629523528808e-05,
"loss": 2.7358,
"step": 218100
},
{
"epoch": 0.72,
"learning_rate": 1.3868070435735835e-05,
"loss": 2.7504,
"step": 218200
},
{
"epoch": 0.72,
"learning_rate": 1.3851511347942867e-05,
"loss": 2.7312,
"step": 218300
},
{
"epoch": 0.72,
"learning_rate": 1.3834952260149894e-05,
"loss": 2.7199,
"step": 218400
},
{
"epoch": 0.72,
"learning_rate": 1.3818393172356923e-05,
"loss": 2.7406,
"step": 218500
},
{
"epoch": 0.72,
"learning_rate": 1.380183408456395e-05,
"loss": 2.7419,
"step": 218600
},
{
"epoch": 0.72,
"learning_rate": 1.3785274996770978e-05,
"loss": 2.742,
"step": 218700
},
{
"epoch": 0.72,
"learning_rate": 1.3768715908978008e-05,
"loss": 2.7436,
"step": 218800
},
{
"epoch": 0.72,
"learning_rate": 1.3752156821185035e-05,
"loss": 2.7303,
"step": 218900
},
{
"epoch": 0.73,
"learning_rate": 1.3735597733392064e-05,
"loss": 2.7314,
"step": 219000
},
{
"epoch": 0.73,
"learning_rate": 1.3719038645599091e-05,
"loss": 2.7448,
"step": 219100
},
{
"epoch": 0.73,
"learning_rate": 1.370247955780612e-05,
"loss": 2.7358,
"step": 219200
},
{
"epoch": 0.73,
"learning_rate": 1.368592047001315e-05,
"loss": 2.7307,
"step": 219300
},
{
"epoch": 0.73,
"learning_rate": 1.3669361382220178e-05,
"loss": 2.7211,
"step": 219400
},
{
"epoch": 0.73,
"learning_rate": 1.3652802294427205e-05,
"loss": 2.7314,
"step": 219500
},
{
"epoch": 0.73,
"learning_rate": 1.3636243206634234e-05,
"loss": 2.7512,
"step": 219600
},
{
"epoch": 0.73,
"learning_rate": 1.361968411884126e-05,
"loss": 2.7242,
"step": 219700
},
{
"epoch": 0.73,
"learning_rate": 1.3603125031048291e-05,
"loss": 2.7493,
"step": 219800
},
{
"epoch": 0.73,
"learning_rate": 1.358656594325532e-05,
"loss": 2.7336,
"step": 219900
},
{
"epoch": 0.73,
"learning_rate": 1.3570006855462347e-05,
"loss": 2.7397,
"step": 220000
},
{
"epoch": 0.73,
"learning_rate": 1.3553447767669375e-05,
"loss": 2.7303,
"step": 220100
},
{
"epoch": 0.73,
"learning_rate": 1.3536888679876402e-05,
"loss": 2.7419,
"step": 220200
},
{
"epoch": 0.73,
"learning_rate": 1.3520329592083432e-05,
"loss": 2.7435,
"step": 220300
},
{
"epoch": 0.73,
"learning_rate": 1.3503770504290461e-05,
"loss": 2.7441,
"step": 220400
},
{
"epoch": 0.73,
"learning_rate": 1.348721141649749e-05,
"loss": 2.7431,
"step": 220500
},
{
"epoch": 0.73,
"learning_rate": 1.3470652328704517e-05,
"loss": 2.7252,
"step": 220600
},
{
"epoch": 0.73,
"learning_rate": 1.3454093240911545e-05,
"loss": 2.7465,
"step": 220700
},
{
"epoch": 0.73,
"learning_rate": 1.3437534153118576e-05,
"loss": 2.747,
"step": 220800
},
{
"epoch": 0.73,
"learning_rate": 1.3420975065325602e-05,
"loss": 2.7359,
"step": 220900
},
{
"epoch": 0.73,
"learning_rate": 1.3404415977532631e-05,
"loss": 2.7248,
"step": 221000
},
{
"epoch": 0.73,
"learning_rate": 1.3387856889739658e-05,
"loss": 2.7419,
"step": 221100
},
{
"epoch": 0.73,
"learning_rate": 1.3371297801946687e-05,
"loss": 2.7386,
"step": 221200
},
{
"epoch": 0.73,
"learning_rate": 1.3354738714153717e-05,
"loss": 2.7346,
"step": 221300
},
{
"epoch": 0.73,
"learning_rate": 1.3338179626360744e-05,
"loss": 2.7413,
"step": 221400
},
{
"epoch": 0.73,
"learning_rate": 1.3321620538567772e-05,
"loss": 2.7421,
"step": 221500
},
{
"epoch": 0.73,
"learning_rate": 1.33050614507748e-05,
"loss": 2.7441,
"step": 221600
},
{
"epoch": 0.73,
"learning_rate": 1.3288502362981828e-05,
"loss": 2.7452,
"step": 221700
},
{
"epoch": 0.73,
"learning_rate": 1.3271943275188858e-05,
"loss": 2.7443,
"step": 221800
},
{
"epoch": 0.73,
"learning_rate": 1.3255384187395887e-05,
"loss": 2.7384,
"step": 221900
},
{
"epoch": 0.74,
"learning_rate": 1.3238825099602914e-05,
"loss": 2.741,
"step": 222000
},
{
"epoch": 0.74,
"learning_rate": 1.3222266011809942e-05,
"loss": 2.7404,
"step": 222100
},
{
"epoch": 0.74,
"learning_rate": 1.320570692401697e-05,
"loss": 2.7281,
"step": 222200
},
{
"epoch": 0.74,
"learning_rate": 1.3189147836224e-05,
"loss": 2.7304,
"step": 222300
},
{
"epoch": 0.74,
"learning_rate": 1.3172588748431028e-05,
"loss": 2.7464,
"step": 222400
},
{
"epoch": 0.74,
"learning_rate": 1.3156029660638055e-05,
"loss": 2.7167,
"step": 222500
},
{
"epoch": 0.74,
"learning_rate": 1.3139470572845084e-05,
"loss": 2.7363,
"step": 222600
},
{
"epoch": 0.74,
"learning_rate": 1.312291148505211e-05,
"loss": 2.7286,
"step": 222700
},
{
"epoch": 0.74,
"learning_rate": 1.3106352397259143e-05,
"loss": 2.7519,
"step": 222800
},
{
"epoch": 0.74,
"learning_rate": 1.308979330946617e-05,
"loss": 2.741,
"step": 222900
},
{
"epoch": 0.74,
"learning_rate": 1.3073234221673198e-05,
"loss": 2.7186,
"step": 223000
},
{
"epoch": 0.74,
"learning_rate": 1.3056675133880225e-05,
"loss": 2.7306,
"step": 223100
},
{
"epoch": 0.74,
"learning_rate": 1.3040116046087254e-05,
"loss": 2.732,
"step": 223200
},
{
"epoch": 0.74,
"learning_rate": 1.3023556958294284e-05,
"loss": 2.7292,
"step": 223300
},
{
"epoch": 0.74,
"learning_rate": 1.300699787050131e-05,
"loss": 2.7223,
"step": 223400
},
{
"epoch": 0.74,
"learning_rate": 1.299043878270834e-05,
"loss": 2.7343,
"step": 223500
},
{
"epoch": 0.74,
"learning_rate": 1.2973879694915366e-05,
"loss": 2.7359,
"step": 223600
},
{
"epoch": 0.74,
"learning_rate": 1.2957320607122395e-05,
"loss": 2.7215,
"step": 223700
},
{
"epoch": 0.74,
"learning_rate": 1.2940761519329425e-05,
"loss": 2.7199,
"step": 223800
},
{
"epoch": 0.74,
"learning_rate": 1.2924202431536452e-05,
"loss": 2.7357,
"step": 223900
},
{
"epoch": 0.74,
"learning_rate": 1.290764334374348e-05,
"loss": 2.7388,
"step": 224000
},
{
"epoch": 0.74,
"learning_rate": 1.289108425595051e-05,
"loss": 2.7297,
"step": 224100
},
{
"epoch": 0.74,
"learning_rate": 1.2874525168157536e-05,
"loss": 2.7261,
"step": 224200
},
{
"epoch": 0.74,
"learning_rate": 1.2857966080364567e-05,
"loss": 2.7593,
"step": 224300
},
{
"epoch": 0.74,
"learning_rate": 1.2841406992571595e-05,
"loss": 2.7388,
"step": 224400
},
{
"epoch": 0.74,
"learning_rate": 1.2824847904778622e-05,
"loss": 2.7441,
"step": 224500
},
{
"epoch": 0.74,
"learning_rate": 1.280828881698565e-05,
"loss": 2.7375,
"step": 224600
},
{
"epoch": 0.74,
"learning_rate": 1.2791729729192678e-05,
"loss": 2.7274,
"step": 224700
},
{
"epoch": 0.74,
"learning_rate": 1.2775170641399708e-05,
"loss": 2.7279,
"step": 224800
},
{
"epoch": 0.74,
"learning_rate": 1.2758611553606736e-05,
"loss": 2.7245,
"step": 224900
},
{
"epoch": 0.75,
"learning_rate": 1.2742052465813763e-05,
"loss": 2.7332,
"step": 225000
},
{
"epoch": 0.75,
"learning_rate": 1.2725493378020792e-05,
"loss": 2.7349,
"step": 225100
},
{
"epoch": 0.75,
"learning_rate": 1.2708934290227819e-05,
"loss": 2.7183,
"step": 225200
},
{
"epoch": 0.75,
"learning_rate": 1.2692375202434851e-05,
"loss": 2.744,
"step": 225300
},
{
"epoch": 0.75,
"learning_rate": 1.2675816114641878e-05,
"loss": 2.755,
"step": 225400
},
{
"epoch": 0.75,
"learning_rate": 1.2659257026848906e-05,
"loss": 2.7396,
"step": 225500
},
{
"epoch": 0.75,
"learning_rate": 1.2642697939055933e-05,
"loss": 2.7201,
"step": 225600
},
{
"epoch": 0.75,
"learning_rate": 1.2626138851262962e-05,
"loss": 2.7371,
"step": 225700
},
{
"epoch": 0.75,
"learning_rate": 1.2609579763469992e-05,
"loss": 2.7388,
"step": 225800
},
{
"epoch": 0.75,
"learning_rate": 1.2593020675677019e-05,
"loss": 2.7232,
"step": 225900
},
{
"epoch": 0.75,
"learning_rate": 1.2576461587884048e-05,
"loss": 2.7321,
"step": 226000
},
{
"epoch": 0.75,
"learning_rate": 1.2559902500091075e-05,
"loss": 2.7349,
"step": 226100
},
{
"epoch": 0.75,
"learning_rate": 1.2543343412298103e-05,
"loss": 2.7497,
"step": 226200
},
{
"epoch": 0.75,
"learning_rate": 1.2526784324505134e-05,
"loss": 2.7385,
"step": 226300
},
{
"epoch": 0.75,
"learning_rate": 1.2510225236712162e-05,
"loss": 2.7397,
"step": 226400
},
{
"epoch": 0.75,
"learning_rate": 1.2493666148919189e-05,
"loss": 2.7345,
"step": 226500
},
{
"epoch": 0.75,
"learning_rate": 1.2477107061126218e-05,
"loss": 2.734,
"step": 226600
},
{
"epoch": 0.75,
"learning_rate": 1.2460547973333246e-05,
"loss": 2.731,
"step": 226700
},
{
"epoch": 0.75,
"learning_rate": 1.2443988885540273e-05,
"loss": 2.7298,
"step": 226800
},
{
"epoch": 0.75,
"learning_rate": 1.2427429797747302e-05,
"loss": 2.7303,
"step": 226900
},
{
"epoch": 0.75,
"learning_rate": 1.241087070995433e-05,
"loss": 2.7343,
"step": 227000
},
{
"epoch": 0.75,
"learning_rate": 1.2394311622161359e-05,
"loss": 2.7504,
"step": 227100
},
{
"epoch": 0.75,
"learning_rate": 1.2377752534368388e-05,
"loss": 2.7192,
"step": 227200
},
{
"epoch": 0.75,
"learning_rate": 1.2361193446575416e-05,
"loss": 2.7333,
"step": 227300
},
{
"epoch": 0.75,
"learning_rate": 1.2344634358782443e-05,
"loss": 2.7464,
"step": 227400
},
{
"epoch": 0.75,
"learning_rate": 1.2328075270989472e-05,
"loss": 2.7402,
"step": 227500
},
{
"epoch": 0.75,
"learning_rate": 1.23115161831965e-05,
"loss": 2.7246,
"step": 227600
},
{
"epoch": 0.75,
"learning_rate": 1.2294957095403529e-05,
"loss": 2.7297,
"step": 227700
},
{
"epoch": 0.75,
"learning_rate": 1.2278398007610558e-05,
"loss": 2.738,
"step": 227800
},
{
"epoch": 0.75,
"learning_rate": 1.2261838919817584e-05,
"loss": 2.7303,
"step": 227900
},
{
"epoch": 0.76,
"learning_rate": 1.2245279832024615e-05,
"loss": 2.7445,
"step": 228000
},
{
"epoch": 0.76,
"learning_rate": 1.2228720744231642e-05,
"loss": 2.7405,
"step": 228100
},
{
"epoch": 0.76,
"learning_rate": 1.2212161656438672e-05,
"loss": 2.7304,
"step": 228200
},
{
"epoch": 0.76,
"learning_rate": 1.2195602568645699e-05,
"loss": 2.723,
"step": 228300
},
{
"epoch": 0.76,
"learning_rate": 1.2179043480852728e-05,
"loss": 2.7344,
"step": 228400
},
{
"epoch": 0.76,
"learning_rate": 1.2162484393059756e-05,
"loss": 2.7267,
"step": 228500
},
{
"epoch": 0.76,
"learning_rate": 1.2145925305266783e-05,
"loss": 2.7427,
"step": 228600
},
{
"epoch": 0.76,
"learning_rate": 1.2129366217473813e-05,
"loss": 2.7328,
"step": 228700
},
{
"epoch": 0.76,
"learning_rate": 1.211280712968084e-05,
"loss": 2.7359,
"step": 228800
},
{
"epoch": 0.76,
"learning_rate": 1.2096248041887869e-05,
"loss": 2.7302,
"step": 228900
},
{
"epoch": 0.76,
"learning_rate": 1.2079688954094897e-05,
"loss": 2.742,
"step": 229000
},
{
"epoch": 0.76,
"learning_rate": 1.2063129866301926e-05,
"loss": 2.7364,
"step": 229100
},
{
"epoch": 0.76,
"learning_rate": 1.2046570778508955e-05,
"loss": 2.732,
"step": 229200
},
{
"epoch": 0.76,
"learning_rate": 1.2030011690715982e-05,
"loss": 2.7296,
"step": 229300
},
{
"epoch": 0.76,
"learning_rate": 1.201345260292301e-05,
"loss": 2.728,
"step": 229400
},
{
"epoch": 0.76,
"learning_rate": 1.1996893515130039e-05,
"loss": 2.7314,
"step": 229500
},
{
"epoch": 0.76,
"learning_rate": 1.1980334427337067e-05,
"loss": 2.7189,
"step": 229600
},
{
"epoch": 0.76,
"learning_rate": 1.1963775339544096e-05,
"loss": 2.7374,
"step": 229700
},
{
"epoch": 0.76,
"learning_rate": 1.1947216251751125e-05,
"loss": 2.7278,
"step": 229800
},
{
"epoch": 0.76,
"learning_rate": 1.1930657163958152e-05,
"loss": 2.7257,
"step": 229900
},
{
"epoch": 0.76,
"learning_rate": 1.1914098076165182e-05,
"loss": 2.731,
"step": 230000
},
{
"epoch": 0.76,
"learning_rate": 1.1897538988372209e-05,
"loss": 2.7389,
"step": 230100
},
{
"epoch": 0.76,
"learning_rate": 1.1880979900579237e-05,
"loss": 2.7379,
"step": 230200
},
{
"epoch": 0.76,
"learning_rate": 1.1864420812786266e-05,
"loss": 2.7315,
"step": 230300
},
{
"epoch": 0.76,
"learning_rate": 1.1847861724993293e-05,
"loss": 2.7324,
"step": 230400
},
{
"epoch": 0.76,
"learning_rate": 1.1831302637200323e-05,
"loss": 2.7426,
"step": 230500
},
{
"epoch": 0.76,
"learning_rate": 1.181474354940735e-05,
"loss": 2.7392,
"step": 230600
},
{
"epoch": 0.76,
"learning_rate": 1.179818446161438e-05,
"loss": 2.7276,
"step": 230700
},
{
"epoch": 0.76,
"learning_rate": 1.1781625373821407e-05,
"loss": 2.7275,
"step": 230800
},
{
"epoch": 0.76,
"learning_rate": 1.1765066286028436e-05,
"loss": 2.7456,
"step": 230900
},
{
"epoch": 0.77,
"learning_rate": 1.1748507198235464e-05,
"loss": 2.7287,
"step": 231000
},
{
"epoch": 0.77,
"learning_rate": 1.1731948110442491e-05,
"loss": 2.7375,
"step": 231100
},
{
"epoch": 0.77,
"learning_rate": 1.1715389022649522e-05,
"loss": 2.7393,
"step": 231200
},
{
"epoch": 0.77,
"learning_rate": 1.1698829934856549e-05,
"loss": 2.7384,
"step": 231300
},
{
"epoch": 0.77,
"learning_rate": 1.1682270847063577e-05,
"loss": 2.7277,
"step": 231400
},
{
"epoch": 0.77,
"learning_rate": 1.1665711759270606e-05,
"loss": 2.7267,
"step": 231500
},
{
"epoch": 0.77,
"learning_rate": 1.1649152671477634e-05,
"loss": 2.7496,
"step": 231600
},
{
"epoch": 0.77,
"learning_rate": 1.1632593583684663e-05,
"loss": 2.7337,
"step": 231700
},
{
"epoch": 0.77,
"learning_rate": 1.1616034495891692e-05,
"loss": 2.7402,
"step": 231800
},
{
"epoch": 0.77,
"learning_rate": 1.1599475408098719e-05,
"loss": 2.7322,
"step": 231900
},
{
"epoch": 0.77,
"learning_rate": 1.1582916320305747e-05,
"loss": 2.7449,
"step": 232000
},
{
"epoch": 0.77,
"learning_rate": 1.1566357232512776e-05,
"loss": 2.717,
"step": 232100
},
{
"epoch": 0.77,
"learning_rate": 1.1549798144719804e-05,
"loss": 2.721,
"step": 232200
},
{
"epoch": 0.77,
"learning_rate": 1.1533239056926833e-05,
"loss": 2.7512,
"step": 232300
},
{
"epoch": 0.77,
"learning_rate": 1.151667996913386e-05,
"loss": 2.7197,
"step": 232400
},
{
"epoch": 0.77,
"learning_rate": 1.150012088134089e-05,
"loss": 2.7312,
"step": 232500
},
{
"epoch": 0.77,
"learning_rate": 1.1483561793547917e-05,
"loss": 2.7292,
"step": 232600
},
{
"epoch": 0.77,
"learning_rate": 1.1467002705754946e-05,
"loss": 2.7392,
"step": 232700
},
{
"epoch": 0.77,
"learning_rate": 1.1450443617961974e-05,
"loss": 2.7247,
"step": 232800
},
{
"epoch": 0.77,
"learning_rate": 1.1433884530169001e-05,
"loss": 2.7287,
"step": 232900
},
{
"epoch": 0.77,
"learning_rate": 1.1417325442376031e-05,
"loss": 2.7429,
"step": 233000
},
{
"epoch": 0.77,
"learning_rate": 1.1400766354583058e-05,
"loss": 2.7295,
"step": 233100
},
{
"epoch": 0.77,
"learning_rate": 1.1384207266790089e-05,
"loss": 2.7364,
"step": 233200
},
{
"epoch": 0.77,
"learning_rate": 1.1367648178997116e-05,
"loss": 2.7233,
"step": 233300
},
{
"epoch": 0.77,
"learning_rate": 1.1351089091204144e-05,
"loss": 2.7265,
"step": 233400
},
{
"epoch": 0.77,
"learning_rate": 1.1334530003411173e-05,
"loss": 2.7441,
"step": 233500
},
{
"epoch": 0.77,
"learning_rate": 1.1317970915618201e-05,
"loss": 2.7349,
"step": 233600
},
{
"epoch": 0.77,
"learning_rate": 1.130141182782523e-05,
"loss": 2.7382,
"step": 233700
},
{
"epoch": 0.77,
"learning_rate": 1.1284852740032257e-05,
"loss": 2.7291,
"step": 233800
},
{
"epoch": 0.77,
"learning_rate": 1.1268293652239286e-05,
"loss": 2.7563,
"step": 233900
},
{
"epoch": 0.77,
"learning_rate": 1.1251734564446314e-05,
"loss": 2.7223,
"step": 234000
},
{
"epoch": 0.78,
"learning_rate": 1.1235175476653343e-05,
"loss": 2.7279,
"step": 234100
},
{
"epoch": 0.78,
"learning_rate": 1.1218616388860371e-05,
"loss": 2.744,
"step": 234200
},
{
"epoch": 0.78,
"learning_rate": 1.12020573010674e-05,
"loss": 2.727,
"step": 234300
},
{
"epoch": 0.78,
"learning_rate": 1.1185498213274427e-05,
"loss": 2.7376,
"step": 234400
},
{
"epoch": 0.78,
"learning_rate": 1.1168939125481455e-05,
"loss": 2.7504,
"step": 234500
},
{
"epoch": 0.78,
"learning_rate": 1.1152380037688484e-05,
"loss": 2.7306,
"step": 234600
},
{
"epoch": 0.78,
"learning_rate": 1.1135820949895513e-05,
"loss": 2.7442,
"step": 234700
},
{
"epoch": 0.78,
"learning_rate": 1.1119261862102541e-05,
"loss": 2.746,
"step": 234800
},
{
"epoch": 0.78,
"learning_rate": 1.1102702774309568e-05,
"loss": 2.7402,
"step": 234900
},
{
"epoch": 0.78,
"learning_rate": 1.1086143686516599e-05,
"loss": 2.7389,
"step": 235000
},
{
"epoch": 0.78,
"learning_rate": 1.1069584598723625e-05,
"loss": 2.7335,
"step": 235100
},
{
"epoch": 0.78,
"learning_rate": 1.1053025510930656e-05,
"loss": 2.7338,
"step": 235200
},
{
"epoch": 0.78,
"learning_rate": 1.1036466423137683e-05,
"loss": 2.7424,
"step": 235300
},
{
"epoch": 0.78,
"learning_rate": 1.1019907335344711e-05,
"loss": 2.74,
"step": 235400
},
{
"epoch": 0.78,
"learning_rate": 1.100334824755174e-05,
"loss": 2.7222,
"step": 235500
},
{
"epoch": 0.78,
"learning_rate": 1.0986789159758767e-05,
"loss": 2.741,
"step": 235600
},
{
"epoch": 0.78,
"learning_rate": 1.0970230071965797e-05,
"loss": 2.7255,
"step": 235700
},
{
"epoch": 0.78,
"learning_rate": 1.0953670984172824e-05,
"loss": 2.7184,
"step": 235800
},
{
"epoch": 0.78,
"learning_rate": 1.0937111896379853e-05,
"loss": 2.728,
"step": 235900
},
{
"epoch": 0.78,
"learning_rate": 1.0920552808586881e-05,
"loss": 2.7346,
"step": 236000
},
{
"epoch": 0.78,
"learning_rate": 1.090399372079391e-05,
"loss": 2.7221,
"step": 236100
},
{
"epoch": 0.78,
"learning_rate": 1.0887434633000938e-05,
"loss": 2.7363,
"step": 236200
},
{
"epoch": 0.78,
"learning_rate": 1.0870875545207965e-05,
"loss": 2.7295,
"step": 236300
},
{
"epoch": 0.78,
"learning_rate": 1.0854316457414994e-05,
"loss": 2.7392,
"step": 236400
},
{
"epoch": 0.78,
"learning_rate": 1.0837757369622023e-05,
"loss": 2.7505,
"step": 236500
},
{
"epoch": 0.78,
"learning_rate": 1.0821198281829051e-05,
"loss": 2.7302,
"step": 236600
},
{
"epoch": 0.78,
"learning_rate": 1.080463919403608e-05,
"loss": 2.7467,
"step": 236700
},
{
"epoch": 0.78,
"learning_rate": 1.0788080106243108e-05,
"loss": 2.749,
"step": 236800
},
{
"epoch": 0.78,
"learning_rate": 1.0771521018450135e-05,
"loss": 2.7397,
"step": 236900
},
{
"epoch": 0.78,
"learning_rate": 1.0754961930657166e-05,
"loss": 2.7338,
"step": 237000
},
{
"epoch": 0.79,
"learning_rate": 1.0738402842864192e-05,
"loss": 2.7321,
"step": 237100
},
{
"epoch": 0.79,
"learning_rate": 1.0721843755071221e-05,
"loss": 2.7236,
"step": 237200
},
{
"epoch": 0.79,
"learning_rate": 1.070528466727825e-05,
"loss": 2.7381,
"step": 237300
},
{
"epoch": 0.79,
"learning_rate": 1.0688725579485277e-05,
"loss": 2.7401,
"step": 237400
},
{
"epoch": 0.79,
"learning_rate": 1.0672166491692307e-05,
"loss": 2.7288,
"step": 237500
},
{
"epoch": 0.79,
"learning_rate": 1.0655607403899334e-05,
"loss": 2.7309,
"step": 237600
},
{
"epoch": 0.79,
"learning_rate": 1.0639048316106364e-05,
"loss": 2.7342,
"step": 237700
},
{
"epoch": 0.79,
"learning_rate": 1.0622489228313391e-05,
"loss": 2.7188,
"step": 237800
},
{
"epoch": 0.79,
"learning_rate": 1.060593014052042e-05,
"loss": 2.733,
"step": 237900
},
{
"epoch": 0.79,
"learning_rate": 1.0589371052727448e-05,
"loss": 2.7289,
"step": 238000
},
{
"epoch": 0.79,
"learning_rate": 1.0572811964934475e-05,
"loss": 2.7486,
"step": 238100
},
{
"epoch": 0.79,
"learning_rate": 1.0556252877141505e-05,
"loss": 2.7332,
"step": 238200
},
{
"epoch": 0.79,
"learning_rate": 1.0539693789348532e-05,
"loss": 2.7219,
"step": 238300
},
{
"epoch": 0.79,
"learning_rate": 1.0523134701555561e-05,
"loss": 2.7237,
"step": 238400
},
{
"epoch": 0.79,
"learning_rate": 1.050657561376259e-05,
"loss": 2.7207,
"step": 238500
},
{
"epoch": 0.79,
"learning_rate": 1.0490016525969618e-05,
"loss": 2.7311,
"step": 238600
},
{
"epoch": 0.79,
"learning_rate": 1.0473457438176647e-05,
"loss": 2.72,
"step": 238700
},
{
"epoch": 0.79,
"learning_rate": 1.0456898350383675e-05,
"loss": 2.7227,
"step": 238800
},
{
"epoch": 0.79,
"learning_rate": 1.0440339262590702e-05,
"loss": 2.7341,
"step": 238900
},
{
"epoch": 0.79,
"learning_rate": 1.0423780174797731e-05,
"loss": 2.7221,
"step": 239000
},
{
"epoch": 0.79,
"learning_rate": 1.040722108700476e-05,
"loss": 2.7184,
"step": 239100
},
{
"epoch": 0.79,
"learning_rate": 1.0390661999211788e-05,
"loss": 2.7403,
"step": 239200
},
{
"epoch": 0.79,
"learning_rate": 1.0374102911418817e-05,
"loss": 2.7283,
"step": 239300
},
{
"epoch": 0.79,
"learning_rate": 1.0357543823625844e-05,
"loss": 2.7418,
"step": 239400
},
{
"epoch": 0.79,
"learning_rate": 1.0340984735832874e-05,
"loss": 2.7326,
"step": 239500
},
{
"epoch": 0.79,
"learning_rate": 1.03244256480399e-05,
"loss": 2.724,
"step": 239600
},
{
"epoch": 0.79,
"learning_rate": 1.030786656024693e-05,
"loss": 2.7366,
"step": 239700
},
{
"epoch": 0.79,
"learning_rate": 1.0291307472453958e-05,
"loss": 2.7332,
"step": 239800
},
{
"epoch": 0.79,
"learning_rate": 1.0274748384660985e-05,
"loss": 2.7303,
"step": 239900
},
{
"epoch": 0.79,
"learning_rate": 1.0258189296868015e-05,
"loss": 2.7411,
"step": 240000
},
{
"epoch": 0.8,
"learning_rate": 1.0241630209075042e-05,
"loss": 2.7556,
"step": 240100
},
{
"epoch": 0.8,
"learning_rate": 1.0225071121282072e-05,
"loss": 2.7342,
"step": 240200
},
{
"epoch": 0.8,
"learning_rate": 1.02085120334891e-05,
"loss": 2.7435,
"step": 240300
},
{
"epoch": 0.8,
"learning_rate": 1.0191952945696128e-05,
"loss": 2.7391,
"step": 240400
},
{
"epoch": 0.8,
"learning_rate": 1.0175393857903157e-05,
"loss": 2.747,
"step": 240500
},
{
"epoch": 0.8,
"learning_rate": 1.0158834770110185e-05,
"loss": 2.7265,
"step": 240600
},
{
"epoch": 0.8,
"learning_rate": 1.0142275682317214e-05,
"loss": 2.7361,
"step": 240700
},
{
"epoch": 0.8,
"learning_rate": 1.012571659452424e-05,
"loss": 2.7358,
"step": 240800
},
{
"epoch": 0.8,
"learning_rate": 1.010915750673127e-05,
"loss": 2.7215,
"step": 240900
},
{
"epoch": 0.8,
"learning_rate": 1.0092598418938298e-05,
"loss": 2.7271,
"step": 241000
},
{
"epoch": 0.8,
"learning_rate": 1.0076039331145327e-05,
"loss": 2.7506,
"step": 241100
},
{
"epoch": 0.8,
"learning_rate": 1.0059480243352355e-05,
"loss": 2.7365,
"step": 241200
},
{
"epoch": 0.8,
"learning_rate": 1.0042921155559384e-05,
"loss": 2.7371,
"step": 241300
},
{
"epoch": 0.8,
"learning_rate": 1.002636206776641e-05,
"loss": 2.7289,
"step": 241400
},
{
"epoch": 0.8,
"learning_rate": 1.000980297997344e-05,
"loss": 2.7282,
"step": 241500
},
{
"epoch": 0.8,
"learning_rate": 9.993243892180468e-06,
"loss": 2.7335,
"step": 241600
},
{
"epoch": 0.8,
"learning_rate": 9.976684804387496e-06,
"loss": 2.7417,
"step": 241700
},
{
"epoch": 0.8,
"learning_rate": 9.960125716594525e-06,
"loss": 2.7407,
"step": 241800
},
{
"epoch": 0.8,
"learning_rate": 9.943566628801552e-06,
"loss": 2.7359,
"step": 241900
},
{
"epoch": 0.8,
"learning_rate": 9.927007541008582e-06,
"loss": 2.7315,
"step": 242000
},
{
"epoch": 0.8,
"learning_rate": 9.91044845321561e-06,
"loss": 2.7384,
"step": 242100
},
{
"epoch": 0.8,
"learning_rate": 9.893889365422638e-06,
"loss": 2.7202,
"step": 242200
},
{
"epoch": 0.8,
"learning_rate": 9.877330277629666e-06,
"loss": 2.7463,
"step": 242300
},
{
"epoch": 0.8,
"learning_rate": 9.860771189836695e-06,
"loss": 2.7396,
"step": 242400
},
{
"epoch": 0.8,
"learning_rate": 9.844212102043724e-06,
"loss": 2.7273,
"step": 242500
},
{
"epoch": 0.8,
"learning_rate": 9.82765301425075e-06,
"loss": 2.7276,
"step": 242600
},
{
"epoch": 0.8,
"learning_rate": 9.81109392645778e-06,
"loss": 2.7437,
"step": 242700
},
{
"epoch": 0.8,
"learning_rate": 9.794534838664808e-06,
"loss": 2.7189,
"step": 242800
},
{
"epoch": 0.8,
"learning_rate": 9.777975750871836e-06,
"loss": 2.7196,
"step": 242900
},
{
"epoch": 0.8,
"learning_rate": 9.761416663078865e-06,
"loss": 2.7228,
"step": 243000
},
{
"epoch": 0.81,
"learning_rate": 9.744857575285894e-06,
"loss": 2.7269,
"step": 243100
},
{
"epoch": 0.81,
"learning_rate": 9.728298487492922e-06,
"loss": 2.7313,
"step": 243200
},
{
"epoch": 0.81,
"learning_rate": 9.711739399699949e-06,
"loss": 2.7326,
"step": 243300
},
{
"epoch": 0.81,
"learning_rate": 9.695180311906978e-06,
"loss": 2.7212,
"step": 243400
},
{
"epoch": 0.81,
"learning_rate": 9.678621224114006e-06,
"loss": 2.7277,
"step": 243500
},
{
"epoch": 0.81,
"learning_rate": 9.662062136321035e-06,
"loss": 2.7364,
"step": 243600
},
{
"epoch": 0.81,
"learning_rate": 9.645503048528063e-06,
"loss": 2.7339,
"step": 243700
},
{
"epoch": 0.81,
"learning_rate": 9.628943960735092e-06,
"loss": 2.7141,
"step": 243800
},
{
"epoch": 0.81,
"learning_rate": 9.612384872942119e-06,
"loss": 2.7451,
"step": 243900
},
{
"epoch": 0.81,
"learning_rate": 9.595825785149148e-06,
"loss": 2.7415,
"step": 244000
},
{
"epoch": 0.81,
"learning_rate": 9.579266697356176e-06,
"loss": 2.7323,
"step": 244100
},
{
"epoch": 0.81,
"learning_rate": 9.562707609563205e-06,
"loss": 2.7279,
"step": 244200
},
{
"epoch": 0.81,
"learning_rate": 9.546148521770233e-06,
"loss": 2.7405,
"step": 244300
},
{
"epoch": 0.81,
"learning_rate": 9.52958943397726e-06,
"loss": 2.7326,
"step": 244400
},
{
"epoch": 0.81,
"learning_rate": 9.51303034618429e-06,
"loss": 2.7187,
"step": 244500
},
{
"epoch": 0.81,
"learning_rate": 9.496471258391318e-06,
"loss": 2.7227,
"step": 244600
},
{
"epoch": 0.81,
"learning_rate": 9.479912170598348e-06,
"loss": 2.715,
"step": 244700
},
{
"epoch": 0.81,
"learning_rate": 9.463353082805375e-06,
"loss": 2.7438,
"step": 244800
},
{
"epoch": 0.81,
"learning_rate": 9.446793995012403e-06,
"loss": 2.737,
"step": 244900
},
{
"epoch": 0.81,
"learning_rate": 9.430234907219432e-06,
"loss": 2.7309,
"step": 245000
},
{
"epoch": 0.81,
"learning_rate": 9.413675819426459e-06,
"loss": 2.7345,
"step": 245100
},
{
"epoch": 0.81,
"learning_rate": 9.39711673163349e-06,
"loss": 2.732,
"step": 245200
},
{
"epoch": 0.81,
"learning_rate": 9.380557643840516e-06,
"loss": 2.7364,
"step": 245300
},
{
"epoch": 0.81,
"learning_rate": 9.363998556047545e-06,
"loss": 2.7486,
"step": 245400
},
{
"epoch": 0.81,
"learning_rate": 9.347439468254573e-06,
"loss": 2.715,
"step": 245500
},
{
"epoch": 0.81,
"learning_rate": 9.330880380461602e-06,
"loss": 2.7204,
"step": 245600
},
{
"epoch": 0.81,
"learning_rate": 9.31432129266863e-06,
"loss": 2.7261,
"step": 245700
},
{
"epoch": 0.81,
"learning_rate": 9.297762204875657e-06,
"loss": 2.7294,
"step": 245800
},
{
"epoch": 0.81,
"learning_rate": 9.281203117082686e-06,
"loss": 2.7232,
"step": 245900
},
{
"epoch": 0.81,
"learning_rate": 9.264644029289715e-06,
"loss": 2.7211,
"step": 246000
},
{
"epoch": 0.82,
"learning_rate": 9.248084941496743e-06,
"loss": 2.7481,
"step": 246100
},
{
"epoch": 0.82,
"learning_rate": 9.231525853703772e-06,
"loss": 2.7316,
"step": 246200
},
{
"epoch": 0.82,
"learning_rate": 9.2149667659108e-06,
"loss": 2.7335,
"step": 246300
},
{
"epoch": 0.82,
"learning_rate": 9.198407678117827e-06,
"loss": 2.7222,
"step": 246400
},
{
"epoch": 0.82,
"learning_rate": 9.181848590324858e-06,
"loss": 2.7335,
"step": 246500
},
{
"epoch": 0.82,
"learning_rate": 9.165289502531885e-06,
"loss": 2.7255,
"step": 246600
},
{
"epoch": 0.82,
"learning_rate": 9.148730414738913e-06,
"loss": 2.7286,
"step": 246700
},
{
"epoch": 0.82,
"learning_rate": 9.132171326945942e-06,
"loss": 2.724,
"step": 246800
},
{
"epoch": 0.82,
"learning_rate": 9.115612239152969e-06,
"loss": 2.7224,
"step": 246900
},
{
"epoch": 0.82,
"learning_rate": 9.099053151359999e-06,
"loss": 2.7295,
"step": 247000
},
{
"epoch": 0.82,
"learning_rate": 9.082494063567026e-06,
"loss": 2.7223,
"step": 247100
},
{
"epoch": 0.82,
"learning_rate": 9.065934975774056e-06,
"loss": 2.7345,
"step": 247200
},
{
"epoch": 0.82,
"learning_rate": 9.049375887981083e-06,
"loss": 2.7282,
"step": 247300
},
{
"epoch": 0.82,
"learning_rate": 9.032816800188112e-06,
"loss": 2.7213,
"step": 247400
},
{
"epoch": 0.82,
"learning_rate": 9.01625771239514e-06,
"loss": 2.7421,
"step": 247500
},
{
"epoch": 0.82,
"learning_rate": 8.999698624602167e-06,
"loss": 2.7305,
"step": 247600
},
{
"epoch": 0.82,
"learning_rate": 8.983139536809198e-06,
"loss": 2.7322,
"step": 247700
},
{
"epoch": 0.82,
"learning_rate": 8.966580449016224e-06,
"loss": 2.7298,
"step": 247800
},
{
"epoch": 0.82,
"learning_rate": 8.950021361223253e-06,
"loss": 2.7287,
"step": 247900
},
{
"epoch": 0.82,
"learning_rate": 8.933462273430282e-06,
"loss": 2.7343,
"step": 248000
},
{
"epoch": 0.82,
"learning_rate": 8.91690318563731e-06,
"loss": 2.7329,
"step": 248100
},
{
"epoch": 0.82,
"learning_rate": 8.900344097844339e-06,
"loss": 2.7402,
"step": 248200
},
{
"epoch": 0.82,
"learning_rate": 8.883785010051367e-06,
"loss": 2.7289,
"step": 248300
},
{
"epoch": 0.82,
"learning_rate": 8.867225922258394e-06,
"loss": 2.7376,
"step": 248400
},
{
"epoch": 0.82,
"learning_rate": 8.850666834465423e-06,
"loss": 2.7236,
"step": 248500
},
{
"epoch": 0.82,
"learning_rate": 8.834107746672452e-06,
"loss": 2.728,
"step": 248600
},
{
"epoch": 0.82,
"learning_rate": 8.81754865887948e-06,
"loss": 2.7391,
"step": 248700
},
{
"epoch": 0.82,
"learning_rate": 8.800989571086509e-06,
"loss": 2.7361,
"step": 248800
},
{
"epoch": 0.82,
"learning_rate": 8.784430483293536e-06,
"loss": 2.75,
"step": 248900
},
{
"epoch": 0.82,
"learning_rate": 8.767871395500566e-06,
"loss": 2.7213,
"step": 249000
},
{
"epoch": 0.82,
"learning_rate": 8.751312307707593e-06,
"loss": 2.7327,
"step": 249100
},
{
"epoch": 0.83,
"learning_rate": 8.734753219914622e-06,
"loss": 2.7299,
"step": 249200
},
{
"epoch": 0.83,
"learning_rate": 8.71819413212165e-06,
"loss": 2.722,
"step": 249300
},
{
"epoch": 0.83,
"learning_rate": 8.701635044328677e-06,
"loss": 2.7191,
"step": 249400
},
{
"epoch": 0.83,
"learning_rate": 8.685075956535707e-06,
"loss": 2.7378,
"step": 249500
},
{
"epoch": 0.83,
"learning_rate": 8.668516868742734e-06,
"loss": 2.7283,
"step": 249600
},
{
"epoch": 0.83,
"learning_rate": 8.651957780949765e-06,
"loss": 2.7348,
"step": 249700
},
{
"epoch": 0.83,
"learning_rate": 8.635398693156791e-06,
"loss": 2.7295,
"step": 249800
},
{
"epoch": 0.83,
"learning_rate": 8.61883960536382e-06,
"loss": 2.7287,
"step": 249900
},
{
"epoch": 0.83,
"learning_rate": 8.602280517570849e-06,
"loss": 2.7304,
"step": 250000
},
{
"epoch": 0.83,
"learning_rate": 8.585721429777877e-06,
"loss": 2.7209,
"step": 250100
},
{
"epoch": 0.83,
"learning_rate": 8.569162341984906e-06,
"loss": 2.7467,
"step": 250200
},
{
"epoch": 0.83,
"learning_rate": 8.552603254191933e-06,
"loss": 2.7398,
"step": 250300
},
{
"epoch": 0.83,
"learning_rate": 8.536044166398961e-06,
"loss": 2.7311,
"step": 250400
},
{
"epoch": 0.83,
"learning_rate": 8.51948507860599e-06,
"loss": 2.7132,
"step": 250500
},
{
"epoch": 0.83,
"learning_rate": 8.502925990813019e-06,
"loss": 2.727,
"step": 250600
},
{
"epoch": 0.83,
"learning_rate": 8.486366903020047e-06,
"loss": 2.7394,
"step": 250700
},
{
"epoch": 0.83,
"learning_rate": 8.469807815227076e-06,
"loss": 2.7202,
"step": 250800
},
{
"epoch": 0.83,
"learning_rate": 8.453248727434103e-06,
"loss": 2.7196,
"step": 250900
},
{
"epoch": 0.83,
"learning_rate": 8.436689639641131e-06,
"loss": 2.7282,
"step": 251000
},
{
"epoch": 0.83,
"learning_rate": 8.42013055184816e-06,
"loss": 2.725,
"step": 251100
},
{
"epoch": 0.83,
"learning_rate": 8.403571464055189e-06,
"loss": 2.7321,
"step": 251200
},
{
"epoch": 0.83,
"learning_rate": 8.387012376262217e-06,
"loss": 2.7328,
"step": 251300
},
{
"epoch": 0.83,
"learning_rate": 8.370453288469244e-06,
"loss": 2.7245,
"step": 251400
},
{
"epoch": 0.83,
"learning_rate": 8.353894200676274e-06,
"loss": 2.7153,
"step": 251500
},
{
"epoch": 0.83,
"learning_rate": 8.337335112883301e-06,
"loss": 2.7212,
"step": 251600
},
{
"epoch": 0.83,
"learning_rate": 8.320776025090332e-06,
"loss": 2.7287,
"step": 251700
},
{
"epoch": 0.83,
"learning_rate": 8.304216937297359e-06,
"loss": 2.7498,
"step": 251800
},
{
"epoch": 0.83,
"learning_rate": 8.287657849504387e-06,
"loss": 2.7277,
"step": 251900
},
{
"epoch": 0.83,
"learning_rate": 8.271098761711416e-06,
"loss": 2.7236,
"step": 252000
},
{
"epoch": 0.83,
"learning_rate": 8.254539673918443e-06,
"loss": 2.747,
"step": 252100
},
{
"epoch": 0.84,
"learning_rate": 8.237980586125473e-06,
"loss": 2.7302,
"step": 252200
},
{
"epoch": 0.84,
"learning_rate": 8.2214214983325e-06,
"loss": 2.7151,
"step": 252300
},
{
"epoch": 0.84,
"learning_rate": 8.204862410539528e-06,
"loss": 2.7216,
"step": 252400
},
{
"epoch": 0.84,
"learning_rate": 8.188303322746557e-06,
"loss": 2.7344,
"step": 252500
},
{
"epoch": 0.84,
"learning_rate": 8.171744234953586e-06,
"loss": 2.7268,
"step": 252600
},
{
"epoch": 0.84,
"learning_rate": 8.155185147160614e-06,
"loss": 2.7449,
"step": 252700
},
{
"epoch": 0.84,
"learning_rate": 8.138626059367641e-06,
"loss": 2.7301,
"step": 252800
},
{
"epoch": 0.84,
"learning_rate": 8.12206697157467e-06,
"loss": 2.7315,
"step": 252900
},
{
"epoch": 0.84,
"learning_rate": 8.105507883781698e-06,
"loss": 2.7146,
"step": 253000
},
{
"epoch": 0.84,
"learning_rate": 8.088948795988727e-06,
"loss": 2.7296,
"step": 253100
},
{
"epoch": 0.84,
"learning_rate": 8.072389708195756e-06,
"loss": 2.7314,
"step": 253200
},
{
"epoch": 0.84,
"learning_rate": 8.055830620402784e-06,
"loss": 2.7356,
"step": 253300
},
{
"epoch": 0.84,
"learning_rate": 8.039271532609811e-06,
"loss": 2.7349,
"step": 253400
},
{
"epoch": 0.84,
"learning_rate": 8.022712444816841e-06,
"loss": 2.7229,
"step": 253500
},
{
"epoch": 0.84,
"learning_rate": 8.006153357023868e-06,
"loss": 2.7295,
"step": 253600
},
{
"epoch": 0.84,
"learning_rate": 7.989594269230897e-06,
"loss": 2.7299,
"step": 253700
},
{
"epoch": 0.84,
"learning_rate": 7.973035181437926e-06,
"loss": 2.7343,
"step": 253800
},
{
"epoch": 0.84,
"learning_rate": 7.956476093644952e-06,
"loss": 2.72,
"step": 253900
},
{
"epoch": 0.84,
"learning_rate": 7.939917005851983e-06,
"loss": 2.7371,
"step": 254000
},
{
"epoch": 0.84,
"learning_rate": 7.92335791805901e-06,
"loss": 2.7391,
"step": 254100
},
{
"epoch": 0.84,
"learning_rate": 7.90679883026604e-06,
"loss": 2.7278,
"step": 254200
},
{
"epoch": 0.84,
"learning_rate": 7.890239742473067e-06,
"loss": 2.7396,
"step": 254300
},
{
"epoch": 0.84,
"learning_rate": 7.873680654680095e-06,
"loss": 2.7233,
"step": 254400
},
{
"epoch": 0.84,
"learning_rate": 7.857121566887124e-06,
"loss": 2.7259,
"step": 254500
},
{
"epoch": 0.84,
"learning_rate": 7.840562479094151e-06,
"loss": 2.7401,
"step": 254600
},
{
"epoch": 0.84,
"learning_rate": 7.824003391301181e-06,
"loss": 2.7365,
"step": 254700
},
{
"epoch": 0.84,
"learning_rate": 7.807444303508208e-06,
"loss": 2.7104,
"step": 254800
},
{
"epoch": 0.84,
"learning_rate": 7.790885215715237e-06,
"loss": 2.7307,
"step": 254900
},
{
"epoch": 0.84,
"learning_rate": 7.774326127922265e-06,
"loss": 2.7335,
"step": 255000
},
{
"epoch": 0.84,
"learning_rate": 7.757767040129294e-06,
"loss": 2.7234,
"step": 255100
},
{
"epoch": 0.85,
"learning_rate": 7.741207952336323e-06,
"loss": 2.7422,
"step": 255200
},
{
"epoch": 0.85,
"learning_rate": 7.724648864543351e-06,
"loss": 2.7418,
"step": 255300
},
{
"epoch": 0.85,
"learning_rate": 7.708089776750378e-06,
"loss": 2.7111,
"step": 255400
},
{
"epoch": 0.85,
"learning_rate": 7.691530688957407e-06,
"loss": 2.7152,
"step": 255500
},
{
"epoch": 0.85,
"learning_rate": 7.674971601164435e-06,
"loss": 2.7339,
"step": 255600
},
{
"epoch": 0.85,
"learning_rate": 7.658412513371464e-06,
"loss": 2.7347,
"step": 255700
},
{
"epoch": 0.85,
"learning_rate": 7.641853425578493e-06,
"loss": 2.7201,
"step": 255800
},
{
"epoch": 0.85,
"learning_rate": 7.6252943377855195e-06,
"loss": 2.7186,
"step": 255900
},
{
"epoch": 0.85,
"learning_rate": 7.608735249992549e-06,
"loss": 2.7346,
"step": 256000
},
{
"epoch": 0.85,
"learning_rate": 7.592176162199577e-06,
"loss": 2.7375,
"step": 256100
},
{
"epoch": 0.85,
"learning_rate": 7.575617074406606e-06,
"loss": 2.716,
"step": 256200
},
{
"epoch": 0.85,
"learning_rate": 7.559057986613634e-06,
"loss": 2.7346,
"step": 256300
},
{
"epoch": 0.85,
"learning_rate": 7.542498898820662e-06,
"loss": 2.7283,
"step": 256400
},
{
"epoch": 0.85,
"learning_rate": 7.525939811027691e-06,
"loss": 2.7511,
"step": 256500
},
{
"epoch": 0.85,
"learning_rate": 7.509380723234719e-06,
"loss": 2.7392,
"step": 256600
},
{
"epoch": 0.85,
"learning_rate": 7.4928216354417475e-06,
"loss": 2.7376,
"step": 256700
},
{
"epoch": 0.85,
"learning_rate": 7.476262547648775e-06,
"loss": 2.7439,
"step": 256800
},
{
"epoch": 0.85,
"learning_rate": 7.459703459855803e-06,
"loss": 2.728,
"step": 256900
},
{
"epoch": 0.85,
"learning_rate": 7.4431443720628324e-06,
"loss": 2.7225,
"step": 257000
},
{
"epoch": 0.85,
"learning_rate": 7.42658528426986e-06,
"loss": 2.7322,
"step": 257100
},
{
"epoch": 0.85,
"learning_rate": 7.41002619647689e-06,
"loss": 2.7183,
"step": 257200
},
{
"epoch": 0.85,
"learning_rate": 7.393467108683917e-06,
"loss": 2.7408,
"step": 257300
},
{
"epoch": 0.85,
"learning_rate": 7.376908020890945e-06,
"loss": 2.7228,
"step": 257400
},
{
"epoch": 0.85,
"learning_rate": 7.360348933097974e-06,
"loss": 2.7164,
"step": 257500
},
{
"epoch": 0.85,
"learning_rate": 7.3437898453050015e-06,
"loss": 2.7168,
"step": 257600
},
{
"epoch": 0.85,
"learning_rate": 7.327230757512031e-06,
"loss": 2.7279,
"step": 257700
},
{
"epoch": 0.85,
"learning_rate": 7.310671669719059e-06,
"loss": 2.7405,
"step": 257800
},
{
"epoch": 0.85,
"learning_rate": 7.2941125819260865e-06,
"loss": 2.7341,
"step": 257900
},
{
"epoch": 0.85,
"learning_rate": 7.277553494133116e-06,
"loss": 2.7095,
"step": 258000
},
{
"epoch": 0.85,
"learning_rate": 7.260994406340144e-06,
"loss": 2.7196,
"step": 258100
},
{
"epoch": 0.86,
"learning_rate": 7.244435318547173e-06,
"loss": 2.7109,
"step": 258200
},
{
"epoch": 0.86,
"learning_rate": 7.227876230754201e-06,
"loss": 2.7292,
"step": 258300
},
{
"epoch": 0.86,
"learning_rate": 7.211317142961229e-06,
"loss": 2.7126,
"step": 258400
},
{
"epoch": 0.86,
"learning_rate": 7.194758055168257e-06,
"loss": 2.7343,
"step": 258500
},
{
"epoch": 0.86,
"learning_rate": 7.178198967375285e-06,
"loss": 2.7322,
"step": 258600
},
{
"epoch": 0.86,
"learning_rate": 7.1616398795823145e-06,
"loss": 2.732,
"step": 258700
},
{
"epoch": 0.86,
"learning_rate": 7.145080791789342e-06,
"loss": 2.7219,
"step": 258800
},
{
"epoch": 0.86,
"learning_rate": 7.12852170399637e-06,
"loss": 2.7284,
"step": 258900
},
{
"epoch": 0.86,
"learning_rate": 7.1119626162033995e-06,
"loss": 2.7177,
"step": 259000
},
{
"epoch": 0.86,
"learning_rate": 7.095403528410427e-06,
"loss": 2.7243,
"step": 259100
},
{
"epoch": 0.86,
"learning_rate": 7.078844440617456e-06,
"loss": 2.7318,
"step": 259200
},
{
"epoch": 0.86,
"learning_rate": 7.062285352824484e-06,
"loss": 2.7144,
"step": 259300
},
{
"epoch": 0.86,
"learning_rate": 7.045726265031511e-06,
"loss": 2.7396,
"step": 259400
},
{
"epoch": 0.86,
"learning_rate": 7.029167177238541e-06,
"loss": 2.7345,
"step": 259500
},
{
"epoch": 0.86,
"learning_rate": 7.0126080894455686e-06,
"loss": 2.7433,
"step": 259600
},
{
"epoch": 0.86,
"learning_rate": 6.996049001652598e-06,
"loss": 2.7284,
"step": 259700
},
{
"epoch": 0.86,
"learning_rate": 6.979489913859626e-06,
"loss": 2.7347,
"step": 259800
},
{
"epoch": 0.86,
"learning_rate": 6.9629308260666535e-06,
"loss": 2.7212,
"step": 259900
},
{
"epoch": 0.86,
"learning_rate": 6.946371738273683e-06,
"loss": 2.7116,
"step": 260000
},
{
"epoch": 0.86,
"learning_rate": 6.929812650480711e-06,
"loss": 2.7319,
"step": 260100
},
{
"epoch": 0.86,
"learning_rate": 6.913253562687739e-06,
"loss": 2.7408,
"step": 260200
},
{
"epoch": 0.86,
"learning_rate": 6.896694474894767e-06,
"loss": 2.7192,
"step": 260300
},
{
"epoch": 0.86,
"learning_rate": 6.880135387101795e-06,
"loss": 2.7284,
"step": 260400
},
{
"epoch": 0.86,
"learning_rate": 6.863576299308824e-06,
"loss": 2.7441,
"step": 260500
},
{
"epoch": 0.86,
"learning_rate": 6.847017211515852e-06,
"loss": 2.7253,
"step": 260600
},
{
"epoch": 0.86,
"learning_rate": 6.8304581237228815e-06,
"loss": 2.7369,
"step": 260700
},
{
"epoch": 0.86,
"learning_rate": 6.813899035929909e-06,
"loss": 2.7321,
"step": 260800
},
{
"epoch": 0.86,
"learning_rate": 6.797339948136937e-06,
"loss": 2.728,
"step": 260900
},
{
"epoch": 0.86,
"learning_rate": 6.780780860343966e-06,
"loss": 2.7196,
"step": 261000
},
{
"epoch": 0.86,
"learning_rate": 6.764221772550993e-06,
"loss": 2.7382,
"step": 261100
},
{
"epoch": 0.87,
"learning_rate": 6.747662684758023e-06,
"loss": 2.7289,
"step": 261200
},
{
"epoch": 0.87,
"learning_rate": 6.731103596965051e-06,
"loss": 2.7446,
"step": 261300
},
{
"epoch": 0.87,
"learning_rate": 6.714544509172078e-06,
"loss": 2.7189,
"step": 261400
},
{
"epoch": 0.87,
"learning_rate": 6.697985421379108e-06,
"loss": 2.7191,
"step": 261500
},
{
"epoch": 0.87,
"learning_rate": 6.681426333586136e-06,
"loss": 2.7291,
"step": 261600
},
{
"epoch": 0.87,
"learning_rate": 6.664867245793165e-06,
"loss": 2.7239,
"step": 261700
},
{
"epoch": 0.87,
"learning_rate": 6.648308158000193e-06,
"loss": 2.7359,
"step": 261800
},
{
"epoch": 0.87,
"learning_rate": 6.6317490702072206e-06,
"loss": 2.7416,
"step": 261900
},
{
"epoch": 0.87,
"learning_rate": 6.615189982414249e-06,
"loss": 2.7367,
"step": 262000
},
{
"epoch": 0.87,
"learning_rate": 6.598630894621277e-06,
"loss": 2.7358,
"step": 262100
},
{
"epoch": 0.87,
"learning_rate": 6.582071806828306e-06,
"loss": 2.7254,
"step": 262200
},
{
"epoch": 0.87,
"learning_rate": 6.565512719035334e-06,
"loss": 2.7489,
"step": 262300
},
{
"epoch": 0.87,
"learning_rate": 6.548953631242362e-06,
"loss": 2.7352,
"step": 262400
},
{
"epoch": 0.87,
"learning_rate": 6.532394543449391e-06,
"loss": 2.7318,
"step": 262500
},
{
"epoch": 0.87,
"learning_rate": 6.515835455656419e-06,
"loss": 2.7296,
"step": 262600
},
{
"epoch": 0.87,
"learning_rate": 6.499276367863448e-06,
"loss": 2.737,
"step": 262700
},
{
"epoch": 0.87,
"learning_rate": 6.4827172800704755e-06,
"loss": 2.732,
"step": 262800
},
{
"epoch": 0.87,
"learning_rate": 6.466158192277503e-06,
"loss": 2.7358,
"step": 262900
},
{
"epoch": 0.87,
"learning_rate": 6.449599104484533e-06,
"loss": 2.7234,
"step": 263000
},
{
"epoch": 0.87,
"learning_rate": 6.4330400166915604e-06,
"loss": 2.7303,
"step": 263100
},
{
"epoch": 0.87,
"learning_rate": 6.41648092889859e-06,
"loss": 2.7186,
"step": 263200
},
{
"epoch": 0.87,
"learning_rate": 6.399921841105618e-06,
"loss": 2.7135,
"step": 263300
},
{
"epoch": 0.87,
"learning_rate": 6.383362753312645e-06,
"loss": 2.7306,
"step": 263400
},
{
"epoch": 0.87,
"learning_rate": 6.366803665519675e-06,
"loss": 2.7262,
"step": 263500
},
{
"epoch": 0.87,
"learning_rate": 6.350244577726703e-06,
"loss": 2.7382,
"step": 263600
},
{
"epoch": 0.87,
"learning_rate": 6.333685489933731e-06,
"loss": 2.7274,
"step": 263700
},
{
"epoch": 0.87,
"learning_rate": 6.317126402140759e-06,
"loss": 2.7158,
"step": 263800
},
{
"epoch": 0.87,
"learning_rate": 6.300567314347787e-06,
"loss": 2.7248,
"step": 263900
},
{
"epoch": 0.87,
"learning_rate": 6.284008226554816e-06,
"loss": 2.7407,
"step": 264000
},
{
"epoch": 0.87,
"learning_rate": 6.267449138761844e-06,
"loss": 2.7307,
"step": 264100
},
{
"epoch": 0.87,
"learning_rate": 6.250890050968873e-06,
"loss": 2.729,
"step": 264200
},
{
"epoch": 0.88,
"learning_rate": 6.234330963175901e-06,
"loss": 2.7309,
"step": 264300
},
{
"epoch": 0.88,
"learning_rate": 6.21777187538293e-06,
"loss": 2.73,
"step": 264400
},
{
"epoch": 0.88,
"learning_rate": 6.2012127875899575e-06,
"loss": 2.7356,
"step": 264500
},
{
"epoch": 0.88,
"learning_rate": 6.184653699796985e-06,
"loss": 2.7466,
"step": 264600
},
{
"epoch": 0.88,
"learning_rate": 6.168094612004014e-06,
"loss": 2.7231,
"step": 264700
},
{
"epoch": 0.88,
"learning_rate": 6.1515355242110425e-06,
"loss": 2.7323,
"step": 264800
},
{
"epoch": 0.88,
"learning_rate": 6.134976436418071e-06,
"loss": 2.7242,
"step": 264900
},
{
"epoch": 0.88,
"learning_rate": 6.1184173486251e-06,
"loss": 2.6975,
"step": 265000
},
{
"epoch": 0.88,
"learning_rate": 6.1018582608321275e-06,
"loss": 2.7308,
"step": 265100
},
{
"epoch": 0.88,
"learning_rate": 6.085299173039156e-06,
"loss": 2.7255,
"step": 265200
},
{
"epoch": 0.88,
"learning_rate": 6.068740085246185e-06,
"loss": 2.739,
"step": 265300
},
{
"epoch": 0.88,
"learning_rate": 6.0521809974532124e-06,
"loss": 2.7254,
"step": 265400
},
{
"epoch": 0.88,
"learning_rate": 6.035621909660241e-06,
"loss": 2.7237,
"step": 265500
},
{
"epoch": 0.88,
"learning_rate": 6.019062821867269e-06,
"loss": 2.735,
"step": 265600
},
{
"epoch": 0.88,
"learning_rate": 6.002503734074297e-06,
"loss": 2.727,
"step": 265700
},
{
"epoch": 0.88,
"learning_rate": 5.985944646281326e-06,
"loss": 2.7353,
"step": 265800
},
{
"epoch": 0.88,
"learning_rate": 5.969385558488355e-06,
"loss": 2.711,
"step": 265900
},
{
"epoch": 0.88,
"learning_rate": 5.952826470695383e-06,
"loss": 2.7353,
"step": 266000
},
{
"epoch": 0.88,
"learning_rate": 5.936267382902411e-06,
"loss": 2.7081,
"step": 266100
},
{
"epoch": 0.88,
"learning_rate": 5.91970829510944e-06,
"loss": 2.7261,
"step": 266200
},
{
"epoch": 0.88,
"learning_rate": 5.903149207316467e-06,
"loss": 2.7307,
"step": 266300
},
{
"epoch": 0.88,
"learning_rate": 5.886590119523496e-06,
"loss": 2.7225,
"step": 266400
},
{
"epoch": 0.88,
"learning_rate": 5.8700310317305246e-06,
"loss": 2.7391,
"step": 266500
},
{
"epoch": 0.88,
"learning_rate": 5.853471943937552e-06,
"loss": 2.7216,
"step": 266600
},
{
"epoch": 0.88,
"learning_rate": 5.836912856144581e-06,
"loss": 2.7253,
"step": 266700
},
{
"epoch": 0.88,
"learning_rate": 5.8203537683516095e-06,
"loss": 2.7175,
"step": 266800
},
{
"epoch": 0.88,
"learning_rate": 5.803794680558638e-06,
"loss": 2.7278,
"step": 266900
},
{
"epoch": 0.88,
"learning_rate": 5.787235592765666e-06,
"loss": 2.7356,
"step": 267000
},
{
"epoch": 0.88,
"learning_rate": 5.7706765049726945e-06,
"loss": 2.7228,
"step": 267100
},
{
"epoch": 0.88,
"learning_rate": 5.754117417179722e-06,
"loss": 2.7192,
"step": 267200
},
{
"epoch": 0.89,
"learning_rate": 5.737558329386751e-06,
"loss": 2.7422,
"step": 267300
},
{
"epoch": 0.89,
"learning_rate": 5.7209992415937795e-06,
"loss": 2.7233,
"step": 267400
},
{
"epoch": 0.89,
"learning_rate": 5.704440153800808e-06,
"loss": 2.7247,
"step": 267500
},
{
"epoch": 0.89,
"learning_rate": 5.687881066007836e-06,
"loss": 2.7277,
"step": 267600
},
{
"epoch": 0.89,
"learning_rate": 5.6713219782148644e-06,
"loss": 2.7189,
"step": 267700
},
{
"epoch": 0.89,
"learning_rate": 5.654762890421893e-06,
"loss": 2.7251,
"step": 267800
},
{
"epoch": 0.89,
"learning_rate": 5.638203802628921e-06,
"loss": 2.718,
"step": 267900
},
{
"epoch": 0.89,
"learning_rate": 5.621644714835949e-06,
"loss": 2.7373,
"step": 268000
},
{
"epoch": 0.89,
"learning_rate": 5.605085627042977e-06,
"loss": 2.7404,
"step": 268100
},
{
"epoch": 0.89,
"learning_rate": 5.588526539250006e-06,
"loss": 2.7182,
"step": 268200
},
{
"epoch": 0.89,
"learning_rate": 5.571967451457034e-06,
"loss": 2.7222,
"step": 268300
},
{
"epoch": 0.89,
"learning_rate": 5.555408363664063e-06,
"loss": 2.7348,
"step": 268400
},
{
"epoch": 0.89,
"learning_rate": 5.538849275871092e-06,
"loss": 2.7245,
"step": 268500
},
{
"epoch": 0.89,
"learning_rate": 5.522290188078119e-06,
"loss": 2.7349,
"step": 268600
},
{
"epoch": 0.89,
"learning_rate": 5.505731100285148e-06,
"loss": 2.7358,
"step": 268700
},
{
"epoch": 0.89,
"learning_rate": 5.489172012492176e-06,
"loss": 2.7235,
"step": 268800
},
{
"epoch": 0.89,
"learning_rate": 5.472612924699204e-06,
"loss": 2.7184,
"step": 268900
},
{
"epoch": 0.89,
"learning_rate": 5.456053836906233e-06,
"loss": 2.7291,
"step": 269000
},
{
"epoch": 0.89,
"learning_rate": 5.439494749113261e-06,
"loss": 2.7339,
"step": 269100
},
{
"epoch": 0.89,
"learning_rate": 5.422935661320289e-06,
"loss": 2.7224,
"step": 269200
},
{
"epoch": 0.89,
"learning_rate": 5.406376573527318e-06,
"loss": 2.7226,
"step": 269300
},
{
"epoch": 0.89,
"learning_rate": 5.3898174857343465e-06,
"loss": 2.728,
"step": 269400
},
{
"epoch": 0.89,
"learning_rate": 5.373258397941375e-06,
"loss": 2.7079,
"step": 269500
},
{
"epoch": 0.89,
"learning_rate": 5.356699310148403e-06,
"loss": 2.7412,
"step": 269600
},
{
"epoch": 0.89,
"learning_rate": 5.340140222355431e-06,
"loss": 2.7284,
"step": 269700
},
{
"epoch": 0.89,
"learning_rate": 5.323581134562459e-06,
"loss": 2.7307,
"step": 269800
},
{
"epoch": 0.89,
"learning_rate": 5.307022046769488e-06,
"loss": 2.7229,
"step": 269900
},
{
"epoch": 0.89,
"learning_rate": 5.2904629589765164e-06,
"loss": 2.7277,
"step": 270000
},
{
"epoch": 0.89,
"learning_rate": 5.273903871183544e-06,
"loss": 2.7349,
"step": 270100
},
{
"epoch": 0.89,
"learning_rate": 5.257344783390573e-06,
"loss": 2.7279,
"step": 270200
},
{
"epoch": 0.9,
"learning_rate": 5.240785695597601e-06,
"loss": 2.7321,
"step": 270300
},
{
"epoch": 0.9,
"learning_rate": 5.22422660780463e-06,
"loss": 2.7224,
"step": 270400
},
{
"epoch": 0.9,
"learning_rate": 5.207667520011658e-06,
"loss": 2.7141,
"step": 270500
},
{
"epoch": 0.9,
"learning_rate": 5.1911084322186855e-06,
"loss": 2.732,
"step": 270600
},
{
"epoch": 0.9,
"learning_rate": 5.174549344425714e-06,
"loss": 2.7396,
"step": 270700
},
{
"epoch": 0.9,
"learning_rate": 5.157990256632743e-06,
"loss": 2.731,
"step": 270800
},
{
"epoch": 0.9,
"learning_rate": 5.141431168839771e-06,
"loss": 2.7335,
"step": 270900
},
{
"epoch": 0.9,
"learning_rate": 5.1248720810468e-06,
"loss": 2.7279,
"step": 271000
},
{
"epoch": 0.9,
"learning_rate": 5.108312993253828e-06,
"loss": 2.7192,
"step": 271100
},
{
"epoch": 0.9,
"learning_rate": 5.091753905460856e-06,
"loss": 2.7251,
"step": 271200
},
{
"epoch": 0.9,
"learning_rate": 5.075194817667885e-06,
"loss": 2.7374,
"step": 271300
},
{
"epoch": 0.9,
"learning_rate": 5.058635729874913e-06,
"loss": 2.7258,
"step": 271400
},
{
"epoch": 0.9,
"learning_rate": 5.042076642081941e-06,
"loss": 2.7273,
"step": 271500
},
{
"epoch": 0.9,
"learning_rate": 5.025517554288969e-06,
"loss": 2.7369,
"step": 271600
},
{
"epoch": 0.9,
"learning_rate": 5.008958466495998e-06,
"loss": 2.7309,
"step": 271700
},
{
"epoch": 0.9,
"learning_rate": 4.992399378703026e-06,
"loss": 2.7458,
"step": 271800
},
{
"epoch": 0.9,
"learning_rate": 4.975840290910055e-06,
"loss": 2.735,
"step": 271900
},
{
"epoch": 0.9,
"learning_rate": 4.9592812031170835e-06,
"loss": 2.7119,
"step": 272000
},
{
"epoch": 0.9,
"learning_rate": 4.942722115324111e-06,
"loss": 2.7245,
"step": 272100
},
{
"epoch": 0.9,
"learning_rate": 4.92616302753114e-06,
"loss": 2.7267,
"step": 272200
},
{
"epoch": 0.9,
"learning_rate": 4.909603939738168e-06,
"loss": 2.7273,
"step": 272300
},
{
"epoch": 0.9,
"learning_rate": 4.893044851945196e-06,
"loss": 2.73,
"step": 272400
},
{
"epoch": 0.9,
"learning_rate": 4.876485764152225e-06,
"loss": 2.7165,
"step": 272500
},
{
"epoch": 0.9,
"learning_rate": 4.8599266763592525e-06,
"loss": 2.7229,
"step": 272600
},
{
"epoch": 0.9,
"learning_rate": 4.843367588566281e-06,
"loss": 2.7099,
"step": 272700
},
{
"epoch": 0.9,
"learning_rate": 4.82680850077331e-06,
"loss": 2.7166,
"step": 272800
},
{
"epoch": 0.9,
"learning_rate": 4.810249412980338e-06,
"loss": 2.7251,
"step": 272900
},
{
"epoch": 0.9,
"learning_rate": 4.793690325187367e-06,
"loss": 2.7302,
"step": 273000
},
{
"epoch": 0.9,
"learning_rate": 4.777131237394395e-06,
"loss": 2.7249,
"step": 273100
},
{
"epoch": 0.9,
"learning_rate": 4.7605721496014225e-06,
"loss": 2.7428,
"step": 273200
},
{
"epoch": 0.91,
"learning_rate": 4.744013061808451e-06,
"loss": 2.7258,
"step": 273300
},
{
"epoch": 0.91,
"learning_rate": 4.72745397401548e-06,
"loss": 2.7171,
"step": 273400
},
{
"epoch": 0.91,
"learning_rate": 4.710894886222508e-06,
"loss": 2.7237,
"step": 273500
},
{
"epoch": 0.91,
"learning_rate": 4.694335798429536e-06,
"loss": 2.7271,
"step": 273600
},
{
"epoch": 0.91,
"learning_rate": 4.677776710636565e-06,
"loss": 2.7188,
"step": 273700
},
{
"epoch": 0.91,
"learning_rate": 4.661217622843593e-06,
"loss": 2.7193,
"step": 273800
},
{
"epoch": 0.91,
"learning_rate": 4.644658535050622e-06,
"loss": 2.7333,
"step": 273900
},
{
"epoch": 0.91,
"learning_rate": 4.62809944725765e-06,
"loss": 2.7178,
"step": 274000
},
{
"epoch": 0.91,
"learning_rate": 4.611540359464677e-06,
"loss": 2.7317,
"step": 274100
},
{
"epoch": 0.91,
"learning_rate": 4.594981271671706e-06,
"loss": 2.713,
"step": 274200
},
{
"epoch": 0.91,
"learning_rate": 4.578422183878735e-06,
"loss": 2.7263,
"step": 274300
},
{
"epoch": 0.91,
"learning_rate": 4.561863096085763e-06,
"loss": 2.7143,
"step": 274400
},
{
"epoch": 0.91,
"learning_rate": 4.545304008292792e-06,
"loss": 2.7164,
"step": 274500
},
{
"epoch": 0.91,
"learning_rate": 4.52874492049982e-06,
"loss": 2.7314,
"step": 274600
},
{
"epoch": 0.91,
"learning_rate": 4.512185832706848e-06,
"loss": 2.7256,
"step": 274700
},
{
"epoch": 0.91,
"learning_rate": 4.495626744913877e-06,
"loss": 2.7266,
"step": 274800
},
{
"epoch": 0.91,
"learning_rate": 4.4790676571209045e-06,
"loss": 2.7285,
"step": 274900
},
{
"epoch": 0.91,
"learning_rate": 4.462508569327933e-06,
"loss": 2.7193,
"step": 275000
},
{
"epoch": 0.91,
"learning_rate": 4.445949481534961e-06,
"loss": 2.7338,
"step": 275100
},
{
"epoch": 0.91,
"learning_rate": 4.4293903937419895e-06,
"loss": 2.7124,
"step": 275200
},
{
"epoch": 0.91,
"learning_rate": 4.412831305949018e-06,
"loss": 2.7237,
"step": 275300
},
{
"epoch": 0.91,
"learning_rate": 4.396272218156047e-06,
"loss": 2.7338,
"step": 275400
},
{
"epoch": 0.91,
"learning_rate": 4.379713130363075e-06,
"loss": 2.7274,
"step": 275500
},
{
"epoch": 0.91,
"learning_rate": 4.363154042570103e-06,
"loss": 2.7306,
"step": 275600
},
{
"epoch": 0.91,
"learning_rate": 4.346594954777132e-06,
"loss": 2.7527,
"step": 275700
},
{
"epoch": 0.91,
"learning_rate": 4.3300358669841595e-06,
"loss": 2.7208,
"step": 275800
},
{
"epoch": 0.91,
"learning_rate": 4.313476779191188e-06,
"loss": 2.7245,
"step": 275900
},
{
"epoch": 0.91,
"learning_rate": 4.296917691398217e-06,
"loss": 2.7296,
"step": 276000
},
{
"epoch": 0.91,
"learning_rate": 4.2803586036052444e-06,
"loss": 2.7225,
"step": 276100
},
{
"epoch": 0.91,
"learning_rate": 4.263799515812273e-06,
"loss": 2.735,
"step": 276200
},
{
"epoch": 0.92,
"learning_rate": 4.247240428019302e-06,
"loss": 2.7253,
"step": 276300
},
{
"epoch": 0.92,
"learning_rate": 4.23068134022633e-06,
"loss": 2.7186,
"step": 276400
},
{
"epoch": 0.92,
"learning_rate": 4.214122252433359e-06,
"loss": 2.719,
"step": 276500
},
{
"epoch": 0.92,
"learning_rate": 4.197563164640387e-06,
"loss": 2.7098,
"step": 276600
},
{
"epoch": 0.92,
"learning_rate": 4.181004076847414e-06,
"loss": 2.7251,
"step": 276700
},
{
"epoch": 0.92,
"learning_rate": 4.164444989054443e-06,
"loss": 2.7436,
"step": 276800
},
{
"epoch": 0.92,
"learning_rate": 4.1478859012614716e-06,
"loss": 2.7442,
"step": 276900
},
{
"epoch": 0.92,
"learning_rate": 4.1313268134685e-06,
"loss": 2.7355,
"step": 277000
},
{
"epoch": 0.92,
"learning_rate": 4.114767725675528e-06,
"loss": 2.728,
"step": 277100
},
{
"epoch": 0.92,
"learning_rate": 4.0982086378825565e-06,
"loss": 2.7312,
"step": 277200
},
{
"epoch": 0.92,
"learning_rate": 4.081649550089585e-06,
"loss": 2.7312,
"step": 277300
},
{
"epoch": 0.92,
"learning_rate": 4.065090462296614e-06,
"loss": 2.7253,
"step": 277400
},
{
"epoch": 0.92,
"learning_rate": 4.0485313745036415e-06,
"loss": 2.7228,
"step": 277500
},
{
"epoch": 0.92,
"learning_rate": 4.031972286710669e-06,
"loss": 2.7449,
"step": 277600
},
{
"epoch": 0.92,
"learning_rate": 4.015413198917698e-06,
"loss": 2.7197,
"step": 277700
},
{
"epoch": 0.92,
"learning_rate": 3.9988541111247265e-06,
"loss": 2.7268,
"step": 277800
},
{
"epoch": 0.92,
"learning_rate": 3.982295023331755e-06,
"loss": 2.7166,
"step": 277900
},
{
"epoch": 0.92,
"learning_rate": 3.965735935538784e-06,
"loss": 2.7307,
"step": 278000
},
{
"epoch": 0.92,
"learning_rate": 3.9491768477458115e-06,
"loss": 2.7178,
"step": 278100
},
{
"epoch": 0.92,
"learning_rate": 3.93261775995284e-06,
"loss": 2.7128,
"step": 278200
},
{
"epoch": 0.92,
"learning_rate": 3.916058672159869e-06,
"loss": 2.7134,
"step": 278300
},
{
"epoch": 0.92,
"learning_rate": 3.899499584366896e-06,
"loss": 2.7318,
"step": 278400
},
{
"epoch": 0.92,
"learning_rate": 3.882940496573925e-06,
"loss": 2.7211,
"step": 278500
},
{
"epoch": 0.92,
"learning_rate": 3.866381408780953e-06,
"loss": 2.7183,
"step": 278600
},
{
"epoch": 0.92,
"learning_rate": 3.849822320987981e-06,
"loss": 2.7227,
"step": 278700
},
{
"epoch": 0.92,
"learning_rate": 3.83326323319501e-06,
"loss": 2.7284,
"step": 278800
},
{
"epoch": 0.92,
"learning_rate": 3.816704145402039e-06,
"loss": 2.7242,
"step": 278900
},
{
"epoch": 0.92,
"learning_rate": 3.8001450576090668e-06,
"loss": 2.7107,
"step": 279000
},
{
"epoch": 0.92,
"learning_rate": 3.7835859698160945e-06,
"loss": 2.7204,
"step": 279100
},
{
"epoch": 0.92,
"learning_rate": 3.767026882023123e-06,
"loss": 2.7288,
"step": 279200
},
{
"epoch": 0.92,
"learning_rate": 3.7504677942301518e-06,
"loss": 2.7214,
"step": 279300
},
{
"epoch": 0.93,
"learning_rate": 3.73390870643718e-06,
"loss": 2.7142,
"step": 279400
},
{
"epoch": 0.93,
"learning_rate": 3.7173496186442085e-06,
"loss": 2.7304,
"step": 279500
},
{
"epoch": 0.93,
"learning_rate": 3.7007905308512363e-06,
"loss": 2.7156,
"step": 279600
},
{
"epoch": 0.93,
"learning_rate": 3.684231443058265e-06,
"loss": 2.7212,
"step": 279700
},
{
"epoch": 0.93,
"learning_rate": 3.6676723552652935e-06,
"loss": 2.7351,
"step": 279800
},
{
"epoch": 0.93,
"learning_rate": 3.6511132674723217e-06,
"loss": 2.7355,
"step": 279900
},
{
"epoch": 0.93,
"learning_rate": 3.6345541796793503e-06,
"loss": 2.7245,
"step": 280000
},
{
"epoch": 0.93,
"learning_rate": 3.617995091886378e-06,
"loss": 2.7301,
"step": 280100
},
{
"epoch": 0.93,
"learning_rate": 3.6014360040934067e-06,
"loss": 2.7221,
"step": 280200
},
{
"epoch": 0.93,
"learning_rate": 3.584876916300435e-06,
"loss": 2.7412,
"step": 280300
},
{
"epoch": 0.93,
"learning_rate": 3.5683178285074634e-06,
"loss": 2.7349,
"step": 280400
},
{
"epoch": 0.93,
"learning_rate": 3.551758740714492e-06,
"loss": 2.7274,
"step": 280500
},
{
"epoch": 0.93,
"learning_rate": 3.53519965292152e-06,
"loss": 2.7227,
"step": 280600
},
{
"epoch": 0.93,
"learning_rate": 3.5186405651285484e-06,
"loss": 2.7299,
"step": 280700
},
{
"epoch": 0.93,
"learning_rate": 3.5020814773355766e-06,
"loss": 2.7289,
"step": 280800
},
{
"epoch": 0.93,
"learning_rate": 3.485522389542605e-06,
"loss": 2.7145,
"step": 280900
},
{
"epoch": 0.93,
"learning_rate": 3.468963301749634e-06,
"loss": 2.7389,
"step": 281000
},
{
"epoch": 0.93,
"learning_rate": 3.4524042139566616e-06,
"loss": 2.7196,
"step": 281100
},
{
"epoch": 0.93,
"learning_rate": 3.4358451261636898e-06,
"loss": 2.7207,
"step": 281200
},
{
"epoch": 0.93,
"learning_rate": 3.4192860383707184e-06,
"loss": 2.7045,
"step": 281300
},
{
"epoch": 0.93,
"learning_rate": 3.402726950577747e-06,
"loss": 2.7072,
"step": 281400
},
{
"epoch": 0.93,
"learning_rate": 3.3861678627847756e-06,
"loss": 2.7164,
"step": 281500
},
{
"epoch": 0.93,
"learning_rate": 3.3696087749918033e-06,
"loss": 2.7163,
"step": 281600
},
{
"epoch": 0.93,
"learning_rate": 3.3530496871988315e-06,
"loss": 2.7205,
"step": 281700
},
{
"epoch": 0.93,
"learning_rate": 3.33649059940586e-06,
"loss": 2.7341,
"step": 281800
},
{
"epoch": 0.93,
"learning_rate": 3.3199315116128887e-06,
"loss": 2.714,
"step": 281900
},
{
"epoch": 0.93,
"learning_rate": 3.303372423819917e-06,
"loss": 2.7277,
"step": 282000
},
{
"epoch": 0.93,
"learning_rate": 3.2868133360269447e-06,
"loss": 2.7128,
"step": 282100
},
{
"epoch": 0.93,
"learning_rate": 3.2702542482339733e-06,
"loss": 2.7217,
"step": 282200
},
{
"epoch": 0.93,
"learning_rate": 3.253695160441002e-06,
"loss": 2.7212,
"step": 282300
},
{
"epoch": 0.94,
"learning_rate": 3.2371360726480305e-06,
"loss": 2.7114,
"step": 282400
},
{
"epoch": 0.94,
"learning_rate": 3.2205769848550587e-06,
"loss": 2.7221,
"step": 282500
},
{
"epoch": 0.94,
"learning_rate": 3.2040178970620864e-06,
"loss": 2.7163,
"step": 282600
},
{
"epoch": 0.94,
"learning_rate": 3.187458809269115e-06,
"loss": 2.731,
"step": 282700
},
{
"epoch": 0.94,
"learning_rate": 3.1708997214761436e-06,
"loss": 2.7097,
"step": 282800
},
{
"epoch": 0.94,
"learning_rate": 3.154340633683172e-06,
"loss": 2.7065,
"step": 282900
},
{
"epoch": 0.94,
"learning_rate": 3.1377815458902004e-06,
"loss": 2.7314,
"step": 283000
},
{
"epoch": 0.94,
"learning_rate": 3.1212224580972286e-06,
"loss": 2.718,
"step": 283100
},
{
"epoch": 0.94,
"learning_rate": 3.1046633703042568e-06,
"loss": 2.716,
"step": 283200
},
{
"epoch": 0.94,
"learning_rate": 3.0881042825112854e-06,
"loss": 2.7145,
"step": 283300
},
{
"epoch": 0.94,
"learning_rate": 3.0715451947183136e-06,
"loss": 2.7253,
"step": 283400
},
{
"epoch": 0.94,
"learning_rate": 3.0549861069253417e-06,
"loss": 2.7139,
"step": 283500
},
{
"epoch": 0.94,
"learning_rate": 3.0384270191323704e-06,
"loss": 2.7446,
"step": 283600
},
{
"epoch": 0.94,
"learning_rate": 3.0218679313393985e-06,
"loss": 2.7274,
"step": 283700
},
{
"epoch": 0.94,
"learning_rate": 3.0053088435464267e-06,
"loss": 2.7205,
"step": 283800
},
{
"epoch": 0.94,
"learning_rate": 2.9887497557534553e-06,
"loss": 2.7416,
"step": 283900
},
{
"epoch": 0.94,
"learning_rate": 2.9721906679604835e-06,
"loss": 2.7285,
"step": 284000
},
{
"epoch": 0.94,
"learning_rate": 2.955631580167512e-06,
"loss": 2.7306,
"step": 284100
},
{
"epoch": 0.94,
"learning_rate": 2.9390724923745403e-06,
"loss": 2.7156,
"step": 284200
},
{
"epoch": 0.94,
"learning_rate": 2.9225134045815685e-06,
"loss": 2.7181,
"step": 284300
},
{
"epoch": 0.94,
"learning_rate": 2.905954316788597e-06,
"loss": 2.7271,
"step": 284400
},
{
"epoch": 0.94,
"learning_rate": 2.8893952289956253e-06,
"loss": 2.7315,
"step": 284500
},
{
"epoch": 0.94,
"learning_rate": 2.872836141202654e-06,
"loss": 2.7206,
"step": 284600
},
{
"epoch": 0.94,
"learning_rate": 2.8562770534096816e-06,
"loss": 2.7288,
"step": 284700
},
{
"epoch": 0.94,
"learning_rate": 2.8397179656167102e-06,
"loss": 2.7197,
"step": 284800
},
{
"epoch": 0.94,
"learning_rate": 2.823158877823739e-06,
"loss": 2.7203,
"step": 284900
},
{
"epoch": 0.94,
"learning_rate": 2.806599790030767e-06,
"loss": 2.7156,
"step": 285000
},
{
"epoch": 0.94,
"learning_rate": 2.790040702237795e-06,
"loss": 2.7297,
"step": 285100
},
{
"epoch": 0.94,
"learning_rate": 2.7734816144448234e-06,
"loss": 2.7292,
"step": 285200
},
{
"epoch": 0.94,
"learning_rate": 2.756922526651852e-06,
"loss": 2.7221,
"step": 285300
},
{
"epoch": 0.95,
"learning_rate": 2.7403634388588806e-06,
"loss": 2.7198,
"step": 285400
},
{
"epoch": 0.95,
"learning_rate": 2.7238043510659088e-06,
"loss": 2.7139,
"step": 285500
},
{
"epoch": 0.95,
"learning_rate": 2.707245263272937e-06,
"loss": 2.7316,
"step": 285600
},
{
"epoch": 0.95,
"learning_rate": 2.690686175479965e-06,
"loss": 2.7253,
"step": 285700
},
{
"epoch": 0.95,
"learning_rate": 2.6741270876869937e-06,
"loss": 2.7337,
"step": 285800
},
{
"epoch": 0.95,
"learning_rate": 2.657567999894022e-06,
"loss": 2.728,
"step": 285900
},
{
"epoch": 0.95,
"learning_rate": 2.64100891210105e-06,
"loss": 2.7149,
"step": 286000
},
{
"epoch": 0.95,
"learning_rate": 2.6244498243080787e-06,
"loss": 2.73,
"step": 286100
},
{
"epoch": 0.95,
"learning_rate": 2.607890736515107e-06,
"loss": 2.7245,
"step": 286200
},
{
"epoch": 0.95,
"learning_rate": 2.5913316487221355e-06,
"loss": 2.7081,
"step": 286300
},
{
"epoch": 0.95,
"learning_rate": 2.5747725609291637e-06,
"loss": 2.7265,
"step": 286400
},
{
"epoch": 0.95,
"learning_rate": 2.558213473136192e-06,
"loss": 2.7257,
"step": 286500
},
{
"epoch": 0.95,
"learning_rate": 2.5416543853432205e-06,
"loss": 2.7335,
"step": 286600
},
{
"epoch": 0.95,
"learning_rate": 2.5250952975502487e-06,
"loss": 2.7297,
"step": 286700
},
{
"epoch": 0.95,
"learning_rate": 2.508536209757277e-06,
"loss": 2.719,
"step": 286800
},
{
"epoch": 0.95,
"learning_rate": 2.4919771219643054e-06,
"loss": 2.7298,
"step": 286900
},
{
"epoch": 0.95,
"learning_rate": 2.4754180341713336e-06,
"loss": 2.7118,
"step": 287000
},
{
"epoch": 0.95,
"learning_rate": 2.4588589463783622e-06,
"loss": 2.7186,
"step": 287100
},
{
"epoch": 0.95,
"learning_rate": 2.4422998585853904e-06,
"loss": 2.7432,
"step": 287200
},
{
"epoch": 0.95,
"learning_rate": 2.4257407707924186e-06,
"loss": 2.7148,
"step": 287300
},
{
"epoch": 0.95,
"learning_rate": 2.409181682999447e-06,
"loss": 2.7257,
"step": 287400
},
{
"epoch": 0.95,
"learning_rate": 2.3926225952064754e-06,
"loss": 2.7234,
"step": 287500
},
{
"epoch": 0.95,
"learning_rate": 2.376063507413504e-06,
"loss": 2.7229,
"step": 287600
},
{
"epoch": 0.95,
"learning_rate": 2.3595044196205317e-06,
"loss": 2.7151,
"step": 287700
},
{
"epoch": 0.95,
"learning_rate": 2.3429453318275604e-06,
"loss": 2.7244,
"step": 287800
},
{
"epoch": 0.95,
"learning_rate": 2.3263862440345885e-06,
"loss": 2.7171,
"step": 287900
},
{
"epoch": 0.95,
"learning_rate": 2.309827156241617e-06,
"loss": 2.7241,
"step": 288000
},
{
"epoch": 0.95,
"learning_rate": 2.2932680684486453e-06,
"loss": 2.7232,
"step": 288100
},
{
"epoch": 0.95,
"learning_rate": 2.2767089806556735e-06,
"loss": 2.7207,
"step": 288200
},
{
"epoch": 0.95,
"learning_rate": 2.260149892862702e-06,
"loss": 2.732,
"step": 288300
},
{
"epoch": 0.96,
"learning_rate": 2.2435908050697303e-06,
"loss": 2.712,
"step": 288400
},
{
"epoch": 0.96,
"learning_rate": 2.227031717276759e-06,
"loss": 2.7331,
"step": 288500
},
{
"epoch": 0.96,
"learning_rate": 2.210472629483787e-06,
"loss": 2.7276,
"step": 288600
},
{
"epoch": 0.96,
"learning_rate": 2.1939135416908153e-06,
"loss": 2.7387,
"step": 288700
},
{
"epoch": 0.96,
"learning_rate": 2.177354453897844e-06,
"loss": 2.7252,
"step": 288800
},
{
"epoch": 0.96,
"learning_rate": 2.160795366104872e-06,
"loss": 2.7242,
"step": 288900
},
{
"epoch": 0.96,
"learning_rate": 2.1442362783119002e-06,
"loss": 2.7224,
"step": 289000
},
{
"epoch": 0.96,
"learning_rate": 2.127677190518929e-06,
"loss": 2.7175,
"step": 289100
},
{
"epoch": 0.96,
"learning_rate": 2.111118102725957e-06,
"loss": 2.72,
"step": 289200
},
{
"epoch": 0.96,
"learning_rate": 2.0945590149329856e-06,
"loss": 2.7025,
"step": 289300
},
{
"epoch": 0.96,
"learning_rate": 2.077999927140014e-06,
"loss": 2.713,
"step": 289400
},
{
"epoch": 0.96,
"learning_rate": 2.061440839347042e-06,
"loss": 2.7141,
"step": 289500
},
{
"epoch": 0.96,
"learning_rate": 2.0448817515540706e-06,
"loss": 2.7224,
"step": 289600
},
{
"epoch": 0.96,
"learning_rate": 2.0283226637610988e-06,
"loss": 2.7317,
"step": 289700
},
{
"epoch": 0.96,
"learning_rate": 2.0117635759681274e-06,
"loss": 2.7207,
"step": 289800
},
{
"epoch": 0.96,
"learning_rate": 1.995204488175155e-06,
"loss": 2.7055,
"step": 289900
},
{
"epoch": 0.96,
"learning_rate": 1.9786454003821837e-06,
"loss": 2.7205,
"step": 290000
},
{
"epoch": 0.96,
"learning_rate": 1.9620863125892123e-06,
"loss": 2.7238,
"step": 290100
},
{
"epoch": 0.96,
"learning_rate": 1.9455272247962405e-06,
"loss": 2.7276,
"step": 290200
},
{
"epoch": 0.96,
"learning_rate": 1.9289681370032687e-06,
"loss": 2.7268,
"step": 290300
},
{
"epoch": 0.96,
"learning_rate": 1.912409049210297e-06,
"loss": 2.7161,
"step": 290400
},
{
"epoch": 0.96,
"learning_rate": 1.8958499614173255e-06,
"loss": 2.7319,
"step": 290500
},
{
"epoch": 0.96,
"learning_rate": 1.8792908736243539e-06,
"loss": 2.7139,
"step": 290600
},
{
"epoch": 0.96,
"learning_rate": 1.862731785831382e-06,
"loss": 2.7192,
"step": 290700
},
{
"epoch": 0.96,
"learning_rate": 1.8461726980384107e-06,
"loss": 2.7209,
"step": 290800
},
{
"epoch": 0.96,
"learning_rate": 1.8296136102454387e-06,
"loss": 2.7222,
"step": 290900
},
{
"epoch": 0.96,
"learning_rate": 1.8130545224524673e-06,
"loss": 2.7217,
"step": 291000
},
{
"epoch": 0.96,
"learning_rate": 1.7964954346594957e-06,
"loss": 2.7228,
"step": 291100
},
{
"epoch": 0.96,
"learning_rate": 1.7799363468665238e-06,
"loss": 2.7226,
"step": 291200
},
{
"epoch": 0.96,
"learning_rate": 1.7633772590735522e-06,
"loss": 2.7128,
"step": 291300
},
{
"epoch": 0.97,
"learning_rate": 1.7468181712805804e-06,
"loss": 2.7225,
"step": 291400
},
{
"epoch": 0.97,
"learning_rate": 1.7302590834876088e-06,
"loss": 2.7256,
"step": 291500
},
{
"epoch": 0.97,
"learning_rate": 1.7136999956946374e-06,
"loss": 2.7139,
"step": 291600
},
{
"epoch": 0.97,
"learning_rate": 1.6971409079016656e-06,
"loss": 2.7198,
"step": 291700
},
{
"epoch": 0.97,
"learning_rate": 1.680581820108694e-06,
"loss": 2.7132,
"step": 291800
},
{
"epoch": 0.97,
"learning_rate": 1.6640227323157222e-06,
"loss": 2.7164,
"step": 291900
},
{
"epoch": 0.97,
"learning_rate": 1.6474636445227506e-06,
"loss": 2.7285,
"step": 292000
},
{
"epoch": 0.97,
"learning_rate": 1.630904556729779e-06,
"loss": 2.7306,
"step": 292100
},
{
"epoch": 0.97,
"learning_rate": 1.6143454689368071e-06,
"loss": 2.7126,
"step": 292200
},
{
"epoch": 0.97,
"learning_rate": 1.5977863811438357e-06,
"loss": 2.7196,
"step": 292300
},
{
"epoch": 0.97,
"learning_rate": 1.5812272933508637e-06,
"loss": 2.7182,
"step": 292400
},
{
"epoch": 0.97,
"learning_rate": 1.5646682055578923e-06,
"loss": 2.7335,
"step": 292500
},
{
"epoch": 0.97,
"learning_rate": 1.5481091177649205e-06,
"loss": 2.7233,
"step": 292600
},
{
"epoch": 0.97,
"learning_rate": 1.531550029971949e-06,
"loss": 2.7149,
"step": 292700
},
{
"epoch": 0.97,
"learning_rate": 1.5149909421789773e-06,
"loss": 2.7299,
"step": 292800
},
{
"epoch": 0.97,
"learning_rate": 1.4984318543860057e-06,
"loss": 2.7152,
"step": 292900
},
{
"epoch": 0.97,
"learning_rate": 1.4818727665930339e-06,
"loss": 2.7089,
"step": 293000
},
{
"epoch": 0.97,
"learning_rate": 1.4653136788000623e-06,
"loss": 2.7286,
"step": 293100
},
{
"epoch": 0.97,
"learning_rate": 1.4487545910070906e-06,
"loss": 2.7195,
"step": 293200
},
{
"epoch": 0.97,
"learning_rate": 1.432195503214119e-06,
"loss": 2.7164,
"step": 293300
},
{
"epoch": 0.97,
"learning_rate": 1.4156364154211474e-06,
"loss": 2.7264,
"step": 293400
},
{
"epoch": 0.97,
"learning_rate": 1.3990773276281756e-06,
"loss": 2.7372,
"step": 293500
},
{
"epoch": 0.97,
"learning_rate": 1.382518239835204e-06,
"loss": 2.731,
"step": 293600
},
{
"epoch": 0.97,
"learning_rate": 1.3659591520422324e-06,
"loss": 2.7163,
"step": 293700
},
{
"epoch": 0.97,
"learning_rate": 1.3494000642492608e-06,
"loss": 2.7156,
"step": 293800
},
{
"epoch": 0.97,
"learning_rate": 1.332840976456289e-06,
"loss": 2.726,
"step": 293900
},
{
"epoch": 0.97,
"learning_rate": 1.3162818886633174e-06,
"loss": 2.7266,
"step": 294000
},
{
"epoch": 0.97,
"learning_rate": 1.2997228008703456e-06,
"loss": 2.6988,
"step": 294100
},
{
"epoch": 0.97,
"learning_rate": 1.2831637130773742e-06,
"loss": 2.7276,
"step": 294200
},
{
"epoch": 0.97,
"learning_rate": 1.2666046252844023e-06,
"loss": 2.7213,
"step": 294300
},
{
"epoch": 0.97,
"learning_rate": 1.2500455374914307e-06,
"loss": 2.7351,
"step": 294400
},
{
"epoch": 0.98,
"learning_rate": 1.2334864496984591e-06,
"loss": 2.7231,
"step": 294500
},
{
"epoch": 0.98,
"learning_rate": 1.2169273619054873e-06,
"loss": 2.7209,
"step": 294600
},
{
"epoch": 0.98,
"learning_rate": 1.200368274112516e-06,
"loss": 2.7231,
"step": 294700
},
{
"epoch": 0.98,
"learning_rate": 1.183809186319544e-06,
"loss": 2.7204,
"step": 294800
},
{
"epoch": 0.98,
"learning_rate": 1.1672500985265725e-06,
"loss": 2.7318,
"step": 294900
},
{
"epoch": 0.98,
"learning_rate": 1.1506910107336007e-06,
"loss": 2.7227,
"step": 295000
},
{
"epoch": 0.98,
"learning_rate": 1.134131922940629e-06,
"loss": 2.7249,
"step": 295100
},
{
"epoch": 0.98,
"learning_rate": 1.1175728351476575e-06,
"loss": 2.7216,
"step": 295200
},
{
"epoch": 0.98,
"learning_rate": 1.1010137473546859e-06,
"loss": 2.7317,
"step": 295300
},
{
"epoch": 0.98,
"learning_rate": 1.084454659561714e-06,
"loss": 2.7187,
"step": 295400
},
{
"epoch": 0.98,
"learning_rate": 1.0678955717687424e-06,
"loss": 2.7258,
"step": 295500
},
{
"epoch": 0.98,
"learning_rate": 1.0513364839757708e-06,
"loss": 2.7267,
"step": 295600
},
{
"epoch": 0.98,
"learning_rate": 1.0347773961827992e-06,
"loss": 2.7247,
"step": 295700
},
{
"epoch": 0.98,
"learning_rate": 1.0182183083898276e-06,
"loss": 2.7303,
"step": 295800
},
{
"epoch": 0.98,
"learning_rate": 1.0016592205968558e-06,
"loss": 2.7117,
"step": 295900
},
{
"epoch": 0.98,
"learning_rate": 9.851001328038842e-07,
"loss": 2.7089,
"step": 296000
},
{
"epoch": 0.98,
"learning_rate": 9.685410450109124e-07,
"loss": 2.7345,
"step": 296100
},
{
"epoch": 0.98,
"learning_rate": 9.519819572179409e-07,
"loss": 2.7273,
"step": 296200
},
{
"epoch": 0.98,
"learning_rate": 9.354228694249692e-07,
"loss": 2.7351,
"step": 296300
},
{
"epoch": 0.98,
"learning_rate": 9.188637816319976e-07,
"loss": 2.7199,
"step": 296400
},
{
"epoch": 0.98,
"learning_rate": 9.023046938390258e-07,
"loss": 2.7407,
"step": 296500
},
{
"epoch": 0.98,
"learning_rate": 8.857456060460541e-07,
"loss": 2.7201,
"step": 296600
},
{
"epoch": 0.98,
"learning_rate": 8.691865182530826e-07,
"loss": 2.7112,
"step": 296700
},
{
"epoch": 0.98,
"learning_rate": 8.526274304601109e-07,
"loss": 2.7131,
"step": 296800
},
{
"epoch": 0.98,
"learning_rate": 8.360683426671392e-07,
"loss": 2.7137,
"step": 296900
},
{
"epoch": 0.98,
"learning_rate": 8.195092548741675e-07,
"loss": 2.719,
"step": 297000
},
{
"epoch": 0.98,
"learning_rate": 8.029501670811958e-07,
"loss": 2.7234,
"step": 297100
},
{
"epoch": 0.98,
"learning_rate": 7.863910792882243e-07,
"loss": 2.7248,
"step": 297200
},
{
"epoch": 0.98,
"learning_rate": 7.698319914952525e-07,
"loss": 2.7146,
"step": 297300
},
{
"epoch": 0.98,
"learning_rate": 7.532729037022809e-07,
"loss": 2.7145,
"step": 297400
},
{
"epoch": 0.99,
"learning_rate": 7.367138159093093e-07,
"loss": 2.7288,
"step": 297500
},
{
"epoch": 0.99,
"learning_rate": 7.201547281163375e-07,
"loss": 2.714,
"step": 297600
},
{
"epoch": 0.99,
"learning_rate": 7.035956403233659e-07,
"loss": 2.7175,
"step": 297700
},
{
"epoch": 0.99,
"learning_rate": 6.870365525303942e-07,
"loss": 2.7141,
"step": 297800
},
{
"epoch": 0.99,
"learning_rate": 6.704774647374226e-07,
"loss": 2.7221,
"step": 297900
},
{
"epoch": 0.99,
"learning_rate": 6.539183769444509e-07,
"loss": 2.7387,
"step": 298000
},
{
"epoch": 0.99,
"learning_rate": 6.373592891514793e-07,
"loss": 2.7228,
"step": 298100
},
{
"epoch": 0.99,
"learning_rate": 6.208002013585076e-07,
"loss": 2.7076,
"step": 298200
},
{
"epoch": 0.99,
"learning_rate": 6.042411135655359e-07,
"loss": 2.7287,
"step": 298300
},
{
"epoch": 0.99,
"learning_rate": 5.876820257725643e-07,
"loss": 2.7311,
"step": 298400
},
{
"epoch": 0.99,
"learning_rate": 5.711229379795926e-07,
"loss": 2.7195,
"step": 298500
},
{
"epoch": 0.99,
"learning_rate": 5.545638501866209e-07,
"loss": 2.7107,
"step": 298600
},
{
"epoch": 0.99,
"learning_rate": 5.380047623936492e-07,
"loss": 2.7212,
"step": 298700
},
{
"epoch": 0.99,
"learning_rate": 5.214456746006776e-07,
"loss": 2.7202,
"step": 298800
},
{
"epoch": 0.99,
"learning_rate": 5.04886586807706e-07,
"loss": 2.6954,
"step": 298900
},
{
"epoch": 0.99,
"learning_rate": 4.883274990147343e-07,
"loss": 2.7267,
"step": 299000
},
{
"epoch": 0.99,
"learning_rate": 4.7176841122176265e-07,
"loss": 2.723,
"step": 299100
},
{
"epoch": 0.99,
"learning_rate": 4.55209323428791e-07,
"loss": 2.7153,
"step": 299200
},
{
"epoch": 0.99,
"learning_rate": 4.386502356358193e-07,
"loss": 2.711,
"step": 299300
},
{
"epoch": 0.99,
"learning_rate": 4.2209114784284767e-07,
"loss": 2.7385,
"step": 299400
},
{
"epoch": 0.99,
"learning_rate": 4.0553206004987596e-07,
"loss": 2.7214,
"step": 299500
},
{
"epoch": 0.99,
"learning_rate": 3.889729722569043e-07,
"loss": 2.7111,
"step": 299600
},
{
"epoch": 0.99,
"learning_rate": 3.7241388446393264e-07,
"loss": 2.7106,
"step": 299700
},
{
"epoch": 0.99,
"learning_rate": 3.5585479667096104e-07,
"loss": 2.7095,
"step": 299800
},
{
"epoch": 0.99,
"learning_rate": 3.392957088779894e-07,
"loss": 2.7217,
"step": 299900
},
{
"epoch": 0.99,
"learning_rate": 3.2273662108501766e-07,
"loss": 2.7331,
"step": 300000
},
{
"epoch": 0.99,
"learning_rate": 3.06177533292046e-07,
"loss": 2.7229,
"step": 300100
},
{
"epoch": 0.99,
"learning_rate": 2.8961844549907435e-07,
"loss": 2.7105,
"step": 300200
},
{
"epoch": 0.99,
"learning_rate": 2.730593577061027e-07,
"loss": 2.7208,
"step": 300300
},
{
"epoch": 0.99,
"learning_rate": 2.5650026991313103e-07,
"loss": 2.7261,
"step": 300400
},
{
"epoch": 1.0,
"learning_rate": 2.3994118212015937e-07,
"loss": 2.7352,
"step": 300500
},
{
"epoch": 1.0,
"learning_rate": 2.233820943271877e-07,
"loss": 2.7271,
"step": 300600
},
{
"epoch": 1.0,
"learning_rate": 2.0682300653421605e-07,
"loss": 2.727,
"step": 300700
},
{
"epoch": 1.0,
"learning_rate": 1.902639187412444e-07,
"loss": 2.7298,
"step": 300800
},
{
"epoch": 1.0,
"learning_rate": 1.7370483094827273e-07,
"loss": 2.7344,
"step": 300900
},
{
"epoch": 1.0,
"learning_rate": 1.5714574315530107e-07,
"loss": 2.7185,
"step": 301000
},
{
"epoch": 1.0,
"learning_rate": 1.4058665536232941e-07,
"loss": 2.7195,
"step": 301100
},
{
"epoch": 1.0,
"learning_rate": 1.2402756756935775e-07,
"loss": 2.7201,
"step": 301200
},
{
"epoch": 1.0,
"learning_rate": 1.074684797763861e-07,
"loss": 2.7188,
"step": 301300
},
{
"epoch": 1.0,
"learning_rate": 9.090939198341442e-08,
"loss": 2.7252,
"step": 301400
},
{
"epoch": 1.0,
"learning_rate": 7.435030419044276e-08,
"loss": 2.7194,
"step": 301500
}
],
"max_steps": 301949,
"num_train_epochs": 1,
"total_flos": 8.170868148535296e+18,
"trial_name": null,
"trial_params": null
}